-rw-r--r--contrib/llvm/include/llvm-c/Core.h84
-rw-r--r--contrib/llvm/include/llvm-c/Transforms/Scalar.h6
-rw-r--r--contrib/llvm/include/llvm-c/lto.h44
-rw-r--r--contrib/llvm/include/llvm/ADT/APFloat.h767
-rw-r--r--contrib/llvm/include/llvm/ADT/APInt.h151
-rw-r--r--contrib/llvm/include/llvm/ADT/APSInt.h28
-rw-r--r--contrib/llvm/include/llvm/ADT/AllocatorList.h226
-rw-r--r--contrib/llvm/include/llvm/ADT/ArrayRef.h161
-rw-r--r--contrib/llvm/include/llvm/ADT/BitVector.h4
-rw-r--r--contrib/llvm/include/llvm/ADT/CachedHashString.h184
-rw-r--r--contrib/llvm/include/llvm/ADT/DAGDeltaAlgorithm.h6
-rw-r--r--contrib/llvm/include/llvm/ADT/DenseMap.h93
-rw-r--r--contrib/llvm/include/llvm/ADT/DenseMapInfo.h30
-rw-r--r--contrib/llvm/include/llvm/ADT/DenseSet.h88
-rw-r--r--contrib/llvm/include/llvm/ADT/DepthFirstIterator.h120
-rw-r--r--contrib/llvm/include/llvm/ADT/EpochTracker.h33
-rw-r--r--contrib/llvm/include/llvm/ADT/EquivalenceClasses.h17
-rw-r--r--contrib/llvm/include/llvm/ADT/FoldingSet.h39
-rw-r--r--contrib/llvm/include/llvm/ADT/GraphTraits.h32
-rw-r--r--contrib/llvm/include/llvm/ADT/ImmutableList.h32
-rw-r--r--contrib/llvm/include/llvm/ADT/ImmutableMap.h20
-rw-r--r--contrib/llvm/include/llvm/ADT/ImmutableSet.h35
-rw-r--r--contrib/llvm/include/llvm/ADT/IntervalMap.h51
-rw-r--r--contrib/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h429
-rw-r--r--contrib/llvm/include/llvm/ADT/MapVector.h23
-rw-r--r--contrib/llvm/include/llvm/ADT/Optional.h169
-rw-r--r--contrib/llvm/include/llvm/ADT/PackedVector.h8
-rw-r--r--contrib/llvm/include/llvm/ADT/PointerSumType.h2
-rw-r--r--contrib/llvm/include/llvm/ADT/PointerUnion.h19
-rw-r--r--contrib/llvm/include/llvm/ADT/PostOrderIterator.h86
-rw-r--r--contrib/llvm/include/llvm/ADT/PriorityQueue.h3
-rw-r--r--contrib/llvm/include/llvm/ADT/PriorityWorklist.h20
-rw-r--r--contrib/llvm/include/llvm/ADT/SCCIterator.h17
-rw-r--r--contrib/llvm/include/llvm/ADT/STLExtras.h468
-rw-r--r--contrib/llvm/include/llvm/ADT/ScopeExit.h54
-rw-r--r--contrib/llvm/include/llvm/ADT/ScopedHashTable.h24
-rw-r--r--contrib/llvm/include/llvm/ADT/SetVector.h36
-rw-r--r--contrib/llvm/include/llvm/ADT/SmallPtrSet.h125
-rw-r--r--contrib/llvm/include/llvm/ADT/SmallSet.h13
-rw-r--r--contrib/llvm/include/llvm/ADT/SmallString.h8
-rw-r--r--contrib/llvm/include/llvm/ADT/SmallVector.h33
-rw-r--r--contrib/llvm/include/llvm/ADT/SparseBitVector.h115
-rw-r--r--contrib/llvm/include/llvm/ADT/SparseMultiSet.h32
-rw-r--r--contrib/llvm/include/llvm/ADT/SparseSet.h23
-rw-r--r--contrib/llvm/include/llvm/ADT/Statistic.h13
-rw-r--r--contrib/llvm/include/llvm/ADT/StringExtras.h61
-rw-r--r--contrib/llvm/include/llvm/ADT/StringMap.h33
-rw-r--r--contrib/llvm/include/llvm/ADT/StringRef.h303
-rw-r--r--contrib/llvm/include/llvm/ADT/StringSwitch.h179
-rw-r--r--contrib/llvm/include/llvm/ADT/TinyPtrVector.h20
-rw-r--r--contrib/llvm/include/llvm/ADT/Triple.h29
-rw-r--r--contrib/llvm/include/llvm/ADT/Twine.h30
-rw-r--r--contrib/llvm/include/llvm/ADT/ilist.h784
-rw-r--r--contrib/llvm/include/llvm/ADT/ilist_base.h95
-rw-r--r--contrib/llvm/include/llvm/ADT/ilist_iterator.h185
-rw-r--r--contrib/llvm/include/llvm/ADT/ilist_node.h241
-rw-r--r--contrib/llvm/include/llvm/ADT/ilist_node_base.h53
-rw-r--r--contrib/llvm/include/llvm/ADT/ilist_node_options.h133
-rw-r--r--contrib/llvm/include/llvm/ADT/iterator.h22
-rw-r--r--contrib/llvm/include/llvm/ADT/simple_ilist.h310
-rw-r--r--contrib/llvm/include/llvm/Analysis/AliasAnalysis.h90
-rw-r--r--contrib/llvm/include/llvm/Analysis/AliasAnalysisEvaluator.h2
-rw-r--r--contrib/llvm/include/llvm/Analysis/AliasSetTracker.h87
-rw-r--r--contrib/llvm/include/llvm/Analysis/AssumptionCache.h10
-rw-r--r--contrib/llvm/include/llvm/Analysis/BasicAliasAnalysis.h27
-rw-r--r--contrib/llvm/include/llvm/Analysis/BlockFrequencyInfo.h11
-rw-r--r--contrib/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h21
-rw-r--r--contrib/llvm/include/llvm/Analysis/BranchProbabilityInfo.h6
-rw-r--r--contrib/llvm/include/llvm/Analysis/CFGPrinter.h62
-rw-r--r--contrib/llvm/include/llvm/Analysis/CFLAndersAliasAnalysis.h9
-rw-r--r--contrib/llvm/include/llvm/Analysis/CFLSteensAliasAnalysis.h19
-rw-r--r--contrib/llvm/include/llvm/Analysis/CGSCCPassManager.h675
-rw-r--r--contrib/llvm/include/llvm/Analysis/CallGraph.h77
-rw-r--r--contrib/llvm/include/llvm/Analysis/CallGraphSCCPass.h4
-rw-r--r--contrib/llvm/include/llvm/Analysis/CodeMetrics.h2
-rw-r--r--contrib/llvm/include/llvm/Analysis/ConstantFolding.h28
-rw-r--r--contrib/llvm/include/llvm/Analysis/DemandedBits.h6
-rw-r--r--contrib/llvm/include/llvm/Analysis/DependenceAnalysis.h16
-rw-r--r--contrib/llvm/include/llvm/Analysis/DominanceFrontier.h6
-rw-r--r--contrib/llvm/include/llvm/Analysis/EHPersonalities.h5
-rw-r--r--contrib/llvm/include/llvm/Analysis/GlobalsModRef.h4
-rw-r--r--contrib/llvm/include/llvm/Analysis/IVUsers.h45
-rw-r--r--contrib/llvm/include/llvm/Analysis/InlineCost.h98
-rw-r--r--contrib/llvm/include/llvm/Analysis/InstructionSimplify.h13
-rw-r--r--contrib/llvm/include/llvm/Analysis/Interval.h24
-rw-r--r--contrib/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h1
-rw-r--r--contrib/llvm/include/llvm/Analysis/LazyBlockFrequencyInfo.h13
-rw-r--r--contrib/llvm/include/llvm/Analysis/LazyBranchProbabilityInfo.h109
-rw-r--r--contrib/llvm/include/llvm/Analysis/LazyCallGraph.h295
-rw-r--r--contrib/llvm/include/llvm/Analysis/LazyValueInfo.h2
-rw-r--r--contrib/llvm/include/llvm/Analysis/Loads.h5
-rw-r--r--contrib/llvm/include/llvm/Analysis/LoopAccessAnalysis.h62
-rw-r--r--contrib/llvm/include/llvm/Analysis/LoopInfo.h84
-rw-r--r--contrib/llvm/include/llvm/Analysis/LoopInfoImpl.h146
-rw-r--r--contrib/llvm/include/llvm/Analysis/LoopIterator.h66
-rw-r--r--contrib/llvm/include/llvm/Analysis/LoopPass.h20
-rw-r--r--contrib/llvm/include/llvm/Analysis/LoopPassManager.h57
-rw-r--r--contrib/llvm/include/llvm/Analysis/MemoryBuiltins.h10
-rw-r--r--contrib/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h29
-rw-r--r--contrib/llvm/include/llvm/Analysis/ModuleSummaryAnalysis.h62
-rw-r--r--contrib/llvm/include/llvm/Analysis/ObjCARCAliasAnalysis.h9
-rw-r--r--contrib/llvm/include/llvm/Analysis/OptimizationDiagnosticInfo.h196
-rw-r--r--contrib/llvm/include/llvm/Analysis/PostDominators.h14
-rw-r--r--contrib/llvm/include/llvm/Analysis/ProfileSummaryInfo.h33
-rw-r--r--contrib/llvm/include/llvm/Analysis/RegionInfo.h36
-rw-r--r--contrib/llvm/include/llvm/Analysis/RegionInfoImpl.h187
-rw-r--r--contrib/llvm/include/llvm/Analysis/RegionIterator.h216
-rw-r--r--contrib/llvm/include/llvm/Analysis/RegionPass.h4
-rw-r--r--contrib/llvm/include/llvm/Analysis/ScalarEvolution.h3222
-rw-r--r--contrib/llvm/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h4
-rw-r--r--contrib/llvm/include/llvm/Analysis/ScalarEvolutionExpander.h176
-rw-r--r--contrib/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h46
-rw-r--r--contrib/llvm/include/llvm/Analysis/ScopedNoAliasAA.h15
-rw-r--r--contrib/llvm/include/llvm/Analysis/TargetFolder.h5
-rw-r--r--contrib/llvm/include/llvm/Analysis/TargetLibraryInfo.def3
-rw-r--r--contrib/llvm/include/llvm/Analysis/TargetLibraryInfo.h76
-rw-r--r--contrib/llvm/include/llvm/Analysis/TargetTransformInfo.h163
-rw-r--r--contrib/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h81
-rw-r--r--contrib/llvm/include/llvm/Analysis/TypeBasedAliasAnalysis.h12
-rw-r--r--contrib/llvm/include/llvm/Analysis/ValueTracking.h75
-rw-r--r--contrib/llvm/include/llvm/Bitcode/BitCodes.h8
-rw-r--r--contrib/llvm/include/llvm/Bitcode/BitcodeReader.h (renamed from contrib/llvm/include/llvm/Bitcode/ReaderWriter.h)180
-rw-r--r--contrib/llvm/include/llvm/Bitcode/BitcodeWriter.h80
-rw-r--r--contrib/llvm/include/llvm/Bitcode/BitcodeWriterPass.h4
-rw-r--r--contrib/llvm/include/llvm/Bitcode/BitstreamReader.h241
-rw-r--r--contrib/llvm/include/llvm/Bitcode/BitstreamWriter.h19
-rw-r--r--contrib/llvm/include/llvm/Bitcode/LLVMBitCodes.h84
-rw-r--r--contrib/llvm/include/llvm/CodeGen/Analysis.h14
-rw-r--r--contrib/llvm/include/llvm/CodeGen/AsmPrinter.h53
-rw-r--r--contrib/llvm/include/llvm/CodeGen/BasicTTIImpl.h118
-rw-r--r--contrib/llvm/include/llvm/CodeGen/CallingConvLower.h50
-rw-r--r--contrib/llvm/include/llvm/CodeGen/CommandFlags.h91
-rw-r--r--contrib/llvm/include/llvm/CodeGen/DIE.h319
-rw-r--r--contrib/llvm/include/llvm/CodeGen/DIEValue.def1
-rw-r--r--contrib/llvm/include/llvm/CodeGen/FastISel.h15
-rw-r--r--contrib/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h61
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h124
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GlobalISel/GISelAccessor.h6
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h290
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelect.h53
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h63
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GlobalISel/Legalizer.h65
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h104
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h207
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h426
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GlobalISel/RegBankSelect.h29
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GlobalISel/RegisterBank.h11
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h255
-rw-r--r--contrib/llvm/include/llvm/CodeGen/GlobalISel/Utils.h43
-rw-r--r--contrib/llvm/include/llvm/CodeGen/ISDOpcodes.h2
-rw-r--r--contrib/llvm/include/llvm/CodeGen/LiveInterval.h52
-rw-r--r--contrib/llvm/include/llvm/CodeGen/LiveIntervalAnalysis.h34
-rw-r--r--contrib/llvm/include/llvm/CodeGen/LivePhysRegs.h30
-rw-r--r--contrib/llvm/include/llvm/CodeGen/LiveVariables.h3
-rw-r--r--contrib/llvm/include/llvm/CodeGen/LowLevelType.h206
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MIRYamlMapping.h16
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineBasicBlock.h152
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h1
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineDominators.h12
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineFrameInfo.h5
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineFunction.h352
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h55
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineInstr.h59
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineInstrBuilder.h28
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineInstrBundle.h31
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineInstrBundleIterator.h218
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineLoopInfo.h38
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineMemOperand.h60
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineModuleInfo.h344
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineOperand.h47
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachinePassRegistry.h16
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineRegionInfo.h14
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineRegisterInfo.h77
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineScheduler.h124
-rw-r--r--contrib/llvm/include/llvm/CodeGen/MachineValueType.h4
-rw-r--r--contrib/llvm/include/llvm/CodeGen/PBQP/Graph.h25
-rw-r--r--contrib/llvm/include/llvm/CodeGen/PBQP/Math.h235
-rw-r--r--contrib/llvm/include/llvm/CodeGen/PBQP/Solution.h32
-rw-r--r--contrib/llvm/include/llvm/CodeGen/Passes.h29
-rw-r--r--contrib/llvm/include/llvm/CodeGen/PseudoSourceValue.h6
-rw-r--r--contrib/llvm/include/llvm/CodeGen/RegAllocPBQP.h76
-rw-r--r--contrib/llvm/include/llvm/CodeGen/RegisterPressure.h12
-rw-r--r--contrib/llvm/include/llvm/CodeGen/RegisterScavenging.h30
-rw-r--r--contrib/llvm/include/llvm/CodeGen/RuntimeLibcalls.h11
-rw-r--r--contrib/llvm/include/llvm/CodeGen/ScheduleDAG.h14
-rw-r--r--contrib/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h10
-rw-r--r--contrib/llvm/include/llvm/CodeGen/SelectionDAG.h169
-rw-r--r--contrib/llvm/include/llvm/CodeGen/SelectionDAGNodes.h454
-rw-r--r--contrib/llvm/include/llvm/CodeGen/SlotIndexes.h35
-rw-r--r--contrib/llvm/include/llvm/CodeGen/StackMaps.h85
-rw-r--r--contrib/llvm/include/llvm/CodeGen/TailDuplicator.h48
-rw-r--r--contrib/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h65
-rw-r--r--contrib/llvm/include/llvm/CodeGen/TargetPassConfig.h43
-rw-r--r--contrib/llvm/include/llvm/CodeGen/ValueTypes.h6
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/ByteStream.h58
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/CVDebugRecord.h55
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/CVRecord.h65
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/CVSymbolVisitor.h78
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h11
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/CodeView.h7
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/CodeViewError.h1
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/CodeViewOStream.h39
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h170
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/EnumTables.h1
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/FieldListRecordBuilder.h65
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/ListRecordBuilder.h65
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/MemoryTypeTableBuilder.h48
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/MethodListRecordBuilder.h35
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/ModuleSubstream.h30
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/ModuleSubstreamVisitor.h109
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/RecordSerialization.h155
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/StreamRef.h104
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h74
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolDumpDelegate.h10
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolDumper.h4
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h1244
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolRecordMapping.h44
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolSerializer.h96
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolVisitorCallbackPipeline.h71
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolVisitorCallbacks.h48
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolVisitorDelegate.h14
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/TypeDeserializer.h136
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/TypeDumper.h15
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h3
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h726
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/TypeRecordBuilder.h8
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/TypeRecordMapping.h52
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/TypeRecords.def3
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/TypeSerializer.h140
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/TypeTableBuilder.h141
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h114
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h45
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DIContext.h39
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h90
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h3
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h37
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h19
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h2
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h130
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h2
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h9
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h77
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h369
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h105
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFGdbIndex.h68
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h2
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h72
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/MSF/ByteStream.h169
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/MSF/IMSFFile.h (renamed from contrib/llvm/include/llvm/DebugInfo/PDB/Raw/IPDBFile.h)24
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/MSF/MSFBuilder.h (renamed from contrib/llvm/include/llvm/DebugInfo/PDB/Raw/MsfBuilder.h)49
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/MSF/MSFCommon.h (renamed from contrib/llvm/include/llvm/DebugInfo/PDB/Raw/MsfCommon.h)32
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/MSF/MSFError.h47
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/MSF/MSFStreamLayout.h35
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/MSF/MappedBlockStream.h144
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/MSF/SequencedItemStream.h93
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/MSF/StreamArray.h (renamed from contrib/llvm/include/llvm/DebugInfo/CodeView/StreamArray.h)50
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/MSF/StreamInterface.h (renamed from contrib/llvm/include/llvm/DebugInfo/CodeView/StreamInterface.h)32
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/MSF/StreamReader.h (renamed from contrib/llvm/include/llvm/DebugInfo/CodeView/StreamReader.h)50
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/MSF/StreamRef.h135
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/MSF/StreamWriter.h (renamed from contrib/llvm/include/llvm/DebugInfo/CodeView/StreamWriter.h)48
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h14
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAError.h9
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/GenericError.h7
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/IPDBEnumChildren.h11
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/IPDBSession.h2
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBContext.h24
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/PDBTypes.h10
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Raw/DbiStream.h79
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Raw/DbiStreamBuilder.h74
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Raw/DirectoryStreamData.h37
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Raw/GlobalsStream.h45
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Raw/IPDBStreamData.h38
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Raw/ISectionContribVisitor.h8
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Raw/IndexedStreamData.h34
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Raw/InfoStream.h14
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Raw/InfoStreamBuilder.h21
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Raw/MappedBlockStream.h68
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Raw/ModInfo.h25
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Raw/ModStream.h21
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Raw/NameHashTable.h14
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Raw/NameMap.h5
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Raw/NameMapBuilder.h4
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Raw/PDBFile.h64
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Raw/PDBFileBuilder.h33
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Raw/PublicsStream.h28
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Raw/RawConstants.h4
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Raw/RawError.h3
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Raw/RawSession.h5
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Raw/RawTypes.h222
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Raw/SymbolStream.h9
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Raw/TpiHashing.h95
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Raw/TpiStream.h30
-rw-r--r--contrib/llvm/include/llvm/DebugInfo/PDB/Raw/TpiStreamBuilder.h82
-rw-r--r--contrib/llvm/include/llvm/Demangle/Demangle.h28
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h27
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/JITEventListener.h22
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/JITSymbol.h197
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/JITSymbolFlags.h91
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/ObjectCache.h12
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/ObjectMemoryBuffer.h2
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h314
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h20
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/GlobalMappingLayer.h6
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h3
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h2
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h91
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/JITSymbol.h87
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/LambdaResolver.h7
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h30
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/LogicalDylib.h135
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/NullResolver.h7
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h47
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h4
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h15
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcError.h2
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h252
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h299
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h194
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/RPCChannel.h249
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/RPCSerialization.h373
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/RPCUtils.h1685
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/Orc/RawByteChannel.h175
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/RTDyldMemoryManager.h38
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/RuntimeDyld.h84
-rw-r--r--contrib/llvm/include/llvm/ExecutionEngine/SectionMemoryManager.h13
-rw-r--r--contrib/llvm/include/llvm/IR/Attributes.h19
-rw-r--r--contrib/llvm/include/llvm/IR/AutoUpgrade.h7
-rw-r--r--contrib/llvm/include/llvm/IR/BasicBlock.h36
-rw-r--r--contrib/llvm/include/llvm/IR/CFG.h123
-rw-r--r--contrib/llvm/include/llvm/IR/CallSite.h78
-rw-r--r--contrib/llvm/include/llvm/IR/CallingConv.h3
-rw-r--r--contrib/llvm/include/llvm/IR/Comdat.h8
-rw-r--r--contrib/llvm/include/llvm/IR/Constant.h14
-rw-r--r--contrib/llvm/include/llvm/IR/ConstantFolder.h38
-rw-r--r--contrib/llvm/include/llvm/IR/ConstantRange.h34
-rw-r--r--contrib/llvm/include/llvm/IR/Constants.h164
-rw-r--r--contrib/llvm/include/llvm/IR/DIBuilder.h205
-rw-r--r--contrib/llvm/include/llvm/IR/DataLayout.h29
-rw-r--r--contrib/llvm/include/llvm/IR/DebugInfo.h22
-rw-r--r--contrib/llvm/include/llvm/IR/DebugInfoFlags.def16
-rw-r--r--contrib/llvm/include/llvm/IR/DebugInfoMetadata.h846
-rw-r--r--contrib/llvm/include/llvm/IR/DebugLoc.h12
-rw-r--r--contrib/llvm/include/llvm/IR/DerivedTypes.h95
-rw-r--r--contrib/llvm/include/llvm/IR/DiagnosticInfo.h280
-rw-r--r--contrib/llvm/include/llvm/IR/DiagnosticPrinter.h8
-rw-r--r--contrib/llvm/include/llvm/IR/Dominators.h41
-rw-r--r--contrib/llvm/include/llvm/IR/Function.h121
-rw-r--r--contrib/llvm/include/llvm/IR/GVMaterializer.h17
-rw-r--r--contrib/llvm/include/llvm/IR/GetElementPtrTypeIterator.h109
-rw-r--r--contrib/llvm/include/llvm/IR/GlobalAlias.h12
-rw-r--r--contrib/llvm/include/llvm/IR/GlobalIFunc.h12
-rw-r--r--contrib/llvm/include/llvm/IR/GlobalIndirectSymbol.h15
-rw-r--r--contrib/llvm/include/llvm/IR/GlobalObject.h15
-rw-r--r--contrib/llvm/include/llvm/IR/GlobalValue.h62
-rw-r--r--contrib/llvm/include/llvm/IR/GlobalVariable.h39
-rw-r--r--contrib/llvm/include/llvm/IR/IRBuilder.h78
-rw-r--r--contrib/llvm/include/llvm/IR/IRPrintingPasses.h2
-rw-r--r--contrib/llvm/include/llvm/IR/InlineAsm.h17
-rw-r--r--contrib/llvm/include/llvm/IR/InstIterator.h12
-rw-r--r--contrib/llvm/include/llvm/IR/InstrTypes.h55
-rw-r--r--contrib/llvm/include/llvm/IR/Instruction.h73
-rw-r--r--contrib/llvm/include/llvm/IR/Instructions.h1091
-rw-r--r--contrib/llvm/include/llvm/IR/IntrinsicInst.h40
-rw-r--r--contrib/llvm/include/llvm/IR/Intrinsics.h19
-rw-r--r--contrib/llvm/include/llvm/IR/Intrinsics.td89
-rw-r--r--contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td259
-rw-r--r--contrib/llvm/include/llvm/IR/IntrinsicsNVVM.td33
-rw-r--r--contrib/llvm/include/llvm/IR/IntrinsicsPowerPC.td152
-rw-r--r--contrib/llvm/include/llvm/IR/IntrinsicsSystemZ.td5
-rw-r--r--contrib/llvm/include/llvm/IR/IntrinsicsX86.td1147
-rw-r--r--contrib/llvm/include/llvm/IR/LLVMContext.h51
-rw-r--r--contrib/llvm/include/llvm/IR/LegacyPassManagers.h4
-rw-r--r--contrib/llvm/include/llvm/IR/LegacyPassNameParser.h12
-rw-r--r--contrib/llvm/include/llvm/IR/MDBuilder.h3
-rw-r--r--contrib/llvm/include/llvm/IR/Mangler.h5
-rw-r--r--contrib/llvm/include/llvm/IR/Metadata.def1
-rw-r--r--contrib/llvm/include/llvm/IR/Metadata.h113
-rw-r--r--contrib/llvm/include/llvm/IR/Module.h114
-rw-r--r--contrib/llvm/include/llvm/IR/ModuleSummaryIndex.h192
-rw-r--r--contrib/llvm/include/llvm/IR/NoFolder.h49
-rw-r--r--contrib/llvm/include/llvm/IR/Operator.h68
-rw-r--r--contrib/llvm/include/llvm/IR/PassManager.h1074
-rw-r--r--contrib/llvm/include/llvm/IR/PassManagerInternal.h154
-rw-r--r--contrib/llvm/include/llvm/IR/PatternMatch.h35
-rw-r--r--contrib/llvm/include/llvm/IR/Statepoint.h32
-rw-r--r--contrib/llvm/include/llvm/IR/SymbolTableListTraits.h26
-rw-r--r--contrib/llvm/include/llvm/IR/Type.h55
-rw-r--r--contrib/llvm/include/llvm/IR/Use.h22
-rw-r--r--contrib/llvm/include/llvm/IR/UseListOrder.h14
-rw-r--r--contrib/llvm/include/llvm/IR/User.h29
-rw-r--r--contrib/llvm/include/llvm/IR/Value.h63
-rw-r--r--contrib/llvm/include/llvm/IR/ValueHandle.h11
-rw-r--r--contrib/llvm/include/llvm/IR/ValueMap.h37
-rw-r--r--contrib/llvm/include/llvm/IR/Verifier.h46
-rw-r--r--contrib/llvm/include/llvm/InitializePasses.h49
-rw-r--r--contrib/llvm/include/llvm/LTO/Caching.h37
-rw-r--r--contrib/llvm/include/llvm/LTO/Config.h181
-rw-r--r--contrib/llvm/include/llvm/LTO/LTO.h435
-rw-r--r--contrib/llvm/include/llvm/LTO/LTOBackend.h51
-rw-r--r--contrib/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h14
-rw-r--r--contrib/llvm/include/llvm/LTO/legacy/LTOModule.h66
-rw-r--r--contrib/llvm/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h45
-rw-r--r--contrib/llvm/include/llvm/LinkAllIR.h1
-rw-r--r--contrib/llvm/include/llvm/LinkAllPasses.h7
-rw-r--r--contrib/llvm/include/llvm/Linker/IRMover.h9
-rw-r--r--contrib/llvm/include/llvm/MC/ConstantPools.h2
-rw-r--r--contrib/llvm/include/llvm/MC/LaneBitmask.h89
-rw-r--r--contrib/llvm/include/llvm/MC/MCAsmBackend.h3
-rw-r--r--contrib/llvm/include/llvm/MC/MCAsmInfo.h51
-rw-r--r--contrib/llvm/include/llvm/MC/MCAssembler.h4
-rw-r--r--contrib/llvm/include/llvm/MC/MCCodeView.h113
-rw-r--r--contrib/llvm/include/llvm/MC/MCContext.h56
-rw-r--r--contrib/llvm/include/llvm/MC/MCELFStreamer.h2
-rw-r--r--contrib/llvm/include/llvm/MC/MCExpr.h5
-rw-r--r--contrib/llvm/include/llvm/MC/MCFixup.h4
-rw-r--r--contrib/llvm/include/llvm/MC/MCFragment.h21
-rw-r--r--contrib/llvm/include/llvm/MC/MCInstPrinter.h15
-rw-r--r--contrib/llvm/include/llvm/MC/MCInstrDesc.h98
-rw-r--r--contrib/llvm/include/llvm/MC/MCInstrInfo.h4
-rw-r--r--contrib/llvm/include/llvm/MC/MCInstrItineraries.h2
-rw-r--r--contrib/llvm/include/llvm/MC/MCObjectFileInfo.h6
-rw-r--r--contrib/llvm/include/llvm/MC/MCObjectStreamer.h18
-rw-r--r--contrib/llvm/include/llvm/MC/MCParser/AsmLexer.h4
-rw-r--r--contrib/llvm/include/llvm/MC/MCParser/MCAsmLexer.h35
-rw-r--r--contrib/llvm/include/llvm/MC/MCParser/MCAsmParser.h62
-rw-r--r--contrib/llvm/include/llvm/MC/MCParser/MCAsmParserExtension.h29
-rw-r--r--contrib/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h23
-rw-r--r--contrib/llvm/include/llvm/MC/MCRegisterInfo.h24
-rw-r--r--contrib/llvm/include/llvm/MC/MCSection.h34
-rw-r--r--contrib/llvm/include/llvm/MC/MCSectionCOFF.h4
-rw-r--r--contrib/llvm/include/llvm/MC/MCStreamer.h67
-rw-r--r--contrib/llvm/include/llvm/MC/MCTargetOptions.h12
-rw-r--r--contrib/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h10
-rw-r--r--contrib/llvm/include/llvm/MC/MCWinCOFFStreamer.h2
-rw-r--r--contrib/llvm/include/llvm/MC/SectionKind.h9
-rw-r--r--contrib/llvm/include/llvm/MC/StringTableBuilder.h33
-rw-r--r--contrib/llvm/include/llvm/Object/Archive.h106
-rw-r--r--contrib/llvm/include/llvm/Object/ArchiveWriter.h14
-rw-r--r--contrib/llvm/include/llvm/Object/Binary.h4
-rw-r--r--contrib/llvm/include/llvm/Object/COFF.h52
-rw-r--r--contrib/llvm/include/llvm/Object/COFFImportFile.h4
-rw-r--r--contrib/llvm/include/llvm/Object/ELF.h539
-rw-r--r--contrib/llvm/include/llvm/Object/ELFObjectFile.h160
-rw-r--r--contrib/llvm/include/llvm/Object/ELFTypes.h34
-rw-r--r--contrib/llvm/include/llvm/Object/Error.h1
-rw-r--r--contrib/llvm/include/llvm/Object/IRObjectFile.h39
-rw-r--r--contrib/llvm/include/llvm/Object/MachO.h15
-rw-r--r--contrib/llvm/include/llvm/Object/MachOUniversal.h18
-rw-r--r--contrib/llvm/include/llvm/Object/ModuleSummaryIndexObjectFile.h19
-rw-r--r--contrib/llvm/include/llvm/Object/ModuleSymbolTable.h61
-rw-r--r--contrib/llvm/include/llvm/Object/ObjectFile.h7
-rw-r--r--contrib/llvm/include/llvm/Object/RelocVisitor.h47
-rw-r--r--contrib/llvm/include/llvm/Object/StackMapParser.h35
-rw-r--r--contrib/llvm/include/llvm/Object/SymbolSize.h11
-rw-r--r--contrib/llvm/include/llvm/Object/SymbolicFile.h13
-rw-r--r--contrib/llvm/include/llvm/Object/Wasm.h99
-rw-r--r--contrib/llvm/include/llvm/ObjectYAML/DWARFYAML.h203
-rw-r--r--contrib/llvm/include/llvm/ObjectYAML/MachOYAML.h9
-rw-r--r--contrib/llvm/include/llvm/Option/ArgList.h4
-rw-r--r--contrib/llvm/include/llvm/Pass.h2
-rw-r--r--contrib/llvm/include/llvm/PassInfo.h16
-rw-r--r--contrib/llvm/include/llvm/PassSupport.h4
-rw-r--r--contrib/llvm/include/llvm/Passes/PassBuilder.h95
-rw-r--r--contrib/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h58
-rw-r--r--contrib/llvm/include/llvm/ProfileData/Coverage/CoverageMappingWriter.h4
-rw-r--r--contrib/llvm/include/llvm/ProfileData/InstrProf.h20
-rw-r--r--contrib/llvm/include/llvm/ProfileData/InstrProfData.inc7
-rw-r--r--contrib/llvm/include/llvm/ProfileData/InstrProfWriter.h2
-rw-r--r--contrib/llvm/include/llvm/ProfileData/ProfileCommon.h19
-rw-r--r--contrib/llvm/include/llvm/ProfileData/SampleProf.h15
-rw-r--r--contrib/llvm/include/llvm/Support/AArch64TargetParser.def16
-rw-r--r--contrib/llvm/include/llvm/Support/ARMBuildAttributes.h1
-rw-r--r--contrib/llvm/include/llvm/Support/ARMTargetParser.def11
-rw-r--r--contrib/llvm/include/llvm/Support/AlignOf.h123
-rw-r--r--contrib/llvm/include/llvm/Support/Allocator.h23
-rw-r--r--contrib/llvm/include/llvm/Support/ArrayRecycler.h5
-rw-r--r--contrib/llvm/include/llvm/Support/AtomicOrdering.h48
-rw-r--r--contrib/llvm/include/llvm/Support/COFF.h7
-rw-r--r--contrib/llvm/include/llvm/Support/CachePruning.h11
-rw-r--r--contrib/llvm/include/llvm/Support/Casting.h48
-rw-r--r--contrib/llvm/include/llvm/Support/Chrono.h55
-rw-r--r--contrib/llvm/include/llvm/Support/CodeGen.h2
-rw-r--r--contrib/llvm/include/llvm/Support/CommandLine.h279
-rw-r--r--contrib/llvm/include/llvm/Support/Compiler.h94
-rw-r--r--contrib/llvm/include/llvm/Support/Compression.h3
-rw-r--r--contrib/llvm/include/llvm/Support/ConvertUTF.h24
-rw-r--r--contrib/llvm/include/llvm/Support/DataExtractor.h2
-rw-r--r--contrib/llvm/include/llvm/Support/DataStream.h38
-rw-r--r--contrib/llvm/include/llvm/Support/Debug.h16
-rw-r--r--contrib/llvm/include/llvm/Support/Dwarf.def444
-rw-r--r--contrib/llvm/include/llvm/Support/Dwarf.h421
-rw-r--r--contrib/llvm/include/llvm/Support/ELF.h1402
-rw-r--r--contrib/llvm/include/llvm/Support/ELFRelocs/AArch64.def338
-rw-r--r--contrib/llvm/include/llvm/Support/ELFRelocs/AMDGPU.def20
-rw-r--r--contrib/llvm/include/llvm/Support/ELFRelocs/BPF.def5
-rw-r--r--contrib/llvm/include/llvm/Support/ELFRelocs/RISCV.def50
-rw-r--r--contrib/llvm/include/llvm/Support/ELFRelocs/SystemZ.def4
-rw-r--r--contrib/llvm/include/llvm/Support/Endian.h3
-rw-r--r--contrib/llvm/include/llvm/Support/Error.h166
-rw-r--r--contrib/llvm/include/llvm/Support/FileSystem.h38
-rw-r--r--contrib/llvm/include/llvm/Support/Format.h57
-rw-r--r--contrib/llvm/include/llvm/Support/FormatAdapters.h93
-rw-r--r--contrib/llvm/include/llvm/Support/FormatCommon.h69
-rw-r--r--contrib/llvm/include/llvm/Support/FormatProviders.h413
-rw-r--r--contrib/llvm/include/llvm/Support/FormatVariadic.h247
-rw-r--r--contrib/llvm/include/llvm/Support/FormatVariadicDetails.h112
-rw-r--r--contrib/llvm/include/llvm/Support/GCOV.h34
-rw-r--r--contrib/llvm/include/llvm/Support/GenericDomTree.h60
-rw-r--r--contrib/llvm/include/llvm/Support/GenericDomTreeConstruction.h96
-rw-r--r--contrib/llvm/include/llvm/Support/GlobPattern.h48
-rw-r--r--contrib/llvm/include/llvm/Support/GraphWriter.h33
-rw-r--r--contrib/llvm/include/llvm/Support/Host.h7
-rw-r--r--contrib/llvm/include/llvm/Support/MD5.h4
-rw-r--r--contrib/llvm/include/llvm/Support/MachO.def10
-rw-r--r--contrib/llvm/include/llvm/Support/MachO.h215
-rw-r--r--contrib/llvm/include/llvm/Support/ManagedStatic.h7
-rw-r--r--contrib/llvm/include/llvm/Support/MathExtras.h144
-rw-r--r--contrib/llvm/include/llvm/Support/MemoryBuffer.h28
-rw-r--r--contrib/llvm/include/llvm/Support/MemoryObject.h68
-rw-r--r--contrib/llvm/include/llvm/Support/NativeFormatting.h49
-rw-r--r--contrib/llvm/include/llvm/Support/OnDiskHashTable.h3
-rw-r--r--contrib/llvm/include/llvm/Support/Options.h2
-rw-r--r--contrib/llvm/include/llvm/Support/Path.h3
-rw-r--r--contrib/llvm/include/llvm/Support/PointerLikeTypeTraits.h6
-rw-r--r--contrib/llvm/include/llvm/Support/PrettyStackTrace.h11
-rw-r--r--contrib/llvm/include/llvm/Support/Printable.h2
-rw-r--r--contrib/llvm/include/llvm/Support/Process.h11
-rw-r--r--contrib/llvm/include/llvm/Support/RWMutex.h23
-rw-r--r--contrib/llvm/include/llvm/Support/RandomNumberGenerator.h24
-rw-r--r--contrib/llvm/include/llvm/Support/Recycler.h5
-rw-r--r--contrib/llvm/include/llvm/Support/RecyclingAllocator.h4
-rw-r--r--contrib/llvm/include/llvm/Support/Regex.h7
-rw-r--r--contrib/llvm/include/llvm/Support/Registry.h114
-rw-r--r--contrib/llvm/include/llvm/Support/SHA1.h9
-rw-r--r--contrib/llvm/include/llvm/Support/SMLoc.h6
-rw-r--r--contrib/llvm/include/llvm/Support/SourceMgr.h5
-rw-r--r--contrib/llvm/include/llvm/Support/StreamingMemoryObject.h87
-rw-r--r--contrib/llvm/include/llvm/Support/StringSaver.h12
-rw-r--r--contrib/llvm/include/llvm/Support/SwapByteOrder.h3
-rw-r--r--contrib/llvm/include/llvm/Support/TargetParser.h26
-rw-r--r--contrib/llvm/include/llvm/Support/TargetRegistry.h24
-rw-r--r--contrib/llvm/include/llvm/Support/Threading.h7
-rw-r--r--contrib/llvm/include/llvm/Support/TimeValue.h386
-rw-r--r--contrib/llvm/include/llvm/Support/Timer.h132
-rw-r--r--contrib/llvm/include/llvm/Support/TrailingObjects.h62
-rw-r--r--contrib/llvm/include/llvm/Support/TrigramIndex.h70
-rw-r--r--contrib/llvm/include/llvm/Support/UnicodeCharRanges.h2
-rw-r--r--contrib/llvm/include/llvm/Support/Wasm.h87
-rw-r--r--contrib/llvm/include/llvm/Support/YAMLParser.h13
-rw-r--r--contrib/llvm/include/llvm/Support/YAMLTraits.h447
-rw-r--r--contrib/llvm/include/llvm/Support/raw_ostream.h38
-rw-r--r--contrib/llvm/include/llvm/Support/xxhash.h47
-rw-r--r--contrib/llvm/include/llvm/TableGen/Record.h287
-rw-r--r--contrib/llvm/include/llvm/TableGen/SetTheory.h15
-rw-r--r--contrib/llvm/include/llvm/Target/CostTable.h15
-rw-r--r--contrib/llvm/include/llvm/Target/GenericOpcodes.td410
-rw-r--r--contrib/llvm/include/llvm/Target/Target.td54
-rw-r--r--contrib/llvm/include/llvm/Target/TargetCallingConv.h18
-rw-r--r--contrib/llvm/include/llvm/Target/TargetFrameLowering.h6
-rw-r--r--contrib/llvm/include/llvm/Target/TargetGlobalISel.td29
-rw-r--r--contrib/llvm/include/llvm/Target/TargetInstrInfo.h217
-rw-r--r--contrib/llvm/include/llvm/Target/TargetIntrinsicInfo.h7
-rw-r--r--contrib/llvm/include/llvm/Target/TargetItinerary.td2
-rw-r--r--contrib/llvm/include/llvm/Target/TargetLowering.h247
-rw-r--r--contrib/llvm/include/llvm/Target/TargetLoweringObjectFile.h74
-rw-r--r--contrib/llvm/include/llvm/Target/TargetMachine.h21
-rw-r--r--contrib/llvm/include/llvm/Target/TargetOpcodes.def262
-rw-r--r--contrib/llvm/include/llvm/Target/TargetOpcodes.h7
-rw-r--r--contrib/llvm/include/llvm/Target/TargetOptions.h37
-rw-r--r--contrib/llvm/include/llvm/Target/TargetRecip.h74
-rw-r--r--contrib/llvm/include/llvm/Target/TargetRegisterInfo.h66
-rw-r--r--contrib/llvm/include/llvm/Target/TargetSelectionDAG.td10
-rw-r--r--contrib/llvm/include/llvm/Target/TargetSubtargetInfo.h23
-rw-r--r--contrib/llvm/include/llvm/Transforms/Coroutines.h38
-rw-r--r--contrib/llvm/include/llvm/Transforms/GCOVProfiler.h2
-rw-r--r--contrib/llvm/include/llvm/Transforms/IPO.h22
-rw-r--r--contrib/llvm/include/llvm/Transforms/IPO/AlwaysInliner.h40
-rw-r--r--contrib/llvm/include/llvm/Transforms/IPO/CrossDSOCFI.h2
-rw-r--r--contrib/llvm/include/llvm/Transforms/IPO/FunctionAttrs.h5
-rw-r--r--contrib/llvm/include/llvm/Transforms/IPO/FunctionImport.h28
-rw-r--r--contrib/llvm/include/llvm/Transforms/IPO/GlobalOpt.h2
-rw-r--r--contrib/llvm/include/llvm/Transforms/IPO/GlobalSplit.h30
-rw-r--r--contrib/llvm/include/llvm/Transforms/IPO/InferFunctionAttrs.h2
-rw-r--r--contrib/llvm/include/llvm/Transforms/IPO/Inliner.h108
-rw-r--r--contrib/llvm/include/llvm/Transforms/IPO/InlinerPass.h94
-rw-r--r--contrib/llvm/include/llvm/Transforms/IPO/Internalize.h2
-rw-r--r--contrib/llvm/include/llvm/Transforms/IPO/LowerTypeTests.h2
-rw-r--r--contrib/llvm/include/llvm/Transforms/IPO/PartialInlining.h3
-rw-r--r--contrib/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h13
-rw-r--r--contrib/llvm/include/llvm/Transforms/IPO/SCCP.h2
-rw-r--r--contrib/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h5
-rw-r--r--contrib/llvm/include/llvm/Transforms/InstCombine/InstCombine.h13
-rw-r--r--contrib/llvm/include/llvm/Transforms/InstCombine/InstCombineWorklist.h16
-rw-r--r--contrib/llvm/include/llvm/Transforms/InstrProfiling.h11
-rw-r--r--contrib/llvm/include/llvm/Transforms/Instrumentation.h9
-rw-r--r--contrib/llvm/include/llvm/Transforms/PGOInstrumentation.h6
-rw-r--r--contrib/llvm/include/llvm/Transforms/SampleProfile.h2
-rw-r--r--contrib/llvm/include/llvm/Transforms/Scalar.h41
-rw-r--r--contrib/llvm/include/llvm/Transforms/Scalar/DCE.h2
-rw-r--r--contrib/llvm/include/llvm/Transforms/Scalar/DeadStoreElimination.h2
-rw-r--r--contrib/llvm/include/llvm/Transforms/Scalar/EarlyCSE.h6
-rw-r--r--contrib/llvm/include/llvm/Transforms/Scalar/GVN.h10
-rw-r--r--contrib/llvm/include/llvm/Transforms/Scalar/GVNExpression.h605
-rw-r--r--contrib/llvm/include/llvm/Transforms/Scalar/GuardWidening.h2
-rw-r--r--contrib/llvm/include/llvm/Transforms/Scalar/IndVarSimplify.h3
-rw-r--r--contrib/llvm/include/llvm/Transforms/Scalar/JumpThreading.h11
-rw-r--r--contrib/llvm/include/llvm/Transforms/Scalar/LICM.h3
-rw-r--r--contrib/llvm/include/llvm/Transforms/Scalar/LoopDataPrefetch.h31
-rw-r--r--contrib/llvm/include/llvm/Transforms/Scalar/LoopDeletion.h3
-rw-r--r--contrib/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h3
-rw-r--r--contrib/llvm/include/llvm/Transforms/Scalar/LoopInstSimplify.h3
-rw-r--r--contrib/llvm/include/llvm/Transforms/Scalar/LoopRotation.h8
-rw-r--r--contrib/llvm/include/llvm/Transforms/Scalar/LoopSimplifyCFG.h3
-rw-r--r--contrib/llvm/include/llvm/Transforms/Scalar/LoopStrengthReduce.h38
-rw-r--r--contrib/llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h30
-rw-r--r--contrib/llvm/include/llvm/Transforms/Scalar/LowerGuardIntrinsic.h28
-rw-r--r--contrib/llvm/include/llvm/Transforms/Scalar/MergedLoadStoreMotion.h2
-rw-r--r--contrib/llvm/include/llvm/Transforms/Scalar/NaryReassociate.h174
-rw-r--r--contrib/llvm/include/llvm/Transforms/Scalar/NewGVN.h28
-rw-r--r--contrib/llvm/include/llvm/Transforms/Scalar/PartiallyInlineLibCalls.h2
-rw-r--r--contrib/llvm/include/llvm/Transforms/Scalar/Reassociate.h2
-rw-r--r--contrib/llvm/include/llvm/Transforms/Scalar/SCCP.h2
-rw-r--r--contrib/llvm/include/llvm/Transforms/Scalar/SROA.h2
-rw-r--r--contrib/llvm/include/llvm/Transforms/Scalar/SimplifyCFG.h2
-rw-r--r--contrib/llvm/include/llvm/Transforms/Scalar/Sink.h2
-rw-r--r--contrib/llvm/include/llvm/Transforms/Scalar/SpeculativeExecution.h92
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/ASanStackFrameLayout.h49
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/AddDiscriminators.h2
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h15
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/BreakCriticalEdges.h29
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/Cloning.h20
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/CmpInstAnalysis.h27
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/CodeExtractor.h30
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/EscapeEnumerator.h49
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/FunctionComparator.h376
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h12
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h107
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/LCSSA.h2
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/LibCallsShrinkWrap.h27
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/Local.h46
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/LoopSimplify.h2
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/LoopUtils.h76
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/LowerInvoke.h30
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/Mem2Reg.h2
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/MemorySSA.h189
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/ModuleUtils.h26
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/NameAnonGlobals.h31
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/SSAUpdaterImpl.h5
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/SimplifyInstructions.h2
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h2
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/SymbolRewriter.h57
-rw-r--r--contrib/llvm/include/llvm/Transforms/Utils/UnrollLoop.h16
-rw-r--r--contrib/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h5
-rw-r--r--contrib/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h15
-rw-r--r--contrib/llvm/include/llvm/module.modulemap25
-rw-r--r--contrib/llvm/include/llvm/module.modulemap.build4
-rw-r--r--contrib/llvm/lib/Analysis/AliasAnalysis.cpp29
-rw-r--r--contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp2
-rw-r--r--contrib/llvm/lib/Analysis/AliasAnalysisSummary.cpp34
-rw-r--r--contrib/llvm/lib/Analysis/AliasAnalysisSummary.h56
-rw-r--r--contrib/llvm/lib/Analysis/AliasSetTracker.cpp351
-rw-r--r--contrib/llvm/lib/Analysis/Analysis.cpp10
-rw-r--r--contrib/llvm/lib/Analysis/AssumptionCache.cpp4
-rw-r--r--contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp143
-rw-r--r--contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp32
-rw-r--r--contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp17
-rw-r--r--contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp40
-rw-r--r--contrib/llvm/lib/Analysis/CFGPrinter.cpp111
-rw-r--r--contrib/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp484
-rw-r--r--contrib/llvm/lib/Analysis/CFLGraph.h128
-rw-r--r--contrib/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp78
-rw-r--r--contrib/llvm/lib/Analysis/CGSCCPassManager.cpp501
-rw-r--r--contrib/llvm/lib/Analysis/CallGraph.cpp4
-rw-r--r--contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp12
-rw-r--r--contrib/llvm/lib/Analysis/CodeMetrics.cpp66
-rw-r--r--contrib/llvm/lib/Analysis/ConstantFolding.cpp478
-rw-r--r--contrib/llvm/lib/Analysis/CostModel.cpp47
-rw-r--r--contrib/llvm/lib/Analysis/DemandedBits.cpp13
-rw-r--r--contrib/llvm/lib/Analysis/DependenceAnalysis.cpp2
-rw-r--r--contrib/llvm/lib/Analysis/DominanceFrontier.cpp2
-rw-r--r--contrib/llvm/lib/Analysis/EHPersonalities.cpp25
-rw-r--r--contrib/llvm/lib/Analysis/GlobalsModRef.cpp24
-rw-r--r--contrib/llvm/lib/Analysis/IVUsers.cpp6
-rw-r--r--contrib/llvm/lib/Analysis/InlineCost.cpp266
-rw-r--r--contrib/llvm/lib/Analysis/InstructionSimplify.cpp1570
-rw-r--r--contrib/llvm/lib/Analysis/IteratedDominanceFrontier.cpp2
-rw-r--r--contrib/llvm/lib/Analysis/LazyBlockFrequencyInfo.cpp15
-rw-r--r--contrib/llvm/lib/Analysis/LazyBranchProbabilityInfo.cpp63
-rw-r--r--contrib/llvm/lib/Analysis/LazyCallGraph.cpp1007
-rw-r--r--contrib/llvm/lib/Analysis/LazyValueInfo.cpp962
-rw-r--r--contrib/llvm/lib/Analysis/Lint.cpp37
-rw-r--r--contrib/llvm/lib/Analysis/Loads.cpp24
-rw-r--r--contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp188
-rw-r--r--contrib/llvm/lib/Analysis/LoopInfo.cpp124
-rw-r--r--contrib/llvm/lib/Analysis/LoopPass.cpp28
-rw-r--r--contrib/llvm/lib/Analysis/LoopPassManager.cpp22
-rw-r--r--contrib/llvm/lib/Analysis/MemoryBuiltins.cpp129
-rw-r--r--contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp172
-rw-r--r--contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp13
-rw-r--r--contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp345
-rw-r--r--contrib/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp2
-rw-r--r--contrib/llvm/lib/Analysis/ObjCARCInstKind.cpp71
-rw-r--r--contrib/llvm/lib/Analysis/OptimizationDiagnosticInfo.cpp189
-rw-r--r--contrib/llvm/lib/Analysis/PHITransAddr.cpp14
-rw-r--r--contrib/llvm/lib/Analysis/PostDominators.cpp2
-rw-r--r--contrib/llvm/lib/Analysis/ProfileSummaryInfo.cpp72
-rw-r--r--contrib/llvm/lib/Analysis/RegionInfo.cpp9
-rw-r--r--contrib/llvm/lib/Analysis/RegionPass.cpp6
-rw-r--r--contrib/llvm/lib/Analysis/ScalarEvolution.cpp1484
-rw-r--r--contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp4
-rw-r--r--contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp96
-rw-r--r--contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp17
-rw-r--r--contrib/llvm/lib/Analysis/StratifiedSets.h13
-rw-r--r--contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp161
-rw-r--r--contrib/llvm/lib/Analysis/TargetTransformInfo.cpp61
-rw-r--r--contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp173
-rw-r--r--contrib/llvm/lib/Analysis/TypeMetadataUtils.cpp3
-rw-r--r--contrib/llvm/lib/Analysis/ValueTracking.cpp647
-rw-r--r--contrib/llvm/lib/Analysis/VectorUtils.cpp16
-rw-r--r--contrib/llvm/lib/AsmParser/LLLexer.cpp166
-rw-r--r--contrib/llvm/lib/AsmParser/LLParser.cpp262
-rw-r--r--contrib/llvm/lib/AsmParser/LLParser.h3
-rw-r--r--contrib/llvm/lib/AsmParser/LLToken.h3
-rw-r--r--contrib/llvm/lib/Bitcode/Reader/BitReader.cpp59
-rw-r--r--contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp3425
-rw-r--r--contrib/llvm/lib/Bitcode/Reader/BitstreamReader.cpp67
-rw-r--r--contrib/llvm/lib/Bitcode/Reader/MetadataLoader.cpp1398
-rw-r--r--contrib/llvm/lib/Bitcode/Reader/MetadataLoader.h85
-rw-r--r--contrib/llvm/lib/Bitcode/Reader/ValueList.cpp199
-rw-r--r--contrib/llvm/lib/Bitcode/Reader/ValueList.h76
-rw-r--r--contrib/llvm/lib/Bitcode/Writer/BitWriter.cpp3
-rw-r--r--contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp461
-rw-r--r--contrib/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp15
-rw-r--r--contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp4
-rw-r--r--contrib/llvm/lib/CodeGen/AllocationOrder.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/AllocationOrder.h5
-rw-r--r--contrib/llvm/lib/CodeGen/Analysis.cpp53
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp20
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp185
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp35
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp17
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp685
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h29
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp372
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp6
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp31
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp42
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h18
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h12
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp4
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h3
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp32
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp247
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h28
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp363
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h30
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h6
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp170
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h165
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp72
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h15
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp135
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h31
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp16
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp116
-rw-r--r--contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h4
-rw-r--r--contrib/llvm/lib/CodeGen/BranchFolding.cpp305
-rw-r--r--contrib/llvm/lib/CodeGen/BranchFolding.h14
-rw-r--r--contrib/llvm/lib/CodeGen/BranchRelaxation.cpp510
-rw-r--r--contrib/llvm/lib/CodeGen/CallingConvLower.cpp18
-rw-r--r--contrib/llvm/lib/CodeGen/CodeGen.cpp4
-rw-r--r--contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp519
-rw-r--r--contrib/llvm/lib/CodeGen/CountingFunctionInserter.cpp62
-rw-r--r--contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp4
-rw-r--r--contrib/llvm/lib/CodeGen/DFAPacketizer.cpp21
-rw-r--r--contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp9
-rw-r--r--contrib/llvm/lib/CodeGen/DetectDeadLanes.cpp30
-rw-r--r--contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp6
-rw-r--r--contrib/llvm/lib/CodeGen/EdgeBundles.cpp9
-rw-r--r--contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp70
-rw-r--r--contrib/llvm/lib/CodeGen/FuncletLayout.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/GCMetadata.cpp4
-rw-r--r--contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/GCRootLowering.cpp12
-rw-r--r--contrib/llvm/lib/CodeGen/GCStrategy.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp170
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp797
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp175
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp60
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp180
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp354
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp182
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp427
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp132
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp364
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp45
-rw-r--r--contrib/llvm/lib/CodeGen/GlobalMerge.cpp63
-rw-r--r--contrib/llvm/lib/CodeGen/IfConversion.cpp1203
-rw-r--r--contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp401
-rw-r--r--contrib/llvm/lib/CodeGen/InlineSpiller.cpp35
-rw-r--r--contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp110
-rw-r--r--contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp10
-rw-r--r--contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp68
-rw-r--r--contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/LexicalScopes.cpp22
-rw-r--r--contrib/llvm/lib/CodeGen/LiveDebugValues.cpp90
-rw-r--r--contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp56
-rw-r--r--contrib/llvm/lib/CodeGen/LiveInterval.cpp105
-rw-r--r--contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp70
-rw-r--r--contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp5
-rw-r--r--contrib/llvm/lib/CodeGen/LivePhysRegs.cpp53
-rw-r--r--contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp211
-rw-r--r--contrib/llvm/lib/CodeGen/LiveRangeCalc.h55
-rw-r--r--contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp23
-rw-r--r--contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp9
-rw-r--r--contrib/llvm/lib/CodeGen/LiveVariables.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp82
-rw-r--r--contrib/llvm/lib/CodeGen/LowLevelType.cpp71
-rw-r--r--contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp78
-rw-r--r--contrib/llvm/lib/CodeGen/MIRParser/MILexer.h10
-rw-r--r--contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp485
-rw-r--r--contrib/llvm/lib/CodeGen/MIRParser/MIParser.h49
-rw-r--r--contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp195
-rw-r--r--contrib/llvm/lib/CodeGen/MIRPrinter.cpp148
-rw-r--r--contrib/llvm/lib/CodeGen/MIRPrintingPass.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp135
-rw-r--r--contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp27
-rw-r--r--contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp473
-rw-r--r--contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp3
-rw-r--r--contrib/llvm/lib/CodeGen/MachineCSE.cpp7
-rw-r--r--contrib/llvm/lib/CodeGen/MachineCombiner.cpp15
-rw-r--r--contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp4
-rw-r--r--contrib/llvm/lib/CodeGen/MachineFunction.cpp300
-rw-r--r--contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp60
-rw-r--r--contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp14
-rw-r--r--contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/MachineInstr.cpp190
-rw-r--r--contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp10
-rw-r--r--contrib/llvm/lib/CodeGen/MachineLICM.cpp17
-rw-r--r--contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp45
-rw-r--r--contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp305
-rw-r--r--contrib/llvm/lib/CodeGen/MachinePipeliner.cpp3984
-rw-r--r--contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp58
-rw-r--r--contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp1
-rw-r--r--contrib/llvm/lib/CodeGen/MachineScheduler.cpp214
-rw-r--r--contrib/llvm/lib/CodeGen/MachineSink.cpp40
-rw-r--r--contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp7
-rw-r--r--contrib/llvm/lib/CodeGen/MachineVerifier.cpp205
-rw-r--r--contrib/llvm/lib/CodeGen/OptimizePHIs.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/PHIElimination.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp3
-rw-r--r--contrib/llvm/lib/CodeGen/ParallelCG.cpp5
-rw-r--r--contrib/llvm/lib/CodeGen/PatchableFunction.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp57
-rw-r--r--contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp172
-rw-r--r--contrib/llvm/lib/CodeGen/RegAllocBase.cpp7
-rw-r--r--contrib/llvm/lib/CodeGen/RegAllocBase.h1
-rw-r--r--contrib/llvm/lib/CodeGen/RegAllocBasic.cpp9
-rw-r--r--contrib/llvm/lib/CodeGen/RegAllocFast.cpp25
-rw-r--r--contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp107
-rw-r--r--contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp9
-rw-r--r--contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp15
-rw-r--r--contrib/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp270
-rw-r--r--contrib/llvm/lib/CodeGen/RegisterPressure.cpp149
-rw-r--r--contrib/llvm/lib/CodeGen/RegisterScavenging.cpp114
-rw-r--r--contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/RenameIndependentSubregs.cpp28
-rw-r--r--contrib/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp67
-rw-r--r--contrib/llvm/lib/CodeGen/SafeStack.cpp62
-rw-r--r--contrib/llvm/lib/CodeGen/SafeStackColoring.cpp10
-rw-r--r--contrib/llvm/lib/CodeGen/SafeStackLayout.cpp8
-rw-r--r--contrib/llvm/lib/CodeGen/ScheduleDAG.cpp35
-rw-r--r--contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp244
-rw-r--r--contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp4
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp2284
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp63
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp192
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp26
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp294
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp10
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp212
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp21
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h2
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp12
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp86
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp35
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp5
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp984
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp1001
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h19
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp7
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp284
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp66
-rw-r--r--contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp539
-rw-r--r--contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp116
-rw-r--r--contrib/llvm/lib/CodeGen/ShrinkWrap.cpp14
-rw-r--r--contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/SplitKit.cpp276
-rw-r--r--contrib/llvm/lib/CodeGen/SplitKit.h31
-rw-r--r--contrib/llvm/lib/CodeGen/StackColoring.cpp5
-rw-r--r--contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp7
-rw-r--r--contrib/llvm/lib/CodeGen/StackMaps.cpp61
-rw-r--r--contrib/llvm/lib/CodeGen/StackProtector.cpp7
-rw-r--r--contrib/llvm/lib/CodeGen/StackSlotColoring.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/TailDuplication.cpp5
-rw-r--r--contrib/llvm/lib/CodeGen/TailDuplicator.cpp254
-rw-r--r--contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp10
-rw-r--r--contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp95
-rw-r--r--contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp293
-rw-r--r--contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp262
-rw-r--r--contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/TargetPassConfig.cpp48
-rw-r--r--contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp46
-rw-r--r--contrib/llvm/lib/CodeGen/TargetSchedule.cpp2
-rw-r--r--contrib/llvm/lib/CodeGen/TargetSubtargetInfo.cpp (renamed from contrib/llvm/lib/Target/TargetSubtargetInfo.cpp)2
-rw-r--r--contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp17
-rw-r--r--contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp4
-rw-r--r--contrib/llvm/lib/CodeGen/VirtRegMap.cpp15
-rw-r--r--contrib/llvm/lib/CodeGen/WinEHPrepare.cpp12
-rw-r--r--contrib/llvm/lib/CodeGen/XRayInstrumentation.cpp129
-rw-r--r--contrib/llvm/lib/DebugInfo/CodeView/ByteStream.cpp79
-rw-r--r--contrib/llvm/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp73
-rw-r--r--contrib/llvm/lib/DebugInfo/CodeView/CVTypeVisitor.cpp147
-rw-r--r--contrib/llvm/lib/DebugInfo/CodeView/CodeViewError.cpp4
-rw-r--r--contrib/llvm/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp242
-rw-r--r--contrib/llvm/lib/DebugInfo/CodeView/EnumTables.cpp10
-rw-r--r--contrib/llvm/lib/DebugInfo/CodeView/FieldListRecordBuilder.cpp132
-rw-r--r--contrib/llvm/lib/DebugInfo/CodeView/ListRecordBuilder.cpp102
-rw-r--r--contrib/llvm/lib/DebugInfo/CodeView/MemoryTypeTableBuilder.cpp46
-rw-r--r--contrib/llvm/lib/DebugInfo/CodeView/MethodListRecordBuilder.cpp49
-rw-r--r--contrib/llvm/lib/DebugInfo/CodeView/ModuleSubstream.cpp11
-rw-r--r--contrib/llvm/lib/DebugInfo/CodeView/ModuleSubstreamVisitor.cpp30
-rw-r--r--contrib/llvm/lib/DebugInfo/CodeView/RecordSerialization.cpp192
-rw-r--r--contrib/llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp522
-rw-r--r--contrib/llvm/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp464
-rw-r--r--contrib/llvm/lib/DebugInfo/CodeView/TypeDumper.cpp206
-rw-r--r--contrib/llvm/lib/DebugInfo/CodeView/TypeRecord.cpp367
-rw-r--r--contrib/llvm/lib/DebugInfo/CodeView/TypeRecordBuilder.cpp113
-rw-r--r--contrib/llvm/lib/DebugInfo/CodeView/TypeRecordMapping.cpp467
-rw-r--r--contrib/llvm/lib/DebugInfo/CodeView/TypeSerializer.cpp243
-rw-r--r--contrib/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp94
-rw-r--r--contrib/llvm/lib/DebugInfo/CodeView/TypeTableBuilder.cpp303
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp161
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp10
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp9
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp125
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp52
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp436
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp22
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp4
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp65
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp441
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp472
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp176
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp22
-rw-r--r--contrib/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp195
-rw-r--r--contrib/llvm/lib/DebugInfo/MSF/MSFBuilder.cpp (renamed from contrib/llvm/lib/DebugInfo/PDB/Raw/MsfBuilder.cpp)113
-rw-r--r--contrib/llvm/lib/DebugInfo/MSF/MSFCommon.cpp (renamed from contrib/llvm/lib/DebugInfo/PDB/Raw/MsfCommon.cpp)29
-rw-r--r--contrib/llvm/lib/DebugInfo/MSF/MSFError.cpp70
-rw-r--r--contrib/llvm/lib/DebugInfo/MSF/MappedBlockStream.cpp415
-rw-r--r--contrib/llvm/lib/DebugInfo/MSF/StreamReader.cpp (renamed from contrib/llvm/lib/DebugInfo/CodeView/StreamReader.cpp)79
-rw-r--r--contrib/llvm/lib/DebugInfo/MSF/StreamWriter.cpp (renamed from contrib/llvm/lib/DebugInfo/CodeView/StreamWriter.cpp)39
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/DIA/DIAError.cpp11
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/DIA/DIASession.cpp47
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/GenericError.cpp9
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/IPDBSourceFile.cpp8
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDB.cpp6
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBContext.cpp4
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBInterfaceAnchors.cpp9
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymDumper.cpp2
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/PDBSymbol.cpp11
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Raw/DbiStream.cpp158
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Raw/DbiStreamBuilder.cpp399
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Raw/GSI.cpp93
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Raw/GSI.h70
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Raw/GlobalsStream.cpp42
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Raw/Hash.cpp57
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Raw/IndexedStreamData.cpp25
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Raw/InfoStream.cpp25
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Raw/InfoStreamBuilder.cpp60
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Raw/MappedBlockStream.cpp310
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Raw/ModInfo.cpp86
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Raw/ModStream.cpp39
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Raw/NameHashTable.cpp10
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Raw/NameMap.cpp76
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Raw/NameMapBuilder.cpp64
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Raw/PDBFile.cpp308
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Raw/PDBFileBuilder.cpp130
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Raw/PublicsStream.cpp82
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Raw/RawError.cpp8
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Raw/RawSession.cpp56
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Raw/SymbolStream.cpp8
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Raw/TpiHashing.cpp110
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Raw/TpiStream.cpp228
-rw-r--r--contrib/llvm/lib/DebugInfo/PDB/Raw/TpiStreamBuilder.cpp145
-rw-r--r--contrib/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp6
-rw-r--r--contrib/llvm/lib/Demangle/ItaniumDemangle.cpp4276
-rw-r--r--contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp24
-rw-r--r--contrib/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp2
-rw-r--r--contrib/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp10
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp5
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp16
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.cpp8
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h25
-rw-r--r--contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp63
-rw-r--r--contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h38
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp13
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/NullResolver.cpp5
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.h37
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/OrcError.cpp6
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h58
-rw-r--r--contrib/llvm/lib/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.cpp53
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/JITSymbol.cpp41
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp3
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp78
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp2
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.h4
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp4
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp460
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h26
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h42
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp2
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h6
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h2
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h48
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h2
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.cpp312
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.h68
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h8
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h2
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h2
-rw-r--r--contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h2
-rw-r--r--contrib/llvm/lib/ExecutionEngine/SectionMemoryManager.cpp4
-rw-r--r--contrib/llvm/lib/ExecutionEngine/TargetSelect.cpp5
-rw-r--r--contrib/llvm/lib/IR/AsmWriter.cpp133
-rw-r--r--contrib/llvm/lib/IR/AttributeImpl.h48
-rw-r--r--contrib/llvm/lib/IR/AttributeSetNode.h22
-rw-r--r--contrib/llvm/lib/IR/Attributes.cpp179
-rw-r--r--contrib/llvm/lib/IR/AutoUpgrade.cpp1064
-rw-r--r--contrib/llvm/lib/IR/BasicBlock.cpp2
-rw-r--r--contrib/llvm/lib/IR/ConstantFold.cpp121
-rw-r--r--contrib/llvm/lib/IR/ConstantFold.h7
-rw-r--r--contrib/llvm/lib/IR/ConstantRange.cpp122
-rw-r--r--contrib/llvm/lib/IR/Constants.cpp158
-rw-r--r--contrib/llvm/lib/IR/ConstantsContext.h162
-rw-r--r--contrib/llvm/lib/IR/Core.cpp100
-rw-r--r--contrib/llvm/lib/IR/DIBuilder.cpp184
-rw-r--r--contrib/llvm/lib/IR/DataLayout.cpp31
-rw-r--r--contrib/llvm/lib/IR/DebugInfo.cpp324
-rw-r--r--contrib/llvm/lib/IR/DebugInfoMetadata.cpp178
-rw-r--r--contrib/llvm/lib/IR/DiagnosticInfo.cpp150
-rw-r--r--contrib/llvm/lib/IR/Dominators.cpp12
-rw-r--r--contrib/llvm/lib/IR/Function.cpp96
-rw-r--r--contrib/llvm/lib/IR/GCOV.cpp1
-rw-r--r--contrib/llvm/lib/IR/Globals.cpp51
-rw-r--r--contrib/llvm/lib/IR/IRBuilder.cpp20
-rw-r--r--contrib/llvm/lib/IR/IRPrintingPasses.cpp4
-rw-r--r--contrib/llvm/lib/IR/InlineAsm.cpp2
-rw-r--r--contrib/llvm/lib/IR/Instruction.cpp109
-rw-r--r--contrib/llvm/lib/IR/Instructions.cpp165
-rw-r--r--contrib/llvm/lib/IR/IntrinsicInst.cpp10
-rw-r--r--contrib/llvm/lib/IR/LLVMContext.cpp140
-rw-r--r--contrib/llvm/lib/IR/LLVMContextImpl.cpp11
-rw-r--r--contrib/llvm/lib/IR/LLVMContextImpl.h145
-rw-r--r--contrib/llvm/lib/IR/LegacyPassManager.cpp75
-rw-r--r--contrib/llvm/lib/IR/MDBuilder.cpp6
-rw-r--r--contrib/llvm/lib/IR/Mangler.cpp2
-rw-r--r--contrib/llvm/lib/IR/Metadata.cpp123
-rw-r--r--contrib/llvm/lib/IR/Module.cpp18
-rw-r--r--contrib/llvm/lib/IR/ModuleSummaryIndex.cpp21
-rw-r--r--contrib/llvm/lib/IR/Operator.cpp2
-rw-r--r--contrib/llvm/lib/IR/Pass.cpp4
-rw-r--r--contrib/llvm/lib/IR/PassManager.cpp72
-rw-r--r--contrib/llvm/lib/IR/PassRegistry.cpp3
-rw-r--r--contrib/llvm/lib/IR/SymbolTableListTraitsImpl.h5
-rw-r--r--contrib/llvm/lib/IR/Type.cpp23
-rw-r--r--contrib/llvm/lib/IR/User.cpp5
-rw-r--r--contrib/llvm/lib/IR/Value.cpp36
-rw-r--r--contrib/llvm/lib/IR/ValueSymbolTable.cpp8
-rw-r--r--contrib/llvm/lib/IR/ValueTypes.cpp2
-rw-r--r--contrib/llvm/lib/IR/Verifier.cpp711
-rw-r--r--contrib/llvm/lib/IRReader/IRReader.cpp31
-rw-r--r--contrib/llvm/lib/LTO/Caching.cpp99
-rw-r--r--contrib/llvm/lib/LTO/LTO.cpp843
-rw-r--r--contrib/llvm/lib/LTO/LTOBackend.cpp375
-rw-r--r--contrib/llvm/lib/LTO/LTOCodeGenerator.cpp122
-rw-r--r--contrib/llvm/lib/LTO/LTOModule.cpp142
-rw-r--r--contrib/llvm/lib/LTO/ThinLTOCodeGenerator.cpp270
-rw-r--r--contrib/llvm/lib/LTO/UpdateCompilerUsed.cpp48
-rw-r--r--contrib/llvm/lib/LibDriver/LibDriver.cpp9
-rw-r--r--contrib/llvm/lib/Linker/IRMover.cpp138
-rw-r--r--contrib/llvm/lib/Linker/LinkModules.cpp4
-rw-r--r--contrib/llvm/lib/MC/ConstantPools.cpp16
-rw-r--r--contrib/llvm/lib/MC/ELFObjectWriter.cpp34
-rw-r--r--contrib/llvm/lib/MC/MCAsmBackend.cpp4
-rw-r--r--contrib/llvm/lib/MC/MCAsmInfo.cpp1
-rw-r--r--contrib/llvm/lib/MC/MCAsmInfoDarwin.cpp1
-rw-r--r--contrib/llvm/lib/MC/MCAsmStreamer.cpp144
-rw-r--r--contrib/llvm/lib/MC/MCAssembler.cpp34
-rw-r--r--contrib/llvm/lib/MC/MCCodeView.cpp220
-rw-r--r--contrib/llvm/lib/MC/MCContext.cpp27
-rw-r--r--contrib/llvm/lib/MC/MCDisassembler/Disassembler.cpp31
-rw-r--r--contrib/llvm/lib/MC/MCDwarf.cpp5
-rw-r--r--contrib/llvm/lib/MC/MCELFStreamer.cpp4
-rw-r--r--contrib/llvm/lib/MC/MCExpr.cpp10
-rw-r--r--contrib/llvm/lib/MC/MCFragment.cpp11
-rw-r--r--contrib/llvm/lib/MC/MCInst.cpp4
-rw-r--r--contrib/llvm/lib/MC/MCLabel.cpp2
-rw-r--r--contrib/llvm/lib/MC/MCObjectFileInfo.cpp49
-rw-r--r--contrib/llvm/lib/MC/MCObjectStreamer.cpp74
-rw-r--r--contrib/llvm/lib/MC/MCParser/AsmLexer.cpp159
-rw-r--r--contrib/llvm/lib/MC/MCParser/AsmParser.cpp1556
-rw-r--r--contrib/llvm/lib/MC/MCParser/COFFAsmParser.cpp56
-rw-r--r--contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp13
-rw-r--r--contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp7
-rw-r--r--contrib/llvm/lib/MC/MCParser/MCAsmLexer.cpp4
-rw-r--r--contrib/llvm/lib/MC/MCParser/MCAsmParser.cpp104
-rw-r--r--contrib/llvm/lib/MC/MCRegisterInfo.cpp6
-rw-r--r--contrib/llvm/lib/MC/MCSection.cpp10
-rw-r--r--contrib/llvm/lib/MC/MCSectionCOFF.cpp3
-rw-r--r--contrib/llvm/lib/MC/MCSectionELF.cpp2
-rw-r--r--contrib/llvm/lib/MC/MCSectionMachO.cpp30
-rw-r--r--contrib/llvm/lib/MC/MCStreamer.cpp122
-rw-r--r--contrib/llvm/lib/MC/MCSymbol.cpp7
-rw-r--r--contrib/llvm/lib/MC/MCTargetOptions.cpp8
-rw-r--r--contrib/llvm/lib/MC/MCValue.cpp2
-rw-r--r--contrib/llvm/lib/MC/MachObjectWriter.cpp6
-rw-r--r--contrib/llvm/lib/MC/StringTableBuilder.cpp159
-rw-r--r--contrib/llvm/lib/MC/SubtargetFeature.cpp2
-rw-r--r--contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp5
-rw-r--r--contrib/llvm/lib/MC/WinCOFFStreamer.cpp15
-rw-r--r--contrib/llvm/lib/Object/Archive.cpp636
-rw-r--r--contrib/llvm/lib/Object/ArchiveWriter.cpp84
-rw-r--r--contrib/llvm/lib/Object/Binary.cpp2
-rw-r--r--contrib/llvm/lib/Object/COFFObjectFile.cpp68
-rw-r--r--contrib/llvm/lib/Object/ELF.cpp14
-rw-r--r--contrib/llvm/lib/Object/Error.cpp27
-rw-r--r--contrib/llvm/lib/Object/IRObjectFile.cpp276
-rw-r--r--contrib/llvm/lib/Object/MachOObjectFile.cpp1548
-rw-r--r--contrib/llvm/lib/Object/MachOUniversal.cpp94
-rw-r--r--contrib/llvm/lib/Object/ModuleSummaryIndexObjectFile.cpp51
-rw-r--r--contrib/llvm/lib/Object/ModuleSymbolTable.cpp189
-rw-r--r--contrib/llvm/lib/Object/ObjectFile.cpp10
-rw-r--r--contrib/llvm/lib/Object/RecordStreamer.cpp16
-rw-r--r--contrib/llvm/lib/Object/RecordStreamer.h3
-rw-r--r--contrib/llvm/lib/Object/SymbolSize.cpp18
-rw-r--r--contrib/llvm/lib/Object/SymbolicFile.cpp12
-rw-r--r--contrib/llvm/lib/Object/WasmObjectFile.cpp313
-rw-r--r--contrib/llvm/lib/ObjectYAML/DWARFYAML.cpp120
-rw-r--r--contrib/llvm/lib/ObjectYAML/ELFYAML.cpp7
-rw-r--r--contrib/llvm/lib/ObjectYAML/MachOYAML.cpp21
-rw-r--r--contrib/llvm/lib/ObjectYAML/ObjectYAML.cpp2
-rw-r--r--contrib/llvm/lib/Option/ArgList.cpp33
-rw-r--r--contrib/llvm/lib/Option/OptTable.cpp6
-rw-r--r--contrib/llvm/lib/Passes/PassBuilder.cpp996
-rw-r--r--contrib/llvm/lib/Passes/PassRegistry.def26
-rw-r--r--contrib/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp129
-rw-r--r--contrib/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp2
-rw-r--r--contrib/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp12
-rw-r--r--contrib/llvm/lib/ProfileData/InstrProf.cpp33
-rw-r--r--contrib/llvm/lib/ProfileData/InstrProfReader.cpp5
-rw-r--r--contrib/llvm/lib/ProfileData/InstrProfWriter.cpp30
-rw-r--r--contrib/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp6
-rw-r--r--contrib/llvm/lib/ProfileData/SampleProf.cpp2
-rw-r--r--contrib/llvm/lib/Support/APFloat.cpp1082
-rw-r--r--contrib/llvm/lib/Support/APInt.cpp64
-rw-r--r--contrib/llvm/lib/Support/ARMBuildAttrs.cpp18
-rw-r--r--contrib/llvm/lib/Support/CachePruning.cpp26
-rw-r--r--contrib/llvm/lib/Support/Chrono.cpp47
-rw-r--r--contrib/llvm/lib/Support/CommandLine.cpp213
-rw-r--r--contrib/llvm/lib/Support/Compression.cpp23
-rw-r--r--contrib/llvm/lib/Support/ConvertUTF.cpp (renamed from contrib/llvm/lib/Support/ConvertUTF.c)6
-rw-r--r--contrib/llvm/lib/Support/DataStream.cpp86
-rw-r--r--contrib/llvm/lib/Support/Debug.cpp11
-rw-r--r--contrib/llvm/lib/Support/DeltaAlgorithm.cpp1
-rw-r--r--contrib/llvm/lib/Support/Dwarf.cpp424
-rw-r--r--contrib/llvm/lib/Support/DynamicLibrary.cpp4
-rw-r--r--contrib/llvm/lib/Support/Error.cpp18
-rw-r--r--contrib/llvm/lib/Support/FileOutputBuffer.cpp3
-rw-r--r--contrib/llvm/lib/Support/FileUtilities.cpp8
-rw-r--r--contrib/llvm/lib/Support/FoldingSet.cpp12
-rw-r--r--contrib/llvm/lib/Support/FormatVariadic.cpp156
-rw-r--r--contrib/llvm/lib/Support/GlobPattern.cpp167
-rw-r--r--contrib/llvm/lib/Support/Host.cpp206
-rw-r--r--contrib/llvm/lib/Support/IntrusiveRefCntPtr.cpp14
-rw-r--r--contrib/llvm/lib/Support/LockFileManager.cpp16
-rw-r--r--contrib/llvm/lib/Support/MD5.cpp40
-rw-r--r--contrib/llvm/lib/Support/MemoryBuffer.cpp22
-rw-r--r--contrib/llvm/lib/Support/MemoryObject.cpp14
-rw-r--r--contrib/llvm/lib/Support/NativeFormatting.cpp265
-rw-r--r--contrib/llvm/lib/Support/Path.cpp77
-rw-r--r--contrib/llvm/lib/Support/PrettyStackTrace.cpp45
-rw-r--r--contrib/llvm/lib/Support/RandomNumberGenerator.cpp44
-rw-r--r--contrib/llvm/lib/Support/Regex.cpp12
-rw-r--r--contrib/llvm/lib/Support/SHA1.cpp205
-rw-r--r--contrib/llvm/lib/Support/ScaledNumber.cpp2
-rw-r--r--contrib/llvm/lib/Support/ScopedPrinter.cpp39
-rw-r--r--contrib/llvm/lib/Support/SmallPtrSet.cpp28
-rw-r--r--contrib/llvm/lib/Support/SourceMgr.cpp8
-rw-r--r--contrib/llvm/lib/Support/SpecialCaseList.cpp18
-rw-r--r--contrib/llvm/lib/Support/Statistic.cpp53
-rw-r--r--contrib/llvm/lib/Support/StreamingMemoryObject.cpp138
-rw-r--r--contrib/llvm/lib/Support/StringMap.cpp10
-rw-r--r--contrib/llvm/lib/Support/StringRef.cpp140
-rw-r--r--contrib/llvm/lib/Support/StringSaver.cpp4
-rw-r--r--contrib/llvm/lib/Support/TargetParser.cpp130
-rw-r--r--contrib/llvm/lib/Support/TargetRegistry.cpp5
-rw-r--r--contrib/llvm/lib/Support/Threading.cpp11
-rw-r--r--contrib/llvm/lib/Support/TimeValue.cpp56
-rw-r--r--contrib/llvm/lib/Support/Timer.cpp149
-rw-r--r--contrib/llvm/lib/Support/TrigramIndex.cpp111
-rw-r--r--contrib/llvm/lib/Support/Triple.cpp54
-rw-r--r--contrib/llvm/lib/Support/Twine.cpp12
-rw-r--r--contrib/llvm/lib/Support/Unix/Memory.inc25
-rw-r--r--contrib/llvm/lib/Support/Unix/Path.inc52
-rw-r--r--contrib/llvm/lib/Support/Unix/Process.inc29
-rw-r--r--contrib/llvm/lib/Support/Unix/Signals.inc42
-rw-r--r--contrib/llvm/lib/Support/Unix/TimeValue.inc54
-rw-r--r--contrib/llvm/lib/Support/Unix/Unix.h40
-rw-r--r--contrib/llvm/lib/Support/Windows/Path.inc38
-rw-r--r--contrib/llvm/lib/Support/Windows/Process.inc22
-rw-r--r--contrib/llvm/lib/Support/Windows/Signals.inc5
-rw-r--r--contrib/llvm/lib/Support/Windows/TimeValue.inc61
-rw-r--r--contrib/llvm/lib/Support/Windows/WindowsSupport.h40
-rw-r--r--contrib/llvm/lib/Support/YAMLParser.cpp90
-rw-r--r--contrib/llvm/lib/Support/YAMLTraits.cpp11
-rw-r--r--contrib/llvm/lib/Support/raw_ostream.cpp266
-rw-r--r--contrib/llvm/lib/Support/xxhash.cpp134
-rw-r--r--contrib/llvm/lib/TableGen/Main.cpp44
-rw-r--r--contrib/llvm/lib/TableGen/Record.cpp443
-rw-r--r--contrib/llvm/lib/TableGen/TGLexer.cpp16
-rw-r--r--contrib/llvm/lib/TableGen/TGLexer.h4
-rw-r--r--contrib/llvm/lib/TableGen/TGParser.cpp240
-rw-r--r--contrib/llvm/lib/TableGen/TGParser.h38
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64.h16
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64.td80
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp11
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp8
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64AddressTypePromotion.cpp14
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp8
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp153
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64BranchRelaxation.cpp520
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64CallLowering.cpp329
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64CallLowering.h28
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.td7
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp13
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp36
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp10
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp14
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp136
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp25
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp123
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp77
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def173
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp77
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp721
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h45
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64InstrAtomics.td14
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td23
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp299
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h61
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td65
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp1161
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.h47
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp204
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.h30
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp373
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp7
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h2
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64PromoteConstant.cpp10
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp19
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp445
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.h39
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp45
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.h10
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td2
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64SchedA57.td22
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64SchedA57WriteRes.td14
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64SchedFalkor.td26
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64SchedM1.td98
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64SchedVulcan.td13
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp13
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp22
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h26
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp224
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.h2
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp13
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.h4
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp3
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h7
-rw-r--r--contrib/llvm/lib/Target/AArch64/AArch64VectorByElementOpt.cpp371
-rw-r--r--contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp534
-rw-r--r--contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp24
-rw-r--r--contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.h1
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp33
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp202
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp3
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp25
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp16
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h16
-rw-r--r--contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp2
-rw-r--r--contrib/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp25
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPU.h33
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPU.td160
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp14
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp8
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp100
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp415
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h33
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp8
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h6
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp301
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp22
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.h9
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp310
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp824
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h72
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp5
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h18
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td70
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td126
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsics.td5
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp137
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.h15
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp44
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h47
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUOpenCLImageTypeLoweringPass.cpp2
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUPTNote.h42
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp31
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h203
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp181
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h245
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp169
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h23
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp13
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h3
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp3
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h9
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDGPUUnifyMetadata.cpp149
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp49
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp1524
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/BUFInstructions.td1350
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/CIInstructions.td336
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/CaymanInstructions.td139
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/DSInstructions.td906
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp238
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h173
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/EvergreenInstructions.td139
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/FLATInstructions.td530
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp258
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h9
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp312
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h54
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp485
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h200
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp137
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp26
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h13
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp13
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h11
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp408
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h26
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp306
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h37
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp18
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp129
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/MIMGInstructions.td763
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/Processors.td70
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp4
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp38
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp4
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp81
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600FrameLowering.cpp3
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600FrameLowering.h12
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp389
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600InstrFormats.td4
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp66
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.h27
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600Instructions.td58
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp4
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600MachineFunctionInfo.h7
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600MachineScheduler.h20
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp75
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/R600Packetizer.cpp6
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp11
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp2
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIDefines.h196
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIFixControlFlowLiveIntervals.cpp4
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp160
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp441
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp405
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.h30
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp1912
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h41
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp329
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp127
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIInstrFormats.td716
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp1260
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h214
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td3154
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIInstructions.td3185
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIIntrinsics.td29
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp300
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp930
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp32
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp44
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h125
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp80
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIMachineScheduler.h58
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp304
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp1227
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h152
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.td178
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SISchedule.td6
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp172
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SITypeRewriter.cpp4
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp424
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SMInstructions.td535
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/SOPInstructions.td1229
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp17
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp282
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h151
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h131
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp29
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/VIInstrFormats.td277
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/VIInstructions.td150
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/VOP1Instructions.td621
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/VOP2Instructions.td757
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/VOP3Instructions.td447
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/VOPCInstructions.td1144
-rw-r--r--contrib/llvm/lib/Target/AMDGPU/VOPInstructions.td350
-rw-r--r--contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp4
-rw-r--r--contrib/llvm/lib/Target/ARM/ARM.h5
-rw-r--r--contrib/llvm/lib/Target/ARM/ARM.td84
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp247
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h30
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp91
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h16
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp63
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h3
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMBasicBlockInfo.h110
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp203
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMCallLowering.h42
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMCallingConv.td1
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMComputeBlockSize.cpp72
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp295
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp33
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h31
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp11
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFastISel.cpp51
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp333
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp362
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp990
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMISelLowering.h21
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrFormats.td17
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp4
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrInfo.td57
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrNEON.td14
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrThumb.td105
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td525
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstrVFP.td8
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp109
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMInstructionSelector.h39
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp44
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.h29
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp14
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp113
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp12
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h25
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMOptimizeBarriersPass.cpp6
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp127
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.h41
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSchedule.td1
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMScheduleR52.td983
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp71
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMSubtarget.h44
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp105
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp45
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h17
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp21
-rw-r--r--contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h17
-rw-r--r--contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp1384
-rw-r--r--contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp14
-rw-r--r--contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp6
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp58
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp29
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp4
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp1
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp11
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp21
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h20
-rw-r--r--contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp2
-rw-r--r--contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp13
-rw-r--r--contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp34
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp323
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp4
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp4
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp4
-rw-r--r--contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp10
-rw-r--r--contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp16
-rw-r--r--contrib/llvm/lib/Target/AVR/AVR.h6
-rw-r--r--contrib/llvm/lib/Target/AVR/AVR.td526
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRAsmPrinter.cpp184
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRCallingConv.td11
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRDevices.td491
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp1515
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRFrameLowering.cpp538
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp565
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRISelLowering.cpp1937
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRISelLowering.h15
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRInstrFormats.td2
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRInstrInfo.cpp82
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRInstrInfo.h12
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRInstrInfo.td114
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRInstrumentFunctions.cpp222
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRMCInstLower.cpp100
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRMCInstLower.h43
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRRegisterInfo.cpp20
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRRegisterInfo.h6
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRRelaxMemOperations.cpp149
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRTargetMachine.cpp25
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRTargetObjectFile.cpp9
-rw-r--r--contrib/llvm/lib/Target/AVR/AVRTargetObjectFile.h3
-rw-r--r--contrib/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp631
-rw-r--r--contrib/llvm/lib/Target/AVR/Disassembler/AVRDisassembler.cpp156
-rw-r--r--contrib/llvm/lib/Target/AVR/InstPrinter/AVRInstPrinter.cpp171
-rw-r--r--contrib/llvm/lib/Target/AVR/InstPrinter/AVRInstPrinter.h54
-rw-r--r--contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp473
-rw-r--r--contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h78
-rw-r--r--contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp127
-rw-r--r--contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRFixupKinds.h149
-rw-r--r--contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp304
-rw-r--r--contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.h115
-rw-r--r--contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp189
-rw-r--r--contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRMCExpr.h88
-rw-r--r--contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp121
-rw-r--r--contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.h6
-rw-r--r--contrib/llvm/lib/Target/AVR/README.md8
-rw-r--r--contrib/llvm/lib/Target/AVR/TargetInfo/AVRTargetInfo.cpp14
-rw-r--r--contrib/llvm/lib/Target/BPF/BPF.td6
-rw-r--r--contrib/llvm/lib/Target/BPF/BPFAsmPrinter.cpp8
-rw-r--r--contrib/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp2
-rw-r--r--contrib/llvm/lib/Target/BPF/BPFInstrInfo.cpp14
-rw-r--r--contrib/llvm/lib/Target/BPF/BPFInstrInfo.h8
-rw-r--r--contrib/llvm/lib/Target/BPF/BPFInstrInfo.td114
-rw-r--r--contrib/llvm/lib/Target/BPF/BPFRegisterInfo.cpp4
-rw-r--r--contrib/llvm/lib/Target/BPF/BPFTargetMachine.cpp6
-rw-r--r--contrib/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp154
-rw-r--r--contrib/llvm/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp18
-rw-r--r--contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp6
-rw-r--r--contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp8
-rw-r--r--contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h9
-rw-r--r--contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp19
-rw-r--r--contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp27
-rw-r--r--contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h13
-rw-r--r--contrib/llvm/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp26
-rw-r--r--contrib/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp402
-rw-r--r--contrib/llvm/lib/Target/Hexagon/BitTracker.cpp27
-rw-r--r--contrib/llvm/lib/Target/Hexagon/BitTracker.h3
-rw-r--r--contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp752
-rw-r--r--contrib/llvm/lib/Target/Hexagon/Hexagon.td20
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp73
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp539
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp59
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonBlockRanges.cpp74
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonBlockRanges.h49
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonBranchRelaxation.cpp46
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp20
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp196
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp3149
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp95
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp266
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp390
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonFixupHwLoops.cpp8
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp689
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h17
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp49
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp199
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonGenMux.cpp75
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp81
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp143
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonHazardRecognizer.cpp140
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonHazardRecognizer.h78
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp1226
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp731
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h50
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrAlias.td64
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td5
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td4
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV60.td4
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp1448
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h235
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td1167
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV3.td72
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td1289
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV5.td568
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV60.td399
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoVector.td420
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td57
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsDerived.td18
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV4.td18
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV60.td81
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonIsetDx.td138
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h8
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp18
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h4
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp60
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonOperands.td365
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp125
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp7
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonPatterns.td3347
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp19
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonRDF.cpp60
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonRDF.h28
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp46
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp27
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h9
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td33
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonSelectCCInfo.td121
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp7
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.h2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp126
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp120
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp70
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp72
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h13
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonSystemInst.td2
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp38
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp87
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h18
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp33
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h13
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp446
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h44
-rw-r--r--contrib/llvm/lib/Target/Hexagon/HexagonVectorPrint.cpp209
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp7
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h9
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp1
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp4
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h5
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp6
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp240
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp23
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h17
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp111
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h3
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp58
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h20
-rw-r--r--contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp10
-rw-r--r--contrib/llvm/lib/Target/Hexagon/RDFCopy.cpp69
-rw-r--r--contrib/llvm/lib/Target/Hexagon/RDFCopy.h1
-rw-r--r--contrib/llvm/lib/Target/Hexagon/RDFGraph.cpp675
-rw-r--r--contrib/llvm/lib/Target/Hexagon/RDFGraph.h261
-rw-r--r--contrib/llvm/lib/Target/Hexagon/RDFLiveness.cpp538
-rw-r--r--contrib/llvm/lib/Target/Hexagon/RDFLiveness.h44
-rw-r--r--contrib/llvm/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp8
-rw-r--r--contrib/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp6
-rw-r--r--contrib/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp10
-rw-r--r--contrib/llvm/lib/Target/Lanai/Lanai.h2
-rw-r--r--contrib/llvm/lib/Target/Lanai/LanaiAluCode.h4
-rw-r--r--contrib/llvm/lib/Target/Lanai/LanaiAsmPrinter.cpp8
-rw-r--r--contrib/llvm/lib/Target/Lanai/LanaiDelaySlotFiller.cpp15
-rw-r--r--contrib/llvm/lib/Target/Lanai/LanaiFrameLowering.cpp32
-rw-r--r--contrib/llvm/lib/Target/Lanai/LanaiISelDAGToDAG.cpp26
-rw-r--r--contrib/llvm/lib/Target/Lanai/LanaiISelLowering.cpp75
-rw-r--r--contrib/llvm/lib/Target/Lanai/LanaiISelLowering.h1
-rw-r--r--contrib/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp19
-rw-r--r--contrib/llvm/lib/Target/Lanai/LanaiInstrInfo.h10
-rw-r--r--contrib/llvm/lib/Target/Lanai/LanaiInstrInfo.td8
-rw-r--r--contrib/llvm/lib/Target/Lanai/LanaiMCInstLower.cpp1
-rw-r--r--contrib/llvm/lib/Target/Lanai/LanaiMCInstLower.h3
-rw-r--r--contrib/llvm/lib/Target/Lanai/LanaiMemAluCombiner.cpp7
-rw-r--r--contrib/llvm/lib/Target/Lanai/LanaiRegisterInfo.cpp8
-rw-r--r--contrib/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp3
-rw-r--r--contrib/llvm/lib/Target/Lanai/LanaiTargetObjectFile.cpp51
-rw-r--r--contrib/llvm/lib/Target/Lanai/LanaiTargetObjectFile.h14
-rw-r--r--contrib/llvm/lib/Target/Lanai/LanaiTargetTransformInfo.h5
-rw-r--r--contrib/llvm/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp8
-rw-r--r--contrib/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp17
-rw-r--r--contrib/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp29
-rw-r--r--contrib/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h6
-rw-r--r--contrib/llvm/lib/Target/Lanai/TargetInfo/LanaiTargetInfo.cpp9
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp11
-rw-r--r--contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h2
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp6
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430BranchSelector.cpp308
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp26
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp2
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp35
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp21
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.h12
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp4
-rw-r--r--contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp2
-rw-r--r--contrib/llvm/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp11
-rw-r--r--contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp1110
-rw-r--r--contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp184
-rw-r--r--contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp1
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp7
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp26
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h5
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp21
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h4
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp9
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp40
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h7
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp17
-rw-r--r--contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h21
-rw-r--r--contrib/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td579
-rw-r--r--contrib/llvm/lib/Target/Mips/MicroMips64r6InstrFormats.td46
-rw-r--r--contrib/llvm/lib/Target/Mips/MicroMips64r6InstrInfo.td117
-rw-r--r--contrib/llvm/lib/Target/Mips/MicroMipsInstrFPU.td21
-rw-r--r--contrib/llvm/lib/Target/Mips/MicroMipsInstrFormats.td24
-rw-r--r--contrib/llvm/lib/Target/Mips/MicroMipsInstrInfo.td280
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips.td6
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips16FrameLowering.cpp28
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips16HardFloat.cpp4
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp7
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips16InstrInfo.cpp14
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp4
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips32r6InstrFormats.td16
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips32r6InstrInfo.td192
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td81
-rw-r--r--contrib/llvm/lib/Target/Mips/Mips64r6InstrInfo.td60
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp33
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsAsmPrinter.h4
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp27
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp62
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsEVAInstrFormats.td4
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsFastISel.cpp237
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp26
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsHazardSchedule.cpp75
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp22
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.h15
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp132
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsISelLowering.h7
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrFPU.td43
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrFormats.td4
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp85
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrInfo.h12
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsInstrInfo.td322
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsLongBranch.cpp12
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsMSAInstrInfo.td90
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsMachineFunction.cpp8
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp2
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsOptimizePICCall.cpp2
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsOs16.cpp4
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp6
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td6
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp62
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp78
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h19
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp358
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSEISelLowering.h14
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp41
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp8
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsSchedule.td83
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsScheduleGeneric.td1048
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsScheduleP5600.td254
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp17
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp40
-rw-r--r--contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.h14
-rw-r--r--contrib/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp31
-rw-r--r--contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h58
-rw-r--r--contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp2
-rw-r--r--contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h4
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTX.h7
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTX.td9
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp4
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp70
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h2
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp289
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp2
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp37
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp59
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h2
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp173
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h3
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXInferAddressSpaces.cpp3
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp28
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h9
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td161
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td196
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp4
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp8
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp (renamed from contrib/llvm/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp)85
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.cpp4
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXPeephole.cpp2
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp52
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp26
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp2
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp2
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.h8
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp40
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h6
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp23
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h7
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp247
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.h61
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVVMIntrRange.cpp4
-rw-r--r--contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp1
-rw-r--r--contrib/llvm/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp14
-rw-r--r--contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp369
-rw-r--r--contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp39
-rw-r--r--contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp61
-rw-r--r--contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h1
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp3
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp22
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp3
-rw-r--r--contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h10
-rw-r--r--contrib/llvm/lib/Target/PowerPC/P9InstrResources.td808
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPC.td6
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp209
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp80
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp81
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp8
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td26
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp4
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp85
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp212
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp222
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp876
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h76
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td10
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td142
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td59
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp262
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h29
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td135
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td10
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td775
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp38
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp8
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp164
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCQPXLoadSplat.cpp2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp31
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td35
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCSchedule.td2
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCScheduleP9.td335
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h3
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp26
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp27
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp8
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.h3
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp26
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h7
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp42
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp78
-rw-r--r--contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp3
-rw-r--r--contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp25
-rw-r--r--contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp91
-rw-r--r--contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp47
-rw-r--r--contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp25
-rw-r--r--contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.h31
-rw-r--r--contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp91
-rw-r--r--contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp59
-rw-r--r--contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h58
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCV.td27
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVInstrFormats.td152
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVInstrInfo.td55
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVRegisterInfo.td90
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp58
-rw-r--r--contrib/llvm/lib/Target/RISCV/RISCVTargetMachine.h40
-rw-r--r--contrib/llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.cpp30
-rw-r--r--contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp32
-rw-r--r--contrib/llvm/lib/Target/Sparc/DelaySlotFiller.cpp6
-rw-r--r--contrib/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp10
-rwxr-xr-xcontrib/llvm/lib/Target/Sparc/LeonFeatures.td131
-rwxr-xr-xcontrib/llvm/lib/Target/Sparc/LeonPasses.cpp645
-rwxr-xr-xcontrib/llvm/lib/Target/Sparc/LeonPasses.h114
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp3
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp17
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp15
-rw-r--r--contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h10
-rw-r--r--contrib/llvm/lib/Target/Sparc/Sparc.td147
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp10
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp49
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp21
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp151
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp46
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h10
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td24
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.td1
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp9
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcSubtarget.h20
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp57
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcTargetObjectFile.cpp13
-rw-r--r--contrib/llvm/lib/Target/Sparc/SparcTargetObjectFile.h10
-rw-r--r--contrib/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp21
-rw-r--r--contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp590
-rw-r--r--contrib/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp45
-rw-r--r--contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp19
-rw-r--r--contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h3
-rw-r--r--contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp12
-rw-r--r--contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp41
-rw-r--r--contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h4
-rw-r--r--contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp4
-rw-r--r--contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp24
-rw-r--r--contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h7
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZ.h1
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZ.td14
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp6
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h4
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp92
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZExpandPseudo.cpp153
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZFeatures.td171
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp40
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp337
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.h128
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp148
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp138
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h10
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h4
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td250
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td2023
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp341
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h54
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td1223
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZInstrVector.td237
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZLDCleanup.cpp2
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp10
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp153
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZMachineScheduler.h112
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZOperands.td47
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZOperators.td24
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZProcessors.td103
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp5
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td28
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZSchedule.td77
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td1064
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td769
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td807
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp6
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp8
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h17
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp26
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp57
-rw-r--r--contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h9
-rw-r--r--contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp9
-rw-r--r--contrib/llvm/lib/Target/TargetIntrinsicInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp61
-rw-r--r--contrib/llvm/lib/Target/TargetMachine.cpp32
-rw-r--r--contrib/llvm/lib/Target/TargetMachineC.cpp5
-rw-r--r--contrib/llvm/lib/Target/TargetRecip.cpp225
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp9
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp60
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h2
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp6
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp58
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp16
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h84
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp56
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h19
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/README.txt52
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp14
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssembly.h6
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssembly.td2
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp25
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp92
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp148
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp120
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp308
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp161
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp2
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp104
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h1
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp2
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp43
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td76
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td49
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrConv.td54
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td44
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td67
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp26
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h10
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td98
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td64
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td627
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td7
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp6
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp1184
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp37
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h26
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp2
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp2
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp53
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp28
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp2
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp22
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp139
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp22
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td10
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp6
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp47
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp26
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp55
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h7
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp71
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h34
-rw-r--r--contrib/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt1
-rw-r--r--contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp585
-rw-r--r--contrib/llvm/lib/Target/X86/AsmParser/X86Operand.h4
-rw-r--r--contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp24
-rw-r--r--contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp12
-rw-r--r--contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h2
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp3
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp254
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.h5
-rw-r--r--contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp3
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp18
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h244
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp3
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp195
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp6
-rw-r--r--contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h10
-rw-r--r--contrib/llvm/lib/Target/X86/TargetInfo/X86TargetInfo.cpp17
-rw-r--r--contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp26
-rw-r--r--contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h2
-rw-r--r--contrib/llvm/lib/Target/X86/X86.h7
-rw-r--r--contrib/llvm/lib/Target/X86/X86.td34
-rw-r--r--contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp16
-rw-r--r--contrib/llvm/lib/Target/X86/X86AsmPrinter.h27
-rw-r--r--contrib/llvm/lib/Target/X86/X86CallFrameOptimization.cpp14
-rw-r--r--contrib/llvm/lib/Target/X86/X86CallLowering.cpp46
-rw-r--r--contrib/llvm/lib/Target/X86/X86CallLowering.h39
-rw-r--r--contrib/llvm/lib/Target/X86/X86CallingConv.cpp208
-rw-r--r--contrib/llvm/lib/Target/X86/X86CallingConv.h38
-rw-r--r--contrib/llvm/lib/Target/X86/X86CallingConv.td276
-rwxr-xr-xcontrib/llvm/lib/Target/X86/X86EvexToVex.cpp213
-rw-r--r--contrib/llvm/lib/Target/X86/X86ExpandPseudo.cpp45
-rw-r--r--contrib/llvm/lib/Target/X86/X86FastISel.cpp331
-rw-r--r--contrib/llvm/lib/Target/X86/X86FixupBWInsts.cpp10
-rw-r--r--contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp4
-rw-r--r--contrib/llvm/lib/Target/X86/X86FixupSetCC.cpp5
-rw-r--r--contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp51
-rw-r--r--contrib/llvm/lib/Target/X86/X86FrameLowering.cpp251
-rw-r--r--contrib/llvm/lib/Target/X86/X86FrameLowering.h30
-rw-r--r--contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp130
-rw-r--r--contrib/llvm/lib/Target/X86/X86ISelLowering.cpp7793
-rw-r--r--contrib/llvm/lib/Target/X86/X86ISelLowering.h209
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrAVX512.td3078
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrArithmetic.td2
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrBuilder.h74
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrCompiler.td74
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrControl.td53
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrFMA.td176
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrFMA3Info.cpp285
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrFMA3Info.h315
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrFPStack.td12
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrFormats.td149
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td311
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrInfo.cpp3787
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrInfo.h83
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrInfo.td47
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrMMX.td9
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrSSE.td1902
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td37
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrSystem.td9
-rwxr-xr-xcontrib/llvm/lib/Target/X86/X86InstrTablesInfo.h1148
-rw-r--r--contrib/llvm/lib/Target/X86/X86InstrXOP.td87
-rw-r--r--contrib/llvm/lib/Target/X86/X86InterleavedAccess.cpp221
-rw-r--r--contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h756
-rw-r--r--contrib/llvm/lib/Target/X86/X86MCInstLower.cpp309
-rw-r--r--contrib/llvm/lib/Target/X86/X86OptimizeLEAs.cpp18
-rw-r--r--contrib/llvm/lib/Target/X86/X86PadShortFunction.cpp4
-rw-r--r--contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp133
-rw-r--r--contrib/llvm/lib/Target/X86/X86RegisterInfo.h5
-rw-r--r--contrib/llvm/lib/Target/X86/X86RegisterInfo.td2
-rw-r--r--contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp4
-rw-r--r--contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp403
-rw-r--r--contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.h4
-rw-r--r--contrib/llvm/lib/Target/X86/X86Subtarget.cpp31
-rw-r--r--contrib/llvm/lib/Target/X86/X86Subtarget.h36
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetMachine.cpp105
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetMachine.h2
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp34
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetObjectFile.h24
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp754
-rw-r--r--contrib/llvm/lib/Target/X86/X86TargetTransformInfo.h16
-rw-r--r--contrib/llvm/lib/Target/X86/X86VZeroUpper.cpp4
-rw-r--r--contrib/llvm/lib/Target/X86/X86WinAllocaExpander.cpp3
-rw-r--r--contrib/llvm/lib/Target/X86/X86WinEHState.cpp2
-rw-r--r--contrib/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp4
-rw-r--r--contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp16
-rw-r--r--contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h3
-rw-r--r--contrib/llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp7
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp16
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp110
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp12
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp2
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp20
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp32
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreInstrInfo.h10
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td8
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreMCInstLower.cpp7
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreMCInstLower.h3
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.cpp18
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp4
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.cpp2
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp2
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp20
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.h6
-rw-r--r--contrib/llvm/lib/Target/XCore/XCoreTargetTransformInfo.h7
-rw-r--r--contrib/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp134
-rw-r--r--contrib/llvm/lib/Transforms/Coroutines/CoroEarly.cpp218
-rw-r--r--contrib/llvm/lib/Transforms/Coroutines/CoroElide.cpp317
-rw-r--r--contrib/llvm/lib/Transforms/Coroutines/CoroFrame.cpp727
-rw-r--r--contrib/llvm/lib/Transforms/Coroutines/CoroInstr.h318
-rw-r--r--contrib/llvm/lib/Transforms/Coroutines/CoroInternal.h107
-rw-r--r--contrib/llvm/lib/Transforms/Coroutines/CoroSplit.cpp640
-rw-r--r--contrib/llvm/lib/Transforms/Coroutines/Coroutines.cpp314
-rw-r--r--contrib/llvm/lib/Transforms/IPO/AlwaysInliner.cpp158
-rw-r--r--contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp19
-rw-r--r--contrib/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp10
-rw-r--r--contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp103
-rw-r--r--contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp401
-rw-r--r--contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp54
-rw-r--r--contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp48
-rw-r--r--contrib/llvm/lib/Transforms/IPO/GlobalSplit.cpp171
-rw-r--r--contrib/llvm/lib/Transforms/IPO/IPO.cpp8
-rw-r--r--contrib/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp103
-rw-r--r--contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp46
-rw-r--r--contrib/llvm/lib/Transforms/IPO/Inliner.cpp663
-rw-r--r--contrib/llvm/lib/Transforms/IPO/Internalize.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/IPO/LowerTypeTests.cpp636
-rw-r--r--contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp1217
-rw-r--r--contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp239
-rw-r--r--contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp137
-rw-r--r--contrib/llvm/lib/Transforms/IPO/PruneEH.cpp5
-rw-r--r--contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp294
-rw-r--r--contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp29
-rw-r--r--contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp344
-rw-r--r--contrib/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp187
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp125
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp326
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp717
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp381
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp4384
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h237
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp92
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp132
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp38
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp537
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp337
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp319
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp203
-rw-r--r--contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp268
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp742
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/CFGMST.h8
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp51
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp75
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp40
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp145
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp57
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp456
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp319
-rw-r--r--contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp104
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h9
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp37
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp16
-rw-r--r--contrib/llvm/lib/Transforms/ObjCARC/PtrState.cpp4
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/ADCE.cpp623
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp10
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/BDCE.cpp8
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp4
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp91
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/DCE.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp277
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp189
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/Float2Int.cpp45
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/GVN.cpp139
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/GVNHoist.cpp557
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/GuardWidening.cpp3
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp375
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp175
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp82
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LICM.cpp221
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoadCombine.cpp7
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp199
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopDistribute.cpp97
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp54
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopInterchange.cpp250
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp43
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp135
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp15
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopSink.cpp333
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp635
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp457
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp92
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp85
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp36
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp30
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp47
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp7
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/NaryReassociate.cpp211
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp2092
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp34
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp209
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/SCCP.cpp158
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/SROA.cpp224
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/Scalar.cpp25
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/Scalarizer.cpp79
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp15
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/Sink.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp114
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp24
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp273
-rw-r--r--contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp4
-rw-r--r--contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp100
-rw-r--r--contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp52
-rw-r--r--contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp18
-rw-r--r--contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp1
-rw-r--r--contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp35
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CloneModule.cpp5
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CmpInstAnalysis.cpp76
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp147
-rw-r--r--contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp10
-rw-r--r--contrib/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp96
-rw-r--r--contrib/llvm/lib/Transforms/Utils/Evaluator.cpp18
-rw-r--r--contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp9
-rw-r--r--contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp919
-rw-r--r--contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp78
-rw-r--r--contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp7
-rw-r--r--contrib/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp203
-rw-r--r--contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp214
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LCSSA.cpp59
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp571
-rw-r--r--contrib/llvm/lib/Transforms/Utils/Local.cpp180
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp55
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp147
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp405
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp23
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp167
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LoopVersioning.cpp6
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp49
-rw-r--r--contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp8
-rw-r--r--contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Utils/MemorySSA.cpp1926
-rw-r--r--contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp104
-rw-r--r--contrib/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp (renamed from contrib/llvm/lib/Transforms/Utils/NameAnonFunctions.cpp)63
-rw-r--r--contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp824
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp41
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp113
-rw-r--r--contrib/llvm/lib/Transforms/Utils/StripGCRelocates.cpp80
-rw-r--r--contrib/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp42
-rw-r--r--contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp80
-rw-r--r--contrib/llvm/lib/Transforms/Utils/Utils.cpp7
-rw-r--r--contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp22
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/BBVectorize.cpp2
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp511
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp2906
-rw-r--r--contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp716
-rw-r--r--contrib/llvm/tools/bugpoint/BugDriver.cpp101
-rw-r--r--contrib/llvm/tools/bugpoint/BugDriver.h109
-rw-r--r--contrib/llvm/tools/bugpoint/CrashDebugger.cpp759
-rw-r--r--contrib/llvm/tools/bugpoint/ExecutionDriver.cpp431
-rw-r--r--contrib/llvm/tools/bugpoint/ExtractFunction.cpp156
-rw-r--r--contrib/llvm/tools/bugpoint/FindBugs.cpp84
-rw-r--r--contrib/llvm/tools/bugpoint/ListReducer.h111
-rw-r--r--contrib/llvm/tools/bugpoint/Miscompilation.cpp679
-rw-r--r--contrib/llvm/tools/bugpoint/OptimizerDriver.cpp76
-rw-r--r--contrib/llvm/tools/bugpoint/ToolRunner.cpp577
-rw-r--r--contrib/llvm/tools/bugpoint/ToolRunner.h149
-rw-r--r--contrib/llvm/tools/bugpoint/bugpoint.cpp108
-rw-r--r--contrib/llvm/tools/llc/llc.cpp74
-rw-r--r--contrib/llvm/tools/lli/ChildTarget/ChildTarget.cpp19
-rw-r--r--contrib/llvm/tools/lli/OrcLazyJIT.cpp31
-rw-r--r--contrib/llvm/tools/lli/OrcLazyJIT.h66
-rw-r--r--contrib/llvm/tools/lli/RemoteJITUtils.h19
-rw-r--r--contrib/llvm/tools/lli/lli.cpp97
-rw-r--r--contrib/llvm/tools/llvm-ar/llvm-ar.cpp77
-rw-r--r--contrib/llvm/tools/llvm-as/llvm-as.cpp2
-rw-r--r--contrib/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp159
-rw-r--r--contrib/llvm/tools/llvm-cov/CodeCoverage.cpp379
-rw-r--r--contrib/llvm/tools/llvm-cov/CoverageExporterJson.cpp421
-rw-r--r--contrib/llvm/tools/llvm-cov/CoverageReport.cpp312
-rw-r--r--contrib/llvm/tools/llvm-cov/CoverageReport.h26
-rw-r--r--contrib/llvm/tools/llvm-cov/CoverageSummaryInfo.cpp16
-rw-r--r--contrib/llvm/tools/llvm-cov/CoverageSummaryInfo.h27
-rw-r--r--contrib/llvm/tools/llvm-cov/CoverageViewOptions.h16
-rw-r--r--contrib/llvm/tools/llvm-cov/SourceCoverageView.cpp54
-rw-r--r--contrib/llvm/tools/llvm-cov/SourceCoverageView.h36
-rw-r--r--contrib/llvm/tools/llvm-cov/SourceCoverageViewHTML.cpp386
-rw-r--r--contrib/llvm/tools/llvm-cov/SourceCoverageViewHTML.h17
-rw-r--r--contrib/llvm/tools/llvm-cov/SourceCoverageViewText.cpp36
-rw-r--r--contrib/llvm/tools/llvm-cov/SourceCoverageViewText.h10
-rw-r--r--contrib/llvm/tools/llvm-cov/llvm-cov.cpp12
-rw-r--r--contrib/llvm/tools/llvm-cxxdump/Error.cpp2
-rw-r--r--contrib/llvm/tools/llvm-cxxdump/llvm-cxxdump.cpp4
-rw-r--r--contrib/llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp33
-rw-r--r--contrib/llvm/tools/llvm-dis/llvm-dis.cpp58
-rw-r--r--contrib/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp11
-rw-r--r--contrib/llvm/tools/llvm-extract/llvm-extract.cpp13
-rw-r--r--contrib/llvm/tools/llvm-link/llvm-link.cpp63
-rw-r--r--contrib/llvm/tools/llvm-lto/llvm-lto.cpp118
-rw-r--r--contrib/llvm/tools/llvm-lto2/llvm-lto2.cpp274
-rw-r--r--contrib/llvm/tools/llvm-mc/Disassembler.cpp2
-rw-r--r--contrib/llvm/tools/llvm-mc/llvm-mc.cpp117
-rw-r--r--contrib/llvm/tools/llvm-modextract/llvm-modextract.cpp74
-rw-r--r--contrib/llvm/tools/llvm-nm/llvm-nm.cpp149
-rw-r--r--contrib/llvm/tools/llvm-objdump/COFFDump.cpp34
-rw-r--r--contrib/llvm/tools/llvm-objdump/ELFDump.cpp20
-rw-r--r--contrib/llvm/tools/llvm-objdump/MachODump.cpp649
-rw-r--r--contrib/llvm/tools/llvm-objdump/WasmDump.cpp28
-rw-r--r--contrib/llvm/tools/llvm-objdump/llvm-objdump.cpp594
-rw-r--r--contrib/llvm/tools/llvm-objdump/llvm-objdump.h4
-rw-r--r--contrib/llvm/tools/llvm-pdbdump/CompilandDumper.cpp9
-rw-r--r--contrib/llvm/tools/llvm-pdbdump/LLVMOutputStyle.cpp382
-rw-r--r--contrib/llvm/tools/llvm-pdbdump/LLVMOutputStyle.h14
-rw-r--r--contrib/llvm/tools/llvm-pdbdump/PdbYaml.cpp114
-rw-r--r--contrib/llvm/tools/llvm-pdbdump/PdbYaml.h84
-rw-r--r--contrib/llvm/tools/llvm-pdbdump/YAMLOutputStyle.cpp93
-rw-r--r--contrib/llvm/tools/llvm-pdbdump/YAMLOutputStyle.h2
-rw-r--r--contrib/llvm/tools/llvm-pdbdump/YamlSerializationContext.h39
-rw-r--r--contrib/llvm/tools/llvm-pdbdump/YamlSymbolDumper.cpp412
-rw-r--r--contrib/llvm/tools/llvm-pdbdump/YamlSymbolDumper.h66
-rw-r--r--contrib/llvm/tools/llvm-pdbdump/YamlTypeDumper.cpp599
-rw-r--r--contrib/llvm/tools/llvm-pdbdump/YamlTypeDumper.h116
-rw-r--r--contrib/llvm/tools/llvm-pdbdump/llvm-pdbdump.cpp183
-rw-r--r--contrib/llvm/tools/llvm-pdbdump/llvm-pdbdump.h18
-rw-r--r--contrib/llvm/tools/llvm-profdata/llvm-profdata.cpp220
-rw-r--r--contrib/llvm/tools/llvm-readobj/ARMEHABIPrinter.h36
-rw-r--r--contrib/llvm/tools/llvm-readobj/COFFDumper.cpp90
-rw-r--r--contrib/llvm/tools/llvm-readobj/ELFDumper.cpp473
-rw-r--r--contrib/llvm/tools/llvm-readobj/Error.cpp4
-rw-r--r--contrib/llvm/tools/llvm-readobj/MachODumper.cpp4
-rw-r--r--contrib/llvm/tools/llvm-readobj/ObjDumper.h11
-rw-r--r--contrib/llvm/tools/llvm-readobj/StackMapPrinter.h3
-rw-r--r--contrib/llvm/tools/llvm-readobj/llvm-readobj.cpp48
-rw-r--r--contrib/llvm/tools/llvm-readobj/llvm-readobj.h3
-rw-r--r--contrib/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp11
-rw-r--r--contrib/llvm/tools/llvm-stress/llvm-stress.cpp2
-rw-r--r--contrib/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp3
-rw-r--r--contrib/llvm/tools/llvm-xray/llvm-xray.cc41
-rw-r--r--contrib/llvm/tools/llvm-xray/xray-extract.cc240
-rw-r--r--contrib/llvm/tools/llvm-xray/xray-extract.h58
-rw-r--r--contrib/llvm/tools/llvm-xray/xray-registry.cc41
-rw-r--r--contrib/llvm/tools/llvm-xray/xray-registry.h41
-rw-r--r--contrib/llvm/tools/llvm-xray/xray-sleds.h32
-rw-r--r--contrib/llvm/tools/opt/NewPMDriver.cpp9
-rw-r--r--contrib/llvm/tools/opt/NewPMDriver.h5
-rw-r--r--contrib/llvm/tools/opt/PassPrinters.cpp12
-rw-r--r--contrib/llvm/tools/opt/opt.cpp69
-rw-r--r--contrib/llvm/utils/TableGen/AsmMatcherEmitter.cpp264
-rw-r--r--contrib/llvm/utils/TableGen/AsmWriterEmitter.cpp95
-rw-r--r--contrib/llvm/utils/TableGen/Attributes.cpp2
-rw-r--r--contrib/llvm/utils/TableGen/CTagsEmitter.cpp2
-rw-r--r--contrib/llvm/utils/TableGen/CodeEmitterGen.cpp87
-rw-r--r--contrib/llvm/utils/TableGen/CodeGenDAGPatterns.cpp147
-rw-r--r--contrib/llvm/utils/TableGen/CodeGenDAGPatterns.h5
-rw-r--r--contrib/llvm/utils/TableGen/CodeGenInstruction.cpp35
-rw-r--r--contrib/llvm/utils/TableGen/CodeGenInstruction.h1
-rw-r--r--contrib/llvm/utils/TableGen/CodeGenIntrinsics.h27
-rw-r--r--contrib/llvm/utils/TableGen/CodeGenMapTable.cpp1
-rw-r--r--contrib/llvm/utils/TableGen/CodeGenRegisters.cpp119
-rw-r--r--contrib/llvm/utils/TableGen/CodeGenRegisters.h48
-rw-r--r--contrib/llvm/utils/TableGen/CodeGenSchedule.cpp90
-rw-r--r--contrib/llvm/utils/TableGen/CodeGenTarget.cpp26
-rw-r--r--contrib/llvm/utils/TableGen/CodeGenTarget.h7
-rw-r--r--contrib/llvm/utils/TableGen/DAGISelEmitter.cpp2
-rw-r--r--contrib/llvm/utils/TableGen/DAGISelMatcherGen.cpp8
-rw-r--r--contrib/llvm/utils/TableGen/DAGISelMatcherOpt.cpp25
-rw-r--r--contrib/llvm/utils/TableGen/DFAPacketizerEmitter.cpp45
-rw-r--r--contrib/llvm/utils/TableGen/DisassemblerEmitter.cpp2
-rw-r--r--contrib/llvm/utils/TableGen/FastISelEmitter.cpp5
-rw-r--r--contrib/llvm/utils/TableGen/FixedLenDecoderEmitter.cpp192
-rw-r--r--contrib/llvm/utils/TableGen/GlobalISelEmitter.cpp388
-rw-r--r--contrib/llvm/utils/TableGen/InstrInfoEmitter.cpp28
-rw-r--r--contrib/llvm/utils/TableGen/IntrinsicEmitter.cpp66
-rw-r--r--contrib/llvm/utils/TableGen/PseudoLoweringEmitter.cpp4
-rw-r--r--contrib/llvm/utils/TableGen/RegisterInfoEmitter.cpp93
-rw-r--r--contrib/llvm/utils/TableGen/SearchableTableEmitter.cpp4
-rw-r--r--contrib/llvm/utils/TableGen/SubtargetEmitter.cpp49
-rw-r--r--contrib/llvm/utils/TableGen/SubtargetFeatureInfo.cpp119
-rw-r--r--contrib/llvm/utils/TableGen/SubtargetFeatureInfo.h72
-rw-r--r--contrib/llvm/utils/TableGen/TableGen.cpp7
-rw-r--r--contrib/llvm/utils/TableGen/TableGenBackends.h1
-rw-r--r--contrib/llvm/utils/TableGen/Types.cpp44
-rw-r--r--contrib/llvm/utils/TableGen/Types.h25
-rw-r--r--contrib/llvm/utils/TableGen/X86RecognizableInstr.cpp267
-rw-r--r--contrib/llvm/utils/TableGen/X86RecognizableInstr.h4
2400 files changed, 223932 insertions, 101979 deletions
diff --git a/contrib/llvm/include/llvm-c/Core.h b/contrib/llvm/include/llvm-c/Core.h
index 76f8b31580ac..7f5c05d21e65 100644
--- a/contrib/llvm/include/llvm-c/Core.h
+++ b/contrib/llvm/include/llvm-c/Core.h
@@ -55,51 +55,6 @@ extern "C" {
*/
typedef enum {
- LLVMZExtAttribute = 1<<0,
- LLVMSExtAttribute = 1<<1,
- LLVMNoReturnAttribute = 1<<2,
- LLVMInRegAttribute = 1<<3,
- LLVMStructRetAttribute = 1<<4,
- LLVMNoUnwindAttribute = 1<<5,
- LLVMNoAliasAttribute = 1<<6,
- LLVMByValAttribute = 1<<7,
- LLVMNestAttribute = 1<<8,
- LLVMReadNoneAttribute = 1<<9,
- LLVMReadOnlyAttribute = 1<<10,
- LLVMNoInlineAttribute = 1<<11,
- LLVMAlwaysInlineAttribute = 1<<12,
- LLVMOptimizeForSizeAttribute = 1<<13,
- LLVMStackProtectAttribute = 1<<14,
- LLVMStackProtectReqAttribute = 1<<15,
- LLVMAlignment = 31<<16,
- LLVMNoCaptureAttribute = 1<<21,
- LLVMNoRedZoneAttribute = 1<<22,
- LLVMNoImplicitFloatAttribute = 1<<23,
- LLVMNakedAttribute = 1<<24,
- LLVMInlineHintAttribute = 1<<25,
- LLVMStackAlignment = 7<<26,
- LLVMReturnsTwice = 1 << 29,
- LLVMUWTable = 1 << 30,
- LLVMNonLazyBind = 1 << 31
-
- /* FIXME: These attributes are currently not included in the C API as
- a temporary measure until the API/ABI impact to the C API is understood
- and the path forward agreed upon.
- LLVMSanitizeAddressAttribute = 1ULL << 32,
- LLVMStackProtectStrongAttribute = 1ULL<<35,
- LLVMColdAttribute = 1ULL << 40,
- LLVMOptimizeNoneAttribute = 1ULL << 42,
- LLVMInAllocaAttribute = 1ULL << 43,
- LLVMNonNullAttribute = 1ULL << 44,
- LLVMJumpTableAttribute = 1ULL << 45,
- LLVMConvergentAttribute = 1ULL << 46,
- LLVMSafeStackAttribute = 1ULL << 47,
- LLVMSwiftSelfAttribute = 1ULL << 48,
- LLVMSwiftErrorAttribute = 1ULL << 49,
- */
-} LLVMAttribute;
-
-typedef enum {
/* Terminator Instructions */
LLVMRet = 1,
LLVMBr = 2,
@@ -1752,6 +1707,7 @@ LLVMValueRef LLVMConstNSWMul(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant)
LLVMValueRef LLVMConstNUWMul(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant);
LLVMValueRef LLVMConstFMul(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant);
LLVMValueRef LLVMConstUDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant);
+LLVMValueRef LLVMConstExactUDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant);
LLVMValueRef LLVMConstSDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant);
LLVMValueRef LLVMConstExactSDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant);
LLVMValueRef LLVMConstFDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant);
@@ -2010,8 +1966,6 @@ void LLVMSetGC(LLVMValueRef Fn, const char *Name);
*
* @see llvm::Function::addAttribute()
*/
-void LLVMAddFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA);
-
void LLVMAddAttributeAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx,
LLVMAttributeRef A);
unsigned LLVMGetAttributeCountAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx);
@@ -2036,18 +1990,6 @@ void LLVMAddTargetDependentFunctionAttr(LLVMValueRef Fn, const char *A,
const char *V);
/**
- * Obtain an attribute from a function.
- *
- * @see llvm::Function::getAttributes()
- */
-LLVMAttribute LLVMGetFunctionAttr(LLVMValueRef Fn);
-
-/**
- * Remove an attribute from a function.
- */
-void LLVMRemoveFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA);
-
-/**
* @defgroup LLVMCCoreValueFunctionParameters Function Parameters
*
* Functions in this group relate to arguments/parameters on functions.
@@ -2129,25 +2071,6 @@ LLVMValueRef LLVMGetNextParam(LLVMValueRef Arg);
LLVMValueRef LLVMGetPreviousParam(LLVMValueRef Arg);
/**
- * Add an attribute to a function argument.
- *
- * @see llvm::Argument::addAttr()
- */
-void LLVMAddAttribute(LLVMValueRef Arg, LLVMAttribute PA);
-
-/**
- * Remove an attribute from a function argument.
- *
- * @see llvm::Argument::removeAttr()
- */
-void LLVMRemoveAttribute(LLVMValueRef Arg, LLVMAttribute PA);
-
-/**
- * Get an attribute from a function argument.
- */
-LLVMAttribute LLVMGetAttribute(LLVMValueRef Arg);
-
-/**
* Set the alignment for a function parameter.
*
* @see llvm::Argument::addAttr()
@@ -2595,9 +2518,6 @@ void LLVMSetInstructionCallConv(LLVMValueRef Instr, unsigned CC);
*/
unsigned LLVMGetInstructionCallConv(LLVMValueRef Instr);
-void LLVMAddInstrAttribute(LLVMValueRef Instr, unsigned index, LLVMAttribute);
-void LLVMRemoveInstrAttribute(LLVMValueRef Instr, unsigned index,
- LLVMAttribute);
void LLVMSetInstrParamAlignment(LLVMValueRef Instr, unsigned index,
unsigned Align);
@@ -2962,6 +2882,8 @@ LLVMValueRef LLVMBuildFMul(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS,
const char *Name);
LLVMValueRef LLVMBuildUDiv(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS,
const char *Name);
+LLVMValueRef LLVMBuildExactUDiv(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name);
LLVMValueRef LLVMBuildSDiv(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS,
const char *Name);
LLVMValueRef LLVMBuildExactSDiv(LLVMBuilderRef, LLVMValueRef LHS, LLVMValueRef RHS,
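The Core.h hunks above add exact unsigned division to the C API (LLVMConstExactUDiv and LLVMBuildExactUDiv) while dropping the old LLVMAttribute-based functions in favour of LLVMAddAttributeAtIndex and friends. The following is a minimal sketch, not part of the diff, showing how the new builder call might be used; the surrounding context/module/builder setup uses standard LLVM-C calls (LLVMContextCreate, LLVMAddFunction, LLVMPrintModuleToString, ...) and the file name is a placeholder.

/* exact_udiv_example.c - illustrative only; build against the llvm-c headers
 * this diff modifies, e.g. cc exact_udiv_example.c `llvm-config --cflags --libs core`. */
#include <llvm-c/Core.h>
#include <stdio.h>

int main(void) {
  LLVMContextRef Ctx = LLVMContextCreate();
  LLVMModuleRef Mod = LLVMModuleCreateWithNameInContext("exact_udiv", Ctx);
  LLVMTypeRef I32 = LLVMInt32TypeInContext(Ctx);

  /* unsigned f(unsigned a, unsigned b) { return a / b; } with the exact flag set. */
  LLVMTypeRef Params[] = { I32, I32 };
  LLVMTypeRef FnTy = LLVMFunctionType(I32, Params, 2, 0);
  LLVMValueRef Fn = LLVMAddFunction(Mod, "f", FnTy);
  LLVMBasicBlockRef Entry = LLVMAppendBasicBlockInContext(Ctx, Fn, "entry");

  LLVMBuilderRef B = LLVMCreateBuilderInContext(Ctx);
  LLVMPositionBuilderAtEnd(B, Entry);
  /* New in this revision: exact unsigned division from the C API. */
  LLVMValueRef Div = LLVMBuildExactUDiv(B, LLVMGetParam(Fn, 0),
                                        LLVMGetParam(Fn, 1), "quot");
  LLVMBuildRet(B, Div);

  /* The constant-folding counterpart added alongside it. */
  LLVMValueRef C = LLVMConstExactUDiv(LLVMConstInt(I32, 8, 0),
                                      LLVMConstInt(I32, 2, 0));
  (void)C;

  char *IR = LLVMPrintModuleToString(Mod);
  printf("%s", IR);
  LLVMDisposeMessage(IR);

  LLVMDisposeBuilder(B);
  LLVMDisposeModule(Mod);
  LLVMContextDispose(Ctx);
  return 0;
}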
diff --git a/contrib/llvm/include/llvm-c/Transforms/Scalar.h b/contrib/llvm/include/llvm-c/Transforms/Scalar.h
index b8a09984aa4d..8991e0904849 100644
--- a/contrib/llvm/include/llvm-c/Transforms/Scalar.h
+++ b/contrib/llvm/include/llvm-c/Transforms/Scalar.h
@@ -56,6 +56,9 @@ void LLVMAddMergedLoadStoreMotionPass(LLVMPassManagerRef PM);
/** See llvm::createGVNPass function. */
void LLVMAddGVNPass(LLVMPassManagerRef PM);
+/** See llvm::createGVNPass function. */
+void LLVMAddNewGVNPass(LLVMPassManagerRef PM);
+
/** See llvm::createIndVarSimplifyPass function. */
void LLVMAddIndVarSimplifyPass(LLVMPassManagerRef PM);
@@ -135,6 +138,9 @@ void LLVMAddCorrelatedValuePropagationPass(LLVMPassManagerRef PM);
/** See llvm::createEarlyCSEPass function */
void LLVMAddEarlyCSEPass(LLVMPassManagerRef PM);
+/** See llvm::createEarlyCSEPass function */
+void LLVMAddEarlyCSEMemSSAPass(LLVMPassManagerRef PM);
+
/** See llvm::createLowerExpectIntrinsicPass function */
void LLVMAddLowerExpectIntrinsicPass(LLVMPassManagerRef PM);
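The Scalar.h hunks expose two new pass constructors to C clients: LLVMAddNewGVNPass and LLVMAddEarlyCSEMemSSAPass. A short sketch of how they might be scheduled, assuming the long-standing legacy C pass-manager entry points (LLVMCreatePassManager, LLVMRunPassManager, LLVMDisposePassManager); only the two Add* calls come from this diff.

/* Illustrative only: run the newly exposed passes over a module. */
#include <llvm-c/Core.h>
#include <llvm-c/Transforms/Scalar.h>

void optimize(LLVMModuleRef Mod) {
  LLVMPassManagerRef PM = LLVMCreatePassManager();
  LLVMAddEarlyCSEMemSSAPass(PM);  /* EarlyCSE driven by MemorySSA (new in this diff) */
  LLVMAddNewGVNPass(PM);          /* the NewGVN implementation (new in this diff) */
  LLVMRunPassManager(PM, Mod);
  LLVMDisposePassManager(PM);
}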
diff --git a/contrib/llvm/include/llvm-c/lto.h b/contrib/llvm/include/llvm-c/lto.h
index b1f5a45d6650..c3af74cdedab 100644
--- a/contrib/llvm/include/llvm-c/lto.h
+++ b/contrib/llvm/include/llvm-c/lto.h
@@ -44,7 +44,7 @@ typedef bool lto_bool_t;
* @{
*/
-#define LTO_API_VERSION 20
+#define LTO_API_VERSION 21
/**
* \since prior to LTO_API_VERSION=3
@@ -145,10 +145,10 @@ extern lto_bool_t
lto_module_has_objc_category(const void *mem, size_t length);
/**
-* Checks if a buffer is a loadable object file.
-*
-* \since prior to LTO_API_VERSION=3
-*/
+ * Checks if a buffer is a loadable object file.
+ *
+ * \since prior to LTO_API_VERSION=3
+ */
extern lto_bool_t lto_module_is_object_file_in_memory(const void *mem,
size_t length);
@@ -637,6 +637,29 @@ extern LTOObjectBuffer thinlto_module_get_object(thinlto_code_gen_t cg,
unsigned int index);
/**
+ * Returns the number of object files produced by the ThinLTO CodeGenerator.
+ *
+ * It usually matches the number of input files, but this is not a guarantee of
+ * the API and may change in future implementation, so the client should not
+ * assume it.
+ *
+ * \since LTO_API_VERSION=21
+ */
+unsigned int thinlto_module_get_num_object_files(thinlto_code_gen_t cg);
+
+/**
+ * Returns the path to the ith object file produced by the ThinLTO
+ * CodeGenerator.
+ *
+ * Client should use \p thinlto_module_get_num_object_files() to get the number
+ * of available objects.
+ *
+ * \since LTO_API_VERSION=21
+ */
+const char *thinlto_module_get_object_file(thinlto_code_gen_t cg,
+ unsigned int index);
+
+/**
* Sets which PIC code model to generate.
* Returns true on error (check lto_get_error_message() for details).
*
@@ -725,6 +748,17 @@ extern void thinlto_codegen_set_savetemps_dir(thinlto_code_gen_t cg,
const char *save_temps_dir);
/**
+ * Set the path to a directory where to save generated object files. This
+ * path can be used by a linker to request on-disk files instead of in-memory
+ * buffers. When set, results are available through
+ * thinlto_module_get_object_file() instead of thinlto_module_get_object().
+ *
+ * \since LTO_API_VERSION=21
+ */
+void thinlto_set_generated_objects_dir(thinlto_code_gen_t cg,
+ const char *save_temps_dir);
+
+/**
* Sets the cpu to generate code for.
*
* \since LTO_API_VERSION=18
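The lto.h hunks bump LTO_API_VERSION to 21 and let a ThinLTO client request on-disk object files (thinlto_set_generated_objects_dir, thinlto_module_get_num_object_files, thinlto_module_get_object_file) instead of in-memory buffers. A hedged sketch follows; thinlto_create_codegen, thinlto_codegen_add_module, thinlto_codegen_process and thinlto_codegen_dispose are pre-existing libLTO entry points as I understand them, and the module identifier and output directory are placeholders.

/* Illustrative only: emit ThinLTO results to a directory and list them. */
#include <llvm-c/lto.h>
#include <stdio.h>

void thinlto_emit_to_dir(const char *buf, int len) {
  thinlto_code_gen_t cg = thinlto_create_codegen();
  thinlto_codegen_add_module(cg, "input0.o", buf, len);

  /* New in LTO_API_VERSION=21: ask for object files on disk rather than
   * in-memory buffers. */
  thinlto_set_generated_objects_dir(cg, "/tmp/thinlto-objs");
  thinlto_codegen_process(cg);

  unsigned n = thinlto_module_get_num_object_files(cg);
  for (unsigned i = 0; i < n; ++i)
    printf("object %u: %s\n", i, thinlto_module_get_object_file(cg, i));

  thinlto_codegen_dispose(cg);
}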
diff --git a/contrib/llvm/include/llvm/ADT/APFloat.h b/contrib/llvm/include/llvm/ADT/APFloat.h
index 3f6bd00a779c..00304230a991 100644
--- a/contrib/llvm/include/llvm/ADT/APFloat.h
+++ b/contrib/llvm/include/llvm/ADT/APFloat.h
@@ -18,12 +18,16 @@
#define LLVM_ADT_APFLOAT_H
#include "llvm/ADT/APInt.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <memory>
namespace llvm {
struct fltSemantics;
class APSInt;
class StringRef;
+class APFloat;
+class raw_ostream;
template <typename T> class SmallVectorImpl;
@@ -121,33 +125,30 @@ enum lostFraction { // Example of truncated bits:
///
/// New operations: sqrt, IEEE remainder, C90 fmod, nexttoward.
///
-class APFloat {
-public:
+// This is the common type definitions shared by APFloat and its internal
+// implementation classes. This struct should not define any non-static data
+// members.
+struct APFloatBase {
/// A signed type to represent a floating point numbers unbiased exponent.
typedef signed short ExponentType;
/// \name Floating Point Semantics.
/// @{
- static const fltSemantics IEEEhalf;
- static const fltSemantics IEEEsingle;
- static const fltSemantics IEEEdouble;
- static const fltSemantics IEEEquad;
- static const fltSemantics PPCDoubleDouble;
- static const fltSemantics x87DoubleExtended;
+ static const fltSemantics &IEEEhalf();
+ static const fltSemantics &IEEEsingle();
+ static const fltSemantics &IEEEdouble();
+ static const fltSemantics &IEEEquad();
+ static const fltSemantics &PPCDoubleDouble();
+ static const fltSemantics &x87DoubleExtended();
/// A Pseudo fltsemantic used to construct APFloats that cannot conflict with
/// anything real.
- static const fltSemantics Bogus;
+ static const fltSemantics &Bogus();
/// @}
- static unsigned int semanticsPrecision(const fltSemantics &);
- static ExponentType semanticsMinExponent(const fltSemantics &);
- static ExponentType semanticsMaxExponent(const fltSemantics &);
- static unsigned int semanticsSizeInBits(const fltSemantics &);
-
/// IEEE-754R 5.11: Floating Point Comparison Relations.
enum cmpResult {
cmpLessThan,
@@ -190,19 +191,39 @@ public:
uninitialized
};
+ /// \brief Enumeration of \c ilogb error results.
+ enum IlogbErrorKinds {
+ IEK_Zero = INT_MIN + 1,
+ IEK_NaN = INT_MIN,
+ IEK_Inf = INT_MAX
+ };
+
+ static unsigned int semanticsPrecision(const fltSemantics &);
+ static ExponentType semanticsMinExponent(const fltSemantics &);
+ static ExponentType semanticsMaxExponent(const fltSemantics &);
+ static unsigned int semanticsSizeInBits(const fltSemantics &);
+
+ /// Returns the size of the floating point number (in bits) in the given
+ /// semantics.
+ static unsigned getSizeInBits(const fltSemantics &Sem);
+};
+
+namespace detail {
+
+class IEEEFloat final : public APFloatBase {
+public:
/// \name Constructors
/// @{
- APFloat(const fltSemantics &); // Default construct to 0.0
- APFloat(const fltSemantics &, StringRef);
- APFloat(const fltSemantics &, integerPart);
- APFloat(const fltSemantics &, uninitializedTag);
- APFloat(const fltSemantics &, const APInt &);
- explicit APFloat(double d);
- explicit APFloat(float f);
- APFloat(const APFloat &);
- APFloat(APFloat &&);
- ~APFloat();
+ IEEEFloat(const fltSemantics &); // Default construct to 0.0
+ IEEEFloat(const fltSemantics &, integerPart);
+ IEEEFloat(const fltSemantics &, uninitializedTag);
+ IEEEFloat(const fltSemantics &, const APInt &);
+ explicit IEEEFloat(double d);
+ explicit IEEEFloat(float f);
+ IEEEFloat(const IEEEFloat &);
+ IEEEFloat(IEEEFloat &&);
+ ~IEEEFloat();
/// @}
@@ -212,79 +233,6 @@ public:
/// \name Convenience "constructors"
/// @{
- /// Factory for Positive and Negative Zero.
- ///
- /// \param Negative True iff the number should be negative.
- static APFloat getZero(const fltSemantics &Sem, bool Negative = false) {
- APFloat Val(Sem, uninitialized);
- Val.makeZero(Negative);
- return Val;
- }
-
- /// Factory for Positive and Negative Infinity.
- ///
- /// \param Negative True iff the number should be negative.
- static APFloat getInf(const fltSemantics &Sem, bool Negative = false) {
- APFloat Val(Sem, uninitialized);
- Val.makeInf(Negative);
- return Val;
- }
-
- /// Factory for QNaN values.
- ///
- /// \param Negative - True iff the NaN generated should be negative.
- /// \param type - The unspecified fill bits for creating the NaN, 0 by
- /// default. The value is truncated as necessary.
- static APFloat getNaN(const fltSemantics &Sem, bool Negative = false,
- unsigned type = 0) {
- if (type) {
- APInt fill(64, type);
- return getQNaN(Sem, Negative, &fill);
- } else {
- return getQNaN(Sem, Negative, nullptr);
- }
- }
-
- /// Factory for QNaN values.
- static APFloat getQNaN(const fltSemantics &Sem, bool Negative = false,
- const APInt *payload = nullptr) {
- return makeNaN(Sem, false, Negative, payload);
- }
-
- /// Factory for SNaN values.
- static APFloat getSNaN(const fltSemantics &Sem, bool Negative = false,
- const APInt *payload = nullptr) {
- return makeNaN(Sem, true, Negative, payload);
- }
-
- /// Returns the largest finite number in the given semantics.
- ///
- /// \param Negative - True iff the number should be negative
- static APFloat getLargest(const fltSemantics &Sem, bool Negative = false);
-
- /// Returns the smallest (by magnitude) finite number in the given semantics.
- /// Might be denormalized, which implies a relative loss of precision.
- ///
- /// \param Negative - True iff the number should be negative
- static APFloat getSmallest(const fltSemantics &Sem, bool Negative = false);
-
- /// Returns the smallest (by magnitude) normalized finite number in the given
- /// semantics.
- ///
- /// \param Negative - True iff the number should be negative
- static APFloat getSmallestNormalized(const fltSemantics &Sem,
- bool Negative = false);
-
- /// Returns a float which is bitcasted from an all one value int.
- ///
- /// \param BitWidth - Select float type
- /// \param isIEEE - If 128 bit number, select between PPC and IEEE
- static APFloat getAllOnesValue(unsigned BitWidth, bool isIEEE = false);
-
- /// Returns the size of the floating point number (in bits) in the given
- /// semantics.
- static unsigned getSizeInBits(const fltSemantics &Sem);
-
/// @}
/// Used to insert APFloat objects, or objects that contain APFloat objects,
@@ -294,47 +242,47 @@ public:
/// \name Arithmetic
/// @{
- opStatus add(const APFloat &, roundingMode);
- opStatus subtract(const APFloat &, roundingMode);
- opStatus multiply(const APFloat &, roundingMode);
- opStatus divide(const APFloat &, roundingMode);
+ opStatus add(const IEEEFloat &, roundingMode);
+ opStatus subtract(const IEEEFloat &, roundingMode);
+ opStatus multiply(const IEEEFloat &, roundingMode);
+ opStatus divide(const IEEEFloat &, roundingMode);
/// IEEE remainder.
- opStatus remainder(const APFloat &);
+ opStatus remainder(const IEEEFloat &);
/// C fmod, or llvm frem.
- opStatus mod(const APFloat &);
- opStatus fusedMultiplyAdd(const APFloat &, const APFloat &, roundingMode);
+ opStatus mod(const IEEEFloat &);
+ opStatus fusedMultiplyAdd(const IEEEFloat &, const IEEEFloat &, roundingMode);
opStatus roundToIntegral(roundingMode);
/// IEEE-754R 5.3.1: nextUp/nextDown.
opStatus next(bool nextDown);
/// \brief Operator+ overload which provides the default
/// \c nmNearestTiesToEven rounding mode and *no* error checking.
- APFloat operator+(const APFloat &RHS) const {
- APFloat Result = *this;
+ IEEEFloat operator+(const IEEEFloat &RHS) const {
+ IEEEFloat Result = *this;
Result.add(RHS, rmNearestTiesToEven);
return Result;
}
/// \brief Operator- overload which provides the default
/// \c nmNearestTiesToEven rounding mode and *no* error checking.
- APFloat operator-(const APFloat &RHS) const {
- APFloat Result = *this;
+ IEEEFloat operator-(const IEEEFloat &RHS) const {
+ IEEEFloat Result = *this;
Result.subtract(RHS, rmNearestTiesToEven);
return Result;
}
/// \brief Operator* overload which provides the default
/// \c nmNearestTiesToEven rounding mode and *no* error checking.
- APFloat operator*(const APFloat &RHS) const {
- APFloat Result = *this;
+ IEEEFloat operator*(const IEEEFloat &RHS) const {
+ IEEEFloat Result = *this;
Result.multiply(RHS, rmNearestTiesToEven);
return Result;
}
/// \brief Operator/ overload which provides the default
/// \c nmNearestTiesToEven rounding mode and *no* error checking.
- APFloat operator/(const APFloat &RHS) const {
- APFloat Result = *this;
+ IEEEFloat operator/(const IEEEFloat &RHS) const {
+ IEEEFloat Result = *this;
Result.divide(RHS, rmNearestTiesToEven);
return Result;
}
@@ -346,11 +294,11 @@ public:
void changeSign();
void clearSign();
- void copySign(const APFloat &);
+ void copySign(const IEEEFloat &);
/// \brief A static helper to produce a copy of an APFloat value with its sign
/// copied from some other APFloat.
- static APFloat copySign(APFloat Value, const APFloat &Sign) {
+ static IEEEFloat copySign(IEEEFloat Value, const IEEEFloat &Sign) {
Value.copySign(Sign);
return Value;
}
@@ -379,14 +327,14 @@ public:
/// The definition of equality is not straightforward for floating point, so
/// we won't use operator==. Use one of the following, or write whatever it
/// is you really mean.
- bool operator==(const APFloat &) const = delete;
+ bool operator==(const IEEEFloat &) const = delete;
/// IEEE comparison with another floating point number (NaNs compare
/// unordered, 0==-0).
- cmpResult compare(const APFloat &) const;
+ cmpResult compare(const IEEEFloat &) const;
/// Bitwise comparison for equality (QNaNs compare equal, 0!=-0).
- bool bitwiseIsEqual(const APFloat &) const;
+ bool bitwiseIsEqual(const IEEEFloat &) const;
/// Write out a hexadecimal representation of the floating point value to DST,
/// which must be of sufficient size, in the C99 form [-]0xh.hhhhp[+-]d.
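A minimal sketch of the two comparison flavors, written against the public APFloat wrapper this patch introduces further down (getNaN/getZero and the cmpResult values used here are the ones declared in this header):

    #include "llvm/ADT/APFloat.h"
    #include <cassert>
    using namespace llvm;

    void compareFlavors() {
      APFloat NaN = APFloat::getNaN(APFloat::IEEEdouble());
      APFloat PZ = APFloat::getZero(APFloat::IEEEdouble(), /*Negative=*/false);
      APFloat NZ = APFloat::getZero(APFloat::IEEEdouble(), /*Negative=*/true);

      // IEEE comparison: NaNs are unordered, +0 and -0 compare equal.
      assert(NaN.compare(NaN) == APFloat::cmpUnordered);
      assert(PZ.compare(NZ) == APFloat::cmpEqual);

      // Bitwise comparison: identical QNaNs compare equal, +0 != -0.
      assert(NaN.bitwiseIsEqual(NaN));
      assert(!PZ.bitwiseIsEqual(NZ));
    }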
@@ -456,8 +404,8 @@ public:
/// @}
- APFloat &operator=(const APFloat &);
- APFloat &operator=(APFloat &&);
+ IEEEFloat &operator=(const IEEEFloat &);
+ IEEEFloat &operator=(IEEEFloat &&);
/// \brief Overload to compute a hash code for an APFloat value.
///
@@ -468,7 +416,7 @@ public:
/// emphasizes producing different codes for different inputs in order to
/// be used in canonicalization and memoization. As such, equality is
/// bitwiseIsEqual, and 0 != -0.
- friend hash_code hash_value(const APFloat &Arg);
+ friend hash_code hash_value(const IEEEFloat &Arg);
/// Converts this value into a decimal string.
///
@@ -495,14 +443,7 @@ public:
/// If this value has an exact multiplicative inverse, store it in inv and
/// return true.
- bool getExactInverse(APFloat *inv) const;
-
- /// \brief Enumeration of \c ilogb error results.
- enum IlogbErrorKinds {
- IEK_Zero = INT_MIN+1,
- IEK_NaN = INT_MIN,
- IEK_Inf = INT_MAX
- };
+ bool getExactInverse(IEEEFloat *inv) const;
/// \brief Returns the exponent of the internal representation of the APFloat.
///
@@ -513,15 +454,35 @@ public:
/// 0 -> \c IEK_Zero
/// Inf -> \c IEK_Inf
///
- friend int ilogb(const APFloat &Arg);
+ friend int ilogb(const IEEEFloat &Arg);
/// \brief Returns: X * 2^Exp for integral exponents.
- friend APFloat scalbn(APFloat X, int Exp, roundingMode);
+ friend IEEEFloat scalbn(IEEEFloat X, int Exp, roundingMode);
- friend APFloat frexp(const APFloat &X, int &Exp, roundingMode);
+ friend IEEEFloat frexp(const IEEEFloat &X, int &Exp, roundingMode);
-private:
+ /// \name Special value setters.
+ /// @{
+
+ void makeLargest(bool Neg = false);
+ void makeSmallest(bool Neg = false);
+ void makeNaN(bool SNaN = false, bool Neg = false,
+ const APInt *fill = nullptr);
+ void makeInf(bool Neg = false);
+ void makeZero(bool Neg = false);
+ void makeQuiet();
+ /// Returns the smallest (by magnitude) normalized finite number in the given
+ /// semantics.
+ ///
+ /// \param Negative - True iff the number should be negative
+ void makeSmallestNormalized(bool Negative = false);
+
+ /// @}
+
+ cmpResult compareAbsoluteValue(const IEEEFloat &) const;
+
+private:
/// \name Simple Queries
/// @{
@@ -534,11 +495,11 @@ private:
/// \name Significand operations.
/// @{
- integerPart addSignificand(const APFloat &);
- integerPart subtractSignificand(const APFloat &, integerPart);
- lostFraction addOrSubtractSignificand(const APFloat &, bool subtract);
- lostFraction multiplySignificand(const APFloat &, const APFloat *);
- lostFraction divideSignificand(const APFloat &);
+ integerPart addSignificand(const IEEEFloat &);
+ integerPart subtractSignificand(const IEEEFloat &, integerPart);
+ lostFraction addOrSubtractSignificand(const IEEEFloat &, bool subtract);
+ lostFraction multiplySignificand(const IEEEFloat &, const IEEEFloat *);
+ lostFraction divideSignificand(const IEEEFloat &);
void incrementSignificand();
void initialize(const fltSemantics *);
void shiftSignificandLeft(unsigned int);
@@ -556,25 +517,10 @@ private:
/// \name Arithmetic on special values.
/// @{
- opStatus addOrSubtractSpecials(const APFloat &, bool subtract);
- opStatus divideSpecials(const APFloat &);
- opStatus multiplySpecials(const APFloat &);
- opStatus modSpecials(const APFloat &);
-
- /// @}
-
- /// \name Special value setters.
- /// @{
-
- void makeLargest(bool Neg = false);
- void makeSmallest(bool Neg = false);
- void makeNaN(bool SNaN = false, bool Neg = false,
- const APInt *fill = nullptr);
- static APFloat makeNaN(const fltSemantics &Sem, bool SNaN, bool Negative,
- const APInt *fill);
- void makeInf(bool Neg = false);
- void makeZero(bool Neg = false);
- void makeQuiet();
+ opStatus addOrSubtractSpecials(const IEEEFloat &, bool subtract);
+ opStatus divideSpecials(const IEEEFloat &);
+ opStatus multiplySpecials(const IEEEFloat &);
+ opStatus modSpecials(const IEEEFloat &);
/// @}
@@ -583,8 +529,7 @@ private:
bool convertFromStringSpecials(StringRef str);
opStatus normalize(roundingMode, lostFraction);
- opStatus addOrSubtract(const APFloat &, roundingMode, bool subtract);
- cmpResult compareAbsoluteValue(const APFloat &) const;
+ opStatus addOrSubtract(const IEEEFloat &, roundingMode, bool subtract);
opStatus handleOverflow(roundingMode);
bool roundAwayFromZero(roundingMode, lostFraction, unsigned int) const;
opStatus convertToSignExtendedInteger(integerPart *, unsigned int, bool,
@@ -614,10 +559,11 @@ private:
void initFromF80LongDoubleAPInt(const APInt &api);
void initFromPPCDoubleDoubleAPInt(const APInt &api);
- void assign(const APFloat &);
- void copySignificand(const APFloat &);
+ void assign(const IEEEFloat &);
+ void copySignificand(const IEEEFloat &);
void freeSignificand();
+ /// Note: this must be the first data member.
/// The semantics that this value obeys.
const fltSemantics *semantics;
@@ -642,20 +588,513 @@ private:
unsigned int sign : 1;
};
+hash_code hash_value(const IEEEFloat &Arg);
+int ilogb(const IEEEFloat &Arg);
+IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode);
+IEEEFloat frexp(const IEEEFloat &Val, int &Exp, IEEEFloat::roundingMode RM);
+
+// This mode implements a more precise float in terms of two APFloats.
+// The interface and layout are designed for arbitrary underlying semantics,
+// though currently only PPCDoubleDouble semantics are supported, whose
+// corresponding underlying semantics are IEEEdouble.
+class DoubleAPFloat final : public APFloatBase {
+ // Note: this must be the first data member.
+ const fltSemantics *Semantics;
+ std::unique_ptr<APFloat[]> Floats;
+
+ opStatus addImpl(const APFloat &a, const APFloat &aa, const APFloat &c,
+ const APFloat &cc, roundingMode RM);
+
+ opStatus addWithSpecial(const DoubleAPFloat &LHS, const DoubleAPFloat &RHS,
+ DoubleAPFloat &Out, roundingMode RM);
+
+public:
+ DoubleAPFloat(const fltSemantics &S);
+ DoubleAPFloat(const fltSemantics &S, uninitializedTag);
+ DoubleAPFloat(const fltSemantics &S, integerPart);
+ DoubleAPFloat(const fltSemantics &S, const APInt &I);
+ DoubleAPFloat(const fltSemantics &S, APFloat &&First, APFloat &&Second);
+ DoubleAPFloat(const DoubleAPFloat &RHS);
+ DoubleAPFloat(DoubleAPFloat &&RHS);
+
+ DoubleAPFloat &operator=(const DoubleAPFloat &RHS);
+
+ DoubleAPFloat &operator=(DoubleAPFloat &&RHS) {
+ if (this != &RHS) {
+ this->~DoubleAPFloat();
+ new (this) DoubleAPFloat(std::move(RHS));
+ }
+ return *this;
+ }
+
+ bool needsCleanup() const { return Floats != nullptr; }
+
+ APFloat &getFirst() { return Floats[0]; }
+ const APFloat &getFirst() const { return Floats[0]; }
+ APFloat &getSecond() { return Floats[1]; }
+ const APFloat &getSecond() const { return Floats[1]; }
+
+ opStatus add(const DoubleAPFloat &RHS, roundingMode RM);
+ opStatus subtract(const DoubleAPFloat &RHS, roundingMode RM);
+ void changeSign();
+ cmpResult compareAbsoluteValue(const DoubleAPFloat &RHS) const;
+
+ fltCategory getCategory() const;
+ bool isNegative() const;
+
+ void makeInf(bool Neg);
+ void makeNaN(bool SNaN, bool Neg, const APInt *fill);
+};
+
+} // End detail namespace
+
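The double-double layout keeps a value as an unevaluated sum of a high and a low component; the actual addImpl/addWithSpecial routines live in the implementation file and are not shown in this header. Purely as an illustration of the idea, the classic error-free TwoSum step that double-double addition is conventionally assembled from looks like this (plain host doubles, not APFloat):

    #include <utility>

    // Knuth's TwoSum: returns (s, err) with s = fl(a + b) and a + b == s + err
    // exactly, provided no overflow occurs. A double-double sum keeps s as the
    // high part and folds err into the low part.
    static std::pair<double, double> twoSum(double a, double b) {
      double s = a + b;
      double bVirt = s - a;
      double aVirt = s - bVirt;
      double err = (a - aVirt) + (b - bVirt);
      return {s, err};
    }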
+// This is an interface class that currently forwards functionality from
+// detail::IEEEFloat.
+class APFloat : public APFloatBase {
+ typedef detail::IEEEFloat IEEEFloat;
+ typedef detail::DoubleAPFloat DoubleAPFloat;
+
+ static_assert(std::is_standard_layout<IEEEFloat>::value, "");
+
+ union Storage {
+ const fltSemantics *semantics;
+ IEEEFloat IEEE;
+ DoubleAPFloat Double;
+
+ explicit Storage(IEEEFloat F, const fltSemantics &S);
+ explicit Storage(DoubleAPFloat F, const fltSemantics &S)
+ : Double(std::move(F)) {
+ assert(&S == &PPCDoubleDouble());
+ }
+
+ template <typename... ArgTypes>
+ Storage(const fltSemantics &Semantics, ArgTypes &&... Args) {
+ if (usesLayout<IEEEFloat>(Semantics)) {
+ new (&IEEE) IEEEFloat(Semantics, std::forward<ArgTypes>(Args)...);
+ return;
+ }
+ if (usesLayout<DoubleAPFloat>(Semantics)) {
+ new (&Double) DoubleAPFloat(Semantics, std::forward<ArgTypes>(Args)...);
+ return;
+ }
+ llvm_unreachable("Unexpected semantics");
+ }
+
+ ~Storage() {
+ if (usesLayout<IEEEFloat>(*semantics)) {
+ IEEE.~IEEEFloat();
+ return;
+ }
+ if (usesLayout<DoubleAPFloat>(*semantics)) {
+ Double.~DoubleAPFloat();
+ return;
+ }
+ llvm_unreachable("Unexpected semantics");
+ }
+
+ Storage(const Storage &RHS) {
+ if (usesLayout<IEEEFloat>(*RHS.semantics)) {
+ new (this) IEEEFloat(RHS.IEEE);
+ return;
+ }
+ if (usesLayout<DoubleAPFloat>(*RHS.semantics)) {
+ new (this) DoubleAPFloat(RHS.Double);
+ return;
+ }
+ llvm_unreachable("Unexpected semantics");
+ }
+
+ Storage(Storage &&RHS) {
+ if (usesLayout<IEEEFloat>(*RHS.semantics)) {
+ new (this) IEEEFloat(std::move(RHS.IEEE));
+ return;
+ }
+ if (usesLayout<DoubleAPFloat>(*RHS.semantics)) {
+ new (this) DoubleAPFloat(std::move(RHS.Double));
+ return;
+ }
+ llvm_unreachable("Unexpected semantics");
+ }
+
+ Storage &operator=(const Storage &RHS) {
+ if (usesLayout<IEEEFloat>(*semantics) &&
+ usesLayout<IEEEFloat>(*RHS.semantics)) {
+ IEEE = RHS.IEEE;
+ } else if (usesLayout<DoubleAPFloat>(*semantics) &&
+ usesLayout<DoubleAPFloat>(*RHS.semantics)) {
+ Double = RHS.Double;
+ } else if (this != &RHS) {
+ this->~Storage();
+ new (this) Storage(RHS);
+ }
+ return *this;
+ }
+
+ Storage &operator=(Storage &&RHS) {
+ if (usesLayout<IEEEFloat>(*semantics) &&
+ usesLayout<IEEEFloat>(*RHS.semantics)) {
+ IEEE = std::move(RHS.IEEE);
+ } else if (usesLayout<DoubleAPFloat>(*semantics) &&
+ usesLayout<DoubleAPFloat>(*RHS.semantics)) {
+ Double = std::move(RHS.Double);
+ } else if (this != &RHS) {
+ this->~Storage();
+ new (this) Storage(std::move(RHS));
+ }
+ return *this;
+ }
+ } U;
+
+ template <typename T> static bool usesLayout(const fltSemantics &Semantics) {
+ static_assert(std::is_same<T, IEEEFloat>::value ||
+ std::is_same<T, DoubleAPFloat>::value, "");
+ if (std::is_same<T, DoubleAPFloat>::value) {
+ return &Semantics == &PPCDoubleDouble();
+ }
+ return &Semantics != &PPCDoubleDouble();
+ }
+
+ IEEEFloat &getIEEE() {
+ if (usesLayout<IEEEFloat>(*U.semantics))
+ return U.IEEE;
+ if (usesLayout<DoubleAPFloat>(*U.semantics))
+ return U.Double.getFirst().U.IEEE;
+ llvm_unreachable("Unexpected semantics");
+ }
+
+ const IEEEFloat &getIEEE() const {
+ if (usesLayout<IEEEFloat>(*U.semantics))
+ return U.IEEE;
+ if (usesLayout<DoubleAPFloat>(*U.semantics))
+ return U.Double.getFirst().U.IEEE;
+ llvm_unreachable("Unexpected semantics");
+ }
+
+ void makeZero(bool Neg) { getIEEE().makeZero(Neg); }
+
+ void makeInf(bool Neg) {
+ if (usesLayout<IEEEFloat>(*U.semantics))
+ return U.IEEE.makeInf(Neg);
+ if (usesLayout<DoubleAPFloat>(*U.semantics))
+ return U.Double.makeInf(Neg);
+ llvm_unreachable("Unexpected semantics");
+ }
+
+ void makeNaN(bool SNaN, bool Neg, const APInt *fill) {
+ getIEEE().makeNaN(SNaN, Neg, fill);
+ }
+
+ void makeLargest(bool Neg) { getIEEE().makeLargest(Neg); }
+
+ void makeSmallest(bool Neg) { getIEEE().makeSmallest(Neg); }
+
+ void makeSmallestNormalized(bool Neg) {
+ getIEEE().makeSmallestNormalized(Neg);
+ }
+
+ // FIXME: This is needed because clang 3.3 (and older versions) always checks
+ // for the default constructor in an array aggregate initialization, even if
+ // no element in the array is default-initialized.
+ APFloat() : U(IEEEdouble()) {
+ llvm_unreachable("This is a workaround for old clang.");
+ }
+
+ explicit APFloat(IEEEFloat F, const fltSemantics &S) : U(std::move(F), S) {}
+ explicit APFloat(DoubleAPFloat F, const fltSemantics &S)
+ : U(std::move(F), S) {}
+
+ cmpResult compareAbsoluteValue(const APFloat &RHS) const {
+ assert(&getSemantics() == &RHS.getSemantics());
+ if (usesLayout<IEEEFloat>(getSemantics()))
+ return U.IEEE.compareAbsoluteValue(RHS.U.IEEE);
+ if (usesLayout<DoubleAPFloat>(getSemantics()))
+ return U.Double.compareAbsoluteValue(RHS.U.Double);
+ llvm_unreachable("Unexpected semantics");
+ }
+
+public:
+ APFloat(const fltSemantics &Semantics) : U(Semantics) {}
+ APFloat(const fltSemantics &Semantics, StringRef S);
+ APFloat(const fltSemantics &Semantics, integerPart I) : U(Semantics, I) {}
+ // TODO: Remove this constructor. This isn't faster than the first one.
+ APFloat(const fltSemantics &Semantics, uninitializedTag)
+ : U(Semantics, uninitialized) {}
+ APFloat(const fltSemantics &Semantics, const APInt &I) : U(Semantics, I) {}
+ explicit APFloat(double d) : U(IEEEFloat(d), IEEEdouble()) {}
+ explicit APFloat(float f) : U(IEEEFloat(f), IEEEsingle()) {}
+ APFloat(const APFloat &RHS) = default;
+ APFloat(APFloat &&RHS) = default;
+
+ ~APFloat() = default;
+
+ bool needsCleanup() const {
+ if (usesLayout<IEEEFloat>(getSemantics()))
+ return U.IEEE.needsCleanup();
+ if (usesLayout<DoubleAPFloat>(getSemantics()))
+ return U.Double.needsCleanup();
+ llvm_unreachable("Unexpected semantics");
+ }
+
+ /// Factory for Positive and Negative Zero.
+ ///
+ /// \param Negative True iff the number should be negative.
+ static APFloat getZero(const fltSemantics &Sem, bool Negative = false) {
+ APFloat Val(Sem, uninitialized);
+ Val.makeZero(Negative);
+ return Val;
+ }
+
+ /// Factory for Positive and Negative Infinity.
+ ///
+ /// \param Negative True iff the number should be negative.
+ static APFloat getInf(const fltSemantics &Sem, bool Negative = false) {
+ APFloat Val(Sem, uninitialized);
+ Val.makeInf(Negative);
+ return Val;
+ }
+
+ /// Factory for NaN values.
+ ///
+ /// \param Negative - True iff the NaN generated should be negative.
+ /// \param type - The unspecified fill bits for creating the NaN, 0 by
+ /// default. The value is truncated as necessary.
+ static APFloat getNaN(const fltSemantics &Sem, bool Negative = false,
+ unsigned type = 0) {
+ if (type) {
+ APInt fill(64, type);
+ return getQNaN(Sem, Negative, &fill);
+ } else {
+ return getQNaN(Sem, Negative, nullptr);
+ }
+ }
+
+ /// Factory for QNaN values.
+ static APFloat getQNaN(const fltSemantics &Sem, bool Negative = false,
+ const APInt *payload = nullptr) {
+ APFloat Val(Sem, uninitialized);
+ Val.makeNaN(false, Negative, payload);
+ return Val;
+ }
+
+ /// Factory for SNaN values.
+ static APFloat getSNaN(const fltSemantics &Sem, bool Negative = false,
+ const APInt *payload = nullptr) {
+ APFloat Val(Sem, uninitialized);
+ Val.makeNaN(true, Negative, payload);
+ return Val;
+ }
+
+ /// Returns the largest finite number in the given semantics.
+ ///
+ /// \param Negative - True iff the number should be negative
+ static APFloat getLargest(const fltSemantics &Sem, bool Negative = false) {
+ APFloat Val(Sem, uninitialized);
+ Val.makeLargest(Negative);
+ return Val;
+ }
+
+ /// Returns the smallest (by magnitude) finite number in the given semantics.
+ /// Might be denormalized, which implies a relative loss of precision.
+ ///
+ /// \param Negative - True iff the number should be negative
+ static APFloat getSmallest(const fltSemantics &Sem, bool Negative = false) {
+ APFloat Val(Sem, uninitialized);
+ Val.makeSmallest(Negative);
+ return Val;
+ }
+
+ /// Returns the smallest (by magnitude) normalized finite number in the given
+ /// semantics.
+ ///
+ /// \param Negative - True iff the number should be negative
+ static APFloat getSmallestNormalized(const fltSemantics &Sem,
+ bool Negative = false) {
+ APFloat Val(Sem, uninitialized);
+ Val.makeSmallestNormalized(Negative);
+ return Val;
+ }
+
+ /// Returns a float which is bitcast from an all-ones integer value.
+ ///
+ /// \param BitWidth - Select float type
+ /// \param isIEEE - If 128 bit number, select between PPC and IEEE
+ static APFloat getAllOnesValue(unsigned BitWidth, bool isIEEE = false);
+
+ void Profile(FoldingSetNodeID &NID) const { getIEEE().Profile(NID); }
+
+ opStatus add(const APFloat &RHS, roundingMode RM) {
+ if (usesLayout<IEEEFloat>(getSemantics()))
+ return U.IEEE.add(RHS.U.IEEE, RM);
+ if (usesLayout<DoubleAPFloat>(getSemantics()))
+ return U.Double.add(RHS.U.Double, RM);
+ llvm_unreachable("Unexpected semantics");
+ }
+ opStatus subtract(const APFloat &RHS, roundingMode RM) {
+ if (usesLayout<IEEEFloat>(getSemantics()))
+ return U.IEEE.subtract(RHS.U.IEEE, RM);
+ if (usesLayout<DoubleAPFloat>(getSemantics()))
+ return U.Double.subtract(RHS.U.Double, RM);
+ llvm_unreachable("Unexpected semantics");
+ }
+ opStatus multiply(const APFloat &RHS, roundingMode RM) {
+ return getIEEE().multiply(RHS.getIEEE(), RM);
+ }
+ opStatus divide(const APFloat &RHS, roundingMode RM) {
+ return getIEEE().divide(RHS.getIEEE(), RM);
+ }
+ opStatus remainder(const APFloat &RHS) {
+ return getIEEE().remainder(RHS.getIEEE());
+ }
+ opStatus mod(const APFloat &RHS) { return getIEEE().mod(RHS.getIEEE()); }
+ opStatus fusedMultiplyAdd(const APFloat &Multiplicand, const APFloat &Addend,
+ roundingMode RM) {
+ return getIEEE().fusedMultiplyAdd(Multiplicand.getIEEE(), Addend.getIEEE(),
+ RM);
+ }
+ opStatus roundToIntegral(roundingMode RM) {
+ return getIEEE().roundToIntegral(RM);
+ }
+ opStatus next(bool nextDown) { return getIEEE().next(nextDown); }
+
+ APFloat operator+(const APFloat &RHS) const {
+ return APFloat(getIEEE() + RHS.getIEEE(), getSemantics());
+ }
+
+ APFloat operator-(const APFloat &RHS) const {
+ return APFloat(getIEEE() - RHS.getIEEE(), getSemantics());
+ }
+
+ APFloat operator*(const APFloat &RHS) const {
+ return APFloat(getIEEE() * RHS.getIEEE(), getSemantics());
+ }
+
+ APFloat operator/(const APFloat &RHS) const {
+ return APFloat(getIEEE() / RHS.getIEEE(), getSemantics());
+ }
+
+ void changeSign() { getIEEE().changeSign(); }
+ void clearSign() { getIEEE().clearSign(); }
+ void copySign(const APFloat &RHS) { getIEEE().copySign(RHS.getIEEE()); }
+
+ static APFloat copySign(APFloat Value, const APFloat &Sign) {
+ return APFloat(IEEEFloat::copySign(Value.getIEEE(), Sign.getIEEE()),
+ Value.getSemantics());
+ }
+
+ opStatus convert(const fltSemantics &ToSemantics, roundingMode RM,
+ bool *losesInfo);
+ opStatus convertToInteger(integerPart *Input, unsigned int Width,
+ bool IsSigned, roundingMode RM,
+ bool *IsExact) const {
+ return getIEEE().convertToInteger(Input, Width, IsSigned, RM, IsExact);
+ }
+ opStatus convertToInteger(APSInt &Result, roundingMode RM,
+ bool *IsExact) const {
+ return getIEEE().convertToInteger(Result, RM, IsExact);
+ }
+ opStatus convertFromAPInt(const APInt &Input, bool IsSigned,
+ roundingMode RM) {
+ return getIEEE().convertFromAPInt(Input, IsSigned, RM);
+ }
+ opStatus convertFromSignExtendedInteger(const integerPart *Input,
+ unsigned int InputSize, bool IsSigned,
+ roundingMode RM) {
+ return getIEEE().convertFromSignExtendedInteger(Input, InputSize, IsSigned,
+ RM);
+ }
+ opStatus convertFromZeroExtendedInteger(const integerPart *Input,
+ unsigned int InputSize, bool IsSigned,
+ roundingMode RM) {
+ return getIEEE().convertFromZeroExtendedInteger(Input, InputSize, IsSigned,
+ RM);
+ }
+ opStatus convertFromString(StringRef, roundingMode);
+ APInt bitcastToAPInt() const { return getIEEE().bitcastToAPInt(); }
+ double convertToDouble() const { return getIEEE().convertToDouble(); }
+ float convertToFloat() const { return getIEEE().convertToFloat(); }
+
+ bool operator==(const APFloat &) const = delete;
+
+ cmpResult compare(const APFloat &RHS) const {
+ return getIEEE().compare(RHS.getIEEE());
+ }
+
+ bool bitwiseIsEqual(const APFloat &RHS) const {
+ return getIEEE().bitwiseIsEqual(RHS.getIEEE());
+ }
+
+ unsigned int convertToHexString(char *DST, unsigned int HexDigits,
+ bool UpperCase, roundingMode RM) const {
+ return getIEEE().convertToHexString(DST, HexDigits, UpperCase, RM);
+ }
+
+ bool isZero() const { return getCategory() == fcZero; }
+ bool isInfinity() const { return getCategory() == fcInfinity; }
+ bool isNaN() const { return getCategory() == fcNaN; }
+
+ bool isNegative() const { return getIEEE().isNegative(); }
+ bool isDenormal() const { return getIEEE().isDenormal(); }
+ bool isSignaling() const { return getIEEE().isSignaling(); }
+
+ bool isNormal() const { return !isDenormal() && isFiniteNonZero(); }
+ bool isFinite() const { return !isNaN() && !isInfinity(); }
+
+ fltCategory getCategory() const { return getIEEE().getCategory(); }
+ const fltSemantics &getSemantics() const { return *U.semantics; }
+ bool isNonZero() const { return !isZero(); }
+ bool isFiniteNonZero() const { return isFinite() && !isZero(); }
+ bool isPosZero() const { return isZero() && !isNegative(); }
+ bool isNegZero() const { return isZero() && isNegative(); }
+ bool isSmallest() const { return getIEEE().isSmallest(); }
+ bool isLargest() const { return getIEEE().isLargest(); }
+ bool isInteger() const { return getIEEE().isInteger(); }
+
+ APFloat &operator=(const APFloat &RHS) = default;
+ APFloat &operator=(APFloat &&RHS) = default;
+
+ void toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision = 0,
+ unsigned FormatMaxPadding = 3) const {
+ return getIEEE().toString(Str, FormatPrecision, FormatMaxPadding);
+ }
+
+ void print(raw_ostream &) const;
+ void dump() const;
+
+ bool getExactInverse(APFloat *inv) const {
+ return getIEEE().getExactInverse(inv ? &inv->getIEEE() : nullptr);
+ }
+
+ // This is for internal testing only.
+ // TODO: Remove it after the PPCDoubleDouble transition.
+ const APFloat &getSecondFloat() const {
+ assert(&getSemantics() == &PPCDoubleDouble());
+ return U.Double.getSecond();
+ }
+
+ friend hash_code hash_value(const APFloat &Arg);
+ friend int ilogb(const APFloat &Arg) { return ilogb(Arg.getIEEE()); }
+ friend APFloat scalbn(APFloat X, int Exp, roundingMode RM);
+ friend APFloat frexp(const APFloat &X, int &Exp, roundingMode RM);
+ friend IEEEFloat;
+ friend DoubleAPFloat;
+};
+
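Client code only ever sees this forwarding interface; each call consults usesLayout<>() for the value's semantics and then forwards to the matching member of the Storage union. A minimal usage sketch with IEEEdouble semantics:

    #include "llvm/ADT/APFloat.h"
    #include <cassert>
    using namespace llvm;

    void wrapperUsage() {
      // Construction from a host double selects IEEEdouble semantics, so the
      // calls below all forward to the IEEEFloat member of the Storage union;
      // values built with PPCDoubleDouble() would route to DoubleAPFloat instead.
      APFloat A(1.5);
      APFloat B(2.25);
      A.add(B, APFloat::rmNearestTiesToEven);
      assert(A.convertToDouble() == 3.75);
      assert(&A.getSemantics() == &APFloat::IEEEdouble());
    }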
/// See friend declarations above.
///
/// These additional declarations are required in order to compile LLVM with IBM
/// xlC compiler.
hash_code hash_value(const APFloat &Arg);
-int ilogb(const APFloat &Arg);
-APFloat scalbn(APFloat X, int Exp, APFloat::roundingMode);
+inline APFloat scalbn(APFloat X, int Exp, APFloat::roundingMode RM) {
+ return APFloat(scalbn(X.getIEEE(), Exp, RM), X.getSemantics());
+}
/// \brief Equivalent of C standard library function.
///
/// While the C standard says Exp is an unspecified value for infinity and nan,
/// this returns INT_MAX for infinities, and INT_MIN for NaNs.
-APFloat frexp(const APFloat &Val, int &Exp, APFloat::roundingMode RM);
-
+inline APFloat frexp(const APFloat &X, int &Exp, APFloat::roundingMode RM) {
+ return APFloat(frexp(X.getIEEE(), Exp, RM), X.getSemantics());
+}
/// \brief Returns the absolute value of the argument.
inline APFloat abs(APFloat X) {
X.clearSign();
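These wrappers expose the IEEEFloat implementations through the new interface type; a small round-trip sketch (IEEEdouble semantics, finite input assumed):

    #include "llvm/ADT/APFloat.h"
    #include <cassert>
    using namespace llvm;

    void frexpScalbnRoundTrip() {
      APFloat X(6.0);
      int Exp = 0;
      // Split X into Frac * 2^Exp with Frac in [0.5, 1); for 6.0 this yields
      // Frac == 0.75 and Exp == 3. Infinities report INT_MAX, NaNs INT_MIN.
      APFloat Frac = frexp(X, Exp, APFloat::rmNearestTiesToEven);
      // Reassembling is exact for finite values.
      APFloat Y = scalbn(Frac, Exp, APFloat::rmNearestTiesToEven);
      assert(Y.compare(X) == APFloat::cmpEqual);
      (void)Y;
    }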
diff --git a/contrib/llvm/include/llvm/ADT/APInt.h b/contrib/llvm/include/llvm/ADT/APInt.h
index d77d1c7ca93f..2c0713da256c 100644
--- a/contrib/llvm/include/llvm/ADT/APInt.h
+++ b/contrib/llvm/include/llvm/ADT/APInt.h
@@ -40,6 +40,10 @@ const unsigned int host_char_bit = 8;
const unsigned int integerPartWidth =
host_char_bit * static_cast<unsigned int>(sizeof(integerPart));
+class APInt;
+
+inline APInt operator-(APInt);
+
//===----------------------------------------------------------------------===//
// APInt Class
//===----------------------------------------------------------------------===//
@@ -70,7 +74,7 @@ const unsigned int integerPartWidth =
/// * In general, the class tries to follow the style of computation that LLVM
/// uses in its IR. This simplifies its use for LLVM.
///
-class APInt {
+class LLVM_NODISCARD APInt {
unsigned BitWidth; ///< The number of bits in this APInt.
/// This union is used to store the integer value. When the
@@ -620,18 +624,6 @@ public:
return Result;
}
- /// \brief Unary negation operator
- ///
- /// Negates *this using two's complement logic.
- ///
- /// \returns An APInt value representing the negation of *this.
- APInt operator-() const {
- APInt Result(*this);
- Result.flipAllBits();
- ++Result;
- return Result;
- }
-
/// \brief Logical negation operator.
///
/// Performs logical negation operation on this APInt.
@@ -750,6 +742,7 @@ public:
///
/// \returns *this
APInt &operator+=(const APInt &RHS);
+ APInt &operator+=(uint64_t RHS);
/// \brief Subtraction assignment operator.
///
@@ -757,6 +750,7 @@ public:
///
/// \returns *this
APInt &operator-=(const APInt &RHS);
+ APInt &operator-=(uint64_t RHS);
/// \brief Left-shift assignment function.
///
@@ -783,9 +777,7 @@ public:
return APInt(getBitWidth(), VAL & RHS.VAL);
return AndSlowCase(RHS);
}
- APInt LLVM_ATTRIBUTE_UNUSED_RESULT And(const APInt &RHS) const {
- return this->operator&(RHS);
- }
+ APInt And(const APInt &RHS) const { return this->operator&(RHS); }
/// \brief Bitwise OR operator.
///
@@ -805,9 +797,7 @@ public:
/// calling operator|.
///
/// \returns An APInt value representing the bitwise OR of *this and RHS.
- APInt LLVM_ATTRIBUTE_UNUSED_RESULT Or(const APInt &RHS) const {
- return this->operator|(RHS);
- }
+ APInt Or(const APInt &RHS) const { return this->operator|(RHS); }
/// \brief Bitwise XOR operator.
///
@@ -827,27 +817,13 @@ public:
/// through the usage of operator^.
///
/// \returns An APInt value representing the bitwise XOR of *this and RHS.
- APInt LLVM_ATTRIBUTE_UNUSED_RESULT Xor(const APInt &RHS) const {
- return this->operator^(RHS);
- }
+ APInt Xor(const APInt &RHS) const { return this->operator^(RHS); }
/// \brief Multiplication operator.
///
/// Multiplies this APInt by RHS and returns the result.
APInt operator*(const APInt &RHS) const;
- /// \brief Addition operator.
- ///
- /// Adds RHS to this APInt and returns the result.
- APInt operator+(const APInt &RHS) const;
- APInt operator+(uint64_t RHS) const;
-
- /// \brief Subtraction operator.
- ///
- /// Subtracts RHS from this APInt and returns the result.
- APInt operator-(const APInt &RHS) const;
- APInt operator-(uint64_t RHS) const;
-
/// \brief Left logical shift operator.
///
/// Shifts this APInt left by \p Bits and returns the result.
@@ -861,17 +837,17 @@ public:
/// \brief Arithmetic right-shift function.
///
/// Arithmetic right-shift this APInt by shiftAmt.
- APInt LLVM_ATTRIBUTE_UNUSED_RESULT ashr(unsigned shiftAmt) const;
+ APInt ashr(unsigned shiftAmt) const;
/// \brief Logical right-shift function.
///
/// Logical right-shift this APInt by shiftAmt.
- APInt LLVM_ATTRIBUTE_UNUSED_RESULT lshr(unsigned shiftAmt) const;
+ APInt lshr(unsigned shiftAmt) const;
/// \brief Left-shift function.
///
/// Left-shift this APInt by shiftAmt.
- APInt LLVM_ATTRIBUTE_UNUSED_RESULT shl(unsigned shiftAmt) const {
+ APInt shl(unsigned shiftAmt) const {
assert(shiftAmt <= BitWidth && "Invalid shift amount");
if (isSingleWord()) {
if (shiftAmt >= BitWidth)
@@ -882,31 +858,31 @@ public:
}
/// \brief Rotate left by rotateAmt.
- APInt LLVM_ATTRIBUTE_UNUSED_RESULT rotl(unsigned rotateAmt) const;
+ APInt rotl(unsigned rotateAmt) const;
/// \brief Rotate right by rotateAmt.
- APInt LLVM_ATTRIBUTE_UNUSED_RESULT rotr(unsigned rotateAmt) const;
+ APInt rotr(unsigned rotateAmt) const;
/// \brief Arithmetic right-shift function.
///
/// Arithmetic right-shift this APInt by shiftAmt.
- APInt LLVM_ATTRIBUTE_UNUSED_RESULT ashr(const APInt &shiftAmt) const;
+ APInt ashr(const APInt &shiftAmt) const;
/// \brief Logical right-shift function.
///
/// Logical right-shift this APInt by shiftAmt.
- APInt LLVM_ATTRIBUTE_UNUSED_RESULT lshr(const APInt &shiftAmt) const;
+ APInt lshr(const APInt &shiftAmt) const;
/// \brief Left-shift function.
///
/// Left-shift this APInt by shiftAmt.
- APInt LLVM_ATTRIBUTE_UNUSED_RESULT shl(const APInt &shiftAmt) const;
+ APInt shl(const APInt &shiftAmt) const;
/// \brief Rotate left by rotateAmt.
- APInt LLVM_ATTRIBUTE_UNUSED_RESULT rotl(const APInt &rotateAmt) const;
+ APInt rotl(const APInt &rotateAmt) const;
/// \brief Rotate right by rotateAmt.
- APInt LLVM_ATTRIBUTE_UNUSED_RESULT rotr(const APInt &rotateAmt) const;
+ APInt rotr(const APInt &rotateAmt) const;
/// \brief Unsigned division operation.
///
@@ -914,12 +890,12 @@ public:
/// RHS are treated as unsigned quantities for purposes of this division.
///
/// \returns a new APInt value containing the division result
- APInt LLVM_ATTRIBUTE_UNUSED_RESULT udiv(const APInt &RHS) const;
+ APInt udiv(const APInt &RHS) const;
/// \brief Signed division function for APInt.
///
/// Signed divide this APInt by APInt RHS.
- APInt LLVM_ATTRIBUTE_UNUSED_RESULT sdiv(const APInt &RHS) const;
+ APInt sdiv(const APInt &RHS) const;
/// \brief Unsigned remainder operation.
///
@@ -930,12 +906,12 @@ public:
/// is *this.
///
/// \returns a new APInt value containing the remainder result
- APInt LLVM_ATTRIBUTE_UNUSED_RESULT urem(const APInt &RHS) const;
+ APInt urem(const APInt &RHS) const;
/// \brief Function for signed remainder operation.
///
/// Signed remainder operation on APInt.
- APInt LLVM_ATTRIBUTE_UNUSED_RESULT srem(const APInt &RHS) const;
+ APInt srem(const APInt &RHS) const;
/// \brief Dual division/remainder interface.
///
@@ -1178,7 +1154,7 @@ public:
///
/// Truncate the APInt to a specified width. It is an error to specify a width
/// that is greater than or equal to the current width.
- APInt LLVM_ATTRIBUTE_UNUSED_RESULT trunc(unsigned width) const;
+ APInt trunc(unsigned width) const;
/// \brief Sign extend to a new width.
///
@@ -1186,38 +1162,38 @@ public:
/// bit is set, the fill on the left will be done with 1 bits, otherwise zero.
/// It is an error to specify a width that is less than or equal to the
/// current width.
- APInt LLVM_ATTRIBUTE_UNUSED_RESULT sext(unsigned width) const;
+ APInt sext(unsigned width) const;
/// \brief Zero extend to a new width.
///
/// This operation zero extends the APInt to a new width. The high order bits
/// are filled with 0 bits. It is an error to specify a width that is less
/// than or equal to the current width.
- APInt LLVM_ATTRIBUTE_UNUSED_RESULT zext(unsigned width) const;
+ APInt zext(unsigned width) const;
/// \brief Sign extend or truncate to width
///
/// Make this APInt have the bit width given by \p width. The value is sign
/// extended, truncated, or left alone to make it that width.
- APInt LLVM_ATTRIBUTE_UNUSED_RESULT sextOrTrunc(unsigned width) const;
+ APInt sextOrTrunc(unsigned width) const;
/// \brief Zero extend or truncate to width
///
/// Make this APInt have the bit width given by \p width. The value is zero
/// extended, truncated, or left alone to make it that width.
- APInt LLVM_ATTRIBUTE_UNUSED_RESULT zextOrTrunc(unsigned width) const;
+ APInt zextOrTrunc(unsigned width) const;
/// \brief Sign extend or truncate to width
///
/// Make this APInt have the bit width given by \p width. The value is sign
/// extended, or left alone to make it that width.
- APInt LLVM_ATTRIBUTE_UNUSED_RESULT sextOrSelf(unsigned width) const;
+ APInt sextOrSelf(unsigned width) const;
/// \brief Zero extend or truncate to width
///
/// Make this APInt have the bit width given by \p width. The value is zero
/// extended, or left alone to make it that width.
- APInt LLVM_ATTRIBUTE_UNUSED_RESULT zextOrSelf(unsigned width) const;
+ APInt zextOrSelf(unsigned width) const;
/// @}
/// \name Bit Manipulation Operators
@@ -1454,11 +1430,11 @@ public:
std::string toString(unsigned Radix, bool Signed) const;
/// \returns a byte-swapped representation of this APInt Value.
- APInt LLVM_ATTRIBUTE_UNUSED_RESULT byteSwap() const;
+ APInt byteSwap() const;
/// \returns the value with the bit representation reversed of this APInt
/// Value.
- APInt LLVM_ATTRIBUTE_UNUSED_RESULT reverseBits() const;
+ APInt reverseBits() const;
/// \brief Converts this APInt to a double value.
double roundToDouble(bool isSigned) const;
@@ -1501,7 +1477,7 @@ public:
///
/// The conversion does not do a translation from double to integer, it just
/// re-interprets the bits of the double.
- static APInt LLVM_ATTRIBUTE_UNUSED_RESULT doubleToBits(double V) {
+ static APInt doubleToBits(double V) {
union {
uint64_t I;
double D;
@@ -1514,7 +1490,7 @@ public:
///
/// The conversion does not do a translation from float to integer, it just
/// re-interprets the bits of the float.
- static APInt LLVM_ATTRIBUTE_UNUSED_RESULT floatToBits(float V) {
+ static APInt floatToBits(float V) {
union {
unsigned I;
float F;
@@ -1532,7 +1508,9 @@ public:
/// \returns the ceil log base 2 of this APInt.
unsigned ceilLogBase2() const {
- return BitWidth - (*this - 1).countLeadingZeros();
+ APInt temp(*this);
+ --temp;
+ return BitWidth - temp.countLeadingZeros();
}
/// \returns the nearest log base 2 of this APInt. Ties round up.
@@ -1573,12 +1551,12 @@ public:
}
/// \brief Compute the square root
- APInt LLVM_ATTRIBUTE_UNUSED_RESULT sqrt() const;
+ APInt sqrt() const;
/// \brief Get the absolute value;
///
/// If *this is < 0 then return -(*this), otherwise *this;
- APInt LLVM_ATTRIBUTE_UNUSED_RESULT abs() const {
+ APInt abs() const {
if (isNegative())
return -(*this);
return *this;
@@ -1750,6 +1728,55 @@ inline raw_ostream &operator<<(raw_ostream &OS, const APInt &I) {
return OS;
}
+inline APInt operator-(APInt v) {
+ v.flipAllBits();
+ ++v;
+ return v;
+}
+
+inline APInt operator+(APInt a, const APInt &b) {
+ a += b;
+ return a;
+}
+
+inline APInt operator+(const APInt &a, APInt &&b) {
+ b += a;
+ return std::move(b);
+}
+
+inline APInt operator+(APInt a, uint64_t RHS) {
+ a += RHS;
+ return a;
+}
+
+inline APInt operator+(uint64_t LHS, APInt b) {
+ b += LHS;
+ return b;
+}
+
+inline APInt operator-(APInt a, const APInt &b) {
+ a -= b;
+ return a;
+}
+
+inline APInt operator-(const APInt &a, APInt &&b) {
+ b = -std::move(b);
+ b += a;
+ return std::move(b);
+}
+
+inline APInt operator-(APInt a, uint64_t RHS) {
+ a -= RHS;
+ return a;
+}
+
+inline APInt operator-(uint64_t LHS, APInt b) {
+ b = -std::move(b);
+ b += LHS;
+ return b;
+}
+
+
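The member +/- operators removed above are replaced by these free functions. Taking one operand by value lets an rvalue argument donate its storage instead of forcing a fresh allocation; a short usage sketch (128-bit width chosen so the value is heap-allocated):

    #include "llvm/ADT/APInt.h"
    #include <cassert>
    #include <utility>
    using namespace llvm;

    void apintOperatorUsage() {
      APInt A(128, 10);
      APInt B = A + 7;              // operator+(APInt, uint64_t): copies A once
      APInt C = 100 - std::move(B); // by-value parameter steals B's storage
      APInt D = -C;                 // free two's-complement negation (copies C)
      assert(D.getSExtValue() == -83);
      (void)D;
    }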
namespace APIntOps {
/// \brief Determine the smaller of two APInts considered to be signed.
diff --git a/contrib/llvm/include/llvm/ADT/APSInt.h b/contrib/llvm/include/llvm/ADT/APSInt.h
index a6552d0a2f36..813b3686d6b1 100644
--- a/contrib/llvm/include/llvm/ADT/APSInt.h
+++ b/contrib/llvm/include/llvm/ADT/APSInt.h
@@ -19,7 +19,7 @@
namespace llvm {
-class APSInt : public APInt {
+class LLVM_NODISCARD APSInt : public APInt {
bool IsUnsigned;
public:
@@ -78,22 +78,22 @@ public:
return isSigned() ? getSExtValue() : getZExtValue();
}
- APSInt LLVM_ATTRIBUTE_UNUSED_RESULT trunc(uint32_t width) const {
+ APSInt trunc(uint32_t width) const {
return APSInt(APInt::trunc(width), IsUnsigned);
}
- APSInt LLVM_ATTRIBUTE_UNUSED_RESULT extend(uint32_t width) const {
+ APSInt extend(uint32_t width) const {
if (IsUnsigned)
return APSInt(zext(width), IsUnsigned);
else
return APSInt(sext(width), IsUnsigned);
}
- APSInt LLVM_ATTRIBUTE_UNUSED_RESULT extOrTrunc(uint32_t width) const {
- if (IsUnsigned)
- return APSInt(zextOrTrunc(width), IsUnsigned);
- else
- return APSInt(sextOrTrunc(width), IsUnsigned);
+ APSInt extOrTrunc(uint32_t width) const {
+ if (IsUnsigned)
+ return APSInt(zextOrTrunc(width), IsUnsigned);
+ else
+ return APSInt(sextOrTrunc(width), IsUnsigned);
}
const APSInt &operator%=(const APSInt &RHS) {
@@ -235,25 +235,19 @@ public:
assert(IsUnsigned == RHS.IsUnsigned && "Signedness mismatch!");
return APSInt(static_cast<const APInt&>(*this) & RHS, IsUnsigned);
}
- APSInt LLVM_ATTRIBUTE_UNUSED_RESULT And(const APSInt& RHS) const {
- return this->operator&(RHS);
- }
+ APSInt And(const APSInt &RHS) const { return this->operator&(RHS); }
APSInt operator|(const APSInt& RHS) const {
assert(IsUnsigned == RHS.IsUnsigned && "Signedness mismatch!");
return APSInt(static_cast<const APInt&>(*this) | RHS, IsUnsigned);
}
- APSInt LLVM_ATTRIBUTE_UNUSED_RESULT Or(const APSInt& RHS) const {
- return this->operator|(RHS);
- }
+ APSInt Or(const APSInt &RHS) const { return this->operator|(RHS); }
APSInt operator^(const APSInt &RHS) const {
assert(IsUnsigned == RHS.IsUnsigned && "Signedness mismatch!");
return APSInt(static_cast<const APInt&>(*this) ^ RHS, IsUnsigned);
}
- APSInt LLVM_ATTRIBUTE_UNUSED_RESULT Xor(const APSInt& RHS) const {
- return this->operator^(RHS);
- }
+ APSInt Xor(const APSInt &RHS) const { return this->operator^(RHS); }
APSInt operator*(const APSInt& RHS) const {
assert(IsUnsigned == RHS.IsUnsigned && "Signedness mismatch!");
diff --git a/contrib/llvm/include/llvm/ADT/AllocatorList.h b/contrib/llvm/include/llvm/ADT/AllocatorList.h
new file mode 100644
index 000000000000..05a549f96ec7
--- /dev/null
+++ b/contrib/llvm/include/llvm/ADT/AllocatorList.h
@@ -0,0 +1,226 @@
+//===- llvm/ADT/AllocatorList.h - Custom allocator list ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_ALLOCATORLIST_H
+#define LLVM_ADT_ALLOCATORLIST_H
+
+#include "llvm/ADT/iterator.h"
+#include "llvm/ADT/simple_ilist.h"
+#include "llvm/Support/Allocator.h"
+#include <type_traits>
+
+namespace llvm {
+
+/// A linked-list with a custom, local allocator.
+///
+/// Expose a std::list-like interface that owns and uses a custom LLVM-style
+/// allocator (e.g., BumpPtrAllocator), leveraging \a simple_ilist for the
+/// implementation details.
+///
+/// Because this list owns the allocator, calling \a splice() with a different
+/// list isn't generally safe. As such, \a splice has been left out of the
+/// interface entirely.
+template <class T, class AllocatorT> class AllocatorList : AllocatorT {
+ struct Node : ilist_node<Node> {
+ Node(Node &&) = delete;
+ Node(const Node &) = delete;
+ Node &operator=(Node &&) = delete;
+ Node &operator=(const Node &) = delete;
+
+ Node(T &&V) : V(std::move(V)) {}
+ Node(const T &V) : V(V) {}
+ template <class... Ts> Node(Ts &&... Vs) : V(std::forward<Ts>(Vs)...) {}
+ T V;
+ };
+
+ typedef simple_ilist<Node> list_type;
+ list_type List;
+
+ AllocatorT &getAlloc() { return *this; }
+ const AllocatorT &getAlloc() const { return *this; }
+
+ template <class... ArgTs> Node *create(ArgTs &&... Args) {
+ return new (getAlloc()) Node(std::forward<ArgTs>(Args)...);
+ }
+
+ struct Cloner {
+ AllocatorList &AL;
+ Cloner(AllocatorList &AL) : AL(AL) {}
+ Node *operator()(const Node &N) const { return AL.create(N.V); }
+ };
+
+ struct Disposer {
+ AllocatorList &AL;
+ Disposer(AllocatorList &AL) : AL(AL) {}
+ void operator()(Node *N) const {
+ N->~Node();
+ AL.getAlloc().Deallocate(N);
+ }
+ };
+
+public:
+ typedef T value_type;
+ typedef T *pointer;
+ typedef T &reference;
+ typedef const T *const_pointer;
+ typedef const T &const_reference;
+ typedef typename list_type::size_type size_type;
+ typedef typename list_type::difference_type difference_type;
+
+private:
+ template <class ValueT, class IteratorBase>
+ class IteratorImpl
+ : public iterator_adaptor_base<IteratorImpl<ValueT, IteratorBase>,
+ IteratorBase,
+ std::bidirectional_iterator_tag, ValueT> {
+ template <class OtherValueT, class OtherIteratorBase>
+ friend class IteratorImpl;
+ friend AllocatorList;
+
+ typedef iterator_adaptor_base<IteratorImpl<ValueT, IteratorBase>,
+ IteratorBase, std::bidirectional_iterator_tag,
+ ValueT>
+ base_type;
+
+ public:
+ typedef ValueT value_type;
+ typedef ValueT *pointer;
+ typedef ValueT &reference;
+
+ IteratorImpl() = default;
+ IteratorImpl(const IteratorImpl &) = default;
+ IteratorImpl &operator=(const IteratorImpl &) = default;
+ ~IteratorImpl() = default;
+
+ explicit IteratorImpl(const IteratorBase &I) : base_type(I) {}
+
+ template <class OtherValueT, class OtherIteratorBase>
+ IteratorImpl(const IteratorImpl<OtherValueT, OtherIteratorBase> &X,
+ typename std::enable_if<std::is_convertible<
+ OtherIteratorBase, IteratorBase>::value>::type * = nullptr)
+ : base_type(X.wrapped()) {}
+
+ reference operator*() const { return base_type::wrapped()->V; }
+ pointer operator->() const { return &operator*(); }
+
+ friend bool operator==(const IteratorImpl &L, const IteratorImpl &R) {
+ return L.wrapped() == R.wrapped();
+ }
+ friend bool operator!=(const IteratorImpl &L, const IteratorImpl &R) {
+ return !(L == R);
+ }
+ };
+
+public:
+ typedef IteratorImpl<T, typename list_type::iterator> iterator;
+ typedef IteratorImpl<T, typename list_type::reverse_iterator>
+ reverse_iterator;
+ typedef IteratorImpl<const T, typename list_type::const_iterator>
+ const_iterator;
+ typedef IteratorImpl<const T, typename list_type::const_reverse_iterator>
+ const_reverse_iterator;
+
+ AllocatorList() = default;
+ AllocatorList(AllocatorList &&X)
+ : AllocatorT(std::move(X.getAlloc())), List(std::move(X.List)) {}
+ AllocatorList(const AllocatorList &X) {
+ List.cloneFrom(X.List, Cloner(*this), Disposer(*this));
+ }
+ AllocatorList &operator=(AllocatorList &&X) {
+ clear(); // Dispose of current nodes explicitly.
+ List = std::move(X.List);
+ getAlloc() = std::move(X.getAlloc());
+ return *this;
+ }
+ AllocatorList &operator=(const AllocatorList &X) {
+ List.cloneFrom(X.List, Cloner(*this), Disposer(*this));
+ return *this;
+ }
+ ~AllocatorList() { clear(); }
+
+ void swap(AllocatorList &RHS) {
+ List.swap(RHS.List);
+ std::swap(getAlloc(), RHS.getAlloc());
+ }
+
+ bool empty() { return List.empty(); }
+ size_t size() { return List.size(); }
+
+ iterator begin() { return iterator(List.begin()); }
+ iterator end() { return iterator(List.end()); }
+ const_iterator begin() const { return const_iterator(List.begin()); }
+ const_iterator end() const { return const_iterator(List.end()); }
+ reverse_iterator rbegin() { return reverse_iterator(List.rbegin()); }
+ reverse_iterator rend() { return reverse_iterator(List.rend()); }
+ const_reverse_iterator rbegin() const {
+ return const_reverse_iterator(List.rbegin());
+ }
+ const_reverse_iterator rend() const {
+ return const_reverse_iterator(List.rend());
+ }
+
+ T &back() { return List.back().V; }
+ T &front() { return List.front().V; }
+ const T &back() const { return List.back().V; }
+ const T &front() const { return List.front().V; }
+
+ template <class... Ts> iterator emplace(iterator I, Ts &&... Vs) {
+ return iterator(List.insert(I.wrapped(), *create(std::forward<Ts>(Vs)...)));
+ }
+
+ iterator insert(iterator I, T &&V) {
+ return iterator(List.insert(I.wrapped(), *create(std::move(V))));
+ }
+ iterator insert(iterator I, const T &V) {
+ return iterator(List.insert(I.wrapped(), *create(V)));
+ }
+
+ template <class Iterator>
+ void insert(iterator I, Iterator First, Iterator Last) {
+ for (; First != Last; ++First)
+ List.insert(I.wrapped(), *create(*First));
+ }
+
+ iterator erase(iterator I) {
+ return iterator(List.eraseAndDispose(I.wrapped(), Disposer(*this)));
+ }
+
+ iterator erase(iterator First, iterator Last) {
+ return iterator(
+ List.eraseAndDispose(First.wrapped(), Last.wrapped(), Disposer(*this)));
+ }
+
+ void clear() { List.clearAndDispose(Disposer(*this)); }
+ void pop_back() { List.eraseAndDispose(--List.end(), Disposer(*this)); }
+ void pop_front() { List.eraseAndDispose(List.begin(), Disposer(*this)); }
+ void push_back(T &&V) { insert(end(), std::move(V)); }
+ void push_front(T &&V) { insert(begin(), std::move(V)); }
+ void push_back(const T &V) { insert(end(), V); }
+ void push_front(const T &V) { insert(begin(), V); }
+ template <class... Ts> void emplace_back(Ts &&... Vs) {
+ emplace(end(), std::forward<Ts>(Vs)...);
+ }
+ template <class... Ts> void emplace_front(Ts &&... Vs) {
+ emplace(begin(), std::forward<Ts>(Vs)...);
+ }
+
+ /// Reset the underlying allocator.
+ ///
+ /// \pre \c empty()
+ void resetAlloc() {
+ assert(empty() && "Cannot reset allocator if not empty");
+ getAlloc().Reset();
+ }
+};
+
+template <class T> using BumpPtrList = AllocatorList<T, BumpPtrAllocator>;
+
+} // end namespace llvm
+
+#endif // LLVM_ADT_ALLOCATORLIST_H
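A brief usage sketch of the new container (element type int, the default BumpPtrAllocator):

    #include "llvm/ADT/AllocatorList.h"
    using namespace llvm;

    void bumpPtrListUsage() {
      // std::list-like container whose nodes are carved out of an owned
      // BumpPtrAllocator; note there is deliberately no splice().
      BumpPtrList<int> L;
      L.push_back(1);
      L.push_back(2);
      L.emplace_front(0);

      int Sum = 0;
      for (int V : L)
        Sum += V;            // 0 + 1 + 2

      L.clear();             // dispose of every node...
      L.resetAlloc();        // ...then hand the memory back to the allocator
      (void)Sum;
    }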
diff --git a/contrib/llvm/include/llvm/ADT/ArrayRef.h b/contrib/llvm/include/llvm/ADT/ArrayRef.h
index 95a1e62ef005..b3fe31f4a806 100644
--- a/contrib/llvm/include/llvm/ADT/ArrayRef.h
+++ b/contrib/llvm/include/llvm/ADT/ArrayRef.h
@@ -12,7 +12,9 @@
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/None.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
+#include <array>
#include <vector>
namespace llvm {
@@ -28,7 +30,7 @@ namespace llvm {
/// This is intended to be trivially copyable, so it should be passed by
/// value.
template<typename T>
- class ArrayRef {
+ class LLVM_NODISCARD ArrayRef {
public:
typedef const T *iterator;
typedef const T *const_iterator;
@@ -78,10 +80,14 @@ namespace llvm {
/*implicit*/ ArrayRef(const std::vector<T, A> &Vec)
: Data(Vec.data()), Length(Vec.size()) {}
+ /// Construct an ArrayRef from a std::array
+ template <size_t N>
+ /*implicit*/ constexpr ArrayRef(const std::array<T, N> &Arr)
+ : Data(Arr.data()), Length(N) {}
+
/// Construct an ArrayRef from a C array.
template <size_t N>
- /*implicit*/ LLVM_CONSTEXPR ArrayRef(const T (&Arr)[N])
- : Data(Arr), Length(N) {}
+ /*implicit*/ constexpr ArrayRef(const T (&Arr)[N]) : Data(Arr), Length(N) {}
/// Construct an ArrayRef from a std::initializer_list.
/*implicit*/ ArrayRef(const std::initializer_list<T> &Vec)
@@ -160,12 +166,6 @@ namespace llvm {
return std::equal(begin(), end(), RHS.begin());
}
- /// slice(n) - Chop off the first N elements of the array.
- ArrayRef<T> slice(size_t N) const {
- assert(N <= size() && "Invalid specifier");
- return ArrayRef<T>(data()+N, size()-N);
- }
-
/// slice(n, m) - Chop off the first N elements of the array, and keep M
/// elements in the array.
ArrayRef<T> slice(size_t N, size_t M) const {
@@ -173,6 +173,9 @@ namespace llvm {
return ArrayRef<T>(data()+N, M);
}
+ /// slice(n) - Chop off the first N elements of the array.
+ ArrayRef<T> slice(size_t N) const { return slice(N, size() - N); }
+
/// \brief Drop the first \p N elements of the array.
ArrayRef<T> drop_front(size_t N = 1) const {
assert(size() >= N && "Dropping more elements than exist");
@@ -185,6 +188,44 @@ namespace llvm {
return slice(0, size() - N);
}
+ /// \brief Return a copy of *this with the first N elements satisfying the
+ /// given predicate removed.
+ template <class PredicateT> ArrayRef<T> drop_while(PredicateT Pred) const {
+ return ArrayRef<T>(find_if_not(*this, Pred), end());
+ }
+
+ /// \brief Return a copy of *this with the first N elements not satisfying
+ /// the given predicate removed.
+ template <class PredicateT> ArrayRef<T> drop_until(PredicateT Pred) const {
+ return ArrayRef<T>(find_if(*this, Pred), end());
+ }
+
+ /// \brief Return a copy of *this with only the first \p N elements.
+ ArrayRef<T> take_front(size_t N = 1) const {
+ if (N >= size())
+ return *this;
+ return drop_back(size() - N);
+ }
+
+ /// \brief Return a copy of *this with only the last \p N elements.
+ ArrayRef<T> take_back(size_t N = 1) const {
+ if (N >= size())
+ return *this;
+ return drop_front(size() - N);
+ }
+
+ /// \brief Return the first N elements of this Array that satisfy the given
+ /// predicate.
+ template <class PredicateT> ArrayRef<T> take_while(PredicateT Pred) const {
+ return ArrayRef<T>(begin(), find_if_not(*this, Pred));
+ }
+
+ /// \brief Return the first N elements of this Array that don't satisfy the
+ /// given predicate.
+ template <class PredicateT> ArrayRef<T> take_until(PredicateT Pred) const {
+ return ArrayRef<T>(begin(), find_if(*this, Pred));
+ }
+
/// @}
/// @name Operator Overloads
/// @{
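The new predicate-based helpers mirror take/drop idioms from other range libraries; a quick sketch of how they slice:

    #include "llvm/ADT/ArrayRef.h"
    using namespace llvm;

    void arrayRefSlicing() {
      static const int Vals[] = {1, 3, 5, 4, 6, 8};
      ArrayRef<int> A(Vals);

      auto IsOdd = [](int V) { return V % 2 != 0; };
      ArrayRef<int> Lead = A.take_while(IsOdd); // {1, 3, 5}: longest odd prefix
      ArrayRef<int> Rest = A.drop_while(IsOdd); // {4, 6, 8}: everything after it
      ArrayRef<int> Tail = A.take_back(2);      // {6, 8}
      (void)Lead; (void)Rest; (void)Tail;
    }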
@@ -193,6 +234,22 @@ namespace llvm {
return Data[Index];
}
+ /// Disallow accidental assignment from a temporary.
+ ///
+ /// The declaration here is extra complicated so that "arrayRef = {}"
+ /// continues to select the move assignment operator.
+ template <typename U>
+ typename std::enable_if<std::is_same<U, T>::value, ArrayRef<T>>::type &
+ operator=(U &&Temporary) = delete;
+
+ /// Disallow accidental assignment from a temporary.
+ ///
+ /// The declaration here is extra complicated so that "arrayRef = {}"
+ /// continues to select the move assignment operator.
+ template <typename U>
+ typename std::enable_if<std::is_same<U, T>::value, ArrayRef<T>>::type &
+ operator=(std::initializer_list<U>) = delete;
+
/// @}
/// @name Expensive Operations
/// @{
@@ -223,7 +280,7 @@ namespace llvm {
/// This is intended to be trivially copyable, so it should be passed by
/// value.
template<typename T>
- class MutableArrayRef : public ArrayRef<T> {
+ class LLVM_NODISCARD MutableArrayRef : public ArrayRef<T> {
public:
typedef T *iterator;
@@ -253,10 +310,14 @@ namespace llvm {
/*implicit*/ MutableArrayRef(std::vector<T> &Vec)
: ArrayRef<T>(Vec) {}
+ /// Construct a MutableArrayRef from a std::array
+ template <size_t N>
+ /*implicit*/ constexpr MutableArrayRef(std::array<T, N> &Arr)
+ : ArrayRef<T>(Arr) {}
+
/// Construct a MutableArrayRef from a C array.
template <size_t N>
- /*implicit*/ LLVM_CONSTEXPR MutableArrayRef(T (&Arr)[N])
- : ArrayRef<T>(Arr) {}
+ /*implicit*/ constexpr MutableArrayRef(T (&Arr)[N]) : ArrayRef<T>(Arr) {}
T *data() const { return const_cast<T*>(ArrayRef<T>::data()); }
@@ -278,17 +339,16 @@ namespace llvm {
return data()[this->size()-1];
}
- /// slice(n) - Chop off the first N elements of the array.
- MutableArrayRef<T> slice(size_t N) const {
- assert(N <= this->size() && "Invalid specifier");
- return MutableArrayRef<T>(data()+N, this->size()-N);
- }
-
/// slice(n, m) - Chop off the first N elements of the array, and keep M
/// elements in the array.
MutableArrayRef<T> slice(size_t N, size_t M) const {
- assert(N+M <= this->size() && "Invalid specifier");
- return MutableArrayRef<T>(data()+N, M);
+ assert(N + M <= this->size() && "Invalid specifier");
+ return MutableArrayRef<T>(this->data() + N, M);
+ }
+
+ /// slice(n) - Chop off the first N elements of the array.
+ MutableArrayRef<T> slice(size_t N) const {
+ return slice(N, this->size() - N);
}
/// \brief Drop the first \p N elements of the array.
@@ -302,6 +362,48 @@ namespace llvm {
return slice(0, this->size() - N);
}
+ /// \brief Return a copy of *this with the first N elements satisfying the
+ /// given predicate removed.
+ template <class PredicateT>
+ MutableArrayRef<T> drop_while(PredicateT Pred) const {
+ return MutableArrayRef<T>(find_if_not(*this, Pred), end());
+ }
+
+ /// \brief Return a copy of *this with the first N elements not satisfying
+ /// the given predicate removed.
+ template <class PredicateT>
+ MutableArrayRef<T> drop_until(PredicateT Pred) const {
+ return MutableArrayRef<T>(find_if(*this, Pred), end());
+ }
+
+ /// \brief Return a copy of *this with only the first \p N elements.
+ MutableArrayRef<T> take_front(size_t N = 1) const {
+ if (N >= this->size())
+ return *this;
+ return drop_back(this->size() - N);
+ }
+
+ /// \brief Return a copy of *this with only the last \p N elements.
+ MutableArrayRef<T> take_back(size_t N = 1) const {
+ if (N >= this->size())
+ return *this;
+ return drop_front(this->size() - N);
+ }
+
+ /// \brief Return the first N elements of this Array that satisfy the given
+ /// predicate.
+ template <class PredicateT>
+ MutableArrayRef<T> take_while(PredicateT Pred) const {
+ return MutableArrayRef<T>(begin(), find_if_not(*this, Pred));
+ }
+
+ /// \brief Return the first N elements of this Array that don't satisfy the
+ /// given predicate.
+ template <class PredicateT>
+ MutableArrayRef<T> take_until(PredicateT Pred) const {
+ return MutableArrayRef<T>(begin(), find_if(*this, Pred));
+ }
+
/// @}
/// @name Operator Overloads
/// @{
@@ -311,6 +413,25 @@ namespace llvm {
}
};
+ /// This is a MutableArrayRef that owns its array.
+ template <typename T> class OwningArrayRef : public MutableArrayRef<T> {
+ public:
+ OwningArrayRef() {}
+ OwningArrayRef(size_t Size) : MutableArrayRef<T>(new T[Size], Size) {}
+ OwningArrayRef(ArrayRef<T> Data)
+ : MutableArrayRef<T>(new T[Data.size()], Data.size()) {
+ std::copy(Data.begin(), Data.end(), this->begin());
+ }
+ OwningArrayRef(OwningArrayRef &&Other) { *this = Other; }
+ OwningArrayRef &operator=(OwningArrayRef &&Other) {
+ delete[] this->data();
+ this->MutableArrayRef<T>::operator=(Other);
+ Other.MutableArrayRef<T>::operator=(MutableArrayRef<T>());
+ return *this;
+ }
+ ~OwningArrayRef() { delete[] this->data(); }
+ };
+
/// @name ArrayRef Convenience constructors
/// @{
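The new OwningArrayRef and the deleted assignment operators above are easiest to see side by side; a short sketch (the deleted-assignment line is kept as a comment because it no longer compiles):

    #include "llvm/ADT/ArrayRef.h"
    using namespace llvm;

    void owningAndAssignment() {
      int Src[] = {1, 2, 3};
      // OwningArrayRef copies Src into a heap array and frees it on destruction.
      OwningArrayRef<int> Owned(makeArrayRef(Src));
      Owned[1] = 20;          // mutable, unlike a plain ArrayRef

      ArrayRef<int> View = Owned;
      // View = 5;            // now ill-formed: would have bound to a temporary
      View = {};              // still selects the move assignment operator
      (void)View;
    }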
diff --git a/contrib/llvm/include/llvm/ADT/BitVector.h b/contrib/llvm/include/llvm/ADT/BitVector.h
index 661437126d48..cf3756d0d9c1 100644
--- a/contrib/llvm/include/llvm/ADT/BitVector.h
+++ b/contrib/llvm/include/llvm/ADT/BitVector.h
@@ -21,6 +21,7 @@
#include <cstdint>
#include <cstdlib>
#include <cstring>
+#include <utility>
namespace llvm {
@@ -45,14 +46,13 @@ public:
BitWord *WordRef;
unsigned BitPos;
- reference(); // Undefined
-
public:
reference(BitVector &b, unsigned Idx) {
WordRef = &b.Bits[Idx / BITWORD_SIZE];
BitPos = Idx % BITWORD_SIZE;
}
+ reference() = delete;
reference(const reference&) = default;
reference &operator=(reference t) {
diff --git a/contrib/llvm/include/llvm/ADT/CachedHashString.h b/contrib/llvm/include/llvm/ADT/CachedHashString.h
new file mode 100644
index 000000000000..a56a6213a073
--- /dev/null
+++ b/contrib/llvm/include/llvm/ADT/CachedHashString.h
@@ -0,0 +1,184 @@
+//===- llvm/ADT/CachedHashString.h - Prehashed string/StringRef -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines CachedHashString and CachedHashStringRef. These are owning
+// and not-owning string types that store their hash in addition to their string
+// data.
+//
+// Unlike std::string, CachedHashString can be used in DenseSet/DenseMap
+// (because, unlike std::string, CachedHashString lets us have empty and
+// tombstone values).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_CACHED_HASH_STRING_H
+#define LLVM_ADT_CACHED_HASH_STRING_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+/// A container holding a StringRef plus a precomputed hash.
+class CachedHashStringRef {
+ const char *P;
+ uint32_t Size;
+ uint32_t Hash;
+
+public:
+ // Explicit because hashing a string isn't free.
+ explicit CachedHashStringRef(StringRef S)
+ : CachedHashStringRef(S, DenseMapInfo<StringRef>::getHashValue(S)) {}
+
+ CachedHashStringRef(StringRef S, uint32_t Hash)
+ : P(S.data()), Size(S.size()), Hash(Hash) {
+ assert(S.size() <= std::numeric_limits<uint32_t>::max());
+ }
+
+ StringRef val() const { return StringRef(P, Size); }
+ uint32_t size() const { return Size; }
+ uint32_t hash() const { return Hash; }
+};
+
+template <> struct DenseMapInfo<CachedHashStringRef> {
+ static CachedHashStringRef getEmptyKey() {
+ return CachedHashStringRef(DenseMapInfo<StringRef>::getEmptyKey(), 0);
+ }
+ static CachedHashStringRef getTombstoneKey() {
+ return CachedHashStringRef(DenseMapInfo<StringRef>::getTombstoneKey(), 1);
+ }
+ static unsigned getHashValue(const CachedHashStringRef &S) {
+ assert(!isEqual(S, getEmptyKey()) && "Cannot hash the empty key!");
+ assert(!isEqual(S, getTombstoneKey()) && "Cannot hash the tombstone key!");
+ return S.hash();
+ }
+ static bool isEqual(const CachedHashStringRef &LHS,
+ const CachedHashStringRef &RHS) {
+ return LHS.hash() == RHS.hash() &&
+ DenseMapInfo<StringRef>::isEqual(LHS.val(), RHS.val());
+ }
+};
+
+/// A container holding a string it owns, plus a precomputed hash.
+///
+/// We do not null-terminate the string.
+class CachedHashString {
+ friend struct DenseMapInfo<CachedHashString>;
+
+ char *P;
+ uint32_t Size;
+ uint32_t Hash;
+
+ static char *getEmptyKeyPtr() { return DenseMapInfo<char *>::getEmptyKey(); }
+ static char *getTombstoneKeyPtr() {
+ return DenseMapInfo<char *>::getTombstoneKey();
+ }
+
+ bool isEmptyOrTombstone() const {
+ return P == getEmptyKeyPtr() || P == getTombstoneKeyPtr();
+ }
+
+ struct ConstructEmptyOrTombstoneTy {};
+
+ CachedHashString(ConstructEmptyOrTombstoneTy, char *EmptyOrTombstonePtr)
+ : P(EmptyOrTombstonePtr), Size(0), Hash(0) {
+ assert(isEmptyOrTombstone());
+ }
+
+ // TODO: Use small-string optimization to avoid allocating.
+
+public:
+ explicit CachedHashString(const char *S) : CachedHashString(StringRef(S)) {}
+
+ // Explicit because copying and hashing a string isn't free.
+ explicit CachedHashString(StringRef S)
+ : CachedHashString(S, DenseMapInfo<StringRef>::getHashValue(S)) {}
+
+ CachedHashString(StringRef S, uint32_t Hash)
+ : P(new char[S.size()]), Size(S.size()), Hash(Hash) {
+ memcpy(P, S.data(), S.size());
+ }
+
+ // Ideally this class would not be copyable. But SetVector requires copyable
+ // keys, and we want this to be usable there.
+ CachedHashString(const CachedHashString &Other)
+ : Size(Other.Size), Hash(Other.Hash) {
+ if (Other.isEmptyOrTombstone()) {
+ P = Other.P;
+ } else {
+ P = new char[Size];
+ memcpy(P, Other.P, Size);
+ }
+ }
+
+ CachedHashString &operator=(CachedHashString Other) {
+ swap(*this, Other);
+ return *this;
+ }
+
+ CachedHashString(CachedHashString &&Other) noexcept
+ : P(Other.P), Size(Other.Size), Hash(Other.Hash) {
+ Other.P = getEmptyKeyPtr();
+ }
+
+ ~CachedHashString() {
+ if (!isEmptyOrTombstone())
+ delete[] P;
+ }
+
+ StringRef val() const { return StringRef(P, Size); }
+ uint32_t size() const { return Size; }
+ uint32_t hash() const { return Hash; }
+
+ operator StringRef() const { return val(); }
+ operator CachedHashStringRef() const {
+ return CachedHashStringRef(val(), Hash);
+ }
+
+ friend void swap(CachedHashString &LHS, CachedHashString &RHS) {
+ using std::swap;
+ swap(LHS.P, RHS.P);
+ swap(LHS.Size, RHS.Size);
+ swap(LHS.Hash, RHS.Hash);
+ }
+};
+
+template <> struct DenseMapInfo<CachedHashString> {
+ static CachedHashString getEmptyKey() {
+ return CachedHashString(CachedHashString::ConstructEmptyOrTombstoneTy(),
+ CachedHashString::getEmptyKeyPtr());
+ }
+ static CachedHashString getTombstoneKey() {
+ return CachedHashString(CachedHashString::ConstructEmptyOrTombstoneTy(),
+ CachedHashString::getTombstoneKeyPtr());
+ }
+ static unsigned getHashValue(const CachedHashString &S) {
+ assert(!isEqual(S, getEmptyKey()) && "Cannot hash the empty key!");
+ assert(!isEqual(S, getTombstoneKey()) && "Cannot hash the tombstone key!");
+ return S.hash();
+ }
+ static bool isEqual(const CachedHashString &LHS,
+ const CachedHashString &RHS) {
+ if (LHS.hash() != RHS.hash())
+ return false;
+ if (LHS.P == CachedHashString::getEmptyKeyPtr())
+ return RHS.P == CachedHashString::getEmptyKeyPtr();
+ if (LHS.P == CachedHashString::getTombstoneKeyPtr())
+ return RHS.P == CachedHashString::getTombstoneKeyPtr();
+
+ // This is safe because if RHS.P is the empty or tombstone key, it will have
+ // length 0, so we'll never dereference its pointer.
+ return LHS.val() == RHS.val();
+ }
+};
+
+} // namespace llvm
+
+#endif
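
A minimal usage sketch (illustrative only, not part of the change) of why the
cached hash matters: deduplicating strings through a DenseSet of
CachedHashStringRef hashes each string exactly once, at insertion time. The
helper name and the input vector are assumptions made for the example.

    #include "llvm/ADT/CachedHashString.h"
    #include "llvm/ADT/DenseSet.h"
    #include "llvm/ADT/StringRef.h"
    #include <vector>

    using namespace llvm;

    // Count distinct strings; each StringRef is hashed once when the
    // CachedHashStringRef is built, and the set reuses that hash afterwards.
    static unsigned countUnique(const std::vector<StringRef> &Strings) {
      DenseSet<CachedHashStringRef> Seen;
      for (StringRef S : Strings)
        Seen.insert(CachedHashStringRef(S));
      return Seen.size();
    }
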
diff --git a/contrib/llvm/include/llvm/ADT/DAGDeltaAlgorithm.h b/contrib/llvm/include/llvm/ADT/DAGDeltaAlgorithm.h
index 3dd862c8b220..5ea0fe872868 100644
--- a/contrib/llvm/include/llvm/ADT/DAGDeltaAlgorithm.h
+++ b/contrib/llvm/include/llvm/ADT/DAGDeltaAlgorithm.h
@@ -10,6 +10,7 @@
#define LLVM_ADT_DAGDELTAALGORITHM_H
#include <set>
+#include <utility>
#include <vector>
namespace llvm {
@@ -37,6 +38,7 @@ namespace llvm {
/// should satisfy.
class DAGDeltaAlgorithm {
virtual void anchor();
+
public:
typedef unsigned change_ty;
typedef std::pair<change_ty, change_ty> edge_ty;
@@ -46,7 +48,7 @@ public:
typedef std::vector<changeset_ty> changesetlist_ty;
public:
- virtual ~DAGDeltaAlgorithm() {}
+ virtual ~DAGDeltaAlgorithm() = default;
/// Run - Minimize the DAG formed by the \p Changes vertices and the
/// \p Dependencies edges by executing \see ExecuteOneTest() on subsets of
@@ -74,4 +76,4 @@ public:
} // end namespace llvm
-#endif
+#endif // LLVM_ADT_DAGDELTAALGORITHM_H
diff --git a/contrib/llvm/include/llvm/ADT/DenseMap.h b/contrib/llvm/include/llvm/ADT/DenseMap.h
index 917c086beba3..0b4b09d4b733 100644
--- a/contrib/llvm/include/llvm/ADT/DenseMap.h
+++ b/contrib/llvm/include/llvm/ADT/DenseMap.h
@@ -19,20 +19,20 @@
#include "llvm/Support/AlignOf.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/PointerLikeTypeTraits.h"
#include "llvm/Support/type_traits.h"
#include <algorithm>
#include <cassert>
-#include <climits>
#include <cstddef>
#include <cstring>
#include <iterator>
+#include <limits>
#include <new>
#include <utility>
namespace llvm {
namespace detail {
+
// We extend a pair to allow users to override the bucket type with their own
// implementation without requiring two members.
template <typename KeyT, typename ValueT>
@@ -42,7 +42,8 @@ struct DenseMapPair : public std::pair<KeyT, ValueT> {
ValueT &getSecond() { return std::pair<KeyT, ValueT>::second; }
const ValueT &getSecond() const { return std::pair<KeyT, ValueT>::second; }
};
-}
+
+} // end namespace detail
template <
typename KeyT, typename ValueT, typename KeyInfoT = DenseMapInfo<KeyT>,
@@ -76,7 +77,7 @@ public:
return const_iterator(getBucketsEnd(), getBucketsEnd(), *this, true);
}
- bool LLVM_ATTRIBUTE_UNUSED_RESULT empty() const {
+ LLVM_NODISCARD bool empty() const {
return getNumEntries() == 0;
}
unsigned size() const { return getNumEntries(); }
@@ -169,30 +170,45 @@ public:
// If the key is already in the map, it returns false and doesn't update the
// value.
std::pair<iterator, bool> insert(const std::pair<KeyT, ValueT> &KV) {
+ return try_emplace(KV.first, KV.second);
+ }
+
+ // Inserts key,value pair into the map if the key isn't already in the map.
+ // If the key is already in the map, it returns false and doesn't update the
+ // value.
+ std::pair<iterator, bool> insert(std::pair<KeyT, ValueT> &&KV) {
+ return try_emplace(std::move(KV.first), std::move(KV.second));
+ }
+
+ // Inserts key,value pair into the map if the key isn't already in the map.
+ // The value is constructed in-place if the key is not in the map, otherwise
+ // it is not moved.
+ template <typename... Ts>
+ std::pair<iterator, bool> try_emplace(KeyT &&Key, Ts &&... Args) {
BucketT *TheBucket;
- if (LookupBucketFor(KV.first, TheBucket))
+ if (LookupBucketFor(Key, TheBucket))
return std::make_pair(iterator(TheBucket, getBucketsEnd(), *this, true),
false); // Already in map.
// Otherwise, insert the new element.
- TheBucket = InsertIntoBucket(KV.first, KV.second, TheBucket);
+ TheBucket =
+ InsertIntoBucket(TheBucket, std::move(Key), std::forward<Ts>(Args)...);
return std::make_pair(iterator(TheBucket, getBucketsEnd(), *this, true),
true);
}
// Inserts key,value pair into the map if the key isn't already in the map.
- // If the key is already in the map, it returns false and doesn't update the
- // value.
- std::pair<iterator, bool> insert(std::pair<KeyT, ValueT> &&KV) {
+ // The value is constructed in-place if the key is not in the map, otherwise
+ // it is not moved.
+ template <typename... Ts>
+ std::pair<iterator, bool> try_emplace(const KeyT &Key, Ts &&... Args) {
BucketT *TheBucket;
- if (LookupBucketFor(KV.first, TheBucket))
+ if (LookupBucketFor(Key, TheBucket))
return std::make_pair(iterator(TheBucket, getBucketsEnd(), *this, true),
false); // Already in map.
// Otherwise, insert the new element.
- TheBucket = InsertIntoBucket(std::move(KV.first),
- std::move(KV.second),
- TheBucket);
+ TheBucket = InsertIntoBucket(TheBucket, Key, std::forward<Ts>(Args)...);
return std::make_pair(iterator(TheBucket, getBucketsEnd(), *this, true),
true);
}
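
As a rough illustration of what try_emplace adds over insert (not part of the
patch; the map and helper below are hypothetical): the mapped value is
constructed in place, and only when the key is genuinely new.

    #include "llvm/ADT/DenseMap.h"
    #include <vector>

    using namespace llvm;

    // With try_emplace the std::vector is constructed only if Key is inserted;
    // insert(std::make_pair(Key, std::vector<int>(16, 0))) would build the
    // vector and then throw it away whenever the key is already present.
    static std::vector<int> &getOrCreate(DenseMap<unsigned, std::vector<int>> &M,
                                         unsigned Key) {
      return M.try_emplace(Key, 16, 0).first->second;
    }
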
@@ -211,8 +227,8 @@ public:
false); // Already in map.
// Otherwise, insert the new element.
- TheBucket = InsertIntoBucket(std::move(KV.first), std::move(KV.second), Val,
- TheBucket);
+ TheBucket = InsertIntoBucketWithLookup(TheBucket, std::move(KV.first),
+ std::move(KV.second), Val);
return std::make_pair(iterator(TheBucket, getBucketsEnd(), *this, true),
true);
}
@@ -224,7 +240,6 @@ public:
insert(*I);
}
-
bool erase(const KeyT &Val) {
BucketT *TheBucket;
if (!LookupBucketFor(Val, TheBucket))
@@ -249,7 +264,7 @@ public:
if (LookupBucketFor(Key, TheBucket))
return *TheBucket;
- return *InsertIntoBucket(Key, ValueT(), TheBucket);
+ return *InsertIntoBucket(TheBucket, Key);
}
ValueT &operator[](const KeyT &Key) {
@@ -261,7 +276,7 @@ public:
if (LookupBucketFor(Key, TheBucket))
return *TheBucket;
- return *InsertIntoBucket(std::move(Key), ValueT(), TheBucket);
+ return *InsertIntoBucket(TheBucket, std::move(Key));
}
ValueT &operator[](KeyT &&Key) {
@@ -429,36 +444,19 @@ private:
static_cast<DerivedT *>(this)->shrink_and_clear();
}
-
- BucketT *InsertIntoBucket(const KeyT &Key, const ValueT &Value,
- BucketT *TheBucket) {
+ template <typename KeyArg, typename... ValueArgs>
+ BucketT *InsertIntoBucket(BucketT *TheBucket, KeyArg &&Key,
+ ValueArgs &&... Values) {
TheBucket = InsertIntoBucketImpl(Key, Key, TheBucket);
- TheBucket->getFirst() = Key;
- ::new (&TheBucket->getSecond()) ValueT(Value);
- return TheBucket;
- }
-
- BucketT *InsertIntoBucket(const KeyT &Key, ValueT &&Value,
- BucketT *TheBucket) {
- TheBucket = InsertIntoBucketImpl(Key, Key, TheBucket);
-
- TheBucket->getFirst() = Key;
- ::new (&TheBucket->getSecond()) ValueT(std::move(Value));
- return TheBucket;
- }
-
- BucketT *InsertIntoBucket(KeyT &&Key, ValueT &&Value, BucketT *TheBucket) {
- TheBucket = InsertIntoBucketImpl(Key, Key, TheBucket);
-
- TheBucket->getFirst() = std::move(Key);
- ::new (&TheBucket->getSecond()) ValueT(std::move(Value));
+ TheBucket->getFirst() = std::forward<KeyArg>(Key);
+ ::new (&TheBucket->getSecond()) ValueT(std::forward<ValueArgs>(Values)...);
return TheBucket;
}
template <typename LookupKeyT>
- BucketT *InsertIntoBucket(KeyT &&Key, ValueT &&Value, LookupKeyT &Lookup,
- BucketT *TheBucket) {
+ BucketT *InsertIntoBucketWithLookup(BucketT *TheBucket, KeyT &&Key,
+ ValueT &&Value, LookupKeyT &Lookup) {
TheBucket = InsertIntoBucketImpl(Key, Lookup, TheBucket);
TheBucket->getFirst() = std::move(Key);
@@ -530,7 +528,7 @@ private:
unsigned BucketNo = getHashValue(Val) & (NumBuckets-1);
unsigned ProbeAmt = 1;
- while (1) {
+ while (true) {
const BucketT *ThisBucket = BucketsPtr + BucketNo;
// Found Val's bucket? If so, return it.
if (LLVM_LIKELY(KeyInfoT::isEqual(Val, ThisBucket->getFirst()))) {
@@ -744,6 +742,8 @@ class SmallDenseMap
// simplicity of referring to them.
typedef DenseMapBase<SmallDenseMap, KeyT, ValueT, KeyInfoT, BucketT> BaseT;
friend class DenseMapBase<SmallDenseMap, KeyT, ValueT, KeyInfoT, BucketT>;
+ static_assert(isPowerOf2_64(InlineBuckets),
+ "InlineBuckets must be a power of 2.");
unsigned Small : 1;
unsigned NumEntries : 31;
@@ -968,7 +968,8 @@ private:
return NumEntries;
}
void setNumEntries(unsigned Num) {
- assert(Num < INT_MAX && "Cannot support more than INT_MAX entries");
+ // NumEntries is hardcoded to be 31 bits wide.
+ assert(Num < (1U << 31) && "Cannot support more than 1<<31 entries");
NumEntries = Num;
}
@@ -1042,8 +1043,10 @@ public:
typedef value_type *pointer;
typedef value_type &reference;
typedef std::forward_iterator_tag iterator_category;
+
private:
pointer Ptr, End;
+
public:
DenseMapIterator() : Ptr(nullptr), End(nullptr) {}
@@ -1117,4 +1120,4 @@ capacity_in_bytes(const DenseMap<KeyT, ValueT, KeyInfoT> &X) {
} // end namespace llvm
-#endif
+#endif // LLVM_ADT_DENSEMAP_H
diff --git a/contrib/llvm/include/llvm/ADT/DenseMapInfo.h b/contrib/llvm/include/llvm/ADT/DenseMapInfo.h
index 18c692e0cbcc..a844ebcccf5b 100644
--- a/contrib/llvm/include/llvm/ADT/DenseMapInfo.h
+++ b/contrib/llvm/include/llvm/ADT/DenseMapInfo.h
@@ -30,36 +30,6 @@ struct DenseMapInfo {
//static bool isEqual(const T &LHS, const T &RHS);
};
-template <typename T> struct CachedHash {
- CachedHash(T Val) : Val(std::move(Val)) {
- Hash = DenseMapInfo<T>::getHashValue(Val);
- }
- CachedHash(T Val, unsigned Hash) : Val(std::move(Val)), Hash(Hash) {}
- T Val;
- unsigned Hash;
-};
-
-// Provide DenseMapInfo for all CachedHash<T>.
-template <typename T> struct DenseMapInfo<CachedHash<T>> {
- static CachedHash<T> getEmptyKey() {
- T N = DenseMapInfo<T>::getEmptyKey();
- return {N, 0};
- }
- static CachedHash<T> getTombstoneKey() {
- T N = DenseMapInfo<T>::getTombstoneKey();
- return {N, 0};
- }
- static unsigned getHashValue(CachedHash<T> Val) {
- assert(!isEqual(Val, getEmptyKey()) && "Cannot hash the empty key!");
- assert(!isEqual(Val, getTombstoneKey()) &&
- "Cannot hash the tombstone key!");
- return Val.Hash;
- }
- static bool isEqual(CachedHash<T> A, CachedHash<T> B) {
- return DenseMapInfo<T>::isEqual(A.Val, B.Val);
- }
-};
-
// Provide DenseMapInfo for all pointers.
template<typename T>
struct DenseMapInfo<T*> {
diff --git a/contrib/llvm/include/llvm/ADT/DenseSet.h b/contrib/llvm/include/llvm/ADT/DenseSet.h
index 3724a09623f3..b25d3b7cba6f 100644
--- a/contrib/llvm/include/llvm/ADT/DenseSet.h
+++ b/contrib/llvm/include/llvm/ADT/DenseSet.h
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines the DenseSet class.
+// This file defines the DenseSet and SmallDenseSet classes.
//
//===----------------------------------------------------------------------===//
@@ -15,6 +15,7 @@
#define LLVM_ADT_DENSESET_H
#include "llvm/ADT/DenseMap.h"
+#include <initializer_list>
namespace llvm {
@@ -32,13 +33,18 @@ public:
DenseSetEmpty &getSecond() { return *this; }
const DenseSetEmpty &getSecond() const { return *this; }
};
-}
-/// DenseSet - This implements a dense probed hash-table based set.
-template<typename ValueT, typename ValueInfoT = DenseMapInfo<ValueT> >
-class DenseSet {
- typedef DenseMap<ValueT, detail::DenseSetEmpty, ValueInfoT,
- detail::DenseSetPair<ValueT>> MapTy;
+/// Base class for DenseSet and SmallDenseSet.
+///
+/// MapTy should be either
+///
+/// DenseMap<ValueT, detail::DenseSetEmpty, ValueInfoT,
+/// detail::DenseSetPair<ValueT>>
+///
+/// or the equivalent SmallDenseMap type. ValueInfoT must implement the
+/// DenseMapInfo "concept".
+template <typename ValueT, typename MapTy, typename ValueInfoT>
+class DenseSetImpl {
static_assert(sizeof(typename MapTy::value_type) == sizeof(ValueT),
"DenseMap buckets unexpectedly large!");
MapTy TheMap;
@@ -48,7 +54,12 @@ public:
typedef ValueT value_type;
typedef unsigned size_type;
- explicit DenseSet(unsigned NumInitBuckets = 0) : TheMap(NumInitBuckets) {}
+ explicit DenseSetImpl(unsigned InitialReserve = 0) : TheMap(InitialReserve) {}
+
+ DenseSetImpl(std::initializer_list<ValueT> Elems)
+ : DenseSetImpl(Elems.size()) {
+ insert(Elems.begin(), Elems.end());
+ }
bool empty() const { return TheMap.empty(); }
size_type size() const { return TheMap.size(); }
@@ -58,6 +69,10 @@ public:
/// the Size of the set.
void resize(size_t Size) { TheMap.resize(Size); }
+ /// Grow the DenseSet so that it can contain at least \p Size items
+ /// before resizing again.
+ void reserve(size_t Size) { TheMap.reserve(Size); }
+
void clear() {
TheMap.clear();
}
@@ -71,15 +86,13 @@ public:
return TheMap.erase(V);
}
- void swap(DenseSet& RHS) {
- TheMap.swap(RHS.TheMap);
- }
+ void swap(DenseSetImpl &RHS) { TheMap.swap(RHS.TheMap); }
// Iterators.
class Iterator {
typename MapTy::iterator I;
- friend class DenseSet;
+ friend class DenseSetImpl;
public:
typedef typename MapTy::iterator::difference_type difference_type;
@@ -131,6 +144,9 @@ public:
const_iterator end() const { return ConstIterator(TheMap.end()); }
iterator find(const ValueT &V) { return Iterator(TheMap.find(V)); }
+ const_iterator find(const ValueT &V) const {
+ return ConstIterator(TheMap.find(V));
+ }
/// Alternative version of find() which allows a different, and possibly less
/// expensive, key type.
@@ -151,7 +167,12 @@ public:
std::pair<iterator, bool> insert(const ValueT &V) {
detail::DenseSetEmpty Empty;
- return TheMap.insert(std::make_pair(V, Empty));
+ return TheMap.try_emplace(V, Empty);
+ }
+
+ std::pair<iterator, bool> insert(ValueT &&V) {
+ detail::DenseSetEmpty Empty;
+ return TheMap.try_emplace(std::move(V), Empty);
}
/// Alternative version of insert that uses a different (and possibly less
@@ -159,12 +180,11 @@ public:
template <typename LookupKeyT>
std::pair<iterator, bool> insert_as(const ValueT &V,
const LookupKeyT &LookupKey) {
- return insert_as(ValueT(V), LookupKey);
+ return TheMap.insert_as({V, detail::DenseSetEmpty()}, LookupKey);
}
template <typename LookupKeyT>
std::pair<iterator, bool> insert_as(ValueT &&V, const LookupKeyT &LookupKey) {
- detail::DenseSetEmpty Empty;
- return TheMap.insert_as(std::make_pair(std::move(V), Empty), LookupKey);
+ return TheMap.insert_as({std::move(V), detail::DenseSetEmpty()}, LookupKey);
}
// Range insertion of values.
@@ -175,6 +195,42 @@ public:
}
};
+} // namespace detail
+
+/// Implements a dense probed hash-table based set.
+template <typename ValueT, typename ValueInfoT = DenseMapInfo<ValueT>>
+class DenseSet : public detail::DenseSetImpl<
+ ValueT, DenseMap<ValueT, detail::DenseSetEmpty, ValueInfoT,
+ detail::DenseSetPair<ValueT>>,
+ ValueInfoT> {
+ using BaseT =
+ detail::DenseSetImpl<ValueT,
+ DenseMap<ValueT, detail::DenseSetEmpty, ValueInfoT,
+ detail::DenseSetPair<ValueT>>,
+ ValueInfoT>;
+
+public:
+ using BaseT::BaseT;
+};
+
+/// Implements a dense probed hash-table based set with some number of buckets
+/// stored inline.
+template <typename ValueT, unsigned InlineBuckets = 4,
+ typename ValueInfoT = DenseMapInfo<ValueT>>
+class SmallDenseSet
+ : public detail::DenseSetImpl<
+ ValueT, SmallDenseMap<ValueT, detail::DenseSetEmpty, InlineBuckets,
+ ValueInfoT, detail::DenseSetPair<ValueT>>,
+ ValueInfoT> {
+ using BaseT = detail::DenseSetImpl<
+ ValueT, SmallDenseMap<ValueT, detail::DenseSetEmpty, InlineBuckets,
+ ValueInfoT, detail::DenseSetPair<ValueT>>,
+ ValueInfoT>;
+
+public:
+ using BaseT::BaseT;
+};
+
} // end namespace llvm
#endif
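
A short sketch (hypothetical values) of the new pieces: the initializer-list
constructor and SmallDenseSet, which keeps a small power-of-two number of
buckets inline and so avoids a heap allocation for small sets.

    #include "llvm/ADT/DenseSet.h"

    using namespace llvm;

    static bool isReservedId(unsigned Id) {
      // Eight buckets live inline; the set only spills to the heap if it grows.
      static const SmallDenseSet<unsigned, 8> Reserved = {0, 1, 2, 3};
      return Reserved.count(Id) != 0;
    }
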
diff --git a/contrib/llvm/include/llvm/ADT/DepthFirstIterator.h b/contrib/llvm/include/llvm/ADT/DepthFirstIterator.h
index c9317b8539b3..c54573204588 100644
--- a/contrib/llvm/include/llvm/ADT/DepthFirstIterator.h
+++ b/contrib/llvm/include/llvm/ADT/DepthFirstIterator.h
@@ -34,10 +34,13 @@
#define LLVM_ADT_DEPTHFIRSTITERATOR_H
#include "llvm/ADT/GraphTraits.h"
-#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/iterator_range.h"
+#include <iterator>
#include <set>
+#include <utility>
#include <vector>
namespace llvm {
@@ -55,44 +58,56 @@ class df_iterator_storage<SetType, true> {
public:
df_iterator_storage(SetType &VSet) : Visited(VSet) {}
df_iterator_storage(const df_iterator_storage &S) : Visited(S.Visited) {}
+
SetType &Visited;
};
+// The visited state for the iteration is a simple set augmented with
+// one more method, completed, which is invoked when all children of a
+// node have been processed. It is intended to distinguish back and
+// cross edges in the spanning tree but is not used in the common case.
+template <typename NodeRef, unsigned SmallSize=8>
+struct df_iterator_default_set : public SmallPtrSet<NodeRef, SmallSize> {
+ typedef SmallPtrSet<NodeRef, SmallSize> BaseSet;
+ typedef typename BaseSet::iterator iterator;
+ std::pair<iterator,bool> insert(NodeRef N) { return BaseSet::insert(N); }
+ template <typename IterT>
+ void insert(IterT Begin, IterT End) { BaseSet::insert(Begin,End); }
+
+ void completed(NodeRef) { }
+};
+
// Generic Depth First Iterator
-template<class GraphT,
-class SetType = llvm::SmallPtrSet<typename GraphTraits<GraphT>::NodeType*, 8>,
- bool ExtStorage = false, class GT = GraphTraits<GraphT> >
-class df_iterator : public std::iterator<std::forward_iterator_tag,
- typename GT::NodeType, ptrdiff_t>,
- public df_iterator_storage<SetType, ExtStorage> {
- typedef std::iterator<std::forward_iterator_tag,
- typename GT::NodeType, ptrdiff_t> super;
-
- typedef typename GT::NodeType NodeType;
+template <class GraphT,
+ class SetType =
+ df_iterator_default_set<typename GraphTraits<GraphT>::NodeRef>,
+ bool ExtStorage = false, class GT = GraphTraits<GraphT>>
+class df_iterator
+ : public std::iterator<std::forward_iterator_tag, typename GT::NodeRef>,
+ public df_iterator_storage<SetType, ExtStorage> {
+ typedef std::iterator<std::forward_iterator_tag, typename GT::NodeRef> super;
+
+ typedef typename GT::NodeRef NodeRef;
typedef typename GT::ChildIteratorType ChildItTy;
- typedef PointerIntPair<NodeType*, 1> PointerIntTy;
+
+ // First element is node reference, second is the 'next child' to visit.
+ // The second element is initialized lazily to pick up graph changes during
+ // the DFS.
+ typedef std::pair<NodeRef, Optional<ChildItTy>> StackElement;
// VisitStack - Used to maintain the ordering. Top = current block
- // First element is node pointer, second is the 'next child' to visit
- // if the int in PointerIntTy is 0, the 'next child' to visit is invalid
- std::vector<std::pair<PointerIntTy, ChildItTy>> VisitStack;
+ std::vector<StackElement> VisitStack;
private:
- inline df_iterator(NodeType *Node) {
+ inline df_iterator(NodeRef Node) {
this->Visited.insert(Node);
- VisitStack.push_back(
- std::make_pair(PointerIntTy(Node, 0), GT::child_begin(Node)));
+ VisitStack.push_back(StackElement(Node, None));
}
- inline df_iterator() {
- // End is when stack is empty
- }
- inline df_iterator(NodeType *Node, SetType &S)
- : df_iterator_storage<SetType, ExtStorage>(S) {
- if (!S.count(Node)) {
- VisitStack.push_back(
- std::make_pair(PointerIntTy(Node, 0), GT::child_begin(Node)));
- this->Visited.insert(Node);
- }
+ inline df_iterator() = default; // End is when stack is empty
+ inline df_iterator(NodeRef Node, SetType &S)
+ : df_iterator_storage<SetType, ExtStorage>(S) {
+ if (this->Visited.insert(Node).second)
+ VisitStack.push_back(StackElement(Node, None));
}
inline df_iterator(SetType &S)
: df_iterator_storage<SetType, ExtStorage>(S) {
@@ -101,26 +116,26 @@ private:
inline void toNext() {
do {
- std::pair<PointerIntTy, ChildItTy> &Top = VisitStack.back();
- NodeType *Node = Top.first.getPointer();
- ChildItTy &It = Top.second;
- if (!Top.first.getInt()) {
- // now retrieve the real begin of the children before we dive in
- It = GT::child_begin(Node);
- Top.first.setInt(1);
- }
+ NodeRef Node = VisitStack.back().first;
+ Optional<ChildItTy> &Opt = VisitStack.back().second;
+
+ if (!Opt)
+ Opt.emplace(GT::child_begin(Node));
- while (It != GT::child_end(Node)) {
- NodeType *Next = *It++;
+ // Notice that we directly mutate *Opt here, so that
+ // VisitStack.back().second actually gets updated as the iterator
+ // increases.
+ while (*Opt != GT::child_end(Node)) {
+ NodeRef Next = *(*Opt)++;
// Has our next sibling been visited?
- if (Next && this->Visited.insert(Next).second) {
+ if (this->Visited.insert(Next).second) {
// No, do it now.
- VisitStack.push_back(
- std::make_pair(PointerIntTy(Next, 0), GT::child_begin(Next)));
+ VisitStack.push_back(StackElement(Next, None));
return;
}
}
-
+ this->Visited.completed(Node);
+
// Oops, ran out of successors... go up a level on the stack.
VisitStack.pop_back();
} while (!VisitStack.empty());
@@ -146,13 +161,13 @@ public:
}
bool operator!=(const df_iterator &x) const { return !(*this == x); }
- pointer operator*() const { return VisitStack.back().first.getPointer(); }
+ const NodeRef &operator*() const { return VisitStack.back().first; }
// This is a nonstandard operator-> that dereferences the pointer an extra
// time... so that you can actually call methods ON the Node, because
  // the contained type is a pointer. This allows BBIt->getTerminator(), for example.
//
- NodeType *operator->() const { return **this; }
+ NodeRef operator->() const { return **this; }
df_iterator &operator++() { // Preincrement
toNext();
@@ -180,7 +195,7 @@ public:
// specified node. This is public, and will probably be used to iterate over
// nodes that a depth first iteration did not find: ie unreachable nodes.
//
- bool nodeVisited(NodeType *Node) const {
+ bool nodeVisited(NodeRef Node) const {
return this->Visited.count(Node) != 0;
}
@@ -190,9 +205,7 @@ public:
/// getPath - Return the n'th node in the path from the entry node to the
/// current node.
- NodeType *getPath(unsigned n) const {
- return VisitStack[n].first.getPointer();
- }
+ NodeRef getPath(unsigned n) const { return VisitStack[n].first; }
};
// Provide global constructors that automatically figure out correct types...
@@ -214,7 +227,7 @@ iterator_range<df_iterator<T>> depth_first(const T& G) {
}
// Provide global definitions of external depth first iterators...
-template <class T, class SetTy = std::set<typename GraphTraits<T>::NodeType*> >
+template <class T, class SetTy = std::set<typename GraphTraits<T>::NodeRef>>
struct df_ext_iterator : public df_iterator<T, SetTy, true> {
df_ext_iterator(const df_iterator<T, SetTy, true> &V)
: df_iterator<T, SetTy, true>(V) {}
@@ -238,7 +251,8 @@ iterator_range<df_ext_iterator<T, SetTy>> depth_first_ext(const T& G,
// Provide global definitions of inverse depth first iterators...
template <class T,
- class SetTy = llvm::SmallPtrSet<typename GraphTraits<T>::NodeType*, 8>,
+ class SetTy =
+ df_iterator_default_set<typename GraphTraits<T>::NodeRef>,
bool External = false>
struct idf_iterator : public df_iterator<Inverse<T>, SetTy, External> {
idf_iterator(const df_iterator<Inverse<T>, SetTy, External> &V)
@@ -262,7 +276,7 @@ iterator_range<idf_iterator<T>> inverse_depth_first(const T& G) {
}
// Provide global definitions of external inverse depth first iterators...
-template <class T, class SetTy = std::set<typename GraphTraits<T>::NodeType*> >
+template <class T, class SetTy = std::set<typename GraphTraits<T>::NodeRef>>
struct idf_ext_iterator : public idf_iterator<T, SetTy, true> {
idf_ext_iterator(const idf_iterator<T, SetTy, true> &V)
: idf_iterator<T, SetTy, true>(V) {}
@@ -286,6 +300,6 @@ iterator_range<idf_ext_iterator<T, SetTy>> inverse_depth_first_ext(const T& G,
return make_range(idf_ext_begin(G, S), idf_ext_end(G, S));
}
-} // End llvm namespace
+} // end namespace llvm
-#endif
+#endif // LLVM_ADT_DEPTHFIRSTITERATOR_H
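
A hedged sketch of the visited-set concept described above: a set whose
completed() hook records the order in which nodes finish, suitable as the
external storage of df_ext_iterator. It assumes NodeRef is a pointer type (so
DenseMapInfo applies); the Graph type in the usage comment is hypothetical.

    #include "llvm/ADT/DenseMap.h"
    #include "llvm/ADT/DepthFirstIterator.h"

    using namespace llvm;

    // Visited set that also assigns each node a post-order (finish) number.
    template <typename NodeRef>
    struct PostOrderNumberingSet : df_iterator_default_set<NodeRef> {
      DenseMap<NodeRef, unsigned> Finish;
      void completed(NodeRef N) {
        unsigned Order = Finish.size();
        Finish.try_emplace(N, Order);
      }
    };

    // Usage sketch, assuming GraphTraits<Graph *> is defined:
    //   PostOrderNumberingSet<GraphTraits<Graph *>::NodeRef> Visited;
    //   for (auto N : depth_first_ext(G, Visited))
    //     (void)N;       // after the loop, Visited.Finish holds finish order
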
diff --git a/contrib/llvm/include/llvm/ADT/EpochTracker.h b/contrib/llvm/include/llvm/ADT/EpochTracker.h
index 582d58179d13..db39ba4e0c50 100644
--- a/contrib/llvm/include/llvm/ADT/EpochTracker.h
+++ b/contrib/llvm/include/llvm/ADT/EpochTracker.h
@@ -16,28 +16,14 @@
#ifndef LLVM_ADT_EPOCH_TRACKER_H
#define LLVM_ADT_EPOCH_TRACKER_H
+#include "llvm/Config/abi-breaking.h"
#include "llvm/Config/llvm-config.h"
#include <cstdint>
namespace llvm {
-#ifndef LLVM_ENABLE_ABI_BREAKING_CHECKS
-
-class DebugEpochBase {
-public:
- void incrementEpoch() {}
-
- class HandleBase {
- public:
- HandleBase() = default;
- explicit HandleBase(const DebugEpochBase *) {}
- bool isHandleInSync() const { return true; }
- const void *getEpochAddress() const { return nullptr; }
- };
-};
-
-#else
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
/// \brief A base class for data structure classes wishing to make iterators
/// ("handles") pointing into themselves fail-fast. When building without
@@ -92,6 +78,21 @@ public:
};
};
+#else
+
+class DebugEpochBase {
+public:
+ void incrementEpoch() {}
+
+ class HandleBase {
+ public:
+ HandleBase() = default;
+ explicit HandleBase(const DebugEpochBase *) {}
+ bool isHandleInSync() const { return true; }
+ const void *getEpochAddress() const { return nullptr; }
+ };
+};
+
#endif // LLVM_ENABLE_ABI_BREAKING_CHECKS
} // namespace llvm
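
A sketch of the intended usage pattern, with a hypothetical container: the
container bumps its epoch on every mutation, and handles built from it can
assert (in ABI-breaking-checks builds) that the container has not changed
underneath them.

    #include "llvm/ADT/EpochTracker.h"
    #include <cassert>
    #include <vector>

    using namespace llvm;

    class TinyBag : public DebugEpochBase {
      std::vector<int> Data;

    public:
      class Handle : public DebugEpochBase::HandleBase {
        const TinyBag *Bag;
        unsigned Idx;

      public:
        Handle(const TinyBag &B, unsigned I) : HandleBase(&B), Bag(&B), Idx(I) {}
        int get() const {
          assert(isHandleInSync() && "bag mutated since handle was created");
          return Bag->Data[Idx];
        }
      };

      void push(int V) {
        incrementEpoch();            // invalidates outstanding handles
        Data.push_back(V);
      }
      Handle handleAt(unsigned I) const { return Handle(*this, I); }
    };
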
diff --git a/contrib/llvm/include/llvm/ADT/EquivalenceClasses.h b/contrib/llvm/include/llvm/ADT/EquivalenceClasses.h
index d6a26f88e67d..8fcac178ffc9 100644
--- a/contrib/llvm/include/llvm/ADT/EquivalenceClasses.h
+++ b/contrib/llvm/include/llvm/ADT/EquivalenceClasses.h
@@ -15,9 +15,10 @@
#ifndef LLVM_ADT_EQUIVALENCECLASSES_H
#define LLVM_ADT_EQUIVALENCECLASSES_H
-#include "llvm/Support/DataTypes.h"
#include <cassert>
#include <cstddef>
+#include <cstdint>
+#include <iterator>
#include <set>
namespace llvm {
@@ -70,6 +71,7 @@ class EquivalenceClasses {
friend class EquivalenceClasses;
mutable const ECValue *Leader, *Next;
ElemTy Data;
+
// ECValue ctor - Start out with EndOfList pointing to this node, Next is
// Null, isLeader = true.
ECValue(const ElemTy &Elt)
@@ -81,6 +83,7 @@ class EquivalenceClasses {
// Path compression.
return Leader = Leader->getLeader();
}
+
const ECValue *getEndOfList() const {
assert(isLeader() && "Cannot get the end of a list for a non-leader!");
return Leader;
@@ -90,6 +93,7 @@ class EquivalenceClasses {
assert(getNext() == nullptr && "Already has a next pointer!");
Next = (const ECValue*)((intptr_t)NewNext | (intptr_t)isLeader());
}
+
public:
ECValue(const ECValue &RHS) : Leader(this), Next((ECValue*)(intptr_t)1),
Data(RHS.Data) {
@@ -115,7 +119,7 @@ class EquivalenceClasses {
std::set<ECValue> TheMapping;
public:
- EquivalenceClasses() {}
+ EquivalenceClasses() = default;
EquivalenceClasses(const EquivalenceClasses &RHS) {
operator=(RHS);
}
@@ -187,7 +191,6 @@ public:
return NC;
}
-
//===--------------------------------------------------------------------===//
// Mutation methods
@@ -210,7 +213,6 @@ public:
return findLeader(TheMapping.find(V));
}
-
/// union - Merge the two equivalence sets for the specified values, inserting
/// them if they do not already exist in the equivalence set.
member_iterator unionSets(const ElemTy &V1, const ElemTy &V2) {
@@ -243,12 +245,13 @@ public:
const ElemTy, ptrdiff_t> super;
const ECValue *Node;
friend class EquivalenceClasses;
+
public:
typedef size_t size_type;
typedef typename super::pointer pointer;
typedef typename super::reference reference;
- explicit member_iterator() {}
+ explicit member_iterator() = default;
explicit member_iterator(const ECValue *N) : Node(N) {}
reference operator*() const {
@@ -278,6 +281,6 @@ public:
};
};
-} // End llvm namespace
+} // end namespace llvm
-#endif
+#endif // LLVM_ADT_EQUIVALENCECLASSES_H
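
For reference, a small usage sketch of the union-find interface touched above
(the values are arbitrary):

    #include "llvm/ADT/EquivalenceClasses.h"

    using namespace llvm;

    static void equivalenceClassesExample() {
      EquivalenceClasses<unsigned> EC;
      EC.unionSets(1, 2);
      EC.unionSets(2, 3);      // {1, 2, 3} are now a single class
      EC.insert(4);            // {4} is a singleton class
      bool Same = EC.getLeaderValue(1) == EC.getLeaderValue(3);   // true
      bool Diff = EC.getLeaderValue(1) != EC.getLeaderValue(4);   // true
      (void)Same;
      (void)Diff;
    }
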
diff --git a/contrib/llvm/include/llvm/ADT/FoldingSet.h b/contrib/llvm/include/llvm/ADT/FoldingSet.h
index f16258af4ae2..dab18297dd3b 100644
--- a/contrib/llvm/include/llvm/ADT/FoldingSet.h
+++ b/contrib/llvm/include/llvm/ADT/FoldingSet.h
@@ -19,8 +19,13 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/iterator.h"
#include "llvm/Support/Allocator.h"
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <utility>
namespace llvm {
+
/// This folding set is used for two purposes:
/// 1. Given information about a node we want to create, look up the unique
/// instance of the node in the set. If the node already exists, return
@@ -184,6 +189,7 @@ public:
/// EltCount-th node won't cause a rebucket operation. reserve is permitted
/// to allocate more space than requested by EltCount.
void reserve(unsigned EltCount);
+
/// capacity - Returns the number of nodes permitted in the folding set
/// before a rebucket operation is performed.
unsigned capacity() {
@@ -200,14 +206,17 @@ private:
/// NewBucketCount must be a power of two, and must be greater than the old
/// bucket count.
void GrowBucketCount(unsigned NewBucketCount);
+
protected:
/// GetNodeProfile - Instantiations of the FoldingSet template implement
/// this function to gather data bits for the given node.
virtual void GetNodeProfile(Node *N, FoldingSetNodeID &ID) const = 0;
+
/// NodeEquals - Instantiations of the FoldingSet template implement
/// this function to compare the given node with the given ID.
virtual bool NodeEquals(Node *N, const FoldingSetNodeID &ID, unsigned IDHash,
FoldingSetNodeID &TempID) const=0;
+
/// ComputeNodeHash - Instantiations of the FoldingSet template implement
/// this function to compute a hash value for the given node.
virtual unsigned ComputeNodeHash(Node *N, FoldingSetNodeID &TempID) const = 0;
@@ -215,8 +224,6 @@ protected:
//===----------------------------------------------------------------------===//
-template<typename T> struct FoldingSetTrait;
-
/// DefaultFoldingSetTrait - This class provides default implementations
/// for FoldingSetTrait implementations.
///
@@ -252,8 +259,6 @@ template<typename T> struct DefaultFoldingSetTrait {
template<typename T> struct FoldingSetTrait
: public DefaultFoldingSetTrait<T> {};
-template<typename T, typename Ctx> struct ContextualFoldingSetTrait;
-
/// DefaultContextualFoldingSetTrait - Like DefaultFoldingSetTrait, but
/// for ContextualFoldingSets.
template<typename T, typename Ctx>
@@ -261,6 +266,7 @@ struct DefaultContextualFoldingSetTrait {
static void Profile(T &X, FoldingSetNodeID &ID, Ctx Context) {
X.Profile(ID, Context);
}
+
static inline bool Equals(T &X, const FoldingSetNodeID &ID, unsigned IDHash,
FoldingSetNodeID &TempID, Ctx Context);
static inline unsigned ComputeHash(T &X, FoldingSetNodeID &TempID,
@@ -279,11 +285,11 @@ template<typename T, typename Ctx> struct ContextualFoldingSetTrait
/// is often much larger than necessary, and the possibility of heap
/// allocation means it requires a non-trivial destructor call.
class FoldingSetNodeIDRef {
- const unsigned *Data;
- size_t Size;
+ const unsigned *Data = nullptr;
+ size_t Size = 0;
public:
- FoldingSetNodeIDRef() : Data(nullptr), Size(0) {}
+ FoldingSetNodeIDRef() = default;
FoldingSetNodeIDRef(const unsigned *D, size_t S) : Data(D), Size(S) {}
/// ComputeHash - Compute a strong hash value for this FoldingSetNodeIDRef,
@@ -313,7 +319,7 @@ class FoldingSetNodeID {
SmallVector<unsigned, 32> Bits;
public:
- FoldingSetNodeID() {}
+ FoldingSetNodeID() = default;
FoldingSetNodeID(FoldingSetNodeIDRef Ref)
: Bits(Ref.getData(), Ref.getData() + Ref.getSize()) {}
@@ -418,6 +424,7 @@ private:
T *TN = static_cast<T *>(N);
FoldingSetTrait<T>::Profile(*TN, ID);
}
+
/// NodeEquals - Instantiations may optionally provide a way to compare a
/// node with a specified ID.
bool NodeEquals(Node *N, const FoldingSetNodeID &ID, unsigned IDHash,
@@ -425,6 +432,7 @@ private:
T *TN = static_cast<T *>(N);
return FoldingSetTrait<T>::Equals(*TN, ID, IDHash, TempID);
}
+
/// ComputeNodeHash - Instantiations may optionally provide a way to compute a
/// hash value directly from a node.
unsigned ComputeNodeHash(Node *N, FoldingSetNodeID &TempID) const override {
@@ -483,7 +491,7 @@ public:
///
/// T must be a subclass of FoldingSetNode and implement a Profile
/// function with signature
-/// void Profile(llvm::FoldingSetNodeID &, Ctx);
+/// void Profile(FoldingSetNodeID &, Ctx);
template <class T, class Ctx>
class ContextualFoldingSet final : public FoldingSetImpl {
// Unfortunately, this can't derive from FoldingSet<T> because the
@@ -501,12 +509,14 @@ private:
T *TN = static_cast<T *>(N);
ContextualFoldingSetTrait<T, Ctx>::Profile(*TN, ID, Context);
}
+
bool NodeEquals(FoldingSetImpl::Node *N, const FoldingSetNodeID &ID,
unsigned IDHash, FoldingSetNodeID &TempID) const override {
T *TN = static_cast<T *>(N);
return ContextualFoldingSetTrait<T, Ctx>::Equals(*TN, ID, IDHash, TempID,
Context);
}
+
unsigned ComputeNodeHash(FoldingSetImpl::Node *N,
FoldingSetNodeID &TempID) const override {
T *TN = static_cast<T *>(N);
@@ -558,7 +568,7 @@ public:
/// to provide the interface of FoldingSet but with deterministic iteration
/// order based on the insertion order. T must be a subclass of FoldingSetNode
/// and implement a Profile function.
-template <class T, class VectorT = SmallVector<T*, 8> >
+template <class T, class VectorT = SmallVector<T*, 8>>
class FoldingSetVector {
FoldingSet<T> Set;
VectorT Vector;
@@ -623,7 +633,9 @@ public:
class FoldingSetIteratorImpl {
protected:
FoldingSetNode *NodePtr;
+
FoldingSetIteratorImpl(void **Bucket);
+
void advance();
public:
@@ -754,11 +766,12 @@ template<typename T> struct FoldingSetTrait<T*> {
template <typename T1, typename T2>
struct FoldingSetTrait<std::pair<T1, T2>> {
static inline void Profile(const std::pair<T1, T2> &P,
- llvm::FoldingSetNodeID &ID) {
+ FoldingSetNodeID &ID) {
ID.Add(P.first);
ID.Add(P.second);
}
};
-} // End of namespace llvm.
-#endif
+} // end namespace llvm
+
+#endif // LLVM_ADT_FOLDINGSET_H
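
A hedged sketch of the FoldingSet pattern the header describes, with a
hypothetical node type: Profile() supplies the bits that identify a node, and
FindNodeOrInsertPos/InsertNode give the usual lookup-then-create idiom.

    #include "llvm/ADT/FoldingSet.h"

    using namespace llvm;

    struct Interval : FoldingSetNode {
      int Lo, Hi;
      Interval(int L, int H) : Lo(L), Hi(H) {}
      void Profile(FoldingSetNodeID &ID) const {
        ID.AddInteger(Lo);
        ID.AddInteger(Hi);
      }
    };

    static Interval *getOrCreate(FoldingSet<Interval> &Set, int Lo, int Hi) {
      FoldingSetNodeID ID;
      ID.AddInteger(Lo);
      ID.AddInteger(Hi);
      void *InsertPos = nullptr;
      if (Interval *Existing = Set.FindNodeOrInsertPos(ID, InsertPos))
        return Existing;                       // already uniqued
      Interval *New = new Interval(Lo, Hi);    // usually allocator-owned
      Set.InsertNode(New, InsertPos);
      return New;
    }
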
diff --git a/contrib/llvm/include/llvm/ADT/GraphTraits.h b/contrib/llvm/include/llvm/ADT/GraphTraits.h
index eb67b7c83659..29bbcb010eee 100644
--- a/contrib/llvm/include/llvm/ADT/GraphTraits.h
+++ b/contrib/llvm/include/llvm/ADT/GraphTraits.h
@@ -27,14 +27,10 @@ template<class GraphType>
struct GraphTraits {
// Elements to provide:
- // NOTICE: We are in a transition from migration interfaces that require
- // NodeType *, to NodeRef. NodeRef is required to be cheap to copy, but does
- // not have to be a raw pointer. In the transition, user should define
- // NodeType, and NodeRef = NodeType *.
- //
- // typedef NodeType - Type of Node in the graph
- // typedef NodeRef - NodeType *
- // typedef ChildIteratorType - Type used to iterate over children in graph
+ // typedef NodeRef - Type of Node token in the graph, which should
+ // be cheap to copy.
+ // typedef ChildIteratorType - Type used to iterate over children in graph,
+ // dereference to a NodeRef.
// static NodeRef getEntryNode(const GraphType &)
// Return the entry node of the graph
@@ -45,7 +41,7 @@ struct GraphTraits {
// node list for the specified node.
//
- // typedef ...iterator nodes_iterator;
+ // typedef ...iterator nodes_iterator; - dereference to a NodeRef
// static nodes_iterator nodes_begin(GraphType *G)
// static nodes_iterator nodes_end (GraphType *G)
// nodes_iterator/begin/end - Allow iteration over all nodes in the graph
@@ -88,23 +84,7 @@ struct Inverse {
// Provide a partial specialization of GraphTraits so that the inverse of an
// inverse falls back to the original graph.
-template<class T>
-struct GraphTraits<Inverse<Inverse<T> > > {
- typedef typename GraphTraits<T>::NodeType NodeType;
- typedef typename GraphTraits<T>::ChildIteratorType ChildIteratorType;
-
- static NodeType *getEntryNode(Inverse<Inverse<T> > *G) {
- return GraphTraits<T>::getEntryNode(G->Graph.Graph);
- }
-
- static ChildIteratorType child_begin(NodeType* N) {
- return GraphTraits<T>::child_begin(N);
- }
-
- static ChildIteratorType child_end(NodeType* N) {
- return GraphTraits<T>::child_end(N);
- }
-};
+template <class T> struct GraphTraits<Inverse<Inverse<T>>> : GraphTraits<T> {};
} // End llvm namespace
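
To make the NodeRef-only interface above concrete, here is a sketch of a
GraphTraits specialization for a hypothetical adjacency-list graph (all names
are illustrative):

    #include "llvm/ADT/GraphTraits.h"
    #include <vector>

    struct SimpleGraph {
      struct Node {
        std::vector<Node *> Succs;
      };
      Node *Entry;
    };

    namespace llvm {
    template <> struct GraphTraits<SimpleGraph *> {
      typedef SimpleGraph::Node *NodeRef;                 // cheap to copy
      typedef std::vector<SimpleGraph::Node *>::iterator ChildIteratorType;

      static NodeRef getEntryNode(SimpleGraph *G) { return G->Entry; }
      static ChildIteratorType child_begin(NodeRef N) { return N->Succs.begin(); }
      static ChildIteratorType child_end(NodeRef N) { return N->Succs.end(); }
    };
    } // end namespace llvm
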
diff --git a/contrib/llvm/include/llvm/ADT/ImmutableList.h b/contrib/llvm/include/llvm/ADT/ImmutableList.h
index a1d26bd97045..e5f51bafe995 100644
--- a/contrib/llvm/include/llvm/ADT/ImmutableList.h
+++ b/contrib/llvm/include/llvm/ADT/ImmutableList.h
@@ -16,8 +16,9 @@
#include "llvm/ADT/FoldingSet.h"
#include "llvm/Support/Allocator.h"
-#include "llvm/Support/DataTypes.h"
#include <cassert>
+#include <cstdint>
+#include <new>
namespace llvm {
@@ -25,18 +26,18 @@ template <typename T> class ImmutableListFactory;
template <typename T>
class ImmutableListImpl : public FoldingSetNode {
+ friend class ImmutableListFactory<T>;
+
T Head;
const ImmutableListImpl* Tail;
ImmutableListImpl(const T& head, const ImmutableListImpl* tail = nullptr)
: Head(head), Tail(tail) {}
- friend class ImmutableListFactory<T>;
-
- void operator=(const ImmutableListImpl&) = delete;
- ImmutableListImpl(const ImmutableListImpl&) = delete;
-
public:
+ ImmutableListImpl(const ImmutableListImpl &) = delete;
+ ImmutableListImpl &operator=(const ImmutableListImpl &) = delete;
+
const T& getHead() const { return Head; }
const ImmutableListImpl* getTail() const { return Tail; }
@@ -79,15 +80,17 @@ public:
}
class iterator {
- const ImmutableListImpl<T>* L;
+ const ImmutableListImpl<T>* L = nullptr;
+
public:
- iterator() : L(nullptr) {}
+ iterator() = default;
iterator(ImmutableList l) : L(l.getInternalPointer()) {}
iterator& operator++() { L = L->getTail(); return *this; }
bool operator==(const iterator& I) const { return L == I.L; }
bool operator!=(const iterator& I) const { return L != I.L; }
const value_type& operator*() const { return L->getHead(); }
+
ImmutableList getList() const { return L; }
};
@@ -121,7 +124,7 @@ public:
/// getHead - Returns the head of the list.
const T& getHead() {
- assert (!isEmpty() && "Cannot get the head of an empty list.");
+ assert(!isEmpty() && "Cannot get the head of an empty list.");
return X->getHead();
}
@@ -145,7 +148,7 @@ class ImmutableListFactory {
uintptr_t Allocator;
bool ownsAllocator() const {
- return Allocator & 0x1 ? false : true;
+ return (Allocator & 0x1) == 0;
}
BumpPtrAllocator& getAllocator() const {
@@ -203,18 +206,21 @@ public:
//===----------------------------------------------------------------------===//
template<typename T> struct DenseMapInfo;
-template<typename T> struct DenseMapInfo<ImmutableList<T> > {
+template<typename T> struct DenseMapInfo<ImmutableList<T>> {
static inline ImmutableList<T> getEmptyKey() {
return reinterpret_cast<ImmutableListImpl<T>*>(-1);
}
+
static inline ImmutableList<T> getTombstoneKey() {
return reinterpret_cast<ImmutableListImpl<T>*>(-2);
}
+
static unsigned getHashValue(ImmutableList<T> X) {
uintptr_t PtrVal = reinterpret_cast<uintptr_t>(X.getInternalPointer());
return (unsigned((uintptr_t)PtrVal) >> 4) ^
(unsigned((uintptr_t)PtrVal) >> 9);
}
+
static bool isEqual(ImmutableList<T> X1, ImmutableList<T> X2) {
return X1 == X2;
}
@@ -222,8 +228,8 @@ template<typename T> struct DenseMapInfo<ImmutableList<T> > {
template <typename T> struct isPodLike;
template <typename T>
-struct isPodLike<ImmutableList<T> > { static const bool value = true; };
+struct isPodLike<ImmutableList<T>> { static const bool value = true; };
-} // end llvm namespace
+} // end namespace llvm
#endif // LLVM_ADT_IMMUTABLELIST_H
diff --git a/contrib/llvm/include/llvm/ADT/ImmutableMap.h b/contrib/llvm/include/llvm/ADT/ImmutableMap.h
index 7480cd73da61..f197d407ba3b 100644
--- a/contrib/llvm/include/llvm/ADT/ImmutableMap.h
+++ b/contrib/llvm/include/llvm/ADT/ImmutableMap.h
@@ -14,7 +14,10 @@
#ifndef LLVM_ADT_IMMUTABLEMAP_H
#define LLVM_ADT_IMMUTABLEMAP_H
+#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/ImmutableSet.h"
+#include "llvm/Support/Allocator.h"
+#include <utility>
namespace llvm {
@@ -56,7 +59,7 @@ struct ImutKeyValueInfo {
};
template <typename KeyT, typename ValT,
- typename ValInfo = ImutKeyValueInfo<KeyT,ValT> >
+ typename ValInfo = ImutKeyValueInfo<KeyT,ValT>>
class ImmutableMap {
public:
typedef typename ValInfo::value_type value_type;
@@ -106,6 +109,9 @@ public:
Factory(BumpPtrAllocator &Alloc, bool canonicalize = true)
: F(Alloc), Canonicalize(canonicalize) {}
+ Factory(const Factory &) = delete;
+ Factory &operator=(const Factory &) = delete;
+
ImmutableMap getEmptyMap() { return ImmutableMap(F.getEmptyTree()); }
ImmutableMap add(ImmutableMap Old, key_type_ref K, data_type_ref D) {
@@ -121,10 +127,6 @@ public:
typename TreeTy::Factory *getTreeFactory() const {
return const_cast<typename TreeTy::Factory *>(&F);
}
-
- private:
- Factory(const Factory& RHS) = delete;
- void operator=(const Factory& RHS) = delete;
};
bool contains(key_type_ref K) const {
@@ -203,9 +205,10 @@ public:
//===--------------------------------------------------===//
class iterator : public ImutAVLValueIterator<ImmutableMap> {
+ friend class ImmutableMap;
+
iterator() = default;
explicit iterator(TreeTy *Tree) : iterator::ImutAVLValueIterator(Tree) {}
- friend class ImmutableMap;
public:
key_type_ref getKey() const { return (*this)->first; }
@@ -248,7 +251,7 @@ public:
// NOTE: This will possibly become the new implementation of ImmutableMap some day.
template <typename KeyT, typename ValT,
-typename ValInfo = ImutKeyValueInfo<KeyT,ValT> >
+typename ValInfo = ImutKeyValueInfo<KeyT,ValT>>
class ImmutableMapRef {
public:
typedef typename ValInfo::value_type value_type;
@@ -362,9 +365,10 @@ public:
//===--------------------------------------------------===//
class iterator : public ImutAVLValueIterator<ImmutableMapRef> {
+ friend class ImmutableMapRef;
+
iterator() = default;
explicit iterator(TreeTy *Tree) : iterator::ImutAVLValueIterator(Tree) {}
- friend class ImmutableMapRef;
public:
key_type_ref getKey() const { return (*this)->first; }
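
A brief usage sketch (hypothetical keys and values): the Factory owns the
AVL-tree nodes, and add() returns a new map while leaving the old one intact.

    #include "llvm/ADT/ImmutableMap.h"

    using namespace llvm;

    static void immutableMapExample() {
      ImmutableMap<int, int>::Factory F;
      ImmutableMap<int, int> Empty = F.getEmptyMap();
      ImmutableMap<int, int> M1 = F.add(Empty, 1, 10);
      ImmutableMap<int, int> M2 = F.add(M1, 2, 20);    // shares M1's tree
      const int *V = M2.lookup(1);                     // points at 10
      (void)V;
    }
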
diff --git a/contrib/llvm/include/llvm/ADT/ImmutableSet.h b/contrib/llvm/include/llvm/ADT/ImmutableSet.h
index 87026f019fec..0724a28306a0 100644
--- a/contrib/llvm/include/llvm/ADT/ImmutableSet.h
+++ b/contrib/llvm/include/llvm/ADT/ImmutableSet.h
@@ -16,12 +16,16 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/iterator.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Allocator.h"
-#include "llvm/Support/DataTypes.h"
#include "llvm/Support/ErrorHandling.h"
#include <cassert>
#include <functional>
#include <vector>
+#include <cstdint>
+#include <iterator>
+#include <new>
namespace llvm {
@@ -329,11 +333,13 @@ private:
public:
void retain() { ++refCount; }
+
void release() {
assert(refCount > 0);
if (--refCount == 0)
destroy();
}
+
void destroy() {
if (left)
left->release();
@@ -375,7 +381,7 @@ class ImutAVLFactory {
std::vector<TreeTy*> freeNodes;
bool ownsAllocator() const {
- return Allocator & 0x1 ? false : true;
+ return (Allocator & 0x1) == 0;
}
BumpPtrAllocator& getAllocator() const {
@@ -414,7 +420,6 @@ public:
TreeTy* getEmptyTree() const { return nullptr; }
protected:
-
//===--------------------------------------------------===//
// A bunch of quick helper functions used for reasoning
// about the properties of trees and their children.
@@ -649,13 +654,14 @@ class ImutAVLTreeGenericIterator
: public std::iterator<std::bidirectional_iterator_tag,
ImutAVLTree<ImutInfo>> {
SmallVector<uintptr_t,20> stack;
+
public:
enum VisitFlag { VisitedNone=0x0, VisitedLeft=0x1, VisitedRight=0x3,
Flags=0x3 };
typedef ImutAVLTree<ImutInfo> TreeTy;
- ImutAVLTreeGenericIterator() {}
+ ImutAVLTreeGenericIterator() = default;
ImutAVLTreeGenericIterator(const TreeTy *Root) {
if (Root) stack.push_back(reinterpret_cast<uintptr_t>(Root));
}
@@ -671,7 +677,6 @@ public:
return stack.back() & Flags;
}
-
bool atEnd() const { return stack.empty(); }
bool atBeginning() const {
@@ -881,7 +886,6 @@ struct ImutProfileInfo<bool> {
}
};
-
/// Generic profile trait for pointer types. We treat pointers as
/// references to unique objects.
template <typename T>
@@ -901,7 +905,6 @@ struct ImutProfileInfo<T*> {
// for element profiling.
//===----------------------------------------------------------------------===//
-
/// ImutContainerInfo - Generic definition of comparison operations for
/// elements of immutable containers that defaults to using
/// std::equal_to<> and std::less<> to perform comparison of elements.
@@ -954,7 +957,7 @@ struct ImutContainerInfo<T*> : public ImutProfileInfo<T*> {
// Immutable Set
//===----------------------------------------------------------------------===//
-template <typename ValT, typename ValInfo = ImutContainerInfo<ValT> >
+template <typename ValT, typename ValInfo = ImutContainerInfo<ValT>>
class ImmutableSet {
public:
typedef typename ValInfo::value_type value_type;
@@ -972,9 +975,11 @@ public:
explicit ImmutableSet(TreeTy* R) : Root(R) {
if (Root) { Root->retain(); }
}
+
ImmutableSet(const ImmutableSet &X) : Root(X.Root) {
if (Root) { Root->retain(); }
}
+
ImmutableSet &operator=(const ImmutableSet &X) {
if (Root != X.Root) {
if (X.Root) { X.Root->retain(); }
@@ -983,6 +988,7 @@ public:
}
return *this;
}
+
~ImmutableSet() {
if (Root) { Root->release(); }
}
@@ -998,6 +1004,9 @@ public:
Factory(BumpPtrAllocator& Alloc, bool canonicalize = true)
: F(Alloc), Canonicalize(canonicalize) {}
+ Factory(const Factory& RHS) = delete;
+ void operator=(const Factory& RHS) = delete;
+
/// getEmptySet - Returns an immutable set that contains no elements.
ImmutableSet getEmptySet() {
return ImmutableSet(F.getEmptyTree());
@@ -1032,10 +1041,6 @@ public:
typename TreeTy::Factory *getTreeFactory() const {
return const_cast<typename TreeTy::Factory *>(&F);
}
-
- private:
- Factory(const Factory& RHS) = delete;
- void operator=(const Factory& RHS) = delete;
};
friend class Factory;
@@ -1104,7 +1109,7 @@ public:
};
// NOTE: This may some day replace the current ImmutableSet.
-template <typename ValT, typename ValInfo = ImutContainerInfo<ValT> >
+template <typename ValT, typename ValInfo = ImutContainerInfo<ValT>>
class ImmutableSetRef {
public:
typedef typename ValInfo::value_type value_type;
@@ -1126,11 +1131,13 @@ public:
Factory(F) {
if (Root) { Root->retain(); }
}
+
ImmutableSetRef(const ImmutableSetRef &X)
: Root(X.Root),
Factory(X.Factory) {
if (Root) { Root->retain(); }
}
+
ImmutableSetRef &operator=(const ImmutableSetRef &X) {
if (Root != X.Root) {
if (X.Root) { X.Root->retain(); }
@@ -1215,4 +1222,4 @@ public:
} // end namespace llvm
-#endif
+#endif // LLVM_ADT_IMMUTABLESET_H
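
The same persistent pattern applies to ImmutableSet, sketched here with
arbitrary values:

    #include "llvm/ADT/ImmutableSet.h"

    using namespace llvm;

    static bool immutableSetExample() {
      ImmutableSet<int>::Factory F;
      ImmutableSet<int> S1 = F.add(F.getEmptySet(), 7);
      ImmutableSet<int> S2 = F.add(S1, 9);             // S1 is unchanged
      return S2.contains(7) && S2.contains(9) && !S1.contains(9);
    }
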
diff --git a/contrib/llvm/include/llvm/ADT/IntervalMap.h b/contrib/llvm/include/llvm/ADT/IntervalMap.h
index f8843b2a4e50..430b9671bd1d 100644
--- a/contrib/llvm/include/llvm/ADT/IntervalMap.h
+++ b/contrib/llvm/include/llvm/ADT/IntervalMap.h
@@ -104,11 +104,14 @@
#include "llvm/Support/AlignOf.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/RecyclingAllocator.h"
+#include <algorithm>
+#include <cassert>
#include <iterator>
+#include <new>
+#include <utility>
namespace llvm {
-
//===----------------------------------------------------------------------===//
//--- Key traits ---//
//===----------------------------------------------------------------------===//
@@ -131,7 +134,6 @@ namespace llvm {
template <typename T>
struct IntervalMapInfo {
-
/// startLess - Return true if x is not in [a;b].
/// This is x < a both for closed intervals and for [a;b) half-open intervals.
static inline bool startLess(const T &x, const T &a) {
@@ -150,11 +152,15 @@ struct IntervalMapInfo {
return a+1 == b;
}
+ /// nonEmpty - Return true if [a;b] is non-empty.
+ /// This is a <= b for a closed interval, a < b for [a;b) half-open intervals.
+ static inline bool nonEmpty(const T &a, const T &b) {
+ return a <= b;
+ }
};
template <typename T>
struct IntervalMapHalfOpenInfo {
-
/// startLess - Return true if x is not in [a;b).
static inline bool startLess(const T &x, const T &a) {
return x < a;
@@ -170,19 +176,18 @@ struct IntervalMapHalfOpenInfo {
return a == b;
}
+ /// nonEmpty - Return true if [a;b) is non-empty.
+ static inline bool nonEmpty(const T &a, const T &b) {
+ return a < b;
+ }
};
/// IntervalMapImpl - Namespace used for IntervalMap implementation details.
/// It should be considered private to the implementation.
namespace IntervalMapImpl {
-// Forward declarations.
-template <typename, typename, unsigned, typename> class LeafNode;
-template <typename, typename, unsigned, typename> class BranchNode;
-
typedef std::pair<unsigned,unsigned> IdxPair;
-
//===----------------------------------------------------------------------===//
//--- IntervalMapImpl::NodeBase ---//
//===----------------------------------------------------------------------===//
@@ -406,7 +411,6 @@ IdxPair distribute(unsigned Nodes, unsigned Elements, unsigned Capacity,
const unsigned *CurSize, unsigned NewSize[],
unsigned Position, bool Grow);
-
//===----------------------------------------------------------------------===//
//--- IntervalMapImpl::NodeSizer ---//
//===----------------------------------------------------------------------===//
@@ -459,10 +463,8 @@ struct NodeSizer {
/// different kinds of maps.
typedef RecyclingAllocator<BumpPtrAllocator, char,
AllocBytes, CacheLineBytes> Allocator;
-
};
-
//===----------------------------------------------------------------------===//
//--- IntervalMapImpl::NodeRef ---//
//===----------------------------------------------------------------------===//
@@ -494,7 +496,7 @@ class NodeRef {
public:
/// NodeRef - Create a null ref.
- NodeRef() {}
+ NodeRef() = default;
/// operator bool - Detect a null ref.
explicit operator bool() const { return pip.getOpaqueValue(); }
@@ -674,7 +676,6 @@ insertFrom(unsigned &Pos, unsigned Size, KeyT a, KeyT b, ValT y) {
return Size + 1;
}
-
//===----------------------------------------------------------------------===//
//--- IntervalMapImpl::BranchNode ---//
//===----------------------------------------------------------------------===//
@@ -919,8 +920,7 @@ public:
}
};
-} // namespace IntervalMapImpl
-
+} // end namespace IntervalMapImpl
//===----------------------------------------------------------------------===//
//--- IntervalMap ----//
@@ -928,7 +928,7 @@ public:
template <typename KeyT, typename ValT,
unsigned N = IntervalMapImpl::NodeSizer<KeyT, ValT>::LeafSize,
- typename Traits = IntervalMapInfo<KeyT> >
+ typename Traits = IntervalMapInfo<KeyT>>
class IntervalMap {
typedef IntervalMapImpl::NodeSizer<KeyT, ValT> Sizer;
typedef IntervalMapImpl::LeafNode<KeyT, ValT, Sizer::LeafSize, Traits> Leaf;
@@ -995,6 +995,7 @@ private:
    assert(!branched() && "Cannot access leaf data in branched root");
return dataAs<RootLeaf>();
}
+
RootBranchData &rootBranchData() const {
assert(branched() && "Cannot access branch data in non-branched root");
return dataAs<RootBranchData>();
@@ -1003,6 +1004,7 @@ private:
assert(branched() && "Cannot access branch data in non-branched root");
return dataAs<RootBranchData>();
}
+
const RootBranch &rootBranch() const { return rootBranchData().node; }
RootBranch &rootBranch() { return rootBranchData().node; }
KeyT rootBranchStart() const { return rootBranchData().start; }
@@ -1041,7 +1043,7 @@ private:
public:
explicit IntervalMap(Allocator &a) : height(0), rootSize(0), allocator(a) {
- assert((uintptr_t(data.buffer) & (alignOf<RootLeaf>() - 1)) == 0 &&
+ assert((uintptr_t(data.buffer) & (alignof(RootLeaf) - 1)) == 0 &&
"Insufficient alignment");
new(&rootLeaf()) RootLeaf();
}
@@ -1149,7 +1151,6 @@ treeSafeLookup(KeyT x, ValT NotFound) const {
return NR.get<Leaf>().safeLookup(x, NotFound);
}
-
// branchRoot - Switch from a leaf root to a branched root.
// Return the new (root offset, node offset) corresponding to Position.
template <typename KeyT, typename ValT, unsigned N, typename Traits>
@@ -1284,6 +1285,7 @@ clear() {
template <typename KeyT, typename ValT, unsigned N, typename Traits>
class IntervalMap<KeyT, ValT, N, Traits>::const_iterator :
public std::iterator<std::bidirectional_iterator_tag, ValT> {
+
protected:
friend class IntervalMap;
@@ -1436,7 +1438,6 @@ public:
path.leafOffset() =
map->rootLeaf().findFrom(path.leafOffset(), map->rootSize, x);
}
-
};
/// pathFillFind - Complete path by searching for x.
@@ -1523,7 +1524,7 @@ class IntervalMap<KeyT, ValT, N, Traits>::iterator : public const_iterator {
public:
/// iterator - Create null iterator.
- iterator() {}
+ iterator() = default;
/// setStart - Move the start of the current interval.
/// This may cause coalescing with the previous interval.
@@ -1589,7 +1590,6 @@ public:
operator--();
return tmp;
}
-
};
/// canCoalesceLeft - Can the current interval coalesce to the left after
@@ -1669,7 +1669,7 @@ iterator::setNodeStop(unsigned Level, KeyT Stop) {
template <typename KeyT, typename ValT, unsigned N, typename Traits>
void IntervalMap<KeyT, ValT, N, Traits>::
iterator::setStart(KeyT a) {
- assert(Traits::stopLess(a, this->stop()) && "Cannot move start beyond stop");
+ assert(Traits::nonEmpty(a, this->stop()) && "Cannot move start beyond stop");
KeyT &CurStart = this->unsafeStart();
if (!Traits::startLess(a, CurStart) || !canCoalesceLeft(a, this->value())) {
CurStart = a;
@@ -1685,7 +1685,7 @@ iterator::setStart(KeyT a) {
template <typename KeyT, typename ValT, unsigned N, typename Traits>
void IntervalMap<KeyT, ValT, N, Traits>::
iterator::setStop(KeyT b) {
- assert(Traits::stopLess(this->start(), b) && "Cannot move stop beyond start");
+ assert(Traits::nonEmpty(this->start(), b) && "Cannot move stop beyond start");
if (Traits::startLess(b, this->stop()) ||
!canCoalesceRight(b, this->value())) {
setStopUnchecked(b);
@@ -1790,7 +1790,6 @@ iterator::insert(KeyT a, KeyT b, ValT y) {
treeInsert(a, b, y);
}
-
template <typename KeyT, typename ValT, unsigned N, typename Traits>
void IntervalMap<KeyT, ValT, N, Traits>::
iterator::treeInsert(KeyT a, KeyT b, ValT y) {
@@ -2151,6 +2150,6 @@ public:
}
};
-} // namespace llvm
+} // end namespace llvm
-#endif
+#endif // LLVM_ADT_INTERVALMAP_H
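
For context on the coalescing behaviour the setStart/setStop assertions guard,
a small usage sketch with made-up intervals:

    #include "llvm/ADT/IntervalMap.h"

    using namespace llvm;

    static void intervalMapExample() {
      IntervalMap<unsigned, char>::Allocator Alloc;
      IntervalMap<unsigned, char> Map(Alloc);
      Map.insert(10, 19, 'a');
      Map.insert(20, 29, 'a');         // coalesces into [10;29] -> 'a'
      char V = Map.lookup(25, '\0');   // 'a'; key 5 would yield the '\0' default
      (void)V;
    }
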
diff --git a/contrib/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h b/contrib/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h
index 8057ec10be00..559fb40773aa 100644
--- a/contrib/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h
+++ b/contrib/llvm/include/llvm/ADT/IntrusiveRefCntPtr.h
@@ -7,14 +7,49 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines IntrusiveRefCntPtr, a template class that
-// implements a "smart" pointer for objects that maintain their own
-// internal reference count, and RefCountedBase/RefCountedBaseVPTR, two
-// generic base classes for objects that wish to have their lifetimes
-// managed using reference counting.
+// This file defines the RefCountedBase, ThreadSafeRefCountedBase, and
+// IntrusiveRefCntPtr classes.
//
-// IntrusiveRefCntPtr is similar to Boost's intrusive_ptr with added
-// LLVM-style casting.
+// IntrusiveRefCntPtr is a smart pointer to an object which maintains a
+// reference count. (ThreadSafe)RefCountedBase is a mixin class that adds a
+// refcount member variable and methods for updating the refcount. An object
+// that inherits from (ThreadSafe)RefCountedBase deletes itself when its
+// refcount hits zero.
+//
+// For example:
+//
+// class MyClass : public RefCountedBase<MyClass> {};
+//
+// void foo() {
+// // Objects that inherit from RefCountedBase should always be instantiated
+// // on the heap, never on the stack.
+// IntrusiveRefCntPtr<MyClass> Ptr1(new MyClass());
+//
+// // Copying an IntrusiveRefCntPtr increases the pointee's refcount by 1.
+// IntrusiveRefCntPtr<MyClass> Ptr2(Ptr1);
+//
+// // Move-constructing an IntrusiveRefCntPtr has no effect on the object's
+// // refcount. After a move, the moved-from pointer is null.
+// IntrusiveRefCntPtr<MyClass> Ptr3(std::move(Ptr1));
+// assert(Ptr1 == nullptr);
+//
+// // Clearing an IntrusiveRefCntPtr decreases the pointee's refcount by 1.
+// Ptr2.reset();
+//
+// // The object deletes itself when we return from the function, because
+// // Ptr3's destructor decrements its refcount to 0.
+// }
+//
+// You can use IntrusiveRefCntPtr with isa<T>(), dyn_cast<T>(), etc.:
+//
+// IntrusiveRefCntPtr<MyClass> Ptr(new MyClass());
+// OtherClass *Other = dyn_cast<OtherClass>(Ptr); // Ptr.get() not required
+//
+// IntrusiveRefCntPtr works with any class that
+//
+// - inherits from (ThreadSafe)RefCountedBase,
+// - has Retain() and Release() methods, or
+// - specializes IntrusiveRefCntPtrInfo.
//
//===----------------------------------------------------------------------===//
@@ -27,261 +62,207 @@
namespace llvm {
- template <class T>
- class IntrusiveRefCntPtr;
-
-//===----------------------------------------------------------------------===//
-/// RefCountedBase - A generic base class for objects that wish to
-/// have their lifetimes managed using reference counts. Classes
-/// subclass RefCountedBase to obtain such functionality, and are
-/// typically handled with IntrusiveRefCntPtr "smart pointers" (see below)
-/// which automatically handle the management of reference counts.
-/// Objects that subclass RefCountedBase should not be allocated on
-/// the stack, as invoking "delete" (which is called when the
-/// reference count hits 0) on such objects is an error.
-//===----------------------------------------------------------------------===//
- template <class Derived>
- class RefCountedBase {
- mutable unsigned ref_cnt;
-
- public:
- RefCountedBase() : ref_cnt(0) {}
- RefCountedBase(const RefCountedBase &) : ref_cnt(0) {}
-
- void Retain() const { ++ref_cnt; }
- void Release() const {
- assert (ref_cnt > 0 && "Reference count is already zero.");
- if (--ref_cnt == 0) delete static_cast<const Derived*>(this);
- }
- };
-
-//===----------------------------------------------------------------------===//
-/// RefCountedBaseVPTR - A class that has the same function as
-/// RefCountedBase, but with a virtual destructor. Should be used
-/// instead of RefCountedBase for classes that already have virtual
-/// methods to enforce dynamic allocation via 'new'. Classes that
-/// inherit from RefCountedBaseVPTR can't be allocated on stack -
-/// attempting to do this will produce a compile error.
-//===----------------------------------------------------------------------===//
- class RefCountedBaseVPTR {
- mutable unsigned ref_cnt;
- virtual void anchor();
-
- protected:
- RefCountedBaseVPTR() : ref_cnt(0) {}
- RefCountedBaseVPTR(const RefCountedBaseVPTR &) : ref_cnt(0) {}
-
- virtual ~RefCountedBaseVPTR() {}
-
- void Retain() const { ++ref_cnt; }
- void Release() const {
- assert (ref_cnt > 0 && "Reference count is already zero.");
- if (--ref_cnt == 0) delete this;
- }
-
- template <typename T>
- friend struct IntrusiveRefCntPtrInfo;
- };
+/// A CRTP mixin class that adds reference counting to a type.
+///
+/// The lifetime of an object which inherits from RefCountedBase is managed by
+/// calls to Retain() and Release(), which increment and decrement the object's
+/// refcount, respectively. When a Release() call decrements the refcount to 0,
+/// the object deletes itself.
+///
+/// Objects that inherit from RefCountedBase should always be allocated with
+/// operator new.
+template <class Derived> class RefCountedBase {
+ mutable unsigned RefCount = 0;
+public:
+ RefCountedBase() = default;
+ RefCountedBase(const RefCountedBase &) : RefCount(0) {}
- template <typename T> struct IntrusiveRefCntPtrInfo {
- static void retain(T *obj) { obj->Retain(); }
- static void release(T *obj) { obj->Release(); }
- };
+ void Retain() const { ++RefCount; }
+ void Release() const {
+ assert(RefCount > 0 && "Reference count is already zero.");
+ if (--RefCount == 0)
+ delete static_cast<const Derived *>(this);
+ }
+};
-/// \brief A thread-safe version of \c llvm::RefCountedBase.
-///
-/// A generic base class for objects that wish to have their lifetimes managed
-/// using reference counts. Classes subclass \c ThreadSafeRefCountedBase to
-/// obtain such functionality, and are typically handled with
-/// \c IntrusiveRefCntPtr "smart pointers" which automatically handle the
-/// management of reference counts.
-template <class Derived>
-class ThreadSafeRefCountedBase {
+/// A thread-safe version of \c RefCountedBase.
+template <class Derived> class ThreadSafeRefCountedBase {
mutable std::atomic<int> RefCount;
protected:
ThreadSafeRefCountedBase() : RefCount(0) {}
public:
- void Retain() const { ++RefCount; }
+ void Retain() const { RefCount.fetch_add(1, std::memory_order_relaxed); }
void Release() const {
- int NewRefCount = --RefCount;
+ int NewRefCount = RefCount.fetch_sub(1, std::memory_order_acq_rel) - 1;
assert(NewRefCount >= 0 && "Reference count was already zero.");
if (NewRefCount == 0)
- delete static_cast<const Derived*>(this);
+ delete static_cast<const Derived *>(this);
}
};
-//===----------------------------------------------------------------------===//
-/// IntrusiveRefCntPtr - A template class that implements a "smart pointer"
-/// that assumes the wrapped object has a reference count associated
-/// with it that can be managed via calls to
-/// IntrusivePtrAddRef/IntrusivePtrRelease. The smart pointers
-/// manage reference counts via the RAII idiom: upon creation of
-/// smart pointer the reference count of the wrapped object is
-/// incremented and upon destruction of the smart pointer the
-/// reference count is decremented. This class also safely handles
-/// wrapping NULL pointers.
+/// Class you can specialize to provide custom retain/release functionality for
+/// a type.
///
-/// Reference counting is implemented via calls to
-/// Obj->Retain()/Obj->Release(). Release() is required to destroy
-/// the object when the reference count reaches zero. Inheriting from
-/// RefCountedBase/RefCountedBaseVPTR takes care of this
-/// automatically.
-//===----------------------------------------------------------------------===//
- template <typename T>
- class IntrusiveRefCntPtr {
- T* Obj;
-
- public:
- typedef T element_type;
-
- explicit IntrusiveRefCntPtr() : Obj(nullptr) {}
-
- IntrusiveRefCntPtr(T* obj) : Obj(obj) {
- retain();
- }
-
- IntrusiveRefCntPtr(const IntrusiveRefCntPtr& S) : Obj(S.Obj) {
- retain();
- }
-
- IntrusiveRefCntPtr(IntrusiveRefCntPtr&& S) : Obj(S.Obj) {
- S.Obj = nullptr;
- }
-
- template <class X>
- IntrusiveRefCntPtr(IntrusiveRefCntPtr<X>&& S) : Obj(S.get()) {
- S.Obj = nullptr;
- }
-
- template <class X>
- IntrusiveRefCntPtr(const IntrusiveRefCntPtr<X>& S)
- : Obj(S.get()) {
- retain();
- }
-
- IntrusiveRefCntPtr& operator=(IntrusiveRefCntPtr S) {
- swap(S);
- return *this;
- }
-
- ~IntrusiveRefCntPtr() { release(); }
-
- T& operator*() const { return *Obj; }
-
- T* operator->() const { return Obj; }
-
- T* get() const { return Obj; }
-
- explicit operator bool() const { return Obj; }
-
- void swap(IntrusiveRefCntPtr& other) {
- T* tmp = other.Obj;
- other.Obj = Obj;
- Obj = tmp;
- }
-
- void reset() {
- release();
- Obj = nullptr;
- }
+/// Usually specializing this class is not necessary, as IntrusiveRefCntPtr
+/// works with any type which defines Retain() and Release() functions -- you
+/// can define those functions yourself if RefCountedBase doesn't work for you.
+///
+/// One case when you might want to specialize this type is if you have
+/// - Foo.h defines type Foo and includes Bar.h, and
+/// - Bar.h uses IntrusiveRefCntPtr<Foo> in inline functions.
+///
+/// Because Foo.h includes Bar.h, Bar.h can't include Foo.h in order to pull in
+/// the declaration of Foo. Without the declaration of Foo, normally Bar.h
+/// wouldn't be able to use IntrusiveRefCntPtr<Foo>, which wants to call
+/// T::Retain and T::Release.
+///
+/// To resolve this, Bar.h could include a third header, FooFwd.h, which
+/// forward-declares Foo and specializes IntrusiveRefCntPtrInfo<Foo>. Then
+/// Bar.h could use IntrusiveRefCntPtr<Foo>, although it still couldn't call any
+/// functions on Foo itself, because Foo would be an incomplete type.
+template <typename T> struct IntrusiveRefCntPtrInfo {
+ static void retain(T *obj) { obj->Retain(); }
+ static void release(T *obj) { obj->Release(); }
+};
- void resetWithoutRelease() {
- Obj = nullptr;
- }
+/// A smart pointer to a reference-counted object that inherits from
+/// RefCountedBase or ThreadSafeRefCountedBase.
+///
+/// This class increments its pointee's reference count when it is created, and
+/// decrements its refcount when it's destroyed (or is changed to point to a
+/// different object).
+template <typename T> class IntrusiveRefCntPtr {
+ T *Obj = nullptr;
- private:
- void retain() { if (Obj) IntrusiveRefCntPtrInfo<T>::retain(Obj); }
- void release() { if (Obj) IntrusiveRefCntPtrInfo<T>::release(Obj); }
+public:
+ typedef T element_type;
- template <typename X>
- friend class IntrusiveRefCntPtr;
- };
+ explicit IntrusiveRefCntPtr() = default;
+ IntrusiveRefCntPtr(T *obj) : Obj(obj) { retain(); }
+ IntrusiveRefCntPtr(const IntrusiveRefCntPtr &S) : Obj(S.Obj) { retain(); }
+ IntrusiveRefCntPtr(IntrusiveRefCntPtr &&S) : Obj(S.Obj) { S.Obj = nullptr; }
- template<class T, class U>
- inline bool operator==(const IntrusiveRefCntPtr<T>& A,
- const IntrusiveRefCntPtr<U>& B)
- {
- return A.get() == B.get();
+ template <class X>
+ IntrusiveRefCntPtr(IntrusiveRefCntPtr<X> &&S) : Obj(S.get()) {
+ S.Obj = nullptr;
}
- template<class T, class U>
- inline bool operator!=(const IntrusiveRefCntPtr<T>& A,
- const IntrusiveRefCntPtr<U>& B)
- {
- return A.get() != B.get();
+ template <class X>
+ IntrusiveRefCntPtr(const IntrusiveRefCntPtr<X> &S) : Obj(S.get()) {
+ retain();
}
- template<class T, class U>
- inline bool operator==(const IntrusiveRefCntPtr<T>& A,
- U* B)
- {
- return A.get() == B;
+ IntrusiveRefCntPtr &operator=(IntrusiveRefCntPtr S) {
+ swap(S);
+ return *this;
}
- template<class T, class U>
- inline bool operator!=(const IntrusiveRefCntPtr<T>& A,
- U* B)
- {
- return A.get() != B;
- }
+ ~IntrusiveRefCntPtr() { release(); }
- template<class T, class U>
- inline bool operator==(T* A,
- const IntrusiveRefCntPtr<U>& B)
- {
- return A == B.get();
- }
+ T &operator*() const { return *Obj; }
+ T *operator->() const { return Obj; }
+ T *get() const { return Obj; }
+ explicit operator bool() const { return Obj; }
- template<class T, class U>
- inline bool operator!=(T* A,
- const IntrusiveRefCntPtr<U>& B)
- {
- return A != B.get();
+ void swap(IntrusiveRefCntPtr &other) {
+ T *tmp = other.Obj;
+ other.Obj = Obj;
+ Obj = tmp;
}
- template <class T>
- bool operator==(std::nullptr_t A, const IntrusiveRefCntPtr<T> &B) {
- return !B;
+ void reset() {
+ release();
+ Obj = nullptr;
}
- template <class T>
- bool operator==(const IntrusiveRefCntPtr<T> &A, std::nullptr_t B) {
- return B == A;
- }
+ void resetWithoutRelease() { Obj = nullptr; }
- template <class T>
- bool operator!=(std::nullptr_t A, const IntrusiveRefCntPtr<T> &B) {
- return !(A == B);
+private:
+ void retain() {
+ if (Obj)
+ IntrusiveRefCntPtrInfo<T>::retain(Obj);
}
-
- template <class T>
- bool operator!=(const IntrusiveRefCntPtr<T> &A, std::nullptr_t B) {
- return !(A == B);
+ void release() {
+ if (Obj)
+ IntrusiveRefCntPtrInfo<T>::release(Obj);
}
-//===----------------------------------------------------------------------===//
-// LLVM-style downcasting support for IntrusiveRefCntPtr objects
-//===----------------------------------------------------------------------===//
+ template <typename X> friend class IntrusiveRefCntPtr;
+};
+
+template <class T, class U>
+inline bool operator==(const IntrusiveRefCntPtr<T> &A,
+ const IntrusiveRefCntPtr<U> &B) {
+ return A.get() == B.get();
+}
+
+template <class T, class U>
+inline bool operator!=(const IntrusiveRefCntPtr<T> &A,
+ const IntrusiveRefCntPtr<U> &B) {
+ return A.get() != B.get();
+}
+
+template <class T, class U>
+inline bool operator==(const IntrusiveRefCntPtr<T> &A, U *B) {
+ return A.get() == B;
+}
+
+template <class T, class U>
+inline bool operator!=(const IntrusiveRefCntPtr<T> &A, U *B) {
+ return A.get() != B;
+}
+
+template <class T, class U>
+inline bool operator==(T *A, const IntrusiveRefCntPtr<U> &B) {
+ return A == B.get();
+}
+
+template <class T, class U>
+inline bool operator!=(T *A, const IntrusiveRefCntPtr<U> &B) {
+ return A != B.get();
+}
+
+template <class T>
+bool operator==(std::nullptr_t A, const IntrusiveRefCntPtr<T> &B) {
+ return !B;
+}
+
+template <class T>
+bool operator==(const IntrusiveRefCntPtr<T> &A, std::nullptr_t B) {
+ return B == A;
+}
+
+template <class T>
+bool operator!=(std::nullptr_t A, const IntrusiveRefCntPtr<T> &B) {
+ return !(A == B);
+}
+
+template <class T>
+bool operator!=(const IntrusiveRefCntPtr<T> &A, std::nullptr_t B) {
+ return !(A == B);
+}
+
+// Make IntrusiveRefCntPtr work with dyn_cast, isa, and the other idioms from
+// Casting.h.
+template <typename From> struct simplify_type;
+
+template <class T> struct simplify_type<IntrusiveRefCntPtr<T>> {
+ typedef T *SimpleType;
+ static SimpleType getSimplifiedValue(IntrusiveRefCntPtr<T> &Val) {
+ return Val.get();
+ }
+};
- template <typename From> struct simplify_type;
-
- template<class T> struct simplify_type<IntrusiveRefCntPtr<T> > {
- typedef T* SimpleType;
- static SimpleType getSimplifiedValue(IntrusiveRefCntPtr<T>& Val) {
- return Val.get();
- }
- };
-
- template<class T> struct simplify_type<const IntrusiveRefCntPtr<T> > {
- typedef /*const*/ T* SimpleType;
- static SimpleType getSimplifiedValue(const IntrusiveRefCntPtr<T>& Val) {
- return Val.get();
- }
- };
+template <class T> struct simplify_type<const IntrusiveRefCntPtr<T>> {
+ typedef /*const*/ T *SimpleType;
+ static SimpleType getSimplifiedValue(const IntrusiveRefCntPtr<T> &Val) {
+ return Val.get();
+ }
+};
} // end namespace llvm
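
The FooFwd.h pattern described in the IntrusiveRefCntPtrInfo comment could look roughly like the sketch below; Foo, FooFwd.h, and the out-of-line retain/release bodies are illustrative, not part of the header:

    // FooFwd.h - usable by headers that must not include the full Foo definition.
    #include "llvm/ADT/IntrusiveRefCntPtr.h"

    class Foo;  // forward declaration only

    namespace llvm {
    template <> struct IntrusiveRefCntPtrInfo<Foo> {
      static void retain(Foo *F);   // defined in Foo.cpp, where Foo is complete
      static void release(Foo *F);
    };
    } // end namespace llvm

    // Bar.h can now use the smart pointer in inline code without seeing Foo:
    inline void takeRef(llvm::IntrusiveRefCntPtr<Foo> P) { /* ... */ }
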
diff --git a/contrib/llvm/include/llvm/ADT/MapVector.h b/contrib/llvm/include/llvm/ADT/MapVector.h
index f19a50b7ba84..ac1885758cb9 100644
--- a/contrib/llvm/include/llvm/ADT/MapVector.h
+++ b/contrib/llvm/include/llvm/ADT/MapVector.h
@@ -30,6 +30,7 @@ template<typename KeyT, typename ValueT,
typename MapType = llvm::DenseMap<KeyT, unsigned>,
typename VectorType = std::vector<std::pair<KeyT, ValueT> > >
class MapVector {
+ typedef typename VectorType::value_type value_type;
typedef typename VectorType::size_type size_type;
MapType Map;
@@ -41,6 +42,12 @@ public:
typedef typename VectorType::reverse_iterator reverse_iterator;
typedef typename VectorType::const_reverse_iterator const_reverse_iterator;
+ /// Clear the MapVector and return the underlying vector.
+ VectorType takeVector() {
+ Map.clear();
+ return std::move(Vector);
+ }
+
size_type size() const { return Vector.size(); }
iterator begin() { return Vector.begin(); }
@@ -83,7 +90,10 @@ public:
return Vector[I].second;
}
+ // Returns a copy of the value. Only allowed if ValueT is copyable.
ValueT lookup(const KeyT &Key) const {
+ static_assert(std::is_copy_constructible<ValueT>::value,
+ "Cannot call lookup() if ValueT is not copyable.");
typename MapType::const_iterator Pos = Map.find(Key);
return Pos == Map.end()? ValueT() : Vector[Pos->second].second;
}
@@ -100,6 +110,19 @@ public:
return std::make_pair(begin() + I, false);
}
+ std::pair<iterator, bool> insert(std::pair<KeyT, ValueT> &&KV) {
+ // Copy KV.first into the map, then move it into the vector.
+ std::pair<KeyT, unsigned> Pair = std::make_pair(KV.first, 0);
+ std::pair<typename MapType::iterator, bool> Result = Map.insert(Pair);
+ unsigned &I = Result.first->second;
+ if (Result.second) {
+ Vector.push_back(std::move(KV));
+ I = Vector.size() - 1;
+ return std::make_pair(std::prev(end()), true);
+ }
+ return std::make_pair(begin() + I, false);
+ }
+
size_type count(const KeyT &Key) const {
typename MapType::const_iterator Pos = Map.find(Key);
return Pos == Map.end()? 0 : 1;
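
A short sketch of the MapVector additions above (the rvalue insert() overload, lookup(), and takeVector()); the contents are illustrative:

    #include "llvm/ADT/MapVector.h"
    #include <string>
    #include <utility>

    void sketch() {
      llvm::MapVector<int, std::string> MV;
      MV.insert(std::make_pair(1, std::string("one")));  // rvalue overload: the pair is moved into the vector
      MV[2] = "two";

      std::string S = MV.lookup(3);  // value-initialized ("") because key 3 is absent
      auto Pairs = MV.takeVector();  // std::vector<std::pair<int, std::string>>; MV's map is cleared
      (void)S; (void)Pairs;
    }
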
diff --git a/contrib/llvm/include/llvm/ADT/Optional.h b/contrib/llvm/include/llvm/ADT/Optional.h
index d9acaf6d23b0..701872c9f63f 100644
--- a/contrib/llvm/include/llvm/ADT/Optional.h
+++ b/contrib/llvm/include/llvm/ADT/Optional.h
@@ -129,7 +129,7 @@ public:
T& operator*() LLVM_LVALUE_FUNCTION { assert(hasVal); return *getPointer(); }
template <typename U>
- LLVM_CONSTEXPR T getValueOr(U &&value) const LLVM_LVALUE_FUNCTION {
+ constexpr T getValueOr(U &&value) const LLVM_LVALUE_FUNCTION {
return hasValue() ? getValue() : std::forward<U>(value);
}
@@ -150,18 +150,43 @@ template <typename T> struct isPodLike<Optional<T> > {
static const bool value = isPodLike<T>::value;
};
-/// \brief Poison comparison between two \c Optional objects. Clients needs to
-/// explicitly compare the underlying values and account for empty \c Optional
-/// objects.
-///
-/// This routine will never be defined. It returns \c void to help diagnose
-/// errors at compile time.
-template<typename T, typename U>
-void operator==(const Optional<T> &X, const Optional<U> &Y);
+template <typename T, typename U>
+bool operator==(const Optional<T> &X, const Optional<U> &Y) {
+ if (X && Y)
+ return *X == *Y;
+ return X.hasValue() == Y.hasValue();
+}
+
+template <typename T, typename U>
+bool operator!=(const Optional<T> &X, const Optional<U> &Y) {
+ return !(X == Y);
+}
+
+template <typename T, typename U>
+bool operator<(const Optional<T> &X, const Optional<U> &Y) {
+ if (X && Y)
+ return *X < *Y;
+ return X.hasValue() < Y.hasValue();
+}
+
+template <typename T, typename U>
+bool operator<=(const Optional<T> &X, const Optional<U> &Y) {
+ return !(Y < X);
+}
+
+template <typename T, typename U>
+bool operator>(const Optional<T> &X, const Optional<U> &Y) {
+ return Y < X;
+}
+
+template <typename T, typename U>
+bool operator>=(const Optional<T> &X, const Optional<U> &Y) {
+ return !(X < Y);
+}
template<typename T>
bool operator==(const Optional<T> &X, NoneType) {
- return !X.hasValue();
+ return !X;
}
template<typename T>
@@ -178,50 +203,86 @@ template<typename T>
bool operator!=(NoneType, const Optional<T> &X) {
return X != None;
}
-/// \brief Poison comparison between two \c Optional objects. Clients needs to
-/// explicitly compare the underlying values and account for empty \c Optional
-/// objects.
-///
-/// This routine will never be defined. It returns \c void to help diagnose
-/// errors at compile time.
-template<typename T, typename U>
-void operator!=(const Optional<T> &X, const Optional<U> &Y);
-
-/// \brief Poison comparison between two \c Optional objects. Clients needs to
-/// explicitly compare the underlying values and account for empty \c Optional
-/// objects.
-///
-/// This routine will never be defined. It returns \c void to help diagnose
-/// errors at compile time.
-template<typename T, typename U>
-void operator<(const Optional<T> &X, const Optional<U> &Y);
-
-/// \brief Poison comparison between two \c Optional objects. Clients needs to
-/// explicitly compare the underlying values and account for empty \c Optional
-/// objects.
-///
-/// This routine will never be defined. It returns \c void to help diagnose
-/// errors at compile time.
-template<typename T, typename U>
-void operator<=(const Optional<T> &X, const Optional<U> &Y);
-
-/// \brief Poison comparison between two \c Optional objects. Clients needs to
-/// explicitly compare the underlying values and account for empty \c Optional
-/// objects.
-///
-/// This routine will never be defined. It returns \c void to help diagnose
-/// errors at compile time.
-template<typename T, typename U>
-void operator>=(const Optional<T> &X, const Optional<U> &Y);
-
-/// \brief Poison comparison between two \c Optional objects. Clients needs to
-/// explicitly compare the underlying values and account for empty \c Optional
-/// objects.
-///
-/// This routine will never be defined. It returns \c void to help diagnose
-/// errors at compile time.
-template<typename T, typename U>
-void operator>(const Optional<T> &X, const Optional<U> &Y);
+
+template <typename T> bool operator<(const Optional<T> &X, NoneType) {
+ return false;
+}
+
+template <typename T> bool operator<(NoneType, const Optional<T> &X) {
+ return X.hasValue();
+}
+
+template <typename T> bool operator<=(const Optional<T> &X, NoneType) {
+ return !(None < X);
+}
+
+template <typename T> bool operator<=(NoneType, const Optional<T> &X) {
+ return !(X < None);
+}
+
+template <typename T> bool operator>(const Optional<T> &X, NoneType) {
+ return None < X;
+}
+
+template <typename T> bool operator>(NoneType, const Optional<T> &X) {
+ return X < None;
+}
+
+template <typename T> bool operator>=(const Optional<T> &X, NoneType) {
+ return None <= X;
+}
+
+template <typename T> bool operator>=(NoneType, const Optional<T> &X) {
+ return X <= None;
+}
+
+template <typename T> bool operator==(const Optional<T> &X, const T &Y) {
+ return X && *X == Y;
+}
+
+template <typename T> bool operator==(const T &X, const Optional<T> &Y) {
+ return Y && X == *Y;
+}
+
+template <typename T> bool operator!=(const Optional<T> &X, const T &Y) {
+ return !(X == Y);
+}
+
+template <typename T> bool operator!=(const T &X, const Optional<T> &Y) {
+ return !(X == Y);
+}
+
+template <typename T> bool operator<(const Optional<T> &X, const T &Y) {
+ return !X || *X < Y;
+}
+
+template <typename T> bool operator<(const T &X, const Optional<T> &Y) {
+ return Y && X < *Y;
+}
+
+template <typename T> bool operator<=(const Optional<T> &X, const T &Y) {
+ return !(Y < X);
+}
+
+template <typename T> bool operator<=(const T &X, const Optional<T> &Y) {
+ return !(Y < X);
+}
+
+template <typename T> bool operator>(const Optional<T> &X, const T &Y) {
+ return Y < X;
+}
+
+template <typename T> bool operator>(const T &X, const Optional<T> &Y) {
+ return Y < X;
+}
+
+template <typename T> bool operator>=(const Optional<T> &X, const T &Y) {
+ return !(X < Y);
+}
+
+template <typename T> bool operator>=(const T &X, const Optional<T> &Y) {
+ return !(X < Y);
+}
} // end llvm namespace
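
With the poison declarations replaced by real comparison operators, Optional values can now be compared directly; an empty Optional compares equal to None and sorts before any engaged value. A small sketch:

    #include "llvm/ADT/Optional.h"
    #include <cassert>

    void sketch() {
      llvm::Optional<int> A = 1, B = 2, C;  // C is empty
      assert(A < B);                        // compares the contained values
      assert(C < A && C < B);               // empty sorts before engaged
      assert(A == 1 && 3 > B);              // mixed Optional<T> / T comparisons
      assert(C == llvm::None && A != llvm::None);
    }
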
diff --git a/contrib/llvm/include/llvm/ADT/PackedVector.h b/contrib/llvm/include/llvm/ADT/PackedVector.h
index 09267173fd77..8f925f1ff5cb 100644
--- a/contrib/llvm/include/llvm/ADT/PackedVector.h
+++ b/contrib/llvm/include/llvm/ADT/PackedVector.h
@@ -15,6 +15,7 @@
#define LLVM_ADT_PACKEDVECTOR_H
#include "llvm/ADT/BitVector.h"
+#include <cassert>
#include <limits>
namespace llvm {
@@ -83,14 +84,15 @@ public:
PackedVector &Vec;
const unsigned Idx;
- reference(); // Undefined
public:
+ reference() = delete;
reference(PackedVector &vec, unsigned idx) : Vec(vec), Idx(idx) {}
reference &operator=(T val) {
Vec.setValue(Vec.Bits, Idx, val);
return *this;
}
+
operator T() const {
return Vec.getValue(Vec.Bits, Idx);
}
@@ -144,6 +146,6 @@ public:
// Leave BitNum=0 undefined.
template <typename T> class PackedVector<T, 0>;
-} // end llvm namespace
+} // end namespace llvm
-#endif
+#endif // LLVM_ADT_PACKEDVECTOR_H
diff --git a/contrib/llvm/include/llvm/ADT/PointerSumType.h b/contrib/llvm/include/llvm/ADT/PointerSumType.h
index 6b8618fc5a17..005b1c645c93 100644
--- a/contrib/llvm/include/llvm/ADT/PointerSumType.h
+++ b/contrib/llvm/include/llvm/ADT/PointerSumType.h
@@ -54,7 +54,7 @@ struct PointerSumTypeHelper;
///
/// It also default constructs to a zero tag with a null pointer, whatever that
/// would be. This means that the zero value for the tag type is significant
-/// and may be desireable to set to a state that is particularly desirable to
+/// and may be desirable to set to a state that is particularly desirable to
/// default construct.
///
/// There is no support for constructing or accessing with a dynamic tag as
diff --git a/contrib/llvm/include/llvm/ADT/PointerUnion.h b/contrib/llvm/include/llvm/ADT/PointerUnion.h
index 6b3fe5749ad5..a8ac18645f3a 100644
--- a/contrib/llvm/include/llvm/ADT/PointerUnion.h
+++ b/contrib/llvm/include/llvm/ADT/PointerUnion.h
@@ -17,7 +17,10 @@
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/PointerIntPair.h"
-#include "llvm/Support/Compiler.h"
+#include "llvm/Support/PointerLikeTypeTraits.h"
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
namespace llvm {
@@ -57,6 +60,7 @@ template <typename PT1, typename PT2> class PointerUnionUIntTraits {
public:
static inline void *getAsVoidPointer(void *P) { return P; }
static inline void *getFromVoidPointer(void *P) { return P; }
+
enum {
PT1BitsAv = (int)(PointerLikeTypeTraits<PT1>::NumLowBitsAvailable),
PT2BitsAv = (int)(PointerLikeTypeTraits<PT2>::NumLowBitsAvailable),
@@ -97,7 +101,7 @@ private:
template <typename T> struct UNION_DOESNT_CONTAIN_TYPE {};
public:
- PointerUnion() {}
+ PointerUnion() = default;
PointerUnion(PT1 V)
: Val(const_cast<void *>(
@@ -208,6 +212,7 @@ public:
static inline void *getAsVoidPointer(const PointerUnion<PT1, PT2> &P) {
return P.getOpaqueValue();
}
+
static inline PointerUnion<PT1, PT2> getFromVoidPointer(void *P) {
return PointerUnion<PT1, PT2>::getFromOpaqueValue(P);
}
@@ -249,7 +254,7 @@ private:
};
public:
- PointerUnion3() {}
+ PointerUnion3() = default;
PointerUnion3(PT1 V) { Val = InnerUnion(V); }
PointerUnion3(PT2 V) { Val = InnerUnion(V); }
@@ -328,6 +333,7 @@ public:
static inline void *getAsVoidPointer(const PointerUnion3<PT1, PT2, PT3> &P) {
return P.getOpaqueValue();
}
+
static inline PointerUnion3<PT1, PT2, PT3> getFromVoidPointer(void *P) {
return PointerUnion3<PT1, PT2, PT3>::getFromOpaqueValue(P);
}
@@ -352,7 +358,7 @@ private:
ValTy Val;
public:
- PointerUnion4() {}
+ PointerUnion4() = default;
PointerUnion4(PT1 V) { Val = InnerUnion1(V); }
PointerUnion4(PT2 V) { Val = InnerUnion1(V); }
@@ -435,6 +441,7 @@ public:
getAsVoidPointer(const PointerUnion4<PT1, PT2, PT3, PT4> &P) {
return P.getOpaqueValue();
}
+
static inline PointerUnion4<PT1, PT2, PT3, PT4> getFromVoidPointer(void *P) {
return PointerUnion4<PT1, PT2, PT3, PT4>::getFromOpaqueValue(P);
}
@@ -469,6 +476,6 @@ template <typename T, typename U> struct DenseMapInfo<PointerUnion<T, U>> {
}
};
-}
+} // end namespace llvm
-#endif
+#endif // LLVM_ADT_POINTERUNION_H
diff --git a/contrib/llvm/include/llvm/ADT/PostOrderIterator.h b/contrib/llvm/include/llvm/ADT/PostOrderIterator.h
index 0cc504b5c39e..e519b5c07964 100644
--- a/contrib/llvm/include/llvm/ADT/PostOrderIterator.h
+++ b/contrib/llvm/include/llvm/ADT/PostOrderIterator.h
@@ -17,9 +17,12 @@
#define LLVM_ADT_POSTORDERITERATOR_H
#include "llvm/ADT/GraphTraits.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/iterator_range.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include <iterator>
#include <set>
+#include <utility>
#include <vector>
namespace llvm {
@@ -54,22 +57,23 @@ namespace llvm {
template<class SetType, bool External>
class po_iterator_storage {
SetType Visited;
+
public:
// Return true if edge destination should be visited.
- template<typename NodeType>
- bool insertEdge(NodeType *From, NodeType *To) {
+ template <typename NodeRef>
+ bool insertEdge(Optional<NodeRef> From, NodeRef To) {
return Visited.insert(To).second;
}
// Called after all children of BB have been visited.
- template<typename NodeType>
- void finishPostorder(NodeType *BB) {}
+ template <typename NodeRef> void finishPostorder(NodeRef BB) {}
};
/// Specialization of po_iterator_storage that references an external set.
template<class SetType>
class po_iterator_storage<SetType, true> {
SetType &Visited;
+
public:
po_iterator_storage(SetType &VSet) : Visited(VSet) {}
po_iterator_storage(const po_iterator_storage &S) : Visited(S.Visited) {}
@@ -77,51 +81,50 @@ public:
// Return true if edge destination should be visited, called with From = 0 for
// the root node.
// Graph edges can be pruned by specializing this function.
- template <class NodeType> bool insertEdge(NodeType *From, NodeType *To) {
+ template <class NodeRef> bool insertEdge(Optional<NodeRef> From, NodeRef To) {
return Visited.insert(To).second;
}
// Called after all children of BB have been visited.
- template<class NodeType>
- void finishPostorder(NodeType *BB) {}
+ template <class NodeRef> void finishPostorder(NodeRef BB) {}
};
-template<class GraphT,
- class SetType = llvm::SmallPtrSet<typename GraphTraits<GraphT>::NodeType*, 8>,
- bool ExtStorage = false,
- class GT = GraphTraits<GraphT> >
-class po_iterator : public std::iterator<std::forward_iterator_tag,
- typename GT::NodeType, ptrdiff_t>,
- public po_iterator_storage<SetType, ExtStorage> {
- typedef std::iterator<std::forward_iterator_tag,
- typename GT::NodeType, ptrdiff_t> super;
- typedef typename GT::NodeType NodeType;
+template <class GraphT,
+ class SetType =
+ SmallPtrSet<typename GraphTraits<GraphT>::NodeRef, 8>,
+ bool ExtStorage = false, class GT = GraphTraits<GraphT>>
+class po_iterator
+ : public std::iterator<std::forward_iterator_tag, typename GT::NodeRef>,
+ public po_iterator_storage<SetType, ExtStorage> {
+ typedef std::iterator<std::forward_iterator_tag, typename GT::NodeRef> super;
+ typedef typename GT::NodeRef NodeRef;
typedef typename GT::ChildIteratorType ChildItTy;
// VisitStack - Used to maintain the ordering. Top = current block
// First element is basic block pointer, second is the 'next child' to visit
- std::vector<std::pair<NodeType *, ChildItTy> > VisitStack;
+ std::vector<std::pair<NodeRef, ChildItTy>> VisitStack;
void traverseChild() {
while (VisitStack.back().second != GT::child_end(VisitStack.back().first)) {
- NodeType *BB = *VisitStack.back().second++;
- if (this->insertEdge(VisitStack.back().first, BB)) {
+ NodeRef BB = *VisitStack.back().second++;
+ if (this->insertEdge(Optional<NodeRef>(VisitStack.back().first), BB)) {
// If the block is not visited...
VisitStack.push_back(std::make_pair(BB, GT::child_begin(BB)));
}
}
}
- po_iterator(NodeType *BB) {
- this->insertEdge((NodeType*)nullptr, BB);
+ po_iterator(NodeRef BB) {
+ this->insertEdge(Optional<NodeRef>(), BB);
VisitStack.push_back(std::make_pair(BB, GT::child_begin(BB)));
traverseChild();
}
- po_iterator() {} // End is when stack is empty.
- po_iterator(NodeType *BB, SetType &S)
+ po_iterator() = default; // End is when stack is empty.
+
+ po_iterator(NodeRef BB, SetType &S)
: po_iterator_storage<SetType, ExtStorage>(S) {
- if (this->insertEdge((NodeType*)nullptr, BB)) {
+ if (this->insertEdge(Optional<NodeRef>(), BB)) {
VisitStack.push_back(std::make_pair(BB, GT::child_begin(BB)));
traverseChild();
}
@@ -130,6 +133,7 @@ class po_iterator : public std::iterator<std::forward_iterator_tag,
po_iterator(SetType &S)
: po_iterator_storage<SetType, ExtStorage>(S) {
} // End is when stack is empty.
+
public:
typedef typename super::pointer pointer;
@@ -149,13 +153,13 @@ public:
}
bool operator!=(const po_iterator &x) const { return !(*this == x); }
- pointer operator*() const { return VisitStack.back().first; }
+ const NodeRef &operator*() const { return VisitStack.back().first; }
// This is a nonstandard operator-> that dereferences the pointer an extra
// time... so that you can actually call methods ON the BasicBlock, because
// the contained type is a pointer. This allows BBIt->getTerminator() f.e.
//
- NodeType *operator->() const { return **this; }
+ NodeRef operator->() const { return **this; }
po_iterator &operator++() { // Preincrement
this->finishPostorder(VisitStack.back().first);
@@ -184,7 +188,7 @@ template <class T> iterator_range<po_iterator<T>> post_order(const T &G) {
}
// Provide global definitions of external postorder iterators...
-template<class T, class SetType=std::set<typename GraphTraits<T>::NodeType*> >
+template <class T, class SetType = std::set<typename GraphTraits<T>::NodeRef>>
struct po_ext_iterator : public po_iterator<T, SetType, true> {
po_ext_iterator(const po_iterator<T, SetType, true> &V) :
po_iterator<T, SetType, true>(V) {}
@@ -206,10 +210,9 @@ iterator_range<po_ext_iterator<T, SetType>> post_order_ext(const T &G, SetType &
}
// Provide global definitions of inverse post order iterators...
-template <class T,
- class SetType = std::set<typename GraphTraits<T>::NodeType*>,
+template <class T, class SetType = std::set<typename GraphTraits<T>::NodeRef>,
bool External = false>
-struct ipo_iterator : public po_iterator<Inverse<T>, SetType, External > {
+struct ipo_iterator : public po_iterator<Inverse<T>, SetType, External> {
ipo_iterator(const po_iterator<Inverse<T>, SetType, External> &V) :
po_iterator<Inverse<T>, SetType, External> (V) {}
};
@@ -230,8 +233,7 @@ iterator_range<ipo_iterator<T>> inverse_post_order(const T &G) {
}
// Provide global definitions of external inverse postorder iterators...
-template <class T,
- class SetType = std::set<typename GraphTraits<T>::NodeType*> >
+template <class T, class SetType = std::set<typename GraphTraits<T>::NodeRef>>
struct ipo_ext_iterator : public ipo_iterator<T, SetType, true> {
ipo_ext_iterator(const ipo_iterator<T, SetType, true> &V) :
ipo_iterator<T, SetType, true>(V) {}
@@ -278,15 +280,17 @@ inverse_post_order_ext(const T &G, SetType &S) {
// }
//
-template<class GraphT, class GT = GraphTraits<GraphT> >
+template<class GraphT, class GT = GraphTraits<GraphT>>
class ReversePostOrderTraversal {
- typedef typename GT::NodeType NodeType;
- std::vector<NodeType*> Blocks; // Block list in normal PO order
- void Initialize(NodeType *BB) {
+ typedef typename GT::NodeRef NodeRef;
+ std::vector<NodeRef> Blocks; // Block list in normal PO order
+
+ void Initialize(NodeRef BB) {
std::copy(po_begin(BB), po_end(BB), std::back_inserter(Blocks));
}
+
public:
- typedef typename std::vector<NodeType*>::reverse_iterator rpo_iterator;
+ typedef typename std::vector<NodeRef>::reverse_iterator rpo_iterator;
ReversePostOrderTraversal(GraphT G) { Initialize(GT::getEntryNode(G)); }
@@ -295,6 +299,6 @@ public:
rpo_iterator end() { return Blocks.rend(); }
};
-} // End llvm namespace
+} // end namespace llvm
-#endif
+#endif // LLVM_ADT_POSTORDERITERATOR_H
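
The iterators above now traffic in GraphTraits<GraphT>::NodeRef values rather than NodeType pointers, but typical call sites read the same. A sketch against the CFG graph traits (assuming the specializations from llvm/IR/CFG.h, where NodeRef is BasicBlock *):

    #include "llvm/ADT/PostOrderIterator.h"
    #include "llvm/IR/CFG.h"
    #include "llvm/IR/Function.h"

    void sketch(llvm::Function &F) {
      for (llvm::BasicBlock *BB : llvm::post_order(&F))
        (void)BB;  // each block is visited after all of its successors

      llvm::ReversePostOrderTraversal<llvm::Function *> RPOT(&F);
      for (llvm::BasicBlock *BB : RPOT)
        (void)BB;  // entry block first
    }
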
diff --git a/contrib/llvm/include/llvm/ADT/PriorityQueue.h b/contrib/llvm/include/llvm/ADT/PriorityQueue.h
index 827d0b346e59..8ba871e25304 100644
--- a/contrib/llvm/include/llvm/ADT/PriorityQueue.h
+++ b/contrib/llvm/include/llvm/ADT/PriorityQueue.h
@@ -46,8 +46,7 @@ public:
///
void erase_one(const T &t) {
// Linear-search to find the element.
- typename Sequence::size_type i =
- std::find(this->c.begin(), this->c.end(), t) - this->c.begin();
+ typename Sequence::size_type i = find(this->c, t) - this->c.begin();
// Logarithmic-time heap bubble-up.
while (i != 0) {
diff --git a/contrib/llvm/include/llvm/ADT/PriorityWorklist.h b/contrib/llvm/include/llvm/ADT/PriorityWorklist.h
index 00549b88fd02..c0b4709e98f8 100644
--- a/contrib/llvm/include/llvm/ADT/PriorityWorklist.h
+++ b/contrib/llvm/include/llvm/ADT/PriorityWorklist.h
@@ -17,10 +17,12 @@
#define LLVM_ADT_PRIORITYWORKLIST_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Compiler.h"
#include <algorithm>
#include <cassert>
-#include <utility>
+#include <cstddef>
#include <vector>
namespace llvm {
@@ -59,7 +61,7 @@ public:
typedef typename MapT::size_type size_type;
/// Construct an empty PriorityWorklist
- PriorityWorklist() {}
+ PriorityWorklist() = default;
/// Determine if the PriorityWorklist is empty or not.
bool empty() const {
@@ -115,7 +117,7 @@ public:
} while (!V.empty() && V.back() == T());
}
- T LLVM_ATTRIBUTE_UNUSED_RESULT pop_back_val() {
+ LLVM_NODISCARD T pop_back_val() {
T Ret = back();
pop_back();
return Ret;
@@ -147,7 +149,7 @@ public:
/// write it:
///
/// \code
- /// V.erase(std::remove_if(V.begin(), V.end(), P), V.end());
+ /// V.erase(remove_if(V, P), V.end());
/// \endcode
///
/// However, PriorityWorklist doesn't expose non-const iterators, making any
@@ -156,8 +158,8 @@ public:
/// \returns true if any element is removed.
template <typename UnaryPredicate>
bool erase_if(UnaryPredicate P) {
- typename VectorT::iterator E = std::remove_if(
- V.begin(), V.end(), TestAndEraseFromMap<UnaryPredicate>(P, M));
+ typename VectorT::iterator E =
+ remove_if(V, TestAndEraseFromMap<UnaryPredicate>(P, M));
if (E == V.end())
return false;
for (auto I = V.begin(); I != E; ++I)
@@ -216,9 +218,9 @@ class SmallPriorityWorklist
: public PriorityWorklist<T, SmallVector<T, N>,
SmallDenseMap<T, ptrdiff_t>> {
public:
- SmallPriorityWorklist() {}
+ SmallPriorityWorklist() = default;
};
-}
+} // end namespace llvm
-#endif
+#endif // LLVM_ADT_PRIORITYWORKLIST_H
diff --git a/contrib/llvm/include/llvm/ADT/SCCIterator.h b/contrib/llvm/include/llvm/ADT/SCCIterator.h
index e89345c0f348..9a8a7b168fce 100644
--- a/contrib/llvm/include/llvm/ADT/SCCIterator.h
+++ b/contrib/llvm/include/llvm/ADT/SCCIterator.h
@@ -26,6 +26,9 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/iterator.h"
+#include <cassert>
+#include <cstddef>
+#include <iterator>
#include <vector>
namespace llvm {
@@ -93,7 +96,7 @@ class scc_iterator : public iterator_facade_base<
}
/// End is when the DFS stack is empty.
- scc_iterator() {}
+ scc_iterator() = default;
public:
static scc_iterator begin(const GraphT &G) {
@@ -230,15 +233,15 @@ template <class T> scc_iterator<T> scc_end(const T &G) {
}
/// \brief Construct the begin iterator for a deduced graph type T's Inverse<T>.
-template <class T> scc_iterator<Inverse<T> > scc_begin(const Inverse<T> &G) {
- return scc_iterator<Inverse<T> >::begin(G);
+template <class T> scc_iterator<Inverse<T>> scc_begin(const Inverse<T> &G) {
+ return scc_iterator<Inverse<T>>::begin(G);
}
/// \brief Construct the end iterator for a deduced graph type T's Inverse<T>.
-template <class T> scc_iterator<Inverse<T> > scc_end(const Inverse<T> &G) {
- return scc_iterator<Inverse<T> >::end(G);
+template <class T> scc_iterator<Inverse<T>> scc_end(const Inverse<T> &G) {
+ return scc_iterator<Inverse<T>>::end(G);
}
-} // End llvm namespace
+} // end namespace llvm
-#endif
+#endif // LLVM_ADT_SCCITERATOR_H
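
The scc_iterator interface is unchanged by the cleanup above; a typical walk over the strongly connected components of a function's CFG (again assuming the GraphTraits from llvm/IR/CFG.h) looks like:

    #include "llvm/ADT/SCCIterator.h"
    #include "llvm/IR/CFG.h"
    #include "llvm/IR/Function.h"

    void sketch(llvm::Function &F) {
      for (auto I = llvm::scc_begin(&F); !I.isAtEnd(); ++I) {
        const auto &SCC = *I;        // the blocks forming one component
        bool InCycle = I.hasLoop();  // true if the component contains a cycle
        (void)SCC; (void)InCycle;
      }
    }
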
diff --git a/contrib/llvm/include/llvm/ADT/STLExtras.h b/contrib/llvm/include/llvm/ADT/STLExtras.h
index 00b796f63818..ec121e0d55cd 100644
--- a/contrib/llvm/include/llvm/ADT/STLExtras.h
+++ b/contrib/llvm/include/llvm/ADT/STLExtras.h
@@ -24,18 +24,25 @@
#include <functional>
#include <iterator>
#include <memory>
+#include <tuple>
#include <utility> // for std::pair
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
namespace llvm {
+
+// Only used by compiler if both template types are the same. Useful when
+// using SFINAE to test for the existence of member functions.
+template <typename T, T> struct SameType;
+
namespace detail {
template <typename RangeT>
-using IterOfRange = decltype(std::begin(std::declval<RangeT>()));
+using IterOfRange = decltype(std::begin(std::declval<RangeT &>()));
} // End detail namespace
@@ -208,11 +215,24 @@ inline mapped_iterator<ItTy, FuncTy> map_iterator(const ItTy &I, FuncTy F) {
return mapped_iterator<ItTy, FuncTy>(I, F);
}
-/// \brief Metafunction to determine if type T has a member called rbegin().
-template <typename T> struct has_rbegin {
- template <typename U> static char(&f(const U &, decltype(&U::rbegin)))[1];
- static char(&f(...))[2];
- const static bool value = sizeof(f(std::declval<T>(), nullptr)) == 1;
+/// Helper to determine if type T has a member called rbegin().
+template <typename Ty> class has_rbegin_impl {
+ typedef char yes[1];
+ typedef char no[2];
+
+ template <typename Inner>
+ static yes& test(Inner *I, decltype(I->rbegin()) * = nullptr);
+
+ template <typename>
+ static no& test(...);
+
+public:
+ static const bool value = sizeof(test<Ty>(nullptr)) == sizeof(yes);
+};
+
+/// Metafunction to determine if T& or T has a member called rbegin().
+template <typename Ty>
+struct has_rbegin : has_rbegin_impl<typename std::remove_reference<Ty>::type> {
};
// Returns an iterator_range over the given container which iterates in reverse.
@@ -327,6 +347,240 @@ make_filter_range(RangeT &&Range, PredicateT Pred) {
FilterIteratorT(std::end(std::forward<RangeT>(Range))));
}
+// forward declarations required by zip_shortest/zip_first
+template <typename R, typename UnaryPredicate>
+bool all_of(R &&range, UnaryPredicate P);
+
+template <size_t... I> struct index_sequence;
+
+template <class... Ts> struct index_sequence_for;
+
+namespace detail {
+template <typename... Iters> class zip_first {
+public:
+ typedef std::input_iterator_tag iterator_category;
+ typedef std::tuple<decltype(*std::declval<Iters>())...> value_type;
+ std::tuple<Iters...> iterators;
+
+private:
+ template <size_t... Ns> value_type deres(index_sequence<Ns...>) {
+ return value_type(*std::get<Ns>(iterators)...);
+ }
+
+ template <size_t... Ns> decltype(iterators) tup_inc(index_sequence<Ns...>) {
+ return std::tuple<Iters...>(std::next(std::get<Ns>(iterators))...);
+ }
+
+public:
+ value_type operator*() { return deres(index_sequence_for<Iters...>{}); }
+
+ void operator++() { iterators = tup_inc(index_sequence_for<Iters...>{}); }
+
+ bool operator!=(const zip_first<Iters...> &other) const {
+ return std::get<0>(iterators) != std::get<0>(other.iterators);
+ }
+ zip_first(Iters &&... ts) : iterators(std::forward<Iters>(ts)...) {}
+};
+
+template <typename... Iters> class zip_shortest : public zip_first<Iters...> {
+ template <size_t... Ns>
+ bool test(const zip_first<Iters...> &other, index_sequence<Ns...>) const {
+ return all_of(std::initializer_list<bool>{std::get<Ns>(this->iterators) !=
+ std::get<Ns>(other.iterators)...},
+ identity<bool>{});
+ }
+
+public:
+ bool operator!=(const zip_first<Iters...> &other) const {
+ return test(other, index_sequence_for<Iters...>{});
+ }
+ zip_shortest(Iters &&... ts)
+ : zip_first<Iters...>(std::forward<Iters>(ts)...) {}
+};
+
+template <template <typename...> class ItType, typename... Args> class zippy {
+public:
+ typedef ItType<decltype(std::begin(std::declval<Args>()))...> iterator;
+
+private:
+ std::tuple<Args...> ts;
+
+ template <size_t... Ns> iterator begin_impl(index_sequence<Ns...>) {
+ return iterator(std::begin(std::get<Ns>(ts))...);
+ }
+ template <size_t... Ns> iterator end_impl(index_sequence<Ns...>) {
+ return iterator(std::end(std::get<Ns>(ts))...);
+ }
+
+public:
+ iterator begin() { return begin_impl(index_sequence_for<Args...>{}); }
+ iterator end() { return end_impl(index_sequence_for<Args...>{}); }
+ zippy(Args &&... ts_) : ts(std::forward<Args>(ts_)...) {}
+};
+} // End detail namespace
+
+/// zip iterator for two or more iterable types.
+template <typename T, typename U, typename... Args>
+detail::zippy<detail::zip_shortest, T, U, Args...> zip(T &&t, U &&u,
+ Args &&... args) {
+ return detail::zippy<detail::zip_shortest, T, U, Args...>(
+ std::forward<T>(t), std::forward<U>(u), std::forward<Args>(args)...);
+}
+
+/// zip iterator that, for the sake of efficiency, assumes the first iteratee to
+/// be the shortest.
+template <typename T, typename U, typename... Args>
+detail::zippy<detail::zip_first, T, U, Args...> zip_first(T &&t, U &&u,
+ Args &&... args) {
+ return detail::zippy<detail::zip_first, T, U, Args...>(
+ std::forward<T>(t), std::forward<U>(u), std::forward<Args>(args)...);
+}
+
+/// Iterator wrapper that concatenates sequences together.
+///
+/// This can concatenate different iterators, even with different types, into
+/// a single iterator provided the value types of all the concatenated
+/// iterators expose `reference` and `pointer` types that can be converted to
+/// `ValueT &` and `ValueT *` respectively. It doesn't support more
+/// interesting/customized pointer or reference types.
+///
+/// Currently this only supports forward or higher iterator categories as
+/// inputs and always exposes a forward iterator interface.
+template <typename ValueT, typename... IterTs>
+class concat_iterator
+ : public iterator_facade_base<concat_iterator<ValueT, IterTs...>,
+ std::forward_iterator_tag, ValueT> {
+ typedef typename concat_iterator::iterator_facade_base BaseT;
+
+ /// We store both the current and end iterators for each concatenated
+ /// sequence in a tuple of pairs.
+ ///
+ /// Note that something like iterator_range seems nice at first here, but the
+ /// range properties are of little benefit and end up getting in the way
+ /// because we need to do mutation on the current iterators.
+ std::tuple<std::pair<IterTs, IterTs>...> IterPairs;
+
+ /// Attempts to increment a specific iterator.
+ ///
+ /// Returns true if it was able to increment the iterator. Returns false if
+ /// the iterator is already at the end iterator.
+ template <size_t Index> bool incrementHelper() {
+ auto &IterPair = std::get<Index>(IterPairs);
+ if (IterPair.first == IterPair.second)
+ return false;
+
+ ++IterPair.first;
+ return true;
+ }
+
+ /// Increments the first non-end iterator.
+ ///
+ /// It is an error to call this with all iterators at the end.
+ template <size_t... Ns> void increment(index_sequence<Ns...>) {
+ // Build a sequence of functions to increment each iterator if possible.
+ bool (concat_iterator::*IncrementHelperFns[])() = {
+ &concat_iterator::incrementHelper<Ns>...};
+
+ // Loop over them, and stop as soon as we succeed at incrementing one.
+ for (auto &IncrementHelperFn : IncrementHelperFns)
+ if ((this->*IncrementHelperFn)())
+ return;
+
+ llvm_unreachable("Attempted to increment an end concat iterator!");
+ }
+
+ /// Returns null if the specified iterator is at the end. Otherwise,
+ /// dereferences the iterator and returns the address of the resulting
+ /// reference.
+ template <size_t Index> ValueT *getHelper() const {
+ auto &IterPair = std::get<Index>(IterPairs);
+ if (IterPair.first == IterPair.second)
+ return nullptr;
+
+ return &*IterPair.first;
+ }
+
+ /// Finds the first non-end iterator, dereferences, and returns the resulting
+ /// reference.
+ ///
+ /// It is an error to call this with all iterators at the end.
+ template <size_t... Ns> ValueT &get(index_sequence<Ns...>) const {
+ // Build a sequence of functions to get from iterator if possible.
+ ValueT *(concat_iterator::*GetHelperFns[])() const = {
+ &concat_iterator::getHelper<Ns>...};
+
+ // Loop over them, and return the first result we find.
+ for (auto &GetHelperFn : GetHelperFns)
+ if (ValueT *P = (this->*GetHelperFn)())
+ return *P;
+
+ llvm_unreachable("Attempted to get a pointer from an end concat iterator!");
+ }
+
+public:
+  /// Constructs an iterator from a sequence of ranges.
+ ///
+ /// We need the full range to know how to switch between each of the
+ /// iterators.
+ template <typename... RangeTs>
+ explicit concat_iterator(RangeTs &&... Ranges)
+ : IterPairs({std::begin(Ranges), std::end(Ranges)}...) {}
+
+ using BaseT::operator++;
+ concat_iterator &operator++() {
+ increment(index_sequence_for<IterTs...>());
+ return *this;
+ }
+
+ ValueT &operator*() const { return get(index_sequence_for<IterTs...>()); }
+
+ bool operator==(const concat_iterator &RHS) const {
+ return IterPairs == RHS.IterPairs;
+ }
+};
+
+namespace detail {
+/// Helper to store a sequence of ranges being concatenated and access them.
+///
+/// This is designed to facilitate providing actual storage when temporaries
+/// are passed into the constructor such that we can use it as part of range
+/// based for loops.
+template <typename ValueT, typename... RangeTs> class concat_range {
+public:
+ typedef concat_iterator<ValueT,
+ decltype(std::begin(std::declval<RangeTs &>()))...>
+ iterator;
+
+private:
+ std::tuple<RangeTs...> Ranges;
+
+ template <size_t... Ns> iterator begin_impl(index_sequence<Ns...>) {
+ return iterator(std::get<Ns>(Ranges)...);
+ }
+ template <size_t... Ns> iterator end_impl(index_sequence<Ns...>) {
+ return iterator(make_range(std::end(std::get<Ns>(Ranges)),
+ std::end(std::get<Ns>(Ranges)))...);
+ }
+
+public:
+ iterator begin() { return begin_impl(index_sequence_for<RangeTs...>{}); }
+ iterator end() { return end_impl(index_sequence_for<RangeTs...>{}); }
+ concat_range(RangeTs &&... Ranges)
+ : Ranges(std::forward<RangeTs>(Ranges)...) {}
+};
+}
+
+/// Concatenated range across two or more ranges.
+///
+/// The desired value type must be explicitly specified.
+template <typename ValueT, typename... RangeTs>
+detail::concat_range<ValueT, RangeTs...> concat(RangeTs &&... Ranges) {
+ static_assert(sizeof...(RangeTs) > 1,
+ "Need more than one range to concatenate!");
+ return detail::concat_range<ValueT, RangeTs...>(
+ std::forward<RangeTs>(Ranges)...);
+}
+
//===----------------------------------------------------------------------===//
// Extra additions to <utility>
//===----------------------------------------------------------------------===//
@@ -353,7 +607,7 @@ struct less_second {
template <class T, T... I> struct integer_sequence {
typedef T value_type;
- static LLVM_CONSTEXPR size_t size() { return sizeof...(I); }
+ static constexpr size_t size() { return sizeof...(I); }
};
/// \brief Alias for the common case of a sequence of size_ts.
@@ -369,13 +623,30 @@ struct build_index_impl<0, I...> : index_sequence<I...> {};
template <class... Ts>
struct index_sequence_for : build_index_impl<sizeof...(Ts)> {};
+/// Utility type to build an inheritance chain that makes it easy to rank
+/// overload candidates.
+template <int N> struct rank : rank<N - 1> {};
+template <> struct rank<0> {};
+
+/// \brief traits class for checking whether type T is one of any of the given
+/// types in the variadic list.
+template <typename T, typename... Ts> struct is_one_of {
+ static const bool value = false;
+};
+
+template <typename T, typename U, typename... Ts>
+struct is_one_of<T, U, Ts...> {
+ static const bool value =
+ std::is_same<T, U>::value || is_one_of<T, Ts...>::value;
+};
+
//===----------------------------------------------------------------------===//
// Extra additions for arrays
//===----------------------------------------------------------------------===//
/// Find the length of an array.
template <class T, std::size_t N>
-LLVM_CONSTEXPR inline size_t array_lengthof(T (&)[N]) {
+constexpr inline size_t array_lengthof(T (&)[N]) {
return N;
}
@@ -445,8 +716,8 @@ inline void array_pod_sort(
/// container.
template<typename Container>
void DeleteContainerPointers(Container &C) {
- for (typename Container::iterator I = C.begin(), E = C.end(); I != E; ++I)
- delete *I;
+ for (auto V : C)
+ delete V;
C.clear();
}
@@ -454,77 +725,106 @@ void DeleteContainerPointers(Container &C) {
/// deletes the second elements and then clears the container.
template<typename Container>
void DeleteContainerSeconds(Container &C) {
- for (typename Container::iterator I = C.begin(), E = C.end(); I != E; ++I)
- delete I->second;
+ for (auto &V : C)
+ delete V.second;
C.clear();
}
/// Provide wrappers to std::all_of which take ranges instead of having to pass
/// begin/end explicitly.
-template<typename R, class UnaryPredicate>
-bool all_of(R &&Range, UnaryPredicate &&P) {
- return std::all_of(Range.begin(), Range.end(),
- std::forward<UnaryPredicate>(P));
+template <typename R, typename UnaryPredicate>
+bool all_of(R &&Range, UnaryPredicate P) {
+ return std::all_of(std::begin(Range), std::end(Range), P);
}
/// Provide wrappers to std::any_of which take ranges instead of having to pass
/// begin/end explicitly.
-template <typename R, class UnaryPredicate>
-bool any_of(R &&Range, UnaryPredicate &&P) {
- return std::any_of(Range.begin(), Range.end(),
- std::forward<UnaryPredicate>(P));
+template <typename R, typename UnaryPredicate>
+bool any_of(R &&Range, UnaryPredicate P) {
+ return std::any_of(std::begin(Range), std::end(Range), P);
}
/// Provide wrappers to std::none_of which take ranges instead of having to pass
/// begin/end explicitly.
-template <typename R, class UnaryPredicate>
-bool none_of(R &&Range, UnaryPredicate &&P) {
- return std::none_of(Range.begin(), Range.end(),
- std::forward<UnaryPredicate>(P));
+template <typename R, typename UnaryPredicate>
+bool none_of(R &&Range, UnaryPredicate P) {
+ return std::none_of(std::begin(Range), std::end(Range), P);
}
/// Provide wrappers to std::find which take ranges instead of having to pass
/// begin/end explicitly.
-template<typename R, class T>
-auto find(R &&Range, const T &val) -> decltype(Range.begin()) {
- return std::find(Range.begin(), Range.end(), val);
+template <typename R, typename T>
+auto find(R &&Range, const T &Val) -> decltype(std::begin(Range)) {
+ return std::find(std::begin(Range), std::end(Range), Val);
}
/// Provide wrappers to std::find_if which take ranges instead of having to pass
/// begin/end explicitly.
-template <typename R, class T>
-auto find_if(R &&Range, const T &Pred) -> decltype(Range.begin()) {
- return std::find_if(Range.begin(), Range.end(), Pred);
+template <typename R, typename UnaryPredicate>
+auto find_if(R &&Range, UnaryPredicate P) -> decltype(std::begin(Range)) {
+ return std::find_if(std::begin(Range), std::end(Range), P);
+}
+
+template <typename R, typename UnaryPredicate>
+auto find_if_not(R &&Range, UnaryPredicate P) -> decltype(std::begin(Range)) {
+ return std::find_if_not(std::begin(Range), std::end(Range), P);
}
/// Provide wrappers to std::remove_if which take ranges instead of having to
/// pass begin/end explicitly.
-template<typename R, class UnaryPredicate>
-auto remove_if(R &&Range, UnaryPredicate &&P) -> decltype(Range.begin()) {
- return std::remove_if(Range.begin(), Range.end(), P);
+template <typename R, typename UnaryPredicate>
+auto remove_if(R &&Range, UnaryPredicate P) -> decltype(std::begin(Range)) {
+ return std::remove_if(std::begin(Range), std::end(Range), P);
}
/// Wrapper function around std::find to detect if an element exists
/// in a container.
template <typename R, typename E>
bool is_contained(R &&Range, const E &Element) {
- return std::find(Range.begin(), Range.end(), Element) != Range.end();
+ return std::find(std::begin(Range), std::end(Range), Element) !=
+ std::end(Range);
+}
+
+/// Wrapper function around std::count to count the number of times an element
+/// \p Element occurs in the given range \p Range.
+template <typename R, typename E>
+auto count(R &&Range, const E &Element) -> typename std::iterator_traits<
+ decltype(std::begin(Range))>::difference_type {
+ return std::count(std::begin(Range), std::end(Range), Element);
}
/// Wrapper function around std::count_if to count the number of times an
/// element satisfying a given predicate occurs in a range.
template <typename R, typename UnaryPredicate>
-auto count_if(R &&Range, UnaryPredicate &&P)
- -> typename std::iterator_traits<decltype(Range.begin())>::difference_type {
- return std::count_if(Range.begin(), Range.end(), P);
+auto count_if(R &&Range, UnaryPredicate P) -> typename std::iterator_traits<
+ decltype(std::begin(Range))>::difference_type {
+ return std::count_if(std::begin(Range), std::end(Range), P);
}
/// Wrapper function around std::transform to apply a function to a range and
/// store the result elsewhere.
-template <typename R, class OutputIt, typename UnaryPredicate>
-OutputIt transform(R &&Range, OutputIt d_first, UnaryPredicate &&P) {
- return std::transform(Range.begin(), Range.end(), d_first,
- std::forward<UnaryPredicate>(P));
+template <typename R, typename OutputIt, typename UnaryPredicate>
+OutputIt transform(R &&Range, OutputIt d_first, UnaryPredicate P) {
+ return std::transform(std::begin(Range), std::end(Range), d_first, P);
+}
+
+/// Provide wrappers to std::partition which take ranges instead of having to
+/// pass begin/end explicitly.
+template <typename R, typename UnaryPredicate>
+auto partition(R &&Range, UnaryPredicate P) -> decltype(std::begin(Range)) {
+ return std::partition(std::begin(Range), std::end(Range), P);
+}
+
+/// Provide a container algorithm similar to C++ Library Fundamentals v2's
+/// `erase_if` which is equivalent to:
+///
+/// C.erase(remove_if(C, pred), C.end());
+///
+/// This version works for any container with an erase method call accepting
+/// two iterators.
+template <typename Container, typename UnaryPredicate>
+void erase_if(Container &C, UnaryPredicate P) {
+ C.erase(remove_if(C, P), C.end());
}
//===----------------------------------------------------------------------===//
@@ -608,6 +908,92 @@ template <typename T> struct deref {
}
};
+namespace detail {
+template <typename R> class enumerator_impl {
+public:
+ template <typename X> struct result_pair {
+ result_pair(std::size_t Index, X Value) : Index(Index), Value(Value) {}
+
+ const std::size_t Index;
+ X Value;
+ };
+
+ class iterator {
+ typedef
+ typename std::iterator_traits<IterOfRange<R>>::reference iter_reference;
+ typedef result_pair<iter_reference> result_type;
+
+ public:
+ iterator(IterOfRange<R> &&Iter, std::size_t Index)
+ : Iter(Iter), Index(Index) {}
+
+ result_type operator*() const { return result_type(Index, *Iter); }
+
+ iterator &operator++() {
+ ++Iter;
+ ++Index;
+ return *this;
+ }
+
+ bool operator!=(const iterator &RHS) const { return Iter != RHS.Iter; }
+
+ private:
+ IterOfRange<R> Iter;
+ std::size_t Index;
+ };
+
+public:
+ explicit enumerator_impl(R &&Range) : Range(std::forward<R>(Range)) {}
+
+ iterator begin() { return iterator(std::begin(Range), 0); }
+ iterator end() { return iterator(std::end(Range), std::size_t(-1)); }
+
+private:
+ R Range;
+};
+}
+
+/// Given an input range, returns a new range whose values are pairs (A, B)
+/// such that A is the 0-based index of the item in the sequence, and B is
+/// the value from the original sequence. Example:
+///
+/// std::vector<char> Items = {'A', 'B', 'C', 'D'};
+/// for (auto X : enumerate(Items)) {
+/// printf("Item %d - %c\n", X.Index, X.Value);
+/// }
+///
+/// Output:
+/// Item 0 - A
+/// Item 1 - B
+/// Item 2 - C
+/// Item 3 - D
+///
+template <typename R> detail::enumerator_impl<R> enumerate(R &&Range) {
+ return detail::enumerator_impl<R>(std::forward<R>(Range));
+}
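A small sketch of enumerate() beyond the doc-comment example (values are made up);
for an lvalue range, result_pair::Value is a reference, so the elements can be
updated in place:

  std::vector<int> Values = {10, 20, 30};
  for (auto X : llvm::enumerate(Values))
    X.Value += static_cast<int>(X.Index);  // Value aliases the element; Index is std::size_t
  // Values is now {10, 21, 32}.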
+
+namespace detail {
+template <typename F, typename Tuple, std::size_t... I>
+auto apply_tuple_impl(F &&f, Tuple &&t, index_sequence<I...>)
+ -> decltype(std::forward<F>(f)(std::get<I>(std::forward<Tuple>(t))...)) {
+ return std::forward<F>(f)(std::get<I>(std::forward<Tuple>(t))...);
+}
+}
+
+/// Given an input tuple (a1, a2, ..., an), pass the arguments of the
+/// tuple variadically to f as if by calling f(a1, a2, ..., an) and
+/// return the result.
+template <typename F, typename Tuple>
+auto apply_tuple(F &&f, Tuple &&t) -> decltype(detail::apply_tuple_impl(
+ std::forward<F>(f), std::forward<Tuple>(t),
+ build_index_impl<
+ std::tuple_size<typename std::decay<Tuple>::type>::value>{})) {
+ using Indices = build_index_impl<
+ std::tuple_size<typename std::decay<Tuple>::type>::value>;
+
+ return detail::apply_tuple_impl(std::forward<F>(f), std::forward<Tuple>(t),
+ Indices{});
+}
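A short sketch of calling apply_tuple (the callee and tuple contents are
illustrative, not from the patch):

  auto Sum = [](int A, int B, int C) { return A + B + C; };
  int Total = llvm::apply_tuple(Sum, std::make_tuple(1, 2, 3));  // Total == 6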
} // End llvm namespace
#endif
diff --git a/contrib/llvm/include/llvm/ADT/ScopeExit.h b/contrib/llvm/include/llvm/ADT/ScopeExit.h
new file mode 100644
index 000000000000..4e64352c77df
--- /dev/null
+++ b/contrib/llvm/include/llvm/ADT/ScopeExit.h
@@ -0,0 +1,54 @@
+//===- llvm/ADT/ScopeExit.h - Execute code at scope exit --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the make_scope_exit function, which executes user-defined
+// cleanup logic at scope exit.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_SCOPE_EXIT_H
+#define LLVM_ADT_SCOPE_EXIT_H
+
+#include "llvm/Support/Compiler.h"
+
+#include <type_traits>
+#include <utility>
+
+namespace llvm {
+namespace detail {
+
+template <typename Callable> class scope_exit {
+ Callable ExitFunction;
+
+public:
+ template <typename Fp>
+ explicit scope_exit(Fp &&F) : ExitFunction(std::forward<Fp>(F)) {}
+
+ scope_exit(scope_exit &&Rhs) : ExitFunction(std::move(Rhs.ExitFunction)) {}
+
+ ~scope_exit() { ExitFunction(); }
+};
+
+} // end namespace detail
+
+// Keeps the callable object that is passed in, and executes it at the
+// destruction of the returned object (usually at the scope exit where the
+// returned object is kept).
+//
+// Interface is specified by p0052r2.
+template <typename Callable>
+LLVM_NODISCARD detail::scope_exit<typename std::decay<Callable>::type>
+make_scope_exit(Callable &&F) {
+ return detail::scope_exit<typename std::decay<Callable>::type>(
+ std::forward<Callable>(F));
+}
+
+} // end namespace llvm
+
+#endif
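A hedged sketch of how make_scope_exit is typically used; the file handle and
cleanup lambda are illustrative only:

  {
    FILE *F = fopen("data.txt", "r");
    auto Close = llvm::make_scope_exit([&] { if (F) fclose(F); });
    // ... use F; the lambda runs when Close is destroyed at the end of the scope.
  }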
diff --git a/contrib/llvm/include/llvm/ADT/ScopedHashTable.h b/contrib/llvm/include/llvm/ADT/ScopedHashTable.h
index 4af3d6d37e33..ad805b0991c1 100644
--- a/contrib/llvm/include/llvm/ADT/ScopedHashTable.h
+++ b/contrib/llvm/include/llvm/ADT/ScopedHashTable.h
@@ -32,7 +32,10 @@
#define LLVM_ADT_SCOPEDHASHTABLE_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/Support/Allocator.h"
+#include <cassert>
+#include <new>
namespace llvm {
@@ -46,6 +49,7 @@ class ScopedHashTableVal {
ScopedHashTableVal *NextForKey;
K Key;
V Val;
+
ScopedHashTableVal(const K &key, const V &val) : Key(key), Val(val) {}
public:
@@ -89,11 +93,11 @@ class ScopedHashTableScope {
/// LastValInScope - This is the last value that was inserted for this scope
/// or null if none have been inserted yet.
ScopedHashTableVal<K, V> *LastValInScope;
- void operator=(ScopedHashTableScope &) = delete;
- ScopedHashTableScope(ScopedHashTableScope &) = delete;
public:
ScopedHashTableScope(ScopedHashTable<K, V, KInfo, AllocatorTy> &HT);
+ ScopedHashTableScope(ScopedHashTableScope &) = delete;
+ ScopedHashTableScope &operator=(ScopedHashTableScope &) = delete;
~ScopedHashTableScope();
ScopedHashTableScope *getParentScope() { return PrevScope; }
@@ -101,6 +105,7 @@ public:
private:
friend class ScopedHashTable<K, V, KInfo, AllocatorTy>;
+
ScopedHashTableVal<K, V> *getLastValInScope() {
return LastValInScope;
}
@@ -150,19 +155,20 @@ public:
typedef unsigned size_type;
private:
+ friend class ScopedHashTableScope<K, V, KInfo, AllocatorTy>;
+
typedef ScopedHashTableVal<K, V> ValTy;
DenseMap<K, ValTy*, KInfo> TopLevelMap;
- ScopeTy *CurScope;
+ ScopeTy *CurScope = nullptr;
AllocatorTy Allocator;
- ScopedHashTable(const ScopedHashTable &); // NOT YET IMPLEMENTED
- void operator=(const ScopedHashTable &); // NOT YET IMPLEMENTED
- friend class ScopedHashTableScope<K, V, KInfo, AllocatorTy>;
-
public:
- ScopedHashTable() : CurScope(nullptr) {}
+ ScopedHashTable() = default;
ScopedHashTable(AllocatorTy A) : CurScope(0), Allocator(A) {}
+ ScopedHashTable(const ScopedHashTable &) = delete;
+ ScopedHashTable &operator=(const ScopedHashTable &) = delete;
+
~ScopedHashTable() {
assert(!CurScope && TopLevelMap.empty() && "Scope imbalance!");
}
@@ -253,4 +259,4 @@ ScopedHashTableScope<K, V, KInfo, Allocator>::~ScopedHashTableScope() {
} // end namespace llvm
-#endif
+#endif // LLVM_ADT_SCOPEDHASHTABLE_H
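As a rough usage sketch (key and value are illustrative): entries inserted while a
scope object is live are removed when that scope is destroyed.

  llvm::ScopedHashTable<llvm::StringRef, int> Symbols;
  {
    llvm::ScopedHashTable<llvm::StringRef, int>::ScopeTy Scope(Symbols);
    Symbols.insert("x", 1);
    int V = Symbols.lookup("x");  // V == 1 while the scope is live
  }
  // Scope destroyed: "x" is no longer in the table.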
diff --git a/contrib/llvm/include/llvm/ADT/SetVector.h b/contrib/llvm/include/llvm/ADT/SetVector.h
index 2bb0fdbd3370..4dc18bc52178 100644
--- a/contrib/llvm/include/llvm/ADT/SetVector.h
+++ b/contrib/llvm/include/llvm/ADT/SetVector.h
@@ -20,11 +20,13 @@
#ifndef LLVM_ADT_SETVECTOR_H
#define LLVM_ADT_SETVECTOR_H
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Compiler.h"
#include <algorithm>
#include <cassert>
-#include <utility>
+#include <iterator>
#include <vector>
namespace llvm {
@@ -51,7 +53,7 @@ public:
typedef typename vector_type::size_type size_type;
/// \brief Construct an empty SetVector
- SetVector() {}
+ SetVector() = default;
/// \brief Initialize a SetVector with a range of elements
template<typename It>
@@ -61,6 +63,12 @@ public:
ArrayRef<T> getArrayRef() const { return vector_; }
+ /// Clear the SetVector and return the underlying vector.
+ Vector takeVector() {
+ set_.clear();
+ return std::move(vector_);
+ }
+
/// \brief Determine if the SetVector is empty or not.
bool empty() const {
return vector_.empty();
@@ -143,8 +151,7 @@ public:
/// \brief Remove an item from the set vector.
bool remove(const value_type& X) {
if (set_.erase(X)) {
- typename vector_type::iterator I =
- std::find(vector_.begin(), vector_.end(), X);
+ typename vector_type::iterator I = find(vector_, X);
assert(I != vector_.end() && "Corrupted SetVector instances!");
vector_.erase(I);
return true;
@@ -176,7 +183,7 @@ public:
/// write it:
///
/// \code
- /// V.erase(std::remove_if(V.begin(), V.end(), P), V.end());
+ /// V.erase(remove_if(V, P), V.end());
/// \endcode
///
/// However, SetVector doesn't expose non-const iterators, making any
@@ -185,9 +192,8 @@ public:
/// \returns true if any element is removed.
template <typename UnaryPredicate>
bool remove_if(UnaryPredicate P) {
- typename vector_type::iterator I
- = std::remove_if(vector_.begin(), vector_.end(),
- TestAndEraseFromSet<UnaryPredicate>(P, set_));
+ typename vector_type::iterator I =
+ llvm::remove_if(vector_, TestAndEraseFromSet<UnaryPredicate>(P, set_));
if (I == vector_.end())
return false;
vector_.erase(I, vector_.end());
@@ -213,7 +219,7 @@ public:
vector_.pop_back();
}
- T LLVM_ATTRIBUTE_UNUSED_RESULT pop_back_val() {
+ LLVM_NODISCARD T pop_back_val() {
T Ret = back();
pop_back();
return Ret;
@@ -283,9 +289,10 @@ private:
/// \brief A SetVector that performs no allocations if smaller than
/// a certain size.
template <typename T, unsigned N>
-class SmallSetVector : public SetVector<T, SmallVector<T, N>, SmallSet<T, N> > {
+class SmallSetVector
+ : public SetVector<T, SmallVector<T, N>, SmallDenseSet<T, N>> {
public:
- SmallSetVector() {}
+ SmallSetVector() = default;
/// \brief Initialize a SmallSetVector with a range of elements
template<typename It>
@@ -294,7 +301,6 @@ public:
}
};
-} // End llvm namespace
+} // end namespace llvm
-// vim: sw=2 ai
-#endif
+#endif // LLVM_ADT_SETVECTOR_H
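A short sketch of the new takeVector() member (element values are illustrative):
it clears the set index and moves out the underlying vector, preserving insertion
order.

  llvm::SetVector<int> SV;
  SV.insert(3);
  SV.insert(1);
  SV.insert(3);                           // duplicate, ignored
  std::vector<int> V = SV.takeVector();   // V == {3, 1}; SV's set index is cleared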
diff --git a/contrib/llvm/include/llvm/ADT/SmallPtrSet.h b/contrib/llvm/include/llvm/ADT/SmallPtrSet.h
index eaed6aa05dcb..49feb9da897a 100644
--- a/contrib/llvm/include/llvm/ADT/SmallPtrSet.h
+++ b/contrib/llvm/include/llvm/ADT/SmallPtrSet.h
@@ -15,19 +15,25 @@
#ifndef LLVM_ADT_SMALLPTRSET_H
#define LLVM_ADT_SMALLPTRSET_H
+#include "llvm/Config/abi-breaking.h"
#include "llvm/Support/Compiler.h"
-#include "llvm/Support/DataTypes.h"
#include "llvm/Support/PointerLikeTypeTraits.h"
#include <cassert>
#include <cstddef>
#include <cstring>
#include <cstdlib>
+#include <initializer_list>
#include <iterator>
#include <utility>
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
namespace llvm {
+template <class T = void> struct ReverseIterate { static bool value; };
+template <class T> bool ReverseIterate<T>::value = false;
+}
+#endif
-class SmallPtrSetIteratorImpl;
+namespace llvm {
/// SmallPtrSetImplBase - This is the common code shared among all the
/// SmallPtrSet<>'s, which is almost everything. SmallPtrSet has two modes, one
@@ -71,12 +77,14 @@ protected:
const SmallPtrSetImplBase &that);
SmallPtrSetImplBase(const void **SmallStorage, unsigned SmallSize,
SmallPtrSetImplBase &&that);
+
explicit SmallPtrSetImplBase(const void **SmallStorage, unsigned SmallSize)
: SmallArray(SmallStorage), CurArray(SmallStorage),
CurArraySize(SmallSize), NumNonEmpty(0), NumTombstones(0) {
assert(SmallSize && (SmallSize & (SmallSize-1)) == 0 &&
"Initial size must be a power of two!");
}
+
~SmallPtrSetImplBase() {
if (!isSmall())
free(CurArray);
@@ -84,7 +92,10 @@ protected:
public:
typedef unsigned size_type;
- bool LLVM_ATTRIBUTE_UNUSED_RESULT empty() const { return size() == 0; }
+
+ SmallPtrSetImplBase &operator=(const SmallPtrSetImplBase &) = delete;
+
+ LLVM_NODISCARD bool empty() const { return size() == 0; }
size_type size() const { return NumNonEmpty - NumTombstones; }
void clear() {
@@ -103,6 +114,7 @@ public:
protected:
static void *getTombstoneMarker() { return reinterpret_cast<void*>(-2); }
+
static void *getEmptyMarker() {
// Note that -1 is chosen to make clear() efficiently implementable with
// memset and because it's not a valid pointer value.
@@ -150,22 +162,38 @@ protected:
/// return true, otherwise return false. This is hidden from the client so
/// that the derived class can check that the right type of pointer is passed
/// in.
- bool erase_imp(const void * Ptr);
+ bool erase_imp(const void * Ptr) {
+ const void *const *P = find_imp(Ptr);
+ if (P == EndPointer())
+ return false;
+
+ const void ** Loc = const_cast<const void **>(P);
+ assert(*Loc == Ptr && "broken find!");
+ *Loc = getTombstoneMarker();
+ NumTombstones++;
+ return true;
+ }
- bool count_imp(const void * Ptr) const {
+  /// Returns the raw pointer needed to construct an iterator. If the element
+  /// is not found, this will be EndPointer. Otherwise, it will be a pointer to
+  /// the slot which stores Ptr.
+ const void *const * find_imp(const void * Ptr) const {
if (isSmall()) {
// Linear search for the item.
for (const void *const *APtr = SmallArray,
*const *E = SmallArray + NumNonEmpty; APtr != E; ++APtr)
if (*APtr == Ptr)
- return true;
- return false;
+ return APtr;
+ return EndPointer();
}
// Big set case.
- return *FindBucketFor(Ptr) == Ptr;
+ auto *Bucket = FindBucketFor(Ptr);
+ if (*Bucket == Ptr)
+ return Bucket;
+ return EndPointer();
}
-
+
private:
bool isSmall() const { return CurArray == SmallArray; }
@@ -177,8 +205,6 @@ private:
/// Grow - Allocate a larger backing store for the buckets and move it over.
void Grow(unsigned NewSize);
- void operator=(const SmallPtrSetImplBase &RHS) = delete;
-
protected:
/// swap - Swaps the elements of two sets.
/// Note: This method assumes that both sets have the same small size.
@@ -204,6 +230,12 @@ protected:
public:
explicit SmallPtrSetIteratorImpl(const void *const *BP, const void*const *E)
: Bucket(BP), End(E) {
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
+ if (ReverseIterate<bool>::value) {
+ RetreatIfNotValid();
+ return;
+ }
+#endif
AdvanceIfNotValid();
}
@@ -225,6 +257,17 @@ protected:
*Bucket == SmallPtrSetImplBase::getTombstoneMarker()))
++Bucket;
}
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
+ void RetreatIfNotValid() {
+ --Bucket;
+ assert(Bucket <= End);
+ while (Bucket != End &&
+ (*Bucket == SmallPtrSetImplBase::getEmptyMarker() ||
+ *Bucket == SmallPtrSetImplBase::getTombstoneMarker())) {
+ --Bucket;
+ }
+ }
+#endif
};
/// SmallPtrSetIterator - This implements a const_iterator for SmallPtrSet.
@@ -250,13 +293,21 @@ public:
}
inline SmallPtrSetIterator& operator++() { // Preincrement
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
+ if (ReverseIterate<bool>::value) {
+ RetreatIfNotValid();
+ return *this;
+ }
+#endif
++Bucket;
AdvanceIfNotValid();
return *this;
}
SmallPtrSetIterator operator++(int) { // Postincrement
- SmallPtrSetIterator tmp = *this; ++*this; return tmp;
+ SmallPtrSetIterator tmp = *this;
+ ++*this;
+ return tmp;
}
};
@@ -294,8 +345,6 @@ template <typename PtrType>
class SmallPtrSetImpl : public SmallPtrSetImplBase {
typedef PointerLikeTypeTraits<PtrType> PtrTraits;
- SmallPtrSetImpl(const SmallPtrSetImpl &) = delete;
-
protected:
// Constructors that forward to the base.
SmallPtrSetImpl(const void **SmallStorage, const SmallPtrSetImpl &that)
@@ -310,6 +359,8 @@ public:
typedef SmallPtrSetIterator<PtrType> iterator;
typedef SmallPtrSetIterator<PtrType> const_iterator;
+ SmallPtrSetImpl(const SmallPtrSetImpl &) = delete;
+
/// Inserts Ptr if and only if there is no element in the container equal to
/// Ptr. The bool component of the returned pair is true if and only if the
/// insertion takes place, and the iterator component of the pair points to
@@ -327,7 +378,11 @@ public:
/// count - Return 1 if the specified pointer is in the set, 0 otherwise.
size_type count(PtrType Ptr) const {
- return count_imp(PtrTraits::getAsVoidPointer(Ptr)) ? 1 : 0;
+ return find(Ptr) != endPtr() ? 1 : 0;
+ }
+ iterator find(PtrType Ptr) const {
+ auto *P = find_imp(PtrTraits::getAsVoidPointer(Ptr));
+ return iterator(P, EndPointer());
}
template <typename IterT>
@@ -336,10 +391,27 @@ public:
insert(*I);
}
+ void insert(std::initializer_list<PtrType> IL) {
+ insert(IL.begin(), IL.end());
+ }
+
inline iterator begin() const {
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
+ if (ReverseIterate<bool>::value)
+ return endPtr();
+#endif
return iterator(CurArray, EndPointer());
}
inline iterator end() const {
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
+ if (ReverseIterate<bool>::value)
+ return iterator(CurArray, CurArray);
+#endif
+ return endPtr();
+ }
+
+private:
+ inline iterator endPtr() const {
const void *const *End = EndPointer();
return iterator(End, End);
}
@@ -374,6 +446,11 @@ public:
this->insert(I, E);
}
+ SmallPtrSet(std::initializer_list<PtrType> IL)
+ : BaseT(SmallStorage, SmallSizePowTwo) {
+ this->insert(IL.begin(), IL.end());
+ }
+
SmallPtrSet<PtrType, SmallSize> &
operator=(const SmallPtrSet<PtrType, SmallSize> &RHS) {
if (&RHS != this)
@@ -381,26 +458,36 @@ public:
return *this;
}
- SmallPtrSet<PtrType, SmallSize>&
+ SmallPtrSet<PtrType, SmallSize> &
operator=(SmallPtrSet<PtrType, SmallSize> &&RHS) {
if (&RHS != this)
this->MoveFrom(SmallSizePowTwo, std::move(RHS));
return *this;
}
+ SmallPtrSet<PtrType, SmallSize> &
+ operator=(std::initializer_list<PtrType> IL) {
+ this->clear();
+ this->insert(IL.begin(), IL.end());
+ return *this;
+ }
+
/// swap - Swaps the elements of two sets.
void swap(SmallPtrSet<PtrType, SmallSize> &RHS) {
SmallPtrSetImplBase::swap(RHS);
}
};
-}
+
+} // end namespace llvm
namespace std {
+
/// Implement std::swap in terms of SmallPtrSet swap.
template<class T, unsigned N>
inline void swap(llvm::SmallPtrSet<T, N> &LHS, llvm::SmallPtrSet<T, N> &RHS) {
LHS.swap(RHS);
}
-}
-#endif
+} // end namespace std
+
+#endif // LLVM_ADT_SMALLPTRSET_H
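A sketch exercising the initializer-list support and the new find() member added
above (the pointers are illustrative):

  int A = 0, B = 1, C = 2;
  llvm::SmallPtrSet<int *, 4> Set = {&A, &B};   // initializer-list construction
  Set.insert({&C});                             // initializer-list insert
  bool HasA = Set.find(&A) != Set.end();        // same answer as Set.count(&A) != 0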
diff --git a/contrib/llvm/include/llvm/ADT/SmallSet.h b/contrib/llvm/include/llvm/ADT/SmallSet.h
index aaa5ff0ae939..6dac1677b7a2 100644
--- a/contrib/llvm/include/llvm/ADT/SmallSet.h
+++ b/contrib/llvm/include/llvm/ADT/SmallSet.h
@@ -17,7 +17,11 @@
#include "llvm/ADT/None.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Compiler.h"
+#include <cstddef>
+#include <functional>
#include <set>
+#include <utility>
namespace llvm {
@@ -28,7 +32,7 @@ namespace llvm {
///
/// Note that this set does not provide a way to iterate over members in the
/// set.
-template <typename T, unsigned N, typename C = std::less<T> >
+template <typename T, unsigned N, typename C = std::less<T>>
class SmallSet {
/// Use a SmallVector to hold the elements here (even though it will never
/// reach its 'large' stage) to avoid calling the default ctors of elements
@@ -45,9 +49,10 @@ class SmallSet {
public:
typedef size_t size_type;
- SmallSet() {}
- bool LLVM_ATTRIBUTE_UNUSED_RESULT empty() const {
+ SmallSet() = default;
+
+ LLVM_NODISCARD bool empty() const {
return Vector.empty() && Set.empty();
}
@@ -133,4 +138,4 @@ class SmallSet<PointeeType*, N> : public SmallPtrSet<PointeeType*, N> {};
} // end namespace llvm
-#endif
+#endif // LLVM_ADT_SMALLSET_H
diff --git a/contrib/llvm/include/llvm/ADT/SmallString.h b/contrib/llvm/include/llvm/ADT/SmallString.h
index e569f54481a2..ff46e85ccb09 100644
--- a/contrib/llvm/include/llvm/ADT/SmallString.h
+++ b/contrib/llvm/include/llvm/ADT/SmallString.h
@@ -16,6 +16,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
+#include <cstddef>
namespace llvm {
@@ -25,7 +26,7 @@ template<unsigned InternalLen>
class SmallString : public SmallVector<char, InternalLen> {
public:
/// Default ctor - Initialize to empty.
- SmallString() {}
+ SmallString() = default;
/// Initialize from a StringRef.
SmallString(StringRef S) : SmallVector<char, InternalLen>(S.begin(), S.end()) {}
@@ -79,7 +80,6 @@ public:
SmallVectorImpl<char>::append(NumInputs, Elt);
}
-
/// Append from a StringRef.
void append(StringRef RHS) {
SmallVectorImpl<char>::append(RHS.begin(), RHS.end());
@@ -292,6 +292,6 @@ public:
}
};
-}
+} // end namespace llvm
-#endif
+#endif // LLVM_ADT_SMALLSTRING_H
diff --git a/contrib/llvm/include/llvm/ADT/SmallVector.h b/contrib/llvm/include/llvm/ADT/SmallVector.h
index 42eedc63e079..b9588214023c 100644
--- a/contrib/llvm/include/llvm/ADT/SmallVector.h
+++ b/contrib/llvm/include/llvm/ADT/SmallVector.h
@@ -27,6 +27,9 @@
#include <initializer_list>
#include <iterator>
#include <memory>
+#include <new>
+#include <type_traits>
+#include <utility>
namespace llvm {
@@ -54,11 +57,9 @@ public:
return size_t((char*)CapacityX - (char*)BeginX);
}
- bool LLVM_ATTRIBUTE_UNUSED_RESULT empty() const { return BeginX == EndX; }
+ LLVM_NODISCARD bool empty() const { return BeginX == EndX; }
};
-template <typename T, unsigned N> struct SmallVectorStorage;
-
/// This is the part of SmallVectorTemplateBase which does not depend on whether
/// the type T is a POD. The extra dummy template argument is used by ArrayRef
/// to avoid unnecessarily requiring T to be complete.
@@ -70,7 +71,7 @@ private:
// Allocate raw space for N elements of type T. If T has a ctor or dtor, we
// don't want it to be automatically run, so we need to represent the space as
// something else. Use an array of char of sufficient alignment.
- typedef llvm::AlignedCharArrayUnion<T> U;
+ typedef AlignedCharArrayUnion<T> U;
U FirstEl;
// Space after 'FirstEl' is clobbered, do not add any instance vars after it.
@@ -93,6 +94,7 @@ protected:
}
void setEnd(T *P) { this->EndX = P; }
+
public:
typedef size_t size_type;
typedef ptrdiff_t difference_type;
@@ -117,11 +119,12 @@ public:
iterator end() { return (iterator)this->EndX; }
LLVM_ATTRIBUTE_ALWAYS_INLINE
const_iterator end() const { return (const_iterator)this->EndX; }
+
protected:
iterator capacity_ptr() { return (iterator)this->CapacityX; }
const_iterator capacity_ptr() const { return (const_iterator)this->CapacityX;}
-public:
+public:
// reverse iterator creation methods.
reverse_iterator rbegin() { return reverse_iterator(end()); }
const_reverse_iterator rbegin() const{ return const_reverse_iterator(end()); }
@@ -298,6 +301,7 @@ protected:
void grow(size_t MinSize = 0) {
this->grow_pod(MinSize*sizeof(T), sizeof(T));
}
+
public:
void push_back(const T &Elt) {
if (LLVM_UNLIKELY(this->EndX >= this->CapacityX))
@@ -311,14 +315,12 @@ public:
}
};
-
/// This class consists of common code factored out of the SmallVector class to
/// reduce code duplication based on the SmallVector 'N' template parameter.
template <typename T>
class SmallVectorImpl : public SmallVectorTemplateBase<T, isPodLike<T>::value> {
typedef SmallVectorTemplateBase<T, isPodLike<T>::value > SuperClass;
- SmallVectorImpl(const SmallVectorImpl&) = delete;
public:
typedef typename SuperClass::iterator iterator;
typedef typename SuperClass::const_iterator const_iterator;
@@ -331,6 +333,8 @@ protected:
}
public:
+ SmallVectorImpl(const SmallVectorImpl &) = delete;
+
~SmallVectorImpl() {
// Destroy the constructed elements in the vector.
this->destroy_range(this->begin(), this->end());
@@ -340,7 +344,6 @@ public:
free(this->begin());
}
-
void clear() {
this->destroy_range(this->begin(), this->end());
this->EndX = this->BeginX;
@@ -376,7 +379,7 @@ public:
this->grow(N);
}
- T LLVM_ATTRIBUTE_UNUSED_RESULT pop_back_val() {
+ LLVM_NODISCARD T pop_back_val() {
T Result = ::std::move(this->back());
this->pop_back();
return Result;
@@ -668,7 +671,6 @@ public:
}
};
-
template <typename T>
void SmallVectorImpl<T>::swap(SmallVectorImpl<T> &RHS) {
if (this == &RHS) return;
@@ -841,6 +843,7 @@ template <typename T, unsigned N>
class SmallVector : public SmallVectorImpl<T> {
/// Inline space for elements which aren't stored in the base class.
SmallVectorStorage<T, N> Storage;
+
public:
SmallVector() : SmallVectorImpl<T>(N) {
}
@@ -856,7 +859,7 @@ public:
}
template <typename RangeTy>
- explicit SmallVector(const llvm::iterator_range<RangeTy> R)
+ explicit SmallVector(const iterator_range<RangeTy> &R)
: SmallVectorImpl<T>(N) {
this->append(R.begin(), R.end());
}
@@ -906,9 +909,10 @@ static inline size_t capacity_in_bytes(const SmallVector<T, N> &X) {
return X.capacity_in_bytes();
}
-} // End llvm namespace
+} // end namespace llvm
namespace std {
+
/// Implement std::swap in terms of SmallVector swap.
template<typename T>
inline void
@@ -922,6 +926,7 @@ namespace std {
swap(llvm::SmallVector<T, N> &LHS, llvm::SmallVector<T, N> &RHS) {
LHS.swap(RHS);
}
-}
-#endif
+} // end namespace std
+
+#endif // LLVM_ADT_SMALLVECTOR_H
diff --git a/contrib/llvm/include/llvm/ADT/SparseBitVector.h b/contrib/llvm/include/llvm/ADT/SparseBitVector.h
index e6e72413da4e..e2822c46e266 100644
--- a/contrib/llvm/include/llvm/ADT/SparseBitVector.h
+++ b/contrib/llvm/include/llvm/ADT/SparseBitVector.h
@@ -15,14 +15,14 @@
#ifndef LLVM_ADT_SPARSEBITVECTOR_H
#define LLVM_ADT_SPARSEBITVECTOR_H
-#include "llvm/ADT/ilist.h"
-#include "llvm/ADT/ilist_node.h"
-#include "llvm/Support/DataTypes.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <climits>
+#include <cstring>
+#include <iterator>
+#include <list>
namespace llvm {
@@ -39,9 +39,7 @@ namespace llvm {
/// etc) do not perform as well in practice as a linked list with this iterator
/// kept up to date. They are also significantly more memory intensive.
-template <unsigned ElementSize = 128>
-struct SparseBitVectorElement
- : public ilist_node<SparseBitVectorElement<ElementSize> > {
+template <unsigned ElementSize = 128> struct SparseBitVectorElement {
public:
typedef unsigned long BitWord;
typedef unsigned size_type;
@@ -55,8 +53,7 @@ private:
// Index of Element in terms of where first bit starts.
unsigned ElementIndex;
BitWord Bits[BITWORDS_PER_ELEMENT];
- // Needed for sentinels
- friend struct ilist_sentinel_traits<SparseBitVectorElement>;
+
SparseBitVectorElement() {
ElementIndex = ~0U;
memset(&Bits[0], 0, sizeof (BitWord) * BITWORDS_PER_ELEMENT);
@@ -84,7 +81,7 @@ public:
// Return the bits that make up word Idx in our element.
BitWord word(unsigned Idx) const {
- assert (Idx < BITWORDS_PER_ELEMENT);
+ assert(Idx < BITWORDS_PER_ELEMENT);
return Bits[Idx];
}
@@ -144,8 +141,8 @@ public:
unsigned WordPos = Curr / BITWORD_SIZE;
unsigned BitPos = Curr % BITWORD_SIZE;
BitWord Copy = Bits[WordPos];
- assert (WordPos <= BITWORDS_PER_ELEMENT
- && "Word Position outside of element");
+ assert(WordPos <= BITWORDS_PER_ELEMENT
+ && "Word Position outside of element");
// Mask off previous bits.
Copy &= ~0UL << BitPos;
@@ -244,25 +241,9 @@ public:
}
};
-template <unsigned ElementSize>
-struct ilist_traits<SparseBitVectorElement<ElementSize> >
- : public ilist_default_traits<SparseBitVectorElement<ElementSize> > {
- typedef SparseBitVectorElement<ElementSize> Element;
-
- Element *createSentinel() const { return static_cast<Element *>(&Sentinel); }
- static void destroySentinel(Element *) {}
-
- Element *provideInitialHead() const { return createSentinel(); }
- Element *ensureHead(Element *) const { return createSentinel(); }
- static void noteHead(Element *, Element *) {}
-
-private:
- mutable ilist_half_node<Element> Sentinel;
-};
-
template <unsigned ElementSize = 128>
class SparseBitVector {
- typedef ilist<SparseBitVectorElement<ElementSize> > ElementList;
+ typedef std::list<SparseBitVectorElement<ElementSize>> ElementList;
typedef typename ElementList::iterator ElementListIter;
typedef typename ElementList::const_iterator ElementListConstIter;
enum {
@@ -310,7 +291,7 @@ class SparseBitVector {
private:
bool AtEnd;
- const SparseBitVector<ElementSize> *BitVector;
+ const SparseBitVector<ElementSize> *BitVector = nullptr;
// Current element inside of bitmap.
ElementListConstIter Iter;
@@ -380,7 +361,20 @@ class SparseBitVector {
}
}
}
+
public:
+ SparseBitVectorIterator() = default;
+
+ SparseBitVectorIterator(const SparseBitVector<ElementSize> *RHS,
+ bool end = false):BitVector(RHS) {
+ Iter = BitVector->Elements.begin();
+ BitNumber = 0;
+ Bits = 0;
+ WordNumber = ~0;
+ AtEnd = end;
+ AdvanceToFirstNonZero();
+ }
+
// Preincrement.
inline SparseBitVectorIterator& operator++() {
++BitNumber;
@@ -413,29 +407,16 @@ class SparseBitVector {
bool operator!=(const SparseBitVectorIterator &RHS) const {
return !(*this == RHS);
}
-
- SparseBitVectorIterator(): BitVector(nullptr) {
- }
-
- SparseBitVectorIterator(const SparseBitVector<ElementSize> *RHS,
- bool end = false):BitVector(RHS) {
- Iter = BitVector->Elements.begin();
- BitNumber = 0;
- Bits = 0;
- WordNumber = ~0;
- AtEnd = end;
- AdvanceToFirstNonZero();
- }
};
+
public:
typedef SparseBitVectorIterator iterator;
- SparseBitVector () {
- CurrElementIter = Elements.begin ();
+ SparseBitVector() {
+ CurrElementIter = Elements.begin();
}
- ~SparseBitVector() {
- }
+ ~SparseBitVector() = default;
// SparseBitVector copy ctor.
SparseBitVector(const SparseBitVector &RHS) {
@@ -510,26 +491,21 @@ public:
void set(unsigned Idx) {
unsigned ElementIndex = Idx / ElementSize;
- SparseBitVectorElement<ElementSize> *Element;
ElementListIter ElementIter;
if (Elements.empty()) {
- Element = new SparseBitVectorElement<ElementSize>(ElementIndex);
- ElementIter = Elements.insert(Elements.end(), Element);
-
+ ElementIter = Elements.emplace(Elements.end(), ElementIndex);
} else {
ElementIter = FindLowerBound(ElementIndex);
if (ElementIter == Elements.end() ||
ElementIter->index() != ElementIndex) {
- Element = new SparseBitVectorElement<ElementSize>(ElementIndex);
// We may have hit the beginning of our SparseBitVector, in which case,
// we may need to insert right after this element, which requires moving
// the current iterator forward one, because insert does insert before.
if (ElementIter != Elements.end() &&
ElementIter->index() < ElementIndex)
- ElementIter = Elements.insert(++ElementIter, Element);
- else
- ElementIter = Elements.insert(ElementIter, Element);
+ ++ElementIter;
+ ElementIter = Elements.emplace(ElementIter, ElementIndex);
}
}
CurrElementIter = ElementIter;
@@ -537,7 +513,7 @@ public:
ElementIter->set(Idx % ElementSize);
}
- bool test_and_set (unsigned Idx) {
+ bool test_and_set(unsigned Idx) {
bool old = test(Idx);
if (!old) {
set(Idx);
@@ -577,8 +553,7 @@ public:
while (Iter2 != RHS.Elements.end()) {
if (Iter1 == Elements.end() || Iter1->index() > Iter2->index()) {
- Elements.insert(Iter1,
- new SparseBitVectorElement<ElementSize>(*Iter2));
+ Elements.insert(Iter1, *Iter2);
++Iter2;
changed = true;
} else if (Iter1->index() == Iter2->index()) {
@@ -725,31 +700,19 @@ public:
++Iter2;
} else if (Iter1->index() == Iter2->index()) {
bool BecameZero = false;
- SparseBitVectorElement<ElementSize> *NewElement =
- new SparseBitVectorElement<ElementSize>(Iter1->index());
- NewElement->intersectWithComplement(*Iter1, *Iter2, BecameZero);
- if (!BecameZero) {
- Elements.push_back(NewElement);
- }
- else
- delete NewElement;
+ Elements.emplace_back(Iter1->index());
+ Elements.back().intersectWithComplement(*Iter1, *Iter2, BecameZero);
+ if (BecameZero)
+ Elements.pop_back();
++Iter1;
++Iter2;
} else {
- SparseBitVectorElement<ElementSize> *NewElement =
- new SparseBitVectorElement<ElementSize>(*Iter1);
- Elements.push_back(NewElement);
- ++Iter1;
+ Elements.push_back(*Iter1++);
}
}
// copy the remaining elements
- while (Iter1 != RHS1.Elements.end()) {
- SparseBitVectorElement<ElementSize> *NewElement =
- new SparseBitVectorElement<ElementSize>(*Iter1);
- Elements.push_back(NewElement);
- ++Iter1;
- }
+ std::copy(Iter1, RHS1.Elements.end(), std::back_inserter(Elements));
}
void intersectWithComplement(const SparseBitVector<ElementSize> *RHS1,
@@ -819,6 +782,7 @@ public:
return BitCount;
}
+
iterator begin() const {
return iterator(this);
}
@@ -899,6 +863,7 @@ void dump(const SparseBitVector<ElementSize> &LHS, raw_ostream &out) {
}
out << "]\n";
}
+
} // end namespace llvm
#endif // LLVM_ADT_SPARSEBITVECTOR_H
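A brief sketch of basic SparseBitVector use (bit indices are illustrative); the
interface is unchanged by this patch, which only replaces the underlying ilist
with std::list:

  llvm::SparseBitVector<> Bits;
  Bits.set(5);
  Bits.set(300);                          // stored in a different 128-bit element
  bool Changed = Bits.test_and_set(5);    // false: bit 5 was already set
  for (unsigned Idx : Bits)               // visits 5, then 300
    (void)Idx;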
diff --git a/contrib/llvm/include/llvm/ADT/SparseMultiSet.h b/contrib/llvm/include/llvm/ADT/SparseMultiSet.h
index e3aa2589b79f..08da4b68ebaa 100644
--- a/contrib/llvm/include/llvm/ADT/SparseMultiSet.h
+++ b/contrib/llvm/include/llvm/ADT/SparseMultiSet.h
@@ -21,7 +21,15 @@
#ifndef LLVM_ADT_SPARSEMULTISET_H
#define LLVM_ADT_SPARSEMULTISET_H
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseSet.h"
+#include "llvm/ADT/STLExtras.h"
+#include <cassert>
+#include <cstdint>
+#include <cstdlib>
+#include <iterator>
+#include <limits>
+#include <utility>
namespace llvm {
@@ -73,7 +81,7 @@ namespace llvm {
/// @tparam SparseT An unsigned integer type. See above.
///
template<typename ValueT,
- typename KeyFunctorT = llvm::identity<unsigned>,
+ typename KeyFunctorT = identity<unsigned>,
typename SparseT = uint8_t>
class SparseMultiSet {
static_assert(std::numeric_limits<SparseT>::is_integer &&
@@ -113,16 +121,16 @@ class SparseMultiSet {
typedef typename KeyFunctorT::argument_type KeyT;
typedef SmallVector<SMSNode, 8> DenseT;
DenseT Dense;
- SparseT *Sparse;
- unsigned Universe;
+ SparseT *Sparse = nullptr;
+ unsigned Universe = 0;
KeyFunctorT KeyIndexOf;
SparseSetValFunctor<KeyT, ValueT, KeyFunctorT> ValIndexOf;
/// We have a built-in recycler for reusing tombstone slots. This recycler
/// puts a singly-linked free list into tombstone slots, allowing us quick
/// erasure, iterator preservation, and dense size.
- unsigned FreelistIdx;
- unsigned NumFree;
+ unsigned FreelistIdx = SMSNode::INVALID;
+ unsigned NumFree = 0;
unsigned sparseIndex(const ValueT &Val) const {
assert(ValIndexOf(Val) < Universe &&
@@ -131,11 +139,6 @@ class SparseMultiSet {
}
unsigned sparseIndex(const SMSNode &N) const { return sparseIndex(N.Data); }
- // Disable copy construction and assignment.
- // This data structure is not meant to be used that way.
- SparseMultiSet(const SparseMultiSet&) = delete;
- SparseMultiSet &operator=(const SparseMultiSet&) = delete;
-
/// Whether the given entry is the head of the list. List heads's previous
/// pointers are to the tail of the list, allowing for efficient access to the
/// list tail. D must be a valid entry node.
@@ -187,9 +190,9 @@ public:
typedef const ValueT *const_pointer;
typedef unsigned size_type;
- SparseMultiSet()
- : Sparse(nullptr), Universe(0), FreelistIdx(SMSNode::INVALID), NumFree(0) {}
-
+ SparseMultiSet() = default;
+ SparseMultiSet(const SparseMultiSet &) = delete;
+ SparseMultiSet &operator=(const SparseMultiSet &) = delete;
~SparseMultiSet() { free(Sparse); }
/// Set the universe size which determines the largest key the set can hold.
@@ -218,6 +221,7 @@ public:
class iterator_base : public std::iterator<std::bidirectional_iterator_tag,
ValueT> {
friend class SparseMultiSet;
+
SMSPtrTy SMS;
unsigned Idx;
unsigned SparseIdx;
@@ -515,4 +519,4 @@ private:
} // end namespace llvm
-#endif
+#endif // LLVM_ADT_SPARSEMULTISET_H
diff --git a/contrib/llvm/include/llvm/ADT/SparseSet.h b/contrib/llvm/include/llvm/ADT/SparseSet.h
index 5b6494d17129..00c18c743219 100644
--- a/contrib/llvm/include/llvm/ADT/SparseSet.h
+++ b/contrib/llvm/include/llvm/ADT/SparseSet.h
@@ -22,8 +22,11 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/DataTypes.h"
+#include <cassert>
+#include <cstdint>
+#include <cstdlib>
#include <limits>
+#include <utility>
namespace llvm {
@@ -115,7 +118,7 @@ struct SparseSetValFunctor<KeyT, KeyT, KeyFunctorT> {
/// @tparam SparseT An unsigned integer type. See above.
///
template<typename ValueT,
- typename KeyFunctorT = llvm::identity<unsigned>,
+ typename KeyFunctorT = identity<unsigned>,
typename SparseT = uint8_t>
class SparseSet {
static_assert(std::numeric_limits<SparseT>::is_integer &&
@@ -126,16 +129,11 @@ class SparseSet {
typedef SmallVector<ValueT, 8> DenseT;
typedef unsigned size_type;
DenseT Dense;
- SparseT *Sparse;
- unsigned Universe;
+ SparseT *Sparse = nullptr;
+ unsigned Universe = 0;
KeyFunctorT KeyIndexOf;
SparseSetValFunctor<KeyT, ValueT, KeyFunctorT> ValIndexOf;
- // Disable copy construction and assignment.
- // This data structure is not meant to be used that way.
- SparseSet(const SparseSet&) = delete;
- SparseSet &operator=(const SparseSet&) = delete;
-
public:
typedef ValueT value_type;
typedef ValueT &reference;
@@ -143,7 +141,9 @@ public:
typedef ValueT *pointer;
typedef const ValueT *const_pointer;
- SparseSet() : Sparse(nullptr), Universe(0) {}
+ SparseSet() = default;
+ SparseSet(const SparseSet &) = delete;
+ SparseSet &operator=(const SparseSet &) = delete;
~SparseSet() { free(Sparse); }
/// setUniverse - Set the universe size which determines the largest key the
@@ -308,9 +308,8 @@ public:
erase(I);
return true;
}
-
};
} // end namespace llvm
-#endif
+#endif // LLVM_ADT_SPARSESET_H
diff --git a/contrib/llvm/include/llvm/ADT/Statistic.h b/contrib/llvm/include/llvm/ADT/Statistic.h
index 32175fdc7c5c..53fa2a50fcba 100644
--- a/contrib/llvm/include/llvm/ADT/Statistic.h
+++ b/contrib/llvm/include/llvm/ADT/Statistic.h
@@ -32,6 +32,7 @@
#include <memory>
namespace llvm {
+
class raw_ostream;
class raw_fd_ostream;
@@ -140,16 +141,17 @@ protected:
TsanHappensAfter(this);
return *this;
}
+
void RegisterStatistic();
};
// STATISTIC - A macro to make definition of statistics really simple. This
// automatically passes the DEBUG_TYPE of the file into the statistic.
#define STATISTIC(VARNAME, DESC) \
- static llvm::Statistic VARNAME = {DEBUG_TYPE, #VARNAME, DESC, {0}, 0}
+ static llvm::Statistic VARNAME = {DEBUG_TYPE, #VARNAME, DESC, {0}, false}
/// \brief Enable the collection and printing of statistics.
-void EnableStatistics();
+void EnableStatistics(bool PrintOnExit = true);
/// \brief Check if statistics are enabled.
bool AreStatisticsEnabled();
@@ -163,9 +165,12 @@ void PrintStatistics();
/// \brief Print statistics to the given output stream.
void PrintStatistics(raw_ostream &OS);
-/// Print statistics in JSON format.
+/// Print statistics in JSON format. This does include all global timers (\see
+/// Timer, TimerGroup). Note that the timers are cleared after printing and will
+/// not be printed in human readable form or in a second call of
+/// PrintStatisticsJSON().
void PrintStatisticsJSON(raw_ostream &OS);
-} // end llvm namespace
+} // end namespace llvm
#endif // LLVM_ADT_STATISTIC_H
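A minimal sketch of the STATISTIC macro together with the changed initializer
above (the pass name and counter are illustrative):

  #define DEBUG_TYPE "my-pass"
  STATISTIC(NumWidgets, "Number of widgets processed");

  void processWidget() {
    ++NumWidgets;   // printed at program exit when statistics are enabled (e.g. -stats)
  }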
diff --git a/contrib/llvm/include/llvm/ADT/StringExtras.h b/contrib/llvm/include/llvm/ADT/StringExtras.h
index bdbb4d3f5932..488748a5f605 100644
--- a/contrib/llvm/include/llvm/ADT/StringExtras.h
+++ b/contrib/llvm/include/llvm/ADT/StringExtras.h
@@ -19,6 +19,7 @@
#include <iterator>
namespace llvm {
+class raw_ostream;
template<typename T> class SmallVectorImpl;
/// hexdigit - Return the hexadecimal character for the
@@ -150,6 +151,12 @@ static inline StringRef getOrdinalSuffix(unsigned Val) {
}
}
+/// PrintEscapedString - Print each character of the specified string, escaping
+/// it if it is not printable or if it is an escape char.
+void PrintEscapedString(StringRef Name, raw_ostream &Out);
+
+namespace detail {
+
template <typename IteratorT>
inline std::string join_impl(IteratorT Begin, IteratorT End,
StringRef Separator, std::input_iterator_tag) {
@@ -184,12 +191,64 @@ inline std::string join_impl(IteratorT Begin, IteratorT End,
return S;
}
+template <typename Sep>
+inline void join_items_impl(std::string &Result, Sep Separator) {}
+
+template <typename Sep, typename Arg>
+inline void join_items_impl(std::string &Result, Sep Separator,
+ const Arg &Item) {
+ Result += Item;
+}
+
+template <typename Sep, typename Arg1, typename... Args>
+inline void join_items_impl(std::string &Result, Sep Separator, const Arg1 &A1,
+ Args &&... Items) {
+ Result += A1;
+ Result += Separator;
+ join_items_impl(Result, Separator, std::forward<Args>(Items)...);
+}
+
+inline size_t join_one_item_size(char C) { return 1; }
+inline size_t join_one_item_size(const char *S) { return S ? ::strlen(S) : 0; }
+
+template <typename T> inline size_t join_one_item_size(const T &Str) {
+ return Str.size();
+}
+
+inline size_t join_items_size() { return 0; }
+
+template <typename A1> inline size_t join_items_size(const A1 &A) {
+ return join_one_item_size(A);
+}
+template <typename A1, typename... Args>
+inline size_t join_items_size(const A1 &A, Args &&... Items) {
+ return join_one_item_size(A) + join_items_size(std::forward<Args>(Items)...);
+}
+}
+
/// Joins the strings in the range [Begin, End), adding Separator between
/// the elements.
template <typename IteratorT>
inline std::string join(IteratorT Begin, IteratorT End, StringRef Separator) {
typedef typename std::iterator_traits<IteratorT>::iterator_category tag;
- return join_impl(Begin, End, Separator, tag());
+ return detail::join_impl(Begin, End, Separator, tag());
+}
+
+/// Joins the strings in the parameter pack \p Items, adding \p Separator
+/// between the elements. All arguments must be implicitly convertible to
+/// std::string, or there should be an overload of std::string::operator+=()
+/// that accepts the argument explicitly.
+template <typename Sep, typename... Args>
+inline std::string join_items(Sep Separator, Args &&... Items) {
+ std::string Result;
+ if (sizeof...(Items) == 0)
+ return Result;
+
+ size_t NS = detail::join_one_item_size(Separator);
+ size_t NI = detail::join_items_size(std::forward<Args>(Items)...);
+ Result.reserve(NI + (sizeof...(Items) - 1) * NS + 1);
+ detail::join_items_impl(Result, Separator, std::forward<Args>(Items)...);
+ return Result;
}
} // End llvm namespace
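A sketch of the variadic join_items helper introduced above (separator and items
are illustrative); argument types only need to be accepted by std::string::operator+=:

  std::string Path = llvm::join_items('/', "usr", "local", std::string("bin"));
  // Path == "usr/local/bin"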
diff --git a/contrib/llvm/include/llvm/ADT/StringMap.h b/contrib/llvm/include/llvm/ADT/StringMap.h
index 260275295c99..24e3ecf71b13 100644
--- a/contrib/llvm/include/llvm/ADT/StringMap.h
+++ b/contrib/llvm/include/llvm/ADT/StringMap.h
@@ -17,10 +17,17 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/PointerLikeTypeTraits.h"
+#include <cassert>
+#include <cstdint>
+#include <cstdlib>
#include <cstring>
#include <utility>
+#include <initializer_list>
+#include <new>
+#include <utility>
namespace llvm {
+
template<typename ValueT>
class StringMapConstIterator;
template<typename ValueT>
@@ -119,8 +126,6 @@ public:
/// and data.
template<typename ValueTy>
class StringMapEntry : public StringMapEntryBase {
- StringMapEntry(StringMapEntry &E) = delete;
-
public:
ValueTy second;
@@ -129,6 +134,7 @@ public:
template <typename... InitTy>
StringMapEntry(unsigned strLen, InitTy &&... InitVals)
: StringMapEntryBase(strLen), second(std::forward<InitTy>(InitVals)...) {}
+ StringMapEntry(StringMapEntry &E) = delete;
StringRef getKey() const {
return StringRef(getKeyData(), getKeyLength());
@@ -157,7 +163,7 @@ public:
// terminator.
unsigned AllocSize = static_cast<unsigned>(sizeof(StringMapEntry))+
KeyLength+1;
- unsigned Alignment = alignOf<StringMapEntry>();
+ unsigned Alignment = alignof(StringMapEntry);
StringMapEntry *NewItem =
static_cast<StringMapEntry*>(Allocator.Allocate(AllocSize,Alignment));
@@ -329,9 +335,7 @@ public:
/// Lookup the ValueTy for the \p Key, or create a default constructed value
/// if the key is not in the map.
- ValueTy &operator[](StringRef Key) {
- return emplace_second(Key).first->second;
- }
+ ValueTy &operator[](StringRef Key) { return try_emplace(Key).first->second; }
/// count - Return 1 if the element is in the map, 0 otherwise.
size_type count(StringRef Key) const {
@@ -362,7 +366,7 @@ public:
/// if and only if the insertion takes place, and the iterator component of
/// the pair points to the element with key equivalent to the key of the pair.
std::pair<iterator, bool> insert(std::pair<StringRef, ValueTy> KV) {
- return emplace_second(KV.first, std::move(KV.second));
+ return try_emplace(KV.first, std::move(KV.second));
}
/// Emplace a new element for the specified key into the map if the key isn't
@@ -370,7 +374,7 @@ public:
/// if and only if the insertion takes place, and the iterator component of
/// the pair points to the element with key equivalent to the key of the pair.
template <typename... ArgsTy>
- std::pair<iterator, bool> emplace_second(StringRef Key, ArgsTy &&... Args) {
+ std::pair<iterator, bool> try_emplace(StringRef Key, ArgsTy &&... Args) {
unsigned BucketNo = LookupBucketFor(Key);
StringMapEntryBase *&Bucket = TheTable[BucketNo];
if (Bucket && Bucket != getTombstoneVal())
@@ -442,12 +446,12 @@ public:
template <typename ValueTy> class StringMapConstIterator {
protected:
- StringMapEntryBase **Ptr;
+ StringMapEntryBase **Ptr = nullptr;
public:
typedef StringMapEntry<ValueTy> value_type;
- StringMapConstIterator() : Ptr(nullptr) { }
+ StringMapConstIterator() = default;
explicit StringMapConstIterator(StringMapEntryBase **Bucket,
bool NoAdvance = false)
@@ -488,11 +492,13 @@ private:
template<typename ValueTy>
class StringMapIterator : public StringMapConstIterator<ValueTy> {
public:
- StringMapIterator() {}
+ StringMapIterator() = default;
+
explicit StringMapIterator(StringMapEntryBase **Bucket,
bool NoAdvance = false)
: StringMapConstIterator<ValueTy>(Bucket, NoAdvance) {
}
+
StringMapEntry<ValueTy> &operator*() const {
return *static_cast<StringMapEntry<ValueTy>*>(*this->Ptr);
}
@@ -500,6 +506,7 @@ public:
return static_cast<StringMapEntry<ValueTy>*>(*this->Ptr);
}
};
-}
-#endif
+} // end namespace llvm
+
+#endif // LLVM_ADT_STRINGMAP_H
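A small sketch of the renamed try_emplace() (formerly emplace_second); key and
value are illustrative:

  llvm::StringMap<int> Map;
  auto Res = Map.try_emplace("answer", 42);  // constructs the value only if "answer" is absent
  bool Inserted = Res.second;                // true on first insertion
  int &Val = Map["answer"];                  // operator[] now forwards to try_emplace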
diff --git a/contrib/llvm/include/llvm/ADT/StringRef.h b/contrib/llvm/include/llvm/ADT/StringRef.h
index 398ca6920249..d80a848c44a1 100644
--- a/contrib/llvm/include/llvm/ADT/StringRef.h
+++ b/contrib/llvm/include/llvm/ADT/StringRef.h
@@ -10,6 +10,7 @@
#ifndef LLVM_ADT_STRINGREF_H
#define LLVM_ADT_STRINGREF_H
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Support/Compiler.h"
#include <algorithm>
@@ -32,6 +33,10 @@ namespace llvm {
bool getAsSignedInteger(StringRef Str, unsigned Radix, long long &Result);
+ bool consumeUnsignedInteger(StringRef &Str, unsigned Radix,
+ unsigned long long &Result);
+ bool consumeSignedInteger(StringRef &Str, unsigned Radix, long long &Result);
+
/// StringRef - Represent a constant reference to a string, i.e. a character
/// array and a length, which need not be null terminated.
///
@@ -48,10 +53,10 @@ namespace llvm {
private:
/// The start of the string, in an external buffer.
- const char *Data;
+ const char *Data = nullptr;
/// The length of the string.
- size_t Length;
+ size_t Length = 0;
// Workaround memcmp issue with null pointers (undefined behavior)
// by providing a specialized version
@@ -66,28 +71,31 @@ namespace llvm {
/// @{
/// Construct an empty string ref.
- /*implicit*/ StringRef() : Data(nullptr), Length(0) {}
+ /*implicit*/ StringRef() = default;
+
+ /// Disable conversion from nullptr. This prevents things like
+ /// if (S == nullptr)
+ StringRef(std::nullptr_t) = delete;
/// Construct a string ref from a cstring.
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
/*implicit*/ StringRef(const char *Str)
- : Data(Str) {
- assert(Str && "StringRef cannot be built from a NULL argument");
- Length = ::strlen(Str); // invoking strlen(NULL) is undefined behavior
- }
+ : Data(Str), Length(Str ? ::strlen(Str) : 0) {}
/// Construct a string ref from a pointer and length.
LLVM_ATTRIBUTE_ALWAYS_INLINE
- /*implicit*/ StringRef(const char *data, size_t length)
- : Data(data), Length(length) {
- assert((data || length == 0) &&
- "StringRef cannot be built from a NULL argument with non-null length");
- }
+ /*implicit*/ constexpr StringRef(const char *data, size_t length)
+ : Data(data), Length(length) {}
/// Construct a string ref from an std::string.
LLVM_ATTRIBUTE_ALWAYS_INLINE
/*implicit*/ StringRef(const std::string &Str)
: Data(Str.data()), Length(Str.length()) {}
+ static StringRef withNullAsEmpty(const char *data) {
+ return StringRef(data ? data : "");
+ }
+
/// @}
/// @name Iterators
/// @{
@@ -112,31 +120,37 @@ namespace llvm {
/// data - Get a pointer to the start of the string (which may not be null
/// terminated).
+ LLVM_NODISCARD
LLVM_ATTRIBUTE_ALWAYS_INLINE
const char *data() const { return Data; }
/// empty - Check if the string is empty.
+ LLVM_NODISCARD
LLVM_ATTRIBUTE_ALWAYS_INLINE
bool empty() const { return Length == 0; }
/// size - Get the string size.
+ LLVM_NODISCARD
LLVM_ATTRIBUTE_ALWAYS_INLINE
size_t size() const { return Length; }
/// front - Get the first character in the string.
+ LLVM_NODISCARD
char front() const {
assert(!empty());
return Data[0];
}
/// back - Get the last character in the string.
+ LLVM_NODISCARD
char back() const {
assert(!empty());
return Data[Length-1];
}
// copy - Allocate copy in Allocator and return StringRef to it.
- template <typename Allocator> StringRef copy(Allocator &A) const {
+ template <typename Allocator>
+ LLVM_NODISCARD StringRef copy(Allocator &A) const {
// Don't request a length 0 copy from the allocator.
if (empty())
return StringRef();
@@ -147,6 +161,7 @@ namespace llvm {
/// equals - Check for string equality, this is more efficient than
/// compare() when the relative ordering of inequal strings isn't needed.
+ LLVM_NODISCARD
LLVM_ATTRIBUTE_ALWAYS_INLINE
bool equals(StringRef RHS) const {
return (Length == RHS.Length &&
@@ -154,12 +169,14 @@ namespace llvm {
}
/// equals_lower - Check for string equality, ignoring case.
+ LLVM_NODISCARD
bool equals_lower(StringRef RHS) const {
return Length == RHS.Length && compare_lower(RHS) == 0;
}
/// compare - Compare two strings; the result is -1, 0, or 1 if this string
/// is lexicographically less than, equal to, or greater than the \p RHS.
+ LLVM_NODISCARD
LLVM_ATTRIBUTE_ALWAYS_INLINE
int compare(StringRef RHS) const {
// Check the prefix for a mismatch.
@@ -173,10 +190,12 @@ namespace llvm {
}
/// compare_lower - Compare two strings, ignoring case.
+ LLVM_NODISCARD
int compare_lower(StringRef RHS) const;
/// compare_numeric - Compare two strings, treating sequences of digits as
/// numbers.
+ LLVM_NODISCARD
int compare_numeric(StringRef RHS) const;
/// \brief Determine the edit distance between this string and another
@@ -197,10 +216,12 @@ namespace llvm {
/// or (if \p AllowReplacements is \c true) replacements needed to
/// transform one of the given strings into the other. If zero,
/// the strings are identical.
+ LLVM_NODISCARD
unsigned edit_distance(StringRef Other, bool AllowReplacements = true,
unsigned MaxEditDistance = 0) const;
/// str - Get the contents as an std::string.
+ LLVM_NODISCARD
std::string str() const {
if (!Data) return std::string();
return std::string(Data, Length);
@@ -210,11 +231,21 @@ namespace llvm {
/// @name Operator Overloads
/// @{
+ LLVM_NODISCARD
char operator[](size_t Index) const {
assert(Index < Length && "Invalid index!");
return Data[Index];
}
+ /// Disallow accidental assignment from a temporary std::string.
+ ///
+ /// The declaration here is extra complicated so that `stringRef = {}`
+ /// and `stringRef = "abc"` continue to select the move assignment operator.
+ template <typename T>
+ typename std::enable_if<std::is_same<T, std::string>::value,
+ StringRef>::type &
+ operator=(T &&Str) = delete;
+
/// @}
/// @name Type Conversions
/// @{
@@ -228,6 +259,7 @@ namespace llvm {
/// @{
/// Check if this string starts with the given \p Prefix.
+ LLVM_NODISCARD
LLVM_ATTRIBUTE_ALWAYS_INLINE
bool startswith(StringRef Prefix) const {
return Length >= Prefix.Length &&
@@ -235,9 +267,11 @@ namespace llvm {
}
/// Check if this string starts with the given \p Prefix, ignoring case.
+ LLVM_NODISCARD
bool startswith_lower(StringRef Prefix) const;
/// Check if this string ends with the given \p Suffix.
+ LLVM_NODISCARD
LLVM_ATTRIBUTE_ALWAYS_INLINE
bool endswith(StringRef Suffix) const {
return Length >= Suffix.Length &&
@@ -245,6 +279,7 @@ namespace llvm {
}
/// Check if this string ends with the given \p Suffix, ignoring case.
+ LLVM_NODISCARD
bool endswith_lower(StringRef Suffix) const;
/// @}
@@ -255,6 +290,7 @@ namespace llvm {
///
/// \returns The index of the first occurrence of \p C, or npos if not
/// found.
+ LLVM_NODISCARD
LLVM_ATTRIBUTE_ALWAYS_INLINE
size_t find(char C, size_t From = 0) const {
size_t FindBegin = std::min(From, Length);
@@ -266,16 +302,58 @@ namespace llvm {
return npos;
}
+ /// Search for the first character \p C in the string, ignoring case.
+ ///
+ /// \returns The index of the first occurrence of \p C, or npos if not
+ /// found.
+ LLVM_NODISCARD
+ size_t find_lower(char C, size_t From = 0) const;
+
+ /// Search for the first character satisfying the predicate \p F
+ ///
+ /// \returns The index of the first character satisfying \p F starting from
+ /// \p From, or npos if not found.
+ LLVM_NODISCARD
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
+ size_t find_if(function_ref<bool(char)> F, size_t From = 0) const {
+ StringRef S = drop_front(From);
+ while (!S.empty()) {
+ if (F(S.front()))
+ return size() - S.size();
+ S = S.drop_front();
+ }
+ return npos;
+ }
+
+ /// Search for the first character not satisfying the predicate \p F
+ ///
+ /// \returns The index of the first character not satisfying \p F starting
+ /// from \p From, or npos if not found.
+ LLVM_NODISCARD
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
+ size_t find_if_not(function_ref<bool(char)> F, size_t From = 0) const {
+ return find_if([F](char c) { return !F(c); }, From);
+ }
+
/// Search for the first string \p Str in the string.
///
/// \returns The index of the first occurrence of \p Str, or npos if not
/// found.
+ LLVM_NODISCARD
size_t find(StringRef Str, size_t From = 0) const;
+ /// Search for the first string \p Str in the string, ignoring case.
+ ///
+ /// \returns The index of the first occurrence of \p Str, or npos if not
+ /// found.
+ LLVM_NODISCARD
+ size_t find_lower(StringRef Str, size_t From = 0) const;
+
/// Search for the last character \p C in the string.
///
/// \returns The index of the last occurrence of \p C, or npos if not
/// found.
+ LLVM_NODISCARD
size_t rfind(char C, size_t From = npos) const {
From = std::min(From, Length);
size_t i = From;
@@ -287,14 +365,30 @@ namespace llvm {
return npos;
}
+ /// Search for the last character \p C in the string, ignoring case.
+ ///
+ /// \returns The index of the last occurrence of \p C, or npos if not
+ /// found.
+ LLVM_NODISCARD
+ size_t rfind_lower(char C, size_t From = npos) const;
+
/// Search for the last string \p Str in the string.
///
/// \returns The index of the last occurrence of \p Str, or npos if not
/// found.
+ LLVM_NODISCARD
size_t rfind(StringRef Str) const;
+ /// Search for the last string \p Str in the string, ignoring case.
+ ///
+ /// \returns The index of the last occurrence of \p Str, or npos if not
+ /// found.
+ LLVM_NODISCARD
+ size_t rfind_lower(StringRef Str) const;
+
/// Find the first character in the string that is \p C, or npos if not
/// found. Same as find.
+ LLVM_NODISCARD
size_t find_first_of(char C, size_t From = 0) const {
return find(C, From);
}
@@ -303,20 +397,24 @@ namespace llvm {
/// not found.
///
/// Complexity: O(size() + Chars.size())
+ LLVM_NODISCARD
size_t find_first_of(StringRef Chars, size_t From = 0) const;
/// Find the first character in the string that is not \p C or npos if not
/// found.
+ LLVM_NODISCARD
size_t find_first_not_of(char C, size_t From = 0) const;
/// Find the first character in the string that is not in the string
/// \p Chars, or npos if not found.
///
/// Complexity: O(size() + Chars.size())
+ LLVM_NODISCARD
size_t find_first_not_of(StringRef Chars, size_t From = 0) const;
/// Find the last character in the string that is \p C, or npos if not
/// found.
+ LLVM_NODISCARD
size_t find_last_of(char C, size_t From = npos) const {
return rfind(C, From);
}
@@ -325,23 +423,53 @@ namespace llvm {
/// found.
///
/// Complexity: O(size() + Chars.size())
+ LLVM_NODISCARD
size_t find_last_of(StringRef Chars, size_t From = npos) const;
/// Find the last character in the string that is not \p C, or npos if not
/// found.
+ LLVM_NODISCARD
size_t find_last_not_of(char C, size_t From = npos) const;
/// Find the last character in the string that is not in \p Chars, or
/// npos if not found.
///
/// Complexity: O(size() + Chars.size())
+ LLVM_NODISCARD
size_t find_last_not_of(StringRef Chars, size_t From = npos) const;
+ /// Return true if the given string is a substring of *this, and false
+ /// otherwise.
+ LLVM_NODISCARD
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
+ bool contains(StringRef Other) const { return find(Other) != npos; }
+
+ /// Return true if the given character is contained in *this, and false
+ /// otherwise.
+ LLVM_NODISCARD
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
+ bool contains(char C) const { return find_first_of(C) != npos; }
+
+ /// Return true if the given string is a substring of *this, and false
+ /// otherwise.
+ LLVM_NODISCARD
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
+ bool contains_lower(StringRef Other) const {
+ return find_lower(Other) != npos;
+ }
+
+ /// Return true if the given character is contained in *this, and false
+ /// otherwise.
+ LLVM_NODISCARD
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
+ bool contains_lower(char C) const { return find_lower(C) != npos; }
+
/// @}
/// @name Helpful Algorithms
/// @{
/// Return the number of occurrences of \p C in the string.
+ LLVM_NODISCARD
size_t count(char C) const {
size_t Count = 0;
for (size_t i = 0, e = Length; i != e; ++i)
@@ -386,6 +514,37 @@ namespace llvm {
return false;
}
+ /// Parse the current string as an integer of the specified radix. If
+ /// \p Radix is specified as zero, this does radix autosensing using
+ /// extended C rules: 0 is octal, 0x is hex, 0b is binary.
+ ///
+ /// If the string does not begin with a number of the specified radix,
+ /// this returns true to signify the error. The string is considered
+ /// erroneous if empty or if it overflows T.
+ /// The portion of the string representing the discovered numeric value
+ /// is removed from the beginning of the string.
+ template <typename T>
+ typename std::enable_if<std::numeric_limits<T>::is_signed, bool>::type
+ consumeInteger(unsigned Radix, T &Result) {
+ long long LLVal;
+ if (consumeSignedInteger(*this, Radix, LLVal) ||
+ static_cast<long long>(static_cast<T>(LLVal)) != LLVal)
+ return true;
+ Result = LLVal;
+ return false;
+ }
+
+ template <typename T>
+ typename std::enable_if<!std::numeric_limits<T>::is_signed, bool>::type
+ consumeInteger(unsigned Radix, T &Result) {
+ unsigned long long ULLVal;
+ if (consumeUnsignedInteger(*this, Radix, ULLVal) ||
+ static_cast<unsigned long long>(static_cast<T>(ULLVal)) != ULLVal)
+ return true;
+ Result = ULLVal;
+ return false;
+ }
+
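As a rough usage sketch (not part of the patch; the values are illustrative), the consuming parse peels the recognized digits off the front of the StringRef and leaves the remainder in place:

#include "llvm/ADT/StringRef.h"
#include <cassert>

void consumeIntegerSketch() {
  llvm::StringRef S("0x2a,rest");
  unsigned Value = 0;
  // Radix 0 requests autosensing; the "0x" prefix selects hex, so 0x2a == 42.
  bool Failed = S.consumeInteger(0, Value);
  assert(!Failed && Value == 42);
  // Only the numeric prefix was consumed; the separator is still there.
  assert(S == ",rest");
}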
/// Parse the current string as an integer of the specified \p Radix, or of
/// an autosensed radix if the \p Radix given is 0. The current value in
/// \p Result is discarded, and the storage is changed to be wide enough to
@@ -403,9 +562,11 @@ namespace llvm {
/// @{
  /// Convert the given ASCII string to lowercase.
+ LLVM_NODISCARD
std::string lower() const;
/// Convert the given ASCII string to uppercase.
+ LLVM_NODISCARD
std::string upper() const;
/// @}
@@ -421,14 +582,54 @@ namespace llvm {
/// \param N The number of characters to included in the substring. If N
/// exceeds the number of characters remaining in the string, the string
/// suffix (starting with \p Start) will be returned.
+ LLVM_NODISCARD
LLVM_ATTRIBUTE_ALWAYS_INLINE
StringRef substr(size_t Start, size_t N = npos) const {
Start = std::min(Start, Length);
return StringRef(Data + Start, std::min(N, Length - Start));
}
+ /// Return a StringRef equal to 'this' but with only the first \p N
+ /// elements remaining. If \p N is greater than the length of the
+ /// string, the entire string is returned.
+ LLVM_NODISCARD
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
+ StringRef take_front(size_t N = 1) const {
+ if (N >= size())
+ return *this;
+ return drop_back(size() - N);
+ }
+
+  /// Return a StringRef equal to 'this' but with only the last \p N
+ /// elements remaining. If \p N is greater than the length of the
+ /// string, the entire string is returned.
+ LLVM_NODISCARD
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
+ StringRef take_back(size_t N = 1) const {
+ if (N >= size())
+ return *this;
+ return drop_front(size() - N);
+ }
+
+ /// Return the longest prefix of 'this' such that every character
+ /// in the prefix satisfies the given predicate.
+ LLVM_NODISCARD
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
+ StringRef take_while(function_ref<bool(char)> F) const {
+ return substr(0, find_if_not(F));
+ }
+
+ /// Return the longest prefix of 'this' such that no character in
+ /// the prefix satisfies the given predicate.
+ LLVM_NODISCARD
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
+ StringRef take_until(function_ref<bool(char)> F) const {
+ return substr(0, find_if(F));
+ }
+
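A minimal sketch of what the take_* helpers return, assuming the input "abc123" (illustrative only, not part of the patch):

#include "llvm/ADT/StringRef.h"
#include <cassert>
#include <cctype>

void takeSketch() {
  llvm::StringRef S("abc123");
  assert(S.take_front(3) == "abc"); // first N characters
  assert(S.take_back(3) == "123");  // last N characters
  // Longest prefix whose characters satisfy the predicate.
  assert(S.take_while([](char C) { return std::isalpha((unsigned char)C); }) ==
         "abc");
  // Longest prefix whose characters do not satisfy the predicate.
  assert(S.take_until([](char C) { return std::isdigit((unsigned char)C); }) ==
         "abc");
}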
/// Return a StringRef equal to 'this' but with the first \p N elements
/// dropped.
+ LLVM_NODISCARD
LLVM_ATTRIBUTE_ALWAYS_INLINE
StringRef drop_front(size_t N = 1) const {
assert(size() >= N && "Dropping more elements than exist");
@@ -437,12 +638,51 @@ namespace llvm {
/// Return a StringRef equal to 'this' but with the last \p N elements
/// dropped.
+ LLVM_NODISCARD
LLVM_ATTRIBUTE_ALWAYS_INLINE
StringRef drop_back(size_t N = 1) const {
assert(size() >= N && "Dropping more elements than exist");
return substr(0, size()-N);
}
+ /// Return a StringRef equal to 'this', but with all characters satisfying
+ /// the given predicate dropped from the beginning of the string.
+ LLVM_NODISCARD
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
+ StringRef drop_while(function_ref<bool(char)> F) const {
+ return substr(find_if_not(F));
+ }
+
+ /// Return a StringRef equal to 'this', but with all characters not
+ /// satisfying the given predicate dropped from the beginning of the string.
+ LLVM_NODISCARD
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
+ StringRef drop_until(function_ref<bool(char)> F) const {
+ return substr(find_if(F));
+ }
+
+ /// Returns true if this StringRef has the given prefix and removes that
+ /// prefix.
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
+ bool consume_front(StringRef Prefix) {
+ if (!startswith(Prefix))
+ return false;
+
+ *this = drop_front(Prefix.size());
+ return true;
+ }
+
+ /// Returns true if this StringRef has the given suffix and removes that
+ /// suffix.
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
+ bool consume_back(StringRef Suffix) {
+ if (!endswith(Suffix))
+ return false;
+
+ *this = drop_back(Suffix.size());
+ return true;
+ }
+
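Likewise, a hedged sketch of the new drop_while/consume_ helpers (the path and values below are illustrative):

#include "llvm/ADT/StringRef.h"
#include <cassert>

void consumeSketch() {
  llvm::StringRef Path("lib/Support/StringRef.cpp");
  // consume_front only mutates *this when the prefix actually matches.
  if (Path.consume_front("lib/"))
    assert(Path == "Support/StringRef.cpp");
  if (Path.consume_back(".cpp"))
    assert(Path == "Support/StringRef");
  // drop_while strips the longest matching prefix and returns the rest.
  llvm::StringRef Num("00042");
  assert(Num.drop_while([](char C) { return C == '0'; }) == "42");
}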
/// Return a reference to the substring from [Start, End).
///
/// \param Start The index of the starting character in the substring; if
@@ -454,6 +694,7 @@ namespace llvm {
/// remaining in the string, the string suffix (starting with \p Start)
/// will be returned. If this is less than \p Start, an empty string will
/// be returned.
+ LLVM_NODISCARD
LLVM_ATTRIBUTE_ALWAYS_INLINE
StringRef slice(size_t Start, size_t End) const {
Start = std::min(Start, Length);
@@ -471,6 +712,7 @@ namespace llvm {
///
/// \param Separator The character to split on.
/// \returns The split substrings.
+ LLVM_NODISCARD
std::pair<StringRef, StringRef> split(char Separator) const {
size_t Idx = find(Separator);
if (Idx == npos)
@@ -488,6 +730,7 @@ namespace llvm {
///
/// \param Separator - The string to split on.
/// \return - The split substrings.
+ LLVM_NODISCARD
std::pair<StringRef, StringRef> split(StringRef Separator) const {
size_t Idx = find(Separator);
if (Idx == npos)
@@ -540,6 +783,7 @@ namespace llvm {
///
/// \param Separator - The character to split on.
/// \return - The split substrings.
+ LLVM_NODISCARD
std::pair<StringRef, StringRef> rsplit(char Separator) const {
size_t Idx = rfind(Separator);
if (Idx == npos)
@@ -549,36 +793,42 @@ namespace llvm {
  /// Return string with consecutive \p Char characters starting from the
  /// left removed.
+ LLVM_NODISCARD
StringRef ltrim(char Char) const {
return drop_front(std::min(Length, find_first_not_of(Char)));
}
/// Return string with consecutive characters in \p Chars starting from
/// the left removed.
+ LLVM_NODISCARD
StringRef ltrim(StringRef Chars = " \t\n\v\f\r") const {
return drop_front(std::min(Length, find_first_not_of(Chars)));
}
/// Return string with consecutive \p Char characters starting from the
/// right removed.
+ LLVM_NODISCARD
StringRef rtrim(char Char) const {
return drop_back(Length - std::min(Length, find_last_not_of(Char) + 1));
}
/// Return string with consecutive characters in \p Chars starting from
/// the right removed.
+ LLVM_NODISCARD
StringRef rtrim(StringRef Chars = " \t\n\v\f\r") const {
return drop_back(Length - std::min(Length, find_last_not_of(Chars) + 1));
}
/// Return string with consecutive \p Char characters starting from the
/// left and right removed.
+ LLVM_NODISCARD
StringRef trim(char Char) const {
return ltrim(Char).rtrim(Char);
}
/// Return string with consecutive characters in \p Chars starting from
/// the left and right removed.
+ LLVM_NODISCARD
StringRef trim(StringRef Chars = " \t\n\v\f\r") const {
return ltrim(Chars).rtrim(Chars);
}
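For reference, a small illustrative sketch of the trim family (values assumed, not taken from the patch):

#include "llvm/ADT/StringRef.h"
#include <cassert>

void trimSketch() {
  llvm::StringRef S("  hello \t\n");
  assert(S.ltrim() == "hello \t\n"); // default char set is " \t\n\v\f\r"
  assert(S.rtrim() == "  hello");
  assert(S.trim() == "hello");
  assert(llvm::StringRef("xxabcxx").trim('x') == "abc");
}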
@@ -586,6 +836,28 @@ namespace llvm {
/// @}
};
+ /// A wrapper around a string literal that serves as a proxy for constructing
+ /// global tables of StringRefs with the length computed at compile time.
+ /// In order to avoid the invocation of a global constructor, StringLiteral
+ /// should *only* be used in a constexpr context, as such:
+ ///
+ /// constexpr StringLiteral S("test");
+ ///
+ class StringLiteral : public StringRef {
+ public:
+ template <size_t N>
+ constexpr StringLiteral(const char (&Str)[N])
+#if defined(__clang__) && __has_attribute(enable_if)
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wgcc-compat"
+ __attribute((enable_if(__builtin_strlen(Str) == N - 1,
+ "invalid string literal")))
+#pragma clang diagnostic pop
+#endif
+ : StringRef(Str, N - 1) {
+ }
+ };
+
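A sketch of the intended use, a global table of StringRefs whose lengths are known at compile time (the table contents and names are illustrative):

#include "llvm/ADT/StringRef.h"

// No global constructor runs for this table; each entry's length is a
// compile-time constant.
static constexpr llvm::StringLiteral Keywords[] = {
    llvm::StringLiteral("if"), llvm::StringLiteral("else"),
    llvm::StringLiteral("while")};

bool isKeyword(llvm::StringRef Word) {
  for (llvm::StringRef K : Keywords)
    if (Word == K)
      return true;
  return false;
}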
/// @name StringRef Comparison Operators
/// @{
@@ -595,9 +867,7 @@ namespace llvm {
}
LLVM_ATTRIBUTE_ALWAYS_INLINE
- inline bool operator!=(StringRef LHS, StringRef RHS) {
- return !(LHS == RHS);
- }
+ inline bool operator!=(StringRef LHS, StringRef RHS) { return !(LHS == RHS); }
inline bool operator<(StringRef LHS, StringRef RHS) {
return LHS.compare(RHS) == -1;
@@ -622,6 +892,7 @@ namespace llvm {
/// @}
/// \brief Compute a hash_code for a StringRef.
+ LLVM_NODISCARD
hash_code hash_value(StringRef S);
// StringRefs can be treated like a POD type.
diff --git a/contrib/llvm/include/llvm/ADT/StringSwitch.h b/contrib/llvm/include/llvm/ADT/StringSwitch.h
index 42b0fc4bc441..75577b7738ba 100644
--- a/contrib/llvm/include/llvm/ADT/StringSwitch.h
+++ b/contrib/llvm/include/llvm/ADT/StringSwitch.h
@@ -53,104 +53,197 @@ public:
explicit StringSwitch(StringRef S)
: Str(S), Result(nullptr) { }
+ // StringSwitch is not copyable.
+ StringSwitch(const StringSwitch &) = delete;
+ void operator=(const StringSwitch &) = delete;
+
+ StringSwitch(StringSwitch &&other) {
+ *this = std::move(other);
+ }
+ StringSwitch &operator=(StringSwitch &&other) {
+ Str = other.Str;
+ Result = other.Result;
+ return *this;
+ }
+
+ ~StringSwitch() = default;
+
+ // Case-sensitive case matchers
template<unsigned N>
LLVM_ATTRIBUTE_ALWAYS_INLINE
StringSwitch& Case(const char (&S)[N], const T& Value) {
+ assert(N);
if (!Result && N-1 == Str.size() &&
- (std::memcmp(S, Str.data(), N-1) == 0)) {
+ (N == 1 || std::memcmp(S, Str.data(), N-1) == 0)) {
Result = &Value;
}
-
return *this;
}
template<unsigned N>
LLVM_ATTRIBUTE_ALWAYS_INLINE
StringSwitch& EndsWith(const char (&S)[N], const T &Value) {
+ assert(N);
if (!Result && Str.size() >= N-1 &&
- std::memcmp(S, Str.data() + Str.size() + 1 - N, N-1) == 0) {
+ (N == 1 || std::memcmp(S, Str.data() + Str.size() + 1 - N, N-1) == 0)) {
Result = &Value;
}
-
return *this;
}
template<unsigned N>
LLVM_ATTRIBUTE_ALWAYS_INLINE
StringSwitch& StartsWith(const char (&S)[N], const T &Value) {
+ assert(N);
if (!Result && Str.size() >= N-1 &&
- std::memcmp(S, Str.data(), N-1) == 0) {
+ (N == 1 || std::memcmp(S, Str.data(), N-1) == 0)) {
Result = &Value;
}
-
return *this;
}
template<unsigned N0, unsigned N1>
LLVM_ATTRIBUTE_ALWAYS_INLINE
- StringSwitch& Cases(const char (&S0)[N0], const char (&S1)[N1],
+ StringSwitch &Cases(const char (&S0)[N0], const char (&S1)[N1],
const T& Value) {
- if (!Result && (
- (N0-1 == Str.size() && std::memcmp(S0, Str.data(), N0-1) == 0) ||
- (N1-1 == Str.size() && std::memcmp(S1, Str.data(), N1-1) == 0))) {
- Result = &Value;
- }
-
- return *this;
+ return Case(S0, Value).Case(S1, Value);
}
template<unsigned N0, unsigned N1, unsigned N2>
LLVM_ATTRIBUTE_ALWAYS_INLINE
- StringSwitch& Cases(const char (&S0)[N0], const char (&S1)[N1],
+ StringSwitch &Cases(const char (&S0)[N0], const char (&S1)[N1],
const char (&S2)[N2], const T& Value) {
- if (!Result && (
- (N0-1 == Str.size() && std::memcmp(S0, Str.data(), N0-1) == 0) ||
- (N1-1 == Str.size() && std::memcmp(S1, Str.data(), N1-1) == 0) ||
- (N2-1 == Str.size() && std::memcmp(S2, Str.data(), N2-1) == 0))) {
- Result = &Value;
- }
-
- return *this;
+ return Case(S0, Value).Cases(S1, S2, Value);
}
template<unsigned N0, unsigned N1, unsigned N2, unsigned N3>
LLVM_ATTRIBUTE_ALWAYS_INLINE
- StringSwitch& Cases(const char (&S0)[N0], const char (&S1)[N1],
+ StringSwitch &Cases(const char (&S0)[N0], const char (&S1)[N1],
const char (&S2)[N2], const char (&S3)[N3],
const T& Value) {
- if (!Result && (
- (N0-1 == Str.size() && std::memcmp(S0, Str.data(), N0-1) == 0) ||
- (N1-1 == Str.size() && std::memcmp(S1, Str.data(), N1-1) == 0) ||
- (N2-1 == Str.size() && std::memcmp(S2, Str.data(), N2-1) == 0) ||
- (N3-1 == Str.size() && std::memcmp(S3, Str.data(), N3-1) == 0))) {
- Result = &Value;
- }
-
- return *this;
+ return Case(S0, Value).Cases(S1, S2, S3, Value);
}
template<unsigned N0, unsigned N1, unsigned N2, unsigned N3, unsigned N4>
LLVM_ATTRIBUTE_ALWAYS_INLINE
- StringSwitch& Cases(const char (&S0)[N0], const char (&S1)[N1],
+ StringSwitch &Cases(const char (&S0)[N0], const char (&S1)[N1],
const char (&S2)[N2], const char (&S3)[N3],
const char (&S4)[N4], const T& Value) {
- if (!Result && (
- (N0-1 == Str.size() && std::memcmp(S0, Str.data(), N0-1) == 0) ||
- (N1-1 == Str.size() && std::memcmp(S1, Str.data(), N1-1) == 0) ||
- (N2-1 == Str.size() && std::memcmp(S2, Str.data(), N2-1) == 0) ||
- (N3-1 == Str.size() && std::memcmp(S3, Str.data(), N3-1) == 0) ||
- (N4-1 == Str.size() && std::memcmp(S4, Str.data(), N4-1) == 0))) {
+ return Case(S0, Value).Cases(S1, S2, S3, S4, Value);
+ }
+
+ template <unsigned N0, unsigned N1, unsigned N2, unsigned N3, unsigned N4,
+ unsigned N5>
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
+ StringSwitch &Cases(const char (&S0)[N0], const char (&S1)[N1],
+ const char (&S2)[N2], const char (&S3)[N3],
+ const char (&S4)[N4], const char (&S5)[N5],
+ const T &Value) {
+ return Case(S0, Value).Cases(S1, S2, S3, S4, S5, Value);
+ }
+
+ template <unsigned N0, unsigned N1, unsigned N2, unsigned N3, unsigned N4,
+ unsigned N5, unsigned N6>
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
+ StringSwitch &Cases(const char (&S0)[N0], const char (&S1)[N1],
+ const char (&S2)[N2], const char (&S3)[N3],
+ const char (&S4)[N4], const char (&S5)[N5],
+ const char (&S6)[N6], const T &Value) {
+ return Case(S0, Value).Cases(S1, S2, S3, S4, S5, S6, Value);
+ }
+
+ template <unsigned N0, unsigned N1, unsigned N2, unsigned N3, unsigned N4,
+ unsigned N5, unsigned N6, unsigned N7>
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
+ StringSwitch &Cases(const char (&S0)[N0], const char (&S1)[N1],
+ const char (&S2)[N2], const char (&S3)[N3],
+ const char (&S4)[N4], const char (&S5)[N5],
+ const char (&S6)[N6], const char (&S7)[N7],
+ const T &Value) {
+ return Case(S0, Value).Cases(S1, S2, S3, S4, S5, S6, S7, Value);
+ }
+
+ template <unsigned N0, unsigned N1, unsigned N2, unsigned N3, unsigned N4,
+ unsigned N5, unsigned N6, unsigned N7, unsigned N8>
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
+ StringSwitch &Cases(const char (&S0)[N0], const char (&S1)[N1],
+ const char (&S2)[N2], const char (&S3)[N3],
+ const char (&S4)[N4], const char (&S5)[N5],
+ const char (&S6)[N6], const char (&S7)[N7],
+ const char (&S8)[N8], const T &Value) {
+ return Case(S0, Value).Cases(S1, S2, S3, S4, S5, S6, S7, S8, Value);
+ }
+
+ template <unsigned N0, unsigned N1, unsigned N2, unsigned N3, unsigned N4,
+ unsigned N5, unsigned N6, unsigned N7, unsigned N8, unsigned N9>
+ LLVM_ATTRIBUTE_ALWAYS_INLINE
+ StringSwitch &Cases(const char (&S0)[N0], const char (&S1)[N1],
+ const char (&S2)[N2], const char (&S3)[N3],
+ const char (&S4)[N4], const char (&S5)[N5],
+ const char (&S6)[N6], const char (&S7)[N7],
+ const char (&S8)[N8], const char (&S9)[N9],
+ const T &Value) {
+ return Case(S0, Value).Cases(S1, S2, S3, S4, S5, S6, S7, S8, S9, Value);
+ }
+
+ // Case-insensitive case matchers.
+ template <unsigned N>
+ LLVM_ATTRIBUTE_ALWAYS_INLINE StringSwitch &CaseLower(const char (&S)[N],
+ const T &Value) {
+ if (!Result && Str.equals_lower(StringRef(S, N - 1)))
Result = &Value;
- }
return *this;
}
+ template <unsigned N>
+ LLVM_ATTRIBUTE_ALWAYS_INLINE StringSwitch &EndsWithLower(const char (&S)[N],
+ const T &Value) {
+ if (!Result && Str.endswith_lower(StringRef(S, N - 1)))
+ Result = &Value;
+
+ return *this;
+ }
+
+ template <unsigned N>
+ LLVM_ATTRIBUTE_ALWAYS_INLINE StringSwitch &StartsWithLower(const char (&S)[N],
+ const T &Value) {
+ if (!Result && Str.startswith_lower(StringRef(S, N - 1)))
+ Result = &Value;
+
+ return *this;
+ }
+ template <unsigned N0, unsigned N1>
+ LLVM_ATTRIBUTE_ALWAYS_INLINE StringSwitch &
+ CasesLower(const char (&S0)[N0], const char (&S1)[N1], const T &Value) {
+ return CaseLower(S0, Value).CaseLower(S1, Value);
+ }
+
+ template <unsigned N0, unsigned N1, unsigned N2>
+ LLVM_ATTRIBUTE_ALWAYS_INLINE StringSwitch &
+ CasesLower(const char (&S0)[N0], const char (&S1)[N1], const char (&S2)[N2],
+ const T &Value) {
+ return CaseLower(S0, Value).CasesLower(S1, S2, Value);
+ }
+
+ template <unsigned N0, unsigned N1, unsigned N2, unsigned N3>
+ LLVM_ATTRIBUTE_ALWAYS_INLINE StringSwitch &
+ CasesLower(const char (&S0)[N0], const char (&S1)[N1], const char (&S2)[N2],
+ const char (&S3)[N3], const T &Value) {
+ return CaseLower(S0, Value).CasesLower(S1, S2, S3, Value);
+ }
+
+ template <unsigned N0, unsigned N1, unsigned N2, unsigned N3, unsigned N4>
+ LLVM_ATTRIBUTE_ALWAYS_INLINE StringSwitch &
+ CasesLower(const char (&S0)[N0], const char (&S1)[N1], const char (&S2)[N2],
+ const char (&S3)[N3], const char (&S4)[N4], const T &Value) {
+ return CaseLower(S0, Value).CasesLower(S1, S2, S3, S4, Value);
+ }
+
LLVM_ATTRIBUTE_ALWAYS_INLINE
- R Default(const T& Value) const {
+ R Default(const T &Value) const {
if (Result)
return *Result;
-
return Value;
}
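Taken together, the matchers chain into a single expression; a hedged usage sketch (the Color enum and strings are illustrative, not part of the patch):

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"

enum class Color { Red, Green, Blue, Unknown };

Color parseColor(llvm::StringRef Name) {
  // Each matcher returns *this, so the chain evaluates left to right and
  // Default() supplies the result when nothing matched.
  return llvm::StringSwitch<Color>(Name)
      .Case("red", Color::Red)
      .Cases("green", "lime", Color::Green)
      .CaseLower("blue", Color::Blue) // also matches "Blue", "BLUE", ...
      .Default(Color::Unknown);
}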
diff --git a/contrib/llvm/include/llvm/ADT/TinyPtrVector.h b/contrib/llvm/include/llvm/ADT/TinyPtrVector.h
index 605f0e70a857..ca43b6046193 100644
--- a/contrib/llvm/include/llvm/ADT/TinyPtrVector.h
+++ b/contrib/llvm/include/llvm/ADT/TinyPtrVector.h
@@ -11,8 +11,13 @@
#define LLVM_ADT_TINYPTRVECTOR_H
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/None.h"
#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/SmallVector.h"
+#include <cassert>
+#include <cstddef>
+#include <iterator>
+#include <type_traits>
namespace llvm {
@@ -25,15 +30,16 @@ namespace llvm {
template <typename EltTy>
class TinyPtrVector {
public:
- typedef llvm::SmallVector<EltTy, 4> VecTy;
+ typedef SmallVector<EltTy, 4> VecTy;
typedef typename VecTy::value_type value_type;
- typedef llvm::PointerUnion<EltTy, VecTy *> PtrUnion;
+ typedef PointerUnion<EltTy, VecTy *> PtrUnion;
private:
PtrUnion Val;
public:
- TinyPtrVector() {}
+ TinyPtrVector() = default;
+
~TinyPtrVector() {
if (VecTy *V = Val.template dyn_cast<VecTy*>())
delete V;
@@ -43,6 +49,7 @@ public:
if (VecTy *V = Val.template dyn_cast<VecTy*>())
Val = new VecTy(*V);
}
+
TinyPtrVector &operator=(const TinyPtrVector &RHS) {
if (this == &RHS)
return *this;
@@ -74,6 +81,7 @@ public:
TinyPtrVector(TinyPtrVector &&RHS) : Val(RHS.Val) {
RHS.Val = (EltTy)nullptr;
}
+
TinyPtrVector &operator=(TinyPtrVector &&RHS) {
if (this == &RHS)
return *this;
@@ -170,6 +178,7 @@ public:
return Val.template get<VecTy *>()->begin();
}
+
iterator end() {
if (Val.template is<EltTy>())
return begin() + (Val.isNull() ? 0 : 1);
@@ -187,9 +196,11 @@ public:
reverse_iterator rbegin() { return reverse_iterator(end()); }
reverse_iterator rend() { return reverse_iterator(begin()); }
+
const_reverse_iterator rbegin() const {
return const_reverse_iterator(end());
}
+
const_reverse_iterator rend() const {
return const_reverse_iterator(begin());
}
@@ -329,6 +340,7 @@ public:
return Val.template get<VecTy*>()->insert(begin() + Offset, From, To);
}
};
+
} // end namespace llvm
-#endif
+#endif // LLVM_ADT_TINYPTRVECTOR_H
diff --git a/contrib/llvm/include/llvm/ADT/Triple.h b/contrib/llvm/include/llvm/ADT/Triple.h
index b98f8407d075..d4130e1e85ae 100644
--- a/contrib/llvm/include/llvm/ADT/Triple.h
+++ b/contrib/llvm/include/llvm/ADT/Triple.h
@@ -64,11 +64,14 @@ public:
ppc64le, // PPC64LE: powerpc64le
r600, // R600: AMD GPUs HD2XXX - HD6XXX
amdgcn, // AMDGCN: AMD GCN GPUs
+ riscv32, // RISC-V (32-bit): riscv32
+ riscv64, // RISC-V (64-bit): riscv64
sparc, // Sparc: sparc
sparcv9, // Sparcv9: Sparcv9
sparcel, // Sparc: (endianness = little). NB: 'Sparcle' is a CPU variant
systemz, // SystemZ: s390x
tce, // TCE (http://tce.cs.tut.fi/): tce
+ tcele, // TCE little endian (http://tce.cs.tut.fi/): tcele
thumb, // Thumb (little endian): thumb, thumbv.*
thumbeb, // Thumb (big endian): thumbeb
x86, // X86: i[3-9]86
@@ -99,6 +102,7 @@ public:
ARMSubArch_v8_2a,
ARMSubArch_v8_1a,
ARMSubArch_v8,
+ ARMSubArch_v8r,
ARMSubArch_v8m_baseline,
ARMSubArch_v8m_mainline,
ARMSubArch_v7,
@@ -144,6 +148,7 @@ public:
Darwin,
DragonFly,
FreeBSD,
+ Fuchsia,
IOS,
KFreeBSD,
Linux,
@@ -168,7 +173,8 @@ public:
TvOS, // Apple tvOS
WatchOS, // Apple watchOS
Mesa3D,
- LastOSType = Mesa3D
+ Contiki,
+ LastOSType = Contiki
};
enum EnvironmentType {
UnknownEnvironment,
@@ -191,7 +197,8 @@ public:
Cygnus,
AMDOpenCL,
CoreCLR,
- LastEnvironmentType = CoreCLR
+ OpenCL,
+ LastEnvironmentType = OpenCL
};
enum ObjectFormatType {
UnknownObjectFormat,
@@ -461,6 +468,10 @@ public:
return getOS() == Triple::FreeBSD;
}
+ bool isOSFuchsia() const {
+ return getOS() == Triple::Fuchsia;
+ }
+
bool isOSDragonFly() const { return getOS() == Triple::DragonFly; }
bool isOSSolaris() const {
@@ -482,6 +493,10 @@ public:
Env == Triple::GNUX32;
}
+ bool isOSContiki() const {
+ return getOS() == Triple::Contiki;
+ }
+
/// Checks if the environment could be MSVC.
bool isWindowsMSVCEnvironment() const {
return getOS() == Triple::Win32 &&
@@ -690,7 +705,7 @@ public:
/// @{
/// getArchTypeName - Get the canonical name for the \p Kind architecture.
- static const char *getArchTypeName(ArchType Kind);
+ static StringRef getArchTypeName(ArchType Kind);
/// getArchTypePrefix - Get the "prefix" canonical name for the \p Kind
/// architecture. This is the prefix used by the architecture specific
@@ -698,17 +713,17 @@ public:
/// Intrinsic::getIntrinsicForGCCBuiltin().
///
/// \return - The architecture prefix, or 0 if none is defined.
- static const char *getArchTypePrefix(ArchType Kind);
+ static StringRef getArchTypePrefix(ArchType Kind);
/// getVendorTypeName - Get the canonical name for the \p Kind vendor.
- static const char *getVendorTypeName(VendorType Kind);
+ static StringRef getVendorTypeName(VendorType Kind);
/// getOSTypeName - Get the canonical name for the \p Kind operating system.
- static const char *getOSTypeName(OSType Kind);
+ static StringRef getOSTypeName(OSType Kind);
/// getEnvironmentTypeName - Get the canonical name for the \p Kind
/// environment.
- static const char *getEnvironmentTypeName(EnvironmentType Kind);
+ static StringRef getEnvironmentTypeName(EnvironmentType Kind);
/// @}
/// @name Static helpers for converting alternate architecture names.
diff --git a/contrib/llvm/include/llvm/ADT/Twine.h b/contrib/llvm/include/llvm/ADT/Twine.h
index 81b1a6d946fc..f5f00dcfafe5 100644
--- a/contrib/llvm/include/llvm/ADT/Twine.h
+++ b/contrib/llvm/include/llvm/ADT/Twine.h
@@ -12,12 +12,14 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/DataTypes.h"
#include "llvm/Support/ErrorHandling.h"
#include <cassert>
+#include <cstdint>
#include <string>
namespace llvm {
+
+ class formatv_object_base;
class raw_ostream;
/// Twine - A lightweight data structure for efficiently representing the
@@ -101,6 +103,9 @@ namespace llvm {
/// A pointer to a SmallString instance.
SmallStringKind,
+ /// A pointer to a formatv_object_base instance.
+ FormatvObjectKind,
+
/// A char value, to render as a character.
CharKind,
@@ -136,6 +141,7 @@ namespace llvm {
const std::string *stdString;
const StringRef *stringRef;
const SmallVectorImpl<char> *smallString;
+ const formatv_object_base *formatvObject;
char character;
unsigned int decUI;
int decI;
@@ -146,7 +152,6 @@ namespace llvm {
const uint64_t *uHex;
};
- private:
/// LHS - The prefix in the concatenation, which may be uninitialized for
/// Null or Empty kinds.
Child LHS;
@@ -158,7 +163,6 @@ namespace llvm {
/// RHSKind - The NodeKind of the right hand side, \see getRHSKind().
NodeKind RHSKind;
- private:
/// Construct a nullary twine; the kind must be NullKind or EmptyKind.
explicit Twine(NodeKind Kind)
: LHSKind(Kind), RHSKind(EmptyKind) {
@@ -179,10 +183,6 @@ namespace llvm {
assert(isValid() && "Invalid twine!");
}
- /// Since the intended use of twines is as temporary objects, assignments
- /// when concatenating might cause undefined behavior or stack corruptions
- Twine &operator=(const Twine &Other) = delete;
-
/// Check for the null twine.
bool isNull() const {
return getLHSKind() == NullKind;
@@ -295,6 +295,13 @@ namespace llvm {
assert(isValid() && "Invalid twine!");
}
+ /// Construct from a formatv_object_base.
+ /*implicit*/ Twine(const formatv_object_base &Fmt)
+ : LHSKind(FormatvObjectKind), RHSKind(EmptyKind) {
+ LHS.formatvObject = &Fmt;
+ assert(isValid() && "Invalid twine!");
+ }
+
/// Construct from a char.
explicit Twine(char Val)
: LHSKind(CharKind), RHSKind(EmptyKind) {
@@ -370,6 +377,10 @@ namespace llvm {
assert(isValid() && "Invalid twine!");
}
+ /// Since the intended use of twines is as temporary objects, assignments
+ /// when concatenating might cause undefined behavior or stack corruptions
+ Twine &operator=(const Twine &) = delete;
+
/// Create a 'null' string, which is an empty string that always
/// concatenates to form another empty string.
static Twine createNull() {
@@ -535,6 +546,7 @@ namespace llvm {
}
/// @}
-}
-#endif
+} // end namespace llvm
+
+#endif // LLVM_ADT_TWINE_H
diff --git a/contrib/llvm/include/llvm/ADT/ilist.h b/contrib/llvm/include/llvm/ADT/ilist.h
index 8e4d45dfef22..a788f811e4c6 100644
--- a/contrib/llvm/include/llvm/ADT/ilist.h
+++ b/contrib/llvm/include/llvm/ADT/ilist.h
@@ -11,459 +11,270 @@
// (i.e. each node of the list must contain a next and previous field for the
// list).
//
-// The ilist_traits trait class is used to gain access to the next and previous
-// fields of the node type that the list is instantiated with. If it is not
-// specialized, the list defaults to using the getPrev(), getNext() method calls
-// to get the next and previous pointers.
+// The ilist class itself should be a plug-in replacement for std::list. This list
+// replacement does not provide a constant time size() method, so be careful to
+// use empty() when you really want to know if it's empty.
//
-// The ilist class itself, should be a plug in replacement for list, assuming
-// that the nodes contain next/prev pointers. This list replacement does not
-// provide a constant time size() method, so be careful to use empty() when you
-// really want to know if it's empty.
-//
-// The ilist class is implemented by allocating a 'tail' node when the list is
-// created (using ilist_traits<>::createSentinel()). This tail node is
-// absolutely required because the user must be able to compute end()-1. Because
-// of this, users of the direct next/prev links will see an extra link on the
-// end of the list, which should be ignored.
-//
-// Requirements for a user of this list:
-//
-// 1. The user must provide {g|s}et{Next|Prev} methods, or specialize
-// ilist_traits to provide an alternate way of getting and setting next and
-// prev links.
+// The ilist class is implemented as a circular list. The list itself contains
+// a sentinel node, whose Next points at begin() and whose Prev points at
+// rbegin(). The sentinel node itself serves as end() and rend().
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_ADT_ILIST_H
#define LLVM_ADT_ILIST_H
-#include "llvm/Support/Compiler.h"
-#include <algorithm>
+#include "llvm/ADT/simple_ilist.h"
#include <cassert>
#include <cstddef>
#include <iterator>
namespace llvm {
-template<typename NodeTy, typename Traits> class iplist;
-template<typename NodeTy> class ilist_iterator;
-
-/// ilist_nextprev_traits - A fragment for template traits for intrusive list
-/// that provides default next/prev implementations for common operations.
+/// Use delete by default for iplist and ilist.
///
-template<typename NodeTy>
-struct ilist_nextprev_traits {
- static NodeTy *getPrev(NodeTy *N) { return N->getPrev(); }
- static NodeTy *getNext(NodeTy *N) { return N->getNext(); }
- static const NodeTy *getPrev(const NodeTy *N) { return N->getPrev(); }
- static const NodeTy *getNext(const NodeTy *N) { return N->getNext(); }
-
- static void setPrev(NodeTy *N, NodeTy *Prev) { N->setPrev(Prev); }
- static void setNext(NodeTy *N, NodeTy *Next) { N->setNext(Next); }
+/// Specialize this to get different behaviour for ownership-related API. (If
+/// you really want ownership semantics, consider using std::list or building
+/// something like \a BumpPtrList.)
+///
+/// \see ilist_noalloc_traits
+template <typename NodeTy> struct ilist_alloc_traits {
+ static void deleteNode(NodeTy *V) { delete V; }
};
-template<typename NodeTy>
-struct ilist_traits;
-
-/// ilist_sentinel_traits - A fragment for template traits for intrusive list
-/// that provides default sentinel implementations for common operations.
+/// Custom traits to do nothing on deletion.
///
-/// ilist_sentinel_traits implements a lazy dynamic sentinel allocation
-/// strategy. The sentinel is stored in the prev field of ilist's Head.
+/// Specialize ilist_alloc_traits to inherit from this to disable the
+/// non-intrusive deletion in iplist (which implies ownership).
///
-template<typename NodeTy>
-struct ilist_sentinel_traits {
- /// createSentinel - create the dynamic sentinel
- static NodeTy *createSentinel() { return new NodeTy(); }
-
- /// destroySentinel - deallocate the dynamic sentinel
- static void destroySentinel(NodeTy *N) { delete N; }
-
- /// provideInitialHead - when constructing an ilist, provide a starting
- /// value for its Head
- /// @return null node to indicate that it needs to be allocated later
- static NodeTy *provideInitialHead() { return nullptr; }
-
- /// ensureHead - make sure that Head is either already
- /// initialized or assigned a fresh sentinel
- /// @return the sentinel
- static NodeTy *ensureHead(NodeTy *&Head) {
- if (!Head) {
- Head = ilist_traits<NodeTy>::createSentinel();
- ilist_traits<NodeTy>::noteHead(Head, Head);
- ilist_traits<NodeTy>::setNext(Head, nullptr);
- return Head;
- }
- return ilist_traits<NodeTy>::getPrev(Head);
- }
-
- /// noteHead - stash the sentinel into its default location
- static void noteHead(NodeTy *NewHead, NodeTy *Sentinel) {
- ilist_traits<NodeTy>::setPrev(NewHead, Sentinel);
- }
-};
-
-template <typename NodeTy> class ilist_half_node;
-template <typename NodeTy> class ilist_node;
-
-/// Traits with an embedded ilist_node as a sentinel.
+/// If you want purely intrusive semantics with no callbacks, consider using \a
+/// simple_ilist instead.
///
-/// FIXME: The downcast in createSentinel() is UB.
-template <typename NodeTy> struct ilist_embedded_sentinel_traits {
- /// Get hold of the node that marks the end of the list.
- NodeTy *createSentinel() const {
- // Since i(p)lists always publicly derive from their corresponding traits,
- // placing a data member in this class will augment the i(p)list. But since
- // the NodeTy is expected to be publicly derive from ilist_node<NodeTy>,
- // there is a legal viable downcast from it to NodeTy. We use this trick to
- // superimpose an i(p)list with a "ghostly" NodeTy, which becomes the
- // sentinel. Dereferencing the sentinel is forbidden (save the
- // ilist_node<NodeTy>), so no one will ever notice the superposition.
- return static_cast<NodeTy *>(&Sentinel);
- }
- static void destroySentinel(NodeTy *) {}
-
- NodeTy *provideInitialHead() const { return createSentinel(); }
- NodeTy *ensureHead(NodeTy *) const { return createSentinel(); }
- static void noteHead(NodeTy *, NodeTy *) {}
-
-private:
- mutable ilist_node<NodeTy> Sentinel;
+/// \code
+/// template <>
+/// struct ilist_alloc_traits<MyType> : ilist_noalloc_traits<MyType> {};
+/// \endcode
+template <typename NodeTy> struct ilist_noalloc_traits {
+ static void deleteNode(NodeTy *V) {}
};
-/// Trait with an embedded ilist_half_node as a sentinel.
+/// Callbacks do nothing by default in iplist and ilist.
///
-/// FIXME: The downcast in createSentinel() is UB.
-template <typename NodeTy> struct ilist_half_embedded_sentinel_traits {
- /// Get hold of the node that marks the end of the list.
- NodeTy *createSentinel() const {
- // See comment in ilist_embedded_sentinel_traits::createSentinel().
- return static_cast<NodeTy *>(&Sentinel);
- }
- static void destroySentinel(NodeTy *) {}
-
- NodeTy *provideInitialHead() const { return createSentinel(); }
- NodeTy *ensureHead(NodeTy *) const { return createSentinel(); }
- static void noteHead(NodeTy *, NodeTy *) {}
+/// Specialize this to use callbacks when nodes change their list
+/// membership.
+template <typename NodeTy> struct ilist_callback_traits {
+ void addNodeToList(NodeTy *) {}
+ void removeNodeFromList(NodeTy *) {}
-private:
- mutable ilist_half_node<NodeTy> Sentinel;
+ /// Callback before transferring nodes to this list.
+ ///
+ /// \pre \c this!=&OldList
+ template <class Iterator>
+ void transferNodesFromList(ilist_callback_traits &OldList, Iterator /*first*/,
+ Iterator /*last*/) {
+ (void)OldList;
+ }
};
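A sketch of how a client could hook these callbacks; MyNode is a hypothetical node type and the comments describe typical uses, not required behaviour:

#include "llvm/ADT/ilist.h"

class MyNode; // hypothetical node type

namespace llvm {
template <> struct ilist_callback_traits<MyNode> {
  // Invoked by the owning iplist when a node is inserted or removed.
  void addNodeToList(MyNode *N) { (void)N; /* e.g. record the owner */ }
  void removeNodeFromList(MyNode *N) { (void)N; /* e.g. clear the owner */ }
  // Invoked on the destination list's traits before nodes are spliced over.
  template <class Iterator>
  void transferNodesFromList(ilist_callback_traits & /*OldList*/,
                             Iterator /*First*/, Iterator /*Last*/) {}
};
} // end namespace llvm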
-/// ilist_node_traits - A fragment for template traits for intrusive list
-/// that provides default node related operations.
+/// A fragment for template traits for intrusive list that provides default
+/// node related operations.
///
-template<typename NodeTy>
-struct ilist_node_traits {
- static NodeTy *createNode(const NodeTy &V) { return new NodeTy(V); }
- static void deleteNode(NodeTy *V) { delete V; }
-
- void addNodeToList(NodeTy *) {}
- void removeNodeFromList(NodeTy *) {}
- void transferNodesFromList(ilist_node_traits & /*SrcTraits*/,
- ilist_iterator<NodeTy> /*first*/,
- ilist_iterator<NodeTy> /*last*/) {}
-};
+/// TODO: Remove this layer of indirection. It's not necessary.
+template <typename NodeTy>
+struct ilist_node_traits : ilist_alloc_traits<NodeTy>,
+ ilist_callback_traits<NodeTy> {};
-/// ilist_default_traits - Default template traits for intrusive list.
-/// By inheriting from this, you can easily use default implementations
-/// for all common operations.
+/// Default template traits for intrusive list.
///
-template<typename NodeTy>
-struct ilist_default_traits : public ilist_nextprev_traits<NodeTy>,
- public ilist_sentinel_traits<NodeTy>,
- public ilist_node_traits<NodeTy> {
-};
+/// By inheriting from this, you can easily use default implementations for all
+/// common operations.
+///
+/// TODO: Remove this customization point. Specializing ilist_traits is
+/// already fully general.
+template <typename NodeTy>
+struct ilist_default_traits : public ilist_node_traits<NodeTy> {};
-// Template traits for intrusive list. By specializing this template class, you
-// can change what next/prev fields are used to store the links...
-template<typename NodeTy>
+/// Template traits for intrusive list.
+///
+/// Customize callbacks and allocation semantics.
+template <typename NodeTy>
struct ilist_traits : public ilist_default_traits<NodeTy> {};
-// Const traits are the same as nonconst traits...
-template<typename Ty>
-struct ilist_traits<const Ty> : public ilist_traits<Ty> {};
+/// Const traits should never be instantiated.
+template <typename Ty> struct ilist_traits<const Ty> {};
-//===----------------------------------------------------------------------===//
-// Iterator for intrusive list.
-//
-template <typename NodeTy>
-class ilist_iterator
- : public std::iterator<std::bidirectional_iterator_tag, NodeTy, ptrdiff_t> {
-public:
- typedef ilist_traits<NodeTy> Traits;
- typedef std::iterator<std::bidirectional_iterator_tag, NodeTy, ptrdiff_t>
- super;
+namespace ilist_detail {
- typedef typename super::value_type value_type;
- typedef typename super::difference_type difference_type;
- typedef typename super::pointer pointer;
- typedef typename super::reference reference;
+template <class T> T &make();
-private:
- pointer NodePtr;
+/// Type trait to check for a traits class that has a getNext member (as a
+/// canary for any of the ilist_nextprev_traits API).
+template <class TraitsT, class NodeT> struct HasGetNext {
+ typedef char Yes[1];
+ typedef char No[2];
+ template <size_t N> struct SFINAE {};
-public:
- explicit ilist_iterator(pointer NP) : NodePtr(NP) {}
- explicit ilist_iterator(reference NR) : NodePtr(&NR) {}
- ilist_iterator() : NodePtr(nullptr) {}
-
- // This is templated so that we can allow constructing a const iterator from
- // a nonconst iterator...
- template <class node_ty>
- ilist_iterator(const ilist_iterator<node_ty> &RHS)
- : NodePtr(RHS.getNodePtrUnchecked()) {}
-
- // This is templated so that we can allow assigning to a const iterator from
- // a nonconst iterator...
- template <class node_ty>
- const ilist_iterator &operator=(const ilist_iterator<node_ty> &RHS) {
- NodePtr = RHS.getNodePtrUnchecked();
- return *this;
- }
-
- void reset(pointer NP) { NodePtr = NP; }
+ template <class U>
+ static Yes &test(U *I, decltype(I->getNext(&make<NodeT>())) * = 0);
+ template <class> static No &test(...);
- // Accessors...
- explicit operator pointer() const { return NodePtr; }
- reference operator*() const { return *NodePtr; }
- pointer operator->() const { return &operator*(); }
+public:
+ static const bool value = sizeof(test<TraitsT>(nullptr)) == sizeof(Yes);
+};
- // Comparison operators
- template <class Y> bool operator==(const ilist_iterator<Y> &RHS) const {
- return NodePtr == RHS.getNodePtrUnchecked();
- }
- template <class Y> bool operator!=(const ilist_iterator<Y> &RHS) const {
- return NodePtr != RHS.getNodePtrUnchecked();
- }
+/// Type trait to check for a traits class that has a createSentinel member (as
+/// a canary for any of the ilist_sentinel_traits API).
+template <class TraitsT> struct HasCreateSentinel {
+ typedef char Yes[1];
+ typedef char No[2];
- // Increment and decrement operators...
- ilist_iterator &operator--() {
- NodePtr = Traits::getPrev(NodePtr);
- assert(NodePtr && "--'d off the beginning of an ilist!");
- return *this;
- }
- ilist_iterator &operator++() {
- NodePtr = Traits::getNext(NodePtr);
- return *this;
- }
- ilist_iterator operator--(int) {
- ilist_iterator tmp = *this;
- --*this;
- return tmp;
- }
- ilist_iterator operator++(int) {
- ilist_iterator tmp = *this;
- ++*this;
- return tmp;
- }
+ template <class U>
+ static Yes &test(U *I, decltype(I->createSentinel()) * = 0);
+ template <class> static No &test(...);
- // Internal interface, do not use...
- pointer getNodePtrUnchecked() const { return NodePtr; }
+public:
+ static const bool value = sizeof(test<TraitsT>(nullptr)) == sizeof(Yes);
};
-// Allow ilist_iterators to convert into pointers to a node automatically when
-// used by the dyn_cast, cast, isa mechanisms...
-
-template<typename From> struct simplify_type;
+/// Type trait to check for a traits class that has a createNode member.
+/// Allocation should be managed in a wrapper class, instead of in
+/// ilist_traits.
+template <class TraitsT, class NodeT> struct HasCreateNode {
+ typedef char Yes[1];
+ typedef char No[2];
+ template <size_t N> struct SFINAE {};
-template<typename NodeTy> struct simplify_type<ilist_iterator<NodeTy> > {
- typedef NodeTy* SimpleType;
+ template <class U>
+ static Yes &test(U *I, decltype(I->createNode(make<NodeT>())) * = 0);
+ template <class> static No &test(...);
- static SimpleType getSimplifiedValue(ilist_iterator<NodeTy> &Node) {
- return &*Node;
- }
+public:
+ static const bool value = sizeof(test<TraitsT>(nullptr)) == sizeof(Yes);
};
-template<typename NodeTy> struct simplify_type<const ilist_iterator<NodeTy> > {
- typedef /*const*/ NodeTy* SimpleType;
- static SimpleType getSimplifiedValue(const ilist_iterator<NodeTy> &Node) {
- return &*Node;
- }
+template <class TraitsT, class NodeT> struct HasObsoleteCustomization {
+ static const bool value = HasGetNext<TraitsT, NodeT>::value ||
+ HasCreateSentinel<TraitsT>::value ||
+ HasCreateNode<TraitsT, NodeT>::value;
};
+} // end namespace ilist_detail
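The canaries above use the classic sizeof-based detection idiom; a self-contained sketch of the same technique (not part of the patch, names are illustrative):

struct WithFoo { void foo(); };
struct WithoutFoo {};

template <class T> struct HasFoo {
  typedef char Yes[1];
  typedef char No[2];
  // Viable only when U::foo() exists; otherwise SFINAE removes the overload.
  template <class U> static Yes &test(U *I, decltype(I->foo()) * = nullptr);
  template <class> static No &test(...);
  static const bool value = sizeof(test<T>(nullptr)) == sizeof(Yes);
};

static_assert(HasFoo<WithFoo>::value, "foo() detected");
static_assert(!HasFoo<WithoutFoo>::value, "missing foo() rejected");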
//===----------------------------------------------------------------------===//
//
-/// iplist - The subset of list functionality that can safely be used on nodes
-/// of polymorphic types, i.e. a heterogeneous list with a common base class that
-/// holds the next/prev pointers. The only state of the list itself is a single
-/// pointer to the head of the list.
+/// A wrapper around an intrusive list with callbacks and non-intrusive
+/// ownership.
///
-/// This list can be in one of three interesting states:
-/// 1. The list may be completely unconstructed. In this case, the head
-/// pointer is null. When in this form, any query for an iterator (e.g.
-/// begin() or end()) causes the list to transparently change to state #2.
-/// 2. The list may be empty, but contain a sentinel for the end iterator. This
-/// sentinel is created by the Traits::createSentinel method and is a link
-/// in the list. When the list is empty, the pointer in the iplist points
-/// to the sentinel. Once the sentinel is constructed, it
-/// is not destroyed until the list is.
-/// 3. The list may contain actual objects in it, which are stored as a doubly
-/// linked list of nodes. One invariant of the list is that the predecessor
-/// of the first node in the list always points to the last node in the list,
-/// and the successor pointer for the sentinel (which always stays at the
-/// end of the list) is always null.
+/// This wraps a purely intrusive list (like simple_ilist) with a configurable
+/// traits class. The traits can implement callbacks and customize the
+/// ownership semantics.
///
-template<typename NodeTy, typename Traits=ilist_traits<NodeTy> >
-class iplist : public Traits {
- mutable NodeTy *Head;
-
- // Use the prev node pointer of 'head' as the tail pointer. This is really a
- // circularly linked list where we snip the 'next' link from the sentinel node
- // back to the first node in the list (to preserve assertions about going off
- // the end of the list).
- NodeTy *getTail() { return this->ensureHead(Head); }
- const NodeTy *getTail() const { return this->ensureHead(Head); }
- void setTail(NodeTy *N) const { this->noteHead(Head, N); }
-
- /// CreateLazySentinel - This method verifies whether the sentinel for the
- /// list has been created and lazily makes it if not.
- void CreateLazySentinel() const {
- this->ensureHead(Head);
- }
+/// This is a subset of ilist functionality that can safely be used on nodes of
+/// polymorphic types, i.e. a heterogeneous list with a common base class that
+/// holds the next/prev pointers. The only state of the list itself is an
+/// ilist_sentinel, which holds pointers to the first and last nodes in the
+/// list.
+template <class IntrusiveListT, class TraitsT>
+class iplist_impl : public TraitsT, IntrusiveListT {
+ typedef IntrusiveListT base_list_type;
+
+protected:
+ typedef iplist_impl iplist_impl_type;
+
+public:
+ typedef typename base_list_type::pointer pointer;
+ typedef typename base_list_type::const_pointer const_pointer;
+ typedef typename base_list_type::reference reference;
+ typedef typename base_list_type::const_reference const_reference;
+ typedef typename base_list_type::value_type value_type;
+ typedef typename base_list_type::size_type size_type;
+ typedef typename base_list_type::difference_type difference_type;
+ typedef typename base_list_type::iterator iterator;
+ typedef typename base_list_type::const_iterator const_iterator;
+ typedef typename base_list_type::reverse_iterator reverse_iterator;
+ typedef
+ typename base_list_type::const_reverse_iterator const_reverse_iterator;
- static bool op_less(NodeTy &L, NodeTy &R) { return L < R; }
- static bool op_equal(NodeTy &L, NodeTy &R) { return L == R; }
+private:
+ // TODO: Drop this assertion and the transitive type traits anytime after
+  // v4.0 is branched (i.e., keep them for one release to help out-of-tree code
+ // update).
+ static_assert(
+ !ilist_detail::HasObsoleteCustomization<TraitsT, value_type>::value,
+ "ilist customization points have changed!");
- // No fundamental reason why iplist can't be copyable, but the default
- // copy/copy-assign won't do.
- iplist(const iplist &) = delete;
- void operator=(const iplist &) = delete;
+ static bool op_less(const_reference L, const_reference R) { return L < R; }
+ static bool op_equal(const_reference L, const_reference R) { return L == R; }
public:
- typedef NodeTy *pointer;
- typedef const NodeTy *const_pointer;
- typedef NodeTy &reference;
- typedef const NodeTy &const_reference;
- typedef NodeTy value_type;
- typedef ilist_iterator<NodeTy> iterator;
- typedef ilist_iterator<const NodeTy> const_iterator;
- typedef size_t size_type;
- typedef ptrdiff_t difference_type;
- typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
- typedef std::reverse_iterator<iterator> reverse_iterator;
-
- iplist() : Head(this->provideInitialHead()) {}
- ~iplist() {
- if (!Head) return;
- clear();
- Traits::destroySentinel(getTail());
- }
+ iplist_impl() = default;
- // Iterator creation methods.
- iterator begin() {
- CreateLazySentinel();
- return iterator(Head);
- }
- const_iterator begin() const {
- CreateLazySentinel();
- return const_iterator(Head);
- }
- iterator end() {
- CreateLazySentinel();
- return iterator(getTail());
- }
- const_iterator end() const {
- CreateLazySentinel();
- return const_iterator(getTail());
- }
+ iplist_impl(const iplist_impl &) = delete;
+ iplist_impl &operator=(const iplist_impl &) = delete;
- // reverse iterator creation methods.
- reverse_iterator rbegin() { return reverse_iterator(end()); }
- const_reverse_iterator rbegin() const{ return const_reverse_iterator(end()); }
- reverse_iterator rend() { return reverse_iterator(begin()); }
- const_reverse_iterator rend() const { return const_reverse_iterator(begin());}
+ iplist_impl(iplist_impl &&X)
+ : TraitsT(std::move(X)), IntrusiveListT(std::move(X)) {}
+ iplist_impl &operator=(iplist_impl &&X) {
+ *static_cast<TraitsT *>(this) = std::move(X);
+ *static_cast<IntrusiveListT *>(this) = std::move(X);
+ return *this;
+ }
+ ~iplist_impl() { clear(); }
// Miscellaneous inspection routines.
size_type max_size() const { return size_type(-1); }
- bool LLVM_ATTRIBUTE_UNUSED_RESULT empty() const {
- return !Head || Head == getTail();
- }
- // Front and back accessor functions...
- reference front() {
- assert(!empty() && "Called front() on empty list!");
- return *Head;
- }
- const_reference front() const {
- assert(!empty() && "Called front() on empty list!");
- return *Head;
- }
- reference back() {
- assert(!empty() && "Called back() on empty list!");
- return *this->getPrev(getTail());
- }
- const_reference back() const {
- assert(!empty() && "Called back() on empty list!");
- return *this->getPrev(getTail());
- }
+ using base_list_type::begin;
+ using base_list_type::end;
+ using base_list_type::rbegin;
+ using base_list_type::rend;
+ using base_list_type::empty;
+ using base_list_type::front;
+ using base_list_type::back;
- void swap(iplist &RHS) {
+ void swap(iplist_impl &RHS) {
assert(0 && "Swap does not use list traits callback correctly yet!");
- std::swap(Head, RHS.Head);
+ base_list_type::swap(RHS);
}
- iterator insert(iterator where, NodeTy *New) {
- NodeTy *CurNode = where.getNodePtrUnchecked();
- NodeTy *PrevNode = this->getPrev(CurNode);
- this->setNext(New, CurNode);
- this->setPrev(New, PrevNode);
-
- if (CurNode != Head) // Is PrevNode off the beginning of the list?
- this->setNext(PrevNode, New);
- else
- Head = New;
- this->setPrev(CurNode, New);
-
- this->addNodeToList(New); // Notify traits that we added a node...
- return iterator(New);
+ iterator insert(iterator where, pointer New) {
+ this->addNodeToList(New); // Notify traits that we added a node...
+ return base_list_type::insert(where, *New);
}
- iterator insert(iterator where, const NodeTy &New) {
- return this->insert(where, new NodeTy(New));
+ iterator insert(iterator where, const_reference New) {
+ return this->insert(where, new value_type(New));
}
- iterator insertAfter(iterator where, NodeTy *New) {
+ iterator insertAfter(iterator where, pointer New) {
if (empty())
return insert(begin(), New);
else
return insert(++where, New);
}
- NodeTy *remove(iterator &IT) {
- assert(IT != end() && "Cannot remove end of list!");
- NodeTy *Node = &*IT;
- NodeTy *NextNode = this->getNext(Node);
- NodeTy *PrevNode = this->getPrev(Node);
+ /// Clone another list.
+ template <class Cloner> void cloneFrom(const iplist_impl &L2, Cloner clone) {
+ clear();
+ for (const_reference V : L2)
+ push_back(clone(V));
+ }
- if (Node != Head) // Is PrevNode off the beginning of the list?
- this->setNext(PrevNode, NextNode);
- else
- Head = NextNode;
- this->setPrev(NextNode, PrevNode);
- IT.reset(NextNode);
- this->removeNodeFromList(Node); // Notify traits that we removed a node...
-
- // Set the next/prev pointers of the current node to null. This isn't
- // strictly required, but this catches errors where a node is removed from
- // an ilist (and potentially deleted) with iterators still pointing at it.
- // When those iterators are incremented or decremented, they will assert on
- // the null next/prev pointer instead of "usually working".
- this->setNext(Node, nullptr);
- this->setPrev(Node, nullptr);
+ pointer remove(iterator &IT) {
+ pointer Node = &*IT++;
+ this->removeNodeFromList(Node); // Notify traits that we removed a node...
+ base_list_type::remove(*Node);
return Node;
}
- NodeTy *remove(const iterator &IT) {
+ pointer remove(const iterator &IT) {
iterator MutIt = IT;
return remove(MutIt);
}
- NodeTy *remove(NodeTy *IT) { return remove(iterator(IT)); }
- NodeTy *remove(NodeTy &IT) { return remove(iterator(IT)); }
+ pointer remove(pointer IT) { return remove(iterator(IT)); }
+ pointer remove(reference IT) { return remove(iterator(IT)); }
// erase - remove a node from the controlled sequence... and delete it.
iterator erase(iterator where) {
@@ -471,82 +282,36 @@ public:
return where;
}
- iterator erase(NodeTy *IT) { return erase(iterator(IT)); }
- iterator erase(NodeTy &IT) { return erase(iterator(IT)); }
+ iterator erase(pointer IT) { return erase(iterator(IT)); }
+ iterator erase(reference IT) { return erase(iterator(IT)); }
/// Remove all nodes from the list like clear(), but do not call
/// removeNodeFromList() or deleteNode().
///
/// This should only be used immediately before freeing nodes in bulk to
/// avoid traversing the list and bringing all the nodes into cache.
- void clearAndLeakNodesUnsafely() {
- if (Head) {
- Head = getTail();
- this->setPrev(Head, Head);
- }
- }
+ void clearAndLeakNodesUnsafely() { base_list_type::clear(); }
private:
// transfer - The heart of the splice function. Move linked list nodes from
// [first, last) into position.
//
- void transfer(iterator position, iplist &L2, iterator first, iterator last) {
- assert(first != last && "Should be checked by callers");
- // Position cannot be contained in the range to be transferred.
- // Check for the most common mistake.
- assert(position != first &&
- "Insertion point can't be one of the transferred nodes");
-
- if (position != last) {
- // Note: we have to be careful about the case when we move the first node
- // in the list. This node is the list sentinel node and we can't move it.
- NodeTy *ThisSentinel = getTail();
- setTail(nullptr);
- NodeTy *L2Sentinel = L2.getTail();
- L2.setTail(nullptr);
-
- // Remove [first, last) from its old position.
- NodeTy *First = &*first, *Prev = this->getPrev(First);
- NodeTy *Next = last.getNodePtrUnchecked(), *Last = this->getPrev(Next);
- if (Prev)
- this->setNext(Prev, Next);
- else
- L2.Head = Next;
- this->setPrev(Next, Prev);
-
- // Splice [first, last) into its new position.
- NodeTy *PosNext = position.getNodePtrUnchecked();
- NodeTy *PosPrev = this->getPrev(PosNext);
-
- // Fix head of list...
- if (PosPrev)
- this->setNext(PosPrev, First);
- else
- Head = First;
- this->setPrev(First, PosPrev);
-
- // Fix end of list...
- this->setNext(Last, PosNext);
- this->setPrev(PosNext, Last);
-
- this->transferNodesFromList(L2, iterator(First), iterator(PosNext));
-
- // Now that everything is set, restore the pointers to the list sentinels.
- L2.setTail(L2Sentinel);
- setTail(ThisSentinel);
- }
+ void transfer(iterator position, iplist_impl &L2, iterator first, iterator last) {
+ if (position == last)
+ return;
+
+ if (this != &L2) // Notify traits we moved the nodes...
+ this->transferNodesFromList(L2, first, last);
+
+ base_list_type::splice(position, L2, first, last);
}
public:
-
//===----------------------------------------------------------------------===
// Functionality derived from other functions defined above...
//
- size_type LLVM_ATTRIBUTE_UNUSED_RESULT size() const {
- if (!Head) return 0; // Don't require construction of sentinel if empty.
- return std::distance(begin(), end());
- }
+ using base_list_type::size;
iterator erase(iterator first, iterator last) {
while (first != last)
@@ -554,11 +319,11 @@ public:
return last;
}
- void clear() { if (Head) erase(begin(), end()); }
+ void clear() { erase(begin(), end()); }
// Front and back inserters...
- void push_front(NodeTy *val) { insert(begin(), val); }
- void push_back(NodeTy *val) { insert(end(), val); }
+ void push_front(pointer val) { insert(begin(), val); }
+ void push_back(pointer val) { insert(end(), val); }
void pop_front() {
assert(!empty() && "pop_front() on empty list!");
erase(begin());
@@ -574,179 +339,96 @@ public:
}
// Splice members - defined in terms of transfer...
- void splice(iterator where, iplist &L2) {
+ void splice(iterator where, iplist_impl &L2) {
if (!L2.empty())
transfer(where, L2, L2.begin(), L2.end());
}
- void splice(iterator where, iplist &L2, iterator first) {
+ void splice(iterator where, iplist_impl &L2, iterator first) {
iterator last = first; ++last;
if (where == first || where == last) return; // No change
transfer(where, L2, first, last);
}
- void splice(iterator where, iplist &L2, iterator first, iterator last) {
+ void splice(iterator where, iplist_impl &L2, iterator first, iterator last) {
if (first != last) transfer(where, L2, first, last);
}
- void splice(iterator where, iplist &L2, NodeTy &N) {
+ void splice(iterator where, iplist_impl &L2, reference N) {
splice(where, L2, iterator(N));
}
- void splice(iterator where, iplist &L2, NodeTy *N) {
+ void splice(iterator where, iplist_impl &L2, pointer N) {
splice(where, L2, iterator(N));
}
template <class Compare>
- void merge(iplist &Right, Compare comp) {
+ void merge(iplist_impl &Right, Compare comp) {
if (this == &Right)
return;
- iterator First1 = begin(), Last1 = end();
- iterator First2 = Right.begin(), Last2 = Right.end();
- while (First1 != Last1 && First2 != Last2) {
- if (comp(*First2, *First1)) {
- iterator Next = First2;
- transfer(First1, Right, First2, ++Next);
- First2 = Next;
- } else {
- ++First1;
- }
- }
- if (First2 != Last2)
- transfer(Last1, Right, First2, Last2);
+ this->transferNodesFromList(Right, Right.begin(), Right.end());
+ base_list_type::merge(Right, comp);
}
- void merge(iplist &Right) { return merge(Right, op_less); }
+ void merge(iplist_impl &Right) { return merge(Right, op_less); }
- template <class Compare>
- void sort(Compare comp) {
- // The list is empty, vacuously sorted.
- if (empty())
- return;
- // The list has a single element, vacuously sorted.
- if (std::next(begin()) == end())
- return;
- // Find the split point for the list.
- iterator Center = begin(), End = begin();
- while (End != end() && std::next(End) != end()) {
- Center = std::next(Center);
- End = std::next(std::next(End));
- }
- // Split the list into two.
- iplist RightHalf;
- RightHalf.splice(RightHalf.begin(), *this, Center, end());
-
- // Sort the two sublists.
- sort(comp);
- RightHalf.sort(comp);
-
- // Merge the two sublists back together.
- merge(RightHalf, comp);
- }
- void sort() { sort(op_less); }
+ using base_list_type::sort;
/// \brief Get the previous node, or \c nullptr for the list head.
- NodeTy *getPrevNode(NodeTy &N) const {
+ pointer getPrevNode(reference N) const {
auto I = N.getIterator();
if (I == begin())
return nullptr;
return &*std::prev(I);
}
/// \brief Get the previous node, or \c nullptr for the list head.
- const NodeTy *getPrevNode(const NodeTy &N) const {
- return getPrevNode(const_cast<NodeTy &>(N));
+ const_pointer getPrevNode(const_reference N) const {
+ return getPrevNode(const_cast<reference >(N));
}
/// \brief Get the next node, or \c nullptr for the list tail.
- NodeTy *getNextNode(NodeTy &N) const {
+ pointer getNextNode(reference N) const {
auto Next = std::next(N.getIterator());
if (Next == end())
return nullptr;
return &*Next;
}
/// \brief Get the next node, or \c nullptr for the list tail.
- const NodeTy *getNextNode(const NodeTy &N) const {
- return getNextNode(const_cast<NodeTy &>(N));
+ const_pointer getNextNode(const_reference N) const {
+ return getNextNode(const_cast<reference >(N));
}
};
+/// An intrusive list with ownership and callbacks specified/controlled by
+/// ilist_traits, only with API safe for polymorphic types.
+///
+/// The \p Options parameters are the same as those for \a simple_ilist. See
+/// there for a description of what's available.
+template <class T, class... Options>
+class iplist
+ : public iplist_impl<simple_ilist<T, Options...>, ilist_traits<T>> {
+ typedef typename iplist::iplist_impl_type iplist_impl_type;
-template<typename NodeTy>
-struct ilist : public iplist<NodeTy> {
- typedef typename iplist<NodeTy>::size_type size_type;
- typedef typename iplist<NodeTy>::iterator iterator;
-
- ilist() {}
- ilist(const ilist &right) : iplist<NodeTy>() {
- insert(this->begin(), right.begin(), right.end());
- }
- explicit ilist(size_type count) {
- insert(this->begin(), count, NodeTy());
- }
- ilist(size_type count, const NodeTy &val) {
- insert(this->begin(), count, val);
- }
- template<class InIt> ilist(InIt first, InIt last) {
- insert(this->begin(), first, last);
- }
-
- // bring hidden functions into scope
- using iplist<NodeTy>::insert;
- using iplist<NodeTy>::push_front;
- using iplist<NodeTy>::push_back;
-
- // Main implementation here - Insert for a node passed by value...
- iterator insert(iterator where, const NodeTy &val) {
- return insert(where, this->createNode(val));
- }
-
-
- // Front and back inserters...
- void push_front(const NodeTy &val) { insert(this->begin(), val); }
- void push_back(const NodeTy &val) { insert(this->end(), val); }
-
- void insert(iterator where, size_type count, const NodeTy &val) {
- for (; count != 0; --count) insert(where, val);
- }
-
- // Assign special forms...
- void assign(size_type count, const NodeTy &val) {
- iterator I = this->begin();
- for (; I != this->end() && count != 0; ++I, --count)
- *I = val;
- if (count != 0)
- insert(this->end(), val, val);
- else
- erase(I, this->end());
- }
- template<class InIt> void assign(InIt first1, InIt last1) {
- iterator first2 = this->begin(), last2 = this->end();
- for ( ; first1 != last1 && first2 != last2; ++first1, ++first2)
- *first1 = *first2;
- if (first2 == last2)
- erase(first1, last1);
- else
- insert(last1, first2, last2);
- }
-
+public:
+ iplist() = default;
- // Resize members...
- void resize(size_type newsize, NodeTy val) {
- iterator i = this->begin();
- size_type len = 0;
- for ( ; i != this->end() && len < newsize; ++i, ++len) /* empty*/ ;
+ iplist(const iplist &X) = delete;
+ iplist &operator=(const iplist &X) = delete;
- if (len == newsize)
- erase(i, this->end());
- else // i == end()
- insert(this->end(), newsize - len, val);
+ iplist(iplist &&X) : iplist_impl_type(std::move(X)) {}
+ iplist &operator=(iplist &&X) {
+ *static_cast<iplist_impl_type *>(this) = std::move(X);
+ return *this;
}
- void resize(size_type newsize) { resize(newsize, NodeTy()); }
};
-} // End llvm namespace
+template <class T, class... Options> using ilist = iplist<T, Options...>;
+
+} // end namespace llvm
namespace std {
+
// Ensure that swap uses the fast list swap...
template<class Ty>
void swap(llvm::iplist<Ty> &Left, llvm::iplist<Ty> &Right) {
Left.swap(Right);
}
-} // End 'std' extensions...
+
+} // end namespace std
#endif // LLVM_ADT_ILIST_H
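A minimal usage sketch of the consolidated iplist/ilist API after this change (the node type Instr is hypothetical; it assumes the default ilist_traits, whose deleteNode calls delete, so the list owns heap nodes handed to it):

#include "llvm/ADT/ilist.h"

struct Instr : llvm::ilist_node<Instr> {
  int Opcode;
  explicit Instr(int Op) : Opcode(Op) {}
};

void demo() {
  llvm::iplist<Instr> A, B;
  A.push_back(new Instr(1));   // the list takes ownership of heap nodes
  B.push_back(new Instr(2));
  A.splice(A.end(), B);        // relinks B's nodes into A without copying
  A.clear();                   // default traits delete the owned nodes
}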
diff --git a/contrib/llvm/include/llvm/ADT/ilist_base.h b/contrib/llvm/include/llvm/ADT/ilist_base.h
new file mode 100644
index 000000000000..1ffc864bea2f
--- /dev/null
+++ b/contrib/llvm/include/llvm/ADT/ilist_base.h
@@ -0,0 +1,95 @@
+//===- llvm/ADT/ilist_base.h - Intrusive List Base ---------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_ILIST_BASE_H
+#define LLVM_ADT_ILIST_BASE_H
+
+#include "llvm/ADT/ilist_node_base.h"
+#include <cassert>
+#include <cstddef>
+#include <type_traits>
+
+namespace llvm {
+
+/// Implementations of list algorithms using ilist_node_base.
+template <bool EnableSentinelTracking> class ilist_base {
+public:
+ typedef ilist_node_base<EnableSentinelTracking> node_base_type;
+
+ static void insertBeforeImpl(node_base_type &Next, node_base_type &N) {
+ node_base_type &Prev = *Next.getPrev();
+ N.setNext(&Next);
+ N.setPrev(&Prev);
+ Prev.setNext(&N);
+ Next.setPrev(&N);
+ }
+
+ static void removeImpl(node_base_type &N) {
+ node_base_type *Prev = N.getPrev();
+ node_base_type *Next = N.getNext();
+ Next->setPrev(Prev);
+ Prev->setNext(Next);
+
+ // Not strictly necessary, but helps catch a class of bugs.
+ N.setPrev(nullptr);
+ N.setNext(nullptr);
+ }
+
+ static void removeRangeImpl(node_base_type &First, node_base_type &Last) {
+ node_base_type *Prev = First.getPrev();
+ node_base_type *Final = Last.getPrev();
+ Last.setPrev(Prev);
+ Prev->setNext(&Last);
+
+ // Not strictly necessary, but helps catch a class of bugs.
+ First.setPrev(nullptr);
+ Final->setNext(nullptr);
+ }
+
+ static void transferBeforeImpl(node_base_type &Next, node_base_type &First,
+ node_base_type &Last) {
+ if (&Next == &Last || &First == &Last)
+ return;
+
+ // Position cannot be contained in the range to be transferred.
+ assert(&Next != &First &&
+ // Check for the most common mistake.
+ "Insertion point can't be one of the transferred nodes");
+
+ node_base_type &Final = *Last.getPrev();
+
+ // Detach from old list/position.
+ First.getPrev()->setNext(&Last);
+ Last.setPrev(First.getPrev());
+
+ // Splice [First, Final] into its new list/position.
+ node_base_type &Prev = *Next.getPrev();
+ Final.setNext(&Next);
+ First.setPrev(&Prev);
+ Prev.setNext(&First);
+ Next.setPrev(&Final);
+ }
+
+ template <class T> static void insertBefore(T &Next, T &N) {
+ insertBeforeImpl(Next, N);
+ }
+
+ template <class T> static void remove(T &N) { removeImpl(N); }
+ template <class T> static void removeRange(T &First, T &Last) {
+ removeRangeImpl(First, Last);
+ }
+
+ template <class T> static void transferBefore(T &Next, T &First, T &Last) {
+ transferBeforeImpl(Next, First, Last);
+ }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_ADT_ILIST_BASE_H
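These helpers are internal to the ilist implementation; the sketch below only illustrates the circular, sentinel-anchored invariant they maintain (names are local to the example):

#include "llvm/ADT/ilist_base.h"
#include <cassert>

void demo() {
  llvm::ilist_node_base<false> Sentinel, A, B;
  Sentinel.setPrev(&Sentinel);                          // empty list: the sentinel
  Sentinel.setNext(&Sentinel);                          // points at itself
  llvm::ilist_base<false>::insertBefore(Sentinel, A);   // list: A
  llvm::ilist_base<false>::insertBefore(Sentinel, B);   // list: A, B
  assert(Sentinel.getNext() == &A && Sentinel.getPrev() == &B);
  llvm::ilist_base<false>::remove(A);                   // list: B; A's links are nulled
  assert(Sentinel.getNext() == &B && A.getNext() == nullptr);
}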
diff --git a/contrib/llvm/include/llvm/ADT/ilist_iterator.h b/contrib/llvm/include/llvm/ADT/ilist_iterator.h
new file mode 100644
index 000000000000..ef532d2cf172
--- /dev/null
+++ b/contrib/llvm/include/llvm/ADT/ilist_iterator.h
@@ -0,0 +1,185 @@
+//===- llvm/ADT/ilist_iterator.h - Intrusive List Iterator -------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_ILIST_ITERATOR_H
+#define LLVM_ADT_ILIST_ITERATOR_H
+
+#include "llvm/ADT/ilist_node.h"
+#include <cassert>
+#include <cstddef>
+#include <iterator>
+#include <type_traits>
+
+namespace llvm {
+
+namespace ilist_detail {
+
+/// Find const-correct node types.
+template <class OptionsT, bool IsConst> struct IteratorTraits;
+template <class OptionsT> struct IteratorTraits<OptionsT, false> {
+ typedef typename OptionsT::value_type value_type;
+ typedef typename OptionsT::pointer pointer;
+ typedef typename OptionsT::reference reference;
+ typedef ilist_node_impl<OptionsT> *node_pointer;
+ typedef ilist_node_impl<OptionsT> &node_reference;
+};
+template <class OptionsT> struct IteratorTraits<OptionsT, true> {
+ typedef const typename OptionsT::value_type value_type;
+ typedef typename OptionsT::const_pointer pointer;
+ typedef typename OptionsT::const_reference reference;
+ typedef const ilist_node_impl<OptionsT> *node_pointer;
+ typedef const ilist_node_impl<OptionsT> &node_reference;
+};
+
+template <bool IsReverse> struct IteratorHelper;
+template <> struct IteratorHelper<false> : ilist_detail::NodeAccess {
+ typedef ilist_detail::NodeAccess Access;
+ template <class T> static void increment(T *&I) { I = Access::getNext(*I); }
+ template <class T> static void decrement(T *&I) { I = Access::getPrev(*I); }
+};
+template <> struct IteratorHelper<true> : ilist_detail::NodeAccess {
+ typedef ilist_detail::NodeAccess Access;
+ template <class T> static void increment(T *&I) { I = Access::getPrev(*I); }
+ template <class T> static void decrement(T *&I) { I = Access::getNext(*I); }
+};
+
+} // end namespace ilist_detail
+
+/// Iterator for intrusive lists based on ilist_node.
+template <class OptionsT, bool IsReverse, bool IsConst>
+class ilist_iterator : ilist_detail::SpecificNodeAccess<OptionsT> {
+ friend ilist_iterator<OptionsT, IsReverse, !IsConst>;
+ friend ilist_iterator<OptionsT, !IsReverse, IsConst>;
+ friend ilist_iterator<OptionsT, !IsReverse, !IsConst>;
+
+ typedef ilist_detail::IteratorTraits<OptionsT, IsConst> Traits;
+ typedef ilist_detail::SpecificNodeAccess<OptionsT> Access;
+
+public:
+ typedef typename Traits::value_type value_type;
+ typedef typename Traits::pointer pointer;
+ typedef typename Traits::reference reference;
+ typedef ptrdiff_t difference_type;
+ typedef std::bidirectional_iterator_tag iterator_category;
+
+ typedef typename OptionsT::const_pointer const_pointer;
+ typedef typename OptionsT::const_reference const_reference;
+
+private:
+ typedef typename Traits::node_pointer node_pointer;
+ typedef typename Traits::node_reference node_reference;
+
+ node_pointer NodePtr;
+
+public:
+ /// Create from an ilist_node.
+ explicit ilist_iterator(node_reference N) : NodePtr(&N) {}
+
+ explicit ilist_iterator(pointer NP) : NodePtr(Access::getNodePtr(NP)) {}
+ explicit ilist_iterator(reference NR) : NodePtr(Access::getNodePtr(&NR)) {}
+ ilist_iterator() : NodePtr(nullptr) {}
+
+ // This is templated so that we can allow constructing a const iterator from
+ // a nonconst iterator...
+ template <bool RHSIsConst>
+ ilist_iterator(
+ const ilist_iterator<OptionsT, IsReverse, RHSIsConst> &RHS,
+ typename std::enable_if<IsConst || !RHSIsConst, void *>::type = nullptr)
+ : NodePtr(RHS.NodePtr) {}
+
+ // This is templated so that we can allow assigning to a const iterator from
+ // a nonconst iterator...
+ template <bool RHSIsConst>
+ typename std::enable_if<IsConst || !RHSIsConst, ilist_iterator &>::type
+ operator=(const ilist_iterator<OptionsT, IsReverse, RHSIsConst> &RHS) {
+ NodePtr = RHS.NodePtr;
+ return *this;
+ }
+
+ /// Convert from an iterator to its reverse.
+ ///
+ /// TODO: Roll this into the implicit constructor once we're sure that no one
+ /// is relying on the std::reverse_iterator off-by-one semantics.
+ ilist_iterator<OptionsT, !IsReverse, IsConst> getReverse() const {
+ if (NodePtr)
+ return ilist_iterator<OptionsT, !IsReverse, IsConst>(*NodePtr);
+ return ilist_iterator<OptionsT, !IsReverse, IsConst>();
+ }
+
+ /// Const-cast.
+ ilist_iterator<OptionsT, IsReverse, false> getNonConst() const {
+ if (NodePtr)
+ return ilist_iterator<OptionsT, IsReverse, false>(
+ const_cast<typename ilist_iterator<OptionsT, IsReverse,
+ false>::node_reference>(*NodePtr));
+ return ilist_iterator<OptionsT, IsReverse, false>();
+ }
+
+ // Accessors...
+ reference operator*() const {
+ assert(!NodePtr->isKnownSentinel());
+ return *Access::getValuePtr(NodePtr);
+ }
+ pointer operator->() const { return &operator*(); }
+
+ // Comparison operators
+ friend bool operator==(const ilist_iterator &LHS, const ilist_iterator &RHS) {
+ return LHS.NodePtr == RHS.NodePtr;
+ }
+ friend bool operator!=(const ilist_iterator &LHS, const ilist_iterator &RHS) {
+ return LHS.NodePtr != RHS.NodePtr;
+ }
+
+ // Increment and decrement operators...
+ ilist_iterator &operator--() {
+ NodePtr = IsReverse ? NodePtr->getNext() : NodePtr->getPrev();
+ return *this;
+ }
+ ilist_iterator &operator++() {
+ NodePtr = IsReverse ? NodePtr->getPrev() : NodePtr->getNext();
+ return *this;
+ }
+ ilist_iterator operator--(int) {
+ ilist_iterator tmp = *this;
+ --*this;
+ return tmp;
+ }
+ ilist_iterator operator++(int) {
+ ilist_iterator tmp = *this;
+ ++*this;
+ return tmp;
+ }
+
+ /// Get the underlying ilist_node.
+ node_pointer getNodePtr() const { return static_cast<node_pointer>(NodePtr); }
+
+ /// Check for end. Only valid if ilist_sentinel_tracking<true>.
+ bool isEnd() const { return NodePtr ? NodePtr->isSentinel() : false; }
+};
+
+template <typename From> struct simplify_type;
+
+/// Allow ilist_iterators to convert into pointers to a node automatically when
+/// used by the dyn_cast, cast, isa mechanisms...
+///
+/// FIXME: remove this, since there is no implicit conversion to NodeTy.
+template <class OptionsT, bool IsConst>
+struct simplify_type<ilist_iterator<OptionsT, false, IsConst>> {
+ typedef ilist_iterator<OptionsT, false, IsConst> iterator;
+ typedef typename iterator::pointer SimpleType;
+
+ static SimpleType getSimplifiedValue(const iterator &Node) { return &*Node; }
+};
+template <class OptionsT, bool IsConst>
+struct simplify_type<const ilist_iterator<OptionsT, false, IsConst>>
+ : simplify_type<ilist_iterator<OptionsT, false, IsConst>> {};
+
+} // end namespace llvm
+
+#endif // LLVM_ADT_ILIST_ITERATOR_H
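A sketch of the iterator conversions provided above (hypothetical node type Elt; assumes simple_ilist from this same import): a non-const iterator converts to a const one, and getReverse() keeps pointing at the same node rather than shifting by one as std::reverse_iterator would.

#include "llvm/ADT/simple_ilist.h"
#include <cassert>

struct Elt : llvm::ilist_node<Elt> {};

void demo() {
  llvm::simple_ilist<Elt> L;
  Elt E;
  L.push_back(E);
  auto It = E.getIterator();                          // iterator obtained from the node
  llvm::simple_ilist<Elt>::const_iterator CI = It;    // non-const -> const is allowed
  auto RI = It.getReverse();                          // same node, no off-by-one shift
  assert(&*CI == &E && &*RI == &E);
}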
diff --git a/contrib/llvm/include/llvm/ADT/ilist_node.h b/contrib/llvm/include/llvm/ADT/ilist_node.h
index 7e5a0e0e5ad8..7244d0f40586 100644
--- a/contrib/llvm/include/llvm/ADT/ilist_node.h
+++ b/contrib/llvm/include/llvm/ADT/ilist_node.h
@@ -15,65 +15,232 @@
#ifndef LLVM_ADT_ILIST_NODE_H
#define LLVM_ADT_ILIST_NODE_H
+#include "llvm/ADT/ilist_node_base.h"
+#include "llvm/ADT/ilist_node_options.h"
+
namespace llvm {
+namespace ilist_detail {
+struct NodeAccess;
+} // end namespace ilist_detail
+
template<typename NodeTy>
struct ilist_traits;
-template <typename NodeTy> struct ilist_embedded_sentinel_traits;
-template <typename NodeTy> struct ilist_half_embedded_sentinel_traits;
-/// ilist_half_node - Base class that provides prev services for sentinels.
+template <class OptionsT, bool IsReverse, bool IsConst> class ilist_iterator;
+template <class OptionsT> class ilist_sentinel;
+
+/// Implementation for an ilist node.
///
-template<typename NodeTy>
-class ilist_half_node {
- friend struct ilist_traits<NodeTy>;
- friend struct ilist_half_embedded_sentinel_traits<NodeTy>;
- NodeTy *Prev;
+/// Templated on an appropriate \a ilist_detail::node_options, usually computed
+/// by \a ilist_detail::compute_node_options.
+///
+/// This is a wrapper around \a ilist_node_base whose main purpose is to
+/// provide type safety: you can't insert nodes of \a ilist_node_impl into the
+/// wrong \a simple_ilist or \a iplist.
+template <class OptionsT> class ilist_node_impl : OptionsT::node_base_type {
+ typedef typename OptionsT::value_type value_type;
+ typedef typename OptionsT::node_base_type node_base_type;
+ typedef typename OptionsT::list_base_type list_base_type;
+
+ friend typename OptionsT::list_base_type;
+ friend struct ilist_detail::NodeAccess;
+ friend class ilist_sentinel<OptionsT>;
+ friend class ilist_iterator<OptionsT, false, false>;
+ friend class ilist_iterator<OptionsT, false, true>;
+ friend class ilist_iterator<OptionsT, true, false>;
+ friend class ilist_iterator<OptionsT, true, true>;
+
protected:
- NodeTy *getPrev() { return Prev; }
- const NodeTy *getPrev() const { return Prev; }
- void setPrev(NodeTy *P) { Prev = P; }
- ilist_half_node() : Prev(nullptr) {}
-};
+ ilist_node_impl() = default;
-template<typename NodeTy>
-struct ilist_nextprev_traits;
+ typedef ilist_iterator<OptionsT, false, false> self_iterator;
+ typedef ilist_iterator<OptionsT, false, true> const_self_iterator;
+ typedef ilist_iterator<OptionsT, true, false> reverse_self_iterator;
+ typedef ilist_iterator<OptionsT, true, true> const_reverse_self_iterator;
-template <typename NodeTy> class ilist_iterator;
+private:
+ ilist_node_impl *getPrev() {
+ return static_cast<ilist_node_impl *>(node_base_type::getPrev());
+ }
+ ilist_node_impl *getNext() {
+ return static_cast<ilist_node_impl *>(node_base_type::getNext());
+ }
+
+ const ilist_node_impl *getPrev() const {
+ return static_cast<ilist_node_impl *>(node_base_type::getPrev());
+ }
+ const ilist_node_impl *getNext() const {
+ return static_cast<ilist_node_impl *>(node_base_type::getNext());
+ }
-/// ilist_node - Base class that provides next/prev services for nodes
-/// that use ilist_nextprev_traits or ilist_default_traits.
+ void setPrev(ilist_node_impl *N) { node_base_type::setPrev(N); }
+ void setNext(ilist_node_impl *N) { node_base_type::setNext(N); }
+
+public:
+ self_iterator getIterator() { return self_iterator(*this); }
+ const_self_iterator getIterator() const { return const_self_iterator(*this); }
+ reverse_self_iterator getReverseIterator() {
+ return reverse_self_iterator(*this);
+ }
+ const_reverse_self_iterator getReverseIterator() const {
+ return const_reverse_self_iterator(*this);
+ }
+
+ // Under-approximation, but always available for assertions.
+ using node_base_type::isKnownSentinel;
+
+ /// Check whether this is the sentinel node.
+ ///
+ /// This requires sentinel tracking to be explicitly enabled. Use the
+ /// ilist_sentinel_tracking<true> option to get this API.
+ bool isSentinel() const {
+ static_assert(OptionsT::is_sentinel_tracking_explicit,
+ "Use ilist_sentinel_tracking<true> to enable isSentinel()");
+ return node_base_type::isSentinel();
+ }
+};
+
+/// An intrusive list node.
///
-template<typename NodeTy>
-class ilist_node : private ilist_half_node<NodeTy> {
- friend struct ilist_nextprev_traits<NodeTy>;
- friend struct ilist_traits<NodeTy>;
- friend struct ilist_half_embedded_sentinel_traits<NodeTy>;
- friend struct ilist_embedded_sentinel_traits<NodeTy>;
- NodeTy *Next;
- NodeTy *getNext() { return Next; }
- const NodeTy *getNext() const { return Next; }
- void setNext(NodeTy *N) { Next = N; }
+/// A base class to enable membership in intrusive lists, including \a
+/// simple_ilist, \a iplist, and \a ilist. The first template parameter is the
+/// \a value_type for the list.
+///
+/// An ilist node can be configured with compile-time options to change
+/// behaviour and/or add API.
+///
+/// By default, an \a ilist_node knows whether it is the list sentinel (an
+/// instance of \a ilist_sentinel) if and only if
+/// LLVM_ENABLE_ABI_BREAKING_CHECKS. The function \a isKnownSentinel() always
+/// returns \c false when tracking is off. Sentinel tracking steals a bit from the
+/// "prev" link, which adds a mask operation when decrementing an iterator, but
+/// enables bug-finding assertions in \a ilist_iterator.
+///
+/// To turn sentinel tracking on all the time, pass in the
+/// ilist_sentinel_tracking<true> template parameter. This also enables the \a
+/// isSentinel() function. The same option must be passed to the intrusive
+/// list. (ilist_sentinel_tracking<false> turns sentinel tracking off all the
+/// time.)
+///
+/// A type can inherit from ilist_node multiple times by passing in different
+/// \a ilist_tag options. This allows a single instance to be inserted into
+/// multiple lists simultaneously, where each list is instantiated with the matching tag.
+///
+/// \example
+/// struct A {};
+/// struct B {};
+/// struct N : ilist_node<N, ilist_tag<A>>, ilist_node<N, ilist_tag<B>> {};
+///
+/// void foo() {
+/// simple_ilist<N, ilist_tag<A>> ListA;
+/// simple_ilist<N, ilist_tag<B>> ListB;
+/// N N1;
+/// ListA.push_back(N1);
+/// ListB.push_back(N1);
+/// }
+/// \endexample
+///
+/// See \a is_valid_option for steps on adding a new option.
+template <class T, class... Options>
+class ilist_node
+ : public ilist_node_impl<
+ typename ilist_detail::compute_node_options<T, Options...>::type> {
+ static_assert(ilist_detail::check_options<Options...>::value,
+ "Unrecognized node option!");
+};
+
+namespace ilist_detail {
+/// An access class for ilist_node private API.
+///
+/// This gives access to the private parts of ilist nodes. Nodes for an ilist
+/// should friend this class if they inherit privately from ilist_node.
+///
+/// Using this class outside of the ilist implementation is unsupported.
+struct NodeAccess {
+protected:
+ template <class OptionsT>
+ static ilist_node_impl<OptionsT> *getNodePtr(typename OptionsT::pointer N) {
+ return N;
+ }
+ template <class OptionsT>
+ static const ilist_node_impl<OptionsT> *
+ getNodePtr(typename OptionsT::const_pointer N) {
+ return N;
+ }
+ template <class OptionsT>
+ static typename OptionsT::pointer getValuePtr(ilist_node_impl<OptionsT> *N) {
+ return static_cast<typename OptionsT::pointer>(N);
+ }
+ template <class OptionsT>
+ static typename OptionsT::const_pointer
+ getValuePtr(const ilist_node_impl<OptionsT> *N) {
+ return static_cast<typename OptionsT::const_pointer>(N);
+ }
+
+ template <class OptionsT>
+ static ilist_node_impl<OptionsT> *getPrev(ilist_node_impl<OptionsT> &N) {
+ return N.getPrev();
+ }
+ template <class OptionsT>
+ static ilist_node_impl<OptionsT> *getNext(ilist_node_impl<OptionsT> &N) {
+ return N.getNext();
+ }
+ template <class OptionsT>
+ static const ilist_node_impl<OptionsT> *
+ getPrev(const ilist_node_impl<OptionsT> &N) {
+ return N.getPrev();
+ }
+ template <class OptionsT>
+ static const ilist_node_impl<OptionsT> *
+ getNext(const ilist_node_impl<OptionsT> &N) {
+ return N.getNext();
+ }
+};
+
+template <class OptionsT> struct SpecificNodeAccess : NodeAccess {
protected:
- ilist_node() : Next(nullptr) {}
+ typedef typename OptionsT::pointer pointer;
+ typedef typename OptionsT::const_pointer const_pointer;
+ typedef ilist_node_impl<OptionsT> node_type;
+
+ static node_type *getNodePtr(pointer N) {
+ return NodeAccess::getNodePtr<OptionsT>(N);
+ }
+ static const node_type *getNodePtr(const_pointer N) {
+ return NodeAccess::getNodePtr<OptionsT>(N);
+ }
+ static pointer getValuePtr(node_type *N) {
+ return NodeAccess::getValuePtr<OptionsT>(N);
+ }
+ static const_pointer getValuePtr(const node_type *N) {
+ return NodeAccess::getValuePtr<OptionsT>(N);
+ }
+};
+} // end namespace ilist_detail
+template <class OptionsT>
+class ilist_sentinel : public ilist_node_impl<OptionsT> {
public:
- ilist_iterator<NodeTy> getIterator() {
- // FIXME: Stop downcasting to create the iterator (potential UB).
- return ilist_iterator<NodeTy>(static_cast<NodeTy *>(this));
+ ilist_sentinel() {
+ this->initializeSentinel();
+ reset();
}
- ilist_iterator<const NodeTy> getIterator() const {
- // FIXME: Stop downcasting to create the iterator (potential UB).
- return ilist_iterator<const NodeTy>(static_cast<const NodeTy *>(this));
+
+ void reset() {
+ this->setPrev(this);
+ this->setNext(this);
}
+
+ bool empty() const { return this == this->getPrev(); }
};
/// An ilist node that can access its parent list.
///
/// Requires \c NodeTy to have \a getParent() to find the parent node, and the
/// \c ParentTy to have \a getSublistAccess() to get a reference to the list.
-template <typename NodeTy, typename ParentTy>
-class ilist_node_with_parent : public ilist_node<NodeTy> {
+template <typename NodeTy, typename ParentTy, class... Options>
+class ilist_node_with_parent : public ilist_node<NodeTy, Options...> {
protected:
ilist_node_with_parent() = default;
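The sentinel-tracking option documented above can be exercised roughly as follows (a sketch; MI is a hypothetical node type, and the same option must appear on both the node and the list):

#include "llvm/ADT/simple_ilist.h"
#include <cassert>

struct MI : llvm::ilist_node<MI, llvm::ilist_sentinel_tracking<true>> {};

void demo() {
  llvm::simple_ilist<MI, llvm::ilist_sentinel_tracking<true>> L;
  MI A;
  L.push_back(A);
  auto I = L.begin();
  assert(!I.isEnd() && !I->isSentinel());  // A is a real node
  ++I;                                     // step onto the sentinel, i.e. end()
  assert(I.isEnd());                       // only available with ilist_sentinel_tracking<true>
}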
diff --git a/contrib/llvm/include/llvm/ADT/ilist_node_base.h b/contrib/llvm/include/llvm/ADT/ilist_node_base.h
new file mode 100644
index 000000000000..e5062ac4eaad
--- /dev/null
+++ b/contrib/llvm/include/llvm/ADT/ilist_node_base.h
@@ -0,0 +1,53 @@
+//===- llvm/ADT/ilist_node_base.h - Intrusive List Node Base -----*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_ILIST_NODE_BASE_H
+#define LLVM_ADT_ILIST_NODE_BASE_H
+
+#include "llvm/ADT/PointerIntPair.h"
+
+namespace llvm {
+
+/// Base class for ilist nodes.
+///
+/// Optionally tracks whether this node is the sentinel.
+template <bool EnableSentinelTracking> class ilist_node_base;
+
+template <> class ilist_node_base<false> {
+ ilist_node_base *Prev = nullptr;
+ ilist_node_base *Next = nullptr;
+
+public:
+ void setPrev(ilist_node_base *Prev) { this->Prev = Prev; }
+ void setNext(ilist_node_base *Next) { this->Next = Next; }
+ ilist_node_base *getPrev() const { return Prev; }
+ ilist_node_base *getNext() const { return Next; }
+
+ bool isKnownSentinel() const { return false; }
+ void initializeSentinel() {}
+};
+
+template <> class ilist_node_base<true> {
+ PointerIntPair<ilist_node_base *, 1> PrevAndSentinel;
+ ilist_node_base *Next = nullptr;
+
+public:
+ void setPrev(ilist_node_base *Prev) { PrevAndSentinel.setPointer(Prev); }
+ void setNext(ilist_node_base *Next) { this->Next = Next; }
+ ilist_node_base *getPrev() const { return PrevAndSentinel.getPointer(); }
+ ilist_node_base *getNext() const { return Next; }
+
+ bool isSentinel() const { return PrevAndSentinel.getInt(); }
+ bool isKnownSentinel() const { return isSentinel(); }
+ void initializeSentinel() { PrevAndSentinel.setInt(true); }
+};
+
+} // end namespace llvm
+
+#endif // LLVM_ADT_ILIST_NODE_BASE_H
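Because the sentinel flag is packed into the low bit of the (aligned) Prev pointer via PointerIntPair, enabling tracking does not grow the node; a quick sanity check:

#include "llvm/ADT/ilist_node_base.h"

static_assert(sizeof(llvm::ilist_node_base<true>) ==
                  sizeof(llvm::ilist_node_base<false>),
              "sentinel bit is stolen from the Prev pointer, not stored separately");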
diff --git a/contrib/llvm/include/llvm/ADT/ilist_node_options.h b/contrib/llvm/include/llvm/ADT/ilist_node_options.h
new file mode 100644
index 000000000000..c33df1eeb819
--- /dev/null
+++ b/contrib/llvm/include/llvm/ADT/ilist_node_options.h
@@ -0,0 +1,133 @@
+//===- llvm/ADT/ilist_node_options.h - ilist_node Options -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_ILIST_NODE_OPTIONS_H
+#define LLVM_ADT_ILIST_NODE_OPTIONS_H
+
+#include "llvm/Config/abi-breaking.h"
+#include "llvm/Config/llvm-config.h"
+
+#include <type_traits>
+
+namespace llvm {
+
+template <bool EnableSentinelTracking> class ilist_node_base;
+template <bool EnableSentinelTracking> class ilist_base;
+
+/// Option to choose whether to track sentinels.
+///
+/// This option affects the ABI for the nodes. When not specified explicitly,
+/// the ABI depends on LLVM_ENABLE_ABI_BREAKING_CHECKS. Specify explicitly to
+/// enable \a ilist_node::isSentinel().
+template <bool EnableSentinelTracking> struct ilist_sentinel_tracking {};
+
+/// Option to specify a tag for the node type.
+///
+/// This option allows a single value type to be inserted in multiple lists
+/// simultaneously. See \a ilist_node for usage examples.
+template <class Tag> struct ilist_tag {};
+
+namespace ilist_detail {
+
+/// Helper trait for recording whether an option is specified explicitly.
+template <bool IsExplicit> struct explicitness {
+ static const bool is_explicit = IsExplicit;
+};
+typedef explicitness<true> is_explicit;
+typedef explicitness<false> is_implicit;
+
+/// Check whether an option is valid.
+///
+/// The steps for adding and enabling a new ilist option include:
+/// \li define the option, ilist_foo<Bar>, above;
+/// \li add new parameters for Bar to \a ilist_detail::node_options;
+/// \li add an extraction meta-function, ilist_detail::extract_foo;
+/// \li call extract_foo from \a ilist_detail::compute_node_options and pass it
+/// into \a ilist_detail::node_options; and
+/// \li specialize \c is_valid_option<ilist_foo<Bar>> to inherit from \c
+/// std::true_type to get static assertions passing in \a simple_ilist and \a
+/// ilist_node.
+template <class Option> struct is_valid_option : std::false_type {};
+
+/// Extract sentinel tracking option.
+///
+/// Look through \p Options for the \a ilist_sentinel_tracking option, with the
+/// default depending on LLVM_ENABLE_ABI_BREAKING_CHECKS.
+template <class... Options> struct extract_sentinel_tracking;
+template <bool EnableSentinelTracking, class... Options>
+struct extract_sentinel_tracking<
+ ilist_sentinel_tracking<EnableSentinelTracking>, Options...>
+ : std::integral_constant<bool, EnableSentinelTracking>, is_explicit {};
+template <class Option1, class... Options>
+struct extract_sentinel_tracking<Option1, Options...>
+ : extract_sentinel_tracking<Options...> {};
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
+template <> struct extract_sentinel_tracking<> : std::true_type, is_implicit {};
+#else
+template <>
+struct extract_sentinel_tracking<> : std::false_type, is_implicit {};
+#endif
+template <bool EnableSentinelTracking>
+struct is_valid_option<ilist_sentinel_tracking<EnableSentinelTracking>>
+ : std::true_type {};
+
+/// Extract custom tag option.
+///
+/// Look through \p Options for the \a ilist_tag option, pulling out the
+/// custom tag type, using void as a default.
+template <class... Options> struct extract_tag;
+template <class Tag, class... Options>
+struct extract_tag<ilist_tag<Tag>, Options...> {
+ typedef Tag type;
+};
+template <class Option1, class... Options>
+struct extract_tag<Option1, Options...> : extract_tag<Options...> {};
+template <> struct extract_tag<> { typedef void type; };
+template <class Tag> struct is_valid_option<ilist_tag<Tag>> : std::true_type {};
+
+/// Check whether options are valid.
+///
+/// The conjunction of \a is_valid_option on each individual option.
+template <class... Options> struct check_options;
+template <> struct check_options<> : std::true_type {};
+template <class Option1, class... Options>
+struct check_options<Option1, Options...>
+ : std::integral_constant<bool, is_valid_option<Option1>::value &&
+ check_options<Options...>::value> {};
+
+/// Traits for options for \a ilist_node.
+///
+/// This is usually computed via \a compute_node_options.
+template <class T, bool EnableSentinelTracking, bool IsSentinelTrackingExplicit,
+ class TagT>
+struct node_options {
+ typedef T value_type;
+ typedef T *pointer;
+ typedef T &reference;
+ typedef const T *const_pointer;
+ typedef const T &const_reference;
+
+ static const bool enable_sentinel_tracking = EnableSentinelTracking;
+ static const bool is_sentinel_tracking_explicit = IsSentinelTrackingExplicit;
+ typedef TagT tag;
+ typedef ilist_node_base<enable_sentinel_tracking> node_base_type;
+ typedef ilist_base<enable_sentinel_tracking> list_base_type;
+};
+
+template <class T, class... Options> struct compute_node_options {
+ typedef node_options<T, extract_sentinel_tracking<Options...>::value,
+ extract_sentinel_tracking<Options...>::is_explicit,
+ typename extract_tag<Options...>::type>
+ type;
+};
+
+} // end namespace ilist_detail
+} // end namespace llvm
+
+#endif // LLVM_ADT_ILIST_NODE_OPTIONS_H
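Purely for illustration (ilist_detail is an implementation namespace, and the names TagA/Node are hypothetical), this is how the option pack is folded into a single node_options instance:

#include "llvm/ADT/ilist_node_options.h"
#include <type_traits>

struct TagA {};
struct Node;

typedef llvm::ilist_detail::compute_node_options<Node, llvm::ilist_tag<TagA>>::type Opts;
static_assert(std::is_same<Opts::tag, TagA>::value, "tag extracted from the pack");
static_assert(!Opts::is_sentinel_tracking_explicit,
              "sentinel tracking stays at its ABI-dependent default");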
diff --git a/contrib/llvm/include/llvm/ADT/iterator.h b/contrib/llvm/include/llvm/ADT/iterator.h
index 0bd28d5c6cd0..9ccacc10db0d 100644
--- a/contrib/llvm/include/llvm/ADT/iterator.h
+++ b/contrib/llvm/include/llvm/ADT/iterator.h
@@ -12,6 +12,7 @@
#include <cstddef>
#include <iterator>
+#include <type_traits>
namespace llvm {
@@ -256,6 +257,23 @@ struct pointee_iterator
T &operator*() const { return **this->I; }
};
-}
+template <typename WrappedIteratorT,
+ typename T = decltype(&*std::declval<WrappedIteratorT>())>
+class pointer_iterator
+ : public iterator_adaptor_base<pointer_iterator<WrappedIteratorT>,
+ WrappedIteratorT, T> {
+ mutable T Ptr;
+
+public:
+ pointer_iterator() = default;
+
+ explicit pointer_iterator(WrappedIteratorT u)
+ : pointer_iterator::iterator_adaptor_base(std::move(u)) {}
+
+ T &operator*() { return Ptr = &*this->I; }
+ const T &operator*() const { return Ptr = &*this->I; }
+};
+
+} // end namespace llvm
-#endif
+#endif // LLVM_ADT_ITERATOR_H
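A small sketch of the new pointer_iterator adaptor (assumes only the standard library): it turns an iterator over T into an iterator over T*, so dereferencing twice reaches the element.

#include "llvm/ADT/iterator.h"
#include <vector>

void demo() {
  std::vector<int> V = {1, 2, 3};
  llvm::pointer_iterator<std::vector<int>::iterator> I(V.begin()), E(V.end());
  for (; I != E; ++I)
    **I += 1;   // *I yields an int*, **I is the element itself
}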
diff --git a/contrib/llvm/include/llvm/ADT/simple_ilist.h b/contrib/llvm/include/llvm/ADT/simple_ilist.h
new file mode 100644
index 000000000000..a1ab59170840
--- /dev/null
+++ b/contrib/llvm/include/llvm/ADT/simple_ilist.h
@@ -0,0 +1,310 @@
+//===- llvm/ADT/simple_ilist.h - Simple Intrusive List ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_SIMPLE_ILIST_H
+#define LLVM_ADT_SIMPLE_ILIST_H
+
+#include "llvm/ADT/ilist_base.h"
+#include "llvm/ADT/ilist_iterator.h"
+#include "llvm/ADT/ilist_node.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+
+namespace llvm {
+
+/// A simple intrusive list implementation.
+///
+/// This is a simple intrusive list for a \c T that inherits from \c
+/// ilist_node<T>. The list never takes ownership of anything inserted in it.
+///
+/// Unlike \a iplist<T> and \a ilist<T>, \a simple_ilist<T> never allocates or
+/// deletes values, and has no callback traits.
+///
+/// The API for adding nodes includes \a push_front(), \a push_back(), and \a
+/// insert(). These all take values by reference (not by pointer), except for
+/// the range version of \a insert().
+///
+/// There are three sets of APIs for discarding nodes from the list: \a
+/// remove(), which takes a reference to the node to remove, \a erase(), which
+/// takes an iterator or iterator range and returns the next one, and \a
+/// clear(), which empties out the container. All three are constant time
+/// operations. None of these deletes any nodes; in particular, if there is a
+/// single node in the list, then these have identical semantics:
+/// \li \c L.remove(L.front());
+/// \li \c L.erase(L.begin());
+/// \li \c L.clear();
+///
+/// As a convenience for callers, there are parallel APIs that take a \c
+/// Disposer (such as \c std::default_delete<T>): \a removeAndDispose(), \a
+/// eraseAndDispose(), and \a clearAndDispose(). These have different names
+/// because the extra semantic is otherwise non-obvious. They are equivalent
+/// to calling \a std::for_each() on the range to be discarded.
+///
+/// The currently available \p Options customize the nodes in the list. The
+/// same options must be specified in the \a ilist_node instantiation for
+/// compatibility (although the order is irrelevant).
+/// \li Use \a ilist_tag to designate which ilist_node for a given \p T this
+/// list should use. This is useful if a type \p T is part of multiple,
+/// independent lists simultaneously.
+/// \li Use \a ilist_sentinel_tracking to always (or never) track whether a
+/// node is a sentinel. Specifying \c true enables the \a
+/// ilist_node::isSentinel() API. Unlike \a ilist_node::isKnownSentinel(),
+/// which is only appropriate for assertions, \a ilist_node::isSentinel() is
+/// appropriate for real logic.
+///
+/// Here are examples of \p Options usage:
+/// \li \c simple_ilist<T> gives the defaults.
+/// \li \c simple_ilist<T,ilist_sentinel_tracking<true>> enables the \a
+/// ilist_node::isSentinel() API.
+/// \li \c simple_ilist<T,ilist_tag<A>,ilist_sentinel_tracking<false>>
+/// specifies a tag of A and that tracking should be off (even when
+/// LLVM_ENABLE_ABI_BREAKING_CHECKS are enabled).
+/// \li \c simple_ilist<T,ilist_sentinel_tracking<false>,ilist_tag<A>> is
+/// equivalent to the last.
+///
+/// See \a is_valid_option for steps on adding a new option.
+template <typename T, class... Options>
+class simple_ilist
+ : ilist_detail::compute_node_options<T, Options...>::type::list_base_type,
+ ilist_detail::SpecificNodeAccess<
+ typename ilist_detail::compute_node_options<T, Options...>::type> {
+ static_assert(ilist_detail::check_options<Options...>::value,
+ "Unrecognized node option!");
+ typedef
+ typename ilist_detail::compute_node_options<T, Options...>::type OptionsT;
+ typedef typename OptionsT::list_base_type list_base_type;
+ ilist_sentinel<OptionsT> Sentinel;
+
+public:
+ typedef typename OptionsT::value_type value_type;
+ typedef typename OptionsT::pointer pointer;
+ typedef typename OptionsT::reference reference;
+ typedef typename OptionsT::const_pointer const_pointer;
+ typedef typename OptionsT::const_reference const_reference;
+ typedef ilist_iterator<OptionsT, false, false> iterator;
+ typedef ilist_iterator<OptionsT, false, true> const_iterator;
+ typedef ilist_iterator<OptionsT, true, false> reverse_iterator;
+ typedef ilist_iterator<OptionsT, true, true> const_reverse_iterator;
+ typedef size_t size_type;
+ typedef ptrdiff_t difference_type;
+
+ simple_ilist() = default;
+ ~simple_ilist() = default;
+
+ // No copy constructors.
+ simple_ilist(const simple_ilist &) = delete;
+ simple_ilist &operator=(const simple_ilist &) = delete;
+
+ // Move constructors.
+ simple_ilist(simple_ilist &&X) { splice(end(), X); }
+ simple_ilist &operator=(simple_ilist &&X) {
+ clear();
+ splice(end(), X);
+ return *this;
+ }
+
+ iterator begin() { return ++iterator(Sentinel); }
+ const_iterator begin() const { return ++const_iterator(Sentinel); }
+ iterator end() { return iterator(Sentinel); }
+ const_iterator end() const { return const_iterator(Sentinel); }
+ reverse_iterator rbegin() { return ++reverse_iterator(Sentinel); }
+ const_reverse_iterator rbegin() const {
+ return ++const_reverse_iterator(Sentinel);
+ }
+ reverse_iterator rend() { return reverse_iterator(Sentinel); }
+ const_reverse_iterator rend() const {
+ return const_reverse_iterator(Sentinel);
+ }
+
+ /// Check if the list is empty in constant time.
+ LLVM_NODISCARD bool empty() const { return Sentinel.empty(); }
+
+ /// Calculate the size of the list in linear time.
+ LLVM_NODISCARD size_type size() const {
+ return std::distance(begin(), end());
+ }
+
+ reference front() { return *begin(); }
+ const_reference front() const { return *begin(); }
+ reference back() { return *rbegin(); }
+ const_reference back() const { return *rbegin(); }
+
+ /// Insert a node at the front; never copies.
+ void push_front(reference Node) { insert(begin(), Node); }
+
+ /// Insert a node at the back; never copies.
+ void push_back(reference Node) { insert(end(), Node); }
+
+ /// Remove the node at the front; never deletes.
+ void pop_front() { erase(begin()); }
+
+ /// Remove the node at the back; never deletes.
+ void pop_back() { erase(--end()); }
+
+ /// Swap with another list in place using std::swap.
+ void swap(simple_ilist &X) { std::swap(*this, X); }
+
+ /// Insert a node by reference; never copies.
+ iterator insert(iterator I, reference Node) {
+ list_base_type::insertBefore(*I.getNodePtr(), *this->getNodePtr(&Node));
+ return iterator(&Node);
+ }
+
+ /// Insert a range of nodes; never copies.
+ template <class Iterator>
+ void insert(iterator I, Iterator First, Iterator Last) {
+ for (; First != Last; ++First)
+ insert(I, *First);
+ }
+
+ /// Clone another list.
+ template <class Cloner, class Disposer>
+ void cloneFrom(const simple_ilist &L2, Cloner clone, Disposer dispose) {
+ clearAndDispose(dispose);
+ for (const_reference V : L2)
+ push_back(*clone(V));
+ }
+
+ /// Remove a node by reference; never deletes.
+ ///
+ /// \see \a erase() for removing by iterator.
+ /// \see \a removeAndDispose() if the node should be deleted.
+ void remove(reference N) { list_base_type::remove(*this->getNodePtr(&N)); }
+
+ /// Remove a node by reference and dispose of it.
+ template <class Disposer>
+ void removeAndDispose(reference N, Disposer dispose) {
+ remove(N);
+ dispose(&N);
+ }
+
+ /// Remove a node by iterator; never deletes.
+ ///
+ /// \see \a remove() for removing by reference.
+ /// \see \a eraseAndDispose() if the node should be deleted.
+ iterator erase(iterator I) {
+ assert(I != end() && "Cannot remove end of list!");
+ remove(*I++);
+ return I;
+ }
+
+ /// Remove a range of nodes; never deletes.
+ ///
+ /// \see \a eraseAndDispose() if the nodes should be deleted.
+ iterator erase(iterator First, iterator Last) {
+ list_base_type::removeRange(*First.getNodePtr(), *Last.getNodePtr());
+ return Last;
+ }
+
+ /// Remove a node by iterator and dispose of it.
+ template <class Disposer>
+ iterator eraseAndDispose(iterator I, Disposer dispose) {
+ auto Next = std::next(I);
+ erase(I);
+ dispose(&*I);
+ return Next;
+ }
+
+ /// Remove a range of nodes and dispose of them.
+ template <class Disposer>
+ iterator eraseAndDispose(iterator First, iterator Last, Disposer dispose) {
+ while (First != Last)
+ First = eraseAndDispose(First, dispose);
+ return Last;
+ }
+
+ /// Clear the list; never deletes.
+ ///
+ /// \see \a clearAndDispose() if the nodes should be deleted.
+ void clear() { Sentinel.reset(); }
+
+ /// Clear the list and dispose of the nodes.
+ template <class Disposer> void clearAndDispose(Disposer dispose) {
+ eraseAndDispose(begin(), end(), dispose);
+ }
+
+ /// Splice in another list.
+ void splice(iterator I, simple_ilist &L2) {
+ splice(I, L2, L2.begin(), L2.end());
+ }
+
+ /// Splice in a node from another list.
+ void splice(iterator I, simple_ilist &L2, iterator Node) {
+ splice(I, L2, Node, std::next(Node));
+ }
+
+ /// Splice in a range of nodes from another list.
+ void splice(iterator I, simple_ilist &, iterator First, iterator Last) {
+ list_base_type::transferBefore(*I.getNodePtr(), *First.getNodePtr(),
+ *Last.getNodePtr());
+ }
+
+ /// Merge in another list.
+ ///
+ /// \pre \c this and \p RHS are sorted.
+ ///@{
+ void merge(simple_ilist &RHS) { merge(RHS, std::less<T>()); }
+ template <class Compare> void merge(simple_ilist &RHS, Compare comp);
+ ///@}
+
+ /// Sort the list.
+ ///@{
+ void sort() { sort(std::less<T>()); }
+ template <class Compare> void sort(Compare comp);
+ ///@}
+};
+
+template <class T, class... Options>
+template <class Compare>
+void simple_ilist<T, Options...>::merge(simple_ilist &RHS, Compare comp) {
+ if (this == &RHS || RHS.empty())
+ return;
+ iterator LI = begin(), LE = end();
+ iterator RI = RHS.begin(), RE = RHS.end();
+ while (LI != LE) {
+ if (comp(*RI, *LI)) {
+ // Transfer a run of at least size 1 from RHS to LHS.
+ iterator RunStart = RI++;
+ RI = std::find_if(RI, RE, [&](reference RV) { return !comp(RV, *LI); });
+ splice(LI, RHS, RunStart, RI);
+ if (RI == RE)
+ return;
+ }
+ ++LI;
+ }
+ // Transfer the remaining RHS nodes once LHS is finished.
+ splice(LE, RHS, RI, RE);
+}
+
+template <class T, class... Options>
+template <class Compare>
+void simple_ilist<T, Options...>::sort(Compare comp) {
+ // Vacuously sorted.
+ if (empty() || std::next(begin()) == end())
+ return;
+
+ // Split the list in the middle.
+ iterator Center = begin(), End = begin();
+ while (End != end() && ++End != end()) {
+ ++Center;
+ ++End;
+ }
+ simple_ilist RHS;
+ RHS.splice(RHS.end(), *this, Center, end());
+
+ // Sort the sublists and merge back together.
+ sort(comp);
+ RHS.sort(comp);
+ merge(RHS, comp);
+}
+
+} // end namespace llvm
+
+#endif // LLVM_ADT_SIMPLE_ILIST_H
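A usage sketch of the non-owning list and the Disposer-taking APIs described in the class comment (the Node type is hypothetical; heap nodes remain the caller's responsibility):

#include "llvm/ADT/simple_ilist.h"
#include <memory>

struct Node : llvm::ilist_node<Node> {
  int V;
  explicit Node(int V) : V(V) {}
};

void demo() {
  llvm::simple_ilist<Node> L;
  Node Stack(1);
  L.push_back(Stack);                              // by reference; no ownership taken
  L.push_back(*new Node(2));                       // heap node, still owned by the caller
  L.remove(Stack);                                 // unlinks only, never deletes
  L.clearAndDispose(std::default_delete<Node>());  // explicitly delete what remains
}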
diff --git a/contrib/llvm/include/llvm/Analysis/AliasAnalysis.h b/contrib/llvm/include/llvm/Analysis/AliasAnalysis.h
index d6308b7073a0..d8e50438e722 100644
--- a/contrib/llvm/include/llvm/Analysis/AliasAnalysis.h
+++ b/contrib/llvm/include/llvm/Analysis/AliasAnalysis.h
@@ -112,8 +112,10 @@ enum FunctionModRefLocation {
FMRL_Nowhere = 0,
/// Access to memory via argument pointers.
FMRL_ArgumentPointees = 4,
+ /// Memory that is inaccessible via LLVM IR.
+ FMRL_InaccessibleMem = 8,
/// Access to any memory.
- FMRL_Anywhere = 8 | FMRL_ArgumentPointees
+ FMRL_Anywhere = 16 | FMRL_InaccessibleMem | FMRL_ArgumentPointees
};
/// Summary of how a function affects memory in the program.
@@ -143,6 +145,22 @@ enum FunctionModRefBehavior {
/// This property corresponds to the IntrArgMemOnly LLVM intrinsic flag.
FMRB_OnlyAccessesArgumentPointees = FMRL_ArgumentPointees | MRI_ModRef,
+ /// The only memory references in this function (if it has any) are
+ /// references of memory that is otherwise inaccessible via LLVM IR.
+ ///
+ /// This property corresponds to the LLVM IR inaccessiblememonly attribute.
+ FMRB_OnlyAccessesInaccessibleMem = FMRL_InaccessibleMem | MRI_ModRef,
+
+ /// The function may perform non-volatile loads and stores of objects
+ /// pointed to by its pointer-typed arguments, with arbitrary offsets, and
+ /// it may also perform loads and stores of memory that is otherwise
+ /// inaccessible via LLVM IR.
+ ///
+ /// This property corresponds to the LLVM IR
+ /// inaccessiblemem_or_argmemonly attribute.
+ FMRB_OnlyAccessesInaccessibleOrArgMem = FMRL_InaccessibleMem |
+ FMRL_ArgumentPointees | MRI_ModRef,
+
/// This function does not perform any non-local stores or volatile loads,
/// but may read from any memory location.
///
@@ -179,6 +197,20 @@ public:
AAs.emplace_back(new Model<AAResultT>(AAResult, *this));
}
+ /// Register a function analysis ID that the results aggregation depends on.
+ ///
+ /// This is used in the new pass manager to implement the invalidation logic
+ /// where we must invalidate the results aggregation if any of our component
+ /// analyses become invalid.
+ void addAADependencyID(AnalysisKey *ID) { AADeps.push_back(ID); }
+
+ /// Handle invalidation events in the new pass manager.
+ ///
+ /// The aggregation is invalidated if any of the underlying analyses is
+ /// invalidated.
+ bool invalidate(Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &Inv);
+
//===--------------------------------------------------------------------===//
/// \name Alias Queries
/// @{
@@ -339,6 +371,26 @@ public:
return (MRB & MRI_ModRef) && (MRB & FMRL_ArgumentPointees);
}
+ /// Checks if functions with the specified behavior are known to read and
+ /// write at most from memory that is inaccessible from LLVM IR.
+ static bool onlyAccessesInaccessibleMem(FunctionModRefBehavior MRB) {
+ return !(MRB & FMRL_Anywhere & ~FMRL_InaccessibleMem);
+ }
+
+ /// Checks if functions with the specified behavior are known to potentially
+ /// read or write from memory that is inaccessible from LLVM IR.
+ static bool doesAccessInaccessibleMem(FunctionModRefBehavior MRB) {
+ return (MRB & MRI_ModRef) && (MRB & FMRL_InaccessibleMem);
+ }
+
+ /// Checks if functions with the specified behavior are known to read and
+ /// write at most from memory that is inaccessible from LLVM IR or objects
+ /// pointed to by their pointer-typed arguments (with arbitrary offsets).
+ static bool onlyAccessesInaccessibleOrArgMem(FunctionModRefBehavior MRB) {
+ return !(MRB & FMRL_Anywhere &
+ ~(FMRL_InaccessibleMem | FMRL_ArgumentPointees));
+ }
+
/// getModRefInfo (for call sites) - Return information about whether
/// a particular call site modifies or reads the specified memory location.
ModRefInfo getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc);
@@ -571,6 +623,8 @@ private:
const TargetLibraryInfo &TLI;
std::vector<std::unique_ptr<Concept>> AAs;
+
+ std::vector<AnalysisKey *> AADeps;
};
/// Temporary typedef for legacy code that uses a generic \c AliasAnalysis
@@ -854,20 +908,6 @@ class AAManager : public AnalysisInfoMixin<AAManager> {
public:
typedef AAResults Result;
- // This type hase value semantics. We have to spell these out because MSVC
- // won't synthesize them.
- AAManager() {}
- AAManager(AAManager &&Arg) : ResultGetters(std::move(Arg.ResultGetters)) {}
- AAManager(const AAManager &Arg) : ResultGetters(Arg.ResultGetters) {}
- AAManager &operator=(AAManager &&RHS) {
- ResultGetters = std::move(RHS.ResultGetters);
- return *this;
- }
- AAManager &operator=(const AAManager &RHS) {
- ResultGetters = RHS.ResultGetters;
- return *this;
- }
-
/// Register a specific AA result.
template <typename AnalysisT> void registerFunctionAnalysis() {
ResultGetters.push_back(&getFunctionAAResultImpl<AnalysisT>);
@@ -878,7 +918,7 @@ public:
ResultGetters.push_back(&getModuleAAResultImpl<AnalysisT>);
}
- Result run(Function &F, AnalysisManager<Function> &AM) {
+ Result run(Function &F, FunctionAnalysisManager &AM) {
Result R(AM.getResult<TargetLibraryAnalysis>(F));
for (auto &Getter : ResultGetters)
(*Getter)(F, AM, R);
@@ -887,26 +927,30 @@ public:
private:
friend AnalysisInfoMixin<AAManager>;
- static char PassID;
+ static AnalysisKey Key;
- SmallVector<void (*)(Function &F, AnalysisManager<Function> &AM,
+ SmallVector<void (*)(Function &F, FunctionAnalysisManager &AM,
AAResults &AAResults),
4> ResultGetters;
template <typename AnalysisT>
static void getFunctionAAResultImpl(Function &F,
- AnalysisManager<Function> &AM,
+ FunctionAnalysisManager &AM,
AAResults &AAResults) {
AAResults.addAAResult(AM.template getResult<AnalysisT>(F));
+ AAResults.addAADependencyID(AnalysisT::ID());
}
template <typename AnalysisT>
- static void getModuleAAResultImpl(Function &F, AnalysisManager<Function> &AM,
+ static void getModuleAAResultImpl(Function &F, FunctionAnalysisManager &AM,
AAResults &AAResults) {
- auto &MAM =
- AM.getResult<ModuleAnalysisManagerFunctionProxy>(F).getManager();
- if (auto *R = MAM.template getCachedResult<AnalysisT>(*F.getParent()))
+ auto &MAMProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
+ auto &MAM = MAMProxy.getManager();
+ if (auto *R = MAM.template getCachedResult<AnalysisT>(*F.getParent())) {
AAResults.addAAResult(*R);
+ MAMProxy
+ .template registerOuterAnalysisInvalidation<AnalysisT, AAManager>();
+ }
}
};
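A small illustration of how the new location bits compose with the predicates added above (a sketch using only the enumerators defined in this header):

#include "llvm/Analysis/AliasAnalysis.h"
#include <cassert>

void demo() {
  using namespace llvm;
  // inaccessiblememonly behavior touches nothing outside FMRL_InaccessibleMem...
  assert(AAResults::onlyAccessesInaccessibleMem(FMRB_OnlyAccessesInaccessibleMem));
  // ...while argmemonly behavior does not satisfy that predicate,
  assert(!AAResults::onlyAccessesInaccessibleMem(FMRB_OnlyAccessesArgumentPointees));
  // but it does satisfy the combined inaccessiblemem_or_argmemonly check.
  assert(AAResults::onlyAccessesInaccessibleOrArgMem(FMRB_OnlyAccessesArgumentPointees));
}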
diff --git a/contrib/llvm/include/llvm/Analysis/AliasAnalysisEvaluator.h b/contrib/llvm/include/llvm/Analysis/AliasAnalysisEvaluator.h
index 505ed0d9723a..214574852655 100644
--- a/contrib/llvm/include/llvm/Analysis/AliasAnalysisEvaluator.h
+++ b/contrib/llvm/include/llvm/Analysis/AliasAnalysisEvaluator.h
@@ -53,7 +53,7 @@ public:
~AAEvaluator();
/// \brief Run the pass over the function.
- PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM);
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
private:
// Allow the legacy pass to run this using an internal API.
diff --git a/contrib/llvm/include/llvm/Analysis/AliasSetTracker.h b/contrib/llvm/include/llvm/Analysis/AliasSetTracker.h
index cec56889c0ae..5d11b22c6eed 100644
--- a/contrib/llvm/include/llvm/Analysis/AliasSetTracker.h
+++ b/contrib/llvm/include/llvm/Analysis/AliasSetTracker.h
@@ -115,15 +115,21 @@ class AliasSet : public ilist_node<AliasSet> {
}
};
- PointerRec *PtrList, **PtrListEnd; // Doubly linked list of nodes.
- AliasSet *Forward; // Forwarding pointer.
+ // Doubly linked list of nodes.
+ PointerRec *PtrList, **PtrListEnd;
+ // Forwarding pointer.
+ AliasSet *Forward;
/// All instructions without a specific address in this alias set.
std::vector<AssertingVH<Instruction> > UnknownInsts;
/// Number of nodes pointing to this AliasSet plus the number of AliasSets
/// forwarding to it.
- unsigned RefCount : 28;
+ unsigned RefCount : 27;
+
+ // Signifies that this set should be considered to alias any pointer.
+ // Use when the tracker holding this set is saturated.
+ unsigned AliasAny : 1;
/// The kinds of access this alias set models.
///
@@ -153,7 +159,10 @@ class AliasSet : public ilist_node<AliasSet> {
/// True if this alias set contains volatile loads or stores.
unsigned Volatile : 1;
+ unsigned SetSize;
+
void addRef() { ++RefCount; }
+
void dropRef(AliasSetTracker &AST) {
assert(RefCount >= 1 && "Invalid reference count detected!");
if (--RefCount == 0)
@@ -189,6 +198,10 @@ public:
iterator end() const { return iterator(); }
bool empty() const { return PtrList == nullptr; }
+ // Unfortunately, ilist::size() is linear, so we have to add code to keep
+ // track of the list's exact size.
+ unsigned size() { return SetSize; }
+
void print(raw_ostream &OS) const;
void dump() const;
@@ -226,13 +239,11 @@ public:
};
private:
- // Can only be created by AliasSetTracker. Also, ilist creates one
- // to serve as a sentinel.
- friend struct ilist_sentinel_traits<AliasSet>;
+ // Can only be created by AliasSetTracker.
AliasSet()
- : PtrList(nullptr), PtrListEnd(&PtrList), Forward(nullptr), RefCount(0),
- Access(NoAccess), Alias(SetMustAlias), Volatile(false) {
- }
+ : PtrList(nullptr), PtrListEnd(&PtrList), Forward(nullptr), RefCount(0),
+ AliasAny(false), Access(NoAccess), Alias(SetMustAlias),
+ Volatile(false), SetSize(0) {}
AliasSet(const AliasSet &AS) = delete;
void operator=(const AliasSet &AS) = delete;
@@ -317,7 +328,8 @@ class AliasSetTracker {
public:
/// Create an empty collection of AliasSets, and use the specified alias
/// analysis object to disambiguate load and store addresses.
- explicit AliasSetTracker(AliasAnalysis &aa) : AA(aa) {}
+ explicit AliasSetTracker(AliasAnalysis &aa)
+ : AA(aa), TotalMayAliasSetSize(0), AliasAnyAS(nullptr) {}
~AliasSetTracker() { clear(); }
/// These methods are used to add different types of instructions to the alias
@@ -332,27 +344,16 @@ public:
/// These methods return true if inserting the instruction resulted in the
/// addition of a new alias set (i.e., the pointer did not alias anything).
///
- bool add(Value *Ptr, uint64_t Size, const AAMDNodes &AAInfo); // Add a loc.
- bool add(LoadInst *LI);
- bool add(StoreInst *SI);
- bool add(VAArgInst *VAAI);
- bool add(MemSetInst *MSI);
- bool add(Instruction *I); // Dispatch to one of the other add methods...
+ void add(Value *Ptr, uint64_t Size, const AAMDNodes &AAInfo); // Add a loc.
+ void add(LoadInst *LI);
+ void add(StoreInst *SI);
+ void add(VAArgInst *VAAI);
+ void add(MemSetInst *MSI);
+ void add(MemTransferInst *MTI);
+ void add(Instruction *I); // Dispatch to one of the other add methods...
void add(BasicBlock &BB); // Add all instructions in basic block
void add(const AliasSetTracker &AST); // Add alias relations from another AST
- bool addUnknown(Instruction *I);
-
- /// These methods are used to remove all entries that might be aliased by the
- /// specified instruction. These methods return true if any alias sets were
- /// eliminated.
- bool remove(Value *Ptr, uint64_t Size, const AAMDNodes &AAInfo);
- bool remove(LoadInst *LI);
- bool remove(StoreInst *SI);
- bool remove(VAArgInst *VAAI);
- bool remove(MemSetInst *MSI);
- bool remove(Instruction *I);
- void remove(AliasSet &AS);
- bool removeUnknown(Instruction *I);
+ void addUnknown(Instruction *I);
void clear();
@@ -364,8 +365,7 @@ public:
/// set is created to contain the pointer (because the pointer didn't alias
/// anything).
AliasSet &getAliasSetForPointer(Value *P, uint64_t Size,
- const AAMDNodes &AAInfo,
- bool *New = nullptr);
+ const AAMDNodes &AAInfo);
/// Return the alias set containing the location specified if one exists,
/// otherwise return null.
@@ -374,11 +374,6 @@ public:
return mergeAliasSetsForPointer(P, Size, AAInfo);
}
- /// Return true if the specified location is represented by this alias set,
- /// false otherwise. This does not modify the AST object or alias sets.
- bool containsPointer(const Value *P, uint64_t Size,
- const AAMDNodes &AAInfo) const;
-
/// Return true if the specified instruction "may" (or must) alias one of the
/// members in any of the sets.
bool containsUnknown(const Instruction *I) const;
@@ -412,6 +407,14 @@ public:
private:
friend class AliasSet;
+
+ // The total number of pointers contained in all "may" alias sets.
+ unsigned TotalMayAliasSetSize;
+
+ // A non-null value signifies this AST is saturated. A saturated AST lumps
+ // all pointers into a single "May" set.
+ AliasSet *AliasAnyAS;
+
void removeAliasSet(AliasSet *AS);
/// Just like operator[] on the map, except that it creates an entry for the
@@ -424,16 +427,14 @@ private:
}
AliasSet &addPointer(Value *P, uint64_t Size, const AAMDNodes &AAInfo,
- AliasSet::AccessLattice E,
- bool &NewSet) {
- NewSet = false;
- AliasSet &AS = getAliasSetForPointer(P, Size, AAInfo, &NewSet);
- AS.Access |= E;
- return AS;
- }
+ AliasSet::AccessLattice E);
AliasSet *mergeAliasSetsForPointer(const Value *Ptr, uint64_t Size,
const AAMDNodes &AAInfo);
+ /// Merge all alias sets into a single set that is considered to alias any
+ /// pointer.
+ AliasSet &mergeAllAliasSets();
+
AliasSet *findAliasSetForUnknownInst(Instruction *Inst);
};
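A sketch of how a client observes the changed AliasSetTracker interface (assumes an AliasAnalysis and a BasicBlock are already in scope): add() no longer reports whether a new set was created, and AliasSet::size() is now constant time thanks to the per-set SetSize counter.

#include "llvm/Analysis/AliasSetTracker.h"

void demo(llvm::AliasAnalysis &AA, llvm::BasicBlock &BB) {
  llvm::AliasSetTracker AST(AA);
  AST.add(BB);                           // returns void now; no "new set" flag
  for (llvm::AliasSet &AS : AST)
    if (!AS.isForwardingAliasSet())
      (void)AS.size();                   // O(1) with the tracked SetSize
}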
diff --git a/contrib/llvm/include/llvm/Analysis/AssumptionCache.h b/contrib/llvm/include/llvm/Analysis/AssumptionCache.h
index 06f2a117ac21..406a1fe9f560 100644
--- a/contrib/llvm/include/llvm/Analysis/AssumptionCache.h
+++ b/contrib/llvm/include/llvm/Analysis/AssumptionCache.h
@@ -95,17 +95,11 @@ public:
/// assumption caches for a given function.
class AssumptionAnalysis : public AnalysisInfoMixin<AssumptionAnalysis> {
friend AnalysisInfoMixin<AssumptionAnalysis>;
- static char PassID;
+ static AnalysisKey Key;
public:
typedef AssumptionCache Result;
- AssumptionAnalysis() {}
- AssumptionAnalysis(const AssumptionAnalysis &Arg) {}
- AssumptionAnalysis(AssumptionAnalysis &&Arg) {}
- AssumptionAnalysis &operator=(const AssumptionAnalysis &RHS) { return *this; }
- AssumptionAnalysis &operator=(AssumptionAnalysis &&RHS) { return *this; }
-
AssumptionCache run(Function &F, FunctionAnalysisManager &) {
return AssumptionCache(F);
}
@@ -117,7 +111,7 @@ class AssumptionPrinterPass : public PassInfoMixin<AssumptionPrinterPass> {
public:
explicit AssumptionPrinterPass(raw_ostream &OS) : OS(OS) {}
- PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM);
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
/// \brief An immutable pass that tracks lazily created \c AssumptionCache
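A sketch of consuming this analysis under the new pass manager conventions used throughout this import (DemoPass is a hypothetical pass; only the FunctionAnalysisManager/AnalysisKey plumbing shown in the diffs is assumed):

#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/IR/PassManager.h"

struct DemoPass : llvm::PassInfoMixin<DemoPass> {
  llvm::PreservedAnalyses run(llvm::Function &F,
                              llvm::FunctionAnalysisManager &AM) {
    // Lazily computed, cached per function by the analysis manager.
    auto &AC = AM.getResult<llvm::AssumptionAnalysis>(F);
    (void)AC;
    return llvm::PreservedAnalyses::all();
  }
};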
diff --git a/contrib/llvm/include/llvm/Analysis/BasicAliasAnalysis.h b/contrib/llvm/include/llvm/Analysis/BasicAliasAnalysis.h
index a3195d17b029..addfffa01061 100644
--- a/contrib/llvm/include/llvm/Analysis/BasicAliasAnalysis.h
+++ b/contrib/llvm/include/llvm/Analysis/BasicAliasAnalysis.h
@@ -33,9 +33,10 @@ class LoopInfo;
/// This is the AA result object for the basic, local, and stateless alias
/// analysis. It implements the AA query interface in an entirely stateless
-/// manner. As one consequence, it is never invalidated. While it does retain
-/// some storage, that is used as an optimization and not to preserve
-/// information from query to query.
+/// manner. As one consequence, it is never invalidated due to IR changes.
+/// While it does retain some storage, that is used as an optimization and not
+/// to preserve information from query to query. However it does retain handles
+/// to various other analyses and must be recomputed when those analyses are.
class BasicAAResult : public AAResultBase<BasicAAResult> {
friend AAResultBase<BasicAAResult>;
@@ -58,10 +59,9 @@ public:
: AAResultBase(std::move(Arg)), DL(Arg.DL), TLI(Arg.TLI), AC(Arg.AC),
DT(Arg.DT), LI(Arg.LI) {}
- /// Handle invalidation events from the new pass manager.
- ///
- /// By definition, this result is stateless and so remains valid.
- bool invalidate(Function &, const PreservedAnalyses &) { return false; }
+ /// Handle invalidation events in the new pass manager.
+ bool invalidate(Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &Inv);
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB);
@@ -185,25 +185,28 @@ private:
AliasResult aliasPHI(const PHINode *PN, uint64_t PNSize,
const AAMDNodes &PNAAInfo, const Value *V2,
- uint64_t V2Size, const AAMDNodes &V2AAInfo);
+ uint64_t V2Size, const AAMDNodes &V2AAInfo,
+ const Value *UnderV2);
AliasResult aliasSelect(const SelectInst *SI, uint64_t SISize,
const AAMDNodes &SIAAInfo, const Value *V2,
- uint64_t V2Size, const AAMDNodes &V2AAInfo);
+ uint64_t V2Size, const AAMDNodes &V2AAInfo,
+ const Value *UnderV2);
AliasResult aliasCheck(const Value *V1, uint64_t V1Size, AAMDNodes V1AATag,
- const Value *V2, uint64_t V2Size, AAMDNodes V2AATag);
+ const Value *V2, uint64_t V2Size, AAMDNodes V2AATag,
+ const Value *O1 = nullptr, const Value *O2 = nullptr);
};
/// Analysis pass providing a never-invalidated alias analysis result.
class BasicAA : public AnalysisInfoMixin<BasicAA> {
friend AnalysisInfoMixin<BasicAA>;
- static char PassID;
+ static AnalysisKey Key;
public:
typedef BasicAAResult Result;
- BasicAAResult run(Function &F, AnalysisManager<Function> &AM);
+ BasicAAResult run(Function &F, FunctionAnalysisManager &AM);
};
/// Legacy wrapper pass to provide the BasicAAResult object.
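With the new signature, BasicAAResult::invalidate can report staleness whenever any analysis it holds a handle to is invalidated. One plausible shape for that definition, sketched here rather than quoted from BasicAliasAnalysis.cpp:

bool BasicAAResult::invalidate(Function &Fn, const PreservedAnalyses &PA,
                               FunctionAnalysisManager::Invalidator &Inv) {
  // The result itself carries no IR-derived state, but it keeps handles to
  // other analyses; if any of those become invalid, so does this result.
  if (Inv.invalidate<AssumptionAnalysis>(Fn, PA) ||
      (DT && Inv.invalidate<DominatorTreeAnalysis>(Fn, PA)) ||
      (LI && Inv.invalidate<LoopAnalysis>(Fn, PA)))
    return true;

  // Otherwise this result is still usable.
  return false;
}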
diff --git a/contrib/llvm/include/llvm/Analysis/BlockFrequencyInfo.h b/contrib/llvm/include/llvm/Analysis/BlockFrequencyInfo.h
index 7d48dfc9121e..562041d11fa1 100644
--- a/contrib/llvm/include/llvm/Analysis/BlockFrequencyInfo.h
+++ b/contrib/llvm/include/llvm/Analysis/BlockFrequencyInfo.h
@@ -61,6 +61,11 @@ public:
/// the enclosing function's count (if available) and returns the value.
Optional<uint64_t> getBlockProfileCount(const BasicBlock *BB) const;
+ /// \brief Returns the estimated profile count of \p Freq.
+ /// This uses the frequency \p Freq and multiplies it by
+ /// the enclosing function's count (if available) and returns the value.
+ Optional<uint64_t> getProfileCountFromFreq(uint64_t Freq) const;
+
// Set the frequency of the given basic block.
void setBlockFreq(const BasicBlock *BB, uint64_t Freq);
@@ -85,14 +90,14 @@ public:
class BlockFrequencyAnalysis
: public AnalysisInfoMixin<BlockFrequencyAnalysis> {
friend AnalysisInfoMixin<BlockFrequencyAnalysis>;
- static char PassID;
+ static AnalysisKey Key;
public:
/// \brief Provide the result typedef for this analysis pass.
typedef BlockFrequencyInfo Result;
/// \brief Run the analysis pass over a function and produce BFI.
- Result run(Function &F, AnalysisManager<Function> &AM);
+ Result run(Function &F, FunctionAnalysisManager &AM);
};
/// \brief Printer pass for the \c BlockFrequencyInfo results.
@@ -102,7 +107,7 @@ class BlockFrequencyPrinterPass
public:
explicit BlockFrequencyPrinterPass(raw_ostream &OS) : OS(OS) {}
- PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM);
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
/// \brief Legacy analysis pass which computes \c BlockFrequencyInfo.
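The new getProfileCountFromFreq boils down to scaling the function's entry count by the ratio of the requested frequency to the entry block frequency. A sketch of that arithmetic (the wide intermediate guards against overflow; the exact implementation may differ):

Optional<uint64_t>
BlockFrequencyInfoImplBase::getProfileCountFromFreq(const Function &F,
                                                    uint64_t Freq) const {
  Optional<uint64_t> EntryCount = F.getEntryCount();
  if (!EntryCount)
    return None;

  // Count = EntryCount * Freq / EntryFreq, computed in 128 bits so the
  // multiplication cannot overflow.
  APInt Scaled(128, *EntryCount);
  Scaled *= APInt(128, Freq);
  Scaled = Scaled.udiv(APInt(128, getEntryFreq()));
  return Scaled.getLimitedValue();
}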
diff --git a/contrib/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h b/contrib/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h
index 7ed06b1bb68f..3f4428d18740 100644
--- a/contrib/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h
+++ b/contrib/llvm/include/llvm/Analysis/BlockFrequencyInfoImpl.h
@@ -482,6 +482,8 @@ public:
BlockFrequency getBlockFreq(const BlockNode &Node) const;
Optional<uint64_t> getBlockProfileCount(const Function &F,
const BlockNode &Node) const;
+ Optional<uint64_t> getProfileCountFromFreq(const Function &F,
+ uint64_t Freq) const;
void setBlockFreq(const BlockNode &Node, uint64_t Freq);
@@ -925,6 +927,10 @@ public:
const BlockT *BB) const {
return BlockFrequencyInfoImplBase::getBlockProfileCount(F, getNode(BB));
}
+ Optional<uint64_t> getProfileCountFromFreq(const Function &F,
+ uint64_t Freq) const {
+ return BlockFrequencyInfoImplBase::getProfileCountFromFreq(F, Freq);
+ }
void setBlockFreq(const BlockT *BB, uint64_t Freq);
Scaled64 getFloatingBlockFreq(const BlockT *BB) const {
return BlockFrequencyInfoImplBase::getFloatingBlockFreq(getNode(BB));
@@ -1245,7 +1251,7 @@ struct BFIDOTGraphTraitsBase : public DefaultDOTGraphTraits {
: DefaultDOTGraphTraits(isSimple) {}
typedef GraphTraits<BlockFrequencyInfoT *> GTraits;
- typedef typename GTraits::NodeType NodeType;
+ typedef typename GTraits::NodeRef NodeRef;
typedef typename GTraits::ChildIteratorType EdgeIter;
typedef typename GTraits::nodes_iterator NodeIter;
@@ -1254,8 +1260,7 @@ struct BFIDOTGraphTraitsBase : public DefaultDOTGraphTraits {
return G->getFunction()->getName();
}
- std::string getNodeAttributes(const NodeType *Node,
- const BlockFrequencyInfoT *Graph,
+ std::string getNodeAttributes(NodeRef Node, const BlockFrequencyInfoT *Graph,
unsigned HotPercentThreshold = 0) {
std::string Result;
if (!HotPercentThreshold)
@@ -1266,9 +1271,9 @@ struct BFIDOTGraphTraitsBase : public DefaultDOTGraphTraits {
for (NodeIter I = GTraits::nodes_begin(Graph),
E = GTraits::nodes_end(Graph);
I != E; ++I) {
- NodeType &N = *I;
+ NodeRef N = *I;
MaxFrequency =
- std::max(MaxFrequency, Graph->getBlockFreq(&N).getFrequency());
+ std::max(MaxFrequency, Graph->getBlockFreq(N).getFrequency());
}
}
BlockFrequency Freq = Graph->getBlockFreq(Node);
@@ -1285,8 +1290,8 @@ struct BFIDOTGraphTraitsBase : public DefaultDOTGraphTraits {
return Result;
}
- std::string getNodeLabel(const NodeType *Node,
- const BlockFrequencyInfoT *Graph, GVDAGType GType) {
+ std::string getNodeLabel(NodeRef Node, const BlockFrequencyInfoT *Graph,
+ GVDAGType GType) {
std::string Result;
raw_string_ostream OS(Result);
@@ -1313,7 +1318,7 @@ struct BFIDOTGraphTraitsBase : public DefaultDOTGraphTraits {
return Result;
}
- std::string getEdgeAttributes(const NodeType *Node, EdgeIter EI,
+ std::string getEdgeAttributes(NodeRef Node, EdgeIter EI,
const BlockFrequencyInfoT *BFI,
const BranchProbabilityInfoT *BPI,
unsigned HotPercentThreshold = 0) {
diff --git a/contrib/llvm/include/llvm/Analysis/BranchProbabilityInfo.h b/contrib/llvm/include/llvm/Analysis/BranchProbabilityInfo.h
index 6434ba962ebc..14b7a7f529f7 100644
--- a/contrib/llvm/include/llvm/Analysis/BranchProbabilityInfo.h
+++ b/contrib/llvm/include/llvm/Analysis/BranchProbabilityInfo.h
@@ -178,14 +178,14 @@ private:
class BranchProbabilityAnalysis
: public AnalysisInfoMixin<BranchProbabilityAnalysis> {
friend AnalysisInfoMixin<BranchProbabilityAnalysis>;
- static char PassID;
+ static AnalysisKey Key;
public:
/// \brief Provide the result typedef for this analysis pass.
typedef BranchProbabilityInfo Result;
/// \brief Run the analysis pass over a function and produce BPI.
- BranchProbabilityInfo run(Function &F, AnalysisManager<Function> &AM);
+ BranchProbabilityInfo run(Function &F, FunctionAnalysisManager &AM);
};
/// \brief Printer pass for the \c BranchProbabilityAnalysis results.
@@ -195,7 +195,7 @@ class BranchProbabilityPrinterPass
public:
explicit BranchProbabilityPrinterPass(raw_ostream &OS) : OS(OS) {}
- PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM);
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
/// \brief Legacy analysis pass which computes \c BranchProbabilityInfo.
diff --git a/contrib/llvm/include/llvm/Analysis/CFGPrinter.h b/contrib/llvm/include/llvm/Analysis/CFGPrinter.h
index 035764837e6f..efaa9d6df8ea 100644
--- a/contrib/llvm/include/llvm/Analysis/CFGPrinter.h
+++ b/contrib/llvm/include/llvm/Analysis/CFGPrinter.h
@@ -7,6 +7,10 @@
//
//===----------------------------------------------------------------------===//
//
+// This file defines a 'dot-cfg' analysis pass, which emits the
+// cfg.<fnname>.dot file for each function in the program, with a graph of the
+// CFG for that function.
+//
// This file defines external functions that can be called to explicitly
// instantiate the CFG printer.
//
@@ -19,9 +23,34 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/Support/GraphWriter.h"
namespace llvm {
+class CFGViewerPass
+ : public PassInfoMixin<CFGViewerPass> {
+public:
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+class CFGOnlyViewerPass
+ : public PassInfoMixin<CFGOnlyViewerPass> {
+public:
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+class CFGPrinterPass
+ : public PassInfoMixin<CFGPrinterPass> {
+public:
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+class CFGOnlyPrinterPass
+ : public PassInfoMixin<CFGOnlyPrinterPass> {
+public:
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
template<>
struct DOTGraphTraits<const Function*> : public DefaultDOTGraphTraits {
@@ -118,13 +147,42 @@ struct DOTGraphTraits<const Function*> : public DefaultDOTGraphTraits {
}
return "";
}
+
+ /// Display the raw branch weights from PGO.
+ std::string getEdgeAttributes(const BasicBlock *Node, succ_const_iterator I,
+ const Function *F) {
+ const TerminatorInst *TI = Node->getTerminator();
+ if (TI->getNumSuccessors() == 1)
+ return "";
+
+ MDNode *WeightsNode = TI->getMetadata(LLVMContext::MD_prof);
+ if (!WeightsNode)
+ return "";
+
+ MDString *MDName = cast<MDString>(WeightsNode->getOperand(0));
+ if (MDName->getString() != "branch_weights")
+ return "";
+
+ unsigned OpNo = I.getSuccessorIndex() + 1;
+ if (OpNo >= WeightsNode->getNumOperands())
+ return "";
+ ConstantInt *Weight =
+ mdconst::dyn_extract<ConstantInt>(WeightsNode->getOperand(OpNo));
+ if (!Weight)
+ return "";
+
+ // Prepend a 'W' to indicate that this is a weight rather than the actual
+ // profile count (due to scaling).
+ Twine Attrs = "label=\"W:" + Twine(Weight->getZExtValue()) + "\"";
+ return Attrs.str();
+ }
};
} // End llvm namespace
namespace llvm {
class FunctionPass;
- FunctionPass *createCFGPrinterPass ();
- FunctionPass *createCFGOnlyPrinterPass ();
+ FunctionPass *createCFGPrinterLegacyPassPass ();
+ FunctionPass *createCFGOnlyPrinterLegacyPassPass ();
} // End llvm namespace
#endif
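A short sketch of wiring the new new-PM printer classes into a function pipeline; the surrounding pipeline and analysis setup are assumed to exist, and only the addPass calls are the point:

#include "llvm/Analysis/CFGPrinter.h"
#include "llvm/IR/PassManager.h"

// Append the dot emitters to an already-configured function pipeline.
void addCFGPrinters(llvm::FunctionPassManager &FPM) {
  FPM.addPass(llvm::CFGPrinterPass());     // writes cfg.<fnname>.dot per function
  FPM.addPass(llvm::CFGOnlyPrinterPass()); // same graph, without instruction text
}

Legacy pass manager users are affected too: the factory functions are renamed to createCFGPrinterLegacyPassPass() and createCFGOnlyPrinterLegacyPassPass(), as shown at the bottom of this file's hunk.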
diff --git a/contrib/llvm/include/llvm/Analysis/CFLAndersAliasAnalysis.h b/contrib/llvm/include/llvm/Analysis/CFLAndersAliasAnalysis.h
index 48eca888419a..f3520aa3fe82 100644
--- a/contrib/llvm/include/llvm/Analysis/CFLAndersAliasAnalysis.h
+++ b/contrib/llvm/include/llvm/Analysis/CFLAndersAliasAnalysis.h
@@ -42,7 +42,10 @@ public:
/// Handle invalidation events from the new pass manager.
/// By definition, this result is stateless and so remains valid.
- bool invalidate(Function &, const PreservedAnalyses &) { return false; }
+ bool invalidate(Function &, const PreservedAnalyses &,
+ FunctionAnalysisManager::Invalidator &) {
+ return false;
+ }
/// Evict the given function from cache
void evict(const Function &Fn);
@@ -103,12 +106,12 @@ private:
/// in particular to leverage invalidation to trigger re-computation.
class CFLAndersAA : public AnalysisInfoMixin<CFLAndersAA> {
friend AnalysisInfoMixin<CFLAndersAA>;
- static char PassID;
+ static AnalysisKey Key;
public:
typedef CFLAndersAAResult Result;
- CFLAndersAAResult run(Function &F, AnalysisManager<Function> &AM);
+ CFLAndersAAResult run(Function &F, FunctionAnalysisManager &AM);
};
/// Legacy wrapper pass to provide the CFLAndersAAResult object.
diff --git a/contrib/llvm/include/llvm/Analysis/CFLSteensAliasAnalysis.h b/contrib/llvm/include/llvm/Analysis/CFLSteensAliasAnalysis.h
index 80a00d02b811..3aae9a1e9b2e 100644
--- a/contrib/llvm/include/llvm/Analysis/CFLSteensAliasAnalysis.h
+++ b/contrib/llvm/include/llvm/Analysis/CFLSteensAliasAnalysis.h
@@ -45,7 +45,10 @@ public:
/// Handle invalidation events from the new pass manager.
///
/// By definition, this result is stateless and so remains valid.
- bool invalidate(Function &, const PreservedAnalyses &) { return false; }
+ bool invalidate(Function &, const PreservedAnalyses &,
+ FunctionAnalysisManager::Invalidator &) {
+ return false;
+ }
/// \brief Inserts the given Function into the cache.
void scan(Function *Fn);
@@ -81,16 +84,6 @@ public:
return QueryResult;
}
- /// Get the location associated with a pointer argument of a callsite.
- ModRefInfo getArgModRefInfo(ImmutableCallSite CS, unsigned ArgIdx);
-
- /// Returns the behavior when calling the given call site.
- FunctionModRefBehavior getModRefBehavior(ImmutableCallSite CS);
-
- /// Returns the behavior when calling the given function. For use when the
- /// call site is not known.
- FunctionModRefBehavior getModRefBehavior(const Function *F);
-
private:
struct FunctionHandle final : public CallbackVH {
FunctionHandle(Function *Fn, CFLSteensAAResult *Result)
@@ -132,12 +125,12 @@ private:
/// in particular to leverage invalidation to trigger re-computation of sets.
class CFLSteensAA : public AnalysisInfoMixin<CFLSteensAA> {
friend AnalysisInfoMixin<CFLSteensAA>;
- static char PassID;
+ static AnalysisKey Key;
public:
typedef CFLSteensAAResult Result;
- CFLSteensAAResult run(Function &F, AnalysisManager<Function> &AM);
+ CFLSteensAAResult run(Function &F, FunctionAnalysisManager &AM);
};
/// Legacy wrapper pass to provide the CFLSteensAAResult object.
diff --git a/contrib/llvm/include/llvm/Analysis/CGSCCPassManager.h b/contrib/llvm/include/llvm/Analysis/CGSCCPassManager.h
index 3263ecec4e26..54ef1a688d37 100644
--- a/contrib/llvm/include/llvm/Analysis/CGSCCPassManager.h
+++ b/contrib/llvm/include/llvm/Analysis/CGSCCPassManager.h
@@ -11,49 +11,272 @@
/// This header provides classes for managing passes over SCCs of the call
/// graph. These passes form an important component of LLVM's interprocedural
/// optimizations. Because they operate on the SCCs of the call graph, and they
-/// traverse the graph in post order, they can effectively do pair-wise
-/// interprocedural optimizations for all call edges in the program. At each
-/// call site edge, the callee has already been optimized as much as is
-/// possible. This in turn allows very accurate analysis of it for IPO.
+/// traverse the graph in post-order, they can effectively do pair-wise
+/// interprocedural optimizations for all call edges in the program while
+/// incrementally refining it and improving the context of these pair-wise
+/// optimizations. At each call site edge, the callee has already been
+/// optimized as much as is possible. This in turn allows very accurate
+/// analysis of it for IPO.
+///
+/// A secondary more general goal is to be able to isolate optimization on
+/// unrelated parts of the IR module. This is useful to ensure our
+/// optimizations are principled and don't miss opportunities where refinement
+/// of one part of the module influences transformations in another part of the

+/// module. But this is also useful if we want to parallelize the optimizations
+/// across common large module graph shapes which tend to be very wide and have
+/// large regions of unrelated cliques.
+///
+/// To satisfy these goals, we use the LazyCallGraph which provides two graphs
+/// nested inside each other (and built lazily from the bottom-up): the call
+/// graph proper, and a reference graph. The reference graph is a superset of
+/// the call graph and is a conservative approximation of what could through
+/// scalar or CGSCC transforms *become* the call graph. Using this allows us to
+/// ensure we optimize functions prior to them being introduced into the call
+/// graph by devirtualization or other techniques, and thus ensures that
+/// subsequent pair-wise interprocedural optimizations observe the optimized
+/// form of these functions. The (potentially transitive) reference
+/// reachability used by the reference graph is a conservative approximation
+/// that still allows us to have independent regions of the graph.
+///
+/// FIXME: There is one major drawback of the reference graph: in its naive
+/// form it is quadratic because it contains a distinct edge for each
+/// (potentially indirect) reference, even if they are all through some common
+/// global table of function pointers. This can be fixed in a number of ways
+/// that essentially preserve enough of the normalization. While it isn't
+/// expected to completely preclude the usability of this, it will need to be
+/// addressed.
+///
+///
+/// All of these issues are made substantially more complex in the face of
+/// mutations to the call graph while optimization passes are being run. When
+/// mutations to the call graph occur we want to achieve two different things:
+///
+/// - We need to update the call graph in-flight and invalidate analyses
+/// cached on entities in the graph. Because of the cache-based analysis
+/// design of the pass manager, it is essential to have stable identities for
+/// the elements of the IR that passes traverse, and to invalidate any
+/// analyses cached on these elements as the mutations take place.
+///
+/// - We want to preserve the incremental and post-order traversal of the
+/// graph even as it is refined and mutated. This means we want optimization
+/// to observe the most refined form of the call graph and to do so in
+/// post-order.
+///
+/// To address this, the CGSCC manager uses both worklists that can be expanded
+/// by passes which transform the IR, and provides invalidation tests to skip
+/// entries that become dead. This extra data is provided to every SCC pass so
+/// that it can carefully update the manager's traversal as the call graph
+/// mutates.
+///
+/// We also provide support for running function passes within the CGSCC walk,
+/// and there we provide automatic update of the call graph including of the
+/// pass manager to reflect call graph changes that fall out naturally as part
+/// of scalar transformations.
+///
+/// The patterns used to ensure the goals of post-order visitation of the fully
+/// refined graph:
+///
+/// 1) Sink toward the "bottom" as the graph is refined. This means that any
+/// iteration continues in some valid post-order sequence after the mutation
+/// has altered the structure.
+///
+/// 2) Enqueue in post-order, including the current entity. If the current
+/// entity's shape changes, it and everything after it in post-order needs
+/// to be visited to observe that shape.
///
//===----------------------------------------------------------------------===//
#ifndef LLVM_ANALYSIS_CGSCCPASSMANAGER_H
#define LLVM_ANALYSIS_CGSCCPASSMANAGER_H
+#include "llvm/ADT/PriorityWorklist.h"
#include "llvm/Analysis/LazyCallGraph.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/InstIterator.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/IR/ValueHandle.h"
namespace llvm {
-extern template class PassManager<LazyCallGraph::SCC>;
-/// \brief The CGSCC pass manager.
-///
-/// See the documentation for the PassManager template for details. It runs
-/// a sequency of SCC passes over each SCC that the manager is run over. This
-/// typedef serves as a convenient way to refer to this construct.
-typedef PassManager<LazyCallGraph::SCC> CGSCCPassManager;
+struct CGSCCUpdateResult;
-extern template class AnalysisManager<LazyCallGraph::SCC>;
+/// Extern template declaration for the analysis set for this IR unit.
+extern template class AllAnalysesOn<LazyCallGraph::SCC>;
+
+extern template class AnalysisManager<LazyCallGraph::SCC, LazyCallGraph &>;
/// \brief The CGSCC analysis manager.
///
/// See the documentation for the AnalysisManager template for detail
/// documentation. This typedef serves as a convenient way to refer to this
/// construct in the adaptors and proxies used to integrate this into the larger
/// pass manager infrastructure.
-typedef AnalysisManager<LazyCallGraph::SCC> CGSCCAnalysisManager;
+typedef AnalysisManager<LazyCallGraph::SCC, LazyCallGraph &>
+ CGSCCAnalysisManager;
+
+// Explicit specialization and instantiation declarations for the pass manager.
+// See the comments on the definition of the specialization for details on how
+// it differs from the primary template.
+template <>
+PreservedAnalyses
+PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &,
+ CGSCCUpdateResult &>::run(LazyCallGraph::SCC &InitialC,
+ CGSCCAnalysisManager &AM,
+ LazyCallGraph &G, CGSCCUpdateResult &UR);
+extern template class PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager,
+ LazyCallGraph &, CGSCCUpdateResult &>;
+
+/// \brief The CGSCC pass manager.
+///
+/// See the documentation for the PassManager template for details. It runs
+/// a sequence of SCC passes over each SCC that the manager is run over. This
+/// typedef serves as a convenient way to refer to this construct.
+typedef PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &,
+ CGSCCUpdateResult &>
+ CGSCCPassManager;
+
+/// An explicit specialization of the require analysis template pass.
+template <typename AnalysisT>
+struct RequireAnalysisPass<AnalysisT, LazyCallGraph::SCC, CGSCCAnalysisManager,
+ LazyCallGraph &, CGSCCUpdateResult &>
+ : PassInfoMixin<RequireAnalysisPass<AnalysisT, LazyCallGraph::SCC,
+ CGSCCAnalysisManager, LazyCallGraph &,
+ CGSCCUpdateResult &>> {
+ PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
+ LazyCallGraph &CG, CGSCCUpdateResult &) {
+ (void)AM.template getResult<AnalysisT>(C, CG);
+ return PreservedAnalyses::all();
+ }
+};
-extern template class InnerAnalysisManagerProxy<CGSCCAnalysisManager, Module>;
/// A proxy from a \c CGSCCAnalysisManager to a \c Module.
typedef InnerAnalysisManagerProxy<CGSCCAnalysisManager, Module>
CGSCCAnalysisManagerModuleProxy;
-extern template class OuterAnalysisManagerProxy<ModuleAnalysisManager,
- LazyCallGraph::SCC>;
+/// We need a specialized result for the \c CGSCCAnalysisManagerModuleProxy so
+/// it can have access to the call graph in order to walk all the SCCs when
+/// invalidating things.
+template <> class CGSCCAnalysisManagerModuleProxy::Result {
+public:
+ explicit Result(CGSCCAnalysisManager &InnerAM, LazyCallGraph &G)
+ : InnerAM(&InnerAM), G(&G) {}
+
+ /// \brief Accessor for the analysis manager.
+ CGSCCAnalysisManager &getManager() { return *InnerAM; }
+
+ /// \brief Handler for invalidation of the Module.
+ ///
+ /// If the proxy analysis itself is preserved, then we assume that the set of
+ /// SCCs in the Module hasn't changed. Thus any pointers to SCCs in the
+ /// CGSCCAnalysisManager are still valid, and we don't need to call \c clear
+ /// on the CGSCCAnalysisManager.
+ ///
+ /// Regardless of whether this analysis is marked as preserved, all of the
+ /// analyses in the \c CGSCCAnalysisManager are potentially invalidated based
+ /// on the set of preserved analyses.
+ bool invalidate(Module &M, const PreservedAnalyses &PA,
+ ModuleAnalysisManager::Invalidator &Inv);
+
+private:
+ CGSCCAnalysisManager *InnerAM;
+ LazyCallGraph *G;
+};
+
+/// Provide a specialized run method for the \c CGSCCAnalysisManagerModuleProxy
+/// so it can pass the lazy call graph to the result.
+template <>
+CGSCCAnalysisManagerModuleProxy::Result
+CGSCCAnalysisManagerModuleProxy::run(Module &M, ModuleAnalysisManager &AM);
+
+// Ensure the \c CGSCCAnalysisManagerModuleProxy is provided as an extern
+// template.
+extern template class InnerAnalysisManagerProxy<CGSCCAnalysisManager, Module>;
+
+extern template class OuterAnalysisManagerProxy<
+ ModuleAnalysisManager, LazyCallGraph::SCC, LazyCallGraph &>;
/// A proxy from a \c ModuleAnalysisManager to an \c SCC.
-typedef OuterAnalysisManagerProxy<ModuleAnalysisManager, LazyCallGraph::SCC>
+typedef OuterAnalysisManagerProxy<ModuleAnalysisManager, LazyCallGraph::SCC,
+ LazyCallGraph &>
ModuleAnalysisManagerCGSCCProxy;
+/// Support structure for SCC passes to communicate updates to the call graph
+/// back to the CGSCC pass manager infrastructure.
+///
+/// The CGSCC pass manager runs SCC passes which are allowed to update the call
+/// graph and SCC structures. This means the structure the pass manager works
+/// on is mutating underneath it. In order to support that, there needs to be
+/// careful communication about the precise nature and ramifications of these
+/// updates to the pass management infrastructure.
+///
+/// All SCC passes will have to accept a reference to the management layer's
+/// update result struct and use it to reflect the results of any CG updates
+/// performed.
+///
+/// Passes which do not change the call graph structure in any way can just
+/// ignore this argument to their run method.
+struct CGSCCUpdateResult {
+ /// Worklist of the RefSCCs queued for processing.
+ ///
+ /// When a pass refines the graph and creates new RefSCCs or causes them to
+ /// have a different shape or set of component SCCs it should add the RefSCCs
+ /// to this worklist so that we visit them in the refined form.
+ ///
+ /// This worklist is in reverse post-order, as we pop off the back in order
+ /// to observe RefSCCs in post-order. When adding RefSCCs, clients should add
+ /// them in reverse post-order.
+ SmallPriorityWorklist<LazyCallGraph::RefSCC *, 1> &RCWorklist;
+
+ /// Worklist of the SCCs queued for processing.
+ ///
+ /// When a pass refines the graph and creates new SCCs or causes them to have
+ /// a different shape or set of component functions it should add the SCCs to
+ /// this worklist so that we visit them in the refined form.
+ ///
+ /// Note that if the SCCs are part of a RefSCC that is added to the \c
+ /// RCWorklist, they don't need to be added here as visiting the RefSCC will
+ /// be sufficient to re-visit the SCCs within it.
+ ///
+ /// This worklist is in reverse post-order, as we pop off the back in order
+ /// to observe SCCs in post-order. When adding SCCs, clients should add them
+ /// in reverse post-order.
+ SmallPriorityWorklist<LazyCallGraph::SCC *, 1> &CWorklist;
+
+ /// The set of invalidated RefSCCs which should be skipped if they are found
+ /// in \c RCWorklist.
+ ///
+ /// This is used to quickly prune out RefSCCs when they get deleted and
+ /// happen to already be on the worklist. We use this primarily to avoid
+ /// scanning the list and removing entries from it.
+ SmallPtrSetImpl<LazyCallGraph::RefSCC *> &InvalidatedRefSCCs;
+
+ /// The set of invalidated SCCs which should be skipped if they are found
+ /// in \c CWorklist.
+ ///
+ /// This is used to quickly prune out SCCs when they get deleted and happen
+ /// to already be on the worklist. We use this primarily to avoid scanning
+ /// the list and removing entries from it.
+ SmallPtrSetImpl<LazyCallGraph::SCC *> &InvalidatedSCCs;
+
+ /// If non-null, the updated current \c RefSCC being processed.
+ ///
+/// This is set when a graph refinement takes place and the "current" point in
+ /// the graph moves "down" or earlier in the post-order walk. This will often
+ /// cause the "current" RefSCC to be a newly created RefSCC object and the
+ /// old one to be added to the above worklist. When that happens, this
+ /// pointer is non-null and can be used to continue processing the "top" of
+ /// the post-order walk.
+ LazyCallGraph::RefSCC *UpdatedRC;
+
+ /// If non-null, the updated current \c SCC being processed.
+ ///
+/// This is set when a graph refinement takes place and the "current" point in
+ /// the graph moves "down" or earlier in the post-order walk. This will often
+ /// cause the "current" SCC to be a newly created SCC object and the old one
+ /// to be added to the above worklist. When that happens, this pointer is
+ /// non-null and can be used to continue processing the "top" of the
+ /// post-order walk.
+ LazyCallGraph::SCC *UpdatedC;
+};
+
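// Illustration (hypothetical pass, not part of this header): the run
// signature every CGSCC pass must expose under this scheme, and how the
// update-result plumbing above is meant to be used.
struct NoOpSCCPass : PassInfoMixin<NoOpSCCPass> {
  PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
                        LazyCallGraph &CG, CGSCCUpdateResult &UR) {
    // A pass that never touches the call graph can ignore CG and UR. A pass
    // that splits or merges SCCs must push the refined pieces onto
    // UR.CWorklist / UR.RCWorklist and point UR.UpdatedC / UR.UpdatedRC at
    // the piece containing the nodes it was processing.
    return PreservedAnalyses::all();
  }
};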
/// \brief The core module pass which does a post-order walk of the SCCs and
/// runs a CGSCC pass over each one.
///
@@ -97,35 +320,129 @@ public:
// Get the call graph for this module.
LazyCallGraph &CG = AM.getResult<LazyCallGraphAnalysis>(M);
+ // We keep worklists to allow us to push more work onto the pass manager as
+ // the passes are run.
+ SmallPriorityWorklist<LazyCallGraph::RefSCC *, 1> RCWorklist;
+ SmallPriorityWorklist<LazyCallGraph::SCC *, 1> CWorklist;
+
+ // Keep sets for invalidated SCCs and RefSCCs that should be skipped when
+ // iterating off the worklists.
+ SmallPtrSet<LazyCallGraph::RefSCC *, 4> InvalidRefSCCSet;
+ SmallPtrSet<LazyCallGraph::SCC *, 4> InvalidSCCSet;
+
+ CGSCCUpdateResult UR = {RCWorklist, CWorklist, InvalidRefSCCSet,
+ InvalidSCCSet, nullptr, nullptr};
+
PreservedAnalyses PA = PreservedAnalyses::all();
- for (LazyCallGraph::RefSCC &RC : CG.postorder_ref_sccs()) {
- if (DebugLogging)
- dbgs() << "Running an SCC pass across the RefSCC: " << RC << "\n";
-
- for (LazyCallGraph::SCC &C : RC) {
- PreservedAnalyses PassPA = Pass.run(C, CGAM);
-
- // We know that the CGSCC pass couldn't have invalidated any other
- // SCC's analyses (that's the contract of a CGSCC pass), so
- // directly handle the CGSCC analysis manager's invalidation here. We
- // also update the preserved set of analyses to reflect that invalidated
- // analyses are now safe to preserve.
- // FIXME: This isn't quite correct. We need to handle the case where the
- // pass updated the CG, particularly some child of the current SCC, and
- // invalidate its analyses.
- PassPA = CGAM.invalidate(C, std::move(PassPA));
-
- // Then intersect the preserved set so that invalidation of module
- // analyses will eventually occur when the module pass completes.
- PA.intersect(std::move(PassPA));
- }
+ for (auto RCI = CG.postorder_ref_scc_begin(),
+ RCE = CG.postorder_ref_scc_end();
+ RCI != RCE;) {
+ assert(RCWorklist.empty() &&
+ "Should always start with an empty RefSCC worklist");
+ // The postorder_ref_sccs range we are walking is lazily constructed, so
+ // we only push the first one onto the worklist. The worklist allows us
+ // to capture *new* RefSCCs created during transformations.
+ //
+ // We really want to form RefSCCs lazily because that makes them cheaper
+ // to update as the program is simplified and allows us to have greater
+ // cache locality as forming a RefSCC touches all the parts of all the
+ // functions within that RefSCC.
+ //
+ // We also eagerly increment the iterator to the next position because
+ // the CGSCC passes below may delete the current RefSCC.
+ RCWorklist.insert(&*RCI++);
+
+ do {
+ LazyCallGraph::RefSCC *RC = RCWorklist.pop_back_val();
+ if (InvalidRefSCCSet.count(RC)) {
+ if (DebugLogging)
+ dbgs() << "Skipping an invalid RefSCC...\n";
+ continue;
+ }
+
+ assert(CWorklist.empty() &&
+ "Should always start with an empty SCC worklist");
+
+ if (DebugLogging)
+ dbgs() << "Running an SCC pass across the RefSCC: " << *RC << "\n";
+
+ // Push the initial SCCs in reverse post-order as we'll pop off the
+ // back and so see this in post-order.
+ for (LazyCallGraph::SCC &C : reverse(*RC))
+ CWorklist.insert(&C);
+
+ do {
+ LazyCallGraph::SCC *C = CWorklist.pop_back_val();
+ // Due to call graph mutations, we may have invalid SCCs or SCCs from
+ // other RefSCCs in the worklist. The invalid ones are dead and the
+ // other RefSCCs should be queued above, so we just need to skip both
+ // scenarios here.
+ if (InvalidSCCSet.count(C)) {
+ if (DebugLogging)
+ dbgs() << "Skipping an invalid SCC...\n";
+ continue;
+ }
+ if (&C->getOuterRefSCC() != RC) {
+ if (DebugLogging)
+ dbgs() << "Skipping an SCC that is now part of some other "
+ "RefSCC...\n";
+ continue;
+ }
+
+ do {
+ // Check that we didn't miss any update scenario.
+ assert(!InvalidSCCSet.count(C) && "Processing an invalid SCC!");
+ assert(C->begin() != C->end() && "Cannot have an empty SCC!");
+ assert(&C->getOuterRefSCC() == RC &&
+ "Processing an SCC in a different RefSCC!");
+
+ UR.UpdatedRC = nullptr;
+ UR.UpdatedC = nullptr;
+ PreservedAnalyses PassPA = Pass.run(*C, CGAM, CG, UR);
+
+ // We handle invalidating the CGSCC analysis manager's information
+ // for the (potentially updated) SCC here. Note that any other SCCs
+ // whose structure has changed should have been invalidated by
+ // whatever was updating the call graph. This SCC gets invalidated
+ // late as it contains the nodes that were actively being
+ // processed.
+ CGAM.invalidate(*(UR.UpdatedC ? UR.UpdatedC : C), PassPA);
+
+ // Then intersect the preserved set so that invalidation of module
+ // analyses will eventually occur when the module pass completes.
+ PA.intersect(std::move(PassPA));
+
+ // The pass may have restructured the call graph and refined the
+ // current SCC and/or RefSCC. We need to update our current SCC and
+ // RefSCC pointers to follow these. Also, when the current SCC is
+ // refined, re-run the SCC pass over the newly refined SCC in order
+ // to observe the most precise SCC model available. This inherently
+ // cannot cycle excessively as it only happens when we split SCCs
+ // apart, at most converging on a DAG of single nodes.
+ // FIXME: If we ever start having RefSCC passes, we'll want to
+ // iterate there too.
+ RC = UR.UpdatedRC ? UR.UpdatedRC : RC;
+ C = UR.UpdatedC ? UR.UpdatedC : C;
+ if (DebugLogging && UR.UpdatedC)
+ dbgs() << "Re-running SCC passes after a refinement of the "
+ "current SCC: "
+ << *UR.UpdatedC << "\n";
+
+ // Note that both `C` and `RC` may at this point refer to deleted,
+ // invalid SCC and RefSCCs respectively. But we will short circuit
+ // the processing when we check them in the loop above.
+ } while (UR.UpdatedC);
+
+ } while (!CWorklist.empty());
+ } while (!RCWorklist.empty());
}
- // By definition we preserve the proxy. This precludes *any* invalidation
- // of CGSCC analyses by the proxy, but that's OK because we've taken
- // care to invalidate analyses in the CGSCC analysis manager
- // incrementally above.
+ // By definition we preserve the call graph, all SCC analyses, and the
+ // analysis proxies by handling them above and in any nested pass managers.
+ PA.preserveSet<AllAnalysesOn<LazyCallGraph::SCC>>();
+ PA.preserve<LazyCallGraphAnalysis>();
PA.preserve<CGSCCAnalysisManagerModuleProxy>();
+ PA.preserve<FunctionAnalysisManagerModuleProxy>();
return PA;
}
@@ -142,17 +459,54 @@ createModuleToPostOrderCGSCCPassAdaptor(CGSCCPassT Pass, bool DebugLogging = fal
return ModuleToPostOrderCGSCCPassAdaptor<CGSCCPassT>(std::move(Pass), DebugLogging);
}
-extern template class InnerAnalysisManagerProxy<FunctionAnalysisManager,
- LazyCallGraph::SCC>;
/// A proxy from a \c FunctionAnalysisManager to an \c SCC.
-typedef InnerAnalysisManagerProxy<FunctionAnalysisManager, LazyCallGraph::SCC>
- FunctionAnalysisManagerCGSCCProxy;
+///
+/// When a module pass runs and triggers invalidation, both the CGSCC and
+/// Function analysis manager proxies on the module get an invalidation event.
+/// We don't want to fully duplicate responsibility for most of the
+/// invalidation logic. Instead, this layer is only responsible for SCC-local
+/// invalidation events. We work with the module's FunctionAnalysisManager to
+/// invalidate function analyses.
+class FunctionAnalysisManagerCGSCCProxy
+ : public AnalysisInfoMixin<FunctionAnalysisManagerCGSCCProxy> {
+public:
+ class Result {
+ public:
+ explicit Result(FunctionAnalysisManager &FAM) : FAM(&FAM) {}
+
+ /// \brief Accessor for the analysis manager.
+ FunctionAnalysisManager &getManager() { return *FAM; }
+
+ bool invalidate(LazyCallGraph::SCC &C, const PreservedAnalyses &PA,
+ CGSCCAnalysisManager::Invalidator &Inv);
+
+ private:
+ FunctionAnalysisManager *FAM;
+ };
+
+ /// Computes the \c FunctionAnalysisManager and stores it in the result proxy.
+ Result run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM, LazyCallGraph &);
+
+private:
+ friend AnalysisInfoMixin<FunctionAnalysisManagerCGSCCProxy>;
+ static AnalysisKey Key;
+};
extern template class OuterAnalysisManagerProxy<CGSCCAnalysisManager, Function>;
/// A proxy from a \c CGSCCAnalysisManager to a \c Function.
typedef OuterAnalysisManagerProxy<CGSCCAnalysisManager, Function>
CGSCCAnalysisManagerFunctionProxy;
+/// Helper to update the call graph after running a function pass.
+///
+/// Function passes can only mutate the call graph in specific ways. This
+/// routine provides a helper that updates the call graph in those ways
+/// including returning whether any changes were made and populating a CG
+/// update result struct for the overall CGSCC walk.
+LazyCallGraph::SCC &updateCGAndAnalysisManagerForFunctionPass(
+ LazyCallGraph &G, LazyCallGraph::SCC &C, LazyCallGraph::Node &N,
+ CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR, bool DebugLogging = false);
+
/// \brief Adaptor that maps from a SCC to its functions.
///
/// Designed to allow composition of a FunctionPass(Manager) and
@@ -185,37 +539,61 @@ public:
}
/// \brief Runs the function pass across every function in the module.
- PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM) {
+ PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
+ LazyCallGraph &CG, CGSCCUpdateResult &UR) {
// Setup the function analysis manager from its proxy.
FunctionAnalysisManager &FAM =
- AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C).getManager();
+ AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
+
+ SmallVector<LazyCallGraph::Node *, 4> Nodes;
+ for (LazyCallGraph::Node &N : C)
+ Nodes.push_back(&N);
+
+ // The SCC may get split while we are optimizing functions due to deleting
+ // edges. If this happens, the current SCC can shift, so keep track of
+ // a pointer we can overwrite.
+ LazyCallGraph::SCC *CurrentC = &C;
if (DebugLogging)
dbgs() << "Running function passes across an SCC: " << C << "\n";
PreservedAnalyses PA = PreservedAnalyses::all();
- for (LazyCallGraph::Node &N : C) {
- PreservedAnalyses PassPA = Pass.run(N.getFunction(), FAM);
+ for (LazyCallGraph::Node *N : Nodes) {
+ // Skip nodes from other SCCs. These may have been split out during
+ // processing. We'll eventually visit those SCCs and pick up the nodes
+ // there.
+ if (CG.lookupSCC(*N) != CurrentC)
+ continue;
+
+ PreservedAnalyses PassPA = Pass.run(N->getFunction(), FAM);
// We know that the function pass couldn't have invalidated any other
// function's analyses (that's the contract of a function pass), so
// directly handle the function analysis manager's invalidation here.
- // Also, update the preserved analyses to reflect that once invalidated
- // these can again be preserved.
- PassPA = FAM.invalidate(N.getFunction(), std::move(PassPA));
+ FAM.invalidate(N->getFunction(), PassPA);
// Then intersect the preserved set so that invalidation of module
// analyses will eventually occur when the module pass completes.
PA.intersect(std::move(PassPA));
+
+ // Update the call graph based on this function pass. This may also
+ // update the current SCC to point to a smaller, more refined SCC.
+ CurrentC = &updateCGAndAnalysisManagerForFunctionPass(
+ CG, *CurrentC, *N, AM, UR, DebugLogging);
+ assert(CG.lookupSCC(*N) == CurrentC &&
+ "Current SCC not updated to the SCC containing the current node!");
}
- // By definition we preserve the proxy. This precludes *any* invalidation
- // of function analyses by the proxy, but that's OK because we've taken
- // care to invalidate analyses in the function analysis manager
- // incrementally above.
- // FIXME: We need to update the call graph here to account for any deleted
- // edges!
+ // By definition we preserve the proxy. And we preserve all analyses on
+ // Functions. This precludes *any* invalidation of function analyses by the
+ // proxy, but that's OK because we've taken care to invalidate analyses in
+ // the function analysis manager incrementally above.
+ PA.preserveSet<AllAnalysesOn<Function>>();
PA.preserve<FunctionAnalysisManagerCGSCCProxy>();
+
+ // We've also ensured that we updated the call graph along the way.
+ PA.preserve<LazyCallGraphAnalysis>();
+
return PA;
}
@@ -232,6 +610,185 @@ createCGSCCToFunctionPassAdaptor(FunctionPassT Pass, bool DebugLogging = false)
return CGSCCToFunctionPassAdaptor<FunctionPassT>(std::move(Pass),
DebugLogging);
}
+
+/// A helper that repeats an SCC pass each time an indirect call is refined to
+/// a direct call by that pass.
+///
+/// While the CGSCC pass manager works to re-visit SCCs and RefSCCs as they
+/// change shape, we may also want to repeat an SCC pass if it simply refines
+/// an indirect call to a direct call, even if doing so does not alter the
+/// shape of the graph. Note that this only pertains to direct calls to
+/// functions where IPO across the SCC may be able to compute more precise
+/// results. For intrinsics, we assume scalar optimizations already can fully
+/// reason about them.
+///
+/// This repetition has the potential to be very large however, as each one
+/// might refine a single call site. As a consequence, in practice we use an
+/// upper bound on the number of repetitions to limit things.
+template <typename PassT>
+class DevirtSCCRepeatedPass
+ : public PassInfoMixin<DevirtSCCRepeatedPass<PassT>> {
+public:
+ explicit DevirtSCCRepeatedPass(PassT Pass, int MaxIterations,
+ bool DebugLogging = false)
+ : Pass(std::move(Pass)), MaxIterations(MaxIterations),
+ DebugLogging(DebugLogging) {}
+
+ /// Runs the wrapped pass up to \c MaxIterations on the SCC, iterating
+ /// whenever an indirect call is refined.
+ PreservedAnalyses run(LazyCallGraph::SCC &InitialC, CGSCCAnalysisManager &AM,
+ LazyCallGraph &CG, CGSCCUpdateResult &UR) {
+ PreservedAnalyses PA = PreservedAnalyses::all();
+
+ // The SCC may be refined while we are running passes over it, so set up
+ // a pointer that we can update.
+ LazyCallGraph::SCC *C = &InitialC;
+
+ // Collect value handles for all of the indirect call sites.
+ SmallVector<WeakVH, 8> CallHandles;
+
+ // Struct to track the counts of direct and indirect calls in each function
+ // of the SCC.
+ struct CallCount {
+ int Direct;
+ int Indirect;
+ };
+
+ // Put value handles on all of the indirect calls and return the number of
+ // direct calls for each function in the SCC.
+ auto ScanSCC = [](LazyCallGraph::SCC &C,
+ SmallVectorImpl<WeakVH> &CallHandles) {
+ assert(CallHandles.empty() && "Must start with a clear set of handles.");
+
+ SmallVector<CallCount, 4> CallCounts;
+ for (LazyCallGraph::Node &N : C) {
+ CallCounts.push_back({0, 0});
+ CallCount &Count = CallCounts.back();
+ for (Instruction &I : instructions(N.getFunction()))
+ if (auto CS = CallSite(&I)) {
+ if (CS.getCalledFunction()) {
+ ++Count.Direct;
+ } else {
+ ++Count.Indirect;
+ CallHandles.push_back(WeakVH(&I));
+ }
+ }
+ }
+
+ return CallCounts;
+ };
+
+ // Populate the initial call handles and get the initial call counts.
+ auto CallCounts = ScanSCC(*C, CallHandles);
+
+ for (int Iteration = 0;; ++Iteration) {
+ PreservedAnalyses PassPA = Pass.run(*C, AM, CG, UR);
+
+ // If the SCC structure has changed, bail immediately and let the outer
+ // CGSCC layer handle any iteration to reflect the refined structure.
+ if (UR.UpdatedC && UR.UpdatedC != C) {
+ PA.intersect(std::move(PassPA));
+ break;
+ }
+
+ // Check that we didn't miss any update scenario.
+ assert(!UR.InvalidatedSCCs.count(C) && "Processing an invalid SCC!");
+ assert(C->begin() != C->end() && "Cannot have an empty SCC!");
+ assert((int)CallCounts.size() == C->size() &&
+ "Cannot have changed the size of the SCC!");
+
+ // Check whether any of the handles were devirtualized.
+ auto IsDevirtualizedHandle = [&](WeakVH &CallH) {
+ if (!CallH)
+ return false;
+ auto CS = CallSite(CallH);
+ if (!CS)
+ return false;
+
+ // If the call is still indirect, leave it alone.
+ Function *F = CS.getCalledFunction();
+ if (!F)
+ return false;
+
+ if (DebugLogging)
+ dbgs() << "Found devirutalized call from "
+ << CS.getParent()->getParent()->getName() << " to "
+ << F->getName() << "\n";
+
+ // We now have a direct call where previously we had an indirect call,
+ // so iterate to process this devirtualization site.
+ return true;
+ };
+ bool Devirt = any_of(CallHandles, IsDevirtualizedHandle);
+
+ // Rescan to build up a new set of handles and count how many direct
+ // calls remain. If we decide to iterate, this also sets up the input to
+ // the next iteration.
+ CallHandles.clear();
+ auto NewCallCounts = ScanSCC(*C, CallHandles);
+
+ // If we haven't found an explicit devirtualization already see if we
+ // have decreased the number of indirect calls and increased the number
+ // of direct calls for any function in the SCC. This can be fooled by all
+ // manner of transformations such as DCE and other things, but seems to
+ // work well in practice.
+ if (!Devirt)
+ for (int i = 0, Size = C->size(); i < Size; ++i)
+ if (CallCounts[i].Indirect > NewCallCounts[i].Indirect &&
+ CallCounts[i].Direct < NewCallCounts[i].Direct) {
+ Devirt = true;
+ break;
+ }
+
+ if (!Devirt) {
+ PA.intersect(std::move(PassPA));
+ break;
+ }
+
+ // Otherwise, if we've already hit our max, we're done.
+ if (Iteration >= MaxIterations) {
+ if (DebugLogging)
+ dbgs() << "Found another devirtualization after hitting the max "
+ "number of repetitions ("
+ << MaxIterations << ") on SCC: " << *C << "\n";
+ PA.intersect(std::move(PassPA));
+ break;
+ }
+
+ if (DebugLogging)
+ dbgs() << "Repeating an SCC pass after finding a devirtualization in: "
+ << *C << "\n";
+
+ // Move over the new call counts in preparation for iterating.
+ CallCounts = std::move(NewCallCounts);
+
+ // Update the analysis manager with each run and intersect the total set
+ // of preserved analyses so we're ready to iterate.
+ AM.invalidate(*C, PassPA);
+ PA.intersect(std::move(PassPA));
+ }
+
+ // Note that we don't add any preserved entries here unlike a more normal
+ // "pass manager" because we only handle invalidation *between* iterations,
+ // not after the last iteration.
+ return PA;
+ }
+
+private:
+ PassT Pass;
+ int MaxIterations;
+ bool DebugLogging;
+};
+
+/// \brief A function to deduce an SCC pass type and wrap it in the
+/// templated adaptor.
+template <typename PassT>
+DevirtSCCRepeatedPass<PassT>
+createDevirtSCCRepeatedPass(PassT Pass, int MaxIterations,
+ bool DebugLogging = false) {
+ return DevirtSCCRepeatedPass<PassT>(std::move(Pass), MaxIterations,
+ DebugLogging);
+}
}
#endif
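Putting the pieces of this header together, a sketch of composing the adaptors it declares. Analysis registration (the LazyCallGraphAnalysis and the various manager proxies) is assumed to have been done elsewhere, and SomeFunctionPass stands in for any new-PM function pass:

ModulePassManager buildExamplePipeline(bool DebugLogging = false) {
  // Innermost: run a function pass over each function of the current SCC.
  auto FnAdaptor =
      createCGSCCToFunctionPassAdaptor(SomeFunctionPass(), DebugLogging);

  // Re-run the SCC-level work whenever it devirtualizes a call, up to four
  // times, so later iterations see the newly direct call edges.
  auto Repeated = createDevirtSCCRepeatedPass(std::move(FnAdaptor),
                                              /*MaxIterations=*/4,
                                              DebugLogging);

  // Outermost: walk the RefSCC/SCC post-order over the whole module.
  ModulePassManager MPM;
  MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(Repeated),
                                                      DebugLogging));
  return MPM;
}

The devirt-repeat layer sits between the two adaptors on purpose: it only needs to re-run the SCC-scoped work, while structural refinement of the graph is already handled by the worklists in the outer module adaptor.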
diff --git a/contrib/llvm/include/llvm/Analysis/CallGraph.h b/contrib/llvm/include/llvm/Analysis/CallGraph.h
index f37e843fed5e..4ecbaa75ac75 100644
--- a/contrib/llvm/include/llvm/Analysis/CallGraph.h
+++ b/contrib/llvm/include/llvm/Analysis/CallGraph.h
@@ -297,7 +297,7 @@ private:
/// resulting data.
class CallGraphAnalysis : public AnalysisInfoMixin<CallGraphAnalysis> {
friend AnalysisInfoMixin<CallGraphAnalysis>;
- static char PassID;
+ static AnalysisKey Key;
public:
/// \brief A formulaic typedef to inform clients of the result type.
@@ -315,7 +315,7 @@ class CallGraphPrinterPass : public PassInfoMixin<CallGraphPrinterPass> {
public:
explicit CallGraphPrinterPass(raw_ostream &OS) : OS(OS) {}
- PreservedAnalyses run(Module &M, AnalysisManager<Module> &AM);
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
};
/// \brief The \c ModulePass which wraps up a \c CallGraph and the logic to
@@ -409,94 +409,87 @@ public:
// Provide graph traits for traversing call graphs using standard graph
// traversals.
template <> struct GraphTraits<CallGraphNode *> {
- typedef CallGraphNode NodeType;
typedef CallGraphNode *NodeRef;
typedef CallGraphNode::CallRecord CGNPairTy;
- typedef std::pointer_to_unary_function<CGNPairTy, CallGraphNode *>
- CGNDerefFun;
- static NodeType *getEntryNode(CallGraphNode *CGN) { return CGN; }
+ static NodeRef getEntryNode(CallGraphNode *CGN) { return CGN; }
- typedef mapped_iterator<NodeType::iterator, CGNDerefFun> ChildIteratorType;
+ static CallGraphNode *CGNGetValue(CGNPairTy P) { return P.second; }
- static inline ChildIteratorType child_begin(NodeType *N) {
- return map_iterator(N->begin(), CGNDerefFun(CGNDeref));
+ typedef mapped_iterator<CallGraphNode::iterator, decltype(&CGNGetValue)>
+ ChildIteratorType;
+
+ static ChildIteratorType child_begin(NodeRef N) {
+ return ChildIteratorType(N->begin(), &CGNGetValue);
}
- static inline ChildIteratorType child_end(NodeType *N) {
- return map_iterator(N->end(), CGNDerefFun(CGNDeref));
+ static ChildIteratorType child_end(NodeRef N) {
+ return ChildIteratorType(N->end(), &CGNGetValue);
}
-
- static CallGraphNode *CGNDeref(CGNPairTy P) { return P.second; }
};
template <> struct GraphTraits<const CallGraphNode *> {
- typedef const CallGraphNode NodeType;
typedef const CallGraphNode *NodeRef;
typedef CallGraphNode::CallRecord CGNPairTy;
- typedef std::pointer_to_unary_function<CGNPairTy, const CallGraphNode *>
- CGNDerefFun;
- static NodeType *getEntryNode(const CallGraphNode *CGN) { return CGN; }
+ static NodeRef getEntryNode(const CallGraphNode *CGN) { return CGN; }
- typedef mapped_iterator<NodeType::const_iterator, CGNDerefFun>
+ static const CallGraphNode *CGNGetValue(CGNPairTy P) { return P.second; }
+
+ typedef mapped_iterator<CallGraphNode::const_iterator, decltype(&CGNGetValue)>
ChildIteratorType;
- static inline ChildIteratorType child_begin(NodeType *N) {
- return map_iterator(N->begin(), CGNDerefFun(CGNDeref));
+ static ChildIteratorType child_begin(NodeRef N) {
+ return ChildIteratorType(N->begin(), &CGNGetValue);
}
- static inline ChildIteratorType child_end(NodeType *N) {
- return map_iterator(N->end(), CGNDerefFun(CGNDeref));
+ static ChildIteratorType child_end(NodeRef N) {
+ return ChildIteratorType(N->end(), &CGNGetValue);
}
-
- static const CallGraphNode *CGNDeref(CGNPairTy P) { return P.second; }
};
template <>
struct GraphTraits<CallGraph *> : public GraphTraits<CallGraphNode *> {
- static NodeType *getEntryNode(CallGraph *CGN) {
+ static NodeRef getEntryNode(CallGraph *CGN) {
return CGN->getExternalCallingNode(); // Start at the external node!
}
typedef std::pair<const Function *const, std::unique_ptr<CallGraphNode>>
PairTy;
- typedef std::pointer_to_unary_function<const PairTy &, CallGraphNode &>
- DerefFun;
+ static CallGraphNode *CGGetValuePtr(const PairTy &P) {
+ return P.second.get();
+ }
// nodes_iterator/begin/end - Allow iteration over all nodes in the graph
- typedef mapped_iterator<CallGraph::iterator, DerefFun> nodes_iterator;
+ typedef mapped_iterator<CallGraph::iterator, decltype(&CGGetValuePtr)>
+ nodes_iterator;
static nodes_iterator nodes_begin(CallGraph *CG) {
- return map_iterator(CG->begin(), DerefFun(CGdereference));
+ return nodes_iterator(CG->begin(), &CGGetValuePtr);
}
static nodes_iterator nodes_end(CallGraph *CG) {
- return map_iterator(CG->end(), DerefFun(CGdereference));
+ return nodes_iterator(CG->end(), &CGGetValuePtr);
}
-
- static CallGraphNode &CGdereference(const PairTy &P) { return *P.second; }
};
template <>
struct GraphTraits<const CallGraph *> : public GraphTraits<
const CallGraphNode *> {
- static NodeType *getEntryNode(const CallGraph *CGN) {
+ static NodeRef getEntryNode(const CallGraph *CGN) {
return CGN->getExternalCallingNode(); // Start at the external node!
}
typedef std::pair<const Function *const, std::unique_ptr<CallGraphNode>>
PairTy;
- typedef std::pointer_to_unary_function<const PairTy &, const CallGraphNode &>
- DerefFun;
+ static const CallGraphNode *CGGetValuePtr(const PairTy &P) {
+ return P.second.get();
+ }
// nodes_iterator/begin/end - Allow iteration over all nodes in the graph
- typedef mapped_iterator<CallGraph::const_iterator, DerefFun> nodes_iterator;
+ typedef mapped_iterator<CallGraph::const_iterator, decltype(&CGGetValuePtr)>
+ nodes_iterator;
static nodes_iterator nodes_begin(const CallGraph *CG) {
- return map_iterator(CG->begin(), DerefFun(CGdereference));
+ return nodes_iterator(CG->begin(), &CGGetValuePtr);
}
static nodes_iterator nodes_end(const CallGraph *CG) {
- return map_iterator(CG->end(), DerefFun(CGdereference));
- }
-
- static const CallGraphNode &CGdereference(const PairTy &P) {
- return *P.second;
+ return nodes_iterator(CG->end(), &CGGetValuePtr);
}
};
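As a quick check that the rewritten traits still compose with the generic graph algorithms, a sketch (the helper function is hypothetical) of walking a call graph through them:

#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Support/raw_ostream.h"

void dumpReachableCallCounts(llvm::CallGraph &CG) {
  using namespace llvm;
  // depth_first uses GraphTraits<CallGraph *> / <CallGraphNode *> above;
  // NodeRef is CallGraphNode *, so that is what the range yields.
  for (CallGraphNode *CGN : depth_first(&CG)) {
    if (Function *F = CGN->getFunction())
      errs() << F->getName() << ": " << CGN->size() << " call record(s)\n";
  }
}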
diff --git a/contrib/llvm/include/llvm/Analysis/CallGraphSCCPass.h b/contrib/llvm/include/llvm/Analysis/CallGraphSCCPass.h
index cb35b3292be7..f86f64bbb67d 100644
--- a/contrib/llvm/include/llvm/Analysis/CallGraphSCCPass.h
+++ b/contrib/llvm/include/llvm/Analysis/CallGraphSCCPass.h
@@ -94,8 +94,8 @@ class CallGraphSCC {
public:
CallGraphSCC(CallGraph &cg, void *context) : CG(cg), Context(context) {}
- void initialize(CallGraphNode *const *I, CallGraphNode *const *E) {
- Nodes.assign(I, E);
+ void initialize(ArrayRef<CallGraphNode *> NewNodes) {
+ Nodes.assign(NewNodes.begin(), NewNodes.end());
}
bool isSingular() const { return Nodes.size() == 1; }
diff --git a/contrib/llvm/include/llvm/Analysis/CodeMetrics.h b/contrib/llvm/include/llvm/Analysis/CodeMetrics.h
index f512aca57865..9e861ac18825 100644
--- a/contrib/llvm/include/llvm/Analysis/CodeMetrics.h
+++ b/contrib/llvm/include/llvm/Analysis/CodeMetrics.h
@@ -87,7 +87,7 @@ struct CodeMetrics {
/// \brief Add information about a block to the current state.
void analyzeBasicBlock(const BasicBlock *BB, const TargetTransformInfo &TTI,
- SmallPtrSetImpl<const Value*> &EphValues);
+ const SmallPtrSetImpl<const Value*> &EphValues);
/// \brief Collect a loop's ephemeral values (those used only by an assume
/// or similar intrinsics in the loop).
diff --git a/contrib/llvm/include/llvm/Analysis/ConstantFolding.h b/contrib/llvm/include/llvm/Analysis/ConstantFolding.h
index b1504004d83c..517842c8b0dc 100644
--- a/contrib/llvm/include/llvm/Analysis/ConstantFolding.h
+++ b/contrib/llvm/include/llvm/Analysis/ConstantFolding.h
@@ -23,8 +23,10 @@
namespace llvm {
class APInt;
template <typename T> class ArrayRef;
+class CallSite;
class Constant;
class ConstantExpr;
+class ConstantVector;
class DataLayout;
class Function;
class GlobalValue;
@@ -45,11 +47,10 @@ bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, APInt &Offset,
Constant *ConstantFoldInstruction(Instruction *I, const DataLayout &DL,
const TargetLibraryInfo *TLI = nullptr);
-/// ConstantFoldConstantExpression - Attempt to fold the constant expression
-/// using the specified DataLayout. If successful, the constant result is
-/// result is returned, if not, null is returned.
-Constant *
-ConstantFoldConstantExpression(const ConstantExpr *CE, const DataLayout &DL,
+/// ConstantFoldConstant - Attempt to fold the constant using the
+/// specified DataLayout.
+/// If successful, the constant result is returned, if not, null is returned.
+Constant *ConstantFoldConstant(const Constant *C, const DataLayout &DL,
const TargetLibraryInfo *TLI = nullptr);
/// ConstantFoldInstOperands - Attempt to constant fold an instruction with the
@@ -62,19 +63,6 @@ Constant *ConstantFoldInstOperands(Instruction *I, ArrayRef<Constant *> Ops,
const DataLayout &DL,
const TargetLibraryInfo *TLI = nullptr);
-/// ConstantFoldInstOperands - Attempt to constant fold an instruction with the
-/// specified operands. If successful, the constant result is returned, if not,
-/// null is returned. Note that this function can fail when attempting to
-/// fold instructions like loads and stores, which have no constant expression
-/// form.
-///
-/// This function doesn't work for compares (use ConstantFoldCompareInstOperands
-/// for this) and GEPs.
-Constant *ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
- ArrayRef<Constant *> Ops,
- const DataLayout &DL,
- const TargetLibraryInfo *TLI = nullptr);
-
/// ConstantFoldCompareInstOperands - Attempt to constant fold a compare
/// instruction (icmp/fcmp) with the specified operands. If it fails, it
/// returns a constant expression of the specified operands.
@@ -137,6 +125,10 @@ bool canConstantFoldCallTo(const Function *F);
/// with the specified arguments, returning null if unsuccessful.
Constant *ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
const TargetLibraryInfo *TLI = nullptr);
+
+/// \brief Check whether the given call has no side-effects.
+/// Specifically checks for math routines which sometimes set errno.
+bool isMathLibCallNoop(CallSite CS, const TargetLibraryInfo *TLI);
}
#endif
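Call sites migrate from the removed ConstantFoldConstantExpression to the new ConstantFoldConstant, which accepts any Constant. A sketch of the new shape (tryFold is a hypothetical wrapper, not part of the API):

#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DataLayout.h"
using namespace llvm;

Constant *tryFold(Constant *C, const DataLayout &DL,
                  const TargetLibraryInfo *TLI) {
  // No dyn_cast<ConstantExpr> dance is needed any more; hand the constant
  // over and fall back to the original if nothing folded.
  if (Constant *Folded = ConstantFoldConstant(C, DL, TLI))
    return Folded;
  return C;
}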
diff --git a/contrib/llvm/include/llvm/Analysis/DemandedBits.h b/contrib/llvm/include/llvm/Analysis/DemandedBits.h
index fafd5d00b481..c603274a7161 100644
--- a/contrib/llvm/include/llvm/Analysis/DemandedBits.h
+++ b/contrib/llvm/include/llvm/Analysis/DemandedBits.h
@@ -89,7 +89,7 @@ public:
/// An analysis that produces \c DemandedBits for a function.
class DemandedBitsAnalysis : public AnalysisInfoMixin<DemandedBitsAnalysis> {
friend AnalysisInfoMixin<DemandedBitsAnalysis>;
- static char PassID;
+ static AnalysisKey Key;
public:
/// \brief Provide the result typedef for this analysis pass.
@@ -97,7 +97,7 @@ public:
/// \brief Run the analysis pass over a function and produce demanded bits
/// information.
- DemandedBits run(Function &F, AnalysisManager<Function> &AM);
+ DemandedBits run(Function &F, FunctionAnalysisManager &AM);
};
/// \brief Printer pass for DemandedBits
@@ -106,7 +106,7 @@ class DemandedBitsPrinterPass : public PassInfoMixin<DemandedBitsPrinterPass> {
public:
explicit DemandedBitsPrinterPass(raw_ostream &OS) : OS(OS) {}
- PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM);
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
/// Create a demanded bits analysis pass.
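The PassID → AnalysisKey change here recurs throughout this patch; the following is a brief sketch of the new-pass-manager convention it implies, using made-up names (MyAnalysis, MyResult) rather than anything declared in these headers.

#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"

namespace {
struct MyResult {
  unsigned NumBlocks = 0;
};

// Hypothetical analysis following the AnalysisInfoMixin/AnalysisKey pattern.
class MyAnalysis : public llvm::AnalysisInfoMixin<MyAnalysis> {
  friend llvm::AnalysisInfoMixin<MyAnalysis>;
  static llvm::AnalysisKey Key; // replaces the old 'static char PassID'

public:
  using Result = MyResult;

  Result run(llvm::Function &F, llvm::FunctionAnalysisManager &) {
    Result R;
    R.NumBlocks = static_cast<unsigned>(F.size());
    return R;
  }
};

llvm::AnalysisKey MyAnalysis::Key;
} // end anonymous namespace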
diff --git a/contrib/llvm/include/llvm/Analysis/DependenceAnalysis.h b/contrib/llvm/include/llvm/Analysis/DependenceAnalysis.h
index 32dd367a9c0a..90f33b8c42e5 100644
--- a/contrib/llvm/include/llvm/Analysis/DependenceAnalysis.h
+++ b/contrib/llvm/include/llvm/Analysis/DependenceAnalysis.h
@@ -70,13 +70,8 @@ template <typename T> class ArrayRef;
/// itself.
class Dependence {
protected:
- Dependence(const Dependence &) = default;
-
- // FIXME: When we move to MSVC 2015 as the base compiler for Visual Studio
- // support, uncomment this line to allow a defaulted move constructor for
- // Dependence. Currently, FullDependence relies on the copy constructor, but
- // that is acceptable given the triviality of the class.
- // Dependence(Dependence &&) = default;
+ Dependence(Dependence &&) = default;
+ Dependence &operator=(Dependence &&) = default;
public:
Dependence(Instruction *Source,
@@ -222,11 +217,6 @@ template <typename T> class ArrayRef;
FullDependence(Instruction *Src, Instruction *Dst, bool LoopIndependent,
unsigned Levels);
- FullDependence(FullDependence &&RHS)
- : Dependence(std::move(RHS)), Levels(RHS.Levels),
- LoopIndependent(RHS.LoopIndependent), Consistent(RHS.Consistent),
- DV(std::move(RHS.DV)) {}
-
/// isLoopIndependent - Returns true if this is a loop-independent
/// dependence.
bool isLoopIndependent() const override { return LoopIndependent; }
@@ -931,7 +921,7 @@ template <typename T> class ArrayRef;
Result run(Function &F, FunctionAnalysisManager &FAM);
private:
- static char PassID;
+ static AnalysisKey Key;
friend struct AnalysisInfoMixin<DependenceAnalysis>;
}; // class DependenceAnalysis
diff --git a/contrib/llvm/include/llvm/Analysis/DominanceFrontier.h b/contrib/llvm/include/llvm/Analysis/DominanceFrontier.h
index 79672e4e4225..b9667f801ed3 100644
--- a/contrib/llvm/include/llvm/Analysis/DominanceFrontier.h
+++ b/contrib/llvm/include/llvm/Analysis/DominanceFrontier.h
@@ -171,14 +171,14 @@ extern template class ForwardDominanceFrontierBase<BasicBlock>;
class DominanceFrontierAnalysis
: public AnalysisInfoMixin<DominanceFrontierAnalysis> {
friend AnalysisInfoMixin<DominanceFrontierAnalysis>;
- static char PassID;
+ static AnalysisKey Key;
public:
/// \brief Provide the result typedef for this analysis pass.
typedef DominanceFrontier Result;
/// \brief Run the analysis pass over a function and produce a dominator tree.
- DominanceFrontier run(Function &F, AnalysisManager<Function> &AM);
+ DominanceFrontier run(Function &F, FunctionAnalysisManager &AM);
};
/// \brief Printer pass for the \c DominanceFrontier.
@@ -188,7 +188,7 @@ class DominanceFrontierPrinterPass
public:
explicit DominanceFrontierPrinterPass(raw_ostream &OS);
- PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM);
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
} // End llvm namespace
diff --git a/contrib/llvm/include/llvm/Analysis/EHPersonalities.h b/contrib/llvm/include/llvm/Analysis/EHPersonalities.h
index a26c575cfe10..2c45ab4693e6 100644
--- a/contrib/llvm/include/llvm/Analysis/EHPersonalities.h
+++ b/contrib/llvm/include/llvm/Analysis/EHPersonalities.h
@@ -12,6 +12,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/TinyPtrVector.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/Support/ErrorHandling.h"
namespace llvm {
@@ -39,6 +40,10 @@ enum class EHPersonality {
/// Unknown.
EHPersonality classifyEHPersonality(const Value *Pers);
+StringRef getEHPersonalityName(EHPersonality Pers);
+
+EHPersonality getDefaultEHPersonality(const Triple &T);
+
/// \brief Returns true if this personality function catches asynchronous
/// exceptions.
inline bool isAsynchronousEHPersonality(EHPersonality Pers) {
diff --git a/contrib/llvm/include/llvm/Analysis/GlobalsModRef.h b/contrib/llvm/include/llvm/Analysis/GlobalsModRef.h
index 4c0a98949778..09cef68ce70f 100644
--- a/contrib/llvm/include/llvm/Analysis/GlobalsModRef.h
+++ b/contrib/llvm/include/llvm/Analysis/GlobalsModRef.h
@@ -120,12 +120,12 @@ private:
/// Analysis pass providing a never-invalidated alias analysis result.
class GlobalsAA : public AnalysisInfoMixin<GlobalsAA> {
friend AnalysisInfoMixin<GlobalsAA>;
- static char PassID;
+ static AnalysisKey Key;
public:
typedef GlobalsAAResult Result;
- GlobalsAAResult run(Module &M, AnalysisManager<Module> &AM);
+ GlobalsAAResult run(Module &M, ModuleAnalysisManager &AM);
};
/// Legacy wrapper pass to provide the GlobalsAAResult object.
diff --git a/contrib/llvm/include/llvm/Analysis/IVUsers.h b/contrib/llvm/include/llvm/Analysis/IVUsers.h
index e68a77526b96..e1a5467d8b63 100644
--- a/contrib/llvm/include/llvm/Analysis/IVUsers.h
+++ b/contrib/llvm/include/llvm/Analysis/IVUsers.h
@@ -16,6 +16,7 @@
#define LLVM_ANALYSIS_IVUSERS_H
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/LoopPassManager.h"
#include "llvm/Analysis/ScalarEvolutionNormalization.h"
#include "llvm/IR/ValueHandle.h"
@@ -90,33 +91,6 @@ private:
void deleted() override;
};
-template<> struct ilist_traits<IVStrideUse>
- : public ilist_default_traits<IVStrideUse> {
- // createSentinel is used to get hold of a node that marks the end of
- // the list...
- // The sentinel is relative to this instance, so we use a non-static
- // method.
- IVStrideUse *createSentinel() const {
- // since i(p)lists always publicly derive from the corresponding
- // traits, placing a data member in this class will augment i(p)list.
- // But since the NodeTy is expected to publicly derive from
- // ilist_node<NodeTy>, there is a legal viable downcast from it
- // to NodeTy. We use this trick to superpose i(p)list with a "ghostly"
- // NodeTy, which becomes the sentinel. Dereferencing the sentinel is
- // forbidden (save the ilist_node<NodeTy>) so no one will ever notice
- // the superposition.
- return static_cast<IVStrideUse*>(&Sentinel);
- }
- static void destroySentinel(IVStrideUse*) {}
-
- IVStrideUse *provideInitialHead() const { return createSentinel(); }
- IVStrideUse *ensureHead(IVStrideUse*) const { return createSentinel(); }
- static void noteHead(IVStrideUse*, IVStrideUse*) {}
-
-private:
- mutable ilist_node<IVStrideUse> Sentinel;
-};
-
class IVUsers {
friend class IVStrideUse;
Loop *L;
@@ -137,6 +111,17 @@ public:
IVUsers(Loop *L, AssumptionCache *AC, LoopInfo *LI, DominatorTree *DT,
ScalarEvolution *SE);
+ IVUsers(IVUsers &&X)
+ : L(std::move(X.L)), AC(std::move(X.AC)), DT(std::move(X.DT)),
+ SE(std::move(X.SE)), Processed(std::move(X.Processed)),
+ IVUses(std::move(X.IVUses)), EphValues(std::move(X.EphValues)) {
+ for (IVStrideUse &U : IVUses)
+ U.Parent = this;
+ }
+ IVUsers(const IVUsers &) = delete;
+ IVUsers &operator=(IVUsers &&) = delete;
+ IVUsers &operator=(const IVUsers &) = delete;
+
Loop *getLoop() const { return L; }
/// AddUsersIfInteresting - Inspect the specified Instruction. If it is a
@@ -203,12 +188,12 @@ public:
/// Analysis pass that exposes the \c IVUsers for a loop.
class IVUsersAnalysis : public AnalysisInfoMixin<IVUsersAnalysis> {
friend AnalysisInfoMixin<IVUsersAnalysis>;
- static char PassID;
+ static AnalysisKey Key;
public:
typedef IVUsers Result;
- IVUsers run(Loop &L, AnalysisManager<Loop> &AM);
+ IVUsers run(Loop &L, LoopAnalysisManager &AM);
};
/// Printer pass for the \c IVUsers for a loop.
@@ -217,7 +202,7 @@ class IVUsersPrinterPass : public PassInfoMixin<IVUsersPrinterPass> {
public:
explicit IVUsersPrinterPass(raw_ostream &OS) : OS(OS) {}
- PreservedAnalyses run(Loop &L, AnalysisManager<Loop> &AM);
+ PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM);
};
}
diff --git a/contrib/llvm/include/llvm/Analysis/InlineCost.h b/contrib/llvm/include/llvm/Analysis/InlineCost.h
index 2928d2be30e5..5e7b00261f63 100644
--- a/contrib/llvm/include/llvm/Analysis/InlineCost.h
+++ b/contrib/llvm/include/llvm/Analysis/InlineCost.h
@@ -15,6 +15,7 @@
#define LLVM_ANALYSIS_INLINECOST_H
#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include <cassert>
#include <climits>
@@ -27,16 +28,26 @@ class ProfileSummaryInfo;
class TargetTransformInfo;
namespace InlineConstants {
- // Various magic constants used to adjust heuristics.
- const int InstrCost = 5;
- const int IndirectCallThreshold = 100;
- const int CallPenalty = 25;
- const int LastCallToStaticBonus = -15000;
- const int ColdccPenalty = 2000;
- const int NoreturnPenalty = 10000;
- /// Do not inline functions which allocate this many bytes on the stack
- /// when the caller is recursive.
- const unsigned TotalAllocaSizeRecursiveCaller = 1024;
+// Various thresholds used by inline cost analysis.
+/// Use when optsize (-Os) is specified.
+const int OptSizeThreshold = 50;
+
+/// Use when minsize (-Oz) is specified.
+const int OptMinSizeThreshold = 5;
+
+/// Use when -O3 is specified.
+const int OptAggressiveThreshold = 250;
+
+// Various magic constants used to adjust heuristics.
+const int InstrCost = 5;
+const int IndirectCallThreshold = 100;
+const int CallPenalty = 25;
+const int LastCallToStaticBonus = 15000;
+const int ColdccPenalty = 2000;
+const int NoreturnPenalty = 10000;
+/// Do not inline functions which allocate this many bytes on the stack
+/// when the caller is recursive.
+const unsigned TotalAllocaSizeRecursiveCaller = 1024;
}
/// \brief Represents the cost of inlining a function.
@@ -99,6 +110,52 @@ public:
int getCostDelta() const { return Threshold - getCost(); }
};
+/// Thresholds to tune inline cost analysis. The inline cost analysis decides
+/// which threshold's condition holds and applies that threshold; otherwise,
+/// DefaultThreshold is used. If a threshold is Optional, it is applied only
+/// when it has a valid value. Typically, users of inline cost analysis
+/// obtain an InlineParams object through one of the \c getInlineParams methods
+/// and pass it to \c getInlineCost. Some specialized versions of the inliner
+/// (such as the pre-inliner) might have custom logic to compute the
+/// \c InlineParams object.
+
+struct InlineParams {
+ /// The default threshold to start with for a callee.
+ int DefaultThreshold;
+
+ /// Threshold to use for callees with inline hint.
+ Optional<int> HintThreshold;
+
+ /// Threshold to use for cold callees.
+ Optional<int> ColdThreshold;
+
+ /// Threshold to use when the caller is optimized for size.
+ Optional<int> OptSizeThreshold;
+
+ /// Threshold to use when the caller is optimized for minsize.
+ Optional<int> OptMinSizeThreshold;
+
+ /// Threshold to use when the callsite is considered hot.
+ Optional<int> HotCallSiteThreshold;
+};
+
+/// Generate the parameters to tune the inline cost analysis based only on the
+/// commandline options.
+InlineParams getInlineParams();
+
+/// Generate the parameters to tune the inline cost analysis based on command
+/// line options. If -inline-threshold option is not explicitly passed,
+/// \p Threshold is used as the default threshold.
+InlineParams getInlineParams(int Threshold);
+
+/// Generate the parameters to tune the inline cost analysis based on command
+/// line options. If -inline-threshold option is not explicitly passed,
+/// the default threshold is computed from \p OptLevel and \p SizeOptLevel.
+/// An \p OptLevel value above 3 is considered an aggressive optimization mode.
+/// \p SizeOptLevel of 1 corresponds to the -Os flag and 2 corresponds to
+/// the -Oz flag.
+InlineParams getInlineParams(unsigned OptLevel, unsigned SizeOptLevel);
+
/// \brief Get an InlineCost object representing the cost of inlining this
/// callsite.
///
@@ -110,23 +167,22 @@ public:
///
/// Also note that calling this function *dynamically* computes the cost of
/// inlining the callsite. It is an expensive, heavyweight call.
-InlineCost getInlineCost(CallSite CS, int DefaultThreshold,
- TargetTransformInfo &CalleeTTI,
- AssumptionCacheTracker *ACT, ProfileSummaryInfo *PSI);
+InlineCost
+getInlineCost(CallSite CS, const InlineParams &Params,
+ TargetTransformInfo &CalleeTTI,
+ std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
+ ProfileSummaryInfo *PSI);
/// \brief Get an InlineCost with the callee explicitly specified.
/// This allows you to calculate the cost of inlining a function via a
/// pointer. This behaves exactly as the version with no explicit callee
/// parameter in all other respects.
//
-InlineCost getInlineCost(CallSite CS, Function *Callee, int DefaultThreshold,
- TargetTransformInfo &CalleeTTI,
- AssumptionCacheTracker *ACT, ProfileSummaryInfo *PSI);
-
-int computeThresholdFromOptLevels(unsigned OptLevel, unsigned SizeOptLevel);
-
-/// \brief Return the default value of -inline-threshold.
-int getDefaultInlineThreshold();
+InlineCost
+getInlineCost(CallSite CS, Function *Callee, const InlineParams &Params,
+ TargetTransformInfo &CalleeTTI,
+ std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
+ ProfileSummaryInfo *PSI);
/// \brief Minimal filter to detect invalid constructs for inlining.
bool isInlineViable(Function &Callee);
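A hedged sketch of how a caller might combine the new InlineParams and getInlineCost interfaces described above; shouldInline and its parameters are assumptions about the calling pass, not declarations from this header.

#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/CallSite.h"
#include <functional>

using namespace llvm;

// Hypothetical: decide whether a call site is worth inlining at -O2.
static bool shouldInline(CallSite CS, TargetTransformInfo &CalleeTTI,
                         AssumptionCacheTracker &ACT,
                         ProfileSummaryInfo *PSI) {
  // Derive thresholds from the optimization levels (OptLevel = 2, SizeOptLevel = 0).
  InlineParams Params = getInlineParams(2, 0);

  // The new interface takes a callback rather than the tracker itself.
  std::function<AssumptionCache &(Function &)> GetAssumptionCache =
      [&](Function &F) -> AssumptionCache & { return ACT.getAssumptionCache(F); };

  InlineCost IC = getInlineCost(CS, Params, CalleeTTI, GetAssumptionCache, PSI);
  if (IC.isAlways())
    return true;
  if (IC.isNever())
    return false;
  return IC.getCostDelta() > 0; // positive delta: cost is under the threshold
}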
diff --git a/contrib/llvm/include/llvm/Analysis/InstructionSimplify.h b/contrib/llvm/include/llvm/Analysis/InstructionSimplify.h
index 410fa4165a91..47d6118313cb 100644
--- a/contrib/llvm/include/llvm/Analysis/InstructionSimplify.h
+++ b/contrib/llvm/include/llvm/Analysis/InstructionSimplify.h
@@ -238,12 +238,13 @@ namespace llvm {
AssumptionCache *AC = nullptr,
const Instruction *CxtI = nullptr);
- /// Given operands for an TruncInst, fold the result or return null.
- Value *SimplifyTruncInst(Value *Op, Type *Ty, const DataLayout &DL,
- const TargetLibraryInfo *TLI = nullptr,
- const DominatorTree *DT = nullptr,
- AssumptionCache *AC = nullptr,
- const Instruction *CxtI = nullptr);
+ /// Given operands for a CastInst, fold the result or return null.
+ Value *SimplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty,
+ const DataLayout &DL,
+ const TargetLibraryInfo *TLI = nullptr,
+ const DominatorTree *DT = nullptr,
+ AssumptionCache *AC = nullptr,
+ const Instruction *CxtI = nullptr);
//=== Helper functions for higher up the class hierarchy.
diff --git a/contrib/llvm/include/llvm/Analysis/Interval.h b/contrib/llvm/include/llvm/Analysis/Interval.h
index a904753adaab..a63a004043cc 100644
--- a/contrib/llvm/include/llvm/Analysis/Interval.h
+++ b/contrib/llvm/include/llvm/Analysis/Interval.h
@@ -121,30 +121,22 @@ inline Interval::pred_iterator pred_end(Interval *I) {
}
template <> struct GraphTraits<Interval*> {
- typedef Interval NodeType;
+ typedef Interval *NodeRef;
typedef Interval::succ_iterator ChildIteratorType;
- static NodeType *getEntryNode(Interval *I) { return I; }
+ static NodeRef getEntryNode(Interval *I) { return I; }
/// nodes_iterator/begin/end - Allow iteration over all nodes in the graph
- static inline ChildIteratorType child_begin(NodeType *N) {
- return succ_begin(N);
- }
- static inline ChildIteratorType child_end(NodeType *N) {
- return succ_end(N);
- }
+ static ChildIteratorType child_begin(NodeRef N) { return succ_begin(N); }
+ static ChildIteratorType child_end(NodeRef N) { return succ_end(N); }
};
template <> struct GraphTraits<Inverse<Interval*> > {
- typedef Interval NodeType;
+ typedef Interval *NodeRef;
typedef Interval::pred_iterator ChildIteratorType;
- static NodeType *getEntryNode(Inverse<Interval *> G) { return G.Graph; }
- static inline ChildIteratorType child_begin(NodeType *N) {
- return pred_begin(N);
- }
- static inline ChildIteratorType child_end(NodeType *N) {
- return pred_end(N);
- }
+ static NodeRef getEntryNode(Inverse<Interval *> G) { return G.Graph; }
+ static ChildIteratorType child_begin(NodeRef N) { return pred_begin(N); }
+ static ChildIteratorType child_end(NodeRef N) { return pred_end(N); }
};
} // End llvm namespace
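The NodeType → NodeRef migration shown in this hunk (and in the later GraphTraits changes in this patch) is what generic graph algorithms are written against; here is a small sketch of such generic code under that convention, with countReachable as a made-up helper.

#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"

// Hypothetical generic helper: count the nodes reachable from the entry node.
// Written against the post-migration interface, where NodeRef is a cheap,
// copyable handle (a pointer for the specializations in this patch).
template <typename GraphT, typename GT = llvm::GraphTraits<GraphT>>
unsigned countReachable(GraphT G) {
  using NodeRef = typename GT::NodeRef;
  llvm::SmallPtrSet<NodeRef, 16> Visited;
  llvm::SmallVector<NodeRef, 16> Worklist;
  Worklist.push_back(GT::getEntryNode(G));
  while (!Worklist.empty()) {
    NodeRef N = Worklist.pop_back_val();
    if (!Visited.insert(N).second)
      continue;
    for (auto It = GT::child_begin(N), E = GT::child_end(N); It != E; ++It)
      Worklist.push_back(*It);
  }
  return Visited.size();
}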
diff --git a/contrib/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h b/contrib/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h
index 37da5617b913..af788c818f80 100644
--- a/contrib/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h
+++ b/contrib/llvm/include/llvm/Analysis/IteratedDominanceFrontier.h
@@ -89,7 +89,6 @@ private:
DenseMap<DomTreeNode *, unsigned> DomLevels;
const SmallPtrSetImpl<BasicBlock *> *LiveInBlocks;
const SmallPtrSetImpl<BasicBlock *> *DefBlocks;
- SmallVector<BasicBlock *, 32> PHIBlocks;
};
typedef IDFCalculator<BasicBlock *> ForwardIDFCalculator;
typedef IDFCalculator<Inverse<BasicBlock *>> ReverseIDFCalculator;
diff --git a/contrib/llvm/include/llvm/Analysis/LazyBlockFrequencyInfo.h b/contrib/llvm/include/llvm/Analysis/LazyBlockFrequencyInfo.h
index a2d24bb9eb88..5a02b9dce463 100644
--- a/contrib/llvm/include/llvm/Analysis/LazyBlockFrequencyInfo.h
+++ b/contrib/llvm/include/llvm/Analysis/LazyBlockFrequencyInfo.h
@@ -18,6 +18,7 @@
#define LLVM_ANALYSIS_LAZYBLOCKFREQUENCYINFO_H
#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/LazyBranchProbabilityInfo.h"
#include "llvm/Pass.h"
namespace llvm {
@@ -57,21 +58,21 @@ class LazyBlockFrequencyInfoPass : public FunctionPass {
class LazyBlockFrequencyInfo {
public:
LazyBlockFrequencyInfo()
- : Calculated(false), F(nullptr), BPI(nullptr), LI(nullptr) {}
+ : Calculated(false), F(nullptr), BPIPass(nullptr), LI(nullptr) {}
/// Set up the per-function input.
- void setAnalysis(const Function *F, const BranchProbabilityInfo *BPI,
+ void setAnalysis(const Function *F, LazyBranchProbabilityInfoPass *BPIPass,
const LoopInfo *LI) {
this->F = F;
- this->BPI = BPI;
+ this->BPIPass = BPIPass;
this->LI = LI;
}
/// Retrieve the BFI with the block frequencies computed.
BlockFrequencyInfo &getCalculated() {
if (!Calculated) {
- assert(F && BPI && LI && "call setAnalysis");
- BFI.calculate(*F, *BPI, *LI);
+ assert(F && BPIPass && LI && "call setAnalysis");
+ BFI.calculate(*F, BPIPass->getBPI(), *LI);
Calculated = true;
}
return BFI;
@@ -91,7 +92,7 @@ class LazyBlockFrequencyInfoPass : public FunctionPass {
BlockFrequencyInfo BFI;
bool Calculated;
const Function *F;
- const BranchProbabilityInfo *BPI;
+ LazyBranchProbabilityInfoPass *BPIPass;
const LoopInfo *LI;
};
diff --git a/contrib/llvm/include/llvm/Analysis/LazyBranchProbabilityInfo.h b/contrib/llvm/include/llvm/Analysis/LazyBranchProbabilityInfo.h
new file mode 100644
index 000000000000..c76fa1e819ae
--- /dev/null
+++ b/contrib/llvm/include/llvm/Analysis/LazyBranchProbabilityInfo.h
@@ -0,0 +1,109 @@
+//===- LazyBranchProbabilityInfo.h - Lazy Branch Probability ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is an alternative analysis pass to BranchProbabilityInfoWrapperPass.
+// The difference is that with this pass the branch probabilities are not
+// computed when the analysis pass is executed but rather when the BPI result
+// is explicitly requested by the analysis client.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_LAZYBRANCHPROBABILITYINFO_H
+#define LLVM_ANALYSIS_LAZYBRANCHPROBABILITYINFO_H
+
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Pass.h"
+
+namespace llvm {
+class AnalysisUsage;
+class Function;
+class LoopInfo;
+
+/// \brief This is an alternative analysis pass to
+/// BranchProbabilityInfoWrapperPass. The difference is that with this pass the
+/// branch probabilities are not computed when the analysis pass is executed but
+/// rather when the BPI result is explicitly requested by the analysis client.
+///
+/// There are some additional requirements for any client pass that wants to use
+/// the analysis:
+///
+/// 1. The pass needs to initialize dependent passes with:
+///
+/// INITIALIZE_PASS_DEPENDENCY(LazyBPIPass)
+///
+/// 2. Similarly, getAnalysisUsage should call:
+///
+/// LazyBranchProbabilityInfoPass::getLazyBPIAnalysisUsage(AU)
+///
+/// 3. The computed BPI should be requested with
+/// getAnalysis<LazyBranchProbabilityInfoPass>().getBPI() before LoopInfo
+/// could be invalidated for example by changing the CFG.
+///
+/// Note that it is expected that we wouldn't need this functionality for the
+/// new PM since with the new PM, analyses are executed on demand.
+class LazyBranchProbabilityInfoPass : public FunctionPass {
+
+ /// Wraps a BPI to allow lazy computation of the branch probabilities.
+ ///
+ /// A pass that only conditionally uses BPI can unconditionally require the
+ /// analysis without paying for the overhead if BPI doesn't end up being used.
+ class LazyBranchProbabilityInfo {
+ public:
+ LazyBranchProbabilityInfo(const Function *F, const LoopInfo *LI)
+ : Calculated(false), F(F), LI(LI) {}
+
+ /// Retrieve the BPI with the branch probabilities computed.
+ BranchProbabilityInfo &getCalculated() {
+ if (!Calculated) {
+ assert(F && LI && "call setAnalysis");
+ BPI.calculate(*F, *LI);
+ Calculated = true;
+ }
+ return BPI;
+ }
+
+ const BranchProbabilityInfo &getCalculated() const {
+ return const_cast<LazyBranchProbabilityInfo *>(this)->getCalculated();
+ }
+
+ private:
+ BranchProbabilityInfo BPI;
+ bool Calculated;
+ const Function *F;
+ const LoopInfo *LI;
+ };
+
+ std::unique_ptr<LazyBranchProbabilityInfo> LBPI;
+
+public:
+ static char ID;
+
+ LazyBranchProbabilityInfoPass();
+
+ /// \brief Compute and return the branch probabilities.
+ BranchProbabilityInfo &getBPI() { return LBPI->getCalculated(); }
+
+ /// \brief Compute and return the branch probabilities.
+ const BranchProbabilityInfo &getBPI() const { return LBPI->getCalculated(); }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ /// Helper for client passes to set up the analysis usage on behalf of this
+ /// pass.
+ static void getLazyBPIAnalysisUsage(AnalysisUsage &AU);
+
+ bool runOnFunction(Function &F) override;
+ void releaseMemory() override;
+ void print(raw_ostream &OS, const Module *M) const override;
+};
+
+/// \brief Helper for client passes to initialize dependent passes for LBPI.
+void initializeLazyBPIPassPass(PassRegistry &Registry);
+}
+#endif
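A sketch of a legacy-PM client that satisfies the three requirements listed in the header comment above; the pass name and its trivial body are illustrative assumptions.

#include "llvm/Analysis/LazyBranchProbabilityInfo.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"

using namespace llvm;

namespace {
// Hypothetical client pass that only sometimes needs branch probabilities.
struct MyClientPass : public FunctionPass {
  static char ID;
  MyClientPass() : FunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // Requirement 2: request the lazy BPI analysis through the helper.
    LazyBranchProbabilityInfoPass::getLazyBPIAnalysisUsage(AU);
    AU.addRequired<LoopInfoWrapperPass>();
    AU.setPreservesAll();
  }

  bool runOnFunction(Function &F) override {
    if (F.size() < 2)
      return false; // no interesting control flow; BPI is never computed
    // Requirement 3: BPI is computed only here, on first request.
    BranchProbabilityInfo &BPI =
        getAnalysis<LazyBranchProbabilityInfoPass>().getBPI();
    (void)BPI; // a real pass would consult BPI here
    return false;
  }
};
} // end anonymous namespace

char MyClientPass::ID = 0;
// Requirement 1 is handled in the pass's INITIALIZE_PASS block (not shown):
//   INITIALIZE_PASS_DEPENDENCY(LazyBPIPass)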
diff --git a/contrib/llvm/include/llvm/Analysis/LazyCallGraph.h b/contrib/llvm/include/llvm/Analysis/LazyCallGraph.h
index 9f62eaa2e9f8..566e526f89b3 100644
--- a/contrib/llvm/include/llvm/Analysis/LazyCallGraph.h
+++ b/contrib/llvm/include/llvm/Analysis/LazyCallGraph.h
@@ -44,6 +44,7 @@
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
@@ -149,6 +150,9 @@ public:
/// around but clear them.
operator bool() const;
+ /// Returns the \c Kind of the edge.
+ Kind getKind() const;
+
/// Test whether the edge represents a direct call to a function.
///
/// This requires that the edge is not null.
@@ -177,6 +181,7 @@ public:
private:
friend class LazyCallGraph::Node;
+ friend class LazyCallGraph::RefSCC;
PointerIntPair<PointerUnion<Function *, Node *>, 1, Kind> Value;
@@ -194,6 +199,7 @@ public:
class Node {
friend class LazyCallGraph;
friend class LazyCallGraph::SCC;
+ friend class LazyCallGraph::RefSCC;
LazyCallGraph *G;
Function &F;
@@ -223,6 +229,11 @@ public:
/// Internal helper to remove the edge to the given function.
void removeEdgeInternal(Function &ChildF);
+ void clear() {
+ Edges.clear();
+ EdgeIndexMap.clear();
+ }
+
/// Print the name of this node's function.
friend raw_ostream &operator<<(raw_ostream &OS, const Node &N) {
return OS << N.F.getName();
@@ -248,6 +259,11 @@ public:
}
const Edge &operator[](Node &N) const { return (*this)[N.getFunction()]; }
+ const Edge *lookup(Function &F) const {
+ auto EI = EdgeIndexMap.find(&F);
+ return EI != EdgeIndexMap.end() ? &Edges[EI->second] : nullptr;
+ }
+
call_edge_iterator call_begin() const {
return call_edge_iterator(Edges.begin(), Edges.end());
}
@@ -410,6 +426,32 @@ public:
RefSCC &getOuterRefSCC() const { return *OuterRefSCC; }
+ /// Test if this SCC is a parent of \a C.
+ ///
+ /// Note that this is linear in the number of edges departing the current
+ /// SCC.
+ bool isParentOf(const SCC &C) const;
+
+ /// Test if this SCC is an ancestor of \a C.
+ ///
+ /// Note that in the worst case this is linear in the number of edges
+ /// departing the current SCC and every SCC in the entire graph reachable
+ /// from this SCC. Thus this very well may walk every edge in the entire
+ /// call graph! Do not call this in a tight loop!
+ bool isAncestorOf(const SCC &C) const;
+
+ /// Test if this SCC is a child of \a C.
+ ///
+ /// See the comments for \c isParentOf for detailed notes about the
+ /// complexity of this routine.
+ bool isChildOf(const SCC &C) const { return C.isParentOf(*this); }
+
+ /// Test if this SCC is a descendant of \a C.
+ ///
+ /// See the comments for \c isParentOf for detailed notes about the
+ /// complexity of this routine.
+ bool isDescendantOf(const SCC &C) const { return C.isAncestorOf(*this); }
+
/// Provide a short name by printing this SCC to a std::string.
///
/// This copes with the fact that we don't have a name per-se for an SCC
@@ -453,6 +495,12 @@ public:
/// formRefSCCFast on the graph itself.
RefSCC(LazyCallGraph &G);
+ void clear() {
+ Parents.clear();
+ SCCs.clear();
+ SCCIndices.clear();
+ }
+
/// Print a short description useful for debugging or logging.
///
/// We print the SCCs wrapped in '[]'s and skipping the middle SCCs if
@@ -494,6 +542,10 @@ public:
void verify();
#endif
+ /// Handle any necessary parent set updates after inserting a trivial ref
+ /// or call edge.
+ void handleTrivialEdgeInsertion(Node &SourceN, Node &TargetN);
+
public:
typedef pointee_iterator<SmallVectorImpl<SCC *>::const_iterator> iterator;
typedef iterator_range<iterator> range;
@@ -518,7 +570,7 @@ public:
return make_range(parent_begin(), parent_end());
}
- /// Test if this SCC is a parent of \a C.
+ /// Test if this RefSCC is a parent of \a C.
bool isParentOf(const RefSCC &C) const { return C.isChildOf(*this); }
/// Test if this RefSCC is an ancestor of \a C.
@@ -532,9 +584,9 @@ public:
/// Test if this RefSCC is a descendant of \a C.
bool isDescendantOf(const RefSCC &C) const;
- /// Provide a short name by printing this SCC to a std::string.
+ /// Provide a short name by printing this RefSCC to a std::string.
///
- /// This copes with the fact that we don't have a name per-se for an SCC
+ /// This copes with the fact that we don't have a name per-se for a RefSCC
/// while still making the use of this in debugging and logging useful.
std::string getName() const {
std::string Name;
@@ -548,7 +600,7 @@ public:
/// \name Mutation API
///
/// These methods provide the core API for updating the call graph in the
- /// presence of a (potentially still in-flight) DFS-found SCCs.
+ /// presence of (potentially still in-flight) DFS-found RefSCCs and SCCs.
///
/// Note that these methods sometimes have complex runtimes, so be careful
/// how you call them.
@@ -568,18 +620,34 @@ public:
SmallVector<SCC *, 1> switchInternalEdgeToCall(Node &SourceN,
Node &TargetN);
- /// Make an existing internal call edge into a ref edge.
+ /// Make an existing internal call edge between separate SCCs into a ref
+ /// edge.
+ ///
+ /// If SourceN and TargetN are in separate SCCs within this RefSCC, changing
+ /// the call edge between them to a ref edge is a trivial operation that
+ /// does not require any structural changes to the call graph.
+ void switchTrivialInternalEdgeToRef(Node &SourceN, Node &TargetN);
+
+ /// Make an existing internal call edge within a single SCC into a ref
+ /// edge.
+ ///
+ /// Since SourceN and TargetN are part of a single SCC, this SCC may be
+ /// split up due to breaking a cycle in the call edges that formed it. If
+ /// that happens, then this routine will insert new SCCs into the postorder
+ /// list *before* the SCC of TargetN (previously the SCC of both). This
+ /// preserves postorder as the TargetN can reach all of the other nodes by
+ /// definition of previously being in a single SCC formed by the cycle from
+ /// SourceN to TargetN.
+ ///
+ /// The newly added SCCs are added *immediately* and contiguously
+ /// prior to the TargetN SCC, and the returned range covers the new SCCs
+ /// in the RefSCC's postorder sequence. You can directly iterate the returned
+ /// range to observe all of the new SCCs in postorder.
///
- /// If SourceN and TargetN are part of a single SCC, it may be split up due
- /// to breaking a cycle in the call edges that formed it. If that happens,
- /// then this routine will insert new SCCs into the postorder list *before*
- /// the SCC of TargetN (previously the SCC of both). This preserves
- /// postorder as the TargetN can reach all of the other nodes by definition
- /// of previously being in a single SCC formed by the cycle from SourceN to
- /// TargetN. The newly added nodes are added *immediately* and contiguously
- /// prior to the TargetN SCC and so they may be iterated starting from
- /// there.
- void switchInternalEdgeToRef(Node &SourceN, Node &TargetN);
+ /// Note that if SourceN and TargetN are in separate SCCs, the simpler
+ /// routine `switchTrivialInternalEdgeToRef` should be used instead.
+ iterator_range<iterator> switchInternalEdgeToRef(Node &SourceN,
+ Node &TargetN);
/// Make an existing outgoing ref edge into a call edge.
///
@@ -699,15 +767,41 @@ public:
SmallVector<RefSCC *, 1> removeInternalRefEdge(Node &SourceN,
Node &TargetN);
+ /// A convenience wrapper around the above to handle trivial cases of
+ /// inserting a new call edge.
+ ///
+ /// This is trivial whenever the target is in the same SCC as the source or
+ /// the edge is an outgoing edge to some descendant SCC. In these cases
+ /// there is no change to the cyclic structure of SCCs or RefSCCs.
+ ///
+ /// To further make calling this convenient, it also handles inserting
+ /// already existing edges.
+ void insertTrivialCallEdge(Node &SourceN, Node &TargetN);
+
+ /// A convenience wrapper around the above to handle trivial cases of
+ /// inserting a new ref edge.
+ ///
+ /// This is trivial whenever the target is in the same RefSCC as the source
+ /// or the edge is an outgoing edge to some descendant RefSCC. In these
+ /// cases there is no change to the cyclic structure of the RefSCCs.
+ ///
+ /// To further make calling this convenient, it also handles inserting
+ /// already existing edges.
+ void insertTrivialRefEdge(Node &SourceN, Node &TargetN);
+
///@}
};
- /// A post-order depth-first SCC iterator over the call graph.
+ /// A post-order depth-first RefSCC iterator over the call graph.
///
- /// This iterator triggers the Tarjan DFS-based formation of the SCC DAG for
- /// the call graph, walking it lazily in depth-first post-order. That is, it
- /// always visits SCCs for a callee prior to visiting the SCC for a caller
- /// (when they are in different SCCs).
+ /// This iterator triggers the Tarjan DFS-based formation of the RefSCC (and
+ /// SCC) DAG for the call graph, walking it lazily in depth-first post-order.
+ /// That is, it always visits RefSCCs for the target of a reference edge
+ /// prior to visiting the RefSCC for a source of the edge (when they are in
+ /// different RefSCCs).
+ ///
+ /// When forming each RefSCC, the call edges within it are used to form SCCs
+ /// within it, so iterating this also controls the lazy formation of SCCs.
class postorder_ref_scc_iterator
: public iterator_facade_base<postorder_ref_scc_iterator,
std::forward_iterator_tag, RefSCC> {
@@ -718,27 +812,39 @@ public:
struct IsAtEndT {};
LazyCallGraph *G;
- RefSCC *C;
+ RefSCC *RC;
- // Build the begin iterator for a node.
- postorder_ref_scc_iterator(LazyCallGraph &G) : G(&G) {
- C = G.getNextRefSCCInPostOrder();
- }
+ /// Build the begin iterator for a node.
+ postorder_ref_scc_iterator(LazyCallGraph &G) : G(&G), RC(getRC(G, 0)) {}
- // Build the end iterator for a node. This is selected purely by overload.
+ /// Build the end iterator for a node. This is selected purely by overload.
postorder_ref_scc_iterator(LazyCallGraph &G, IsAtEndT /*Nonce*/)
- : G(&G), C(nullptr) {}
+ : G(&G), RC(nullptr) {}
+
+ /// Get the post-order RefSCC at the given index of the postorder walk,
+ /// populating it if necessary.
+ static RefSCC *getRC(LazyCallGraph &G, int Index) {
+ if (Index == (int)G.PostOrderRefSCCs.size())
+ if (!G.buildNextRefSCCInPostOrder())
+ // We're at the end.
+ return nullptr;
+
+ assert(Index < (int)G.PostOrderRefSCCs.size() &&
+ "Built the next post-order RefSCC without growing list!");
+ return G.PostOrderRefSCCs[Index];
+ }
public:
bool operator==(const postorder_ref_scc_iterator &Arg) const {
- return G == Arg.G && C == Arg.C;
+ return G == Arg.G && RC == Arg.RC;
}
- reference operator*() const { return *C; }
+ reference operator*() const { return *RC; }
using iterator_facade_base::operator++;
postorder_ref_scc_iterator &operator++() {
- C = G->getNextRefSCCInPostOrder();
+ assert(RC && "Cannot increment the end iterator!");
+ RC = getRC(*G, G->RefSCCIndices.find(RC)->second + 1);
return *this;
}
};
@@ -777,7 +883,7 @@ public:
/// Lookup a function's SCC in the graph.
///
- /// \returns null if the function hasn't been assigned an SCC via the SCC
+ /// \returns null if the function hasn't been assigned an SCC via the RefSCC
/// iterator walk.
SCC *lookupSCC(Node &N) const { return SCCMap.lookup(&N); }
@@ -809,8 +915,9 @@ public:
/// call graph. They can be used to update the core node-graph during
/// a node-based inorder traversal that precedes any SCC-based traversal.
///
- /// Once you begin manipulating a call graph's SCCs, you must perform all
- /// mutation of the graph via the SCC methods.
+ /// Once you begin manipulating a call graph's SCCs, most mutation of the
+ /// graph must be performed via a RefSCC method. There are some exceptions
+ /// below.
/// Update the call graph after inserting a new edge.
void insertEdge(Node &Caller, Function &Callee, Edge::Kind EK);
@@ -830,6 +937,72 @@ public:
///@}
+ ///@{
+ /// \name General Mutation API
+ ///
+ /// There are a very limited set of mutations allowed on the graph as a whole
+ /// once SCCs have started to be formed. These routines have strict contracts
+ /// but may be called at any point.
+
+ /// Remove a dead function from the call graph (typically to delete it).
+ ///
+ /// Note that the function must have an empty use list, and the call graph
+ /// must be up-to-date prior to calling this. That means it is by itself in
+ /// a maximal SCC which is by itself in a maximal RefSCC, etc. No structural
+ /// changes result from calling this routine other than potentially removing
+ /// entry points into the call graph.
+ ///
+ /// If SCC formation has begun, this function must not be part of the current
+ /// DFS in order to call this safely. Typically, the function will have been
+ /// fully visited by the DFS prior to calling this routine.
+ void removeDeadFunction(Function &F);
+
+ ///@}
+
+ ///@{
+ /// \name Static helpers for code doing updates to the call graph.
+ ///
+ /// These helpers are used to implement parts of the call graph but are also
+ /// useful to code doing updates or otherwise wanting to walk the IR in the
+ /// same patterns as when we build the call graph.
+
+ /// Recursively visits the defined functions whose address is reachable from
+ /// every constant in the \p Worklist.
+ ///
+ /// Doesn't recurse through any constants already in the \p Visited set, and
+ /// updates that set with every constant visited.
+ ///
+ /// For each defined function, calls \p Callback with that function.
+ template <typename CallbackT>
+ static void visitReferences(SmallVectorImpl<Constant *> &Worklist,
+ SmallPtrSetImpl<Constant *> &Visited,
+ CallbackT Callback) {
+ while (!Worklist.empty()) {
+ Constant *C = Worklist.pop_back_val();
+
+ if (Function *F = dyn_cast<Function>(C)) {
+ if (!F->isDeclaration())
+ Callback(*F);
+ continue;
+ }
+
+ if (BlockAddress *BA = dyn_cast<BlockAddress>(C)) {
+ // The blockaddress constant expression is a weird special case, we
+ // can't generically walk its operands the way we do for all other
+ // constants.
+ if (Visited.insert(BA->getFunction()).second)
+ Worklist.push_back(BA->getFunction());
+ continue;
+ }
+
+ for (Value *Op : C->operand_values())
+ if (Visited.insert(cast<Constant>(Op)).second)
+ Worklist.push_back(cast<Constant>(Op));
+ }
+ }
+
+ ///@}
+
private:
typedef SmallVectorImpl<Node *>::reverse_iterator node_stack_iterator;
typedef iterator_range<node_stack_iterator> node_stack_range;
@@ -858,6 +1031,15 @@ private:
/// Allocator that holds all the call graph RefSCCs.
SpecificBumpPtrAllocator<RefSCC> RefSCCBPA;
+ /// The post-order sequence of RefSCCs.
+ ///
+ /// This list is lazily formed the first time we walk the graph.
+ SmallVector<RefSCC *, 16> PostOrderRefSCCs;
+
+ /// A map from RefSCC to the index for it in the postorder sequence of
+ /// RefSCCs.
+ DenseMap<RefSCC *, int> RefSCCIndices;
+
/// The leaf RefSCCs of the graph.
///
/// These are all of the RefSCCs which have no children.
@@ -869,7 +1051,7 @@ private:
/// Set of entry nodes not-yet-processed into RefSCCs.
SmallVector<Function *, 4> RefSCCEntryNodes;
- /// Stack of nodes the DFS has walked but not yet put into a SCC.
+ /// Stack of nodes the DFS has walked but not yet put into a RefSCC.
SmallVector<Node *, 4> PendingRefSCCStack;
/// Counter for the next DFS number to assign.
@@ -905,8 +1087,24 @@ private:
/// and updates the root leaf list.
void connectRefSCC(RefSCC &RC);
- /// Retrieve the next node in the post-order RefSCC walk of the call graph.
- RefSCC *getNextRefSCCInPostOrder();
+ /// Get the index of a RefSCC within the postorder traversal.
+ ///
+ /// Requires that this RefSCC is a valid one in the (perhaps partial)
+ /// postorder traversed part of the graph.
+ int getRefSCCIndex(RefSCC &RC) {
+ auto IndexIt = RefSCCIndices.find(&RC);
+ assert(IndexIt != RefSCCIndices.end() && "RefSCC doesn't have an index!");
+ assert(PostOrderRefSCCs[IndexIt->second] == &RC &&
+ "Index does not point back at RC!");
+ return IndexIt->second;
+ }
+
+ /// Builds the next node in the post-order RefSCC walk of the call graph and
+ /// appends it to the \c PostOrderRefSCCs vector.
+ ///
+ /// Returns true if a new RefSCC was successfully constructed, and false if
+ /// there are no more RefSCCs to build in the graph.
+ bool buildNextRefSCCInPostOrder();
};
inline LazyCallGraph::Edge::Edge() : Value() {}
@@ -917,9 +1115,14 @@ inline LazyCallGraph::Edge::operator bool() const {
return !Value.getPointer().isNull();
}
+inline LazyCallGraph::Edge::Kind LazyCallGraph::Edge::getKind() const {
+ assert(*this && "Queried a null edge!");
+ return Value.getInt();
+}
+
inline bool LazyCallGraph::Edge::isCall() const {
assert(*this && "Queried a null edge!");
- return Value.getInt() == Call;
+ return getKind() == Call;
}
inline Function &LazyCallGraph::Edge::getFunction() const {
@@ -953,26 +1156,26 @@ inline LazyCallGraph::Node &LazyCallGraph::Edge::getNode(LazyCallGraph &G) {
// Provide GraphTraits specializations for call graphs.
template <> struct GraphTraits<LazyCallGraph::Node *> {
- typedef LazyCallGraph::Node NodeType;
+ typedef LazyCallGraph::Node *NodeRef;
typedef LazyCallGraph::edge_iterator ChildIteratorType;
- static NodeType *getEntryNode(NodeType *N) { return N; }
- static ChildIteratorType child_begin(NodeType *N) { return N->begin(); }
- static ChildIteratorType child_end(NodeType *N) { return N->end(); }
+ static NodeRef getEntryNode(NodeRef N) { return N; }
+ static ChildIteratorType child_begin(NodeRef N) { return N->begin(); }
+ static ChildIteratorType child_end(NodeRef N) { return N->end(); }
};
template <> struct GraphTraits<LazyCallGraph *> {
- typedef LazyCallGraph::Node NodeType;
+ typedef LazyCallGraph::Node *NodeRef;
typedef LazyCallGraph::edge_iterator ChildIteratorType;
- static NodeType *getEntryNode(NodeType *N) { return N; }
- static ChildIteratorType child_begin(NodeType *N) { return N->begin(); }
- static ChildIteratorType child_end(NodeType *N) { return N->end(); }
+ static NodeRef getEntryNode(NodeRef N) { return N; }
+ static ChildIteratorType child_begin(NodeRef N) { return N->begin(); }
+ static ChildIteratorType child_end(NodeRef N) { return N->end(); }
};
/// An analysis pass which computes the call graph for a module.
class LazyCallGraphAnalysis : public AnalysisInfoMixin<LazyCallGraphAnalysis> {
friend AnalysisInfoMixin<LazyCallGraphAnalysis>;
- static char PassID;
+ static AnalysisKey Key;
public:
/// Inform generic clients of the result type.
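A small sketch of update code driving the new static visitReferences helper over one function's constant operands; collectReferencedDefinitions is a made-up wrapper, not part of the class.

#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstIterator.h"

using namespace llvm;

// Hypothetical: collect every defined function whose address is reachable
// from the constants used by F, mirroring the walk the graph itself performs.
static void collectReferencedDefinitions(Function &F,
                                         SmallVectorImpl<Function *> &Out) {
  SmallVector<Constant *, 16> Worklist;
  SmallPtrSet<Constant *, 16> Visited;
  for (Instruction &I : instructions(F))
    for (Value *Op : I.operand_values())
      if (auto *C = dyn_cast<Constant>(Op))
        if (Visited.insert(C).second)
          Worklist.push_back(C);

  LazyCallGraph::visitReferences(
      Worklist, Visited, [&](Function &Callee) { Out.push_back(&Callee); });
}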
diff --git a/contrib/llvm/include/llvm/Analysis/LazyValueInfo.h b/contrib/llvm/include/llvm/Analysis/LazyValueInfo.h
index c85cf2c5da56..610791023a7d 100644
--- a/contrib/llvm/include/llvm/Analysis/LazyValueInfo.h
+++ b/contrib/llvm/include/llvm/Analysis/LazyValueInfo.h
@@ -109,7 +109,7 @@ public:
Result run(Function &F, FunctionAnalysisManager &FAM);
private:
- static char PassID;
+ static AnalysisKey Key;
friend struct AnalysisInfoMixin<LazyValueAnalysis>;
};
diff --git a/contrib/llvm/include/llvm/Analysis/Loads.h b/contrib/llvm/include/llvm/Analysis/Loads.h
index 39f80f489e12..139bf3c2116f 100644
--- a/contrib/llvm/include/llvm/Analysis/Loads.h
+++ b/contrib/llvm/include/llvm/Analysis/Loads.h
@@ -71,15 +71,13 @@ extern cl::opt<unsigned> DefMaxInstsToScan;
/// the only relevant load gets deleted.)
///
/// \param Load The load we want to replace.
-/// \param ScanBB The basic block to scan. FIXME: This is redundant.
+/// \param ScanBB The basic block to scan.
/// \param [in,out] ScanFrom The location to start scanning from. When this
/// function returns, it points at the last instruction scanned.
/// \param MaxInstsToScan The maximum number of instructions to scan. If this
/// is zero, the whole block will be scanned.
/// \param AA Optional pointer to alias analysis, to make the scan more
/// precise.
-/// \param [out] AATags The aliasing metadata for the operation which produced
-/// the value. FIXME: This is basically useless.
/// \param [out] IsLoadCSE Whether the returned value is a load from the same
/// location in memory, as opposed to the value operand of a store.
///
@@ -89,7 +87,6 @@ Value *FindAvailableLoadedValue(LoadInst *Load,
BasicBlock::iterator &ScanFrom,
unsigned MaxInstsToScan = DefMaxInstsToScan,
AliasAnalysis *AA = nullptr,
- AAMDNodes *AATags = nullptr,
bool *IsLoadCSE = nullptr);
}
diff --git a/contrib/llvm/include/llvm/Analysis/LoopAccessAnalysis.h b/contrib/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
index 619fab283102..76066f6003e7 100644
--- a/contrib/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/contrib/llvm/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -20,7 +20,9 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/LoopPassManager.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/raw_ostream.h"
@@ -34,6 +36,7 @@ class Loop;
class SCEV;
class SCEVUnionPredicate;
class LoopAccessInfo;
+class OptimizationRemarkEmitter;
/// Optimization analysis message produced during vectorization. Messages inform
/// the user why vectorization did not occur.
@@ -63,10 +66,9 @@ public:
/// \brief Emit an analysis note for \p PassName with the debug location from
/// the instruction in \p Message if available. Otherwise use the location of
/// \p TheLoop.
- static void emitAnalysis(const LoopAccessReport &Message,
- const Function *TheFunction,
- const Loop *TheLoop,
- const char *PassName);
+ static void emitAnalysis(const LoopAccessReport &Message, const Loop *TheLoop,
+ const char *PassName,
+ OptimizationRemarkEmitter &ORE);
};
/// \brief Collection of parameters shared between the Loop Vectorizer and the
@@ -516,38 +518,6 @@ public:
LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetLibraryInfo *TLI,
AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI);
- // FIXME:
- // Hack for MSVC 2013 which sems like it can't synthesize this even
- // with default keyword:
- // LoopAccessInfo(LoopAccessInfo &&LAI) = default;
- LoopAccessInfo(LoopAccessInfo &&LAI)
- : PSE(std::move(LAI.PSE)), PtrRtChecking(std::move(LAI.PtrRtChecking)),
- DepChecker(std::move(LAI.DepChecker)), TheLoop(LAI.TheLoop),
- NumLoads(LAI.NumLoads), NumStores(LAI.NumStores),
- MaxSafeDepDistBytes(LAI.MaxSafeDepDistBytes), CanVecMem(LAI.CanVecMem),
- StoreToLoopInvariantAddress(LAI.StoreToLoopInvariantAddress),
- Report(std::move(LAI.Report)),
- SymbolicStrides(std::move(LAI.SymbolicStrides)),
- StrideSet(std::move(LAI.StrideSet)) {}
- // LoopAccessInfo &operator=(LoopAccessInfo &&LAI) = default;
- LoopAccessInfo &operator=(LoopAccessInfo &&LAI) {
- assert(this != &LAI);
-
- PSE = std::move(LAI.PSE);
- PtrRtChecking = std::move(LAI.PtrRtChecking);
- DepChecker = std::move(LAI.DepChecker);
- TheLoop = LAI.TheLoop;
- NumLoads = LAI.NumLoads;
- NumStores = LAI.NumStores;
- MaxSafeDepDistBytes = LAI.MaxSafeDepDistBytes;
- CanVecMem = LAI.CanVecMem;
- StoreToLoopInvariantAddress = LAI.StoreToLoopInvariantAddress;
- Report = std::move(LAI.Report);
- SymbolicStrides = std::move(LAI.SymbolicStrides);
- StrideSet = std::move(LAI.StrideSet);
- return *this;
- }
-
/// Return true we can analyze the memory accesses in the loop and there are
/// no memory dependence cycles.
bool canVectorizeMemory() const { return CanVecMem; }
@@ -594,7 +564,7 @@ public:
/// \brief The diagnostics report generated for the analysis. E.g. why we
/// couldn't analyze the loop.
- const Optional<LoopAccessReport> &getReport() const { return Report; }
+ const OptimizationRemarkAnalysis *getReport() const { return Report.get(); }
/// \brief the Memory Dependence Checker which can determine the
/// loop-independent and loop-carried dependences between memory accesses.
@@ -640,7 +610,13 @@ private:
/// pass.
bool canAnalyzeLoop();
- void emitAnalysis(LoopAccessReport &Message);
+ /// \brief Save the analysis remark.
+ ///
+ /// LAA does not emit the remark directly. Instead it stores it so that the
+ /// client can retrieve it and present it as its own analysis
+ /// (e.g. -Rpass-analysis=loop-vectorize).
+ OptimizationRemarkAnalysis &recordAnalysis(StringRef RemarkName,
+ Instruction *Instr = nullptr);
/// \brief Collect memory access with loop invariant strides.
///
@@ -674,7 +650,7 @@ private:
/// \brief The diagnostics report generated for the analysis. E.g. why we
/// couldn't analyze the loop.
- Optional<LoopAccessReport> Report;
+ std::unique_ptr<OptimizationRemarkAnalysis> Report;
/// \brief If an access has a symbolic stride, this maps the pointer value to
/// the stride symbol.
@@ -712,7 +688,7 @@ const SCEV *replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
/// run-time assumptions.
int64_t getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp,
const ValueToValueMap &StridesMap = ValueToValueMap(),
- bool Assume = false);
+ bool Assume = false, bool ShouldCheckWrap = true);
/// \brief Returns true if the memory operations \p A and \p B are consecutive.
/// This is a simple API that does not depend on the analysis pass.
@@ -773,11 +749,11 @@ private:
class LoopAccessAnalysis
: public AnalysisInfoMixin<LoopAccessAnalysis> {
friend AnalysisInfoMixin<LoopAccessAnalysis>;
- static char PassID;
+ static AnalysisKey Key;
public:
typedef LoopAccessInfo Result;
- Result run(Loop &, AnalysisManager<Loop> &);
+ Result run(Loop &, LoopAnalysisManager &);
static StringRef name() { return "LoopAccessAnalysis"; }
};
@@ -788,7 +764,7 @@ class LoopAccessInfoPrinterPass
public:
explicit LoopAccessInfoPrinterPass(raw_ostream &OS) : OS(OS) {}
- PreservedAnalyses run(Loop &L, AnalysisManager<Loop> &AM);
+ PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM);
};
inline Instruction *MemoryDepChecker::Dependence::getSource(
diff --git a/contrib/llvm/include/llvm/Analysis/LoopInfo.h b/contrib/llvm/include/llvm/Analysis/LoopInfo.h
index 35dc6bcb6864..0c99c6297c1e 100644
--- a/contrib/llvm/include/llvm/Analysis/LoopInfo.h
+++ b/contrib/llvm/include/llvm/Analysis/LoopInfo.h
@@ -168,6 +168,19 @@ public:
return false;
}
+ /// Returns true if \p BB is a loop-latch.
+ /// A latch block is a block that contains a branch back to the header.
+ /// This function is useful when there are multiple latches in a loop
+ /// because \fn getLoopLatch will return nullptr in that case.
+ bool isLoopLatch(const BlockT *BB) const {
+ assert(contains(BB) && "block does not belong to the loop");
+
+ BlockT *Header = getHeader();
+ auto PredBegin = GraphTraits<Inverse<BlockT*> >::child_begin(Header);
+ auto PredEnd = GraphTraits<Inverse<BlockT*> >::child_end(Header);
+ return std::find(PredBegin, PredEnd, BB) != PredEnd;
+ }
+
/// Calculate the number of back edges to the loop header.
unsigned getNumBackEdges() const {
unsigned NumBackEdges = 0;
@@ -316,7 +329,7 @@ public:
/// Blocks as appropriate. This does not update the mapping in the LoopInfo
/// class.
void removeBlockFromLoop(BlockT *BB) {
- auto I = std::find(Blocks.begin(), Blocks.end(), BB);
+ auto I = find(Blocks, BB);
assert(I != Blocks.end() && "N is not in this list!");
Blocks.erase(I);
@@ -329,7 +342,8 @@ public:
/// Verify loop structure of this loop and all nested loops.
void verifyLoopNest(DenseSet<const LoopT*> *Loops) const;
- void print(raw_ostream &OS, unsigned Depth = 0) const;
+ /// Print loop with all the BBs inside it.
+ void print(raw_ostream &OS, unsigned Depth = 0, bool Verbose = false) const;
protected:
friend class LoopInfoBase<BlockT, LoopT>;
@@ -353,6 +367,27 @@ extern template class LoopBase<BasicBlock, Loop>;
/// in the CFG are necessarily loops.
class Loop : public LoopBase<BasicBlock, Loop> {
public:
+ /// \brief A range representing the start and end location of a loop.
+ class LocRange {
+ DebugLoc Start;
+ DebugLoc End;
+
+ public:
+ LocRange() {}
+ LocRange(DebugLoc Start) : Start(std::move(Start)), End(std::move(Start)) {}
+ LocRange(DebugLoc Start, DebugLoc End) : Start(std::move(Start)),
+ End(std::move(End)) {}
+
+ const DebugLoc &getStart() const { return Start; }
+ const DebugLoc &getEnd() const { return End; }
+
+ /// \brief Check for null.
+ ///
+ explicit operator bool() const {
+ return Start && End;
+ }
+ };
+
Loop() {}
/// Return true if the specified value is loop invariant.
@@ -398,7 +433,7 @@ public:
bool isLCSSAForm(DominatorTree &DT) const;
/// Return true if this Loop and all inner subloops are in LCSSA form.
- bool isRecursivelyLCSSAForm(DominatorTree &DT) const;
+ bool isRecursivelyLCSSAForm(DominatorTree &DT, const LoopInfo &LI) const;
/// Return true if the Loop is in the form that the LoopSimplify form
/// transforms loops to, which is sometimes called normal form.
@@ -451,6 +486,7 @@ public:
BasicBlock *getUniqueExitBlock() const;
void dump() const;
+ void dumpVerbose() const;
/// Return the debug location of the start of this loop.
/// This looks for a BB terminating instruction with a known debug
@@ -459,6 +495,9 @@ public:
/// it returns an unknown location.
DebugLoc getStartLoc() const;
+ /// Return the source code span of the loop.
+ LocRange getLocRange() const;
+
StringRef getName() const {
if (BasicBlock *Header = getHeader())
if (Header->hasName())
@@ -579,7 +618,7 @@ public:
/// loop.
void changeTopLevelLoop(LoopT *OldLoop,
LoopT *NewLoop) {
- auto I = std::find(TopLevelLoops.begin(), TopLevelLoops.end(), OldLoop);
+ auto I = find(TopLevelLoops, OldLoop);
assert(I != TopLevelLoops.end() && "Old loop not at top level!");
*I = NewLoop;
assert(!NewLoop->ParentLoop && !OldLoop->ParentLoop &&
@@ -620,7 +659,7 @@ public:
// Debugging
void print(raw_ostream &OS) const;
- void verify() const;
+ void verify(const DominatorTreeBase<BlockT> &DomTree) const;
};
// Implementation in LoopInfoImpl.h
@@ -746,40 +785,32 @@ public:
// Allow clients to walk the list of nested loops...
template <> struct GraphTraits<const Loop*> {
- typedef const Loop NodeType;
+ typedef const Loop *NodeRef;
typedef LoopInfo::iterator ChildIteratorType;
- static NodeType *getEntryNode(const Loop *L) { return L; }
- static inline ChildIteratorType child_begin(NodeType *N) {
- return N->begin();
- }
- static inline ChildIteratorType child_end(NodeType *N) {
- return N->end();
- }
+ static NodeRef getEntryNode(const Loop *L) { return L; }
+ static ChildIteratorType child_begin(NodeRef N) { return N->begin(); }
+ static ChildIteratorType child_end(NodeRef N) { return N->end(); }
};
template <> struct GraphTraits<Loop*> {
- typedef Loop NodeType;
+ typedef Loop *NodeRef;
typedef LoopInfo::iterator ChildIteratorType;
- static NodeType *getEntryNode(Loop *L) { return L; }
- static inline ChildIteratorType child_begin(NodeType *N) {
- return N->begin();
- }
- static inline ChildIteratorType child_end(NodeType *N) {
- return N->end();
- }
+ static NodeRef getEntryNode(Loop *L) { return L; }
+ static ChildIteratorType child_begin(NodeRef N) { return N->begin(); }
+ static ChildIteratorType child_end(NodeRef N) { return N->end(); }
};
/// \brief Analysis pass that exposes the \c LoopInfo for a function.
class LoopAnalysis : public AnalysisInfoMixin<LoopAnalysis> {
friend AnalysisInfoMixin<LoopAnalysis>;
- static char PassID;
+ static AnalysisKey Key;
public:
typedef LoopInfo Result;
- LoopInfo run(Function &F, AnalysisManager<Function> &AM);
+ LoopInfo run(Function &F, FunctionAnalysisManager &AM);
};
/// \brief Printer pass for the \c LoopAnalysis results.
@@ -788,7 +819,12 @@ class LoopPrinterPass : public PassInfoMixin<LoopPrinterPass> {
public:
explicit LoopPrinterPass(raw_ostream &OS) : OS(OS) {}
- PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM);
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+/// \brief Verifier pass for the \c LoopAnalysis results.
+struct LoopVerifierPass : public PassInfoMixin<LoopVerifierPass> {
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
/// \brief The legacy pass manager's analysis pass to compute loop information.
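A brief sketch using the new isLoopLatch query to collect every latch of a loop with multiple back edges (the case where getLoopLatch returns nullptr); collectLatches is an illustrative helper, not an LLVM API.

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"

using namespace llvm;

// Hypothetical helper: gather all latches of L. isLoopLatch asserts that the
// queried block belongs to the loop, hence the contains() check first.
static void collectLatches(const Loop &L,
                           SmallVectorImpl<BasicBlock *> &Latches) {
  BasicBlock *Header = L.getHeader();
  for (BasicBlock *Pred : predecessors(Header))
    if (L.contains(Pred) && L.isLoopLatch(Pred))
      Latches.push_back(Pred);
}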
diff --git a/contrib/llvm/include/llvm/Analysis/LoopInfoImpl.h b/contrib/llvm/include/llvm/Analysis/LoopInfoImpl.h
index 816a15452dae..833a2202a568 100644
--- a/contrib/llvm/include/llvm/Analysis/LoopInfoImpl.h
+++ b/contrib/llvm/include/llvm/Analysis/LoopInfoImpl.h
@@ -17,6 +17,7 @@
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Dominators.h"
@@ -137,7 +138,7 @@ BlockT *LoopBase<BlockT, LoopT>::getLoopPredecessor() const {
for (typename InvBlockTraits::ChildIteratorType PI =
InvBlockTraits::child_begin(Header),
PE = InvBlockTraits::child_end(Header); PI != PE; ++PI) {
- typename InvBlockTraits::NodeType *N = *PI;
+ typename InvBlockTraits::NodeRef N = *PI;
if (!contains(N)) { // If the block is not in the loop...
if (Out && Out != N)
return nullptr; // Multiple predecessors outside the loop
@@ -162,7 +163,7 @@ BlockT *LoopBase<BlockT, LoopT>::getLoopLatch() const {
InvBlockTraits::child_end(Header);
BlockT *Latch = nullptr;
for (; PI != PE; ++PI) {
- typename InvBlockTraits::NodeType *N = *PI;
+ typename InvBlockTraits::NodeRef N = *PI;
if (contains(N)) {
if (Latch) return nullptr;
Latch = N;
@@ -185,8 +186,13 @@ BlockT *LoopBase<BlockT, LoopT>::getLoopLatch() const {
template<class BlockT, class LoopT>
void LoopBase<BlockT, LoopT>::
addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase<BlockT, LoopT> &LIB) {
- assert((Blocks.empty() || LIB[getHeader()] == this) &&
- "Incorrect LI specified for this loop!");
+#ifndef NDEBUG
+ if (!Blocks.empty()) {
+ auto SameHeader = LIB[getHeader()];
+ assert(contains(SameHeader) && getHeader() == SameHeader->getHeader()
+ && "Incorrect LI specified for this loop!");
+ }
+#endif
assert(NewBB && "Cannot add a null basic block to the loop!");
assert(!LIB[NewBB] && "BasicBlock already in the loop!");
@@ -211,8 +217,7 @@ void LoopBase<BlockT, LoopT>::
replaceChildLoopWith(LoopT *OldChild, LoopT *NewChild) {
assert(OldChild->ParentLoop == this && "This loop is already broken!");
assert(!NewChild->ParentLoop && "NewChild already has a parent!");
- typename std::vector<LoopT *>::iterator I =
- std::find(SubLoops.begin(), SubLoops.end(), OldChild);
+ typename std::vector<LoopT *>::iterator I = find(SubLoops, OldChild);
assert(I != SubLoops.end() && "OldChild not in loop!");
*I = NewChild;
OldChild->ParentLoop = nullptr;
@@ -228,9 +233,9 @@ void LoopBase<BlockT, LoopT>::verifyLoop() const {
// Setup for using a depth-first iterator to visit every block in the loop.
SmallVector<BlockT*, 8> ExitBBs;
getExitBlocks(ExitBBs);
- llvm::SmallPtrSet<BlockT*, 8> VisitSet;
+ df_iterator_default_set<BlockT*> VisitSet;
VisitSet.insert(ExitBBs.begin(), ExitBBs.end());
- df_ext_iterator<BlockT*, llvm::SmallPtrSet<BlockT*, 8> >
+ df_ext_iterator<BlockT*, df_iterator_default_set<BlockT*>>
BI = df_ext_begin(getHeader(), VisitSet),
BE = df_ext_end(getHeader(), VisitSet);
@@ -240,28 +245,23 @@ void LoopBase<BlockT, LoopT>::verifyLoop() const {
// Check the individual blocks.
for ( ; BI != BE; ++BI) {
BlockT *BB = *BI;
- bool HasInsideLoopSuccs = false;
- bool HasInsideLoopPreds = false;
- SmallVector<BlockT *, 2> OutsideLoopPreds;
- typedef GraphTraits<BlockT*> BlockTraits;
- for (typename BlockTraits::ChildIteratorType SI =
- BlockTraits::child_begin(BB), SE = BlockTraits::child_end(BB);
- SI != SE; ++SI)
- if (contains(*SI)) {
- HasInsideLoopSuccs = true;
- break;
- }
- typedef GraphTraits<Inverse<BlockT*> > InvBlockTraits;
- for (typename InvBlockTraits::ChildIteratorType PI =
- InvBlockTraits::child_begin(BB), PE = InvBlockTraits::child_end(BB);
- PI != PE; ++PI) {
- BlockT *N = *PI;
- if (contains(N))
- HasInsideLoopPreds = true;
- else
- OutsideLoopPreds.push_back(N);
- }
+ assert(std::any_of(GraphTraits<BlockT*>::child_begin(BB),
+ GraphTraits<BlockT*>::child_end(BB),
+ [&](BlockT *B){return contains(B);}) &&
+ "Loop block has no in-loop successors!");
+
+ assert(std::any_of(GraphTraits<Inverse<BlockT*> >::child_begin(BB),
+ GraphTraits<Inverse<BlockT*> >::child_end(BB),
+ [&](BlockT *B){return contains(B);}) &&
+ "Loop block has no in-loop predecessors!");
+
+ SmallVector<BlockT *, 2> OutsideLoopPreds;
+ std::for_each(GraphTraits<Inverse<BlockT*> >::child_begin(BB),
+ GraphTraits<Inverse<BlockT*> >::child_end(BB),
+ [&](BlockT *B){if (!contains(B))
+ OutsideLoopPreds.push_back(B);
+ });
if (BB == getHeader()) {
assert(!OutsideLoopPreds.empty() && "Loop is unreachable!");
@@ -275,8 +275,6 @@ void LoopBase<BlockT, LoopT>::verifyLoop() const {
assert(CB != OutsideLoopPreds[i] &&
"Loop has multiple entry points!");
}
- assert(HasInsideLoopPreds && "Loop block has no in-loop predecessors!");
- assert(HasInsideLoopSuccs && "Loop block has no in-loop successors!");
assert(BB != &getHeader()->getParent()->front() &&
"Loop contains function entry block!");
@@ -296,8 +294,7 @@ void LoopBase<BlockT, LoopT>::verifyLoop() const {
// Check the parent loop pointer.
if (ParentLoop) {
- assert(std::find(ParentLoop->begin(), ParentLoop->end(), this) !=
- ParentLoop->end() &&
+ assert(is_contained(*ParentLoop, this) &&
"Loop is not a subloop of its parent!");
}
#endif
@@ -316,17 +313,24 @@ void LoopBase<BlockT, LoopT>::verifyLoopNest(
}
template<class BlockT, class LoopT>
-void LoopBase<BlockT, LoopT>::print(raw_ostream &OS, unsigned Depth) const {
+void LoopBase<BlockT, LoopT>::print(raw_ostream &OS, unsigned Depth,
+ bool Verbose) const {
OS.indent(Depth*2) << "Loop at depth " << getLoopDepth()
<< " containing: ";
+ BlockT *H = getHeader();
for (unsigned i = 0; i < getBlocks().size(); ++i) {
- if (i) OS << ",";
BlockT *BB = getBlocks()[i];
- BB->printAsOperand(OS, false);
- if (BB == getHeader()) OS << "<header>";
- if (BB == getLoopLatch()) OS << "<latch>";
- if (isLoopExiting(BB)) OS << "<exiting>";
+ if (!Verbose) {
+ if (i) OS << ",";
+ BB->printAsOperand(OS, false);
+ } else OS << "\n";
+
+ if (BB == H) OS << "<header>";
+ if (isLoopLatch(BB)) OS << "<latch>";
+ if (isLoopExiting(BB)) OS << "<exiting>";
+ if (Verbose)
+ BB->print(OS);
}
OS << "\n";
@@ -516,8 +520,26 @@ void LoopInfoBase<BlockT, LoopT>::print(raw_ostream &OS) const {
#endif
}
-template<class BlockT, class LoopT>
-void LoopInfoBase<BlockT, LoopT>::verify() const {
+template <typename T>
+bool compareVectors(std::vector<T> &BB1, std::vector<T> &BB2) {
+ std::sort(BB1.begin(), BB1.end());
+ std::sort(BB2.begin(), BB2.end());
+ return BB1 == BB2;
+}
+
+template <class BlockT, class LoopT>
+static void
+addInnerLoopsToHeadersMap(DenseMap<BlockT *, const LoopT *> &LoopHeaders,
+ const LoopInfoBase<BlockT, LoopT> &LI,
+ const LoopT &L) {
+ LoopHeaders[L.getHeader()] = &L;
+ for (LoopT *SL : L)
+ addInnerLoopsToHeadersMap(LoopHeaders, LI, *SL);
+}
+
+template <class BlockT, class LoopT>
+void LoopInfoBase<BlockT, LoopT>::verify(
+ const DominatorTreeBase<BlockT> &DomTree) const {
DenseSet<const LoopT*> Loops;
for (iterator I = begin(), E = end(); I != E; ++I) {
assert(!(*I)->getParentLoop() && "Top-level loop has a parent!");
@@ -532,6 +554,48 @@ void LoopInfoBase<BlockT, LoopT>::verify() const {
assert(Loops.count(L) && "orphaned loop");
assert(L->contains(BB) && "orphaned block");
}
+
+ // Recompute LoopInfo to verify loops structure.
+ LoopInfoBase<BlockT, LoopT> OtherLI;
+ OtherLI.analyze(DomTree);
+
+ DenseMap<BlockT *, const LoopT *> LoopHeaders1;
+ DenseMap<BlockT *, const LoopT *> LoopHeaders2;
+
+ for (LoopT *L : *this)
+ addInnerLoopsToHeadersMap(LoopHeaders1, *this, *L);
+ for (LoopT *L : OtherLI)
+ addInnerLoopsToHeadersMap(LoopHeaders2, OtherLI, *L);
+ assert(LoopHeaders1.size() == LoopHeaders2.size() &&
+ "LoopInfo is incorrect.");
+
+ auto compareLoops = [&](const LoopT *L1, const LoopT *L2) {
+ BlockT *H1 = L1->getHeader();
+ BlockT *H2 = L2->getHeader();
+ if (H1 != H2)
+ return false;
+ std::vector<BlockT *> BB1 = L1->getBlocks();
+ std::vector<BlockT *> BB2 = L2->getBlocks();
+ if (!compareVectors(BB1, BB2))
+ return false;
+
+ std::vector<BlockT *> SubLoopHeaders1;
+ std::vector<BlockT *> SubLoopHeaders2;
+ for (LoopT *L : *L1)
+ SubLoopHeaders1.push_back(L->getHeader());
+ for (LoopT *L : *L2)
+ SubLoopHeaders2.push_back(L->getHeader());
+
+ if (!compareVectors(SubLoopHeaders1, SubLoopHeaders2))
+ return false;
+ return true;
+ };
+
+ for (auto &I : LoopHeaders1) {
+ BlockT *H = I.first;
+ bool LoopsMatch = compareLoops(LoopHeaders1[H], LoopHeaders2[H]);
+ assert(LoopsMatch && "LoopInfo is incorrect.");
+ }
#endif
}
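The verifier now needs the dominator tree so it can recompute the loop structure from scratch and compare headers, blocks, and subloop headers. A hedged sketch of driving it, not part of the patch (F is a hypothetical Function; the expensive comparison only fires in builds with assertions enabled):

DominatorTree DT(F);
LoopInfo LI(DT);
// With the new signature, verify() re-runs the loop analysis on DT and
// asserts if the freshly computed structure disagrees with LI.
LI.verify(DT);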
diff --git a/contrib/llvm/include/llvm/Analysis/LoopIterator.h b/contrib/llvm/include/llvm/Analysis/LoopIterator.h
index e3dd96354c65..461f74351821 100644
--- a/contrib/llvm/include/llvm/Analysis/LoopIterator.h
+++ b/contrib/llvm/include/llvm/Analysis/LoopIterator.h
@@ -31,6 +31,66 @@ namespace llvm {
class LoopBlocksTraversal;
+// A traits type that is intended to be used in graph algorithms. The graph
+// traits starts at the loop header, and traverses the BasicBlocks that are in
+// the loop body, but not the loop header. Since the loop header is skipped,
+// the back edges are excluded.
+//
+// TODO: Explore the possibility to implement LoopBlocksTraversal in terms of
+// LoopBodyTraits, so that insertEdge doesn't have to be specialized.
+struct LoopBodyTraits {
+ using NodeRef = std::pair<const Loop *, BasicBlock *>;
+
+ // This wraps a const Loop * into the iterator, so we know which edges to
+ // filter out.
+ class WrappedSuccIterator
+ : public iterator_adaptor_base<
+ WrappedSuccIterator, succ_iterator,
+ typename std::iterator_traits<succ_iterator>::iterator_category,
+ NodeRef, std::ptrdiff_t, NodeRef *, NodeRef> {
+ using BaseT = iterator_adaptor_base<
+ WrappedSuccIterator, succ_iterator,
+ typename std::iterator_traits<succ_iterator>::iterator_category,
+ NodeRef, std::ptrdiff_t, NodeRef *, NodeRef>;
+
+ const Loop *L;
+
+ public:
+ WrappedSuccIterator(succ_iterator Begin, const Loop *L)
+ : BaseT(Begin), L(L) {}
+
+ NodeRef operator*() const { return {L, *I}; }
+ };
+
+ struct LoopBodyFilter {
+ bool operator()(NodeRef N) const {
+ const Loop *L = N.first;
+ return N.second != L->getHeader() && L->contains(N.second);
+ }
+ };
+
+ using ChildIteratorType =
+ filter_iterator<WrappedSuccIterator, LoopBodyFilter>;
+
+ static NodeRef getEntryNode(const Loop &G) { return {&G, G.getHeader()}; }
+
+ static ChildIteratorType child_begin(NodeRef Node) {
+ return make_filter_range(make_range<WrappedSuccIterator>(
+ {succ_begin(Node.second), Node.first},
+ {succ_end(Node.second), Node.first}),
+ LoopBodyFilter{})
+ .begin();
+ }
+
+ static ChildIteratorType child_end(NodeRef Node) {
+ return make_filter_range(make_range<WrappedSuccIterator>(
+ {succ_begin(Node.second), Node.first},
+ {succ_end(Node.second), Node.first}),
+ LoopBodyFilter{})
+ .end();
+ }
+};
+
/// Store the result of a depth first search within basic blocks contained by a
/// single loop.
///
@@ -114,7 +174,7 @@ template<> class po_iterator_storage<LoopBlocksTraversal, true> {
public:
po_iterator_storage(LoopBlocksTraversal &lbs) : LBT(lbs) {}
// These functions are defined below.
- bool insertEdge(BasicBlock *From, BasicBlock *To);
+ bool insertEdge(Optional<BasicBlock *> From, BasicBlock *To);
void finishPostorder(BasicBlock *BB);
};
@@ -166,8 +226,8 @@ public:
}
};
-inline bool po_iterator_storage<LoopBlocksTraversal, true>::
-insertEdge(BasicBlock *From, BasicBlock *To) {
+inline bool po_iterator_storage<LoopBlocksTraversal, true>::insertEdge(
+ Optional<BasicBlock *> From, BasicBlock *To) {
return LBT.visitPreorder(To);
}
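A minimal sketch of using LoopBodyTraits directly, not part of the patch (L is a hypothetical Loop*); each child is a (Loop, BasicBlock) pair with the header and out-of-loop successors already filtered out:

LoopBodyTraits::NodeRef Entry = LoopBodyTraits::getEntryNode(*L);
for (auto CI = LoopBodyTraits::child_begin(Entry),
          CE = LoopBodyTraits::child_end(Entry);
     CI != CE; ++CI) {
  BasicBlock *BodyBB = (*CI).second; // never the header, always inside L
  (void)BodyBB;
}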
diff --git a/contrib/llvm/include/llvm/Analysis/LoopPass.h b/contrib/llvm/include/llvm/Analysis/LoopPass.h
index 89debec04e94..496ae189e57b 100644
--- a/contrib/llvm/include/llvm/Analysis/LoopPass.h
+++ b/contrib/llvm/include/llvm/Analysis/LoopPass.h
@@ -107,9 +107,7 @@ public:
// LPPassManager needs LoopInfo.
void getAnalysisUsage(AnalysisUsage &Info) const override;
- const char *getPassName() const override {
- return "Loop Pass Manager";
- }
+ StringRef getPassName() const override { return "Loop Pass Manager"; }
PMDataManager *getAsPMDataManager() override { return this; }
Pass *getAsPass() override { return this; }
@@ -157,6 +155,22 @@ private:
Loop *CurrentLoop;
};
+// This pass is required by the LCSSA transformation. It is used inside
+// LPPassManager to check if the current pass preserves LCSSA form; if it
+// does, the pass manager runs LCSSA verification for the current loop.
+struct LCSSAVerificationPass : public FunctionPass {
+ static char ID;
+ LCSSAVerificationPass() : FunctionPass(ID) {
+ initializeLCSSAVerificationPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override { return false; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+};
+
} // End llvm namespace
#endif
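A hedged sketch of how a legacy loop pass might cooperate with this verifier, not part of the patch; MyLegacyLoopPass is a hypothetical pass, and the exact wiring inside LPPassManager may differ:

void MyLegacyLoopPass::getAnalysisUsage(AnalysisUsage &AU) const {
  // Requiring and preserving the verifier lets the loop pass manager re-check
  // LCSSA form after this pass has run.
  AU.addRequired<LCSSAVerificationPass>();
  AU.addPreserved<LCSSAVerificationPass>();
}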
diff --git a/contrib/llvm/include/llvm/Analysis/LoopPassManager.h b/contrib/llvm/include/llvm/Analysis/LoopPassManager.h
index a89551851259..ae9c16502feb 100644
--- a/contrib/llvm/include/llvm/Analysis/LoopPassManager.h
+++ b/contrib/llvm/include/llvm/Analysis/LoopPassManager.h
@@ -16,7 +16,11 @@
#define LLVM_ANALYSIS_LOOPPASSMANAGER_H
#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/PassManager.h"
namespace llvm {
@@ -38,11 +42,21 @@ extern template class AnalysisManager<Loop>;
/// pass manager infrastructure.
typedef AnalysisManager<Loop> LoopAnalysisManager;
-extern template class InnerAnalysisManagerProxy<LoopAnalysisManager, Function>;
/// A proxy from a \c LoopAnalysisManager to a \c Function.
typedef InnerAnalysisManagerProxy<LoopAnalysisManager, Function>
LoopAnalysisManagerFunctionProxy;
+/// Specialization of the invalidate method for the \c
+/// LoopAnalysisManagerFunctionProxy's result.
+template <>
+bool LoopAnalysisManagerFunctionProxy::Result::invalidate(
+ Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &Inv);
+
+// Ensure the \c LoopAnalysisManagerFunctionProxy is provided as an extern
+// template.
+extern template class InnerAnalysisManagerProxy<LoopAnalysisManager, Function>;
+
extern template class OuterAnalysisManagerProxy<FunctionAnalysisManager, Loop>;
/// A proxy from a \c FunctionAnalysisManager to a \c Loop.
typedef OuterAnalysisManagerProxy<FunctionAnalysisManager, Loop>
@@ -64,21 +78,6 @@ class FunctionToLoopPassAdaptor
public:
explicit FunctionToLoopPassAdaptor(LoopPassT Pass)
: Pass(std::move(Pass)) {}
- // We have to explicitly define all the special member functions because MSVC
- // refuses to generate them.
- FunctionToLoopPassAdaptor(const FunctionToLoopPassAdaptor &Arg)
- : Pass(Arg.Pass) {}
- FunctionToLoopPassAdaptor(FunctionToLoopPassAdaptor &&Arg)
- : Pass(std::move(Arg.Pass)) {}
- friend void swap(FunctionToLoopPassAdaptor &LHS,
- FunctionToLoopPassAdaptor &RHS) {
- using std::swap;
- swap(LHS.Pass, RHS.Pass);
- }
- FunctionToLoopPassAdaptor &operator=(FunctionToLoopPassAdaptor RHS) {
- swap(*this, RHS);
- return *this;
- }
/// \brief Runs the loop passes across every loop in the function.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) {
@@ -88,6 +87,14 @@ public:
// Get the loop structure for this function
LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
+ // Also precompute all of the function analyses used by loop passes.
+ // FIXME: These should be handed into the loop passes when the loop pass
+ // management layer is reworked to follow the design of CGSCC.
+ (void)AM.getResult<AAManager>(F);
+ (void)AM.getResult<DominatorTreeAnalysis>(F);
+ (void)AM.getResult<ScalarEvolutionAnalysis>(F);
+ (void)AM.getResult<TargetLibraryAnalysis>(F);
+
PreservedAnalyses PA = PreservedAnalyses::all();
// We want to visit the loops in reverse post-order. We'll build the stack
@@ -104,24 +111,24 @@ public:
// post-order.
for (auto *L : reverse(Loops)) {
PreservedAnalyses PassPA = Pass.run(*L, LAM);
- assert(PassPA.preserved(getLoopPassPreservedAnalyses()) &&
- "Loop passes must preserve all relevant analyses");
+ // FIXME: We should verify the set of analyses relevant to Loop passes
+ // are preserved.
// We know that the loop pass couldn't have invalidated any other loop's
// analyses (that's the contract of a loop pass), so directly handle the
- // loop analysis manager's invalidation here. Also, update the
- // preserved analyses to reflect that once invalidated these can again
- // be preserved.
- PassPA = LAM.invalidate(*L, std::move(PassPA));
+ // loop analysis manager's invalidation here.
+ LAM.invalidate(*L, PassPA);
// Then intersect the preserved set so that invalidation of module
// analyses will eventually occur when the module pass completes.
PA.intersect(std::move(PassPA));
}
- // By definition we preserve the proxy. This precludes *any* invalidation of
- // loop analyses by the proxy, but that's OK because we've taken care to
- // invalidate analyses in the loop analysis manager incrementally above.
+ // By definition we preserve the proxy. We also preserve all analyses on
+ // Loops. This precludes *any* invalidation of loop analyses by the proxy,
+ // but that's OK because we've taken care to invalidate analyses in the
+ // loop analysis manager incrementally above.
+ PA.preserveSet<AllAnalysesOn<Loop>>();
PA.preserve<LoopAnalysisManagerFunctionProxy>();
return PA;
}
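A minimal sketch of wiring a new-PM loop pass into a function pipeline through this adaptor, not part of the patch; MyLoopPass is hypothetical and the createFunctionToLoopPassAdaptor helper is assumed to be available alongside this header:

FunctionPassManager FPM;
// The adaptor precomputes AA, DT, SCEV and TLI (see run() above) and then
// visits the loops in reverse post-order.
FPM.addPass(createFunctionToLoopPassAdaptor(MyLoopPass()));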
diff --git a/contrib/llvm/include/llvm/Analysis/MemoryBuiltins.h b/contrib/llvm/include/llvm/Analysis/MemoryBuiltins.h
index 140b731c59de..b58f07e69475 100644
--- a/contrib/llvm/include/llvm/Analysis/MemoryBuiltins.h
+++ b/contrib/llvm/include/llvm/Analysis/MemoryBuiltins.h
@@ -141,6 +141,16 @@ bool getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL,
const TargetLibraryInfo *TLI, bool RoundToAlign = false,
ObjSizeMode Mode = ObjSizeMode::Exact);
+/// Try to turn a call to @llvm.objectsize into an integer value of the given
+/// Type. Returns null on failure.
+/// If MustSucceed is true, this function will not return null, and may return
+/// conservative values governed by the second argument of the call to
+/// objectsize.
+ConstantInt *lowerObjectSizeCall(IntrinsicInst *ObjectSize,
+ const DataLayout &DL,
+ const TargetLibraryInfo *TLI,
+ bool MustSucceed);
+
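A hedged sketch of how a transform might use the new helper, not part of the patch; II, DL and TLI are hypothetical locals naming the @llvm.objectsize call, the module's DataLayout and the TargetLibraryInfo:

if (ConstantInt *Size =
        lowerObjectSizeCall(II, DL, &TLI, /*MustSucceed=*/false)) {
  // The call could be folded; replace its uses and drop the intrinsic.
  II->replaceAllUsesWith(Size);
  II->eraseFromParent();
}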
typedef std::pair<APInt, APInt> SizeOffsetType;
/// \brief Evaluate the size and offset of an object pointed to by a Value*
diff --git a/contrib/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h b/contrib/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
index b19dabbfc32e..33dbd22f7a20 100644
--- a/contrib/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
+++ b/contrib/llvm/include/llvm/Analysis/MemoryDependenceAnalysis.h
@@ -70,7 +70,7 @@ class MemDepResult {
/// 1. This could be a load or store for dependence queries on
/// load/store. The value loaded or stored is the produced value.
/// Note that the pointer operand may be different than that of the
- /// queried pointer due to must aliases and phi translation. Note
+ /// queried pointer due to must aliases and phi translation. Note
/// that the def may not be the same type as the query, the pointers
/// may just be must aliases.
/// 2. For loads and stores, this could be an allocation instruction. In
@@ -350,9 +350,18 @@ public:
DominatorTree &DT)
: AA(AA), AC(AC), TLI(TLI), DT(DT) {}
+ /// Handle invalidation in the new PM.
+ bool invalidate(Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &Inv);
+
+ /// Some methods limit the number of instructions they will examine.
+ /// The return value of this method is the default limit that will be
+ /// used if no limit is explicitly passed in.
+ unsigned getDefaultBlockScanLimit() const;
+
/// Returns the instruction on which a memory operation depends.
///
- /// See the class comment for more details. It is illegal to call this on
+ /// See the class comment for more details. It is illegal to call this on
/// non-memory instructions.
MemDepResult getDependency(Instruction *QueryInst);
@@ -409,19 +418,25 @@ public:
/// operations. If isLoad is false, this routine ignores may-aliases
/// with reads from read-only locations. If possible, pass the query
/// instruction as well; this function may take advantage of the metadata
- /// annotated to the query instruction to refine the result.
+ /// annotated to the query instruction to refine the result. \p Limit
+ /// can be used to set the maximum number of instructions that will be
+ /// examined to find the pointer dependency. On return, it will be set to
+ /// the number of instructions left to examine. If a null pointer is passed
+ /// in, the limit will default to the value of -memdep-block-scan-limit.
///
/// Note that this is an uncached query, and thus may be inefficient.
MemDepResult getPointerDependencyFrom(const MemoryLocation &Loc, bool isLoad,
BasicBlock::iterator ScanIt,
BasicBlock *BB,
- Instruction *QueryInst = nullptr);
+ Instruction *QueryInst = nullptr,
+ unsigned *Limit = nullptr);
MemDepResult getSimplePointerDependencyFrom(const MemoryLocation &MemLoc,
bool isLoad,
BasicBlock::iterator ScanIt,
BasicBlock *BB,
- Instruction *QueryInst);
+ Instruction *QueryInst,
+ unsigned *Limit = nullptr);
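A minimal sketch of the new Limit plumbing, not part of the patch; MD, Loc, ScanIt, BB and QueryInst are hypothetical locals:

unsigned Limit = MD.getDefaultBlockScanLimit();
MemDepResult Dep = MD.getPointerDependencyFrom(Loc, /*isLoad=*/true, ScanIt,
                                               BB, QueryInst, &Limit);
// On return, Limit holds how much of the scan budget was left unexamined.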
/// This analysis looks for other loads and stores with invariant.group
/// metadata and the same pointer operand. Returns Unknown if it does not
@@ -474,12 +489,12 @@ private:
class MemoryDependenceAnalysis
: public AnalysisInfoMixin<MemoryDependenceAnalysis> {
friend AnalysisInfoMixin<MemoryDependenceAnalysis>;
- static char PassID;
+ static AnalysisKey Key;
public:
typedef MemoryDependenceResults Result;
- MemoryDependenceResults run(Function &F, AnalysisManager<Function> &AM);
+ MemoryDependenceResults run(Function &F, FunctionAnalysisManager &AM);
};
/// A wrapper analysis pass for the legacy pass manager that exposes a \c
diff --git a/contrib/llvm/include/llvm/Analysis/ModuleSummaryAnalysis.h b/contrib/llvm/include/llvm/Analysis/ModuleSummaryAnalysis.h
index 9f03610ba5b1..4f77170d9f68 100644
--- a/contrib/llvm/include/llvm/Analysis/ModuleSummaryAnalysis.h
+++ b/contrib/llvm/include/llvm/Analysis/ModuleSummaryAnalysis.h
@@ -16,48 +16,39 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/ModuleSummaryIndex.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
namespace llvm {
-
class BlockFrequencyInfo;
+class ProfileSummaryInfo;
-/// Class to build a module summary index for the given Module, possibly from
-/// a Pass.
-class ModuleSummaryIndexBuilder {
- /// The index being built
- std::unique_ptr<ModuleSummaryIndex> Index;
- /// The module for which we are building an index
- const Module *M;
+/// Direct function to compute a \c ModuleSummaryIndex from a given module.
+///
+/// If operating within a pass manager which has defined ways to compute the \c
+/// BlockFrequencyInfo for a given function, that can be provided via
+/// a std::function callback. Otherwise, this routine will manually construct
+/// that information.
+ModuleSummaryIndex buildModuleSummaryIndex(
+ const Module &M,
+ std::function<BlockFrequencyInfo *(const Function &F)> GetBFICallback,
+ ProfileSummaryInfo *PSI);
+
+/// Analysis pass to provide the ModuleSummaryIndex object.
+class ModuleSummaryIndexAnalysis
+ : public AnalysisInfoMixin<ModuleSummaryIndexAnalysis> {
+ friend AnalysisInfoMixin<ModuleSummaryIndexAnalysis>;
+ static AnalysisKey Key;
public:
- /// Default constructor
- ModuleSummaryIndexBuilder() = default;
-
- /// Constructor that builds an index for the given Module. An optional
- /// callback can be supplied to obtain the frequency info for a function.
- ModuleSummaryIndexBuilder(
- const Module *M,
- std::function<BlockFrequencyInfo *(const Function &F)> Ftor = nullptr);
-
- /// Get a reference to the index owned by builder
- ModuleSummaryIndex &getIndex() const { return *Index; }
+ typedef ModuleSummaryIndex Result;
- /// Take ownership of the built index
- std::unique_ptr<ModuleSummaryIndex> takeIndex() { return std::move(Index); }
-
-private:
- /// Compute summary for given function with optional frequency information
- void computeFunctionSummary(const Function &F,
- BlockFrequencyInfo *BFI = nullptr);
-
- /// Compute summary for given variable with optional frequency information
- void computeVariableSummary(const GlobalVariable &V);
+ Result run(Module &M, ModuleAnalysisManager &AM);
};
/// Legacy wrapper pass to provide the ModuleSummaryIndex object.
class ModuleSummaryIndexWrapperPass : public ModulePass {
- std::unique_ptr<ModuleSummaryIndexBuilder> IndexBuilder;
+ Optional<ModuleSummaryIndex> Index;
public:
static char ID;
@@ -65,10 +56,8 @@ public:
ModuleSummaryIndexWrapperPass();
/// Get the index built by pass
- ModuleSummaryIndex &getIndex() { return IndexBuilder->getIndex(); }
- const ModuleSummaryIndex &getIndex() const {
- return IndexBuilder->getIndex();
- }
+ ModuleSummaryIndex &getIndex() { return *Index; }
+ const ModuleSummaryIndex &getIndex() const { return *Index; }
bool runOnModule(Module &M) override;
bool doFinalization(Module &M) override;
@@ -81,11 +70,6 @@ public:
// object for the module, to be written to bitcode or LLVM assembly.
//
ModulePass *createModuleSummaryIndexWrapperPass();
-
-/// Returns true if \p M is eligible for ThinLTO promotion.
-///
-/// Currently we check if it has any any InlineASM that uses an internal symbol.
-bool moduleCanBeRenamedForThinLTO(const Module &M);
}
#endif
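A hedged sketch of calling the free function outside a pass manager, not part of the patch; M and PSI are hypothetical, and passing a null callback asks the routine to construct BlockFrequencyInfo itself:

ModuleSummaryIndex Index =
    buildModuleSummaryIndex(M, /*GetBFICallback=*/nullptr, PSI);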
diff --git a/contrib/llvm/include/llvm/Analysis/ObjCARCAliasAnalysis.h b/contrib/llvm/include/llvm/Analysis/ObjCARCAliasAnalysis.h
index 067a964bcce1..db524ff64ecd 100644
--- a/contrib/llvm/include/llvm/Analysis/ObjCARCAliasAnalysis.h
+++ b/contrib/llvm/include/llvm/Analysis/ObjCARCAliasAnalysis.h
@@ -48,7 +48,10 @@ public:
/// Handle invalidation events from the new pass manager.
///
/// By definition, this result is stateless and so remains valid.
- bool invalidate(Function &, const PreservedAnalyses &) { return false; }
+ bool invalidate(Function &, const PreservedAnalyses &,
+ FunctionAnalysisManager::Invalidator &) {
+ return false;
+ }
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB);
bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal);
@@ -63,12 +66,12 @@ public:
/// Analysis pass providing a never-invalidated alias analysis result.
class ObjCARCAA : public AnalysisInfoMixin<ObjCARCAA> {
friend AnalysisInfoMixin<ObjCARCAA>;
- static char PassID;
+ static AnalysisKey Key;
public:
typedef ObjCARCAAResult Result;
- ObjCARCAAResult run(Function &F, AnalysisManager<Function> &AM);
+ ObjCARCAAResult run(Function &F, FunctionAnalysisManager &AM);
};
/// Legacy wrapper pass to provide the ObjCARCAAResult object.
diff --git a/contrib/llvm/include/llvm/Analysis/OptimizationDiagnosticInfo.h b/contrib/llvm/include/llvm/Analysis/OptimizationDiagnosticInfo.h
index b455a6527bf6..39269269c244 100644
--- a/contrib/llvm/include/llvm/Analysis/OptimizationDiagnosticInfo.h
+++ b/contrib/llvm/include/llvm/Analysis/OptimizationDiagnosticInfo.h
@@ -16,24 +16,44 @@
#define LLVM_IR_OPTIMIZATIONDIAGNOSTICINFO_H
#include "llvm/ADT/Optional.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
namespace llvm {
-class BlockFrequencyInfo;
class DebugLoc;
-class Function;
class LLVMContext;
class Loop;
class Pass;
class Twine;
class Value;
+/// The optimization diagnostic interface.
+///
+/// It allows reporting when optimizations are performed and when they are not
+/// along with the reasons for it. Hotness information of the corresponding
+/// code region can be included in the remark if DiagnosticHotnessRequested is
+/// enabled in the LLVM context.
class OptimizationRemarkEmitter {
public:
OptimizationRemarkEmitter(Function *F, BlockFrequencyInfo *BFI)
: F(F), BFI(BFI) {}
+ /// \brief This variant can be used to generate ORE on demand (without the
+ /// analysis pass).
+ ///
+ /// Note that this ctor has a very different cost depending on whether
+ /// F->getContext().getDiagnosticHotnessRequested() is on or not. If it's off,
+ /// the operation is free.
+ ///
+ /// Whereas if DiagnosticHotnessRequested is on, it is a fairly expensive
+ /// operation since BFI and all its required analyses are computed. This is,
+ /// for example, useful for CGSCC passes that can't use function analysis
+ /// passes in the old PM.
+ OptimizationRemarkEmitter(Function *F);
+
OptimizationRemarkEmitter(OptimizationRemarkEmitter &&Arg)
: F(Arg.F), BFI(Arg.BFI) {}
@@ -43,33 +63,187 @@ public:
return *this;
}
+ /// The new interface to emit remarks.
+ void emit(DiagnosticInfoOptimizationBase &OptDiag);
+
+ /// Emit an optimization-applied message.
+ ///
+ /// \p PassName is the name of the pass emitting the message. If -Rpass= is
+ /// given and \p PassName matches the regular expression in -Rpass, then the
+ /// remark will be emitted. \p Fn is the function triggering the remark, \p
+ /// DLoc is the debug location where the diagnostic is generated. \p V is the
+ /// IR Value that identifies the code region. \p Msg is the message string to
+ /// use.
+ void emitOptimizationRemark(const char *PassName, const DebugLoc &DLoc,
+ const Value *V, const Twine &Msg);
+
+ /// \brief Same as above but derives the IR Value for the code region and the
+ /// debug location from the Loop parameter \p L.
+ void emitOptimizationRemark(const char *PassName, Loop *L, const Twine &Msg);
+
+ /// \brief Same as above but derives the debug location and the code region
+ /// from the debug location and the basic block of \p Inst, respectively.
+ void emitOptimizationRemark(const char *PassName, Instruction *Inst,
+ const Twine &Msg) {
+ emitOptimizationRemark(PassName, Inst->getDebugLoc(), Inst->getParent(),
+ Msg);
+ }
+
/// Emit an optimization-missed message.
///
/// \p PassName is the name of the pass emitting the message. If
/// -Rpass-missed= is given and the name matches the regular expression in
- /// -Rpass, then the remark will be emitted. \p Fn is the function triggering
- /// the remark, \p DLoc is the debug location where the diagnostic is
- /// generated. \p V is the IR Value that identifies the code region. \p Msg is
- /// the message string to use.
+ /// -Rpass, then the remark will be emitted. \p DLoc is the debug location
+ /// where the diagnostic is generated. \p V is the IR Value that identifies
+ /// the code region. \p Msg is the message string to use. If \p IsVerbose is
+ /// true, the message is considered verbose and will only be emitted when
+ /// verbose output is turned on.
void emitOptimizationRemarkMissed(const char *PassName, const DebugLoc &DLoc,
- Value *V, const Twine &Msg);
+ const Value *V, const Twine &Msg,
+ bool IsVerbose = false);
/// \brief Same as above but derives the IR Value for the code region and the
/// debug location from the Loop parameter \p L.
void emitOptimizationRemarkMissed(const char *PassName, Loop *L,
- const Twine &Msg);
+ const Twine &Msg, bool IsVerbose = false);
+
+ /// \brief Same as above but derives the debug location and the code region
+ /// from the debug location and the basic block of \p Inst, respectively.
+ void emitOptimizationRemarkMissed(const char *PassName, Instruction *Inst,
+ const Twine &Msg, bool IsVerbose = false) {
+ emitOptimizationRemarkMissed(PassName, Inst->getDebugLoc(),
+ Inst->getParent(), Msg, IsVerbose);
+ }
+
+ /// Emit an optimization analysis remark message.
+ ///
+ /// \p PassName is the name of the pass emitting the message. If
+ /// -Rpass-analysis= is given and \p PassName matches the regular expression
+ /// in -Rpass, then the remark will be emitted. \p DLoc is the debug location
+ /// where the diagnostic is generated. \p V is the IR Value that identifies
+ /// the code region. \p Msg is the message string to use. If \p IsVerbose is
+ /// true, the message is considered verbose and will only be emitted when
+ /// verbose output is turned on.
+ void emitOptimizationRemarkAnalysis(const char *PassName,
+ const DebugLoc &DLoc, const Value *V,
+ const Twine &Msg, bool IsVerbose = false);
+
+ /// \brief Same as above but derives the IR Value for the code region and the
+ /// debug location from the Loop parameter \p L.
+ void emitOptimizationRemarkAnalysis(const char *PassName, Loop *L,
+ const Twine &Msg, bool IsVerbose = false);
+
+ /// \brief Same as above but derives the debug location and the code region
+ /// from the debug location and the basic block of \p Inst, respectively.
+ void emitOptimizationRemarkAnalysis(const char *PassName, Instruction *Inst,
+ const Twine &Msg,
+ bool IsVerbose = false) {
+ emitOptimizationRemarkAnalysis(PassName, Inst->getDebugLoc(),
+ Inst->getParent(), Msg, IsVerbose);
+ }
+
+ /// \brief This variant allows specifying what should be emitted for missed
+ /// and analysis remarks in one call.
+ ///
+ /// \p PassName is the name of the pass emitting the message. If
+ /// -Rpass-missed= is given and \p PassName matches the regular expression, \p
+ /// MsgForMissedRemark is emitted.
+ ///
+ /// If -Rpass-analysis= is given and \p PassName matches the regular
+ /// expression, \p MsgForAnalysisRemark is emitted.
+ ///
+ /// The debug location and the code region is derived from \p Inst. If \p
+ /// IsVerbose is true, the message is considered verbose and will only be
+ /// emitted when verbose output is turned on.
+ void emitOptimizationRemarkMissedAndAnalysis(
+ const char *PassName, Instruction *Inst, const Twine &MsgForMissedRemark,
+ const Twine &MsgForAnalysisRemark, bool IsVerbose = false) {
+ emitOptimizationRemarkAnalysis(PassName, Inst, MsgForAnalysisRemark,
+ IsVerbose);
+ emitOptimizationRemarkMissed(PassName, Inst, MsgForMissedRemark, IsVerbose);
+ }
+
+ /// \brief Emit an optimization analysis remark related to floating-point
+ /// non-commutativity.
+ ///
+ /// \p PassName is the name of the pass emitting the message. If
+ /// -Rpass-analysis= is given and \p PassName matches the regular expression
+ /// in -Rpass, then the remark will be emitted. \p Fn is the function
+ /// triggering the remark, \p DLoc is the debug location where the diagnostic
+ /// is generated. \p V is the IR Value that identifies the code region. \p Msg
+ /// is the message string to use.
+ void emitOptimizationRemarkAnalysisFPCommute(const char *PassName,
+ const DebugLoc &DLoc,
+ const Value *V,
+ const Twine &Msg);
+
+ /// \brief Emit an optimization analysis remark related to pointer aliasing.
+ ///
+ /// \p PassName is the name of the pass emitting the message. If
+ /// -Rpass-analysis= is given and \p PassName matches the regular expression
+ /// in -Rpass, then the remark will be emitted. \p Fn is the function
+ /// triggering the remark, \p DLoc is the debug location where the diagnostic
+ /// is generated. \p V is the IR Value that identifies the code region. \p Msg
+ /// is the message string to use.
+ void emitOptimizationRemarkAnalysisAliasing(const char *PassName,
+ const DebugLoc &DLoc,
+ const Value *V, const Twine &Msg);
+
+ /// \brief Same as above but derives the IR Value for the code region and the
+ /// debug location from the Loop parameter \p L.
+ void emitOptimizationRemarkAnalysisAliasing(const char *PassName, Loop *L,
+ const Twine &Msg);
+
+ /// \brief Whether we allow for extra compile-time budget to perform more
+ /// analysis to produce fewer false positives.
+ ///
+ /// This is useful when reporting missed optimizations. In this case we can
+ /// use the extra analysis (1) to filter trivial false positives or (2) to
+ /// provide more context so that non-trivial false positives can be quickly
+ /// detected by the user.
+ bool allowExtraAnalysis() const {
+ // For now, only allow this with -fsave-optimization-record since the -Rpass
+ // options are handled in the front-end.
+ return F->getContext().getDiagnosticsOutputFile();
+ }
private:
Function *F;
BlockFrequencyInfo *BFI;
- Optional<uint64_t> computeHotness(Value *V);
+ /// If we generate BFI on demand, we need to free it when ORE is freed.
+ std::unique_ptr<BlockFrequencyInfo> OwnedBFI;
+
+ /// Compute hotness from IR value (currently assumed to be a block) if PGO is
+ /// available.
+ Optional<uint64_t> computeHotness(const Value *V);
+
+ /// Similar but use value from \p OptDiag and update hotness there.
+ void computeHotness(DiagnosticInfoOptimizationBase &OptDiag);
+
+ /// \brief Only allow verbose messages if we know we're filtering by hotness
+ /// (BFI is only set in this case).
+ bool shouldEmitVerbose() { return BFI != nullptr; }
OptimizationRemarkEmitter(const OptimizationRemarkEmitter &) = delete;
void operator=(const OptimizationRemarkEmitter &) = delete;
};
+/// \brief Add a small namespace to avoid name clashes with the classes used in
+/// the streaming interface. We want these to be short for better
+/// readability when writing remarks.
+namespace ore {
+using NV = DiagnosticInfoOptimizationBase::Argument;
+using setIsVerbose = DiagnosticInfoOptimizationBase::setIsVerbose;
+using setExtraArgs = DiagnosticInfoOptimizationBase::setExtraArgs;
+}
+
+/// OptimizationRemarkEmitter legacy analysis pass
+///
+/// Note that this pass shouldn't generally be marked as preserved by other
+/// passes. It's holding onto BFI, so if the pass does not preserve BFI, BFI
+/// could be freed.
class OptimizationRemarkEmitterWrapperPass : public FunctionPass {
std::unique_ptr<OptimizationRemarkEmitter> ORE;
@@ -91,14 +265,14 @@ public:
class OptimizationRemarkEmitterAnalysis
: public AnalysisInfoMixin<OptimizationRemarkEmitterAnalysis> {
friend AnalysisInfoMixin<OptimizationRemarkEmitterAnalysis>;
- static char PassID;
+ static AnalysisKey Key;
public:
/// \brief Provide the result typedef for this analysis pass.
typedef OptimizationRemarkEmitter Result;
/// \brief Run the analysis pass over a function and produce BFI.
- Result run(Function &F, AnalysisManager<Function> &AM);
+ Result run(Function &F, FunctionAnalysisManager &AM);
};
}
#endif // LLVM_IR_OPTIMIZATIONDIAGNOSTICINFO_H
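A minimal sketch of the on-demand emitter described above, not part of the patch; F, Inst and the pass name are hypothetical:

// Cheap unless diagnostic hotness was requested, in which case BFI is built.
OptimizationRemarkEmitter ORE(&F);
ORE.emitOptimizationRemarkMissed("my-pass", Inst,
                                 "loop not vectorized: unsafe dependence",
                                 /*IsVerbose=*/false);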
diff --git a/contrib/llvm/include/llvm/Analysis/PostDominators.h b/contrib/llvm/include/llvm/Analysis/PostDominators.h
index 99240a40408e..34361dac8c16 100644
--- a/contrib/llvm/include/llvm/Analysis/PostDominators.h
+++ b/contrib/llvm/include/llvm/Analysis/PostDominators.h
@@ -26,21 +26,13 @@ struct PostDominatorTree : public DominatorTreeBase<BasicBlock> {
typedef DominatorTreeBase<BasicBlock> Base;
PostDominatorTree() : DominatorTreeBase<BasicBlock>(true) {}
-
- PostDominatorTree(PostDominatorTree &&Arg)
- : Base(std::move(static_cast<Base &>(Arg))) {}
-
- PostDominatorTree &operator=(PostDominatorTree &&RHS) {
- Base::operator=(std::move(static_cast<Base &>(RHS)));
- return *this;
- }
};
/// \brief Analysis pass which computes a \c PostDominatorTree.
class PostDominatorTreeAnalysis
: public AnalysisInfoMixin<PostDominatorTreeAnalysis> {
friend AnalysisInfoMixin<PostDominatorTreeAnalysis>;
- static char PassID;
+ static AnalysisKey Key;
public:
/// \brief Provide the result typedef for this analysis pass.
@@ -58,7 +50,7 @@ class PostDominatorTreePrinterPass
public:
explicit PostDominatorTreePrinterPass(raw_ostream &OS);
- PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM);
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
struct PostDominatorTreeWrapperPass : public FunctionPass {
@@ -89,7 +81,7 @@ FunctionPass* createPostDomTree();
template <> struct GraphTraits<PostDominatorTree*>
: public GraphTraits<DomTreeNode*> {
- static NodeType *getEntryNode(PostDominatorTree *DT) {
+ static NodeRef getEntryNode(PostDominatorTree *DT) {
return DT->getRootNode();
}
diff --git a/contrib/llvm/include/llvm/Analysis/ProfileSummaryInfo.h b/contrib/llvm/include/llvm/Analysis/ProfileSummaryInfo.h
index cd624c8404da..d7fe76e278e3 100644
--- a/contrib/llvm/include/llvm/Analysis/ProfileSummaryInfo.h
+++ b/contrib/llvm/include/llvm/Analysis/ProfileSummaryInfo.h
@@ -27,6 +27,8 @@
#include <memory>
namespace llvm {
+class BasicBlock;
+class BlockFrequencyInfo;
class ProfileSummary;
/// \brief Analysis providing profile information.
///
@@ -51,14 +53,16 @@ public:
ProfileSummaryInfo(Module &M) : M(M) {}
ProfileSummaryInfo(ProfileSummaryInfo &&Arg)
: M(Arg.M), Summary(std::move(Arg.Summary)) {}
+ /// \brief Returns true if \p F has hot function entry.
+ bool isFunctionEntryHot(const Function *F);
+ /// \brief Returns true if \p F has cold function entry.
+ bool isFunctionEntryCold(const Function *F);
/// \brief Returns true if \p F is a hot function.
- bool isHotFunction(const Function *F);
- /// \brief Returns true if \p F is a cold function.
- bool isColdFunction(const Function *F);
- /// \brief Returns true if count \p C is considered hot.
bool isHotCount(uint64_t C);
/// \brief Returns true if count \p C is considered cold.
bool isColdCount(uint64_t C);
+ /// \brief Returns true if BasicBlock \p B is considered hot.
+ bool isHotBB(const BasicBlock *B, BlockFrequencyInfo *BFI);
};
/// An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
@@ -69,7 +73,12 @@ public:
static char ID;
ProfileSummaryInfoWrapperPass();
- ProfileSummaryInfo *getPSI(Module &M);
+ ProfileSummaryInfo *getPSI() {
+ return &*PSI;
+ }
+
+ bool doInitialization(Module &M) override;
+ bool doFinalization(Module &M) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
}
@@ -81,21 +90,11 @@ class ProfileSummaryAnalysis
public:
typedef ProfileSummaryInfo Result;
- ProfileSummaryAnalysis() {}
- ProfileSummaryAnalysis(const ProfileSummaryAnalysis &Arg) {}
- ProfileSummaryAnalysis(ProfileSummaryAnalysis &&Arg) {}
- ProfileSummaryAnalysis &operator=(const ProfileSummaryAnalysis &RHS) {
- return *this;
- }
- ProfileSummaryAnalysis &operator=(ProfileSummaryAnalysis &&RHS) {
- return *this;
- }
-
Result run(Module &M, ModuleAnalysisManager &);
private:
friend AnalysisInfoMixin<ProfileSummaryAnalysis>;
- static char PassID;
+ static AnalysisKey Key;
};
/// \brief Printer pass that uses \c ProfileSummaryAnalysis.
@@ -105,7 +104,7 @@ class ProfileSummaryPrinterPass
public:
explicit ProfileSummaryPrinterPass(raw_ostream &OS) : OS(OS) {}
- PreservedAnalyses run(Module &M, AnalysisManager<Module> &AM);
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
};
} // end namespace llvm
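A hedged sketch of querying the summary from a legacy pass after the wrapper-pass interface change, not part of the patch; F is the function being processed and the enclosing pass is hypothetical:

ProfileSummaryInfo *PSI = getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
if (PSI->isFunctionEntryHot(&F)) {
  // e.g. relax size thresholds for provably hot functions
}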
diff --git a/contrib/llvm/include/llvm/Analysis/RegionInfo.h b/contrib/llvm/include/llvm/Analysis/RegionInfo.h
index 91bfd435f08c..f2f27a137a88 100644
--- a/contrib/llvm/include/llvm/Analysis/RegionInfo.h
+++ b/contrib/llvm/include/llvm/Analysis/RegionInfo.h
@@ -39,6 +39,7 @@
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/PassManager.h"
@@ -278,7 +279,7 @@ class RegionBase : public RegionNodeBase<Tr> {
// The subregions of this region.
RegionSet children;
- typedef std::map<BlockT *, RegionNodeT *> BBNodeMapT;
+ typedef std::map<BlockT *, std::unique_ptr<RegionNodeT>> BBNodeMapT;
// Save the BasicBlock RegionNodes that are element of this Region.
mutable BBNodeMapT BBNodeMap;
@@ -567,10 +568,10 @@ public:
public:
typedef block_iterator_wrapper<IsConst> Self;
- typedef typename super::pointer pointer;
+ typedef typename super::value_type value_type;
// Construct the begin iterator.
- block_iterator_wrapper(pointer Entry, pointer Exit)
+ block_iterator_wrapper(value_type Entry, value_type Exit)
: super(df_begin(Entry)) {
// Mark the exit of the region as visited, so that the children of the
// exit and the exit itself, i.e. the block outside the region will never
@@ -579,7 +580,7 @@ public:
}
// Construct the end iterator.
- block_iterator_wrapper() : super(df_end<pointer>((BlockT *)nullptr)) {}
+ block_iterator_wrapper() : super(df_end<value_type>((BlockT *)nullptr)) {}
/*implicit*/ block_iterator_wrapper(super I) : super(I) {}
@@ -625,18 +626,26 @@ public:
/// are direct children of this Region. It does not iterate over any
/// RegionNodes that are also element of a subregion of this Region.
//@{
- typedef df_iterator<RegionNodeT *, SmallPtrSet<RegionNodeT *, 8>, false,
- GraphTraits<RegionNodeT *>> element_iterator;
+ typedef df_iterator<RegionNodeT *, df_iterator_default_set<RegionNodeT *>,
+ false, GraphTraits<RegionNodeT *>>
+ element_iterator;
- typedef df_iterator<const RegionNodeT *, SmallPtrSet<const RegionNodeT *, 8>,
- false,
- GraphTraits<const RegionNodeT *>> const_element_iterator;
+ typedef df_iterator<const RegionNodeT *,
+ df_iterator_default_set<const RegionNodeT *>, false,
+ GraphTraits<const RegionNodeT *>>
+ const_element_iterator;
element_iterator element_begin();
element_iterator element_end();
+ iterator_range<element_iterator> elements() {
+ return make_range(element_begin(), element_end());
+ }
const_element_iterator element_begin() const;
const_element_iterator element_end() const;
+ iterator_range<const_element_iterator> elements() const {
+ return make_range(element_begin(), element_end());
+ }
//@}
};
@@ -669,7 +678,6 @@ class RegionInfoBase {
friend class MachineRegionInfo;
typedef DenseMap<BlockT *, BlockT *> BBtoBBMap;
typedef DenseMap<BlockT *, RegionT *> BBtoRegionMap;
- typedef SmallPtrSet<RegionT *, 4> RegionSet;
RegionInfoBase();
virtual ~RegionInfoBase();
@@ -925,12 +933,12 @@ public:
/// \brief Analysis pass that exposes the \c RegionInfo for a function.
class RegionInfoAnalysis : public AnalysisInfoMixin<RegionInfoAnalysis> {
friend AnalysisInfoMixin<RegionInfoAnalysis>;
- static char PassID;
+ static AnalysisKey Key;
public:
typedef RegionInfo Result;
- RegionInfo run(Function &F, AnalysisManager<Function> &AM);
+ RegionInfo run(Function &F, FunctionAnalysisManager &AM);
};
/// \brief Printer pass for the \c RegionInfo.
@@ -939,12 +947,12 @@ class RegionInfoPrinterPass : public PassInfoMixin<RegionInfoPrinterPass> {
public:
explicit RegionInfoPrinterPass(raw_ostream &OS);
- PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM);
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
/// \brief Verifier pass for the \c RegionInfo.
struct RegionInfoVerifierPass : PassInfoMixin<RegionInfoVerifierPass> {
- PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM);
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
template <>
diff --git a/contrib/llvm/include/llvm/Analysis/RegionInfoImpl.h b/contrib/llvm/include/llvm/Analysis/RegionInfoImpl.h
index 15dd1a2000e6..a16c534484b3 100644
--- a/contrib/llvm/include/llvm/Analysis/RegionInfoImpl.h
+++ b/contrib/llvm/include/llvm/Analysis/RegionInfoImpl.h
@@ -38,12 +38,6 @@ RegionBase<Tr>::RegionBase(BlockT *Entry, BlockT *Exit,
template <class Tr>
RegionBase<Tr>::~RegionBase() {
- // Free the cached nodes.
- for (typename BBNodeMapT::iterator it = BBNodeMap.begin(),
- ie = BBNodeMap.end();
- it != ie; ++it)
- delete it->second;
-
// Only clean the cache for this Region. Caches of child Regions will be
// cleaned when the child Regions are deleted.
BBNodeMap.clear();
@@ -71,10 +65,9 @@ void RegionBase<Tr>::replaceEntryRecursive(BlockT *NewEntry) {
RegionQueue.pop_back();
R->replaceEntry(NewEntry);
- for (typename RegionT::const_iterator RI = R->begin(), RE = R->end();
- RI != RE; ++RI) {
- if ((*RI)->getEntry() == OldEntry)
- RegionQueue.push_back(RI->get());
+ for (std::unique_ptr<RegionT> &Child : *R) {
+ if (Child->getEntry() == OldEntry)
+ RegionQueue.push_back(Child.get());
}
}
}
@@ -90,10 +83,9 @@ void RegionBase<Tr>::replaceExitRecursive(BlockT *NewExit) {
RegionQueue.pop_back();
R->replaceExit(NewExit);
- for (typename RegionT::const_iterator RI = R->begin(), RE = R->end();
- RI != RE; ++RI) {
- if ((*RI)->getExit() == OldExit)
- RegionQueue.push_back(RI->get());
+ for (std::unique_ptr<RegionT> &Child : *R) {
+ if (Child->getExit() == OldExit)
+ RegionQueue.push_back(Child.get());
}
}
}
@@ -160,13 +152,10 @@ typename Tr::LoopT *RegionBase<Tr>::outermostLoopInRegion(LoopInfoT *LI,
template <class Tr>
typename RegionBase<Tr>::BlockT *RegionBase<Tr>::getEnteringBlock() const {
BlockT *entry = getEntry();
- BlockT *Pred;
BlockT *enteringBlock = nullptr;
- for (PredIterTy PI = InvBlockTraits::child_begin(entry),
- PE = InvBlockTraits::child_end(entry);
- PI != PE; ++PI) {
- Pred = *PI;
+ for (BlockT *Pred : make_range(InvBlockTraits::child_begin(entry),
+ InvBlockTraits::child_end(entry))) {
if (DT->getNode(Pred) && !contains(Pred)) {
if (enteringBlock)
return nullptr;
@@ -181,16 +170,13 @@ typename RegionBase<Tr>::BlockT *RegionBase<Tr>::getEnteringBlock() const {
template <class Tr>
typename RegionBase<Tr>::BlockT *RegionBase<Tr>::getExitingBlock() const {
BlockT *exit = getExit();
- BlockT *Pred;
BlockT *exitingBlock = nullptr;
if (!exit)
return nullptr;
- for (PredIterTy PI = InvBlockTraits::child_begin(exit),
- PE = InvBlockTraits::child_end(exit);
- PI != PE; ++PI) {
- Pred = *PI;
+ for (BlockT *Pred : make_range(InvBlockTraits::child_begin(exit),
+ InvBlockTraits::child_end(exit))) {
if (contains(Pred)) {
if (exitingBlock)
return nullptr;
@@ -239,19 +225,17 @@ void RegionBase<Tr>::verifyBBInRegion(BlockT *BB) const {
BlockT *entry = getEntry(), *exit = getExit();
- for (SuccIterTy SI = BlockTraits::child_begin(BB),
- SE = BlockTraits::child_end(BB);
- SI != SE; ++SI) {
- if (!contains(*SI) && exit != *SI)
+ for (BlockT *Succ :
+ make_range(BlockTraits::child_begin(BB), BlockTraits::child_end(BB))) {
+ if (!contains(Succ) && exit != Succ)
llvm_unreachable("Broken region found: edges leaving the region must go "
"to the exit node!");
}
if (entry != BB) {
- for (PredIterTy SI = InvBlockTraits::child_begin(BB),
- SE = InvBlockTraits::child_end(BB);
- SI != SE; ++SI) {
- if (!contains(*SI))
+ for (BlockT *Pred : make_range(InvBlockTraits::child_begin(BB),
+ InvBlockTraits::child_end(BB))) {
+ if (!contains(Pred))
llvm_unreachable("Broken region found: edges entering the region must "
"go to the entry node!");
}
@@ -266,11 +250,10 @@ void RegionBase<Tr>::verifyWalk(BlockT *BB, std::set<BlockT *> *visited) const {
verifyBBInRegion(BB);
- for (SuccIterTy SI = BlockTraits::child_begin(BB),
- SE = BlockTraits::child_end(BB);
- SI != SE; ++SI) {
- if (*SI != exit && visited->find(*SI) == visited->end())
- verifyWalk(*SI, visited);
+ for (BlockT *Succ :
+ make_range(BlockTraits::child_begin(BB), BlockTraits::child_end(BB))) {
+ if (Succ != exit && visited->find(Succ) == visited->end())
+ verifyWalk(Succ, visited);
}
}
@@ -288,9 +271,8 @@ void RegionBase<Tr>::verifyRegion() const {
template <class Tr>
void RegionBase<Tr>::verifyRegionNest() const {
- for (typename RegionT::const_iterator RI = begin(), RE = end(); RI != RE;
- ++RI)
- (*RI)->verifyRegionNest();
+ for (const std::unique_ptr<RegionT> &R : *this)
+ R->verifyRegionNest();
verifyRegion();
}
@@ -345,13 +327,13 @@ typename Tr::RegionNodeT *RegionBase<Tr>::getBBNode(BlockT *BB) const {
typename BBNodeMapT::const_iterator at = BBNodeMap.find(BB);
- if (at != BBNodeMap.end())
- return at->second;
-
- auto Deconst = const_cast<RegionBase<Tr> *>(this);
- RegionNodeT *NewNode = new RegionNodeT(static_cast<RegionT *>(Deconst), BB);
- BBNodeMap.insert(std::make_pair(BB, NewNode));
- return NewNode;
+ if (at == BBNodeMap.end()) {
+ auto Deconst = const_cast<RegionBase<Tr> *>(this);
+ typename BBNodeMapT::value_type V = {
+ BB, make_unique<RegionNodeT>(static_cast<RegionT *>(Deconst), BB)};
+ at = BBNodeMap.insert(std::move(V)).first;
+ }
+ return at->second.get();
}
template <class Tr>
@@ -365,9 +347,9 @@ typename Tr::RegionNodeT *RegionBase<Tr>::getNode(BlockT *BB) const {
template <class Tr>
void RegionBase<Tr>::transferChildrenTo(RegionT *To) {
- for (iterator I = begin(), E = end(); I != E; ++I) {
- (*I)->parent = To;
- To->children.push_back(std::move(*I));
+ for (std::unique_ptr<RegionT> &R : *this) {
+ R->parent = To;
+ To->children.push_back(std::move(R));
}
children.clear();
}
@@ -375,9 +357,10 @@ void RegionBase<Tr>::transferChildrenTo(RegionT *To) {
template <class Tr>
void RegionBase<Tr>::addSubRegion(RegionT *SubRegion, bool moveChildren) {
assert(!SubRegion->parent && "SubRegion already has a parent!");
- assert(std::find_if(begin(), end(), [&](const std::unique_ptr<RegionT> &R) {
- return R.get() == SubRegion;
- }) == children.end() &&
+ assert(find_if(*this,
+ [&](const std::unique_ptr<RegionT> &R) {
+ return R.get() == SubRegion;
+ }) == children.end() &&
"Subregion already exists!");
SubRegion->parent = static_cast<RegionT *>(this);
@@ -389,9 +372,9 @@ void RegionBase<Tr>::addSubRegion(RegionT *SubRegion, bool moveChildren) {
assert(SubRegion->children.empty() &&
"SubRegions that contain children are not supported");
- for (element_iterator I = element_begin(), E = element_end(); I != E; ++I) {
- if (!(*I)->isSubRegion()) {
- BlockT *BB = (*I)->template getNodeAs<BlockT>();
+ for (RegionNodeT *Element : elements()) {
+ if (!Element->isSubRegion()) {
+ BlockT *BB = Element->template getNodeAs<BlockT>();
if (SubRegion->contains(BB))
RI->setRegionFor(BB, SubRegion);
@@ -399,12 +382,12 @@ void RegionBase<Tr>::addSubRegion(RegionT *SubRegion, bool moveChildren) {
}
std::vector<std::unique_ptr<RegionT>> Keep;
- for (iterator I = begin(), E = end(); I != E; ++I) {
- if (SubRegion->contains(I->get()) && I->get() != SubRegion) {
- (*I)->parent = SubRegion;
- SubRegion->children.push_back(std::move(*I));
+ for (std::unique_ptr<RegionT> &R : *this) {
+ if (SubRegion->contains(R.get()) && R.get() != SubRegion) {
+ R->parent = SubRegion;
+ SubRegion->children.push_back(std::move(R));
} else
- Keep.push_back(std::move(*I));
+ Keep.push_back(std::move(R));
}
children.clear();
@@ -418,9 +401,10 @@ template <class Tr>
typename Tr::RegionT *RegionBase<Tr>::removeSubRegion(RegionT *Child) {
assert(Child->parent == this && "Child is not a child of this region!");
Child->parent = nullptr;
- typename RegionSet::iterator I = std::find_if(
- children.begin(), children.end(),
- [&](const std::unique_ptr<RegionT> &R) { return R.get() == Child; });
+ typename RegionSet::iterator I =
+ find_if(children, [&](const std::unique_ptr<RegionT> &R) {
+ return R.get() == Child;
+ });
assert(I != children.end() && "Region does not exit. Unable to remove.");
children.erase(children.begin() + (I - begin()));
return Child;
@@ -446,10 +430,9 @@ typename Tr::RegionT *RegionBase<Tr>::getExpandedRegion() const {
RegionT *R = RI->getRegionFor(exit);
if (R->getEntry() != exit) {
- for (PredIterTy PI = InvBlockTraits::child_begin(getExit()),
- PE = InvBlockTraits::child_end(getExit());
- PI != PE; ++PI)
- if (!contains(*PI))
+ for (BlockT *Pred : make_range(InvBlockTraits::child_begin(getExit()),
+ InvBlockTraits::child_end(getExit())))
+ if (!contains(Pred))
return nullptr;
if (Tr::getNumSuccessors(exit) == 1)
return new RegionT(getEntry(), *BlockTraits::child_begin(exit), RI, DT);
@@ -459,10 +442,9 @@ typename Tr::RegionT *RegionBase<Tr>::getExpandedRegion() const {
while (R->getParent() && R->getParent()->getEntry() == exit)
R = R->getParent();
- for (PredIterTy PI = InvBlockTraits::child_begin(getExit()),
- PE = InvBlockTraits::child_end(getExit());
- PI != PE; ++PI) {
- if (!(contains(*PI) || R->contains(*PI)))
+ for (BlockT *Pred : make_range(InvBlockTraits::child_begin(getExit()),
+ InvBlockTraits::child_end(getExit()))) {
+ if (!(contains(Pred) || R->contains(Pred)))
return nullptr;
}
@@ -487,9 +469,8 @@ void RegionBase<Tr>::print(raw_ostream &OS, bool print_tree, unsigned level,
for (const auto *BB : blocks())
OS << BB->getName() << ", "; // TODO: remove the last ","
} else if (Style == PrintRN) {
- for (const_element_iterator I = element_begin(), E = element_end();
- I != E; ++I) {
- OS << **I << ", "; // TODO: remove the last ",
+ for (const RegionNodeT *Element : elements()) {
+ OS << *Element << ", "; // TODO: remove the last ",
}
}
@@ -497,8 +478,8 @@ void RegionBase<Tr>::print(raw_ostream &OS, bool print_tree, unsigned level,
}
if (print_tree) {
- for (const_iterator RI = begin(), RE = end(); RI != RE; ++RI)
- (*RI)->print(OS, print_tree, level + 1, Style);
+ for (const std::unique_ptr<RegionT> &R : *this)
+ R->print(OS, print_tree, level + 1, Style);
}
if (Style != PrintNone)
@@ -514,15 +495,9 @@ void RegionBase<Tr>::dump() const {
template <class Tr>
void RegionBase<Tr>::clearNodeCache() {
- // Free the cached nodes.
- for (typename BBNodeMapT::iterator I = BBNodeMap.begin(),
- IE = BBNodeMap.end();
- I != IE; ++I)
- delete I->second;
-
BBNodeMap.clear();
- for (typename RegionT::iterator RI = begin(), RE = end(); RI != RE; ++RI)
- (*RI)->clearNodeCache();
+ for (std::unique_ptr<RegionT> &R : *this)
+ R->clearNodeCache();
}
//===----------------------------------------------------------------------===//
@@ -541,12 +516,12 @@ RegionInfoBase<Tr>::~RegionInfoBase() {
template <class Tr>
void RegionInfoBase<Tr>::verifyBBMap(const RegionT *R) const {
assert(R && "Re must be non-null");
- for (auto I = R->element_begin(), E = R->element_end(); I != E; ++I) {
- if (I->isSubRegion()) {
- const RegionT *SR = I->template getNodeAs<RegionT>();
+ for (const typename Tr::RegionNodeT *Element : R->elements()) {
+ if (Element->isSubRegion()) {
+ const RegionT *SR = Element->template getNodeAs<RegionT>();
verifyBBMap(SR);
} else {
- BlockT *BB = I->template getNodeAs<BlockT>();
+ BlockT *BB = Element->template getNodeAs<BlockT>();
if (getRegionFor(BB) != R)
llvm_unreachable("BB map does not match region nesting");
}
@@ -556,10 +531,8 @@ void RegionInfoBase<Tr>::verifyBBMap(const RegionT *R) const {
template <class Tr>
bool RegionInfoBase<Tr>::isCommonDomFrontier(BlockT *BB, BlockT *entry,
BlockT *exit) const {
- for (PredIterTy PI = InvBlockTraits::child_begin(BB),
- PE = InvBlockTraits::child_end(BB);
- PI != PE; ++PI) {
- BlockT *P = *PI;
+ for (BlockT *P : make_range(InvBlockTraits::child_begin(BB),
+ InvBlockTraits::child_end(BB))) {
if (DT->dominates(entry, P) && !DT->dominates(exit, P))
return false;
}
@@ -590,20 +563,18 @@ bool RegionInfoBase<Tr>::isRegion(BlockT *entry, BlockT *exit) const {
DST *exitSuccs = &DF->find(exit)->second;
// Do not allow edges leaving the region.
- for (typename DST::iterator SI = entrySuccs->begin(), SE = entrySuccs->end();
- SI != SE; ++SI) {
- if (*SI == exit || *SI == entry)
+ for (BlockT *Succ : *entrySuccs) {
+ if (Succ == exit || Succ == entry)
continue;
- if (exitSuccs->find(*SI) == exitSuccs->end())
+ if (exitSuccs->find(Succ) == exitSuccs->end())
return false;
- if (!isCommonDomFrontier(*SI, entry, exit))
+ if (!isCommonDomFrontier(Succ, entry, exit))
return false;
}
// Do not allow edges pointing into the region.
- for (typename DST::iterator SI = exitSuccs->begin(), SE = exitSuccs->end();
- SI != SE; ++SI) {
- if (DT->properlyDominates(entry, *SI) && *SI != exit)
+ for (BlockT *Succ : *exitSuccs) {
+ if (DT->properlyDominates(entry, Succ) && Succ != exit)
return false;
}
@@ -663,7 +634,7 @@ typename Tr::RegionT *RegionInfoBase<Tr>::createRegion(BlockT *entry,
RegionT *region =
new RegionT(entry, exit, static_cast<RegionInfoT *>(this), DT);
- BBtoRegion.insert(std::make_pair(entry, region));
+ BBtoRegion.insert({entry, region});
#ifdef EXPENSIVE_CHECKS
region->verifyRegion();
@@ -758,9 +729,8 @@ void RegionInfoBase<Tr>::buildRegionsTree(DomTreeNodeT *N, RegionT *region) {
BBtoRegion[BB] = region;
}
- for (typename DomTreeNodeT::iterator CI = N->begin(), CE = N->end(); CI != CE;
- ++CI) {
- buildRegionsTree(*CI, region);
+ for (DomTreeNodeBase<BlockT> *C : *N) {
+ buildRegionsTree(C, region);
}
}
@@ -850,10 +820,9 @@ RegionInfoBase<Tr>::getMaxRegionExit(BlockT *BB) const {
ExitR->getParent()->getEntry() == Exit)
ExitR = ExitR->getParent();
- for (PredIterTy PI = InvBlockTraits::child_begin(Exit),
- PE = InvBlockTraits::child_end(Exit);
- PI != PE; ++PI) {
- if (!R->contains(*PI) && !ExitR->contains(*PI))
+ for (BlockT *Pred : make_range(InvBlockTraits::child_begin(Exit),
+ InvBlockTraits::child_end(Exit))) {
+ if (!R->contains(Pred) && !ExitR->contains(Pred))
break;
}
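
One of the smaller changes in this file, in the createRegion hunk earlier, replaces BBtoRegion.insert(std::make_pair(entry, region)) with a braced initializer. Both spellings construct the map's value_type; the braced form simply avoids naming the helper. A short sketch against llvm::DenseMap, assuming llvm/ADT/DenseMap.h and a hypothetical Region type:

#include "llvm/ADT/DenseMap.h"

struct Region;

static void recordEntry(llvm::DenseMap<int, Region *> &BBtoRegion, int Entry,
                        Region *R) {
  // Equivalent to BBtoRegion.insert(std::make_pair(Entry, R)); the braces
  // build the key/value pair in place.
  BBtoRegion.insert({Entry, R});
}
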
diff --git a/contrib/llvm/include/llvm/Analysis/RegionIterator.h b/contrib/llvm/include/llvm/Analysis/RegionIterator.h
index ced58dfabdd5..de2f3bf3f12b 100644
--- a/contrib/llvm/include/llvm/Analysis/RegionIterator.h
+++ b/contrib/llvm/include/llvm/Analysis/RegionIterator.h
@@ -30,10 +30,10 @@ namespace llvm {
///
/// For a subregion RegionNode there is just one successor. The RegionNode
/// representing the exit of the subregion.
-template<class NodeType, class BlockT, class RegionT>
-class RNSuccIterator : public std::iterator<std::forward_iterator_tag,
- NodeType, ptrdiff_t> {
- typedef std::iterator<std::forward_iterator_tag, NodeType, ptrdiff_t> super;
+template <class NodeRef, class BlockT, class RegionT>
+class RNSuccIterator
+ : public std::iterator<std::forward_iterator_tag, NodeRef> {
+ typedef std::iterator<std::forward_iterator_tag, NodeRef> super;
typedef GraphTraits<BlockT*> BlockTraits;
typedef typename BlockTraits::ChildIteratorType SuccIterTy;
@@ -49,8 +49,14 @@ class RNSuccIterator : public std::iterator<std::forward_iterator_tag,
ItRgEnd // At the end of the regionnode successor.
};
+ static_assert(std::is_pointer<NodeRef>::value,
+ "FIXME: Currently RNSuccIterator only supports NodeRef as "
+ "pointers due to the use of pointer-specific data structures "
+ "(e.g. PointerIntPair and SmallPtrSet) internally. Generalize "
+ "it to support non-pointer types");
+
// Use two bit to represent the mode iterator.
- PointerIntPair<NodeType*, 2, ItMode> Node;
+ PointerIntPair<NodeRef, 2, ItMode> Node;
// The block successor iterator.
SuccIterTy BItor;
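
The static_assert added above exists because PointerIntPair, used for the Node member just below it, stores its integer in the unused low bits of a pointer, so NodeRef must be a genuine pointer type. A minimal sketch of the same packing, with a hypothetical node type:

#include "llvm/ADT/PointerIntPair.h"

struct ToyRegionNode { void *Parent = nullptr; }; // hypothetical node type

enum ToyItMode { ToyItBB, ToyItRgBegin, ToyItRgEnd };

static void sketch(ToyRegionNode *N) {
  // Two tag bits ride in the low bits of the pointer, mirroring
  // PointerIntPair<NodeRef, 2, ItMode> Node above.
  llvm::PointerIntPair<ToyRegionNode *, 2, ToyItMode> NodeAndMode(N,
                                                                  ToyItRgBegin);
  ToyRegionNode *P = NodeAndMode.getPointer(); // the node itself
  ToyItMode M = NodeAndMode.getInt();          // the iterator mode
  (void)P;
  (void)M;
}
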
@@ -62,15 +68,15 @@ class RNSuccIterator : public std::iterator<std::forward_iterator_tag,
Node.setInt(ItRgEnd);
}
- NodeType* getNode() const{ return Node.getPointer(); }
+ NodeRef getNode() const { return Node.getPointer(); }
// isRegionMode - Is the current iterator in region mode?
bool isRegionMode() const { return Node.getInt() != ItBB; }
// Get the immediate successor. This function may return a Basic Block
// RegionNode or a subregion RegionNode.
- NodeType* getISucc(BlockT* BB) const {
- NodeType *succ;
+ NodeRef getISucc(BlockT *BB) const {
+ NodeRef succ;
succ = getNode()->getParent()->getNode(BB);
assert(succ && "BB not in Region or entered subregion!");
return succ;
@@ -87,14 +93,14 @@ class RNSuccIterator : public std::iterator<std::forward_iterator_tag,
return getNode()->getParent()->getExit() == BB;
}
public:
- typedef RNSuccIterator<NodeType, BlockT, RegionT> Self;
+ typedef RNSuccIterator<NodeRef, BlockT, RegionT> Self;
- typedef typename super::pointer pointer;
+ typedef typename super::value_type value_type;
/// @brief Create begin iterator of a RegionNode.
- inline RNSuccIterator(NodeType* node)
- : Node(node, node->isSubRegion() ? ItRgBegin : ItBB),
- BItor(BlockTraits::child_begin(node->getEntry())) {
+ inline RNSuccIterator(NodeRef node)
+ : Node(node, node->isSubRegion() ? ItRgBegin : ItBB),
+ BItor(BlockTraits::child_begin(node->getEntry())) {
// Skip the exit block
if (!isRegionMode())
@@ -106,9 +112,9 @@ public:
}
/// @brief Create an end iterator.
- inline RNSuccIterator(NodeType* node, bool)
- : Node(node, node->isSubRegion() ? ItRgEnd : ItBB),
- BItor(BlockTraits::child_end(node->getEntry())) {}
+ inline RNSuccIterator(NodeRef node, bool)
+ : Node(node, node->isSubRegion() ? ItRgEnd : ItBB),
+ BItor(BlockTraits::child_end(node->getEntry())) {}
inline bool operator==(const Self& x) const {
assert(isRegionMode() == x.isRegionMode() && "Broken iterator!");
@@ -120,7 +126,7 @@ public:
inline bool operator!=(const Self& x) const { return !operator==(x); }
- inline pointer operator*() const {
+ inline value_type operator*() const {
BlockT *BB = isRegionMode() ? getRegionSucc() : *BItor;
assert(!isExit(BB) && "Iterator out of range!");
return getISucc(BB);
@@ -154,43 +160,41 @@ public:
/// The Flat Region iterator will iterate over all BasicBlock RegionNodes that
/// are contained in the Region and its subregions. This is close to a virtual
/// control flow graph of the Region.
-template<class NodeType, class BlockT, class RegionT>
-class RNSuccIterator<FlatIt<NodeType>, BlockT, RegionT>
- : public std::iterator<std::forward_iterator_tag, NodeType, ptrdiff_t> {
- typedef std::iterator<std::forward_iterator_tag, NodeType, ptrdiff_t> super;
+template <class NodeRef, class BlockT, class RegionT>
+class RNSuccIterator<FlatIt<NodeRef>, BlockT, RegionT>
+ : public std::iterator<std::forward_iterator_tag, NodeRef> {
+ typedef std::iterator<std::forward_iterator_tag, NodeRef> super;
typedef GraphTraits<BlockT*> BlockTraits;
typedef typename BlockTraits::ChildIteratorType SuccIterTy;
- NodeType* Node;
+ NodeRef Node;
SuccIterTy Itor;
public:
- typedef RNSuccIterator<FlatIt<NodeType>, BlockT, RegionT> Self;
- typedef typename super::pointer pointer;
+ typedef RNSuccIterator<FlatIt<NodeRef>, BlockT, RegionT> Self;
+ typedef typename super::value_type value_type;
/// @brief Create the iterator from a RegionNode.
///
/// Note that the incoming node must be a bb node, otherwise it will trigger
/// an assertion when we try to get a BasicBlock.
- inline RNSuccIterator(NodeType* node) :
- Node(node),
- Itor(BlockTraits::child_begin(node->getEntry())) {
- assert(!Node->isSubRegion()
- && "Subregion node not allowed in flat iterating mode!");
- assert(Node->getParent() && "A BB node must have a parent!");
-
- // Skip the exit block of the iterating region.
- while (BlockTraits::child_end(Node->getEntry()) != Itor
- && Node->getParent()->getExit() == *Itor)
- ++Itor;
+ inline RNSuccIterator(NodeRef node)
+ : Node(node), Itor(BlockTraits::child_begin(node->getEntry())) {
+ assert(!Node->isSubRegion() &&
+ "Subregion node not allowed in flat iterating mode!");
+ assert(Node->getParent() && "A BB node must have a parent!");
+
+ // Skip the exit block of the iterating region.
+ while (BlockTraits::child_end(Node->getEntry()) != Itor &&
+ Node->getParent()->getExit() == *Itor)
+ ++Itor;
}
/// @brief Create an end iterator
- inline RNSuccIterator(NodeType* node, bool) :
- Node(node),
- Itor(BlockTraits::child_end(node->getEntry())) {
- assert(!Node->isSubRegion()
- && "Subregion node not allowed in flat iterating mode!");
+ inline RNSuccIterator(NodeRef node, bool)
+ : Node(node), Itor(BlockTraits::child_end(node->getEntry())) {
+ assert(!Node->isSubRegion() &&
+ "Subregion node not allowed in flat iterating mode!");
}
inline bool operator==(const Self& x) const {
@@ -202,7 +206,7 @@ public:
inline bool operator!=(const Self& x) const { return !operator==(x); }
- inline pointer operator*() const {
+ inline value_type operator*() const {
BlockT *BB = *Itor;
// Get the iterating region.
@@ -232,14 +236,14 @@ public:
}
};
-template<class NodeType, class BlockT, class RegionT>
-inline RNSuccIterator<NodeType, BlockT, RegionT> succ_begin(NodeType* Node) {
- return RNSuccIterator<NodeType, BlockT, RegionT>(Node);
+template <class NodeRef, class BlockT, class RegionT>
+inline RNSuccIterator<NodeRef, BlockT, RegionT> succ_begin(NodeRef Node) {
+ return RNSuccIterator<NodeRef, BlockT, RegionT>(Node);
}
-template<class NodeType, class BlockT, class RegionT>
-inline RNSuccIterator<NodeType, BlockT, RegionT> succ_end(NodeType* Node) {
- return RNSuccIterator<NodeType, BlockT, RegionT>(Node, true);
+template <class NodeRef, class BlockT, class RegionT>
+inline RNSuccIterator<NodeRef, BlockT, RegionT> succ_end(NodeRef Node) {
+ return RNSuccIterator<NodeRef, BlockT, RegionT>(Node, true);
}
//===--------------------------------------------------------------------===//
@@ -249,58 +253,60 @@ inline RNSuccIterator<NodeType, BlockT, RegionT> succ_end(NodeType* Node) {
// NodeT can either be region node or const region node, otherwise child_begin
// and child_end fail.
-#define RegionNodeGraphTraits(NodeT, BlockT, RegionT) \
- template<> struct GraphTraits<NodeT*> { \
- typedef NodeT NodeType; \
- typedef RNSuccIterator<NodeType, BlockT, RegionT> ChildIteratorType; \
- static NodeType *getEntryNode(NodeType* N) { return N; } \
- static inline ChildIteratorType child_begin(NodeType *N) { \
- return RNSuccIterator<NodeType, BlockT, RegionT>(N); \
- } \
- static inline ChildIteratorType child_end(NodeType *N) { \
- return RNSuccIterator<NodeType, BlockT, RegionT>(N, true); \
- } \
-}; \
-template<> struct GraphTraits<FlatIt<NodeT*>> { \
- typedef NodeT NodeType; \
- typedef RNSuccIterator<FlatIt<NodeT>, BlockT, RegionT > ChildIteratorType; \
- static NodeType *getEntryNode(NodeType* N) { return N; } \
- static inline ChildIteratorType child_begin(NodeType *N) { \
- return RNSuccIterator<FlatIt<NodeType>, BlockT, RegionT>(N); \
- } \
- static inline ChildIteratorType child_end(NodeType *N) { \
- return RNSuccIterator<FlatIt<NodeType>, BlockT, RegionT>(N, true); \
- } \
-}
+#define RegionNodeGraphTraits(NodeT, BlockT, RegionT) \
+ template <> struct GraphTraits<NodeT *> { \
+ typedef NodeT *NodeRef; \
+ typedef RNSuccIterator<NodeRef, BlockT, RegionT> ChildIteratorType; \
+ static NodeRef getEntryNode(NodeRef N) { return N; } \
+ static inline ChildIteratorType child_begin(NodeRef N) { \
+ return RNSuccIterator<NodeRef, BlockT, RegionT>(N); \
+ } \
+ static inline ChildIteratorType child_end(NodeRef N) { \
+ return RNSuccIterator<NodeRef, BlockT, RegionT>(N, true); \
+ } \
+ }; \
+ template <> struct GraphTraits<FlatIt<NodeT *>> { \
+ typedef NodeT *NodeRef; \
+ typedef RNSuccIterator<FlatIt<NodeRef>, BlockT, RegionT> \
+ ChildIteratorType; \
+ static NodeRef getEntryNode(NodeRef N) { return N; } \
+ static inline ChildIteratorType child_begin(NodeRef N) { \
+ return RNSuccIterator<FlatIt<NodeRef>, BlockT, RegionT>(N); \
+ } \
+ static inline ChildIteratorType child_end(NodeRef N) { \
+ return RNSuccIterator<FlatIt<NodeRef>, BlockT, RegionT>(N, true); \
+ } \
+ }
-#define RegionGraphTraits(RegionT, NodeT) \
-template<> struct GraphTraits<RegionT*> \
- : public GraphTraits<NodeT*> { \
- typedef df_iterator<NodeType*> nodes_iterator; \
- static NodeType *getEntryNode(RegionT* R) { \
- return R->getNode(R->getEntry()); \
- } \
- static nodes_iterator nodes_begin(RegionT* R) { \
- return nodes_iterator::begin(getEntryNode(R)); \
- } \
- static nodes_iterator nodes_end(RegionT* R) { \
- return nodes_iterator::end(getEntryNode(R)); \
- } \
-}; \
-template<> struct GraphTraits<FlatIt<RegionT*> > \
- : public GraphTraits<FlatIt<NodeT*> > { \
- typedef df_iterator<NodeType*, SmallPtrSet<NodeType*, 8>, false, \
- GraphTraits<FlatIt<NodeType*> > > nodes_iterator; \
- static NodeType *getEntryNode(RegionT* R) { \
- return R->getBBNode(R->getEntry()); \
- } \
- static nodes_iterator nodes_begin(RegionT* R) { \
- return nodes_iterator::begin(getEntryNode(R)); \
- } \
- static nodes_iterator nodes_end(RegionT* R) { \
- return nodes_iterator::end(getEntryNode(R)); \
- } \
-}
+#define RegionGraphTraits(RegionT, NodeT) \
+ template <> struct GraphTraits<RegionT *> : public GraphTraits<NodeT *> { \
+ typedef df_iterator<NodeRef> nodes_iterator; \
+ static NodeRef getEntryNode(RegionT *R) { \
+ return R->getNode(R->getEntry()); \
+ } \
+ static nodes_iterator nodes_begin(RegionT *R) { \
+ return nodes_iterator::begin(getEntryNode(R)); \
+ } \
+ static nodes_iterator nodes_end(RegionT *R) { \
+ return nodes_iterator::end(getEntryNode(R)); \
+ } \
+ }; \
+ template <> \
+ struct GraphTraits<FlatIt<RegionT *>> \
+ : public GraphTraits<FlatIt<NodeT *>> { \
+ typedef df_iterator<NodeRef, df_iterator_default_set<NodeRef>, false, \
+ GraphTraits<FlatIt<NodeRef>>> \
+ nodes_iterator; \
+ static NodeRef getEntryNode(RegionT *R) { \
+ return R->getBBNode(R->getEntry()); \
+ } \
+ static nodes_iterator nodes_begin(RegionT *R) { \
+ return nodes_iterator::begin(getEntryNode(R)); \
+ } \
+ static nodes_iterator nodes_end(RegionT *R) { \
+ return nodes_iterator::end(getEntryNode(R)); \
+ } \
+ }
RegionNodeGraphTraits(RegionNode, BasicBlock, Region);
RegionNodeGraphTraits(const RegionNode, BasicBlock, Region);
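
The macro rewrites above track LLVM's GraphTraits interface change from a NodeType typedef to a NodeRef handle: a specialization now exposes NodeRef, getEntryNode and child_begin/child_end, and generic walkers such as df_iterator are written purely against those. A self-contained sketch for a hypothetical adjacency-list graph, not tied to Regions:

#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/GraphTraits.h"
#include <vector>

struct ToyNode {
  std::vector<ToyNode *> Succs;
};

namespace llvm {
template <> struct GraphTraits<ToyNode *> {
  typedef ToyNode *NodeRef; // the node handle, a pointer
  typedef std::vector<ToyNode *>::iterator ChildIteratorType;
  static NodeRef getEntryNode(ToyNode *N) { return N; }
  static ChildIteratorType child_begin(NodeRef N) { return N->Succs.begin(); }
  static ChildIteratorType child_end(NodeRef N) { return N->Succs.end(); }
};
} // end namespace llvm

// A depth-first walk driven only by the traits, the same way the
// nodes_iterator typedefs above plug Region graphs into df_iterator.
static unsigned countReachable(ToyNode *Entry) {
  unsigned N = 0;
  for (ToyNode *Node : llvm::depth_first(Entry)) {
    (void)Node;
    ++N;
  }
  return N;
}
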
@@ -310,10 +316,11 @@ RegionGraphTraits(const Region, const RegionNode);
template <> struct GraphTraits<RegionInfo*>
: public GraphTraits<FlatIt<RegionNode*> > {
- typedef df_iterator<NodeType*, SmallPtrSet<NodeType*, 8>, false,
- GraphTraits<FlatIt<NodeType*> > > nodes_iterator;
+ typedef df_iterator<NodeRef, df_iterator_default_set<NodeRef>, false,
+ GraphTraits<FlatIt<NodeRef>>>
+ nodes_iterator;
- static NodeType *getEntryNode(RegionInfo *RI) {
+ static NodeRef getEntryNode(RegionInfo *RI) {
return GraphTraits<FlatIt<Region*> >::getEntryNode(RI->getTopLevelRegion());
}
static nodes_iterator nodes_begin(RegionInfo* RI) {
@@ -326,10 +333,11 @@ template <> struct GraphTraits<RegionInfo*>
template <> struct GraphTraits<RegionInfoPass*>
: public GraphTraits<RegionInfo *> {
- typedef df_iterator<NodeType*, SmallPtrSet<NodeType*, 8>, false,
- GraphTraits<FlatIt<NodeType*> > > nodes_iterator;
+ typedef df_iterator<NodeRef, df_iterator_default_set<NodeRef>, false,
+ GraphTraits<FlatIt<NodeRef>>>
+ nodes_iterator;
- static NodeType *getEntryNode(RegionInfoPass *RI) {
+ static NodeRef getEntryNode(RegionInfoPass *RI) {
return GraphTraits<RegionInfo*>::getEntryNode(&RI->getRegionInfo());
}
static nodes_iterator nodes_begin(RegionInfoPass* RI) {
diff --git a/contrib/llvm/include/llvm/Analysis/RegionPass.h b/contrib/llvm/include/llvm/Analysis/RegionPass.h
index bd51c49e87db..b5f38139abf2 100644
--- a/contrib/llvm/include/llvm/Analysis/RegionPass.h
+++ b/contrib/llvm/include/llvm/Analysis/RegionPass.h
@@ -101,9 +101,7 @@ public:
/// RGPassManager needs RegionInfo.
void getAnalysisUsage(AnalysisUsage &Info) const override;
- const char *getPassName() const override {
- return "Region Pass Manager";
- }
+ StringRef getPassName() const override { return "Region Pass Manager"; }
PMDataManager *getAsPMDataManager() override { return this; }
Pass *getAsPass() override { return this; }
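
The RegionPass.h hunk above reflects the tree-wide change of Pass::getPassName() from const char * to StringRef; any pass that overrides it has to adopt the new signature. A sketch of a hypothetical pass skeleton, where only the getPassName signature matters:

#include "llvm/ADT/StringRef.h"
#include "llvm/Pass.h"

namespace {
struct ExamplePass : public llvm::FunctionPass {
  static char ID;
  ExamplePass() : llvm::FunctionPass(ID) {}

  // Post-change signature: return StringRef instead of const char *.
  llvm::StringRef getPassName() const override { return "Example Pass"; }

  bool runOnFunction(llvm::Function &F) override { return false; }
};
char ExamplePass::ID = 0;
} // end anonymous namespace
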
diff --git a/contrib/llvm/include/llvm/Analysis/ScalarEvolution.h b/contrib/llvm/include/llvm/Analysis/ScalarEvolution.h
index 535b623d31ac..9dcffe1ac5fb 100644
--- a/contrib/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/contrib/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -36,1769 +36,1723 @@
#include "llvm/Support/DataTypes.h"
namespace llvm {
- class APInt;
- class AssumptionCache;
- class Constant;
- class ConstantInt;
- class DominatorTree;
- class Type;
- class ScalarEvolution;
- class DataLayout;
- class TargetLibraryInfo;
- class LLVMContext;
- class Operator;
- class SCEV;
- class SCEVAddRecExpr;
- class SCEVConstant;
- class SCEVExpander;
- class SCEVPredicate;
- class SCEVUnknown;
- class Function;
-
- template <> struct FoldingSetTrait<SCEV>;
- template <> struct FoldingSetTrait<SCEVPredicate>;
-
- /// This class represents an analyzed expression in the program. These are
- /// opaque objects that the client is not allowed to do much with directly.
- ///
- class SCEV : public FoldingSetNode {
- friend struct FoldingSetTrait<SCEV>;
-
- /// A reference to an Interned FoldingSetNodeID for this node. The
- /// ScalarEvolution's BumpPtrAllocator holds the data.
- FoldingSetNodeIDRef FastID;
-
- // The SCEV baseclass this node corresponds to
- const unsigned short SCEVType;
-
- protected:
- /// This field is initialized to zero and may be used in subclasses to store
- /// miscellaneous information.
- unsigned short SubclassData;
-
- private:
- SCEV(const SCEV &) = delete;
- void operator=(const SCEV &) = delete;
+class APInt;
+class AssumptionCache;
+class Constant;
+class ConstantInt;
+class DominatorTree;
+class Type;
+class ScalarEvolution;
+class DataLayout;
+class TargetLibraryInfo;
+class LLVMContext;
+class Operator;
+class SCEV;
+class SCEVAddRecExpr;
+class SCEVConstant;
+class SCEVExpander;
+class SCEVPredicate;
+class SCEVUnknown;
+class Function;
+
+template <> struct FoldingSetTrait<SCEV>;
+template <> struct FoldingSetTrait<SCEVPredicate>;
+
+/// This class represents an analyzed expression in the program. These are
+/// opaque objects that the client is not allowed to do much with directly.
+///
+class SCEV : public FoldingSetNode {
+ friend struct FoldingSetTrait<SCEV>;
+
+ /// A reference to an Interned FoldingSetNodeID for this node. The
+ /// ScalarEvolution's BumpPtrAllocator holds the data.
+ FoldingSetNodeIDRef FastID;
+
+ // The SCEV baseclass this node corresponds to
+ const unsigned short SCEVType;
+
+protected:
+ /// This field is initialized to zero and may be used in subclasses to store
+ /// miscellaneous information.
+ unsigned short SubclassData;
+
+private:
+ SCEV(const SCEV &) = delete;
+ void operator=(const SCEV &) = delete;
+
+public:
+ /// NoWrapFlags are bitfield indices into SubclassData.
+ ///
+ /// Add and Mul expressions may have no-unsigned-wrap <NUW> or
+ /// no-signed-wrap <NSW> properties, which are derived from the IR
+ /// operator. NSW is a misnomer that we use to mean no signed overflow or
+ /// underflow.
+ ///
+ /// AddRec expressions may have a no-self-wraparound <NW> property if, in
+ /// the integer domain, abs(step) * max-iteration(loop) <=
+ /// unsigned-max(bitwidth). This means that the recurrence will never reach
+ /// its start value if the step is non-zero. Computing the same value on
+ /// each iteration is not considered wrapping, and recurrences with step = 0
+ /// are trivially <NW>. <NW> is independent of the sign of step and the
+ /// value the add recurrence starts with.
+ ///
+ /// Note that NUW and NSW are also valid properties of a recurrence, and
+ /// either implies NW. For convenience, NW will be set for a recurrence
+ /// whenever either NUW or NSW are set.
+ enum NoWrapFlags {
+ FlagAnyWrap = 0, // No guarantee.
+ FlagNW = (1 << 0), // No self-wrap.
+ FlagNUW = (1 << 1), // No unsigned wrap.
+ FlagNSW = (1 << 2), // No signed wrap.
+ NoWrapMask = (1 << 3) - 1
+ };
- public:
- /// NoWrapFlags are bitfield indices into SubclassData.
- ///
- /// Add and Mul expressions may have no-unsigned-wrap <NUW> or
- /// no-signed-wrap <NSW> properties, which are derived from the IR
- /// operator. NSW is a misnomer that we use to mean no signed overflow or
- /// underflow.
- ///
- /// AddRec expressions may have a no-self-wraparound <NW> property if, in
- /// the integer domain, abs(step) * max-iteration(loop) <=
- /// unsigned-max(bitwidth). This means that the recurrence will never reach
- /// its start value if the step is non-zero. Computing the same value on
- /// each iteration is not considered wrapping, and recurrences with step = 0
- /// are trivially <NW>. <NW> is independent of the sign of step and the
- /// value the add recurrence starts with.
- ///
- /// Note that NUW and NSW are also valid properties of a recurrence, and
- /// either implies NW. For convenience, NW will be set for a recurrence
- /// whenever either NUW or NSW are set.
- enum NoWrapFlags { FlagAnyWrap = 0, // No guarantee.
- FlagNW = (1 << 0), // No self-wrap.
- FlagNUW = (1 << 1), // No unsigned wrap.
- FlagNSW = (1 << 2), // No signed wrap.
- NoWrapMask = (1 << 3) -1 };
+ explicit SCEV(const FoldingSetNodeIDRef ID, unsigned SCEVTy)
+ : FastID(ID), SCEVType(SCEVTy), SubclassData(0) {}
- explicit SCEV(const FoldingSetNodeIDRef ID, unsigned SCEVTy) :
- FastID(ID), SCEVType(SCEVTy), SubclassData(0) {}
+ unsigned getSCEVType() const { return SCEVType; }
- unsigned getSCEVType() const { return SCEVType; }
+ /// Return the LLVM type of this SCEV expression.
+ ///
+ Type *getType() const;
- /// Return the LLVM type of this SCEV expression.
- ///
- Type *getType() const;
+ /// Return true if the expression is a constant zero.
+ ///
+ bool isZero() const;
- /// Return true if the expression is a constant zero.
- ///
- bool isZero() const;
+ /// Return true if the expression is a constant one.
+ ///
+ bool isOne() const;
- /// Return true if the expression is a constant one.
- ///
- bool isOne() const;
+ /// Return true if the expression is a constant all-ones value.
+ ///
+ bool isAllOnesValue() const;
- /// Return true if the expression is a constant all-ones value.
- ///
- bool isAllOnesValue() const;
+ /// Return true if the specified scev is negated, but not a constant.
+ bool isNonConstantNegative() const;
- /// Return true if the specified scev is negated, but not a constant.
- bool isNonConstantNegative() const;
+ /// Print out the internal representation of this scalar to the specified
+ /// stream. This should really only be used for debugging purposes.
+ void print(raw_ostream &OS) const;
- /// Print out the internal representation of this scalar to the specified
- /// stream. This should really only be used for debugging purposes.
- void print(raw_ostream &OS) const;
+ /// This method is used for debugging.
+ ///
+ void dump() const;
+};
+
+// Specialize FoldingSetTrait for SCEV to avoid needing to compute
+// temporary FoldingSetNodeID values.
+template <> struct FoldingSetTrait<SCEV> : DefaultFoldingSetTrait<SCEV> {
+ static void Profile(const SCEV &X, FoldingSetNodeID &ID) { ID = X.FastID; }
+ static bool Equals(const SCEV &X, const FoldingSetNodeID &ID, unsigned IDHash,
+ FoldingSetNodeID &TempID) {
+ return ID == X.FastID;
+ }
+ static unsigned ComputeHash(const SCEV &X, FoldingSetNodeID &TempID) {
+ return X.FastID.ComputeHash();
+ }
+};
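
The FoldingSetTrait<SCEV> specialization above lets the folding set reuse the interned FastID instead of re-profiling every node. The surrounding uniquing machinery follows LLVM's usual FoldingSet pattern; a minimal, hypothetical sketch of that pattern (not the SCEV code itself):

#include "llvm/ADT/FoldingSet.h"

// Hypothetical uniqued node; Profile() feeds the bits that identify it.
struct ConstNode : llvm::FoldingSetNode {
  int Value;
  explicit ConstNode(int V) : Value(V) {}
  void Profile(llvm::FoldingSetNodeID &ID) const { ID.AddInteger(Value); }
};

static ConstNode *getUniqued(llvm::FoldingSet<ConstNode> &Set, int V) {
  llvm::FoldingSetNodeID ID;
  ID.AddInteger(V);
  void *InsertPos = nullptr;
  if (ConstNode *N = Set.FindNodeOrInsertPos(ID, InsertPos))
    return N;                      // an equal node already exists; reuse it
  ConstNode *N = new ConstNode(V); // ownership elided for brevity
  Set.InsertNode(N, InsertPos);
  return N;
}
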
- /// This method is used for debugging.
- ///
- void dump() const;
- };
+inline raw_ostream &operator<<(raw_ostream &OS, const SCEV &S) {
+ S.print(OS);
+ return OS;
+}
- // Specialize FoldingSetTrait for SCEV to avoid needing to compute
- // temporary FoldingSetNodeID values.
- template<> struct FoldingSetTrait<SCEV> : DefaultFoldingSetTrait<SCEV> {
- static void Profile(const SCEV &X, FoldingSetNodeID& ID) {
- ID = X.FastID;
- }
- static bool Equals(const SCEV &X, const FoldingSetNodeID &ID,
- unsigned IDHash, FoldingSetNodeID &TempID) {
- return ID == X.FastID;
- }
- static unsigned ComputeHash(const SCEV &X, FoldingSetNodeID &TempID) {
- return X.FastID.ComputeHash();
- }
- };
+/// An object of this class is returned by queries that could not be answered.
+/// For example, if you ask for the number of iterations of a linked-list
+/// traversal loop, you will get one of these. None of the standard SCEV
+/// operations are valid on this class, it is just a marker.
+struct SCEVCouldNotCompute : public SCEV {
+ SCEVCouldNotCompute();
- inline raw_ostream &operator<<(raw_ostream &OS, const SCEV &S) {
- S.print(OS);
- return OS;
- }
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
+ static bool classof(const SCEV *S);
+};
- /// An object of this class is returned by queries that could not be answered.
- /// For example, if you ask for the number of iterations of a linked-list
- /// traversal loop, you will get one of these. None of the standard SCEV
- /// operations are valid on this class, it is just a marker.
- struct SCEVCouldNotCompute : public SCEV {
- SCEVCouldNotCompute();
+/// This class represents an assumption made using SCEV expressions which can
+/// be checked at run-time.
+class SCEVPredicate : public FoldingSetNode {
+ friend struct FoldingSetTrait<SCEVPredicate>;
- /// Methods for support type inquiry through isa, cast, and dyn_cast:
- static bool classof(const SCEV *S);
- };
+ /// A reference to an Interned FoldingSetNodeID for this node. The
+ /// ScalarEvolution's BumpPtrAllocator holds the data.
+ FoldingSetNodeIDRef FastID;
- /// This class represents an assumption made using SCEV expressions which can
- /// be checked at run-time.
- class SCEVPredicate : public FoldingSetNode {
- friend struct FoldingSetTrait<SCEVPredicate>;
+public:
+ enum SCEVPredicateKind { P_Union, P_Equal, P_Wrap };
- /// A reference to an Interned FoldingSetNodeID for this node. The
- /// ScalarEvolution's BumpPtrAllocator holds the data.
- FoldingSetNodeIDRef FastID;
+protected:
+ SCEVPredicateKind Kind;
+ ~SCEVPredicate() = default;
+ SCEVPredicate(const SCEVPredicate &) = default;
+ SCEVPredicate &operator=(const SCEVPredicate &) = default;
- public:
- enum SCEVPredicateKind { P_Union, P_Equal, P_Wrap };
+public:
+ SCEVPredicate(const FoldingSetNodeIDRef ID, SCEVPredicateKind Kind);
- protected:
- SCEVPredicateKind Kind;
- ~SCEVPredicate() = default;
- SCEVPredicate(const SCEVPredicate&) = default;
- SCEVPredicate &operator=(const SCEVPredicate&) = default;
+ SCEVPredicateKind getKind() const { return Kind; }
- public:
- SCEVPredicate(const FoldingSetNodeIDRef ID, SCEVPredicateKind Kind);
+ /// Returns the estimated complexity of this predicate. This is roughly
+ /// measured in the number of run-time checks required.
+ virtual unsigned getComplexity() const { return 1; }
- SCEVPredicateKind getKind() const { return Kind; }
+ /// Returns true if the predicate is always true. This means that no
+ /// assumptions were made and nothing needs to be checked at run-time.
+ virtual bool isAlwaysTrue() const = 0;
- /// Returns the estimated complexity of this predicate. This is roughly
- /// measured in the number of run-time checks required.
- virtual unsigned getComplexity() const { return 1; }
+ /// Returns true if this predicate implies \p N.
+ virtual bool implies(const SCEVPredicate *N) const = 0;
- /// Returns true if the predicate is always true. This means that no
- /// assumptions were made and nothing needs to be checked at run-time.
- virtual bool isAlwaysTrue() const = 0;
+ /// Prints a textual representation of this predicate with an indentation of
+ /// \p Depth.
+ virtual void print(raw_ostream &OS, unsigned Depth = 0) const = 0;
- /// Returns true if this predicate implies \p N.
- virtual bool implies(const SCEVPredicate *N) const = 0;
+ /// Returns the SCEV to which this predicate applies, or nullptr if this is
+ /// a SCEVUnionPredicate.
+ virtual const SCEV *getExpr() const = 0;
+};
- /// Prints a textual representation of this predicate with an indentation of
- /// \p Depth.
- virtual void print(raw_ostream &OS, unsigned Depth = 0) const = 0;
+inline raw_ostream &operator<<(raw_ostream &OS, const SCEVPredicate &P) {
+ P.print(OS);
+ return OS;
+}
- /// Returns the SCEV to which this predicate applies, or nullptr if this is
- /// a SCEVUnionPredicate.
- virtual const SCEV *getExpr() const = 0;
- };
+// Specialize FoldingSetTrait for SCEVPredicate to avoid needing to compute
+// temporary FoldingSetNodeID values.
+template <>
+struct FoldingSetTrait<SCEVPredicate> : DefaultFoldingSetTrait<SCEVPredicate> {
- inline raw_ostream &operator<<(raw_ostream &OS, const SCEVPredicate &P) {
- P.print(OS);
- return OS;
+ static void Profile(const SCEVPredicate &X, FoldingSetNodeID &ID) {
+ ID = X.FastID;
}
- // Specialize FoldingSetTrait for SCEVPredicate to avoid needing to compute
- // temporary FoldingSetNodeID values.
- template <>
- struct FoldingSetTrait<SCEVPredicate>
- : DefaultFoldingSetTrait<SCEVPredicate> {
-
- static void Profile(const SCEVPredicate &X, FoldingSetNodeID &ID) {
- ID = X.FastID;
- }
-
- static bool Equals(const SCEVPredicate &X, const FoldingSetNodeID &ID,
- unsigned IDHash, FoldingSetNodeID &TempID) {
- return ID == X.FastID;
- }
- static unsigned ComputeHash(const SCEVPredicate &X,
- FoldingSetNodeID &TempID) {
- return X.FastID.ComputeHash();
- }
+ static bool Equals(const SCEVPredicate &X, const FoldingSetNodeID &ID,
+ unsigned IDHash, FoldingSetNodeID &TempID) {
+ return ID == X.FastID;
+ }
+ static unsigned ComputeHash(const SCEVPredicate &X,
+ FoldingSetNodeID &TempID) {
+ return X.FastID.ComputeHash();
+ }
+};
+
+/// This class represents an assumption that two SCEV expressions are equal,
+/// and this can be checked at run-time. We assume that the left hand side is
+/// a SCEVUnknown and the right hand side a constant.
+class SCEVEqualPredicate final : public SCEVPredicate {
+ /// We assume that LHS == RHS, where LHS is a SCEVUnknown and RHS a
+ /// constant.
+ const SCEVUnknown *LHS;
+ const SCEVConstant *RHS;
+
+public:
+ SCEVEqualPredicate(const FoldingSetNodeIDRef ID, const SCEVUnknown *LHS,
+ const SCEVConstant *RHS);
+
+ /// Implementation of the SCEVPredicate interface
+ bool implies(const SCEVPredicate *N) const override;
+ void print(raw_ostream &OS, unsigned Depth = 0) const override;
+ bool isAlwaysTrue() const override;
+ const SCEV *getExpr() const override;
+
+ /// Returns the left hand side of the equality.
+ const SCEVUnknown *getLHS() const { return LHS; }
+
+ /// Returns the right hand side of the equality.
+ const SCEVConstant *getRHS() const { return RHS; }
+
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const SCEVPredicate *P) {
+ return P->getKind() == P_Equal;
+ }
+};
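
SCEVEqualPredicate's classof above is the hook that LLVM's isa/dyn_cast machinery uses for type queries on SCEVPredicate pointers. A self-contained sketch of the same Kind + classof pattern with hypothetical classes:

#include "llvm/Support/Casting.h"

struct Pred {
  enum PredKind { PK_Equal, PK_Wrap };
  explicit Pred(PredKind K) : Kind(K) {}
  PredKind getKind() const { return Kind; }

private:
  PredKind Kind;
};

struct EqualPred : Pred {
  EqualPred() : Pred(PK_Equal) {}
  // Consulted by isa<>/dyn_cast<> to decide whether the cast is legal.
  static bool classof(const Pred *P) { return P->getKind() == PK_Equal; }
};

static bool isEqualPred(const Pred *P) {
  return llvm::isa<EqualPred>(P); // true iff EqualPred::classof(P) holds
}
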
+
+/// This class represents an assumption made on an AddRec expression. Given an
+/// affine AddRec expression {a,+,b}, we assume that it has the nssw or nusw
+/// flags (defined below) in the first X iterations of the loop, where X is a
+/// SCEV expression returned by getPredicatedBackedgeTakenCount.
+///
+/// Note that this does not imply that X is equal to the backedge taken
+/// count. This means that if we have a nusw predicate for i32 {0,+,1} with a
+/// predicated backedge taken count of X, we only guarantee that {0,+,1} has
+/// nusw in the first X iterations. {0,+,1} may still wrap in the loop if we
+/// have more than X iterations.
+class SCEVWrapPredicate final : public SCEVPredicate {
+public:
+ /// Similar to SCEV::NoWrapFlags, but with slightly different semantics
+ /// for FlagNUSW. The increment is considered to be signed, and a + b
+ /// (where b is the increment) is considered to wrap if:
+ /// zext(a + b) != zext(a) + sext(b)
+ ///
+ /// If Signed is a function that takes an n-bit tuple and maps to the
+  /// integer domain as the tuple's value interpreted as two's complement,
+  /// and Unsigned a function that takes an n-bit tuple and maps to the
+  /// integer domain as the base two value of the input tuple, then a + b
+ /// has IncrementNUSW iff:
+ ///
+ /// 0 <= Unsigned(a) + Signed(b) < 2^n
+ ///
+ /// The IncrementNSSW flag has identical semantics with SCEV::FlagNSW.
+ ///
+ /// Note that the IncrementNUSW flag is not commutative: if base + inc
+  /// has IncrementNUSW, then inc + base doesn't necessarily have this
+ /// property. The reason for this is that this is used for sign/zero
+ /// extending affine AddRec SCEV expressions when a SCEVWrapPredicate is
+ /// assumed. A {base,+,inc} expression is already non-commutative with
+ /// regards to base and inc, since it is interpreted as:
+ /// (((base + inc) + inc) + inc) ...
+ enum IncrementWrapFlags {
+ IncrementAnyWrap = 0, // No guarantee.
+ IncrementNUSW = (1 << 0), // No unsigned with signed increment wrap.
+ IncrementNSSW = (1 << 1), // No signed with signed increment wrap
+ // (equivalent with SCEV::NSW)
+ IncrementNoWrapMask = (1 << 2) - 1
};
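
The IncrementNUSW definition above, "0 <= Unsigned(a) + Signed(b) < 2^n", is easiest to see with a small fixed width; the following is only an illustration of that inequality for n = 8, not code from this header:

#include <cstdint>

// a + b wraps in the "no unsigned wrap with signed increment" sense exactly
// when Unsigned(a) + Signed(b) leaves the interval [0, 2^8).
static bool wrapsNUSW8(uint8_t A, int8_t B) {
  int Sum = int(A) + int(B); // Unsigned(a) + Signed(b) in the integers
  return Sum < 0 || Sum >= 256;
}

// Examples: A = 250, B = +10 gives 260, which wraps; A = 5, B = -3 gives 2,
// which does not; A = 0, B = -1 gives -1, which also wraps.
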
- /// This class represents an assumption that two SCEV expressions are equal,
- /// and this can be checked at run-time. We assume that the left hand side is
- /// a SCEVUnknown and the right hand side a constant.
- class SCEVEqualPredicate final : public SCEVPredicate {
- /// We assume that LHS == RHS, where LHS is a SCEVUnknown and RHS a
- /// constant.
- const SCEVUnknown *LHS;
- const SCEVConstant *RHS;
-
- public:
- SCEVEqualPredicate(const FoldingSetNodeIDRef ID, const SCEVUnknown *LHS,
- const SCEVConstant *RHS);
-
- /// Implementation of the SCEVPredicate interface
- bool implies(const SCEVPredicate *N) const override;
- void print(raw_ostream &OS, unsigned Depth = 0) const override;
- bool isAlwaysTrue() const override;
- const SCEV *getExpr() const override;
+ /// Convenient IncrementWrapFlags manipulation methods.
+ LLVM_NODISCARD static SCEVWrapPredicate::IncrementWrapFlags
+ clearFlags(SCEVWrapPredicate::IncrementWrapFlags Flags,
+ SCEVWrapPredicate::IncrementWrapFlags OffFlags) {
+ assert((Flags & IncrementNoWrapMask) == Flags && "Invalid flags value!");
+ assert((OffFlags & IncrementNoWrapMask) == OffFlags &&
+ "Invalid flags value!");
+ return (SCEVWrapPredicate::IncrementWrapFlags)(Flags & ~OffFlags);
+ }
- /// Returns the left hand side of the equality.
- const SCEVUnknown *getLHS() const { return LHS; }
+ LLVM_NODISCARD static SCEVWrapPredicate::IncrementWrapFlags
+ maskFlags(SCEVWrapPredicate::IncrementWrapFlags Flags, int Mask) {
+ assert((Flags & IncrementNoWrapMask) == Flags && "Invalid flags value!");
+ assert((Mask & IncrementNoWrapMask) == Mask && "Invalid mask value!");
- /// Returns the right hand side of the equality.
- const SCEVConstant *getRHS() const { return RHS; }
+ return (SCEVWrapPredicate::IncrementWrapFlags)(Flags & Mask);
+ }
- /// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const SCEVPredicate *P) {
- return P->getKind() == P_Equal;
- }
- };
+ LLVM_NODISCARD static SCEVWrapPredicate::IncrementWrapFlags
+ setFlags(SCEVWrapPredicate::IncrementWrapFlags Flags,
+ SCEVWrapPredicate::IncrementWrapFlags OnFlags) {
+ assert((Flags & IncrementNoWrapMask) == Flags && "Invalid flags value!");
+ assert((OnFlags & IncrementNoWrapMask) == OnFlags &&
+ "Invalid flags value!");
- /// This class represents an assumption made on an AddRec expression. Given an
- /// affine AddRec expression {a,+,b}, we assume that it has the nssw or nusw
- /// flags (defined below) in the first X iterations of the loop, where X is a
- /// SCEV expression returned by getPredicatedBackedgeTakenCount).
- ///
- /// Note that this does not imply that X is equal to the backedge taken
- /// count. This means that if we have a nusw predicate for i32 {0,+,1} with a
- /// predicated backedge taken count of X, we only guarantee that {0,+,1} has
- /// nusw in the first X iterations. {0,+,1} may still wrap in the loop if we
- /// have more than X iterations.
- class SCEVWrapPredicate final : public SCEVPredicate {
- public:
- /// Similar to SCEV::NoWrapFlags, but with slightly different semantics
- /// for FlagNUSW. The increment is considered to be signed, and a + b
- /// (where b is the increment) is considered to wrap if:
- /// zext(a + b) != zext(a) + sext(b)
- ///
- /// If Signed is a function that takes an n-bit tuple and maps to the
- /// integer domain as the tuples value interpreted as twos complement,
- /// and Unsigned a function that takes an n-bit tuple and maps to the
- /// integer domain as as the base two value of input tuple, then a + b
- /// has IncrementNUSW iff:
- ///
- /// 0 <= Unsigned(a) + Signed(b) < 2^n
- ///
- /// The IncrementNSSW flag has identical semantics with SCEV::FlagNSW.
- ///
- /// Note that the IncrementNUSW flag is not commutative: if base + inc
- /// has IncrementNUSW, then inc + base doesn't neccessarily have this
- /// property. The reason for this is that this is used for sign/zero
- /// extending affine AddRec SCEV expressions when a SCEVWrapPredicate is
- /// assumed. A {base,+,inc} expression is already non-commutative with
- /// regards to base and inc, since it is interpreted as:
- /// (((base + inc) + inc) + inc) ...
- enum IncrementWrapFlags {
- IncrementAnyWrap = 0, // No guarantee.
- IncrementNUSW = (1 << 0), // No unsigned with signed increment wrap.
- IncrementNSSW = (1 << 1), // No signed with signed increment wrap
- // (equivalent with SCEV::NSW)
- IncrementNoWrapMask = (1 << 2) - 1
- };
-
- /// Convenient IncrementWrapFlags manipulation methods.
- static SCEVWrapPredicate::IncrementWrapFlags LLVM_ATTRIBUTE_UNUSED_RESULT
- clearFlags(SCEVWrapPredicate::IncrementWrapFlags Flags,
- SCEVWrapPredicate::IncrementWrapFlags OffFlags) {
- assert((Flags & IncrementNoWrapMask) == Flags && "Invalid flags value!");
- assert((OffFlags & IncrementNoWrapMask) == OffFlags &&
- "Invalid flags value!");
- return (SCEVWrapPredicate::IncrementWrapFlags)(Flags & ~OffFlags);
- }
+ return (SCEVWrapPredicate::IncrementWrapFlags)(Flags | OnFlags);
+ }
- static SCEVWrapPredicate::IncrementWrapFlags LLVM_ATTRIBUTE_UNUSED_RESULT
- maskFlags(SCEVWrapPredicate::IncrementWrapFlags Flags, int Mask) {
- assert((Flags & IncrementNoWrapMask) == Flags && "Invalid flags value!");
- assert((Mask & IncrementNoWrapMask) == Mask && "Invalid mask value!");
+ /// Returns the set of SCEVWrapPredicate no wrap flags implied by a
+ /// SCEVAddRecExpr.
+ LLVM_NODISCARD static SCEVWrapPredicate::IncrementWrapFlags
+ getImpliedFlags(const SCEVAddRecExpr *AR, ScalarEvolution &SE);
+
+private:
+ const SCEVAddRecExpr *AR;
+ IncrementWrapFlags Flags;
+
+public:
+ explicit SCEVWrapPredicate(const FoldingSetNodeIDRef ID,
+ const SCEVAddRecExpr *AR,
+ IncrementWrapFlags Flags);
+
+ /// Returns the set assumed no overflow flags.
+ IncrementWrapFlags getFlags() const { return Flags; }
+ /// Implementation of the SCEVPredicate interface
+ const SCEV *getExpr() const override;
+ bool implies(const SCEVPredicate *N) const override;
+ void print(raw_ostream &OS, unsigned Depth = 0) const override;
+ bool isAlwaysTrue() const override;
+
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const SCEVPredicate *P) {
+ return P->getKind() == P_Wrap;
+ }
+};
+
+/// This class represents a composition of other SCEV predicates, and is the
+/// class that most clients will interact with. This is equivalent to a
+/// logical "AND" of all the predicates in the union.
+///
+/// NB! Unlike other SCEVPredicate sub-classes this class does not live in the
+/// ScalarEvolution::Preds folding set. This is why the \c add function is sound.
+class SCEVUnionPredicate final : public SCEVPredicate {
+private:
+ typedef DenseMap<const SCEV *, SmallVector<const SCEVPredicate *, 4>>
+ PredicateMap;
+
+ /// Vector with references to all predicates in this union.
+ SmallVector<const SCEVPredicate *, 16> Preds;
+ /// Maps SCEVs to predicates for quick look-ups.
+ PredicateMap SCEVToPreds;
+
+public:
+ SCEVUnionPredicate();
+
+ const SmallVectorImpl<const SCEVPredicate *> &getPredicates() const {
+ return Preds;
+ }
- return (SCEVWrapPredicate::IncrementWrapFlags)(Flags & Mask);
- }
+ /// Adds a predicate to this union.
+ void add(const SCEVPredicate *N);
- static SCEVWrapPredicate::IncrementWrapFlags LLVM_ATTRIBUTE_UNUSED_RESULT
- setFlags(SCEVWrapPredicate::IncrementWrapFlags Flags,
- SCEVWrapPredicate::IncrementWrapFlags OnFlags) {
- assert((Flags & IncrementNoWrapMask) == Flags && "Invalid flags value!");
- assert((OnFlags & IncrementNoWrapMask) == OnFlags &&
- "Invalid flags value!");
+ /// Returns a reference to a vector containing all predicates which apply to
+ /// \p Expr.
+ ArrayRef<const SCEVPredicate *> getPredicatesForExpr(const SCEV *Expr);
- return (SCEVWrapPredicate::IncrementWrapFlags)(Flags | OnFlags);
- }
+ /// Implementation of the SCEVPredicate interface
+ bool isAlwaysTrue() const override;
+ bool implies(const SCEVPredicate *N) const override;
+ void print(raw_ostream &OS, unsigned Depth) const override;
+ const SCEV *getExpr() const override;
- /// Returns the set of SCEVWrapPredicate no wrap flags implied by a
- /// SCEVAddRecExpr.
- static SCEVWrapPredicate::IncrementWrapFlags
- getImpliedFlags(const SCEVAddRecExpr *AR, ScalarEvolution &SE);
+  /// We estimate the complexity of a union predicate as the number of
+ /// predicates in the union.
+ unsigned getComplexity() const override { return Preds.size(); }
- private:
- const SCEVAddRecExpr *AR;
- IncrementWrapFlags Flags;
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const SCEVPredicate *P) {
+ return P->getKind() == P_Union;
+ }
+};
+
+/// The main scalar evolution driver. Because client code (intentionally)
+/// can't do much with the SCEV objects directly, they must ask this class
+/// for services.
+class ScalarEvolution {
+public:
+ /// An enum describing the relationship between a SCEV and a loop.
+ enum LoopDisposition {
+ LoopVariant, ///< The SCEV is loop-variant (unknown).
+ LoopInvariant, ///< The SCEV is loop-invariant.
+ LoopComputable ///< The SCEV varies predictably with the loop.
+ };
- public:
- explicit SCEVWrapPredicate(const FoldingSetNodeIDRef ID,
- const SCEVAddRecExpr *AR,
- IncrementWrapFlags Flags);
-
- /// Returns the set assumed no overflow flags.
- IncrementWrapFlags getFlags() const { return Flags; }
- /// Implementation of the SCEVPredicate interface
- const SCEV *getExpr() const override;
- bool implies(const SCEVPredicate *N) const override;
- void print(raw_ostream &OS, unsigned Depth = 0) const override;
- bool isAlwaysTrue() const override;
-
- /// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const SCEVPredicate *P) {
- return P->getKind() == P_Wrap;
- }
+ /// An enum describing the relationship between a SCEV and a basic block.
+ enum BlockDisposition {
+ DoesNotDominateBlock, ///< The SCEV does not dominate the block.
+ DominatesBlock, ///< The SCEV dominates the block.
+ ProperlyDominatesBlock ///< The SCEV properly dominates the block.
};
- /// This class represents a composition of other SCEV predicates, and is the
- /// class that most clients will interact with. This is equivalent to a
- /// logical "AND" of all the predicates in the union.
- class SCEVUnionPredicate final : public SCEVPredicate {
- private:
- typedef DenseMap<const SCEV *, SmallVector<const SCEVPredicate *, 4>>
- PredicateMap;
+ /// Convenient NoWrapFlags manipulation that hides enum casts and is
+ /// visible in the ScalarEvolution name space.
+ LLVM_NODISCARD static SCEV::NoWrapFlags maskFlags(SCEV::NoWrapFlags Flags,
+ int Mask) {
+ return (SCEV::NoWrapFlags)(Flags & Mask);
+ }
+ LLVM_NODISCARD static SCEV::NoWrapFlags setFlags(SCEV::NoWrapFlags Flags,
+ SCEV::NoWrapFlags OnFlags) {
+ return (SCEV::NoWrapFlags)(Flags | OnFlags);
+ }
+ LLVM_NODISCARD static SCEV::NoWrapFlags
+ clearFlags(SCEV::NoWrapFlags Flags, SCEV::NoWrapFlags OffFlags) {
+ return (SCEV::NoWrapFlags)(Flags & ~OffFlags);
+ }
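
The maskFlags/setFlags/clearFlags helpers above, now marked LLVM_NODISCARD (the replacement for LLVM_ATTRIBUTE_UNUSED_RESULT in this revision), are plain bit algebra over SCEV::NoWrapFlags. A self-contained restatement of that algebra, re-declaring the enum values shown earlier in this header:

// The enum values mirror SCEV::NoWrapFlags from earlier in this header.
enum NoWrapFlags {
  FlagAnyWrap = 0,
  FlagNW = (1 << 0),
  FlagNUW = (1 << 1),
  FlagNSW = (1 << 2),
  NoWrapMask = (1 << 3) - 1
};

static NoWrapFlags setFlags(NoWrapFlags Flags, NoWrapFlags OnFlags) {
  return (NoWrapFlags)(Flags | OnFlags);
}
static NoWrapFlags clearFlags(NoWrapFlags Flags, NoWrapFlags OffFlags) {
  return (NoWrapFlags)(Flags & ~OffFlags);
}

// A recurrence proven <NUW> is also marked <NW>; later dropping <NUW> must
// keep the independent <NW> bit, which the bitwise ops above guarantee.
static NoWrapFlags example() {
  NoWrapFlags F = setFlags(FlagAnyWrap, (NoWrapFlags)(FlagNUW | FlagNW));
  return clearFlags(F, FlagNUW); // == FlagNW
}
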
- /// Vector with references to all predicates in this union.
- SmallVector<const SCEVPredicate *, 16> Preds;
- /// Maps SCEVs to predicates for quick look-ups.
- PredicateMap SCEVToPreds;
+private:
+ /// A CallbackVH to arrange for ScalarEvolution to be notified whenever a
+ /// Value is deleted.
+ class SCEVCallbackVH final : public CallbackVH {
+ ScalarEvolution *SE;
+ void deleted() override;
+ void allUsesReplacedWith(Value *New) override;
public:
- SCEVUnionPredicate();
-
- const SmallVectorImpl<const SCEVPredicate *> &getPredicates() const {
- return Preds;
- }
-
- /// Adds a predicate to this union.
- void add(const SCEVPredicate *N);
-
- /// Returns a reference to a vector containing all predicates which apply to
- /// \p Expr.
- ArrayRef<const SCEVPredicate *> getPredicatesForExpr(const SCEV *Expr);
-
- /// Implementation of the SCEVPredicate interface
- bool isAlwaysTrue() const override;
- bool implies(const SCEVPredicate *N) const override;
- void print(raw_ostream &OS, unsigned Depth) const override;
- const SCEV *getExpr() const override;
-
- /// We estimate the complexity of a union predicate as the size number of
- /// predicates in the union.
- unsigned getComplexity() const override { return Preds.size(); }
-
- /// Methods for support type inquiry through isa, cast, and dyn_cast:
- static inline bool classof(const SCEVPredicate *P) {
- return P->getKind() == P_Union;
- }
+ SCEVCallbackVH(Value *V, ScalarEvolution *SE = nullptr);
};
- /// The main scalar evolution driver. Because client code (intentionally)
- /// can't do much with the SCEV objects directly, they must ask this class
- /// for services.
- class ScalarEvolution {
- public:
- /// An enum describing the relationship between a SCEV and a loop.
- enum LoopDisposition {
- LoopVariant, ///< The SCEV is loop-variant (unknown).
- LoopInvariant, ///< The SCEV is loop-invariant.
- LoopComputable ///< The SCEV varies predictably with the loop.
- };
-
- /// An enum describing the relationship between a SCEV and a basic block.
- enum BlockDisposition {
- DoesNotDominateBlock, ///< The SCEV does not dominate the block.
- DominatesBlock, ///< The SCEV dominates the block.
- ProperlyDominatesBlock ///< The SCEV properly dominates the block.
- };
-
- /// Convenient NoWrapFlags manipulation that hides enum casts and is
- /// visible in the ScalarEvolution name space.
- static SCEV::NoWrapFlags LLVM_ATTRIBUTE_UNUSED_RESULT
- maskFlags(SCEV::NoWrapFlags Flags, int Mask) {
- return (SCEV::NoWrapFlags)(Flags & Mask);
- }
- static SCEV::NoWrapFlags LLVM_ATTRIBUTE_UNUSED_RESULT
- setFlags(SCEV::NoWrapFlags Flags, SCEV::NoWrapFlags OnFlags) {
- return (SCEV::NoWrapFlags)(Flags | OnFlags);
- }
- static SCEV::NoWrapFlags LLVM_ATTRIBUTE_UNUSED_RESULT
- clearFlags(SCEV::NoWrapFlags Flags, SCEV::NoWrapFlags OffFlags) {
- return (SCEV::NoWrapFlags)(Flags & ~OffFlags);
- }
+ friend class SCEVCallbackVH;
+ friend class SCEVExpander;
+ friend class SCEVUnknown;
- private:
- /// A CallbackVH to arrange for ScalarEvolution to be notified whenever a
- /// Value is deleted.
- class SCEVCallbackVH final : public CallbackVH {
- ScalarEvolution *SE;
- void deleted() override;
- void allUsesReplacedWith(Value *New) override;
- public:
- SCEVCallbackVH(Value *V, ScalarEvolution *SE = nullptr);
- };
-
- friend class SCEVCallbackVH;
- friend class SCEVExpander;
- friend class SCEVUnknown;
-
- /// The function we are analyzing.
- ///
- Function &F;
+ /// The function we are analyzing.
+ ///
+ Function &F;
- /// Does the module have any calls to the llvm.experimental.guard intrinsic
- /// at all? If this is false, we avoid doing work that will only help if
- /// thare are guards present in the IR.
- ///
- bool HasGuards;
+ /// Does the module have any calls to the llvm.experimental.guard intrinsic
+ /// at all? If this is false, we avoid doing work that will only help if
+  /// there are guards present in the IR.
+ ///
+ bool HasGuards;
- /// The target library information for the target we are targeting.
- ///
- TargetLibraryInfo &TLI;
+ /// The target library information for the target we are targeting.
+ ///
+ TargetLibraryInfo &TLI;
- /// The tracker for @llvm.assume intrinsics in this function.
- AssumptionCache &AC;
+ /// The tracker for @llvm.assume intrinsics in this function.
+ AssumptionCache &AC;
- /// The dominator tree.
- ///
- DominatorTree &DT;
+ /// The dominator tree.
+ ///
+ DominatorTree &DT;
- /// The loop information for the function we are currently analyzing.
- ///
- LoopInfo &LI;
+ /// The loop information for the function we are currently analyzing.
+ ///
+ LoopInfo &LI;
- /// This SCEV is used to represent unknown trip counts and things.
- std::unique_ptr<SCEVCouldNotCompute> CouldNotCompute;
+ /// This SCEV is used to represent unknown trip counts and things.
+ std::unique_ptr<SCEVCouldNotCompute> CouldNotCompute;
- /// The typedef for HasRecMap.
- ///
- typedef DenseMap<const SCEV *, bool> HasRecMapType;
+ /// The typedef for HasRecMap.
+ ///
+ typedef DenseMap<const SCEV *, bool> HasRecMapType;
- /// This is a cache to record whether a SCEV contains any scAddRecExpr.
- HasRecMapType HasRecMap;
+ /// This is a cache to record whether a SCEV contains any scAddRecExpr.
+ HasRecMapType HasRecMap;
- /// The typedef for ExprValueMap.
- ///
- typedef DenseMap<const SCEV *, SetVector<Value *>> ExprValueMapType;
+ /// The typedef for ExprValueMap.
+ ///
+ typedef std::pair<Value *, ConstantInt *> ValueOffsetPair;
+ typedef DenseMap<const SCEV *, SetVector<ValueOffsetPair>> ExprValueMapType;
- /// ExprValueMap -- This map records the original values from which
- /// the SCEV expr is generated from.
- ExprValueMapType ExprValueMap;
+ /// ExprValueMap -- This map records the original values from which
+  /// the SCEV expr is generated.
+ ///
+ /// We want to represent the mapping as SCEV -> ValueOffsetPair instead
+ /// of SCEV -> Value:
+ /// Suppose we know S1 expands to V1, and
+ /// S1 = S2 + C_a
+ /// S3 = S2 + C_b
+ /// where C_a and C_b are different SCEVConstants. Then we'd like to
+ /// expand S3 as V1 - C_a + C_b instead of expanding S2 literally.
+ /// It is helpful when S2 is a complex SCEV expr.
+ ///
+ /// In order to do that, we represent ExprValueMap as a mapping from
+ /// SCEV to ValueOffsetPair. We will save both S1->{V1, 0} and
+ /// S2->{V1, C_a} into the map when we create SCEV for V1. When S3
+ /// is expanded, it will first expand S2 to V1 - C_a because of
+ /// S2->{V1, C_a} in the map, then expand S3 to V1 - C_a + C_b.
+ ///
+ /// Note: S->{V, Offset} in the ExprValueMap means S can be expanded
+ /// to V - Offset.
+ ExprValueMapType ExprValueMap;
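
The comment block above explains the switch from a SCEV -> Value map to a SCEV -> {Value, Offset} map: recording S2 -> {V1, C_a} lets a later expression built on S2 be expanded from V1 instead of re-materializing S2. A toy restatement of that bookkeeping with concrete numbers, where string keys stand in for SCEV expressions:

#include <map>
#include <string>
#include <utility>

// S -> {V, Offset} means "S can be expanded as V - Offset".
// Suppose V1 was emitted for S1, and S1 = S2 + 5. We record both
// S1 -> {V1, 0} and S2 -> {V1, 5}; a later S3 = S2 + 7 can then be
// expanded as (V1 - 5) + 7 without expanding S2 itself.
static std::string expandS3() {
  std::map<std::string, std::pair<std::string, int>> ExprValueMap;
  ExprValueMap["S1"] = {"V1", 0};
  ExprValueMap["S2"] = {"V1", 5};

  const std::pair<std::string, int> &VO = ExprValueMap.at("S2");
  return "(" + VO.first + " - " + std::to_string(VO.second) + ") + 7";
}
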
- /// The typedef for ValueExprMap.
- ///
- typedef DenseMap<SCEVCallbackVH, const SCEV *, DenseMapInfo<Value *> >
+ /// The typedef for ValueExprMap.
+ ///
+ typedef DenseMap<SCEVCallbackVH, const SCEV *, DenseMapInfo<Value *>>
ValueExprMapType;
- /// This is a cache of the values we have analyzed so far.
- ///
- ValueExprMapType ValueExprMap;
-
- /// Mark predicate values currently being processed by isImpliedCond.
- DenseSet<Value*> PendingLoopPredicates;
-
- /// Set to true by isLoopBackedgeGuardedByCond when we're walking the set of
- /// conditions dominating the backedge of a loop.
- bool WalkingBEDominatingConds;
-
- /// Set to true by isKnownPredicateViaSplitting when we're trying to prove a
- /// predicate by splitting it into a set of independent predicates.
- bool ProvingSplitPredicate;
-
- /// Information about the number of loop iterations for which a loop exit's
- /// branch condition evaluates to the not-taken path. This is a temporary
- /// pair of exact and max expressions that are eventually summarized in
- /// ExitNotTakenInfo and BackedgeTakenInfo.
- struct ExitLimit {
- const SCEV *Exact;
- const SCEV *Max;
-
- /// A predicate union guard for this ExitLimit. The result is only
- /// valid if this predicate evaluates to 'true' at run-time.
- SCEVUnionPredicate Pred;
-
- /*implicit*/ ExitLimit(const SCEV *E) : Exact(E), Max(E) {}
-
- ExitLimit(const SCEV *E, const SCEV *M, SCEVUnionPredicate &P)
- : Exact(E), Max(M), Pred(P) {
- assert((isa<SCEVCouldNotCompute>(Exact) ||
- !isa<SCEVCouldNotCompute>(Max)) &&
- "Exact is not allowed to be less precise than Max");
- }
-
- /// Test whether this ExitLimit contains any computed information, or
- /// whether it's all SCEVCouldNotCompute values.
- bool hasAnyInfo() const {
- return !isa<SCEVCouldNotCompute>(Exact) ||
- !isa<SCEVCouldNotCompute>(Max);
- }
-
- /// Test whether this ExitLimit contains all information.
- bool hasFullInfo() const { return !isa<SCEVCouldNotCompute>(Exact); }
- };
-
- /// Forward declaration of ExitNotTakenExtras
- struct ExitNotTakenExtras;
-
- /// Information about the number of times a particular loop exit may be
- /// reached before exiting the loop.
- struct ExitNotTakenInfo {
- AssertingVH<BasicBlock> ExitingBlock;
- const SCEV *ExactNotTaken;
-
- ExitNotTakenExtras *ExtraInfo;
- bool Complete;
-
- ExitNotTakenInfo()
- : ExitingBlock(nullptr), ExactNotTaken(nullptr), ExtraInfo(nullptr),
- Complete(true) {}
-
- ExitNotTakenInfo(BasicBlock *ExitBlock, const SCEV *Expr,
- ExitNotTakenExtras *Ptr)
- : ExitingBlock(ExitBlock), ExactNotTaken(Expr), ExtraInfo(Ptr),
- Complete(true) {}
-
- /// Return true if all loop exits are computable.
- bool isCompleteList() const { return Complete; }
-
- /// Sets the incomplete property, indicating that one of the loop exits
- /// doesn't have a corresponding ExitNotTakenInfo entry.
- void setIncomplete() { Complete = false; }
-
- /// Returns a pointer to the predicate associated with this information,
- /// or nullptr if this doesn't exist (meaning always true).
- SCEVUnionPredicate *getPred() const {
- if (ExtraInfo)
- return &ExtraInfo->Pred;
-
- return nullptr;
- }
-
- /// Return true if the SCEV predicate associated with this information
- /// is always true.
- bool hasAlwaysTruePred() const {
- return !getPred() || getPred()->isAlwaysTrue();
- }
-
- /// Defines a simple forward iterator for ExitNotTakenInfo.
- class ExitNotTakenInfoIterator
- : public std::iterator<std::forward_iterator_tag, ExitNotTakenInfo> {
- const ExitNotTakenInfo *Start;
- unsigned Position;
-
- public:
- ExitNotTakenInfoIterator(const ExitNotTakenInfo *Start,
- unsigned Position)
- : Start(Start), Position(Position) {}
-
- const ExitNotTakenInfo &operator*() const {
- if (Position == 0)
- return *Start;
-
- return Start->ExtraInfo->Exits[Position - 1];
- }
-
- const ExitNotTakenInfo *operator->() const {
- if (Position == 0)
- return Start;
-
- return &Start->ExtraInfo->Exits[Position - 1];
- }
-
- bool operator==(const ExitNotTakenInfoIterator &RHS) const {
- return Start == RHS.Start && Position == RHS.Position;
- }
-
- bool operator!=(const ExitNotTakenInfoIterator &RHS) const {
- return Start != RHS.Start || Position != RHS.Position;
- }
-
- ExitNotTakenInfoIterator &operator++() { // Preincrement
- if (!Start)
- return *this;
-
- unsigned Elements =
- Start->ExtraInfo ? Start->ExtraInfo->Exits.size() + 1 : 1;
-
- ++Position;
-
- // We've run out of elements.
- if (Position == Elements) {
- Start = nullptr;
- Position = 0;
- }
-
- return *this;
- }
- ExitNotTakenInfoIterator operator++(int) { // Postincrement
- ExitNotTakenInfoIterator Tmp = *this;
- ++*this;
- return Tmp;
- }
- };
-
- /// Iterators
- ExitNotTakenInfoIterator begin() const {
- return ExitNotTakenInfoIterator(this, 0);
- }
- ExitNotTakenInfoIterator end() const {
- return ExitNotTakenInfoIterator(nullptr, 0);
- }
- };
-
- /// Describes the extra information that a ExitNotTakenInfo can have.
- struct ExitNotTakenExtras {
- /// The predicate associated with the ExitNotTakenInfo struct.
- SCEVUnionPredicate Pred;
-
- /// The extra exits in the loop. Only the ExitNotTakenExtras structure
- /// pointed to by the first ExitNotTakenInfo struct (associated with the
- /// first loop exit) will populate this vector to prevent having
- /// redundant information.
- SmallVector<ExitNotTakenInfo, 4> Exits;
- };
-
- /// A struct containing the information attached to a backedge.
- struct EdgeInfo {
- EdgeInfo(BasicBlock *Block, const SCEV *Taken, SCEVUnionPredicate &P) :
- ExitBlock(Block), Taken(Taken), Pred(std::move(P)) {}
-
- /// The exit basic block.
- BasicBlock *ExitBlock;
-
- /// The (exact) number of time we take the edge back.
- const SCEV *Taken;
-
- /// The SCEV predicated associated with Taken. If Pred doesn't evaluate
- /// to true, the information in Taken is not valid (or equivalent with
- /// a CouldNotCompute.
- SCEVUnionPredicate Pred;
- };
-
- /// Information about the backedge-taken count of a loop. This currently
- /// includes an exact count and a maximum count.
- ///
- class BackedgeTakenInfo {
- /// A list of computable exits and their not-taken counts. Loops almost
- /// never have more than one computable exit.
- ExitNotTakenInfo ExitNotTaken;
-
- /// An expression indicating the least maximum backedge-taken count of the
- /// loop that is known, or a SCEVCouldNotCompute. This expression is only
- /// valid if the predicates associated with all loop exits are true.
- const SCEV *Max;
-
- public:
- BackedgeTakenInfo() : Max(nullptr) {}
-
- /// Initialize BackedgeTakenInfo from a list of exact exit counts.
- BackedgeTakenInfo(SmallVectorImpl<EdgeInfo> &ExitCounts, bool Complete,
- const SCEV *MaxCount);
-
- /// Test whether this BackedgeTakenInfo contains any computed information,
- /// or whether it's all SCEVCouldNotCompute values.
- bool hasAnyInfo() const {
- return ExitNotTaken.ExitingBlock || !isa<SCEVCouldNotCompute>(Max);
- }
-
- /// Test whether this BackedgeTakenInfo contains complete information.
- bool hasFullInfo() const { return ExitNotTaken.isCompleteList(); }
-
- /// Return an expression indicating the exact backedge-taken count of the
- /// loop if it is known or SCEVCouldNotCompute otherwise. This is the
- /// number of times the loop header can be guaranteed to execute, minus
- /// one.
- ///
- /// If the SCEV predicate associated with the answer can be different
- /// from AlwaysTrue, we must add a (non null) Predicates argument.
- /// The SCEV predicate associated with the answer will be added to
- /// Predicates. A run-time check needs to be emitted for the SCEV
- /// predicate in order for the answer to be valid.
- ///
- /// Note that we should always know if we need to pass a predicate
- /// argument or not from the way the ExitCounts vector was computed.
- /// If we allowed SCEV predicates to be generated when populating this
- /// vector, this information can contain them and therefore a
- /// SCEVPredicate argument should be added to getExact.
- const SCEV *getExact(ScalarEvolution *SE,
- SCEVUnionPredicate *Predicates = nullptr) const;
-
- /// Return the number of times this loop exit may fall through to the back
- /// edge, or SCEVCouldNotCompute. The loop is guaranteed not to exit via
- /// this block before this number of iterations, but may exit via another
- /// block.
- const SCEV *getExact(BasicBlock *ExitingBlock, ScalarEvolution *SE) const;
-
- /// Get the max backedge taken count for the loop.
- const SCEV *getMax(ScalarEvolution *SE) const;
-
- /// Return true if any backedge taken count expressions refer to the given
- /// subexpression.
- bool hasOperand(const SCEV *S, ScalarEvolution *SE) const;
-
- /// Invalidate this result and free associated memory.
- void clear();
- };
-
- /// Cache the backedge-taken count of the loops for this function as they
- /// are computed.
- DenseMap<const Loop *, BackedgeTakenInfo> BackedgeTakenCounts;
-
- /// Cache the predicated backedge-taken count of the loops for this
- /// function as they are computed.
- DenseMap<const Loop *, BackedgeTakenInfo> PredicatedBackedgeTakenCounts;
-
- /// This map contains entries for all of the PHI instructions that we
- /// attempt to compute constant evolutions for. This allows us to avoid
- /// potentially expensive recomputation of these properties. An instruction
- /// maps to null if we are unable to compute its exit value.
- DenseMap<PHINode*, Constant*> ConstantEvolutionLoopExitValue;
-
- /// This map contains entries for all the expressions that we attempt to
- /// compute getSCEVAtScope information for, which can be expensive in
- /// extreme cases.
- DenseMap<const SCEV *,
- SmallVector<std::pair<const Loop *, const SCEV *>, 2> > ValuesAtScopes;
-
- /// Memoized computeLoopDisposition results.
- DenseMap<const SCEV *,
- SmallVector<PointerIntPair<const Loop *, 2, LoopDisposition>, 2>>
- LoopDispositions;
-
- /// Cache for \c loopHasNoAbnormalExits.
- DenseMap<const Loop *, bool> LoopHasNoAbnormalExits;
-
- /// Returns true if \p L contains no instruction that can abnormally exit
- /// the loop (i.e. via throwing an exception, by terminating the thread
- /// cleanly or by infinite looping in a called function). Strictly
- /// speaking, the last one is not leaving the loop, but is identical to
- /// leaving the loop for reasoning about undefined behavior.
- bool loopHasNoAbnormalExits(const Loop *L);
-
- /// Compute a LoopDisposition value.
- LoopDisposition computeLoopDisposition(const SCEV *S, const Loop *L);
-
- /// Memoized computeBlockDisposition results.
- DenseMap<
- const SCEV *,
- SmallVector<PointerIntPair<const BasicBlock *, 2, BlockDisposition>, 2>>
- BlockDispositions;
-
- /// Compute a BlockDisposition value.
- BlockDisposition computeBlockDisposition(const SCEV *S, const BasicBlock *BB);
-
- /// Memoized results from getRange
- DenseMap<const SCEV *, ConstantRange> UnsignedRanges;
-
- /// Memoized results from getRange
- DenseMap<const SCEV *, ConstantRange> SignedRanges;
-
- /// Used to parameterize getRange
- enum RangeSignHint { HINT_RANGE_UNSIGNED, HINT_RANGE_SIGNED };
-
- /// Set the memoized range for the given SCEV.
- const ConstantRange &setRange(const SCEV *S, RangeSignHint Hint,
- const ConstantRange &CR) {
- DenseMap<const SCEV *, ConstantRange> &Cache =
- Hint == HINT_RANGE_UNSIGNED ? UnsignedRanges : SignedRanges;
-
- auto Pair = Cache.insert({S, CR});
- if (!Pair.second)
- Pair.first->second = CR;
- return Pair.first->second;
+ /// This is a cache of the values we have analyzed so far.
+ ///
+ ValueExprMapType ValueExprMap;
+
+ /// Mark predicate values currently being processed by isImpliedCond.
+ SmallPtrSet<Value *, 6> PendingLoopPredicates;
+
+ /// Set to true by isLoopBackedgeGuardedByCond when we're walking the set of
+ /// conditions dominating the backedge of a loop.
+ bool WalkingBEDominatingConds;
+
+ /// Set to true by isKnownPredicateViaSplitting when we're trying to prove a
+ /// predicate by splitting it into a set of independent predicates.
+ bool ProvingSplitPredicate;
+
+ /// Information about the number of loop iterations for which a loop exit's
+ /// branch condition evaluates to the not-taken path. This is a temporary
+ /// pair of exact and max expressions that are eventually summarized in
+ /// ExitNotTakenInfo and BackedgeTakenInfo.
+ struct ExitLimit {
+ const SCEV *ExactNotTaken; // The exit is not taken exactly this many times
+ const SCEV *MaxNotTaken; // The exit is not taken at most this many times
+ bool MaxOrZero; // Not taken either exactly MaxNotTaken or zero times
+
+ /// A set of predicate guards for this ExitLimit. The result is only valid
+ /// if all of the predicates in \c Predicates evaluate to 'true' at
+ /// run-time.
+ SmallPtrSet<const SCEVPredicate *, 4> Predicates;
+
+ void addPredicate(const SCEVPredicate *P) {
+ assert(!isa<SCEVUnionPredicate>(P) && "Only add leaf predicates here!");
+ Predicates.insert(P);
}
- /// Determine the range for a particular SCEV.
- ConstantRange getRange(const SCEV *S, RangeSignHint Hint);
-
- /// Determines the range for the affine SCEVAddRecExpr {\p Start,+,\p Stop}.
- /// Helper for \c getRange.
- ConstantRange getRangeForAffineAR(const SCEV *Start, const SCEV *Stop,
- const SCEV *MaxBECount,
- unsigned BitWidth);
-
- /// Try to compute a range for the affine SCEVAddRecExpr {\p Start,+,\p
- /// Stop} by "factoring out" a ternary expression from the add recurrence.
- /// Helper called by \c getRange.
- ConstantRange getRangeViaFactoring(const SCEV *Start, const SCEV *Stop,
- const SCEV *MaxBECount,
- unsigned BitWidth);
+ /*implicit*/ ExitLimit(const SCEV *E)
+ : ExactNotTaken(E), MaxNotTaken(E), MaxOrZero(false) {}
+
+ ExitLimit(
+ const SCEV *E, const SCEV *M, bool MaxOrZero,
+ ArrayRef<const SmallPtrSetImpl<const SCEVPredicate *> *> PredSetList)
+ : ExactNotTaken(E), MaxNotTaken(M), MaxOrZero(MaxOrZero) {
+ assert((isa<SCEVCouldNotCompute>(ExactNotTaken) ||
+ !isa<SCEVCouldNotCompute>(MaxNotTaken)) &&
+ "Exact is not allowed to be less precise than Max");
+ for (auto *PredSet : PredSetList)
+ for (auto *P : *PredSet)
+ addPredicate(P);
+ }
- /// We know that there is no SCEV for the specified value. Analyze the
- /// expression.
- const SCEV *createSCEV(Value *V);
+ ExitLimit(const SCEV *E, const SCEV *M, bool MaxOrZero,
+ const SmallPtrSetImpl<const SCEVPredicate *> &PredSet)
+ : ExitLimit(E, M, MaxOrZero, {&PredSet}) {}
- /// Provide the special handling we need to analyze PHI SCEVs.
- const SCEV *createNodeForPHI(PHINode *PN);
+ ExitLimit(const SCEV *E, const SCEV *M, bool MaxOrZero)
+ : ExitLimit(E, M, MaxOrZero, None) {}
- /// Helper function called from createNodeForPHI.
- const SCEV *createAddRecFromPHI(PHINode *PN);
+ /// Test whether this ExitLimit contains any computed information, or
+ /// whether it's all SCEVCouldNotCompute values.
+ bool hasAnyInfo() const {
+ return !isa<SCEVCouldNotCompute>(ExactNotTaken) ||
+ !isa<SCEVCouldNotCompute>(MaxNotTaken);
+ }
- /// Helper function called from createNodeForPHI.
- const SCEV *createNodeFromSelectLikePHI(PHINode *PN);
+ /// Test whether this ExitLimit contains all information.
+ bool hasFullInfo() const {
+ return !isa<SCEVCouldNotCompute>(ExactNotTaken);
+ }
+ };
- /// Provide special handling for a select-like instruction (currently this
- /// is either a select instruction or a phi node). \p I is the instruction
- /// being processed, and it is assumed equivalent to "Cond ? TrueVal :
- /// FalseVal".
- const SCEV *createNodeForSelectOrPHI(Instruction *I, Value *Cond,
- Value *TrueVal, Value *FalseVal);
+ /// Information about the number of times a particular loop exit may be
+ /// reached before exiting the loop.
+ struct ExitNotTakenInfo {
+ AssertingVH<BasicBlock> ExitingBlock;
+ const SCEV *ExactNotTaken;
+ std::unique_ptr<SCEVUnionPredicate> Predicate;
+ bool hasAlwaysTruePredicate() const {
+ return !Predicate || Predicate->isAlwaysTrue();
+ }
- /// Provide the special handling we need to analyze GEP SCEVs.
- const SCEV *createNodeForGEP(GEPOperator *GEP);
+ explicit ExitNotTakenInfo(AssertingVH<BasicBlock> ExitingBlock,
+ const SCEV *ExactNotTaken,
+ std::unique_ptr<SCEVUnionPredicate> Predicate)
+ : ExitingBlock(ExitingBlock), ExactNotTaken(ExactNotTaken),
+ Predicate(std::move(Predicate)) {}
+ };
- /// Implementation code for getSCEVAtScope; called at most once for each
- /// SCEV+Loop pair.
- ///
- const SCEV *computeSCEVAtScope(const SCEV *S, const Loop *L);
-
- /// This looks up computed SCEV values for all instructions that depend on
- /// the given instruction and removes them from the ValueExprMap map if they
- /// reference SymName. This is used during PHI resolution.
- void forgetSymbolicName(Instruction *I, const SCEV *SymName);
-
- /// Return the BackedgeTakenInfo for the given loop, lazily computing new
- /// values if the loop hasn't been analyzed yet. The returned result is
- /// guaranteed not to be predicated.
- const BackedgeTakenInfo &getBackedgeTakenInfo(const Loop *L);
-
- /// Similar to getBackedgeTakenInfo, but will add predicates as required
- /// with the purpose of returning complete information.
- const BackedgeTakenInfo &getPredicatedBackedgeTakenInfo(const Loop *L);
-
- /// Compute the number of times the specified loop will iterate.
- /// If AllowPredicates is set, we will create new SCEV predicates as
- /// necessary in order to return an exact answer.
- BackedgeTakenInfo computeBackedgeTakenCount(const Loop *L,
- bool AllowPredicates = false);
-
- /// Compute the number of times the backedge of the specified loop will
- /// execute if it exits via the specified block. If AllowPredicates is set,
- /// this call will try to use a minimal set of SCEV predicates in order to
- /// return an exact answer.
- ExitLimit computeExitLimit(const Loop *L, BasicBlock *ExitingBlock,
- bool AllowPredicates = false);
-
- /// Compute the number of times the backedge of the specified loop will
- /// execute if its exit condition were a conditional branch of ExitCond,
- /// TBB, and FBB.
- ///
- /// \p ControlsExit is true if ExitCond directly controls the exit
- /// branch. In this case, we can assume that the loop exits only if the
- /// condition is true and can infer that failing to meet the condition prior
- /// to integer wraparound results in undefined behavior.
- ///
- /// If \p AllowPredicates is set, this call will try to use a minimal set of
- /// SCEV predicates in order to return an exact answer.
- ExitLimit computeExitLimitFromCond(const Loop *L,
- Value *ExitCond,
- BasicBlock *TBB,
- BasicBlock *FBB,
- bool ControlsExit,
- bool AllowPredicates = false);
-
- /// Compute the number of times the backedge of the specified loop will
- /// execute if its exit condition were a conditional branch of the ICmpInst
- /// ExitCond, TBB, and FBB. If AllowPredicates is set, this call will try
- /// to use a minimal set of SCEV predicates in order to return an exact
- /// answer.
- ExitLimit computeExitLimitFromICmp(const Loop *L,
- ICmpInst *ExitCond,
- BasicBlock *TBB,
- BasicBlock *FBB,
- bool IsSubExpr,
- bool AllowPredicates = false);
-
- /// Compute the number of times the backedge of the specified loop will
- /// execute if its exit condition were a switch with a single exiting case
- /// to ExitingBB.
- ExitLimit
- computeExitLimitFromSingleExitSwitch(const Loop *L, SwitchInst *Switch,
- BasicBlock *ExitingBB, bool IsSubExpr);
-
- /// Given an exit condition of 'icmp op load X, cst', try to see if we can
- /// compute the backedge-taken count.
- ExitLimit computeLoadConstantCompareExitLimit(LoadInst *LI,
- Constant *RHS,
- const Loop *L,
- ICmpInst::Predicate p);
-
- /// Compute the exit limit of a loop that is controlled by a
- /// "(IV >> 1) != 0" type comparison. We cannot compute the exact trip
- /// count in these cases (since SCEV has no way of expressing them), but we
- /// can still sometimes compute an upper bound.
- ///
- /// Return an ExitLimit for a loop whose backedge is guarded by `LHS Pred
- /// RHS`.
- ExitLimit computeShiftCompareExitLimit(Value *LHS, Value *RHS,
- const Loop *L,
- ICmpInst::Predicate Pred);
-
- /// If the loop is known to execute a constant number of times (the
- /// condition evolves only from constants), try to evaluate a few iterations
- /// of the loop until the exit condition gets a value of ExitWhen
- /// (true or false). If we cannot evaluate the exit count of the loop,
- /// return CouldNotCompute.
- const SCEV *computeExitCountExhaustively(const Loop *L,
- Value *Cond,
- bool ExitWhen);
-
- /// Return the number of times an exit condition comparing the specified
- /// value to zero will execute. If not computable, return CouldNotCompute.
- /// If AllowPredicates is set, this call will try to use a minimal set of
- /// SCEV predicates in order to return an exact answer.
- ExitLimit howFarToZero(const SCEV *V, const Loop *L, bool IsSubExpr,
- bool AllowPredicates = false);
-
- /// Return the number of times an exit condition checking the specified
- /// value for nonzero will execute. If not computable, return
- /// CouldNotCompute.
- ExitLimit howFarToNonZero(const SCEV *V, const Loop *L);
-
- /// Return the number of times an exit condition containing the specified
- /// less-than comparison will execute. If not computable, return
- /// CouldNotCompute.
- ///
- /// \p isSigned specifies whether the less-than is signed.
- ///
- /// \p ControlsExit is true when the LHS < RHS condition directly controls
- /// the branch (the loop exits only if the condition is true). In this case, we can
- /// use NoWrapFlags to skip overflow checks.
- ///
- /// If \p AllowPredicates is set, this call will try to use a minimal set of
- /// SCEV predicates in order to return an exact answer.
- ExitLimit howManyLessThans(const SCEV *LHS, const SCEV *RHS, const Loop *L,
- bool isSigned, bool ControlsExit,
- bool AllowPredicates = false);
-
- ExitLimit howManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
- const Loop *L, bool isSigned, bool IsSubExpr,
- bool AllowPredicates = false);
-
- /// Return a predecessor of BB (which may not be an immediate predecessor)
- /// which has exactly one successor from which BB is reachable, or null if
- /// no such block is found.
- std::pair<BasicBlock *, BasicBlock *>
- getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB);
-
- /// Test whether the condition described by Pred, LHS, and RHS is true
- /// whenever the given FoundCondValue value evaluates to true.
- bool isImpliedCond(ICmpInst::Predicate Pred,
- const SCEV *LHS, const SCEV *RHS,
- Value *FoundCondValue,
- bool Inverse);
-
- /// Test whether the condition described by Pred, LHS, and RHS is true
- /// whenever the condition described by FoundPred, FoundLHS, FoundRHS is
- /// true.
- bool isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS,
- const SCEV *RHS, ICmpInst::Predicate FoundPred,
- const SCEV *FoundLHS, const SCEV *FoundRHS);
-
- /// Test whether the condition described by Pred, LHS, and RHS is true
- /// whenever the condition described by Pred, FoundLHS, and FoundRHS is
- /// true.
- bool isImpliedCondOperands(ICmpInst::Predicate Pred,
- const SCEV *LHS, const SCEV *RHS,
- const SCEV *FoundLHS, const SCEV *FoundRHS);
-
- /// Test whether the condition described by Pred, LHS, and RHS is true
- /// whenever the condition described by Pred, FoundLHS, and FoundRHS is
- /// true.
- bool isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
- const SCEV *LHS, const SCEV *RHS,
- const SCEV *FoundLHS,
- const SCEV *FoundRHS);
-
- /// Test whether the condition described by Pred, LHS, and RHS is true
- /// whenever the condition described by Pred, FoundLHS, and FoundRHS is
- /// true. Utility function used by isImpliedCondOperands. Tries to get
- /// cases like "X `sgt` 0 => X - 1 `sgt` -1".
- bool isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred,
- const SCEV *LHS, const SCEV *RHS,
- const SCEV *FoundLHS,
- const SCEV *FoundRHS);
-
- /// Return true if the condition denoted by \p LHS \p Pred \p RHS is implied
- /// by a call to \c @llvm.experimental.guard in \p BB.
- bool isImpliedViaGuard(BasicBlock *BB, ICmpInst::Predicate Pred,
- const SCEV *LHS, const SCEV *RHS);
-
- /// Test whether the condition described by Pred, LHS, and RHS is true
- /// whenever the condition described by Pred, FoundLHS, and FoundRHS is
- /// true.
- ///
- /// This routine tries to rule out certain kinds of integer overflow, and
- /// then tries to reason about arithmetic properties of the predicates.
- bool isImpliedCondOperandsViaNoOverflow(ICmpInst::Predicate Pred,
- const SCEV *LHS, const SCEV *RHS,
- const SCEV *FoundLHS,
- const SCEV *FoundRHS);
-
- /// If we know that the specified Phi is in the header of its containing
- /// loop, we know the loop executes a constant number of times, and the PHI
- /// node is just a recurrence involving constants, fold it.
- Constant *getConstantEvolutionLoopExitValue(PHINode *PN, const APInt& BEs,
- const Loop *L);
-
- /// Test if the given expression is known to satisfy the condition described
- /// by Pred and the known constant ranges of LHS and RHS.
- ///
- bool isKnownPredicateViaConstantRanges(ICmpInst::Predicate Pred,
- const SCEV *LHS, const SCEV *RHS);
+ /// Information about the backedge-taken count of a loop. This currently
+ /// includes an exact count and a maximum count.
+ ///
+ class BackedgeTakenInfo {
+ /// A list of computable exits and their not-taken counts. Loops almost
+ /// never have more than one computable exit.
+ SmallVector<ExitNotTakenInfo, 1> ExitNotTaken;
- /// Try to prove the condition described by "LHS Pred RHS" by ruling out
- /// integer overflow.
- ///
- /// For instance, this will return true for "A s< (A + C)<nsw>" if C is
- /// positive.
- bool isKnownPredicateViaNoOverflow(ICmpInst::Predicate Pred,
- const SCEV *LHS, const SCEV *RHS);
-
- /// Try to split Pred LHS RHS into logical conjunctions (and's) and try to
- /// prove them individually.
- bool isKnownPredicateViaSplitting(ICmpInst::Predicate Pred, const SCEV *LHS,
- const SCEV *RHS);
-
- /// Try to match the Expr as "(L + R)<Flags>".
- bool splitBinaryAdd(const SCEV *Expr, const SCEV *&L, const SCEV *&R,
- SCEV::NoWrapFlags &Flags);
-
- /// Return true if More == (Less + C), where C is a constant. This is
- /// intended to be used as a cheaper substitute for full SCEV subtraction.
- bool computeConstantDifference(const SCEV *Less, const SCEV *More,
- APInt &C);
-
- /// Drop memoized information computed for S.
- void forgetMemoizedResults(const SCEV *S);
-
- /// Return an existing SCEV for V if there is one, otherwise return nullptr.
- const SCEV *getExistingSCEV(Value *V);
-
- /// Return false iff the given SCEV contains a SCEVUnknown with a null
- /// value pointer.
- bool checkValidity(const SCEV *S) const;
-
- /// Return true if `ExtendOpTy`({`Start`,+,`Step`}) can be proved to be
- /// equal to {`ExtendOpTy`(`Start`),+,`ExtendOpTy`(`Step`)}. This is
- /// equivalent to proving no signed (resp. unsigned) wrap in
- /// {`Start`,+,`Step`} if `ExtendOpTy` is `SCEVSignExtendExpr`
- /// (resp. `SCEVZeroExtendExpr`).
+ /// The pointer part of \c MaxAndComplete is an expression indicating the
+ /// least maximum backedge-taken count of the loop that is known, or a
+ /// SCEVCouldNotCompute. This expression is only valid if the predicates
+ /// associated with all loop exits are true.
///
- template<typename ExtendOpTy>
- bool proveNoWrapByVaryingStart(const SCEV *Start, const SCEV *Step,
- const Loop *L);
+ /// The integer part of \c MaxAndComplete is a boolean indicating if \c
+ /// ExitNotTaken has an element for every exiting block in the loop.
+ PointerIntPair<const SCEV *, 1> MaxAndComplete;
- /// Try to prove NSW or NUW on \p AR relying on ConstantRange manipulation.
- SCEV::NoWrapFlags proveNoWrapViaConstantRanges(const SCEVAddRecExpr *AR);
+ /// True iff the backedge is taken either exactly Max or zero times.
+ bool MaxOrZero;
- bool isMonotonicPredicateImpl(const SCEVAddRecExpr *LHS,
- ICmpInst::Predicate Pred, bool &Increasing);
-
- /// Return true if, for all loop invariant X, the predicate "LHS `Pred` X"
- /// is monotonically increasing or decreasing. In the former case set
- /// `Increasing` to true and in the latter case set `Increasing` to false.
- ///
- /// A predicate is said to be monotonically increasing if it may go from being
- /// false to being true as the loop iterates, but never the other way
- /// around. A predicate is said to be monotonically decreasing if it may go
- /// from being true to being false as the loop iterates, but never the other
- /// way around.
- bool isMonotonicPredicate(const SCEVAddRecExpr *LHS,
- ICmpInst::Predicate Pred, bool &Increasing);
-
- /// Return SCEV no-wrap flags that can be proven based on reasoning about
- /// how poison produced from no-wrap flags on this value (e.g. a nuw add)
- /// would trigger undefined behavior on overflow.
- SCEV::NoWrapFlags getNoWrapFlagsFromUB(const Value *V);
-
- /// Return true if the SCEV corresponding to \p I is never poison. Proving
- /// this is more complex than proving that just \p I is never poison, since
- /// SCEV commons expressions across control flow, and you can have cases
- /// like:
- ///
- /// idx0 = a + b;
- /// ptr[idx0] = 100;
- /// if (<condition>) {
- /// idx1 = a +nsw b;
- /// ptr[idx1] = 200;
- /// }
- ///
- /// where the SCEV expression (+ a b) is guaranteed to not be poison (and
- /// hence not sign-overflow) only if "<condition>" is true. Since both
- /// `idx0` and `idx1` will be mapped to the same SCEV expression, (+ a b),
- /// it is not okay to annotate (+ a b) with <nsw> in the above example.
- bool isSCEVExprNeverPoison(const Instruction *I);
-
- /// This is like \c isSCEVExprNeverPoison but it specifically works for
- /// instructions that will get mapped to SCEV add recurrences. Return true
- /// if \p I will never generate poison under the assumption that \p I is an
- /// add recurrence on the loop \p L.
- bool isAddRecNeverPoison(const Instruction *I, const Loop *L);
+ /// \name Helper projection functions on \c MaxAndComplete.
+ /// @{
+ bool isComplete() const { return MaxAndComplete.getInt(); }
+ const SCEV *getMax() const { return MaxAndComplete.getPointer(); }
+ /// @}
public:
- ScalarEvolution(Function &F, TargetLibraryInfo &TLI, AssumptionCache &AC,
- DominatorTree &DT, LoopInfo &LI);
- ~ScalarEvolution();
- ScalarEvolution(ScalarEvolution &&Arg);
-
- LLVMContext &getContext() const { return F.getContext(); }
-
- /// Test if values of the given type are analyzable within the SCEV
- /// framework. This primarily includes integer types, and it can optionally
- /// include pointer types if the ScalarEvolution class has access to
- /// target-specific information.
- bool isSCEVable(Type *Ty) const;
-
- /// Return the size in bits of the specified type, for which isSCEVable must
- /// return true.
- uint64_t getTypeSizeInBits(Type *Ty) const;
-
- /// Return a type with the same bitwidth as the given type and which
- /// represents how SCEV will treat the given type, for which isSCEVable must
- /// return true. For pointer types, this is the pointer-sized integer type.
- Type *getEffectiveSCEVType(Type *Ty) const;
-
- /// Return true if the SCEV is a scAddRecExpr or it contains
- /// scAddRecExpr. The result will be cached in HasRecMap.
- ///
- bool containsAddRecurrence(const SCEV *S);
-
- /// Return the Value set from which the SCEV expr is generated.
- SetVector<Value *> *getSCEVValues(const SCEV *S);
-
- /// Erase Value from ValueExprMap and ExprValueMap.
- void eraseValueFromMap(Value *V);
-
- /// Return a SCEV expression for the full generality of the specified
- /// expression.
- const SCEV *getSCEV(Value *V);
-
- const SCEV *getConstant(ConstantInt *V);
- const SCEV *getConstant(const APInt& Val);
- const SCEV *getConstant(Type *Ty, uint64_t V, bool isSigned = false);
- const SCEV *getTruncateExpr(const SCEV *Op, Type *Ty);
- const SCEV *getZeroExtendExpr(const SCEV *Op, Type *Ty);
- const SCEV *getSignExtendExpr(const SCEV *Op, Type *Ty);
- const SCEV *getAnyExtendExpr(const SCEV *Op, Type *Ty);
- const SCEV *getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
- SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap);
- const SCEV *getAddExpr(const SCEV *LHS, const SCEV *RHS,
- SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap) {
- SmallVector<const SCEV *, 2> Ops = {LHS, RHS};
- return getAddExpr(Ops, Flags);
- }
- const SCEV *getAddExpr(const SCEV *Op0, const SCEV *Op1, const SCEV *Op2,
- SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap) {
- SmallVector<const SCEV *, 3> Ops = {Op0, Op1, Op2};
- return getAddExpr(Ops, Flags);
- }
- const SCEV *getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
- SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap);
- const SCEV *getMulExpr(const SCEV *LHS, const SCEV *RHS,
- SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap) {
- SmallVector<const SCEV *, 2> Ops = {LHS, RHS};
- return getMulExpr(Ops, Flags);
- }
- const SCEV *getMulExpr(const SCEV *Op0, const SCEV *Op1, const SCEV *Op2,
- SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap) {
- SmallVector<const SCEV *, 3> Ops = {Op0, Op1, Op2};
- return getMulExpr(Ops, Flags);
- }
- const SCEV *getUDivExpr(const SCEV *LHS, const SCEV *RHS);
- const SCEV *getUDivExactExpr(const SCEV *LHS, const SCEV *RHS);
- const SCEV *getAddRecExpr(const SCEV *Start, const SCEV *Step,
- const Loop *L, SCEV::NoWrapFlags Flags);
- const SCEV *getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
- const Loop *L, SCEV::NoWrapFlags Flags);
- const SCEV *getAddRecExpr(const SmallVectorImpl<const SCEV *> &Operands,
- const Loop *L, SCEV::NoWrapFlags Flags) {
- SmallVector<const SCEV *, 4> NewOp(Operands.begin(), Operands.end());
- return getAddRecExpr(NewOp, L, Flags);
- }
- /// Returns an expression for a GEP
- ///
- /// \p PointeeType The type used as the basis for the pointer arithmetic
- /// \p BaseExpr The expression for the pointer operand.
- /// \p IndexExprs The expressions for the indices.
- /// \p InBounds Whether the GEP is in bounds.
- const SCEV *getGEPExpr(Type *PointeeType, const SCEV *BaseExpr,
- const SmallVectorImpl<const SCEV *> &IndexExprs,
- bool InBounds = false);
- const SCEV *getSMaxExpr(const SCEV *LHS, const SCEV *RHS);
- const SCEV *getSMaxExpr(SmallVectorImpl<const SCEV *> &Operands);
- const SCEV *getUMaxExpr(const SCEV *LHS, const SCEV *RHS);
- const SCEV *getUMaxExpr(SmallVectorImpl<const SCEV *> &Operands);
- const SCEV *getSMinExpr(const SCEV *LHS, const SCEV *RHS);
- const SCEV *getUMinExpr(const SCEV *LHS, const SCEV *RHS);
- const SCEV *getUnknown(Value *V);
- const SCEV *getCouldNotCompute();
-
- /// Return a SCEV for the constant 0 of a specific type.
- const SCEV *getZero(Type *Ty) { return getConstant(Ty, 0); }
-
- /// Return a SCEV for the constant 1 of a specific type.
- const SCEV *getOne(Type *Ty) { return getConstant(Ty, 1); }
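[Editor's illustration, not part of the patch: a minimal sketch of the expression-builder API declared above. The helper name buildTwoXPlusOne and its parameters are hypothetical; X is assumed to have a SCEVable integer type, and the caller is assumed to run inside an LLVM pass that already holds a ScalarEvolution reference.]

#include "llvm/Analysis/ScalarEvolution.h"
using namespace llvm;

// Hypothetical helper: construct the SCEV for "2*X + 1".
static const SCEV *buildTwoXPlusOne(ScalarEvolution &SE, Value *X) {
  const SCEV *S = SE.getSCEV(X);                     // SCEV form of X
  const SCEV *Two = SE.getConstant(S->getType(), 2);
  const SCEV *One = SE.getOne(S->getType());
  // Flags default to FlagAnyWrap; nsw/nuw must be proven separately.
  return SE.getAddExpr(SE.getMulExpr(Two, S), One);
}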
-
- /// Return an expression for sizeof AllocTy that is type IntTy
- ///
- const SCEV *getSizeOfExpr(Type *IntTy, Type *AllocTy);
-
- /// Return an expression for offsetof on the given field with type IntTy
- ///
- const SCEV *getOffsetOfExpr(Type *IntTy, StructType *STy, unsigned FieldNo);
+ BackedgeTakenInfo() : MaxAndComplete(nullptr, 0) {}
- /// Return the SCEV object corresponding to -V.
- ///
- const SCEV *getNegativeSCEV(const SCEV *V,
- SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap);
+ BackedgeTakenInfo(BackedgeTakenInfo &&) = default;
+ BackedgeTakenInfo &operator=(BackedgeTakenInfo &&) = default;
- /// Return the SCEV object corresponding to ~V.
- ///
- const SCEV *getNotSCEV(const SCEV *V);
-
- /// Return LHS-RHS. Minus is represented in SCEV as A+B*-1.
- const SCEV *getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
- SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap);
-
- /// Return a SCEV corresponding to a conversion of the input value to the
- /// specified type. If the type must be extended, it is zero extended.
- const SCEV *getTruncateOrZeroExtend(const SCEV *V, Type *Ty);
-
- /// Return a SCEV corresponding to a conversion of the input value to the
- /// specified type. If the type must be extended, it is sign extended.
- const SCEV *getTruncateOrSignExtend(const SCEV *V, Type *Ty);
-
- /// Return a SCEV corresponding to a conversion of the input value to the
- /// specified type. If the type must be extended, it is zero extended. The
- /// conversion must not be narrowing.
- const SCEV *getNoopOrZeroExtend(const SCEV *V, Type *Ty);
-
- /// Return a SCEV corresponding to a conversion of the input value to the
- /// specified type. If the type must be extended, it is sign extended. The
- /// conversion must not be narrowing.
- const SCEV *getNoopOrSignExtend(const SCEV *V, Type *Ty);
-
- /// Return a SCEV corresponding to a conversion of the input value to the
- /// specified type. If the type must be extended, it is extended with
- /// unspecified bits. The conversion must not be narrowing.
- const SCEV *getNoopOrAnyExtend(const SCEV *V, Type *Ty);
-
- /// Return a SCEV corresponding to a conversion of the input value to the
- /// specified type. The conversion must not be widening.
- const SCEV *getTruncateOrNoop(const SCEV *V, Type *Ty);
-
- /// Promote the operands to the wider of the types using zero-extension, and
- /// then perform a umax operation with them.
- const SCEV *getUMaxFromMismatchedTypes(const SCEV *LHS,
- const SCEV *RHS);
-
- /// Promote the operands to the wider of the types using zero-extension, and
- /// then perform a umin operation with them.
- const SCEV *getUMinFromMismatchedTypes(const SCEV *LHS,
- const SCEV *RHS);
-
- /// Transitively follow the chain of pointer-type operands until reaching a
- /// SCEV that does not have a single pointer operand. This returns a
- /// SCEVUnknown pointer for well-formed pointer-type expressions, but corner
- /// cases do exist.
- const SCEV *getPointerBase(const SCEV *V);
-
- /// Return a SCEV expression for the specified value at the specified scope
- /// in the program. The L value specifies a loop nest to evaluate the
- /// expression at, where null is the top-level or a specified loop is
- /// immediately inside of the loop.
- ///
- /// This method can be used to compute the exit value for a variable defined
- /// in a loop by querying what the value will hold in the parent loop.
- ///
- /// In the case that a relevant loop exit value cannot be computed, the
- /// original value V is returned.
- const SCEV *getSCEVAtScope(const SCEV *S, const Loop *L);
-
- /// This is a convenience function which does getSCEVAtScope(getSCEV(V), L).
- const SCEV *getSCEVAtScope(Value *V, const Loop *L);
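[Editor's illustration, not part of the patch: a sketch of the exit-value query described above. The helper name exitValueOf is hypothetical, and V is assumed to be defined inside L.]

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
using namespace llvm;

// Hypothetical helper: the value V will hold once L has been left,
// evaluated in the scope of the parent loop (nullptr = top level).
// If a relevant exit value cannot be computed, the original value's
// expression is returned (see the comment above).
static const SCEV *exitValueOf(ScalarEvolution &SE, Value *V, const Loop *L) {
  return SE.getSCEVAtScope(V, L->getParentLoop());
}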
-
- /// Test whether entry to the loop is protected by a conditional between LHS
- /// and RHS. This is used to help avoid max expressions in loop trip
- /// counts, and to eliminate casts.
- bool isLoopEntryGuardedByCond(const Loop *L, ICmpInst::Predicate Pred,
- const SCEV *LHS, const SCEV *RHS);
-
- /// Test whether the backedge of the loop is protected by a conditional
- /// between LHS and RHS. This is used to eliminate casts.
- bool isLoopBackedgeGuardedByCond(const Loop *L, ICmpInst::Predicate Pred,
- const SCEV *LHS, const SCEV *RHS);
-
- /// Returns the maximum trip count of the loop if it is a single-exit
- /// loop and we can compute a small maximum for that loop.
- ///
- /// Implemented in terms of the \c getSmallConstantTripCount overload with
- /// the single exiting block passed to it. See that routine for details.
- unsigned getSmallConstantTripCount(Loop *L);
-
- /// Returns the maximum trip count of this loop as a normal unsigned
- /// value. Returns 0 if the trip count is unknown or not constant. This
- /// "trip count" assumes that control exits via ExitingBlock. More
- /// precisely, it is the number of times that control may reach ExitingBlock
- /// before taking the branch. For loops with multiple exits, it may not be
- /// the number times that the loop header executes if the loop exits
- /// prematurely via another branch.
- unsigned getSmallConstantTripCount(Loop *L, BasicBlock *ExitingBlock);
-
- /// Returns the largest constant divisor of the trip count of the
- /// loop if it is a single-exit loop and we can compute a small maximum for
- /// that loop.
- ///
- /// Implemented in terms of the \c getSmallConstantTripMultiple overload with
- /// the single exiting block passed to it. See that routine for details.
- unsigned getSmallConstantTripMultiple(Loop *L);
-
- /// Returns the largest constant divisor of the trip count of this loop as a
- /// normal unsigned value, if possible. This means that the actual trip
- /// count is always a multiple of the returned value (don't forget the trip
- /// count could very well be zero as well!). As explained in the comments
- /// for getSmallConstantTripCount, this assumes that control exits the loop
- /// via ExitingBlock.
- unsigned getSmallConstantTripMultiple(Loop *L, BasicBlock *ExitingBlock);
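[Editor's illustration, not part of the patch: a sketch of the constant trip-count queries above. The helper name constantTripInfo is hypothetical.]

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include <utility>
using namespace llvm;

// Hypothetical helper: returns {trip count, trip multiple} for L.
// A count of 0 means "unknown or not constant"; the multiple is the
// largest known constant divisor of the trip count.
static std::pair<unsigned, unsigned> constantTripInfo(ScalarEvolution &SE,
                                                      Loop *L) {
  return {SE.getSmallConstantTripCount(L),
          SE.getSmallConstantTripMultiple(L)};
}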
-
- /// Get the expression for the number of loop iterations for which this loop
- /// is guaranteed not to exit via ExitingBlock. Otherwise return
- /// SCEVCouldNotCompute.
- const SCEV *getExitCount(Loop *L, BasicBlock *ExitingBlock);
-
- /// If the specified loop has a predictable backedge-taken count, return it,
- /// otherwise return a SCEVCouldNotCompute object. The backedge-taken count
- /// is the number of times the loop header will be branched to from within
- /// the loop. This is one less than the trip count of the loop, since it
- /// doesn't count the first iteration, when the header is branched to from
- /// outside the loop.
- ///
- /// Note that it is not valid to call this method on a loop without a
- /// loop-invariant backedge-taken count (see
- /// hasLoopInvariantBackedgeTakenCount).
- ///
- const SCEV *getBackedgeTakenCount(const Loop *L);
-
- /// Similar to getBackedgeTakenCount, except it will add a set of
- /// SCEV predicates to Predicates that are required to be true in order for
- /// the answer to be correct. Predicates can be checked with run-time
- /// checks and can be used to perform loop versioning.
- const SCEV *getPredicatedBackedgeTakenCount(const Loop *L,
- SCEVUnionPredicate &Predicates);
-
- /// Similar to getBackedgeTakenCount, except return the least SCEV value
- /// that is known never to be less than the actual backedge taken count.
- const SCEV *getMaxBackedgeTakenCount(const Loop *L);
-
- /// Return true if the specified loop has an analyzable loop-invariant
- /// backedge-taken count.
- bool hasLoopInvariantBackedgeTakenCount(const Loop *L);
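[Editor's illustration, not part of the patch: a sketch of the calling pattern spelled out above — guard getBackedgeTakenCount with hasLoopInvariantBackedgeTakenCount, and use getExitCount for per-exit counts. The helper name and the counting logic are hypothetical.]

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
using namespace llvm;

// Hypothetical helper: count how many exits of L have a computable
// not-taken count, and fetch the loop-invariant backedge-taken count
// when one exists.
static unsigned countComputableExits(ScalarEvolution &SE, Loop *L) {
  if (SE.hasLoopInvariantBackedgeTakenCount(L)) {
    const SCEV *BTC = SE.getBackedgeTakenCount(L); // trip count minus one
    (void)BTC;
  }

  SmallVector<BasicBlock *, 8> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  unsigned Computable = 0;
  for (BasicBlock *BB : ExitingBlocks)
    if (!isa<SCEVCouldNotCompute>(SE.getExitCount(L, BB)))
      ++Computable;
  return Computable;
}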
-
- /// This method should be called by the client when it has changed a loop in
- /// a way that may affect ScalarEvolution's ability to compute a trip count,
- /// or if the loop is deleted. This call is potentially expensive for large
- /// loop bodies.
- void forgetLoop(const Loop *L);
-
- /// This method should be called by the client when it has changed a value
- /// in a way that may affect its value, or which may disconnect it from a
- /// def-use chain linking it to a loop.
- void forgetValue(Value *V);
-
- /// Called when the client has changed the disposition of values in
- /// this loop.
- ///
- /// We don't have a way to invalidate per-loop dispositions. Clear and
- /// recompute is simpler.
- void forgetLoopDispositions(const Loop *L) { LoopDispositions.clear(); }
+ typedef std::pair<BasicBlock *, ExitLimit> EdgeExitInfo;
- /// Determine the minimum number of zero bits that S is guaranteed to end in
- /// (at every loop iteration). It is, at the same time, the minimum number
- /// of times S is divisible by 2. For example, given {4,+,8} it returns 2.
- /// If S is guaranteed to be 0, it returns the bitwidth of S.
- uint32_t GetMinTrailingZeros(const SCEV *S);
+ /// Initialize BackedgeTakenInfo from a list of exact exit counts.
+ BackedgeTakenInfo(SmallVectorImpl<EdgeExitInfo> &&ExitCounts, bool Complete,
+ const SCEV *MaxCount, bool MaxOrZero);
- /// Determine the unsigned range for a particular SCEV.
- ///
- ConstantRange getUnsignedRange(const SCEV *S) {
- return getRange(S, HINT_RANGE_UNSIGNED);
+ /// Test whether this BackedgeTakenInfo contains any computed information,
+ /// or whether it's all SCEVCouldNotCompute values.
+ bool hasAnyInfo() const {
+ return !ExitNotTaken.empty() || !isa<SCEVCouldNotCompute>(getMax());
}
- /// Determine the signed range for a particular SCEV.
- ///
- ConstantRange getSignedRange(const SCEV *S) {
- return getRange(S, HINT_RANGE_SIGNED);
- }
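[Editor's illustration, not part of the patch: a sketch using the range queries above. The helper name provenBelow and the Bound parameter are hypothetical; V is assumed to be SCEVable.]

#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/ConstantRange.h"
using namespace llvm;

// Hypothetical helper: true if V is provably < Bound in the unsigned
// sense, judged from its memoized unsigned range.
static bool provenBelow(ScalarEvolution &SE, Value *V, uint64_t Bound) {
  ConstantRange CR = SE.getUnsignedRange(SE.getSCEV(V));
  return CR.getUnsignedMax().ult(Bound);
}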
+ /// Test whether this BackedgeTakenInfo contains complete information.
+ bool hasFullInfo() const { return isComplete(); }
+
+ /// Return an expression indicating the exact backedge-taken count of the
+ /// loop if it is known or SCEVCouldNotCompute otherwise. This is the
+ /// number of times the loop header can be guaranteed to execute, minus
+ /// one.
+ ///
+ /// If the SCEV predicate associated with the answer can be different
+ /// from AlwaysTrue, we must add a (non null) Predicates argument.
+ /// The SCEV predicate associated with the answer will be added to
+ /// Predicates. A run-time check needs to be emitted for the SCEV
+ /// predicate in order for the answer to be valid.
+ ///
+ /// Note that we should always know if we need to pass a predicate
+ /// argument or not from the way the ExitCounts vector was computed.
+ /// If we allowed SCEV predicates to be generated when populating this
+ /// vector, this information can contain them and therefore a
+ /// SCEVPredicate argument should be added to getExact.
+ const SCEV *getExact(ScalarEvolution *SE,
+ SCEVUnionPredicate *Predicates = nullptr) const;
+
+ /// Return the number of times this loop exit may fall through to the back
+ /// edge, or SCEVCouldNotCompute. The loop is guaranteed not to exit via
+ /// this block before this number of iterations, but may exit via another
+ /// block.
+ const SCEV *getExact(BasicBlock *ExitingBlock, ScalarEvolution *SE) const;
- /// Test if the given expression is known to be negative.
- ///
- bool isKnownNegative(const SCEV *S);
+ /// Get the max backedge taken count for the loop.
+ const SCEV *getMax(ScalarEvolution *SE) const;
- /// Test if the given expression is known to be positive.
- ///
- bool isKnownPositive(const SCEV *S);
+ /// Return true if the number of times this backedge is taken is either the
+ /// value returned by getMax or zero.
+ bool isMaxOrZero(ScalarEvolution *SE) const;
- /// Test if the given expression is known to be non-negative.
- ///
- bool isKnownNonNegative(const SCEV *S);
+ /// Return true if any backedge taken count expressions refer to the given
+ /// subexpression.
+ bool hasOperand(const SCEV *S, ScalarEvolution *SE) const;
- /// Test if the given expression is known to be non-positive.
- ///
- bool isKnownNonPositive(const SCEV *S);
+ /// Invalidate this result and free associated memory.
+ void clear();
+ };
- /// Test if the given expression is known to be non-zero.
- ///
- bool isKnownNonZero(const SCEV *S);
+ /// Cache the backedge-taken count of the loops for this function as they
+ /// are computed.
+ DenseMap<const Loop *, BackedgeTakenInfo> BackedgeTakenCounts;
+
+ /// Cache the predicated backedge-taken count of the loops for this
+ /// function as they are computed.
+ DenseMap<const Loop *, BackedgeTakenInfo> PredicatedBackedgeTakenCounts;
+
+ /// This map contains entries for all of the PHI instructions that we
+ /// attempt to compute constant evolutions for. This allows us to avoid
+ /// potentially expensive recomputation of these properties. An instruction
+ /// maps to null if we are unable to compute its exit value.
+ DenseMap<PHINode *, Constant *> ConstantEvolutionLoopExitValue;
+
+ /// This map contains entries for all the expressions that we attempt to
+ /// compute getSCEVAtScope information for, which can be expensive in
+ /// extreme cases.
+ DenseMap<const SCEV *, SmallVector<std::pair<const Loop *, const SCEV *>, 2>>
+ ValuesAtScopes;
+
+ /// Memoized computeLoopDisposition results.
+ DenseMap<const SCEV *,
+ SmallVector<PointerIntPair<const Loop *, 2, LoopDisposition>, 2>>
+ LoopDispositions;
+
+ struct LoopProperties {
+ /// Set to true if the loop contains no instruction that can abnormally exit
+ /// the loop (i.e. via throwing an exception, by terminating the thread
+ /// cleanly or by infinite looping in a called function). Strictly
+ /// speaking, the last one is not leaving the loop, but is identical to
+ /// leaving the loop for reasoning about undefined behavior.
+ bool HasNoAbnormalExits;
+
+ /// Set to true if the loop contains no instruction that can have side
+ /// effects (i.e. via throwing an exception, volatile or atomic access).
+ bool HasNoSideEffects;
+ };
- /// Test if the given expression is known to satisfy the condition described
- /// by Pred, LHS, and RHS.
- ///
- bool isKnownPredicate(ICmpInst::Predicate Pred,
- const SCEV *LHS, const SCEV *RHS);
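[Editor's illustration, not part of the patch: a sketch of the predicate query above. The helper name provablySLT is hypothetical, and A and B are assumed to share an integer type.]

#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Hypothetical helper: ask ScalarEvolution to prove "A <s B".
static bool provablySLT(ScalarEvolution &SE, Value *A, Value *B) {
  return SE.isKnownPredicate(ICmpInst::ICMP_SLT, SE.getSCEV(A),
                             SE.getSCEV(B));
}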
-
- /// Return true if the result of the predicate LHS `Pred` RHS is loop
- /// invariant with respect to L. Set InvariantPred, InvariantLHS and
- /// InvariantRHS so that InvariantLHS `InvariantPred` InvariantRHS is the
- /// loop invariant form of LHS `Pred` RHS.
- bool isLoopInvariantPredicate(ICmpInst::Predicate Pred, const SCEV *LHS,
- const SCEV *RHS, const Loop *L,
- ICmpInst::Predicate &InvariantPred,
- const SCEV *&InvariantLHS,
- const SCEV *&InvariantRHS);
-
- /// Simplify LHS and RHS in a comparison with predicate Pred. Return true
- /// iff any changes were made. If the operands are provably equal or
- /// unequal, LHS and RHS are set to the same value and Pred is set to either
- /// ICMP_EQ or ICMP_NE.
- ///
- bool SimplifyICmpOperands(ICmpInst::Predicate &Pred,
- const SCEV *&LHS,
- const SCEV *&RHS,
- unsigned Depth = 0);
-
- /// Return the "disposition" of the given SCEV with respect to the given
- /// loop.
- LoopDisposition getLoopDisposition(const SCEV *S, const Loop *L);
-
- /// Return true if the value of the given SCEV is unchanging in the
- /// specified loop.
- bool isLoopInvariant(const SCEV *S, const Loop *L);
-
- /// Return true if the given SCEV changes value in a known way in the
- /// specified loop. This property being true implies that the value is
- /// variant in the loop AND that we can emit an expression to compute the
- /// value of the expression at any particular loop iteration.
- bool hasComputableLoopEvolution(const SCEV *S, const Loop *L);
-
- /// Return the "disposition" of the given SCEV with respect to the given
- /// block.
- BlockDisposition getBlockDisposition(const SCEV *S, const BasicBlock *BB);
+ /// Cache for \c getLoopProperties.
+ DenseMap<const Loop *, LoopProperties> LoopPropertiesCache;
- /// Return true if elements that make up the given SCEV dominate the
- /// specified basic block.
- bool dominates(const SCEV *S, const BasicBlock *BB);
+ /// Return a \c LoopProperties instance for \p L, creating one if necessary.
+ LoopProperties getLoopProperties(const Loop *L);
- /// Return true if elements that make up the given SCEV properly dominate
- /// the specified basic block.
- bool properlyDominates(const SCEV *S, const BasicBlock *BB);
+ bool loopHasNoSideEffects(const Loop *L) {
+ return getLoopProperties(L).HasNoSideEffects;
+ }
- /// Test whether the given SCEV has Op as a direct or indirect operand.
- bool hasOperand(const SCEV *S, const SCEV *Op) const;
+ bool loopHasNoAbnormalExits(const Loop *L) {
+ return getLoopProperties(L).HasNoAbnormalExits;
+ }
- /// Return the size of an element read or written by Inst.
- const SCEV *getElementSize(Instruction *Inst);
+ /// Compute a LoopDisposition value.
+ LoopDisposition computeLoopDisposition(const SCEV *S, const Loop *L);
- /// Compute the array dimensions Sizes from the set of Terms extracted from
- /// the memory access function of this SCEVAddRecExpr (second step of
- /// delinearization).
- void findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms,
- SmallVectorImpl<const SCEV *> &Sizes,
- const SCEV *ElementSize) const;
+ /// Memoized computeBlockDisposition results.
+ DenseMap<
+ const SCEV *,
+ SmallVector<PointerIntPair<const BasicBlock *, 2, BlockDisposition>, 2>>
+ BlockDispositions;
- void print(raw_ostream &OS) const;
- void verify() const;
+ /// Compute a BlockDisposition value.
+ BlockDisposition computeBlockDisposition(const SCEV *S, const BasicBlock *BB);
- /// Collect parametric terms occurring in step expressions (first step of
- /// delinearization).
- void collectParametricTerms(const SCEV *Expr,
- SmallVectorImpl<const SCEV *> &Terms);
+ /// Memoized results from getRange
+ DenseMap<const SCEV *, ConstantRange> UnsignedRanges;
+ /// Memoized results from getRange
+ DenseMap<const SCEV *, ConstantRange> SignedRanges;
+ /// Used to parameterize getRange
+ enum RangeSignHint { HINT_RANGE_UNSIGNED, HINT_RANGE_SIGNED };
- /// Return in Subscripts the access functions for each dimension in Sizes
- /// (third step of delinearization).
- void computeAccessFunctions(const SCEV *Expr,
- SmallVectorImpl<const SCEV *> &Subscripts,
- SmallVectorImpl<const SCEV *> &Sizes);
+ /// Set the memoized range for the given SCEV.
+ const ConstantRange &setRange(const SCEV *S, RangeSignHint Hint,
+ const ConstantRange &CR) {
+ DenseMap<const SCEV *, ConstantRange> &Cache =
+ Hint == HINT_RANGE_UNSIGNED ? UnsignedRanges : SignedRanges;
- /// Split this SCEVAddRecExpr into two vectors of SCEVs representing the
- /// subscripts and sizes of an array access.
- ///
- /// The delinearization is a 3 step process: the first two steps compute the
- /// sizes of each subscript and the third step computes the access functions
- /// for the delinearized array:
- ///
- /// 1. Find the terms in the step functions
- /// 2. Compute the array size
- /// 3. Compute the access function: divide the SCEV by the array size
- /// starting with the innermost dimensions found in step 2. The Quotient
- /// is the SCEV to be divided in the next step of the recursion. The
- /// Remainder is the subscript of the innermost dimension. Loop over all
- /// array dimensions computed in step 2.
- ///
- /// To compute a uniform array size for several memory accesses to the same
- /// object, one can collect in step 1 all the step terms for all the memory
- /// accesses, and compute in step 2 a unique array shape. This guarantees
- /// that the array shape will be the same across all memory accesses.
- ///
- /// FIXME: We could derive the result of steps 1 and 2 from a description of
- /// the array shape given in metadata.
- ///
- /// Example:
- ///
- /// A[][n][m]
- ///
- /// for i
- /// for j
- /// for k
- /// A[j+k][2i][5i] =
- ///
- /// The initial SCEV:
- ///
- /// A[{{{0,+,2*m+5}_i, +, n*m}_j, +, n*m}_k]
- ///
- /// 1. Find the different terms in the step functions:
- /// -> [2*m, 5, n*m, n*m]
- ///
- /// 2. Compute the array size: sort and unique them
- /// -> [n*m, 2*m, 5]
- /// find the GCD of all the terms = 1
- /// divide by the GCD and erase constant terms
- /// -> [n*m, 2*m]
- /// GCD = m
- /// divide by GCD -> [n, 2]
- /// remove constant terms
- /// -> [n]
- /// size of the array is A[unknown][n][m]
- ///
- /// 3. Compute the access function
- /// a. Divide {{{0,+,2*m+5}_i, +, n*m}_j, +, n*m}_k by the innermost size m
- /// Quotient: {{{0,+,2}_i, +, n}_j, +, n}_k
- /// Remainder: {{{0,+,5}_i, +, 0}_j, +, 0}_k
- /// The remainder is the subscript of the innermost array dimension: [5i].
- ///
- /// b. Divide Quotient: {{{0,+,2}_i, +, n}_j, +, n}_k by next outer size n
- /// Quotient: {{{0,+,0}_i, +, 1}_j, +, 1}_k
- /// Remainder: {{{0,+,2}_i, +, 0}_j, +, 0}_k
- /// The Remainder is the subscript of the next array dimension: [2i].
- ///
- /// The subscript of the outermost dimension is the Quotient: [j+k].
- ///
- /// Overall, we have: A[][n][m], and the access function: A[j+k][2i][5i].
- void delinearize(const SCEV *Expr,
- SmallVectorImpl<const SCEV *> &Subscripts,
- SmallVectorImpl<const SCEV *> &Sizes,
- const SCEV *ElementSize);
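[Editor's illustration, not part of the patch: a sketch of driving the three-step delinearization documented above from a memory access. The helper name delinearizeAccess and the parameters MemAccess and Address are hypothetical; MemAccess is assumed to be the load or store whose address is Address.]

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

// Hypothetical helper: recover array sizes and subscripts for the
// address computation feeding a load or store.
static void delinearizeAccess(ScalarEvolution &SE, Instruction *MemAccess,
                              Value *Address) {
  const SCEV *AccessFn = SE.getSCEV(Address);
  SmallVector<const SCEV *, 4> Subscripts, Sizes;
  SE.delinearize(AccessFn, Subscripts, Sizes, SE.getElementSize(MemAccess));
  // On success, Sizes holds one entry per array dimension and Subscripts
  // holds the per-dimension access functions (e.g. [j+k][2i][5i] above).
}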
-
- /// Return the DataLayout associated with the module this SCEV instance is
- /// operating on.
- const DataLayout &getDataLayout() const {
- return F.getParent()->getDataLayout();
- }
+ auto Pair = Cache.insert({S, CR});
+ if (!Pair.second)
+ Pair.first->second = CR;
+ return Pair.first->second;
+ }
- const SCEVPredicate *getEqualPredicate(const SCEVUnknown *LHS,
- const SCEVConstant *RHS);
-
- const SCEVPredicate *
- getWrapPredicate(const SCEVAddRecExpr *AR,
- SCEVWrapPredicate::IncrementWrapFlags AddedFlags);
-
- /// Re-writes the SCEV according to the Predicates in \p A.
- const SCEV *rewriteUsingPredicate(const SCEV *S, const Loop *L,
- SCEVUnionPredicate &A);
- /// Tries to convert the \p S expression to an AddRec expression,
- /// adding additional predicates to \p Preds as required.
- const SCEVAddRecExpr *
- convertSCEVToAddRecWithPredicates(const SCEV *S, const Loop *L,
- SCEVUnionPredicate &Preds);
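[Editor's illustration, not part of the patch: a sketch of the predicated query pathway these declarations support, using the public getPredicatedBackedgeTakenCount declared earlier. The helper name is hypothetical.]

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
using namespace llvm;

// Hypothetical helper: fetch the backedge-taken count together with the
// run-time checks (SCEV predicates) it depends on.
static const SCEV *predicatedBTC(ScalarEvolution &SE, const Loop *L,
                                 SCEVUnionPredicate &Preds) {
  const SCEV *BTC = SE.getPredicatedBackedgeTakenCount(L, Preds);
  // If Preds.isAlwaysTrue(), the count is unconditional; otherwise the
  // predicates in Preds must be checked (e.g. by loop versioning) before
  // relying on BTC.
  return BTC;
}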
-
- private:
- /// Compute the backedge taken count knowing the interval difference, the
- /// stride and presence of the equality in the comparison.
- const SCEV *computeBECount(const SCEV *Delta, const SCEV *Stride,
- bool Equality);
-
- /// Verify if a linear IV with positive stride can overflow when in a
- /// less-than comparison, knowing the invariant term of the comparison,
- /// the stride and the knowledge of NSW/NUW flags on the recurrence.
- bool doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride,
- bool IsSigned, bool NoWrap);
-
- /// Verify if a linear IV with negative stride can overflow when in a
- /// greater-than comparison, knowing the invariant term of the comparison,
- /// the stride and the knowledge of NSW/NUW flags on the recurrence.
- bool doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride,
- bool IsSigned, bool NoWrap);
-
- private:
- FoldingSet<SCEV> UniqueSCEVs;
- FoldingSet<SCEVPredicate> UniquePreds;
- BumpPtrAllocator SCEVAllocator;
-
- /// The head of a linked list of all SCEVUnknown values that have been
- /// allocated. This is used by releaseMemory to locate them all and call
- /// their destructors.
- SCEVUnknown *FirstUnknown;
- };
+ /// Determine the range for a particular SCEV.
+ ConstantRange getRange(const SCEV *S, RangeSignHint Hint);
- /// Analysis pass that exposes the \c ScalarEvolution for a function.
- class ScalarEvolutionAnalysis
- : public AnalysisInfoMixin<ScalarEvolutionAnalysis> {
- friend AnalysisInfoMixin<ScalarEvolutionAnalysis>;
- static char PassID;
+ /// Determines the range for the affine SCEVAddRecExpr {\p Start,+,\p Stop}.
+ /// Helper for \c getRange.
+ ConstantRange getRangeForAffineAR(const SCEV *Start, const SCEV *Stop,
+ const SCEV *MaxBECount, unsigned BitWidth);
- public:
- typedef ScalarEvolution Result;
+ /// Try to compute a range for the affine SCEVAddRecExpr {\p Start,+,\p
+ /// Stop} by "factoring out" a ternary expression from the add recurrence.
+ /// Helper called by \c getRange.
+ ConstantRange getRangeViaFactoring(const SCEV *Start, const SCEV *Stop,
+ const SCEV *MaxBECount, unsigned BitWidth);
- ScalarEvolution run(Function &F, AnalysisManager<Function> &AM);
- };
+ /// We know that there is no SCEV for the specified value. Analyze the
+ /// expression.
+ const SCEV *createSCEV(Value *V);
- /// Printer pass for the \c ScalarEvolutionAnalysis results.
- class ScalarEvolutionPrinterPass
- : public PassInfoMixin<ScalarEvolutionPrinterPass> {
- raw_ostream &OS;
+ /// Provide the special handling we need to analyze PHI SCEVs.
+ const SCEV *createNodeForPHI(PHINode *PN);
- public:
- explicit ScalarEvolutionPrinterPass(raw_ostream &OS) : OS(OS) {}
- PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM);
- };
+ /// Helper function called from createNodeForPHI.
+ const SCEV *createAddRecFromPHI(PHINode *PN);
- class ScalarEvolutionWrapperPass : public FunctionPass {
- std::unique_ptr<ScalarEvolution> SE;
+ /// Helper function called from createNodeForPHI.
+ const SCEV *createNodeFromSelectLikePHI(PHINode *PN);
- public:
- static char ID;
-
- ScalarEvolutionWrapperPass();
+ /// Provide special handling for a select-like instruction (currently this
+ /// is either a select instruction or a phi node). \p I is the instruction
+ /// being processed, and it is assumed equivalent to "Cond ? TrueVal :
+ /// FalseVal".
+ const SCEV *createNodeForSelectOrPHI(Instruction *I, Value *Cond,
+ Value *TrueVal, Value *FalseVal);
- ScalarEvolution &getSE() { return *SE; }
- const ScalarEvolution &getSE() const { return *SE; }
+ /// Provide the special handling we need to analyze GEP SCEVs.
+ const SCEV *createNodeForGEP(GEPOperator *GEP);
- bool runOnFunction(Function &F) override;
- void releaseMemory() override;
- void getAnalysisUsage(AnalysisUsage &AU) const override;
- void print(raw_ostream &OS, const Module * = nullptr) const override;
- void verifyAnalysis() const override;
- };
+ /// Implementation code for getSCEVAtScope; called at most once for each
+ /// SCEV+Loop pair.
+ ///
+ const SCEV *computeSCEVAtScope(const SCEV *S, const Loop *L);
+
+ /// This looks up computed SCEV values for all instructions that depend on
+ /// the given instruction and removes them from the ValueExprMap map if they
+ /// reference SymName. This is used during PHI resolution.
+ void forgetSymbolicName(Instruction *I, const SCEV *SymName);
+
+ /// Return the BackedgeTakenInfo for the given loop, lazily computing new
+ /// values if the loop hasn't been analyzed yet. The returned result is
+ /// guaranteed not to be predicated.
+ const BackedgeTakenInfo &getBackedgeTakenInfo(const Loop *L);
+
+ /// Similar to getBackedgeTakenInfo, but will add predicates as required
+ /// with the purpose of returning complete information.
+ const BackedgeTakenInfo &getPredicatedBackedgeTakenInfo(const Loop *L);
+
+ /// Compute the number of times the specified loop will iterate.
+ /// If AllowPredicates is set, we will create new SCEV predicates as
+ /// necessary in order to return an exact answer.
+ BackedgeTakenInfo computeBackedgeTakenCount(const Loop *L,
+ bool AllowPredicates = false);
+
+ /// Compute the number of times the backedge of the specified loop will
+ /// execute if it exits via the specified block. If AllowPredicates is set,
+ /// this call will try to use a minimal set of SCEV predicates in order to
+ /// return an exact answer.
+ ExitLimit computeExitLimit(const Loop *L, BasicBlock *ExitingBlock,
+ bool AllowPredicates = false);
+
+ /// Compute the number of times the backedge of the specified loop will
+ /// execute if its exit condition were a conditional branch of ExitCond,
+ /// TBB, and FBB.
+ ///
+ /// \p ControlsExit is true if ExitCond directly controls the exit
+ /// branch. In this case, we can assume that the loop exits only if the
+ /// condition is true and can infer that failing to meet the condition prior
+ /// to integer wraparound results in undefined behavior.
+ ///
+ /// If \p AllowPredicates is set, this call will try to use a minimal set of
+ /// SCEV predicates in order to return an exact answer.
+ ExitLimit computeExitLimitFromCond(const Loop *L, Value *ExitCond,
+ BasicBlock *TBB, BasicBlock *FBB,
+ bool ControlsExit,
+ bool AllowPredicates = false);
+
+ /// Compute the number of times the backedge of the specified loop will
+ /// execute if its exit condition were a conditional branch of the ICmpInst
+ /// ExitCond, TBB, and FBB. If AllowPredicates is set, this call will try
+ /// to use a minimal set of SCEV predicates in order to return an exact
+ /// answer.
+ ExitLimit computeExitLimitFromICmp(const Loop *L, ICmpInst *ExitCond,
+ BasicBlock *TBB, BasicBlock *FBB,
+ bool IsSubExpr,
+ bool AllowPredicates = false);
+
+ /// Compute the number of times the backedge of the specified loop will
+ /// execute if its exit condition were a switch with a single exiting case
+ /// to ExitingBB.
+ ExitLimit computeExitLimitFromSingleExitSwitch(const Loop *L,
+ SwitchInst *Switch,
+ BasicBlock *ExitingBB,
+ bool IsSubExpr);
+
+ /// Given an exit condition of 'icmp op load X, cst', try to see if we can
+ /// compute the backedge-taken count.
+ ExitLimit computeLoadConstantCompareExitLimit(LoadInst *LI, Constant *RHS,
+ const Loop *L,
+ ICmpInst::Predicate p);
+
+ /// Compute the exit limit of a loop that is controlled by a
+ /// "(IV >> 1) != 0" type comparison. We cannot compute the exact trip
+ /// count in these cases (since SCEV has no way of expressing them), but we
+ /// can still sometimes compute an upper bound.
+ ///
+ /// Return an ExitLimit for a loop whose backedge is guarded by `LHS Pred
+ /// RHS`.
+ ExitLimit computeShiftCompareExitLimit(Value *LHS, Value *RHS, const Loop *L,
+ ICmpInst::Predicate Pred);
+
+ /// If the loop is known to execute a constant number of times (the
+ /// condition evolves only from constants), try to evaluate a few iterations
+ /// of the loop until the exit condition gets a value of ExitWhen
+ /// (true or false). If we cannot evaluate the exit count of the loop,
+ /// return CouldNotCompute.
+ const SCEV *computeExitCountExhaustively(const Loop *L, Value *Cond,
+ bool ExitWhen);
+
+ /// Return the number of times an exit condition comparing the specified
+ /// value to zero will execute. If not computable, return CouldNotCompute.
+ /// If AllowPredicates is set, this call will try to use a minimal set of
+ /// SCEV predicates in order to return an exact answer.
+ ExitLimit howFarToZero(const SCEV *V, const Loop *L, bool IsSubExpr,
+ bool AllowPredicates = false);
+
+ /// Return the number of times an exit condition checking the specified
+ /// value for nonzero will execute. If not computable, return
+ /// CouldNotCompute.
+ ExitLimit howFarToNonZero(const SCEV *V, const Loop *L);
+
+ /// Return the number of times an exit condition containing the specified
+ /// less-than comparison will execute. If not computable, return
+ /// CouldNotCompute.
+ ///
+ /// \p isSigned specifies whether the less-than is signed.
+ ///
+ /// \p ControlsExit is true when the LHS < RHS condition directly controls
+ /// the branch (the loop exits only if the condition is true). In this case, we can
+ /// use NoWrapFlags to skip overflow checks.
+ ///
+ /// If \p AllowPredicates is set, this call will try to use a minimal set of
+ /// SCEV predicates in order to return an exact answer.
+ ExitLimit howManyLessThans(const SCEV *LHS, const SCEV *RHS, const Loop *L,
+ bool isSigned, bool ControlsExit,
+ bool AllowPredicates = false);
+
+ ExitLimit howManyGreaterThans(const SCEV *LHS, const SCEV *RHS, const Loop *L,
+ bool isSigned, bool IsSubExpr,
+ bool AllowPredicates = false);
+
+ /// Return a predecessor of BB (which may not be an immediate predecessor)
+ /// which has exactly one successor from which BB is reachable, or null if
+ /// no such block is found.
+ std::pair<BasicBlock *, BasicBlock *>
+ getPredecessorWithUniqueSuccessorForBB(BasicBlock *BB);
+
+ /// Test whether the condition described by Pred, LHS, and RHS is true
+ /// whenever the given FoundCondValue value evaluates to true.
+ bool isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS,
+ Value *FoundCondValue, bool Inverse);
+
+ /// Test whether the condition described by Pred, LHS, and RHS is true
+ /// whenever the condition described by FoundPred, FoundLHS, FoundRHS is
+ /// true.
+ bool isImpliedCond(ICmpInst::Predicate Pred, const SCEV *LHS, const SCEV *RHS,
+ ICmpInst::Predicate FoundPred, const SCEV *FoundLHS,
+ const SCEV *FoundRHS);
+
+ /// Test whether the condition described by Pred, LHS, and RHS is true
+ /// whenever the condition described by Pred, FoundLHS, and FoundRHS is
+ /// true.
+ bool isImpliedCondOperands(ICmpInst::Predicate Pred, const SCEV *LHS,
+ const SCEV *RHS, const SCEV *FoundLHS,
+ const SCEV *FoundRHS);
+
+ /// Test whether the condition described by Pred, LHS, and RHS is true
+ /// whenever the condition described by Pred, FoundLHS, and FoundRHS is
+ /// true.
+ bool isImpliedCondOperandsHelper(ICmpInst::Predicate Pred, const SCEV *LHS,
+ const SCEV *RHS, const SCEV *FoundLHS,
+ const SCEV *FoundRHS);
+
+ /// Test whether the condition described by Pred, LHS, and RHS is true
+ /// whenever the condition described by Pred, FoundLHS, and FoundRHS is
+ /// true. Utility function used by isImpliedCondOperands. Tries to get
+ /// cases like "X `sgt` 0 => X - 1 `sgt` -1".
+ bool isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred, const SCEV *LHS,
+ const SCEV *RHS, const SCEV *FoundLHS,
+ const SCEV *FoundRHS);
+
+ /// Return true if the condition denoted by \p LHS \p Pred \p RHS is implied
+ /// by a call to \c @llvm.experimental.guard in \p BB.
+ bool isImpliedViaGuard(BasicBlock *BB, ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS);
+
+ /// Test whether the condition described by Pred, LHS, and RHS is true
+ /// whenever the condition described by Pred, FoundLHS, and FoundRHS is
+ /// true.
+ ///
+ /// This routine tries to rule out certain kinds of integer overflow, and
+ /// then tries to reason about arithmetic properties of the predicates.
+ bool isImpliedCondOperandsViaNoOverflow(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS,
+ const SCEV *FoundLHS,
+ const SCEV *FoundRHS);
+
+ /// If we know that the specified Phi is in the header of its containing
+ /// loop, we know the loop executes a constant number of times, and the PHI
+ /// node is just a recurrence involving constants, fold it.
+ Constant *getConstantEvolutionLoopExitValue(PHINode *PN, const APInt &BEs,
+ const Loop *L);
+
+ /// Test if the given expression is known to satisfy the condition described
+ /// by Pred and the known constant ranges of LHS and RHS.
+ ///
+ bool isKnownPredicateViaConstantRanges(ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS);
- /// An interface layer with SCEV used to manage how we see SCEV expressions
- /// for values in the context of existing predicates. We can add new
- /// predicates, but we cannot remove them.
- ///
- /// This layer has multiple purposes:
- /// - provides a simple interface for SCEV versioning.
- /// - guarantees that the order of transformations applied on a SCEV
- /// expression for a single Value is consistent across two different
- /// getSCEV calls. This means that, for example, once we've obtained
- /// an AddRec expression for a certain value through expression
- /// rewriting, we will continue to get an AddRec expression for that
- /// Value.
- /// - lowers the number of expression rewrites.
- class PredicatedScalarEvolution {
- public:
- PredicatedScalarEvolution(ScalarEvolution &SE, Loop &L);
- const SCEVUnionPredicate &getUnionPredicate() const;
+ /// Try to prove the condition described by "LHS Pred RHS" by ruling out
+ /// integer overflow.
+ ///
+ /// For instance, this will return true for "A s< (A + C)<nsw>" if C is
+ /// positive.
+ bool isKnownPredicateViaNoOverflow(ICmpInst::Predicate Pred, const SCEV *LHS,
+ const SCEV *RHS);
+
+ /// Try to split Pred LHS RHS into logical conjunctions (and's) and try to
+ /// prove them individually.
+ bool isKnownPredicateViaSplitting(ICmpInst::Predicate Pred, const SCEV *LHS,
+ const SCEV *RHS);
+
+ /// Try to match the Expr as "(L + R)<Flags>".
+ bool splitBinaryAdd(const SCEV *Expr, const SCEV *&L, const SCEV *&R,
+ SCEV::NoWrapFlags &Flags);
+
+ /// Compute \p LHS - \p RHS and returns the result as an APInt if it is a
+ /// constant, and None if it isn't.
+ ///
+ /// This is intended to be a cheaper version of getMinusSCEV. We can be
+ /// frugal here since we just bail out of actually constructing and
+ /// canonicalizing an expression in the cases where the result isn't going
+ /// to be a constant.
+ Optional<APInt> computeConstantDifference(const SCEV *LHS, const SCEV *RHS);
+
+ /// Drop memoized information computed for S.
+ void forgetMemoizedResults(const SCEV *S);
+
+ /// Return an existing SCEV for V if there is one, otherwise return nullptr.
+ const SCEV *getExistingSCEV(Value *V);
+
+ /// Return false iff the given SCEV contains a SCEVUnknown with a null value
+ /// pointer.
+ bool checkValidity(const SCEV *S) const;
+
+ /// Return true if `ExtendOpTy`({`Start`,+,`Step`}) can be proved to be
+ /// equal to {`ExtendOpTy`(`Start`),+,`ExtendOpTy`(`Step`)}. This is
+ /// equivalent to proving no signed (resp. unsigned) wrap in
+ /// {`Start`,+,`Step`} if `ExtendOpTy` is `SCEVSignExtendExpr`
+ /// (resp. `SCEVZeroExtendExpr`).
+ ///
+ template <typename ExtendOpTy>
+ bool proveNoWrapByVaryingStart(const SCEV *Start, const SCEV *Step,
+ const Loop *L);
- /// Returns the SCEV expression of V, in the context of the current SCEV
- /// predicate. The order of transformations applied on the expression of V
- /// returned by ScalarEvolution is guaranteed to be preserved, even when
- /// adding new predicates.
- const SCEV *getSCEV(Value *V);
+ /// Try to prove NSW or NUW on \p AR relying on ConstantRange manipulation.
+ SCEV::NoWrapFlags proveNoWrapViaConstantRanges(const SCEVAddRecExpr *AR);
- /// Get the (predicated) backedge count for the analyzed loop.
- const SCEV *getBackedgeTakenCount();
+ bool isMonotonicPredicateImpl(const SCEVAddRecExpr *LHS,
+ ICmpInst::Predicate Pred, bool &Increasing);
- /// Adds a new predicate.
- void addPredicate(const SCEVPredicate &Pred);
+ /// Return true if, for all loop invariant X, the predicate "LHS `Pred` X"
+ /// is monotonically increasing or decreasing. In the former case set
+ /// `Increasing` to true and in the latter case set `Increasing` to false.
+ ///
+ /// A predicate is said to be monotonically increasing if it may go from
+ /// being false to being true as the loop iterates, but never the other way
+ /// around. A predicate is said to be monotonically decreasing if it may go
+ /// from being true to being false as the loop iterates, but never the other
+ /// way around.
+ bool isMonotonicPredicate(const SCEVAddRecExpr *LHS, ICmpInst::Predicate Pred,
+ bool &Increasing);
+
+ /// Return SCEV no-wrap flags that can be proven based on reasoning about
+ /// how poison produced from no-wrap flags on this value (e.g. a nuw add)
+ /// would trigger undefined behavior on overflow.
+ SCEV::NoWrapFlags getNoWrapFlagsFromUB(const Value *V);
+
+ /// Return true if the SCEV corresponding to \p I is never poison. Proving
+ /// this is more complex than proving that just \p I is never poison, since
+ /// SCEV commons expressions across control flow, and you can have cases
+ /// like:
+ ///
+ /// idx0 = a + b;
+ /// ptr[idx0] = 100;
+ /// if (<condition>) {
+ /// idx1 = a +nsw b;
+ /// ptr[idx1] = 200;
+ /// }
+ ///
+ /// where the SCEV expression (+ a b) is guaranteed to not be poison (and
+ /// hence not sign-overflow) only if "<condition>" is true. Since both
+ /// `idx0` and `idx1` will be mapped to the same SCEV expression, (+ a b),
+ /// it is not okay to annotate (+ a b) with <nsw> in the above example.
+ bool isSCEVExprNeverPoison(const Instruction *I);
+
+ /// This is like \c isSCEVExprNeverPoison but it specifically works for
+ /// instructions that will get mapped to SCEV add recurrences. Return true
+ /// if \p I will never generate poison under the assumption that \p I is an
+ /// add recurrence on the loop \p L.
+ bool isAddRecNeverPoison(const Instruction *I, const Loop *L);
+
+public:
+ ScalarEvolution(Function &F, TargetLibraryInfo &TLI, AssumptionCache &AC,
+ DominatorTree &DT, LoopInfo &LI);
+ ~ScalarEvolution();
+ ScalarEvolution(ScalarEvolution &&Arg);
+
+ LLVMContext &getContext() const { return F.getContext(); }
+
+ /// Test if values of the given type are analyzable within the SCEV
+ /// framework. This primarily includes integer types, and it can optionally
+ /// include pointer types if the ScalarEvolution class has access to
+ /// target-specific information.
+ bool isSCEVable(Type *Ty) const;
+
+ /// Return the size in bits of the specified type, for which isSCEVable must
+ /// return true.
+ uint64_t getTypeSizeInBits(Type *Ty) const;
+
+ /// Return a type with the same bitwidth as the given type and which
+ /// represents how SCEV will treat the given type, for which isSCEVable must
+ /// return true. For pointer types, this is the pointer-sized integer type.
+ Type *getEffectiveSCEVType(Type *Ty) const;
+
+ /// Return true if the SCEV is a scAddRecExpr or it contains
+ /// scAddRecExpr. The result will be cached in HasRecMap.
+ ///
+ bool containsAddRecurrence(const SCEV *S);
+
+ /// Return the Value set from which the SCEV expr is generated.
+ SetVector<ValueOffsetPair> *getSCEVValues(const SCEV *S);
+
+ /// Erase Value from ValueExprMap and ExprValueMap.
+ void eraseValueFromMap(Value *V);
+
+ /// Return a SCEV expression for the full generality of the specified
+ /// expression.
+ const SCEV *getSCEV(Value *V);
+
+ const SCEV *getConstant(ConstantInt *V);
+ const SCEV *getConstant(const APInt &Val);
+ const SCEV *getConstant(Type *Ty, uint64_t V, bool isSigned = false);
+ const SCEV *getTruncateExpr(const SCEV *Op, Type *Ty);
+ const SCEV *getZeroExtendExpr(const SCEV *Op, Type *Ty);
+ const SCEV *getSignExtendExpr(const SCEV *Op, Type *Ty);
+ const SCEV *getAnyExtendExpr(const SCEV *Op, Type *Ty);
+ const SCEV *getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
+ SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap);
+ const SCEV *getAddExpr(const SCEV *LHS, const SCEV *RHS,
+ SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap) {
+ SmallVector<const SCEV *, 2> Ops = {LHS, RHS};
+ return getAddExpr(Ops, Flags);
+ }
+ const SCEV *getAddExpr(const SCEV *Op0, const SCEV *Op1, const SCEV *Op2,
+ SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap) {
+ SmallVector<const SCEV *, 3> Ops = {Op0, Op1, Op2};
+ return getAddExpr(Ops, Flags);
+ }
+ const SCEV *getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
+ SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap);
+ const SCEV *getMulExpr(const SCEV *LHS, const SCEV *RHS,
+ SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap) {
+ SmallVector<const SCEV *, 2> Ops = {LHS, RHS};
+ return getMulExpr(Ops, Flags);
+ }
+ const SCEV *getMulExpr(const SCEV *Op0, const SCEV *Op1, const SCEV *Op2,
+ SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap) {
+ SmallVector<const SCEV *, 3> Ops = {Op0, Op1, Op2};
+ return getMulExpr(Ops, Flags);
+ }
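The two- and three-operand overloads above just forward to the SmallVector form. As a hedged illustration (A and B are hypothetical integer Values of the same type, not from this patch, and the sketch assumes llvm/Analysis/ScalarEvolution.h), composing expressions looks roughly like:

// Sketch only: A and B are hypothetical values; no wrap flags are claimed.
const llvm::SCEV *twiceTheSum(llvm::ScalarEvolution &SE,
                              llvm::Value *A, llvm::Value *B) {
  // Build the SCEV for A + B from the SCEVs of the two values.
  const llvm::SCEV *Sum = SE.getAddExpr(SE.getSCEV(A), SE.getSCEV(B));
  // Multiply by the constant 2 of the same type.
  return SE.getMulExpr(Sum, SE.getConstant(Sum->getType(), 2));
}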
+ const SCEV *getUDivExpr(const SCEV *LHS, const SCEV *RHS);
+ const SCEV *getUDivExactExpr(const SCEV *LHS, const SCEV *RHS);
+ const SCEV *getAddRecExpr(const SCEV *Start, const SCEV *Step, const Loop *L,
+ SCEV::NoWrapFlags Flags);
+ const SCEV *getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
+ const Loop *L, SCEV::NoWrapFlags Flags);
+ const SCEV *getAddRecExpr(const SmallVectorImpl<const SCEV *> &Operands,
+ const Loop *L, SCEV::NoWrapFlags Flags) {
+ SmallVector<const SCEV *, 4> NewOp(Operands.begin(), Operands.end());
+ return getAddRecExpr(NewOp, L, Flags);
+ }
+ /// Returns an expression for a GEP
+ ///
+ /// \p GEP The GEP. The indices contained in the GEP itself are ignored;
+ /// instead we use IndexExprs.
+ /// \p IndexExprs The expressions for the indices.
+ const SCEV *getGEPExpr(GEPOperator *GEP,
+ const SmallVectorImpl<const SCEV *> &IndexExprs);
+ const SCEV *getSMaxExpr(const SCEV *LHS, const SCEV *RHS);
+ const SCEV *getSMaxExpr(SmallVectorImpl<const SCEV *> &Operands);
+ const SCEV *getUMaxExpr(const SCEV *LHS, const SCEV *RHS);
+ const SCEV *getUMaxExpr(SmallVectorImpl<const SCEV *> &Operands);
+ const SCEV *getSMinExpr(const SCEV *LHS, const SCEV *RHS);
+ const SCEV *getUMinExpr(const SCEV *LHS, const SCEV *RHS);
+ const SCEV *getUnknown(Value *V);
+ const SCEV *getCouldNotCompute();
+
+ /// Return a SCEV for the constant 0 of a specific type.
+ const SCEV *getZero(Type *Ty) { return getConstant(Ty, 0); }
+
+ /// Return a SCEV for the constant 1 of a specific type.
+ const SCEV *getOne(Type *Ty) { return getConstant(Ty, 1); }
+
+ /// Return an expression for sizeof AllocTy that is type IntTy
+ ///
+ const SCEV *getSizeOfExpr(Type *IntTy, Type *AllocTy);
- /// Attempts to produce an AddRecExpr for V by adding additional SCEV
- /// predicates. If we can't transform the expression into an AddRecExpr we
- /// return nullptr and not add additional SCEV predicates to the current
- /// context.
- const SCEVAddRecExpr *getAsAddRec(Value *V);
+ /// Return an expression for offsetof on the given field with type IntTy
+ ///
+ const SCEV *getOffsetOfExpr(Type *IntTy, StructType *STy, unsigned FieldNo);
- /// Proves that V doesn't overflow by adding SCEV predicate.
- void setNoOverflow(Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags);
+ /// Return the SCEV object corresponding to -V.
+ ///
+ const SCEV *getNegativeSCEV(const SCEV *V,
+ SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap);
- /// Returns true if we've proved that V doesn't wrap by means of a SCEV
- /// predicate.
- bool hasNoOverflow(Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags);
+ /// Return the SCEV object corresponding to ~V.
+ ///
+ const SCEV *getNotSCEV(const SCEV *V);
- /// Returns the ScalarEvolution analysis used.
- ScalarEvolution *getSE() const { return &SE; }
+ /// Return LHS-RHS. Minus is represented in SCEV as A+B*-1.
+ const SCEV *getMinusSCEV(const SCEV *LHS, const SCEV *RHS,
+ SCEV::NoWrapFlags Flags = SCEV::FlagAnyWrap);
- /// We need to explicitly define the copy constructor because of FlagsMap.
- PredicatedScalarEvolution(const PredicatedScalarEvolution&);
+ /// Return a SCEV corresponding to a conversion of the input value to the
+ /// specified type. If the type must be extended, it is zero extended.
+ const SCEV *getTruncateOrZeroExtend(const SCEV *V, Type *Ty);
+
+ /// Return a SCEV corresponding to a conversion of the input value to the
+ /// specified type. If the type must be extended, it is sign extended.
+ const SCEV *getTruncateOrSignExtend(const SCEV *V, Type *Ty);
+
+ /// Return a SCEV corresponding to a conversion of the input value to the
+ /// specified type. If the type must be extended, it is zero extended. The
+ /// conversion must not be narrowing.
+ const SCEV *getNoopOrZeroExtend(const SCEV *V, Type *Ty);
+
+ /// Return a SCEV corresponding to a conversion of the input value to the
+ /// specified type. If the type must be extended, it is sign extended. The
+ /// conversion must not be narrowing.
+ const SCEV *getNoopOrSignExtend(const SCEV *V, Type *Ty);
+
+ /// Return a SCEV corresponding to a conversion of the input value to the
+ /// specified type. If the type must be extended, it is extended with
+ /// unspecified bits. The conversion must not be narrowing.
+ const SCEV *getNoopOrAnyExtend(const SCEV *V, Type *Ty);
+
+ /// Return a SCEV corresponding to a conversion of the input value to the
+ /// specified type. The conversion must not be widening.
+ const SCEV *getTruncateOrNoop(const SCEV *V, Type *Ty);
+
+ /// Promote the operands to the wider of the types using zero-extension, and
+ /// then perform a umax operation with them.
+ const SCEV *getUMaxFromMismatchedTypes(const SCEV *LHS, const SCEV *RHS);
+
+ /// Promote the operands to the wider of the types using zero-extension, and
+ /// then perform a umin operation with them.
+ const SCEV *getUMinFromMismatchedTypes(const SCEV *LHS, const SCEV *RHS);
+
+ /// Transitively follow the chain of pointer-type operands until reaching a
+ /// SCEV that does not have a single pointer operand. This returns a
+ /// SCEVUnknown pointer for well-formed pointer-type expressions, but corner
+ /// cases do exist.
+ const SCEV *getPointerBase(const SCEV *V);
+
+ /// Return a SCEV expression for the specified value at the specified scope
+ /// in the program. The L value specifies a loop nest to evaluate the
+ /// expression at: null means the top level, and a specified loop means
+ /// evaluation immediately inside that loop.
+ ///
+ /// This method can be used to compute the exit value for a variable defined
+ /// in a loop by querying what the value will hold in the parent loop.
+ ///
+ /// In the case that a relevant loop exit value cannot be computed, the
+ /// original value V is returned.
+ const SCEV *getSCEVAtScope(const SCEV *S, const Loop *L);
+
+ /// This is a convenience function which does getSCEVAtScope(getSCEV(V), L).
+ const SCEV *getSCEVAtScope(Value *V, const Loop *L);
+
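As a rough usage sketch (V and L are hypothetical names, not part of this patch): asking for the value a loop-defined expression has once the loop has finished means evaluating it at the scope of the parent loop, or at nullptr for the top level.

// Sketch: V is an instruction defined inside loop L; SE is ScalarEvolution.
const llvm::SCEV *ExitValue = SE.getSCEVAtScope(V, L->getParentLoop());
if (ExitValue == SE.getSCEV(V)) {
  // No loop exit value could be computed; the original expression came back.
}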
+ /// Test whether entry to the loop is protected by a conditional between LHS
+ /// and RHS. This is used to help avoid max expressions in loop trip
+ /// counts, and to eliminate casts.
+ bool isLoopEntryGuardedByCond(const Loop *L, ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS);
+
+ /// Test whether the backedge of the loop is protected by a conditional
+ /// between LHS and RHS. This is used to eliminate casts.
+ bool isLoopBackedgeGuardedByCond(const Loop *L, ICmpInst::Predicate Pred,
+ const SCEV *LHS, const SCEV *RHS);
+
+ /// Returns the maximum trip count of the loop if it is a single-exit
+ /// loop and we can compute a small maximum for that loop.
+ ///
+ /// Implemented in terms of the \c getSmallConstantTripCount overload with
+ /// the single exiting block passed to it. See that routine for details.
+ unsigned getSmallConstantTripCount(Loop *L);
+
+ /// Returns the maximum trip count of this loop as a normal unsigned
+ /// value. Returns 0 if the trip count is unknown or not constant. This
+ /// "trip count" assumes that control exits via ExitingBlock. More
+ /// precisely, it is the number of times that control may reach ExitingBlock
+ /// before taking the branch. For loops with multiple exits, it may not be
+ /// the number of times that the loop header executes if the loop exits
+ /// prematurely via another branch.
+ unsigned getSmallConstantTripCount(Loop *L, BasicBlock *ExitingBlock);
+
+ /// Returns the upper bound of the loop trip count as a normal unsigned
+ /// value.
+ /// Returns 0 if the trip count is unknown or not constant.
+ unsigned getSmallConstantMaxTripCount(Loop *L);
+
+ /// Returns the largest constant divisor of the trip count of the
+ /// loop if it is a single-exit loop and we can compute a small maximum for
+ /// that loop.
+ ///
+ /// Implemented in terms of the \c getSmallConstantTripMultiple overload with
+ /// the single exiting block passed to it. See that routine for details.
+ unsigned getSmallConstantTripMultiple(Loop *L);
+
+ /// Returns the largest constant divisor of the trip count of this loop as a
+ /// normal unsigned value, if possible. This means that the actual trip
+ /// count is always a multiple of the returned value (don't forget the trip
+ /// count could very well be zero as well!). As explained in the comments
+ /// for getSmallConstantTripCount, this assumes that control exits the loop
+ /// via ExitingBlock.
+ unsigned getSmallConstantTripMultiple(Loop *L, BasicBlock *ExitingBlock);
+
+ /// Get the expression for the number of loop iterations for which this loop
+ /// is guaranteed not to exit via ExitingBlock. Otherwise return
+ /// SCEVCouldNotCompute.
+ const SCEV *getExitCount(Loop *L, BasicBlock *ExitingBlock);
+
+ /// If the specified loop has a predictable backedge-taken count, return it,
+ /// otherwise return a SCEVCouldNotCompute object. The backedge-taken count
+ /// is the number of times the loop header will be branched to from within
+ /// the loop. This is one less than the trip count of the loop, since it
+ /// doesn't count the first iteration, when the header is branched to from
+ /// outside the loop.
+ ///
+ /// Note that it is not valid to call this method on a loop without a
+ /// loop-invariant backedge-taken count (see
+ /// hasLoopInvariantBackedgeTakenCount).
+ ///
+ const SCEV *getBackedgeTakenCount(const Loop *L);
+
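A minimal client-side sketch (the loop L is hypothetical): guard the query as required above, and remember that the trip count is one more than the backedge-taken count.

// Sketch: only query loops with an analyzable, loop-invariant count.
if (SE.hasLoopInvariantBackedgeTakenCount(L)) {
  const llvm::SCEV *BTC = SE.getBackedgeTakenCount(L);
  // Trip count = backedge-taken count + 1, assuming the type is wide enough
  // that the +1 does not wrap.
  const llvm::SCEV *TripCount = SE.getAddExpr(BTC, SE.getOne(BTC->getType()));
  (void)TripCount;
}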
+ /// Similar to getBackedgeTakenCount, except it will add a set of
+ /// SCEV predicates to Predicates that are required to be true in order for
+ /// the answer to be correct. Predicates can be checked with run-time
+ /// checks and can be used to perform loop versioning.
+ const SCEV *getPredicatedBackedgeTakenCount(const Loop *L,
+ SCEVUnionPredicate &Predicates);
+
+ /// Similar to getBackedgeTakenCount, except return the least SCEV value
+ /// that is known never to be less than the actual backedge taken count.
+ const SCEV *getMaxBackedgeTakenCount(const Loop *L);
+
+ /// Return true if the backedge taken count is either the value returned by
+ /// getMaxBackedgeTakenCount or zero.
+ bool isBackedgeTakenCountMaxOrZero(const Loop *L);
+
+ /// Return true if the specified loop has an analyzable loop-invariant
+ /// backedge-taken count.
+ bool hasLoopInvariantBackedgeTakenCount(const Loop *L);
+
+ /// This method should be called by the client when it has changed a loop in
+ /// a way that may affect ScalarEvolution's ability to compute a trip count,
+ /// or if the loop is deleted. This call is potentially expensive for large
+ /// loop bodies.
+ void forgetLoop(const Loop *L);
+
+ /// This method should be called by the client when it has changed a value
+ /// in a way that may affect its value, or which may disconnect it from a
+ /// def-use chain linking it to a loop.
+ void forgetValue(Value *V);
+
+ /// Called when the client has changed the disposition of values in
+ /// this loop.
+ ///
+ /// We don't have a way to invalidate per-loop dispositions. Clear and
+ /// recompute is simpler.
+ void forgetLoopDispositions(const Loop *L) { LoopDispositions.clear(); }
- /// Print the SCEV mappings done by the Predicated Scalar Evolution.
- /// The printed text is indented by \p Depth.
- void print(raw_ostream &OS, unsigned Depth) const;
+ /// Determine the minimum number of zero bits that S is guaranteed to end in
+ /// (at every loop iteration). It is, at the same time, the minimum number
+ /// of times S is divisible by 2. For example, given {4,+,8} it returns 2.
+ /// If S is guaranteed to be 0, it returns the bitwidth of S.
+ uint32_t GetMinTrailingZeros(const SCEV *S);
- private:
- /// Increments the version number of the predicate. This needs to be called
- /// every time the SCEV predicate changes.
- void updateGeneration();
+ /// Determine the unsigned range for a particular SCEV.
+ ///
+ ConstantRange getUnsignedRange(const SCEV *S) {
+ return getRange(S, HINT_RANGE_UNSIGNED);
+ }
- /// Holds a SCEV and the version number of the SCEV predicate used to
- /// perform the rewrite of the expression.
- typedef std::pair<unsigned, const SCEV *> RewriteEntry;
+ /// Determine the signed range for a particular SCEV.
+ ///
+ ConstantRange getSignedRange(const SCEV *S) {
+ return getRange(S, HINT_RANGE_SIGNED);
+ }
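For illustration (S is a hypothetical SCEV), these ranges can back simple facts such as non-negativity:

// Sketch: prove S >= 0 from the lower bound of its signed range.
llvm::ConstantRange SR = SE.getSignedRange(S);
bool KnownNonNegative = SR.getSignedMin().isNonNegative();
(void)KnownNonNegative;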
- /// Maps a SCEV to the rewrite result of that SCEV at a certain version
- /// number. If this number doesn't match the current Generation, we will
- /// need to do a rewrite. To preserve the transformation order of previous
- /// rewrites, we will rewrite the previous result instead of the original
- /// SCEV.
- DenseMap<const SCEV *, RewriteEntry> RewriteMap;
+ /// Test if the given expression is known to be negative.
+ ///
+ bool isKnownNegative(const SCEV *S);
- /// Records what NoWrap flags we've added to a Value *.
- ValueMap<Value *, SCEVWrapPredicate::IncrementWrapFlags> FlagsMap;
+ /// Test if the given expression is known to be positive.
+ ///
+ bool isKnownPositive(const SCEV *S);
- /// The ScalarEvolution analysis.
- ScalarEvolution &SE;
+ /// Test if the given expression is known to be non-negative.
+ ///
+ bool isKnownNonNegative(const SCEV *S);
- /// The analyzed Loop.
- const Loop &L;
+ /// Test if the given expression is known to be non-positive.
+ ///
+ bool isKnownNonPositive(const SCEV *S);
- /// The SCEVPredicate that forms our context. We will rewrite all
- /// expressions assuming that this predicate true.
- SCEVUnionPredicate Preds;
+ /// Test if the given expression is known to be non-zero.
+ ///
+ bool isKnownNonZero(const SCEV *S);
- /// Marks the version of the SCEV predicate used. When rewriting a SCEV
- /// expression we mark it with the version of the predicate. We use this to
- /// figure out if the predicate has changed from the last rewrite of the
- /// SCEV. If so, we need to perform a new rewrite.
- unsigned Generation;
+ /// Test if the given expression is known to satisfy the condition described
+ /// by Pred, LHS, and RHS.
+ ///
+ bool isKnownPredicate(ICmpInst::Predicate Pred, const SCEV *LHS,
+ const SCEV *RHS);
+
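A hedged usage sketch (I and N are hypothetical values): the query is phrased over SCEVs rather than IR values.

// Sketch: is "I s< N" known to hold wherever both values are defined?
bool Known = SE.isKnownPredicate(llvm::ICmpInst::ICMP_SLT,
                                 SE.getSCEV(I), SE.getSCEV(N));
(void)Known;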
+ /// Return true if the result of the predicate LHS `Pred` RHS is loop
+ /// invariant with respect to L. Set InvariantPred, InvariantLHS and
+ /// InvariantRHS so that InvariantLHS `InvariantPred` InvariantRHS is the
+ /// loop invariant form of LHS `Pred` RHS.
+ bool isLoopInvariantPredicate(ICmpInst::Predicate Pred, const SCEV *LHS,
+ const SCEV *RHS, const Loop *L,
+ ICmpInst::Predicate &InvariantPred,
+ const SCEV *&InvariantLHS,
+ const SCEV *&InvariantRHS);
+
+ /// Simplify LHS and RHS in a comparison with predicate Pred. Return true
+ /// iff any changes were made. If the operands are provably equal or
+ /// unequal, LHS and RHS are set to the same value and Pred is set to either
+ /// ICMP_EQ or ICMP_NE.
+ ///
+ bool SimplifyICmpOperands(ICmpInst::Predicate &Pred, const SCEV *&LHS,
+ const SCEV *&RHS, unsigned Depth = 0);
+
+ /// Return the "disposition" of the given SCEV with respect to the given
+ /// loop.
+ LoopDisposition getLoopDisposition(const SCEV *S, const Loop *L);
+
+ /// Return true if the value of the given SCEV is unchanging in the
+ /// specified loop.
+ bool isLoopInvariant(const SCEV *S, const Loop *L);
+
+ /// Return true if the given SCEV changes value in a known way in the
+ /// specified loop. This property being true implies that the value is
+ /// variant in the loop AND that we can emit an expression to compute the
+ /// value of the expression at any particular loop iteration.
+ bool hasComputableLoopEvolution(const SCEV *S, const Loop *L);
+
+ /// Return the "disposition" of the given SCEV with respect to the given
+ /// block.
+ BlockDisposition getBlockDisposition(const SCEV *S, const BasicBlock *BB);
+
+ /// Return true if the elements that make up the given SCEV dominate the
+ /// specified basic block.
+ bool dominates(const SCEV *S, const BasicBlock *BB);
+
+ /// Return true if the elements that make up the given SCEV properly dominate
+ /// the specified basic block.
+ bool properlyDominates(const SCEV *S, const BasicBlock *BB);
+
+ /// Test whether the given SCEV has Op as a direct or indirect operand.
+ bool hasOperand(const SCEV *S, const SCEV *Op) const;
+
+ /// Return the size of an element read or written by Inst.
+ const SCEV *getElementSize(Instruction *Inst);
+
+ /// Compute the array dimensions Sizes from the set of Terms extracted from
+ /// the memory access function of this SCEVAddRecExpr (second step of
+ /// delinearization).
+ void findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms,
+ SmallVectorImpl<const SCEV *> &Sizes,
+ const SCEV *ElementSize) const;
+
+ void print(raw_ostream &OS) const;
+ void verify() const;
+
+ /// Collect parametric terms occurring in step expressions (first step of
+ /// delinearization).
+ void collectParametricTerms(const SCEV *Expr,
+ SmallVectorImpl<const SCEV *> &Terms);
+
+ /// Return in Subscripts the access functions for each dimension in Sizes
+ /// (third step of delinearization).
+ void computeAccessFunctions(const SCEV *Expr,
+ SmallVectorImpl<const SCEV *> &Subscripts,
+ SmallVectorImpl<const SCEV *> &Sizes);
+
+ /// Split this SCEVAddRecExpr into two vectors of SCEVs representing the
+ /// subscripts and sizes of an array access.
+ ///
+ /// The delinearization is a 3 step process: the first two steps compute the
+ /// sizes of each subscript and the third step computes the access functions
+ /// for the delinearized array:
+ ///
+ /// 1. Find the terms in the step functions
+ /// 2. Compute the array size
+ /// 3. Compute the access function: divide the SCEV by the array size
+ /// starting with the innermost dimensions found in step 2. The Quotient
+ /// is the SCEV to be divided in the next step of the recursion. The
+ /// Remainder is the subscript of the innermost dimension. Loop over all
+ /// array dimensions computed in step 2.
+ ///
+ /// To compute a uniform array size for several memory accesses to the same
+ /// object, one can collect in step 1 all the step terms for all the memory
+ /// accesses, and compute in step 2 a unique array shape. This guarantees
+ /// that the array shape will be the same across all memory accesses.
+ ///
+ /// FIXME: We could derive the result of steps 1 and 2 from a description of
+ /// the array shape given in metadata.
+ ///
+ /// Example:
+ ///
+ /// A[][n][m]
+ ///
+ /// for i
+ /// for j
+ /// for k
+ /// A[j+k][2i][5i] =
+ ///
+ /// The initial SCEV:
+ ///
+ /// A[{{{0,+,2*m+5}_i, +, n*m}_j, +, n*m}_k]
+ ///
+ /// 1. Find the different terms in the step functions:
+ /// -> [2*m, 5, n*m, n*m]
+ ///
+ /// 2. Compute the array size: sort and unique them
+ /// -> [n*m, 2*m, 5]
+ /// find the GCD of all the terms = 1
+ /// divide by the GCD and erase constant terms
+ /// -> [n*m, 2*m]
+ /// GCD = m
+ /// divide by GCD -> [n, 2]
+ /// remove constant terms
+ /// -> [n]
+ /// size of the array is A[unknown][n][m]
+ ///
+ /// 3. Compute the access function
+ /// a. Divide {{{0,+,2*m+5}_i, +, n*m}_j, +, n*m}_k by the innermost size m
+ /// Quotient: {{{0,+,2}_i, +, n}_j, +, n}_k
+ /// Remainder: {{{0,+,5}_i, +, 0}_j, +, 0}_k
+ /// The remainder is the subscript of the innermost array dimension: [5i].
+ ///
+ /// b. Divide Quotient: {{{0,+,2}_i, +, n}_j, +, n}_k by next outer size n
+ /// Quotient: {{{0,+,0}_i, +, 1}_j, +, 1}_k
+ /// Remainder: {{{0,+,2}_i, +, 0}_j, +, 0}_k
+ /// The Remainder is the subscript of the next array dimension: [2i].
+ ///
+ /// The subscript of the outermost dimension is the Quotient: [j+k].
+ ///
+ /// Overall, we have: A[][n][m], and the access function: A[j+k][2i][5i].
+ void delinearize(const SCEV *Expr, SmallVectorImpl<const SCEV *> &Subscripts,
+ SmallVectorImpl<const SCEV *> &Sizes,
+ const SCEV *ElementSize);
+
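The whole pipeline can be driven through delinearize; a minimal sketch (AccessFn and Inst are hypothetical, and SmallVector comes from llvm/ADT/SmallVector.h) might look like:

// Sketch: recover per-dimension subscripts and sizes for one access.
llvm::SmallVector<const llvm::SCEV *, 4> Subscripts, Sizes;
SE.delinearize(AccessFn, Subscripts, Sizes, SE.getElementSize(Inst));
if (!Subscripts.empty()) {
  // Subscripts holds the access function of each recovered dimension and
  // Sizes the corresponding extents.
}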
+ /// Return the DataLayout associated with the module this SCEV instance is
+ /// operating on.
+ const DataLayout &getDataLayout() const {
+ return F.getParent()->getDataLayout();
+ }
- /// The backedge taken count.
- const SCEV *BackedgeCount;
- };
+ const SCEVPredicate *getEqualPredicate(const SCEVUnknown *LHS,
+ const SCEVConstant *RHS);
+
+ const SCEVPredicate *
+ getWrapPredicate(const SCEVAddRecExpr *AR,
+ SCEVWrapPredicate::IncrementWrapFlags AddedFlags);
+
+ /// Re-writes the SCEV according to the Predicates in \p A.
+ const SCEV *rewriteUsingPredicate(const SCEV *S, const Loop *L,
+ SCEVUnionPredicate &A);
+ /// Tries to convert the \p S expression to an AddRec expression,
+ /// adding additional predicates to \p Preds as required.
+ const SCEVAddRecExpr *convertSCEVToAddRecWithPredicates(
+ const SCEV *S, const Loop *L,
+ SmallPtrSetImpl<const SCEVPredicate *> &Preds);
+
+private:
+ /// Compute the backedge taken count knowing the interval difference, the
+ /// stride and presence of the equality in the comparison.
+ const SCEV *computeBECount(const SCEV *Delta, const SCEV *Stride,
+ bool Equality);
+
+ /// Verify if a linear IV with positive stride can overflow when in a
+ /// less-than comparison, knowing the invariant term of the comparison,
+ /// the stride and the knowledge of NSW/NUW flags on the recurrence.
+ bool doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride, bool IsSigned,
+ bool NoWrap);
+
+ /// Verify if a linear IV with negative stride can overflow when in a
+ /// greater-than comparison, knowing the invariant term of the comparison,
+ /// the stride and the knowledge of NSW/NUW flags on the recurrence.
+ bool doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride, bool IsSigned,
+ bool NoWrap);
+
+private:
+ FoldingSet<SCEV> UniqueSCEVs;
+ FoldingSet<SCEVPredicate> UniquePreds;
+ BumpPtrAllocator SCEVAllocator;
+
+ /// The head of a linked list of all SCEVUnknown values that have been
+ /// allocated. This is used by releaseMemory to locate them all and call
+ /// their destructors.
+ SCEVUnknown *FirstUnknown;
+};
+
+/// Analysis pass that exposes the \c ScalarEvolution for a function.
+class ScalarEvolutionAnalysis
+ : public AnalysisInfoMixin<ScalarEvolutionAnalysis> {
+ friend AnalysisInfoMixin<ScalarEvolutionAnalysis>;
+ static AnalysisKey Key;
+
+public:
+ typedef ScalarEvolution Result;
+
+ ScalarEvolution run(Function &F, FunctionAnalysisManager &AM);
+};
+
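Under the new pass manager a client pass would typically fetch the result from the analysis manager; the sketch below uses a hypothetical pass, not one from this patch.

// Sketch: MyPass is illustrative only.
struct MyPass : llvm::PassInfoMixin<MyPass> {
  llvm::PreservedAnalyses run(llvm::Function &F,
                              llvm::FunctionAnalysisManager &AM) {
    // Request (and lazily compute) ScalarEvolution for F.
    llvm::ScalarEvolution &SE = AM.getResult<llvm::ScalarEvolutionAnalysis>(F);
    (void)SE;
    return llvm::PreservedAnalyses::all();
  }
};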
+/// Printer pass for the \c ScalarEvolutionAnalysis results.
+class ScalarEvolutionPrinterPass
+ : public PassInfoMixin<ScalarEvolutionPrinterPass> {
+ raw_ostream &OS;
+
+public:
+ explicit ScalarEvolutionPrinterPass(raw_ostream &OS) : OS(OS) {}
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+class ScalarEvolutionWrapperPass : public FunctionPass {
+ std::unique_ptr<ScalarEvolution> SE;
+
+public:
+ static char ID;
+
+ ScalarEvolutionWrapperPass();
+
+ ScalarEvolution &getSE() { return *SE; }
+ const ScalarEvolution &getSE() const { return *SE; }
+
+ bool runOnFunction(Function &F) override;
+ void releaseMemory() override;
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ void print(raw_ostream &OS, const Module * = nullptr) const override;
+ void verifyAnalysis() const override;
+};
+
+/// An interface layer with SCEV used to manage how we see SCEV expressions
+/// for values in the context of existing predicates. We can add new
+/// predicates, but we cannot remove them.
+///
+/// This layer has multiple purposes:
+/// - provides a simple interface for SCEV versioning.
+/// - guarantees that the order of transformations applied on a SCEV
+/// expression for a single Value is consistent across two different
+/// getSCEV calls. This means that, for example, once we've obtained
+/// an AddRec expression for a certain value through expression
+/// rewriting, we will continue to get an AddRec expression for that
+/// Value.
+/// - lowers the number of expression rewrites.
+class PredicatedScalarEvolution {
+public:
+ PredicatedScalarEvolution(ScalarEvolution &SE, Loop &L);
+ const SCEVUnionPredicate &getUnionPredicate() const;
+
+ /// Returns the SCEV expression of V, in the context of the current SCEV
+ /// predicate. The order of transformations applied on the expression of V
+ /// returned by ScalarEvolution is guaranteed to be preserved, even when
+ /// adding new predicates.
+ const SCEV *getSCEV(Value *V);
+
+ /// Get the (predicated) backedge count for the analyzed loop.
+ const SCEV *getBackedgeTakenCount();
+
+ /// Adds a new predicate.
+ void addPredicate(const SCEVPredicate &Pred);
+
+ /// Attempts to produce an AddRecExpr for V by adding additional SCEV
+ /// predicates. If we can't transform the expression into an AddRecExpr we
+ /// return nullptr and not add additional SCEV predicates to the current
+ /// context.
+ const SCEVAddRecExpr *getAsAddRec(Value *V);
+
+ /// Proves that V doesn't overflow by adding a SCEV predicate.
+ void setNoOverflow(Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags);
+
+ /// Returns true if we've proved that V doesn't wrap by means of a SCEV
+ /// predicate.
+ bool hasNoOverflow(Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags);
+
+ /// Returns the ScalarEvolution analysis used.
+ ScalarEvolution *getSE() const { return &SE; }
+
+ /// We need to explicitly define the copy constructor because of FlagsMap.
+ PredicatedScalarEvolution(const PredicatedScalarEvolution &);
+
+ /// Print the SCEV mappings done by the Predicated Scalar Evolution.
+ /// The printed text is indented by \p Depth.
+ void print(raw_ostream &OS, unsigned Depth) const;
+
+private:
+ /// Increments the version number of the predicate. This needs to be called
+ /// every time the SCEV predicate changes.
+ void updateGeneration();
+
+ /// Holds a SCEV and the version number of the SCEV predicate used to
+ /// perform the rewrite of the expression.
+ typedef std::pair<unsigned, const SCEV *> RewriteEntry;
+
+ /// Maps a SCEV to the rewrite result of that SCEV at a certain version
+ /// number. If this number doesn't match the current Generation, we will
+ /// need to do a rewrite. To preserve the transformation order of previous
+ /// rewrites, we will rewrite the previous result instead of the original
+ /// SCEV.
+ DenseMap<const SCEV *, RewriteEntry> RewriteMap;
+
+ /// Records what NoWrap flags we've added to a Value *.
+ ValueMap<Value *, SCEVWrapPredicate::IncrementWrapFlags> FlagsMap;
+
+ /// The ScalarEvolution analysis.
+ ScalarEvolution &SE;
+
+ /// The analyzed Loop.
+ const Loop &L;
+
+ /// The SCEVPredicate that forms our context. We will rewrite all
+ /// expressions assuming that this predicate is true.
+ SCEVUnionPredicate Preds;
+
+ /// Marks the version of the SCEV predicate used. When rewriting a SCEV
+ /// expression we mark it with the version of the predicate. We use this to
+ /// figure out if the predicate has changed from the last rewrite of the
+ /// SCEV. If so, we need to perform a new rewrite.
+ unsigned Generation;
+
+ /// The backedge taken count.
+ const SCEV *BackedgeCount;
+};
}
#endif
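A hedged sketch of how a client transform (for example, a loop-versioning pass) typically drives the PredicatedScalarEvolution interface declared above; SE, L and Ptr are illustrative names, not from this patch.

// Sketch: collect the predicates needed to view Ptr as an add recurrence.
llvm::PredicatedScalarEvolution PSE(SE, *L);
if (const llvm::SCEVAddRecExpr *AR = PSE.getAsAddRec(Ptr)) {
  // Any predicates that were required are now recorded in PSE and can be
  // emitted as run-time checks when versioning the loop.
  const llvm::SCEVUnionPredicate &Preds = PSE.getUnionPredicate();
  (void)AR; (void)Preds;
}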
diff --git a/contrib/llvm/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h b/contrib/llvm/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h
index ac10370b4131..329be51e5eac 100644
--- a/contrib/llvm/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h
+++ b/contrib/llvm/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h
@@ -40,12 +40,12 @@ private:
/// Analysis pass providing a never-invalidated alias analysis result.
class SCEVAA : public AnalysisInfoMixin<SCEVAA> {
friend AnalysisInfoMixin<SCEVAA>;
- static char PassID;
+ static AnalysisKey Key;
public:
typedef SCEVAAResult Result;
- SCEVAAResult run(Function &F, AnalysisManager<Function> &AM);
+ SCEVAAResult run(Function &F, FunctionAnalysisManager &AM);
};
/// Legacy wrapper pass to provide the SCEVAAResult object.
diff --git a/contrib/llvm/include/llvm/Analysis/ScalarEvolutionExpander.h b/contrib/llvm/include/llvm/Analysis/ScalarEvolutionExpander.h
index 1acf952ab70c..517592a3d049 100644
--- a/contrib/llvm/include/llvm/Analysis/ScalarEvolutionExpander.h
+++ b/contrib/llvm/include/llvm/Analysis/ScalarEvolutionExpander.h
@@ -14,12 +14,14 @@
#ifndef LLVM_ANALYSIS_SCALAREVOLUTIONEXPANDER_H
#define LLVM_ANALYSIS_SCALAREVOLUTIONEXPANDER_H
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ScalarEvolutionNormalization.h"
#include "llvm/Analysis/TargetFolder.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/ValueHandle.h"
-#include <set>
namespace llvm {
class TargetTransformInfo;
@@ -28,8 +30,8 @@ namespace llvm {
/// all materialized values are safe to speculate.
bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE);
- /// This class uses information about analyze scalars to
- /// rewrite expressions in canonical form.
+ /// This class uses information about analyzed scalars to rewrite expressions
+ /// in canonical form.
///
/// Clients should create an instance of this class when rewriting is needed,
/// and destroy it when finished to allow the release of the associated
@@ -42,42 +44,41 @@ namespace llvm {
const char* IVName;
// InsertedExpressions caches Values for reuse, so must track RAUW.
- std::map<std::pair<const SCEV *, Instruction *>, TrackingVH<Value> >
- InsertedExpressions;
+ DenseMap<std::pair<const SCEV *, Instruction *>, TrackingVH<Value>>
+ InsertedExpressions;
+
// InsertedValues only flags inserted instructions so needs no RAUW.
- std::set<AssertingVH<Value> > InsertedValues;
- std::set<AssertingVH<Value> > InsertedPostIncValues;
+ DenseSet<AssertingVH<Value>> InsertedValues;
+ DenseSet<AssertingVH<Value>> InsertedPostIncValues;
/// A memoization of the "relevant" loop for a given SCEV.
DenseMap<const SCEV *, const Loop *> RelevantLoops;
- /// \brief Addrecs referring to any of the given loops are expanded
- /// in post-inc mode. For example, expanding {1,+,1}<L> in post-inc mode
- /// returns the add instruction that adds one to the phi for {0,+,1}<L>,
- /// as opposed to a new phi starting at 1. This is only supported in
- /// non-canonical mode.
+ /// Addrecs referring to any of the given loops are expanded in post-inc
+ /// mode. For example, expanding {1,+,1}<L> in post-inc mode returns the add
+ /// instruction that adds one to the phi for {0,+,1}<L>, as opposed to a new
+ /// phi starting at 1. This is only supported in non-canonical mode.
PostIncLoopSet PostIncLoops;
- /// \brief When this is non-null, addrecs expanded in the loop it indicates
- /// should be inserted with increments at IVIncInsertPos.
+ /// When this is non-null, addrecs expanded in the loop it indicates should
+ /// be inserted with increments at IVIncInsertPos.
const Loop *IVIncInsertLoop;
- /// \brief When expanding addrecs in the IVIncInsertLoop loop, insert the IV
+ /// When expanding addrecs in the IVIncInsertLoop loop, insert the IV
/// increment at this position.
Instruction *IVIncInsertPos;
- /// \brief Phis that complete an IV chain. Reuse
- std::set<AssertingVH<PHINode> > ChainedPhis;
+ /// Phis that complete an IV chain; kept so they can be reused.
+ DenseSet<AssertingVH<PHINode>> ChainedPhis;
- /// \brief When true, expressions are expanded in "canonical" form. In
- /// particular, addrecs are expanded as arithmetic based on a canonical
- /// induction variable. When false, expression are expanded in a more
- /// literal form.
+ /// When true, expressions are expanded in "canonical" form. In particular,
+ /// addrecs are expanded as arithmetic based on a canonical induction
+ /// variable. When false, expressions are expanded in a more literal form.
bool CanonicalMode;
- /// \brief When invoked from LSR, the expander is in "strength reduction"
- /// mode. The only difference is that phi's are only reused if they are
- /// already in "expanded" form.
+ /// When invoked from LSR, the expander is in "strength reduction" mode. The
+ /// only difference is that phi's are only reused if they are already in
+ /// "expanded" form.
bool LSRMode;
typedef IRBuilder<TargetFolder> BuilderType;
@@ -130,7 +131,7 @@ namespace llvm {
friend struct SCEVVisitor<SCEVExpander, Value*>;
public:
- /// \brief Construct a SCEVExpander in "canonical" mode.
+ /// Construct a SCEVExpander in "canonical" mode.
explicit SCEVExpander(ScalarEvolution &se, const DataLayout &DL,
const char *name)
: SE(se), DL(DL), IVName(name), IVIncInsertLoop(nullptr),
@@ -150,9 +151,9 @@ namespace llvm {
void setDebugType(const char* s) { DebugType = s; }
#endif
- /// \brief Erase the contents of the InsertedExpressions map so that users
- /// trying to expand the same expression into multiple BasicBlocks or
- /// different places within the same BasicBlock can do so.
+ /// Erase the contents of the InsertedExpressions map so that users trying
+ /// to expand the same expression into multiple BasicBlocks or different
+ /// places within the same BasicBlock can do so.
void clear() {
InsertedExpressions.clear();
InsertedValues.clear();
@@ -160,8 +161,8 @@ namespace llvm {
ChainedPhis.clear();
}
- /// \brief Return true for expressions that may incur non-trivial cost to
- /// evaluate at runtime.
+ /// Return true for expressions that may incur non-trivial cost to evaluate
+ /// at runtime.
///
/// At is an optional parameter which specifies point in code where user is
/// going to expand this expression. Sometimes this knowledge can lead to a
@@ -172,63 +173,60 @@ namespace llvm {
return isHighCostExpansionHelper(Expr, L, At, Processed);
}
- /// \brief This method returns the canonical induction variable of the
- /// specified type for the specified loop (inserting one if there is none).
- /// A canonical induction variable starts at zero and steps by one on each
+ /// This method returns the canonical induction variable of the specified
+ /// type for the specified loop (inserting one if there is none). A
+ /// canonical induction variable starts at zero and steps by one on each
/// iteration.
PHINode *getOrInsertCanonicalInductionVariable(const Loop *L, Type *Ty);
- /// \brief Return the induction variable increment's IV operand.
+ /// Return the induction variable increment's IV operand.
Instruction *getIVIncOperand(Instruction *IncV, Instruction *InsertPos,
bool allowScale);
- /// \brief Utility for hoisting an IV increment.
+ /// Utility for hoisting an IV increment.
bool hoistIVInc(Instruction *IncV, Instruction *InsertPos);
- /// \brief replace congruent phis with their most canonical
- /// representative. Return the number of phis eliminated.
+ /// Replace congruent phis with their most canonical representative. Return
+ /// the number of phis eliminated.
unsigned replaceCongruentIVs(Loop *L, const DominatorTree *DT,
SmallVectorImpl<WeakVH> &DeadInsts,
const TargetTransformInfo *TTI = nullptr);
- /// \brief Insert code to directly compute the specified SCEV expression
- /// into the program. The inserted code is inserted into the specified
- /// block.
+ /// Insert code to directly compute the specified SCEV expression into the
+ /// program. The inserted code is inserted into the specified block.
Value *expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I);
- /// \brief Insert code to directly compute the specified SCEV expression
- /// into the program. The inserted code is inserted into the SCEVExpander's
- /// current insertion point. If a type is specified, the result will be
- /// expanded to have that type, with a cast if necessary.
+ /// Insert code to directly compute the specified SCEV expression into the
+ /// program. The inserted code is inserted into the SCEVExpander's current
+ /// insertion point. If a type is specified, the result will be expanded to
+ /// have that type, with a cast if necessary.
Value *expandCodeFor(const SCEV *SH, Type *Ty = nullptr);
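As a usage sketch (S, InsertPt and F are hypothetical): expansion materializes IR that computes a SCEV at a chosen point.

// Sketch: emit IR computing S immediately before instruction InsertPt.
llvm::SCEVExpander Expander(SE, F.getParent()->getDataLayout(), "scev");
llvm::Value *Materialized = Expander.expandCodeFor(S, S->getType(), InsertPt);
(void)Materialized;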
- /// \brief Generates a code sequence that evaluates this predicate.
- /// The inserted instructions will be at position \p Loc.
- /// The result will be of type i1 and will have a value of 0 when the
- /// predicate is false and 1 otherwise.
+ /// Generates a code sequence that evaluates this predicate. The inserted
+ /// instructions will be at position \p Loc. The result will be of type i1
+ /// and will have a value of 0 when the predicate is false and 1 otherwise.
Value *expandCodeForPredicate(const SCEVPredicate *Pred, Instruction *Loc);
- /// \brief A specialized variant of expandCodeForPredicate, handling the
- /// case when we are expanding code for a SCEVEqualPredicate.
+ /// A specialized variant of expandCodeForPredicate, handling the case when
+ /// we are expanding code for a SCEVEqualPredicate.
Value *expandEqualPredicate(const SCEVEqualPredicate *Pred,
Instruction *Loc);
- /// \brief Generates code that evaluates if the \p AR expression will
- /// overflow.
+ /// Generates code that evaluates if the \p AR expression will overflow.
Value *generateOverflowCheck(const SCEVAddRecExpr *AR, Instruction *Loc,
bool Signed);
- /// \brief A specialized variant of expandCodeForPredicate, handling the
- /// case when we are expanding code for a SCEVWrapPredicate.
+ /// A specialized variant of expandCodeForPredicate, handling the case when
+ /// we are expanding code for a SCEVWrapPredicate.
Value *expandWrapPredicate(const SCEVWrapPredicate *P, Instruction *Loc);
- /// \brief A specialized variant of expandCodeForPredicate, handling the
- /// case when we are expanding code for a SCEVUnionPredicate.
+ /// A specialized variant of expandCodeForPredicate, handling the case when
+ /// we are expanding code for a SCEVUnionPredicate.
Value *expandUnionPredicate(const SCEVUnionPredicate *Pred,
Instruction *Loc);
- /// \brief Set the current IV increment loop and position.
+ /// Set the current IV increment loop and position.
void setIVIncInsertPos(const Loop *L, Instruction *Pos) {
assert(!CanonicalMode &&
"IV increment positions are not supported in CanonicalMode");
@@ -236,7 +234,7 @@ namespace llvm {
IVIncInsertPos = Pos;
}
- /// \brief Enable post-inc expansion for addrecs referring to the given
+ /// Enable post-inc expansion for addrecs referring to the given
/// loops. Post-inc expansion is only supported in non-canonical mode.
void setPostInc(const PostIncLoopSet &L) {
assert(!CanonicalMode &&
@@ -244,7 +242,7 @@ namespace llvm {
PostIncLoops = L;
}
- /// \brief Disable all post-inc expansion.
+ /// Disable all post-inc expansion.
void clearPostInc() {
PostIncLoops.clear();
@@ -253,30 +251,29 @@ namespace llvm {
InsertedPostIncValues.clear();
}
- /// \brief Disable the behavior of expanding expressions in canonical form
- /// rather than in a more literal form. Non-canonical mode is useful for
- /// late optimization passes.
+ /// Disable the behavior of expanding expressions in canonical form rather
+ /// than in a more literal form. Non-canonical mode is useful for late
+ /// optimization passes.
void disableCanonicalMode() { CanonicalMode = false; }
void enableLSRMode() { LSRMode = true; }
- /// \brief Set the current insertion point. This is useful if multiple calls
- /// to expandCodeFor() are going to be made with the same insert point and
- /// the insert point may be moved during one of the expansions (e.g. if the
+ /// Set the current insertion point. This is useful if multiple calls to
+ /// expandCodeFor() are going to be made with the same insert point and the
+ /// insert point may be moved during one of the expansions (e.g. if the
/// insert point is not a block terminator).
void setInsertPoint(Instruction *IP) {
assert(IP);
Builder.SetInsertPoint(IP);
}
- /// \brief Clear the current insertion point. This is useful if the
- /// instruction that had been serving as the insertion point may have been
- /// deleted.
+ /// Clear the current insertion point. This is useful if the instruction
+ /// that had been serving as the insertion point may have been deleted.
void clearInsertPoint() {
Builder.ClearInsertionPoint();
}
- /// \brief Return true if the specified instruction was inserted by the code
+ /// Return true if the specified instruction was inserted by the code
/// rewriter. If so, the client should not modify the instruction.
bool isInsertedInstruction(Instruction *I) const {
return InsertedValues.count(I) || InsertedPostIncValues.count(I);
@@ -284,7 +281,14 @@ namespace llvm {
void setChainedPhi(PHINode *PN) { ChainedPhis.insert(PN); }
- /// \brief Try to find LLVM IR value for S available at the point At.
+ /// Try to find existing LLVM IR value for S available at the point At.
+ Value *getExactExistingExpansion(const SCEV *S, const Instruction *At,
+ Loop *L);
+
+ /// Try to find the ValueOffsetPair for S. The function is mainly used to
+ /// check whether S can be expanded cheaply. If this returns a non-None
+ /// value, we know we can codegen the `ValueOffsetPair` into a suitable
+ /// expansion identical to S so that S can be expanded cheaply.
///
/// L is a hint which tells in which loop to look for the suitable value.
/// On success return value which is equivalent to the expanded S at point
@@ -292,44 +296,46 @@ namespace llvm {
///
/// Note that this function does not perform an exhaustive search. I.e if it
/// didn't find any value it does not mean that there is no such value.
- Value *findExistingExpansion(const SCEV *S, const Instruction *At, Loop *L);
+ ///
+ Optional<ScalarEvolution::ValueOffsetPair>
+ getRelatedExistingExpansion(const SCEV *S, const Instruction *At, Loop *L);
private:
LLVMContext &getContext() const { return SE.getContext(); }
- /// \brief Recursive helper function for isHighCostExpansion.
+ /// Recursive helper function for isHighCostExpansion.
bool isHighCostExpansionHelper(const SCEV *S, Loop *L,
const Instruction *At,
SmallPtrSetImpl<const SCEV *> &Processed);
- /// \brief Insert the specified binary operator, doing a small amount
- /// of work to avoid inserting an obviously redundant operation.
+ /// Insert the specified binary operator, doing a small amount of work to
+ /// avoid inserting an obviously redundant operation.
Value *InsertBinop(Instruction::BinaryOps Opcode, Value *LHS, Value *RHS);
- /// \brief Arrange for there to be a cast of V to Ty at IP, reusing an
- /// existing cast if a suitable one exists, moving an existing cast if a
- /// suitable one exists but isn't in the right place, or or creating a new
- /// one.
+ /// Arrange for there to be a cast of V to Ty at IP, reusing an existing
+ /// cast if a suitable one exists, moving an existing cast if a suitable one
+ /// exists but isn't in the right place, or creating a new one.
Value *ReuseOrCreateCast(Value *V, Type *Ty,
Instruction::CastOps Op,
BasicBlock::iterator IP);
- /// \brief Insert a cast of V to the specified type, which must be possible
- /// with a noop cast, doing what we can to share the casts.
+ /// Insert a cast of V to the specified type, which must be possible with a
+ /// noop cast, doing what we can to share the casts.
Value *InsertNoopCastOfTo(Value *V, Type *Ty);
- /// \brief Expand a SCEVAddExpr with a pointer type into a GEP
- /// instead of using ptrtoint+arithmetic+inttoptr.
+ /// Expand a SCEVAddExpr with a pointer type into a GEP instead of using
+ /// ptrtoint+arithmetic+inttoptr.
Value *expandAddToGEP(const SCEV *const *op_begin,
const SCEV *const *op_end,
PointerType *PTy, Type *Ty, Value *V);
- /// \brief Find a previous Value in ExprValueMap for expand.
- Value *FindValueInExprValueMap(const SCEV *S, const Instruction *InsertPt);
+ /// Find a previous Value in ExprValueMap for expand.
+ ScalarEvolution::ValueOffsetPair
+ FindValueInExprValueMap(const SCEV *S, const Instruction *InsertPt);
Value *expand(const SCEV *S);
- /// \brief Determine the most "relevant" loop for the given SCEV.
+ /// Determine the most "relevant" loop for the given SCEV.
const Loop *getRelevantLoop(const SCEV *);
Value *visitConstant(const SCEVConstant *S) {
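A hedged sketch of a client of the new interface above: before emitting fresh IR for S, ask the expander whether a related value already exists. Expander, S, At and L are assumed to be in scope, and ValueOffsetPair is assumed to pair the existing Value with a constant offset, as in ScalarEvolution's ExprValueMap.

// Illustrative only; not part of this header.
if (Optional<ScalarEvolution::ValueOffsetPair> VO =
        Expander.getRelatedExistingExpansion(S, At, L)) {
  // A value equivalent to S (up to a constant offset) is already available
  // at At, so expanding S here is expected to be cheap.
  Value *Existing = VO->first;
  (void)Existing;
}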
diff --git a/contrib/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h b/contrib/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
index ff24cafbe680..fdcd8be00dde 100644
--- a/contrib/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
+++ b/contrib/llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h
@@ -537,14 +537,58 @@ namespace llvm {
T.visitAll(Root);
}
- /// Recursively visits a SCEV expression and re-writes it.
+ /// Return true if any node in \p Root satisfies the predicate \p Pred.
+ template <typename PredTy>
+ bool SCEVExprContains(const SCEV *Root, PredTy Pred) {
+ struct FindClosure {
+ bool Found = false;
+ PredTy Pred;
+
+ FindClosure(PredTy Pred) : Pred(Pred) {}
+
+ bool follow(const SCEV *S) {
+ if (!Pred(S))
+ return true;
+
+ Found = true;
+ return false;
+ }
+
+ bool isDone() const { return Found; }
+ };
+
+ FindClosure FC(Pred);
+ visitAll(Root, FC);
+ return FC.Found;
+ }
+
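For illustration, a minimal helper one could build on SCEVExprContains; containsUnknown is hypothetical and not part of this header.

// Hypothetical helper: does the expression tree rooted at Root mention any
// SCEVUnknown node?
static bool containsUnknown(const SCEV *Root) {
  return SCEVExprContains(Root,
                          [](const SCEV *S) { return isa<SCEVUnknown>(S); });
}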
+ /// This visitor recursively visits a SCEV expression and re-writes it.
+ /// The result from each visit is cached, so it will return the same
+ /// SCEV for the same input.
template<typename SC>
class SCEVRewriteVisitor : public SCEVVisitor<SC, const SCEV *> {
protected:
ScalarEvolution &SE;
+ // Memoize the result of each visit so that we only compute once for
+ // the same input SCEV. This is to avoid redundant computations when
+ // a SCEV is referenced by multiple SCEVs. Without memoization, this
+ // visit algorithm would have exponential time complexity in the worst
+ // case, causing the compiler to hang on certain tests.
+ DenseMap<const SCEV *, const SCEV *> RewriteResults;
+
public:
SCEVRewriteVisitor(ScalarEvolution &SE) : SE(SE) {}
+ const SCEV *visit(const SCEV *S) {
+ auto It = RewriteResults.find(S);
+ if (It != RewriteResults.end())
+ return It->second;
+ auto* Visited = SCEVVisitor<SC, const SCEV *>::visit(S);
+ auto Result = RewriteResults.try_emplace(S, Visited);
+ assert(Result.second && "Should insert a new entry");
+ return Result.first->second;
+ }
+
const SCEV *visitConstant(const SCEVConstant *Constant) {
return Constant;
}
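As a sketch of the memoization above in action, a hypothetical rewriter that substitutes one SCEVUnknown value for another; because visit() caches its results, a subtree shared by many parents is rewritten only once.

// Illustrative subclass, not part of this header.
struct ReplaceUnknownRewriter
    : public SCEVRewriteVisitor<ReplaceUnknownRewriter> {
  Value *From, *To;
  ReplaceUnknownRewriter(ScalarEvolution &SE, Value *From, Value *To)
      : SCEVRewriteVisitor<ReplaceUnknownRewriter>(SE), From(From), To(To) {}
  const SCEV *visitUnknown(const SCEVUnknown *U) {
    // Map occurrences of From to To; everything else is left alone.
    return U->getValue() == From ? SE.getUnknown(To) : U;
  }
};

A caller would invoke it as ReplaceUnknownRewriter(SE, OldV, NewV).visit(Root); repeated visits of the same subexpression return the cached result.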
diff --git a/contrib/llvm/include/llvm/Analysis/ScopedNoAliasAA.h b/contrib/llvm/include/llvm/Analysis/ScopedNoAliasAA.h
index 87b85d4e6635..a7b57310d2d0 100644
--- a/contrib/llvm/include/llvm/Analysis/ScopedNoAliasAA.h
+++ b/contrib/llvm/include/llvm/Analysis/ScopedNoAliasAA.h
@@ -27,14 +27,13 @@ class ScopedNoAliasAAResult : public AAResultBase<ScopedNoAliasAAResult> {
friend AAResultBase<ScopedNoAliasAAResult>;
public:
- explicit ScopedNoAliasAAResult() : AAResultBase() {}
- ScopedNoAliasAAResult(ScopedNoAliasAAResult &&Arg)
- : AAResultBase(std::move(Arg)) {}
-
/// Handle invalidation events from the new pass manager.
///
/// By definition, this result is stateless and so remains valid.
- bool invalidate(Function &, const PreservedAnalyses &) { return false; }
+ bool invalidate(Function &, const PreservedAnalyses &,
+ FunctionAnalysisManager::Invalidator &) {
+ return false;
+ }
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB);
ModRefInfo getModRefInfo(ImmutableCallSite CS, const MemoryLocation &Loc);
@@ -42,19 +41,17 @@ public:
private:
bool mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias) const;
- void collectMDInDomain(const MDNode *List, const MDNode *Domain,
- SmallPtrSetImpl<const MDNode *> &Nodes) const;
};
/// Analysis pass providing a never-invalidated alias analysis result.
class ScopedNoAliasAA : public AnalysisInfoMixin<ScopedNoAliasAA> {
friend AnalysisInfoMixin<ScopedNoAliasAA>;
- static char PassID;
+ static AnalysisKey Key;
public:
typedef ScopedNoAliasAAResult Result;
- ScopedNoAliasAAResult run(Function &F, AnalysisManager<Function> &AM);
+ ScopedNoAliasAAResult run(Function &F, FunctionAnalysisManager &AM);
};
/// Legacy wrapper pass to provide the ScopedNoAliasAAResult object.
diff --git a/contrib/llvm/include/llvm/Analysis/TargetFolder.h b/contrib/llvm/include/llvm/Analysis/TargetFolder.h
index 12bf9fe78a47..ae75d3773362 100644
--- a/contrib/llvm/include/llvm/Analysis/TargetFolder.h
+++ b/contrib/llvm/include/llvm/Analysis/TargetFolder.h
@@ -34,9 +34,8 @@ class TargetFolder {
/// Fold - Fold the constant using target specific information.
Constant *Fold(Constant *C) const {
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- if (Constant *CF = ConstantFoldConstantExpression(CE, DL))
- return CF;
+ if (Constant *CF = ConstantFoldConstant(C, DL))
+ return CF;
return C;
}
diff --git a/contrib/llvm/include/llvm/Analysis/TargetLibraryInfo.def b/contrib/llvm/include/llvm/Analysis/TargetLibraryInfo.def
index b2a593d67dca..5d5e5b127e63 100644
--- a/contrib/llvm/include/llvm/Analysis/TargetLibraryInfo.def
+++ b/contrib/llvm/include/llvm/Analysis/TargetLibraryInfo.def
@@ -734,6 +734,9 @@ TLI_DEFINE_STRING_INTERNAL("memcpy")
/// void *memmove(void *s1, const void *s2, size_t n);
TLI_DEFINE_ENUM_INTERNAL(memmove)
TLI_DEFINE_STRING_INTERNAL("memmove")
+/// void *mempcpy(void *s1, const void *s2, size_t n);
+TLI_DEFINE_ENUM_INTERNAL(mempcpy)
+TLI_DEFINE_STRING_INTERNAL("mempcpy")
// void *memrchr(const void *s, int c, size_t n);
TLI_DEFINE_ENUM_INTERNAL(memrchr)
TLI_DEFINE_STRING_INTERNAL("memrchr")
diff --git a/contrib/llvm/include/llvm/Analysis/TargetLibraryInfo.h b/contrib/llvm/include/llvm/Analysis/TargetLibraryInfo.h
index 7efa6f059707..196fbc7faa8d 100644
--- a/contrib/llvm/include/llvm/Analysis/TargetLibraryInfo.h
+++ b/contrib/llvm/include/llvm/Analysis/TargetLibraryInfo.h
@@ -25,8 +25,8 @@ template <typename T> class ArrayRef;
/// Function 'VectorFnName' is equivalent to 'ScalarFnName' vectorized
/// by a factor 'VectorizationFactor'.
struct VecDesc {
- const char *ScalarFnName;
- const char *VectorFnName;
+ StringRef ScalarFnName;
+ StringRef VectorFnName;
unsigned VectorizationFactor;
};
@@ -50,7 +50,8 @@ class TargetLibraryInfoImpl {
unsigned char AvailableArray[(LibFunc::NumLibFuncs+3)/4];
llvm::DenseMap<unsigned, std::string> CustomNames;
- static const char *const StandardNames[LibFunc::NumLibFuncs];
+ static StringRef const StandardNames[LibFunc::NumLibFuncs];
+ bool ShouldExtI32Param, ShouldExtI32Return, ShouldSignExtI32Param;
enum AvailabilityState {
StandardName = 3, // (memset to all ones)
@@ -85,8 +86,9 @@ public:
/// addVectorizableFunctionsFromVecLib for filling up the tables of
/// vectorizable functions.
enum VectorLibrary {
- NoLibrary, // Don't use any vector library.
- Accelerate // Use Accelerate framework.
+ NoLibrary, // Don't use any vector library.
+ Accelerate, // Use Accelerate framework.
+ SVML // Intel short vector math library.
};
TargetLibraryInfoImpl();
@@ -171,6 +173,26 @@ public:
///
/// Set VF to the vectorization factor.
StringRef getScalarizedFunction(StringRef F, unsigned &VF) const;
+
+ /// Set to true iff i32 parameters to library functions should have signext
+ /// or zeroext attributes if they correspond to C-level int or unsigned int,
+ /// respectively.
+ void setShouldExtI32Param(bool Val) {
+ ShouldExtI32Param = Val;
+ }
+
+ /// Set to true iff i32 results from library functions should have signext
+ /// or zeroext attributes if they correspond to C-level int or unsigned int,
+ /// respectively.
+ void setShouldExtI32Return(bool Val) {
+ ShouldExtI32Return = Val;
+ }
+
+ /// Set to true iff i32 parameters to library functions should have the
+ /// signext attribute if they correspond to C-level int or unsigned int.
+ void setShouldSignExtI32Param(bool Val) {
+ ShouldSignExtI32Param = Val;
+ }
};
/// Provides information about what library functions are available for
@@ -251,7 +273,7 @@ public:
case LibFunc::exp2: case LibFunc::exp2f: case LibFunc::exp2l:
case LibFunc::memcmp: case LibFunc::strcmp: case LibFunc::strcpy:
case LibFunc::stpcpy: case LibFunc::strlen: case LibFunc::strnlen:
- case LibFunc::memchr:
+ case LibFunc::memchr: case LibFunc::mempcpy:
return true;
}
return false;
@@ -267,11 +289,38 @@ public:
return Impl->CustomNames.find(F)->second;
}
+ /// Returns the extension attribute kind to be used for i32 parameters
+ /// corresponding to C-level int or unsigned int. May be zeroext, signext,
+ /// or none.
+ Attribute::AttrKind getExtAttrForI32Param(bool Signed = true) const {
+ if (Impl->ShouldExtI32Param)
+ return Signed ? Attribute::SExt : Attribute::ZExt;
+ if (Impl->ShouldSignExtI32Param)
+ return Attribute::SExt;
+ return Attribute::None;
+ }
+
+ /// Returns the extension attribute kind to be used for i32 return values
+ /// corresponding to C-level int or unsigned int. May be zeroext, signext,
+ /// or none.
+ Attribute::AttrKind getExtAttrForI32Return(bool Signed = true) const {
+ if (Impl->ShouldExtI32Return)
+ return Signed ? Attribute::SExt : Attribute::ZExt;
+ return Attribute::None;
+ }
+
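A hedged sketch of how a call-emitting client might consume these accessors; TLI is an in-scope TargetLibraryInfo, and the attribute-attaching step is deliberately left abstract because it is not part of this header.

// Illustrative only: decide which extension attribute an i32 argument that
// corresponds to a C-level 'int' should carry.
Attribute::AttrKind Ext = TLI.getExtAttrForI32Param(/*Signed=*/true);
if (Ext != Attribute::None) {
  // Attach Ext (signext or zeroext) to the i32 argument when building the
  // libcall.
}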
/// Handle invalidation from the pass manager.
///
/// If we try to invalidate this info, just return false. It cannot become
- /// invalid even if the module changes.
- bool invalidate(Module &, const PreservedAnalyses &) { return false; }
+ /// invalid even if the module or function changes.
+ bool invalidate(Module &, const PreservedAnalyses &,
+ ModuleAnalysisManager::Invalidator &) {
+ return false;
+ }
+ bool invalidate(Function &, const PreservedAnalyses &,
+ FunctionAnalysisManager::Invalidator &) {
+ return false;
+ }
};
/// Analysis pass providing the \c TargetLibraryInfo.
@@ -295,21 +344,12 @@ public:
TargetLibraryAnalysis(TargetLibraryInfoImpl PresetInfoImpl)
: PresetInfoImpl(std::move(PresetInfoImpl)) {}
- // Move semantics. We spell out the constructors for MSVC.
- TargetLibraryAnalysis(TargetLibraryAnalysis &&Arg)
- : PresetInfoImpl(std::move(Arg.PresetInfoImpl)), Impls(std::move(Arg.Impls)) {}
- TargetLibraryAnalysis &operator=(TargetLibraryAnalysis &&RHS) {
- PresetInfoImpl = std::move(RHS.PresetInfoImpl);
- Impls = std::move(RHS.Impls);
- return *this;
- }
-
TargetLibraryInfo run(Module &M, ModuleAnalysisManager &);
TargetLibraryInfo run(Function &F, FunctionAnalysisManager &);
private:
friend AnalysisInfoMixin<TargetLibraryAnalysis>;
- static char PassID;
+ static AnalysisKey Key;
Optional<TargetLibraryInfoImpl> PresetInfoImpl;
diff --git a/contrib/llvm/include/llvm/Analysis/TargetTransformInfo.h b/contrib/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 7570d22a803c..d583614284ff 100644
--- a/contrib/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/contrib/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -87,7 +87,8 @@ public:
/// When used as a result of \c TargetIRAnalysis this method will be called
/// when the function this was computed for changes. When it returns false,
/// the information is preserved across those changes.
- bool invalidate(Function &, const PreservedAnalyses &) {
+ bool invalidate(Function &, const PreservedAnalyses &,
+ FunctionAnalysisManager::Invalidator &) {
// FIXME: We should probably in some way ensure that the subtarget
// information for a function hasn't changed.
return false;
@@ -242,13 +243,17 @@ public:
/// profitable. Set this to UINT_MAX to disable the loop body cost
/// restriction.
unsigned Threshold;
- /// If complete unrolling will reduce the cost of the loop below its
- /// expected dynamic cost while rolled by this percentage, apply a discount
- /// (below) to its unrolled cost.
- unsigned PercentDynamicCostSavedThreshold;
- /// The discount applied to the unrolled cost when the *dynamic* cost
- /// savings of unrolling exceed the \c PercentDynamicCostSavedThreshold.
- unsigned DynamicCostSavingsDiscount;
+ /// If complete unrolling will reduce the cost of the loop, we will boost
+ /// the Threshold by a certain percentage to allow more aggressive complete
+ /// unrolling. This value provides the maximum boost percentage that we
+ /// can apply to Threshold (the value should be no less than 100):
+ /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost,
+ /// MaxPercentThresholdBoost / 100)
+ /// E.g. if complete unrolling reduces the loop execution time by 50%,
+ /// then we boost the threshold by a factor of 2x. If unrolling is not
+ /// expected to reduce the running time, we do not increase the threshold
+ /// (see the sketch after this struct).
+ unsigned MaxPercentThresholdBoost;
/// The cost threshold for the unrolled loop when optimizing for size (set
/// to UINT_MAX to disable).
unsigned OptSizeThreshold;
@@ -264,6 +269,13 @@ public:
/// transformation will select an unrolling factor based on the current cost
/// threshold and other factors.
unsigned Count;
+ /// A forced peeling factor (the number of bodies of the original loop
+ /// that should be peeled off before the loop body). When set to 0, the
+ /// unrolling transformation will select a peeling factor based on profile
+ /// information and other factors.
+ unsigned PeelCount;
+ /// Default unroll count for loops with run-time trip count.
+ unsigned DefaultUnrollRuntimeCount;
// Set the maximum unrolling factor. The unrolling factor may be selected
// using the appropriate cost threshold, but may not exceed this number
// (set to UINT_MAX to disable). This does not apply in cases where the
@@ -273,6 +285,11 @@ public:
/// applies even if full unrolling is selected. This allows a target to fall
/// back to Partial unrolling if full unrolling is above FullUnrollMaxCount.
unsigned FullUnrollMaxCount;
+ // Represents the number of instructions optimized when a "back edge"
+ // becomes a "fall through" in the unrolled loop.
+ // For now we count a conditional branch on a backedge and a comparison
+ // feeding it.
+ unsigned BEInsns;
/// Allow partial unrolling (unrolling of loops to expand the size of the
/// loop body, not only to eliminate small constant-trip-count loops).
bool Partial;
@@ -288,6 +305,10 @@ public:
/// Apply loop unroll on any kind of loop
/// (mainly to loops that fail runtime unrolling).
bool Force;
+ /// Allow using trip count upper bound to unroll loops.
+ bool UpperBound;
+ /// Allow peeling off loop iterations for loops with low dynamic tripcount.
+ bool AllowPeeling;
};
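A sketch of the boost computation described for MaxPercentThresholdBoost above, written out in plain integer arithmetic; the function name and the cost inputs are illustrative, not the actual unroller code.

#include <algorithm>
#include <cstdint>

// Illustrative only: boost Threshold by the ratio RolledCost / UnrolledCost,
// capped at MaxPercentThresholdBoost percent.
inline unsigned boostedUnrollThreshold(unsigned Threshold, uint64_t RolledCost,
                                       uint64_t UnrolledCost,
                                       unsigned MaxPercentThresholdBoost) {
  if (UnrolledCost >= RolledCost)
    return Threshold; // unrolling is not expected to reduce the running time
  uint64_t BoostPercent = RolledCost * 100 / UnrolledCost;
  BoostPercent = std::min<uint64_t>(BoostPercent, MaxPercentThresholdBoost);
  return static_cast<unsigned>(Threshold * BoostPercent / 100);
}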
/// \brief Get target-customized preferences for the generic loop unrolling
@@ -351,6 +372,12 @@ public:
bool HasBaseReg, int64_t Scale,
unsigned AddrSpace = 0) const;
+ /// \brief Return true if the target supports the load / store
+ /// instruction with the given Offset of the form reg + Offset. It
+ /// may be that Offset is too big for a certain type (register
+ /// class).
+ bool isFoldableMemAccessOffset(Instruction *I, int64_t Offset) const;
+
/// \brief Return true if it's free to truncate a value of type Ty1 to type
/// Ty2. e.g. On x86 it's free to truncate a i32 value in register EAX to i16
/// by referencing its sub-register AX.
@@ -373,6 +400,10 @@ public:
/// target.
bool shouldBuildLookupTables() const;
+ /// \brief Return true if switches should be turned into lookup tables
+ /// containing this constant value for the target.
+ bool shouldBuildLookupTablesForConstant(Constant *C) const;
+
/// \brief Don't restrict interleaved unrolling to small loops.
bool enableAggressiveInterleaving(bool LoopHasReductions) const;
@@ -389,7 +420,8 @@ public:
bool isFPVectorizationPotentiallyUnsafe() const;
/// \brief Determine if the target supports unaligned memory accesses.
- bool allowsMisalignedMemoryAccesses(unsigned BitWidth, unsigned AddressSpace = 0,
+ bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
+ unsigned BitWidth, unsigned AddressSpace = 0,
unsigned Alignment = 1,
bool *Fast = nullptr) const;
@@ -435,7 +467,11 @@ public:
SK_Reverse, ///< Reverse the order of the vector.
SK_Alternate, ///< Choose alternate elements from vector.
SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset.
- SK_ExtractSubvector ///< ExtractSubvector Index indicates start offset.
+ SK_ExtractSubvector, ///< ExtractSubvector. Index indicates start offset.
+ SK_PermuteTwoSrc, ///< Merge elements from two source vectors into one
+ ///< with any shuffle mask.
+ SK_PermuteSingleSrc ///< Shuffle elements of single source vector with any
+ ///< shuffle mask.
};
/// \brief Additional information about an operand's possible values.
@@ -457,10 +493,6 @@ public:
/// \return The width of the largest scalar or vector register type.
unsigned getRegisterBitWidth(bool Vector) const;
- /// \return The bitwidth of the largest vector type that should be used to
- /// load/store in the given address space.
- unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
-
/// \return The size of a cache line in bytes.
unsigned getCacheLineSize() const;
@@ -611,6 +643,38 @@ public:
bool areInlineCompatible(const Function *Caller,
const Function *Callee) const;
+ /// \returns The bitwidth of the largest vector type that should be used to
+ /// load/store in the given address space.
+ unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
+
+ /// \returns True if the load instruction is legal to vectorize.
+ bool isLegalToVectorizeLoad(LoadInst *LI) const;
+
+ /// \returns True if the store instruction is legal to vectorize.
+ bool isLegalToVectorizeStore(StoreInst *SI) const;
+
+ /// \returns True if it is legal to vectorize the given load chain.
+ bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
+ unsigned Alignment,
+ unsigned AddrSpace) const;
+
+ /// \returns True if it is legal to vectorize the given store chain.
+ bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
+ unsigned Alignment,
+ unsigned AddrSpace) const;
+
+ /// \returns The new vector factor value if the target doesn't support \p
+ /// ChainSizeInBytes loads or has a better vector factor.
+ unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
+ unsigned ChainSizeInBytes,
+ VectorType *VecTy) const;
+
+ /// \returns The new vector factor value if the target doesn't support \p
+ /// ChainSizeInBytes stores or has a better vector factor.
+ unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
+ unsigned ChainSizeInBytes,
+ VectorType *VecTy) const;
+
/// @}
private:
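As a sketch of how a load/store vectorizer might consult the new hooks above; every name other than the TargetTransformInfo calls is hypothetical.

// Illustrative client: clamp a candidate vector factor to what the target
// declares legal for a chain of loads in address space AS.
unsigned chooseLoadVF(const TargetTransformInfo &TTI, unsigned VF,
                      unsigned LoadSize, unsigned ChainSizeInBytes,
                      unsigned Alignment, unsigned AS, VectorType *VecTy) {
  if (!TTI.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment, AS))
    return 1; // fall back to scalar loads
  return TTI.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
}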
@@ -659,16 +723,19 @@ public:
virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
int64_t BaseOffset, bool HasBaseReg,
int64_t Scale, unsigned AddrSpace) = 0;
+ virtual bool isFoldableMemAccessOffset(Instruction *I, int64_t Offset) = 0;
virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
virtual bool isProfitableToHoist(Instruction *I) = 0;
virtual bool isTypeLegal(Type *Ty) = 0;
virtual unsigned getJumpBufAlignment() = 0;
virtual unsigned getJumpBufSize() = 0;
virtual bool shouldBuildLookupTables() = 0;
+ virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
virtual bool enableInterleavedAccessVectorization() = 0;
virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
- virtual bool allowsMisalignedMemoryAccesses(unsigned BitWidth,
+ virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
+ unsigned BitWidth,
unsigned AddressSpace,
unsigned Alignment,
bool *Fast) = 0;
@@ -684,7 +751,6 @@ public:
Type *Ty) = 0;
virtual unsigned getNumberOfRegisters(bool Vector) = 0;
virtual unsigned getRegisterBitWidth(bool Vector) = 0;
- virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) = 0;
virtual unsigned getCacheLineSize() = 0;
virtual unsigned getPrefetchDistance() = 0;
virtual unsigned getMinPrefetchStride() = 0;
@@ -737,6 +803,21 @@ public:
Type *ExpectedType) = 0;
virtual bool areInlineCompatible(const Function *Caller,
const Function *Callee) const = 0;
+ virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
+ virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;
+ virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;
+ virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
+ unsigned Alignment,
+ unsigned AddrSpace) const = 0;
+ virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
+ unsigned Alignment,
+ unsigned AddrSpace) const = 0;
+ virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
+ unsigned ChainSizeInBytes,
+ VectorType *VecTy) const = 0;
+ virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
+ unsigned ChainSizeInBytes,
+ VectorType *VecTy) const = 0;
};
template <typename T>
@@ -820,6 +901,9 @@ public:
return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
Scale, AddrSpace);
}
+ bool isFoldableMemAccessOffset(Instruction *I, int64_t Offset) override {
+ return Impl.isFoldableMemAccessOffset(I, Offset);
+ }
bool isTruncateFree(Type *Ty1, Type *Ty2) override {
return Impl.isTruncateFree(Ty1, Ty2);
}
@@ -832,6 +916,9 @@ public:
bool shouldBuildLookupTables() override {
return Impl.shouldBuildLookupTables();
}
+ bool shouldBuildLookupTablesForConstant(Constant *C) override {
+ return Impl.shouldBuildLookupTablesForConstant(C);
+ }
bool enableAggressiveInterleaving(bool LoopHasReductions) override {
return Impl.enableAggressiveInterleaving(LoopHasReductions);
}
@@ -841,9 +928,10 @@ public:
bool isFPVectorizationPotentiallyUnsafe() override {
return Impl.isFPVectorizationPotentiallyUnsafe();
}
- bool allowsMisalignedMemoryAccesses(unsigned BitWidth, unsigned AddressSpace,
+ bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
+ unsigned BitWidth, unsigned AddressSpace,
unsigned Alignment, bool *Fast) override {
- return Impl.allowsMisalignedMemoryAccesses(BitWidth, AddressSpace,
+ return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
Alignment, Fast);
}
PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
@@ -875,10 +963,6 @@ public:
return Impl.getRegisterBitWidth(Vector);
}
- unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) override {
- return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
- }
-
unsigned getCacheLineSize() override {
return Impl.getCacheLineSize();
}
@@ -978,6 +1062,37 @@ public:
const Function *Callee) const override {
return Impl.areInlineCompatible(Caller, Callee);
}
+ unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
+ return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
+ }
+ bool isLegalToVectorizeLoad(LoadInst *LI) const override {
+ return Impl.isLegalToVectorizeLoad(LI);
+ }
+ bool isLegalToVectorizeStore(StoreInst *SI) const override {
+ return Impl.isLegalToVectorizeStore(SI);
+ }
+ bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
+ unsigned Alignment,
+ unsigned AddrSpace) const override {
+ return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
+ AddrSpace);
+ }
+ bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
+ unsigned Alignment,
+ unsigned AddrSpace) const override {
+ return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
+ AddrSpace);
+ }
+ unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
+ unsigned ChainSizeInBytes,
+ VectorType *VecTy) const override {
+ return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
+ }
+ unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
+ unsigned ChainSizeInBytes,
+ VectorType *VecTy) const override {
+ return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
+ }
};
template <typename T>
@@ -1025,11 +1140,11 @@ public:
return *this;
}
- Result run(const Function &F, AnalysisManager<Function> &);
+ Result run(const Function &F, FunctionAnalysisManager &);
private:
friend AnalysisInfoMixin<TargetIRAnalysis>;
- static char PassID;
+ static AnalysisKey Key;
/// \brief The callback used to produce a result.
///
diff --git a/contrib/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/contrib/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index a97624bc2ab0..68b38a7fa538 100644
--- a/contrib/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/contrib/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -152,6 +152,15 @@ public:
case Intrinsic::var_annotation:
case Intrinsic::experimental_gc_result:
case Intrinsic::experimental_gc_relocate:
+ case Intrinsic::coro_alloc:
+ case Intrinsic::coro_begin:
+ case Intrinsic::coro_free:
+ case Intrinsic::coro_end:
+ case Intrinsic::coro_frame:
+ case Intrinsic::coro_size:
+ case Intrinsic::coro_suspend:
+ case Intrinsic::coro_param:
+ case Intrinsic::coro_subfn_addr:
// These intrinsics don't actually represent code after lowering.
return TTI::TCC_Free;
}
@@ -226,6 +235,8 @@ public:
return -1;
}
+ bool isFoldableMemAccessOffset(Instruction *I, int64_t Offset) { return true; }
+
bool isTruncateFree(Type *Ty1, Type *Ty2) { return false; }
bool isProfitableToHoist(Instruction *I) { return true; }
@@ -237,6 +248,7 @@ public:
unsigned getJumpBufSize() { return 0; }
bool shouldBuildLookupTables() { return true; }
+ bool shouldBuildLookupTablesForConstant(Constant *C) { return true; }
bool enableAggressiveInterleaving(bool LoopHasReductions) { return false; }
@@ -244,7 +256,8 @@ public:
bool isFPVectorizationPotentiallyUnsafe() { return false; }
- bool allowsMisalignedMemoryAccesses(unsigned BitWidth,
+ bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
+ unsigned BitWidth,
unsigned AddressSpace,
unsigned Alignment,
bool *Fast) { return false; }
@@ -278,8 +291,6 @@ public:
unsigned getRegisterBitWidth(bool Vector) { return 32; }
- unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) { return 128; }
-
unsigned getCacheLineSize() { return 0; }
unsigned getPrefetchDistance() { return 0; }
@@ -381,6 +392,36 @@ public:
(Caller->getFnAttribute("target-features") ==
Callee->getFnAttribute("target-features"));
}
+
+ unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { return 128; }
+
+ bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; }
+
+ bool isLegalToVectorizeStore(StoreInst *SI) const { return true; }
+
+ bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
+ unsigned Alignment,
+ unsigned AddrSpace) const {
+ return true;
+ }
+
+ bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
+ unsigned Alignment,
+ unsigned AddrSpace) const {
+ return true;
+ }
+
+ unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
+ unsigned ChainSizeInBytes,
+ VectorType *VecTy) const {
+ return VF;
+ }
+
+ unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
+ unsigned ChainSizeInBytes,
+ VectorType *VecTy) const {
+ return VF;
+ }
};
/// \brief CRTP base class for use as a mix-in that aids implementing
@@ -394,12 +435,6 @@ protected:
explicit TargetTransformInfoImplCRTPBase(const DataLayout &DL) : BaseT(DL) {}
public:
- // Provide value semantics. MSVC requires that we spell all of these out.
- TargetTransformInfoImplCRTPBase(const TargetTransformInfoImplCRTPBase &Arg)
- : BaseT(static_cast<const BaseT &>(Arg)) {}
- TargetTransformInfoImplCRTPBase(TargetTransformInfoImplCRTPBase &&Arg)
- : BaseT(std::move(static_cast<BaseT &>(Arg))) {}
-
using BaseT::getCallCost;
unsigned getCallCost(const Function *F, int NumArgs) {
@@ -447,18 +482,22 @@ public:
int64_t BaseOffset = 0;
int64_t Scale = 0;
- // Assumes the address space is 0 when Ptr is nullptr.
- unsigned AS =
- (Ptr == nullptr ? 0 : Ptr->getType()->getPointerAddressSpace());
- auto GTI = gep_type_begin(PointeeType, AS, Operands);
+ auto GTI = gep_type_begin(PointeeType, Operands);
+ Type *TargetType;
for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) {
+ TargetType = GTI.getIndexedType();
// We assume that the cost of Scalar GEP with constant index and the
// cost of Vector GEP with splat constant index are the same.
const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
if (!ConstIdx)
if (auto Splat = getSplatValue(*I))
ConstIdx = dyn_cast<ConstantInt>(Splat);
- if (isa<SequentialType>(*GTI)) {
+ if (StructType *STy = GTI.getStructTypeOrNull()) {
+ // For structures the index is always splat or scalar constant
+ assert(ConstIdx && "Unexpected GEP index");
+ uint64_t Field = ConstIdx->getZExtValue();
+ BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field);
+ } else {
int64_t ElementSize = DL.getTypeAllocSize(GTI.getIndexedType());
if (ConstIdx)
BaseOffset += ConstIdx->getSExtValue() * ElementSize;
@@ -469,20 +508,16 @@ public:
return TTI::TCC_Basic;
Scale = ElementSize;
}
- } else {
- StructType *STy = cast<StructType>(*GTI);
- // For structures the index is always splat or scalar constant
- assert(ConstIdx && "Unexpected GEP index");
- uint64_t Field = ConstIdx->getZExtValue();
- BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field);
}
}
+ // Assumes the address space is 0 when Ptr is nullptr.
+ unsigned AS =
+ (Ptr == nullptr ? 0 : Ptr->getType()->getPointerAddressSpace());
if (static_cast<T *>(this)->isLegalAddressingMode(
- PointerType::get(*GTI, AS), const_cast<GlobalValue *>(BaseGV),
- BaseOffset, HasBaseReg, Scale, AS)) {
+ TargetType, const_cast<GlobalValue *>(BaseGV), BaseOffset,
+ HasBaseReg, Scale, AS))
return TTI::TCC_Free;
- }
return TTI::TCC_Basic;
}
diff --git a/contrib/llvm/include/llvm/Analysis/TypeBasedAliasAnalysis.h b/contrib/llvm/include/llvm/Analysis/TypeBasedAliasAnalysis.h
index 229b0f97b983..fd726e6cd37f 100644
--- a/contrib/llvm/include/llvm/Analysis/TypeBasedAliasAnalysis.h
+++ b/contrib/llvm/include/llvm/Analysis/TypeBasedAliasAnalysis.h
@@ -27,13 +27,13 @@ class TypeBasedAAResult : public AAResultBase<TypeBasedAAResult> {
friend AAResultBase<TypeBasedAAResult>;
public:
- explicit TypeBasedAAResult() {}
- TypeBasedAAResult(TypeBasedAAResult &&Arg) : AAResultBase(std::move(Arg)) {}
-
/// Handle invalidation events from the new pass manager.
///
/// By definition, this result is stateless and so remains valid.
- bool invalidate(Function &, const PreservedAnalyses &) { return false; }
+ bool invalidate(Function &, const PreservedAnalyses &,
+ FunctionAnalysisManager::Invalidator &) {
+ return false;
+ }
AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB);
bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal);
@@ -50,12 +50,12 @@ private:
/// Analysis pass providing a never-invalidated alias analysis result.
class TypeBasedAA : public AnalysisInfoMixin<TypeBasedAA> {
friend AnalysisInfoMixin<TypeBasedAA>;
- static char PassID;
+ static AnalysisKey Key;
public:
typedef TypeBasedAAResult Result;
- TypeBasedAAResult run(Function &F, AnalysisManager<Function> &AM);
+ TypeBasedAAResult run(Function &F, FunctionAnalysisManager &AM);
};
/// Legacy wrapper pass to provide the TypeBasedAAResult object.
diff --git a/contrib/llvm/include/llvm/Analysis/ValueTracking.h b/contrib/llvm/include/llvm/Analysis/ValueTracking.h
index 2c6221d4933f..dd767217345a 100644
--- a/contrib/llvm/include/llvm/Analysis/ValueTracking.h
+++ b/contrib/llvm/include/llvm/Analysis/ValueTracking.h
@@ -16,7 +16,6 @@
#define LLVM_ANALYSIS_VALUETRACKING_H
#include "llvm/IR/CallSite.h"
-#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/DataTypes.h"
@@ -49,7 +48,7 @@ template <typename T> class ArrayRef;
/// where V is a vector, the known zero and known one values are the
/// same width as the vector element, and the bit is set only if it is true
/// for all of the elements in the vector.
- void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
+ void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne,
const DataLayout &DL, unsigned Depth = 0,
AssumptionCache *AC = nullptr,
const Instruction *CxtI = nullptr,
@@ -60,14 +59,15 @@ template <typename T> class ArrayRef;
void computeKnownBitsFromRangeMetadata(const MDNode &Ranges,
APInt &KnownZero, APInt &KnownOne);
/// Return true if LHS and RHS have no common bits set.
- bool haveNoCommonBitsSet(Value *LHS, Value *RHS, const DataLayout &DL,
+ bool haveNoCommonBitsSet(const Value *LHS, const Value *RHS,
+ const DataLayout &DL,
AssumptionCache *AC = nullptr,
const Instruction *CxtI = nullptr,
const DominatorTree *DT = nullptr);
/// Determine whether the sign bit is known to be zero or one. Convenience
/// wrapper around computeKnownBits.
- void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
+ void ComputeSignBit(const Value *V, bool &KnownZero, bool &KnownOne,
const DataLayout &DL, unsigned Depth = 0,
AssumptionCache *AC = nullptr,
const Instruction *CxtI = nullptr,
@@ -78,7 +78,7 @@ template <typename T> class ArrayRef;
/// of two when defined. Supports values with integer or pointer type and
/// vectors of integers. If 'OrZero' is set, then return true if the given
/// value is either a power of two or zero.
- bool isKnownToBeAPowerOfTwo(Value *V, const DataLayout &DL,
+ bool isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL,
bool OrZero = false, unsigned Depth = 0,
AssumptionCache *AC = nullptr,
const Instruction *CxtI = nullptr,
@@ -88,34 +88,35 @@ template <typename T> class ArrayRef;
/// vectors, return true if every element is known to be non-zero when
/// defined. Supports values with integer or pointer type and vectors of
/// integers.
- bool isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth = 0,
+ bool isKnownNonZero(const Value *V, const DataLayout &DL, unsigned Depth = 0,
AssumptionCache *AC = nullptr,
const Instruction *CxtI = nullptr,
const DominatorTree *DT = nullptr);
/// Returns true if the give value is known to be non-negative.
- bool isKnownNonNegative(Value *V, const DataLayout &DL, unsigned Depth = 0,
+ bool isKnownNonNegative(const Value *V, const DataLayout &DL,
+ unsigned Depth = 0,
AssumptionCache *AC = nullptr,
const Instruction *CxtI = nullptr,
const DominatorTree *DT = nullptr);
/// Returns true if the given value is known be positive (i.e. non-negative
/// and non-zero).
- bool isKnownPositive(Value *V, const DataLayout &DL, unsigned Depth = 0,
+ bool isKnownPositive(const Value *V, const DataLayout &DL, unsigned Depth = 0,
AssumptionCache *AC = nullptr,
const Instruction *CxtI = nullptr,
const DominatorTree *DT = nullptr);
/// Returns true if the given value is known be negative (i.e. non-positive
/// and non-zero).
- bool isKnownNegative(Value *V, const DataLayout &DL, unsigned Depth = 0,
+ bool isKnownNegative(const Value *V, const DataLayout &DL, unsigned Depth = 0,
AssumptionCache *AC = nullptr,
const Instruction *CxtI = nullptr,
const DominatorTree *DT = nullptr);
/// Return true if the given values are known to be non-equal when defined.
/// Supports scalar integer types only.
- bool isKnownNonEqual(Value *V1, Value *V2, const DataLayout &DL,
+ bool isKnownNonEqual(const Value *V1, const Value *V2, const DataLayout &DL,
AssumptionCache *AC = nullptr,
const Instruction *CxtI = nullptr,
const DominatorTree *DT = nullptr);
@@ -129,7 +130,8 @@ template <typename T> class ArrayRef;
/// where V is a vector, the mask, known zero, and known one values are the
/// same width as the vector element, and the bit is set only if it is true
/// for all of the elements in the vector.
- bool MaskedValueIsZero(Value *V, const APInt &Mask, const DataLayout &DL,
+ bool MaskedValueIsZero(const Value *V, const APInt &Mask,
+ const DataLayout &DL,
unsigned Depth = 0, AssumptionCache *AC = nullptr,
const Instruction *CxtI = nullptr,
const DominatorTree *DT = nullptr);
@@ -141,7 +143,7 @@ template <typename T> class ArrayRef;
/// equal to each other, so we return 3. For vectors, return the number of
/// sign bits for the vector element with the mininum number of known sign
/// bits.
- unsigned ComputeNumSignBits(Value *Op, const DataLayout &DL,
+ unsigned ComputeNumSignBits(const Value *Op, const DataLayout &DL,
unsigned Depth = 0, AssumptionCache *AC = nullptr,
const Instruction *CxtI = nullptr,
const DominatorTree *DT = nullptr);
@@ -213,7 +215,7 @@ template <typename T> class ArrayRef;
/// If we can compute the length of the string pointed to by the specified
/// pointer, return 'len+1'. If we can't, return 0.
- uint64_t GetStringLength(Value *V);
+ uint64_t GetStringLength(const Value *V);
/// This method strips off any GEP address adjustments and pointer casts from
/// the specified value, returning the original object being addressed. Note
@@ -306,12 +308,12 @@ template <typename T> class ArrayRef;
bool isKnownNonNull(const Value *V);
/// Return true if this pointer couldn't possibly be null. If the context
- /// instruction is specified, perform context-sensitive analysis and return
- /// true if the pointer couldn't possibly be null at the specified
- /// instruction.
+ /// instruction and dominator tree are specified, perform context-sensitive
+ /// analysis and return true if the pointer couldn't possibly be null at the
+ /// specified instruction.
bool isKnownNonNullAt(const Value *V,
const Instruction *CtxI = nullptr,
- const DominatorTree *DT = nullptr);
+ const DominatorTree *DT = nullptr);
/// Return true if it is valid to use the assumptions provided by an
/// assume intrinsic, I, at the point in the control-flow identified by the
@@ -320,23 +322,25 @@ template <typename T> class ArrayRef;
const DominatorTree *DT = nullptr);
enum class OverflowResult { AlwaysOverflows, MayOverflow, NeverOverflows };
- OverflowResult computeOverflowForUnsignedMul(Value *LHS, Value *RHS,
+ OverflowResult computeOverflowForUnsignedMul(const Value *LHS,
+ const Value *RHS,
const DataLayout &DL,
AssumptionCache *AC,
const Instruction *CxtI,
const DominatorTree *DT);
- OverflowResult computeOverflowForUnsignedAdd(Value *LHS, Value *RHS,
+ OverflowResult computeOverflowForUnsignedAdd(const Value *LHS,
+ const Value *RHS,
const DataLayout &DL,
AssumptionCache *AC,
const Instruction *CxtI,
const DominatorTree *DT);
- OverflowResult computeOverflowForSignedAdd(Value *LHS, Value *RHS,
+ OverflowResult computeOverflowForSignedAdd(const Value *LHS, const Value *RHS,
const DataLayout &DL,
AssumptionCache *AC = nullptr,
const Instruction *CxtI = nullptr,
const DominatorTree *DT = nullptr);
/// This version also leverages the sign bit of Add if known.
- OverflowResult computeOverflowForSignedAdd(AddOperator *Add,
+ OverflowResult computeOverflowForSignedAdd(const AddOperator *Add,
const DataLayout &DL,
AssumptionCache *AC = nullptr,
const Instruction *CxtI = nullptr,
@@ -345,7 +349,8 @@ template <typename T> class ArrayRef;
/// Returns true if the arithmetic part of the \p II 's result is
/// used only along the paths control dependent on the computation
/// not overflowing, \p II being an <op>.with.overflow intrinsic.
- bool isOverflowIntrinsicNoWrap(IntrinsicInst *II, DominatorTree &DT);
+ bool isOverflowIntrinsicNoWrap(const IntrinsicInst *II,
+ const DominatorTree &DT);
/// Return true if this function can prove that the instruction I will
/// always transfer execution to one of its successors (including the next
@@ -445,11 +450,16 @@ template <typename T> class ArrayRef;
///
SelectPatternResult matchSelectPattern(Value *V, Value *&LHS, Value *&RHS,
Instruction::CastOps *CastOp = nullptr);
-
- /// Parse out a conservative ConstantRange from !range metadata.
- ///
- /// E.g. if RangeMD is !{i32 0, i32 10, i32 15, i32 20} then return [0, 20).
- ConstantRange getConstantRangeFromMetadata(MDNode &RangeMD);
+ static inline SelectPatternResult
+ matchSelectPattern(const Value *V, const Value *&LHS, const Value *&RHS,
+ Instruction::CastOps *CastOp = nullptr) {
+ Value *L = const_cast<Value*>(LHS);
+ Value *R = const_cast<Value*>(RHS);
+ auto Result = matchSelectPattern(const_cast<Value*>(V), L, R);
+ LHS = L;
+ RHS = R;
+ return Result;
+ }
/// Return true if RHS is known to be implied true by LHS. Return false if
/// RHS is known to be implied false by LHS. Otherwise, return None if no
@@ -461,10 +471,13 @@ template <typename T> class ArrayRef;
/// T | T | F
/// F | T | T
/// (A)
- Optional<bool> isImpliedCondition(
- Value *LHS, Value *RHS, const DataLayout &DL, bool InvertAPred = false,
- unsigned Depth = 0, AssumptionCache *AC = nullptr,
- const Instruction *CxtI = nullptr, const DominatorTree *DT = nullptr);
+ Optional<bool> isImpliedCondition(const Value *LHS, const Value *RHS,
+ const DataLayout &DL,
+ bool InvertAPred = false,
+ unsigned Depth = 0,
+ AssumptionCache *AC = nullptr,
+ const Instruction *CxtI = nullptr,
+ const DominatorTree *DT = nullptr);
} // end namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Bitcode/BitCodes.h b/contrib/llvm/include/llvm/Bitcode/BitCodes.h
index 66400b697c5c..cfc7a1d7d6bd 100644
--- a/contrib/llvm/include/llvm/Bitcode/BitCodes.h
+++ b/contrib/llvm/include/llvm/Bitcode/BitCodes.h
@@ -25,6 +25,14 @@
#include <cassert>
namespace llvm {
+/// Offsets of the 32-bit fields of bitcode wrapper header.
+static const unsigned BWH_MagicField = 0 * 4;
+static const unsigned BWH_VersionField = 1 * 4;
+static const unsigned BWH_OffsetField = 2 * 4;
+static const unsigned BWH_SizeField = 3 * 4;
+static const unsigned BWH_CPUTypeField = 4 * 4;
+static const unsigned BWH_HeaderSize = 5 * 4;
+
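A small sketch of consuming these offsets when probing a wrapped bitcode file; readWrapperOffsetField is hypothetical, and it assumes llvm/Support/Endian.h is available for the little-endian read helper.

// Illustrative only: fetch the 32-bit little-endian 'Offset' field of a
// bitcode wrapper header located at BufPtr.
inline uint32_t readWrapperOffsetField(const unsigned char *BufPtr) {
  return llvm::support::endian::read32le(BufPtr + llvm::BWH_OffsetField);
}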
namespace bitc {
enum StandardWidths {
BlockIDWidth = 8, // We use VBR-8 for block IDs.
diff --git a/contrib/llvm/include/llvm/Bitcode/ReaderWriter.h b/contrib/llvm/include/llvm/Bitcode/BitcodeReader.h
index 76a60a0b8d25..9e042b17241f 100644
--- a/contrib/llvm/include/llvm/Bitcode/ReaderWriter.h
+++ b/contrib/llvm/include/llvm/Bitcode/BitcodeReader.h
@@ -1,4 +1,4 @@
-//===-- llvm/Bitcode/ReaderWriter.h - Bitcode reader/writers ----*- C++ -*-===//
+//===-- llvm/Bitcode/BitcodeReader.h - Bitcode reader ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,108 +7,135 @@
//
//===----------------------------------------------------------------------===//
//
-// This header defines interfaces to read and write LLVM bitcode files/streams.
+// This header defines interfaces to read LLVM bitcode files/streams.
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_BITCODE_READERWRITER_H
-#define LLVM_BITCODE_READERWRITER_H
+#ifndef LLVM_BITCODE_BITCODEREADER_H
+#define LLVM_BITCODE_BITCODEREADER_H
+#include "llvm/Bitcode/BitCodes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/MemoryBuffer.h"
#include <memory>
-#include <string>
namespace llvm {
- class BitstreamWriter;
- class DataStreamer;
class LLVMContext;
class Module;
- class ModulePass;
- class raw_ostream;
- /// Offsets of the 32-bit fields of bitcode wrapper header.
- static const unsigned BWH_MagicField = 0*4;
- static const unsigned BWH_VersionField = 1*4;
- static const unsigned BWH_OffsetField = 2*4;
- static const unsigned BWH_SizeField = 3*4;
- static const unsigned BWH_CPUTypeField = 4*4;
- static const unsigned BWH_HeaderSize = 5*4;
+ // These functions are for converting Expected/Error values to
+ // ErrorOr/std::error_code for compatibility with legacy clients. FIXME:
+ // Remove these functions once no longer needed by the C and libLTO APIs.
+
+ std::error_code errorToErrorCodeAndEmitErrors(LLVMContext &Ctx, Error Err);
+
+ template <typename T>
+ ErrorOr<T> expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected<T> Val) {
+ if (!Val)
+ return errorToErrorCodeAndEmitErrors(Ctx, Val.takeError());
+ return std::move(*Val);
+ }
+
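A minimal sketch of the compatibility shim this helper enables; parseBitcodeFileLegacy is hypothetical and simply adapts the Expected-based parseBitcodeFile declared later in this header to the old ErrorOr signature.

// Illustrative legacy wrapper built on expectedToErrorOrAndEmitErrors.
inline ErrorOr<std::unique_ptr<Module>>
parseBitcodeFileLegacy(MemoryBufferRef Buffer, LLVMContext &Context) {
  return expectedToErrorOrAndEmitErrors(Context,
                                        parseBitcodeFile(Buffer, Context));
}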
+ /// Represents a module in a bitcode file.
+ class BitcodeModule {
+ // This covers the identification (if present) and module blocks.
+ ArrayRef<uint8_t> Buffer;
+ StringRef ModuleIdentifier;
+
+ // The bitstream location of the IDENTIFICATION_BLOCK.
+ uint64_t IdentificationBit;
+
+ // The bitstream location of this module's MODULE_BLOCK.
+ uint64_t ModuleBit;
+
+ BitcodeModule(ArrayRef<uint8_t> Buffer, StringRef ModuleIdentifier,
+ uint64_t IdentificationBit, uint64_t ModuleBit)
+ : Buffer(Buffer), ModuleIdentifier(ModuleIdentifier),
+ IdentificationBit(IdentificationBit), ModuleBit(ModuleBit) {}
+
+ // Calls the ctor.
+ friend Expected<std::vector<BitcodeModule>>
+ getBitcodeModuleList(MemoryBufferRef Buffer);
+
+ Expected<std::unique_ptr<Module>> getModuleImpl(LLVMContext &Context,
+ bool MaterializeAll,
+ bool ShouldLazyLoadMetadata,
+ bool IsImporting);
+
+ public:
+ StringRef getBuffer() const {
+ return StringRef((const char *)Buffer.begin(), Buffer.size());
+ }
+
+ StringRef getModuleIdentifier() const { return ModuleIdentifier; }
+
+ /// Read the bitcode module and prepare for lazy deserialization of function
+ /// bodies. If ShouldLazyLoadMetadata is true, lazily load metadata as well.
+ /// If IsImporting is true, this module is being parsed for ThinLTO
+ /// importing into another module.
+ Expected<std::unique_ptr<Module>> getLazyModule(LLVMContext &Context,
+ bool ShouldLazyLoadMetadata,
+ bool IsImporting);
+
+ /// Read the entire bitcode module and return it.
+ Expected<std::unique_ptr<Module>> parseModule(LLVMContext &Context);
+
+ /// Check if the given bitcode buffer contains a summary block.
+ Expected<bool> hasSummary();
+
+ /// Parse the specified bitcode buffer, returning the module summary index.
+ Expected<std::unique_ptr<ModuleSummaryIndex>> getSummary();
+ };
+
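A hedged sketch of consuming the BitcodeModule API above together with getBitcodeModuleList (declared just below) to materialize every module in a multi-module buffer; parseAllModules is illustrative, not part of this header.

// Illustrative driver: parse each module found in Buf, propagating failures
// through the new Error/Expected machinery.
inline Expected<std::vector<std::unique_ptr<Module>>>
parseAllModules(MemoryBufferRef Buf, LLVMContext &Ctx) {
  Expected<std::vector<BitcodeModule>> BMsOrErr = getBitcodeModuleList(Buf);
  if (!BMsOrErr)
    return BMsOrErr.takeError();
  std::vector<std::unique_ptr<Module>> Mods;
  for (BitcodeModule &BM : *BMsOrErr) {
    Expected<std::unique_ptr<Module>> MOrErr = BM.parseModule(Ctx);
    if (!MOrErr)
      return MOrErr.takeError();
    Mods.push_back(std::move(*MOrErr));
  }
  return std::move(Mods);
}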
+ /// Returns a list of modules in the specified bitcode buffer.
+ Expected<std::vector<BitcodeModule>>
+ getBitcodeModuleList(MemoryBufferRef Buffer);
/// Read the header of the specified bitcode buffer and prepare for lazy
/// deserialization of function bodies. If ShouldLazyLoadMetadata is true,
- /// lazily load metadata as well. If successful, this moves Buffer. On
- /// error, this *does not* move Buffer.
- ErrorOr<std::unique_ptr<Module>>
- getLazyBitcodeModule(std::unique_ptr<MemoryBuffer> &&Buffer,
- LLVMContext &Context,
- bool ShouldLazyLoadMetadata = false);
-
- /// Read the header of the specified stream and prepare for lazy
- /// deserialization and streaming of function bodies.
- ErrorOr<std::unique_ptr<Module>>
- getStreamedBitcodeModule(StringRef Name,
- std::unique_ptr<DataStreamer> Streamer,
- LLVMContext &Context);
+ /// lazily load metadata as well. If IsImporting is true, this module is
+ /// being parsed for ThinLTO importing into another module.
+ Expected<std::unique_ptr<Module>>
+ getLazyBitcodeModule(MemoryBufferRef Buffer, LLVMContext &Context,
+ bool ShouldLazyLoadMetadata = false,
+ bool IsImporting = false);
+
+ /// Like getLazyBitcodeModule, except that the module takes ownership of
+ /// the memory buffer if successful. If successful, this moves Buffer. On
+ /// error, this *does not* move Buffer. If IsImporting is true, this module is
+ /// being parsed for ThinLTO importing into another module.
+ Expected<std::unique_ptr<Module>> getOwningLazyBitcodeModule(
+ std::unique_ptr<MemoryBuffer> &&Buffer, LLVMContext &Context,
+ bool ShouldLazyLoadMetadata = false, bool IsImporting = false);
/// Read the header of the specified bitcode buffer and extract just the
/// triple information. If successful, this returns a string. On error, this
/// returns "".
- std::string getBitcodeTargetTriple(MemoryBufferRef Buffer,
- LLVMContext &Context);
+ Expected<std::string> getBitcodeTargetTriple(MemoryBufferRef Buffer);
/// Return true if \p Buffer contains a bitcode file with ObjC code (category
/// or class) in it.
- bool isBitcodeContainingObjCCategory(MemoryBufferRef Buffer,
- LLVMContext &Context);
+ Expected<bool> isBitcodeContainingObjCCategory(MemoryBufferRef Buffer);
/// Read the header of the specified bitcode buffer and extract just the
/// producer string information. If successful, this returns a string. On
/// error, this returns "".
- std::string getBitcodeProducerString(MemoryBufferRef Buffer,
- LLVMContext &Context);
+ Expected<std::string> getBitcodeProducerString(MemoryBufferRef Buffer);
/// Read the specified bitcode file, returning the module.
- ErrorOr<std::unique_ptr<Module>> parseBitcodeFile(MemoryBufferRef Buffer,
- LLVMContext &Context);
+ Expected<std::unique_ptr<Module>> parseBitcodeFile(MemoryBufferRef Buffer,
+ LLVMContext &Context);
/// Check if the given bitcode buffer contains a summary block.
- bool
- hasGlobalValueSummary(MemoryBufferRef Buffer,
- const DiagnosticHandlerFunction &DiagnosticHandler);
+ Expected<bool> hasGlobalValueSummary(MemoryBufferRef Buffer);
/// Parse the specified bitcode buffer, returning the module summary index.
- ErrorOr<std::unique_ptr<ModuleSummaryIndex>>
- getModuleSummaryIndex(MemoryBufferRef Buffer,
- const DiagnosticHandlerFunction &DiagnosticHandler);
-
- /// \brief Write the specified module to the specified raw output stream.
- ///
- /// For streams where it matters, the given stream should be in "binary"
- /// mode.
- ///
- /// If \c ShouldPreserveUseListOrder, encode the use-list order for each \a
- /// Value in \c M. These will be reconstructed exactly when \a M is
- /// deserialized.
- ///
- /// If \c EmitSummaryIndex, emit the module's summary index (currently
- /// for use in ThinLTO optimization).
- void WriteBitcodeToFile(const Module *M, raw_ostream &Out,
- bool ShouldPreserveUseListOrder = false,
- const ModuleSummaryIndex *Index = nullptr,
- bool GenerateHash = false);
-
- /// Write the specified module summary index to the given raw output stream,
- /// where it will be written in a new bitcode block. This is used when
- /// writing the combined index file for ThinLTO. When writing a subset of the
- /// index for a distributed backend, provide the \p ModuleToSummariesForIndex
- /// map.
- void WriteIndexToFile(const ModuleSummaryIndex &Index, raw_ostream &Out,
- std::map<std::string, GVSummaryMapTy>
- *ModuleToSummariesForIndex = nullptr);
+ Expected<std::unique_ptr<ModuleSummaryIndex>>
+ getModuleSummaryIndex(MemoryBufferRef Buffer);
/// isBitcodeWrapper - Return true if the given bytes are the magic bytes
/// for an LLVM IR bitcode wrapper.
@@ -183,26 +210,11 @@ namespace llvm {
}
const std::error_category &BitcodeErrorCategory();
- enum class BitcodeError { InvalidBitcodeSignature = 1, CorruptedBitcode };
+ enum class BitcodeError { CorruptedBitcode = 1 };
inline std::error_code make_error_code(BitcodeError E) {
return std::error_code(static_cast<int>(E), BitcodeErrorCategory());
}
- class BitcodeDiagnosticInfo : public DiagnosticInfo {
- const Twine &Msg;
- std::error_code EC;
-
- public:
- BitcodeDiagnosticInfo(std::error_code EC, DiagnosticSeverity Severity,
- const Twine &Msg);
- void print(DiagnosticPrinter &DP) const override;
- std::error_code getError() const { return EC; }
-
- static bool classof(const DiagnosticInfo *DI) {
- return DI->getKind() == DK_Bitcode;
- }
- };
-
} // End llvm namespace
namespace std {
diff --git a/contrib/llvm/include/llvm/Bitcode/BitcodeWriter.h b/contrib/llvm/include/llvm/Bitcode/BitcodeWriter.h
new file mode 100644
index 000000000000..4f72f98bbf9c
--- /dev/null
+++ b/contrib/llvm/include/llvm/Bitcode/BitcodeWriter.h
@@ -0,0 +1,80 @@
+//===-- llvm/Bitcode/BitcodeWriter.h - Bitcode writers ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This header defines interfaces to write LLVM bitcode files/streams.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BITCODE_BITCODEWRITER_H
+#define LLVM_BITCODE_BITCODEWRITER_H
+
+#include "llvm/IR/ModuleSummaryIndex.h"
+#include <string>
+
+namespace llvm {
+ class BitstreamWriter;
+ class Module;
+ class raw_ostream;
+
+ class BitcodeWriter {
+ SmallVectorImpl<char> &Buffer;
+ std::unique_ptr<BitstreamWriter> Stream;
+
+ public:
+ /// Create a BitcodeWriter that writes to Buffer.
+ BitcodeWriter(SmallVectorImpl<char> &Buffer);
+
+ ~BitcodeWriter();
+
+ /// Write the specified module to the buffer specified at construction time.
+ ///
+ /// If \c ShouldPreserveUseListOrder, encode the use-list order for each \a
+ /// Value in \c M. These will be reconstructed exactly when \a M is
+ /// deserialized.
+ ///
+ /// If \c Index is supplied, the bitcode will contain the summary index
+ /// (currently for use in ThinLTO optimization).
+ ///
+ /// \p GenerateHash enables hashing the Module and including the hash in the
+ /// bitcode (currently for use in ThinLTO incremental build).
+ void writeModule(const Module *M, bool ShouldPreserveUseListOrder = false,
+ const ModuleSummaryIndex *Index = nullptr,
+ bool GenerateHash = false);
+ };
+
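A brief usage sketch of the buffer-based writer declared above; M is assumed to be an existing Module, and the flag values shown are simply the defaults.

// Illustrative only: serialize module M into an in-memory buffer.
SmallVector<char, 0> Buffer;
BitcodeWriter Writer(Buffer);
Writer.writeModule(&M, /*ShouldPreserveUseListOrder=*/false,
                   /*Index=*/nullptr, /*GenerateHash=*/false);
// Buffer now holds the bitcode for M.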
+ /// \brief Write the specified module to the specified raw output stream.
+ ///
+ /// For streams where it matters, the given stream should be in "binary"
+ /// mode.
+ ///
+ /// If \c ShouldPreserveUseListOrder, encode the use-list order for each \a
+ /// Value in \c M. These will be reconstructed exactly when \a M is
+ /// deserialized.
+ ///
+ /// If \c Index is supplied, the bitcode will contain the summary index
+ /// (currently for use in ThinLTO optimization).
+ ///
+ /// \p GenerateHash enables hashing the Module and including the hash in the
+ /// bitcode (currently for use in ThinLTO incremental build).
+ void WriteBitcodeToFile(const Module *M, raw_ostream &Out,
+ bool ShouldPreserveUseListOrder = false,
+ const ModuleSummaryIndex *Index = nullptr,
+ bool GenerateHash = false);
+
+ /// Write the specified module summary index to the given raw output stream,
+ /// where it will be written in a new bitcode block. This is used when
+ /// writing the combined index file for ThinLTO. When writing a subset of the
+ /// index for a distributed backend, provide the \p ModuleToSummariesForIndex
+ /// map.
+ void WriteIndexToFile(const ModuleSummaryIndex &Index, raw_ostream &Out,
+ const std::map<std::string, GVSummaryMapTy>
+ *ModuleToSummariesForIndex = nullptr);
+} // End llvm namespace
+
+#endif
diff --git a/contrib/llvm/include/llvm/Bitcode/BitcodeWriterPass.h b/contrib/llvm/include/llvm/Bitcode/BitcodeWriterPass.h
index 946255b878a6..9ac6fba16b96 100644
--- a/contrib/llvm/include/llvm/Bitcode/BitcodeWriterPass.h
+++ b/contrib/llvm/include/llvm/Bitcode/BitcodeWriterPass.h
@@ -44,7 +44,7 @@ ModulePass *createBitcodeWriterPass(raw_ostream &Str,
///
/// Note that this is intended for use with the new pass manager. To construct
/// a pass for the legacy pass manager, use the function above.
-class BitcodeWriterPass {
+class BitcodeWriterPass : public PassInfoMixin<BitcodeWriterPass> {
raw_ostream &OS;
bool ShouldPreserveUseListOrder;
bool EmitSummaryIndex;
@@ -68,8 +68,6 @@ public:
/// \brief Run the bitcode writer pass, and output the module to the selected
/// output stream.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &);
-
- static StringRef name() { return "BitcodeWriterPass"; }
};
}
diff --git a/contrib/llvm/include/llvm/Bitcode/BitstreamReader.h b/contrib/llvm/include/llvm/Bitcode/BitstreamReader.h
index b331ceea051c..4d95a6ce8a16 100644
--- a/contrib/llvm/include/llvm/Bitcode/BitstreamReader.h
+++ b/contrib/llvm/include/llvm/Bitcode/BitstreamReader.h
@@ -15,21 +15,28 @@
#ifndef LLVM_BITCODE_BITSTREAMREADER_H
#define LLVM_BITCODE_BITSTREAMREADER_H
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/IntrusiveRefCntPtr.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Bitcode/BitCodes.h"
#include "llvm/Support/Endian.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/StreamingMemoryObject.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <algorithm>
+#include <cassert>
#include <climits>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
#include <string>
+#include <utility>
#include <vector>
namespace llvm {
-/// This class is used to read from an LLVM bitcode stream, maintaining
-/// information that is global to decoding the entire file. While a file is
-/// being read, multiple cursors can be independently advanced or skipped around
-/// within the file. These are represented by the BitstreamCursor class.
-class BitstreamReader {
+/// This class maintains the abbreviations read from a block info block.
+class BitstreamBlockInfo {
public:
/// This contains information emitted to BLOCKINFO_BLOCK blocks. These
/// describe abbreviations that all blocks of the specified ID inherit.
@@ -37,64 +44,13 @@ public:
unsigned BlockID;
std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> Abbrevs;
std::string Name;
-
std::vector<std::pair<unsigned, std::string> > RecordNames;
};
-private:
- std::unique_ptr<MemoryObject> BitcodeBytes;
+private:
std::vector<BlockInfo> BlockInfoRecords;
- /// This is set to true if we don't care about the block/record name
- /// information in the BlockInfo block. Only llvm-bcanalyzer uses this.
- bool IgnoreBlockInfoNames;
-
- BitstreamReader(const BitstreamReader&) = delete;
- void operator=(const BitstreamReader&) = delete;
public:
- BitstreamReader() : IgnoreBlockInfoNames(true) {
- }
-
- BitstreamReader(const unsigned char *Start, const unsigned char *End)
- : IgnoreBlockInfoNames(true) {
- init(Start, End);
- }
-
- BitstreamReader(std::unique_ptr<MemoryObject> BitcodeBytes)
- : BitcodeBytes(std::move(BitcodeBytes)), IgnoreBlockInfoNames(true) {}
-
- BitstreamReader(BitstreamReader &&Other) {
- *this = std::move(Other);
- }
-
- BitstreamReader &operator=(BitstreamReader &&Other) {
- BitcodeBytes = std::move(Other.BitcodeBytes);
- // Explicitly swap block info, so that nothing gets destroyed twice.
- std::swap(BlockInfoRecords, Other.BlockInfoRecords);
- IgnoreBlockInfoNames = Other.IgnoreBlockInfoNames;
- return *this;
- }
-
- void init(const unsigned char *Start, const unsigned char *End) {
- assert(((End-Start) & 3) == 0 &&"Bitcode stream not a multiple of 4 bytes");
- BitcodeBytes.reset(getNonStreamedMemoryObject(Start, End));
- }
-
- MemoryObject &getBitcodeBytes() { return *BitcodeBytes; }
-
- /// This is called by clients that want block/record name information.
- void CollectBlockInfoNames() { IgnoreBlockInfoNames = false; }
- bool isIgnoringBlockInfoNames() { return IgnoreBlockInfoNames; }
-
- //===--------------------------------------------------------------------===//
- // Block Manipulation
- //===--------------------------------------------------------------------===//
-
- /// Return true if we've already read and processed the block info block for
- /// this Bitstream. We only process it for the first cursor that walks over
- /// it.
- bool hasBlockInfoRecords() const { return !BlockInfoRecords.empty(); }
-
/// If there is block info for the specified ID, return it, otherwise return
/// null.
const BlockInfo *getBlockInfo(unsigned BlockID) const {
@@ -118,33 +74,21 @@ public:
BlockInfoRecords.back().BlockID = BlockID;
return BlockInfoRecords.back();
}
-
- /// Takes block info from the other bitstream reader.
- ///
- /// This is a "take" operation because BlockInfo records are non-trivial, and
- /// indeed rather expensive.
- void takeBlockInfo(BitstreamReader &&Other) {
- assert(!hasBlockInfoRecords());
- BlockInfoRecords = std::move(Other.BlockInfoRecords);
- }
};
/// This represents a position within a bitstream. There may be multiple
/// independent cursors reading within one bitstream, each maintaining their
/// own local state.
class SimpleBitstreamCursor {
- BitstreamReader *R = nullptr;
+ ArrayRef<uint8_t> BitcodeBytes;
size_t NextChar = 0;
- // The size of the bicode. 0 if we don't know it yet.
- size_t Size = 0;
-
+public:
/// This is the current data we have pulled from the stream but have not
/// returned to the client. This is specifically and intentionally defined to
/// follow the word size of the host machine for efficiency. We use word_t in
/// places that are aware of this to make it perfectly explicit what is going
/// on.
-public:
typedef size_t word_t;
private:
@@ -158,23 +102,21 @@ public:
static const size_t MaxChunkSize = sizeof(word_t) * 8;
SimpleBitstreamCursor() = default;
-
- explicit SimpleBitstreamCursor(BitstreamReader &R) : R(&R) {}
- explicit SimpleBitstreamCursor(BitstreamReader *R) : R(R) {}
+ explicit SimpleBitstreamCursor(ArrayRef<uint8_t> BitcodeBytes)
+ : BitcodeBytes(BitcodeBytes) {}
+ explicit SimpleBitstreamCursor(StringRef BitcodeBytes)
+ : BitcodeBytes(reinterpret_cast<const uint8_t *>(BitcodeBytes.data()),
+ BitcodeBytes.size()) {}
+ explicit SimpleBitstreamCursor(MemoryBufferRef BitcodeBytes)
+ : SimpleBitstreamCursor(BitcodeBytes.getBuffer()) {}
bool canSkipToPos(size_t pos) const {
// pos can be skipped to if it is a valid address or one byte past the end.
- return pos == 0 ||
- R->getBitcodeBytes().isValidAddress(static_cast<uint64_t>(pos - 1));
+ return pos <= BitcodeBytes.size();
}
bool AtEndOfStream() {
- if (BitsInCurWord != 0)
- return false;
- if (Size != 0)
- return Size <= NextChar;
- fillCurWord();
- return BitsInCurWord == 0;
+ return BitsInCurWord == 0 && BitcodeBytes.size() <= NextChar;
}
/// Return the bit # of the bit we are reading.
@@ -185,8 +127,7 @@ public:
// Return the byte # of the current bit.
uint64_t getCurrentByteNo() const { return GetCurrentBitNo() / 8; }
- BitstreamReader *getBitStreamReader() { return R; }
- const BitstreamReader *getBitStreamReader() const { return R; }
+ ArrayRef<uint8_t> getBitcodeBytes() const { return BitcodeBytes; }
/// Reset the stream to the specified bit number.
void JumpToBit(uint64_t BitNo) {
@@ -203,27 +144,9 @@ public:
Read(WordBitNo);
}
- /// Reset the stream to the bit pointed at by the specified pointer.
- ///
- /// The pointer must be a dereferenceable pointer into the bytes in the
- /// underlying memory object.
- void jumpToPointer(const uint8_t *Pointer) {
- auto *Pointer0 = getPointerToByte(0, 1);
- assert((intptr_t)Pointer0 <= (intptr_t)Pointer &&
- "Expected pointer into bitstream");
-
- JumpToBit(8 * (Pointer - Pointer0));
- assert((intptr_t)getPointerToByte(getCurrentByteNo(), 1) ==
- (intptr_t)Pointer &&
- "Expected to reach pointer");
- }
- void jumpToPointer(const char *Pointer) {
- jumpToPointer((const uint8_t *)Pointer);
- }
-
/// Get a pointer into the bitstream at the specified byte offset.
const uint8_t *getPointerToByte(uint64_t ByteNo, uint64_t NumBytes) {
- return R->getBitcodeBytes().getPointer(ByteNo, NumBytes);
+ return BitcodeBytes.data() + ByteNo;
}
/// Get a pointer into the bitstream at the specified bit offset.
@@ -235,26 +158,24 @@ public:
}
void fillCurWord() {
- if (Size != 0 && NextChar >= Size)
+ if (NextChar >= BitcodeBytes.size())
report_fatal_error("Unexpected end of file");
// Read the next word from the stream.
- uint8_t Array[sizeof(word_t)] = {0};
-
- uint64_t BytesRead =
- R->getBitcodeBytes().readBytes(Array, sizeof(Array), NextChar);
-
- // If we run out of data, stop at the end of the stream.
- if (BytesRead == 0) {
+ const uint8_t *NextCharPtr = BitcodeBytes.data() + NextChar;
+ unsigned BytesRead;
+ if (BitcodeBytes.size() >= NextChar + sizeof(word_t)) {
+ BytesRead = sizeof(word_t);
+ CurWord =
+ support::endian::read<word_t, support::little, support::unaligned>(
+ NextCharPtr);
+ } else {
+ // Short read.
+ BytesRead = BitcodeBytes.size() - NextChar;
CurWord = 0;
- BitsInCurWord = 0;
- Size = NextChar;
- return;
+ for (unsigned B = 0; B != BytesRead; ++B)
+ CurWord |= uint64_t(NextCharPtr[B]) << (B * 8);
}
-
- CurWord =
- support::endian::read<word_t, support::little, support::unaligned>(
- Array);
NextChar += BytesRead;
BitsInCurWord = BytesRead * 8;
}
@@ -283,9 +204,9 @@ public:
fillCurWord();
- // If we run out of data, stop at the end of the stream.
+ // If we run out of data, abort.
if (BitsLeft > BitsInCurWord)
- return 0;
+ report_fatal_error("Unexpected end of file");
word_t R2 = CurWord & (~word_t(0) >> (BitsInWord - BitsLeft));
@@ -306,7 +227,7 @@ public:
uint32_t Result = 0;
unsigned NextBit = 0;
- while (1) {
+ while (true) {
Result |= (Piece & ((1U << (NumBits-1))-1)) << NextBit;
if ((Piece & (1U << (NumBits-1))) == 0)
@@ -326,7 +247,7 @@ public:
uint64_t Result = 0;
unsigned NextBit = 0;
- while (1) {
+ while (true) {
Result |= uint64_t(Piece & ((1U << (NumBits-1))-1)) << NextBit;
if ((Piece & (1U << (NumBits-1))) == 0)
@@ -351,31 +272,7 @@ public:
}
/// Skip to the end of the file.
- void skipToEnd() { NextChar = R->getBitcodeBytes().getExtent(); }
-
- /// Prevent the cursor from reading past a byte boundary.
- ///
- /// Prevent the cursor from requesting byte reads past \c Limit. This is
- /// useful when working with a cursor on a StreamingMemoryObject, when it's
- /// desirable to avoid invalidating the result of getPointerToByte().
- ///
- /// If \c Limit is on a word boundary, AtEndOfStream() will return true if
- /// the cursor position reaches or exceeds \c Limit, regardless of the true
- /// number of available bytes. Otherwise, AtEndOfStream() returns true when
- /// it reaches or exceeds the next word boundary.
- void setArtificialByteLimit(uint64_t Limit) {
- assert(getCurrentByteNo() < Limit && "Move cursor before lowering limit");
-
- // Round to word boundary.
- Limit = alignTo(Limit, sizeof(word_t));
-
- // Only change size if the new one is lower.
- if (!Size || Size > Limit)
- Size = Limit;
- }
-
- /// Return the Size, if known.
- uint64_t getSizeIfKnown() const { return Size; }
+ void skipToEnd() { NextChar = BitcodeBytes.size(); }
};
/// When advancing through a bitstream cursor, each advance can discover a few
@@ -394,12 +291,15 @@ struct BitstreamEntry {
static BitstreamEntry getError() {
BitstreamEntry E; E.Kind = Error; return E;
}
+
static BitstreamEntry getEndBlock() {
BitstreamEntry E; E.Kind = EndBlock; return E;
}
+
static BitstreamEntry getSubBlock(unsigned ID) {
BitstreamEntry E; E.Kind = SubBlock; E.ID = ID; return E;
}
+
static BitstreamEntry getRecord(unsigned AbbrevID) {
BitstreamEntry E; E.Kind = Record; E.ID = AbbrevID; return E;
}
@@ -421,34 +321,32 @@ class BitstreamCursor : SimpleBitstreamCursor {
struct Block {
unsigned PrevCodeSize;
std::vector<IntrusiveRefCntPtr<BitCodeAbbrev>> PrevAbbrevs;
+
explicit Block(unsigned PCS) : PrevCodeSize(PCS) {}
};
/// This tracks the codesize of parent blocks.
SmallVector<Block, 8> BlockScope;
+ BitstreamBlockInfo *BlockInfo = nullptr;
public:
static const size_t MaxChunkSize = sizeof(word_t) * 8;
BitstreamCursor() = default;
-
- explicit BitstreamCursor(BitstreamReader &R) { init(&R); }
-
- void init(BitstreamReader *R) {
- freeState();
- SimpleBitstreamCursor::operator=(SimpleBitstreamCursor(R));
- CurCodeSize = 2;
- }
-
- void freeState();
+ explicit BitstreamCursor(ArrayRef<uint8_t> BitcodeBytes)
+ : SimpleBitstreamCursor(BitcodeBytes) {}
+ explicit BitstreamCursor(StringRef BitcodeBytes)
+ : SimpleBitstreamCursor(BitcodeBytes) {}
+ explicit BitstreamCursor(MemoryBufferRef BitcodeBytes)
+ : SimpleBitstreamCursor(BitcodeBytes) {}
using SimpleBitstreamCursor::canSkipToPos;
using SimpleBitstreamCursor::AtEndOfStream;
+ using SimpleBitstreamCursor::getBitcodeBytes;
using SimpleBitstreamCursor::GetCurrentBitNo;
using SimpleBitstreamCursor::getCurrentByteNo;
using SimpleBitstreamCursor::getPointerToByte;
- using SimpleBitstreamCursor::getBitStreamReader;
using SimpleBitstreamCursor::JumpToBit;
using SimpleBitstreamCursor::fillCurWord;
using SimpleBitstreamCursor::Read;
@@ -471,7 +369,10 @@ public:
/// Advance the current bitstream, returning the next entry in the stream.
BitstreamEntry advance(unsigned Flags = 0) {
- while (1) {
+ while (true) {
+ if (AtEndOfStream())
+ return BitstreamEntry::getError();
+
unsigned Code = ReadCode();
if (Code == bitc::END_BLOCK) {
// Pop the end of the block unless Flags tells us not to.
@@ -498,7 +399,7 @@ public:
/// This is a convenience function for clients that don't expect any
/// subblocks. This just skips over them automatically.
BitstreamEntry advanceSkippingSubblocks(unsigned Flags = 0) {
- while (1) {
+ while (true) {
// If we found a normal entry, return it.
BitstreamEntry Entry = advance(Flags);
if (Entry.Kind != BitstreamEntry::SubBlock)
@@ -514,7 +415,6 @@ public:
return Read(CurCodeSize);
}
-
// Block header:
// [ENTER_SUBBLOCK, blockid, newcodelen, <align4bytes>, blocklen]
@@ -558,7 +458,6 @@ public:
}
private:
-
void popBlockScope() {
CurCodeSize = BlockScope.back().PrevCodeSize;
@@ -590,9 +489,19 @@ public:
//===--------------------------------------------------------------------===//
void ReadAbbrevRecord();
- bool ReadBlockInfoBlock();
+ /// Read and return a block info block from the bitstream. If an error was
+ /// encountered, return None.
+ ///
+ /// \param ReadBlockInfoNames Whether to read block/record name information in
+ /// the BlockInfo block. Only llvm-bcanalyzer uses this.
+ Optional<BitstreamBlockInfo>
+ ReadBlockInfoBlock(bool ReadBlockInfoNames = false);
+
+ /// Set the block info to be used by this BitstreamCursor to interpret
+ /// abbreviated records.
+ void setBlockInfo(BitstreamBlockInfo *BI) { BlockInfo = BI; }
};
-} // End llvm namespace
+} // end llvm namespace
-#endif
+#endif // LLVM_BITCODE_BITSTREAMREADER_H
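
For context, a minimal sketch of the cursor/block-info split introduced by this hunk, assuming the buffer already points at raw bitstream data (wrapper and magic-number handling omitted); the function name and control flow are illustrative only:

#include "llvm/ADT/Optional.h"
#include "llvm/Bitcode/BitstreamReader.h"
#include "llvm/Support/MemoryBuffer.h"

// Hypothetical helper: walk the top-level blocks of a bitstream buffer.
static void scanStream(llvm::MemoryBufferRef Buffer) {
  llvm::BitstreamCursor Cursor(Buffer);   // no shared BitstreamReader anymore
  llvm::BitstreamBlockInfo BlockInfo;     // caller now owns the abbrev table

  while (!Cursor.AtEndOfStream()) {
    llvm::BitstreamEntry Entry = Cursor.advance();
    if (Entry.Kind != llvm::BitstreamEntry::SubBlock)
      break;
    if (Entry.ID == llvm::bitc::BLOCKINFO_BLOCK_ID) {
      // ReadBlockInfoBlock now returns the parsed table instead of storing it
      // in a BitstreamReader shared between cursors.
      llvm::Optional<llvm::BitstreamBlockInfo> BI = Cursor.ReadBlockInfoBlock();
      if (!BI)
        break;                            // malformed block info
      BlockInfo = std::move(*BI);
      Cursor.setBlockInfo(&BlockInfo);
    } else if (Cursor.SkipBlock()) {
      break;                              // malformed block
    }
  }
}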
diff --git a/contrib/llvm/include/llvm/Bitcode/BitstreamWriter.h b/contrib/llvm/include/llvm/Bitcode/BitstreamWriter.h
index d613f5e18954..8eb6e8aef7a2 100644
--- a/contrib/llvm/include/llvm/Bitcode/BitstreamWriter.h
+++ b/contrib/llvm/include/llvm/Bitcode/BitstreamWriter.h
@@ -112,6 +112,11 @@ public:
&Out[ByteNo], NewWord, BitNo & 7);
}
+ void BackpatchWord64(uint64_t BitNo, uint64_t Val) {
+ BackpatchWord(BitNo, (uint32_t)Val);
+ BackpatchWord(BitNo + 32, (uint32_t)(Val >> 32));
+ }
+
void Emit(uint32_t Val, unsigned NumBits) {
assert(NumBits && NumBits <= 32 && "Invalid value size!");
assert((Val & ~(~0U >> (32-NumBits))) == 0 && "High bits set!");
@@ -131,15 +136,6 @@ public:
CurBit = (CurBit+NumBits) & 31;
}
- void Emit64(uint64_t Val, unsigned NumBits) {
- if (NumBits <= 32)
- Emit((uint32_t)Val, NumBits);
- else {
- Emit((uint32_t)Val, 32);
- Emit((uint32_t)(Val >> 32), NumBits-32);
- }
- }
-
void FlushToWord() {
if (CurBit) {
WriteWord(CurValue);
@@ -506,9 +502,10 @@ public:
//===--------------------------------------------------------------------===//
/// EnterBlockInfoBlock - Start emitting the BLOCKINFO_BLOCK.
- void EnterBlockInfoBlock(unsigned CodeWidth) {
- EnterSubblock(bitc::BLOCKINFO_BLOCK_ID, CodeWidth);
+ void EnterBlockInfoBlock() {
+ EnterSubblock(bitc::BLOCKINFO_BLOCK_ID, 2);
BlockInfoCurBID = ~0U;
+ BlockInfoRecords.clear();
}
private:
/// SwitchToBlockID - If we aren't already talking about the specified block
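
A minimal sketch of the pattern BackpatchWord64 is meant for now that Emit64 is gone: reserve a 64-bit slot with two 32-bit emits and patch it once the value is known. The placeholder is assumed here to start on a 32-bit boundary, and the surrounding stream setup is omitted:

#include "llvm/Bitcode/BitstreamWriter.h"

// Hypothetical helper: emit a 64-bit placeholder and backpatch it later.
static void writeWithPlaceholder(llvm::BitstreamWriter &Stream,
                                 uint64_t FinalValue) {
  uint64_t PlaceholderBit = Stream.GetCurrentBitNo();
  Stream.Emit(0, 32);                 // low 32 bits, patched below
  Stream.Emit(0, 32);                 // high 32 bits, patched below
  // ... emit the data whose size/offset is not yet known ...
  Stream.BackpatchWord64(PlaceholderBit, FinalValue);
}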
diff --git a/contrib/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/contrib/llvm/include/llvm/Bitcode/LLVMBitCodes.h
index 52d4f01b7985..c996c38261c0 100644
--- a/contrib/llvm/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/contrib/llvm/include/llvm/Bitcode/LLVMBitCodes.h
@@ -120,9 +120,8 @@ enum AttributeCodes {
// FIXME: Remove `PARAMATTR_CODE_ENTRY_OLD' in 4.0
PARAMATTR_CODE_ENTRY_OLD = 1, // ENTRY: [paramidx0, attr0,
// paramidx1, attr1...]
- PARAMATTR_CODE_ENTRY = 2, // ENTRY: [paramidx0, attrgrp0,
- // paramidx1, attrgrp1, ...]
- PARAMATTR_GRP_CODE_ENTRY = 3 // ENTRY: [id, attr0, att1, ...]
+ PARAMATTR_CODE_ENTRY = 2, // ENTRY: [attrgrp0, attrgrp1, ...]
+ PARAMATTR_GRP_CODE_ENTRY = 3 // ENTRY: [grpid, idx, attr0, attr1, ...]
};
/// TYPE blocks have codes for each type primitive they use.
@@ -170,11 +169,6 @@ enum OperandBundleTagCode {
OPERAND_BUNDLE_TAG = 1, // TAG: [strchr x N]
};
-// The type symbol table only has one code (TST_ENTRY_CODE).
-enum TypeSymtabCodes {
- TST_CODE_ENTRY = 1 // TST_ENTRY: [typeid, namechar x N]
-};
-
// Value symbol table codes.
enum ValueSymtabCodes {
VST_CODE_ENTRY = 1, // VST_ENTRY: [valueid, namechar x N]
@@ -194,20 +188,20 @@ enum ModulePathSymtabCodes {
// and combined index cases.
enum GlobalValueSummarySymtabCodes {
// PERMODULE: [valueid, flags, instcount, numrefs, numrefs x valueid,
- // n x (valueid, callsitecount)]
+ // n x (valueid)]
FS_PERMODULE = 1,
// PERMODULE_PROFILE: [valueid, flags, instcount, numrefs,
// numrefs x valueid,
- // n x (valueid, callsitecount, profilecount)]
+ // n x (valueid, hotness)]
FS_PERMODULE_PROFILE = 2,
// PERMODULE_GLOBALVAR_INIT_REFS: [valueid, flags, n x valueid]
FS_PERMODULE_GLOBALVAR_INIT_REFS = 3,
// COMBINED: [valueid, modid, flags, instcount, numrefs, numrefs x valueid,
- // n x (valueid, callsitecount)]
+ // n x (valueid)]
FS_COMBINED = 4,
// COMBINED_PROFILE: [valueid, modid, flags, instcount, numrefs,
// numrefs x valueid,
- // n x (valueid, callsitecount, profilecount)]
+ // n x (valueid, hotness)]
FS_COMBINED_PROFILE = 5,
// COMBINED_GLOBALVAR_INIT_REFS: [valueid, modid, flags, n x valueid]
FS_COMBINED_GLOBALVAR_INIT_REFS = 6,
@@ -219,45 +213,50 @@ enum GlobalValueSummarySymtabCodes {
FS_COMBINED_ORIGINAL_NAME = 9,
// VERSION of the summary, bumped when adding flags for instance.
FS_VERSION = 10,
+ // The list of llvm.type.test type identifiers used by the following function.
+ FS_TYPE_TESTS = 11,
};
enum MetadataCodes {
- METADATA_STRING_OLD = 1, // MDSTRING: [values]
- METADATA_VALUE = 2, // VALUE: [type num, value num]
- METADATA_NODE = 3, // NODE: [n x md num]
- METADATA_NAME = 4, // STRING: [values]
- METADATA_DISTINCT_NODE = 5, // DISTINCT_NODE: [n x md num]
- METADATA_KIND = 6, // [n x [id, name]]
- METADATA_LOCATION = 7, // [distinct, line, col, scope, inlined-at?]
- METADATA_OLD_NODE = 8, // OLD_NODE: [n x (type num, value num)]
- METADATA_OLD_FN_NODE = 9, // OLD_FN_NODE: [n x (type num, value num)]
- METADATA_NAMED_NODE = 10, // NAMED_NODE: [n x mdnodes]
- METADATA_ATTACHMENT = 11, // [m x [value, [n x [id, mdnode]]]
- METADATA_GENERIC_DEBUG = 12, // [distinct, tag, vers, header, n x md num]
- METADATA_SUBRANGE = 13, // [distinct, count, lo]
- METADATA_ENUMERATOR = 14, // [distinct, value, name]
- METADATA_BASIC_TYPE = 15, // [distinct, tag, name, size, align, enc]
- METADATA_FILE = 16, // [distinct, filename, directory]
- METADATA_DERIVED_TYPE = 17, // [distinct, ...]
- METADATA_COMPOSITE_TYPE = 18, // [distinct, ...]
- METADATA_SUBROUTINE_TYPE = 19, // [distinct, flags, types, cc]
- METADATA_COMPILE_UNIT = 20, // [distinct, ...]
- METADATA_SUBPROGRAM = 21, // [distinct, ...]
- METADATA_LEXICAL_BLOCK = 22, // [distinct, scope, file, line, column]
+ METADATA_STRING_OLD = 1, // MDSTRING: [values]
+ METADATA_VALUE = 2, // VALUE: [type num, value num]
+ METADATA_NODE = 3, // NODE: [n x md num]
+ METADATA_NAME = 4, // STRING: [values]
+ METADATA_DISTINCT_NODE = 5, // DISTINCT_NODE: [n x md num]
+ METADATA_KIND = 6, // [n x [id, name]]
+ METADATA_LOCATION = 7, // [distinct, line, col, scope, inlined-at?]
+ METADATA_OLD_NODE = 8, // OLD_NODE: [n x (type num, value num)]
+ METADATA_OLD_FN_NODE = 9, // OLD_FN_NODE: [n x (type num, value num)]
+ METADATA_NAMED_NODE = 10, // NAMED_NODE: [n x mdnodes]
+ METADATA_ATTACHMENT = 11, // [m x [value, [n x [id, mdnode]]]
+ METADATA_GENERIC_DEBUG = 12, // [distinct, tag, vers, header, n x md num]
+ METADATA_SUBRANGE = 13, // [distinct, count, lo]
+ METADATA_ENUMERATOR = 14, // [distinct, value, name]
+ METADATA_BASIC_TYPE = 15, // [distinct, tag, name, size, align, enc]
+ METADATA_FILE = 16, // [distinct, filename, directory, checksumkind, checksum]
+ METADATA_DERIVED_TYPE = 17, // [distinct, ...]
+ METADATA_COMPOSITE_TYPE = 18, // [distinct, ...]
+ METADATA_SUBROUTINE_TYPE = 19, // [distinct, flags, types, cc]
+ METADATA_COMPILE_UNIT = 20, // [distinct, ...]
+ METADATA_SUBPROGRAM = 21, // [distinct, ...]
+ METADATA_LEXICAL_BLOCK = 22, // [distinct, scope, file, line, column]
METADATA_LEXICAL_BLOCK_FILE = 23, //[distinct, scope, file, discriminator]
- METADATA_NAMESPACE = 24, // [distinct, scope, file, name, line]
- METADATA_TEMPLATE_TYPE = 25, // [distinct, scope, name, type, ...]
- METADATA_TEMPLATE_VALUE = 26, // [distinct, scope, name, type, value, ...]
- METADATA_GLOBAL_VAR = 27, // [distinct, ...]
- METADATA_LOCAL_VAR = 28, // [distinct, ...]
- METADATA_EXPRESSION = 29, // [distinct, n x element]
- METADATA_OBJC_PROPERTY = 30, // [distinct, name, file, line, ...]
+ METADATA_NAMESPACE = 24, // [distinct, scope, file, name, line, exportSymbols]
+ METADATA_TEMPLATE_TYPE = 25, // [distinct, scope, name, type, ...]
+ METADATA_TEMPLATE_VALUE = 26, // [distinct, scope, name, type, value, ...]
+ METADATA_GLOBAL_VAR = 27, // [distinct, ...]
+ METADATA_LOCAL_VAR = 28, // [distinct, ...]
+ METADATA_EXPRESSION = 29, // [distinct, n x element]
+ METADATA_OBJC_PROPERTY = 30, // [distinct, name, file, line, ...]
METADATA_IMPORTED_ENTITY = 31, // [distinct, tag, scope, entity, line, name]
METADATA_MODULE = 32, // [distinct, scope, name, ...]
METADATA_MACRO = 33, // [distinct, macinfo, line, name, value]
METADATA_MACRO_FILE = 34, // [distinct, macinfo, line, file, ...]
METADATA_STRINGS = 35, // [count, offset] blob([lengths][chars])
METADATA_GLOBAL_DECL_ATTACHMENT = 36, // [valueid, n x [id, mdnode]]
+ METADATA_GLOBAL_VAR_EXPR = 37, // [distinct, var, expr]
+ METADATA_INDEX_OFFSET = 38, // [offset]
+ METADATA_INDEX = 39, // [bitpos]
};
// The constants block (CONSTANTS_BLOCK_ID) describes emission for each
@@ -286,8 +285,9 @@ enum ConstantsCodes {
CST_CODE_CE_INBOUNDS_GEP = 20, // INBOUNDS_GEP: [n x operands]
CST_CODE_BLOCKADDRESS = 21, // CST_CODE_BLOCKADDRESS [fnty, fnval, bb#]
CST_CODE_DATA = 22, // DATA: [n x elements]
- CST_CODE_INLINEASM = 23 // INLINEASM: [sideeffect|alignstack|
+ CST_CODE_INLINEASM = 23, // INLINEASM: [sideeffect|alignstack|
// asmdialect,asmstr,conststr]
+ CST_CODE_CE_GEP_WITH_INRANGE_INDEX = 24, // [opty, flags, n x operands]
};
/// CastOpcodes - These are values used in the bitcode files to encode which
diff --git a/contrib/llvm/include/llvm/CodeGen/Analysis.h b/contrib/llvm/include/llvm/CodeGen/Analysis.h
index 2e4dc49a1e26..f20185c4499a 100644
--- a/contrib/llvm/include/llvm/CodeGen/Analysis.h
+++ b/contrib/llvm/include/llvm/CodeGen/Analysis.h
@@ -105,11 +105,21 @@ ISD::CondCode getICmpCondCode(ICmpInst::Predicate Pred);
/// This function only tests target-independent requirements.
bool isInTailCallPosition(ImmutableCallSite CS, const TargetMachine &TM);
+/// Test, given that the input instruction is in the tail call position, whether
+/// there is an attribute mismatch between the caller and the callee that will
+/// inhibit tail call optimizations.
+/// \p AllowDifferingSizes is an output parameter which, if forming a tail call
+/// is permitted, determines whether it's permitted only if the size of the
+/// caller's and callee's return types match exactly.
+bool attributesPermitTailCall(const Function *F, const Instruction *I,
+ const ReturnInst *Ret,
+ const TargetLoweringBase &TLI,
+ bool *AllowDifferingSizes = nullptr);
+
/// Test if given that the input instruction is in the tail call position if the
/// return type or any attributes of the function will inhibit tail call
/// optimization.
-bool returnTypeIsEligibleForTailCall(const Function *F,
- const Instruction *I,
+bool returnTypeIsEligibleForTailCall(const Function *F, const Instruction *I,
const ReturnInst *Ret,
const TargetLoweringBase &TLI);
diff --git a/contrib/llvm/include/llvm/CodeGen/AsmPrinter.h b/contrib/llvm/include/llvm/CodeGen/AsmPrinter.h
index de618d173573..c1be46ddd7b5 100644
--- a/contrib/llvm/include/llvm/CodeGen/AsmPrinter.h
+++ b/contrib/llvm/include/llvm/CodeGen/AsmPrinter.h
@@ -89,10 +89,6 @@ public:
/// This is a pointer to the current MachineModuleInfo.
MachineModuleInfo *MMI;
- /// Name-mangler for global names.
- ///
- Mangler *Mang;
-
/// The symbol for the current function. This is recalculated at the beginning
/// of each call to runOnMachineFunction().
///
@@ -126,11 +122,16 @@ private:
struct HandlerInfo {
AsmPrinterHandler *Handler;
- const char *TimerName, *TimerGroupName;
+ const char *TimerName;
+ const char *TimerDescription;
+ const char *TimerGroupName;
+ const char *TimerGroupDescription;
HandlerInfo(AsmPrinterHandler *Handler, const char *TimerName,
- const char *TimerGroupName)
+ const char *TimerDescription, const char *TimerGroupName,
+ const char *TimerGroupDescription)
: Handler(Handler), TimerName(TimerName),
- TimerGroupName(TimerGroupName) {}
+ TimerDescription(TimerDescription), TimerGroupName(TimerGroupName),
+ TimerGroupDescription(TimerGroupDescription) {}
};
/// A vector of all debug/EH info emitters we should use. This vector
/// maintains ownership of the emitters.
@@ -148,6 +149,9 @@ public:
DwarfDebug *getDwarfDebug() { return DD; }
DwarfDebug *getDwarfDebug() const { return DD; }
+ uint16_t getDwarfVersion() const;
+ void setDwarfVersion(uint16_t Version);
+
bool isPositionIndependent() const;
/// Return true if assembly output should contain comments.
@@ -176,9 +180,6 @@ public:
void EmitToStreamer(MCStreamer &S, const MCInst &Inst);
- /// Return the target triple string.
- StringRef getTargetTriple() const;
-
/// Return the current section we are emitting to.
const MCSection *getCurrentSection() const;
@@ -188,6 +189,34 @@ public:
MCSymbol *getSymbol(const GlobalValue *GV) const;
//===------------------------------------------------------------------===//
+ // XRay instrumentation implementation.
+ //===------------------------------------------------------------------===//
+public:
+ // This describes the kind of sled we're storing in the XRay table.
+ enum class SledKind : uint8_t {
+ FUNCTION_ENTER = 0,
+ FUNCTION_EXIT = 1,
+ TAIL_CALL = 2,
+ };
+
+ // The table will contain these structs that point to the sled, the function
+ // containing the sled, and what kind of sled (and whether they should always
+ // be instrumented).
+ struct XRayFunctionEntry {
+ const MCSymbol *Sled;
+ const MCSymbol *Function;
+ SledKind Kind;
+ bool AlwaysInstrument;
+ const class Function *Fn;
+ };
+
+ // All the sleds to be emitted.
+ std::vector<XRayFunctionEntry> Sleds;
+
+ // Helper function to record a given XRay sled.
+ void recordSled(MCSymbol *Sled, const MachineInstr &MI, SledKind Kind);
+
+ //===------------------------------------------------------------------===//
// MachineFunctionPass Implementation.
//===------------------------------------------------------------------===//
@@ -439,10 +468,6 @@ public:
/// Get the value for DW_AT_APPLE_isa. Zero if no isa encoding specified.
virtual unsigned getISAEncoding() { return 0; }
- /// EmitDwarfRegOp - Emit a dwarf register operation.
- virtual void EmitDwarfRegOp(ByteStreamer &BS,
- const MachineLocation &MLoc) const;
-
//===------------------------------------------------------------------===//
// Dwarf Lowering Routines
//===------------------------------------------------------------------===//
diff --git a/contrib/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/contrib/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 69951afb623c..df0dc1a38ae7 100644
--- a/contrib/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/contrib/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -60,8 +60,9 @@ private:
return Cost;
}
- /// Estimate the cost overhead of SK_Alternate shuffle.
- unsigned getAltShuffleOverhead(Type *Ty) {
+ /// Estimate a cost of shuffle as a sequence of extract and insert
+ /// operations.
+ unsigned getPermuteShuffleOverhead(Type *Ty) {
assert(Ty->isVectorTy() && "Can only shuffle vectors");
unsigned Cost = 0;
// Shuffle cost is equal to the cost of extracting element from its argument
@@ -97,18 +98,13 @@ protected:
using TargetTransformInfoImplBase::DL;
public:
- // Provide value semantics. MSVC requires that we spell all of these out.
- BasicTTIImplBase(const BasicTTIImplBase &Arg)
- : BaseT(static_cast<const BaseT &>(Arg)) {}
- BasicTTIImplBase(BasicTTIImplBase &&Arg)
- : BaseT(std::move(static_cast<BaseT &>(Arg))) {}
-
/// \name Scalar TTI Implementations
/// @{
- bool allowsMisalignedMemoryAccesses(unsigned BitWidth, unsigned AddressSpace,
+ bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
+ unsigned BitWidth, unsigned AddressSpace,
unsigned Alignment, bool *Fast) const {
- MVT M = MVT::getIntegerVT(BitWidth);
- return getTLI()->allowsMisalignedMemoryAccesses(M, AddressSpace, Alignment, Fast);
+ EVT E = EVT::getIntegerVT(Context, BitWidth);
+ return getTLI()->allowsMisalignedMemoryAccesses(E, AddressSpace, Alignment, Fast);
}
bool hasBranchDivergence() { return false; }
@@ -144,6 +140,10 @@ public:
return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace);
}
+ bool isFoldableMemAccessOffset(Instruction *I, int64_t Offset) {
+ return getTLI()->isFoldableMemAccessOffset(I, Offset);
+ }
+
bool isTruncateFree(Type *Ty1, Type *Ty2) {
return getTLI()->isTruncateFree(Ty1, Ty2);
}
@@ -279,8 +279,17 @@ public:
}
// Enable runtime and partial unrolling up to the specified size.
- UP.Partial = UP.Runtime = true;
- UP.PartialThreshold = UP.PartialOptSizeThreshold = MaxOps;
+ // Enable using trip count upper bound to unroll loops.
+ UP.Partial = UP.Runtime = UP.UpperBound = true;
+ UP.PartialThreshold = MaxOps;
+
+ // Avoid unrolling when optimizing for size.
+ UP.OptSizeThreshold = 0;
+ UP.PartialOptSizeThreshold = 0;
+
+  // Set the number of instructions optimized away when a "back edge"
+  // becomes a "fall through" to its default value of 2.
+ UP.BEInsns = 2;
}
/// @}
@@ -343,8 +352,9 @@ public:
unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) {
- if (Kind == TTI::SK_Alternate) {
- return getAltShuffleOverhead(Tp);
+ if (Kind == TTI::SK_Alternate || Kind == TTI::SK_PermuteTwoSrc ||
+ Kind == TTI::SK_PermuteSingleSrc) {
+ return getPermuteShuffleOverhead(Tp);
}
return 1;
}
@@ -919,16 +929,71 @@ public:
unsigned getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwise) {
assert(Ty->isVectorTy() && "Expect a vector type");
+ Type *ScalarTy = Ty->getVectorElementType();
unsigned NumVecElts = Ty->getVectorNumElements();
unsigned NumReduxLevels = Log2_32(NumVecElts);
- unsigned ArithCost =
- NumReduxLevels *
- static_cast<T *>(this)->getArithmeticInstrCost(Opcode, Ty);
- // Assume the pairwise shuffles add a cost.
- unsigned ShuffleCost =
- NumReduxLevels * (IsPairwise + 1) *
- static_cast<T *>(this)
- ->getShuffleCost(TTI::SK_ExtractSubvector, Ty, NumVecElts / 2, Ty);
+ // Try to calculate arithmetic and shuffle op costs for reduction operations.
+  // We're assuming that reduction operations are performed in the following way:
+ // 1. Non-pairwise reduction
+ // %val1 = shufflevector<n x t> %val, <n x t> %undef,
+ // <n x i32> <i32 n/2, i32 n/2 + 1, ..., i32 n, i32 undef, ..., i32 undef>
+ // \----------------v-------------/ \----------v------------/
+ // n/2 elements n/2 elements
+ // %red1 = op <n x t> %val, <n x t> val1
+  // After this operation we have a vector %red1 in which only the first n/2
+  // elements are meaningful; the second n/2 elements are undefined and can be
+  // dropped. All other operations are actually working with the vector of
+  // length n/2, not n, though the real vector length is still n.
+ // %val2 = shufflevector<n x t> %red1, <n x t> %undef,
+ // <n x i32> <i32 n/4, i32 n/4 + 1, ..., i32 n/2, i32 undef, ..., i32 undef>
+ // \----------------v-------------/ \----------v------------/
+ // n/4 elements 3*n/4 elements
+ // %red2 = op <n x t> %red1, <n x t> val2 - working with the vector of
+ // length n/2, the resulting vector has length n/4 etc.
+ // 2. Pairwise reduction:
+ // Everything is the same except for an additional shuffle operation which
+ // is used to produce operands for pairwise kind of reductions.
+ // %val1 = shufflevector<n x t> %val, <n x t> %undef,
+ // <n x i32> <i32 0, i32 2, ..., i32 n-2, i32 undef, ..., i32 undef>
+ // \-------------v----------/ \----------v------------/
+ // n/2 elements n/2 elements
+ // %val2 = shufflevector<n x t> %val, <n x t> %undef,
+ // <n x i32> <i32 1, i32 3, ..., i32 n-1, i32 undef, ..., i32 undef>
+ // \-------------v----------/ \----------v------------/
+ // n/2 elements n/2 elements
+ // %red1 = op <n x t> %val1, <n x t> val2
+ // Again, the operation is performed on <n x t> vector, but the resulting
+ // vector %red1 is <n/2 x t> vector.
+ //
+ // The cost model should take into account that the actual length of the
+ // vector is reduced on each iteration.
+ unsigned ArithCost = 0;
+ unsigned ShuffleCost = 0;
+ auto *ConcreteTTI = static_cast<T *>(this);
+ std::pair<unsigned, MVT> LT =
+ ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
+ unsigned LongVectorCount = 0;
+ unsigned MVTLen =
+ LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
+ while (NumVecElts > MVTLen) {
+ NumVecElts /= 2;
+ // Assume the pairwise shuffles add a cost.
+ ShuffleCost += (IsPairwise + 1) *
+ ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
+ NumVecElts, Ty);
+ ArithCost += ConcreteTTI->getArithmeticInstrCost(Opcode, Ty);
+ Ty = VectorType::get(ScalarTy, NumVecElts);
+ ++LongVectorCount;
+ }
+  // The minimal length of the vector is limited by the real length of vector
+  // operations performed on the current platform. That's why several final
+  // reduction operations are performed on vectors of the same
+  // architecture-dependent length.
+ ShuffleCost += (NumReduxLevels - LongVectorCount) * (IsPairwise + 1) *
+ ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
+ NumVecElts, Ty);
+ ArithCost += (NumReduxLevels - LongVectorCount) *
+ ConcreteTTI->getArithmeticInstrCost(Opcode, Ty);
return ShuffleCost + ArithCost + getScalarizationOverhead(Ty, false, true);
}
@@ -951,13 +1016,6 @@ class BasicTTIImpl : public BasicTTIImplBase<BasicTTIImpl> {
public:
explicit BasicTTIImpl(const TargetMachine *ST, const Function &F);
-
- // Provide value semantics. MSVC requires that we spell all of these out.
- BasicTTIImpl(const BasicTTIImpl &Arg)
- : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {}
- BasicTTIImpl(BasicTTIImpl &&Arg)
- : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
- TLI(std::move(Arg.TLI)) {}
};
}
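
The long comment in getReductionCost above describes a cost recurrence; the following standalone sketch (plain C++, not LLVM code) restates it with unit shuffle/op costs and a made-up LegalWidth, purely to make the two accumulation phases concrete:

#include <cassert>
#include <cmath>
#include <cstdio>

// Count shuffle + arithmetic steps for a log2(N)-level vector reduction,
// assuming every shuffle and op costs 1 and LegalWidth is the number of
// elements of the widest legal vector type.
static unsigned reductionCost(unsigned NumVecElts, unsigned LegalWidth,
                              bool IsPairwise) {
  assert(NumVecElts && !(NumVecElts & (NumVecElts - 1)) && "power of two");
  unsigned NumReduxLevels = static_cast<unsigned>(std::log2(NumVecElts));
  unsigned ShuffleCost = 0, ArithCost = 0, LongVectorCount = 0;
  // Phase 1: while the vector is longer than the hardware handles in one go,
  // each halving step pays for its own (possibly pairwise) shuffles and one op.
  while (NumVecElts > LegalWidth) {
    NumVecElts /= 2;
    ShuffleCost += (IsPairwise + 1) * 1;  // 1 == assumed shuffle cost
    ArithCost += 1;                       // 1 == assumed op cost
    ++LongVectorCount;
  }
  // Phase 2: the remaining levels all run at the fixed,
  // architecture-dependent width.
  ShuffleCost += (NumReduxLevels - LongVectorCount) * (IsPairwise + 1) * 1;
  ArithCost += (NumReduxLevels - LongVectorCount) * 1;
  return ShuffleCost + ArithCost;
}

int main() {
  std::printf("%u\n", reductionCost(16, 4, /*IsPairwise=*/false)); // prints 8
}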
diff --git a/contrib/llvm/include/llvm/CodeGen/CallingConvLower.h b/contrib/llvm/include/llvm/CodeGen/CallingConvLower.h
index 92e58564e040..bfbd22823eb8 100644
--- a/contrib/llvm/include/llvm/CodeGen/CallingConvLower.h
+++ b/contrib/llvm/include/llvm/CodeGen/CallingConvLower.h
@@ -296,6 +296,12 @@ public:
void AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
CCAssignFn Fn);
+ /// The function will invoke AnalyzeFormalArguments.
+ void AnalyzeArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
+ CCAssignFn Fn) {
+ AnalyzeFormalArguments(Ins, Fn);
+ }
+
/// AnalyzeReturn - Analyze the returned values of a return,
/// incorporating info about the result values into this state.
void AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
@@ -318,11 +324,22 @@ public:
SmallVectorImpl<ISD::ArgFlagsTy> &Flags,
CCAssignFn Fn);
+ /// The function will invoke AnalyzeCallOperands.
+ void AnalyzeArguments(const SmallVectorImpl<ISD::OutputArg> &Outs,
+ CCAssignFn Fn) {
+ AnalyzeCallOperands(Outs, Fn);
+ }
+
/// AnalyzeCallResult - Analyze the return values of a call,
/// incorporating info about the passed values into this state.
void AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
CCAssignFn Fn);
+ /// A shadow allocated register is a register that was allocated
+ /// but wasn't added to the location list (Locs).
+ /// \returns true if the register was allocated as shadow or false otherwise.
+ bool IsShadowAllocatedReg(unsigned Reg) const;
+
/// AnalyzeCallResult - Same as above except it's specialized for calls which
/// produce a single value.
void AnalyzeCallResult(MVT VT, CCAssignFn Fn);
@@ -423,7 +440,7 @@ public:
void ensureMaxAlignment(unsigned Align) {
if (!AnalyzingMustTailForwardedRegs)
- MF.getFrameInfo()->ensureMaxAlignment(Align);
+ MF.getFrameInfo().ensureMaxAlignment(Align);
}
/// Version of AllocateStack with extra register to be shadowed.
@@ -521,6 +538,37 @@ public:
const SmallVectorImpl<ISD::InputArg> &Ins,
CCAssignFn CalleeFn, CCAssignFn CallerFn);
+ /// The function runs an additional analysis pass over function arguments.
+ /// It will mark each argument with the attribute flag SecArgPass.
+ /// After running, it will sort the locs list.
+ template <class T>
+ void AnalyzeArgumentsSecondPass(const SmallVectorImpl<T> &Args,
+ CCAssignFn Fn) {
+ unsigned NumFirstPassLocs = Locs.size();
+
+    /// Create an argument list similar to \p Args in which each argument is
+    /// marked with the SecArgPass flag.
+ SmallVector<T, 16> SecPassArg;
+ // SmallVector<ISD::InputArg, 16> SecPassArg;
+ for (auto Arg : Args) {
+ Arg.Flags.setSecArgPass();
+ SecPassArg.push_back(Arg);
+ }
+
+ // Run the second argument pass
+ AnalyzeArguments(SecPassArg, Fn);
+
+ // Sort the locations of the arguments according to their original position.
+ SmallVector<CCValAssign, 16> TmpArgLocs;
+ std::swap(TmpArgLocs, Locs);
+ auto B = TmpArgLocs.begin(), E = TmpArgLocs.end();
+ std::merge(B, B + NumFirstPassLocs, B + NumFirstPassLocs, E,
+ std::back_inserter(Locs),
+ [](const CCValAssign &A, const CCValAssign &B) -> bool {
+ return A.getValNo() < B.getValNo();
+ });
+ }
+
private:
/// MarkAllocated - Mark a register and all of its aliases as allocated.
void MarkAllocated(unsigned Reg);
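
The second-pass argument analysis added above ends with a std::merge keyed on the original value number; the following standalone sketch uses a hypothetical Loc type to show that merge step in isolation:

#include <algorithm>
#include <iterator>
#include <vector>

struct Loc {
  unsigned ValNo;    // original argument number
  bool SecondPass;   // mirrors the SecArgPass marking
};

// Both inputs are sorted by ValNo, so a single std::merge keyed on that
// number restores the original argument order across the two passes.
static std::vector<Loc> mergeByValNo(const std::vector<Loc> &FirstPass,
                                     const std::vector<Loc> &SecondPass) {
  std::vector<Loc> Merged;
  std::merge(FirstPass.begin(), FirstPass.end(), SecondPass.begin(),
             SecondPass.end(), std::back_inserter(Merged),
             [](const Loc &A, const Loc &B) { return A.ValNo < B.ValNo; });
  return Merged;
}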
diff --git a/contrib/llvm/include/llvm/CodeGen/CommandFlags.h b/contrib/llvm/include/llvm/CodeGen/CommandFlags.h
index 6376c06768b3..aab522d00de7 100644
--- a/contrib/llvm/include/llvm/CodeGen/CommandFlags.h
+++ b/contrib/llvm/include/llvm/CodeGen/CommandFlags.h
@@ -27,7 +27,6 @@
#include "llvm/Support/Host.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRecip.h"
#include <string>
using namespace llvm;
@@ -54,7 +53,12 @@ cl::opt<Reloc::Model> RelocModel(
"Fully relocatable, position independent code"),
clEnumValN(Reloc::DynamicNoPIC, "dynamic-no-pic",
"Relocatable external references, non-relocatable code"),
- clEnumValEnd));
+ clEnumValN(Reloc::ROPI, "ropi",
+ "Code and read-only data relocatable, accessed PC-relative"),
+ clEnumValN(Reloc::RWPI, "rwpi",
+ "Read-write data relocatable, accessed relative to static base"),
+ clEnumValN(Reloc::ROPI_RWPI, "ropi-rwpi",
+ "Combination of ropi and rwpi")));
static inline Optional<Reloc::Model> getRelocModel() {
if (RelocModel.getNumOccurrences()) {
@@ -71,8 +75,7 @@ TMModel("thread-model",
cl::values(clEnumValN(ThreadModel::POSIX, "posix",
"POSIX thread model"),
clEnumValN(ThreadModel::Single, "single",
- "Single thread model"),
- clEnumValEnd));
+ "Single thread model")));
cl::opt<llvm::CodeModel::Model>
CMModel("code-model",
@@ -87,8 +90,7 @@ CMModel("code-model",
clEnumValN(CodeModel::Medium, "medium",
"Medium code model"),
clEnumValN(CodeModel::Large, "large",
- "Large code model"),
- clEnumValEnd));
+ "Large code model")));
cl::opt<llvm::ExceptionHandling>
ExceptionModel("exception-model",
@@ -103,8 +105,7 @@ ExceptionModel("exception-model",
clEnumValN(ExceptionHandling::ARM, "arm",
"ARM EHABI exceptions"),
clEnumValN(ExceptionHandling::WinEH, "wineh",
- "Windows exception model"),
- clEnumValEnd));
+ "Windows exception model")));
cl::opt<TargetMachine::CodeGenFileType>
FileType("filetype", cl::init(TargetMachine::CGFT_AssemblyFile),
@@ -115,8 +116,7 @@ FileType("filetype", cl::init(TargetMachine::CGFT_AssemblyFile),
clEnumValN(TargetMachine::CGFT_ObjectFile, "obj",
"Emit a native object ('.o') file"),
clEnumValN(TargetMachine::CGFT_Null, "null",
- "Emit nothing, for performance testing"),
- clEnumValEnd));
+ "Emit nothing, for performance testing")));
cl::opt<bool>
EnableFPMAD("enable-fp-mad",
@@ -144,6 +144,25 @@ EnableNoNaNsFPMath("enable-no-nans-fp-math",
cl::init(false));
cl::opt<bool>
+EnableNoTrappingFPMath("enable-no-trapping-fp-math",
+ cl::desc("Enable setting the FP exceptions build "
+ "attribute not to use exceptions"),
+ cl::init(false));
+
+cl::opt<llvm::FPDenormal::DenormalMode>
+DenormalMode("denormal-fp-math",
+ cl::desc("Select which denormal numbers the code is permitted to require"),
+ cl::init(FPDenormal::IEEE),
+ cl::values(
+ clEnumValN(FPDenormal::IEEE, "ieee",
+ "IEEE 754 denormal numbers"),
+ clEnumValN(FPDenormal::PreserveSign, "preserve-sign",
+ "the sign of a flushed-to-zero number is preserved "
+ "in the sign of 0"),
+ clEnumValN(FPDenormal::PositiveZero, "positive-zero",
+ "denormals are flushed to positive zero")));
+
+cl::opt<bool>
EnableHonorSignDependentRoundingFPMath("enable-sign-dependent-rounding-fp-math",
cl::Hidden,
cl::desc("Force codegen to assume rounding mode can change dynamically"),
@@ -159,8 +178,7 @@ FloatABIForCalls("float-abi",
clEnumValN(FloatABI::Soft, "soft",
"Soft float ABI (implied by -soft-float)"),
clEnumValN(FloatABI::Hard, "hard",
- "Hard float ABI (uses FP registers)"),
- clEnumValEnd));
+ "Hard float ABI (uses FP registers)")));
cl::opt<llvm::FPOpFusion::FPOpFusionMode>
FuseFPOps("fp-contract",
@@ -172,14 +190,7 @@ FuseFPOps("fp-contract",
clEnumValN(FPOpFusion::Standard, "on",
"Only fuse 'blessed' FP ops."),
clEnumValN(FPOpFusion::Strict, "off",
- "Only fuse FP ops when the result won't be affected."),
- clEnumValEnd));
-
-cl::list<std::string>
-ReciprocalOps("recip",
- cl::CommaSeparated,
- cl::desc("Choose reciprocal operation types and parameters."),
- cl::value_desc("all,none,default,divf,!vec-sqrtd,vec-divd:0,sqrt:9..."));
+ "Only fuse FP ops when the result won't be affected.")));
cl::opt<bool>
DontPlaceZerosInBSS("nozero-initialized-in-bss",
@@ -221,14 +232,10 @@ UseCtors("use-ctors",
cl::desc("Use .ctors instead of .init_array."),
cl::init(false));
-cl::opt<std::string> StopAfter("stop-after",
- cl::desc("Stop compilation after a specific pass"),
- cl::value_desc("pass-name"),
- cl::init(""));
-cl::opt<std::string> StartAfter("start-after",
- cl::desc("Resume compilation after a specific pass"),
- cl::value_desc("pass-name"),
- cl::init(""));
+cl::opt<bool> RelaxELFRelocations(
+ "relax-elf-relocations",
+ cl::desc("Emit GOTPCRELX/REX_GOTPCRELX instead of GOTPCREL on x86-64 ELF"),
+ cl::init(false));
cl::opt<bool> DataSections("data-sections",
cl::desc("Emit data into separate sections"),
@@ -247,21 +254,6 @@ cl::opt<bool> UniqueSectionNames("unique-section-names",
cl::desc("Give unique names to every section"),
cl::init(true));
-cl::opt<llvm::JumpTable::JumpTableType>
-JTableType("jump-table-type",
- cl::desc("Choose the type of Jump-Instruction Table for jumptable."),
- cl::init(JumpTable::Single),
- cl::values(
- clEnumValN(JumpTable::Single, "single",
- "Create a single table for all jumptable functions"),
- clEnumValN(JumpTable::Arity, "arity",
- "Create one table per number of parameters."),
- clEnumValN(JumpTable::Simplified, "simplified",
- "Create one table per simplified function type."),
- clEnumValN(JumpTable::Full, "full",
- "Create one table per unique function type."),
- clEnumValEnd));
-
cl::opt<llvm::EABI> EABIVersion(
"meabi", cl::desc("Set EABI type (default depends on triple):"),
cl::init(EABI::Default),
@@ -269,7 +261,7 @@ cl::opt<llvm::EABI> EABIVersion(
"Triple default EABI version"),
clEnumValN(EABI::EABI4, "4", "EABI version 4"),
clEnumValN(EABI::EABI5, "5", "EABI version 5"),
- clEnumValN(EABI::GNU, "gnu", "EABI GNU"), clEnumValEnd));
+ clEnumValN(EABI::GNU, "gnu", "EABI GNU")));
cl::opt<DebuggerKind>
DebuggerTuningOpt("debugger-tune",
@@ -279,8 +271,7 @@ DebuggerTuningOpt("debugger-tune",
clEnumValN(DebuggerKind::GDB, "gdb", "gdb"),
clEnumValN(DebuggerKind::LLDB, "lldb", "lldb"),
clEnumValN(DebuggerKind::SCE, "sce",
- "SCE targets (e.g. PS4)"),
- clEnumValEnd));
+ "SCE targets (e.g. PS4)")));
// Common utility function tightly tied to the options listed here. Initializes
// a TargetOptions object with CodeGen flags and returns it.
@@ -288,10 +279,11 @@ static inline TargetOptions InitTargetOptionsFromCodeGenFlags() {
TargetOptions Options;
Options.LessPreciseFPMADOption = EnableFPMAD;
Options.AllowFPOpFusion = FuseFPOps;
- Options.Reciprocals = TargetRecip(ReciprocalOps);
Options.UnsafeFPMath = EnableUnsafeFPMath;
Options.NoInfsFPMath = EnableNoInfsFPMath;
Options.NoNaNsFPMath = EnableNoNaNsFPMath;
+ Options.NoTrappingFPMath = EnableNoTrappingFPMath;
+ Options.FPDenormalMode = DenormalMode;
Options.HonorSignDependentRoundingFPMathOption =
EnableHonorSignDependentRoundingFPMath;
if (FloatABIForCalls != FloatABI::Default)
@@ -301,6 +293,7 @@ static inline TargetOptions InitTargetOptionsFromCodeGenFlags() {
Options.StackAlignmentOverride = OverrideStackAlignment;
Options.StackSymbolOrdering = StackSymbolOrdering;
Options.UseInitArray = !UseCtors;
+ Options.RelaxELFRelocations = RelaxELFRelocations;
Options.DataSections = DataSections;
Options.FunctionSections = FunctionSections;
Options.UniqueSectionNames = UniqueSectionNames;
@@ -308,7 +301,6 @@ static inline TargetOptions InitTargetOptionsFromCodeGenFlags() {
Options.ExceptionModel = ExceptionModel;
Options.MCOptions = InitMCTargetOptionsFromFlags();
- Options.JTType = JTableType;
Options.ThreadModel = TMModel;
Options.EABIVersion = EABIVersion;
@@ -385,7 +377,8 @@ static inline void setFunctionAttributes(StringRef CPU, StringRef Features,
if (F->getIntrinsicID() == Intrinsic::debugtrap ||
F->getIntrinsicID() == Intrinsic::trap)
Call->addAttribute(llvm::AttributeSet::FunctionIndex,
- "trap-func-name", TrapFuncName);
+ Attribute::get(Ctx, "trap-func-name",
+ TrapFuncName));
// Let NewAttrs override Attrs.
NewAttrs = Attrs.addAttributes(Ctx, AttributeSet::FunctionIndex, NewAttrs);
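
For context, a minimal sketch of the terminator-free cl::values() style this hunk converts the flags to; the option name and enum below are illustrative only:

#include "llvm/Support/CommandLine.h"
using namespace llvm;

enum class Level { Off, Basic, Full };

// With the clEnumValEnd sentinel gone, the value list simply ends with the
// last clEnumValN entry.
static cl::opt<Level> ExampleLevel(
    "example-level", cl::desc("Illustrative option"),
    cl::init(Level::Off),
    cl::values(clEnumValN(Level::Off, "off", "Disabled"),
               clEnumValN(Level::Basic, "basic", "Basic mode"),
               clEnumValN(Level::Full, "full", "Full mode")));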
diff --git a/contrib/llvm/include/llvm/CodeGen/DIE.h b/contrib/llvm/include/llvm/CodeGen/DIE.h
index 7d6e66fa6ec2..1e3476cd8395 100644
--- a/contrib/llvm/include/llvm/CodeGen/DIE.h
+++ b/contrib/llvm/include/llvm/CodeGen/DIE.h
@@ -16,87 +16,96 @@
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/PointerIntPair.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/DwarfStringPoolEntry.h"
+#include "llvm/Support/AlignOf.h"
+#include "llvm/Support/Allocator.h"
#include "llvm/Support/Dwarf.h"
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
+#include <new>
+#include <type_traits>
+#include <vector>
namespace llvm {
+
class AsmPrinter;
+class DIE;
+class DIEUnit;
class MCExpr;
+class MCSection;
class MCSymbol;
class raw_ostream;
-class DwarfTypeUnit;
//===--------------------------------------------------------------------===//
-/// DIEAbbrevData - Dwarf abbreviation data, describes one attribute of a
-/// Dwarf abbreviation.
+/// Dwarf abbreviation data, describes one attribute of a Dwarf abbreviation.
class DIEAbbrevData {
- /// Attribute - Dwarf attribute code.
- ///
+ /// Dwarf attribute code.
dwarf::Attribute Attribute;
- /// Form - Dwarf form code.
- ///
+ /// Dwarf form code.
dwarf::Form Form;
public:
DIEAbbrevData(dwarf::Attribute A, dwarf::Form F) : Attribute(A), Form(F) {}
- // Accessors.
+ /// Accessors.
+ /// @{
dwarf::Attribute getAttribute() const { return Attribute; }
dwarf::Form getForm() const { return Form; }
+ /// @}
- /// Profile - Used to gather unique data for the abbreviation folding set.
- ///
+ /// Used to gather unique data for the abbreviation folding set.
void Profile(FoldingSetNodeID &ID) const;
};
//===--------------------------------------------------------------------===//
-/// DIEAbbrev - Dwarf abbreviation, describes the organization of a debug
-/// information object.
+/// Dwarf abbreviation, describes the organization of a debug information
+/// object.
class DIEAbbrev : public FoldingSetNode {
/// Unique number for node.
- ///
unsigned Number;
- /// Tag - Dwarf tag code.
- ///
+ /// Dwarf tag code.
dwarf::Tag Tag;
- /// Children - Whether or not this node has children.
+ /// Whether or not this node has children.
///
- // This cheats a bit in all of the uses since the values in the standard
- // are 0 and 1 for no children and children respectively.
+ /// This cheats a bit in all of the uses since the values in the standard
+ /// are 0 and 1 for no children and children respectively.
bool Children;
- /// Data - Raw data bytes for abbreviation.
- ///
+ /// Raw data bytes for abbreviation.
SmallVector<DIEAbbrevData, 12> Data;
public:
- DIEAbbrev(dwarf::Tag T, bool C) : Tag(T), Children(C), Data() {}
+ DIEAbbrev(dwarf::Tag T, bool C) : Tag(T), Children(C) {}
- // Accessors.
+ /// Accessors.
+ /// @{
dwarf::Tag getTag() const { return Tag; }
unsigned getNumber() const { return Number; }
bool hasChildren() const { return Children; }
const SmallVectorImpl<DIEAbbrevData> &getData() const { return Data; }
void setChildrenFlag(bool hasChild) { Children = hasChild; }
void setNumber(unsigned N) { Number = N; }
+ /// @}
- /// AddAttribute - Adds another set of attribute information to the
- /// abbreviation.
+ /// Adds another set of attribute information to the abbreviation.
void AddAttribute(dwarf::Attribute Attribute, dwarf::Form Form) {
Data.push_back(DIEAbbrevData(Attribute, Form));
}
- /// Profile - Used to gather unique data for the abbreviation folding set.
- ///
+ /// Used to gather unique data for the abbreviation folding set.
void Profile(FoldingSetNodeID &ID) const;
- /// Emit - Print the abbreviation using the specified asm printer.
- ///
+ /// Print the abbreviation using the specified asm printer.
void Emit(const AsmPrinter *AP) const;
void print(raw_ostream &O);
@@ -104,7 +113,38 @@ public:
};
//===--------------------------------------------------------------------===//
-/// DIEInteger - An integer value DIE.
+/// Helps unique DIEAbbrev objects and assigns abbreviation numbers.
+///
+/// This class will unique the DIE abbreviations for a llvm::DIE object and
+/// assign a unique abbreviation number to each unique DIEAbbrev object it
+/// finds. The resulting collection of DIEAbbrev objects can then be emitted
+/// into the .debug_abbrev section.
+class DIEAbbrevSet {
+ /// The bump allocator to use when creating DIEAbbrev objects in the uniqued
+ /// storage container.
+ BumpPtrAllocator &Alloc;
+ /// \brief FoldingSet that uniques the abbreviations.
+ llvm::FoldingSet<DIEAbbrev> AbbreviationsSet;
+ /// A list of all the unique abbreviations in use.
+ std::vector<DIEAbbrev *> Abbreviations;
+
+public:
+ DIEAbbrevSet(BumpPtrAllocator &A) : Alloc(A) {}
+ ~DIEAbbrevSet();
+ /// Generate the abbreviation declaration for a DIE and return a pointer to
+ /// the generated abbreviation.
+ ///
+ /// \param Die the debug info entry to generate the abbreviation for.
+ /// \returns A reference to the uniqued abbreviation declaration that is
+ /// owned by this class.
+ DIEAbbrev &uniqueAbbreviation(DIE &Die);
+
+ /// Print all abbreviations using the specified asm printer.
+ void Emit(const AsmPrinter *AP, MCSection *Section) const;
+};
+
+//===--------------------------------------------------------------------===//
+/// An integer value DIE.
///
class DIEInteger {
uint64_t Integer;
@@ -112,8 +152,7 @@ class DIEInteger {
public:
explicit DIEInteger(uint64_t I) : Integer(I) {}
- /// BestForm - Choose the best form for integer.
- ///
+ /// Choose the best form for integer.
static dwarf::Form BestForm(bool IsSigned, uint64_t Int) {
if (IsSigned) {
const int64_t SignedInt = Int;
@@ -144,16 +183,14 @@ public:
};
//===--------------------------------------------------------------------===//
-/// DIEExpr - An expression DIE.
-//
+/// An expression DIE.
class DIEExpr {
const MCExpr *Expr;
public:
explicit DIEExpr(const MCExpr *E) : Expr(E) {}
- /// getValue - Get MCExpr.
- ///
+ /// Get MCExpr.
const MCExpr *getValue() const { return Expr; }
void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const;
@@ -163,16 +200,14 @@ public:
};
//===--------------------------------------------------------------------===//
-/// DIELabel - A label DIE.
-//
+/// A label DIE.
class DIELabel {
const MCSymbol *Label;
public:
explicit DIELabel(const MCSymbol *L) : Label(L) {}
- /// getValue - Get MCSymbol.
- ///
+ /// Get MCSymbol.
const MCSymbol *getValue() const { return Label; }
void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const;
@@ -182,7 +217,7 @@ public:
};
//===--------------------------------------------------------------------===//
-/// DIEDelta - A simple label difference DIE.
+/// A simple label difference DIE.
///
class DIEDelta {
const MCSymbol *LabelHi;
@@ -198,15 +233,16 @@ public:
};
//===--------------------------------------------------------------------===//
-/// DIEString - A container for string values.
+/// A container for string pool string values.
///
+/// This class is used with the DW_FORM_strp and DW_FORM_GNU_str_index forms.
class DIEString {
DwarfStringPoolEntryRef S;
public:
DIEString(DwarfStringPoolEntryRef S) : S(S) {}
- /// getString - Grab the string out of the object.
+ /// Grab the string out of the object.
StringRef getString() const { return S.getString(); }
void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const;
@@ -216,9 +252,31 @@ public:
};
//===--------------------------------------------------------------------===//
-/// DIEEntry - A pointer to another debug information entry. An instance of
-/// this class can also be used as a proxy for a debug information entry not
-/// yet defined (ie. types.)
+/// A container for inline string values.
+///
+/// This class is used with the DW_FORM_string form.
+class DIEInlineString {
+ StringRef S;
+
+public:
+ template <typename Allocator>
+ explicit DIEInlineString(StringRef Str, Allocator &A) : S(Str.copy(A)) {}
+
+ ~DIEInlineString() = default;
+
+ /// Grab the string out of the object.
+ StringRef getString() const { return S; }
+
+ void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const;
+ unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const;
+
+ void print(raw_ostream &O) const;
+};
+
+//===--------------------------------------------------------------------===//
+/// A pointer to another debug information entry. An instance of this class can
+/// also be used as a proxy for a debug information entry not yet defined
+/// (i.e. types).
class DIE;
class DIEEntry {
DIE *Entry;
@@ -230,30 +288,23 @@ public:
DIE &getEntry() const { return *Entry; }
- /// Returns size of a ref_addr entry.
- static unsigned getRefAddrSize(const AsmPrinter *AP);
-
void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const;
- unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
- return Form == dwarf::DW_FORM_ref_addr ? getRefAddrSize(AP)
- : sizeof(int32_t);
- }
+ unsigned SizeOf(const AsmPrinter *AP, dwarf::Form Form) const;
void print(raw_ostream &O) const;
};
//===--------------------------------------------------------------------===//
-/// DIELocList - Represents a pointer to a location list in the debug_loc
+/// Represents a pointer to a location list in the debug_loc
/// section.
-//
class DIELocList {
- // Index into the .debug_loc vector.
+ /// Index into the .debug_loc vector.
size_t Index;
public:
DIELocList(size_t I) : Index(I) {}
- /// getValue - Grab the current index out.
+ /// Grab the current index out.
size_t getValue() const { return Index; }
void EmitValue(const AsmPrinter *AP, dwarf::Form Form) const;
@@ -263,9 +314,8 @@ public:
};
//===--------------------------------------------------------------------===//
-/// DIEValue - A debug information entry value. Some of these roughly correlate
+/// A debug information entry value. Some of these roughly correlate
/// to DWARF attribute classes.
-///
class DIEBlock;
class DIELoc;
class DIEValue {
@@ -277,8 +327,7 @@ public:
};
private:
- /// Ty - Type of data stored in the value.
- ///
+ /// Type of data stored in the value.
Type Ty = isNone;
dwarf::Attribute Attribute = (dwarf::Attribute)0;
dwarf::Form Form = (dwarf::Form)0;
@@ -355,9 +404,11 @@ private:
public:
DIEValue() = default;
+
DIEValue(const DIEValue &X) : Ty(X.Ty), Attribute(X.Attribute), Form(X.Form) {
copyVal(X);
}
+
DIEValue &operator=(const DIEValue &X) {
destroyVal();
Ty = X.Ty;
@@ -366,6 +417,7 @@ public:
copyVal(X);
return *this;
}
+
~DIEValue() { destroyVal(); }
#define HANDLE_DIEVALUE_SMALL(T) \
@@ -381,11 +433,13 @@ public:
}
#include "llvm/CodeGen/DIEValue.def"
- // Accessors
+ /// Accessors.
+ /// @{
Type getType() const { return Ty; }
dwarf::Attribute getAttribute() const { return Attribute; }
dwarf::Form getForm() const { return Form; }
explicit operator bool() const { return Ty; }
+ /// @}
#define HANDLE_DIEVALUE_SMALL(T) \
const DIE##T &getDIE##T() const { \
@@ -399,12 +453,10 @@ public:
}
#include "llvm/CodeGen/DIEValue.def"
- /// EmitValue - Emit value via the Dwarf writer.
- ///
+ /// Emit value via the Dwarf writer.
void EmitValue(const AsmPrinter *AP) const;
- /// SizeOf - Return the size of a value in bytes.
- ///
+ /// Return the size of a value in bytes.
unsigned SizeOf(const AsmPrinter *AP) const;
void print(raw_ostream &O) const;
@@ -413,6 +465,7 @@ public:
struct IntrusiveBackListNode {
PointerIntPair<IntrusiveBackListNode *, 1> Next;
+
IntrusiveBackListNode() : Next(this, true) {}
IntrusiveBackListNode *getNext() const {
@@ -576,39 +629,34 @@ public:
}
value_range values() {
- return llvm::make_range(value_iterator(List.begin()),
- value_iterator(List.end()));
+ return make_range(value_iterator(List.begin()), value_iterator(List.end()));
}
const_value_range values() const {
- return llvm::make_range(const_value_iterator(List.begin()),
- const_value_iterator(List.end()));
+ return make_range(const_value_iterator(List.begin()),
+ const_value_iterator(List.end()));
}
};
//===--------------------------------------------------------------------===//
-/// DIE - A structured debug information entry. Has an abbreviation which
+/// A structured debug information entry. Has an abbreviation which
/// describes its organization.
class DIE : IntrusiveBackListNode, public DIEValueList {
friend class IntrusiveBackList<DIE>;
+ friend class DIEUnit;
- /// Offset - Offset in debug info section.
- ///
+ /// Dwarf unit relative offset.
unsigned Offset;
-
- /// Size - Size of instance + children.
- ///
+ /// Size of instance + children.
unsigned Size;
-
unsigned AbbrevNumber = ~0u;
-
- /// Tag - Dwarf tag code.
- ///
+ /// Dwarf tag code.
dwarf::Tag Tag = (dwarf::Tag)0;
-
/// Children DIEs.
IntrusiveBackList<DIE> Children;
- DIE *Parent = nullptr;
+ /// The owner is either the parent DIE for children of other DIEs, or a
+ /// DIEUnit which contains this DIE as its unit DIE.
+ PointerUnion<DIE *, DIEUnit *> Owner;
DIE() = delete;
explicit DIE(dwarf::Tag Tag) : Offset(0), Size(0), Tag(Tag) {}
@@ -618,9 +666,15 @@ public:
return new (Alloc) DIE(Tag);
}
+ DIE(const DIE &RHS) = delete;
+ DIE(DIE &&RHS) = delete;
+ void operator=(const DIE &RHS) = delete;
+ void operator=(const DIE &&RHS) = delete;
+
// Accessors.
unsigned getAbbrevNumber() const { return AbbrevNumber; }
dwarf::Tag getTag() const { return Tag; }
+ /// Get the compile/type unit relative offset of this DIE.
unsigned getOffset() const { return Offset; }
unsigned getSize() const { return Size; }
bool hasChildren() const { return !Children.empty(); }
@@ -631,13 +685,13 @@ public:
typedef iterator_range<const_child_iterator> const_child_range;
child_range children() {
- return llvm::make_range(Children.begin(), Children.end());
+ return make_range(Children.begin(), Children.end());
}
const_child_range children() const {
- return llvm::make_range(Children.begin(), Children.end());
+ return make_range(Children.begin(), Children.end());
}
- DIE *getParent() const { return Parent; }
+ DIE *getParent() const;
/// Generate the abbreviation for this DIE.
///
@@ -648,19 +702,50 @@ public:
/// Set the abbreviation number for this DIE.
void setAbbrevNumber(unsigned I) { AbbrevNumber = I; }
- /// Climb up the parent chain to get the compile or type unit DIE this DIE
- /// belongs to.
- const DIE *getUnit() const;
- /// Similar to getUnit, returns null when DIE is not added to an
- /// owner yet.
- const DIE *getUnitOrNull() const;
+ /// Get the absolute offset within the .debug_info or .debug_types section
+ /// for this DIE.
+ unsigned getDebugSectionOffset() const;
+
+ /// Compute the offset of this DIE and all its children.
+ ///
+ /// This function gets called just before we are going to generate the debug
+ /// information and gives each DIE a chance to figure out its CU relative DIE
+ /// offset, unique its abbreviation and fill in the abbreviation code, and
+ /// return the unit offset that points to where the next DIE will be emitted
+ /// within the debug unit section. After this function has been called for all
+ /// DIE objects, the DWARF can be generated: every DIE now has a computed
+ /// offset and can properly refer to any other DIE object.
+ ///
+ /// \param AP AsmPrinter to use when calculating sizes.
+ /// \param AbbrevSet the abbreviation used to unique DIE abbreviations.
+ /// \param CUOffset the compile/type unit relative offset in bytes.
+ /// \returns the offset for the DIE that follows this DIE within the
+ /// current compile/type unit.
+ unsigned computeOffsetsAndAbbrevs(const AsmPrinter *AP,
+ DIEAbbrevSet &AbbrevSet, unsigned CUOffset);
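
// Editorial sketch (not part of the patch): one way an emitter might drive the
// offset computation documented above. The assignOffsets name and the header
// size constant are illustrative; AP and AbbrevSet come from the caller.
unsigned assignOffsets(DIEUnit &Unit, const AsmPrinter *AP,
                       DIEAbbrevSet &AbbrevSet) {
  const unsigned HeaderSize = 11; // assumed DWARF32 v4 compile-unit header size
  // Returns the unit-relative offset just past the last DIE, i.e. where the
  // next DIE in this unit would start.
  return Unit.getUnitDie().computeOffsetsAndAbbrevs(AP, AbbrevSet, HeaderSize);
}
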
+
+ /// Climb up the parent chain to get the compile unit or type unit DIE that
+ /// this DIE belongs to.
+ ///
+ /// \returns the compile or type unit DIE that owns this DIE, or NULL if
+ /// this DIE hasn't been added to a unit DIE.
+ const DIE *getUnitDie() const;
+
+ /// Climb up the parent chain to get the compile unit or type unit that this
+ /// DIE belongs to.
+ ///
+ /// \returns the DIEUnit that represents the compile or type unit that owns
+ /// this DIE, or NULL if this DIE hasn't been added to a unit DIE.
+ const DIEUnit *getUnit() const;
+
void setOffset(unsigned O) { Offset = O; }
void setSize(unsigned S) { Size = S; }
/// Add a child to the DIE.
DIE &addChild(DIE *Child) {
assert(!Child->getParent() && "Child should be orphaned");
- Child->Parent = this;
+ Child->Owner = this;
Children.push_back(*Child);
return Children.back();
}
@@ -676,6 +761,52 @@ public:
};
//===--------------------------------------------------------------------===//
+/// Represents a compile or type unit.
+class DIEUnit {
+ /// The compile unit or type unit DIE. This variable must be an instance of
+ /// DIE so that we can calculate the DIEUnit from any DIE by traversing the
+ /// parent backchain and getting the Unit DIE, and then casting itself to a
+ /// DIEUnit. This allows us to find the DIEUnit for any DIE without
+ /// having to store a pointer to the DIEUnit in each DIE instance.
+ DIE Die;
+ /// The section this unit will be emitted in. This may or may not be set to
+ /// a valid section depending on the client that is emitting DWARF.
+ MCSection *Section;
+ uint64_t Offset; ///< .debug_info or .debug_types absolute section offset.
+ uint32_t Length; ///< The length in bytes of all of the DIEs in this unit.
+ const uint16_t Version; ///< The Dwarf version number for this unit.
+ const uint8_t AddrSize; ///< The size in bytes of an address for this unit.
+public:
+ DIEUnit(uint16_t Version, uint8_t AddrSize, dwarf::Tag UnitTag);
+ DIEUnit(const DIEUnit &RHS) = delete;
+ DIEUnit(DIEUnit &&RHS) = delete;
+ void operator=(const DIEUnit &RHS) = delete;
+ void operator=(const DIEUnit &&RHS) = delete;
+ /// Set the section that this DIEUnit will be emitted into.
+ ///
+ /// This function is used by some clients to set the section. Not all clients
+ /// that emit DWARF use this section variable.
+ void setSection(MCSection *Section) {
+ assert(!this->Section);
+ this->Section = Section;
+ }
+
+ /// Return the section that this DIEUnit will be emitted into.
+ ///
+ /// \returns Section pointer which can be NULL.
+ MCSection *getSection() const { return Section; }
+ void setDebugSectionOffset(unsigned O) { Offset = O; }
+ unsigned getDebugSectionOffset() const { return Offset; }
+ void setLength(uint64_t L) { Length = L; }
+ uint64_t getLength() const { return Length; }
+ uint16_t getDwarfVersion() const { return Version; }
+ uint16_t getAddressSize() const { return AddrSize; }
+ DIE &getUnitDie() { return Die; }
+ const DIE &getUnitDie() const { return Die; }
+};
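
// Editorial sketch (not part of the patch): building a tiny unit and walking
// back up through the new Owner link. DIE::get's signature is assumed from the
// allocation shown earlier; the DWARF tags are illustrative only.
void buildTinyUnit(BumpPtrAllocator &Alloc) {
  DIEUnit CU(/*Version=*/4, /*AddrSize=*/8, dwarf::DW_TAG_compile_unit);
  DIE &CUDie = CU.getUnitDie();
  DIE &Var = CUDie.addChild(DIE::get(Alloc, dwarf::DW_TAG_variable));
  // Any DIE can now locate its owning unit without a per-DIE unit pointer:
  // Var.getUnitDie() == &CUDie and Var.getUnit() == &CU.
  (void)Var;
}
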
+
+
+//===--------------------------------------------------------------------===//
/// DIELoc - Represents an expression location.
//
class DIELoc : public DIEValueList {
@@ -740,6 +871,6 @@ public:
void print(raw_ostream &O) const;
};
-} // end llvm namespace
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_CODEGEN_ASMPRINTER_DIE_H
diff --git a/contrib/llvm/include/llvm/CodeGen/DIEValue.def b/contrib/llvm/include/llvm/CodeGen/DIEValue.def
index c5ff4010b2e4..a3fce9b1d20c 100644
--- a/contrib/llvm/include/llvm/CodeGen/DIEValue.def
+++ b/contrib/llvm/include/llvm/CodeGen/DIEValue.def
@@ -40,6 +40,7 @@ HANDLE_DIEVALUE_SMALL(Entry)
HANDLE_DIEVALUE_LARGE(Block)
HANDLE_DIEVALUE_LARGE(Loc)
HANDLE_DIEVALUE_SMALL(LocList)
+HANDLE_DIEVALUE_LARGE(InlineString)
#undef HANDLE_DIEVALUE
#undef HANDLE_DIEVALUE_SMALL
diff --git a/contrib/llvm/include/llvm/CodeGen/FastISel.h b/contrib/llvm/include/llvm/CodeGen/FastISel.h
index 4bff48de38e4..cdaea250c33b 100644
--- a/contrib/llvm/include/llvm/CodeGen/FastISel.h
+++ b/contrib/llvm/include/llvm/CodeGen/FastISel.h
@@ -154,7 +154,7 @@ public:
CallLoweringInfo &setCallee(const DataLayout &DL, MCContext &Ctx,
CallingConv::ID CC, Type *ResultTy,
- const char *Target, ArgListTy &&ArgsList,
+ StringRef Target, ArgListTy &&ArgsList,
unsigned FixedArgs = ~0U);
CallLoweringInfo &setCallee(CallingConv::ID CC, Type *ResultTy,
@@ -356,19 +356,6 @@ protected:
virtual unsigned fastEmit_ri(MVT VT, MVT RetVT, unsigned Opcode, unsigned Op0,
bool Op0IsKill, uint64_t Imm);
- /// \brief This method is called by target-independent code to request that an
- /// instruction with the given type, opcode, and register and floating-point
- /// immediate operands be emitted.
- virtual unsigned fastEmit_rf(MVT VT, MVT RetVT, unsigned Opcode, unsigned Op0,
- bool Op0IsKill, const ConstantFP *FPImm);
-
- /// \brief This method is called by target-independent code to request that an
- /// instruction with the given type, opcode, and register and immediate
- /// operands be emitted.
- virtual unsigned fastEmit_rri(MVT VT, MVT RetVT, unsigned Opcode,
- unsigned Op0, bool Op0IsKill, unsigned Op1,
- bool Op1IsKill, uint64_t Imm);
-
/// \brief This method is a wrapper of fastEmit_ri.
///
/// It first tries to emit an instruction with an immediate operand using
diff --git a/contrib/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h b/contrib/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
index 010e34179efc..75cd7da9d6b9 100644
--- a/contrib/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
+++ b/contrib/llvm/include/llvm/CodeGen/FunctionLoweringInfo.h
@@ -33,7 +33,6 @@ namespace llvm {
class AllocaInst;
class BasicBlock;
class BranchProbabilityInfo;
-class CallInst;
class Function;
class GlobalVariable;
class Instruction;
@@ -72,36 +71,36 @@ public:
/// MBBMap - A mapping from LLVM basic blocks to their machine code entry.
DenseMap<const BasicBlock*, MachineBasicBlock *> MBBMap;
- typedef SmallVector<unsigned, 1> SwiftErrorVRegs;
+ /// A map from a swifterror value in a basic block to the virtual register it
+ /// is currently represented by.
+ llvm::DenseMap<std::pair<const MachineBasicBlock *, const Value *>, unsigned>
+ SwiftErrorVRegDefMap;
+
+ /// A list of upward exposed vreg uses that need to be satisfied by either a
+ /// copy def or a phi node at the beginning of the basic block representing
+ /// the predecessors' swifterror value.
+ llvm::DenseMap<std::pair<const MachineBasicBlock *, const Value *>, unsigned>
+ SwiftErrorVRegUpwardsUse;
+
+ /// The swifterror argument of the current function.
+ const Value *SwiftErrorArg;
+
typedef SmallVector<const Value*, 1> SwiftErrorValues;
/// A function can only have a single swifterror argument. And if it does
/// have a swifterror argument, it must be the first entry in
/// SwiftErrorVals.
SwiftErrorValues SwiftErrorVals;
- /// Track the virtual register for each swifterror value in a given basic
- /// block. Entries in SwiftErrorVRegs have the same ordering as entries
- /// in SwiftErrorVals.
- /// Note that another choice that is more straight-forward is to use
- /// Map<const MachineBasicBlock*, Map<Value*, unsigned/*VReg*/>>. It
- /// maintains a map from swifterror values to virtual registers for each
- /// machine basic block. This choice does not require a one-to-one
- /// correspondence between SwiftErrorValues and SwiftErrorVRegs. But because
- /// of efficiency concern, we do not choose it.
- llvm::DenseMap<const MachineBasicBlock*, SwiftErrorVRegs> SwiftErrorMap;
-
- /// Track the virtual register for each swifterror value at the end of a basic
- /// block when we need the assignment of a virtual register before the basic
- /// block is visited. When we actually visit the basic block, we will make
- /// sure the swifterror value is in the correct virtual register.
- llvm::DenseMap<const MachineBasicBlock*, SwiftErrorVRegs>
- SwiftErrorWorklist;
-
- /// Find the swifterror virtual register in SwiftErrorMap. We will assert
- /// failure when the value does not exist in swifterror map.
- unsigned findSwiftErrorVReg(const MachineBasicBlock*, const Value*) const;
- /// Set the swifterror virtual register in SwiftErrorMap.
- void setSwiftErrorVReg(const MachineBasicBlock *MBB, const Value*, unsigned);
+
+ /// Get or create the swifterror value virtual register in
+ /// SwiftErrorVRegDefMap for this basic block.
+ unsigned getOrCreateSwiftErrorVReg(const MachineBasicBlock *,
+ const Value *);
+
+ /// Set the swifterror virtual register in the SwiftErrorVRegDefMap for this
+ /// basic block.
+ void setCurrentSwiftErrorVReg(const MachineBasicBlock *MBB, const Value *,
+ unsigned);
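
// Editorial sketch (not part of the patch): the intended use pattern for the
// two hooks above, seen from a hypothetical caller during selection. FuncInfo,
// MBB, SwiftErrorVal and NewVReg are assumed to exist in the caller's context.
unsigned remapSwiftError(FunctionLoweringInfo &FuncInfo,
                         const MachineBasicBlock *MBB,
                         const Value *SwiftErrorVal, unsigned NewVReg) {
  // Upward-exposed use: get (or lazily create) the vreg currently holding the
  // swifterror value on entry to this block.
  unsigned CurVReg = FuncInfo.getOrCreateSwiftErrorVReg(MBB, SwiftErrorVal);
  // After a def (e.g. a call that returns the swifterror value), record the
  // new vreg so later uses in this block see it.
  FuncInfo.setCurrentSwiftErrorVReg(MBB, SwiftErrorVal, NewVReg);
  return CurVReg;
}
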
/// ValueMap - Since we emit code for the function a basic block at a time,
/// we must remember which virtual registers hold the values for
@@ -297,18 +296,6 @@ private:
IndexedMap<LiveOutInfo, VirtReg2IndexFunctor> LiveOutRegInfo;
};
-/// ComputeUsesVAFloatArgument - Determine if any floating-point values are
-/// being passed to this variadic function, and set the MachineModuleInfo's
-/// usesVAFloatArgument flag if so. This flag is used to emit an undefined
-/// reference to _fltused on Windows, which will link in MSVCRT's
-/// floating-point support.
-void ComputeUsesVAFloatArgument(const CallInst &I, MachineModuleInfo *MMI);
-
-/// AddLandingPadInfo - Extract the exception handling information from the
-/// landingpad instruction and add them to the specified machine module info.
-void AddLandingPadInfo(const LandingPadInst &I, MachineModuleInfo &MMI,
- MachineBasicBlock *MBB);
-
} // end namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h b/contrib/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
index bbd0b6d88593..0b157bf937a3 100644
--- a/contrib/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
+++ b/contrib/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h
@@ -16,17 +16,70 @@
#define LLVM_CODEGEN_GLOBALISEL_CALLLOWERING_H
#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Function.h"
+#include "llvm/Target/TargetCallingConv.h"
namespace llvm {
// Forward declarations.
class MachineIRBuilder;
+class MachineOperand;
class TargetLowering;
class Value;
class CallLowering {
const TargetLowering *TLI;
- protected:
+public:
+ struct ArgInfo {
+ unsigned Reg;
+ Type *Ty;
+ ISD::ArgFlagsTy Flags;
+
+ ArgInfo(unsigned Reg, Type *Ty, ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy{})
+ : Reg(Reg), Ty(Ty), Flags(Flags) {}
+ };
+
+ /// Argument handling is mostly uniform between the four places that
+ /// make these decisions: function formal arguments, call
+ /// instruction args, call instruction returns and function
+ /// returns. However, once a decision has been made on where an
+ /// argument should go, exactly what happens can vary slightly. This
+ /// class abstracts the differences.
+ struct ValueHandler {
+ /// Materialize a VReg containing the address of the specified
+ /// stack-based object. This is either based on a FrameIndex or
+ /// direct SP manipulation, depending on the context. \p MPO
+ /// should be initialized to an appropriate description of the
+ /// address created.
+ virtual unsigned getStackAddress(uint64_t Size, int64_t Offset,
+ MachinePointerInfo &MPO) = 0;
+
+ /// The specified value has been assigned to a physical register,
+ /// handle the appropriate COPY (either to or from) and mark any
+ /// relevant uses/defines as needed.
+ virtual void assignValueToReg(unsigned ValVReg, unsigned PhysReg,
+ CCValAssign &VA) = 0;
+
+ /// The specified value has been assigned to a stack
+ /// location. Load or store it there, with appropriate extension
+ /// if necessary.
+ virtual void assignValueToAddress(unsigned ValVReg, unsigned Addr,
+ uint64_t Size, MachinePointerInfo &MPO,
+ CCValAssign &VA) = 0;
+
+ unsigned extendRegister(unsigned ValReg, CCValAssign &VA);
+
+ ValueHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
+ : MIRBuilder(MIRBuilder), MRI(MRI) {}
+
+ virtual ~ValueHandler() {}
+
+ MachineIRBuilder &MIRBuilder;
+ MachineRegisterInfo &MRI;
+ };
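
// Editorial sketch (not part of the patch): the shape of a target's
// ValueHandler for incoming formal arguments. Only the register case is
// filled in; getMBB/buildCopy are assumed MachineIRBuilder helpers and the
// class name is a placeholder.
struct SketchIncomingArgHandler : public CallLowering::ValueHandler {
  SketchIncomingArgHandler(MachineIRBuilder &MIRBuilder,
                           MachineRegisterInfo &MRI)
      : ValueHandler(MIRBuilder, MRI) {}

  unsigned getStackAddress(uint64_t Size, int64_t Offset,
                           MachinePointerInfo &MPO) override {
    // A real target would create a fixed frame index here and return a vreg
    // holding its address (e.g. via G_FRAME_INDEX).
    return 0; // stub
  }

  void assignValueToReg(unsigned ValVReg, unsigned PhysReg,
                        CCValAssign &VA) override {
    // Incoming argument: mark the physical register live-in and copy it into
    // the generic vreg the IR-level value will use.
    MIRBuilder.getMBB().addLiveIn(PhysReg);
    MIRBuilder.buildCopy(ValVReg, PhysReg);
  }

  void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size,
                            MachinePointerInfo &MPO, CCValAssign &VA) override {
    // A real target would emit a load from Addr here.
  }
};
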
+
+protected:
/// Getter for generic TargetLowering class.
const TargetLowering *getTLI() const {
return TLI;
@@ -37,7 +90,20 @@ class CallLowering {
const XXXTargetLowering *getTLI() const {
return static_cast<const XXXTargetLowering *>(TLI);
}
- public:
+
+
+ template <typename FuncInfoTy>
+ void setArgFlags(ArgInfo &Arg, unsigned OpNum, const DataLayout &DL,
+ const FuncInfoTy &FuncInfo) const;
+
+ /// Invoke the \p AssignFn on each of the given \p Args and then use
+ /// \p Callback to move them to the assigned locations.
+ ///
+ /// \return True if everything has succeeded, false otherwise.
+ bool handleAssignments(MachineIRBuilder &MIRBuilder, CCAssignFn *AssignFn,
+ ArrayRef<ArgInfo> Args, ValueHandler &Callback) const;
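
// Editorial sketch (not part of the patch): how a target's lowerFormalArguments
// override might feed handleAssignments. SketchCallLowering, CC_SketchTarget
// (a CCAssignFn) and SketchIncomingArgHandler (a ValueHandler, see above) are
// placeholders; splitting of aggregate arguments is omitted.
bool SketchCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
                                              const Function &F,
                                              ArrayRef<unsigned> VRegs) const {
  MachineFunction &MF = MIRBuilder.getMF();
  const DataLayout &DL = F.getParent()->getDataLayout();

  SmallVector<ArgInfo, 8> Args;
  unsigned Idx = 0;
  for (const auto &Arg : F.args()) {
    ArgInfo AInfo(VRegs[Idx], Arg.getType());
    // Attribute index 0 is the return value, so arguments start at 1
    // (an assumption mirroring in-tree targets).
    setArgFlags(AInfo, Idx + 1, DL, F);
    Args.push_back(AInfo);
    ++Idx;
  }

  SketchIncomingArgHandler Handler(MIRBuilder, MF.getRegInfo());
  return handleAssignments(MIRBuilder, CC_SketchTarget, Args, Handler);
}
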
+
+public:
CallLowering(const TargetLowering *TLI) : TLI(TLI) {}
virtual ~CallLowering() {}
@@ -46,8 +112,8 @@ class CallLowering {
/// This hook is used by GlobalISel.
///
/// \return True if the lowering succeeds, false otherwise.
- virtual bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
- unsigned VReg) const {
+ virtual bool lowerReturn(MachineIRBuilder &MIRBuilder,
+ const Value *Val, unsigned VReg) const {
return false;
}
@@ -60,12 +126,54 @@ class CallLowering {
/// lowering.
///
/// \return True if the lowering succeeded, false otherwise.
- virtual bool
- lowerFormalArguments(MachineIRBuilder &MIRBuilder,
- const Function::ArgumentListType &Args,
- const SmallVectorImpl<unsigned> &VRegs) const {
+ virtual bool lowerFormalArguments(MachineIRBuilder &MIRBuilder,
+ const Function &F,
+ ArrayRef<unsigned> VRegs) const {
return false;
}
+
+ /// This hook must be implemented to lower the given call instruction,
+ /// including argument and return value marshalling.
+ ///
+ /// \p Callee is the destination of the call. It should be either a register,
+ /// globaladdress, or externalsymbol.
+ ///
+ /// \p ResTy is the type returned by the function.
+ ///
+ /// \p ResReg is the generic virtual register that the returned
+ /// value should be lowered into.
+ ///
+ /// \p ArgTys is a list of the types each member of \p ArgRegs has; used by
+ /// the target to decide which register/stack slot should be allocated.
+ ///
+ /// \p ArgRegs is a list of virtual registers containing each argument that
+ /// needs to be passed.
+ ///
+ /// \return true if the lowering succeeded, false otherwise.
+ virtual bool lowerCall(MachineIRBuilder &MIRBuilder,
+ const MachineOperand &Callee, const ArgInfo &OrigRet,
+ ArrayRef<ArgInfo> OrigArgs) const {
+ return false;
+ }
+
+ /// This hook must be implemented to lower the given call instruction,
+ /// including argument and return value marshalling.
+ ///
+ /// \p ResReg is a register where the call's return value should be stored (or
+ /// 0 if there is no return value).
+ ///
+ /// \p ArgRegs is a list of virtual registers containing each argument that
+ /// needs to be passed.
+ ///
+ /// \p GetCalleeReg is a callback to materialize a register for the callee if
+ /// the target determines it cannot jump to the destination based purely on \p
+ /// CI. This might be because \p CI is indirect, or because of the limited
+ /// range of an immediate jump.
+ ///
+ /// \return true if the lowering succeeded, false otherwise.
+ virtual bool lowerCall(MachineIRBuilder &MIRBuilder, const CallInst &CI,
+ unsigned ResReg, ArrayRef<unsigned> ArgRegs,
+ std::function<unsigned()> GetCalleeReg) const;
};
} // End namespace llvm.
diff --git a/contrib/llvm/include/llvm/CodeGen/GlobalISel/GISelAccessor.h b/contrib/llvm/include/llvm/CodeGen/GlobalISel/GISelAccessor.h
index 7c5ec9f3adc0..8dea38059ea4 100644
--- a/contrib/llvm/include/llvm/CodeGen/GlobalISel/GISelAccessor.h
+++ b/contrib/llvm/include/llvm/CodeGen/GlobalISel/GISelAccessor.h
@@ -17,6 +17,8 @@
namespace llvm {
class CallLowering;
+class InstructionSelector;
+class LegalizerInfo;
class RegisterBankInfo;
/// The goal of this helper class is to gather the accessor to all
@@ -27,6 +29,10 @@ class RegisterBankInfo;
struct GISelAccessor {
virtual ~GISelAccessor() {}
virtual const CallLowering *getCallLowering() const { return nullptr;}
+ virtual const InstructionSelector *getInstructionSelector() const {
+ return nullptr;
+ }
+ virtual const LegalizerInfo *getLegalizerInfo() const { return nullptr; }
virtual const RegisterBankInfo *getRegBankInfo() const { return nullptr;}
};
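
// Editorial sketch (not part of the patch): the usual way a subtarget plugs
// its GlobalISel objects in, via a small GISelAccessor subclass. The member
// names and owning class are placeholders.
struct SketchGISelActualAccessor : public GISelAccessor {
  std::unique_ptr<CallLowering> CallLoweringInfo;
  std::unique_ptr<InstructionSelector> InstSelector;
  std::unique_ptr<LegalizerInfo> Legalizer;
  std::unique_ptr<RegisterBankInfo> RegBankInfo;

  const CallLowering *getCallLowering() const override {
    return CallLoweringInfo.get();
  }
  const InstructionSelector *getInstructionSelector() const override {
    return InstSelector.get();
  }
  const LegalizerInfo *getLegalizerInfo() const override {
    return Legalizer.get();
  }
  const RegisterBankInfo *getRegBankInfo() const override {
    return RegBankInfo.get();
  }
};
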
} // End namespace llvm;
diff --git a/contrib/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/contrib/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
index 833e87493cad..76e0d47ceea3 100644
--- a/contrib/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
+++ b/contrib/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
@@ -35,6 +35,7 @@ class MachineBasicBlock;
class MachineFunction;
class MachineInstr;
class MachineRegisterInfo;
+class TargetPassConfig;
// Technically the pass should run on an hypothetical MachineModule,
// since it should translate Global into some sort of MachineGlobal.
@@ -64,14 +65,25 @@ private:
// do not appear in that map.
SmallSetVector<const Constant *, 8> Constants;
+ // N.b. it's not completely obvious that this will be sufficient for every
+ // LLVM IR construct (with "invoke" being the obvious candidate to mess up
+ // our lives).
DenseMap<const BasicBlock *, MachineBasicBlock *> BBToMBB;
+ // List of stubbed PHI instructions, for values and basic blocks to be filled
+ // in once all MachineBasicBlocks have been created.
+ SmallVector<std::pair<const PHINode *, MachineInstr *>, 4> PendingPHIs;
+
+ /// Record of what frame index has been allocated to specified allocas for
+ /// this function.
+ DenseMap<const AllocaInst *, int> FrameIndices;
+
/// Methods for translating from LLVM IR to MachineInstr.
/// \see ::translate for general information on the translate methods.
/// @{
/// Translate \p Inst into its corresponding MachineInstr instruction(s).
- /// Insert the newly translated instruction(s) right where the MIRBuilder
+ /// Insert the newly translated instruction(s) right where the CurBuilder
/// is set.
///
/// The general algorithm is:
@@ -93,51 +105,305 @@ private:
/// \return true if the translation succeeded.
bool translate(const Instruction &Inst);
+ /// Materialize \p C into virtual-register \p Reg. The generic instructions
+ /// performing this materialization will be inserted into the entry block of
+ /// the function.
+ ///
+ /// \return true if the materialization succeeded.
+ bool translate(const Constant &C, unsigned Reg);
+
+ /// Translate an LLVM bitcast into generic IR. Either a COPY or a G_BITCAST is
+ /// emitted.
+ bool translateBitCast(const User &U, MachineIRBuilder &MIRBuilder);
+
+ /// Translate an LLVM load instruction into generic IR.
+ bool translateLoad(const User &U, MachineIRBuilder &MIRBuilder);
+
+ /// Translate an LLVM store instruction into generic IR.
+ bool translateStore(const User &U, MachineIRBuilder &MIRBuilder);
+
+ bool translateMemcpy(const CallInst &CI, MachineIRBuilder &MIRBuilder);
+
+ void getStackGuard(unsigned DstReg, MachineIRBuilder &MIRBuilder);
+
+ bool translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
+ MachineIRBuilder &MIRBuilder);
+
+ bool translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
+ MachineIRBuilder &MIRBuilder);
+
+ /// Translate call instruction.
+ /// \pre \p U is a call instruction.
+ bool translateCall(const User &U, MachineIRBuilder &MIRBuilder);
+
+ bool translateInvoke(const User &U, MachineIRBuilder &MIRBuilder);
+
+ bool translateLandingPad(const User &U, MachineIRBuilder &MIRBuilder);
+
+ /// Translate one of LLVM's cast instructions into MachineInstrs, with the
+ /// given generic Opcode.
+ bool translateCast(unsigned Opcode, const User &U,
+ MachineIRBuilder &MIRBuilder);
+
+ /// Translate static alloca instruction (i.e. one of constant size and in the
+ /// first basic block).
+ bool translateStaticAlloca(const AllocaInst &Inst,
+ MachineIRBuilder &MIRBuilder);
+
+ /// Translate a phi instruction.
+ bool translatePHI(const User &U, MachineIRBuilder &MIRBuilder);
+
+ /// Translate a comparison (icmp or fcmp) instruction or constant.
+ bool translateCompare(const User &U, MachineIRBuilder &MIRBuilder);
+
+ /// Translate an integer compare instruction (or constant).
+ bool translateICmp(const User &U, MachineIRBuilder &MIRBuilder) {
+ return translateCompare(U, MIRBuilder);
+ }
+
+ /// Translate a floating-point compare instruction (or constant).
+ bool translateFCmp(const User &U, MachineIRBuilder &MIRBuilder) {
+ return translateCompare(U, MIRBuilder);
+ }
+
+
+ /// Add remaining operands onto phis we've translated. Executed after all
+ /// MachineBasicBlocks for the function have been created.
+ void finishPendingPhis();
+
/// Translate \p Inst into a binary operation \p Opcode.
- /// \pre \p Inst is a binary operation.
- bool translateBinaryOp(unsigned Opcode, const Instruction &Inst);
+ /// \pre \p U is a binary operation.
+ bool translateBinaryOp(unsigned Opcode, const User &U,
+ MachineIRBuilder &MIRBuilder);
/// Translate branch (br) instruction.
- /// \pre \p Inst is a branch instruction.
- bool translateBr(const Instruction &Inst);
+ /// \pre \p U is a branch instruction.
+ bool translateBr(const User &U, MachineIRBuilder &MIRBuilder);
+
+ bool translateExtractValue(const User &U, MachineIRBuilder &MIRBuilder);
+
+ bool translateInsertValue(const User &U, MachineIRBuilder &MIRBuilder);
+
+ bool translateSelect(const User &U, MachineIRBuilder &MIRBuilder);
+
+ bool translateGetElementPtr(const User &U, MachineIRBuilder &MIRBuilder);
/// Translate return (ret) instruction.
/// The target needs to implement CallLowering::lowerReturn for
/// this to succeed.
- /// \pre \p Inst is a return instruction.
- bool translateReturn(const Instruction &Inst);
+ /// \pre \p U is a return instruction.
+ bool translateRet(const User &U, MachineIRBuilder &MIRBuilder);
+
+ bool translateAdd(const User &U, MachineIRBuilder &MIRBuilder) {
+ return translateBinaryOp(TargetOpcode::G_ADD, U, MIRBuilder);
+ }
+ bool translateSub(const User &U, MachineIRBuilder &MIRBuilder) {
+ return translateBinaryOp(TargetOpcode::G_SUB, U, MIRBuilder);
+ }
+ bool translateAnd(const User &U, MachineIRBuilder &MIRBuilder) {
+ return translateBinaryOp(TargetOpcode::G_AND, U, MIRBuilder);
+ }
+ bool translateMul(const User &U, MachineIRBuilder &MIRBuilder) {
+ return translateBinaryOp(TargetOpcode::G_MUL, U, MIRBuilder);
+ }
+ bool translateOr(const User &U, MachineIRBuilder &MIRBuilder) {
+ return translateBinaryOp(TargetOpcode::G_OR, U, MIRBuilder);
+ }
+ bool translateXor(const User &U, MachineIRBuilder &MIRBuilder) {
+ return translateBinaryOp(TargetOpcode::G_XOR, U, MIRBuilder);
+ }
+
+ bool translateUDiv(const User &U, MachineIRBuilder &MIRBuilder) {
+ return translateBinaryOp(TargetOpcode::G_UDIV, U, MIRBuilder);
+ }
+ bool translateSDiv(const User &U, MachineIRBuilder &MIRBuilder) {
+ return translateBinaryOp(TargetOpcode::G_SDIV, U, MIRBuilder);
+ }
+ bool translateURem(const User &U, MachineIRBuilder &MIRBuilder) {
+ return translateBinaryOp(TargetOpcode::G_UREM, U, MIRBuilder);
+ }
+ bool translateSRem(const User &U, MachineIRBuilder &MIRBuilder) {
+ return translateBinaryOp(TargetOpcode::G_SREM, U, MIRBuilder);
+ }
+ bool translateAlloca(const User &U, MachineIRBuilder &MIRBuilder) {
+ return translateStaticAlloca(cast<AllocaInst>(U), MIRBuilder);
+ }
+ bool translateIntToPtr(const User &U, MachineIRBuilder &MIRBuilder) {
+ return translateCast(TargetOpcode::G_INTTOPTR, U, MIRBuilder);
+ }
+ bool translatePtrToInt(const User &U, MachineIRBuilder &MIRBuilder) {
+ return translateCast(TargetOpcode::G_PTRTOINT, U, MIRBuilder);
+ }
+ bool translateTrunc(const User &U, MachineIRBuilder &MIRBuilder) {
+ return translateCast(TargetOpcode::G_TRUNC, U, MIRBuilder);
+ }
+ bool translateFPTrunc(const User &U, MachineIRBuilder &MIRBuilder) {
+ return translateCast(TargetOpcode::G_FPTRUNC, U, MIRBuilder);
+ }
+ bool translateFPExt(const User &U, MachineIRBuilder &MIRBuilder) {
+ return translateCast(TargetOpcode::G_FPEXT, U, MIRBuilder);
+ }
+ bool translateFPToUI(const User &U, MachineIRBuilder &MIRBuilder) {
+ return translateCast(TargetOpcode::G_FPTOUI, U, MIRBuilder);
+ }
+ bool translateFPToSI(const User &U, MachineIRBuilder &MIRBuilder) {
+ return translateCast(TargetOpcode::G_FPTOSI, U, MIRBuilder);
+ }
+ bool translateUIToFP(const User &U, MachineIRBuilder &MIRBuilder) {
+ return translateCast(TargetOpcode::G_UITOFP, U, MIRBuilder);
+ }
+ bool translateSIToFP(const User &U, MachineIRBuilder &MIRBuilder) {
+ return translateCast(TargetOpcode::G_SITOFP, U, MIRBuilder);
+ }
+ bool translateUnreachable(const User &U, MachineIRBuilder &MIRBuilder) {
+ return true;
+ }
+ bool translateSExt(const User &U, MachineIRBuilder &MIRBuilder) {
+ return translateCast(TargetOpcode::G_SEXT, U, MIRBuilder);
+ }
+
+ bool translateZExt(const User &U, MachineIRBuilder &MIRBuilder) {
+ return translateCast(TargetOpcode::G_ZEXT, U, MIRBuilder);
+ }
+
+ bool translateShl(const User &U, MachineIRBuilder &MIRBuilder) {
+ return translateBinaryOp(TargetOpcode::G_SHL, U, MIRBuilder);
+ }
+ bool translateLShr(const User &U, MachineIRBuilder &MIRBuilder) {
+ return translateBinaryOp(TargetOpcode::G_LSHR, U, MIRBuilder);
+ }
+ bool translateAShr(const User &U, MachineIRBuilder &MIRBuilder) {
+ return translateBinaryOp(TargetOpcode::G_ASHR, U, MIRBuilder);
+ }
+
+ bool translateFAdd(const User &U, MachineIRBuilder &MIRBuilder) {
+ return translateBinaryOp(TargetOpcode::G_FADD, U, MIRBuilder);
+ }
+ bool translateFSub(const User &U, MachineIRBuilder &MIRBuilder) {
+ return translateBinaryOp(TargetOpcode::G_FSUB, U, MIRBuilder);
+ }
+ bool translateFMul(const User &U, MachineIRBuilder &MIRBuilder) {
+ return translateBinaryOp(TargetOpcode::G_FMUL, U, MIRBuilder);
+ }
+ bool translateFDiv(const User &U, MachineIRBuilder &MIRBuilder) {
+ return translateBinaryOp(TargetOpcode::G_FDIV, U, MIRBuilder);
+ }
+ bool translateFRem(const User &U, MachineIRBuilder &MIRBuilder) {
+ return translateBinaryOp(TargetOpcode::G_FREM, U, MIRBuilder);
+ }
+
+
+ // Stubs to keep the compiler happy while we implement the rest of the
+ // translation.
+ bool translateSwitch(const User &U, MachineIRBuilder &MIRBuilder) {
+ return false;
+ }
+ bool translateIndirectBr(const User &U, MachineIRBuilder &MIRBuilder) {
+ return false;
+ }
+ bool translateResume(const User &U, MachineIRBuilder &MIRBuilder) {
+ return false;
+ }
+ bool translateCleanupRet(const User &U, MachineIRBuilder &MIRBuilder) {
+ return false;
+ }
+ bool translateCatchRet(const User &U, MachineIRBuilder &MIRBuilder) {
+ return false;
+ }
+ bool translateCatchSwitch(const User &U, MachineIRBuilder &MIRBuilder) {
+ return false;
+ }
+ bool translateFence(const User &U, MachineIRBuilder &MIRBuilder) {
+ return false;
+ }
+ bool translateAtomicCmpXchg(const User &U, MachineIRBuilder &MIRBuilder) {
+ return false;
+ }
+ bool translateAtomicRMW(const User &U, MachineIRBuilder &MIRBuilder) {
+ return false;
+ }
+ bool translateAddrSpaceCast(const User &U, MachineIRBuilder &MIRBuilder) {
+ return false;
+ }
+ bool translateCleanupPad(const User &U, MachineIRBuilder &MIRBuilder) {
+ return false;
+ }
+ bool translateCatchPad(const User &U, MachineIRBuilder &MIRBuilder) {
+ return false;
+ }
+ bool translateUserOp1(const User &U, MachineIRBuilder &MIRBuilder) {
+ return false;
+ }
+ bool translateUserOp2(const User &U, MachineIRBuilder &MIRBuilder) {
+ return false;
+ }
+ bool translateVAArg(const User &U, MachineIRBuilder &MIRBuilder) {
+ return false;
+ }
+ bool translateExtractElement(const User &U, MachineIRBuilder &MIRBuilder) {
+ return false;
+ }
+ bool translateInsertElement(const User &U, MachineIRBuilder &MIRBuilder) {
+ return false;
+ }
+ bool translateShuffleVector(const User &U, MachineIRBuilder &MIRBuilder) {
+ return false;
+ }
+
/// @}
// Builder for machine instruction a la IRBuilder.
// I.e., compared to regular MIBuilder, this one also inserts the instruction
// in the current block, it can create blocks, etc., basically a kind of
// IRBuilder, but for Machine IR.
- MachineIRBuilder MIRBuilder;
+ MachineIRBuilder CurBuilder;
+
+ // Builder set to the entry block (just after ABI lowering instructions). Used
+ // as a convenient location for Constants.
+ MachineIRBuilder EntryBuilder;
+
+ // The MachineFunction currently being translated.
+ MachineFunction *MF;
/// MachineRegisterInfo used to create virtual registers.
MachineRegisterInfo *MRI;
+ const DataLayout *DL;
+
+ /// Current target configuration. Controls how the pass handles errors.
+ const TargetPassConfig *TPC;
+
// * Insert all the code needed to materialize the constants
// at the proper place. E.g., Entry block or dominator block
// of each constant depending on how fancy we want to be.
// * Clear the different maps.
- void finalize();
+ void finalizeFunction();
/// Get the VReg that represents \p Val.
/// If such VReg does not exist, it is created.
unsigned getOrCreateVReg(const Value &Val);
+ /// Get the frame index that represents \p Val.
+ /// If such VReg does not exist, it is created.
+ int getOrCreateFrameIndex(const AllocaInst &AI);
+
+ /// Get the alignment of the given memory operation instruction. This will
+ /// either be the explicitly specified value or the ABI-required alignment for
+ /// the type being accessed (according to the Module's DataLayout).
+ unsigned getMemOpAlignment(const Instruction &I);
+
/// Get the MachineBasicBlock that represents \p BB.
/// If such basic block does not exist, it is created.
MachineBasicBlock &getOrCreateBB(const BasicBlock &BB);
+
public:
// Ctor, nothing fancy.
IRTranslator();
- const char *getPassName() const override {
- return "IRTranslator";
- }
+ StringRef getPassName() const override { return "IRTranslator"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
// Algo:
// CallLowering = MF.subtarget.getCallLowering()
diff --git a/contrib/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelect.h b/contrib/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelect.h
new file mode 100644
index 000000000000..01521c46ab6a
--- /dev/null
+++ b/contrib/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelect.h
@@ -0,0 +1,53 @@
+//== llvm/CodeGen/GlobalISel/InstructionSelect.h -----------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file This file describes the interface of the MachineFunctionPass
+/// responsible for selecting (possibly generic) machine instructions to
+/// target-specific instructions.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_GLOBALISEL_INSTRUCTIONSELECT_H
+#define LLVM_CODEGEN_GLOBALISEL_INSTRUCTIONSELECT_H
+
+#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+namespace llvm {
+/// This pass is responsible for selecting generic machine instructions to
+/// target-specific instructions. It relies on the InstructionSelector provided
+/// by the target.
+/// Selection is done by examining blocks in post-order, and instructions in
+/// reverse order.
+///
+/// \post for all inst in MF: not isPreISelGenericOpcode(inst.opcode)
+class InstructionSelect : public MachineFunctionPass {
+public:
+ static char ID;
+ StringRef getPassName() const override { return "InstructionSelect"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties()
+ .set(MachineFunctionProperties::Property::IsSSA)
+ .set(MachineFunctionProperties::Property::Legalized)
+ .set(MachineFunctionProperties::Property::RegBankSelected);
+ }
+
+ MachineFunctionProperties getSetProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::Selected);
+ }
+
+ InstructionSelect();
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+} // End namespace llvm.
+
+#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h b/contrib/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h
new file mode 100644
index 000000000000..63b4f7b9507f
--- /dev/null
+++ b/contrib/llvm/include/llvm/CodeGen/GlobalISel/InstructionSelector.h
@@ -0,0 +1,63 @@
+//==-- llvm/CodeGen/GlobalISel/InstructionSelector.h -------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file declares the API for the instruction selector.
+/// This class is responsible for selecting machine instructions.
+/// It's implemented by the target. It's used by the InstructionSelect pass.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_GLOBALISEL_INSTRUCTIONSELECTOR_H
+#define LLVM_CODEGEN_GLOBALISEL_INSTRUCTIONSELECTOR_H
+
+namespace llvm {
+class MachineInstr;
+class RegisterBankInfo;
+class TargetInstrInfo;
+class TargetRegisterInfo;
+
+/// Provides the logic to select generic machine instructions.
+class InstructionSelector {
+public:
+ virtual ~InstructionSelector() {}
+
+ /// Select the (possibly generic) instruction \p I to only use target-specific
+ /// opcodes. It is OK to insert multiple instructions, but they cannot be
+ /// generic pre-isel instructions.
+ ///
+ /// \returns whether selection succeeded.
+ /// \pre I.getParent() && I.getParent()->getParent()
+ /// \post
+ /// if returns true:
+ /// for I in all mutated/inserted instructions:
+ /// !isPreISelGenericOpcode(I.getOpcode())
+ ///
+ virtual bool select(MachineInstr &I) const = 0;
+
+protected:
+ InstructionSelector();
+
+ /// Mutate the newly-selected instruction \p I to constrain its (possibly
+ /// generic) virtual register operands to the instruction's register class.
+ /// This could involve inserting COPYs before (for uses) or after (for defs).
+ /// This requires the number of operands to match the instruction description.
+ /// \returns whether operand regclass constraining succeeded.
+ ///
+ // FIXME: Not all instructions have the same number of operands. We should
+ // probably expose a constrain helper per operand and let the target selector
+ // constrain individual registers, like fast-isel.
+ bool constrainSelectedInstRegOperands(MachineInstr &I,
+ const TargetInstrInfo &TII,
+ const TargetRegisterInfo &TRI,
+ const RegisterBankInfo &RBI) const;
+};
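
// Editorial sketch (not part of the patch): the minimal shape of a target's
// selector. The TII/TRI/RBI members and the opcode rewriting are placeholders;
// a real selector is normally generated from TableGen plus hand-written cases.
class SketchInstructionSelector : public InstructionSelector {
public:
  SketchInstructionSelector(const TargetInstrInfo &TII,
                            const TargetRegisterInfo &TRI,
                            const RegisterBankInfo &RBI)
      : TII(TII), TRI(TRI), RBI(RBI) {}

  bool select(MachineInstr &I) const override {
    if (!isPreISelGenericOpcode(I.getOpcode()))
      return true; // already target-specific, nothing to do
    // ... rewrite I's opcode to a target instruction here ...
    // Once the opcode is target-specific, pin the (possibly generic) vreg
    // operands to the register classes the new opcode requires.
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

private:
  const TargetInstrInfo &TII;
  const TargetRegisterInfo &TRI;
  const RegisterBankInfo &RBI;
};
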
+
+} // End namespace llvm.
+
+#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/GlobalISel/Legalizer.h b/contrib/llvm/include/llvm/CodeGen/GlobalISel/Legalizer.h
new file mode 100644
index 000000000000..8284ab6dac65
--- /dev/null
+++ b/contrib/llvm/include/llvm/CodeGen/GlobalISel/Legalizer.h
@@ -0,0 +1,65 @@
+//== llvm/CodeGen/GlobalISel/Legalizer.h --------------------- -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file A pass to convert the target-illegal operations created by IR -> MIR
+/// translation into ones the target expects to be able to select. This may
+/// occur in multiple phases, for example G_ADD <2 x i8> -> G_ADD <2 x i16> ->
+/// G_ADD <4 x i16>.
+///
+/// The LegalizerHelper class is where most of the work happens, and is designed
+/// to be callable from other passes that find themselves with an illegal
+/// instruction.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_GLOBALISEL_LEGALIZEMACHINEIRPASS_H
+#define LLVM_CODEGEN_GLOBALISEL_LEGALIZEMACHINEIRPASS_H
+
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+namespace llvm {
+
+class MachineRegisterInfo;
+
+class Legalizer : public MachineFunctionPass {
+public:
+ static char ID;
+
+private:
+
+ /// Initialize the field members using \p MF.
+ void init(MachineFunction &MF);
+
+public:
+ // Ctor, nothing fancy.
+ Legalizer();
+
+ StringRef getPassName() const override { return "Legalizer"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::IsSSA);
+ }
+
+ MachineFunctionProperties getSetProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::Legalized);
+ }
+
+ bool combineExtracts(MachineInstr &MI, MachineRegisterInfo &MRI,
+ const TargetInstrInfo &TII);
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+};
+} // End namespace llvm.
+
+#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/contrib/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
new file mode 100644
index 000000000000..56c444ca46be
--- /dev/null
+++ b/contrib/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -0,0 +1,104 @@
+//== llvm/CodeGen/GlobalISel/LegalizerHelper.h ---------------- -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file A pass to convert the target-illegal operations created by IR -> MIR
+/// translation into ones the target expects to be able to select. This may
+/// occur in multiple phases, for example G_ADD <2 x i8> -> G_ADD <2 x i16> ->
+/// G_ADD <4 x i16>.
+///
+/// The LegalizerHelper class is where most of the work happens, and is
+/// designed to be callable from other passes that find themselves with an
+/// illegal instruction.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_GLOBALISEL_MACHINELEGALIZEHELPER_H
+#define LLVM_CODEGEN_GLOBALISEL_MACHINELEGALIZEHELPER_H
+
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/LowLevelType.h"
+
+namespace llvm {
+// Forward declarations.
+class LegalizerInfo;
+class Legalizer;
+class MachineRegisterInfo;
+
+class LegalizerHelper {
+public:
+ enum LegalizeResult {
+ /// Instruction was already legal and no change was made to the
+ /// MachineFunction.
+ AlreadyLegal,
+
+ /// Instruction has been legalized and the MachineFunction changed.
+ Legalized,
+
+ /// Some kind of error has occurred and we could not legalize this
+ /// instruction.
+ UnableToLegalize,
+ };
+
+ LegalizerHelper(MachineFunction &MF);
+
+ /// Replace \p MI by a sequence of legal instructions that can implement the
+ /// same operation. Note that this means \p MI may be deleted, so any iterator
+ /// steps should be performed before calling this function. This helper must
+ /// have been constructed with the MachineFunction containing \p MI.
+ ///
+ /// Considered as an opaque blob, the legal code will use and define the same
+ /// registers as \p MI.
+ LegalizeResult legalizeInstrStep(MachineInstr &MI,
+ const LegalizerInfo &LegalizerInfo);
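
// Editorial sketch (not part of the patch): how a pass might drive
// legalizeInstrStep over a worklist. Worklist management is simplified; the
// real Legalizer pass also records instructions created while legalizing.
bool legalizeAll(MachineFunction &MF, const LegalizerInfo &LInfo,
                 SmallVectorImpl<MachineInstr *> &WorkList) {
  LegalizerHelper Helper(MF);
  while (!WorkList.empty()) {
    MachineInstr *MI = WorkList.pop_back_val();
    // MI may be erased by this call, so do not touch it afterwards.
    auto Res = Helper.legalizeInstrStep(*MI, LInfo);
    if (Res == LegalizerHelper::UnableToLegalize)
      return false; // report the failure to the caller
    // AlreadyLegal / Legalized: keep going.
  }
  return true;
}
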
+
+ LegalizeResult legalizeInstr(MachineInstr &MI,
+ const LegalizerInfo &LegalizerInfo);
+
+ /// Legalize an instruction by emitting a runtime library call instead.
+ LegalizeResult libcall(MachineInstr &MI);
+
+ /// Legalize an instruction by reducing the width of the underlying scalar
+ /// type.
+ LegalizeResult narrowScalar(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy);
+
+ /// Legalize an instruction by performing the operation on a wider scalar type
+ /// (for example a 16-bit addition can be safely performed at 32-bit
+ /// precision, ignoring the unused bits).
+ LegalizeResult widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy);
+
+ /// Legalize an instruction by splitting it into simpler parts, hopefully
+ /// understood by the target.
+ LegalizeResult lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty);
+
+ /// Legalize a vector instruction by splitting into multiple components, each
+ /// acting on the same scalar type as the original but with fewer elements.
+ LegalizeResult fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy);
+
+ /// Legalize a vector instruction by increasing the number of vector elements
+ /// involved and ignoring the added elements later.
+ LegalizeResult moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
+ LLT WideTy);
+
+private:
+
+ /// Helper function to split a wide generic register into bitwise blocks with
+ /// the given Type (which implies the number of blocks needed). The generic
+ /// registers created are appended to Ops, starting at bit 0 of Reg.
+ void extractParts(unsigned Reg, LLT Ty, int NumParts,
+ SmallVectorImpl<unsigned> &Ops);
+
+ MachineIRBuilder MIRBuilder;
+ MachineRegisterInfo &MRI;
+};
+
+} // End namespace llvm.
+
+#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h b/contrib/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
new file mode 100644
index 000000000000..edf52daf3f8f
--- /dev/null
+++ b/contrib/llvm/include/llvm/CodeGen/GlobalISel/LegalizerInfo.h
@@ -0,0 +1,207 @@
+//==-- llvm/CodeGen/GlobalISel/LegalizerInfo.h -------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// Interface for Targets to specify which operations they can successfully
+/// select and how the others should be expanded most efficiently.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_GLOBALISEL_MACHINELEGALIZER_H
+#define LLVM_CODEGEN_GLOBALISEL_MACHINELEGALIZER_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/Target/TargetOpcodes.h"
+
+#include <cstdint>
+#include <functional>
+
+namespace llvm {
+class LLVMContext;
+class MachineInstr;
+class MachineRegisterInfo;
+class Type;
+class VectorType;
+
+/// Legalization is decided based on an instruction's opcode, which type slot
+/// we're considering, and what the existing type is. These aspects are gathered
+/// together for convenience in the InstrAspect class.
+struct InstrAspect {
+ unsigned Opcode;
+ unsigned Idx;
+ LLT Type;
+
+ InstrAspect(unsigned Opcode, LLT Type) : Opcode(Opcode), Idx(0), Type(Type) {}
+ InstrAspect(unsigned Opcode, unsigned Idx, LLT Type)
+ : Opcode(Opcode), Idx(Idx), Type(Type) {}
+
+ bool operator==(const InstrAspect &RHS) const {
+ return Opcode == RHS.Opcode && Idx == RHS.Idx && Type == RHS.Type;
+ }
+};
+
+class LegalizerInfo {
+public:
+ enum LegalizeAction : std::uint8_t {
+ /// The operation is expected to be selectable directly by the target, and
+ /// no transformation is necessary.
+ Legal,
+
+ /// The operation should be synthesized from multiple instructions acting on
+ /// a narrower scalar base-type. For example a 64-bit add might be
+ /// implemented in terms of 32-bit add-with-carry.
+ NarrowScalar,
+
+ /// The operation should be implemented in terms of a wider scalar
+ /// base-type. For example a <2 x s8> add could be implemented as a <2
+ /// x s32> add (ignoring the high bits).
+ WidenScalar,
+
+ /// The (vector) operation should be implemented by splitting it into
+ /// sub-vectors where the operation is legal. For example a <8 x s64> add
+ /// might be implemented as 4 separate <2 x s64> adds.
+ FewerElements,
+
+ /// The (vector) operation should be implemented by widening the input
+ /// vector and ignoring the lanes added by doing so. For example <2 x i8> is
+ /// rarely legal, but you might perform an <8 x i8> and then only look at
+ /// the first two results.
+ MoreElements,
+
+ /// The operation itself must be expressed in terms of simpler actions on
+ /// this target. E.g. an SREM replaced by an SDIV, a multiply, and a subtract.
+ Lower,
+
+ /// The operation should be implemented as a call to some kind of runtime
+ /// support library. For example this usually happens on machines that don't
+ /// support floating-point operations natively.
+ Libcall,
+
+ /// The target wants to do something special with this combination of
+ /// operand and type. A callback will be issued when it is needed.
+ Custom,
+
+ /// This operation is completely unsupported on the target. A programming
+ /// error has occurred.
+ Unsupported,
+
+ /// Sentinel value for when no action was found in the specified table.
+ NotFound,
+ };
+
+ LegalizerInfo();
+
+ /// Compute any ancillary tables needed to quickly decide how an operation
+ /// should be handled. This must be called after all "set*Action" methods but
+ /// before any query is made or incorrect results may be returned.
+ void computeTables();
+
+ /// More friendly way to set an action for common types that have an LLT
+ /// representation.
+ void setAction(const InstrAspect &Aspect, LegalizeAction Action) {
+ TablesInitialized = false;
+ unsigned Opcode = Aspect.Opcode - FirstOp;
+ if (Actions[Opcode].size() <= Aspect.Idx)
+ Actions[Opcode].resize(Aspect.Idx + 1);
+ Actions[Aspect.Opcode - FirstOp][Aspect.Idx][Aspect.Type] = Action;
+ }
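
// Editorial sketch (not part of the patch): a target's LegalizerInfo subclass
// populating the action table in its constructor. LLT::scalar is assumed from
// LowLevelType.h; SketchLegalizerInfo, the opcodes and the choices are
// illustrative only.
SketchLegalizerInfo::SketchLegalizerInfo() {
  const LLT S8 = LLT::scalar(8);
  const LLT S32 = LLT::scalar(32);
  const LLT S64 = LLT::scalar(64);

  setAction({TargetOpcode::G_ADD, S32}, Legal);
  setAction({TargetOpcode::G_ADD, S8}, WidenScalar);   // do 8-bit adds at 32 bits
  setAction({TargetOpcode::G_ADD, S64}, NarrowScalar); // e.g. on a 32-bit target
  setAction({TargetOpcode::G_FREM, S32}, Libcall);     // call the runtime instead

  computeTables(); // must run before any getAction query
}
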
+
+ /// If an operation on a given vector type (say <M x iN>) isn't explicitly
+ /// specified, we proceed in 2 stages. First we legalize the underlying scalar
+ /// (so that there's at least one legal vector with that scalar), then we
+ /// adjust the number of elements in the vector so that it is legal. The
+ /// desired action in the first step is controlled by this function.
+ void setScalarInVectorAction(unsigned Opcode, LLT ScalarTy,
+ LegalizeAction Action) {
+ assert(!ScalarTy.isVector());
+ ScalarInVectorActions[std::make_pair(Opcode, ScalarTy)] = Action;
+ }
+
+
+ /// Determine what action should be taken to legalize the given generic
+ /// instruction opcode, type-index and type. Requires computeTables to have
+ /// been called.
+ ///
+ /// \returns a pair consisting of the kind of legalization that should be
+ /// performed and the destination type.
+ std::pair<LegalizeAction, LLT> getAction(const InstrAspect &Aspect) const;
+
+ /// Determine what action should be taken to legalize the given generic
+ /// instruction.
+ ///
+ /// \returns a tuple consisting of the LegalizeAction that should be
+ /// performed, the type-index it should be performed on and the destination
+ /// type.
+ std::tuple<LegalizeAction, unsigned, LLT>
+ getAction(const MachineInstr &MI, const MachineRegisterInfo &MRI) const;
+
+ /// Iterate the given function (typically something like doubling the width)
+ /// on Ty until we find a legal type for this operation.
+ LLT findLegalType(const InstrAspect &Aspect,
+ std::function<LLT(LLT)> NextType) const {
+ LegalizeAction Action;
+ const TypeMap &Map = Actions[Aspect.Opcode - FirstOp][Aspect.Idx];
+ LLT Ty = Aspect.Type;
+ do {
+ Ty = NextType(Ty);
+ auto ActionIt = Map.find(Ty);
+ if (ActionIt == Map.end())
+ Action = DefaultActions.find(Aspect.Opcode)->second;
+ else
+ Action = ActionIt->second;
+ } while (Action != Legal);
+ return Ty;
+ }
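
// Editorial sketch (not part of the patch): the "doubling the width" iteration
// mentioned above, expressed as the NextType callback. LLT::scalar and
// getSizeInBits are assumed from LowLevelType.h; the function name is
// illustrative.
LLT nextLegalPowerOf2(const LegalizerInfo &LI, const InstrAspect &Aspect) {
  return LI.findLegalType(Aspect, [](LLT Ty) {
    return LLT::scalar(Ty.getSizeInBits() * 2); // keep widening until legal
  });
}
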
+
+ /// Find what type it's actually OK to perform the given operation on, given
+ /// the general approach we've decided to take.
+ LLT findLegalType(const InstrAspect &Aspect, LegalizeAction Action) const;
+
+ std::pair<LegalizeAction, LLT> findLegalAction(const InstrAspect &Aspect,
+ LegalizeAction Action) const {
+ return std::make_pair(Action, findLegalType(Aspect, Action));
+ }
+
+ /// Find the specified \p Aspect in the primary (explicitly set) Actions
+ /// table. Returns either the action the target requested or NotFound if there
+ /// was no setAction call.
+ LegalizeAction findInActions(const InstrAspect &Aspect) const {
+ if (Aspect.Opcode < FirstOp || Aspect.Opcode > LastOp)
+ return NotFound;
+ if (Aspect.Idx >= Actions[Aspect.Opcode - FirstOp].size())
+ return NotFound;
+ const TypeMap &Map = Actions[Aspect.Opcode - FirstOp][Aspect.Idx];
+ auto ActionIt = Map.find(Aspect.Type);
+ if (ActionIt == Map.end())
+ return NotFound;
+
+ return ActionIt->second;
+ }
+
+ bool isLegal(const MachineInstr &MI, const MachineRegisterInfo &MRI) const;
+
+private:
+ static const int FirstOp = TargetOpcode::PRE_ISEL_GENERIC_OPCODE_START;
+ static const int LastOp = TargetOpcode::PRE_ISEL_GENERIC_OPCODE_END;
+
+ typedef DenseMap<LLT, LegalizeAction> TypeMap;
+ typedef DenseMap<std::pair<unsigned, LLT>, LegalizeAction> SIVActionMap;
+
+ SmallVector<TypeMap, 1> Actions[LastOp - FirstOp + 1];
+ SIVActionMap ScalarInVectorActions;
+ DenseMap<std::pair<unsigned, LLT>, uint16_t> MaxLegalVectorElts;
+ DenseMap<unsigned, LegalizeAction> DefaultActions;
+
+ bool TablesInitialized;
+};
+
+
+} // End namespace llvm.
+
+#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/contrib/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
index efdc59a9cddf..ecd3e5e1e138 100644
--- a/contrib/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
+++ b/contrib/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h
@@ -17,8 +17,13 @@
#include "llvm/CodeGen/GlobalISel/Types.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugLoc.h"
+#include <queue>
+
namespace llvm {
// Forward declarations.
@@ -35,21 +40,26 @@ class MachineIRBuilder {
MachineFunction *MF;
/// Information used to access the description of the opcodes.
const TargetInstrInfo *TII;
+ /// Information used to verify types are consistent.
+ const MachineRegisterInfo *MRI;
/// Debug location to be set to any instruction we create.
DebugLoc DL;
/// Fields describing the insertion point.
/// @{
MachineBasicBlock *MBB;
- MachineInstr *MI;
- bool Before;
+ MachineBasicBlock::iterator II;
/// @}
+ std::function<void(MachineInstr *)> InsertedInstr;
+
const TargetInstrInfo &getTII() {
assert(TII && "TargetInstrInfo is not set");
return *TII;
}
+ void validateTruncExt(unsigned Dst, unsigned Src, bool IsExtend);
+
public:
/// Getter for the function we currently build.
MachineFunction &getMF() {
@@ -64,82 +74,432 @@ public:
}
/// Current insertion point for new instructions.
- MachineBasicBlock::iterator getInsertPt();
+ MachineBasicBlock::iterator getInsertPt() {
+ return II;
+ }
+
+ /// Set the insertion point before the specified position.
+ /// \pre MBB must be in getMF().
+ /// \pre II must be a valid iterator in MBB.
+ void setInsertPt(MachineBasicBlock &MBB, MachineBasicBlock::iterator II);
+ /// @}
/// Setters for the insertion point.
/// @{
/// Set the MachineFunction where to build instructions.
void setMF(MachineFunction &);
- /// Set the insertion point to the beginning (\p Beginning = true) or end
- /// (\p Beginning = false) of \p MBB.
+ /// Set the insertion point to the end of \p MBB.
/// \pre \p MBB must be contained by getMF().
- void setMBB(MachineBasicBlock &MBB, bool Beginning = false);
+ void setMBB(MachineBasicBlock &MBB);
- /// Set the insertion point to before (\p Before = true) or after
- /// (\p Before = false) \p MI.
+ /// Set the insertion point to before MI.
/// \pre MI must be in getMF().
- void setInstr(MachineInstr &MI, bool Before = false);
+ void setInstr(MachineInstr &MI);
+ /// @}
+
+ /// Control where instructions we create are recorded (typically for
+ /// visiting again later during legalization).
+ /// @{
+ void recordInsertions(std::function<void(MachineInstr *)> InsertedInstr);
+ void stopRecordingInsertions();
/// @}
/// Set the debug location to \p DL for all the next build instructions.
void setDebugLoc(const DebugLoc &DL) { this->DL = DL; }
- /// Build and insert <empty> = \p Opcode [\p Ty] <empty>.
- /// \p Ty is the type of the instruction if \p Opcode describes
- /// a generic machine instruction. \p Ty must be nullptr if \p Opcode
- /// does not describe a generic instruction.
+ /// Build and insert <empty> = \p Opcode <empty>.
/// The insertion point is the one set by the last call of either
/// setBasicBlock or setMI.
///
/// \pre setBasicBlock or setMI must have been called.
- /// \pre Ty == nullptr or isPreISelGenericOpcode(Opcode)
+ ///
+ /// \return a MachineInstrBuilder for the newly created instruction.
+ MachineInstrBuilder buildInstr(unsigned Opcode);
+
+ /// Build but don't insert <empty> = \p Opcode <empty>.
+ ///
+ /// \pre setMF, setBasicBlock or setMI must have been called.
+ ///
+ /// \return a MachineInstrBuilder for the newly created instruction.
+ MachineInstrBuilder buildInstrNoInsert(unsigned Opcode);
+
+ /// Insert an existing instruction at the insertion point.
+ MachineInstrBuilder insertInstr(MachineInstrBuilder MIB);
+
+ /// Build and insert \p Res<def> = G_FRAME_INDEX \p Idx
+ ///
+ /// G_FRAME_INDEX materializes the address of an alloca value or other
+ /// stack-based object.
+ ///
+ /// \pre setBasicBlock or setMI must have been called.
+ /// \pre \p Res must be a generic virtual register with pointer type.
+ ///
+ /// \return a MachineInstrBuilder for the newly created instruction.
+ MachineInstrBuilder buildFrameIndex(unsigned Res, int Idx);
+
+ /// Build and insert \p Res<def> = G_GLOBAL_VALUE \p GV
+ ///
+ /// G_GLOBAL_VALUE materializes the address of the specified global
+ /// into \p Res.
+ ///
+ /// \pre setBasicBlock or setMI must have been called.
+ /// \pre \p Res must be a generic virtual register with pointer type
+ /// in the same address space as \p GV.
+ ///
+ /// \return a MachineInstrBuilder for the newly created instruction.
+ MachineInstrBuilder buildGlobalValue(unsigned Res, const GlobalValue *GV);
+
+ /// Build and insert \p Res<def> = G_ADD \p Op0, \p Op1
+ ///
+ /// G_ADD sets \p Res to the sum of integer parameters \p Op0 and \p Op1,
+ /// truncated to their width.
+ ///
+ /// \pre setBasicBlock or setMI must have been called.
+ /// \pre \p Res, \p Op0 and \p Op1 must be generic virtual registers
+ /// with the same (scalar or vector) type.
+ ///
+ /// \return a MachineInstrBuilder for the newly created instruction.
+ MachineInstrBuilder buildAdd(unsigned Res, unsigned Op0,
+ unsigned Op1);
+
+ /// Build and insert \p Res<def> = G_SUB \p Op0, \p Op1
+ ///
+ /// G_SUB sets \p Res to the difference of integer parameters \p Op0 and \p Op1,
+ /// truncated to their width.
+ ///
+ /// \pre setBasicBlock or setMI must have been called.
+ /// \pre \p Res, \p Op0 and \p Op1 must be generic virtual registers
+ /// with the same (scalar or vector) type.
+ ///
+ /// \return a MachineInstrBuilder for the newly created instruction.
+ MachineInstrBuilder buildSub(unsigned Res, unsigned Op0,
+ unsigned Op1);
+
+ /// Build and insert \p Res<def> = G_MUL \p Op0, \p Op1
+ ///
+ /// G_MUL sets \p Res to the product of integer parameters \p Op0 and \p Op1,
+ /// truncated to their width.
+ ///
+ /// \pre setBasicBlock or setMI must have been called.
+ /// \pre \p Res, \p Op0 and \p Op1 must be generic virtual registers
+ /// with the same (scalar or vector) type.
+ ///
+ /// \return a MachineInstrBuilder for the newly created instruction.
+ MachineInstrBuilder buildMul(unsigned Res, unsigned Op0,
+ unsigned Op1);
+
+ /// Build and insert \p Res<def> = G_GEP \p Op0, \p Op1
+ ///
+ /// G_GEP adds \p Op1 bytes to the pointer specified by \p Op0,
+ /// storing the resulting pointer in \p Res.
+ ///
+ /// \pre setBasicBlock or setMI must have been called.
+ /// \pre \p Res and \p Op0 must be generic virtual registers with pointer
+ /// type.
+ /// \pre \p Op1 must be a generic virtual register with scalar type.
+ ///
+ /// \return a MachineInstrBuilder for the newly created instruction.
+ MachineInstrBuilder buildGEP(unsigned Res, unsigned Op0,
+ unsigned Op1);
+
+ /// Build and insert \p Res<def>, \p CarryOut<def> = G_UADDE \p Op0,
+ /// \p Op1, \p CarryIn
+ ///
+ /// G_UADDE sets \p Res to \p Op0 + \p Op1 + \p CarryIn (truncated to the bit
+ /// width) and sets \p CarryOut to 1 if the result overflowed in unsigned
+ /// arithmetic.
+ ///
+ /// \pre setBasicBlock or setMI must have been called.
+ /// \pre \p Res, \p Op0 and \p Op1 must be generic virtual registers
+ /// with the same scalar type.
+ /// \pre \p CarryOut and \p CarryIn must be generic virtual
+ /// registers with the same scalar type (typically s1)
///
/// \return The newly created instruction.
- MachineInstr *buildInstr(unsigned Opcode, Type *Ty);
+ MachineInstrBuilder buildUAdde(unsigned Res, unsigned CarryOut, unsigned Op0,
+ unsigned Op1, unsigned CarryIn);
- /// Build and insert <empty> = \p Opcode [\p Ty] \p BB.
+ /// Build and insert \p Res<def> = G_ANYEXT \p Op0
+ ///
+ /// G_ANYEXT produces a register of the specified width, with bits 0 to
+ /// the size of \p Op set to \p Op. The remaining bits are unspecified
+ /// (i.e. this is neither zero nor sign-extension). For a vector register,
+ /// each element is extended individually.
///
/// \pre setBasicBlock or setMI must have been called.
- /// \pre Ty == nullptr or isPreISelGenericOpcode(Opcode)
+ /// \pre \p Res must be a generic virtual register with scalar or vector type.
+ /// \pre \p Op must be a generic virtual register with scalar or vector type.
+ /// \pre \p Op must be smaller than \p Res
///
/// \return The newly created instruction.
- MachineInstr *buildInstr(unsigned Opcode, Type *Ty, MachineBasicBlock &BB);
+ MachineInstrBuilder buildAnyExt(unsigned Res, unsigned Op);
- /// Build and insert \p Res<def> = \p Opcode [\p Ty] \p Op0, \p Op1.
+ /// Build and insert \p Res<def> = G_SEXT \p Op
+ ///
+ /// G_SEXT produces a register of the specified width, with bits 0 to
+ /// the size of \p Op set to \p Op. The remaining bits are duplicated from the
+ /// high bit of \p Op (i.e. 2s-complement sign extended).
///
/// \pre setBasicBlock or setMI must have been called.
- /// \pre Ty == nullptr or isPreISelGenericOpcode(Opcode)
+ /// \pre \p Res must be a generic virtual register with scalar or vector type.
+ /// \pre \p Op must be a generic virtual register with scalar or vector type.
+ /// \pre \p Op must be smaller than \p Res
///
/// \return The newly created instruction.
- MachineInstr *buildInstr(unsigned Opcode, Type *Ty, unsigned Res,
- unsigned Op0, unsigned Op1);
+ MachineInstrBuilder buildSExt(unsigned Res, unsigned Op);
- /// Build and insert \p Res<def> = \p Opcode \p Op0, \p Op1.
- /// I.e., instruction with a non-generic opcode.
+ /// Build and insert \p Res<def> = G_ZEXT \p Op
+ ///
+ /// G_ZEXT produces a register of the specified width, with bits 0 to
+ /// the size of \p Op set to \p Op. The remaining bits are 0. For a vector
+ /// register, each element is extended individually.
///
/// \pre setBasicBlock or setMI must have been called.
- /// \pre not isPreISelGenericOpcode(\p Opcode)
+ /// \pre \p Res must be a generic virtual register with scalar or vector type.
+ /// \pre \p Op must be a generic virtual register with scalar or vector type.
+ /// \pre \p Op must be smaller than \p Res
///
/// \return The newly created instruction.
- MachineInstr *buildInstr(unsigned Opcode, unsigned Res, unsigned Op0,
- unsigned Op1);
+ MachineInstrBuilder buildZExt(unsigned Res, unsigned Op);
- /// Build and insert \p Res<def> = \p Opcode \p Op0.
+ /// Build and insert \p Res<def> = G_SEXT \p Op, \p Res = G_TRUNC \p Op, or
+ /// \p Res = COPY \p Op depending on the differing sizes of \p Res and \p Op.
+ ///
+ /// \pre setBasicBlock or setMI must have been called.
+ /// \pre \p Res must be a generic virtual register with scalar or vector type.
+ /// \pre \p Op must be a generic virtual register with scalar or vector type.
+ ///
+ /// \return The newly created instruction.
+ MachineInstrBuilder buildSExtOrTrunc(unsigned Res, unsigned Op);
+
+ /// Build and insert G_BR \p Dest
+ ///
+ /// G_BR is an unconditional branch to \p Dest.
+ ///
+ /// \pre setBasicBlock or setMI must have been called.
+ ///
+ /// \return a MachineInstrBuilder for the newly created instruction.
+ MachineInstrBuilder buildBr(MachineBasicBlock &BB);
+
+ /// Build and insert G_BRCOND \p Tst, \p Dest
+ ///
+ /// G_BRCOND is a conditional branch to \p Dest.
///
/// \pre setBasicBlock or setMI must have been called.
- /// \pre not isPreISelGenericOpcode(\p Opcode)
+ /// \pre \p Tst must be a generic virtual register with scalar
+ /// type. At the beginning of legalization, this will be a single
+ /// bit (s1). Targets with interesting flags registers may change
+ /// this. For a wider type, whether the branch is taken must only
+ /// depend on bit 0 (for now).
///
/// \return The newly created instruction.
- MachineInstr *buildInstr(unsigned Opcode, unsigned Res, unsigned Op0);
+ MachineInstrBuilder buildBrCond(unsigned Tst, MachineBasicBlock &BB);
- /// Build and insert <empty> = \p Opcode <empty>.
+ /// Build and insert \p Res = G_CONSTANT \p Val
+ ///
+ /// G_CONSTANT is an integer constant with the specified size and value. \p
+ /// Val will be extended or truncated to the size of \p Res.
+ ///
+ /// \pre setBasicBlock or setMI must have been called.
+ /// \pre \p Res must be a generic virtual register with scalar or pointer
+ /// type.
+ ///
+ /// \return The newly created instruction.
+ MachineInstrBuilder buildConstant(unsigned Res, const ConstantInt &Val);
+
+ /// Build and insert \p Res = G_CONSTANT \p Val
+ ///
+ /// G_CONSTANT is an integer constant with the specified size and value.
///
/// \pre setBasicBlock or setMI must have been called.
- /// \pre not isPreISelGenericOpcode(\p Opcode)
+ /// \pre \p Res must be a generic virtual register with scalar type.
///
/// \return The newly created instruction.
- MachineInstr *buildInstr(unsigned Opcode);
+ MachineInstrBuilder buildConstant(unsigned Res, int64_t Val);
+
+ /// Build and insert \p Res = G_FCONSTANT \p Val
+ ///
+ /// G_FCONSTANT is a floating-point constant with the specified size and
+ /// value.
+ ///
+ /// \pre setBasicBlock or setMI must have been called.
+ /// \pre \p Res must be a generic virtual register with scalar type.
+ ///
+ /// \return The newly created instruction.
+ MachineInstrBuilder buildFConstant(unsigned Res, const ConstantFP &Val);
+
+ /// Build and insert \p Res<def> = COPY Op
+ ///
+ /// Register-to-register COPY sets \p Res to \p Op.
+ ///
+ /// \pre setBasicBlock or setMI must have been called.
+ ///
+ /// \return a MachineInstrBuilder for the newly created instruction.
+ MachineInstrBuilder buildCopy(unsigned Res, unsigned Op);
+
+ /// Build and insert `Res<def> = G_LOAD Addr, MMO`.
+ ///
+ /// Loads the value stored at \p Addr. Puts the result in \p Res.
+ ///
+ /// \pre setBasicBlock or setMI must have been called.
+ /// \pre \p Res must be a generic virtual register.
+ /// \pre \p Addr must be a generic virtual register with pointer type.
+ ///
+ /// \return a MachineInstrBuilder for the newly created instruction.
+ MachineInstrBuilder buildLoad(unsigned Res, unsigned Addr,
+ MachineMemOperand &MMO);
+
+ /// Build and insert `G_STORE Val, Addr, MMO`.
+ ///
+ /// Stores the value \p Val to \p Addr.
+ ///
+ /// \pre setBasicBlock or setMI must have been called.
+ /// \pre \p Val must be a generic virtual register.
+ /// \pre \p Addr must be a generic virtual register with pointer type.
+ ///
+ /// \return a MachineInstrBuilder for the newly created instruction.
+ MachineInstrBuilder buildStore(unsigned Val, unsigned Addr,
+ MachineMemOperand &MMO);
+
+ /// Build and insert `Res0<def>, ... = G_EXTRACT Src, Idx0, ...`.
+ ///
+ /// If \p Res[i] has size N bits, G_EXTRACT sets \p Res[i] to bits `[Idxs[i],
+ /// Idxs[i] + N)` of \p Src.
+ ///
+ /// \pre setBasicBlock or setMI must have been called.
+ /// \pre Indices must be in ascending order of bit position.
+ /// \pre Each member of \p Results and \p Src must be a generic
+ /// virtual register.
+ ///
+ /// \return a MachineInstrBuilder for the newly created instruction.
+ MachineInstrBuilder buildExtract(ArrayRef<unsigned> Results,
+ ArrayRef<uint64_t> Indices, unsigned Src);
+
+ /// Build and insert \p Res<def> = G_SEQUENCE \p Op0, \p Idx0...
+ ///
+ /// G_SEQUENCE inserts each element of Ops into an IMPLICIT_DEF register,
+ /// where each entry starts at the bit-index specified by \p Indices.
+ ///
+ /// \pre setBasicBlock or setMI must have been called.
+ /// \pre The final element of the sequence must not extend past the end of the
+ /// destination register.
+ /// \pre The bits defined by each Op (derived from index and scalar size) must
+ /// not overlap.
+ /// \pre \p Indices must be in ascending order of bit position.
+ ///
+ /// \return a MachineInstrBuilder for the newly created instruction.
+ MachineInstrBuilder buildSequence(unsigned Res,
+ ArrayRef<unsigned> Ops,
+ ArrayRef<uint64_t> Indices);
+
+ void addUsesWithIndices(MachineInstrBuilder MIB) {}
+
+ template <typename... ArgTys>
+ void addUsesWithIndices(MachineInstrBuilder MIB, unsigned Reg,
+ unsigned BitIndex, ArgTys... Args) {
+ MIB.addUse(Reg).addImm(BitIndex);
+ addUsesWithIndices(MIB, Args...);
+ }
+
+ template <typename... ArgTys>
+ MachineInstrBuilder buildSequence(unsigned Res, unsigned Op,
+ unsigned Index, ArgTys... Args) {
+ MachineInstrBuilder MIB =
+ buildInstr(TargetOpcode::G_SEQUENCE).addDef(Res);
+ addUsesWithIndices(MIB, Op, Index, Args...);
+ return MIB;
+ }
+
+ template <typename... ArgTys>
+ MachineInstrBuilder buildInsert(unsigned Res, unsigned Src,
+ unsigned Op, unsigned Index, ArgTys... Args) {
+ MachineInstrBuilder MIB =
+ buildInstr(TargetOpcode::G_INSERT).addDef(Res).addUse(Src);
+ addUsesWithIndices(MIB, Op, Index, Args...);
+ return MIB;
+ }
+
+ /// Build and insert either a G_INTRINSIC (if \p HasSideEffects is false) or
+ /// G_INTRINSIC_W_SIDE_EFFECTS instruction. Its first operand will be the
+ /// result register definition unless \p Res is NoReg (== 0). The second
+ /// operand will be the intrinsic's ID.
+ ///
+ /// Callers are expected to add the required definitions and uses afterwards.
+ ///
+ /// \pre setBasicBlock or setMI must have been called.
+ ///
+ /// \return a MachineInstrBuilder for the newly created instruction.
+ MachineInstrBuilder buildIntrinsic(Intrinsic::ID ID, unsigned Res,
+ bool HasSideEffects);
+
+ /// Build and insert \p Res<def> = G_FPTRUNC \p Op
+ ///
+ /// G_FPTRUNC converts a floating-point value into one with a smaller type.
+ ///
+ /// \pre setBasicBlock or setMI must have been called.
+ /// \pre \p Res must be a generic virtual register with scalar or vector type.
+ /// \pre \p Op must be a generic virtual register with scalar or vector type.
+ /// \pre \p Res must be smaller than \p Op
+ ///
+ /// \return The newly created instruction.
+ MachineInstrBuilder buildFPTrunc(unsigned Res, unsigned Op);
+
+ /// Build and insert \p Res<def> = G_TRUNC \p Op
+ ///
+ /// G_TRUNC extracts the low bits of a type. For a vector type each element is
+ /// truncated independently before being packed into the destination.
+ ///
+ /// \pre setBasicBlock or setMI must have been called.
+ /// \pre \p Res must be a generic virtual register with scalar or vector type.
+ /// \pre \p Op must be a generic virtual register with scalar or vector type.
+ /// \pre \p Res must be smaller than \p Op
+ ///
+ /// \return The newly created instruction.
+ MachineInstrBuilder buildTrunc(unsigned Res, unsigned Op);
+
+ /// Build and insert a \p Res = G_ICMP \p Pred, \p Op0, \p Op1
+ ///
+ /// \pre setBasicBlock or setMI must have been called.
+ ///
+ /// \pre \p Res must be a generic virtual register with scalar or
+ /// vector type. Typically this starts as s1 or <N x s1>.
+ /// \pre \p Op0 and Op1 must be generic virtual registers with the
+ /// same number of elements as \p Res. If \p Res is a scalar,
+ /// \p Op0 must be either a scalar or pointer.
+ /// \pre \p Pred must be an integer predicate.
+ ///
+ /// \return a MachineInstrBuilder for the newly created instruction.
+ MachineInstrBuilder buildICmp(CmpInst::Predicate Pred,
+ unsigned Res, unsigned Op0, unsigned Op1);
+
+ /// Build and insert a \p Res = G_FCMP \p Pred, \p Op0, \p Op1
+ ///
+ /// \pre setBasicBlock or setMI must have been called.
+ ///
+ /// \pre \p Res must be a generic virtual register with scalar or
+ /// vector type. Typically this starts as s1 or <N x s1>.
+ /// \pre \p Op0 and Op1 must be generic virtual registers with the
+ /// same number of elements as \p Res (or scalar, if \p Res is
+ /// scalar).
+ /// \pre \p Pred must be a floating-point predicate.
+ ///
+ /// \return a MachineInstrBuilder for the newly created instruction.
+ MachineInstrBuilder buildFCmp(CmpInst::Predicate Pred,
+ unsigned Res, unsigned Op0, unsigned Op1);
+
+ /// Build and insert a \p Res = G_SELECT \p Tst, \p Op0, \p Op1
+ ///
+ /// \pre setBasicBlock or setMI must have been called.
+ /// \pre \p Res, \p Op0 and \p Op1 must be generic virtual registers
+ /// with the same type.
+ /// \pre \p Tst must be a generic virtual register with scalar, pointer or
+ /// vector type. If vector then it must have the same number of
+ /// elements as the other parameters.
+ ///
+ /// \return a MachineInstrBuilder for the newly created instruction.
+ MachineInstrBuilder buildSelect(unsigned Res, unsigned Tst,
+ unsigned Op0, unsigned Op1);
};
} // End namespace llvm.
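
The builder's new opcode-specific entry points replace the old generic buildInstr(Opcode, Ty, ...) calls. As a rough illustration (not part of the patch), a pass might drive the interface like the sketch below; the register numbers, the memory operand and the include paths are assumptions, and real callers create their virtual registers through MachineRegisterInfo.

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
    #include "llvm/CodeGen/MachineFunction.h"
    #include "llvm/CodeGen/MachineMemOperand.h"

    using namespace llvm;

    // Hypothetical helper: emit Res = Op0 + Op1 and store it to Addr at the end
    // of MBB, recording every instruction created so it can be revisited later
    // (e.g. by the legalizer).
    static void emitAddAndStore(MachineFunction &MF, MachineBasicBlock &MBB,
                                unsigned Res, unsigned Op0, unsigned Op1,
                                unsigned Addr, MachineMemOperand &MMO,
                                SmallVectorImpl<MachineInstr *> &Created) {
      MachineIRBuilder MIRBuilder;
      MIRBuilder.setMF(MF);
      MIRBuilder.setMBB(MBB); // insertion point: end of MBB
      MIRBuilder.recordInsertions(
          [&Created](MachineInstr *MI) { Created.push_back(MI); });

      MIRBuilder.buildAdd(Res, Op0, Op1);    // Res<def> = G_ADD Op0, Op1
      MIRBuilder.buildStore(Res, Addr, MMO); // G_STORE Res, Addr, MMO

      MIRBuilder.stopRecordingInsertions();
    }
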
diff --git a/contrib/llvm/include/llvm/CodeGen/GlobalISel/RegBankSelect.h b/contrib/llvm/include/llvm/CodeGen/GlobalISel/RegBankSelect.h
index b393744e67cb..106fc9ffb8b5 100644
--- a/contrib/llvm/include/llvm/CodeGen/GlobalISel/RegBankSelect.h
+++ b/contrib/llvm/include/llvm/CodeGen/GlobalISel/RegBankSelect.h
@@ -74,6 +74,7 @@ class BlockFrequency;
class MachineBranchProbabilityInfo;
class MachineBlockFrequencyInfo;
class MachineRegisterInfo;
+class TargetPassConfig;
class TargetRegisterInfo;
/// This pass implements the reg bank selector pass used in the GlobalISel
@@ -476,8 +477,12 @@ private:
/// Optimization mode of the pass.
Mode OptMode;
+ /// Current target configuration. Controls how the pass handles errors.
+ const TargetPassConfig *TPC;
+
/// Assign the register bank of each operand of \p MI.
- void assignInstr(MachineInstr &MI);
+ /// \return True on success, false otherwise.
+ bool assignInstr(MachineInstr &MI);
/// Initialize the field members using \p MF.
void init(MachineFunction &MF);
@@ -520,7 +525,9 @@ private:
///
/// \note The caller is supposed to do the rewriting of op if need be.
/// I.e., Reg = op ... => <NewRegs> = NewOp ...
- void repairReg(MachineOperand &MO,
+ ///
+ /// \return True if the repairing worked, false otherwise.
+ bool repairReg(MachineOperand &MO,
const RegisterBankInfo::ValueMapping &ValMapping,
RegBankSelect::RepairingPlacement &RepairPt,
const iterator_range<SmallVectorImpl<unsigned>::const_iterator>
@@ -570,7 +577,8 @@ private:
/// Apply \p Mapping to \p MI. \p RepairPts represents the different
/// mapping action that need to happen for the mapping to be
/// applied.
- void applyMapping(MachineInstr &MI,
+ /// \return True if the mapping was applied successfully, false otherwise.
+ bool applyMapping(MachineInstr &MI,
const RegisterBankInfo::InstructionMapping &InstrMapping,
SmallVectorImpl<RepairingPlacement> &RepairPts);
@@ -578,12 +586,21 @@ public:
/// Create a RegBankSelect pass with the specified \p RunningMode.
RegBankSelect(Mode RunningMode = Fast);
- const char *getPassName() const override {
- return "RegBankSelect";
- }
+ StringRef getPassName() const override { return "RegBankSelect"; }
void getAnalysisUsage(AnalysisUsage &AU) const override;
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties()
+ .set(MachineFunctionProperties::Property::IsSSA)
+ .set(MachineFunctionProperties::Property::Legalized);
+ }
+
+ MachineFunctionProperties getSetProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::RegBankSelected);
+ }
+
/// Walk through \p MF and assign a register bank to every virtual register
/// that are still mapped to nothing.
/// The target needs to provide a RegisterBankInfo and in particular
diff --git a/contrib/llvm/include/llvm/CodeGen/GlobalISel/RegisterBank.h b/contrib/llvm/include/llvm/CodeGen/GlobalISel/RegisterBank.h
index e886382fd8e8..075677d30179 100644
--- a/contrib/llvm/include/llvm/CodeGen/GlobalISel/RegisterBank.h
+++ b/contrib/llvm/include/llvm/CodeGen/GlobalISel/RegisterBank.h
@@ -37,15 +37,16 @@ private:
/// initialized yet.
static const unsigned InvalidID;
- /// Only the RegisterBankInfo can create RegisterBank.
+ /// Only the RegisterBankInfo can initialize RegisterBank properly.
+ friend RegisterBankInfo;
+
+public:
/// The default constructor will leave the object in
/// an invalid state. I.e. isValid() == false.
- /// The field must be updated to fix that.
+ /// The fields must be updated to fix that and only
+ /// RegisterBankInfo instances are allowed to do so.
RegisterBank();
- friend RegisterBankInfo;
-
-public:
/// Get the identifier of this register bank.
unsigned getID() const { return ID; }
diff --git a/contrib/llvm/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h b/contrib/llvm/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h
index 19d170365858..4d4a226eb2d2 100644
--- a/contrib/llvm/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h
+++ b/contrib/llvm/include/llvm/CodeGen/GlobalISel/RegisterBankInfo.h
@@ -16,6 +16,8 @@
#define LLVM_CODEGEN_GLOBALISEL_REGBANKINFO_H
#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/MachineValueType.h" // For SimpleValueType.
@@ -82,15 +84,88 @@ public:
/// Helper struct that represents how a value is mapped through
/// different register banks.
+ ///
+ /// \note: So far we do not have any users of the complex mappings
+ /// (mappings with more than one partial mapping), but when we do,
+ /// we will need to duplicate the partial mappings.
+ /// The alternative could be to use an array of pointers to partial
+ /// mappings (i.e., PartialMapping **BreakDown) and duplicate the
+ /// pointers instead.
+ ///
+ /// E.g.,
+ /// Let's say we have a 32-bit add and a <2 x 32-bit> vadd. We
+ /// can expand the <2 x 32-bit> vadd into 2 x 32-bit adds.
+ ///
+ /// Currently the TableGen-like file would look like:
+ /// \code
+ /// PartialMapping[] = {
+ /// /*32-bit add*/ {0, 32, GPR},
+ /// /*2x32-bit add*/ {0, 32, GPR}, {0, 32, GPR}, // <-- Same entry 3x
+ /// /*<2x32-bit> vadd*/ {0, 64, VPR}
+ /// }; // PartialMapping duplicated.
+ ///
+ /// ValueMapping[] {
+ /// /*plain 32-bit add*/ {&PartialMapping[0], 1},
+ /// /*expanded vadd on 2xadd*/ {&PartialMapping[1], 2},
+ /// /*plain <2x32-bit> vadd*/ {&PartialMapping[3], 1}
+ /// };
+ /// \endcode
+ ///
+ /// With the array of pointers, we would have:
+ /// \code
+ /// PartialMapping[] = {
+ /// /*32-bit add*/ {0, 32, GPR},
+ /// /*<2x32-bit> vadd*/ {0, 64, VPR}
+ /// }; // No more duplication.
+ ///
+ /// BreakDowns[] = {
+ /// /*AddBreakDown*/ &PartialMapping[0],
+ /// /*2xAddBreakDown*/ &PartialMapping[0], &PartialMapping[0],
+ /// /*VAddBreakDown*/ &PartialMapping[1]
+ /// }; // Addresses of PartialMapping duplicated (smaller).
+ ///
+ /// ValueMapping[] {
+ /// /*plain 32-bit add*/ {&BreakDowns[0], 1},
+ /// /*expanded vadd on 2xadd*/ {&BreakDowns[1], 2},
+ /// /*plain <2x32-bit> vadd*/ {&BreakDowns[3], 1}
+ /// };
+ /// \endcode
+ ///
+ /// Given that a PartialMapping is small, the code size
+ /// impact would actually be a degradation. Moreover, the compile time would
+ /// be hit by the additional indirection.
+ /// If PartialMapping gets bigger we may reconsider.
struct ValueMapping {
/// How the value is broken down between the different register banks.
- SmallVector<PartialMapping, 2> BreakDown;
+ const PartialMapping *BreakDown;
+
+ /// Number of partial mappings used to break down this value.
+ unsigned NumBreakDowns;
+
+ /// The default constructor creates an invalid (isValid() == false)
+ /// instance.
+ ValueMapping() : ValueMapping(nullptr, 0) {}
- /// Verify that this mapping makes sense for a value of \p ExpectedBitWidth.
+ /// Initialize a ValueMapping with the given parameters.
+ /// \p BreakDown needs to have a lifetime at least as long
+ /// as this instance.
+ ValueMapping(const PartialMapping *BreakDown, unsigned NumBreakDowns)
+ : BreakDown(BreakDown), NumBreakDowns(NumBreakDowns) {}
+
+ /// Iterators through the PartialMappings.
+ const PartialMapping *begin() const { return BreakDown; }
+ const PartialMapping *end() const { return BreakDown + NumBreakDowns; }
+
+ /// Check if this ValueMapping is valid.
+ bool isValid() const { return BreakDown && NumBreakDowns; }
+
+ /// Verify that this mapping makes sense for a value of
+ /// \p MeaningfulBitWidth.
/// \note This method does not check anything when assertions are disabled.
///
/// \return True if the check was successful.
- bool verify(unsigned ExpectedBitWidth) const;
+ bool verify(unsigned MeaningfulBitWidth) const;
/// Print this on dbgs() stream.
void dump() const;
@@ -109,11 +184,11 @@ public:
/// Cost of this mapping.
unsigned Cost;
/// Mapping of all the operands.
- std::unique_ptr<ValueMapping[]> OperandsMapping;
+ const ValueMapping *OperandsMapping;
/// Number of operands.
unsigned NumOperands;
- ValueMapping &getOperandMapping(unsigned i) {
+ const ValueMapping &getOperandMapping(unsigned i) {
assert(i < getNumOperands() && "Out of bound operand");
return OperandsMapping[i];
}
@@ -127,11 +202,13 @@ public:
/// at the index i.
///
/// \pre ID != InvalidMappingID
- InstructionMapping(unsigned ID, unsigned Cost, unsigned NumOperands)
- : ID(ID), Cost(Cost), NumOperands(NumOperands) {
+ InstructionMapping(unsigned ID, unsigned Cost,
+ const ValueMapping *OperandsMapping,
+ unsigned NumOperands)
+ : ID(ID), Cost(Cost), OperandsMapping(OperandsMapping),
+ NumOperands(NumOperands) {
assert(getID() != InvalidMappingID &&
"Use the default constructor for invalid mapping");
- OperandsMapping.reset(new ValueMapping[getNumOperands()]);
}
/// Default constructor.
@@ -148,25 +225,26 @@ public:
unsigned getNumOperands() const { return NumOperands; }
/// Get the value mapping of the ith operand.
+ /// \pre The mapping for the ith operand has been set.
+ /// \pre The ith operand is a register.
const ValueMapping &getOperandMapping(unsigned i) const {
- return const_cast<InstructionMapping *>(this)->getOperandMapping(i);
+ const ValueMapping &ValMapping =
+ const_cast<InstructionMapping *>(this)->getOperandMapping(i);
+ return ValMapping;
}
- /// Get the value mapping of the ith operand.
- void setOperandMapping(unsigned i, const ValueMapping &ValMapping) {
- getOperandMapping(i) = ValMapping;
+ /// Set the mapping for all the operands.
+ /// In other words, OpdsMapping should hold at least getNumOperands
+ /// ValueMapping.
+ void setOperandsMapping(const ValueMapping *OpdsMapping) {
+ OperandsMapping = OpdsMapping;
}
/// Check whether this object is valid.
/// This is a lightweight check for obvious wrong instance.
- bool isValid() const { return getID() != InvalidMappingID; }
-
- /// Set the operand mapping for the \p OpIdx-th operand.
- /// The mapping will consist of only one element in the break down list.
- /// This element will map to \p RegBank and fully define a mask, whose
- /// bitwidth matches the size of \p MaskSize.
- void setOperandMapping(unsigned OpIdx, unsigned MaskSize,
- const RegisterBank &RegBank);
+ bool isValid() const {
+ return getID() != InvalidMappingID && OperandsMapping;
+ }
/// Verify that this mapping makes sense for \p MI.
/// \pre \p MI must be connected to a MachineFunction.
@@ -188,12 +266,13 @@ public:
/// \todo When we move to TableGen this should be an array ref.
typedef SmallVector<InstructionMapping, 4> InstructionMappings;
- /// Helper class use to get/create the virtual registers that will be used
+ /// Helper class used to get/create the virtual registers that will be used
/// to replace the MachineOperand when applying a mapping.
class OperandsMapper {
/// The OpIdx-th cell contains the index in NewVRegs where the VRegs of the
/// OpIdx-th operand starts. -1 means we do not have such mapping yet.
- std::unique_ptr<int[]> OpToNewVRegIdx;
+ /// Note: We use a SmallVector to avoid heap allocation for most cases.
+ SmallVector<int, 8> OpToNewVRegIdx;
/// Hold the registers that will be used to map MI with InstrMapping.
SmallVector<unsigned, 8> NewVRegs;
/// Current MachineRegisterInfo, used to create new virtual registers.
@@ -287,12 +366,21 @@ public:
protected:
/// Hold the set of supported register banks.
- std::unique_ptr<RegisterBank[]> RegBanks;
+ RegisterBank **RegBanks;
/// Total number of register banks.
unsigned NumRegBanks;
- /// Mapping from MVT::SimpleValueType to register banks.
- std::unique_ptr<const RegisterBank *[]> VTToRegBank;
+ /// Keep dynamically allocated PartialMapping in a separate map.
+ /// This shouldn't be needed when everything gets TableGen'ed.
+ mutable DenseMap<unsigned, const PartialMapping *> MapOfPartialMappings;
+
+ /// Keep dynamically allocated ValueMapping in a separate map.
+ /// This shouldn't be needed when everything gets TableGen'ed.
+ mutable DenseMap<unsigned, const ValueMapping *> MapOfValueMappings;
+
+ /// Keep dynamically allocated array of ValueMapping in a separate map.
+ /// This shouldn't be needed when everything gets TableGen'ed.
+ mutable DenseMap<unsigned, ValueMapping *> MapOfOperandsMappings;
/// Create a RegisterBankInfo that can accommodate up to \p NumRegBanks
/// RegisterBank instances.
@@ -300,7 +388,7 @@ protected:
/// \note For the verify method to succeed all the \p NumRegBanks
/// must be initialized by createRegisterBank and updated with
/// addRegBankCoverage RegisterBank.
- RegisterBankInfo(unsigned NumRegBanks);
+ RegisterBankInfo(RegisterBank **RegBanks, unsigned NumRegBanks);
/// This constructor is meaningless.
/// It just provides a default constructor that can be used at link time
@@ -325,14 +413,6 @@ protected:
/// It also adjusts the size of the register bank to reflect the maximal
/// size of a value that can be held in that register bank.
///
- /// If \p AddTypeMapping is true, this method also records what types can
- /// be mapped to \p ID. Although this done by default, targets may want to
- /// disable it, espicially if a given type may be mapped on different
- /// register bank. Indeed, in such case, this method only records the
- /// first register bank where the type matches.
- /// This information is only used to provide default mapping
- /// (see getInstrMappingImpl).
- ///
/// \note This method does *not* add the super classes of \p RCId.
/// The rationale is if \p ID covers the registers of \p RCId, that
/// does not necessarily mean that \p ID covers the set of registers
@@ -343,43 +423,12 @@ protected:
///
/// \todo TableGen should just generate the BitSet vector for us.
void addRegBankCoverage(unsigned ID, unsigned RCId,
- const TargetRegisterInfo &TRI,
- bool AddTypeMapping = true);
+ const TargetRegisterInfo &TRI);
/// Get the register bank identified by \p ID.
RegisterBank &getRegBank(unsigned ID) {
assert(ID < getNumRegBanks() && "Accessing an unknown register bank");
- return RegBanks[ID];
- }
-
- /// Get the register bank that has been recorded to cover \p SVT.
- const RegisterBank *getRegBankForType(MVT::SimpleValueType SVT) const {
- if (!VTToRegBank)
- return nullptr;
- assert(SVT < MVT::SimpleValueType::LAST_VALUETYPE && "Out-of-bound access");
- return VTToRegBank.get()[SVT];
- }
-
- /// Record \p RegBank as the register bank that covers \p SVT.
- /// If a record was already set for \p SVT, the mapping is not
- /// updated, unless \p Force == true
- ///
- /// \post if getRegBankForType(SVT)\@pre == nullptr then
- /// getRegBankForType(SVT) == &RegBank
- /// \post if Force == true then getRegBankForType(SVT) == &RegBank
- void recordRegBankForType(const RegisterBank &RegBank,
- MVT::SimpleValueType SVT, bool Force = false) {
- if (!VTToRegBank) {
- VTToRegBank.reset(
- new const RegisterBank *[MVT::SimpleValueType::LAST_VALUETYPE]);
- std::fill(&VTToRegBank[0],
- &VTToRegBank[MVT::SimpleValueType::LAST_VALUETYPE], nullptr);
- }
- assert(SVT < MVT::SimpleValueType::LAST_VALUETYPE && "Out-of-bound access");
- // If we want to override the mapping or the mapping does not exits yet,
- // set the register bank for SVT.
- if (Force || !getRegBankForType(SVT))
- VTToRegBank.get()[SVT] = &RegBank;
+ return *RegBanks[ID];
}
/// Try to get the mapping of \p MI.
@@ -393,14 +442,65 @@ protected:
///
/// This implementation is able to get the mapping of:
/// - Target specific instructions by looking at the encoding constraints.
- /// - Any instruction if all the register operands are already been assigned
+ /// - Any instruction if all the register operands have already been assigned
/// a register, a register class, or a register bank.
- /// - Copies and phis if at least one of the operand has been assigned a
+ /// - Copies and phis if at least one of the operands has been assigned a
/// register, a register class, or a register bank.
/// In other words, this method will likely fail to find a mapping for
/// any generic opcode that has not been lowered by target specific code.
InstructionMapping getInstrMappingImpl(const MachineInstr &MI) const;
+ /// Get the uniquely generated PartialMapping for the
+ /// given arguments.
+ const PartialMapping &getPartialMapping(unsigned StartIdx, unsigned Length,
+ const RegisterBank &RegBank) const;
+
+ /// Methods to get a uniquely generated ValueMapping.
+ /// @{
+
+ /// The most common ValueMapping consists of a single PartialMapping.
+ /// Feature a method for that.
+ const ValueMapping &getValueMapping(unsigned StartIdx, unsigned Length,
+ const RegisterBank &RegBank) const;
+
+ /// Get the ValueMapping for the given arguments.
+ const ValueMapping &getValueMapping(const PartialMapping *BreakDown,
+ unsigned NumBreakDowns) const;
+ /// @}
+
+ /// Methods to get a uniquely generated array of ValueMapping.
+ /// @{
+
+ /// Get the uniquely generated array of ValueMapping for the
+ /// elements of between \p Begin and \p End.
+ ///
+ /// Elements that are nullptr will be replaced by
+ /// invalid ValueMapping (ValueMapping::isValid == false).
+ ///
+ /// \pre The pointers on ValueMapping between \p Begin and \p End
+ /// must uniquely identify a ValueMapping. Otherwise, there is no
+ /// guarantee that the return instance will be unique, i.e., another
+ /// OperandsMapping could have the same content.
+ template <typename Iterator>
+ const ValueMapping *getOperandsMapping(Iterator Begin, Iterator End) const;
+
+ /// Get the uniquely generated array of ValueMapping for the
+ /// elements of \p OpdsMapping.
+ ///
+ /// Elements of \p OpdsMapping that are nullptr will be replaced by
+ /// invalid ValueMapping (ValueMapping::isValid == false).
+ const ValueMapping *getOperandsMapping(
+ const SmallVectorImpl<const ValueMapping *> &OpdsMapping) const;
+
+ /// Get the uniquely generated array of ValueMapping for the
+ /// given arguments.
+ ///
+ /// Arguments that are nullptr will be replaced by invalid
+ /// ValueMapping (ValueMapping::isValid == false).
+ const ValueMapping *getOperandsMapping(
+ std::initializer_list<const ValueMapping *> OpdsMapping) const;
+ /// @}
+
/// Get the register bank for the \p OpIdx-th operand of \p MI from
/// the encoding constraints, if any.
///
@@ -429,7 +529,7 @@ protected:
}
public:
- virtual ~RegisterBankInfo() {}
+ virtual ~RegisterBankInfo();
/// Get the register bank identified by \p ID.
const RegisterBank &getRegBank(unsigned ID) const {
@@ -479,6 +579,15 @@ public:
return &A != &B;
}
+ /// Constrain the (possibly generic) virtual register \p Reg to \p RC.
+ ///
+ /// \pre \p Reg is a virtual register that either has a bank or a class.
+ /// \returns The constrained register class, or nullptr if there is none.
+ /// \note This is a generic variant of MachineRegisterInfo::constrainRegClass
+ static const TargetRegisterClass *
+ constrainGenericRegister(unsigned Reg, const TargetRegisterClass &RC,
+ MachineRegisterInfo &MRI);
+
/// Identifier used when the related instruction mapping instance
/// is generated by target independent code.
/// Make sure not to use that identifier to avoid possible collision.
@@ -494,7 +603,7 @@ public:
/// This mapping should be the direct translation of \p MI.
/// In other words, when \p MI is mapped with the returned mapping,
/// only the register banks of the operands of \p MI need to be updated.
- /// In particular, neither the opcode or the type of \p MI needs to be
+ /// In particular, neither the opcode nor the type of \p MI needs to be
/// updated for this direct mapping.
///
/// The target independent implementation gives a mapping based on
@@ -597,6 +706,10 @@ operator<<(raw_ostream &OS, const RegisterBankInfo::OperandsMapper &OpdMapper) {
OpdMapper.print(OS, /*ForDebug*/ false);
return OS;
}
+
+/// Hashing function for PartialMapping.
+/// It is required for the hashing of ValueMapping.
+hash_code hash_value(const RegisterBankInfo::PartialMapping &PartMapping);
} // End namespace llvm.
#endif
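
To see how the uniqued mapping helpers above are meant to replace the removed per-operand setOperandMapping calls, here is a hedged sketch of target code. MyTargetRegisterBankInfo and its GPRBank member are assumptions made for the example; only the getValueMapping/getOperandsMapping/InstructionMapping signatures come from the header.

    // Hypothetical target hook: map a three-register-operand instruction
    // entirely onto a 32-bit GPR bank.
    RegisterBankInfo::InstructionMapping
    MyTargetRegisterBankInfo::getThreeOpGPRMapping(const MachineInstr &MI) const {
      // A single 32-bit partial mapping, uniqued by getValueMapping() and
      // therefore shared by all operands.
      const ValueMapping &GPR32 =
          getValueMapping(/*StartIdx=*/0, /*Length=*/32, GPRBank);

      // One uniqued array describing all three register operands.
      const ValueMapping *AllGPR32 =
          getOperandsMapping({&GPR32, &GPR32, &GPR32});

      // The mapping now simply points at the shared operand array.
      return InstructionMapping(/*ID=*/1, /*Cost=*/1, AllGPR32,
                                /*NumOperands=*/3);
    }
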
diff --git a/contrib/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/contrib/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
new file mode 100644
index 000000000000..f5d5f5cdf0cd
--- /dev/null
+++ b/contrib/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -0,0 +1,43 @@
+//==-- llvm/CodeGen/GlobalISel/Utils.h ---------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file declares the API of helper functions used throughout the
+/// GlobalISel pipeline.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_GLOBALISEL_UTILS_H
+#define LLVM_CODEGEN_GLOBALISEL_UTILS_H
+
+namespace llvm {
+
+class MachineFunction;
+class MachineInstr;
+class MachineRegisterInfo;
+class MCInstrDesc;
+class RegisterBankInfo;
+class TargetInstrInfo;
+class TargetRegisterInfo;
+
+/// Try to constrain Reg so that it is usable by argument OpIdx of the
+/// provided MCInstrDesc \p II. If this fails, create a new virtual
+/// register in the correct class and insert a COPY before \p InsertPt.
+/// The debug location of \p InsertPt is used for the new copy.
+///
+/// \return The virtual register constrained to the right register class.
+unsigned constrainOperandRegClass(const MachineFunction &MF,
+ const TargetRegisterInfo &TRI,
+ MachineRegisterInfo &MRI,
+ const TargetInstrInfo &TII,
+ const RegisterBankInfo &RBI,
+ MachineInstr &InsertPt, const MCInstrDesc &II,
+ unsigned Reg, unsigned OpIdx);
+
+} // End namespace llvm.
+#endif
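
A usage sketch for the new helper, as it might appear in a target's instruction selector; every surrounding variable (I, II, MF, TRI, MRI, TII, RBI) is an assumption here, and only the call itself matches the declaration above.

    // Constrain the register feeding operand 1 of the instruction being
    // selected. If the existing virtual register cannot be constrained in
    // place, the helper inserts a COPY before I and returns a fresh register
    // of the right class.
    unsigned OldReg = I.getOperand(1).getReg();
    unsigned NewReg = constrainOperandRegClass(MF, TRI, MRI, TII, RBI,
                                               /*InsertPt=*/I, /*II=*/II,
                                               OldReg, /*OpIdx=*/1);
    if (NewReg != OldReg)
      I.getOperand(1).setReg(NewReg);
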
diff --git a/contrib/llvm/include/llvm/CodeGen/ISDOpcodes.h b/contrib/llvm/include/llvm/CodeGen/ISDOpcodes.h
index e7a9ced25099..420b03ec02bd 100644
--- a/contrib/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/contrib/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -70,7 +70,7 @@ namespace ISD {
/// of the frame or return address to return. An index of zero corresponds
/// to the current function's frame or return address, an index of one to
/// the parent's frame or return address, and so on.
- FRAMEADDR, RETURNADDR,
+ FRAMEADDR, RETURNADDR, ADDROFRETURNADDR,
/// LOCAL_RECOVER - Represents the llvm.localrecover intrinsic.
/// Materializes the offset from the local object pointer of another
diff --git a/contrib/llvm/include/llvm/CodeGen/LiveInterval.h b/contrib/llvm/include/llvm/CodeGen/LiveInterval.h
index 04e840dea2ca..a86706223261 100644
--- a/contrib/llvm/include/llvm/CodeGen/LiveInterval.h
+++ b/contrib/llvm/include/llvm/CodeGen/LiveInterval.h
@@ -23,7 +23,6 @@
#include "llvm/ADT/IntEqClasses.h"
#include "llvm/CodeGen/SlotIndexes.h"
-#include "llvm/Support/AlignOf.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <cassert>
@@ -58,7 +57,7 @@ namespace llvm {
: id(i), def(d)
{ }
- /// VNInfo construtor, copies values from orig, except for the value number.
+ /// VNInfo constructor, copies values from orig, except for the value number.
VNInfo(unsigned i, const VNInfo &orig)
: id(i), def(orig.def)
{ }
@@ -316,6 +315,10 @@ namespace llvm {
/// add liveness for a dead def.
VNInfo *createDeadDef(SlotIndex Def, VNInfo::Allocator &VNInfoAllocator);
+ /// Create a def of value @p VNI. Return @p VNI. If there already exists
+ /// a definition at VNI->def, the value defined there must be @p VNI.
+ VNInfo *createDeadDef(VNInfo *VNI);
+
/// Create a copy of the given value. The new value will be identical except
/// for the Value number.
VNInfo *createValueCopy(const VNInfo *orig,
@@ -451,10 +454,29 @@ namespace llvm {
/// may have grown since it was inserted).
iterator addSegment(Segment S);
+ /// Attempt to extend a value defined after @p StartIdx to include @p Use.
+ /// Both @p StartIdx and @p Use should be in the same basic block. In case
+ /// of subranges, an extension could be prevented by an explicit "undef"
+ /// caused by a <def,read-undef> on a non-overlapping lane. The list of
+ /// location of such "undefs" should be provided in @p Undefs.
+ /// The return value is a pair: the first element is VNInfo of the value
+ /// that was extended (possibly nullptr), the second is a boolean value
+ /// indicating whether an "undef" was encountered.
/// If this range is live before @p Use in the basic block that starts at
- /// @p StartIdx, extend it to be live up to @p Use, and return the value. If
- /// there is no segment before @p Use, return nullptr.
- VNInfo *extendInBlock(SlotIndex StartIdx, SlotIndex Use);
+ /// @p StartIdx, and there is no intervening "undef", extend it to be live
+ /// up to @p Use, and return the pair {value, false}. If there is no
+ /// segment before @p Use and there is no "undef" between @p StartIdx and
+ /// @p Use, return {nullptr, false}. If there is an "undef" before @p Use,
+ /// return {nullptr, true}.
+ std::pair<VNInfo*,bool> extendInBlock(ArrayRef<SlotIndex> Undefs,
+ SlotIndex StartIdx, SlotIndex Use);
+
+ /// Simplified version of the above "extendInBlock", which assumes that
+ /// no register lanes are undefined by <def,read-undef> operands.
+ /// If this range is live before @p Use in the basic block that starts
+ /// at @p StartIdx, extend it to be live up to @p Use, and return the
+ /// value. If there is no segment before @p Use, return nullptr.
+ VNInfo *extendInBlock(SlotIndex StartIdx, SlotIndex Kill);
/// join - Join two live ranges (this, and other) together. This applies
/// mappings to the value numbers in the LHS/RHS ranges as specified. If
@@ -555,6 +577,16 @@ namespace llvm {
return thisIndex < otherIndex;
}
+ /// Returns true if there is an explicit "undef" between @p Begin
+ /// and @p End.
+ bool isUndefIn(ArrayRef<SlotIndex> Undefs, SlotIndex Begin,
+ SlotIndex End) const {
+ return std::any_of(Undefs.begin(), Undefs.end(),
+ [Begin,End] (SlotIndex Idx) -> bool {
+ return Begin <= Idx && Idx < End;
+ });
+ }
+
/// Flush segment set into the regular segment vector.
/// The method is to be called after the live range
/// has been created, if use of the segment set was
@@ -581,7 +613,6 @@ namespace llvm {
friend class LiveRangeUpdater;
void addSegmentToSet(Segment S);
void markValNoForDeletion(VNInfo *V);
-
};
inline raw_ostream &operator<<(raw_ostream &OS, const LiveRange &LR) {
@@ -641,7 +672,7 @@ namespace llvm {
P = P->Next;
return *this;
}
- SingleLinkedListIterator<T> &operator++(int) {
+ SingleLinkedListIterator<T> operator++(int) {
SingleLinkedListIterator res = *this;
++*this;
return res;
@@ -729,6 +760,13 @@ namespace llvm {
weight = llvm::huge_valf;
}
+ /// For a given lane mask @p LaneMask, compute indexes at which the
+ /// lane is marked undefined by subregister <def,read-undef> definitions.
+ void computeSubRangeUndefs(SmallVectorImpl<SlotIndex> &Undefs,
+ LaneBitmask LaneMask,
+ const MachineRegisterInfo &MRI,
+ const SlotIndexes &Indexes) const;
+
bool operator<(const LiveInterval& other) const {
const SlotIndex &thisIndex = beginIndex();
const SlotIndex &otherIndex = other.beginIndex();
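
The interaction of the two additions above (computeSubRangeUndefs and the pair-returning extendInBlock) is easier to see in caller-style code. This is only a sketch; LI, SR, MRI, Indexes, BlockStart and UseIdx are assumed to be provided by the surrounding pass.

    // Collect the slots where SR's lanes are explicitly made undef by
    // <def,read-undef> operands, then try to extend the subrange to UseIdx.
    SmallVector<SlotIndex, 4> Undefs;
    LI.computeSubRangeUndefs(Undefs, SR.LaneMask, MRI, *Indexes);

    std::pair<VNInfo *, bool> Extended =
        SR.extendInBlock(Undefs, BlockStart, UseIdx);
    if (Extended.second) {
      // An explicit "undef" lies between BlockStart and UseIdx: no extension.
    } else if (VNInfo *VNI = Extended.first) {
      // SR is now live up to UseIdx with value VNI.
      (void)VNI;
    }
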
diff --git a/contrib/llvm/include/llvm/CodeGen/LiveIntervalAnalysis.h b/contrib/llvm/include/llvm/CodeGen/LiveIntervalAnalysis.h
index d4ee0582cc41..f8dc52566dc0 100644
--- a/contrib/llvm/include/llvm/CodeGen/LiveIntervalAnalysis.h
+++ b/contrib/llvm/include/llvm/CodeGen/LiveIntervalAnalysis.h
@@ -163,16 +163,24 @@ extern cl::opt<bool> UseSegmentSetForPhysRegs;
/// LiveInterval::removeEmptySubranges() afterwards.
void shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg);
- /// extendToIndices - Extend the live range of LI to reach all points in
- /// Indices. The points in the Indices array must be jointly dominated by
- /// existing defs in LI. PHI-defs are added as needed to maintain SSA form.
+ /// Extend the live range @p LR to reach all points in @p Indices. The
+ /// points in the @p Indices array must be jointly dominated by the union
+ /// of the existing defs in @p LR and points in @p Undefs.
///
- /// If a SlotIndex in Indices is the end index of a basic block, LI will be
- /// extended to be live out of the basic block.
+ /// PHI-defs are added as needed to maintain SSA form.
+ ///
+ /// If a SlotIndex in @p Indices is the end index of a basic block, @p LR
+ /// will be extended to be live out of the basic block.
+ /// If a SlotIndex in @p Indices is jointly dominated only by points in
+ /// @p Undefs, the live range will not be extended to that point.
///
/// See also LiveRangeCalc::extend().
- void extendToIndices(LiveRange &LR, ArrayRef<SlotIndex> Indices);
+ void extendToIndices(LiveRange &LR, ArrayRef<SlotIndex> Indices,
+ ArrayRef<SlotIndex> Undefs);
+ void extendToIndices(LiveRange &LR, ArrayRef<SlotIndex> Indices) {
+ extendToIndices(LR, Indices, /*Undefs=*/{});
+ }
/// If @p LR has a live value at @p Kill, prune its live range by removing
/// any liveness reachable from Kill. Add live range end points to
@@ -253,8 +261,8 @@ extern cl::opt<bool> UseSegmentSetForPhysRegs;
Indexes->removeMachineInstrFromMaps(MI);
}
- void ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI) {
- Indexes->replaceMachineInstrInMaps(MI, NewMI);
+ SlotIndex ReplaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI) {
+ return Indexes->replaceMachineInstrInMaps(MI, NewMI);
}
VNInfo::Allocator& getVNInfoAllocator() { return VNInfoAllocator; }
@@ -392,6 +400,13 @@ extern cl::opt<bool> UseSegmentSetForPhysRegs;
return RegUnitRanges[Unit];
}
+ /// removeRegUnit - Remove computed live range for Unit. Subsequent uses
+ /// should rely on on-demand recomputation.
+ void removeRegUnit(unsigned Unit) {
+ delete RegUnitRanges[Unit];
+ RegUnitRanges[Unit] = nullptr;
+ }
+
/// Remove value numbers and related live segments starting at position
/// @p Pos that are part of any liverange of physical register @p Reg or one
/// of its subregisters.
@@ -444,7 +459,8 @@ extern cl::opt<bool> UseSegmentSetForPhysRegs;
void repairOldRegInRange(MachineBasicBlock::iterator Begin,
MachineBasicBlock::iterator End,
const SlotIndex endIdx, LiveRange &LR,
- unsigned Reg, LaneBitmask LaneMask = ~0u);
+ unsigned Reg,
+ LaneBitmask LaneMask = LaneBitmask::getAll());
class HMEditor;
};
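
Summarizing the interface changes in this header with (assumed) caller code; LIS, LR, Indices, Undefs, OldMI, NewMI and SomeUnit are placeholders.

    // The two-argument form is now an inline wrapper that forwards an empty
    // undef list, so existing callers keep their behaviour.
    LIS.extendToIndices(LR, Indices);
    LIS.extendToIndices(LR, Indices, Undefs); // undef-aware variant

    // ReplaceMachineInstrInMaps now reports the slot index of the replacement.
    SlotIndex NewIdx = LIS.ReplaceMachineInstrInMaps(OldMI, NewMI);
    (void)NewIdx;

    // Drop the cached live range for a register unit; it is recomputed on
    // demand the next time it is queried.
    LIS.removeRegUnit(SomeUnit);
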
diff --git a/contrib/llvm/include/llvm/CodeGen/LivePhysRegs.h b/contrib/llvm/include/llvm/CodeGen/LivePhysRegs.h
index 1cea9d5b90d6..9e04c467fadc 100644
--- a/contrib/llvm/include/llvm/CodeGen/LivePhysRegs.h
+++ b/contrib/llvm/include/llvm/CodeGen/LivePhysRegs.h
@@ -31,8 +31,10 @@
#include "llvm/ADT/SparseSet.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include <cassert>
+#include <utility>
namespace llvm {
@@ -41,14 +43,15 @@ class MachineInstr;
/// \brief A set of live physical registers with functions to track liveness
/// when walking backward/forward through a basic block.
class LivePhysRegs {
- const TargetRegisterInfo *TRI;
+ const TargetRegisterInfo *TRI = nullptr;
SparseSet<unsigned> LiveRegs;
LivePhysRegs(const LivePhysRegs&) = delete;
LivePhysRegs &operator=(const LivePhysRegs&) = delete;
+
public:
/// \brief Constructs a new empty LivePhysRegs set.
- LivePhysRegs() : TRI(nullptr), LiveRegs() {}
+ LivePhysRegs() = default;
/// \brief Constructs and initialize an empty LivePhysRegs set.
LivePhysRegs(const TargetRegisterInfo *TRI) : TRI(TRI) {
@@ -57,11 +60,10 @@ public:
}
/// \brief Clear and initialize the LivePhysRegs set.
- void init(const TargetRegisterInfo *TRI) {
- assert(TRI && "Invalid TargetRegisterInfo pointer.");
- this->TRI = TRI;
+ void init(const TargetRegisterInfo &TRI) {
+ this->TRI = &TRI;
LiveRegs.clear();
- LiveRegs.setUniverse(TRI->getNumRegs());
+ LiveRegs.setUniverse(TRI.getNumRegs());
}
/// \brief Clears the LivePhysRegs set.
@@ -141,6 +143,11 @@ public:
/// \brief Dumps the currently live registers to the debug output.
void dump() const;
+
+private:
+ /// Adds live-in registers from basic block @p MBB, taking associated
+ /// lane masks into consideration.
+ void addBlockLiveIns(const MachineBasicBlock &MBB);
};
inline raw_ostream &operator<<(raw_ostream &OS, const LivePhysRegs& LR) {
@@ -148,6 +155,13 @@ inline raw_ostream &operator<<(raw_ostream &OS, const LivePhysRegs& LR) {
return OS;
}
-} // namespace llvm
+/// Compute the live-in list for \p MBB assuming all of its successors'
+/// live-in lists are up-to-date. Uses the given LivePhysRegs instance
+/// \p LiveRegs; this is just here to avoid repeated heap allocations when
+/// calling this multiple times in a pass.
+void computeLiveIns(LivePhysRegs &LiveRegs, const TargetRegisterInfo &TRI,
+ MachineBasicBlock &MBB);
+
+} // end namespace llvm
-#endif
+#endif // LLVM_CODEGEN_LIVEPHYSREGS_H
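
A small before/after sketch of the adjusted interface; TRI and MBB are assumptions.

    LivePhysRegs LiveRegs;
    LiveRegs.init(*TRI); // previously init(TRI) took a pointer

    // Recompute MBB's live-in list from its successors, reusing LiveRegs so
    // repeated calls in one pass avoid fresh heap allocations.
    computeLiveIns(LiveRegs, *TRI, MBB);
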
diff --git a/contrib/llvm/include/llvm/CodeGen/LiveVariables.h b/contrib/llvm/include/llvm/CodeGen/LiveVariables.h
index bc210dda08c0..d6e947c03dbd 100644
--- a/contrib/llvm/include/llvm/CodeGen/LiveVariables.h
+++ b/contrib/llvm/include/llvm/CodeGen/LiveVariables.h
@@ -92,8 +92,7 @@ public:
/// machine instruction. Returns true if there was a kill
/// corresponding to this instruction, false otherwise.
bool removeKill(MachineInstr &MI) {
- std::vector<MachineInstr *>::iterator I =
- std::find(Kills.begin(), Kills.end(), &MI);
+ std::vector<MachineInstr *>::iterator I = find(Kills, &MI);
if (I == Kills.end())
return false;
Kills.erase(I);
diff --git a/contrib/llvm/include/llvm/CodeGen/LowLevelType.h b/contrib/llvm/include/llvm/CodeGen/LowLevelType.h
new file mode 100644
index 000000000000..b8885c3a95fd
--- /dev/null
+++ b/contrib/llvm/include/llvm/CodeGen/LowLevelType.h
@@ -0,0 +1,206 @@
+//== llvm/CodeGen/GlobalISel/LowLevelType.h -------------------- -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// Implement a low-level type suitable for MachineInstr level instruction
+/// selection.
+///
+/// For a type attached to a MachineInstr, we only care about 2 details: total
+/// size and the number of vector lanes (if any). Accordingly, there are 3
+/// possible valid type-kinds:
+///
+/// * `sN` for scalars and aggregates
+/// * `<N x sM>` for vectors, which must have at least 2 elements.
+/// * `pN` for pointers
+///
+/// Other information required for correct selection is expected to be carried
+/// by the opcode, or non-type flags. For example the distinction between G_ADD
+/// and G_FADD for int/float or fast-math flags.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_GLOBALISEL_LOWLEVELTYPE_H
+#define LLVM_CODEGEN_GLOBALISEL_LOWLEVELTYPE_H
+
+#include <cassert>
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+
+namespace llvm {
+
+class DataLayout;
+class LLVMContext;
+class Type;
+class raw_ostream;
+
+class LLT {
+public:
+ enum TypeKind : uint16_t {
+ Invalid,
+ Scalar,
+ Pointer,
+ Vector,
+ };
+
+ /// Get a low-level scalar or aggregate "bag of bits".
+ static LLT scalar(unsigned SizeInBits) {
+ assert(SizeInBits > 0 && "invalid scalar size");
+ return LLT{Scalar, 1, SizeInBits};
+ }
+
+ /// Get a low-level pointer in the given address space (defaulting to 0).
+ static LLT pointer(uint16_t AddressSpace, unsigned SizeInBits) {
+ return LLT{Pointer, AddressSpace, SizeInBits};
+ }
+
+ /// Get a low-level vector of some number of elements and element width.
+ /// \p NumElements must be at least 2.
+ static LLT vector(uint16_t NumElements, unsigned ScalarSizeInBits) {
+ assert(NumElements > 1 && "invalid number of vector elements");
+ return LLT{Vector, NumElements, ScalarSizeInBits};
+ }
+
+ /// Get a low-level vector of some number of elements and element type.
+ static LLT vector(uint16_t NumElements, LLT ScalarTy) {
+ assert(NumElements > 1 && "invalid number of vector elements");
+ assert(ScalarTy.isScalar() && "invalid vector element type");
+ return LLT{Vector, NumElements, ScalarTy.getSizeInBits()};
+ }
+
+ explicit LLT(TypeKind Kind, uint16_t NumElements, unsigned SizeInBits)
+ : SizeInBits(SizeInBits), ElementsOrAddrSpace(NumElements), Kind(Kind) {
+ assert((Kind != Vector || ElementsOrAddrSpace > 1) &&
+ "invalid number of vector elements");
+ }
+
+ explicit LLT() : SizeInBits(0), ElementsOrAddrSpace(0), Kind(Invalid) {}
+
+ /// Construct a low-level type based on an LLVM type.
+ explicit LLT(Type &Ty, const DataLayout &DL);
+
+ explicit LLT(MVT VT);
+
+ bool isValid() const { return Kind != Invalid; }
+
+ bool isScalar() const { return Kind == Scalar; }
+
+ bool isPointer() const { return Kind == Pointer; }
+
+ bool isVector() const { return Kind == Vector; }
+
+ /// Returns the number of elements in a vector LLT. Must only be called on
+ /// vector types.
+ uint16_t getNumElements() const {
+ assert(isVector() && "cannot get number of elements on scalar/aggregate");
+ return ElementsOrAddrSpace;
+ }
+
+ /// Returns the total size of the type. Must only be called on sized types.
+ unsigned getSizeInBits() const {
+ if (isPointer() || isScalar())
+ return SizeInBits;
+ return SizeInBits * ElementsOrAddrSpace;
+ }
+
+ unsigned getScalarSizeInBits() const {
+ return SizeInBits;
+ }
+
+ unsigned getAddressSpace() const {
+ assert(isPointer() && "cannot get address space of non-pointer type");
+ return ElementsOrAddrSpace;
+ }
+
+ /// Returns the vector's element type. Only valid for vector types.
+ LLT getElementType() const {
+ assert(isVector() && "cannot get element type of scalar/aggregate");
+ return scalar(SizeInBits);
+ }
+
+ /// Get a low-level type with half the size of the original, by halving the
+ /// size of the scalar type involved. For example `s32` will become `s16`,
+ /// `<2 x s32>` will become `<2 x s16>`.
+ LLT halfScalarSize() const {
+ assert(!isPointer() && getScalarSizeInBits() > 1 &&
+ getScalarSizeInBits() % 2 == 0 && "cannot half size of this type");
+ return LLT{Kind, ElementsOrAddrSpace, SizeInBits / 2};
+ }
+
+ /// Get a low-level type with twice the size of the original, by doubling the
+ /// size of the scalar type involved. For example `s32` will become `s64`,
+ /// `<2 x s32>` will become `<2 x s64>`.
+ LLT doubleScalarSize() const {
+ assert(!isPointer() && "cannot change size of this type");
+ return LLT{Kind, ElementsOrAddrSpace, SizeInBits * 2};
+ }
+
+ /// Get a low-level type with half the size of the original, by halving the
+ /// number of vector elements. The source must be
+ /// a vector type with an even number of elements. For example `<4 x s32>`
+ /// will become `<2 x s32>`, `<2 x s32>` will become `s32`.
+ LLT halfElements() const {
+ assert(isVector() && ElementsOrAddrSpace % 2 == 0 &&
+ "cannot half odd vector");
+ if (ElementsOrAddrSpace == 2)
+ return scalar(SizeInBits);
+
+ return LLT{Vector, static_cast<uint16_t>(ElementsOrAddrSpace / 2),
+ SizeInBits};
+ }
+
+ /// Get a low-level type with twice the size of the original, by doubling the
+ /// number of vector elements. The source must not be a pointer
+ /// type. For example `<2 x s32>` will become `<4 x s32>`. Doubling
+ /// the number of elements in sN produces <2 x sN>.
+ LLT doubleElements() const {
+ assert(!isPointer() && "cannot double elements in pointer");
+ return LLT{Vector, static_cast<uint16_t>(ElementsOrAddrSpace * 2),
+ SizeInBits};
+ }
+
+ void print(raw_ostream &OS) const;
+
+ bool operator==(const LLT &RHS) const {
+ return Kind == RHS.Kind && SizeInBits == RHS.SizeInBits &&
+ ElementsOrAddrSpace == RHS.ElementsOrAddrSpace;
+ }
+
+ bool operator!=(const LLT &RHS) const { return !(*this == RHS); }
+
+ friend struct DenseMapInfo<LLT>;
+private:
+ unsigned SizeInBits;
+ uint16_t ElementsOrAddrSpace;
+ TypeKind Kind;
+};
+
+inline raw_ostream& operator<<(raw_ostream &OS, const LLT &Ty) {
+ Ty.print(OS);
+ return OS;
+}
+
+template<> struct DenseMapInfo<LLT> {
+ static inline LLT getEmptyKey() {
+ return LLT{LLT::Invalid, 0, -1u};
+ }
+ static inline LLT getTombstoneKey() {
+ return LLT{LLT::Invalid, 0, -2u};
+ }
+ static inline unsigned getHashValue(const LLT &Ty) {
+ uint64_t Val = ((uint64_t)Ty.SizeInBits << 32) |
+ ((uint64_t)Ty.ElementsOrAddrSpace << 16) | (uint64_t)Ty.Kind;
+ return DenseMapInfo<uint64_t>::getHashValue(Val);
+ }
+ static bool isEqual(const LLT &LHS, const LLT &RHS) {
+ return LHS == RHS;
+ }
+};
+
+}
+
+#endif
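
Some illustrative (assumed) uses of the type kinds and resizing helpers declared above:

    LLT S32   = LLT::scalar(32);     // s32
    LLT P0    = LLT::pointer(0, 64); // p0, represented with 64 bits
    LLT V4S32 = LLT::vector(4, 32);  // <4 x s32>

    LLT S16   = S32.halfScalarSize();   // s16
    LLT S64   = S32.doubleScalarSize(); // s64
    LLT V2S32 = V4S32.halfElements();   // <2 x s32>
    LLT S32b  = V2S32.halfElements();   // s32: a 2-element vector collapses
    LLT V2S16 = S16.doubleElements();   // <2 x s16>: doubling elements of sN
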
diff --git a/contrib/llvm/include/llvm/CodeGen/MIRYamlMapping.h b/contrib/llvm/include/llvm/CodeGen/MIRYamlMapping.h
index 7f9c44833336..778f72c06e65 100644
--- a/contrib/llvm/include/llvm/CodeGen/MIRYamlMapping.h
+++ b/contrib/llvm/include/llvm/CodeGen/MIRYamlMapping.h
@@ -381,13 +381,12 @@ struct MachineFunction {
StringRef Name;
unsigned Alignment = 0;
bool ExposesReturnsTwice = false;
- bool HasInlineAsm = false;
- // MachineFunctionProperties
- bool AllVRegsAllocated = false;
+ // GISel MachineFunctionProperties.
+ bool Legalized = false;
+ bool RegBankSelected = false;
+ bool Selected = false;
// Register information
- bool IsSSA = false;
bool TracksRegLiveness = false;
- bool TracksSubRegLiveness = false;
std::vector<VirtualRegisterDefinition> VirtualRegisters;
std::vector<MachineFunctionLiveIn> LiveIns;
Optional<std::vector<FlowStringValue>> CalleeSavedRegisters;
@@ -406,11 +405,10 @@ template <> struct MappingTraits<MachineFunction> {
YamlIO.mapRequired("name", MF.Name);
YamlIO.mapOptional("alignment", MF.Alignment);
YamlIO.mapOptional("exposesReturnsTwice", MF.ExposesReturnsTwice);
- YamlIO.mapOptional("hasInlineAsm", MF.HasInlineAsm);
- YamlIO.mapOptional("allVRegsAllocated", MF.AllVRegsAllocated);
- YamlIO.mapOptional("isSSA", MF.IsSSA);
+ YamlIO.mapOptional("legalized", MF.Legalized);
+ YamlIO.mapOptional("regBankSelected", MF.RegBankSelected);
+ YamlIO.mapOptional("selected", MF.Selected);
YamlIO.mapOptional("tracksRegLiveness", MF.TracksRegLiveness);
- YamlIO.mapOptional("tracksSubRegLiveness", MF.TracksSubRegLiveness);
YamlIO.mapOptional("registers", MF.VirtualRegisters);
YamlIO.mapOptional("liveins", MF.LiveIns);
YamlIO.mapOptional("calleeSavedRegisters", MF.CalleeSavedRegisters);
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/contrib/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 2923371c1005..be811c6fe437 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/MachineInstrBundleIterator.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/Support/BranchProbability.h"
+#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/DataTypes.h"
#include <functional>
@@ -35,36 +36,21 @@ class StringRef;
class raw_ostream;
class MachineBranchProbabilityInfo;
-// Forward declaration to avoid circular include problem with TargetRegisterInfo
-typedef unsigned LaneBitmask;
-
-template <>
-struct ilist_traits<MachineInstr> : public ilist_default_traits<MachineInstr> {
+template <> struct ilist_traits<MachineInstr> {
private:
- mutable ilist_half_node<MachineInstr> Sentinel;
+ friend class MachineBasicBlock; // Set by the owning MachineBasicBlock.
+ MachineBasicBlock *Parent;
- // this is only set by the MachineBasicBlock owning the LiveList
- friend class MachineBasicBlock;
- MachineBasicBlock* Parent;
+ typedef simple_ilist<MachineInstr, ilist_sentinel_tracking<true>>::iterator
+ instr_iterator;
public:
- MachineInstr *createSentinel() const {
- return static_cast<MachineInstr*>(&Sentinel);
- }
- void destroySentinel(MachineInstr *) const {}
-
- MachineInstr *provideInitialHead() const { return createSentinel(); }
- MachineInstr *ensureHead(MachineInstr*) const { return createSentinel(); }
- static void noteHead(MachineInstr*, MachineInstr*) {}
+ void addNodeToList(MachineInstr *N);
+ void removeNodeFromList(MachineInstr *N);
+ void transferNodesFromList(ilist_traits &OldList, instr_iterator First,
+ instr_iterator Last);
- void addNodeToList(MachineInstr* N);
- void removeNodeFromList(MachineInstr* N);
- void transferNodesFromList(ilist_traits &SrcTraits,
- ilist_iterator<MachineInstr> First,
- ilist_iterator<MachineInstr> Last);
- void deleteNode(MachineInstr *N);
-private:
- void createNode(const MachineInstr &);
+ void deleteNode(MachineInstr *MI);
};
class MachineBasicBlock
@@ -83,7 +69,7 @@ public:
};
private:
- typedef ilist<MachineInstr> Instructions;
+ typedef ilist<MachineInstr, ilist_sentinel_tracking<true>> Instructions;
Instructions Insts;
const BasicBlock *BB;
int Number;
@@ -161,15 +147,14 @@ public:
typedef Instructions::iterator instr_iterator;
typedef Instructions::const_iterator const_instr_iterator;
- typedef std::reverse_iterator<instr_iterator> reverse_instr_iterator;
- typedef
- std::reverse_iterator<const_instr_iterator> const_reverse_instr_iterator;
+ typedef Instructions::reverse_iterator reverse_instr_iterator;
+ typedef Instructions::const_reverse_iterator const_reverse_instr_iterator;
typedef MachineInstrBundleIterator<MachineInstr> iterator;
typedef MachineInstrBundleIterator<const MachineInstr> const_iterator;
- typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
- typedef std::reverse_iterator<iterator> reverse_iterator;
-
+ typedef MachineInstrBundleIterator<MachineInstr, true> reverse_iterator;
+ typedef MachineInstrBundleIterator<const MachineInstr, true>
+ const_reverse_iterator;
unsigned size() const { return (unsigned)Insts.size(); }
bool empty() const { return Insts.empty(); }
@@ -204,10 +189,16 @@ public:
const_iterator begin() const { return instr_begin(); }
iterator end () { return instr_end(); }
const_iterator end () const { return instr_end(); }
- reverse_iterator rbegin() { return instr_rbegin(); }
- const_reverse_iterator rbegin() const { return instr_rbegin(); }
- reverse_iterator rend () { return instr_rend(); }
- const_reverse_iterator rend () const { return instr_rend(); }
+ reverse_iterator rbegin() {
+ return reverse_iterator::getAtBundleBegin(instr_rbegin());
+ }
+ const_reverse_iterator rbegin() const {
+ return const_reverse_iterator::getAtBundleBegin(instr_rbegin());
+ }
+ reverse_iterator rend() { return reverse_iterator(instr_rend()); }
+ const_reverse_iterator rend() const {
+ return const_reverse_iterator(instr_rend());
+ }
/// Support for MachineInstr::getNextNode().
static Instructions MachineBasicBlock::*getSublistAccess(MachineInstr *) {
@@ -285,7 +276,8 @@ public:
/// Adds the specified register as a live in. Note that it is an error to add
/// the same register to the same set more than once unless the intention is
/// to call sortUniqueLiveIns after all registers are added.
- void addLiveIn(MCPhysReg PhysReg, LaneBitmask LaneMask = ~0u) {
+ void addLiveIn(MCPhysReg PhysReg,
+ LaneBitmask LaneMask = LaneBitmask::getAll()) {
LiveIns.push_back(RegisterMaskPair(PhysReg, LaneMask));
}
void addLiveIn(const RegisterMaskPair &RegMaskPair) {
@@ -297,16 +289,21 @@ public:
/// LiveIn insertion.
void sortUniqueLiveIns();
+ /// Clear live in list.
+ void clearLiveIns();
+
/// Add PhysReg as live in to this block, and ensure that there is a copy of
/// PhysReg to a virtual register of class RC. Return the virtual register
/// that is a copy of the live in PhysReg.
unsigned addLiveIn(MCPhysReg PhysReg, const TargetRegisterClass *RC);
/// Remove the specified register from the live in set.
- void removeLiveIn(MCPhysReg Reg, LaneBitmask LaneMask = ~0u);
+ void removeLiveIn(MCPhysReg Reg,
+ LaneBitmask LaneMask = LaneBitmask::getAll());
/// Return true if the specified register is in the live in set.
- bool isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask = ~0u) const;
+ bool isLiveIn(MCPhysReg Reg,
+ LaneBitmask LaneMask = LaneBitmask::getAll()) const;
// Iteration support for live in sets. These sets are kept in sorted
// order by their register number.
@@ -462,10 +459,15 @@ public:
iterator getFirstNonPHI();
/// Return the first instruction in MBB after I that is not a PHI or a label.
- /// This is the correct point to insert copies at the beginning of a basic
- /// block.
+ /// This is the correct point to insert lowered copies at the beginning of a
+ /// basic block that must be before any debugging information.
iterator SkipPHIsAndLabels(iterator I);
+ /// Return the first instruction in MBB after I that is not a PHI, label or
+ /// debug. This is the correct point to insert copies at the beginning of a
+ /// basic block.
+ iterator SkipPHIsLabelsAndDebug(iterator I);
+
/// Returns an iterator to the first terminator instruction of this basic
/// block. If a terminator does not exist, it returns end().
iterator getFirstTerminator();
@@ -705,7 +707,7 @@ private:
BranchProbability getSuccProbability(const_succ_iterator Succ) const;
// Methods used to maintain doubly linked list of blocks...
- friend struct ilist_traits<MachineBasicBlock>;
+ friend struct ilist_callback_traits<MachineBasicBlock>;
// Machine-CFG mutators
@@ -739,31 +741,21 @@ struct MBB2NumberFunctor :
//
template <> struct GraphTraits<MachineBasicBlock *> {
- typedef MachineBasicBlock NodeType;
typedef MachineBasicBlock *NodeRef;
typedef MachineBasicBlock::succ_iterator ChildIteratorType;
- static NodeType *getEntryNode(MachineBasicBlock *BB) { return BB; }
- static inline ChildIteratorType child_begin(NodeType *N) {
- return N->succ_begin();
- }
- static inline ChildIteratorType child_end(NodeType *N) {
- return N->succ_end();
- }
+ static NodeRef getEntryNode(MachineBasicBlock *BB) { return BB; }
+ static ChildIteratorType child_begin(NodeRef N) { return N->succ_begin(); }
+ static ChildIteratorType child_end(NodeRef N) { return N->succ_end(); }
};
template <> struct GraphTraits<const MachineBasicBlock *> {
- typedef const MachineBasicBlock NodeType;
typedef const MachineBasicBlock *NodeRef;
typedef MachineBasicBlock::const_succ_iterator ChildIteratorType;
- static NodeType *getEntryNode(const MachineBasicBlock *BB) { return BB; }
- static inline ChildIteratorType child_begin(NodeType *N) {
- return N->succ_begin();
- }
- static inline ChildIteratorType child_end(NodeType *N) {
- return N->succ_end();
- }
+ static NodeRef getEntryNode(const MachineBasicBlock *BB) { return BB; }
+ static ChildIteratorType child_begin(NodeRef N) { return N->succ_begin(); }
+ static ChildIteratorType child_end(NodeRef N) { return N->succ_end(); }
};
// Provide specializations of GraphTraits to be able to treat a
@@ -773,33 +765,23 @@ template <> struct GraphTraits<const MachineBasicBlock *> {
// instead of the successor edges.
//
template <> struct GraphTraits<Inverse<MachineBasicBlock*> > {
- typedef MachineBasicBlock NodeType;
typedef MachineBasicBlock *NodeRef;
typedef MachineBasicBlock::pred_iterator ChildIteratorType;
- static NodeType *getEntryNode(Inverse<MachineBasicBlock *> G) {
+ static NodeRef getEntryNode(Inverse<MachineBasicBlock *> G) {
return G.Graph;
}
- static inline ChildIteratorType child_begin(NodeType *N) {
- return N->pred_begin();
- }
- static inline ChildIteratorType child_end(NodeType *N) {
- return N->pred_end();
- }
+ static ChildIteratorType child_begin(NodeRef N) { return N->pred_begin(); }
+ static ChildIteratorType child_end(NodeRef N) { return N->pred_end(); }
};
template <> struct GraphTraits<Inverse<const MachineBasicBlock*> > {
- typedef const MachineBasicBlock NodeType;
typedef const MachineBasicBlock *NodeRef;
typedef MachineBasicBlock::const_pred_iterator ChildIteratorType;
- static NodeType *getEntryNode(Inverse<const MachineBasicBlock*> G) {
+ static NodeRef getEntryNode(Inverse<const MachineBasicBlock *> G) {
return G.Graph;
}
- static inline ChildIteratorType child_begin(NodeType *N) {
- return N->pred_begin();
- }
- static inline ChildIteratorType child_end(NodeType *N) {
- return N->pred_end();
- }
+ static ChildIteratorType child_begin(NodeRef N) { return N->pred_begin(); }
+ static ChildIteratorType child_end(NodeRef N) { return N->pred_end(); }
};
@@ -827,6 +809,28 @@ public:
MachineBasicBlock::iterator getInitial() { return I; }
};
+/// Increment \p It until it points to a non-debug instruction or to \p End
+/// and return the resulting iterator. This function should only be used
+/// MachineBasicBlock::{iterator, const_iterator, instr_iterator,
+/// const_instr_iterator} and the respective reverse iterators.
+template<typename IterT>
+inline IterT skipDebugInstructionsForward(IterT It, IterT End) {
+ while (It != End && It->isDebugValue())
+ It++;
+ return It;
+}
+
+/// Decrement \p It until it points to a non-debug instruction or to \p Begin
+/// and return the resulting iterator. This function should only be used with
+/// MachineBasicBlock::{iterator, const_iterator, instr_iterator,
+/// const_instr_iterator} and the respective reverse iterators.
+template<class IterT>
+inline IterT skipDebugInstructionsBackward(IterT It, IterT Begin) {
+ while (It != Begin && It->isDebugValue())
+ It--;
+ return It;
+}
+
} // End llvm namespace
#endif
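
A hedged sketch of how the pieces added above fit together: the debug-skipping helpers, the bundle-aware reverse iterators, and LaneBitmask-based live-ins. The physical register passed in is a placeholder; only APIs visible in this hunk are used.

    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/MC/LaneBitmask.h"
    #include <iterator>

    using namespace llvm;

    static void visitBlock(MachineBasicBlock &MBB, MCPhysReg PhysReg) {
      // Forward walk that ignores DBG_VALUE instructions.
      for (auto I = skipDebugInstructionsForward(MBB.begin(), MBB.end());
           I != MBB.end();
           I = skipDebugInstructionsForward(std::next(I), MBB.end())) {
        MachineInstr &MI = *I;
        (void)MI; // ... inspect MI ...
      }

      // Reverse walk over whole bundles; rbegin() now starts at the head of
      // the final bundle rather than at its last instruction.
      for (auto RI = MBB.rbegin(), RE = MBB.rend(); RI != RE; ++RI)
        (void)*RI;

      // Live-in lane masks use LaneBitmask; getAll() replaces the old ~0u.
      if (!MBB.isLiveIn(PhysReg))
        MBB.addLiveIn(PhysReg, LaneBitmask::getAll());
    }
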
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h b/contrib/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h
index 7a236086ed09..bfa5bf6c2845 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineBlockFrequencyInfo.h
@@ -52,6 +52,7 @@ public:
BlockFrequency getBlockFreq(const MachineBasicBlock *MBB) const;
Optional<uint64_t> getBlockProfileCount(const MachineBasicBlock *MBB) const;
+ Optional<uint64_t> getProfileCountFromFreq(uint64_t Freq) const;
const MachineFunction *getFunction() const;
const MachineBranchProbabilityInfo *getMBPI() const;
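
The new getProfileCountFromFreq applies the same entry-count scaling as getBlockProfileCount, but to an arbitrary frequency value. A small sketch, assuming the analysis result is already available:

    #include "llvm/ADT/Optional.h"
    #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"

    using namespace llvm;

    static uint64_t estimateCount(const MachineBlockFrequencyInfo &MBFI,
                                  const MachineBasicBlock &MBB,
                                  uint64_t CombinedFreq) {
      // Per-block estimate, as before.
      Optional<uint64_t> BlockCount = MBFI.getBlockProfileCount(&MBB);
      // New: the same scaling applied to a raw frequency, e.g. a sum of the
      // frequencies of several blocks.
      Optional<uint64_t> FreqCount = MBFI.getProfileCountFromFreq(CombinedFreq);
      return FreqCount.getValueOr(BlockCount.getValueOr(0));
    }
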
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineDominators.h b/contrib/llvm/include/llvm/CodeGen/MachineDominators.h
index ed7cc277e8b6..76e1df89169e 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineDominators.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineDominators.h
@@ -271,14 +271,12 @@ public:
template <class Node, class ChildIterator>
struct MachineDomTreeGraphTraitsBase {
- typedef Node NodeType;
+ typedef Node *NodeRef;
typedef ChildIterator ChildIteratorType;
- static NodeType *getEntryNode(NodeType *N) { return N; }
- static inline ChildIteratorType child_begin(NodeType *N) {
- return N->begin();
- }
- static inline ChildIteratorType child_end(NodeType *N) { return N->end(); }
+ static NodeRef getEntryNode(NodeRef N) { return N; }
+ static ChildIteratorType child_begin(NodeRef N) { return N->begin(); }
+ static ChildIteratorType child_end(NodeRef N) { return N->end(); }
};
template <class T> struct GraphTraits;
@@ -296,7 +294,7 @@ struct GraphTraits<const MachineDomTreeNode *>
template <> struct GraphTraits<MachineDominatorTree*>
: public GraphTraits<MachineDomTreeNode *> {
- static NodeType *getEntryNode(MachineDominatorTree *DT) {
+ static NodeRef getEntryNode(MachineDominatorTree *DT) {
return DT->getRootNode();
}
};
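
Because these specializations now follow the NodeRef convention, the generic graph algorithms keep working unchanged. A sketch, assuming a computed MachineDominatorTree:

    #include "llvm/ADT/DepthFirstIterator.h"
    #include "llvm/CodeGen/MachineDominators.h"

    using namespace llvm;

    // Visit every dominator tree node in depth-first order; NodeRef is now
    // MachineDomTreeNode *, so no NodeType indirection is needed.
    static unsigned countDomTreeNodes(MachineDominatorTree &MDT) {
      unsigned N = 0;
      for (MachineDomTreeNode *Node : depth_first(&MDT)) {
        (void)Node->getBlock(); // the MachineBasicBlock this node represents
        ++N;
      }
      return N;
    }
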
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineFrameInfo.h b/contrib/llvm/include/llvm/CodeGen/MachineFrameInfo.h
index 59755674c69e..2fab8137564e 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineFrameInfo.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineFrameInfo.h
@@ -103,7 +103,7 @@ class MachineFrameInfo {
/// If true, this stack slot is used to spill a value (could be deopt
/// and/or GC related) over a statepoint. We know that the address of the
- /// slot can't alias any LLVM IR value. This is very similiar to a Spill
+ /// slot can't alias any LLVM IR value. This is very similar to a Spill
/// Slot, but is created by statepoint lowering is SelectionDAG, not the
/// register allocator.
bool isStatepointSpillSlot;
@@ -544,7 +544,8 @@ public:
/// Create a spill slot at a fixed location on the stack.
/// Returns an index with a negative value.
- int CreateFixedSpillStackObject(uint64_t Size, int64_t SPOffset);
+ int CreateFixedSpillStackObject(uint64_t Size, int64_t SPOffset,
+ bool Immutable = false);
/// Returns true if the specified index corresponds to a fixed stack object.
bool isFixedObjectIndex(int ObjectIdx) const {
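
A sketch of the extended CreateFixedSpillStackObject; the size and offset are illustrative only, and getFrameInfo() returning a reference is taken from the MachineFunction change later in this patch.

    #include "llvm/CodeGen/MachineFrameInfo.h"
    #include "llvm/CodeGen/MachineFunction.h"

    using namespace llvm;

    // Reserve an 8-byte spill slot at SP+16 whose contents never change after
    // the prologue; the new Immutable flag marks the fixed object read-only.
    static int createImmutableSpill(MachineFunction &MF) {
      MachineFrameInfo &MFI = MF.getFrameInfo();
      return MFI.CreateFixedSpillStackObject(/*Size=*/8, /*SPOffset=*/16,
                                             /*Immutable=*/true);
    }
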
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineFunction.h b/contrib/llvm/include/llvm/CodeGen/MachineFunction.h
index 4aa9a92e6bee..0c21b3254631 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineFunction.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineFunction.h
@@ -20,10 +20,14 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/ilist.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/ArrayRecycler.h"
#include "llvm/Support/Compiler.h"
@@ -48,29 +52,18 @@ class TargetRegisterClass;
struct MachinePointerInfo;
struct WinEHFuncInfo;
-template <>
-struct ilist_traits<MachineBasicBlock>
- : public ilist_default_traits<MachineBasicBlock> {
- mutable ilist_half_node<MachineBasicBlock> Sentinel;
-public:
- // FIXME: This downcast is UB. See llvm.org/PR26753.
- LLVM_NO_SANITIZE("object-size")
- MachineBasicBlock *createSentinel() const {
- return static_cast<MachineBasicBlock*>(&Sentinel);
- }
- void destroySentinel(MachineBasicBlock *) const {}
-
- MachineBasicBlock *provideInitialHead() const { return createSentinel(); }
- MachineBasicBlock *ensureHead(MachineBasicBlock*) const {
- return createSentinel();
- }
- static void noteHead(MachineBasicBlock*, MachineBasicBlock*) {}
+template <> struct ilist_alloc_traits<MachineBasicBlock> {
+ void deleteNode(MachineBasicBlock *MBB);
+};
+template <> struct ilist_callback_traits<MachineBasicBlock> {
void addNodeToList(MachineBasicBlock* MBB);
void removeNodeFromList(MachineBasicBlock* MBB);
- void deleteNode(MachineBasicBlock *MBB);
-private:
- void createNode(const MachineBasicBlock &);
+
+ template <class Iterator>
+ void transferNodesFromList(ilist_callback_traits &OldList, Iterator, Iterator) {
+ llvm_unreachable("Never transfer between lists");
+ }
};
/// MachineFunctionInfo - This class can be derived from and used by targets to
@@ -94,8 +87,6 @@ struct MachineFunctionInfo {
/// Each of these has checking code in the MachineVerifier, and passes can
/// require that a property be set.
class MachineFunctionProperties {
- // TODO: Add MachineVerifier checks for AllVRegsAllocated
- // TODO: Add a way to print the properties and make more useful error messages
// Possible TODO: Allow targets to extend this (perhaps by allowing the
// constructor to specify the size of the bit vector)
// Possible TODO: Allow requiring the negative (e.g. VRegsAllocated could be
@@ -108,6 +99,7 @@ public:
// Property descriptions:
// IsSSA: True when the machine function is in SSA form and virtual registers
// have a single def.
+ // NoPHIs: The machine function does not contain any PHI instruction.
// TracksLiveness: True when tracking register liveness accurately.
// While this property is set, register liveness information in basic block
// live-in lists and machine instruction operands (e.g. kill flags, implicit
@@ -115,13 +107,31 @@ public:
// that affect the values in registers, for example by the register
// scavenger.
// When this property is clear, liveness is no longer reliable.
- // AllVRegsAllocated: All virtual registers have been allocated; i.e. all
- // register operands are physical registers.
+ // NoVRegs: The machine function does not use any virtual registers.
+ // Legalized: In GlobalISel: the MachineLegalizer ran and all pre-isel generic
+ // instructions have been legalized; i.e., all instructions are now one of:
+ // - generic and always legal (e.g., COPY)
+ // - target-specific
+ // - legal pre-isel generic instructions.
+ // RegBankSelected: In GlobalISel: the RegBankSelect pass ran and all generic
+ // virtual registers have been assigned to a register bank.
+ // Selected: In GlobalISel: the InstructionSelect pass ran and all pre-isel
+ // generic instructions have been eliminated; i.e., all instructions are now
+ // target-specific or non-pre-isel generic instructions (e.g., COPY).
+ // Since only pre-isel generic instructions can have generic virtual register
+ // operands, this also means that all generic virtual registers have been
+ // constrained to virtual registers (assigned to register classes) and that
+ // all sizes attached to them have been eliminated.
enum class Property : unsigned {
IsSSA,
+ NoPHIs,
TracksLiveness,
- AllVRegsAllocated,
- LastProperty,
+ NoVRegs,
+ FailedISel,
+ Legalized,
+ RegBankSelected,
+ Selected,
+ LastProperty = Selected,
};
bool hasProperty(Property P) const {
@@ -131,15 +141,20 @@ public:
Properties.set(static_cast<unsigned>(P));
return *this;
}
- MachineFunctionProperties &clear(Property P) {
+ MachineFunctionProperties &reset(Property P) {
Properties.reset(static_cast<unsigned>(P));
return *this;
}
+ /// Reset all the properties.
+ MachineFunctionProperties &reset() {
+ Properties.reset();
+ return *this;
+ }
MachineFunctionProperties &set(const MachineFunctionProperties &MFP) {
Properties |= MFP.Properties;
return *this;
}
- MachineFunctionProperties &clear(const MachineFunctionProperties &MFP) {
+ MachineFunctionProperties &reset(const MachineFunctionProperties &MFP) {
Properties.reset(MFP.Properties);
return *this;
}
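
How a GlobalISel-style pass might use the renamed accessors (reset() replaces the old clear()); the property names are the ones listed in the enum above, while the pass shape itself is hypothetical.

    #include "llvm/CodeGen/MachineFunction.h"
    #include <cassert>

    using namespace llvm;

    static void recordLegalization(MachineFunction &MF, bool Succeeded) {
      MachineFunctionProperties &Props = MF.getProperties();
      assert(Props.hasProperty(MachineFunctionProperties::Property::IsSSA) &&
             "expected SSA form");
      if (Succeeded)
        Props.set(MachineFunctionProperties::Property::Legalized);
      else
        Props.reset(MachineFunctionProperties::Property::Legalized); // was clear()
    }
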
@@ -149,13 +164,34 @@ public:
return !V.Properties.test(Properties);
}
- // Print the MachineFunctionProperties in human-readable form. If OnlySet is
- // true, only print the properties that are set.
- void print(raw_ostream &ROS, bool OnlySet=false) const;
+ /// Print the MachineFunctionProperties in human-readable form.
+ void print(raw_ostream &OS) const;
private:
BitVector Properties =
- BitVector(static_cast<unsigned>(Property::LastProperty));
+ BitVector(static_cast<unsigned>(Property::LastProperty)+1);
+};
+
+struct SEHHandler {
+ /// Filter or finally function. Null indicates a catch-all.
+ const Function *FilterOrFinally;
+
+ /// Address of block to recover at. Null for a finally handler.
+ const BlockAddress *RecoverBA;
+};
+
+
+/// This structure is used to retain landing pad info for the current function.
+struct LandingPadInfo {
+ MachineBasicBlock *LandingPadBlock; // Landing pad block.
+ SmallVector<MCSymbol *, 1> BeginLabels; // Labels prior to invoke.
+ SmallVector<MCSymbol *, 1> EndLabels; // Labels after invoke.
+ SmallVector<SEHHandler, 1> SEHHandlers; // SEH handlers active at this lpad.
+ MCSymbol *LandingPadLabel; // Label at beginning of landing pad.
+ std::vector<int> TypeIds; // List of type ids (filters negative).
+
+ explicit LandingPadInfo(MachineBasicBlock *MBB)
+ : LandingPadBlock(MBB), LandingPadLabel(nullptr) {}
};
class MachineFunction {
@@ -224,6 +260,9 @@ class MachineFunction {
/// True if the function includes any inline assembly.
bool HasInlineAsm = false;
+ /// True if any WinCFI instructions have been emitted in this function.
+ Optional<bool> HasWinCFI;
+
/// Current high-level properties of the IR of the function (e.g. is in SSA
/// form or whether registers have been allocated)
MachineFunctionProperties Properties;
@@ -231,13 +270,77 @@ class MachineFunction {
// Allocation management for pseudo source values.
std::unique_ptr<PseudoSourceValueManager> PSVManager;
+ /// List of moves done by a function's prolog. Used to construct frame maps
+ /// by debug and exception handling consumers.
+ std::vector<MCCFIInstruction> FrameInstructions;
+
+ /// \name Exception Handling
+ /// \{
+
+ /// List of LandingPadInfo describing the landing pad information.
+ std::vector<LandingPadInfo> LandingPads;
+
+ /// Map a landing pad's EH symbol to the call site indexes.
+ DenseMap<MCSymbol*, SmallVector<unsigned, 4> > LPadToCallSiteMap;
+
+ /// Map of invoke call site index values to associated begin EH_LABEL.
+ DenseMap<MCSymbol*, unsigned> CallSiteMap;
+
+ bool CallsEHReturn = false;
+ bool CallsUnwindInit = false;
+ bool HasEHFunclets = false;
+
+ /// List of C++ TypeInfo used.
+ std::vector<const GlobalValue *> TypeInfos;
+
+ /// List of typeids encoding filters used.
+ std::vector<unsigned> FilterIds;
+
+ /// List of the indices in FilterIds corresponding to filter terminators.
+ std::vector<unsigned> FilterEnds;
+
+ EHPersonality PersonalityTypeCache = EHPersonality::Unknown;
+
+ /// \}
+
MachineFunction(const MachineFunction &) = delete;
void operator=(const MachineFunction&) = delete;
+
+ /// Clear all the members of this MachineFunction, except the ones used to
+ /// initialize the MachineFunction again.
+ /// More specifically, this deallocates all the dynamically allocated
+ /// objects and gets rid of all the XXXInfo data structures, but keeps
+ /// unchanged the references to Fn, Target, MMI, and FunctionNumber.
+ void clear();
+ /// Allocate and initialize the different members.
+ /// In particular, the XXXInfo data structure.
+ /// \pre Fn, Target, MMI, and FunctionNumber are properly set.
+ void init();
public:
+
+ struct VariableDbgInfo {
+ const DILocalVariable *Var;
+ const DIExpression *Expr;
+ unsigned Slot;
+ const DILocation *Loc;
+
+ VariableDbgInfo(const DILocalVariable *Var, const DIExpression *Expr,
+ unsigned Slot, const DILocation *Loc)
+ : Var(Var), Expr(Expr), Slot(Slot), Loc(Loc) {}
+ };
+ typedef SmallVector<VariableDbgInfo, 4> VariableDbgInfoMapTy;
+ VariableDbgInfoMapTy VariableDbgInfos;
+
MachineFunction(const Function *Fn, const TargetMachine &TM,
unsigned FunctionNum, MachineModuleInfo &MMI);
~MachineFunction();
+ /// Reset the instance as if it was just created.
+ void reset() {
+ clear();
+ init();
+ }
+
MachineModuleInfo &getMMI() const { return MMI; }
MCContext &getContext() const { return Ctx; }
@@ -283,8 +386,8 @@ public:
/// This object contains information about objects allocated on the stack
/// frame of the current function in an abstract way.
///
- MachineFrameInfo *getFrameInfo() { return FrameInfo; }
- const MachineFrameInfo *getFrameInfo() const { return FrameInfo; }
+ MachineFrameInfo &getFrameInfo() { return *FrameInfo; }
+ const MachineFrameInfo &getFrameInfo() const { return *FrameInfo; }
/// getJumpTableInfo - Return the jump table info object for the current
/// function. This object contains information about jump tables in the
@@ -345,6 +448,12 @@ public:
HasInlineAsm = B;
}
+ bool hasWinCFI() const {
+ assert(HasWinCFI.hasValue() && "HasWinCFI not set yet!");
+ return *HasWinCFI;
+ }
+ void setHasWinCFI(bool v) { HasWinCFI = v; }
+
/// Get the function properties
const MachineFunctionProperties &getProperties() const { return Properties; }
MachineFunctionProperties &getProperties() { return Properties; }
@@ -422,8 +531,8 @@ public:
// Provide accessors for the MachineBasicBlock list...
typedef BasicBlockListType::iterator iterator;
typedef BasicBlockListType::const_iterator const_iterator;
- typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
- typedef std::reverse_iterator<iterator> reverse_iterator;
+ typedef BasicBlockListType::const_reverse_iterator const_reverse_iterator;
+ typedef BasicBlockListType::reverse_iterator reverse_iterator;
/// Support for MachineBasicBlock::getNextNode().
static BasicBlockListType MachineFunction::*
@@ -530,11 +639,13 @@ public:
/// getMachineMemOperand - Allocate a new MachineMemOperand.
/// MachineMemOperands are owned by the MachineFunction and need not be
/// explicitly deallocated.
- MachineMemOperand *getMachineMemOperand(MachinePointerInfo PtrInfo,
- MachineMemOperand::Flags f,
- uint64_t s, unsigned base_alignment,
- const AAMDNodes &AAInfo = AAMDNodes(),
- const MDNode *Ranges = nullptr);
+ MachineMemOperand *getMachineMemOperand(
+ MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s,
+ unsigned base_alignment, const AAMDNodes &AAInfo = AAMDNodes(),
+ const MDNode *Ranges = nullptr,
+ SynchronizationScope SynchScope = CrossThread,
+ AtomicOrdering Ordering = AtomicOrdering::NotAtomic,
+ AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic);
/// getMachineMemOperand - Allocate a new MachineMemOperand by copying
/// an existing one, adjusting by an offset and using the given size.
@@ -601,8 +712,139 @@ public:
/// getPICBaseSymbol - Return a function-local symbol to represent the PIC
/// base.
MCSymbol *getPICBaseSymbol() const;
+
+ /// Returns a reference to a list of CFI instructions in the function's
+ /// prologue. Used to construct frame maps for debug and exception handling
+ /// consumers.
+ const std::vector<MCCFIInstruction> &getFrameInstructions() const {
+ return FrameInstructions;
+ }
+
+ LLVM_NODISCARD unsigned addFrameInst(const MCCFIInstruction &Inst) {
+ FrameInstructions.push_back(Inst);
+ return FrameInstructions.size() - 1;
+ }
+
+ /// \name Exception Handling
+ /// \{
+
+ bool callsEHReturn() const { return CallsEHReturn; }
+ void setCallsEHReturn(bool b) { CallsEHReturn = b; }
+
+ bool callsUnwindInit() const { return CallsUnwindInit; }
+ void setCallsUnwindInit(bool b) { CallsUnwindInit = b; }
+
+ bool hasEHFunclets() const { return HasEHFunclets; }
+ void setHasEHFunclets(bool V) { HasEHFunclets = V; }
+
+ /// Find or create a LandingPadInfo for the specified MachineBasicBlock.
+ LandingPadInfo &getOrCreateLandingPadInfo(MachineBasicBlock *LandingPad);
+
+ /// Remap landing pad labels and remove any deleted landing pads.
+ void tidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap = nullptr);
+
+ /// Return a reference to the landing pad info for the current function.
+ const std::vector<LandingPadInfo> &getLandingPads() const {
+ return LandingPads;
+ }
+
+ /// Provide the begin and end labels of an invoke style call and associate it
+ /// with a try landing pad block.
+ void addInvoke(MachineBasicBlock *LandingPad,
+ MCSymbol *BeginLabel, MCSymbol *EndLabel);
+
+ /// Add a new landing pad. Returns the label ID for the landing pad entry.
+ MCSymbol *addLandingPad(MachineBasicBlock *LandingPad);
+
+ /// Provide the catch typeinfo for a landing pad.
+ void addCatchTypeInfo(MachineBasicBlock *LandingPad,
+ ArrayRef<const GlobalValue *> TyInfo);
+
+ /// Provide the filter typeinfo for a landing pad.
+ void addFilterTypeInfo(MachineBasicBlock *LandingPad,
+ ArrayRef<const GlobalValue *> TyInfo);
+
+ /// Add a cleanup action for a landing pad.
+ void addCleanup(MachineBasicBlock *LandingPad);
+
+ void addSEHCatchHandler(MachineBasicBlock *LandingPad, const Function *Filter,
+ const BlockAddress *RecoverLabel);
+
+ void addSEHCleanupHandler(MachineBasicBlock *LandingPad,
+ const Function *Cleanup);
+
+ /// Return the type id for the specified typeinfo. This is function wide.
+ unsigned getTypeIDFor(const GlobalValue *TI);
+
+ /// Return the id of the filter encoded by TyIds. This is function wide.
+ int getFilterIDFor(std::vector<unsigned> &TyIds);
+
+ /// Map the landing pad's EH symbol to the call site indexes.
+ void setCallSiteLandingPad(MCSymbol *Sym, ArrayRef<unsigned> Sites);
+
+ /// Get the call site indexes for a landing pad EH symbol.
+ SmallVectorImpl<unsigned> &getCallSiteLandingPad(MCSymbol *Sym) {
+ assert(hasCallSiteLandingPad(Sym) &&
+ "missing call site number for landing pad!");
+ return LPadToCallSiteMap[Sym];
+ }
+
+ /// Return true if the landing pad EH symbol has an associated call site.
+ bool hasCallSiteLandingPad(MCSymbol *Sym) {
+ return !LPadToCallSiteMap[Sym].empty();
+ }
+
+ /// Map the begin label for a call site.
+ void setCallSiteBeginLabel(MCSymbol *BeginLabel, unsigned Site) {
+ CallSiteMap[BeginLabel] = Site;
+ }
+
+ /// Get the call site number for a begin label.
+ unsigned getCallSiteBeginLabel(MCSymbol *BeginLabel) const {
+ assert(hasCallSiteBeginLabel(BeginLabel) &&
+ "Missing call site number for EH_LABEL!");
+ return CallSiteMap.lookup(BeginLabel);
+ }
+
+ /// Return true if the begin label has a call site number associated with it.
+ bool hasCallSiteBeginLabel(MCSymbol *BeginLabel) const {
+ return CallSiteMap.count(BeginLabel);
+ }
+
+ /// Return a reference to the C++ typeinfo for the current function.
+ const std::vector<const GlobalValue *> &getTypeInfos() const {
+ return TypeInfos;
+ }
+
+ /// Return a reference to the typeids encoding filters used in the current
+ /// function.
+ const std::vector<unsigned> &getFilterIds() const {
+ return FilterIds;
+ }
+
+ /// \}
+
+ /// Collect information used to emit debugging information of a variable.
+ void setVariableDbgInfo(const DILocalVariable *Var, const DIExpression *Expr,
+ unsigned Slot, const DILocation *Loc) {
+ VariableDbgInfos.emplace_back(Var, Expr, Slot, Loc);
+ }
+
+ VariableDbgInfoMapTy &getVariableDbgInfo() { return VariableDbgInfos; }
+ const VariableDbgInfoMapTy &getVariableDbgInfo() const {
+ return VariableDbgInfos;
+ }
};
+/// \name Exception Handling
+/// \{
+
+/// Extract the exception handling information from the landingpad instruction
+/// and add it to the specified machine basic block.
+void addLandingPadInfo(const LandingPadInst &I, MachineBasicBlock &MBB);
+
+/// \}
+
//===--------------------------------------------------------------------===//
// GraphTraits specializations for function basic block graphs (CFGs)
//===--------------------------------------------------------------------===//
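
Before the GraphTraits hunks, a sketch of the CFI bookkeeping interface added above; the concrete MCCFIInstruction chosen here (createDefCfaOffset with a null label) is just one plausible use.

    #include "llvm/CodeGen/MachineFunction.h"
    #include "llvm/MC/MCDwarf.h"
    #include <cassert>

    using namespace llvm;

    // Record a .cfi_def_cfa_offset for the prologue. addFrameInst is
    // LLVM_NODISCARD: the returned index is meant to be used, for example as
    // the operand of a CFI_INSTRUCTION pseudo.
    static unsigned recordCFAOffset(MachineFunction &MF, int Offset) {
      unsigned Index =
          MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, Offset));
      assert(Index + 1 == MF.getFrameInstructions().size());
      return Index;
    }
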
@@ -614,29 +856,29 @@ public:
//
template <> struct GraphTraits<MachineFunction*> :
public GraphTraits<MachineBasicBlock*> {
- static NodeType *getEntryNode(MachineFunction *F) {
- return &F->front();
- }
+ static NodeRef getEntryNode(MachineFunction *F) { return &F->front(); }
// nodes_iterator/begin/end - Allow iteration over all nodes in the graph
- typedef MachineFunction::iterator nodes_iterator;
- static nodes_iterator nodes_begin(MachineFunction *F) { return F->begin(); }
- static nodes_iterator nodes_end (MachineFunction *F) { return F->end(); }
+ typedef pointer_iterator<MachineFunction::iterator> nodes_iterator;
+ static nodes_iterator nodes_begin(MachineFunction *F) {
+ return nodes_iterator(F->begin());
+ }
+ static nodes_iterator nodes_end(MachineFunction *F) {
+ return nodes_iterator(F->end());
+ }
static unsigned size (MachineFunction *F) { return F->size(); }
};
template <> struct GraphTraits<const MachineFunction*> :
public GraphTraits<const MachineBasicBlock*> {
- static NodeType *getEntryNode(const MachineFunction *F) {
- return &F->front();
- }
+ static NodeRef getEntryNode(const MachineFunction *F) { return &F->front(); }
// nodes_iterator/begin/end - Allow iteration over all nodes in the graph
- typedef MachineFunction::const_iterator nodes_iterator;
+ typedef pointer_iterator<MachineFunction::const_iterator> nodes_iterator;
static nodes_iterator nodes_begin(const MachineFunction *F) {
- return F->begin();
+ return nodes_iterator(F->begin());
}
static nodes_iterator nodes_end (const MachineFunction *F) {
- return F->end();
+ return nodes_iterator(F->end());
}
static unsigned size (const MachineFunction *F) {
return F->size();
@@ -651,13 +893,13 @@ template <> struct GraphTraits<const MachineFunction*> :
//
template <> struct GraphTraits<Inverse<MachineFunction*> > :
public GraphTraits<Inverse<MachineBasicBlock*> > {
- static NodeType *getEntryNode(Inverse<MachineFunction*> G) {
+ static NodeRef getEntryNode(Inverse<MachineFunction *> G) {
return &G.Graph->front();
}
};
template <> struct GraphTraits<Inverse<const MachineFunction*> > :
public GraphTraits<Inverse<const MachineBasicBlock*> > {
- static NodeType *getEntryNode(Inverse<const MachineFunction *> G) {
+ static NodeRef getEntryNode(Inverse<const MachineFunction *> G) {
return &G.Graph->front();
}
};
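
With NodeRef and the pointer_iterator-based nodes_iterator, generic CFG utilities continue to work on MachineFunction. A minimal sketch:

    #include "llvm/ADT/DepthFirstIterator.h"
    #include "llvm/CodeGen/MachineFunction.h"

    using namespace llvm;

    // Count the basic blocks reachable from the entry block; NodeRef is
    // MachineBasicBlock *, so the loop variable is a plain pointer.
    static unsigned countReachableBlocks(MachineFunction &MF) {
      unsigned N = 0;
      for (MachineBasicBlock *MBB : depth_first(&MF)) {
        (void)MBB->getNumber();
        ++N;
      }
      return N;
    }
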
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h b/contrib/llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h
deleted file mode 100644
index 4c0f5e63ea1d..000000000000
--- a/contrib/llvm/include/llvm/CodeGen/MachineFunctionAnalysis.h
+++ /dev/null
@@ -1,55 +0,0 @@
-//===-- MachineFunctionAnalysis.h - Owner of MachineFunctions ----*-C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the MachineFunctionAnalysis class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_MACHINEFUNCTIONANALYSIS_H
-#define LLVM_CODEGEN_MACHINEFUNCTIONANALYSIS_H
-
-#include "llvm/Pass.h"
-
-namespace llvm {
-
-class MachineFunction;
-class MachineFunctionInitializer;
-class TargetMachine;
-
-/// MachineFunctionAnalysis - This class is a Pass that manages a
-/// MachineFunction object.
-struct MachineFunctionAnalysis : public FunctionPass {
-private:
- const TargetMachine &TM;
- MachineFunction *MF;
- unsigned NextFnNum;
- MachineFunctionInitializer *MFInitializer;
-
-public:
- static char ID;
- explicit MachineFunctionAnalysis(const TargetMachine &tm,
- MachineFunctionInitializer *MFInitializer);
- ~MachineFunctionAnalysis() override;
-
- MachineFunction &getMF() const { return *MF; }
-
- const char* getPassName() const override {
- return "Machine Function Analysis";
- }
-
-private:
- bool doInitialization(Module &M) override;
- bool runOnFunction(Function &F) override;
- void releaseMemory() override;
- void getAnalysisUsage(AnalysisUsage &AU) const override;
-};
-
-} // End llvm namespace
-
-#endif
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineInstr.h b/contrib/llvm/include/llvm/CodeGen/MachineInstr.h
index 8f1cb9b6f659..bac93e5d3a4c 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineInstr.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineInstr.h
@@ -39,9 +39,6 @@ class DIExpression;
class TargetInstrInfo;
class TargetRegisterClass;
class TargetRegisterInfo;
-#ifdef LLVM_BUILD_GLOBAL_ISEL
-class Type;
-#endif
class MachineFunction;
class MachineMemOperand;
@@ -53,7 +50,8 @@ class MachineMemOperand;
/// without having their destructor called.
///
class MachineInstr
- : public ilist_node_with_parent<MachineInstr, MachineBasicBlock> {
+ : public ilist_node_with_parent<MachineInstr, MachineBasicBlock,
+ ilist_sentinel_tracking<true>> {
public:
typedef MachineMemOperand **mmo_iterator;
@@ -62,7 +60,7 @@ public:
/// otherwise easily derivable from the IR text.
///
enum CommentFlag {
- ReloadReuse = 0x1
+ ReloadReuse = 0x1 // Higher bits are reserved for target-dependent comments.
};
enum MIFlag {
@@ -106,13 +104,6 @@ private:
DebugLoc debugLoc; // Source line information.
-#ifdef LLVM_BUILD_GLOBAL_ISEL
- /// Type of the instruction in case of a generic opcode.
- /// \invariant This must be nullptr is getOpcode() is not
- /// in the range of generic opcodes.
- Type *Ty;
-#endif
-
MachineInstr(const MachineInstr&) = delete;
void operator=(const MachineInstr&) = delete;
// Use MachineFunction::DeleteMachineInstr() instead.
@@ -120,7 +111,7 @@ private:
// Intrusive list support
friend struct ilist_traits<MachineInstr>;
- friend struct ilist_traits<MachineBasicBlock>;
+ friend struct ilist_callback_traits<MachineBasicBlock>;
void setParent(MachineBasicBlock *P) { Parent = P; }
/// This constructor creates a copy of the given
@@ -152,8 +143,8 @@ public:
}
/// Set a flag for the AsmPrinter.
- void setAsmPrinterFlag(CommentFlag Flag) {
- AsmPrinterFlags |= (uint8_t)Flag;
+ void setAsmPrinterFlag(uint8_t Flag) {
+ AsmPrinterFlags |= Flag;
}
/// Clear specific AsmPrinter flags.
@@ -187,10 +178,6 @@ public:
Flags &= ~((uint8_t)Flag);
}
- /// Set the type of the instruction.
- /// \pre getOpcode() is in the range of the generic opcodes.
- void setType(Type *Ty);
- Type *getType() const;
/// Return true if MI is in a bundle (but not the first MI in a bundle).
///
@@ -404,10 +391,10 @@ public:
bool hasProperty(unsigned MCFlag, QueryType Type = AnyInBundle) const {
// Inline the fast path for unbundled or bundle-internal instructions.
if (Type == IgnoreBundle || !isBundled() || isBundledWithPred())
- return getDesc().getFlags() & (1 << MCFlag);
+ return getDesc().getFlags() & (1ULL << MCFlag);
// If this is the first instruction in a bundle, take the slow path.
- return hasPropertyInBundle(1 << MCFlag, Type);
+ return hasPropertyInBundle(1ULL << MCFlag, Type);
}
/// Return true if this instruction can have a variable number of operands.
@@ -734,8 +721,11 @@ public:
IgnoreVRegDefs // Ignore virtual register definitions
};
- /// Return true if this instruction is identical to (same
- /// opcode and same operands as) the specified instruction.
+ /// Return true if this instruction is identical to \p Other.
+ /// Two instructions are identical if they have the same opcode and all their
+ /// operands are identical (with respect to MachineOperand::isIdenticalTo()).
+ /// Note that this means liveness related flags (dead, undef, kill) do not
+ /// affect the notion of identical.
bool isIdenticalTo(const MachineInstr &Other,
MICheckType Check = CheckDefs) const;
@@ -1124,12 +1114,14 @@ public:
/// ordered or volatile memory references.
bool hasOrderedMemoryRef() const;
- /// Return true if this instruction is loading from a
- /// location whose value is invariant across the function. For example,
- /// loading a value from the constant pool or from the argument area of
- /// a function if it does not change. This should only return true of *all*
- /// loads the instruction does are invariant (if it does multiple loads).
- bool isInvariantLoad(AliasAnalysis *AA) const;
+ /// Return true if this load instruction never traps and points to a memory
+ /// location whose value doesn't change during the execution of this function.
+ ///
+ /// Examples include loading a value from the constant pool or from the
+ /// argument area of a function (if it does not change). If the instruction
+ /// does multiple loads, this returns true only if all of the loads are
+ /// dereferenceable and invariant.
+ bool isDereferenceableInvariantLoad(AliasAnalysis *AA) const;
/// If the specified instruction is a PHI that always merges together the
/// same virtual register, return the register, otherwise return 0.
@@ -1157,10 +1149,11 @@ public:
//
// Debugging support
//
- void print(raw_ostream &OS, bool SkipOpers = false) const;
- void print(raw_ostream &OS, ModuleSlotTracker &MST,
- bool SkipOpers = false) const;
- void dump() const;
+ void print(raw_ostream &OS, bool SkipOpers = false,
+ const TargetInstrInfo *TII = nullptr) const;
+ void print(raw_ostream &OS, ModuleSlotTracker &MST, bool SkipOpers = false,
+ const TargetInstrInfo *TII = nullptr) const;
+ void dump(const TargetInstrInfo *TII = nullptr) const;
//===--------------------------------------------------------------------===//
// Accessors used to build up machine instructions.
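
A sketch combining the renamed invariant-load query with the TII-aware print; the include paths follow this tree's layout, and AA may be null.

    #include "llvm/Analysis/AliasAnalysis.h"
    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/Support/Debug.h"
    #include "llvm/Target/TargetInstrInfo.h"

    using namespace llvm;

    // Dump instructions known to load from dereferenceable, invariant memory
    // (formerly isInvariantLoad). Passing TII lets print() show symbolic
    // opcode names even for instructions without a parent function.
    static void dumpInvariantLoads(MachineInstr &MI, AliasAnalysis *AA,
                                   const TargetInstrInfo *TII) {
      if (MI.mayLoad() && MI.isDereferenceableInvariantLoad(AA))
        MI.print(dbgs(), /*SkipOpers=*/false, TII);
    }
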
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineInstrBuilder.h b/contrib/llvm/include/llvm/CodeGen/MachineInstrBuilder.h
index 37b67aa0cf5c..3c8a3626f364 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineInstrBuilder.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineInstrBuilder.h
@@ -83,6 +83,21 @@ public:
return *this;
}
+ /// Add a virtual register definition operand.
+ const MachineInstrBuilder &addDef(unsigned RegNo, unsigned Flags = 0,
+ unsigned SubReg = 0) const {
+ return addReg(RegNo, Flags | RegState::Define, SubReg);
+ }
+
+ /// Add a virtual register use operand. It is an error for Flags to contain
+ /// `RegState::Define` when calling this function.
+ const MachineInstrBuilder &addUse(unsigned RegNo, unsigned Flags = 0,
+ unsigned SubReg = 0) const {
+ assert(!(Flags & RegState::Define) &&
+ "Misleading addUse defines register, use addReg instead.");
+ return addReg(RegNo, Flags, SubReg);
+ }
+
/// Add a new immediate operand.
const MachineInstrBuilder &addImm(int64_t Val) const {
MI->addOperand(*MF, MachineOperand::CreateImm(Val));
@@ -190,6 +205,16 @@ public:
return *this;
}
+ const MachineInstrBuilder &addIntrinsicID(Intrinsic::ID ID) const {
+ MI->addOperand(*MF, MachineOperand::CreateIntrinsicID(ID));
+ return *this;
+ }
+
+ const MachineInstrBuilder &addPredicate(CmpInst::Predicate Pred) const {
+ MI->addOperand(*MF, MachineOperand::CreatePredicate(Pred));
+ return *this;
+ }
+
const MachineInstrBuilder &addSym(MCSymbol *Sym,
unsigned char TargetFlags = 0) const {
MI->addOperand(*MF, MachineOperand::CreateMCSymbol(Sym, TargetFlags));
@@ -435,7 +460,8 @@ public:
/// Create an MIBundleBuilder representing an existing instruction or bundle
/// that has MI as its head.
explicit MIBundleBuilder(MachineInstr *MI)
- : MBB(*MI->getParent()), Begin(MI), End(getBundleEnd(*MI)) {}
+ : MBB(*MI->getParent()), Begin(MI),
+ End(getBundleEnd(MI->getIterator())) {}
/// Return a reference to the basic block containing this bundle.
MachineBasicBlock &getMBB() const { return MBB; }
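
A sketch of addDef/addUse in a typical BuildMI chain; TargetOpcode::COPY is used only as a convenient generic opcode.

    #include "llvm/CodeGen/MachineInstrBuilder.h"
    #include "llvm/Target/TargetInstrInfo.h"
    #include "llvm/Target/TargetOpcodes.h"

    using namespace llvm;

    // Emit `DstReg = COPY SrcReg` at the end of MBB. addDef/addUse make the
    // def/use intent explicit, and addUse asserts if RegState::Define sneaks
    // into its flags.
    static MachineInstr *emitCopy(MachineBasicBlock &MBB,
                                  const TargetInstrInfo &TII, unsigned DstReg,
                                  unsigned SrcReg) {
      return BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(TargetOpcode::COPY))
          .addDef(DstReg)
          .addUse(SrcReg);
    }
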
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineInstrBundle.h b/contrib/llvm/include/llvm/CodeGen/MachineInstrBundle.h
index c0033a5148cf..995c7001d928 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineInstrBundle.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineInstrBundle.h
@@ -41,34 +41,33 @@ MachineBasicBlock::instr_iterator finalizeBundle(MachineBasicBlock &MBB,
/// MachineFunction. Return true if any bundles are finalized.
bool finalizeBundles(MachineFunction &MF);
-/// getBundleStart - Returns the first instruction in the bundle containing MI.
-///
-inline MachineInstr &getBundleStart(MachineInstr &MI) {
- MachineBasicBlock::instr_iterator I(MI);
+/// Returns an iterator to the first instruction in the bundle containing \p I.
+inline MachineBasicBlock::instr_iterator getBundleStart(
+ MachineBasicBlock::instr_iterator I) {
while (I->isBundledWithPred())
--I;
- return *I;
+ return I;
}
-inline const MachineInstr &getBundleStart(const MachineInstr &MI) {
- MachineBasicBlock::const_instr_iterator I(MI);
+/// Returns an iterator to the first instruction in the bundle containing \p I.
+inline MachineBasicBlock::const_instr_iterator getBundleStart(
+ MachineBasicBlock::const_instr_iterator I) {
while (I->isBundledWithPred())
--I;
- return *I;
+ return I;
}
-/// Return an iterator pointing beyond the bundle containing MI.
-inline MachineBasicBlock::instr_iterator getBundleEnd(MachineInstr &MI) {
- MachineBasicBlock::instr_iterator I(MI);
+/// Returns an iterator pointing beyond the bundle containing \p I.
+inline MachineBasicBlock::instr_iterator getBundleEnd(
+ MachineBasicBlock::instr_iterator I) {
while (I->isBundledWithSucc())
++I;
return ++I;
}
-/// Return an iterator pointing beyond the bundle containing MI.
-inline MachineBasicBlock::const_instr_iterator
-getBundleEnd(const MachineInstr &MI) {
- MachineBasicBlock::const_instr_iterator I(MI);
+/// Returns an iterator pointing beyond the bundle containing \p I.
+inline MachineBasicBlock::const_instr_iterator getBundleEnd(
+ MachineBasicBlock::const_instr_iterator I) {
while (I->isBundledWithSucc())
++I;
return ++I;
@@ -115,7 +114,7 @@ protected:
///
explicit MachineOperandIteratorBase(MachineInstr &MI, bool WholeBundle) {
if (WholeBundle) {
- InstrI = getBundleStart(MI).getIterator();
+ InstrI = getBundleStart(MI.getIterator());
InstrE = MI.getParent()->instr_end();
} else {
InstrI = InstrE = MI.getIterator();
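
The bundle helpers now take and return instr_iterators; a sketch of counting the members of the bundle containing a given instruction, mirroring the getBundleStart(MI.getIterator()) call sites updated above:

    #include "llvm/CodeGen/MachineInstrBundle.h"

    using namespace llvm;

    // Visit every instruction of the bundle containing MI, including the
    // bundle header itself.
    static unsigned bundleSize(MachineInstr &MI) {
      unsigned N = 0;
      for (auto I = getBundleStart(MI.getIterator()),
                E = getBundleEnd(MI.getIterator());
           I != E; ++I)
        ++N;
      return N;
    }
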
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineInstrBundleIterator.h b/contrib/llvm/include/llvm/CodeGen/MachineInstrBundleIterator.h
index 45a9a188f90e..2d77cfcae20f 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineInstrBundleIterator.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineInstrBundleIterator.h
@@ -19,23 +19,126 @@
namespace llvm {
+template <class T, bool IsReverse> struct MachineInstrBundleIteratorTraits;
+template <class T> struct MachineInstrBundleIteratorTraits<T, false> {
+ typedef simple_ilist<T, ilist_sentinel_tracking<true>> list_type;
+ typedef typename list_type::iterator instr_iterator;
+ typedef typename list_type::iterator nonconst_instr_iterator;
+ typedef typename list_type::const_iterator const_instr_iterator;
+};
+template <class T> struct MachineInstrBundleIteratorTraits<T, true> {
+ typedef simple_ilist<T, ilist_sentinel_tracking<true>> list_type;
+ typedef typename list_type::reverse_iterator instr_iterator;
+ typedef typename list_type::reverse_iterator nonconst_instr_iterator;
+ typedef typename list_type::const_reverse_iterator const_instr_iterator;
+};
+template <class T> struct MachineInstrBundleIteratorTraits<const T, false> {
+ typedef simple_ilist<T, ilist_sentinel_tracking<true>> list_type;
+ typedef typename list_type::const_iterator instr_iterator;
+ typedef typename list_type::iterator nonconst_instr_iterator;
+ typedef typename list_type::const_iterator const_instr_iterator;
+};
+template <class T> struct MachineInstrBundleIteratorTraits<const T, true> {
+ typedef simple_ilist<T, ilist_sentinel_tracking<true>> list_type;
+ typedef typename list_type::const_reverse_iterator instr_iterator;
+ typedef typename list_type::reverse_iterator nonconst_instr_iterator;
+ typedef typename list_type::const_reverse_iterator const_instr_iterator;
+};
+
+template <bool IsReverse> struct MachineInstrBundleIteratorHelper;
+template <> struct MachineInstrBundleIteratorHelper<false> {
+ /// Get the beginning of the current bundle.
+ template <class Iterator> static Iterator getBundleBegin(Iterator I) {
+ if (!I.isEnd())
+ while (I->isBundledWithPred())
+ --I;
+ return I;
+ }
+
+ /// Get the final node of the current bundle.
+ template <class Iterator> static Iterator getBundleFinal(Iterator I) {
+ if (!I.isEnd())
+ while (I->isBundledWithSucc())
+ ++I;
+ return I;
+ }
+
+ /// Increment forward ilist iterator.
+ template <class Iterator> static void increment(Iterator &I) {
+ I = std::next(getBundleFinal(I));
+ }
+
+ /// Decrement forward ilist iterator.
+ template <class Iterator> static void decrement(Iterator &I) {
+ I = getBundleBegin(std::prev(I));
+ }
+};
+
+template <> struct MachineInstrBundleIteratorHelper<true> {
+ /// Get the beginning of the current bundle.
+ template <class Iterator> static Iterator getBundleBegin(Iterator I) {
+ return MachineInstrBundleIteratorHelper<false>::getBundleBegin(
+ I.getReverse())
+ .getReverse();
+ }
+
+ /// Get the final node of the current bundle.
+ template <class Iterator> static Iterator getBundleFinal(Iterator I) {
+ return MachineInstrBundleIteratorHelper<false>::getBundleFinal(
+ I.getReverse())
+ .getReverse();
+ }
+
+ /// Increment reverse ilist iterator.
+ template <class Iterator> static void increment(Iterator &I) {
+ I = getBundleBegin(std::next(I));
+ }
+
+ /// Decrement reverse ilist iterator.
+ template <class Iterator> static void decrement(Iterator &I) {
+ I = std::prev(getBundleFinal(I));
+ }
+};
+
/// MachineBasicBlock iterator that automatically skips over MIs that are
/// inside bundles (i.e. walk top level MIs only).
-template <typename Ty>
-class MachineInstrBundleIterator
- : public std::iterator<std::bidirectional_iterator_tag, Ty, ptrdiff_t> {
- typedef ilist_iterator<Ty> instr_iterator;
+template <typename Ty, bool IsReverse = false>
+class MachineInstrBundleIterator : MachineInstrBundleIteratorHelper<IsReverse> {
+ typedef MachineInstrBundleIteratorTraits<Ty, IsReverse> Traits;
+ typedef typename Traits::instr_iterator instr_iterator;
instr_iterator MII;
public:
- MachineInstrBundleIterator(instr_iterator MI) : MII(MI) {}
+ typedef typename instr_iterator::value_type value_type;
+ typedef typename instr_iterator::difference_type difference_type;
+ typedef typename instr_iterator::pointer pointer;
+ typedef typename instr_iterator::reference reference;
+ typedef std::bidirectional_iterator_tag iterator_category;
+
+ typedef typename instr_iterator::const_pointer const_pointer;
+ typedef typename instr_iterator::const_reference const_reference;
+
+private:
+ typedef typename Traits::nonconst_instr_iterator nonconst_instr_iterator;
+ typedef typename Traits::const_instr_iterator const_instr_iterator;
+ typedef MachineInstrBundleIterator<
+ typename nonconst_instr_iterator::value_type, IsReverse>
+ nonconst_iterator;
+ typedef MachineInstrBundleIterator<Ty, !IsReverse> reverse_iterator;
- MachineInstrBundleIterator(Ty &MI) : MII(MI) {
+public:
+ MachineInstrBundleIterator(instr_iterator MI) : MII(MI) {
+ assert((!MI.getNodePtr() || MI.isEnd() || !MI->isBundledWithPred()) &&
+ "It's not legal to initialize MachineInstrBundleIterator with a "
+ "bundled MI");
+ }
+
+ MachineInstrBundleIterator(reference MI) : MII(MI) {
assert(!MI.isBundledWithPred() && "It's not legal to initialize "
"MachineInstrBundleIterator with a "
"bundled MI");
}
- MachineInstrBundleIterator(Ty *MI) : MII(MI) {
+ MachineInstrBundleIterator(pointer MI) : MII(MI) {
// FIXME: This conversion should be explicit.
assert((!MI || !MI->isBundledWithPred()) && "It's not legal to initialize "
"MachineInstrBundleIterator "
@@ -43,34 +146,101 @@ public:
}
// Template allows conversion from const to nonconst.
template <class OtherTy>
- MachineInstrBundleIterator(const MachineInstrBundleIterator<OtherTy> &I)
+ MachineInstrBundleIterator(
+ const MachineInstrBundleIterator<OtherTy, IsReverse> &I,
+ typename std::enable_if<std::is_convertible<OtherTy *, Ty *>::value,
+ void *>::type = nullptr)
: MII(I.getInstrIterator()) {}
MachineInstrBundleIterator() : MII(nullptr) {}
- Ty &operator*() const { return *MII; }
- Ty *operator->() const { return &operator*(); }
+ /// Get the bundle iterator for the given instruction's bundle.
+ static MachineInstrBundleIterator getAtBundleBegin(instr_iterator MI) {
+ return MachineInstrBundleIteratorHelper<IsReverse>::getBundleBegin(MI);
+ }
+
+ reference operator*() const { return *MII; }
+ pointer operator->() const { return &operator*(); }
+
+ /// Check for null.
+ bool isValid() const { return MII.getNodePtr(); }
- // FIXME: This conversion should be explicit.
- operator Ty *() const { return MII.getNodePtrUnchecked(); }
+ friend bool operator==(const MachineInstrBundleIterator &L,
+ const MachineInstrBundleIterator &R) {
+ return L.MII == R.MII;
+ }
+ friend bool operator==(const MachineInstrBundleIterator &L,
+ const const_instr_iterator &R) {
+ return L.MII == R; // Avoid assertion about validity of R.
+ }
+ friend bool operator==(const const_instr_iterator &L,
+ const MachineInstrBundleIterator &R) {
+ return L == R.MII; // Avoid assertion about validity of L.
+ }
+ friend bool operator==(const MachineInstrBundleIterator &L,
+ const nonconst_instr_iterator &R) {
+ return L.MII == R; // Avoid assertion about validity of R.
+ }
+ friend bool operator==(const nonconst_instr_iterator &L,
+ const MachineInstrBundleIterator &R) {
+ return L == R.MII; // Avoid assertion about validity of L.
+ }
+ friend bool operator==(const MachineInstrBundleIterator &L, const_pointer R) {
+ return L == const_instr_iterator(R); // Avoid assertion about validity of R.
+ }
+ friend bool operator==(const_pointer L, const MachineInstrBundleIterator &R) {
+ return const_instr_iterator(L) == R; // Avoid assertion about validity of L.
+ }
+ friend bool operator==(const MachineInstrBundleIterator &L,
+ const_reference R) {
+ return L == &R; // Avoid assertion about validity of R.
+ }
+ friend bool operator==(const_reference L,
+ const MachineInstrBundleIterator &R) {
+ return &L == R; // Avoid assertion about validity of L.
+ }
- bool operator==(const MachineInstrBundleIterator &X) const {
- return MII == X.MII;
+ friend bool operator!=(const MachineInstrBundleIterator &L,
+ const MachineInstrBundleIterator &R) {
+ return !(L == R);
+ }
+ friend bool operator!=(const MachineInstrBundleIterator &L,
+ const const_instr_iterator &R) {
+ return !(L == R);
+ }
+ friend bool operator!=(const const_instr_iterator &L,
+ const MachineInstrBundleIterator &R) {
+ return !(L == R);
+ }
+ friend bool operator!=(const MachineInstrBundleIterator &L,
+ const nonconst_instr_iterator &R) {
+ return !(L == R);
}
- bool operator!=(const MachineInstrBundleIterator &X) const {
- return !operator==(X);
+ friend bool operator!=(const nonconst_instr_iterator &L,
+ const MachineInstrBundleIterator &R) {
+ return !(L == R);
+ }
+ friend bool operator!=(const MachineInstrBundleIterator &L, const_pointer R) {
+ return !(L == R);
+ }
+ friend bool operator!=(const_pointer L, const MachineInstrBundleIterator &R) {
+ return !(L == R);
+ }
+ friend bool operator!=(const MachineInstrBundleIterator &L,
+ const_reference R) {
+ return !(L == R);
+ }
+ friend bool operator!=(const_reference L,
+ const MachineInstrBundleIterator &R) {
+ return !(L == R);
}
// Increment and decrement operators...
MachineInstrBundleIterator &operator--() {
- do
- --MII;
- while (MII->isBundledWithPred());
+ this->decrement(MII);
return *this;
}
MachineInstrBundleIterator &operator++() {
- while (MII->isBundledWithSucc())
- ++MII;
- ++MII;
+ this->increment(MII);
return *this;
}
MachineInstrBundleIterator operator--(int) {
@@ -85,6 +255,10 @@ public:
}
instr_iterator getInstrIterator() const { return MII; }
+
+ nonconst_iterator getNonConstIterator() const { return MII.getNonConst(); }
+
+ reverse_iterator getReverse() const { return MII.getReverse(); }
};
} // end namespace llvm
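
A sketch of the direction-aware iterator in use: reverse iteration visits one bundle head per step, and getReverse() converts back to a forward iterator pointing at the same instruction.

    #include "llvm/CodeGen/MachineBasicBlock.h"

    using namespace llvm;

    // Find the last bundle whose head is not a debug value, returning a
    // forward iterator to it (or end() if none exists).
    static MachineBasicBlock::iterator
    lastNonDebugBundle(MachineBasicBlock &MBB) {
      for (MachineBasicBlock::reverse_iterator RI = MBB.rbegin(),
                                               RE = MBB.rend();
           RI != RE; ++RI)
        if (!RI->isDebugValue())
          return RI.getReverse(); // same instruction, forward direction
      return MBB.end();
    }
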
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineLoopInfo.h b/contrib/llvm/include/llvm/CodeGen/MachineLoopInfo.h
index 224a2a1aa59f..dc72ae1810ee 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineLoopInfo.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineLoopInfo.h
@@ -54,6 +54,12 @@ public:
/// that contains the header.
MachineBasicBlock *getBottomBlock();
+ /// \brief Find the block that contains the loop control variable and the
+ /// loop test. This will return the latch block if it's one of the exiting
+ /// blocks. Otherwise, return the exiting block. Return 'null' when
+ /// multiple exiting blocks are present.
+ MachineBasicBlock *findLoopControlBlock();
+
void dump() const;
private:
@@ -81,6 +87,14 @@ public:
LoopInfoBase<MachineBasicBlock, MachineLoop>& getBase() { return LI; }
+ /// \brief Find the block that either is the loop preheader, or could
+ /// speculatively be used as the preheader. This is e.g. useful to place
+ /// loop setup code. Code that cannot be speculated should not be placed
+ /// here. SpeculativePreheader controls whether it also tries to
+ /// find the speculative preheader if the regular preheader is not present.
+ MachineBasicBlock *findLoopPreheader(MachineLoop *L,
+ bool SpeculativePreheader = false) const;
+
/// The iterator interface to the top-level loops in the current function.
typedef LoopInfoBase<MachineBasicBlock, MachineLoop>::iterator iterator;
inline iterator begin() const { return LI.begin(); }
@@ -148,29 +162,21 @@ public:
// Allow clients to walk the list of nested loops...
template <> struct GraphTraits<const MachineLoop*> {
- typedef const MachineLoop NodeType;
+ typedef const MachineLoop *NodeRef;
typedef MachineLoopInfo::iterator ChildIteratorType;
- static NodeType *getEntryNode(const MachineLoop *L) { return L; }
- static inline ChildIteratorType child_begin(NodeType *N) {
- return N->begin();
- }
- static inline ChildIteratorType child_end(NodeType *N) {
- return N->end();
- }
+ static NodeRef getEntryNode(const MachineLoop *L) { return L; }
+ static ChildIteratorType child_begin(NodeRef N) { return N->begin(); }
+ static ChildIteratorType child_end(NodeRef N) { return N->end(); }
};
template <> struct GraphTraits<MachineLoop*> {
- typedef MachineLoop NodeType;
+ typedef MachineLoop *NodeRef;
typedef MachineLoopInfo::iterator ChildIteratorType;
- static NodeType *getEntryNode(MachineLoop *L) { return L; }
- static inline ChildIteratorType child_begin(NodeType *N) {
- return N->begin();
- }
- static inline ChildIteratorType child_end(NodeType *N) {
- return N->end();
- }
+ static NodeRef getEntryNode(MachineLoop *L) { return L; }
+ static ChildIteratorType child_begin(NodeRef N) { return N->begin(); }
+ static ChildIteratorType child_end(NodeRef N) { return N->end(); }
};
} // End llvm namespace
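Editor's note: a small usage sketch for the new preheader query, assuming only the interface declared above; the helper name and the decision to allow a speculative preheader are assumptions, not taken from the patch:

#include "llvm/CodeGen/MachineLoopInfo.h"
using namespace llvm;

// Where to place loop setup code: the real preheader if one exists, otherwise
// a block that could speculatively act as one (may return null).
static MachineBasicBlock *getSetupBlock(const MachineLoopInfo &MLI,
                                        MachineLoop *L) {
  return MLI.findLoopPreheader(L, /*SpeculativePreheader=*/true);
}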
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineMemOperand.h b/contrib/llvm/include/llvm/CodeGen/MachineMemOperand.h
index 5fa7058733b3..a311124a35ba 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineMemOperand.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineMemOperand.h
@@ -19,8 +19,10 @@
#include "llvm/ADT/BitmaskEnum.h"
#include "llvm/ADT/PointerUnion.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Value.h" // PointerLikeTypeTraits<Value*>
+#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/DataTypes.h"
namespace llvm {
@@ -101,32 +103,53 @@ public:
MOVolatile = 1u << 2,
/// The memory access is non-temporal.
MONonTemporal = 1u << 3,
- /// The memory access is invariant.
- MOInvariant = 1u << 4,
+ /// The memory access is dereferenceable (i.e., doesn't trap).
+ MODereferenceable = 1u << 4,
+ /// The memory access always returns the same value (or traps).
+ MOInvariant = 1u << 5,
// Reserved for use by target-specific passes.
- MOTargetFlag1 = 1u << 5,
- MOTargetFlag2 = 1u << 6,
- MOTargetFlag3 = 1u << 7,
+ MOTargetFlag1 = 1u << 6,
+ MOTargetFlag2 = 1u << 7,
+ MOTargetFlag3 = 1u << 8,
LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ MOTargetFlag3)
};
private:
+ /// Atomic information for this memory operation.
+ struct MachineAtomicInfo {
+ /// Synchronization scope for this memory operation.
+ unsigned SynchScope : 1; // enum SynchronizationScope
+ /// Atomic ordering requirements for this memory operation. For cmpxchg
+ /// atomic operations, the atomic ordering requirements when the store occurs.
+ unsigned Ordering : 4; // enum AtomicOrdering
+ /// For cmpxchg atomic operations, the atomic ordering requirements when the
+ /// store does not occur.
+ unsigned FailureOrdering : 4; // enum AtomicOrdering
+ };
+
MachinePointerInfo PtrInfo;
uint64_t Size;
Flags FlagVals;
uint16_t BaseAlignLog2; // log_2(base_alignment) + 1
+ MachineAtomicInfo AtomicInfo;
AAMDNodes AAInfo;
const MDNode *Ranges;
public:
/// Construct a MachineMemOperand object with the specified PtrInfo, flags,
- /// size, and base alignment.
+ /// size, and base alignment. For atomic operations, the synchronization
+ /// scope and atomic ordering requirements must also be specified. For
+ /// cmpxchg atomic operations, the atomic ordering requirements when the
+ /// store does not occur must also be specified.
MachineMemOperand(MachinePointerInfo PtrInfo, Flags flags, uint64_t s,
unsigned base_alignment,
const AAMDNodes &AAInfo = AAMDNodes(),
- const MDNode *Ranges = nullptr);
+ const MDNode *Ranges = nullptr,
+ SynchronizationScope SynchScope = CrossThread,
+ AtomicOrdering Ordering = AtomicOrdering::NotAtomic,
+ AtomicOrdering FailureOrdering = AtomicOrdering::NotAtomic);
const MachinePointerInfo &getPointerInfo() const { return PtrInfo; }
@@ -174,12 +197,35 @@ public:
/// Return the range tag for the memory reference.
const MDNode *getRanges() const { return Ranges; }
+ /// Return the synchronization scope for this memory operation.
+ SynchronizationScope getSynchScope() const {
+ return static_cast<SynchronizationScope>(AtomicInfo.SynchScope);
+ }
+
+ /// Return the atomic ordering requirements for this memory operation. For
+ /// cmpxchg atomic operations, return the atomic ordering requirements when
+ /// the store occurs.
+ AtomicOrdering getOrdering() const {
+ return static_cast<AtomicOrdering>(AtomicInfo.Ordering);
+ }
+
+ /// For cmpxchg atomic operations, return the atomic ordering requirements
+ /// when the store does not occur.
+ AtomicOrdering getFailureOrdering() const {
+ return static_cast<AtomicOrdering>(AtomicInfo.FailureOrdering);
+ }
+
bool isLoad() const { return FlagVals & MOLoad; }
bool isStore() const { return FlagVals & MOStore; }
bool isVolatile() const { return FlagVals & MOVolatile; }
bool isNonTemporal() const { return FlagVals & MONonTemporal; }
+ bool isDereferenceable() const { return FlagVals & MODereferenceable; }
bool isInvariant() const { return FlagVals & MOInvariant; }
+ /// Returns true if this operation has an atomic ordering requirement of
+ /// unordered or higher, false otherwise.
+ bool isAtomic() const { return getOrdering() != AtomicOrdering::NotAtomic; }
+
/// Returns true if this memory operation doesn't have any ordering
/// constraints other than normal aliasing. Volatile and atomic memory
/// operations can't be reordered.
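Editor's note: as a sketch of how the new accessors compose (not taken from the patch; only the interface shown above is assumed), a pass could classify a memory operand like this:

#include "llvm/CodeGen/MachineMemOperand.h"
using namespace llvm;

// True for loads whose success ordering is exactly Acquire; isAtomic() is
// true for any ordering stronger than NotAtomic.
static bool isAtomicAcquireLoad(const MachineMemOperand &MMO) {
  return MMO.isLoad() && MMO.isAtomic() &&
         MMO.getOrdering() == AtomicOrdering::Acquire;
}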
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineModuleInfo.h b/contrib/llvm/include/llvm/CodeGen/MachineModuleInfo.h
index 77571124a1b8..182d23ef3c90 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineModuleInfo.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineModuleInfo.h
@@ -35,62 +35,38 @@
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/IR/DebugLoc.h"
-#include "llvm/IR/Metadata.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MachineLocation.h"
#include "llvm/Pass.h"
#include "llvm/Support/DataTypes.h"
-#include "llvm/Support/Dwarf.h"
namespace llvm {
//===----------------------------------------------------------------------===//
// Forward declarations.
+class BlockAddress;
+class CallInst;
class Constant;
class GlobalVariable;
-class BlockAddress;
+class LandingPadInst;
class MDNode;
class MMIAddrLabelMap;
class MachineBasicBlock;
class MachineFunction;
+class MachineFunctionInitializer;
class Module;
class PointerType;
class StructType;
-struct SEHHandler {
- // Filter or finally function. Null indicates a catch-all.
- const Function *FilterOrFinally;
-
- // Address of block to recover at. Null for a finally handler.
- const BlockAddress *RecoverBA;
-};
-
//===----------------------------------------------------------------------===//
-/// LandingPadInfo - This structure is used to retain landing pad info for
-/// the current function.
+/// This class can be derived from and used by targets to hold private
+/// target-specific information for each Module. Objects of this type are
+/// accessed/created with MMI::getInfo and destroyed when the MachineModuleInfo
+/// is destroyed.
///
-struct LandingPadInfo {
- MachineBasicBlock *LandingPadBlock; // Landing pad block.
- SmallVector<MCSymbol *, 1> BeginLabels; // Labels prior to invoke.
- SmallVector<MCSymbol *, 1> EndLabels; // Labels after invoke.
- SmallVector<SEHHandler, 1> SEHHandlers; // SEH handlers active at this lpad.
- MCSymbol *LandingPadLabel; // Label at beginning of landing pad.
- std::vector<int> TypeIds; // List of type ids (filters negative).
-
- explicit LandingPadInfo(MachineBasicBlock *MBB)
- : LandingPadBlock(MBB), LandingPadLabel(nullptr) {}
-};
-
-//===----------------------------------------------------------------------===//
-/// MachineModuleInfoImpl - This class can be derived from and used by targets
-/// to hold private target-specific information for each Module. Objects of
-/// type are accessed/created with MMI::getInfo and destroyed when the
-/// MachineModuleInfo is destroyed.
-///
class MachineModuleInfoImpl {
public:
typedef PointerIntPair<MCSymbol*, 1, bool> StubValueTy;
@@ -104,127 +80,99 @@ protected:
};
//===----------------------------------------------------------------------===//
-/// MachineModuleInfo - This class contains meta information specific to a
-/// module. Queries can be made by different debugging and exception handling
-/// schemes and reformated for specific use.
+/// This class contains meta information specific to a module. Queries can be
+/// made by different debugging and exception handling schemes and reformatted
+/// for specific use.
///
class MachineModuleInfo : public ImmutablePass {
- /// Context - This is the MCContext used for the entire code generator.
+ const TargetMachine &TM;
+
+ /// This is the MCContext used for the entire code generator.
MCContext Context;
- /// TheModule - This is the LLVM Module being worked on.
+ /// This is the LLVM Module being worked on.
const Module *TheModule;
- /// ObjFileMMI - This is the object-file-format-specific implementation of
+ /// This is the object-file-format-specific implementation of
/// MachineModuleInfoImpl, which lets targets accumulate whatever info they
/// want.
MachineModuleInfoImpl *ObjFileMMI;
- /// List of moves done by a function's prolog. Used to construct frame maps
- /// by debug and exception handling consumers.
- std::vector<MCCFIInstruction> FrameInstructions;
+ /// \name Exception Handling
+ /// \{
- /// LandingPads - List of LandingPadInfo describing the landing pad
- /// information in the current function.
- std::vector<LandingPadInfo> LandingPads;
-
- /// LPadToCallSiteMap - Map a landing pad's EH symbol to the call site
- /// indexes.
- DenseMap<MCSymbol*, SmallVector<unsigned, 4> > LPadToCallSiteMap;
-
- /// CallSiteMap - Map of invoke call site index values to associated begin
- /// EH_LABEL for the current function.
- DenseMap<MCSymbol*, unsigned> CallSiteMap;
+ /// Vector of all personality functions ever seen. Used to emit common EH
+ /// frames.
+ std::vector<const Function *> Personalities;
- /// CurCallSite - The current call site index being processed, if any. 0 if
- /// none.
+ /// The current call site index being processed, if any. 0 if none.
unsigned CurCallSite;
- /// TypeInfos - List of C++ TypeInfo used in the current function.
- std::vector<const GlobalValue *> TypeInfos;
-
- /// FilterIds - List of typeids encoding filters used in the current function.
- std::vector<unsigned> FilterIds;
-
- /// FilterEnds - List of the indices in FilterIds corresponding to filter
- /// terminators.
- std::vector<unsigned> FilterEnds;
-
- /// Personalities - Vector of all personality functions ever seen. Used to
- /// emit common EH frames.
- std::vector<const Function *> Personalities;
+ /// \}
- /// AddrLabelSymbols - This map keeps track of which symbol is being used for
- /// the specified basic block's address of label.
+ /// This map keeps track of which symbol is being used for the specified
+ /// basic block's address of label.
MMIAddrLabelMap *AddrLabelSymbols;
- bool CallsEHReturn;
- bool CallsUnwindInit;
- bool HasEHFunclets;
-
// TODO: Ideally, what we'd like is to have a switch that allows emitting
// synchronous (precise at call-sites only) CFA into .eh_frame. However,
// even under this switch, we'd like .debug_frame to be precise when using.
// -g. At this moment, there's no way to specify that some CFI directives
// go into .eh_frame only, while others go into .debug_frame only.
- /// DbgInfoAvailable - True if debugging information is available
- /// in this module.
+ /// True if debugging information is available in this module.
bool DbgInfoAvailable;
- /// UsesVAFloatArgument - True if this module calls VarArg function with
- /// floating-point arguments. This is used to emit an undefined reference
- /// to _fltused on Windows targets.
+ /// True if this module calls a VarArg function with floating-point arguments.
+ /// This is used to emit an undefined reference to _fltused on Windows
+ /// targets.
bool UsesVAFloatArgument;
- /// UsesMorestackAddr - True if the module calls the __morestack function
- /// indirectly, as is required under the large code model on x86. This is used
- /// to emit a definition of a symbol, __morestack_addr, containing the
- /// address. See comments in lib/Target/X86/X86FrameLowering.cpp for more
- /// details.
+ /// True if the module calls the __morestack function indirectly, as is
+ /// required under the large code model on x86. This is used to emit
+ /// a definition of a symbol, __morestack_addr, containing the address. See
+ /// comments in lib/Target/X86/X86FrameLowering.cpp for more details.
bool UsesMorestackAddr;
- EHPersonality PersonalityTypeCache;
+ MachineFunctionInitializer *MFInitializer;
+ /// Maps IR Functions to their corresponding MachineFunctions.
+ DenseMap<const Function*, std::unique_ptr<MachineFunction>> MachineFunctions;
+ /// Next unique number available for a MachineFunction.
+ unsigned NextFnNum = 0;
+ const Function *LastRequest = nullptr; ///< Used for shortcut/cache.
+ MachineFunction *LastResult = nullptr; ///< Used for shortcut/cache.
public:
static char ID; // Pass identification, replacement for typeid
- struct VariableDbgInfo {
- const DILocalVariable *Var;
- const DIExpression *Expr;
- unsigned Slot;
- const DILocation *Loc;
-
- VariableDbgInfo(const DILocalVariable *Var, const DIExpression *Expr,
- unsigned Slot, const DILocation *Loc)
- : Var(Var), Expr(Expr), Slot(Slot), Loc(Loc) {}
- };
- typedef SmallVector<VariableDbgInfo, 4> VariableDbgInfoMapTy;
- VariableDbgInfoMapTy VariableDbgInfos;
-
- MachineModuleInfo(); // DUMMY CONSTRUCTOR, DO NOT CALL.
- // Real constructor.
- MachineModuleInfo(const MCAsmInfo &MAI, const MCRegisterInfo &MRI,
- const MCObjectFileInfo *MOFI);
+ explicit MachineModuleInfo(const TargetMachine *TM = nullptr);
~MachineModuleInfo() override;
// Initialization and Finalization
bool doInitialization(Module &) override;
bool doFinalization(Module &) override;
- /// EndFunction - Discard function meta information.
- ///
- void EndFunction();
-
const MCContext &getContext() const { return Context; }
MCContext &getContext() { return Context; }
void setModule(const Module *M) { TheModule = M; }
const Module *getModule() const { return TheModule; }
- /// getInfo - Keep track of various per-function pieces of information for
- /// backends that would like to do so.
- ///
+ void setMachineFunctionInitializer(MachineFunctionInitializer *MFInit) {
+ MFInitializer = MFInit;
+ }
+
+ /// Returns the MachineFunction constructed for the IR function \p F.
+ /// Creates a new MachineFunction and runs the MachineFunctionInitializer
+ /// if none exists yet.
+ MachineFunction &getMachineFunction(const Function &F);
+
+ /// Delete the MachineFunction constructed for \p F and reset its entry in
+ /// the IR Function to MachineFunction map.
+ void deleteMachineFunctionFor(Function &F);
+
+ /// Keep track of various per-function pieces of information for backends
+ /// that would like to do so.
template<typename Ty>
Ty &getObjFileInfo() {
if (ObjFileMMI == nullptr)
@@ -237,20 +185,10 @@ public:
return const_cast<MachineModuleInfo*>(this)->getObjFileInfo<Ty>();
}
- /// hasDebugInfo - Returns true if valid debug info is present.
- ///
+ /// Returns true if valid debug info is present.
bool hasDebugInfo() const { return DbgInfoAvailable; }
void setDebugInfoAvailability(bool avail) { DbgInfoAvailable = avail; }
- bool callsEHReturn() const { return CallsEHReturn; }
- void setCallsEHReturn(bool b) { CallsEHReturn = b; }
-
- bool callsUnwindInit() const { return CallsUnwindInit; }
- void setCallsUnwindInit(bool b) { CallsUnwindInit = b; }
-
- bool hasEHFunclets() const { return HasEHFunclets; }
- void setHasEHFunclets(bool V) { HasEHFunclets = V; }
-
bool usesVAFloatArgument() const {
return UsesVAFloatArgument;
}
@@ -267,166 +205,52 @@ public:
UsesMorestackAddr = b;
}
- /// \brief Returns a reference to a list of cfi instructions in the current
- /// function's prologue. Used to construct frame maps for debug and exception
- /// handling comsumers.
- const std::vector<MCCFIInstruction> &getFrameInstructions() const {
- return FrameInstructions;
- }
-
- unsigned LLVM_ATTRIBUTE_UNUSED_RESULT
- addFrameInst(const MCCFIInstruction &Inst) {
- FrameInstructions.push_back(Inst);
- return FrameInstructions.size() - 1;
- }
-
- /// getAddrLabelSymbol - Return the symbol to be used for the specified basic
- /// block when its address is taken. This cannot be its normal LBB label
- /// because the block may be accessed outside its containing function.
+ /// Return the symbol to be used for the specified basic block when its
+ /// address is taken. This cannot be its normal LBB label because the block
+ /// may be accessed outside its containing function.
MCSymbol *getAddrLabelSymbol(const BasicBlock *BB) {
return getAddrLabelSymbolToEmit(BB).front();
}
- /// getAddrLabelSymbolToEmit - Return the symbol to be used for the specified
- /// basic block when its address is taken. If other blocks were RAUW'd to
- /// this one, we may have to emit them as well, return the whole set.
+ /// Return the symbol to be used for the specified basic block when its
+ /// address is taken. If other blocks were RAUW'd to this one, we may have
+ /// to emit them as well, return the whole set.
ArrayRef<MCSymbol *> getAddrLabelSymbolToEmit(const BasicBlock *BB);
- /// takeDeletedSymbolsForFunction - If the specified function has had any
- /// references to address-taken blocks generated, but the block got deleted,
- /// return the symbol now so we can emit it. This prevents emitting a
- /// reference to a symbol that has no definition.
+ /// If the specified function has had any references to address-taken blocks
+ /// generated, but the block got deleted, return the symbol now so we can
+ /// emit it. This prevents emitting a reference to a symbol that has no
+ /// definition.
void takeDeletedSymbolsForFunction(const Function *F,
std::vector<MCSymbol*> &Result);
+ /// \name Exception Handling
+ /// \{
- //===- EH ---------------------------------------------------------------===//
-
- /// getOrCreateLandingPadInfo - Find or create an LandingPadInfo for the
- /// specified MachineBasicBlock.
- LandingPadInfo &getOrCreateLandingPadInfo(MachineBasicBlock *LandingPad);
-
- /// addInvoke - Provide the begin and end labels of an invoke style call and
- /// associate it with a try landing pad block.
- void addInvoke(MachineBasicBlock *LandingPad,
- MCSymbol *BeginLabel, MCSymbol *EndLabel);
+ /// Set the call site currently being processed.
+ void setCurrentCallSite(unsigned Site) { CurCallSite = Site; }
- /// addLandingPad - Add a new panding pad. Returns the label ID for the
- /// landing pad entry.
- MCSymbol *addLandingPad(MachineBasicBlock *LandingPad);
+ /// Get the call site currently being processed, if any. Returns zero if
+ /// none.
+ unsigned getCurrentCallSite() { return CurCallSite; }
- /// addPersonality - Provide the personality function for the exception
- /// information.
+ /// Provide the personality function for the exception information.
void addPersonality(const Function *Personality);
- /// getPersonalities - Return array of personality functions ever seen.
+ /// Return array of personality functions ever seen.
const std::vector<const Function *>& getPersonalities() const {
return Personalities;
}
+ /// \}
+}; // End class MachineModuleInfo
- /// addCatchTypeInfo - Provide the catch typeinfo for a landing pad.
- ///
- void addCatchTypeInfo(MachineBasicBlock *LandingPad,
- ArrayRef<const GlobalValue *> TyInfo);
-
- /// addFilterTypeInfo - Provide the filter typeinfo for a landing pad.
- ///
- void addFilterTypeInfo(MachineBasicBlock *LandingPad,
- ArrayRef<const GlobalValue *> TyInfo);
-
- /// addCleanup - Add a cleanup action for a landing pad.
- ///
- void addCleanup(MachineBasicBlock *LandingPad);
-
- void addSEHCatchHandler(MachineBasicBlock *LandingPad, const Function *Filter,
- const BlockAddress *RecoverLabel);
-
- void addSEHCleanupHandler(MachineBasicBlock *LandingPad,
- const Function *Cleanup);
-
- /// getTypeIDFor - Return the type id for the specified typeinfo. This is
- /// function wide.
- unsigned getTypeIDFor(const GlobalValue *TI);
-
- /// getFilterIDFor - Return the id of the filter encoded by TyIds. This is
- /// function wide.
- int getFilterIDFor(std::vector<unsigned> &TyIds);
-
- /// TidyLandingPads - Remap landing pad labels and remove any deleted landing
- /// pads.
- void TidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap = nullptr);
-
- /// getLandingPads - Return a reference to the landing pad info for the
- /// current function.
- const std::vector<LandingPadInfo> &getLandingPads() const {
- return LandingPads;
- }
-
- /// setCallSiteLandingPad - Map the landing pad's EH symbol to the call
- /// site indexes.
- void setCallSiteLandingPad(MCSymbol *Sym, ArrayRef<unsigned> Sites);
-
- /// getCallSiteLandingPad - Get the call site indexes for a landing pad EH
- /// symbol.
- SmallVectorImpl<unsigned> &getCallSiteLandingPad(MCSymbol *Sym) {
- assert(hasCallSiteLandingPad(Sym) &&
- "missing call site number for landing pad!");
- return LPadToCallSiteMap[Sym];
- }
-
- /// hasCallSiteLandingPad - Return true if the landing pad Eh symbol has an
- /// associated call site.
- bool hasCallSiteLandingPad(MCSymbol *Sym) {
- return !LPadToCallSiteMap[Sym].empty();
- }
-
- /// setCallSiteBeginLabel - Map the begin label for a call site.
- void setCallSiteBeginLabel(MCSymbol *BeginLabel, unsigned Site) {
- CallSiteMap[BeginLabel] = Site;
- }
-
- /// getCallSiteBeginLabel - Get the call site number for a begin label.
- unsigned getCallSiteBeginLabel(MCSymbol *BeginLabel) {
- assert(hasCallSiteBeginLabel(BeginLabel) &&
- "Missing call site number for EH_LABEL!");
- return CallSiteMap[BeginLabel];
- }
-
- /// hasCallSiteBeginLabel - Return true if the begin label has a call site
- /// number associated with it.
- bool hasCallSiteBeginLabel(MCSymbol *BeginLabel) {
- return CallSiteMap[BeginLabel] != 0;
- }
-
- /// setCurrentCallSite - Set the call site currently being processed.
- void setCurrentCallSite(unsigned Site) { CurCallSite = Site; }
-
- /// getCurrentCallSite - Get the call site currently being processed, if any.
- /// return zero if none.
- unsigned getCurrentCallSite() { return CurCallSite; }
-
- /// getTypeInfos - Return a reference to the C++ typeinfo for the current
- /// function.
- const std::vector<const GlobalValue *> &getTypeInfos() const {
- return TypeInfos;
- }
-
- /// getFilterIds - Return a reference to the typeids encoding filters used in
- /// the current function.
- const std::vector<unsigned> &getFilterIds() const {
- return FilterIds;
- }
-
- /// setVariableDbgInfo - Collect information used to emit debugging
- /// information of a variable.
- void setVariableDbgInfo(const DILocalVariable *Var, const DIExpression *Expr,
- unsigned Slot, const DILocation *Loc) {
- VariableDbgInfos.emplace_back(Var, Expr, Slot, Loc);
- }
-
- VariableDbgInfoMapTy &getVariableDbgInfo() { return VariableDbgInfos; }
+//===- MMI building helpers -----------------------------------------------===//
-}; // End class MachineModuleInfo
+/// Determine if any floating-point values are being passed to this variadic
+/// function, and set the MachineModuleInfo's usesVAFloatArgument flag if so.
+/// This flag is used to emit an undefined reference to _fltused on Windows,
+/// which will link in MSVCRT's floating-point support.
+void computeUsesVAFloatArgument(const CallInst &I, MachineModuleInfo &MMI);
} // End llvm namespace
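Editor's note: the Function-to-MachineFunction map now lives in MachineModuleInfo. A hypothetical client (the helper name is an assumption, not from the patch) would obtain the MachineFunction like so:

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/IR/Function.h"
using namespace llvm;

// Constructs the MachineFunction on first request (running the registered
// MachineFunctionInitializer, if any) and returns the cached one afterwards.
static unsigned numBlocks(MachineModuleInfo &MMI, const Function &F) {
  MachineFunction &MF = MMI.getMachineFunction(F);
  return MF.size();
}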
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineOperand.h b/contrib/llvm/include/llvm/CodeGen/MachineOperand.h
index ee0a9cf11e6a..5df99a6c807e 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineOperand.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineOperand.h
@@ -15,6 +15,7 @@
#define LLVM_CODEGEN_MACHINEOPERAND_H
#include "llvm/Support/DataTypes.h"
+#include "llvm/IR/Intrinsics.h"
#include <cassert>
namespace llvm {
@@ -29,6 +30,7 @@ class MachineRegisterInfo;
class MDNode;
class ModuleSlotTracker;
class TargetMachine;
+class TargetIntrinsicInfo;
class TargetRegisterInfo;
class hash_code;
class raw_ostream;
@@ -60,7 +62,9 @@ public:
MO_RegisterLiveOut, ///< Mask of live-out registers.
MO_Metadata, ///< Metadata reference (for debug info)
MO_MCSymbol, ///< MCSymbol reference (for debug/eh info)
- MO_CFIIndex ///< MCCFIInstruction index.
+ MO_CFIIndex, ///< MCCFIInstruction index.
+ MO_IntrinsicID, ///< Intrinsic ID for ISel
+ MO_Predicate, ///< Generic predicate for ISel
};
private:
@@ -160,6 +164,8 @@ private:
const MDNode *MD; // For MO_Metadata.
MCSymbol *Sym; // For MO_MCSymbol.
unsigned CFIIndex; // For MO_CFI.
+ Intrinsic::ID IntrinsicID; // For MO_IntrinsicID.
+ unsigned Pred; // For MO_Predicate
struct { // For MO_Register.
// Register number is in SmallContents.RegNo.
@@ -218,9 +224,12 @@ public:
///
void clearParent() { ParentMI = nullptr; }
- void print(raw_ostream &os, const TargetRegisterInfo *TRI = nullptr) const;
+ void print(raw_ostream &os, const TargetRegisterInfo *TRI = nullptr,
+ const TargetIntrinsicInfo *IntrinsicInfo = nullptr) const;
void print(raw_ostream &os, ModuleSlotTracker &MST,
- const TargetRegisterInfo *TRI = nullptr) const;
+ const TargetRegisterInfo *TRI = nullptr,
+ const TargetIntrinsicInfo *IntrinsicInfo = nullptr) const;
+ LLVM_DUMP_METHOD void dump() const;
//===--------------------------------------------------------------------===//
// Accessors that tell you what kind of MachineOperand you're looking at.
@@ -258,7 +267,8 @@ public:
bool isMetadata() const { return OpKind == MO_Metadata; }
bool isMCSymbol() const { return OpKind == MO_MCSymbol; }
bool isCFIIndex() const { return OpKind == MO_CFIIndex; }
-
+ bool isIntrinsicID() const { return OpKind == MO_IntrinsicID; }
+ bool isPredicate() const { return OpKind == MO_Predicate; }
//===--------------------------------------------------------------------===//
// Accessors for Register Operands
//===--------------------------------------------------------------------===//
@@ -453,6 +463,16 @@ public:
return Contents.CFIIndex;
}
+ Intrinsic::ID getIntrinsicID() const {
+ assert(isIntrinsicID() && "Wrong MachineOperand accessor");
+ return Contents.IntrinsicID;
+ }
+
+ unsigned getPredicate() const {
+ assert(isPredicate() && "Wrong MachineOperand accessor");
+ return Contents.Pred;
+ }
+
/// Return the offset from the symbol in this operand. This always returns 0
/// for ExternalSymbol operands.
int64_t getOffset() const {
@@ -547,8 +567,8 @@ public:
// Other methods.
//===--------------------------------------------------------------------===//
- /// isIdenticalTo - Return true if this operand is identical to the specified
- /// operand. Note: This method ignores isKill and isDead properties.
+ /// Returns true if this operand is identical to the specified operand except
+ /// for liveness-related flags (isKill, isUndef and isDead).
bool isIdenticalTo(const MachineOperand &Other) const;
/// \brief MachineOperand hash_value overload.
@@ -574,6 +594,9 @@ public:
/// ChangeToMCSymbol - Replace this operand with a new MC symbol operand.
void ChangeToMCSymbol(MCSymbol *Sym);
+ /// Replace this operand with a frame index.
+ void ChangeToFrameIndex(int Idx);
+
/// ChangeToRegister - Replace this operand with a new register operand of
/// the specified value. If an operand is known to be an register already,
/// the setReg method should be used.
@@ -732,6 +755,18 @@ public:
return Op;
}
+ static MachineOperand CreateIntrinsicID(Intrinsic::ID ID) {
+ MachineOperand Op(MachineOperand::MO_IntrinsicID);
+ Op.Contents.IntrinsicID = ID;
+ return Op;
+ }
+
+ static MachineOperand CreatePredicate(unsigned Pred) {
+ MachineOperand Op(MachineOperand::MO_Predicate);
+ Op.Contents.Pred = Pred;
+ return Op;
+ }
+
friend class MachineInstr;
friend class MachineRegisterInfo;
private:
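Editor's note: a brief sketch of the two operand kinds added for global instruction selection; the wrapper function is hypothetical, only the factory methods and accessors shown above are assumed:

#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/IR/Intrinsics.h"
#include <cassert>
using namespace llvm;

static MachineOperand makeMemcpyIDOperand() {
  // MO_IntrinsicID carries an Intrinsic::ID; MO_Predicate would carry an
  // unsigned predicate code in the same way via CreatePredicate().
  MachineOperand Op = MachineOperand::CreateIntrinsicID(Intrinsic::memcpy);
  assert(Op.isIntrinsicID() && Op.getIntrinsicID() == Intrinsic::memcpy);
  return Op;
}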
diff --git a/contrib/llvm/include/llvm/CodeGen/MachinePassRegistry.h b/contrib/llvm/include/llvm/CodeGen/MachinePassRegistry.h
index 6731983c5874..db914b1f8bc7 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachinePassRegistry.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachinePassRegistry.h
@@ -37,8 +37,8 @@ class MachinePassRegistryListener {
public:
MachinePassRegistryListener() {}
virtual ~MachinePassRegistryListener() {}
- virtual void NotifyAdd(const char *N, MachinePassCtor C, const char *D) = 0;
- virtual void NotifyRemove(const char *N) = 0;
+ virtual void NotifyAdd(StringRef N, MachinePassCtor C, StringRef D) = 0;
+ virtual void NotifyRemove(StringRef N) = 0;
};
@@ -52,8 +52,8 @@ class MachinePassRegistryNode {
private:
MachinePassRegistryNode *Next; // Next function pass in list.
- const char *Name; // Name of function pass.
- const char *Description; // Description string.
+ StringRef Name; // Name of function pass.
+ StringRef Description; // Description string.
MachinePassCtor Ctor; // Function pass creator.
public:
@@ -68,8 +68,8 @@ public:
// Accessors
MachinePassRegistryNode *getNext() const { return Next; }
MachinePassRegistryNode **getNextAddress() { return &Next; }
- const char *getName() const { return Name; }
- const char *getDescription() const { return Description; }
+ StringRef getName() const { return Name; }
+ StringRef getDescription() const { return Description; }
MachinePassCtor getCtor() const { return Ctor; }
void setNext(MachinePassRegistryNode *N) { Next = N; }
@@ -143,10 +143,10 @@ public:
// Implement the MachinePassRegistryListener callbacks.
//
- void NotifyAdd(const char *N, MachinePassCtor C, const char *D) override {
+ void NotifyAdd(StringRef N, MachinePassCtor C, StringRef D) override {
this->addLiteralOption(N, (typename RegistryClass::FunctionPassCtor)C, D);
}
- void NotifyRemove(const char *N) override {
+ void NotifyRemove(StringRef N) override {
this->removeLiteralOption(N);
}
};
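Editor's note: with the switch to StringRef, listener overrides now take StringRef parameters. A minimal sketch (the struct name is illustrative, not from the patch):

#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachinePassRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

struct PrintingListener : MachinePassRegistryListener {
  void NotifyAdd(StringRef N, MachinePassCtor C, StringRef D) override {
    errs() << "pass registered: " << N << " - " << D << "\n";
  }
  void NotifyRemove(StringRef N) override {
    errs() << "pass removed: " << N << "\n";
  }
};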
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineRegionInfo.h b/contrib/llvm/include/llvm/CodeGen/MachineRegionInfo.h
index df9823f741dc..21f847c7e5ba 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineRegionInfo.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineRegionInfo.h
@@ -142,10 +142,11 @@ RegionGraphTraits(const MachineRegion, const MachineRegionNode);
template <> struct GraphTraits<MachineRegionInfo*>
: public GraphTraits<FlatIt<MachineRegionNode*> > {
- typedef df_iterator<NodeType*, SmallPtrSet<NodeType*, 8>, false,
- GraphTraits<FlatIt<NodeType*> > > nodes_iterator;
+ typedef df_iterator<NodeRef, df_iterator_default_set<NodeRef>, false,
+ GraphTraits<FlatIt<NodeRef>>>
+ nodes_iterator;
- static NodeType *getEntryNode(MachineRegionInfo *RI) {
+ static NodeRef getEntryNode(MachineRegionInfo *RI) {
return GraphTraits<FlatIt<MachineRegion*> >::getEntryNode(RI->getTopLevelRegion());
}
static nodes_iterator nodes_begin(MachineRegionInfo* RI) {
@@ -158,10 +159,11 @@ template <> struct GraphTraits<MachineRegionInfo*>
template <> struct GraphTraits<MachineRegionInfoPass*>
: public GraphTraits<MachineRegionInfo *> {
- typedef df_iterator<NodeType*, SmallPtrSet<NodeType*, 8>, false,
- GraphTraits<FlatIt<NodeType*> > > nodes_iterator;
+ typedef df_iterator<NodeRef, df_iterator_default_set<NodeRef>, false,
+ GraphTraits<FlatIt<NodeRef>>>
+ nodes_iterator;
- static NodeType *getEntryNode(MachineRegionInfoPass *RI) {
+ static NodeRef getEntryNode(MachineRegionInfoPass *RI) {
return GraphTraits<MachineRegionInfo*>::getEntryNode(&RI->getRegionInfo());
}
static nodes_iterator nodes_begin(MachineRegionInfoPass* RI) {
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineRegisterInfo.h b/contrib/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
index 07d2d016f274..c599caf7535d 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -20,6 +20,7 @@
#include "llvm/ADT/iterator_range.h"
// PointerUnion needs to have access to the full RegisterBank type.
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
+#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -51,7 +52,7 @@ private:
Delegate *TheDelegate;
/// True if subregister liveness is tracked.
- bool TracksSubRegLiveness;
+ const bool TracksSubRegLiveness;
/// VRegInfo - Information we keep for each virtual register.
///
@@ -104,17 +105,9 @@ private:
/// started.
BitVector ReservedRegs;
- typedef DenseMap<unsigned, unsigned> VRegToSizeMap;
+ typedef DenseMap<unsigned, LLT> VRegToTypeMap;
/// Map generic virtual registers to their actual size.
- mutable std::unique_ptr<VRegToSizeMap> VRegToSize;
-
- /// Accessor for VRegToSize. This accessor should only be used
- /// by global-isel related work.
- VRegToSizeMap &getVRegToSize() const {
- if (!VRegToSize)
- VRegToSize.reset(new VRegToSizeMap);
- return *VRegToSize.get();
- }
+ mutable std::unique_ptr<VRegToTypeMap> VRegToType;
/// Keep track of the physical registers that are live in to the function.
/// Live in values are typically arguments in registers. LiveIn values are
@@ -166,7 +159,7 @@ public:
// leaveSSA - Indicates that the machine function is no longer in SSA form.
void leaveSSA() {
- MF->getProperties().clear(MachineFunctionProperties::Property::IsSSA);
+ MF->getProperties().reset(MachineFunctionProperties::Property::IsSSA);
}
/// tracksLiveness - Returns true when tracking register liveness accurately.
@@ -182,7 +175,7 @@ public:
/// This should be called by late passes that invalidate the liveness
/// information.
void invalidateLiveness() {
- MF->getProperties().clear(
+ MF->getProperties().reset(
MachineFunctionProperties::Property::TracksLiveness);
}
@@ -199,10 +192,6 @@ public:
return TracksSubRegLiveness;
}
- void enableSubRegLiveness(bool Enable = true) {
- TracksSubRegLiveness = Enable;
- }
-
//===--------------------------------------------------------------------===//
// Register Info
//===--------------------------------------------------------------------===//
@@ -553,10 +542,9 @@ public:
void dumpUses(unsigned RegNo) const;
#endif
- /// isConstantPhysReg - Returns true if PhysReg is unallocatable and constant
- /// throughout the function. It is safe to move instructions that read such
- /// a physreg.
- bool isConstantPhysReg(unsigned PhysReg, const MachineFunction &MF) const;
+ /// Returns true if PhysReg is unallocatable and constant throughout the
+ /// function. Writing to a constant register has no effect.
+ bool isConstantPhysReg(unsigned PhysReg) const;
/// Get an iterator over the pressure sets affected by the given physical or
/// virtual register. If RegUnit is physical, it must be a register unit (from
@@ -645,18 +633,35 @@ public:
///
unsigned createVirtualRegister(const TargetRegisterClass *RegClass);
- /// Get the size in bits of \p VReg or 0 if VReg is not a generic
+ /// Accessor for VRegToType. This accessor should only be used
+ /// by global-isel related work.
+ VRegToTypeMap &getVRegToType() const {
+ if (!VRegToType)
+ VRegToType.reset(new VRegToTypeMap);
+ return *VRegToType.get();
+ }
+
+ /// Get the low-level type of \p VReg or LLT{} if VReg is not a generic
/// (target independent) virtual register.
- unsigned getSize(unsigned VReg) const;
+ LLT getType(unsigned VReg) const;
+
+ /// Set the low-level type of \p VReg to \p Ty.
+ void setType(unsigned VReg, LLT Ty);
+
+ /// Create and return a new generic virtual register with low-level
+ /// type \p Ty.
+ unsigned createGenericVirtualRegister(LLT Ty);
- /// Set the size in bits of \p VReg to \p Size.
- /// Although the size should be set at build time, mir infrastructure
- /// is not yet able to do it.
- void setSize(unsigned VReg, unsigned Size);
+ /// Remove all types associated with virtual registers (after instruction
+ /// selection and constraining of all generic virtual registers).
+ void clearVirtRegTypes();
- /// Create and return a new generic virtual register with a size of \p Size.
- /// \pre Size > 0.
- unsigned createGenericVirtualRegister(unsigned Size);
+ /// Creates a new virtual register that has no register class, register bank
+ /// or size assigned yet. This is only allowed to be used
+ /// temporarily while constructing machine instructions. Most operations are
+ /// undefined on an incomplete register until one of setRegClass(),
+ /// setRegBank() or setType() has been called on it.
+ unsigned createIncompleteVirtualRegister();
/// getNumVirtRegs - Return the number of virtual registers created.
///
@@ -892,10 +897,11 @@ public:
advance();
} while (Op && Op->getParent() == P);
} else if (ByBundle) {
- MachineInstr &P = getBundleStart(*Op->getParent());
+ MachineBasicBlock::instr_iterator P =
+ getBundleStart(Op->getParent()->getIterator());
do {
advance();
- } while (Op && &getBundleStart(*Op->getParent()) == &P);
+ } while (Op && getBundleStart(Op->getParent()->getIterator()) == P);
}
return *this;
@@ -994,10 +1000,11 @@ public:
advance();
} while (Op && Op->getParent() == P);
} else if (ByBundle) {
- MachineInstr &P = getBundleStart(*Op->getParent());
+ MachineBasicBlock::instr_iterator P =
+ getBundleStart(Op->getParent()->getIterator());
do {
advance();
- } while (Op && &getBundleStart(*Op->getParent()) == &P);
+ } while (Op && getBundleStart(Op->getParent()->getIterator()) == P);
}
return *this;
@@ -1010,7 +1017,7 @@ public:
MachineInstr &operator*() const {
assert(Op && "Cannot dereference end iterator!");
if (ByBundle)
- return getBundleStart(*Op->getParent());
+ return *getBundleStart(Op->getParent()->getIterator());
return *Op->getParent();
}
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineScheduler.h b/contrib/llvm/include/llvm/CodeGen/MachineScheduler.h
index 06e992179031..81b8741fea27 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineScheduler.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineScheduler.h
@@ -42,8 +42,8 @@
//
// ScheduleDAGInstrs *<Target>PassConfig::
// createMachineScheduler(MachineSchedContext *C) {
-// ScheduleDAGMI *DAG = new ScheduleDAGMI(C, CustomStrategy(C));
-// DAG->addMutation(new CustomDependencies(DAG->TII, DAG->TRI));
+// ScheduleDAGMI *DAG = createGenericSchedLive(C);
+// DAG->addMutation(new CustomDAGMutation(...));
// return DAG;
// }
//
@@ -75,12 +75,27 @@
#ifndef LLVM_CODEGEN_MACHINESCHEDULER_H
#define LLVM_CODEGEN_MACHINESCHEDULER_H
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachinePassRegistry.h"
#include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/ScheduleDAGMutation.h"
+#include "llvm/CodeGen/TargetSchedule.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <algorithm>
+#include <cassert>
#include <memory>
+#include <string>
+#include <vector>
namespace llvm {
@@ -91,7 +106,6 @@ class LiveIntervals;
class MachineDominatorTree;
class MachineLoopInfo;
class RegisterClassInfo;
-class ScheduleDAGInstrs;
class SchedDFSResult;
class ScheduleHazardRecognizer;
@@ -126,6 +140,7 @@ public:
: MachinePassRegistryNode(N, D, (MachinePassCtor)C) {
Registry.Add(this);
}
+
~MachineSchedRegistry() { Registry.Remove(this); }
// Accessors.
@@ -133,9 +148,11 @@ public:
MachineSchedRegistry *getNext() const {
return (MachineSchedRegistry *)MachinePassRegistryNode::getNext();
}
+
static MachineSchedRegistry *getList() {
return (MachineSchedRegistry *)Registry.getList();
}
+
static void setListener(MachinePassRegistryListener *L) {
Registry.setListener(L);
}
@@ -173,8 +190,9 @@ struct MachineSchedPolicy {
/// initPolicy -> shouldTrackPressure -> initialize(DAG) -> registerRoots
class MachineSchedStrategy {
virtual void anchor();
+
public:
- virtual ~MachineSchedStrategy() {}
+ virtual ~MachineSchedStrategy() = default;
/// Optionally override the per-region scheduling policy.
virtual void initPolicy(MachineBasicBlock::iterator Begin,
@@ -256,8 +274,7 @@ public:
bool RemoveKillFlags)
: ScheduleDAGInstrs(*C->MF, C->MLI, RemoveKillFlags), AA(C->AA),
LIS(C->LIS), SchedImpl(std::move(S)), Topo(SUnits, &ExitSU),
- CurrentTop(), CurrentBottom(), NextClusterPred(nullptr),
- NextClusterSucc(nullptr) {
+ NextClusterPred(nullptr), NextClusterSucc(nullptr) {
#ifndef NDEBUG
NumInstrsScheduled = 0;
#endif
@@ -278,7 +295,8 @@ public:
///
/// ScheduleDAGMI takes ownership of the Mutation object.
void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) {
- Mutations.push_back(std::move(Mutation));
+ if (Mutation)
+ Mutations.push_back(std::move(Mutation));
}
/// \brief True if an edge can be added from PredSU to SuccSU without creating
@@ -362,6 +380,9 @@ protected:
MachineBasicBlock::iterator LiveRegionEnd;
+ /// Maps vregs to the SUnits of their uses in the current scheduling region.
+ VReg2SUnitMultiMap VRegUses;
+
// Map each SU to its summary of pressure changes. This array is updated for
// liveness during bottom-up scheduling. Top-down scheduling may proceed but
// has no affect on the pressure diffs.
@@ -474,6 +495,8 @@ protected:
void updateScheduledPressure(const SUnit *SU,
const std::vector<unsigned> &NewMaxPressure);
+
+ void collectVRegUses(SUnit &SU);
};
//===----------------------------------------------------------------------===//
@@ -518,9 +541,7 @@ public:
ArrayRef<SUnit*> elements() { return Queue; }
- iterator find(SUnit *SU) {
- return std::find(Queue.begin(), Queue.end(), SU);
- }
+ iterator find(SUnit *SU) { return llvm::find(Queue, SU); }
void push(SUnit *SU) {
Queue.push_back(SU);
@@ -591,10 +612,6 @@ private:
/// instruction.
bool CheckPending;
- // For heuristics, keep a list of the nodes that immediately depend on the
- // most recently scheduled node.
- SmallPtrSet<const SUnit*, 8> NextSUs;
-
/// Number of cycles it takes to issue the instructions scheduled in this
/// zone. It is defined as: scheduled-micro-ops / issue-width + stalls.
/// See getStalls().
@@ -671,10 +688,6 @@ public:
/// Micro-ops issued in the current cycle
unsigned getCurrMOps() const { return CurrMOps; }
- /// Return true if the given SU is used by the most recently scheduled
- /// instruction.
- bool isNextSU(const SUnit *SU) const { return NextSUs.count(SU); }
-
// The latency of dependence chains leading into this zone.
unsigned getDependentLatency() const { return DependentLatency; }
@@ -728,10 +741,6 @@ public:
void releaseNode(SUnit *SU, unsigned ReadyCycle);
- void releaseTopNode(SUnit *SU);
-
- void releaseBottomNode(SUnit *SU);
-
void bumpCycle(unsigned NextCycle);
void incExecutedResources(unsigned PIdx, unsigned Count);
@@ -861,7 +870,7 @@ protected:
const TargetRegisterInfo *TRI;
SchedRemainder Rem;
-protected:
+
GenericSchedulerBase(const MachineSchedContext *C):
Context(C), SchedModel(nullptr), TRI(nullptr) {}
@@ -876,18 +885,6 @@ protected:
/// GenericScheduler shrinks the unscheduled zone using heuristics to balance
/// the schedule.
class GenericScheduler : public GenericSchedulerBase {
- ScheduleDAGMILive *DAG;
-
- // State of the top and bottom scheduled instruction boundaries.
- SchedBoundary Top;
- SchedBoundary Bot;
-
- /// Candidate last picked from Top boundary.
- SchedCandidate TopCand;
- /// Candidate last picked from Bot boundary.
- SchedCandidate BotCand;
-
- MachineSchedPolicy RegionPolicy;
public:
GenericScheduler(const MachineSchedContext *C):
GenericSchedulerBase(C), DAG(nullptr), Top(SchedBoundary::TopQID, "TopQ"),
@@ -914,18 +911,37 @@ public:
void schedNode(SUnit *SU, bool IsTopNode) override;
void releaseTopNode(SUnit *SU) override {
- Top.releaseTopNode(SU);
+ if (SU->isScheduled)
+ return;
+
+ Top.releaseNode(SU, SU->TopReadyCycle);
TopCand.SU = nullptr;
}
void releaseBottomNode(SUnit *SU) override {
- Bot.releaseBottomNode(SU);
+ if (SU->isScheduled)
+ return;
+
+ Bot.releaseNode(SU, SU->BotReadyCycle);
BotCand.SU = nullptr;
}
void registerRoots() override;
protected:
+ ScheduleDAGMILive *DAG;
+
+ MachineSchedPolicy RegionPolicy;
+
+ // State of the top and bottom scheduled instruction boundaries.
+ SchedBoundary Top;
+ SchedBoundary Bot;
+
+ /// Candidate last picked from Top boundary.
+ SchedCandidate TopCand;
+ /// Candidate last picked from Bot boundary.
+ SchedCandidate BotCand;
+
void checkAcyclicLatency();
void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop,
@@ -955,11 +971,12 @@ class PostGenericScheduler : public GenericSchedulerBase {
ScheduleDAGMI *DAG;
SchedBoundary Top;
SmallVector<SUnit*, 8> BotRoots;
+
public:
PostGenericScheduler(const MachineSchedContext *C):
GenericSchedulerBase(C), Top(SchedBoundary::TopQID, "TopQ") {}
- ~PostGenericScheduler() override {}
+ ~PostGenericScheduler() override = default;
void initPolicy(MachineBasicBlock::iterator Begin,
MachineBasicBlock::iterator End,
@@ -983,7 +1000,9 @@ public:
void schedNode(SUnit *SU, bool IsTopNode) override;
void releaseTopNode(SUnit *SU) override {
- Top.releaseTopNode(SU);
+ if (SU->isScheduled)
+ return;
+ Top.releaseNode(SU, SU->TopReadyCycle);
}
// Only called for roots.
@@ -997,6 +1016,29 @@ protected:
void pickNodeFromQueue(SchedCandidate &Cand);
};
-} // namespace llvm
+/// Create the standard converging machine scheduler. This will be used as the
+/// default scheduler if the target does not set a default.
+/// Adds default DAG mutations.
+ScheduleDAGMILive *createGenericSchedLive(MachineSchedContext *C);
-#endif
+/// Create a generic scheduler with no vreg liveness or DAG mutation passes.
+ScheduleDAGMI *createGenericSchedPostRA(MachineSchedContext *C);
+
+std::unique_ptr<ScheduleDAGMutation>
+createLoadClusterDAGMutation(const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI);
+
+std::unique_ptr<ScheduleDAGMutation>
+createStoreClusterDAGMutation(const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI);
+
+std::unique_ptr<ScheduleDAGMutation>
+createMacroFusionDAGMutation(const TargetInstrInfo *TII);
+
+std::unique_ptr<ScheduleDAGMutation>
+createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI);
+
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_MACHINESCHEDULER_H
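Editor's note: a sketch of a target's createMachineScheduler hook written against the new factory functions declared above; the hook body and name are illustrative, not part of the patch:

#include "llvm/CodeGen/MachineScheduler.h"
using namespace llvm;

static ScheduleDAGInstrs *createCustomScheduler(MachineSchedContext *C,
                                                const TargetInstrInfo *TII,
                                                const TargetRegisterInfo *TRI) {
  // Standard converging scheduler plus default DAG mutations.
  ScheduleDAGMILive *DAG = createGenericSchedLive(C);
  // addMutation() now ignores null, so mutation factories may opt out.
  DAG->addMutation(createLoadClusterDAGMutation(TII, TRI));
  DAG->addMutation(createMacroFusionDAGMutation(TII));
  return DAG;
}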
diff --git a/contrib/llvm/include/llvm/CodeGen/MachineValueType.h b/contrib/llvm/include/llvm/CodeGen/MachineValueType.h
index 0bb53d1a5374..de7064f07c3e 100644
--- a/contrib/llvm/include/llvm/CodeGen/MachineValueType.h
+++ b/contrib/llvm/include/llvm/CodeGen/MachineValueType.h
@@ -178,8 +178,8 @@ class MVT {
SimpleValueType SimpleTy;
- LLVM_CONSTEXPR MVT() : SimpleTy(INVALID_SIMPLE_VALUE_TYPE) {}
- LLVM_CONSTEXPR MVT(SimpleValueType SVT) : SimpleTy(SVT) { }
+ constexpr MVT() : SimpleTy(INVALID_SIMPLE_VALUE_TYPE) {}
+ constexpr MVT(SimpleValueType SVT) : SimpleTy(SVT) {}
bool operator>(const MVT& S) const { return SimpleTy > S.SimpleTy; }
bool operator<(const MVT& S) const { return SimpleTy < S.SimpleTy; }
diff --git a/contrib/llvm/include/llvm/CodeGen/PBQP/Graph.h b/contrib/llvm/include/llvm/CodeGen/PBQP/Graph.h
index 8301ca4d8536..83487e6a808a 100644
--- a/contrib/llvm/include/llvm/CodeGen/PBQP/Graph.h
+++ b/contrib/llvm/include/llvm/CodeGen/PBQP/Graph.h
@@ -15,6 +15,7 @@
#ifndef LLVM_CODEGEN_PBQP_GRAPH_H
#define LLVM_CODEGEN_PBQP_GRAPH_H
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Debug.h"
#include <algorithm>
#include <cassert>
@@ -109,13 +110,6 @@ namespace PBQP {
ThisEdgeAdjIdxs[1] = NodeEntry::getInvalidAdjEdgeIdx();
}
- void invalidate() {
- NIds[0] = NIds[1] = Graph::invalidNodeId();
- ThisEdgeAdjIdxs[0] = ThisEdgeAdjIdxs[1] =
- NodeEntry::getInvalidAdjEdgeIdx();
- Costs = nullptr;
- }
-
void connectToN(Graph &G, EdgeId ThisEdgeId, unsigned NIdx) {
assert(ThisEdgeAdjIdxs[NIdx] == NodeEntry::getInvalidAdjEdgeIdx() &&
"Edge already connected to NIds[NIdx].");
@@ -123,15 +117,6 @@ namespace PBQP {
ThisEdgeAdjIdxs[NIdx] = N.addAdjEdgeId(ThisEdgeId);
}
- void connectTo(Graph &G, EdgeId ThisEdgeId, NodeId NId) {
- if (NId == NIds[0])
- connectToN(G, ThisEdgeId, 0);
- else {
- assert(NId == NIds[1] && "Edge does not connect NId.");
- connectToN(G, ThisEdgeId, 1);
- }
- }
-
void connect(Graph &G, EdgeId ThisEdgeId) {
connectToN(G, ThisEdgeId, 0);
connectToN(G, ThisEdgeId, 1);
@@ -262,9 +247,7 @@ namespace PBQP {
private:
NodeId findNextInUse(NodeId NId) const {
- while (NId < EndNId &&
- std::find(FreeNodeIds.begin(), FreeNodeIds.end(), NId) !=
- FreeNodeIds.end()) {
+ while (NId < EndNId && is_contained(FreeNodeIds, NId)) {
++NId;
}
return NId;
@@ -288,9 +271,7 @@ namespace PBQP {
private:
EdgeId findNextInUse(EdgeId EId) const {
- while (EId < EndEId &&
- std::find(FreeEdgeIds.begin(), FreeEdgeIds.end(), EId) !=
- FreeEdgeIds.end()) {
+ while (EId < EndEId && is_contained(FreeEdgeIds, EId)) {
++EId;
}
return EId;
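Editor's note: the std::find-against-end pattern here is replaced by llvm::is_contained from STLExtras.h. A trivial sketch of the idiom (container and helper name are placeholders):

#include "llvm/ADT/STLExtras.h"
#include <vector>
using namespace llvm;

// Same result as std::find(Ids.begin(), Ids.end(), Id) != Ids.end().
static bool isFree(const std::vector<unsigned> &Ids, unsigned Id) {
  return is_contained(Ids, Id);
}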
diff --git a/contrib/llvm/include/llvm/CodeGen/PBQP/Math.h b/contrib/llvm/include/llvm/CodeGen/PBQP/Math.h
index 2792608e29cc..278787550a43 100644
--- a/contrib/llvm/include/llvm/CodeGen/PBQP/Math.h
+++ b/contrib/llvm/include/llvm/CodeGen/PBQP/Math.h
@@ -27,121 +27,78 @@ public:
/// \brief Construct a PBQP vector of the given size.
explicit Vector(unsigned Length)
- : Length(Length), Data(new PBQPNum[Length]) {
- // llvm::dbgs() << "Constructing PBQP::Vector "
- // << this << " (length " << Length << ")\n";
- }
+ : Length(Length), Data(llvm::make_unique<PBQPNum []>(Length)) {}
/// \brief Construct a PBQP vector with initializer.
Vector(unsigned Length, PBQPNum InitVal)
- : Length(Length), Data(new PBQPNum[Length]) {
- // llvm::dbgs() << "Constructing PBQP::Vector "
- // << this << " (length " << Length << ", fill "
- // << InitVal << ")\n";
- std::fill(Data, Data + Length, InitVal);
+ : Length(Length), Data(llvm::make_unique<PBQPNum []>(Length)) {
+ std::fill(Data.get(), Data.get() + Length, InitVal);
}
/// \brief Copy construct a PBQP vector.
Vector(const Vector &V)
- : Length(V.Length), Data(new PBQPNum[Length]) {
- // llvm::dbgs() << "Copy-constructing PBQP::Vector " << this
- // << " from PBQP::Vector " << &V << "\n";
- std::copy(V.Data, V.Data + Length, Data);
+ : Length(V.Length), Data(llvm::make_unique<PBQPNum []>(Length)) {
+ std::copy(V.Data.get(), V.Data.get() + Length, Data.get());
}
/// \brief Move construct a PBQP vector.
Vector(Vector &&V)
- : Length(V.Length), Data(V.Data) {
- V.Length = 0;
- V.Data = nullptr;
- }
-
- /// \brief Destroy this vector, return its memory.
- ~Vector() {
- // llvm::dbgs() << "Deleting PBQP::Vector " << this << "\n";
- delete[] Data;
- }
-
- /// \brief Copy-assignment operator.
- Vector& operator=(const Vector &V) {
- // llvm::dbgs() << "Assigning to PBQP::Vector " << this
- // << " from PBQP::Vector " << &V << "\n";
- delete[] Data;
- Length = V.Length;
- Data = new PBQPNum[Length];
- std::copy(V.Data, V.Data + Length, Data);
- return *this;
- }
-
- /// \brief Move-assignment operator.
- Vector& operator=(Vector &&V) {
- delete[] Data;
- Length = V.Length;
- Data = V.Data;
+ : Length(V.Length), Data(std::move(V.Data)) {
V.Length = 0;
- V.Data = nullptr;
- return *this;
}
/// \brief Comparison operator.
bool operator==(const Vector &V) const {
- assert(Length != 0 && Data != nullptr && "Invalid vector");
+ assert(Length != 0 && Data && "Invalid vector");
if (Length != V.Length)
return false;
- return std::equal(Data, Data + Length, V.Data);
+ return std::equal(Data.get(), Data.get() + Length, V.Data.get());
}
/// \brief Return the length of the vector
unsigned getLength() const {
- assert(Length != 0 && Data != nullptr && "Invalid vector");
+ assert(Length != 0 && Data && "Invalid vector");
return Length;
}
/// \brief Element access.
PBQPNum& operator[](unsigned Index) {
- assert(Length != 0 && Data != nullptr && "Invalid vector");
+ assert(Length != 0 && Data && "Invalid vector");
assert(Index < Length && "Vector element access out of bounds.");
return Data[Index];
}
/// \brief Const element access.
const PBQPNum& operator[](unsigned Index) const {
- assert(Length != 0 && Data != nullptr && "Invalid vector");
+ assert(Length != 0 && Data && "Invalid vector");
assert(Index < Length && "Vector element access out of bounds.");
return Data[Index];
}
/// \brief Add another vector to this one.
Vector& operator+=(const Vector &V) {
- assert(Length != 0 && Data != nullptr && "Invalid vector");
- assert(Length == V.Length && "Vector length mismatch.");
- std::transform(Data, Data + Length, V.Data, Data, std::plus<PBQPNum>());
- return *this;
- }
-
- /// \brief Subtract another vector from this one.
- Vector& operator-=(const Vector &V) {
- assert(Length != 0 && Data != nullptr && "Invalid vector");
+ assert(Length != 0 && Data && "Invalid vector");
assert(Length == V.Length && "Vector length mismatch.");
- std::transform(Data, Data + Length, V.Data, Data, std::minus<PBQPNum>());
+ std::transform(Data.get(), Data.get() + Length, V.Data.get(), Data.get(),
+ std::plus<PBQPNum>());
return *this;
}
/// \brief Returns the index of the minimum value in this vector
unsigned minIndex() const {
- assert(Length != 0 && Data != nullptr && "Invalid vector");
- return std::min_element(Data, Data + Length) - Data;
+ assert(Length != 0 && Data && "Invalid vector");
+ return std::min_element(Data.get(), Data.get() + Length) - Data.get();
}
private:
unsigned Length;
- PBQPNum *Data;
+ std::unique_ptr<PBQPNum []> Data;
};
/// \brief Return a hash_value for the given vector.
inline hash_code hash_value(const Vector &V) {
- unsigned *VBegin = reinterpret_cast<unsigned*>(V.Data);
- unsigned *VEnd = reinterpret_cast<unsigned*>(V.Data + V.Length);
+ unsigned *VBegin = reinterpret_cast<unsigned*>(V.Data.get());
+ unsigned *VEnd = reinterpret_cast<unsigned*>(V.Data.get() + V.Length);
return hash_combine(V.Length, hash_combine_range(VBegin, VEnd));
}
@@ -167,89 +124,67 @@ public:
/// \brief Construct a PBQP Matrix with the given dimensions.
Matrix(unsigned Rows, unsigned Cols) :
- Rows(Rows), Cols(Cols), Data(new PBQPNum[Rows * Cols]) {
+ Rows(Rows), Cols(Cols), Data(llvm::make_unique<PBQPNum []>(Rows * Cols)) {
}
/// \brief Construct a PBQP Matrix with the given dimensions and initial
/// value.
Matrix(unsigned Rows, unsigned Cols, PBQPNum InitVal)
- : Rows(Rows), Cols(Cols), Data(new PBQPNum[Rows * Cols]) {
- std::fill(Data, Data + (Rows * Cols), InitVal);
+ : Rows(Rows), Cols(Cols),
+ Data(llvm::make_unique<PBQPNum []>(Rows * Cols)) {
+ std::fill(Data.get(), Data.get() + (Rows * Cols), InitVal);
}
/// \brief Copy construct a PBQP matrix.
Matrix(const Matrix &M)
- : Rows(M.Rows), Cols(M.Cols), Data(new PBQPNum[Rows * Cols]) {
- std::copy(M.Data, M.Data + (Rows * Cols), Data);
+ : Rows(M.Rows), Cols(M.Cols),
+ Data(llvm::make_unique<PBQPNum []>(Rows * Cols)) {
+ std::copy(M.Data.get(), M.Data.get() + (Rows * Cols), Data.get());
}
/// \brief Move construct a PBQP matrix.
Matrix(Matrix &&M)
- : Rows(M.Rows), Cols(M.Cols), Data(M.Data) {
+ : Rows(M.Rows), Cols(M.Cols), Data(std::move(M.Data)) {
M.Rows = M.Cols = 0;
- M.Data = nullptr;
- }
-
- /// \brief Destroy this matrix, return its memory.
- ~Matrix() { delete[] Data; }
-
- /// \brief Copy-assignment operator.
- Matrix& operator=(const Matrix &M) {
- delete[] Data;
- Rows = M.Rows; Cols = M.Cols;
- Data = new PBQPNum[Rows * Cols];
- std::copy(M.Data, M.Data + (Rows * Cols), Data);
- return *this;
- }
-
- /// \brief Move-assignment operator.
- Matrix& operator=(Matrix &&M) {
- delete[] Data;
- Rows = M.Rows;
- Cols = M.Cols;
- Data = M.Data;
- M.Rows = M.Cols = 0;
- M.Data = nullptr;
- return *this;
}
/// \brief Comparison operator.
bool operator==(const Matrix &M) const {
- assert(Rows != 0 && Cols != 0 && Data != nullptr && "Invalid matrix");
+ assert(Rows != 0 && Cols != 0 && Data && "Invalid matrix");
if (Rows != M.Rows || Cols != M.Cols)
return false;
- return std::equal(Data, Data + (Rows * Cols), M.Data);
+ return std::equal(Data.get(), Data.get() + (Rows * Cols), M.Data.get());
}
/// \brief Return the number of rows in this matrix.
unsigned getRows() const {
- assert(Rows != 0 && Cols != 0 && Data != nullptr && "Invalid matrix");
+ assert(Rows != 0 && Cols != 0 && Data && "Invalid matrix");
return Rows;
}
/// \brief Return the number of cols in this matrix.
unsigned getCols() const {
- assert(Rows != 0 && Cols != 0 && Data != nullptr && "Invalid matrix");
+ assert(Rows != 0 && Cols != 0 && Data && "Invalid matrix");
return Cols;
}
/// \brief Matrix element access.
PBQPNum* operator[](unsigned R) {
- assert(Rows != 0 && Cols != 0 && Data != nullptr && "Invalid matrix");
+ assert(Rows != 0 && Cols != 0 && Data && "Invalid matrix");
assert(R < Rows && "Row out of bounds.");
- return Data + (R * Cols);
+ return Data.get() + (R * Cols);
}
/// \brief Matrix element access.
const PBQPNum* operator[](unsigned R) const {
- assert(Rows != 0 && Cols != 0 && Data != nullptr && "Invalid matrix");
+ assert(Rows != 0 && Cols != 0 && Data && "Invalid matrix");
assert(R < Rows && "Row out of bounds.");
- return Data + (R * Cols);
+ return Data.get() + (R * Cols);
}
/// \brief Returns the given row as a vector.
Vector getRowAsVector(unsigned R) const {
- assert(Rows != 0 && Cols != 0 && Data != nullptr && "Invalid matrix");
+ assert(Rows != 0 && Cols != 0 && Data && "Invalid matrix");
Vector V(Cols);
for (unsigned C = 0; C < Cols; ++C)
V[C] = (*this)[R][C];
@@ -258,40 +193,16 @@ public:
/// \brief Returns the given column as a vector.
Vector getColAsVector(unsigned C) const {
- assert(Rows != 0 && Cols != 0 && Data != nullptr && "Invalid matrix");
+ assert(Rows != 0 && Cols != 0 && Data && "Invalid matrix");
Vector V(Rows);
for (unsigned R = 0; R < Rows; ++R)
V[R] = (*this)[R][C];
return V;
}
- /// \brief Reset the matrix to the given value.
- Matrix& reset(PBQPNum Val = 0) {
- assert(Rows != 0 && Cols != 0 && Data != nullptr && "Invalid matrix");
- std::fill(Data, Data + (Rows * Cols), Val);
- return *this;
- }
-
- /// \brief Set a single row of this matrix to the given value.
- Matrix& setRow(unsigned R, PBQPNum Val) {
- assert(Rows != 0 && Cols != 0 && Data != nullptr && "Invalid matrix");
- assert(R < Rows && "Row out of bounds.");
- std::fill(Data + (R * Cols), Data + ((R + 1) * Cols), Val);
- return *this;
- }
-
- /// \brief Set a single column of this matrix to the given value.
- Matrix& setCol(unsigned C, PBQPNum Val) {
- assert(Rows != 0 && Cols != 0 && Data != nullptr && "Invalid matrix");
- assert(C < Cols && "Column out of bounds.");
- for (unsigned R = 0; R < Rows; ++R)
- (*this)[R][C] = Val;
- return *this;
- }
-
/// \brief Matrix transpose.
Matrix transpose() const {
- assert(Rows != 0 && Cols != 0 && Data != nullptr && "Invalid matrix");
+ assert(Rows != 0 && Cols != 0 && Data && "Invalid matrix");
Matrix M(Cols, Rows);
for (unsigned r = 0; r < Rows; ++r)
for (unsigned c = 0; c < Cols; ++c)
@@ -299,87 +210,33 @@ public:
return M;
}
- /// \brief Returns the diagonal of the matrix as a vector.
- ///
- /// Matrix must be square.
- Vector diagonalize() const {
- assert(Rows != 0 && Cols != 0 && Data != nullptr && "Invalid matrix");
- assert(Rows == Cols && "Attempt to diagonalize non-square matrix.");
- Vector V(Rows);
- for (unsigned r = 0; r < Rows; ++r)
- V[r] = (*this)[r][r];
- return V;
- }
-
/// \brief Add the given matrix to this one.
Matrix& operator+=(const Matrix &M) {
- assert(Rows != 0 && Cols != 0 && Data != nullptr && "Invalid matrix");
+ assert(Rows != 0 && Cols != 0 && Data && "Invalid matrix");
assert(Rows == M.Rows && Cols == M.Cols &&
"Matrix dimensions mismatch.");
- std::transform(Data, Data + (Rows * Cols), M.Data, Data,
- std::plus<PBQPNum>());
+ std::transform(Data.get(), Data.get() + (Rows * Cols), M.Data.get(),
+ Data.get(), std::plus<PBQPNum>());
return *this;
}
Matrix operator+(const Matrix &M) {
- assert(Rows != 0 && Cols != 0 && Data != nullptr && "Invalid matrix");
+ assert(Rows != 0 && Cols != 0 && Data && "Invalid matrix");
Matrix Tmp(*this);
Tmp += M;
return Tmp;
}
- /// \brief Returns the minimum of the given row
- PBQPNum getRowMin(unsigned R) const {
- assert(Rows != 0 && Cols != 0 && Data != nullptr && "Invalid matrix");
- assert(R < Rows && "Row out of bounds");
- return *std::min_element(Data + (R * Cols), Data + ((R + 1) * Cols));
- }
-
- /// \brief Returns the minimum of the given column
- PBQPNum getColMin(unsigned C) const {
- assert(Rows != 0 && Cols != 0 && Data != nullptr && "Invalid matrix");
- PBQPNum MinElem = (*this)[0][C];
- for (unsigned R = 1; R < Rows; ++R)
- if ((*this)[R][C] < MinElem)
- MinElem = (*this)[R][C];
- return MinElem;
- }
-
- /// \brief Subtracts the given scalar from the elements of the given row.
- Matrix& subFromRow(unsigned R, PBQPNum Val) {
- assert(Rows != 0 && Cols != 0 && Data != nullptr && "Invalid matrix");
- assert(R < Rows && "Row out of bounds");
- std::transform(Data + (R * Cols), Data + ((R + 1) * Cols),
- Data + (R * Cols),
- std::bind2nd(std::minus<PBQPNum>(), Val));
- return *this;
- }
-
- /// \brief Subtracts the given scalar from the elements of the given column.
- Matrix& subFromCol(unsigned C, PBQPNum Val) {
- assert(Rows != 0 && Cols != 0 && Data != nullptr && "Invalid matrix");
- for (unsigned R = 0; R < Rows; ++R)
- (*this)[R][C] -= Val;
- return *this;
- }
-
- /// \brief Returns true if this is a zero matrix.
- bool isZero() const {
- assert(Rows != 0 && Cols != 0 && Data != nullptr && "Invalid matrix");
- return find_if(Data, Data + (Rows * Cols),
- std::bind2nd(std::not_equal_to<PBQPNum>(), 0)) ==
- Data + (Rows * Cols);
- }
-
private:
unsigned Rows, Cols;
- PBQPNum *Data;
+ std::unique_ptr<PBQPNum []> Data;
};
/// \brief Return a hash_code for the given matrix.
inline hash_code hash_value(const Matrix &M) {
- unsigned *MBegin = reinterpret_cast<unsigned*>(M.Data);
- unsigned *MEnd = reinterpret_cast<unsigned*>(M.Data + (M.Rows * M.Cols));
+ unsigned *MBegin = reinterpret_cast<unsigned*>(M.Data.get());
+ unsigned *MEnd =
+ reinterpret_cast<unsigned*>(M.Data.get() + (M.Rows * M.Cols));
return hash_combine(M.Rows, M.Cols, hash_combine_range(MBegin, MEnd));
}
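
The Vector and Matrix changes above swap manually managed PBQPNum arrays for std::unique_ptr<PBQPNum[]>, which is why the hand-written destructor and assignment operators disappear and every raw pointer use goes through Data.get(). A minimal stand-alone sketch of that ownership idiom follows; OwningVector is a hypothetical class written only to illustrate the pattern, not the PBQP type itself, and it assumes C++14's std::make_unique in place of LLVM's llvm::make_unique shim.

#include <algorithm>
#include <cassert>
#include <functional>
#include <memory>

// Hypothetical stand-in for PBQP::Vector showing the unique_ptr<T[]> idiom.
class OwningVector {
  unsigned Length;
  std::unique_ptr<double[]> Data;

public:
  explicit OwningVector(unsigned Length)
      : Length(Length), Data(std::make_unique<double[]>(Length)) {}

  // Deep copy: allocate fresh storage, then copy the elements.
  OwningVector(const OwningVector &V)
      : Length(V.Length), Data(std::make_unique<double[]>(V.Length)) {
    std::copy(V.Data.get(), V.Data.get() + Length, Data.get());
  }

  // Moves just transfer the buffer; no destructor or delete[] is needed.
  OwningVector(OwningVector &&V) = default;
  OwningVector &operator=(OwningVector &&V) = default;

  OwningVector &operator+=(const OwningVector &V) {
    assert(Length == V.Length && "Vector length mismatch.");
    std::transform(Data.get(), Data.get() + Length, V.Data.get(), Data.get(),
                   std::plus<double>());
    return *this;
  }

  double &operator[](unsigned I) { return Data[I]; }
};

Copying is the only operation that still needs hand-written code here, since unique_ptr is move-only.
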
diff --git a/contrib/llvm/include/llvm/CodeGen/PBQP/Solution.h b/contrib/llvm/include/llvm/CodeGen/PBQP/Solution.h
index a3bfaeb7e6c7..bd74805a2397 100644
--- a/contrib/llvm/include/llvm/CodeGen/PBQP/Solution.h
+++ b/contrib/llvm/include/llvm/CodeGen/PBQP/Solution.h
@@ -38,38 +38,6 @@ namespace PBQP {
Solution()
: r0Reductions(0), r1Reductions(0), r2Reductions(0), rNReductions(0) {}
- /// \brief Number of nodes for which selections have been made.
- /// @return Number of nodes for which selections have been made.
- unsigned numNodes() const { return selections.size(); }
-
- /// \brief Records a reduction via the R0 rule. Should be called from the
- /// solver only.
- void recordR0() { ++r0Reductions; }
-
- /// \brief Returns the number of R0 reductions applied to solve the problem.
- unsigned numR0Reductions() const { return r0Reductions; }
-
- /// \brief Records a reduction via the R1 rule. Should be called from the
- /// solver only.
- void recordR1() { ++r1Reductions; }
-
- /// \brief Returns the number of R1 reductions applied to solve the problem.
- unsigned numR1Reductions() const { return r1Reductions; }
-
- /// \brief Records a reduction via the R2 rule. Should be called from the
- /// solver only.
- void recordR2() { ++r2Reductions; }
-
- /// \brief Returns the number of R2 reductions applied to solve the problem.
- unsigned numR2Reductions() const { return r2Reductions; }
-
- /// \brief Records a reduction via the RN rule. Should be called from the
- /// solver only.
- void recordRN() { ++ rNReductions; }
-
- /// \brief Returns the number of RN reductions applied to solve the problem.
- unsigned numRNReductions() const { return rNReductions; }
-
/// \brief Set the selection for a given node.
/// @param nodeId Node id.
/// @param selection Selection for nodeId.
diff --git a/contrib/llvm/include/llvm/CodeGen/Passes.h b/contrib/llvm/include/llvm/CodeGen/Passes.h
index ae9e5dfe2d65..a9fd301691d6 100644
--- a/contrib/llvm/include/llvm/CodeGen/Passes.h
+++ b/contrib/llvm/include/llvm/CodeGen/Passes.h
@@ -20,8 +20,8 @@
namespace llvm {
-class Function;
class FunctionPass;
+class MachineFunction;
class MachineFunctionPass;
class ModulePass;
class Pass;
@@ -43,6 +43,9 @@ namespace llvm {
/// the entry block.
FunctionPass *createUnreachableBlockEliminationPass();
+ /// Insert mcount-like function calls.
+ FunctionPass *createCountingFunctionInserterPass();
+
/// MachineFunctionPrinter pass - This pass prints out the machine function to
/// the given stream as a debugging tool.
MachineFunctionPass *
@@ -53,6 +56,12 @@ namespace llvm {
/// using the MIR serialization format.
MachineFunctionPass *createPrintMIRPass(raw_ostream &OS);
+ /// This pass resets a MachineFunction when it has the FailedISel property
+ /// as if it was just created.
+ /// If EmitFallbackDiag is true, the pass will emit a
+ /// DiagnosticInfoISelFallback for every MachineFunction it resets.
+ MachineFunctionPass *createResetMachineFunctionPass(bool EmitFallbackDiag);
+
/// createCodeGenPreparePass - Transform the code to expose more pattern
/// matching during instruction selection.
FunctionPass *createCodeGenPreparePass(const TargetMachine *TM = nullptr);
@@ -115,6 +124,9 @@ namespace llvm {
// instruction and update the MachineFunctionInfo with that information.
extern char &ShrinkWrapID;
+ /// Greedy register allocator.
+ extern char &RAGreedyID;
+
/// VirtRegRewriter pass. Rewrite virtual registers to physical registers as
/// assigned in VirtRegMap.
extern char &VirtRegRewriterID;
@@ -172,6 +184,10 @@ namespace llvm {
/// branches.
extern char &BranchFolderPassID;
+ /// BranchRelaxation - This pass replaces branches that need to jump further
+ /// than is supported by a branch instruction.
+ extern char &BranchRelaxationPassID;
+
/// MachineFunctionPrinterPass - This pass prints out MachineInstr's.
extern char &MachineFunctionPrinterPassID;
@@ -202,7 +218,8 @@ namespace llvm {
/// IfConverter - This pass performs machine code if conversion.
extern char &IfConverterID;
- FunctionPass *createIfConverter(std::function<bool(const Function &)> Ftor);
+ FunctionPass *createIfConverter(
+ std::function<bool(const MachineFunction &)> Ftor);
/// MachineBlockPlacement - This pass places basic blocks based on branch
/// probabilities.
@@ -313,7 +330,7 @@ namespace llvm {
extern char &UnpackMachineBundlesID;
FunctionPass *
- createUnpackMachineBundles(std::function<bool(const Function &)> Ftor);
+ createUnpackMachineBundles(std::function<bool(const MachineFunction &)> Ftor);
/// FinalizeMachineBundles - This pass finalize machine instruction
/// bundles (created earlier, e.g. during pre-RA scheduling).
@@ -374,6 +391,12 @@ namespace llvm {
/// and propagates register usage information of callee to caller
/// if available with PhysicalRegisterUsageInfo pass.
FunctionPass *createRegUsageInfoPropPass();
+
+ /// This pass performs software pipelining on machine instructions.
+ extern char &MachinePipelinerID;
+
+ /// This pass frees the memory occupied by the MachineFunction.
+ FunctionPass *createFreeMachineFunctionPass();
} // End llvm namespace
/// Target machine pass initializer for passes with dependencies. Use with
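
Among the additions above, createIfConverter and createUnpackMachineBundles now take a predicate over MachineFunction instead of the IR-level Function, so gating decisions can look at machine-level properties. A rough sketch of a caller, with the block-count threshold invented purely for illustration:

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/Passes.h"

using namespace llvm;

// Build an if-converter that only runs on reasonably small machine functions.
// The 500-block cutoff is an arbitrary example value, not an LLVM default.
FunctionPass *buildSelectiveIfConverter() {
  return createIfConverter(
      [](const MachineFunction &MF) { return MF.size() < 500; });
}
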
diff --git a/contrib/llvm/include/llvm/CodeGen/PseudoSourceValue.h b/contrib/llvm/include/llvm/CodeGen/PseudoSourceValue.h
index c3f6fde9fb3f..681ccb4b997c 100644
--- a/contrib/llvm/include/llvm/CodeGen/PseudoSourceValue.h
+++ b/contrib/llvm/include/llvm/CodeGen/PseudoSourceValue.h
@@ -42,7 +42,8 @@ public:
ConstantPool,
FixedStack,
GlobalValueCallEntry,
- ExternalSymbolCallEntry
+ ExternalSymbolCallEntry,
+ TargetCustom
};
private:
@@ -67,6 +68,9 @@ public:
bool isGOT() const { return Kind == GOT; }
bool isConstantPool() const { return Kind == ConstantPool; }
bool isJumpTable() const { return Kind == JumpTable; }
+ unsigned getTargetCustom() const {
+ return (Kind >= TargetCustom) ? ((Kind+1) - TargetCustom) : 0;
+ }
/// Test whether the memory pointed to by this PseudoSourceValue has a
/// constant value.
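
TargetCustom lets targets append their own PseudoSourceValue kinds after the built-in ones, and getTargetCustom() turns such a kind into a 1-based index (0 meaning "not a target-custom kind"). A self-contained rework of that arithmetic, with the enum layout copied from the diff and the sample kinds invented for illustration:

#include <cstdio>

// Mirror of the PSVKind layout above; only the arithmetic matters here.
enum PSVKind {
  Stack, GOT, JumpTable, ConstantPool, FixedStack,
  GlobalValueCallEntry, ExternalSymbolCallEntry, TargetCustom
};

// Same formula as PseudoSourceValue::getTargetCustom().
unsigned getTargetCustom(unsigned Kind) {
  return (Kind >= TargetCustom) ? ((Kind + 1) - TargetCustom) : 0;
}

int main() {
  std::printf("%u %u %u\n",
              getTargetCustom(GOT),               // 0: built-in kind
              getTargetCustom(TargetCustom),      // 1: first custom kind
              getTargetCustom(TargetCustom + 1)); // 2: second custom kind
}
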
diff --git a/contrib/llvm/include/llvm/CodeGen/RegAllocPBQP.h b/contrib/llvm/include/llvm/CodeGen/RegAllocPBQP.h
index 21952272ffdb..2cad90bbb703 100644
--- a/contrib/llvm/include/llvm/CodeGen/RegAllocPBQP.h
+++ b/contrib/llvm/include/llvm/CodeGen/RegAllocPBQP.h
@@ -89,26 +89,7 @@ public:
std::copy(OptVec.begin(), OptVec.end(), Opts.get());
}
- AllowedRegVector(const AllowedRegVector &Other)
- : NumOpts(Other.NumOpts), Opts(new unsigned[NumOpts]) {
- std::copy(Other.Opts.get(), Other.Opts.get() + NumOpts, Opts.get());
- }
-
- AllowedRegVector(AllowedRegVector &&Other)
- : NumOpts(std::move(Other.NumOpts)), Opts(std::move(Other.Opts)) {}
-
- AllowedRegVector& operator=(const AllowedRegVector &Other) {
- NumOpts = Other.NumOpts;
- Opts.reset(new unsigned[NumOpts]);
- std::copy(Other.Opts.get(), Other.Opts.get() + NumOpts, Opts.get());
- return *this;
- }
-
- AllowedRegVector& operator=(AllowedRegVector &&Other) {
- NumOpts = std::move(Other.NumOpts);
- Opts = std::move(Other.Opts);
- return *this;
- }
+ AllowedRegVector(AllowedRegVector &&) = default;
unsigned size() const { return NumOpts; }
unsigned operator[](size_t I) const { return Opts[I]; }
@@ -163,10 +144,6 @@ public:
return VRegItr->second;
}
- void eraseNodeIdForVReg(unsigned VReg) {
- VRegToNodeId.erase(VReg);
- }
-
AllowedRegVecRef getAllowedRegs(AllowedRegVector Allowed) {
return AllowedRegVecs.getValue(std::move(Allowed));
}
@@ -199,8 +176,6 @@ public:
#endif
{}
- // FIXME: Re-implementing default behavior to work around MSVC. Remove once
- // MSVC synthesizes move constructors properly.
NodeMetadata(const NodeMetadata &Other)
: RS(Other.RS), NumOpts(Other.NumOpts), DeniedOpts(Other.DeniedOpts),
OptUnsafeEdges(new unsigned[NumOpts]), VReg(Other.VReg),
@@ -215,48 +190,9 @@ public:
}
}
- // FIXME: Re-implementing default behavior to work around MSVC. Remove once
- // MSVC synthesizes move constructors properly.
- NodeMetadata(NodeMetadata &&Other)
- : RS(Other.RS), NumOpts(Other.NumOpts), DeniedOpts(Other.DeniedOpts),
- OptUnsafeEdges(std::move(Other.OptUnsafeEdges)), VReg(Other.VReg),
- AllowedRegs(std::move(Other.AllowedRegs))
-#ifndef NDEBUG
- , everConservativelyAllocatable(Other.everConservativelyAllocatable)
-#endif
- {}
-
- // FIXME: Re-implementing default behavior to work around MSVC. Remove once
- // MSVC synthesizes move constructors properly.
- NodeMetadata& operator=(const NodeMetadata &Other) {
- RS = Other.RS;
- NumOpts = Other.NumOpts;
- DeniedOpts = Other.DeniedOpts;
- OptUnsafeEdges.reset(new unsigned[NumOpts]);
- std::copy(Other.OptUnsafeEdges.get(), Other.OptUnsafeEdges.get() + NumOpts,
- OptUnsafeEdges.get());
- VReg = Other.VReg;
- AllowedRegs = Other.AllowedRegs;
-#ifndef NDEBUG
- everConservativelyAllocatable = Other.everConservativelyAllocatable;
-#endif
- return *this;
- }
+ NodeMetadata(NodeMetadata &&Other) = default;
- // FIXME: Re-implementing default behavior to work around MSVC. Remove once
- // MSVC synthesizes move constructors properly.
- NodeMetadata& operator=(NodeMetadata &&Other) {
- RS = Other.RS;
- NumOpts = Other.NumOpts;
- DeniedOpts = Other.DeniedOpts;
- OptUnsafeEdges = std::move(Other.OptUnsafeEdges);
- VReg = Other.VReg;
- AllowedRegs = std::move(Other.AllowedRegs);
-#ifndef NDEBUG
- everConservativelyAllocatable = Other.everConservativelyAllocatable;
-#endif
- return *this;
- }
+ NodeMetadata& operator=(NodeMetadata &&Other) = default;
void setVReg(unsigned VReg) { this->VReg = VReg; }
unsigned getVReg() const { return VReg; }
@@ -284,7 +220,6 @@ public:
#endif
}
-
void handleAddEdge(const MatrixMetadata& MD, bool Transpose) {
DeniedOpts += Transpose ? MD.getWorstRow() : MD.getWorstCol();
const bool* UnsafeOpts =
@@ -369,11 +304,6 @@ public:
handleReconnectEdge(EId, G.getEdgeNode2Id(EId));
}
- void handleRemoveEdge(EdgeId EId) {
- handleDisconnectEdge(EId, G.getEdgeNode1Id(EId));
- handleDisconnectEdge(EId, G.getEdgeNode2Id(EId));
- }
-
void handleDisconnectEdge(EdgeId EId, NodeId NId) {
NodeMetadata& NMd = G.getNodeMetadata(NId);
const MatrixMetadata& MMd = G.getEdgeCosts(EId).getMetadata();
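
The deleted members of AllowedRegVector and NodeMetadata existed only as a workaround for old MSVC releases that could not synthesize move operations; once the state is a unique_ptr array plus plain values, defaulted moves do the right thing and only the deep-copying copy constructor has to stay hand-written. A minimal illustration of that shape (NodeInfo is a made-up name, not the LLVM class):

#include <algorithm>
#include <memory>

// Hypothetical reduction of NodeMetadata: one owned array plus POD fields.
class NodeInfo {
  unsigned NumOpts = 0;
  std::unique_ptr<unsigned[]> OptUnsafeEdges;

public:
  NodeInfo() = default;

  // Copies must duplicate the owned array explicitly.
  NodeInfo(const NodeInfo &Other)
      : NumOpts(Other.NumOpts), OptUnsafeEdges(new unsigned[Other.NumOpts]) {
    std::copy(Other.OptUnsafeEdges.get(),
              Other.OptUnsafeEdges.get() + NumOpts, OptUnsafeEdges.get());
  }

  // Member-wise moves transfer ownership correctly, so defaults suffice.
  NodeInfo(NodeInfo &&Other) = default;
  NodeInfo &operator=(NodeInfo &&Other) = default;
};
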
diff --git a/contrib/llvm/include/llvm/CodeGen/RegisterPressure.h b/contrib/llvm/include/llvm/CodeGen/RegisterPressure.h
index aaddac40ca76..313be355e7d7 100644
--- a/contrib/llvm/include/llvm/CodeGen/RegisterPressure.h
+++ b/contrib/llvm/include/llvm/CodeGen/RegisterPressure.h
@@ -278,7 +278,7 @@ public:
unsigned SparseIndex = getSparseIndexFromReg(Reg);
RegSet::const_iterator I = Regs.find(SparseIndex);
if (I == Regs.end())
- return 0;
+ return LaneBitmask::getNone();
return I->LaneMask;
}
@@ -288,11 +288,11 @@ public:
unsigned SparseIndex = getSparseIndexFromReg(Pair.RegUnit);
auto InsertRes = Regs.insert(IndexMaskPair(SparseIndex, Pair.LaneMask));
if (!InsertRes.second) {
- unsigned PrevMask = InsertRes.first->LaneMask;
+ LaneBitmask PrevMask = InsertRes.first->LaneMask;
InsertRes.first->LaneMask |= Pair.LaneMask;
return PrevMask;
}
- return 0;
+ return LaneBitmask::getNone();
}
/// Clears the \p Pair.LaneMask lanes of \p Pair.Reg (mark them as dead).
@@ -301,8 +301,8 @@ public:
unsigned SparseIndex = getSparseIndexFromReg(Pair.RegUnit);
RegSet::iterator I = Regs.find(SparseIndex);
if (I == Regs.end())
- return 0;
- unsigned PrevMask = I->LaneMask;
+ return LaneBitmask::getNone();
+ LaneBitmask PrevMask = I->LaneMask;
I->LaneMask &= ~Pair.LaneMask;
return PrevMask;
}
@@ -315,7 +315,7 @@ public:
void appendTo(ContainerT &To) const {
for (const IndexMaskPair &P : Regs) {
unsigned Reg = getRegFromSparseIndex(P.Index);
- if (P.LaneMask != 0)
+ if (P.LaneMask.any())
To.push_back(RegisterMaskPair(Reg, P.LaneMask));
}
}
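
The RegisterPressure changes above replace raw unsigned lane masks with the LaneBitmask type, so "no lanes" is spelled LaneBitmask::getNone() and tests go through any()/none(). A short sketch of that calling convention, with the helper name and variables invented; it only uses operations that appear in the diff or in llvm/MC/LaneBitmask.h:

#include "llvm/MC/LaneBitmask.h"

using namespace llvm;

// Merge newly live lanes into an accumulator and return what was live before,
// mirroring the shape of LiveRegSet::insert() above.
LaneBitmask mergeLanes(LaneBitmask &Accum, LaneBitmask Incoming) {
  LaneBitmask Prev = Accum; // possibly LaneBitmask::getNone()
  Accum |= Incoming;        // mark the incoming lanes live
  return Prev;              // callers test Prev.any() / Prev.none()
}
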
diff --git a/contrib/llvm/include/llvm/CodeGen/RegisterScavenging.h b/contrib/llvm/include/llvm/CodeGen/RegisterScavenging.h
index efe1a3c6d0f7..1b9232b90193 100644
--- a/contrib/llvm/include/llvm/CodeGen/RegisterScavenging.h
+++ b/contrib/llvm/include/llvm/CodeGen/RegisterScavenging.h
@@ -75,6 +75,12 @@ public:
/// Start tracking liveness from the beginning of basic block \p MBB.
void enterBasicBlock(MachineBasicBlock &MBB);
+ /// Start tracking liveness from the end of basic block \p MBB.
+ /// Use backward() to move towards the beginning of the block. This is
+ /// preferred to enterBasicBlock() and forward() because it does not depend
+ /// on the presence of kill flags.
+ void enterBasicBlockEnd(MachineBasicBlock &MBB);
+
/// Move the internal MBB iterator and update register states.
void forward();
@@ -94,6 +100,17 @@ public:
while (MBBI != I) unprocess();
}
+ /// Update internal register state and move MBB iterator backwards.
+ /// Contrary to unprocess() this method gives precise results even in the
+ /// absence of kill flags.
+ void backward();
+
+ /// Call backward() as long as the internal iterator does not point to \p I.
+ void backward(MachineBasicBlock::iterator I) {
+ while (MBBI != I)
+ backward();
+ }
+
/// Move the internal MBB iterator but do not update register states.
void skipTo(MachineBasicBlock::iterator I) {
if (I == MachineBasicBlock::iterator(nullptr))
@@ -147,7 +164,7 @@ public:
}
/// Tell the scavenger a register is used.
- void setRegUsed(unsigned Reg, LaneBitmask LaneMask = ~0u);
+ void setRegUsed(unsigned Reg, LaneBitmask LaneMask = LaneBitmask::getAll());
private:
/// Returns true if a register is reserved. It is never "unused".
bool isReserved(unsigned Reg) const { return MRI->isReserved(Reg); }
@@ -168,6 +185,9 @@ private:
/// Add all Reg Units that Reg contains to BV.
void addRegUnits(BitVector &BV, unsigned Reg);
+ /// Remove all Reg Units that \p Reg contains from \p BV.
+ void removeRegUnits(BitVector &BV, unsigned Reg);
+
/// Return the candidate register that is unused for the longest after
/// StartMI. UseMI is set to the instruction where the search stopped.
///
@@ -177,9 +197,11 @@ private:
unsigned InstrLimit,
MachineBasicBlock::iterator &UseMI);
- /// Allow resetting register state info for multiple
- /// passes over/within the same function.
- void initRegState();
+ /// Initialize RegisterScavenger.
+ void init(MachineBasicBlock &MBB);
+
+ /// Mark live-in registers of basic block as used.
+ void setLiveInsUsed(const MachineBasicBlock &MBB);
};
} // End llvm namespace
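
enterBasicBlockEnd() and backward() give the scavenger a bottom-up mode whose liveness does not rely on kill flags. A rough usage sketch under that reading of the API; the function and register argument are invented, and the iterator bookkeeping is simplified:

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/RegisterScavenging.h"

using namespace llvm;

// Visit MBB from bottom to top while keeping the scavenger's liveness current.
void scanBlockBottomUp(MachineBasicBlock &MBB, unsigned SomeReg) {
  RegScavenger RS;
  RS.enterBasicBlockEnd(MBB); // liveness as of the end of the block
  for (MachineBasicBlock::iterator I = MBB.end(); I != MBB.begin();) {
    --I;
    RS.backward(I); // step the scavenger back until it sits at I
    bool LiveHere = RS.isRegUsed(SomeReg); // precise even without kill flags
    (void)LiveHere;
  }
}
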
diff --git a/contrib/llvm/include/llvm/CodeGen/RuntimeLibcalls.h b/contrib/llvm/include/llvm/CodeGen/RuntimeLibcalls.h
index 16d305c7297f..ddfabb0c44d6 100644
--- a/contrib/llvm/include/llvm/CodeGen/RuntimeLibcalls.h
+++ b/contrib/llvm/include/llvm/CodeGen/RuntimeLibcalls.h
@@ -333,6 +333,13 @@ namespace RTLIB {
MEMSET,
MEMMOVE,
+ // ELEMENT-WISE ATOMIC MEMORY
+ MEMCPY_ELEMENT_ATOMIC_1,
+ MEMCPY_ELEMENT_ATOMIC_2,
+ MEMCPY_ELEMENT_ATOMIC_4,
+ MEMCPY_ELEMENT_ATOMIC_8,
+ MEMCPY_ELEMENT_ATOMIC_16,
+
// EXCEPTION HANDLING
UNWIND_RESUME,
@@ -503,6 +510,10 @@ namespace RTLIB {
/// Return the SYNC_FETCH_AND_* value for the given opcode and type, or
/// UNKNOWN_LIBCALL if there is none.
Libcall getSYNC(unsigned Opc, MVT VT);
+
+ /// getMEMCPY_ELEMENT_ATOMIC - Return MEMCPY_ELEMENT_ATOMIC_* value for the
+ /// given element size or UNKNOWN_LIBCALL if there is none.
+ Libcall getMEMCPY_ELEMENT_ATOMIC(uint64_t ElementSize);
}
}
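
getMEMCPY_ELEMENT_ATOMIC maps an element size to one of the new MEMCPY_ELEMENT_ATOMIC_* entries; the implementation lives in the .cpp file and is not shown here. A plausible sketch of that mapping, assuming it is a plain switch (the free-function name is made up; the real entry point is RTLIB::getMEMCPY_ELEMENT_ATOMIC):

#include "llvm/CodeGen/RuntimeLibcalls.h"
#include <cstdint>

using namespace llvm;

// Hypothetical reimplementation of the size-to-libcall mapping.
RTLIB::Libcall memcpyElementAtomicLibcall(uint64_t ElementSize) {
  switch (ElementSize) {
  case 1:  return RTLIB::MEMCPY_ELEMENT_ATOMIC_1;
  case 2:  return RTLIB::MEMCPY_ELEMENT_ATOMIC_2;
  case 4:  return RTLIB::MEMCPY_ELEMENT_ATOMIC_4;
  case 8:  return RTLIB::MEMCPY_ELEMENT_ATOMIC_8;
  case 16: return RTLIB::MEMCPY_ELEMENT_ATOMIC_16;
  default: return RTLIB::UNKNOWN_LIBCALL; // unsupported element size
  }
}
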
diff --git a/contrib/llvm/include/llvm/CodeGen/ScheduleDAG.h b/contrib/llvm/include/llvm/CodeGen/ScheduleDAG.h
index 6469cabd3de1..ed4e0bc8a4a1 100644
--- a/contrib/llvm/include/llvm/CodeGen/ScheduleDAG.h
+++ b/contrib/llvm/include/llvm/CodeGen/ScheduleDAG.h
@@ -679,24 +679,24 @@ namespace llvm {
};
template <> struct GraphTraits<SUnit*> {
- typedef SUnit NodeType;
+ typedef SUnit *NodeRef;
typedef SUnitIterator ChildIteratorType;
- static inline NodeType *getEntryNode(SUnit *N) { return N; }
- static inline ChildIteratorType child_begin(NodeType *N) {
+ static NodeRef getEntryNode(SUnit *N) { return N; }
+ static ChildIteratorType child_begin(NodeRef N) {
return SUnitIterator::begin(N);
}
- static inline ChildIteratorType child_end(NodeType *N) {
+ static ChildIteratorType child_end(NodeRef N) {
return SUnitIterator::end(N);
}
};
template <> struct GraphTraits<ScheduleDAG*> : public GraphTraits<SUnit*> {
- typedef std::vector<SUnit>::iterator nodes_iterator;
+ typedef pointer_iterator<std::vector<SUnit>::iterator> nodes_iterator;
static nodes_iterator nodes_begin(ScheduleDAG *G) {
- return G->SUnits.begin();
+ return nodes_iterator(G->SUnits.begin());
}
static nodes_iterator nodes_end(ScheduleDAG *G) {
- return G->SUnits.end();
+ return nodes_iterator(G->SUnits.end());
}
};
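
With NodeRef and pointer_iterator in place, the generic graph utilities can walk a scheduling DAG through GraphTraits<SUnit*> exactly as they walk any other graph. A hedged sketch of such a traversal (the helper name is invented):

#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/CodeGen/ScheduleDAG.h"

using namespace llvm;

// Count the SUnits reachable from Root by following successor edges.
unsigned countReachableSUnits(SUnit *Root) {
  unsigned N = 0;
  for (const SUnit *SU : depth_first(Root)) {
    (void)SU;
    ++N;
  }
  return N;
}
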
diff --git a/contrib/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h b/contrib/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h
index 12124ecc4b3e..2746765f6e45 100644
--- a/contrib/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h
+++ b/contrib/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h
@@ -100,7 +100,7 @@ namespace llvm {
class ScheduleDAGInstrs : public ScheduleDAG {
protected:
const MachineLoopInfo *MLI;
- const MachineFrameInfo *MFI;
+ const MachineFrameInfo &MFI;
/// TargetSchedModel provides an interface to the machine model.
TargetSchedModel SchedModel;
@@ -138,11 +138,6 @@ namespace llvm {
/// scheduling region is mapped to an SUnit.
DenseMap<MachineInstr*, SUnit*> MISUnitMap;
- /// After calling BuildSchedGraph, each vreg used in the scheduling region
- /// is mapped to a set of SUnits. These include all local vreg uses, not
- /// just the uses for a singly defined vreg.
- VReg2SUnitMultiMap VRegUses;
-
/// State internal to DAG building.
/// -------------------------------
@@ -333,8 +328,6 @@ namespace llvm {
/// Returns a mask for which lanes get read/written by the given (register)
/// machine operand.
LaneBitmask getLaneMaskForMO(const MachineOperand &MO) const;
-
- void collectVRegUses(SUnit *SU);
};
/// newSUnit - Creates a new SUnit and return a ptr to it.
@@ -345,7 +338,6 @@ namespace llvm {
SUnits.emplace_back(MI, (unsigned)SUnits.size());
assert((Addr == nullptr || Addr == &SUnits[0]) &&
"SUnits std::vector reallocated on the fly!");
- SUnits.back().OrigNode = &SUnits.back();
return &SUnits.back();
}
diff --git a/contrib/llvm/include/llvm/CodeGen/SelectionDAG.h b/contrib/llvm/include/llvm/CodeGen/SelectionDAG.h
index 29cce873c2f3..7927982e782d 100644
--- a/contrib/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/contrib/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -81,24 +81,10 @@ template<> struct FoldingSetTrait<SDVTListNode> : DefaultFoldingSetTrait<SDVTLis
}
};
-template<> struct ilist_traits<SDNode> : public ilist_default_traits<SDNode> {
-private:
- mutable ilist_half_node<SDNode> Sentinel;
-public:
- SDNode *createSentinel() const {
- return static_cast<SDNode*>(&Sentinel);
- }
- static void destroySentinel(SDNode *) {}
-
- SDNode *provideInitialHead() const { return createSentinel(); }
- SDNode *ensureHead(SDNode*) const { return createSentinel(); }
- static void noteHead(SDNode*, SDNode*) {}
-
+template <> struct ilist_alloc_traits<SDNode> {
static void deleteNode(SDNode *) {
llvm_unreachable("ilist_traits<SDNode> shouldn't see a deleteNode call!");
}
-private:
- static void createNode(const SDNode &);
};
/// Keeps track of dbg_value information through SDISel. We do
@@ -197,8 +183,8 @@ class SelectionDAG {
/// The AllocatorType for allocating SDNodes. We use
/// pool allocation with recycling.
typedef RecyclingAllocator<BumpPtrAllocator, SDNode, sizeof(LargestSDNode),
- AlignOf<MostAlignedSDNode>::Alignment>
- NodeAllocatorType;
+ alignof(MostAlignedSDNode)>
+ NodeAllocatorType;
/// Pool allocation for nodes.
NodeAllocatorType NodeAllocator;
@@ -284,6 +270,22 @@ private:
SDNodeT(std::forward<ArgTypes>(Args)...);
}
+ /// Build a synthetic SDNodeT with the given args and extract its subclass
+ /// data as an integer (e.g. for use in a folding set).
+ ///
+ /// The args to this function are the same as the args to SDNodeT's
+ /// constructor, except the second arg (assumed to be a const DebugLoc&) is
+ /// omitted.
+ template <typename SDNodeT, typename... ArgTypes>
+ static uint16_t getSyntheticNodeSubclassData(unsigned IROrder,
+ ArgTypes &&... Args) {
+ // The compiler can reduce this expression to a constant iff we pass an
+ // empty DebugLoc. Thankfully, the debug location doesn't have any bearing
+ // on the subclass data.
+ return SDNodeT(IROrder, DebugLoc(), std::forward<ArgTypes>(Args)...)
+ .getRawSubclassData();
+ }
+
void createOperands(SDNode *Node, ArrayRef<SDValue> Vals) {
assert(!Node->OperandList && "Node already has operands");
SDUse *Ops = OperandRecycler.allocate(
@@ -663,11 +665,6 @@ public:
return getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
}
- /// Return a splat ISD::BUILD_VECTOR node, but with Op's SDLoc.
- SDValue getSplatBuildVector(EVT VT, SDValue Op) {
- return getSplatBuildVector(VT, SDLoc(Op), Op);
- }
-
/// \brief Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to
/// the shuffle node in input but with swapped operands.
///
@@ -859,10 +856,7 @@ public:
SynchronizationScope SynchScope);
SDValue getAtomicCmpSwap(unsigned Opcode, const SDLoc &dl, EVT MemVT,
SDVTList VTs, SDValue Chain, SDValue Ptr,
- SDValue Cmp, SDValue Swp, MachineMemOperand *MMO,
- AtomicOrdering SuccessOrdering,
- AtomicOrdering FailureOrdering,
- SynchronizationScope SynchScope);
+ SDValue Cmp, SDValue Swp, MachineMemOperand *MMO);
/// Gets a node for an atomic op, produces result (if relevant)
/// and chain and takes 2 operands.
@@ -871,26 +865,18 @@ public:
unsigned Alignment, AtomicOrdering Ordering,
SynchronizationScope SynchScope);
SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain,
- SDValue Ptr, SDValue Val, MachineMemOperand *MMO,
- AtomicOrdering Ordering, SynchronizationScope SynchScope);
+ SDValue Ptr, SDValue Val, MachineMemOperand *MMO);
/// Gets a node for an atomic op, produces result and chain and
/// takes 1 operand.
SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, EVT VT,
- SDValue Chain, SDValue Ptr, MachineMemOperand *MMO,
- AtomicOrdering Ordering, SynchronizationScope SynchScope);
+ SDValue Chain, SDValue Ptr, MachineMemOperand *MMO);
/// Gets a node for an atomic op, produces result and chain and takes N
/// operands.
SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
SDVTList VTList, ArrayRef<SDValue> Ops,
- MachineMemOperand *MMO, AtomicOrdering SuccessOrdering,
- AtomicOrdering FailureOrdering,
- SynchronizationScope SynchScope);
- SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
- SDVTList VTList, ArrayRef<SDValue> Ops,
- MachineMemOperand *MMO, AtomicOrdering Ordering,
- SynchronizationScope SynchScope);
+ MachineMemOperand *MMO);
/// Creates a MemIntrinsicNode that may produce a
/// result and takes a list of operands. Opcode may be INTRINSIC_VOID,
@@ -968,14 +954,24 @@ public:
SDValue getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr,
SDValue Mask, SDValue Src0, EVT MemVT,
- MachineMemOperand *MMO, ISD::LoadExtType);
+ MachineMemOperand *MMO, ISD::LoadExtType,
+ bool IsExpanding = false);
SDValue getMaskedStore(SDValue Chain, const SDLoc &dl, SDValue Val,
SDValue Ptr, SDValue Mask, EVT MemVT,
- MachineMemOperand *MMO, bool IsTrunc);
+ MachineMemOperand *MMO, bool IsTruncating = false,
+ bool IsCompressing = false);
SDValue getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
ArrayRef<SDValue> Ops, MachineMemOperand *MMO);
SDValue getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,
ArrayRef<SDValue> Ops, MachineMemOperand *MMO);
+
+ /// Return (create a new or find existing) a target-specific node.
+ /// TargetMemSDNode should be a class derived from MemSDNode.
+ template <class TargetMemSDNode>
+ SDValue getTargetMemSDNode(SDVTList VTs, ArrayRef<SDValue> Ops,
+ const SDLoc &dl, EVT MemVT,
+ MachineMemOperand *MMO);
+
/// Construct a node to track a Value* through the backend.
SDValue getSrcValue(const Value *v);
@@ -1033,16 +1029,10 @@ public:
EVT VT2, ArrayRef<SDValue> Ops);
SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT1,
EVT VT2, EVT VT3, ArrayRef<SDValue> Ops);
- SDNode *SelectNodeTo(SDNode *N, unsigned MachineOpc, EVT VT1,
- EVT VT2, EVT VT3, EVT VT4, ArrayRef<SDValue> Ops);
SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT1,
EVT VT2, SDValue Op1);
SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT1,
EVT VT2, SDValue Op1, SDValue Op2);
- SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT1,
- EVT VT2, SDValue Op1, SDValue Op2, SDValue Op3);
- SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, EVT VT1,
- EVT VT2, EVT VT3, SDValue Op1, SDValue Op2, SDValue Op3);
SDNode *SelectNodeTo(SDNode *N, unsigned TargetOpc, SDVTList VTs,
ArrayRef<SDValue> Ops);
@@ -1067,10 +1057,6 @@ public:
MachineSDNode *getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT,
ArrayRef<SDValue> Ops);
MachineSDNode *getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT1,
- EVT VT2);
- MachineSDNode *getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT1,
- EVT VT2, SDValue Op1);
- MachineSDNode *getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT1,
EVT VT2, SDValue Op1, SDValue Op2);
MachineSDNode *getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT1,
EVT VT2, SDValue Op1, SDValue Op2, SDValue Op3);
@@ -1083,9 +1069,6 @@ public:
SDValue Op3);
MachineSDNode *getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT1,
EVT VT2, EVT VT3, ArrayRef<SDValue> Ops);
- MachineSDNode *getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT1,
- EVT VT2, EVT VT3, EVT VT4,
- ArrayRef<SDValue> Ops);
MachineSDNode *getMachineNode(unsigned Opcode, const SDLoc &dl,
ArrayRef<EVT> ResultTys, ArrayRef<SDValue> Ops);
MachineSDNode *getMachineNode(unsigned Opcode, const SDLoc &dl, SDVTList VTs,
@@ -1205,12 +1188,12 @@ public:
static const fltSemantics &EVTToAPFloatSemantics(EVT VT) {
switch (VT.getScalarType().getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unknown FP format");
- case MVT::f16: return APFloat::IEEEhalf;
- case MVT::f32: return APFloat::IEEEsingle;
- case MVT::f64: return APFloat::IEEEdouble;
- case MVT::f80: return APFloat::x87DoubleExtended;
- case MVT::f128: return APFloat::IEEEquad;
- case MVT::ppcf128: return APFloat::PPCDoubleDouble;
+ case MVT::f16: return APFloat::IEEEhalf();
+ case MVT::f32: return APFloat::IEEEsingle();
+ case MVT::f64: return APFloat::IEEEdouble();
+ case MVT::f80: return APFloat::x87DoubleExtended();
+ case MVT::f128: return APFloat::IEEEquad();
+ case MVT::ppcf128: return APFloat::PPCDoubleDouble();
}
}
@@ -1281,12 +1264,22 @@ public:
const;
/// Determine which bits of Op are known to be either zero or one and return
- /// them in the KnownZero/KnownOne bitsets. Targets can implement the
- /// computeKnownBitsForTargetNode method in the TargetLowering class to allow
- /// target nodes to be understood.
+ /// them in the KnownZero/KnownOne bitsets. For vectors, the known bits are
+ /// those that are shared by every vector element.
+ /// Targets can implement the computeKnownBitsForTargetNode method in the
+ /// TargetLowering class to allow target nodes to be understood.
void computeKnownBits(SDValue Op, APInt &KnownZero, APInt &KnownOne,
unsigned Depth = 0) const;
+ /// Determine which bits of Op are known to be either zero or one and return
+ /// them in the KnownZero/KnownOne bitsets. The DemandedElts argument allows
+ /// us to only collect the known bits that are shared by the requested vector
+ /// elements.
+ /// Targets can implement the computeKnownBitsForTargetNode method in the
+ /// TargetLowering class to allow target nodes to be understood.
+ void computeKnownBits(SDValue Op, APInt &KnownZero, APInt &KnownOne,
+ const APInt &DemandedElts, unsigned Depth = 0) const;
+
/// Test if the given value is known to have exactly one bit set. This differs
/// from computeKnownBits in that it doesn't necessarily determine which bit
/// is set.
@@ -1377,6 +1370,16 @@ public:
/// Test whether the given value is a constant int or similar node.
SDNode *isConstantIntBuildVectorOrConstantInt(SDValue N);
+ /// Test whether the given value is a constant FP or similar node.
+ SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N);
+
+ /// \returns true if \p N is any kind of constant or build_vector of
+ /// constants, int or float. If a vector, it may not necessarily be a splat.
+ inline bool isConstantValueOfAnyType(SDValue N) {
+ return isConstantIntBuildVectorOrConstantInt(N) ||
+ isConstantFPBuildVectorOrConstantFP(N);
+ }
+
private:
void InsertNode(SDNode *N);
bool RemoveNodeFromCSEMaps(SDNode *N);
@@ -1386,7 +1389,7 @@ private:
void *&InsertPos);
SDNode *FindModifiedNodeSlot(SDNode *N, ArrayRef<SDValue> Ops,
void *&InsertPos);
- SDNode *UpdadeSDLocOnMergedSDNode(SDNode *N, const SDLoc &loc);
+ SDNode *UpdateSDLocOnMergeSDNode(SDNode *N, const SDLoc &loc);
void DeleteNodeNotInCSEMaps(SDNode *N);
void DeallocateNode(SDNode *N);
@@ -1424,15 +1427,51 @@ private:
};
template <> struct GraphTraits<SelectionDAG*> : public GraphTraits<SDNode*> {
- typedef SelectionDAG::allnodes_iterator nodes_iterator;
+ typedef pointer_iterator<SelectionDAG::allnodes_iterator> nodes_iterator;
static nodes_iterator nodes_begin(SelectionDAG *G) {
- return G->allnodes_begin();
+ return nodes_iterator(G->allnodes_begin());
}
static nodes_iterator nodes_end(SelectionDAG *G) {
- return G->allnodes_end();
+ return nodes_iterator(G->allnodes_end());
}
};
+template <class TargetMemSDNode>
+SDValue SelectionDAG::getTargetMemSDNode(SDVTList VTs,
+ ArrayRef<SDValue> Ops,
+ const SDLoc &dl, EVT MemVT,
+ MachineMemOperand *MMO) {
+
+ /// Compose node ID and try to find an existing node.
+ FoldingSetNodeID ID;
+ unsigned Opcode =
+ TargetMemSDNode(dl.getIROrder(), DebugLoc(), VTs, MemVT, MMO).getOpcode();
+ ID.AddInteger(Opcode);
+ ID.AddPointer(VTs.VTs);
+ for (auto& Op : Ops) {
+ ID.AddPointer(Op.getNode());
+ ID.AddInteger(Op.getResNo());
+ }
+ ID.AddInteger(MemVT.getRawBits());
+ ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+ ID.AddInteger(getSyntheticNodeSubclassData<TargetMemSDNode>(
+ dl.getIROrder(), VTs, MemVT, MMO));
+
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+ cast<TargetMemSDNode>(E)->refineAlignment(MMO);
+ return SDValue(E, 0);
+ }
+
+ /// Existing node was not found. Create a new one.
+ auto *N = newSDNode<TargetMemSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
+ MemVT, MMO);
+ createOperands(N, Ops);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ return SDValue(N, 0);
+}
+
} // end namespace llvm
#endif
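
The new getTargetMemSDNode<> template is the CSE-aware way for a target to create its own MemSDNode subclasses: it folds the opcode, operands, memory VT, address space, and the synthetic subclass data into the FoldingSetNodeID before either reusing an existing node or building a new one. A usage sketch from hypothetical target lowering code; MyTargetLoadSDNode is an invented MemSDNode subclass whose constructor takes (IROrder, DebugLoc, VTs, MemVT, MMO), and DAG, Chain, BasePtr, Mask, DL, and MMO are assumed to be in scope:

// Inside some target's LowerOperation-style hook, with DAG the SelectionDAG.
SDVTList VTs = DAG.getVTList(MVT::v4i32, MVT::Other);
SDValue Ops[] = {Chain, BasePtr, Mask};
SDValue Loaded = DAG.getTargetMemSDNode<MyTargetLoadSDNode>(
    VTs, Ops, DL, MVT::v4i32, MMO);
// Loaded.getValue(0) is the data result, Loaded.getValue(1) the chain.
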
diff --git a/contrib/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/contrib/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index cfcc4117f93b..d4b7170eac3c 100644
--- a/contrib/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/contrib/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -19,23 +19,37 @@
#ifndef LLVM_CODEGEN_SELECTIONDAGNODES_H
#define LLVM_CODEGEN_SELECTIONDAGNODES_H
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/GraphTraits.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/ilist_node.h"
+#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/Support/DataTypes.h"
-#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/AlignOf.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <algorithm>
#include <cassert>
+#include <climits>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <iterator>
+#include <string>
+#include <tuple>
namespace llvm {
@@ -44,12 +58,9 @@ class GlobalValue;
class MachineBasicBlock;
class MachineConstantPoolValue;
class SDNode;
-class HandleSDNode;
class Value;
class MCSymbol;
template <typename T> struct DenseMapInfo;
-template <typename T> struct simplify_type;
-template <typename T> struct ilist_traits;
void checkForCycles(const SDNode *N, const SelectionDAG *DAG = nullptr,
bool force = false);
@@ -64,6 +75,7 @@ struct SDVTList {
};
namespace ISD {
+
/// Node predicates
/// If N is a BUILD_VECTOR node whose elements are all the same constant or
@@ -89,7 +101,8 @@ namespace ISD {
/// Return true if the node has at least one operand and all operands of the
/// specified node are ISD::UNDEF.
bool allOperandsUndef(const SDNode *N);
-} // end llvm:ISD namespace
+
+} // end namespace ISD
//===----------------------------------------------------------------------===//
/// Unlike LLVM values, Selection DAG nodes may return multiple
@@ -107,6 +120,7 @@ class SDValue {
SDNode *Node; // The node defining the value we are using.
unsigned ResNo; // Which return value of the node we are using.
+
public:
SDValue() : Node(nullptr), ResNo(0) {}
SDValue(SDNode *node, unsigned resno);
@@ -188,29 +202,30 @@ public:
inline bool hasOneUse() const;
};
-
template<> struct DenseMapInfo<SDValue> {
static inline SDValue getEmptyKey() {
SDValue V;
V.ResNo = -1U;
return V;
}
+
static inline SDValue getTombstoneKey() {
SDValue V;
V.ResNo = -2U;
return V;
}
+
static unsigned getHashValue(const SDValue &Val) {
return ((unsigned)((uintptr_t)Val.getNode() >> 4) ^
(unsigned)((uintptr_t)Val.getNode() >> 9)) + Val.getResNo();
}
+
static bool isEqual(const SDValue &LHS, const SDValue &RHS) {
return LHS == RHS;
}
};
template <> struct isPodLike<SDValue> { static const bool value = true; };
-
/// Allow casting operators to work directly on
/// SDValues as if they were SDNode*'s.
template<> struct simplify_type<SDValue> {
@@ -244,7 +259,7 @@ class SDUse {
void operator=(const SDUse &U) = delete;
public:
- SDUse() : Val(), User(nullptr), Prev(nullptr), Next(nullptr) {}
+ SDUse() : User(nullptr), Prev(nullptr), Next(nullptr) {}
/// Normally SDUse will just implicitly convert to an SDValue that it holds.
operator const SDValue&() const { return Val; }
@@ -372,14 +387,6 @@ public:
bool hasAllowReciprocal() const { return AllowReciprocal; }
bool hasVectorReduction() const { return VectorReduction; }
- /// Return a raw encoding of the flags.
- /// This function should only be used to add data to the NodeID value.
- unsigned getRawFlags() const {
- return (NoUnsignedWrap << 0) | (NoSignedWrap << 1) | (Exact << 2) |
- (UnsafeAlgebra << 3) | (NoNaNs << 4) | (NoInfs << 5) |
- (NoSignedZeros << 6) | (AllowReciprocal << 7);
- }
-
/// Clear any flags in this flag set that aren't also set in Flags.
void intersectWith(const SDNodeFlags *Flags) {
NoUnsignedWrap &= Flags->NoUnsignedWrap;
@@ -400,15 +407,90 @@ private:
/// The operation that this node performs.
int16_t NodeType;
- /// This tracks whether this node has one or more dbg_value
- /// nodes corresponding to it.
- uint16_t HasDebugValue : 1;
-
protected:
- /// This member is defined by this class, but is not used for
- /// anything. Subclasses can use it to hold whatever state they find useful.
- /// This field is initialized to zero by the ctor.
- uint16_t SubclassData : 15;
+ // We define a set of mini-helper classes to help us interpret the bits in our
+ // SubclassData. These are designed to fit within a uint16_t so they pack
+ // with NodeType.
+
+ class SDNodeBitfields {
+ friend class SDNode;
+ friend class MemIntrinsicSDNode;
+ friend class MemSDNode;
+
+ uint16_t HasDebugValue : 1;
+ uint16_t IsMemIntrinsic : 1;
+ };
+ enum { NumSDNodeBits = 2 };
+
+ class ConstantSDNodeBitfields {
+ friend class ConstantSDNode;
+
+ uint16_t : NumSDNodeBits;
+
+ uint16_t IsOpaque : 1;
+ };
+
+ class MemSDNodeBitfields {
+ friend class MemSDNode;
+ friend class MemIntrinsicSDNode;
+ friend class AtomicSDNode;
+
+ uint16_t : NumSDNodeBits;
+
+ uint16_t IsVolatile : 1;
+ uint16_t IsNonTemporal : 1;
+ uint16_t IsDereferenceable : 1;
+ uint16_t IsInvariant : 1;
+ };
+ enum { NumMemSDNodeBits = NumSDNodeBits + 4 };
+
+ class LSBaseSDNodeBitfields {
+ friend class LSBaseSDNode;
+ uint16_t : NumMemSDNodeBits;
+
+ uint16_t AddressingMode : 3; // enum ISD::MemIndexedMode
+ };
+ enum { NumLSBaseSDNodeBits = NumMemSDNodeBits + 3 };
+
+ class LoadSDNodeBitfields {
+ friend class LoadSDNode;
+ friend class MaskedLoadSDNode;
+
+ uint16_t : NumLSBaseSDNodeBits;
+
+ uint16_t ExtTy : 2; // enum ISD::LoadExtType
+ uint16_t IsExpanding : 1;
+ };
+
+ class StoreSDNodeBitfields {
+ friend class StoreSDNode;
+ friend class MaskedStoreSDNode;
+
+ uint16_t : NumLSBaseSDNodeBits;
+
+ uint16_t IsTruncating : 1;
+ uint16_t IsCompressing : 1;
+ };
+
+ union {
+ char RawSDNodeBits[sizeof(uint16_t)];
+ SDNodeBitfields SDNodeBits;
+ ConstantSDNodeBitfields ConstantSDNodeBits;
+ MemSDNodeBitfields MemSDNodeBits;
+ LSBaseSDNodeBitfields LSBaseSDNodeBits;
+ LoadSDNodeBitfields LoadSDNodeBits;
+ StoreSDNodeBitfields StoreSDNodeBits;
+ };
+
+ // RawSDNodeBits must cover the entirety of the union. This means that all of
+ // the union's members must have size <= RawSDNodeBits. We write the RHS as
+ // "2" instead of sizeof(RawSDNodeBits) because MSVC can't handle the latter.
+ static_assert(sizeof(SDNodeBitfields) <= 2, "field too wide");
+ static_assert(sizeof(ConstantSDNodeBitfields) <= 2, "field too wide");
+ static_assert(sizeof(MemSDNodeBitfields) <= 2, "field too wide");
+ static_assert(sizeof(LSBaseSDNodeBitfields) <= 2, "field too wide");
+ static_assert(sizeof(LoadSDNodeBitfields) <= 4, "field too wide");
+ static_assert(sizeof(StoreSDNodeBitfields) <= 2, "field too wide");
private:
/// Unique id per SDNode in the DAG.
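
The hunk above replaces the opaque SubclassData integer with a union of per-class bitfield structs laid over one uint16_t: each derived struct reserves its base's bits with an unnamed `uint16_t : N` member so the flags of base and subclass never collide, and the static_asserts keep every view within the word. A stripped-down, self-contained illustration of the pattern (all names hypothetical, not the SDNode hierarchy):

#include <cstdint>
#include <cstring>

class Node {
protected:
  // Base flags occupy the low bits; derived views skip over them.
  class BaseBits {
    friend class Node;
    uint16_t HasDebugValue : 1;
    uint16_t IsMemAccess : 1;
  };
  enum { NumBaseBits = 2 };

  class MemBits {
    friend class Node;
    uint16_t : NumBaseBits; // reserve the bits owned by BaseBits
    uint16_t IsVolatile : 1;
  };

  union {
    char Raw[sizeof(uint16_t)]; // whole-word view for zeroing and hashing
    BaseBits Base;
    MemBits Mem;
  };

  static_assert(sizeof(BaseBits) <= 2, "field too wide");
  static_assert(sizeof(MemBits) <= 2, "field too wide");

public:
  Node() { std::memset(&Raw, 0, sizeof(Raw)); }

  void setVolatile(bool V) { Mem.IsVolatile = V; }

  // Raw view of the flag word, as getRawSubclassData() builds for the CSE map.
  uint16_t rawBits() const {
    uint16_t V;
    std::memcpy(&V, &Raw, sizeof(V));
    return V;
  }
};
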
@@ -441,7 +523,6 @@ private:
static const EVT *getValueTypeList(EVT VT);
friend class SelectionDAG;
- friend struct ilist_traits<SDNode>;
// TODO: unfriend HandleSDNode once we fix its operand handling.
friend class HandleSDNode;
@@ -481,7 +562,8 @@ public:
/// proper classof relationship.
bool isMemIntrinsic() const {
return (NodeType == ISD::INTRINSIC_W_CHAIN ||
- NodeType == ISD::INTRINSIC_VOID) && ((SubclassData >> 13) & 1);
+ NodeType == ISD::INTRINSIC_VOID) &&
+ SDNodeBits.IsMemIntrinsic;
}
/// Test if this node has a post-isel opcode, directly
@@ -496,11 +578,8 @@ public:
return ~NodeType;
}
- /// Get this bit.
- bool getHasDebugValue() const { return HasDebugValue; }
-
- /// Set this bit.
- void setHasDebugValue(bool b) { HasDebugValue = b; }
+ bool getHasDebugValue() const { return SDNodeBits.HasDebugValue; }
+ void setHasDebugValue(bool b) { SDNodeBits.HasDebugValue = b; }
/// Return true if there are no uses of this node.
bool use_empty() const { return UseList == nullptr; }
@@ -538,9 +617,11 @@ public:
class use_iterator
: public std::iterator<std::forward_iterator_tag, SDUse, ptrdiff_t> {
SDUse *Op;
- explicit use_iterator(SDUse *op) : Op(op) {
- }
+
friend class SDNode;
+
+ explicit use_iterator(SDUse *op) : Op(op) {}
+
public:
typedef std::iterator<std::forward_iterator_tag,
SDUse, ptrdiff_t>::reference reference;
@@ -668,6 +749,7 @@ public:
}
typedef SDUse* op_iterator;
+
op_iterator op_begin() const { return OperandList; }
op_iterator op_end() const { return OperandList+NumOperands; }
ArrayRef<SDUse> ops() const { return makeArrayRef(op_begin(), op_end()); }
@@ -767,7 +849,6 @@ public:
void printrWithDepth(raw_ostream &O, const SelectionDAG *G = nullptr,
unsigned depth = 100) const;
-
/// Dump this node, for debugging.
void dump() const;
@@ -815,10 +896,10 @@ protected:
/// SDNodes are created without any operands, and never own the operand
/// storage. To add operands, see SelectionDAG::createOperands.
SDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs)
- : NodeType(Opc), HasDebugValue(false), SubclassData(0), NodeId(-1),
- OperandList(nullptr), ValueList(VTs.VTs), UseList(nullptr),
- NumOperands(0), NumValues(VTs.NumVTs), IROrder(Order),
+ : NodeType(Opc), NodeId(-1), OperandList(nullptr), ValueList(VTs.VTs),
+ UseList(nullptr), NumOperands(0), NumValues(VTs.NumVTs), IROrder(Order),
debugLoc(std::move(dl)) {
+ memset(&RawSDNodeBits, 0, sizeof(RawSDNodeBits));
assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor");
assert(NumValues == VTs.NumVTs &&
"NumValues wasn't wide enough for its operands!");
@@ -851,16 +932,19 @@ public:
if (I)
DL = I->getDebugLoc();
}
+
unsigned getIROrder() const { return IROrder; }
const DebugLoc &getDebugLoc() const { return DL; }
};
-
// Define inline functions from the SDValue class.
inline SDValue::SDValue(SDNode *node, unsigned resno)
: Node(node), ResNo(resno) {
- assert((!Node || ResNo < Node->getNumValues()) &&
+ // Explicitly check for !ResNo to avoid use-after-free, because there are
+ // callers that use SDValue(N, 0) with a deleted N to indicate successful
+ // combines.
+ assert((!Node || !ResNo || ResNo < Node->getNumValues()) &&
"Invalid result number for the given node!");
assert(ResNo < -2U && "Cannot use result numbers reserved for DenseMaps.");
}
@@ -868,48 +952,63 @@ inline SDValue::SDValue(SDNode *node, unsigned resno)
inline unsigned SDValue::getOpcode() const {
return Node->getOpcode();
}
+
inline EVT SDValue::getValueType() const {
return Node->getValueType(ResNo);
}
+
inline unsigned SDValue::getNumOperands() const {
return Node->getNumOperands();
}
+
inline const SDValue &SDValue::getOperand(unsigned i) const {
return Node->getOperand(i);
}
+
inline uint64_t SDValue::getConstantOperandVal(unsigned i) const {
return Node->getConstantOperandVal(i);
}
+
inline bool SDValue::isTargetOpcode() const {
return Node->isTargetOpcode();
}
+
inline bool SDValue::isTargetMemoryOpcode() const {
return Node->isTargetMemoryOpcode();
}
+
inline bool SDValue::isMachineOpcode() const {
return Node->isMachineOpcode();
}
+
inline unsigned SDValue::getMachineOpcode() const {
return Node->getMachineOpcode();
}
+
inline bool SDValue::isUndef() const {
return Node->isUndef();
}
+
inline bool SDValue::use_empty() const {
return !Node->hasAnyUseOfValue(ResNo);
}
+
inline bool SDValue::hasOneUse() const {
return Node->hasNUsesOfValue(1, ResNo);
}
+
inline const DebugLoc &SDValue::getDebugLoc() const {
return Node->getDebugLoc();
}
+
inline void SDValue::dump() const {
return Node->dump();
}
+
inline void SDValue::dumpr() const {
return Node->dumpr();
}
+
// Define inline functions from the SDUse class.
inline void SDUse::set(const SDValue &V) {
@@ -956,9 +1055,11 @@ static bool isBinOpWithFlags(unsigned Opcode) {
class BinaryWithFlagsSDNode : public SDNode {
public:
SDNodeFlags Flags;
+
BinaryWithFlagsSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl,
SDVTList VTs, const SDNodeFlags &NodeFlags)
: SDNode(Opc, Order, dl, VTs), Flags(NodeFlags) {}
+
static bool classof(const SDNode *N) {
return isBinOpWithFlags(N->getOpcode());
}
@@ -970,6 +1071,7 @@ public:
/// the AllNodes list.
class HandleSDNode : public SDNode {
SDUse Op;
+
public:
explicit HandleSDNode(SDValue X)
: SDNode(ISD::HANDLENODE, 0, DebugLoc(), getSDVTList(MVT::Other)) {
@@ -987,6 +1089,7 @@ public:
OperandList = &Op;
}
~HandleSDNode();
+
const SDValue &getValue() const { return Op; }
};
@@ -1032,25 +1135,27 @@ public:
return MMO->getAlignment();
}
- /// Return the SubclassData value, which contains an
+ /// Return the SubclassData value, without HasDebugValue. This contains an
/// encoding of the volatile flag, as well as bits used by subclasses. This
/// function should only be used to compute a FoldingSetNodeID value.
+ /// The HasDebugValue bit is masked out because the CSE map needs to match
+ /// nodes that have debug info with nodes that do not.
unsigned getRawSubclassData() const {
- return SubclassData;
- }
-
- // We access subclass data here so that we can check consistency
- // with MachineMemOperand information.
- bool isVolatile() const { return (SubclassData >> 5) & 1; }
- bool isNonTemporal() const { return (SubclassData >> 6) & 1; }
- bool isInvariant() const { return (SubclassData >> 7) & 1; }
-
- AtomicOrdering getOrdering() const {
- return AtomicOrdering((SubclassData >> 8) & 15);
- }
- SynchronizationScope getSynchScope() const {
- return SynchronizationScope((SubclassData >> 12) & 1);
- }
+ uint16_t Data;
+ union {
+ char RawSDNodeBits[sizeof(uint16_t)];
+ SDNodeBitfields SDNodeBits;
+ };
+ memcpy(&RawSDNodeBits, &this->RawSDNodeBits, sizeof(this->RawSDNodeBits));
+ SDNodeBits.HasDebugValue = 0;
+ memcpy(&Data, &RawSDNodeBits, sizeof(RawSDNodeBits));
+ return Data;
+ }
+
+ bool isVolatile() const { return MemSDNodeBits.IsVolatile; }
+ bool isNonTemporal() const { return MemSDNodeBits.IsNonTemporal; }
+ bool isDereferenceable() const { return MemSDNodeBits.IsDereferenceable; }
+ bool isInvariant() const { return MemSDNodeBits.IsInvariant; }
// Returns the offset from the location of the access.
int64_t getSrcValueOffset() const { return MMO->getOffset(); }
@@ -1061,6 +1166,14 @@ public:
/// Returns the Ranges that describes the dereference.
const MDNode *getRanges() const { return MMO->getRanges(); }
+ /// Return the synchronization scope for this memory operation.
+ SynchronizationScope getSynchScope() const { return MMO->getSynchScope(); }
+
+ /// Return the atomic ordering requirements for this memory operation. For
+ /// cmpxchg atomic operations, return the atomic ordering requirements when
+ /// the store occurs.
+ AtomicOrdering getOrdering() const { return MMO->getOrdering(); }
+
/// Return the type of the in-memory value.
EVT getMemoryVT() const { return MemoryVT; }
@@ -1123,57 +1236,27 @@ public:
/// This is an SDNode representing atomic operations.
class AtomicSDNode : public MemSDNode {
- /// For cmpxchg instructions, the ordering requirements when a store does not
- /// occur.
- AtomicOrdering FailureOrdering;
-
- void InitAtomic(AtomicOrdering SuccessOrdering,
- AtomicOrdering FailureOrdering,
- SynchronizationScope SynchScope) {
- // This must match encodeMemSDNodeFlags() in SelectionDAG.cpp.
- assert((AtomicOrdering)((unsigned)SuccessOrdering & 15) ==
- SuccessOrdering &&
- "Ordering may not require more than 4 bits!");
- assert((AtomicOrdering)((unsigned)FailureOrdering & 15) ==
- FailureOrdering &&
- "Ordering may not require more than 4 bits!");
- assert((SynchScope & 1) == SynchScope &&
- "SynchScope may not require more than 1 bit!");
- SubclassData |= (unsigned)SuccessOrdering << 8;
- SubclassData |= SynchScope << 12;
- this->FailureOrdering = FailureOrdering;
- assert(getSuccessOrdering() == SuccessOrdering &&
- "Ordering encoding error!");
- assert(getFailureOrdering() == FailureOrdering &&
- "Ordering encoding error!");
- assert(getSynchScope() == SynchScope && "Synch-scope encoding error!");
- }
-
public:
AtomicSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, SDVTList VTL,
- EVT MemVT, MachineMemOperand *MMO,
- AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering,
- SynchronizationScope SynchScope)
- : MemSDNode(Opc, Order, dl, VTL, MemVT, MMO) {
- InitAtomic(SuccessOrdering, FailureOrdering, SynchScope);
- }
+ EVT MemVT, MachineMemOperand *MMO)
+ : MemSDNode(Opc, Order, dl, VTL, MemVT, MMO) {}
const SDValue &getBasePtr() const { return getOperand(1); }
const SDValue &getVal() const { return getOperand(2); }
- AtomicOrdering getSuccessOrdering() const {
- return getOrdering();
+ /// Returns true if this SDNode represents cmpxchg atomic operation, false
+ /// otherwise.
+ bool isCompareAndSwap() const {
+ unsigned Op = getOpcode();
+ return Op == ISD::ATOMIC_CMP_SWAP ||
+ Op == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS;
}
- // Not quite enough room in SubclassData for everything, so failure gets its
- // own field.
+ /// For cmpxchg atomic operations, return the atomic ordering requirements
+ /// when the store does not occur.
AtomicOrdering getFailureOrdering() const {
- return FailureOrdering;
- }
-
- bool isCompareAndSwap() const {
- unsigned Op = getOpcode();
- return Op == ISD::ATOMIC_CMP_SWAP || Op == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS;
+ assert(isCompareAndSwap() && "Must be cmpxchg operation");
+ return MMO->getFailureOrdering();
}
// Methods to support isa and dyn_cast
@@ -1205,7 +1288,7 @@ public:
MemIntrinsicSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl,
SDVTList VTs, EVT MemoryVT, MachineMemOperand *MMO)
: MemSDNode(Opc, Order, dl, VTs, MemoryVT, MMO) {
- SubclassData |= 1u << 13;
+ SDNodeBits.IsMemIntrinsic = true;
}
// Methods to support isa and dyn_cast
@@ -1230,8 +1313,10 @@ class ShuffleVectorSDNode : public SDNode {
// The memory for Mask is owned by the SelectionDAG's OperandAllocator, and
// is freed when the SelectionDAG object is destroyed.
const int *Mask;
+
protected:
friend class SelectionDAG;
+
ShuffleVectorSDNode(EVT VT, unsigned Order, const DebugLoc &dl, const int *M)
: SDNode(ISD::VECTOR_SHUFFLE, Order, dl, getSDVTList(VT)), Mask(M) {}
@@ -1240,12 +1325,14 @@ public:
EVT VT = getValueType(0);
return makeArrayRef(Mask, VT.getVectorNumElements());
}
+
int getMaskElt(unsigned Idx) const {
assert(Idx < getValueType(0).getVectorNumElements() && "Idx out of range!");
return Mask[Idx];
}
bool isSplat() const { return isSplatMask(Mask, getValueType(0)); }
+
int getSplatIndex() const {
assert(isSplat() && "Cannot get splat index for non-splat!");
EVT VT = getValueType(0);
@@ -1255,6 +1342,7 @@ public:
}
llvm_unreachable("Splat with all undef indices?");
}
+
static bool isSplatMask(const int *Mask, EVT VT);
/// Change values in a shuffle permute mask assuming
@@ -1279,16 +1367,18 @@ public:
class ConstantSDNode : public SDNode {
const ConstantInt *Value;
+
friend class SelectionDAG;
+
ConstantSDNode(bool isTarget, bool isOpaque, const ConstantInt *val,
const DebugLoc &DL, EVT VT)
: SDNode(isTarget ? ISD::TargetConstant : ISD::Constant, 0, DL,
getSDVTList(VT)),
Value(val) {
- SubclassData |= (uint16_t)isOpaque;
+ ConstantSDNodeBits.IsOpaque = isOpaque;
}
-public:
+public:
const ConstantInt *getConstantIntValue() const { return Value; }
const APInt &getAPIntValue() const { return Value->getValue(); }
uint64_t getZExtValue() const { return Value->getZExtValue(); }
@@ -1298,7 +1388,7 @@ public:
bool isNullValue() const { return Value->isNullValue(); }
bool isAllOnesValue() const { return Value->isAllOnesValue(); }
- bool isOpaque() const { return SubclassData & 1; }
+ bool isOpaque() const { return ConstantSDNodeBits.IsOpaque; }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::Constant ||
@@ -1308,7 +1398,9 @@ public:
class ConstantFPSDNode : public SDNode {
const ConstantFP *Value;
+
friend class SelectionDAG;
+
ConstantFPSDNode(bool isTarget, const ConstantFP *val, const DebugLoc &DL,
EVT VT)
: SDNode(isTarget ? ISD::TargetConstantFP : ISD::ConstantFP, 0, DL,
@@ -1316,7 +1408,6 @@ class ConstantFPSDNode : public SDNode {
Value(val) {}
public:
-
const APFloat& getValueAPF() const { return Value->getValueAPF(); }
const ConstantFP *getConstantFPValue() const { return Value; }
@@ -1359,16 +1450,26 @@ public:
/// Returns true if \p V is a constant integer zero.
bool isNullConstant(SDValue V);
+
/// Returns true if \p V is an FP constant with a value of positive zero.
bool isNullFPConstant(SDValue V);
+
/// Returns true if \p V is an integer constant with all bits set.
bool isAllOnesConstant(SDValue V);
+
/// Returns true if \p V is a constant integer one.
bool isOneConstant(SDValue V);
+
/// Returns true if \p V is a bitwise not operation. Assumes that an all ones
/// constant is canonicalized to be operand 1.
bool isBitwiseNot(SDValue V);
+/// Returns the SDNode if it is a constant splat BuildVector or constant int.
+ConstantSDNode *isConstOrConstSplat(SDValue V);
+
+/// Returns the SDNode if it is a constant splat BuildVector or constant float.
+ConstantFPSDNode *isConstOrConstSplatFP(SDValue V);
+
class GlobalAddressSDNode : public SDNode {
const GlobalValue *TheGlobal;
int64_t Offset;
@@ -1379,7 +1480,6 @@ class GlobalAddressSDNode : public SDNode {
unsigned char TargetFlags);
public:
-
const GlobalValue *getGlobal() const { return TheGlobal; }
int64_t getOffset() const { return Offset; }
unsigned char getTargetFlags() const { return TargetFlags; }
@@ -1396,13 +1496,15 @@ public:
class FrameIndexSDNode : public SDNode {
int FI;
+
friend class SelectionDAG;
+
FrameIndexSDNode(int fi, EVT VT, bool isTarg)
: SDNode(isTarg ? ISD::TargetFrameIndex : ISD::FrameIndex,
0, DebugLoc(), getSDVTList(VT)), FI(fi) {
}
-public:
+public:
int getIndex() const { return FI; }
static bool classof(const SDNode *N) {
@@ -1414,13 +1516,15 @@ public:
class JumpTableSDNode : public SDNode {
int JTI;
unsigned char TargetFlags;
+
friend class SelectionDAG;
+
JumpTableSDNode(int jti, EVT VT, bool isTarg, unsigned char TF)
: SDNode(isTarg ? ISD::TargetJumpTable : ISD::JumpTable,
0, DebugLoc(), getSDVTList(VT)), JTI(jti), TargetFlags(TF) {
}
-public:
+public:
int getIndex() const { return JTI; }
unsigned char getTargetFlags() const { return TargetFlags; }
@@ -1438,7 +1542,9 @@ class ConstantPoolSDNode : public SDNode {
int Offset; // It's a MachineConstantPoolValue if top bit is set.
unsigned Alignment; // Minimum alignment requirement of CP (not log2 value).
unsigned char TargetFlags;
+
friend class SelectionDAG;
+
ConstantPoolSDNode(bool isTarget, const Constant *c, EVT VT, int o,
unsigned Align, unsigned char TF)
: SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, 0,
@@ -1447,6 +1553,7 @@ class ConstantPoolSDNode : public SDNode {
assert(Offset >= 0 && "Offset is too large");
Val.ConstVal = c;
}
+
ConstantPoolSDNode(bool isTarget, MachineConstantPoolValue *v,
EVT VT, int o, unsigned Align, unsigned char TF)
: SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, 0,
@@ -1456,8 +1563,8 @@ class ConstantPoolSDNode : public SDNode {
Val.MachineCPVal = v;
Offset |= 1 << (sizeof(unsigned)*CHAR_BIT-1);
}
-public:
+public:
bool isMachineConstantPoolEntry() const {
return Offset < 0;
}
@@ -1494,13 +1601,13 @@ class TargetIndexSDNode : public SDNode {
unsigned char TargetFlags;
int Index;
int64_t Offset;
+
friend class SelectionDAG;
-public:
+public:
TargetIndexSDNode(int Idx, EVT VT, int64_t Ofs, unsigned char TF)
: SDNode(ISD::TargetIndex, 0, DebugLoc(), getSDVTList(VT)),
TargetFlags(TF), Index(Idx), Offset(Ofs) {}
-public:
unsigned char getTargetFlags() const { return TargetFlags; }
int getIndex() const { return Index; }
@@ -1513,15 +1620,17 @@ public:
class BasicBlockSDNode : public SDNode {
MachineBasicBlock *MBB;
+
friend class SelectionDAG;
+
/// Debug info is meaningful and potentially useful here, but we create
/// blocks out of order when they're jumped to, which makes it a bit
/// harder. Let's see if we need it first.
explicit BasicBlockSDNode(MachineBasicBlock *mbb)
: SDNode(ISD::BasicBlock, 0, DebugLoc(), getSDVTList(MVT::Other)), MBB(mbb)
{}
-public:
+public:
MachineBasicBlock *getBasicBlock() const { return MBB; }
static bool classof(const SDNode *N) {
@@ -1533,6 +1642,7 @@ public:
class BuildVectorSDNode : public SDNode {
// These are constructed as SDNodes and then cast to BuildVectorSDNodes.
explicit BuildVectorSDNode() = delete;
+
public:
/// Check if this is a constant splat, and if so, find the
/// smallest element size that splats the vector. If MinSplatBits is
@@ -1591,7 +1701,9 @@ public:
///
class SrcValueSDNode : public SDNode {
const Value *V;
+
friend class SelectionDAG;
+
/// Create a SrcValue for a general value.
explicit SrcValueSDNode(const Value *v)
: SDNode(ISD::SRCVALUE, 0, DebugLoc(), getSDVTList(MVT::Other)), V(v) {}
@@ -1607,12 +1719,14 @@ public:
class MDNodeSDNode : public SDNode {
const MDNode *MD;
+
friend class SelectionDAG;
+
explicit MDNodeSDNode(const MDNode *md)
: SDNode(ISD::MDNODE_SDNODE, 0, DebugLoc(), getSDVTList(MVT::Other)), MD(md)
{}
-public:
+public:
const MDNode *getMD() const { return MD; }
static bool classof(const SDNode *N) {
@@ -1622,12 +1736,13 @@ public:
class RegisterSDNode : public SDNode {
unsigned Reg;
+
friend class SelectionDAG;
+
RegisterSDNode(unsigned reg, EVT VT)
- : SDNode(ISD::Register, 0, DebugLoc(), getSDVTList(VT)), Reg(reg) {
- }
-public:
+ : SDNode(ISD::Register, 0, DebugLoc(), getSDVTList(VT)), Reg(reg) {}
+public:
unsigned getReg() const { return Reg; }
static bool classof(const SDNode *N) {
@@ -1638,12 +1753,14 @@ public:
class RegisterMaskSDNode : public SDNode {
// The memory for RegMask is not owned by the node.
const uint32_t *RegMask;
+
friend class SelectionDAG;
+
RegisterMaskSDNode(const uint32_t *mask)
: SDNode(ISD::RegisterMask, 0, DebugLoc(), getSDVTList(MVT::Untyped)),
RegMask(mask) {}
-public:
+public:
const uint32_t *getRegMask() const { return RegMask; }
static bool classof(const SDNode *N) {
@@ -1655,12 +1772,15 @@ class BlockAddressSDNode : public SDNode {
const BlockAddress *BA;
int64_t Offset;
unsigned char TargetFlags;
+
friend class SelectionDAG;
+
BlockAddressSDNode(unsigned NodeTy, EVT VT, const BlockAddress *ba,
int64_t o, unsigned char Flags)
: SDNode(NodeTy, 0, DebugLoc(), getSDVTList(VT)),
BA(ba), Offset(o), TargetFlags(Flags) {
}
+
public:
const BlockAddress *getBlockAddress() const { return BA; }
int64_t getOffset() const { return Offset; }
@@ -1674,7 +1794,9 @@ public:
class EHLabelSDNode : public SDNode {
MCSymbol *Label;
+
friend class SelectionDAG;
+
EHLabelSDNode(unsigned Order, const DebugLoc &dl, MCSymbol *L)
: SDNode(ISD::EH_LABEL, Order, dl, getSDVTList(MVT::Other)), Label(L) {}
@@ -1691,12 +1813,12 @@ class ExternalSymbolSDNode : public SDNode {
unsigned char TargetFlags;
friend class SelectionDAG;
+
ExternalSymbolSDNode(bool isTarget, const char *Sym, unsigned char TF, EVT VT)
: SDNode(isTarget ? ISD::TargetExternalSymbol : ISD::ExternalSymbol,
- 0, DebugLoc(), getSDVTList(VT)), Symbol(Sym), TargetFlags(TF) {
- }
-public:
+ 0, DebugLoc(), getSDVTList(VT)), Symbol(Sym), TargetFlags(TF) {}
+public:
const char *getSymbol() const { return Symbol; }
unsigned char getTargetFlags() const { return TargetFlags; }
@@ -1723,13 +1845,14 @@ public:
class CondCodeSDNode : public SDNode {
ISD::CondCode Condition;
+
friend class SelectionDAG;
+
explicit CondCodeSDNode(ISD::CondCode Cond)
: SDNode(ISD::CONDCODE, 0, DebugLoc(), getSDVTList(MVT::Other)),
- Condition(Cond) {
- }
-public:
+ Condition(Cond) {}
+public:
ISD::CondCode get() const { return Condition; }
static bool classof(const SDNode *N) {
@@ -1741,7 +1864,9 @@ public:
/// future and most targets don't support it.
class CvtRndSatSDNode : public SDNode {
ISD::CvtCode CvtCode;
+
friend class SelectionDAG;
+
explicit CvtRndSatSDNode(EVT VT, unsigned Order, const DebugLoc &dl,
ISD::CvtCode Code)
: SDNode(ISD::CONVERT_RNDSAT, Order, dl, getSDVTList(VT)), CvtCode(Code) {
@@ -1759,13 +1884,14 @@ public:
/// to parameterize some operations.
class VTSDNode : public SDNode {
EVT ValueType;
+
friend class SelectionDAG;
+
explicit VTSDNode(EVT VT)
: SDNode(ISD::VALUETYPE, 0, DebugLoc(), getSDVTList(MVT::Other)),
- ValueType(VT) {
- }
-public:
+ ValueType(VT) {}
+public:
EVT getVT() const { return ValueType; }
static bool classof(const SDNode *N) {
@@ -1780,8 +1906,8 @@ public:
SDVTList VTs, ISD::MemIndexedMode AM, EVT MemVT,
MachineMemOperand *MMO)
: MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
- SubclassData |= AM << 2;
- assert(getAddressingMode() == AM && "MemIndexedMode encoding error!");
+ LSBaseSDNodeBits.AddressingMode = AM;
+ assert(getAddressingMode() == AM && "Value truncated");
}
const SDValue &getOffset() const {
@@ -1791,7 +1917,7 @@ public:
/// Return the addressing mode for this load or store:
/// unindexed, pre-inc, pre-dec, post-inc, or post-dec.
ISD::MemIndexedMode getAddressingMode() const {
- return ISD::MemIndexedMode((SubclassData >> 2) & 7);
+ return static_cast<ISD::MemIndexedMode>(LSBaseSDNodeBits.AddressingMode);
}
/// Return true if this is a pre/post inc/dec load/store.
@@ -1809,21 +1935,21 @@ public:
/// This class is used to represent ISD::LOAD nodes.
class LoadSDNode : public LSBaseSDNode {
friend class SelectionDAG;
+
LoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
ISD::MemIndexedMode AM, ISD::LoadExtType ETy, EVT MemVT,
MachineMemOperand *MMO)
: LSBaseSDNode(ISD::LOAD, Order, dl, VTs, AM, MemVT, MMO) {
- SubclassData |= (unsigned short)ETy;
- assert(getExtensionType() == ETy && "LoadExtType encoding error!");
+ LoadSDNodeBits.ExtTy = ETy;
assert(readMem() && "Load MachineMemOperand is not a load!");
assert(!writeMem() && "Load MachineMemOperand is a store!");
}
-public:
+public:
/// Return whether this is a plain node,
/// or one of the varieties of value-extending loads.
ISD::LoadExtType getExtensionType() const {
- return ISD::LoadExtType(SubclassData & 3);
+ return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy);
}
const SDValue &getBasePtr() const { return getOperand(1); }
@@ -1837,21 +1963,21 @@ public:
/// This class is used to represent ISD::STORE nodes.
class StoreSDNode : public LSBaseSDNode {
friend class SelectionDAG;
+
StoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
ISD::MemIndexedMode AM, bool isTrunc, EVT MemVT,
MachineMemOperand *MMO)
: LSBaseSDNode(ISD::STORE, Order, dl, VTs, AM, MemVT, MMO) {
- SubclassData |= (unsigned short)isTrunc;
- assert(isTruncatingStore() == isTrunc && "isTrunc encoding error!");
+ StoreSDNodeBits.IsTruncating = isTrunc;
assert(!readMem() && "Store MachineMemOperand is a load!");
assert(writeMem() && "Store MachineMemOperand is not a store!");
}
-public:
+public:
/// Return true if the op does a truncation before store.
/// For integers this is the same as doing a TRUNCATE and storing the result.
/// For floats, it is the same as doing an FP_ROUND and storing the result.
- bool isTruncatingStore() const { return SubclassData & 1; }
+ bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }
const SDValue &getValue() const { return getOperand(1); }
const SDValue &getBasePtr() const { return getOperand(2); }
@@ -1866,6 +1992,7 @@ public:
class MaskedLoadStoreSDNode : public MemSDNode {
public:
friend class SelectionDAG;
+
MaskedLoadStoreSDNode(ISD::NodeType NodeTy, unsigned Order,
const DebugLoc &dl, SDVTList VTs, EVT MemVT,
MachineMemOperand *MMO)
@@ -1889,34 +2016,48 @@ class MaskedLoadSDNode : public MaskedLoadStoreSDNode {
public:
friend class SelectionDAG;
MaskedLoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
- ISD::LoadExtType ETy, EVT MemVT, MachineMemOperand *MMO)
+ ISD::LoadExtType ETy, bool IsExpanding, EVT MemVT,
+ MachineMemOperand *MMO)
: MaskedLoadStoreSDNode(ISD::MLOAD, Order, dl, VTs, MemVT, MMO) {
- SubclassData |= (unsigned short)ETy;
+ LoadSDNodeBits.ExtTy = ETy;
+ LoadSDNodeBits.IsExpanding = IsExpanding;
}
ISD::LoadExtType getExtensionType() const {
- return ISD::LoadExtType(SubclassData & 3);
+ return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy);
}
+
const SDValue &getSrc0() const { return getOperand(3); }
static bool classof(const SDNode *N) {
return N->getOpcode() == ISD::MLOAD;
}
+
+ bool isExpandingLoad() const { return LoadSDNodeBits.IsExpanding; }
};
/// This class is used to represent an MSTORE node
class MaskedStoreSDNode : public MaskedLoadStoreSDNode {
-
public:
friend class SelectionDAG;
+
MaskedStoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
- bool isTrunc, EVT MemVT, MachineMemOperand *MMO)
+ bool isTrunc, bool isCompressing, EVT MemVT,
+ MachineMemOperand *MMO)
: MaskedLoadStoreSDNode(ISD::MSTORE, Order, dl, VTs, MemVT, MMO) {
- SubclassData |= (unsigned short)isTrunc;
+ StoreSDNodeBits.IsTruncating = isTrunc;
+ StoreSDNodeBits.IsCompressing = isCompressing;
}
+
/// Return true if the op does a truncation before store.
/// For integers this is the same as doing a TRUNCATE and storing the result.
/// For floats, it is the same as doing an FP_ROUND and storing the result.
- bool isTruncatingStore() const { return SubclassData & 1; }
+ bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }
+
+ /// Returns true if the op does a compression to the vector before storing.
+ /// The node contiguously stores the active elements (integers or floats)
+ /// in src (those with their respective bit set in writemask k) to unaligned
+ /// memory at base_addr.
+ bool isCompressingStore() const { return StoreSDNodeBits.IsCompressing; }
const SDValue &getValue() const { return getOperand(3); }
@@ -1931,6 +2072,7 @@ public:
class MaskedGatherScatterSDNode : public MemSDNode {
public:
friend class SelectionDAG;
+
MaskedGatherScatterSDNode(ISD::NodeType NodeTy, unsigned Order,
const DebugLoc &dl, SDVTList VTs, EVT MemVT,
MachineMemOperand *MMO)
@@ -1956,6 +2098,7 @@ public:
class MaskedGatherSDNode : public MaskedGatherScatterSDNode {
public:
friend class SelectionDAG;
+
MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
EVT MemVT, MachineMemOperand *MMO)
: MaskedGatherScatterSDNode(ISD::MGATHER, Order, dl, VTs, MemVT, MMO) {}
@@ -1968,9 +2111,9 @@ public:
/// This class is used to represent an MSCATTER node
///
class MaskedScatterSDNode : public MaskedGatherScatterSDNode {
-
public:
friend class SelectionDAG;
+
MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
EVT MemVT, MachineMemOperand *MMO)
: MaskedGatherScatterSDNode(ISD::MSCATTER, Order, dl, VTs, MemVT, MMO) {}
@@ -1989,6 +2132,7 @@ public:
private:
friend class SelectionDAG;
+
MachineSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL, SDVTList VTs)
: SDNode(Opc, Order, DL, VTs), MemRefs(nullptr), MemRefsEnd(nullptr) {}
@@ -2021,6 +2165,7 @@ class SDNodeIterator : public std::iterator<std::forward_iterator_tag,
unsigned Operand;
SDNodeIterator(const SDNode *N, unsigned Op) : Node(N), Operand(Op) {}
+
public:
bool operator==(const SDNodeIterator& x) const {
return Operand == x.Operand;
@@ -2055,13 +2200,14 @@ public:
};
template <> struct GraphTraits<SDNode*> {
- typedef SDNode NodeType;
+ typedef SDNode *NodeRef;
typedef SDNodeIterator ChildIteratorType;
- static inline NodeType *getEntryNode(SDNode *N) { return N; }
- static inline ChildIteratorType child_begin(NodeType *N) {
+
+ static NodeRef getEntryNode(SDNode *N) { return N; }
+ static ChildIteratorType child_begin(NodeRef N) {
return SDNodeIterator::begin(N);
}
- static inline ChildIteratorType child_end(NodeType *N) {
+ static ChildIteratorType child_end(NodeRef N) {
return SDNodeIterator::end(N);
}
};
@@ -2078,6 +2224,7 @@ typedef AlignedCharArrayUnion<AtomicSDNode, TargetIndexSDNode,
typedef GlobalAddressSDNode MostAlignedSDNode;
namespace ISD {
+
/// Returns true if the specified node is a non-extending and unindexed load.
inline bool isNormalLoad(const SDNode *N) {
const LoadSDNode *Ld = dyn_cast<LoadSDNode>(N);
@@ -2138,8 +2285,9 @@ namespace ISD {
return isa<StoreSDNode>(N) &&
cast<StoreSDNode>(N)->getAddressingMode() == ISD::UNINDEXED;
}
-}
-} // end llvm namespace
+} // end namespace ISD
+
+} // end namespace llvm
-#endif
+#endif // LLVM_CODEGEN_SELECTIONDAGNODES_H
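The changes above repeatedly swap raw SubclassData shift/mask code for named bitfields (SDNodeBits, LoadSDNodeBits, StoreSDNodeBits and friends). As a rough, self-contained C++ sketch of that pattern only — not the actual SDNode layout, which has more classes, different field widths and extra invariants — the idea looks like this:

// Minimal sketch of the "named bitfield" pattern; field names and widths are
// simplified relative to the real SelectionDAGNodes.h.
#include <cassert>
#include <cstdint>

enum LoadExtType { NON_EXTLOAD, EXTLOAD, SEXTLOAD, ZEXTLOAD };

class NodeBase {
protected:
  struct LoadBitfields {
    uint16_t ExtTy : 2;        // replaces "SubclassData & 3"
    uint16_t IsExpanding : 1;
  };
  struct StoreBitfields {
    uint16_t IsTruncating : 1; // replaces "SubclassData & 1"
    uint16_t IsCompressing : 1;
  };
  // All subclass flag sets share the same storage, as before, but each flag
  // now has a name instead of a hand-maintained shift/mask.
  union {
    uint16_t RawSubclassData = 0;
    LoadBitfields LoadBits;
    StoreBitfields StoreBits;
  };
};

class LoadNode : public NodeBase {
public:
  LoadNode(LoadExtType ETy, bool Expanding) {
    LoadBits.ExtTy = ETy;
    LoadBits.IsExpanding = Expanding;
    assert(getExtensionType() == ETy && "value truncated");
  }
  LoadExtType getExtensionType() const {
    return static_cast<LoadExtType>(LoadBits.ExtTy);
  }
  bool isExpandingLoad() const { return LoadBits.IsExpanding; }
};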
diff --git a/contrib/llvm/include/llvm/CodeGen/SlotIndexes.h b/contrib/llvm/include/llvm/CodeGen/SlotIndexes.h
index afb0288b024f..2ac3b3d86cb6 100644
--- a/contrib/llvm/include/llvm/CodeGen/SlotIndexes.h
+++ b/contrib/llvm/include/llvm/CodeGen/SlotIndexes.h
@@ -69,23 +69,8 @@ namespace llvm {
};
template <>
- struct ilist_traits<IndexListEntry> : public ilist_default_traits<IndexListEntry> {
- private:
- mutable ilist_half_node<IndexListEntry> Sentinel;
- public:
- IndexListEntry *createSentinel() const {
- return static_cast<IndexListEntry*>(&Sentinel);
- }
- void destroySentinel(IndexListEntry *) const {}
-
- IndexListEntry *provideInitialHead() const { return createSentinel(); }
- IndexListEntry *ensureHead(IndexListEntry*) const { return createSentinel(); }
- static void noteHead(IndexListEntry*, IndexListEntry*) {}
- void deleteNode(IndexListEntry *N) {}
-
- private:
- void createNode(const IndexListEntry &);
- };
+ struct ilist_alloc_traits<IndexListEntry>
+ : public ilist_noalloc_traits<IndexListEntry> {};
/// SlotIndex - An opaque wrapper around machine indexes.
class SlotIndex {
@@ -361,9 +346,8 @@ namespace llvm {
IndexListEntry* createEntry(MachineInstr *mi, unsigned index) {
IndexListEntry *entry =
- static_cast<IndexListEntry*>(
- ileAllocator.Allocate(sizeof(IndexListEntry),
- alignOf<IndexListEntry>()));
+ static_cast<IndexListEntry *>(ileAllocator.Allocate(
+ sizeof(IndexListEntry), alignof(IndexListEntry)));
new (entry) IndexListEntry(mi, index);
@@ -421,7 +405,8 @@ namespace llvm {
/// Returns the base index for the given instruction.
SlotIndex getInstructionIndex(const MachineInstr &MI) const {
// Instructions inside a bundle have the same number as the bundle itself.
- Mi2IndexMap::const_iterator itr = mi2iMap.find(&getBundleStart(MI));
+ const MachineInstr &BundleStart = *getBundleStart(MI.getIterator());
+ Mi2IndexMap::const_iterator itr = mi2iMap.find(&BundleStart);
assert(itr != mi2iMap.end() && "Instruction not found in maps.");
return itr->second;
}
@@ -632,11 +617,12 @@ namespace llvm {
}
/// ReplaceMachineInstrInMaps - Replacing a machine instr with a new one in
- /// maps used by register allocator.
- void replaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI) {
+ /// maps used by register allocator. \returns the index where the new
+ /// instruction was inserted.
+ SlotIndex replaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI) {
Mi2IndexMap::iterator mi2iItr = mi2iMap.find(&MI);
if (mi2iItr == mi2iMap.end())
- return;
+ return SlotIndex();
SlotIndex replaceBaseIndex = mi2iItr->second;
IndexListEntry *miEntry(replaceBaseIndex.listEntry());
assert(miEntry->getInstr() == &MI &&
@@ -644,6 +630,7 @@ namespace llvm {
miEntry->setInstr(&NewMI);
mi2iMap.erase(mi2iItr);
mi2iMap.insert(std::make_pair(&NewMI, replaceBaseIndex));
+ return replaceBaseIndex;
}
/// Add the given MachineBasicBlock into the maps.
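replaceMachineInstrInMaps now hands back the SlotIndex it reused instead of returning void. A hedged sketch of a call site that takes advantage of that; the surrounding pass and the names Indexes/OldMI/NewMI are placeholders, not part of the change:

// Illustrative call site only.
#include "llvm/CodeGen/SlotIndexes.h"

static void swapInstrInMaps(llvm::SlotIndexes &Indexes,
                            llvm::MachineInstr &OldMI,
                            llvm::MachineInstr &NewMI) {
  // Callers that need the position of the replacement no longer have to do a
  // second getInstructionIndex() lookup after the swap.
  llvm::SlotIndex Idx = Indexes.replaceMachineInstrInMaps(OldMI, NewMI);
  if (!Idx.isValid())
    return; // OldMI was not in the maps; a default SlotIndex() signals that.
  // ... use Idx, e.g. when repairing live intervals around the replacement ...
}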
diff --git a/contrib/llvm/include/llvm/CodeGen/StackMaps.h b/contrib/llvm/include/llvm/CodeGen/StackMaps.h
index 918848f6b2a1..7b55b7968635 100644
--- a/contrib/llvm/include/llvm/CodeGen/StackMaps.h
+++ b/contrib/llvm/include/llvm/CodeGen/StackMaps.h
@@ -22,6 +22,37 @@ class AsmPrinter;
class MCExpr;
class MCStreamer;
+/// \brief MI-level stackmap operands.
+///
+/// MI stackmap operations take the form:
+/// <id>, <numBytes>, live args...
+class StackMapOpers {
+public:
+ /// Enumerate the meta operands.
+ enum { IDPos, NBytesPos };
+
+private:
+ const MachineInstr* MI;
+
+public:
+ explicit StackMapOpers(const MachineInstr *MI);
+
+ /// Return the ID for the given stackmap
+ uint64_t getID() const { return MI->getOperand(IDPos).getImm(); }
+
+ /// Return the number of patchable bytes the given stackmap should emit.
+ uint32_t getNumPatchBytes() const {
+ return MI->getOperand(NBytesPos).getImm();
+ }
+
+ /// Get the operand index of the variable list of non-argument operands.
+ /// These hold the "live state".
+ unsigned getVarIdx() const {
+ // Skip ID, nShadowBytes.
+ return 2;
+ }
+};
+
/// \brief MI-level patchpoint operands.
///
/// MI patchpoint operations take the form:
@@ -44,36 +75,57 @@ public:
private:
const MachineInstr *MI;
bool HasDef;
- bool IsAnyReg;
+
+ unsigned getMetaIdx(unsigned Pos = 0) const {
+ assert(Pos < MetaEnd && "Meta operand index out of range.");
+ return (HasDef ? 1 : 0) + Pos;
+ }
+
+ const MachineOperand &getMetaOper(unsigned Pos) const {
+ return MI->getOperand(getMetaIdx(Pos));
+ }
public:
explicit PatchPointOpers(const MachineInstr *MI);
- bool isAnyReg() const { return IsAnyReg; }
+ bool isAnyReg() const { return (getCallingConv() == CallingConv::AnyReg); }
bool hasDef() const { return HasDef; }
- unsigned getMetaIdx(unsigned Pos = 0) const {
- assert(Pos < MetaEnd && "Meta operand index out of range.");
- return (HasDef ? 1 : 0) + Pos;
+ /// Return the ID for the given patchpoint.
+ uint64_t getID() const { return getMetaOper(IDPos).getImm(); }
+
+ /// Return the number of patchable bytes the given patchpoint should emit.
+ uint32_t getNumPatchBytes() const {
+ return getMetaOper(NBytesPos).getImm();
}
- const MachineOperand &getMetaOper(unsigned Pos) {
- return MI->getOperand(getMetaIdx(Pos));
+ /// Returns the target of the underlying call.
+ const MachineOperand &getCallTarget() const {
+ return getMetaOper(TargetPos);
+ }
+
+ /// Returns the calling convention
+ CallingConv::ID getCallingConv() const {
+ return getMetaOper(CCPos).getImm();
}
unsigned getArgIdx() const { return getMetaIdx() + MetaEnd; }
+ /// Return the number of call arguments
+ uint32_t getNumCallArgs() const {
+ return MI->getOperand(getMetaIdx(NArgPos)).getImm();
+ }
+
/// Get the operand index of the variable list of non-argument operands.
/// These hold the "live state".
unsigned getVarIdx() const {
- return getMetaIdx() + MetaEnd +
- MI->getOperand(getMetaIdx(NArgPos)).getImm();
+ return getMetaIdx() + MetaEnd + getNumCallArgs();
}
/// Get the index at which stack map locations will be recorded.
/// Arguments are not recorded unless the anyregcc convention is used.
unsigned getStackMapStartIdx() const {
- if (IsAnyReg)
+ if (isAnyReg())
return getArgIdx();
return getVarIdx();
}
@@ -167,7 +219,7 @@ public:
void reset() {
CSInfos.clear();
ConstPool.clear();
- FnStackSize.clear();
+ FnInfos.clear();
}
/// \brief Generate a stackmap record for a stackmap instruction.
@@ -191,7 +243,13 @@ private:
typedef SmallVector<Location, 8> LocationVec;
typedef SmallVector<LiveOutReg, 8> LiveOutVec;
typedef MapVector<uint64_t, uint64_t> ConstantPool;
- typedef MapVector<const MCSymbol *, uint64_t> FnStackSizeMap;
+
+ struct FunctionInfo {
+ uint64_t StackSize;
+ uint64_t RecordCount;
+ FunctionInfo() : StackSize(0), RecordCount(1) {}
+ explicit FunctionInfo(uint64_t StackSize) : StackSize(StackSize), RecordCount(1) {}
+ };
struct CallsiteInfo {
const MCExpr *CSOffsetExpr;
@@ -205,12 +263,13 @@ private:
LiveOuts(std::move(LiveOuts)) {}
};
+ typedef MapVector<const MCSymbol *, FunctionInfo> FnInfoMap;
typedef std::vector<CallsiteInfo> CallsiteInfoList;
AsmPrinter &AP;
CallsiteInfoList CSInfos;
ConstantPool ConstPool;
- FnStackSizeMap FnStackSize;
+ FnInfoMap FnInfos;
MachineInstr::const_mop_iterator
parseOperand(MachineInstr::const_mop_iterator MOI,
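The new StackMapOpers wrapper mirrors PatchPointOpers for STACKMAP instructions. A small usage sketch based only on the accessors declared above; MI is assumed to be a STACKMAP MachineInstr and the printed labels are illustrative:

#include "llvm/CodeGen/StackMaps.h"
#include "llvm/Support/raw_ostream.h"

static void describeStackMap(const llvm::MachineInstr &MI) {
  llvm::StackMapOpers Opers(&MI);
  // Meta operands come first (<id>, <numBytes>); the live-state operands that
  // the stackmap records start at getVarIdx().
  llvm::errs() << "stackmap id=" << Opers.getID()
               << " patch-bytes=" << Opers.getNumPatchBytes()
               << " first-live-operand=" << Opers.getVarIdx() << "\n";
}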
diff --git a/contrib/llvm/include/llvm/CodeGen/TailDuplicator.h b/contrib/llvm/include/llvm/CodeGen/TailDuplicator.h
index 8e65199418a6..b667245fd3c0 100644
--- a/contrib/llvm/include/llvm/CodeGen/TailDuplicator.h
+++ b/contrib/llvm/include/llvm/CodeGen/TailDuplicator.h
@@ -15,6 +15,7 @@
#ifndef LLVM_CODEGEN_TAILDUPLICATOR_H
#define LLVM_CODEGEN_TAILDUPLICATOR_H
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -26,6 +27,8 @@
namespace llvm {
+extern cl::opt<unsigned> TailDupIndirectBranchSize;
+
/// Utility class to perform tail duplication.
class TailDuplicator {
const TargetInstrInfo *TII;
@@ -33,7 +36,10 @@ class TailDuplicator {
const MachineBranchProbabilityInfo *MBPI;
const MachineModuleInfo *MMI;
MachineRegisterInfo *MRI;
+ MachineFunction *MF;
bool PreRegAlloc;
+ bool LayoutMode;
+ unsigned TailDupSize;
// A list of virtual registers for which to update SSA form.
SmallVector<unsigned, 16> SSAUpdateVRs;
@@ -45,14 +51,33 @@ class TailDuplicator {
DenseMap<unsigned, AvailableValsTy> SSAUpdateVals;
public:
- void initMF(MachineFunction &MF, const MachineModuleInfo *MMI,
- const MachineBranchProbabilityInfo *MBPI);
- bool tailDuplicateBlocks(MachineFunction &MF);
+ /// Prepare to run on a specific machine function.
+ /// @param MF - Function that will be processed
+ /// @param MBPI - Branch Probability Info. Used to propagate correct
+ /// probabilities when modifying the CFG.
+ /// @param LayoutMode - When true, don't use the existing layout to make
+ /// decisions.
+ /// @param TailDupSize - Maximum size of blocks to tail-duplicate. A value
+ /// of zero means the command line value TailDupSize is used.
+ void initMF(MachineFunction &MF,
+ const MachineBranchProbabilityInfo *MBPI,
+ bool LayoutMode, unsigned TailDupSize = 0);
+ bool tailDuplicateBlocks();
static bool isSimpleBB(MachineBasicBlock *TailBB);
- bool shouldTailDuplicate(const MachineFunction &MF, bool IsSimple,
- MachineBasicBlock &TailBB);
- bool tailDuplicateAndUpdate(MachineFunction &MF, bool IsSimple,
- MachineBasicBlock *MBB);
+ bool shouldTailDuplicate(bool IsSimple, MachineBasicBlock &TailBB);
+ /// Returns true if TailBB can successfully be duplicated into PredBB
+ bool canTailDuplicate(MachineBasicBlock *TailBB, MachineBasicBlock *PredBB);
+ /// Tail duplicate a single basic block into its predecessors, and then clean
+ /// up.
+ /// If \p DuplicatedPreds is not null, it will be updated to contain the list
+ /// of predecessors that received a copy of \p MBB.
+ /// If \p RemovalCallback is non-null, it will be called before \p MBB is
+ /// deleted.
+ bool tailDuplicateAndUpdate(
+ bool IsSimple, MachineBasicBlock *MBB,
+ MachineBasicBlock *ForcedLayoutPred,
+ SmallVectorImpl<MachineBasicBlock*> *DuplicatedPreds = nullptr,
+ llvm::function_ref<void(MachineBasicBlock *)> *RemovalCallback = nullptr);
private:
typedef TargetInstrInfo::RegSubRegPair RegSubRegPair;
@@ -65,7 +90,7 @@ private:
SmallVectorImpl<std::pair<unsigned, RegSubRegPair>> &Copies,
const DenseSet<unsigned> &UsedByPhi, bool Remove);
void duplicateInstruction(MachineInstr *MI, MachineBasicBlock *TailBB,
- MachineBasicBlock *PredBB, MachineFunction &MF,
+ MachineBasicBlock *PredBB,
DenseMap<unsigned, RegSubRegPair> &LocalVRMap,
const DenseSet<unsigned> &UsedByPhi);
void updateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
@@ -76,15 +101,18 @@ private:
SmallVectorImpl<MachineBasicBlock *> &TDBBs,
const DenseSet<unsigned> &RegsUsedByPhi,
SmallVectorImpl<MachineInstr *> &Copies);
- bool tailDuplicate(MachineFunction &MF, bool IsSimple,
+ bool tailDuplicate(bool IsSimple,
MachineBasicBlock *TailBB,
+ MachineBasicBlock *ForcedLayoutPred,
SmallVectorImpl<MachineBasicBlock *> &TDBBs,
SmallVectorImpl<MachineInstr *> &Copies);
void appendCopies(MachineBasicBlock *MBB,
SmallVectorImpl<std::pair<unsigned,RegSubRegPair>> &CopyInfos,
SmallVectorImpl<MachineInstr *> &Copies);
- void removeDeadBlock(MachineBasicBlock *MBB);
+ void removeDeadBlock(
+ MachineBasicBlock *MBB,
+ llvm::function_ref<void(MachineBasicBlock *)> *RemovalCallback = nullptr);
};
} // End llvm namespace
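With this interface change, the MachineFunction is bound once via initMF and the per-call MF parameters disappear. A sketch of the updated call sequence, assuming a TailDuplicator instance owned by the caller (its construction is outside this hunk):

#include "llvm/CodeGen/TailDuplicator.h"

static bool duplicateAllTails(llvm::TailDuplicator &TD,
                              llvm::MachineFunction &MF,
                              const llvm::MachineBranchProbabilityInfo *MBPI) {
  // The function is now bound up front; tailDuplicateBlocks() no longer takes
  // it as a parameter, and LayoutMode/TailDupSize are new knobs.
  TD.initMF(MF, MBPI, /*LayoutMode=*/false);
  return TD.tailDuplicateBlocks();
}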
diff --git a/contrib/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/contrib/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
index c856435f5ddc..cc71fa3918a1 100644
--- a/contrib/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
+++ b/contrib/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h
@@ -33,7 +33,7 @@ namespace llvm {
class TargetLoweringObjectFileELF : public TargetLoweringObjectFile {
bool UseInitArray;
- mutable unsigned NextUniqueID = 0;
+ mutable unsigned NextUniqueID = 1; // ID 0 is reserved for execute-only sections
protected:
MCSymbolRefExpr::VariantKind PLTRelativeVariantKind =
@@ -53,15 +53,13 @@ public:
const Constant *C,
unsigned &Align) const override;
- MCSection *getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
- Mangler &Mang,
+ MCSection *getExplicitSectionGlobal(const GlobalObject *GO, SectionKind Kind,
const TargetMachine &TM) const override;
- MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
- Mangler &Mang,
+ MCSection *SelectSectionForGlobal(const GlobalObject *GO, SectionKind Kind,
const TargetMachine &TM) const override;
- MCSection *getSectionForJumpTable(const Function &F, Mangler &Mang,
+ MCSection *getSectionForJumpTable(const Function &F,
const TargetMachine &TM) const override;
bool shouldPutJumpTableInFunctionSection(bool UsesLabelDifference,
@@ -69,14 +67,14 @@ public:
/// Return an MCExpr to use for a reference to the specified type info global
/// variable from exception handling information.
- const MCExpr *
- getTTypeGlobalReference(const GlobalValue *GV, unsigned Encoding,
- Mangler &Mang, const TargetMachine &TM,
- MachineModuleInfo *MMI,
- MCStreamer &Streamer) const override;
+ const MCExpr *getTTypeGlobalReference(const GlobalValue *GV,
+ unsigned Encoding,
+ const TargetMachine &TM,
+ MachineModuleInfo *MMI,
+ MCStreamer &Streamer) const override;
// The symbol that gets passed to .cfi_personality.
- MCSymbol *getCFIPersonalitySymbol(const GlobalValue *GV, Mangler &Mang,
+ MCSymbol *getCFIPersonalitySymbol(const GlobalValue *GV,
const TargetMachine &TM,
MachineModuleInfo *MMI) const override;
@@ -87,7 +85,7 @@ public:
const MCSymbol *KeySym) const override;
const MCExpr *lowerRelativeReference(const GlobalValue *LHS,
- const GlobalValue *RHS, Mangler &Mang,
+ const GlobalValue *RHS,
const TargetMachine &TM) const override;
};
@@ -98,17 +96,17 @@ public:
~TargetLoweringObjectFileMachO() override {}
TargetLoweringObjectFileMachO();
+ void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
+
/// Emit the module flags that specify the garbage collection information.
void emitModuleFlags(MCStreamer &Streamer,
ArrayRef<Module::ModuleFlagEntry> ModuleFlags,
- Mangler &Mang, const TargetMachine &TM) const override;
+ const TargetMachine &TM) const override;
- MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
- Mangler &Mang,
+ MCSection *SelectSectionForGlobal(const GlobalObject *GO, SectionKind Kind,
const TargetMachine &TM) const override;
- MCSection *getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
- Mangler &Mang,
+ MCSection *getExplicitSectionGlobal(const GlobalObject *GO, SectionKind Kind,
const TargetMachine &TM) const override;
MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind,
@@ -116,14 +114,14 @@ public:
unsigned &Align) const override;
/// The mach-o version of this method defaults to returning a stub reference.
- const MCExpr *
- getTTypeGlobalReference(const GlobalValue *GV, unsigned Encoding,
- Mangler &Mang, const TargetMachine &TM,
- MachineModuleInfo *MMI,
- MCStreamer &Streamer) const override;
+ const MCExpr *getTTypeGlobalReference(const GlobalValue *GV,
+ unsigned Encoding,
+ const TargetMachine &TM,
+ MachineModuleInfo *MMI,
+ MCStreamer &Streamer) const override;
// The symbol that gets passed to .cfi_personality.
- MCSymbol *getCFIPersonalitySymbol(const GlobalValue *GV, Mangler &Mang,
+ MCSymbol *getCFIPersonalitySymbol(const GlobalValue *GV,
const TargetMachine &TM,
MachineModuleInfo *MMI) const override;
@@ -134,7 +132,7 @@ public:
MCStreamer &Streamer) const override;
void getNameWithPrefix(SmallVectorImpl<char> &OutName, const GlobalValue *GV,
- Mangler &Mang, const TargetMachine &TM) const override;
+ const TargetMachine &TM) const override;
};
@@ -145,33 +143,32 @@ class TargetLoweringObjectFileCOFF : public TargetLoweringObjectFile {
public:
~TargetLoweringObjectFileCOFF() override {}
- MCSection *getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
- Mangler &Mang,
+ void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
+ MCSection *getExplicitSectionGlobal(const GlobalObject *GO, SectionKind Kind,
const TargetMachine &TM) const override;
- MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
- Mangler &Mang,
+ MCSection *SelectSectionForGlobal(const GlobalObject *GO, SectionKind Kind,
const TargetMachine &TM) const override;
void getNameWithPrefix(SmallVectorImpl<char> &OutName, const GlobalValue *GV,
- Mangler &Mang, const TargetMachine &TM) const override;
+ const TargetMachine &TM) const override;
- MCSection *getSectionForJumpTable(const Function &F, Mangler &Mang,
+ MCSection *getSectionForJumpTable(const Function &F,
const TargetMachine &TM) const override;
/// Emit Obj-C garbage collection and linker options. Only linker option
/// emission is implemented for COFF.
void emitModuleFlags(MCStreamer &Streamer,
ArrayRef<Module::ModuleFlagEntry> ModuleFlags,
- Mangler &Mang, const TargetMachine &TM) const override;
+ const TargetMachine &TM) const override;
MCSection *getStaticCtorSection(unsigned Priority,
const MCSymbol *KeySym) const override;
MCSection *getStaticDtorSection(unsigned Priority,
const MCSymbol *KeySym) const override;
- void emitLinkerFlagsForGlobal(raw_ostream &OS, const GlobalValue *GV,
- const Mangler &Mang) const override;
+ void emitLinkerFlagsForGlobal(raw_ostream &OS,
+ const GlobalValue *GV) const override;
};
} // end namespace llvm
diff --git a/contrib/llvm/include/llvm/CodeGen/TargetPassConfig.h b/contrib/llvm/include/llvm/CodeGen/TargetPassConfig.h
index 9309655a972e..2287f9aca4bf 100644
--- a/contrib/llvm/include/llvm/CodeGen/TargetPassConfig.h
+++ b/contrib/llvm/include/llvm/CodeGen/TargetPassConfig.h
@@ -94,8 +94,10 @@ public:
private:
PassManagerBase *PM;
- AnalysisID StartBefore, StartAfter;
- AnalysisID StopAfter;
+ AnalysisID StartBefore = nullptr;
+ AnalysisID StartAfter = nullptr;
+ AnalysisID StopBefore = nullptr;
+ AnalysisID StopAfter = nullptr;
bool Started;
bool Stopped;
bool AddingMachinePasses;
@@ -143,11 +145,14 @@ public:
/// This function expects that at least one of the StartAfter or the
/// StartBefore pass IDs is null.
void setStartStopPasses(AnalysisID StartBefore, AnalysisID StartAfter,
- AnalysisID StopAfter) {
- if (StartAfter)
- assert(!StartBefore && "Start after and start before passes are given");
+ AnalysisID StopBefore, AnalysisID StopAfter) {
+ assert(!(StartBefore && StartAfter) &&
+ "Start after and start before passes are given");
+ assert(!(StopBefore && StopAfter) &&
+ "Stop after and stop before passes are given");
this->StartBefore = StartBefore;
this->StartAfter = StartAfter;
+ this->StopBefore = StopBefore;
this->StopAfter = StopAfter;
Started = (StartAfter == nullptr) && (StartBefore == nullptr);
}
@@ -218,6 +223,14 @@ public:
virtual bool addIRTranslator() { return true; }
/// This method may be implemented by targets that want to run passes
+ /// immediately before legalization.
+ virtual void addPreLegalizeMachineIR() {}
+
+ /// This method should install a legalize pass, which converts the instruction
+ /// sequence into one that can be selected by the target.
+ virtual bool addLegalizeMachineIR() { return true; }
+
+ /// This method may be implemented by targets that want to run passes
/// immediately before the register bank selection.
virtual void addPreRegBankSelect() {}
@@ -226,6 +239,16 @@ public:
/// class or register banks.
virtual bool addRegBankSelect() { return true; }
+ /// This method may be implemented by targets that want to run passes
+ /// immediately before the (global) instruction selection.
+ virtual void addPreGlobalInstructionSelect() {}
+
+ /// This method should install a (global) instruction selector pass, which
+ /// converts possibly generic instructions to fully target-specific
+ /// instructions, thereby constraining all generic virtual registers to
+ /// register classes.
+ virtual bool addGlobalInstructionSelect() { return true; }
+
/// Add the complete, standard set of LLVM CodeGen passes.
/// Fully developed targets will not generally override this.
virtual void addMachinePasses();
@@ -263,6 +286,16 @@ public:
/// verification is enabled.
void addVerifyPass(const std::string &Banner);
+ /// Check whether or not GlobalISel should abort on error.
+ /// When this is disabled, GlobalISel will fall back on SDISel instead of
+ /// erroring out.
+ virtual bool isGlobalISelAbortEnabled() const;
+
+ /// Check whether or not a diagnostic should be emitted when GlobalISel
+ /// uses the fallback path. In other words, it will emit a diagnostic
+ /// when GlobalISel fails and isGlobalISelAbortEnabled is false.
+ virtual bool reportDiagnosticWhenGlobalISelFallback() const;
+
protected:
// Helper to verify the analysis is really immutable.
void setOpt(bool &Opt, bool Val);
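setStartStopPasses grows a StopBefore ID alongside StopAfter. A hypothetical helper showing the new four-argument form; MachineSchedulerID is only an example pass ID and the TargetPassConfig reference is assumed to come from the target machine:

#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"

static void stopBeforeMISched(llvm::TargetPassConfig &PC) {
  // StopBefore is new: the pipeline now runs up to, but not including, the
  // named pass, instead of only supporting "stop after".
  PC.setStartStopPasses(/*StartBefore=*/nullptr, /*StartAfter=*/nullptr,
                        /*StopBefore=*/&llvm::MachineSchedulerID,
                        /*StopAfter=*/nullptr);
}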
diff --git a/contrib/llvm/include/llvm/CodeGen/ValueTypes.h b/contrib/llvm/include/llvm/CodeGen/ValueTypes.h
index 524a90803df8..2699fa28f0f1 100644
--- a/contrib/llvm/include/llvm/CodeGen/ValueTypes.h
+++ b/contrib/llvm/include/llvm/CodeGen/ValueTypes.h
@@ -34,9 +34,9 @@ namespace llvm {
Type *LLVMTy;
public:
- LLVM_CONSTEXPR EVT() : V(MVT::INVALID_SIMPLE_VALUE_TYPE), LLVMTy(nullptr) {}
- LLVM_CONSTEXPR EVT(MVT::SimpleValueType SVT) : V(SVT), LLVMTy(nullptr) {}
- LLVM_CONSTEXPR EVT(MVT S) : V(S), LLVMTy(nullptr) {}
+ constexpr EVT() : V(MVT::INVALID_SIMPLE_VALUE_TYPE), LLVMTy(nullptr) {}
+ constexpr EVT(MVT::SimpleValueType SVT) : V(SVT), LLVMTy(nullptr) {}
+ constexpr EVT(MVT S) : V(S), LLVMTy(nullptr) {}
bool operator==(EVT VT) const {
return !(*this != VT);
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/ByteStream.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/ByteStream.h
deleted file mode 100644
index f398c93723e7..000000000000
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/ByteStream.h
+++ /dev/null
@@ -1,58 +0,0 @@
-//===- ByteStream.h - Reads stream data from a byte sequence ----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_DEBUGINFO_CODEVIEW_BYTESTREAM_H
-#define LLVM_DEBUGINFO_CODEVIEW_BYTESTREAM_H
-
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/DebugInfo/CodeView/StreamInterface.h"
-#include "llvm/Support/Error.h"
-#include <cstdint>
-#include <memory>
-#include <type_traits>
-
-namespace llvm {
-namespace codeview {
-class StreamReader;
-
-template <bool Writable = false> class ByteStream : public StreamInterface {
- typedef typename std::conditional<Writable, MutableArrayRef<uint8_t>,
- ArrayRef<uint8_t>>::type ArrayType;
-
-public:
- ByteStream() {}
- explicit ByteStream(ArrayType Data) : Data(Data) {}
- ~ByteStream() override {}
-
- Error readBytes(uint32_t Offset, uint32_t Size,
- ArrayRef<uint8_t> &Buffer) const override;
- Error readLongestContiguousChunk(uint32_t Offset,
- ArrayRef<uint8_t> &Buffer) const override;
-
- Error writeBytes(uint32_t Offset, ArrayRef<uint8_t> Buffer) const override;
-
- uint32_t getLength() const override;
-
- Error commit() const override;
-
- ArrayRef<uint8_t> data() const { return Data; }
- StringRef str() const;
-
-private:
- ArrayType Data;
-};
-
-extern template class ByteStream<true>;
-extern template class ByteStream<false>;
-
-} // end namespace pdb
-} // end namespace llvm
-
-#endif // LLVM_DEBUGINFO_CODEVIEW_BYTESTREAM_H
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/CVDebugRecord.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/CVDebugRecord.h
new file mode 100644
index 000000000000..5a0bb4266ba2
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/CVDebugRecord.h
@@ -0,0 +1,55 @@
+//===- CVDebugRecord.h ------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_CODEVIEW_CVDEBUGRECORD_H
+#define LLVM_DEBUGINFO_CODEVIEW_CVDEBUGRECORD_H
+
+#include "llvm/Support/Endian.h"
+
+namespace llvm {
+namespace OMF {
+struct Signature {
+ enum ID : uint32_t {
+ PDB70 = 0x53445352, // RSDS
+ PDB20 = 0x3031424e, // NB10
+ CV50 = 0x3131424e, // NB11
+ CV41 = 0x3930424e, // NB09
+ };
+
+ support::ulittle32_t CVSignature;
+ support::ulittle32_t Offset;
+};
+}
+
+namespace codeview {
+struct PDB70DebugInfo {
+ support::ulittle32_t CVSignature;
+ uint8_t Signature[16];
+ support::ulittle32_t Age;
+ // char PDBFileName[];
+};
+
+struct PDB20DebugInfo {
+ support::ulittle32_t CVSignature;
+ support::ulittle32_t Offset;
+ support::ulittle32_t Signature;
+ support::ulittle32_t Age;
+ // char PDBFileName[];
+};
+
+union DebugInfo {
+ struct OMF::Signature Signature;
+ struct PDB20DebugInfo PDB20;
+ struct PDB70DebugInfo PDB70;
+};
+}
+}
+
+#endif
+
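The new CVDebugRecord.h describes the PDB debug-directory records. A rough sketch of how the signature might be inspected; the byte buffer and its provenance (a PE debug directory payload) are assumptions, not part of the header:

#include "llvm/DebugInfo/CodeView/CVDebugRecord.h"
#include <cstddef>
#include <cstdint>
#include <cstring>

static bool isPDB70Record(const uint8_t *Blob, size_t Size) {
  llvm::OMF::Signature Sig;
  if (Size < sizeof(Sig))
    return false;
  std::memcpy(&Sig, Blob, sizeof(Sig));
  // CVSignature is an endian-aware ulittle32_t, so this comparison also works
  // on big-endian hosts.
  return Sig.CVSignature == llvm::OMF::Signature::PDB70;
}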
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/CVRecord.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/CVRecord.h
index dba359fcbe82..a327d450db55 100644
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/CVRecord.h
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/CVRecord.h
@@ -11,46 +11,73 @@
#define LLVM_DEBUGINFO_CODEVIEW_RECORDITERATOR_H
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/iterator_range.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/DebugInfo/CodeView/CodeViewError.h"
#include "llvm/DebugInfo/CodeView/RecordSerialization.h"
-#include "llvm/DebugInfo/CodeView/StreamInterface.h"
-#include "llvm/DebugInfo/CodeView/StreamReader.h"
+#include "llvm/DebugInfo/MSF/StreamReader.h"
+#include "llvm/DebugInfo/MSF/StreamRef.h"
#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+#include <cstdint>
namespace llvm {
+
namespace codeview {
-template <typename Kind> struct CVRecord {
- uint32_t Length;
+template <typename Kind> class CVRecord {
+public:
+ CVRecord() = default;
+ CVRecord(Kind K, ArrayRef<uint8_t> Data) : Type(K), RecordData(Data) {}
+
+ uint32_t length() const { return RecordData.size(); }
+ Kind kind() const { return Type; }
+ ArrayRef<uint8_t> data() const { return RecordData; }
+
+ ArrayRef<uint8_t> content() const {
+ return RecordData.drop_front(sizeof(RecordPrefix));
+ }
+
+ Optional<uint32_t> hash() const { return Hash; }
+
+ void setHash(uint32_t Value) { Hash = Value; }
+
Kind Type;
- ArrayRef<uint8_t> Data;
- ArrayRef<uint8_t> RawData;
+ ArrayRef<uint8_t> RecordData;
+ Optional<uint32_t> Hash;
};
-template <typename Kind> struct VarStreamArrayExtractor<CVRecord<Kind>> {
- Error operator()(StreamRef Stream, uint32_t &Len,
- CVRecord<Kind> &Item) const {
+} // end namespace codeview
+
+namespace msf {
+
+template <typename Kind>
+struct VarStreamArrayExtractor<codeview::CVRecord<Kind>> {
+ Error operator()(ReadableStreamRef Stream, uint32_t &Len,
+ codeview::CVRecord<Kind> &Item) const {
+ using namespace codeview;
const RecordPrefix *Prefix = nullptr;
StreamReader Reader(Stream);
uint32_t Offset = Reader.getOffset();
if (auto EC = Reader.readObject(Prefix))
return EC;
- Item.Length = Prefix->RecordLen;
- if (Item.Length < 2)
+ if (Prefix->RecordLen < 2)
return make_error<CodeViewError>(cv_error_code::corrupt_record);
- Item.Type = static_cast<Kind>(uint16_t(Prefix->RecordKind));
+ Kind K = static_cast<Kind>(uint16_t(Prefix->RecordKind));
Reader.setOffset(Offset);
+ ArrayRef<uint8_t> RawData;
if (auto EC =
- Reader.readBytes(Item.RawData, Item.Length + sizeof(uint16_t)))
+ Reader.readBytes(RawData, Prefix->RecordLen + sizeof(uint16_t)))
return EC;
- Item.Data = Item.RawData.slice(sizeof(RecordPrefix));
- Len = Prefix->RecordLen + 2;
+ Item = codeview::CVRecord<Kind>(K, RawData);
+ Len = Item.length();
return Error::success();
}
};
-}
-}
-#endif
+} // end namespace msf
+
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_CODEVIEW_RECORDITERATOR_H
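CVRecord now exposes accessors (kind, length, data, content) instead of public Length/Data/RawData fields. A minimal sketch against that interface; the record is assumed to come from the VarStreamArray extractor shown above:

#include <cassert>
#include "llvm/ADT/ArrayRef.h"
#include "llvm/DebugInfo/CodeView/CVRecord.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"

using SymRecord = llvm::codeview::CVRecord<llvm::codeview::SymbolKind>;

// Callers previously poked at the Data/RawData fields directly; content() now
// yields the payload, i.e. the record bytes with the RecordPrefix stripped.
static llvm::ArrayRef<uint8_t> recordPayload(const SymRecord &R) {
  assert(R.length() >= sizeof(llvm::codeview::RecordPrefix) && "truncated record");
  return R.content();
}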
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/CVSymbolVisitor.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/CVSymbolVisitor.h
index 7c88956c984e..b2d3f5ea34a8 100644
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/CVSymbolVisitor.h
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/CVSymbolVisitor.h
@@ -18,83 +18,17 @@
namespace llvm {
namespace codeview {
+class SymbolVisitorCallbacks;
-template <typename Derived> class CVSymbolVisitor {
+class CVSymbolVisitor {
public:
- CVSymbolVisitor(SymbolVisitorDelegate *Delegate) : Delegate(Delegate) {}
+ CVSymbolVisitor(SymbolVisitorCallbacks &Callbacks);
- bool hadError() const { return HadError; }
-
- template <typename T>
- bool consumeObject(ArrayRef<uint8_t> &Data, const T *&Res) {
- if (Data.size() < sizeof(*Res)) {
- HadError = true;
- return false;
- }
- Res = reinterpret_cast<const T *>(Data.data());
- Data = Data.drop_front(sizeof(*Res));
- return true;
- }
-
-/// Actions to take on known symbols. By default, they do nothing. Visit methods
-/// for member records take the FieldData by non-const reference and are
-/// expected to consume the trailing bytes used by the field.
-/// FIXME: Make the visitor interpret the trailing bytes so that clients don't
-/// need to.
-#define SYMBOL_RECORD(EnumName, EnumVal, Name) \
- void visit##Name(SymbolRecordKind Kind, Name &Record) {}
-#define SYMBOL_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
-#include "CVSymbolTypes.def"
-
- void visitSymbolRecord(const CVRecord<SymbolKind> &Record) {
- ArrayRef<uint8_t> Data = Record.Data;
- auto *DerivedThis = static_cast<Derived *>(this);
- DerivedThis->visitSymbolBegin(Record.Type, Data);
- uint32_t RecordOffset = Delegate ? Delegate->getRecordOffset(Data) : 0;
- switch (Record.Type) {
- default:
- DerivedThis->visitUnknownSymbol(Record.Type, Data);
- break;
-#define SYMBOL_RECORD(EnumName, EnumVal, Name) \
- case EnumName: { \
- SymbolRecordKind RK = static_cast<SymbolRecordKind>(EnumName); \
- auto Result = Name::deserialize(RK, RecordOffset, Data); \
- if (Result.getError()) \
- return parseError(); \
- DerivedThis->visit##Name(Record.Type, *Result); \
- break; \
- }
-#define SYMBOL_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) \
- SYMBOL_RECORD(EnumVal, EnumVal, AliasName)
-#include "CVSymbolTypes.def"
- }
- DerivedThis->visitSymbolEnd(Record.Type, Record.Data);
- }
-
- /// Visits the symbol records in Data. Sets the error flag on parse failures.
- void visitSymbolStream(const CVSymbolArray &Symbols) {
- for (const auto &I : Symbols) {
- visitSymbolRecord(I);
- if (hadError())
- break;
- }
- }
-
- /// Action to take on unknown symbols. By default, they are ignored.
- void visitUnknownSymbol(SymbolKind Kind, ArrayRef<uint8_t> Data) {}
-
- /// Paired begin/end actions for all symbols. Receives all record data,
- /// including the fixed-length record prefix.
- void visitSymbolBegin(SymbolKind Leaf, ArrayRef<uint8_t> RecordData) {}
- void visitSymbolEnd(SymbolKind Leaf, ArrayRef<uint8_t> OriginalSymData) {}
-
- /// Helper for returning from a void function when the stream is corrupted.
- void parseError() { HadError = true; }
+ Error visitSymbolRecord(CVSymbol &Record);
+ Error visitSymbolStream(const CVSymbolArray &Symbols);
private:
- SymbolVisitorDelegate *Delegate;
- /// Whether a symbol stream parsing error was encountered.
- bool HadError = false;
+ SymbolVisitorCallbacks &Callbacks;
};
} // end namespace codeview
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h
index 930ac6930c24..d1b0363a4133 100644
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/CVTypeVisitor.h
@@ -10,6 +10,7 @@
#ifndef LLVM_DEBUGINFO_CODEVIEW_CVTYPEVISITOR_H
#define LLVM_DEBUGINFO_CODEVIEW_CVTYPEVISITOR_H
+#include "llvm/ADT/SmallVector.h"
#include "llvm/DebugInfo/CodeView/CVRecord.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h"
@@ -22,16 +23,14 @@ class CVTypeVisitor {
public:
explicit CVTypeVisitor(TypeVisitorCallbacks &Callbacks);
- Error visitTypeRecord(const CVRecord<TypeLeafKind> &Record);
+ Error visitTypeRecord(CVType &Record);
+ Error visitMemberRecord(CVMemberRecord &Record);
/// Visits the type records in Data. Sets the error flag on parse failures.
Error visitTypeStream(const CVTypeArray &Types);
- Error skipPadding(ArrayRef<uint8_t> &Data);
-
- /// Visits individual member records of a field list record. Member records do
- /// not describe their own length, and need special handling.
- Error visitFieldList(const CVRecord<TypeLeafKind> &Record);
+ Error visitFieldListMemberStream(ArrayRef<uint8_t> FieldList);
+ Error visitFieldListMemberStream(msf::StreamReader Reader);
private:
/// The interface to the class that gets notified of each visitation.
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeView.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeView.h
index 1ee203b4f8fa..e21cfa3d030a 100644
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeView.h
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeView.h
@@ -21,8 +21,6 @@ namespace codeview {
enum class TypeRecordKind : uint16_t {
#define TYPE_RECORD(lf_ename, value, name) name = value,
#include "TypeRecords.def"
- // FIXME: Add serialization support
- FieldList = 0x1203,
};
/// Duplicate copy of the above enum, but using the official CV names. Useful
@@ -278,6 +276,7 @@ enum class MethodOptions : uint16_t {
CV_DEFINE_ENUM_CLASS_FLAGS_OPERATORS(MethodOptions)
/// Equivalent to CV_modifier_t.
+/// TODO: Add flag for _Atomic modifier
enum class ModifierOptions : uint16_t {
None = 0x0000,
Const = 0x0001,
@@ -526,7 +525,7 @@ enum class RegisterId : uint16_t {
};
/// These values correspond to the THUNK_ORDINAL enumeration.
-enum class ThunkOrdinal {
+enum class ThunkOrdinal : uint8_t {
Standard,
ThisAdjustor,
Vcall,
@@ -536,7 +535,7 @@ enum class ThunkOrdinal {
BranchIsland
};
-enum class TrampolineType { TrampIncremental, BranchIsland };
+enum class TrampolineType : uint16_t { TrampIncremental, BranchIsland };
// These values correspond to the CV_SourceChksum_t enumeration.
enum class FileChecksumKind : uint8_t { None, MD5, SHA1, SHA256 };
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeViewError.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeViewError.h
index 69ff29aab6f1..0556fd0e19f2 100644
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeViewError.h
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeViewError.h
@@ -21,6 +21,7 @@ enum class cv_error_code {
insufficient_buffer,
operation_unsupported,
corrupt_record,
+ unknown_member_record,
};
/// Base class for errors originating when parsing raw PDB files
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeViewOStream.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeViewOStream.h
deleted file mode 100644
index 14d057a249a5..000000000000
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeViewOStream.h
+++ /dev/null
@@ -1,39 +0,0 @@
-//===- CodeViewOStream.h ----------------------------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_DEBUGINFO_CODEVIEW_CODEVIEWOSTREAM_H
-#define LLVM_DEBUGINFO_CODEVIEW_CODEVIEWOSTREAM_H
-
-#include "llvm/DebugInfo/CodeView/CodeView.h"
-#include "llvm/DebugInfo/CodeView/TypeIndex.h"
-
-namespace llvm {
-namespace codeview {
-
-template <typename Writer> class CodeViewOStream {
-private:
- CodeViewOStream(const CodeViewOStream &) = delete;
- CodeViewOStream &operator=(const CodeViewOStream &) = delete;
-
-public:
- typedef typename Writer::LabelType LabelType;
-
-public:
- explicit CodeViewOStream(Writer &W);
-
-private:
- uint64_t size() const { return W.tell(); }
-
-private:
- Writer &W;
-};
-}
-}
-
-#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h
new file mode 100644
index 000000000000..5a036b9d5b6c
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h
@@ -0,0 +1,170 @@
+//===- CodeViewRecordIO.h ---------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_CODEVIEW_CODEVIEWRECORDIO_H
+#define LLVM_DEBUGINFO_CODEVIEW_CODEVIEWRECORDIO_H
+
+#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/DebugInfo/CodeView/CodeViewError.h"
+#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/MSF/StreamReader.h"
+#include "llvm/DebugInfo/MSF/StreamWriter.h"
+#include "llvm/Support/Error.h"
+#include <cassert>
+#include <cstdint>
+#include <type_traits>
+
+namespace llvm {
+namespace codeview {
+
+class CodeViewRecordIO {
+ uint32_t getCurrentOffset() const {
+ return (isWriting()) ? Writer->getOffset() : Reader->getOffset();
+ }
+
+public:
+ explicit CodeViewRecordIO(msf::StreamReader &Reader) : Reader(&Reader) {}
+ explicit CodeViewRecordIO(msf::StreamWriter &Writer) : Writer(&Writer) {}
+
+ Error beginRecord(Optional<uint32_t> MaxLength);
+ Error endRecord();
+
+ Error mapInteger(TypeIndex &TypeInd);
+
+ bool isReading() const { return Reader != nullptr; }
+ bool isWriting() const { return !isReading(); }
+
+ uint32_t maxFieldLength() const;
+
+ template <typename T> Error mapObject(T &Value) {
+ if (isWriting())
+ return Writer->writeObject(Value);
+
+ const T *ValuePtr;
+ if (auto EC = Reader->readObject(ValuePtr))
+ return EC;
+ Value = *ValuePtr;
+ return Error::success();
+ }
+
+ template <typename T> Error mapInteger(T &Value) {
+ if (isWriting())
+ return Writer->writeInteger(Value);
+
+ return Reader->readInteger(Value);
+ }
+
+ template <typename T> Error mapEnum(T &Value) {
+ if (sizeof(Value) > maxFieldLength())
+ return make_error<CodeViewError>(cv_error_code::insufficient_buffer);
+
+ using U = typename std::underlying_type<T>::type;
+ U X;
+ if (isWriting())
+ X = static_cast<U>(Value);
+
+ if (auto EC = mapInteger(X))
+ return EC;
+ if (isReading())
+ Value = static_cast<T>(X);
+ return Error::success();
+ }
+
+ Error mapEncodedInteger(int64_t &Value);
+ Error mapEncodedInteger(uint64_t &Value);
+ Error mapEncodedInteger(APSInt &Value);
+ Error mapStringZ(StringRef &Value);
+ Error mapGuid(StringRef &Guid);
+
+ Error mapStringZVectorZ(std::vector<StringRef> &Value);
+
+ template <typename SizeType, typename T, typename ElementMapper>
+ Error mapVectorN(T &Items, const ElementMapper &Mapper) {
+ SizeType Size;
+ if (isWriting()) {
+ Size = static_cast<SizeType>(Items.size());
+ if (auto EC = Writer->writeInteger(Size))
+ return EC;
+
+ for (auto &X : Items) {
+ if (auto EC = Mapper(*this, X))
+ return EC;
+ }
+ } else {
+ if (auto EC = Reader->readInteger(Size))
+ return EC;
+ for (SizeType I = 0; I < Size; ++I) {
+ typename T::value_type Item;
+ if (auto EC = Mapper(*this, Item))
+ return EC;
+ Items.push_back(Item);
+ }
+ }
+
+ return Error::success();
+ }
+
+ template <typename T, typename ElementMapper>
+ Error mapVectorTail(T &Items, const ElementMapper &Mapper) {
+ if (isWriting()) {
+ for (auto &Item : Items) {
+ if (auto EC = Mapper(*this, Item))
+ return EC;
+ }
+ } else {
+ typename T::value_type Field;
+ // Stop when we run out of bytes or we hit record padding bytes.
+ while (!Reader->empty() && Reader->peek() < 0xf0 /* LF_PAD0 */) {
+ if (auto EC = Mapper(*this, Field))
+ return EC;
+ Items.push_back(Field);
+ }
+ }
+ return Error::success();
+ }
+
+ Error mapByteVectorTail(ArrayRef<uint8_t> &Bytes);
+ Error mapByteVectorTail(std::vector<uint8_t> &Bytes);
+
+ Error skipPadding();
+
+private:
+ Error writeEncodedSignedInteger(const int64_t &Value);
+ Error writeEncodedUnsignedInteger(const uint64_t &Value);
+
+ struct RecordLimit {
+ uint32_t BeginOffset;
+ Optional<uint32_t> MaxLength;
+
+ Optional<uint32_t> bytesRemaining(uint32_t CurrentOffset) const {
+ if (!MaxLength.hasValue())
+ return None;
+ assert(CurrentOffset >= BeginOffset);
+
+ uint32_t BytesUsed = CurrentOffset - BeginOffset;
+ if (BytesUsed >= *MaxLength)
+ return 0;
+ return *MaxLength - BytesUsed;
+ }
+ };
+
+ SmallVector<RecordLimit, 2> Limits;
+
+ msf::StreamReader *Reader = nullptr;
+ msf::StreamWriter *Writer = nullptr;
+};
+
+} // end namespace codeview
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_CODEVIEW_CODEVIEWRECORDIO_H
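CodeViewRecordIO drives reading and writing through the same mapping calls, depending on whether it wraps a StreamReader or a StreamWriter. A hedged sketch of the intended usage; MyRecord and its fields are invented for the example and are not part of the header above:

#include <cstdint>
#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/CodeView/CodeViewRecordIO.h"
#include "llvm/Support/Error.h"

struct MyRecord {
  uint32_t Count = 0;
  llvm::StringRef Name;

  llvm::Error map(llvm::codeview::CodeViewRecordIO &IO) {
    // With a StreamReader behind IO these calls read the fields; with a
    // StreamWriter they write them, so the layout is described exactly once.
    if (auto EC = IO.mapInteger(Count))
      return EC;
    return IO.mapStringZ(Name);
  }
};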
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/EnumTables.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/EnumTables.h
index 021288e57618..10d1c581a196 100644
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/EnumTables.h
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/EnumTables.h
@@ -20,6 +20,7 @@
namespace llvm {
namespace codeview {
ArrayRef<EnumEntry<SymbolKind>> getSymbolTypeNames();
+ArrayRef<EnumEntry<TypeLeafKind>> getTypeLeafNames();
ArrayRef<EnumEntry<uint16_t>> getRegisterNames();
ArrayRef<EnumEntry<uint8_t>> getProcSymFlagNames();
ArrayRef<EnumEntry<uint16_t>> getLocalFlagNames();
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/FieldListRecordBuilder.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/FieldListRecordBuilder.h
deleted file mode 100644
index 75a075157d22..000000000000
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/FieldListRecordBuilder.h
+++ /dev/null
@@ -1,65 +0,0 @@
-//===- FieldListRecordBuilder.h ---------------------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_DEBUGINFO_CODEVIEW_FIELDLISTRECORDBUILDER_H
-#define LLVM_DEBUGINFO_CODEVIEW_FIELDLISTRECORDBUILDER_H
-
-#include "llvm/DebugInfo/CodeView/ListRecordBuilder.h"
-#include "llvm/DebugInfo/CodeView/TypeRecord.h"
-
-namespace llvm {
-namespace codeview {
-
-class MethodInfo {
-public:
- MethodInfo() : Access(), Kind(), Options(), Type(), VTableSlotOffset(-1) {}
-
- MethodInfo(MemberAccess Access, MethodKind Kind, MethodOptions Options,
- TypeIndex Type, int32_t VTableSlotOffset)
- : Access(Access), Kind(Kind), Options(Options), Type(Type),
- VTableSlotOffset(VTableSlotOffset) {}
-
- MemberAccess getAccess() const { return Access; }
- MethodKind getKind() const { return Kind; }
- MethodOptions getOptions() const { return Options; }
- TypeIndex getType() const { return Type; }
- int32_t getVTableSlotOffset() const { return VTableSlotOffset; }
-
-private:
- MemberAccess Access;
- MethodKind Kind;
- MethodOptions Options;
- TypeIndex Type;
- int32_t VTableSlotOffset;
-};
-
-class FieldListRecordBuilder : public ListRecordBuilder {
-private:
- FieldListRecordBuilder(const FieldListRecordBuilder &) = delete;
- void operator=(const FieldListRecordBuilder &) = delete;
-
-public:
- FieldListRecordBuilder();
-
- void reset() { ListRecordBuilder::reset(); }
-
- void writeBaseClass(const BaseClassRecord &Record);
- void writeEnumerator(const EnumeratorRecord &Record);
- void writeDataMember(const DataMemberRecord &Record);
- void writeOneMethod(const OneMethodRecord &Record);
- void writeOverloadedMethod(const OverloadedMethodRecord &Record);
- void writeNestedType(const NestedTypeRecord &Record);
- void writeStaticDataMember(const StaticDataMemberRecord &Record);
- void writeVirtualBaseClass(const VirtualBaseClassRecord &Record);
- void writeVFPtr(const VFPtrRecord &Type);
-};
-}
-}
-
-#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/ListRecordBuilder.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/ListRecordBuilder.h
deleted file mode 100644
index 00bf03d417a2..000000000000
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/ListRecordBuilder.h
+++ /dev/null
@@ -1,65 +0,0 @@
-//===- ListRecordBuilder.h --------------------------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_DEBUGINFO_CODEVIEW_LISTRECORDBUILDER_H
-#define LLVM_DEBUGINFO_CODEVIEW_LISTRECORDBUILDER_H
-
-#include "llvm/DebugInfo/CodeView/TypeRecordBuilder.h"
-
-namespace llvm {
-namespace codeview {
-class TypeTableBuilder;
-
-class ListRecordBuilder {
-private:
- ListRecordBuilder(const ListRecordBuilder &) = delete;
- ListRecordBuilder &operator=(const ListRecordBuilder &) = delete;
-
-protected:
- const int MethodKindShift = 2;
-
- explicit ListRecordBuilder(TypeRecordKind Kind);
-
-public:
- llvm::StringRef str() { return Builder.str(); }
-
- void reset() {
- Builder.reset(Kind);
- ContinuationOffsets.clear();
- SubrecordStart = 0;
- }
-
- void writeListContinuation(const ListContinuationRecord &R);
-
- /// Writes this list record as a possible sequence of records.
- TypeIndex writeListRecord(TypeTableBuilder &Table);
-
-protected:
- void finishSubRecord();
-
- TypeRecordBuilder &getBuilder() { return Builder; }
-
-private:
- size_t getLastContinuationStart() const {
- return ContinuationOffsets.empty() ? 0 : ContinuationOffsets.back();
- }
- size_t getLastContinuationEnd() const { return Builder.size(); }
- size_t getLastContinuationSize() const {
- return getLastContinuationEnd() - getLastContinuationStart();
- }
-
- TypeRecordKind Kind;
- TypeRecordBuilder Builder;
- SmallVector<size_t, 4> ContinuationOffsets;
- size_t SubrecordStart = 0;
-};
-}
-}
-
-#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/MemoryTypeTableBuilder.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/MemoryTypeTableBuilder.h
deleted file mode 100644
index 002f885c7c5a..000000000000
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/MemoryTypeTableBuilder.h
+++ /dev/null
@@ -1,48 +0,0 @@
-//===- MemoryTypeTableBuilder.h ---------------------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_DEBUGINFO_CODEVIEW_MEMORYTYPETABLEBUILDER_H
-#define LLVM_DEBUGINFO_CODEVIEW_MEMORYTYPETABLEBUILDER_H
-
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/DebugInfo/CodeView/TypeTableBuilder.h"
-#include <vector>
-
-namespace llvm {
-namespace codeview {
-
-class MemoryTypeTableBuilder : public TypeTableBuilder {
-public:
- MemoryTypeTableBuilder() {}
-
- bool empty() const { return Records.empty(); }
-
- template <typename TFunc> void ForEachRecord(TFunc Func) {
- uint32_t Index = TypeIndex::FirstNonSimpleIndex;
-
- for (StringRef R : Records) {
- Func(TypeIndex(Index), R);
- ++Index;
- }
- }
-
-protected:
- TypeIndex writeRecord(llvm::StringRef Data) override;
-
-private:
- std::vector<StringRef> Records;
- BumpPtrAllocator RecordStorage;
- DenseMap<StringRef, TypeIndex> HashedRecords;
-};
-
-} // end namespace codeview
-} // end namespace llvm
-
-#endif // LLVM_DEBUGINFO_CODEVIEW_MEMORYTYPETABLEBUILDER_H
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/MethodListRecordBuilder.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/MethodListRecordBuilder.h
deleted file mode 100644
index faa404d41b1f..000000000000
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/MethodListRecordBuilder.h
+++ /dev/null
@@ -1,35 +0,0 @@
-//===- MethodListRecordBuilder.h --------------------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_DEBUGINFO_CODEVIEW_METHODLISTRECORDBUILDER_H
-#define LLVM_DEBUGINFO_CODEVIEW_METHODLISTRECORDBUILDER_H
-
-#include "llvm/DebugInfo/CodeView/ListRecordBuilder.h"
-
-namespace llvm {
-namespace codeview {
-
-class MethodInfo;
-
-class MethodListRecordBuilder : public ListRecordBuilder {
-private:
- MethodListRecordBuilder(const MethodListRecordBuilder &) = delete;
- MethodListRecordBuilder &operator=(const MethodListRecordBuilder &) = delete;
-
-public:
- MethodListRecordBuilder();
-
- void writeMethod(MemberAccess Access, MethodKind Kind, MethodOptions Options,
- TypeIndex Type, int32_t VTableSlotOffset);
- void writeMethod(const MethodInfo &Method);
-};
-}
-}
-
-#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/ModuleSubstream.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/ModuleSubstream.h
index 6affac801d4d..8860ae42fc09 100644
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/ModuleSubstream.h
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/ModuleSubstream.h
@@ -11,8 +11,8 @@
#define LLVM_DEBUGINFO_CODEVIEW_MODULESUBSTREAM_H
#include "llvm/DebugInfo/CodeView/CodeView.h"
-#include "llvm/DebugInfo/CodeView/StreamArray.h"
-#include "llvm/DebugInfo/CodeView/StreamRef.h"
+#include "llvm/DebugInfo/MSF/StreamArray.h"
+#include "llvm/DebugInfo/MSF/StreamRef.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
@@ -59,29 +59,31 @@ struct ColumnNumberEntry {
class ModuleSubstream {
public:
ModuleSubstream();
- ModuleSubstream(ModuleSubstreamKind Kind, StreamRef Data);
- static Error initialize(StreamRef Stream, ModuleSubstream &Info);
+ ModuleSubstream(ModuleSubstreamKind Kind, msf::ReadableStreamRef Data);
+ static Error initialize(msf::ReadableStreamRef Stream, ModuleSubstream &Info);
uint32_t getRecordLength() const;
ModuleSubstreamKind getSubstreamKind() const;
- StreamRef getRecordData() const;
+ msf::ReadableStreamRef getRecordData() const;
private:
ModuleSubstreamKind Kind;
- StreamRef Data;
+ msf::ReadableStreamRef Data;
};
-template <> struct VarStreamArrayExtractor<ModuleSubstream> {
- Error operator()(StreamRef Stream, uint32_t &Length,
- ModuleSubstream &Info) const {
- if (auto EC = ModuleSubstream::initialize(Stream, Info))
+typedef msf::VarStreamArray<ModuleSubstream> ModuleSubstreamArray;
+} // namespace codeview
+
+namespace msf {
+template <> struct VarStreamArrayExtractor<codeview::ModuleSubstream> {
+ Error operator()(ReadableStreamRef Stream, uint32_t &Length,
+ codeview::ModuleSubstream &Info) const {
+ if (auto EC = codeview::ModuleSubstream::initialize(Stream, Info))
return EC;
Length = Info.getRecordLength();
return Error::success();
}
};
-
-typedef VarStreamArray<ModuleSubstream> ModuleSubstreamArray;
-}
-}
+} // namespace msf
+} // namespace llvm
#endif // LLVM_DEBUGINFO_CODEVIEW_MODULESUBSTREAM_H
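With StreamRef and VarStreamArray now living in the msf namespace, the VarStreamArrayExtractor<codeview::ModuleSubstream> specialization above is what lets a ModuleSubstreamArray carve a raw byte stream into variable-length substream records. A hedged sketch of the read side follows; it assumes msf::StreamReader has a readArray overload for VarStreamArray (only the ArrayRef overload is visible in this patch), and consumeSubstream is invented for illustration.

    using namespace llvm;

    // ModuleData is whatever ReadableStreamRef covers the module's debug data.
    Error walkSubstreams(msf::ReadableStreamRef ModuleData) {
      codeview::ModuleSubstreamArray Substreams;
      msf::StreamReader Reader(ModuleData);
      if (auto EC = Reader.readArray(Substreams, Reader.bytesRemaining()))
        return EC;
      for (const codeview::ModuleSubstream &SS : Substreams) {
        // Each element was sliced out by the extractor specialization above,
        // which computes the record length via ModuleSubstream::initialize.
        consumeSubstream(SS.getSubstreamKind(), SS.getRecordData()); // hypothetical
      }
      return Error::success();
    }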
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/ModuleSubstreamVisitor.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/ModuleSubstreamVisitor.h
index 6df230903712..f9927d660933 100644
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/ModuleSubstreamVisitor.h
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/ModuleSubstreamVisitor.h
@@ -10,28 +10,75 @@
#ifndef LLVM_DEBUGINFO_CODEVIEW_MODULESUBSTREAMVISITOR_H
#define LLVM_DEBUGINFO_CODEVIEW_MODULESUBSTREAMVISITOR_H
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/CodeViewError.h"
#include "llvm/DebugInfo/CodeView/Line.h"
#include "llvm/DebugInfo/CodeView/ModuleSubstream.h"
-#include "llvm/DebugInfo/CodeView/StreamReader.h"
-#include "llvm/DebugInfo/CodeView/StreamRef.h"
+#include "llvm/DebugInfo/MSF/StreamArray.h"
+#include "llvm/DebugInfo/MSF/StreamReader.h"
+#include "llvm/DebugInfo/MSF/StreamRef.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+#include <cstdint>
namespace llvm {
+
namespace codeview {
struct LineColumnEntry {
support::ulittle32_t NameIndex;
- FixedStreamArray<LineNumberEntry> LineNumbers;
- FixedStreamArray<ColumnNumberEntry> Columns;
+ msf::FixedStreamArray<LineNumberEntry> LineNumbers;
+ msf::FixedStreamArray<ColumnNumberEntry> Columns;
};
-template <> class VarStreamArrayExtractor<LineColumnEntry> {
+struct FileChecksumEntry {
+ uint32_t FileNameOffset; // Byte offset of filename in global stringtable.
+ FileChecksumKind Kind; // The type of checksum.
+ ArrayRef<uint8_t> Checksum; // The bytes of the checksum.
+};
+
+typedef msf::VarStreamArray<LineColumnEntry> LineInfoArray;
+typedef msf::VarStreamArray<FileChecksumEntry> FileChecksumArray;
+
+class IModuleSubstreamVisitor {
public:
- VarStreamArrayExtractor(const LineSubstreamHeader *Header) : Header(Header) {}
+ virtual ~IModuleSubstreamVisitor() = default;
- Error operator()(StreamRef Stream, uint32_t &Len,
- LineColumnEntry &Item) const {
+ virtual Error visitUnknown(ModuleSubstreamKind Kind,
+ msf::ReadableStreamRef Data) = 0;
+ virtual Error visitSymbols(msf::ReadableStreamRef Data);
+ virtual Error visitLines(msf::ReadableStreamRef Data,
+ const LineSubstreamHeader *Header,
+ const LineInfoArray &Lines);
+ virtual Error visitStringTable(msf::ReadableStreamRef Data);
+ virtual Error visitFileChecksums(msf::ReadableStreamRef Data,
+ const FileChecksumArray &Checksums);
+ virtual Error visitFrameData(msf::ReadableStreamRef Data);
+ virtual Error visitInlineeLines(msf::ReadableStreamRef Data);
+ virtual Error visitCrossScopeImports(msf::ReadableStreamRef Data);
+ virtual Error visitCrossScopeExports(msf::ReadableStreamRef Data);
+ virtual Error visitILLines(msf::ReadableStreamRef Data);
+ virtual Error visitFuncMDTokenMap(msf::ReadableStreamRef Data);
+ virtual Error visitTypeMDTokenMap(msf::ReadableStreamRef Data);
+ virtual Error visitMergedAssemblyInput(msf::ReadableStreamRef Data);
+ virtual Error visitCoffSymbolRVA(msf::ReadableStreamRef Data);
+};
+
+Error visitModuleSubstream(const ModuleSubstream &R,
+ IModuleSubstreamVisitor &V);
+} // end namespace codeview
+
+namespace msf {
+
+template <> class VarStreamArrayExtractor<codeview::LineColumnEntry> {
+public:
+ VarStreamArrayExtractor(const codeview::LineSubstreamHeader *Header)
+ : Header(Header) {}
+
+ Error operator()(ReadableStreamRef Stream, uint32_t &Len,
+ codeview::LineColumnEntry &Item) const {
+ using namespace codeview;
const LineFileBlockHeader *BlockHeader;
StreamReader Reader(Stream);
if (auto EC = Reader.readObject(BlockHeader))
@@ -61,19 +108,14 @@ public:
}
private:
- const LineSubstreamHeader *Header;
+ const codeview::LineSubstreamHeader *Header;
};
-struct FileChecksumEntry {
- uint32_t FileNameOffset; // Byte offset of filename in global stringtable.
- FileChecksumKind Kind; // The type of checksum.
- ArrayRef<uint8_t> Checksum; // The bytes of the checksum.
-};
-
-template <> class VarStreamArrayExtractor<FileChecksumEntry> {
+template <> class VarStreamArrayExtractor<codeview::FileChecksumEntry> {
public:
- Error operator()(StreamRef Stream, uint32_t &Len,
- FileChecksumEntry &Item) const {
+ Error operator()(ReadableStreamRef Stream, uint32_t &Len,
+ codeview::FileChecksumEntry &Item) const {
+ using namespace codeview;
const FileChecksum *Header;
StreamReader Reader(Stream);
if (auto EC = Reader.readObject(Header))
@@ -87,35 +129,8 @@ public:
}
};
-typedef VarStreamArray<LineColumnEntry> LineInfoArray;
-typedef VarStreamArray<FileChecksumEntry> FileChecksumArray;
-
-class IModuleSubstreamVisitor {
-public:
- virtual ~IModuleSubstreamVisitor() {}
-
- virtual Error visitUnknown(ModuleSubstreamKind Kind, StreamRef Data) = 0;
- virtual Error visitSymbols(StreamRef Data);
- virtual Error visitLines(StreamRef Data, const LineSubstreamHeader *Header,
- const LineInfoArray &Lines);
- virtual Error visitStringTable(StreamRef Data);
- virtual Error visitFileChecksums(StreamRef Data,
- const FileChecksumArray &Checksums);
- virtual Error visitFrameData(StreamRef Data);
- virtual Error visitInlineeLines(StreamRef Data);
- virtual Error visitCrossScopeImports(StreamRef Data);
- virtual Error visitCrossScopeExports(StreamRef Data);
- virtual Error visitILLines(StreamRef Data);
- virtual Error visitFuncMDTokenMap(StreamRef Data);
- virtual Error visitTypeMDTokenMap(StreamRef Data);
- virtual Error visitMergedAssemblyInput(StreamRef Data);
- virtual Error visitCoffSymbolRVA(StreamRef Data);
-};
-
-Error visitModuleSubstream(const ModuleSubstream &R,
- IModuleSubstreamVisitor &V);
+} // end namespace msf
-} // namespace codeview
-} // namespace llvm
+} // end namespace llvm
#endif // LLVM_DEBUGINFO_CODEVIEW_MODULESUBSTREAMVISITOR_H
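The reshuffled header above keeps the visitor interface in the codeview namespace and moves only the extractors into msf. A client overrides visitUnknown (the only pure virtual method) plus whichever hooks it cares about, then hands each ModuleSubstream to visitModuleSubstream. A small sketch; the counter class and its name are invented for illustration, the signatures come from the interface above.

    using namespace llvm;

    class LineBlockCounter : public codeview::IModuleSubstreamVisitor {
    public:
      // Required override: substream kinds we don't handle are ignored here.
      Error visitUnknown(codeview::ModuleSubstreamKind Kind,
                         msf::ReadableStreamRef Data) override {
        return Error::success();
      }
      // Count the per-file blocks in a lines substream.
      Error visitLines(msf::ReadableStreamRef Data,
                       const codeview::LineSubstreamHeader *Header,
                       const codeview::LineInfoArray &Lines) override {
        for (const codeview::LineColumnEntry &Block : Lines) {
          (void)Block;
          ++NumBlocks;
        }
        return Error::success();
      }
      unsigned NumBlocks = 0;
    };

    // Usage, given a codeview::ModuleSubstream SS:
    //   LineBlockCounter Counter;
    //   if (auto EC = codeview::visitModuleSubstream(SS, Counter)) ...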
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/RecordSerialization.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/RecordSerialization.h
index 84179f5f81f7..97b6f561bb97 100644
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/RecordSerialization.h
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/RecordSerialization.h
@@ -13,8 +13,11 @@
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/Endian.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/CodeViewError.h"
+#include "llvm/DebugInfo/MSF/StreamReader.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
#include <cinttypes>
#include <tuple>
@@ -24,8 +27,12 @@ using llvm::support::little32_t;
using llvm::support::ulittle16_t;
using llvm::support::ulittle32_t;
+/// Limit on the size of all codeview symbol and type records, including the
+/// RecordPrefix. MSVC does not emit any records larger than this.
+enum : unsigned { MaxRecordLength = 0xFF00 };
+
struct RecordPrefix {
- ulittle16_t RecordLen; // Record length, starting from &Leaf.
+ ulittle16_t RecordLen; // Record length, starting from &RecordKind.
ulittle16_t RecordKind; // Record kind enum (SymRecordKind or TypeRecordKind)
};
@@ -34,54 +41,40 @@ struct RecordPrefix {
StringRef getBytesAsCharacters(ArrayRef<uint8_t> LeafData);
StringRef getBytesAsCString(ArrayRef<uint8_t> LeafData);
-/// Consumes sizeof(T) bytes from the given byte sequence. Returns an error if
-/// there are not enough bytes remaining. Reinterprets the consumed bytes as a
-/// T object and points 'Res' at them.
-template <typename T, typename U>
-inline std::error_code consumeObject(U &Data, const T *&Res) {
- if (Data.size() < sizeof(*Res))
- return std::make_error_code(std::errc::illegal_byte_sequence);
- Res = reinterpret_cast<const T *>(Data.data());
- Data = Data.drop_front(sizeof(*Res));
- return std::error_code();
-}
-
-inline std::error_code consume(ArrayRef<uint8_t> &Data) {
- return std::error_code();
-}
+inline Error consume(msf::StreamReader &Reader) { return Error::success(); }
/// Decodes a numeric "leaf" value. These are integer literals encountered in
/// the type stream. If the value is positive and less than LF_NUMERIC (1 <<
/// 15), it is emitted directly in Data. Otherwise, it has a tag like LF_CHAR
/// that indicates the bitwidth and sign of the numeric data.
-std::error_code consume(ArrayRef<uint8_t> &Data, APSInt &Num);
-std::error_code consume(StringRef &Data, APSInt &Num);
+Error consume(msf::StreamReader &Reader, APSInt &Num);
/// Decodes a numeric leaf value that is known to be a particular type.
-std::error_code consume_numeric(ArrayRef<uint8_t> &Data, uint64_t &Value);
+Error consume_numeric(msf::StreamReader &Reader, uint64_t &Value);
/// Decodes signed and unsigned fixed-length integers.
-std::error_code consume(ArrayRef<uint8_t> &Data, uint32_t &Item);
-std::error_code consume(StringRef &Data, uint32_t &Item);
-std::error_code consume(ArrayRef<uint8_t> &Data, int32_t &Item);
+Error consume(msf::StreamReader &Reader, uint32_t &Item);
+Error consume(msf::StreamReader &Reader, int32_t &Item);
/// Decodes a null terminated string.
-std::error_code consume(ArrayRef<uint8_t> &Data, StringRef &Item);
+Error consume(msf::StreamReader &Reader, StringRef &Item);
+
+Error consume(StringRef &Data, APSInt &Num);
+Error consume(StringRef &Data, uint32_t &Item);
/// Decodes an arbitrary object whose layout matches that of the underlying
/// byte sequence, and returns a pointer to the object.
-template <typename T>
-std::error_code consume(ArrayRef<uint8_t> &Data, T *&Item) {
- return consumeObject(Data, Item);
+template <typename T> Error consume(msf::StreamReader &Reader, T *&Item) {
+ return Reader.readObject(Item);
}
template <typename T, typename U> struct serialize_conditional_impl {
serialize_conditional_impl(T &Item, U Func) : Item(Item), Func(Func) {}
- std::error_code deserialize(ArrayRef<uint8_t> &Data) const {
+ Error deserialize(msf::StreamReader &Reader) const {
if (!Func())
- return std::error_code();
- return consume(Data, Item);
+ return Error::success();
+ return consume(Reader, Item);
}
T &Item;
@@ -96,22 +89,8 @@ serialize_conditional_impl<T, U> serialize_conditional(T &Item, U Func) {
template <typename T, typename U> struct serialize_array_impl {
serialize_array_impl(ArrayRef<T> &Item, U Func) : Item(Item), Func(Func) {}
- std::error_code deserialize(ArrayRef<uint8_t> &Data) const {
- uint32_t N = Func();
- if (N == 0)
- return std::error_code();
-
- uint32_t Size = sizeof(T) * N;
-
- if (Size / sizeof(T) != N)
- return std::make_error_code(std::errc::illegal_byte_sequence);
-
- if (Data.size() < Size)
- return std::make_error_code(std::errc::illegal_byte_sequence);
-
- Item = ArrayRef<T>(reinterpret_cast<const T *>(Data.data()), N);
- Data = Data.drop_front(Size);
- return std::error_code();
+ Error deserialize(msf::StreamReader &Reader) const {
+ return Reader.readArray(Item, Func());
}
ArrayRef<T> &Item;
@@ -121,15 +100,15 @@ template <typename T, typename U> struct serialize_array_impl {
template <typename T> struct serialize_vector_tail_impl {
serialize_vector_tail_impl(std::vector<T> &Item) : Item(Item) {}
- std::error_code deserialize(ArrayRef<uint8_t> &Data) const {
+ Error deserialize(msf::StreamReader &Reader) const {
T Field;
// Stop when we run out of bytes or we hit record padding bytes.
- while (!Data.empty() && Data.front() < LF_PAD0) {
- if (auto EC = consume(Data, Field))
+ while (!Reader.empty() && Reader.peek() < LF_PAD0) {
+ if (auto EC = consume(Reader, Field))
return EC;
Item.push_back(Field);
}
- return std::error_code();
+ return Error::success();
}
std::vector<T> &Item;
@@ -139,21 +118,18 @@ struct serialize_null_term_string_array_impl {
serialize_null_term_string_array_impl(std::vector<StringRef> &Item)
: Item(Item) {}
- std::error_code deserialize(ArrayRef<uint8_t> &Data) const {
- if (Data.empty())
- return std::make_error_code(std::errc::illegal_byte_sequence);
+ Error deserialize(msf::StreamReader &Reader) const {
+ if (Reader.empty())
+ return make_error<CodeViewError>(cv_error_code::insufficient_buffer,
+ "Null terminated string is empty!");
- StringRef Field;
- // Stop when we run out of bytes or we hit record padding bytes.
- while (Data.front() != 0) {
- if (auto EC = consume(Data, Field))
+ while (Reader.peek() != 0) {
+ StringRef Field;
+ if (auto EC = Reader.readZeroString(Field))
return EC;
Item.push_back(Field);
- if (Data.empty())
- return std::make_error_code(std::errc::illegal_byte_sequence);
}
- Data = Data.drop_front(1);
- return std::error_code();
+ return Reader.skip(1);
}
std::vector<StringRef> &Item;
@@ -162,10 +138,9 @@ struct serialize_null_term_string_array_impl {
template <typename T> struct serialize_arrayref_tail_impl {
serialize_arrayref_tail_impl(ArrayRef<T> &Item) : Item(Item) {}
- std::error_code deserialize(ArrayRef<uint8_t> &Data) const {
- uint32_t Count = Data.size() / sizeof(T);
- Item = ArrayRef<T>(reinterpret_cast<const T *>(Data.begin()), Count);
- return std::error_code();
+ Error deserialize(msf::StreamReader &Reader) const {
+ uint32_t Count = Reader.bytesRemaining() / sizeof(T);
+ return Reader.readArray(Item, Count);
}
ArrayRef<T> &Item;
@@ -174,8 +149,8 @@ template <typename T> struct serialize_arrayref_tail_impl {
template <typename T> struct serialize_numeric_impl {
serialize_numeric_impl(T &Item) : Item(Item) {}
- std::error_code deserialize(ArrayRef<uint8_t> &Data) const {
- return consume_numeric(Data, Item);
+ Error deserialize(msf::StreamReader &Reader) const {
+ return consume_numeric(Reader, Item);
}
T &Item;
@@ -226,52 +201,50 @@ template <typename T> serialize_numeric_impl<T> serialize_numeric(T &Item) {
#define CV_NUMERIC_FIELD(I) serialize_numeric(I)
template <typename T, typename U>
-std::error_code consume(ArrayRef<uint8_t> &Data,
- const serialize_conditional_impl<T, U> &Item) {
- return Item.deserialize(Data);
+Error consume(msf::StreamReader &Reader,
+ const serialize_conditional_impl<T, U> &Item) {
+ return Item.deserialize(Reader);
}
template <typename T, typename U>
-std::error_code consume(ArrayRef<uint8_t> &Data,
- const serialize_array_impl<T, U> &Item) {
- return Item.deserialize(Data);
+Error consume(msf::StreamReader &Reader,
+ const serialize_array_impl<T, U> &Item) {
+ return Item.deserialize(Reader);
}
-inline std::error_code
-consume(ArrayRef<uint8_t> &Data,
- const serialize_null_term_string_array_impl &Item) {
- return Item.deserialize(Data);
+inline Error consume(msf::StreamReader &Reader,
+ const serialize_null_term_string_array_impl &Item) {
+ return Item.deserialize(Reader);
}
template <typename T>
-std::error_code consume(ArrayRef<uint8_t> &Data,
- const serialize_vector_tail_impl<T> &Item) {
- return Item.deserialize(Data);
+Error consume(msf::StreamReader &Reader,
+ const serialize_vector_tail_impl<T> &Item) {
+ return Item.deserialize(Reader);
}
template <typename T>
-std::error_code consume(ArrayRef<uint8_t> &Data,
- const serialize_arrayref_tail_impl<T> &Item) {
- return Item.deserialize(Data);
+Error consume(msf::StreamReader &Reader,
+ const serialize_arrayref_tail_impl<T> &Item) {
+ return Item.deserialize(Reader);
}
template <typename T>
-std::error_code consume(ArrayRef<uint8_t> &Data,
- const serialize_numeric_impl<T> &Item) {
- return Item.deserialize(Data);
+Error consume(msf::StreamReader &Reader,
+ const serialize_numeric_impl<T> &Item) {
+ return Item.deserialize(Reader);
}
template <typename T, typename U, typename... Args>
-std::error_code consume(ArrayRef<uint8_t> &Data, T &&X, U &&Y,
- Args &&... Rest) {
- if (auto EC = consume(Data, X))
+Error consume(msf::StreamReader &Reader, T &&X, U &&Y, Args &&... Rest) {
+ if (auto EC = consume(Reader, X))
return EC;
- return consume(Data, Y, std::forward<Args>(Rest)...);
+ return consume(Reader, Y, std::forward<Args>(Rest)...);
}
#define CV_DESERIALIZE(...) \
if (auto EC = consume(__VA_ARGS__)) \
- return EC;
+ return std::move(EC);
}
}
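The rewritten RecordSerialization.h retires the ArrayRef<uint8_t> cursor in favor of msf::StreamReader, so every consume() overload and the CV_DESERIALIZE macro now advance a reader and return Error. A hedged sketch of a deserializer built on it; FooHeader, its field, and parseFoo are placeholders, while the consume machinery and the macro expansion are taken from the header above.

    using namespace llvm;

    struct FooHeader {                     // hypothetical fixed-layout header
      support::ulittle32_t Value;
    };

    Error parseFoo(msf::StreamReader &Reader) {
      const FooHeader *H = nullptr;        // filled in by Reader.readObject
      StringRef Name;                      // null-terminated name that follows
      // Expands to: if (auto EC = consume(Reader, H, Name)) return std::move(EC);
      CV_DESERIALIZE(Reader, H, Name);
      return Error::success();
    }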
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/StreamRef.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/StreamRef.h
deleted file mode 100644
index a4f244a32289..000000000000
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/StreamRef.h
+++ /dev/null
@@ -1,104 +0,0 @@
-//===- StreamRef.h - A copyable reference to a stream -----------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_DEBUGINFO_CODEVIEW_STREAMREF_H
-#define LLVM_DEBUGINFO_CODEVIEW_STREAMREF_H
-
-#include "llvm/DebugInfo/CodeView/CodeViewError.h"
-#include "llvm/DebugInfo/CodeView/StreamInterface.h"
-
-namespace llvm {
-namespace codeview {
-
-class StreamRef {
-public:
- StreamRef() : Stream(nullptr), ViewOffset(0), Length(0) {}
- StreamRef(const StreamInterface &Stream)
- : Stream(&Stream), ViewOffset(0), Length(Stream.getLength()) {}
- StreamRef(const StreamInterface &Stream, uint32_t Offset, uint32_t Length)
- : Stream(&Stream), ViewOffset(Offset), Length(Length) {}
-
- // Use StreamRef.slice() instead.
- StreamRef(const StreamRef &S, uint32_t Offset, uint32_t Length) = delete;
-
- Error readBytes(uint32_t Offset, uint32_t Size,
- ArrayRef<uint8_t> &Buffer) const {
- if (ViewOffset + Offset < Offset)
- return make_error<CodeViewError>(cv_error_code::insufficient_buffer);
- if (Size + Offset > Length)
- return make_error<CodeViewError>(cv_error_code::insufficient_buffer);
- return Stream->readBytes(ViewOffset + Offset, Size, Buffer);
- }
-
- // Given an offset into the stream, read as much as possible without copying
- // any data.
- Error readLongestContiguousChunk(uint32_t Offset,
- ArrayRef<uint8_t> &Buffer) const {
- if (Offset >= Length)
- return make_error<CodeViewError>(cv_error_code::insufficient_buffer);
-
- if (auto EC = Stream->readLongestContiguousChunk(Offset, Buffer))
- return EC;
- // This StreamRef might refer to a smaller window over a larger stream. In
- // that case we will have read out more bytes than we should return, because
- // we should not read past the end of the current view.
- uint32_t MaxLength = Length - Offset;
- if (Buffer.size() > MaxLength)
- Buffer = Buffer.slice(0, MaxLength);
- return Error::success();
- }
-
- Error writeBytes(uint32_t Offset, ArrayRef<uint8_t> Data) const {
- if (Data.size() + Offset > Length)
- return make_error<CodeViewError>(cv_error_code::insufficient_buffer);
- return Stream->writeBytes(ViewOffset + Offset, Data);
- }
-
- uint32_t getLength() const { return Length; }
-
- Error commit() const { return Stream->commit(); }
-
- StreamRef drop_front(uint32_t N) const {
- if (!Stream)
- return StreamRef();
-
- N = std::min(N, Length);
- return StreamRef(*Stream, ViewOffset + N, Length - N);
- }
-
- StreamRef keep_front(uint32_t N) const {
- if (!Stream)
- return StreamRef();
- N = std::min(N, Length);
- return StreamRef(*Stream, ViewOffset, N);
- }
-
- StreamRef slice(uint32_t Offset, uint32_t Len) const {
- return drop_front(Offset).keep_front(Len);
- }
-
- bool operator==(const StreamRef &Other) const {
- if (Stream != Other.Stream)
- return false;
- if (ViewOffset != Other.ViewOffset)
- return false;
- if (Length != Other.Length)
- return false;
- return true;
- }
-
-private:
- const StreamInterface *Stream;
- uint32_t ViewOffset;
- uint32_t Length;
-};
-}
-}
-
-#endif // LLVM_DEBUGINFO_CODEVIEW_STREAMREF_H
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h
new file mode 100644
index 000000000000..13c2bb14ecf5
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h
@@ -0,0 +1,74 @@
+//===- SymbolDeserializer.h -------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_CODEVIEW_SYMBOLDESERIALIZER_H
+#define LLVM_DEBUGINFO_CODEVIEW_SYMBOLDESERIALIZER_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
+#include "llvm/DebugInfo/CodeView/SymbolRecordMapping.h"
+#include "llvm/DebugInfo/CodeView/SymbolVisitorCallbacks.h"
+#include "llvm/DebugInfo/CodeView/SymbolVisitorDelegate.h"
+#include "llvm/DebugInfo/MSF/ByteStream.h"
+#include "llvm/DebugInfo/MSF/StreamReader.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+namespace codeview {
+class SymbolVisitorDelegate;
+class SymbolDeserializer : public SymbolVisitorCallbacks {
+ struct MappingInfo {
+ explicit MappingInfo(ArrayRef<uint8_t> RecordData)
+ : Stream(RecordData), Reader(Stream), Mapping(Reader) {}
+
+ msf::ByteStream Stream;
+ msf::StreamReader Reader;
+ SymbolRecordMapping Mapping;
+ };
+
+public:
+ explicit SymbolDeserializer(SymbolVisitorDelegate *Delegate)
+ : Delegate(Delegate) {}
+
+ Error visitSymbolBegin(CVSymbol &Record) override {
+ assert(!Mapping && "Already in a symbol mapping!");
+ Mapping = llvm::make_unique<MappingInfo>(Record.content());
+ return Mapping->Mapping.visitSymbolBegin(Record);
+ }
+ Error visitSymbolEnd(CVSymbol &Record) override {
+ assert(Mapping && "Not in a symbol mapping!");
+ auto EC = Mapping->Mapping.visitSymbolEnd(Record);
+ Mapping.reset();
+ return EC;
+ }
+
+#define SYMBOL_RECORD(EnumName, EnumVal, Name) \
+ Error visitKnownRecord(CVSymbol &CVR, Name &Record) override { \
+ return visitKnownRecordImpl(CVR, Record); \
+ }
+#define SYMBOL_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
+#include "CVSymbolTypes.def"
+
+private:
+ template <typename T> Error visitKnownRecordImpl(CVSymbol &CVR, T &Record) {
+
+ Record.RecordOffset =
+ Delegate ? Delegate->getRecordOffset(Mapping->Reader) : 0;
+ if (auto EC = Mapping->Mapping.visitKnownRecord(CVR, Record))
+ return EC;
+ return Error::success();
+ }
+
+ SymbolVisitorDelegate *Delegate;
+ std::unique_ptr<MappingInfo> Mapping;
+};
+}
+}
+
+#endif
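SymbolDeserializer is the reading counterpart of the mapping machinery: visitSymbolBegin wraps the record bytes in a ByteStream/StreamReader pair, and the generated visitKnownRecord overloads hand each concrete record type to SymbolRecordMapping. A sketch of driving it by hand for one S_PUB32 record; the function name and the Sym argument are assumed, while the calls and the PublicSym32 fields appear elsewhere in this patch.

    using namespace llvm;

    Error deserializePublic(codeview::CVSymbol &Sym) {
      // No delegate: RecordOffset on the deserialized record stays 0.
      codeview::SymbolDeserializer Deserializer(/*Delegate=*/nullptr);
      codeview::PublicSym32 Pub(codeview::SymbolRecordKind::PublicSym32);
      if (auto EC = Deserializer.visitSymbolBegin(Sym))
        return EC;
      if (auto EC = Deserializer.visitKnownRecord(Sym, Pub))
        return EC;
      if (auto EC = Deserializer.visitSymbolEnd(Sym))
        return EC;
      // Pub.Index, Pub.Offset, Pub.Segment and Pub.Name are now populated.
      return Error::success();
    }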
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolDumpDelegate.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolDumpDelegate.h
index 30b0a40451cb..823636c398de 100644
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolDumpDelegate.h
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolDumpDelegate.h
@@ -10,20 +10,17 @@
#ifndef LLVM_DEBUGINFO_CODEVIEW_SYMBOLDUMPDELEGATE_H
#define LLVM_DEBUGINFO_CODEVIEW_SYMBOLDUMPDELEGATE_H
-#include "SymbolVisitorDelegate.h"
-
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
-
-#include <stdint.h>
+#include "llvm/DebugInfo/CodeView/SymbolVisitorDelegate.h"
+#include <cstdint>
namespace llvm {
-
namespace codeview {
class SymbolDumpDelegate : public SymbolVisitorDelegate {
public:
- virtual ~SymbolDumpDelegate() {}
+ ~SymbolDumpDelegate() override = default;
virtual void printRelocatedField(StringRef Label, uint32_t RelocOffset,
uint32_t Offset,
@@ -31,6 +28,7 @@ public:
virtual void printBinaryBlockWithRelocs(StringRef Label,
ArrayRef<uint8_t> Block) = 0;
};
+
} // end namespace codeview
} // end namespace llvm
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolDumper.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolDumper.h
index 648e40f55810..eb63f7895a1e 100644
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolDumper.h
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolDumper.h
@@ -35,11 +35,11 @@ public:
/// and true otherwise. This should be called in order, since the dumper
/// maintains state about previous records which are necessary for cross
/// type references.
- bool dump(const CVRecord<SymbolKind> &Record);
+ Error dump(CVRecord<SymbolKind> &Record);
/// Dumps the type records in Data. Returns false if there was a type stream
/// parse error, and true otherwise.
- bool dump(const CVSymbolArray &Symbols);
+ Error dump(const CVSymbolArray &Symbols);
private:
ScopedPrinter &W;
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h
index 77e894fba4a9..57772d39e972 100644
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolRecord.h
@@ -11,23 +11,24 @@
#define LLVM_DEBUGINFO_CODEVIEW_SYMBOLRECORD_H
#include "llvm/ADT/APSInt.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/CodeView/CVRecord.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/RecordSerialization.h"
-#include "llvm/DebugInfo/CodeView/StreamArray.h"
-#include "llvm/DebugInfo/CodeView/StreamInterface.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
+#include "llvm/DebugInfo/MSF/StreamArray.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
+#include <cstddef>
+#include <cstdint>
+#include <vector>
namespace llvm {
namespace codeview {
-using llvm::support::ulittle16_t;
-using llvm::support::ulittle32_t;
-using llvm::support::little32_t;
-
class SymbolRecord {
protected:
explicit SymbolRecord(SymbolRecordKind Kind) : Kind(Kind) {}
@@ -42,216 +43,119 @@ private:
// S_GPROC32, S_LPROC32, S_GPROC32_ID, S_LPROC32_ID, S_LPROC32_DPC or
// S_LPROC32_DPC_ID
class ProcSym : public SymbolRecord {
-public:
- struct Hdr {
- ulittle32_t PtrParent;
- ulittle32_t PtrEnd;
- ulittle32_t PtrNext;
- ulittle32_t CodeSize;
- ulittle32_t DbgStart;
- ulittle32_t DbgEnd;
- TypeIndex FunctionType;
- ulittle32_t CodeOffset;
- ulittle16_t Segment;
- uint8_t Flags; // ProcSymFlags enum
- // Name: The null-terminated name follows.
- };
-
- ProcSym(SymbolRecordKind Kind, uint32_t RecordOffset, const Hdr *H,
- StringRef Name)
- : SymbolRecord(Kind), RecordOffset(RecordOffset), Header(*H), Name(Name) {
- }
+ static constexpr uint32_t RelocationOffset = 32;
- static ErrorOr<ProcSym> deserialize(SymbolRecordKind Kind,
- uint32_t RecordOffset,
- ArrayRef<uint8_t> &Data) {
- const Hdr *H = nullptr;
- StringRef Name;
- CV_DESERIALIZE(Data, H, Name);
-
- return ProcSym(Kind, RecordOffset, H, Name);
- }
+public:
+ explicit ProcSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
+ ProcSym(SymbolRecordKind Kind, uint32_t RecordOffset)
+ : SymbolRecord(Kind), RecordOffset(RecordOffset) {}
uint32_t getRelocationOffset() const {
- return RecordOffset + offsetof(Hdr, CodeOffset);
- }
-
- uint32_t RecordOffset;
- Hdr Header;
+ return RecordOffset + RelocationOffset;
+ }
+
+ uint32_t Parent = 0;
+ uint32_t End = 0;
+ uint32_t Next = 0;
+ uint32_t CodeSize = 0;
+ uint32_t DbgStart = 0;
+ uint32_t DbgEnd = 0;
+ TypeIndex FunctionType;
+ uint32_t CodeOffset = 0;
+ uint16_t Segment = 0;
+ ProcSymFlags Flags = ProcSymFlags::None;
StringRef Name;
+
+ uint32_t RecordOffset = 0;
};
// S_THUNK32
class Thunk32Sym : public SymbolRecord {
public:
- struct Hdr {
- ulittle32_t Parent;
- ulittle32_t End;
- ulittle32_t Next;
- ulittle32_t Off;
- ulittle16_t Seg;
- ulittle16_t Len;
- uint8_t Ord; // ThunkOrdinal enumeration
- // Name: The null-terminated name follows.
- // Variant portion of thunk
- };
-
- Thunk32Sym(SymbolRecordKind Kind, uint32_t RecordOffset, const Hdr *H,
- StringRef Name, ArrayRef<uint8_t> VariantData)
- : SymbolRecord(Kind), RecordOffset(RecordOffset), Header(*H), Name(Name),
- VariantData(VariantData) {}
-
- static ErrorOr<Thunk32Sym> deserialize(SymbolRecordKind Kind,
- uint32_t RecordOffset,
- ArrayRef<uint8_t> &Data) {
- const Hdr *H = nullptr;
- StringRef Name;
- ArrayRef<uint8_t> VariantData;
-
- CV_DESERIALIZE(Data, H, Name, CV_ARRAY_FIELD_TAIL(VariantData));
-
- return Thunk32Sym(Kind, RecordOffset, H, Name, VariantData);
- }
+ explicit Thunk32Sym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
+ Thunk32Sym(SymbolRecordKind Kind, uint32_t RecordOffset)
+ : SymbolRecord(Kind), RecordOffset(RecordOffset) {}
- uint32_t RecordOffset;
- Hdr Header;
+ uint32_t Parent;
+ uint32_t End;
+ uint32_t Next;
+ uint32_t Offset;
+ uint16_t Segment;
+ uint16_t Length;
+ ThunkOrdinal Thunk;
StringRef Name;
ArrayRef<uint8_t> VariantData;
+
+ uint32_t RecordOffset;
};
// S_TRAMPOLINE
class TrampolineSym : public SymbolRecord {
public:
- struct Hdr {
- ulittle16_t Type; // TrampolineType enum
- ulittle16_t Size;
- ulittle32_t ThunkOff;
- ulittle32_t TargetOff;
- ulittle16_t ThunkSection;
- ulittle16_t TargetSection;
- };
-
- TrampolineSym(SymbolRecordKind Kind, uint32_t RecordOffset, const Hdr *H)
- : SymbolRecord(Kind), RecordOffset(RecordOffset), Header(*H) {}
-
- static ErrorOr<TrampolineSym> deserialize(SymbolRecordKind Kind,
- uint32_t RecordOffset,
- ArrayRef<uint8_t> &Data) {
- const Hdr *H = nullptr;
+ explicit TrampolineSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
+ TrampolineSym(SymbolRecordKind Kind, uint32_t RecordOffset)
+ : SymbolRecord(Kind), RecordOffset(RecordOffset) {}
- CV_DESERIALIZE(Data, H);
-
- return TrampolineSym(Kind, RecordOffset, H);
- }
+ TrampolineType Type;
+ uint16_t Size;
+ uint32_t ThunkOffset;
+ uint32_t TargetOffset;
+ uint16_t ThunkSection;
+ uint16_t TargetSection;
uint32_t RecordOffset;
- Hdr Header;
};
// S_SECTION
class SectionSym : public SymbolRecord {
public:
- struct Hdr {
- ulittle16_t SectionNumber;
- uint8_t Alignment;
- uint8_t Reserved; // Must be 0
- ulittle32_t Rva;
- ulittle32_t Length;
- ulittle32_t Characteristics;
- // Name: The null-terminated name follows.
- };
-
- SectionSym(SymbolRecordKind Kind, uint32_t RecordOffset, const Hdr *H,
- StringRef Name)
- : SymbolRecord(Kind), RecordOffset(RecordOffset), Header(*H), Name(Name) {
- }
-
- static ErrorOr<SectionSym> deserialize(SymbolRecordKind Kind,
- uint32_t RecordOffset,
- ArrayRef<uint8_t> &Data) {
- const Hdr *H = nullptr;
- StringRef Name;
-
- CV_DESERIALIZE(Data, H, Name);
+ explicit SectionSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
+ SectionSym(SymbolRecordKind Kind, uint32_t RecordOffset)
+ : SymbolRecord(Kind), RecordOffset(RecordOffset) {}
- return SectionSym(Kind, RecordOffset, H, Name);
- }
+ uint16_t SectionNumber;
+ uint8_t Alignment;
+ uint32_t Rva;
+ uint32_t Length;
+ uint32_t Characteristics;
+ StringRef Name;
uint32_t RecordOffset;
- Hdr Header;
- StringRef Name;
};
// S_COFFGROUP
class CoffGroupSym : public SymbolRecord {
public:
- struct Hdr {
- ulittle32_t Size;
- ulittle32_t Characteristics;
- ulittle32_t Offset;
- ulittle16_t Segment;
- // Name: The null-terminated name follows.
- };
-
- CoffGroupSym(SymbolRecordKind Kind, uint32_t RecordOffset, const Hdr *H,
- StringRef Name)
- : SymbolRecord(Kind), RecordOffset(RecordOffset), Header(*H), Name(Name) {
- }
-
- static ErrorOr<CoffGroupSym> deserialize(SymbolRecordKind Kind,
- uint32_t RecordOffset,
- ArrayRef<uint8_t> &Data) {
- const Hdr *H = nullptr;
- StringRef Name;
-
- CV_DESERIALIZE(Data, H, Name);
+ explicit CoffGroupSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
+ CoffGroupSym(SymbolRecordKind Kind, uint32_t RecordOffset)
+ : SymbolRecord(Kind), RecordOffset(RecordOffset) {}
- return CoffGroupSym(Kind, RecordOffset, H, Name);
- }
+ uint32_t Size;
+ uint32_t Characteristics;
+ uint32_t Offset;
+ uint16_t Segment;
+ StringRef Name;
uint32_t RecordOffset;
- Hdr Header;
- StringRef Name;
};
class ScopeEndSym : public SymbolRecord {
public:
+ explicit ScopeEndSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
ScopeEndSym(SymbolRecordKind Kind, uint32_t RecordOffset)
: SymbolRecord(Kind), RecordOffset(RecordOffset) {}
- static ErrorOr<ScopeEndSym> deserialize(SymbolRecordKind Kind,
- uint32_t RecordOffset,
- ArrayRef<uint8_t> &Data) {
- return ScopeEndSym(Kind, RecordOffset);
- }
uint32_t RecordOffset;
};
class CallerSym : public SymbolRecord {
public:
- struct Hdr {
- ulittle32_t Count;
- };
-
- CallerSym(SymbolRecordKind Kind, uint32_t RecordOffset, const Hdr *Header,
- ArrayRef<TypeIndex> Indices)
- : SymbolRecord(Kind), RecordOffset(RecordOffset), Header(*Header),
- Indices(Indices) {}
-
- static ErrorOr<CallerSym> deserialize(SymbolRecordKind Kind,
- uint32_t RecordOffset,
- ArrayRef<uint8_t> &Data) {
- const Hdr *Header;
- ArrayRef<TypeIndex> Indices;
-
- CV_DESERIALIZE(Data, Header, CV_ARRAY_FIELD_N(Indices, Header->Count));
-
- return CallerSym(Kind, RecordOffset, Header, Indices);
- }
+ explicit CallerSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
+ CallerSym(SymbolRecordKind Kind, uint32_t RecordOffset)
+ : SymbolRecord(Kind), RecordOffset(RecordOffset) {}
+ std::vector<TypeIndex> Indices;
uint32_t RecordOffset;
- Hdr Header;
- ArrayRef<TypeIndex> Indices;
};
struct BinaryAnnotationIterator {
@@ -264,7 +168,7 @@ struct BinaryAnnotationIterator {
};
BinaryAnnotationIterator(ArrayRef<uint8_t> Annotations) : Data(Annotations) {}
- BinaryAnnotationIterator() {}
+ BinaryAnnotationIterator() = default;
BinaryAnnotationIterator(const BinaryAnnotationIterator &Other)
: Data(Other.Data) {}
@@ -435,1018 +339,608 @@ private:
// S_INLINESITE
class InlineSiteSym : public SymbolRecord {
public:
- struct Hdr {
- ulittle32_t PtrParent;
- ulittle32_t PtrEnd;
- TypeIndex Inlinee;
- // BinaryAnnotations
- };
-
- InlineSiteSym(uint32_t RecordOffset, const Hdr *H,
- ArrayRef<uint8_t> Annotations)
+ explicit InlineSiteSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
+ InlineSiteSym(uint32_t RecordOffset)
: SymbolRecord(SymbolRecordKind::InlineSiteSym),
- RecordOffset(RecordOffset), Header(*H), Annotations(Annotations) {}
-
- static ErrorOr<InlineSiteSym> deserialize(SymbolRecordKind Kind,
- uint32_t RecordOffset,
- ArrayRef<uint8_t> &Data) {
- const Hdr *H = nullptr;
- ArrayRef<uint8_t> Annotations;
- CV_DESERIALIZE(Data, H, CV_ARRAY_FIELD_TAIL(Annotations));
-
- return InlineSiteSym(RecordOffset, H, Annotations);
- }
+ RecordOffset(RecordOffset) {}
llvm::iterator_range<BinaryAnnotationIterator> annotations() const {
- return llvm::make_range(BinaryAnnotationIterator(Annotations),
+ return llvm::make_range(BinaryAnnotationIterator(AnnotationData),
BinaryAnnotationIterator());
}
- uint32_t RecordOffset;
- Hdr Header;
+ uint32_t Parent;
+ uint32_t End;
+ TypeIndex Inlinee;
+ std::vector<uint8_t> AnnotationData;
-private:
- ArrayRef<uint8_t> Annotations;
+ uint32_t RecordOffset;
};
// S_PUB32
class PublicSym32 : public SymbolRecord {
public:
- struct Hdr {
- ulittle32_t Index; // Type index, or Metadata token if a managed symbol
- ulittle32_t Off;
- ulittle16_t Seg;
- // Name: The null-terminated name follows.
- };
-
- PublicSym32(uint32_t RecordOffset, const Hdr *H, StringRef Name)
- : SymbolRecord(SymbolRecordKind::PublicSym32), RecordOffset(RecordOffset),
- Header(*H), Name(Name) {}
-
- static ErrorOr<PublicSym32> deserialize(SymbolRecordKind Kind,
- uint32_t RecordOffset,
- ArrayRef<uint8_t> &Data) {
- const Hdr *H = nullptr;
- StringRef Name;
- CV_DESERIALIZE(Data, H, Name);
+ explicit PublicSym32(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
+ explicit PublicSym32(uint32_t RecordOffset)
+ : SymbolRecord(SymbolRecordKind::PublicSym32),
+ RecordOffset(RecordOffset) {}
- return PublicSym32(RecordOffset, H, Name);
- }
+ uint32_t Index;
+ uint32_t Offset;
+ uint16_t Segment;
+ StringRef Name;
uint32_t RecordOffset;
- Hdr Header;
- StringRef Name;
};
// S_REGISTER
class RegisterSym : public SymbolRecord {
public:
- struct Hdr {
- ulittle32_t Index; // Type index or Metadata token
- ulittle16_t Register; // RegisterId enumeration
- // Name: The null-terminated name follows.
- };
-
- RegisterSym(uint32_t RecordOffset, const Hdr *H, StringRef Name)
- : SymbolRecord(SymbolRecordKind::RegisterSym), RecordOffset(RecordOffset),
- Header(*H), Name(Name) {}
+ explicit RegisterSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
+ RegisterSym(uint32_t RecordOffset)
+ : SymbolRecord(SymbolRecordKind::RegisterSym),
+ RecordOffset(RecordOffset) {}
- static ErrorOr<RegisterSym> deserialize(SymbolRecordKind Kind,
- uint32_t RecordOffset,
- ArrayRef<uint8_t> &Data) {
- const Hdr *H = nullptr;
- StringRef Name;
- CV_DESERIALIZE(Data, H, Name);
-
- return RegisterSym(RecordOffset, H, Name);
- }
+ uint32_t Index;
+ RegisterId Register;
+ StringRef Name;
uint32_t RecordOffset;
- Hdr Header;
- StringRef Name;
};
// S_PROCREF, S_LPROCREF
class ProcRefSym : public SymbolRecord {
public:
- struct Hdr {
- ulittle32_t SumName; // SUC of the name (?)
- ulittle32_t SymOffset; // Offset of actual symbol in $$Symbols
- ulittle16_t Mod; // Module containing the actual symbol
- // Name: The null-terminated name follows.
- };
-
- ProcRefSym(uint32_t RecordOffset, const Hdr *H, StringRef Name)
- : SymbolRecord(SymbolRecordKind::ProcRefSym), RecordOffset(RecordOffset),
- Header(*H), Name(Name) {}
-
- static ErrorOr<ProcRefSym> deserialize(SymbolRecordKind Kind,
- uint32_t RecordOffset,
- ArrayRef<uint8_t> &Data) {
- const Hdr *H = nullptr;
- StringRef Name;
- CV_DESERIALIZE(Data, H, Name);
-
- return ProcRefSym(RecordOffset, H, Name);
+ explicit ProcRefSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
+ explicit ProcRefSym(uint32_t RecordOffset)
+ : SymbolRecord(SymbolRecordKind::ProcRefSym), RecordOffset(RecordOffset) {
}
- uint32_t RecordOffset;
- Hdr Header;
+ uint32_t SumName;
+ uint32_t SymOffset;
+ uint16_t Module;
StringRef Name;
+
+ uint32_t RecordOffset;
};
// S_LOCAL
class LocalSym : public SymbolRecord {
public:
- struct Hdr {
- TypeIndex Type;
- ulittle16_t Flags; // LocalSymFlags enum
- // Name: The null-terminated name follows.
- };
-
- LocalSym(uint32_t RecordOffset, const Hdr *H, StringRef Name)
- : SymbolRecord(SymbolRecordKind::LocalSym), RecordOffset(RecordOffset),
- Header(*H), Name(Name) {}
+ explicit LocalSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
+ explicit LocalSym(uint32_t RecordOffset)
+ : SymbolRecord(SymbolRecordKind::LocalSym), RecordOffset(RecordOffset) {}
- static ErrorOr<LocalSym> deserialize(SymbolRecordKind Kind,
- uint32_t RecordOffset,
- ArrayRef<uint8_t> &Data) {
- const Hdr *H = nullptr;
- StringRef Name;
- CV_DESERIALIZE(Data, H, Name);
-
- return LocalSym(RecordOffset, H, Name);
- }
+ TypeIndex Type;
+ LocalSymFlags Flags;
+ StringRef Name;
uint32_t RecordOffset;
- Hdr Header;
- StringRef Name;
};
struct LocalVariableAddrRange {
- ulittle32_t OffsetStart;
- ulittle16_t ISectStart;
- ulittle16_t Range;
+ uint32_t OffsetStart;
+ uint16_t ISectStart;
+ uint16_t Range;
};
struct LocalVariableAddrGap {
- ulittle16_t GapStartOffset;
- ulittle16_t Range;
+ uint16_t GapStartOffset;
+ uint16_t Range;
};
enum : uint16_t { MaxDefRange = 0xf000 };
// S_DEFRANGE
class DefRangeSym : public SymbolRecord {
-public:
- struct Hdr {
- ulittle32_t Program;
- LocalVariableAddrRange Range;
- // LocalVariableAddrGap Gaps[];
- };
+ static constexpr uint32_t RelocationOffset = 8;
- DefRangeSym(uint32_t RecordOffset, const Hdr *H,
- ArrayRef<LocalVariableAddrGap> Gaps)
- : SymbolRecord(SymbolRecordKind::DefRangeSym), RecordOffset(RecordOffset),
- Header(*H), Gaps(Gaps) {}
-
- static ErrorOr<DefRangeSym> deserialize(SymbolRecordKind Kind,
- uint32_t RecordOffset,
- ArrayRef<uint8_t> &Data) {
- const Hdr *H = nullptr;
- ArrayRef<LocalVariableAddrGap> Gaps;
- CV_DESERIALIZE(Data, H, CV_ARRAY_FIELD_TAIL(Gaps));
-
- return DefRangeSym(RecordOffset, H, Gaps);
- }
+public:
+ explicit DefRangeSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
+ explicit DefRangeSym(uint32_t RecordOffset)
+ : SymbolRecord(SymbolRecordKind::DefRangeSym),
+ RecordOffset(RecordOffset) {}
uint32_t getRelocationOffset() const {
- return RecordOffset + offsetof(Hdr, Range);
+ return RecordOffset + RelocationOffset;
}
+ uint32_t Program;
+ LocalVariableAddrRange Range;
+ std::vector<LocalVariableAddrGap> Gaps;
+
uint32_t RecordOffset;
- Hdr Header;
- ArrayRef<LocalVariableAddrGap> Gaps;
};
// S_DEFRANGE_SUBFIELD
class DefRangeSubfieldSym : public SymbolRecord {
+ static constexpr uint32_t RelocationOffset = 12;
+
public:
- struct Hdr {
- ulittle32_t Program;
- ulittle16_t OffsetInParent;
- LocalVariableAddrRange Range;
- // LocalVariableAddrGap Gaps[];
- };
- DefRangeSubfieldSym(uint32_t RecordOffset, const Hdr *H,
- ArrayRef<LocalVariableAddrGap> Gaps)
+ explicit DefRangeSubfieldSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
+ DefRangeSubfieldSym(uint32_t RecordOffset)
: SymbolRecord(SymbolRecordKind::DefRangeSubfieldSym),
- RecordOffset(RecordOffset), Header(*H), Gaps(Gaps) {}
-
- static ErrorOr<DefRangeSubfieldSym> deserialize(SymbolRecordKind Kind,
- uint32_t RecordOffset,
- ArrayRef<uint8_t> &Data) {
- const Hdr *H = nullptr;
- ArrayRef<LocalVariableAddrGap> Gaps;
- CV_DESERIALIZE(Data, H, CV_ARRAY_FIELD_TAIL(Gaps));
-
- return DefRangeSubfieldSym(RecordOffset, H, Gaps);
- }
+ RecordOffset(RecordOffset) {}
uint32_t getRelocationOffset() const {
- return RecordOffset + offsetof(Hdr, Range);
+ return RecordOffset + RelocationOffset;
}
+ uint32_t Program;
+ uint16_t OffsetInParent;
+ LocalVariableAddrRange Range;
+ std::vector<LocalVariableAddrGap> Gaps;
+
uint32_t RecordOffset;
- Hdr Header;
- ArrayRef<LocalVariableAddrGap> Gaps;
};
// S_DEFRANGE_REGISTER
class DefRangeRegisterSym : public SymbolRecord {
public:
- struct Hdr {
+ struct Header {
ulittle16_t Register;
ulittle16_t MayHaveNoName;
- LocalVariableAddrRange Range;
- // LocalVariableAddrGap Gaps[];
};
-
- DefRangeRegisterSym(uint32_t RecordOffset, const Hdr *H,
- ArrayRef<LocalVariableAddrGap> Gaps)
+ explicit DefRangeRegisterSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
+ DefRangeRegisterSym(uint32_t RecordOffset)
: SymbolRecord(SymbolRecordKind::DefRangeRegisterSym),
- RecordOffset(RecordOffset), Header(*H), Gaps(Gaps) {}
-
- DefRangeRegisterSym(uint16_t Register, uint16_t MayHaveNoName,
- uint32_t OffsetStart, uint16_t ISectStart, uint16_t Range,
- ArrayRef<LocalVariableAddrGap> Gaps)
- : SymbolRecord(SymbolRecordKind::DefRangeRegisterSym), RecordOffset(0),
- Gaps(Gaps) {
- Header.Register = Register;
- Header.MayHaveNoName = MayHaveNoName;
- Header.Range.OffsetStart = OffsetStart;
- Header.Range.ISectStart = ISectStart;
- Header.Range.Range = Range;
- }
+ RecordOffset(RecordOffset) {}
- static ErrorOr<DefRangeRegisterSym> deserialize(SymbolRecordKind Kind,
- uint32_t RecordOffset,
- ArrayRef<uint8_t> &Data) {
- const Hdr *H = nullptr;
- ArrayRef<LocalVariableAddrGap> Gaps;
- CV_DESERIALIZE(Data, H, CV_ARRAY_FIELD_TAIL(Gaps));
+ uint32_t getRelocationOffset() const { return RecordOffset + sizeof(Header); }
- return DefRangeRegisterSym(RecordOffset, H, Gaps);
- }
-
- uint32_t getRelocationOffset() const {
- return RecordOffset + offsetof(Hdr, Range);
- }
+ Header Hdr;
+ LocalVariableAddrRange Range;
+ std::vector<LocalVariableAddrGap> Gaps;
uint32_t RecordOffset;
- Hdr Header;
- ArrayRef<LocalVariableAddrGap> Gaps;
};
// S_DEFRANGE_SUBFIELD_REGISTER
class DefRangeSubfieldRegisterSym : public SymbolRecord {
public:
- struct Hdr {
- ulittle16_t Register; // Register to which the variable is relative
+ struct Header {
+ ulittle16_t Register;
ulittle16_t MayHaveNoName;
ulittle32_t OffsetInParent;
- LocalVariableAddrRange Range;
- // LocalVariableAddrGap Gaps[];
};
-
- DefRangeSubfieldRegisterSym(uint32_t RecordOffset, const Hdr *H,
- ArrayRef<LocalVariableAddrGap> Gaps)
+ explicit DefRangeSubfieldRegisterSym(SymbolRecordKind Kind)
+ : SymbolRecord(Kind) {}
+ DefRangeSubfieldRegisterSym(uint32_t RecordOffset)
: SymbolRecord(SymbolRecordKind::DefRangeSubfieldRegisterSym),
- RecordOffset(RecordOffset), Header(*H), Gaps(Gaps) {}
-
- DefRangeSubfieldRegisterSym(uint16_t Register, uint16_t MayHaveNoName,
- uint32_t OffsetInParent,
- ArrayRef<LocalVariableAddrGap> Gaps)
- : SymbolRecord(SymbolRecordKind::DefRangeSubfieldRegisterSym),
- RecordOffset(0), Gaps(Gaps) {
- Header.Register = Register;
- Header.MayHaveNoName = MayHaveNoName;
- Header.OffsetInParent = OffsetInParent;
- }
+ RecordOffset(RecordOffset) {}
- static ErrorOr<DefRangeSubfieldRegisterSym>
- deserialize(SymbolRecordKind Kind, uint32_t RecordOffset,
- ArrayRef<uint8_t> &Data) {
- const Hdr *H = nullptr;
- ArrayRef<LocalVariableAddrGap> Gaps;
- CV_DESERIALIZE(Data, H, CV_ARRAY_FIELD_TAIL(Gaps));
+ uint32_t getRelocationOffset() const { return RecordOffset + sizeof(Header); }
- return DefRangeSubfieldRegisterSym(RecordOffset, H, Gaps);
- }
-
- uint32_t getRelocationOffset() const {
- return RecordOffset + offsetof(Hdr, Range);
- }
+ Header Hdr;
+ LocalVariableAddrRange Range;
+ std::vector<LocalVariableAddrGap> Gaps;
uint32_t RecordOffset;
- Hdr Header;
- ArrayRef<LocalVariableAddrGap> Gaps;
};
// S_DEFRANGE_FRAMEPOINTER_REL
class DefRangeFramePointerRelSym : public SymbolRecord {
-public:
- struct Hdr {
- little32_t Offset; // Offset from the frame pointer register
- LocalVariableAddrRange Range;
- // LocalVariableAddrGap Gaps[];
- };
+ static constexpr uint32_t RelocationOffset = 8;
- DefRangeFramePointerRelSym(uint32_t RecordOffset, const Hdr *H,
- ArrayRef<LocalVariableAddrGap> Gaps)
+public:
+ explicit DefRangeFramePointerRelSym(SymbolRecordKind Kind)
+ : SymbolRecord(Kind) {}
+ DefRangeFramePointerRelSym(uint32_t RecordOffset)
: SymbolRecord(SymbolRecordKind::DefRangeFramePointerRelSym),
- RecordOffset(RecordOffset), Header(*H), Gaps(Gaps) {}
-
- static ErrorOr<DefRangeFramePointerRelSym>
- deserialize(SymbolRecordKind Kind, uint32_t RecordOffset,
- ArrayRef<uint8_t> &Data) {
- const Hdr *H = nullptr;
- ArrayRef<LocalVariableAddrGap> Gaps;
- CV_DESERIALIZE(Data, H, CV_ARRAY_FIELD_TAIL(Gaps));
-
- return DefRangeFramePointerRelSym(RecordOffset, H, Gaps);
- }
+ RecordOffset(RecordOffset) {}
uint32_t getRelocationOffset() const {
- return RecordOffset + offsetof(Hdr, Range);
+ return RecordOffset + RelocationOffset;
}
+ int32_t Offset;
+ LocalVariableAddrRange Range;
+ std::vector<LocalVariableAddrGap> Gaps;
+
uint32_t RecordOffset;
- Hdr Header;
- ArrayRef<LocalVariableAddrGap> Gaps;
};
// S_DEFRANGE_REGISTER_REL
class DefRangeRegisterRelSym : public SymbolRecord {
public:
- struct Hdr {
- ulittle16_t BaseRegister;
+ struct Header {
+ ulittle16_t Register;
ulittle16_t Flags;
little32_t BasePointerOffset;
- LocalVariableAddrRange Range;
- // LocalVariableAddrGap Gaps[];
};
-
- DefRangeRegisterRelSym(uint32_t RecordOffset, const Hdr *H,
- ArrayRef<LocalVariableAddrGap> Gaps)
+ explicit DefRangeRegisterRelSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
+ explicit DefRangeRegisterRelSym(uint32_t RecordOffset)
: SymbolRecord(SymbolRecordKind::DefRangeRegisterRelSym),
- RecordOffset(RecordOffset), Header(*H), Gaps(Gaps) {}
-
- DefRangeRegisterRelSym(uint16_t BaseRegister, uint16_t Flags,
- int32_t BasePointerOffset, uint32_t OffsetStart,
- uint16_t ISectStart, uint16_t Range,
- ArrayRef<LocalVariableAddrGap> Gaps)
- : SymbolRecord(SymbolRecordKind::DefRangeRegisterRelSym), RecordOffset(0),
- Gaps(Gaps) {
- Header.BaseRegister = BaseRegister;
- Header.Flags = Flags;
- Header.BasePointerOffset = BasePointerOffset;
- Header.Range.OffsetStart = OffsetStart;
- Header.Range.ISectStart = ISectStart;
- Header.Range.Range = Range;
- }
+ RecordOffset(RecordOffset) {}
- static ErrorOr<DefRangeRegisterRelSym> deserialize(SymbolRecordKind Kind,
- uint32_t RecordOffset,
- ArrayRef<uint8_t> &Data) {
- const Hdr *H = nullptr;
- ArrayRef<LocalVariableAddrGap> Gaps;
- CV_DESERIALIZE(Data, H, CV_ARRAY_FIELD_TAIL(Gaps));
+ // The flags implement this notional bitfield:
+ // uint16_t IsSubfield : 1;
+ // uint16_t Padding : 3;
+ // uint16_t OffsetInParent : 12;
+ enum : uint16_t {
+ IsSubfieldFlag = 1,
+ OffsetInParentShift = 4,
+ };
- return DefRangeRegisterRelSym(RecordOffset, H, Gaps);
- }
+ bool hasSpilledUDTMember() const { return Hdr.Flags & IsSubfieldFlag; }
+ uint16_t offsetInParent() const { return Hdr.Flags >> OffsetInParentShift; }
- bool hasSpilledUDTMember() const { return Header.Flags & 1; }
- uint16_t offsetInParent() const { return Header.Flags >> 4; }
+ uint32_t getRelocationOffset() const { return RecordOffset + sizeof(Header); }
- uint32_t getRelocationOffset() const {
- return RecordOffset + offsetof(Hdr, Range);
- }
+ Header Hdr;
+ LocalVariableAddrRange Range;
+ std::vector<LocalVariableAddrGap> Gaps;
uint32_t RecordOffset;
- Hdr Header;
- ArrayRef<LocalVariableAddrGap> Gaps;
};
// S_DEFRANGE_FRAMEPOINTER_REL_FULL_SCOPE
class DefRangeFramePointerRelFullScopeSym : public SymbolRecord {
public:
- struct Hdr {
- little32_t Offset; // Offset from the frame pointer register
- };
-
- DefRangeFramePointerRelFullScopeSym(uint32_t RecordOffset, const Hdr *H)
+ explicit DefRangeFramePointerRelFullScopeSym(SymbolRecordKind Kind)
+ : SymbolRecord(Kind) {}
+ explicit DefRangeFramePointerRelFullScopeSym(uint32_t RecordOffset)
: SymbolRecord(SymbolRecordKind::DefRangeFramePointerRelFullScopeSym),
- RecordOffset(RecordOffset), Header(*H) {}
-
- static ErrorOr<DefRangeFramePointerRelFullScopeSym>
- deserialize(SymbolRecordKind Kind, uint32_t RecordOffset,
- ArrayRef<uint8_t> &Data) {
- const Hdr *H = nullptr;
- CV_DESERIALIZE(Data, H);
+ RecordOffset(RecordOffset) {}
- return DefRangeFramePointerRelFullScopeSym(RecordOffset, H);
- }
+ int32_t Offset;
uint32_t RecordOffset;
- Hdr Header;
};
// S_BLOCK32
class BlockSym : public SymbolRecord {
-public:
- struct Hdr {
- ulittle32_t PtrParent;
- ulittle32_t PtrEnd;
- ulittle32_t CodeSize;
- ulittle32_t CodeOffset;
- ulittle16_t Segment;
- // Name: The null-terminated name follows.
- };
+ static constexpr uint32_t RelocationOffset = 16;
- BlockSym(uint32_t RecordOffset, const Hdr *H, StringRef Name)
- : SymbolRecord(SymbolRecordKind::BlockSym), RecordOffset(RecordOffset),
- Header(*H), Name(Name) {}
-
- static ErrorOr<BlockSym> deserialize(SymbolRecordKind Kind,
- uint32_t RecordOffset,
- ArrayRef<uint8_t> &Data) {
- const Hdr *H = nullptr;
- StringRef Name;
- CV_DESERIALIZE(Data, H, Name);
-
- return BlockSym(RecordOffset, H, Name);
- }
+public:
+ explicit BlockSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
+ explicit BlockSym(uint32_t RecordOffset)
+ : SymbolRecord(SymbolRecordKind::BlockSym), RecordOffset(RecordOffset) {}
uint32_t getRelocationOffset() const {
- return RecordOffset + offsetof(Hdr, CodeOffset);
+ return RecordOffset + RelocationOffset;
}
- uint32_t RecordOffset;
- Hdr Header;
+ uint32_t Parent;
+ uint32_t End;
+ uint32_t CodeSize;
+ uint32_t CodeOffset;
+ uint16_t Segment;
StringRef Name;
+
+ uint32_t RecordOffset;
};
// S_LABEL32
class LabelSym : public SymbolRecord {
-public:
- struct Hdr {
- ulittle32_t CodeOffset;
- ulittle16_t Segment;
- uint8_t Flags; // CV_PROCFLAGS
- // Name: The null-terminated name follows.
- };
-
- LabelSym(uint32_t RecordOffset, const Hdr *H, StringRef Name)
- : SymbolRecord(SymbolRecordKind::LabelSym), RecordOffset(RecordOffset),
- Header(*H), Name(Name) {}
+ static constexpr uint32_t RelocationOffset = 4;
- static ErrorOr<LabelSym> deserialize(SymbolRecordKind Kind,
- uint32_t RecordOffset,
- ArrayRef<uint8_t> &Data) {
- const Hdr *H = nullptr;
- StringRef Name;
- CV_DESERIALIZE(Data, H, Name);
-
- return LabelSym(RecordOffset, H, Name);
- }
+public:
+ explicit LabelSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
+ explicit LabelSym(uint32_t RecordOffset)
+ : SymbolRecord(SymbolRecordKind::LabelSym), RecordOffset(RecordOffset) {}
uint32_t getRelocationOffset() const {
- return RecordOffset + offsetof(Hdr, CodeOffset);
+ return RecordOffset + RelocationOffset;
}
- uint32_t RecordOffset;
- Hdr Header;
+ uint32_t CodeOffset;
+ uint16_t Segment;
+ ProcSymFlags Flags;
StringRef Name;
+
+ uint32_t RecordOffset;
};
// S_OBJNAME
class ObjNameSym : public SymbolRecord {
public:
- struct Hdr {
- ulittle32_t Signature;
- // Name: The null-terminated name follows.
- };
-
- ObjNameSym(uint32_t RecordOffset, const Hdr *H, StringRef Name)
- : SymbolRecord(SymbolRecordKind::ObjNameSym), RecordOffset(RecordOffset),
- Header(*H), Name(Name) {}
-
- static ErrorOr<ObjNameSym> deserialize(SymbolRecordKind Kind,
- uint32_t RecordOffset,
- ArrayRef<uint8_t> &Data) {
- const Hdr *H = nullptr;
- StringRef Name;
- CV_DESERIALIZE(Data, H, Name);
-
- return ObjNameSym(RecordOffset, H, Name);
+ explicit ObjNameSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
+ ObjNameSym(uint32_t RecordOffset)
+ : SymbolRecord(SymbolRecordKind::ObjNameSym), RecordOffset(RecordOffset) {
}
- uint32_t RecordOffset;
- Hdr Header;
+ uint32_t Signature;
StringRef Name;
+
+ uint32_t RecordOffset;
};
// S_ENVBLOCK
class EnvBlockSym : public SymbolRecord {
public:
- struct Hdr {
- uint8_t Reserved;
- // Sequence of zero terminated strings.
- };
-
- EnvBlockSym(uint32_t RecordOffset, const Hdr *H,
- const std::vector<StringRef> &Fields)
- : SymbolRecord(SymbolRecordKind::EnvBlockSym), RecordOffset(RecordOffset),
- Header(*H), Fields(Fields) {}
-
- static ErrorOr<EnvBlockSym> deserialize(SymbolRecordKind Kind,
- uint32_t RecordOffset,
- ArrayRef<uint8_t> &Data) {
- const Hdr *H = nullptr;
- std::vector<StringRef> Fields;
- CV_DESERIALIZE(Data, H, CV_STRING_ARRAY_NULL_TERM(Fields));
+ explicit EnvBlockSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
+ EnvBlockSym(uint32_t RecordOffset)
+ : SymbolRecord(SymbolRecordKind::EnvBlockSym),
+ RecordOffset(RecordOffset) {}
- return EnvBlockSym(RecordOffset, H, Fields);
- }
+ std::vector<StringRef> Fields;
uint32_t RecordOffset;
- Hdr Header;
- std::vector<StringRef> Fields;
};
// S_EXPORT
class ExportSym : public SymbolRecord {
public:
- struct Hdr {
- ulittle16_t Ordinal;
- ulittle16_t Flags; // ExportFlags
- // Name: The null-terminated name follows.
- };
+ explicit ExportSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
+ ExportSym(uint32_t RecordOffset)
+ : SymbolRecord(SymbolRecordKind::ExportSym), RecordOffset(RecordOffset) {}
- ExportSym(uint32_t RecordOffset, const Hdr *H, StringRef Name)
- : SymbolRecord(SymbolRecordKind::ExportSym), RecordOffset(RecordOffset),
- Header(*H), Name(Name) {}
-
- static ErrorOr<ExportSym> deserialize(SymbolRecordKind Kind,
- uint32_t RecordOffset,
- ArrayRef<uint8_t> &Data) {
- const Hdr *H = nullptr;
- StringRef Name;
- CV_DESERIALIZE(Data, H, Name);
-
- return ExportSym(RecordOffset, H, Name);
- }
+ uint16_t Ordinal;
+ ExportFlags Flags;
+ StringRef Name;
uint32_t RecordOffset;
- Hdr Header;
- StringRef Name;
};
// S_FILESTATIC
class FileStaticSym : public SymbolRecord {
public:
- struct Hdr {
- ulittle32_t Index; // Type Index
- ulittle32_t ModFilenameOffset; // Index of mod filename in string table
- ulittle16_t Flags; // LocalSymFlags enum
- // Name: The null-terminated name follows.
- };
-
- FileStaticSym(uint32_t RecordOffset, const Hdr *H, StringRef Name)
+ explicit FileStaticSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
+ FileStaticSym(uint32_t RecordOffset)
: SymbolRecord(SymbolRecordKind::FileStaticSym),
- RecordOffset(RecordOffset), Header(*H), Name(Name) {}
-
- static ErrorOr<FileStaticSym> deserialize(SymbolRecordKind Kind,
- uint32_t RecordOffset,
- ArrayRef<uint8_t> &Data) {
- const Hdr *H = nullptr;
- StringRef Name;
- CV_DESERIALIZE(Data, H, Name);
+ RecordOffset(RecordOffset) {}
- return FileStaticSym(RecordOffset, H, Name);
- }
+ uint32_t Index;
+ uint32_t ModFilenameOffset;
+ LocalSymFlags Flags;
+ StringRef Name;
uint32_t RecordOffset;
- Hdr Header;
- StringRef Name;
};
// S_COMPILE2
class Compile2Sym : public SymbolRecord {
public:
- struct Hdr {
- ulittle32_t flags; // CompileSym2Flags enum
- uint8_t getLanguage() const { return flags & 0xFF; }
- unsigned short Machine; // CPUType enum
- unsigned short VersionFrontendMajor;
- unsigned short VersionFrontendMinor;
- unsigned short VersionFrontendBuild;
- unsigned short VersionBackendMajor;
- unsigned short VersionBackendMinor;
- unsigned short VersionBackendBuild;
- // Version: The null-terminated version string follows.
- // Optional block of zero terminated strings terminated with a double zero.
- };
-
- Compile2Sym(uint32_t RecordOffset, const Hdr *H, StringRef Version)
- : SymbolRecord(SymbolRecordKind::Compile2Sym), RecordOffset(RecordOffset),
- Header(*H), Version(Version) {}
-
- static ErrorOr<Compile2Sym> deserialize(SymbolRecordKind Kind,
- uint32_t RecordOffset,
- ArrayRef<uint8_t> &Data) {
- const Hdr *H = nullptr;
- StringRef Version;
- CV_DESERIALIZE(Data, H, Version);
+ explicit Compile2Sym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
+ Compile2Sym(uint32_t RecordOffset)
+ : SymbolRecord(SymbolRecordKind::Compile2Sym),
+ RecordOffset(RecordOffset) {}
+
+ CompileSym2Flags Flags;
+ CPUType Machine;
+ uint16_t VersionFrontendMajor;
+ uint16_t VersionFrontendMinor;
+ uint16_t VersionFrontendBuild;
+ uint16_t VersionBackendMajor;
+ uint16_t VersionBackendMinor;
+ uint16_t VersionBackendBuild;
+ StringRef Version;
+ std::vector<StringRef> ExtraStrings;
- return Compile2Sym(RecordOffset, H, Version);
- }
+ uint8_t getLanguage() const { return static_cast<uint32_t>(Flags) & 0xFF; }
+ uint32_t getFlags() const { return static_cast<uint32_t>(Flags) & ~0xFF; }
uint32_t RecordOffset;
- Hdr Header;
- StringRef Version;
};
// S_COMPILE3
class Compile3Sym : public SymbolRecord {
public:
- struct Hdr {
- ulittle32_t flags; // CompileSym3Flags enum
- uint8_t getLanguage() const { return flags & 0xff; }
- ulittle16_t Machine; // CPUType enum
- ulittle16_t VersionFrontendMajor;
- ulittle16_t VersionFrontendMinor;
- ulittle16_t VersionFrontendBuild;
- ulittle16_t VersionFrontendQFE;
- ulittle16_t VersionBackendMajor;
- ulittle16_t VersionBackendMinor;
- ulittle16_t VersionBackendBuild;
- ulittle16_t VersionBackendQFE;
- // VersionString: The null-terminated version string follows.
- };
-
- Compile3Sym(uint32_t RecordOffset, const Hdr *H, StringRef Version)
- : SymbolRecord(SymbolRecordKind::Compile3Sym), RecordOffset(RecordOffset),
- Header(*H), Version(Version) {}
+ explicit Compile3Sym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
+ Compile3Sym(uint32_t RecordOffset)
+ : SymbolRecord(SymbolRecordKind::Compile3Sym),
+ RecordOffset(RecordOffset) {}
+
+ CompileSym3Flags Flags;
+ CPUType Machine;
+ uint16_t VersionFrontendMajor;
+ uint16_t VersionFrontendMinor;
+ uint16_t VersionFrontendBuild;
+ uint16_t VersionFrontendQFE;
+ uint16_t VersionBackendMajor;
+ uint16_t VersionBackendMinor;
+ uint16_t VersionBackendBuild;
+ uint16_t VersionBackendQFE;
+ StringRef Version;
- static ErrorOr<Compile3Sym> deserialize(SymbolRecordKind Kind,
- uint32_t RecordOffset,
- ArrayRef<uint8_t> &Data) {
- const Hdr *H = nullptr;
- StringRef Version;
- CV_DESERIALIZE(Data, H, Version);
-
- return Compile3Sym(RecordOffset, H, Version);
- }
+ uint8_t getLanguage() const { return static_cast<uint32_t>(Flags) & 0xFF; }
+ uint32_t getFlags() const { return static_cast<uint32_t>(Flags) & ~0xFF; }
uint32_t RecordOffset;
- Hdr Header;
- StringRef Version;
};
// S_FRAMEPROC
class FrameProcSym : public SymbolRecord {
public:
- struct Hdr {
- ulittle32_t TotalFrameBytes;
- ulittle32_t PaddingFrameBytes;
- ulittle32_t OffsetToPadding;
- ulittle32_t BytesOfCalleeSavedRegisters;
- ulittle32_t OffsetOfExceptionHandler;
- ulittle16_t SectionIdOfExceptionHandler;
- ulittle32_t Flags;
- };
-
- FrameProcSym(uint32_t RecordOffset, const Hdr *H)
+ explicit FrameProcSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
+ explicit FrameProcSym(uint32_t RecordOffset)
: SymbolRecord(SymbolRecordKind::FrameProcSym),
- RecordOffset(RecordOffset), Header(*H) {}
+ RecordOffset(RecordOffset) {}
- static ErrorOr<FrameProcSym> deserialize(SymbolRecordKind Kind,
- uint32_t RecordOffset,
- ArrayRef<uint8_t> &Data) {
- const Hdr *H = nullptr;
- CV_DESERIALIZE(Data, H);
-
- return FrameProcSym(RecordOffset, H);
- }
+ uint32_t TotalFrameBytes;
+ uint32_t PaddingFrameBytes;
+ uint32_t OffsetToPadding;
+ uint32_t BytesOfCalleeSavedRegisters;
+ uint32_t OffsetOfExceptionHandler;
+ uint16_t SectionIdOfExceptionHandler;
+ FrameProcedureOptions Flags;
uint32_t RecordOffset;
- Hdr Header;
};
// S_CALLSITEINFO
class CallSiteInfoSym : public SymbolRecord {
-public:
- struct Hdr {
- ulittle32_t CodeOffset;
- ulittle16_t Segment;
- ulittle16_t Reserved;
- TypeIndex Type;
- };
+ static constexpr uint32_t RelocationOffset = 4;
- CallSiteInfoSym(uint32_t RecordOffset, const Hdr *H)
- : SymbolRecord(SymbolRecordKind::CallSiteInfoSym),
- RecordOffset(RecordOffset), Header(*H) {}
-
- static ErrorOr<CallSiteInfoSym> deserialize(SymbolRecordKind Kind,
- uint32_t RecordOffset,
- ArrayRef<uint8_t> &Data) {
- const Hdr *H = nullptr;
- CV_DESERIALIZE(Data, H);
-
- return CallSiteInfoSym(RecordOffset, H);
- }
+public:
+ explicit CallSiteInfoSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
+ explicit CallSiteInfoSym(uint32_t RecordOffset)
+      : SymbolRecord(SymbolRecordKind::CallSiteInfoSym),
+        RecordOffset(RecordOffset) {}
uint32_t getRelocationOffset() const {
- return RecordOffset + offsetof(Hdr, CodeOffset);
+ return RecordOffset + RelocationOffset;
}
+ uint32_t CodeOffset;
+ uint16_t Segment;
+ TypeIndex Type;
+
uint32_t RecordOffset;
- Hdr Header;
};
// S_HEAPALLOCSITE
class HeapAllocationSiteSym : public SymbolRecord {
-public:
- struct Hdr {
- ulittle32_t CodeOffset;
- ulittle16_t Segment;
- ulittle16_t CallInstructionSize;
- TypeIndex Type;
- };
+ static constexpr uint32_t RelocationOffset = 4;
- HeapAllocationSiteSym(uint32_t RecordOffset, const Hdr *H)
+public:
+ explicit HeapAllocationSiteSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
+ explicit HeapAllocationSiteSym(uint32_t RecordOffset)
: SymbolRecord(SymbolRecordKind::HeapAllocationSiteSym),
- RecordOffset(RecordOffset), Header(*H) {}
-
- static ErrorOr<HeapAllocationSiteSym> deserialize(SymbolRecordKind Kind,
- uint32_t RecordOffset,
- ArrayRef<uint8_t> &Data) {
- const Hdr *H = nullptr;
- CV_DESERIALIZE(Data, H);
-
- return HeapAllocationSiteSym(RecordOffset, H);
- }
+ RecordOffset(RecordOffset) {}
uint32_t getRelocationOffset() const {
- return RecordOffset + offsetof(Hdr, CodeOffset);
+ return RecordOffset + RelocationOffset;
}
+ uint32_t CodeOffset;
+ uint16_t Segment;
+ uint16_t CallInstructionSize;
+ TypeIndex Type;
+
uint32_t RecordOffset;
- Hdr Header;
};
// S_FRAMECOOKIE
class FrameCookieSym : public SymbolRecord {
-public:
- struct Hdr {
- ulittle32_t CodeOffset;
- ulittle16_t Register;
- uint8_t CookieKind;
- uint8_t Flags;
- };
-
- FrameCookieSym(uint32_t RecordOffset, const Hdr *H)
- : SymbolRecord(SymbolRecordKind::FrameCookieSym),
- RecordOffset(RecordOffset), Header(*H) {}
+ static constexpr uint32_t RelocationOffset = 4;
- static ErrorOr<FrameCookieSym> deserialize(SymbolRecordKind Kind,
- uint32_t RecordOffset,
- ArrayRef<uint8_t> &Data) {
- const Hdr *H = nullptr;
- CV_DESERIALIZE(Data, H);
-
- return FrameCookieSym(RecordOffset, H);
- }
+public:
+ explicit FrameCookieSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
+ explicit FrameCookieSym(uint32_t RecordOffset)
+      : SymbolRecord(SymbolRecordKind::FrameCookieSym),
+        RecordOffset(RecordOffset) {}
uint32_t getRelocationOffset() const {
- return RecordOffset + offsetof(Hdr, CodeOffset);
+ return RecordOffset + RelocationOffset;
}
+ uint32_t CodeOffset;
+ uint16_t Register;
+ uint8_t CookieKind;
+ uint8_t Flags;
+
uint32_t RecordOffset;
- Hdr Header;
};
// S_UDT, S_COBOLUDT
class UDTSym : public SymbolRecord {
public:
- struct Hdr {
- TypeIndex Type; // Type of the UDT
- // Name: The null-terminated name follows.
- };
-
- UDTSym(uint32_t RecordOffset, const Hdr *H, StringRef Name)
- : SymbolRecord(SymbolRecordKind::UDTSym), RecordOffset(RecordOffset),
- Header(*H), Name(Name) {}
+ explicit UDTSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
+ explicit UDTSym(uint32_t RecordOffset)
+      : SymbolRecord(SymbolRecordKind::UDTSym), RecordOffset(RecordOffset) {}
- static ErrorOr<UDTSym> deserialize(SymbolRecordKind Kind,
- uint32_t RecordOffset,
- ArrayRef<uint8_t> &Data) {
- const Hdr *H = nullptr;
- StringRef Name;
- CV_DESERIALIZE(Data, H, Name);
-
- return UDTSym(RecordOffset, H, Name);
- }
+ TypeIndex Type;
+ StringRef Name;
uint32_t RecordOffset;
- Hdr Header;
- StringRef Name;
};
// S_BUILDINFO
class BuildInfoSym : public SymbolRecord {
public:
- struct Hdr {
- ulittle32_t BuildId;
- };
-
- BuildInfoSym(uint32_t RecordOffset, const Hdr *H)
+ explicit BuildInfoSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
+ BuildInfoSym(uint32_t RecordOffset)
: SymbolRecord(SymbolRecordKind::BuildInfoSym),
- RecordOffset(RecordOffset), Header(*H) {}
+ RecordOffset(RecordOffset) {}
- static ErrorOr<BuildInfoSym> deserialize(SymbolRecordKind Kind,
- uint32_t RecordOffset,
- ArrayRef<uint8_t> &Data) {
- const Hdr *H = nullptr;
- CV_DESERIALIZE(Data, H);
-
- return BuildInfoSym(RecordOffset, H);
- }
+ uint32_t BuildId;
uint32_t RecordOffset;
- Hdr Header;
};
// S_BPREL32
class BPRelativeSym : public SymbolRecord {
public:
- struct Hdr {
- little32_t Offset; // Offset from the base pointer register
- TypeIndex Type; // Type of the variable
- // Name: The null-terminated name follows.
- };
-
- BPRelativeSym(uint32_t RecordOffset, const Hdr *H, StringRef Name)
+ explicit BPRelativeSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
+ explicit BPRelativeSym(uint32_t RecordOffset)
: SymbolRecord(SymbolRecordKind::BPRelativeSym),
- RecordOffset(RecordOffset), Header(*H), Name(Name) {}
-
- static ErrorOr<BPRelativeSym> deserialize(SymbolRecordKind Kind,
- uint32_t RecordOffset,
- ArrayRef<uint8_t> &Data) {
- const Hdr *H = nullptr;
- StringRef Name;
- CV_DESERIALIZE(Data, H, Name);
+ RecordOffset(RecordOffset) {}
- return BPRelativeSym(RecordOffset, H, Name);
- }
+ int32_t Offset;
+ TypeIndex Type;
+ StringRef Name;
uint32_t RecordOffset;
- Hdr Header;
- StringRef Name;
};
// S_REGREL32
class RegRelativeSym : public SymbolRecord {
public:
- struct Hdr {
- ulittle32_t Offset; // Offset from the register
- TypeIndex Type; // Type of the variable
- ulittle16_t Register; // Register to which the variable is relative
- // Name: The null-terminated name follows.
- };
-
- RegRelativeSym(uint32_t RecordOffset, const Hdr *H, StringRef Name)
+ explicit RegRelativeSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
+ explicit RegRelativeSym(uint32_t RecordOffset)
: SymbolRecord(SymbolRecordKind::RegRelativeSym),
- RecordOffset(RecordOffset), Header(*H), Name(Name) {}
-
- static ErrorOr<RegRelativeSym> deserialize(SymbolRecordKind Kind,
- uint32_t RecordOffset,
- ArrayRef<uint8_t> &Data) {
- const Hdr *H = nullptr;
- StringRef Name;
- CV_DESERIALIZE(Data, H, Name);
+ RecordOffset(RecordOffset) {}
- return RegRelativeSym(RecordOffset, H, Name);
- }
+ uint32_t Offset;
+ TypeIndex Type;
+ uint16_t Register;
+ StringRef Name;
uint32_t RecordOffset;
- Hdr Header;
- StringRef Name;
};
// S_CONSTANT, S_MANCONSTANT
class ConstantSym : public SymbolRecord {
public:
- struct Hdr {
- TypeIndex Type;
- // Value: The value of the constant.
- // Name: The null-terminated name follows.
- };
-
- ConstantSym(uint32_t RecordOffset, const Hdr *H, const APSInt &Value,
- StringRef Name)
- : SymbolRecord(SymbolRecordKind::ConstantSym), RecordOffset(RecordOffset),
- Header(*H), Value(Value), Name(Name) {}
-
- static ErrorOr<ConstantSym> deserialize(SymbolRecordKind Kind,
- uint32_t RecordOffset,
- ArrayRef<uint8_t> &Data) {
- const Hdr *H = nullptr;
- APSInt Value;
- StringRef Name;
- CV_DESERIALIZE(Data, H, Value, Name);
-
- return ConstantSym(RecordOffset, H, Value, Name);
- }
+ explicit ConstantSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
+ ConstantSym(uint32_t RecordOffset)
+ : SymbolRecord(SymbolRecordKind::ConstantSym),
+ RecordOffset(RecordOffset) {}
- uint32_t RecordOffset;
- Hdr Header;
+ TypeIndex Type;
APSInt Value;
StringRef Name;
+
+ uint32_t RecordOffset;
};
// S_LDATA32, S_GDATA32, S_LMANDATA, S_GMANDATA
class DataSym : public SymbolRecord {
-public:
- struct Hdr {
- TypeIndex Type;
- ulittle32_t DataOffset;
- ulittle16_t Segment;
- // Name: The null-terminated name follows.
- };
-
- DataSym(uint32_t RecordOffset, const Hdr *H, StringRef Name)
- : SymbolRecord(SymbolRecordKind::DataSym), RecordOffset(RecordOffset),
- Header(*H), Name(Name) {}
+ static constexpr uint32_t RelocationOffset = 8;
- static ErrorOr<DataSym> deserialize(SymbolRecordKind Kind,
- uint32_t RecordOffset,
- ArrayRef<uint8_t> &Data) {
- const Hdr *H = nullptr;
- StringRef Name;
- CV_DESERIALIZE(Data, H, Name);
-
- return DataSym(RecordOffset, H, Name);
- }
+public:
+ explicit DataSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
+ DataSym(uint32_t RecordOffset)
+ : SymbolRecord(SymbolRecordKind::DataSym), RecordOffset(RecordOffset) {}
uint32_t getRelocationOffset() const {
- return RecordOffset + offsetof(Hdr, DataOffset);
+ return RecordOffset + RelocationOffset;
}
- uint32_t RecordOffset;
- Hdr Header;
+ TypeIndex Type;
+ uint32_t DataOffset;
+ uint16_t Segment;
StringRef Name;
+
+ uint32_t RecordOffset;
};
// S_LTHREAD32, S_GTHREAD32
class ThreadLocalDataSym : public SymbolRecord {
-public:
- struct Hdr {
- TypeIndex Type;
- ulittle32_t DataOffset;
- ulittle16_t Segment;
- // Name: The null-terminated name follows.
- };
+ static constexpr uint32_t RelocationOffset = 8;
- ThreadLocalDataSym(uint32_t RecordOffset, const Hdr *H, StringRef Name)
+public:
+ explicit ThreadLocalDataSym(SymbolRecordKind Kind) : SymbolRecord(Kind) {}
+ explicit ThreadLocalDataSym(uint32_t RecordOffset)
: SymbolRecord(SymbolRecordKind::ThreadLocalDataSym),
- RecordOffset(RecordOffset), Header(*H), Name(Name) {}
-
- static ErrorOr<ThreadLocalDataSym> deserialize(SymbolRecordKind Kind,
- uint32_t RecordOffset,
- ArrayRef<uint8_t> &Data) {
- const Hdr *H = nullptr;
- StringRef Name;
- CV_DESERIALIZE(Data, H, Name);
-
- return ThreadLocalDataSym(RecordOffset, H, Name);
- }
+ RecordOffset(RecordOffset) {}
uint32_t getRelocationOffset() const {
- return RecordOffset + offsetof(Hdr, DataOffset);
+ return RecordOffset + RelocationOffset;
}
- uint32_t RecordOffset;
- Hdr Header;
+ TypeIndex Type;
+ uint32_t DataOffset;
+ uint16_t Segment;
StringRef Name;
+
+ uint32_t RecordOffset;
};
typedef CVRecord<SymbolKind> CVSymbol;
-typedef VarStreamArray<CVSymbol> CVSymbolArray;
+typedef msf::VarStreamArray<CVSymbol> CVSymbolArray;
-} // namespace codeview
-} // namespace llvm
+} // end namespace codeview
+} // end namespace llvm
-#endif
+#endif // LLVM_DEBUGINFO_CODEVIEW_SYMBOLRECORD_H
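With the hand-rolled deserialize() helpers gone, the symbol records above are plain data holders whose public members a mapping visitor fills in. A minimal sketch (not part of this patch) of how the new DefRangeRegisterRelSym flag helpers and relocation-offset arithmetic behave; everything it uses is declared in the header above, and the register code 22 (x86 EBP in CodeView numbering) and the offsets are illustrative only:

#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
#include <cassert>
using namespace llvm::codeview;

int main() {
  DefRangeRegisterRelSym Sym(/*RecordOffset=*/0x100);
  Sym.Hdr.Register = 22;  // illustrative CodeView register code (x86 EBP)
  Sym.Hdr.Flags = DefRangeRegisterRelSym::IsSubfieldFlag |
                  (0x10 << DefRangeRegisterRelSym::OffsetInParentShift);
  assert(Sym.hasSpilledUDTMember());     // low bit of Flags
  assert(Sym.offsetInParent() == 0x10);  // upper 12 bits of Flags
  // The relocation applies to the LocalVariableAddrRange that follows the
  // fixed header, hence RecordOffset + sizeof(Header).
  assert(Sym.getRelocationOffset() ==
         0x100 + sizeof(DefRangeRegisterRelSym::Header));
  return 0;
}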
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolRecordMapping.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolRecordMapping.h
new file mode 100644
index 000000000000..1bd14ed1347a
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolRecordMapping.h
@@ -0,0 +1,44 @@
+//===- SymbolRecordMapping.h ------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_CODEVIEW_SYMBOLRECORDMAPPING_H
+#define LLVM_DEBUGINFO_CODEVIEW_SYMBOLRECORDMAPPING_H
+
+#include "llvm/DebugInfo/CodeView/CodeViewRecordIO.h"
+#include "llvm/DebugInfo/CodeView/SymbolVisitorCallbacks.h"
+
+namespace llvm {
+namespace msf {
+class StreamReader;
+class StreamWriter;
+}
+
+namespace codeview {
+class SymbolRecordMapping : public SymbolVisitorCallbacks {
+public:
+ explicit SymbolRecordMapping(msf::StreamReader &Reader) : IO(Reader) {}
+ explicit SymbolRecordMapping(msf::StreamWriter &Writer) : IO(Writer) {}
+
+ Error visitSymbolBegin(CVSymbol &Record) override;
+ Error visitSymbolEnd(CVSymbol &Record) override;
+
+#define SYMBOL_RECORD(EnumName, EnumVal, Name) \
+ Error visitKnownRecord(CVSymbol &CVR, Name &Record) override;
+#define SYMBOL_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
+#include "CVSymbolTypes.def"
+
+private:
+ Optional<SymbolKind> Kind;
+
+ CodeViewRecordIO IO;
+};
+}
+}
+
+#endif
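SymbolRecordMapping folds both directions into one visitor: built over a StreamReader it populates the plain record classes from SymbolRecord.h, built over a StreamWriter it serializes them back out. A hedged sketch of the reading direction, reusing the ByteStream/StreamReader construction pattern that TypeDeserializer uses further down; the driving visitor is assumed, not shown:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/DebugInfo/CodeView/SymbolRecordMapping.h"
#include "llvm/DebugInfo/MSF/ByteStream.h"
#include "llvm/DebugInfo/MSF/StreamReader.h"
using namespace llvm;

void mapOneSymbol(ArrayRef<uint8_t> RecordBytes) {
  msf::ByteStream Stream(RecordBytes);
  msf::StreamReader Reader(Stream);
  codeview::SymbolRecordMapping Mapping(Reader);
  // A CVSymbolVisitor (or similar driver) would now call
  // Mapping.visitSymbolBegin(), visitKnownRecord() and visitSymbolEnd()
  // for the record contained in RecordBytes.
}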
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolSerializer.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolSerializer.h
new file mode 100644
index 000000000000..4eb914e7ae6b
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolSerializer.h
@@ -0,0 +1,96 @@
+//===- SymbolSerializer.h ---------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_CODEVIEW_SYMBOLSERIALIZER_H
+#define LLVM_DEBUGINFO_CODEVIEW_SYMBOLSERIALIZER_H
+
+#include "llvm/DebugInfo/CodeView/SymbolRecordMapping.h"
+#include "llvm/DebugInfo/CodeView/SymbolVisitorCallbacks.h"
+#include "llvm/DebugInfo/MSF/ByteStream.h"
+#include "llvm/DebugInfo/MSF/StreamWriter.h"
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+namespace codeview {
+
+class SymbolSerializer : public SymbolVisitorCallbacks {
+ uint32_t RecordStart = 0;
+ msf::StreamWriter &Writer;
+ SymbolRecordMapping Mapping;
+ Optional<SymbolKind> CurrentSymbol;
+
+ Error writeRecordPrefix(SymbolKind Kind) {
+ RecordPrefix Prefix;
+ Prefix.RecordKind = Kind;
+ Prefix.RecordLen = 0;
+ if (auto EC = Writer.writeObject(Prefix))
+ return EC;
+ return Error::success();
+ }
+
+public:
+ explicit SymbolSerializer(msf::StreamWriter &Writer)
+ : Writer(Writer), Mapping(Writer) {}
+
+ virtual Error visitSymbolBegin(CVSymbol &Record) override {
+ assert(!CurrentSymbol.hasValue() && "Already in a symbol mapping!");
+
+ RecordStart = Writer.getOffset();
+ if (auto EC = writeRecordPrefix(Record.kind()))
+ return EC;
+
+ CurrentSymbol = Record.kind();
+ if (auto EC = Mapping.visitSymbolBegin(Record))
+ return EC;
+
+ return Error::success();
+ }
+
+ virtual Error visitSymbolEnd(CVSymbol &Record) override {
+ assert(CurrentSymbol.hasValue() && "Not in a symbol mapping!");
+
+ if (auto EC = Mapping.visitSymbolEnd(Record))
+ return EC;
+
+ uint32_t RecordEnd = Writer.getOffset();
+ Writer.setOffset(RecordStart);
+ uint16_t Length = RecordEnd - Writer.getOffset() - 2;
+ if (auto EC = Writer.writeInteger(Length))
+ return EC;
+
+ Writer.setOffset(RecordEnd);
+ CurrentSymbol.reset();
+
+ return Error::success();
+ }
+
+#define SYMBOL_RECORD(EnumName, EnumVal, Name) \
+ virtual Error visitKnownRecord(CVSymbol &CVR, Name &Record) override { \
+ return visitKnownRecordImpl(CVR, Record); \
+ }
+#define SYMBOL_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
+#include "CVSymbolTypes.def"
+
+private:
+ template <typename RecordKind>
+ Error visitKnownRecordImpl(CVSymbol &CVR, RecordKind &Record) {
+ return Mapping.visitKnownRecord(CVR, Record);
+ }
+};
+}
+}
+
+#endif
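The length fix-up in visitSymbolEnd is easiest to follow with concrete numbers (illustrative only): if the record prefix was written at offset 100 and the mapped record ends at offset 120, the serializer seeks back to 100 and writes Length = 120 - 100 - 2 = 18, i.e. the size of everything after the two-byte RecordLen field itself (the RecordKind plus the payload), matching the CodeView convention that RecordLen does not count itself.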
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolVisitorCallbackPipeline.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolVisitorCallbackPipeline.h
new file mode 100644
index 000000000000..96a93bf7e576
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolVisitorCallbackPipeline.h
@@ -0,0 +1,71 @@
+//===- SymbolVisitorCallbackPipeline.h --------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_CODEVIEW_SYMBOLVISITORCALLBACKPIPELINE_H
+#define LLVM_DEBUGINFO_CODEVIEW_SYMBOLVISITORCALLBACKPIPELINE_H
+
+#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
+#include "llvm/DebugInfo/CodeView/SymbolVisitorCallbacks.h"
+#include "llvm/Support/Error.h"
+#include <vector>
+
+namespace llvm {
+namespace codeview {
+
+class SymbolVisitorCallbackPipeline : public SymbolVisitorCallbacks {
+public:
+ SymbolVisitorCallbackPipeline() = default;
+
+ Error visitUnknownSymbol(CVSymbol &Record) override {
+ for (auto Visitor : Pipeline) {
+ if (auto EC = Visitor->visitUnknownSymbol(Record))
+ return EC;
+ }
+ return Error::success();
+ }
+
+ Error visitSymbolBegin(CVSymbol &Record) override {
+ for (auto Visitor : Pipeline) {
+ if (auto EC = Visitor->visitSymbolBegin(Record))
+ return EC;
+ }
+ return Error::success();
+ }
+
+ Error visitSymbolEnd(CVSymbol &Record) override {
+ for (auto Visitor : Pipeline) {
+ if (auto EC = Visitor->visitSymbolEnd(Record))
+ return EC;
+ }
+ return Error::success();
+ }
+
+ void addCallbackToPipeline(SymbolVisitorCallbacks &Callbacks) {
+ Pipeline.push_back(&Callbacks);
+ }
+
+#define SYMBOL_RECORD(EnumName, EnumVal, Name) \
+ Error visitKnownRecord(CVSymbol &CVR, Name &Record) override { \
+ for (auto Visitor : Pipeline) { \
+ if (auto EC = Visitor->visitKnownRecord(CVR, Record)) \
+ return EC; \
+ } \
+ return Error::success(); \
+ }
+#define SYMBOL_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
+#include "llvm/DebugInfo/CodeView/CVSymbolTypes.def"
+
+private:
+ std::vector<SymbolVisitorCallbacks *> Pipeline;
+};
+
+} // end namespace codeview
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_CODEVIEW_SYMBOLVISITORCALLBACKPIPELINE_H
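The pipeline simply fans each callback out to its stages in registration order, so a record can be deserialized by one stage and consumed by the next without either knowing about the other. A small sketch under that assumption; CountSymbols is a made-up stage, and any SymbolVisitorCallbacks subclass can take its place:

#include "llvm/DebugInfo/CodeView/SymbolVisitorCallbackPipeline.h"
using namespace llvm;
using namespace llvm::codeview;

// Hypothetical stage that only counts the records it sees.
struct CountSymbols : SymbolVisitorCallbacks {
  unsigned Count = 0;
  Error visitSymbolBegin(CVSymbol &) override {
    ++Count;
    return Error::success();
  }
};

void buildPipeline(SymbolVisitorCallbacks &Dumper) {
  CountSymbols Counter;
  SymbolVisitorCallbackPipeline Pipeline;
  Pipeline.addCallbackToPipeline(Counter);
  Pipeline.addCallbackToPipeline(Dumper);
  // A CVSymbolVisitor walking a CVSymbolArray would now drive Pipeline;
  // every record visits Counter first, then Dumper.
}

Note that addCallbackToPipeline stores a pointer, so each stage has to outlive the pipeline that references it.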
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolVisitorCallbacks.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolVisitorCallbacks.h
new file mode 100644
index 000000000000..aaa9d2e85e13
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolVisitorCallbacks.h
@@ -0,0 +1,48 @@
+//===- SymbolVisitorCallbacks.h ---------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_CODEVIEW_SYMBOLVISITORCALLBACKS_H
+#define LLVM_DEBUGINFO_CODEVIEW_SYMBOLVISITORCALLBACKS_H
+
+#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+namespace codeview {
+
+class SymbolVisitorCallbacks {
+ friend class CVSymbolVisitor;
+
+public:
+ virtual ~SymbolVisitorCallbacks() = default;
+
+ /// Action to take on unknown symbols. By default, they are ignored.
+ virtual Error visitUnknownSymbol(CVSymbol &Record) {
+ return Error::success();
+ }
+
+ /// Paired begin/end actions for all symbols. Receives all record data,
+ /// including the fixed-length record prefix. visitSymbolBegin() should
+  /// return the type of the symbol, or an error if it cannot be
+  /// determined.
+ virtual Error visitSymbolBegin(CVSymbol &Record) { return Error::success(); }
+ virtual Error visitSymbolEnd(CVSymbol &Record) { return Error::success(); }
+
+#define SYMBOL_RECORD(EnumName, EnumVal, Name) \
+ virtual Error visitKnownRecord(CVSymbol &CVR, Name &Record) { \
+ return Error::success(); \
+ }
+#define SYMBOL_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
+#include "CVSymbolTypes.def"
+};
+
+} // end namespace codeview
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_CODEVIEW_SYMBOLVISITORCALLBACKS_H
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolVisitorDelegate.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolVisitorDelegate.h
index a4965168c3db..2b468a289fd8 100644
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolVisitorDelegate.h
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/SymbolVisitorDelegate.h
@@ -10,24 +10,28 @@
#ifndef LLVM_DEBUGINFO_CODEVIEW_SYMBOLVISITORDELEGATE_H
#define LLVM_DEBUGINFO_CODEVIEW_SYMBOLVISITORDELEGATE_H
-#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
-
-#include <stdint.h>
+#include <cstdint>
namespace llvm {
+namespace msf {
+class StreamReader;
+} // end namespace msf
+
namespace codeview {
class SymbolVisitorDelegate {
public:
- virtual ~SymbolVisitorDelegate() {}
+ virtual ~SymbolVisitorDelegate() = default;
- virtual uint32_t getRecordOffset(ArrayRef<uint8_t> Record) = 0;
+ virtual uint32_t getRecordOffset(msf::StreamReader Reader) = 0;
virtual StringRef getFileNameForFileOffset(uint32_t FileOffset) = 0;
virtual StringRef getStringTable() = 0;
};
+
} // end namespace codeview
+
} // end namespace llvm
#endif // LLVM_DEBUGINFO_CODEVIEW_SYMBOLVISITORDELEGATE_H
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeDeserializer.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeDeserializer.h
new file mode 100644
index 000000000000..dc5eaf82845b
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeDeserializer.h
@@ -0,0 +1,136 @@
+//===- TypeDeserializer.h ---------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_CODEVIEW_TYPEDESERIALIZER_H
+#define LLVM_DEBUGINFO_CODEVIEW_TYPEDESERIALIZER_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/CodeView/TypeRecordMapping.h"
+#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h"
+#include "llvm/DebugInfo/MSF/ByteStream.h"
+#include "llvm/DebugInfo/MSF/StreamReader.h"
+#include "llvm/Support/Error.h"
+#include <cassert>
+#include <cstdint>
+#include <memory>
+
+namespace llvm {
+namespace codeview {
+
+class TypeDeserializer : public TypeVisitorCallbacks {
+ struct MappingInfo {
+ explicit MappingInfo(ArrayRef<uint8_t> RecordData)
+ : Stream(RecordData), Reader(Stream), Mapping(Reader) {}
+
+ msf::ByteStream Stream;
+ msf::StreamReader Reader;
+ TypeRecordMapping Mapping;
+ };
+
+public:
+ TypeDeserializer() = default;
+
+ Error visitTypeBegin(CVType &Record) override {
+ assert(!Mapping && "Already in a type mapping!");
+ Mapping = llvm::make_unique<MappingInfo>(Record.content());
+ return Mapping->Mapping.visitTypeBegin(Record);
+ }
+
+ Error visitTypeEnd(CVType &Record) override {
+ assert(Mapping && "Not in a type mapping!");
+ auto EC = Mapping->Mapping.visitTypeEnd(Record);
+ Mapping.reset();
+ return EC;
+ }
+
+#define TYPE_RECORD(EnumName, EnumVal, Name) \
+ Error visitKnownRecord(CVType &CVR, Name##Record &Record) override { \
+ return visitKnownRecordImpl<Name##Record>(CVR, Record); \
+ }
+#define MEMBER_RECORD(EnumName, EnumVal, Name)
+#define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
+#define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
+#include "TypeRecords.def"
+
+private:
+ template <typename RecordType>
+ Error visitKnownRecordImpl(CVType &CVR, RecordType &Record) {
+ return Mapping->Mapping.visitKnownRecord(CVR, Record);
+ }
+
+ std::unique_ptr<MappingInfo> Mapping;
+};
+
+class FieldListDeserializer : public TypeVisitorCallbacks {
+ struct MappingInfo {
+ explicit MappingInfo(msf::StreamReader &R)
+ : Reader(R), Mapping(Reader), StartOffset(0) {}
+
+ msf::StreamReader &Reader;
+ TypeRecordMapping Mapping;
+ uint32_t StartOffset;
+ };
+
+public:
+ explicit FieldListDeserializer(msf::StreamReader &Reader) : Mapping(Reader) {
+ CVType FieldList;
+ FieldList.Type = TypeLeafKind::LF_FIELDLIST;
+ consumeError(Mapping.Mapping.visitTypeBegin(FieldList));
+ }
+
+ ~FieldListDeserializer() override {
+ CVType FieldList;
+ FieldList.Type = TypeLeafKind::LF_FIELDLIST;
+ consumeError(Mapping.Mapping.visitTypeEnd(FieldList));
+ }
+
+ Error visitMemberBegin(CVMemberRecord &Record) override {
+ Mapping.StartOffset = Mapping.Reader.getOffset();
+ return Mapping.Mapping.visitMemberBegin(Record);
+ }
+
+ Error visitMemberEnd(CVMemberRecord &Record) override {
+ if (auto EC = Mapping.Mapping.visitMemberEnd(Record))
+ return EC;
+ return Error::success();
+ }
+
+#define TYPE_RECORD(EnumName, EnumVal, Name)
+#define MEMBER_RECORD(EnumName, EnumVal, Name) \
+ Error visitKnownMember(CVMemberRecord &CVR, Name##Record &Record) override { \
+ return visitKnownMemberImpl<Name##Record>(CVR, Record); \
+ }
+#define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
+#define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
+#include "TypeRecords.def"
+
+private:
+ template <typename RecordType>
+ Error visitKnownMemberImpl(CVMemberRecord &CVR, RecordType &Record) {
+ if (auto EC = Mapping.Mapping.visitKnownMember(CVR, Record))
+ return EC;
+
+ uint32_t EndOffset = Mapping.Reader.getOffset();
+ uint32_t RecordLength = EndOffset - Mapping.StartOffset;
+ Mapping.Reader.setOffset(Mapping.StartOffset);
+ if (auto EC = Mapping.Reader.readBytes(CVR.Data, RecordLength))
+ return EC;
+ assert(Mapping.Reader.getOffset() == EndOffset);
+ return Error::success();
+ }
+ MappingInfo Mapping;
+};
+
+} // end namespace codeview
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_CODEVIEW_TYPEDESERIALIZER_H
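FieldListDeserializer::visitKnownMemberImpl does a little bookkeeping that is worth spelling out: visitMemberBegin remembers where the member started, the mapping then consumes the member's bytes, and the deserializer computes RecordLength = EndOffset - StartOffset, rewinds the reader and re-reads exactly that many bytes into CVR.Data. With illustrative numbers, a member occupying stream offsets 40 through 63 inclusive gives StartOffset = 40, EndOffset = 64 and a 24-byte CVR.Data slice, and the reader finishes back at offset 64, so later stages see the raw bytes of just that member.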
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeDumper.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeDumper.h
index ca79ab076e5e..5a8b555cec02 100644
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeDumper.h
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeDumper.h
@@ -63,18 +63,20 @@ public:
ScopedPrinter *getPrinter() { return W; }
/// Action to take on unknown types. By default, they are ignored.
- Error visitUnknownType(const CVRecord<TypeLeafKind> &Record) override;
- Error visitUnknownMember(const CVRecord<TypeLeafKind> &Record) override;
+ Error visitUnknownType(CVType &Record) override;
+ Error visitUnknownMember(CVMemberRecord &Record) override;
/// Paired begin/end actions for all types. Receives all record data,
/// including the fixed-length record prefix.
- Error visitTypeBegin(const CVRecord<TypeLeafKind> &Record) override;
- Error visitTypeEnd(const CVRecord<TypeLeafKind> &Record) override;
+ Error visitTypeBegin(CVType &Record) override;
+ Error visitTypeEnd(CVType &Record) override;
+ Error visitMemberBegin(CVMemberRecord &Record) override;
+ Error visitMemberEnd(CVMemberRecord &Record) override;
#define TYPE_RECORD(EnumName, EnumVal, Name) \
- Error visit##Name(Name##Record &Record) override;
+ Error visitKnownRecord(CVType &CVR, Name##Record &Record) override;
#define MEMBER_RECORD(EnumName, EnumVal, Name) \
- TYPE_RECORD(EnumName, EnumVal, Name)
+ Error visitKnownMember(CVMemberRecord &CVR, Name##Record &Record) override;
#define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
#define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
#include "TypeRecords.def"
@@ -86,6 +88,7 @@ private:
ScopedPrinter *W;
+ bool IsInFieldList = false;
bool PrintRecordBytes = false;
/// Name of the current type. Only valid before visitTypeEnd.
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h
index c2ebf3848892..3c11d248fa72 100644
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeIndex.h
@@ -93,7 +93,7 @@ public:
static const uint32_t SimpleModeMask = 0x00000700;
public:
- TypeIndex() : Index(0) {}
+ TypeIndex() : Index(static_cast<uint32_t>(SimpleTypeKind::None)) {}
explicit TypeIndex(uint32_t Index) : Index(Index) {}
explicit TypeIndex(SimpleTypeKind Kind)
: Index(static_cast<uint32_t>(Kind)) {}
@@ -101,6 +101,7 @@ public:
: Index(static_cast<uint32_t>(Kind) | static_cast<uint32_t>(Mode)) {}
uint32_t getIndex() const { return Index; }
+ void setIndex(uint32_t I) { Index = I; }
bool isSimple() const { return Index < FirstNonSimpleIndex; }
bool isNoneType() const { return *this == None(); }
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h
index 42751fbd4af1..4f1c047815d2 100644
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeRecord.h
@@ -12,27 +12,54 @@
#include "llvm/ADT/APSInt.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/CodeView/CVRecord.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
-#include "llvm/Support/ErrorOr.h"
-#include <cinttypes>
-#include <utility>
+#include "llvm/DebugInfo/MSF/StreamArray.h"
+#include "llvm/Support/Endian.h"
+#include <algorithm>
+#include <cstdint>
+#include <vector>
namespace llvm {
+
+namespace msf {
+class StreamReader;
+} // end namespace msf
+
namespace codeview {
-using llvm::support::little32_t;
-using llvm::support::ulittle16_t;
-using llvm::support::ulittle32_t;
+using support::little32_t;
+using support::ulittle16_t;
+using support::ulittle32_t;
+
+typedef CVRecord<TypeLeafKind> CVType;
+
+struct CVMemberRecord {
+ TypeLeafKind Kind;
+ ArrayRef<uint8_t> Data;
+};
+typedef msf::VarStreamArray<CVType> CVTypeArray;
/// Equivalent to CV_fldattr_t in cvinfo.h.
struct MemberAttributes {
- ulittle16_t Attrs;
+ uint16_t Attrs = 0;
enum {
MethodKindShift = 2,
};
+ MemberAttributes() = default;
+
+ explicit MemberAttributes(MemberAccess Access)
+ : Attrs(static_cast<uint16_t>(Access)) {}
+
+ MemberAttributes(MemberAccess Access, MethodKind Kind, MethodOptions Flags) {
+ Attrs = static_cast<uint16_t>(Access);
+ Attrs |= (static_cast<uint16_t>(Kind) << MethodKindShift);
+ Attrs |= static_cast<uint16_t>(Flags);
+ }
/// Get the access specifier. Valid for any kind of member.
MemberAccess getAccess() const {
@@ -73,7 +100,7 @@ struct MemberAttributes {
// if it represents a member pointer.
class MemberPointerInfo {
public:
- MemberPointerInfo() {}
+ MemberPointerInfo() = default;
MemberPointerInfo(TypeIndex ContainingType,
PointerToMemberRepresentation Representation)
@@ -83,25 +110,18 @@ public:
/// is not in the map.
bool remapTypeIndices(ArrayRef<TypeIndex> IndexMap);
- static ErrorOr<MemberPointerInfo> deserialize(ArrayRef<uint8_t> &Data);
-
TypeIndex getContainingType() const { return ContainingType; }
PointerToMemberRepresentation getRepresentation() const {
return Representation;
}
-private:
- struct Layout {
- TypeIndex ClassType;
- ulittle16_t Representation; // PointerToMemberRepresentation
- };
-
TypeIndex ContainingType;
PointerToMemberRepresentation Representation;
};
class TypeRecord {
protected:
+ TypeRecord() = default;
explicit TypeRecord(TypeRecordKind Kind) : Kind(Kind) {}
public:
@@ -114,6 +134,7 @@ private:
// LF_MODIFIER
class ModifierRecord : public TypeRecord {
public:
+ explicit ModifierRecord(TypeRecordKind Kind) : TypeRecord(Kind) {}
ModifierRecord(TypeIndex ModifiedType, ModifierOptions Modifiers)
: TypeRecord(TypeRecordKind::Modifier), ModifiedType(ModifiedType),
Modifiers(Modifiers) {}
@@ -122,18 +143,9 @@ public:
/// is not in the map.
bool remapTypeIndices(ArrayRef<TypeIndex> IndexMap);
- static ErrorOr<ModifierRecord> deserialize(TypeRecordKind Kind,
- ArrayRef<uint8_t> &Data);
-
TypeIndex getModifiedType() const { return ModifiedType; }
ModifierOptions getModifiers() const { return Modifiers; }
-private:
- struct Layout {
- TypeIndex ModifiedType;
- ulittle16_t Modifiers; // ModifierOptions
- };
-
TypeIndex ModifiedType;
ModifierOptions Modifiers;
};
@@ -141,6 +153,7 @@ private:
// LF_PROCEDURE
class ProcedureRecord : public TypeRecord {
public:
+ explicit ProcedureRecord(TypeRecordKind Kind) : TypeRecord(Kind) {}
ProcedureRecord(TypeIndex ReturnType, CallingConvention CallConv,
FunctionOptions Options, uint16_t ParameterCount,
TypeIndex ArgumentList)
@@ -152,26 +165,12 @@ public:
/// is not in the map.
bool remapTypeIndices(ArrayRef<TypeIndex> IndexMap);
- static ErrorOr<ProcedureRecord> deserialize(TypeRecordKind Kind,
- ArrayRef<uint8_t> &Data);
-
- static uint32_t getLayoutSize() { return 2 + sizeof(Layout); }
-
TypeIndex getReturnType() const { return ReturnType; }
CallingConvention getCallConv() const { return CallConv; }
FunctionOptions getOptions() const { return Options; }
uint16_t getParameterCount() const { return ParameterCount; }
TypeIndex getArgumentList() const { return ArgumentList; }
-private:
- struct Layout {
- TypeIndex ReturnType;
- CallingConvention CallConv;
- FunctionOptions Options;
- ulittle16_t NumParameters;
- TypeIndex ArgListType;
- };
-
TypeIndex ReturnType;
CallingConvention CallConv;
FunctionOptions Options;
@@ -182,6 +181,8 @@ private:
// LF_MFUNCTION
class MemberFunctionRecord : public TypeRecord {
public:
+ explicit MemberFunctionRecord(TypeRecordKind Kind) : TypeRecord(Kind) {}
+
MemberFunctionRecord(TypeIndex ReturnType, TypeIndex ClassType,
TypeIndex ThisType, CallingConvention CallConv,
FunctionOptions Options, uint16_t ParameterCount,
@@ -196,9 +197,6 @@ public:
/// is not in the map.
bool remapTypeIndices(ArrayRef<TypeIndex> IndexMap);
- static ErrorOr<MemberFunctionRecord> deserialize(TypeRecordKind Kind,
- ArrayRef<uint8_t> &Data);
-
TypeIndex getReturnType() const { return ReturnType; }
TypeIndex getClassType() const { return ClassType; }
TypeIndex getThisType() const { return ThisType; }
@@ -208,18 +206,6 @@ public:
TypeIndex getArgumentList() const { return ArgumentList; }
int32_t getThisPointerAdjustment() const { return ThisPointerAdjustment; }
-private:
- struct Layout {
- TypeIndex ReturnType;
- TypeIndex ClassType;
- TypeIndex ThisType;
- CallingConvention CallConv;
- FunctionOptions Options;
- ulittle16_t NumParameters;
- TypeIndex ArgListType;
- little32_t ThisAdjustment;
- };
-
TypeIndex ReturnType;
TypeIndex ClassType;
TypeIndex ThisType;
@@ -233,6 +219,7 @@ private:
// LF_MFUNC_ID
class MemberFuncIdRecord : public TypeRecord {
public:
+ explicit MemberFuncIdRecord(TypeRecordKind Kind) : TypeRecord(Kind) {}
MemberFuncIdRecord(TypeIndex ClassType, TypeIndex FunctionType,
StringRef Name)
: TypeRecord(TypeRecordKind::MemberFuncId), ClassType(ClassType),
@@ -242,18 +229,9 @@ public:
/// is not in the map.
bool remapTypeIndices(ArrayRef<TypeIndex> IndexMap);
- static ErrorOr<MemberFuncIdRecord> deserialize(TypeRecordKind Kind,
- ArrayRef<uint8_t> &Data);
TypeIndex getClassType() const { return ClassType; }
TypeIndex getFunctionType() const { return FunctionType; }
StringRef getName() const { return Name; }
-
-private:
- struct Layout {
- TypeIndex ClassType;
- TypeIndex FunctionType;
- // Name: The null-terminated name follows.
- };
TypeIndex ClassType;
TypeIndex FunctionType;
StringRef Name;
@@ -262,6 +240,8 @@ private:
// LF_ARGLIST, LF_SUBSTR_LIST
class ArgListRecord : public TypeRecord {
public:
+ explicit ArgListRecord(TypeRecordKind Kind) : TypeRecord(Kind) {}
+
ArgListRecord(TypeRecordKind Kind, ArrayRef<TypeIndex> Indices)
: TypeRecord(Kind), StringIndices(Indices) {}
@@ -269,19 +249,8 @@ public:
/// is not in the map.
bool remapTypeIndices(ArrayRef<TypeIndex> IndexMap);
- static ErrorOr<ArgListRecord> deserialize(TypeRecordKind Kind,
- ArrayRef<uint8_t> &Data);
-
ArrayRef<TypeIndex> getIndices() const { return StringIndices; }
- static uint32_t getLayoutSize() { return 2 + sizeof(Layout); }
-
-private:
- struct Layout {
- ulittle32_t NumArgs; // Number of arguments
- // ArgTypes[]: Type indicies of arguments
- };
-
std::vector<TypeIndex> StringIndices;
};
@@ -294,94 +263,96 @@ public:
static const uint32_t PointerModeShift = 5;
static const uint32_t PointerModeMask = 0x07;
+ static const uint32_t PointerOptionMask = 0xFF;
+
static const uint32_t PointerSizeShift = 13;
static const uint32_t PointerSizeMask = 0xFF;
- PointerRecord(TypeIndex ReferentType, PointerKind Kind, PointerMode Mode,
- PointerOptions Options, uint8_t Size)
- : PointerRecord(ReferentType, Kind, Mode, Options, Size,
- MemberPointerInfo()) {}
+ explicit PointerRecord(TypeRecordKind Kind) : TypeRecord(Kind) {}
- PointerRecord(TypeIndex ReferentType, PointerKind Kind, PointerMode Mode,
- PointerOptions Options, uint8_t Size,
+ PointerRecord(TypeIndex ReferentType, uint32_t Attrs)
+ : TypeRecord(TypeRecordKind::Pointer), ReferentType(ReferentType),
+ Attrs(Attrs) {}
+
+ PointerRecord(TypeIndex ReferentType, PointerKind PK, PointerMode PM,
+ PointerOptions PO, uint8_t Size)
+ : TypeRecord(TypeRecordKind::Pointer), ReferentType(ReferentType),
+ Attrs(calcAttrs(PK, PM, PO, Size)) {}
+
+ PointerRecord(TypeIndex ReferentType, PointerKind PK, PointerMode PM,
+ PointerOptions PO, uint8_t Size,
+ const MemberPointerInfo &Member)
+ : TypeRecord(TypeRecordKind::Pointer), ReferentType(ReferentType),
+ Attrs(calcAttrs(PK, PM, PO, Size)), MemberInfo(Member) {}
+
+ PointerRecord(TypeIndex ReferentType, uint32_t Attrs,
const MemberPointerInfo &Member)
: TypeRecord(TypeRecordKind::Pointer), ReferentType(ReferentType),
- PtrKind(Kind), Mode(Mode), Options(Options), Size(Size),
- MemberInfo(Member) {}
+ Attrs(Attrs), MemberInfo(Member) {}
/// Rewrite member type indices with IndexMap. Returns false if a type index
/// is not in the map.
bool remapTypeIndices(ArrayRef<TypeIndex> IndexMap);
- static ErrorOr<PointerRecord> deserialize(TypeRecordKind Kind,
- ArrayRef<uint8_t> &Data);
-
TypeIndex getReferentType() const { return ReferentType; }
- PointerKind getPointerKind() const { return PtrKind; }
- PointerMode getMode() const { return Mode; }
- PointerOptions getOptions() const { return Options; }
- uint8_t getSize() const { return Size; }
- MemberPointerInfo getMemberInfo() const { return MemberInfo; }
- bool isPointerToMember() const {
- return Mode == PointerMode::PointerToDataMember ||
- Mode == PointerMode::PointerToMemberFunction;
+ PointerKind getPointerKind() const {
+ return static_cast<PointerKind>((Attrs >> PointerKindShift) &
+ PointerKindMask);
+ }
+
+ PointerMode getMode() const {
+ return static_cast<PointerMode>((Attrs >> PointerModeShift) &
+ PointerModeMask);
}
- bool isFlat() const {
- return !!(uint32_t(Options) & uint32_t(PointerOptions::Flat32));
+
+ PointerOptions getOptions() const {
+ return static_cast<PointerOptions>(Attrs);
}
- bool isConst() const {
- return !!(uint32_t(Options) & uint32_t(PointerOptions::Const));
+
+ uint8_t getSize() const {
+ return (Attrs >> PointerSizeShift) & PointerSizeMask;
}
+
+ MemberPointerInfo getMemberInfo() const { return *MemberInfo; }
+
+ bool isPointerToMember() const {
+ return getMode() == PointerMode::PointerToDataMember ||
+ getMode() == PointerMode::PointerToMemberFunction;
+ }
+
+ bool isFlat() const { return !!(Attrs & uint32_t(PointerOptions::Flat32)); }
+ bool isConst() const { return !!(Attrs & uint32_t(PointerOptions::Const)); }
+
bool isVolatile() const {
- return !!(uint32_t(Options) & uint32_t(PointerOptions::Volatile));
+ return !!(Attrs & uint32_t(PointerOptions::Volatile));
}
+
bool isUnaligned() const {
- return !!(uint32_t(Options) & uint32_t(PointerOptions::Unaligned));
+ return !!(Attrs & uint32_t(PointerOptions::Unaligned));
}
-private:
- struct Layout {
- TypeIndex PointeeType;
- ulittle32_t Attrs; // pointer attributes
- // if pointer to member:
- // PointerToMemberTail
- PointerKind getPtrKind() const {
- return PointerKind(Attrs & PointerKindMask);
- }
- PointerMode getPtrMode() const {
- return PointerMode((Attrs >> PointerModeShift) & PointerModeMask);
- }
- uint8_t getPtrSize() const {
- return (Attrs >> PointerSizeShift) & PointerSizeMask;
- }
- bool isFlat() const { return Attrs & (1 << 8); }
- bool isVolatile() const { return Attrs & (1 << 9); }
- bool isConst() const { return Attrs & (1 << 10); }
- bool isUnaligned() const { return Attrs & (1 << 11); }
-
- bool isPointerToDataMember() const {
- return getPtrMode() == PointerMode::PointerToDataMember;
- }
- bool isPointerToMemberFunction() const {
- return getPtrMode() == PointerMode::PointerToMemberFunction;
- }
- bool isPointerToMember() const {
- return isPointerToMemberFunction() || isPointerToDataMember();
- }
- };
-
TypeIndex ReferentType;
- PointerKind PtrKind;
- PointerMode Mode;
- PointerOptions Options;
- uint8_t Size;
- MemberPointerInfo MemberInfo;
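+  // Attrs packs the old PtrKind/Mode/Options/Size members into one word:
+  // PointerKind occupies the low bits, PointerMode sits at PointerModeShift
+  // (bit 5), the PointerOptions flags keep their native bit positions (e.g.
+  // Flat32, Const, Volatile, Unaligned), and the pointer size in bytes is
+  // stored at PointerSizeShift (bit 13); see calcAttrs() below.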
+ uint32_t Attrs;
+
+ Optional<MemberPointerInfo> MemberInfo;
+
+private:
+ static uint32_t calcAttrs(PointerKind PK, PointerMode PM, PointerOptions PO,
+ uint8_t Size) {
+ uint32_t A = 0;
+ A |= static_cast<uint32_t>(PK);
+ A |= static_cast<uint32_t>(PO);
+ A |= (static_cast<uint32_t>(PM) << PointerModeShift);
+ A |= (static_cast<uint32_t>(Size) << PointerSizeShift);
+ return A;
+ }
};
// LF_NESTTYPE
class NestedTypeRecord : public TypeRecord {
public:
+ explicit NestedTypeRecord(TypeRecordKind Kind) : TypeRecord(Kind) {}
NestedTypeRecord(TypeIndex Type, StringRef Name)
: TypeRecord(TypeRecordKind::NestedType), Type(Type), Name(Name) {}
@@ -389,26 +360,31 @@ public:
/// is not in the map.
bool remapTypeIndices(ArrayRef<TypeIndex> IndexMap);
- static ErrorOr<NestedTypeRecord> deserialize(TypeRecordKind Kind,
- ArrayRef<uint8_t> &Data);
-
TypeIndex getNestedType() const { return Type; }
StringRef getName() const { return Name; }
-private:
- struct Layout {
- ulittle16_t Pad0; // Should be zero
- TypeIndex Type; // Type index of nested type
- // Name: Null-terminated string
- };
-
TypeIndex Type;
StringRef Name;
};
+// LF_FIELDLIST
+class FieldListRecord : public TypeRecord {
+public:
+ explicit FieldListRecord(TypeRecordKind Kind) : TypeRecord(Kind) {}
+ explicit FieldListRecord(ArrayRef<uint8_t> Data)
+ : TypeRecord(TypeRecordKind::FieldList), Data(Data) {}
+
+ /// Rewrite member type indices with IndexMap. Returns false if a type index
+ /// is not in the map.
+ bool remapTypeIndices(ArrayRef<TypeIndex> IndexMap) { return false; }
+
+ ArrayRef<uint8_t> Data;
+};
+
// LF_ARRAY
class ArrayRecord : public TypeRecord {
public:
+ explicit ArrayRecord(TypeRecordKind Kind) : TypeRecord(Kind) {}
ArrayRecord(TypeIndex ElementType, TypeIndex IndexType, uint64_t Size,
StringRef Name)
: TypeRecord(TypeRecordKind::Array), ElementType(ElementType),
@@ -418,30 +394,20 @@ public:
/// is not in the map.
bool remapTypeIndices(ArrayRef<TypeIndex> IndexMap);
- static ErrorOr<ArrayRecord> deserialize(TypeRecordKind Kind,
- ArrayRef<uint8_t> &Data);
-
TypeIndex getElementType() const { return ElementType; }
TypeIndex getIndexType() const { return IndexType; }
uint64_t getSize() const { return Size; }
- llvm::StringRef getName() const { return Name; }
-
-private:
- struct Layout {
- TypeIndex ElementType;
- TypeIndex IndexType;
- // SizeOf: LF_NUMERIC encoded size in bytes. Not element count!
- // Name: The null-terminated name follows.
- };
+ StringRef getName() const { return Name; }
TypeIndex ElementType;
TypeIndex IndexType;
uint64_t Size;
- llvm::StringRef Name;
+ StringRef Name;
};
class TagRecord : public TypeRecord {
protected:
+ explicit TagRecord(TypeRecordKind Kind) : TypeRecord(Kind) {}
TagRecord(TypeRecordKind Kind, uint16_t MemberCount, ClassOptions Options,
TypeIndex FieldList, StringRef Name, StringRef UniqueName)
: TypeRecord(Kind), MemberCount(MemberCount), Options(Options),
@@ -457,13 +423,16 @@ public:
static const int WinRTKindShift = 14;
static const int WinRTKindMask = 0xC000;
+ bool hasUniqueName() const {
+ return (Options & ClassOptions::HasUniqueName) != ClassOptions::None;
+ }
+
uint16_t getMemberCount() const { return MemberCount; }
ClassOptions getOptions() const { return Options; }
TypeIndex getFieldList() const { return FieldList; }
StringRef getName() const { return Name; }
StringRef getUniqueName() const { return UniqueName; }
-private:
uint16_t MemberCount;
ClassOptions Options;
TypeIndex FieldList;
@@ -474,45 +443,34 @@ private:
// LF_CLASS, LF_STRUCTURE, LF_INTERFACE
class ClassRecord : public TagRecord {
public:
+ explicit ClassRecord(TypeRecordKind Kind) : TagRecord(Kind) {}
ClassRecord(TypeRecordKind Kind, uint16_t MemberCount, ClassOptions Options,
- HfaKind Hfa, WindowsRTClassKind WinRTKind, TypeIndex FieldList,
- TypeIndex DerivationList, TypeIndex VTableShape, uint64_t Size,
- StringRef Name, StringRef UniqueName)
+ TypeIndex FieldList, TypeIndex DerivationList,
+ TypeIndex VTableShape, uint64_t Size, StringRef Name,
+ StringRef UniqueName)
: TagRecord(Kind, MemberCount, Options, FieldList, Name, UniqueName),
- Hfa(Hfa), WinRTKind(WinRTKind), DerivationList(DerivationList),
- VTableShape(VTableShape), Size(Size) {}
+ DerivationList(DerivationList), VTableShape(VTableShape), Size(Size) {}
/// Rewrite member type indices with IndexMap. Returns false if a type index
/// is not in the map.
bool remapTypeIndices(ArrayRef<TypeIndex> IndexMap);
- static ErrorOr<ClassRecord> deserialize(TypeRecordKind Kind,
- ArrayRef<uint8_t> &Data);
+ HfaKind getHfa() const {
+ uint16_t Value = static_cast<uint16_t>(Options);
+ Value = (Value & HfaKindMask) >> HfaKindShift;
+ return static_cast<HfaKind>(Value);
+ }
+
+ WindowsRTClassKind getWinRTKind() const {
+ uint16_t Value = static_cast<uint16_t>(Options);
+ Value = (Value & WinRTKindMask) >> WinRTKindShift;
+ return static_cast<WindowsRTClassKind>(Value);
+ }
- HfaKind getHfa() const { return Hfa; }
- WindowsRTClassKind getWinRTKind() const { return WinRTKind; }
TypeIndex getDerivationList() const { return DerivationList; }
TypeIndex getVTableShape() const { return VTableShape; }
uint64_t getSize() const { return Size; }
-private:
- struct Layout {
- ulittle16_t MemberCount; // Number of members in FieldList.
- ulittle16_t Properties; // ClassOptions bitset
- TypeIndex FieldList; // LF_FIELDLIST: List of all kinds of members
- TypeIndex DerivedFrom; // LF_DERIVED: List of known derived classes
- TypeIndex VShape; // LF_VTSHAPE: Shape of the vftable
- // SizeOf: The 'sizeof' the UDT in bytes is encoded as an LF_NUMERIC
- // integer.
- // Name: The null-terminated name follows.
-
- bool hasUniqueName() const {
- return Properties & uint16_t(ClassOptions::HasUniqueName);
- }
- };
-
- HfaKind Hfa;
- WindowsRTClassKind WinRTKind;
TypeIndex DerivationList;
TypeIndex VTableShape;
uint64_t Size;
@@ -520,40 +478,28 @@ private:
// LF_UNION
struct UnionRecord : public TagRecord {
- UnionRecord(uint16_t MemberCount, ClassOptions Options, HfaKind Hfa,
- TypeIndex FieldList, uint64_t Size, StringRef Name,
- StringRef UniqueName)
+ explicit UnionRecord(TypeRecordKind Kind) : TagRecord(Kind) {}
+ UnionRecord(uint16_t MemberCount, ClassOptions Options, TypeIndex FieldList,
+ uint64_t Size, StringRef Name, StringRef UniqueName)
: TagRecord(TypeRecordKind::Union, MemberCount, Options, FieldList, Name,
UniqueName),
- Hfa(Hfa), Size(Size) {}
+ Size(Size) {}
- static ErrorOr<UnionRecord> deserialize(TypeRecordKind Kind,
- ArrayRef<uint8_t> &Data);
+ HfaKind getHfa() const {
+ uint16_t Value = static_cast<uint16_t>(Options);
+ Value = (Value & HfaKindMask) >> HfaKindShift;
+ return static_cast<HfaKind>(Value);
+ }
- HfaKind getHfa() const { return Hfa; }
uint64_t getSize() const { return Size; }
-private:
- struct Layout {
- ulittle16_t MemberCount; // Number of members in FieldList.
- ulittle16_t Properties; // ClassOptions bitset
- TypeIndex FieldList; // LF_FIELDLIST: List of all kinds of members
- // SizeOf: The 'sizeof' the UDT in bytes is encoded as an LF_NUMERIC
- // integer.
- // Name: The null-terminated name follows.
-
- bool hasUniqueName() const {
- return Properties & uint16_t(ClassOptions::HasUniqueName);
- }
- };
-
- HfaKind Hfa;
uint64_t Size;
};
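
With this change HfaKind and WindowsRTClassKind are no longer stored as separate members; ClassRecord::getHfa()/getWinRTKind() and UnionRecord::getHfa() decode them from the packed ClassOptions bits. A minimal sketch of that decoding, assuming HfaKindShift/HfaKindMask are declared on TagRecord alongside the WinRTKindShift/WinRTKindMask constants shown above:

    // Sketch only; mirrors ClassRecord::getHfa() and TagRecord::hasUniqueName().
    uint16_t Bits = static_cast<uint16_t>(Options);   // ClassOptions bitset
    HfaKind Hfa = static_cast<HfaKind>((Bits & HfaKindMask) >> HfaKindShift);
    bool HasUnique =
        (Options & ClassOptions::HasUniqueName) != ClassOptions::None;
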
// LF_ENUM
class EnumRecord : public TagRecord {
public:
+ explicit EnumRecord(TypeRecordKind Kind) : TagRecord(Kind) {}
EnumRecord(uint16_t MemberCount, ClassOptions Options, TypeIndex FieldList,
StringRef Name, StringRef UniqueName, TypeIndex UnderlyingType)
: TagRecord(TypeRecordKind::Enum, MemberCount, Options, FieldList, Name,
@@ -563,30 +509,14 @@ public:
/// Rewrite member type indices with IndexMap. Returns false if a type index is not in the map.
bool remapTypeIndices(ArrayRef<TypeIndex> IndexMap);
- static ErrorOr<EnumRecord> deserialize(TypeRecordKind Kind,
- ArrayRef<uint8_t> &Data);
-
TypeIndex getUnderlyingType() const { return UnderlyingType; }
-
-private:
- struct Layout {
- ulittle16_t NumEnumerators; // Number of enumerators
- ulittle16_t Properties;
- TypeIndex UnderlyingType;
- TypeIndex FieldListType;
- // Name: The null-terminated name follows.
-
- bool hasUniqueName() const {
- return Properties & uint16_t(ClassOptions::HasUniqueName);
- }
- };
-
TypeIndex UnderlyingType;
};
// LF_BITFIELD
class BitFieldRecord : public TypeRecord {
public:
+ explicit BitFieldRecord(TypeRecordKind Kind) : TypeRecord(Kind) {}
BitFieldRecord(TypeIndex Type, uint8_t BitSize, uint8_t BitOffset)
: TypeRecord(TypeRecordKind::BitField), Type(Type), BitSize(BitSize),
BitOffset(BitOffset) {}
@@ -595,20 +525,9 @@ public:
/// is not in the map.
bool remapTypeIndices(ArrayRef<TypeIndex> IndexMap);
- static ErrorOr<BitFieldRecord> deserialize(TypeRecordKind Kind,
- ArrayRef<uint8_t> &Data);
-
TypeIndex getType() const { return Type; }
uint8_t getBitOffset() const { return BitOffset; }
uint8_t getBitSize() const { return BitSize; }
-
-private:
- struct Layout {
- TypeIndex Type;
- uint8_t BitSize;
- uint8_t BitOffset;
- };
-
TypeIndex Type;
uint8_t BitSize;
uint8_t BitOffset;
@@ -617,6 +536,7 @@ private:
// LF_VTSHAPE
class VFTableShapeRecord : public TypeRecord {
public:
+ explicit VFTableShapeRecord(TypeRecordKind Kind) : TypeRecord(Kind) {}
explicit VFTableShapeRecord(ArrayRef<VFTableSlotKind> Slots)
: TypeRecord(TypeRecordKind::VFTableShape), SlotsRef(Slots) {}
explicit VFTableShapeRecord(std::vector<VFTableSlotKind> Slots)
@@ -626,26 +546,13 @@ public:
/// is not in the map.
bool remapTypeIndices(ArrayRef<TypeIndex> IndexMap);
- static ErrorOr<VFTableShapeRecord> deserialize(TypeRecordKind Kind,
- ArrayRef<uint8_t> &Data);
-
ArrayRef<VFTableSlotKind> getSlots() const {
if (!SlotsRef.empty())
return SlotsRef;
return Slots;
}
- uint32_t getEntryCount() const { return getSlots().size(); }
-
-private:
- struct Layout {
- // Number of vftable entries. Each method may have more than one entry due
- // to
- // things like covariant return types.
- ulittle16_t VFEntryCount;
- // Descriptors[]: 4-bit virtual method descriptors of type CV_VTS_desc_e.
- };
-private:
+ uint32_t getEntryCount() const { return getSlots().size(); }
ArrayRef<VFTableSlotKind> SlotsRef;
std::vector<VFTableSlotKind> Slots;
};
@@ -653,6 +560,7 @@ private:
// LF_TYPESERVER2
class TypeServer2Record : public TypeRecord {
public:
+ explicit TypeServer2Record(TypeRecordKind Kind) : TypeRecord(Kind) {}
TypeServer2Record(StringRef Guid, uint32_t Age, StringRef Name)
: TypeRecord(TypeRecordKind::TypeServer2), Guid(Guid), Age(Age),
Name(Name) {}
@@ -661,22 +569,12 @@ public:
/// is not in the map.
bool remapTypeIndices(ArrayRef<TypeIndex> IndexMap);
- static ErrorOr<TypeServer2Record> deserialize(TypeRecordKind Kind,
- ArrayRef<uint8_t> &Data);
-
StringRef getGuid() const { return Guid; }
uint32_t getAge() const { return Age; }
StringRef getName() const { return Name; }
-private:
- struct Layout {
- char Guid[16]; // GUID
- ulittle32_t Age;
- // Name: Name of the PDB as a null-terminated string
- };
-
StringRef Guid;
uint32_t Age;
StringRef Name;
@@ -685,6 +583,7 @@ private:
// LF_STRING_ID
class StringIdRecord : public TypeRecord {
public:
+ explicit StringIdRecord(TypeRecordKind Kind) : TypeRecord(Kind) {}
StringIdRecord(TypeIndex Id, StringRef String)
: TypeRecord(TypeRecordKind::StringId), Id(Id), String(String) {}
@@ -692,19 +591,9 @@ public:
/// is not in the map.
bool remapTypeIndices(ArrayRef<TypeIndex> IndexMap);
- static ErrorOr<StringIdRecord> deserialize(TypeRecordKind Kind,
- ArrayRef<uint8_t> &Data);
-
TypeIndex getId() const { return Id; }
StringRef getString() const { return String; }
-
-private:
- struct Layout {
- TypeIndex id;
- // Name: Name of the PDB as a null-terminated string
- };
-
TypeIndex Id;
StringRef String;
};
@@ -712,6 +601,7 @@ private:
// LF_FUNC_ID
class FuncIdRecord : public TypeRecord {
public:
+ explicit FuncIdRecord(TypeRecordKind Kind) : TypeRecord(Kind) {}
FuncIdRecord(TypeIndex ParentScope, TypeIndex FunctionType, StringRef Name)
: TypeRecord(TypeRecordKind::FuncId), ParentScope(ParentScope),
FunctionType(FunctionType), Name(Name) {}
@@ -720,22 +610,12 @@ public:
/// is not in the map.
bool remapTypeIndices(ArrayRef<TypeIndex> IndexMap);
- static ErrorOr<FuncIdRecord> deserialize(TypeRecordKind Kind,
- ArrayRef<uint8_t> &Data);
-
TypeIndex getParentScope() const { return ParentScope; }
TypeIndex getFunctionType() const { return FunctionType; }
StringRef getName() const { return Name; }
-private:
- struct Layout {
- TypeIndex ParentScope;
- TypeIndex FunctionType;
- // Name: The null-terminated name follows.
- };
-
TypeIndex ParentScope;
TypeIndex FunctionType;
StringRef Name;
@@ -744,6 +624,7 @@ private:
// LF_UDT_SRC_LINE
class UdtSourceLineRecord : public TypeRecord {
public:
+ explicit UdtSourceLineRecord(TypeRecordKind Kind) : TypeRecord(Kind) {}
UdtSourceLineRecord(TypeIndex UDT, TypeIndex SourceFile, uint32_t LineNumber)
: TypeRecord(TypeRecordKind::UdtSourceLine), UDT(UDT),
SourceFile(SourceFile), LineNumber(LineNumber) {}
@@ -752,20 +633,10 @@ public:
/// is not in the map.
bool remapTypeIndices(ArrayRef<TypeIndex> IndexMap);
- static ErrorOr<UdtSourceLineRecord> deserialize(TypeRecordKind Kind,
- ArrayRef<uint8_t> &Data);
-
TypeIndex getUDT() const { return UDT; }
TypeIndex getSourceFile() const { return SourceFile; }
uint32_t getLineNumber() const { return LineNumber; }
-private:
- struct Layout {
- TypeIndex UDT; // The user-defined type
- TypeIndex SourceFile; // StringID containing the source filename
- ulittle32_t LineNumber;
- };
-
TypeIndex UDT;
TypeIndex SourceFile;
uint32_t LineNumber;
@@ -774,6 +645,7 @@ private:
// LF_UDT_MOD_SRC_LINE
class UdtModSourceLineRecord : public TypeRecord {
public:
+ explicit UdtModSourceLineRecord(TypeRecordKind Kind) : TypeRecord(Kind) {}
UdtModSourceLineRecord(TypeIndex UDT, TypeIndex SourceFile,
uint32_t LineNumber, uint16_t Module)
: TypeRecord(TypeRecordKind::UdtSourceLine), UDT(UDT),
@@ -781,28 +653,11 @@ public:
bool remapTypeIndices(ArrayRef<TypeIndex> IndexMap);
- static ErrorOr<UdtModSourceLineRecord> deserialize(TypeRecordKind Kind,
- ArrayRef<uint8_t> &Data) {
- const Layout *L = nullptr;
- CV_DESERIALIZE(Data, L);
-
- return UdtModSourceLineRecord(L->UDT, L->SourceFile, L->LineNumber,
- L->Module);
- }
-
TypeIndex getUDT() const { return UDT; }
TypeIndex getSourceFile() const { return SourceFile; }
uint32_t getLineNumber() const { return LineNumber; }
uint16_t getModule() const { return Module; }
-private:
- struct Layout {
- TypeIndex UDT; // The user-defined type
- TypeIndex SourceFile; // StringID containing the source filename
- ulittle32_t LineNumber;
- ulittle16_t Module; // Module that contributes this UDT definition
- };
-
TypeIndex UDT;
TypeIndex SourceFile;
uint32_t LineNumber;
@@ -812,6 +667,7 @@ private:
// LF_BUILDINFO
class BuildInfoRecord : public TypeRecord {
public:
+ explicit BuildInfoRecord(TypeRecordKind Kind) : TypeRecord(Kind) {}
BuildInfoRecord(ArrayRef<TypeIndex> ArgIndices)
: TypeRecord(TypeRecordKind::BuildInfo),
ArgIndices(ArgIndices.begin(), ArgIndices.end()) {}
@@ -820,111 +676,73 @@ public:
/// is not in the map.
bool remapTypeIndices(ArrayRef<TypeIndex> IndexMap);
- static ErrorOr<BuildInfoRecord> deserialize(TypeRecordKind Kind,
- ArrayRef<uint8_t> &Data);
-
ArrayRef<TypeIndex> getArgs() const { return ArgIndices; }
-
-private:
- struct Layout {
- ulittle16_t NumArgs; // Number of arguments
- // ArgTypes[]: Type indicies of arguments
- };
SmallVector<TypeIndex, 4> ArgIndices;
};
// LF_VFTABLE
class VFTableRecord : public TypeRecord {
public:
+ explicit VFTableRecord(TypeRecordKind Kind) : TypeRecord(Kind) {}
VFTableRecord(TypeIndex CompleteClass, TypeIndex OverriddenVFTable,
uint32_t VFPtrOffset, StringRef Name,
ArrayRef<StringRef> Methods)
- : TypeRecord(TypeRecordKind::VFTable),
- CompleteClass(CompleteClass), OverriddenVFTable(OverriddenVFTable),
- VFPtrOffset(VFPtrOffset), Name(Name), MethodNamesRef(Methods) {}
- VFTableRecord(TypeIndex CompleteClass, TypeIndex OverriddenVFTable,
- uint32_t VFPtrOffset, StringRef Name,
- const std::vector<StringRef> &Methods)
- : TypeRecord(TypeRecordKind::VFTable),
- CompleteClass(CompleteClass), OverriddenVFTable(OverriddenVFTable),
- VFPtrOffset(VFPtrOffset), Name(Name), MethodNames(Methods) {}
+ : TypeRecord(TypeRecordKind::VFTable), CompleteClass(CompleteClass),
+ OverriddenVFTable(OverriddenVFTable), VFPtrOffset(VFPtrOffset) {
+ MethodNames.push_back(Name);
+ MethodNames.insert(MethodNames.end(), Methods.begin(), Methods.end());
+ }
/// Rewrite member type indices with IndexMap. Returns false if a type index
/// is not in the map.
bool remapTypeIndices(ArrayRef<TypeIndex> IndexMap);
- static ErrorOr<VFTableRecord> deserialize(TypeRecordKind Kind,
- ArrayRef<uint8_t> &Data);
-
TypeIndex getCompleteClass() const { return CompleteClass; }
TypeIndex getOverriddenVTable() const { return OverriddenVFTable; }
uint32_t getVFPtrOffset() const { return VFPtrOffset; }
- StringRef getName() const { return Name; }
+ StringRef getName() const { return makeArrayRef(MethodNames).front(); }
ArrayRef<StringRef> getMethodNames() const {
- if (!MethodNamesRef.empty())
- return MethodNamesRef;
- return MethodNames;
+ return makeArrayRef(MethodNames).drop_front();
}
-private:
- struct Layout {
- TypeIndex CompleteClass; // Class that owns this vftable.
- TypeIndex OverriddenVFTable; // VFTable that this overrides.
- ulittle32_t VFPtrOffset; // VFPtr offset in CompleteClass
- ulittle32_t NamesLen; // Length of subsequent names array in bytes.
- // Names: A sequence of null-terminated strings. First string is vftable
- // names.
- };
-
TypeIndex CompleteClass;
TypeIndex OverriddenVFTable;
- ulittle32_t VFPtrOffset;
- StringRef Name;
- ArrayRef<StringRef> MethodNamesRef;
+ uint32_t VFPtrOffset;
std::vector<StringRef> MethodNames;
};
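
The vftable name is now folded into the MethodNames vector: the constructor pushes Name first and appends Methods, so getName() returns the front element and getMethodNames() drops it. A small usage sketch; the TypeIndex values and names are placeholders, not taken from this change:

    std::vector<StringRef> Methods = {"f", "g"};
    VFTableRecord VFT(/*CompleteClass=*/TypeIndex(0x1000),
                      /*OverriddenVFTable=*/TypeIndex(),
                      /*VFPtrOffset=*/0, "Foo::`vftable'", Methods);
    StringRef VFTableName = VFT.getName();            // "Foo::`vftable'"
    ArrayRef<StringRef> Names = VFT.getMethodNames();  // {"f", "g"}
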
// LF_ONEMETHOD
class OneMethodRecord : public TypeRecord {
public:
- OneMethodRecord(TypeIndex Type, MethodKind Kind, MethodOptions Options,
- MemberAccess Access, int32_t VFTableOffset, StringRef Name)
- : TypeRecord(TypeRecordKind::OneMethod), Type(Type), Kind(Kind),
- Options(Options), Access(Access), VFTableOffset(VFTableOffset),
- Name(Name) {}
+ OneMethodRecord() : TypeRecord(TypeRecordKind::OneMethod) {}
+ explicit OneMethodRecord(TypeRecordKind Kind) : TypeRecord(Kind) {}
+ OneMethodRecord(TypeIndex Type, MemberAttributes Attrs, int32_t VFTableOffset,
+ StringRef Name)
+ : TypeRecord(TypeRecordKind::OneMethod), Type(Type), Attrs(Attrs),
+ VFTableOffset(VFTableOffset), Name(Name) {}
+ OneMethodRecord(TypeIndex Type, MemberAccess Access, MethodKind MK,
+ MethodOptions Options, int32_t VFTableOffset, StringRef Name)
+ : TypeRecord(TypeRecordKind::OneMethod), Type(Type),
+ Attrs(Access, MK, Options), VFTableOffset(VFTableOffset), Name(Name) {}
/// Rewrite member type indices with IndexMap. Returns false if a type index
/// is not in the map.
bool remapTypeIndices(ArrayRef<TypeIndex> IndexMap);
- static ErrorOr<OneMethodRecord> deserialize(TypeRecordKind Kind,
- ArrayRef<uint8_t> &Data);
-
TypeIndex getType() const { return Type; }
- MethodKind getKind() const { return Kind; }
- MethodOptions getOptions() const { return Options; }
- MemberAccess getAccess() const { return Access; }
+ MethodKind getMethodKind() const { return Attrs.getMethodKind(); }
+ MethodOptions getOptions() const { return Attrs.getFlags(); }
+ MemberAccess getAccess() const { return Attrs.getAccess(); }
int32_t getVFTableOffset() const { return VFTableOffset; }
StringRef getName() const { return Name; }
bool isIntroducingVirtual() const {
- return Kind == MethodKind::IntroducingVirtual ||
- Kind == MethodKind::PureIntroducingVirtual;
+ return getMethodKind() == MethodKind::IntroducingVirtual ||
+ getMethodKind() == MethodKind::PureIntroducingVirtual;
}
-private:
- struct Layout {
- MemberAttributes Attrs;
- TypeIndex Type;
- // If is introduced virtual method:
- // VFTableOffset: int32_t offset in vftable
- // Name: Null-terminated string
- };
-
TypeIndex Type;
- MethodKind Kind;
- MethodOptions Options;
- MemberAccess Access;
+ MemberAttributes Attrs;
int32_t VFTableOffset;
StringRef Name;
};
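
OneMethodRecord now packs access, method kind and method options into a single MemberAttributes value and forwards the old getters to it. A hedged construction sketch using the (Type, Access, MK, Options, VFTableOffset, Name) overload above; the concrete enumerators and names are examples only:

    OneMethodRecord M(/*Type=*/TypeIndex(0x1002), MemberAccess::Public,
                      MethodKind::IntroducingVirtual, MethodOptions::None,
                      /*VFTableOffset=*/0, "doThing");
    // M.getAccess() == MemberAccess::Public
    // M.isIntroducingVirtual() is true, derived from getMethodKind()
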
@@ -932,6 +750,7 @@ private:
// LF_METHODLIST
class MethodOverloadListRecord : public TypeRecord {
public:
+ explicit MethodOverloadListRecord(TypeRecordKind Kind) : TypeRecord(Kind) {}
MethodOverloadListRecord(ArrayRef<OneMethodRecord> Methods)
: TypeRecord(TypeRecordKind::MethodOverloadList), Methods(Methods) {}
@@ -939,27 +758,14 @@ public:
/// is not in the map.
bool remapTypeIndices(ArrayRef<TypeIndex> IndexMap);
- static ErrorOr<MethodOverloadListRecord> deserialize(TypeRecordKind Kind,
- ArrayRef<uint8_t> &Data);
-
ArrayRef<OneMethodRecord> getMethods() const { return Methods; }
-
-private:
- struct Layout {
- MemberAttributes Attrs;
- ulittle16_t Padding;
-
- TypeIndex Type;
- // If is introduced virtual method:
- // VFTableOffset: int32_t offset in vftable
- };
-
std::vector<OneMethodRecord> Methods;
};
/// For method overload sets. LF_METHOD
class OverloadedMethodRecord : public TypeRecord {
public:
+ explicit OverloadedMethodRecord(TypeRecordKind Kind) : TypeRecord(Kind) {}
OverloadedMethodRecord(uint16_t NumOverloads, TypeIndex MethodList,
StringRef Name)
: TypeRecord(TypeRecordKind::OverloadedMethod),
@@ -969,20 +775,9 @@ public:
/// is not in the map.
bool remapTypeIndices(ArrayRef<TypeIndex> IndexMap);
- static ErrorOr<OverloadedMethodRecord> deserialize(TypeRecordKind Kind,
- ArrayRef<uint8_t> &Data);
-
uint16_t getNumOverloads() const { return NumOverloads; }
TypeIndex getMethodList() const { return MethodList; }
StringRef getName() const { return Name; }
-
-private:
- struct Layout {
- ulittle16_t MethodCount; // Size of overload set
- TypeIndex MethList; // Type index of methods in overload set
- // Name: Null-terminated string
- };
-
uint16_t NumOverloads;
TypeIndex MethodList;
StringRef Name;
@@ -991,32 +786,26 @@ private:
// LF_MEMBER
class DataMemberRecord : public TypeRecord {
public:
+ explicit DataMemberRecord(TypeRecordKind Kind) : TypeRecord(Kind) {}
+ DataMemberRecord(MemberAttributes Attrs, TypeIndex Type, uint64_t Offset,
+ StringRef Name)
+ : TypeRecord(TypeRecordKind::DataMember), Attrs(Attrs), Type(Type),
+ FieldOffset(Offset), Name(Name) {}
DataMemberRecord(MemberAccess Access, TypeIndex Type, uint64_t Offset,
StringRef Name)
- : TypeRecord(TypeRecordKind::DataMember), Access(Access), Type(Type),
+ : TypeRecord(TypeRecordKind::DataMember), Attrs(Access), Type(Type),
FieldOffset(Offset), Name(Name) {}
/// Rewrite member type indices with IndexMap. Returns false if a type index
/// is not in the map.
bool remapTypeIndices(ArrayRef<TypeIndex> IndexMap);
- static ErrorOr<DataMemberRecord> deserialize(TypeRecordKind Kind,
- ArrayRef<uint8_t> &Data);
-
- MemberAccess getAccess() const { return Access; }
+ MemberAccess getAccess() const { return Attrs.getAccess(); }
TypeIndex getType() const { return Type; }
uint64_t getFieldOffset() const { return FieldOffset; }
StringRef getName() const { return Name; }
-private:
- struct Layout {
- MemberAttributes Attrs; // Access control attributes, etc
- TypeIndex Type;
- // FieldOffset: LF_NUMERIC encoded byte offset
- // Name: Null-terminated string
- };
-
- MemberAccess Access;
+ MemberAttributes Attrs;
TypeIndex Type;
uint64_t FieldOffset;
StringRef Name;
@@ -1025,29 +814,23 @@ private:
// LF_STMEMBER
class StaticDataMemberRecord : public TypeRecord {
public:
+ explicit StaticDataMemberRecord(TypeRecordKind Kind) : TypeRecord(Kind) {}
+ StaticDataMemberRecord(MemberAttributes Attrs, TypeIndex Type, StringRef Name)
+ : TypeRecord(TypeRecordKind::StaticDataMember), Attrs(Attrs), Type(Type),
+ Name(Name) {}
StaticDataMemberRecord(MemberAccess Access, TypeIndex Type, StringRef Name)
- : TypeRecord(TypeRecordKind::StaticDataMember), Access(Access),
- Type(Type), Name(Name) {}
+ : TypeRecord(TypeRecordKind::StaticDataMember), Attrs(Access), Type(Type),
+ Name(Name) {}
/// Rewrite member type indices with IndexMap. Returns false if a type index
/// is not in the map.
bool remapTypeIndices(ArrayRef<TypeIndex> IndexMap);
- static ErrorOr<StaticDataMemberRecord> deserialize(TypeRecordKind Kind,
- ArrayRef<uint8_t> &Data);
-
- MemberAccess getAccess() const { return Access; }
+ MemberAccess getAccess() const { return Attrs.getAccess(); }
TypeIndex getType() const { return Type; }
StringRef getName() const { return Name; }
-private:
- struct Layout {
- MemberAttributes Attrs; // Access control attributes, etc
- TypeIndex Type;
- // Name: Null-terminated string
- };
-
- MemberAccess Access;
+ MemberAttributes Attrs;
TypeIndex Type;
StringRef Name;
};
@@ -1055,29 +838,23 @@ private:
// LF_ENUMERATE
class EnumeratorRecord : public TypeRecord {
public:
+ explicit EnumeratorRecord(TypeRecordKind Kind) : TypeRecord(Kind) {}
+ EnumeratorRecord(MemberAttributes Attrs, APSInt Value, StringRef Name)
+ : TypeRecord(TypeRecordKind::Enumerator), Attrs(Attrs),
+ Value(std::move(Value)), Name(Name) {}
EnumeratorRecord(MemberAccess Access, APSInt Value, StringRef Name)
- : TypeRecord(TypeRecordKind::Enumerator), Access(Access),
+ : TypeRecord(TypeRecordKind::Enumerator), Attrs(Access),
Value(std::move(Value)), Name(Name) {}
/// Rewrite member type indices with IndexMap. Returns false if a type index
/// is not in the map.
bool remapTypeIndices(ArrayRef<TypeIndex> IndexMap);
- static ErrorOr<EnumeratorRecord> deserialize(TypeRecordKind Kind,
- ArrayRef<uint8_t> &Data);
-
- MemberAccess getAccess() const { return Access; }
+ MemberAccess getAccess() const { return Attrs.getAccess(); }
APSInt getValue() const { return Value; }
StringRef getName() const { return Name; }
-private:
- struct Layout {
- MemberAttributes Attrs; // Access control attributes, etc
- // EnumValue: LF_NUMERIC encoded enumerator value
- // Name: Null-terminated string
- };
-
- MemberAccess Access;
+ MemberAttributes Attrs;
APSInt Value;
StringRef Name;
};
@@ -1085,6 +862,7 @@ private:
// LF_VFUNCTAB
class VFPtrRecord : public TypeRecord {
public:
+ explicit VFPtrRecord(TypeRecordKind Kind) : TypeRecord(Kind) {}
VFPtrRecord(TypeIndex Type)
: TypeRecord(TypeRecordKind::VFPtr), Type(Type) {}
@@ -1092,44 +870,31 @@ public:
/// is not in the map.
bool remapTypeIndices(ArrayRef<TypeIndex> IndexMap);
- static ErrorOr<VFPtrRecord> deserialize(TypeRecordKind Kind,
- ArrayRef<uint8_t> &Data);
-
TypeIndex getType() const { return Type; }
-private:
- struct Layout {
- ulittle16_t Pad0;
- TypeIndex Type; // Type of vfptr
- };
TypeIndex Type;
};
// LF_BCLASS, LF_BINTERFACE
class BaseClassRecord : public TypeRecord {
public:
+ explicit BaseClassRecord(TypeRecordKind Kind) : TypeRecord(Kind) {}
+ BaseClassRecord(MemberAttributes Attrs, TypeIndex Type, uint64_t Offset)
+ : TypeRecord(TypeRecordKind::BaseClass), Attrs(Attrs), Type(Type),
+ Offset(Offset) {}
BaseClassRecord(MemberAccess Access, TypeIndex Type, uint64_t Offset)
- : TypeRecord(TypeRecordKind::BaseClass), Access(Access), Type(Type),
+ : TypeRecord(TypeRecordKind::BaseClass), Attrs(Access), Type(Type),
Offset(Offset) {}
/// Rewrite member type indices with IndexMap. Returns false if a type index
/// is not in the map.
bool remapTypeIndices(ArrayRef<TypeIndex> IndexMap);
- static ErrorOr<BaseClassRecord> deserialize(TypeRecordKind Kind,
- ArrayRef<uint8_t> &Data);
-
- MemberAccess getAccess() const { return Access; }
+ MemberAccess getAccess() const { return Attrs.getAccess(); }
TypeIndex getBaseType() const { return Type; }
uint64_t getBaseOffset() const { return Offset; }
-private:
- struct Layout {
- MemberAttributes Attrs; // Access control attributes, etc
- TypeIndex BaseType; // Base class type
- // BaseOffset: LF_NUMERIC encoded byte offset of base from derived.
- };
- MemberAccess Access;
+ MemberAttributes Attrs;
TypeIndex Type;
uint64_t Offset;
};
@@ -1137,34 +902,29 @@ private:
// LF_VBCLASS, LF_IVBCLASS
class VirtualBaseClassRecord : public TypeRecord {
public:
- VirtualBaseClassRecord(MemberAccess Access, TypeIndex BaseType,
- TypeIndex VBPtrType, uint64_t Offset, uint64_t Index)
- : TypeRecord(TypeRecordKind::VirtualBaseClass), Access(Access),
- BaseType(BaseType), VBPtrType(VBPtrType), VBPtrOffset(Offset),
- VTableIndex(Index) {}
+ explicit VirtualBaseClassRecord(TypeRecordKind Kind) : TypeRecord(Kind) {}
+ VirtualBaseClassRecord(TypeRecordKind Kind, MemberAttributes Attrs,
+ TypeIndex BaseType, TypeIndex VBPtrType,
+ uint64_t Offset, uint64_t Index)
+ : TypeRecord(Kind), Attrs(Attrs), BaseType(BaseType),
+ VBPtrType(VBPtrType), VBPtrOffset(Offset), VTableIndex(Index) {}
+ VirtualBaseClassRecord(TypeRecordKind Kind, MemberAccess Access,
+ TypeIndex BaseType, TypeIndex VBPtrType,
+ uint64_t Offset, uint64_t Index)
+ : TypeRecord(Kind), Attrs(Access), BaseType(BaseType),
+ VBPtrType(VBPtrType), VBPtrOffset(Offset), VTableIndex(Index) {}
/// Rewrite member type indices with IndexMap. Returns false if a type index
/// is not in the map.
bool remapTypeIndices(ArrayRef<TypeIndex> IndexMap);
- static ErrorOr<VirtualBaseClassRecord> deserialize(TypeRecordKind Kind,
- ArrayRef<uint8_t> &Data);
-
- MemberAccess getAccess() const { return Access; }
+ MemberAccess getAccess() const { return Attrs.getAccess(); }
TypeIndex getBaseType() const { return BaseType; }
TypeIndex getVBPtrType() const { return VBPtrType; }
uint64_t getVBPtrOffset() const { return VBPtrOffset; }
uint64_t getVTableIndex() const { return VTableIndex; }
-private:
- struct Layout {
- MemberAttributes Attrs; // Access control attributes, etc.
- TypeIndex BaseType; // Base class type
- TypeIndex VBPtrType; // Virtual base pointer type
- // VBPtrOffset: Offset of vbptr from vfptr encoded as LF_NUMERIC.
- // VBTableIndex: Index of vbase within vbtable encoded as LF_NUMERIC.
- };
- MemberAccess Access;
+ MemberAttributes Attrs;
TypeIndex BaseType;
TypeIndex VBPtrType;
uint64_t VBPtrOffset;
@@ -1175,6 +935,7 @@ private:
/// together. The first will end in an LF_INDEX record that points to the next.
class ListContinuationRecord : public TypeRecord {
public:
+ explicit ListContinuationRecord(TypeRecordKind Kind) : TypeRecord(Kind) {}
ListContinuationRecord(TypeIndex ContinuationIndex)
: TypeRecord(TypeRecordKind::ListContinuation),
ContinuationIndex(ContinuationIndex) {}
@@ -1183,20 +944,11 @@ public:
bool remapTypeIndices(ArrayRef<TypeIndex> IndexMap);
- static ErrorOr<ListContinuationRecord> deserialize(TypeRecordKind Kind,
- ArrayRef<uint8_t> &Data);
-
-private:
- struct Layout {
- ulittle16_t Pad0;
- TypeIndex ContinuationIndex;
- };
TypeIndex ContinuationIndex;
};
-typedef CVRecord<TypeLeafKind> CVType;
-typedef VarStreamArray<CVType> CVTypeArray;
-}
-}
+} // end namespace codeview
+
+} // end namespace llvm
-#endif
+#endif // LLVM_DEBUGINFO_CODEVIEW_TYPERECORD_H
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeRecordBuilder.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeRecordBuilder.h
index eb7993baab89..5a6507ee7f5b 100644
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeRecordBuilder.h
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeRecordBuilder.h
@@ -47,6 +47,12 @@ public:
llvm::StringRef str();
uint64_t size() const { return Stream.tell(); }
+ TypeRecordKind kind() const { return Kind; }
+
+ /// Returns the number of bytes remaining before this record is larger than
+ /// the maximum record length. Accounts for the extra two byte size field in
+ /// the header.
+ size_t maxBytesRemaining() const { return MaxRecordLength - size() - 2; }
void truncate(uint64_t Size) {
// This works because raw_svector_ostream is not buffered.
@@ -56,10 +62,12 @@ public:
void reset(TypeRecordKind K) {
Buffer.clear();
+ Kind = K;
writeTypeRecordKind(K);
}
private:
+ TypeRecordKind Kind;
llvm::SmallVector<char, 256> Buffer;
llvm::raw_svector_ostream Stream;
llvm::support::endian::Writer<llvm::support::endianness::little> Writer;
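
The new maxBytesRemaining() helper reserves room for the two-byte length field that prefixes every CodeView record, so the available budget is MaxRecordLength - size() - 2. A tiny worked illustration, assuming MaxRecordLength is 0xFF00 as elsewhere in the CodeView serialization code (its definition is not part of this diff):

    // Hypothetical state: 100 bytes already written into the builder.
    size_t Remaining = 0xFF00 - 100 - 2; // 65178 bytes before the record
                                         // has to be split or continued.
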
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeRecordMapping.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeRecordMapping.h
new file mode 100644
index 000000000000..fe470a72abbb
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeRecordMapping.h
@@ -0,0 +1,52 @@
+//===- TypeRecordMapping.h --------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_CODEVIEW_TYPERECORDMAPPING_H
+#define LLVM_DEBUGINFO_CODEVIEW_TYPERECORDMAPPING_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/DebugInfo/CodeView/CodeViewRecordIO.h"
+#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+namespace msf {
+class StreamReader;
+class StreamWriter;
+}
+namespace codeview {
+class TypeRecordMapping : public TypeVisitorCallbacks {
+public:
+ explicit TypeRecordMapping(msf::StreamReader &Reader) : IO(Reader) {}
+ explicit TypeRecordMapping(msf::StreamWriter &Writer) : IO(Writer) {}
+
+ Error visitTypeBegin(CVType &Record) override;
+ Error visitTypeEnd(CVType &Record) override;
+
+ Error visitMemberBegin(CVMemberRecord &Record) override;
+ Error visitMemberEnd(CVMemberRecord &Record) override;
+
+#define TYPE_RECORD(EnumName, EnumVal, Name) \
+ Error visitKnownRecord(CVType &CVR, Name##Record &Record) override;
+#define MEMBER_RECORD(EnumName, EnumVal, Name) \
+ Error visitKnownMember(CVMemberRecord &CVR, Name##Record &Record) override;
+#define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
+#define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
+#include "TypeRecords.def"
+
+private:
+ Optional<TypeLeafKind> TypeKind;
+ Optional<TypeLeafKind> MemberKind;
+
+ CodeViewRecordIO IO;
+};
+}
+}
+
+#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeRecords.def b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeRecords.def
index 0959f4bf19c7..c98dbac21a7a 100644
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeRecords.def
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeRecords.def
@@ -43,6 +43,8 @@ TYPE_RECORD(LF_PROCEDURE, 0x1008, Procedure)
TYPE_RECORD(LF_MFUNCTION, 0x1009, MemberFunction)
TYPE_RECORD(LF_ARGLIST, 0x1201, ArgList)
+TYPE_RECORD(LF_FIELDLIST, 0x1203, FieldList)
+
TYPE_RECORD(LF_ARRAY, 0x1503, Array)
TYPE_RECORD(LF_CLASS, 0x1504, Class)
TYPE_RECORD_ALIAS(LF_STRUCTURE, 0x1505, Struct, Class)
@@ -159,7 +161,6 @@ CV_TYPE(LF_OEM2, 0x1011)
CV_TYPE(LF_SKIP, 0x1200)
CV_TYPE(LF_DEFARG_ST, 0x1202)
-CV_TYPE(LF_FIELDLIST, 0x1203)
CV_TYPE(LF_DERIVED, 0x1204)
CV_TYPE(LF_DIMCONU, 0x1207)
CV_TYPE(LF_DIMCONLU, 0x1208)
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeSerializer.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeSerializer.h
new file mode 100644
index 000000000000..e05922194638
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeSerializer.h
@@ -0,0 +1,140 @@
+//===- TypeSerializer.h -----------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_CODEVIEW_TYPESERIALIZER_H
+#define LLVM_DEBUGINFO_CODEVIEW_TYPESERIALIZER_H
+
+#include "llvm/DebugInfo/CodeView/TypeRecordMapping.h"
+#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h"
+#include "llvm/DebugInfo/MSF/ByteStream.h"
+#include "llvm/DebugInfo/MSF/StreamWriter.h"
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+
+namespace codeview {
+
+class TypeSerializer : public TypeVisitorCallbacks {
+ struct SubRecord {
+ SubRecord(TypeLeafKind K, uint32_t S) : Kind(K), Size(S) {}
+
+ TypeLeafKind Kind;
+ uint32_t Size = 0;
+ };
+ struct RecordSegment {
+ SmallVector<SubRecord, 16> SubRecords;
+
+ uint32_t length() const {
+ uint32_t L = sizeof(RecordPrefix);
+ for (const auto &R : SubRecords) {
+ L += R.Size;
+ }
+ return L;
+ }
+ };
+
+ typedef SmallVector<MutableArrayRef<uint8_t>, 2> RecordList;
+
+ static constexpr uint8_t ContinuationLength = 8;
+ BumpPtrAllocator &RecordStorage;
+ RecordSegment CurrentSegment;
+ RecordList FieldListSegments;
+
+ TypeIndex LastTypeIndex;
+ Optional<TypeLeafKind> TypeKind;
+ Optional<TypeLeafKind> MemberKind;
+ std::vector<uint8_t> RecordBuffer;
+ msf::MutableByteStream Stream;
+ msf::StreamWriter Writer;
+ TypeRecordMapping Mapping;
+
+ RecordList SeenRecords;
+ StringMap<TypeIndex> HashedRecords;
+
+ bool isInFieldList() const;
+ TypeIndex calcNextTypeIndex() const;
+ TypeIndex incrementTypeIndex();
+ MutableArrayRef<uint8_t> getCurrentSubRecordData();
+ MutableArrayRef<uint8_t> getCurrentRecordData();
+ Error writeRecordPrefix(TypeLeafKind Kind);
+ TypeIndex insertRecordBytesPrivate(MutableArrayRef<uint8_t> Record);
+
+ Expected<MutableArrayRef<uint8_t>>
+ addPadding(MutableArrayRef<uint8_t> Record);
+
+public:
+ explicit TypeSerializer(BumpPtrAllocator &Storage);
+
+ ArrayRef<MutableArrayRef<uint8_t>> records() const;
+ TypeIndex getLastTypeIndex() const;
+ TypeIndex insertRecordBytes(MutableArrayRef<uint8_t> Record);
+ Expected<TypeIndex> visitTypeEndGetIndex(CVType &Record);
+
+ Error visitTypeBegin(CVType &Record) override;
+ Error visitTypeEnd(CVType &Record) override;
+ Error visitMemberBegin(CVMemberRecord &Record) override;
+ Error visitMemberEnd(CVMemberRecord &Record) override;
+
+#define TYPE_RECORD(EnumName, EnumVal, Name) \
+ virtual Error visitKnownRecord(CVType &CVR, Name##Record &Record) override { \
+ return visitKnownRecordImpl(CVR, Record); \
+ }
+#define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
+#define MEMBER_RECORD(EnumName, EnumVal, Name) \
+ Error visitKnownMember(CVMemberRecord &CVR, Name##Record &Record) override { \
+ return visitKnownMemberImpl<Name##Record>(CVR, Record); \
+ }
+#define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
+#include "llvm/DebugInfo/CodeView/TypeRecords.def"
+
+private:
+ template <typename RecordKind>
+ Error visitKnownRecordImpl(CVType &CVR, RecordKind &Record) {
+ return Mapping.visitKnownRecord(CVR, Record);
+ }
+
+ template <typename RecordType>
+ Error visitKnownMemberImpl(CVMemberRecord &CVR, RecordType &Record) {
+ assert(CVR.Kind == static_cast<TypeLeafKind>(Record.getKind()));
+
+ if (auto EC = Writer.writeEnum(CVR.Kind))
+ return EC;
+
+ if (auto EC = Mapping.visitKnownMember(CVR, Record))
+ return EC;
+
+ // Get all the data that was just written and is yet to be committed to
+ // the current segment. Then pad it to 4 bytes.
+ MutableArrayRef<uint8_t> ThisRecord = getCurrentSubRecordData();
+ auto ExpectedRecord = addPadding(ThisRecord);
+ if (!ExpectedRecord)
+ return ExpectedRecord.takeError();
+ ThisRecord = *ExpectedRecord;
+
+ CurrentSegment.SubRecords.emplace_back(CVR.Kind, ThisRecord.size());
+ CVR.Data = ThisRecord;
+
+ // Both the last subrecord and the total length of this segment should be
+ // multiples of 4.
+ assert(ThisRecord.size() % 4 == 0);
+ assert(CurrentSegment.length() % 4 == 0);
+
+ return Error::success();
+ }
+};
+}
+}
+
+#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeTableBuilder.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeTableBuilder.h
index 5b2aa6186147..4e6d81ece318 100644
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeTableBuilder.h
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeTableBuilder.h
@@ -10,61 +10,120 @@
#ifndef LLVM_DEBUGINFO_CODEVIEW_TYPETABLEBUILDER_H
#define LLVM_DEBUGINFO_CODEVIEW_TYPETABLEBUILDER_H
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
+#include "llvm/DebugInfo/CodeView/TypeSerializer.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
-#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Error.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <type_traits>
namespace llvm {
-
-class StringRef;
-
namespace codeview {
-class FieldListRecordBuilder;
-class MethodListRecordBuilder;
-class TypeRecordBuilder;
-
class TypeTableBuilder {
private:
+ TypeIndex handleError(Error EC) const {
+ assert(false && "Couldn't write Type!");
+ consumeError(std::move(EC));
+ return TypeIndex();
+ }
+
+ BumpPtrAllocator &Allocator;
+ TypeSerializer Serializer;
+
+public:
+ explicit TypeTableBuilder(BumpPtrAllocator &Allocator)
+ : Allocator(Allocator), Serializer(Allocator) {}
TypeTableBuilder(const TypeTableBuilder &) = delete;
TypeTableBuilder &operator=(const TypeTableBuilder &) = delete;
-protected:
- TypeTableBuilder();
+ bool empty() const { return Serializer.records().empty(); }
-public:
- virtual ~TypeTableBuilder();
+ BumpPtrAllocator &getAllocator() const { return Allocator; }
+
+ template <typename T> TypeIndex writeKnownType(T &Record) {
+ static_assert(!std::is_same<T, FieldListRecord>::value,
+ "Can't serialize FieldList!");
+
+ CVType Type;
+ Type.Type = static_cast<TypeLeafKind>(Record.getKind());
+ if (auto EC = Serializer.visitTypeBegin(Type))
+ return handleError(std::move(EC));
+ if (auto EC = Serializer.visitKnownRecord(Type, Record))
+ return handleError(std::move(EC));
+
+ auto ExpectedIndex = Serializer.visitTypeEndGetIndex(Type);
+ if (!ExpectedIndex)
+ return handleError(ExpectedIndex.takeError());
+
+ return *ExpectedIndex;
+ }
+
+ TypeIndex writeSerializedRecord(MutableArrayRef<uint8_t> Record) {
+ return Serializer.insertRecordBytes(Record);
+ }
+
+ template <typename TFunc> void ForEachRecord(TFunc Func) {
+ uint32_t Index = TypeIndex::FirstNonSimpleIndex;
+
+ for (auto Record : Serializer.records()) {
+ Func(TypeIndex(Index), Record);
+ ++Index;
+ }
+ }
+
+ ArrayRef<MutableArrayRef<uint8_t>> records() const {
+ return Serializer.records();
+ }
+};
+
+class FieldListRecordBuilder {
+ TypeTableBuilder &TypeTable;
+ TypeSerializer TempSerializer;
+ CVType Type;
public:
- TypeIndex writeModifier(const ModifierRecord &Record);
- TypeIndex writeProcedure(const ProcedureRecord &Record);
- TypeIndex writeMemberFunction(const MemberFunctionRecord &Record);
- TypeIndex writeArgList(const ArgListRecord &Record);
- TypeIndex writePointer(const PointerRecord &Record);
- TypeIndex writeArray(const ArrayRecord &Record);
- TypeIndex writeClass(const ClassRecord &Record);
- TypeIndex writeUnion(const UnionRecord &Record);
- TypeIndex writeEnum(const EnumRecord &Record);
- TypeIndex writeBitField(const BitFieldRecord &Record);
- TypeIndex writeVFTableShape(const VFTableShapeRecord &Record);
- TypeIndex writeStringId(const StringIdRecord &Record);
- TypeIndex writeVFTable(const VFTableRecord &Record);
- TypeIndex writeUdtSourceLine(const UdtSourceLineRecord &Record);
- TypeIndex writeUdtModSourceLine(const UdtModSourceLineRecord &Record);
- TypeIndex writeFuncId(const FuncIdRecord &Record);
- TypeIndex writeMemberFuncId(const MemberFuncIdRecord &Record);
- TypeIndex writeBuildInfo(const BuildInfoRecord &Record);
- TypeIndex writeMethodOverloadList(const MethodOverloadListRecord &Record);
- TypeIndex writeTypeServer2(const TypeServer2Record &Record);
-
- TypeIndex writeFieldList(FieldListRecordBuilder &FieldList);
-
- TypeIndex writeRecord(TypeRecordBuilder &builder);
-
- virtual TypeIndex writeRecord(llvm::StringRef record) = 0;
+ explicit FieldListRecordBuilder(TypeTableBuilder &TypeTable)
+ : TypeTable(TypeTable), TempSerializer(TypeTable.getAllocator()) {
+ Type.Type = TypeLeafKind::LF_FIELDLIST;
+ }
+
+ void begin() {
+ if (auto EC = TempSerializer.visitTypeBegin(Type))
+ consumeError(std::move(EC));
+ }
+
+ template <typename T> void writeMemberType(T &Record) {
+ CVMemberRecord CVMR;
+ CVMR.Kind = static_cast<TypeLeafKind>(Record.getKind());
+ if (auto EC = TempSerializer.visitMemberBegin(CVMR))
+ consumeError(std::move(EC));
+ if (auto EC = TempSerializer.visitKnownMember(CVMR, Record))
+ consumeError(std::move(EC));
+ if (auto EC = TempSerializer.visitMemberEnd(CVMR))
+ consumeError(std::move(EC));
+ }
+
+ TypeIndex end() {
+ if (auto EC = TempSerializer.visitTypeEnd(Type)) {
+ consumeError(std::move(EC));
+ return TypeIndex();
+ }
+
+ TypeIndex Index;
+ for (auto Record : TempSerializer.records()) {
+ Index = TypeTable.writeSerializedRecord(Record);
+ }
+ return Index;
+ }
};
-}
-}
-#endif
+} // end namespace codeview
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_CODEVIEW_TYPETABLEBUILDER_H
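
The builder interface changes from one writeXxx() method per record kind to a single templated writeKnownType(), with field lists assembled through FieldListRecordBuilder. A hedged end-to-end sketch using only the APIs shown above; the member layout and names are illustrative:

    // Assumes the CodeView headers from this diff are included.
    BumpPtrAllocator Allocator;
    TypeTableBuilder Table(Allocator);

    FieldListRecordBuilder FLRB(Table);
    FLRB.begin();
    DataMemberRecord Member(MemberAccess::Public,
                            TypeIndex(SimpleTypeKind::Int32), /*Offset=*/0, "x");
    FLRB.writeMemberType(Member);
    TypeIndex FieldList = FLRB.end();

    ClassRecord Class(TypeRecordKind::Class, /*MemberCount=*/1,
                      ClassOptions::None, FieldList,
                      /*DerivationList=*/TypeIndex(), /*VTableShape=*/TypeIndex(),
                      /*Size=*/4, "S", "S");
    TypeIndex ClassIndex = Table.writeKnownType(Class);
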
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h
new file mode 100644
index 000000000000..f25129691041
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h
@@ -0,0 +1,114 @@
+//===- TypeVisitorCallbackPipeline.h ----------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_CODEVIEW_TYPEVISITORCALLBACKPIPELINE_H
+#define LLVM_DEBUGINFO_CODEVIEW_TYPEVISITORCALLBACKPIPELINE_H
+
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h"
+#include "llvm/Support/Error.h"
+#include <vector>
+
+namespace llvm {
+namespace codeview {
+
+class TypeVisitorCallbackPipeline : public TypeVisitorCallbacks {
+public:
+ TypeVisitorCallbackPipeline() = default;
+
+ Error visitUnknownType(CVRecord<TypeLeafKind> &Record) override {
+ for (auto Visitor : Pipeline) {
+ if (auto EC = Visitor->visitUnknownType(Record))
+ return EC;
+ }
+ return Error::success();
+ }
+
+ Error visitUnknownMember(CVMemberRecord &Record) override {
+ for (auto Visitor : Pipeline) {
+ if (auto EC = Visitor->visitUnknownMember(Record))
+ return EC;
+ }
+ return Error::success();
+ }
+
+ Error visitTypeBegin(CVType &Record) override {
+ for (auto Visitor : Pipeline) {
+ if (auto EC = Visitor->visitTypeBegin(Record))
+ return EC;
+ }
+ return Error::success();
+ }
+
+ Error visitTypeEnd(CVType &Record) override {
+ for (auto Visitor : Pipeline) {
+ if (auto EC = Visitor->visitTypeEnd(Record))
+ return EC;
+ }
+ return Error::success();
+ }
+
+ Error visitMemberBegin(CVMemberRecord &Record) override {
+ for (auto Visitor : Pipeline) {
+ if (auto EC = Visitor->visitMemberBegin(Record))
+ return EC;
+ }
+ return Error::success();
+ }
+
+ Error visitMemberEnd(CVMemberRecord &Record) override {
+ for (auto Visitor : Pipeline) {
+ if (auto EC = Visitor->visitMemberEnd(Record))
+ return EC;
+ }
+ return Error::success();
+ }
+
+ void addCallbackToPipeline(TypeVisitorCallbacks &Callbacks) {
+ Pipeline.push_back(&Callbacks);
+ }
+
+#define TYPE_RECORD(EnumName, EnumVal, Name) \
+ Error visitKnownRecord(CVType &CVR, Name##Record &Record) override { \
+ return visitKnownRecordImpl(CVR, Record); \
+ }
+#define MEMBER_RECORD(EnumName, EnumVal, Name) \
+ Error visitKnownMember(CVMemberRecord &CVMR, Name##Record &Record) \
+ override { \
+ return visitKnownMemberImpl(CVMR, Record); \
+ }
+#define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
+#define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
+#include "llvm/DebugInfo/CodeView/TypeRecords.def"
+
+private:
+ template <typename T> Error visitKnownRecordImpl(CVType &CVR, T &Record) {
+ for (auto Visitor : Pipeline) {
+ if (auto EC = Visitor->visitKnownRecord(CVR, Record))
+ return EC;
+ }
+ return Error::success();
+ }
+
+ template <typename T>
+ Error visitKnownMemberImpl(CVMemberRecord &CVMR, T &Record) {
+ for (auto Visitor : Pipeline) {
+ if (auto EC = Visitor->visitKnownMember(CVMR, Record))
+ return EC;
+ }
+ return Error::success();
+ }
+ std::vector<TypeVisitorCallbacks *> Pipeline;
+};
+
+} // end namespace codeview
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_CODEVIEW_TYPEVISITORCALLBACKPIPELINE_H
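
TypeVisitorCallbackPipeline fans each callback out to every registered TypeVisitorCallbacks in registration order, stopping at the first Error. A hedged wiring sketch; MyDumper stands in for any callbacks subclass and is not defined in this change:

    BumpPtrAllocator Allocator;
    TypeSerializer Serializer(Allocator);   // also a TypeVisitorCallbacks
    MyDumper Dumper;                        // hypothetical subclass

    TypeVisitorCallbackPipeline Pipeline;
    Pipeline.addCallbackToPipeline(Dumper);
    Pipeline.addCallbackToPipeline(Serializer);
    // A visitor driving Pipeline now dumps each record and re-serializes it
    // in a single pass over the type stream.
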
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h
index 310847ec5d2d..5e27df346b00 100644
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h
+++ b/contrib/llvm/include/llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h
@@ -10,54 +10,53 @@
#ifndef LLVM_DEBUGINFO_CODEVIEW_TYPEVISITORCALLBACKS_H
#define LLVM_DEBUGINFO_CODEVIEW_TYPEVISITORCALLBACKS_H
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/DebugInfo/CodeView/CodeView.h"
-#include "llvm/DebugInfo/CodeView/CVRecord.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/Support/Error.h"
namespace llvm {
namespace codeview {
+
class TypeVisitorCallbacks {
friend class CVTypeVisitor;
public:
- virtual ~TypeVisitorCallbacks() {}
+ virtual ~TypeVisitorCallbacks() = default;
/// Action to take on unknown types. By default, they are ignored.
- virtual Error visitUnknownType(const CVRecord<TypeLeafKind> &Record) {
- return Error::success();
- }
- virtual Error visitUnknownMember(const CVRecord<TypeLeafKind> &Record) {
- return Error::success();
- }
-
+ virtual Error visitUnknownType(CVType &Record) { return Error::success(); }
/// Paired begin/end actions for all types. Receives all record data,
- /// including the fixed-length record prefix.
- virtual Error visitTypeBegin(const CVRecord<TypeLeafKind> &Record) {
- return Error::success();
- }
- virtual Error visitTypeEnd(const CVRecord<TypeLeafKind> &Record) {
+ /// including the fixed-length record prefix. visitTypeBegin() should return
+ /// the type of the Record, or an error if it cannot be determined.
+ virtual Error visitTypeBegin(CVType &Record) { return Error::success(); }
+ virtual Error visitTypeEnd(CVType &Record) { return Error::success(); }
+
+ virtual Error visitUnknownMember(CVMemberRecord &Record) {
return Error::success();
}
- virtual Error visitFieldListBegin(const CVRecord<TypeLeafKind> &Record) {
+ virtual Error visitMemberBegin(CVMemberRecord &Record) {
return Error::success();
}
- virtual Error visitFieldListEnd(const CVRecord<TypeLeafKind> &Record) {
+ virtual Error visitMemberEnd(CVMemberRecord &Record) {
return Error::success();
}
#define TYPE_RECORD(EnumName, EnumVal, Name) \
- virtual Error visit##Name(Name##Record &Record) { return Error::success(); }
+ virtual Error visitKnownRecord(CVType &CVR, Name##Record &Record) { \
+ return Error::success(); \
+ }
#define MEMBER_RECORD(EnumName, EnumVal, Name) \
- TYPE_RECORD(EnumName, EnumVal, Name)
+ virtual Error visitKnownMember(CVMemberRecord &CVM, Name##Record &Record) { \
+ return Error::success(); \
+ }
+
#define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
#define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
#include "TypeRecords.def"
};
-}
-}
-#endif
+} // end namespace codeview
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_CODEVIEW_TYPEVISITORCALLBACKS_H
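
The per-kind visitXxx() hooks are replaced by visitKnownRecord/visitKnownMember overloads generated from TypeRecords.def, each receiving the raw record alongside the deserialized one. A minimal subclass sketch; the collector class is illustrative and not part of LLVM:

    class ClassNameCollector : public TypeVisitorCallbacks {
    public:
      Error visitKnownRecord(CVType &CVR, ClassRecord &Record) override {
        // Fires for LF_CLASS and its aliases (LF_STRUCTURE, LF_INTERFACE).
        Names.push_back(Record.getName());
        return Error::success();
      }
      std::vector<StringRef> Names;
    };
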
diff --git a/contrib/llvm/include/llvm/DebugInfo/DIContext.h b/contrib/llvm/include/llvm/DebugInfo/DIContext.h
index 2f88371979ea..804419c517df 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DIContext.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DIContext.h
@@ -17,9 +17,12 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/Object/ObjectFile.h"
-#include "llvm/Support/Casting.h"
-#include "llvm/Support/DataTypes.h"
+#include <cassert>
+#include <cstdint>
+#include <memory>
#include <string>
+#include <tuple>
+#include <utility>
namespace llvm {
@@ -32,8 +35,12 @@ struct DILineInfo {
uint32_t Line;
uint32_t Column;
+ // DWARF-specific.
+ uint32_t Discriminator;
+
DILineInfo()
- : FileName("<invalid>"), FunctionName("<invalid>"), Line(0), Column(0) {}
+ : FileName("<invalid>"), FunctionName("<invalid>"), Line(0), Column(0),
+ Discriminator(0) {}
bool operator==(const DILineInfo &RHS) const {
return Line == RHS.Line && Column == RHS.Column &&
@@ -42,6 +49,10 @@ struct DILineInfo {
bool operator!=(const DILineInfo &RHS) const {
return !(*this == RHS);
}
+ bool operator<(const DILineInfo &RHS) const {
+ return std::tie(FileName, FunctionName, Line, Column) <
+ std::tie(RHS.FileName, RHS.FunctionName, RHS.Line, RHS.Column);
+ }
};
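
DILineInfo gains a DWARF-only Discriminator field and an operator< that orders by (FileName, FunctionName, Line, Column) via std::tie; the discriminator does not take part in the ordering. A small sketch (requires <set> for the container):

    DILineInfo A, B;
    A.FileName = "a.c"; A.Line = 10; A.Column = 1;
    B.FileName = "a.c"; B.Line = 12; B.Column = 1;
    bool Before = A < B;                  // true: ties broken left to right
    std::set<DILineInfo> Sorted = {A, B}; // usable as an ordered key now
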
typedef SmallVector<std::pair<uint64_t, DILineInfo>, 16> DILineInfoTable;
@@ -49,19 +60,24 @@ typedef SmallVector<std::pair<uint64_t, DILineInfo>, 16> DILineInfoTable;
/// DIInliningInfo - a format-neutral container for inlined code description.
class DIInliningInfo {
SmallVector<DILineInfo, 4> Frames;
- public:
- DIInliningInfo() {}
+
+public:
+ DIInliningInfo() = default;
+
DILineInfo getFrame(unsigned Index) const {
assert(Index < Frames.size());
return Frames[Index];
}
+
DILineInfo *getMutableFrame(unsigned Index) {
assert(Index < Frames.size());
return &Frames[Index];
}
+
uint32_t getNumberOfFrames() const {
return Frames.size();
}
+
void addFrame(const DILineInfo &Frame) {
Frames.push_back(Frame);
}
@@ -124,6 +140,7 @@ enum DIDumpType {
DIDT_AppleNamespaces,
DIDT_AppleObjC,
DIDT_CUIndex,
+ DIDT_GdbIndex,
DIDT_TUIndex,
};
@@ -133,13 +150,14 @@ public:
CK_DWARF,
CK_PDB
};
- DIContextKind getKind() const { return Kind; }
DIContext(DIContextKind K) : Kind(K) {}
- virtual ~DIContext() {}
+ virtual ~DIContext() = default;
+
+ DIContextKind getKind() const { return Kind; }
virtual void dump(raw_ostream &OS, DIDumpType DumpType = DIDT_All,
- bool DumpEH = false) = 0;
+ bool DumpEH = false, bool SummarizeTypes = false) = 0;
virtual DILineInfo getLineInfoForAddress(uint64_t Address,
DILineInfoSpecifier Specifier = DILineInfoSpecifier()) = 0;
@@ -147,6 +165,7 @@ public:
uint64_t Size, DILineInfoSpecifier Specifier = DILineInfoSpecifier()) = 0;
virtual DIInliningInfo getInliningInfoForAddress(uint64_t Address,
DILineInfoSpecifier Specifier = DILineInfoSpecifier()) = 0;
+
private:
const DIContextKind Kind;
};
@@ -192,6 +211,6 @@ public:
virtual std::unique_ptr<LoadedObjectInfo> clone() const = 0;
};
-}
+} // end namespace llvm
-#endif
+#endif // LLVM_DEBUGINFO_DICONTEXT_H
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h
index 6ab5d5ce6f6e..778817f57bf5 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h
@@ -10,26 +10,41 @@
#ifndef LLVM_LIB_DEBUGINFO_DWARFABBREVIATIONDECLARATION_H
#define LLVM_LIB_DEBUGINFO_DWARFABBREVIATIONDECLARATION_H
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/DataExtractor.h"
-
+#include "llvm/Support/Dwarf.h"
namespace llvm {
+class DWARFUnit;
+class DWARFFormValue;
class raw_ostream;
class DWARFAbbreviationDeclaration {
public:
struct AttributeSpec {
- AttributeSpec(uint16_t Attr, uint16_t Form) : Attr(Attr), Form(Form) {}
- uint16_t Attr;
- uint16_t Form;
+ AttributeSpec(dwarf::Attribute A, dwarf::Form F, Optional<uint8_t> S)
+ : Attr(A), Form(F), ByteSize(S) {}
+ dwarf::Attribute Attr;
+ dwarf::Form Form;
+ /// If ByteSize has a value, then it contains the fixed size in bytes for
+ /// the Form in this object. If ByteSize doesn't have a value, then the
+ /// byte size of Form either varies according to the DWARFUnit that it is
+ /// contained in or the value size varies and must be decoded from the
+ /// debug information in order to determine its size.
+ Optional<uint8_t> ByteSize;
+ /// Get the fixed byte size of this Form if possible. This function might
+ /// use the DWARFUnit to calculate the size of the Form, like for
+ /// DW_FORM_addr and DW_FORM_ref_addr, so this isn't just an accessor for
+ /// the ByteSize member.
+ Optional<uint8_t> getByteSize(const DWARFUnit &U) const;
};
typedef SmallVector<AttributeSpec, 8> AttributeSpecVector;
DWARFAbbreviationDeclaration();
uint32_t getCode() const { return Code; }
- uint32_t getTag() const { return Tag; }
+ dwarf::Tag getTag() const { return Tag; }
bool hasChildren() const { return HasChildren; }
typedef iterator_range<AttributeSpecVector::const_iterator>
@@ -39,22 +54,77 @@ public:
return attr_iterator_range(AttributeSpecs.begin(), AttributeSpecs.end());
}
- uint16_t getFormByIndex(uint32_t idx) const {
- return idx < AttributeSpecs.size() ? AttributeSpecs[idx].Form : 0;
+ dwarf::Form getFormByIndex(uint32_t idx) const {
+ if (idx < AttributeSpecs.size())
+ return AttributeSpecs[idx].Form;
+ return dwarf::Form(0);
}
- uint32_t findAttributeIndex(uint16_t attr) const;
+ /// Get the index of the specified attribute.
+ ///
+ /// Searches this abbreviation declaration for the index of the specified
+ /// attribute.
+ ///
+ /// \param attr DWARF attribute to search for.
+ /// \returns Optional index of the attribute if found, None otherwise.
+ Optional<uint32_t> findAttributeIndex(dwarf::Attribute attr) const;
+
+ /// Extract a DWARF form value from a DIE specified by DIE offset.
+ ///
+ /// Extract an attribute value for a DWARFUnit given the DIE offset and the
+ /// attribute.
+ ///
+ /// \param DIEOffset the DIE offset that points to the ULEB128 abbreviation
+ /// code in the .debug_info data.
+ /// \param Attr DWARF attribute to search for.
+ /// \param U the DWARFUnit that contains the DIE.
+ /// \returns Optional DWARF form value if the attribute was extracted.
+ Optional<DWARFFormValue> getAttributeValue(const uint32_t DIEOffset,
+ const dwarf::Attribute Attr,
+ const DWARFUnit &U) const;
+
bool extract(DataExtractor Data, uint32_t* OffsetPtr);
void dump(raw_ostream &OS) const;
+ // Return an optional byte size of all attribute data in this abbreviation
+ // if a constant byte size can be calculated given a DWARFUnit. This allows
+ // DWARF parsing to be faster as many DWARF DIEs have a fixed byte size.
+ Optional<size_t> getFixedAttributesByteSize(const DWARFUnit &U) const;
+
private:
void clear();
+ /// A helper structure that can quickly determine the size in bytes of an
+ /// abbreviation declaration.
+ struct FixedSizeInfo {
+ /// The fixed byte size for fixed size forms.
+ uint16_t NumBytes;
+ /// Number of DW_FORM_address forms in this abbreviation declaration.
+ uint8_t NumAddrs;
+ /// Number of DW_FORM_ref_addr forms in this abbreviation declaration.
+ uint8_t NumRefAddrs;
+ /// Number of forms that are 4 bytes in DWARF32 and 8 bytes in DWARF64.
+ uint8_t NumDwarfOffsets;
+ /// Constructor
+ FixedSizeInfo()
+ : NumBytes(0), NumAddrs(0), NumRefAddrs(0), NumDwarfOffsets(0) {}
+ /// Calculate the fixed size in bytes given a DWARFUnit.
+ ///
+ /// \param U the DWARFUnit to use when determining the byte size.
+ /// \returns the size in bytes for all attribute data in this abbreviation.
+ /// The returned size does not include bytes for the ULEB128 abbreviation
+ /// code
+ size_t getByteSize(const DWARFUnit &U) const;
+ };
+
uint32_t Code;
- uint32_t Tag;
+ dwarf::Tag Tag;
+ uint8_t CodeByteSize;
bool HasChildren;
-
AttributeSpecVector AttributeSpecs;
+ /// If this abbreviation has a fixed byte size then the FixedAttributeSize member
+ /// variable below will have a value.
+ Optional<FixedSizeInfo> FixedAttributeSize;
};
}
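
findAttributeIndex() now returns Optional<uint32_t> instead of a sentinel index, and a whole abbreviation can report a fixed attribute byte size for fast DIE skipping. A short sketch; Decl and U are assumed to be an already-extracted DWARFAbbreviationDeclaration and its DWARFUnit:

    Optional<uint32_t> NameIdx = Decl.findAttributeIndex(dwarf::DW_AT_name);
    if (NameIdx)
      // The abbreviation describes DW_AT_name; *NameIdx is its position.
      (void)Decl.getFormByIndex(*NameIdx);
    if (Optional<size_t> Bytes = Decl.getFixedAttributesByteSize(U))
      (void)*Bytes; // fast path: DIEs using Decl have a fixed payload size
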
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h
index 47dbf5fd4f56..63343728fa99 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h
@@ -13,6 +13,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
#include "llvm/DebugInfo/DWARF/DWARFRelocMap.h"
+#include "llvm/Support/Dwarf.h"
#include <cstdint>
namespace llvm {
@@ -30,7 +31,7 @@ class DWARFAcceleratorTable {
struct HeaderData {
typedef uint16_t AtomType;
- typedef uint16_t Form;
+ typedef dwarf::Form Form;
uint32_t DIEOffsetBase;
SmallVector<std::pair<AtomType, Form>, 3> Atoms;
};
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
index 741a31cb582b..ef310e704005 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFContext.h
@@ -10,18 +10,31 @@
#ifndef LLVM_LIB_DEBUGINFO_DWARFCONTEXT_H
#define LLVM_LIB_DEBUGINFO_DWARFCONTEXT_H
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/DIContext.h"
#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugAranges.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugFrame.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugLoc.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugMacro.h"
-#include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h"
+#include "llvm/DebugInfo/DWARF/DWARFGdbIndex.h"
#include "llvm/DebugInfo/DWARF/DWARFSection.h"
#include "llvm/DebugInfo/DWARF/DWARFTypeUnit.h"
+#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
+#include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h"
+#include "llvm/Object/ObjectFile.h"
+#include <cstdint>
+#include <deque>
+#include <map>
+#include <memory>
+#include <utility>
namespace llvm {
@@ -30,17 +43,17 @@ namespace llvm {
// dwarf where we expect relocated values. This adds a bit of complexity to the
// dwarf parsing/extraction at the benefit of not allocating memory for the
// entire size of the debug info sections.
-typedef DenseMap<uint64_t, std::pair<uint8_t, int64_t> > RelocAddrMap;
+typedef DenseMap<uint64_t, std::pair<uint8_t, int64_t>> RelocAddrMap;
/// DWARFContext
/// This data structure is the top level entity that deals with dwarf debug
/// information parsing. The actual data is supplied through pure virtual
/// methods that a concrete implementation provides.
class DWARFContext : public DIContext {
-
DWARFUnitSection<DWARFCompileUnit> CUs;
std::deque<DWARFUnitSection<DWARFTypeUnit>> TUs;
std::unique_ptr<DWARFUnitIndex> CUIndex;
+ std::unique_ptr<DWARFGdbIndex> GdbIndex;
std::unique_ptr<DWARFUnitIndex> TUIndex;
std::unique_ptr<DWARFDebugAbbrev> Abbrev;
std::unique_ptr<DWARFDebugLoc> Loc;
@@ -55,9 +68,6 @@ class DWARFContext : public DIContext {
std::unique_ptr<DWARFDebugAbbrev> AbbrevDWO;
std::unique_ptr<DWARFDebugLocDWO> LocDWO;
- DWARFContext(DWARFContext &) = delete;
- DWARFContext &operator=(DWARFContext &) = delete;
-
/// Read compile units from the debug_info section (if necessary)
/// and store them in CUs.
void parseCompileUnits();
@@ -76,13 +86,15 @@ class DWARFContext : public DIContext {
public:
DWARFContext() : DIContext(CK_DWARF) {}
+ DWARFContext(DWARFContext &) = delete;
+ DWARFContext &operator=(DWARFContext &) = delete;
static bool classof(const DIContext *DICtx) {
return DICtx->getKind() == CK_DWARF;
}
void dump(raw_ostream &OS, DIDumpType DumpType = DIDT_All,
- bool DumpEH = false) override;
+ bool DumpEH = false, bool SummarizeTypes = false) override;
typedef DWARFUnitSection<DWARFCompileUnit>::iterator_range cu_iterator_range;
typedef DWARFUnitSection<DWARFTypeUnit>::iterator_range tu_iterator_range;
@@ -149,6 +161,7 @@ public:
}
const DWARFUnitIndex &getCUIndex();
+ DWARFGdbIndex &getGdbIndex();
const DWARFUnitIndex &getTUIndex();
/// Get a pointer to the parsed DebugAbbrev object.
@@ -220,11 +233,13 @@ public:
virtual const DWARFSection& getAppleNamespacesSection() = 0;
virtual const DWARFSection& getAppleObjCSection() = 0;
virtual StringRef getCUIndexSection() = 0;
+ virtual StringRef getGdbIndexSection() = 0;
virtual StringRef getTUIndexSection() = 0;
static bool isSupportedVersion(unsigned version) {
return version == 2 || version == 3 || version == 4 || version == 5;
}
+
private:
/// Return the compile unit that includes an offset (relative to .debug_info).
DWARFCompileUnit *getCompileUnitForOffset(uint32_t Offset);
@@ -239,6 +254,7 @@ private:
/// pointers to it.
class DWARFContextInMemory : public DWARFContext {
virtual void anchor();
+
bool IsLittleEndian;
uint8_t AddressSize;
DWARFSection InfoSection;
@@ -272,6 +288,7 @@ class DWARFContextInMemory : public DWARFContext {
DWARFSection AppleNamespacesSection;
DWARFSection AppleObjCSection;
StringRef CUIndexSection;
+ StringRef GdbIndexSection;
StringRef TUIndexSection;
SmallVector<SmallString<32>, 4> UncompressedSections;
@@ -279,6 +296,7 @@ class DWARFContextInMemory : public DWARFContext {
public:
DWARFContextInMemory(const object::ObjectFile &Obj,
const LoadedObjectInfo *L = nullptr);
+
bool isLittleEndian() const override { return IsLittleEndian; }
uint8_t getAddressSize() const override { return AddressSize; }
const DWARFSection &getInfoSection() override { return InfoSection; }
@@ -318,9 +336,10 @@ public:
return AddrSection;
}
StringRef getCUIndexSection() override { return CUIndexSection; }
+ StringRef getGdbIndexSection() override { return GdbIndexSection; }
StringRef getTUIndexSection() override { return TUIndexSection; }
};
-}
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_DEBUGINFO_DWARFCONTEXT_H
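A hedged usage sketch of the modified context class: the DWARFContextInMemory constructor and the new SummarizeTypes parameter to dump() come from this patch, while object::ObjectFile::createObjectFile and logAllUnhandledErrors are assumed to keep their usual signatures in this snapshot:

#include "llvm/DebugInfo/DWARF/DWARFContext.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

static void dumpDebugInfo(StringRef Path) {
  // createObjectFile is assumed to return Expected<OwningBinary<ObjectFile>>.
  auto BinOrErr = object::ObjectFile::createObjectFile(Path);
  if (!BinOrErr) {
    logAllUnhandledErrors(BinOrErr.takeError(), errs(), "error: ");
    return;
  }
  DWARFContextInMemory DICtx(*BinOrErr->getBinary());
  // SummarizeTypes is the parameter added by this change.
  DICtx.dump(outs(), DIDT_All, /*DumpEH=*/false, /*SummarizeTypes=*/true);
}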
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h
index 67c4a2bb3e67..f732deef548c 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h
@@ -23,6 +23,9 @@ class DWARFAbbreviationDeclarationSet {
uint32_t FirstAbbrCode;
std::vector<DWARFAbbreviationDeclaration> Decls;
+ typedef std::vector<DWARFAbbreviationDeclaration>::const_iterator
+ const_iterator;
+
public:
DWARFAbbreviationDeclarationSet();
@@ -33,6 +36,14 @@ public:
const DWARFAbbreviationDeclaration *
getAbbreviationDeclaration(uint32_t AbbrCode) const;
+ const_iterator begin() const {
+ return Decls.begin();
+ }
+
+ const_iterator end() const {
+ return Decls.end();
+ }
+
private:
void clear();
};
@@ -53,6 +64,14 @@ public:
void dump(raw_ostream &OS) const;
void extract(DataExtractor Data);
+ DWARFAbbreviationDeclarationSetMap::const_iterator begin() const {
+ return AbbrDeclSets.begin();
+ }
+
+ DWARFAbbreviationDeclarationSetMap::const_iterator end() const {
+ return AbbrDeclSets.end();
+ }
+
private:
void clear();
};
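With the new begin()/end() accessors, both the per-offset set map and each set's declarations become range-iterable. A small sketch; getCode() is assumed from the existing DWARFAbbreviationDeclaration API:

#include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

static void listAbbrevs(const DWARFDebugAbbrev &DebugAbbrev) {
  for (const auto &SetPair : DebugAbbrev) {   // section offset -> declaration set
    outs() << "abbrev set at offset " << SetPair.first << ":\n";
    for (const auto &Decl : SetPair.second)   // individual declarations
      outs() << "  code " << Decl.getCode() << ", tag " << Decl.getTag() << "\n";
  }
}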
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h
index 837a8e63469e..5a602392add8 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h
@@ -59,6 +59,8 @@ public:
uint32_t getCompileUnitDIEOffset() const { return HeaderData.CuOffset; }
+ const Header &getHeader() const { return HeaderData; }
+
desc_iterator_range descriptors() const {
return desc_iterator_range(ArangeDescriptors.begin(),
ArangeDescriptors.end());
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h
index f29d5fe9ecde..f36f470980b1 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h
@@ -15,6 +15,7 @@
#include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h"
#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Dwarf.h"
namespace llvm {
@@ -25,134 +26,39 @@ class DWARFContext;
class DWARFFormValue;
struct DWARFDebugInfoEntryInlinedChain;
-/// DWARFDebugInfoEntryMinimal - A DIE with only the minimum required data.
-class DWARFDebugInfoEntryMinimal {
+/// DWARFDebugInfoEntry - A DIE with only the minimum required data.
+class DWARFDebugInfoEntry {
/// Offset within the .debug_info of the start of this entry.
uint32_t Offset;
- /// How many to add to "this" to get the sibling.
- uint32_t SiblingIdx;
+ /// The integer depth of this DIE within the compile unit DIEs where the
+ /// compile/type unit DIE has a depth of zero.
+ uint32_t Depth;
const DWARFAbbreviationDeclaration *AbbrevDecl;
public:
- DWARFDebugInfoEntryMinimal()
- : Offset(0), SiblingIdx(0), AbbrevDecl(nullptr) {}
-
- void dump(raw_ostream &OS, DWARFUnit *u, unsigned recurseDepth,
- unsigned indent = 0) const;
- void dumpAttribute(raw_ostream &OS, DWARFUnit *u, uint32_t *offset_ptr,
- uint16_t attr, uint16_t form, unsigned indent = 0) const;
+ DWARFDebugInfoEntry()
+ : Offset(0), Depth(0), AbbrevDecl(nullptr) {}
/// Extracts a debug info entry, which is a child of a given unit,
/// starting at a given offset. If DIE can't be extracted, returns false and
/// doesn't change OffsetPtr.
- bool extractFast(const DWARFUnit *U, uint32_t *OffsetPtr);
-
- uint32_t getTag() const { return AbbrevDecl ? AbbrevDecl->getTag() : 0; }
- bool isNULL() const { return AbbrevDecl == nullptr; }
-
- /// Returns true if DIE represents a subprogram (not inlined).
- bool isSubprogramDIE() const;
- /// Returns true if DIE represents a subprogram or an inlined
- /// subroutine.
- bool isSubroutineDIE() const;
+ bool extractFast(const DWARFUnit &U, uint32_t *OffsetPtr);
+ /// High performance extraction should use this call.
+ bool extractFast(const DWARFUnit &U, uint32_t *OffsetPtr,
+ const DataExtractor &DebugInfoData,
+ uint32_t UEndOffset,
+ uint32_t Depth);
uint32_t getOffset() const { return Offset; }
- bool hasChildren() const { return !isNULL() && AbbrevDecl->hasChildren(); }
-
- // We know we are kept in a vector of contiguous entries, so we know
- // our sibling will be some index after "this".
- const DWARFDebugInfoEntryMinimal *getSibling() const {
- return SiblingIdx > 0 ? this + SiblingIdx : nullptr;
+ uint32_t getDepth() const { return Depth; }
+ dwarf::Tag getTag() const {
+ return AbbrevDecl ? AbbrevDecl->getTag() : dwarf::DW_TAG_null;
}
-
- // We know we are kept in a vector of contiguous entries, so we know
- // we don't need to store our child pointer, if we have a child it will
- // be the next entry in the list...
- const DWARFDebugInfoEntryMinimal *getFirstChild() const {
- return hasChildren() ? this + 1 : nullptr;
- }
-
- void setSibling(const DWARFDebugInfoEntryMinimal *Sibling) {
- if (Sibling) {
- // We know we are kept in a vector of contiguous entries, so we know
- // our sibling will be some index after "this".
- SiblingIdx = Sibling - this;
- } else
- SiblingIdx = 0;
- }
-
+ bool hasChildren() const { return AbbrevDecl && AbbrevDecl->hasChildren(); }
const DWARFAbbreviationDeclaration *getAbbreviationDeclarationPtr() const {
return AbbrevDecl;
}
-
- bool getAttributeValue(const DWARFUnit *U, const uint16_t Attr,
- DWARFFormValue &FormValue) const;
-
- const char *getAttributeValueAsString(const DWARFUnit *U, const uint16_t Attr,
- const char *FailValue) const;
-
- uint64_t getAttributeValueAsAddress(const DWARFUnit *U, const uint16_t Attr,
- uint64_t FailValue) const;
-
- uint64_t getAttributeValueAsUnsignedConstant(const DWARFUnit *U,
- const uint16_t Attr,
- uint64_t FailValue) const;
-
- uint64_t getAttributeValueAsReference(const DWARFUnit *U, const uint16_t Attr,
- uint64_t FailValue) const;
-
- uint64_t getAttributeValueAsSectionOffset(const DWARFUnit *U,
- const uint16_t Attr,
- uint64_t FailValue) const;
-
- uint64_t getRangesBaseAttribute(const DWARFUnit *U, uint64_t FailValue) const;
-
- /// Retrieves DW_AT_low_pc and DW_AT_high_pc from CU.
- /// Returns true if both attributes are present.
- bool getLowAndHighPC(const DWARFUnit *U, uint64_t &LowPC,
- uint64_t &HighPC) const;
-
- DWARFAddressRangesVector getAddressRanges(const DWARFUnit *U) const;
-
- void collectChildrenAddressRanges(const DWARFUnit *U,
- DWARFAddressRangesVector &Ranges) const;
-
- bool addressRangeContainsAddress(const DWARFUnit *U,
- const uint64_t Address) const;
-
- /// If a DIE represents a subprogram (or inlined subroutine),
- /// returns its mangled name (or short name, if mangled is missing).
- /// This name may be fetched from specification or abstract origin
- /// for this subprogram. Returns null if no name is found.
- const char *getSubroutineName(const DWARFUnit *U, DINameKind Kind) const;
-
- /// Return the DIE name resolving DW_AT_sepcification or
- /// DW_AT_abstract_origin references if necessary.
- /// Returns null if no name is found.
- const char *getName(const DWARFUnit *U, DINameKind Kind) const;
-
- /// Retrieves values of DW_AT_call_file, DW_AT_call_line and
- /// DW_AT_call_column from DIE (or zeroes if they are missing).
- void getCallerFrame(const DWARFUnit *U, uint32_t &CallFile,
- uint32_t &CallLine, uint32_t &CallColumn) const;
-
- /// Get inlined chain for a given address, rooted at the current DIE.
- /// Returns empty chain if address is not contained in address range
- /// of current DIE.
- DWARFDebugInfoEntryInlinedChain
- getInlinedChainForAddress(const DWARFUnit *U, const uint64_t Address) const;
-};
-
-/// DWARFDebugInfoEntryInlinedChain - represents a chain of inlined_subroutine
-/// DIEs, (possibly ending with subprogram DIE), all of which are contained
-/// in some concrete inlined instance tree. Address range for each DIE
-/// (except the last DIE) in this chain is contained in address
-/// range for next DIE in the chain.
-struct DWARFDebugInfoEntryInlinedChain {
- DWARFDebugInfoEntryInlinedChain() : U(nullptr) {}
- SmallVector<DWARFDebugInfoEntryMinimal, 4> DIEs;
- const DWARFUnit *U;
};
} // end namespace llvm
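The new Depth field is what lets the flattened DIE array answer parent queries without the old sibling/child index bookkeeping. Purely as an illustration of the idea (not the actual DWARFUnit::getParent implementation), a parent is the nearest preceding entry whose depth is one less:

#include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h"
#include <cstddef>
#include <vector>

using namespace llvm;

static const DWARFDebugInfoEntry *
findParentSketch(const std::vector<DWARFDebugInfoEntry> &DieArray, size_t Index) {
  uint32_t Depth = DieArray[Index].getDepth();
  if (Depth == 0)
    return nullptr;                 // the unit DIE has no parent
  while (Index > 0) {
    --Index;
    if (DieArray[Index].getDepth() == Depth - 1)
      return &DieArray[Index];      // nearest ancestor one level up
  }
  return nullptr;
}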
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
index 760950b726b3..ca9a6c822876 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugLine.h
@@ -188,6 +188,8 @@ public:
bool lookupAddressRange(uint64_t address, uint64_t size,
std::vector<uint32_t> &result) const;
+ bool hasFileAtIndex(uint64_t FileIndex) const;
+
// Extracts filename by its index in filename table in prologue.
// Returns true on success.
bool getFileNameByIndex(uint64_t FileIndex, const char *CompDir,
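hasFileAtIndex() lets callers validate an index before asking for the name. A hedged sketch: the enclosing class is assumed to be DWARFDebugLine::LineTable, and the trailing Kind/Result parameters of getFileNameByIndex (cut off in the hunk above) are assumed to follow the usual LLVM signature:

#include "llvm/DebugInfo/DIContext.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugLine.h"
#include <string>

using namespace llvm;

static std::string fileNameOrEmpty(const DWARFDebugLine::LineTable &LT,
                                   uint64_t FileIndex, const char *CompDir) {
  std::string Result;
  if (LT.hasFileAtIndex(FileIndex) &&
      LT.getFileNameByIndex(
          FileIndex, CompDir,
          DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, Result))
    return Result;
  return std::string();   // out-of-range index or lookup failure
}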
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h
index d13c7f553ba3..5a0352dacdb9 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugMacro.h
@@ -12,7 +12,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/DataExtractor.h"
-#include "llvm/Support/Dwarf.h"
+#include <cstdint>
namespace llvm {
@@ -46,13 +46,14 @@ class DWARFDebugMacro {
MacroList Macros;
public:
- DWARFDebugMacro() {}
+ DWARFDebugMacro() = default;
+
/// Print the macro list found within the debug_macinfo section.
void dump(raw_ostream &OS) const;
/// Parse the debug_macinfo section accessible via the 'data' parameter.
void parse(DataExtractor data);
};
-}
+} // end namespace llvm
-#endif
+#endif // LLVM_DEBUGINFO_DWARF_DWARFDEBUGMACRO_H
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h
new file mode 100644
index 000000000000..2b23837e32d6
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDebugPubTable.h
@@ -0,0 +1,77 @@
+//===-- DWARFDebugPubTable.h ------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_DEBUGINFO_DWARFDEBUGPUBTABLE_H
+#define LLVM_LIB_DEBUGINFO_DWARFDEBUGPUBTABLE_H
+
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/Dwarf.h"
+#include <vector>
+
+namespace llvm {
+
+class raw_ostream;
+
+/// Represents structure for holding and parsing .debug_pub* tables.
+class DWARFDebugPubTable {
+public:
+ struct Entry {
+ /// Section offset from the beginning of the compilation unit.
+ uint32_t SecOffset;
+
+ /// An entry of the various gnu_pub* debug sections.
+ llvm::dwarf::PubIndexEntryDescriptor Descriptor;
+
+ /// The name of the object as given by the DW_AT_name attribute of the
+ /// referenced DIE.
+ const char *Name;
+ };
+
+ /// Each table consists of sets of variable length entries. Each set describes
+ /// the names of global objects and functions, or global types, respectively,
+ /// whose definitions are represented by debugging information entries owned
+ /// by a single compilation unit.
+ struct Set {
+ /// The total length of the entries for that set, not including the length
+ /// field itself.
+ uint32_t Length;
+
+ /// This number is specific to the name lookup table and is independent of
+ /// the DWARF version number.
+ uint16_t Version;
+
+ /// The offset from the beginning of the .debug_info section of the
+ /// compilation unit header referenced by the set.
+ uint32_t Offset;
+
+ /// The size in bytes of the contents of the .debug_info section generated
+ /// to represent that compilation unit.
+ uint32_t Size;
+
+ std::vector<Entry> Entries;
+ };
+
+private:
+ std::vector<Set> Sets;
+
+ /// GNU-style tables contain additional information.
+ /// This flag determines whether the section we parse is a debug_gnu* table.
+ bool GnuStyle;
+
+public:
+ DWARFDebugPubTable(StringRef Data, bool LittleEndian, bool GnuStyle);
+ void dump(StringRef Name, raw_ostream &OS) const;
+
+ ArrayRef<Set> getData() { return Sets; }
+};
+}
+
+#endif
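A minimal sketch of driving the new table class: construct it over a section's raw contents and walk the parsed sets. How the section bytes are obtained is left out and is an assumption of the example:

#include "llvm/DebugInfo/DWARF/DWARFDebugPubTable.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

static void dumpPubNames(StringRef PubNamesData, bool IsLittleEndian) {
  DWARFDebugPubTable PubNames(PubNamesData, IsLittleEndian, /*GnuStyle=*/false);
  PubNames.dump(".debug_pubnames", outs());
  for (const DWARFDebugPubTable::Set &S : PubNames.getData())
    outs() << "CU at offset " << S.Offset << ": " << S.Entries.size()
           << " entries\n";
}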
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h
new file mode 100644
index 000000000000..f33758de6a55
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFDie.h
@@ -0,0 +1,369 @@
+//===-- DWARFDie.h --------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_DEBUGINFO_DWARFDIE_H
+#define LLVM_LIB_DEBUGINFO_DWARFDIE_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h"
+
+namespace llvm {
+
+class DWARFUnit;
+class DWARFDebugInfoEntry;
+class raw_ostream;
+
+//===----------------------------------------------------------------------===//
+/// Utility class that carries the DWARF compile/type unit and the debug info
+/// entry in an object.
+///
+/// When accessing information from a debug info entry we always need the DWARF
+/// compile/type unit in order to extract the info correctly, as some information
+/// is relative to the compile/type unit. Prior to this class the DWARFUnit and
+/// the DWARFDebugInfoEntry were passed around separately and there was the
+/// possibility for error if the wrong DWARFUnit was used to extract a unit
+/// relative offset. This class helps to ensure that this doesn't happen and
+/// also simplifies the attribute extraction calls by not having to specify the
+/// DWARFUnit for each call.
+class DWARFDie {
+ DWARFUnit *U;
+ const DWARFDebugInfoEntry *Die;
+public:
+ DWARFDie() : U(nullptr), Die(nullptr) {}
+ DWARFDie(DWARFUnit *Unit, const DWARFDebugInfoEntry * D) : U(Unit), Die(D) {}
+
+ bool isValid() const { return U && Die; }
+ explicit operator bool() const { return isValid(); }
+ bool operator ==(const DWARFDie &RHS) const {
+ return Die == RHS.Die && U == RHS.U;
+ }
+ const DWARFDebugInfoEntry *getDebugInfoEntry() const { return Die; }
+ DWARFUnit *getDwarfUnit() const { return U; }
+
+
+ /// Get the abbreviation declaration for this DIE.
+ ///
+ /// \returns the abbreviation declaration or NULL for null tags.
+ const DWARFAbbreviationDeclaration *getAbbreviationDeclarationPtr() const {
+ assert(isValid() && "must check validity prior to calling");
+ return Die->getAbbreviationDeclarationPtr();
+ }
+
+ /// Get the absolute offset into the debug info or types section.
+ ///
+ /// \returns the DIE offset or -1U if invalid.
+ uint32_t getOffset() const {
+ assert(isValid() && "must check validity prior to calling");
+ return Die->getOffset();
+ }
+
+ dwarf::Tag getTag() const {
+ auto AbbrevDecl = getAbbreviationDeclarationPtr();
+ if (AbbrevDecl)
+ return AbbrevDecl->getTag();
+ return dwarf::DW_TAG_null;
+ }
+
+ bool hasChildren() const {
+ assert(isValid() && "must check validity prior to calling");
+ return Die->hasChildren();
+ }
+
+ /// Returns true for a valid DIE that terminates a sibling chain.
+ bool isNULL() const {
+ return getAbbreviationDeclarationPtr() == nullptr;
+ }
+ /// Returns true if DIE represents a subprogram (not inlined).
+ bool isSubprogramDIE() const;
+
+ /// Returns true if DIE represents a subprogram or an inlined subroutine.
+ bool isSubroutineDIE() const;
+
+ /// Get the parent of this DIE object.
+ ///
+ /// \returns a valid DWARFDie instance if this object has a parent or an
+ /// invalid DWARFDie instance if it doesn't.
+ DWARFDie getParent() const;
+
+ /// Get the sibling of this DIE object.
+ ///
+ /// \returns a valid DWARFDie instance if this object has a sibling or an
+ /// invalid DWARFDie instance if it doesn't.
+ DWARFDie getSibling() const;
+
+ /// Get the first child of this DIE object.
+ ///
+ /// \returns a valid DWARFDie instance if this object has children or an
+ /// invalid DWARFDie instance if it doesn't.
+ DWARFDie getFirstChild() const {
+ if (isValid() && Die->hasChildren())
+ return DWARFDie(U, Die + 1);
+ return DWARFDie();
+ }
+
+ /// Dump the DIE and all of its attributes to the supplied stream.
+ ///
+ /// \param OS the stream to use for output.
+ /// \param recurseDepth the depth to recurse to when dumping this DIE and its
+ /// children.
+ /// \param indent the number of characters to indent each line that is output.
+ void dump(raw_ostream &OS, unsigned recurseDepth, unsigned indent = 0) const;
+
+ /// Extract the specified attribute from this DIE.
+ ///
+ /// Extract an attribute value from this DIE only. This call doesn't look
+ /// for the attribute value in any DW_AT_specification or
+ /// DW_AT_abstract_origin referenced DIEs.
+ ///
+ /// \param Attr the attribute to extract.
+ /// \returns an optional DWARFFormValue that will have the form value if the
+ /// attribute was successfully extracted.
+ Optional<DWARFFormValue> getAttributeValue(dwarf::Attribute Attr) const;
+
+ /// Extract the specified attribute from this DIE as a C string.
+ ///
+ /// Extract an attribute value from this DIE only. This call doesn't look
+ /// for the attribute value in any DW_AT_specification or
+ /// DW_AT_abstract_origin referenced DIEs.
+ ///
+ /// \param Attr the attribute to extract.
+ /// \param FailValue the value to return if this DIE doesn't have this
+ /// attribute.
+ /// \returns the NULL terminated C string value owned by the DWARF section
+ /// that contains the string or FailValue if the attribute doesn't exist or
+ /// if the attribute's form isn't a form that describes a string.
+ const char *getAttributeValueAsString(dwarf::Attribute Attr,
+ const char *FailValue) const;
+
+ /// Extract the specified attribute from this DIE as an address.
+ ///
+ /// Extract an attribute value from this DIE only. This call doesn't look
+ /// for the attribute value in any DW_AT_specification or
+ /// DW_AT_abstract_origin referenced DIEs.
+ ///
+ /// \param Attr the attribute to extract.
+ /// \param FailValue the value to return if this DIE doesn't have this
+ /// attribute.
+ /// \returns the address value of the attribute or FailValue if the
+ /// attribute doesn't exist or if the attribute's form isn't a form that
+ /// describes an address.
+ uint64_t getAttributeValueAsAddress(dwarf::Attribute Attr,
+ uint64_t FailValue) const;
+
+ /// Extract the specified attribute from this DIE as an address.
+ ///
+ /// Extract an attribute value from this DIE only. This call doesn't look
+ /// for the attribute value in any DW_AT_specification or
+ /// DW_AT_abstract_origin referenced DIEs.
+ ///
+ /// \param Attr the attribute to extract.
+ /// \returns an optional value for the attribute.
+ Optional<uint64_t> getAttributeValueAsAddress(dwarf::Attribute Attr) const;
+
+ /// Extract the specified attribute from this DIE as a signed integer.
+ ///
+ /// Extract an attribute value from this DIE only. This call doesn't look
+ /// for the attribute value in any DW_AT_specification or
+ /// DW_AT_abstract_origin referenced DIEs.
+ ///
+ /// \param Attr the attribute to extract.
+ /// \param FailValue the value to return if this DIE doesn't have this
+ /// attribute.
+ /// \returns the signed integer constant value of the attribute or FailValue
+ /// if the attribute doesn't exist or if the attribute's form isn't a form
+ /// that describes a signed integer.
+ int64_t getAttributeValueAsSignedConstant(dwarf::Attribute Attr,
+ int64_t FailValue) const;
+
+ /// Extract the specified attribute from this DIE as a signed integer.
+ ///
+ /// Extract an attribute value from this DIE only. This call doesn't look
+ /// for the attribute value in any DW_AT_specification or
+ /// DW_AT_abstract_origin referenced DIEs.
+ ///
+ /// \param Attr the attribute to extract.
+ /// \returns an optional value for the attribute.
+ Optional<int64_t>
+ getAttributeValueAsSignedConstant(dwarf::Attribute Attr) const;
+
+ /// Extract the specified attribute from this DIE as an unsigned integer.
+ ///
+ /// Extract an attribute value from this DIE only. This call doesn't look
+ /// for the attribute value in any DW_AT_specification or
+ /// DW_AT_abstract_origin referenced DIEs.
+ ///
+ /// \param Attr the attribute to extract.
+ /// \param FailValue the value to return if this DIE doesn't have this
+ /// attribute.
+ /// \returns the unsigned integer constant value of the attribute or FailValue
+ /// if the attribute doesn't exist or if the attribute's form isn't a form
+ /// that describes an unsigned integer.
+ uint64_t getAttributeValueAsUnsignedConstant(dwarf::Attribute Attr,
+ uint64_t FailValue) const;
+
+ /// Extract the specified attribute from this DIE as an unsigned integer.
+ ///
+ /// Extract an attribute value from this DIE only. This call doesn't look
+ /// for the attribute value in any DW_AT_specification or
+ /// DW_AT_abstract_origin referenced DIEs.
+ ///
+ /// \param Attr the attribute to extract.
+ /// \returns an optional value for the attribute.
+ Optional<uint64_t>
+ getAttributeValueAsUnsignedConstant(dwarf::Attribute Attr) const;
+
+ /// Extract the specified attribute from this DIE as absolute DIE Offset.
+ ///
+ /// Extract an attribute value from this DIE only. This call doesn't look
+ /// for the attribute value in any DW_AT_specification or
+ /// DW_AT_abstract_origin referenced DIEs.
+ ///
+ /// \param Attr the attribute to extract.
+ /// \param FailValue the value to return if this DIE doesn't have this
+ /// attribute.
+ /// \returns the unsigned integer constant value of the attribute or FailValue
+ /// if the attribute doesn't exist or if the attribute's form isn't a form
+ /// that describes a reference.
+ uint64_t getAttributeValueAsReference(dwarf::Attribute Attr,
+ uint64_t FailValue) const;
+
+ /// Extract the specified attribute from this DIE as absolute DIE Offset.
+ ///
+ /// Extract an attribute value from this DIE only. This call doesn't look
+ /// for the attribute value in any DW_AT_specification or
+ /// DW_AT_abstract_origin referenced DIEs.
+ ///
+ /// \param Attr the attribute to extract.
+ /// \returns an optional value for the attribute.
+ Optional<uint64_t> getAttributeValueAsReference(dwarf::Attribute Attr) const;
+
+ /// Extract the specified attribute from this DIE as absolute section offset.
+ ///
+ /// Extract an attribute value from this DIE only. This call doesn't look
+ /// for the attribute value in any DW_AT_specification or
+ /// DW_AT_abstract_origin referenced DIEs.
+ ///
+ /// \param Attr the attribute to extract.
+ /// \param FailValue the value to return if this DIE doesn't have this
+ /// attribute.
+ /// \returns the unsigned integer constant value of the attribute or FailValue
+ /// if the attribute doesn't exist or if the attribute's form isn't a form
+ /// that describes a section offset.
+ uint64_t getAttributeValueAsSectionOffset(dwarf::Attribute Attr,
+ uint64_t FailValue) const;
+ /// Extract the specified attribute from this DIE as absolute section offset.
+ ///
+ /// Extract an attribute value from this DIE only. This call doesn't look
+ /// for the attribute value in any DW_AT_specification or
+ /// DW_AT_abstract_origin referenced DIEs.
+ ///
+ /// \param Attr the attribute to extract.
+ /// \returns an optional value for the attribute.
+ Optional<uint64_t>
+ getAttributeValueAsSectionOffset(dwarf::Attribute Attr) const;
+
+ /// Extract the specified attribute from this DIE as the referenced DIE.
+ ///
+ /// Regardless of the reference type, return the correct DWARFDie instance if
+ /// the attribute exists. The returned DWARFDie object might be from another
+ /// DWARFUnit, but that is all encapsulated in the new DWARFDie object.
+ ///
+ /// Extract an attribute value from this DIE only. This call doesn't look
+ /// for the attribute value in any DW_AT_specification or
+ /// DW_AT_abstract_origin referenced DIEs.
+ ///
+ /// \param Attr the attribute to extract.
+ /// \returns a valid DWARFDie instance if the attribute exists, or an invalid
+ /// DWARFDie object if it doesn't.
+ DWARFDie getAttributeValueAsReferencedDie(dwarf::Attribute Attr) const;
+
+ /// Extract the range base attribute from this DIE as absolute section offset.
+ ///
+ /// This is a utility function that checks for either the DW_AT_rnglists_base
+ /// or DW_AT_GNU_ranges_base attribute.
+ ///
+ /// \returns an optional absolute section offset value for the attribute.
+ Optional<uint64_t> getRangesBaseAttribute() const;
+
+ /// Get the DW_AT_high_pc attribute value as an address.
+ ///
+ /// In DWARF version 4 and later the high PC can be encoded as an offset from
+ /// the DW_AT_low_pc. This function takes care of extracting the value as an
+ /// address or offset and adds it to the low PC if needed and returns the
+ /// value as an optional in case the DIE doesn't have a DW_AT_high_pc
+ /// attribute.
+ ///
+ /// \param LowPC the low PC that might be needed to calculate the high PC.
+ /// \returns an optional address value for the attribute.
+ Optional<uint64_t> getHighPC(uint64_t LowPC) const;
+
+ /// Retrieves DW_AT_low_pc and DW_AT_high_pc from CU.
+ /// Returns true if both attributes are present.
+ bool getLowAndHighPC(uint64_t &LowPC, uint64_t &HighPC) const;
+
+ /// Get the address ranges for this DIE.
+ ///
+ /// Get the hi/low PC range if both attributes are available or extracts the
+ /// non-contiguous address ranges from the DW_AT_ranges attribute.
+ ///
+ /// Extracts the range information from this DIE only. This call doesn't look
+ /// for the range in any DW_AT_specification or DW_AT_abstract_origin DIEs.
+ ///
+ /// \returns an address range vector that might be empty if no address range
+ /// information is available.
+ DWARFAddressRangesVector getAddressRanges() const;
+
+ /// Get all address ranges for any DW_TAG_subprogram DIEs in this DIE or any
+ /// of its children.
+ ///
+ /// Get the hi/low PC range if both attributes are available or extracts the
+ /// non-contiguous address ranges from the DW_AT_ranges attribute for this DIE
+ /// and all children.
+ ///
+ /// \param Ranges the address range vector to fill in.
+ void collectChildrenAddressRanges(DWARFAddressRangesVector &Ranges) const;
+
+ bool addressRangeContainsAddress(const uint64_t Address) const;
+
+ /// If a DIE represents a subprogram (or inlined subroutine), returns its
+ /// mangled name (or short name, if mangled is missing). This name may be
+ /// fetched from specification or abstract origin for this subprogram.
+ /// Returns null if no name is found.
+ const char *getSubroutineName(DINameKind Kind) const;
+
+ /// Return the DIE name resolving DW_AT_specification or DW_AT_abstract_origin
+ /// references if necessary. Returns null if no name is found.
+ const char *getName(DINameKind Kind) const;
+
+ /// Retrieves values of DW_AT_call_file, DW_AT_call_line and DW_AT_call_column
+ /// from DIE (or zeroes if they are missing). This function looks for
+ /// DW_AT_call attributes in this DIE only, it will not resolve the attribute
+ /// values in any DW_AT_specification or DW_AT_abstract_origin DIEs.
+ /// \param CallFile filled in with non-zero if successful, zero if there is no
+ /// DW_AT_call_file attribute in this DIE.
+ /// \param CallLine filled in with non-zero if successful, zero if there is no
+ /// DW_AT_call_line attribute in this DIE.
+ /// \param CallColumn filled in with non-zero if successful, zero if there is
+ /// no DW_AT_call_column attribute in this DIE.
+ void getCallerFrame(uint32_t &CallFile, uint32_t &CallLine,
+ uint32_t &CallColumn) const;
+
+ /// Get inlined chain for a given address, rooted at the current DIE.
+ /// Returns empty chain if address is not contained in address range
+ /// of current DIE.
+ void
+ getInlinedChainForAddress(const uint64_t Address,
+ SmallVectorImpl<DWARFDie> &InlinedChain) const;
+
+};
+
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_DEBUGINFO_DWARFDIE_H
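A hedged navigation sketch for the new wrapper: walk the immediate children of a unit DIE and print each child's tag and name. getUnitDIE(false) returning a DWARFDie comes from the DWARFUnit changes later in this patch; DINameKind is the existing enum from DIContext.h:

#include "llvm/DebugInfo/DIContext.h"
#include "llvm/DebugInfo/DWARF/DWARFDie.h"
#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

static void listChildren(DWARFUnit &U) {
  DWARFDie UnitDie = U.getUnitDIE(/*ExtractUnitDIEOnly=*/false);
  if (!UnitDie)
    return;
  for (DWARFDie Child = UnitDie.getFirstChild(); Child && !Child.isNULL();
       Child = Child.getSibling()) {
    const char *Name = Child.getName(DINameKind::ShortName);
    outs() << "tag 0x";
    outs().write_hex(Child.getTag());
    outs() << " " << (Name ? Name : "<anonymous>") << "\n";
  }
}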
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
index b2f750dd7945..920880cea10c 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFFormValue.h
@@ -12,6 +12,7 @@
#include "llvm/ADT/Optional.h"
#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/Dwarf.h"
namespace llvm {
@@ -48,15 +49,17 @@ private:
const uint8_t* data;
};
- uint16_t Form; // Form for this value.
+ dwarf::Form Form; // Form for this value.
ValueType Value; // Contains all data for the form.
+ const DWARFUnit *U; // Remember the DWARFUnit at extract time.
public:
- DWARFFormValue(uint16_t Form = 0) : Form(Form) {}
- uint16_t getForm() const { return Form; }
+ DWARFFormValue(dwarf::Form F = dwarf::Form(0)) : Form(F), U(nullptr) {}
+ dwarf::Form getForm() const { return Form; }
+ void setForm(dwarf::Form F) { Form = F; }
bool isFormClass(FormClass FC) const;
-
- void dump(raw_ostream &OS, const DWARFUnit *U) const;
+ const DWARFUnit *getUnit() const { return U; }
+ void dump(raw_ostream &OS) const;
/// \brief extracts a value in data at offset *offset_ptr.
///
@@ -64,34 +67,98 @@ public:
/// case no relocation processing will be performed and some
/// kind of forms that depend on Unit information are disallowed.
/// \returns whether the extraction succeeded.
- bool extractValue(DataExtractor data, uint32_t *offset_ptr,
- const DWARFUnit *u);
+ bool extractValue(const DataExtractor &Data, uint32_t *OffsetPtr,
+ const DWARFUnit *U);
bool isInlinedCStr() const {
return Value.data != nullptr && Value.data == (const uint8_t*)Value.cstr;
}
/// The getAsFoo functions below return the extracted value as Foo only if
/// the DWARFFormValue's form class is suitable for representing Foo.
- Optional<uint64_t> getAsReference(const DWARFUnit *U) const;
+ Optional<uint64_t> getAsReference() const;
Optional<uint64_t> getAsUnsignedConstant() const;
Optional<int64_t> getAsSignedConstant() const;
- Optional<const char *> getAsCString(const DWARFUnit *U) const;
- Optional<uint64_t> getAsAddress(const DWARFUnit *U) const;
+ Optional<const char *> getAsCString() const;
+ Optional<uint64_t> getAsAddress() const;
Optional<uint64_t> getAsSectionOffset() const;
Optional<ArrayRef<uint8_t>> getAsBlock() const;
-
+ Optional<uint64_t> getAsCStringOffset() const;
+ Optional<uint64_t> getAsReferenceUVal() const;
+ /// Get the fixed byte size for a given form.
+ ///
+ /// If the form always has a fixed valid byte size that doesn't depend on a
+ /// DWARFUnit, then an Optional with a value will be returned. If the form
+ /// can vary in size depending on the DWARFUnit (DWARF version, address byte
+ /// size, or DWARF 32/64) and the DWARFUnit is valid, then an Optional with a
+ /// valid value is returned. If the form is always encoded using a variable
+ /// length storage format (ULEB or SLEB numbers or blocks) or the size
+ /// depends on a DWARFUnit and the DWARFUnit is NULL, then None will be
+ /// returned.
+ /// \param Form The DWARF form to get the fixed byte size for
+ /// \param U The DWARFUnit that can be used to help determine the byte size.
+ ///
+ /// \returns Optional<uint8_t> value with the fixed byte size or None if
+ /// \p Form doesn't have a fixed byte size or a DWARFUnit wasn't supplied
+ /// and was needed to calculate the byte size.
+ static Optional<uint8_t> getFixedByteSize(dwarf::Form Form,
+ const DWARFUnit *U = nullptr);
+ /// Get the fixed byte size for a given form.
+ ///
+ /// If the form has a fixed byte size given a valid DWARF version and address
+ /// byte size, then an Optional with a valid value is returned. If the form
+ /// is always encoded using a variable length storage format (ULEB or SLEB
+ /// numbers or blocks) then None will be returned.
+ ///
+ /// \param Form DWARF form to get the fixed byte size for
+ /// \param Version DWARF version number.
+ /// \param AddrSize size of an address in bytes.
+ /// \param Format enum value from llvm::dwarf::DwarfFormat.
+ /// \returns Optional<uint8_t> value with the fixed byte size or None if
+ /// \p Form doesn't have a fixed byte size.
+ static Optional<uint8_t> getFixedByteSize(dwarf::Form Form, uint16_t Version,
+ uint8_t AddrSize,
+ llvm::dwarf::DwarfFormat Format);
+
+ /// Skip a form in \p debug_info_data at offset specified by \p offset_ptr.
+ ///
+ /// Skips the bytes for this form in the debug info and updates the offset.
+ ///
+ /// \param debug_info_data the .debug_info data to use to skip the value.
+ /// \param offset_ptr a reference to the offset that will be updated.
+ /// \param U the DWARFUnit to use when skipping the form in case the form
+ /// size differs according to data in the DWARFUnit.
+ /// \returns true on success, false if the form was not skipped.
bool skipValue(DataExtractor debug_info_data, uint32_t *offset_ptr,
- const DWARFUnit *u) const;
- static bool skipValue(uint16_t form, DataExtractor debug_info_data,
- uint32_t *offset_ptr, const DWARFUnit *u);
- static bool skipValue(uint16_t form, DataExtractor debug_info_data,
+ const DWARFUnit *U) const;
+ /// Skip a form in \p debug_info_data at offset specified by \p offset_ptr.
+ ///
+ /// Skips the bytes for this form in the debug info and updates the offset.
+ ///
+ /// \param form the DW_FORM enumeration that indicates the form to skip.
+ /// \param debug_info_data the .debug_info data to use to skip the value.
+ /// \param offset_ptr a reference to the offset that will be updated.
+ /// \param U the DWARFUnit to use when skipping the form in case the form
+ /// size differs according to data in the DWARFUnit.
+ /// \returns true on success, false if the form was not skipped.
+ static bool skipValue(dwarf::Form form, DataExtractor debug_info_data,
+ uint32_t *offset_ptr, const DWARFUnit *U);
+ /// Skip a form in \p debug_info_data at offset specified by \p offset_ptr.
+ ///
+ /// Skips the bytes for this form in the debug info and updates the offset.
+ ///
+ /// \param form the DW_FORM enumeration that indicates the form to skip.
+ /// \param debug_info_data the .debug_info data to use to skip the value.
+ /// \param offset_ptr a reference to the offset that will be updated.
+ /// \param Version DWARF version number.
+ /// \param AddrSize size of an address in bytes.
+ /// \param Format enum value from llvm::dwarf::DwarfFormat.
+ /// \returns true on success, false if the form was not skipped.
+ static bool skipValue(dwarf::Form form, DataExtractor debug_info_data,
uint32_t *offset_ptr, uint16_t Version,
- uint8_t AddrSize);
+ uint8_t AddrSize, llvm::dwarf::DwarfFormat Format);
- static ArrayRef<uint8_t> getFixedFormSizes(uint8_t AddrSize,
- uint16_t Version);
private:
- void dumpString(raw_ostream &OS, const DWARFUnit *U) const;
+ void dumpString(raw_ostream &OS) const;
};
}
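The two additions above, getFixedByteSize() and the form-aware skipValue() overloads, combine naturally into a fast skip path. A hedged sketch with illustrative parameter names:

#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
#include "llvm/DebugInfo/DWARF/DWARFUnit.h"

using namespace llvm;

static bool skipFormFast(dwarf::Form Form, DataExtractor DebugInfoData,
                         uint32_t *OffsetPtr, const DWARFUnit *U) {
  // Fixed-size forms can be skipped without touching the data at all.
  if (Optional<uint8_t> FixedSize = DWARFFormValue::getFixedByteSize(Form, U)) {
    *OffsetPtr += *FixedSize;
    return true;
  }
  // Variable-length forms (ULEB/SLEB, blocks, strings) need the extractor.
  return DWARFFormValue::skipValue(Form, DebugInfoData, OffsetPtr, U);
}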
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFGdbIndex.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFGdbIndex.h
new file mode 100644
index 000000000000..66041be96566
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFGdbIndex.h
@@ -0,0 +1,68 @@
+//===-- DWARFGdbIndex.h -----------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_DEBUGINFO_DWARFGDBINDEX_H
+#define LLVM_LIB_DEBUGINFO_DWARFGDBINDEX_H
+
+#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+class DWARFGdbIndex {
+ uint32_t Version;
+
+ uint32_t CuListOffset;
+ uint32_t AddressAreaOffset;
+ uint32_t SymbolTableOffset;
+ uint32_t ConstantPoolOffset;
+
+ struct CompUnitEntry {
+ uint64_t Offset; // Offset of a CU in the .debug_info section.
+ uint64_t Length; // Length of that CU.
+ };
+ SmallVector<CompUnitEntry, 0> CuList;
+
+ struct AddressEntry {
+ uint64_t LowAddress; // The low address.
+ uint64_t HighAddress; // The high address.
+ uint32_t CuIndex; // The CU index.
+ };
+ SmallVector<AddressEntry, 0> AddressArea;
+
+ struct SymTableEntry {
+ uint32_t NameOffset; // Offset of the symbol's name in the constant pool.
+ uint32_t VecOffset; // Offset of the CU vector in the constant pool.
+ };
+ SmallVector<SymTableEntry, 0> SymbolTable;
+
+ // Each value is CU index + attributes.
+ SmallVector<std::pair<uint32_t, SmallVector<uint32_t, 0>>, 0>
+ ConstantPoolVectors;
+
+ StringRef ConstantPoolStrings;
+ uint32_t StringPoolOffset;
+
+ void dumpCUList(raw_ostream &OS) const;
+ void dumpAddressArea(raw_ostream &OS) const;
+ void dumpSymbolTable(raw_ostream &OS) const;
+ void dumpConstantPool(raw_ostream &OS) const;
+
+ bool parseImpl(DataExtractor Data);
+
+public:
+ void dump(raw_ostream &OS);
+ void parse(DataExtractor Data);
+
+ bool HasContent = false;
+ bool HasError = false;
+};
+}
+
+#endif // LLVM_LIB_DEBUGINFO_DWARFGDBINDEX_H
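A short sketch of feeding raw .gdb_index contents to the new parser. Obtaining the section bytes is an assumption here; in DWARFContext this is wired up via getGdbIndexSection()/getGdbIndex():

#include "llvm/DebugInfo/DWARF/DWARFGdbIndex.h"
#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

static void dumpGdbIndex(StringRef GdbIndexContents) {
  DWARFGdbIndex GdbIndex;
  // .gdb_index has no address-sized fields we rely on here, so AddressSize=0.
  GdbIndex.parse(DataExtractor(GdbIndexContents, /*IsLittleEndian=*/true,
                               /*AddressSize=*/0));
  if (GdbIndex.HasError)
    errs() << "malformed .gdb_index section\n";
  else if (GdbIndex.HasContent)
    GdbIndex.dump(outs());
}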
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h
index a697edd32072..4f1e1292a1f1 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFTypeUnit.h
@@ -29,7 +29,7 @@ public:
uint32_t getHeaderSize() const override {
return DWARFUnit::getHeaderSize() + 12;
}
- void dump(raw_ostream &OS);
+ void dump(raw_ostream &OS, bool Brief = false);
static const DWARFSectionKind Section = DW_SECT_TYPES;
protected:
diff --git a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
index 9c3fe3be6aa6..db7b59be90c2 100644
--- a/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
+++ b/contrib/llvm/include/llvm/DebugInfo/DWARF/DWARFUnit.h
@@ -10,10 +10,13 @@
#ifndef LLVM_LIB_DEBUGINFO_DWARFUNIT_H
#define LLVM_LIB_DEBUGINFO_DWARFUNIT_H
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugRangeList.h"
+#include "llvm/DebugInfo/DWARF/DWARFDie.h"
#include "llvm/DebugInfo/DWARF/DWARFRelocMap.h"
#include "llvm/DebugInfo/DWARF/DWARFSection.h"
#include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h"
@@ -124,7 +127,9 @@ class DWARFUnit {
uint8_t AddrSize;
uint64_t BaseAddr;
// The compile unit debug information entry items.
- std::vector<DWARFDebugInfoEntryMinimal> DieArray;
+ std::vector<DWARFDebugInfoEntry> DieArray;
+ typedef iterator_range<std::vector<DWARFDebugInfoEntry>::iterator>
+ die_iterator_range;
class DWOHolder {
object::OwningBinary<object::ObjectFile> DWOFile;
@@ -138,6 +143,12 @@ class DWARFUnit {
const DWARFUnitIndex::Entry *IndexEntry;
+ uint32_t getDIEIndex(const DWARFDebugInfoEntry *Die) {
+ auto First = DieArray.data();
+ assert(Die >= First && Die < First + DieArray.size());
+ return Die - First;
+ }
+
protected:
virtual bool extractImpl(DataExtractor debug_info, uint32_t *offset_ptr);
/// Size in bytes of the unit header.
@@ -191,30 +202,46 @@ public:
uint32_t getNextUnitOffset() const { return Offset + Length + 4; }
uint32_t getLength() const { return Length; }
uint16_t getVersion() const { return Version; }
+ dwarf::DwarfFormat getFormat() const {
+ return dwarf::DwarfFormat::DWARF32; // FIXME: Support DWARF64.
+ }
const DWARFAbbreviationDeclarationSet *getAbbreviations() const {
return Abbrevs;
}
uint8_t getAddressByteSize() const { return AddrSize; }
+ uint8_t getRefAddrByteSize() const {
+ if (Version == 2)
+ return AddrSize;
+ return getDwarfOffsetByteSize();
+ }
+ uint8_t getDwarfOffsetByteSize() const {
+ if (getFormat() == dwarf::DwarfFormat::DWARF64)
+ return 8;
+ return 4;
+ }
uint64_t getBaseAddress() const { return BaseAddr; }
void setBaseAddress(uint64_t base_addr) {
BaseAddr = base_addr;
}
- const DWARFDebugInfoEntryMinimal *getUnitDIE(bool ExtractUnitDIEOnly = true) {
+ DWARFDie getUnitDIE(bool ExtractUnitDIEOnly = true) {
extractDIEsIfNeeded(ExtractUnitDIEOnly);
- return DieArray.empty() ? nullptr : &DieArray[0];
+ if (DieArray.empty())
+ return DWARFDie();
+ return DWARFDie(this, &DieArray[0]);
}
const char *getCompilationDir();
- uint64_t getDWOId();
+ Optional<uint64_t> getDWOId();
void collectAddressRanges(DWARFAddressRangesVector &CURanges);
/// getInlinedChainForAddress - fetches inlined chain for a given address.
/// Returns empty chain if there is no subprogram containing address. The
/// chain is valid as long as parsed compile unit DIEs are not cleared.
- DWARFDebugInfoEntryInlinedChain getInlinedChainForAddress(uint64_t Address);
+ void getInlinedChainForAddress(uint64_t Address,
+ SmallVectorImpl<DWARFDie> &InlinedChain);
/// getUnitSection - Return the DWARFUnitSection containing this unit.
const DWARFUnitSectionBase &getUnitSection() const { return UnitSection; }
@@ -232,30 +259,34 @@ public:
/// created by this unit. In other word, it's illegal to call this
/// method on a DIE that isn't accessible by following
/// children/sibling links starting from this unit's getUnitDIE().
- uint32_t getDIEIndex(const DWARFDebugInfoEntryMinimal *DIE) {
- assert(!DieArray.empty() && DIE >= &DieArray[0] &&
- DIE < &DieArray[0] + DieArray.size());
- return DIE - &DieArray[0];
+ uint32_t getDIEIndex(const DWARFDie &D) {
+ return getDIEIndex(D.getDebugInfoEntry());
}
/// \brief Return the DIE object at the given index.
- const DWARFDebugInfoEntryMinimal *getDIEAtIndex(unsigned Index) const {
+ DWARFDie getDIEAtIndex(unsigned Index) {
assert(Index < DieArray.size());
- return &DieArray[Index];
+ return DWARFDie(this, &DieArray[Index]);
}
+ DWARFDie getParent(const DWARFDebugInfoEntry *Die);
+ DWARFDie getSibling(const DWARFDebugInfoEntry *Die);
+
/// \brief Return the DIE object for a given offset inside the
/// unit's DIE vector.
///
/// The unit needs to have its DIEs extracted for this method to work.
- const DWARFDebugInfoEntryMinimal *getDIEForOffset(uint32_t Offset) const {
+ DWARFDie getDIEForOffset(uint32_t Offset) {
+ extractDIEsIfNeeded(false);
assert(!DieArray.empty());
auto it = std::lower_bound(
DieArray.begin(), DieArray.end(), Offset,
- [](const DWARFDebugInfoEntryMinimal &LHS, uint32_t Offset) {
+ [](const DWARFDebugInfoEntry &LHS, uint32_t Offset) {
return LHS.getOffset() < Offset;
});
- return it == DieArray.end() ? nullptr : &*it;
+ if (it == DieArray.end())
+ return DWARFDie();
+ return DWARFDie(this, &*it);
}
uint32_t getLineTableOffset() const {
@@ -265,6 +296,11 @@ public:
return 0;
}
+ die_iterator_range dies() {
+ extractDIEsIfNeeded(false);
+ return die_iterator_range(DieArray.begin(), DieArray.end());
+ }
+
private:
/// Size in bytes of the .debug_info data associated with this compile unit.
size_t getDebugInfoSize() const { return Length + 4 - getHeaderSize(); }
@@ -274,11 +310,7 @@ private:
size_t extractDIEsIfNeeded(bool CUDieOnly);
/// extractDIEsToVector - Appends all parsed DIEs to a vector.
void extractDIEsToVector(bool AppendCUDie, bool AppendNonCUDIEs,
- std::vector<DWARFDebugInfoEntryMinimal> &DIEs) const;
- /// setDIERelations - We read in all of the DIE entries into our flat list
- /// of DIE entries and now we need to go back through all of them and set the
- /// parent, sibling and child pointers for quick DIE navigation.
- void setDIERelations();
+ std::vector<DWARFDebugInfoEntry> &DIEs) const;
/// clearDIEs - Clear parsed DIEs to keep memory usage low.
void clearDIEs(bool KeepCUDie);
@@ -289,7 +321,7 @@ private:
/// getSubprogramForAddress - Returns subprogram DIE with address range
/// encompassing the provided address. The pointer is alive as long as parsed
/// compile unit DIEs are not cleared.
- const DWARFDebugInfoEntryMinimal *getSubprogramForAddress(uint64_t Address);
+ DWARFDie getSubprogramForAddress(uint64_t Address);
};
}
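A hedged sketch of the unit-level additions: dies() iterates the raw DWARFDebugInfoEntry records (extracting them on demand), while getUnitDIE() and getDIEForOffset() now hand back the DWARFDie wrapper:

#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

static void showUnit(DWARFUnit &U) {
  unsigned Count = 0;
  for (const DWARFDebugInfoEntry &DIE : U.dies()) {
    (void)DIE;
    ++Count;
  }
  outs() << "unit has " << Count << " DIEs\n";
  DWARFDie UnitDie = U.getUnitDIE(/*ExtractUnitDIEOnly=*/false);
  // Round-trip: looking the unit DIE up by its own offset yields an equal die.
  if (UnitDie && U.getDIEForOffset(UnitDie.getOffset()) == UnitDie)
    UnitDie.dump(outs(), /*recurseDepth=*/0);
}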
diff --git a/contrib/llvm/include/llvm/DebugInfo/MSF/ByteStream.h b/contrib/llvm/include/llvm/DebugInfo/MSF/ByteStream.h
new file mode 100644
index 000000000000..547844be5e5d
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/MSF/ByteStream.h
@@ -0,0 +1,169 @@
+//===- ByteStream.h - Reads stream data from a byte sequence ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_MSF_BYTESTREAM_H
+#define LLVM_DEBUGINFO_MSF_BYTESTREAM_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/DebugInfo/MSF/MSFError.h"
+#include "llvm/DebugInfo/MSF/StreamInterface.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FileOutputBuffer.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include <algorithm>
+#include <cstdint>
+#include <cstring>
+#include <memory>
+
+namespace llvm {
+namespace msf {
+
+class ByteStream : public ReadableStream {
+public:
+ ByteStream() = default;
+ explicit ByteStream(ArrayRef<uint8_t> Data) : Data(Data) {}
+ explicit ByteStream(StringRef Data)
+ : Data(Data.bytes_begin(), Data.bytes_end()) {}
+
+ Error readBytes(uint32_t Offset, uint32_t Size,
+ ArrayRef<uint8_t> &Buffer) const override {
+ if (Offset > Data.size())
+ return make_error<MSFError>(msf_error_code::insufficient_buffer);
+ if (Data.size() < Size + Offset)
+ return make_error<MSFError>(msf_error_code::insufficient_buffer);
+ Buffer = Data.slice(Offset, Size);
+ return Error::success();
+ }
+
+ Error readLongestContiguousChunk(uint32_t Offset,
+ ArrayRef<uint8_t> &Buffer) const override {
+ if (Offset >= Data.size())
+ return make_error<MSFError>(msf_error_code::insufficient_buffer);
+ Buffer = Data.slice(Offset);
+ return Error::success();
+ }
+
+ uint32_t getLength() const override { return Data.size(); }
+
+ ArrayRef<uint8_t> data() const { return Data; }
+
+ StringRef str() const {
+ const char *CharData = reinterpret_cast<const char *>(Data.data());
+ return StringRef(CharData, Data.size());
+ }
+
+protected:
+ ArrayRef<uint8_t> Data;
+};
+
+// MemoryBufferByteStream behaves like a read-only ByteStream, but has its data
+// backed by an llvm::MemoryBuffer. It also owns the underlying MemoryBuffer.
+class MemoryBufferByteStream : public ByteStream {
+public:
+ explicit MemoryBufferByteStream(std::unique_ptr<MemoryBuffer> Buffer)
+ : ByteStream(ArrayRef<uint8_t>(Buffer->getBuffer().bytes_begin(),
+ Buffer->getBuffer().bytes_end())),
+ MemBuffer(std::move(Buffer)) {}
+
+ std::unique_ptr<MemoryBuffer> MemBuffer;
+};
+
+class MutableByteStream : public WritableStream {
+public:
+ MutableByteStream() = default;
+ explicit MutableByteStream(MutableArrayRef<uint8_t> Data)
+ : Data(Data), ImmutableStream(Data) {}
+
+ Error readBytes(uint32_t Offset, uint32_t Size,
+ ArrayRef<uint8_t> &Buffer) const override {
+ return ImmutableStream.readBytes(Offset, Size, Buffer);
+ }
+
+ Error readLongestContiguousChunk(uint32_t Offset,
+ ArrayRef<uint8_t> &Buffer) const override {
+ return ImmutableStream.readLongestContiguousChunk(Offset, Buffer);
+ }
+
+ uint32_t getLength() const override { return ImmutableStream.getLength(); }
+
+ Error writeBytes(uint32_t Offset, ArrayRef<uint8_t> Buffer) const override {
+ if (Buffer.empty())
+ return Error::success();
+
+ if (Data.size() < Buffer.size())
+ return make_error<MSFError>(msf_error_code::insufficient_buffer);
+ if (Offset > Data.size() - Buffer.size())
+ return make_error<MSFError>(msf_error_code::insufficient_buffer);
+
+ uint8_t *DataPtr = const_cast<uint8_t *>(Data.data());
+ ::memcpy(DataPtr + Offset, Buffer.data(), Buffer.size());
+ return Error::success();
+ }
+
+ Error commit() const override { return Error::success(); }
+
+ MutableArrayRef<uint8_t> data() const { return Data; }
+
+private:
+ MutableArrayRef<uint8_t> Data;
+ ByteStream ImmutableStream;
+};
+
+// A simple adapter that acts like a ByteStream but holds ownership over
+// an underlying FileOutputBuffer.
+class FileBufferByteStream : public WritableStream {
+private:
+ class StreamImpl : public MutableByteStream {
+ public:
+ StreamImpl(std::unique_ptr<FileOutputBuffer> Buffer)
+ : MutableByteStream(MutableArrayRef<uint8_t>(Buffer->getBufferStart(),
+ Buffer->getBufferEnd())),
+ FileBuffer(std::move(Buffer)) {}
+
+ Error commit() const override {
+ if (FileBuffer->commit())
+ return llvm::make_error<MSFError>(msf_error_code::not_writable);
+ return Error::success();
+ }
+
+ private:
+ std::unique_ptr<FileOutputBuffer> FileBuffer;
+ };
+
+public:
+ explicit FileBufferByteStream(std::unique_ptr<FileOutputBuffer> Buffer)
+ : Impl(std::move(Buffer)) {}
+
+ Error readBytes(uint32_t Offset, uint32_t Size,
+ ArrayRef<uint8_t> &Buffer) const override {
+ return Impl.readBytes(Offset, Size, Buffer);
+ }
+
+ Error readLongestContiguousChunk(uint32_t Offset,
+ ArrayRef<uint8_t> &Buffer) const override {
+ return Impl.readLongestContiguousChunk(Offset, Buffer);
+ }
+
+ uint32_t getLength() const override { return Impl.getLength(); }
+
+ Error writeBytes(uint32_t Offset, ArrayRef<uint8_t> Data) const override {
+ return Impl.writeBytes(Offset, Data);
+ }
+
+ Error commit() const override { return Impl.commit(); }
+
+private:
+ StreamImpl Impl;
+};
+
+} // end namespace msf
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_MSF_BYTESTREAM_H
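A small usage sketch for the read-only stream: wrap a fixed buffer and pull a slice out, checking the Error result. Everything here is local to the example:

#include "llvm/DebugInfo/MSF/ByteStream.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>

using namespace llvm;
using namespace llvm::msf;

static void readSliceExample() {
  const uint8_t Bytes[] = {0xDE, 0xAD, 0xBE, 0xEF};
  ByteStream Stream(ArrayRef<uint8_t>(Bytes));
  ArrayRef<uint8_t> Slice;
  if (Error E = Stream.readBytes(/*Offset=*/1, /*Size=*/2, Slice)) {
    logAllUnhandledErrors(std::move(E), errs(), "readBytes: ");
    return;
  }
  outs() << "read " << Slice.size() << " bytes\n";   // prints: read 2 bytes
}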
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/IPDBFile.h b/contrib/llvm/include/llvm/DebugInfo/MSF/IMSFFile.h
index fccea2ac2470..f98e715e6b15 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/IPDBFile.h
+++ b/contrib/llvm/include/llvm/DebugInfo/MSF/IMSFFile.h
@@ -1,4 +1,4 @@
-//===- IPDBFile.h - Abstract base class for a PDB file ----------*- C++ -*-===//
+//===- IMSFFile.h - Abstract base class for an MSF file ---------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,23 +7,20 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_DEBUGINFO_PDB_RAW_IPDBFILE_H
-#define LLVM_DEBUGINFO_PDB_RAW_IPDBFILE_H
+#ifndef LLVM_DEBUGINFO_MSF_IMSFFILE_H
+#define LLVM_DEBUGINFO_MSF_IMSFFILE_H
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/DebugInfo/CodeView/StreamArray.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
-
-#include <stdint.h>
+#include <cstdint>
namespace llvm {
-namespace pdb {
+namespace msf {
-class IPDBFile {
+class IMSFFile {
public:
- virtual ~IPDBFile() {}
+ virtual ~IMSFFile() = default;
virtual uint32_t getBlockSize() const = 0;
virtual uint32_t getBlockCount() const = 0;
@@ -38,7 +35,8 @@ public:
virtual Error setBlockData(uint32_t BlockIndex, uint32_t Offset,
ArrayRef<uint8_t> Data) const = 0;
};
-}
-}
-#endif // LLVM_DEBUGINFO_PDB_RAW_IPDBFILE_H
+} // end namespace msf
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_MSF_IMSFFILE_H
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/MsfBuilder.h b/contrib/llvm/include/llvm/DebugInfo/MSF/MSFBuilder.h
index 92d9bc042cce..6d067cc1c238 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/MsfBuilder.h
+++ b/contrib/llvm/include/llvm/DebugInfo/MSF/MSFBuilder.h
@@ -7,14 +7,13 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_DEBUGINFO_PDB_RAW_MSFBUILDER_H
-#define LLVM_DEBUGINFO_PDB_RAW_MSFBUILDER_H
+#ifndef LLVM_DEBUGINFO_MSF_MSFBUILDER_H
+#define LLVM_DEBUGINFO_MSF_MSFBUILDER_H
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
-#include "llvm/DebugInfo/PDB/Raw/MsfCommon.h"
-#include "llvm/DebugInfo/PDB/Raw/PDBFile.h"
+#include "llvm/DebugInfo/MSF/MSFCommon.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Endian.h"
@@ -24,19 +23,19 @@
#include <vector>
namespace llvm {
-namespace pdb {
-class MsfBuilder {
+namespace msf {
+class MSFBuilder {
public:
- /// \brief Create a new `MsfBuilder`.
+ /// \brief Create a new `MSFBuilder`.
///
/// \param BlockSize The internal block size used by the PDB file. See
/// isValidBlockSize() for a list of valid block sizes.
///
/// \param MinBlockCount Causes the builder to reserve up front space for
- /// at least `MinBlockCount` blocks. This is useful when using `MsfBuilder`
- /// to read an existing PDB that you want to write back out later. The
- /// original PDB file's SuperBlock contains the exact number of blocks used
- /// by the file, so is a good hint as to how many blocks the new PDB file
+ /// at least `MinBlockCount` blocks. This is useful when using `MSFBuilder`
+ /// to read an existing MSF that you want to write back out later. The
+ /// original MSF file's SuperBlock contains the exact number of blocks used
+ /// by the file, so is a good hint as to how many blocks the new MSF file
/// will contain. Furthermore, it is actually necessary in this case. To
/// preserve stability of the file's layout, it is helpful to try to keep
/// all streams mapped to their original block numbers. To ensure that this
@@ -45,7 +44,7 @@ public:
///
/// \param CanGrow If true, any operation which results in an attempt to
/// locate a free block when all available blocks have been exhausted will
- /// allocate a new block, thereby growing the size of the final PDB file.
+ /// allocate a new block, thereby growing the size of the final MSF file.
/// When false, any such attempt will result in an error. This is especially
/// useful in testing scenarios when you know your test isn't going to do
/// anything to increase the size of the file, so having an Error returned if
@@ -55,34 +54,34 @@ public:
/// failed. Currently the only way this can fail is if an invalid block size
/// is specified, or `MinBlockCount` does not leave enough room for the
/// mandatory reserved blocks required by an MSF file.
- static Expected<MsfBuilder> create(BumpPtrAllocator &Allocator,
+ static Expected<MSFBuilder> create(BumpPtrAllocator &Allocator,
uint32_t BlockSize,
uint32_t MinBlockCount = 0,
bool CanGrow = true);
/// Request the block map to be at a specific block address. This is useful
- /// when editing a PDB and you want the layout to be as stable as possible.
+  /// when editing an MSF and you want the layout to be as stable as possible.
Error setBlockMapAddr(uint32_t Addr);
Error setDirectoryBlocksHint(ArrayRef<uint32_t> DirBlocks);
void setFreePageMap(uint32_t Fpm);
void setUnknown1(uint32_t Unk1);
/// Add a stream to the MSF file with the given size, occupying the given
- /// list of blocks. This is useful when reading a PDB file and you want a
+  /// list of blocks. This is useful when reading an MSF file and you want a
/// particular stream to occupy the original set of blocks. If the given
/// blocks are already allocated, or if the number of blocks specified is
/// incorrect for the given stream size, this function will return an Error.
- Error addStream(uint32_t Size, ArrayRef<uint32_t> Blocks);
+ Expected<uint32_t> addStream(uint32_t Size, ArrayRef<uint32_t> Blocks);
/// Add a stream to the MSF file with the given size, occupying any available
/// blocks that the builder decides to use. This is useful when building a
/// new PDB file from scratch and you don't care what blocks a stream occupies
/// but you just want it to work.
- Error addStream(uint32_t Size);
+ Expected<uint32_t> addStream(uint32_t Size);
/// Update the size of an existing stream. This will allocate or deallocate
/// blocks as needed to match the requested size. This can fail if `CanGrow`
- /// was set to false when initializing the `MsfBuilder`.
+ /// was set to false when initializing the `MSFBuilder`.
Error setStreamSize(uint32_t Idx, uint32_t Size);
/// Get the total number of streams in the MSF layout. This should return 1
@@ -112,10 +111,12 @@ public:
/// Finalize the layout and build the headers and structures that describe the
/// MSF layout and can be written directly to the MSF file.
- Expected<msf::Layout> build();
+ Expected<MSFLayout> build();
+
+ BumpPtrAllocator &getAllocator() { return Allocator; }
private:
- MsfBuilder(uint32_t BlockSize, uint32_t MinBlockCount, bool CanGrow,
+ MSFBuilder(uint32_t BlockSize, uint32_t MinBlockCount, bool CanGrow,
BumpPtrAllocator &Allocator);
Error allocateBlocks(uint32_t NumBlocks, MutableArrayRef<uint32_t> Blocks);
@@ -127,7 +128,7 @@ private:
bool IsGrowable;
uint32_t FreePageMap;
- uint32_t Unknown1;
+ uint32_t Unknown1 = 0;
uint32_t BlockSize;
uint32_t MininumBlocks;
uint32_t BlockMapAddr;
@@ -135,7 +136,7 @@ private:
std::vector<uint32_t> DirectoryBlocks;
std::vector<std::pair<uint32_t, BlockList>> StreamData;
};
-}
-}
+} // namespace msf
+} // namespace llvm
-#endif
+#endif // LLVM_DEBUGINFO_MSF_MSFBUILDER_H
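
For reference, a minimal sketch of the builder flow declared above (create a builder, add a stream, build the layout); the 4096-byte block size and the 16 KiB stream size are illustrative values only:

    #include "llvm/DebugInfo/MSF/MSFBuilder.h"
    #include "llvm/Support/Allocator.h"
    using namespace llvm;
    using namespace llvm::msf;

    Error buildExampleLayout() {
      BumpPtrAllocator Allocator;
      // 4096 is one of the block sizes accepted by isValidBlockSize().
      Expected<MSFBuilder> Msf = MSFBuilder::create(Allocator, 4096);
      if (!Msf)
        return Msf.takeError();

      // addStream() now reports the index assigned to the new stream.
      Expected<uint32_t> SN = Msf->addStream(16384);
      if (!SN)
        return SN.takeError();

      // Produce the SuperBlock and directory that a writer would serialize.
      Expected<MSFLayout> Layout = Msf->build();
      if (!Layout)
        return Layout.takeError();
      return Error::success();
    }
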
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/MsfCommon.h b/contrib/llvm/include/llvm/DebugInfo/MSF/MSFCommon.h
index 2f6a6986eba9..93a9c808b736 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/MsfCommon.h
+++ b/contrib/llvm/include/llvm/DebugInfo/MSF/MSFCommon.h
@@ -1,4 +1,4 @@
-//===- MsfCommon.h - Common types and functions for MSF files ---*- C++ -*-===//
+//===- MSFCommon.h - Common types and functions for MSF files ---*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,10 +7,11 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_DEBUGINFO_PDB_RAW_MSFCOMMON_H
-#define LLVM_DEBUGINFO_PDB_RAW_MSFCOMMON_H
+#ifndef LLVM_DEBUGINFO_MSF_MSFCOMMON_H
+#define LLVM_DEBUGINFO_MSF_MSFCOMMON_H
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
@@ -19,7 +20,6 @@
#include <vector>
namespace llvm {
-namespace pdb {
namespace msf {
static const char Magic[] = {'M', 'i', 'c', 'r', 'o', 's', 'o', 'f',
't', ' ', 'C', '/', 'C', '+', '+', ' ',
@@ -38,7 +38,7 @@ struct SuperBlock {
// The index of the free block map.
support::ulittle32_t FreeBlockMapBlock;
// This contains the number of blocks resident in the file system. In
- // practice, NumBlocks * BlockSize is equivalent to the size of the PDB
+ // practice, NumBlocks * BlockSize is equivalent to the size of the MSF
// file.
support::ulittle32_t NumBlocks;
// This contains the number of bytes which make up the directory.
@@ -49,8 +49,10 @@ struct SuperBlock {
support::ulittle32_t BlockMapAddr;
};
-struct Layout {
- SuperBlock *SB;
+struct MSFLayout {
+ MSFLayout() : SB(nullptr) {}
+ const SuperBlock *SB;
+ BitVector FreePageMap;
ArrayRef<support::ulittle32_t> DirectoryBlocks;
ArrayRef<support::ulittle32_t> StreamSizes;
std::vector<ArrayRef<support::ulittle32_t>> StreamMap;
@@ -82,9 +84,21 @@ inline uint64_t blockToOffset(uint64_t BlockNumber, uint64_t BlockSize) {
return BlockNumber * BlockSize;
}
-Error validateSuperBlock(const SuperBlock &SB);
+inline uint32_t getFpmIntervalLength(const MSFLayout &L) {
+ return L.SB->BlockSize;
}
+
+inline uint32_t getNumFpmIntervals(const MSFLayout &L) {
+ uint32_t Length = getFpmIntervalLength(L);
+ return llvm::alignTo(L.SB->NumBlocks, Length) / Length;
}
+
+inline uint32_t getFullFpmByteSize(const MSFLayout &L) {
+ return llvm::alignTo(L.SB->NumBlocks, 8) / 8;
}
-#endif
+Error validateSuperBlock(const SuperBlock &SB);
+} // namespace msf
+} // namespace llvm
+
+#endif // LLVM_DEBUGINFO_MSF_MSFCOMMON_H
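
The free page map (FPM) helpers added above are plain arithmetic over the SuperBlock; a worked sketch with made-up numbers (4096-byte blocks, 10000 blocks in the file):

    #include "llvm/DebugInfo/MSF/MSFCommon.h"
    using namespace llvm;
    using namespace llvm::msf;

    void fpmArithmetic() {
      SuperBlock SB;
      SB.BlockSize = 4096;  // illustrative values, not from a real PDB
      SB.NumBlocks = 10000;

      MSFLayout L;
      L.SB = &SB;

      uint32_t Interval = getFpmIntervalLength(L);   // == SB.BlockSize == 4096
      uint32_t Count = getNumFpmIntervals(L);        // alignTo(10000, 4096) / 4096 == 3
      uint32_t Bytes = getFullFpmByteSize(L);        // alignTo(10000, 8) / 8 == 1250, one bit per block
      uint64_t Off = blockToOffset(3, SB.BlockSize); // block 3 begins at byte 12288
      (void)Interval; (void)Count; (void)Bytes; (void)Off;
    }
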
diff --git a/contrib/llvm/include/llvm/DebugInfo/MSF/MSFError.h b/contrib/llvm/include/llvm/DebugInfo/MSF/MSFError.h
new file mode 100644
index 000000000000..e66aeca3cd45
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/MSF/MSFError.h
@@ -0,0 +1,47 @@
+//===- MSFError.h - Error extensions for MSF Files --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_MSF_MSFERROR_H
+#define LLVM_DEBUGINFO_MSF_MSFERROR_H
+
+#include "llvm/Support/Error.h"
+
+#include <string>
+
+namespace llvm {
+namespace msf {
+enum class msf_error_code {
+ unspecified = 1,
+ insufficient_buffer,
+ not_writable,
+ no_stream,
+ invalid_format,
+ block_in_use
+};
+
+/// Base class for errors originating while parsing MSF files.
+class MSFError : public ErrorInfo<MSFError> {
+public:
+ static char ID;
+ MSFError(msf_error_code C);
+ MSFError(const std::string &Context);
+ MSFError(msf_error_code C, const std::string &Context);
+
+ void log(raw_ostream &OS) const override;
+ const std::string &getErrorMessage() const;
+ std::error_code convertToErrorCode() const override;
+
+private:
+ std::string ErrMsg;
+ msf_error_code Code;
+};
+} // namespace msf
+} // namespace llvm
+
+#endif // LLVM_DEBUGINFO_MSF_MSFERROR_H
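
A short sketch of how this error type is raised and consumed through llvm::Error / Expected; the stream-lookup helper is invented purely for illustration:

    #include "llvm/DebugInfo/MSF/MSFError.h"
    #include "llvm/Support/Error.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;
    using namespace llvm::msf;

    // Hypothetical helper: fail with no_stream when the index is out of range.
    static Expected<uint32_t> pickStream(uint32_t NumStreams, uint32_t Index) {
      if (Index >= NumStreams)
        return make_error<MSFError>(msf_error_code::no_stream,
                                    "stream index out of range");
      return Index;
    }

    void demo() {
      if (Expected<uint32_t> S = pickStream(4, 9)) {
        // use *S
      } else {
        // A real caller would propagate the Error; here we only log it.
        logAllUnhandledErrors(S.takeError(), errs(), "MSF error: ");
      }
    }
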
diff --git a/contrib/llvm/include/llvm/DebugInfo/MSF/MSFStreamLayout.h b/contrib/llvm/include/llvm/DebugInfo/MSF/MSFStreamLayout.h
new file mode 100644
index 000000000000..bdde98f52662
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/MSF/MSFStreamLayout.h
@@ -0,0 +1,35 @@
+//===- MSFStreamLayout.h - Describes the layout of a stream -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_MSF_MSFSTREAMLAYOUT_H
+#define LLVM_DEBUGINFO_MSF_MSFSTREAMLAYOUT_H
+
+#include "llvm/Support/Endian.h"
+
+#include <cstdint>
+#include <vector>
+
+namespace llvm {
+namespace msf {
+
+/// \brief Describes the layout of a stream in an MSF layout. A "stream" here
+/// is defined as any logical unit of data which may be arranged inside the MSF
+/// file as a sequence of (possibly discontiguous) blocks. When we want to read
+/// from a particular MSF Stream, we fill out a stream layout structure and the
+/// reader uses it to determine which blocks in the underlying MSF file contain
+/// the data, so that it can be pieced together in the right order.
+class MSFStreamLayout {
+public:
+ uint32_t Length;
+ std::vector<support::ulittle32_t> Blocks;
+};
+} // namespace msf
+} // namespace llvm
+
+#endif // LLVM_DEBUGINFO_MSF_MSFSTREAMLAYOUT_H
diff --git a/contrib/llvm/include/llvm/DebugInfo/MSF/MappedBlockStream.h b/contrib/llvm/include/llvm/DebugInfo/MSF/MappedBlockStream.h
new file mode 100644
index 000000000000..fff4e9cecef5
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/MSF/MappedBlockStream.h
@@ -0,0 +1,144 @@
+//===- MappedBlockStream.h - Discontiguous stream data in an MSF -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_MSF_MAPPEDBLOCKSTREAM_H
+#define LLVM_DEBUGINFO_MSF_MAPPEDBLOCKSTREAM_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/DebugInfo/MSF/MSFStreamLayout.h"
+#include "llvm/DebugInfo/MSF/StreamInterface.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+#include <cstdint>
+#include <vector>
+
+namespace llvm {
+namespace msf {
+
+struct MSFLayout;
+
+/// MappedBlockStream represents data stored in an MSF file in chunks of a
+/// particular size (called the Block Size), where the chunks are not
+/// necessarily contiguous. The arrangement of these chunks within the file
+/// is described by some other metadata contained within the MSF file. In
+/// the case of a standard MSF Stream, the layout of the stream's blocks
+/// is described by the MSF "directory", but in the case of the directory
+/// itself, the layout is described by an array at a fixed location within
+/// the MSF. MappedBlockStream provides methods for reading from and writing
+/// to one of these streams transparently, as if it were a contiguous sequence
+/// of bytes.
+class MappedBlockStream : public ReadableStream {
+ friend class WritableMappedBlockStream;
+public:
+ static std::unique_ptr<MappedBlockStream>
+ createStream(uint32_t BlockSize, uint32_t NumBlocks,
+ const MSFStreamLayout &Layout, const ReadableStream &MsfData);
+
+ static std::unique_ptr<MappedBlockStream>
+ createIndexedStream(const MSFLayout &Layout, const ReadableStream &MsfData,
+ uint32_t StreamIndex);
+
+ static std::unique_ptr<MappedBlockStream>
+ createFpmStream(const MSFLayout &Layout, const ReadableStream &MsfData);
+
+ static std::unique_ptr<MappedBlockStream>
+ createDirectoryStream(const MSFLayout &Layout, const ReadableStream &MsfData);
+
+ Error readBytes(uint32_t Offset, uint32_t Size,
+ ArrayRef<uint8_t> &Buffer) const override;
+ Error readLongestContiguousChunk(uint32_t Offset,
+ ArrayRef<uint8_t> &Buffer) const override;
+
+ uint32_t getLength() const override;
+
+ uint32_t getNumBytesCopied() const;
+
+ llvm::BumpPtrAllocator &getAllocator() { return Pool; }
+
+ void invalidateCache();
+
+ uint32_t getBlockSize() const { return BlockSize; }
+ uint32_t getNumBlocks() const { return NumBlocks; }
+ uint32_t getStreamLength() const { return StreamLayout.Length; }
+
+protected:
+ MappedBlockStream(uint32_t BlockSize, uint32_t NumBlocks,
+ const MSFStreamLayout &StreamLayout,
+ const ReadableStream &MsfData);
+
+private:
+ const MSFStreamLayout &getStreamLayout() const { return StreamLayout; }
+ void fixCacheAfterWrite(uint32_t Offset, ArrayRef<uint8_t> Data) const;
+
+ Error readBytes(uint32_t Offset, MutableArrayRef<uint8_t> Buffer) const;
+ bool tryReadContiguously(uint32_t Offset, uint32_t Size,
+ ArrayRef<uint8_t> &Buffer) const;
+
+ const uint32_t BlockSize;
+ const uint32_t NumBlocks;
+ const MSFStreamLayout StreamLayout;
+ const ReadableStream &MsfData;
+
+ typedef MutableArrayRef<uint8_t> CacheEntry;
+ mutable llvm::BumpPtrAllocator Pool;
+ mutable DenseMap<uint32_t, std::vector<CacheEntry>> CacheMap;
+};
+
+class WritableMappedBlockStream : public WritableStream {
+public:
+ static std::unique_ptr<WritableMappedBlockStream>
+ createStream(uint32_t BlockSize, uint32_t NumBlocks,
+ const MSFStreamLayout &Layout, const WritableStream &MsfData);
+
+ static std::unique_ptr<WritableMappedBlockStream>
+ createIndexedStream(const MSFLayout &Layout, const WritableStream &MsfData,
+ uint32_t StreamIndex);
+
+ static std::unique_ptr<WritableMappedBlockStream>
+ createDirectoryStream(const MSFLayout &Layout, const WritableStream &MsfData);
+
+ static std::unique_ptr<WritableMappedBlockStream>
+ createFpmStream(const MSFLayout &Layout, const WritableStream &MsfData);
+
+ Error readBytes(uint32_t Offset, uint32_t Size,
+ ArrayRef<uint8_t> &Buffer) const override;
+ Error readLongestContiguousChunk(uint32_t Offset,
+ ArrayRef<uint8_t> &Buffer) const override;
+ uint32_t getLength() const override;
+
+ Error writeBytes(uint32_t Offset, ArrayRef<uint8_t> Buffer) const override;
+
+ Error commit() const override;
+
+ const MSFStreamLayout &getStreamLayout() const {
+ return ReadInterface.getStreamLayout();
+ }
+ uint32_t getBlockSize() const { return ReadInterface.getBlockSize(); }
+ uint32_t getNumBlocks() const { return ReadInterface.getNumBlocks(); }
+ uint32_t getStreamLength() const { return ReadInterface.getStreamLength(); }
+
+protected:
+ WritableMappedBlockStream(uint32_t BlockSize, uint32_t NumBlocks,
+ const MSFStreamLayout &StreamLayout,
+ const WritableStream &MsfData);
+
+private:
+ MappedBlockStream ReadInterface;
+
+ const WritableStream &WriteInterface;
+};
+
+} // end namespace msf
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_MSF_MAPPEDBLOCKSTREAM_H
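
A sketch of pulling bytes out of one numbered stream via the factory functions above; the MSFLayout and the ReadableStream covering the whole file are assumed to come from whatever code parsed the container:

    #include "llvm/DebugInfo/MSF/MSFCommon.h"
    #include "llvm/DebugInfo/MSF/MappedBlockStream.h"
    #include <memory>
    using namespace llvm;
    using namespace llvm::msf;

    Error dumpStreamStart(const MSFLayout &Layout, const ReadableStream &MsfData,
                          uint32_t StreamIndex) {
      std::unique_ptr<MappedBlockStream> S =
          MappedBlockStream::createIndexedStream(Layout, MsfData, StreamIndex);

      // Read the first 16 bytes of the stream. The returned ArrayRef points at
      // stable memory: either the underlying file data (when the range happens
      // to be contiguous) or the stream's internal allocation pool.
      ArrayRef<uint8_t> Bytes;
      if (auto EC = S->readBytes(0, 16, Bytes))
        return EC;
      // ... inspect Bytes ...
      return Error::success();
    }
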
diff --git a/contrib/llvm/include/llvm/DebugInfo/MSF/SequencedItemStream.h b/contrib/llvm/include/llvm/DebugInfo/MSF/SequencedItemStream.h
new file mode 100644
index 000000000000..1949beef9fff
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/MSF/SequencedItemStream.h
@@ -0,0 +1,93 @@
+//===- SequencedItemStream.h ------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_MSF_SEQUENCEDITEMSTREAM_H
+#define LLVM_DEBUGINFO_MSF_SEQUENCEDITEMSTREAM_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/DebugInfo/MSF/MSFError.h"
+#include "llvm/DebugInfo/MSF/StreamInterface.h"
+#include "llvm/Support/Error.h"
+#include <cstddef>
+#include <cstdint>
+
+namespace llvm {
+namespace msf {
+
+template <typename T> struct SequencedItemTraits {
+ static size_t length(const T &Item) = delete;
+ static ArrayRef<uint8_t> bytes(const T &Item) = delete;
+};
+
+/// SequencedItemStream represents a sequence of objects stored in a
+/// standard container but for which it is useful to view as a stream of
+/// contiguous bytes. An example of this might be if you have a std::vector
+/// of TPI records, where each record contains a byte sequence that
+/// represents that one record serialized, but where each consecutive item
+/// might not be allocated immediately after the previous item. Using a
+/// SequencedItemStream, we can adapt the VarStreamArray class to trivially
+/// extract one item at a time, allowing the data to be used anywhere a
+/// VarStreamArray could be used.
+template <typename T, typename Traits = SequencedItemTraits<T>>
+class SequencedItemStream : public ReadableStream {
+public:
+ SequencedItemStream() = default;
+
+ Error readBytes(uint32_t Offset, uint32_t Size,
+ ArrayRef<uint8_t> &Buffer) const override {
+ auto ExpectedIndex = translateOffsetIndex(Offset);
+ if (!ExpectedIndex)
+ return ExpectedIndex.takeError();
+ const auto &Item = Items[*ExpectedIndex];
+ if (Size > Traits::length(Item))
+ return make_error<MSFError>(msf_error_code::insufficient_buffer);
+ Buffer = Traits::bytes(Item).take_front(Size);
+ return Error::success();
+ }
+
+ Error readLongestContiguousChunk(uint32_t Offset,
+ ArrayRef<uint8_t> &Buffer) const override {
+ auto ExpectedIndex = translateOffsetIndex(Offset);
+ if (!ExpectedIndex)
+ return ExpectedIndex.takeError();
+ Buffer = Traits::bytes(Items[*ExpectedIndex]);
+ return Error::success();
+ }
+
+ void setItems(ArrayRef<T> ItemArray) { Items = ItemArray; }
+
+ uint32_t getLength() const override {
+ uint32_t Size = 0;
+ for (const auto &Item : Items)
+ Size += Traits::length(Item);
+ return Size;
+ }
+
+private:
+ Expected<uint32_t> translateOffsetIndex(uint32_t Offset) const {
+ uint32_t CurrentOffset = 0;
+ uint32_t CurrentIndex = 0;
+ for (const auto &Item : Items) {
+ if (CurrentOffset >= Offset)
+ break;
+ CurrentOffset += Traits::length(Item);
+ ++CurrentIndex;
+ }
+ if (CurrentOffset != Offset)
+ return make_error<MSFError>(msf_error_code::insufficient_buffer);
+ return CurrentIndex;
+ }
+
+ ArrayRef<T> Items;
+};
+
+} // end namespace msf
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_MSF_SEQUENCEDITEMSTREAM_H
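
The traits specialization described above might look like this for a simple owned-buffer record; the Record type is made up for the example:

    #include "llvm/DebugInfo/MSF/SequencedItemStream.h"
    #include <vector>
    using namespace llvm;
    using namespace llvm::msf;

    struct Record {
      std::vector<uint8_t> Bytes; // one serialized record
    };

    namespace llvm {
    namespace msf {
    template <> struct SequencedItemTraits<Record> {
      static size_t length(const Record &R) { return R.Bytes.size(); }
      static ArrayRef<uint8_t> bytes(const Record &R) { return R.Bytes; }
    };
    } // end namespace msf
    } // end namespace llvm

    void useRecords(ArrayRef<Record> Records) {
      SequencedItemStream<Record> Stream;
      Stream.setItems(Records);
      // Stream can now back a VarStreamArray or be read with a StreamReader.
    }
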
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/StreamArray.h b/contrib/llvm/include/llvm/DebugInfo/MSF/StreamArray.h
index 0b9349aac753..d8b74bc75c94 100644
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/StreamArray.h
+++ b/contrib/llvm/include/llvm/DebugInfo/MSF/StreamArray.h
@@ -1,4 +1,4 @@
-//===- StreamArray.h - Array backed by an arbitrary stream ----------------===//
+//===- StreamArray.h - Array backed by an arbitrary stream ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,17 +7,17 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_DEBUGINFO_CODEVIEW_STREAMARRAY_H
-#define LLVM_DEBUGINFO_CODEVIEW_STREAMARRAY_H
+#ifndef LLVM_DEBUGINFO_MSF_STREAMARRAY_H
+#define LLVM_DEBUGINFO_MSF_STREAMARRAY_H
-#include "llvm/DebugInfo/CodeView/StreamRef.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/DebugInfo/MSF/StreamRef.h"
#include "llvm/Support/Error.h"
-
-#include <functional>
-#include <type_traits>
+#include <cassert>
+#include <cstdint>
namespace llvm {
-namespace codeview {
+namespace msf {
/// VarStreamArrayExtractor is intended to be specialized to provide customized
/// extraction logic. On input it receives a StreamRef pointing to the
@@ -34,7 +34,8 @@ namespace codeview {
template <typename T> struct VarStreamArrayExtractor {
// Method intentionally deleted. You must provide an explicit specialization
// with the following method implemented.
- Error operator()(StreamRef Stream, uint32_t &Len, T &Item) const = delete;
+ Error operator()(ReadableStreamRef Stream, uint32_t &Len,
+ T &Item) const = delete;
};
/// VarStreamArray represents an array of variable length records backed by a
@@ -74,17 +75,19 @@ template <typename ValueType, typename Extractor> class VarStreamArrayIterator;
template <typename ValueType,
typename Extractor = VarStreamArrayExtractor<ValueType>>
+
class VarStreamArray {
friend class VarStreamArrayIterator<ValueType, Extractor>;
public:
typedef VarStreamArrayIterator<ValueType, Extractor> Iterator;
- VarStreamArray() {}
+ VarStreamArray() = default;
explicit VarStreamArray(const Extractor &E) : E(E) {}
- explicit VarStreamArray(StreamRef Stream) : Stream(Stream) {}
- VarStreamArray(StreamRef Stream, const Extractor &E) : Stream(Stream), E(E) {}
+ explicit VarStreamArray(ReadableStreamRef Stream) : Stream(Stream) {}
+ VarStreamArray(ReadableStreamRef Stream, const Extractor &E)
+ : Stream(Stream), E(E) {}
VarStreamArray(const VarStreamArray<ValueType, Extractor> &Other)
: Stream(Other.Stream), E(Other.E) {}
@@ -97,10 +100,10 @@ public:
const Extractor &getExtractor() const { return E; }
- StreamRef getUnderlyingStream() const { return Stream; }
+ ReadableStreamRef getUnderlyingStream() const { return Stream; }
private:
- StreamRef Stream;
+ ReadableStreamRef Stream;
Extractor E;
};
@@ -122,9 +125,9 @@ public:
}
}
}
- VarStreamArrayIterator() {}
+ VarStreamArrayIterator() = default;
explicit VarStreamArrayIterator(const Extractor &E) : Extract(E) {}
- ~VarStreamArrayIterator() {}
+ ~VarStreamArrayIterator() = default;
bool operator==(const IterType &R) const {
if (Array && R.Array) {
@@ -189,7 +192,7 @@ private:
}
ValueType ThisValue;
- StreamRef IterRef;
+ ReadableStreamRef IterRef;
const ArrayType *Array{nullptr};
uint32_t ThisLen{0};
bool HasError{false};
@@ -203,8 +206,8 @@ template <typename T> class FixedStreamArray {
friend class FixedStreamArrayIterator<T>;
public:
- FixedStreamArray() : Stream() {}
- FixedStreamArray(StreamRef Stream) : Stream(Stream) {
+ FixedStreamArray() = default;
+ FixedStreamArray(ReadableStreamRef Stream) : Stream(Stream) {
assert(Stream.getLength() % sizeof(T) == 0);
}
@@ -226,14 +229,15 @@ public:
FixedStreamArrayIterator<T> begin() const {
return FixedStreamArrayIterator<T>(*this, 0);
}
+
FixedStreamArrayIterator<T> end() const {
return FixedStreamArrayIterator<T>(*this, size());
}
- StreamRef getUnderlyingStream() const { return Stream; }
+ ReadableStreamRef getUnderlyingStream() const { return Stream; }
private:
- StreamRef Stream;
+ ReadableStreamRef Stream;
};
template <typename T> class FixedStreamArrayIterator {
@@ -269,7 +273,7 @@ private:
uint32_t Index;
};
-} // namespace codeview
+} // namespace msf
} // namespace llvm
-#endif // LLVM_DEBUGINFO_CODEVIEW_STREAMARRAY_H
+#endif // LLVM_DEBUGINFO_MSF_STREAMARRAY_H
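
An extractor specialization of the kind the comment above asks for, using a hypothetical length-prefixed record (a 2-byte little-endian length followed by that many payload bytes):

    #include "llvm/DebugInfo/MSF/StreamArray.h"
    #include "llvm/DebugInfo/MSF/StreamReader.h"
    using namespace llvm;
    using namespace llvm::msf;

    struct PrefixedRecord {
      ArrayRef<uint8_t> Payload; // points into the underlying stream
    };

    namespace llvm {
    namespace msf {
    template <> struct VarStreamArrayExtractor<PrefixedRecord> {
      Error operator()(ReadableStreamRef Stream, uint32_t &Len,
                       PrefixedRecord &Item) const {
        StreamReader Reader(Stream);
        uint16_t Size = 0;
        if (auto EC = Reader.readInteger(Size))
          return EC;
        if (auto EC = Reader.readBytes(Item.Payload, Size))
          return EC;
        Len = sizeof(uint16_t) + Size; // total bytes consumed by this record
        return Error::success();
      }
    };
    } // end namespace msf
    } // end namespace llvm

A VarStreamArray<PrefixedRecord> constructed over a ReadableStreamRef can then be walked with the usual begin()/end() iteration, one record per step.
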
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/StreamInterface.h b/contrib/llvm/include/llvm/DebugInfo/MSF/StreamInterface.h
index 241aec457870..09782d8e3b30 100644
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/StreamInterface.h
+++ b/contrib/llvm/include/llvm/DebugInfo/MSF/StreamInterface.h
@@ -7,26 +7,19 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_DEBUGINFO_CODEVIEW_STREAMINTERFACE_H
-#define LLVM_DEBUGINFO_CODEVIEW_STREAMINTERFACE_H
+#ifndef LLVM_DEBUGINFO_MSF_STREAMINTERFACE_H
+#define LLVM_DEBUGINFO_MSF_STREAMINTERFACE_H
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/Error.h"
#include <cstdint>
namespace llvm {
-namespace codeview {
-
-/// StreamInterface abstracts the notion of a data stream. This way, an
-/// implementation could implement trivial reading from a contiguous memory
-/// buffer or, as in the case of PDB files, reading from a set of possibly
-/// discontiguous blocks. The implementation is required to return references
-/// to stable memory, so if this is not possible (for example in the case of
-/// a PDB file with discontiguous blocks, it must keep its own pool of temp
-/// storage.
-class StreamInterface {
+namespace msf {
+
+class ReadableStream {
public:
- virtual ~StreamInterface() {}
+ virtual ~ReadableStream() = default;
// Given an offset into the stream and a number of bytes, attempt to read
// the bytes and set the output ArrayRef to point to a reference into the
@@ -39,17 +32,22 @@ public:
virtual Error readLongestContiguousChunk(uint32_t Offset,
ArrayRef<uint8_t> &Buffer) const = 0;
+ virtual uint32_t getLength() const = 0;
+};
+
+class WritableStream : public ReadableStream {
+public:
+ ~WritableStream() override = default;
+
// Attempt to write the given bytes into the stream at the desired offset.
// This will always necessitate a copy. Cannot shrink or grow the stream,
// only writes into existing allocated space.
virtual Error writeBytes(uint32_t Offset, ArrayRef<uint8_t> Data) const = 0;
- virtual uint32_t getLength() const = 0;
-
virtual Error commit() const = 0;
};
-} // end namespace codeview
+} // end namespace msf
} // end namespace llvm
-#endif // LLVM_DEBUGINFO_CODEVIEW_STREAMINTERFACE_H
+#endif // LLVM_DEBUGINFO_MSF_STREAMINTERFACE_H
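
The simplest possible ReadableStream is one backed by a flat, contiguous buffer; the class below is a toy illustration of the contract (the tree's own ByteStream plays this role for real code):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/DebugInfo/MSF/MSFError.h"
    #include "llvm/DebugInfo/MSF/StreamInterface.h"
    using namespace llvm;
    using namespace llvm::msf;

    class FlatStream : public ReadableStream {
    public:
      explicit FlatStream(ArrayRef<uint8_t> Data) : Data(Data) {}

      Error readBytes(uint32_t Offset, uint32_t Size,
                      ArrayRef<uint8_t> &Buffer) const override {
        if (uint64_t(Offset) + Size > Data.size())
          return make_error<MSFError>(msf_error_code::insufficient_buffer);
        Buffer = Data.slice(Offset, Size); // stable memory, no copy needed
        return Error::success();
      }

      Error readLongestContiguousChunk(uint32_t Offset,
                                       ArrayRef<uint8_t> &Buffer) const override {
        if (Offset >= Data.size())
          return make_error<MSFError>(msf_error_code::insufficient_buffer);
        Buffer = Data.drop_front(Offset);
        return Error::success();
      }

      uint32_t getLength() const override {
        return static_cast<uint32_t>(Data.size());
      }

    private:
      ArrayRef<uint8_t> Data;
    };
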
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/StreamReader.h b/contrib/llvm/include/llvm/DebugInfo/MSF/StreamReader.h
index 2f497c2c43f1..fc2ca78dc18f 100644
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/StreamReader.h
+++ b/contrib/llvm/include/llvm/DebugInfo/MSF/StreamReader.h
@@ -7,35 +7,40 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_DEBUGINFO_CODEVIEW_STREAMREADER_H
-#define LLVM_DEBUGINFO_CODEVIEW_STREAMREADER_H
+#ifndef LLVM_DEBUGINFO_MSF_STREAMREADER_H
+#define LLVM_DEBUGINFO_MSF_STREAMREADER_H
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/DebugInfo/CodeView/CodeViewError.h"
-#include "llvm/DebugInfo/CodeView/StreamArray.h"
-#include "llvm/DebugInfo/CodeView/StreamInterface.h"
+#include "llvm/DebugInfo/MSF/MSFError.h"
+#include "llvm/DebugInfo/MSF/StreamArray.h"
+#include "llvm/DebugInfo/MSF/StreamInterface.h"
+#include "llvm/DebugInfo/MSF/StreamRef.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include <string>
namespace llvm {
-namespace codeview {
-
-class StreamRef;
+namespace msf {
class StreamReader {
public:
- StreamReader(StreamRef Stream);
+ StreamReader(ReadableStreamRef Stream);
Error readLongestContiguousChunk(ArrayRef<uint8_t> &Buffer);
Error readBytes(ArrayRef<uint8_t> &Buffer, uint32_t Size);
+ Error readInteger(uint8_t &Dest);
Error readInteger(uint16_t &Dest);
Error readInteger(uint32_t &Dest);
+ Error readInteger(uint64_t &Dest);
+ Error readInteger(int8_t &Dest);
+ Error readInteger(int16_t &Dest);
+ Error readInteger(int32_t &Dest);
+ Error readInteger(int64_t &Dest);
Error readZeroString(StringRef &Dest);
Error readFixedString(StringRef &Dest, uint32_t Length);
- Error readStreamRef(StreamRef &Ref);
- Error readStreamRef(StreamRef &Ref, uint32_t Length);
+ Error readStreamRef(ReadableStreamRef &Ref);
+ Error readStreamRef(ReadableStreamRef &Ref, uint32_t Length);
template <typename T> Error readEnum(T &Dest) {
typename std::underlying_type<T>::type N;
@@ -61,8 +66,8 @@ public:
return Error::success();
}
- if (NumElements > UINT32_MAX/sizeof(T))
- return make_error<CodeViewError>(cv_error_code::insufficient_buffer);
+ if (NumElements > UINT32_MAX / sizeof(T))
+ return make_error<MSFError>(msf_error_code::insufficient_buffer);
if (auto EC = readBytes(Bytes, NumElements * sizeof(T)))
return EC;
@@ -72,7 +77,7 @@ public:
template <typename T, typename U>
Error readArray(VarStreamArray<T, U> &Array, uint32_t Size) {
- StreamRef S;
+ ReadableStreamRef S;
if (auto EC = readStreamRef(S, Size))
return EC;
Array = VarStreamArray<T, U>(S, Array.getExtractor());
@@ -87,25 +92,30 @@ public:
}
uint32_t Length = NumItems * sizeof(T);
if (Length / sizeof(T) != NumItems)
- return make_error<CodeViewError>(cv_error_code::corrupt_record);
+ return make_error<MSFError>(msf_error_code::invalid_format);
if (Offset + Length > Stream.getLength())
- return make_error<CodeViewError>(cv_error_code::insufficient_buffer);
- StreamRef View = Stream.slice(Offset, Length);
+ return make_error<MSFError>(msf_error_code::insufficient_buffer);
+ ReadableStreamRef View = Stream.slice(Offset, Length);
Array = FixedStreamArray<T>(View);
Offset += Length;
return Error::success();
}
+ bool empty() const { return bytesRemaining() == 0; }
void setOffset(uint32_t Off) { Offset = Off; }
uint32_t getOffset() const { return Offset; }
uint32_t getLength() const { return Stream.getLength(); }
uint32_t bytesRemaining() const { return getLength() - getOffset(); }
+ Error skip(uint32_t Amount);
+
+ uint8_t peek() const;
+
private:
- StreamRef Stream;
+ ReadableStreamRef Stream;
uint32_t Offset;
};
-} // namespace codeview
+} // namespace msf
} // namespace llvm
-#endif // LLVM_DEBUGINFO_CODEVIEW_STREAMREADER_H
+#endif // LLVM_DEBUGINFO_MSF_STREAMREADER_H
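
A sketch of parsing a small fixed header with the reader; the field layout and the array count are made up, but every call is one of the overloads declared above:

    #include "llvm/ADT/StringRef.h"
    #include "llvm/DebugInfo/MSF/StreamReader.h"
    #include "llvm/DebugInfo/MSF/StreamRef.h"
    #include "llvm/Support/Endian.h"
    using namespace llvm;
    using namespace llvm::msf;

    Error parseToyHeader(ReadableStreamRef Data) {
      StreamReader Reader(Data);

      uint32_t Version = 0;
      uint16_t Flags = 0;
      StringRef Name;
      if (auto EC = Reader.readInteger(Version))   // 4 bytes
        return EC;
      if (auto EC = Reader.readInteger(Flags))     // 2 bytes
        return EC;
      if (auto EC = Reader.readZeroString(Name))   // NUL-terminated name
        return EC;

      // Fixed-size trailing array of four 32-bit offsets (count invented here).
      FixedStreamArray<support::ulittle32_t> Offsets;
      if (auto EC = Reader.readArray(Offsets, 4))
        return EC;
      return Error::success();
    }
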
diff --git a/contrib/llvm/include/llvm/DebugInfo/MSF/StreamRef.h b/contrib/llvm/include/llvm/DebugInfo/MSF/StreamRef.h
new file mode 100644
index 000000000000..eee71e53a39b
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/MSF/StreamRef.h
@@ -0,0 +1,135 @@
+//===- StreamRef.h - A copyable reference to a stream -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_MSF_STREAMREF_H
+#define LLVM_DEBUGINFO_MSF_STREAMREF_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/DebugInfo/MSF/MSFError.h"
+#include "llvm/DebugInfo/MSF/StreamInterface.h"
+#include "llvm/Support/Error.h"
+#include <algorithm>
+#include <cstdint>
+
+namespace llvm {
+namespace msf {
+
+template <class StreamType, class RefType> class StreamRefBase {
+public:
+ StreamRefBase() : Stream(nullptr), ViewOffset(0), Length(0) {}
+ StreamRefBase(const StreamType &Stream, uint32_t Offset, uint32_t Length)
+ : Stream(&Stream), ViewOffset(Offset), Length(Length) {}
+
+ uint32_t getLength() const { return Length; }
+ const StreamType *getStream() const { return Stream; }
+
+ RefType drop_front(uint32_t N) const {
+ if (!Stream)
+ return RefType();
+
+ N = std::min(N, Length);
+ return RefType(*Stream, ViewOffset + N, Length - N);
+ }
+
+ RefType keep_front(uint32_t N) const {
+ if (!Stream)
+ return RefType();
+ N = std::min(N, Length);
+ return RefType(*Stream, ViewOffset, N);
+ }
+
+ RefType slice(uint32_t Offset, uint32_t Len) const {
+ return drop_front(Offset).keep_front(Len);
+ }
+
+ bool operator==(const RefType &Other) const {
+ if (Stream != Other.Stream)
+ return false;
+ if (ViewOffset != Other.ViewOffset)
+ return false;
+ if (Length != Other.Length)
+ return false;
+ return true;
+ }
+
+protected:
+ const StreamType *Stream;
+ uint32_t ViewOffset;
+ uint32_t Length;
+};
+
+class ReadableStreamRef
+ : public StreamRefBase<ReadableStream, ReadableStreamRef> {
+public:
+ ReadableStreamRef() = default;
+ ReadableStreamRef(const ReadableStream &Stream)
+ : StreamRefBase(Stream, 0, Stream.getLength()) {}
+ ReadableStreamRef(const ReadableStream &Stream, uint32_t Offset,
+ uint32_t Length)
+ : StreamRefBase(Stream, Offset, Length) {}
+
+ // Use StreamRef.slice() instead.
+ ReadableStreamRef(const ReadableStreamRef &S, uint32_t Offset,
+ uint32_t Length) = delete;
+
+ Error readBytes(uint32_t Offset, uint32_t Size,
+ ArrayRef<uint8_t> &Buffer) const {
+ if (ViewOffset + Offset < Offset)
+ return make_error<MSFError>(msf_error_code::insufficient_buffer);
+ if (Size + Offset > Length)
+ return make_error<MSFError>(msf_error_code::insufficient_buffer);
+ return Stream->readBytes(ViewOffset + Offset, Size, Buffer);
+ }
+
+ // Given an offset into the stream, read as much as possible without copying
+ // any data.
+ Error readLongestContiguousChunk(uint32_t Offset,
+ ArrayRef<uint8_t> &Buffer) const {
+ if (Offset >= Length)
+ return make_error<MSFError>(msf_error_code::insufficient_buffer);
+
+ if (auto EC = Stream->readLongestContiguousChunk(Offset, Buffer))
+ return EC;
+ // This StreamRef might refer to a smaller window over a larger stream. In
+ // that case we will have read out more bytes than we should return, because
+ // we should not read past the end of the current view.
+ uint32_t MaxLength = Length - Offset;
+ if (Buffer.size() > MaxLength)
+ Buffer = Buffer.slice(0, MaxLength);
+ return Error::success();
+ }
+};
+
+class WritableStreamRef
+ : public StreamRefBase<WritableStream, WritableStreamRef> {
+public:
+ WritableStreamRef() = default;
+ WritableStreamRef(const WritableStream &Stream)
+ : StreamRefBase(Stream, 0, Stream.getLength()) {}
+ WritableStreamRef(const WritableStream &Stream, uint32_t Offset,
+ uint32_t Length)
+ : StreamRefBase(Stream, Offset, Length) {}
+
+ // Use StreamRef.slice() instead.
+ WritableStreamRef(const WritableStreamRef &S, uint32_t Offset,
+ uint32_t Length) = delete;
+
+ Error writeBytes(uint32_t Offset, ArrayRef<uint8_t> Data) const {
+ if (Data.size() + Offset > Length)
+ return make_error<MSFError>(msf_error_code::insufficient_buffer);
+ return Stream->writeBytes(ViewOffset + Offset, Data);
+ }
+
+ Error commit() const { return Stream->commit(); }
+};
+
+} // end namespace msf
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_MSF_STREAMREF_H
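
drop_front, keep_front and slice build narrowed views without copying; a short sketch, where Whole is assumed to wrap some ReadableStream such as a MappedBlockStream:

    #include "llvm/DebugInfo/MSF/StreamRef.h"
    using namespace llvm;
    using namespace llvm::msf;

    Error readMiddle(ReadableStreamRef Whole) {
      // Bytes [8, 24) of the original stream, as an independent view.
      ReadableStreamRef Middle = Whole.slice(8, 16);

      ArrayRef<uint8_t> Bytes;
      if (auto EC = Middle.readBytes(0, 16, Bytes)) // offset 0 of the view
        return EC;
      // Bytes aliases the underlying stream's storage; no copy is made unless
      // the underlying stream had to assemble discontiguous blocks.
      return Error::success();
    }
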
diff --git a/contrib/llvm/include/llvm/DebugInfo/CodeView/StreamWriter.h b/contrib/llvm/include/llvm/DebugInfo/MSF/StreamWriter.h
index 4d393d2ef790..2bb14434dd83 100644
--- a/contrib/llvm/include/llvm/DebugInfo/CodeView/StreamWriter.h
+++ b/contrib/llvm/include/llvm/DebugInfo/MSF/StreamWriter.h
@@ -7,34 +7,39 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_DEBUGINFO_CODEVIEW_STREAMWRITER_H
-#define LLVM_DEBUGINFO_CODEVIEW_STREAMWRITER_H
+#ifndef LLVM_DEBUGINFO_MSF_STREAMWRITER_H
+#define LLVM_DEBUGINFO_MSF_STREAMWRITER_H
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/DebugInfo/CodeView/CodeViewError.h"
-#include "llvm/DebugInfo/CodeView/StreamArray.h"
-#include "llvm/DebugInfo/CodeView/StreamInterface.h"
-#include "llvm/Support/Endian.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/DebugInfo/MSF/MSFError.h"
+#include "llvm/DebugInfo/MSF/StreamArray.h"
+#include "llvm/DebugInfo/MSF/StreamRef.h"
#include "llvm/Support/Error.h"
-
-#include <string>
+#include <cstdint>
+#include <type_traits>
namespace llvm {
-namespace codeview {
-
-class StreamRef;
+namespace msf {
class StreamWriter {
public:
- StreamWriter(StreamRef Stream);
+ StreamWriter() = default;
+ explicit StreamWriter(WritableStreamRef Stream);
Error writeBytes(ArrayRef<uint8_t> Buffer);
+ Error writeInteger(uint8_t Int);
Error writeInteger(uint16_t Dest);
Error writeInteger(uint32_t Dest);
+ Error writeInteger(uint64_t Dest);
+ Error writeInteger(int8_t Int);
+ Error writeInteger(int16_t Dest);
+ Error writeInteger(int32_t Dest);
+ Error writeInteger(int64_t Dest);
Error writeZeroString(StringRef Str);
Error writeFixedString(StringRef Str);
- Error writeStreamRef(StreamRef Ref);
- Error writeStreamRef(StreamRef Ref, uint32_t Size);
+ Error writeStreamRef(ReadableStreamRef Ref);
+ Error writeStreamRef(ReadableStreamRef Ref, uint32_t Size);
template <typename T> Error writeEnum(T Num) {
return writeInteger(
@@ -51,11 +56,11 @@ public:
}
template <typename T> Error writeArray(ArrayRef<T> Array) {
- if (Array.size() == 0)
+ if (Array.empty())
return Error::success();
if (Array.size() > UINT32_MAX / sizeof(T))
- return make_error<CodeViewError>(cv_error_code::insufficient_buffer);
+ return make_error<MSFError>(msf_error_code::insufficient_buffer);
return writeBytes(
ArrayRef<uint8_t>(reinterpret_cast<const uint8_t *>(Array.data()),
@@ -77,10 +82,11 @@ public:
uint32_t bytesRemaining() const { return getLength() - getOffset(); }
private:
- StreamRef Stream;
- uint32_t Offset;
+ WritableStreamRef Stream;
+ uint32_t Offset = 0;
};
-} // namespace codeview
-} // namespace llvm
-#endif // LLVM_DEBUGINFO_CODEVIEW_STREAMREADER_H
+} // end namespace msf
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_MSF_STREAMWRITER_H
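
The writer mirrors the reader; a sketch of serializing a small record, where Out is a WritableStreamRef over space that has already been allocated (the writer cannot grow the stream):

    #include "llvm/DebugInfo/MSF/StreamRef.h"
    #include "llvm/DebugInfo/MSF/StreamWriter.h"
    using namespace llvm;
    using namespace llvm::msf;

    Error writeToyHeader(WritableStreamRef Out) {
      StreamWriter Writer(Out);

      if (auto EC = Writer.writeInteger(uint32_t(1)))  // version
        return EC;
      if (auto EC = Writer.writeInteger(uint16_t(0)))  // flags
        return EC;
      if (auto EC = Writer.writeZeroString("example")) // NUL-terminated name
        return EC;
      return Out.commit();
    }
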
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h b/contrib/llvm/include/llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h
index 50f5c40bcac9..9bf073831565 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/ConcreteSymbolEnumerator.h
@@ -10,8 +10,11 @@
#ifndef LLVM_DEBUGINFO_PDB_CONCRETESYMBOLENUMERATOR_H
#define LLVM_DEBUGINFO_PDB_CONCRETESYMBOLENUMERATOR_H
-#include "IPDBEnumChildren.h"
+#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
+#include "llvm/DebugInfo/PDB/PDBTypes.h"
#include "llvm/Support/Casting.h"
+#include <algorithm>
+#include <cstdint>
#include <memory>
namespace llvm {
@@ -23,7 +26,7 @@ public:
ConcreteSymbolEnumerator(std::unique_ptr<IPDBEnumSymbols> SymbolEnumerator)
: Enumerator(std::move(SymbolEnumerator)) {}
- ~ConcreteSymbolEnumerator() override {}
+ ~ConcreteSymbolEnumerator() override = default;
uint32_t getChildCount() const override {
return Enumerator->getChildCount();
@@ -55,7 +58,8 @@ private:
std::unique_ptr<IPDBEnumSymbols> Enumerator;
};
-}
-}
-#endif
+} // end namespace pdb
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_PDB_CONCRETESYMBOLENUMERATOR_H
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAError.h b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAError.h
index f198d07e99d4..35a39a0df5ca 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAError.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/DIA/DIAError.h
@@ -10,10 +10,9 @@
#ifndef LLVM_DEBUGINFO_PDB_DIA_DIAERROR_H
#define LLVM_DEBUGINFO_PDB_DIA_DIAERROR_H
+#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Error.h"
-#include <string>
-
namespace llvm {
namespace pdb {
enum class dia_error_code {
@@ -30,11 +29,11 @@ class DIAError : public ErrorInfo<DIAError> {
public:
static char ID;
DIAError(dia_error_code C);
- DIAError(const std::string &Context);
- DIAError(dia_error_code C, const std::string &Context);
+ DIAError(StringRef Context);
+ DIAError(dia_error_code C, StringRef Context);
void log(raw_ostream &OS) const override;
- const std::string &getErrorMessage() const;
+ StringRef getErrorMessage() const;
std::error_code convertToErrorCode() const override;
private:
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/GenericError.h b/contrib/llvm/include/llvm/DebugInfo/PDB/GenericError.h
index 959c26161044..466cb455651b 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/GenericError.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/GenericError.h
@@ -10,6 +10,7 @@
#ifndef LLVM_DEBUGINFO_PDB_ERROR_H
#define LLVM_DEBUGINFO_PDB_ERROR_H
+#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Error.h"
namespace llvm {
@@ -26,11 +27,11 @@ class GenericError : public ErrorInfo<GenericError> {
public:
static char ID;
GenericError(generic_error_code C);
- GenericError(const std::string &Context);
- GenericError(generic_error_code C, const std::string &Context);
+ GenericError(StringRef Context);
+ GenericError(generic_error_code C, StringRef Context);
void log(raw_ostream &OS) const override;
- const std::string &getErrorMessage() const;
+ StringRef getErrorMessage() const;
std::error_code convertToErrorCode() const override;
private:
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBEnumChildren.h b/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBEnumChildren.h
index 8e9f6f883679..e48dc250822e 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBEnumChildren.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBEnumChildren.h
@@ -10,7 +10,7 @@
#ifndef LLVM_DEBUGINFO_PDB_IPDBENUMCHILDREN_H
#define LLVM_DEBUGINFO_PDB_IPDBENUMCHILDREN_H
-#include "PDBTypes.h"
+#include <cstdint>
#include <memory>
namespace llvm {
@@ -21,7 +21,7 @@ public:
typedef std::unique_ptr<ChildType> ChildTypePtr;
typedef IPDBEnumChildren<ChildType> MyType;
- virtual ~IPDBEnumChildren() {}
+ virtual ~IPDBEnumChildren() = default;
virtual uint32_t getChildCount() const = 0;
virtual ChildTypePtr getChildAtIndex(uint32_t Index) const = 0;
@@ -29,7 +29,8 @@ public:
virtual void reset() = 0;
virtual MyType *clone() const = 0;
};
-}
-}
-#endif
+} // end namespace pdb
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_PDB_IPDBENUMCHILDREN_H
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBSession.h b/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBSession.h
index 3d2c37eff2e3..15e97ac198e5 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBSession.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/IPDBSession.h
@@ -40,7 +40,7 @@ public:
T *ConcreteSymbol = dyn_cast<T>(Symbol.get());
if (!ConcreteSymbol)
return nullptr;
- Symbol.release();
+ (void)Symbol.release();
return std::unique_ptr<T>(ConcreteSymbol);
}
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBContext.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBContext.h
index 836e39248438..84ab8ed173cb 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBContext.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBContext.h
@@ -12,14 +12,18 @@
#include "llvm/DebugInfo/DIContext.h"
#include "llvm/DebugInfo/PDB/IPDBSession.h"
+#include <cstdint>
+#include <memory>
+#include <string>
namespace llvm {
namespace object {
class COFFObjectFile;
- }
+} // end namespace object
+
+namespace pdb {
- namespace pdb {
/// PDBContext
/// This data structure is the top level entity that deals with PDB debug
/// information parsing. This data structure exists only when there is a
@@ -27,20 +31,18 @@ class COFFObjectFile;
/// (e.g. PDB and DWARF). More control and power over the debug information
/// access can be had by using the PDB interfaces directly.
class PDBContext : public DIContext {
-
- PDBContext(PDBContext &) = delete;
- PDBContext &operator=(PDBContext &) = delete;
-
public:
PDBContext(const object::COFFObjectFile &Object,
std::unique_ptr<IPDBSession> PDBSession);
+ PDBContext(PDBContext &) = delete;
+ PDBContext &operator=(PDBContext &) = delete;
static bool classof(const DIContext *DICtx) {
return DICtx->getKind() == CK_PDB;
}
void dump(raw_ostream &OS, DIDumpType DumpType = DIDT_All,
- bool DumpEH = false) override;
+ bool DumpEH = false, bool SummarizeTypes = false) override;
DILineInfo getLineInfoForAddress(
uint64_t Address,
@@ -56,7 +58,9 @@ class COFFObjectFile;
std::string getFunctionName(uint64_t Address, DINameKind NameKind) const;
std::unique_ptr<IPDBSession> Session;
};
- }
-}
-#endif
+} // end namespace pdb
+
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_PDB_PDBCONTEXT_H
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBTypes.h b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBTypes.h
index a9325a434366..0d232f15d745 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/PDBTypes.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/PDBTypes.h
@@ -12,9 +12,10 @@
#include "llvm/Config/llvm-config.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
-#include <functional>
+#include "llvm/DebugInfo/PDB/Raw/RawTypes.h"
#include <cstdint>
#include <cstring>
+#include <functional>
namespace llvm {
namespace pdb {
@@ -73,13 +74,6 @@ enum class PDB_ReaderType {
Raw = 1,
};
-/// Defines a 128-bit unique identifier. This maps to a GUID on Windows, but
-/// is abstracted here for the purposes of non-Windows platforms that don't have
-/// the GUID structure defined.
-struct PDB_UniqueId {
- char Guid[16];
-};
-
/// An enumeration indicating the type of data contained in this table.
enum class PDB_TableType {
Symbols,
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/DbiStream.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/DbiStream.h
index 6ab3c8067558..c97ca32ab43d 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/DbiStream.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/DbiStream.h
@@ -11,10 +11,10 @@
#define LLVM_DEBUGINFO_PDB_RAW_PDBDBISTREAM_H
#include "llvm/DebugInfo/CodeView/ModuleSubstream.h"
-#include "llvm/DebugInfo/CodeView/StreamArray.h"
-#include "llvm/DebugInfo/CodeView/StreamRef.h"
+#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
+#include "llvm/DebugInfo/MSF/StreamArray.h"
+#include "llvm/DebugInfo/MSF/StreamRef.h"
#include "llvm/DebugInfo/PDB/PDBTypes.h"
-#include "llvm/DebugInfo/PDB/Raw/MappedBlockStream.h"
#include "llvm/DebugInfo/PDB/Raw/ModInfo.h"
#include "llvm/DebugInfo/PDB/Raw/NameHashTable.h"
#include "llvm/DebugInfo/PDB/Raw/RawConstants.h"
@@ -36,32 +36,8 @@ class ISectionContribVisitor;
class DbiStream {
friend class DbiStreamBuilder;
- struct HeaderInfo {
- support::little32_t VersionSignature;
- support::ulittle32_t VersionHeader;
- support::ulittle32_t Age; // Should match InfoStream.
- support::ulittle16_t GlobalSymbolStreamIndex; // Global symbol stream #
- support::ulittle16_t BuildNumber; // See DbiBuildNo structure.
- support::ulittle16_t PublicSymbolStreamIndex; // Public symbols stream #
- support::ulittle16_t PdbDllVersion; // version of mspdbNNN.dll
- support::ulittle16_t SymRecordStreamIndex; // Symbol records stream #
- support::ulittle16_t PdbDllRbld; // rbld number of mspdbNNN.dll
- support::little32_t ModiSubstreamSize; // Size of module info stream
- support::little32_t SecContrSubstreamSize; // Size of sec. contrib stream
- support::little32_t SectionMapSize; // Size of sec. map substream
- support::little32_t FileInfoSize; // Size of file info substream
- support::little32_t TypeServerSize; // Size of type server map
- support::ulittle32_t MFCTypeServerIndex; // Index of MFC Type Server
- support::little32_t OptionalDbgHdrSize; // Size of DbgHeader info
- support::little32_t ECSubstreamSize; // Size of EC stream (what is EC?)
- support::ulittle16_t Flags; // See DbiFlags enum.
- support::ulittle16_t MachineType; // See PDB_MachineType enum.
-
- support::ulittle32_t Reserved; // Pad to 64 bytes
- };
-
public:
- DbiStream(PDBFile &File, std::unique_ptr<MappedBlockStream> Stream);
+ DbiStream(PDBFile &File, std::unique_ptr<msf::MappedBlockStream> Stream);
~DbiStream();
Error reload();
@@ -86,8 +62,6 @@ public:
PDB_Machine getMachineType() const;
- enum { InvalidStreamIndex = 0xffff };
-
/// If the given stream type is present, returns its stream index. If it is
/// not present, returns InvalidStreamIndex.
uint32_t getDebugStreamIndex(DbgHeaderType Type) const;
@@ -96,16 +70,15 @@ public:
Expected<StringRef> getFileNameForIndex(uint32_t Index) const;
- codeview::FixedStreamArray<object::coff_section> getSectionHeaders();
+ msf::FixedStreamArray<object::coff_section> getSectionHeaders();
- codeview::FixedStreamArray<object::FpoData> getFpoRecords();
+ msf::FixedStreamArray<object::FpoData> getFpoRecords();
- codeview::FixedStreamArray<SecMapEntry> getSectionMap() const;
+ msf::FixedStreamArray<SecMapEntry> getSectionMap() const;
void visitSectionContributions(ISectionContribVisitor &Visitor) const;
- Error commit();
-
private:
+ Error initializeModInfoArray();
Error initializeSectionContributionData();
Error initializeSectionHeadersData();
Error initializeSectionMapData();
@@ -113,35 +86,35 @@ private:
Error initializeFpoRecords();
PDBFile &Pdb;
- std::unique_ptr<MappedBlockStream> Stream;
+ std::unique_ptr<msf::MappedBlockStream> Stream;
std::vector<ModuleInfoEx> ModuleInfos;
NameHashTable ECNames;
- codeview::StreamRef ModInfoSubstream;
- codeview::StreamRef SecContrSubstream;
- codeview::StreamRef SecMapSubstream;
- codeview::StreamRef FileInfoSubstream;
- codeview::StreamRef TypeServerMapSubstream;
- codeview::StreamRef ECSubstream;
+ msf::ReadableStreamRef ModInfoSubstream;
+ msf::ReadableStreamRef SecContrSubstream;
+ msf::ReadableStreamRef SecMapSubstream;
+ msf::ReadableStreamRef FileInfoSubstream;
+ msf::ReadableStreamRef TypeServerMapSubstream;
+ msf::ReadableStreamRef ECSubstream;
- codeview::StreamRef NamesBuffer;
+ msf::ReadableStreamRef NamesBuffer;
- codeview::FixedStreamArray<support::ulittle16_t> DbgStreams;
+ msf::FixedStreamArray<support::ulittle16_t> DbgStreams;
PdbRaw_DbiSecContribVer SectionContribVersion;
- codeview::FixedStreamArray<SectionContrib> SectionContribs;
- codeview::FixedStreamArray<SectionContrib2> SectionContribs2;
- codeview::FixedStreamArray<SecMapEntry> SectionMap;
- codeview::FixedStreamArray<support::little32_t> FileNameOffsets;
+ msf::FixedStreamArray<SectionContrib> SectionContribs;
+ msf::FixedStreamArray<SectionContrib2> SectionContribs2;
+ msf::FixedStreamArray<SecMapEntry> SectionMap;
+ msf::FixedStreamArray<support::little32_t> FileNameOffsets;
- std::unique_ptr<MappedBlockStream> SectionHeaderStream;
- codeview::FixedStreamArray<object::coff_section> SectionHeaders;
+ std::unique_ptr<msf::MappedBlockStream> SectionHeaderStream;
+ msf::FixedStreamArray<object::coff_section> SectionHeaders;
- std::unique_ptr<MappedBlockStream> FpoStream;
- codeview::FixedStreamArray<object::FpoData> FpoRecords;
+ std::unique_ptr<msf::MappedBlockStream> FpoStream;
+ msf::FixedStreamArray<object::FpoData> FpoRecords;
- const HeaderInfo *Header;
+ const DbiStreamHeader *Header;
};
}
}
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/DbiStreamBuilder.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/DbiStreamBuilder.h
index 2c7350f3c3e7..99a3ac7fb1da 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/DbiStreamBuilder.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/DbiStreamBuilder.h
@@ -11,20 +11,31 @@
#define LLVM_DEBUGINFO_PDB_RAW_PDBDBISTREAMBUILDER_H
#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringSet.h"
#include "llvm/Support/Error.h"
+#include "llvm/DebugInfo/MSF/ByteStream.h"
+#include "llvm/DebugInfo/MSF/StreamReader.h"
#include "llvm/DebugInfo/PDB/PDBTypes.h"
#include "llvm/DebugInfo/PDB/Raw/PDBFile.h"
#include "llvm/DebugInfo/PDB/Raw/RawConstants.h"
+#include "llvm/Support/Endian.h"
namespace llvm {
+namespace msf {
+class MSFBuilder;
+}
+namespace object {
+struct coff_section;
+}
namespace pdb {
class DbiStream;
+struct DbiStreamHeader;
class PDBFile;
class DbiStreamBuilder {
public:
- DbiStreamBuilder();
+ DbiStreamBuilder(msf::MSFBuilder &Msf);
DbiStreamBuilder(const DbiStreamBuilder &) = delete;
DbiStreamBuilder &operator=(const DbiStreamBuilder &) = delete;
@@ -36,12 +47,57 @@ public:
void setPdbDllRbld(uint16_t R);
void setFlags(uint16_t F);
void setMachineType(PDB_Machine M);
+ void setSectionContribs(ArrayRef<SectionContrib> SecMap);
+ void setSectionMap(ArrayRef<SecMapEntry> SecMap);
+
+ // Add given bytes as a new stream.
+ Error addDbgStream(pdb::DbgHeaderType Type, ArrayRef<uint8_t> Data);
uint32_t calculateSerializedLength() const;
- Expected<std::unique_ptr<DbiStream>> build(PDBFile &File);
+ Error addModuleInfo(StringRef ObjFile, StringRef Module);
+ Error addModuleSourceFile(StringRef Module, StringRef File);
+
+ Error finalizeMsfLayout();
+
+ Error commit(const msf::MSFLayout &Layout,
+ const msf::WritableStream &Buffer);
+
+ // A helper function to create Section Contributions from COFF input
+ // section headers.
+ static std::vector<SectionContrib>
+ createSectionContribs(ArrayRef<llvm::object::coff_section> SecHdrs);
+
+ // A helper function to create a Section Map from a COFF section header.
+ static std::vector<SecMapEntry>
+ createSectionMap(ArrayRef<llvm::object::coff_section> SecHdrs);
private:
+ struct DebugStream {
+ ArrayRef<uint8_t> Data;
+ uint16_t StreamNumber = 0;
+ };
+
+ Error finalize();
+ uint32_t calculateModiSubstreamSize() const;
+ uint32_t calculateSectionContribsStreamSize() const;
+ uint32_t calculateSectionMapStreamSize() const;
+ uint32_t calculateFileInfoSubstreamSize() const;
+ uint32_t calculateNamesBufferSize() const;
+ uint32_t calculateDbgStreamsSize() const;
+
+ Error generateModiSubstream();
+ Error generateFileInfoSubstream();
+
+ struct ModuleInfo {
+ std::vector<StringRef> SourceFiles;
+ StringRef Obj;
+ StringRef Mod;
+ };
+
+ msf::MSFBuilder &Msf;
+ BumpPtrAllocator &Allocator;
+
Optional<PdbRaw_DbiVer> VerHeader;
uint32_t Age;
uint16_t BuildNumber;
@@ -49,6 +105,20 @@ private:
uint16_t PdbDllRbld;
uint16_t Flags;
PDB_Machine MachineType;
+
+ const DbiStreamHeader *Header;
+
+ StringMap<std::unique_ptr<ModuleInfo>> ModuleInfos;
+ std::vector<ModuleInfo *> ModuleInfoList;
+
+ StringMap<uint32_t> SourceFileNames;
+
+ msf::WritableStreamRef NamesBuffer;
+ msf::MutableByteStream ModInfoBuffer;
+ msf::MutableByteStream FileInfoBuffer;
+ ArrayRef<SectionContrib> SectionContribs;
+ ArrayRef<SecMapEntry> SectionMap;
+ llvm::SmallVector<DebugStream, (int)DbgHeaderType::Max> DbgStreams;
};
}
}
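
A rough sketch of the intended flow for the reworked builder above: it is now constructed over an msf::MSFBuilder, modules and source files are registered, and finalizeMsfLayout() reserves the needed blocks before a later commit() writes everything out. The module and file names are placeholders, and the commit step (which needs the final MSFLayout plus a WritableStream over the file) is elided:

    #include "llvm/DebugInfo/MSF/MSFBuilder.h"
    #include "llvm/DebugInfo/PDB/Raw/DbiStreamBuilder.h"
    using namespace llvm;
    using namespace llvm::pdb;

    Error populateDbi(msf::MSFBuilder &Msf) {
      DbiStreamBuilder Dbi(Msf);
      Dbi.setFlags(0);

      if (auto EC = Dbi.addModuleInfo("foo.obj", "foo.obj"))
        return EC;
      if (auto EC = Dbi.addModuleSourceFile("foo.obj", "foo.cpp"))
        return EC;

      // Reserve the DBI stream's blocks in the MSF layout; the caller later
      // calls commit() with the finalized layout and a writable file buffer.
      return Dbi.finalizeMsfLayout();
    }
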
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/DirectoryStreamData.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/DirectoryStreamData.h
deleted file mode 100644
index 0f354315122c..000000000000
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/DirectoryStreamData.h
+++ /dev/null
@@ -1,37 +0,0 @@
-//===- DirectoryStreamData.h ---------------------------------- *- C++ --*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_DEBUGINFO_PDB_RAW_DIRECTORYSTREAMDATA_H
-#define LLVM_DEBUGINFO_PDB_RAW_DIRECTORYSTREAMDATA_H
-
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/DebugInfo/PDB/Raw/IPDBStreamData.h"
-#include "llvm/DebugInfo/PDB/Raw/PDBFile.h"
-#include "llvm/Support/Endian.h"
-
-namespace llvm {
-namespace pdb {
-class IPDBFile;
-
-class DirectoryStreamData : public IPDBStreamData {
-public:
- DirectoryStreamData(const PDBFile &File) : File(File) {}
-
- virtual uint32_t getLength() { return File.getNumDirectoryBytes(); }
- virtual llvm::ArrayRef<llvm::support::ulittle32_t> getStreamBlocks() {
- return File.getDirectoryBlockArray();
- }
-
-private:
- const PDBFile &File;
-};
-}
-}
-
-#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/GlobalsStream.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/GlobalsStream.h
new file mode 100644
index 000000000000..175f093cf53c
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/GlobalsStream.h
@@ -0,0 +1,45 @@
+//===- GlobalsStream.h - PDB Index of Symbols by Name ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_PDB_RAW_GLOBALS_STREAM_H
+#define LLVM_DEBUGINFO_PDB_RAW_GLOBALS_STREAM_H
+
+#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
+#include "llvm/DebugInfo/MSF/StreamArray.h"
+#include "llvm/DebugInfo/PDB/PDBTypes.h"
+#include "llvm/DebugInfo/PDB/Raw/RawConstants.h"
+#include "llvm/DebugInfo/PDB/Raw/RawTypes.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+namespace pdb {
+class DbiStream;
+class PDBFile;
+
+class GlobalsStream {
+public:
+ explicit GlobalsStream(std::unique_ptr<msf::MappedBlockStream> Stream);
+ ~GlobalsStream();
+ Error commit();
+ msf::FixedStreamArray<support::ulittle32_t> getHashBuckets() const {
+ return HashBuckets;
+ }
+ uint32_t getNumBuckets() const { return NumBuckets; }
+ Error reload();
+
+private:
+ msf::FixedStreamArray<support::ulittle32_t> HashBuckets;
+ msf::FixedStreamArray<PSHashRecord> HashRecords;
+ uint32_t NumBuckets;
+ std::unique_ptr<msf::MappedBlockStream> Stream;
+};
+}
+}
+
+#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/IPDBStreamData.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/IPDBStreamData.h
deleted file mode 100644
index ab3c9f770755..000000000000
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/IPDBStreamData.h
+++ /dev/null
@@ -1,38 +0,0 @@
-//===- IPDBStreamData.h - Base interface for PDB Stream Data ----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_DEBUGINFO_PDB_RAW_IPDBSTREAMDATA_H
-#define LLVM_DEBUGINFO_PDB_RAW_IPDBSTREAMDATA_H
-
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/Support/Endian.h"
-
-namespace llvm {
-namespace pdb {
-/// IPDBStream abstracts the notion of PDB stream data. Although we already
-/// have another stream abstraction (namely in the form of StreamInterface
-/// and MappedBlockStream), they assume that the stream data is referenced
-/// the same way. Namely, by looking in the directory to get the list of
-/// stream blocks, and by looking in the array of stream lengths to get the
-/// length. This breaks down for the directory itself, however, since its
-/// length and list of blocks are stored elsewhere. By abstracting the
-/// notion of stream data further, we can use a MappedBlockStream to read
-/// from the directory itself, or from an indexed stream which references
-/// the directory.
-class IPDBStreamData {
-public:
- virtual ~IPDBStreamData() {}
-
- virtual uint32_t getLength() = 0;
- virtual ArrayRef<support::ulittle32_t> getStreamBlocks() = 0;
-};
-}
-}
-
-#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/ISectionContribVisitor.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/ISectionContribVisitor.h
index 355a25a38ef8..fb00d6ad4bc7 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/ISectionContribVisitor.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/ISectionContribVisitor.h
@@ -12,17 +12,19 @@
namespace llvm {
namespace pdb {
+
struct SectionContrib;
struct SectionContrib2;
class ISectionContribVisitor {
public:
- virtual ~ISectionContribVisitor() {}
+ virtual ~ISectionContribVisitor() = default;
virtual void visit(const SectionContrib &C) = 0;
virtual void visit(const SectionContrib2 &C) = 0;
};
-} // namespace pdb
-} // namespace llvm
+
+} // end namespace pdb
+} // end namespace llvm
#endif // LLVM_DEBUGINFO_PDB_RAW_ISECTIONCONTRIBVISITOR_H
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/IndexedStreamData.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/IndexedStreamData.h
deleted file mode 100644
index 30563bc5b898..000000000000
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/IndexedStreamData.h
+++ /dev/null
@@ -1,34 +0,0 @@
-//===- IndexedStreamData.h - Standard PDB Stream Data -----------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_DEBUGINFO_PDB_RAW_INDEXEDSTREAMDATA_H
-#define LLVM_DEBUGINFO_PDB_RAW_INDEXEDSTREAMDATA_H
-
-#include "llvm/DebugInfo/PDB/Raw/IPDBStreamData.h"
-
-namespace llvm {
-namespace pdb {
-class IPDBFile;
-
-class IndexedStreamData : public IPDBStreamData {
-public:
- IndexedStreamData(uint32_t StreamIdx, const IPDBFile &File);
- virtual ~IndexedStreamData() {}
-
- uint32_t getLength() override;
- ArrayRef<support::ulittle32_t> getStreamBlocks() override;
-
-private:
- uint32_t StreamIdx;
- const IPDBFile &File;
-};
-}
-}
-
-#endif
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/InfoStream.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/InfoStream.h
index 1980bec7153e..6b8b94ff1a36 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/InfoStream.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/InfoStream.h
@@ -11,8 +11,8 @@
#define LLVM_DEBUGINFO_PDB_RAW_PDBINFOSTREAM_H
#include "llvm/ADT/StringMap.h"
+#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
#include "llvm/DebugInfo/PDB/PDBTypes.h"
-#include "llvm/DebugInfo/PDB/Raw/MappedBlockStream.h"
#include "llvm/DebugInfo/PDB/Raw/NameMap.h"
#include "llvm/DebugInfo/PDB/Raw/RawConstants.h"
@@ -27,18 +27,10 @@ class PDBFile;
class InfoStream {
friend class InfoStreamBuilder;
- struct HeaderInfo {
- support::ulittle32_t Version;
- support::ulittle32_t Signature;
- support::ulittle32_t Age;
- PDB_UniqueId Guid;
- };
-
public:
- InfoStream(std::unique_ptr<MappedBlockStream> Stream);
+ InfoStream(std::unique_ptr<msf::MappedBlockStream> Stream);
Error reload();
- Error commit();
PdbRaw_ImplVer getVersion() const;
uint32_t getSignature() const;
@@ -49,7 +41,7 @@ public:
iterator_range<StringMapConstIterator<uint32_t>> named_streams() const;
private:
- std::unique_ptr<MappedBlockStream> Stream;
+ std::unique_ptr<msf::MappedBlockStream> Stream;
// PDB file format version. We only support VC70. See the enumeration
// `PdbRaw_ImplVer` for the other possible values.
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/InfoStreamBuilder.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/InfoStreamBuilder.h
index e9869bb27863..cb60b1eb69bd 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/InfoStreamBuilder.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/InfoStreamBuilder.h
@@ -19,12 +19,16 @@
#include "llvm/DebugInfo/PDB/Raw/RawConstants.h"
namespace llvm {
+namespace msf {
+class MSFBuilder;
+class StreamWriter;
+}
namespace pdb {
class PDBFile;
class InfoStreamBuilder {
public:
- InfoStreamBuilder();
+ InfoStreamBuilder(msf::MSFBuilder &Msf);
InfoStreamBuilder(const InfoStreamBuilder &) = delete;
InfoStreamBuilder &operator=(const InfoStreamBuilder &) = delete;
@@ -37,13 +41,18 @@ public:
uint32_t calculateSerializedLength() const;
- Expected<std::unique_ptr<InfoStream>> build(PDBFile &File);
+ Error finalizeMsfLayout();
+
+ Error commit(const msf::MSFLayout &Layout,
+ const msf::WritableStream &Buffer) const;
private:
- Optional<PdbRaw_ImplVer> Ver;
- Optional<uint32_t> Sig;
- Optional<uint32_t> Age;
- Optional<PDB_UniqueId> Guid;
+ msf::MSFBuilder &Msf;
+
+ PdbRaw_ImplVer Ver;
+ uint32_t Sig;
+ uint32_t Age;
+ PDB_UniqueId Guid;
NameMapBuilder NamedStreams;
};
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/MappedBlockStream.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/MappedBlockStream.h
deleted file mode 100644
index 36424c0d16ab..000000000000
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/MappedBlockStream.h
+++ /dev/null
@@ -1,68 +0,0 @@
-//===- MappedBlockStream.h - Reads stream data from a PDBFile ---*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_DEBUGINFO_PDB_RAW_MAPPEDBLOCKSTREAM_H
-#define LLVM_DEBUGINFO_PDB_RAW_MAPPEDBLOCKSTREAM_H
-
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/DebugInfo/CodeView/StreamInterface.h"
-#include "llvm/DebugInfo/PDB/Raw/IPDBStreamData.h"
-#include "llvm/Support/Allocator.h"
-#include "llvm/Support/Endian.h"
-#include "llvm/Support/Error.h"
-#include <cstdint>
-#include <vector>
-
-namespace llvm {
-namespace pdb {
-
-class IPDBFile;
-class PDBFile;
-
-class MappedBlockStream : public codeview::StreamInterface {
-public:
- Error readBytes(uint32_t Offset, uint32_t Size,
- ArrayRef<uint8_t> &Buffer) const override;
- Error readLongestContiguousChunk(uint32_t Offset,
- ArrayRef<uint8_t> &Buffer) const override;
- Error writeBytes(uint32_t Offset, ArrayRef<uint8_t> Buffer) const override;
-
- uint32_t getLength() const override;
- Error commit() const override;
-
- uint32_t getNumBytesCopied() const;
-
- static Expected<std::unique_ptr<MappedBlockStream>>
- createIndexedStream(uint32_t StreamIdx, const IPDBFile &File);
- static Expected<std::unique_ptr<MappedBlockStream>>
- createDirectoryStream(const PDBFile &File);
-
- llvm::BumpPtrAllocator &getAllocator() { return Pool; }
-
-protected:
- MappedBlockStream(std::unique_ptr<IPDBStreamData> Data, const IPDBFile &File);
-
- Error readBytes(uint32_t Offset, MutableArrayRef<uint8_t> Buffer) const;
- bool tryReadContiguously(uint32_t Offset, uint32_t Size,
- ArrayRef<uint8_t> &Buffer) const;
-
- const IPDBFile &Pdb;
- std::unique_ptr<IPDBStreamData> Data;
-
- typedef MutableArrayRef<uint8_t> CacheEntry;
- mutable llvm::BumpPtrAllocator Pool;
- mutable DenseMap<uint32_t, std::vector<CacheEntry>> CacheMap;
-};
-
-} // end namespace pdb
-} // end namespace llvm
-
-#endif // LLVM_DEBUGINFO_PDB_RAW_MAPPEDBLOCKSTREAM_H
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/ModInfo.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/ModInfo.h
index b8da0bfabf38..bf5cf53b3313 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/ModInfo.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/ModInfo.h
@@ -11,24 +11,26 @@
#define LLVM_DEBUGINFO_PDB_RAW_MODINFO_H
#include "llvm/ADT/StringRef.h"
-#include "llvm/DebugInfo/CodeView/StreamArray.h"
-#include "llvm/DebugInfo/CodeView/StreamRef.h"
+#include "llvm/DebugInfo/MSF/StreamArray.h"
+#include "llvm/DebugInfo/MSF/StreamRef.h"
+#include "llvm/DebugInfo/PDB/Raw/RawTypes.h"
+#include "llvm/Support/Error.h"
#include <cstdint>
#include <vector>
namespace llvm {
+
namespace pdb {
class ModInfo {
-private:
- struct FileLayout;
+ friend class DbiStreamBuilder;
public:
ModInfo();
ModInfo(const ModInfo &Info);
~ModInfo();
- static Error initialize(codeview::StreamRef Stream, ModInfo &Info);
+ static Error initialize(msf::ReadableStreamRef Stream, ModInfo &Info);
bool hasECInfo() const;
uint16_t getTypeServerIndex() const;
@@ -48,13 +50,12 @@ public:
private:
StringRef ModuleName;
StringRef ObjFileName;
- const FileLayout *Layout;
+ const ModuleInfoHeader *Layout = nullptr;
};
struct ModuleInfoEx {
ModuleInfoEx(const ModInfo &Info) : Info(Info) {}
- ModuleInfoEx(const ModuleInfoEx &Ex)
- : Info(Ex.Info), SourceFiles(Ex.SourceFiles) {}
+ ModuleInfoEx(const ModuleInfoEx &Ex) = default;
ModInfo Info;
std::vector<StringRef> SourceFiles;
@@ -62,9 +63,10 @@ struct ModuleInfoEx {
} // end namespace pdb
-namespace codeview {
+namespace msf {
+
template <> struct VarStreamArrayExtractor<pdb::ModInfo> {
- Error operator()(StreamRef Stream, uint32_t &Length,
+ Error operator()(ReadableStreamRef Stream, uint32_t &Length,
pdb::ModInfo &Info) const {
if (auto EC = pdb::ModInfo::initialize(Stream, Info))
return EC;
@@ -72,7 +74,8 @@ template <> struct VarStreamArrayExtractor<pdb::ModInfo> {
return Error::success();
}
};
-}
+
+} // end namespace msf
} // end namespace llvm
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/ModStream.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/ModStream.h
index d22962cc1e28..d5e7a6830d8d 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/ModStream.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/ModStream.h
@@ -13,10 +13,10 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/DebugInfo/CodeView/CVRecord.h"
#include "llvm/DebugInfo/CodeView/ModuleSubstream.h"
-#include "llvm/DebugInfo/CodeView/StreamArray.h"
-#include "llvm/DebugInfo/CodeView/StreamRef.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
-#include "llvm/DebugInfo/PDB/Raw/MappedBlockStream.h"
+#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
+#include "llvm/DebugInfo/MSF/StreamArray.h"
+#include "llvm/DebugInfo/MSF/StreamRef.h"
#include "llvm/Support/Error.h"
namespace llvm {
@@ -26,11 +26,14 @@ class ModInfo;
class ModStream {
public:
- ModStream(const ModInfo &Module, std::unique_ptr<MappedBlockStream> Stream);
+ ModStream(const ModInfo &Module,
+ std::unique_ptr<msf::MappedBlockStream> Stream);
~ModStream();
Error reload();
+ uint32_t signature() const { return Signature; }
+
iterator_range<codeview::CVSymbolArray::Iterator>
symbols(bool *HadError) const;
@@ -42,12 +45,14 @@ public:
private:
const ModInfo &Mod;
- std::unique_ptr<MappedBlockStream> Stream;
+ uint32_t Signature;
+
+ std::unique_ptr<msf::MappedBlockStream> Stream;
codeview::CVSymbolArray SymbolsSubstream;
- codeview::StreamRef LinesSubstream;
- codeview::StreamRef C13LinesSubstream;
- codeview::StreamRef GlobalRefsSubstream;
+ msf::ReadableStreamRef LinesSubstream;
+ msf::ReadableStreamRef C13LinesSubstream;
+ msf::ReadableStreamRef GlobalRefsSubstream;
codeview::ModuleSubstreamArray LineInfo;
};
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/NameHashTable.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/NameHashTable.h
index c9e060a3a70f..00d022d4d8e2 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/NameHashTable.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/NameHashTable.h
@@ -12,15 +12,15 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/DebugInfo/CodeView/StreamArray.h"
-#include "llvm/DebugInfo/CodeView/StreamRef.h"
+#include "llvm/DebugInfo/MSF/StreamArray.h"
+#include "llvm/DebugInfo/MSF/StreamRef.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include <cstdint>
#include <vector>
namespace llvm {
-namespace codeview {
+namespace msf {
class StreamReader;
}
namespace pdb {
@@ -29,7 +29,7 @@ class NameHashTable {
public:
NameHashTable();
- Error load(codeview::StreamReader &Stream);
+ Error load(msf::StreamReader &Stream);
uint32_t getNameCount() const { return NameCount; }
uint32_t getHashVersion() const { return HashVersion; }
@@ -38,11 +38,11 @@ public:
StringRef getStringForID(uint32_t ID) const;
uint32_t getIDForString(StringRef Str) const;
- codeview::FixedStreamArray<support::ulittle32_t> name_ids() const;
+ msf::FixedStreamArray<support::ulittle32_t> name_ids() const;
private:
- codeview::StreamRef NamesBuffer;
- codeview::FixedStreamArray<support::ulittle32_t> IDs;
+ msf::ReadableStreamRef NamesBuffer;
+ msf::FixedStreamArray<support::ulittle32_t> IDs;
uint32_t Signature;
uint32_t HashVersion;
uint32_t NameCount;
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/NameMap.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/NameMap.h
index 8a9b0d187ace..de1163bc3079 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/NameMap.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/NameMap.h
@@ -16,7 +16,7 @@
#include <cstdint>
namespace llvm {
-namespace codeview {
+namespace msf {
class StreamReader;
class StreamWriter;
}
@@ -28,8 +28,7 @@ class NameMap {
public:
NameMap();
- Error load(codeview::StreamReader &Stream);
- Error commit(codeview::StreamWriter &Writer);
+ Error load(msf::StreamReader &Stream);
bool tryGetValue(StringRef Name, uint32_t &Value) const;
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/NameMapBuilder.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/NameMapBuilder.h
index bf49bfd9bf2e..f5244ac21808 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/NameMapBuilder.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/NameMapBuilder.h
@@ -17,6 +17,9 @@
#include <memory>
namespace llvm {
+namespace msf {
+class StreamWriter;
+}
namespace pdb {
class NameMap;
@@ -27,6 +30,7 @@ public:
void addMapping(StringRef Name, uint32_t Mapping);
Expected<std::unique_ptr<NameMap>> build();
+ Error commit(msf::StreamWriter &Writer) const;
uint32_t calculateSerializedLength() const;
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/PDBFile.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/PDBFile.h
index f4d7eb47d3b9..29f5b2163d83 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/PDBFile.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/PDBFile.h
@@ -11,10 +11,10 @@
#define LLVM_DEBUGINFO_PDB_RAW_PDBFILE_H
#include "llvm/ADT/DenseMap.h"
-#include "llvm/DebugInfo/CodeView/StreamArray.h"
-#include "llvm/DebugInfo/CodeView/StreamInterface.h"
-#include "llvm/DebugInfo/PDB/Raw/IPDBFile.h"
-#include "llvm/DebugInfo/PDB/Raw/MsfCommon.h"
+#include "llvm/DebugInfo/MSF/IMSFFile.h"
+#include "llvm/DebugInfo/MSF/MSFCommon.h"
+#include "llvm/DebugInfo/MSF/StreamArray.h"
+#include "llvm/DebugInfo/MSF/StreamInterface.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
@@ -24,25 +24,26 @@
namespace llvm {
-namespace codeview {
-class StreamInterface;
+namespace msf {
+class MappedBlockStream;
}
namespace pdb {
class DbiStream;
+class GlobalsStream;
class InfoStream;
-class MappedBlockStream;
class NameHashTable;
class PDBFileBuilder;
class PublicsStream;
class SymbolStream;
class TpiStream;
-class PDBFile : public IPDBFile {
+class PDBFile : public msf::IMSFFile {
friend PDBFileBuilder;
public:
- explicit PDBFile(std::unique_ptr<codeview::StreamInterface> PdbFileBuffer);
+ PDBFile(std::unique_ptr<msf::ReadableStream> PdbFileBuffer,
+ BumpPtrAllocator &Allocator);
~PDBFile() override;
uint32_t getFreeBlockMapBlock() const;
@@ -66,11 +67,18 @@ public:
Error setBlockData(uint32_t BlockIndex, uint32_t Offset,
ArrayRef<uint8_t> Data) const override;
- ArrayRef<support::ulittle32_t> getStreamSizes() const { return StreamSizes; }
+ ArrayRef<uint32_t> getFpmPages() const { return FpmPages; }
+
+ ArrayRef<support::ulittle32_t> getStreamSizes() const {
+ return ContainerLayout.StreamSizes;
+ }
ArrayRef<ArrayRef<support::ulittle32_t>> getStreamMap() const {
- return StreamMap;
+ return ContainerLayout.StreamMap;
}
+ const msf::MSFLayout &getMsfLayout() const { return ContainerLayout; }
+ const msf::ReadableStream &getMsfBuffer() const { return *Buffer; }
+
ArrayRef<support::ulittle32_t> getDirectoryBlockArray() const;
Error parseFileHeaders();
@@ -78,33 +86,45 @@ public:
Expected<InfoStream &> getPDBInfoStream();
Expected<DbiStream &> getPDBDbiStream();
+ Expected<GlobalsStream &> getPDBGlobalsStream();
Expected<TpiStream &> getPDBTpiStream();
Expected<TpiStream &> getPDBIpiStream();
Expected<PublicsStream &> getPDBPublicsStream();
Expected<SymbolStream &> getPDBSymbolStream();
Expected<NameHashTable &> getStringTable();
- Error commit();
+ BumpPtrAllocator &getAllocator() { return Allocator; }
+
+ bool hasPDBDbiStream() const;
+ bool hasPDBGlobalsStream();
+ bool hasPDBInfoStream();
+ bool hasPDBIpiStream() const;
+ bool hasPDBPublicsStream();
+ bool hasPDBSymbolStream();
+ bool hasPDBTpiStream() const;
+ bool hasStringTable();
+
+ private:
+ Expected<std::unique_ptr<msf::MappedBlockStream>> safelyCreateIndexedStream(
+ const msf::MSFLayout &Layout, const msf::ReadableStream &MsfData,
+ uint32_t StreamIndex) const;
-private:
- Error setSuperBlock(const msf::SuperBlock *Block);
+ BumpPtrAllocator &Allocator;
- BumpPtrAllocator Allocator;
+ std::unique_ptr<msf::ReadableStream> Buffer;
- std::unique_ptr<codeview::StreamInterface> Buffer;
- const msf::SuperBlock *SB;
- ArrayRef<support::ulittle32_t> StreamSizes;
- ArrayRef<support::ulittle32_t> DirectoryBlocks;
- std::vector<ArrayRef<support::ulittle32_t>> StreamMap;
+ std::vector<uint32_t> FpmPages;
+ msf::MSFLayout ContainerLayout;
+ std::unique_ptr<GlobalsStream> Globals;
std::unique_ptr<InfoStream> Info;
std::unique_ptr<DbiStream> Dbi;
std::unique_ptr<TpiStream> Tpi;
std::unique_ptr<TpiStream> Ipi;
std::unique_ptr<PublicsStream> Publics;
std::unique_ptr<SymbolStream> Symbols;
- std::unique_ptr<MappedBlockStream> DirectoryStream;
- std::unique_ptr<MappedBlockStream> StringTableStream;
+ std::unique_ptr<msf::MappedBlockStream> DirectoryStream;
+ std::unique_ptr<msf::MappedBlockStream> StringTableStream;
std::unique_ptr<NameHashTable> StringTable;
};
}
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/PDBFileBuilder.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/PDBFileBuilder.h
index 47c755b43269..27fc4b53b649 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/PDBFileBuilder.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/PDBFileBuilder.h
@@ -13,45 +13,50 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/Optional.h"
+#include "llvm/DebugInfo/PDB/Raw/PDBFile.h"
+#include "llvm/DebugInfo/PDB/Raw/RawConstants.h"
+#include "llvm/Support/Allocator.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
-#include "llvm/DebugInfo/PDB/Raw/MsfBuilder.h"
-#include "llvm/DebugInfo/PDB/Raw/PDBFile.h"
-
#include <memory>
#include <vector>
namespace llvm {
-namespace codeview {
-class StreamInterface;
+namespace msf {
+class MSFBuilder;
}
namespace pdb {
class DbiStreamBuilder;
class InfoStreamBuilder;
-class PDBFile;
+class TpiStreamBuilder;
class PDBFileBuilder {
public:
- explicit PDBFileBuilder(
- std::unique_ptr<codeview::StreamInterface> FileBuffer);
+ explicit PDBFileBuilder(BumpPtrAllocator &Allocator);
PDBFileBuilder(const PDBFileBuilder &) = delete;
PDBFileBuilder &operator=(const PDBFileBuilder &) = delete;
- Error initialize(const msf::SuperBlock &Super);
+ Error initialize(uint32_t BlockSize);
- MsfBuilder &getMsfBuilder();
+ msf::MSFBuilder &getMsfBuilder();
InfoStreamBuilder &getInfoBuilder();
DbiStreamBuilder &getDbiBuilder();
+ TpiStreamBuilder &getTpiBuilder();
+ TpiStreamBuilder &getIpiBuilder();
- Expected<std::unique_ptr<PDBFile>> build();
+ Error commit(StringRef Filename);
private:
+ Expected<msf::MSFLayout> finalizeMsfLayout() const;
+
+ BumpPtrAllocator &Allocator;
+
+ std::unique_ptr<msf::MSFBuilder> Msf;
std::unique_ptr<InfoStreamBuilder> Info;
std::unique_ptr<DbiStreamBuilder> Dbi;
-
- std::unique_ptr<PDBFile> File;
- std::unique_ptr<MsfBuilder> Msf;
+ std::unique_ptr<TpiStreamBuilder> Tpi;
+ std::unique_ptr<TpiStreamBuilder> Ipi;
};
}
}
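The constructor and commit changes above replace the old build() flow; below is a hedged sketch of the new write path using only the calls declared in this header. The 4096 block size and the empty stream contents are assumptions for illustration.

#include "llvm/DebugInfo/PDB/Raw/DbiStreamBuilder.h"
#include "llvm/DebugInfo/PDB/Raw/InfoStreamBuilder.h"
#include "llvm/DebugInfo/PDB/Raw/PDBFileBuilder.h"
using namespace llvm;

Error writeMinimalPdb(StringRef Path) {
  BumpPtrAllocator Alloc;                     // must outlive the builder
  pdb::PDBFileBuilder Builder(Alloc);
  if (auto EC = Builder.initialize(4096))     // MSF block size, assumed value
    return EC;
  pdb::InfoStreamBuilder &Info = Builder.getInfoBuilder();
  pdb::DbiStreamBuilder &Dbi = Builder.getDbiBuilder();
  (void)Info; (void)Dbi;                      // stream contents elided here
  return Builder.commit(Path);                // finalizes the MSF layout, writes file
}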
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/PublicsStream.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/PublicsStream.h
index f5bfb0ed60a9..577f2986ff24 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/PublicsStream.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/PublicsStream.h
@@ -10,10 +10,10 @@
#ifndef LLVM_DEBUGINFO_PDB_RAW_PUBLICSSTREAM_H
#define LLVM_DEBUGINFO_PDB_RAW_PUBLICSSTREAM_H
-#include "llvm/DebugInfo/CodeView/StreamArray.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
+#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
+#include "llvm/DebugInfo/MSF/StreamArray.h"
#include "llvm/DebugInfo/PDB/PDBTypes.h"
-#include "llvm/DebugInfo/PDB/Raw/MappedBlockStream.h"
#include "llvm/DebugInfo/PDB/Raw/RawConstants.h"
#include "llvm/DebugInfo/PDB/Raw/RawTypes.h"
@@ -22,14 +22,14 @@
namespace llvm {
namespace pdb {
class DbiStream;
+struct GSIHashHeader;
class PDBFile;
class PublicsStream {
- struct GSIHashHeader;
struct HeaderInfo;
public:
- PublicsStream(PDBFile &File, std::unique_ptr<MappedBlockStream> Stream);
+ PublicsStream(PDBFile &File, std::unique_ptr<msf::MappedBlockStream> Stream);
~PublicsStream();
Error reload();
@@ -38,16 +38,16 @@ public:
uint32_t getNumBuckets() const { return NumBuckets; }
iterator_range<codeview::CVSymbolArray::Iterator>
getSymbols(bool *HadError) const;
- codeview::FixedStreamArray<support::ulittle32_t> getHashBuckets() const {
+ msf::FixedStreamArray<support::ulittle32_t> getHashBuckets() const {
return HashBuckets;
}
- codeview::FixedStreamArray<support::ulittle32_t> getAddressMap() const {
+ msf::FixedStreamArray<support::ulittle32_t> getAddressMap() const {
return AddressMap;
}
- codeview::FixedStreamArray<support::ulittle32_t> getThunkMap() const {
+ msf::FixedStreamArray<support::ulittle32_t> getThunkMap() const {
return ThunkMap;
}
- codeview::FixedStreamArray<SectionOffset> getSectionOffsets() const {
+ msf::FixedStreamArray<SectionOffset> getSectionOffsets() const {
return SectionOffsets;
}
@@ -56,14 +56,14 @@ public:
private:
PDBFile &Pdb;
- std::unique_ptr<MappedBlockStream> Stream;
+ std::unique_ptr<msf::MappedBlockStream> Stream;
uint32_t NumBuckets = 0;
ArrayRef<uint8_t> Bitmap;
- codeview::FixedStreamArray<PSHashRecord> HashRecords;
- codeview::FixedStreamArray<support::ulittle32_t> HashBuckets;
- codeview::FixedStreamArray<support::ulittle32_t> AddressMap;
- codeview::FixedStreamArray<support::ulittle32_t> ThunkMap;
- codeview::FixedStreamArray<SectionOffset> SectionOffsets;
+ msf::FixedStreamArray<PSHashRecord> HashRecords;
+ msf::FixedStreamArray<support::ulittle32_t> HashBuckets;
+ msf::FixedStreamArray<support::ulittle32_t> AddressMap;
+ msf::FixedStreamArray<support::ulittle32_t> ThunkMap;
+ msf::FixedStreamArray<SectionOffset> SectionOffsets;
const HeaderInfo *Header;
const GSIHashHeader *HashHdr;
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/RawConstants.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/RawConstants.h
index 8daaf47882d8..af114ff52491 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/RawConstants.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/RawConstants.h
@@ -17,6 +17,8 @@
namespace llvm {
namespace pdb {
+const uint16_t kInvalidStreamIndex = 0xFFFF;
+
enum PdbRaw_ImplVer : uint32_t {
PdbImplVC2 = 19941610,
PdbImplVC4 = 19950623,
@@ -61,6 +63,8 @@ enum SpecialStream : uint32_t {
StreamTPI = 2,
StreamDBI = 3,
StreamIPI = 4,
+
+ kSpecialStreamCount
};
enum class DbgHeaderType : uint16_t {
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/RawError.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/RawError.h
index b0687cddbf48..f96b8066bbe5 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/RawError.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/RawError.h
@@ -19,11 +19,14 @@ namespace pdb {
enum class raw_error_code {
unspecified = 1,
feature_unsupported,
+ invalid_format,
corrupt_file,
insufficient_buffer,
no_stream,
index_out_of_bounds,
invalid_block_address,
+ duplicate_entry,
+ no_entry,
not_writable,
invalid_tpi_hash,
};
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/RawSession.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/RawSession.h
index 73d281eab1a7..5a6c469fcc8e 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/RawSession.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/RawSession.h
@@ -12,6 +12,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/PDB/IPDBSession.h"
+#include "llvm/Support/Allocator.h"
#include "llvm/Support/Error.h"
namespace llvm {
@@ -20,7 +21,8 @@ class PDBFile;
class RawSession : public IPDBSession {
public:
- explicit RawSession(std::unique_ptr<PDBFile> PdbFile);
+ RawSession(std::unique_ptr<PDBFile> PdbFile,
+ std::unique_ptr<BumpPtrAllocator> Allocator);
~RawSession() override;
static Error createFromPdb(StringRef Path,
@@ -68,6 +70,7 @@ public:
private:
std::unique_ptr<PDBFile> Pdb;
+ std::unique_ptr<BumpPtrAllocator> Allocator;
};
}
}
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/RawTypes.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/RawTypes.h
index afcfe9405c0f..d404b3994dbc 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/RawTypes.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/RawTypes.h
@@ -80,6 +80,228 @@ struct TypeIndexOffset {
support::ulittle32_t Offset;
};
+/// Some of the values are stored in bitfields. Since this needs to be portable
+/// across compilers and architectures (big / little endian in particular) we
+/// can't use the actual structures below, but must instead do the shifting
+/// and masking ourselves. The struct definitions are provided for reference.
+struct DbiFlags {
+ /// uint16_t IncrementalLinking : 1; // True if linked incrementally
+ /// uint16_t IsStripped : 1; // True if private symbols were
+ /// stripped.
+ /// uint16_t HasCTypes : 1; // True if linked with /debug:ctypes.
+ /// uint16_t Reserved : 13;
+ static const uint16_t FlagIncrementalMask = 0x0001;
+ static const uint16_t FlagStrippedMask = 0x0002;
+ static const uint16_t FlagHasCTypesMask = 0x0004;
+};
+
+struct DbiBuildNo {
+ /// uint16_t MinorVersion : 8;
+ /// uint16_t MajorVersion : 7;
+ /// uint16_t NewVersionFormat : 1;
+ static const uint16_t BuildMinorMask = 0x00FF;
+ static const uint16_t BuildMinorShift = 0;
+
+ static const uint16_t BuildMajorMask = 0x7F00;
+ static const uint16_t BuildMajorShift = 8;
+};
+
+/// The fixed size header that appears at the beginning of the DBI Stream.
+struct DbiStreamHeader {
+ support::little32_t VersionSignature;
+ support::ulittle32_t VersionHeader;
+
+ /// How "old" is this DBI Stream. Should match the age of the PDB InfoStream.
+ support::ulittle32_t Age;
+
+ /// Global symbol stream #
+ support::ulittle16_t GlobalSymbolStreamIndex;
+
+ /// See DbiBuildNo structure.
+ support::ulittle16_t BuildNumber;
+
+ /// Public symbols stream #
+ support::ulittle16_t PublicSymbolStreamIndex;
+
+ /// version of mspdbNNN.dll
+ support::ulittle16_t PdbDllVersion;
+
+ /// Symbol records stream #
+ support::ulittle16_t SymRecordStreamIndex;
+
+ /// rbld number of mspdbNNN.dll
+ support::ulittle16_t PdbDllRbld;
+
+ /// Size of module info stream
+ support::little32_t ModiSubstreamSize;
+
+ /// Size of sec. contrib stream
+ support::little32_t SecContrSubstreamSize;
+
+ /// Size of sec. map substream
+ support::little32_t SectionMapSize;
+
+ /// Size of file info substream
+ support::little32_t FileInfoSize;
+
+ /// Size of type server map
+ support::little32_t TypeServerSize;
+
+ /// Index of MFC Type Server
+ support::ulittle32_t MFCTypeServerIndex;
+
+ /// Size of DbgHeader info
+ support::little32_t OptionalDbgHdrSize;
+
+ /// Size of EC stream (what is EC?)
+ support::little32_t ECSubstreamSize;
+
+ /// See the DbiFlags definition.
+ support::ulittle16_t Flags;
+
+ /// See PDB_MachineType enum.
+ support::ulittle16_t MachineType;
+
+ /// Pad to 64 bytes
+ support::ulittle32_t Reserved;
+};
+static_assert(sizeof(DbiStreamHeader) == 64, "Invalid DbiStreamHeader size!");
+
+struct SectionContribEntry {
+ support::ulittle16_t Section;
+ char Padding1[2];
+ support::little32_t Offset;
+ support::little32_t Size;
+ support::ulittle32_t Characteristics;
+ support::ulittle16_t ModuleIndex;
+ char Padding2[2];
+ support::ulittle32_t DataCrc;
+ support::ulittle32_t RelocCrc;
+};
+
+/// The header preceding the File Info Substream of the DBI stream.
+struct FileInfoSubstreamHeader {
+ /// Total # of modules, should match number of records in the ModuleInfo
+ /// substream.
+ support::ulittle16_t NumModules;
+
+ /// Total # of source files. This value is not accurate because PDB actually
+ /// supports more than 64k source files, so we ignore it and compute the value
+ /// from other stream fields.
+ support::ulittle16_t NumSourceFiles;
+
+ /// Following this header the File Info Substream is laid out as follows:
+ /// ulittle16_t ModIndices[NumModules];
+ /// ulittle16_t ModFileCounts[NumModules];
+ /// ulittle32_t FileNameOffsets[NumSourceFiles];
+ /// char Names[][NumSourceFiles];
+ /// with the caveat that `NumSourceFiles` cannot be trusted, so
+ /// it is computed by summing the `ModFileCounts` array.
+};
+
+struct ModInfoFlags {
+ /// uint16_t fWritten : 1; // True if ModInfo is dirty
+ /// uint16_t fECEnabled : 1; // Is EC symbolic info present? (What is EC?)
+ /// uint16_t unused : 6; // Reserved
+ /// uint16_t iTSM : 8; // Type Server Index for this module
+ static const uint16_t HasECFlagMask = 0x2;
+
+ static const uint16_t TypeServerIndexMask = 0xFF00;
+ static const uint16_t TypeServerIndexShift = 8;
+};
+
+/// The header preceding each entry in the Module Info substream of the DBI
+/// stream.
+struct ModuleInfoHeader {
+ /// Currently opened module. This field is a pointer in the reference
+ /// implementation, but that won't work on 64-bit systems, and anyway it
+ /// doesn't make sense to read a pointer from a file. For now it is unused,
+ /// so just ignore it.
+ support::ulittle32_t Mod;
+
+ /// First section contribution of this module.
+ SectionContribEntry SC;
+
+ /// See ModInfoFlags definition.
+ support::ulittle16_t Flags;
+
+ /// Stream Number of module debug info
+ support::ulittle16_t ModDiStream;
+
+ /// Size of local symbol debug info in above stream
+ support::ulittle32_t SymBytes;
+
+ /// Size of line number debug info in above stream
+ support::ulittle32_t LineBytes;
+
+ /// Size of C13 line number info in above stream
+ support::ulittle32_t C13Bytes;
+
+ /// Number of files contributing to this module
+ support::ulittle16_t NumFiles;
+
+ /// Padding so the next field is 4-byte aligned.
+ char Padding1[2];
+
+ /// Array of [0..NumFiles) DBI name buffer offsets. This field is a pointer
+ /// in the reference implementation, but as with `Mod`, we ignore it for now
+ /// since it is unused.
+ support::ulittle32_t FileNameOffs;
+
+ /// Name Index for src file name
+ support::ulittle32_t SrcFileNameNI;
+
+ /// Name Index for path to compiler PDB
+ support::ulittle32_t PdbFilePathNI;
+
+ /// Following this header are two zero terminated strings.
+ /// char ModuleName[];
+ /// char ObjFileName[];
+};
+
+/// Defines a 128-bit unique identifier. This maps to a GUID on Windows, but
+/// is abstracted here for the purposes of non-Windows platforms that don't have
+/// the GUID structure defined.
+struct PDB_UniqueId {
+ uint8_t Guid[16];
+};
+
+// The header preceding the global TPI stream.
+// This corresponds to `HDR` in PDB/dbi/tpi.h.
+struct TpiStreamHeader {
+ struct EmbeddedBuf {
+ support::little32_t Off;
+ support::ulittle32_t Length;
+ };
+
+ support::ulittle32_t Version;
+ support::ulittle32_t HeaderSize;
+ support::ulittle32_t TypeIndexBegin;
+ support::ulittle32_t TypeIndexEnd;
+ support::ulittle32_t TypeRecordBytes;
+
+ // The following members correspond to `TpiHash` in PDB/dbi/tpi.h.
+ support::ulittle16_t HashStreamIndex;
+ support::ulittle16_t HashAuxStreamIndex;
+ support::ulittle32_t HashKeySize;
+ support::ulittle32_t NumHashBuckets;
+
+ EmbeddedBuf HashValueBuffer;
+ EmbeddedBuf IndexOffsetBuffer;
+ EmbeddedBuf HashAdjBuffer;
+};
+
+const uint32_t MinTpiHashBuckets = 0x1000;
+const uint32_t MaxTpiHashBuckets = 0x40000;
+
+/// The header preceding the global PDB Stream (Stream 1)
+struct InfoStreamHeader {
+ support::ulittle32_t Version;
+ support::ulittle32_t Signature;
+ support::ulittle32_t Age;
+ PDB_UniqueId Guid;
+};
+
} // namespace pdb
} // namespace llvm
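The DbiFlags / DbiBuildNo comment above notes that the bitfields must be decoded by hand for portability; here is a small sketch of that shift-and-mask decoding against DbiStreamHeader. The helper names are illustrative, not part of the patch.

inline bool isStripped(const llvm::pdb::DbiStreamHeader &H) {
  // FlagStrippedMask selects the "private symbols stripped" bit.
  return (uint16_t(H.Flags) & llvm::pdb::DbiFlags::FlagStrippedMask) != 0;
}

inline uint16_t buildMajorVersion(const llvm::pdb::DbiStreamHeader &H) {
  // Mask off the major-version bits, then shift them down into place.
  return (uint16_t(H.BuildNumber) & llvm::pdb::DbiBuildNo::BuildMajorMask) >>
         llvm::pdb::DbiBuildNo::BuildMajorShift;
}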
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/SymbolStream.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/SymbolStream.h
index 685a23411a3b..41d5e6ad64a0 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/SymbolStream.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/SymbolStream.h
@@ -10,19 +10,20 @@
#ifndef LLVM_DEBUGINFO_PDB_RAW_PDBSYMBOLSTREAM_H
#define LLVM_DEBUGINFO_PDB_RAW_PDBSYMBOLSTREAM_H
-#include "llvm/DebugInfo/CodeView/StreamArray.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
-#include "llvm/DebugInfo/PDB/Raw/MappedBlockStream.h"
#include "llvm/Support/Error.h"
namespace llvm {
+namespace msf {
+class MappedBlockStream;
+}
namespace pdb {
class PDBFile;
class SymbolStream {
public:
- SymbolStream(std::unique_ptr<MappedBlockStream> Stream);
+ SymbolStream(std::unique_ptr<msf::MappedBlockStream> Stream);
~SymbolStream();
Error reload();
@@ -33,7 +34,7 @@ public:
private:
codeview::CVSymbolArray SymbolRecords;
- std::unique_ptr<MappedBlockStream> Stream;
+ std::unique_ptr<msf::MappedBlockStream> Stream;
};
}
}
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/TpiHashing.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/TpiHashing.h
new file mode 100644
index 000000000000..67a4952fcdfe
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/TpiHashing.h
@@ -0,0 +1,95 @@
+//===- TpiHashing.h ---------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_PDB_TPIHASHING_H
+#define LLVM_DEBUGINFO_PDB_TPIHASHING_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
+#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h"
+#include "llvm/DebugInfo/MSF/StreamArray.h"
+#include "llvm/DebugInfo/PDB/Raw/RawError.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+#include <cstdint>
+#include <string>
+
+namespace llvm {
+namespace pdb {
+
+class TpiHashUpdater : public codeview::TypeVisitorCallbacks {
+public:
+ TpiHashUpdater() = default;
+
+#define TYPE_RECORD(EnumName, EnumVal, Name) \
+ virtual Error visitKnownRecord(codeview::CVType &CVR, \
+ codeview::Name##Record &Record) override { \
+ visitKnownRecordImpl(CVR, Record); \
+ return Error::success(); \
+ }
+#define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
+#define MEMBER_RECORD(EnumName, EnumVal, Name)
+#define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
+#include "llvm/DebugInfo/CodeView/TypeRecords.def"
+
+private:
+ template <typename RecordKind>
+ void visitKnownRecordImpl(codeview::CVType &CVR, RecordKind &Record) {
+ CVR.Hash = 0;
+ }
+
+ void visitKnownRecordImpl(codeview::CVType &CVR,
+ codeview::UdtSourceLineRecord &Rec);
+ void visitKnownRecordImpl(codeview::CVType &CVR,
+ codeview::UdtModSourceLineRecord &Rec);
+ void visitKnownRecordImpl(codeview::CVType &CVR, codeview::ClassRecord &Rec);
+ void visitKnownRecordImpl(codeview::CVType &CVR, codeview::EnumRecord &Rec);
+ void visitKnownRecordImpl(codeview::CVType &CVR, codeview::UnionRecord &Rec);
+};
+
+class TpiHashVerifier : public codeview::TypeVisitorCallbacks {
+public:
+ TpiHashVerifier(msf::FixedStreamArray<support::ulittle32_t> &HashValues,
+ uint32_t NumHashBuckets)
+ : HashValues(HashValues), NumHashBuckets(NumHashBuckets) {}
+
+ Error visitKnownRecord(codeview::CVType &CVR,
+ codeview::UdtSourceLineRecord &Rec) override;
+ Error visitKnownRecord(codeview::CVType &CVR,
+ codeview::UdtModSourceLineRecord &Rec) override;
+ Error visitKnownRecord(codeview::CVType &CVR,
+ codeview::ClassRecord &Rec) override;
+ Error visitKnownRecord(codeview::CVType &CVR,
+ codeview::EnumRecord &Rec) override;
+ Error visitKnownRecord(codeview::CVType &CVR,
+ codeview::UnionRecord &Rec) override;
+ Error visitTypeBegin(codeview::CVType &CVR) override;
+
+private:
+ Error verifySourceLine(codeview::TypeIndex TI);
+
+ Error errorInvalidHash() {
+ return make_error<RawError>(
+ raw_error_code::invalid_tpi_hash,
+ "Type index is 0x" +
+ utohexstr(codeview::TypeIndex::FirstNonSimpleIndex + Index));
+ }
+
+ msf::FixedStreamArray<support::ulittle32_t> HashValues;
+ codeview::CVType RawRecord;
+ uint32_t NumHashBuckets;
+ uint32_t Index = -1;
+};
+
+} // end namespace pdb
+} // end namespace llvm
+
+#endif // LLVM_DEBUGINFO_PDB_TPIHASHING_H
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/TpiStream.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/TpiStream.h
index 4f36d70aabed..de21abe4c817 100644
--- a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/TpiStream.h
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/TpiStream.h
@@ -10,11 +10,9 @@
#ifndef LLVM_DEBUGINFO_PDB_RAW_PDBTPISTREAM_H
#define LLVM_DEBUGINFO_PDB_RAW_PDBTPISTREAM_H
-#include "llvm/DebugInfo/CodeView/StreamArray.h"
-#include "llvm/DebugInfo/CodeView/StreamRef.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/MSF/StreamArray.h"
#include "llvm/DebugInfo/PDB/PDBTypes.h"
-#include "llvm/DebugInfo/PDB/Raw/MappedBlockStream.h"
#include "llvm/DebugInfo/PDB/Raw/RawConstants.h"
#include "llvm/DebugInfo/PDB/Raw/RawTypes.h"
#include "llvm/Support/raw_ostream.h"
@@ -22,14 +20,18 @@
#include "llvm/Support/Error.h"
namespace llvm {
+namespace msf {
+class MappedBlockStream;
+}
namespace pdb {
class PDBFile;
class TpiStream {
- struct HeaderInfo;
+ friend class TpiStreamBuilder;
public:
- TpiStream(const PDBFile &File, std::unique_ptr<MappedBlockStream> Stream);
+ TpiStream(const PDBFile &File,
+ std::unique_ptr<msf::MappedBlockStream> Stream);
~TpiStream();
Error reload();
@@ -43,9 +45,9 @@ public:
uint32_t getHashKeySize() const;
uint32_t NumHashBuckets() const;
- codeview::FixedStreamArray<support::ulittle32_t> getHashValues() const;
- codeview::FixedStreamArray<TypeIndexOffset> getTypeIndexOffsets() const;
- codeview::FixedStreamArray<TypeIndexOffset> getHashAdjustments() const;
+ msf::FixedStreamArray<support::ulittle32_t> getHashValues() const;
+ msf::FixedStreamArray<TypeIndexOffset> getTypeIndexOffsets() const;
+ msf::FixedStreamArray<TypeIndexOffset> getHashAdjustments() const;
iterator_range<codeview::CVTypeArray::Iterator> types(bool *HadError) const;
@@ -55,16 +57,16 @@ private:
Error verifyHashValues();
const PDBFile &Pdb;
- std::unique_ptr<MappedBlockStream> Stream;
+ std::unique_ptr<msf::MappedBlockStream> Stream;
codeview::CVTypeArray TypeRecords;
- std::unique_ptr<MappedBlockStream> HashStream;
- codeview::FixedStreamArray<support::ulittle32_t> HashValues;
- codeview::FixedStreamArray<TypeIndexOffset> TypeIndexOffsets;
- codeview::FixedStreamArray<TypeIndexOffset> HashAdjustments;
+ std::unique_ptr<msf::ReadableStream> HashStream;
+ msf::FixedStreamArray<support::ulittle32_t> HashValues;
+ msf::FixedStreamArray<TypeIndexOffset> TypeIndexOffsets;
+ msf::FixedStreamArray<TypeIndexOffset> HashAdjustments;
- const HeaderInfo *Header;
+ const TpiStreamHeader *Header;
};
}
}
diff --git a/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/TpiStreamBuilder.h b/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/TpiStreamBuilder.h
new file mode 100644
index 000000000000..f9a642126f53
--- /dev/null
+++ b/contrib/llvm/include/llvm/DebugInfo/PDB/Raw/TpiStreamBuilder.h
@@ -0,0 +1,82 @@
+//===- TpiStreamBuilder.h - PDB Tpi Stream Creation -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_PDB_RAW_PDBTPISTREAMBUILDER_H
+#define LLVM_DEBUGINFO_PDB_RAW_PDBTPISTREAMBUILDER_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/MSF/ByteStream.h"
+#include "llvm/DebugInfo/MSF/SequencedItemStream.h"
+#include "llvm/DebugInfo/PDB/Raw/RawConstants.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Error.h"
+
+#include <vector>
+
+namespace llvm {
+namespace codeview {
+class TypeRecord;
+}
+namespace msf {
+class ByteStream;
+class MSFBuilder;
+struct MSFLayout;
+class ReadableStreamRef;
+class WritableStream;
+
+template <> struct SequencedItemTraits<llvm::codeview::CVType> {
+ static size_t length(const codeview::CVType &Item) { return Item.length(); }
+ static ArrayRef<uint8_t> bytes(const codeview::CVType &Item) {
+ return Item.data();
+ }
+};
+}
+namespace pdb {
+class PDBFile;
+class TpiStream;
+struct TpiStreamHeader;
+
+class TpiStreamBuilder {
+public:
+ explicit TpiStreamBuilder(msf::MSFBuilder &Msf, uint32_t StreamIdx);
+ ~TpiStreamBuilder();
+
+ TpiStreamBuilder(const TpiStreamBuilder &) = delete;
+ TpiStreamBuilder &operator=(const TpiStreamBuilder &) = delete;
+
+ void setVersionHeader(PdbRaw_TpiVer Version);
+ void addTypeRecord(const codeview::CVType &Record);
+
+ Error finalizeMsfLayout();
+
+ Error commit(const msf::MSFLayout &Layout, const msf::WritableStream &Buffer);
+
+ uint32_t calculateSerializedLength() const;
+
+private:
+ uint32_t calculateHashBufferSize() const;
+ Error finalize();
+
+ msf::MSFBuilder &Msf;
+ BumpPtrAllocator &Allocator;
+
+ Optional<PdbRaw_TpiVer> VerHeader;
+ std::vector<codeview::CVType> TypeRecords;
+ msf::SequencedItemStream<codeview::CVType> TypeRecordStream;
+ uint32_t HashStreamIndex = kInvalidStreamIndex;
+ std::unique_ptr<msf::ByteStream> HashValueStream;
+
+ const TpiStreamHeader *Header;
+ uint32_t Idx;
+};
+}
+}
+
+#endif
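A hedged population sketch for the builder above, driven through PDBFileBuilder::getTpiBuilder(). Only calls declared in this header are used; PdbTpiV80 is assumed to be the PdbRaw_TpiVer value a writer would normally pick.

void addTypeRecords(llvm::pdb::PDBFileBuilder &Builder,
                    llvm::ArrayRef<llvm::codeview::CVType> Records) {
  llvm::pdb::TpiStreamBuilder &Tpi = Builder.getTpiBuilder();
  Tpi.setVersionHeader(llvm::pdb::PdbTpiV80);      // assumed version constant
  for (const llvm::codeview::CVType &R : Records)
    Tpi.addTypeRecord(R);                          // serialized when the file is committed
}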
diff --git a/contrib/llvm/include/llvm/Demangle/Demangle.h b/contrib/llvm/include/llvm/Demangle/Demangle.h
new file mode 100644
index 000000000000..d2eb56b39f9b
--- /dev/null
+++ b/contrib/llvm/include/llvm/Demangle/Demangle.h
@@ -0,0 +1,28 @@
+//===--- Demangle.h ---------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include <cstddef>
+
+namespace llvm {
+/// This is an LLVM-local version of __cxa_demangle. Other than the name and
+/// being in the llvm namespace, it is identical.
+///
+/// The mangled_name is demangled into buf and returned. If the buffer is not
+/// large enough, realloc is used to expand it.
+///
+/// The *status will be set to
+/// unknown_error: -4
+/// invalid_args: -3
+/// invalid_mangled_name: -2
+/// memory_alloc_failure: -1
+/// success: 0
+
+char *itaniumDemangle(const char *mangled_name, char *buf, size_t *n,
+ int *status);
+}
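A minimal usage sketch for the declaration above, mirroring the __cxa_demangle calling convention it documents: passing null for buf and n lets the function allocate, the caller frees the returned buffer, and a zero status means success.

#include "llvm/Demangle/Demangle.h"
#include <cstdio>
#include <cstdlib>

void printDemangled(const char *Mangled) {
  int Status = 0;
  char *Result = llvm::itaniumDemangle(Mangled, nullptr, nullptr, &Status);
  std::printf("%s\n", Status == 0 ? Result : Mangled);  // fall back to the mangled name
  std::free(Result);
}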
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h b/contrib/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h
index ab13028b3ae0..f68337c43271 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/ExecutionEngine.h
@@ -137,13 +137,13 @@ protected:
std::unique_ptr<Module> M,
std::string *ErrorStr,
std::shared_ptr<MCJITMemoryManager> MM,
- std::shared_ptr<RuntimeDyld::SymbolResolver> SR,
+ std::shared_ptr<JITSymbolResolver> SR,
std::unique_ptr<TargetMachine> TM);
static ExecutionEngine *(*OrcMCJITReplacementCtor)(
std::string *ErrorStr,
std::shared_ptr<MCJITMemoryManager> MM,
- std::shared_ptr<RuntimeDyld::SymbolResolver> SR,
+ std::shared_ptr<JITSymbolResolver> SR,
std::unique_ptr<TargetMachine> TM);
static ExecutionEngine *(*InterpCtor)(std::unique_ptr<Module> M,
@@ -198,22 +198,33 @@ public:
const DataLayout &getDataLayout() const { return DL; }
- /// removeModule - Remove a Module from the list of modules. Returns true if
- /// M is found.
+ /// removeModule - Removes a Module from the list of modules, but does not
+ /// free the module's memory. Returns true if M is found, in which case the
+ /// caller assumes responsibility for deleting the module.
+ //
+ // FIXME: This stealth ownership transfer is horrible. This will probably be
+ // fixed by deleting ExecutionEngine.
virtual bool removeModule(Module *M);
/// FindFunctionNamed - Search all of the active modules to find the function that
/// defines FnName. This is a very slow operation and shouldn't be used for
/// general code.
- virtual Function *FindFunctionNamed(const char *FnName);
+ virtual Function *FindFunctionNamed(StringRef FnName);
/// FindGlobalVariableNamed - Search all of the active modules to find the global variable
/// that defines Name. This is a very slow operation and shouldn't be used for
/// general code.
- virtual GlobalVariable *FindGlobalVariableNamed(const char *Name, bool AllowInternal = false);
+ virtual GlobalVariable *FindGlobalVariableNamed(StringRef Name, bool AllowInternal = false);
/// runFunction - Execute the specified function with the specified arguments,
/// and return the result.
+ ///
+ /// For MCJIT execution engines, clients are encouraged to use the
+ /// "GetFunctionAddress" method (rather than runFunction) and cast the
+ /// returned uint64_t to the desired function pointer type. However, for
+ /// backwards compatibility MCJIT's implementation can execute 'main-like'
+ /// function (i.e. those returning void or int, and taking either no
+ /// arguments or (int, char*[])).
virtual GenericValue runFunction(Function *F,
ArrayRef<GenericValue> ArgValues) = 0;
@@ -516,7 +527,7 @@ private:
std::string *ErrorStr;
CodeGenOpt::Level OptLevel;
std::shared_ptr<MCJITMemoryManager> MemMgr;
- std::shared_ptr<RuntimeDyld::SymbolResolver> Resolver;
+ std::shared_ptr<JITSymbolResolver> Resolver;
TargetOptions Options;
Optional<Reloc::Model> RelocModel;
CodeModel::Model CMModel;
@@ -555,7 +566,7 @@ public:
setMemoryManager(std::unique_ptr<MCJITMemoryManager> MM);
EngineBuilder&
- setSymbolResolver(std::unique_ptr<RuntimeDyld::SymbolResolver> SR);
+ setSymbolResolver(std::unique_ptr<JITSymbolResolver> SR);
/// setErrorStr - Set the error string to write to on error. This option
/// defaults to NULL.
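Per the runFunction comment earlier in this hunk, MCJIT clients are steered toward resolving the address themselves and casting it; a hedged sketch of that pattern follows. getFunctionAddress and finalizeObject are pre-existing ExecutionEngine methods; the cast target signature is the caller's assumption.

int callMainLike(llvm::ExecutionEngine &EE, int argc, char **argv) {
  EE.finalizeObject();                                   // make sure code is emitted
  auto *Fn = reinterpret_cast<int (*)(int, char **)>(
      EE.getFunctionAddress("main"));
  return Fn ? Fn(argc, argv) : -1;
}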
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/JITEventListener.h b/contrib/llvm/include/llvm/ExecutionEngine/JITEventListener.h
index be7c0448bb78..94ec4e36a199 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/JITEventListener.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/JITEventListener.h
@@ -18,18 +18,18 @@
#include "RuntimeDyld.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/DebugLoc.h"
-#include "llvm/Support/DataTypes.h"
+#include <cstdint>
#include <vector>
namespace llvm {
-class Function;
+
+class IntelJITEventsWrapper;
class MachineFunction;
class OProfileWrapper;
-class IntelJITEventsWrapper;
namespace object {
class ObjectFile;
-}
+} // end namespace object
/// JITEvent_EmittedFunctionDetails - Helper struct for containing information
/// about a generated machine code function.
@@ -60,8 +60,8 @@ public:
typedef JITEvent_EmittedFunctionDetails EmittedFunctionDetails;
public:
- JITEventListener() {}
- virtual ~JITEventListener() {}
+ JITEventListener() = default;
+ virtual ~JITEventListener() = default;
/// NotifyObjectEmitted - Called after an object has been successfully
/// emitted to memory. NotifyFunctionEmitted will not be called for
@@ -81,7 +81,7 @@ public:
// Get a pointer to the GDB debugger registration listener.
static JITEventListener *createGDBRegistrationListener();
-#if defined(LLVM_USE_INTEL_JITEVENTS) && LLVM_USE_INTEL_JITEVENTS
+#if LLVM_USE_INTEL_JITEVENTS
// Construct an IntelJITEventListener
static JITEventListener *createIntelJITEventListener();
@@ -97,7 +97,7 @@ public:
}
#endif // USE_INTEL_JITEVENTS
-#if defined(LLVM_USE_OPROFILE) && LLVM_USE_OPROFILE
+#if LLVM_USE_OPROFILE
// Construct an OProfileJITEventListener
static JITEventListener *createOProfileJITEventListener();
@@ -105,7 +105,6 @@ public:
static JITEventListener *createOProfileJITEventListener(
OProfileWrapper* AlternativeImpl);
#else
-
static JITEventListener *createOProfileJITEventListener() { return nullptr; }
static JITEventListener *createOProfileJITEventListener(
@@ -113,10 +112,11 @@ public:
return nullptr;
}
#endif // USE_OPROFILE
+
private:
virtual void anchor();
};
-} // end namespace llvm.
+} // end namespace llvm
-#endif // defined LLVM_EXECUTIONENGINE_JITEVENTLISTENER_H
+#endif // LLVM_EXECUTIONENGINE_JITEVENTLISTENER_H
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/JITSymbol.h b/contrib/llvm/include/llvm/ExecutionEngine/JITSymbol.h
new file mode 100644
index 000000000000..88929482ce76
--- /dev/null
+++ b/contrib/llvm/include/llvm/ExecutionEngine/JITSymbol.h
@@ -0,0 +1,197 @@
+//===----------- JITSymbol.h - JIT symbol abstraction -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Abstraction for target process addresses.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_JITSYMBOL_H
+#define LLVM_EXECUTIONENGINE_JITSYMBOL_H
+
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <functional>
+#include <string>
+
+namespace llvm {
+
+class GlobalValue;
+
+namespace object {
+ class BasicSymbolRef;
+} // end namespace object
+
+/// @brief Represents an address in the target process's address space.
+typedef uint64_t JITTargetAddress;
+
+/// @brief Flags for symbols in the JIT.
+class JITSymbolFlags {
+public:
+ typedef uint8_t UnderlyingType;
+
+ enum FlagNames : UnderlyingType {
+ None = 0,
+ Weak = 1U << 0,
+ Common = 1U << 1,
+ Absolute = 1U << 2,
+ Exported = 1U << 3
+ };
+
+ /// @brief Default-construct a JITSymbolFlags instance.
+ JITSymbolFlags() : Flags(None) {}
+
+ /// @brief Construct a JITSymbolFlags instance from the given flags.
+ JITSymbolFlags(FlagNames Flags) : Flags(Flags) {}
+
+ /// @brief Returns true if the Weak flag is set.
+ bool isWeak() const {
+ return (Flags & Weak) == Weak;
+ }
+
+ /// @brief Returns true if the Common flag is set.
+ bool isCommon() const {
+ return (Flags & Common) == Common;
+ }
+
+ bool isStrongDefinition() const {
+ return !isWeak() && !isCommon();
+ }
+
+ /// @brief Returns true if the Exported flag is set.
+ bool isExported() const {
+ return (Flags & Exported) == Exported;
+ }
+
+ operator UnderlyingType&() { return Flags; }
+
+ /// Construct a JITSymbolFlags value based on the flags of the given global
+ /// value.
+ static JITSymbolFlags fromGlobalValue(const GlobalValue &GV);
+
+ /// Construct a JITSymbolFlags value based on the flags of the given libObject
+ /// symbol.
+ static JITSymbolFlags fromObjectSymbol(const object::BasicSymbolRef &Symbol);
+
+private:
+ UnderlyingType Flags;
+};
+
+/// @brief Represents a symbol that has been evaluated to an address already.
+class JITEvaluatedSymbol {
+public:
+ /// @brief Create a 'null' symbol.
+ JITEvaluatedSymbol(std::nullptr_t)
+ : Address(0) {}
+
+ /// @brief Create a symbol for the given address and flags.
+ JITEvaluatedSymbol(JITTargetAddress Address, JITSymbolFlags Flags)
+ : Address(Address), Flags(Flags) {}
+
+ /// @brief An evaluated symbol converts to 'true' if its address is non-zero.
+ explicit operator bool() const { return Address != 0; }
+
+ /// @brief Return the address of this symbol.
+ JITTargetAddress getAddress() const { return Address; }
+
+ /// @brief Return the flags for this symbol.
+ JITSymbolFlags getFlags() const { return Flags; }
+
+private:
+ JITTargetAddress Address;
+ JITSymbolFlags Flags;
+};
+
+/// @brief Represents a symbol in the JIT.
+class JITSymbol {
+public:
+ typedef std::function<JITTargetAddress()> GetAddressFtor;
+
+ /// @brief Create a 'null' symbol that represents failure to find a symbol
+ /// definition.
+ JITSymbol(std::nullptr_t)
+ : CachedAddr(0) {}
+
+ /// @brief Create a symbol for a definition with a known address.
+ JITSymbol(JITTargetAddress Addr, JITSymbolFlags Flags)
+ : CachedAddr(Addr), Flags(Flags) {}
+
+ /// @brief Construct a JITSymbol from a JITEvaluatedSymbol.
+ JITSymbol(JITEvaluatedSymbol Sym)
+ : CachedAddr(Sym.getAddress()), Flags(Sym.getFlags()) {}
+
+ /// @brief Create a symbol for a definition that doesn't have a known address
+ /// yet.
+ /// @param GetAddress A functor to materialize a definition (fixing the
+ /// address) on demand.
+ ///
+ /// This constructor allows a JIT layer to provide a reference to a symbol
+ /// definition without actually materializing the definition up front. The
+ /// user can materialize the definition at any time by calling the getAddress
+ /// method.
+ JITSymbol(GetAddressFtor GetAddress, JITSymbolFlags Flags)
+ : GetAddress(std::move(GetAddress)), CachedAddr(0), Flags(Flags) {}
+
+ /// @brief Returns true if the symbol exists, false otherwise.
+ explicit operator bool() const { return CachedAddr || GetAddress; }
+
+ /// @brief Get the address of the symbol in the target address space. Returns
+ /// '0' if the symbol does not exist.
+ JITTargetAddress getAddress() {
+ if (GetAddress) {
+ CachedAddr = GetAddress();
+ assert(CachedAddr && "Symbol could not be materialized.");
+ GetAddress = nullptr;
+ }
+ return CachedAddr;
+ }
+
+ JITSymbolFlags getFlags() const { return Flags; }
+
+private:
+ GetAddressFtor GetAddress;
+ JITTargetAddress CachedAddr;
+ JITSymbolFlags Flags;
+};
+
+/// \brief Symbol resolution.
+class JITSymbolResolver {
+public:
+ virtual ~JITSymbolResolver() = default;
+
+ /// This method returns the address of the specified symbol if it exists
+ /// within the logical dynamic library represented by this JITSymbolResolver.
+ /// Unlike findSymbol, queries through this interface should return addresses
+ /// for hidden symbols.
+ ///
+ /// This is of particular importance for the Orc JIT APIs, which support lazy
+ /// compilation by breaking up modules: Each of those broken out modules
+ /// must be able to resolve hidden symbols provided by the others. Clients
+ /// writing memory managers for MCJIT can usually ignore this method.
+ ///
+ /// This method will be queried by RuntimeDyld when checking for previous
+ /// definitions of common symbols.
+ virtual JITSymbol findSymbolInLogicalDylib(const std::string &Name) = 0;
+
+ /// This method returns the address of the specified function or variable.
+ /// It is used to resolve symbols during module linking.
+ ///
+ /// If the returned symbol's address is equal to ~0ULL then RuntimeDyld will
+ /// skip all relocations for that symbol, and the client will be responsible
+ /// for handling them manually.
+ virtual JITSymbol findSymbol(const std::string &Name) = 0;
+
+private:
+ virtual void anchor();
+};
+
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_JITSYMBOL_H
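A small sketch of the lazy-materialization pattern the GetAddressFtor constructor above enables: the address is produced on the first getAddress() call and cached afterwards. Everything below is illustrative; the static variable stands in for real compilation output.

#include "llvm/ExecutionEngine/JITSymbol.h"

static int PlaceholderTarget;

llvm::JITSymbol makeLazySymbol() {
  auto Materialize = []() -> llvm::JITTargetAddress {
    // Stand-in for actually emitting code; just return a non-null address.
    return reinterpret_cast<llvm::JITTargetAddress>(&PlaceholderTarget);
  };
  return llvm::JITSymbol(std::move(Materialize),
                         llvm::JITSymbolFlags::Exported);
}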
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/JITSymbolFlags.h b/contrib/llvm/include/llvm/ExecutionEngine/JITSymbolFlags.h
deleted file mode 100644
index 7e1d57dabc81..000000000000
--- a/contrib/llvm/include/llvm/ExecutionEngine/JITSymbolFlags.h
+++ /dev/null
@@ -1,91 +0,0 @@
-//===------ JITSymbolFlags.h - Flags for symbols in the JIT -----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Symbol flags for symbols in the JIT (e.g. weak, exported).
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_EXECUTIONENGINE_JITSYMBOLFLAGS_H
-#define LLVM_EXECUTIONENGINE_JITSYMBOLFLAGS_H
-
-#include "llvm/IR/GlobalValue.h"
-#include "llvm/Object/SymbolicFile.h"
-
-namespace llvm {
-
-/// @brief Flags for symbols in the JIT.
-enum class JITSymbolFlags : char {
- None = 0,
- Weak = 1U << 0,
- Exported = 1U << 1
-};
-
-inline JITSymbolFlags operator|(JITSymbolFlags LHS, JITSymbolFlags RHS) {
- typedef std::underlying_type<JITSymbolFlags>::type UT;
- return static_cast<JITSymbolFlags>(
- static_cast<UT>(LHS) | static_cast<UT>(RHS));
-}
-
-inline JITSymbolFlags& operator |=(JITSymbolFlags &LHS, JITSymbolFlags RHS) {
- LHS = LHS | RHS;
- return LHS;
-}
-
-inline JITSymbolFlags operator&(JITSymbolFlags LHS, JITSymbolFlags RHS) {
- typedef std::underlying_type<JITSymbolFlags>::type UT;
- return static_cast<JITSymbolFlags>(
- static_cast<UT>(LHS) & static_cast<UT>(RHS));
-}
-
-inline JITSymbolFlags& operator &=(JITSymbolFlags &LHS, JITSymbolFlags RHS) {
- LHS = LHS & RHS;
- return LHS;
-}
-
-/// @brief Base class for symbols in the JIT.
-class JITSymbolBase {
-public:
- JITSymbolBase(JITSymbolFlags Flags) : Flags(Flags) {}
-
- JITSymbolFlags getFlags() const { return Flags; }
-
- bool isWeak() const {
- return (Flags & JITSymbolFlags::Weak) == JITSymbolFlags::Weak;
- }
-
- bool isExported() const {
- return (Flags & JITSymbolFlags::Exported) == JITSymbolFlags::Exported;
- }
-
- static JITSymbolFlags flagsFromGlobalValue(const GlobalValue &GV) {
- JITSymbolFlags Flags = JITSymbolFlags::None;
- if (GV.hasWeakLinkage())
- Flags |= JITSymbolFlags::Weak;
- if (!GV.hasLocalLinkage() && !GV.hasHiddenVisibility())
- Flags |= JITSymbolFlags::Exported;
- return Flags;
- }
-
- static JITSymbolFlags
- flagsFromObjectSymbol(const object::BasicSymbolRef &Symbol) {
- JITSymbolFlags Flags = JITSymbolFlags::None;
- if (Symbol.getFlags() & object::BasicSymbolRef::SF_Weak)
- Flags |= JITSymbolFlags::Weak;
- if (Symbol.getFlags() & object::BasicSymbolRef::SF_Exported)
- Flags |= JITSymbolFlags::Exported;
- return Flags;
- }
-
-private:
- JITSymbolFlags Flags;
-};
-
-} // end namespace llvm
-
-#endif
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/ObjectCache.h b/contrib/llvm/include/llvm/ExecutionEngine/ObjectCache.h
index cc01a4e58999..077044408e09 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/ObjectCache.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/ObjectCache.h
@@ -1,4 +1,4 @@
-//===-- ObjectCache.h - Class definition for the ObjectCache -----C++ -*-===//
+//===-- ObjectCache.h - Class definition for the ObjectCache ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,6 +11,7 @@
#define LLVM_EXECUTIONENGINE_OBJECTCACHE_H
#include "llvm/Support/MemoryBuffer.h"
+#include <memory>
namespace llvm {
@@ -21,10 +22,11 @@ class Module;
/// have already been compiled and an object file is available.
class ObjectCache {
virtual void anchor();
+
public:
- ObjectCache() { }
+ ObjectCache() = default;
- virtual ~ObjectCache() { }
+ virtual ~ObjectCache() = default;
/// notifyObjectCompiled - Provides a pointer to compiled code for Module M.
virtual void notifyObjectCompiled(const Module *M, MemoryBufferRef Obj) = 0;
@@ -35,6 +37,6 @@ public:
virtual std::unique_ptr<MemoryBuffer> getObject(const Module* M) = 0;
};
-}
+} // end namespace llvm
-#endif
+#endif // LLVM_EXECUTIONENGINE_OBJECTCACHE_H
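
The ObjectCache interface touched above has only the two hooks shown; a hedged sketch of an in-memory cache keyed by module identifier (illustrative, not part of this patch):

#include "llvm/ExecutionEngine/ObjectCache.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/MemoryBuffer.h"
#include <map>
#include <string>

class InMemoryCache : public llvm::ObjectCache {
public:
  void notifyObjectCompiled(const llvm::Module *M,
                            llvm::MemoryBufferRef Obj) override {
    // Copy the object image so it outlives the compiler's own buffer.
    Cache[M->getModuleIdentifier()] =
        llvm::MemoryBuffer::getMemBufferCopy(Obj.getBuffer(),
                                             Obj.getBufferIdentifier());
  }

  std::unique_ptr<llvm::MemoryBuffer> getObject(const llvm::Module *M) override {
    auto I = Cache.find(M->getModuleIdentifier());
    if (I == Cache.end())
      return nullptr; // Not cached; the JIT compiles M from scratch.
    // Hand back a copy so the cache keeps ownership of its entry.
    return llvm::MemoryBuffer::getMemBufferCopy(I->second->getBuffer());
  }

private:
  std::map<std::string, std::unique_ptr<llvm::MemoryBuffer>> Cache;
};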
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/ObjectMemoryBuffer.h b/contrib/llvm/include/llvm/ExecutionEngine/ObjectMemoryBuffer.h
index b07561152ec0..0f00ad006a7d 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/ObjectMemoryBuffer.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/ObjectMemoryBuffer.h
@@ -49,7 +49,7 @@ public:
init(this->SV.begin(), this->SV.end(), false);
}
- const char* getBufferIdentifier() const override { return BufferName.c_str(); }
+ StringRef getBufferIdentifier() const override { return BufferName; }
BufferKind getBufferKind() const override { return MemoryBuffer_Malloc; }
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h
index ef88dd03ad4f..aa096478cd9e 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h
@@ -15,16 +15,35 @@
#ifndef LLVM_EXECUTIONENGINE_ORC_COMPILEONDEMANDLAYER_H
#define LLVM_EXECUTIONENGINE_ORC_COMPILEONDEMANDLAYER_H
-#include "IndirectionUtils.h"
-#include "LambdaResolver.h"
-#include "LogicalDylib.h"
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/ExecutionEngine/JITSymbol.h"
+#include "llvm/ExecutionEngine/RuntimeDyld.h"
+#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h"
+#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <functional>
+#include <iterator>
#include <list>
#include <memory>
#include <set>
+#include <string>
#include <utility>
+#include <vector>
namespace llvm {
namespace orc {
@@ -41,11 +60,11 @@ template <typename BaseLayerT,
typename IndirectStubsMgrT = IndirectStubsManager>
class CompileOnDemandLayer {
private:
-
template <typename MaterializerFtor>
class LambdaMaterializer final : public ValueMaterializer {
public:
LambdaMaterializer(MaterializerFtor M) : M(std::move(M)) {}
+
Value *materialize(Value *V) final { return M(V); }
private:
@@ -67,7 +86,8 @@ private:
ResourceOwner() = default;
ResourceOwner(const ResourceOwner&) = delete;
ResourceOwner& operator=(const ResourceOwner&) = delete;
- virtual ~ResourceOwner() { }
+ virtual ~ResourceOwner() = default;
+
virtual ResourceT& getResource() const = 0;
};
@@ -76,7 +96,9 @@ private:
public:
ResourceOwnerImpl(ResourcePtrT ResourcePtr)
: ResourcePtr(std::move(ResourcePtr)) {}
+
ResourceT& getResource() const override { return *ResourcePtr; }
+
private:
ResourcePtrT ResourcePtr;
};
@@ -88,76 +110,80 @@ private:
return llvm::make_unique<RO>(std::move(ResourcePtr));
}
- struct LogicalModuleResources {
- std::unique_ptr<ResourceOwner<Module>> SourceModule;
- std::set<const Function*> StubsToClone;
- std::unique_ptr<IndirectStubsMgrT> StubsMgr;
-
- LogicalModuleResources() = default;
-
- // Explicit move constructor to make MSVC happy.
- LogicalModuleResources(LogicalModuleResources &&Other)
- : SourceModule(std::move(Other.SourceModule)),
- StubsToClone(std::move(Other.StubsToClone)),
- StubsMgr(std::move(Other.StubsMgr)) {}
-
- // Explicit move assignment to make MSVC happy.
- LogicalModuleResources& operator=(LogicalModuleResources &&Other) {
- SourceModule = std::move(Other.SourceModule);
- StubsToClone = std::move(Other.StubsToClone);
- StubsMgr = std::move(Other.StubsMgr);
- return *this;
- }
-
- JITSymbol findSymbol(StringRef Name, bool ExportedSymbolsOnly) {
- if (Name.endswith("$stub_ptr") && !ExportedSymbolsOnly) {
- assert(!ExportedSymbolsOnly && "Stubs are never exported");
- return StubsMgr->findPointer(Name.drop_back(9));
- }
- return StubsMgr->findStub(Name, ExportedSymbolsOnly);
+ class StaticGlobalRenamer {
+ public:
+ StaticGlobalRenamer() = default;
+ StaticGlobalRenamer(StaticGlobalRenamer &&) = default;
+ StaticGlobalRenamer &operator=(StaticGlobalRenamer &&) = default;
+
+ void rename(Module &M) {
+ for (auto &F : M)
+ if (F.hasLocalLinkage())
+ F.setName("$static." + Twine(NextId++));
+ for (auto &G : M.globals())
+ if (G.hasLocalLinkage())
+ G.setName("$static." + Twine(NextId++));
}
+ private:
+ unsigned NextId = 0;
};
- struct LogicalDylibResources {
- typedef std::function<RuntimeDyld::SymbolInfo(const std::string&)>
- SymbolResolverFtor;
+ struct LogicalDylib {
+ typedef std::function<JITSymbol(const std::string&)> SymbolResolverFtor;
typedef std::function<typename BaseLayerT::ModuleSetHandleT(
BaseLayerT&,
std::unique_ptr<Module>,
- std::unique_ptr<RuntimeDyld::SymbolResolver>)>
+ std::unique_ptr<JITSymbolResolver>)>
ModuleAdderFtor;
- LogicalDylibResources() = default;
+ struct SourceModuleEntry {
+ std::unique_ptr<ResourceOwner<Module>> SourceMod;
+ std::set<Function*> StubsToClone;
+ };
- // Explicit move constructor to make MSVC happy.
- LogicalDylibResources(LogicalDylibResources &&Other)
- : ExternalSymbolResolver(std::move(Other.ExternalSymbolResolver)),
- MemMgr(std::move(Other.MemMgr)),
- ModuleAdder(std::move(Other.ModuleAdder)) {}
+ typedef std::vector<SourceModuleEntry> SourceModulesList;
+ typedef typename SourceModulesList::size_type SourceModuleHandle;
- // Explicit move assignment operator to make MSVC happy.
- LogicalDylibResources& operator=(LogicalDylibResources &&Other) {
- ExternalSymbolResolver = std::move(Other.ExternalSymbolResolver);
- MemMgr = std::move(Other.MemMgr);
- ModuleAdder = std::move(Other.ModuleAdder);
- return *this;
+ SourceModuleHandle
+ addSourceModule(std::unique_ptr<ResourceOwner<Module>> M) {
+ SourceModuleHandle H = SourceModules.size();
+ SourceModules.push_back(SourceModuleEntry());
+ SourceModules.back().SourceMod = std::move(M);
+ return H;
}
- std::unique_ptr<RuntimeDyld::SymbolResolver> ExternalSymbolResolver;
+ Module& getSourceModule(SourceModuleHandle H) {
+ return SourceModules[H].SourceMod->getResource();
+ }
+
+ std::set<Function*>& getStubsToClone(SourceModuleHandle H) {
+ return SourceModules[H].StubsToClone;
+ }
+
+ JITSymbol findSymbol(BaseLayerT &BaseLayer, const std::string &Name,
+ bool ExportedSymbolsOnly) {
+ if (auto Sym = StubsMgr->findStub(Name, ExportedSymbolsOnly))
+ return Sym;
+ for (auto BLH : BaseLayerHandles)
+ if (auto Sym = BaseLayer.findSymbolIn(BLH, Name, ExportedSymbolsOnly))
+ return Sym;
+ return nullptr;
+ }
+
+ std::unique_ptr<JITSymbolResolver> ExternalSymbolResolver;
std::unique_ptr<ResourceOwner<RuntimeDyld::MemoryManager>> MemMgr;
+ std::unique_ptr<IndirectStubsMgrT> StubsMgr;
+ StaticGlobalRenamer StaticRenamer;
ModuleAdderFtor ModuleAdder;
+ SourceModulesList SourceModules;
+ std::vector<BaseLayerModuleSetHandleT> BaseLayerHandles;
};
- typedef LogicalDylib<BaseLayerT, LogicalModuleResources,
- LogicalDylibResources> CODLogicalDylib;
-
- typedef typename CODLogicalDylib::LogicalModuleHandle LogicalModuleHandle;
- typedef std::list<CODLogicalDylib> LogicalDylibList;
+ typedef std::list<LogicalDylib> LogicalDylibList;
public:
-
/// @brief Handle to a set of loaded modules.
typedef typename LogicalDylibList::iterator ModuleSetHandleT;
@@ -185,18 +211,17 @@ public:
MemoryManagerPtrT MemMgr,
SymbolResolverPtrT Resolver) {
- LogicalDylibs.push_back(CODLogicalDylib(BaseLayer));
- auto &LDResources = LogicalDylibs.back().getDylibResources();
-
- LDResources.ExternalSymbolResolver = std::move(Resolver);
+ LogicalDylibs.push_back(LogicalDylib());
+ auto &LD = LogicalDylibs.back();
+ LD.ExternalSymbolResolver = std::move(Resolver);
+ LD.StubsMgr = CreateIndirectStubsManager();
auto &MemMgrRef = *MemMgr;
- LDResources.MemMgr =
- wrapOwnership<RuntimeDyld::MemoryManager>(std::move(MemMgr));
+ LD.MemMgr = wrapOwnership<RuntimeDyld::MemoryManager>(std::move(MemMgr));
- LDResources.ModuleAdder =
+ LD.ModuleAdder =
[&MemMgrRef](BaseLayerT &B, std::unique_ptr<Module> M,
- std::unique_ptr<RuntimeDyld::SymbolResolver> R) {
+ std::unique_ptr<JITSymbolResolver> R) {
std::vector<std::unique_ptr<Module>> Ms;
Ms.push_back(std::move(M));
return B.addModuleSet(std::move(Ms), &MemMgrRef, std::move(R));
@@ -223,9 +248,12 @@ public:
/// @return A handle for the given named symbol, if it exists.
JITSymbol findSymbol(StringRef Name, bool ExportedSymbolsOnly) {
for (auto LDI = LogicalDylibs.begin(), LDE = LogicalDylibs.end();
- LDI != LDE; ++LDI)
- if (auto Symbol = findSymbolIn(LDI, Name, ExportedSymbolsOnly))
- return Symbol;
+ LDI != LDE; ++LDI) {
+ if (auto Sym = LDI->StubsMgr->findStub(Name, ExportedSymbolsOnly))
+ return Sym;
+ if (auto Sym = findSymbolIn(LDI, Name, ExportedSymbolsOnly))
+ return Sym;
+ }
return BaseLayer.findSymbol(Name, ExportedSymbolsOnly);
}
@@ -233,25 +261,50 @@ public:
/// below this one.
JITSymbol findSymbolIn(ModuleSetHandleT H, const std::string &Name,
bool ExportedSymbolsOnly) {
- return H->findSymbol(Name, ExportedSymbolsOnly);
+ return H->findSymbol(BaseLayer, Name, ExportedSymbolsOnly);
}
-private:
+ /// @brief Update the stub for the given function to point at FnBodyAddr.
+ /// This can be used to support re-optimization.
+ /// @return true if the function exists and the stub is updated, false
+ /// otherwise.
+ //
+ // FIXME: We should track and free associated resources (unused compile
+ // callbacks, uncompiled IR, and no-longer-needed/reachable function
+ // implementations).
+ // FIXME: Return Error once the JIT APIs are Errorized.
+ bool updatePointer(std::string FuncName, JITTargetAddress FnBodyAddr) {
+ // Find out which logical dylib contains our symbol.
+ auto LDI = LogicalDylibs.begin();
+ for (auto LDE = LogicalDylibs.end(); LDI != LDE; ++LDI) {
+ if (auto LMResources = LDI->getLogicalModuleResourcesForSymbol(FuncName, false)) {
+ Module &SrcM = LMResources->SourceModule->getResource();
+ std::string CalledFnName = mangle(FuncName, SrcM.getDataLayout());
+ if (auto EC = LMResources->StubsMgr->updatePointer(CalledFnName, FnBodyAddr))
+ return false;
+ else
+ return true;
+ }
+ }
+ return false;
+ }
+private:
template <typename ModulePtrT>
- void addLogicalModule(CODLogicalDylib &LD, ModulePtrT SrcMPtr) {
+ void addLogicalModule(LogicalDylib &LD, ModulePtrT SrcMPtr) {
+
+ // Rename all static functions / globals to $static.X :
+ // This will unique the names across all modules in the logical dylib,
+ // simplifying symbol lookup.
+ LD.StaticRenamer.rename(*SrcMPtr);
// Bump the linkage and rename any anonymous/private members in SrcM to
// ensure that everything will resolve properly after we partition SrcM.
makeAllSymbolsExternallyAccessible(*SrcMPtr);
// Create a logical module handle for SrcM within the logical dylib.
- auto LMH = LD.createLogicalModule();
- auto &LMResources = LD.getLogicalModuleResources(LMH);
-
- LMResources.SourceModule = wrapOwnership<Module>(std::move(SrcMPtr));
-
- Module &SrcM = LMResources.SourceModule->getResource();
+ Module &SrcM = *SrcMPtr;
+ auto LMId = LD.addSourceModule(wrapOwnership<Module>(std::move(SrcMPtr)));
// Create stub functions.
const DataLayout &DL = SrcM.getDataLayout();
@@ -262,34 +315,40 @@ private:
if (F.isDeclaration())
continue;
+ // Skip weak functions for which we already have definitions.
+ auto MangledName = mangle(F.getName(), DL);
+ if (F.hasWeakLinkage() || F.hasLinkOnceLinkage())
+ if (auto Sym = LD.findSymbol(BaseLayer, MangledName, false))
+ continue;
+
// Record all functions defined by this module.
if (CloneStubsIntoPartitions)
- LMResources.StubsToClone.insert(&F);
+ LD.getStubsToClone(LMId).insert(&F);
// Create a callback, associate it with the stub for the function,
// and set the compile action to compile the partition containing the
// function.
auto CCInfo = CompileCallbackMgr.getCompileCallback();
- StubInits[mangle(F.getName(), DL)] =
+ StubInits[MangledName] =
std::make_pair(CCInfo.getAddress(),
- JITSymbolBase::flagsFromGlobalValue(F));
- CCInfo.setCompileAction([this, &LD, LMH, &F]() {
- return this->extractAndCompile(LD, LMH, F);
+ JITSymbolFlags::fromGlobalValue(F));
+ CCInfo.setCompileAction([this, &LD, LMId, &F]() {
+ return this->extractAndCompile(LD, LMId, F);
});
}
- LMResources.StubsMgr = CreateIndirectStubsManager();
- auto EC = LMResources.StubsMgr->createStubs(StubInits);
+ auto EC = LD.StubsMgr->createStubs(StubInits);
(void)EC;
// FIXME: This should be propagated back to the user. Stub creation may
// fail for remote JITs.
assert(!EC && "Error generating stubs");
}
- // If this module doesn't contain any globals or aliases we can bail out
- // early and avoid the overhead of creating and managing an empty globals
- // module.
- if (SrcM.global_empty() && SrcM.alias_empty())
+ // If this module doesn't contain any globals, aliases, or module flags then
+ // we can bail out early and avoid the overhead of creating and managing an
+ // empty globals module.
+ if (SrcM.global_empty() && SrcM.alias_empty() &&
+ !SrcM.getModuleFlagsMetadata())
return;
// Create the GlobalValues module.
@@ -309,12 +368,15 @@ private:
if (!VMap.count(&A))
cloneGlobalAliasDecl(*GVsM, A, VMap);
+ // Clone the module flags.
+ cloneModuleFlagsMetadata(*GVsM, SrcM, VMap);
+
// Now we need to clone the GV and alias initializers.
// Initializers may refer to functions declared (but not defined) in this
// module. Build a materializer to clone decls on demand.
auto Materializer = createLambdaMaterializer(
- [this, &GVsM, &LMResources](Value *V) -> Value* {
+ [this, &LD, &GVsM](Value *V) -> Value* {
if (auto *F = dyn_cast<Function>(V)) {
// Decls in the original module just get cloned.
if (F->isDeclaration())
@@ -325,7 +387,7 @@ private:
// instead.
const DataLayout &DL = GVsM->getDataLayout();
std::string FName = mangle(F->getName(), DL);
- auto StubSym = LMResources.StubsMgr->findStub(FName, false);
+ auto StubSym = LD.StubsMgr->findStub(FName, false);
unsigned PtrBitWidth = DL.getPointerTypeSizeInBits(F->getType());
ConstantInt *StubAddr =
ConstantInt::get(GVsM->getContext(),
@@ -357,21 +419,20 @@ private:
// Build a resolver for the globals module and add it to the base layer.
auto GVsResolver = createLambdaResolver(
- [&LD, LMH](const std::string &Name) {
- auto &LMResources = LD.getLogicalModuleResources(LMH);
- if (auto Sym = LMResources.StubsMgr->findStub(Name, false))
- return Sym.toRuntimeDyldSymbol();
- auto &LDResolver = LD.getDylibResources().ExternalSymbolResolver;
- return LDResolver->findSymbolInLogicalDylib(Name);
+ [this, &LD, LMId](const std::string &Name) {
+ if (auto Sym = LD.StubsMgr->findStub(Name, false))
+ return Sym;
+ if (auto Sym = LD.findSymbol(BaseLayer, Name, false))
+ return Sym;
+ return LD.ExternalSymbolResolver->findSymbolInLogicalDylib(Name);
},
[&LD](const std::string &Name) {
- auto &LDResolver = LD.getDylibResources().ExternalSymbolResolver;
- return LDResolver->findSymbol(Name);
+ return LD.ExternalSymbolResolver->findSymbol(Name);
});
- auto GVsH = LD.getDylibResources().ModuleAdder(BaseLayer, std::move(GVsM),
- std::move(GVsResolver));
- LD.addToLogicalModule(LMH, GVsH);
+ auto GVsH = LD.ModuleAdder(BaseLayer, std::move(GVsM),
+ std::move(GVsResolver));
+ LD.BaseLayerHandles.push_back(GVsH);
}
static std::string mangle(StringRef Name, const DataLayout &DL) {
@@ -383,11 +444,11 @@ private:
return MangledName;
}
- TargetAddress extractAndCompile(CODLogicalDylib &LD,
- LogicalModuleHandle LMH,
- Function &F) {
- auto &LMResources = LD.getLogicalModuleResources(LMH);
- Module &SrcM = LMResources.SourceModule->getResource();
+ JITTargetAddress
+ extractAndCompile(LogicalDylib &LD,
+ typename LogicalDylib::SourceModuleHandle LMId,
+ Function &F) {
+ Module &SrcM = LD.getSourceModule(LMId);
// If F is a declaration we must already have compiled it.
if (F.isDeclaration())
@@ -397,15 +458,15 @@ private:
std::string CalledFnName = mangle(F.getName(), SrcM.getDataLayout());
auto Part = Partition(F);
- auto PartH = emitPartition(LD, LMH, Part);
+ auto PartH = emitPartition(LD, LMId, Part);
- TargetAddress CalledAddr = 0;
+ JITTargetAddress CalledAddr = 0;
for (auto *SubF : Part) {
std::string FnName = mangle(SubF->getName(), SrcM.getDataLayout());
auto FnBodySym = BaseLayer.findSymbolIn(PartH, FnName, false);
assert(FnBodySym && "Couldn't find function body.");
- TargetAddress FnBodyAddr = FnBodySym.getAddress();
+ JITTargetAddress FnBodyAddr = FnBodySym.getAddress();
// If this is the function we're calling, record the address so we can
// return it from this function.
@@ -413,7 +474,7 @@ private:
CalledAddr = FnBodyAddr;
// Update the function body pointer for the stub.
- if (auto EC = LMResources.StubsMgr->updatePointer(FnName, FnBodyAddr))
+ if (auto EC = LD.StubsMgr->updatePointer(FnName, FnBodyAddr))
return 0;
}
@@ -421,11 +482,11 @@ private:
}
template <typename PartitionT>
- BaseLayerModuleSetHandleT emitPartition(CODLogicalDylib &LD,
- LogicalModuleHandle LMH,
- const PartitionT &Part) {
- auto &LMResources = LD.getLogicalModuleResources(LMH);
- Module &SrcM = LMResources.SourceModule->getResource();
+ BaseLayerModuleSetHandleT
+ emitPartition(LogicalDylib &LD,
+ typename LogicalDylib::SourceModuleHandle LMId,
+ const PartitionT &Part) {
+ Module &SrcM = LD.getSourceModule(LMId);
// Create the module.
std::string NewName = SrcM.getName();
@@ -438,14 +499,14 @@ private:
M->setDataLayout(SrcM.getDataLayout());
ValueToValueMapTy VMap;
- auto Materializer = createLambdaMaterializer([this, &LMResources, &M,
+ auto Materializer = createLambdaMaterializer([this, &LD, &LMId, &M,
&VMap](Value *V) -> Value * {
if (auto *GV = dyn_cast<GlobalVariable>(V))
return cloneGlobalVariableDecl(*M, *GV);
if (auto *F = dyn_cast<Function>(V)) {
// Check whether we want to clone an available_externally definition.
- if (!LMResources.StubsToClone.count(F))
+ if (!LD.getStubsToClone(LMId).count(F))
return cloneFunctionDecl(*M, *F);
// Ok - we want an inlinable stub. For that to work we need a decl
@@ -485,19 +546,16 @@ private:
// Create memory manager and symbol resolver.
auto Resolver = createLambdaResolver(
- [this, &LD, LMH](const std::string &Name) {
- if (auto Sym = LD.findSymbolInternally(LMH, Name))
- return Sym.toRuntimeDyldSymbol();
- auto &LDResolver = LD.getDylibResources().ExternalSymbolResolver;
- return LDResolver->findSymbolInLogicalDylib(Name);
+ [this, &LD, LMId](const std::string &Name) {
+ if (auto Sym = LD.findSymbol(BaseLayer, Name, false))
+ return Sym;
+ return LD.ExternalSymbolResolver->findSymbolInLogicalDylib(Name);
},
[this, &LD](const std::string &Name) {
- auto &LDResolver = LD.getDylibResources().ExternalSymbolResolver;
- return LDResolver->findSymbol(Name);
+ return LD.ExternalSymbolResolver->findSymbol(Name);
});
- return LD.getDylibResources().ModuleAdder(BaseLayer, std::move(M),
- std::move(Resolver));
+ return LD.ModuleAdder(BaseLayer, std::move(M), std::move(Resolver));
}
BaseLayerT &BaseLayer;
@@ -509,7 +567,7 @@ private:
bool CloneStubsIntoPartitions;
};
-} // End namespace orc.
-} // End namespace llvm.
+} // end namespace orc
+} // end namespace llvm
#endif // LLVM_EXECUTIONENGINE_ORC_COMPILEONDEMANDLAYER_H
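
The updatePointer hook added above can drive simple re-optimization; a hedged sketch of how a client might use it (the layer type, the "foo" symbol, and NewBodyAddr are assumptions, not part of the patch):

#include "llvm/ExecutionEngine/JITSymbol.h"
#include "llvm/Support/raw_ostream.h"

template <typename CODLayerT>
void reoptimizeFoo(CODLayerT &CODLayer, llvm::JITTargetAddress NewBodyAddr) {
  // updatePointer returns true only if a stub for the symbol exists and was
  // repointed; on failure the previously emitted body keeps being used.
  if (!CODLayer.updatePointer("foo", NewBodyAddr))
    llvm::errs() << "no stub for foo; pointer not updated\n";
}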
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h
index c10508cc8a62..a32278b8a81e 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/ExecutionUtils.h
@@ -14,9 +14,9 @@
#ifndef LLVM_EXECUTIONENGINE_ORC_EXECUTIONUTILS_H
#define LLVM_EXECUTIONENGINE_ORC_EXECUTIONUTILS_H
-#include "JITSymbol.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/ExecutionEngine/JITSymbol.h"
#include "llvm/ExecutionEngine/RuntimeDyld.h"
#include <vector>
@@ -43,12 +43,12 @@ public:
/// This class provides a read-only view of the element with any casts on
/// the function stripped away.
struct Element {
- Element(unsigned Priority, const Function *Func, const Value *Data)
+ Element(unsigned Priority, Function *Func, Value *Data)
: Priority(Priority), Func(Func), Data(Data) {}
unsigned Priority;
- const Function *Func;
- const Value *Data;
+ Function *Func;
+ Value *Data;
};
/// @brief Construct an iterator instance. If End is true then this iterator
@@ -144,10 +144,10 @@ public:
}
/// Search overridden symbols.
- RuntimeDyld::SymbolInfo searchOverrides(const std::string &Name) {
+ JITEvaluatedSymbol searchOverrides(const std::string &Name) {
auto I = CXXRuntimeOverrides.find(Name);
if (I != CXXRuntimeOverrides.end())
- return RuntimeDyld::SymbolInfo(I->second, JITSymbolFlags::Exported);
+ return JITEvaluatedSymbol(I->second, JITSymbolFlags::Exported);
return nullptr;
}
@@ -158,15 +158,15 @@ public:
private:
template <typename PtrTy>
- TargetAddress toTargetAddress(PtrTy* P) {
- return static_cast<TargetAddress>(reinterpret_cast<uintptr_t>(P));
+ JITTargetAddress toTargetAddress(PtrTy* P) {
+ return static_cast<JITTargetAddress>(reinterpret_cast<uintptr_t>(P));
}
- void addOverride(const std::string &Name, TargetAddress Addr) {
+ void addOverride(const std::string &Name, JITTargetAddress Addr) {
CXXRuntimeOverrides.insert(std::make_pair(Name, Addr));
}
- StringMap<TargetAddress> CXXRuntimeOverrides;
+ StringMap<JITTargetAddress> CXXRuntimeOverrides;
typedef void (*DestructorPtr)(void*);
typedef std::pair<DestructorPtr, void*> CXXDestructorDataPair;
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/GlobalMappingLayer.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/GlobalMappingLayer.h
index 9fa222c340f8..634d1480ae4c 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/GlobalMappingLayer.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/GlobalMappingLayer.h
@@ -15,7 +15,7 @@
#ifndef LLVM_EXECUTIONENGINE_ORC_GLOBALMAPPINGLAYER_H
#define LLVM_EXECUTIONENGINE_ORC_GLOBALMAPPINGLAYER_H
-#include "JITSymbol.h"
+#include "llvm/ExecutionEngine/JITSymbol.h"
#include <map>
namespace llvm {
@@ -52,7 +52,7 @@ public:
void removeModuleSet(ModuleSetHandleT H) { BaseLayer.removeModuleSet(H); }
/// @brief Manually set the address to return for the given symbol.
- void setGlobalMapping(const std::string &Name, TargetAddress Addr) {
+ void setGlobalMapping(const std::string &Name, JITTargetAddress Addr) {
SymbolTable[Name] = Addr;
}
@@ -99,7 +99,7 @@ public:
private:
BaseLayerT &BaseLayer;
- std::map<std::string, TargetAddress> SymbolTable;
+ std::map<std::string, JITTargetAddress> SymbolTable;
};
} // End namespace orc.
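
setGlobalMapping above lets a client pin individual symbols to fixed addresses ahead of the base layer; a hedged sketch (the layer is assumed to already sit on top of some base layer created elsewhere, and the symbol name is illustrative):

#include "llvm/ADT/StringRef.h"
#include "llvm/ExecutionEngine/JITSymbol.h"
#include "llvm/ExecutionEngine/Orc/GlobalMappingLayer.h"
#include <cstdint>

template <typename BaseLayerT>
void pinHostFunction(llvm::orc::GlobalMappingLayer<BaseLayerT> &Layer,
                     llvm::StringRef Name, void (*Fn)()) {
  // Convert the host pointer to a target address; findSymbol on this layer
  // will now return it for Name before consulting the base layer.
  Layer.setGlobalMapping(
      Name.str(),
      static_cast<llvm::JITTargetAddress>(
          reinterpret_cast<std::uintptr_t>(Fn)));
}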
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h
index e6ce18a42b8b..f16dd021ea51 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/IRCompileLayer.h
@@ -14,9 +14,8 @@
#ifndef LLVM_EXECUTIONENGINE_ORC_IRCOMPILELAYER_H
#define LLVM_EXECUTIONENGINE_ORC_IRCOMPILELAYER_H
-#include "JITSymbol.h"
#include "llvm/ExecutionEngine/ObjectCache.h"
-#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
+#include "llvm/ExecutionEngine/JITSymbol.h"
#include "llvm/Object/ObjectFile.h"
#include <memory>
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h
index 4dabb9a41494..c67297b111b9 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/IRTransformLayer.h
@@ -14,7 +14,7 @@
#ifndef LLVM_EXECUTIONENGINE_ORC_IRTRANSFORMLAYER_H
#define LLVM_EXECUTIONENGINE_ORC_IRTRANSFORMLAYER_H
-#include "JITSymbol.h"
+#include "llvm/ExecutionEngine/JITSymbol.h"
namespace llvm {
namespace orc {
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h
index 51172c51e136..07bbd921dad6 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h
@@ -14,14 +14,26 @@
#ifndef LLVM_EXECUTIONENGINE_ORC_INDIRECTIONUTILS_H
#define LLVM_EXECUTIONENGINE_ORC_INDIRECTIONUTILS_H
-#include "JITSymbol.h"
-#include "LambdaResolver.h"
-#include "llvm/ExecutionEngine/RuntimeDyld.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/ExecutionEngine/JITSymbol.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Module.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/Memory.h"
#include "llvm/Support/Process.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <functional>
+#include <map>
+#include <memory>
+#include <system_error>
+#include <utility>
+#include <vector>
namespace llvm {
namespace orc {
@@ -29,37 +41,37 @@ namespace orc {
/// @brief Target-independent base class for compile callback management.
class JITCompileCallbackManager {
public:
- typedef std::function<TargetAddress()> CompileFtor;
+ typedef std::function<JITTargetAddress()> CompileFtor;
/// @brief Handle to a newly created compile callback. Can be used to get an
/// IR constant representing the address of the trampoline, and to set
/// the compile action for the callback.
class CompileCallbackInfo {
public:
- CompileCallbackInfo(TargetAddress Addr, CompileFtor &Compile)
+ CompileCallbackInfo(JITTargetAddress Addr, CompileFtor &Compile)
: Addr(Addr), Compile(Compile) {}
- TargetAddress getAddress() const { return Addr; }
+ JITTargetAddress getAddress() const { return Addr; }
void setCompileAction(CompileFtor Compile) {
this->Compile = std::move(Compile);
}
private:
- TargetAddress Addr;
+ JITTargetAddress Addr;
CompileFtor &Compile;
};
/// @brief Construct a JITCompileCallbackManager.
/// @param ErrorHandlerAddress The address of an error handler in the target
/// process to be used if a compile callback fails.
- JITCompileCallbackManager(TargetAddress ErrorHandlerAddress)
+ JITCompileCallbackManager(JITTargetAddress ErrorHandlerAddress)
: ErrorHandlerAddress(ErrorHandlerAddress) {}
- virtual ~JITCompileCallbackManager() {}
+ virtual ~JITCompileCallbackManager() = default;
/// @brief Execute the callback for the given trampoline id. Called by the JIT
/// to compile functions on demand.
- TargetAddress executeCompileCallback(TargetAddress TrampolineAddr) {
+ JITTargetAddress executeCompileCallback(JITTargetAddress TrampolineAddr) {
auto I = ActiveTrampolines.find(TrampolineAddr);
// FIXME: Also raise an error in the Orc error-handler when we finally have
// one.
@@ -86,13 +98,13 @@ public:
/// @brief Reserve a compile callback.
CompileCallbackInfo getCompileCallback() {
- TargetAddress TrampolineAddr = getAvailableTrampolineAddr();
+ JITTargetAddress TrampolineAddr = getAvailableTrampolineAddr();
auto &Compile = this->ActiveTrampolines[TrampolineAddr];
return CompileCallbackInfo(TrampolineAddr, Compile);
}
/// @brief Get a CompileCallbackInfo for an existing callback.
- CompileCallbackInfo getCompileCallbackInfo(TargetAddress TrampolineAddr) {
+ CompileCallbackInfo getCompileCallbackInfo(JITTargetAddress TrampolineAddr) {
auto I = ActiveTrampolines.find(TrampolineAddr);
assert(I != ActiveTrampolines.end() && "Not an active trampoline.");
return CompileCallbackInfo(I->first, I->second);
@@ -103,7 +115,7 @@ public:
/// Note: Callbacks are auto-released after they execute. This method should
/// only be called to manually release a callback that is not going to
/// execute.
- void releaseCompileCallback(TargetAddress TrampolineAddr) {
+ void releaseCompileCallback(JITTargetAddress TrampolineAddr) {
auto I = ActiveTrampolines.find(TrampolineAddr);
assert(I != ActiveTrampolines.end() && "Not an active trampoline.");
ActiveTrampolines.erase(I);
@@ -111,19 +123,19 @@ public:
}
protected:
- TargetAddress ErrorHandlerAddress;
+ JITTargetAddress ErrorHandlerAddress;
- typedef std::map<TargetAddress, CompileFtor> TrampolineMapT;
+ typedef std::map<JITTargetAddress, CompileFtor> TrampolineMapT;
TrampolineMapT ActiveTrampolines;
- std::vector<TargetAddress> AvailableTrampolines;
+ std::vector<JITTargetAddress> AvailableTrampolines;
private:
- TargetAddress getAvailableTrampolineAddr() {
+ JITTargetAddress getAvailableTrampolineAddr() {
if (this->AvailableTrampolines.empty())
grow();
assert(!this->AvailableTrampolines.empty() &&
"Failed to grow available trampolines.");
- TargetAddress TrampolineAddr = this->AvailableTrampolines.back();
+ JITTargetAddress TrampolineAddr = this->AvailableTrampolines.back();
this->AvailableTrampolines.pop_back();
return TrampolineAddr;
}
@@ -141,7 +153,7 @@ public:
/// @brief Construct a LocalJITCompileCallbackManager.
/// @param ErrorHandlerAddress The address of an error handler in the target
/// process to be used if a compile callback fails.
- LocalJITCompileCallbackManager(TargetAddress ErrorHandlerAddress)
+ LocalJITCompileCallbackManager(JITTargetAddress ErrorHandlerAddress)
: JITCompileCallbackManager(ErrorHandlerAddress) {
/// Set up the resolver block.
@@ -161,11 +173,12 @@ public:
}
private:
- static TargetAddress reenter(void *CCMgr, void *TrampolineId) {
+ static JITTargetAddress reenter(void *CCMgr, void *TrampolineId) {
JITCompileCallbackManager *Mgr =
static_cast<JITCompileCallbackManager *>(CCMgr);
return Mgr->executeCompileCallback(
- static_cast<TargetAddress>(reinterpret_cast<uintptr_t>(TrampolineId)));
+ static_cast<JITTargetAddress>(
+ reinterpret_cast<uintptr_t>(TrampolineId)));
}
void grow() override {
@@ -188,7 +201,7 @@ private:
for (unsigned I = 0; I < NumTrampolines; ++I)
this->AvailableTrampolines.push_back(
- static_cast<TargetAddress>(reinterpret_cast<uintptr_t>(
+ static_cast<JITTargetAddress>(reinterpret_cast<uintptr_t>(
TrampolineMem + (I * TargetT::TrampolineSize))));
EC = sys::Memory::protectMappedMemory(TrampolineBlock.getMemoryBlock(),
@@ -207,12 +220,12 @@ private:
class IndirectStubsManager {
public:
/// @brief Map type for initializing the manager. See init.
- typedef StringMap<std::pair<TargetAddress, JITSymbolFlags>> StubInitsMap;
+ typedef StringMap<std::pair<JITTargetAddress, JITSymbolFlags>> StubInitsMap;
- virtual ~IndirectStubsManager() {}
+ virtual ~IndirectStubsManager() = default;
/// @brief Create a single stub with the given name, target address and flags.
- virtual Error createStub(StringRef StubName, TargetAddress StubAddr,
+ virtual Error createStub(StringRef StubName, JITTargetAddress StubAddr,
JITSymbolFlags StubFlags) = 0;
/// @brief Create StubInits.size() stubs with the given names, target
@@ -228,7 +241,7 @@ public:
virtual JITSymbol findPointer(StringRef Name) = 0;
/// @brief Change the value of the implementation pointer for the stub.
- virtual Error updatePointer(StringRef Name, TargetAddress NewAddr) = 0;
+ virtual Error updatePointer(StringRef Name, JITTargetAddress NewAddr) = 0;
private:
virtual void anchor();
@@ -239,7 +252,7 @@ private:
template <typename TargetT>
class LocalIndirectStubsManager : public IndirectStubsManager {
public:
- Error createStub(StringRef StubName, TargetAddress StubAddr,
+ Error createStub(StringRef StubName, JITTargetAddress StubAddr,
JITSymbolFlags StubFlags) override {
if (auto Err = reserveStubs(1))
return Err;
@@ -268,9 +281,9 @@ public:
void *StubAddr = IndirectStubsInfos[Key.first].getStub(Key.second);
assert(StubAddr && "Missing stub address");
auto StubTargetAddr =
- static_cast<TargetAddress>(reinterpret_cast<uintptr_t>(StubAddr));
+ static_cast<JITTargetAddress>(reinterpret_cast<uintptr_t>(StubAddr));
auto StubSymbol = JITSymbol(StubTargetAddr, I->second.second);
- if (ExportedStubsOnly && !StubSymbol.isExported())
+ if (ExportedStubsOnly && !StubSymbol.getFlags().isExported())
return nullptr;
return StubSymbol;
}
@@ -283,11 +296,11 @@ public:
void *PtrAddr = IndirectStubsInfos[Key.first].getPtr(Key.second);
assert(PtrAddr && "Missing pointer address");
auto PtrTargetAddr =
- static_cast<TargetAddress>(reinterpret_cast<uintptr_t>(PtrAddr));
+ static_cast<JITTargetAddress>(reinterpret_cast<uintptr_t>(PtrAddr));
return JITSymbol(PtrTargetAddr, I->second.second);
}
- Error updatePointer(StringRef Name, TargetAddress NewAddr) override {
+ Error updatePointer(StringRef Name, JITTargetAddress NewAddr) override {
auto I = StubIndexes.find(Name);
assert(I != StubIndexes.end() && "No stub pointer for symbol");
auto Key = I->second.first;
@@ -313,7 +326,7 @@ private:
return Error::success();
}
- void createStubInternal(StringRef StubName, TargetAddress InitAddr,
+ void createStubInternal(StringRef StubName, JITTargetAddress InitAddr,
JITSymbolFlags StubFlags) {
auto Key = FreeStubs.back();
FreeStubs.pop_back();
@@ -335,7 +348,7 @@ private:
/// manager if a compile callback fails.
std::unique_ptr<JITCompileCallbackManager>
createLocalCompileCallbackManager(const Triple &T,
- TargetAddress ErrorHandlerAddress);
+ JITTargetAddress ErrorHandlerAddress);
/// @brief Create a local indirect stubs manager builder.
///
@@ -348,7 +361,7 @@ createLocalIndirectStubsManagerBuilder(const Triple &T);
///
/// Usage example: Turn a trampoline address into a function pointer constant
/// for use in a stub.
-Constant *createIRTypedAddress(FunctionType &FT, TargetAddress Addr);
+Constant *createIRTypedAddress(FunctionType &FT, JITTargetAddress Addr);
/// @brief Create a function pointer with the given type, name, and initializer
/// in the given Module.
@@ -410,11 +423,15 @@ void moveGlobalVariableInitializer(GlobalVariable &OrigGV,
ValueMaterializer *Materializer = nullptr,
GlobalVariable *NewGV = nullptr);
-/// @brief Clone
+/// @brief Clone a global alias declaration into a new module.
GlobalAlias *cloneGlobalAliasDecl(Module &Dst, const GlobalAlias &OrigA,
ValueToValueMapTy &VMap);
-} // End namespace orc.
-} // End namespace llvm.
+/// @brief Clone module flags metadata into the destination module.
+void cloneModuleFlagsMetadata(Module &Dst, const Module &Src,
+ ValueToValueMapTy &VMap);
+
+} // end namespace orc
+} // end namespace llvm
#endif // LLVM_EXECUTIONENGINE_ORC_INDIRECTIONUTILS_H
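
Taken together, the compile-callback and indirect-stub interfaces above support lazy compilation; a hedged sketch follows (compileBody is hypothetical, and the manager instances are assumed to come from createLocalCompileCallbackManager and the local stubs-manager builder):

#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h"
#include "llvm/Support/Error.h"
#include <string>

// Hypothetical helper, defined elsewhere: compiles the body for Name and
// returns its address in the target process.
llvm::JITTargetAddress compileBody(const std::string &Name);

llvm::JITTargetAddress
makeLazyStub(llvm::orc::JITCompileCallbackManager &CCMgr,
             llvm::orc::IndirectStubsManager &StubsMgr, std::string Name) {
  // Reserve a trampoline and attach the action to run on first call.
  auto CCInfo = CCMgr.getCompileCallback();
  CCInfo.setCompileAction([&StubsMgr, Name]() -> llvm::JITTargetAddress {
    llvm::JITTargetAddress Body = compileBody(Name);
    // Repoint the stub so subsequent calls bypass the trampoline.
    if (auto Err = StubsMgr.updatePointer(Name, Body)) {
      llvm::consumeError(std::move(Err));
      return 0; // 0 sends execution to the manager's error-handler address.
    }
    return Body;
  });
  // Emit a stub whose pointer initially targets the trampoline.
  if (auto Err = StubsMgr.createStub(Name, CCInfo.getAddress(),
                                     llvm::JITSymbolFlags::Exported)) {
    llvm::consumeError(std::move(Err));
    return 0;
  }
  return StubsMgr.findStub(Name, false).getAddress();
}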
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/JITSymbol.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/JITSymbol.h
deleted file mode 100644
index 464417e4e6d5..000000000000
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/JITSymbol.h
+++ /dev/null
@@ -1,87 +0,0 @@
-//===----------- JITSymbol.h - JIT symbol abstraction -----------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Abstraction for target process addresses.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_EXECUTIONENGINE_ORC_JITSYMBOL_H
-#define LLVM_EXECUTIONENGINE_ORC_JITSYMBOL_H
-
-#include "llvm/ExecutionEngine/JITSymbolFlags.h"
-#include "llvm/ExecutionEngine/RuntimeDyld.h"
-#include "llvm/Support/DataTypes.h"
-#include <cassert>
-#include <functional>
-
-namespace llvm {
-namespace orc {
-
-/// @brief Represents an address in the target process's address space.
-typedef uint64_t TargetAddress;
-
-/// @brief Represents a symbol in the JIT.
-class JITSymbol : public JITSymbolBase {
-public:
-
- typedef std::function<TargetAddress()> GetAddressFtor;
-
- /// @brief Create a 'null' symbol that represents failure to find a symbol
- /// definition.
- JITSymbol(std::nullptr_t)
- : JITSymbolBase(JITSymbolFlags::None), CachedAddr(0) {}
-
- /// @brief Create a symbol for a definition with a known address.
- JITSymbol(TargetAddress Addr, JITSymbolFlags Flags)
- : JITSymbolBase(Flags), CachedAddr(Addr) {}
-
- /// @brief Create a symbol for a definition that doesn't have a known address
- /// yet.
- /// @param GetAddress A functor to materialize a definition (fixing the
- /// address) on demand.
- ///
- /// This constructor allows a JIT layer to provide a reference to a symbol
- /// definition without actually materializing the definition up front. The
- /// user can materialize the definition at any time by calling the getAddress
- /// method.
- JITSymbol(GetAddressFtor GetAddress, JITSymbolFlags Flags)
- : JITSymbolBase(Flags), GetAddress(std::move(GetAddress)), CachedAddr(0) {}
-
- /// @brief Create a JITSymbol from a RuntimeDyld::SymbolInfo.
- JITSymbol(const RuntimeDyld::SymbolInfo &Sym)
- : JITSymbolBase(Sym.getFlags()), CachedAddr(Sym.getAddress()) {}
-
- /// @brief Returns true if the symbol exists, false otherwise.
- explicit operator bool() const { return CachedAddr || GetAddress; }
-
- /// @brief Get the address of the symbol in the target address space. Returns
- /// '0' if the symbol does not exist.
- TargetAddress getAddress() {
- if (GetAddress) {
- CachedAddr = GetAddress();
- assert(CachedAddr && "Symbol could not be materialized.");
- GetAddress = nullptr;
- }
- return CachedAddr;
- }
-
- /// @brief Convert this JITSymbol to a RuntimeDyld::SymbolInfo.
- RuntimeDyld::SymbolInfo toRuntimeDyldSymbol() {
- return RuntimeDyld::SymbolInfo(getAddress(), getFlags());
- }
-
-private:
- GetAddressFtor GetAddress;
- TargetAddress CachedAddr;
-};
-
-} // End namespace orc.
-} // End namespace llvm.
-
-#endif // LLVM_EXECUTIONENGINE_ORC_JITSYMBOL_H
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/LambdaResolver.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/LambdaResolver.h
index a42b9d5c29d1..cbe2a80edf1c 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/LambdaResolver.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/LambdaResolver.h
@@ -23,7 +23,7 @@ namespace llvm {
namespace orc {
template <typename DylibLookupFtorT, typename ExternalLookupFtorT>
-class LambdaResolver : public RuntimeDyld::SymbolResolver {
+class LambdaResolver : public JITSymbolResolver {
public:
LambdaResolver(DylibLookupFtorT DylibLookupFtor,
@@ -31,12 +31,11 @@ public:
: DylibLookupFtor(DylibLookupFtor),
ExternalLookupFtor(ExternalLookupFtor) {}
- RuntimeDyld::SymbolInfo
- findSymbolInLogicalDylib(const std::string &Name) final {
+ JITSymbol findSymbolInLogicalDylib(const std::string &Name) final {
return DylibLookupFtor(Name);
}
- RuntimeDyld::SymbolInfo findSymbol(const std::string &Name) final {
+ JITSymbol findSymbol(const std::string &Name) final {
return ExternalLookupFtor(Name);
}
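
The rewritten LambdaResolver returns JITSymbol from both hooks; a minimal sketch of building one with createLambdaResolver, the helper already used in the CompileOnDemandLayer hunks above (the InternalSymbols map is illustrative):

#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
#include <map>
#include <memory>
#include <string>

std::unique_ptr<llvm::JITSymbolResolver>
makeResolver(std::map<std::string, llvm::JITEvaluatedSymbol> &InternalSymbols) {
  return llvm::orc::createLambdaResolver(
      // Logical-dylib lookup: hidden symbols are visible here.
      [&InternalSymbols](const std::string &Name) -> llvm::JITSymbol {
        auto I = InternalSymbols.find(Name);
        if (I != InternalSymbols.end())
          return llvm::JITSymbol(I->second);
        return nullptr;
      },
      // External lookup: nothing outside the map resolves in this sketch.
      [](const std::string &) -> llvm::JITSymbol { return nullptr; });
}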
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h
index c5fb6b847b30..53d4c0cfe5d4 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/LazyEmittingLayer.h
@@ -14,14 +14,20 @@
#ifndef LLVM_EXECUTIONENGINE_ORC_LAZYEMITTINGLAYER_H
#define LLVM_EXECUTIONENGINE_ORC_LAZYEMITTINGLAYER_H
-#include "JITSymbol.h"
-#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ExecutionEngine/JITSymbol.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Module.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringMap.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
#include <list>
+#include <memory>
+#include <string>
namespace llvm {
namespace orc {
@@ -39,8 +45,8 @@ public:
private:
class EmissionDeferredSet {
public:
- EmissionDeferredSet() : EmitState(NotEmitted) {}
- virtual ~EmissionDeferredSet() {}
+ EmissionDeferredSet() = default;
+ virtual ~EmissionDeferredSet() = default;
JITSymbol find(StringRef Name, bool ExportedSymbolsOnly, BaseLayerT &B) {
switch (EmitState) {
@@ -50,9 +56,9 @@ private:
// (a StringRef) may go away before the lambda is executed.
// FIXME: Use capture-init when we move to C++14.
std::string PName = Name;
- JITSymbolFlags Flags = JITSymbolBase::flagsFromGlobalValue(*GV);
+ JITSymbolFlags Flags = JITSymbolFlags::fromGlobalValue(*GV);
auto GetAddress =
- [this, ExportedSymbolsOnly, PName, &B]() -> TargetAddress {
+ [this, ExportedSymbolsOnly, PName, &B]() -> JITTargetAddress {
if (this->EmitState == Emitting)
return 0;
else if (this->EmitState == NotEmitted) {
@@ -106,7 +112,7 @@ private:
virtual BaseLayerHandleT emitToBaseLayer(BaseLayerT &BaseLayer) = 0;
private:
- enum { NotEmitted, Emitting, Emitted } EmitState;
+ enum { NotEmitted, Emitting, Emitted } EmitState = NotEmitted;
BaseLayerHandleT Handle;
};
@@ -121,7 +127,6 @@ private:
Resolver(std::move(Resolver)) {}
protected:
-
const GlobalValue* searchGVs(StringRef Name,
bool ExportedSymbolsOnly) const override {
// FIXME: We could clean all this up if we had a way to reliably demangle
@@ -277,7 +282,6 @@ public:
void emitAndFinalize(ModuleSetHandleT H) {
(*H)->emitAndFinalize(BaseLayer);
}
-
};
template <typename BaseLayerT>
@@ -293,7 +297,7 @@ LazyEmittingLayer<BaseLayerT>::EmissionDeferredSet::create(
std::move(Resolver));
}
-} // End namespace orc.
-} // End namespace llvm.
+} // end namespace orc
+} // end namespace llvm
#endif // LLVM_EXECUTIONENGINE_ORC_LAZYEMITTINGLAYER_H
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/LogicalDylib.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/LogicalDylib.h
deleted file mode 100644
index 883fa9eac560..000000000000
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/LogicalDylib.h
+++ /dev/null
@@ -1,135 +0,0 @@
-//===--- LogicalDylib.h - Simulates dylib-style symbol lookup ---*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Simulates symbol resolution inside a dylib.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_EXECUTIONENGINE_ORC_LOGICALDYLIB_H
-#define LLVM_EXECUTIONENGINE_ORC_LOGICALDYLIB_H
-
-#include "llvm/ExecutionEngine/Orc/JITSymbol.h"
-#include <string>
-#include <vector>
-
-namespace llvm {
-namespace orc {
-
-template <typename BaseLayerT,
- typename LogicalModuleResources,
- typename LogicalDylibResources>
-class LogicalDylib {
-public:
- typedef typename BaseLayerT::ModuleSetHandleT BaseLayerModuleSetHandleT;
-private:
-
- typedef std::vector<BaseLayerModuleSetHandleT> BaseLayerHandleList;
-
- struct LogicalModule {
- // Make this move-only to ensure they don't get duplicated across moves of
- // LogicalDylib or anything like that.
- LogicalModule(LogicalModule &&RHS)
- : Resources(std::move(RHS.Resources)),
- BaseLayerHandles(std::move(RHS.BaseLayerHandles)) {}
- LogicalModule() = default;
- LogicalModuleResources Resources;
- BaseLayerHandleList BaseLayerHandles;
- };
- typedef std::vector<LogicalModule> LogicalModuleList;
-
-public:
-
- typedef typename BaseLayerHandleList::iterator BaseLayerHandleIterator;
- typedef typename LogicalModuleList::iterator LogicalModuleHandle;
-
- LogicalDylib(BaseLayerT &BaseLayer) : BaseLayer(BaseLayer) {}
-
- ~LogicalDylib() {
- for (auto &LM : LogicalModules)
- for (auto BLH : LM.BaseLayerHandles)
- BaseLayer.removeModuleSet(BLH);
- }
-
- // If possible, remove this and ~LogicalDylib once the work in the dtor is
- // moved to members (eg: self-unregistering base layer handles).
- LogicalDylib(LogicalDylib &&RHS)
- : BaseLayer(std::move(RHS.BaseLayer)),
- LogicalModules(std::move(RHS.LogicalModules)),
- DylibResources(std::move(RHS.DylibResources)) {}
-
- LogicalModuleHandle createLogicalModule() {
- LogicalModules.push_back(LogicalModule());
- return std::prev(LogicalModules.end());
- }
-
- void addToLogicalModule(LogicalModuleHandle LMH,
- BaseLayerModuleSetHandleT BaseLayerHandle) {
- LMH->BaseLayerHandles.push_back(BaseLayerHandle);
- }
-
- LogicalModuleResources& getLogicalModuleResources(LogicalModuleHandle LMH) {
- return LMH->Resources;
- }
-
- BaseLayerHandleIterator moduleHandlesBegin(LogicalModuleHandle LMH) {
- return LMH->BaseLayerHandles.begin();
- }
-
- BaseLayerHandleIterator moduleHandlesEnd(LogicalModuleHandle LMH) {
- return LMH->BaseLayerHandles.end();
- }
-
- JITSymbol findSymbolInLogicalModule(LogicalModuleHandle LMH,
- const std::string &Name,
- bool ExportedSymbolsOnly) {
-
- if (auto StubSym = LMH->Resources.findSymbol(Name, ExportedSymbolsOnly))
- return StubSym;
-
- for (auto BLH : LMH->BaseLayerHandles)
- if (auto Symbol = BaseLayer.findSymbolIn(BLH, Name, ExportedSymbolsOnly))
- return Symbol;
- return nullptr;
- }
-
- JITSymbol findSymbolInternally(LogicalModuleHandle LMH,
- const std::string &Name) {
- if (auto Symbol = findSymbolInLogicalModule(LMH, Name, false))
- return Symbol;
-
- for (auto LMI = LogicalModules.begin(), LME = LogicalModules.end();
- LMI != LME; ++LMI) {
- if (LMI != LMH)
- if (auto Symbol = findSymbolInLogicalModule(LMI, Name, false))
- return Symbol;
- }
-
- return nullptr;
- }
-
- JITSymbol findSymbol(const std::string &Name, bool ExportedSymbolsOnly) {
- for (auto LMI = LogicalModules.begin(), LME = LogicalModules.end();
- LMI != LME; ++LMI)
- if (auto Sym = findSymbolInLogicalModule(LMI, Name, ExportedSymbolsOnly))
- return Sym;
- return nullptr;
- }
-
- LogicalDylibResources& getDylibResources() { return DylibResources; }
-
-protected:
- BaseLayerT BaseLayer;
- LogicalModuleList LogicalModules;
- LogicalDylibResources DylibResources;
-};
-
-} // End namespace orc.
-} // End namespace llvm.
-
-#endif // LLVM_EXECUTIONENGINE_ORC_LOGICALDYLIB_H
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/NullResolver.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/NullResolver.h
index 1560c6d86e0f..957b94912b3f 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/NullResolver.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/NullResolver.h
@@ -22,12 +22,11 @@ namespace orc {
/// SymbolResolver implementation that rejects all resolution requests.
/// Useful for clients that have no cross-object fixups.
-class NullResolver : public RuntimeDyld::SymbolResolver {
+class NullResolver : public JITSymbolResolver {
public:
- RuntimeDyld::SymbolInfo findSymbol(const std::string &Name) final;
+ JITSymbol findSymbol(const std::string &Name) final;
- RuntimeDyld::SymbolInfo
- findSymbolInLogicalDylib(const std::string &Name) final;
+ JITSymbol findSymbolInLogicalDylib(const std::string &Name) final;
};
} // End namespace orc.
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h
index a7798d8beb8d..0588d2228598 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h
@@ -14,12 +14,23 @@
#ifndef LLVM_EXECUTIONENGINE_ORC_OBJECTLINKINGLAYER_H
#define LLVM_EXECUTIONENGINE_ORC_OBJECTLINKINGLAYER_H
-#include "JITSymbol.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/ExecutionEngine/JITSymbol.h"
+#include "llvm/ExecutionEngine/RuntimeDyld.h"
#include "llvm/ExecutionEngine/SectionMemoryManager.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/Error.h"
+#include <cassert>
+#include <algorithm>
+#include <functional>
#include <list>
#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
namespace llvm {
namespace orc {
@@ -34,11 +45,11 @@ protected:
/// had been provided by this instance. Higher level layers are responsible
/// for taking any action required to handle the missing symbols.
class LinkedObjectSet {
- LinkedObjectSet(const LinkedObjectSet&) = delete;
- void operator=(const LinkedObjectSet&) = delete;
public:
LinkedObjectSet() = default;
- virtual ~LinkedObjectSet() {}
+ LinkedObjectSet(const LinkedObjectSet&) = delete;
+ void operator=(const LinkedObjectSet&) = delete;
+ virtual ~LinkedObjectSet() = default;
virtual void finalize() = 0;
@@ -46,21 +57,22 @@ protected:
getSymbolMaterializer(std::string Name) = 0;
virtual void mapSectionAddress(const void *LocalAddress,
- TargetAddress TargetAddr) const = 0;
+ JITTargetAddress TargetAddr) const = 0;
JITSymbol getSymbol(StringRef Name, bool ExportedSymbolsOnly) {
auto SymEntry = SymbolTable.find(Name);
if (SymEntry == SymbolTable.end())
return nullptr;
- if (!SymEntry->second.isExported() && ExportedSymbolsOnly)
+ if (!SymEntry->second.getFlags().isExported() && ExportedSymbolsOnly)
return nullptr;
if (!Finalized)
return JITSymbol(getSymbolMaterializer(Name),
SymEntry->second.getFlags());
return JITSymbol(SymEntry->second);
}
+
protected:
- StringMap<RuntimeDyld::SymbolInfo> SymbolTable;
+ StringMap<JITEvaluatedSymbol> SymbolTable;
bool Finalized = false;
};
@@ -71,7 +83,6 @@ public:
typedef LinkedObjectSetListT::iterator ObjSetHandleT;
};
-
/// @brief Default (no-op) action to perform when loading objects.
class DoNothingOnNotifyLoaded {
public:
@@ -89,12 +100,10 @@ public:
template <typename NotifyLoadedFtor = DoNothingOnNotifyLoaded>
class ObjectLinkingLayer : public ObjectLinkingLayerBase {
public:
-
/// @brief Functor for receiving finalization notifications.
typedef std::function<void(ObjSetHandleT)> NotifyFinalizedFtor;
private:
-
template <typename ObjSetT, typename MemoryManagerPtrT,
typename SymbolResolverPtrT, typename FinalizerFtor>
class ConcreteLinkedObjectSet : public LinkedObjectSet {
@@ -122,10 +131,10 @@ private:
RTDyld.setProcessAllSections(PFC->ProcessAllSections);
PFC->RTDyld = &RTDyld;
+ this->Finalized = true;
PFC->Finalizer(PFC->Handle, RTDyld, std::move(PFC->Objects),
[&]() {
this->updateSymbolTable(RTDyld);
- this->Finalized = true;
});
// Release resources.
@@ -144,14 +153,13 @@ private:
}
void mapSectionAddress(const void *LocalAddress,
- TargetAddress TargetAddr) const override {
+ JITTargetAddress TargetAddr) const override {
assert(PFC && "mapSectionAddress called on finalized LinkedObjectSet");
assert(PFC->RTDyld && "mapSectionAddress called on raw LinkedObjectSet");
PFC->RTDyld->mapSectionAddress(LocalAddress, TargetAddr);
}
private:
-
void buildInitialSymbolTable(const ObjSetT &Objects) {
for (const auto &Obj : Objects)
for (auto &Symbol : getObject(*Obj).symbols()) {
@@ -163,9 +171,9 @@ private:
consumeError(SymbolName.takeError());
continue;
}
- auto Flags = JITSymbol::flagsFromObjectSymbol(Symbol);
+ auto Flags = JITSymbolFlags::fromObjectSymbol(Symbol);
SymbolTable.insert(
- std::make_pair(*SymbolName, RuntimeDyld::SymbolInfo(0, Flags)));
+ std::make_pair(*SymbolName, JITEvaluatedSymbol(0, Flags)));
}
}
@@ -212,7 +220,6 @@ private:
}
public:
-
/// @brief LoadedObjectInfo list. Contains a list of owning pointers to
/// RuntimeDyld::LoadedObjectInfo instances.
typedef std::vector<std::unique_ptr<RuntimeDyld::LoadedObjectInfo>>
@@ -248,7 +255,6 @@ public:
ObjSetHandleT addObjectSet(ObjSetT Objects,
MemoryManagerPtrT MemMgr,
SymbolResolverPtrT Resolver) {
-
auto Finalizer = [&](ObjSetHandleT H, RuntimeDyld &RTDyld,
const ObjSetT &Objs,
std::function<void()> LOSHandleLoad) {
@@ -322,7 +328,7 @@ public:
/// @brief Map section addresses for the objects associated with the handle H.
void mapSectionAddress(ObjSetHandleT H, const void *LocalAddress,
- TargetAddress TargetAddr) {
+ JITTargetAddress TargetAddr) {
(*H)->mapSectionAddress(LocalAddress, TargetAddr);
}
@@ -334,7 +340,6 @@ public:
}
private:
-
static const object::ObjectFile& getObject(const object::ObjectFile &Obj) {
return Obj;
}
@@ -351,7 +356,7 @@ private:
bool ProcessAllSections;
};
-} // End namespace orc.
-} // End namespace llvm
+} // end namespace orc
+} // end namespace llvm
#endif // LLVM_EXECUTIONENGINE_ORC_OBJECTLINKINGLAYER_H
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h
index 2ffe71c94356..173c106cd3ec 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h
@@ -14,7 +14,7 @@
#ifndef LLVM_EXECUTIONENGINE_ORC_OBJECTTRANSFORMLAYER_H
#define LLVM_EXECUTIONENGINE_ORC_OBJECTTRANSFORMLAYER_H
-#include "JITSymbol.h"
+#include "llvm/ExecutionEngine/JITSymbol.h"
namespace llvm {
namespace orc {
@@ -83,7 +83,7 @@ public:
/// @brief Map section addresses for the objects associated with the handle H.
void mapSectionAddress(ObjSetHandleT H, const void *LocalAddress,
- TargetAddress TargetAddr) {
+ JITTargetAddress TargetAddr) {
BaseLayer.mapSectionAddress(H, LocalAddress, TargetAddr);
}
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h
index 4a8d0b0b801c..fa236b0de88a 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h
@@ -37,7 +37,8 @@ public:
static const unsigned TrampolineSize = 1;
static const unsigned ResolverCodeSize = 1;
- typedef TargetAddress (*JITReentryFn)(void *CallbackMgr, void *TrampolineId);
+ typedef JITTargetAddress (*JITReentryFn)(void *CallbackMgr,
+ void *TrampolineId);
static void writeResolverCode(uint8_t *ResolveMem, JITReentryFn Reentry,
void *CallbackMgr) {
@@ -115,7 +116,8 @@ public:
typedef GenericIndirectStubsInfo<8> IndirectStubsInfo;
- typedef TargetAddress (*JITReentryFn)(void *CallbackMgr, void *TrampolineId);
+ typedef JITTargetAddress (*JITReentryFn)(void *CallbackMgr,
+ void *TrampolineId);
/// @brief Write the resolver code into the given memory. The user is
/// responsible for allocating the memory and setting permissions.
@@ -170,7 +172,8 @@ public:
class OrcX86_64_SysV : public OrcX86_64_Base {
public:
static const unsigned ResolverCodeSize = 0x6C;
- typedef TargetAddress(*JITReentryFn)(void *CallbackMgr, void *TrampolineId);
+ typedef JITTargetAddress (*JITReentryFn)(void *CallbackMgr,
+ void *TrampolineId);
/// @brief Write the resolver code into the given memory. The user is
/// responsible for allocating the memory and setting permissions.
@@ -184,7 +187,8 @@ public:
class OrcX86_64_Win32 : public OrcX86_64_Base {
public:
static const unsigned ResolverCodeSize = 0x74;
- typedef TargetAddress(*JITReentryFn)(void *CallbackMgr, void *TrampolineId);
+ typedef JITTargetAddress (*JITReentryFn)(void *CallbackMgr,
+ void *TrampolineId);
/// @brief Write the resolver code into the given memory. The user is
/// responsible for allocating the memory and setting permissions.
@@ -203,7 +207,8 @@ public:
typedef GenericIndirectStubsInfo<8> IndirectStubsInfo;
- typedef TargetAddress (*JITReentryFn)(void *CallbackMgr, void *TrampolineId);
+ typedef JITTargetAddress (*JITReentryFn)(void *CallbackMgr,
+ void *TrampolineId);
/// @brief Write the resolver code into the given memory. The user is
/// responsible for allocating the memory and setting permissions.
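All of the JITReentryFn typedefs above now take and return JITTargetAddress. A reentry function compatible with that signature typically just forwards the trampoline address to a compile-callback manager, mirroring the reenter helper in OrcRemoteTargetServer further below; the names here are illustrative:

    #include "llvm/ExecutionEngine/JITSymbol.h"
    #include "llvm/ExecutionEngine/Orc/IndirectionUtils.h"  // JITCompileCallbackManager
    #include <cstdint>

    // Called from the resolver block with the callback manager and the
    // trampoline that was hit; returns the address to jump to.
    static llvm::JITTargetAddress reenter(void *CallbackMgr, void *TrampolineId) {
      auto &CCMgr =
          *static_cast<llvm::orc::JITCompileCallbackManager *>(CallbackMgr);
      return CCMgr.executeCompileCallback(static_cast<llvm::JITTargetAddress>(
          reinterpret_cast<uintptr_t>(TrampolineId)));
    }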
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcError.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcError.h
index 1b3f25fae162..b74988cce2fb 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcError.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcError.h
@@ -27,8 +27,10 @@ enum class OrcErrorCode : int {
RemoteMProtectAddrUnrecognized,
RemoteIndirectStubsOwnerDoesNotExist,
RemoteIndirectStubsOwnerIdAlreadyInUse,
+ RPCResponseAbandoned,
UnexpectedRPCCall,
UnexpectedRPCResponse,
+ UnknownRPCFunction
};
Error orcError(OrcErrorCode ErrCode);
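Two new codes, RPCResponseAbandoned and UnknownRPCFunction, join the enum for the reworked RPC layer. A small sketch of turning one of them into an llvm::Error via orcError; the surrounding function is illustrative only:

    #include "llvm/ExecutionEngine/Orc/OrcError.h"

    llvm::Error rejectUnknownFunction(bool KnownFunction) {
      if (!KnownFunction)
        return llvm::orc::orcError(llvm::orc::OrcErrorCode::UnknownRPCFunction);
      return llvm::Error::success();
    }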
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h
index 5c867e7e7fd4..8647db56cd2f 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h
@@ -8,8 +8,8 @@
//===----------------------------------------------------------------------===//
//
// This file defines the OrcRemoteTargetClient class and helpers. This class
-// can be used to communicate over an RPCChannel with an OrcRemoteTargetServer
-// instance to support remote-JITing.
+// can be used to communicate over a RawByteChannel with an
+// OrcRemoteTargetServer instance to support remote-JITing.
//
//===----------------------------------------------------------------------===//
@@ -36,23 +36,6 @@ namespace remote {
template <typename ChannelT>
class OrcRemoteTargetClient : public OrcRemoteTargetRPCAPI {
public:
- // FIXME: Remove move/copy ops once MSVC supports synthesizing move ops.
-
- OrcRemoteTargetClient(const OrcRemoteTargetClient &) = delete;
- OrcRemoteTargetClient &operator=(const OrcRemoteTargetClient &) = delete;
-
- OrcRemoteTargetClient(OrcRemoteTargetClient &&Other)
- : Channel(Other.Channel), ExistingError(std::move(Other.ExistingError)),
- RemoteTargetTriple(std::move(Other.RemoteTargetTriple)),
- RemotePointerSize(std::move(Other.RemotePointerSize)),
- RemotePageSize(std::move(Other.RemotePageSize)),
- RemoteTrampolineSize(std::move(Other.RemoteTrampolineSize)),
- RemoteIndirectStubSize(std::move(Other.RemoteIndirectStubSize)),
- AllocatorIds(std::move(Other.AllocatorIds)),
- IndirectStubOwnerIds(std::move(Other.IndirectStubOwnerIds)) {}
-
- OrcRemoteTargetClient &operator=(OrcRemoteTargetClient &&) = delete;
-
/// Remote memory manager.
class RCMemoryManager : public RuntimeDyld::MemoryManager {
public:
@@ -61,18 +44,10 @@ public:
DEBUG(dbgs() << "Created remote allocator " << Id << "\n");
}
- RCMemoryManager(RCMemoryManager &&Other)
- : Client(std::move(Other.Client)), Id(std::move(Other.Id)),
- Unmapped(std::move(Other.Unmapped)),
- Unfinalized(std::move(Other.Unfinalized)) {}
-
- RCMemoryManager operator=(RCMemoryManager &&Other) {
- Client = std::move(Other.Client);
- Id = std::move(Other.Id);
- Unmapped = std::move(Other.Unmapped);
- Unfinalized = std::move(Other.Unfinalized);
- return *this;
- }
+ RCMemoryManager(const RCMemoryManager &) = delete;
+ RCMemoryManager &operator=(const RCMemoryManager &) = delete;
+ RCMemoryManager(RCMemoryManager &&) = default;
+ RCMemoryManager &operator=(RCMemoryManager &&) = default;
~RCMemoryManager() override {
Client.destroyRemoteAllocator(Id);
@@ -185,7 +160,7 @@ public:
DEBUG(dbgs() << "Allocator " << Id << " applied mappings:\n");
for (auto &ObjAllocs : Unmapped) {
{
- TargetAddress NextCodeAddr = ObjAllocs.RemoteCodeAddr;
+ JITTargetAddress NextCodeAddr = ObjAllocs.RemoteCodeAddr;
for (auto &Alloc : ObjAllocs.CodeAllocs) {
NextCodeAddr = alignTo(NextCodeAddr, Alloc.getAlign());
Dyld.mapSectionAddress(Alloc.getLocalAddress(), NextCodeAddr);
@@ -197,7 +172,7 @@ public:
}
}
{
- TargetAddress NextRODataAddr = ObjAllocs.RemoteRODataAddr;
+ JITTargetAddress NextRODataAddr = ObjAllocs.RemoteRODataAddr;
for (auto &Alloc : ObjAllocs.RODataAllocs) {
NextRODataAddr = alignTo(NextRODataAddr, Alloc.getAlign());
Dyld.mapSectionAddress(Alloc.getLocalAddress(), NextRODataAddr);
@@ -210,7 +185,7 @@ public:
}
}
{
- TargetAddress NextRWDataAddr = ObjAllocs.RemoteRWDataAddr;
+ JITTargetAddress NextRWDataAddr = ObjAllocs.RemoteRWDataAddr;
for (auto &Alloc : ObjAllocs.RWDataAllocs) {
NextRWDataAddr = alignTo(NextRWDataAddr, Alloc.getAlign());
Dyld.mapSectionAddress(Alloc.getLocalAddress(), NextRWDataAddr);
@@ -366,18 +341,10 @@ public:
Alloc(uint64_t Size, unsigned Align)
: Size(Size), Align(Align), Contents(new char[Size + Align - 1]) {}
- Alloc(Alloc &&Other)
- : Size(std::move(Other.Size)), Align(std::move(Other.Align)),
- Contents(std::move(Other.Contents)),
- RemoteAddr(std::move(Other.RemoteAddr)) {}
-
- Alloc &operator=(Alloc &&Other) {
- Size = std::move(Other.Size);
- Align = std::move(Other.Align);
- Contents = std::move(Other.Contents);
- RemoteAddr = std::move(Other.RemoteAddr);
- return *this;
- }
+ Alloc(const Alloc &) = delete;
+ Alloc &operator=(const Alloc &) = delete;
+ Alloc(Alloc &&) = default;
+ Alloc &operator=(Alloc &&) = default;
uint64_t getSize() const { return Size; }
@@ -389,43 +356,29 @@ public:
return reinterpret_cast<char *>(LocalAddr);
}
- void setRemoteAddress(TargetAddress RemoteAddr) {
+ void setRemoteAddress(JITTargetAddress RemoteAddr) {
this->RemoteAddr = RemoteAddr;
}
- TargetAddress getRemoteAddress() const { return RemoteAddr; }
+ JITTargetAddress getRemoteAddress() const { return RemoteAddr; }
private:
uint64_t Size;
unsigned Align;
std::unique_ptr<char[]> Contents;
- TargetAddress RemoteAddr = 0;
+ JITTargetAddress RemoteAddr = 0;
};
struct ObjectAllocs {
ObjectAllocs() = default;
-
- ObjectAllocs(ObjectAllocs &&Other)
- : RemoteCodeAddr(std::move(Other.RemoteCodeAddr)),
- RemoteRODataAddr(std::move(Other.RemoteRODataAddr)),
- RemoteRWDataAddr(std::move(Other.RemoteRWDataAddr)),
- CodeAllocs(std::move(Other.CodeAllocs)),
- RODataAllocs(std::move(Other.RODataAllocs)),
- RWDataAllocs(std::move(Other.RWDataAllocs)) {}
-
- ObjectAllocs &operator=(ObjectAllocs &&Other) {
- RemoteCodeAddr = std::move(Other.RemoteCodeAddr);
- RemoteRODataAddr = std::move(Other.RemoteRODataAddr);
- RemoteRWDataAddr = std::move(Other.RemoteRWDataAddr);
- CodeAllocs = std::move(Other.CodeAllocs);
- RODataAllocs = std::move(Other.RODataAllocs);
- RWDataAllocs = std::move(Other.RWDataAllocs);
- return *this;
- }
-
- TargetAddress RemoteCodeAddr = 0;
- TargetAddress RemoteRODataAddr = 0;
- TargetAddress RemoteRWDataAddr = 0;
+ ObjectAllocs(const ObjectAllocs &) = delete;
+ ObjectAllocs &operator=(const ObjectAllocs &) = delete;
+ ObjectAllocs(ObjectAllocs &&) = default;
+ ObjectAllocs &operator=(ObjectAllocs &&) = default;
+
+ JITTargetAddress RemoteCodeAddr = 0;
+ JITTargetAddress RemoteRODataAddr = 0;
+ JITTargetAddress RemoteRWDataAddr = 0;
std::vector<Alloc> CodeAllocs, RODataAllocs, RWDataAllocs;
};
@@ -450,7 +403,7 @@ public:
}
}
- Error createStub(StringRef StubName, TargetAddress StubAddr,
+ Error createStub(StringRef StubName, JITTargetAddress StubAddr,
JITSymbolFlags StubFlags) override {
if (auto Err = reserveStubs(1))
return Err;
@@ -477,7 +430,7 @@ public:
auto Key = I->second.first;
auto Flags = I->second.second;
auto StubSymbol = JITSymbol(getStubAddr(Key), Flags);
- if (ExportedStubsOnly && !StubSymbol.isExported())
+ if (ExportedStubsOnly && !StubSymbol.getFlags().isExported())
return nullptr;
return StubSymbol;
}
@@ -491,7 +444,7 @@ public:
return JITSymbol(getPtrAddr(Key), Flags);
}
- Error updatePointer(StringRef Name, TargetAddress NewAddr) override {
+ Error updatePointer(StringRef Name, JITTargetAddress NewAddr) override {
auto I = StubIndexes.find(Name);
assert(I != StubIndexes.end() && "No stub pointer for symbol");
auto Key = I->second.first;
@@ -500,8 +453,8 @@ public:
private:
struct RemoteIndirectStubsInfo {
- TargetAddress StubBase;
- TargetAddress PtrBase;
+ JITTargetAddress StubBase;
+ JITTargetAddress PtrBase;
unsigned NumStubs;
};
@@ -517,8 +470,8 @@ public:
return Error::success();
unsigned NewStubsRequired = NumStubs - FreeStubs.size();
- TargetAddress StubBase;
- TargetAddress PtrBase;
+ JITTargetAddress StubBase;
+ JITTargetAddress PtrBase;
unsigned NumStubsEmitted;
if (auto StubInfoOrErr = Remote.emitIndirectStubs(Id, NewStubsRequired))
@@ -535,7 +488,7 @@ public:
return Error::success();
}
- Error createStubInternal(StringRef StubName, TargetAddress InitAddr,
+ Error createStubInternal(StringRef StubName, JITTargetAddress InitAddr,
JITSymbolFlags StubFlags) {
auto Key = FreeStubs.back();
FreeStubs.pop_back();
@@ -543,14 +496,14 @@ public:
return Remote.writePointer(getPtrAddr(Key), InitAddr);
}
- TargetAddress getStubAddr(StubKey K) {
+ JITTargetAddress getStubAddr(StubKey K) {
assert(RemoteIndirectStubsInfos[K.first].StubBase != 0 &&
"Missing stub address");
return RemoteIndirectStubsInfos[K.first].StubBase +
K.second * Remote.getIndirectStubSize();
}
- TargetAddress getPtrAddr(StubKey K) {
+ JITTargetAddress getPtrAddr(StubKey K) {
assert(RemoteIndirectStubsInfos[K.first].PtrBase != 0 &&
"Missing pointer address");
return RemoteIndirectStubsInfos[K.first].PtrBase +
@@ -561,13 +514,13 @@ public:
/// Remote compile callback manager.
class RCCompileCallbackManager : public JITCompileCallbackManager {
public:
- RCCompileCallbackManager(TargetAddress ErrorHandlerAddress,
+ RCCompileCallbackManager(JITTargetAddress ErrorHandlerAddress,
OrcRemoteTargetClient &Remote)
: JITCompileCallbackManager(ErrorHandlerAddress), Remote(Remote) {}
private:
void grow() override {
- TargetAddress BlockAddr = 0;
+ JITTargetAddress BlockAddr = 0;
uint32_t NumTrampolines = 0;
if (auto TrampolineInfoOrErr = Remote.emitTrampolineBlock())
std::tie(BlockAddr, NumTrampolines) = *TrampolineInfoOrErr;
@@ -587,48 +540,38 @@ public:
/// Create an OrcRemoteTargetClient.
/// Channel is the ChannelT instance to communicate on. It is assumed that
/// the channel is ready to be read from and written to.
- static Expected<OrcRemoteTargetClient> Create(ChannelT &Channel) {
- Error Err;
- OrcRemoteTargetClient H(Channel, Err);
+ static Expected<std::unique_ptr<OrcRemoteTargetClient>>
+ Create(ChannelT &Channel) {
+ Error Err = Error::success();
+ std::unique_ptr<OrcRemoteTargetClient> Client(
+ new OrcRemoteTargetClient(Channel, Err));
if (Err)
return std::move(Err);
- return Expected<OrcRemoteTargetClient>(std::move(H));
+ return std::move(Client);
}
/// Call the int(void) function at the given address in the target and return
/// its result.
- Expected<int> callIntVoid(TargetAddress Addr) {
+ Expected<int> callIntVoid(JITTargetAddress Addr) {
DEBUG(dbgs() << "Calling int(*)(void) " << format("0x%016x", Addr) << "\n");
-
- auto Listen = [&](RPCChannel &C, uint32_t Id) {
- return listenForCompileRequests(C, Id);
- };
- return callSTHandling<CallIntVoid>(Channel, Listen, Addr);
+ return callB<CallIntVoid>(Addr);
}
/// Call the int(int, char*[]) function at the given address in the target and
/// return its result.
- Expected<int> callMain(TargetAddress Addr,
+ Expected<int> callMain(JITTargetAddress Addr,
const std::vector<std::string> &Args) {
DEBUG(dbgs() << "Calling int(*)(int, char*[]) " << format("0x%016x", Addr)
<< "\n");
-
- auto Listen = [&](RPCChannel &C, uint32_t Id) {
- return listenForCompileRequests(C, Id);
- };
- return callSTHandling<CallMain>(Channel, Listen, Addr, Args);
+ return callB<CallMain>(Addr, Args);
}
/// Call the void() function at the given address in the target and wait for
/// it to finish.
- Error callVoidVoid(TargetAddress Addr) {
+ Error callVoidVoid(JITTargetAddress Addr) {
DEBUG(dbgs() << "Calling void(*)(void) " << format("0x%016x", Addr)
<< "\n");
-
- auto Listen = [&](RPCChannel &C, uint32_t Id) {
- return listenForCompileRequests(C, Id);
- };
- return callSTHandling<CallVoidVoid>(Channel, Listen, Addr);
+ return callB<CallVoidVoid>(Addr);
}
/// Create an RCMemoryManager which will allocate its memory on the remote
@@ -637,7 +580,7 @@ public:
assert(!MM && "MemoryManager should be null before creation.");
auto Id = AllocatorIds.getNext();
- if (auto Err = callST<CreateRemoteAllocator>(Channel, Id))
+ if (auto Err = callB<CreateRemoteAllocator>(Id))
return Err;
MM = llvm::make_unique<RCMemoryManager>(*this, Id);
return Error::success();
@@ -648,20 +591,20 @@ public:
Error createIndirectStubsManager(std::unique_ptr<RCIndirectStubsManager> &I) {
assert(!I && "Indirect stubs manager should be null before creation.");
auto Id = IndirectStubOwnerIds.getNext();
- if (auto Err = callST<CreateIndirectStubsOwner>(Channel, Id))
+ if (auto Err = callB<CreateIndirectStubsOwner>(Id))
return Err;
I = llvm::make_unique<RCIndirectStubsManager>(*this, Id);
return Error::success();
}
Expected<RCCompileCallbackManager &>
- enableCompileCallbacks(TargetAddress ErrorHandlerAddress) {
+ enableCompileCallbacks(JITTargetAddress ErrorHandlerAddress) {
// Check for an 'out-of-band' error, e.g. from an MM destructor.
if (ExistingError)
return std::move(ExistingError);
// Emit the resolver block on the JIT server.
- if (auto Err = callST<EmitResolverBlock>(Channel))
+ if (auto Err = callB<EmitResolverBlock>())
return std::move(Err);
// Create the callback manager.
@@ -673,23 +616,32 @@ public:
/// Search for symbols in the remote process. Note: This should be used by
/// symbol resolvers *after* they've searched the local symbol table in the
/// JIT stack.
- Expected<TargetAddress> getSymbolAddress(StringRef Name) {
+ Expected<JITTargetAddress> getSymbolAddress(StringRef Name) {
// Check for an 'out-of-band' error, e.g. from an MM destructor.
if (ExistingError)
return std::move(ExistingError);
- return callST<GetSymbolAddress>(Channel, Name);
+ return callB<GetSymbolAddress>(Name);
}
/// Get the triple for the remote target.
const std::string &getTargetTriple() const { return RemoteTargetTriple; }
- Error terminateSession() { return callST<TerminateSession>(Channel); }
+ Error terminateSession() { return callB<TerminateSession>(); }
private:
- OrcRemoteTargetClient(ChannelT &Channel, Error &Err) : Channel(Channel) {
- ErrorAsOutParameter EAO(Err);
- if (auto RIOrErr = callST<GetRemoteInfo>(Channel)) {
+ OrcRemoteTargetClient(ChannelT &Channel, Error &Err)
+ : OrcRemoteTargetRPCAPI(Channel) {
+ ErrorAsOutParameter EAO(&Err);
+
+ addHandler<RequestCompile>(
+ [this](JITTargetAddress Addr) -> JITTargetAddress {
+ if (CallbackManager)
+ return CallbackManager->executeCompileCallback(Addr);
+ return 0;
+ });
+
+ if (auto RIOrErr = callB<GetRemoteInfo>()) {
std::tie(RemoteTargetTriple, RemotePointerSize, RemotePageSize,
RemoteTrampolineSize, RemoteIndirectStubSize) = *RIOrErr;
Err = Error::success();
@@ -698,12 +650,12 @@ private:
}
}
- Error deregisterEHFrames(TargetAddress Addr, uint32_t Size) {
- return callST<RegisterEHFrames>(Channel, Addr, Size);
+ Error deregisterEHFrames(JITTargetAddress Addr, uint32_t Size) {
+ return callB<DeregisterEHFrames>(Addr, Size);
}
void destroyRemoteAllocator(ResourceIdMgr::ResourceId Id) {
- if (auto Err = callST<DestroyRemoteAllocator>(Channel, Id)) {
+ if (auto Err = callB<DestroyRemoteAllocator>(Id)) {
// FIXME: This will be triggered by a removeModuleSet call: Propagate
// error return up through that.
llvm_unreachable("Failed to destroy remote allocator.");
@@ -713,20 +665,20 @@ private:
Error destroyIndirectStubsManager(ResourceIdMgr::ResourceId Id) {
IndirectStubOwnerIds.release(Id);
- return callST<DestroyIndirectStubsOwner>(Channel, Id);
+ return callB<DestroyIndirectStubsOwner>(Id);
}
- Expected<std::tuple<TargetAddress, TargetAddress, uint32_t>>
+ Expected<std::tuple<JITTargetAddress, JITTargetAddress, uint32_t>>
emitIndirectStubs(ResourceIdMgr::ResourceId Id, uint32_t NumStubsRequired) {
- return callST<EmitIndirectStubs>(Channel, Id, NumStubsRequired);
+ return callB<EmitIndirectStubs>(Id, NumStubsRequired);
}
- Expected<std::tuple<TargetAddress, uint32_t>> emitTrampolineBlock() {
+ Expected<std::tuple<JITTargetAddress, uint32_t>> emitTrampolineBlock() {
// Check for an 'out-of-band' error, e.g. from an MM destructor.
if (ExistingError)
return std::move(ExistingError);
- return callST<EmitTrampolineBlock>(Channel);
+ return callB<EmitTrampolineBlock>();
}
uint32_t getIndirectStubSize() const { return RemoteIndirectStubSize; }
@@ -735,79 +687,53 @@ private:
uint32_t getTrampolineSize() const { return RemoteTrampolineSize; }
- Error listenForCompileRequests(RPCChannel &C, uint32_t &Id) {
- assert(CallbackManager &&
- "No calback manager. enableCompileCallbacks must be called first");
-
- // Check for an 'out-of-band' error, e.g. from an MM destructor.
- if (ExistingError)
- return std::move(ExistingError);
-
- // FIXME: CompileCallback could be an anonymous lambda defined at the use
- // site below, but that triggers a GCC 4.7 ICE. When we move off
- // GCC 4.7, tidy this up.
- auto CompileCallback =
- [this](TargetAddress Addr) -> Expected<TargetAddress> {
- return this->CallbackManager->executeCompileCallback(Addr);
- };
-
- if (Id == RequestCompileId) {
- if (auto Err = handle<RequestCompile>(C, CompileCallback))
- return Err;
- return Error::success();
- }
- // else
- return orcError(OrcErrorCode::UnexpectedRPCCall);
- }
-
- Expected<std::vector<char>> readMem(char *Dst, TargetAddress Src,
+ Expected<std::vector<char>> readMem(char *Dst, JITTargetAddress Src,
uint64_t Size) {
// Check for an 'out-of-band' error, e.g. from an MM destructor.
if (ExistingError)
return std::move(ExistingError);
- return callST<ReadMem>(Channel, Src, Size);
+ return callB<ReadMem>(Src, Size);
}
- Error registerEHFrames(TargetAddress &RAddr, uint32_t Size) {
- return callST<RegisterEHFrames>(Channel, RAddr, Size);
+ Error registerEHFrames(JITTargetAddress &RAddr, uint32_t Size) {
+ return callB<RegisterEHFrames>(RAddr, Size);
}
- Expected<TargetAddress> reserveMem(ResourceIdMgr::ResourceId Id,
- uint64_t Size, uint32_t Align) {
+ Expected<JITTargetAddress> reserveMem(ResourceIdMgr::ResourceId Id,
+ uint64_t Size, uint32_t Align) {
// Check for an 'out-of-band' error, e.g. from an MM destructor.
if (ExistingError)
return std::move(ExistingError);
- return callST<ReserveMem>(Channel, Id, Size, Align);
+ return callB<ReserveMem>(Id, Size, Align);
}
Error setProtections(ResourceIdMgr::ResourceId Id,
- TargetAddress RemoteSegAddr, unsigned ProtFlags) {
- return callST<SetProtections>(Channel, Id, RemoteSegAddr, ProtFlags);
+ JITTargetAddress RemoteSegAddr, unsigned ProtFlags) {
+ return callB<SetProtections>(Id, RemoteSegAddr, ProtFlags);
}
- Error writeMem(TargetAddress Addr, const char *Src, uint64_t Size) {
+ Error writeMem(JITTargetAddress Addr, const char *Src, uint64_t Size) {
// Check for an 'out-of-band' error, e.g. from an MM destructor.
if (ExistingError)
return std::move(ExistingError);
- return callST<WriteMem>(Channel, DirectBufferWriter(Src, Addr, Size));
+ return callB<WriteMem>(DirectBufferWriter(Src, Addr, Size));
}
- Error writePointer(TargetAddress Addr, TargetAddress PtrVal) {
+ Error writePointer(JITTargetAddress Addr, JITTargetAddress PtrVal) {
// Check for an 'out-of-band' error, e.g. from an MM destructor.
if (ExistingError)
return std::move(ExistingError);
- return callST<WritePtr>(Channel, Addr, PtrVal);
+ return callB<WritePtr>(Addr, PtrVal);
}
static Error doNothing() { return Error::success(); }
- ChannelT &Channel;
- Error ExistingError;
+ Error ExistingError = Error::success();
std::string RemoteTargetTriple;
uint32_t RemotePointerSize = 0;
uint32_t RemotePageSize = 0;
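With this change Create() hands back an Expected<std::unique_ptr<OrcRemoteTargetClient>> instead of a client by value, and the old callST<...>(Channel, ...) calls become callB<...>(...) on the RPC base. A hedged client-side sketch; the channel type and program name are placeholders:

    #include "llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h"

    template <typename ChannelT>
    llvm::Expected<int> runRemoteMain(ChannelT &Channel,
                                      llvm::JITTargetAddress MainAddr) {
      auto ClientOrErr =
          llvm::orc::remote::OrcRemoteTargetClient<ChannelT>::Create(Channel);
      if (!ClientOrErr)
        return ClientOrErr.takeError();
      auto &Client = **ClientOrErr;          // unique_ptr inside the Expected
      return Client.callMain(MainAddr, {"remote-prog"});
    }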
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h
index 74d851522f79..ab2b0fad89fd 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h
@@ -16,9 +16,9 @@
#ifndef LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETRPCAPI_H
#define LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETRPCAPI_H
-#include "JITSymbol.h"
-#include "RPCChannel.h"
#include "RPCUtils.h"
+#include "RawByteChannel.h"
+#include "llvm/ExecutionEngine/JITSymbol.h"
namespace llvm {
namespace orc {
@@ -27,42 +27,63 @@ namespace remote {
class DirectBufferWriter {
public:
DirectBufferWriter() = default;
- DirectBufferWriter(const char *Src, TargetAddress Dst, uint64_t Size)
+ DirectBufferWriter(const char *Src, JITTargetAddress Dst, uint64_t Size)
: Src(Src), Dst(Dst), Size(Size) {}
const char *getSrc() const { return Src; }
- TargetAddress getDst() const { return Dst; }
+ JITTargetAddress getDst() const { return Dst; }
uint64_t getSize() const { return Size; }
private:
const char *Src;
- TargetAddress Dst;
+ JITTargetAddress Dst;
uint64_t Size;
};
-inline Error serialize(RPCChannel &C, const DirectBufferWriter &DBW) {
- if (auto EC = serialize(C, DBW.getDst()))
- return EC;
- if (auto EC = serialize(C, DBW.getSize()))
- return EC;
- return C.appendBytes(DBW.getSrc(), DBW.getSize());
-}
-
-inline Error deserialize(RPCChannel &C, DirectBufferWriter &DBW) {
- TargetAddress Dst;
- if (auto EC = deserialize(C, Dst))
- return EC;
- uint64_t Size;
- if (auto EC = deserialize(C, Size))
- return EC;
- char *Addr = reinterpret_cast<char *>(static_cast<uintptr_t>(Dst));
+} // end namespace remote
+
+namespace rpc {
+
+template <> class RPCTypeName<remote::DirectBufferWriter> {
+public:
+ static const char *getName() { return "DirectBufferWriter"; }
+};
- DBW = DirectBufferWriter(0, Dst, Size);
+template <typename ChannelT>
+class SerializationTraits<
+ ChannelT, remote::DirectBufferWriter, remote::DirectBufferWriter,
+ typename std::enable_if<
+ std::is_base_of<RawByteChannel, ChannelT>::value>::type> {
+public:
+ static Error serialize(ChannelT &C, const remote::DirectBufferWriter &DBW) {
+ if (auto EC = serializeSeq(C, DBW.getDst()))
+ return EC;
+ if (auto EC = serializeSeq(C, DBW.getSize()))
+ return EC;
+ return C.appendBytes(DBW.getSrc(), DBW.getSize());
+ }
+
+ static Error deserialize(ChannelT &C, remote::DirectBufferWriter &DBW) {
+ JITTargetAddress Dst;
+ if (auto EC = deserializeSeq(C, Dst))
+ return EC;
+ uint64_t Size;
+ if (auto EC = deserializeSeq(C, Size))
+ return EC;
+ char *Addr = reinterpret_cast<char *>(static_cast<uintptr_t>(Dst));
+
+ DBW = remote::DirectBufferWriter(0, Dst, Size);
+
+ return C.readBytes(Addr, Size);
+ }
+};
- return C.readBytes(Addr, Size);
-}
+} // end namespace rpc
-class OrcRemoteTargetRPCAPI : public RPC<RPCChannel> {
+namespace remote {
+
+class OrcRemoteTargetRPCAPI
+ : public rpc::SingleThreadedRPC<rpc::RawByteChannel> {
protected:
class ResourceIdMgr {
public:
@@ -86,117 +107,157 @@ protected:
public:
// FIXME: Remove constructors once MSVC supports synthesizing move-ops.
- OrcRemoteTargetRPCAPI() = default;
- OrcRemoteTargetRPCAPI(const OrcRemoteTargetRPCAPI &) = delete;
- OrcRemoteTargetRPCAPI &operator=(const OrcRemoteTargetRPCAPI &) = delete;
-
- OrcRemoteTargetRPCAPI(OrcRemoteTargetRPCAPI &&) {}
- OrcRemoteTargetRPCAPI &operator=(OrcRemoteTargetRPCAPI &&) { return *this; }
-
- enum JITFuncId : uint32_t {
- InvalidId = RPCFunctionIdTraits<JITFuncId>::InvalidId,
- CallIntVoidId = RPCFunctionIdTraits<JITFuncId>::FirstValidId,
- CallMainId,
- CallVoidVoidId,
- CreateRemoteAllocatorId,
- CreateIndirectStubsOwnerId,
- DeregisterEHFramesId,
- DestroyRemoteAllocatorId,
- DestroyIndirectStubsOwnerId,
- EmitIndirectStubsId,
- EmitResolverBlockId,
- EmitTrampolineBlockId,
- GetSymbolAddressId,
- GetRemoteInfoId,
- ReadMemId,
- RegisterEHFramesId,
- ReserveMemId,
- RequestCompileId,
- SetProtectionsId,
- TerminateSessionId,
- WriteMemId,
- WritePtrId
- };
-
- static const char *getJITFuncIdName(JITFuncId Id);
-
- typedef Function<CallIntVoidId, int32_t(TargetAddress Addr)> CallIntVoid;
-
- typedef Function<CallMainId,
- int32_t(TargetAddress Addr, std::vector<std::string> Args)>
- CallMain;
-
- typedef Function<CallVoidVoidId, void(TargetAddress FnAddr)> CallVoidVoid;
-
- typedef Function<CreateRemoteAllocatorId,
- void(ResourceIdMgr::ResourceId AllocatorID)>
- CreateRemoteAllocator;
-
- typedef Function<CreateIndirectStubsOwnerId,
- void(ResourceIdMgr::ResourceId StubOwnerID)>
- CreateIndirectStubsOwner;
-
- typedef Function<DeregisterEHFramesId,
- void(TargetAddress Addr, uint32_t Size)>
- DeregisterEHFrames;
-
- typedef Function<DestroyRemoteAllocatorId,
- void(ResourceIdMgr::ResourceId AllocatorID)>
- DestroyRemoteAllocator;
-
- typedef Function<DestroyIndirectStubsOwnerId,
- void(ResourceIdMgr::ResourceId StubsOwnerID)>
- DestroyIndirectStubsOwner;
+ OrcRemoteTargetRPCAPI(rpc::RawByteChannel &C)
+ : rpc::SingleThreadedRPC<rpc::RawByteChannel>(C, true) {}
+
+ class CallIntVoid
+ : public rpc::Function<CallIntVoid, int32_t(JITTargetAddress Addr)> {
+ public:
+ static const char *getName() { return "CallIntVoid"; }
+ };
+
+ class CallMain
+ : public rpc::Function<CallMain, int32_t(JITTargetAddress Addr,
+ std::vector<std::string> Args)> {
+ public:
+ static const char *getName() { return "CallMain"; }
+ };
+
+ class CallVoidVoid
+ : public rpc::Function<CallVoidVoid, void(JITTargetAddress FnAddr)> {
+ public:
+ static const char *getName() { return "CallVoidVoid"; }
+ };
+
+ class CreateRemoteAllocator
+ : public rpc::Function<CreateRemoteAllocator,
+ void(ResourceIdMgr::ResourceId AllocatorID)> {
+ public:
+ static const char *getName() { return "CreateRemoteAllocator"; }
+ };
+
+ class CreateIndirectStubsOwner
+ : public rpc::Function<CreateIndirectStubsOwner,
+ void(ResourceIdMgr::ResourceId StubOwnerID)> {
+ public:
+ static const char *getName() { return "CreateIndirectStubsOwner"; }
+ };
+
+ class DeregisterEHFrames
+ : public rpc::Function<DeregisterEHFrames,
+ void(JITTargetAddress Addr, uint32_t Size)> {
+ public:
+ static const char *getName() { return "DeregisterEHFrames"; }
+ };
+
+ class DestroyRemoteAllocator
+ : public rpc::Function<DestroyRemoteAllocator,
+ void(ResourceIdMgr::ResourceId AllocatorID)> {
+ public:
+ static const char *getName() { return "DestroyRemoteAllocator"; }
+ };
+
+ class DestroyIndirectStubsOwner
+ : public rpc::Function<DestroyIndirectStubsOwner,
+ void(ResourceIdMgr::ResourceId StubsOwnerID)> {
+ public:
+ static const char *getName() { return "DestroyIndirectStubsOwner"; }
+ };
/// EmitIndirectStubs result is (StubsBase, PtrsBase, NumStubsEmitted).
- typedef Function<EmitIndirectStubsId,
- std::tuple<TargetAddress, TargetAddress, uint32_t>(
- ResourceIdMgr::ResourceId StubsOwnerID,
- uint32_t NumStubsRequired)>
- EmitIndirectStubs;
+ class EmitIndirectStubs
+ : public rpc::Function<
+ EmitIndirectStubs,
+ std::tuple<JITTargetAddress, JITTargetAddress, uint32_t>(
+ ResourceIdMgr::ResourceId StubsOwnerID,
+ uint32_t NumStubsRequired)> {
+ public:
+ static const char *getName() { return "EmitIndirectStubs"; }
+ };
- typedef Function<EmitResolverBlockId, void()> EmitResolverBlock;
+ class EmitResolverBlock : public rpc::Function<EmitResolverBlock, void()> {
+ public:
+ static const char *getName() { return "EmitResolverBlock"; }
+ };
/// EmitTrampolineBlock result is (BlockAddr, NumTrampolines).
- typedef Function<EmitTrampolineBlockId, std::tuple<TargetAddress, uint32_t>()>
- EmitTrampolineBlock;
+ class EmitTrampolineBlock
+ : public rpc::Function<EmitTrampolineBlock,
+ std::tuple<JITTargetAddress, uint32_t>()> {
+ public:
+ static const char *getName() { return "EmitTrampolineBlock"; }
+ };
- typedef Function<GetSymbolAddressId, TargetAddress(std::string SymbolName)>
- GetSymbolAddress;
+ class GetSymbolAddress
+ : public rpc::Function<GetSymbolAddress,
+ JITTargetAddress(std::string SymbolName)> {
+ public:
+ static const char *getName() { return "GetSymbolAddress"; }
+ };
/// GetRemoteInfo result is (Triple, PointerSize, PageSize, TrampolineSize,
/// IndirectStubsSize).
- typedef Function<GetRemoteInfoId, std::tuple<std::string, uint32_t, uint32_t,
- uint32_t, uint32_t>()>
- GetRemoteInfo;
+ class GetRemoteInfo
+ : public rpc::Function<
+ GetRemoteInfo,
+ std::tuple<std::string, uint32_t, uint32_t, uint32_t, uint32_t>()> {
+ public:
+ static const char *getName() { return "GetRemoteInfo"; }
+ };
- typedef Function<ReadMemId,
- std::vector<char>(TargetAddress Src, uint64_t Size)>
- ReadMem;
+ class ReadMem
+ : public rpc::Function<ReadMem, std::vector<uint8_t>(JITTargetAddress Src,
+ uint64_t Size)> {
+ public:
+ static const char *getName() { return "ReadMem"; }
+ };
- typedef Function<RegisterEHFramesId, void(TargetAddress Addr, uint32_t Size)>
- RegisterEHFrames;
+ class RegisterEHFrames
+ : public rpc::Function<RegisterEHFrames,
+ void(JITTargetAddress Addr, uint32_t Size)> {
+ public:
+ static const char *getName() { return "RegisterEHFrames"; }
+ };
- typedef Function<ReserveMemId,
- TargetAddress(ResourceIdMgr::ResourceId AllocID,
- uint64_t Size, uint32_t Align)>
- ReserveMem;
+ class ReserveMem
+ : public rpc::Function<ReserveMem,
+ JITTargetAddress(ResourceIdMgr::ResourceId AllocID,
+ uint64_t Size, uint32_t Align)> {
+ public:
+ static const char *getName() { return "ReserveMem"; }
+ };
- typedef Function<RequestCompileId,
- TargetAddress(TargetAddress TrampolineAddr)>
- RequestCompile;
+ class RequestCompile
+ : public rpc::Function<
+ RequestCompile, JITTargetAddress(JITTargetAddress TrampolineAddr)> {
+ public:
+ static const char *getName() { return "RequestCompile"; }
+ };
- typedef Function<SetProtectionsId,
- void(ResourceIdMgr::ResourceId AllocID, TargetAddress Dst,
- uint32_t ProtFlags)>
- SetProtections;
+ class SetProtections
+ : public rpc::Function<SetProtections,
+ void(ResourceIdMgr::ResourceId AllocID,
+ JITTargetAddress Dst, uint32_t ProtFlags)> {
+ public:
+ static const char *getName() { return "SetProtections"; }
+ };
- typedef Function<TerminateSessionId, void()> TerminateSession;
+ class TerminateSession : public rpc::Function<TerminateSession, void()> {
+ public:
+ static const char *getName() { return "TerminateSession"; }
+ };
- typedef Function<WriteMemId, void(DirectBufferWriter DB)> WriteMem;
+ class WriteMem
+ : public rpc::Function<WriteMem, void(remote::DirectBufferWriter DB)> {
+ public:
+ static const char *getName() { return "WriteMem"; }
+ };
- typedef Function<WritePtrId, void(TargetAddress Dst, TargetAddress Val)>
- WritePtr;
+ class WritePtr : public rpc::Function<WritePtr, void(JITTargetAddress Dst,
+ JITTargetAddress Val)> {
+ public:
+ static const char *getName() { return "WritePtr"; }
+ };
};
} // end namespace remote
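The JITFuncId enum plus Function<Id, Signature> typedefs are replaced by self-describing classes: each RPC function now derives from rpc::Function<Derived, Signature> and reports a stable name used for negotiation. Adding a new function follows the same pattern; GetPageSize below is a made-up example, not part of the real API:

    #include "llvm/ExecutionEngine/Orc/RPCUtils.h"
    #include <cstdint>

    class GetPageSize
        : public llvm::orc::rpc::Function<GetPageSize, uint32_t()> {
    public:
      static const char *getName() { return "GetPageSize"; }
    };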
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h
index bf4299c69b24..506330fe3a5e 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h
@@ -16,12 +16,28 @@
#define LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETSERVER_H
#include "OrcRemoteTargetRPCAPI.h"
+#include "llvm/ExecutionEngine/JITSymbol.h"
+#include "llvm/ExecutionEngine/Orc/OrcError.h"
#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Error.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/Memory.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <functional>
#include <map>
+#include <memory>
+#include <string>
+#include <system_error>
+#include <tuple>
+#include <type_traits>
+#include <vector>
#define DEBUG_TYPE "orc-remote"
@@ -32,7 +48,7 @@ namespace remote {
template <typename ChannelT, typename TargetT>
class OrcRemoteTargetServer : public OrcRemoteTargetRPCAPI {
public:
- typedef std::function<TargetAddress(const std::string &Name)>
+ typedef std::function<JITTargetAddress(const std::string &Name)>
SymbolLookupFtor;
typedef std::function<void(uint8_t *Addr, uint32_t Size)>
@@ -41,94 +57,50 @@ public:
OrcRemoteTargetServer(ChannelT &Channel, SymbolLookupFtor SymbolLookup,
EHFrameRegistrationFtor EHFramesRegister,
EHFrameRegistrationFtor EHFramesDeregister)
- : Channel(Channel), SymbolLookup(std::move(SymbolLookup)),
+ : OrcRemoteTargetRPCAPI(Channel), SymbolLookup(std::move(SymbolLookup)),
EHFramesRegister(std::move(EHFramesRegister)),
- EHFramesDeregister(std::move(EHFramesDeregister)) {}
+ EHFramesDeregister(std::move(EHFramesDeregister)),
+ TerminateFlag(false) {
+
+ using ThisT = typename std::remove_reference<decltype(*this)>::type;
+ addHandler<CallIntVoid>(*this, &ThisT::handleCallIntVoid);
+ addHandler<CallMain>(*this, &ThisT::handleCallMain);
+ addHandler<CallVoidVoid>(*this, &ThisT::handleCallVoidVoid);
+ addHandler<CreateRemoteAllocator>(*this,
+ &ThisT::handleCreateRemoteAllocator);
+ addHandler<CreateIndirectStubsOwner>(
+ *this, &ThisT::handleCreateIndirectStubsOwner);
+ addHandler<DeregisterEHFrames>(*this, &ThisT::handleDeregisterEHFrames);
+ addHandler<DestroyRemoteAllocator>(*this,
+ &ThisT::handleDestroyRemoteAllocator);
+ addHandler<DestroyIndirectStubsOwner>(
+ *this, &ThisT::handleDestroyIndirectStubsOwner);
+ addHandler<EmitIndirectStubs>(*this, &ThisT::handleEmitIndirectStubs);
+ addHandler<EmitResolverBlock>(*this, &ThisT::handleEmitResolverBlock);
+ addHandler<EmitTrampolineBlock>(*this, &ThisT::handleEmitTrampolineBlock);
+ addHandler<GetSymbolAddress>(*this, &ThisT::handleGetSymbolAddress);
+ addHandler<GetRemoteInfo>(*this, &ThisT::handleGetRemoteInfo);
+ addHandler<ReadMem>(*this, &ThisT::handleReadMem);
+ addHandler<RegisterEHFrames>(*this, &ThisT::handleRegisterEHFrames);
+ addHandler<ReserveMem>(*this, &ThisT::handleReserveMem);
+ addHandler<SetProtections>(*this, &ThisT::handleSetProtections);
+ addHandler<TerminateSession>(*this, &ThisT::handleTerminateSession);
+ addHandler<WriteMem>(*this, &ThisT::handleWriteMem);
+ addHandler<WritePtr>(*this, &ThisT::handleWritePtr);
+ }
// FIXME: Remove move/copy ops once MSVC supports synthesizing move ops.
OrcRemoteTargetServer(const OrcRemoteTargetServer &) = delete;
OrcRemoteTargetServer &operator=(const OrcRemoteTargetServer &) = delete;
- OrcRemoteTargetServer(OrcRemoteTargetServer &&Other)
- : Channel(Other.Channel), SymbolLookup(std::move(Other.SymbolLookup)),
- EHFramesRegister(std::move(Other.EHFramesRegister)),
- EHFramesDeregister(std::move(Other.EHFramesDeregister)) {}
-
+ OrcRemoteTargetServer(OrcRemoteTargetServer &&Other) = default;
OrcRemoteTargetServer &operator=(OrcRemoteTargetServer &&) = delete;
- Error handleKnownFunction(JITFuncId Id) {
- typedef OrcRemoteTargetServer ThisT;
-
- DEBUG(dbgs() << "Handling known proc: " << getJITFuncIdName(Id) << "\n");
-
- switch (Id) {
- case CallIntVoidId:
- return handle<CallIntVoid>(Channel, *this, &ThisT::handleCallIntVoid);
- case CallMainId:
- return handle<CallMain>(Channel, *this, &ThisT::handleCallMain);
- case CallVoidVoidId:
- return handle<CallVoidVoid>(Channel, *this, &ThisT::handleCallVoidVoid);
- case CreateRemoteAllocatorId:
- return handle<CreateRemoteAllocator>(Channel, *this,
- &ThisT::handleCreateRemoteAllocator);
- case CreateIndirectStubsOwnerId:
- return handle<CreateIndirectStubsOwner>(
- Channel, *this, &ThisT::handleCreateIndirectStubsOwner);
- case DeregisterEHFramesId:
- return handle<DeregisterEHFrames>(Channel, *this,
- &ThisT::handleDeregisterEHFrames);
- case DestroyRemoteAllocatorId:
- return handle<DestroyRemoteAllocator>(
- Channel, *this, &ThisT::handleDestroyRemoteAllocator);
- case DestroyIndirectStubsOwnerId:
- return handle<DestroyIndirectStubsOwner>(
- Channel, *this, &ThisT::handleDestroyIndirectStubsOwner);
- case EmitIndirectStubsId:
- return handle<EmitIndirectStubs>(Channel, *this,
- &ThisT::handleEmitIndirectStubs);
- case EmitResolverBlockId:
- return handle<EmitResolverBlock>(Channel, *this,
- &ThisT::handleEmitResolverBlock);
- case EmitTrampolineBlockId:
- return handle<EmitTrampolineBlock>(Channel, *this,
- &ThisT::handleEmitTrampolineBlock);
- case GetSymbolAddressId:
- return handle<GetSymbolAddress>(Channel, *this,
- &ThisT::handleGetSymbolAddress);
- case GetRemoteInfoId:
- return handle<GetRemoteInfo>(Channel, *this, &ThisT::handleGetRemoteInfo);
- case ReadMemId:
- return handle<ReadMem>(Channel, *this, &ThisT::handleReadMem);
- case RegisterEHFramesId:
- return handle<RegisterEHFrames>(Channel, *this,
- &ThisT::handleRegisterEHFrames);
- case ReserveMemId:
- return handle<ReserveMem>(Channel, *this, &ThisT::handleReserveMem);
- case SetProtectionsId:
- return handle<SetProtections>(Channel, *this,
- &ThisT::handleSetProtections);
- case WriteMemId:
- return handle<WriteMem>(Channel, *this, &ThisT::handleWriteMem);
- case WritePtrId:
- return handle<WritePtr>(Channel, *this, &ThisT::handleWritePtr);
- default:
- return orcError(OrcErrorCode::UnexpectedRPCCall);
- }
-
- llvm_unreachable("Unhandled JIT RPC procedure Id.");
+ Expected<JITTargetAddress> requestCompile(JITTargetAddress TrampolineAddr) {
+ return callB<RequestCompile>(TrampolineAddr);
}
- Expected<TargetAddress> requestCompile(TargetAddress TrampolineAddr) {
- auto Listen = [&](RPCChannel &C, uint32_t Id) {
- return handleKnownFunction(static_cast<JITFuncId>(Id));
- };
-
- return callSTHandling<RequestCompile>(Channel, Listen, TrampolineAddr);
- }
-
- Error handleTerminateSession() {
- return handle<TerminateSession>(Channel, []() { return Error::success(); });
- }
+ bool receivedTerminate() const { return TerminateFlag; }
private:
struct Allocator {
@@ -171,16 +143,16 @@ private:
static Error doNothing() { return Error::success(); }
- static TargetAddress reenter(void *JITTargetAddr, void *TrampolineAddr) {
+ static JITTargetAddress reenter(void *JITTargetAddr, void *TrampolineAddr) {
auto T = static_cast<OrcRemoteTargetServer *>(JITTargetAddr);
- auto AddrOrErr = T->requestCompile(static_cast<TargetAddress>(
+ auto AddrOrErr = T->requestCompile(static_cast<JITTargetAddress>(
reinterpret_cast<uintptr_t>(TrampolineAddr)));
// FIXME: Allow customizable failure substitution functions.
assert(AddrOrErr && "Compile request failed");
return *AddrOrErr;
}
- Expected<int32_t> handleCallIntVoid(TargetAddress Addr) {
+ Expected<int32_t> handleCallIntVoid(JITTargetAddress Addr) {
typedef int (*IntVoidFnTy)();
IntVoidFnTy Fn =
reinterpret_cast<IntVoidFnTy>(static_cast<uintptr_t>(Addr));
@@ -192,7 +164,7 @@ private:
return Result;
}
- Expected<int32_t> handleCallMain(TargetAddress Addr,
+ Expected<int32_t> handleCallMain(JITTargetAddress Addr,
std::vector<std::string> Args) {
typedef int (*MainFnTy)(int, const char *[]);
@@ -211,7 +183,7 @@ private:
return Result;
}
- Error handleCallVoidVoid(TargetAddress Addr) {
+ Error handleCallVoidVoid(JITTargetAddress Addr) {
typedef void (*VoidVoidFnTy)();
VoidVoidFnTy Fn =
reinterpret_cast<VoidVoidFnTy>(static_cast<uintptr_t>(Addr));
@@ -241,7 +213,7 @@ private:
return Error::success();
}
- Error handleDeregisterEHFrames(TargetAddress TAddr, uint32_t Size) {
+ Error handleDeregisterEHFrames(JITTargetAddress TAddr, uint32_t Size) {
uint8_t *Addr = reinterpret_cast<uint8_t *>(static_cast<uintptr_t>(TAddr));
DEBUG(dbgs() << " Registering EH frames at " << format("0x%016x", TAddr)
<< ", Size = " << Size << " bytes\n");
@@ -266,7 +238,7 @@ private:
return Error::success();
}
- Expected<std::tuple<TargetAddress, TargetAddress, uint32_t>>
+ Expected<std::tuple<JITTargetAddress, JITTargetAddress, uint32_t>>
handleEmitIndirectStubs(ResourceIdMgr::ResourceId Id,
uint32_t NumStubsRequired) {
DEBUG(dbgs() << " ISMgr " << Id << " request " << NumStubsRequired
@@ -281,10 +253,10 @@ private:
TargetT::emitIndirectStubsBlock(IS, NumStubsRequired, nullptr))
return std::move(Err);
- TargetAddress StubsBase =
- static_cast<TargetAddress>(reinterpret_cast<uintptr_t>(IS.getStub(0)));
- TargetAddress PtrsBase =
- static_cast<TargetAddress>(reinterpret_cast<uintptr_t>(IS.getPtr(0)));
+ JITTargetAddress StubsBase = static_cast<JITTargetAddress>(
+ reinterpret_cast<uintptr_t>(IS.getStub(0)));
+ JITTargetAddress PtrsBase = static_cast<JITTargetAddress>(
+ reinterpret_cast<uintptr_t>(IS.getPtr(0)));
uint32_t NumStubsEmitted = IS.getNumStubs();
auto &BlockList = StubOwnerItr->second;
@@ -309,7 +281,7 @@ private:
sys::Memory::MF_READ | sys::Memory::MF_EXEC));
}
- Expected<std::tuple<TargetAddress, uint32_t>> handleEmitTrampolineBlock() {
+ Expected<std::tuple<JITTargetAddress, uint32_t>> handleEmitTrampolineBlock() {
std::error_code EC;
auto TrampolineBlock =
sys::OwningMemoryBlock(sys::Memory::allocateMappedMemory(
@@ -332,14 +304,14 @@ private:
TrampolineBlocks.push_back(std::move(TrampolineBlock));
- auto TrampolineBaseAddr =
- static_cast<TargetAddress>(reinterpret_cast<uintptr_t>(TrampolineMem));
+ auto TrampolineBaseAddr = static_cast<JITTargetAddress>(
+ reinterpret_cast<uintptr_t>(TrampolineMem));
return std::make_tuple(TrampolineBaseAddr, NumTrampolines);
}
- Expected<TargetAddress> handleGetSymbolAddress(const std::string &Name) {
- TargetAddress Addr = SymbolLookup(Name);
+ Expected<JITTargetAddress> handleGetSymbolAddress(const std::string &Name) {
+ JITTargetAddress Addr = SymbolLookup(Name);
DEBUG(dbgs() << " Symbol '" << Name << "' = " << format("0x%016x", Addr)
<< "\n");
return Addr;
@@ -362,21 +334,22 @@ private:
IndirectStubSize);
}
- Expected<std::vector<char>> handleReadMem(TargetAddress RSrc, uint64_t Size) {
- char *Src = reinterpret_cast<char *>(static_cast<uintptr_t>(RSrc));
+ Expected<std::vector<uint8_t>> handleReadMem(JITTargetAddress RSrc,
+ uint64_t Size) {
+ uint8_t *Src = reinterpret_cast<uint8_t *>(static_cast<uintptr_t>(RSrc));
DEBUG(dbgs() << " Reading " << Size << " bytes from "
<< format("0x%016x", RSrc) << "\n");
- std::vector<char> Buffer;
+ std::vector<uint8_t> Buffer;
Buffer.resize(Size);
- for (char *P = Src; Size != 0; --Size)
+ for (uint8_t *P = Src; Size != 0; --Size)
Buffer.push_back(*P++);
return Buffer;
}
- Error handleRegisterEHFrames(TargetAddress TAddr, uint32_t Size) {
+ Error handleRegisterEHFrames(JITTargetAddress TAddr, uint32_t Size) {
uint8_t *Addr = reinterpret_cast<uint8_t *>(static_cast<uintptr_t>(TAddr));
DEBUG(dbgs() << " Registering EH frames at " << format("0x%016x", TAddr)
<< ", Size = " << Size << " bytes\n");
@@ -384,8 +357,8 @@ private:
return Error::success();
}
- Expected<TargetAddress> handleReserveMem(ResourceIdMgr::ResourceId Id,
- uint64_t Size, uint32_t Align) {
+ Expected<JITTargetAddress> handleReserveMem(ResourceIdMgr::ResourceId Id,
+ uint64_t Size, uint32_t Align) {
auto I = Allocators.find(Id);
if (I == Allocators.end())
return orcError(OrcErrorCode::RemoteAllocatorDoesNotExist);
@@ -397,14 +370,14 @@ private:
DEBUG(dbgs() << " Allocator " << Id << " reserved " << LocalAllocAddr
<< " (" << Size << " bytes, alignment " << Align << ")\n");
- TargetAddress AllocAddr =
- static_cast<TargetAddress>(reinterpret_cast<uintptr_t>(LocalAllocAddr));
+ JITTargetAddress AllocAddr = static_cast<JITTargetAddress>(
+ reinterpret_cast<uintptr_t>(LocalAllocAddr));
return AllocAddr;
}
- Error handleSetProtections(ResourceIdMgr::ResourceId Id, TargetAddress Addr,
- uint32_t Flags) {
+ Error handleSetProtections(ResourceIdMgr::ResourceId Id,
+ JITTargetAddress Addr, uint32_t Flags) {
auto I = Allocators.find(Id);
if (I == Allocators.end())
return orcError(OrcErrorCode::RemoteAllocatorDoesNotExist);
@@ -417,13 +390,18 @@ private:
return Allocator.setProtections(LocalAddr, Flags);
}
+ Error handleTerminateSession() {
+ TerminateFlag = true;
+ return Error::success();
+ }
+
Error handleWriteMem(DirectBufferWriter DBW) {
DEBUG(dbgs() << " Writing " << DBW.getSize() << " bytes to "
<< format("0x%016x", DBW.getDst()) << "\n");
return Error::success();
}
- Error handleWritePtr(TargetAddress Addr, TargetAddress PtrVal) {
+ Error handleWritePtr(JITTargetAddress Addr, JITTargetAddress PtrVal) {
DEBUG(dbgs() << " Writing pointer *" << format("0x%016x", Addr) << " = "
<< format("0x%016x", PtrVal) << "\n");
uintptr_t *Ptr =
@@ -432,7 +410,6 @@ private:
return Error::success();
}
- ChannelT &Channel;
SymbolLookupFtor SymbolLookup;
EHFrameRegistrationFtor EHFramesRegister, EHFramesDeregister;
std::map<ResourceIdMgr::ResourceId, Allocator> Allocators;
@@ -440,6 +417,7 @@ private:
std::map<ResourceIdMgr::ResourceId, ISBlockOwnerList> IndirectStubsOwners;
sys::OwningMemoryBlock ResolverBlock;
std::vector<sys::OwningMemoryBlock> TrampolineBlocks;
+ bool TerminateFlag;
};
} // end namespace remote
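Since the server now registers one handler per RPC function in its constructor and signals shutdown via receivedTerminate(), a typical driving loop just pumps messages. This sketch assumes the single-message handleOne() entry point on the SingleThreadedRPC base:

    #include "llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h"

    template <typename ChannelT, typename TargetT>
    llvm::Error serveUntilTerminate(
        llvm::orc::remote::OrcRemoteTargetServer<ChannelT, TargetT> &Server) {
      while (!Server.receivedTerminate())
        if (auto Err = Server.handleOne())   // dispatches to a registered handler
          return Err;
      return llvm::Error::success();
    }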
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/RPCChannel.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/RPCChannel.h
deleted file mode 100644
index c569e3cf05b4..000000000000
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/RPCChannel.h
+++ /dev/null
@@ -1,249 +0,0 @@
-//===- llvm/ExecutionEngine/Orc/RPCChannel.h --------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_EXECUTIONENGINE_ORC_RPCCHANNEL_H
-#define LLVM_EXECUTIONENGINE_ORC_RPCCHANNEL_H
-
-#include "OrcError.h"
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/Endian.h"
-#include "llvm/Support/Error.h"
-#include <cstddef>
-#include <cstdint>
-#include <mutex>
-#include <string>
-#include <tuple>
-#include <vector>
-
-namespace llvm {
-namespace orc {
-namespace remote {
-
-/// Interface for byte-streams to be used with RPC.
-class RPCChannel {
-public:
- virtual ~RPCChannel() {}
-
- /// Read Size bytes from the stream into *Dst.
- virtual Error readBytes(char *Dst, unsigned Size) = 0;
-
- /// Read size bytes from *Src and append them to the stream.
- virtual Error appendBytes(const char *Src, unsigned Size) = 0;
-
- /// Flush the stream if possible.
- virtual Error send() = 0;
-
- /// Get the lock for stream reading.
- std::mutex &getReadLock() { return readLock; }
-
- /// Get the lock for stream writing.
- std::mutex &getWriteLock() { return writeLock; }
-
-private:
- std::mutex readLock, writeLock;
-};
-
-/// Notify the channel that we're starting a message send.
-/// Locks the channel for writing.
-inline Error startSendMessage(RPCChannel &C) {
- C.getWriteLock().lock();
- return Error::success();
-}
-
-/// Notify the channel that we're ending a message send.
-/// Unlocks the channel for writing.
-inline Error endSendMessage(RPCChannel &C) {
- C.getWriteLock().unlock();
- return Error::success();
-}
-
-/// Notify the channel that we're starting a message receive.
-/// Locks the channel for reading.
-inline Error startReceiveMessage(RPCChannel &C) {
- C.getReadLock().lock();
- return Error::success();
-}
-
-/// Notify the channel that we're ending a message receive.
-/// Unlocks the channel for reading.
-inline Error endReceiveMessage(RPCChannel &C) {
- C.getReadLock().unlock();
- return Error::success();
-}
-
-/// RPC channel serialization for a variadic list of arguments.
-template <typename T, typename... Ts>
-Error serializeSeq(RPCChannel &C, const T &Arg, const Ts &... Args) {
- if (auto Err = serialize(C, Arg))
- return Err;
- return serializeSeq(C, Args...);
-}
-
-/// RPC channel serialization for an (empty) variadic list of arguments.
-inline Error serializeSeq(RPCChannel &C) { return Error::success(); }
-
-/// RPC channel deserialization for a variadic list of arguments.
-template <typename T, typename... Ts>
-Error deserializeSeq(RPCChannel &C, T &Arg, Ts &... Args) {
- if (auto Err = deserialize(C, Arg))
- return Err;
- return deserializeSeq(C, Args...);
-}
-
-/// RPC channel serialization for an (empty) variadic list of arguments.
-inline Error deserializeSeq(RPCChannel &C) { return Error::success(); }
-
-/// RPC channel serialization for integer primitives.
-template <typename T>
-typename std::enable_if<
- std::is_same<T, uint64_t>::value || std::is_same<T, int64_t>::value ||
- std::is_same<T, uint32_t>::value || std::is_same<T, int32_t>::value ||
- std::is_same<T, uint16_t>::value || std::is_same<T, int16_t>::value ||
- std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value,
- Error>::type
-serialize(RPCChannel &C, T V) {
- support::endian::byte_swap<T, support::big>(V);
- return C.appendBytes(reinterpret_cast<const char *>(&V), sizeof(T));
-}
-
-/// RPC channel deserialization for integer primitives.
-template <typename T>
-typename std::enable_if<
- std::is_same<T, uint64_t>::value || std::is_same<T, int64_t>::value ||
- std::is_same<T, uint32_t>::value || std::is_same<T, int32_t>::value ||
- std::is_same<T, uint16_t>::value || std::is_same<T, int16_t>::value ||
- std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value,
- Error>::type
-deserialize(RPCChannel &C, T &V) {
- if (auto Err = C.readBytes(reinterpret_cast<char *>(&V), sizeof(T)))
- return Err;
- support::endian::byte_swap<T, support::big>(V);
- return Error::success();
-}
-
-/// RPC channel serialization for enums.
-template <typename T>
-typename std::enable_if<std::is_enum<T>::value, Error>::type
-serialize(RPCChannel &C, T V) {
- return serialize(C, static_cast<typename std::underlying_type<T>::type>(V));
-}
-
-/// RPC channel deserialization for enums.
-template <typename T>
-typename std::enable_if<std::is_enum<T>::value, Error>::type
-deserialize(RPCChannel &C, T &V) {
- typename std::underlying_type<T>::type Tmp;
- Error Err = deserialize(C, Tmp);
- V = static_cast<T>(Tmp);
- return Err;
-}
-
-/// RPC channel serialization for bools.
-inline Error serialize(RPCChannel &C, bool V) {
- uint8_t VN = V ? 1 : 0;
- return C.appendBytes(reinterpret_cast<const char *>(&VN), 1);
-}
-
-/// RPC channel deserialization for bools.
-inline Error deserialize(RPCChannel &C, bool &V) {
- uint8_t VN = 0;
- if (auto Err = C.readBytes(reinterpret_cast<char *>(&VN), 1))
- return Err;
-
- V = (VN != 0);
- return Error::success();
-}
-
-/// RPC channel serialization for StringRefs.
-/// Note: There is no corresponding deseralization for this, as StringRef
-/// doesn't own its memory and so can't hold the deserialized data.
-inline Error serialize(RPCChannel &C, StringRef S) {
- if (auto Err = serialize(C, static_cast<uint64_t>(S.size())))
- return Err;
- return C.appendBytes((const char *)S.bytes_begin(), S.size());
-}
-
-/// RPC channel serialization for std::strings.
-inline Error serialize(RPCChannel &C, const std::string &S) {
- return serialize(C, StringRef(S));
-}
-
-/// RPC channel deserialization for std::strings.
-inline Error deserialize(RPCChannel &C, std::string &S) {
- uint64_t Count;
- if (auto Err = deserialize(C, Count))
- return Err;
- S.resize(Count);
- return C.readBytes(&S[0], Count);
-}
-
-// Serialization helper for std::tuple.
-template <typename TupleT, size_t... Is>
-inline Error serializeTupleHelper(RPCChannel &C, const TupleT &V,
- llvm::index_sequence<Is...> _) {
- return serializeSeq(C, std::get<Is>(V)...);
-}
-
-/// RPC channel serialization for std::tuple.
-template <typename... ArgTs>
-inline Error serialize(RPCChannel &C, const std::tuple<ArgTs...> &V) {
- return serializeTupleHelper(C, V, llvm::index_sequence_for<ArgTs...>());
-}
-
-// Serialization helper for std::tuple.
-template <typename TupleT, size_t... Is>
-inline Error deserializeTupleHelper(RPCChannel &C, TupleT &V,
- llvm::index_sequence<Is...> _) {
- return deserializeSeq(C, std::get<Is>(V)...);
-}
-
-/// RPC channel deserialization for std::tuple.
-template <typename... ArgTs>
-inline Error deserialize(RPCChannel &C, std::tuple<ArgTs...> &V) {
- return deserializeTupleHelper(C, V, llvm::index_sequence_for<ArgTs...>());
-}
-
-/// RPC channel serialization for ArrayRef<T>.
-template <typename T> Error serialize(RPCChannel &C, const ArrayRef<T> &A) {
- if (auto Err = serialize(C, static_cast<uint64_t>(A.size())))
- return Err;
-
- for (const auto &E : A)
- if (auto Err = serialize(C, E))
- return Err;
-
- return Error::success();
-}
-
-/// RPC channel serialization for std::array<T>.
-template <typename T> Error serialize(RPCChannel &C, const std::vector<T> &V) {
- return serialize(C, ArrayRef<T>(V));
-}
-
-/// RPC channel deserialization for std::array<T>.
-template <typename T> Error deserialize(RPCChannel &C, std::vector<T> &V) {
- uint64_t Count = 0;
- if (auto Err = deserialize(C, Count))
- return Err;
-
- V.resize(Count);
- for (auto &E : V)
- if (auto Err = deserialize(C, E))
- return Err;
-
- return Error::success();
-}
-
-} // end namespace remote
-} // end namespace orc
-} // end namespace llvm
-
-#endif // LLVM_EXECUTIONENGINE_ORC_RPCCHANNEL_H
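The free serialize/deserialize overloads deleted with this file are superseded by per-channel SerializationTraits specializations, introduced in RPCSerialization.h below and already used for DirectBufferWriter above. Porting a user type looks roughly like this sketch; Point is a made-up type, and the serializeSeq/deserializeSeq helpers are assumed to be the ones used by the DirectBufferWriter specialization:

    #include "llvm/ExecutionEngine/Orc/RPCSerialization.h"
    #include <cstdint>

    struct Point { uint32_t X = 0, Y = 0; };   // illustrative user type

    namespace llvm {
    namespace orc {
    namespace rpc {

    template <> class RPCTypeName<Point> {
    public:
      static const char *getName() { return "Point"; }
    };

    template <typename ChannelT> class SerializationTraits<ChannelT, Point> {
    public:
      static Error serialize(ChannelT &C, const Point &P) {
        return serializeSeq(C, P.X, P.Y);      // reuse the integer serializers
      }
      static Error deserialize(ChannelT &C, Point &P) {
        return deserializeSeq(C, P.X, P.Y);
      }
    };

    } // end namespace rpc
    } // end namespace orc
    } // end namespace llvm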
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/RPCSerialization.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/RPCSerialization.h
new file mode 100644
index 000000000000..359a9d81b22b
--- /dev/null
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/RPCSerialization.h
@@ -0,0 +1,373 @@
+//===- llvm/ExecutionEngine/Orc/RPCSerialization.h --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_RPCSERIALIZATION_H
+#define LLVM_EXECUTIONENGINE_ORC_RPCSERIALIZATION_H
+
+#include "OrcError.h"
+#include "llvm/Support/thread.h"
+#include <mutex>
+#include <sstream>
+
+namespace llvm {
+namespace orc {
+namespace rpc {
+
+template <typename T>
+class RPCTypeName;
+
+/// TypeNameSequence is a utility for rendering sequences of types to a string
+/// by rendering each type, separated by ", ".
+template <typename... ArgTs> class RPCTypeNameSequence {};
+
+/// Render an empty TypeNameSequence to an ostream.
+template <typename OStream>
+OStream &operator<<(OStream &OS, const RPCTypeNameSequence<> &V) {
+ return OS;
+}
+
+/// Render a TypeNameSequence of a single type to an ostream.
+template <typename OStream, typename ArgT>
+OStream &operator<<(OStream &OS, const RPCTypeNameSequence<ArgT> &V) {
+ OS << RPCTypeName<ArgT>::getName();
+ return OS;
+}
+
+/// Render a TypeNameSequence of more than one type to an ostream.
+template <typename OStream, typename ArgT1, typename ArgT2, typename... ArgTs>
+OStream&
+operator<<(OStream &OS, const RPCTypeNameSequence<ArgT1, ArgT2, ArgTs...> &V) {
+ OS << RPCTypeName<ArgT1>::getName() << ", "
+ << RPCTypeNameSequence<ArgT2, ArgTs...>();
+ return OS;
+}
+
+template <>
+class RPCTypeName<void> {
+public:
+ static const char* getName() { return "void"; }
+};
+
+template <>
+class RPCTypeName<int8_t> {
+public:
+ static const char* getName() { return "int8_t"; }
+};
+
+template <>
+class RPCTypeName<uint8_t> {
+public:
+ static const char* getName() { return "uint8_t"; }
+};
+
+template <>
+class RPCTypeName<int16_t> {
+public:
+ static const char* getName() { return "int16_t"; }
+};
+
+template <>
+class RPCTypeName<uint16_t> {
+public:
+ static const char* getName() { return "uint16_t"; }
+};
+
+template <>
+class RPCTypeName<int32_t> {
+public:
+ static const char* getName() { return "int32_t"; }
+};
+
+template <>
+class RPCTypeName<uint32_t> {
+public:
+ static const char* getName() { return "uint32_t"; }
+};
+
+template <>
+class RPCTypeName<int64_t> {
+public:
+ static const char* getName() { return "int64_t"; }
+};
+
+template <>
+class RPCTypeName<uint64_t> {
+public:
+ static const char* getName() { return "uint64_t"; }
+};
+
+template <>
+class RPCTypeName<bool> {
+public:
+ static const char* getName() { return "bool"; }
+};
+
+template <>
+class RPCTypeName<std::string> {
+public:
+ static const char* getName() { return "std::string"; }
+};
+
+template <typename T1, typename T2>
+class RPCTypeName<std::pair<T1, T2>> {
+public:
+ static const char* getName() {
+ std::lock_guard<std::mutex> Lock(NameMutex);
+ if (Name.empty())
+ raw_string_ostream(Name) << "std::pair<" << RPCTypeNameSequence<T1, T2>()
+ << ">";
+ return Name.data();
+ }
+private:
+ static std::mutex NameMutex;
+ static std::string Name;
+};
+
+template <typename T1, typename T2>
+std::mutex RPCTypeName<std::pair<T1, T2>>::NameMutex;
+template <typename T1, typename T2>
+std::string RPCTypeName<std::pair<T1, T2>>::Name;
+
+template <typename... ArgTs>
+class RPCTypeName<std::tuple<ArgTs...>> {
+public:
+ static const char* getName() {
+ std::lock_guard<std::mutex> Lock(NameMutex);
+ if (Name.empty())
+ raw_string_ostream(Name) << "std::tuple<"
+ << RPCTypeNameSequence<ArgTs...>() << ">";
+ return Name.data();
+ }
+private:
+ static std::mutex NameMutex;
+ static std::string Name;
+};
+
+template <typename... ArgTs>
+std::mutex RPCTypeName<std::tuple<ArgTs...>>::NameMutex;
+template <typename... ArgTs>
+std::string RPCTypeName<std::tuple<ArgTs...>>::Name;
+
+template <typename T>
+class RPCTypeName<std::vector<T>> {
+public:
+  static const char* getName() {
+ std::lock_guard<std::mutex> Lock(NameMutex);
+ if (Name.empty())
+ raw_string_ostream(Name) << "std::vector<" << RPCTypeName<T>::getName()
+ << ">";
+ return Name.data();
+ }
+
+private:
+ static std::mutex NameMutex;
+ static std::string Name;
+};
+
+template <typename T>
+std::mutex RPCTypeName<std::vector<T>>::NameMutex;
+template <typename T>
+std::string RPCTypeName<std::vector<T>>::Name;
+
+
+/// The SerializationTraits<ChannelT, T> class describes how to serialize and
+/// deserialize an instance of type T to/from an abstract channel of type
+/// ChannelT. It also provides a representation of the type's name via the
+/// getName method.
+///
+/// Specializations of this class should provide the following functions:
+///
+/// @code{.cpp}
+///
+/// static const char* getName();
+/// static Error serialize(ChannelT&, const T&);
+/// static Error deserialize(ChannelT&, T&);
+///
+/// @endcode
+///
+/// The third argument of SerializationTraits is intended to support SFINAE.
+/// E.g.:
+///
+/// @code{.cpp}
+///
+/// class MyVirtualChannel { ... };
+///
+/// template <typename DerivedChannelT>
+/// class SerializationTraits<DerivedChannelT, bool,
+///         typename std::enable_if<
+///           std::is_base_of<MyVirtualChannel, DerivedChannelT>::value
+///         >::type> {
+/// public:
+///   static const char* getName() { ... };
+/// };
+///
+/// @endcode
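+///
+/// A minimal sketch of such a specialization (illustrative only: QueueChannel
+/// and its appendBytes/readBytes members are hypothetical and not defined in
+/// this header):
+///
+/// @code{.cpp}
+///
+/// template <>
+/// class SerializationTraits<QueueChannel, uint32_t> {
+/// public:
+///   static Error serialize(QueueChannel &C, uint32_t V) {
+///     // Write the raw bytes of V to the channel.
+///     return C.appendBytes(reinterpret_cast<const char *>(&V), sizeof(V));
+///   }
+///
+///   static Error deserialize(QueueChannel &C, uint32_t &V) {
+///     // Read sizeof(V) bytes from the channel into V.
+///     return C.readBytes(reinterpret_cast<char *>(&V), sizeof(V));
+///   }
+/// };
+///
+/// @endcode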
+template <typename ChannelT, typename WireType,
+ typename ConcreteType = WireType, typename = void>
+class SerializationTraits;
+
+template <typename ChannelT>
+class SequenceTraits {
+public:
+ static Error emitSeparator(ChannelT &C) { return Error::success(); }
+ static Error consumeSeparator(ChannelT &C) { return Error::success(); }
+};
+
+/// Utility class for serializing sequences of values of varying types.
+/// Specializations of this class contain 'serialize' and 'deserialize' methods
+/// for the given channel. The ArgTs... list will determine the "over-the-wire"
+/// types to be serialized. The serialize and deserialize methods take a list
+/// CArgTs... ("caller arg types") which must be the same length as ArgTs...,
+/// but may be different types from ArgTs, provided that for each CArgT there
+/// is a SerializationTraits specialization
+/// SerializationTraits<ChannelT, ArgT, CArgT> with methods that can serialize
+/// the caller argument to the over-the-wire value.
+template <typename ChannelT, typename... ArgTs>
+class SequenceSerialization;
+
+template <typename ChannelT>
+class SequenceSerialization<ChannelT> {
+public:
+ static Error serialize(ChannelT &C) { return Error::success(); }
+ static Error deserialize(ChannelT &C) { return Error::success(); }
+};
+
+template <typename ChannelT, typename ArgT>
+class SequenceSerialization<ChannelT, ArgT> {
+public:
+
+ template <typename CArgT>
+ static Error serialize(ChannelT &C, const CArgT &CArg) {
+ return SerializationTraits<ChannelT, ArgT, CArgT>::serialize(C, CArg);
+ }
+
+ template <typename CArgT>
+ static Error deserialize(ChannelT &C, CArgT &CArg) {
+ return SerializationTraits<ChannelT, ArgT, CArgT>::deserialize(C, CArg);
+ }
+};
+
+template <typename ChannelT, typename ArgT, typename... ArgTs>
+class SequenceSerialization<ChannelT, ArgT, ArgTs...> {
+public:
+
+ template <typename CArgT, typename... CArgTs>
+ static Error serialize(ChannelT &C, const CArgT &CArg,
+ const CArgTs&... CArgs) {
+ if (auto Err =
+ SerializationTraits<ChannelT, ArgT, CArgT>::serialize(C, CArg))
+ return Err;
+ if (auto Err = SequenceTraits<ChannelT>::emitSeparator(C))
+ return Err;
+ return SequenceSerialization<ChannelT, ArgTs...>::serialize(C, CArgs...);
+ }
+
+ template <typename CArgT, typename... CArgTs>
+ static Error deserialize(ChannelT &C, CArgT &CArg,
+ CArgTs&... CArgs) {
+ if (auto Err =
+ SerializationTraits<ChannelT, ArgT, CArgT>::deserialize(C, CArg))
+ return Err;
+ if (auto Err = SequenceTraits<ChannelT>::consumeSeparator(C))
+ return Err;
+ return SequenceSerialization<ChannelT, ArgTs...>::deserialize(C, CArgs...);
+ }
+};
+
+template <typename ChannelT, typename... ArgTs>
+Error serializeSeq(ChannelT &C, const ArgTs &... Args) {
+ return SequenceSerialization<ChannelT, ArgTs...>::serialize(C, Args...);
+}
+
+template <typename ChannelT, typename... ArgTs>
+Error deserializeSeq(ChannelT &C, ArgTs &... Args) {
+ return SequenceSerialization<ChannelT, ArgTs...>::deserialize(C, Args...);
+}
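+
+// Example (illustrative only): serializing and then deserializing a short
+// heterogeneous sequence. "C" stands for any channel type that has
+// SerializationTraits specializations for the value types involved.
+//
+//   int32_t X = 42;
+//   std::string S = "forty-two";
+//   if (auto Err = serializeSeq(C, X, S))
+//     return Err;
+//
+//   int32_t X2 = 0;
+//   std::string S2;
+//   if (auto Err = deserializeSeq(C, X2, S2))
+//     return Err;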
+
+/// SerializationTraits default specialization for std::pair.
+template <typename ChannelT, typename T1, typename T2>
+class SerializationTraits<ChannelT, std::pair<T1, T2>> {
+public:
+ static Error serialize(ChannelT &C, const std::pair<T1, T2> &V) {
+ return serializeSeq(C, V.first, V.second);
+ }
+
+ static Error deserialize(ChannelT &C, std::pair<T1, T2> &V) {
+ return deserializeSeq(C, V.first, V.second);
+ }
+};
+
+/// SerializationTraits default specialization for std::tuple.
+template <typename ChannelT, typename... ArgTs>
+class SerializationTraits<ChannelT, std::tuple<ArgTs...>> {
+public:
+
+ /// RPC channel serialization for std::tuple.
+ static Error serialize(ChannelT &C, const std::tuple<ArgTs...> &V) {
+ return serializeTupleHelper(C, V, llvm::index_sequence_for<ArgTs...>());
+ }
+
+ /// RPC channel deserialization for std::tuple.
+ static Error deserialize(ChannelT &C, std::tuple<ArgTs...> &V) {
+ return deserializeTupleHelper(C, V, llvm::index_sequence_for<ArgTs...>());
+ }
+
+private:
+ // Serialization helper for std::tuple.
+ template <size_t... Is>
+ static Error serializeTupleHelper(ChannelT &C, const std::tuple<ArgTs...> &V,
+ llvm::index_sequence<Is...> _) {
+ return serializeSeq(C, std::get<Is>(V)...);
+ }
+
+  // Deserialization helper for std::tuple.
+ template <size_t... Is>
+ static Error deserializeTupleHelper(ChannelT &C, std::tuple<ArgTs...> &V,
+ llvm::index_sequence<Is...> _) {
+ return deserializeSeq(C, std::get<Is>(V)...);
+ }
+};
+
+/// SerializationTraits default specialization for std::vector.
+template <typename ChannelT, typename T>
+class SerializationTraits<ChannelT, std::vector<T>> {
+public:
+
+ /// Serialize a std::vector<T> from std::vector<T>.
+ static Error serialize(ChannelT &C, const std::vector<T> &V) {
+ if (auto Err = serializeSeq(C, static_cast<uint64_t>(V.size())))
+ return Err;
+
+ for (const auto &E : V)
+ if (auto Err = serializeSeq(C, E))
+ return Err;
+
+ return Error::success();
+ }
+
+ /// Deserialize a std::vector<T> to a std::vector<T>.
+ static Error deserialize(ChannelT &C, std::vector<T> &V) {
+ uint64_t Count = 0;
+ if (auto Err = deserializeSeq(C, Count))
+ return Err;
+
+ V.resize(Count);
+ for (auto &E : V)
+ if (auto Err = deserializeSeq(C, E))
+ return Err;
+
+ return Error::success();
+ }
+};
+
+} // end namespace rpc
+} // end namespace orc
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_RPCSERIALIZATION_H
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/RPCUtils.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/RPCUtils.h
index 966a49684348..f51fbe153a41 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/Orc/RPCUtils.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/RPCUtils.h
@@ -1,4 +1,4 @@
-//===----- RPCUTils.h - Basic tilities for building RPC APIs ----*- C++ -*-===//
+//===------- RPCUtils.h - Utilities for building RPC APIs -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,11 @@
//
//===----------------------------------------------------------------------===//
//
-// Basic utilities for building RPC APIs.
+// Utilities to support construction of simple RPC APIs.
+//
+// The RPC utilities aim for ease of use (minimal conceptual overhead) for C++
+// programmers, high performance, low memory overhead, and efficient use of the
+// communications channel.
//
//===----------------------------------------------------------------------===//
@@ -15,11 +19,12 @@
#define LLVM_EXECUTIONENGINE_ORC_RPCUTILS_H
#include <map>
+#include <thread>
#include <vector>
-#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ExecutionEngine/Orc/OrcError.h"
+#include "llvm/ExecutionEngine/Orc/RPCSerialization.h"
#ifdef _MSC_VER
// concrt.h depends on eh.h for __uncaught_exception declaration
@@ -40,485 +45,804 @@
namespace llvm {
namespace orc {
-namespace remote {
+namespace rpc {
-/// Describes reserved RPC Function Ids.
-///
-/// The default implementation will serve for integer and enum function id
-/// types. If you want to use a custom type as your FunctionId you can
-/// specialize this class and provide unique values for InvalidId,
-/// ResponseId and FirstValidId.
+template <typename DerivedFunc, typename FnT> class Function;
-template <typename T> class RPCFunctionIdTraits {
+// RPC Function class.
+// DerivedFunc should be a user defined class with a static 'getName()' method
+// returning a const char* representing the function's name.
+template <typename DerivedFunc, typename RetT, typename... ArgTs>
+class Function<DerivedFunc, RetT(ArgTs...)> {
public:
- static const T InvalidId = static_cast<T>(0);
- static const T ResponseId = static_cast<T>(1);
- static const T FirstValidId = static_cast<T>(2);
+ /// User defined function type.
+ using Type = RetT(ArgTs...);
+
+ /// Return type.
+ using ReturnType = RetT;
+
+ /// Returns the full function prototype as a string.
+ static const char *getPrototype() {
+ std::lock_guard<std::mutex> Lock(NameMutex);
+ if (Name.empty())
+ raw_string_ostream(Name)
+ << RPCTypeName<RetT>::getName() << " " << DerivedFunc::getName()
+ << "(" << llvm::orc::rpc::RPCTypeNameSequence<ArgTs...>() << ")";
+ return Name.data();
+ }
+
+private:
+ static std::mutex NameMutex;
+ static std::string Name;
};
-// Base class containing utilities that require partial specialization.
-// These cannot be included in RPC, as template class members cannot be
-// partially specialized.
-class RPCBase {
-protected:
- // RPC Function description type.
- //
- // This class provides the information and operations needed to support the
- // RPC primitive operations (call, expect, etc) for a given function. It
- // is specialized for void and non-void functions to deal with the differences
- // betwen the two. Both specializations have the same interface:
- //
- // Id - The function's unique identifier.
- // OptionalReturn - The return type for asyncronous calls.
- // ErrorReturn - The return type for synchronous calls.
- // optionalToErrorReturn - Conversion from a valid OptionalReturn to an
- // ErrorReturn.
- // readResult - Deserialize a result from a channel.
- // abandon - Abandon a promised (asynchronous) result.
- // respond - Retun a result on the channel.
- template <typename FunctionIdT, FunctionIdT FuncId, typename FnT>
- class FunctionHelper {};
-
- // RPC Function description specialization for non-void functions.
- template <typename FunctionIdT, FunctionIdT FuncId, typename RetT,
- typename... ArgTs>
- class FunctionHelper<FunctionIdT, FuncId, RetT(ArgTs...)> {
- public:
- static_assert(FuncId != RPCFunctionIdTraits<FunctionIdT>::InvalidId &&
- FuncId != RPCFunctionIdTraits<FunctionIdT>::ResponseId,
- "Cannot define custom function with InvalidId or ResponseId. "
- "Please use RPCFunctionTraits<FunctionIdT>::FirstValidId.");
+template <typename DerivedFunc, typename RetT, typename... ArgTs>
+std::mutex Function<DerivedFunc, RetT(ArgTs...)>::NameMutex;
- static const FunctionIdT Id = FuncId;
+template <typename DerivedFunc, typename RetT, typename... ArgTs>
+std::string Function<DerivedFunc, RetT(ArgTs...)>::Name;
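+
+// Example (illustrative only; "AddInts" is a hypothetical user-defined RPC
+// function, not part of this header):
+//
+//   class AddInts : public Function<AddInts, int32_t(int32_t, int32_t)> {
+//   public:
+//     static const char *getName() { return "AddInts"; }
+//   };
+//
+//   // AddInts::getPrototype() renders "int32_t AddInts(int32_t, int32_t)".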
- typedef Optional<RetT> OptionalReturn;
+/// Provides a typedef for a tuple containing the decayed argument types.
+template <typename T> class FunctionArgsTuple;
- typedef Expected<RetT> ErrorReturn;
+template <typename RetT, typename... ArgTs>
+class FunctionArgsTuple<RetT(ArgTs...)> {
+public:
+ using Type = std::tuple<typename std::decay<
+ typename std::remove_reference<ArgTs>::type>::type...>;
+};
- static ErrorReturn optionalToErrorReturn(OptionalReturn &&V) {
- assert(V && "Return value not available");
- return std::move(*V);
- }
+/// Allocates RPC function ids during autonegotiation.
+/// Specializations of this class must provide four members:
+///
+/// static T getInvalidId():
+/// Should return a reserved id that will be used to represent missing
+/// functions during autonegotiation.
+///
+/// static T getResponseId():
+/// Should return a reserved id that will be used to send function responses
+/// (return values).
+///
+/// static T getNegotiateId():
+/// Should return a reserved id for the negotiate function, which will be used
+/// to negotiate ids for user defined functions.
+///
+/// template <typename Func> T allocate():
+/// Allocate a unique id for function Func.
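+///
+/// A sketch of a user-provided specialization for a hypothetical strongly
+/// typed id enum (illustrative only; MyFnId is not part of this header):
+///
+/// @code{.cpp}
+///
+/// enum class MyFnId : uint32_t { Invalid = 0, Response = 1, Negotiate = 2,
+///                                FirstUser = 3 };
+///
+/// template <>
+/// class RPCFunctionIdAllocator<MyFnId> {
+/// public:
+///   static MyFnId getInvalidId() { return MyFnId::Invalid; }
+///   static MyFnId getResponseId() { return MyFnId::Response; }
+///   static MyFnId getNegotiateId() { return MyFnId::Negotiate; }
+///
+///   template <typename Func> MyFnId allocate() {
+///     return static_cast<MyFnId>(NextId++);
+///   }
+///
+/// private:
+///   uint32_t NextId = static_cast<uint32_t>(MyFnId::FirstUser);
+/// };
+///
+/// @endcode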
+template <typename T, typename = void> class RPCFunctionIdAllocator;
+
+/// This specialization of RPCFunctionIdAllocator provides a default
+/// implementation for integral types.
+template <typename T>
+class RPCFunctionIdAllocator<
+ T, typename std::enable_if<std::is_integral<T>::value>::type> {
+public:
+ static T getInvalidId() { return T(0); }
+ static T getResponseId() { return T(1); }
+ static T getNegotiateId() { return T(2); }
- template <typename ChannelT>
- static Error readResult(ChannelT &C, std::promise<OptionalReturn> &P) {
- RetT Val;
- auto Err = deserialize(C, Val);
- auto Err2 = endReceiveMessage(C);
- Err = joinErrors(std::move(Err), std::move(Err2));
+ template <typename Func> T allocate() { return NextId++; }
- if (Err) {
- P.set_value(OptionalReturn());
- return Err;
- }
- P.set_value(std::move(Val));
- return Error::success();
- }
+private:
+ T NextId = 3;
+};
- static void abandon(std::promise<OptionalReturn> &P) {
- P.set_value(OptionalReturn());
- }
+namespace detail {
- template <typename ChannelT, typename SequenceNumberT>
- static Error respond(ChannelT &C, SequenceNumberT SeqNo,
- ErrorReturn &Result) {
- FunctionIdT ResponseId = RPCFunctionIdTraits<FunctionIdT>::ResponseId;
+// FIXME: Remove MSVCPError/MSVCPExpected once MSVC's future implementation
+// supports classes without default constructors.
+#ifdef _MSC_VER
- // If the handler returned an error then bail out with that.
- if (!Result)
- return Result.takeError();
+namespace msvc_hacks {
- // Otherwise open a new message on the channel and send the result.
- if (auto Err = startSendMessage(C))
- return Err;
- if (auto Err = serializeSeq(C, ResponseId, SeqNo, *Result))
- return Err;
- return endSendMessage(C);
- }
- };
+// Work around MSVC's future implementation's use of default constructors:
+// A default constructed value in the promise will be overwritten when the
+// real error is set - so the default constructed Error has to be checked
+// already.
+class MSVCPError : public Error {
+public:
+ MSVCPError() { (void)!!*this; }
- // RPC Function description specialization for void functions.
- template <typename FunctionIdT, FunctionIdT FuncId, typename... ArgTs>
- class FunctionHelper<FunctionIdT, FuncId, void(ArgTs...)> {
- public:
- static_assert(FuncId != RPCFunctionIdTraits<FunctionIdT>::InvalidId &&
- FuncId != RPCFunctionIdTraits<FunctionIdT>::ResponseId,
- "Cannot define custom function with InvalidId or ResponseId. "
- "Please use RPCFunctionTraits<FunctionIdT>::FirstValidId.");
+ MSVCPError(MSVCPError &&Other) : Error(std::move(Other)) {}
- static const FunctionIdT Id = FuncId;
+ MSVCPError &operator=(MSVCPError Other) {
+ Error::operator=(std::move(Other));
+ return *this;
+ }
- typedef bool OptionalReturn;
- typedef Error ErrorReturn;
+ MSVCPError(Error Err) : Error(std::move(Err)) {}
+};
- static ErrorReturn optionalToErrorReturn(OptionalReturn &&V) {
- assert(V && "Return value not available");
- return Error::success();
- }
+// Work around MSVC's future implementation, similar to MSVCPError.
+template <typename T> class MSVCPExpected : public Expected<T> {
+public:
+ MSVCPExpected()
+ : Expected<T>(make_error<StringError>("", inconvertibleErrorCode())) {
+ consumeError(this->takeError());
+ }
- template <typename ChannelT>
- static Error readResult(ChannelT &C, std::promise<OptionalReturn> &P) {
- // Void functions don't have anything to deserialize, so we're good.
- P.set_value(true);
- return endReceiveMessage(C);
- }
+ MSVCPExpected(MSVCPExpected &&Other) : Expected<T>(std::move(Other)) {}
- static void abandon(std::promise<OptionalReturn> &P) { P.set_value(false); }
+ MSVCPExpected &operator=(MSVCPExpected &&Other) {
+ Expected<T>::operator=(std::move(Other));
+ return *this;
+ }
- template <typename ChannelT, typename SequenceNumberT>
- static Error respond(ChannelT &C, SequenceNumberT SeqNo,
- ErrorReturn &Result) {
- const FunctionIdT ResponseId =
- RPCFunctionIdTraits<FunctionIdT>::ResponseId;
+ MSVCPExpected(Error Err) : Expected<T>(std::move(Err)) {}
+
+ template <typename OtherT>
+ MSVCPExpected(
+ OtherT &&Val,
+ typename std::enable_if<std::is_convertible<OtherT, T>::value>::type * =
+ nullptr)
+ : Expected<T>(std::move(Val)) {}
+
+ template <class OtherT>
+ MSVCPExpected(
+ Expected<OtherT> &&Other,
+ typename std::enable_if<std::is_convertible<OtherT, T>::value>::type * =
+ nullptr)
+ : Expected<T>(std::move(Other)) {}
+
+ template <class OtherT>
+ explicit MSVCPExpected(
+ Expected<OtherT> &&Other,
+ typename std::enable_if<!std::is_convertible<OtherT, T>::value>::type * =
+ nullptr)
+ : Expected<T>(std::move(Other)) {}
+};
- // If the handler returned an error then bail out with that.
- if (Result)
- return std::move(Result);
+} // end namespace msvc_hacks
- // Otherwise open a new message on the channel and send the result.
- if (auto Err = startSendMessage(C))
- return Err;
- if (auto Err = serializeSeq(C, ResponseId, SeqNo))
- return Err;
- return endSendMessage(C);
- }
- };
+#endif // _MSC_VER
- // Helper for the call primitive.
- template <typename ChannelT, typename SequenceNumberT, typename Func>
- class CallHelper;
+// ResultTraits provides typedefs and utilities specific to the return type
+// of functions.
+template <typename RetT> class ResultTraits {
+public:
+ // The return type wrapped in llvm::Expected.
+ using ErrorReturnType = Expected<RetT>;
- template <typename ChannelT, typename SequenceNumberT, typename FunctionIdT,
- FunctionIdT FuncId, typename RetT, typename... ArgTs>
- class CallHelper<ChannelT, SequenceNumberT,
- FunctionHelper<FunctionIdT, FuncId, RetT(ArgTs...)>> {
- public:
- static Error call(ChannelT &C, SequenceNumberT SeqNo,
- const ArgTs &... Args) {
- if (auto Err = startSendMessage(C))
- return Err;
- if (auto Err = serializeSeq(C, FuncId, SeqNo, Args...))
- return Err;
- return endSendMessage(C);
- }
- };
+#ifdef _MSC_VER
+ // The ErrorReturnType wrapped in a std::promise.
+ using ReturnPromiseType = std::promise<msvc_hacks::MSVCPExpected<RetT>>;
- // Helper for handle primitive.
- template <typename ChannelT, typename SequenceNumberT, typename Func>
- class HandlerHelper;
+ // The ErrorReturnType wrapped in a std::future.
+ using ReturnFutureType = std::future<msvc_hacks::MSVCPExpected<RetT>>;
+#else
+ // The ErrorReturnType wrapped in a std::promise.
+ using ReturnPromiseType = std::promise<ErrorReturnType>;
- template <typename ChannelT, typename SequenceNumberT, typename FunctionIdT,
- FunctionIdT FuncId, typename RetT, typename... ArgTs>
- class HandlerHelper<ChannelT, SequenceNumberT,
- FunctionHelper<FunctionIdT, FuncId, RetT(ArgTs...)>> {
- public:
- template <typename HandlerT>
- static Error handle(ChannelT &C, HandlerT Handler) {
- return readAndHandle(C, Handler, llvm::index_sequence_for<ArgTs...>());
- }
+ // The ErrorReturnType wrapped in a std::future.
+ using ReturnFutureType = std::future<ErrorReturnType>;
+#endif
- private:
- typedef FunctionHelper<FunctionIdT, FuncId, RetT(ArgTs...)> Func;
+ // Create a 'blank' value of the ErrorReturnType, ready and safe to
+ // overwrite.
+ static ErrorReturnType createBlankErrorReturnValue() {
+ return ErrorReturnType(RetT());
+ }
- template <typename HandlerT, size_t... Is>
- static Error readAndHandle(ChannelT &C, HandlerT Handler,
- llvm::index_sequence<Is...> _) {
- std::tuple<ArgTs...> RPCArgs;
- SequenceNumberT SeqNo;
- // GCC 4.7 and 4.8 incorrectly issue a -Wunused-but-set-variable warning
- // for RPCArgs. Void cast RPCArgs to work around this for now.
- // FIXME: Remove this workaround once we can assume a working GCC version.
- (void)RPCArgs;
- if (auto Err = deserializeSeq(C, SeqNo, std::get<Is>(RPCArgs)...))
- return Err;
+ // Consume an abandoned ErrorReturnType.
+ static void consumeAbandoned(ErrorReturnType RetOrErr) {
+ consumeError(RetOrErr.takeError());
+ }
+};
- // We've deserialized the arguments, so unlock the channel for reading
- // before we call the handler. This allows recursive RPC calls.
- if (auto Err = endReceiveMessage(C))
- return Err;
+// ResultTraits specialization for void functions.
+template <> class ResultTraits<void> {
+public:
+ // For void functions, ErrorReturnType is llvm::Error.
+ using ErrorReturnType = Error;
- // Run the handler and get the result.
- auto Result = Handler(std::get<Is>(RPCArgs)...);
+#ifdef _MSC_VER
+ // The ErrorReturnType wrapped in a std::promise.
+ using ReturnPromiseType = std::promise<msvc_hacks::MSVCPError>;
- // Return the result to the client.
- return Func::template respond<ChannelT, SequenceNumberT>(C, SeqNo,
- Result);
- }
- };
+ // The ErrorReturnType wrapped in a std::future.
+ using ReturnFutureType = std::future<msvc_hacks::MSVCPError>;
+#else
+ // The ErrorReturnType wrapped in a std::promise.
+ using ReturnPromiseType = std::promise<ErrorReturnType>;
- // Helper for wrapping member functions up as functors.
- template <typename ClassT, typename RetT, typename... ArgTs>
- class MemberFnWrapper {
- public:
- typedef RetT (ClassT::*MethodT)(ArgTs...);
- MemberFnWrapper(ClassT &Instance, MethodT Method)
- : Instance(Instance), Method(Method) {}
- RetT operator()(ArgTs &... Args) { return (Instance.*Method)(Args...); }
-
- private:
- ClassT &Instance;
- MethodT Method;
- };
+ // The ErrorReturnType wrapped in a std::future.
+ using ReturnFutureType = std::future<ErrorReturnType>;
+#endif
- // Helper that provides a Functor for deserializing arguments.
- template <typename... ArgTs> class ReadArgs {
- public:
- Error operator()() { return Error::success(); }
- };
+ // Create a 'blank' value of the ErrorReturnType, ready and safe to
+ // overwrite.
+ static ErrorReturnType createBlankErrorReturnValue() {
+ return ErrorReturnType::success();
+ }
- template <typename ArgT, typename... ArgTs>
- class ReadArgs<ArgT, ArgTs...> : public ReadArgs<ArgTs...> {
- public:
- ReadArgs(ArgT &Arg, ArgTs &... Args)
- : ReadArgs<ArgTs...>(Args...), Arg(Arg) {}
+ // Consume an abandoned ErrorReturnType.
+ static void consumeAbandoned(ErrorReturnType Err) {
+ consumeError(std::move(Err));
+ }
+};
- Error operator()(ArgT &ArgVal, ArgTs &... ArgVals) {
- this->Arg = std::move(ArgVal);
- return ReadArgs<ArgTs...>::operator()(ArgVals...);
- }
+// ResultTraits<Error> is equivalent to ResultTraits<void>. This allows
+// handlers for void RPC functions to return either void (in which case they
+// implicitly succeed) or Error (in which case their error return is
+// propagated). See usage in HandlerTraits::run.
+template <> class ResultTraits<Error> : public ResultTraits<void> {};
+
+// ResultTraits<Expected<T>> is equivalent to ResultTraits<T>. This allows
+// handlers for RPC functions returning a T to return either a T (in which
+// case they implicitly succeed) or Expected<T> (in which case their error
+// return is propagated). See usage in HandlerTraits::run.
+template <typename RetT>
+class ResultTraits<Expected<RetT>> : public ResultTraits<RetT> {};
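+
+// For illustration (hypothetical handlers, not part of this header): for an
+// RPC function with wire signature int32_t(int32_t), either handler shape
+// below is accepted, and any error is propagated back to the caller.
+//
+//   int32_t           handleAdd(int32_t X);   // plain value, implicit success
+//   Expected<int32_t> handleAdd(int32_t X);   // value or error
+//
+// Likewise, a handler for a void(...) function may return either void or
+// Error.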
+
+// Send a response of the given wire return type (WireRetT) over the
+// channel, with the given sequence number.
+template <typename WireRetT, typename HandlerRetT, typename ChannelT,
+ typename FunctionIdT, typename SequenceNumberT>
+static Error respond(ChannelT &C, const FunctionIdT &ResponseId,
+ SequenceNumberT SeqNo, Expected<HandlerRetT> ResultOrErr) {
+ // If this was an error bail out.
+ // FIXME: Send an "error" message to the client if this is not a channel
+ // failure?
+ if (auto Err = ResultOrErr.takeError())
+ return Err;
+
+ // Open the response message.
+ if (auto Err = C.startSendMessage(ResponseId, SeqNo))
+ return Err;
+
+ // Serialize the result.
+ if (auto Err =
+ SerializationTraits<ChannelT, WireRetT, HandlerRetT>::serialize(
+ C, *ResultOrErr))
+ return Err;
+
+ // Close the response message.
+ return C.endSendMessage();
+}
+
+// Send an empty response message on the given channel to indicate that
+// the handler ran.
+template <typename WireRetT, typename ChannelT, typename FunctionIdT,
+ typename SequenceNumberT>
+static Error respond(ChannelT &C, const FunctionIdT &ResponseId,
+ SequenceNumberT SeqNo, Error Err) {
+ if (Err)
+ return Err;
+ if (auto Err2 = C.startSendMessage(ResponseId, SeqNo))
+ return Err2;
+ return C.endSendMessage();
+}
+
+// Converts a given type to the equivalent error return type.
+template <typename T> class WrappedHandlerReturn {
+public:
+ using Type = Expected<T>;
+};
- private:
- ArgT &Arg;
- };
+template <typename T> class WrappedHandlerReturn<Expected<T>> {
+public:
+ using Type = Expected<T>;
};
-/// Contains primitive utilities for defining, calling and handling calls to
-/// remote procedures. ChannelT is a bidirectional stream conforming to the
-/// RPCChannel interface (see RPCChannel.h), and FunctionIdT is a procedure
-/// identifier type that must be serializable on ChannelT.
-///
-/// These utilities support the construction of very primitive RPC utilities.
-/// Their intent is to ensure correct serialization and deserialization of
-/// procedure arguments, and to keep the client and server's view of the API in
-/// sync.
-///
-/// These utilities do not support return values. These can be handled by
-/// declaring a corresponding '.*Response' procedure and expecting it after a
-/// call). They also do not support versioning: the client and server *must* be
-/// compiled with the same procedure definitions.
-///
-///
-///
-/// Overview (see comments individual types/methods for details):
-///
-/// Function<Id, Args...> :
-///
-/// associates a unique serializable id with an argument list.
-///
-///
-/// call<Func>(Channel, Args...) :
-///
-/// Calls the remote procedure 'Func' by serializing Func's id followed by its
-/// arguments and sending the resulting bytes to 'Channel'.
-///
-///
-/// handle<Func>(Channel, <functor matching Error(Args...)> :
-///
-/// Handles a call to 'Func' by deserializing its arguments and calling the
-/// given functor. This assumes that the id for 'Func' has already been
-/// deserialized.
-///
-/// expect<Func>(Channel, <functor matching Error(Args...)> :
-///
-/// The same as 'handle', except that the procedure id should not have been
-/// read yet. Expect will deserialize the id and assert that it matches Func's
-/// id. If it does not, and unexpected RPC call error is returned.
-template <typename ChannelT, typename FunctionIdT = uint32_t,
- typename SequenceNumberT = uint16_t>
-class RPC : public RPCBase {
+template <> class WrappedHandlerReturn<void> {
+public:
+ using Type = Error;
+};
+
+template <> class WrappedHandlerReturn<Error> {
public:
- /// RPC default constructor.
- RPC() = default;
+ using Type = Error;
+};
- /// RPC instances cannot be copied.
- RPC(const RPC &) = delete;
+template <> class WrappedHandlerReturn<ErrorSuccess> {
+public:
+ using Type = Error;
+};
- /// RPC instances cannot be copied.
- RPC &operator=(const RPC &) = delete;
+// This template class provides utilities related to RPC function handlers.
+// The base case applies to non-function types (the template class is
+// specialized for function types) and inherits from the appropriate
+// specialization for the given non-function type's call operator.
+template <typename HandlerT>
+class HandlerTraits : public HandlerTraits<decltype(
+ &std::remove_reference<HandlerT>::type::operator())> {
+};
- /// RPC move constructor.
- // FIXME: Remove once MSVC can synthesize move ops.
- RPC(RPC &&Other)
- : SequenceNumberMgr(std::move(Other.SequenceNumberMgr)),
- OutstandingResults(std::move(Other.OutstandingResults)) {}
+// Traits for handlers with a given function type.
+template <typename RetT, typename... ArgTs>
+class HandlerTraits<RetT(ArgTs...)> {
+public:
+ // Function type of the handler.
+ using Type = RetT(ArgTs...);
- /// RPC move assignment.
- // FIXME: Remove once MSVC can synthesize move ops.
- RPC &operator=(RPC &&Other) {
- SequenceNumberMgr = std::move(Other.SequenceNumberMgr);
- OutstandingResults = std::move(Other.OutstandingResults);
- return *this;
+ // Return type of the handler.
+ using ReturnType = RetT;
+
+ // A std::tuple wrapping the handler arguments.
+ using ArgStorage = typename FunctionArgsTuple<RetT(ArgTs...)>::Type;
+
+ // Call the given handler with the given arguments.
+ template <typename HandlerT>
+ static typename WrappedHandlerReturn<RetT>::Type
+ unpackAndRun(HandlerT &Handler, ArgStorage &Args) {
+ return unpackAndRunHelper(Handler, Args,
+ llvm::index_sequence_for<ArgTs...>());
}
- /// Utility class for defining/referring to RPC procedures.
- ///
- /// Typedefs of this utility are used when calling/handling remote procedures.
- ///
- /// FuncId should be a unique value of FunctionIdT (i.e. not used with any
- /// other Function typedef in the RPC API being defined.
- ///
- /// the template argument Ts... gives the argument list for the remote
- /// procedure.
- ///
- /// E.g.
- ///
- /// typedef Function<0, bool> Func1;
- /// typedef Function<1, std::string, std::vector<int>> Func2;
- ///
- /// if (auto Err = call<Func1>(Channel, true))
- /// /* handle Err */;
- ///
- /// if (auto Err = expect<Func2>(Channel,
- /// [](std::string &S, std::vector<int> &V) {
- /// // Stuff.
- /// return Error::success();
- /// })
- /// /* handle Err */;
- ///
- template <FunctionIdT FuncId, typename FnT>
- using Function = FunctionHelper<FunctionIdT, FuncId, FnT>;
+ // Call the given handler with the given arguments.
+ template <typename HandlerT>
+ static typename std::enable_if<
+ std::is_void<typename HandlerTraits<HandlerT>::ReturnType>::value,
+ Error>::type
+ run(HandlerT &Handler, ArgTs &&... Args) {
+ Handler(std::move(Args)...);
+ return Error::success();
+ }
- /// Return type for asynchronous call primitives.
- template <typename Func>
- using AsyncCallResult = std::future<typename Func::OptionalReturn>;
+ template <typename HandlerT>
+ static typename std::enable_if<
+ !std::is_void<typename HandlerTraits<HandlerT>::ReturnType>::value,
+ typename HandlerTraits<HandlerT>::ReturnType>::type
+ run(HandlerT &Handler, ArgTs... Args) {
+ return Handler(std::move(Args)...);
+ }
- /// Return type for asynchronous call-with-seq primitives.
- template <typename Func>
- using AsyncCallWithSeqResult =
- std::pair<std::future<typename Func::OptionalReturn>, SequenceNumberT>;
+ // Serialize arguments to the channel.
+ template <typename ChannelT, typename... CArgTs>
+ static Error serializeArgs(ChannelT &C, const CArgTs... CArgs) {
+ return SequenceSerialization<ChannelT, ArgTs...>::serialize(C, CArgs...);
+ }
- /// Serialize Args... to channel C, but do not call C.send().
- ///
- /// Returns an error (on serialization failure) or a pair of:
- /// (1) A future Optional<T> (or future<bool> for void functions), and
- /// (2) A sequence number.
- ///
- /// This utility function is primarily used for single-threaded mode support,
- /// where the sequence number can be used to wait for the corresponding
- /// result. In multi-threaded mode the appendCallAsync method, which does not
- /// return the sequence numeber, should be preferred.
- template <typename Func, typename... ArgTs>
- Expected<AsyncCallWithSeqResult<Func>>
- appendCallAsyncWithSeq(ChannelT &C, const ArgTs &... Args) {
- auto SeqNo = SequenceNumberMgr.getSequenceNumber();
- std::promise<typename Func::OptionalReturn> Promise;
- auto Result = Promise.get_future();
- OutstandingResults[SeqNo] =
- createOutstandingResult<Func>(std::move(Promise));
-
- if (auto Err = CallHelper<ChannelT, SequenceNumberT, Func>::call(C, SeqNo,
- Args...)) {
- abandonOutstandingResults();
- return std::move(Err);
- } else
- return AsyncCallWithSeqResult<Func>(std::move(Result), SeqNo);
+ // Deserialize arguments from the channel.
+ template <typename ChannelT, typename... CArgTs>
+ static Error deserializeArgs(ChannelT &C, std::tuple<CArgTs...> &Args) {
+ return deserializeArgsHelper(C, Args,
+ llvm::index_sequence_for<CArgTs...>());
}
- /// The same as appendCallAsyncWithSeq, except that it calls C.send() to
- /// flush the channel after serializing the call.
- template <typename Func, typename... ArgTs>
- Expected<AsyncCallWithSeqResult<Func>>
- callAsyncWithSeq(ChannelT &C, const ArgTs &... Args) {
- auto Result = appendCallAsyncWithSeq<Func>(C, Args...);
- if (!Result)
- return Result;
- if (auto Err = C.send()) {
- abandonOutstandingResults();
- return std::move(Err);
- }
- return Result;
+private:
+ template <typename ChannelT, typename... CArgTs, size_t... Indexes>
+ static Error deserializeArgsHelper(ChannelT &C, std::tuple<CArgTs...> &Args,
+ llvm::index_sequence<Indexes...> _) {
+ return SequenceSerialization<ChannelT, ArgTs...>::deserialize(
+ C, std::get<Indexes>(Args)...);
}
- /// Serialize Args... to channel C, but do not call send.
- /// Returns an error if serialization fails, otherwise returns a
- /// std::future<Optional<T>> (or a future<bool> for void functions).
- template <typename Func, typename... ArgTs>
- Expected<AsyncCallResult<Func>> appendCallAsync(ChannelT &C,
- const ArgTs &... Args) {
- auto ResAndSeqOrErr = appendCallAsyncWithSeq<Func>(C, Args...);
- if (ResAndSeqOrErr)
- return std::move(ResAndSeqOrErr->first);
- return ResAndSeqOrErr.getError();
+ template <typename HandlerT, size_t... Indexes>
+ static typename WrappedHandlerReturn<
+ typename HandlerTraits<HandlerT>::ReturnType>::Type
+ unpackAndRunHelper(HandlerT &Handler, ArgStorage &Args,
+ llvm::index_sequence<Indexes...>) {
+ return run(Handler, std::move(std::get<Indexes>(Args))...);
}
+};
- /// The same as appendCallAsync, except that it calls C.send to flush the
- /// channel after serializing the call.
- template <typename Func, typename... ArgTs>
- Expected<AsyncCallResult<Func>> callAsync(ChannelT &C,
- const ArgTs &... Args) {
- auto ResAndSeqOrErr = callAsyncWithSeq<Func>(C, Args...);
- if (ResAndSeqOrErr)
- return std::move(ResAndSeqOrErr->first);
- return ResAndSeqOrErr.getError();
- }
-
- /// This can be used in single-threaded mode.
- template <typename Func, typename HandleFtor, typename... ArgTs>
- typename Func::ErrorReturn
- callSTHandling(ChannelT &C, HandleFtor &HandleOther, const ArgTs &... Args) {
- if (auto ResultAndSeqNoOrErr = callAsyncWithSeq<Func>(C, Args...)) {
- auto &ResultAndSeqNo = *ResultAndSeqNoOrErr;
- if (auto Err = waitForResult(C, ResultAndSeqNo.second, HandleOther))
- return std::move(Err);
- return Func::optionalToErrorReturn(ResultAndSeqNo.first.get());
- } else
- return ResultAndSeqNoOrErr.takeError();
+// Handler traits for class methods (especially call operators for lambdas).
+template <typename Class, typename RetT, typename... ArgTs>
+class HandlerTraits<RetT (Class::*)(ArgTs...)>
+ : public HandlerTraits<RetT(ArgTs...)> {};
+
+// Handler traits for const class methods (especially call operators for
+// lambdas).
+template <typename Class, typename RetT, typename... ArgTs>
+class HandlerTraits<RetT (Class::*)(ArgTs...) const>
+ : public HandlerTraits<RetT(ArgTs...)> {};
+
+// Utility to peel the Expected wrapper off a response handler error type.
+template <typename HandlerT> class ResponseHandlerArg;
+
+template <typename ArgT> class ResponseHandlerArg<Error(Expected<ArgT>)> {
+public:
+ using ArgType = Expected<ArgT>;
+ using UnwrappedArgType = ArgT;
+};
+
+template <typename ArgT>
+class ResponseHandlerArg<ErrorSuccess(Expected<ArgT>)> {
+public:
+ using ArgType = Expected<ArgT>;
+ using UnwrappedArgType = ArgT;
+};
+
+template <> class ResponseHandlerArg<Error(Error)> {
+public:
+ using ArgType = Error;
+};
+
+template <> class ResponseHandlerArg<ErrorSuccess(Error)> {
+public:
+ using ArgType = Error;
+};
+
+// ResponseHandler represents a handler for a not-yet-received function call
+// result.
+template <typename ChannelT> class ResponseHandler {
+public:
+ virtual ~ResponseHandler() {}
+
+ // Reads the function result off the wire and acts on it. The meaning of
+ // "act" will depend on how this method is implemented in any given
+ // ResponseHandler subclass but could, for example, mean running a
+ // user-specified handler or setting a promise value.
+ virtual Error handleResponse(ChannelT &C) = 0;
+
+ // Abandons this outstanding result.
+ virtual void abandon() = 0;
+
+ // Create an error instance representing an abandoned response.
+ static Error createAbandonedResponseError() {
+ return orcError(OrcErrorCode::RPCResponseAbandoned);
}
+};
- // This can be used in single-threaded mode.
- template <typename Func, typename... ArgTs>
- typename Func::ErrorReturn callST(ChannelT &C, const ArgTs &... Args) {
- return callSTHandling<Func>(C, handleNone, Args...);
+// ResponseHandler subclass for RPC functions with non-void returns.
+template <typename ChannelT, typename FuncRetT, typename HandlerT>
+class ResponseHandlerImpl : public ResponseHandler<ChannelT> {
+public:
+ ResponseHandlerImpl(HandlerT Handler) : Handler(std::move(Handler)) {}
+
+ // Handle the result by deserializing it from the channel then passing it
+ // to the user defined handler.
+ Error handleResponse(ChannelT &C) override {
+ using UnwrappedArgType = typename ResponseHandlerArg<
+ typename HandlerTraits<HandlerT>::Type>::UnwrappedArgType;
+ UnwrappedArgType Result;
+ if (auto Err =
+ SerializationTraits<ChannelT, FuncRetT,
+ UnwrappedArgType>::deserialize(C, Result))
+ return Err;
+ if (auto Err = C.endReceiveMessage())
+ return Err;
+ return Handler(Result);
}
- /// Start receiving a new function call.
- ///
- /// Calls startReceiveMessage on the channel, then deserializes a FunctionId
- /// into Id.
- Error startReceivingFunction(ChannelT &C, FunctionIdT &Id) {
- if (auto Err = startReceiveMessage(C))
+ // Abandon this response by calling the handler with an 'abandoned response'
+ // error.
+ void abandon() override {
+ if (auto Err = Handler(this->createAbandonedResponseError())) {
+ // Handlers should not fail when passed an abandoned response error.
+ report_fatal_error(std::move(Err));
+ }
+ }
+
+private:
+ HandlerT Handler;
+};
+
+// ResponseHandler subclass for RPC functions with void returns.
+template <typename ChannelT, typename HandlerT>
+class ResponseHandlerImpl<ChannelT, void, HandlerT>
+ : public ResponseHandler<ChannelT> {
+public:
+ ResponseHandlerImpl(HandlerT Handler) : Handler(std::move(Handler)) {}
+
+ // Handle the result (no actual value, just a notification that the function
+ // has completed on the remote end) by calling the user-defined handler with
+ // Error::success().
+ Error handleResponse(ChannelT &C) override {
+ if (auto Err = C.endReceiveMessage())
return Err;
+ return Handler(Error::success());
+ }
- return deserialize(C, Id);
+ // Abandon this response by calling the handler with an 'abandoned response'
+ // error.
+ void abandon() override {
+ if (auto Err = Handler(this->createAbandonedResponseError())) {
+ // Handlers should not fail when passed an abandoned response error.
+ report_fatal_error(std::move(Err));
+ }
}
- /// Deserialize args for Func from C and call Handler. The signature of
- /// handler must conform to 'Error(Args...)' where Args... matches
- /// the arguments used in the Func typedef.
- template <typename Func, typename HandlerT>
- static Error handle(ChannelT &C, HandlerT Handler) {
- return HandlerHelper<ChannelT, SequenceNumberT, Func>::handle(C, Handler);
+private:
+ HandlerT Handler;
+};
+
+// Create a ResponseHandler from a given user handler.
+template <typename ChannelT, typename FuncRetT, typename HandlerT>
+std::unique_ptr<ResponseHandler<ChannelT>> createResponseHandler(HandlerT H) {
+ return llvm::make_unique<ResponseHandlerImpl<ChannelT, FuncRetT, HandlerT>>(
+ std::move(H));
+}
+
+// Helper for wrapping member functions up as functors. This is useful for
+// installing methods as result handlers.
+template <typename ClassT, typename RetT, typename... ArgTs>
+class MemberFnWrapper {
+public:
+ using MethodT = RetT (ClassT::*)(ArgTs...);
+ MemberFnWrapper(ClassT &Instance, MethodT Method)
+ : Instance(Instance), Method(Method) {}
+ RetT operator()(ArgTs &&... Args) {
+ return (Instance.*Method)(std::move(Args)...);
}
- /// Helper version of 'handle' for calling member functions.
- template <typename Func, typename ClassT, typename RetT, typename... ArgTs>
- static Error handle(ChannelT &C, ClassT &Instance,
- RetT (ClassT::*HandlerMethod)(ArgTs...)) {
- return handle<Func>(
- C, MemberFnWrapper<ClassT, RetT, ArgTs...>(Instance, HandlerMethod));
+private:
+ ClassT &Instance;
+ MethodT Method;
+};
+
+// Helper that provides a Functor for deserializing arguments.
+template <typename... ArgTs> class ReadArgs {
+public:
+ Error operator()() { return Error::success(); }
+};
+
+template <typename ArgT, typename... ArgTs>
+class ReadArgs<ArgT, ArgTs...> : public ReadArgs<ArgTs...> {
+public:
+ ReadArgs(ArgT &Arg, ArgTs &... Args)
+ : ReadArgs<ArgTs...>(Args...), Arg(Arg) {}
+
+ Error operator()(ArgT &ArgVal, ArgTs &... ArgVals) {
+ this->Arg = std::move(ArgVal);
+ return ReadArgs<ArgTs...>::operator()(ArgVals...);
}
- /// Deserialize a FunctionIdT from C and verify it matches the id for Func.
- /// If the id does match, deserialize the arguments and call the handler
- /// (similarly to handle).
- /// If the id does not match, return an unexpect RPC call error and do not
- /// deserialize any further bytes.
- template <typename Func, typename HandlerT>
- Error expect(ChannelT &C, HandlerT Handler) {
- FunctionIdT FuncId;
- if (auto Err = startReceivingFunction(C, FuncId))
+private:
+ ArgT &Arg;
+};
+
+// Manage sequence numbers.
+template <typename SequenceNumberT> class SequenceNumberManager {
+public:
+ // Reset, making all sequence numbers available.
+ void reset() {
+ std::lock_guard<std::mutex> Lock(SeqNoLock);
+ NextSequenceNumber = 0;
+ FreeSequenceNumbers.clear();
+ }
+
+ // Get the next available sequence number. Will re-use numbers that have
+ // been released.
+ SequenceNumberT getSequenceNumber() {
+ std::lock_guard<std::mutex> Lock(SeqNoLock);
+ if (FreeSequenceNumbers.empty())
+ return NextSequenceNumber++;
+ auto SequenceNumber = FreeSequenceNumbers.back();
+ FreeSequenceNumbers.pop_back();
+ return SequenceNumber;
+ }
+
+ // Release a sequence number, making it available for re-use.
+ void releaseSequenceNumber(SequenceNumberT SequenceNumber) {
+ std::lock_guard<std::mutex> Lock(SeqNoLock);
+ FreeSequenceNumbers.push_back(SequenceNumber);
+ }
+
+private:
+ std::mutex SeqNoLock;
+ SequenceNumberT NextSequenceNumber = 0;
+ std::vector<SequenceNumberT> FreeSequenceNumbers;
+};
+
+// Checks that predicate P holds for each corresponding pair of type arguments
+// from T1 and T2 tuple.
+template <template <class, class> class P, typename T1Tuple, typename T2Tuple>
+class RPCArgTypeCheckHelper;
+
+template <template <class, class> class P>
+class RPCArgTypeCheckHelper<P, std::tuple<>, std::tuple<>> {
+public:
+ static const bool value = true;
+};
+
+template <template <class, class> class P, typename T, typename... Ts,
+ typename U, typename... Us>
+class RPCArgTypeCheckHelper<P, std::tuple<T, Ts...>, std::tuple<U, Us...>> {
+public:
+ static const bool value =
+ P<T, U>::value &&
+ RPCArgTypeCheckHelper<P, std::tuple<Ts...>, std::tuple<Us...>>::value;
+};
+
+template <template <class, class> class P, typename T1Sig, typename T2Sig>
+class RPCArgTypeCheck {
+public:
+ using T1Tuple = typename FunctionArgsTuple<T1Sig>::Type;
+ using T2Tuple = typename FunctionArgsTuple<T2Sig>::Type;
+
+ static_assert(std::tuple_size<T1Tuple>::value >=
+ std::tuple_size<T2Tuple>::value,
+ "Too many arguments to RPC call");
+ static_assert(std::tuple_size<T1Tuple>::value <=
+ std::tuple_size<T2Tuple>::value,
+ "Too few arguments to RPC call");
+
+ static const bool value = RPCArgTypeCheckHelper<P, T1Tuple, T2Tuple>::value;
+};
+
+template <typename ChannelT, typename WireT, typename ConcreteT>
+class CanSerialize {
+private:
+ using S = SerializationTraits<ChannelT, WireT, ConcreteT>;
+
+ template <typename T>
+ static std::true_type
+ check(typename std::enable_if<
+ std::is_same<decltype(T::serialize(std::declval<ChannelT &>(),
+ std::declval<const ConcreteT &>())),
+ Error>::value,
+ void *>::type);
+
+ template <typename> static std::false_type check(...);
+
+public:
+ static const bool value = decltype(check<S>(0))::value;
+};
+
+template <typename ChannelT, typename WireT, typename ConcreteT>
+class CanDeserialize {
+private:
+ using S = SerializationTraits<ChannelT, WireT, ConcreteT>;
+
+ template <typename T>
+ static std::true_type
+ check(typename std::enable_if<
+ std::is_same<decltype(T::deserialize(std::declval<ChannelT &>(),
+ std::declval<ConcreteT &>())),
+ Error>::value,
+ void *>::type);
+
+ template <typename> static std::false_type check(...);
+
+public:
+ static const bool value = decltype(check<S>(0))::value;
+};
+
+/// Contains primitive utilities for defining, calling and handling calls to
+/// remote procedures. ChannelT is a bidirectional stream conforming to the
+/// RPCChannel interface (see RPCChannel.h), FunctionIdT is a procedure
+/// identifier type that must be serializable on ChannelT, and SequenceNumberT
+/// is an integral type that will be used to number in-flight function calls.
+///
+/// These utilities support the construction of very primitive RPC utilities.
+/// Their intent is to ensure correct serialization and deserialization of
+/// procedure arguments, and to keep the client and server's view of the API in
+/// sync.
+template <typename ImplT, typename ChannelT, typename FunctionIdT,
+ typename SequenceNumberT>
+class RPCBase {
+protected:
+ class OrcRPCInvalid : public Function<OrcRPCInvalid, void()> {
+ public:
+ static const char *getName() { return "__orc_rpc$invalid"; }
+ };
+
+ class OrcRPCResponse : public Function<OrcRPCResponse, void()> {
+ public:
+ static const char *getName() { return "__orc_rpc$response"; }
+ };
+
+ class OrcRPCNegotiate
+ : public Function<OrcRPCNegotiate, FunctionIdT(std::string)> {
+ public:
+ static const char *getName() { return "__orc_rpc$negotiate"; }
+ };
+
+  // Helper predicate for testing for the presence of SerializationTraits
+ // serializers.
+ template <typename WireT, typename ConcreteT>
+ class CanSerializeCheck : detail::CanSerialize<ChannelT, WireT, ConcreteT> {
+ public:
+ using detail::CanSerialize<ChannelT, WireT, ConcreteT>::value;
+
+ static_assert(value, "Missing serializer for argument (Can't serialize the "
+ "first template type argument of CanSerializeCheck "
+ "from the second)");
+ };
+
+  // Helper predicate for testing for the presence of SerializationTraits
+ // deserializers.
+ template <typename WireT, typename ConcreteT>
+ class CanDeserializeCheck
+ : detail::CanDeserialize<ChannelT, WireT, ConcreteT> {
+ public:
+ using detail::CanDeserialize<ChannelT, WireT, ConcreteT>::value;
+
+ static_assert(value, "Missing deserializer for argument (Can't deserialize "
+ "the second template type argument of "
+ "CanDeserializeCheck from the first)");
+ };
+
+public:
+ /// Construct an RPC instance on a channel.
+ RPCBase(ChannelT &C, bool LazyAutoNegotiation)
+ : C(C), LazyAutoNegotiation(LazyAutoNegotiation) {
+ // Hold ResponseId in a special variable, since we expect Response to be
+ // called relatively frequently, and want to avoid the map lookup.
+ ResponseId = FnIdAllocator.getResponseId();
+ RemoteFunctionIds[OrcRPCResponse::getPrototype()] = ResponseId;
+
+ // Register the negotiate function id and handler.
+ auto NegotiateId = FnIdAllocator.getNegotiateId();
+ RemoteFunctionIds[OrcRPCNegotiate::getPrototype()] = NegotiateId;
+ Handlers[NegotiateId] = wrapHandler<OrcRPCNegotiate>(
+ [this](const std::string &Name) { return handleNegotiate(Name); },
+ LaunchPolicy());
+ }
+
+  /// Append a call to Func; this does not call send on the channel.
+ /// The first argument specifies a user-defined handler to be run when the
+ /// function returns. The handler should take an Expected<Func::ReturnType>,
+ /// or an Error (if Func::ReturnType is void). The handler will be called
+ /// with an error if the return value is abandoned due to a channel error.
+ template <typename Func, typename HandlerT, typename... ArgTs>
+ Error appendCallAsync(HandlerT Handler, const ArgTs &... Args) {
+
+ static_assert(
+ detail::RPCArgTypeCheck<CanSerializeCheck, typename Func::Type,
+ void(ArgTs...)>::value,
+ "");
+
+ // Look up the function ID.
+ FunctionIdT FnId;
+ if (auto FnIdOrErr = getRemoteFunctionId<Func>())
+ FnId = *FnIdOrErr;
+ else {
+ // This isn't a channel error so we don't want to abandon other pending
+ // responses, but we still need to run the user handler with an error to
+ // let them know the call failed.
+ if (auto Err = Handler(orcError(OrcErrorCode::UnknownRPCFunction)))
+ report_fatal_error(std::move(Err));
+ return FnIdOrErr.takeError();
+ }
+
+ // Allocate a sequence number.
+ auto SeqNo = SequenceNumberMgr.getSequenceNumber();
+ assert(!PendingResponses.count(SeqNo) &&
+ "Sequence number already allocated");
+
+ // Install the user handler.
+ PendingResponses[SeqNo] =
+ detail::createResponseHandler<ChannelT, typename Func::ReturnType>(
+ std::move(Handler));
+
+ // Open the function call message.
+ if (auto Err = C.startSendMessage(FnId, SeqNo)) {
+ abandonPendingResponses();
+ return joinErrors(std::move(Err), C.endSendMessage());
+ }
+
+ // Serialize the call arguments.
+ if (auto Err = detail::HandlerTraits<typename Func::Type>::serializeArgs(
+ C, Args...)) {
+ abandonPendingResponses();
+ return joinErrors(std::move(Err), C.endSendMessage());
+ }
+
+    // Close the function call message.
+ if (auto Err = C.endSendMessage()) {
+ abandonPendingResponses();
return std::move(Err);
- if (FuncId != Func::Id)
- return orcError(OrcErrorCode::UnexpectedRPCCall);
- return handle<Func>(C, Handler);
+ }
+
+ return Error::success();
}
- /// Helper version of expect for calling member functions.
- template <typename Func, typename ClassT, typename... ArgTs>
- static Error expect(ChannelT &C, ClassT &Instance,
- Error (ClassT::*HandlerMethod)(ArgTs...)) {
- return expect<Func>(
- C, MemberFnWrapper<ClassT, ArgTs...>(Instance, HandlerMethod));
+ Error sendAppendedCalls() { return C.send(); };
+
+ template <typename Func, typename HandlerT, typename... ArgTs>
+ Error callAsync(HandlerT Handler, const ArgTs &... Args) {
+ if (auto Err = appendCallAsync<Func>(std::move(Handler), Args...))
+ return Err;
+ return C.send();
+ }
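+
+  // Example (illustrative only; AddInts is a hypothetical Function definition
+  // as sketched above, and Rpc is an RPC instance such as MultiThreadedRPC):
+  //
+  //   if (auto Err = Rpc.callAsync<AddInts>(
+  //           [](Expected<int32_t> Sum) -> Error {
+  //             if (!Sum)
+  //               return Sum.takeError();
+  //             llvm::outs() << "sum = " << *Sum << "\n";
+  //             return Error::success();
+  //           },
+  //           1, 2))
+  //     /* handle channel error */;
+  //
+  //   // The handler runs once the listener thread calls handleOne() and the
+  //   // response for this call has been received.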
+
+ /// Handle one incoming call.
+ Error handleOne() {
+ FunctionIdT FnId;
+ SequenceNumberT SeqNo;
+ if (auto Err = C.startReceiveMessage(FnId, SeqNo))
+ return Err;
+ if (FnId == ResponseId)
+ return handleResponse(SeqNo);
+ auto I = Handlers.find(FnId);
+ if (I != Handlers.end())
+ return I->second(C, SeqNo);
+
+ // else: No handler found. Report error to client?
+ return orcError(OrcErrorCode::UnexpectedRPCCall);
}
/// Helper for handling setter procedures - this method returns a functor that
@@ -535,159 +859,480 @@ public:
/// /* Handle Args */ ;
///
template <typename... ArgTs>
- static ReadArgs<ArgTs...> readArgs(ArgTs &... Args) {
- return ReadArgs<ArgTs...>(Args...);
+ static detail::ReadArgs<ArgTs...> readArgs(ArgTs &... Args) {
+ return detail::ReadArgs<ArgTs...>(Args...);
}
- /// Read a response from Channel.
- /// This should be called from the receive loop to retrieve results.
- Error handleResponse(ChannelT &C, SequenceNumberT *SeqNoRet = nullptr) {
- SequenceNumberT SeqNo;
- if (auto Err = deserialize(C, SeqNo)) {
- abandonOutstandingResults();
- return Err;
- }
+protected:
+ // The LaunchPolicy type allows a launch policy to be specified when adding
+ // a function handler. See addHandlerImpl.
+ using LaunchPolicy = std::function<Error(std::function<Error()>)>;
- if (SeqNoRet)
- *SeqNoRet = SeqNo;
+ /// Add the given handler to the handler map and make it available for
+ /// autonegotiation and execution.
+ template <typename Func, typename HandlerT>
+ void addHandlerImpl(HandlerT Handler, LaunchPolicy Launch) {
- auto I = OutstandingResults.find(SeqNo);
- if (I == OutstandingResults.end()) {
- abandonOutstandingResults();
+ static_assert(detail::RPCArgTypeCheck<
+ CanDeserializeCheck, typename Func::Type,
+ typename detail::HandlerTraits<HandlerT>::Type>::value,
+ "");
+
+ FunctionIdT NewFnId = FnIdAllocator.template allocate<Func>();
+ LocalFunctionIds[Func::getPrototype()] = NewFnId;
+ Handlers[NewFnId] =
+ wrapHandler<Func>(std::move(Handler), std::move(Launch));
+ }
+
+ // Abandon all outstanding results.
+ void abandonPendingResponses() {
+ for (auto &KV : PendingResponses)
+ KV.second->abandon();
+ PendingResponses.clear();
+ SequenceNumberMgr.reset();
+ }
+
+ Error handleResponse(SequenceNumberT SeqNo) {
+ auto I = PendingResponses.find(SeqNo);
+ if (I == PendingResponses.end()) {
+ abandonPendingResponses();
return orcError(OrcErrorCode::UnexpectedRPCResponse);
}
- if (auto Err = I->second->readResult(C)) {
- abandonOutstandingResults();
- // FIXME: Release sequence numbers?
+ auto PRHandler = std::move(I->second);
+ PendingResponses.erase(I);
+ SequenceNumberMgr.releaseSequenceNumber(SeqNo);
+
+ if (auto Err = PRHandler->handleResponse(C)) {
+ abandonPendingResponses();
+ SequenceNumberMgr.reset();
return Err;
}
- OutstandingResults.erase(I);
- SequenceNumberMgr.releaseSequenceNumber(SeqNo);
-
return Error::success();
}
- // Loop waiting for a result with the given sequence number.
- // This can be used as a receive loop if the user doesn't have a default.
- template <typename HandleOtherFtor>
- Error waitForResult(ChannelT &C, SequenceNumberT TgtSeqNo,
- HandleOtherFtor &HandleOther = handleNone) {
- bool GotTgtResult = false;
+ FunctionIdT handleNegotiate(const std::string &Name) {
+ auto I = LocalFunctionIds.find(Name);
+ if (I == LocalFunctionIds.end())
+ return FnIdAllocator.getInvalidId();
+ return I->second;
+ }
+
+  // Find the remote FunctionId for the given function, negotiating it with
+  // the remote end (if lazy auto-negotiation is enabled) when it is not
+  // already in the RemoteFunctionIds map.
+ template <typename Func> Expected<FunctionIdT> getRemoteFunctionId() {
+ // Try to find the id for the given function.
+ auto I = RemoteFunctionIds.find(Func::getPrototype());
+
+ // If we have it in the map, return it.
+ if (I != RemoteFunctionIds.end())
+ return I->second;
+
+ // Otherwise, if we have auto-negotiation enabled, try to negotiate it.
+ if (LazyAutoNegotiation) {
+ auto &Impl = static_cast<ImplT &>(*this);
+ if (auto RemoteIdOrErr =
+ Impl.template callB<OrcRPCNegotiate>(Func::getPrototype())) {
+ auto &RemoteId = *RemoteIdOrErr;
+
+ // If autonegotiation indicates that the remote end doesn't support this
+ // function, return an unknown function error.
+ if (RemoteId == FnIdAllocator.getInvalidId())
+ return orcError(OrcErrorCode::UnknownRPCFunction);
+
+ // Autonegotiation succeeded and returned a valid id. Update the map and
+ // return the id.
+ RemoteFunctionIds[Func::getPrototype()] = RemoteId;
+ return RemoteId;
+ } else {
+ // Autonegotiation failed. Return the error.
+ return RemoteIdOrErr.takeError();
+ }
+ }
- while (!GotTgtResult) {
- FunctionIdT Id = RPCFunctionIdTraits<FunctionIdT>::InvalidId;
- if (auto Err = startReceivingFunction(C, Id))
+ // No key was available in the map and autonegotiation wasn't enabled.
+ // Return an unknown function error.
+ return orcError(OrcErrorCode::UnknownRPCFunction);
+ }
+
+ using WrappedHandlerFn = std::function<Error(ChannelT &, SequenceNumberT)>;
+
+ // Wrap the given user handler in the necessary argument-deserialization code,
+ // result-serialization code, and call to the launch policy (if present).
+ template <typename Func, typename HandlerT>
+ WrappedHandlerFn wrapHandler(HandlerT Handler, LaunchPolicy Launch) {
+ return [this, Handler, Launch](ChannelT &Channel,
+ SequenceNumberT SeqNo) mutable -> Error {
+ // Start by deserializing the arguments.
+ auto Args = std::make_shared<
+ typename detail::HandlerTraits<HandlerT>::ArgStorage>();
+ if (auto Err =
+ detail::HandlerTraits<typename Func::Type>::deserializeArgs(
+ Channel, *Args))
return Err;
- if (Id == RPCFunctionIdTraits<FunctionIdT>::ResponseId) {
- SequenceNumberT SeqNo;
- if (auto Err = handleResponse(C, &SeqNo))
- return Err;
- GotTgtResult = (SeqNo == TgtSeqNo);
- } else if (auto Err = HandleOther(C, Id))
+
+ // GCC 4.7 and 4.8 incorrectly issue a -Wunused-but-set-variable warning
+ // for Args. Void cast Args to work around this for now.
+ // FIXME: Remove this workaround once we can assume a working GCC version.
+ (void)Args;
+
+ // End receive message, unlocking the channel for reading.
+ if (auto Err = Channel.endReceiveMessage())
return Err;
- }
- return Error::success();
+ // Build the handler/responder.
+ auto Responder = [this, Handler, Args, &Channel,
+ SeqNo]() mutable -> Error {
+ using HTraits = detail::HandlerTraits<HandlerT>;
+ using FuncReturn = typename Func::ReturnType;
+ return detail::respond<FuncReturn>(
+ Channel, ResponseId, SeqNo, HTraits::unpackAndRun(Handler, *Args));
+ };
+
+ // If there is an explicit launch policy then use it to launch the
+ // handler.
+ if (Launch)
+ return Launch(std::move(Responder));
+
+ // Otherwise run the handler on the listener thread.
+ return Responder();
+ };
}
- // Default handler for 'other' (non-response) functions when waiting for a
- // result from the channel.
- static Error handleNone(ChannelT &, FunctionIdT) {
- return orcError(OrcErrorCode::UnexpectedRPCCall);
- };
+ ChannelT &C;
+
+ bool LazyAutoNegotiation;
+
+ RPCFunctionIdAllocator<FunctionIdT> FnIdAllocator;
+
+ FunctionIdT ResponseId;
+ std::map<std::string, FunctionIdT> LocalFunctionIds;
+ std::map<const char *, FunctionIdT> RemoteFunctionIds;
+
+ std::map<FunctionIdT, WrappedHandlerFn> Handlers;
+ detail::SequenceNumberManager<SequenceNumberT> SequenceNumberMgr;
+ std::map<SequenceNumberT, std::unique_ptr<detail::ResponseHandler<ChannelT>>>
+ PendingResponses;
+};
+
+} // end namespace detail
+
+template <typename ChannelT, typename FunctionIdT = uint32_t,
+ typename SequenceNumberT = uint32_t>
+class MultiThreadedRPC
+ : public detail::RPCBase<
+ MultiThreadedRPC<ChannelT, FunctionIdT, SequenceNumberT>, ChannelT,
+ FunctionIdT, SequenceNumberT> {
private:
- // Manage sequence numbers.
- class SequenceNumberManager {
- public:
- SequenceNumberManager() = default;
+ using BaseClass =
+ detail::RPCBase<MultiThreadedRPC<ChannelT, FunctionIdT, SequenceNumberT>,
+ ChannelT, FunctionIdT, SequenceNumberT>;
- SequenceNumberManager(const SequenceNumberManager &) = delete;
- SequenceNumberManager &operator=(const SequenceNumberManager &) = delete;
+public:
+ MultiThreadedRPC(ChannelT &C, bool LazyAutoNegotiation)
+ : BaseClass(C, LazyAutoNegotiation) {}
- SequenceNumberManager(SequenceNumberManager &&Other)
- : NextSequenceNumber(std::move(Other.NextSequenceNumber)),
- FreeSequenceNumbers(std::move(Other.FreeSequenceNumbers)) {}
+ /// The LaunchPolicy type allows a launch policy to be specified when adding
+ /// a function handler. See addHandler.
+ using LaunchPolicy = typename BaseClass::LaunchPolicy;
- SequenceNumberManager &operator=(SequenceNumberManager &&Other) {
- NextSequenceNumber = std::move(Other.NextSequenceNumber);
- FreeSequenceNumbers = std::move(Other.FreeSequenceNumbers);
+ /// Add a handler for the given RPC function.
+ /// This installs the given handler functor for the given RPC Function, and
+ /// makes the RPC function available for negotiation/calling from the remote.
+ ///
+ /// The optional LaunchPolicy argument can be used to control how the handler
+ /// is run when called:
+ ///
+ /// * If no LaunchPolicy is given, the handler code will be run on the RPC
+ /// handler thread that is reading from the channel. This handler cannot
+ /// make blocking RPC calls (since it would be blocking the thread used to
+ /// get the result), but can make non-blocking calls.
+ ///
+ /// * If a LaunchPolicy is given, the user's handler will be wrapped in a
+ /// call to serialize and send the result, and the resulting functor (with
+ /// type 'Error()') will be passed to the LaunchPolicy. The user can then
+ /// choose to add the wrapped handler to a work queue, spawn a new thread,
+ /// or anything else.
+ template <typename Func, typename HandlerT>
+ void addHandler(HandlerT Handler, LaunchPolicy Launch = LaunchPolicy()) {
+ return this->template addHandlerImpl<Func>(std::move(Handler),
+ std::move(Launch));
+ }
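Not part of the patch: a minimal sketch of how addHandler might be used with and without a launch policy. The 'Add' function class (declared via the rpc::Function helper defined earlier in this header), the include path, the plain-value handler return convention, and the std::function shape of LaunchPolicy are assumptions for illustration; a real client would register each function only once.

#include "llvm/ExecutionEngine/Orc/RPCUtils.h" // assumed path of this header
#include <functional>
#include <thread>

using namespace llvm;
using namespace llvm::orc;

// Hypothetical RPC function, assuming the rpc::Function CRTP helper defined
// earlier in this header.
class Add : public rpc::Function<Add, int32_t(int32_t, int32_t)> {
public:
  static const char *getName() { return "Add"; }
};

template <typename ChannelT>
void registerAddHandlers(rpc::MultiThreadedRPC<ChannelT> &Server) {
  // No launch policy: the handler runs on the listener thread, so it must not
  // make blocking RPC calls of its own.
  Server.template addHandler<Add>([](int32_t X, int32_t Y) { return X + Y; });

  // With a launch policy: the wrapped responder (an Error() functor) is handed
  // to the policy, which here runs it on a detached thread.
  Server.template addHandler<Add>(
      [](int32_t X, int32_t Y) { return X + Y; },
      [](std::function<Error()> Responder) {
        std::thread([Responder]() { consumeError(Responder()); }).detach();
        return Error::success();
      });
}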
+
+ /// Negotiate a function id for Func with the other end of the channel.
+ template <typename Func> Error negotiateFunction() {
+ using OrcRPCNegotiate = typename BaseClass::OrcRPCNegotiate;
+
+ if (auto RemoteIdOrErr = callB<OrcRPCNegotiate>(Func::getPrototype())) {
+ this->RemoteFunctionIds[Func::getPrototype()] = *RemoteIdOrErr;
+ return Error::success();
+ } else
+ return RemoteIdOrErr.takeError();
+ }
+
+ /// Convenience method for negotiating multiple functions at once.
+ template <typename Func> Error negotiateFunctions() {
+ return negotiateFunction<Func>();
+ }
+
+ /// Convenience method for negotiating multiple functions at once.
+ template <typename Func1, typename Func2, typename... Funcs>
+ Error negotiateFunctions() {
+ if (auto Err = negotiateFunction<Func1>())
+ return Err;
+ return negotiateFunctions<Func2, Funcs...>();
+ }
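Also illustrative: a client would typically negotiate function ids up front ('Add' and 'Sub' here are hypothetical rpc::Function declarations like the sketch above). With lazy auto-negotiation enabled this step can be skipped.

using namespace llvm;
using namespace llvm::orc;

template <typename ChannelT>
Error negotiateAPI(rpc::MultiThreadedRPC<ChannelT> &Client) {
  // Resolve and cache the remote ids for both functions before making any
  // real calls.
  return Client.template negotiateFunctions<Add, Sub>();
}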
+
+ /// Return type for non-blocking call primitives.
+ template <typename Func>
+ using NonBlockingCallResult = typename detail::ResultTraits<
+ typename Func::ReturnType>::ReturnFutureType;
+
+ /// Call Func on Channel C. Does not block, does not call send. Returns a
+ /// future for the result.
+ ///
+ /// This utility function is primarily used to build the other call
+ /// primitives (callNB and callB). Most clients will want to use one of
+ /// those methods rather than calling appendCallNB directly.
+ template <typename Func, typename... ArgTs>
+ Expected<NonBlockingCallResult<Func>> appendCallNB(const ArgTs &... Args) {
+ using RTraits = detail::ResultTraits<typename Func::ReturnType>;
+ using ErrorReturn = typename RTraits::ErrorReturnType;
+ using ErrorReturnPromise = typename RTraits::ReturnPromiseType;
+
+ // FIXME: Stack allocate and move this into the handler once LLVM builds
+ // with C++14.
+ auto Promise = std::make_shared<ErrorReturnPromise>();
+ auto FutureResult = Promise->get_future();
+
+ if (auto Err = this->template appendCallAsync<Func>(
+ [Promise](ErrorReturn RetOrErr) {
+ Promise->set_value(std::move(RetOrErr));
+ return Error::success();
+ },
+ Args...)) {
+ this->abandonPendingResponses();
+ RTraits::consumeAbandoned(FutureResult.get());
+ return std::move(Err);
}
+ return std::move(FutureResult);
+ }
- void reset() {
- std::lock_guard<std::mutex> Lock(SeqNoLock);
- NextSequenceNumber = 0;
- FreeSequenceNumbers.clear();
+ /// The same as appendCallNB, except that it also calls C.send() to
+ /// flush the channel after serializing the call.
+ template <typename Func, typename... ArgTs>
+ Expected<NonBlockingCallResult<Func>> callNB(const ArgTs &... Args) {
+ auto Result = appendCallNB<Func>(Args...);
+ if (!Result)
+ return Result;
+ if (auto Err = this->C.send()) {
+ this->abandonPendingResponses();
+ detail::ResultTraits<typename Func::ReturnType>::consumeAbandoned(
+ std::move(Result->get()));
+ return std::move(Err);
}
+ return Result;
+ }
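A sketch of a non-blocking call, again assuming the hypothetical 'Add' function above and that the returned future holds an Expected<int32_t>. Some other thread must be servicing responses (for example via handlerLoop below) for the future to become ready.

#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::orc;

template <typename ChannelT>
Error addAsync(rpc::MultiThreadedRPC<ChannelT> &Client) {
  const int32_t N = 21;
  auto FutureSum = Client.template callNB<Add>(N, N);
  if (!FutureSum)
    return FutureSum.takeError();

  // ... unrelated work can happen here while the remote runs the call ...

  // Blocks until another thread (e.g. one running handlerLoop) has handled
  // the response and fulfilled the promise.
  if (auto Sum = FutureSum->get()) {
    outs() << "21 + 21 = " << *Sum << "\n";
    return Error::success();
  } else
    return Sum.takeError();
}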
- SequenceNumberT getSequenceNumber() {
- std::lock_guard<std::mutex> Lock(SeqNoLock);
- if (FreeSequenceNumbers.empty())
- return NextSequenceNumber++;
- auto SequenceNumber = FreeSequenceNumbers.back();
- FreeSequenceNumbers.pop_back();
- return SequenceNumber;
+ /// Call Func on Channel C. Blocks waiting for a result. Returns an Error
+ /// for void functions or an Expected<T> for functions returning a T.
+ ///
+ /// This function is for use in threaded code where another thread is
+ /// handling responses and incoming calls.
+ template <typename Func, typename... ArgTs,
+ typename AltRetT = typename Func::ReturnType>
+ typename detail::ResultTraits<AltRetT>::ErrorReturnType
+ callB(const ArgTs &... Args) {
+ if (auto FutureResOrErr = callNB<Func>(Args...)) {
+ if (auto Err = this->C.send()) {
+ this->abandonPendingResponses();
+ detail::ResultTraits<typename Func::ReturnType>::consumeAbandoned(
+ std::move(FutureResOrErr->get()));
+ return std::move(Err);
+ }
+ return FutureResOrErr->get();
+ } else
+ return FutureResOrErr.takeError();
+ }
+
+ /// Handle incoming RPC calls.
+ Error handlerLoop() {
+ while (true)
+ if (auto Err = this->handleOne())
+ return Err;
+ return Error::success();
+ }
+};
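Putting the pieces together (still illustrative, with the assumed 'Add' function from above): a dedicated thread drives handlerLoop so that blocking callB calls are safe from other threads.

#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"
#include <thread>

using namespace llvm;
using namespace llvm::orc;

template <typename ChannelT>
void runClient(rpc::MultiThreadedRPC<ChannelT> &Client) {
  // Service responses (and incoming calls from the remote) off the main
  // thread.
  std::thread Listener([&Client]() {
    if (auto Err = Client.handlerLoop())
      logAllUnhandledErrors(std::move(Err), errs(), "RPC listener: ");
  });

  // Safe to block here: the listener thread will deliver the response.
  int32_t X = 1, Y = 2;
  if (auto Sum = Client.template callB<Add>(X, Y))
    outs() << X << " + " << Y << " = " << *Sum << "\n";
  else
    logAllUnhandledErrors(Sum.takeError(), errs(), "RPC call: ");

  // handlerLoop returns (and the join completes) once the channel reports an
  // error, for example when the remote end disconnects.
  Listener.join();
}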
+
+template <typename ChannelT, typename FunctionIdT = uint32_t,
+ typename SequenceNumberT = uint32_t>
+class SingleThreadedRPC
+ : public detail::RPCBase<
+ SingleThreadedRPC<ChannelT, FunctionIdT, SequenceNumberT>, ChannelT,
+ FunctionIdT, SequenceNumberT> {
+private:
+ using BaseClass =
+ detail::RPCBase<SingleThreadedRPC<ChannelT, FunctionIdT, SequenceNumberT>,
+ ChannelT, FunctionIdT, SequenceNumberT>;
+
+ using LaunchPolicy = typename BaseClass::LaunchPolicy;
+
+public:
+ SingleThreadedRPC(ChannelT &C, bool LazyAutoNegotiation)
+ : BaseClass(C, LazyAutoNegotiation) {}
+
+ template <typename Func, typename HandlerT>
+ void addHandler(HandlerT Handler) {
+ return this->template addHandlerImpl<Func>(std::move(Handler),
+ LaunchPolicy());
+ }
+
+ template <typename Func, typename ClassT, typename RetT, typename... ArgTs>
+ void addHandler(ClassT &Object, RetT (ClassT::*Method)(ArgTs...)) {
+ addHandler<Func>(
+ detail::MemberFnWrapper<ClassT, RetT, ArgTs...>(Object, Method));
+ }
+
+ /// Negotiate a function id for Func with the other end of the channel.
+ template <typename Func> Error negotiateFunction() {
+ using OrcRPCNegotiate = typename BaseClass::OrcRPCNegotiate;
+
+ if (auto RemoteIdOrErr = callB<OrcRPCNegotiate>(Func::getPrototype())) {
+ this->RemoteFunctionIds[Func::getPrototype()] = *RemoteIdOrErr;
+ return Error::success();
+ } else
+ return RemoteIdOrErr.takeError();
+ }
+
+ /// Convenience method for negotiating multiple functions at once.
+ template <typename Func> Error negotiateFunctions() {
+ return negotiateFunction<Func>();
+ }
+
+ /// Convenience method for negotiating multiple functions at once.
+ template <typename Func1, typename Func2, typename... Funcs>
+ Error negotiateFunctions() {
+ if (auto Err = negotiateFunction<Func1>())
+ return Err;
+ return negotiateFunctions<Func2, Funcs...>();
+ }
+
+ template <typename Func, typename... ArgTs,
+ typename AltRetT = typename Func::ReturnType>
+ typename detail::ResultTraits<AltRetT>::ErrorReturnType
+ callB(const ArgTs &... Args) {
+ bool ReceivedResponse = false;
+ using ResultType = typename detail::ResultTraits<AltRetT>::ErrorReturnType;
+ auto Result = detail::ResultTraits<AltRetT>::createBlankErrorReturnValue();
+
+ // We have to 'Check' result (which we know is in a success state at this
+ // point) so that it can be overwritten in the async handler.
+ (void)!!Result;
+
+ if (auto Err = this->template appendCallAsync<Func>(
+ [&](ResultType R) {
+ Result = std::move(R);
+ ReceivedResponse = true;
+ return Error::success();
+ },
+ Args...)) {
+ this->abandonPendingResponses();
+ detail::ResultTraits<typename Func::ReturnType>::consumeAbandoned(
+ std::move(Result));
+ return std::move(Err);
}
- void releaseSequenceNumber(SequenceNumberT SequenceNumber) {
- std::lock_guard<std::mutex> Lock(SeqNoLock);
- FreeSequenceNumbers.push_back(SequenceNumber);
+ while (!ReceivedResponse) {
+ if (auto Err = this->handleOne()) {
+ this->abandonPendingResponses();
+ detail::ResultTraits<typename Func::ReturnType>::consumeAbandoned(
+ std::move(Result));
+ return std::move(Err);
+ }
}
- private:
- std::mutex SeqNoLock;
- SequenceNumberT NextSequenceNumber = 0;
- std::vector<SequenceNumberT> FreeSequenceNumbers;
- };
+ return Result;
+ }
+};
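An equivalent single-threaded sketch: handlers may be plain callables or member functions, and callB services incoming messages itself until its own response arrives, so no listener thread is needed. 'Add' is the hypothetical function class from above; 'Calculator' is likewise made up for illustration.

#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::orc;

// Hypothetical class whose method backs the Add function.
class Calculator {
public:
  int32_t add(int32_t X, int32_t Y) { return X + Y; }
};

template <typename ChannelT>
Error runSingleThreaded(rpc::SingleThreadedRPC<ChannelT> &Endpoint,
                        Calculator &Calc) {
  // Expose Calculator::add to the remote end under the Add prototype.
  Endpoint.template addHandler<Add>(Calc, &Calculator::add);

  // Call the remote end's Add. callB drives handleOne() internally while
  // waiting for this call's response.
  int32_t A = 2, B = 2;
  auto Sum = Endpoint.template callB<Add>(A, B);
  if (!Sum)
    return Sum.takeError();
  outs() << A << " + " << B << " = " << *Sum << "\n";
  return Error::success();
}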
- // Base class for results that haven't been returned from the other end of the
- // RPC connection yet.
- class OutstandingResult {
- public:
- virtual ~OutstandingResult() {}
- virtual Error readResult(ChannelT &C) = 0;
- virtual void abandon() = 0;
- };
+/// \brief Allows a set of asynchronous calls to be dispatched, and then
+/// waited on as a group.
+template <typename RPCClass> class ParallelCallGroup {
+public:
- // Outstanding results for a specific function.
- template <typename Func>
- class OutstandingResultImpl : public OutstandingResult {
- private:
- public:
- OutstandingResultImpl(std::promise<typename Func::OptionalReturn> &&P)
- : P(std::move(P)) {}
+ /// \brief Construct a parallel call group for the given RPC.
+ ParallelCallGroup(RPCClass &RPC) : RPC(RPC), NumOutstandingCalls(0) {}
+
+ ParallelCallGroup(const ParallelCallGroup &) = delete;
+ ParallelCallGroup &operator=(const ParallelCallGroup &) = delete;
- Error readResult(ChannelT &C) override { return Func::readResult(C, P); }
+ /// \brief Make an asynchronous call.
+ ///
+ /// Does not issue a send call to the RPC's channel. The channel may use this
+ /// to batch up subsequent calls. A send is issued automatically when wait
+ /// is called.
+ template <typename Func, typename HandlerT, typename... ArgTs>
+ Error appendCall(HandlerT Handler, const ArgTs &... Args) {
+ // Increment the count of outstanding calls. This has to happen before
+ // we invoke the call, as the handler may (depending on scheduling)
+ // be run immediately on another thread, and we don't want the decrement
+ // in the wrapped handler below to run before the increment.
+ {
+ std::unique_lock<std::mutex> Lock(M);
+ ++NumOutstandingCalls;
+ }
- void abandon() override { Func::abandon(P); }
+ // Wrap the user handler in a lambda that will decrement the
+ // outstanding calls count, then poke the condition variable.
+ using ArgType = typename detail::ResponseHandlerArg<
+ typename detail::HandlerTraits<HandlerT>::Type>::ArgType;
+ // FIXME: Move handler into wrapped handler once we have C++14.
+ auto WrappedHandler = [this, Handler](ArgType Arg) {
+ auto Err = Handler(std::move(Arg));
+ std::unique_lock<std::mutex> Lock(M);
+ --NumOutstandingCalls;
+ CV.notify_all();
+ return Err;
+ };
- private:
- std::promise<typename Func::OptionalReturn> P;
- };
+ return RPC.template appendCallAsync<Func>(std::move(WrappedHandler),
+ Args...);
+ }
- // Create an outstanding result for the given function.
- template <typename Func>
- std::unique_ptr<OutstandingResult>
- createOutstandingResult(std::promise<typename Func::OptionalReturn> &&P) {
- return llvm::make_unique<OutstandingResultImpl<Func>>(std::move(P));
+ /// \brief Make an asynchronous call.
+ ///
+ /// The same as appendCall, but also calls send on the channel immediately.
+ /// Prefer appendCall if you are about to issue a "wait" call shortly, as
+ /// this may allow the channel to better batch the calls.
+ template <typename Func, typename HandlerT, typename... ArgTs>
+ Error call(HandlerT Handler, const ArgTs &... Args) {
+ if (auto Err = appendCall(std::move(Handler), Args...))
+ return Err;
+ return RPC.sendAppendedCalls();
}
- // Abandon all outstanding results.
- void abandonOutstandingResults() {
- for (auto &KV : OutstandingResults)
- KV.second->abandon();
- OutstandingResults.clear();
- SequenceNumberMgr.reset();
+ /// \brief Blocks until all calls have been completed and their return value
+ /// handlers run.
+ Error wait() {
+ if (auto Err = RPC.sendAppendedCalls())
+ return Err;
+ std::unique_lock<std::mutex> Lock(M);
+ while (NumOutstandingCalls > 0)
+ CV.wait(Lock);
+ return Error::success();
}
- SequenceNumberManager SequenceNumberMgr;
- std::map<SequenceNumberT, std::unique_ptr<OutstandingResult>>
- OutstandingResults;
+private:
+ RPCClass &RPC;
+ std::mutex M;
+ std::condition_variable CV;
+ uint32_t NumOutstandingCalls;
};
-} // end namespace remote
+} // end namespace rpc
} // end namespace orc
} // end namespace llvm
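A sketch of ParallelCallGroup usage. It assumes the 'Add' function class above, that the response handler takes an Expected<int32_t>, and, when used with MultiThreadedRPC, a listener thread draining responses as in the earlier sketch.

#include "llvm/Support/raw_ostream.h"

using namespace llvm;
using namespace llvm::orc;

template <typename RPCEndpointT>
Error addManyInParallel(RPCEndpointT &Client) {
  rpc::ParallelCallGroup<RPCEndpointT> Group(Client);

  for (int32_t I = 0; I != 10; ++I) {
    // Each handler runs when its own result arrives; the group tracks how
    // many are still outstanding.
    if (auto Err = Group.template appendCall<Add>(
            [I](Expected<int32_t> Sum) -> Error {
              if (!Sum)
                return Sum.takeError();
              outs() << I << " + " << I << " = " << *Sum << "\n";
              return Error::success();
            },
            I, I))
      return Err;
  }

  // Flushes the channel and blocks until all ten handlers have run.
  return Group.wait();
}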
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/Orc/RawByteChannel.h b/contrib/llvm/include/llvm/ExecutionEngine/Orc/RawByteChannel.h
new file mode 100644
index 000000000000..43b597de000f
--- /dev/null
+++ b/contrib/llvm/include/llvm/ExecutionEngine/Orc/RawByteChannel.h
@@ -0,0 +1,175 @@
+//===- llvm/ExecutionEngine/Orc/RawByteChannel.h ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_RAWBYTECHANNEL_H
+#define LLVM_EXECUTIONENGINE_ORC_RAWBYTECHANNEL_H
+
+#include "OrcError.h"
+#include "RPCSerialization.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+#include <cstddef>
+#include <cstdint>
+#include <mutex>
+#include <string>
+#include <tuple>
+#include <type_traits>
+#include <vector>
+
+namespace llvm {
+namespace orc {
+namespace rpc {
+
+/// Interface for byte-streams to be used with RPC.
+class RawByteChannel {
+public:
+ virtual ~RawByteChannel() {}
+
+ /// Read Size bytes from the stream into *Dst.
+ virtual Error readBytes(char *Dst, unsigned Size) = 0;
+
+ /// Append Size bytes from *Src to the stream.
+ virtual Error appendBytes(const char *Src, unsigned Size) = 0;
+
+ /// Flush the stream if possible.
+ virtual Error send() = 0;
+
+ /// Notify the channel that we're starting a message send.
+ /// Locks the channel for writing.
+ template <typename FunctionIdT, typename SequenceIdT>
+ Error startSendMessage(const FunctionIdT &FnId, const SequenceIdT &SeqNo) {
+ writeLock.lock();
+ if (auto Err = serializeSeq(*this, FnId, SeqNo))
+ return Err;
+ return Error::success();
+ }
+
+ /// Notify the channel that we're ending a message send.
+ /// Unlocks the channel for writing.
+ Error endSendMessage() {
+ writeLock.unlock();
+ return Error::success();
+ }
+
+ /// Notify the channel that we're starting a message receive.
+ /// Locks the channel for reading.
+ template <typename FunctionIdT, typename SequenceNumberT>
+ Error startReceiveMessage(FunctionIdT &FnId, SequenceNumberT &SeqNo) {
+ readLock.lock();
+ return deserializeSeq(*this, FnId, SeqNo);
+ }
+
+ /// Notify the channel that we're ending a message receive.
+ /// Unlocks the channel for reading.
+ Error endReceiveMessage() {
+ readLock.unlock();
+ return Error::success();
+ }
+
+ /// Get the lock for stream reading.
+ std::mutex &getReadLock() { return readLock; }
+
+ /// Get the lock for stream writing.
+ std::mutex &getWriteLock() { return writeLock; }
+
+private:
+ std::mutex readLock, writeLock;
+};
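Not part of the patch: a minimal RawByteChannel implementation over a pair of POSIX file descriptors (for example a socketpair or pipes). Everything beyond the three overridden virtuals, including the class name, fd handling, and error strings, is made up for illustration.

#include "llvm/Support/Error.h"
#include <cerrno>
#include <system_error>
#include <unistd.h> // POSIX read()/write(); illustration only

using namespace llvm;

class FDRawByteChannel final : public orc::rpc::RawByteChannel {
public:
  FDRawByteChannel(int InFD, int OutFD) : InFD(InFD), OutFD(OutFD) {}

  Error readBytes(char *Dst, unsigned Size) override {
    while (Size) {
      ssize_t N = ::read(InFD, Dst, Size);
      if (N < 0)
        return errorCodeToError(
            std::error_code(errno, std::generic_category()));
      if (N == 0)
        return make_error<StringError>("unexpected end of stream",
                                       inconvertibleErrorCode());
      Dst += N;
      Size -= static_cast<unsigned>(N);
    }
    return Error::success();
  }

  Error appendBytes(const char *Src, unsigned Size) override {
    while (Size) {
      ssize_t N = ::write(OutFD, Src, Size);
      if (N < 0)
        return errorCodeToError(
            std::error_code(errno, std::generic_category()));
      if (N == 0)
        return make_error<StringError>("write returned zero bytes",
                                       inconvertibleErrorCode());
      Src += N;
      Size -= static_cast<unsigned>(N);
    }
    return Error::success();
  }

  // Raw file descriptors need no explicit flush.
  Error send() override { return Error::success(); }

private:
  int InFD, OutFD;
};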
+
+template <typename ChannelT, typename T>
+class SerializationTraits<
+ ChannelT, T, T,
+ typename std::enable_if<
+ std::is_base_of<RawByteChannel, ChannelT>::value &&
+ (std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value ||
+ std::is_same<T, uint16_t>::value || std::is_same<T, int16_t>::value ||
+ std::is_same<T, uint32_t>::value || std::is_same<T, int32_t>::value ||
+ std::is_same<T, uint64_t>::value || std::is_same<T, int64_t>::value ||
+ std::is_same<T, char>::value)>::type> {
+public:
+ static Error serialize(ChannelT &C, T V) {
+ V = support::endian::byte_swap<T, support::big>(V);
+ return C.appendBytes(reinterpret_cast<const char *>(&V), sizeof(T));
+ };
+
+ static Error deserialize(ChannelT &C, T &V) {
+ if (auto Err = C.readBytes(reinterpret_cast<char *>(&V), sizeof(T)))
+ return Err;
+ V = support::endian::byte_swap<T, support::big>(V);
+ return Error::success();
+ };
+};
+
+template <typename ChannelT>
+class SerializationTraits<ChannelT, bool, bool,
+ typename std::enable_if<std::is_base_of<
+ RawByteChannel, ChannelT>::value>::type> {
+public:
+ static Error serialize(ChannelT &C, bool V) {
+ return C.appendBytes(reinterpret_cast<const char *>(&V), 1);
+ }
+
+ static Error deserialize(ChannelT &C, bool &V) {
+ return C.readBytes(reinterpret_cast<char *>(&V), 1);
+ }
+};
+
+template <typename ChannelT>
+class SerializationTraits<ChannelT, std::string, StringRef,
+ typename std::enable_if<std::is_base_of<
+ RawByteChannel, ChannelT>::value>::type> {
+public:
+ /// RPC channel serialization for std::strings.
+ static Error serialize(RawByteChannel &C, StringRef S) {
+ if (auto Err = serializeSeq(C, static_cast<uint64_t>(S.size())))
+ return Err;
+ return C.appendBytes((const char *)S.data(), S.size());
+ }
+};
+
+template <typename ChannelT>
+class SerializationTraits<ChannelT, std::string, const char *,
+ typename std::enable_if<std::is_base_of<
+ RawByteChannel, ChannelT>::value>::type> {
+public:
+ static Error serialize(RawByteChannel &C, const char *S) {
+ return SerializationTraits<ChannelT, std::string, StringRef>::serialize(C,
+ S);
+ }
+};
+
+template <typename ChannelT>
+class SerializationTraits<ChannelT, std::string, std::string,
+ typename std::enable_if<std::is_base_of<
+ RawByteChannel, ChannelT>::value>::type> {
+public:
+ /// RPC channel serialization for std::strings.
+ static Error serialize(RawByteChannel &C, const std::string &S) {
+ return SerializationTraits<ChannelT, std::string, StringRef>::serialize(C,
+ S);
+ }
+
+ /// RPC channel deserialization for std::strings.
+ static Error deserialize(RawByteChannel &C, std::string &S) {
+ uint64_t Count = 0;
+ if (auto Err = deserializeSeq(C, Count))
+ return Err;
+ S.resize(Count);
+ return C.readBytes(&S[0], Count);
+ }
+};
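To put a user-defined type on the wire, a client specializes SerializationTraits in the same style as the built-in specializations above; serializeSeq and deserializeSeq compose the existing integer support. The 'Point' type below is hypothetical.

#include <cstdint>
#include <type_traits>

struct Point { int32_t X; int32_t Y; };

namespace llvm {
namespace orc {
namespace rpc {

template <typename ChannelT>
class SerializationTraits<
    ChannelT, Point, Point,
    typename std::enable_if<
        std::is_base_of<RawByteChannel, ChannelT>::value>::type> {
public:
  static Error serialize(ChannelT &C, const Point &P) {
    // Reuses the int32_t support defined above.
    return serializeSeq(C, P.X, P.Y);
  }

  static Error deserialize(ChannelT &C, Point &P) {
    return deserializeSeq(C, P.X, P.Y);
  }
};

} // end namespace rpc
} // end namespace orc
} // end namespace llvm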
+
+} // end namespace rpc
+} // end namespace orc
+} // end namespace llvm
+
+#endif // LLVM_EXECUTIONENGINE_ORC_RAWBYTECHANNEL_H
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/RTDyldMemoryManager.h b/contrib/llvm/include/llvm/ExecutionEngine/RTDyldMemoryManager.h
index 9451fa57c0f6..5638717790bb 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/RTDyldMemoryManager.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/RTDyldMemoryManager.h
@@ -14,22 +14,24 @@
#ifndef LLVM_EXECUTIONENGINE_RTDYLDMEMORYMANAGER_H
#define LLVM_EXECUTIONENGINE_RTDYLDMEMORYMANAGER_H
-#include "RuntimeDyld.h"
-#include "llvm-c/ExecutionEngine.h"
+#include "llvm/ExecutionEngine/JITSymbol.h"
+#include "llvm/ExecutionEngine/RuntimeDyld.h"
#include "llvm/Support/CBindingWrapping.h"
-#include "llvm/Support/Memory.h"
+#include "llvm-c/ExecutionEngine.h"
+#include <cstddef>
+#include <cstdint>
+#include <string>
namespace llvm {
class ExecutionEngine;
- namespace object {
- class ObjectFile;
- }
+namespace object {
+ class ObjectFile;
+} // end namespace object
class MCJITMemoryManager : public RuntimeDyld::MemoryManager {
public:
-
// Don't hide the notifyObjectLoaded method from RuntimeDyld::MemoryManager.
using RuntimeDyld::MemoryManager::notifyObjectLoaded;
@@ -54,11 +56,11 @@ public:
// FIXME: As the RuntimeDyld fills out, additional routines will be needed
// for the varying types of objects to be allocated.
class RTDyldMemoryManager : public MCJITMemoryManager,
- public RuntimeDyld::SymbolResolver {
+ public JITSymbolResolver {
+public:
+ RTDyldMemoryManager() = default;
RTDyldMemoryManager(const RTDyldMemoryManager&) = delete;
void operator=(const RTDyldMemoryManager&) = delete;
-public:
- RTDyldMemoryManager() {}
~RTDyldMemoryManager() override;
/// Register EH frames in the current process.
@@ -98,9 +100,8 @@ public:
/// Clients writing custom RTDyldMemoryManagers are encouraged to override
/// this method and return a SymbolInfo with the flags set correctly. This is
/// necessary for RuntimeDyld to correctly handle weak and non-exported symbols.
- RuntimeDyld::SymbolInfo findSymbol(const std::string &Name) override {
- return RuntimeDyld::SymbolInfo(getSymbolAddress(Name),
- JITSymbolFlags::Exported);
+ JITSymbol findSymbol(const std::string &Name) override {
+ return JITSymbol(getSymbolAddress(Name), JITSymbolFlags::Exported);
}
/// Legacy symbol lookup -- DEPRECATED! Please override
@@ -121,10 +122,10 @@ public:
/// Clients writing custom RTDyldMemoryManagers are encouraged to override
/// this method and return a SymbolInfo with the flags set correctly. This is
/// necessary for RuntimeDyld to correctly handle weak and non-exported symbols.
- RuntimeDyld::SymbolInfo
+ JITSymbol
findSymbolInLogicalDylib(const std::string &Name) override {
- return RuntimeDyld::SymbolInfo(getSymbolAddressInLogicalDylib(Name),
- JITSymbolFlags::Exported);
+ return JITSymbol(getSymbolAddressInLogicalDylib(Name),
+ JITSymbolFlags::Exported);
}
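With this change, custom memory managers return JITSymbol rather than RuntimeDyld::SymbolInfo. A hypothetical override is sketched below, deriving from SectionMemoryManager (updated later in this patch) so the allocation methods are already implemented; the symbol name and flag choice are made up for illustration.

#include "llvm/ExecutionEngine/SectionMemoryManager.h"
#include <string>

// Hides one internal symbol from ordinary lookups, keeping the default
// exported-flags behaviour for everything else.
class FilteringMemoryManager : public llvm::SectionMemoryManager {
public:
  llvm::JITSymbol findSymbol(const std::string &Name) override {
    if (Name == "__my_internal_helper") // hypothetical symbol name
      return llvm::JITSymbol(getSymbolAddress(Name),
                             llvm::JITSymbolFlags::None);
    return SectionMemoryManager::findSymbol(Name);
  }
};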
/// This method returns the address of the specified function. As such it is
@@ -144,7 +145,6 @@ public:
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(
RTDyldMemoryManager, LLVMMCJITMemoryManagerRef)
-} // namespace llvm
-
+} // end namespace llvm
-#endif
+#endif // LLVM_EXECUTIONENGINE_RTDYLDMEMORYMANAGER_H
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/RuntimeDyld.h b/contrib/llvm/include/llvm/ExecutionEngine/RuntimeDyld.h
index bd485de91bd4..13a5f9922c51 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/RuntimeDyld.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/RuntimeDyld.h
@@ -14,33 +14,39 @@
#ifndef LLVM_EXECUTIONENGINE_RUNTIMEDYLD_H
#define LLVM_EXECUTIONENGINE_RUNTIMEDYLD_H
-#include "JITSymbolFlags.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/DebugInfo/DIContext.h"
+#include "llvm/ExecutionEngine/JITSymbol.h"
#include "llvm/Object/ObjectFile.h"
-#include "llvm/Support/Memory.h"
+#include "llvm/Support/Error.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
#include <map>
#include <memory>
-#include <utility>
+#include <string>
+#include <system_error>
namespace llvm {
-class StringRef;
-
namespace object {
- class ObjectFile;
template <typename T> class OwningBinary;
-}
+} // end namespace object
/// Base class for errors originating in RuntimeDyld, e.g. missing relocation
/// support.
class RuntimeDyldError : public ErrorInfo<RuntimeDyldError> {
public:
static char ID;
+
RuntimeDyldError(std::string ErrMsg) : ErrMsg(std::move(ErrMsg)) {}
+
void log(raw_ostream &OS) const override;
const std::string &getErrorMessage() const { return ErrMsg; }
std::error_code convertToErrorCode() const override;
+
private:
std::string ErrMsg;
};
@@ -51,30 +57,16 @@ class RuntimeDyldCheckerImpl;
class RuntimeDyld {
friend class RuntimeDyldCheckerImpl;
- RuntimeDyld(const RuntimeDyld &) = delete;
- void operator=(const RuntimeDyld &) = delete;
-
protected:
// Change the address associated with a section when resolving relocations.
// Any relocations already associated with the symbol will be re-resolved.
void reassignSectionAddress(unsigned SectionID, uint64_t Addr);
-public:
-
- /// \brief Information about a named symbol.
- class SymbolInfo : public JITSymbolBase {
- public:
- SymbolInfo(std::nullptr_t) : JITSymbolBase(JITSymbolFlags::None), Address(0) {}
- SymbolInfo(uint64_t Address, JITSymbolFlags Flags)
- : JITSymbolBase(Flags), Address(Address) {}
- explicit operator bool() const { return Address != 0; }
- uint64_t getAddress() const { return Address; }
- private:
- uint64_t Address;
- };
+public:
/// \brief Information about the loaded object.
class LoadedObjectInfo : public llvm::LoadedObjectInfo {
friend class RuntimeDyldImpl;
+
public:
typedef std::map<object::SectionRef, unsigned> ObjSectionToIDMap;
@@ -103,6 +95,7 @@ public:
LoadedObjectInfoHelper(RuntimeDyldImpl &RTDyld,
LoadedObjectInfo::ObjSectionToIDMap ObjSecToIDMap)
: LoadedObjectInfo(RTDyld, std::move(ObjSecToIDMap)) {}
+
std::unique_ptr<llvm::LoadedObjectInfo> clone() const override {
return llvm::make_unique<Derived>(static_cast<const Derived &>(*this));
}
@@ -111,9 +104,10 @@ public:
/// \brief Memory Management.
class MemoryManager {
friend class RuntimeDyld;
+
public:
- MemoryManager() : FinalizationLocked(false) {}
- virtual ~MemoryManager() {}
+ MemoryManager() = default;
+ virtual ~MemoryManager() = default;
/// Allocate a memory block of (at least) the given size suitable for
/// executable code. The SectionID is a unique identifier assigned by the
@@ -186,42 +180,14 @@ public:
private:
virtual void anchor();
- bool FinalizationLocked;
- };
-
- /// \brief Symbol resolution.
- class SymbolResolver {
- public:
- virtual ~SymbolResolver() {}
-
- /// This method returns the address of the specified symbol if it exists
- /// within the logical dynamic library represented by this
- /// RTDyldMemoryManager. Unlike findSymbol, queries through this
- /// interface should return addresses for hidden symbols.
- ///
- /// This is of particular importance for the Orc JIT APIs, which support lazy
- /// compilation by breaking up modules: Each of those broken out modules
- /// must be able to resolve hidden symbols provided by the others. Clients
- /// writing memory managers for MCJIT can usually ignore this method.
- ///
- /// This method will be queried by RuntimeDyld when checking for previous
- /// definitions of common symbols.
- virtual SymbolInfo findSymbolInLogicalDylib(const std::string &Name) = 0;
-
- /// This method returns the address of the specified function or variable.
- /// It is used to resolve symbols during module linking.
- ///
- /// If the returned symbol's address is equal to ~0ULL then RuntimeDyld will
- /// skip all relocations for that symbol, and the client will be responsible
- /// for handling them manually.
- virtual SymbolInfo findSymbol(const std::string &Name) = 0;
- private:
- virtual void anchor();
+ bool FinalizationLocked = false;
};
/// \brief Construct a RuntimeDyld instance.
- RuntimeDyld(MemoryManager &MemMgr, SymbolResolver &Resolver);
+ RuntimeDyld(MemoryManager &MemMgr, JITSymbolResolver &Resolver);
+ RuntimeDyld(const RuntimeDyld &) = delete;
+ void operator=(const RuntimeDyld &) = delete;
~RuntimeDyld();
/// Add the referenced object file to the list of objects to be loaded and
@@ -235,7 +201,7 @@ public:
/// Get the target address and flags for the named symbol.
/// This address is the one used for relocation.
- SymbolInfo getSymbol(StringRef Name) const;
+ JITEvaluatedSymbol getSymbol(StringRef Name) const;
/// Resolve the relocations for all symbols we currently know about.
void resolveRelocations();
@@ -295,7 +261,7 @@ private:
// interface.
std::unique_ptr<RuntimeDyldImpl> Dyld;
MemoryManager &MemMgr;
- SymbolResolver &Resolver;
+ JITSymbolResolver &Resolver;
bool ProcessAllSections;
RuntimeDyldCheckerImpl *Checker;
};
diff --git a/contrib/llvm/include/llvm/ExecutionEngine/SectionMemoryManager.h b/contrib/llvm/include/llvm/ExecutionEngine/SectionMemoryManager.h
index 7bb96eb8b71b..3b2af11cdaf4 100644
--- a/contrib/llvm/include/llvm/ExecutionEngine/SectionMemoryManager.h
+++ b/contrib/llvm/include/llvm/ExecutionEngine/SectionMemoryManager.h
@@ -16,11 +16,15 @@
#define LLVM_EXECUTIONENGINE_SECTIONMEMORYMANAGER_H
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
-#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Memory.h"
+#include <cstdint>
+#include <string>
+#include <system_error>
namespace llvm {
+
/// This is a simple memory manager which implements the methods called by
/// the RuntimeDyld class to allocate memory for section-based loading of
/// objects, usually those generated by the MCJIT execution engine.
@@ -35,11 +39,10 @@ namespace llvm {
/// MCJIT::finalizeObject or by calling SectionMemoryManager::finalizeMemory
/// directly. Clients of MCJIT should call MCJIT::finalizeObject.
class SectionMemoryManager : public RTDyldMemoryManager {
+public:
+ SectionMemoryManager() = default;
SectionMemoryManager(const SectionMemoryManager&) = delete;
void operator=(const SectionMemoryManager&) = delete;
-
-public:
- SectionMemoryManager() { }
~SectionMemoryManager() override;
/// \brief Allocates a memory block of (at least) the given size suitable for
@@ -118,6 +121,6 @@ private:
MemoryGroup RODataMem;
};
-}
+} // end namespace llvm
#endif // LLVM_EXECUTION_ENGINE_SECTION_MEMORY_MANAGER_H
diff --git a/contrib/llvm/include/llvm/IR/Attributes.h b/contrib/llvm/include/llvm/IR/Attributes.h
index 5ef03715b9a9..15783858dd32 100644
--- a/contrib/llvm/include/llvm/IR/Attributes.h
+++ b/contrib/llvm/include/llvm/IR/Attributes.h
@@ -68,7 +68,7 @@ public:
// IR-Level Attributes
None, ///< No attributes have been set
#define GET_ATTR_ENUM
- #include "llvm/IR/Attributes.inc"
+ #include "llvm/IR/Attributes.gen"
EndAttrKinds ///< Sentinel value useful for loops
};
@@ -338,6 +338,10 @@ public:
/// may be faster.
bool hasFnAttribute(Attribute::AttrKind Kind) const;
+ /// \brief Equivalent to hasAttribute(AttributeSet::FunctionIndex, Kind) but
+ /// may be faster.
+ bool hasFnAttribute(StringRef Kind) const;
+
/// \brief Return true if the specified attribute is set for at least one
/// parameter or for the return value. If Index is not nullptr, the index
/// of a parameter with the specified attribute is provided.
@@ -387,9 +391,6 @@ public:
// AttributeSet Introspection
//===--------------------------------------------------------------------===//
- // FIXME: Remove this.
- uint64_t Raw(unsigned Index) const;
-
/// \brief Return a raw pointer that uniquely identifies this attribute list.
void *getRawPointer() const {
return pImpl;
@@ -454,11 +455,6 @@ public:
AttrBuilder()
: Attrs(0), Alignment(0), StackAlignment(0), DerefBytes(0),
DerefOrNullBytes(0), AllocSizeArgs(0) {}
- explicit AttrBuilder(uint64_t Val)
- : Attrs(0), Alignment(0), StackAlignment(0), DerefBytes(0),
- DerefOrNullBytes(0), AllocSizeArgs(0) {
- addRawValue(Val);
- }
AttrBuilder(const Attribute &A)
: Attrs(0), Alignment(0), StackAlignment(0), DerefBytes(0),
DerefOrNullBytes(0), AllocSizeArgs(0) {
@@ -586,11 +582,6 @@ public:
bool operator!=(const AttrBuilder &B) {
return !(*this == B);
}
-
- // FIXME: Remove this in 4.0.
-
- /// \brief Add the raw value to the internal representation.
- AttrBuilder &addRawValue(uint64_t Val);
};
namespace AttributeFuncs {
diff --git a/contrib/llvm/include/llvm/IR/AutoUpgrade.h b/contrib/llvm/include/llvm/IR/AutoUpgrade.h
index 9eb358682c65..b42a3d3ad955 100644
--- a/contrib/llvm/include/llvm/IR/AutoUpgrade.h
+++ b/contrib/llvm/include/llvm/IR/AutoUpgrade.h
@@ -51,9 +51,10 @@ namespace llvm {
/// module is modified.
bool UpgradeModuleFlags(Module &M);
- /// If the TBAA tag for the given instruction uses the scalar TBAA format,
- /// we upgrade it to the struct-path aware TBAA format.
- void UpgradeInstWithTBAATag(Instruction *I);
+ /// If the given TBAA tag uses the scalar TBAA format, create a new node
+ /// corresponding to the upgrade to the struct-path aware TBAA format.
+ /// Otherwise return the \p TBAANode itself.
+ MDNode *UpgradeTBAANode(MDNode &TBAANode);
/// This is an auto-upgrade for bitcast between pointers with different
/// address spaces: the instruction is replaced by a pair ptrtoint+inttoptr.
diff --git a/contrib/llvm/include/llvm/IR/BasicBlock.h b/contrib/llvm/include/llvm/IR/BasicBlock.h
index e7daf6ee238e..93dbd573ee16 100644
--- a/contrib/llvm/include/llvm/IR/BasicBlock.h
+++ b/contrib/llvm/include/llvm/IR/BasicBlock.h
@@ -14,25 +14,24 @@
#ifndef LLVM_IR_BASICBLOCK_H
#define LLVM_IR_BASICBLOCK_H
-#include "llvm/ADT/Twine.h"
#include "llvm/ADT/ilist.h"
+#include "llvm/ADT/ilist_node.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/SymbolTableListTraits.h"
+#include "llvm/IR/Value.h"
#include "llvm/Support/CBindingWrapping.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm-c/Types.h"
+#include <cassert>
+#include <cstddef>
namespace llvm {
class CallInst;
+class Function;
class LandingPadInst;
-class TerminatorInst;
class LLVMContext;
-class BlockAddress;
-class Function;
-
-template <>
-struct SymbolTableListSentinelTraits<BasicBlock>
- : public ilist_half_embedded_sentinel_traits<BasicBlock> {};
+class TerminatorInst;
/// \brief LLVM Basic Block Representation
///
@@ -51,19 +50,17 @@ struct SymbolTableListSentinelTraits<BasicBlock>
/// are "well formed".
class BasicBlock : public Value, // Basic blocks are data objects also
public ilist_node_with_parent<BasicBlock, Function> {
- friend class BlockAddress;
public:
typedef SymbolTableList<Instruction> InstListType;
private:
+ friend class BlockAddress;
+ friend class SymbolTableListTraits<BasicBlock>;
+
InstListType InstList;
Function *Parent;
void setParent(Function *parent);
- friend class SymbolTableListTraits<BasicBlock>;
-
- BasicBlock(const BasicBlock &) = delete;
- void operator=(const BasicBlock &) = delete;
/// \brief Constructor.
///
@@ -73,7 +70,12 @@ private:
explicit BasicBlock(LLVMContext &C, const Twine &Name = "",
Function *Parent = nullptr,
BasicBlock *InsertBefore = nullptr);
+
public:
+ BasicBlock(const BasicBlock &) = delete;
+ BasicBlock &operator=(const BasicBlock &) = delete;
+ ~BasicBlock() override;
+
/// \brief Get the context in which this basic block lives.
LLVMContext &getContext() const;
@@ -93,7 +95,6 @@ public:
BasicBlock *InsertBefore = nullptr) {
return new BasicBlock(Context, Name, Parent, InsertBefore);
}
- ~BasicBlock() override;
/// \brief Return the enclosing method, or null if none.
const Function *getParent() const { return Parent; }
@@ -334,6 +335,7 @@ private:
assert((int)(signed char)getSubclassDataFromValue() >= 0 &&
"Refcount wrap-around");
}
+
/// \brief Shadow Value::setValueSubclassData with a private forwarding method
/// so that any future subclasses cannot accidentally use it.
void setValueSubclassData(unsigned short D) {
@@ -344,6 +346,6 @@ private:
// Create wrappers for C Binding types (see CBindingWrapping.h).
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(BasicBlock, LLVMBasicBlockRef)
-} // End llvm namespace
+} // end namespace llvm
-#endif
+#endif // LLVM_IR_BASICBLOCK_H
diff --git a/contrib/llvm/include/llvm/IR/CFG.h b/contrib/llvm/include/llvm/IR/CFG.h
index a256b5960bb3..52de11a06baf 100644
--- a/contrib/llvm/include/llvm/IR/CFG.h
+++ b/contrib/llvm/include/llvm/IR/CFG.h
@@ -16,9 +16,17 @@
#define LLVM_IR_CFG_H
#include "llvm/ADT/GraphTraits.h"
+#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/type_traits.h"
+#include <cassert>
+#include <cstddef>
+#include <iterator>
namespace llvm {
@@ -44,7 +52,7 @@ public:
typedef typename super::pointer pointer;
typedef typename super::reference reference;
- PredIterator() {}
+ PredIterator() = default;
explicit inline PredIterator(Ptr *bb) : It(bb->user_begin()) {
advancePastNonTerminators();
}
@@ -85,8 +93,8 @@ public:
typedef PredIterator<BasicBlock, Value::user_iterator> pred_iterator;
typedef PredIterator<const BasicBlock,
Value::const_user_iterator> const_pred_iterator;
-typedef llvm::iterator_range<pred_iterator> pred_range;
-typedef llvm::iterator_range<const_pred_iterator> pred_const_range;
+typedef iterator_range<pred_iterator> pred_range;
+typedef iterator_range<const_pred_iterator> pred_const_range;
inline pred_iterator pred_begin(BasicBlock *BB) { return pred_iterator(BB); }
inline const_pred_iterator pred_begin(const BasicBlock *BB) {
@@ -114,8 +122,8 @@ typedef TerminatorInst::SuccIterator<TerminatorInst *, BasicBlock>
succ_iterator;
typedef TerminatorInst::SuccIterator<const TerminatorInst *, const BasicBlock>
succ_const_iterator;
-typedef llvm::iterator_range<succ_iterator> succ_range;
-typedef llvm::iterator_range<succ_const_iterator> succ_const_range;
+typedef iterator_range<succ_iterator> succ_range;
+typedef iterator_range<succ_const_iterator> succ_const_range;
inline succ_iterator succ_begin(BasicBlock *BB) {
return succ_iterator(BB->getTerminator());
@@ -144,8 +152,6 @@ struct isPodLike<TerminatorInst::SuccIterator<T, U>> {
static const bool value = isPodLike<T>::value;
};
-
-
//===--------------------------------------------------------------------===//
// GraphTraits specializations for basic block graphs (CFGs)
//===--------------------------------------------------------------------===//
@@ -154,32 +160,22 @@ struct isPodLike<TerminatorInst::SuccIterator<T, U>> {
// graph of basic blocks...
template <> struct GraphTraits<BasicBlock*> {
- typedef BasicBlock NodeType;
typedef BasicBlock *NodeRef;
typedef succ_iterator ChildIteratorType;
- static NodeType *getEntryNode(BasicBlock *BB) { return BB; }
- static inline ChildIteratorType child_begin(NodeType *N) {
- return succ_begin(N);
- }
- static inline ChildIteratorType child_end(NodeType *N) {
- return succ_end(N);
- }
+ static NodeRef getEntryNode(BasicBlock *BB) { return BB; }
+ static ChildIteratorType child_begin(NodeRef N) { return succ_begin(N); }
+ static ChildIteratorType child_end(NodeRef N) { return succ_end(N); }
};
template <> struct GraphTraits<const BasicBlock*> {
- typedef const BasicBlock NodeType;
typedef const BasicBlock *NodeRef;
typedef succ_const_iterator ChildIteratorType;
- static NodeType *getEntryNode(const BasicBlock *BB) { return BB; }
+ static NodeRef getEntryNode(const BasicBlock *BB) { return BB; }
- static inline ChildIteratorType child_begin(NodeType *N) {
- return succ_begin(N);
- }
- static inline ChildIteratorType child_end(NodeType *N) {
- return succ_end(N);
- }
+ static ChildIteratorType child_begin(NodeRef N) { return succ_begin(N); }
+ static ChildIteratorType child_end(NodeRef N) { return succ_end(N); }
};
// Provide specializations of GraphTraits to be able to treat a function as a
@@ -187,36 +183,22 @@ template <> struct GraphTraits<const BasicBlock*> {
// a function is considered to be when traversing the predecessor edges of a BB
// instead of the successor edges.
//
-template <> struct GraphTraits<Inverse<BasicBlock*> > {
- typedef BasicBlock NodeType;
+template <> struct GraphTraits<Inverse<BasicBlock*>> {
typedef BasicBlock *NodeRef;
typedef pred_iterator ChildIteratorType;
- static NodeType *getEntryNode(Inverse<BasicBlock *> G) { return G.Graph; }
- static inline ChildIteratorType child_begin(NodeType *N) {
- return pred_begin(N);
- }
- static inline ChildIteratorType child_end(NodeType *N) {
- return pred_end(N);
- }
+ static NodeRef getEntryNode(Inverse<BasicBlock *> G) { return G.Graph; }
+ static ChildIteratorType child_begin(NodeRef N) { return pred_begin(N); }
+ static ChildIteratorType child_end(NodeRef N) { return pred_end(N); }
};
-template <> struct GraphTraits<Inverse<const BasicBlock*> > {
- typedef const BasicBlock NodeType;
+template <> struct GraphTraits<Inverse<const BasicBlock*>> {
typedef const BasicBlock *NodeRef;
typedef const_pred_iterator ChildIteratorType;
- static NodeType *getEntryNode(Inverse<const BasicBlock*> G) {
- return G.Graph;
- }
- static inline ChildIteratorType child_begin(NodeType *N) {
- return pred_begin(N);
- }
- static inline ChildIteratorType child_end(NodeType *N) {
- return pred_end(N);
- }
+ static NodeRef getEntryNode(Inverse<const BasicBlock *> G) { return G.Graph; }
+ static ChildIteratorType child_begin(NodeRef N) { return pred_begin(N); }
+ static ChildIteratorType child_end(NodeRef N) { return pred_end(N); }
};
-
-
//===--------------------------------------------------------------------===//
// GraphTraits specializations for function basic block graphs (CFGs)
//===--------------------------------------------------------------------===//
@@ -226,44 +208,57 @@ template <> struct GraphTraits<Inverse<const BasicBlock*> > {
// except that the root node is implicitly the first node of the function.
//
template <> struct GraphTraits<Function*> : public GraphTraits<BasicBlock*> {
- static NodeType *getEntryNode(Function *F) { return &F->getEntryBlock(); }
+ static NodeRef getEntryNode(Function *F) { return &F->getEntryBlock(); }
// nodes_iterator/begin/end - Allow iteration over all nodes in the graph
- typedef Function::iterator nodes_iterator;
- static nodes_iterator nodes_begin(Function *F) { return F->begin(); }
- static nodes_iterator nodes_end (Function *F) { return F->end(); }
- static size_t size (Function *F) { return F->size(); }
+ typedef pointer_iterator<Function::iterator> nodes_iterator;
+
+ static nodes_iterator nodes_begin(Function *F) {
+ return nodes_iterator(F->begin());
+ }
+
+ static nodes_iterator nodes_end(Function *F) {
+ return nodes_iterator(F->end());
+ }
+
+ static size_t size(Function *F) { return F->size(); }
};
template <> struct GraphTraits<const Function*> :
public GraphTraits<const BasicBlock*> {
- static NodeType *getEntryNode(const Function *F) {return &F->getEntryBlock();}
+ static NodeRef getEntryNode(const Function *F) { return &F->getEntryBlock(); }
// nodes_iterator/begin/end - Allow iteration over all nodes in the graph
- typedef Function::const_iterator nodes_iterator;
- static nodes_iterator nodes_begin(const Function *F) { return F->begin(); }
- static nodes_iterator nodes_end (const Function *F) { return F->end(); }
- static size_t size (const Function *F) { return F->size(); }
-};
+ typedef pointer_iterator<Function::const_iterator> nodes_iterator;
+
+ static nodes_iterator nodes_begin(const Function *F) {
+ return nodes_iterator(F->begin());
+ }
+ static nodes_iterator nodes_end(const Function *F) {
+ return nodes_iterator(F->end());
+ }
+
+ static size_t size(const Function *F) { return F->size(); }
+};
// Provide specializations of GraphTraits to be able to treat a function as a
// graph of basic blocks... and to walk it in inverse order. Inverse order for
// a function is considered to be when traversing the predecessor edges of a BB
// instead of the successor edges.
//
-template <> struct GraphTraits<Inverse<Function*> > :
- public GraphTraits<Inverse<BasicBlock*> > {
- static NodeType *getEntryNode(Inverse<Function*> G) {
+template <> struct GraphTraits<Inverse<Function*>> :
+ public GraphTraits<Inverse<BasicBlock*>> {
+ static NodeRef getEntryNode(Inverse<Function *> G) {
return &G.Graph->getEntryBlock();
}
};
-template <> struct GraphTraits<Inverse<const Function*> > :
- public GraphTraits<Inverse<const BasicBlock*> > {
- static NodeType *getEntryNode(Inverse<const Function *> G) {
+template <> struct GraphTraits<Inverse<const Function*>> :
+ public GraphTraits<Inverse<const BasicBlock*>> {
+ static NodeRef getEntryNode(Inverse<const Function *> G) {
return &G.Graph->getEntryBlock();
}
};
-} // End llvm namespace
+} // end namespace llvm
-#endif
+#endif // LLVM_IR_CFG_H
diff --git a/contrib/llvm/include/llvm/IR/CallSite.h b/contrib/llvm/include/llvm/IR/CallSite.h
index 9c977aef941a..b02c89474146 100644
--- a/contrib/llvm/include/llvm/IR/CallSite.h
+++ b/contrib/llvm/include/llvm/IR/CallSite.h
@@ -26,17 +26,26 @@
#ifndef LLVM_IR_CALLSITE_H
#define LLVM_IR_CALLSITE_H
-#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/iterator_range.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/PointerIntPair.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
namespace llvm {
-class CallInst;
-class InvokeInst;
-
template <typename FunTy = const Function,
typename BBTy = const BasicBlock,
typename ValTy = const Value,
@@ -109,6 +118,17 @@ public:
*getCallee() = V;
}
+ /// Return the intrinsic ID of the intrinsic called by this CallSite,
+ /// or Intrinsic::not_intrinsic if the called function is not an
+ /// intrinsic, or if this CallSite is an indirect call.
+ Intrinsic::ID getIntrinsicID() const {
+ if (auto *F = getCalledFunction())
+ return F->getIntrinsicID();
+ // Don't use Intrinsic::not_intrinsic, as it will require pulling
+ // Intrinsics.h into every header that uses CallSite.
+ return static_cast<Intrinsic::ID>(0);
+ }
+
/// isCallee - Determine whether the passed iterator points to the
/// callee operand's Use.
bool isCallee(Value::const_user_iterator UI) const {
@@ -302,10 +322,10 @@ public:
/// getAttributes/setAttributes - get or set the parameter attributes of
/// the call.
- const AttributeSet &getAttributes() const {
+ AttributeSet getAttributes() const {
CALLSITE_DELEGATE_GETTER(getAttributes());
}
- void setAttributes(const AttributeSet &PAL) {
+ void setAttributes(AttributeSet PAL) {
CALLSITE_DELEGATE_SETTER(setAttributes(PAL));
}
@@ -313,10 +333,6 @@ public:
CALLSITE_DELEGATE_SETTER(addAttribute(i, Kind));
}
- void addAttribute(unsigned i, StringRef Kind, StringRef Value) {
- CALLSITE_DELEGATE_SETTER(addAttribute(i, Kind, Value));
- }
-
void addAttribute(unsigned i, Attribute Attr) {
CALLSITE_DELEGATE_SETTER(addAttribute(i, Attr));
}
@@ -329,10 +345,6 @@ public:
CALLSITE_DELEGATE_SETTER(removeAttribute(i, Kind));
}
- void removeAttribute(unsigned i, Attribute Attr) {
- CALLSITE_DELEGATE_SETTER(removeAttribute(i, Attr));
- }
-
/// \brief Return true if this function has the given attribute.
bool hasFnAttr(Attribute::AttrKind Kind) const {
CALLSITE_DELEGATE_GETTER(hasFnAttr(Kind));
@@ -509,6 +521,10 @@ public:
CALLSITE_DELEGATE_GETTER(countOperandBundlesOfType(ID));
}
+ bool isBundleOperand(unsigned Idx) const {
+ CALLSITE_DELEGATE_GETTER(isBundleOperand(Idx));
+ }
+
IterTy arg_begin() const {
CALLSITE_DELEGATE_GETTER(arg_begin());
}
@@ -602,7 +618,7 @@ class CallSite : public CallSiteBase<Function, BasicBlock, Value, User, Use,
Instruction, CallInst, InvokeInst,
User::op_iterator> {
public:
- CallSite() {}
+ CallSite() = default;
CallSite(CallSiteBase B) : CallSiteBase(B) {}
CallSite(CallInst *CI) : CallSiteBase(CI) {}
CallSite(InvokeInst *II) : CallSiteBase(II) {}
@@ -616,13 +632,39 @@ public:
}
private:
+ friend struct DenseMapInfo<CallSite>;
+
User::op_iterator getCallee() const;
};
+template <> struct DenseMapInfo<CallSite> {
+ using BaseInfo = DenseMapInfo<decltype(CallSite::I)>;
+
+ static CallSite getEmptyKey() {
+ CallSite CS;
+ CS.I = BaseInfo::getEmptyKey();
+ return CS;
+ }
+
+ static CallSite getTombstoneKey() {
+ CallSite CS;
+ CS.I = BaseInfo::getTombstoneKey();
+ return CS;
+ }
+
+ static unsigned getHashValue(const CallSite &CS) {
+ return BaseInfo::getHashValue(CS.I);
+ }
+
+ static bool isEqual(const CallSite &LHS, const CallSite &RHS) {
+ return LHS == RHS;
+ }
+};
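The new DenseMapInfo specialization lets CallSite be used directly as a DenseMap key; for instance (sketch only, with a made-up analysis helper):

#include "llvm/ADT/DenseMap.h"
#include "llvm/IR/CallSite.h"

// Tally how often each call site is visited during some hypothetical pass.
void recordVisit(llvm::DenseMap<llvm::CallSite, unsigned> &VisitCounts,
                 llvm::CallSite CS) {
  ++VisitCounts[CS];
}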
+
/// ImmutableCallSite - establish a view to a call site for examination
class ImmutableCallSite : public CallSiteBase<> {
public:
- ImmutableCallSite() {}
+ ImmutableCallSite() = default;
ImmutableCallSite(const CallInst *CI) : CallSiteBase(CI) {}
ImmutableCallSite(const InvokeInst *II) : CallSiteBase(II) {}
explicit ImmutableCallSite(const Instruction *II) : CallSiteBase(II) {}
@@ -630,6 +672,6 @@ public:
ImmutableCallSite(CallSite CS) : CallSiteBase(CS.getInstruction()) {}
};
-} // End llvm namespace
+} // end namespace llvm
-#endif
+#endif // LLVM_IR_CALLSITE_H
diff --git a/contrib/llvm/include/llvm/IR/CallingConv.h b/contrib/llvm/include/llvm/IR/CallingConv.h
index 4987b7e943f2..9cfbda1f6857 100644
--- a/contrib/llvm/include/llvm/IR/CallingConv.h
+++ b/contrib/llvm/include/llvm/IR/CallingConv.h
@@ -193,6 +193,9 @@ namespace CallingConv {
/// Calling convention for AMDGPU code object kernels.
AMDGPU_KERNEL = 91,
+ /// Register calling convention used for parameters transfer optimization
+ X86_RegCall = 92,
+
/// The highest possible calling convention ID. Must be some 2^k - 1.
MaxID = 1023
};
diff --git a/contrib/llvm/include/llvm/IR/Comdat.h b/contrib/llvm/include/llvm/IR/Comdat.h
index 577247f27e20..f4a391c31ae2 100644
--- a/contrib/llvm/include/llvm/IR/Comdat.h
+++ b/contrib/llvm/include/llvm/IR/Comdat.h
@@ -36,7 +36,9 @@ public:
SameSize, ///< The data referenced by the COMDAT must be the same size.
};
+ Comdat(const Comdat &) = delete;
Comdat(Comdat &&C);
+
SelectionKind getSelectionKind() const { return SK; }
void setSelectionKind(SelectionKind Val) { SK = Val; }
StringRef getName() const;
@@ -45,8 +47,8 @@ public:
private:
friend class Module;
+
Comdat();
- Comdat(const Comdat &) = delete;
// Points to the map in Module.
StringMapEntry<Comdat> *Name;
@@ -58,6 +60,6 @@ inline raw_ostream &operator<<(raw_ostream &OS, const Comdat &C) {
return OS;
}
-} // end llvm namespace
+} // end namespace llvm
-#endif
+#endif // LLVM_IR_COMDAT_H
diff --git a/contrib/llvm/include/llvm/IR/Constant.h b/contrib/llvm/include/llvm/IR/Constant.h
index 3c5fe556d50f..99c970ebb633 100644
--- a/contrib/llvm/include/llvm/IR/Constant.h
+++ b/contrib/llvm/include/llvm/IR/Constant.h
@@ -15,11 +15,12 @@
#define LLVM_IR_CONSTANT_H
#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
namespace llvm {
- class APInt;
- template<typename T> class SmallVectorImpl;
+class APInt;
/// This is an important base class in LLVM. It provides the common facilities
/// of all constant values in an LLVM program. A constant is a value that is
@@ -39,8 +40,6 @@ namespace llvm {
/// don't have to worry about the lifetime of the objects.
/// @brief LLVM Constant Representation
class Constant : public User {
- void operator=(const Constant &) = delete;
- Constant(const Constant &) = delete;
void anchor() override;
protected:
@@ -48,6 +47,9 @@ protected:
: User(ty, vty, Ops, NumOps) {}
public:
+ void operator=(const Constant &) = delete;
+ Constant(const Constant &) = delete;
+
/// Return true if this is the value that would be returned by getNullValue.
bool isNullValue() const;
@@ -159,6 +161,6 @@ public:
}
};
-} // End llvm namespace
+} // end namespace llvm
-#endif
+#endif // LLVM_IR_CONSTANT_H
diff --git a/contrib/llvm/include/llvm/IR/ConstantFolder.h b/contrib/llvm/include/llvm/IR/ConstantFolder.h
index fb6ca3b3184c..da5bba7ba141 100644
--- a/contrib/llvm/include/llvm/IR/ConstantFolder.h
+++ b/contrib/llvm/include/llvm/IR/ConstantFolder.h
@@ -17,15 +17,17 @@
#ifndef LLVM_IR_CONSTANTFOLDER_H
#define LLVM_IR_CONSTANTFOLDER_H
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
namespace llvm {
/// ConstantFolder - Create constants with minimum, target independent, folding.
class ConstantFolder {
public:
- explicit ConstantFolder() {}
+ explicit ConstantFolder() = default;
//===--------------------------------------------------------------------===//
// Binary Operators
@@ -35,61 +37,78 @@ public:
bool HasNUW = false, bool HasNSW = false) const {
return ConstantExpr::getAdd(LHS, RHS, HasNUW, HasNSW);
}
+
Constant *CreateFAdd(Constant *LHS, Constant *RHS) const {
return ConstantExpr::getFAdd(LHS, RHS);
}
+
Constant *CreateSub(Constant *LHS, Constant *RHS,
bool HasNUW = false, bool HasNSW = false) const {
return ConstantExpr::getSub(LHS, RHS, HasNUW, HasNSW);
}
+
Constant *CreateFSub(Constant *LHS, Constant *RHS) const {
return ConstantExpr::getFSub(LHS, RHS);
}
+
Constant *CreateMul(Constant *LHS, Constant *RHS,
bool HasNUW = false, bool HasNSW = false) const {
return ConstantExpr::getMul(LHS, RHS, HasNUW, HasNSW);
}
+
Constant *CreateFMul(Constant *LHS, Constant *RHS) const {
return ConstantExpr::getFMul(LHS, RHS);
}
+
Constant *CreateUDiv(Constant *LHS, Constant *RHS,
bool isExact = false) const {
return ConstantExpr::getUDiv(LHS, RHS, isExact);
}
+
Constant *CreateSDiv(Constant *LHS, Constant *RHS,
bool isExact = false) const {
return ConstantExpr::getSDiv(LHS, RHS, isExact);
}
+
Constant *CreateFDiv(Constant *LHS, Constant *RHS) const {
return ConstantExpr::getFDiv(LHS, RHS);
}
+
Constant *CreateURem(Constant *LHS, Constant *RHS) const {
return ConstantExpr::getURem(LHS, RHS);
}
+
Constant *CreateSRem(Constant *LHS, Constant *RHS) const {
return ConstantExpr::getSRem(LHS, RHS);
}
+
Constant *CreateFRem(Constant *LHS, Constant *RHS) const {
return ConstantExpr::getFRem(LHS, RHS);
}
+
Constant *CreateShl(Constant *LHS, Constant *RHS,
bool HasNUW = false, bool HasNSW = false) const {
return ConstantExpr::getShl(LHS, RHS, HasNUW, HasNSW);
}
+
Constant *CreateLShr(Constant *LHS, Constant *RHS,
bool isExact = false) const {
return ConstantExpr::getLShr(LHS, RHS, isExact);
}
+
Constant *CreateAShr(Constant *LHS, Constant *RHS,
bool isExact = false) const {
return ConstantExpr::getAShr(LHS, RHS, isExact);
}
+
Constant *CreateAnd(Constant *LHS, Constant *RHS) const {
return ConstantExpr::getAnd(LHS, RHS);
}
+
Constant *CreateOr(Constant *LHS, Constant *RHS) const {
return ConstantExpr::getOr(LHS, RHS);
}
+
Constant *CreateXor(Constant *LHS, Constant *RHS) const {
return ConstantExpr::getXor(LHS, RHS);
}
@@ -107,9 +126,11 @@ public:
bool HasNUW = false, bool HasNSW = false) const {
return ConstantExpr::getNeg(C, HasNUW, HasNSW);
}
+
Constant *CreateFNeg(Constant *C) const {
return ConstantExpr::getFNeg(C);
}
+
Constant *CreateNot(Constant *C) const {
return ConstantExpr::getNot(C);
}
@@ -122,12 +143,14 @@ public:
ArrayRef<Constant *> IdxList) const {
return ConstantExpr::getGetElementPtr(Ty, C, IdxList);
}
+
Constant *CreateGetElementPtr(Type *Ty, Constant *C, Constant *Idx) const {
// This form of the function only exists to avoid ambiguous overload
// warnings about whether to convert Idx to ArrayRef<Constant *> or
// ArrayRef<Value *>.
return ConstantExpr::getGetElementPtr(Ty, C, Idx);
}
+
Constant *CreateGetElementPtr(Type *Ty, Constant *C,
ArrayRef<Value *> IdxList) const {
return ConstantExpr::getGetElementPtr(Ty, C, IdxList);
@@ -137,6 +160,7 @@ public:
ArrayRef<Constant *> IdxList) const {
return ConstantExpr::getInBoundsGetElementPtr(Ty, C, IdxList);
}
+
Constant *CreateInBoundsGetElementPtr(Type *Ty, Constant *C,
Constant *Idx) const {
// This form of the function only exists to avoid ambiguous overload
@@ -144,6 +168,7 @@ public:
// ArrayRef<Value *>.
return ConstantExpr::getInBoundsGetElementPtr(Ty, C, Idx);
}
+
Constant *CreateInBoundsGetElementPtr(Type *Ty, Constant *C,
ArrayRef<Value *> IdxList) const {
return ConstantExpr::getInBoundsGetElementPtr(Ty, C, IdxList);
@@ -157,6 +182,7 @@ public:
Type *DestTy) const {
return ConstantExpr::getCast(Op, C, DestTy);
}
+
Constant *CreatePointerCast(Constant *C, Type *DestTy) const {
return ConstantExpr::getPointerCast(C, DestTy);
}
@@ -170,6 +196,7 @@ public:
bool isSigned) const {
return ConstantExpr::getIntegerCast(C, DestTy, isSigned);
}
+
Constant *CreateFPCast(Constant *C, Type *DestTy) const {
return ConstantExpr::getFPCast(C, DestTy);
}
@@ -177,15 +204,19 @@ public:
Constant *CreateBitCast(Constant *C, Type *DestTy) const {
return CreateCast(Instruction::BitCast, C, DestTy);
}
+
Constant *CreateIntToPtr(Constant *C, Type *DestTy) const {
return CreateCast(Instruction::IntToPtr, C, DestTy);
}
+
Constant *CreatePtrToInt(Constant *C, Type *DestTy) const {
return CreateCast(Instruction::PtrToInt, C, DestTy);
}
+
Constant *CreateZExtOrBitCast(Constant *C, Type *DestTy) const {
return ConstantExpr::getZExtOrBitCast(C, DestTy);
}
+
Constant *CreateSExtOrBitCast(Constant *C, Type *DestTy) const {
return ConstantExpr::getSExtOrBitCast(C, DestTy);
}
@@ -202,6 +233,7 @@ public:
Constant *RHS) const {
return ConstantExpr::getCompare(P, LHS, RHS);
}
+
Constant *CreateFCmp(CmpInst::Predicate P, Constant *LHS,
Constant *RHS) const {
return ConstantExpr::getCompare(P, LHS, RHS);
@@ -240,6 +272,6 @@ public:
}
};
-}
+} // end namespace llvm
-#endif
+#endif // LLVM_IR_CONSTANTFOLDER_H
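
A minimal sketch of how the folder above is driven, assuming an existing LLVMContext; the function name is made up for illustration. Every Create* call simply forwards to the matching ConstantExpr::get* helper, so the result is always an already-folded Constant.

#include "llvm/IR/ConstantFolder.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Type.h"

using namespace llvm;

Constant *foldExample(LLVMContext &Ctx) {
  ConstantFolder Folder;
  Type *I32 = Type::getInt32Ty(Ctx);
  Constant *A = ConstantInt::get(I32, 6);
  Constant *B = ConstantInt::get(I32, 7);
  // 6 * 7 folds directly to i32 42; the NUW/NSW parameters default to false.
  Constant *Mul = Folder.CreateMul(A, B);
  // 42 << 1 folds to i32 84.
  return Folder.CreateShl(Mul, ConstantInt::get(I32, 1));
}
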
diff --git a/contrib/llvm/include/llvm/IR/ConstantRange.h b/contrib/llvm/include/llvm/IR/ConstantRange.h
index 9458fa9f5c86..27a9b1364448 100644
--- a/contrib/llvm/include/llvm/IR/ConstantRange.h
+++ b/contrib/llvm/include/llvm/IR/ConstantRange.h
@@ -38,6 +38,8 @@
namespace llvm {
+class MDNode;
+
/// This class represents a range of values.
///
class ConstantRange {
@@ -166,6 +168,14 @@ public:
return nullptr;
}
+ /// If this set contains all but a single element, return it; otherwise return
+ /// null.
+ const APInt *getSingleMissingElement() const {
+ if (Lower == Upper + 1)
+ return &Upper;
+ return nullptr;
+ }
+
/// Return true if this set contains exactly one member.
///
bool isSingleElement() const { return getSingleElement() != nullptr; }
@@ -223,6 +233,15 @@ public:
///
ConstantRange unionWith(const ConstantRange &CR) const;
+ /// Return a new range representing the possible values resulting
+ /// from an application of the specified cast operator to this range. \p
+ /// BitWidth is the target bitwidth of the cast. For casts which don't
+ /// change bitwidth, it must be the same as the source bitwidth. For casts
+ /// which do change bitwidth, the bitwidth must be consistent with the
+ /// requested cast and source bitwidth.
+ ConstantRange castOp(Instruction::CastOps CastOp,
+ uint32_t BitWidth) const;
+
/// Return a new range in the specified integer type, which must
/// be strictly larger than the current type. The returned range will
/// correspond to the possible range of values if the source range had been
@@ -250,9 +269,19 @@ public:
ConstantRange sextOrTrunc(uint32_t BitWidth) const;
/// Return a new range representing the possible values resulting
+ /// from an application of the specified binary operator to a left hand side
+ /// of this range and a right hand side of \p Other.
+ ConstantRange binaryOp(Instruction::BinaryOps BinOp,
+ const ConstantRange &Other) const;
+
+ /// Return a new range representing the possible values resulting
/// from an addition of a value in this range and a value in \p Other.
ConstantRange add(const ConstantRange &Other) const;
+ /// Return a new range representing the possible values resulting from a
+ /// known NSW addition of a value in this range and \p Other constant.
+ ConstantRange addWithNoSignedWrap(const APInt &Other) const;
+
/// Return a new range representing the possible values resulting
/// from a subtraction of a value in this range and a value in \p Other.
ConstantRange sub(const ConstantRange &Other) const;
@@ -318,6 +347,11 @@ inline raw_ostream &operator<<(raw_ostream &OS, const ConstantRange &CR) {
return OS;
}
+/// Parse out a conservative ConstantRange from !range metadata.
+///
+/// E.g. if RangeMD is !{i32 0, i32 10, i32 15, i32 20} then return [0, 20).
+ConstantRange getConstantRangeFromMetadata(const MDNode &RangeMD);
+
} // End llvm namespace
#endif
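
A short sketch of the ConstantRange entry points added above (castOp, binaryOp, addWithNoSignedWrap, getSingleMissingElement); the concrete ranges and the function name are made up for illustration.

#include "llvm/ADT/APInt.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Instruction.h"

using namespace llvm;

void rangeExamples() {
  ConstantRange Small(APInt(8, 10), APInt(8, 20)); // [10, 20) over i8
  ConstantRange Off(APInt(8, 1), APInt(8, 3));     // [1, 3) over i8

  // Generic binary-operator form; equivalent to Small.add(Off) here.
  ConstantRange Sum = Small.binaryOp(Instruction::Add, Off);

  // Generic cast form; 16 is the destination bitwidth of the zext.
  ConstantRange Wide = Sum.castOp(Instruction::ZExt, 16);

  // Add a constant offset under the no-signed-wrap assumption.
  ConstantRange Shifted = Small.addWithNoSignedWrap(APInt(8, 5));

  // The wrapped set [6, 5) contains every i8 value except 5.
  ConstantRange AllButFive(APInt(8, 6), APInt(8, 5));
  const APInt *Missing = AllButFive.getSingleMissingElement(); // points at 5
  (void)Wide; (void)Shifted; (void)Missing;
}
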
diff --git a/contrib/llvm/include/llvm/IR/Constants.h b/contrib/llvm/include/llvm/IR/Constants.h
index 2a5d14d94646..cbefa3f05dfc 100644
--- a/contrib/llvm/include/llvm/IR/Constants.h
+++ b/contrib/llvm/include/llvm/IR/Constants.h
@@ -24,20 +24,29 @@
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/OperandTraits.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
namespace llvm {
class ArrayType;
class IntegerType;
-class StructType;
class PointerType;
-class VectorType;
class SequentialType;
-
-struct ConstantExprKeyType;
+class StructType;
+class VectorType;
template <class ConstantClass> struct ConstantAggrKeyType;
/// Base class for constants with no operands.
@@ -46,21 +55,25 @@ template <class ConstantClass> struct ConstantAggrKeyType;
/// Since they can be in use by unrelated modules (and are never based on
/// GlobalValues), it never makes sense to RAUW them.
class ConstantData : public Constant {
+ friend class Constant;
+
void anchor() override;
- void *operator new(size_t, unsigned) = delete;
- ConstantData() = delete;
- ConstantData(const ConstantData &) = delete;
- friend class Constant;
Value *handleOperandChangeImpl(Value *From, Value *To) {
llvm_unreachable("Constant data does not have operands!");
}
protected:
explicit ConstantData(Type *Ty, ValueTy VT) : Constant(Ty, VT, nullptr, 0) {}
+
void *operator new(size_t s) { return User::operator new(s, 0); }
public:
+ ConstantData() = delete;
+ ConstantData(const ConstantData &) = delete;
+
+ void *operator new(size_t, unsigned) = delete;
+
/// Methods to support type inquiry through isa, cast, and dyn_cast.
static bool classof(const Value *V) {
return V->getValueID() >= ConstantDataFirstVal &&
@@ -73,15 +86,18 @@ public:
/// represents both boolean and integral constants.
/// @brief Class for constant integers.
class ConstantInt final : public ConstantData {
- void anchor() override;
- ConstantInt(const ConstantInt &) = delete;
- ConstantInt(IntegerType *Ty, const APInt& V);
+ friend class Constant;
+
APInt Val;
- friend class Constant;
+ ConstantInt(IntegerType *Ty, const APInt& V);
+
+ void anchor() override;
void destroyConstantImpl();
public:
+ ConstantInt(const ConstantInt &) = delete;
+
static ConstantInt *getTrue(LLVMContext &Context);
static ConstantInt *getFalse(LLVMContext &Context);
static Constant *getTrue(Type *Ty);
@@ -247,21 +263,22 @@ public:
}
};
-
//===----------------------------------------------------------------------===//
/// ConstantFP - Floating Point Values [float, double]
///
class ConstantFP final : public ConstantData {
- APFloat Val;
- void anchor() override;
- ConstantFP(const ConstantFP &) = delete;
-
friend class Constant;
- void destroyConstantImpl();
+
+ APFloat Val;
ConstantFP(Type *Ty, const APFloat& V);
+ void anchor() override;
+ void destroyConstantImpl();
+
public:
+ ConstantFP(const ConstantFP &) = delete;
+
/// Floating point negation must be implemented with f(x) = -0.0 - x. This
/// method returns the negative zero constant for floating point or vector
/// floating point types; for all other types, it returns the null value.
@@ -308,6 +325,7 @@ public:
FV.convert(Val.getSemantics(), APFloat::rmNearestTiesToEven, &ignored);
return isExactlyValue(FV);
}
+
/// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const Value *V) {
return V->getValueID() == ConstantFPVal;
@@ -318,15 +336,16 @@ public:
/// All zero aggregate value
///
class ConstantAggregateZero final : public ConstantData {
- ConstantAggregateZero(const ConstantAggregateZero &) = delete;
-
friend class Constant;
- void destroyConstantImpl();
explicit ConstantAggregateZero(Type *Ty)
: ConstantData(Ty, ConstantAggregateZeroVal) {}
+ void destroyConstantImpl();
+
public:
+ ConstantAggregateZero(const ConstantAggregateZero &) = delete;
+
static ConstantAggregateZero *get(Type *Ty);
/// If this CAZ has array or vector type, return a zero with the right element
@@ -392,11 +411,12 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ConstantAggregate, Constant)
class ConstantArray final : public ConstantAggregate {
friend struct ConstantAggrKeyType<ConstantArray>;
friend class Constant;
- void destroyConstantImpl();
- Value *handleOperandChangeImpl(Value *From, Value *To);
ConstantArray(ArrayType *T, ArrayRef<Constant *> Val);
+ void destroyConstantImpl();
+ Value *handleOperandChangeImpl(Value *From, Value *To);
+
public:
// ConstantArray accessors
static Constant *get(ArrayType *T, ArrayRef<Constant*> V);
@@ -423,11 +443,12 @@ public:
class ConstantStruct final : public ConstantAggregate {
friend struct ConstantAggrKeyType<ConstantStruct>;
friend class Constant;
- void destroyConstantImpl();
- Value *handleOperandChangeImpl(Value *From, Value *To);
ConstantStruct(StructType *T, ArrayRef<Constant *> Val);
+ void destroyConstantImpl();
+ Value *handleOperandChangeImpl(Value *From, Value *To);
+
public:
// ConstantStruct accessors
static Constant *get(StructType *T, ArrayRef<Constant*> V);
@@ -463,18 +484,18 @@ public:
}
};
-
//===----------------------------------------------------------------------===//
/// Constant Vector Declarations
///
class ConstantVector final : public ConstantAggregate {
friend struct ConstantAggrKeyType<ConstantVector>;
friend class Constant;
- void destroyConstantImpl();
- Value *handleOperandChangeImpl(Value *From, Value *To);
ConstantVector(VectorType *T, ArrayRef<Constant *> Val);
+ void destroyConstantImpl();
+ Value *handleOperandChangeImpl(Value *From, Value *To);
+
public:
// ConstantVector accessors
static Constant *get(ArrayRef<Constant*> V);
@@ -506,15 +527,16 @@ public:
/// A constant pointer value that points to null
///
class ConstantPointerNull final : public ConstantData {
- ConstantPointerNull(const ConstantPointerNull &) = delete;
-
friend class Constant;
- void destroyConstantImpl();
explicit ConstantPointerNull(PointerType *T)
: ConstantData(T, Value::ConstantPointerNullVal) {}
+ void destroyConstantImpl();
+
public:
+ ConstantPointerNull(const ConstantPointerNull &) = delete;
+
/// Static factory methods - Return objects of the specified value
static ConstantPointerNull *get(PointerType *T);
@@ -541,6 +563,8 @@ public:
///
class ConstantDataSequential : public ConstantData {
friend class LLVMContextImpl;
+ friend class Constant;
+
/// A pointer to the bytes underlying this constant (which is owned by the
/// uniquing StringMap).
const char *DataElements;
@@ -550,9 +574,7 @@ class ConstantDataSequential : public ConstantData {
/// element array of i8, or a 1-element array of i32. They'll both end up in
/// the same StringMap bucket, linked up.
ConstantDataSequential *Next;
- ConstantDataSequential(const ConstantDataSequential &) = delete;
- friend class Constant;
void destroyConstantImpl();
protected:
@@ -563,6 +585,8 @@ protected:
static Constant *getImpl(StringRef Bytes, Type *Ty);
public:
+ ConstantDataSequential(const ConstantDataSequential &) = delete;
+
/// Return true if a ConstantDataSequential can be formed with a vector or
/// array of the specified element type.
/// ConstantDataArray only works with normal float and int types that are
@@ -638,6 +662,7 @@ public:
return V->getValueID() == ConstantDataArrayVal ||
V->getValueID() == ConstantDataVectorVal;
}
+
private:
const char *getElementPointer(unsigned Elt) const;
};
@@ -649,18 +674,23 @@ private:
/// stores all of the elements of the constant as densely packed data, instead
/// of as Value*'s.
class ConstantDataArray final : public ConstantDataSequential {
- void *operator new(size_t, unsigned) = delete;
- ConstantDataArray(const ConstantDataArray &) = delete;
- void anchor() override;
friend class ConstantDataSequential;
+
explicit ConstantDataArray(Type *ty, const char *Data)
: ConstantDataSequential(ty, ConstantDataArrayVal, Data) {}
+
/// Allocate space for exactly zero operands.
void *operator new(size_t s) {
return User::operator new(s, 0);
}
+ void anchor() override;
+
public:
+ ConstantDataArray(const ConstantDataArray &) = delete;
+
+ void *operator new(size_t, unsigned) = delete;
+
/// get() constructors - Return a constant with array type with an element
/// count and element type matching the ArrayRef passed in. Note that this
/// can return a ConstantAggregateZero object.
@@ -707,18 +737,23 @@ public:
/// stores all of the elements of the constant as densely packed data, instead
/// of as Value*'s.
class ConstantDataVector final : public ConstantDataSequential {
- void *operator new(size_t, unsigned) = delete;
- ConstantDataVector(const ConstantDataVector &) = delete;
- void anchor() override;
friend class ConstantDataSequential;
+
explicit ConstantDataVector(Type *ty, const char *Data)
: ConstantDataSequential(ty, ConstantDataVectorVal, Data) {}
+
// allocate space for exactly zero operands.
void *operator new(size_t s) {
return User::operator new(s, 0);
}
+ void anchor() override;
+
public:
+ ConstantDataVector(const ConstantDataVector &) = delete;
+
+ void *operator new(size_t, unsigned) = delete;
+
/// get() constructors - Return a constant with vector type with an element
/// count and element type matching the ArrayRef passed in. Note that this
/// can return a ConstantAggregateZero object.
@@ -763,15 +798,16 @@ public:
/// A constant token which is empty
///
class ConstantTokenNone final : public ConstantData {
- ConstantTokenNone(const ConstantTokenNone &) = delete;
-
friend class Constant;
- void destroyConstantImpl();
explicit ConstantTokenNone(LLVMContext &Context)
: ConstantData(Type::getTokenTy(Context), ConstantTokenNoneVal) {}
+ void destroyConstantImpl();
+
public:
+ ConstantTokenNone(const ConstantTokenNone &) = delete;
+
/// Return the ConstantTokenNone.
static ConstantTokenNone *get(LLVMContext &Context);
@@ -784,15 +820,18 @@ public:
/// The address of a basic block.
///
class BlockAddress final : public Constant {
- void *operator new(size_t, unsigned) = delete;
- void *operator new(size_t s) { return User::operator new(s, 2); }
+ friend class Constant;
+
BlockAddress(Function *F, BasicBlock *BB);
- friend class Constant;
+ void *operator new(size_t s) { return User::operator new(s, 2); }
+
void destroyConstantImpl();
Value *handleOperandChangeImpl(Value *From, Value *To);
public:
+ void *operator new(size_t, unsigned) = delete;
+
/// Return a BlockAddress for the specified function and basic block.
static BlockAddress *get(Function *F, BasicBlock *BB);
@@ -824,7 +863,6 @@ struct OperandTraits<BlockAddress> :
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BlockAddress, Value)
-
//===----------------------------------------------------------------------===//
/// A constant value that is initialized with an expression using
/// other constant values.
@@ -834,8 +872,8 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BlockAddress, Value)
/// maintained in the Value::SubclassData field.
class ConstantExpr : public Constant {
friend struct ConstantExprKeyType;
-
friend class Constant;
+
void destroyConstantImpl();
Value *handleOperandChangeImpl(Value *From, Value *To);
@@ -919,39 +957,51 @@ public:
static Constant *getNSWNeg(Constant *C) { return getNeg(C, false, true); }
static Constant *getNUWNeg(Constant *C) { return getNeg(C, true, false); }
+
static Constant *getNSWAdd(Constant *C1, Constant *C2) {
return getAdd(C1, C2, false, true);
}
+
static Constant *getNUWAdd(Constant *C1, Constant *C2) {
return getAdd(C1, C2, true, false);
}
+
static Constant *getNSWSub(Constant *C1, Constant *C2) {
return getSub(C1, C2, false, true);
}
+
static Constant *getNUWSub(Constant *C1, Constant *C2) {
return getSub(C1, C2, true, false);
}
+
static Constant *getNSWMul(Constant *C1, Constant *C2) {
return getMul(C1, C2, false, true);
}
+
static Constant *getNUWMul(Constant *C1, Constant *C2) {
return getMul(C1, C2, true, false);
}
+
static Constant *getNSWShl(Constant *C1, Constant *C2) {
return getShl(C1, C2, false, true);
}
+
static Constant *getNUWShl(Constant *C1, Constant *C2) {
return getShl(C1, C2, true, false);
}
+
static Constant *getExactSDiv(Constant *C1, Constant *C2) {
return getSDiv(C1, C2, true);
}
+
static Constant *getExactUDiv(Constant *C1, Constant *C2) {
return getUDiv(C1, C2, true);
}
+
static Constant *getExactAShr(Constant *C1, Constant *C2) {
return getAShr(C1, C2, true);
}
+
static Constant *getExactLShr(Constant *C1, Constant *C2) {
return getLShr(C1, C2, true);
}
@@ -1071,26 +1121,31 @@ public:
/// Getelementptr form. Value* is only accepted for convenience;
/// all elements must be Constants.
///
+ /// \param InRangeIndex the inrange index, if present, or None.
/// \param OnlyIfReducedTy see \a getWithOperands() docs.
static Constant *getGetElementPtr(Type *Ty, Constant *C,
ArrayRef<Constant *> IdxList,
bool InBounds = false,
+ Optional<unsigned> InRangeIndex = None,
Type *OnlyIfReducedTy = nullptr) {
return getGetElementPtr(
Ty, C, makeArrayRef((Value * const *)IdxList.data(), IdxList.size()),
- InBounds, OnlyIfReducedTy);
+ InBounds, InRangeIndex, OnlyIfReducedTy);
}
static Constant *getGetElementPtr(Type *Ty, Constant *C, Constant *Idx,
bool InBounds = false,
+ Optional<unsigned> InRangeIndex = None,
Type *OnlyIfReducedTy = nullptr) {
// This form of the function only exists to avoid ambiguous overload
// warnings about whether to convert Idx to ArrayRef<Constant *> or
// ArrayRef<Value *>.
- return getGetElementPtr(Ty, C, cast<Value>(Idx), InBounds, OnlyIfReducedTy);
+ return getGetElementPtr(Ty, C, cast<Value>(Idx), InBounds, InRangeIndex,
+ OnlyIfReducedTy);
}
static Constant *getGetElementPtr(Type *Ty, Constant *C,
ArrayRef<Value *> IdxList,
bool InBounds = false,
+ Optional<unsigned> InRangeIndex = None,
Type *OnlyIfReducedTy = nullptr);
/// Create an "inbounds" getelementptr. See the documentation for the
@@ -1201,14 +1256,15 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ConstantExpr, Constant)
/// LangRef.html#undefvalues for details.
///
class UndefValue final : public ConstantData {
- UndefValue(const UndefValue &) = delete;
-
friend class Constant;
- void destroyConstantImpl();
explicit UndefValue(Type *T) : ConstantData(T, UndefValueVal) {}
+ void destroyConstantImpl();
+
public:
+ UndefValue(const UndefValue &) = delete;
+
/// Static factory methods - Return an 'undef' object of the specified type.
static UndefValue *get(Type *T);
@@ -1236,6 +1292,6 @@ public:
}
};
-} // End llvm namespace
+} // end namespace llvm
-#endif
+#endif // LLVM_IR_CONSTANTS_H
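
A sketch of the extended getGetElementPtr signature above; the module, struct type and global are placeholders, and InRangeIndex names the position in the index list that receives the new inrange marker.

#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"

using namespace llvm;

Constant *gepIntoStruct(Module &M) {
  LLVMContext &Ctx = M.getContext();
  Type *I32 = Type::getInt32Ty(Ctx);
  StructType *STy = StructType::get(Ctx, {I32, ArrayType::get(I32, 4)});
  auto *GV = new GlobalVariable(M, STy, /*isConstant=*/true,
                                GlobalValue::InternalLinkage,
                                Constant::getNullValue(STy), "g");
  Constant *Idx[] = {ConstantInt::get(I32, 0), ConstantInt::get(I32, 1),
                     ConstantInt::get(I32, 2)};
  // An inbounds GEP whose second index (position 1, the field selector)
  // carries the inrange annotation.
  return ConstantExpr::getGetElementPtr(STy, GV, Idx, /*InBounds=*/true,
                                        /*InRangeIndex=*/1);
}
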
diff --git a/contrib/llvm/include/llvm/IR/DIBuilder.h b/contrib/llvm/include/llvm/IR/DIBuilder.h
index 0f2f67f5feaf..932ae51b39dc 100644
--- a/contrib/llvm/include/llvm/IR/DIBuilder.h
+++ b/contrib/llvm/include/llvm/IR/DIBuilder.h
@@ -15,21 +15,27 @@
#ifndef LLVM_IR_DIBUILDER_H
#define LLVM_IR_DIBUILDER_H
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/TrackingMDRef.h"
-#include "llvm/IR/ValueHandle.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Casting.h"
+#include <algorithm>
+#include <cstdint>
namespace llvm {
+
class BasicBlock;
- class Instruction;
+ class Constant;
class Function;
+ class Instruction;
+ class LLVMContext;
class Module;
class Value;
- class Constant;
- class LLVMContext;
- class StringRef;
- template <typename T> class ArrayRef;
class DIBuilder {
Module &M;
@@ -57,9 +63,6 @@ namespace llvm {
/// copy.
DenseMap<MDNode *, SmallVector<TrackingMDNodeRef, 1>> PreservedVariables;
- DIBuilder(const DIBuilder &) = delete;
- void operator=(const DIBuilder &) = delete;
-
/// Create a temporary.
///
/// Create an \a temporary node and track it in \a UnresolvedNodes.
@@ -71,6 +74,8 @@ namespace llvm {
/// If \c AllowUnresolved, collect unresolved nodes attached to the module
/// in order to resolve cycles during \a finalize().
explicit DIBuilder(Module &M, bool AllowUnresolved = true);
+ DIBuilder(const DIBuilder &) = delete;
+ DIBuilder &operator=(const DIBuilder &) = delete;
/// Construct any deferred debug info descriptors.
void finalize();
@@ -78,8 +83,7 @@ namespace llvm {
/// A CompileUnit provides an anchor for all debugging
/// information generated during this instance of compilation.
/// \param Lang Source programming language, e.g. dwarf::DW_LANG_C99
- /// \param File File name
- /// \param Dir Directory
+ /// \param File File info.
/// \param Producer Identify the producer of debugging information
/// and code. Usually this is a compiler
/// version string.
@@ -96,16 +100,21 @@ namespace llvm {
/// \param Kind The kind of debug information to generate.
/// \param DWOId The DWOId if this is a split skeleton compile unit.
DICompileUnit *
- createCompileUnit(unsigned Lang, StringRef File, StringRef Dir,
- StringRef Producer, bool isOptimized, StringRef Flags,
- unsigned RV, StringRef SplitName = StringRef(),
+ createCompileUnit(unsigned Lang, DIFile *File, StringRef Producer,
+ bool isOptimized, StringRef Flags, unsigned RV,
+ StringRef SplitName = StringRef(),
DICompileUnit::DebugEmissionKind Kind =
DICompileUnit::DebugEmissionKind::FullDebug,
- uint64_t DWOId = 0);
+ uint64_t DWOId = 0, bool SplitDebugInlining = true);
- /// Create a file descriptor to hold debugging information
- /// for a file.
- DIFile *createFile(StringRef Filename, StringRef Directory);
+ /// Create a file descriptor to hold debugging information for a file.
+ /// \param Filename File name.
+ /// \param Directory Directory.
+ /// \param CSKind Checksum kind (e.g. CSK_None, CSK_MD5, CSK_SHA1, etc.).
+ /// \param Checksum Checksum data.
+ DIFile *createFile(StringRef Filename, StringRef Directory,
+ DIFile::ChecksumKind CSKind = DIFile::CSK_None,
+ StringRef Checksum = StringRef());
/// Create a single enumerator value.
DIEnumerator *createEnumerator(StringRef Name, int64_t Val);
@@ -120,10 +129,9 @@ namespace llvm {
/// type.
/// \param Name Type name.
/// \param SizeInBits Size of the type.
- /// \param AlignInBits Type alignment.
/// \param Encoding DWARF encoding code, e.g. dwarf::DW_ATE_float.
DIBasicType *createBasicType(StringRef Name, uint64_t SizeInBits,
- uint64_t AlignInBits, unsigned Encoding);
+ unsigned Encoding);
/// Create debugging information entry for a qualified
/// type, e.g. 'const int'.
@@ -137,7 +145,7 @@ namespace llvm {
/// \param AlignInBits Alignment. (optional)
/// \param Name Pointer type name. (optional)
DIDerivedType *createPointerType(DIType *PointeeTy, uint64_t SizeInBits,
- uint64_t AlignInBits = 0,
+ uint32_t AlignInBits = 0,
StringRef Name = "");
/// Create debugging information entry for a pointer to member.
@@ -145,16 +153,16 @@ namespace llvm {
/// \param SizeInBits Size.
/// \param AlignInBits Alignment. (optional)
/// \param Class Type for which this pointer points to members of.
- DIDerivedType *createMemberPointerType(DIType *PointeeTy, DIType *Class,
- uint64_t SizeInBits,
- uint64_t AlignInBits = 0,
- unsigned Flags = 0);
+ DIDerivedType *
+ createMemberPointerType(DIType *PointeeTy, DIType *Class,
+ uint64_t SizeInBits, uint32_t AlignInBits = 0,
+ DINode::DIFlags Flags = DINode::FlagZero);
/// Create debugging information entry for a c++
/// style reference or rvalue reference type.
DIDerivedType *createReferenceType(unsigned Tag, DIType *RTy,
uint64_t SizeInBits = 0,
- uint64_t AlignInBits = 0);
+ uint32_t AlignInBits = 0);
/// Create debugging information entry for a typedef.
/// \param Ty Original type.
@@ -176,7 +184,8 @@ namespace llvm {
/// \param Flags Flags to describe inheritance attribute,
/// e.g. private
DIDerivedType *createInheritance(DIType *Ty, DIType *BaseTy,
- uint64_t BaseOffset, unsigned Flags);
+ uint64_t BaseOffset,
+ DINode::DIFlags Flags);
/// Create debugging information entry for a member.
/// \param Scope Member scope.
@@ -190,9 +199,10 @@ namespace llvm {
/// \param Ty Parent type.
DIDerivedType *createMemberType(DIScope *Scope, StringRef Name,
DIFile *File, unsigned LineNo,
- uint64_t SizeInBits, uint64_t AlignInBits,
- uint64_t OffsetInBits, unsigned Flags,
- DIType *Ty);
+ uint64_t SizeInBits,
+ uint32_t AlignInBits,
+ uint64_t OffsetInBits,
+ DINode::DIFlags Flags, DIType *Ty);
/// Create debugging information entry for a bit field member.
/// \param Scope Member scope.
@@ -200,15 +210,14 @@ namespace llvm {
/// \param File File where this member is defined.
/// \param LineNo Line number.
/// \param SizeInBits Member size.
- /// \param AlignInBits Member alignment.
/// \param OffsetInBits Member offset.
/// \param StorageOffsetInBits Member storage offset.
/// \param Flags Flags to encode member attribute.
/// \param Ty Parent type.
DIDerivedType *createBitFieldMemberType(
DIScope *Scope, StringRef Name, DIFile *File, unsigned LineNo,
- uint64_t SizeInBits, uint64_t AlignInBits, uint64_t OffsetInBits,
- uint64_t StorageOffsetInBits, unsigned Flags, DIType *Ty);
+ uint64_t SizeInBits, uint64_t OffsetInBits,
+ uint64_t StorageOffsetInBits, DINode::DIFlags Flags, DIType *Ty);
/// Create debugging information entry for a
/// C++ static data member.
@@ -219,10 +228,12 @@ namespace llvm {
/// \param Ty Type of the static member.
/// \param Flags Flags to encode member attribute, e.g. private.
/// \param Val Const initializer of the member.
+ /// \param AlignInBits Member alignment.
DIDerivedType *createStaticMemberType(DIScope *Scope, StringRef Name,
DIFile *File, unsigned LineNo,
- DIType *Ty, unsigned Flags,
- llvm::Constant *Val);
+ DIType *Ty, DINode::DIFlags Flags,
+ Constant *Val,
+ uint32_t AlignInBits = 0);
/// Create debugging information entry for Objective-C
/// instance variable.
@@ -236,8 +247,8 @@ namespace llvm {
/// \param Ty Parent type.
/// \param PropertyNode Property associated with this ivar.
DIDerivedType *createObjCIVar(StringRef Name, DIFile *File, unsigned LineNo,
- uint64_t SizeInBits, uint64_t AlignInBits,
- uint64_t OffsetInBits, unsigned Flags,
+ uint64_t SizeInBits, uint32_t AlignInBits,
+ uint64_t OffsetInBits, DINode::DIFlags Flags,
DIType *Ty, MDNode *PropertyNode);
/// Create debugging information entry for Objective-C
@@ -271,14 +282,12 @@ namespace llvm {
/// for more info.
/// \param TemplateParms Template type parameters.
/// \param UniqueIdentifier A unique identifier for the class.
- DICompositeType *createClassType(DIScope *Scope, StringRef Name,
- DIFile *File, unsigned LineNumber,
- uint64_t SizeInBits, uint64_t AlignInBits,
- uint64_t OffsetInBits, unsigned Flags,
- DIType *DerivedFrom, DINodeArray Elements,
- DIType *VTableHolder = nullptr,
- MDNode *TemplateParms = nullptr,
- StringRef UniqueIdentifier = "");
+ DICompositeType *createClassType(
+ DIScope *Scope, StringRef Name, DIFile *File, unsigned LineNumber,
+ uint64_t SizeInBits, uint32_t AlignInBits, uint64_t OffsetInBits,
+ DINode::DIFlags Flags, DIType *DerivedFrom, DINodeArray Elements,
+ DIType *VTableHolder = nullptr, MDNode *TemplateParms = nullptr,
+ StringRef UniqueIdentifier = "");
/// Create debugging information entry for a struct.
/// \param Scope Scope in which this struct is defined.
@@ -293,7 +302,7 @@ namespace llvm {
/// \param UniqueIdentifier A unique identifier for the struct.
DICompositeType *createStructType(
DIScope *Scope, StringRef Name, DIFile *File, unsigned LineNumber,
- uint64_t SizeInBits, uint64_t AlignInBits, unsigned Flags,
+ uint64_t SizeInBits, uint32_t AlignInBits, DINode::DIFlags Flags,
DIType *DerivedFrom, DINodeArray Elements, unsigned RunTimeLang = 0,
DIType *VTableHolder = nullptr, StringRef UniqueIdentifier = "");
@@ -310,8 +319,9 @@ namespace llvm {
/// \param UniqueIdentifier A unique identifier for the union.
DICompositeType *createUnionType(DIScope *Scope, StringRef Name,
DIFile *File, unsigned LineNumber,
- uint64_t SizeInBits, uint64_t AlignInBits,
- unsigned Flags, DINodeArray Elements,
+ uint64_t SizeInBits, uint32_t AlignInBits,
+ DINode::DIFlags Flags,
+ DINodeArray Elements,
unsigned RunTimeLang = 0,
StringRef UniqueIdentifier = "");
@@ -359,7 +369,7 @@ namespace llvm {
/// \param AlignInBits Alignment.
/// \param Ty Element type.
/// \param Subscripts Subscripts.
- DICompositeType *createArrayType(uint64_t Size, uint64_t AlignInBits,
+ DICompositeType *createArrayType(uint64_t Size, uint32_t AlignInBits,
DIType *Ty, DINodeArray Subscripts);
/// Create debugging information entry for a vector type.
@@ -367,7 +377,7 @@ namespace llvm {
/// \param AlignInBits Alignment.
/// \param Ty Element type.
/// \param Subscripts Subscripts.
- DICompositeType *createVectorType(uint64_t Size, uint64_t AlignInBits,
+ DICompositeType *createVectorType(uint64_t Size, uint32_t AlignInBits,
DIType *Ty, DINodeArray Subscripts);
/// Create debugging information entry for an
@@ -383,7 +393,7 @@ namespace llvm {
/// \param UniqueIdentifier A unique identifier for the enum.
DICompositeType *createEnumerationType(
DIScope *Scope, StringRef Name, DIFile *File, unsigned LineNumber,
- uint64_t SizeInBits, uint64_t AlignInBits, DINodeArray Elements,
+ uint64_t SizeInBits, uint32_t AlignInBits, DINodeArray Elements,
DIType *UnderlyingType, StringRef UniqueIdentifier = "");
/// Create subroutine type.
@@ -392,8 +402,10 @@ namespace llvm {
/// \param Flags E.g.: LValueReference.
/// These flags are used to emit dwarf attributes.
/// \param CC Calling convention, e.g. dwarf::DW_CC_normal
- DISubroutineType *createSubroutineType(DITypeRefArray ParameterTypes,
- unsigned Flags = 0, unsigned CC = 0);
+ DISubroutineType *
+ createSubroutineType(DITypeRefArray ParameterTypes,
+ DINode::DIFlags Flags = DINode::FlagZero,
+ unsigned CC = 0);
/// Create an external type reference.
/// \param Tag Dwarf TAG.
@@ -414,14 +426,14 @@ namespace llvm {
DIScope *Scope, DIFile *F, unsigned Line,
unsigned RuntimeLang = 0,
uint64_t SizeInBits = 0,
- uint64_t AlignInBits = 0,
+ uint32_t AlignInBits = 0,
StringRef UniqueIdentifier = "");
/// Create a temporary forward-declared type.
DICompositeType *createReplaceableCompositeType(
unsigned Tag, StringRef Name, DIScope *Scope, DIFile *F, unsigned Line,
unsigned RuntimeLang = 0, uint64_t SizeInBits = 0,
- uint64_t AlignInBits = 0, unsigned Flags = DINode::FlagFwdDecl,
+ uint32_t AlignInBits = 0, DINode::DIFlags Flags = DINode::FlagFwdDecl,
StringRef UniqueIdentifier = "");
/// Retain DIScope* in a module even if it is not referenced
@@ -442,8 +454,7 @@ namespace llvm {
/// implicitly uniques the values returned.
DISubrange *getOrCreateSubrange(int64_t Lo, int64_t Count);
- /// Create a new descriptor for the specified
- /// variable.
+ /// Create a new descriptor for the specified variable.
/// \param Context Variable scope.
/// \param Name Name of the variable.
/// \param LinkageName Mangled name of the variable.
@@ -452,21 +463,23 @@ namespace llvm {
/// \param Ty Variable Type.
/// \param isLocalToUnit Boolean flag indicating whether this variable is
/// externally visible or not.
- /// \param Val llvm::Value of the variable.
+ /// \param Expr The location of the global relative to the attached
+ /// GlobalVariable.
/// \param Decl Reference to the corresponding declaration.
- DIGlobalVariable *createGlobalVariable(DIScope *Context, StringRef Name,
- StringRef LinkageName, DIFile *File,
- unsigned LineNo, DIType *Ty,
- bool isLocalToUnit,
- llvm::Constant *Val,
- MDNode *Decl = nullptr);
+ /// \param AlignInBits Variable alignment (or 0 if no alignment attr was
+ /// specified).
+ DIGlobalVariableExpression *createGlobalVariableExpression(
+ DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *File,
+ unsigned LineNo, DIType *Ty, bool isLocalToUnit,
+ DIExpression *Expr = nullptr, MDNode *Decl = nullptr,
+ uint32_t AlignInBits = 0);
/// Identical to createGlobalVariable
/// except that the resulting DbgNode is temporary and meant to be RAUWed.
DIGlobalVariable *createTempGlobalVariableFwdDecl(
DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *File,
- unsigned LineNo, DIType *Ty, bool isLocalToUnit, llvm::Constant *Val,
- MDNode *Decl = nullptr);
+ unsigned LineNo, DIType *Ty, bool isLocalToUnit, MDNode *Decl = nullptr,
+ uint32_t AlignInBits = 0);
/// Create a new descriptor for an auto variable. This is a local variable
/// that is not a subprogram parameter.
@@ -476,11 +489,11 @@ namespace llvm {
///
/// If \c AlwaysPreserve, this variable will be referenced from its
/// containing subprogram, and will survive some optimizations.
- DILocalVariable *createAutoVariable(DIScope *Scope, StringRef Name,
- DIFile *File, unsigned LineNo,
- DIType *Ty,
- bool AlwaysPreserve = false,
- unsigned Flags = 0);
+ DILocalVariable *
+ createAutoVariable(DIScope *Scope, StringRef Name, DIFile *File,
+ unsigned LineNo, DIType *Ty, bool AlwaysPreserve = false,
+ DINode::DIFlags Flags = DINode::FlagZero,
+ uint32_t AlignInBits = 0);
/// Create a new descriptor for a parameter variable.
///
@@ -493,11 +506,11 @@ namespace llvm {
///
/// If \c AlwaysPreserve, this variable will be referenced from its
/// containing subprogram, and will survive some optimizations.
- DILocalVariable *createParameterVariable(DIScope *Scope, StringRef Name,
- unsigned ArgNo, DIFile *File,
- unsigned LineNo, DIType *Ty,
- bool AlwaysPreserve = false,
- unsigned Flags = 0);
+ DILocalVariable *
+ createParameterVariable(DIScope *Scope, StringRef Name, unsigned ArgNo,
+ DIFile *File, unsigned LineNo, DIType *Ty,
+ bool AlwaysPreserve = false,
+ DINode::DIFlags Flags = DINode::FlagZero);
/// Create a new descriptor for the specified
/// variable which has a complex address expression for its address.
@@ -510,9 +523,16 @@ namespace llvm {
///
/// \param OffsetInBits Offset of the piece in bits.
/// \param SizeInBits Size of the piece in bits.
- DIExpression *createBitPieceExpression(unsigned OffsetInBits,
+ DIExpression *createFragmentExpression(unsigned OffsetInBits,
unsigned SizeInBits);
+ /// Create an expression for a variable that does not have an address, but
+ /// does have a constant value.
+ DIExpression *createConstantValueExpression(uint64_t Val) {
+ return DIExpression::get(
+ VMContext, {dwarf::DW_OP_constu, Val, dwarf::DW_OP_stack_value});
+ }
+
/// Create a new descriptor for the specified subprogram.
/// See comments in DISubprogram* for descriptions of these fields.
/// \param Scope Function scope.
@@ -532,7 +552,8 @@ namespace llvm {
StringRef LinkageName, DIFile *File,
unsigned LineNo, DISubroutineType *Ty,
bool isLocalToUnit, bool isDefinition,
- unsigned ScopeLine, unsigned Flags = 0,
+ unsigned ScopeLine,
+ DINode::DIFlags Flags = DINode::FlagZero,
bool isOptimized = false,
DITemplateParameterArray TParams = nullptr,
DISubprogram *Decl = nullptr);
@@ -542,8 +563,9 @@ namespace llvm {
DISubprogram *createTempFunctionFwdDecl(
DIScope *Scope, StringRef Name, StringRef LinkageName, DIFile *File,
unsigned LineNo, DISubroutineType *Ty, bool isLocalToUnit,
- bool isDefinition, unsigned ScopeLine, unsigned Flags = 0,
- bool isOptimized = false, DITemplateParameterArray TParams = nullptr,
+ bool isDefinition, unsigned ScopeLine,
+ DINode::DIFlags Flags = DINode::FlagZero, bool isOptimized = false,
+ DITemplateParameterArray TParams = nullptr,
DISubprogram *Decl = nullptr);
/// Create a new descriptor for the specified C++ method.
@@ -568,14 +590,13 @@ namespace llvm {
/// These flags are used to emit dwarf attributes.
/// \param isOptimized True if optimization is ON.
/// \param TParams Function template parameters.
- DISubprogram *
- createMethod(DIScope *Scope, StringRef Name, StringRef LinkageName,
- DIFile *File, unsigned LineNo, DISubroutineType *Ty,
- bool isLocalToUnit, bool isDefinition, unsigned Virtuality = 0,
- unsigned VTableIndex = 0, int ThisAdjustment = 0,
- DIType *VTableHolder = nullptr, unsigned Flags = 0,
- bool isOptimized = false,
- DITemplateParameterArray TParams = nullptr);
+ DISubprogram *createMethod(
+ DIScope *Scope, StringRef Name, StringRef LinkageName, DIFile *File,
+ unsigned LineNo, DISubroutineType *Ty, bool isLocalToUnit,
+ bool isDefinition, unsigned Virtuality = 0, unsigned VTableIndex = 0,
+ int ThisAdjustment = 0, DIType *VTableHolder = nullptr,
+ DINode::DIFlags Flags = DINode::FlagZero, bool isOptimized = false,
+ DITemplateParameterArray TParams = nullptr);
/// This creates a new descriptor for a namespace with the specified
/// parent scope.
@@ -583,8 +604,9 @@ namespace llvm {
/// \param Name Name of this namespace
/// \param File Source file
/// \param LineNo Line number
+ /// \param ExportSymbols True for C++ inline namespaces.
DINamespace *createNameSpace(DIScope *Scope, StringRef Name, DIFile *File,
- unsigned LineNo);
+ unsigned LineNo, bool ExportSymbols);
/// This creates a new descriptor for a module with the specified
/// parent scope.
@@ -726,6 +748,7 @@ namespace llvm {
return Replacement;
}
};
+
} // end namespace llvm
-#endif
+#endif // LLVM_IR_DIBUILDER_H
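
A sketch of the reworked DIBuilder calls above, assuming an existing Module; the producer string, file name and checksum value are placeholders (the checksum shown is the MD5 of an empty file).

#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Dwarf.h"

using namespace llvm;

void emitDebugInfo(Module &M) {
  DIBuilder DIB(M);
  // createFile can now record a checksum kind and value for the file.
  DIFile *File = DIB.createFile("t.c", "/tmp", DIFile::CSK_MD5,
                                "d41d8cd98f00b204e9800998ecf8427e");
  // createCompileUnit now takes the DIFile directly instead of name + dir.
  DICompileUnit *CU = DIB.createCompileUnit(
      dwarf::DW_LANG_C99, File, "example producer", /*isOptimized=*/false,
      /*Flags=*/"", /*RV=*/0);
  // createBasicType no longer takes an alignment argument.
  DIBasicType *IntTy = DIB.createBasicType("int", 32, dwarf::DW_ATE_signed);
  // Globals are now described by a DIGlobalVariableExpression rather than
  // by attaching a Constant value.
  DIB.createGlobalVariableExpression(CU, "g", "g", File, /*LineNo=*/1, IntTy,
                                     /*isLocalToUnit=*/true);
  DIB.finalize();
}
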
diff --git a/contrib/llvm/include/llvm/IR/DataLayout.h b/contrib/llvm/include/llvm/IR/DataLayout.h
index 173121b72ffd..6f37669f9768 100644
--- a/contrib/llvm/include/llvm/IR/DataLayout.h
+++ b/contrib/llvm/include/llvm/IR/DataLayout.h
@@ -21,6 +21,7 @@
#define LLVM_IR_DATALAYOUT_H
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"
#include "llvm/Pass.h"
@@ -144,6 +145,10 @@ private:
// The StructType -> StructLayout map.
mutable void *LayoutMap;
+ /// Pointers in these address spaces are non-integral, and don't have a
+ /// well-defined bitwise representation.
+ SmallVector<unsigned, 8> NonIntegralAddressSpaces;
+
void setAlignment(AlignTypeEnum align_type, unsigned abi_align,
unsigned pref_align, uint32_t bit_width);
unsigned getAlignmentInfo(AlignTypeEnum align_type, uint32_t bit_width,
@@ -199,6 +204,7 @@ public:
LegalIntWidths = DL.LegalIntWidths;
Alignments = DL.Alignments;
Pointers = DL.Pointers;
+ NonIntegralAddressSpaces = DL.NonIntegralAddressSpaces;
return *this;
}
@@ -255,7 +261,7 @@ public:
bool hasLinkerPrivateGlobalPrefix() const { return ManglingMode == MM_MachO; }
- const char *getLinkerPrivateGlobalPrefix() const {
+ StringRef getLinkerPrivateGlobalPrefix() const {
if (ManglingMode == MM_MachO)
return "l";
return "";
@@ -275,16 +281,16 @@ public:
llvm_unreachable("invalid mangling mode");
}
- const char *getPrivateGlobalPrefix() const {
+ StringRef getPrivateGlobalPrefix() const {
switch (ManglingMode) {
case MM_None:
return "";
case MM_ELF:
+ case MM_WinCOFF:
return ".L";
case MM_Mips:
return "$";
case MM_MachO:
- case MM_WinCOFF:
case MM_WinCOFFX86:
return "L";
}
@@ -320,6 +326,23 @@ public:
/// the backends/clients are updated.
unsigned getPointerSize(unsigned AS = 0) const;
+ /// Return the address spaces containing non-integral pointers. Pointers in
+ /// this address space don't have a well-defined bitwise representation.
+ ArrayRef<unsigned> getNonIntegralAddressSpaces() const {
+ return NonIntegralAddressSpaces;
+ }
+
+ bool isNonIntegralPointerType(PointerType *PT) const {
+ ArrayRef<unsigned> NonIntegralSpaces = getNonIntegralAddressSpaces();
+ return find(NonIntegralSpaces, PT->getAddressSpace()) !=
+ NonIntegralSpaces.end();
+ }
+
+ bool isNonIntegralPointerType(Type *Ty) const {
+ auto *PTy = dyn_cast<PointerType>(Ty);
+ return PTy && isNonIntegralPointerType(PTy);
+ }
+
/// Layout pointer size, in bits
/// FIXME: The defaults need to be removed once all of
/// the backends/clients are updated.
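
A sketch of the non-integral pointer query added above; the datalayout string and function name are illustrative, with address space 1 declared non-integral via the "ni" component.

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"

using namespace llvm;

bool pointerNeedsCare(LLVMContext &Ctx) {
  DataLayout DL("e-m:e-i64:64-ni:1");
  PointerType *P1 = PointerType::get(Type::getInt8Ty(Ctx), /*AddressSpace=*/1);
  // True: such pointers have no defined bitwise representation, so passes
  // must not introduce ptrtoint/inttoptr on them.
  return DL.isNonIntegralPointerType(P1);
}
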
diff --git a/contrib/llvm/include/llvm/IR/DebugInfo.h b/contrib/llvm/include/llvm/IR/DebugInfo.h
index 972042432b7b..04f46197b1c3 100644
--- a/contrib/llvm/include/llvm/IR/DebugInfo.h
+++ b/contrib/llvm/include/llvm/IR/DebugInfo.h
@@ -44,6 +44,18 @@ DISubprogram *getDISubprogram(const MDNode *Scope);
bool StripDebugInfo(Module &M);
bool stripDebugInfo(Function &F);
+/// Downgrade the debug info in a module to contain only line table information.
+///
+/// In order to convert debug info to what -gline-tables-only would have
+/// created, this does the following:
+/// 1) Delete all debug intrinsics.
+/// 2) Delete all non-CU named metadata debug info nodes.
+/// 3) Create new DebugLocs for each instruction.
+/// 4) Create a new CU debug info, and similarly for every metadata node
+/// that's reachable from the CU debug info.
+/// All debug type metadata nodes are unreachable and garbage collected.
+bool stripNonLineTableDebugInfo(Module &M);
+
/// \brief Return Debug Info Metadata Version by checking module flags.
unsigned getDebugMetadataVersionFromModule(const Module &M);
@@ -77,7 +89,7 @@ private:
void processSubprogram(DISubprogram *SP);
void processScope(DIScope *Scope);
bool addCompileUnit(DICompileUnit *CU);
- bool addGlobalVariable(DIGlobalVariable *DIG);
+ bool addGlobalVariable(DIGlobalVariableExpression *DIG);
bool addSubprogram(DISubprogram *SP);
bool addType(DIType *DT);
bool addScope(DIScope *Scope);
@@ -86,8 +98,8 @@ public:
typedef SmallVectorImpl<DICompileUnit *>::const_iterator
compile_unit_iterator;
typedef SmallVectorImpl<DISubprogram *>::const_iterator subprogram_iterator;
- typedef SmallVectorImpl<DIGlobalVariable *>::const_iterator
- global_variable_iterator;
+ typedef SmallVectorImpl<DIGlobalVariableExpression *>::const_iterator
+ global_variable_expression_iterator;
typedef SmallVectorImpl<DIType *>::const_iterator type_iterator;
typedef SmallVectorImpl<DIScope *>::const_iterator scope_iterator;
@@ -99,7 +111,7 @@ public:
return make_range(SPs.begin(), SPs.end());
}
- iterator_range<global_variable_iterator> global_variables() const {
+ iterator_range<global_variable_expression_iterator> global_variables() const {
return make_range(GVs.begin(), GVs.end());
}
@@ -120,7 +132,7 @@ public:
private:
SmallVector<DICompileUnit *, 8> CUs;
SmallVector<DISubprogram *, 8> SPs;
- SmallVector<DIGlobalVariable *, 8> GVs;
+ SmallVector<DIGlobalVariableExpression *, 8> GVs;
SmallVector<DIType *, 8> TYs;
SmallVector<DIScope *, 8> Scopes;
SmallPtrSet<const MDNode *, 32> NodesSeen;
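
A sketch against the revised DebugInfo.h interface above: DebugInfoFinder's global_variables() range now yields DIGlobalVariableExpression nodes, and stripNonLineTableDebugInfo is the new line-tables-only downgrade. The counting logic and function names are purely illustrative.

#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/Module.h"

using namespace llvm;

unsigned countDebugGlobals(Module &M) {
  DebugInfoFinder Finder;
  Finder.processModule(M);
  unsigned N = 0;
  // Each entry pairs a DIGlobalVariable with its location expression.
  for (DIGlobalVariableExpression *GVE : Finder.global_variables())
    if (GVE->getVariable())
      ++N;
  return N;
}

void reduceToLineTables(Module &M) {
  // Approximates what -gline-tables-only would have produced up front.
  stripNonLineTableDebugInfo(M);
}
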
diff --git a/contrib/llvm/include/llvm/IR/DebugInfoFlags.def b/contrib/llvm/include/llvm/IR/DebugInfoFlags.def
index 26238c349e7e..87f3dc9dbdd3 100644
--- a/contrib/llvm/include/llvm/IR/DebugInfoFlags.def
+++ b/contrib/llvm/include/llvm/IR/DebugInfoFlags.def
@@ -16,6 +16,8 @@
#error "Missing macro definition of HANDLE_DI_FLAG"
#endif
+HANDLE_DI_FLAG(0, Zero) // Use this as the zero value.
+ // For example: void foo(DIFlags Flags = FlagZero).
HANDLE_DI_FLAG(1, Private)
HANDLE_DI_FLAG(2, Protected)
HANDLE_DI_FLAG(3, Public)
@@ -38,5 +40,19 @@ HANDLE_DI_FLAG((2 << 16), MultipleInheritance)
HANDLE_DI_FLAG((3 << 16), VirtualInheritance)
HANDLE_DI_FLAG((1 << 18), IntroducedVirtual)
HANDLE_DI_FLAG((1 << 19), BitField)
+HANDLE_DI_FLAG((1 << 20), NoReturn)
+HANDLE_DI_FLAG((1 << 21), MainSubprogram)
+
+// To avoid needing a dedicated value for IndirectVirtualBase, we use
+// the bitwise or of Virtual and FwdDecl, which does not otherwise
+// make sense for inheritance.
+HANDLE_DI_FLAG((1 << 2) | (1 << 5), IndirectVirtualBase)
+
+#ifdef DI_FLAG_LARGEST_NEEDED
+// Intended to be used with ADT/BitmaskEnum.h.
+// NOTE: this must always be equal to the largest flag; check this when adding new flags.
+HANDLE_DI_FLAG((1 << 21), Largest)
+#undef DI_FLAG_LARGEST_NEEDED
+#endif
#undef HANDLE_DI_FLAG
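
A sketch of how a client expands this .def file; it mirrors the X-macro pattern DebugInfoMetadata.h uses for the DIFlags enum, but the local table and printing here are only for illustration. Largest is emitted only when DI_FLAG_LARGEST_NEEDED is defined before inclusion, so it is absent in this expansion.

#include <cstdint>
#include <cstdio>

struct FlagEntry {
  uint32_t Bits;
  const char *Name;
};

// Expand every HANDLE_DI_FLAG(ID, NAME) line into a {bits, "name"} entry.
static const FlagEntry DIFlagTable[] = {
#define HANDLE_DI_FLAG(ID, NAME) {static_cast<uint32_t>(ID), #NAME},
#include "llvm/IR/DebugInfoFlags.def"
};

int main() {
  // Prints every flag, including the new Zero, NoReturn and MainSubprogram
  // entries added by this change.
  for (const FlagEntry &E : DIFlagTable)
    std::printf("0x%08x %s\n", static_cast<unsigned>(E.Bits), E.Name);
  return 0;
}
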
diff --git a/contrib/llvm/include/llvm/IR/DebugInfoMetadata.h b/contrib/llvm/include/llvm/IR/DebugInfoMetadata.h
index 853a94afd9d9..26f4626ead10 100644
--- a/contrib/llvm/include/llvm/IR/DebugInfoMetadata.h
+++ b/contrib/llvm/include/llvm/IR/DebugInfoMetadata.h
@@ -14,8 +14,21 @@
#ifndef LLVM_IR_DEBUGINFOMETADATA_H
#define LLVM_IR_DEBUGINFOMETADATA_H
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitmaskEnum.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/Dwarf.h"
+#include <cassert>
+#include <climits>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
+#include <type_traits>
+#include <vector>
// Helper macros for defining get() overrides.
#define DEFINE_MDNODE_GET_UNPACK_IMPL(...) __VA_ARGS__
@@ -108,16 +121,20 @@ public:
public:
iterator() = default;
explicit iterator(MDNode::op_iterator I) : I(I) {}
+
DITypeRef operator*() const { return DITypeRef(*I); }
+
iterator &operator++() {
++I;
return *this;
}
+
iterator operator++(int) {
iterator Temp(*this);
++I;
return Temp;
}
+
bool operator==(const iterator &X) const { return I == X.I; }
bool operator!=(const iterator &X) const { return I != X.I; }
};
@@ -127,7 +144,7 @@ public:
iterator end() const { return N ? iterator(N->op_end()) : iterator(); }
};
-/// \brief Tagged DWARF-like metadata node.
+/// Tagged DWARF-like metadata node.
///
/// A metadata node with a DWARF tag (i.e., a constant named \c DW_TAG_*,
/// defined in llvm/Support/Dwarf.h). Called \a DINode because it's
@@ -167,27 +184,29 @@ protected:
public:
unsigned getTag() const { return SubclassData16; }
- /// \brief Debug info flags.
+ /// Debug info flags.
///
/// The three accessibility flags are mutually exclusive and rolled together
/// in the first two bits.
- enum DIFlags {
+ enum DIFlags : uint32_t {
#define HANDLE_DI_FLAG(ID, NAME) Flag##NAME = ID,
+#define DI_FLAG_LARGEST_NEEDED
#include "llvm/IR/DebugInfoFlags.def"
FlagAccessibility = FlagPrivate | FlagProtected | FlagPublic,
FlagPtrToMemberRep = FlagSingleInheritance | FlagMultipleInheritance |
FlagVirtualInheritance,
+ LLVM_MARK_AS_BITMASK_ENUM(FlagLargest)
};
- static unsigned getFlag(StringRef Flag);
- static const char *getFlagString(unsigned Flag);
+ static DIFlags getFlag(StringRef Flag);
+ static StringRef getFlagString(DIFlags Flag);
- /// \brief Split up a flags bitfield.
+ /// Split up a flags bitfield.
///
/// Split \c Flags into \c SplitFlags, a vector of its components. Returns
/// any remaining (unrecognized) bits.
- static unsigned splitFlags(unsigned Flags,
- SmallVectorImpl<unsigned> &SplitFlags);
+ static DIFlags splitFlags(DIFlags Flags,
+ SmallVectorImpl<DIFlags> &SplitFlags);
static bool classof(const Metadata *MD) {
switch (MD->getMetadataID()) {
@@ -229,7 +248,7 @@ template <class T>
struct simplify_type<TypedDINodeRef<T>>
: simplify_type<const TypedDINodeRef<T>> {};
-/// \brief Generic tagged DWARF-like metadata node.
+/// Generic tagged DWARF-like metadata node.
///
/// An un-specialized DWARF-like metadata node. The first operand is a
/// (possibly empty) null-separated \a MDString header that contains arbitrary
@@ -277,7 +296,7 @@ public:
ArrayRef<Metadata *> DwarfOps),
(Tag, Header, DwarfOps))
- /// \brief Return a (temporary) clone of this.
+ /// Return a (temporary) clone of this.
TempGenericDINode clone() const { return cloneImpl(); }
unsigned getTag() const { return SubclassData16; }
@@ -303,7 +322,7 @@ public:
}
};
-/// \brief Array subrange.
+/// Array subrange.
///
/// TODO: Merge into node for DW_TAG_array_type, which should have a custom
/// type.
@@ -342,7 +361,7 @@ public:
}
};
-/// \brief Enumeration value.
+/// Enumeration value.
///
/// TODO: Add a pointer to the context (DW_TAG_enumeration_type) once that no
/// longer creates a type cycle.
@@ -390,7 +409,7 @@ public:
}
};
-/// \brief Base class for scope-like contexts.
+/// Base class for scope-like contexts.
///
/// Base class for lexical scopes and types (which are also declaration
/// contexts).
@@ -412,7 +431,7 @@ public:
StringRef getName() const;
DIScopeRef getScope() const;
- /// \brief Return the raw underlying file.
+ /// Return the raw underlying file.
///
/// An \a DIFile is an \a DIScope, but it doesn't point at a separate file
/// (it\em is the file). If \c this is an \a DIFile, we need to return \c
@@ -443,7 +462,7 @@ public:
}
};
-/// \brief File.
+/// File.
///
/// TODO: Merge with directory/file node (including users).
/// TODO: Canonicalize paths on creation.
@@ -451,38 +470,62 @@ class DIFile : public DIScope {
friend class LLVMContextImpl;
friend class MDNode;
- DIFile(LLVMContext &C, StorageType Storage, ArrayRef<Metadata *> Ops)
- : DIScope(C, DIFileKind, Storage, dwarf::DW_TAG_file_type, Ops) {}
+public:
+ enum ChecksumKind {
+ CSK_None,
+ CSK_MD5,
+ CSK_SHA1,
+ CSK_Last = CSK_SHA1 // Should be last enumeration.
+ };
+
+private:
+ ChecksumKind CSKind;
+
+ DIFile(LLVMContext &C, StorageType Storage, ChecksumKind CSK,
+ ArrayRef<Metadata *> Ops)
+ : DIScope(C, DIFileKind, Storage, dwarf::DW_TAG_file_type, Ops),
+ CSKind(CSK) {}
~DIFile() = default;
static DIFile *getImpl(LLVMContext &Context, StringRef Filename,
- StringRef Directory, StorageType Storage,
- bool ShouldCreate = true) {
+ StringRef Directory, ChecksumKind CSK, StringRef CS,
+ StorageType Storage, bool ShouldCreate = true) {
return getImpl(Context, getCanonicalMDString(Context, Filename),
- getCanonicalMDString(Context, Directory), Storage,
- ShouldCreate);
+ getCanonicalMDString(Context, Directory), CSK,
+ getCanonicalMDString(Context, CS), Storage, ShouldCreate);
}
static DIFile *getImpl(LLVMContext &Context, MDString *Filename,
- MDString *Directory, StorageType Storage,
- bool ShouldCreate = true);
+ MDString *Directory, ChecksumKind CSK, MDString *CS,
+ StorageType Storage, bool ShouldCreate = true);
TempDIFile cloneImpl() const {
- return getTemporary(getContext(), getFilename(), getDirectory());
+ return getTemporary(getContext(), getFilename(), getDirectory(),
+ getChecksumKind(), getChecksum());
}
public:
- DEFINE_MDNODE_GET(DIFile, (StringRef Filename, StringRef Directory),
- (Filename, Directory))
- DEFINE_MDNODE_GET(DIFile, (MDString * Filename, MDString *Directory),
- (Filename, Directory))
+ DEFINE_MDNODE_GET(DIFile, (StringRef Filename, StringRef Directory,
+ ChecksumKind CSK = CSK_None,
+ StringRef CS = StringRef()),
+ (Filename, Directory, CSK, CS))
+ DEFINE_MDNODE_GET(DIFile, (MDString *Filename, MDString *Directory,
+ ChecksumKind CSK = CSK_None,
+ MDString *CS = nullptr),
+ (Filename, Directory, CSK, CS))
TempDIFile clone() const { return cloneImpl(); }
StringRef getFilename() const { return getStringOperand(0); }
StringRef getDirectory() const { return getStringOperand(1); }
+ StringRef getChecksum() const { return getStringOperand(2); }
+ ChecksumKind getChecksumKind() const { return CSKind; }
+ StringRef getChecksumKindAsString() const;
MDString *getRawFilename() const { return getOperandAs<MDString>(0); }
MDString *getRawDirectory() const { return getOperandAs<MDString>(1); }
+ MDString *getRawChecksum() const { return getOperandAs<MDString>(2); }
+
+ static ChecksumKind getChecksumKind(StringRef CSKindStr);
static bool classof(const Metadata *MD) {
return MD->getMetadataID() == DIFileKind;
@@ -501,29 +544,29 @@ StringRef DIScope::getDirectory() const {
return "";
}
-/// \brief Base class for types.
+/// Base class for types.
///
/// TODO: Remove the hardcoded name and context, since many types don't use
/// them.
/// TODO: Split up flags.
class DIType : public DIScope {
unsigned Line;
- unsigned Flags;
+ DIFlags Flags;
uint64_t SizeInBits;
- uint64_t AlignInBits;
uint64_t OffsetInBits;
+ uint32_t AlignInBits;
protected:
DIType(LLVMContext &C, unsigned ID, StorageType Storage, unsigned Tag,
- unsigned Line, uint64_t SizeInBits, uint64_t AlignInBits,
- uint64_t OffsetInBits, unsigned Flags, ArrayRef<Metadata *> Ops)
+ unsigned Line, uint64_t SizeInBits, uint32_t AlignInBits,
+ uint64_t OffsetInBits, DIFlags Flags, ArrayRef<Metadata *> Ops)
: DIScope(C, ID, Storage, Tag, Ops) {
init(Line, SizeInBits, AlignInBits, OffsetInBits, Flags);
}
~DIType() = default;
- void init(unsigned Line, uint64_t SizeInBits, uint64_t AlignInBits,
- uint64_t OffsetInBits, unsigned Flags) {
+ void init(unsigned Line, uint64_t SizeInBits, uint32_t AlignInBits,
+ uint64_t OffsetInBits, DIFlags Flags) {
this->Line = Line;
this->Flags = Flags;
this->SizeInBits = SizeInBits;
@@ -533,7 +576,7 @@ protected:
/// Change fields in place.
void mutate(unsigned Tag, unsigned Line, uint64_t SizeInBits,
- uint64_t AlignInBits, uint64_t OffsetInBits, unsigned Flags) {
+ uint32_t AlignInBits, uint64_t OffsetInBits, DIFlags Flags) {
assert(isDistinct() && "Only distinct nodes can mutate");
setTag(Tag);
init(Line, SizeInBits, AlignInBits, OffsetInBits, Flags);
@@ -546,9 +589,10 @@ public:
unsigned getLine() const { return Line; }
uint64_t getSizeInBits() const { return SizeInBits; }
- uint64_t getAlignInBits() const { return AlignInBits; }
+ uint32_t getAlignInBits() const { return AlignInBits; }
+ uint32_t getAlignInBytes() const { return getAlignInBits() / CHAR_BIT; }
uint64_t getOffsetInBits() const { return OffsetInBits; }
- unsigned getFlags() const { return Flags; }
+ DIFlags getFlags() const { return Flags; }
DIScopeRef getScope() const { return DIScopeRef(getRawScope()); }
StringRef getName() const { return getStringOperand(2); }
@@ -557,7 +601,7 @@ public:
Metadata *getRawScope() const { return getOperand(1); }
MDString *getRawName() const { return getOperandAs<MDString>(2); }
- void setFlags(unsigned NewFlags) {
+ void setFlags(DIFlags NewFlags) {
assert(!isUniqued() && "Cannot set flags on uniqued nodes");
Flags = NewFlags;
}
@@ -600,7 +644,7 @@ public:
}
};
-/// \brief Basic type, like 'int' or 'float'.
+/// Basic type, like 'int' or 'float'.
///
/// TODO: Split out DW_TAG_unspecified_type.
/// TODO: Drop unused accessors.
@@ -611,23 +655,23 @@ class DIBasicType : public DIType {
unsigned Encoding;
DIBasicType(LLVMContext &C, StorageType Storage, unsigned Tag,
- uint64_t SizeInBits, uint64_t AlignInBits, unsigned Encoding,
+ uint64_t SizeInBits, uint32_t AlignInBits, unsigned Encoding,
ArrayRef<Metadata *> Ops)
: DIType(C, DIBasicTypeKind, Storage, Tag, 0, SizeInBits, AlignInBits, 0,
- 0, Ops),
+ FlagZero, Ops),
Encoding(Encoding) {}
~DIBasicType() = default;
static DIBasicType *getImpl(LLVMContext &Context, unsigned Tag,
StringRef Name, uint64_t SizeInBits,
- uint64_t AlignInBits, unsigned Encoding,
+ uint32_t AlignInBits, unsigned Encoding,
StorageType Storage, bool ShouldCreate = true) {
return getImpl(Context, Tag, getCanonicalMDString(Context, Name),
SizeInBits, AlignInBits, Encoding, Storage, ShouldCreate);
}
static DIBasicType *getImpl(LLVMContext &Context, unsigned Tag,
MDString *Name, uint64_t SizeInBits,
- uint64_t AlignInBits, unsigned Encoding,
+ uint32_t AlignInBits, unsigned Encoding,
StorageType Storage, bool ShouldCreate = true);
TempDIBasicType cloneImpl() const {
@@ -640,11 +684,11 @@ public:
(Tag, Name, 0, 0, 0))
DEFINE_MDNODE_GET(DIBasicType,
(unsigned Tag, StringRef Name, uint64_t SizeInBits,
- uint64_t AlignInBits, unsigned Encoding),
+ uint32_t AlignInBits, unsigned Encoding),
(Tag, Name, SizeInBits, AlignInBits, Encoding))
DEFINE_MDNODE_GET(DIBasicType,
(unsigned Tag, MDString *Name, uint64_t SizeInBits,
- uint64_t AlignInBits, unsigned Encoding),
+ uint32_t AlignInBits, unsigned Encoding),
(Tag, Name, SizeInBits, AlignInBits, Encoding))
TempDIBasicType clone() const { return cloneImpl(); }
@@ -656,7 +700,7 @@ public:
}
};
-/// \brief Derived types.
+/// Derived types.
///
/// This includes qualified types, pointers, references, friends, typedefs, and
/// class members.
@@ -667,8 +711,8 @@ class DIDerivedType : public DIType {
friend class MDNode;
DIDerivedType(LLVMContext &C, StorageType Storage, unsigned Tag,
- unsigned Line, uint64_t SizeInBits, uint64_t AlignInBits,
- uint64_t OffsetInBits, unsigned Flags, ArrayRef<Metadata *> Ops)
+ unsigned Line, uint64_t SizeInBits, uint32_t AlignInBits,
+ uint64_t OffsetInBits, DIFlags Flags, ArrayRef<Metadata *> Ops)
: DIType(C, DIDerivedTypeKind, Storage, Tag, Line, SizeInBits,
AlignInBits, OffsetInBits, Flags, Ops) {}
~DIDerivedType() = default;
@@ -676,8 +720,8 @@ class DIDerivedType : public DIType {
static DIDerivedType *getImpl(LLVMContext &Context, unsigned Tag,
StringRef Name, DIFile *File, unsigned Line,
DIScopeRef Scope, DITypeRef BaseType,
- uint64_t SizeInBits, uint64_t AlignInBits,
- uint64_t OffsetInBits, unsigned Flags,
+ uint64_t SizeInBits, uint32_t AlignInBits,
+ uint64_t OffsetInBits, DIFlags Flags,
Metadata *ExtraData, StorageType Storage,
bool ShouldCreate = true) {
return getImpl(Context, Tag, getCanonicalMDString(Context, Name), File,
@@ -687,8 +731,8 @@ class DIDerivedType : public DIType {
static DIDerivedType *getImpl(LLVMContext &Context, unsigned Tag,
MDString *Name, Metadata *File, unsigned Line,
Metadata *Scope, Metadata *BaseType,
- uint64_t SizeInBits, uint64_t AlignInBits,
- uint64_t OffsetInBits, unsigned Flags,
+ uint64_t SizeInBits, uint32_t AlignInBits,
+ uint64_t OffsetInBits, DIFlags Flags,
Metadata *ExtraData, StorageType Storage,
bool ShouldCreate = true);
@@ -703,16 +747,16 @@ public:
DEFINE_MDNODE_GET(DIDerivedType,
(unsigned Tag, MDString *Name, Metadata *File,
unsigned Line, Metadata *Scope, Metadata *BaseType,
- uint64_t SizeInBits, uint64_t AlignInBits,
- uint64_t OffsetInBits, unsigned Flags,
+ uint64_t SizeInBits, uint32_t AlignInBits,
+ uint64_t OffsetInBits, DIFlags Flags,
Metadata *ExtraData = nullptr),
(Tag, Name, File, Line, Scope, BaseType, SizeInBits,
AlignInBits, OffsetInBits, Flags, ExtraData))
DEFINE_MDNODE_GET(DIDerivedType,
(unsigned Tag, StringRef Name, DIFile *File, unsigned Line,
DIScopeRef Scope, DITypeRef BaseType, uint64_t SizeInBits,
- uint64_t AlignInBits, uint64_t OffsetInBits,
- unsigned Flags, Metadata *ExtraData = nullptr),
+ uint32_t AlignInBits, uint64_t OffsetInBits,
+ DIFlags Flags, Metadata *ExtraData = nullptr),
(Tag, Name, File, Line, Scope, BaseType, SizeInBits,
AlignInBits, OffsetInBits, Flags, ExtraData))
@@ -722,7 +766,7 @@ public:
DITypeRef getBaseType() const { return DITypeRef(getRawBaseType()); }
Metadata *getRawBaseType() const { return getOperand(3); }
- /// \brief Get extra data associated with this derived type.
+ /// Get extra data associated with this derived type.
///
/// Class type for pointer-to-members, objective-c property node for ivars,
/// or global constant wrapper for static members.
@@ -732,7 +776,7 @@ public:
Metadata *getExtraData() const { return getRawExtraData(); }
Metadata *getRawExtraData() const { return getOperand(4); }
- /// \brief Get casted version of extra data.
+ /// Get casted version of extra data.
/// @{
DITypeRef getClassType() const {
assert(getTag() == dwarf::DW_TAG_ptr_to_member_type);
@@ -760,7 +804,7 @@ public:
}
};
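The derived-type getters follow the same pattern, now taking a 32-bit alignment and a DIFlags value; a minimal sketch of a pointer type wrapping the IntTy node from the previous sketch (File and Scope are left null here purely for illustration):

    llvm::DIDerivedType *PtrTy = llvm::DIDerivedType::get(
        Ctx, llvm::dwarf::DW_TAG_pointer_type, "", /*File=*/nullptr, /*Line=*/0,
        /*Scope=*/nullptr, IntTy, /*SizeInBits=*/64, /*AlignInBits=*/64,
        /*OffsetInBits=*/0, llvm::DINode::FlagZero, /*ExtraData=*/nullptr);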
-/// \brief Composite types.
+/// Composite types.
///
/// TODO: Detach from DerivedTypeBase (split out MDEnumType?).
/// TODO: Create a custom, unrelated node for DW_TAG_array_type.
@@ -772,7 +816,7 @@ class DICompositeType : public DIType {
DICompositeType(LLVMContext &C, StorageType Storage, unsigned Tag,
unsigned Line, unsigned RuntimeLang, uint64_t SizeInBits,
- uint64_t AlignInBits, uint64_t OffsetInBits, unsigned Flags,
+ uint32_t AlignInBits, uint64_t OffsetInBits, DIFlags Flags,
ArrayRef<Metadata *> Ops)
: DIType(C, DICompositeTypeKind, Storage, Tag, Line, SizeInBits,
AlignInBits, OffsetInBits, Flags, Ops),
@@ -781,8 +825,8 @@ class DICompositeType : public DIType {
/// Change fields in place.
void mutate(unsigned Tag, unsigned Line, unsigned RuntimeLang,
- uint64_t SizeInBits, uint64_t AlignInBits, uint64_t OffsetInBits,
- unsigned Flags) {
+ uint64_t SizeInBits, uint32_t AlignInBits,
+ uint64_t OffsetInBits, DIFlags Flags) {
assert(isDistinct() && "Only distinct nodes can mutate");
assert(getRawIdentifier() && "Only ODR-uniqued nodes should mutate");
this->RuntimeLang = RuntimeLang;
@@ -792,8 +836,8 @@ class DICompositeType : public DIType {
static DICompositeType *
getImpl(LLVMContext &Context, unsigned Tag, StringRef Name, Metadata *File,
unsigned Line, DIScopeRef Scope, DITypeRef BaseType,
- uint64_t SizeInBits, uint64_t AlignInBits, uint64_t OffsetInBits,
- uint64_t Flags, DINodeArray Elements, unsigned RuntimeLang,
+ uint64_t SizeInBits, uint32_t AlignInBits, uint64_t OffsetInBits,
+ DIFlags Flags, DINodeArray Elements, unsigned RuntimeLang,
DITypeRef VTableHolder, DITemplateParameterArray TemplateParams,
StringRef Identifier, StorageType Storage, bool ShouldCreate = true) {
return getImpl(
@@ -805,8 +849,8 @@ class DICompositeType : public DIType {
static DICompositeType *
getImpl(LLVMContext &Context, unsigned Tag, MDString *Name, Metadata *File,
unsigned Line, Metadata *Scope, Metadata *BaseType,
- uint64_t SizeInBits, uint64_t AlignInBits, uint64_t OffsetInBits,
- unsigned Flags, Metadata *Elements, unsigned RuntimeLang,
+ uint64_t SizeInBits, uint32_t AlignInBits, uint64_t OffsetInBits,
+ DIFlags Flags, Metadata *Elements, unsigned RuntimeLang,
Metadata *VTableHolder, Metadata *TemplateParams,
MDString *Identifier, StorageType Storage, bool ShouldCreate = true);
@@ -822,8 +866,8 @@ public:
DEFINE_MDNODE_GET(DICompositeType,
(unsigned Tag, StringRef Name, DIFile *File, unsigned Line,
DIScopeRef Scope, DITypeRef BaseType, uint64_t SizeInBits,
- uint64_t AlignInBits, uint64_t OffsetInBits,
- unsigned Flags, DINodeArray Elements, unsigned RuntimeLang,
+ uint32_t AlignInBits, uint64_t OffsetInBits,
+ DIFlags Flags, DINodeArray Elements, unsigned RuntimeLang,
DITypeRef VTableHolder,
DITemplateParameterArray TemplateParams = nullptr,
StringRef Identifier = ""),
@@ -833,8 +877,8 @@ public:
DEFINE_MDNODE_GET(DICompositeType,
(unsigned Tag, MDString *Name, Metadata *File,
unsigned Line, Metadata *Scope, Metadata *BaseType,
- uint64_t SizeInBits, uint64_t AlignInBits,
- uint64_t OffsetInBits, unsigned Flags, Metadata *Elements,
+ uint64_t SizeInBits, uint32_t AlignInBits,
+ uint64_t OffsetInBits, DIFlags Flags, Metadata *Elements,
unsigned RuntimeLang, Metadata *VTableHolder,
Metadata *TemplateParams = nullptr,
MDString *Identifier = nullptr),
@@ -854,8 +898,8 @@ public:
static DICompositeType *
getODRType(LLVMContext &Context, MDString &Identifier, unsigned Tag,
MDString *Name, Metadata *File, unsigned Line, Metadata *Scope,
- Metadata *BaseType, uint64_t SizeInBits, uint64_t AlignInBits,
- uint64_t OffsetInBits, unsigned Flags, Metadata *Elements,
+ Metadata *BaseType, uint64_t SizeInBits, uint32_t AlignInBits,
+ uint64_t OffsetInBits, DIFlags Flags, Metadata *Elements,
unsigned RuntimeLang, Metadata *VTableHolder,
Metadata *TemplateParams);
static DICompositeType *getODRTypeIfExists(LLVMContext &Context,
@@ -873,8 +917,8 @@ public:
static DICompositeType *
buildODRType(LLVMContext &Context, MDString &Identifier, unsigned Tag,
MDString *Name, Metadata *File, unsigned Line, Metadata *Scope,
- Metadata *BaseType, uint64_t SizeInBits, uint64_t AlignInBits,
- uint64_t OffsetInBits, unsigned Flags, Metadata *Elements,
+ Metadata *BaseType, uint64_t SizeInBits, uint32_t AlignInBits,
+ uint64_t OffsetInBits, DIFlags Flags, Metadata *Elements,
unsigned RuntimeLang, Metadata *VTableHolder,
Metadata *TemplateParams);
@@ -895,7 +939,7 @@ public:
Metadata *getRawTemplateParams() const { return getOperand(6); }
MDString *getRawIdentifier() const { return getOperandAs<MDString>(7); }
- /// \brief Replace operands.
+ /// Replace operands.
///
/// If this \a isUniqued() and not \a isResolved(), on a uniquing collision
/// this will be RAUW'ed and deleted. Use a \a TrackingMDRef to keep track
@@ -904,7 +948,7 @@ public:
void replaceElements(DINodeArray Elements) {
#ifndef NDEBUG
for (DINode *Op : getElements())
- assert(std::find(Elements->op_begin(), Elements->op_end(), Op) &&
+ assert(is_contained(Elements->operands(), Op) &&
"Lost a member during member list replacement");
#endif
replaceOperandWith(4, Elements.get());
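The membership check now goes through llvm::is_contained from STLExtras, shorthand for the find-and-compare idiom; the replaced form arguably could not fail at all, since it tested the iterator returned by std::find for truth rather than comparing it against op_end(). A small self-contained sketch of the helper:

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/ADT/SmallVector.h"

    bool hasTwo() {
      llvm::SmallVector<int, 4> Xs = {1, 2, 3};
      return llvm::is_contained(Xs, 2);   // find(Xs.begin(), Xs.end(), 2) != Xs.end()
    }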
@@ -922,7 +966,7 @@ public:
}
};
-/// \brief Type array for a subprogram.
+/// Type array for a subprogram.
///
/// TODO: Fold the array of types in directly as operands.
class DISubroutineType : public DIType {
@@ -933,20 +977,20 @@ class DISubroutineType : public DIType {
/// type dwarf::CallingConvention.
uint8_t CC;
- DISubroutineType(LLVMContext &C, StorageType Storage, unsigned Flags,
+ DISubroutineType(LLVMContext &C, StorageType Storage, DIFlags Flags,
uint8_t CC, ArrayRef<Metadata *> Ops)
: DIType(C, DISubroutineTypeKind, Storage, dwarf::DW_TAG_subroutine_type,
0, 0, 0, 0, Flags, Ops),
CC(CC) {}
~DISubroutineType() = default;
- static DISubroutineType *getImpl(LLVMContext &Context, unsigned Flags,
+ static DISubroutineType *getImpl(LLVMContext &Context, DIFlags Flags,
uint8_t CC, DITypeRefArray TypeArray,
StorageType Storage,
bool ShouldCreate = true) {
return getImpl(Context, Flags, CC, TypeArray.get(), Storage, ShouldCreate);
}
- static DISubroutineType *getImpl(LLVMContext &Context, unsigned Flags,
+ static DISubroutineType *getImpl(LLVMContext &Context, DIFlags Flags,
uint8_t CC, Metadata *TypeArray,
StorageType Storage,
bool ShouldCreate = true);
@@ -957,10 +1001,10 @@ class DISubroutineType : public DIType {
public:
DEFINE_MDNODE_GET(DISubroutineType,
- (unsigned Flags, uint8_t CC, DITypeRefArray TypeArray),
+ (DIFlags Flags, uint8_t CC, DITypeRefArray TypeArray),
(Flags, CC, TypeArray))
DEFINE_MDNODE_GET(DISubroutineType,
- (unsigned Flags, uint8_t CC, Metadata *TypeArray),
+ (DIFlags Flags, uint8_t CC, Metadata *TypeArray),
(Flags, CC, TypeArray))
TempDISubroutineType clone() const { return cloneImpl(); }
@@ -977,10 +1021,11 @@ public:
}
};
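Subroutine types now carry typed flags alongside the calling-convention byte; a minimal sketch, where Types is assumed to be an existing DITypeRefArray describing the return and parameter types:

    llvm::DISubroutineType *FnTy = llvm::DISubroutineType::get(
        Ctx, llvm::DINode::FlagZero, llvm::dwarf::DW_CC_normal, Types);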
-/// \brief Compile unit.
+/// Compile unit.
class DICompileUnit : public DIScope {
friend class LLVMContextImpl;
friend class MDNode;
+
public:
enum DebugEmissionKind : unsigned {
NoDebug = 0,
@@ -988,6 +1033,7 @@ public:
LineTablesOnly,
LastEmissionKind = LineTablesOnly
};
+
static Optional<DebugEmissionKind> getEmissionKind(StringRef Str);
static const char *EmissionKindString(DebugEmissionKind EK);
@@ -997,14 +1043,16 @@ private:
unsigned RuntimeVersion;
unsigned EmissionKind;
uint64_t DWOId;
+ bool SplitDebugInlining;
DICompileUnit(LLVMContext &C, StorageType Storage, unsigned SourceLanguage,
bool IsOptimized, unsigned RuntimeVersion,
- unsigned EmissionKind, uint64_t DWOId, ArrayRef<Metadata *> Ops)
+ unsigned EmissionKind, uint64_t DWOId, bool SplitDebugInlining,
+ ArrayRef<Metadata *> Ops)
: DIScope(C, DICompileUnitKind, Storage, dwarf::DW_TAG_compile_unit, Ops),
SourceLanguage(SourceLanguage), IsOptimized(IsOptimized),
RuntimeVersion(RuntimeVersion), EmissionKind(EmissionKind),
- DWOId(DWOId) {
+ DWOId(DWOId), SplitDebugInlining(SplitDebugInlining) {
assert(Storage != Uniqued);
}
~DICompileUnit() = default;
@@ -1014,15 +1062,18 @@ private:
StringRef Producer, bool IsOptimized, StringRef Flags,
unsigned RuntimeVersion, StringRef SplitDebugFilename,
unsigned EmissionKind, DICompositeTypeArray EnumTypes,
- DIScopeArray RetainedTypes, DIGlobalVariableArray GlobalVariables,
+ DIScopeArray RetainedTypes,
+ DIGlobalVariableExpressionArray GlobalVariables,
DIImportedEntityArray ImportedEntities, DIMacroNodeArray Macros,
- uint64_t DWOId, StorageType Storage, bool ShouldCreate = true) {
- return getImpl(
- Context, SourceLanguage, File, getCanonicalMDString(Context, Producer),
- IsOptimized, getCanonicalMDString(Context, Flags), RuntimeVersion,
- getCanonicalMDString(Context, SplitDebugFilename), EmissionKind,
- EnumTypes.get(), RetainedTypes.get(), GlobalVariables.get(),
- ImportedEntities.get(), Macros.get(), DWOId, Storage, ShouldCreate);
+ uint64_t DWOId, bool SplitDebugInlining, StorageType Storage,
+ bool ShouldCreate = true) {
+ return getImpl(Context, SourceLanguage, File,
+ getCanonicalMDString(Context, Producer), IsOptimized,
+ getCanonicalMDString(Context, Flags), RuntimeVersion,
+ getCanonicalMDString(Context, SplitDebugFilename),
+ EmissionKind, EnumTypes.get(), RetainedTypes.get(),
+ GlobalVariables.get(), ImportedEntities.get(), Macros.get(),
+ DWOId, SplitDebugInlining, Storage, ShouldCreate);
}
static DICompileUnit *
getImpl(LLVMContext &Context, unsigned SourceLanguage, Metadata *File,
@@ -1030,43 +1081,45 @@ private:
unsigned RuntimeVersion, MDString *SplitDebugFilename,
unsigned EmissionKind, Metadata *EnumTypes, Metadata *RetainedTypes,
Metadata *GlobalVariables, Metadata *ImportedEntities,
- Metadata *Macros, uint64_t DWOId, StorageType Storage,
- bool ShouldCreate = true);
+ Metadata *Macros, uint64_t DWOId, bool SplitDebugInlining,
+ StorageType Storage, bool ShouldCreate = true);
TempDICompileUnit cloneImpl() const {
- return getTemporary(
- getContext(), getSourceLanguage(), getFile(), getProducer(),
- isOptimized(), getFlags(), getRuntimeVersion(), getSplitDebugFilename(),
- getEmissionKind(), getEnumTypes(), getRetainedTypes(),
- getGlobalVariables(), getImportedEntities(), getMacros(), DWOId);
+ return getTemporary(getContext(), getSourceLanguage(), getFile(),
+ getProducer(), isOptimized(), getFlags(),
+ getRuntimeVersion(), getSplitDebugFilename(),
+ getEmissionKind(), getEnumTypes(), getRetainedTypes(),
+ getGlobalVariables(), getImportedEntities(),
+ getMacros(), DWOId, getSplitDebugInlining());
}
+public:
static void get() = delete;
static void getIfExists() = delete;
-public:
DEFINE_MDNODE_GET_DISTINCT_TEMPORARY(
DICompileUnit,
(unsigned SourceLanguage, DIFile *File, StringRef Producer,
bool IsOptimized, StringRef Flags, unsigned RuntimeVersion,
StringRef SplitDebugFilename, DebugEmissionKind EmissionKind,
DICompositeTypeArray EnumTypes, DIScopeArray RetainedTypes,
- DIGlobalVariableArray GlobalVariables,
+ DIGlobalVariableExpressionArray GlobalVariables,
DIImportedEntityArray ImportedEntities, DIMacroNodeArray Macros,
- uint64_t DWOId),
+ uint64_t DWOId, bool SplitDebugInlining),
(SourceLanguage, File, Producer, IsOptimized, Flags, RuntimeVersion,
SplitDebugFilename, EmissionKind, EnumTypes, RetainedTypes,
- GlobalVariables, ImportedEntities, Macros, DWOId))
+ GlobalVariables, ImportedEntities, Macros, DWOId, SplitDebugInlining))
DEFINE_MDNODE_GET_DISTINCT_TEMPORARY(
DICompileUnit,
(unsigned SourceLanguage, Metadata *File, MDString *Producer,
bool IsOptimized, MDString *Flags, unsigned RuntimeVersion,
MDString *SplitDebugFilename, unsigned EmissionKind, Metadata *EnumTypes,
Metadata *RetainedTypes, Metadata *GlobalVariables,
- Metadata *ImportedEntities, Metadata *Macros, uint64_t DWOId),
+ Metadata *ImportedEntities, Metadata *Macros, uint64_t DWOId,
+ bool SplitDebugInlining),
(SourceLanguage, File, Producer, IsOptimized, Flags, RuntimeVersion,
SplitDebugFilename, EmissionKind, EnumTypes, RetainedTypes,
- GlobalVariables, ImportedEntities, Macros, DWOId))
+ GlobalVariables, ImportedEntities, Macros, DWOId, SplitDebugInlining))
TempDICompileUnit clone() const { return cloneImpl(); }
@@ -1085,7 +1138,7 @@ public:
DIScopeArray getRetainedTypes() const {
return cast_or_null<MDTuple>(getRawRetainedTypes());
}
- DIGlobalVariableArray getGlobalVariables() const {
+ DIGlobalVariableExpressionArray getGlobalVariables() const {
return cast_or_null<MDTuple>(getRawGlobalVariables());
}
DIImportedEntityArray getImportedEntities() const {
@@ -1096,6 +1149,10 @@ public:
}
uint64_t getDWOId() const { return DWOId; }
void setDWOId(uint64_t DwoId) { DWOId = DwoId; }
+ bool getSplitDebugInlining() const { return SplitDebugInlining; }
+ void setSplitDebugInlining(bool SplitDebugInlining) {
+ this->SplitDebugInlining = SplitDebugInlining;
+ }
MDString *getRawProducer() const { return getOperandAs<MDString>(1); }
MDString *getRawFlags() const { return getOperandAs<MDString>(2); }
@@ -1108,7 +1165,7 @@ public:
Metadata *getRawImportedEntities() const { return getOperand(7); }
Metadata *getRawMacros() const { return getOperand(8); }
- /// \brief Replace arrays.
+ /// Replace arrays.
///
/// If this \a isUniqued() and not \a isResolved(), it will be RAUW'ed and
/// deleted on a uniquing collision. In practice, uniquing collisions on \a
@@ -1120,7 +1177,7 @@ public:
void replaceRetainedTypes(DITypeArray N) {
replaceOperandWith(5, N.get());
}
- void replaceGlobalVariables(DIGlobalVariableArray N) {
+ void replaceGlobalVariables(DIGlobalVariableExpressionArray N) {
replaceOperandWith(6, N.get());
}
void replaceImportedEntities(DIImportedEntityArray N) {
@@ -1134,7 +1191,7 @@ public:
}
};
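Two things change for compile units: the globals list now holds DIGlobalVariableExpression nodes (defined further down in this header), and a SplitDebugInlining bit travels alongside the DWO id. A minimal sketch of touching both on an existing distinct unit CU, assuming the typed array wrapper iterates as the other DI*Array typedefs do:

    CU->setSplitDebugInlining(false);
    for (llvm::DIGlobalVariableExpression *GVE : CU->getGlobalVariables()) {
      llvm::DIGlobalVariable *GV = GVE->getVariable();
      llvm::DIExpression *Init = GVE->getExpression();
      (void)GV;
      (void)Init;
    }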
-/// \brief A scope for locals.
+/// A scope for locals.
///
/// A legal scope for lexical blocks, local variables, and debug info
/// locations. Subclasses are \a DISubprogram, \a DILexicalBlock, and \a
@@ -1147,7 +1204,7 @@ protected:
~DILocalScope() = default;
public:
- /// \brief Get the subprogram for this scope.
+ /// Get the subprogram for this scope.
///
/// Return this if it's an \a DISubprogram; otherwise, look up the scope
/// chain.
@@ -1166,7 +1223,7 @@ public:
}
};
-/// \brief Debug location.
+/// Debug location.
///
/// A debug location in source code, used for debug info and otherwise.
class DILocation : public MDNode {
@@ -1196,10 +1253,10 @@ class DILocation : public MDNode {
getRawInlinedAt());
}
+public:
// Disallow replacing operands.
void replaceOperandWith(unsigned I, Metadata *New) = delete;
-public:
DEFINE_MDNODE_GET(DILocation,
(unsigned Line, unsigned Column, Metadata *Scope,
Metadata *InlinedAt = nullptr),
@@ -1209,7 +1266,7 @@ public:
DILocation *InlinedAt = nullptr),
(Line, Column, Scope, InlinedAt))
- /// \brief Return a (temporary) clone of this.
+ /// Return a (temporary) clone of this.
TempDILocation clone() const { return cloneImpl(); }
unsigned getLine() const { return SubclassData32; }
@@ -1223,7 +1280,7 @@ public:
StringRef getFilename() const { return getScope()->getFilename(); }
StringRef getDirectory() const { return getScope()->getDirectory(); }
- /// \brief Get the scope where this is inlined.
+ /// Get the scope where this is inlined.
///
/// Walk through \a getInlinedAt() and return \a getScope() from the deepest
/// location.
@@ -1233,7 +1290,7 @@ public:
return getScope();
}
- /// \brief Check whether this can be discriminated from another location.
+ /// Check whether this can be discriminated from another location.
///
/// Check \c this can be discriminated from \c RHS in a linetable entry.
/// Scope and inlined-at chains are not recorded in the linetable, so they
@@ -1250,12 +1307,33 @@ public:
return getFilename() != RHS.getFilename() || getLine() != RHS.getLine();
}
- /// \brief Get the DWARF discriminator.
+ /// Get the DWARF discriminator.
///
/// DWARF discriminators distinguish identical file locations between
/// instructions that are on different basic blocks.
inline unsigned getDiscriminator() const;
+ /// Returns a new DILocation with updated \p Discriminator.
+ inline DILocation *cloneWithDiscriminator(unsigned Discriminator) const;
+
+ /// When two instructions are combined into a single instruction we also
+ /// need to combine the original locations into a single location.
+ ///
+ /// When the locations are the same we can use either location. When they
+ /// differ, we need a third location which is distinct from either. If
+ /// they have the same file/line but have a different discriminator we
+ /// could create a location with a new discriminator. If they are from
+ /// different files/lines the location is ambiguous and can't be
+ /// represented in a single line entry. In this case, no location
+ /// should be set.
+ ///
+ /// Currently this function is simply a stub, and no location will be
+  /// used in any case.
+ static DILocation *getMergedLocation(const DILocation *LocA,
+ const DILocation *LocB) {
+ return nullptr;
+ }
+
Metadata *getRawScope() const { return getOperand(0); }
Metadata *getRawInlinedAt() const {
if (getNumOperands() == 2)
@@ -1268,7 +1346,7 @@ public:
}
};
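getMergedLocation above is the intended hook for passes that fold two instructions into one; since the current implementation is a stub that returns nullptr, callers must be prepared to drop the location. A minimal, hypothetical helper illustrating the pattern (the usual IR headers are assumed):

    static void transferMergedLoc(llvm::Instruction &New,
                                  const llvm::DILocation *A,
                                  const llvm::DILocation *B) {
      if (const llvm::DILocation *Merged =
              llvm::DILocation::getMergedLocation(A, B))
        New.setDebugLoc(Merged);
      // Otherwise leave the new instruction without a location.
    }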
-/// \brief Subprogram description.
+/// Subprogram description.
///
/// TODO: Remove DisplayName. It's always equal to Name.
/// TODO: Split up flags.
@@ -1289,8 +1367,6 @@ class DISubprogram : public DILocalScope {
// in 2 bits (none/pure/pure_virtual).
unsigned Virtuality : 2;
- unsigned Flags : 27;
-
// These are boolean flags so one bit is enough.
// MSVC starts a new container field every time the base
// type changes so we can't use 'bool' to ensure these bits
@@ -1299,19 +1375,22 @@ class DISubprogram : public DILocalScope {
unsigned IsDefinition : 1;
unsigned IsOptimized : 1;
+ unsigned Padding : 3;
+
+ DIFlags Flags;
+
DISubprogram(LLVMContext &C, StorageType Storage, unsigned Line,
unsigned ScopeLine, unsigned Virtuality, unsigned VirtualIndex,
- int ThisAdjustment, unsigned Flags, bool IsLocalToUnit,
+ int ThisAdjustment, DIFlags Flags, bool IsLocalToUnit,
bool IsDefinition, bool IsOptimized, ArrayRef<Metadata *> Ops)
: DILocalScope(C, DISubprogramKind, Storage, dwarf::DW_TAG_subprogram,
Ops),
Line(Line), ScopeLine(ScopeLine), VirtualIndex(VirtualIndex),
- ThisAdjustment(ThisAdjustment), Virtuality(Virtuality), Flags(Flags),
+ ThisAdjustment(ThisAdjustment), Virtuality(Virtuality),
IsLocalToUnit(IsLocalToUnit), IsDefinition(IsDefinition),
- IsOptimized(IsOptimized) {
+ IsOptimized(IsOptimized), Flags(Flags) {
static_assert(dwarf::DW_VIRTUALITY_max < 4, "Virtuality out of range");
assert(Virtuality < 4 && "Virtuality out of range");
- assert((Flags < (1 << 27)) && "Flags out of range");
}
~DISubprogram() = default;
@@ -1320,7 +1399,7 @@ class DISubprogram : public DILocalScope {
StringRef LinkageName, DIFile *File, unsigned Line,
DISubroutineType *Type, bool IsLocalToUnit, bool IsDefinition,
unsigned ScopeLine, DITypeRef ContainingType, unsigned Virtuality,
- unsigned VirtualIndex, int ThisAdjustment, unsigned Flags,
+ unsigned VirtualIndex, int ThisAdjustment, DIFlags Flags,
bool IsOptimized, DICompileUnit *Unit,
DITemplateParameterArray TemplateParams, DISubprogram *Declaration,
DILocalVariableArray Variables, StorageType Storage,
@@ -1337,7 +1416,7 @@ class DISubprogram : public DILocalScope {
MDString *LinkageName, Metadata *File, unsigned Line, Metadata *Type,
bool IsLocalToUnit, bool IsDefinition, unsigned ScopeLine,
Metadata *ContainingType, unsigned Virtuality, unsigned VirtualIndex,
- int ThisAdjustment, unsigned Flags, bool IsOptimized, Metadata *Unit,
+ int ThisAdjustment, DIFlags Flags, bool IsOptimized, Metadata *Unit,
Metadata *TemplateParams, Metadata *Declaration, Metadata *Variables,
StorageType Storage, bool ShouldCreate = true);
@@ -1356,7 +1435,7 @@ public:
DIFile *File, unsigned Line, DISubroutineType *Type,
bool IsLocalToUnit, bool IsDefinition, unsigned ScopeLine,
DITypeRef ContainingType, unsigned Virtuality,
- unsigned VirtualIndex, int ThisAdjustment, unsigned Flags,
+ unsigned VirtualIndex, int ThisAdjustment, DIFlags Flags,
bool IsOptimized, DICompileUnit *Unit,
DITemplateParameterArray TemplateParams = nullptr,
DISubprogram *Declaration = nullptr,
@@ -1370,7 +1449,7 @@ public:
(Metadata * Scope, MDString *Name, MDString *LinkageName, Metadata *File,
unsigned Line, Metadata *Type, bool IsLocalToUnit, bool IsDefinition,
unsigned ScopeLine, Metadata *ContainingType, unsigned Virtuality,
- unsigned VirtualIndex, int ThisAdjustment, unsigned Flags,
+ unsigned VirtualIndex, int ThisAdjustment, DIFlags Flags,
bool IsOptimized, Metadata *Unit, Metadata *TemplateParams = nullptr,
Metadata *Declaration = nullptr, Metadata *Variables = nullptr),
(Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit, IsDefinition,
@@ -1385,12 +1464,12 @@ public:
unsigned getVirtualIndex() const { return VirtualIndex; }
int getThisAdjustment() const { return ThisAdjustment; }
unsigned getScopeLine() const { return ScopeLine; }
- unsigned getFlags() const { return Flags; }
+ DIFlags getFlags() const { return Flags; }
bool isLocalToUnit() const { return IsLocalToUnit; }
bool isDefinition() const { return IsDefinition; }
bool isOptimized() const { return IsOptimized; }
- unsigned isArtificial() const { return getFlags() & FlagArtificial; }
+ bool isArtificial() const { return getFlags() & FlagArtificial; }
bool isPrivate() const {
return (getFlags() & FlagAccessibility) == FlagPrivate;
}
@@ -1402,22 +1481,24 @@ public:
}
bool isExplicit() const { return getFlags() & FlagExplicit; }
bool isPrototyped() const { return getFlags() & FlagPrototyped; }
+ bool isMainSubprogram() const { return getFlags() & FlagMainSubprogram; }
- /// \brief Check if this is reference-qualified.
+ /// Check if this is reference-qualified.
///
/// Return true if this subprogram is a C++11 reference-qualified non-static
/// member function (void foo() &).
- unsigned isLValueReference() const {
- return getFlags() & FlagLValueReference;
- }
+ bool isLValueReference() const { return getFlags() & FlagLValueReference; }
- /// \brief Check if this is rvalue-reference-qualified.
+ /// Check if this is rvalue-reference-qualified.
///
/// Return true if this subprogram is a C++11 rvalue-reference-qualified
/// non-static member function (void foo() &&).
- unsigned isRValueReference() const {
- return getFlags() & FlagRValueReference;
- }
+ bool isRValueReference() const { return getFlags() & FlagRValueReference; }
+
+ /// Check if this is marked as noreturn.
+ ///
+  /// Return true if this subprogram is marked C++11 noreturn or C11 _Noreturn.
+ bool isNoReturn() const { return getFlags() & FlagNoReturn; }
DIScopeRef getScope() const { return DIScopeRef(getRawScope()); }
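All of the predicate accessors above now return bool, and isMainSubprogram/isNoReturn are new; a minimal sketch of querying them from a function F (Function::getSubprogram is the usual way to reach the attached DISubprogram):

    if (llvm::DISubprogram *SP = F.getSubprogram()) {
      if (SP->isNoReturn() && !SP->isArtificial()) {
        // e.g. treat user-written noreturn functions specially (hypothetical policy)
      }
    }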
@@ -1459,7 +1540,7 @@ public:
Metadata *getRawDeclaration() const { return getOperand(9); }
Metadata *getRawVariables() const { return getOperand(10); }
- /// \brief Check if this subprogram describes the given function.
+ /// Check if this subprogram describes the given function.
///
/// FIXME: Should this be looking through bitcasts?
bool describes(const Function *F) const;
@@ -1596,45 +1677,66 @@ unsigned DILocation::getDiscriminator() const {
return 0;
}
+DILocation *DILocation::cloneWithDiscriminator(unsigned Discriminator) const {
+ DIScope *Scope = getScope();
+ // Skip all parent DILexicalBlockFile that already have a discriminator
+ // assigned. We do not want to have nested DILexicalBlockFiles that have
+  // multiple discriminators because only the leaf DILexicalBlockFile's
+  // discriminator will be used.
+ for (auto *LBF = dyn_cast<DILexicalBlockFile>(Scope);
+ LBF && LBF->getDiscriminator() != 0;
+ LBF = dyn_cast<DILexicalBlockFile>(Scope))
+ Scope = LBF->getScope();
+ DILexicalBlockFile *NewScope =
+ DILexicalBlockFile::get(getContext(), Scope, getFile(), Discriminator);
+ return DILocation::get(getContext(), getLine(), getColumn(), NewScope,
+ getInlinedAt());
+}
+
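As the body above shows, cloneWithDiscriminator rewraps the innermost non-discriminated scope in a DILexicalBlockFile carrying the new discriminator and rebuilds the location around it; a minimal sketch of assigning discriminator D to an existing location Loc:

    llvm::DILocation *NewLoc = Loc->cloneWithDiscriminator(D);
    // NewLoc keeps Loc's line, column and inlined-at chain, but its scope is a
    // DILexicalBlockFile whose discriminator is D.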
class DINamespace : public DIScope {
friend class LLVMContextImpl;
friend class MDNode;
unsigned Line;
+ unsigned ExportSymbols : 1;
DINamespace(LLVMContext &Context, StorageType Storage, unsigned Line,
- ArrayRef<Metadata *> Ops)
+ bool ExportSymbols, ArrayRef<Metadata *> Ops)
: DIScope(Context, DINamespaceKind, Storage, dwarf::DW_TAG_namespace,
Ops),
- Line(Line) {}
+ Line(Line), ExportSymbols(ExportSymbols) {}
~DINamespace() = default;
static DINamespace *getImpl(LLVMContext &Context, DIScope *Scope,
DIFile *File, StringRef Name, unsigned Line,
- StorageType Storage, bool ShouldCreate = true) {
+ bool ExportSymbols, StorageType Storage,
+ bool ShouldCreate = true) {
return getImpl(Context, Scope, File, getCanonicalMDString(Context, Name),
- Line, Storage, ShouldCreate);
+ Line, ExportSymbols, Storage, ShouldCreate);
}
static DINamespace *getImpl(LLVMContext &Context, Metadata *Scope,
Metadata *File, MDString *Name, unsigned Line,
- StorageType Storage, bool ShouldCreate = true);
+ bool ExportSymbols, StorageType Storage,
+ bool ShouldCreate = true);
TempDINamespace cloneImpl() const {
return getTemporary(getContext(), getScope(), getFile(), getName(),
- getLine());
+ getLine(), getExportSymbols());
}
public:
DEFINE_MDNODE_GET(DINamespace, (DIScope * Scope, DIFile *File, StringRef Name,
- unsigned Line),
- (Scope, File, Name, Line))
- DEFINE_MDNODE_GET(DINamespace, (Metadata * Scope, Metadata *File,
- MDString *Name, unsigned Line),
- (Scope, File, Name, Line))
+ unsigned Line, bool ExportSymbols),
+ (Scope, File, Name, Line, ExportSymbols))
+ DEFINE_MDNODE_GET(DINamespace,
+ (Metadata * Scope, Metadata *File, MDString *Name,
+ unsigned Line, bool ExportSymbols),
+ (Scope, File, Name, Line, ExportSymbols))
TempDINamespace clone() const { return cloneImpl(); }
unsigned getLine() const { return Line; }
+ bool getExportSymbols() const { return ExportSymbols; }
DIScope *getScope() const { return cast_or_null<DIScope>(getRawScope()); }
StringRef getName() const { return getStringOperand(2); }
@@ -1646,7 +1748,7 @@ public:
}
};
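The new ExportSymbols bit records namespaces whose members are visible in the enclosing scope (for example C++ inline namespaces); a minimal sketch, assuming Scope and File are existing nodes:

    llvm::DINamespace *NS = llvm::DINamespace::get(
        Ctx, Scope, File, "detail", /*Line=*/0, /*ExportSymbols=*/true);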
-/// \brief A (clang) module that has been imported by the compile unit.
+/// A (clang) module that has been imported by the compile unit.
///
class DIModule : public DIScope {
friend class LLVMContextImpl;
@@ -1654,7 +1756,7 @@ class DIModule : public DIScope {
DIModule(LLVMContext &Context, StorageType Storage, ArrayRef<Metadata *> Ops)
: DIScope(Context, DIModuleKind, Storage, dwarf::DW_TAG_module, Ops) {}
- ~DIModule() {}
+ ~DIModule() = default;
static DIModule *getImpl(LLVMContext &Context, DIScope *Scope,
StringRef Name, StringRef ConfigurationMacros,
@@ -1706,7 +1808,7 @@ public:
}
};
-/// \brief Base class for template parameters.
+/// Base class for template parameters.
class DITemplateParameter : public DINode {
protected:
DITemplateParameter(LLVMContext &Context, unsigned ID, StorageType Storage,
@@ -1808,14 +1910,16 @@ public:
}
};
-/// \brief Base class for variables.
+/// Base class for variables.
class DIVariable : public DINode {
unsigned Line;
+ uint32_t AlignInBits;
protected:
DIVariable(LLVMContext &C, unsigned ID, StorageType Storage, unsigned Line,
- ArrayRef<Metadata *> Ops)
- : DINode(C, ID, Storage, dwarf::DW_TAG_variable, Ops), Line(Line) {}
+ ArrayRef<Metadata *> Ops, uint32_t AlignInBits = 0)
+ : DINode(C, ID, Storage, dwarf::DW_TAG_variable, Ops), Line(Line),
+ AlignInBits(AlignInBits) {}
~DIVariable() = default;
public:
@@ -1824,6 +1928,8 @@ public:
StringRef getName() const { return getStringOperand(1); }
DIFile *getFile() const { return cast_or_null<DIFile>(getRawFile()); }
DITypeRef getType() const { return DITypeRef(getRawType()); }
+ uint32_t getAlignInBits() const { return AlignInBits; }
+ uint32_t getAlignInBytes() const { return getAlignInBits() / CHAR_BIT; }
StringRef getFilename() const {
if (auto *F = getFile())
@@ -1847,173 +1953,11 @@ public:
}
};
-/// \brief Global variables.
-///
-/// TODO: Remove DisplayName. It's always equal to Name.
-class DIGlobalVariable : public DIVariable {
- friend class LLVMContextImpl;
- friend class MDNode;
-
- bool IsLocalToUnit;
- bool IsDefinition;
-
- DIGlobalVariable(LLVMContext &C, StorageType Storage, unsigned Line,
- bool IsLocalToUnit, bool IsDefinition,
- ArrayRef<Metadata *> Ops)
- : DIVariable(C, DIGlobalVariableKind, Storage, Line, Ops),
- IsLocalToUnit(IsLocalToUnit), IsDefinition(IsDefinition) {}
- ~DIGlobalVariable() = default;
-
- static DIGlobalVariable *
- getImpl(LLVMContext &Context, DIScope *Scope, StringRef Name,
- StringRef LinkageName, DIFile *File, unsigned Line, DITypeRef Type,
- bool IsLocalToUnit, bool IsDefinition, Constant *Variable,
- DIDerivedType *StaticDataMemberDeclaration, StorageType Storage,
- bool ShouldCreate = true) {
- return getImpl(Context, Scope, getCanonicalMDString(Context, Name),
- getCanonicalMDString(Context, LinkageName), File, Line, Type,
- IsLocalToUnit, IsDefinition,
- Variable ? ConstantAsMetadata::get(Variable) : nullptr,
- StaticDataMemberDeclaration, Storage, ShouldCreate);
- }
- static DIGlobalVariable *
- getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name,
- MDString *LinkageName, Metadata *File, unsigned Line, Metadata *Type,
- bool IsLocalToUnit, bool IsDefinition, Metadata *Variable,
- Metadata *StaticDataMemberDeclaration, StorageType Storage,
- bool ShouldCreate = true);
-
- TempDIGlobalVariable cloneImpl() const {
- return getTemporary(getContext(), getScope(), getName(), getLinkageName(),
- getFile(), getLine(), getType(), isLocalToUnit(),
- isDefinition(), getVariable(),
- getStaticDataMemberDeclaration());
- }
-
-public:
- DEFINE_MDNODE_GET(DIGlobalVariable,
- (DIScope * Scope, StringRef Name, StringRef LinkageName,
- DIFile *File, unsigned Line, DITypeRef Type,
- bool IsLocalToUnit, bool IsDefinition, Constant *Variable,
- DIDerivedType *StaticDataMemberDeclaration),
- (Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit,
- IsDefinition, Variable, StaticDataMemberDeclaration))
- DEFINE_MDNODE_GET(DIGlobalVariable,
- (Metadata * Scope, MDString *Name, MDString *LinkageName,
- Metadata *File, unsigned Line, Metadata *Type,
- bool IsLocalToUnit, bool IsDefinition, Metadata *Variable,
- Metadata *StaticDataMemberDeclaration),
- (Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit,
- IsDefinition, Variable, StaticDataMemberDeclaration))
-
- TempDIGlobalVariable clone() const { return cloneImpl(); }
-
- bool isLocalToUnit() const { return IsLocalToUnit; }
- bool isDefinition() const { return IsDefinition; }
- StringRef getDisplayName() const { return getStringOperand(4); }
- StringRef getLinkageName() const { return getStringOperand(5); }
- Constant *getVariable() const {
- if (auto *C = cast_or_null<ConstantAsMetadata>(getRawVariable()))
- return dyn_cast<Constant>(C->getValue());
- return nullptr;
- }
- DIDerivedType *getStaticDataMemberDeclaration() const {
- return cast_or_null<DIDerivedType>(getRawStaticDataMemberDeclaration());
- }
-
- MDString *getRawLinkageName() const { return getOperandAs<MDString>(5); }
- Metadata *getRawVariable() const { return getOperand(6); }
- Metadata *getRawStaticDataMemberDeclaration() const { return getOperand(7); }
-
- static bool classof(const Metadata *MD) {
- return MD->getMetadataID() == DIGlobalVariableKind;
- }
-};
-
-/// \brief Local variable.
-///
-/// TODO: Split up flags.
-class DILocalVariable : public DIVariable {
- friend class LLVMContextImpl;
- friend class MDNode;
-
- unsigned Arg : 16;
- unsigned Flags : 16;
-
- DILocalVariable(LLVMContext &C, StorageType Storage, unsigned Line,
- unsigned Arg, unsigned Flags, ArrayRef<Metadata *> Ops)
- : DIVariable(C, DILocalVariableKind, Storage, Line, Ops), Arg(Arg),
- Flags(Flags) {
- assert(Flags < (1 << 16) && "DILocalVariable: Flags out of range");
- assert(Arg < (1 << 16) && "DILocalVariable: Arg out of range");
- }
- ~DILocalVariable() = default;
-
- static DILocalVariable *getImpl(LLVMContext &Context, DIScope *Scope,
- StringRef Name, DIFile *File, unsigned Line,
- DITypeRef Type, unsigned Arg, unsigned Flags,
- StorageType Storage,
- bool ShouldCreate = true) {
- return getImpl(Context, Scope, getCanonicalMDString(Context, Name), File,
- Line, Type, Arg, Flags, Storage, ShouldCreate);
- }
- static DILocalVariable *getImpl(LLVMContext &Context, Metadata *Scope,
- MDString *Name, Metadata *File, unsigned Line,
- Metadata *Type, unsigned Arg, unsigned Flags,
- StorageType Storage,
- bool ShouldCreate = true);
-
- TempDILocalVariable cloneImpl() const {
- return getTemporary(getContext(), getScope(), getName(), getFile(),
- getLine(), getType(), getArg(), getFlags());
- }
-
-public:
- DEFINE_MDNODE_GET(DILocalVariable,
- (DILocalScope * Scope, StringRef Name, DIFile *File,
- unsigned Line, DITypeRef Type, unsigned Arg,
- unsigned Flags),
- (Scope, Name, File, Line, Type, Arg, Flags))
- DEFINE_MDNODE_GET(DILocalVariable,
- (Metadata * Scope, MDString *Name, Metadata *File,
- unsigned Line, Metadata *Type, unsigned Arg,
- unsigned Flags),
- (Scope, Name, File, Line, Type, Arg, Flags))
-
- TempDILocalVariable clone() const { return cloneImpl(); }
-
- /// \brief Get the local scope for this variable.
- ///
- /// Variables must be defined in a local scope.
- DILocalScope *getScope() const {
- return cast<DILocalScope>(DIVariable::getScope());
- }
-
- bool isParameter() const { return Arg; }
- unsigned getArg() const { return Arg; }
- unsigned getFlags() const { return Flags; }
-
- bool isArtificial() const { return getFlags() & FlagArtificial; }
- bool isObjectPointer() const { return getFlags() & FlagObjectPointer; }
-
- /// \brief Check that a location is valid for this variable.
- ///
- /// Check that \c DL exists, is in the same subprogram, and has the same
- /// inlined-at location as \c this. (Otherwise, it's not a valid attachment
- /// to a \a DbgInfoIntrinsic.)
- bool isValidLocationForIntrinsic(const DILocation *DL) const {
- return DL && getScope()->getSubprogram() == DL->getScope()->getSubprogram();
- }
-
- static bool classof(const Metadata *MD) {
- return MD->getMetadataID() == DILocalVariableKind;
- }
-};
-
-/// \brief DWARF expression.
+/// DWARF expression.
///
/// This is (almost) a DWARF expression that modifies the location of a
-/// variable or (or the location of a single piece of a variable).
+/// variable, or the location of a single piece of a variable, or (when using
+/// DW_OP_stack_value) is the constant variable value.
///
/// FIXME: Instead of DW_OP_plus taking an argument, this should use DW_OP_const
/// and have DW_OP_plus consume the topmost elements on the stack.
@@ -2053,53 +1997,49 @@ public:
return Elements[I];
}
- /// \brief Return whether this is a piece of an aggregate variable.
- bool isBitPiece() const;
-
- /// \brief Return the offset of this piece in bits.
- uint64_t getBitPieceOffset() const;
-
- /// \brief Return the size of this piece in bits.
- uint64_t getBitPieceSize() const;
+ /// Determine whether this represents a standalone constant value.
+ bool isConstant() const;
typedef ArrayRef<uint64_t>::iterator element_iterator;
element_iterator elements_begin() const { return getElements().begin(); }
element_iterator elements_end() const { return getElements().end(); }
- /// \brief A lightweight wrapper around an expression operand.
+ /// A lightweight wrapper around an expression operand.
///
/// TODO: Store arguments directly and change \a DIExpression to store a
/// range of these.
class ExprOperand {
- const uint64_t *Op;
+ const uint64_t *Op = nullptr;
public:
+ ExprOperand() = default;
explicit ExprOperand(const uint64_t *Op) : Op(Op) {}
const uint64_t *get() const { return Op; }
- /// \brief Get the operand code.
+ /// Get the operand code.
uint64_t getOp() const { return *Op; }
- /// \brief Get an argument to the operand.
+ /// Get an argument to the operand.
///
/// Never returns the operand itself.
uint64_t getArg(unsigned I) const { return Op[I + 1]; }
unsigned getNumArgs() const { return getSize() - 1; }
- /// \brief Return the size of the operand.
+ /// Return the size of the operand.
///
/// Return the number of elements in the operand (1 + args).
unsigned getSize() const;
};
- /// \brief An iterator for expression operands.
+ /// An iterator for expression operands.
class expr_op_iterator
: public std::iterator<std::input_iterator_tag, ExprOperand> {
ExprOperand Op;
public:
+ expr_op_iterator() = default;
explicit expr_op_iterator(element_iterator I) : Op(I) {}
element_iterator getBase() const { return Op.get(); }
@@ -2116,7 +2056,7 @@ public:
return T;
}
- /// \brief Get the next iterator.
+ /// Get the next iterator.
///
/// \a std::next() doesn't work because this is technically an
/// input_iterator, but it's a perfectly valid operation. This is an
@@ -2134,7 +2074,7 @@ public:
void increment() { Op = ExprOperand(getBase() + Op.getSize()); }
};
- /// \brief Visit the elements via ExprOperand wrappers.
+ /// Visit the elements via ExprOperand wrappers.
///
/// These range iterators visit elements through \a ExprOperand wrappers.
/// This is not guaranteed to be a valid range unless \a isValid() gives \c
@@ -2155,6 +2095,189 @@ public:
static bool classof(const Metadata *MD) {
return MD->getMetadataID() == DIExpressionKind;
}
+
+  /// Is the first element a DW_OP_deref?
+ bool startsWithDeref() const {
+ return getNumElements() > 0 && getElement(0) == dwarf::DW_OP_deref;
+ }
+
+ /// Holds the characteristics of one fragment of a larger variable.
+ struct FragmentInfo {
+ uint64_t SizeInBits;
+ uint64_t OffsetInBits;
+ };
+
+ /// Retrieve the details of this fragment expression.
+ static Optional<FragmentInfo> getFragmentInfo(expr_op_iterator Start,
+ expr_op_iterator End);
+
+ /// Retrieve the details of this fragment expression.
+ Optional<FragmentInfo> getFragmentInfo() const {
+ return getFragmentInfo(expr_op_begin(), expr_op_end());
+ }
+
+ /// Return whether this is a piece of an aggregate variable.
+ bool isFragment() const { return getFragmentInfo().hasValue(); }
+};
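Fragment queries replace the old bit-piece accessors: a fragment expression describes only a slice of the variable, identified by a bit offset and size. A minimal sketch of inspecting an existing DIExpression Expr:

    if (auto Fragment = Expr->getFragmentInfo())
      llvm::errs() << "fragment of " << Fragment->SizeInBits
                   << " bits at bit offset " << Fragment->OffsetInBits << "\n";
    else
      llvm::errs() << "expression covers the whole variable\n";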
+
+/// Global variables.
+///
+/// TODO: Remove DisplayName. It's always equal to Name.
+class DIGlobalVariable : public DIVariable {
+ friend class LLVMContextImpl;
+ friend class MDNode;
+
+ bool IsLocalToUnit;
+ bool IsDefinition;
+
+ DIGlobalVariable(LLVMContext &C, StorageType Storage, unsigned Line,
+ bool IsLocalToUnit, bool IsDefinition, uint32_t AlignInBits,
+ ArrayRef<Metadata *> Ops)
+ : DIVariable(C, DIGlobalVariableKind, Storage, Line, Ops, AlignInBits),
+ IsLocalToUnit(IsLocalToUnit), IsDefinition(IsDefinition) {}
+ ~DIGlobalVariable() = default;
+
+ static DIGlobalVariable *getImpl(LLVMContext &Context, DIScope *Scope,
+ StringRef Name, StringRef LinkageName,
+ DIFile *File, unsigned Line, DITypeRef Type,
+ bool IsLocalToUnit, bool IsDefinition,
+ DIDerivedType *StaticDataMemberDeclaration,
+ uint32_t AlignInBits, StorageType Storage,
+ bool ShouldCreate = true) {
+ return getImpl(Context, Scope, getCanonicalMDString(Context, Name),
+ getCanonicalMDString(Context, LinkageName), File, Line, Type,
+ IsLocalToUnit, IsDefinition, StaticDataMemberDeclaration,
+ AlignInBits, Storage, ShouldCreate);
+ }
+ static DIGlobalVariable *
+ getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name,
+ MDString *LinkageName, Metadata *File, unsigned Line, Metadata *Type,
+ bool IsLocalToUnit, bool IsDefinition,
+ Metadata *StaticDataMemberDeclaration, uint32_t AlignInBits,
+ StorageType Storage, bool ShouldCreate = true);
+
+ TempDIGlobalVariable cloneImpl() const {
+ return getTemporary(getContext(), getScope(), getName(), getLinkageName(),
+ getFile(), getLine(), getType(), isLocalToUnit(),
+ isDefinition(), getStaticDataMemberDeclaration(),
+ getAlignInBits());
+ }
+
+public:
+ DEFINE_MDNODE_GET(DIGlobalVariable,
+ (DIScope * Scope, StringRef Name, StringRef LinkageName,
+ DIFile *File, unsigned Line, DITypeRef Type,
+ bool IsLocalToUnit, bool IsDefinition,
+ DIDerivedType *StaticDataMemberDeclaration,
+ uint32_t AlignInBits),
+ (Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit,
+ IsDefinition, StaticDataMemberDeclaration, AlignInBits))
+ DEFINE_MDNODE_GET(DIGlobalVariable,
+ (Metadata * Scope, MDString *Name, MDString *LinkageName,
+ Metadata *File, unsigned Line, Metadata *Type,
+ bool IsLocalToUnit, bool IsDefinition,
+ Metadata *StaticDataMemberDeclaration,
+ uint32_t AlignInBits),
+ (Scope, Name, LinkageName, File, Line, Type, IsLocalToUnit,
+ IsDefinition, StaticDataMemberDeclaration, AlignInBits))
+
+ TempDIGlobalVariable clone() const { return cloneImpl(); }
+
+ bool isLocalToUnit() const { return IsLocalToUnit; }
+ bool isDefinition() const { return IsDefinition; }
+ StringRef getDisplayName() const { return getStringOperand(4); }
+ StringRef getLinkageName() const { return getStringOperand(5); }
+ DIDerivedType *getStaticDataMemberDeclaration() const {
+ return cast_or_null<DIDerivedType>(getRawStaticDataMemberDeclaration());
+ }
+
+ MDString *getRawLinkageName() const { return getOperandAs<MDString>(5); }
+ Metadata *getRawStaticDataMemberDeclaration() const { return getOperand(6); }
+
+ static bool classof(const Metadata *MD) {
+ return MD->getMetadataID() == DIGlobalVariableKind;
+ }
+};
+
+/// Local variable.
+///
+/// TODO: Split up flags.
+class DILocalVariable : public DIVariable {
+ friend class LLVMContextImpl;
+ friend class MDNode;
+
+ unsigned Arg : 16;
+ DIFlags Flags;
+
+ DILocalVariable(LLVMContext &C, StorageType Storage, unsigned Line,
+ unsigned Arg, DIFlags Flags, uint32_t AlignInBits,
+ ArrayRef<Metadata *> Ops)
+ : DIVariable(C, DILocalVariableKind, Storage, Line, Ops, AlignInBits),
+ Arg(Arg), Flags(Flags) {
+ assert(Arg < (1 << 16) && "DILocalVariable: Arg out of range");
+ }
+ ~DILocalVariable() = default;
+
+ static DILocalVariable *getImpl(LLVMContext &Context, DIScope *Scope,
+ StringRef Name, DIFile *File, unsigned Line,
+ DITypeRef Type, unsigned Arg, DIFlags Flags,
+ uint32_t AlignInBits, StorageType Storage,
+ bool ShouldCreate = true) {
+ return getImpl(Context, Scope, getCanonicalMDString(Context, Name), File,
+ Line, Type, Arg, Flags, AlignInBits, Storage, ShouldCreate);
+ }
+ static DILocalVariable *getImpl(LLVMContext &Context, Metadata *Scope,
+ MDString *Name, Metadata *File, unsigned Line,
+ Metadata *Type, unsigned Arg, DIFlags Flags,
+ uint32_t AlignInBits, StorageType Storage,
+ bool ShouldCreate = true);
+
+ TempDILocalVariable cloneImpl() const {
+ return getTemporary(getContext(), getScope(), getName(), getFile(),
+ getLine(), getType(), getArg(), getFlags(),
+ getAlignInBits());
+ }
+
+public:
+ DEFINE_MDNODE_GET(DILocalVariable,
+ (DILocalScope * Scope, StringRef Name, DIFile *File,
+ unsigned Line, DITypeRef Type, unsigned Arg,
+ DIFlags Flags, uint32_t AlignInBits),
+ (Scope, Name, File, Line, Type, Arg, Flags, AlignInBits))
+ DEFINE_MDNODE_GET(DILocalVariable,
+ (Metadata * Scope, MDString *Name, Metadata *File,
+ unsigned Line, Metadata *Type, unsigned Arg,
+ DIFlags Flags, uint32_t AlignInBits),
+ (Scope, Name, File, Line, Type, Arg, Flags, AlignInBits))
+
+ TempDILocalVariable clone() const { return cloneImpl(); }
+
+ /// Get the local scope for this variable.
+ ///
+ /// Variables must be defined in a local scope.
+ DILocalScope *getScope() const {
+ return cast<DILocalScope>(DIVariable::getScope());
+ }
+
+ bool isParameter() const { return Arg; }
+ unsigned getArg() const { return Arg; }
+ DIFlags getFlags() const { return Flags; }
+
+ bool isArtificial() const { return getFlags() & FlagArtificial; }
+ bool isObjectPointer() const { return getFlags() & FlagObjectPointer; }
+
+ /// Check that a location is valid for this variable.
+ ///
+ /// Check that \c DL exists, is in the same subprogram, and has the same
+ /// inlined-at location as \c this. (Otherwise, it's not a valid attachment
+ /// to a \a DbgInfoIntrinsic.)
+ bool isValidLocationForIntrinsic(const DILocation *DL) const {
+ return DL && getScope()->getSubprogram() == DL->getScope()->getSubprogram();
+ }
+
+ static bool classof(const Metadata *MD) {
+ return MD->getMetadataID() == DILocalVariableKind;
+ }
};
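Local variables now record an explicit alignment next to the typed flag set (the constructor default of 0 leaves it unspecified); a minimal sketch, assuming Scope is an existing DILocalScope (for example the enclosing DISubprogram) and File and Ty are existing nodes:

    llvm::DILocalVariable *Var = llvm::DILocalVariable::get(
        Ctx, Scope, "buffer", File, /*Line=*/42, Ty,
        /*Arg=*/0, llvm::DINode::FlagZero, /*AlignInBits=*/128);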
class DIObjCProperty : public DINode {
@@ -2238,7 +2361,7 @@ public:
}
};
-/// \brief An imported module (C++ using directive or similar).
+/// An imported module (C++ using directive or similar).
class DIImportedEntity : public DINode {
friend class LLVMContextImpl;
friend class MDNode;
@@ -2295,7 +2418,46 @@ public:
}
};
-/// \brief Macro Info DWARF-like metadata node.
+/// A pair of DIGlobalVariable and DIExpression.
+class DIGlobalVariableExpression : public MDNode {
+ friend class LLVMContextImpl;
+ friend class MDNode;
+
+ DIGlobalVariableExpression(LLVMContext &C, StorageType Storage,
+ ArrayRef<Metadata *> Ops)
+ : MDNode(C, DIGlobalVariableExpressionKind, Storage, Ops) {}
+ ~DIGlobalVariableExpression() = default;
+
+ static DIGlobalVariableExpression *
+ getImpl(LLVMContext &Context, Metadata *Variable, Metadata *Expression,
+ StorageType Storage, bool ShouldCreate = true);
+
+ TempDIGlobalVariableExpression cloneImpl() const {
+ return getTemporary(getContext(), getVariable(), getExpression());
+ }
+
+public:
+ DEFINE_MDNODE_GET(DIGlobalVariableExpression,
+ (Metadata * Variable, Metadata *Expression),
+ (Variable, Expression))
+
+ TempDIGlobalVariableExpression clone() const { return cloneImpl(); }
+
+ Metadata *getRawVariable() const { return getOperand(0); }
+ DIGlobalVariable *getVariable() const {
+ return cast_or_null<DIGlobalVariable>(getRawVariable());
+ }
+ Metadata *getRawExpression() const { return getOperand(1); }
+ DIExpression *getExpression() const {
+ return cast_or_null<DIExpression>(getRawExpression());
+ }
+
+ static bool classof(const Metadata *MD) {
+ return MD->getMetadataID() == DIGlobalVariableExpressionKind;
+ }
+};
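With the Constant operand gone from DIGlobalVariable, a global is now described by pairing the variable node with a DIExpression, and it is these pairs that populate the compile unit's globals list. A minimal sketch of building and reading one, assuming GV and Expr already exist:

    auto *Pair = llvm::DIGlobalVariableExpression::get(Ctx, GV, Expr);
    llvm::DIGlobalVariable *Var = Pair->getVariable();
    llvm::DIExpression *Init = Pair->getExpression();
    (void)Var;
    (void)Init;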
+
+/// Macro Info DWARF-like metadata node.
///
/// A metadata node with a DWARF macro info (i.e., a constant named
/// \c DW_MACINFO_*, defined in llvm/Support/Dwarf.h). Called \a DIMacroNode
@@ -2433,7 +2595,7 @@ public:
void replaceElements(DIMacroNodeArray Elements) {
#ifndef NDEBUG
for (DIMacroNode *Op : getElements())
- assert(std::find(Elements->op_begin(), Elements->op_end(), Op) &&
+ assert(is_contained(Elements->operands(), Op) &&
"Lost a macro node during macro node list replacement");
#endif
replaceOperandWith(1, Elements.get());
@@ -2460,4 +2622,4 @@ public:
#undef DEFINE_MDNODE_GET_UNPACK
#undef DEFINE_MDNODE_GET
-#endif
+#endif // LLVM_IR_DEBUGINFOMETADATA_H
diff --git a/contrib/llvm/include/llvm/IR/DebugLoc.h b/contrib/llvm/include/llvm/IR/DebugLoc.h
index 8ea5875e1f85..202be3da14da 100644
--- a/contrib/llvm/include/llvm/IR/DebugLoc.h
+++ b/contrib/llvm/include/llvm/IR/DebugLoc.h
@@ -35,17 +35,7 @@ namespace llvm {
TrackingMDNodeRef Loc;
public:
- DebugLoc() {}
- DebugLoc(DebugLoc &&X) : Loc(std::move(X.Loc)) {}
- DebugLoc(const DebugLoc &X) : Loc(X.Loc) {}
- DebugLoc &operator=(DebugLoc &&X) {
- Loc = std::move(X.Loc);
- return *this;
- }
- DebugLoc &operator=(const DebugLoc &X) {
- Loc = X.Loc;
- return *this;
- }
+ DebugLoc() = default;
/// \brief Construct from an \a DILocation.
DebugLoc(const DILocation *L);
diff --git a/contrib/llvm/include/llvm/IR/DerivedTypes.h b/contrib/llvm/include/llvm/IR/DerivedTypes.h
index efd0d07366ee..05e99157b8dc 100644
--- a/contrib/llvm/include/llvm/IR/DerivedTypes.h
+++ b/contrib/llvm/include/llvm/IR/DerivedTypes.h
@@ -18,17 +18,19 @@
#ifndef LLVM_IR_DERIVEDTYPES_H
#define LLVM_IR_DERIVEDTYPES_H
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Type.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
-#include "llvm/Support/DataTypes.h"
+#include <cassert>
+#include <cstdint>
namespace llvm {
class Value;
class APInt;
class LLVMContext;
-template<typename T> class ArrayRef;
-class StringRef;
/// Class to represent integer types. Note that this class is also used to
/// represent the built-in integer types: Int1Ty, Int8Ty, Int16Ty, Int32Ty and
@@ -46,9 +48,10 @@ public:
/// This enum is just used to hold constants we need for IntegerType.
enum {
MIN_INT_BITS = 1, ///< Minimum number of bits that can be specified
- MAX_INT_BITS = (1<<23)-1 ///< Maximum number of bits that can be specified
+ MAX_INT_BITS = (1<<24)-1 ///< Maximum number of bits that can be specified
///< Note that bit width is stored in the Type classes SubclassData field
- ///< which has 23 bits. This yields a maximum bit width of 8,388,607 bits.
+ ///< which has 24 bits. This yields a maximum bit width of 16,777,215
+ ///< bits.
};
/// This static method is the primary way of constructing an IntegerType.
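Raising MAX_INT_BITS simply widens the range accepted by the existing factory; a minimal sketch (Ctx is an existing LLVMContext):

    // Widths up to 16,777,215 bits are now representable.
    llvm::IntegerType *Wide = llvm::IntegerType::get(Ctx, 1000000);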
@@ -97,11 +100,12 @@ unsigned Type::getIntegerBitWidth() const {
/// Class to represent function types
///
class FunctionType : public Type {
- FunctionType(const FunctionType &) = delete;
- const FunctionType &operator=(const FunctionType &) = delete;
FunctionType(Type *Result, ArrayRef<Type*> Params, bool IsVarArgs);
public:
+ FunctionType(const FunctionType &) = delete;
+ FunctionType &operator=(const FunctionType &) = delete;
+
/// This static method is the primary way of constructing a FunctionType.
static FunctionType *get(Type *Result,
ArrayRef<Type*> Params, bool isVarArg);
@@ -137,7 +141,7 @@ public:
return T->getTypeID() == FunctionTyID;
}
};
-static_assert(AlignOf<FunctionType>::Alignment >= AlignOf<Type *>::Alignment,
+static_assert(alignof(FunctionType) >= alignof(Type *),
"Alignment sufficient for objects appended to FunctionType");
bool Type::isFunctionVarArg() const {
@@ -152,7 +156,7 @@ unsigned Type::getFunctionNumParams() const {
return cast<FunctionType>(this)->getNumParams();
}
-/// Common super class of ArrayType, StructType, PointerType and VectorType.
+/// Common super class of ArrayType, StructType and VectorType.
class CompositeType : public Type {
protected:
explicit CompositeType(LLVMContext &C, TypeID tid) : Type(C, tid) {}
@@ -168,7 +172,6 @@ public:
static inline bool classof(const Type *T) {
return T->getTypeID() == ArrayTyID ||
T->getTypeID() == StructTyID ||
- T->getTypeID() == PointerTyID ||
T->getTypeID() == VectorTyID;
}
};
@@ -194,10 +197,9 @@ public:
/// generator for a target expects).
///
class StructType : public CompositeType {
- StructType(const StructType &) = delete;
- const StructType &operator=(const StructType &) = delete;
StructType(LLVMContext &C)
: CompositeType(C, StructTyID), SymbolTableEntry(nullptr) {}
+
enum {
/// This is the contents of the SubClassData field.
SCDB_HasBody = 1,
@@ -213,6 +215,9 @@ class StructType : public CompositeType {
void *SymbolTableEntry;
public:
+ StructType(const StructType &) = delete;
+ StructType &operator=(const StructType &) = delete;
+
/// This creates an identified struct.
static StructType *create(LLVMContext &Context, StringRef Name);
static StructType *create(LLVMContext &Context);
@@ -305,52 +310,51 @@ Type *Type::getStructElementType(unsigned N) const {
return cast<StructType>(this)->getElementType(N);
}
-/// This is the superclass of the array, pointer and vector type classes.
-/// All of these represent "arrays" in memory. The array type represents a
-/// specifically sized array, pointer types are unsized/unknown size arrays,
-/// vector types represent specifically sized arrays that allow for use of SIMD
-/// instructions. SequentialType holds the common features of all, which stem
-/// from the fact that all three lay their components out in memory identically.
+/// This is the superclass of the array and vector type classes. Both of these
+/// represent "arrays" in memory. The array type represents a specifically sized
+/// array, and the vector type represents a specifically sized array that allows
+/// for use of SIMD instructions. SequentialType holds the common features of
+/// both, which stem from the fact that both lay their components out in memory
+/// identically.
class SequentialType : public CompositeType {
Type *ContainedType; ///< Storage for the single contained type.
- SequentialType(const SequentialType &) = delete;
- const SequentialType &operator=(const SequentialType &) = delete;
+ uint64_t NumElements;
protected:
- SequentialType(TypeID TID, Type *ElType)
- : CompositeType(ElType->getContext(), TID), ContainedType(ElType) {
+ SequentialType(TypeID TID, Type *ElType, uint64_t NumElements)
+ : CompositeType(ElType->getContext(), TID), ContainedType(ElType),
+ NumElements(NumElements) {
ContainedTys = &ContainedType;
NumContainedTys = 1;
}
public:
- Type *getElementType() const { return getSequentialElementType(); }
+ SequentialType(const SequentialType &) = delete;
+ SequentialType &operator=(const SequentialType &) = delete;
+
+ uint64_t getNumElements() const { return NumElements; }
+ Type *getElementType() const { return ContainedType; }
/// Methods for support type inquiry through isa, cast, and dyn_cast.
static inline bool classof(const Type *T) {
- return T->getTypeID() == ArrayTyID ||
- T->getTypeID() == PointerTyID ||
- T->getTypeID() == VectorTyID;
+ return T->getTypeID() == ArrayTyID || T->getTypeID() == VectorTyID;
}
};
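Since the element count now lives in SequentialType itself, arrays and vectors share one accessor and pointers are no longer part of this hierarchy; a minimal sketch, with Ty an arbitrary llvm::Type*:

    if (auto *Seq = llvm::dyn_cast<llvm::SequentialType>(Ty)) {
      uint64_t N = Seq->getNumElements();   // ArrayType or VectorType, never PointerType
      (void)N;
    }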
/// Class to represent array types.
class ArrayType : public SequentialType {
- uint64_t NumElements;
-
- ArrayType(const ArrayType &) = delete;
- const ArrayType &operator=(const ArrayType &) = delete;
ArrayType(Type *ElType, uint64_t NumEl);
public:
+ ArrayType(const ArrayType &) = delete;
+ ArrayType &operator=(const ArrayType &) = delete;
+
/// This static method is the primary way to construct an ArrayType
static ArrayType *get(Type *ElementType, uint64_t NumElements);
/// Return true if the specified type is valid as a element type.
static bool isValidElementType(Type *ElemTy);
- uint64_t getNumElements() const { return NumElements; }
-
/// Methods for support type inquiry through isa, cast, and dyn_cast.
static inline bool classof(const Type *T) {
return T->getTypeID() == ArrayTyID;
@@ -363,13 +367,12 @@ uint64_t Type::getArrayNumElements() const {
/// Class to represent vector types.
class VectorType : public SequentialType {
- unsigned NumElements;
-
- VectorType(const VectorType &) = delete;
- const VectorType &operator=(const VectorType &) = delete;
VectorType(Type *ElType, unsigned NumEl);
public:
+ VectorType(const VectorType &) = delete;
+ VectorType &operator=(const VectorType &) = delete;
+
/// This static method is the primary way to construct an VectorType.
static VectorType *get(Type *ElementType, unsigned NumElements);
@@ -420,13 +423,10 @@ public:
/// Return true if the specified type is valid as a element type.
static bool isValidElementType(Type *ElemTy);
- /// Return the number of elements in the Vector type.
- unsigned getNumElements() const { return NumElements; }
-
/// Return the number of bits in the Vector type.
/// Returns zero when the vector is a vector of pointers.
unsigned getBitWidth() const {
- return NumElements * getElementType()->getPrimitiveSizeInBits();
+ return getNumElements() * getElementType()->getPrimitiveSizeInBits();
}
/// Methods for support type inquiry through isa, cast, and dyn_cast.
@@ -440,12 +440,15 @@ unsigned Type::getVectorNumElements() const {
}
/// Class to represent pointers.
-class PointerType : public SequentialType {
- PointerType(const PointerType &) = delete;
- const PointerType &operator=(const PointerType &) = delete;
+class PointerType : public Type {
explicit PointerType(Type *ElType, unsigned AddrSpace);
+ Type *PointeeTy;
+
public:
+ PointerType(const PointerType &) = delete;
+ PointerType &operator=(const PointerType &) = delete;
+
/// This constructs a pointer to an object of the specified type in a numbered
/// address space.
static PointerType *get(Type *ElementType, unsigned AddressSpace);
@@ -456,6 +459,8 @@ public:
return PointerType::get(ElementType, 0);
}
+ Type *getElementType() const { return PointeeTy; }
+
 /// Return true if the specified type is valid as an element type.
static bool isValidElementType(Type *ElemTy);
@@ -475,6 +480,6 @@ unsigned Type::getPointerAddressSpace() const {
return cast<PointerType>(getScalarType())->getAddressSpace();
}
-} // End llvm namespace
+} // end namespace llvm
-#endif
+#endif // LLVM_IR_DERIVEDTYPES_H
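For readers tracking the hierarchy change above, here is a minimal caller-side sketch (the helper name is invented, not part of the patch): element counts are now queried through SequentialType, which covers only arrays and vectors, while PointerType must be handled separately because it no longer derives from SequentialType.

```cpp
#include "llvm/IR/DerivedTypes.h"
#include "llvm/Support/Casting.h"
using namespace llvm;

// Hypothetical helper: returns the element count for array/vector types,
// which after this change both expose getNumElements() via SequentialType.
static uint64_t numContainedElements(Type *Ty) {
  if (auto *Seq = dyn_cast<SequentialType>(Ty)) // ArrayType or VectorType only
    return Seq->getNumElements();
  // PointerType is no longer a SequentialType; it only knows its pointee.
  if (auto *PTy = dyn_cast<PointerType>(Ty))
    (void)PTy->getElementType();
  return 0;
}
```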
diff --git a/contrib/llvm/include/llvm/IR/DiagnosticInfo.h b/contrib/llvm/include/llvm/IR/DiagnosticInfo.h
index 1c78684da64d..a93c180df1b5 100644
--- a/contrib/llvm/include/llvm/IR/DiagnosticInfo.h
+++ b/contrib/llvm/include/llvm/IR/DiagnosticInfo.h
@@ -15,13 +15,19 @@
#ifndef LLVM_IR_DIAGNOSTICINFO_H
#define LLVM_IR_DIAGNOSTICINFO_H
+#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/Support/CBindingWrapping.h"
+#include "llvm/Support/YAMLTraits.h"
#include "llvm-c/Types.h"
#include <functional>
+#include <algorithm>
+#include <cstdint>
+#include <iterator>
#include <string>
namespace llvm {
@@ -47,13 +53,13 @@ enum DiagnosticSeverity : char {
 /// \brief Defines the different supported kinds of diagnostics.
/// This enum should be extended with a new ID for each added concrete subclass.
enum DiagnosticKind {
- DK_Bitcode,
DK_InlineAsm,
DK_ResourceLimit,
DK_StackSize,
DK_Linker,
DK_DebugMetadataVersion,
DK_DebugMetadataInvalid,
+ DK_ISelFallback,
DK_SampleProfile,
DK_OptimizationRemark,
DK_OptimizationRemarkMissed,
@@ -95,7 +101,7 @@ public:
DiagnosticInfo(/* DiagnosticKind */ int Kind, DiagnosticSeverity Severity)
: Kind(Kind), Severity(Severity) {}
- virtual ~DiagnosticInfo() {}
+ virtual ~DiagnosticInfo() = default;
/* DiagnosticKind */ int getKind() const { return Kind; }
DiagnosticSeverity getSeverity() const { return Severity; }
@@ -272,7 +278,6 @@ public:
}
};
-
/// Diagnostic information for the sample profiler.
class DiagnosticInfoSampleProfile : public DiagnosticInfo {
public:
@@ -374,6 +379,65 @@ private:
/// Common features for diagnostics dealing with optimization remarks.
class DiagnosticInfoOptimizationBase : public DiagnosticInfoWithDebugLocBase {
public:
+ /// \brief Used to set IsVerbose via the stream interface.
+ struct setIsVerbose {};
+
+ /// \brief When an instance of this is inserted into the stream, the arguments
+ /// following will not appear in the remark printed in the compiler output
+ /// (-Rpass) but only in the optimization record file
+ /// (-fsave-optimization-record).
+ struct setExtraArgs {};
+
+ /// \brief Used in the streaming interface as the general argument type. It
+ /// internally converts everything into a key-value pair.
+ struct Argument {
+ StringRef Key;
+ std::string Val;
+ // If set, the debug location corresponding to the value.
+ DebugLoc DLoc;
+
+ explicit Argument(StringRef Str = "") : Key("String"), Val(Str) {}
+ Argument(StringRef Key, Value *V);
+ Argument(StringRef Key, Type *T);
+ Argument(StringRef Key, int N);
+ Argument(StringRef Key, unsigned N);
+ Argument(StringRef Key, bool B) : Key(Key), Val(B ? "true" : "false") {}
+ };
+
+ /// \p PassName is the name of the pass emitting this diagnostic. \p
+ /// RemarkName is a textual identifier for the remark. \p Fn is the function
+ /// where the diagnostic is being emitted. \p DLoc is the location information
+ /// to use in the diagnostic. If line table information is available, the
+ /// diagnostic will include the source code location. \p CodeRegion is the IR
+ /// value (currently a basic block) that the optimization operates on. This is
+ /// currently used to provide run-time hotness information with PGO.
+ DiagnosticInfoOptimizationBase(enum DiagnosticKind Kind,
+ enum DiagnosticSeverity Severity,
+ const char *PassName, StringRef RemarkName,
+ const Function &Fn, const DebugLoc &DLoc,
+ Value *CodeRegion = nullptr)
+ : DiagnosticInfoWithDebugLocBase(Kind, Severity, Fn, DLoc),
+ PassName(PassName), RemarkName(RemarkName), CodeRegion(CodeRegion) {}
+
+ /// \brief This ctor variant allows a pass to build an optimization remark
+ /// from an existing remark.
+ ///
+ /// This is useful when a transformation pass (e.g LV) wants to emit a remark
+ /// (\p Orig) generated by one of its analyses (e.g. LAA) as its own analysis
+ /// remark. The string \p Prepend will be emitted before the original
+ /// message.
+ DiagnosticInfoOptimizationBase(const char *PassName, StringRef Prepend,
+ const DiagnosticInfoOptimizationBase &Orig)
+ : DiagnosticInfoWithDebugLocBase((DiagnosticKind)Orig.getKind(),
+ Orig.getSeverity(), Orig.getFunction(),
+ Orig.getDebugLoc()),
+ PassName(PassName), RemarkName(Orig.RemarkName),
+ CodeRegion(Orig.getCodeRegion()) {
+ *this << Prepend;
+ std::copy(Orig.Args.begin(), Orig.Args.end(), std::back_inserter(Args));
+ }
+
+ /// Legacy interface.
/// \p PassName is the name of the pass emitting this diagnostic.
/// \p Fn is the function where the diagnostic is being emitted. \p DLoc is
/// the location information to use in the diagnostic. If line table
@@ -387,7 +451,14 @@ public:
const DebugLoc &DLoc, const Twine &Msg,
Optional<uint64_t> Hotness = None)
: DiagnosticInfoWithDebugLocBase(Kind, Severity, Fn, DLoc),
- PassName(PassName), Msg(Msg), Hotness(Hotness) {}
+ PassName(PassName), Hotness(Hotness) {
+ Args.push_back(Argument(Msg.str()));
+ }
+
+ DiagnosticInfoOptimizationBase &operator<<(StringRef S);
+ DiagnosticInfoOptimizationBase &operator<<(Argument A);
+ DiagnosticInfoOptimizationBase &operator<<(setIsVerbose V);
+ DiagnosticInfoOptimizationBase &operator<<(setExtraArgs EA);
/// \see DiagnosticInfo::print.
void print(DiagnosticPrinter &DP) const override;
@@ -399,8 +470,14 @@ public:
/// in BackendConsumer::OptimizationRemarkHandler).
virtual bool isEnabled() const = 0;
- const char *getPassName() const { return PassName; }
- const Twine &getMsg() const { return Msg; }
+ StringRef getPassName() const { return PassName; }
+ std::string getMsg() const;
+ Optional<uint64_t> getHotness() const { return Hotness; }
+ void setHotness(Optional<uint64_t> H) { Hotness = H; }
+
+ Value *getCodeRegion() const { return CodeRegion; }
+
+ bool isVerbose() const { return IsVerbose; }
static bool classof(const DiagnosticInfo *DI) {
return DI->getKind() >= DK_FirstRemark &&
@@ -413,16 +490,34 @@ private:
/// be emitted.
const char *PassName;
- /// Message to report.
- const Twine &Msg;
+ /// Textual identifier for the remark. Can be used by external tools reading
+ /// the YAML output file for optimization remarks to identify the remark.
+ StringRef RemarkName;
/// If profile information is available, this is the number of times the
/// corresponding code was executed in a profile instrumentation run.
Optional<uint64_t> Hotness;
+
+ /// The IR value (currently basic block) that the optimization operates on.
+ /// This is currently used to provide run-time hotness information with PGO.
+ Value *CodeRegion;
+
+ /// Arguments collected via the streaming interface.
+ SmallVector<Argument, 4> Args;
+
+ /// The remark is expected to be noisy.
+ bool IsVerbose = false;
+
+ /// \brief If positive, the index of the first argument that only appears in
+ /// the optimization records and not in the remark printed in the compiler
+ /// output.
+ int FirstExtraArgIndex = -1;
+
+ friend struct yaml::MappingTraits<DiagnosticInfoOptimizationBase *>;
};
/// Diagnostic information for applied optimization remarks.
-class DiagnosticInfoOptimizationRemark : public DiagnosticInfoOptimizationBase {
+class OptimizationRemark : public DiagnosticInfoOptimizationBase {
public:
/// \p PassName is the name of the pass emitting this diagnostic. If
/// this name matches the regular expression given in -Rpass=, then the
@@ -432,10 +527,24 @@ public:
/// will include the source code location. \p Msg is the message to show.
/// Note that this class does not copy this message, so this reference
/// must be valid for the whole life time of the diagnostic.
- DiagnosticInfoOptimizationRemark(const char *PassName, const Function &Fn,
- const DebugLoc &DLoc, const Twine &Msg)
+ OptimizationRemark(const char *PassName, const Function &Fn,
+ const DebugLoc &DLoc, const Twine &Msg,
+ Optional<uint64_t> Hotness = None)
: DiagnosticInfoOptimizationBase(DK_OptimizationRemark, DS_Remark,
- PassName, Fn, DLoc, Msg) {}
+ PassName, Fn, DLoc, Msg, Hotness) {}
+
+ /// \p PassName is the name of the pass emitting this diagnostic. If this name
+ /// matches the regular expression given in -Rpass=, then the diagnostic will
+ /// be emitted. \p RemarkName is a textual identifier for the remark. \p
+ /// DLoc is the debug location and \p CodeRegion is the region that the
+ /// optimization operates on (currently only a basic block is supported).
+ OptimizationRemark(const char *PassName, StringRef RemarkName,
+ const DebugLoc &DLoc, Value *CodeRegion);
+
+ /// Same as above but the debug location and code region are derived from \p
+ /// Inst.
+ OptimizationRemark(const char *PassName, StringRef RemarkName,
+ Instruction *Inst);
static bool classof(const DiagnosticInfo *DI) {
return DI->getKind() == DK_OptimizationRemark;
@@ -446,8 +555,7 @@ public:
};
/// Diagnostic information for missed-optimization remarks.
-class DiagnosticInfoOptimizationRemarkMissed
- : public DiagnosticInfoOptimizationBase {
+class OptimizationRemarkMissed : public DiagnosticInfoOptimizationBase {
public:
/// \p PassName is the name of the pass emitting this diagnostic. If
/// this name matches the regular expression given in -Rpass-missed=, then the
@@ -457,13 +565,25 @@ public:
/// will include the source code location. \p Msg is the message to show.
/// Note that this class does not copy this message, so this reference
/// must be valid for the whole life time of the diagnostic.
- DiagnosticInfoOptimizationRemarkMissed(const char *PassName,
- const Function &Fn,
- const DebugLoc &DLoc, const Twine &Msg,
- Optional<uint64_t> Hotness = None)
+ OptimizationRemarkMissed(const char *PassName, const Function &Fn,
+ const DebugLoc &DLoc, const Twine &Msg,
+ Optional<uint64_t> Hotness = None)
: DiagnosticInfoOptimizationBase(DK_OptimizationRemarkMissed, DS_Remark,
PassName, Fn, DLoc, Msg, Hotness) {}
+ /// \p PassName is the name of the pass emitting this diagnostic. If this name
+ /// matches the regular expression given in -Rpass-missed=, then the
+ /// diagnostic will be emitted. \p RemarkName is a textual identifier for the
+ /// remark. \p DLoc is the debug location and \p CodeRegion is the region
+ /// that the optimization operates on (currently only a basic block is supported).
+ OptimizationRemarkMissed(const char *PassName, StringRef RemarkName,
+ const DebugLoc &DLoc, Value *CodeRegion);
+
+ /// \brief Same as above but \p Inst is used to derive code region and debug
+ /// location.
+ OptimizationRemarkMissed(const char *PassName, StringRef RemarkName,
+ Instruction *Inst);
+
static bool classof(const DiagnosticInfo *DI) {
return DI->getKind() == DK_OptimizationRemarkMissed;
}
@@ -473,8 +593,7 @@ public:
};
/// Diagnostic information for optimization analysis remarks.
-class DiagnosticInfoOptimizationRemarkAnalysis
- : public DiagnosticInfoOptimizationBase {
+class OptimizationRemarkAnalysis : public DiagnosticInfoOptimizationBase {
public:
/// \p PassName is the name of the pass emitting this diagnostic. If
/// this name matches the regular expression given in -Rpass-analysis=, then
@@ -484,12 +603,35 @@ public:
/// include the source code location. \p Msg is the message to show. Note that
/// this class does not copy this message, so this reference must be valid for
/// the whole life time of the diagnostic.
- DiagnosticInfoOptimizationRemarkAnalysis(const char *PassName,
- const Function &Fn,
- const DebugLoc &DLoc,
- const Twine &Msg)
+ OptimizationRemarkAnalysis(const char *PassName, const Function &Fn,
+ const DebugLoc &DLoc, const Twine &Msg,
+ Optional<uint64_t> Hotness = None)
: DiagnosticInfoOptimizationBase(DK_OptimizationRemarkAnalysis, DS_Remark,
- PassName, Fn, DLoc, Msg) {}
+ PassName, Fn, DLoc, Msg, Hotness) {}
+
+ /// \p PassName is the name of the pass emitting this diagnostic. If this name
+ /// matches the regular expression given in -Rpass-analysis=, then the
+ /// diagnostic will be emitted. \p RemarkName is a textual identifier for the
+ /// remark. \p DLoc is the debug location and \p CodeRegion is the region
+ /// that the optimization operates on (currently only a basic block is supported).
+ OptimizationRemarkAnalysis(const char *PassName, StringRef RemarkName,
+ const DebugLoc &DLoc, Value *CodeRegion);
+
+ /// \brief This ctor variant allows a pass to build an optimization remark
+ /// from an existing remark.
+ ///
+ /// This is useful when a transformation pass (e.g LV) wants to emit a remark
+ /// (\p Orig) generated by one of its analyses (e.g. LAA) as its own analysis
+ /// remark. The string \p Prepend will be emitted before the original
+ /// message.
+ OptimizationRemarkAnalysis(const char *PassName, StringRef Prepend,
+ const OptimizationRemarkAnalysis &Orig)
+ : DiagnosticInfoOptimizationBase(PassName, Prepend, Orig) {}
+
+ /// \brief Same as above but \p Inst is used to derive code region and debug
+ /// location.
+ OptimizationRemarkAnalysis(const char *PassName, StringRef RemarkName,
+ Instruction *Inst);
static bool classof(const DiagnosticInfo *DI) {
return DI->getKind() == DK_OptimizationRemarkAnalysis;
@@ -503,19 +645,20 @@ public:
bool shouldAlwaysPrint() const { return getPassName() == AlwaysPrint; }
protected:
- DiagnosticInfoOptimizationRemarkAnalysis(enum DiagnosticKind Kind,
- const char *PassName,
- const Function &Fn,
- const DebugLoc &DLoc,
- const Twine &Msg)
- : DiagnosticInfoOptimizationBase(Kind, DS_Remark, PassName, Fn, DLoc,
- Msg) {}
+ OptimizationRemarkAnalysis(enum DiagnosticKind Kind, const char *PassName,
+ const Function &Fn, const DebugLoc &DLoc,
+ const Twine &Msg, Optional<uint64_t> Hotness)
+ : DiagnosticInfoOptimizationBase(Kind, DS_Remark, PassName, Fn, DLoc, Msg,
+ Hotness) {}
+
+ OptimizationRemarkAnalysis(enum DiagnosticKind Kind, const char *PassName,
+ StringRef RemarkName, const DebugLoc &DLoc,
+ Value *CodeRegion);
};
/// Diagnostic information for optimization analysis remarks related to
/// floating-point non-commutativity.
-class DiagnosticInfoOptimizationRemarkAnalysisFPCommute
- : public DiagnosticInfoOptimizationRemarkAnalysis {
+class OptimizationRemarkAnalysisFPCommute : public OptimizationRemarkAnalysis {
public:
/// \p PassName is the name of the pass emitting this diagnostic. If
/// this name matches the regular expression given in -Rpass-analysis=, then
@@ -527,12 +670,24 @@ public:
/// floating-point non-commutativity. Note that this class does not copy this
/// message, so this reference must be valid for the whole life time of the
/// diagnostic.
- DiagnosticInfoOptimizationRemarkAnalysisFPCommute(const char *PassName,
- const Function &Fn,
- const DebugLoc &DLoc,
- const Twine &Msg)
- : DiagnosticInfoOptimizationRemarkAnalysis(
- DK_OptimizationRemarkAnalysisFPCommute, PassName, Fn, DLoc, Msg) {}
+ OptimizationRemarkAnalysisFPCommute(const char *PassName, const Function &Fn,
+ const DebugLoc &DLoc, const Twine &Msg,
+ Optional<uint64_t> Hotness = None)
+ : OptimizationRemarkAnalysis(DK_OptimizationRemarkAnalysisFPCommute,
+ PassName, Fn, DLoc, Msg, Hotness) {}
+
+ /// \p PassName is the name of the pass emitting this diagnostic. If this name
+ /// matches the regular expression given in -Rpass-analysis=, then the
+ /// diagnostic will be emitted. \p RemarkName is a textual identifier for the
+ /// remark. \p DLoc is the debug location and \p CodeRegion is the region
+ /// that the optimization operates on (currently only a basic block is supported). The
+ /// front-end will append its own message related to options that address
+ /// floating-point non-commutativity.
+ OptimizationRemarkAnalysisFPCommute(const char *PassName,
+ StringRef RemarkName,
+ const DebugLoc &DLoc, Value *CodeRegion)
+ : OptimizationRemarkAnalysis(DK_OptimizationRemarkAnalysisFPCommute,
+ PassName, RemarkName, DLoc, CodeRegion) {}
static bool classof(const DiagnosticInfo *DI) {
return DI->getKind() == DK_OptimizationRemarkAnalysisFPCommute;
@@ -541,8 +696,7 @@ public:
/// Diagnostic information for optimization analysis remarks related to
/// pointer aliasing.
-class DiagnosticInfoOptimizationRemarkAnalysisAliasing
- : public DiagnosticInfoOptimizationRemarkAnalysis {
+class OptimizationRemarkAnalysisAliasing : public OptimizationRemarkAnalysis {
public:
/// \p PassName is the name of the pass emitting this diagnostic. If
/// this name matches the regular expression given in -Rpass-analysis=, then
@@ -554,12 +708,23 @@ public:
/// pointer aliasing legality. Note that this class does not copy this
/// message, so this reference must be valid for the whole life time of the
/// diagnostic.
- DiagnosticInfoOptimizationRemarkAnalysisAliasing(const char *PassName,
- const Function &Fn,
- const DebugLoc &DLoc,
- const Twine &Msg)
- : DiagnosticInfoOptimizationRemarkAnalysis(
- DK_OptimizationRemarkAnalysisAliasing, PassName, Fn, DLoc, Msg) {}
+ OptimizationRemarkAnalysisAliasing(const char *PassName, const Function &Fn,
+ const DebugLoc &DLoc, const Twine &Msg,
+ Optional<uint64_t> Hotness = None)
+ : OptimizationRemarkAnalysis(DK_OptimizationRemarkAnalysisAliasing,
+ PassName, Fn, DLoc, Msg, Hotness) {}
+
+ /// \p PassName is the name of the pass emitting this diagnostic. If this name
+ /// matches the regular expression given in -Rpass-analysis=, then the
+ /// diagnostic will be emitted. \p RemarkName is a textual identifier for the
+ /// remark. \p DLoc is the debug location and \p CodeRegion is the region
+ /// that the optimization operates on (currently only a basic block is supported). The
+ /// front-end will append its own message related to options that address
+ /// pointer aliasing legality.
+ OptimizationRemarkAnalysisAliasing(const char *PassName, StringRef RemarkName,
+ const DebugLoc &DLoc, Value *CodeRegion)
+ : OptimizationRemarkAnalysis(DK_OptimizationRemarkAnalysisAliasing,
+ PassName, RemarkName, DLoc, CodeRegion) {}
static bool classof(const DiagnosticInfo *DI) {
return DI->getKind() == DK_OptimizationRemarkAnalysisAliasing;
@@ -584,6 +749,25 @@ public:
}
};
+/// Diagnostic information for ISel fallback path.
+class DiagnosticInfoISelFallback : public DiagnosticInfo {
+ /// The function that is concerned by this diagnostic.
+ const Function &Fn;
+
+public:
+ DiagnosticInfoISelFallback(const Function &Fn,
+ DiagnosticSeverity Severity = DS_Warning)
+ : DiagnosticInfo(DK_ISelFallback, Severity), Fn(Fn) {}
+
+ const Function &getFunction() const { return Fn; }
+
+ void print(DiagnosticPrinter &DP) const override;
+
+ static bool classof(const DiagnosticInfo *DI) {
+ return DI->getKind() == DK_ISelFallback;
+ }
+};
+
// Create wrappers for C Binding types (see CBindingWrapping.h).
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(DiagnosticInfo, LLVMDiagnosticInfoRef)
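As a rough usage sketch of the streaming remark interface introduced above (the pass and remark names are invented; the argument following setExtraArgs() is recorded only in the YAML optimization record, not in -Rpass output):

```cpp
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

// Emit an applied-optimization remark attached to an instruction.
static void reportUnroll(Function &F, Instruction *I, unsigned Count) {
  OptimizationRemark R("my-unroll-pass", "Unrolled", I);
  R << "unrolled loop"
    << DiagnosticInfoOptimizationBase::setExtraArgs()
    << DiagnosticInfoOptimizationBase::Argument("UnrollCount", Count);
  F.getContext().diagnose(R); // routed through the context's diagnostic handler
}
```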
diff --git a/contrib/llvm/include/llvm/IR/DiagnosticPrinter.h b/contrib/llvm/include/llvm/IR/DiagnosticPrinter.h
index 1bcd73738b66..59c83291affa 100644
--- a/contrib/llvm/include/llvm/IR/DiagnosticPrinter.h
+++ b/contrib/llvm/include/llvm/IR/DiagnosticPrinter.h
@@ -19,6 +19,7 @@
#include <string>
namespace llvm {
+
// Forward declarations.
class Module;
class raw_ostream;
@@ -30,7 +31,7 @@ class Value;
/// \brief Interface for custom diagnostic printing.
class DiagnosticPrinter {
public:
- virtual ~DiagnosticPrinter() {}
+ virtual ~DiagnosticPrinter() = default;
// Simple types.
virtual DiagnosticPrinter &operator<<(char C) = 0;
@@ -89,6 +90,7 @@ public:
// Other types.
DiagnosticPrinter &operator<<(const SMDiagnostic &Diag) override;
};
-} // End namespace llvm
-#endif
+} // end namespace llvm
+
+#endif // LLVM_IR_DIAGNOSTICPRINTER_H
diff --git a/contrib/llvm/include/llvm/IR/Dominators.h b/contrib/llvm/include/llvm/IR/Dominators.h
index f445a49b67b8..7c733bac8da0 100644
--- a/contrib/llvm/include/llvm/IR/Dominators.h
+++ b/contrib/llvm/include/llvm/IR/Dominators.h
@@ -33,9 +33,9 @@ extern template class DomTreeNodeBase<BasicBlock>;
extern template class DominatorTreeBase<BasicBlock>;
extern template void Calculate<Function, BasicBlock *>(
- DominatorTreeBase<GraphTraits<BasicBlock *>::NodeType> &DT, Function &F);
+ DominatorTreeBaseByGraphTraits<GraphTraits<BasicBlock *>> &DT, Function &F);
extern template void Calculate<Function, Inverse<BasicBlock *>>(
- DominatorTreeBase<GraphTraits<Inverse<BasicBlock *>>::NodeType> &DT,
+ DominatorTreeBaseByGraphTraits<GraphTraits<Inverse<BasicBlock *>>> &DT,
Function &F);
typedef DomTreeNodeBase<BasicBlock> DomTreeNode;
@@ -102,13 +102,6 @@ public:
recalculate(F);
}
- DominatorTree(DominatorTree &&Arg)
- : Base(std::move(static_cast<Base &>(Arg))) {}
- DominatorTree &operator=(DominatorTree &&RHS) {
- Base::operator=(std::move(static_cast<Base &>(RHS)));
- return *this;
- }
-
/// \brief Returns *false* if the other dominator tree matches this dominator
/// tree.
inline bool compare(const DominatorTree &Other) const {
@@ -155,23 +148,19 @@ public:
// iterable by generic graph iterators.
template <class Node, class ChildIterator> struct DomTreeGraphTraitsBase {
- typedef Node NodeType;
+ typedef Node *NodeRef;
typedef ChildIterator ChildIteratorType;
- typedef df_iterator<Node *, SmallPtrSet<NodeType *, 8>> nodes_iterator;
+ typedef df_iterator<Node *, df_iterator_default_set<Node*>> nodes_iterator;
- static NodeType *getEntryNode(NodeType *N) { return N; }
- static inline ChildIteratorType child_begin(NodeType *N) {
- return N->begin();
- }
- static inline ChildIteratorType child_end(NodeType *N) { return N->end(); }
+ static NodeRef getEntryNode(NodeRef N) { return N; }
+ static ChildIteratorType child_begin(NodeRef N) { return N->begin(); }
+ static ChildIteratorType child_end(NodeRef N) { return N->end(); }
- static nodes_iterator nodes_begin(NodeType *N) {
+ static nodes_iterator nodes_begin(NodeRef N) {
return df_begin(getEntryNode(N));
}
- static nodes_iterator nodes_end(NodeType *N) {
- return df_end(getEntryNode(N));
- }
+ static nodes_iterator nodes_end(NodeRef N) { return df_end(getEntryNode(N)); }
};
template <>
@@ -185,9 +174,7 @@ struct GraphTraits<const DomTreeNode *>
template <> struct GraphTraits<DominatorTree*>
: public GraphTraits<DomTreeNode*> {
- static NodeType *getEntryNode(DominatorTree *DT) {
- return DT->getRootNode();
- }
+ static NodeRef getEntryNode(DominatorTree *DT) { return DT->getRootNode(); }
static nodes_iterator nodes_begin(DominatorTree *N) {
return df_begin(getEntryNode(N));
@@ -201,14 +188,14 @@ template <> struct GraphTraits<DominatorTree*>
/// \brief Analysis pass which computes a \c DominatorTree.
class DominatorTreeAnalysis : public AnalysisInfoMixin<DominatorTreeAnalysis> {
friend AnalysisInfoMixin<DominatorTreeAnalysis>;
- static char PassID;
+ static AnalysisKey Key;
public:
/// \brief Provide the result typedef for this analysis pass.
typedef DominatorTree Result;
/// \brief Run the analysis pass over a function and produce a dominator tree.
- DominatorTree run(Function &F, AnalysisManager<Function> &);
+ DominatorTree run(Function &F, FunctionAnalysisManager &);
};
/// \brief Printer pass for the \c DominatorTree.
@@ -218,12 +205,12 @@ class DominatorTreePrinterPass
public:
explicit DominatorTreePrinterPass(raw_ostream &OS);
- PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM);
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
/// \brief Verifier pass for the \c DominatorTree.
struct DominatorTreeVerifierPass : PassInfoMixin<DominatorTreeVerifierPass> {
- PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM);
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
/// \brief Legacy analysis pass which computes a \c DominatorTree.
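The renamed run() signatures are exercised the same way as before; a minimal new-pass-manager sketch (the pass itself is hypothetical):

```cpp
#include "llvm/IR/Dominators.h"
#include "llvm/IR/PassManager.h"
using namespace llvm;

// Hypothetical function pass that queries the dominator tree analysis using
// the FunctionAnalysisManager spelling adopted in this change.
struct RootInspectorPass : PassInfoMixin<RootInspectorPass> {
  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM) {
    DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
    (void)DT.getRootNode(); // entry node of F's dominator tree
    return PreservedAnalyses::all();
  }
};
```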
diff --git a/contrib/llvm/include/llvm/IR/Function.h b/contrib/llvm/include/llvm/IR/Function.h
index d7d27e7585c1..1854d413c627 100644
--- a/contrib/llvm/include/llvm/IR/Function.h
+++ b/contrib/llvm/include/llvm/IR/Function.h
@@ -18,26 +18,33 @@
#ifndef LLVM_IR_FUNCTION_H
#define LLVM_IR_FUNCTION_H
+#include "llvm/ADT/ilist_node.h"
#include "llvm/ADT/iterator_range.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/GlobalObject.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/OperandTraits.h"
+#include "llvm/IR/SymbolTableListTraits.h"
+#include "llvm/IR/Value.h"
#include "llvm/Support/Compiler.h"
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <string>
namespace llvm {
template <typename T> class Optional;
+class AssemblyAnnotationWriter;
class FunctionType;
class LLVMContext;
class DISubprogram;
-template <>
-struct SymbolTableListSentinelTraits<Argument>
- : public ilist_half_embedded_sentinel_traits<Argument> {};
-
class Function : public GlobalObject, public ilist_node<Function> {
public:
typedef SymbolTableList<Argument> ArgumentListType;
@@ -54,7 +61,8 @@ private:
// Important things that make up a function!
BasicBlockListType BasicBlocks; ///< The basic blocks
mutable ArgumentListType ArgumentList; ///< The formal arguments
- ValueSymbolTable *SymTab; ///< Symbol table of args/instructions
+ std::unique_ptr<ValueSymbolTable>
+ SymTab; ///< Symbol table of args/instructions
AttributeSet AttributeSets; ///< Parameter attributes
/*
@@ -77,8 +85,6 @@ private:
friend class SymbolTableListTraits<Function>;
- void setParent(Module *parent);
-
/// hasLazyArguments/CheckLazyArguments - The argument list of a function is
/// built on demand, so that the list isn't allocated until the first client
/// needs it. The hasLazyArguments predicate returns true if the arg list
@@ -93,10 +99,8 @@ private:
if (hasLazyArguments())
BuildLazyArguments();
}
- void BuildLazyArguments() const;
- Function(const Function&) = delete;
- void operator=(const Function&) = delete;
+ void BuildLazyArguments() const;
/// Function ctor - If the (optional) Module argument is specified, the
/// function is automatically inserted into the end of the function list for
@@ -106,18 +110,21 @@ private:
const Twine &N = "", Module *M = nullptr);
public:
+ Function(const Function&) = delete;
+ void operator=(const Function&) = delete;
+ ~Function() override;
+
static Function *Create(FunctionType *Ty, LinkageTypes Linkage,
const Twine &N = "", Module *M = nullptr) {
return new Function(Ty, Linkage, N, M);
}
- ~Function() override;
-
- /// \brief Provide fast operand accessors
+ // Provide fast operand accessors.
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-
- Type *getReturnType() const; // Return the type of the ret val
- FunctionType *getFunctionType() const; // Return the FunctionType for me
+ /// Returns the type of the ret val.
+ Type *getReturnType() const;
+ /// Returns the FunctionType for me.
+ FunctionType *getFunctionType() const;
/// getContext - Return a reference to the LLVMContext associated with this
/// function.
@@ -137,7 +144,13 @@ public:
/// The particular intrinsic functions which correspond to this value are
/// defined in llvm/Intrinsics.h.
Intrinsic::ID getIntrinsicID() const LLVM_READONLY { return IntID; }
- bool isIntrinsic() const { return getName().startswith("llvm."); }
+
+ /// isIntrinsic - Returns true if the function's name starts with "llvm.".
+ /// It's possible for this function to return true while getIntrinsicID()
+ /// returns Intrinsic::not_intrinsic!
+ bool isIntrinsic() const { return HasLLVMReservedName; }
+
+ static Intrinsic::ID lookupIntrinsicID(StringRef Name);
/// \brief Recalculate the ID for this function if it is an Intrinsic defined
/// in llvm/Intrinsics.h. Sets the intrinsic ID to Intrinsic::not_intrinsic
@@ -166,41 +179,55 @@ public:
void setAttributes(AttributeSet Attrs) { AttributeSets = Attrs; }
/// @brief Add function attributes to this function.
- void addFnAttr(Attribute::AttrKind N) {
- setAttributes(AttributeSets.addAttribute(getContext(),
- AttributeSet::FunctionIndex, N));
+ void addFnAttr(Attribute::AttrKind Kind) {
+ addAttribute(AttributeSet::FunctionIndex, Kind);
+ }
+
+ /// @brief Add function attributes to this function.
+ void addFnAttr(StringRef Kind, StringRef Val = StringRef()) {
+ addAttribute(AttributeSet::FunctionIndex,
+ Attribute::get(getContext(), Kind, Val));
+ }
+
+ void addFnAttr(Attribute Attr) {
+ addAttribute(AttributeSet::FunctionIndex, Attr);
}
/// @brief Remove function attributes from this function.
void removeFnAttr(Attribute::AttrKind Kind) {
- setAttributes(AttributeSets.removeAttribute(
- getContext(), AttributeSet::FunctionIndex, Kind));
+ removeAttribute(AttributeSet::FunctionIndex, Kind);
}
- /// @brief Add function attributes to this function.
- void addFnAttr(StringRef Kind) {
- setAttributes(
- AttributeSets.addAttribute(getContext(),
- AttributeSet::FunctionIndex, Kind));
- }
- void addFnAttr(StringRef Kind, StringRef Value) {
- setAttributes(
- AttributeSets.addAttribute(getContext(),
- AttributeSet::FunctionIndex, Kind, Value));
+ /// @brief Remove function attribute from this function.
+ void removeFnAttr(StringRef Kind) {
+ setAttributes(AttributeSets.removeAttribute(
+ getContext(), AttributeSet::FunctionIndex, Kind));
}
- /// Set the entry count for this function.
+ /// \brief Set the entry count for this function.
+ ///
+ /// Entry count is the number of times this function was executed based on
+ /// pgo data.
void setEntryCount(uint64_t Count);
- /// Get the entry count for this function.
+ /// \brief Get the entry count for this function.
+ ///
+ /// Entry count is the number of times the function was executed based on
+ /// pgo data.
Optional<uint64_t> getEntryCount() const;
+ /// Set the section prefix for this function.
+ void setSectionPrefix(StringRef Prefix);
+
+ /// Get the section prefix for this function.
+ Optional<StringRef> getSectionPrefix() const;
+
/// @brief Return true if the function has the attribute.
bool hasFnAttribute(Attribute::AttrKind Kind) const {
return AttributeSets.hasFnAttribute(Kind);
}
bool hasFnAttribute(StringRef Kind) const {
- return AttributeSets.hasAttribute(AttributeSet::FunctionIndex, Kind);
+ return AttributeSets.hasFnAttribute(Kind);
}
/// @brief Return the attribute for the given attribute kind.
@@ -323,7 +350,7 @@ public:
}
/// @brief Determine if the function may only access memory that is
- // either inaccessible from the IR or pointed to by its arguments.
+ /// either inaccessible from the IR or pointed to by its arguments.
bool onlyAccessesInaccessibleMemOrArgMem() const {
return hasFnAttribute(Attribute::InaccessibleMemOrArgMemOnly);
}
@@ -431,7 +458,7 @@ public:
}
/// Optimize this function for minimum size (-Oz).
- bool optForMinSize() const { return hasFnAttribute(Attribute::MinSize); };
+ bool optForMinSize() const { return hasFnAttribute(Attribute::MinSize); }
/// Optimize this function for size (-Os) or minimum size (-Oz).
bool optForSize() const {
@@ -477,12 +504,14 @@ public:
CheckLazyArguments();
return ArgumentList;
}
+
static ArgumentListType Function::*getSublistAccess(Argument*) {
return &Function::ArgumentList;
}
const BasicBlockListType &getBasicBlockList() const { return BasicBlocks; }
BasicBlockListType &getBasicBlockList() { return BasicBlocks; }
+
static BasicBlockListType Function::*getSublistAccess(BasicBlock*) {
return &Function::BasicBlocks;
}
@@ -493,10 +522,12 @@ public:
//===--------------------------------------------------------------------===//
// Symbol Table Accessing functions...
- /// getSymbolTable() - Return the symbol table...
+ /// getValueSymbolTable() - Return the symbol table if any, otherwise nullptr.
///
- inline ValueSymbolTable &getValueSymbolTable() { return *SymTab; }
- inline const ValueSymbolTable &getValueSymbolTable() const { return *SymTab; }
+ inline ValueSymbolTable *getValueSymbolTable() { return SymTab.get(); }
+ inline const ValueSymbolTable *getValueSymbolTable() const {
+ return SymTab.get();
+ }
//===--------------------------------------------------------------------===//
// BasicBlock iterator forwarding functions
@@ -524,6 +555,7 @@ public:
CheckLazyArguments();
return ArgumentList.begin();
}
+
arg_iterator arg_end() {
CheckLazyArguments();
return ArgumentList.end();
@@ -536,7 +568,6 @@ public:
iterator_range<arg_iterator> args() {
return make_range(arg_begin(), arg_end());
}
-
iterator_range<const_arg_iterator> args() const {
return make_range(arg_begin(), arg_end());
}
@@ -644,8 +675,8 @@ private:
void allocHungoffUselist();
template<int Idx> void setHungoffOperand(Constant *C);
- // Shadow Value::setValueSubclassData with a private forwarding method so that
- // subclasses cannot accidentally use it.
+ /// Shadow Value::setValueSubclassData with a private forwarding method so
+ /// that subclasses cannot accidentally use it.
void setValueSubclassData(unsigned short D) {
Value::setValueSubclassData(D);
}
@@ -657,6 +688,6 @@ struct OperandTraits<Function> : public HungoffOperandTraits<3> {};
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(Function, Value)
-} // End llvm namespace
+} // end namespace llvm
-#endif
+#endif // LLVM_IR_FUNCTION_H
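A short sketch of the reworked attribute helpers and the now-nullable symbol-table accessor ("my-marker" is an invented string attribute, not one defined by LLVM):

```cpp
#include "llvm/IR/Function.h"
using namespace llvm;

static void tagAndInspect(Function &F) {
  F.addFnAttr("my-marker", "on");   // string key/value overload
  F.addFnAttr(Attribute::NoUnwind); // enum overload, forwards to addAttribute
  // SymTab is now held in a unique_ptr, so the accessor can return null.
  if (ValueSymbolTable *ST = F.getValueSymbolTable())
    (void)ST;
}
```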
diff --git a/contrib/llvm/include/llvm/IR/GVMaterializer.h b/contrib/llvm/include/llvm/IR/GVMaterializer.h
index 9e47722c892b..675abeb6ec3a 100644
--- a/contrib/llvm/include/llvm/IR/GVMaterializer.h
+++ b/contrib/llvm/include/llvm/IR/GVMaterializer.h
@@ -18,36 +18,35 @@
#ifndef LLVM_IR_GVMATERIALIZER_H
#define LLVM_IR_GVMATERIALIZER_H
-#include <system_error>
#include <vector>
namespace llvm {
-class Function;
+
+class Error;
class GlobalValue;
-class Module;
class StructType;
class GVMaterializer {
protected:
- GVMaterializer() {}
+ GVMaterializer() = default;
public:
virtual ~GVMaterializer();
/// Make sure the given GlobalValue is fully read.
///
- virtual std::error_code materialize(GlobalValue *GV) = 0;
+ virtual Error materialize(GlobalValue *GV) = 0;
/// Make sure the entire Module has been completely read.
///
- virtual std::error_code materializeModule() = 0;
+ virtual Error materializeModule() = 0;
- virtual std::error_code materializeMetadata() = 0;
+ virtual Error materializeMetadata() = 0;
virtual void setStripDebugInfo() = 0;
virtual std::vector<StructType *> getIdentifiedStructTypes() const = 0;
};
-} // End llvm namespace
+} // end namespace llvm
-#endif
+#endif // LLVM_IR_GVMATERIALIZER_H
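Since materialize() now returns Error rather than std::error_code, callers are expected to consume the error explicitly; a sketch (the banner string is arbitrary):

```cpp
#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Materialize a lazily loaded global and report any failure to stderr.
static bool materializeOrReport(GlobalValue *GV) {
  if (Error E = GV->materialize()) {
    logAllUnhandledErrors(std::move(E), errs(), "materialize failed: ");
    return false;
  }
  return true;
}
```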
diff --git a/contrib/llvm/include/llvm/IR/GetElementPtrTypeIterator.h b/contrib/llvm/include/llvm/IR/GetElementPtrTypeIterator.h
index 4953aebbe8aa..490bff29cf38 100644
--- a/contrib/llvm/include/llvm/IR/GetElementPtrTypeIterator.h
+++ b/contrib/llvm/include/llvm/IR/GetElementPtrTypeIterator.h
@@ -15,12 +15,17 @@
#ifndef LLVM_IR_GETELEMENTPTRTYPEITERATOR_H
#define LLVM_IR_GETELEMENTPTRTYPEITERATOR_H
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/PointerUnion.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/User.h"
-#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/Support/Casting.h"
+#include <cstddef>
+#include <iterator>
namespace llvm {
+
template<typename ItTy = User::const_op_iterator>
class generic_gep_type_iterator
: public std::iterator<std::forward_iterator_tag, Type *, ptrdiff_t> {
@@ -28,20 +33,19 @@ namespace llvm {
Type *, ptrdiff_t> super;
ItTy OpIt;
- PointerIntPair<Type *, 1> CurTy;
- unsigned AddrSpace;
- generic_gep_type_iterator() {}
- public:
+ PointerUnion<StructType *, Type *> CurTy;
+ enum : uint64_t { Unbounded = -1ull };
+ uint64_t NumElements = Unbounded;
+ generic_gep_type_iterator() = default;
- static generic_gep_type_iterator begin(Type *Ty, unsigned AddrSpace,
- ItTy It) {
+ public:
+ static generic_gep_type_iterator begin(Type *Ty, ItTy It) {
generic_gep_type_iterator I;
- I.CurTy.setPointer(Ty);
- I.CurTy.setInt(true);
- I.AddrSpace = AddrSpace;
+ I.CurTy = Ty;
I.OpIt = It;
return I;
}
+
static generic_gep_type_iterator end(ItTy It) {
generic_gep_type_iterator I;
I.OpIt = It;
@@ -51,38 +55,30 @@ namespace llvm {
bool operator==(const generic_gep_type_iterator& x) const {
return OpIt == x.OpIt;
}
+
bool operator!=(const generic_gep_type_iterator& x) const {
return !operator==(x);
}
- Type *operator*() const {
- if (CurTy.getInt())
- return CurTy.getPointer()->getPointerTo(AddrSpace);
- return CurTy.getPointer();
- }
-
+ // FIXME: Make this the iterator's operator*() after the 4.0 release.
+ // operator*() had a different meaning in earlier releases, so we're
+ // temporarily not giving this iterator an operator*() to avoid a subtle
+ // semantics break.
Type *getIndexedType() const {
- if (CurTy.getInt())
- return CurTy.getPointer();
- CompositeType *CT = cast<CompositeType>(CurTy.getPointer());
- return CT->getTypeAtIndex(getOperand());
+ if (auto *T = CurTy.dyn_cast<Type *>())
+ return T;
+ return CurTy.get<StructType *>()->getTypeAtIndex(getOperand());
}
- // This is a non-standard operator->. It allows you to call methods on the
- // current type directly.
- Type *operator->() const { return operator*(); }
-
Value *getOperand() const { return const_cast<Value *>(&**OpIt); }
generic_gep_type_iterator& operator++() { // Preincrement
- if (CurTy.getInt()) {
- CurTy.setInt(false);
- } else if (CompositeType *CT =
- dyn_cast<CompositeType>(CurTy.getPointer())) {
- CurTy.setPointer(CT->getTypeAtIndex(getOperand()));
- } else {
- CurTy.setPointer(nullptr);
- }
+ Type *Ty = getIndexedType();
+ if (auto *STy = dyn_cast<SequentialType>(Ty)) {
+ CurTy = STy->getElementType();
+ NumElements = STy->getNumElements();
+ } else
+ CurTy = dyn_cast<StructType>(Ty);
++OpIt;
return *this;
}
@@ -90,6 +86,39 @@ namespace llvm {
generic_gep_type_iterator operator++(int) { // Postincrement
generic_gep_type_iterator tmp = *this; ++*this; return tmp;
}
+
+ // All of the below API is for querying properties of the "outer type", i.e.
+ // the type that contains the indexed type. Most of the time this is just
+ // the type that was visited immediately prior to the indexed type, but for
+ // the first element this is an unbounded array of the GEP's source element
+ // type, for which there is no clearly corresponding IR type (we've
+ // historically used a pointer type as the outer type in this case, but
+ // pointers will soon lose their element type).
+ //
+ // FIXME: Most current users of this class are just interested in byte
+ // offsets (a few need to know whether the outer type is a struct because
+ // they are trying to replace a constant with a variable, which is only
+ // legal for arrays, e.g. canReplaceOperandWithVariable in SimplifyCFG.cpp);
+ // we should provide a more minimal API here that exposes not much more than
+ // that.
+
+ bool isStruct() const { return CurTy.is<StructType *>(); }
+ bool isSequential() const { return CurTy.is<Type *>(); }
+
+ StructType *getStructType() const { return CurTy.get<StructType *>(); }
+
+ StructType *getStructTypeOrNull() const {
+ return CurTy.dyn_cast<StructType *>();
+ }
+
+ bool isBoundedSequential() const {
+ return isSequential() && NumElements != Unbounded;
+ }
+
+ uint64_t getSequentialNumElements() const {
+ assert(isBoundedSequential());
+ return NumElements;
+ }
};
typedef generic_gep_type_iterator<> gep_type_iterator;
@@ -98,36 +127,36 @@ namespace llvm {
auto *GEPOp = cast<GEPOperator>(GEP);
return gep_type_iterator::begin(
GEPOp->getSourceElementType(),
- cast<PointerType>(GEPOp->getPointerOperandType()->getScalarType())
- ->getAddressSpace(),
GEP->op_begin() + 1);
}
+
inline gep_type_iterator gep_type_end(const User *GEP) {
return gep_type_iterator::end(GEP->op_end());
}
+
inline gep_type_iterator gep_type_begin(const User &GEP) {
auto &GEPOp = cast<GEPOperator>(GEP);
return gep_type_iterator::begin(
GEPOp.getSourceElementType(),
- cast<PointerType>(GEPOp.getPointerOperandType()->getScalarType())
- ->getAddressSpace(),
GEP.op_begin() + 1);
}
+
inline gep_type_iterator gep_type_end(const User &GEP) {
return gep_type_iterator::end(GEP.op_end());
}
template<typename T>
inline generic_gep_type_iterator<const T *>
- gep_type_begin(Type *Op0, unsigned AS, ArrayRef<T> A) {
- return generic_gep_type_iterator<const T *>::begin(Op0, AS, A.begin());
+ gep_type_begin(Type *Op0, ArrayRef<T> A) {
+ return generic_gep_type_iterator<const T *>::begin(Op0, A.begin());
}
template<typename T>
inline generic_gep_type_iterator<const T *>
- gep_type_end(Type * /*Op0*/, unsigned /*AS*/, ArrayRef<T> A) {
+ gep_type_end(Type * /*Op0*/, ArrayRef<T> A) {
return generic_gep_type_iterator<const T *>::end(A.end());
}
+
} // end namespace llvm
-#endif
+#endif // LLVM_IR_GETELEMENTPTRTYPEITERATOR_H
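A sketch of walking a GEP with the reworked iterator: struct steps are detected through getStructTypeOrNull(), the indexed type is fetched explicitly (operator*() is deliberately withheld in this release, per the FIXME above), and bounded sequential steps expose their element count.

```cpp
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

static void walkGEP(const GetElementPtrInst *GEP) {
  for (gep_type_iterator GTI = gep_type_begin(GEP), E = gep_type_end(GEP);
       GTI != E; ++GTI) {
    if (StructType *STy = GTI.getStructTypeOrNull()) {
      (void)STy;                  // this operand selects a struct field
    } else {
      (void)GTI.getIndexedType(); // array/vector/unbounded element step
      if (GTI.isBoundedSequential())
        (void)GTI.getSequentialNumElements();
    }
  }
}
```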
diff --git a/contrib/llvm/include/llvm/IR/GlobalAlias.h b/contrib/llvm/include/llvm/IR/GlobalAlias.h
index 3ae3e4a001e1..37a291dfeb7a 100644
--- a/contrib/llvm/include/llvm/IR/GlobalAlias.h
+++ b/contrib/llvm/include/llvm/IR/GlobalAlias.h
@@ -17,6 +17,7 @@
#include "llvm/ADT/ilist_node.h"
#include "llvm/IR/GlobalIndirectSymbol.h"
+#include "llvm/IR/Value.h"
namespace llvm {
@@ -27,15 +28,14 @@ template <typename ValueSubClass> class SymbolTableListTraits;
class GlobalAlias : public GlobalIndirectSymbol,
public ilist_node<GlobalAlias> {
friend class SymbolTableListTraits<GlobalAlias>;
- void operator=(const GlobalAlias &) = delete;
- GlobalAlias(const GlobalAlias &) = delete;
-
- void setParent(Module *parent);
GlobalAlias(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage,
const Twine &Name, Constant *Aliasee, Module *Parent);
public:
+ GlobalAlias(const GlobalAlias &) = delete;
+ GlobalAlias &operator=(const GlobalAlias &) = delete;
+
/// If a parent module is specified, the alias is automatically inserted into
/// the end of the specified module's alias list.
static GlobalAlias *create(Type *Ty, unsigned AddressSpace,
@@ -89,6 +89,6 @@ public:
}
};
-} // End llvm namespace
+} // end namespace llvm
-#endif
+#endif // LLVM_IR_GLOBALALIAS_H
diff --git a/contrib/llvm/include/llvm/IR/GlobalIFunc.h b/contrib/llvm/include/llvm/IR/GlobalIFunc.h
index 0cbe882c58d8..bfaa9960cb13 100644
--- a/contrib/llvm/include/llvm/IR/GlobalIFunc.h
+++ b/contrib/llvm/include/llvm/IR/GlobalIFunc.h
@@ -20,6 +20,7 @@
#include "llvm/ADT/ilist_node.h"
#include "llvm/IR/GlobalIndirectSymbol.h"
+#include "llvm/IR/Value.h"
namespace llvm {
@@ -32,15 +33,14 @@ template <typename ValueSubClass> class SymbolTableListTraits;
class GlobalIFunc final : public GlobalIndirectSymbol,
public ilist_node<GlobalIFunc> {
friend class SymbolTableListTraits<GlobalIFunc>;
- void operator=(const GlobalIFunc &) = delete;
- GlobalIFunc(const GlobalIFunc &) = delete;
-
- void setParent(Module *parent);
GlobalIFunc(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage,
const Twine &Name, Constant *Resolver, Module *Parent);
public:
+ GlobalIFunc(const GlobalIFunc &) = delete;
+ GlobalIFunc &operator=(const GlobalIFunc &) = delete;
+
/// If a parent module is specified, the ifunc is automatically inserted into
/// the end of the specified module's ifunc list.
static GlobalIFunc *create(Type *Ty, unsigned AddressSpace,
@@ -71,6 +71,6 @@ public:
}
};
-} // End llvm namespace
+} // end namespace llvm
-#endif
+#endif // LLVM_IR_GLOBALIFUNC_H
diff --git a/contrib/llvm/include/llvm/IR/GlobalIndirectSymbol.h b/contrib/llvm/include/llvm/IR/GlobalIndirectSymbol.h
index 8edb3d1dbf4b..671309e85d19 100644
--- a/contrib/llvm/include/llvm/IR/GlobalIndirectSymbol.h
+++ b/contrib/llvm/include/llvm/IR/GlobalIndirectSymbol.h
@@ -16,20 +16,25 @@
#ifndef LLVM_IR_GLOBALINDIRECTSYMBOL_H
#define LLVM_IR_GLOBALINDIRECTSYMBOL_H
+#include "llvm/IR/GlobalObject.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/OperandTraits.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
+#include <cstddef>
namespace llvm {
class GlobalIndirectSymbol : public GlobalValue {
- void operator=(const GlobalIndirectSymbol &) = delete;
- GlobalIndirectSymbol(const GlobalIndirectSymbol &) = delete;
-
protected:
GlobalIndirectSymbol(Type *Ty, ValueTy VTy, unsigned AddressSpace,
LinkageTypes Linkage, const Twine &Name, Constant *Symbol);
public:
+ GlobalIndirectSymbol(const GlobalIndirectSymbol &) = delete;
+ GlobalIndirectSymbol &operator=(const GlobalIndirectSymbol &) = delete;
+
// allocate space for exactly one operand
void *operator new(size_t s) {
return User::operator new(s, 1);
@@ -79,6 +84,6 @@ struct OperandTraits<GlobalIndirectSymbol> :
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GlobalIndirectSymbol, Constant)
-} // End llvm namespace
+} // end namespace llvm
-#endif
+#endif // LLVM_IR_GLOBALINDIRECTSYMBOL_H
diff --git a/contrib/llvm/include/llvm/IR/GlobalObject.h b/contrib/llvm/include/llvm/IR/GlobalObject.h
index 04737a045ae5..11eb713a4e1d 100644
--- a/contrib/llvm/include/llvm/IR/GlobalObject.h
+++ b/contrib/llvm/include/llvm/IR/GlobalObject.h
@@ -15,18 +15,19 @@
#ifndef LLVM_IR_GLOBALOBJECT_H
#define LLVM_IR_GLOBALOBJECT_H
-#include "llvm/IR/DerivedTypes.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Value.h"
+#include <string>
+#include <utility>
namespace llvm {
+
class Comdat;
class MDNode;
class Metadata;
-class Module;
class GlobalObject : public GlobalValue {
- GlobalObject(const GlobalObject &) = delete;
-
protected:
GlobalObject(Type *Ty, ValueTy VTy, Use *Ops, unsigned NumOps,
LinkageTypes Linkage, const Twine &Name,
@@ -53,6 +54,8 @@ private:
static const unsigned GlobalObjectMask = (1 << GlobalObjectBits) - 1;
public:
+ GlobalObject(const GlobalObject &) = delete;
+
unsigned getAlignment() const {
unsigned Data = getGlobalValueSubClassData();
unsigned AlignmentData = Data & AlignmentMask;
@@ -141,6 +144,6 @@ private:
}
};
-} // End llvm namespace
+} // end namespace llvm
-#endif
+#endif // LLVM_IR_GLOBALOBJECT_H
diff --git a/contrib/llvm/include/llvm/IR/GlobalValue.h b/contrib/llvm/include/llvm/IR/GlobalValue.h
index 09682f7aa349..c6398aaa4847 100644
--- a/contrib/llvm/include/llvm/IR/GlobalValue.h
+++ b/contrib/llvm/include/llvm/IR/GlobalValue.h
@@ -18,23 +18,31 @@
#ifndef LLVM_IR_GLOBALVALUE_H
#define LLVM_IR_GLOBALVALUE_H
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Value.h"
#include "llvm/Support/MD5.h"
-#include <system_error>
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
+#include <cstdint>
+#include <string>
namespace llvm {
class Comdat;
-class PointerType;
+class ConstantRange;
+class Error;
+class GlobalObject;
class Module;
namespace Intrinsic {
enum ID : unsigned;
-}
+} // end namespace Intrinsic
class GlobalValue : public Constant {
- GlobalValue(const GlobalValue &) = delete;
public:
/// @brief An enumeration for the kinds of linkage for global values.
enum LinkageTypes {
@@ -72,11 +80,14 @@ protected:
ValueType(Ty), Linkage(Linkage), Visibility(DefaultVisibility),
UnnamedAddrVal(unsigned(UnnamedAddr::None)),
DllStorageClass(DefaultStorageClass), ThreadLocal(NotThreadLocal),
- IntID((Intrinsic::ID)0U), Parent(nullptr) {
+ HasLLVMReservedName(false), IntID((Intrinsic::ID)0U), Parent(nullptr) {
setName(Name);
}
Type *ValueType;
+
+ static const unsigned GlobalValueSubClassDataBits = 18;
+
// All bitfields use unsigned as the underlying type so that MSVC will pack
// them.
unsigned Linkage : 4; // The linkage of this global
@@ -86,14 +97,19 @@ protected:
unsigned ThreadLocal : 3; // Is this symbol "Thread Local", if so, what is
// the desired model?
- static const unsigned GlobalValueSubClassDataBits = 19;
+
+ /// True if the function's name starts with "llvm.". This corresponds to the
+ /// value of Function::isIntrinsic(), which may be true even if
+ /// Function::getIntrinsicID() returns Intrinsic::not_intrinsic.
+ unsigned HasLLVMReservedName : 1;
private:
+ friend class Constant;
+
// Give subclasses access to what otherwise would be wasted padding.
- // (19 + 4 + 2 + 2 + 2 + 3) == 32.
+ // (18 + 4 + 2 + 2 + 2 + 3 + 1) == 32.
unsigned SubClassData : GlobalValueSubClassDataBits;
- friend class Constant;
void destroyConstantImpl();
Value *handleOperandChangeImpl(Value *From, Value *To);
@@ -139,6 +155,12 @@ protected:
}
Module *Parent; // The containing module.
+
+ // Used by SymbolTableListTraits.
+ void setParent(Module *parent) {
+ Parent = parent;
+ }
+
public:
enum ThreadLocalMode {
NotThreadLocal = 0,
@@ -148,6 +170,8 @@ public:
LocalExecTLSModel
};
+ GlobalValue(const GlobalValue &) = delete;
+
~GlobalValue() override {
removeDeadConstantUsers(); // remove any dead constants using this.
}
@@ -460,10 +484,8 @@ public:
/// function has been read in yet or not.
bool isMaterializable() const;
- /// Make sure this GlobalValue is fully read. If the module is corrupt, this
- /// returns true and fills in the optional string with information about the
- /// problem. If successful, this returns false.
- std::error_code materialize();
+ /// Make sure this GlobalValue is fully read.
+ Error materialize();
/// @}
@@ -492,6 +514,18 @@ public:
// increased.
bool canIncreaseAlignment() const;
+ const GlobalObject *getBaseObject() const {
+ return const_cast<GlobalValue *>(this)->getBaseObject();
+ }
+ GlobalObject *getBaseObject();
+
+ /// Returns whether this is a reference to an absolute symbol.
+ bool isAbsoluteSymbolRef() const;
+
+ /// If this is an absolute symbol reference, returns the range of the symbol,
+ /// otherwise returns None.
+ Optional<ConstantRange> getAbsoluteSymbolRange() const;
+
/// This method unlinks 'this' from the containing module, but does not delete
/// it.
virtual void removeFromParent() = 0;
@@ -512,6 +546,6 @@ public:
}
};
-} // End llvm namespace
+} // end namespace llvm
-#endif
+#endif // LLVM_IR_GLOBALVALUE_H
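The new absolute-symbol queries compose as follows (a sketch; treating a single-element range as a fully known value is the caller's choice, not something the API mandates):

```cpp
#include "llvm/ADT/Optional.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/GlobalValue.h"
using namespace llvm;

// Returns true if GV is an absolute symbol whose value is pinned to a single
// constant by its range.
static bool hasExactAbsoluteValue(const GlobalValue &GV) {
  if (!GV.isAbsoluteSymbolRef())
    return false;
  if (Optional<ConstantRange> R = GV.getAbsoluteSymbolRange())
    return R->isSingleElement();
  return false;
}
```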
diff --git a/contrib/llvm/include/llvm/IR/GlobalVariable.h b/contrib/llvm/include/llvm/IR/GlobalVariable.h
index ebeb635468d0..3b545d811d44 100644
--- a/contrib/llvm/include/llvm/IR/GlobalVariable.h
+++ b/contrib/llvm/include/llvm/IR/GlobalVariable.h
@@ -20,36 +20,34 @@
#ifndef LLVM_IR_GLOBALVARIABLE_H
#define LLVM_IR_GLOBALVARIABLE_H
+#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/ilist_node.h"
#include "llvm/IR/GlobalObject.h"
#include "llvm/IR/OperandTraits.h"
+#include "llvm/IR/Value.h"
+#include <cassert>
+#include <cstddef>
namespace llvm {
-class Module;
class Constant;
+class Module;
+
template <typename ValueSubClass> class SymbolTableListTraits;
+class DIGlobalVariable;
+class DIGlobalVariableExpression;
class GlobalVariable : public GlobalObject, public ilist_node<GlobalVariable> {
friend class SymbolTableListTraits<GlobalVariable>;
- void *operator new(size_t, unsigned) = delete;
- void operator=(const GlobalVariable &) = delete;
- GlobalVariable(const GlobalVariable &) = delete;
-
- void setParent(Module *parent);
bool isConstantGlobal : 1; // Is this a global constant?
bool isExternallyInitializedConstant : 1; // Is this a global whose value
// can change from its initial
// value before global
// initializers are run?
-public:
- // allocate space for exactly one operand
- void *operator new(size_t s) {
- return User::operator new(s, 1);
- }
+public:
/// GlobalVariable ctor - If a parent module is specified, the global is
/// automatically inserted into the end of the specified modules global list.
GlobalVariable(Type *Ty, bool isConstant, LinkageTypes Linkage,
@@ -63,6 +61,8 @@ public:
const Twine &Name = "", GlobalVariable *InsertBefore = nullptr,
ThreadLocalMode = NotThreadLocal, unsigned AddressSpace = 0,
bool isExternallyInitialized = false);
+ GlobalVariable(const GlobalVariable &) = delete;
+ GlobalVariable &operator=(const GlobalVariable &) = delete;
~GlobalVariable() override {
dropAllReferences();
@@ -71,6 +71,13 @@ public:
setGlobalVariableNumOperands(1);
}
+ // allocate space for exactly one operand
+ void *operator new(size_t s) {
+ return User::operator new(s, 1);
+ }
+
+ void *operator new(size_t, unsigned) = delete;
+
/// Provide fast operand accessors
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
@@ -165,6 +172,12 @@ public:
/// drops not only the reference to the initializer but also to any metadata.
void dropAllReferences();
+ /// Attach a DIGlobalVariableExpression.
+ void addDebugInfo(DIGlobalVariableExpression *GV);
+
+ /// Fill the vector with all debug info attachments.
+ void getDebugInfo(SmallVectorImpl<DIGlobalVariableExpression *> &GVs) const;
+
// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const Value *V) {
return V->getValueID() == Value::GlobalVariableVal;
@@ -178,6 +191,6 @@ struct OperandTraits<GlobalVariable> :
DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GlobalVariable, Value)
-} // End llvm namespace
+} // end namespace llvm
-#endif
+#endif // LLVM_IR_GLOBALVARIABLE_H
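A sketch of the new debug-info attachment API (the helper is illustrative):

```cpp
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/GlobalVariable.h"
using namespace llvm;

// Count the DIGlobalVariableExpression nodes attached to a global.
static size_t countDebugAttachments(const GlobalVariable &GV) {
  SmallVector<DIGlobalVariableExpression *, 2> GVEs;
  GV.getDebugInfo(GVEs);
  return GVEs.size();
}
```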
diff --git a/contrib/llvm/include/llvm/IR/IRBuilder.h b/contrib/llvm/include/llvm/IR/IRBuilder.h
index 016e9e1d2c50..1d9c16989de9 100644
--- a/contrib/llvm/include/llvm/IR/IRBuilder.h
+++ b/contrib/llvm/include/llvm/IR/IRBuilder.h
@@ -17,7 +17,6 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/None.h"
-#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/IR/BasicBlock.h"
@@ -45,6 +44,8 @@
#include <cassert>
#include <cstddef>
#include <cstdint>
+#include <algorithm>
+#include <functional>
namespace llvm {
@@ -67,6 +68,23 @@ protected:
}
};
+/// Provides an 'InsertHelper' that calls a user-provided callback after
+/// performing the default insertion.
+class IRBuilderCallbackInserter : IRBuilderDefaultInserter {
+ std::function<void(Instruction *)> Callback;
+
+public:
+ IRBuilderCallbackInserter(std::function<void(Instruction *)> Callback)
+ : Callback(Callback) {}
+
+protected:
+ void InsertHelper(Instruction *I, const Twine &Name,
+ BasicBlock *BB, BasicBlock::iterator InsertPt) const {
+ IRBuilderDefaultInserter::InsertHelper(I, Name, BB, InsertPt);
+ Callback(I);
+ }
+};
+
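One plausible use of the callback inserter added here is collecting every instruction the builder creates into a worklist; a sketch (names are illustrative):

```cpp
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/IRBuilder.h"
using namespace llvm;

static void buildWithTracking(LLVMContext &Ctx, BasicBlock *BB) {
  SmallVector<Instruction *, 8> Created;
  IRBuilder<ConstantFolder, IRBuilderCallbackInserter> Builder(
      Ctx, ConstantFolder(),
      IRBuilderCallbackInserter([&](Instruction *I) { Created.push_back(I); }));
  Builder.SetInsertPoint(BB);
  Builder.CreateRetVoid(); // the callback observes this newly created instruction
}
```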
/// \brief Common base class shared among various IRBuilders.
class IRBuilderBase {
DebugLoc CurDbgLocation;
@@ -84,7 +102,7 @@ protected:
public:
IRBuilderBase(LLVMContext &context, MDNode *FPMathTag = nullptr,
ArrayRef<OperandBundleDef> OpBundles = None)
- : Context(context), DefaultFPMathTag(FPMathTag), FMF(),
+ : Context(context), DefaultFPMathTag(FPMathTag),
DefaultOperandBundles(OpBundles) {
ClearInsertionPoint();
}
@@ -97,7 +115,7 @@ public:
/// inserted into a block.
void ClearInsertionPoint() {
BB = nullptr;
- InsertPt.reset(nullptr);
+ InsertPt = BasicBlock::iterator();
}
BasicBlock *GetInsertBlock() const { return BB; }
@@ -148,12 +166,12 @@ public:
/// InsertPoint - A saved insertion point.
class InsertPoint {
- BasicBlock *Block;
+ BasicBlock *Block = nullptr;
BasicBlock::iterator Point;
public:
/// \brief Creates a new insertion point which doesn't point to anything.
- InsertPoint() : Block(nullptr) {}
+ InsertPoint() = default;
/// \brief Creates a new insertion point at the given location.
InsertPoint(BasicBlock *InsertBlock, BasicBlock::iterator InsertPoint)
@@ -162,8 +180,8 @@ public:
/// \brief Returns true if this insert point is set.
bool isSet() const { return (Block != nullptr); }
- llvm::BasicBlock *getBlock() const { return Block; }
- llvm::BasicBlock::iterator getPoint() const { return Point; }
+ BasicBlock *getBlock() const { return Block; }
+ BasicBlock::iterator getPoint() const { return Point; }
};
/// \brief Returns the current insert point.
@@ -213,14 +231,14 @@ public:
BasicBlock::iterator Point;
DebugLoc DbgLoc;
- InsertPointGuard(const InsertPointGuard &) = delete;
- InsertPointGuard &operator=(const InsertPointGuard &) = delete;
-
public:
InsertPointGuard(IRBuilderBase &B)
: Builder(B), Block(B.GetInsertBlock()), Point(B.GetInsertPoint()),
DbgLoc(B.getCurrentDebugLocation()) {}
+ InsertPointGuard(const InsertPointGuard &) = delete;
+ InsertPointGuard &operator=(const InsertPointGuard &) = delete;
+
~InsertPointGuard() {
Builder.restoreIP(InsertPoint(Block, Point));
Builder.SetCurrentDebugLocation(DbgLoc);
@@ -234,14 +252,13 @@ public:
FastMathFlags FMF;
MDNode *FPMathTag;
- FastMathFlagGuard(const FastMathFlagGuard &) = delete;
- FastMathFlagGuard &operator=(
- const FastMathFlagGuard &) = delete;
-
public:
FastMathFlagGuard(IRBuilderBase &B)
: Builder(B), FMF(B.FMF), FPMathTag(B.DefaultFPMathTag) {}
+ FastMathFlagGuard(const FastMathFlagGuard &) = delete;
+ FastMathFlagGuard &operator=(const FastMathFlagGuard &) = delete;
+
~FastMathFlagGuard() {
Builder.FMF = FMF;
Builder.DefaultFPMathTag = FPMathTag;
@@ -446,6 +463,11 @@ public:
/// If the pointer isn't i8* it will be converted.
CallInst *CreateLifetimeEnd(Value *Ptr, ConstantInt *Size = nullptr);
+ /// Create a call to invariant.start intrinsic.
+ ///
+ /// If the pointer isn't i8* it will be converted.
+ CallInst *CreateInvariantStart(Value *Ptr, ConstantInt *Size = nullptr);
+
/// \brief Create a call to Masked Load intrinsic
CallInst *CreateMaskedLoad(Value *Ptr, unsigned Align, Value *Mask,
Value *PassThru = nullptr, const Twine &Name = "");
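A hedged sketch of the new CreateInvariantStart() helper declared above; Builder and Ptr are placeholders, and the pointer is converted to i8* internally when needed:

    // Mark the first 8 bytes behind Ptr as invariant from this point onwards.
    Builder.CreateInvariantStart(Ptr, Builder.getInt64(8));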
@@ -681,6 +703,19 @@ public:
BranchWeights, Unpredictable));
}
+ /// \brief Create a conditional 'br Cond, TrueDest, FalseDest'
+ /// instruction. Copy branch metadata if available.
+ BranchInst *CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False,
+ Instruction *MDSrc) {
+ BranchInst *Br = BranchInst::Create(True, False, Cond);
+ if (MDSrc) {
+ unsigned WL[4] = {LLVMContext::MD_prof, LLVMContext::MD_unpredictable,
+ LLVMContext::MD_make_implicit, LLVMContext::MD_dbg};
+ Br->copyMetadata(*MDSrc, makeArrayRef(&WL[0], 4));
+ }
+ return Insert(Br);
+ }
+
/// \brief Create a switch instruction with the specified value, default dest,
/// and with a hint for the number of cases that will be added (for efficient
/// allocation).
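A small sketch of the new four-argument CreateCondBr() added above. OldBr is assumed to be an existing conditional BranchInst being re-emitted at the builder's insert point; only prof, unpredictable, make.implicit and dbg metadata are carried over:

    Builder.CreateCondBr(OldBr->getCondition(), OldBr->getSuccessor(0),
                         OldBr->getSuccessor(1), /*MDSrc=*/OldBr);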
@@ -1000,7 +1035,7 @@ public:
if (Constant *LC = dyn_cast<Constant>(LHS))
if (Constant *RC = dyn_cast<Constant>(RHS))
return Insert(Folder.CreateBinOp(Opc, LC, RC), Name);
- llvm::Instruction *BinOp = BinaryOperator::Create(Opc, LHS, RHS);
+ Instruction *BinOp = BinaryOperator::Create(Opc, LHS, RHS);
if (isa<FPMathOperator>(BinOp))
BinOp = AddFPMathAttributes(BinOp, FPMathTag, FMF);
return Insert(BinOp, Name);
@@ -1410,12 +1445,6 @@ public:
return CreateBitCast(V, DestTy, Name);
}
-private:
- // \brief Provided to resolve 'CreateIntCast(Ptr, Ptr, "...")', giving a
- // compile time error, instead of converting the string to bool for the
- // isSigned parameter.
- Value *CreateIntCast(Value *, Type *, const char *) = delete;
-
public:
Value *CreateFPCast(Value *V, Type *DestTy, const Twine &Name = "") {
if (V->getType() == DestTy)
@@ -1425,6 +1454,11 @@ public:
return Insert(CastInst::CreateFPCast(V, DestTy), Name);
}
+ // \brief Provided to resolve 'CreateIntCast(Ptr, Ptr, "...")', giving a
+ // compile time error, instead of converting the string to bool for the
+ // isSigned parameter.
+ Value *CreateIntCast(Value *, Type *, const char *) = delete;
+
//===--------------------------------------------------------------------===//
// Instruction creation methods: Compare Instructions
//===--------------------------------------------------------------------===//
@@ -1549,7 +1583,7 @@ public:
return CreateCall(FTy, Callee, Args, Name, FPMathTag);
}
- CallInst *CreateCall(llvm::FunctionType *FTy, Value *Callee,
+ CallInst *CreateCall(FunctionType *FTy, Value *Callee,
ArrayRef<Value *> Args, const Twine &Name = "",
MDNode *FPMathTag = nullptr) {
CallInst *CI = CallInst::Create(FTy, Callee, Args, DefaultOperandBundles);
diff --git a/contrib/llvm/include/llvm/IR/IRPrintingPasses.h b/contrib/llvm/include/llvm/IR/IRPrintingPasses.h
index bc6de19a6c3a..0825e0696cac 100644
--- a/contrib/llvm/include/llvm/IR/IRPrintingPasses.h
+++ b/contrib/llvm/include/llvm/IR/IRPrintingPasses.h
@@ -30,7 +30,7 @@ class Module;
class ModulePass;
class PreservedAnalyses;
class raw_ostream;
-template <typename IRUnitT> class AnalysisManager;
+template <typename IRUnitT, typename... ExtraArgTs> class AnalysisManager;
/// \brief Create and return a pass that writes the module to the specified
/// \c raw_ostream.
diff --git a/contrib/llvm/include/llvm/IR/InlineAsm.h b/contrib/llvm/include/llvm/IR/InlineAsm.h
index 40ba830b8819..f95509b9b09a 100644
--- a/contrib/llvm/include/llvm/IR/InlineAsm.h
+++ b/contrib/llvm/include/llvm/IR/InlineAsm.h
@@ -18,15 +18,14 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Value.h"
+#include <cassert>
+#include <string>
#include <vector>
namespace llvm {
-class PointerType;
class FunctionType;
-class Module;
-
-struct InlineAsmKeyType;
+class PointerType;
template <class ConstantClass> class ConstantUniqueMap;
class InlineAsm : public Value {
@@ -40,9 +39,6 @@ private:
friend struct InlineAsmKeyType;
friend class ConstantUniqueMap<InlineAsm>;
- InlineAsm(const InlineAsm &) = delete;
- void operator=(const InlineAsm&) = delete;
-
std::string AsmString, Constraints;
FunctionType *FTy;
bool HasSideEffects;
@@ -59,6 +55,9 @@ private:
void destroyConstant();
public:
+ InlineAsm(const InlineAsm &) = delete;
+ InlineAsm &operator=(const InlineAsm &) = delete;
+
/// InlineAsm::get - Return the specified uniqued inline asm string.
///
static InlineAsm *get(FunctionType *Ty, StringRef AsmString,
@@ -361,6 +360,6 @@ public:
}
};
-} // End llvm namespace
+} // end namespace llvm
-#endif
+#endif // LLVM_IR_INLINEASM_H
diff --git a/contrib/llvm/include/llvm/IR/InstIterator.h b/contrib/llvm/include/llvm/IR/InstIterator.h
index 1baca21c73af..28fc473f1490 100644
--- a/contrib/llvm/include/llvm/IR/InstIterator.h
+++ b/contrib/llvm/include/llvm/IR/InstIterator.h
@@ -19,8 +19,11 @@
#ifndef LLVM_IR_INSTITERATOR_H
#define LLVM_IR_INSTITERATOR_H
+#include "llvm/ADT/iterator_range.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/SymbolTableListTraits.h"
+#include <iterator>
namespace llvm {
@@ -35,6 +38,7 @@ template <class BB_t, class BB_i_t, class BI_t, class II_t> class InstIterator {
BB_t *BBs; // BasicBlocksType
BB_i_t BB; // BasicBlocksType::iterator
BI_t BI; // BasicBlock::iterator
+
public:
typedef std::bidirectional_iterator_tag iterator_category;
typedef IIty value_type;
@@ -43,7 +47,7 @@ public:
typedef IIty& reference;
// Default constructor
- InstIterator() {}
+ InstIterator() = default;
// Copy constructor...
template<typename A, typename B, typename C, typename D>
@@ -97,7 +101,7 @@ public:
--BI;
return *this;
}
- inline InstIterator operator--(int) {
+ inline InstIterator operator--(int) {
InstIterator tmp = *this; --*this; return tmp;
}
@@ -152,6 +156,6 @@ inline const_inst_range instructions(const Function &F) {
return const_inst_range(inst_begin(F), inst_end(F));
}
-} // End llvm namespace
+} // end namespace llvm
-#endif
+#endif // LLVM_IR_INSTITERATOR_H
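The inst_begin/inst_end helpers compose into the instructions() range shown in the last hunk; a minimal sketch, with F a placeholder Function reference:

    unsigned NumCalls = 0;
    for (Instruction &I : instructions(F)) // walks every instruction, block by block
      if (isa<CallInst>(I))
        ++NumCalls;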
diff --git a/contrib/llvm/include/llvm/IR/InstrTypes.h b/contrib/llvm/include/llvm/IR/InstrTypes.h
index 39514c5675a7..f2abbec64fe6 100644
--- a/contrib/llvm/include/llvm/IR/InstrTypes.h
+++ b/contrib/llvm/include/llvm/IR/InstrTypes.h
@@ -16,18 +16,32 @@
#ifndef LLVM_IR_INSTRTYPES_H
#define LLVM_IR_INSTRTYPES_H
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/OperandTraits.h"
+#include "llvm/IR/User.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
+#include <string>
+#include <vector>
namespace llvm {
-class LLVMContext;
-
//===----------------------------------------------------------------------===//
// TerminatorInst Class
//===----------------------------------------------------------------------===//
@@ -249,8 +263,8 @@ public:
typedef SuccIterator<TerminatorInst *, BasicBlock> succ_iterator;
typedef SuccIterator<const TerminatorInst *, const BasicBlock>
succ_const_iterator;
- typedef llvm::iterator_range<succ_iterator> succ_range;
- typedef llvm::iterator_range<succ_const_iterator> succ_const_range;
+ typedef iterator_range<succ_iterator> succ_range;
+ typedef iterator_range<succ_const_iterator> succ_const_range;
private:
inline succ_iterator succ_begin() { return succ_iterator(this); }
@@ -276,8 +290,6 @@ public:
//===----------------------------------------------------------------------===//
class UnaryInstruction : public Instruction {
- void *operator new(size_t, unsigned) = delete;
-
protected:
UnaryInstruction(Type *Ty, unsigned iType, Value *V,
Instruction *IB = nullptr)
@@ -295,6 +307,8 @@ public:
return User::operator new(s, 1);
}
+ void *operator new(size_t, unsigned) = delete;
+
// Out of line virtual method, so the vtable, etc has a home.
~UnaryInstruction() override;
@@ -326,8 +340,6 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(UnaryInstruction, Value)
//===----------------------------------------------------------------------===//
class BinaryOperator : public Instruction {
- void *operator new(size_t, unsigned) = delete;
-
protected:
void init(BinaryOps iType);
BinaryOperator(BinaryOps iType, Value *S1, Value *S2, Type *Ty,
@@ -345,6 +357,8 @@ public:
return User::operator new(s, 2);
}
+ void *operator new(size_t, unsigned) = delete;
+
/// Transparently provide more efficient getOperand methods.
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
@@ -899,10 +913,6 @@ public:
BAD_ICMP_PREDICATE = ICMP_SLE + 1
};
-private:
- void *operator new(size_t, unsigned) = delete;
- CmpInst() = delete;
-
protected:
CmpInst(Type *ty, Instruction::OtherOps op, Predicate pred,
Value *LHS, Value *RHS, const Twine &Name = "",
@@ -915,10 +925,15 @@ protected:
void anchor() override; // Out of line virtual method.
public:
+ CmpInst() = delete;
+
// allocate space for exactly two operands
void *operator new(size_t s) {
return User::operator new(s, 2);
}
+
+ void *operator new(size_t, unsigned) = delete;
+
/// Construct a compare instruction, given the opcode, the predicate and
/// the two operands. Optionally (if InstBefore is specified) insert the
/// instruction into a BasicBlock right before the specified instruction.
@@ -957,6 +972,8 @@ public:
return P >= FIRST_ICMP_PREDICATE && P <= LAST_ICMP_PREDICATE;
}
+ static StringRef getPredicateName(Predicate P);
+
bool isFPPredicate() const { return isFPPredicate(getPredicate()); }
bool isIntPredicate() const { return isIntPredicate(getPredicate()); }
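The newly declared CmpInst::getPredicateName() maps a predicate to its textual spelling; a hedged sketch, where Cmp is a placeholder CmpInst * and llvm::errs() from raw_ostream.h is assumed for output:

    errs() << "compare uses predicate '"
           << CmpInst::getPredicateName(Cmp->getPredicate()) << "'\n"; // e.g. "sgt"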
@@ -1189,7 +1206,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(FuncletPadInst, Value)
struct OperandBundleUse {
ArrayRef<Use> Inputs;
- OperandBundleUse() {}
+ OperandBundleUse() = default;
explicit OperandBundleUse(StringMapEntry<uint32_t> *Tag, ArrayRef<Use> Inputs)
: Inputs(Inputs), Tag(Tag) {}
@@ -1202,7 +1219,7 @@ struct OperandBundleUse {
// Conservative answer: no operands have any attributes.
return false;
- };
+ }
/// \brief Return the tag of this operand bundle as a string.
StringRef getTagName() const {
@@ -1335,6 +1352,12 @@ public:
return bundle_op_info_end()[-1].End;
}
+ /// Return true if the operand at index \p Idx is a bundle operand.
+ bool isBundleOperand(unsigned Idx) const {
+ return hasOperandBundles() && Idx >= getBundleOperandsStartIndex() &&
+ Idx < getBundleOperandsEndIndex();
+ }
+
/// \brief Return the total number operands (not operand bundles) used by
/// every operand bundle in this OperandBundleUser.
unsigned getNumTotalBundleOperands() const {
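A minimal sketch of the new isBundleOperand() query, with CS a placeholder CallInst *: it lets a loop over the raw operand list skip values that belong to operand bundles rather than to the call itself:

    for (unsigned i = 0, e = CS->getNumOperands(); i != e; ++i) {
      if (CS->isBundleOperand(i))
        continue; // operand is part of a bundle, not a call argument or callee
      visitOperand(CS->getOperand(i)); // 'visitOperand' is a placeholder
    }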
@@ -1471,14 +1494,14 @@ public:
return std::equal(bundle_op_info_begin(), bundle_op_info_end(),
Other.bundle_op_info_begin());
- };
+ }
/// \brief Return true if this operand bundle user contains operand bundles
/// with tags other than those specified in \p IDs.
bool hasOperandBundlesOtherThan(ArrayRef<uint32_t> IDs) const {
for (unsigned i = 0, e = getNumOperandBundles(); i != e; ++i) {
uint32_t ID = getOperandBundleAt(i).getTagID();
- if (std::find(IDs.begin(), IDs.end(), ID) == IDs.end())
+ if (!is_contained(IDs, ID))
return true;
}
return false;
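A hedged sketch of hasOperandBundlesOtherThan(), which the hunk above rewrites in terms of is_contained(); CS is a placeholder call site and OB_deopt is the tag ID that LLVMContext registers for the "deopt" bundle:

    uint32_t AllowedIDs[] = {LLVMContext::OB_deopt};
    if (CS->hasOperandBundlesOtherThan(AllowedIDs))
      return false; // bail out: an unknown bundle kind constrains this transform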
diff --git a/contrib/llvm/include/llvm/IR/Instruction.h b/contrib/llvm/include/llvm/IR/Instruction.h
index df4f8df78b12..fd7c54d69b63 100644
--- a/contrib/llvm/include/llvm/IR/Instruction.h
+++ b/contrib/llvm/include/llvm/IR/Instruction.h
@@ -17,27 +17,27 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/ilist_node.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/SymbolTableListTraits.h"
#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <utility>
namespace llvm {
+class BasicBlock;
class FastMathFlags;
-class LLVMContext;
class MDNode;
-class BasicBlock;
struct AAMDNodes;
-template <>
-struct SymbolTableListSentinelTraits<Instruction>
- : public ilist_half_embedded_sentinel_traits<Instruction> {};
-
class Instruction : public User,
public ilist_node_with_parent<Instruction, BasicBlock> {
- void operator=(const Instruction &) = delete;
- Instruction(const Instruction &) = delete;
-
BasicBlock *Parent;
DebugLoc DbgLoc; // 'dbg' Metadata cache.
@@ -46,7 +46,11 @@ class Instruction : public User,
/// this instruction has metadata attached to it or not.
HasMetadataBit = 1 << 15
};
+
public:
+ Instruction(const Instruction &) = delete;
+ Instruction &operator=(const Instruction &) = delete;
+
// Out of line virtual method, so the vtable, etc has a home.
~Instruction() override;
@@ -94,6 +98,11 @@ public:
/// the basic block that MovePos lives in, right before MovePos.
void moveBefore(Instruction *MovePos);
+ /// Unlink this instruction and insert into BB before I.
+ ///
+ /// \pre I is a valid iterator into BB.
+ void moveBefore(BasicBlock &BB, SymbolTableList<Instruction>::iterator I);
+
//===--------------------------------------------------------------------===//
// Subclass classification.
//===--------------------------------------------------------------------===//
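A sketch of the new iterator-based moveBefore() overload, with F and I placeholders for a Function * and an Instruction * that is safe to hoist; taking an iterator also works when the destination block is otherwise empty:

    BasicBlock &Entry = F->getEntryBlock();
    I->moveBefore(Entry, Entry.getFirstInsertionPt()); // unlink I, re-insert after any PHIs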
@@ -133,6 +142,11 @@ public:
return getOpcode() == AShr;
}
+ /// Return true if this is and/or/xor.
+ inline bool isBitwiseLogicOp() const {
+ return getOpcode() == And || getOpcode() == Or || getOpcode() == Xor;
+ }
+
/// Determine if the OpCode is one of the CastInst instructions.
static inline bool isCast(unsigned OpCode) {
return OpCode >= CastOpsBegin && OpCode < CastOpsEnd;
@@ -197,6 +211,17 @@ public:
void setMetadata(unsigned KindID, MDNode *Node);
void setMetadata(StringRef Kind, MDNode *Node);
+ /// Copy metadata from \p SrcInst to this instruction. \p WL, if not empty,
+ /// specifies the list of metadata that needs to be copied. If \p WL is
+ /// empty, all metadata will be copied.
+ void copyMetadata(const Instruction &SrcInst,
+ ArrayRef<unsigned> WL = ArrayRef<unsigned>());
+
+ /// If the instruction has "branch_weights" MD_prof metadata and the MDNode
+ /// has three operands (including name string), swap the order of the
+ /// metadata.
+ void swapProfMetadata();
+
/// Drop all unknown metadata except for debug locations.
/// @{
/// Passes are required to drop metadata they don't understand. This is a
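A minimal sketch of the new copyMetadata() with a whitelist, where OldI and NewI are placeholder Instruction pointers; only the listed kinds are transferred, everything else stays behind:

    unsigned WL[] = {LLVMContext::MD_prof, LLVMContext::MD_dbg};
    NewI->copyMetadata(*OldI, WL); // pass no list to copy every attachment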
@@ -220,12 +245,12 @@ public:
/// Retrieve the raw weight values of a conditional branch or select.
/// Returns true on success with profile weights filled in.
/// Returns false if no metadata or invalid metadata was found.
- bool extractProfMetadata(uint64_t &TrueVal, uint64_t &FalseVal);
+ bool extractProfMetadata(uint64_t &TrueVal, uint64_t &FalseVal) const;
/// Retrieve total raw weight values of a branch.
/// Returns true on success with profile total weights filled in.
/// Returns false if no metadata was found.
- bool extractProfTotalWeight(uint64_t &TotalVal);
+ bool extractProfTotalWeight(uint64_t &TotalVal) const;
/// Set the debug location information for this instruction.
void setDebugLoc(DebugLoc Loc) { DbgLoc = std::move(Loc); }
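With the const-qualified accessors above, profile weights can be read off a branch without mutable access; a rough sketch, where Br is a placeholder const BranchInst *:

    uint64_t TrueWeight = 0, FalseWeight = 0;
    if (Br->extractProfMetadata(TrueWeight, FalseWeight) &&
        TrueWeight + FalseWeight != 0) {
      double TakenProb = double(TrueWeight) / double(TrueWeight + FalseWeight);
      (void)TakenProb; // a real pass would feed this into its heuristic
    }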
@@ -335,12 +360,12 @@ private:
SmallVectorImpl<std::pair<unsigned, MDNode *>> &) const;
/// Clear all hashtable-based metadata from this instruction.
void clearMetadataHashEntries();
+
public:
//===--------------------------------------------------------------------===//
// Predicates and helper methods.
//===--------------------------------------------------------------------===//
-
/// Return true if the instruction is associative:
///
/// Associative operators satisfy: x op (y op z) === (x op y) op z
@@ -450,7 +475,7 @@ public:
bool isIdenticalTo(const Instruction *I) const;
/// This is like isIdenticalTo, except that it ignores the
- /// SubclassOptionalData flags, which specify conditions under which the
+ /// SubclassOptionalData flags, which may specify conditions under which the
/// instruction's result is undefined.
bool isIdenticalToWhenDefined(const Instruction *I) const;
@@ -529,12 +554,16 @@ public:
#define LAST_OTHER_INST(N) OtherOpsEnd = N+1
#include "llvm/IR/Instruction.def"
};
+
private:
+ friend class SymbolTableListTraits<Instruction>;
+
// Shadow Value::setValueSubclassData with a private forwarding method so that
// subclasses cannot accidentally use it.
void setValueSubclassData(unsigned short D) {
Value::setValueSubclassData(D);
}
+
unsigned short getSubclassDataFromValue() const {
return Value::getSubclassDataFromValue();
}
@@ -544,8 +573,8 @@ private:
(V ? HasMetadataBit : 0));
}
- friend class SymbolTableListTraits<Instruction>;
void setParent(BasicBlock *P);
+
protected:
// Instruction subclasses can stick up to 15 bits of stuff into the
// SubclassData field of instruction with these members.
@@ -570,18 +599,6 @@ private:
Instruction *cloneImpl() const;
};
-// Instruction* is only 4-byte aligned.
-template<>
-class PointerLikeTypeTraits<Instruction*> {
- typedef Instruction* PT;
-public:
- static inline void *getAsVoidPointer(PT P) { return P; }
- static inline PT getFromVoidPointer(void *P) {
- return static_cast<PT>(P);
- }
- enum { NumLowBitsAvailable = 2 };
-};
-
-} // End llvm namespace
+} // end namespace llvm
-#endif
+#endif // LLVM_IR_INSTRUCTION_H
diff --git a/contrib/llvm/include/llvm/IR/Instructions.h b/contrib/llvm/include/llvm/IR/Instructions.h
index be077725f7bc..a5d78a08171a 100644
--- a/contrib/llvm/include/llvm/IR/Instructions.h
+++ b/contrib/llvm/include/llvm/IR/Instructions.h
@@ -17,23 +17,33 @@
#define LLVM_IR_INSTRUCTIONS_H
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/ADT/None.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/iterator_range.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Constant.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/OperandTraits.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/User.h"
#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
-#include <iterator>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
namespace llvm {
class APInt;
class ConstantInt;
-class ConstantRange;
class DataLayout;
class LLVMContext;
@@ -46,14 +56,14 @@ enum SynchronizationScope {
// AllocaInst Class
//===----------------------------------------------------------------------===//
-/// AllocaInst - an instruction to allocate memory on the stack
-///
+/// an instruction to allocate memory on the stack
class AllocaInst : public UnaryInstruction {
Type *AllocatedType;
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
+
AllocaInst *cloneImpl() const;
public:
@@ -74,65 +84,56 @@ public:
// Out of line virtual method, so the vtable, etc. has a home.
~AllocaInst() override;
- /// isArrayAllocation - Return true if there is an allocation size parameter
- /// to the allocation instruction that is not 1.
- ///
+ /// Return true if there is an allocation size parameter to the allocation
+ /// instruction that is not 1.
bool isArrayAllocation() const;
- /// getArraySize - Get the number of elements allocated. For a simple
- /// allocation of a single element, this will return a constant 1 value.
- ///
+ /// Get the number of elements allocated. For a simple allocation of a single
+ /// element, this will return a constant 1 value.
const Value *getArraySize() const { return getOperand(0); }
Value *getArraySize() { return getOperand(0); }
- /// getType - Overload to return most specific pointer type
- ///
+ /// Overload to return most specific pointer type.
PointerType *getType() const {
return cast<PointerType>(Instruction::getType());
}
- /// getAllocatedType - Return the type that is being allocated by the
- /// instruction.
- ///
+ /// Return the type that is being allocated by the instruction.
Type *getAllocatedType() const { return AllocatedType; }
- /// \brief for use only in special circumstances that need to generically
+ /// for use only in special circumstances that need to generically
/// transform a whole instruction (eg: IR linking and vectorization).
void setAllocatedType(Type *Ty) { AllocatedType = Ty; }
- /// getAlignment - Return the alignment of the memory that is being allocated
- /// by the instruction.
- ///
+ /// Return the alignment of the memory that is being allocated by the
+ /// instruction.
unsigned getAlignment() const {
return (1u << (getSubclassDataFromInstruction() & 31)) >> 1;
}
void setAlignment(unsigned Align);
- /// isStaticAlloca - Return true if this alloca is in the entry block of the
- /// function and is a constant size. If so, the code generator will fold it
- /// into the prolog/epilog code, so it is basically free.
+ /// Return true if this alloca is in the entry block of the function and is a
+ /// constant size. If so, the code generator will fold it into the
+ /// prolog/epilog code, so it is basically free.
bool isStaticAlloca() const;
- /// \brief Return true if this alloca is used as an inalloca argument to a
- /// call. Such allocas are never considered static even if they are in the
- /// entry block.
+ /// Return true if this alloca is used as an inalloca argument to a call. Such
+ /// allocas are never considered static even if they are in the entry block.
bool isUsedWithInAlloca() const {
return getSubclassDataFromInstruction() & 32;
}
- /// \brief Specify whether this alloca is used to represent the arguments to
- /// a call.
+ /// Specify whether this alloca is used to represent the arguments to a call.
void setUsedWithInAlloca(bool V) {
setInstructionSubclassData((getSubclassDataFromInstruction() & ~32) |
(V ? 32 : 0));
}
- /// \brief Return true if this alloca is used as a swifterror argument to a
- /// call.
+ /// Return true if this alloca is used as a swifterror argument to a call.
bool isSwiftError() const {
return getSubclassDataFromInstruction() & 64;
}
- /// \brief Specify whether this alloca is used to represent a swifterror.
+ /// Specify whether this alloca is used to represent a swifterror.
void setSwiftError(bool V) {
setInstructionSubclassData((getSubclassDataFromInstruction() & ~64) |
(V ? 64 : 0));
@@ -158,15 +159,15 @@ private:
// LoadInst Class
//===----------------------------------------------------------------------===//
-/// LoadInst - an instruction for reading from memory. This uses the
-/// SubclassData field in Value to store whether or not the load is volatile.
-///
+/// An instruction for reading from memory. This uses the SubclassData field in
+/// Value to store whether or not the load is volatile.
class LoadInst : public UnaryInstruction {
void AssertOK();
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
+
LoadInst *cloneImpl() const;
public:
@@ -201,7 +202,6 @@ public:
unsigned Align, AtomicOrdering Order,
SynchronizationScope SynchScope,
BasicBlock *InsertAtEnd);
-
LoadInst(Value *Ptr, const char *NameStr, Instruction *InsertBefore);
LoadInst(Value *Ptr, const char *NameStr, BasicBlock *InsertAtEnd);
LoadInst(Type *Ty, Value *Ptr, const char *NameStr = nullptr,
@@ -214,20 +214,16 @@ public:
LoadInst(Value *Ptr, const char *NameStr, bool isVolatile,
BasicBlock *InsertAtEnd);
- /// isVolatile - Return true if this is a load from a volatile memory
- /// location.
- ///
+ /// Return true if this is a load from a volatile memory location.
bool isVolatile() const { return getSubclassDataFromInstruction() & 1; }
- /// setVolatile - Specify whether this is a volatile load or not.
- ///
+ /// Specify whether this is a volatile load or not.
void setVolatile(bool V) {
setInstructionSubclassData((getSubclassDataFromInstruction() & ~1) |
(V ? 1 : 0));
}
- /// getAlignment - Return the alignment of the access that is being performed
- ///
+ /// Return the alignment of the access that is being performed.
unsigned getAlignment() const {
return (1 << ((getSubclassDataFromInstruction() >> 1) & 31)) >> 1;
}
@@ -275,7 +271,7 @@ public:
const Value *getPointerOperand() const { return getOperand(0); }
static unsigned getPointerOperandIndex() { return 0U; }
- /// \brief Returns the address space of the pointer operand.
+ /// Returns the address space of the pointer operand.
unsigned getPointerAddressSpace() const {
return getPointerOperand()->getType()->getPointerAddressSpace();
}
@@ -300,22 +296,17 @@ private:
// StoreInst Class
//===----------------------------------------------------------------------===//
-/// StoreInst - an instruction for storing to memory
-///
+/// An instruction for storing to memory.
class StoreInst : public Instruction {
- void *operator new(size_t, unsigned) = delete;
void AssertOK();
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
+
StoreInst *cloneImpl() const;
public:
- // allocate space for exactly two operands
- void *operator new(size_t s) {
- return User::operator new(s, 2);
- }
StoreInst(Value *Val, Value *Ptr, Instruction *InsertBefore);
StoreInst(Value *Val, Value *Ptr, BasicBlock *InsertAtEnd);
StoreInst(Value *Val, Value *Ptr, bool isVolatile = false,
@@ -334,13 +325,17 @@ public:
SynchronizationScope SynchScope,
BasicBlock *InsertAtEnd);
- /// isVolatile - Return true if this is a store to a volatile memory
- /// location.
- ///
+ // allocate space for exactly two operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 2);
+ }
+
+ void *operator new(size_t, unsigned) = delete;
+
+ /// Return true if this is a store to a volatile memory location.
bool isVolatile() const { return getSubclassDataFromInstruction() & 1; }
- /// setVolatile - Specify whether this is a volatile store or not.
- ///
+ /// Specify whether this is a volatile store or not.
void setVolatile(bool V) {
setInstructionSubclassData((getSubclassDataFromInstruction() & ~1) |
(V ? 1 : 0));
@@ -349,8 +344,7 @@ public:
/// Transparently provide more efficient getOperand methods.
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
- /// getAlignment - Return the alignment of the access that is being performed
- ///
+ /// Return the alignment of the access that is being performed
unsigned getAlignment() const {
return (1 << ((getSubclassDataFromInstruction() >> 1) & 31)) >> 1;
}
@@ -401,7 +395,7 @@ public:
const Value *getPointerOperand() const { return getOperand(1); }
static unsigned getPointerOperandIndex() { return 1U; }
- /// \brief Returns the address space of the pointer operand.
+ /// Returns the address space of the pointer operand.
unsigned getPointerAddressSpace() const {
return getPointerOperand()->getType()->getPointerAddressSpace();
}
@@ -432,23 +426,17 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(StoreInst, Value)
// FenceInst Class
//===----------------------------------------------------------------------===//
-/// FenceInst - an instruction for ordering other memory operations
-///
+/// An instruction for ordering other memory operations.
class FenceInst : public Instruction {
- void *operator new(size_t, unsigned) = delete;
void Init(AtomicOrdering Ordering, SynchronizationScope SynchScope);
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
+
FenceInst *cloneImpl() const;
public:
- // allocate space for exactly zero operands
- void *operator new(size_t s) {
- return User::operator new(s, 0);
- }
-
// Ordering may only be Acquire, Release, AcquireRelease, or
// SequentiallyConsistent.
FenceInst(LLVMContext &C, AtomicOrdering Ordering,
@@ -458,6 +446,13 @@ public:
SynchronizationScope SynchScope,
BasicBlock *InsertAtEnd);
+ // allocate space for exactly zero operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 0);
+ }
+
+ void *operator new(size_t, unsigned) = delete;
+
/// Returns the ordering effect of this fence.
AtomicOrdering getOrdering() const {
return AtomicOrdering(getSubclassDataFromInstruction() >> 1);
@@ -502,12 +497,11 @@ private:
// AtomicCmpXchgInst Class
//===----------------------------------------------------------------------===//
-/// AtomicCmpXchgInst - an instruction that atomically checks whether a
+/// an instruction that atomically checks whether a
/// specified value is in a memory location, and, if it is, stores a new value
/// there. Returns the value that was loaded.
///
class AtomicCmpXchgInst : public Instruction {
- void *operator new(size_t, unsigned) = delete;
void Init(Value *Ptr, Value *Cmp, Value *NewVal,
AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering,
SynchronizationScope SynchScope);
@@ -515,13 +509,10 @@ class AtomicCmpXchgInst : public Instruction {
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
+
AtomicCmpXchgInst *cloneImpl() const;
public:
- // allocate space for exactly three operands
- void *operator new(size_t s) {
- return User::operator new(s, 3);
- }
AtomicCmpXchgInst(Value *Ptr, Value *Cmp, Value *NewVal,
AtomicOrdering SuccessOrdering,
AtomicOrdering FailureOrdering,
@@ -533,14 +524,21 @@ public:
SynchronizationScope SynchScope,
BasicBlock *InsertAtEnd);
- /// isVolatile - Return true if this is a cmpxchg from a volatile memory
+ // allocate space for exactly three operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 3);
+ }
+
+ void *operator new(size_t, unsigned) = delete;
+
+ /// Return true if this is a cmpxchg from a volatile memory
/// location.
///
bool isVolatile() const {
return getSubclassDataFromInstruction() & 1;
}
- /// setVolatile - Specify whether this is a volatile cmpxchg.
+ /// Specify whether this is a volatile cmpxchg.
///
void setVolatile(bool V) {
setInstructionSubclassData((getSubclassDataFromInstruction() & ~1) |
@@ -609,12 +607,12 @@ public:
Value *getNewValOperand() { return getOperand(2); }
const Value *getNewValOperand() const { return getOperand(2); }
- /// \brief Returns the address space of the pointer operand.
+ /// Returns the address space of the pointer operand.
unsigned getPointerAddressSpace() const {
return getPointerOperand()->getType()->getPointerAddressSpace();
}
- /// \brief Returns the strongest permitted ordering on failure, given the
+ /// Returns the strongest permitted ordering on failure, given the
/// desired ordering on success.
///
/// If the comparison in a cmpxchg operation fails, there is no atomic store
@@ -664,16 +662,15 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(AtomicCmpXchgInst, Value)
// AtomicRMWInst Class
//===----------------------------------------------------------------------===//
-/// AtomicRMWInst - an instruction that atomically reads a memory location,
+/// an instruction that atomically reads a memory location,
/// combines it with another value, and then stores the result back. Returns
/// the old value.
///
class AtomicRMWInst : public Instruction {
- void *operator new(size_t, unsigned) = delete;
-
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
+
AtomicRMWInst *cloneImpl() const;
public:
@@ -710,10 +707,6 @@ public:
BAD_BINOP
};
- // allocate space for exactly two operands
- void *operator new(size_t s) {
- return User::operator new(s, 2);
- }
AtomicRMWInst(BinOp Operation, Value *Ptr, Value *Val,
AtomicOrdering Ordering, SynchronizationScope SynchScope,
Instruction *InsertBefore = nullptr);
@@ -721,6 +714,13 @@ public:
AtomicOrdering Ordering, SynchronizationScope SynchScope,
BasicBlock *InsertAtEnd);
+ // allocate space for exactly two operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 2);
+ }
+
+ void *operator new(size_t, unsigned) = delete;
+
BinOp getOperation() const {
return static_cast<BinOp>(getSubclassDataFromInstruction() >> 5);
}
@@ -731,13 +731,13 @@ public:
(Operation << 5));
}
- /// isVolatile - Return true if this is a RMW on a volatile memory location.
+ /// Return true if this is a RMW on a volatile memory location.
///
bool isVolatile() const {
return getSubclassDataFromInstruction() & 1;
}
- /// setVolatile - Specify whether this is a volatile RMW or not.
+ /// Specify whether this is a volatile RMW or not.
///
void setVolatile(bool V) {
setInstructionSubclassData((getSubclassDataFromInstruction() & ~1) |
@@ -781,7 +781,7 @@ public:
Value *getValOperand() { return getOperand(1); }
const Value *getValOperand() const { return getOperand(1); }
- /// \brief Returns the address space of the pointer operand.
+ /// Returns the address space of the pointer operand.
unsigned getPointerAddressSpace() const {
return getPointerOperand()->getType()->getPointerAddressSpace();
}
@@ -797,6 +797,7 @@ public:
private:
void Init(BinOp Operation, Value *Ptr, Value *Val,
AtomicOrdering Ordering, SynchronizationScope SynchScope);
+
// Shadow Instruction::setInstructionSubclassData with a private forwarding
// method so that subclasses cannot accidentally use it.
void setInstructionSubclassData(unsigned short D) {
@@ -823,7 +824,7 @@ inline Type *checkGEPType(Type *Ty) {
return Ty;
}
-/// GetElementPtrInst - an instruction for type-safe pointer arithmetic to
+/// an instruction for type-safe pointer arithmetic to
/// access elements of arrays and structs
///
class GetElementPtrInst : public Instruction {
@@ -849,6 +850,7 @@ class GetElementPtrInst : public Instruction {
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
+
GetElementPtrInst *cloneImpl() const;
public:
@@ -867,6 +869,7 @@ public:
return new (Values) GetElementPtrInst(PointeeType, Ptr, IdxList, Values,
NameStr, InsertBefore);
}
+
static GetElementPtrInst *Create(Type *PointeeType, Value *Ptr,
ArrayRef<Value *> IdxList,
const Twine &NameStr,
@@ -891,6 +894,7 @@ public:
Instruction *InsertBefore = nullptr){
return CreateInBounds(nullptr, Ptr, IdxList, NameStr, InsertBefore);
}
+
static GetElementPtrInst *
CreateInBounds(Type *PointeeType, Value *Ptr, ArrayRef<Value *> IdxList,
const Twine &NameStr = "",
@@ -900,12 +904,14 @@ public:
GEP->setIsInBounds(true);
return GEP;
}
+
static GetElementPtrInst *CreateInBounds(Value *Ptr,
ArrayRef<Value *> IdxList,
const Twine &NameStr,
BasicBlock *InsertAtEnd) {
return CreateInBounds(nullptr, Ptr, IdxList, NameStr, InsertAtEnd);
}
+
static GetElementPtrInst *CreateInBounds(Type *PointeeType, Value *Ptr,
ArrayRef<Value *> IdxList,
const Twine &NameStr,
@@ -919,11 +925,6 @@ public:
/// Transparently provide more efficient getOperand methods.
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
- // getType - Overload to return most specific sequential type.
- SequentialType *getType() const {
- return cast<SequentialType>(Instruction::getType());
- }
-
Type *getSourceElementType() const { return SourceElementType; }
void setSourceElementType(Type *Ty) { SourceElementType = Ty; }
@@ -935,14 +936,14 @@ public:
return ResultElementType;
}
- /// \brief Returns the address space of this instruction's pointer type.
+ /// Returns the address space of this instruction's pointer type.
unsigned getAddressSpace() const {
// Note that this is always the same as the pointer operand's address space
// and that is cheaper to compute, so cheat here.
return getPointerAddressSpace();
}
- /// getIndexedType - Returns the type of the element that would be loaded with
+ /// Returns the type of the element that would be loaded with
/// a load instruction with the specified parameters.
///
/// Null is returned if the indices are invalid for the specified
@@ -967,23 +968,23 @@ public:
return 0U; // get index for modifying correct operand.
}
- /// getPointerOperandType - Method to return the pointer operand as a
+ /// Method to return the pointer operand as a
/// PointerType.
Type *getPointerOperandType() const {
return getPointerOperand()->getType();
}
- /// \brief Returns the address space of the pointer operand.
+ /// Returns the address space of the pointer operand.
unsigned getPointerAddressSpace() const {
return getPointerOperandType()->getPointerAddressSpace();
}
- /// GetGEPReturnType - Returns the pointer type returned by the GEP
+ /// Returns the pointer type returned by the GEP
/// instruction, which may be a vector of pointers.
static Type *getGEPReturnType(Value *Ptr, ArrayRef<Value *> IdxList) {
return getGEPReturnType(
- cast<PointerType>(Ptr->getType()->getScalarType())->getElementType(),
- Ptr, IdxList);
+ cast<PointerType>(Ptr->getType()->getScalarType())->getElementType(),
+ Ptr, IdxList);
}
static Type *getGEPReturnType(Type *ElTy, Value *Ptr,
ArrayRef<Value *> IdxList) {
@@ -1011,24 +1012,24 @@ public:
return getNumOperands() > 1;
}
- /// hasAllZeroIndices - Return true if all of the indices of this GEP are
+ /// Return true if all of the indices of this GEP are
/// zeros. If so, the result pointer and the first operand have the same
/// value, just potentially different types.
bool hasAllZeroIndices() const;
- /// hasAllConstantIndices - Return true if all of the indices of this GEP are
+ /// Return true if all of the indices of this GEP are
/// constant integers. If so, the result pointer and the first operand have
/// a constant offset between them.
bool hasAllConstantIndices() const;
- /// setIsInBounds - Set or clear the inbounds flag on this GEP instruction.
+ /// Set or clear the inbounds flag on this GEP instruction.
/// See LangRef.html for the meaning of inbounds on a getelementptr.
void setIsInBounds(bool b = true);
- /// isInBounds - Determine whether the GEP has the inbounds flag.
+ /// Determine whether the GEP has the inbounds flag.
bool isInBounds() const;
- /// \brief Accumulate the constant address offset of this GEP if possible.
+ /// Accumulate the constant address offset of this GEP if possible.
///
/// This routine accepts an APInt into which it will accumulate the constant
/// offset of this GEP if the GEP is in fact constant. If the GEP is not
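A hedged sketch of accumulateConstantOffset() as documented above; GEP is a placeholder GetElementPtrInst * inside a function, and the APInt width must match the pointer size in the GEP's address space:

    const DataLayout &DL = GEP->getModule()->getDataLayout();
    APInt Offset(DL.getPointerSizeInBits(GEP->getPointerAddressSpace()), 0);
    bool HasConstOffset = GEP->accumulateConstantOffset(DL, Offset);
    // On success, Offset holds the byte distance from the base pointer.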
@@ -1065,6 +1066,7 @@ GetElementPtrInst::GetElementPtrInst(Type *PointeeType, Value *Ptr,
cast<PointerType>(getType()->getScalarType())->getElementType());
init(Ptr, IdxList, NameStr);
}
+
GetElementPtrInst::GetElementPtrInst(Type *PointeeType, Value *Ptr,
ArrayRef<Value *> IdxList, unsigned Values,
const Twine &NameStr,
@@ -1088,7 +1090,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(GetElementPtrInst, Value)
/// This instruction compares its operands according to the predicate given
/// to the constructor. It only operates on integers or pointers. The operands
/// must be identical types.
-/// \brief Represent an integer comparison operator.
+/// Represent an integer comparison operator.
class ICmpInst: public CmpInst {
void anchor() override;
@@ -1107,11 +1109,12 @@ class ICmpInst: public CmpInst {
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
- /// \brief Clone an identical ICmpInst
+
+ /// Clone an identical ICmpInst
ICmpInst *cloneImpl() const;
public:
- /// \brief Constructor with insert-before-instruction semantics.
+ /// Constructor with insert-before-instruction semantics.
ICmpInst(
Instruction *InsertBefore, ///< Where to insert
Predicate pred, ///< The predicate to use for the comparison
@@ -1126,7 +1129,7 @@ public:
#endif
}
- /// \brief Constructor with insert-at-end semantics.
+ /// Constructor with insert-at-end semantics.
ICmpInst(
BasicBlock &InsertAtEnd, ///< Block to insert into.
Predicate pred, ///< The predicate to use for the comparison
@@ -1141,7 +1144,7 @@ public:
#endif
}
- /// \brief Constructor with no-insertion semantics
+ /// Constructor with no-insertion semantics
ICmpInst(
Predicate pred, ///< The predicate to use for the comparison
Value *LHS, ///< The left-hand-side of the expression
@@ -1157,64 +1160,60 @@ public:
/// For example, EQ->EQ, SLE->SLE, UGT->SGT, etc.
/// @returns the predicate that would be the result if the operand were
/// regarded as signed.
- /// \brief Return the signed version of the predicate
+ /// Return the signed version of the predicate
Predicate getSignedPredicate() const {
return getSignedPredicate(getPredicate());
}
/// This is a static version that you can use without an instruction.
- /// \brief Return the signed version of the predicate.
+ /// Return the signed version of the predicate.
static Predicate getSignedPredicate(Predicate pred);
/// For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
/// @returns the predicate that would be the result if the operand were
/// regarded as unsigned.
- /// \brief Return the unsigned version of the predicate
+ /// Return the unsigned version of the predicate
Predicate getUnsignedPredicate() const {
return getUnsignedPredicate(getPredicate());
}
/// This is a static version that you can use without an instruction.
- /// \brief Return the unsigned version of the predicate.
+ /// Return the unsigned version of the predicate.
static Predicate getUnsignedPredicate(Predicate pred);
- /// isEquality - Return true if this predicate is either EQ or NE. This also
+ /// Return true if this predicate is either EQ or NE. This also
/// tests for commutativity.
static bool isEquality(Predicate P) {
return P == ICMP_EQ || P == ICMP_NE;
}
- /// isEquality - Return true if this predicate is either EQ or NE. This also
+ /// Return true if this predicate is either EQ or NE. This also
/// tests for commutativity.
bool isEquality() const {
return isEquality(getPredicate());
}
/// @returns true if the predicate of this ICmpInst is commutative
- /// \brief Determine if this relation is commutative.
+ /// Determine if this relation is commutative.
bool isCommutative() const { return isEquality(); }
- /// isRelational - Return true if the predicate is relational (not EQ or NE).
+ /// Return true if the predicate is relational (not EQ or NE).
///
bool isRelational() const {
return !isEquality();
}
- /// isRelational - Return true if the predicate is relational (not EQ or NE).
+ /// Return true if the predicate is relational (not EQ or NE).
///
static bool isRelational(Predicate P) {
return !isEquality(P);
}
- /// Initialize a set of values that all satisfy the predicate with C.
- /// \brief Make a ConstantRange for a relation with a constant value.
- static ConstantRange makeConstantRange(Predicate pred, const APInt &C);
-
/// Exchange the two operands to this instruction in such a way that it does
/// not modify the semantics of the instruction. The predicate value may be
/// changed to retain the same result if the predicate is order dependent
/// (e.g. ult).
- /// \brief Swap operands and adjust predicate.
+ /// Swap operands and adjust predicate.
void swapOperands() {
setPredicate(getSwappedPredicate());
Op<0>().swap(Op<1>());
@@ -1236,16 +1235,17 @@ public:
/// This instruction compares its operands according to the predicate given
/// to the constructor. It only operates on floating point values or packed
/// vectors of floating point values. The operands must be identical types.
-/// \brief Represents a floating point comparison operator.
+/// Represents a floating point comparison operator.
class FCmpInst: public CmpInst {
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
- /// \brief Clone an identical FCmpInst
+
+ /// Clone an identical FCmpInst
FCmpInst *cloneImpl() const;
public:
- /// \brief Constructor with insert-before-instruction semantics.
+ /// Constructor with insert-before-instruction semantics.
FCmpInst(
Instruction *InsertBefore, ///< Where to insert
Predicate pred, ///< The predicate to use for the comparison
@@ -1264,7 +1264,7 @@ public:
"Invalid operand types for FCmp instruction");
}
- /// \brief Constructor with insert-at-end semantics.
+ /// Constructor with insert-at-end semantics.
FCmpInst(
BasicBlock &InsertAtEnd, ///< Block to insert into.
Predicate pred, ///< The predicate to use for the comparison
@@ -1283,7 +1283,7 @@ public:
"Invalid operand types for FCmp instruction");
}
- /// \brief Constructor with no-insertion semantics
+ /// Constructor with no-insertion semantics
FCmpInst(
Predicate pred, ///< The predicate to use for the comparison
Value *LHS, ///< The left-hand-side of the expression
@@ -1301,18 +1301,18 @@ public:
}
/// @returns true if the predicate of this instruction is EQ or NE.
- /// \brief Determine if this is an equality predicate.
+ /// Determine if this is an equality predicate.
static bool isEquality(Predicate Pred) {
return Pred == FCMP_OEQ || Pred == FCMP_ONE || Pred == FCMP_UEQ ||
Pred == FCMP_UNE;
}
/// @returns true if the predicate of this instruction is EQ or NE.
- /// \brief Determine if this is an equality predicate.
+ /// Determine if this is an equality predicate.
bool isEquality() const { return isEquality(getPredicate()); }
/// @returns true if the predicate of this instruction is commutative.
- /// \brief Determine if this is a commutative predicate.
+ /// Determine if this is a commutative predicate.
bool isCommutative() const {
return isEquality() ||
getPredicate() == FCMP_FALSE ||
@@ -1322,20 +1322,20 @@ public:
}
/// @returns true if the predicate is relational (not EQ or NE).
- /// \brief Determine if this a relational predicate.
+ /// Determine if this is a relational predicate.
bool isRelational() const { return !isEquality(); }
/// Exchange the two operands to this instruction in such a way that it does
/// not modify the semantics of the instruction. The predicate value may be
/// changed to retain the same result if the predicate is order dependent
/// (e.g. ult).
- /// \brief Swap operands and adjust predicate.
+ /// Swap operands and adjust predicate.
void swapOperands() {
setPredicate(getSwappedPredicate());
Op<0>().swap(Op<1>());
}
- /// \brief Methods for support type inquiry through isa, cast, and dyn_cast:
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::FCmp;
}
@@ -1345,31 +1345,26 @@ public:
};
//===----------------------------------------------------------------------===//
-/// CallInst - This class represents a function call, abstracting a target
+/// This class represents a function call, abstracting a target
/// machine's calling convention. This class uses low bit of the SubClassData
/// field to indicate whether or not this is a tail call. The rest of the bits
/// hold the calling convention of the call.
///
class CallInst : public Instruction,
public OperandBundleUser<CallInst, User::op_iterator> {
+ friend class OperandBundleUser<CallInst, User::op_iterator>;
+
AttributeSet AttributeList; ///< parameter attributes for call
FunctionType *FTy;
+
CallInst(const CallInst &CI);
- void init(Value *Func, ArrayRef<Value *> Args,
- ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr) {
- init(cast<FunctionType>(
- cast<PointerType>(Func->getType())->getElementType()),
- Func, Args, Bundles, NameStr);
- }
- void init(FunctionType *FTy, Value *Func, ArrayRef<Value *> Args,
- ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr);
- void init(Value *Func, const Twine &NameStr);
/// Construct a CallInst given a range of arguments.
- /// \brief Construct a CallInst from a range of arguments
+ /// Construct a CallInst from a range of arguments
inline CallInst(FunctionType *Ty, Value *Func, ArrayRef<Value *> Args,
ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr,
Instruction *InsertBefore);
+
inline CallInst(Value *Func, ArrayRef<Value *> Args,
ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr,
Instruction *InsertBefore)
@@ -1382,24 +1377,37 @@ class CallInst : public Instruction,
: CallInst(Func, Args, None, NameStr, InsertBefore) {}
/// Construct a CallInst given a range of arguments.
- /// \brief Construct a CallInst from a range of arguments
+ /// Construct a CallInst from a range of arguments
inline CallInst(Value *Func, ArrayRef<Value *> Args,
ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr,
BasicBlock *InsertAtEnd);
explicit CallInst(Value *F, const Twine &NameStr,
Instruction *InsertBefore);
+
CallInst(Value *F, const Twine &NameStr, BasicBlock *InsertAtEnd);
- friend class OperandBundleUser<CallInst, User::op_iterator>;
+ void init(Value *Func, ArrayRef<Value *> Args,
+ ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr) {
+ init(cast<FunctionType>(
+ cast<PointerType>(Func->getType())->getElementType()),
+ Func, Args, Bundles, NameStr);
+ }
+ void init(FunctionType *FTy, Value *Func, ArrayRef<Value *> Args,
+ ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr);
+ void init(Value *Func, const Twine &NameStr);
+
bool hasDescriptor() const { return HasDescriptor; }
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
+
CallInst *cloneImpl() const;
public:
+ ~CallInst() override;
+
static CallInst *Create(Value *Func, ArrayRef<Value *> Args,
ArrayRef<OperandBundleDef> Bundles = None,
const Twine &NameStr = "",
@@ -1408,6 +1416,7 @@ public:
cast<PointerType>(Func->getType())->getElementType()),
Func, Args, Bundles, NameStr, InsertBefore);
}
+
static CallInst *Create(Value *Func, ArrayRef<Value *> Args,
const Twine &NameStr,
Instruction *InsertBefore = nullptr) {
@@ -1415,12 +1424,14 @@ public:
cast<PointerType>(Func->getType())->getElementType()),
Func, Args, None, NameStr, InsertBefore);
}
+
static CallInst *Create(FunctionType *Ty, Value *Func, ArrayRef<Value *> Args,
const Twine &NameStr,
Instruction *InsertBefore = nullptr) {
return new (unsigned(Args.size() + 1))
CallInst(Ty, Func, Args, None, NameStr, InsertBefore);
}
+
static CallInst *Create(FunctionType *Ty, Value *Func, ArrayRef<Value *> Args,
ArrayRef<OperandBundleDef> Bundles = None,
const Twine &NameStr = "",
@@ -1432,6 +1443,7 @@ public:
return new (TotalOps, DescriptorBytes)
CallInst(Ty, Func, Args, Bundles, NameStr, InsertBefore);
}
+
static CallInst *Create(Value *Func, ArrayRef<Value *> Args,
ArrayRef<OperandBundleDef> Bundles,
const Twine &NameStr, BasicBlock *InsertAtEnd) {
@@ -1442,21 +1454,24 @@ public:
return new (TotalOps, DescriptorBytes)
CallInst(Func, Args, Bundles, NameStr, InsertAtEnd);
}
+
static CallInst *Create(Value *Func, ArrayRef<Value *> Args,
const Twine &NameStr, BasicBlock *InsertAtEnd) {
return new (unsigned(Args.size() + 1))
CallInst(Func, Args, None, NameStr, InsertAtEnd);
}
+
static CallInst *Create(Value *F, const Twine &NameStr = "",
Instruction *InsertBefore = nullptr) {
return new(1) CallInst(F, NameStr, InsertBefore);
}
+
static CallInst *Create(Value *F, const Twine &NameStr,
BasicBlock *InsertAtEnd) {
return new(1) CallInst(F, NameStr, InsertAtEnd);
}
- /// \brief Create a clone of \p CI with a different set of operand bundles and
+ /// Create a clone of \p CI with a different set of operand bundles and
/// insert it before \p InsertPt.
///
/// The returned call instruction is identical \p CI in every way except that
@@ -1465,7 +1480,7 @@ public:
static CallInst *Create(CallInst *CI, ArrayRef<OperandBundleDef> Bundles,
Instruction *InsertPt = nullptr);
- /// CreateMalloc - Generate the IR for a call to malloc:
+ /// Generate the IR for a call to malloc:
/// 1. Compute the malloc call's argument as the specified type's size,
/// possibly multiplied by the array size if the array size is not
/// constant 1.
@@ -1493,7 +1508,7 @@ public:
ArrayRef<OperandBundleDef> Bundles = None,
Function* MallocF = nullptr,
const Twine &Name = "");
- /// CreateFree - Generate the IR for a call to the builtin free function.
+ /// Generate the IR for a call to the builtin free function.
static Instruction *CreateFree(Value *Source,
Instruction *InsertBefore);
static Instruction *CreateFree(Value *Source,
@@ -1505,8 +1520,6 @@ public:
ArrayRef<OperandBundleDef> Bundles,
BasicBlock *InsertAtEnd);
- ~CallInst() override;
-
FunctionType *getFunctionType() const { return FTy; }
void mutateFunctionType(FunctionType *FTy) {
@@ -1520,20 +1533,25 @@ public:
TailCallKind getTailCallKind() const {
return TailCallKind(getSubclassDataFromInstruction() & 3);
}
+
bool isTailCall() const {
unsigned Kind = getSubclassDataFromInstruction() & 3;
return Kind == TCK_Tail || Kind == TCK_MustTail;
}
+
bool isMustTailCall() const {
return (getSubclassDataFromInstruction() & 3) == TCK_MustTail;
}
+
bool isNoTailCall() const {
return (getSubclassDataFromInstruction() & 3) == TCK_NoTail;
}
+
void setTailCall(bool isTC = true) {
setInstructionSubclassData((getSubclassDataFromInstruction() & ~3) |
unsigned(isTC ? TCK_Tail : TCK_None));
}
+
void setTailCallKind(TailCallKind TCK) {
setInstructionSubclassData((getSubclassDataFromInstruction() & ~3) |
unsigned(TCK));
@@ -1542,7 +1560,7 @@ public:
/// Provide fast operand accessors
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
- /// getNumArgOperands - Return the number of call arguments.
+ /// Return the number of call arguments.
///
unsigned getNumArgOperands() const {
return getNumOperands() - getNumTotalBundleOperands() - 1;
@@ -1559,35 +1577,35 @@ public:
setOperand(i, v);
}
- /// \brief Return the iterator pointing to the beginning of the argument list.
+ /// Return the iterator pointing to the beginning of the argument list.
op_iterator arg_begin() { return op_begin(); }
- /// \brief Return the iterator pointing to the end of the argument list.
+ /// Return the iterator pointing to the end of the argument list.
op_iterator arg_end() {
// [ call args ], [ operand bundles ], callee
return op_end() - getNumTotalBundleOperands() - 1;
- };
+ }
- /// \brief Iteration adapter for range-for loops.
+ /// Iteration adapter for range-for loops.
iterator_range<op_iterator> arg_operands() {
return make_range(arg_begin(), arg_end());
}
- /// \brief Return the iterator pointing to the beginning of the argument list.
+ /// Return the iterator pointing to the beginning of the argument list.
const_op_iterator arg_begin() const { return op_begin(); }
- /// \brief Return the iterator pointing to the end of the argument list.
+ /// Return the iterator pointing to the end of the argument list.
const_op_iterator arg_end() const {
// [ call args ], [ operand bundles ], callee
return op_end() - getNumTotalBundleOperands() - 1;
- };
+ }
- /// \brief Iteration adapter for range-for loops.
+ /// Iteration adapter for range-for loops.
iterator_range<const_op_iterator> arg_operands() const {
return make_range(arg_begin(), arg_end());
}
- /// \brief Wrappers for getting the \c Use of a call argument.
+ /// Wrappers for getting the \c Use of a call argument.
const Use &getArgOperandUse(unsigned i) const {
assert(i < getNumArgOperands() && "Out of bounds!");
return getOperandUse(i);
@@ -1613,61 +1631,59 @@ public:
(ID << 2));
}
- /// getAttributes - Return the parameter attributes for this call.
+ /// Return the parameter attributes for this call.
///
- const AttributeSet &getAttributes() const { return AttributeList; }
+ AttributeSet getAttributes() const { return AttributeList; }
- /// setAttributes - Set the parameter attributes for this call.
+ /// Set the parameter attributes for this call.
///
- void setAttributes(const AttributeSet &Attrs) { AttributeList = Attrs; }
+ void setAttributes(AttributeSet Attrs) { AttributeList = Attrs; }
- /// addAttribute - adds the attribute to the list of attributes.
+ /// adds the attribute to the list of attributes.
void addAttribute(unsigned i, Attribute::AttrKind Kind);
- /// addAttribute - adds the attribute to the list of attributes.
- void addAttribute(unsigned i, StringRef Kind, StringRef Value);
-
- /// addAttribute - adds the attribute to the list of attributes.
+ /// adds the attribute to the list of attributes.
void addAttribute(unsigned i, Attribute Attr);
- /// removeAttribute - removes the attribute from the list of attributes.
+ /// removes the attribute from the list of attributes.
void removeAttribute(unsigned i, Attribute::AttrKind Kind);
- /// removeAttribute - removes the attribute from the list of attributes.
+ /// removes the attribute from the list of attributes.
void removeAttribute(unsigned i, StringRef Kind);
- /// removeAttribute - removes the attribute from the list of attributes.
- void removeAttribute(unsigned i, Attribute Attr);
-
- /// \brief adds the dereferenceable attribute to the list of attributes.
+ /// Adds the dereferenceable attribute to the list of attributes.
void addDereferenceableAttr(unsigned i, uint64_t Bytes);
- /// \brief adds the dereferenceable_or_null attribute to the list of
+ /// Adds the dereferenceable_or_null attribute to the list of
/// attributes.
void addDereferenceableOrNullAttr(unsigned i, uint64_t Bytes);
- /// \brief Determine whether this call has the given attribute.
+ /// Determine whether this call has the given attribute.
bool hasFnAttr(Attribute::AttrKind Kind) const {
assert(Kind != Attribute::NoBuiltin &&
"Use CallInst::isNoBuiltin() to check for Attribute::NoBuiltin");
return hasFnAttrImpl(Kind);
}
- /// \brief Determine whether this call has the given attribute.
+ /// Determine whether this call has the given attribute.
bool hasFnAttr(StringRef Kind) const {
return hasFnAttrImpl(Kind);
}
- /// \brief Determine whether the call or the callee has the given attributes.
+ /// Determine whether the call or the callee has the given attributes.
bool paramHasAttr(unsigned i, Attribute::AttrKind Kind) const;
- /// \brief Get the attribute of a given kind at a position.
- Attribute getAttribute(unsigned i, Attribute::AttrKind Kind) const;
+ /// Get the attribute of a given kind at a position.
+ Attribute getAttribute(unsigned i, Attribute::AttrKind Kind) const {
+ return getAttributes().getAttribute(i, Kind);
+ }
- /// \brief Get the attribute of a given kind at a position.
- Attribute getAttribute(unsigned i, StringRef Kind) const;
+ /// Get the attribute of a given kind at a position.
+ Attribute getAttribute(unsigned i, StringRef Kind) const {
+ return getAttributes().getAttribute(i, Kind);
+ }
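A short sketch of the accessors above: set a function-level attribute on the call and read it back. The helper name markNoInline is made up; addAttribute(), hasFnAttr() and getAttribute() are the members declared in this class:

    #include <cassert>
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    void markNoInline(CallInst &CI) {
      // Function-index attributes apply to the call as a whole.
      CI.addAttribute(AttributeSet::FunctionIndex, Attribute::NoInline);
      assert(CI.hasFnAttr(Attribute::NoInline) && "attribute should be set");
      Attribute A = CI.getAttribute(AttributeSet::FunctionIndex,
                                    Attribute::NoInline);
      (void)A;
    }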
- /// \brief Return true if the data operand at index \p i has the attribute \p
+ /// Return true if the data operand at index \p i has the attribute \p
/// Kind.
///
/// Data operands include call arguments and values used in operand bundles,
@@ -1682,18 +1698,18 @@ public:
/// (\p i - 1) in the operand list.
bool dataOperandHasImpliedAttr(unsigned i, Attribute::AttrKind Kind) const;
- /// \brief Extract the alignment for a call or parameter (0=unknown).
+ /// Extract the alignment for a call or parameter (0=unknown).
unsigned getParamAlignment(unsigned i) const {
return AttributeList.getParamAlignment(i);
}
- /// \brief Extract the number of dereferenceable bytes for a call or
+ /// Extract the number of dereferenceable bytes for a call or
/// parameter (0=unknown).
uint64_t getDereferenceableBytes(unsigned i) const {
return AttributeList.getDereferenceableBytes(i);
}
- /// \brief Extract the number of dereferenceable_or_null bytes for a call or
+ /// Extract the number of dereferenceable_or_null bytes for a call or
/// parameter (0=unknown).
uint64_t getDereferenceableOrNullBytes(unsigned i) const {
return AttributeList.getDereferenceableOrNullBytes(i);
@@ -1706,20 +1722,20 @@ public:
return AttributeList.hasAttribute(n, Attribute::NoAlias);
}
- /// \brief Return true if the call should not be treated as a call to a
+ /// Return true if the call should not be treated as a call to a
/// builtin.
bool isNoBuiltin() const {
return hasFnAttrImpl(Attribute::NoBuiltin) &&
!hasFnAttrImpl(Attribute::Builtin);
}
- /// \brief Return true if the call should not be inlined.
+ /// Return true if the call should not be inlined.
bool isNoInline() const { return hasFnAttr(Attribute::NoInline); }
void setIsNoInline() {
addAttribute(AttributeSet::FunctionIndex, Attribute::NoInline);
}
- /// \brief Return true if the call can return twice
+ /// Return true if the call can return twice
bool canReturnTwice() const {
return hasFnAttr(Attribute::ReturnsTwice);
}
@@ -1727,7 +1743,7 @@ public:
addAttribute(AttributeSet::FunctionIndex, Attribute::ReturnsTwice);
}
- /// \brief Determine if the call does not access memory.
+ /// Determine if the call does not access memory.
bool doesNotAccessMemory() const {
return hasFnAttr(Attribute::ReadNone);
}
@@ -1735,7 +1751,7 @@ public:
addAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone);
}
- /// \brief Determine if the call does not access or only reads memory.
+ /// Determine if the call does not access or only reads memory.
bool onlyReadsMemory() const {
return doesNotAccessMemory() || hasFnAttr(Attribute::ReadOnly);
}
@@ -1743,7 +1759,7 @@ public:
addAttribute(AttributeSet::FunctionIndex, Attribute::ReadOnly);
}
- /// \brief Determine if the call does not access or only writes memory.
+ /// Determine if the call does not access or only writes memory.
bool doesNotReadMemory() const {
return doesNotAccessMemory() || hasFnAttr(Attribute::WriteOnly);
}
@@ -1760,35 +1776,34 @@ public:
addAttribute(AttributeSet::FunctionIndex, Attribute::ArgMemOnly);
}
- /// \brief Determine if the call cannot return.
+ /// Determine if the call cannot return.
bool doesNotReturn() const { return hasFnAttr(Attribute::NoReturn); }
void setDoesNotReturn() {
addAttribute(AttributeSet::FunctionIndex, Attribute::NoReturn);
}
- /// \brief Determine if the call cannot unwind.
+ /// Determine if the call cannot unwind.
bool doesNotThrow() const { return hasFnAttr(Attribute::NoUnwind); }
void setDoesNotThrow() {
addAttribute(AttributeSet::FunctionIndex, Attribute::NoUnwind);
}
- /// \brief Determine if the call cannot be duplicated.
+ /// Determine if the call cannot be duplicated.
bool cannotDuplicate() const {return hasFnAttr(Attribute::NoDuplicate); }
void setCannotDuplicate() {
addAttribute(AttributeSet::FunctionIndex, Attribute::NoDuplicate);
}
- /// \brief Determine if the call is convergent
+ /// Determine if the call is convergent
bool isConvergent() const { return hasFnAttr(Attribute::Convergent); }
void setConvergent() {
addAttribute(AttributeSet::FunctionIndex, Attribute::Convergent);
}
void setNotConvergent() {
- removeAttribute(AttributeSet::FunctionIndex,
- Attribute::get(getContext(), Attribute::Convergent));
+ removeAttribute(AttributeSet::FunctionIndex, Attribute::Convergent);
}
- /// \brief Determine if the call returns a structure through first
+ /// Determine if the call returns a structure through its first
/// pointer argument.
bool hasStructRetAttr() const {
if (getNumArgOperands() == 0)
@@ -1798,24 +1813,24 @@ public:
return paramHasAttr(1, Attribute::StructRet);
}
- /// \brief Determine if any call argument is an aggregate passed by value.
+ /// Determine if any call argument is an aggregate passed by value.
bool hasByValArgument() const {
return AttributeList.hasAttrSomewhere(Attribute::ByVal);
}
- /// getCalledFunction - Return the function called, or null if this is an
+ /// Return the function called, or null if this is an
/// indirect function invocation.
///
Function *getCalledFunction() const {
return dyn_cast<Function>(Op<-1>());
}
- /// getCalledValue - Get a pointer to the function that is invoked by this
+ /// Get a pointer to the function that is invoked by this
/// instruction.
const Value *getCalledValue() const { return Op<-1>(); }
Value *getCalledValue() { return Op<-1>(); }
- /// setCalledFunction - Set the function called.
+ /// Set the function called.
void setCalledFunction(Value* Fn) {
setCalledFunction(
cast<FunctionType>(cast<PointerType>(Fn->getType())->getElementType()),
@@ -1828,7 +1843,7 @@ public:
Op<-1>() = Fn;
}
- /// isInlineAsm - Check if this call is an inline asm statement.
+ /// Check if this call is an inline asm statement.
bool isInlineAsm() const {
return isa<InlineAsm>(Op<-1>());
}
@@ -1842,17 +1857,17 @@ public:
}
private:
- template <typename AttrKind> bool hasFnAttrImpl(AttrKind A) const {
- if (AttributeList.hasAttribute(AttributeSet::FunctionIndex, A))
+ template <typename AttrKind> bool hasFnAttrImpl(AttrKind Kind) const {
+ if (AttributeList.hasAttribute(AttributeSet::FunctionIndex, Kind))
return true;
// Operand bundles override attributes on the called function, but don't
// override attributes directly present on the call instruction.
- if (isFnAttrDisallowedByOpBundle(A))
+ if (isFnAttrDisallowedByOpBundle(Kind))
return false;
if (const Function *F = getCalledFunction())
- return F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, A);
+ return F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, Kind);
return false;
}
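To restate the precedence encoded above: an attribute inherited from the callee can be masked by an operand bundle, while one placed directly on the call instruction always survives. A hedged sketch; pinReadNoneOnCall is a hypothetical helper, and Function::hasFnAttribute is assumed from llvm/IR/Function.h:

    #include "llvm/IR/Function.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    void pinReadNoneOnCall(CallInst &CI) {
      // Copy the attribute onto the call itself so an operand bundle
      // cannot hide it, per the comment in hasFnAttrImpl above.
      Function *Callee = CI.getCalledFunction();
      if (Callee && Callee->hasFnAttribute(Attribute::ReadNone))
        CI.addAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone);
    }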
@@ -1899,16 +1914,9 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CallInst, Value)
// SelectInst Class
//===----------------------------------------------------------------------===//
-/// SelectInst - This class represents the LLVM 'select' instruction.
+/// This class represents the LLVM 'select' instruction.
///
class SelectInst : public Instruction {
- void init(Value *C, Value *S1, Value *S2) {
- assert(!areInvalidOperands(C, S1, S2) && "Invalid operands for select");
- Op<0>() = C;
- Op<1>() = S1;
- Op<2>() = S2;
- }
-
SelectInst(Value *C, Value *S1, Value *S2, const Twine &NameStr,
Instruction *InsertBefore)
: Instruction(S1->getType(), Instruction::Select,
@@ -1916,6 +1924,7 @@ class SelectInst : public Instruction {
init(C, S1, S2);
setName(NameStr);
}
+
SelectInst(Value *C, Value *S1, Value *S2, const Twine &NameStr,
BasicBlock *InsertAtEnd)
: Instruction(S1->getType(), Instruction::Select,
@@ -1924,17 +1933,30 @@ class SelectInst : public Instruction {
setName(NameStr);
}
+ void init(Value *C, Value *S1, Value *S2) {
+ assert(!areInvalidOperands(C, S1, S2) && "Invalid operands for select");
+ Op<0>() = C;
+ Op<1>() = S1;
+ Op<2>() = S2;
+ }
+
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
+
SelectInst *cloneImpl() const;
public:
static SelectInst *Create(Value *C, Value *S1, Value *S2,
const Twine &NameStr = "",
- Instruction *InsertBefore = nullptr) {
- return new(3) SelectInst(C, S1, S2, NameStr, InsertBefore);
+ Instruction *InsertBefore = nullptr,
+ Instruction *MDFrom = nullptr) {
+ SelectInst *Sel = new(3) SelectInst(C, S1, S2, NameStr, InsertBefore);
+ if (MDFrom)
+ Sel->copyMetadata(*MDFrom);
+ return Sel;
}
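A sketch of the new MDFrom parameter introduced above: the created select is inserted before an existing instruction and inherits its metadata (for example, profile weights). The helper and parameter names are illustrative:

    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    SelectInst *buildSelectLike(Value *Cond, Value *TVal, Value *FVal,
                                Instruction *OldInst) {
      // Passing MDFrom makes Create() copy OldInst's metadata onto the select.
      return SelectInst::Create(Cond, TVal, FVal, "sel",
                                /*InsertBefore=*/OldInst, /*MDFrom=*/OldInst);
    }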
+
static SelectInst *Create(Value *C, Value *S1, Value *S2,
const Twine &NameStr,
BasicBlock *InsertAtEnd) {
@@ -1952,7 +1974,7 @@ public:
void setTrueValue(Value *V) { Op<1>() = V; }
void setFalseValue(Value *V) { Op<2>() = V; }
- /// areInvalidOperands - Return a string if the specified operands are invalid
+ /// Return a string if the specified operands are invalid
/// for a select operation, otherwise return null.
static const char *areInvalidOperands(Value *Cond, Value *True, Value *False);
@@ -1982,13 +2004,14 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(SelectInst, Value)
// VAArgInst Class
//===----------------------------------------------------------------------===//
-/// VAArgInst - This class represents the va_arg llvm instruction, which returns
+/// This class represents the va_arg llvm instruction, which returns
/// an argument of the specified type given a va_list and increments that list
///
class VAArgInst : public UnaryInstruction {
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
+
VAArgInst *cloneImpl() const;
public:
@@ -1997,6 +2020,7 @@ public:
: UnaryInstruction(Ty, VAArg, List, InsertBefore) {
setName(NameStr);
}
+
VAArgInst(Value *List, Type *Ty, const Twine &NameStr,
BasicBlock *InsertAtEnd)
: UnaryInstruction(Ty, VAArg, List, InsertAtEnd) {
@@ -2020,7 +2044,7 @@ public:
// ExtractElementInst Class
//===----------------------------------------------------------------------===//
-/// ExtractElementInst - This instruction extracts a single (scalar)
+/// This instruction extracts a single (scalar)
/// element from a VectorType value
///
class ExtractElementInst : public Instruction {
@@ -2032,6 +2056,7 @@ class ExtractElementInst : public Instruction {
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
+
ExtractElementInst *cloneImpl() const;
public:
@@ -2040,13 +2065,14 @@ public:
Instruction *InsertBefore = nullptr) {
return new(2) ExtractElementInst(Vec, Idx, NameStr, InsertBefore);
}
+
static ExtractElementInst *Create(Value *Vec, Value *Idx,
const Twine &NameStr,
BasicBlock *InsertAtEnd) {
return new(2) ExtractElementInst(Vec, Idx, NameStr, InsertAtEnd);
}
- /// isValidOperands - Return true if an extractelement instruction can be
+ /// Return true if an extractelement instruction can be
/// formed with the specified operands.
static bool isValidOperands(const Value *Vec, const Value *Idx);
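As the comment above suggests, isValidOperands() is the guard to run before Create(). A minimal sketch; tryExtract is a made-up helper name:

    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    ExtractElementInst *tryExtract(Value *Vec, Value *Idx, Instruction *IP) {
      // Refuse to build the instruction if the operand types do not fit.
      if (!ExtractElementInst::isValidOperands(Vec, Idx))
        return nullptr;
      return ExtractElementInst::Create(Vec, Idx, "elt", IP);
    }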
@@ -2082,7 +2108,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ExtractElementInst, Value)
// InsertElementInst Class
//===----------------------------------------------------------------------===//
-/// InsertElementInst - This instruction inserts a single (scalar)
+/// This instruction inserts a single (scalar)
/// element into a VectorType value
///
class InsertElementInst : public Instruction {
@@ -2095,6 +2121,7 @@ class InsertElementInst : public Instruction {
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
+
InsertElementInst *cloneImpl() const;
public:
@@ -2103,18 +2130,19 @@ public:
Instruction *InsertBefore = nullptr) {
return new(3) InsertElementInst(Vec, NewElt, Idx, NameStr, InsertBefore);
}
+
static InsertElementInst *Create(Value *Vec, Value *NewElt, Value *Idx,
const Twine &NameStr,
BasicBlock *InsertAtEnd) {
return new(3) InsertElementInst(Vec, NewElt, Idx, NameStr, InsertAtEnd);
}
- /// isValidOperands - Return true if an insertelement instruction can be
+ /// Return true if an insertelement instruction can be
/// formed with the specified operands.
static bool isValidOperands(const Value *Vec, const Value *NewElt,
const Value *Idx);
- /// getType - Overload to return most specific vector type.
+ /// Overload to return most specific vector type.
///
VectorType *getType() const {
return cast<VectorType>(Instruction::getType());
@@ -2143,32 +2171,34 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InsertElementInst, Value)
// ShuffleVectorInst Class
//===----------------------------------------------------------------------===//
-/// ShuffleVectorInst - This instruction constructs a fixed permutation of two
+/// This instruction constructs a fixed permutation of two
/// input vectors.
///
class ShuffleVectorInst : public Instruction {
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
+
ShuffleVectorInst *cloneImpl() const;
public:
- // allocate space for exactly three operands
- void *operator new(size_t s) {
- return User::operator new(s, 3);
- }
ShuffleVectorInst(Value *V1, Value *V2, Value *Mask,
const Twine &NameStr = "",
Instruction *InsertBefor = nullptr);
ShuffleVectorInst(Value *V1, Value *V2, Value *Mask,
const Twine &NameStr, BasicBlock *InsertAtEnd);
- /// isValidOperands - Return true if a shufflevector instruction can be
+ // allocate space for exactly three operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 3);
+ }
+
+ /// Return true if a shufflevector instruction can be
/// formed with the specified operands.
static bool isValidOperands(const Value *V1, const Value *V2,
const Value *Mask);
- /// getType - Overload to return most specific vector type.
+ /// Overload to return most specific vector type.
///
VectorType *getType() const {
return cast<VectorType>(Instruction::getType());
@@ -2181,19 +2211,22 @@ public:
return cast<Constant>(getOperand(2));
}
- /// getMaskValue - Return the index from the shuffle mask for the specified
- /// output result. This is either -1 if the element is undef or a number less
- /// than 2*numelements.
- static int getMaskValue(Constant *Mask, unsigned i);
+ /// Return the shuffle mask value for the specified element of the mask.
+ /// Return -1 if the element is undef.
+ static int getMaskValue(Constant *Mask, unsigned Elt);
- int getMaskValue(unsigned i) const {
- return getMaskValue(getMask(), i);
+ /// Return the shuffle mask value of this instruction for the given element
+ /// index. Return -1 if the element is undef.
+ int getMaskValue(unsigned Elt) const {
+ return getMaskValue(getMask(), Elt);
}
- /// getShuffleMask - Return the full mask for this instruction, where each
- /// element is the element number and undef's are returned as -1.
+ /// Convert the input shuffle mask operand to a vector of integers. Undefined
+ /// elements of the mask are returned as -1.
static void getShuffleMask(Constant *Mask, SmallVectorImpl<int> &Result);
+ /// Return the mask for this instruction as a vector of integers. Undefined
+ /// elements of the mask are returned as -1.
void getShuffleMask(SmallVectorImpl<int> &Result) const {
return getShuffleMask(getMask(), Result);
}
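A sketch built on getShuffleMask() as documented above; isIdentityMask is a hypothetical helper that treats undef (-1) lanes as matching:

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    bool isIdentityMask(const ShuffleVectorInst &SVI) {
      SmallVector<int, 16> Mask;
      SVI.getShuffleMask(Mask); // undef lanes come back as -1
      for (unsigned i = 0, e = Mask.size(); i != e; ++i)
        if (Mask[i] != -1 && Mask[i] != int(i))
          return false;
      return true;
    }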
@@ -2224,15 +2257,13 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ShuffleVectorInst, Value)
// ExtractValueInst Class
//===----------------------------------------------------------------------===//
-/// ExtractValueInst - This instruction extracts a struct member or array
+/// This instruction extracts a struct member or array
/// element value from an aggregate value.
///
class ExtractValueInst : public UnaryInstruction {
SmallVector<unsigned, 4> Indices;
ExtractValueInst(const ExtractValueInst &EVI);
- void init(ArrayRef<unsigned> Idxs, const Twine &NameStr);
-
/// Constructors - Create an extractvalue instruction with a base aggregate
/// value and a list of indices. The first ctor can optionally insert before
/// an existing instruction, the second appends the new instruction to the
@@ -2248,9 +2279,12 @@ class ExtractValueInst : public UnaryInstruction {
// allocate space for exactly one operand
void *operator new(size_t s) { return User::operator new(s, 1); }
+ void init(ArrayRef<unsigned> Idxs, const Twine &NameStr);
+
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
+
ExtractValueInst *cloneImpl() const;
public:
@@ -2261,6 +2295,7 @@ public:
return new
ExtractValueInst(Agg, Idxs, NameStr, InsertBefore);
}
+
static ExtractValueInst *Create(Value *Agg,
ArrayRef<unsigned> Idxs,
const Twine &NameStr,
@@ -2268,7 +2303,7 @@ public:
return new ExtractValueInst(Agg, Idxs, NameStr, InsertAtEnd);
}
- /// getIndexedType - Returns the type of the element that would be extracted
+ /// Returns the type of the element that would be extracted
/// with an extractvalue instruction with the specified parameters.
///
/// Null is returned if the indices are invalid for the specified type.
@@ -2320,6 +2355,7 @@ ExtractValueInst::ExtractValueInst(Value *Agg,
ExtractValue, Agg, InsertBefore) {
init(Idxs, NameStr);
}
+
ExtractValueInst::ExtractValueInst(Value *Agg,
ArrayRef<unsigned> Idxs,
const Twine &NameStr,
@@ -2333,16 +2369,13 @@ ExtractValueInst::ExtractValueInst(Value *Agg,
// InsertValueInst Class
//===----------------------------------------------------------------------===//
-/// InsertValueInst - This instruction inserts a struct field of array element
+/// This instruction inserts a struct field or array element
/// value into an aggregate value.
///
class InsertValueInst : public Instruction {
SmallVector<unsigned, 4> Indices;
- void *operator new(size_t, unsigned) = delete;
InsertValueInst(const InsertValueInst &IVI);
- void init(Value *Agg, Value *Val, ArrayRef<unsigned> Idxs,
- const Twine &NameStr);
/// Constructors - Create an insertvalue instruction with a base aggregate
/// value, a value to insert, and a list of indices. The first ctor can
@@ -2364,9 +2397,13 @@ class InsertValueInst : public Instruction {
InsertValueInst(Value *Agg, Value *Val, unsigned Idx, const Twine &NameStr,
BasicBlock *InsertAtEnd);
+ void init(Value *Agg, Value *Val, ArrayRef<unsigned> Idxs,
+ const Twine &NameStr);
+
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
+
InsertValueInst *cloneImpl() const;
public:
@@ -2375,12 +2412,15 @@ public:
return User::operator new(s, 2);
}
+ void *operator new(size_t, unsigned) = delete;
+
static InsertValueInst *Create(Value *Agg, Value *Val,
ArrayRef<unsigned> Idxs,
const Twine &NameStr = "",
Instruction *InsertBefore = nullptr) {
return new InsertValueInst(Agg, Val, Idxs, NameStr, InsertBefore);
}
+
static InsertValueInst *Create(Value *Agg, Value *Val,
ArrayRef<unsigned> Idxs,
const Twine &NameStr,
@@ -2454,6 +2494,7 @@ InsertValueInst::InsertValueInst(Value *Agg,
2, InsertBefore) {
init(Agg, Val, Idxs, NameStr);
}
+
InsertValueInst::InsertValueInst(Value *Agg,
Value *Val,
ArrayRef<unsigned> Idxs,
@@ -2476,17 +2517,13 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InsertValueInst, Value)
// scientist's overactive imagination.
//
class PHINode : public Instruction {
- void anchor() override;
-
- void *operator new(size_t, unsigned) = delete;
- /// ReservedSpace - The number of operands actually allocated. NumOperands is
+ /// The number of operands actually allocated. NumOperands is
/// the number actually in use.
unsigned ReservedSpace;
+
PHINode(const PHINode &PN);
// allocate space for exactly zero operands
- void *operator new(size_t s) {
- return User::operator new(s);
- }
+
explicit PHINode(Type *Ty, unsigned NumReservedValues,
const Twine &NameStr = "",
Instruction *InsertBefore = nullptr)
@@ -2504,7 +2541,18 @@ class PHINode : public Instruction {
allocHungoffUses(ReservedSpace);
}
+ void *operator new(size_t s) {
+ return User::operator new(s);
+ }
+
+ void anchor() override;
+
protected:
+ // Note: Instruction needs to be a friend here to call cloneImpl.
+ friend class Instruction;
+
+ PHINode *cloneImpl() const;
+
// allocHungoffUses - this is more complicated than the generic
// User::allocHungoffUses, because we have to allocate Uses for the incoming
// values and pointers to the incoming blocks, all in one allocation.
@@ -2512,11 +2560,9 @@ protected:
User::allocHungoffUses(N, /* IsPhi */ true);
}
- // Note: Instruction needs to be a friend here to call cloneImpl.
- friend class Instruction;
- PHINode *cloneImpl() const;
-
public:
+ void *operator new(size_t, unsigned) = delete;
+
/// Constructors - NumReservedValues is a hint for the number of incoming
/// edges that this phi node will have (use 0 if you really have no idea).
static PHINode *Create(Type *Ty, unsigned NumReservedValues,
@@ -2524,6 +2570,7 @@ public:
Instruction *InsertBefore = nullptr) {
return new PHINode(Ty, NumReservedValues, NameStr, InsertBefore);
}
+
static PHINode *Create(Type *Ty, unsigned NumReservedValues,
const Twine &NameStr, BasicBlock *InsertAtEnd) {
return new PHINode(Ty, NumReservedValues, NameStr, InsertAtEnd);
@@ -2570,11 +2617,11 @@ public:
const_op_range incoming_values() const { return operands(); }
- /// getNumIncomingValues - Return the number of incoming edges
+ /// Return the number of incoming edges
///
unsigned getNumIncomingValues() const { return getNumOperands(); }
- /// getIncomingValue - Return incoming value number x
+ /// Return incoming value number x
///
Value *getIncomingValue(unsigned i) const {
return getOperand(i);
@@ -2592,13 +2639,13 @@ public:
return i;
}
- /// getIncomingBlock - Return incoming basic block number @p i.
+ /// Return incoming basic block number @p i.
///
BasicBlock *getIncomingBlock(unsigned i) const {
return block_begin()[i];
}
- /// getIncomingBlock - Return incoming basic block corresponding
+ /// Return incoming basic block corresponding
/// to an operand of the PHI.
///
BasicBlock *getIncomingBlock(const Use &U) const {
@@ -2606,7 +2653,7 @@ public:
return getIncomingBlock(unsigned(&U - op_begin()));
}
- /// getIncomingBlock - Return incoming basic block corresponding
+ /// Return incoming basic block corresponding
/// to value use iterator.
///
BasicBlock *getIncomingBlock(Value::const_user_iterator I) const {
@@ -2618,7 +2665,7 @@ public:
block_begin()[i] = BB;
}
- /// addIncoming - Add an incoming value to the end of the PHI list
+ /// Add an incoming value to the end of the PHI list
///
void addIncoming(Value *V, BasicBlock *BB) {
if (getNumOperands() == ReservedSpace)
@@ -2629,7 +2676,7 @@ public:
setIncomingBlock(getNumOperands() - 1, BB);
}
- /// removeIncomingValue - Remove an incoming value. This is useful if a
+ /// Remove an incoming value. This is useful if a
/// predecessor basic block is deleted. The value removed is returned.
///
/// If the last incoming value for a PHI node is removed (and DeletePHIIfEmpty
@@ -2645,7 +2692,7 @@ public:
return removeIncomingValue(Idx, DeletePHIIfEmpty);
}
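A sketch of Create() plus addIncoming() as documented above: build a two-way merge PHI, reserving space for both edges up front. Helper and parameter names are placeholders:

    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    PHINode *mergeTwo(Type *Ty, Value *ThenV, BasicBlock *ThenBB,
                      Value *ElseV, BasicBlock *ElseBB,
                      Instruction *InsertPt) {
      // Reserving 2 avoids a growOperands() call on the second addIncoming.
      PHINode *PN = PHINode::Create(Ty, /*NumReservedValues=*/2, "merge",
                                    InsertPt);
      PN->addIncoming(ThenV, ThenBB);
      PN->addIncoming(ElseV, ElseBB);
      return PN;
    }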
- /// getBasicBlockIndex - Return the first index of the specified basic
+ /// Return the first index of the specified basic
/// block in the value list for this PHI. Returns -1 if no instance.
///
int getBasicBlockIndex(const BasicBlock *BB) const {
@@ -2661,11 +2708,11 @@ public:
return getIncomingValue(Idx);
}
- /// hasConstantValue - If the specified PHI node always merges together the
+ /// If the specified PHI node always merges together the
/// same value, return the value, otherwise return null.
Value *hasConstantValue() const;
- /// hasConstantOrUndefValue - Whether the specified PHI node always merges
+ /// Whether the specified PHI node always merges
/// together the same value, assuming undefs are equal to a unique
/// non-undef value.
bool hasConstantOrUndefValue() const;
@@ -2693,7 +2740,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(PHINode, Value)
//===----------------------------------------------------------------------===//
//===---------------------------------------------------------------------------
-/// LandingPadInst - The landingpad instruction holds all of the information
+/// The landingpad instruction holds all of the information
/// necessary to generate correct exception handling. The landingpad instruction
/// cannot be moved from the top of a landing pad block, which itself is
/// accessible only from the 'unwind' edge of an invoke. This uses the
@@ -2701,34 +2748,38 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(PHINode, Value)
/// cleanup.
///
class LandingPadInst : public Instruction {
- /// ReservedSpace - The number of operands actually allocated. NumOperands is
+ /// The number of operands actually allocated. NumOperands is
/// the number actually in use.
unsigned ReservedSpace;
+
LandingPadInst(const LandingPadInst &LP);
public:
enum ClauseType { Catch, Filter };
private:
- void *operator new(size_t, unsigned) = delete;
+ explicit LandingPadInst(Type *RetTy, unsigned NumReservedValues,
+ const Twine &NameStr, Instruction *InsertBefore);
+ explicit LandingPadInst(Type *RetTy, unsigned NumReservedValues,
+ const Twine &NameStr, BasicBlock *InsertAtEnd);
+
// Allocate space for exactly zero operands.
void *operator new(size_t s) {
return User::operator new(s);
}
+
void growOperands(unsigned Size);
void init(unsigned NumReservedValues, const Twine &NameStr);
- explicit LandingPadInst(Type *RetTy, unsigned NumReservedValues,
- const Twine &NameStr, Instruction *InsertBefore);
- explicit LandingPadInst(Type *RetTy, unsigned NumReservedValues,
- const Twine &NameStr, BasicBlock *InsertAtEnd);
-
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
+
LandingPadInst *cloneImpl() const;
public:
+ void *operator new(size_t, unsigned) = delete;
+
/// Constructors - NumReservedClauses is a hint for the number of incoming
/// clauses that this landingpad will have (use 0 if you really have no idea).
static LandingPadInst *Create(Type *RetTy, unsigned NumReservedClauses,
@@ -2740,12 +2791,12 @@ public:
/// Provide fast operand accessors
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
- /// isCleanup - Return 'true' if this landingpad instruction is a
+ /// Return 'true' if this landingpad instruction is a
/// cleanup. I.e., it should be run when unwinding even if its landing pad
/// doesn't catch the exception.
bool isCleanup() const { return getSubclassDataFromInstruction() & 1; }
- /// setCleanup - Indicate that this landingpad instruction is a cleanup.
+ /// Indicate that this landingpad instruction is a cleanup.
void setCleanup(bool V) {
setInstructionSubclassData((getSubclassDataFromInstruction() & ~1) |
(V ? 1 : 0));
@@ -2760,20 +2811,20 @@ public:
return cast<Constant>(getOperandList()[Idx]);
}
- /// isCatch - Return 'true' if the clause and index Idx is a catch clause.
+ /// Return 'true' if the clause and index Idx is a catch clause.
bool isCatch(unsigned Idx) const {
return !isa<ArrayType>(getOperandList()[Idx]->getType());
}
- /// isFilter - Return 'true' if the clause and index Idx is a filter clause.
+ /// Return 'true' if the clause and index Idx is a filter clause.
bool isFilter(unsigned Idx) const {
return isa<ArrayType>(getOperandList()[Idx]->getType());
}
- /// getNumClauses - Get the number of clauses for this landing pad.
+ /// Get the number of clauses for this landing pad.
unsigned getNumClauses() const { return getNumOperands(); }
- /// reserveClauses - Grow the size of the operand list to accommodate the new
+ /// Grow the size of the operand list to accommodate the new
/// number of clauses.
void reserveClauses(unsigned Size) { growOperands(Size); }
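A sketch over the clause accessors above: count catch clauses while skipping filter clauses. countCatchClauses is a made-up helper:

    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    unsigned countCatchClauses(const LandingPadInst &LP) {
      unsigned NumCatches = 0;
      for (unsigned i = 0, e = LP.getNumClauses(); i != e; ++i)
        if (LP.isCatch(i)) // catch clauses are the non-array-typed operands
          ++NumCatches;
      return NumCatches;
    }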
@@ -2797,7 +2848,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(LandingPadInst, Value)
//===----------------------------------------------------------------------===//
//===---------------------------------------------------------------------------
-/// ReturnInst - Return a value (possibly void), from a function. Execution
+/// Return a value (possibly void) from a function. Execution
/// does not continue in this function any longer.
///
class ReturnInst : public TerminatorInst {
@@ -2823,21 +2874,25 @@ private:
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
+
ReturnInst *cloneImpl() const;
public:
+ ~ReturnInst() override;
+
static ReturnInst* Create(LLVMContext &C, Value *retVal = nullptr,
Instruction *InsertBefore = nullptr) {
return new(!!retVal) ReturnInst(C, retVal, InsertBefore);
}
+
static ReturnInst* Create(LLVMContext &C, Value *retVal,
BasicBlock *InsertAtEnd) {
return new(!!retVal) ReturnInst(C, retVal, InsertAtEnd);
}
+
static ReturnInst* Create(LLVMContext &C, BasicBlock *InsertAtEnd) {
return new(0) ReturnInst(C, InsertAtEnd);
}
- ~ReturnInst() override;
/// Provide fast operand accessors
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
@@ -2874,7 +2929,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ReturnInst, Value)
//===----------------------------------------------------------------------===//
//===---------------------------------------------------------------------------
-/// BranchInst - Conditional or Unconditional Branch instruction.
+/// Conditional or Unconditional Branch instruction.
///
class BranchInst : public TerminatorInst {
/// Ops list - Branches are strange. The operands are ordered:
@@ -2882,7 +2937,6 @@ class BranchInst : public TerminatorInst {
/// they don't have to check for cond/uncond branchness. These are mostly
/// accessed relative to op_end().
BranchInst(const BranchInst &BI);
- void AssertOK();
// BranchInst constructors (where {B, T, F} are blocks, and C is a condition):
// BranchInst(BB *B) - 'br B'
// BranchInst(BB* T, BB *F, Value *C) - 'br C, T, F'
@@ -2897,9 +2951,12 @@ class BranchInst : public TerminatorInst {
BranchInst(BasicBlock *IfTrue, BasicBlock *IfFalse, Value *Cond,
BasicBlock *InsertAtEnd);
+ void AssertOK();
+
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
+
BranchInst *cloneImpl() const;
public:
@@ -2907,13 +2964,16 @@ public:
Instruction *InsertBefore = nullptr) {
return new(1) BranchInst(IfTrue, InsertBefore);
}
+
static BranchInst *Create(BasicBlock *IfTrue, BasicBlock *IfFalse,
Value *Cond, Instruction *InsertBefore = nullptr) {
return new(3) BranchInst(IfTrue, IfFalse, Cond, InsertBefore);
}
+
static BranchInst *Create(BasicBlock *IfTrue, BasicBlock *InsertAtEnd) {
return new(1) BranchInst(IfTrue, InsertAtEnd);
}
+
static BranchInst *Create(BasicBlock *IfTrue, BasicBlock *IfFalse,
Value *Cond, BasicBlock *InsertAtEnd) {
return new(3) BranchInst(IfTrue, IfFalse, Cond, InsertAtEnd);
@@ -2947,7 +3007,7 @@ public:
*(&Op<-1>() - idx) = NewSucc;
}
- /// \brief Swap the successors of this branch instruction.
+ /// Swap the successors of this branch instruction.
///
/// Swaps the successors of the branch instruction. This also swaps any
/// branch weight metadata associated with the instruction so that it
@@ -2982,19 +3042,14 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(BranchInst, Value)
/// Multiway switch
///
class SwitchInst : public TerminatorInst {
- void *operator new(size_t, unsigned) = delete;
unsigned ReservedSpace;
+
// Operand[0] = Value to switch on
// Operand[1] = Default basic block destination
// Operand[2n ] = Value to match
// Operand[2n+1] = BasicBlock to go to on match
SwitchInst(const SwitchInst &SI);
- void init(Value *Value, BasicBlock *Default, unsigned NumReserved);
- void growOperands();
- // allocate space for exactly zero operands
- void *operator new(size_t s) {
- return User::operator new(s);
- }
+
/// Create a new switch instruction, specifying a value to switch on and a
/// default destination. The number of additional cases can be specified here
/// to make memory allocation more efficient. This constructor can also
@@ -3009,12 +3064,23 @@ class SwitchInst : public TerminatorInst {
SwitchInst(Value *Value, BasicBlock *Default, unsigned NumCases,
BasicBlock *InsertAtEnd);
+ // allocate space for exactly zero operands
+ void *operator new(size_t s) {
+ return User::operator new(s);
+ }
+
+ void init(Value *Value, BasicBlock *Default, unsigned NumReserved);
+ void growOperands();
+
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
+
SwitchInst *cloneImpl() const;
public:
+ void *operator new(size_t, unsigned) = delete;
+
// -2
static const unsigned DefaultPseudoIndex = static_cast<unsigned>(~0L-1);
@@ -3111,7 +3177,6 @@ public:
ConstCaseIt;
class CaseIt : public CaseIteratorT<SwitchInst, ConstantInt, BasicBlock> {
-
typedef CaseIteratorT<SwitchInst, ConstantInt, BasicBlock> ParentTy;
public:
@@ -3135,6 +3200,7 @@ public:
Instruction *InsertBefore = nullptr) {
return new SwitchInst(Value, Default, NumCases, InsertBefore);
}
+
static SwitchInst *Create(Value *Value, BasicBlock *Default,
unsigned NumCases, BasicBlock *InsertAtEnd) {
return new SwitchInst(Value, Default, NumCases, InsertAtEnd);
@@ -3166,6 +3232,7 @@ public:
CaseIt case_begin() {
return CaseIt(this, 0);
}
+
/// Returns a read-only iterator that points to the first case in the
/// SwitchInst.
ConstCaseIt case_begin() const {
@@ -3177,6 +3244,7 @@ public:
CaseIt case_end() {
return CaseIt(this, getNumCases());
}
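A sketch of the case iterators above: look up the successor for a known constant, falling back to the default destination. The helper name is made up, and getCaseValue(), getCaseSuccessor() and getDefaultDest() are assumed to be the accessors CaseIteratorT and SwitchInst provide elsewhere in this header:

    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    BasicBlock *destForValue(SwitchInst &SI, ConstantInt *C) {
      for (SwitchInst::CaseIt I = SI.case_begin(), E = SI.case_end();
           I != E; ++I)
        if (I.getCaseValue() == C)   // pointer identity; constants are uniqued
          return I.getCaseSuccessor();
      return SI.getDefaultDest();
    }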
+
/// Returns a read-only iterator that points one past the last in the
/// SwitchInst.
ConstCaseIt case_end() const {
@@ -3286,44 +3354,49 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(SwitchInst, Value)
//===----------------------------------------------------------------------===//
//===---------------------------------------------------------------------------
-/// IndirectBrInst - Indirect Branch Instruction.
+/// Indirect Branch Instruction.
///
class IndirectBrInst : public TerminatorInst {
- void *operator new(size_t, unsigned) = delete;
unsigned ReservedSpace;
- // Operand[0] = Value to switch on
- // Operand[1] = Default basic block destination
- // Operand[2n ] = Value to match
- // Operand[2n+1] = BasicBlock to go to on match
+
+ // Operand[0] = Address to jump to
+ // Operand[n+1] = n-th destination
IndirectBrInst(const IndirectBrInst &IBI);
- void init(Value *Address, unsigned NumDests);
- void growOperands();
- // allocate space for exactly zero operands
- void *operator new(size_t s) {
- return User::operator new(s);
- }
- /// IndirectBrInst ctor - Create a new indirectbr instruction, specifying an
+
+ /// Create a new indirectbr instruction, specifying an
/// Address to jump to. The number of expected destinations can be specified
/// here to make memory allocation more efficient. This constructor can also
/// autoinsert before another instruction.
IndirectBrInst(Value *Address, unsigned NumDests, Instruction *InsertBefore);
- /// IndirectBrInst ctor - Create a new indirectbr instruction, specifying an
+ /// Create a new indirectbr instruction, specifying an
/// Address to jump to. The number of expected destinations can be specified
/// here to make memory allocation more efficient. This constructor also
/// autoinserts at the end of the specified BasicBlock.
IndirectBrInst(Value *Address, unsigned NumDests, BasicBlock *InsertAtEnd);
+ // allocate space for exactly zero operands
+ void *operator new(size_t s) {
+ return User::operator new(s);
+ }
+
+ void init(Value *Address, unsigned NumDests);
+ void growOperands();
+
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
+
IndirectBrInst *cloneImpl() const;
public:
+ void *operator new(size_t, unsigned) = delete;
+
static IndirectBrInst *Create(Value *Address, unsigned NumDests,
Instruction *InsertBefore = nullptr) {
return new IndirectBrInst(Address, NumDests, InsertBefore);
}
+
static IndirectBrInst *Create(Value *Address, unsigned NumDests,
BasicBlock *InsertAtEnd) {
return new IndirectBrInst(Address, NumDests, InsertAtEnd);
@@ -3337,19 +3410,19 @@ public:
const Value *getAddress() const { return getOperand(0); }
void setAddress(Value *V) { setOperand(0, V); }
- /// getNumDestinations - return the number of possible destinations in this
+ /// Return the number of possible destinations in this
/// indirectbr instruction.
unsigned getNumDestinations() const { return getNumOperands()-1; }
- /// getDestination - Return the specified destination.
+ /// Return the specified destination.
BasicBlock *getDestination(unsigned i) { return getSuccessor(i); }
const BasicBlock *getDestination(unsigned i) const { return getSuccessor(i); }
- /// addDestination - Add a destination.
+ /// Add a destination.
///
void addDestination(BasicBlock *Dest);
- /// removeDestination - This method removes the specified successor from the
+ /// This method removes the specified successor from the
/// indirectbr instruction.
void removeDestination(unsigned i);
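A sketch over the destination accessors above; addDestinationOnce is a hypothetical helper that avoids duplicate successors:

    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    void addDestinationOnce(IndirectBrInst &IBI, BasicBlock *BB) {
      for (unsigned i = 0, e = IBI.getNumDestinations(); i != e; ++i)
        if (IBI.getDestination(i) == BB)
          return;               // already a possible destination
      IBI.addDestination(BB);   // grows the operand list as needed
    }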
@@ -3385,28 +3458,21 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(IndirectBrInst, Value)
// InvokeInst Class
//===----------------------------------------------------------------------===//
-/// InvokeInst - Invoke instruction. The SubclassData field is used to hold the
+/// Invoke instruction. The SubclassData field is used to hold the
/// calling convention of the call.
///
class InvokeInst : public TerminatorInst,
public OperandBundleUser<InvokeInst, User::op_iterator> {
+ friend class OperandBundleUser<InvokeInst, User::op_iterator>;
+
AttributeSet AttributeList;
FunctionType *FTy;
+
InvokeInst(const InvokeInst &BI);
- void init(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException,
- ArrayRef<Value *> Args, ArrayRef<OperandBundleDef> Bundles,
- const Twine &NameStr) {
- init(cast<FunctionType>(
- cast<PointerType>(Func->getType())->getElementType()),
- Func, IfNormal, IfException, Args, Bundles, NameStr);
- }
- void init(FunctionType *FTy, Value *Func, BasicBlock *IfNormal,
- BasicBlock *IfException, ArrayRef<Value *> Args,
- ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr);
/// Construct an InvokeInst given a range of arguments.
///
- /// \brief Construct an InvokeInst from a range of arguments
+ /// Construct an InvokeInst from a range of arguments
inline InvokeInst(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException,
ArrayRef<Value *> Args, ArrayRef<OperandBundleDef> Bundles,
unsigned Values, const Twine &NameStr,
@@ -3422,18 +3488,30 @@ class InvokeInst : public TerminatorInst,
const Twine &NameStr, Instruction *InsertBefore);
/// Construct an InvokeInst given a range of arguments.
///
- /// \brief Construct an InvokeInst from a range of arguments
+ /// Construct an InvokeInst from a range of arguments
inline InvokeInst(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException,
ArrayRef<Value *> Args, ArrayRef<OperandBundleDef> Bundles,
unsigned Values, const Twine &NameStr,
BasicBlock *InsertAtEnd);
- friend class OperandBundleUser<InvokeInst, User::op_iterator>;
bool hasDescriptor() const { return HasDescriptor; }
+ void init(Value *Func, BasicBlock *IfNormal, BasicBlock *IfException,
+ ArrayRef<Value *> Args, ArrayRef<OperandBundleDef> Bundles,
+ const Twine &NameStr) {
+ init(cast<FunctionType>(
+ cast<PointerType>(Func->getType())->getElementType()),
+ Func, IfNormal, IfException, Args, Bundles, NameStr);
+ }
+
+ void init(FunctionType *FTy, Value *Func, BasicBlock *IfNormal,
+ BasicBlock *IfException, ArrayRef<Value *> Args,
+ ArrayRef<OperandBundleDef> Bundles, const Twine &NameStr);
+
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
+
InvokeInst *cloneImpl() const;
public:
@@ -3446,6 +3524,7 @@ public:
Func, IfNormal, IfException, Args, None, NameStr,
InsertBefore);
}
+
static InvokeInst *Create(Value *Func, BasicBlock *IfNormal,
BasicBlock *IfException, ArrayRef<Value *> Args,
ArrayRef<OperandBundleDef> Bundles = None,
@@ -3456,6 +3535,7 @@ public:
Func, IfNormal, IfException, Args, Bundles, NameStr,
InsertBefore);
}
+
static InvokeInst *Create(FunctionType *Ty, Value *Func, BasicBlock *IfNormal,
BasicBlock *IfException, ArrayRef<Value *> Args,
const Twine &NameStr,
@@ -3464,6 +3544,7 @@ public:
return new (Values) InvokeInst(Ty, Func, IfNormal, IfException, Args, None,
Values, NameStr, InsertBefore);
}
+
static InvokeInst *Create(FunctionType *Ty, Value *Func, BasicBlock *IfNormal,
BasicBlock *IfException, ArrayRef<Value *> Args,
ArrayRef<OperandBundleDef> Bundles = None,
@@ -3476,6 +3557,7 @@ public:
InvokeInst(Ty, Func, IfNormal, IfException, Args, Bundles, Values,
NameStr, InsertBefore);
}
+
static InvokeInst *Create(Value *Func,
BasicBlock *IfNormal, BasicBlock *IfException,
ArrayRef<Value *> Args, const Twine &NameStr,
@@ -3496,7 +3578,7 @@ public:
InsertAtEnd);
}
- /// \brief Create a clone of \p II with a different set of operand bundles and
+ /// Create a clone of \p II with a different set of operand bundles and
/// insert it before \p InsertPt.
///
/// The returned invoke instruction is identical to \p II in every way except
@@ -3515,7 +3597,7 @@ public:
this->FTy = FTy;
}
- /// getNumArgOperands - Return the number of invoke arguments.
+ /// Return the number of invoke arguments.
///
unsigned getNumArgOperands() const {
return getNumOperands() - getNumTotalBundleOperands() - 3;
@@ -3532,35 +3614,35 @@ public:
setOperand(i, v);
}
- /// \brief Return the iterator pointing to the beginning of the argument list.
+ /// Return the iterator pointing to the beginning of the argument list.
op_iterator arg_begin() { return op_begin(); }
- /// \brief Return the iterator pointing to the end of the argument list.
+ /// Return the iterator pointing to the end of the argument list.
op_iterator arg_end() {
// [ invoke args ], [ operand bundles ], normal dest, unwind dest, callee
return op_end() - getNumTotalBundleOperands() - 3;
- };
+ }
- /// \brief Iteration adapter for range-for loops.
+ /// Iteration adapter for range-for loops.
iterator_range<op_iterator> arg_operands() {
return make_range(arg_begin(), arg_end());
}
- /// \brief Return the iterator pointing to the beginning of the argument list.
+ /// Return the iterator pointing to the beginning of the argument list.
const_op_iterator arg_begin() const { return op_begin(); }
- /// \brief Return the iterator pointing to the end of the argument list.
+ /// Return the iterator pointing to the end of the argument list.
const_op_iterator arg_end() const {
// [ invoke args ], [ operand bundles ], normal dest, unwind dest, callee
return op_end() - getNumTotalBundleOperands() - 3;
- };
+ }
- /// \brief Iteration adapter for range-for loops.
+ /// Iteration adapter for range-for loops.
iterator_range<const_op_iterator> arg_operands() const {
return make_range(arg_begin(), arg_end());
}
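As with CallInst, the range above visits only the invoke arguments; the bundle operands and the three trailing operands (callee, normal destination, unwind destination) are excluded. A sketch with a made-up helper name:

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    bool hasUndefArg(InvokeInst &II) {
      for (Value *Arg : II.arg_operands())
        if (isa<UndefValue>(Arg))
          return true;
      return false;
    }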
- /// \brief Wrappers for getting the \c Use of a invoke argument.
+ /// Wrappers for getting the \c Use of an invoke argument.
const Use &getArgOperandUse(unsigned i) const {
assert(i < getNumArgOperands() && "Out of bounds!");
return getOperandUse(i);
@@ -3585,58 +3667,59 @@ public:
setInstructionSubclassData(ID);
}
- /// getAttributes - Return the parameter attributes for this invoke.
+ /// Return the parameter attributes for this invoke.
///
- const AttributeSet &getAttributes() const { return AttributeList; }
+ AttributeSet getAttributes() const { return AttributeList; }
- /// setAttributes - Set the parameter attributes for this invoke.
+ /// Set the parameter attributes for this invoke.
///
- void setAttributes(const AttributeSet &Attrs) { AttributeList = Attrs; }
+ void setAttributes(AttributeSet Attrs) { AttributeList = Attrs; }
- /// addAttribute - adds the attribute to the list of attributes.
+ /// Adds the attribute to the list of attributes.
void addAttribute(unsigned i, Attribute::AttrKind Kind);
- /// addAttribute - adds the attribute to the list of attributes.
+ /// Adds the attribute to the list of attributes.
void addAttribute(unsigned i, Attribute Attr);
- /// removeAttribute - removes the attribute from the list of attributes.
+ /// Removes the attribute from the list of attributes.
void removeAttribute(unsigned i, Attribute::AttrKind Kind);
- /// removeAttribute - removes the attribute from the list of attributes.
+ /// Removes the attribute from the list of attributes.
void removeAttribute(unsigned i, StringRef Kind);
- /// removeAttribute - removes the attribute from the list of attributes.
- void removeAttribute(unsigned i, Attribute Attr);
-
- /// \brief adds the dereferenceable attribute to the list of attributes.
+ /// Adds the dereferenceable attribute to the list of attributes.
void addDereferenceableAttr(unsigned i, uint64_t Bytes);
- /// \brief adds the dereferenceable_or_null attribute to the list of
+ /// Adds the dereferenceable_or_null attribute to the list of
/// attributes.
void addDereferenceableOrNullAttr(unsigned i, uint64_t Bytes);
- /// \brief Determine whether this call has the given attribute.
+ /// Determine whether this call has the given attribute.
bool hasFnAttr(Attribute::AttrKind Kind) const {
assert(Kind != Attribute::NoBuiltin &&
"Use CallInst::isNoBuiltin() to check for Attribute::NoBuiltin");
return hasFnAttrImpl(Kind);
}
- /// \brief Determine whether this call has the given attribute.
+ /// Determine whether this call has the given attribute.
bool hasFnAttr(StringRef Kind) const {
return hasFnAttrImpl(Kind);
}
- /// \brief Determine whether the call or the callee has the given attributes.
+ /// Determine whether the call or the callee has the given attributes.
bool paramHasAttr(unsigned i, Attribute::AttrKind Kind) const;
- /// \brief Get the attribute of a given kind at a position.
- Attribute getAttribute(unsigned i, Attribute::AttrKind Kind) const;
+ /// Get the attribute of a given kind at a position.
+ Attribute getAttribute(unsigned i, Attribute::AttrKind Kind) const {
+ return getAttributes().getAttribute(i, Kind);
+ }
- /// \brief Get the attribute of a given kind at a position.
- Attribute getAttribute(unsigned i, StringRef Kind) const;
+ /// Get the attribute of a given kind at a position.
+ Attribute getAttribute(unsigned i, StringRef Kind) const {
+ return getAttributes().getAttribute(i, Kind);
+ }
- /// \brief Return true if the data operand at index \p i has the attribute \p
+ /// Return true if the data operand at index \p i has the attribute \p
/// Kind.
///
/// Data operands include invoke arguments and values used in operand bundles,
@@ -3652,18 +3735,18 @@ public:
/// (\p i - 1) in the operand list.
bool dataOperandHasImpliedAttr(unsigned i, Attribute::AttrKind Kind) const;
- /// \brief Extract the alignment for a call or parameter (0=unknown).
+ /// Extract the alignment for a call or parameter (0=unknown).
unsigned getParamAlignment(unsigned i) const {
return AttributeList.getParamAlignment(i);
}
- /// \brief Extract the number of dereferenceable bytes for a call or
+ /// Extract the number of dereferenceable bytes for a call or
/// parameter (0=unknown).
uint64_t getDereferenceableBytes(unsigned i) const {
return AttributeList.getDereferenceableBytes(i);
}
- /// \brief Extract the number of dereferenceable_or_null bytes for a call or
+ /// Extract the number of dereferenceable_or_null bytes for a call or
/// parameter (0=unknown).
uint64_t getDereferenceableOrNullBytes(unsigned i) const {
return AttributeList.getDereferenceableOrNullBytes(i);
@@ -3676,7 +3759,7 @@ public:
return AttributeList.hasAttribute(n, Attribute::NoAlias);
}
- /// \brief Return true if the call should not be treated as a call to a
+ /// Return true if the call should not be treated as a call to a
/// builtin.
bool isNoBuiltin() const {
// We assert in hasFnAttr if one passes in Attribute::NoBuiltin, so we have
@@ -3685,13 +3768,13 @@ public:
!hasFnAttrImpl(Attribute::Builtin);
}
- /// \brief Return true if the call should not be inlined.
+ /// Return true if the call should not be inlined.
bool isNoInline() const { return hasFnAttr(Attribute::NoInline); }
void setIsNoInline() {
addAttribute(AttributeSet::FunctionIndex, Attribute::NoInline);
}
- /// \brief Determine if the call does not access memory.
+ /// Determine if the call does not access memory.
bool doesNotAccessMemory() const {
return hasFnAttr(Attribute::ReadNone);
}
@@ -3699,7 +3782,7 @@ public:
addAttribute(AttributeSet::FunctionIndex, Attribute::ReadNone);
}
- /// \brief Determine if the call does not access or only reads memory.
+ /// Determine if the call does not access or only reads memory.
bool onlyReadsMemory() const {
return doesNotAccessMemory() || hasFnAttr(Attribute::ReadOnly);
}
@@ -3707,7 +3790,7 @@ public:
addAttribute(AttributeSet::FunctionIndex, Attribute::ReadOnly);
}
- /// \brief Determine if the call does not access or only writes memory.
+ /// Determine if the call does not access or only writes memory.
bool doesNotReadMemory() const {
return doesNotAccessMemory() || hasFnAttr(Attribute::WriteOnly);
}
@@ -3724,35 +3807,34 @@ public:
addAttribute(AttributeSet::FunctionIndex, Attribute::ArgMemOnly);
}
- /// \brief Determine if the call cannot return.
+ /// Determine if the call cannot return.
bool doesNotReturn() const { return hasFnAttr(Attribute::NoReturn); }
void setDoesNotReturn() {
addAttribute(AttributeSet::FunctionIndex, Attribute::NoReturn);
}
- /// \brief Determine if the call cannot unwind.
+ /// Determine if the call cannot unwind.
bool doesNotThrow() const { return hasFnAttr(Attribute::NoUnwind); }
void setDoesNotThrow() {
addAttribute(AttributeSet::FunctionIndex, Attribute::NoUnwind);
}
- /// \brief Determine if the invoke cannot be duplicated.
+ /// Determine if the invoke cannot be duplicated.
bool cannotDuplicate() const {return hasFnAttr(Attribute::NoDuplicate); }
void setCannotDuplicate() {
addAttribute(AttributeSet::FunctionIndex, Attribute::NoDuplicate);
}
- /// \brief Determine if the invoke is convergent
+ /// Determine if the invoke is convergent
bool isConvergent() const { return hasFnAttr(Attribute::Convergent); }
void setConvergent() {
addAttribute(AttributeSet::FunctionIndex, Attribute::Convergent);
}
void setNotConvergent() {
- removeAttribute(AttributeSet::FunctionIndex,
- Attribute::get(getContext(), Attribute::Convergent));
+ removeAttribute(AttributeSet::FunctionIndex, Attribute::Convergent);
}
- /// \brief Determine if the call returns a structure through first
+ /// Determine if the call returns a structure through its first
/// pointer argument.
bool hasStructRetAttr() const {
if (getNumArgOperands() == 0)
@@ -3762,24 +3844,24 @@ public:
return paramHasAttr(1, Attribute::StructRet);
}
- /// \brief Determine if any call argument is an aggregate passed by value.
+ /// Determine if any call argument is an aggregate passed by value.
bool hasByValArgument() const {
return AttributeList.hasAttrSomewhere(Attribute::ByVal);
}
- /// getCalledFunction - Return the function called, or null if this is an
+ /// Return the function called, or null if this is an
/// indirect function invocation.
///
Function *getCalledFunction() const {
return dyn_cast<Function>(Op<-3>());
}
- /// getCalledValue - Get a pointer to the function that is invoked by this
+ /// Get a pointer to the function that is invoked by this
/// instruction
const Value *getCalledValue() const { return Op<-3>(); }
Value *getCalledValue() { return Op<-3>(); }
- /// setCalledFunction - Set the function called.
+ /// Set the function called.
void setCalledFunction(Value* Fn) {
setCalledFunction(
cast<FunctionType>(cast<PointerType>(Fn->getType())->getElementType()),
@@ -3806,7 +3888,7 @@ public:
Op<-1>() = reinterpret_cast<Value*>(B);
}
- /// getLandingPadInst - Get the landingpad instruction from the landing pad
+ /// Get the landingpad instruction from the landing pad
/// block (the unwind destination).
LandingPadInst *getLandingPadInst() const;
@@ -3835,17 +3917,17 @@ private:
unsigned getNumSuccessorsV() const override;
void setSuccessorV(unsigned idx, BasicBlock *B) override;
- template <typename AttrKind> bool hasFnAttrImpl(AttrKind A) const {
- if (AttributeList.hasAttribute(AttributeSet::FunctionIndex, A))
+ template <typename AttrKind> bool hasFnAttrImpl(AttrKind Kind) const {
+ if (AttributeList.hasAttribute(AttributeSet::FunctionIndex, Kind))
return true;
// Operand bundles override attributes on the called function, but don't
// override attributes directly present on the invoke instruction.
- if (isFnAttrDisallowedByOpBundle(A))
+ if (isFnAttrDisallowedByOpBundle(Kind))
return false;
if (const Function *F = getCalledFunction())
- return F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, A);
+ return F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, Kind);
return false;
}
@@ -3869,6 +3951,7 @@ InvokeInst::InvokeInst(FunctionType *Ty, Value *Func, BasicBlock *IfNormal,
InsertBefore) {
init(Ty, Func, IfNormal, IfException, Args, Bundles, NameStr);
}
+
InvokeInst::InvokeInst(Value *Func, BasicBlock *IfNormal,
BasicBlock *IfException, ArrayRef<Value *> Args,
ArrayRef<OperandBundleDef> Bundles, unsigned Values,
@@ -3888,7 +3971,7 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(InvokeInst, Value)
//===----------------------------------------------------------------------===//
//===---------------------------------------------------------------------------
-/// ResumeInst - Resume the propagation of an exception.
+/// Resume the propagation of an exception.
///
class ResumeInst : public TerminatorInst {
ResumeInst(const ResumeInst &RI);
@@ -3899,12 +3982,14 @@ class ResumeInst : public TerminatorInst {
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
+
ResumeInst *cloneImpl() const;
public:
static ResumeInst *Create(Value *Exn, Instruction *InsertBefore = nullptr) {
return new(1) ResumeInst(Exn, InsertBefore);
}
+
static ResumeInst *Create(Value *Exn, BasicBlock *InsertAtEnd) {
return new(1) ResumeInst(Exn, InsertAtEnd);
}
@@ -3942,19 +4027,16 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ResumeInst, Value)
// CatchSwitchInst Class
//===----------------------------------------------------------------------===//
class CatchSwitchInst : public TerminatorInst {
- void *operator new(size_t, unsigned) = delete;
- /// ReservedSpace - The number of operands actually allocated. NumOperands is
+ /// The number of operands actually allocated. NumOperands is
/// the number actually in use.
unsigned ReservedSpace;
+
// Operand[0] = Outer scope
// Operand[1] = Unwind block destination
// Operand[n] = BasicBlock to go to on match
CatchSwitchInst(const CatchSwitchInst &CSI);
- void init(Value *ParentPad, BasicBlock *UnwindDest, unsigned NumReserved);
- void growOperands(unsigned Size);
- // allocate space for exactly zero operands
- void *operator new(size_t s) { return User::operator new(s); }
- /// CatchSwitchInst ctor - Create a new switch instruction, specifying a
+
+ /// Create a new switch instruction, specifying a
/// default destination. The number of additional handlers can be specified
/// here to make memory allocation more efficient.
/// This constructor can also autoinsert before another instruction.
@@ -3962,7 +4044,7 @@ class CatchSwitchInst : public TerminatorInst {
unsigned NumHandlers, const Twine &NameStr,
Instruction *InsertBefore);
- /// CatchSwitchInst ctor - Create a new switch instruction, specifying a
+ /// Create a new switch instruction, specifying a
/// default destination. The number of additional handlers can be specified
/// here to make memory allocation more efficient.
/// This constructor also autoinserts at the end of the specified BasicBlock.
@@ -3970,12 +4052,21 @@ class CatchSwitchInst : public TerminatorInst {
unsigned NumHandlers, const Twine &NameStr,
BasicBlock *InsertAtEnd);
+ // allocate space for exactly zero operands
+ void *operator new(size_t s) { return User::operator new(s); }
+
+ void init(Value *ParentPad, BasicBlock *UnwindDest, unsigned NumReserved);
+ void growOperands(unsigned Size);
+
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
+
CatchSwitchInst *cloneImpl() const;
public:
+ void *operator new(size_t, unsigned) = delete;
+
static CatchSwitchInst *Create(Value *ParentPad, BasicBlock *UnwindDest,
unsigned NumHandlers,
const Twine &NameStr = "",
@@ -3983,6 +4074,7 @@ public:
return new CatchSwitchInst(ParentPad, UnwindDest, NumHandlers, NameStr,
InsertBefore);
}
+
static CatchSwitchInst *Create(Value *ParentPad, BasicBlock *UnwindDest,
unsigned NumHandlers, const Twine &NameStr,
BasicBlock *InsertAtEnd) {
@@ -4011,7 +4103,7 @@ public:
setOperand(1, UnwindDest);
}
- /// getNumHandlers - return the number of 'handlers' in this catchswitch
+  /// Return the number of 'handlers' in this catchswitch
/// instruction, except the default handler
unsigned getNumHandlers() const {
if (hasUnwindDest())
@@ -4029,8 +4121,6 @@ public:
typedef std::pointer_to_unary_function<Value *, BasicBlock *> DerefFnTy;
typedef mapped_iterator<op_iterator, DerefFnTy> handler_iterator;
typedef iterator_range<handler_iterator> handler_range;
-
-
typedef std::pointer_to_unary_function<const Value *, const BasicBlock *>
ConstDerefFnTy;
typedef mapped_iterator<const_op_iterator, ConstDerefFnTy> const_handler_iterator;
@@ -4043,6 +4133,7 @@ public:
++It;
return handler_iterator(It, DerefFnTy(handler_helper));
}
+
/// Returns an iterator that points to the first handler in the
/// CatchSwitchInst.
const_handler_iterator handler_begin() const {
@@ -4057,23 +4148,24 @@ public:
handler_iterator handler_end() {
return handler_iterator(op_end(), DerefFnTy(handler_helper));
}
+
/// Returns an iterator that points one past the last handler in the
/// CatchSwitchInst.
const_handler_iterator handler_end() const {
return const_handler_iterator(op_end(), ConstDerefFnTy(handler_helper));
}
- /// handlers - iteration adapter for range-for loops.
+ /// iteration adapter for range-for loops.
handler_range handlers() {
return make_range(handler_begin(), handler_end());
}
- /// handlers - iteration adapter for range-for loops.
+ /// iteration adapter for range-for loops.
const_handler_range handlers() const {
return make_range(handler_begin(), handler_end());
}
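A hedged usage sketch of the range adapters above, assuming CS already points at a valid CatchSwitchInst; the helper name is illustrative:

    #include "llvm/IR/Instructions.h"

    // Walk every handler block of a catchswitch via the handlers() range.
    static unsigned countHandlers(const llvm::CatchSwitchInst *CS) {
      unsigned N = 0;
      for (const llvm::BasicBlock *Handler : CS->handlers()) {
        (void)Handler; // a real pass would inspect each handler block here
        ++N;
      }
      return N;
    }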
- /// addHandler - Add an entry to the switch instruction...
+ /// Add an entry to the switch instruction...
/// Note:
/// This action invalidates handler_end(). Old handler_end() iterator will
/// point to the added handler.
@@ -4136,6 +4228,7 @@ public:
return new (Values)
CleanupPadInst(ParentPad, Args, Values, NameStr, InsertBefore);
}
+
static CleanupPadInst *Create(Value *ParentPad, ArrayRef<Value *> Args,
const Twine &NameStr, BasicBlock *InsertAtEnd) {
unsigned Values = 1 + Args.size();
@@ -4143,7 +4236,7 @@ public:
CleanupPadInst(ParentPad, Args, Values, NameStr, InsertAtEnd);
}
- /// \brief Methods for support type inquiry through isa, cast, and dyn_cast:
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::CleanupPad;
}
@@ -4176,6 +4269,7 @@ public:
return new (Values)
CatchPadInst(CatchSwitch, Args, Values, NameStr, InsertBefore);
}
+
static CatchPadInst *Create(Value *CatchSwitch, ArrayRef<Value *> Args,
const Twine &NameStr, BasicBlock *InsertAtEnd) {
unsigned Values = 1 + Args.size();
@@ -4192,7 +4286,7 @@ public:
Op<-1>() = CatchSwitch;
}
- /// \brief Methods for support type inquiry through isa, cast, and dyn_cast:
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const Instruction *I) {
return I->getOpcode() == Instruction::CatchPad;
}
@@ -4207,14 +4301,15 @@ public:
class CatchReturnInst : public TerminatorInst {
CatchReturnInst(const CatchReturnInst &RI);
-
- void init(Value *CatchPad, BasicBlock *BB);
CatchReturnInst(Value *CatchPad, BasicBlock *BB, Instruction *InsertBefore);
CatchReturnInst(Value *CatchPad, BasicBlock *BB, BasicBlock *InsertAtEnd);
+ void init(Value *CatchPad, BasicBlock *BB);
+
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
+
CatchReturnInst *cloneImpl() const;
public:
@@ -4224,6 +4319,7 @@ public:
assert(BB);
return new (2) CatchReturnInst(CatchPad, BB, InsertBefore);
}
+
static CatchReturnInst *Create(Value *CatchPad, BasicBlock *BB,
BasicBlock *InsertAtEnd) {
assert(CatchPad);
@@ -4281,16 +4377,17 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CatchReturnInst, Value)
class CleanupReturnInst : public TerminatorInst {
private:
CleanupReturnInst(const CleanupReturnInst &RI);
-
- void init(Value *CleanupPad, BasicBlock *UnwindBB);
CleanupReturnInst(Value *CleanupPad, BasicBlock *UnwindBB, unsigned Values,
Instruction *InsertBefore = nullptr);
CleanupReturnInst(Value *CleanupPad, BasicBlock *UnwindBB, unsigned Values,
BasicBlock *InsertAtEnd);
+ void init(Value *CleanupPad, BasicBlock *UnwindBB);
+
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
+
CleanupReturnInst *cloneImpl() const;
public:
@@ -4304,6 +4401,7 @@ public:
return new (Values)
CleanupReturnInst(CleanupPad, UnwindBB, Values, InsertBefore);
}
+
static CleanupReturnInst *Create(Value *CleanupPad, BasicBlock *UnwindBB,
BasicBlock *InsertAtEnd) {
assert(CleanupPad);
@@ -4371,25 +4469,27 @@ DEFINE_TRANSPARENT_OPERAND_ACCESSORS(CleanupReturnInst, Value)
//===----------------------------------------------------------------------===//
//===---------------------------------------------------------------------------
-/// UnreachableInst - This function has undefined behavior. In particular, the
+/// This function has undefined behavior. In particular, the
/// presence of this instruction indicates some higher level knowledge that the
/// end of the block cannot be reached.
///
class UnreachableInst : public TerminatorInst {
- void *operator new(size_t, unsigned) = delete;
-
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
+
UnreachableInst *cloneImpl() const;
public:
+ explicit UnreachableInst(LLVMContext &C, Instruction *InsertBefore = nullptr);
+ explicit UnreachableInst(LLVMContext &C, BasicBlock *InsertAtEnd);
+
// allocate space for exactly zero operands
void *operator new(size_t s) {
return User::operator new(s, 0);
}
- explicit UnreachableInst(LLVMContext &C, Instruction *InsertBefore = nullptr);
- explicit UnreachableInst(LLVMContext &C, BasicBlock *InsertAtEnd);
+
+ void *operator new(size_t, unsigned) = delete;
unsigned getNumSuccessors() const { return 0; }
@@ -4411,16 +4511,17 @@ private:
// TruncInst Class
//===----------------------------------------------------------------------===//
-/// \brief This class represents a truncation of integer types.
+/// This class represents a truncation of integer types.
class TruncInst : public CastInst {
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
- /// \brief Clone an identical TruncInst
+
+ /// Clone an identical TruncInst
TruncInst *cloneImpl() const;
public:
- /// \brief Constructor with insert-before-instruction semantics
+ /// Constructor with insert-before-instruction semantics
TruncInst(
Value *S, ///< The value to be truncated
Type *Ty, ///< The (smaller) type to truncate to
@@ -4428,7 +4529,7 @@ public:
Instruction *InsertBefore = nullptr ///< Where to insert the new instruction
);
- /// \brief Constructor with insert-at-end-of-block semantics
+ /// Constructor with insert-at-end-of-block semantics
TruncInst(
Value *S, ///< The value to be truncated
Type *Ty, ///< The (smaller) type to truncate to
@@ -4436,7 +4537,7 @@ public:
BasicBlock *InsertAtEnd ///< The block to insert the instruction into
);
- /// \brief Methods for support type inquiry through isa, cast, and dyn_cast:
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const Instruction *I) {
return I->getOpcode() == Trunc;
}
@@ -4449,16 +4550,17 @@ public:
// ZExtInst Class
//===----------------------------------------------------------------------===//
-/// \brief This class represents zero extension of integer types.
+/// This class represents zero extension of integer types.
class ZExtInst : public CastInst {
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
- /// \brief Clone an identical ZExtInst
+
+ /// Clone an identical ZExtInst
ZExtInst *cloneImpl() const;
public:
- /// \brief Constructor with insert-before-instruction semantics
+ /// Constructor with insert-before-instruction semantics
ZExtInst(
Value *S, ///< The value to be zero extended
Type *Ty, ///< The type to zero extend to
@@ -4466,7 +4568,7 @@ public:
Instruction *InsertBefore = nullptr ///< Where to insert the new instruction
);
- /// \brief Constructor with insert-at-end semantics.
+ /// Constructor with insert-at-end semantics.
ZExtInst(
Value *S, ///< The value to be zero extended
Type *Ty, ///< The type to zero extend to
@@ -4474,7 +4576,7 @@ public:
BasicBlock *InsertAtEnd ///< The block to insert the instruction into
);
- /// \brief Methods for support type inquiry through isa, cast, and dyn_cast:
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const Instruction *I) {
return I->getOpcode() == ZExt;
}
@@ -4487,16 +4589,17 @@ public:
// SExtInst Class
//===----------------------------------------------------------------------===//
-/// \brief This class represents a sign extension of integer types.
+/// This class represents a sign extension of integer types.
class SExtInst : public CastInst {
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
- /// \brief Clone an identical SExtInst
+
+ /// Clone an identical SExtInst
SExtInst *cloneImpl() const;
public:
- /// \brief Constructor with insert-before-instruction semantics
+ /// Constructor with insert-before-instruction semantics
SExtInst(
Value *S, ///< The value to be sign extended
Type *Ty, ///< The type to sign extend to
@@ -4504,7 +4607,7 @@ public:
Instruction *InsertBefore = nullptr ///< Where to insert the new instruction
);
- /// \brief Constructor with insert-at-end-of-block semantics
+ /// Constructor with insert-at-end-of-block semantics
SExtInst(
Value *S, ///< The value to be sign extended
Type *Ty, ///< The type to sign extend to
@@ -4512,7 +4615,7 @@ public:
BasicBlock *InsertAtEnd ///< The block to insert the instruction into
);
- /// \brief Methods for support type inquiry through isa, cast, and dyn_cast:
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const Instruction *I) {
return I->getOpcode() == SExt;
}
@@ -4525,16 +4628,17 @@ public:
// FPTruncInst Class
//===----------------------------------------------------------------------===//
-/// \brief This class represents a truncation of floating point types.
+/// This class represents a truncation of floating point types.
class FPTruncInst : public CastInst {
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
- /// \brief Clone an identical FPTruncInst
+
+ /// Clone an identical FPTruncInst
FPTruncInst *cloneImpl() const;
public:
- /// \brief Constructor with insert-before-instruction semantics
+ /// Constructor with insert-before-instruction semantics
FPTruncInst(
Value *S, ///< The value to be truncated
Type *Ty, ///< The type to truncate to
@@ -4542,7 +4646,7 @@ public:
Instruction *InsertBefore = nullptr ///< Where to insert the new instruction
);
- /// \brief Constructor with insert-before-instruction semantics
+ /// Constructor with insert-before-instruction semantics
FPTruncInst(
Value *S, ///< The value to be truncated
Type *Ty, ///< The type to truncate to
@@ -4550,7 +4654,7 @@ public:
BasicBlock *InsertAtEnd ///< The block to insert the instruction into
);
- /// \brief Methods for support type inquiry through isa, cast, and dyn_cast:
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const Instruction *I) {
return I->getOpcode() == FPTrunc;
}
@@ -4563,16 +4667,17 @@ public:
// FPExtInst Class
//===----------------------------------------------------------------------===//
-/// \brief This class represents an extension of floating point types.
+/// This class represents an extension of floating point types.
class FPExtInst : public CastInst {
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
- /// \brief Clone an identical FPExtInst
+
+ /// Clone an identical FPExtInst
FPExtInst *cloneImpl() const;
public:
- /// \brief Constructor with insert-before-instruction semantics
+ /// Constructor with insert-before-instruction semantics
FPExtInst(
Value *S, ///< The value to be extended
Type *Ty, ///< The type to extend to
@@ -4580,7 +4685,7 @@ public:
Instruction *InsertBefore = nullptr ///< Where to insert the new instruction
);
- /// \brief Constructor with insert-at-end-of-block semantics
+ /// Constructor with insert-at-end-of-block semantics
FPExtInst(
Value *S, ///< The value to be extended
Type *Ty, ///< The type to extend to
@@ -4588,7 +4693,7 @@ public:
BasicBlock *InsertAtEnd ///< The block to insert the instruction into
);
- /// \brief Methods for support type inquiry through isa, cast, and dyn_cast:
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const Instruction *I) {
return I->getOpcode() == FPExt;
}
@@ -4601,16 +4706,17 @@ public:
// UIToFPInst Class
//===----------------------------------------------------------------------===//
-/// \brief This class represents a cast unsigned integer to floating point.
+/// This class represents a cast unsigned integer to floating point.
class UIToFPInst : public CastInst {
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
- /// \brief Clone an identical UIToFPInst
+
+ /// Clone an identical UIToFPInst
UIToFPInst *cloneImpl() const;
public:
- /// \brief Constructor with insert-before-instruction semantics
+ /// Constructor with insert-before-instruction semantics
UIToFPInst(
Value *S, ///< The value to be converted
Type *Ty, ///< The type to convert to
@@ -4618,7 +4724,7 @@ public:
Instruction *InsertBefore = nullptr ///< Where to insert the new instruction
);
- /// \brief Constructor with insert-at-end-of-block semantics
+ /// Constructor with insert-at-end-of-block semantics
UIToFPInst(
Value *S, ///< The value to be converted
Type *Ty, ///< The type to convert to
@@ -4626,7 +4732,7 @@ public:
BasicBlock *InsertAtEnd ///< The block to insert the instruction into
);
- /// \brief Methods for support type inquiry through isa, cast, and dyn_cast:
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const Instruction *I) {
return I->getOpcode() == UIToFP;
}
@@ -4639,16 +4745,17 @@ public:
// SIToFPInst Class
//===----------------------------------------------------------------------===//
-/// \brief This class represents a cast from signed integer to floating point.
+/// This class represents a cast from signed integer to floating point.
class SIToFPInst : public CastInst {
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
- /// \brief Clone an identical SIToFPInst
+
+ /// Clone an identical SIToFPInst
SIToFPInst *cloneImpl() const;
public:
- /// \brief Constructor with insert-before-instruction semantics
+ /// Constructor with insert-before-instruction semantics
SIToFPInst(
Value *S, ///< The value to be converted
Type *Ty, ///< The type to convert to
@@ -4656,7 +4763,7 @@ public:
Instruction *InsertBefore = nullptr ///< Where to insert the new instruction
);
- /// \brief Constructor with insert-at-end-of-block semantics
+ /// Constructor with insert-at-end-of-block semantics
SIToFPInst(
Value *S, ///< The value to be converted
Type *Ty, ///< The type to convert to
@@ -4664,7 +4771,7 @@ public:
BasicBlock *InsertAtEnd ///< The block to insert the instruction into
);
- /// \brief Methods for support type inquiry through isa, cast, and dyn_cast:
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const Instruction *I) {
return I->getOpcode() == SIToFP;
}
@@ -4677,16 +4784,17 @@ public:
// FPToUIInst Class
//===----------------------------------------------------------------------===//
-/// \brief This class represents a cast from floating point to unsigned integer
+/// This class represents a cast from floating point to unsigned integer
class FPToUIInst : public CastInst {
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
- /// \brief Clone an identical FPToUIInst
+
+ /// Clone an identical FPToUIInst
FPToUIInst *cloneImpl() const;
public:
- /// \brief Constructor with insert-before-instruction semantics
+ /// Constructor with insert-before-instruction semantics
FPToUIInst(
Value *S, ///< The value to be converted
Type *Ty, ///< The type to convert to
@@ -4694,7 +4802,7 @@ public:
Instruction *InsertBefore = nullptr ///< Where to insert the new instruction
);
- /// \brief Constructor with insert-at-end-of-block semantics
+ /// Constructor with insert-at-end-of-block semantics
FPToUIInst(
Value *S, ///< The value to be converted
Type *Ty, ///< The type to convert to
@@ -4702,7 +4810,7 @@ public:
BasicBlock *InsertAtEnd ///< Where to insert the new instruction
);
- /// \brief Methods for support type inquiry through isa, cast, and dyn_cast:
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const Instruction *I) {
return I->getOpcode() == FPToUI;
}
@@ -4715,16 +4823,17 @@ public:
// FPToSIInst Class
//===----------------------------------------------------------------------===//
-/// \brief This class represents a cast from floating point to signed integer.
+/// This class represents a cast from floating point to signed integer.
class FPToSIInst : public CastInst {
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
- /// \brief Clone an identical FPToSIInst
+
+ /// Clone an identical FPToSIInst
FPToSIInst *cloneImpl() const;
public:
- /// \brief Constructor with insert-before-instruction semantics
+ /// Constructor with insert-before-instruction semantics
FPToSIInst(
Value *S, ///< The value to be converted
Type *Ty, ///< The type to convert to
@@ -4732,7 +4841,7 @@ public:
Instruction *InsertBefore = nullptr ///< Where to insert the new instruction
);
- /// \brief Constructor with insert-at-end-of-block semantics
+ /// Constructor with insert-at-end-of-block semantics
FPToSIInst(
Value *S, ///< The value to be converted
Type *Ty, ///< The type to convert to
@@ -4740,7 +4849,7 @@ public:
BasicBlock *InsertAtEnd ///< The block to insert the instruction into
);
- /// \brief Methods for support type inquiry through isa, cast, and dyn_cast:
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
static inline bool classof(const Instruction *I) {
return I->getOpcode() == FPToSI;
}
@@ -4753,10 +4862,13 @@ public:
// IntToPtrInst Class
//===----------------------------------------------------------------------===//
-/// \brief This class represents a cast from an integer to a pointer.
+/// This class represents a cast from an integer to a pointer.
class IntToPtrInst : public CastInst {
public:
- /// \brief Constructor with insert-before-instruction semantics
+ // Note: Instruction needs to be a friend here to call cloneImpl.
+ friend class Instruction;
+
+ /// Constructor with insert-before-instruction semantics
IntToPtrInst(
Value *S, ///< The value to be converted
Type *Ty, ///< The type to convert to
@@ -4764,7 +4876,7 @@ public:
Instruction *InsertBefore = nullptr ///< Where to insert the new instruction
);
- /// \brief Constructor with insert-at-end-of-block semantics
+ /// Constructor with insert-at-end-of-block semantics
IntToPtrInst(
Value *S, ///< The value to be converted
Type *Ty, ///< The type to convert to
@@ -4772,12 +4884,10 @@ public:
BasicBlock *InsertAtEnd ///< The block to insert the instruction into
);
- // Note: Instruction needs to be a friend here to call cloneImpl.
- friend class Instruction;
- /// \brief Clone an identical IntToPtrInst
+ /// Clone an identical IntToPtrInst.
IntToPtrInst *cloneImpl() const;
- /// \brief Returns the address space of this instruction's pointer type.
+ /// Returns the address space of this instruction's pointer type.
unsigned getAddressSpace() const {
return getType()->getPointerAddressSpace();
}
@@ -4795,16 +4905,17 @@ public:
// PtrToIntInst Class
//===----------------------------------------------------------------------===//
-/// \brief This class represents a cast from a pointer to an integer
+/// This class represents a cast from a pointer to an integer.
class PtrToIntInst : public CastInst {
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
- /// \brief Clone an identical PtrToIntInst
+
+ /// Clone an identical PtrToIntInst.
PtrToIntInst *cloneImpl() const;
public:
- /// \brief Constructor with insert-before-instruction semantics
+ /// Constructor with insert-before-instruction semantics
PtrToIntInst(
Value *S, ///< The value to be converted
Type *Ty, ///< The type to convert to
@@ -4812,7 +4923,7 @@ public:
Instruction *InsertBefore = nullptr ///< Where to insert the new instruction
);
- /// \brief Constructor with insert-at-end-of-block semantics
+ /// Constructor with insert-at-end-of-block semantics
PtrToIntInst(
Value *S, ///< The value to be converted
Type *Ty, ///< The type to convert to
@@ -4820,14 +4931,14 @@ public:
BasicBlock *InsertAtEnd ///< The block to insert the instruction into
);
- /// \brief Gets the pointer operand.
+ /// Gets the pointer operand.
Value *getPointerOperand() { return getOperand(0); }
- /// \brief Gets the pointer operand.
+ /// Gets the pointer operand.
const Value *getPointerOperand() const { return getOperand(0); }
- /// \brief Gets the operand index of the pointer operand.
+ /// Gets the operand index of the pointer operand.
static unsigned getPointerOperandIndex() { return 0U; }
- /// \brief Returns the address space of the pointer operand.
+ /// Returns the address space of the pointer operand.
unsigned getPointerAddressSpace() const {
return getPointerOperand()->getType()->getPointerAddressSpace();
}
@@ -4845,16 +4956,17 @@ public:
// BitCastInst Class
//===----------------------------------------------------------------------===//
-/// \brief This class represents a no-op cast from one type to another.
+/// This class represents a no-op cast from one type to another.
class BitCastInst : public CastInst {
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
- /// \brief Clone an identical BitCastInst
+
+ /// Clone an identical BitCastInst.
BitCastInst *cloneImpl() const;
public:
- /// \brief Constructor with insert-before-instruction semantics
+ /// Constructor with insert-before-instruction semantics
BitCastInst(
Value *S, ///< The value to be casted
    Type *Ty,                     ///< The type to cast to
@@ -4862,7 +4974,7 @@ public:
Instruction *InsertBefore = nullptr ///< Where to insert the new instruction
);
- /// \brief Constructor with insert-at-end-of-block semantics
+ /// Constructor with insert-at-end-of-block semantics
BitCastInst(
Value *S, ///< The value to be casted
    Type *Ty,                     ///< The type to cast to
@@ -4883,17 +4995,18 @@ public:
// AddrSpaceCastInst Class
//===----------------------------------------------------------------------===//
-/// \brief This class represents a conversion between pointers from
-/// one address space to another.
+/// This class represents a conversion between pointers from one address space
+/// to another.
class AddrSpaceCastInst : public CastInst {
protected:
// Note: Instruction needs to be a friend here to call cloneImpl.
friend class Instruction;
- /// \brief Clone an identical AddrSpaceCastInst
+
+ /// Clone an identical AddrSpaceCastInst.
AddrSpaceCastInst *cloneImpl() const;
public:
- /// \brief Constructor with insert-before-instruction semantics
+ /// Constructor with insert-before-instruction semantics
AddrSpaceCastInst(
Value *S, ///< The value to be casted
    Type *Ty,                     ///< The type to cast to
@@ -4901,7 +5014,7 @@ public:
Instruction *InsertBefore = nullptr ///< Where to insert the new instruction
);
- /// \brief Constructor with insert-at-end-of-block semantics
+ /// Constructor with insert-at-end-of-block semantics
AddrSpaceCastInst(
Value *S, ///< The value to be casted
    Type *Ty,                     ///< The type to cast to
@@ -4917,32 +5030,32 @@ public:
return isa<Instruction>(V) && classof(cast<Instruction>(V));
}
- /// \brief Gets the pointer operand.
+ /// Gets the pointer operand.
Value *getPointerOperand() {
return getOperand(0);
}
- /// \brief Gets the pointer operand.
+ /// Gets the pointer operand.
const Value *getPointerOperand() const {
return getOperand(0);
}
- /// \brief Gets the operand index of the pointer operand.
+ /// Gets the operand index of the pointer operand.
static unsigned getPointerOperandIndex() {
return 0U;
}
- /// \brief Returns the address space of the pointer operand.
+ /// Returns the address space of the pointer operand.
unsigned getSrcAddressSpace() const {
return getPointerOperand()->getType()->getPointerAddressSpace();
}
- /// \brief Returns the address space of the result.
+ /// Returns the address space of the result.
unsigned getDestAddressSpace() const {
return getType()->getPointerAddressSpace();
}
};
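A hedged sketch of querying the accessors defined just above; the helper name is illustrative and no particular address-space numbering is assumed:

    #include "llvm/IR/Instructions.h"

    // Report whether an addrspacecast actually changes the address space.
    static bool changesAddressSpace(const llvm::AddrSpaceCastInst *ASC) {
      unsigned Src = ASC->getSrcAddressSpace();  // space of the pointer operand
      unsigned Dst = ASC->getDestAddressSpace(); // space of the result type
      return Src != Dst;
    }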
-} // End llvm namespace
+} // end namespace llvm
-#endif
+#endif // LLVM_IR_INSTRUCTIONS_H
diff --git a/contrib/llvm/include/llvm/IR/IntrinsicInst.h b/contrib/llvm/include/llvm/IR/IntrinsicInst.h
index 52044e0a0cc8..b14a54503e52 100644
--- a/contrib/llvm/include/llvm/IR/IntrinsicInst.h
+++ b/contrib/llvm/include/llvm/IR/IntrinsicInst.h
@@ -25,20 +25,28 @@
#define LLVM_IR_INTRINSICINST_H
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
+#include <cassert>
+#include <cstdint>
namespace llvm {
+
/// A wrapper class for inspecting calls to intrinsic functions.
/// This allows the standard isa/dyncast/cast functionality to work with calls
/// to intrinsic functions.
class IntrinsicInst : public CallInst {
- IntrinsicInst() = delete;
- IntrinsicInst(const IntrinsicInst&) = delete;
- void operator=(const IntrinsicInst&) = delete;
public:
+ IntrinsicInst() = delete;
+ IntrinsicInst(const IntrinsicInst &) = delete;
+ IntrinsicInst &operator=(const IntrinsicInst &) = delete;
+
/// Return the intrinsic ID of this intrinsic.
Intrinsic::ID getIntrinsicID() const {
return getCalledFunction()->getIntrinsicID();
@@ -81,9 +89,11 @@ namespace llvm {
class DbgDeclareInst : public DbgInfoIntrinsic {
public:
Value *getAddress() const { return getVariableLocation(); }
+
DILocalVariable *getVariable() const {
return cast<DILocalVariable>(getRawVariable());
}
+
DIExpression *getExpression() const {
return cast<DIExpression>(getRawExpression());
}
@@ -91,6 +101,7 @@ namespace llvm {
Metadata *getRawVariable() const {
return cast<MetadataAsValue>(getArgOperand(1))->getMetadata();
}
+
Metadata *getRawExpression() const {
return cast<MetadataAsValue>(getArgOperand(2))->getMetadata();
}
@@ -110,13 +121,16 @@ namespace llvm {
Value *getValue() const {
return getVariableLocation(/* AllowNullOp = */ false);
}
+
uint64_t getOffset() const {
return cast<ConstantInt>(
const_cast<Value*>(getArgOperand(1)))->getZExtValue();
}
+
DILocalVariable *getVariable() const {
return cast<DILocalVariable>(getRawVariable());
}
+
DIExpression *getExpression() const {
return cast<DIExpression>(getRawExpression());
}
@@ -124,6 +138,7 @@ namespace llvm {
Metadata *getRawVariable() const {
return cast<MetadataAsValue>(getArgOperand(2))->getMetadata();
}
+
Metadata *getRawExpression() const {
return cast<MetadataAsValue>(getArgOperand(3))->getMetadata();
}
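A hedged sketch of reading the pieces of an llvm.dbg.value call through the accessors above; the helper name is illustrative:

    #include "llvm/IR/IntrinsicInst.h"

    // Pull the described value and its debug metadata out of a dbg.value call.
    static void inspectDbgValue(const llvm::DbgValueInst &DVI) {
      llvm::Value *Loc = DVI.getValue();               // value being described
      llvm::DILocalVariable *Var = DVI.getVariable();  // source-level variable
      llvm::DIExpression *Expr = DVI.getExpression();  // location expression
      (void)Loc; (void)Var; (void)Expr;
    }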
@@ -159,6 +174,7 @@ namespace llvm {
ConstantInt *getVolatileCst() const {
return cast<ConstantInt>(const_cast<Value*>(getArgOperand(4)));
}
+
bool isVolatile() const {
return !getVolatileCst()->isZero();
}
@@ -268,7 +284,6 @@ namespace llvm {
}
};
-
/// This class wraps the llvm.memcpy intrinsic.
class MemCpyInst : public MemTransferInst {
public:
@@ -359,6 +374,18 @@ namespace llvm {
ConstantInt *getIndex() const {
return cast<ConstantInt>(const_cast<Value *>(getArgOperand(3)));
}
+
+ Value *getStep() const;
+ };
+
+ class InstrProfIncrementInstStep : public InstrProfIncrementInst {
+ public:
+ static inline bool classof(const IntrinsicInst *I) {
+ return I->getIntrinsicID() == Intrinsic::instrprof_increment_step;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
};
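A hedged sketch of how the new subclass might be distinguished in a pass; the helper name is illustrative, and getStep() is the accessor declared just above:

    #include "llvm/IR/IntrinsicInst.h"

    // The step variant is matched first; plain increments fall through.
    static llvm::Value *incrementStepOrNull(llvm::IntrinsicInst *II) {
      if (auto *Step = llvm::dyn_cast<llvm::InstrProfIncrementInstStep>(II))
        return Step->getStep();  // llvm.instrprof.increment.step has a step operand
      if (auto *Inc = llvm::dyn_cast<llvm::InstrProfIncrementInst>(II))
        return Inc->getStep();   // plain llvm.instrprof.increment
      return nullptr;
    }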
/// This represents the llvm.instrprof_value_profile intrinsic.
@@ -393,6 +420,7 @@ namespace llvm {
return cast<ConstantInt>(const_cast<Value *>(getArgOperand(4)));
}
};
-} // namespace llvm
-#endif
+} // end namespace llvm
+
+#endif // LLVM_IR_INTRINSICINST_H
diff --git a/contrib/llvm/include/llvm/IR/Intrinsics.h b/contrib/llvm/include/llvm/IR/Intrinsics.h
index 7a87c2167710..d07358445dab 100644
--- a/contrib/llvm/include/llvm/IR/Intrinsics.h
+++ b/contrib/llvm/include/llvm/IR/Intrinsics.h
@@ -45,7 +45,16 @@ namespace Intrinsic {
};
/// Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
- std::string getName(ID id, ArrayRef<Type*> Tys = None);
+ /// Note, this version is for intrinsics with no overloads. Use the other
+ /// version of getName if overloads are required.
+ StringRef getName(ID id);
+
+ /// Return the LLVM name for an intrinsic, such as "llvm.ppc.altivec.lvx".
+ /// Note, this version of getName supports overloads, but is less efficient
+ /// than the StringRef version of this function. If no overloads are
+  /// required, it is safe to use this version, but better to use the StringRef
+ /// version.
+ std::string getName(ID id, ArrayRef<Type*> Tys);
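A hedged usage sketch of the two lookups; the overloaded example assumes llvm.memcpy's usual i8*/i8*/i64 overload set, and the function name is illustrative:

    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/LLVMContext.h"

    // Non-overloaded intrinsics get the cheap StringRef lookup; overloaded
    // ones need their concrete types and return a mangled std::string.
    static void lookupIntrinsicNames(llvm::LLVMContext &Ctx) {
      llvm::StringRef Plain = llvm::Intrinsic::getName(llvm::Intrinsic::trap);
      std::string Mangled = llvm::Intrinsic::getName(
          llvm::Intrinsic::memcpy,
          {llvm::Type::getInt8PtrTy(Ctx), llvm::Type::getInt8PtrTy(Ctx),
           llvm::Type::getInt64Ty(Ctx)});
      (void)Plain; (void)Mangled;
    }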
/// Return the function type for an intrinsic.
FunctionType *getType(LLVMContext &Context, ID id,
@@ -79,10 +88,10 @@ namespace Intrinsic {
StringRef Name);
/// Map a GCC builtin name to an intrinsic ID.
- ID getIntrinsicForGCCBuiltin(const char *Prefix, const char *BuiltinName);
+ ID getIntrinsicForGCCBuiltin(const char *Prefix, StringRef BuiltinName);
/// Map a MS builtin name to an intrinsic ID.
- ID getIntrinsicForMSBuiltin(const char *Prefix, const char *BuiltinName);
+ ID getIntrinsicForMSBuiltin(const char *Prefix, StringRef BuiltinName);
/// This is a type descriptor which explains the type requirements of an
/// intrinsic. This is returned by getIntrinsicInfoTableEntries.
@@ -91,7 +100,7 @@ namespace Intrinsic {
Void, VarArg, MMX, Token, Metadata, Half, Float, Double,
Integer, Vector, Pointer, Struct,
Argument, ExtendArgument, TruncArgument, HalfVecArgument,
- SameVecWidthArgument, PtrToArgument, VecOfPtrsToElt
+ SameVecWidthArgument, PtrToArgument, PtrToElt, VecOfPtrsToElt
} Kind;
union {
@@ -114,7 +123,7 @@ namespace Intrinsic {
assert(Kind == Argument || Kind == ExtendArgument ||
Kind == TruncArgument || Kind == HalfVecArgument ||
Kind == SameVecWidthArgument || Kind == PtrToArgument ||
- Kind == VecOfPtrsToElt);
+ Kind == PtrToElt || Kind == VecOfPtrsToElt);
return Argument_Info >> 3;
}
ArgKind getArgumentKind() const {
diff --git a/contrib/llvm/include/llvm/IR/Intrinsics.td b/contrib/llvm/include/llvm/IR/Intrinsics.td
index e9264ec59b55..33fd17f20dbe 100644
--- a/contrib/llvm/include/llvm/IR/Intrinsics.td
+++ b/contrib/llvm/include/llvm/IR/Intrinsics.td
@@ -43,6 +43,15 @@ def IntrWriteMem : IntrinsicProperty;
// reads from and (possibly volatile) writes to memory, it has no side effects.
def IntrArgMemOnly : IntrinsicProperty;
+// IntrInaccessibleMemOnly -- This intrinsic only accesses memory that is not
+// accessible by the module being compiled. This is a weaker form of IntrNoMem.
+def IntrInaccessibleMemOnly : IntrinsicProperty;
+
+// IntrInaccessibleMemOrArgMemOnly -- This intrinsic only accesses memory that
+// its pointer-typed arguments point to or memory that is not accessible
+// by the module being compiled. This is a weaker form of IntrArgMemOnly.
+def IntrInaccessibleMemOrArgMemOnly : IntrinsicProperty;
+
// Commutative - This intrinsic is commutative: X op Y == Y op X.
def Commutative : IntrinsicProperty;
@@ -133,6 +142,7 @@ class LLVMVectorSameWidth<int num, LLVMType elty>
ValueType ElTy = elty.VT;
}
class LLVMPointerTo<int num> : LLVMMatchType<num>;
+class LLVMPointerToElt<int num> : LLVMMatchType<num>;
class LLVMVectorOfPointersToElt<int num> : LLVMMatchType<num>;
// Match the type of another intrinsic parameter that is expected to be a
@@ -290,6 +300,7 @@ def int_gcwrite : Intrinsic<[],
//===--------------------- Code Generator Intrinsics ----------------------===//
//
def int_returnaddress : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrNoMem]>;
+def int_addressofreturnaddress : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
def int_frameaddress : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], [IntrNoMem]>;
def int_read_register : Intrinsic<[llvm_anyint_ty], [llvm_metadata_ty],
[IntrReadMem], "llvm.read_register">;
@@ -346,6 +357,12 @@ def int_instrprof_increment : Intrinsic<[],
llvm_i32_ty, llvm_i32_ty],
[]>;
+// A counter increment with step for instrumentation based profiling.
+def int_instrprof_increment_step : Intrinsic<[],
+ [llvm_ptr_ty, llvm_i64_ty,
+ llvm_i32_ty, llvm_i32_ty, llvm_i64_ty],
+ []>;
+
// A call to profile runtime for value profiling of target expressions
// through instrumentation based profiling.
def int_instrprof_value_profile : Intrinsic<[],
@@ -554,11 +571,11 @@ def int_lifetime_end : Intrinsic<[],
[llvm_i64_ty, llvm_ptr_ty],
[IntrArgMemOnly, NoCapture<1>]>;
def int_invariant_start : Intrinsic<[llvm_descriptor_ty],
- [llvm_i64_ty, llvm_ptr_ty],
+ [llvm_i64_ty, llvm_anyptr_ty],
[IntrArgMemOnly, NoCapture<1>]>;
def int_invariant_end : Intrinsic<[],
[llvm_descriptor_ty, llvm_i64_ty,
- llvm_ptr_ty],
+ llvm_anyptr_ty],
[IntrArgMemOnly, NoCapture<2>]>;
def int_invariant_group_barrier : Intrinsic<[llvm_ptr_ty],
@@ -597,7 +614,50 @@ def int_experimental_gc_relocate : Intrinsic<[llvm_any_ty],
[llvm_token_ty, llvm_i32_ty, llvm_i32_ty],
[IntrReadMem]>;
-//===-------------------------- Other Intrinsics --------------------------===//
+//===------------------------ Coroutine Intrinsics ---------------===//
+// These are documented in docs/Coroutines.rst
+
+// Coroutine Structure Intrinsics.
+
+def int_coro_id : Intrinsic<[llvm_token_ty], [llvm_i32_ty, llvm_ptr_ty,
+ llvm_ptr_ty, llvm_ptr_ty],
+ [IntrArgMemOnly, IntrReadMem,
+ ReadNone<1>, ReadOnly<2>, NoCapture<2>]>;
+def int_coro_alloc : Intrinsic<[llvm_i1_ty], [llvm_token_ty], []>;
+def int_coro_begin : Intrinsic<[llvm_ptr_ty], [llvm_token_ty, llvm_ptr_ty],
+ [WriteOnly<1>]>;
+
+def int_coro_free : Intrinsic<[llvm_ptr_ty], [llvm_token_ty, llvm_ptr_ty],
+ [IntrReadMem, IntrArgMemOnly, ReadOnly<1>,
+ NoCapture<1>]>;
+def int_coro_end : Intrinsic<[], [llvm_ptr_ty, llvm_i1_ty], []>;
+
+def int_coro_frame : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>;
+def int_coro_size : Intrinsic<[llvm_anyint_ty], [], [IntrNoMem]>;
+
+def int_coro_save : Intrinsic<[llvm_token_ty], [llvm_ptr_ty], []>;
+def int_coro_suspend : Intrinsic<[llvm_i8_ty], [llvm_token_ty, llvm_i1_ty], []>;
+
+def int_coro_param : Intrinsic<[llvm_i1_ty], [llvm_ptr_ty, llvm_ptr_ty],
+ [IntrNoMem, ReadNone<0>, ReadNone<1>]>;
+
+// Coroutine Manipulation Intrinsics.
+
+def int_coro_resume : Intrinsic<[], [llvm_ptr_ty], [Throws]>;
+def int_coro_destroy : Intrinsic<[], [llvm_ptr_ty], [Throws]>;
+def int_coro_done : Intrinsic<[llvm_i1_ty], [llvm_ptr_ty],
+ [IntrArgMemOnly, ReadOnly<0>, NoCapture<0>]>;
+def int_coro_promise : Intrinsic<[llvm_ptr_ty],
+ [llvm_ptr_ty, llvm_i32_ty, llvm_i1_ty],
+ [IntrNoMem, NoCapture<0>]>;
+
+// Coroutine Lowering Intrinsics. Used internally by coroutine passes.
+
+def int_coro_subfn_addr : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i8_ty],
+ [IntrReadMem, IntrArgMemOnly, ReadOnly<0>,
+ NoCapture<0>]>;
+
+//===-------------------------- Other Intrinsics --------------------------===//
//
def int_flt_rounds : Intrinsic<[llvm_i32_ty]>,
GCCBuiltin<"__builtin_flt_rounds">;
@@ -668,13 +728,25 @@ def int_masked_gather: Intrinsic<[llvm_anyvector_ty],
[LLVMVectorOfPointersToElt<0>, llvm_i32_ty,
LLVMVectorSameWidth<0, llvm_i1_ty>,
LLVMMatchType<0>],
- [IntrReadMem]>;
+ [IntrReadMem]>;
def int_masked_scatter: Intrinsic<[],
[llvm_anyvector_ty,
LLVMVectorOfPointersToElt<0>, llvm_i32_ty,
LLVMVectorSameWidth<0, llvm_i1_ty>]>;
+def int_masked_expandload: Intrinsic<[llvm_anyvector_ty],
+ [LLVMPointerToElt<0>,
+ LLVMVectorSameWidth<0, llvm_i1_ty>,
+ LLVMMatchType<0>],
+ [IntrReadMem]>;
+
+def int_masked_compressstore: Intrinsic<[],
+ [llvm_anyvector_ty,
+ LLVMPointerToElt<0>,
+ LLVMVectorSameWidth<0, llvm_i1_ty>],
+ [IntrArgMemOnly]>;
+
// Test whether a pointer is associated with a type metadata identifier.
def int_type_test : Intrinsic<[llvm_i1_ty], [llvm_ptr_ty, llvm_metadata_ty],
[IntrNoMem]>;
@@ -687,6 +759,15 @@ def int_type_checked_load : Intrinsic<[llvm_ptr_ty, llvm_i1_ty],
def int_load_relative: Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_anyint_ty],
[IntrReadMem, IntrArgMemOnly]>;
+//===------ Memory intrinsics with element-wise atomicity guarantees ------===//
+//
+
+def int_memcpy_element_atomic : Intrinsic<[],
+ [llvm_anyptr_ty, llvm_anyptr_ty,
+ llvm_i64_ty, llvm_i32_ty],
+ [IntrArgMemOnly, NoCapture<0>, NoCapture<1>,
+ WriteOnly<0>, ReadOnly<1>]>;
+
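A hedged C++ sketch of how a caller might obtain a declaration of this new overloaded intrinsic; the enum spelling Intrinsic::memcpy_element_atomic and the i8* operand types are assumptions following the usual naming scheme:

    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/Module.h"

    // Look up llvm.memcpy.element.atomic specialized for i8* dest/source.
    static llvm::Function *getElementAtomicMemCpy(llvm::Module &M) {
      llvm::Type *I8Ptr = llvm::Type::getInt8PtrTy(M.getContext());
      return llvm::Intrinsic::getDeclaration(
          &M, llvm::Intrinsic::memcpy_element_atomic, {I8Ptr, I8Ptr});
    }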
//===----------------------------------------------------------------------===//
// Target-specific intrinsics
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 9bf2a4dd5a1d..078959ce15d0 100644
--- a/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/contrib/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -41,8 +41,6 @@ defm int_r600_read_tgid : AMDGPUReadPreloadRegisterIntrinsic_xyz_named
defm int_r600_read_local_size : AMDGPUReadPreloadRegisterIntrinsic_xyz;
defm int_r600_read_tidig : AMDGPUReadPreloadRegisterIntrinsic_xyz;
-def int_r600_read_workdim : AMDGPUReadPreloadRegisterIntrinsic;
-
def int_r600_group_barrier : GCCBuiltin<"__builtin_r600_group_barrier">,
Intrinsic<[], [], [IntrConvergent]>;
@@ -70,13 +68,48 @@ def int_r600_recipsqrt_clamped : Intrinsic<
let TargetPrefix = "amdgcn" in {
+//===----------------------------------------------------------------------===//
+// ABI Special Intrinsics
+//===----------------------------------------------------------------------===//
+
defm int_amdgcn_workitem_id : AMDGPUReadPreloadRegisterIntrinsic_xyz;
defm int_amdgcn_workgroup_id : AMDGPUReadPreloadRegisterIntrinsic_xyz_named
<"__builtin_amdgcn_workgroup_id">;
+def int_amdgcn_dispatch_ptr :
+ GCCBuiltin<"__builtin_amdgcn_dispatch_ptr">,
+ Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>;
+
+def int_amdgcn_queue_ptr :
+ GCCBuiltin<"__builtin_amdgcn_queue_ptr">,
+ Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>;
+
+def int_amdgcn_kernarg_segment_ptr :
+ GCCBuiltin<"__builtin_amdgcn_kernarg_segment_ptr">,
+ Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>;
+
+def int_amdgcn_implicitarg_ptr :
+ GCCBuiltin<"__builtin_amdgcn_implicitarg_ptr">,
+ Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>;
+
+def int_amdgcn_groupstaticsize :
+ GCCBuiltin<"__builtin_amdgcn_groupstaticsize">,
+ Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>;
+
+def int_amdgcn_dispatch_id :
+ GCCBuiltin<"__builtin_amdgcn_dispatch_id">,
+ Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>;
+
+//===----------------------------------------------------------------------===//
+// Instruction Intrinsics
+//===----------------------------------------------------------------------===//
+
def int_amdgcn_s_barrier : GCCBuiltin<"__builtin_amdgcn_s_barrier">,
Intrinsic<[], [], [IntrConvergent]>;
+def int_amdgcn_wave_barrier : GCCBuiltin<"__builtin_amdgcn_wave_barrier">,
+ Intrinsic<[], [], [IntrConvergent]>;
+
def int_amdgcn_s_waitcnt : Intrinsic<[], [llvm_i32_ty], []>;
def int_amdgcn_div_scale : Intrinsic<
@@ -115,10 +148,18 @@ def int_amdgcn_log_clamp : Intrinsic<
[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
>;
+def int_amdgcn_fmul_legacy : GCCBuiltin<"__builtin_amdgcn_fmul_legacy">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]
+>;
+
def int_amdgcn_rcp : Intrinsic<
[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
>;
+def int_amdgcn_rcp_legacy : GCCBuiltin<"__builtin_amdgcn_rcp_legacy">,
+ Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]
+>;
+
def int_amdgcn_rsq : Intrinsic<
[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
>;
@@ -140,7 +181,7 @@ def int_amdgcn_frexp_mant : Intrinsic<
>;
def int_amdgcn_frexp_exp : Intrinsic<
- [llvm_i32_ty], [llvm_anyfloat_ty], [IntrNoMem]
+ [llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem]
>;
// v_fract is buggy on SI/CI. It mishandles infinities, may return 1.0
@@ -174,6 +215,11 @@ def int_amdgcn_cubetc : GCCBuiltin<"__builtin_amdgcn_cubetc">,
[llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]
>;
+// v_ffbh_i32, as opposed to v_ffbh_u32. For v_ffbh_u32, llvm.ctlz
+// should be used.
+def int_amdgcn_sffbh :
+ Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+
// TODO: Do we want an ordering for these?
def int_amdgcn_atomic_inc : Intrinsic<[llvm_anyint_ty],
[llvm_anyptr_ty, LLVMMatchType<0>],
@@ -186,34 +232,130 @@ def int_amdgcn_atomic_dec : Intrinsic<[llvm_anyint_ty],
>;
class AMDGPUImageLoad : Intrinsic <
- [llvm_v4f32_ty], // vdata(VGPR)
+ [llvm_anyfloat_ty], // vdata(VGPR)
[llvm_anyint_ty, // vaddr(VGPR)
- llvm_v8i32_ty, // rsrc(SGPR)
+ llvm_anyint_ty, // rsrc(SGPR)
llvm_i32_ty, // dmask(imm)
- llvm_i1_ty, // r128(imm)
- llvm_i1_ty, // da(imm)
llvm_i1_ty, // glc(imm)
- llvm_i1_ty], // slc(imm)
+ llvm_i1_ty, // slc(imm)
+ llvm_i1_ty, // lwe(imm)
+ llvm_i1_ty], // da(imm)
[IntrReadMem]>;
def int_amdgcn_image_load : AMDGPUImageLoad;
def int_amdgcn_image_load_mip : AMDGPUImageLoad;
+def int_amdgcn_image_getresinfo : AMDGPUImageLoad;
class AMDGPUImageStore : Intrinsic <
[],
- [llvm_v4f32_ty, // vdata(VGPR)
+ [llvm_anyfloat_ty, // vdata(VGPR)
llvm_anyint_ty, // vaddr(VGPR)
- llvm_v8i32_ty, // rsrc(SGPR)
+ llvm_anyint_ty, // rsrc(SGPR)
llvm_i32_ty, // dmask(imm)
- llvm_i1_ty, // r128(imm)
- llvm_i1_ty, // da(imm)
llvm_i1_ty, // glc(imm)
- llvm_i1_ty], // slc(imm)
+ llvm_i1_ty, // slc(imm)
+ llvm_i1_ty, // lwe(imm)
+ llvm_i1_ty], // da(imm)
[]>;
def int_amdgcn_image_store : AMDGPUImageStore;
def int_amdgcn_image_store_mip : AMDGPUImageStore;
+class AMDGPUImageSample : Intrinsic <
+ [llvm_anyfloat_ty], // vdata(VGPR)
+ [llvm_anyfloat_ty, // vaddr(VGPR)
+ llvm_anyint_ty, // rsrc(SGPR)
+ llvm_v4i32_ty, // sampler(SGPR)
+ llvm_i32_ty, // dmask(imm)
+ llvm_i1_ty, // unorm(imm)
+ llvm_i1_ty, // glc(imm)
+ llvm_i1_ty, // slc(imm)
+ llvm_i1_ty, // lwe(imm)
+ llvm_i1_ty], // da(imm)
+ [IntrReadMem]>;
+
+// Basic sample
+def int_amdgcn_image_sample : AMDGPUImageSample;
+def int_amdgcn_image_sample_cl : AMDGPUImageSample;
+def int_amdgcn_image_sample_d : AMDGPUImageSample;
+def int_amdgcn_image_sample_d_cl : AMDGPUImageSample;
+def int_amdgcn_image_sample_l : AMDGPUImageSample;
+def int_amdgcn_image_sample_b : AMDGPUImageSample;
+def int_amdgcn_image_sample_b_cl : AMDGPUImageSample;
+def int_amdgcn_image_sample_lz : AMDGPUImageSample;
+def int_amdgcn_image_sample_cd : AMDGPUImageSample;
+def int_amdgcn_image_sample_cd_cl : AMDGPUImageSample;
+
+// Sample with comparison
+def int_amdgcn_image_sample_c : AMDGPUImageSample;
+def int_amdgcn_image_sample_c_cl : AMDGPUImageSample;
+def int_amdgcn_image_sample_c_d : AMDGPUImageSample;
+def int_amdgcn_image_sample_c_d_cl : AMDGPUImageSample;
+def int_amdgcn_image_sample_c_l : AMDGPUImageSample;
+def int_amdgcn_image_sample_c_b : AMDGPUImageSample;
+def int_amdgcn_image_sample_c_b_cl : AMDGPUImageSample;
+def int_amdgcn_image_sample_c_lz : AMDGPUImageSample;
+def int_amdgcn_image_sample_c_cd : AMDGPUImageSample;
+def int_amdgcn_image_sample_c_cd_cl : AMDGPUImageSample;
+
+// Sample with offsets
+def int_amdgcn_image_sample_o : AMDGPUImageSample;
+def int_amdgcn_image_sample_cl_o : AMDGPUImageSample;
+def int_amdgcn_image_sample_d_o : AMDGPUImageSample;
+def int_amdgcn_image_sample_d_cl_o : AMDGPUImageSample;
+def int_amdgcn_image_sample_l_o : AMDGPUImageSample;
+def int_amdgcn_image_sample_b_o : AMDGPUImageSample;
+def int_amdgcn_image_sample_b_cl_o : AMDGPUImageSample;
+def int_amdgcn_image_sample_lz_o : AMDGPUImageSample;
+def int_amdgcn_image_sample_cd_o : AMDGPUImageSample;
+def int_amdgcn_image_sample_cd_cl_o : AMDGPUImageSample;
+
+// Sample with comparison and offsets
+def int_amdgcn_image_sample_c_o : AMDGPUImageSample;
+def int_amdgcn_image_sample_c_cl_o : AMDGPUImageSample;
+def int_amdgcn_image_sample_c_d_o : AMDGPUImageSample;
+def int_amdgcn_image_sample_c_d_cl_o : AMDGPUImageSample;
+def int_amdgcn_image_sample_c_l_o : AMDGPUImageSample;
+def int_amdgcn_image_sample_c_b_o : AMDGPUImageSample;
+def int_amdgcn_image_sample_c_b_cl_o : AMDGPUImageSample;
+def int_amdgcn_image_sample_c_lz_o : AMDGPUImageSample;
+def int_amdgcn_image_sample_c_cd_o : AMDGPUImageSample;
+def int_amdgcn_image_sample_c_cd_cl_o : AMDGPUImageSample;
+
+// Basic gather4
+def int_amdgcn_image_gather4 : AMDGPUImageSample;
+def int_amdgcn_image_gather4_cl : AMDGPUImageSample;
+def int_amdgcn_image_gather4_l : AMDGPUImageSample;
+def int_amdgcn_image_gather4_b : AMDGPUImageSample;
+def int_amdgcn_image_gather4_b_cl : AMDGPUImageSample;
+def int_amdgcn_image_gather4_lz : AMDGPUImageSample;
+
+// Gather4 with comparison
+def int_amdgcn_image_gather4_c : AMDGPUImageSample;
+def int_amdgcn_image_gather4_c_cl : AMDGPUImageSample;
+def int_amdgcn_image_gather4_c_l : AMDGPUImageSample;
+def int_amdgcn_image_gather4_c_b : AMDGPUImageSample;
+def int_amdgcn_image_gather4_c_b_cl : AMDGPUImageSample;
+def int_amdgcn_image_gather4_c_lz : AMDGPUImageSample;
+
+// Gather4 with offsets
+def int_amdgcn_image_gather4_o : AMDGPUImageSample;
+def int_amdgcn_image_gather4_cl_o : AMDGPUImageSample;
+def int_amdgcn_image_gather4_l_o : AMDGPUImageSample;
+def int_amdgcn_image_gather4_b_o : AMDGPUImageSample;
+def int_amdgcn_image_gather4_b_cl_o : AMDGPUImageSample;
+def int_amdgcn_image_gather4_lz_o : AMDGPUImageSample;
+
+// Gather4 with comparison and offsets
+def int_amdgcn_image_gather4_c_o : AMDGPUImageSample;
+def int_amdgcn_image_gather4_c_cl_o : AMDGPUImageSample;
+def int_amdgcn_image_gather4_c_l_o : AMDGPUImageSample;
+def int_amdgcn_image_gather4_c_b_o : AMDGPUImageSample;
+def int_amdgcn_image_gather4_c_b_cl_o : AMDGPUImageSample;
+def int_amdgcn_image_gather4_c_lz_o : AMDGPUImageSample;
+
+def int_amdgcn_image_getlod : AMDGPUImageSample;
+
class AMDGPUImageAtomic : Intrinsic <
[llvm_i32_ty],
[llvm_i32_ty, // vdata(VGPR)
@@ -298,9 +440,6 @@ def int_amdgcn_buffer_atomic_cmpswap : Intrinsic<
llvm_i1_ty], // slc(imm)
[]>;
-def int_amdgcn_read_workdim : AMDGPUReadPreloadRegisterIntrinsic;
-
-
def int_amdgcn_buffer_wbinvl1_sc :
GCCBuiltin<"__builtin_amdgcn_buffer_wbinvl1_sc">,
Intrinsic<[], [], []>;
@@ -322,35 +461,33 @@ def int_amdgcn_s_sleep :
Intrinsic<[], [llvm_i32_ty], []> {
}
+def int_amdgcn_s_incperflevel :
+ GCCBuiltin<"__builtin_amdgcn_s_incperflevel">,
+ Intrinsic<[], [llvm_i32_ty], []> {
+}
+
+def int_amdgcn_s_decperflevel :
+ GCCBuiltin<"__builtin_amdgcn_s_decperflevel">,
+ Intrinsic<[], [llvm_i32_ty], []> {
+}
+
def int_amdgcn_s_getreg :
GCCBuiltin<"__builtin_amdgcn_s_getreg">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrReadMem]>;
-def int_amdgcn_groupstaticsize :
- GCCBuiltin<"__builtin_amdgcn_groupstaticsize">,
- Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>;
-
-def int_amdgcn_dispatch_ptr :
- GCCBuiltin<"__builtin_amdgcn_dispatch_ptr">,
- Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>;
-
-def int_amdgcn_queue_ptr :
- GCCBuiltin<"__builtin_amdgcn_queue_ptr">,
- Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>;
-
-def int_amdgcn_kernarg_segment_ptr :
- GCCBuiltin<"__builtin_amdgcn_kernarg_segment_ptr">,
- Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>;
-
-def int_amdgcn_implicitarg_ptr :
- GCCBuiltin<"__builtin_amdgcn_implicitarg_ptr">,
- Intrinsic<[LLVMQualPointerType<llvm_i8_ty, 2>], [], [IntrNoMem]>;
+// __builtin_amdgcn_interp_mov <param>, <attr_chan>, <attr>, <m0>
+// param values: 0 = P10, 1 = P20, 2 = P0
+def int_amdgcn_interp_mov :
+ GCCBuiltin<"__builtin_amdgcn_interp_mov">,
+ Intrinsic<[llvm_float_ty],
+ [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [IntrNoMem]>;
// __builtin_amdgcn_interp_p1 <i>, <attr_chan>, <attr>, <m0>
def int_amdgcn_interp_p1 :
GCCBuiltin<"__builtin_amdgcn_interp_p1">,
Intrinsic<[llvm_float_ty],
- [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>; // This intrinsic reads from lds, but the memory
// values are constant, so it behaves like IntrNoMem.
@@ -358,7 +495,7 @@ def int_amdgcn_interp_p1 :
def int_amdgcn_interp_p2 :
GCCBuiltin<"__builtin_amdgcn_interp_p2">,
Intrinsic<[llvm_float_ty],
- [llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
+ [llvm_float_ty, llvm_float_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty],
[IntrNoMem]>; // See int_amdgcn_v_interp_p1 for why this is
// IntrNoMem.
@@ -387,6 +524,54 @@ def int_amdgcn_lerp :
GCCBuiltin<"__builtin_amdgcn_lerp">,
Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_amdgcn_sad_u8 :
+ GCCBuiltin<"__builtin_amdgcn_sad_u8">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_amdgcn_msad_u8 :
+ GCCBuiltin<"__builtin_amdgcn_msad_u8">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_amdgcn_sad_hi_u8 :
+ GCCBuiltin<"__builtin_amdgcn_sad_hi_u8">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_amdgcn_sad_u16 :
+ GCCBuiltin<"__builtin_amdgcn_sad_u16">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_amdgcn_qsad_pk_u16_u8 :
+ GCCBuiltin<"__builtin_amdgcn_qsad_pk_u16_u8">,
+ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], [IntrNoMem]>;
+
+def int_amdgcn_mqsad_pk_u16_u8 :
+ GCCBuiltin<"__builtin_amdgcn_mqsad_pk_u16_u8">,
+ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i32_ty, llvm_i64_ty], [IntrNoMem]>;
+
+def int_amdgcn_mqsad_u32_u8 :
+ GCCBuiltin<"__builtin_amdgcn_mqsad_u32_u8">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_i64_ty, llvm_i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+
+def int_amdgcn_cvt_pk_u8_f32 :
+ GCCBuiltin<"__builtin_amdgcn_cvt_pk_u8_f32">,
+ Intrinsic<[llvm_i32_ty], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
+
+def int_amdgcn_icmp :
+ Intrinsic<[llvm_i64_ty], [llvm_anyint_ty, LLVMMatchType<0>, llvm_i32_ty],
+ [IntrNoMem, IntrConvergent]>;
+
+def int_amdgcn_fcmp :
+ Intrinsic<[llvm_i64_ty], [llvm_anyfloat_ty, LLVMMatchType<0>, llvm_i32_ty],
+ [IntrNoMem, IntrConvergent]>;
+
+def int_amdgcn_readfirstlane :
+ GCCBuiltin<"__builtin_amdgcn_readfirstlane">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem, IntrConvergent]>;
+
+def int_amdgcn_readlane :
+ GCCBuiltin<"__builtin_amdgcn_readlane">,
+ Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem, IntrConvergent]>;
+
//===----------------------------------------------------------------------===//
// CI+ Intrinsics
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/include/llvm/IR/IntrinsicsNVVM.td b/contrib/llvm/include/llvm/IR/IntrinsicsNVVM.td
index 6919ec47eb9a..f035ac3c90ee 100644
--- a/contrib/llvm/include/llvm/IR/IntrinsicsNVVM.td
+++ b/contrib/llvm/include/llvm/IR/IntrinsicsNVVM.td
@@ -729,6 +729,39 @@ let TargetPrefix = "nvvm" in {
[LLVMAnyPointerType<llvm_i32_ty>, llvm_i32_ty],
[IntrArgMemOnly, NoCapture<0>]>;
+ class SCOPED_ATOMIC2_impl<LLVMType elty>
+ : Intrinsic<[elty],
+ [LLVMAnyPointerType<LLVMMatchType<0>>, LLVMMatchType<0>],
+ [IntrArgMemOnly, NoCapture<0>]>;
+ class SCOPED_ATOMIC3_impl<LLVMType elty>
+ : Intrinsic<[elty],
+ [LLVMAnyPointerType<LLVMMatchType<0>>, LLVMMatchType<0>,
+ LLVMMatchType<0>],
+ [IntrArgMemOnly, NoCapture<0>]>;
+
+ multiclass PTXAtomicWithScope2<LLVMType elty> {
+ def _cta : SCOPED_ATOMIC2_impl<elty>;
+ def _sys : SCOPED_ATOMIC2_impl<elty>;
+ }
+ multiclass PTXAtomicWithScope3<LLVMType elty> {
+ def _cta : SCOPED_ATOMIC3_impl<elty>;
+ def _sys : SCOPED_ATOMIC3_impl<elty>;
+ }
+ multiclass PTXAtomicWithScope2_fi {
+ defm _f: PTXAtomicWithScope2<llvm_anyfloat_ty>;
+ defm _i: PTXAtomicWithScope2<llvm_anyint_ty>;
+ }
+ defm int_nvvm_atomic_add_gen : PTXAtomicWithScope2_fi;
+ defm int_nvvm_atomic_inc_gen_i : PTXAtomicWithScope2<llvm_anyint_ty>;
+ defm int_nvvm_atomic_dec_gen_i : PTXAtomicWithScope2<llvm_anyint_ty>;
+ defm int_nvvm_atomic_exch_gen_i: PTXAtomicWithScope2<llvm_anyint_ty>;
+ defm int_nvvm_atomic_xor_gen_i : PTXAtomicWithScope2<llvm_anyint_ty>;
+ defm int_nvvm_atomic_max_gen_i : PTXAtomicWithScope2<llvm_anyint_ty>;
+ defm int_nvvm_atomic_min_gen_i : PTXAtomicWithScope2<llvm_anyint_ty>;
+ defm int_nvvm_atomic_or_gen_i : PTXAtomicWithScope2<llvm_anyint_ty>;
+ defm int_nvvm_atomic_and_gen_i : PTXAtomicWithScope2<llvm_anyint_ty>;
+ defm int_nvvm_atomic_cas_gen_i : PTXAtomicWithScope3<llvm_anyint_ty>;
+
// Bar.Sync
// The builtin for "bar.sync 0" is called __syncthreads. Unlike most of the
diff --git a/contrib/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/contrib/llvm/include/llvm/IR/IntrinsicsPowerPC.td
index e195c0ebac3a..12e23b681ca4 100644
--- a/contrib/llvm/include/llvm/IR/IntrinsicsPowerPC.td
+++ b/contrib/llvm/include/llvm/IR/IntrinsicsPowerPC.td
@@ -250,6 +250,12 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
def int_ppc_altivec_vcmpgtuw : GCCBuiltin<"__builtin_altivec_vcmpgtuw">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
+ def int_ppc_altivec_vcmpnew : GCCBuiltin<"__builtin_altivec_vcmpnew">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vcmpnezw : GCCBuiltin<"__builtin_altivec_vcmpnezw">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
def int_ppc_altivec_vcmpequh : GCCBuiltin<"__builtin_altivec_vcmpequh">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
@@ -260,6 +266,12 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
def int_ppc_altivec_vcmpgtuh : GCCBuiltin<"__builtin_altivec_vcmpgtuh">,
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
+ def int_ppc_altivec_vcmpneh : GCCBuiltin<"__builtin_altivec_vcmpneh">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vcmpnezh : GCCBuiltin<"__builtin_altivec_vcmpnezh">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
def int_ppc_altivec_vcmpequb : GCCBuiltin<"__builtin_altivec_vcmpequb">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
@@ -270,6 +282,12 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
def int_ppc_altivec_vcmpgtub : GCCBuiltin<"__builtin_altivec_vcmpgtub">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
[IntrNoMem]>;
+ def int_ppc_altivec_vcmpneb : GCCBuiltin<"__builtin_altivec_vcmpneb">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vcmpnezb : GCCBuiltin<"__builtin_altivec_vcmpnezb">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
// Predicate Comparisons. The first operand specifies interpretation of CR6.
def int_ppc_altivec_vcmpbfp_p : GCCBuiltin<"__builtin_altivec_vcmpbfp_p">,
@@ -304,6 +322,12 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
def int_ppc_altivec_vcmpgtuw_p : GCCBuiltin<"__builtin_altivec_vcmpgtuw_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4i32_ty,llvm_v4i32_ty],
[IntrNoMem]>;
+ def int_ppc_altivec_vcmpnew_p : GCCBuiltin<"__builtin_altivec_vcmpnew_p">,
+ Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4i32_ty,llvm_v4i32_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vcmpnezw_p : GCCBuiltin<"__builtin_altivec_vcmpnezw_p">,
+ Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v4i32_ty,llvm_v4i32_ty],
+ [IntrNoMem]>;
def int_ppc_altivec_vcmpequh_p : GCCBuiltin<"__builtin_altivec_vcmpequh_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v8i16_ty,llvm_v8i16_ty],
@@ -314,6 +338,12 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
def int_ppc_altivec_vcmpgtuh_p : GCCBuiltin<"__builtin_altivec_vcmpgtuh_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v8i16_ty,llvm_v8i16_ty],
[IntrNoMem]>;
+ def int_ppc_altivec_vcmpneh_p : GCCBuiltin<"__builtin_altivec_vcmpneh_p">,
+ Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v8i16_ty,llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vcmpnezh_p : GCCBuiltin<"__builtin_altivec_vcmpnezh_p">,
+ Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v8i16_ty,llvm_v8i16_ty],
+ [IntrNoMem]>;
def int_ppc_altivec_vcmpequb_p : GCCBuiltin<"__builtin_altivec_vcmpequb_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v16i8_ty,llvm_v16i8_ty],
@@ -324,6 +354,23 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.".
def int_ppc_altivec_vcmpgtub_p : GCCBuiltin<"__builtin_altivec_vcmpgtub_p">,
Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v16i8_ty,llvm_v16i8_ty],
[IntrNoMem]>;
+ def int_ppc_altivec_vcmpneb_p : GCCBuiltin<"__builtin_altivec_vcmpneb_p">,
+ Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v16i8_ty,llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vcmpnezb_p : GCCBuiltin<"__builtin_altivec_vcmpnezb_p">,
+ Intrinsic<[llvm_i32_ty],[llvm_i32_ty,llvm_v16i8_ty,llvm_v16i8_ty],
+ [IntrNoMem]>;
+ def int_ppc_altivec_vclzlsbb : GCCBuiltin<"__builtin_altivec_vclzlsbb">,
+ Intrinsic<[llvm_i32_ty],[llvm_v16i8_ty],[IntrNoMem]>;
+ def int_ppc_altivec_vctzlsbb : GCCBuiltin<"__builtin_altivec_vctzlsbb">,
+ Intrinsic<[llvm_i32_ty],[llvm_v16i8_ty],[IntrNoMem]>;
+ def int_ppc_altivec_vprtybw : GCCBuiltin<"__builtin_altivec_vprtybw">,
+ Intrinsic<[llvm_v4i32_ty],[llvm_v4i32_ty],[IntrNoMem]>;
+ def int_ppc_altivec_vprtybd : GCCBuiltin<"__builtin_altivec_vprtybd">,
+ Intrinsic<[llvm_v2i64_ty],[llvm_v2i64_ty],[IntrNoMem]>;
+ def int_ppc_altivec_vprtybq : GCCBuiltin<"__builtin_altivec_vprtybq">,
+ Intrinsic<[llvm_v1i128_ty],[llvm_v1i128_ty],[IntrNoMem]>;
+
}
// Vector average.
@@ -575,6 +622,8 @@ def int_ppc_altivec_vsl : PowerPC_Vec_WWW_Intrinsic<"vsl">;
def int_ppc_altivec_vslo : PowerPC_Vec_WWW_Intrinsic<"vslo">;
def int_ppc_altivec_vslb : PowerPC_Vec_BBB_Intrinsic<"vslb">;
+def int_ppc_altivec_vslv : PowerPC_Vec_BBB_Intrinsic<"vslv">;
+def int_ppc_altivec_vsrv : PowerPC_Vec_BBB_Intrinsic<"vsrv">;
def int_ppc_altivec_vslh : PowerPC_Vec_HHH_Intrinsic<"vslh">;
def int_ppc_altivec_vslw : PowerPC_Vec_WWW_Intrinsic<"vslw">;
@@ -657,6 +706,27 @@ def int_ppc_altivec_crypto_vpmsumw :
def int_ppc_altivec_crypto_vpmsumd :
PowerPC_Vec_DDD_Intrinsic<"crypto_vpmsumd">;
+// Absolute Difference intrinsics
+def int_ppc_altivec_vabsdub : PowerPC_Vec_BBB_Intrinsic<"vabsdub">;
+def int_ppc_altivec_vabsduh : PowerPC_Vec_HHH_Intrinsic<"vabsduh">;
+def int_ppc_altivec_vabsduw : PowerPC_Vec_WWW_Intrinsic<"vabsduw">;
+
+// Vector rotates
+def int_ppc_altivec_vrlwnm :
+ PowerPC_Vec_Intrinsic<"vrlwnm", [llvm_v4i32_ty],
+ [llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+def int_ppc_altivec_vrlwmi :
+ PowerPC_Vec_Intrinsic<"vrlwmi", [llvm_v4i32_ty],
+ [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty],
+ [IntrNoMem]>;
+def int_ppc_altivec_vrldnm :
+ PowerPC_Vec_Intrinsic<"vrldnm", [llvm_v2i64_ty],
+ [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>;
+def int_ppc_altivec_vrldmi :
+ PowerPC_Vec_Intrinsic<"vrldmi", [llvm_v2i64_ty],
+ [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+
//===----------------------------------------------------------------------===//
// PowerPC VSX Intrinsic Definitions.
@@ -667,13 +737,32 @@ def int_ppc_vsx_lxvw4x :
Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>;
def int_ppc_vsx_lxvd2x :
Intrinsic<[llvm_v2f64_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>;
+def int_ppc_vsx_lxvw4x_be :
+ Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>;
+def int_ppc_vsx_lxvd2x_be :
+ Intrinsic<[llvm_v2f64_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>;
+def int_ppc_vsx_lxvl :
+ Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrReadMem,
+ IntrArgMemOnly]>;
+def int_ppc_vsx_lxvll :
+ Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty, llvm_i64_ty], [IntrReadMem,
+ IntrArgMemOnly]>;
+def int_ppc_vsx_stxvl :
+ Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty, llvm_i64_ty],
+ [IntrArgMemOnly]>;
+def int_ppc_vsx_stxvll :
+ Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty, llvm_i64_ty],
+ [IntrArgMemOnly]>;
// Vector store.
def int_ppc_vsx_stxvw4x :
Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty], [IntrArgMemOnly]>;
def int_ppc_vsx_stxvd2x :
Intrinsic<[], [llvm_v2f64_ty, llvm_ptr_ty], [IntrArgMemOnly]>;
-
+def int_ppc_vsx_stxvw4x_be :
+ Intrinsic<[], [llvm_v4i32_ty, llvm_ptr_ty], [IntrArgMemOnly]>;
+def int_ppc_vsx_stxvd2x_be :
+ Intrinsic<[], [llvm_v2f64_ty, llvm_ptr_ty], [IntrArgMemOnly]>;
// Vector and scalar maximum.
def int_ppc_vsx_xvmaxdp : PowerPC_VSX_Vec_DDD_Intrinsic<"xvmaxdp">;
def int_ppc_vsx_xvmaxsp : PowerPC_VSX_Vec_FFF_Intrinsic<"xvmaxsp">;
@@ -746,6 +835,67 @@ def int_ppc_vsx_xvcmpgtsp_p : GCCBuiltin<"__builtin_vsx_xvcmpgtsp_p">,
def int_ppc_vsx_xxleqv :
PowerPC_VSX_Intrinsic<"xxleqv", [llvm_v4i32_ty],
[llvm_v4i32_ty, llvm_v4i32_ty], [IntrNoMem]>;
+def int_ppc_vsx_xviexpdp :
+ PowerPC_VSX_Intrinsic<"xviexpdp",[llvm_v2f64_ty],
+ [llvm_v2i64_ty, llvm_v2i64_ty],[IntrNoMem]>;
+def int_ppc_vsx_xviexpsp :
+ PowerPC_VSX_Intrinsic<"xviexpsp",[llvm_v4f32_ty],
+ [llvm_v4i32_ty, llvm_v4i32_ty],[IntrNoMem]>;
+def int_ppc_vsx_xvcvdpsxws :
+ PowerPC_VSX_Intrinsic<"xvcvdpsxws", [llvm_v4i32_ty],
+ [llvm_v2f64_ty], [IntrNoMem]>;
+def int_ppc_vsx_xvcvdpuxws :
+ PowerPC_VSX_Intrinsic<"xvcvdpuxws", [llvm_v4i32_ty],
+ [llvm_v2f64_ty], [IntrNoMem]>;
+def int_ppc_vsx_xvcvsxwdp :
+ PowerPC_VSX_Intrinsic<"xvcvsxwdp", [llvm_v2f64_ty],
+ [llvm_v4i32_ty], [IntrNoMem]>;
+def int_ppc_vsx_xvcvuxwdp :
+ PowerPC_VSX_Intrinsic<"xvcvuxwdp", [llvm_v2f64_ty],
+ [llvm_v4i32_ty], [IntrNoMem]>;
+def int_ppc_vsx_xvcvspdp :
+ PowerPC_VSX_Intrinsic<"xvcvspdp", [llvm_v2f64_ty],
+ [llvm_v4f32_ty], [IntrNoMem]>;
+def int_ppc_vsx_xvcvsxdsp :
+ PowerPC_VSX_Intrinsic<"xvcvsxdsp", [llvm_v4f32_ty],
+ [llvm_v2i64_ty], [IntrNoMem]>;
+def int_ppc_vsx_xvcvuxdsp :
+ PowerPC_VSX_Intrinsic<"xvcvuxdsp", [llvm_v4f32_ty],
+ [llvm_v2i64_ty], [IntrNoMem]>;
+def int_ppc_vsx_xvcvdpsp :
+ PowerPC_VSX_Intrinsic<"xvcvdpsp", [llvm_v4f32_ty],
+ [llvm_v2f64_ty], [IntrNoMem]>;
+def int_ppc_vsx_xvcvsphp :
+ PowerPC_VSX_Intrinsic<"xvcvsphp", [llvm_v4f32_ty],
+ [llvm_v4f32_ty], [IntrNoMem]>;
+def int_ppc_vsx_xvxexpdp :
+ PowerPC_VSX_Intrinsic<"xvxexpdp", [llvm_v2i64_ty],
+ [llvm_v2f64_ty], [IntrNoMem]>;
+def int_ppc_vsx_xvxexpsp :
+ PowerPC_VSX_Intrinsic<"xvxexpsp", [llvm_v4i32_ty],
+ [llvm_v4f32_ty], [IntrNoMem]>;
+def int_ppc_vsx_xvxsigdp :
+ PowerPC_VSX_Intrinsic<"xvxsigdp", [llvm_v2i64_ty],
+ [llvm_v2f64_ty], [IntrNoMem]>;
+def int_ppc_vsx_xvxsigsp :
+ PowerPC_VSX_Intrinsic<"xvxsigsp", [llvm_v4i32_ty],
+ [llvm_v4f32_ty], [IntrNoMem]>;
+def int_ppc_vsx_xvtstdcdp :
+ PowerPC_VSX_Intrinsic<"xvtstdcdp", [llvm_v2i64_ty],
+ [llvm_v2f64_ty, llvm_i32_ty], [IntrNoMem]>;
+def int_ppc_vsx_xvtstdcsp :
+ PowerPC_VSX_Intrinsic<"xvtstdcsp", [llvm_v4i32_ty],
+ [llvm_v4f32_ty,llvm_i32_ty], [IntrNoMem]>;
+def int_ppc_vsx_xvcvhpsp :
+ PowerPC_VSX_Intrinsic<"xvcvhpsp", [llvm_v4f32_ty],
+ [llvm_v8i16_ty],[IntrNoMem]>;
+def int_ppc_vsx_xxextractuw :
+ PowerPC_VSX_Intrinsic<"xxextractuw",[llvm_v2i64_ty],
+ [llvm_v2i64_ty,llvm_i32_ty], [IntrNoMem]>;
+def int_ppc_vsx_xxinsertw :
+ PowerPC_VSX_Intrinsic<"xxinsertw",[llvm_v4i32_ty],
+ [llvm_v4i32_ty,llvm_v2i64_ty,llvm_i32_ty],
+ [IntrNoMem]>;
}
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/include/llvm/IR/IntrinsicsSystemZ.td b/contrib/llvm/include/llvm/IR/IntrinsicsSystemZ.td
index bfc15b9bc09e..9be37d3645b2 100644
--- a/contrib/llvm/include/llvm/IR/IntrinsicsSystemZ.td
+++ b/contrib/llvm/include/llvm/IR/IntrinsicsSystemZ.td
@@ -382,6 +382,11 @@ let TargetPrefix = "s390" in {
//===----------------------------------------------------------------------===//
let TargetPrefix = "s390" in {
+ def int_s390_sfpc : GCCBuiltin<"__builtin_s390_sfpc">,
+ Intrinsic<[], [llvm_i32_ty], []>;
+ def int_s390_efpc : GCCBuiltin<"__builtin_s390_efpc">,
+ Intrinsic<[llvm_i32_ty], [], []>;
+
def int_s390_tdc : Intrinsic<[llvm_i32_ty], [llvm_anyfloat_ty, llvm_i64_ty],
[IntrNoMem]>;
}
diff --git a/contrib/llvm/include/llvm/IR/IntrinsicsX86.td b/contrib/llvm/include/llvm/IR/IntrinsicsX86.td
index b965f082b8d8..3a496cb6645c 100644
--- a/contrib/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/contrib/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -145,18 +145,6 @@ let TargetPrefix = "x86" in {
// Arithmetic ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse_add_ss :
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
- llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse_sub_ss :
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
- llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse_mul_ss :
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
- llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse_div_ss :
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
- llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_sse_sqrt_ss : GCCBuiltin<"__builtin_ia32_sqrtss">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty],
[IntrNoMem]>;
@@ -246,10 +234,10 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_sse_cvttss2si64 : GCCBuiltin<"__builtin_ia32_cvttss2si64">,
Intrinsic<[llvm_i64_ty], [llvm_v4f32_ty], [IntrNoMem]>;
- def int_x86_sse_cvtsi2ss : GCCBuiltin<"__builtin_ia32_cvtsi2ss">,
+ def int_x86_sse_cvtsi2ss : // TODO: Remove this intrinsic.
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
llvm_i32_ty], [IntrNoMem]>;
- def int_x86_sse_cvtsi642ss : GCCBuiltin<"__builtin_ia32_cvtsi642ss">,
+ def int_x86_sse_cvtsi642ss : // TODO: Remove this intrinsic.
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
llvm_i64_ty], [IntrNoMem]>;
@@ -287,18 +275,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// FP arithmetic ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_sse2_add_sd :
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
- llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_sub_sd :
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
- llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_mul_sd :
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
- llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_div_sd :
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
- llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_sse2_sqrt_sd : GCCBuiltin<"__builtin_ia32_sqrtsd">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty],
[IntrNoMem]>;
@@ -489,16 +465,16 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_i32_ty], [llvm_v2f64_ty], [IntrNoMem]>;
def int_x86_sse2_cvttsd2si64 : GCCBuiltin<"__builtin_ia32_cvttsd2si64">,
Intrinsic<[llvm_i64_ty], [llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_cvtsi2sd : GCCBuiltin<"__builtin_ia32_cvtsi2sd">,
+ def int_x86_sse2_cvtsi2sd : // TODO: Remove this intrinsic.
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
llvm_i32_ty], [IntrNoMem]>;
- def int_x86_sse2_cvtsi642sd : GCCBuiltin<"__builtin_ia32_cvtsi642sd">,
+ def int_x86_sse2_cvtsi642sd : // TODO: Remove this intrinsic.
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
llvm_i64_ty], [IntrNoMem]>;
def int_x86_sse2_cvtsd2ss : GCCBuiltin<"__builtin_ia32_cvtsd2ss">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
llvm_v2f64_ty], [IntrNoMem]>;
- def int_x86_sse2_cvtss2sd : GCCBuiltin<"__builtin_ia32_cvtss2sd">,
+ def int_x86_sse2_cvtss2sd : // TODO: Remove this intrinsic.
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
llvm_v4f32_ty], [IntrNoMem]>;
def int_x86_sse_cvtpd2pi : GCCBuiltin<"__builtin_ia32_cvtpd2pi">,
@@ -1340,58 +1316,19 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty],
[IntrNoMem]>;
- def int_x86_avx512_mask_vpermilvar_pd_256 :
- GCCBuiltin<"__builtin_ia32_vpermilvarpd256_mask">,
- Intrinsic<[llvm_v4f64_ty],
- [llvm_v4f64_ty, llvm_v4i64_ty, llvm_v4f64_ty, llvm_i8_ty],
- [IntrNoMem]>;
-
- def int_x86_avx512_mask_vpermilvar_pd_512 :
- GCCBuiltin<"__builtin_ia32_vpermilvarpd512_mask">,
- Intrinsic<[llvm_v8f64_ty],
- [llvm_v8f64_ty, llvm_v8i64_ty, llvm_v8f64_ty, llvm_i8_ty],
- [IntrNoMem]>;
-
- def int_x86_avx512_mask_vpermilvar_pd_128 :
- GCCBuiltin<"__builtin_ia32_vpermilvarpd_mask">,
- Intrinsic<[llvm_v2f64_ty],
- [llvm_v2f64_ty, llvm_v2i64_ty, llvm_v2f64_ty, llvm_i8_ty],
- [IntrNoMem]>;
-
- def int_x86_avx512_mask_vpermilvar_ps_256 :
- GCCBuiltin<"__builtin_ia32_vpermilvarps256_mask">,
- Intrinsic<[llvm_v8f32_ty],
- [llvm_v8f32_ty, llvm_v8i32_ty, llvm_v8f32_ty, llvm_i8_ty],
- [IntrNoMem]>;
-
- def int_x86_avx512_mask_vpermilvar_ps_512 :
- GCCBuiltin<"__builtin_ia32_vpermilvarps512_mask">,
- Intrinsic<[llvm_v16f32_ty],
- [llvm_v16f32_ty, llvm_v16i32_ty, llvm_v16f32_ty, llvm_i16_ty],
+ def int_x86_avx512_vpermilvar_pd_512 :
+ GCCBuiltin<"__builtin_ia32_vpermilvarpd512">,
+ Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8i64_ty],
[IntrNoMem]>;
- def int_x86_avx512_mask_vpermilvar_ps_128 :
- GCCBuiltin<"__builtin_ia32_vpermilvarps_mask">,
- Intrinsic<[llvm_v4f32_ty],
- [llvm_v4f32_ty, llvm_v4i32_ty, llvm_v4f32_ty, llvm_i8_ty],
+ def int_x86_avx512_vpermilvar_ps_512 :
+ GCCBuiltin<"__builtin_ia32_vpermilvarps512">,
+ Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16i32_ty],
[IntrNoMem]>;
- def int_x86_avx512_mask_pshuf_b_128 :
- GCCBuiltin<"__builtin_ia32_pshufb128_mask">,
- Intrinsic<[llvm_v16i8_ty],
- [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty],
- [IntrNoMem]>;
-
- def int_x86_avx512_mask_pshuf_b_256 :
- GCCBuiltin<"__builtin_ia32_pshufb256_mask">,
- Intrinsic<[llvm_v32i8_ty],
- [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty],
- [IntrNoMem]>;
-
- def int_x86_avx512_mask_pshuf_b_512 :
- GCCBuiltin<"__builtin_ia32_pshufb512_mask">,
- Intrinsic<[llvm_v64i8_ty],
- [llvm_v64i8_ty, llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty],
+ def int_x86_avx512_pshuf_b_512 :
+ GCCBuiltin<"__builtin_ia32_pshufb512">,
+ Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty],
[IntrNoMem]>;
def int_x86_avx512_mask_shuf_f32x4_256 :
@@ -1441,42 +1378,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v8i64_ty],
[llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty, llvm_v8i64_ty, llvm_i8_ty],
[IntrNoMem]>;
-
- def int_x86_avx512_mask_shuf_pd_128 :
- GCCBuiltin<"__builtin_ia32_shufpd128_mask">,
- Intrinsic<[llvm_v2f64_ty],
- [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty, llvm_v2f64_ty, llvm_i8_ty],
- [IntrNoMem]>;
-
- def int_x86_avx512_mask_shuf_pd_256 :
- GCCBuiltin<"__builtin_ia32_shufpd256_mask">,
- Intrinsic<[llvm_v4f64_ty],
- [llvm_v4f64_ty, llvm_v4f64_ty, llvm_i32_ty, llvm_v4f64_ty, llvm_i8_ty],
- [IntrNoMem]>;
-
- def int_x86_avx512_mask_shuf_pd_512 :
- GCCBuiltin<"__builtin_ia32_shufpd512_mask">,
- Intrinsic<[llvm_v8f64_ty],
- [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty],
- [IntrNoMem]>;
-
- def int_x86_avx512_mask_shuf_ps_128 :
- GCCBuiltin<"__builtin_ia32_shufps128_mask">,
- Intrinsic<[llvm_v4f32_ty],
- [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_v4f32_ty, llvm_i8_ty],
- [IntrNoMem]>;
-
- def int_x86_avx512_mask_shuf_ps_256 :
- GCCBuiltin<"__builtin_ia32_shufps256_mask">,
- Intrinsic<[llvm_v8f32_ty],
- [llvm_v8f32_ty, llvm_v8f32_ty, llvm_i32_ty, llvm_v8f32_ty, llvm_i8_ty],
- [IntrNoMem]>;
-
- def int_x86_avx512_mask_shuf_ps_512 :
- GCCBuiltin<"__builtin_ia32_shufps512_mask">,
- Intrinsic<[llvm_v16f32_ty],
- [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i16_ty],
- [IntrNoMem]>;
}
// Vector blend
@@ -1493,7 +1394,7 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx_dp_ps_256 : GCCBuiltin<"__builtin_ia32_dpps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty,
- llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
+ llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem, Commutative]>;
}
// Vector compare
@@ -1694,16 +1595,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[], [], []>;
}
-// Vector load with broadcast
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx_vbroadcastf128_pd_256 :
- GCCBuiltin<"__builtin_ia32_vbroadcastf128_pd256">,
- Intrinsic<[llvm_v4f64_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>;
- def int_x86_avx_vbroadcastf128_ps_256 :
- GCCBuiltin<"__builtin_ia32_vbroadcastf128_ps256">,
- Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty], [IntrReadMem, IntrArgMemOnly]>;
-}
-
// SIMD load ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx_ldu_dq_256 : GCCBuiltin<"__builtin_ia32_lddqu256">,
@@ -1726,20 +1617,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
[IntrReadMem, IntrArgMemOnly]>;
}
-// Conditional move ops
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx512_mask_move_ss :
- GCCBuiltin<"__builtin_ia32_movss_mask">,
- Intrinsic<[llvm_v4f32_ty],
- [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
- [IntrNoMem]>;
- def int_x86_avx512_mask_move_sd :
- GCCBuiltin<"__builtin_ia32_movsd_mask">,
- Intrinsic<[llvm_v2f64_ty],
- [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
- [IntrNoMem]>;
-}
-
// Conditional store ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx_maskstore_pd : GCCBuiltin<"__builtin_ia32_maskstorepd">,
@@ -1818,154 +1695,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
llvm_v32i8_ty], [IntrNoMem, Commutative]>;
}
-// Vector min, max
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx512_mask_pmaxs_b_128 : GCCBuiltin<"__builtin_ia32_pmaxsb128_mask">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
- llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmaxs_b_256 : GCCBuiltin<"__builtin_ia32_pmaxsb256_mask">,
- Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
- llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmaxs_b_512 : GCCBuiltin<"__builtin_ia32_pmaxsb512_mask">,
- Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty,
- llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmaxu_b_128 : GCCBuiltin<"__builtin_ia32_pmaxub128_mask">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
- llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmaxu_b_256 : GCCBuiltin<"__builtin_ia32_pmaxub256_mask">,
- Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
- llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmaxu_b_512 : GCCBuiltin<"__builtin_ia32_pmaxub512_mask">,
- Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty,
- llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmaxs_w_128 : GCCBuiltin<"__builtin_ia32_pmaxsw128_mask">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
- llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmaxs_w_256 : GCCBuiltin<"__builtin_ia32_pmaxsw256_mask">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
- llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmaxs_w_512 : GCCBuiltin<"__builtin_ia32_pmaxsw512_mask">,
- Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
- llvm_v32i16_ty, llvm_i32_ty],[IntrNoMem]>;
- def int_x86_avx512_mask_pmaxu_w_128 : GCCBuiltin<"__builtin_ia32_pmaxuw128_mask">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
- llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmaxu_w_256 : GCCBuiltin<"__builtin_ia32_pmaxuw256_mask">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
- llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmaxu_w_512 : GCCBuiltin<"__builtin_ia32_pmaxuw512_mask">,
- Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
- llvm_v32i16_ty, llvm_i32_ty],[IntrNoMem]>;
- def int_x86_avx512_mask_pmins_b_128 : GCCBuiltin<"__builtin_ia32_pminsb128_mask">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
- llvm_v16i8_ty,llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmins_b_256 : GCCBuiltin<"__builtin_ia32_pminsb256_mask">,
- Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
- llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmins_b_512 : GCCBuiltin<"__builtin_ia32_pminsb512_mask">,
- Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty,
- llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pminu_b_128 : GCCBuiltin<"__builtin_ia32_pminub128_mask">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
- llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pminu_b_256 : GCCBuiltin<"__builtin_ia32_pminub256_mask">,
- Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
- llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pminu_b_512 : GCCBuiltin<"__builtin_ia32_pminub512_mask">,
- Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty,
- llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmins_w_128 : GCCBuiltin<"__builtin_ia32_pminsw128_mask">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
- llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmins_w_256 : GCCBuiltin<"__builtin_ia32_pminsw256_mask">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
- llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmins_w_512 : GCCBuiltin<"__builtin_ia32_pminsw512_mask">,
- Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
- llvm_v32i16_ty, llvm_i32_ty],[IntrNoMem]>;
- def int_x86_avx512_mask_pminu_w_128 : GCCBuiltin<"__builtin_ia32_pminuw128_mask">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
- llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pminu_w_256 : GCCBuiltin<"__builtin_ia32_pminuw256_mask">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
- llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pminu_w_512 : GCCBuiltin<"__builtin_ia32_pminuw512_mask">,
- Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
- llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmaxu_d_512 : GCCBuiltin<"__builtin_ia32_pmaxud512_mask">,
- Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
- llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmaxu_d_256 : GCCBuiltin<"__builtin_ia32_pmaxud256_mask">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
- llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmaxu_d_128 : GCCBuiltin<"__builtin_ia32_pmaxud128_mask">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
- llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmaxs_d_512 : GCCBuiltin<"__builtin_ia32_pmaxsd512_mask">,
- Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
- llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmaxs_d_256 : GCCBuiltin<"__builtin_ia32_pmaxsd256_mask">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
- llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmaxs_d_128 : GCCBuiltin<"__builtin_ia32_pmaxsd128_mask">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
- llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmaxu_q_512 : GCCBuiltin<"__builtin_ia32_pmaxuq512_mask">,
- Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
- llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmaxu_q_256 : GCCBuiltin<"__builtin_ia32_pmaxuq256_mask">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
- llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmaxu_q_128 : GCCBuiltin<"__builtin_ia32_pmaxuq128_mask">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
- llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmaxs_q_512 : GCCBuiltin<"__builtin_ia32_pmaxsq512_mask">,
- Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
- llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmaxs_q_256 : GCCBuiltin<"__builtin_ia32_pmaxsq256_mask">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
- llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmaxs_q_128 : GCCBuiltin<"__builtin_ia32_pmaxsq128_mask">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
- llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pminu_d_512 : GCCBuiltin<"__builtin_ia32_pminud512_mask">,
- Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
- llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pminu_d_256 : GCCBuiltin<"__builtin_ia32_pminud256_mask">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
- llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pminu_d_128 : GCCBuiltin<"__builtin_ia32_pminud128_mask">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
- llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmins_d_512 : GCCBuiltin<"__builtin_ia32_pminsd512_mask">,
- Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
- llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmins_d_256 : GCCBuiltin<"__builtin_ia32_pminsd256_mask">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
- llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmins_d_128 : GCCBuiltin<"__builtin_ia32_pminsd128_mask">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
- llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pminu_q_512 : GCCBuiltin<"__builtin_ia32_pminuq512_mask">,
- Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
- llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pminu_q_256 : GCCBuiltin<"__builtin_ia32_pminuq256_mask">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
- llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pminu_q_128 : GCCBuiltin<"__builtin_ia32_pminuq128_mask">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
- llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmins_q_512 : GCCBuiltin<"__builtin_ia32_pminsq512_mask">,
- Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
- llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmins_q_256 : GCCBuiltin<"__builtin_ia32_pminsq256_mask">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
- llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmins_q_128 : GCCBuiltin<"__builtin_ia32_pminsq128_mask">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
- llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
-}
-
// Integer shift ops.
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx2_psll_w : GCCBuiltin<"__builtin_ia32_psllw256">,
@@ -2018,165 +1747,76 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psrl_w_128 : GCCBuiltin<"__builtin_ia32_psrlw128_mask">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
- llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psrl_w_256 : GCCBuiltin<"__builtin_ia32_psrlw256_mask">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
- llvm_v8i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psrl_w_512 : GCCBuiltin<"__builtin_ia32_psrlw512_mask">,
- Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
- llvm_v8i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psrl_wi_128 : GCCBuiltin<"__builtin_ia32_psrlwi128_mask">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
- llvm_i32_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psrl_wi_256 : GCCBuiltin<"__builtin_ia32_psrlwi256_mask">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
- llvm_i32_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psrl_wi_512 : GCCBuiltin<"__builtin_ia32_psrlwi512_mask">,
- Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
- llvm_i32_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_psra_q_128 : GCCBuiltin<"__builtin_ia32_psraq128">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
+ llvm_v2i64_ty], [IntrNoMem]>;
+ def int_x86_avx512_psra_q_256 : GCCBuiltin<"__builtin_ia32_psraq256">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
+ llvm_v2i64_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psra_w_128 : GCCBuiltin<"__builtin_ia32_psraw128_mask">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
- llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psra_w_256 : GCCBuiltin<"__builtin_ia32_psraw256_mask">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
- llvm_v8i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psra_w_512 : GCCBuiltin<"__builtin_ia32_psraw512_mask">,
- Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
- llvm_v8i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psra_wi_128 : GCCBuiltin<"__builtin_ia32_psrawi128_mask">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
- llvm_i32_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psra_wi_256 : GCCBuiltin<"__builtin_ia32_psrawi256_mask">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
- llvm_i32_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psra_wi_512 : GCCBuiltin<"__builtin_ia32_psrawi512_mask">,
- Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
- llvm_i32_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_psrai_q_128 : GCCBuiltin<"__builtin_ia32_psraqi128">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_psrai_q_256 : GCCBuiltin<"__builtin_ia32_psraqi256">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
+ llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psll_d : GCCBuiltin<"__builtin_ia32_pslld512_mask">,
+ def int_x86_avx512_psll_w_512 : GCCBuiltin<"__builtin_ia32_psllw512">,
+ Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
+ llvm_v8i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_psll_d_512 : GCCBuiltin<"__builtin_ia32_pslld512">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
- llvm_v4i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psll_q : GCCBuiltin<"__builtin_ia32_psllq512_mask">,
+ llvm_v4i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_psll_q_512 : GCCBuiltin<"__builtin_ia32_psllq512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
- llvm_v2i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psrl_d : GCCBuiltin<"__builtin_ia32_psrld512_mask">,
+ llvm_v2i64_ty], [IntrNoMem]>;
+ def int_x86_avx512_psrl_w_512 : GCCBuiltin<"__builtin_ia32_psrlw512">,
+ Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
+ llvm_v8i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_psrl_d_512 : GCCBuiltin<"__builtin_ia32_psrld512">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
- llvm_v4i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psrl_q : GCCBuiltin<"__builtin_ia32_psrlq512_mask">,
+ llvm_v4i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_psrl_q_512 : GCCBuiltin<"__builtin_ia32_psrlq512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
- llvm_v2i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psra_d : GCCBuiltin<"__builtin_ia32_psrad512_mask">,
+ llvm_v2i64_ty], [IntrNoMem]>;
+ def int_x86_avx512_psra_w_512 : GCCBuiltin<"__builtin_ia32_psraw512">,
+ Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
+ llvm_v8i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_psra_d_512 : GCCBuiltin<"__builtin_ia32_psrad512">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
- llvm_v4i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psra_q : GCCBuiltin<"__builtin_ia32_psraq512_mask">,
+ llvm_v4i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_psra_q_512 : GCCBuiltin<"__builtin_ia32_psraq512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
- llvm_v2i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ llvm_v2i64_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psll_w_128 : GCCBuiltin<"__builtin_ia32_psllw128_mask">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
- llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psll_w_256 : GCCBuiltin<"__builtin_ia32_psllw256_mask">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
- llvm_v8i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psll_w_512 : GCCBuiltin<"__builtin_ia32_psllw512_mask">,
+ def int_x86_avx512_pslli_w_512 : GCCBuiltin<"__builtin_ia32_psllwi512">,
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
- llvm_v8i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psll_wi_128 : GCCBuiltin<"__builtin_ia32_psllwi128_mask">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
- llvm_i32_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psll_wi_256 : GCCBuiltin<"__builtin_ia32_psllwi256_mask">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
- llvm_i32_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psll_wi_512 : GCCBuiltin<"__builtin_ia32_psllwi512_mask">,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_pslli_d_512 : GCCBuiltin<"__builtin_ia32_pslldi512">,
+ Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_pslli_q_512 : GCCBuiltin<"__builtin_ia32_psllqi512">,
+ Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_psrli_w_512 : GCCBuiltin<"__builtin_ia32_psrlwi512">,
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
- llvm_i32_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psllv16_hi : GCCBuiltin<"__builtin_ia32_psllv16hi_mask">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
- llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psllv2_di : GCCBuiltin<"__builtin_ia32_psllv2di_mask">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
- llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psllv32hi : GCCBuiltin<"__builtin_ia32_psllv32hi_mask">,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_psrli_d_512 : GCCBuiltin<"__builtin_ia32_psrldi512">,
+ Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_psrli_q_512 : GCCBuiltin<"__builtin_ia32_psrlqi512">,
+ Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_psrai_w_512 : GCCBuiltin<"__builtin_ia32_psrawi512">,
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
- llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psllv4_di : GCCBuiltin<"__builtin_ia32_psllv4di_mask">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
- llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psllv4_si : GCCBuiltin<"__builtin_ia32_psllv4si_mask">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
- llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psllv8_hi : GCCBuiltin<"__builtin_ia32_psllv8hi_mask">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
- llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psllv8_si : GCCBuiltin<"__builtin_ia32_psllv8si_mask">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
- llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
-
- def int_x86_avx512_mask_psra_d_128 : GCCBuiltin<"__builtin_ia32_psrad128_mask">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
- llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psra_d_256 : GCCBuiltin<"__builtin_ia32_psrad256_mask">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
- llvm_v4i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psra_di_128 : GCCBuiltin<"__builtin_ia32_psradi128_mask">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
- llvm_i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psra_di_256 : GCCBuiltin<"__builtin_ia32_psradi256_mask">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
- llvm_i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psra_di_512 : GCCBuiltin<"__builtin_ia32_psradi512_mask">,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_psrai_d_512 : GCCBuiltin<"__builtin_ia32_psradi512">,
Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
- llvm_i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psra_q_128 : GCCBuiltin<"__builtin_ia32_psraq128_mask">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
- llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psra_q_256 : GCCBuiltin<"__builtin_ia32_psraq256_mask">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
- llvm_v2i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psra_qi_128 : GCCBuiltin<"__builtin_ia32_psraqi128_mask">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
- llvm_i32_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psra_qi_256 : GCCBuiltin<"__builtin_ia32_psraqi256_mask">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
- llvm_i32_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psra_qi_512 : GCCBuiltin<"__builtin_ia32_psraqi512_mask">,
+ llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_psrai_q_512 : GCCBuiltin<"__builtin_ia32_psraqi512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
- llvm_i32_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psrl_d_128: GCCBuiltin<"__builtin_ia32_psrld128_mask">,
- Intrinsic<[llvm_v4i32_ty], [ llvm_v4i32_ty,
- llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty ], [IntrNoMem]>;
- def int_x86_avx512_mask_psrl_d_256: GCCBuiltin<"__builtin_ia32_psrld256_mask">,
- Intrinsic<[llvm_v8i32_ty], [ llvm_v8i32_ty,
- llvm_v4i32_ty, llvm_v8i32_ty, llvm_i8_ty ], [IntrNoMem]>;
- def int_x86_avx512_mask_psrl_di_128: GCCBuiltin<"__builtin_ia32_psrldi128_mask">,
- Intrinsic<[llvm_v4i32_ty], [ llvm_v4i32_ty,
- llvm_i32_ty, llvm_v4i32_ty, llvm_i8_ty ], [IntrNoMem]>;
- def int_x86_avx512_mask_psrl_di_256: GCCBuiltin<"__builtin_ia32_psrldi256_mask">,
- Intrinsic<[llvm_v8i32_ty], [ llvm_v8i32_ty,
- llvm_i32_ty, llvm_v8i32_ty, llvm_i8_ty ], [IntrNoMem]>;
- def int_x86_avx512_mask_psrl_di_512: GCCBuiltin<"__builtin_ia32_psrldi512_mask">,
- Intrinsic<[llvm_v16i32_ty], [ llvm_v16i32_ty,
- llvm_i32_ty, llvm_v16i32_ty, llvm_i16_ty ], [IntrNoMem]>;
-
- def int_x86_avx512_mask_psrl_q_128: GCCBuiltin<"__builtin_ia32_psrlq128_mask">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
- llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psrl_q_256: GCCBuiltin<"__builtin_ia32_psrlq256_mask">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
- llvm_v2i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psrl_qi_128: GCCBuiltin<"__builtin_ia32_psrlqi128_mask">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
- llvm_i32_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psrl_qi_256: GCCBuiltin<"__builtin_ia32_psrlqi256_mask">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
- llvm_i32_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psrl_qi_512: GCCBuiltin<"__builtin_ia32_psrlqi512_mask">,
- Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
- llvm_i32_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pmultishift_qb_128:
GCCBuiltin<"__builtin_ia32_vpmultishiftqb128_mask">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
@@ -2596,6 +2236,13 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty],
[IntrNoMem]>;
+ def int_x86_avx512_psllv_d_512 : GCCBuiltin<"__builtin_ia32_psllv16si">,
+ Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_psllv_q_512 : GCCBuiltin<"__builtin_ia32_psllv8di">,
+ Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty],
+ [IntrNoMem]>;
+
def int_x86_avx2_psrlv_d : GCCBuiltin<"__builtin_ia32_psrlv4si">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
@@ -2609,6 +2256,13 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty],
[IntrNoMem]>;
+ def int_x86_avx512_psrlv_d_512 : GCCBuiltin<"__builtin_ia32_psrlv16si">,
+ Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_psrlv_q_512 : GCCBuiltin<"__builtin_ia32_psrlv8di">,
+ Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty],
+ [IntrNoMem]>;
+
def int_x86_avx2_psrav_d : GCCBuiltin<"__builtin_ia32_psrav4si">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty],
[IntrNoMem]>;
@@ -2616,105 +2270,48 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty],
[IntrNoMem]>;
- def int_x86_avx512_mask_psllv_d : GCCBuiltin<"__builtin_ia32_psllv16si_mask">,
- Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
- llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty],
+ def int_x86_avx512_psrav_d_512 : GCCBuiltin<"__builtin_ia32_psrav16si">,
+ Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty],
[IntrNoMem]>;
- def int_x86_avx512_mask_psllv_q : GCCBuiltin<"__builtin_ia32_psllv8di_mask">,
- Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
- llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty],
+ def int_x86_avx512_psrav_q_128 : GCCBuiltin<"__builtin_ia32_psravq128">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty],
[IntrNoMem]>;
- def int_x86_avx512_mask_psrav_d : GCCBuiltin<"__builtin_ia32_psrav16si_mask">,
- Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
- llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty],
+ def int_x86_avx512_psrav_q_256 : GCCBuiltin<"__builtin_ia32_psravq256">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty],
[IntrNoMem]>;
- def int_x86_avx512_mask_psrav_q : GCCBuiltin<"__builtin_ia32_psrav8di_mask">,
- Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
- llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty],
+ def int_x86_avx512_psrav_q_512 : GCCBuiltin<"__builtin_ia32_psrav8di">,
+ Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty],
[IntrNoMem]>;
- def int_x86_avx512_mask_psrlv_d : GCCBuiltin<"__builtin_ia32_psrlv16si_mask">,
- Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
- llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty],
+
+ def int_x86_avx512_psllv_w_128 : GCCBuiltin<"__builtin_ia32_psllv8hi">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
[IntrNoMem]>;
- def int_x86_avx512_mask_psrlv_q : GCCBuiltin<"__builtin_ia32_psrlv8di_mask">,
- Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
- llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty],
+ def int_x86_avx512_psllv_w_256 : GCCBuiltin<"__builtin_ia32_psllv16hi">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_psllv_w_512 : GCCBuiltin<"__builtin_ia32_psllv32hi">,
+ Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty],
[IntrNoMem]>;
- def int_x86_avx512_mask_psll_d_128 : GCCBuiltin<"__builtin_ia32_pslld128_mask">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
- llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psll_d_256 : GCCBuiltin<"__builtin_ia32_pslld256_mask">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
- llvm_v4i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psll_di_128 : GCCBuiltin<"__builtin_ia32_pslldi128_mask">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
- llvm_i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psll_di_256 : GCCBuiltin<"__builtin_ia32_pslldi256_mask">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
- llvm_i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psll_di_512 : GCCBuiltin<"__builtin_ia32_pslldi512_mask">,
- Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
- llvm_i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psll_q_128 : GCCBuiltin<"__builtin_ia32_psllq128_mask">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
- llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psll_q_256 : GCCBuiltin<"__builtin_ia32_psllq256_mask">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
- llvm_v2i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psll_qi_128 : GCCBuiltin<"__builtin_ia32_psllqi128_mask">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
- llvm_i32_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psll_qi_256 : GCCBuiltin<"__builtin_ia32_psllqi256_mask">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
- llvm_i32_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psll_qi_512 : GCCBuiltin<"__builtin_ia32_psllqi512_mask">,
- Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
- llvm_i32_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
-
- def int_x86_avx512_mask_psrav16_hi : GCCBuiltin<"__builtin_ia32_psrav16hi_mask">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
- llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psrav32_hi : GCCBuiltin<"__builtin_ia32_psrav32hi_mask">,
- Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
- llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psrav4_si : GCCBuiltin<"__builtin_ia32_psrav4si_mask">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
- llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psrav8_hi : GCCBuiltin<"__builtin_ia32_psrav8hi_mask">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
- llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psrav8_si : GCCBuiltin<"__builtin_ia32_psrav8si_mask">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
- llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psrav_q_128 : GCCBuiltin<"__builtin_ia32_psravq128_mask">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
- llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psrav_q_256 : GCCBuiltin<"__builtin_ia32_psravq256_mask">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
- llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_psrlv_w_128 : GCCBuiltin<"__builtin_ia32_psrlv8hi">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_psrlv_w_256 : GCCBuiltin<"__builtin_ia32_psrlv16hi">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_psrlv_w_512 : GCCBuiltin<"__builtin_ia32_psrlv32hi">,
+ Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty],
+ [IntrNoMem]>;
- def int_x86_avx512_mask_psrlv16_hi : GCCBuiltin<"__builtin_ia32_psrlv16hi_mask">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
- llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psrlv2_di : GCCBuiltin<"__builtin_ia32_psrlv2di_mask">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
- llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psrlv32hi : GCCBuiltin<"__builtin_ia32_psrlv32hi_mask">,
- Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
- llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psrlv4_di : GCCBuiltin<"__builtin_ia32_psrlv4di_mask">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
- llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psrlv4_si : GCCBuiltin<"__builtin_ia32_psrlv4si_mask">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
- llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psrlv8_hi : GCCBuiltin<"__builtin_ia32_psrlv8hi_mask">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
- llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psrlv8_si : GCCBuiltin<"__builtin_ia32_psrlv8si_mask">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
- llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_psrav_w_128 : GCCBuiltin<"__builtin_ia32_psrav8hi">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_psrav_w_256 : GCCBuiltin<"__builtin_ia32_psrav16hi">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_psrav_w_512 : GCCBuiltin<"__builtin_ia32_psrav32hi">,
+ Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty],
+ [IntrNoMem]>;
def int_x86_avx512_mask_prorv_d_128 : GCCBuiltin<"__builtin_ia32_prorvd128_mask">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
@@ -3268,6 +2865,18 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
[llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty,
llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask3_vfmsub_sd :
+ GCCBuiltin<"__builtin_ia32_vfmsubsd3_mask3">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask3_vfmsub_ss :
+ GCCBuiltin<"__builtin_ia32_vfmsubss3_mask3">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+
def int_x86_avx512_mask3_vfmsub_pd_128 :
GCCBuiltin<"__builtin_ia32_vfmsubpd128_mask3">,
Intrinsic<[llvm_v2f64_ty],
@@ -3376,6 +2985,18 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask3_vfnmsub_sd :
+ GCCBuiltin<"__builtin_ia32_vfnmsubsd3_mask3">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask3_vfnmsub_ss :
+ GCCBuiltin<"__builtin_ia32_vfnmsubss3_mask3">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty,
+ llvm_i32_ty], [IntrNoMem]>;
+
def int_x86_avx512_mask_vfnmsub_pd_128 :
GCCBuiltin<"__builtin_ia32_vfnmsubpd128_mask">,
Intrinsic<[llvm_v2f64_ty],
@@ -3758,16 +3379,16 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Addition
def int_x86_mmx_padd_b : GCCBuiltin<"__builtin_ia32_paddb">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
- [IntrNoMem]>;
+ [IntrNoMem, Commutative]>;
def int_x86_mmx_padd_w : GCCBuiltin<"__builtin_ia32_paddw">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
- [IntrNoMem]>;
+ [IntrNoMem, Commutative]>;
def int_x86_mmx_padd_d : GCCBuiltin<"__builtin_ia32_paddd">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
- [IntrNoMem]>;
+ [IntrNoMem, Commutative]>;
def int_x86_mmx_padd_q : GCCBuiltin<"__builtin_ia32_paddq">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
- [IntrNoMem]>;
+ [IntrNoMem, Commutative]>;
def int_x86_mmx_padds_b : GCCBuiltin<"__builtin_ia32_paddsb">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty,
@@ -3831,16 +3452,16 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Bitwise operations
def int_x86_mmx_pand : GCCBuiltin<"__builtin_ia32_pand">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
- [IntrNoMem]>;
+ [IntrNoMem, Commutative]>;
def int_x86_mmx_pandn : GCCBuiltin<"__builtin_ia32_pandn">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
[IntrNoMem]>;
def int_x86_mmx_por : GCCBuiltin<"__builtin_ia32_por">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
- [IntrNoMem]>;
+ [IntrNoMem, Commutative]>;
def int_x86_mmx_pxor : GCCBuiltin<"__builtin_ia32_pxor">,
Intrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty],
- [IntrNoMem]>;
+ [IntrNoMem, Commutative]>;
// Averages
def int_x86_mmx_pavg_b : GCCBuiltin<"__builtin_ia32_pavgb">,
@@ -4140,6 +3761,10 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>;
def int_x86_xsaves64 :
Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], []>;
+ def int_x86_xgetbv :
+ Intrinsic<[llvm_i64_ty], [llvm_i32_ty], []>;
+ def int_x86_xsetbv :
+ Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], []>;
}
//===----------------------------------------------------------------------===//
@@ -4294,61 +3919,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_kortestc_w : GCCBuiltin<"__builtin_ia32_kortestchi">,
Intrinsic<[llvm_i32_ty], [llvm_i16_ty, llvm_i16_ty],
[IntrNoMem]>;
-
- def int_x86_avx512_mask_pmovsxb_d_128 : GCCBuiltin<"__builtin_ia32_pmovsxbd128_mask">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty,
- llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmovsxb_d_256 : GCCBuiltin<"__builtin_ia32_pmovsxbd256_mask">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v16i8_ty,
- llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmovsxb_d_512 : GCCBuiltin<"__builtin_ia32_pmovsxbd512_mask">,
- Intrinsic<[llvm_v16i32_ty], [llvm_v16i8_ty,
- llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmovsxb_q_128 : GCCBuiltin<"__builtin_ia32_pmovsxbq128_mask">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty,
- llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmovsxb_q_256 : GCCBuiltin<"__builtin_ia32_pmovsxbq256_mask">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v16i8_ty,
- llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmovsxb_q_512 : GCCBuiltin<"__builtin_ia32_pmovsxbq512_mask">,
- Intrinsic<[llvm_v8i64_ty], [llvm_v16i8_ty,
- llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmovsxb_w_128 : GCCBuiltin<"__builtin_ia32_pmovsxbw128_mask">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty,
- llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmovsxb_w_256 : GCCBuiltin<"__builtin_ia32_pmovsxbw256_mask">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i8_ty,
- llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmovsxb_w_512 : GCCBuiltin<"__builtin_ia32_pmovsxbw512_mask">,
- Intrinsic<[llvm_v32i16_ty], [llvm_v32i8_ty,
- llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmovsxd_q_128 : GCCBuiltin<"__builtin_ia32_pmovsxdq128_mask">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty,
- llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmovsxd_q_256 : GCCBuiltin<"__builtin_ia32_pmovsxdq256_mask">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v4i32_ty,
- llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmovsxd_q_512 : GCCBuiltin<"__builtin_ia32_pmovsxdq512_mask">,
- Intrinsic<[llvm_v8i64_ty], [llvm_v8i32_ty,
- llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmovsxw_d_128 : GCCBuiltin<"__builtin_ia32_pmovsxwd128_mask">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty,
- llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmovsxw_d_256 : GCCBuiltin<"__builtin_ia32_pmovsxwd256_mask">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i16_ty,
- llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmovsxw_d_512 : GCCBuiltin<"__builtin_ia32_pmovsxwd512_mask">,
- Intrinsic<[llvm_v16i32_ty], [llvm_v16i16_ty,
- llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmovsxw_q_128 : GCCBuiltin<"__builtin_ia32_pmovsxwq128_mask">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v8i16_ty,
- llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmovsxw_q_256 : GCCBuiltin<"__builtin_ia32_pmovsxwq256_mask">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v8i16_ty,
- llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmovsxw_q_512 : GCCBuiltin<"__builtin_ia32_pmovsxwq512_mask">,
- Intrinsic<[llvm_v8i64_ty], [llvm_v8i16_ty,
- llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
}
// Conversion ops
@@ -4403,9 +3973,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_cvtsi2ss64 : GCCBuiltin<"__builtin_ia32_cvtsi2ss64">,
Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty,
llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_cvtsi2sd32 : GCCBuiltin<"__builtin_ia32_cvtsi2sd32">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
- llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_cvtsi2sd64 : GCCBuiltin<"__builtin_ia32_cvtsi2sd64">,
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty,
llvm_i64_ty, llvm_i32_ty], [IntrNoMem]>;
@@ -4510,24 +4077,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
// Vector convert
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx512_mask_cvtdq2pd_128 :
- GCCBuiltin<"__builtin_ia32_cvtdq2pd128_mask">,
- Intrinsic<[llvm_v2f64_ty],
- [llvm_v4i32_ty, llvm_v2f64_ty, llvm_i8_ty],
- [IntrNoMem]>;
-
- def int_x86_avx512_mask_cvtdq2pd_256 :
- GCCBuiltin<"__builtin_ia32_cvtdq2pd256_mask">,
- Intrinsic<[llvm_v4f64_ty],
- [llvm_v4i32_ty, llvm_v4f64_ty, llvm_i8_ty],
- [IntrNoMem]>;
-
- def int_x86_avx512_mask_cvtdq2pd_512 :
- GCCBuiltin<"__builtin_ia32_cvtdq2pd512_mask">,
- Intrinsic<[llvm_v8f64_ty],
- [llvm_v8i32_ty, llvm_v8f64_ty, llvm_i8_ty],
- [IntrNoMem]>;
-
def int_x86_avx512_mask_cvtdq2ps_128 :
GCCBuiltin<"__builtin_ia32_cvtdq2ps128_mask">,
Intrinsic<[llvm_v4f32_ty],
@@ -4918,24 +4467,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
[llvm_v8f32_ty, llvm_v8i64_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem]>;
- def int_x86_avx512_mask_cvtudq2pd_128 :
- GCCBuiltin<"__builtin_ia32_cvtudq2pd128_mask">,
- Intrinsic<[llvm_v2f64_ty],
- [llvm_v4i32_ty, llvm_v2f64_ty, llvm_i8_ty],
- [IntrNoMem]>;
-
- def int_x86_avx512_mask_cvtudq2pd_256 :
- GCCBuiltin<"__builtin_ia32_cvtudq2pd256_mask">,
- Intrinsic<[llvm_v4f64_ty],
- [llvm_v4i32_ty, llvm_v4f64_ty, llvm_i8_ty],
- [IntrNoMem]>;
-
- def int_x86_avx512_mask_cvtudq2pd_512 :
- GCCBuiltin<"__builtin_ia32_cvtudq2pd512_mask">,
- Intrinsic<[llvm_v8f64_ty],
- [llvm_v8i32_ty, llvm_v8f64_ty, llvm_i8_ty],
- [IntrNoMem]>;
-
def int_x86_avx512_mask_cvtudq2ps_128 :
GCCBuiltin<"__builtin_ia32_cvtudq2ps128_mask">,
Intrinsic<[llvm_v4f32_ty],
@@ -5161,153 +4692,30 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v2i64_ty], [llvm_i8_ty], [IntrNoMem]>;
}
-// Vector sign and zero extend
-let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx512_pmovzxbq : GCCBuiltin<"__builtin_ia32_pmovzxbq512">,
- Intrinsic<[llvm_v8i64_ty], [llvm_v16i8_ty],
- [IntrNoMem]>;
- def int_x86_avx512_pmovzxwd : GCCBuiltin<"__builtin_ia32_pmovzxwd512">,
- Intrinsic<[llvm_v16i32_ty], [llvm_v16i16_ty],
- [IntrNoMem]>;
- def int_x86_avx512_pmovzxbd : GCCBuiltin<"__builtin_ia32_pmovzxbd512">,
- Intrinsic<[llvm_v16i32_ty], [llvm_v16i8_ty],
- [IntrNoMem]>;
- def int_x86_avx512_pmovzxwq : GCCBuiltin<"__builtin_ia32_pmovzxwq512">,
- Intrinsic<[llvm_v8i64_ty], [llvm_v8i16_ty],
- [IntrNoMem]>;
- def int_x86_avx512_pmovzxdq : GCCBuiltin<"__builtin_ia32_pmovzxdq512">,
- Intrinsic<[llvm_v8i64_ty], [llvm_v8i32_ty],
- [IntrNoMem]>;
-
- def int_x86_avx512_mask_pmovzxb_d_128 : GCCBuiltin<"__builtin_ia32_pmovzxbd128_mask">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty,
- llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmovzxb_d_256 : GCCBuiltin<"__builtin_ia32_pmovzxbd256_mask">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v16i8_ty,
- llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmovzxb_d_512 : GCCBuiltin<"__builtin_ia32_pmovzxbd512_mask">,
- Intrinsic<[llvm_v16i32_ty], [llvm_v16i8_ty,
- llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmovzxb_q_128 : GCCBuiltin<"__builtin_ia32_pmovzxbq128_mask">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty,
- llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmovzxb_q_256 : GCCBuiltin<"__builtin_ia32_pmovzxbq256_mask">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v16i8_ty,
- llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmovzxb_q_512 : GCCBuiltin<"__builtin_ia32_pmovzxbq512_mask">,
- Intrinsic<[llvm_v8i64_ty], [llvm_v16i8_ty,
- llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmovzxb_w_128 : GCCBuiltin<"__builtin_ia32_pmovzxbw128_mask">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty,
- llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmovzxb_w_256 : GCCBuiltin<"__builtin_ia32_pmovzxbw256_mask">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i8_ty,
- llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmovzxb_w_512 : GCCBuiltin<"__builtin_ia32_pmovzxbw512_mask">,
- Intrinsic<[llvm_v32i16_ty], [llvm_v32i8_ty,
- llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmovzxd_q_128 : GCCBuiltin<"__builtin_ia32_pmovzxdq128_mask">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty,
- llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmovzxd_q_256 : GCCBuiltin<"__builtin_ia32_pmovzxdq256_mask">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v4i32_ty,
- llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmovzxd_q_512 : GCCBuiltin<"__builtin_ia32_pmovzxdq512_mask">,
- Intrinsic<[llvm_v8i64_ty], [llvm_v8i32_ty,
- llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmovzxw_d_128 : GCCBuiltin<"__builtin_ia32_pmovzxwd128_mask">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty,
- llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmovzxw_d_256 : GCCBuiltin<"__builtin_ia32_pmovzxwd256_mask">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i16_ty,
- llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmovzxw_d_512 : GCCBuiltin<"__builtin_ia32_pmovzxwd512_mask">,
- Intrinsic<[llvm_v16i32_ty], [llvm_v16i16_ty,
- llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmovzxw_q_128 : GCCBuiltin<"__builtin_ia32_pmovzxwq128_mask">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v8i16_ty,
- llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmovzxw_q_256 : GCCBuiltin<"__builtin_ia32_pmovzxwq256_mask">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v8i16_ty,
- llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmovzxw_q_512 : GCCBuiltin<"__builtin_ia32_pmovzxwq512_mask">,
- Intrinsic<[llvm_v8i64_ty], [llvm_v8i16_ty,
- llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
-
-}
-
// Arithmetic ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
- def int_x86_avx512_mask_add_ps_128 : GCCBuiltin<"__builtin_ia32_addps128_mask">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
- llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_add_ps_256 : GCCBuiltin<"__builtin_ia32_addps256_mask">,
- Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
- llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_add_ps_512 : GCCBuiltin<"__builtin_ia32_addps512_mask">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_add_pd_128 : GCCBuiltin<"__builtin_ia32_addpd128_mask">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
- llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_add_pd_256 : GCCBuiltin<"__builtin_ia32_addpd256_mask">,
- Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
- llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_add_pd_512 : GCCBuiltin<"__builtin_ia32_addpd512_mask">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_sub_ps_128 : GCCBuiltin<"__builtin_ia32_subps128_mask">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
- llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_sub_ps_256 : GCCBuiltin<"__builtin_ia32_subps256_mask">,
- Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
- llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_sub_ps_512 : GCCBuiltin<"__builtin_ia32_subps512_mask">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_sub_pd_128 : GCCBuiltin<"__builtin_ia32_subpd128_mask">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
- llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_sub_pd_256 : GCCBuiltin<"__builtin_ia32_subpd256_mask">,
- Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
- llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_sub_pd_512 : GCCBuiltin<"__builtin_ia32_subpd512_mask">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_mul_ps_128 : GCCBuiltin<"__builtin_ia32_mulps_mask">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
- llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_mul_ps_256 : GCCBuiltin<"__builtin_ia32_mulps256_mask">,
- Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
- llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_mul_ps_512 : GCCBuiltin<"__builtin_ia32_mulps512_mask">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_mul_pd_128 : GCCBuiltin<"__builtin_ia32_mulpd_mask">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
- llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_mul_pd_256 : GCCBuiltin<"__builtin_ia32_mulpd256_mask">,
- Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
- llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_mul_pd_512 : GCCBuiltin<"__builtin_ia32_mulpd512_mask">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_div_ps_128 : GCCBuiltin<"__builtin_ia32_divps_mask">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
- llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_div_ps_256 : GCCBuiltin<"__builtin_ia32_divps256_mask">,
- Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
- llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_div_ps_512 : GCCBuiltin<"__builtin_ia32_divps512_mask">,
Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_div_pd_128 : GCCBuiltin<"__builtin_ia32_divpd_mask">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
- llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_div_pd_256 : GCCBuiltin<"__builtin_ia32_divpd256_mask">,
- Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
- llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
def int_x86_avx512_mask_div_pd_512 : GCCBuiltin<"__builtin_ia32_divpd512_mask">,
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>;
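The 128/256/512-bit masked add/sub/mul/div intrinsics deleted above are representable in generic IR as the plain vector operation followed by a select on the mask, which is presumably what makes the dedicated intrinsics unnecessary. A minimal C++ sketch with IRBuilder, assuming <16 x i32> operands and a <16 x i1> mask; the helper and value names are illustrative, not taken from this patch:

    // Minimal sketch: the generic-IR form of a masked vector add
    // (plain add, then a select on the mask).
    #include "llvm/IR/IRBuilder.h"

    llvm::Value *emitMaskedAdd(llvm::IRBuilder<> &Builder, llvm::Value *A,
                               llvm::Value *B, llvm::Value *Mask,
                               llvm::Value *PassThru) {
      llvm::Value *Sum = Builder.CreateAdd(A, B, "sum");      // element-wise add
      return Builder.CreateSelect(Mask, Sum, PassThru, "res"); // blend under mask
    }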
@@ -5701,106 +5109,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty],
[IntrNoMem]>;
-def int_x86_avx512_psad_bw_512 : GCCBuiltin<"__builtin_ia32_psadbw512">,
- Intrinsic<[llvm_v8i64_ty], [llvm_v64i8_ty, llvm_v64i8_ty],
- [IntrNoMem]>;
-}
-// FP logical ops
-let TargetPrefix = "x86" in {
- def int_x86_avx512_mask_and_pd_128 : GCCBuiltin<"__builtin_ia32_andpd128_mask">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
- llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_and_pd_256 : GCCBuiltin<"__builtin_ia32_andpd256_mask">,
- Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
- llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_and_pd_512 : GCCBuiltin<"__builtin_ia32_andpd512_mask">,
- Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
- llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_and_ps_128 : GCCBuiltin<"__builtin_ia32_andps128_mask">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
- llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_and_ps_256 : GCCBuiltin<"__builtin_ia32_andps256_mask">,
- Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
- llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_and_ps_512 : GCCBuiltin<"__builtin_ia32_andps512_mask">,
- Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
- llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_andn_pd_128 : GCCBuiltin<"__builtin_ia32_andnpd128_mask">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
- llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_andn_pd_256 : GCCBuiltin<"__builtin_ia32_andnpd256_mask">,
- Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
- llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_andn_pd_512 : GCCBuiltin<"__builtin_ia32_andnpd512_mask">,
- Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
- llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_andn_ps_128 : GCCBuiltin<"__builtin_ia32_andnps128_mask">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
- llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_andn_ps_256 : GCCBuiltin<"__builtin_ia32_andnps256_mask">,
- Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
- llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_andn_ps_512 : GCCBuiltin<"__builtin_ia32_andnps512_mask">,
- Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
- llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_or_pd_128 : GCCBuiltin<"__builtin_ia32_orpd128_mask">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
- llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_or_pd_256 : GCCBuiltin<"__builtin_ia32_orpd256_mask">,
- Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
- llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_or_pd_512 : GCCBuiltin<"__builtin_ia32_orpd512_mask">,
- Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
- llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_or_ps_128 : GCCBuiltin<"__builtin_ia32_orps128_mask">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
- llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_or_ps_256 : GCCBuiltin<"__builtin_ia32_orps256_mask">,
- Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
- llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_or_ps_512 : GCCBuiltin<"__builtin_ia32_orps512_mask">,
- Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
- llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>;
-
- def int_x86_avx512_mask_xor_pd_128 : GCCBuiltin<"__builtin_ia32_xorpd128_mask">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty,
- llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_xor_pd_256 : GCCBuiltin<"__builtin_ia32_xorpd256_mask">,
- Intrinsic<[llvm_v4f64_ty], [llvm_v4f64_ty, llvm_v4f64_ty,
- llvm_v4f64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_xor_pd_512 : GCCBuiltin<"__builtin_ia32_xorpd512_mask">,
- Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty,
- llvm_v8f64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_xor_ps_128 : GCCBuiltin<"__builtin_ia32_xorps128_mask">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty,
- llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_xor_ps_256 : GCCBuiltin<"__builtin_ia32_xorps256_mask">,
- Intrinsic<[llvm_v8f32_ty], [llvm_v8f32_ty, llvm_v8f32_ty,
- llvm_v8f32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_xor_ps_512 : GCCBuiltin<"__builtin_ia32_xorps512_mask">,
- Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty,
- llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_psad_bw_512 : GCCBuiltin<"__builtin_ia32_psadbw512">,
+ Intrinsic<[llvm_v8i64_ty], [llvm_v64i8_ty, llvm_v64i8_ty],
+ [IntrNoMem, Commutative]>;
}
// Integer arithmetic ops
let TargetPrefix = "x86" in {
- def int_x86_avx512_mask_padd_b_128 : GCCBuiltin<"__builtin_ia32_paddb128_mask">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
- llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_padd_b_256 : GCCBuiltin<"__builtin_ia32_paddb256_mask">,
- Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
- llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_padd_b_512 : GCCBuiltin<"__builtin_ia32_paddb512_mask">,
- Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty,
- llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_padd_w_128 : GCCBuiltin<"__builtin_ia32_paddw128_mask">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
- llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_padd_w_256 : GCCBuiltin<"__builtin_ia32_paddw256_mask">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
- llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_padd_w_512 : GCCBuiltin<"__builtin_ia32_paddw512_mask">,
- Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
- llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_mask_padds_b_128 : GCCBuiltin<"__builtin_ia32_paddsb128_mask">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
@@ -5837,42 +5151,6 @@ let TargetPrefix = "x86" in {
def int_x86_avx512_mask_paddus_w_512 : GCCBuiltin<"__builtin_ia32_paddusw512_mask">,
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_padd_d_128 : GCCBuiltin<"__builtin_ia32_paddd128_mask">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
- llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_padd_d_256 : GCCBuiltin<"__builtin_ia32_paddd256_mask">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
- llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_padd_d_512 : GCCBuiltin<"__builtin_ia32_paddd512_mask">,
- Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
- llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_padd_q_128 : GCCBuiltin<"__builtin_ia32_paddq128_mask">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
- llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_padd_q_256 : GCCBuiltin<"__builtin_ia32_paddq256_mask">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
- llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_padd_q_512 : GCCBuiltin<"__builtin_ia32_paddq512_mask">,
- Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
- llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psub_b_128 : GCCBuiltin<"__builtin_ia32_psubb128_mask">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
- llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psub_b_256 : GCCBuiltin<"__builtin_ia32_psubb256_mask">,
- Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, llvm_v32i8_ty,
- llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psub_b_512 : GCCBuiltin<"__builtin_ia32_psubb512_mask">,
- Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty,
- llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psub_w_128 : GCCBuiltin<"__builtin_ia32_psubw128_mask">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
- llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psub_w_256 : GCCBuiltin<"__builtin_ia32_psubw256_mask">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
- llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psub_w_512 : GCCBuiltin<"__builtin_ia32_psubw512_mask">,
- Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
- llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_mask_psubs_b_128 : GCCBuiltin<"__builtin_ia32_psubsb128_mask">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty,
llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
@@ -5909,69 +5187,10 @@ let TargetPrefix = "x86" in {
def int_x86_avx512_mask_psubus_w_512 : GCCBuiltin<"__builtin_ia32_psubusw512_mask">,
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psub_d_128 : GCCBuiltin<"__builtin_ia32_psubd128_mask">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
- llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psub_d_256 : GCCBuiltin<"__builtin_ia32_psubd256_mask">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
- llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psub_d_512 : GCCBuiltin<"__builtin_ia32_psubd512_mask">,
- Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
- llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psub_q_128 : GCCBuiltin<"__builtin_ia32_psubq128_mask">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
- llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psub_q_256 : GCCBuiltin<"__builtin_ia32_psubq256_mask">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
- llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_psub_q_512 : GCCBuiltin<"__builtin_ia32_psubq512_mask">,
- Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
- llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmulu_dq_128 : GCCBuiltin<"__builtin_ia32_pmuludq128_mask">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
- llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmul_dq_128 : GCCBuiltin<"__builtin_ia32_pmuldq128_mask">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
- llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmulu_dq_256 : GCCBuiltin<"__builtin_ia32_pmuludq256_mask">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
- llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmul_dq_256 : GCCBuiltin<"__builtin_ia32_pmuldq256_mask">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
- llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmulu_dq_512 : GCCBuiltin<"__builtin_ia32_pmuludq512_mask">,
- Intrinsic<[llvm_v8i64_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
- llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmul_dq_512 : GCCBuiltin<"__builtin_ia32_pmuldq512_mask">,
- Intrinsic<[llvm_v8i64_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
- llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmull_w_128 : GCCBuiltin<"__builtin_ia32_pmullw128_mask">,
- Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty,
- llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmull_w_256 : GCCBuiltin<"__builtin_ia32_pmullw256_mask">,
- Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty, llvm_v16i16_ty,
- llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmull_w_512 : GCCBuiltin<"__builtin_ia32_pmullw512_mask">,
- Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
- llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmull_d_128 : GCCBuiltin<"__builtin_ia32_pmulld128_mask">,
- Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty,
- llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmull_d_256 : GCCBuiltin<"__builtin_ia32_pmulld256_mask">,
- Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty, llvm_v8i32_ty,
- llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmull_d_512 : GCCBuiltin<"__builtin_ia32_pmulld512_mask">,
- Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
- llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmull_q_128 : GCCBuiltin<"__builtin_ia32_pmullq128_mask">,
- Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty,
- llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmull_q_256 : GCCBuiltin<"__builtin_ia32_pmullq256_mask">,
- Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty,
- llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_mask_pmull_q_512 : GCCBuiltin<"__builtin_ia32_pmullq512_mask">,
- Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
- llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_pmulu_dq_512 : GCCBuiltin<"__builtin_ia32_pmuludq512">,
+ Intrinsic<[llvm_v8i64_ty], [llvm_v16i32_ty, llvm_v16i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_pmul_dq_512 : GCCBuiltin<"__builtin_ia32_pmuldq512">,
+ Intrinsic<[llvm_v8i64_ty], [llvm_v16i32_ty, llvm_v16i32_ty], [IntrNoMem]>;
def int_x86_avx512_mask_pmulhu_w_512 : GCCBuiltin<"__builtin_ia32_pmulhuw512_mask">,
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty,
llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
@@ -6103,7 +5322,7 @@ let TargetPrefix = "x86" in {
def int_x86_avx512_gather3div2_di :
GCCBuiltin<"__builtin_ia32_gather3div2di">,
- Intrinsic<[llvm_v4i32_ty],
+ Intrinsic<[llvm_v2i64_ty],
[llvm_v2i64_ty, llvm_ptr_ty, llvm_v2i64_ty, llvm_i8_ty, llvm_i32_ty],
[IntrReadMem, IntrArgMemOnly]>;
@@ -6115,7 +5334,7 @@ let TargetPrefix = "x86" in {
def int_x86_avx512_gather3div4_di :
GCCBuiltin<"__builtin_ia32_gather3div4di">,
- Intrinsic<[llvm_v8i32_ty],
+ Intrinsic<[llvm_v4i64_ty],
[llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i64_ty, llvm_i8_ty, llvm_i32_ty],
[IntrReadMem, IntrArgMemOnly]>;
@@ -6151,7 +5370,7 @@ let TargetPrefix = "x86" in {
def int_x86_avx512_gather3siv2_di :
GCCBuiltin<"__builtin_ia32_gather3siv2di">,
- Intrinsic<[llvm_v4i32_ty],
+ Intrinsic<[llvm_v2i64_ty],
[llvm_v2i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty, llvm_i32_ty],
[IntrReadMem, IntrArgMemOnly]>;
@@ -6163,7 +5382,7 @@ let TargetPrefix = "x86" in {
def int_x86_avx512_gather3siv4_di :
GCCBuiltin<"__builtin_ia32_gather3siv4di">,
- Intrinsic<[llvm_v8i32_ty],
+ Intrinsic<[llvm_v4i64_ty],
[llvm_v4i64_ty, llvm_ptr_ty, llvm_v4i32_ty, llvm_i8_ty, llvm_i32_ty],
[IntrReadMem, IntrArgMemOnly]>;
@@ -6414,44 +5633,6 @@ let TargetPrefix = "x86" in {
[IntrNoMem]>;
}
-let TargetPrefix = "x86" in {
- def int_x86_avx512_mask_valign_q_512 :
- GCCBuiltin<"__builtin_ia32_alignq512_mask">,
- Intrinsic<[llvm_v8i64_ty],
- [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty, llvm_v8i64_ty,
- llvm_i8_ty], [IntrNoMem]>;
-
- def int_x86_avx512_mask_valign_d_512 :
- GCCBuiltin<"__builtin_ia32_alignd512_mask">,
- Intrinsic<[llvm_v16i32_ty],
- [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty, llvm_v16i32_ty,
- llvm_i16_ty], [IntrNoMem]>;
-
- def int_x86_avx512_mask_valign_q_256 :
- GCCBuiltin<"__builtin_ia32_alignq256_mask">,
- Intrinsic<[llvm_v4i64_ty],
- [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty, llvm_v4i64_ty,
- llvm_i8_ty], [IntrNoMem]>;
-
- def int_x86_avx512_mask_valign_d_256 :
- GCCBuiltin<"__builtin_ia32_alignd256_mask">,
- Intrinsic<[llvm_v8i32_ty],
- [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty, llvm_v8i32_ty,
- llvm_i8_ty], [IntrNoMem]>;
-
- def int_x86_avx512_mask_valign_q_128 :
- GCCBuiltin<"__builtin_ia32_alignq128_mask">,
- Intrinsic<[llvm_v2i64_ty],
- [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_v2i64_ty,
- llvm_i8_ty], [IntrNoMem]>;
-
- def int_x86_avx512_mask_valign_d_128 :
- GCCBuiltin<"__builtin_ia32_alignd128_mask">,
- Intrinsic<[llvm_v4i32_ty],
- [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_v4i32_ty,
- llvm_i8_ty], [IntrNoMem]>;
-}
-
// Compares
let TargetPrefix = "x86" in {
// 512-bit
diff --git a/contrib/llvm/include/llvm/IR/LLVMContext.h b/contrib/llvm/include/llvm/IR/LLVMContext.h
index dbf2b4562332..7f43d5df3c3f 100644
--- a/contrib/llvm/include/llvm/IR/LLVMContext.h
+++ b/contrib/llvm/include/llvm/IR/LLVMContext.h
@@ -15,25 +15,30 @@
#ifndef LLVM_IR_LLVMCONTEXT_H
#define LLVM_IR_LLVMCONTEXT_H
+#include "llvm-c/Types.h"
#include "llvm/Support/CBindingWrapping.h"
#include "llvm/Support/Options.h"
+#include <cstdint>
+#include <memory>
+#include <string>
namespace llvm {
-class LLVMContextImpl;
-class StringRef;
-class Twine;
-class Instruction;
-class Module;
-class MDString;
-class DICompositeType;
-class SMDiagnostic;
class DiagnosticInfo;
enum DiagnosticSeverity : char;
-template <typename T> class SmallVectorImpl;
class Function;
-class DebugLoc;
+class Instruction;
+class LLVMContextImpl;
+class Module;
class OptBisect;
+template <typename T> class SmallVectorImpl;
+class SMDiagnostic;
+class StringRef;
+class Twine;
+
+namespace yaml {
+class Output;
+} // end namespace yaml
/// This is an important class for using LLVM in a threaded context. It
/// (opaquely) owns and manages the core "global" data of LLVM's core
@@ -44,6 +49,8 @@ class LLVMContext {
public:
LLVMContextImpl *const pImpl;
LLVMContext();
+ LLVMContext(LLVMContext &) = delete;
+ LLVMContext &operator=(const LLVMContext &) = delete;
~LLVMContext();
// Pinned metadata names, which always have the same value. This is a
@@ -69,6 +76,8 @@ public:
MD_align = 17, // "align"
MD_loop = 18, // "llvm.loop"
MD_type = 19, // "type"
+ MD_section_prefix = 20, // "section_prefix"
+ MD_absolute_symbol = 21, // "absolute_symbol"
};
/// Known operand bundle tag IDs, which always have the same value. All
@@ -181,6 +190,17 @@ public:
/// diagnostics.
void setDiagnosticHotnessRequested(bool Requested);
+ /// \brief Return the YAML file used by the backend to save optimization
+ /// diagnostics. If null, diagnostics are not saved in a file but only
+ /// emitted via the diagnostic handler.
+ yaml::Output *getDiagnosticsOutputFile();
+ /// Set the diagnostics output file used for optimization diagnostics.
+ ///
+ /// By default or if invoked with null, diagnostics are not saved in a file
+ /// but only emitted via the diagnostic handler. Even if an output file is
+ /// set, the handler is invoked for each diagnostic message.
+ void setDiagnosticsOutputFile(std::unique_ptr<yaml::Output> F);
+
/// \brief Get the prefix that should be printed in front of a diagnostic of
/// the given \p Severity
static const char *getDiagnosticMessagePrefix(DiagnosticSeverity Severity);
@@ -244,8 +264,8 @@ public:
/// analysis.
OptBisect &getOptBisect();
private:
- LLVMContext(LLVMContext&) = delete;
- void operator=(LLVMContext&) = delete;
+ // Module needs access to the add/removeModule methods.
+ friend class Module;
/// addModule - Register a module as being instantiated in this context. If
/// the context is deleted, the module will be deleted as well.
@@ -253,9 +273,6 @@ private:
/// removeModule - Unregister a module from this context.
void removeModule(Module*);
-
- // Module needs access to the add/removeModule methods.
- friend class Module;
};
// Create wrappers for C Binding types (see CBindingWrapping.h).
@@ -271,6 +288,6 @@ inline LLVMContextRef *wrap(const LLVMContext **Tys) {
return reinterpret_cast<LLVMContextRef*>(const_cast<LLVMContext**>(Tys));
}
-}
+} // end namespace llvm
-#endif
+#endif // LLVM_IR_LLVMCONTEXT_H
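The new setDiagnosticsOutputFile/getDiagnosticsOutputFile pair lets a client route optimization diagnostics into a YAML file in addition to the diagnostic handler. A minimal sketch under assumptions: the file name, open flags, and bail-out are illustrative, and the raw_fd_ostream must outlive the yaml::Output the context stores, so a real client would keep it alive alongside the context (here it is simply leaked for brevity):

    // Minimal sketch: attach a YAML optimization-record file to a context.
    #include "llvm/ADT/STLExtras.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/Support/FileSystem.h"
    #include "llvm/Support/YAMLTraits.h"
    #include "llvm/Support/raw_ostream.h"
    #include <system_error>

    void attachDiagnosticsFile(llvm::LLVMContext &Ctx) {
      std::error_code EC;
      auto *OS = new llvm::raw_fd_ostream("opt-records.yaml", EC,
                                          llvm::sys::fs::F_None);
      if (EC) {
        delete OS;
        return; // Diagnostics keep flowing through the handler only.
      }
      Ctx.setDiagnosticsOutputFile(llvm::make_unique<llvm::yaml::Output>(*OS));
    }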
diff --git a/contrib/llvm/include/llvm/IR/LegacyPassManagers.h b/contrib/llvm/include/llvm/IR/LegacyPassManagers.h
index 530fd7166498..b22f9302298d 100644
--- a/contrib/llvm/include/llvm/IR/LegacyPassManagers.h
+++ b/contrib/llvm/include/llvm/IR/LegacyPassManagers.h
@@ -486,9 +486,7 @@ public:
// Print passes managed by this manager
void dumpPassStructure(unsigned Offset) override;
- const char *getPassName() const override {
- return "Function Pass Manager";
- }
+ StringRef getPassName() const override { return "Function Pass Manager"; }
FunctionPass *getContainedPass(unsigned N) {
assert ( N < PassVector.size() && "Pass number out of range!");
diff --git a/contrib/llvm/include/llvm/IR/LegacyPassNameParser.h b/contrib/llvm/include/llvm/IR/LegacyPassNameParser.h
index 39ae80d797c7..fd9d468b06cb 100644
--- a/contrib/llvm/include/llvm/IR/LegacyPassNameParser.h
+++ b/contrib/llvm/include/llvm/IR/LegacyPassNameParser.h
@@ -60,20 +60,20 @@ public:
inline bool ignorablePass(const PassInfo *P) const {
// Ignore non-selectable and non-constructible passes! Ignore
// non-optimizations.
- return P->getPassArgument() == nullptr || *P->getPassArgument() == 0 ||
- P->getNormalCtor() == nullptr || ignorablePassImpl(P);
+ return P->getPassArgument().empty() || P->getNormalCtor() == nullptr ||
+ ignorablePassImpl(P);
}
// Implement the PassRegistrationListener callbacks used to populate our map
//
void passRegistered(const PassInfo *P) override {
if (ignorablePass(P)) return;
- if (findOption(P->getPassArgument()) != getNumOptions()) {
+ if (findOption(P->getPassArgument().data()) != getNumOptions()) {
errs() << "Two passes with the same argument (-"
<< P->getPassArgument() << ") attempted to be registered!\n";
llvm_unreachable(nullptr);
}
- addLiteralOption(P->getPassArgument(), P, P->getPassName());
+ addLiteralOption(P->getPassArgument().data(), P, P->getPassName().data());
}
void passEnumerate(const PassInfo *P) override { passRegistered(P); }
@@ -89,7 +89,7 @@ private:
// ValLessThan - Provide a sorting comparator for Values elements...
static int ValLessThan(const PassNameParser::OptionInfo *VT1,
const PassNameParser::OptionInfo *VT2) {
- return std::strcmp(VT1->Name, VT2->Name);
+ return VT1->Name < VT2->Name;
}
};
@@ -130,7 +130,7 @@ template<const char *Args>
class PassArgFilter {
public:
bool operator()(const PassInfo &P) const {
- return(std::strstr(Args, P.getPassArgument()));
+ return StringRef(Args).contains(P.getPassArgument());
}
};
diff --git a/contrib/llvm/include/llvm/IR/MDBuilder.h b/contrib/llvm/include/llvm/IR/MDBuilder.h
index 35341e3271ff..bab8728ed49f 100644
--- a/contrib/llvm/include/llvm/IR/MDBuilder.h
+++ b/contrib/llvm/include/llvm/IR/MDBuilder.h
@@ -66,6 +66,9 @@ public:
/// Return metadata containing the entry count for a function.
MDNode *createFunctionEntryCount(uint64_t Count);
+ /// Return metadata containing the section prefix for a function.
+ MDNode *createFunctionSectionPrefix(StringRef Prefix);
+
//===------------------------------------------------------------------===//
// Range metadata.
//===------------------------------------------------------------------===//
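createFunctionSectionPrefix pairs with the MD_section_prefix metadata kind added in the LLVMContext change above: a front end or profile-guided pass can tag a function with a section prefix. A minimal sketch; the ".hot" prefix string is an assumption, not taken from this patch:

    // Minimal sketch: tag a function with a section prefix.
    #include "llvm/IR/Function.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/MDBuilder.h"

    void markHot(llvm::Function &F) {
      llvm::MDBuilder MDB(F.getContext());
      F.setMetadata(llvm::LLVMContext::MD_section_prefix,
                    MDB.createFunctionSectionPrefix(".hot"));
    }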
diff --git a/contrib/llvm/include/llvm/IR/Mangler.h b/contrib/llvm/include/llvm/IR/Mangler.h
index 349218e33817..0eb91a3b0600 100644
--- a/contrib/llvm/include/llvm/IR/Mangler.h
+++ b/contrib/llvm/include/llvm/IR/Mangler.h
@@ -29,12 +29,7 @@ class Mangler {
/// This keeps track of the number we give to anonymous ones.
mutable DenseMap<const GlobalValue*, unsigned> AnonGlobalIDs;
- /// This simple counter is used to unique value names.
- mutable unsigned NextAnonGlobalID;
-
public:
- Mangler() : NextAnonGlobalID(1) {}
-
/// Print the appropriate prefix and the specified global variable's name.
/// If the global variable doesn't have a name, this fills in a unique name
/// for the global.
diff --git a/contrib/llvm/include/llvm/IR/Metadata.def b/contrib/llvm/include/llvm/IR/Metadata.def
index 607f5ef125c9..03cdcab7dc47 100644
--- a/contrib/llvm/include/llvm/IR/Metadata.def
+++ b/contrib/llvm/include/llvm/IR/Metadata.def
@@ -82,6 +82,7 @@ HANDLE_MDNODE_BRANCH(MDNode)
HANDLE_MDNODE_LEAF_UNIQUABLE(MDTuple)
HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DILocation)
HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIExpression)
+HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIGlobalVariableExpression)
HANDLE_SPECIALIZED_MDNODE_BRANCH(DINode)
HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(GenericDINode)
HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DISubrange)
diff --git a/contrib/llvm/include/llvm/IR/Metadata.h b/contrib/llvm/include/llvm/IR/Metadata.h
index 91f43d342d27..46c785a1c05d 100644
--- a/contrib/llvm/include/llvm/IR/Metadata.h
+++ b/contrib/llvm/include/llvm/IR/Metadata.h
@@ -18,19 +18,30 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/PointerUnion.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/ilist_node.h"
#include "llvm/ADT/iterator_range.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/PointerUnion.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
+#include <memory>
+#include <string>
#include <type_traits>
+#include <utility>
namespace llvm {
-class LLVMContext;
class Module;
class ModuleSlotTracker;
@@ -69,6 +80,7 @@ protected:
: SubclassID(ID), Storage(Storage), SubclassData16(0), SubclassData32(0) {
static_assert(sizeof(*this) == 8, "Metdata fields poorly packed");
}
+
~Metadata() = default;
/// \brief Default handling of a changed operand, which asserts.
@@ -263,6 +275,7 @@ private:
public:
ReplaceableMetadataImpl(LLVMContext &Context)
: Context(Context), NextIndex(0) {}
+
~ReplaceableMetadataImpl() {
assert(UseMap.empty() && "Cannot destroy in-use replaceable metadata");
}
@@ -325,6 +338,7 @@ protected:
: Metadata(ID, Uniqued), ReplaceableMetadataImpl(V->getContext()), V(V) {
assert(V && "Expected valid value");
}
+
~ValueAsMetadata() = default;
public:
@@ -378,6 +392,7 @@ public:
static ConstantAsMetadata *get(Constant *C) {
return ValueAsMetadata::getConstant(C);
}
+
static ConstantAsMetadata *getIfExists(Constant *C) {
return ValueAsMetadata::getConstantIfExists(C);
}
@@ -403,6 +418,7 @@ public:
static LocalAsMetadata *get(Value *Local) {
return ValueAsMetadata::getLocal(Local);
}
+
static LocalAsMetadata *getIfExists(Value *Local) {
return ValueAsMetadata::getLocalIfExists(Local);
}
@@ -463,6 +479,7 @@ public:
namespace mdconst {
namespace detail {
+
template <class T> T &make();
template <class T, class Result> struct HasDereference {
typedef char Yes[1];
@@ -484,6 +501,7 @@ template <class V, class M> struct IsValidReference {
static const bool value = std::is_base_of<Constant, V>::value &&
std::is_convertible<M, const Metadata &>::value;
};
+
} // end namespace detail
/// \brief Check whether Metadata has a Value.
@@ -568,14 +586,14 @@ dyn_extract_or_null(Y &&MD) {
class MDString : public Metadata {
friend class StringMapEntry<MDString>;
- MDString(const MDString &) = delete;
- MDString &operator=(MDString &&) = delete;
- MDString &operator=(const MDString &) = delete;
-
StringMapEntry<MDString> *Entry;
MDString() : Metadata(MDStringKind, Uniqued), Entry(nullptr) {}
public:
+ MDString(const MDString &) = delete;
+ MDString &operator=(MDString &&) = delete;
+ MDString &operator=(const MDString &) = delete;
+
static MDString *get(LLVMContext &Context, StringRef Str);
static MDString *get(LLVMContext &Context, const char *Str) {
return get(Context, Str ? StringRef(Str) : StringRef());
@@ -634,15 +652,18 @@ struct DenseMapInfo<AAMDNodes> {
return AAMDNodes(DenseMapInfo<MDNode *>::getEmptyKey(),
nullptr, nullptr);
}
+
static inline AAMDNodes getTombstoneKey() {
return AAMDNodes(DenseMapInfo<MDNode *>::getTombstoneKey(),
nullptr, nullptr);
}
+
static unsigned getHashValue(const AAMDNodes &Val) {
return DenseMapInfo<MDNode *>::getHashValue(Val.TBAA) ^
DenseMapInfo<MDNode *>::getHashValue(Val.Scope) ^
DenseMapInfo<MDNode *>::getHashValue(Val.NoAlias);
}
+
static bool isEqual(const AAMDNodes &LHS, const AAMDNodes &RHS) {
return LHS == RHS;
}
@@ -656,15 +677,14 @@ struct DenseMapInfo<AAMDNodes> {
///
/// In particular, this is used by \a MDNode.
class MDOperand {
+ Metadata *MD = nullptr;
+
+public:
+ MDOperand() = default;
MDOperand(MDOperand &&) = delete;
MDOperand(const MDOperand &) = delete;
MDOperand &operator=(MDOperand &&) = delete;
MDOperand &operator=(const MDOperand &) = delete;
-
- Metadata *MD;
-
-public:
- MDOperand() : MD(nullptr) {}
~MDOperand() { untrack(); }
Metadata *get() const { return MD; }
@@ -691,6 +711,7 @@ private:
MetadataTracking::track(MD);
}
}
+
void untrack() {
assert(static_cast<void *>(this) == &MD && "Expected same address");
if (MD)
@@ -715,13 +736,6 @@ template <> struct simplify_type<const MDOperand> {
class ContextAndReplaceableUses {
PointerUnion<LLVMContext *, ReplaceableMetadataImpl *> Ptr;
- ContextAndReplaceableUses() = delete;
- ContextAndReplaceableUses(ContextAndReplaceableUses &&) = delete;
- ContextAndReplaceableUses(const ContextAndReplaceableUses &) = delete;
- ContextAndReplaceableUses &operator=(ContextAndReplaceableUses &&) = delete;
- ContextAndReplaceableUses &
- operator=(const ContextAndReplaceableUses &) = delete;
-
public:
ContextAndReplaceableUses(LLVMContext &Context) : Ptr(&Context) {}
ContextAndReplaceableUses(
@@ -729,6 +743,12 @@ public:
: Ptr(ReplaceableUses.release()) {
assert(getReplaceableUses() && "Expected non-null replaceable uses");
}
+ ContextAndReplaceableUses() = delete;
+ ContextAndReplaceableUses(ContextAndReplaceableUses &&) = delete;
+ ContextAndReplaceableUses(const ContextAndReplaceableUses &) = delete;
+ ContextAndReplaceableUses &operator=(ContextAndReplaceableUses &&) = delete;
+ ContextAndReplaceableUses &
+ operator=(const ContextAndReplaceableUses &) = delete;
~ContextAndReplaceableUses() { delete getReplaceableUses(); }
operator LLVMContext &() { return getContext(); }
@@ -737,11 +757,13 @@ public:
bool hasReplaceableUses() const {
return Ptr.is<ReplaceableMetadataImpl *>();
}
+
LLVMContext &getContext() const {
if (hasReplaceableUses())
return getReplaceableUses()->getContext();
return *Ptr.get<LLVMContext *>();
}
+
ReplaceableMetadataImpl *getReplaceableUses() const {
if (hasReplaceableUses())
return Ptr.get<ReplaceableMetadataImpl *>();
@@ -809,10 +831,6 @@ class MDNode : public Metadata {
friend class ReplaceableMetadataImpl;
friend class LLVMContextImpl;
- MDNode(const MDNode &) = delete;
- void operator=(const MDNode &) = delete;
- void *operator new(size_t) = delete;
-
unsigned NumOperands;
unsigned NumUnresolved;
@@ -847,6 +865,10 @@ protected:
}
public:
+ MDNode(const MDNode &) = delete;
+ void operator=(const MDNode &) = delete;
+ void *operator new(size_t) = delete;
+
static inline MDTuple *get(LLVMContext &Context, ArrayRef<Metadata *> MDs);
static inline MDTuple *getIfExists(LLVMContext &Context,
ArrayRef<Metadata *> MDs);
@@ -1002,9 +1024,11 @@ public:
op_iterator op_begin() const {
return const_cast<MDNode *>(this)->mutable_begin();
}
+
op_iterator op_end() const {
return const_cast<MDNode *>(this)->mutable_end();
}
+
op_range operands() const { return op_range(op_begin(), op_end()); }
const MDOperand &getOperand(unsigned I) const {
@@ -1054,6 +1078,7 @@ class MDTuple : public MDNode {
: MDNode(C, MDTupleKind, Storage, Vals) {
setHash(Hash);
}
+
~MDTuple() { dropAllReferences(); }
void setHash(unsigned Hash) { SubclassData32 = Hash; }
@@ -1074,6 +1099,7 @@ public:
static MDTuple *get(LLVMContext &Context, ArrayRef<Metadata *> MDs) {
return getImpl(Context, MDs, Uniqued);
}
+
static MDTuple *getIfExists(LLVMContext &Context, ArrayRef<Metadata *> MDs) {
return getImpl(Context, MDs, Uniqued, /* ShouldCreate */ false);
}
@@ -1106,12 +1132,15 @@ public:
MDTuple *MDNode::get(LLVMContext &Context, ArrayRef<Metadata *> MDs) {
return MDTuple::get(Context, MDs);
}
+
MDTuple *MDNode::getIfExists(LLVMContext &Context, ArrayRef<Metadata *> MDs) {
return MDTuple::getIfExists(Context, MDs);
}
+
MDTuple *MDNode::getDistinct(LLVMContext &Context, ArrayRef<Metadata *> MDs) {
return MDTuple::getDistinct(Context, MDs);
}
+
TempMDTuple MDNode::getTemporary(LLVMContext &Context,
ArrayRef<Metadata *> MDs) {
return MDTuple::getTemporary(Context, MDs);
@@ -1133,16 +1162,20 @@ class TypedMDOperandIterator
public:
TypedMDOperandIterator() = default;
explicit TypedMDOperandIterator(MDNode::op_iterator I) : I(I) {}
+
T *operator*() const { return cast_or_null<T>(*I); }
+
TypedMDOperandIterator &operator++() {
++I;
return *this;
}
+
TypedMDOperandIterator operator++(int) {
TypedMDOperandIterator Temp(*this);
++I;
return Temp;
}
+
bool operator==(const TypedMDOperandIterator &X) const { return I == X.I; }
bool operator!=(const TypedMDOperandIterator &X) const { return I != X.I; }
};
@@ -1213,16 +1246,16 @@ class DistinctMDOperandPlaceholder : public Metadata {
Metadata **Use = nullptr;
- DistinctMDOperandPlaceholder() = delete;
- DistinctMDOperandPlaceholder(DistinctMDOperandPlaceholder &&) = delete;
- DistinctMDOperandPlaceholder(const DistinctMDOperandPlaceholder &) = delete;
-
public:
explicit DistinctMDOperandPlaceholder(unsigned ID)
: Metadata(DistinctMDOperandPlaceholderKind, Distinct) {
SubclassData32 = ID;
}
+ DistinctMDOperandPlaceholder() = delete;
+ DistinctMDOperandPlaceholder(DistinctMDOperandPlaceholder &&) = delete;
+ DistinctMDOperandPlaceholder(const DistinctMDOperandPlaceholder &) = delete;
+
~DistinctMDOperandPlaceholder() {
if (Use)
*Use = nullptr;
@@ -1247,10 +1280,8 @@ public:
///
/// TODO: Inherit from Metadata.
class NamedMDNode : public ilist_node<NamedMDNode> {
- friend struct ilist_traits<NamedMDNode>;
friend class LLVMContextImpl;
friend class Module;
- NamedMDNode(const NamedMDNode &) = delete;
std::string Name;
Module *Parent;
@@ -1263,30 +1294,35 @@ class NamedMDNode : public ilist_node<NamedMDNode> {
template<class T1, class T2>
class op_iterator_impl :
public std::iterator<std::bidirectional_iterator_tag, T2> {
- const NamedMDNode *Node;
- unsigned Idx;
+ const NamedMDNode *Node = nullptr;
+ unsigned Idx = 0;
+
op_iterator_impl(const NamedMDNode *N, unsigned i) : Node(N), Idx(i) { }
friend class NamedMDNode;
public:
- op_iterator_impl() : Node(nullptr), Idx(0) { }
+ op_iterator_impl() = default;
bool operator==(const op_iterator_impl &o) const { return Idx == o.Idx; }
bool operator!=(const op_iterator_impl &o) const { return Idx != o.Idx; }
+
op_iterator_impl &operator++() {
++Idx;
return *this;
}
+
op_iterator_impl operator++(int) {
op_iterator_impl tmp(*this);
operator++();
return tmp;
}
+
op_iterator_impl &operator--() {
--Idx;
return *this;
}
+
op_iterator_impl operator--(int) {
op_iterator_impl tmp(*this);
operator--();
@@ -1297,13 +1333,16 @@ class NamedMDNode : public ilist_node<NamedMDNode> {
};
public:
+ NamedMDNode(const NamedMDNode &) = delete;
+ ~NamedMDNode();
+
/// \brief Drop all references and remove the node from parent module.
void eraseFromParent();
- /// \brief Remove all uses and clear node vector.
- void dropAllReferences();
-
- ~NamedMDNode();
+ /// Remove all uses and clear node vector.
+ void dropAllReferences() { clearOperands(); }
+ /// Drop all references to this node's operands.
+ void clearOperands();
/// \brief Get the module that holds this named metadata collection.
inline Module *getParent() { return Parent; }
@@ -1338,6 +1377,6 @@ public:
}
};
-} // end llvm namespace
+} // end namespace llvm
#endif // LLVM_IR_METADATA_H
diff --git a/contrib/llvm/include/llvm/IR/Module.h b/contrib/llvm/include/llvm/IR/Module.h
index 632b27e2d0dd..79870b9455a6 100644
--- a/contrib/llvm/include/llvm/IR/Module.h
+++ b/contrib/llvm/include/llvm/IR/Module.h
@@ -26,36 +26,18 @@
#include "llvm/Support/CBindingWrapping.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/DataTypes.h"
-#include <system_error>
namespace llvm {
template <typename T> class Optional;
+class Error;
class FunctionType;
class GVMaterializer;
class LLVMContext;
+class MemoryBuffer;
class RandomNumberGenerator;
class StructType;
template <class PtrType> class SmallPtrSetImpl;
-template<> struct ilist_traits<NamedMDNode>
- : public ilist_default_traits<NamedMDNode> {
- // createSentinel is used to get hold of a node that marks the end of
- // the list...
- NamedMDNode *createSentinel() const {
- return static_cast<NamedMDNode*>(&Sentinel);
- }
- static void destroySentinel(NamedMDNode*) {}
-
- NamedMDNode *provideInitialHead() const { return createSentinel(); }
- NamedMDNode *ensureHead(NamedMDNode*) const { return createSentinel(); }
- static void noteHead(NamedMDNode*, NamedMDNode*) {}
- void addNodeToList(NamedMDNode *) {}
- void removeNodeFromList(NamedMDNode *) {}
-
-private:
- mutable ilist_node<NamedMDNode> Sentinel;
-};
-
/// A Module instance is used to store all the information related to an
/// LLVM module. Modules are the top level container of all other LLVM
/// Intermediate Representation (IR) objects. Each module directly contains a
@@ -177,6 +159,9 @@ private:
std::string GlobalScopeAsm; ///< Inline Asm at global scope.
ValueSymbolTable *ValSymTab; ///< Symbol table for values
ComdatSymTabType ComdatSymTab; ///< Symbol table for COMDATs
+ std::unique_ptr<MemoryBuffer>
+ OwnedMemoryBuffer; ///< Memory buffer directly owned by this
+ ///< module, for legacy clients only.
std::unique_ptr<GVMaterializer>
Materializer; ///< Used to materialize GlobalValues
std::string ModuleID; ///< Human readable identifier for the module
@@ -469,16 +454,14 @@ public:
GVMaterializer *getMaterializer() const { return Materializer.get(); }
bool isMaterialized() const { return !getMaterializer(); }
- /// Make sure the GlobalValue is fully read. If the module is corrupt, this
- /// returns true and fills in the optional string with information about the
- /// problem. If successful, this returns false.
- std::error_code materialize(GlobalValue *GV);
+ /// Make sure the GlobalValue is fully read.
+ llvm::Error materialize(GlobalValue *GV);
/// Make sure all GlobalValues in this Module are fully read and clear the
/// Materializer.
- std::error_code materializeAll();
+ llvm::Error materializeAll();
- std::error_code materializeMetadata();
+ llvm::Error materializeMetadata();
/// @}
/// @name Direct access to the globals list, functions list, and symbol table
@@ -603,73 +586,33 @@ public:
return make_range(ifunc_begin(), ifunc_end());
}
-/// @}
-/// @name Convenience iterators
-/// @{
-
- template <bool IsConst> class global_object_iterator_t {
- friend Module;
-
- typename std::conditional<IsConst, const_iterator, iterator>::type
- function_i,
- function_e;
- typename std::conditional<IsConst, const_global_iterator,
- global_iterator>::type global_i;
-
- typedef
- typename std::conditional<IsConst, const Module, Module>::type ModuleTy;
-
- global_object_iterator_t(ModuleTy &M)
- : function_i(M.begin()), function_e(M.end()),
- global_i(M.global_begin()) {}
- global_object_iterator_t(ModuleTy &M, int)
- : function_i(M.end()), function_e(M.end()), global_i(M.global_end()) {}
-
- public:
- global_object_iterator_t &operator++() {
- if (function_i != function_e)
- ++function_i;
- else
- ++global_i;
- return *this;
- }
-
- typename std::conditional<IsConst, const GlobalObject, GlobalObject>::type &
- operator*() const {
- if (function_i != function_e)
- return *function_i;
- else
- return *global_i;
- }
-
- bool operator!=(const global_object_iterator_t &other) const {
- return function_i != other.function_i || global_i != other.global_i;
- }
- };
+ /// @}
+ /// @name Convenience iterators
+ /// @{
- typedef global_object_iterator_t</*IsConst=*/false> global_object_iterator;
- typedef global_object_iterator_t</*IsConst=*/true>
+ typedef concat_iterator<GlobalObject, iterator, global_iterator>
+ global_object_iterator;
+ typedef concat_iterator<const GlobalObject, const_iterator,
+ const_global_iterator>
const_global_object_iterator;
- global_object_iterator global_object_begin() {
- return global_object_iterator(*this);
+ iterator_range<global_object_iterator> global_objects() {
+ return concat<GlobalObject>(functions(), globals());
}
- global_object_iterator global_object_end() {
- return global_object_iterator(*this, 0);
+ iterator_range<const_global_object_iterator> global_objects() const {
+ return concat<const GlobalObject>(functions(), globals());
}
- const_global_object_iterator global_object_begin() const {
- return const_global_object_iterator(*this);
- }
- const_global_object_iterator global_object_end() const {
- return const_global_object_iterator(*this, 0);
+ global_object_iterator global_object_begin() {
+ return global_objects().begin();
}
+ global_object_iterator global_object_end() { return global_objects().end(); }
- iterator_range<global_object_iterator> global_objects() {
- return make_range(global_object_begin(), global_object_end());
+ const_global_object_iterator global_object_begin() const {
+ return global_objects().begin();
}
- iterator_range<const_global_object_iterator> global_objects() const {
- return make_range(global_object_begin(), global_object_end());
+ const_global_object_iterator global_object_end() const {
+ return global_objects().end();
}
/// @}
@@ -821,6 +764,9 @@ public:
/// \brief Returns profile summary metadata
Metadata *getProfileSummary();
/// @}
+
+ /// Take ownership of the given memory buffer.
+ void setOwnedMemoryBuffer(std::unique_ptr<MemoryBuffer> MB);
};
/// \brief Given "llvm.used" or "llvm.compiler.used" as a global name, collect
diff --git a/contrib/llvm/include/llvm/IR/ModuleSummaryIndex.h b/contrib/llvm/include/llvm/IR/ModuleSummaryIndex.h
index 45d9bf7af706..2cfe673d970f 100644
--- a/contrib/llvm/include/llvm/IR/ModuleSummaryIndex.h
+++ b/contrib/llvm/include/llvm/IR/ModuleSummaryIndex.h
@@ -30,23 +30,19 @@ namespace llvm {
/// \brief Class to accumulate and hold information about a callee.
struct CalleeInfo {
- /// The static number of callsites calling corresponding function.
- unsigned CallsiteCount;
- /// The cumulative profile count of calls to corresponding function
- /// (if using PGO, otherwise 0).
- uint64_t ProfileCount;
- CalleeInfo() : CallsiteCount(0), ProfileCount(0) {}
- CalleeInfo(unsigned CallsiteCount, uint64_t ProfileCount)
- : CallsiteCount(CallsiteCount), ProfileCount(ProfileCount) {}
- CalleeInfo &operator+=(uint64_t RHSProfileCount) {
- CallsiteCount++;
- ProfileCount += RHSProfileCount;
- return *this;
+ enum class HotnessType : uint8_t { Unknown = 0, Cold = 1, None = 2, Hot = 3 };
+ HotnessType Hotness = HotnessType::Unknown;
+
+ CalleeInfo() = default;
+ explicit CalleeInfo(HotnessType Hotness) : Hotness(Hotness) {}
+
+ void updateHotness(const HotnessType OtherHotness) {
+ Hotness = std::max(Hotness, OtherHotness);
}
};
-/// Struct to hold value either by GUID or Value*, depending on whether this
-/// is a combined or per-module index, respectively.
+/// Struct to hold value either by GUID or GlobalValue*. Values in combined
+/// indexes as well as indirect calls are GUIDs, all others are GlobalValues.
struct ValueInfo {
/// The value representation used in this instance.
enum ValueInfoKind {
@@ -57,9 +53,9 @@ struct ValueInfo {
/// Union of the two possible value types.
union ValueUnion {
GlobalValue::GUID Id;
- const Value *V;
+ const GlobalValue *GV;
ValueUnion(GlobalValue::GUID Id) : Id(Id) {}
- ValueUnion(const Value *V) : V(V) {}
+ ValueUnion(const GlobalValue *GV) : GV(GV) {}
};
/// The value being represented.
@@ -68,29 +64,45 @@ struct ValueInfo {
ValueInfoKind Kind;
/// Constructor for a GUID value
ValueInfo(GlobalValue::GUID Id = 0) : TheValue(Id), Kind(VI_GUID) {}
- /// Constructor for a Value* value
- ValueInfo(const Value *V) : TheValue(V), Kind(VI_Value) {}
+ /// Constructor for a GlobalValue* value
+ ValueInfo(const GlobalValue *V) : TheValue(V), Kind(VI_Value) {}
/// Accessor for GUID value
GlobalValue::GUID getGUID() const {
assert(Kind == VI_GUID && "Not a GUID type");
return TheValue.Id;
}
- /// Accessor for Value* value
- const Value *getValue() const {
+ /// Accessor for GlobalValue* value
+ const GlobalValue *getValue() const {
assert(Kind == VI_Value && "Not a Value type");
- return TheValue.V;
+ return TheValue.GV;
}
bool isGUID() const { return Kind == VI_GUID; }
};
+template <> struct DenseMapInfo<ValueInfo> {
+ static inline ValueInfo getEmptyKey() { return ValueInfo((GlobalValue *)-1); }
+ static inline ValueInfo getTombstoneKey() {
+ return ValueInfo((GlobalValue *)-2);
+ }
+ static bool isEqual(ValueInfo L, ValueInfo R) {
+ if (L.isGUID() != R.isGUID())
+ return false;
+ return L.isGUID() ? (L.getGUID() == R.getGUID())
+ : (L.getValue() == R.getValue());
+ }
+ static unsigned getHashValue(ValueInfo I) {
+ return I.isGUID() ? I.getGUID() : (uintptr_t)I.getValue();
+ }
+};
+
/// \brief Function and variable summary information to aid decisions and
/// implementation of importing.
class GlobalValueSummary {
public:
/// \brief Sububclass discriminator (for dyn_cast<> et al.)
- enum SummaryKind { AliasKind, FunctionKind, GlobalVarKind };
+ enum SummaryKind : unsigned { AliasKind, FunctionKind, GlobalVarKind };
- /// Group flags (Linkage, hasSection, isOptSize, etc.) as a bitfield.
+ /// Group flags (Linkage, noRename, isOptSize, etc.) as a bitfield.
struct GVFlags {
/// \brief The linkage type of the associated global value.
///
@@ -101,20 +113,47 @@ public:
/// types based on global summary-based analysis.
unsigned Linkage : 4;
- /// Indicate if the global value is located in a specific section.
- unsigned HasSection : 1;
+ /// Indicate if the global value cannot be renamed (in a specific section,
+ /// possibly referenced from inline assembly, etc).
+ unsigned NoRename : 1;
+
+ /// Indicate if a function contains inline assembly (which is opaque),
+ /// that may reference a local value. This is used to prevent importing
+ /// of this function, since we can't promote and rename the uses of the
+ /// local in the inline assembly. Use a flag rather than bloating the
+ /// summary with references to every possible local value in the
+ /// llvm.used set.
+ unsigned HasInlineAsmMaybeReferencingInternal : 1;
+
+ /// Indicate if the function is not viable to inline.
+ unsigned IsNotViableToInline : 1;
/// Convenience Constructors
- explicit GVFlags(GlobalValue::LinkageTypes Linkage, bool HasSection)
- : Linkage(Linkage), HasSection(HasSection) {}
+ explicit GVFlags(GlobalValue::LinkageTypes Linkage, bool NoRename,
+ bool HasInlineAsmMaybeReferencingInternal,
+ bool IsNotViableToInline)
+ : Linkage(Linkage), NoRename(NoRename),
+ HasInlineAsmMaybeReferencingInternal(
+ HasInlineAsmMaybeReferencingInternal),
+ IsNotViableToInline(IsNotViableToInline) {}
+
GVFlags(const GlobalValue &GV)
- : Linkage(GV.getLinkage()), HasSection(GV.hasSection()) {}
+ : Linkage(GV.getLinkage()), NoRename(GV.hasSection()),
+ HasInlineAsmMaybeReferencingInternal(false) {
+ IsNotViableToInline = false;
+ if (const auto *F = dyn_cast<Function>(&GV))
+ // Inliner doesn't handle variadic functions.
+ // FIXME: refactor this to use the same code that inliner is using.
+ IsNotViableToInline = F->isVarArg();
+ }
};
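
As a rough, hypothetical illustration (the helper name is invented here) of how the widened GVFlags constructor might be populated for a function summary:

#include "llvm/IR/Function.h"
#include "llvm/IR/ModuleSummaryIndex.h"

// Hypothetical sketch: pack the per-global flags for a function summary.
// NoRename covers both the section case and possible inline-asm references.
static llvm::GlobalValueSummary::GVFlags
makeFlags(const llvm::Function &F, bool RefsLocalViaInlineAsm) {
  return llvm::GlobalValueSummary::GVFlags(
      F.getLinkage(),
      /*NoRename=*/F.hasSection() || RefsLocalViaInlineAsm,
      /*HasInlineAsmMaybeReferencingInternal=*/RefsLocalViaInlineAsm,
      /*IsNotViableToInline=*/F.isVarArg());
}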
private:
/// Kind of summary for use in dyn_cast<> et al.
SummaryKind Kind;
+ GVFlags Flags;
+
/// This is the hash of the name of the symbol in the original file. It is
/// identical to the GUID for global symbols, but differs for local since the
/// GUID includes the module level id in the hash.
@@ -129,8 +168,6 @@ private:
/// module path string table.
StringRef ModulePath;
- GVFlags Flags;
-
/// List of values referenced by this global value's definition
/// (either by the initializer of a global variable, or referenced
/// from within a function). This does not include functions called, which
@@ -139,7 +176,8 @@ private:
protected:
/// GlobalValueSummary constructor.
- GlobalValueSummary(SummaryKind K, GVFlags Flags) : Kind(K), Flags(Flags) {}
+ GlobalValueSummary(SummaryKind K, GVFlags Flags, std::vector<ValueInfo> Refs)
+ : Kind(K), Flags(Flags), RefEdgeList(std::move(Refs)) {}
public:
virtual ~GlobalValueSummary() = default;
@@ -175,31 +213,34 @@ public:
Flags.Linkage = Linkage;
}
+ bool isNotViableToInline() const { return Flags.IsNotViableToInline; }
+
/// Return true if this summary is for a GlobalValue that needs promotion
/// to be referenced from another module.
bool needsRenaming() const { return GlobalValue::isLocalLinkage(linkage()); }
- /// Return true if this global value is located in a specific section.
- bool hasSection() const { return Flags.HasSection; }
+ /// Return true if this global value cannot be renamed (in a specific section,
+ /// possibly referenced from inline assembly, etc).
+ bool noRename() const { return Flags.NoRename; }
- /// Record a reference from this global value to the global value identified
- /// by \p RefGUID.
- void addRefEdge(GlobalValue::GUID RefGUID) { RefEdgeList.push_back(RefGUID); }
+ /// Flag that this global value cannot be renamed (in a specific section,
+ /// possibly referenced from inline assembly, etc).
+ void setNoRename() { Flags.NoRename = true; }
- /// Record a reference from this global value to the global value identified
- /// by \p RefV.
- void addRefEdge(const Value *RefV) { RefEdgeList.push_back(RefV); }
+ /// Return true if this global value possibly references another value
+ /// that can't be renamed.
+ bool hasInlineAsmMaybeReferencingInternal() const {
+ return Flags.HasInlineAsmMaybeReferencingInternal;
+ }
- /// Record a reference from this global value to each global value identified
- /// in \p RefEdges.
- void addRefEdges(DenseSet<const Value *> &RefEdges) {
- for (auto &RI : RefEdges)
- addRefEdge(RI);
+ /// Flag that this global value possibly references another value that
+ /// can't be renamed.
+ void setHasInlineAsmMaybeReferencingInternal() {
+ Flags.HasInlineAsmMaybeReferencingInternal = true;
}
/// Return the list of values referenced by this global value definition.
- std::vector<ValueInfo> &refs() { return RefEdgeList; }
- const std::vector<ValueInfo> &refs() const { return RefEdgeList; }
+ ArrayRef<ValueInfo> refs() const { return RefEdgeList; }
};
/// \brief Alias summary information.
@@ -208,7 +249,8 @@ class AliasSummary : public GlobalValueSummary {
public:
/// Summary constructors.
- AliasSummary(GVFlags Flags) : GlobalValueSummary(AliasKind, Flags) {}
+ AliasSummary(GVFlags Flags, std::vector<ValueInfo> Refs)
+ : GlobalValueSummary(AliasKind, Flags, std::move(Refs)) {}
/// Check if this is an alias summary.
static bool classof(const GlobalValueSummary *GVS) {
@@ -242,10 +284,17 @@ private:
/// List of <CalleeValueInfo, CalleeInfo> call edge pairs from this function.
std::vector<EdgeTy> CallGraphEdgeList;
+ /// List of type identifiers used by this function, represented as GUIDs.
+ std::vector<GlobalValue::GUID> TypeIdList;
+
public:
/// Summary constructors.
- FunctionSummary(GVFlags Flags, unsigned NumInsts)
- : GlobalValueSummary(FunctionKind, Flags), InstCount(NumInsts) {}
+ FunctionSummary(GVFlags Flags, unsigned NumInsts, std::vector<ValueInfo> Refs,
+ std::vector<EdgeTy> CGEdges,
+ std::vector<GlobalValue::GUID> TypeIds)
+ : GlobalValueSummary(FunctionKind, Flags, std::move(Refs)),
+ InstCount(NumInsts), CallGraphEdgeList(std::move(CGEdges)),
+ TypeIdList(std::move(TypeIds)) {}
/// Check if this is a function summary.
static bool classof(const GlobalValueSummary *GVS) {
@@ -255,38 +304,11 @@ public:
/// Get the instruction count recorded for this function.
unsigned instCount() const { return InstCount; }
- /// Record a call graph edge from this function to the function identified
- /// by \p CalleeGUID, with \p CalleeInfo including the cumulative profile
- /// count (across all calls from this function) or 0 if no PGO.
- void addCallGraphEdge(GlobalValue::GUID CalleeGUID, CalleeInfo Info) {
- CallGraphEdgeList.push_back(std::make_pair(CalleeGUID, Info));
- }
-
- /// Record a call graph edge from this function to each function GUID recorded
- /// in \p CallGraphEdges.
- void
- addCallGraphEdges(DenseMap<GlobalValue::GUID, CalleeInfo> &CallGraphEdges) {
- for (auto &EI : CallGraphEdges)
- addCallGraphEdge(EI.first, EI.second);
- }
-
- /// Record a call graph edge from this function to the function identified
- /// by \p CalleeV, with \p CalleeInfo including the cumulative profile
- /// count (across all calls from this function) or 0 if no PGO.
- void addCallGraphEdge(const Value *CalleeV, CalleeInfo Info) {
- CallGraphEdgeList.push_back(std::make_pair(CalleeV, Info));
- }
-
- /// Record a call graph edge from this function to each function recorded
- /// in \p CallGraphEdges.
- void addCallGraphEdges(DenseMap<const Value *, CalleeInfo> &CallGraphEdges) {
- for (auto &EI : CallGraphEdges)
- addCallGraphEdge(EI.first, EI.second);
- }
-
/// Return the list of <CalleeValueInfo, CalleeInfo> pairs.
- std::vector<EdgeTy> &calls() { return CallGraphEdgeList; }
- const std::vector<EdgeTy> &calls() const { return CallGraphEdgeList; }
+ ArrayRef<EdgeTy> calls() const { return CallGraphEdgeList; }
+
+ /// Returns the list of type identifiers used by this function.
+ ArrayRef<GlobalValue::GUID> type_tests() const { return TypeIdList; }
};
/// \brief Global variable summary information to aid decisions and
@@ -299,7 +321,8 @@ class GlobalVarSummary : public GlobalValueSummary {
public:
/// Summary constructors.
- GlobalVarSummary(GVFlags Flags) : GlobalValueSummary(GlobalVarKind, Flags) {}
+ GlobalVarSummary(GVFlags Flags, std::vector<ValueInfo> Refs)
+ : GlobalValueSummary(GlobalVarKind, Flags, std::move(Refs)) {}
/// Check if this is a global variable summary.
static bool classof(const GlobalValueSummary *GVS) {
@@ -348,13 +371,6 @@ private:
ModulePathStringTableTy ModulePathStringTable;
public:
- ModuleSummaryIndex() = default;
-
- // Disable the copy constructor and assignment operators, so
- // no unexpected copying/moving occurs.
- ModuleSummaryIndex(const ModuleSummaryIndex &) = delete;
- void operator=(const ModuleSummaryIndex &) = delete;
-
gvsummary_iterator begin() { return GlobalValueMap.begin(); }
const_gvsummary_iterator begin() const { return GlobalValueMap.begin(); }
gvsummary_iterator end() { return GlobalValueMap.end(); }
diff --git a/contrib/llvm/include/llvm/IR/NoFolder.h b/contrib/llvm/include/llvm/IR/NoFolder.h
index 61f4817a9b62..def07ffe2ff6 100644
--- a/contrib/llvm/include/llvm/IR/NoFolder.h
+++ b/contrib/llvm/include/llvm/IR/NoFolder.h
@@ -24,6 +24,8 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
namespace llvm {
@@ -31,7 +33,7 @@ namespace llvm {
/// NoFolder - Create "constants" (actually, instructions) with no folding.
class NoFolder {
public:
- explicit NoFolder() {}
+ explicit NoFolder() = default;
//===--------------------------------------------------------------------===//
// Binary Operators
@@ -44,15 +46,19 @@ public:
if (HasNSW) BO->setHasNoSignedWrap();
return BO;
}
+
Instruction *CreateNSWAdd(Constant *LHS, Constant *RHS) const {
return BinaryOperator::CreateNSWAdd(LHS, RHS);
}
+
Instruction *CreateNUWAdd(Constant *LHS, Constant *RHS) const {
return BinaryOperator::CreateNUWAdd(LHS, RHS);
}
+
Instruction *CreateFAdd(Constant *LHS, Constant *RHS) const {
return BinaryOperator::CreateFAdd(LHS, RHS);
}
+
Instruction *CreateSub(Constant *LHS, Constant *RHS,
bool HasNUW = false, bool HasNSW = false) const {
BinaryOperator *BO = BinaryOperator::CreateSub(LHS, RHS);
@@ -60,15 +66,19 @@ public:
if (HasNSW) BO->setHasNoSignedWrap();
return BO;
}
+
Instruction *CreateNSWSub(Constant *LHS, Constant *RHS) const {
return BinaryOperator::CreateNSWSub(LHS, RHS);
}
+
Instruction *CreateNUWSub(Constant *LHS, Constant *RHS) const {
return BinaryOperator::CreateNUWSub(LHS, RHS);
}
+
Instruction *CreateFSub(Constant *LHS, Constant *RHS) const {
return BinaryOperator::CreateFSub(LHS, RHS);
}
+
Instruction *CreateMul(Constant *LHS, Constant *RHS,
bool HasNUW = false, bool HasNSW = false) const {
BinaryOperator *BO = BinaryOperator::CreateMul(LHS, RHS);
@@ -76,45 +86,57 @@ public:
if (HasNSW) BO->setHasNoSignedWrap();
return BO;
}
+
Instruction *CreateNSWMul(Constant *LHS, Constant *RHS) const {
return BinaryOperator::CreateNSWMul(LHS, RHS);
}
+
Instruction *CreateNUWMul(Constant *LHS, Constant *RHS) const {
return BinaryOperator::CreateNUWMul(LHS, RHS);
}
+
Instruction *CreateFMul(Constant *LHS, Constant *RHS) const {
return BinaryOperator::CreateFMul(LHS, RHS);
}
+
Instruction *CreateUDiv(Constant *LHS, Constant *RHS,
bool isExact = false) const {
if (!isExact)
return BinaryOperator::CreateUDiv(LHS, RHS);
return BinaryOperator::CreateExactUDiv(LHS, RHS);
}
+
Instruction *CreateExactUDiv(Constant *LHS, Constant *RHS) const {
return BinaryOperator::CreateExactUDiv(LHS, RHS);
}
+
Instruction *CreateSDiv(Constant *LHS, Constant *RHS,
bool isExact = false) const {
if (!isExact)
return BinaryOperator::CreateSDiv(LHS, RHS);
return BinaryOperator::CreateExactSDiv(LHS, RHS);
}
+
Instruction *CreateExactSDiv(Constant *LHS, Constant *RHS) const {
return BinaryOperator::CreateExactSDiv(LHS, RHS);
}
+
Instruction *CreateFDiv(Constant *LHS, Constant *RHS) const {
return BinaryOperator::CreateFDiv(LHS, RHS);
}
+
Instruction *CreateURem(Constant *LHS, Constant *RHS) const {
return BinaryOperator::CreateURem(LHS, RHS);
}
+
Instruction *CreateSRem(Constant *LHS, Constant *RHS) const {
return BinaryOperator::CreateSRem(LHS, RHS);
}
+
Instruction *CreateFRem(Constant *LHS, Constant *RHS) const {
return BinaryOperator::CreateFRem(LHS, RHS);
}
+
Instruction *CreateShl(Constant *LHS, Constant *RHS, bool HasNUW = false,
bool HasNSW = false) const {
BinaryOperator *BO = BinaryOperator::CreateShl(LHS, RHS);
@@ -122,24 +144,29 @@ public:
if (HasNSW) BO->setHasNoSignedWrap();
return BO;
}
+
Instruction *CreateLShr(Constant *LHS, Constant *RHS,
bool isExact = false) const {
if (!isExact)
return BinaryOperator::CreateLShr(LHS, RHS);
return BinaryOperator::CreateExactLShr(LHS, RHS);
}
+
Instruction *CreateAShr(Constant *LHS, Constant *RHS,
bool isExact = false) const {
if (!isExact)
return BinaryOperator::CreateAShr(LHS, RHS);
return BinaryOperator::CreateExactAShr(LHS, RHS);
}
+
Instruction *CreateAnd(Constant *LHS, Constant *RHS) const {
return BinaryOperator::CreateAnd(LHS, RHS);
}
+
Instruction *CreateOr(Constant *LHS, Constant *RHS) const {
return BinaryOperator::CreateOr(LHS, RHS);
}
+
Instruction *CreateXor(Constant *LHS, Constant *RHS) const {
return BinaryOperator::CreateXor(LHS, RHS);
}
@@ -160,15 +187,19 @@ public:
if (HasNSW) BO->setHasNoSignedWrap();
return BO;
}
+
Instruction *CreateNSWNeg(Constant *C) const {
return BinaryOperator::CreateNSWNeg(C);
}
+
Instruction *CreateNUWNeg(Constant *C) const {
return BinaryOperator::CreateNUWNeg(C);
}
+
Instruction *CreateFNeg(Constant *C) const {
return BinaryOperator::CreateFNeg(C);
}
+
Instruction *CreateNot(Constant *C) const {
return BinaryOperator::CreateNot(C);
}
@@ -181,12 +212,14 @@ public:
ArrayRef<Constant *> IdxList) const {
return ConstantExpr::getGetElementPtr(Ty, C, IdxList);
}
+
Constant *CreateGetElementPtr(Type *Ty, Constant *C, Constant *Idx) const {
// This form of the function only exists to avoid ambiguous overload
// warnings about whether to convert Idx to ArrayRef<Constant *> or
// ArrayRef<Value *>.
return ConstantExpr::getGetElementPtr(Ty, C, Idx);
}
+
Instruction *CreateGetElementPtr(Type *Ty, Constant *C,
ArrayRef<Value *> IdxList) const {
return GetElementPtrInst::Create(Ty, C, IdxList);
@@ -196,6 +229,7 @@ public:
ArrayRef<Constant *> IdxList) const {
return ConstantExpr::getInBoundsGetElementPtr(Ty, C, IdxList);
}
+
Constant *CreateInBoundsGetElementPtr(Type *Ty, Constant *C,
Constant *Idx) const {
// This form of the function only exists to avoid ambiguous overload
@@ -203,6 +237,7 @@ public:
// ArrayRef<Value *>.
return ConstantExpr::getInBoundsGetElementPtr(Ty, C, Idx);
}
+
Instruction *CreateInBoundsGetElementPtr(Type *Ty, Constant *C,
ArrayRef<Value *> IdxList) const {
return GetElementPtrInst::CreateInBounds(Ty, C, IdxList);
@@ -216,13 +251,16 @@ public:
Type *DestTy) const {
return CastInst::Create(Op, C, DestTy);
}
+
Instruction *CreatePointerCast(Constant *C, Type *DestTy) const {
return CastInst::CreatePointerCast(C, DestTy);
}
+
Instruction *CreateIntCast(Constant *C, Type *DestTy,
bool isSigned) const {
return CastInst::CreateIntegerCast(C, DestTy, isSigned);
}
+
Instruction *CreateFPCast(Constant *C, Type *DestTy) const {
return CastInst::CreateFPCast(C, DestTy);
}
@@ -230,15 +268,19 @@ public:
Instruction *CreateBitCast(Constant *C, Type *DestTy) const {
return CreateCast(Instruction::BitCast, C, DestTy);
}
+
Instruction *CreateIntToPtr(Constant *C, Type *DestTy) const {
return CreateCast(Instruction::IntToPtr, C, DestTy);
}
+
Instruction *CreatePtrToInt(Constant *C, Type *DestTy) const {
return CreateCast(Instruction::PtrToInt, C, DestTy);
}
+
Instruction *CreateZExtOrBitCast(Constant *C, Type *DestTy) const {
return CastInst::CreateZExtOrBitCast(C, DestTy);
}
+
Instruction *CreateSExtOrBitCast(Constant *C, Type *DestTy) const {
return CastInst::CreateSExtOrBitCast(C, DestTy);
}
@@ -255,6 +297,7 @@ public:
Constant *LHS, Constant *RHS) const {
return new ICmpInst(P, LHS, RHS);
}
+
Instruction *CreateFCmp(CmpInst::Predicate P,
Constant *LHS, Constant *RHS) const {
return new FCmpInst(P, LHS, RHS);
@@ -294,6 +337,6 @@ public:
}
};
-}
+} // end namespace llvm
-#endif
+#endif // LLVM_IR_NOFOLDER_H
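
For context on where NoFolder gets used, a minimal hedged sketch (function name is illustrative; assumes the IRBuilder template takes the folder as its first parameter in this revision):

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/NoFolder.h"

// With NoFolder, creating an 'add' of two constants emits a real add
// instruction instead of a folded constant, which keeps the original
// operation visible to later passes.
static llvm::Value *emitUnfoldedAdd(llvm::BasicBlock *BB, llvm::Constant *A,
                                    llvm::Constant *B) {
  llvm::IRBuilder<llvm::NoFolder> Builder(BB);
  return Builder.CreateAdd(A, B);
}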
diff --git a/contrib/llvm/include/llvm/IR/Operator.h b/contrib/llvm/include/llvm/IR/Operator.h
index 5880290f3d99..444ce93921f6 100644
--- a/contrib/llvm/include/llvm/IR/Operator.h
+++ b/contrib/llvm/include/llvm/IR/Operator.h
@@ -15,28 +15,22 @@
#ifndef LLVM_IR_OPERATOR_H
#define LLVM_IR_OPERATOR_H
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
+#include <cstddef>
namespace llvm {
-class GetElementPtrInst;
-class BinaryOperator;
-class ConstantExpr;
-
/// This is a utility class that provides an abstraction for the common
/// functionality between Instructions and ConstantExprs.
class Operator : public User {
-private:
- // The Operator class is intended to be used as a utility, and is never itself
- // instantiated.
- void *operator new(size_t, unsigned) = delete;
- void *operator new(size_t s) = delete;
- Operator() = delete;
-
protected:
// NOTE: Cannot use = delete because it's not legal to delete
// an overridden method that's not deleted in the base class. Cannot leave
@@ -44,6 +38,13 @@ protected:
~Operator() override;
public:
+ // The Operator class is intended to be used as a utility, and is never itself
+ // instantiated.
+ Operator() = delete;
+
+ void *operator new(size_t, unsigned) = delete;
+ void *operator new(size_t s) = delete;
+
/// Return the opcode for this Instruction or ConstantExpr.
unsigned getOpcode() const {
if (const Instruction *I = dyn_cast<Instruction>(this))
@@ -81,6 +82,7 @@ public:
private:
friend class Instruction;
friend class ConstantExpr;
+
void setHasNoUnsignedWrap(bool B) {
SubclassOptionalData =
(SubclassOptionalData & ~NoUnsignedWrap) | (B * NoUnsignedWrap);
@@ -132,6 +134,7 @@ public:
private:
friend class Instruction;
friend class ConstantExpr;
+
void setIsExact(bool B) {
SubclassOptionalData = (SubclassOptionalData & ~IsExact) | (B * IsExact);
}
@@ -148,6 +151,7 @@ public:
OpC == Instruction::AShr ||
OpC == Instruction::LShr;
}
+
static inline bool classof(const ConstantExpr *CE) {
return isPossiblyExactOpcode(CE->getOpcode());
}
@@ -164,7 +168,9 @@ public:
class FastMathFlags {
private:
friend class FPMathOperator;
- unsigned Flags;
+
+ unsigned Flags = 0;
+
FastMathFlags(unsigned F) : Flags(F) { }
public:
@@ -176,8 +182,7 @@ public:
AllowReciprocal = (1 << 4)
};
- FastMathFlags() : Flags(0)
- { }
+ FastMathFlags() = default;
/// Whether any flag is set
bool any() const { return Flags != 0; }
@@ -210,7 +215,6 @@ public:
}
};
-
/// Utility class for floating point operations which can have
/// information about relaxed accuracy requirements attached to them.
class FPMathOperator : public Operator {
@@ -230,21 +234,25 @@ private:
setHasAllowReciprocal(true);
}
}
+
void setHasNoNaNs(bool B) {
SubclassOptionalData =
(SubclassOptionalData & ~FastMathFlags::NoNaNs) |
(B * FastMathFlags::NoNaNs);
}
+
void setHasNoInfs(bool B) {
SubclassOptionalData =
(SubclassOptionalData & ~FastMathFlags::NoInfs) |
(B * FastMathFlags::NoInfs);
}
+
void setHasNoSignedZeros(bool B) {
SubclassOptionalData =
(SubclassOptionalData & ~FastMathFlags::NoSignedZeros) |
(B * FastMathFlags::NoSignedZeros);
}
+
void setHasAllowReciprocal(bool B) {
SubclassOptionalData =
(SubclassOptionalData & ~FastMathFlags::AllowReciprocal) |
@@ -299,9 +307,9 @@ public:
return FastMathFlags(SubclassOptionalData);
}
- /// \brief Get the maximum error permitted by this operation in ULPs. An
- /// accuracy of 0.0 means that the operation should be performed with the
- /// default precision.
+ /// Get the maximum error permitted by this operation in ULPs. An accuracy of
+ /// 0.0 means that the operation should be performed with the default
+ /// precision.
float getFPAccuracy() const;
static inline bool classof(const Instruction *I) {
@@ -313,7 +321,6 @@ public:
}
};
-
/// A helper template for defining operators for individual opcodes.
template<typename SuperClass, unsigned Opc>
class ConcreteOperator : public SuperClass {
@@ -343,7 +350,6 @@ class ShlOperator
: public ConcreteOperator<OverflowingBinaryOperator, Instruction::Shl> {
};
-
class SDivOperator
: public ConcreteOperator<PossiblyExactOperator, Instruction::SDiv> {
};
@@ -357,18 +363,18 @@ class LShrOperator
: public ConcreteOperator<PossiblyExactOperator, Instruction::LShr> {
};
-
class ZExtOperator : public ConcreteOperator<Operator, Instruction::ZExt> {};
-
class GEPOperator
: public ConcreteOperator<Operator, Instruction::GetElementPtr> {
+ friend class GetElementPtrInst;
+ friend class ConstantExpr;
+
enum {
- IsInBounds = (1 << 0)
+ IsInBounds = (1 << 0),
+ // InRangeIndex: bits 1-6
};
- friend class GetElementPtrInst;
- friend class ConstantExpr;
void setIsInBounds(bool B) {
SubclassOptionalData =
(SubclassOptionalData & ~IsInBounds) | (B * IsInBounds);
@@ -380,6 +386,13 @@ public:
return SubclassOptionalData & IsInBounds;
}
+ /// Returns the offset of the index with an inrange attachment, or None if
+ /// none.
+ Optional<unsigned> getInRangeIndex() const {
+ if (SubclassOptionalData >> 1 == 0) return None;
+ return (SubclassOptionalData >> 1) - 1;
+ }
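
A standalone hedged sketch of the bit packing implied above (the setter shown is hypothetical; only the getter exists in this header): bit 0 carries IsInBounds, and bits 1-6 store the inrange index biased by one so that zero means no inrange attachment.

#include <cassert>
#include <cstdint>

// Illustration only: mirrors the encoding read by GEPOperator::getInRangeIndex.
struct InRangeBits {
  uint8_t Data = 0; // bit 0: inbounds, bits 1-6: (inrange index + 1)

  void setInRangeIndex(unsigned Idx) {
    assert(Idx + 1 < 64 && "biased index must fit in six bits");
    Data = static_cast<uint8_t>((Data & 1) | ((Idx + 1) << 1));
  }

  int getInRangeIndex() const { // -1 stands in for None in this sketch
    unsigned Raw = Data >> 1;
    return Raw == 0 ? -1 : static_cast<int>(Raw) - 1;
  }
};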
+
inline op_iterator idx_begin() { return op_begin()+1; }
inline const_op_iterator idx_begin() const { return op_begin()+1; }
inline op_iterator idx_end() { return op_end(); }
@@ -463,6 +476,7 @@ public:
const Value *getPointerOperand() const {
return getOperand(0);
}
+
static unsigned getPointerOperandIndex() {
return 0U; // get index for modifying correct operand
}
@@ -493,6 +507,6 @@ public:
}
};
-} // End llvm namespace
+} // end namespace llvm
-#endif
+#endif // LLVM_IR_OPERATOR_H
diff --git a/contrib/llvm/include/llvm/IR/PassManager.h b/contrib/llvm/include/llvm/IR/PassManager.h
index 402d04a54a41..3e4edd893d3c 100644
--- a/contrib/llvm/include/llvm/IR/PassManager.h
+++ b/contrib/llvm/include/llvm/IR/PassManager.h
@@ -41,6 +41,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManagerInternal.h"
@@ -54,50 +55,113 @@
namespace llvm {
-/// \brief An abstract set of preserved analyses following a transformation pass
-/// run.
+/// A special type used by analysis passes to provide an address that
+/// identifies that particular analysis pass type.
///
-/// When a transformation pass is run, it can return a set of analyses whose
-/// results were preserved by that transformation. The default set is "none",
-/// and preserving analyses must be done explicitly.
+/// Analysis passes should have a static data member of this type and derive
+/// from the \c AnalysisInfoMixin to get a static ID method used to identify
+/// the analysis in the pass management infrastructure.
+struct alignas(8) AnalysisKey {};
+
+/// A special type used to provide an address that identifies a set of related
+/// analyses.
+///
+/// These sets are primarily used below to mark sets of analyses as preserved.
+/// An example would be analyses depending only on the CFG of a function.
+/// A transformation can mark that it is preserving the CFG of a function and
+/// then analyses can check for this rather than each transform having to fully
+/// enumerate every analysis preserved.
+struct alignas(8) AnalysisSetKey {};
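
A hypothetical example (this set is not defined anywhere in the patch) of what such a related-analyses set looks like: a type exposing a static AnalysisSetKey through an ID() function.

// Hypothetical analysis set: passes that only rewrite instructions inside
// blocks could mark this preserved, and CFG-based analyses could test for it.
struct HypotheticalCFGOnlyAnalyses {
  static llvm::AnalysisSetKey *ID() { return &SetKey; }

private:
  static llvm::AnalysisSetKey SetKey;
};

llvm::AnalysisSetKey HypotheticalCFGOnlyAnalyses::SetKey;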
+
+/// Class for tracking what analyses are preserved after a transformation pass
+/// runs over some unit of IR.
+///
+/// Transformation passes build and return these objects when run over the IR
+/// to communicate which analyses remain valid afterward. For most passes this
+/// is fairly simple: if they don't change anything all analyses are preserved,
+/// otherwise only a short list of analyses that have been explicitly updated
+/// are preserved.
+///
+/// This class also provides the ability to mark abstract *sets* of analyses as
+/// preserved. These sets allow passes to indicate that they preserve broad
+/// aspects of the IR (such as its CFG) and analyses to opt in to that being
+/// sufficient without the passes having to fully enumerate such analyses.
+///
+/// Finally, this class can represent "abandoning" an analysis, which marks it
+/// as not-preserved even if it would be covered by some abstract set of
+/// analyses.
///
-/// There is also an explicit all state which can be used (for example) when
-/// the IR is not mutated at all.
+/// Given a `PreservedAnalyses` object, an analysis will typically want to
+/// figure out whether it is preserved. In the example below, MyAnalysisType is
+/// preserved if it's not abandoned, and (a) it's explicitly marked as
+/// preserved, (b) the set AllAnalysesOn<MyIRUnit> is preserved, or (c) both
+/// AnalysisSetA and AnalysisSetB are preserved.
+///
+/// ```
+/// auto PAC = PA.getChecker<MyAnalysisType>();
+/// if (PAC.preserved() || PAC.preservedSet<AllAnalysesOn<MyIRUnit>>() ||
+/// (PAC.preservedSet<AnalysisSetA>() &&
+/// PAC.preservedSet<AnalysisSetB>())) {
+/// // The analysis has been successfully preserved ...
+/// }
+/// ```
class PreservedAnalyses {
public:
- // We have to explicitly define all the special member functions because MSVC
- // refuses to generate them.
- PreservedAnalyses() {}
- PreservedAnalyses(const PreservedAnalyses &Arg)
- : PreservedPassIDs(Arg.PreservedPassIDs) {}
- PreservedAnalyses(PreservedAnalyses &&Arg)
- : PreservedPassIDs(std::move(Arg.PreservedPassIDs)) {}
- friend void swap(PreservedAnalyses &LHS, PreservedAnalyses &RHS) {
- using std::swap;
- swap(LHS.PreservedPassIDs, RHS.PreservedPassIDs);
- }
- PreservedAnalyses &operator=(PreservedAnalyses RHS) {
- swap(*this, RHS);
- return *this;
- }
-
/// \brief Convenience factory function for the empty preserved set.
static PreservedAnalyses none() { return PreservedAnalyses(); }
/// \brief Construct a special preserved set that preserves all passes.
static PreservedAnalyses all() {
PreservedAnalyses PA;
- PA.PreservedPassIDs.insert((void *)AllPassesID);
+ PA.PreservedIDs.insert(&AllAnalysesKey);
return PA;
}
- /// \brief Mark a particular pass as preserved, adding it to the set.
- template <typename PassT> void preserve() { preserve(PassT::ID()); }
+ /// Mark an analysis as preserved.
+ template <typename AnalysisT> void preserve() { preserve(AnalysisT::ID()); }
- /// \brief Mark an abstract PassID as preserved, adding it to the set.
- void preserve(void *PassID) {
+ /// Mark an analysis as preserved using its ID.
+ void preserve(AnalysisKey *ID) {
+ // Clear this ID from the explicit not-preserved set if present.
+ NotPreservedAnalysisIDs.erase(ID);
+
+ // If we're not already preserving all analyses (other than those in
+ // NotPreservedAnalysisIDs).
+ if (!areAllPreserved())
+ PreservedIDs.insert(ID);
+ }
+
+ /// Mark an analysis set as preserved.
+ template <typename AnalysisSetT> void preserveSet() {
+ preserveSet(AnalysisSetT::ID());
+ }
+
+ /// Mark an analysis set as preserved using its ID.
+ void preserveSet(AnalysisSetKey *ID) {
+ // If we're not already in the saturated 'all' state, add this set.
if (!areAllPreserved())
- PreservedPassIDs.insert(PassID);
+ PreservedIDs.insert(ID);
+ }
+
+ /// Mark an analysis as abandoned.
+ ///
+ /// An abandoned analysis is not preserved, even if it is nominally covered
+ /// by some other set or was previously explicitly marked as preserved.
+ ///
+ /// Note that you can only abandon a specific analysis, not a *set* of
+ /// analyses.
+ template <typename AnalysisT> void abandon() { abandon(AnalysisT::ID()); }
+
+ /// Mark an analysis as abandoned using its ID.
+ ///
+ /// An abandoned analysis is not preserved, even if it is nominally covered
+ /// by some other set or was previously explicitly marked as preserved.
+ ///
+ /// Note that you can only abandon a specific analysis, not a *set* of
+ /// analyses.
+ void abandon(AnalysisKey *ID) {
+ PreservedIDs.erase(ID);
+ NotPreservedAnalysisIDs.insert(ID);
}
/// \brief Intersect this set with another in place.
@@ -108,12 +172,18 @@ public:
if (Arg.areAllPreserved())
return;
if (areAllPreserved()) {
- PreservedPassIDs = Arg.PreservedPassIDs;
+ *this = Arg;
return;
}
- for (void *P : PreservedPassIDs)
- if (!Arg.PreservedPassIDs.count(P))
- PreservedPassIDs.erase(P);
+ // The intersection requires the *union* of the explicitly not-preserved
+ // IDs and the *intersection* of the preserved IDs.
+ for (auto ID : Arg.NotPreservedAnalysisIDs) {
+ PreservedIDs.erase(ID);
+ NotPreservedAnalysisIDs.insert(ID);
+ }
+ for (auto ID : PreservedIDs)
+ if (!Arg.PreservedIDs.count(ID))
+ PreservedIDs.erase(ID);
}
/// \brief Intersect this set with a temporary other set in place.
@@ -124,54 +194,115 @@ public:
if (Arg.areAllPreserved())
return;
if (areAllPreserved()) {
- PreservedPassIDs = std::move(Arg.PreservedPassIDs);
+ *this = std::move(Arg);
return;
}
- for (void *P : PreservedPassIDs)
- if (!Arg.PreservedPassIDs.count(P))
- PreservedPassIDs.erase(P);
+ // The intersection requires the *union* of the explicitly not-preserved
+ // IDs and the *intersection* of the preserved IDs.
+ for (auto ID : Arg.NotPreservedAnalysisIDs) {
+ PreservedIDs.erase(ID);
+ NotPreservedAnalysisIDs.insert(ID);
+ }
+ for (auto ID : PreservedIDs)
+ if (!Arg.PreservedIDs.count(ID))
+ PreservedIDs.erase(ID);
}
- /// \brief Query whether a pass is marked as preserved by this set.
- template <typename PassT> bool preserved() const {
- return preserved(PassT::ID());
- }
+ /// A checker object that makes it easy to query for whether an analysis or
+ /// some set covering it is preserved.
+ class PreservedAnalysisChecker {
+ friend class PreservedAnalyses;
+
+ const PreservedAnalyses &PA;
+ AnalysisKey *const ID;
+ const bool IsAbandoned;
- /// \brief Query whether an abstract pass ID is marked as preserved by this
- /// set.
- bool preserved(void *PassID) const {
- return PreservedPassIDs.count((void *)AllPassesID) ||
- PreservedPassIDs.count(PassID);
+ /// A PreservedAnalysisChecker is tied to a particular Analysis because
+ /// `preserved()` and `preservedSet()` both return false if the Analysis
+ /// was abandoned.
+ PreservedAnalysisChecker(const PreservedAnalyses &PA, AnalysisKey *ID)
+ : PA(PA), ID(ID), IsAbandoned(PA.NotPreservedAnalysisIDs.count(ID)) {}
+
+ public:
+ /// Returns true if the checker's analysis was not abandoned and the
+ /// analysis is either explicitly preserved or all analyses are
+ /// preserved.
+ bool preserved() {
+ return !IsAbandoned && (PA.PreservedIDs.count(&AllAnalysesKey) ||
+ PA.PreservedIDs.count(ID));
+ }
+
+ /// Returns true if the checker's analysis was not abandoned and the
+ /// provided set type is either explicitly preserved or all analyses are
+ /// preserved.
+ template <typename AnalysisSetT> bool preservedSet() {
+ AnalysisSetKey *SetID = AnalysisSetT::ID();
+ return !IsAbandoned && (PA.PreservedIDs.count(&AllAnalysesKey) ||
+ PA.PreservedIDs.count(SetID));
+ }
+ };
+
+ /// Build a checker for this `PreservedAnalyses` and the specified analysis
+ /// type.
+ ///
+ /// You can use the returned object to query whether an analysis was
+ /// preserved. See the example in the comment on `PreservedAnalysis`.
+ template <typename AnalysisT> PreservedAnalysisChecker getChecker() const {
+ return PreservedAnalysisChecker(*this, AnalysisT::ID());
}
- /// \brief Query whether all of the analyses in the set are preserved.
- bool preserved(PreservedAnalyses Arg) {
- if (Arg.areAllPreserved())
- return areAllPreserved();
- for (void *P : Arg.PreservedPassIDs)
- if (!preserved(P))
- return false;
- return true;
+ /// Build a checker for this `PreservedAnalyses` and the specified analysis
+ /// ID.
+ ///
+ /// You can use the returned object to query whether an analysis was
+ /// preserved. See the example in the comment on `PreservedAnalysis`.
+ PreservedAnalysisChecker getChecker(AnalysisKey *ID) const {
+ return PreservedAnalysisChecker(*this, ID);
}
- /// \brief Test whether all passes are preserved.
+ /// Test whether all analyses are preserved (and none are abandoned).
///
- /// This is used primarily to optimize for the case of no changes which will
- /// common in many scenarios.
+ /// This lets analyses optimize for the common case where a transformation
+ /// made no changes to the IR.
bool areAllPreserved() const {
- return PreservedPassIDs.count((void *)AllPassesID);
+ return NotPreservedAnalysisIDs.empty() &&
+ PreservedIDs.count(&AllAnalysesKey);
+ }
+
+ /// Directly test whether a set of analyses is preserved.
+ ///
+ /// This is only true when no analyses have been explicitly abandoned.
+ template <typename AnalysisSetT> bool allAnalysesInSetPreserved() const {
+ return allAnalysesInSetPreserved(AnalysisSetT::ID());
+ }
+
+ /// Directly test whether a set of analyses is preserved.
+ ///
+ /// This is only true when no analyses have been explicitly abandoned.
+ bool allAnalysesInSetPreserved(AnalysisSetKey *SetID) const {
+ return NotPreservedAnalysisIDs.empty() &&
+ (PreservedIDs.count(&AllAnalysesKey) || PreservedIDs.count(SetID));
}
private:
- // Note that this must not be -1 or -2 as those are already used by the
- // SmallPtrSet.
- static const uintptr_t AllPassesID = (intptr_t)(-3);
+ /// A special key used to indicate all analyses.
+ static AnalysisSetKey AllAnalysesKey;
- SmallPtrSet<void *, 2> PreservedPassIDs;
+ /// The IDs of analyses and analysis sets that are preserved.
+ SmallPtrSet<void *, 2> PreservedIDs;
+
+ /// The IDs of explicitly not-preserved analyses.
+ ///
+ /// If an analysis in this set is covered by a set in `PreservedIDs`, we
+ /// consider it not-preserved. That is, `NotPreservedAnalysisIDs` always
+ /// "wins" over analysis sets in `PreservedIDs`.
+ ///
+ /// Also, a given ID should never occur both here and in `PreservedIDs`.
+ SmallPtrSet<AnalysisKey *, 2> NotPreservedAnalysisIDs;
};
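
A hedged usage sketch of the new API (all analysis and set types below are invented stand-ins, not real LLVM passes): a transformation that changed something reports exactly what survived.

#include "llvm/IR/PassManager.h"

// Stand-in analyses/sets; real code would use AnalysisInfoMixin-based passes.
struct FakeDomTreeAnalysis {
  static llvm::AnalysisKey *ID() { static llvm::AnalysisKey K; return &K; }
};
struct FakeAliasAnalysis {
  static llvm::AnalysisKey *ID() { static llvm::AnalysisKey K; return &K; }
};
struct FakeCFGSet {
  static llvm::AnalysisSetKey *ID() {
    static llvm::AnalysisSetKey K;
    return &K;
  }
};

llvm::PreservedAnalyses reportChanges(bool MadeChange) {
  if (!MadeChange)
    return llvm::PreservedAnalyses::all();
  llvm::PreservedAnalyses PA;
  PA.preserve<FakeDomTreeAnalysis>(); // explicitly kept up to date
  PA.preserveSet<FakeCFGSet>();       // the pass left the CFG alone
  PA.abandon<FakeAliasAnalysis>();    // stale even if a set would cover it
  return PA;
}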
// Forward declare the analysis manager template.
-template <typename IRUnitT> class AnalysisManager;
+template <typename IRUnitT, typename... ExtraArgTs> class AnalysisManager;
/// A CRTP mix-in to automatically provide informational APIs needed for
/// passes.
@@ -195,10 +326,14 @@ template <typename DerivedT> struct PassInfoMixin {
/// specifically used for analyses.
template <typename DerivedT>
struct AnalysisInfoMixin : PassInfoMixin<DerivedT> {
- /// Returns an opaque, unique ID for this pass type.
+ /// Returns an opaque, unique ID for this analysis type.
+ ///
+ /// This ID is a pointer type that is guaranteed to be 8-byte aligned and
+ /// thus suitable for use in sets, maps, and other data structures optimized
+ /// for pointer-like types using the alignment-provided low bits.
///
- /// Note that this requires the derived type provide a static member whose
- /// address can be converted to a void pointer.
+ /// Note that this requires the derived type provide a static \c AnalysisKey
+ /// member called \c Key.
///
/// FIXME: The only reason the derived type needs to provide this rather than
/// this mixin providing it is due to broken implementations which cannot
@@ -207,10 +342,35 @@ struct AnalysisInfoMixin : PassInfoMixin<DerivedT> {
 /// instantiation. The only currently known platforms with this limitation are
/// Windows DLL builds, specifically building each part of LLVM as a DLL. If
/// we ever remove that build configuration, this mixin can provide the
- /// static PassID as well.
- static void *ID() { return (void *)&DerivedT::PassID; }
+ /// static key as well.
+ static AnalysisKey *ID() { return &DerivedT::Key; }
+};
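
A minimal hypothetical analysis under this mixin (names invented): the derived type only needs the static AnalysisKey member plus a Result type and a run method.

#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"

// Hypothetical analysis that just counts basic blocks.
class BlockCountAnalysis : public llvm::AnalysisInfoMixin<BlockCountAnalysis> {
  friend llvm::AnalysisInfoMixin<BlockCountAnalysis>;
  static llvm::AnalysisKey Key;

public:
  using Result = unsigned;

  Result run(llvm::Function &F, llvm::FunctionAnalysisManager &) {
    return static_cast<unsigned>(F.size()); // number of basic blocks
  }
};

llvm::AnalysisKey BlockCountAnalysis::Key;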
+
+/// A class template to provide analysis sets for IR units.
+///
+/// Analyses operate on units of IR. It is useful to be able to talk about
+/// preservation of all analyses for a given unit of IR as a set. This class
+/// template can be used with the \c PreservedAnalyses API for that purpose and
+/// the \c AnalysisManager will automatically check and use this set to skip
+/// invalidation events.
+///
+/// Note that you must provide an explicit instantiation declaration and
+/// definition for this template in order to get the correct behavior on
+/// Windows. Otherwise, the address of SetKey will not be stable.
+template <typename IRUnitT>
+class AllAnalysesOn {
+public:
+ static AnalysisSetKey *ID() { return &SetKey; }
+
+private:
+ static AnalysisSetKey SetKey;
};
+template <typename IRUnitT> AnalysisSetKey AllAnalysesOn<IRUnitT>::SetKey;
+
+extern template class AllAnalysesOn<Module>;
+extern template class AllAnalysesOn<Function>;
+
/// \brief Manages a sequence of passes over units of IR.
///
/// A pass manager contains a sequence of passes to run over units of IR. It is
@@ -222,15 +382,21 @@ struct AnalysisInfoMixin : PassInfoMixin<DerivedT> {
/// that analysis manager to each pass it runs, as well as calling the analysis
/// manager's invalidation routine with the PreservedAnalyses of each pass it
/// runs.
-template <typename IRUnitT>
-class PassManager : public PassInfoMixin<PassManager<IRUnitT>> {
+template <typename IRUnitT,
+ typename AnalysisManagerT = AnalysisManager<IRUnitT>,
+ typename... ExtraArgTs>
+class PassManager : public PassInfoMixin<
+ PassManager<IRUnitT, AnalysisManagerT, ExtraArgTs...>> {
public:
/// \brief Construct a pass manager.
///
/// It can be passed a flag to get debug logging as the passes are run.
- PassManager(bool DebugLogging = false) : DebugLogging(DebugLogging) {}
- // We have to explicitly define all the special member functions because MSVC
- // refuses to generate them.
+ explicit PassManager(bool DebugLogging = false) : DebugLogging(DebugLogging) {}
+
+ // FIXME: These are equivalent to the default move constructor/move
+ // assignment. However, using = default triggers linker errors due to the
+ // explicit instantiations below. Find a way to use the default and remove the
+ // duplicated code here.
PassManager(PassManager &&Arg)
: Passes(std::move(Arg.Passes)),
DebugLogging(std::move(Arg.DebugLogging)) {}
@@ -241,7 +407,8 @@ public:
}
/// \brief Run all of the passes in this manager over the IR.
- PreservedAnalyses run(IRUnitT &IR, AnalysisManager<IRUnitT> &AM) {
+ PreservedAnalyses run(IRUnitT &IR, AnalysisManagerT &AM,
+ ExtraArgTs... ExtraArgs) {
PreservedAnalyses PA = PreservedAnalyses::all();
if (DebugLogging)
@@ -252,16 +419,14 @@ public:
dbgs() << "Running pass: " << Passes[Idx]->name() << " on "
<< IR.getName() << "\n";
- PreservedAnalyses PassPA = Passes[Idx]->run(IR, AM);
+ PreservedAnalyses PassPA = Passes[Idx]->run(IR, AM, ExtraArgs...);
// Update the analysis manager as each pass runs and potentially
- // invalidates analyses. We also update the preserved set of analyses
- // based on what analyses we have already handled the invalidation for
- // here and don't need to invalidate when finished.
- PassPA = AM.invalidate(IR, std::move(PassPA));
+ // invalidates analyses.
+ AM.invalidate(IR, PassPA);
- // Finally, we intersect the final preserved analyses to compute the
- // aggregate preserved set for this pass manager.
+ // Finally, we intersect the preserved analyses to compute the aggregate
+ // preserved set for this pass manager.
PA.intersect(std::move(PassPA));
// FIXME: Historically, the pass managers all called the LLVM context's
@@ -271,6 +436,12 @@ public:
//IR.getContext().yield();
}
+ // Invalidation was handled after each pass in the above loop for the
+ // current unit of IR. Therefore, the remaining analysis results in the
+ // AnalysisManager are preserved. We mark this with a set so that we don't
+ // need to inspect each one individually.
+ PA.preserveSet<AllAnalysesOn<IRUnitT>>();
+
if (DebugLogging)
dbgs() << "Finished " << getTypeName<IRUnitT>() << " pass manager run.\n";
@@ -278,15 +449,15 @@ public:
}
template <typename PassT> void addPass(PassT Pass) {
- typedef detail::PassModel<IRUnitT, PassT> PassModelT;
+ typedef detail::PassModel<IRUnitT, PassT, PreservedAnalyses,
+ AnalysisManagerT, ExtraArgTs...>
+ PassModelT;
Passes.emplace_back(new PassModelT(std::move(Pass)));
}
private:
- typedef detail::PassConcept<IRUnitT> PassConceptT;
-
- PassManager(const PassManager &) = delete;
- PassManager &operator=(const PassManager &) = delete;
+ typedef detail::PassConcept<IRUnitT, AnalysisManagerT, ExtraArgTs...>
+ PassConceptT;
std::vector<std::unique_ptr<PassConceptT>> Passes;
@@ -302,63 +473,190 @@ extern template class PassManager<Function>;
/// \brief Convenience typedef for a pass manager over functions.
typedef PassManager<Function> FunctionPassManager;
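
A hedged end-to-end sketch (the no-op pass is invented) of driving the new pass manager: run() threads the analysis manager through each pass and intersects every returned PreservedAnalyses into the aggregate result.

#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"

// Hypothetical pass that changes nothing, so it preserves everything.
struct HypotheticalNoopPass : llvm::PassInfoMixin<HypotheticalNoopPass> {
  llvm::PreservedAnalyses run(llvm::Function &,
                              llvm::FunctionAnalysisManager &) {
    return llvm::PreservedAnalyses::all();
  }
};

void runPipeline(llvm::Function &F) {
  llvm::FunctionAnalysisManager FAM;
  llvm::FunctionPassManager FPM;
  FPM.addPass(HypotheticalNoopPass());
  // Each pass gets FAM; non-preserved results are invalidated between passes.
  llvm::PreservedAnalyses PA = FPM.run(F, FAM);
  (void)PA;
}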
-namespace detail {
-
-/// \brief A CRTP base used to implement analysis managers.
-///
-/// This class template serves as the boiler plate of an analysis manager. Any
-/// analysis manager can be implemented on top of this base class. Any
-/// implementation will be required to provide specific hooks:
-///
-/// - getResultImpl
-/// - getCachedResultImpl
-/// - invalidateImpl
-///
-/// The details of the call pattern are within.
+/// \brief A generic analysis pass manager with lazy running and caching of
+/// results.
///
-/// Note that there is also a generic analysis manager template which implements
-/// the above required functions along with common datastructures used for
-/// managing analyses. This base class is factored so that if you need to
-/// customize the handling of a specific IR unit, you can do so without
-/// replicating *all* of the boilerplate.
-template <typename DerivedT, typename IRUnitT> class AnalysisManagerBase {
- DerivedT *derived_this() { return static_cast<DerivedT *>(this); }
- const DerivedT *derived_this() const {
- return static_cast<const DerivedT *>(this);
+/// This analysis manager can be used for any IR unit where the address of the
+/// IR unit suffices as its identity. It manages the cache for a unit of IR via
+/// the address of each unit of IR cached.
+template <typename IRUnitT, typename... ExtraArgTs> class AnalysisManager {
+public:
+ class Invalidator;
+
+private:
+ // Now that we've defined our invalidator, we can build types for the concept
+ // types.
+ typedef detail::AnalysisResultConcept<IRUnitT, PreservedAnalyses, Invalidator>
+ ResultConceptT;
+ typedef detail::AnalysisPassConcept<IRUnitT, PreservedAnalyses, Invalidator,
+ ExtraArgTs...>
+ PassConceptT;
+
+ /// \brief List of analysis pass IDs and associated concept pointers.
+ ///
+ /// Requires iterators to be valid across appending new entries and arbitrary
+ /// erases. Provides the analysis ID to enable finding iterators to a given entry
+ /// in maps below, and provides the storage for the actual result concept.
+ typedef std::list<std::pair<AnalysisKey *, std::unique_ptr<ResultConceptT>>>
+ AnalysisResultListT;
+
+ /// \brief Map type from IRUnitT pointer to our custom list type.
+ typedef DenseMap<IRUnitT *, AnalysisResultListT> AnalysisResultListMapT;
+
+ /// \brief Map type from a pair of analysis ID and IRUnitT pointer to an
+ /// iterator into a particular result list which is where the actual result
+ /// is stored.
+ typedef DenseMap<std::pair<AnalysisKey *, IRUnitT *>,
+ typename AnalysisResultListT::iterator>
+ AnalysisResultMapT;
+
+public:
+ /// API to communicate dependencies between analyses during invalidation.
+ ///
+ /// When an analysis result embeds handles to other analysis results, it
+ /// needs to be invalidated both when its own information isn't preserved and
+ /// if any of those embedded analysis results end up invalidated. We pass in
+ /// an \c Invalidator object from the analysis manager in order to let the
+ /// analysis results themselves define the dependency graph on the fly. This
+ /// avoids building an explicit data structure representation of the
+ /// dependencies between analysis results.
+ class Invalidator {
+ public:
+ /// Trigger the invalidation of some other analysis pass if not already
+ /// handled and return whether it will in fact be invalidated.
+ ///
+ /// This is expected to be called from within a given analysis result's \c
+ /// invalidate method to trigger a depth-first walk of all inter-analysis
+ /// dependencies. The same \p IR unit and \p PA passed to that result's \c
+ /// invalidate method should in turn be provided to this routine.
+ ///
+ /// The first time this is called for a given analysis pass, it will
+ /// trigger the corresponding result's \c invalidate method to be called.
+ /// Subsequent calls will use a cache of the results of that initial call.
+ /// It is an error to form cyclic dependencies between analysis results.
+ ///
+ /// This returns true if the given analysis pass's result is invalid and
+ /// any dependencies on it will become invalid as a result.
+ template <typename PassT>
+ bool invalidate(IRUnitT &IR, const PreservedAnalyses &PA) {
+ typedef detail::AnalysisResultModel<IRUnitT, PassT,
+ typename PassT::Result,
+ PreservedAnalyses, Invalidator>
+ ResultModelT;
+ return invalidateImpl<ResultModelT>(PassT::ID(), IR, PA);
+ }
+
+ /// A type-erased variant of the above invalidate method with the same core
+ /// API other than passing an analysis ID rather than an analysis type
+ /// parameter.
+ ///
+ /// This is sadly less efficient than the above routine, which leverages
+ /// the type parameter to avoid the type erasure overhead.
+ bool invalidate(AnalysisKey *ID, IRUnitT &IR, const PreservedAnalyses &PA) {
+ return invalidateImpl<>(ID, IR, PA);
+ }
+
+ private:
+ friend class AnalysisManager;
+
+ template <typename ResultT = ResultConceptT>
+ bool invalidateImpl(AnalysisKey *ID, IRUnitT &IR,
+ const PreservedAnalyses &PA) {
+ // If we've already visited this pass, return true if it was invalidated
+ // and false otherwise.
+ auto IMapI = IsResultInvalidated.find(ID);
+ if (IMapI != IsResultInvalidated.end())
+ return IMapI->second;
+
+ // Otherwise look up the result object.
+ auto RI = Results.find({ID, &IR});
+ assert(RI != Results.end() &&
+ "Trying to invalidate a dependent result that isn't in the "
+ "manager's cache is always an error, likely due to a stale result "
+ "handle!");
+
+ auto &Result = static_cast<ResultT &>(*RI->second->second);
+
+ // Insert into the map whether the result should be invalidated and
+ // return that. Note that we cannot re-use IMapI and must do a fresh
+ // insert here as calling the invalidate routine could (recursively)
+ // insert things into the map making any iterator or reference invalid.
+ bool Inserted;
+ std::tie(IMapI, Inserted) =
+ IsResultInvalidated.insert({ID, Result.invalidate(IR, PA, *this)});
+ (void)Inserted;
+ assert(Inserted && "Should not have already inserted this ID, likely "
+ "indicates a dependency cycle!");
+ return IMapI->second;
+ }
+
+ Invalidator(SmallDenseMap<AnalysisKey *, bool, 8> &IsResultInvalidated,
+ const AnalysisResultMapT &Results)
+ : IsResultInvalidated(IsResultInvalidated), Results(Results) {}
+
+ SmallDenseMap<AnalysisKey *, bool, 8> &IsResultInvalidated;
+ const AnalysisResultMapT &Results;
+ };
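
A hedged sketch of the result-side half of this protocol (both analyses are invented stand-ins): a result that embeds data from another analysis forwards the invalidation query, so the dependency graph is discovered lazily.

#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"

// Stand-in analysis the dependent result relies on.
struct DependeeAnalysis : llvm::AnalysisInfoMixin<DependeeAnalysis> {
  friend llvm::AnalysisInfoMixin<DependeeAnalysis>;
  static llvm::AnalysisKey Key;
  using Result = unsigned;
  Result run(llvm::Function &F, llvm::FunctionAnalysisManager &) {
    return static_cast<unsigned>(F.size());
  }
};
llvm::AnalysisKey DependeeAnalysis::Key;

// A result holding (conceptually) DependeeAnalysis data: it declares itself
// invalid whenever the dependee's result is invalidated, using the
// Invalidator handed in by the analysis manager.
struct DependentResult {
  bool invalidate(llvm::Function &F, const llvm::PreservedAnalyses &PA,
                  llvm::FunctionAnalysisManager::Invalidator &Inv) {
    return Inv.invalidate<DependeeAnalysis>(F, PA);
  }
};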
+
+ /// \brief Construct an empty analysis manager.
+ ///
+ /// A flag can be passed to indicate that the manager should perform debug
+ /// logging.
+ AnalysisManager(bool DebugLogging = false) : DebugLogging(DebugLogging) {}
+ AnalysisManager(AnalysisManager &&) = default;
+ AnalysisManager &operator=(AnalysisManager &&) = default;
+
+ /// \brief Returns true if the analysis manager has an empty results cache.
+ bool empty() const {
+ assert(AnalysisResults.empty() == AnalysisResultLists.empty() &&
+ "The storage and index of analysis results disagree on how many "
+ "there are!");
+ return AnalysisResults.empty();
}
- AnalysisManagerBase(const AnalysisManagerBase &) = delete;
- AnalysisManagerBase &operator=(const AnalysisManagerBase &) = delete;
+ /// \brief Clear any results for a single unit of IR.
+ ///
+ /// This doesn't invalidate but directly clears the results. It is useful
+ /// when the IR is being removed and we want to clear out all the memory
+ /// pinned for it.
+ void clear(IRUnitT &IR) {
+ if (DebugLogging)
+ dbgs() << "Clearing all analysis results for: " << IR.getName() << "\n";
-protected:
- typedef detail::AnalysisResultConcept<IRUnitT> ResultConceptT;
- typedef detail::AnalysisPassConcept<IRUnitT> PassConceptT;
+ auto ResultsListI = AnalysisResultLists.find(&IR);
+ if (ResultsListI == AnalysisResultLists.end())
+ return;
+ // Clear the map pointing into the results list.
+ for (auto &IDAndResult : ResultsListI->second)
+ AnalysisResults.erase({IDAndResult.first, &IR});
- // FIXME: Provide template aliases for the models when we're using C++11 in
- // a mode supporting them.
+ // And actually destroy and erase the results associated with this IR.
+ AnalysisResultLists.erase(ResultsListI);
+ }
- // We have to explicitly define all the special member functions because MSVC
- // refuses to generate them.
- AnalysisManagerBase() {}
- AnalysisManagerBase(AnalysisManagerBase &&Arg)
- : AnalysisPasses(std::move(Arg.AnalysisPasses)) {}
- AnalysisManagerBase &operator=(AnalysisManagerBase &&RHS) {
- AnalysisPasses = std::move(RHS.AnalysisPasses);
- return *this;
+ /// \brief Clear the analysis result cache.
+ ///
+ /// This routine allows cleaning up when the set of IR units itself has
+ /// potentially changed, and thus we can't even look up a result and
+ /// invalidate it directly. Notably, this does *not* call invalidate
+ /// functions as there is nothing to be done for them.
+ void clear() {
+ AnalysisResults.clear();
+ AnalysisResultLists.clear();
}
-public:
/// \brief Get the result of an analysis pass for this module.
///
/// If there is not a valid cached result in the manager already, this will
/// re-run the analysis to produce a valid result.
- template <typename PassT> typename PassT::Result &getResult(IRUnitT &IR) {
+ template <typename PassT>
+ typename PassT::Result &getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs) {
assert(AnalysisPasses.count(PassT::ID()) &&
"This analysis pass was not registered prior to being queried");
-
ResultConceptT &ResultConcept =
- derived_this()->getResultImpl(PassT::ID(), IR);
- typedef detail::AnalysisResultModel<IRUnitT, PassT, typename PassT::Result>
+ getResultImpl(PassT::ID(), IR, ExtraArgs...);
+ typedef detail::AnalysisResultModel<IRUnitT, PassT, typename PassT::Result,
+ PreservedAnalyses, Invalidator>
ResultModelT;
return static_cast<ResultModelT &>(ResultConcept).Result;
}
@@ -373,12 +671,12 @@ public:
assert(AnalysisPasses.count(PassT::ID()) &&
"This analysis pass was not registered prior to being queried");
- ResultConceptT *ResultConcept =
- derived_this()->getCachedResultImpl(PassT::ID(), IR);
+ ResultConceptT *ResultConcept = getCachedResultImpl(PassT::ID(), IR);
if (!ResultConcept)
return nullptr;
- typedef detail::AnalysisResultModel<IRUnitT, PassT, typename PassT::Result>
+ typedef detail::AnalysisResultModel<IRUnitT, PassT, typename PassT::Result,
+ PreservedAnalyses, Invalidator>
ResultModelT;
return &static_cast<ResultModelT *>(ResultConcept)->Result;
}
@@ -401,9 +699,12 @@ public:
/// interface also lends itself to minimizing the number of times we have to
/// do lookups for analyses or construct complex passes only to throw them
/// away.
- template <typename PassBuilderT> bool registerPass(PassBuilderT PassBuilder) {
+ template <typename PassBuilderT>
+ bool registerPass(PassBuilderT &&PassBuilder) {
typedef decltype(PassBuilder()) PassT;
- typedef detail::AnalysisPassModel<IRUnitT, PassT> PassModelT;
+ typedef detail::AnalysisPassModel<IRUnitT, PassT, PreservedAnalyses,
+ Invalidator, ExtraArgTs...>
+ PassModelT;
auto &PassPtr = AnalysisPasses[PassT::ID()];
if (PassPtr)
@@ -421,125 +722,112 @@ public:
template <typename PassT> void invalidate(IRUnitT &IR) {
assert(AnalysisPasses.count(PassT::ID()) &&
"This analysis pass was not registered prior to being invalidated");
- derived_this()->invalidateImpl(PassT::ID(), IR);
+ invalidateImpl(PassT::ID(), IR);
}
/// \brief Invalidate analyses cached for an IR unit.
///
/// Walk through all of the analyses pertaining to this unit of IR and
/// invalidate them unless they are preserved by the PreservedAnalyses set.
- /// We accept the PreservedAnalyses set by value and update it with each
- /// analyis pass which has been successfully invalidated and thus can be
- /// preserved going forward. The updated set is returned.
- PreservedAnalyses invalidate(IRUnitT &IR, PreservedAnalyses PA) {
- return derived_this()->invalidateImpl(IR, std::move(PA));
- }
-
-protected:
- /// \brief Lookup a registered analysis pass.
- PassConceptT &lookupPass(void *PassID) {
- typename AnalysisPassMapT::iterator PI = AnalysisPasses.find(PassID);
- assert(PI != AnalysisPasses.end() &&
- "Analysis passes must be registered prior to being queried!");
- return *PI->second;
- }
-
- /// \brief Lookup a registered analysis pass.
- const PassConceptT &lookupPass(void *PassID) const {
- typename AnalysisPassMapT::const_iterator PI = AnalysisPasses.find(PassID);
- assert(PI != AnalysisPasses.end() &&
- "Analysis passes must be registered prior to being queried!");
- return *PI->second;
- }
+ void invalidate(IRUnitT &IR, const PreservedAnalyses &PA) {
+ // We're done if all analyses on this IR unit are preserved.
+ if (PA.allAnalysesInSetPreserved<AllAnalysesOn<IRUnitT>>())
+ return;
-private:
- /// \brief Map type from module analysis pass ID to pass concept pointer.
- typedef DenseMap<void *, std::unique_ptr<PassConceptT>> AnalysisPassMapT;
+ if (DebugLogging)
+ dbgs() << "Invalidating all non-preserved analyses for: " << IR.getName()
+ << "\n";
- /// \brief Collection of module analysis passes, indexed by ID.
- AnalysisPassMapT AnalysisPasses;
-};
+ // Track whether each pass's result is invalidated. Memoize the results
+ // using the IsResultInvalidated map.
+ SmallDenseMap<AnalysisKey *, bool, 8> IsResultInvalidated;
+ Invalidator Inv(IsResultInvalidated, AnalysisResults);
+ AnalysisResultListT &ResultsList = AnalysisResultLists[&IR];
+ for (auto &AnalysisResultPair : ResultsList) {
+ // This is basically the same thing as Invalidator::invalidate, but we
+ // can't call it here because we're operating on the type-erased result.
+ // Moreover if we instead called invalidate() directly, it would do an
+ // unnecessary look up in ResultsList.
+ AnalysisKey *ID = AnalysisResultPair.first;
+ auto &Result = *AnalysisResultPair.second;
+
+ auto IMapI = IsResultInvalidated.find(ID);
+ if (IMapI != IsResultInvalidated.end())
+ // This result was already handled via the Invalidator.
+ continue;
-} // End namespace detail
+ // Try to invalidate the result, giving it the Invalidator so it can
+ // recursively query for any dependencies it has and record the result.
+ // Note that we cannot re-use 'IMapI' here or pre-insert the ID as the
+ // invalidate method may insert things into the map as well, invalidating
+ // any iterator or pointer.
+ bool Inserted =
+ IsResultInvalidated.insert({ID, Result.invalidate(IR, PA, Inv)})
+ .second;
+ (void)Inserted;
+ assert(Inserted && "Should never have already inserted this ID, likely "
+ "indicates a cycle!");
+ }
-/// \brief A generic analysis pass manager with lazy running and caching of
-/// results.
-///
-/// This analysis manager can be used for any IR unit where the address of the
-/// IR unit sufficies as its identity. It manages the cache for a unit of IR via
-/// the address of each unit of IR cached.
-template <typename IRUnitT>
-class AnalysisManager
- : public detail::AnalysisManagerBase<AnalysisManager<IRUnitT>, IRUnitT> {
- friend class detail::AnalysisManagerBase<AnalysisManager<IRUnitT>, IRUnitT>;
- typedef detail::AnalysisManagerBase<AnalysisManager<IRUnitT>, IRUnitT> BaseT;
- typedef typename BaseT::ResultConceptT ResultConceptT;
- typedef typename BaseT::PassConceptT PassConceptT;
+ // Now erase the results that were marked above as invalidated.
+ if (!IsResultInvalidated.empty()) {
+ for (auto I = ResultsList.begin(), E = ResultsList.end(); I != E;) {
+ AnalysisKey *ID = I->first;
+ if (!IsResultInvalidated.lookup(ID)) {
+ ++I;
+ continue;
+ }
-public:
- // Most public APIs are inherited from the CRTP base class.
+ if (DebugLogging)
+ dbgs() << "Invalidating analysis: " << this->lookUpPass(ID).name()
+ << "\n";
- /// \brief Construct an empty analysis manager.
- ///
- /// A flag can be passed to indicate that the manager should perform debug
- /// logging.
- AnalysisManager(bool DebugLogging = false) : DebugLogging(DebugLogging) {}
+ I = ResultsList.erase(I);
+ AnalysisResults.erase({ID, &IR});
+ }
+ }
- // We have to explicitly define all the special member functions because MSVC
- // refuses to generate them.
- AnalysisManager(AnalysisManager &&Arg)
- : BaseT(std::move(static_cast<BaseT &>(Arg))),
- AnalysisResults(std::move(Arg.AnalysisResults)),
- DebugLogging(std::move(Arg.DebugLogging)) {}
- AnalysisManager &operator=(AnalysisManager &&RHS) {
- BaseT::operator=(std::move(static_cast<BaseT &>(RHS)));
- AnalysisResults = std::move(RHS.AnalysisResults);
- DebugLogging = std::move(RHS.DebugLogging);
- return *this;
+ if (ResultsList.empty())
+ AnalysisResultLists.erase(&IR);
}
- /// \brief Returns true if the analysis manager has an empty results cache.
- bool empty() const {
- assert(AnalysisResults.empty() == AnalysisResultLists.empty() &&
- "The storage and index of analysis results disagree on how many "
- "there are!");
- return AnalysisResults.empty();
+private:
+ /// \brief Look up a registered analysis pass.
+ PassConceptT &lookUpPass(AnalysisKey *ID) {
+ typename AnalysisPassMapT::iterator PI = AnalysisPasses.find(ID);
+ assert(PI != AnalysisPasses.end() &&
+ "Analysis passes must be registered prior to being queried!");
+ return *PI->second;
}
- /// \brief Clear the analysis result cache.
- ///
- /// This routine allows cleaning up when the set of IR units itself has
- /// potentially changed, and thus we can't even look up a a result and
- /// invalidate it directly. Notably, this does *not* call invalidate functions
- /// as there is nothing to be done for them.
- void clear() {
- AnalysisResults.clear();
- AnalysisResultLists.clear();
+ /// \brief Look up a registered analysis pass.
+ const PassConceptT &lookUpPass(AnalysisKey *ID) const {
+ typename AnalysisPassMapT::const_iterator PI = AnalysisPasses.find(ID);
+ assert(PI != AnalysisPasses.end() &&
+ "Analysis passes must be registered prior to being queried!");
+ return *PI->second;
}
-private:
- AnalysisManager(const AnalysisManager &) = delete;
- AnalysisManager &operator=(const AnalysisManager &) = delete;
-
/// \brief Get an analysis result, running the pass if necessary.
- ResultConceptT &getResultImpl(void *PassID, IRUnitT &IR) {
+ ResultConceptT &getResultImpl(AnalysisKey *ID, IRUnitT &IR,
+ ExtraArgTs... ExtraArgs) {
typename AnalysisResultMapT::iterator RI;
bool Inserted;
std::tie(RI, Inserted) = AnalysisResults.insert(std::make_pair(
- std::make_pair(PassID, &IR), typename AnalysisResultListT::iterator()));
+ std::make_pair(ID, &IR), typename AnalysisResultListT::iterator()));
// If we don't have a cached result for this function, look up the pass and
// run it to produce a result, which we then add to the cache.
if (Inserted) {
- auto &P = this->lookupPass(PassID);
+ auto &P = this->lookUpPass(ID);
if (DebugLogging)
dbgs() << "Running analysis: " << P.name() << "\n";
AnalysisResultListT &ResultList = AnalysisResultLists[&IR];
- ResultList.emplace_back(PassID, P.run(IR, *this));
+ ResultList.emplace_back(ID, P.run(IR, *this, ExtraArgs...));
// P.run may have inserted elements into AnalysisResults and invalidated
// RI.
- RI = AnalysisResults.find(std::make_pair(PassID, &IR));
+ RI = AnalysisResults.find({ID, &IR});
assert(RI != AnalysisResults.end() && "we just inserted it!");
RI->second = std::prev(ResultList.end());
@@ -549,84 +837,31 @@ private:
}
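
A brief usage sketch of the lazy path implemented above (MyFunctionAnalysis is a hypothetical analysis): the first getResult call runs the pass and caches its result, and subsequent calls hand back the same cached object.

    #include "llvm/IR/PassManager.h"
    #include <cassert>
    using namespace llvm;

    void query(Function &F) {
      FunctionAnalysisManager FAM;
      FAM.registerPass([] { return MyFunctionAnalysis(); }); // hypothetical analysis
      auto &R1 = FAM.getResult<MyFunctionAnalysis>(F); // runs the analysis
      auto &R2 = FAM.getResult<MyFunctionAnalysis>(F); // cache hit, same object
      assert(&R1 == &R2);
      (void)R1; (void)R2;
    }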
/// \brief Get a cached analysis result or return null.
- ResultConceptT *getCachedResultImpl(void *PassID, IRUnitT &IR) const {
+ ResultConceptT *getCachedResultImpl(AnalysisKey *ID, IRUnitT &IR) const {
typename AnalysisResultMapT::const_iterator RI =
- AnalysisResults.find(std::make_pair(PassID, &IR));
+ AnalysisResults.find({ID, &IR});
return RI == AnalysisResults.end() ? nullptr : &*RI->second->second;
}
/// \brief Invalidate a function pass result.
- void invalidateImpl(void *PassID, IRUnitT &IR) {
+ void invalidateImpl(AnalysisKey *ID, IRUnitT &IR) {
typename AnalysisResultMapT::iterator RI =
- AnalysisResults.find(std::make_pair(PassID, &IR));
+ AnalysisResults.find({ID, &IR});
if (RI == AnalysisResults.end())
return;
if (DebugLogging)
- dbgs() << "Invalidating analysis: " << this->lookupPass(PassID).name()
+ dbgs() << "Invalidating analysis: " << this->lookUpPass(ID).name()
<< "\n";
AnalysisResultLists[&IR].erase(RI->second);
AnalysisResults.erase(RI);
}
- /// \brief Invalidate the results for a function..
- PreservedAnalyses invalidateImpl(IRUnitT &IR, PreservedAnalyses PA) {
- // Short circuit for a common case of all analyses being preserved.
- if (PA.areAllPreserved())
- return PA;
-
- if (DebugLogging)
- dbgs() << "Invalidating all non-preserved analyses for: " << IR.getName()
- << "\n";
-
- // Clear all the invalidated results associated specifically with this
- // function.
- SmallVector<void *, 8> InvalidatedPassIDs;
- AnalysisResultListT &ResultsList = AnalysisResultLists[&IR];
- for (typename AnalysisResultListT::iterator I = ResultsList.begin(),
- E = ResultsList.end();
- I != E;) {
- void *PassID = I->first;
-
- // Pass the invalidation down to the pass itself to see if it thinks it is
- // necessary. The analysis pass can return false if no action on the part
- // of the analysis manager is required for this invalidation event.
- if (I->second->invalidate(IR, PA)) {
- if (DebugLogging)
- dbgs() << "Invalidating analysis: " << this->lookupPass(PassID).name()
- << "\n";
-
- InvalidatedPassIDs.push_back(I->first);
- I = ResultsList.erase(I);
- } else {
- ++I;
- }
-
- // After handling each pass, we mark it as preserved. Once we've
- // invalidated any stale results, the rest of the system is allowed to
- // start preserving this analysis again.
- PA.preserve(PassID);
- }
- while (!InvalidatedPassIDs.empty())
- AnalysisResults.erase(
- std::make_pair(InvalidatedPassIDs.pop_back_val(), &IR));
- if (ResultsList.empty())
- AnalysisResultLists.erase(&IR);
-
- return PA;
- }
-
- /// \brief List of function analysis pass IDs and associated concept pointers.
- ///
- /// Requires iterators to be valid across appending new entries and arbitrary
- /// erases. Provides both the pass ID and concept pointer such that it is
- /// half of a bijection and provides storage for the actual result concept.
- typedef std::list<std::pair<
- void *, std::unique_ptr<detail::AnalysisResultConcept<IRUnitT>>>>
- AnalysisResultListT;
+ /// \brief Map type from analysis pass ID to pass concept pointer.
+ typedef DenseMap<AnalysisKey *, std::unique_ptr<PassConceptT>> AnalysisPassMapT;
- /// \brief Map type from function pointer to our custom list type.
- typedef DenseMap<IRUnitT *, AnalysisResultListT> AnalysisResultListMapT;
+ /// \brief Collection of analysis passes, indexed by ID.
+ AnalysisPassMapT AnalysisPasses;
/// \brief Map from function to a list of function analysis results.
///
@@ -634,12 +869,6 @@ private:
/// the ultimate storage for a particular cached analysis result.
AnalysisResultListMapT AnalysisResultLists;
- /// \brief Map type from a pair of analysis ID and function pointer to an
- /// iterator into a particular result list.
- typedef DenseMap<std::pair<void *, IRUnitT *>,
- typename AnalysisResultListT::iterator>
- AnalysisResultMapT;
-
/// \brief Map from an analysis ID and function to a particular cached
/// analysis result.
AnalysisResultMapT AnalysisResults;
@@ -668,78 +897,68 @@ typedef AnalysisManager<Function> FunctionAnalysisManager;
/// Note that the proxy's result is a move-only object and represents ownership
/// of the validity of the analyses in the \c FunctionAnalysisManager it
/// provides.
-template <typename AnalysisManagerT, typename IRUnitT>
+template <typename AnalysisManagerT, typename IRUnitT, typename... ExtraArgTs>
class InnerAnalysisManagerProxy
: public AnalysisInfoMixin<
InnerAnalysisManagerProxy<AnalysisManagerT, IRUnitT>> {
public:
class Result {
public:
- explicit Result(AnalysisManagerT &AM) : AM(&AM) {}
- Result(Result &&Arg) : AM(std::move(Arg.AM)) {
+ explicit Result(AnalysisManagerT &InnerAM) : InnerAM(&InnerAM) {}
+ Result(Result &&Arg) : InnerAM(std::move(Arg.InnerAM)) {
// We have to null out the analysis manager in the moved-from state
// because we are taking ownership of the responsibility to clear the
// analysis state.
- Arg.AM = nullptr;
+ Arg.InnerAM = nullptr;
}
Result &operator=(Result &&RHS) {
- AM = RHS.AM;
+ InnerAM = RHS.InnerAM;
// We have to null out the analysis manager in the moved-from state
// because we are taking ownership of the responsibility to clear the
// analysis state.
- RHS.AM = nullptr;
+ RHS.InnerAM = nullptr;
return *this;
}
~Result() {
- // AM is cleared in a moved from state where there is nothing to do.
- if (!AM)
+ // InnerAM is cleared in a moved from state where there is nothing to do.
+ if (!InnerAM)
return;
// Clear out the analysis manager if we're being destroyed -- it means we
// didn't even see an invalidate call when we got invalidated.
- AM->clear();
+ InnerAM->clear();
}
/// \brief Accessor for the analysis manager.
- AnalysisManagerT &getManager() { return *AM; }
+ AnalysisManagerT &getManager() { return *InnerAM; }
- /// \brief Handler for invalidation of the module.
+ /// \brief Handler for invalidation of the outer IR unit.
///
/// If this analysis itself is preserved, then we assume that the set of \c
- /// Function objects in the \c Module hasn't changed and thus we don't need
- /// to invalidate *all* cached data associated with a \c Function* in the \c
- /// FunctionAnalysisManager.
+ /// IR units that the inner analysis manager controls hasn't changed and
+ /// thus we don't need to invalidate *all* cached data associated with any
+ /// \c IRUnitT* in the \c AnalysisManagerT.
///
/// Regardless of whether this analysis is marked as preserved, all of the
- /// analyses in the \c FunctionAnalysisManager are potentially invalidated
- /// based on the set of preserved analyses.
- bool invalidate(IRUnitT &IR, const PreservedAnalyses &PA) {
- // If this proxy isn't marked as preserved, then we can't even invalidate
- // individual function analyses, there may be an invalid set of Function
- // objects in the cache making it impossible to incrementally preserve
- // them. Just clear the entire manager.
- if (!PA.preserved(InnerAnalysisManagerProxy::ID()))
- AM->clear();
-
- // Return false to indicate that this result is still a valid proxy.
- return false;
- }
+ /// analyses in the \c AnalysisManagerT are potentially invalidated (for
+ /// the relevant inner set of their IR units) based on the set of preserved
+ /// analyses.
+ ///
+ /// Because this needs to understand the mapping from one IR unit to an
+ /// inner IR unit, this method isn't defined in the primary template.
+ /// Instead, each specialization of this template will need to provide an
+ /// explicit specialization of this method to handle that particular pair
+ /// of IR unit and inner AnalysisManagerT.
+ bool invalidate(
+ IRUnitT &IR, const PreservedAnalyses &PA,
+ typename AnalysisManager<IRUnitT, ExtraArgTs...>::Invalidator &Inv);
private:
- AnalysisManagerT *AM;
+ AnalysisManagerT *InnerAM;
};
- explicit InnerAnalysisManagerProxy(AnalysisManagerT &AM) : AM(&AM) {}
- // We have to explicitly define all the special member functions because MSVC
- // refuses to generate them.
- InnerAnalysisManagerProxy(const InnerAnalysisManagerProxy &Arg)
- : AM(Arg.AM) {}
- InnerAnalysisManagerProxy(InnerAnalysisManagerProxy &&Arg)
- : AM(std::move(Arg.AM)) {}
- InnerAnalysisManagerProxy &operator=(InnerAnalysisManagerProxy RHS) {
- std::swap(AM, RHS.AM);
- return *this;
- }
+ explicit InnerAnalysisManagerProxy(AnalysisManagerT &InnerAM)
+ : InnerAM(&InnerAM) {}
/// \brief Run the analysis pass and create our proxy result object.
///
@@ -750,25 +969,39 @@ public:
/// In debug builds, it will also assert that the analysis manager is empty
/// as no queries should arrive at the function analysis manager prior to
/// this analysis being requested.
- Result run(IRUnitT &IR, AnalysisManager<IRUnitT> &) { return Result(*AM); }
+ Result run(IRUnitT &IR, AnalysisManager<IRUnitT, ExtraArgTs...> &AM,
+ ExtraArgTs...) {
+ return Result(*InnerAM);
+ }
private:
friend AnalysisInfoMixin<
InnerAnalysisManagerProxy<AnalysisManagerT, IRUnitT>>;
- static char PassID;
+ static AnalysisKey Key;
- AnalysisManagerT *AM;
+ AnalysisManagerT *InnerAM;
};
-template <typename AnalysisManagerT, typename IRUnitT>
-char InnerAnalysisManagerProxy<AnalysisManagerT, IRUnitT>::PassID;
+template <typename AnalysisManagerT, typename IRUnitT, typename... ExtraArgTs>
+AnalysisKey
+ InnerAnalysisManagerProxy<AnalysisManagerT, IRUnitT, ExtraArgTs...>::Key;
-extern template class InnerAnalysisManagerProxy<FunctionAnalysisManager,
- Module>;
/// Provide the \c FunctionAnalysisManager to \c Module proxy.
typedef InnerAnalysisManagerProxy<FunctionAnalysisManager, Module>
FunctionAnalysisManagerModuleProxy;
+/// Specialization of the invalidate method for the \c
+/// FunctionAnalysisManagerModuleProxy's result.
+template <>
+bool FunctionAnalysisManagerModuleProxy::Result::invalidate(
+ Module &M, const PreservedAnalyses &PA,
+ ModuleAnalysisManager::Invalidator &Inv);
+
+// Ensure the \c FunctionAnalysisManagerModuleProxy is provided as an extern
+// template.
+extern template class InnerAnalysisManagerProxy<FunctionAnalysisManager,
+ Module>;
+
/// \brief A function analysis which acts as a proxy for a module analysis
/// manager.
///
@@ -778,10 +1011,14 @@ typedef InnerAnalysisManagerProxy<FunctionAnalysisManager, Module>
/// cannot request a module analysis to actually run. Instead, the user must
/// rely on the \c getCachedResult API.
///
-/// This proxy *doesn't* manage the invalidation in any way. That is handled by
-/// the recursive return path of each layer of the pass manager and the
-/// returned PreservedAnalysis set.
-template <typename AnalysisManagerT, typename IRUnitT>
+/// The invalidation provided by this proxy involves tracking when an
+/// invalidation event in the outer analysis manager needs to trigger an
+/// invalidation of a particular analysis on this IR unit.
+///
+/// Because outer analyses aren't invalidated while these IR units are being
+/// processed, we have to register and handle these as deferred invalidation
+/// events.
+template <typename AnalysisManagerT, typename IRUnitT, typename... ExtraArgTs>
class OuterAnalysisManagerProxy
: public AnalysisInfoMixin<
OuterAnalysisManagerProxy<AnalysisManagerT, IRUnitT>> {
@@ -790,51 +1027,71 @@ public:
class Result {
public:
explicit Result(const AnalysisManagerT &AM) : AM(&AM) {}
- // We have to explicitly define all the special member functions because
- // MSVC refuses to generate them.
- Result(const Result &Arg) : AM(Arg.AM) {}
- Result(Result &&Arg) : AM(std::move(Arg.AM)) {}
- Result &operator=(Result RHS) {
- std::swap(AM, RHS.AM);
- return *this;
- }
const AnalysisManagerT &getManager() const { return *AM; }
/// \brief Handle invalidation by ignoring it, this pass is immutable.
- bool invalidate(IRUnitT &) { return false; }
+ bool invalidate(
+ IRUnitT &, const PreservedAnalyses &,
+ typename AnalysisManager<IRUnitT, ExtraArgTs...>::Invalidator &) {
+ return false;
+ }
+
+ /// Register a deferred invalidation event for when the outer analysis
+ /// manager processes its invalidations.
+ template <typename OuterAnalysisT, typename InvalidatedAnalysisT>
+ void registerOuterAnalysisInvalidation() {
+ AnalysisKey *OuterID = OuterAnalysisT::ID();
+ AnalysisKey *InvalidatedID = InvalidatedAnalysisT::ID();
+
+ auto &InvalidatedIDList = OuterAnalysisInvalidationMap[OuterID];
+ // Note, this is a linear scan. If we end up with large numbers of
+ // analyses that all trigger invalidation on the same outer analysis,
+ // this entire system should be changed to some other deterministic
+ // data structure such as a `SetVector` of a pair of pointers.
+ auto InvalidatedIt = std::find(InvalidatedIDList.begin(),
+ InvalidatedIDList.end(), InvalidatedID);
+ if (InvalidatedIt == InvalidatedIDList.end())
+ InvalidatedIDList.push_back(InvalidatedID);
+ }
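
A hedged sketch of the registration call above from the inner side: a function analysis (hypothetical MyFunctionAnalysis) that consulted a module-level result (hypothetical MyModuleAnalysis) through the outer proxy records a deferred invalidation edge so that its own result is dropped whenever the module analysis is.

    MyFunctionAnalysis::Result
    MyFunctionAnalysis::run(Function &F, FunctionAnalysisManager &AM) {
      auto &OuterProxy = AM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
      // Only cached outer results may be consulted; we cannot force one to run.
      const auto *MAR =
          OuterProxy.getManager().getCachedResult<MyModuleAnalysis>(*F.getParent());
      // If MyModuleAnalysis is ever invalidated, invalidate this result too.
      OuterProxy.registerOuterAnalysisInvalidation<MyModuleAnalysis,
                                                   MyFunctionAnalysis>();
      return Result(MAR); // hypothetical Result constructor
    }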
+
+ /// Access the map from outer analyses to deferred invalidation requiring
+ /// analyses.
+ const SmallDenseMap<AnalysisKey *, TinyPtrVector<AnalysisKey *>, 2> &
+ getOuterInvalidations() const {
+ return OuterAnalysisInvalidationMap;
+ }
private:
const AnalysisManagerT *AM;
+
+ /// A map from an outer analysis ID to the set of this IR-unit's analyses
+ /// which need to be invalidated.
+ SmallDenseMap<AnalysisKey *, TinyPtrVector<AnalysisKey *>, 2>
+ OuterAnalysisInvalidationMap;
};
OuterAnalysisManagerProxy(const AnalysisManagerT &AM) : AM(&AM) {}
- // We have to explicitly define all the special member functions because MSVC
- // refuses to generate them.
- OuterAnalysisManagerProxy(const OuterAnalysisManagerProxy &Arg)
- : AM(Arg.AM) {}
- OuterAnalysisManagerProxy(OuterAnalysisManagerProxy &&Arg)
- : AM(std::move(Arg.AM)) {}
- OuterAnalysisManagerProxy &operator=(OuterAnalysisManagerProxy RHS) {
- std::swap(AM, RHS.AM);
- return *this;
- }
/// \brief Run the analysis pass and create our proxy result object.
/// Nothing to see here, it just forwards the \c AM reference into the
/// result.
- Result run(IRUnitT &, AnalysisManager<IRUnitT> &) { return Result(*AM); }
+ Result run(IRUnitT &, AnalysisManager<IRUnitT, ExtraArgTs...> &,
+ ExtraArgTs...) {
+ return Result(*AM);
+ }
private:
friend AnalysisInfoMixin<
OuterAnalysisManagerProxy<AnalysisManagerT, IRUnitT>>;
- static char PassID;
+ static AnalysisKey Key;
const AnalysisManagerT *AM;
};
-template <typename AnalysisManagerT, typename IRUnitT>
-char OuterAnalysisManagerProxy<AnalysisManagerT, IRUnitT>::PassID;
+template <typename AnalysisManagerT, typename IRUnitT, typename... ExtraArgTs>
+AnalysisKey
+ OuterAnalysisManagerProxy<AnalysisManagerT, IRUnitT, ExtraArgTs...>::Key;
extern template class OuterAnalysisManagerProxy<ModuleAnalysisManager,
Function>;
@@ -870,21 +1127,6 @@ class ModuleToFunctionPassAdaptor
public:
explicit ModuleToFunctionPassAdaptor(FunctionPassT Pass)
: Pass(std::move(Pass)) {}
- // We have to explicitly define all the special member functions because MSVC
- // refuses to generate them.
- ModuleToFunctionPassAdaptor(const ModuleToFunctionPassAdaptor &Arg)
- : Pass(Arg.Pass) {}
- ModuleToFunctionPassAdaptor(ModuleToFunctionPassAdaptor &&Arg)
- : Pass(std::move(Arg.Pass)) {}
- friend void swap(ModuleToFunctionPassAdaptor &LHS,
- ModuleToFunctionPassAdaptor &RHS) {
- using std::swap;
- swap(LHS.Pass, RHS.Pass);
- }
- ModuleToFunctionPassAdaptor &operator=(ModuleToFunctionPassAdaptor RHS) {
- swap(*this, RHS);
- return *this;
- }
/// \brief Runs the function pass across every function in the module.
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) {
@@ -901,20 +1143,19 @@ public:
// We know that the function pass couldn't have invalidated any other
// function's analyses (that's the contract of a function pass), so
- // directly handle the function analysis manager's invalidation here and
- // update our preserved set to reflect that these have already been
- // handled.
- PassPA = FAM.invalidate(F, std::move(PassPA));
+ // directly handle the function analysis manager's invalidation here.
+ FAM.invalidate(F, PassPA);
// Then intersect the preserved set so that invalidation of module
// analyses will eventually occur when the module pass completes.
PA.intersect(std::move(PassPA));
}
- // By definition we preserve the proxy. This precludes *any* invalidation
- // of function analyses by the proxy, but that's OK because we've taken
- // care to invalidate analyses in the function analysis manager
- // incrementally above.
+ // By definition we preserve the proxy. We also preserve all analyses on
+ // Function units. This precludes *any* invalidation of function analyses
+ // by the proxy, but that's OK because we've taken care to invalidate
+ // analyses in the function analysis manager incrementally above.
+ PA.preserveSet<AllAnalysesOn<Function>>();
PA.preserve<FunctionAnalysisManagerModuleProxy>();
return PA;
}
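
A short usage sketch of the adaptor (MyFunctionPass is hypothetical): this is what lets a function pass sit inside a module-level pipeline.

    ModulePassManager MPM;
    MPM.addPass(createModuleToFunctionPassAdaptor(MyFunctionPass()));
    // MPM.run(M, MAM) will now run MyFunctionPass over every function in M.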
@@ -933,19 +1174,29 @@ createModuleToFunctionPassAdaptor(FunctionPassT Pass) {
/// \brief A template utility pass to force an analysis result to be available.
///
-/// This is a no-op pass which simply forces a specific analysis pass's result
-/// to be available when it is run.
-template <typename AnalysisT>
-struct RequireAnalysisPass : PassInfoMixin<RequireAnalysisPass<AnalysisT>> {
+/// If there are extra arguments at the pass's run level there may also be
+/// extra arguments to the analysis manager's \c getResult routine. We can't
+/// guess how to effectively map the arguments from one to the other, and so
+/// this specialization just ignores them.
+///
+/// Specific patterns of run-method extra arguments and analysis manager extra
+/// arguments will have to be defined as appropriate specializations.
+template <typename AnalysisT, typename IRUnitT,
+ typename AnalysisManagerT = AnalysisManager<IRUnitT>,
+ typename... ExtraArgTs>
+struct RequireAnalysisPass
+ : PassInfoMixin<RequireAnalysisPass<AnalysisT, IRUnitT, AnalysisManagerT,
+ ExtraArgTs...>> {
/// \brief Run this pass over some unit of IR.
///
/// This pass can be run over any unit of IR and use any analysis manager
/// provided they satisfy the basic API requirements. When this pass is
/// created, these methods can be instantiated to satisfy whatever the
/// context requires.
- template <typename IRUnitT>
- PreservedAnalyses run(IRUnitT &Arg, AnalysisManager<IRUnitT> &AM) {
- (void)AM.template getResult<AnalysisT>(Arg);
+ PreservedAnalyses run(IRUnitT &Arg, AnalysisManagerT &AM,
+ ExtraArgTs &&... Args) {
+ (void)AM.template getResult<AnalysisT>(Arg,
+ std::forward<ExtraArgTs>(Args)...);
return PreservedAnalyses::all();
}
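
Usage sketch (MyModuleAnalysis is hypothetical): force an analysis to be computed at a known point in a pipeline without otherwise touching the IR.

    ModulePassManager MPM;
    MPM.addPass(RequireAnalysisPass<MyModuleAnalysis, Module>());
    // Later passes can expect getCachedResult<MyModuleAnalysis>() to succeed,
    // as long as nothing in between has invalidated it.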
@@ -965,13 +1216,11 @@ struct InvalidateAnalysisPass
/// provided they satisfy the basic API requirements. When this pass is
/// created, these methods can be instantiated to satisfy whatever the
/// context requires.
- template <typename IRUnitT>
- PreservedAnalyses run(IRUnitT &Arg, AnalysisManager<IRUnitT> &AM) {
- // We have to directly invalidate the analysis result as we can't
- // enumerate all other analyses and use the preserved set to control it.
- AM.template invalidate<AnalysisT>(Arg);
-
- return PreservedAnalyses::all();
+ template <typename IRUnitT, typename AnalysisManagerT, typename... ExtraArgTs>
+ PreservedAnalyses run(IRUnitT &Arg, AnalysisManagerT &AM, ExtraArgTs &&...) {
+ auto PA = PreservedAnalyses::all();
+ PA.abandon<AnalysisT>();
+ return PA;
}
};
@@ -981,12 +1230,39 @@ struct InvalidateAnalysisPass
/// analysis passes to be re-run to produce fresh results if any are needed.
struct InvalidateAllAnalysesPass : PassInfoMixin<InvalidateAllAnalysesPass> {
/// \brief Run this pass over some unit of IR.
- template <typename IRUnitT>
- PreservedAnalyses run(IRUnitT &, AnalysisManager<IRUnitT> &) {
+ template <typename IRUnitT, typename AnalysisManagerT, typename... ExtraArgTs>
+ PreservedAnalyses run(IRUnitT &, AnalysisManagerT &, ExtraArgTs &&...) {
return PreservedAnalyses::none();
}
};
+/// A utility pass template that simply runs another pass multiple times.
+///
+/// This can be useful when debugging or testing passes. It also serves as an
+/// example of how to extend the pass manager in ways beyond composition.
+template <typename PassT>
+class RepeatedPass : public PassInfoMixin<RepeatedPass<PassT>> {
+public:
+ RepeatedPass(int Count, PassT P) : Count(Count), P(std::move(P)) {}
+
+ template <typename IRUnitT, typename AnalysisManagerT, typename... Ts>
+ PreservedAnalyses run(IRUnitT &Arg, AnalysisManagerT &AM, Ts &&... Args) {
+ auto PA = PreservedAnalyses::all();
+ for (int i = 0; i < Count; ++i)
+ PA.intersect(P.run(Arg, AM, std::forward<Ts>(Args)...));
+ return PA;
+ }
+
+private:
+ int Count;
+ PassT P;
+};
+
+template <typename PassT>
+RepeatedPass<PassT> createRepeatedPass(int Count, PassT P) {
+ return RepeatedPass<PassT>(Count, std::move(P));
+}
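
Usage sketch (MyFunctionPass is hypothetical): repeat a pass a fixed number of times, which is handy for checking that it reaches a fixed point.

    FunctionPassManager FPM;
    FPM.addPass(createRepeatedPass(3, MyFunctionPass()));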
+
}
#endif
diff --git a/contrib/llvm/include/llvm/IR/PassManagerInternal.h b/contrib/llvm/include/llvm/IR/PassManagerInternal.h
index 4351b5888283..02f21675fa9d 100644
--- a/contrib/llvm/include/llvm/IR/PassManagerInternal.h
+++ b/contrib/llvm/include/llvm/IR/PassManagerInternal.h
@@ -20,10 +20,14 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
+#include <memory>
+#include <utility>
namespace llvm {
-template <typename IRUnitT> class AnalysisManager;
+template <typename IRUnitT> class AllAnalysesOn;
+template <typename IRUnitT, typename... ExtraArgTs> class AnalysisManager;
+class Invalidator;
class PreservedAnalyses;
/// \brief Implementation details of the pass manager interfaces.
@@ -31,16 +35,18 @@ namespace detail {
/// \brief Template for the abstract base class used to dispatch
/// polymorphically over pass objects.
-template <typename IRUnitT> struct PassConcept {
+template <typename IRUnitT, typename AnalysisManagerT, typename... ExtraArgTs>
+struct PassConcept {
// Boiler plate necessary for the container of derived classes.
- virtual ~PassConcept() {}
+ virtual ~PassConcept() = default;
/// \brief The polymorphic API which runs the pass over a given IR entity.
///
/// Note that actual pass object can omit the analysis manager argument if
/// desired. Also that the analysis manager may be null if there is no
/// analysis manager in the pass pipeline.
- virtual PreservedAnalyses run(IRUnitT &IR, AnalysisManager<IRUnitT> &AM) = 0;
+ virtual PreservedAnalyses run(IRUnitT &IR, AnalysisManagerT &AM,
+ ExtraArgTs... ExtraArgs) = 0;
/// \brief Polymorphic method to access the name of a pass.
virtual StringRef name() = 0;
@@ -50,28 +56,33 @@ template <typename IRUnitT> struct PassConcept {
///
/// Can be instantiated for any object which provides a \c run method accepting
/// an \c IRUnitT& and an \c AnalysisManager<IRUnit>&. It requires the pass to
-/// be a copyable object. When the
-template <typename IRUnitT, typename PassT,
- typename PreservedAnalysesT = PreservedAnalyses>
-struct PassModel : PassConcept<IRUnitT> {
+/// be a copyable object.
+template <typename IRUnitT, typename PassT, typename PreservedAnalysesT,
+ typename AnalysisManagerT, typename... ExtraArgTs>
+struct PassModel : PassConcept<IRUnitT, AnalysisManagerT, ExtraArgTs...> {
explicit PassModel(PassT Pass) : Pass(std::move(Pass)) {}
// We have to explicitly define all the special member functions because MSVC
// refuses to generate them.
PassModel(const PassModel &Arg) : Pass(Arg.Pass) {}
PassModel(PassModel &&Arg) : Pass(std::move(Arg.Pass)) {}
+
friend void swap(PassModel &LHS, PassModel &RHS) {
using std::swap;
swap(LHS.Pass, RHS.Pass);
}
+
PassModel &operator=(PassModel RHS) {
swap(*this, RHS);
return *this;
}
- PreservedAnalysesT run(IRUnitT &IR, AnalysisManager<IRUnitT> &AM) override {
- return Pass.run(IR, AM);
+ PreservedAnalysesT run(IRUnitT &IR, AnalysisManagerT &AM,
+ ExtraArgTs... ExtraArgs) override {
+ return Pass.run(IR, AM, ExtraArgs...);
}
+
StringRef name() override { return PassT::name(); }
+
PassT Pass;
};
@@ -79,38 +90,63 @@ struct PassModel : PassConcept<IRUnitT> {
///
/// This concept is parameterized over the IR unit that this result pertains
/// to.
-template <typename IRUnitT> struct AnalysisResultConcept {
- virtual ~AnalysisResultConcept() {}
+template <typename IRUnitT, typename PreservedAnalysesT, typename InvalidatorT>
+struct AnalysisResultConcept {
+ virtual ~AnalysisResultConcept() = default;
/// \brief Method to try and mark a result as invalid.
///
/// When the outer analysis manager detects a change in some underlying
/// unit of the IR, it will call this method on all of the results cached.
///
- /// This method also receives a set of preserved analyses which can be used
- /// to avoid invalidation because the pass which changed the underlying IR
- /// took care to update or preserve the analysis result in some way.
+ /// \p PA is a set of preserved analyses which can be used to avoid
+ /// invalidation because the pass which changed the underlying IR took care
+ /// to update or preserve the analysis result in some way.
+ ///
+ /// \p Inv is typically a \c AnalysisManager::Invalidator object that can be
+ /// used by a particular analysis result to discover if other analyses
+ /// results are also invalidated in the event that this result depends on
+ /// them. See the documentation in the \c AnalysisManager for more details.
///
/// \returns true if the result is indeed invalid (the default).
- virtual bool invalidate(IRUnitT &IR, const PreservedAnalyses &PA) = 0;
+ virtual bool invalidate(IRUnitT &IR, const PreservedAnalysesT &PA,
+ InvalidatorT &Inv) = 0;
};
/// \brief SFINAE metafunction for computing whether \c ResultT provides an
/// \c invalidate member function.
template <typename IRUnitT, typename ResultT> class ResultHasInvalidateMethod {
- typedef char SmallType;
- struct BigType {
+ typedef char EnabledType;
+ struct DisabledType {
char a, b;
};
- template <typename T, bool (T::*)(IRUnitT &, const PreservedAnalyses &)>
- struct Checker;
-
- template <typename T> static SmallType f(Checker<T, &T::invalidate> *);
- template <typename T> static BigType f(...);
+ // Purely to help out MSVC which fails to disable the below specialization,
+ // explicitly enable using the result type's invalidate routine if we can
+ // successfully call that routine.
+ template <typename T> struct Nonce { typedef EnabledType Type; };
+ template <typename T>
+ static typename Nonce<decltype(std::declval<T>().invalidate(
+ std::declval<IRUnitT &>(), std::declval<PreservedAnalyses>()))>::Type
+ check(rank<2>);
+
+ // First we define an overload that can only be taken if there is no
+ // invalidate member. We do this by taking the address of an invalidate
+ // member in an adjacent base class of a derived class. This would be
+ // ambiguous if there were an invalidate member in the result type.
+ template <typename T, typename U> static DisabledType NonceFunction(T U::*);
+ struct CheckerBase { int invalidate; };
+ template <typename T> struct Checker : CheckerBase, T {};
+ template <typename T>
+ static decltype(NonceFunction(&Checker<T>::invalidate)) check(rank<1>);
+
+ // Now we have the fallback that will only be reached when there is an
+ // invalidate member, and enables the trait.
+ template <typename T>
+ static EnabledType check(rank<0>);
public:
- enum { Value = sizeof(f<ResultT>(nullptr)) == sizeof(SmallType) };
+ enum { Value = sizeof(check<ResultT>(rank<2>())) == sizeof(EnabledType) };
};
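
The trait above is easier to see in a stripped-down, self-contained form. The sketch below shows the same rank-based overload idiom for detecting a member; it omits the MSVC workaround and uses illustrative names, not LLVM's.

    #include <type_traits>
    #include <utility>

    template <int N> struct rank : rank<N - 1> {};
    template <> struct rank<0> {};

    // Preferred overload: only viable when T has a callable foo().
    template <typename T>
    auto hasFoo(rank<1>) -> decltype(std::declval<T>().foo(), std::true_type());
    // Fallback overload: always viable, taken when the one above drops out.
    template <typename T> std::false_type hasFoo(rank<0>);

    template <typename T> struct HasFoo : decltype(hasFoo<T>(rank<1>())) {};

    struct A { void foo(); };
    struct B {};
    static_assert(HasFoo<A>::value, "A provides foo()");
    static_assert(!HasFoo<B>::value, "B does not provide foo()");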
/// \brief Wrapper to model the analysis result concept.
@@ -120,7 +156,7 @@ public:
/// an invalidation handler. It is only selected when the invalidation handler
/// is not part of the ResultT's interface.
template <typename IRUnitT, typename PassT, typename ResultT,
- typename PreservedAnalysesT = PreservedAnalyses,
+ typename PreservedAnalysesT, typename InvalidatorT,
bool HasInvalidateHandler =
ResultHasInvalidateMethod<IRUnitT, ResultT>::Value>
struct AnalysisResultModel;
@@ -128,19 +164,22 @@ struct AnalysisResultModel;
/// \brief Specialization of \c AnalysisResultModel which provides the default
/// invalidate functionality.
template <typename IRUnitT, typename PassT, typename ResultT,
- typename PreservedAnalysesT>
-struct AnalysisResultModel<IRUnitT, PassT, ResultT, PreservedAnalysesT, false>
- : AnalysisResultConcept<IRUnitT> {
+ typename PreservedAnalysesT, typename InvalidatorT>
+struct AnalysisResultModel<IRUnitT, PassT, ResultT, PreservedAnalysesT,
+ InvalidatorT, false>
+ : AnalysisResultConcept<IRUnitT, PreservedAnalysesT, InvalidatorT> {
explicit AnalysisResultModel(ResultT Result) : Result(std::move(Result)) {}
// We have to explicitly define all the special member functions because MSVC
// refuses to generate them.
AnalysisResultModel(const AnalysisResultModel &Arg) : Result(Arg.Result) {}
AnalysisResultModel(AnalysisResultModel &&Arg)
: Result(std::move(Arg.Result)) {}
+
friend void swap(AnalysisResultModel &LHS, AnalysisResultModel &RHS) {
using std::swap;
swap(LHS.Result, RHS.Result);
}
+
AnalysisResultModel &operator=(AnalysisResultModel RHS) {
swap(*this, RHS);
return *this;
@@ -151,8 +190,11 @@ struct AnalysisResultModel<IRUnitT, PassT, ResultT, PreservedAnalysesT, false>
// FIXME: We should actually use two different concepts for analysis results
// rather than two different models, and avoid the indirect function call for
// ones that use the trivial behavior.
- bool invalidate(IRUnitT &, const PreservedAnalysesT &PA) override {
- return !PA.preserved(PassT::ID());
+ bool invalidate(IRUnitT &, const PreservedAnalysesT &PA,
+ InvalidatorT &) override {
+ auto PAC = PA.template getChecker<PassT>();
+ return !PAC.preserved() &&
+ !PAC.template preservedSet<AllAnalysesOn<IRUnitT>>();
}
ResultT Result;
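
For reference, a sketch of the producer side of this default check: a transform pass (hypothetical MyCheapPass, with a hypothetical tweak() helper) reports what it preserved, which is exactly what the checker above inspects.

    PreservedAnalyses MyCheapPass::run(Function &F, FunctionAnalysisManager &AM) {
      bool Changed = tweak(F); // hypothetical transformation helper
      if (!Changed)
        return PreservedAnalyses::all();
      PreservedAnalyses PA;
      PA.preserve<MyAnalysis>(); // keeps PAC.preserved() true in the code above
      // PA.preserveSet<AllAnalysesOn<Function>>(); // coarser alternative
      return PA;
    }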
@@ -161,27 +203,31 @@ struct AnalysisResultModel<IRUnitT, PassT, ResultT, PreservedAnalysesT, false>
/// \brief Specialization of \c AnalysisResultModel which delegates invalidate
/// handling to \c ResultT.
template <typename IRUnitT, typename PassT, typename ResultT,
- typename PreservedAnalysesT>
-struct AnalysisResultModel<IRUnitT, PassT, ResultT, PreservedAnalysesT, true>
- : AnalysisResultConcept<IRUnitT> {
+ typename PreservedAnalysesT, typename InvalidatorT>
+struct AnalysisResultModel<IRUnitT, PassT, ResultT, PreservedAnalysesT,
+ InvalidatorT, true>
+ : AnalysisResultConcept<IRUnitT, PreservedAnalysesT, InvalidatorT> {
explicit AnalysisResultModel(ResultT Result) : Result(std::move(Result)) {}
// We have to explicitly define all the special member functions because MSVC
// refuses to generate them.
AnalysisResultModel(const AnalysisResultModel &Arg) : Result(Arg.Result) {}
AnalysisResultModel(AnalysisResultModel &&Arg)
: Result(std::move(Arg.Result)) {}
+
friend void swap(AnalysisResultModel &LHS, AnalysisResultModel &RHS) {
using std::swap;
swap(LHS.Result, RHS.Result);
}
+
AnalysisResultModel &operator=(AnalysisResultModel RHS) {
swap(*this, RHS);
return *this;
}
/// \brief The model delegates to the \c ResultT method.
- bool invalidate(IRUnitT &IR, const PreservedAnalysesT &PA) override {
- return Result.invalidate(IR, PA);
+ bool invalidate(IRUnitT &IR, const PreservedAnalysesT &PA,
+ InvalidatorT &Inv) override {
+ return Result.invalidate(IR, PA, Inv);
}
ResultT Result;
@@ -191,14 +237,18 @@ struct AnalysisResultModel<IRUnitT, PassT, ResultT, PreservedAnalysesT, true>
///
/// This concept is parameterized over the IR unit that it can run over and
/// produce an analysis result.
-template <typename IRUnitT> struct AnalysisPassConcept {
- virtual ~AnalysisPassConcept() {}
+template <typename IRUnitT, typename PreservedAnalysesT, typename InvalidatorT,
+ typename... ExtraArgTs>
+struct AnalysisPassConcept {
+ virtual ~AnalysisPassConcept() = default;
/// \brief Method to run this analysis over a unit of IR.
/// \returns A unique_ptr to the analysis result object to be queried by
/// users.
- virtual std::unique_ptr<AnalysisResultConcept<IRUnitT>>
- run(IRUnitT &IR, AnalysisManager<IRUnitT> &AM) = 0;
+ virtual std::unique_ptr<
+ AnalysisResultConcept<IRUnitT, PreservedAnalysesT, InvalidatorT>>
+ run(IRUnitT &IR, AnalysisManager<IRUnitT, ExtraArgTs...> &AM,
+ ExtraArgTs... ExtraArgs) = 0;
/// \brief Polymorphic method to access the name of a pass.
virtual StringRef name() = 0;
@@ -209,32 +259,39 @@ template <typename IRUnitT> struct AnalysisPassConcept {
/// Can wrap any type which implements a suitable \c run method. The method
/// must accept an \c IRUnitT& and an \c AnalysisManager<IRUnitT>& as arguments
/// and produce an object which can be wrapped in a \c AnalysisResultModel.
-template <typename IRUnitT, typename PassT>
-struct AnalysisPassModel : AnalysisPassConcept<IRUnitT> {
+template <typename IRUnitT, typename PassT, typename PreservedAnalysesT,
+ typename InvalidatorT, typename... ExtraArgTs>
+struct AnalysisPassModel : AnalysisPassConcept<IRUnitT, PreservedAnalysesT,
+ InvalidatorT, ExtraArgTs...> {
explicit AnalysisPassModel(PassT Pass) : Pass(std::move(Pass)) {}
// We have to explicitly define all the special member functions because MSVC
// refuses to generate them.
AnalysisPassModel(const AnalysisPassModel &Arg) : Pass(Arg.Pass) {}
AnalysisPassModel(AnalysisPassModel &&Arg) : Pass(std::move(Arg.Pass)) {}
+
friend void swap(AnalysisPassModel &LHS, AnalysisPassModel &RHS) {
using std::swap;
swap(LHS.Pass, RHS.Pass);
}
+
AnalysisPassModel &operator=(AnalysisPassModel RHS) {
swap(*this, RHS);
return *this;
}
// FIXME: Replace PassT::Result with type traits when we use C++11.
- typedef AnalysisResultModel<IRUnitT, PassT, typename PassT::Result>
+ typedef AnalysisResultModel<IRUnitT, PassT, typename PassT::Result,
+ PreservedAnalysesT, InvalidatorT>
ResultModelT;
/// \brief The model delegates to the \c PassT::run method.
///
/// The return is wrapped in an \c AnalysisResultModel.
- std::unique_ptr<AnalysisResultConcept<IRUnitT>>
- run(IRUnitT &IR, AnalysisManager<IRUnitT> &AM) override {
- return make_unique<ResultModelT>(Pass.run(IR, AM));
+ std::unique_ptr<
+ AnalysisResultConcept<IRUnitT, PreservedAnalysesT, InvalidatorT>>
+ run(IRUnitT &IR, AnalysisManager<IRUnitT, ExtraArgTs...> &AM,
+ ExtraArgTs... ExtraArgs) override {
+ return make_unique<ResultModelT>(Pass.run(IR, AM, ExtraArgs...));
}
/// \brief The model delegates to a static \c PassT::name method.
@@ -245,7 +302,8 @@ struct AnalysisPassModel : AnalysisPassConcept<IRUnitT> {
PassT Pass;
};
-} // End namespace detail
-}
+} // end namespace detail
+
+} // end namespace llvm
-#endif
+#endif // LLVM_IR_PASSMANAGERINTERNAL_H
diff --git a/contrib/llvm/include/llvm/IR/PatternMatch.h b/contrib/llvm/include/llvm/IR/PatternMatch.h
index 7da9afcf9463..a30fc97e98ef 100644
--- a/contrib/llvm/include/llvm/IR/PatternMatch.h
+++ b/contrib/llvm/include/llvm/IR/PatternMatch.h
@@ -294,10 +294,10 @@ template <typename Class> struct bind_ty {
/// \brief Match a value, capturing it if we match.
inline bind_ty<Value> m_Value(Value *&V) { return V; }
+inline bind_ty<const Value> m_Value(const Value *&V) { return V; }
/// \brief Match an instruction, capturing it if we match.
inline bind_ty<Instruction> m_Instruction(Instruction *&I) { return I; }
-
/// \brief Match a binary operator, capturing it if we match.
inline bind_ty<BinaryOperator> m_BinOp(BinaryOperator *&I) { return I; }
@@ -682,7 +682,7 @@ template <typename SubPattern_t> struct Exact_match {
Exact_match(const SubPattern_t &SP) : SubPattern(SP) {}
template <typename OpTy> bool match(OpTy *V) {
- if (PossiblyExactOperator *PEO = dyn_cast<PossiblyExactOperator>(V))
+ if (auto *PEO = dyn_cast<PossiblyExactOperator>(V))
return PEO->isExact() && SubPattern.match(V);
return false;
}
@@ -706,7 +706,7 @@ struct CmpClass_match {
: Predicate(Pred), L(LHS), R(RHS) {}
template <typename OpTy> bool match(OpTy *V) {
- if (Class *I = dyn_cast<Class>(V))
+ if (auto *I = dyn_cast<Class>(V))
if (L.match(I->getOperand(0)) && R.match(I->getOperand(1))) {
Predicate = I->getPredicate();
return true;
@@ -1349,6 +1349,35 @@ m_c_Xor(const LHS &L, const RHS &R) {
return m_CombineOr(m_Xor(L, R), m_Xor(R, L));
}
+/// Matches an SMin with LHS and RHS in either order.
+template <typename LHS, typename RHS>
+inline match_combine_or<MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty>,
+ MaxMin_match<ICmpInst, RHS, LHS, smin_pred_ty>>
+m_c_SMin(const LHS &L, const RHS &R) {
+ return m_CombineOr(m_SMin(L, R), m_SMin(R, L));
+}
+/// Matches an SMax with LHS and RHS in either order.
+template <typename LHS, typename RHS>
+inline match_combine_or<MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty>,
+ MaxMin_match<ICmpInst, RHS, LHS, smax_pred_ty>>
+m_c_SMax(const LHS &L, const RHS &R) {
+ return m_CombineOr(m_SMax(L, R), m_SMax(R, L));
+}
+/// Matches a UMin with LHS and RHS in either order.
+template <typename LHS, typename RHS>
+inline match_combine_or<MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty>,
+ MaxMin_match<ICmpInst, RHS, LHS, umin_pred_ty>>
+m_c_UMin(const LHS &L, const RHS &R) {
+ return m_CombineOr(m_UMin(L, R), m_UMin(R, L));
+}
+/// Matches a UMax with LHS and RHS in either order.
+template <typename LHS, typename RHS>
+inline match_combine_or<MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty>,
+ MaxMin_match<ICmpInst, RHS, LHS, umax_pred_ty>>
+m_c_UMax(const LHS &L, const RHS &R) {
+ return m_CombineOr(m_UMax(L, R), m_UMax(R, L));
+}
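
A small consumer sketch of the new commutative matchers; V, X and Y are ordinary Value pointers supplied by the caller.

    #include "llvm/IR/PatternMatch.h"
    using namespace llvm;
    using namespace llvm::PatternMatch;

    // Returns true and binds X/Y if V is a signed max of X and Y in either
    // operand order.
    bool isSignedMaxOf(Value *V, Value *&X, Value *&Y) {
      return match(V, m_c_SMax(m_Value(X), m_Value(Y)));
    }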
+
} // end namespace PatternMatch
} // end namespace llvm
diff --git a/contrib/llvm/include/llvm/IR/Statepoint.h b/contrib/llvm/include/llvm/IR/Statepoint.h
index 5cd7fe1b576c..916faa4b327e 100644
--- a/contrib/llvm/include/llvm/IR/Statepoint.h
+++ b/contrib/llvm/include/llvm/IR/Statepoint.h
@@ -1,4 +1,4 @@
-//===-- llvm/IR/Statepoint.h - gc.statepoint utilities ------ --*- C++ -*-===//
+//===-- llvm/IR/Statepoint.h - gc.statepoint utilities ----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -19,6 +19,7 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/ADT/Optional.h"
+#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
@@ -26,21 +27,33 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/Support/Casting.h"
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <vector>
namespace llvm {
+
/// The statepoint intrinsic accepts a set of flags as its third argument.
/// Valid values come out of this set.
enum class StatepointFlags {
None = 0,
GCTransition = 1, ///< Indicates that this statepoint is a transition from
///< GC-aware code to code that is not GC-aware.
-
- MaskAll = GCTransition ///< A bitmask that includes all valid flags.
+ /// Mark the deopt arguments associated with the statepoint as only being
+ /// "live-in". By default, deopt arguments are "live-through". "live-through"
+ /// requires that they the value be live on entry, on exit, and at any point
+ /// during the call. "live-in" only requires the value be available at the
+ /// start of the call. In particular, "live-in" values can be placed in
+ /// unused argument registers or other non-callee saved registers.
+ DeoptLiveIn = 2,
+
+ MaskAll = 3 ///< A bitmask that includes all valid flags.
};
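
Illustrative sketch: the flags travel as a bitmask on the statepoint call, so they combine with OR and can be sanity-checked against MaskAll.

    #include "llvm/IR/Statepoint.h"
    #include <cassert>
    #include <cstdint>
    using namespace llvm;

    void checkFlags() {
      uint64_t Flags = static_cast<uint64_t>(StatepointFlags::GCTransition) |
                       static_cast<uint64_t>(StatepointFlags::DeoptLiveIn);
      assert((Flags & ~static_cast<uint64_t>(StatepointFlags::MaskAll)) == 0 &&
             "unknown statepoint flag bits");
      (void)Flags;
    }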
class GCRelocateInst;
class GCResultInst;
-class ImmutableStatepoint;
bool isStatepoint(ImmutableCallSite CS);
bool isStatepoint(const Value *V);
@@ -59,8 +72,6 @@ template <typename FunTy, typename InstructionTy, typename ValueTy,
typename CallSiteTy>
class StatepointBase {
CallSiteTy StatepointCS;
- void *operator new(size_t, unsigned) = delete;
- void *operator new(size_t s) = delete;
protected:
explicit StatepointBase(InstructionTy *I) {
@@ -69,6 +80,7 @@ protected:
assert(StatepointCS && "isStatepoint implies CallSite");
}
}
+
explicit StatepointBase(CallSiteTy CS) {
if (isStatepoint(CS))
StatepointCS = CS;
@@ -86,6 +98,9 @@ public:
CallArgsBeginPos = 5,
};
+ void *operator new(size_t, unsigned) = delete;
+ void *operator new(size_t s) = delete;
+
explicit operator bool() const {
// We do not assign non-statepoint CallSites to StatepointCS.
return (bool)StatepointCS;
@@ -444,6 +459,7 @@ StatepointDirectives parseStatepointDirectivesFromAttrs(AttributeSet AS);
/// Return \c true if the \p Attr is an attribute that is a statepoint
/// directive.
bool isStatepointDirectiveAttr(Attribute Attr);
-}
-#endif
+} // end namespace llvm
+
+#endif // LLVM_IR_STATEPOINT_H
diff --git a/contrib/llvm/include/llvm/IR/SymbolTableListTraits.h b/contrib/llvm/include/llvm/IR/SymbolTableListTraits.h
index 60e04e2f9eca..5c6d58affd7a 100644
--- a/contrib/llvm/include/llvm/IR/SymbolTableListTraits.h
+++ b/contrib/llvm/include/llvm/IR/SymbolTableListTraits.h
@@ -30,14 +30,6 @@
namespace llvm {
class ValueSymbolTable;
-template <typename NodeTy> class ilist_iterator;
-template <typename NodeTy, typename Traits> class iplist;
-template <typename Ty> struct ilist_traits;
-
-template <typename NodeTy>
-struct SymbolTableListSentinelTraits
- : public ilist_embedded_sentinel_traits<NodeTy> {};
-
/// Template metafunction to get the parent type for a symbol table list.
///
/// Implementations create a typedef called \c type so that we only need a
@@ -68,11 +60,9 @@ template <typename NodeTy> class SymbolTableList;
// ItemParentClass - The type of object that owns the list, e.g. BasicBlock.
//
template <typename ValueSubClass>
-class SymbolTableListTraits
- : public ilist_nextprev_traits<ValueSubClass>,
- public SymbolTableListSentinelTraits<ValueSubClass>,
- public ilist_node_traits<ValueSubClass> {
+class SymbolTableListTraits : public ilist_alloc_traits<ValueSubClass> {
typedef SymbolTableList<ValueSubClass> ListTy;
+ typedef typename simple_ilist<ValueSubClass>::iterator iterator;
typedef
typename SymbolTableListParentType<ValueSubClass>::type ItemParentClass;
@@ -101,10 +91,9 @@ private:
public:
void addNodeToList(ValueSubClass *V);
void removeNodeFromList(ValueSubClass *V);
- void transferNodesFromList(SymbolTableListTraits &L2,
- ilist_iterator<ValueSubClass> first,
- ilist_iterator<ValueSubClass> last);
-//private:
+ void transferNodesFromList(SymbolTableListTraits &L2, iterator first,
+ iterator last);
+ // private:
template<typename TPtr>
void setSymTabObject(TPtr *, TPtr);
static ValueSymbolTable *toPtr(ValueSymbolTable *P) { return P; }
@@ -116,8 +105,9 @@ public:
/// When nodes are inserted into and removed from this list, the associated
/// symbol table will be automatically updated. Similarly, parent links get
/// updated automatically.
-template <typename NodeTy>
-class SymbolTableList : public iplist<NodeTy, SymbolTableListTraits<NodeTy>> {};
+template <class T>
+class SymbolTableList
+ : public iplist_impl<simple_ilist<T>, SymbolTableListTraits<T>> {};
} // End llvm namespace
diff --git a/contrib/llvm/include/llvm/IR/Type.h b/contrib/llvm/include/llvm/IR/Type.h
index ef7ad733f47a..778ee06169f1 100644
--- a/contrib/llvm/include/llvm/IR/Type.h
+++ b/contrib/llvm/include/llvm/IR/Type.h
@@ -81,6 +81,8 @@ private:
TypeID ID : 8; // The current base type of this type.
unsigned SubclassData : 24; // Space for subclasses to store data.
+ // Note that this should be synchronized with
+ // MAX_INT_BITS value in IntegerType class.
protected:
friend class LLVMContextImpl;
@@ -108,7 +110,7 @@ protected:
Type * const *ContainedTys;
static bool isSequentialType(TypeID TyID) {
- return TyID == ArrayTyID || TyID == PointerTyID || TyID == VectorTyID;
+ return TyID == ArrayTyID || TyID == VectorTyID;
}
public:
@@ -164,12 +166,12 @@ public:
const fltSemantics &getFltSemantics() const {
switch (getTypeID()) {
- case HalfTyID: return APFloat::IEEEhalf;
- case FloatTyID: return APFloat::IEEEsingle;
- case DoubleTyID: return APFloat::IEEEdouble;
- case X86_FP80TyID: return APFloat::x87DoubleExtended;
- case FP128TyID: return APFloat::IEEEquad;
- case PPC_FP128TyID: return APFloat::PPCDoubleDouble;
+ case HalfTyID: return APFloat::IEEEhalf();
+ case FloatTyID: return APFloat::IEEEsingle();
+ case DoubleTyID: return APFloat::IEEEdouble();
+ case X86_FP80TyID: return APFloat::x87DoubleExtended();
+ case FP128TyID: return APFloat::IEEEquad();
+ case PPC_FP128TyID: return APFloat::PPCDoubleDouble();
default: llvm_unreachable("Invalid floating type");
}
}
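
Call-site sketch of the APFloat change reflected here: the per-format semantics are now obtained through accessor functions rather than global constants (Ty is assumed to be a Type* with a floating-point type).

    const fltSemantics &Sem = Ty->getFltSemantics();
    APFloat Zero = APFloat::getZero(Sem);       // a constant in Ty's own format
    APFloat Pi(APFloat::IEEEdouble(), "3.14");  // note the (); this used to be a global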
@@ -342,12 +344,21 @@ public:
}
inline uint64_t getArrayNumElements() const;
- Type *getArrayElementType() const { return getSequentialElementType(); }
+ Type *getArrayElementType() const {
+ assert(getTypeID() == ArrayTyID);
+ return ContainedTys[0];
+ }
inline unsigned getVectorNumElements() const;
- Type *getVectorElementType() const { return getSequentialElementType(); }
+ Type *getVectorElementType() const {
+ assert(getTypeID() == VectorTyID);
+ return ContainedTys[0];
+ }
- Type *getPointerElementType() const { return getSequentialElementType(); }
+ Type *getPointerElementType() const {
+ assert(getTypeID() == PointerTyID);
+ return ContainedTys[0];
+ }
/// Get the address space of this pointer or pointer vector type.
inline unsigned getPointerAddressSpace() const;
@@ -429,29 +440,21 @@ template <> struct isa_impl<PointerType, Type> {
// graph of sub types.
template <> struct GraphTraits<Type *> {
- typedef Type NodeType;
+ typedef Type *NodeRef;
typedef Type::subtype_iterator ChildIteratorType;
- static inline NodeType *getEntryNode(Type *T) { return T; }
- static inline ChildIteratorType child_begin(NodeType *N) {
- return N->subtype_begin();
- }
- static inline ChildIteratorType child_end(NodeType *N) {
- return N->subtype_end();
- }
+ static NodeRef getEntryNode(Type *T) { return T; }
+ static ChildIteratorType child_begin(NodeRef N) { return N->subtype_begin(); }
+ static ChildIteratorType child_end(NodeRef N) { return N->subtype_end(); }
};
template <> struct GraphTraits<const Type*> {
- typedef const Type NodeType;
+ typedef const Type *NodeRef;
typedef Type::subtype_iterator ChildIteratorType;
- static inline NodeType *getEntryNode(NodeType *T) { return T; }
- static inline ChildIteratorType child_begin(NodeType *N) {
- return N->subtype_begin();
- }
- static inline ChildIteratorType child_end(NodeType *N) {
- return N->subtype_end();
- }
+ static NodeRef getEntryNode(NodeRef T) { return T; }
+ static ChildIteratorType child_begin(NodeRef N) { return N->subtype_begin(); }
+ static ChildIteratorType child_end(NodeRef N) { return N->subtype_end(); }
};
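
With NodeRef in place, the generic graph utilities keep working over the sub-type graph; a small sketch (Ty is an assumed Type*):

    #include "llvm/ADT/DepthFirstIterator.h"
    #include "llvm/IR/Type.h"
    using namespace llvm;

    void visitSubTypes(Type *Ty) {
      for (Type *SubTy : depth_first(Ty))
        (void)SubTy; // visits Ty and every type reachable through its subtypes
    }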
// Create wrappers for C Binding types (see CBindingWrapping.h).
diff --git a/contrib/llvm/include/llvm/IR/Use.h b/contrib/llvm/include/llvm/IR/Use.h
index e62eab56b1f1..ff6b2e1f1e22 100644
--- a/contrib/llvm/include/llvm/IR/Use.h
+++ b/contrib/llvm/include/llvm/IR/Use.h
@@ -27,7 +27,7 @@
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/Support/CBindingWrapping.h"
-#include <cstddef>
+#include "llvm-c/Types.h"
namespace llvm {
@@ -36,16 +36,6 @@ class User;
class Use;
template <typename> struct simplify_type;
-// Use** is only 4-byte aligned.
-template <> class PointerLikeTypeTraits<Use **> {
-public:
- static inline void *getAsVoidPointer(Use **P) { return P; }
- static inline Use **getFromVoidPointer(void *P) {
- return static_cast<Use **>(P);
- }
- enum { NumLowBitsAvailable = 2 };
-};
-
/// \brief A Use represents the edge between a Value definition and its users.
///
/// This is notionally a two-dimensional linked list. It supports traversing
@@ -65,6 +55,8 @@ public:
/// time complexity.
class Use {
public:
+ Use(const Use &U) = delete;
+
/// \brief Provide a fast substitute to std::swap<Use>
/// that also works with less standard-compliant compilers
void swap(Use &RHS);
@@ -74,8 +66,6 @@ public:
typedef PointerIntPair<User *, 1, unsigned> UserRef;
private:
- Use(const Use &U) = delete;
-
/// Destructor - Only for zap()
~Use() {
if (Val)
@@ -128,6 +118,7 @@ private:
PointerIntPair<Use **, 2, PrevPtrTag> Prev;
void setPrev(Use **NewPrev) { Prev.setPointer(NewPrev); }
+
void addToList(Use **List) {
Next = *List;
if (Next)
@@ -135,6 +126,7 @@ private:
setPrev(List);
*List = this;
}
+
void removeFromList() {
Use **StrippedPrev = Prev.getPointer();
*StrippedPrev = Next;
@@ -159,6 +151,6 @@ template <> struct simplify_type<const Use> {
// Create wrappers for C Binding types (see CBindingWrapping.h).
DEFINE_SIMPLE_CONVERSION_FUNCTIONS(Use, LLVMUseRef)
-}
+} // end namespace llvm
-#endif
+#endif // LLVM_IR_USE_H
diff --git a/contrib/llvm/include/llvm/IR/UseListOrder.h b/contrib/llvm/include/llvm/IR/UseListOrder.h
index b86425b6a697..efff208295b6 100644
--- a/contrib/llvm/include/llvm/IR/UseListOrder.h
+++ b/contrib/llvm/include/llvm/IR/UseListOrder.h
@@ -34,18 +34,8 @@ struct UseListOrder {
: V(V), F(F), Shuffle(ShuffleSize) {}
UseListOrder() : V(nullptr), F(nullptr) {}
- UseListOrder(UseListOrder &&X)
- : V(X.V), F(X.F), Shuffle(std::move(X.Shuffle)) {}
- UseListOrder &operator=(UseListOrder &&X) {
- V = X.V;
- F = X.F;
- Shuffle = std::move(X.Shuffle);
- return *this;
- }
-
-private:
- UseListOrder(const UseListOrder &X) = delete;
- UseListOrder &operator=(const UseListOrder &X) = delete;
+ UseListOrder(UseListOrder &&) = default;
+ UseListOrder &operator=(UseListOrder &&) = default;
};
typedef std::vector<UseListOrder> UseListOrderStack;
diff --git a/contrib/llvm/include/llvm/IR/User.h b/contrib/llvm/include/llvm/IR/User.h
index 4d6b30cd1124..e6fe97484580 100644
--- a/contrib/llvm/include/llvm/IR/User.h
+++ b/contrib/llvm/include/llvm/IR/User.h
@@ -21,9 +21,15 @@
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
+#include "llvm/IR/Use.h"
#include "llvm/IR/Value.h"
-#include "llvm/Support/AlignOf.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
namespace llvm {
@@ -37,9 +43,9 @@ template <class>
struct OperandTraits;
class User : public Value {
- User(const User &) = delete;
template <unsigned>
friend struct HungoffOperandTraits;
+
virtual void anchor();
LLVM_ATTRIBUTE_ALWAYS_INLINE inline static void *
@@ -88,8 +94,9 @@ protected:
void growHungoffUses(unsigned N, bool IsPhi = false);
public:
- ~User() override {
- }
+ User(const User &) = delete;
+ ~User() override = default;
+
/// \brief Free memory allocated for User and Use objects.
void operator delete(void *Usr);
/// \brief Placement delete - required by std, but never called.
@@ -100,6 +107,7 @@ public:
void operator delete(void*, unsigned, bool) {
llvm_unreachable("Constructor throws?");
}
+
protected:
template <int Idx, typename U> static Use &OpFrom(const U *that) {
return Idx < 0
@@ -112,6 +120,7 @@ protected:
template <int Idx> const Use &Op() const {
return OpFrom<Idx>(this);
}
+
private:
Use *&getHungOffOperands() { return *(reinterpret_cast<Use **>(this) - 1); }
@@ -124,6 +133,7 @@ private:
"Setting operand list only required for hung off uses");
getHungOffOperands() = NewList;
}
+
public:
Use *getOperandList() {
return HasHungOffUses ? getHungOffOperands() : getIntrusiveOperands();
@@ -131,10 +141,12 @@ public:
const Use *getOperandList() const {
return const_cast<User *>(this)->getOperandList();
}
+
Value *getOperand(unsigned i) const {
assert(i < NumUserOperands && "getOperand() out of range!");
return getOperandList()[i];
}
+
void setOperand(unsigned i, Value *Val) {
assert(i < NumUserOperands && "setOperand() out of range!");
assert((!isa<Constant>((const Value*)this) ||
@@ -142,6 +154,7 @@ public:
"Cannot mutate a constant with setOperand!");
getOperandList()[i] = Val;
}
+
const Use &getOperandUse(unsigned i) const {
assert(i < NumUserOperands && "getOperandUse() out of range!");
return getOperandList()[i];
@@ -250,9 +263,9 @@ public:
}
};
// Either Use objects, or a Use pointer can be prepended to User.
-static_assert(AlignOf<Use>::Alignment >= AlignOf<User>::Alignment,
+static_assert(alignof(Use) >= alignof(User),
"Alignment is insufficient after objects prepended to User");
-static_assert(AlignOf<Use *>::Alignment >= AlignOf<User>::Alignment,
+static_assert(alignof(Use *) >= alignof(User),
"Alignment is insufficient after objects prepended to User");
template<> struct simplify_type<User::op_iterator> {
@@ -268,6 +281,6 @@ template<> struct simplify_type<User::const_op_iterator> {
}
};
-} // End llvm namespace
+} // end namespace llvm
-#endif
+#endif // LLVM_IR_USER_H
diff --git a/contrib/llvm/include/llvm/IR/Value.h b/contrib/llvm/include/llvm/IR/Value.h
index f3a342dadf73..bdafbbf58cc4 100644
--- a/contrib/llvm/include/llvm/IR/Value.h
+++ b/contrib/llvm/include/llvm/IR/Value.h
@@ -18,12 +18,14 @@
#include "llvm/IR/Use.h"
#include "llvm/Support/CBindingWrapping.h"
#include "llvm/Support/Casting.h"
+#include "llvm-c/Types.h"
+#include <cassert>
+#include <iterator>
namespace llvm {
class APInt;
class Argument;
-class AssemblyAnnotationWriter;
class BasicBlock;
class Constant;
class ConstantData;
@@ -41,12 +43,10 @@ class Instruction;
class LLVMContext;
class Module;
class ModuleSlotTracker;
+class raw_ostream;
class StringRef;
class Twine;
class Type;
-class ValueHandleBase;
-class ValueSymbolTable;
-class raw_ostream;
template<typename ValueTy> class StringMapEntry;
typedef StringMapEntry<Value*> ValueName;
@@ -77,6 +77,7 @@ class Value {
const unsigned char SubclassID; // Subclass identifier (for isa/dyn_cast)
unsigned char HasValueHandle : 1; // Has a ValueHandle pointing to this?
+
protected:
/// \brief Hold subclass data that can be dropped.
///
@@ -134,6 +135,7 @@ private:
U = U->getNext();
return *this;
}
+
use_iterator_impl operator++(int) { // Postincrement
auto tmp = *this;
++*this;
@@ -160,7 +162,7 @@ private:
friend class Value;
public:
- user_iterator_impl() {}
+ user_iterator_impl() = default;
bool operator==(const user_iterator_impl &x) const { return UI == x.UI; }
bool operator!=(const user_iterator_impl &x) const { return !operator==(x); }
@@ -172,6 +174,7 @@ private:
++UI;
return *this;
}
+
user_iterator_impl operator++(int) { // Postincrement
auto tmp = *this;
++*this;
@@ -192,12 +195,12 @@ private:
Use &getUse() const { return *UI; }
};
- void operator=(const Value &) = delete;
- Value(const Value &) = delete;
-
protected:
Value(Type *Ty, unsigned scid);
+
public:
+ Value(const Value &) = delete;
+ void operator=(const Value &) = delete;
virtual ~Value();
/// \brief Support for debugging, callable in GDB: V->dump()
@@ -236,13 +239,15 @@ public:
private:
void destroyValueName();
+ void doRAUW(Value *New, bool NoMetadata);
void setNameImpl(const Twine &Name);
public:
/// \brief Return a constant reference to the value's name.
///
- /// This is cheap and guaranteed to return the same reference as long as the
- /// value is not modified.
+ /// This is guaranteed to return the same reference as long as the value is not
+ /// modified. If the value has a name, this does a hashtable lookup, so it's
+ /// not free.
StringRef getName() const;
/// \brief Change the name of the value.
@@ -252,7 +257,6 @@ public:
/// \param Name The new name; or "" if the value's name should be removed.
void setName(const Twine &Name);
-
/// \brief Transfer the name from V to this value.
///
/// After taking V's name, sets V's name to empty.
@@ -267,6 +271,12 @@ public:
/// guaranteed to be empty.
void replaceAllUsesWith(Value *V);
+ /// \brief Change non-metadata uses of this to point to a new Value.
+ ///
+ /// Go through the uses list for this definition and make each use point to
+ /// "V" instead of "this". This function skips metadata entries in the list.
+ void replaceNonMetadataUsesWith(Value *V);
+
/// replaceUsesOutsideBlock - Go through the uses list for this definition and
/// make each use point to "V" instead of "this" when the use is outside the
/// block. 'This's use list is expected to have at least one element.
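
A minimal sketch of the two replacement entry points touched in this hunk, assuming OldV and NewV are existing llvm::Value pointers of the same type; the helper name is illustrative only.

#include "llvm/IR/Value.h"

// Redirect every user of OldV to NewV. replaceAllUsesWith rewrites all
// entries in the use list, while replaceNonMetadataUsesWith skips the
// metadata entries, as documented above.
static void redirectUses(llvm::Value *OldV, llvm::Value *NewV,
                         bool KeepMetadataUses) {
  if (KeepMetadataUses)
    OldV->replaceNonMetadataUsesWith(NewV);
  else
    OldV->replaceAllUsesWith(NewV);
}
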
@@ -442,17 +452,18 @@ public:
return SubclassOptionalData == V->SubclassOptionalData;
}
- /// \brief Clear any optional flags not set in the given Value.
- void intersectOptionalDataWith(const Value *V) {
- SubclassOptionalData &= V->SubclassOptionalData;
- }
-
/// \brief Return true if there is a value handle associated with this value.
bool hasValueHandle() const { return HasValueHandle; }
/// \brief Return true if there is metadata referencing this value.
bool isUsedByMetadata() const { return IsUsedByMD; }
+ /// \brief Return true if this value is a swifterror value.
+ ///
+ /// swifterror values can be either a function argument or an alloca with a
+ /// swifterror attribute.
+ bool isSwiftError() const;
+
/// \brief Strip off pointer casts, all-zero GEPs, and aliases.
///
/// Returns the original uncasted value. If this is called on a non-pointer
@@ -783,18 +794,6 @@ template <> struct isa_impl<GlobalObject, Value> {
}
};
-// Value* is only 4-byte aligned.
-template<>
-class PointerLikeTypeTraits<Value*> {
- typedef Value* PT;
-public:
- static inline void *getAsVoidPointer(PT P) { return P; }
- static inline PT getFromVoidPointer(void *P) {
- return static_cast<PT>(P);
- }
- enum { NumLowBitsAvailable = 2 };
-};
-
// Create wrappers for C Binding types (see CBindingWrapping.h).
DEFINE_ISA_CONVERSION_FUNCTIONS(Value, LLVMValueRef)
@@ -805,9 +804,9 @@ inline Value **unwrap(LLVMValueRef *Vals) {
template<typename T>
inline T **unwrap(LLVMValueRef *Vals, unsigned Length) {
-#ifdef DEBUG
+#ifndef NDEBUG
for (LLVMValueRef *I = Vals, *E = Vals + Length; I != E; ++I)
- cast<T>(*I);
+ unwrap<T>(*I); // For side effect of calling assert on invalid usage.
#endif
(void)Length;
return reinterpret_cast<T**>(Vals);
@@ -817,6 +816,6 @@ inline LLVMValueRef *wrap(const Value **Vals) {
return reinterpret_cast<LLVMValueRef*>(const_cast<Value**>(Vals));
}
-} // End llvm namespace
+} // end namespace llvm
-#endif
+#endif // LLVM_IR_VALUE_H
diff --git a/contrib/llvm/include/llvm/IR/ValueHandle.h b/contrib/llvm/include/llvm/IR/ValueHandle.h
index 3c2805913ef5..a4d4893a9bc9 100644
--- a/contrib/llvm/include/llvm/IR/ValueHandle.h
+++ b/contrib/llvm/include/llvm/IR/ValueHandle.h
@@ -22,17 +22,6 @@ namespace llvm {
class ValueHandleBase;
template<typename From> struct simplify_type;
-// ValueHandleBase** is only 4-byte aligned.
-template<>
-class PointerLikeTypeTraits<ValueHandleBase**> {
-public:
- static inline void *getAsVoidPointer(ValueHandleBase** P) { return P; }
- static inline ValueHandleBase **getFromVoidPointer(void *P) {
- return static_cast<ValueHandleBase**>(P);
- }
- enum { NumLowBitsAvailable = 2 };
-};
-
/// \brief This is the common base class of value handles.
///
/// ValueHandle's are smart pointers to Value's that have special behavior when
diff --git a/contrib/llvm/include/llvm/IR/ValueMap.h b/contrib/llvm/include/llvm/IR/ValueMap.h
index 85379ad468c4..9648e1989f94 100644
--- a/contrib/llvm/include/llvm/IR/ValueMap.h
+++ b/contrib/llvm/include/llvm/IR/ValueMap.h
@@ -27,14 +27,20 @@
#define LLVM_IR_VALUEMAP_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/IR/TrackingMDRef.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/Mutex.h"
#include "llvm/Support/UniqueLock.h"
-#include "llvm/Support/type_traits.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
#include <iterator>
-#include <memory>
+#include <type_traits>
+#include <utility>
namespace llvm {
@@ -77,11 +83,12 @@ struct ValueMapConfig {
};
/// See the file comment.
-template<typename KeyT, typename ValueT, typename Config =ValueMapConfig<KeyT> >
+template<typename KeyT, typename ValueT, typename Config =ValueMapConfig<KeyT>>
class ValueMap {
friend class ValueMapCallbackVH<KeyT, ValueT, Config>;
+
typedef ValueMapCallbackVH<KeyT, ValueT, Config> ValueMapCVH;
- typedef DenseMap<ValueMapCVH, ValueT, DenseMapInfo<ValueMapCVH> > MapT;
+ typedef DenseMap<ValueMapCVH, ValueT, DenseMapInfo<ValueMapCVH>> MapT;
typedef DenseMap<const Metadata *, TrackingMDRef> MDMapT;
typedef typename Config::ExtraData ExtraData;
MapT Map;
@@ -90,8 +97,6 @@ class ValueMap {
bool MayMapMetadata = true;
- ValueMap(const ValueMap&) = delete;
- ValueMap& operator=(const ValueMap&) = delete;
public:
typedef KeyT key_type;
typedef ValueT mapped_type;
@@ -102,6 +107,8 @@ public:
: Map(NumInitBuckets), Data() {}
explicit ValueMap(const ExtraData &Data, unsigned NumInitBuckets = 64)
: Map(NumInitBuckets), Data(Data) {}
+ ValueMap(const ValueMap &) = delete;
+ ValueMap &operator=(const ValueMap &) = delete;
bool hasMD() const { return bool(MDMap); }
MDMapT &MD() {
@@ -183,7 +190,6 @@ public:
insert(*I);
}
-
bool erase(const KeyT &Val) {
typename MapT::iterator I = Map.find_as(Val);
if (I == Map.end())
@@ -237,6 +243,7 @@ template <typename KeyT, typename ValueT, typename Config>
class ValueMapCallbackVH final : public CallbackVH {
friend class ValueMap<KeyT, ValueT, Config>;
friend struct DenseMapInfo<ValueMapCallbackVH>;
+
typedef ValueMap<KeyT, ValueT, Config> ValueMapT;
typedef typename std::remove_pointer<KeyT>::type KeySansPointerT;
@@ -262,6 +269,7 @@ public:
Config::onDelete(Copy.Map->Data, Copy.Unwrap()); // May destroy *this.
Copy.Map->Map.erase(Copy); // Definitely destroys *this.
}
+
void allUsesReplacedWith(Value *new_key) override {
assert(isa<KeySansPointerT>(new_key) &&
"Invalid RAUW on key of ValueMap<>");
@@ -289,30 +297,34 @@ public:
};
template<typename KeyT, typename ValueT, typename Config>
-struct DenseMapInfo<ValueMapCallbackVH<KeyT, ValueT, Config> > {
+struct DenseMapInfo<ValueMapCallbackVH<KeyT, ValueT, Config>> {
typedef ValueMapCallbackVH<KeyT, ValueT, Config> VH;
static inline VH getEmptyKey() {
return VH(DenseMapInfo<Value *>::getEmptyKey());
}
+
static inline VH getTombstoneKey() {
return VH(DenseMapInfo<Value *>::getTombstoneKey());
}
+
static unsigned getHashValue(const VH &Val) {
return DenseMapInfo<KeyT>::getHashValue(Val.Unwrap());
}
+
static unsigned getHashValue(const KeyT &Val) {
return DenseMapInfo<KeyT>::getHashValue(Val);
}
+
static bool isEqual(const VH &LHS, const VH &RHS) {
return LHS == RHS;
}
+
static bool isEqual(const KeyT &LHS, const VH &RHS) {
return LHS == RHS.getValPtr();
}
};
-
template<typename DenseMapT, typename KeyT>
class ValueMapIterator :
public std::iterator<std::forward_iterator_tag,
@@ -320,10 +332,11 @@ class ValueMapIterator :
ptrdiff_t> {
typedef typename DenseMapT::iterator BaseT;
typedef typename DenseMapT::mapped_type ValueT;
+
BaseT I;
+
public:
ValueMapIterator() : I() {}
-
ValueMapIterator(BaseT I) : I(I) {}
BaseT base() const { return I; }
@@ -369,7 +382,9 @@ class ValueMapConstIterator :
ptrdiff_t> {
typedef typename DenseMapT::const_iterator BaseT;
typedef typename DenseMapT::mapped_type ValueT;
+
BaseT I;
+
public:
ValueMapConstIterator() : I() {}
ValueMapConstIterator(BaseT I) : I(I) {}
@@ -414,4 +429,4 @@ public:
} // end namespace llvm
-#endif
+#endif // LLVM_IR_VALUEMAP_H
diff --git a/contrib/llvm/include/llvm/IR/Verifier.h b/contrib/llvm/include/llvm/IR/Verifier.h
index fdb6ce400a8d..71f727c3d4fc 100644
--- a/contrib/llvm/include/llvm/IR/Verifier.h
+++ b/contrib/llvm/include/llvm/IR/Verifier.h
@@ -30,6 +30,49 @@ class FunctionPass;
class ModulePass;
class Module;
class raw_ostream;
+struct VerifierSupport;
+
+/// Verify that the TBAA metadata is valid.
+class TBAAVerifier {
+ VerifierSupport *Diagnostic = nullptr;
+
+ /// Helper to diagnose a failure
+ template <typename... Tys> void CheckFailed(Tys &&... Args);
+
+ /// Cache of TBAA base nodes that have already been visited. This cache maps
+ /// a node that has been visited to a pair (IsInvalid, BitWidth) where
+ ///
+ /// \c IsInvalid is true iff the node is invalid.
+ /// \c BitWidth, if non-zero, is the bitwidth of the integer used to denote
+ /// the offset of the access. If zero, only a zero offset is allowed.
+ ///
+ /// \c BitWidth has no meaning if \c IsInvalid is true.
+ typedef std::pair<bool, unsigned> TBAABaseNodeSummary;
+ DenseMap<const MDNode *, TBAABaseNodeSummary> TBAABaseNodes;
+
+ /// Maps an alleged scalar TBAA node to a boolean that is true if the said
+ /// TBAA node is a valid scalar TBAA node or false otherwise.
+ DenseMap<const MDNode *, bool> TBAAScalarNodes;
+
+ /// \name Helper functions used by \c visitTBAAMetadata.
+ /// @{
+ MDNode *getFieldNodeFromTBAABaseNode(Instruction &I, const MDNode *BaseNode,
+ APInt &Offset);
+ TBAAVerifier::TBAABaseNodeSummary verifyTBAABaseNode(Instruction &I,
+ const MDNode *BaseNode);
+ TBAABaseNodeSummary verifyTBAABaseNodeImpl(Instruction &I,
+ const MDNode *BaseNode);
+
+ bool isValidScalarTBAANode(const MDNode *MD);
+ /// @}
+
+public:
+ TBAAVerifier(VerifierSupport *Diagnostic = nullptr)
+ : Diagnostic(Diagnostic) {}
+ /// Visit an instruction and return true if it is valid; return false if
+ /// invalid TBAA metadata is attached.
+ bool visitTBAAMetadata(Instruction &I, const MDNode *MD);
+};
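
A hedged sketch of using the standalone TBAAVerifier declared above on a single instruction; without a VerifierSupport object, failures are reported only through the return value, and the helper name is illustrative.

#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Verifier.h"

// Return true if the instruction either carries no !tbaa node or carries a
// node that the TBAA verifier accepts.
static bool hasValidTBAA(llvm::Instruction &I) {
  llvm::MDNode *MD = I.getMetadata(llvm::LLVMContext::MD_tbaa);
  if (!MD)
    return true;
  llvm::TBAAVerifier V; // no diagnostics, result only
  return V.visitTBAAMetadata(I, MD);
}
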
/// \brief Check a function for errors, useful for use when debugging a
/// pass.
@@ -58,13 +101,12 @@ FunctionPass *createVerifierPass(bool FatalErrors = true);
/// and debug info errors.
class VerifierAnalysis : public AnalysisInfoMixin<VerifierAnalysis> {
friend AnalysisInfoMixin<VerifierAnalysis>;
- static char PassID;
+ static AnalysisKey Key;
public:
struct Result {
bool IRBroken, DebugInfoBroken;
};
- static void *ID() { return (void *)&PassID; }
Result run(Module &M, ModuleAnalysisManager &);
Result run(Function &F, FunctionAnalysisManager &);
};
diff --git a/contrib/llvm/include/llvm/InitializePasses.h b/contrib/llvm/include/llvm/InitializePasses.h
index 90ff82fe86d4..a34ebaf18a03 100644
--- a/contrib/llvm/include/llvm/InitializePasses.h
+++ b/contrib/llvm/include/llvm/InitializePasses.h
@@ -46,6 +46,9 @@ void initializeInstrumentation(PassRegistry&);
/// Initialize all passes linked into the Analysis library.
void initializeAnalysis(PassRegistry&);
+/// Initialize all passes linked into the Coroutines library.
+void initializeCoroutines(PassRegistry&);
+
/// Initialize all passes linked into the CodeGen library.
void initializeCodeGen(PassRegistry&);
@@ -63,7 +66,7 @@ void initializeAddressSanitizerModulePass(PassRegistry&);
void initializeAddressSanitizerPass(PassRegistry&);
void initializeAliasSetPrinterPass(PassRegistry&);
void initializeAlignmentFromAssumptionsPass(PassRegistry&);
-void initializeAlwaysInlinerPass(PassRegistry&);
+void initializeAlwaysInlinerLegacyPassPass(PassRegistry&);
void initializeArgPromotionPass(PassRegistry&);
void initializeAssumptionCacheTrackerPass(PassRegistry &);
void initializeAtomicExpandPass(PassRegistry&);
@@ -76,12 +79,13 @@ void initializeBlockFrequencyInfoWrapperPassPass(PassRegistry&);
void initializeBoundsCheckingPass(PassRegistry&);
void initializeBranchFolderPassPass(PassRegistry&);
void initializeBranchProbabilityInfoWrapperPassPass(PassRegistry&);
+void initializeBranchRelaxationPass(PassRegistry&);
void initializeBreakCriticalEdgesPass(PassRegistry&);
-void initializeCFGOnlyPrinterPass(PassRegistry&);
-void initializeCFGOnlyViewerPass(PassRegistry&);
-void initializeCFGPrinterPass(PassRegistry&);
+void initializeCFGOnlyViewerLegacyPassPass(PassRegistry&);
+void initializeCFGPrinterLegacyPassPass(PassRegistry&);
+void initializeCFGOnlyPrinterLegacyPassPass(PassRegistry&);
void initializeCFGSimplifyPassPass(PassRegistry&);
-void initializeCFGViewerPass(PassRegistry&);
+void initializeCFGViewerLegacyPassPass(PassRegistry&);
void initializeCFLAndersAAWrapperPassPass(PassRegistry&);
void initializeCFLSteensAAWrapperPassPass(PassRegistry&);
void initializeCallGraphDOTPrinterPass(PassRegistry&);
@@ -89,6 +93,7 @@ void initializeCallGraphPrinterLegacyPassPass(PassRegistry&);
void initializeCallGraphViewerPass(PassRegistry&);
void initializeCallGraphWrapperPassPass(PassRegistry &);
void initializeCodeGenPreparePass(PassRegistry&);
+void initializeCountingFunctionInserterPass(PassRegistry&);
void initializeConstantHoistingLegacyPassPass(PassRegistry&);
void initializeConstantMergeLegacyPassPass(PassRegistry &);
void initializeConstantPropagationPass(PassRegistry&);
@@ -116,10 +121,12 @@ void initializeDominanceFrontierWrapperPassPass(PassRegistry&);
void initializeDominatorTreeWrapperPassPass(PassRegistry&);
void initializeDwarfEHPreparePass(PassRegistry&);
void initializeEarlyCSELegacyPassPass(PassRegistry &);
+void initializeEarlyCSEMemSSALegacyPassPass(PassRegistry &);
void initializeEarlyIfConverterPass(PassRegistry&);
void initializeEdgeBundlesPass(PassRegistry&);
void initializeEfficiencySanitizerPass(PassRegistry&);
void initializeEliminateAvailableExternallyLegacyPassPass(PassRegistry &);
+void initializeRAGreedyPass(PassRegistry&);
void initializeGVNHoistLegacyPassPass(PassRegistry &);
void initializeExpandISelPseudosPass(PassRegistry&);
void initializeExpandPostRAPass(PassRegistry&);
@@ -130,7 +137,7 @@ void initializeFloat2IntLegacyPassPass(PassRegistry&);
void initializeForceFunctionAttrsLegacyPassPass(PassRegistry&);
void initializeForwardControlFlowIntegrityPass(PassRegistry&);
void initializeFuncletLayoutPass(PassRegistry &);
-void initializeFunctionImportPassPass(PassRegistry &);
+void initializeFunctionImportLegacyPassPass(PassRegistry &);
void initializeGCMachineCodeAnalysisPass(PassRegistry&);
void initializeGCModuleInfoPass(PassRegistry&);
void initializeGCOVProfilerLegacyPassPass(PassRegistry&);
@@ -138,6 +145,7 @@ void initializeGVNLegacyPassPass(PassRegistry&);
void initializeGlobalDCELegacyPassPass(PassRegistry&);
void initializeGlobalMergePass(PassRegistry&);
void initializeGlobalOptLegacyPassPass(PassRegistry&);
+void initializeGlobalSplitPass(PassRegistry&);
void initializeGlobalsAAWrapperPassPass(PassRegistry&);
void initializeGuardWideningLegacyPassPass(PassRegistry&);
void initializeIPCPPass(PassRegistry&);
@@ -155,14 +163,20 @@ void initializeInstNamerPass(PassRegistry&);
void initializeInstSimplifierPass(PassRegistry&);
void initializeInstrProfilingLegacyPassPass(PassRegistry &);
void initializeInstructionCombiningPassPass(PassRegistry&);
+void initializeInstructionSelectPass(PassRegistry &);
void initializeInterleavedAccessPass(PassRegistry &);
void initializeInternalizeLegacyPassPass(PassRegistry&);
void initializeIntervalPartitionPass(PassRegistry&);
void initializeJumpThreadingPass(PassRegistry&);
-void initializeLCSSAWrapperPassPass(PassRegistry &);
+void initializeLCSSAWrapperPassPass(PassRegistry&);
+void initializeLCSSAVerificationPassPass(PassRegistry&);
void initializeLegacyLICMPassPass(PassRegistry&);
+void initializeLegacyLoopSinkPassPass(PassRegistry&);
+void initializeLazyBranchProbabilityInfoPassPass(PassRegistry&);
void initializeLazyBlockFrequencyInfoPassPass(PassRegistry&);
void initializeLazyValueInfoWrapperPassPass(PassRegistry&);
+void initializeLegalizerPass(PassRegistry&);
+void initializeLibCallsShrinkWrapLegacyPassPass(PassRegistry&);
void initializeLintPass(PassRegistry&);
void initializeLiveDebugValuesPass(PassRegistry&);
void initializeLiveDebugVariablesPass(PassRegistry&);
@@ -175,7 +189,7 @@ void initializeLoaderPassPass(PassRegistry&);
void initializeLoadStoreVectorizerPass(PassRegistry&);
void initializeLocalStackSlotPassPass(PassRegistry&);
void initializeLoopAccessLegacyAnalysisPass(PassRegistry&);
-void initializeLoopDataPrefetchPass(PassRegistry&);
+void initializeLoopDataPrefetchLegacyPassPass(PassRegistry &);
void initializeLoopDeletionLegacyPassPass(PassRegistry&);
void initializeLoopDistributeLegacyPass(PassRegistry&);
void initializeLoopExtractorPass(PassRegistry&);
@@ -198,9 +212,9 @@ void initializeLoopVersioningPassPass(PassRegistry &);
void initializeLowerAtomicLegacyPassPass(PassRegistry &);
void initializeLowerEmuTLSPass(PassRegistry&);
void initializeLowerExpectIntrinsicPass(PassRegistry&);
-void initializeLowerGuardIntrinsicPass(PassRegistry&);
+void initializeLowerGuardIntrinsicLegacyPassPass(PassRegistry&);
void initializeLowerIntrinsicsPass(PassRegistry&);
-void initializeLowerInvokePass(PassRegistry&);
+void initializeLowerInvokeLegacyPassPass(PassRegistry&);
void initializeLowerSwitchPass(PassRegistry&);
void initializeLowerTypeTestsPass(PassRegistry&);
void initializeMIRPrintingPassPass(PassRegistry&);
@@ -217,6 +231,7 @@ void initializeMachineFunctionPrinterPassPass(PassRegistry&);
void initializeMachineLICMPass(PassRegistry&);
void initializeMachineLoopInfoPass(PassRegistry&);
void initializeMachineModuleInfoPass(PassRegistry&);
+void initializeMachinePipelinerPass(PassRegistry&);
void initializeMachinePostDominatorTreePass(PassRegistry&);
void initializeMachineRegionInfoPassPass(PassRegistry&);
void initializeMachineSchedulerPass(PassRegistry&);
@@ -235,9 +250,9 @@ void initializeMergedLoadStoreMotionLegacyPassPass(PassRegistry &);
void initializeMetaRenamerPass(PassRegistry&);
void initializeModuleDebugInfoPrinterPass(PassRegistry&);
void initializeModuleSummaryIndexWrapperPassPass(PassRegistry &);
-void initializeNameAnonFunctionPass(PassRegistry &);
-void initializeNaryReassociatePass(PassRegistry&);
-void initializeNoAAPass(PassRegistry&);
+void initializeNameAnonGlobalLegacyPassPass(PassRegistry &);
+void initializeNaryReassociateLegacyPassPass(PassRegistry &);
+void initializeNewGVNPass(PassRegistry&);
void initializeObjCARCAAWrapperPassPass(PassRegistry&);
void initializeObjCARCAPElimPass(PassRegistry&);
void initializeObjCARCContractPass(PassRegistry&);
@@ -284,10 +299,12 @@ void initializeRegionOnlyViewerPass(PassRegistry&);
void initializeRegionPrinterPass(PassRegistry&);
void initializeRegionViewerPass(PassRegistry&);
void initializeRegisterCoalescerPass(PassRegistry&);
+void initializeStripGCRelocatesPass(PassRegistry&);
void initializeRenameIndependentSubregsPass(PassRegistry&);
+void initializeResetMachineFunctionPass(PassRegistry &);
void initializeReversePostOrderFunctionAttrsLegacyPassPass(PassRegistry&);
void initializeRewriteStatepointsForGCPass(PassRegistry&);
-void initializeRewriteSymbolsPass(PassRegistry&);
+void initializeRewriteSymbolsLegacyPassPass(PassRegistry&);
void initializeSCCPLegacyPassPass(PassRegistry &);
void initializeSCEVAAWrapperPassPass(PassRegistry&);
void initializeSLPVectorizerPass(PassRegistry&);
@@ -306,7 +323,7 @@ void initializeSingleLoopExtractorPass(PassRegistry&);
void initializeSinkingLegacyPassPass(PassRegistry&);
void initializeSjLjEHPreparePass(PassRegistry&);
void initializeSlotIndexesPass(PassRegistry&);
-void initializeSpeculativeExecutionPass(PassRegistry&);
+void initializeSpeculativeExecutionLegacyPassPass(PassRegistry&);
void initializeSpillPlacementPass(PassRegistry&);
void initializeStackColoringPass(PassRegistry&);
void initializeStackMapLivenessPass(PassRegistry&);
@@ -317,6 +334,7 @@ void initializeStripDeadDebugInfoPass(PassRegistry&);
void initializeStripDeadPrototypesLegacyPassPass(PassRegistry&);
void initializeStripDebugDeclarePass(PassRegistry&);
void initializeStripNonDebugSymbolsPass(PassRegistry&);
+void initializeStripNonLineTableDebugInfoPass(PassRegistry&);
void initializeStripSymbolsPass(PassRegistry&);
void initializeStructurizeCFGPass(PassRegistry&);
void initializeTailCallElimPass(PassRegistry&);
@@ -337,6 +355,7 @@ void initializeVirtRegRewriterPass(PassRegistry&);
void initializeWholeProgramDevirtPass(PassRegistry &);
void initializeWinEHPreparePass(PassRegistry&);
void initializeWriteBitcodePassPass(PassRegistry &);
+void initializeWriteThinLTOBitcodePass(PassRegistry &);
void initializeXRayInstrumentationPass(PassRegistry &);
}
diff --git a/contrib/llvm/include/llvm/LTO/Caching.h b/contrib/llvm/include/llvm/LTO/Caching.h
new file mode 100644
index 000000000000..3b96bd1dc301
--- /dev/null
+++ b/contrib/llvm/include/llvm/LTO/Caching.h
@@ -0,0 +1,37 @@
+//===- Caching.h - LLVM Link Time Optimizer Cache Handling -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the localCache function, which allows clients to add a
+// filesystem cache to ThinLTO.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LTO_CACHING_H
+#define LLVM_LTO_CACHING_H
+
+#include "llvm/LTO/LTO.h"
+#include <string>
+
+namespace llvm {
+namespace lto {
+
+/// This type defines the callback to add a pre-existing native object file
+/// (e.g. in a cache).
+///
+/// File callbacks must be thread safe.
+typedef std::function<void(unsigned Task, StringRef Path)> AddFileFn;
+
+/// Create a local file system cache which uses the given cache directory and
+/// file callback.
+NativeObjectCache localCache(std::string CacheDirectoryPath, AddFileFn AddFile);
+
+} // namespace lto
+} // namespace llvm
+
+#endif
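
A minimal sketch of wiring the localCache facility above into an LTO run; it assumes LTOLink is an already-configured llvm::lto::LTO instance, AddStream was supplied by the linker, and the cache directory name is illustrative.

#include "llvm/LTO/Caching.h"
#include "llvm/LTO/LTO.h"

static llvm::Error runWithLocalCache(llvm::lto::LTO &LTOLink,
                                     llvm::lto::AddStreamFn AddStream) {
  // The callback runs for cache hits and for freshly generated objects; a
  // real linker would record Path as the native object for task Task here.
  llvm::lto::NativeObjectCache Cache = llvm::lto::localCache(
      "lto.cache", [](unsigned Task, llvm::StringRef Path) {
        (void)Task;
        (void)Path;
      });
  return LTOLink.run(AddStream, Cache);
}
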
diff --git a/contrib/llvm/include/llvm/LTO/Config.h b/contrib/llvm/include/llvm/LTO/Config.h
new file mode 100644
index 000000000000..3aa48c9f7c28
--- /dev/null
+++ b/contrib/llvm/include/llvm/LTO/Config.h
@@ -0,0 +1,181 @@
+//===- Config.h - LLVM Link Time Optimizer Configuration -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the lto::Config data structure, which allows clients to
+// configure LTO.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LTO_CONFIG_H
+#define LLVM_LTO_CONFIG_H
+
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Target/TargetOptions.h"
+
+#include <functional>
+
+namespace llvm {
+
+class Error;
+class Module;
+class ModuleSummaryIndex;
+class raw_pwrite_stream;
+
+namespace lto {
+
+/// LTO configuration. A linker can configure LTO by setting fields in this data
+/// structure and passing it to the lto::LTO constructor.
+struct Config {
+ // Note: when adding fields here, consider whether they need to be added to
+ // computeCacheKey in LTO.cpp.
+ std::string CPU;
+ TargetOptions Options;
+ std::vector<std::string> MAttrs;
+ Reloc::Model RelocModel = Reloc::PIC_;
+ CodeModel::Model CodeModel = CodeModel::Default;
+ CodeGenOpt::Level CGOptLevel = CodeGenOpt::Default;
+ unsigned OptLevel = 2;
+ bool DisableVerify = false;
+
+ /// Disable the optimizer entirely, including importing for ThinLTO.
+ bool CodeGenOnly = false;
+
+ /// If this field is set, the set of passes run in the middle-end optimizer
+ /// will be the one specified by the string. Only works with the new pass
+ /// manager as the old one doesn't have this ability.
+ std::string OptPipeline;
+
+ /// If this field is set, it has the same effect as specifying an AA pipeline
+ /// identified by the string. Only works with the new pass manager, in
+ /// conjunction with OptPipeline.
+ std::string AAPipeline;
+
+ /// Setting this field will replace target triples in input files with this
+ /// triple.
+ std::string OverrideTriple;
+
+ /// Setting this field will replace unspecified target triples in input files
+ /// with this triple.
+ std::string DefaultTriple;
+
+ /// Sample PGO profile path.
+ std::string SampleProfile;
+
+ bool ShouldDiscardValueNames = true;
+ DiagnosticHandlerFunction DiagHandler;
+
+ /// If this field is set, LTO will write input file paths and symbol
+ /// resolutions here in llvm-lto2 command line flag format. This can be
+ /// used for testing and for running the LTO pipeline outside of the linker
+ /// with llvm-lto2.
+ std::unique_ptr<raw_ostream> ResolutionFile;
+
+ /// The following callbacks deal with tasks, which normally represent the
+ /// entire optimization and code generation pipeline for what will become a
+ /// single native object file. Each task has a unique identifier between 0 and
+ /// getMaxTasks()-1, which is supplied to the callback via the Task parameter.
+ /// A task represents the entire pipeline for ThinLTO and regular
+ /// (non-parallel) LTO, but a parallel code generation task will be split into
+ /// N tasks before code generation, where N is the parallelism level.
+ ///
+ /// LTO may decide to stop processing a task at any time, for example if the
+ /// module is empty or if a module hook (see below) returns false. For this
+ /// reason, the client should not expect to receive exactly getMaxTasks()
+ /// native object files.
+
+ /// A module hook may be used by a linker to perform actions during the LTO
+ /// pipeline. For example, a linker may use this function to implement
+ /// -save-temps. If this function returns false, any further processing for
+ /// that task is aborted.
+ ///
+ /// Module hooks must be thread safe with respect to the linker's internal
+ /// data structures. A module hook will never be called concurrently from
+ /// multiple threads with the same task ID, or the same module.
+ ///
+ /// Note that in out-of-process backend scenarios, none of the hooks will be
+ /// called for ThinLTO tasks.
+ typedef std::function<bool(unsigned Task, const Module &)> ModuleHookFn;
+
+ /// This module hook is called after linking (regular LTO) or loading
+ /// (ThinLTO) the module, before modifying it.
+ ModuleHookFn PreOptModuleHook;
+
+ /// This hook is called after promoting any internal functions
+ /// (ThinLTO-specific).
+ ModuleHookFn PostPromoteModuleHook;
+
+ /// This hook is called after internalizing the module.
+ ModuleHookFn PostInternalizeModuleHook;
+
+ /// This hook is called after importing from other modules (ThinLTO-specific).
+ ModuleHookFn PostImportModuleHook;
+
+ /// This module hook is called after optimization is complete.
+ ModuleHookFn PostOptModuleHook;
+
+ /// This module hook is called before code generation. It is similar to the
+ /// PostOptModuleHook, but for parallel code generation it is called after
+ /// splitting the module.
+ ModuleHookFn PreCodeGenModuleHook;
+
+ /// A combined index hook is called after all per-module indexes have been
+ /// combined (ThinLTO-specific). It can be used to implement -save-temps for
+ /// the combined index.
+ ///
+ /// If this function returns false, any further processing for ThinLTO tasks
+ /// is aborted.
+ ///
+ /// It is called regardless of whether the backend is in-process, although it
+ /// is not called from individual backend processes.
+ typedef std::function<bool(const ModuleSummaryIndex &Index)>
+ CombinedIndexHookFn;
+ CombinedIndexHookFn CombinedIndexHook;
+
+ /// This is a convenience function that configures this Config object to write
+ /// temporary files named after the given OutputFileName for each of the LTO
+ /// phases to disk. A client can use this function to implement -save-temps.
+ ///
+ /// FIXME: Temporary files derived from ThinLTO backends are currently named
+ /// after the input file name, rather than the output file name, when
+ /// UseInputModulePath is set to true.
+ ///
+ /// Specifically, it (1) sets each of the above module hooks and the combined
+ /// index hook to a function that calls the hook function (if any) that was
+ /// present in the appropriate field when the addSaveTemps function was
+ /// called, and writes the module to a bitcode file with a name prefixed by
+ /// the given output file name, and (2) creates a resolution file whose name
+ /// is prefixed by the given output file name and sets ResolutionFile to its
+ /// file handle.
+ Error addSaveTemps(std::string OutputFileName,
+ bool UseInputModulePath = false);
+};
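
A hedged example of a linker-side Config, using only fields declared above; reportDiag stands in for the client's diagnostic handler and is hypothetical, as is the -save-temps handling.

#include "llvm/LTO/Config.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"

// Hypothetical client diagnostic handler.
static void reportDiag(const llvm::DiagnosticInfo &DI);

static llvm::lto::Config makeLTOConfig(const std::string &OutputFileName) {
  llvm::lto::Config Conf;
  Conf.OptLevel = 2;                           // middle-end -O level
  Conf.CGOptLevel = llvm::CodeGenOpt::Default; // code generator -O level
  Conf.RelocModel = llvm::Reloc::PIC_;
  Conf.DiagHandler = reportDiag;               // required at this revision
  // Implement -save-temps by dumping each LTO phase next to the output file.
  if (llvm::Error E = Conf.addSaveTemps(OutputFileName))
    llvm::logAllUnhandledErrors(std::move(E), llvm::errs(), "addSaveTemps: ");
  return Conf;
}
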
+
+/// A derived class of LLVMContext that initializes itself according to a given
+/// Config object. The purpose of this class is to tie ownership of the
+/// diagnostic handler to the context, as opposed to the Config object (which
+/// may be ephemeral).
+struct LTOLLVMContext : LLVMContext {
+ static void funcDiagHandler(const DiagnosticInfo &DI, void *Context) {
+ auto *Fn = static_cast<DiagnosticHandlerFunction *>(Context);
+ (*Fn)(DI);
+ }
+
+ LTOLLVMContext(const Config &C) : DiagHandler(C.DiagHandler) {
+ setDiscardValueNames(C.ShouldDiscardValueNames);
+ enableDebugTypeODRUniquing();
+ setDiagnosticHandler(funcDiagHandler, &DiagHandler, true);
+ }
+ DiagnosticHandlerFunction DiagHandler;
+};
+
+}
+}
+
+#endif
diff --git a/contrib/llvm/include/llvm/LTO/LTO.h b/contrib/llvm/include/llvm/LTO/LTO.h
index 5154c0007aaa..bc435702157e 100644
--- a/contrib/llvm/include/llvm/LTO/LTO.h
+++ b/contrib/llvm/include/llvm/LTO/LTO.h
@@ -16,39 +16,28 @@
#ifndef LLVM_LTO_LTO_H
#define LLVM_LTO_LTO_H
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringSet.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/ModuleSummaryIndex.h"
+#include "llvm/LTO/Config.h"
+#include "llvm/Linker/IRMover.h"
+#include "llvm/Object/IRObjectFile.h"
+#include "llvm/Support/thread.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/IPO/FunctionImport.h"
namespace llvm {
+class BitcodeModule;
+class Error;
class LLVMContext;
class MemoryBufferRef;
class Module;
-
-/// Helper to load a module from bitcode.
-std::unique_ptr<Module> loadModuleFromBuffer(const MemoryBufferRef &Buffer,
- LLVMContext &Context, bool Lazy);
-
-/// Provide a "loader" for the FunctionImporter to access function from other
-/// modules.
-class ModuleLoader {
- /// The context that will be used for importing.
- LLVMContext &Context;
-
- /// Map from Module identifier to MemoryBuffer. Used by clients like the
- /// FunctionImported to request loading a Module.
- StringMap<MemoryBufferRef> &ModuleMap;
-
-public:
- ModuleLoader(LLVMContext &Context, StringMap<MemoryBufferRef> &ModuleMap)
- : Context(Context), ModuleMap(ModuleMap) {}
-
- /// Load a module on demand.
- std::unique_ptr<Module> operator()(StringRef Identifier) {
- return loadModuleFromBuffer(ModuleMap[Identifier], Context, /*Lazy*/ true);
- }
-};
-
+class Target;
+class raw_pwrite_stream;
/// Resolve Weak and LinkOnce values in the \p Index. Linkage changes recorded
/// in the index and the ThinLTO backends must apply the changes to the Module
@@ -69,6 +58,400 @@ void thinLTOResolveWeakForLinkerInIndex(
void thinLTOInternalizeAndPromoteInIndex(
ModuleSummaryIndex &Index,
function_ref<bool(StringRef, GlobalValue::GUID)> isExported);
-}
+
+namespace lto {
+
+/// Given the original \p Path to an output file, replace any path
+/// prefix matching \p OldPrefix with \p NewPrefix. Also, create the
+/// resulting directory if it does not yet exist.
+std::string getThinLTOOutputFile(const std::string &Path,
+ const std::string &OldPrefix,
+ const std::string &NewPrefix);
+
+class LTO;
+struct SymbolResolution;
+class ThinBackendProc;
+
+/// An input file. This is a wrapper for ModuleSymbolTable that exposes only the
+/// information that an LTO client should need in order to do symbol resolution.
+class InputFile {
+ // FIXME: Remove LTO class friendship once we have bitcode symbol tables.
+ friend LTO;
+ InputFile() = default;
+
+ // FIXME: Remove the LLVMContext once we have bitcode symbol tables.
+ LLVMContext Ctx;
+ struct InputModule;
+ std::vector<InputModule> Mods;
+ ModuleSymbolTable SymTab;
+
+ std::vector<StringRef> Comdats;
+ DenseMap<const Comdat *, unsigned> ComdatMap;
+
+public:
+ ~InputFile();
+
+ /// Create an InputFile.
+ static Expected<std::unique_ptr<InputFile>> create(MemoryBufferRef Object);
+
+ class symbol_iterator;
+
+ /// This is a wrapper for ArrayRef<ModuleSymbolTable::Symbol>::iterator that
+ /// exposes only the information that an LTO client should need in order to do
+ /// symbol resolution.
+ ///
+ /// This object is ephemeral; it is only valid as long as an iterator obtained
+ /// from symbols() refers to it.
+ class Symbol {
+ friend symbol_iterator;
+ friend LTO;
+
+ ArrayRef<ModuleSymbolTable::Symbol>::iterator I;
+ const ModuleSymbolTable &SymTab;
+ const InputFile *File;
+ uint32_t Flags;
+ SmallString<64> Name;
+
+ bool shouldSkip() {
+ return !(Flags & object::BasicSymbolRef::SF_Global) ||
+ (Flags & object::BasicSymbolRef::SF_FormatSpecific);
+ }
+
+ void skip() {
+ ArrayRef<ModuleSymbolTable::Symbol>::iterator E = SymTab.symbols().end();
+ while (I != E) {
+ Flags = SymTab.getSymbolFlags(*I);
+ if (!shouldSkip())
+ break;
+ ++I;
+ }
+ if (I == E)
+ return;
+
+ Name.clear();
+ {
+ raw_svector_ostream OS(Name);
+ SymTab.printSymbolName(OS, *I);
+ }
+ }
+
+ bool isGV() const { return I->is<GlobalValue *>(); }
+ GlobalValue *getGV() const { return I->get<GlobalValue *>(); }
+
+ public:
+ Symbol(ArrayRef<ModuleSymbolTable::Symbol>::iterator I,
+ const ModuleSymbolTable &SymTab, const InputFile *File)
+ : I(I), SymTab(SymTab), File(File) {
+ skip();
+ }
+
+ /// Returns the mangled name of the global.
+ StringRef getName() const { return Name; }
+
+ uint32_t getFlags() const { return Flags; }
+ GlobalValue::VisibilityTypes getVisibility() const {
+ if (isGV())
+ return getGV()->getVisibility();
+ return GlobalValue::DefaultVisibility;
+ }
+ bool canBeOmittedFromSymbolTable() const {
+ return isGV() && llvm::canBeOmittedFromSymbolTable(getGV());
+ }
+ bool isTLS() const {
+ // FIXME: Expose a thread-local flag for module asm symbols.
+ return isGV() && getGV()->isThreadLocal();
+ }
+
+ // Returns the index of the comdat this symbol is in or -1 if the symbol
+ // is not in a comdat.
+ // FIXME: We have to return Expected<int> because aliases point to an
+ // arbitrary ConstantExpr and that might not actually be a constant. That
+ // means we might not be able to find what an alias is aliased to and
+ // so find its comdat.
+ Expected<int> getComdatIndex() const;
+
+ uint64_t getCommonSize() const {
+ assert(Flags & object::BasicSymbolRef::SF_Common);
+ if (!isGV())
+ return 0;
+ return getGV()->getParent()->getDataLayout().getTypeAllocSize(
+ getGV()->getType()->getElementType());
+ }
+ unsigned getCommonAlignment() const {
+ assert(Flags & object::BasicSymbolRef::SF_Common);
+ if (!isGV())
+ return 0;
+ return getGV()->getAlignment();
+ }
+ };
+
+ class symbol_iterator {
+ Symbol Sym;
+
+ public:
+ symbol_iterator(ArrayRef<ModuleSymbolTable::Symbol>::iterator I,
+ const ModuleSymbolTable &SymTab, const InputFile *File)
+ : Sym(I, SymTab, File) {}
+
+ symbol_iterator &operator++() {
+ ++Sym.I;
+ Sym.skip();
+ return *this;
+ }
+
+ symbol_iterator operator++(int) {
+ symbol_iterator I = *this;
+ ++*this;
+ return I;
+ }
+
+ const Symbol &operator*() const { return Sym; }
+ const Symbol *operator->() const { return &Sym; }
+
+ bool operator!=(const symbol_iterator &Other) const {
+ return Sym.I != Other.Sym.I;
+ }
+ };
+
+ /// A range over the symbols in this InputFile.
+ iterator_range<symbol_iterator> symbols() {
+ return llvm::make_range(
+ symbol_iterator(SymTab.symbols().begin(), SymTab, this),
+ symbol_iterator(SymTab.symbols().end(), SymTab, this));
+ }
+
+ /// Returns the path to the InputFile.
+ StringRef getName() const;
+
+ /// Returns the source file path specified at compile time.
+ StringRef getSourceFileName() const;
+
+ // Returns a table with all the comdats used by this file.
+ ArrayRef<StringRef> getComdatTable() const { return Comdats; }
+
+private:
+ iterator_range<symbol_iterator> module_symbols(InputModule &IM);
+};
+
+/// This class wraps an output stream for a native object. Most clients should
+/// just be able to return an instance of this base class from the stream
+/// callback, but if a client needs to perform some action after the stream is
+/// written to, that can be done by deriving from this class and overriding the
+/// destructor.
+class NativeObjectStream {
+public:
+ NativeObjectStream(std::unique_ptr<raw_pwrite_stream> OS) : OS(std::move(OS)) {}
+ std::unique_ptr<raw_pwrite_stream> OS;
+ virtual ~NativeObjectStream() = default;
+};
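
A short sketch of the derivation pattern described above: a stream that moves a temporary file into place once the backend has finished writing to it. The class and paths are illustrative.

#include "llvm/LTO/LTO.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"

class RenamingStream : public llvm::lto::NativeObjectStream {
  std::string TmpPath, FinalPath;

public:
  RenamingStream(std::unique_ptr<llvm::raw_pwrite_stream> OS, std::string Tmp,
                 std::string Final)
      : NativeObjectStream(std::move(OS)), TmpPath(std::move(Tmp)),
        FinalPath(std::move(Final)) {}

  ~RenamingStream() override {
    OS.reset(); // close the stream before renaming the file it wrote
    if (std::error_code EC = llvm::sys::fs::rename(TmpPath, FinalPath))
      llvm::errs() << "rename failed: " << EC.message() << "\n";
  }
};
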
+
+/// This type defines the callback to add a native object that is generated on
+/// the fly.
+///
+/// Stream callbacks must be thread safe.
+typedef std::function<std::unique_ptr<NativeObjectStream>(unsigned Task)>
+ AddStreamFn;
+
+/// This is the type of a native object cache. To request an item from the
+/// cache, pass a unique string as the Key. For hits, the cached file will be
+/// added to the link and this function will return AddStreamFn(). For misses,
+/// the cache will return a stream callback which must be called at most once to
+/// produce content for the stream. The native object stream produced by the
+/// stream callback will add the file to the link after the stream is written
+/// to.
+///
+/// Clients generally look like this:
+///
+/// if (AddStreamFn AddStream = Cache(Task, Key))
+/// ProduceContent(AddStream);
+typedef std::function<AddStreamFn(unsigned Task, StringRef Key)>
+ NativeObjectCache;
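
The client pattern sketched in the comment above, slightly expanded; emitObject stands in for whatever actually produces the native object and is hypothetical.

#include "llvm/LTO/LTO.h"

// Hypothetical code generator that writes a native object to the stream.
static void emitObject(llvm::raw_pwrite_stream &OS);

static void produceTask(unsigned Task, llvm::StringRef Key,
                        const llvm::lto::NativeObjectCache &Cache) {
  if (llvm::lto::AddStreamFn AddStream = Cache(Task, Key)) {
    // Cache miss: write the object into the returned stream. The cache adds
    // the file to the link (and to its directory) once the stream is written.
    std::unique_ptr<llvm::lto::NativeObjectStream> Stream = AddStream(Task);
    emitObject(*Stream->OS);
  }
  // Cache hit: the cached file has already been added to the link.
}
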
+
+/// A ThinBackend defines what happens after the thin-link phase during ThinLTO.
+/// The details of this type definition aren't important; clients can only
+/// create a ThinBackend using one of the create*ThinBackend() functions below.
+typedef std::function<std::unique_ptr<ThinBackendProc>(
+ Config &C, ModuleSummaryIndex &CombinedIndex,
+ StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
+ AddStreamFn AddStream, NativeObjectCache Cache)>
+ ThinBackend;
+
+/// This ThinBackend runs the individual backend jobs in-process.
+ThinBackend createInProcessThinBackend(unsigned ParallelismLevel);
+
+/// This ThinBackend writes individual module indexes to files, instead of
+/// running the individual backend jobs. This backend is for distributed builds
+/// where separate processes will invoke the real backends.
+///
+/// To find the path to write the index to, the backend checks if the path has a
+/// prefix of OldPrefix; if so, it replaces that prefix with NewPrefix. It then
+/// appends ".thinlto.bc" and writes the index to that path. If
+/// ShouldEmitImportsFiles is true it also writes a list of imported files to a
+/// similar path with ".imports" appended instead.
+ThinBackend createWriteIndexesThinBackend(std::string OldPrefix,
+ std::string NewPrefix,
+ bool ShouldEmitImportsFiles,
+ std::string LinkedObjectsFile);
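
A small sketch of choosing between the two ThinBackend factories declared above; the prefixes and object-list file name are illustrative.

#include "llvm/LTO/LTO.h"

static llvm::lto::ThinBackend pickThinBackend(bool Distributed,
                                              unsigned Threads) {
  if (Distributed)
    // Write per-module .thinlto.bc indexes (plus .imports lists) for separate
    // backend processes, and record the object list for the final link.
    return llvm::lto::createWriteIndexesThinBackend(
        /*OldPrefix=*/"", /*NewPrefix=*/"",
        /*ShouldEmitImportsFiles=*/true,
        /*LinkedObjectsFile=*/"thinlto.objects");
  return llvm::lto::createInProcessThinBackend(Threads);
}
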
+
+/// This class implements a resolution-based interface to LLVM's LTO
+/// functionality. It supports regular LTO, parallel LTO code generation and
+/// ThinLTO. You can use it from a linker in the following way:
+/// - Set hooks and code generation options (see lto::Config struct defined in
+/// Config.h), and use the lto::Config object to create an lto::LTO object.
+/// - Create lto::InputFile objects using lto::InputFile::create(), then use
+/// the symbols() function to enumerate its symbols and compute a resolution
+/// for each symbol (see SymbolResolution below).
+/// - After the linker has visited each input file (and each regular object
+/// file) and computed a resolution for each symbol, take each lto::InputFile
+/// and pass it and an array of symbol resolutions to the add() function.
+/// - Call the getMaxTasks() function to get an upper bound on the number of
+/// native object files that LTO may add to the link.
+/// - Call the run() function. This function will use the supplied AddStream
+/// and Cache functions to add up to getMaxTasks() native object files to
+/// the link.
+class LTO {
+ friend InputFile;
+
+public:
+ /// Create an LTO object. A default constructed LTO object has a reasonable
+ /// production configuration, but you can customize it by passing arguments to
+ /// this constructor.
+ /// FIXME: We do currently require the DiagHandler field to be set in Conf.
+ /// Until that is fixed, a Config argument is required.
+ LTO(Config Conf, ThinBackend Backend = nullptr,
+ unsigned ParallelCodeGenParallelismLevel = 1);
+ ~LTO();
+
+ /// Add an input file to the LTO link, using the provided symbol resolutions.
+ /// The symbol resolutions must appear in the enumeration order given by
+ /// InputFile::symbols().
+ Error add(std::unique_ptr<InputFile> Obj, ArrayRef<SymbolResolution> Res);
+
+ /// Returns an upper bound on the number of tasks that the client may expect.
+ /// This may only be called after all IR object files have been added. For a
+ /// full description of tasks see LTOBackend.h.
+ unsigned getMaxTasks() const;
+
+ /// Runs the LTO pipeline. This function calls the supplied AddStream
+ /// function to add native object files to the link.
+ ///
+ /// The Cache parameter is optional. If supplied, it will be used to cache
+ /// native object files and add them to the link.
+ ///
+ /// The client will receive at most one callback (via either AddStream or
+ /// Cache) for each task identifier.
+ Error run(AddStreamFn AddStream, NativeObjectCache Cache = nullptr);
+
+private:
+ Config Conf;
+
+ struct RegularLTOState {
+ RegularLTOState(unsigned ParallelCodeGenParallelismLevel, Config &Conf);
+ struct CommonResolution {
+ uint64_t Size = 0;
+ unsigned Align = 0;
+ /// Record if at least one instance of the common was marked as prevailing
+ bool Prevailing = false;
+ };
+ std::map<std::string, CommonResolution> Commons;
+
+ unsigned ParallelCodeGenParallelismLevel;
+ LTOLLVMContext Ctx;
+ bool HasModule = false;
+ std::unique_ptr<Module> CombinedModule;
+ std::unique_ptr<IRMover> Mover;
+ } RegularLTO;
+
+ struct ThinLTOState {
+ ThinLTOState(ThinBackend Backend);
+
+ ThinBackend Backend;
+ ModuleSummaryIndex CombinedIndex;
+ MapVector<StringRef, BitcodeModule> ModuleMap;
+ DenseMap<GlobalValue::GUID, StringRef> PrevailingModuleForGUID;
+ } ThinLTO;
+
+ // The global resolution for a particular (mangled) symbol name. This is in
+ // particular necessary to track whether each symbol can be internalized.
+ // Because any input file may introduce a new cross-partition reference, we
+ // cannot make any final internalization decisions until all input files have
+ // been added and the client has called run(). During run() we apply
+ // internalization decisions either directly to the module (for regular LTO)
+ // or to the combined index (for ThinLTO).
+ struct GlobalResolution {
+ /// The unmangled name of the global.
+ std::string IRName;
+
+ bool UnnamedAddr = true;
+
+ /// This field keeps track of the partition number of this global. The
+ /// regular LTO object is partition 0, while each ThinLTO object has its own
+ /// partition number from 1 onwards.
+ ///
+ /// Any global that is defined or used by more than one partition, or that
+ /// is referenced externally, may not be internalized.
+ ///
+ /// Partitions generally have a one-to-one correspondence with tasks, except
+ /// that we use partition 0 for all parallel LTO code generation partitions.
+ /// Any partitioning of the combined LTO object is done internally by the
+ /// LTO backend.
+ unsigned Partition = Unknown;
+
+ /// Special partition numbers.
+ enum : unsigned {
+ /// A partition number has not yet been assigned to this global.
+ Unknown = -1u,
+
+ /// This global is either used by more than one partition or has an
+ /// external reference, and therefore cannot be internalized.
+ External = -2u,
+ };
+ };
+
+ // Global mapping from mangled symbol names to resolutions.
+ StringMap<GlobalResolution> GlobalResolutions;
+
+ void addSymbolToGlobalRes(SmallPtrSet<GlobalValue *, 8> &Used,
+ const InputFile::Symbol &Sym, SymbolResolution Res,
+ unsigned Partition);
+
+ // These functions take a range of symbol resolutions [ResI, ResE) and consume
+ // the resolutions used by a single input module by incrementing ResI. After
+ // these functions return, [ResI, ResE) will refer to the resolution range for
+ // the remaining modules in the InputFile.
+ Error addModule(InputFile &Input, InputFile::InputModule &IM,
+ const SymbolResolution *&ResI, const SymbolResolution *ResE);
+ Error addRegularLTO(BitcodeModule BM, const SymbolResolution *&ResI,
+ const SymbolResolution *ResE);
+ Error addThinLTO(BitcodeModule BM, Module &M,
+ iterator_range<InputFile::symbol_iterator> Syms,
+ const SymbolResolution *&ResI, const SymbolResolution *ResE);
+
+ Error runRegularLTO(AddStreamFn AddStream);
+ Error runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache,
+ bool HasRegularLTO);
+
+ mutable bool CalledGetMaxTasks = false;
+};
+
+/// The resolution for a symbol. The linker must provide a SymbolResolution for
+/// each global symbol based on its internal resolution of that symbol.
+struct SymbolResolution {
+ SymbolResolution()
+ : Prevailing(0), FinalDefinitionInLinkageUnit(0), VisibleToRegularObj(0) {
+ }
+ /// The linker has chosen this definition of the symbol.
+ unsigned Prevailing : 1;
+
+ /// The definition of this symbol is unpreemptable at runtime and is known to
+ /// be in this linkage unit.
+ unsigned FinalDefinitionInLinkageUnit : 1;
+
+ /// The definition of this symbol is visible outside of the LTO unit.
+ unsigned VisibleToRegularObj : 1;
+};
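
A hedged end-to-end sketch of the linker flow documented on the LTO class above; resolveSymbol encodes the linker's own resolution policy and is hypothetical, and the parallelism level is arbitrary.

#include "llvm/LTO/LTO.h"
#include <vector>

// Hypothetical linker policy mapping a symbol to its resolution.
static llvm::lto::SymbolResolution
resolveSymbol(const llvm::lto::InputFile::Symbol &Sym);

static llvm::Error linkWithLTO(llvm::ArrayRef<llvm::MemoryBufferRef> Inputs,
                               llvm::lto::Config Conf,
                               llvm::lto::AddStreamFn AddStream) {
  llvm::lto::LTO Link(std::move(Conf),
                      llvm::lto::createInProcessThinBackend(4));
  for (llvm::MemoryBufferRef MB : Inputs) {
    auto File = llvm::lto::InputFile::create(MB);
    if (!File)
      return File.takeError();
    // Resolutions must follow the enumeration order of InputFile::symbols().
    std::vector<llvm::lto::SymbolResolution> Res;
    for (const auto &Sym : (*File)->symbols())
      Res.push_back(resolveSymbol(Sym));
    if (llvm::Error E = Link.add(std::move(*File), Res))
      return E;
  }
  // getMaxTasks() now bounds how many objects AddStream may receive.
  return Link.run(AddStream);
}
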
+
+} // namespace lto
+} // namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/LTO/LTOBackend.h b/contrib/llvm/include/llvm/LTO/LTOBackend.h
new file mode 100644
index 000000000000..933503afddc8
--- /dev/null
+++ b/contrib/llvm/include/llvm/LTO/LTOBackend.h
@@ -0,0 +1,51 @@
+//===- LTOBackend.h - LLVM Link Time Optimizer Backend ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the "backend" phase of LTO, i.e. it performs
+// optimization and code generation on a loaded module. It is generally used
+// internally by the LTO class but can also be used independently, for example
+// to implement a standalone ThinLTO backend.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LTO_LTOBACKEND_H
+#define LLVM_LTO_LTOBACKEND_H
+
+#include "llvm/ADT/MapVector.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/ModuleSummaryIndex.h"
+#include "llvm/LTO/LTO.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/IPO/FunctionImport.h"
+
+namespace llvm {
+
+class BitcodeModule;
+class Error;
+class Module;
+class Target;
+
+namespace lto {
+
+/// Runs a regular LTO backend.
+Error backend(Config &C, AddStreamFn AddStream,
+ unsigned ParallelCodeGenParallelismLevel,
+ std::unique_ptr<Module> M);
+
+/// Runs a ThinLTO backend.
+Error thinBackend(Config &C, unsigned Task, AddStreamFn AddStream, Module &M,
+ ModuleSummaryIndex &CombinedIndex,
+ const FunctionImporter::ImportMapTy &ImportList,
+ const GVSummaryMapTy &DefinedGlobals,
+ MapVector<StringRef, BitcodeModule> &ModuleMap);
+}
+}
+
+#endif
diff --git a/contrib/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h b/contrib/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h
index d083e37d75b8..f14682111280 100644
--- a/contrib/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h
+++ b/contrib/llvm/include/llvm/LTO/legacy/LTOCodeGenerator.h
@@ -41,6 +41,7 @@
#include "llvm/ADT/StringSet.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Module.h"
+#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include <string>
@@ -77,6 +78,7 @@ struct LTOCodeGenerator {
/// Resets \a HasVerifiedInput.
void setModule(std::unique_ptr<LTOModule> M);
+ void setAsmUndefinedRefs(struct LTOModule *);
void setTargetOptions(const TargetOptions &Options);
void setDebugInfo(lto_debug_model);
void setCodePICModel(Optional<Reloc::Model> Model) { RelocModel = Model; }
@@ -85,8 +87,8 @@ struct LTOCodeGenerator {
/// The default is TargetMachine::CGFT_ObjectFile.
void setFileType(TargetMachine::CodeGenFileType FT) { FileType = FT; }
- void setCpu(const char *MCpu) { this->MCpu = MCpu; }
- void setAttr(const char *MAttr) { this->MAttr = MAttr; }
+ void setCpu(StringRef MCpu) { this->MCpu = MCpu; }
+ void setAttr(StringRef MAttr) { this->MAttr = MAttr; }
void setOptLevel(unsigned OptLevel);
void setShouldInternalize(bool Value) { ShouldInternalize = Value; }
@@ -116,7 +118,7 @@ struct LTOCodeGenerator {
/// name is misleading). This function should be called before
/// LTOCodeGenerator::compilexxx(), and
/// LTOCodeGenerator::writeMergedModules().
- void setCodeGenDebugOptions(const char *Opts);
+ void setCodeGenDebugOptions(StringRef Opts);
/// Parse the options set in setCodeGenDebugOptions.
///
@@ -129,7 +131,7 @@ struct LTOCodeGenerator {
/// true on success.
///
/// Calls \a verifyMergedModuleOnce().
- bool writeMergedModules(const char *Path);
+ bool writeMergedModules(StringRef Path);
/// Compile the merged module into a *single* output file; the path to output
/// file is returned to the caller via argument "name". Return true on
@@ -204,6 +206,9 @@ private:
void emitError(const std::string &ErrMsg);
void emitWarning(const std::string &ErrMsg);
+ bool setupOptimizationRemarks();
+ void finishOptimizationRemarks();
+
LLVMContext &Context;
std::unique_ptr<Module> MergedModule;
std::unique_ptr<Linker> TheLinker;
@@ -231,6 +236,7 @@ private:
bool ShouldEmbedUselists = false;
bool ShouldRestoreGlobalsLinkage = false;
TargetMachine::CodeGenFileType FileType = TargetMachine::CGFT_ObjectFile;
+ std::unique_ptr<tool_output_file> DiagnosticOutputFile;
};
}
#endif
diff --git a/contrib/llvm/include/llvm/LTO/legacy/LTOModule.h b/contrib/llvm/include/llvm/LTO/legacy/LTOModule.h
index 2e46219be19e..2a8758587a11 100644
--- a/contrib/llvm/include/llvm/LTO/legacy/LTOModule.h
+++ b/contrib/llvm/include/llvm/LTO/legacy/LTOModule.h
@@ -19,6 +19,7 @@
#include "llvm/ADT/StringSet.h"
#include "llvm/IR/Module.h"
#include "llvm/Object/IRObjectFile.h"
+#include "llvm/Object/ModuleSymbolTable.h"
#include "llvm/Target/TargetMachine.h"
#include <string>
#include <vector>
@@ -37,33 +38,36 @@ namespace llvm {
struct LTOModule {
private:
struct NameAndAttributes {
- const char *name;
- uint32_t attributes;
- bool isFunction;
- const GlobalValue *symbol;
+ StringRef name;
+ uint32_t attributes = 0;
+ bool isFunction = false;
+ const GlobalValue *symbol = nullptr;
};
std::unique_ptr<LLVMContext> OwnedContext;
std::string LinkerOpts;
- std::unique_ptr<object::IRObjectFile> IRFile;
+ std::unique_ptr<Module> Mod;
+ MemoryBufferRef MBRef;
+ ModuleSymbolTable SymTab;
std::unique_ptr<TargetMachine> _target;
std::vector<NameAndAttributes> _symbols;
// _defines and _undefines only needed to disambiguate tentative definitions
StringSet<> _defines;
StringMap<NameAndAttributes> _undefines;
- std::vector<const char*> _asm_undefines;
+ std::vector<StringRef> _asm_undefines;
- LTOModule(std::unique_ptr<object::IRObjectFile> Obj, TargetMachine *TM);
+ LTOModule(std::unique_ptr<Module> M, MemoryBufferRef MBRef,
+ TargetMachine *TM);
public:
~LTOModule();
/// Returns 'true' if the file or memory contents is LLVM bitcode.
static bool isBitcodeFile(const void *mem, size_t length);
- static bool isBitcodeFile(const char *path);
+ static bool isBitcodeFile(StringRef path);
/// Returns 'true' if the Module is produced for ThinLTO.
bool isThinLTO();
@@ -91,13 +95,13 @@ public:
/// InitializeAllAsmPrinters();
/// InitializeAllAsmParsers();
static ErrorOr<std::unique_ptr<LTOModule>>
- createFromFile(LLVMContext &Context, const char *path,
+ createFromFile(LLVMContext &Context, StringRef path,
const TargetOptions &options);
static ErrorOr<std::unique_ptr<LTOModule>>
- createFromOpenFile(LLVMContext &Context, int fd, const char *path,
- size_t size, const TargetOptions &options);
+ createFromOpenFile(LLVMContext &Context, int fd, StringRef path, size_t size,
+ const TargetOptions &options);
static ErrorOr<std::unique_ptr<LTOModule>>
- createFromOpenFileSlice(LLVMContext &Context, int fd, const char *path,
+ createFromOpenFileSlice(LLVMContext &Context, int fd, StringRef path,
size_t map_size, off_t offset,
const TargetOptions &options);
static ErrorOr<std::unique_ptr<LTOModule>>
@@ -108,14 +112,10 @@ public:
size_t length, const TargetOptions &options,
StringRef path);
- const Module &getModule() const {
- return const_cast<LTOModule*>(this)->getModule();
- }
- Module &getModule() {
- return IRFile->getModule();
- }
+ const Module &getModule() const { return *Mod; }
+ Module &getModule() { return *Mod; }
- std::unique_ptr<Module> takeModule() { return IRFile->takeModule(); }
+ std::unique_ptr<Module> takeModule() { return std::move(Mod); }
/// Return the Module's target triple.
const std::string &getTargetTriple() {
@@ -140,10 +140,10 @@ public:
}
/// Get the name of the symbol at the specified index.
- const char *getSymbolName(uint32_t index) {
+ StringRef getSymbolName(uint32_t index) {
if (index < _symbols.size())
return _symbols[index].name;
- return nullptr;
+ return StringRef();
}
const GlobalValue *getSymbolGV(uint32_t index) {
@@ -152,13 +152,9 @@ public:
return nullptr;
}
- const char *getLinkerOpts() {
- return LinkerOpts.c_str();
- }
+ StringRef getLinkerOpts() { return LinkerOpts; }
- const std::vector<const char*> &getAsmUndefinedRefs() {
- return _asm_undefines;
- }
+ const std::vector<StringRef> &getAsmUndefinedRefs() { return _asm_undefines; }
private:
/// Parse metadata from the module
@@ -170,26 +166,26 @@ private:
void parseSymbols();
/// Add a symbol which isn't defined just yet to a list to be resolved later.
- void addPotentialUndefinedSymbol(const object::BasicSymbolRef &Sym,
+ void addPotentialUndefinedSymbol(ModuleSymbolTable::Symbol Sym,
bool isFunc);
/// Add a defined symbol to the list.
- void addDefinedSymbol(const char *Name, const GlobalValue *def,
+ void addDefinedSymbol(StringRef Name, const GlobalValue *def,
bool isFunction);
/// Add a data symbol as defined to the list.
- void addDefinedDataSymbol(const object::BasicSymbolRef &Sym);
- void addDefinedDataSymbol(const char*Name, const GlobalValue *v);
+ void addDefinedDataSymbol(ModuleSymbolTable::Symbol Sym);
+ void addDefinedDataSymbol(StringRef Name, const GlobalValue *v);
/// Add a function symbol as defined to the list.
- void addDefinedFunctionSymbol(const object::BasicSymbolRef &Sym);
- void addDefinedFunctionSymbol(const char *Name, const Function *F);
+ void addDefinedFunctionSymbol(ModuleSymbolTable::Symbol Sym);
+ void addDefinedFunctionSymbol(StringRef Name, const Function *F);
/// Add a global symbol from module-level ASM to the defined list.
- void addAsmGlobalSymbol(const char *, lto_symbol_attributes scope);
+ void addAsmGlobalSymbol(StringRef, lto_symbol_attributes scope);
/// Add a global symbol from module-level ASM to the undefined list.
- void addAsmGlobalSymbolUndef(const char *);
+ void addAsmGlobalSymbolUndef(StringRef);
/// Parse i386/ppc ObjC class data structure.
void addObjCClass(const GlobalVariable *clgv);
diff --git a/contrib/llvm/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h b/contrib/llvm/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h
index 539880e8d3a7..cb4a16cb5b7b 100644
--- a/contrib/llvm/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h
+++ b/contrib/llvm/include/llvm/LTO/legacy/ThinLTOCodeGenerator.h
@@ -38,7 +38,7 @@ struct TargetMachineBuilder {
std::string MAttr;
TargetOptions Options;
Optional<Reloc::Model> RelocModel;
- CodeGenOpt::Level CGOptLevel = CodeGenOpt::Default;
+ CodeGenOpt::Level CGOptLevel = CodeGenOpt::Aggressive;
std::unique_ptr<TargetMachine> create() const;
};
@@ -72,18 +72,32 @@ public:
/**
* Process all the modules that were added to the code generator in parallel.
*
- * Client can access the resulting object files using getProducedBinaries()
+ * Clients can access the resulting object files using getProducedBinaries(),
+ * unless setGeneratedObjectsDirectory() has been called, in which case
+ * results are available through getProducedBinaryFiles().
*/
void run();
/**
- * Return the "in memory" binaries produced by the code generator.
+ * Return the "in memory" binaries produced by the code generator. This is
+ * filled after run() unless setGeneratedObjectsDirectory() has been
+ * called, in which case results are available through
+ * getProducedBinaryFiles().
*/
std::vector<std::unique_ptr<MemoryBuffer>> &getProducedBinaries() {
return ProducedBinaries;
}
/**
+ * Return the "on-disk" binaries produced by the code generator. This is
+ * filled after run() when setGeneratedObjectsDirectory() has been
+ * called, in which case results are available through getProducedBinaries().
+ */
+ std::vector<std::string> &getProducedBinaryFiles() {
+ return ProducedBinaryFiles;
+ }
+
+ /**
* \defgroup Options setters
* @{
*/
@@ -156,6 +170,14 @@ public:
/// the processing.
void setSaveTempsDir(std::string Path) { SaveTempsDir = std::move(Path); }
+ /// Set the path to a directory where to save generated object files. This
+ /// path can be used by a linker to request on-disk files instead of in-memory
+ /// buffers. When set, results are available through getProducedBinaryFiles()
+ /// instead of getProducedBinaries().
+ void setGeneratedObjectsDirectory(std::string Path) {
+ SavedObjectsDirectoryPath = std::move(Path);
+ }
+
/// CPU to use to initialize the TargetMachine
void setCpu(std::string Cpu) { TMBuilder.MCpu = std::move(Cpu); }
@@ -177,6 +199,11 @@ public:
TMBuilder.CGOptLevel = CGOptLevel;
}
+ /// IR optimization level: from 0 to 3.
+ void setOptLevel(unsigned NewOptLevel) {
+ OptLevel = (NewOptLevel > 3) ? 3 : NewOptLevel;
+ }
+
/// Disable CodeGen, only run the stages till codegen and stop. The output
/// will be bitcode.
void disableCodeGen(bool Disable) { DisableCodeGen = Disable; }
@@ -244,9 +271,13 @@ private:
/// Helper factory to build a TargetMachine
TargetMachineBuilder TMBuilder;
- /// Vector holding the in-memory buffer containing the produced binaries.
+ /// Vector holding the in-memory buffer containing the produced binaries, when
+ /// SavedObjectsDirectoryPath isn't set.
std::vector<std::unique_ptr<MemoryBuffer>> ProducedBinaries;
+ /// Paths to the generated object files in SavedObjectsDirectoryPath, if set.
+ std::vector<std::string> ProducedBinaryFiles;
+
/// Vector holding the input buffers containing the bitcode modules to
/// process.
std::vector<MemoryBufferRef> Modules;
@@ -264,6 +295,9 @@ private:
/// Path to a directory to save the temporary bitcode files.
std::string SaveTempsDir;
+ /// Path to a directory to save the generated object files.
+ std::string SavedObjectsDirectoryPath;
+
/// Flag to enable/disable CodeGen. When set to true, the process stops after
/// optimizations and a bitcode is produced.
bool DisableCodeGen = false;
@@ -271,6 +305,9 @@ private:
/// Flag to indicate that only the CodeGen will be performed, no cross-module
/// importing or optimization.
bool CodeGenOnly = false;
+
+ /// IR Optimization Level [0-3].
+ unsigned OptLevel = 3;
};
}
#endif
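Taken together, the ThinLTOCodeGenerator additions let a client route output either to in-memory buffers or to files on disk, and clamp the IR optimization level. A rough usage sketch, assuming the modules have already been added to a generator named CG (the directory path is illustrative):
    ThinLTOCodeGenerator CG;
    // ... CG.addModule(...) calls for each bitcode input elided ...
    CG.setOptLevel(2);                              // values above 3 are clamped to 3
    CG.setGeneratedObjectsDirectory("/tmp/lto-objects");
    CG.run();
    // Because a directory was supplied, results are file paths, not buffers.
    for (const std::string &Path : CG.getProducedBinaryFiles())
      llvm::errs() << "wrote " << Path << "\n";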
diff --git a/contrib/llvm/include/llvm/LinkAllIR.h b/contrib/llvm/include/llvm/LinkAllIR.h
index 77e19ce900e3..f078c73f979e 100644
--- a/contrib/llvm/include/llvm/LinkAllIR.h
+++ b/contrib/llvm/include/llvm/LinkAllIR.h
@@ -31,7 +31,6 @@
#include "llvm/Support/Process.h"
#include "llvm/Support/Program.h"
#include "llvm/Support/Signals.h"
-#include "llvm/Support/TimeValue.h"
#include <cstdlib>
namespace {
diff --git a/contrib/llvm/include/llvm/LinkAllPasses.h b/contrib/llvm/include/llvm/LinkAllPasses.h
index b2721d0a1fd9..e50137f8e02e 100644
--- a/contrib/llvm/include/llvm/LinkAllPasses.h
+++ b/contrib/llvm/include/llvm/LinkAllPasses.h
@@ -39,6 +39,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/IPO/FunctionAttrs.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/ObjCARC.h"
@@ -77,6 +78,7 @@ namespace {
(void) llvm::createCFLAndersAAWrapperPass();
(void) llvm::createCFLSteensAAWrapperPass();
(void) llvm::createStructurizeCFGPass();
+ (void) llvm::createLibCallsShrinkWrapPass();
(void) llvm::createConstantMergePass();
(void) llvm::createConstantPropagationPass();
(void) llvm::createCostModelAnalysisPass();
@@ -97,7 +99,7 @@ namespace {
(void) llvm::createInstrProfilingLegacyPass();
(void) llvm::createFunctionImportPass();
(void) llvm::createFunctionInliningPass();
- (void) llvm::createAlwaysInlinerPass();
+ (void) llvm::createAlwaysInlinerLegacyPass();
(void) llvm::createGlobalDCEPass();
(void) llvm::createGlobalOptimizerPass();
(void) llvm::createGlobalsAAWrapperPass();
@@ -110,6 +112,7 @@ namespace {
(void) llvm::createInternalizePass();
(void) llvm::createLCSSAPass();
(void) llvm::createLICMPass();
+ (void) llvm::createLoopSinkPass();
(void) llvm::createLazyValueInfoPass();
(void) llvm::createLoopExtractorPass();
(void) llvm::createLoopInterchangePass();
@@ -159,10 +162,12 @@ namespace {
(void) llvm::createInstCountPass();
(void) llvm::createConstantHoistingPass();
(void) llvm::createCodeGenPreparePass();
+ (void) llvm::createCountingFunctionInserterPass();
(void) llvm::createEarlyCSEPass();
(void) llvm::createGVNHoistPass();
(void) llvm::createMergedLoadStoreMotionPass();
(void) llvm::createGVNPass();
+ (void) llvm::createNewGVNPass();
(void) llvm::createMemCpyOptPass();
(void) llvm::createLoopDeletionPass();
(void) llvm::createPostDomTree();
diff --git a/contrib/llvm/include/llvm/Linker/IRMover.h b/contrib/llvm/include/llvm/Linker/IRMover.h
index 578940ed4069..2a187cbc42f5 100644
--- a/contrib/llvm/include/llvm/Linker/IRMover.h
+++ b/contrib/llvm/include/llvm/Linker/IRMover.h
@@ -71,8 +71,15 @@ public:
/// not present in ValuesToLink. The GlobalValue and a ValueAdder callback
/// are passed as an argument, and the callback is expected to be called
/// if the GlobalValue needs to be added to the \p ValuesToLink and linked.
+ /// - \p LinkModuleInlineAsm is true if the ModuleInlineAsm string in Src
+ /// should be linked with (concatenated into) the ModuleInlineAsm string
+ /// for the destination module. It should be true for full LTO, but not
+ /// when importing for ThinLTO; otherwise we could end up with duplicate symbols.
+ /// - \p IsPerformingImport is true when this IR link is to perform ThinLTO
+ /// function importing from Src.
Error move(std::unique_ptr<Module> Src, ArrayRef<GlobalValue *> ValuesToLink,
- std::function<void(GlobalValue &GV, ValueAdder Add)> AddLazyFor);
+ std::function<void(GlobalValue &GV, ValueAdder Add)> AddLazyFor,
+ bool LinkModuleInlineAsm, bool IsPerformingImport);
Module &getModule() { return Composite; }
private:
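The two extra parameters to IRMover::move() make the caller spell out the linking mode: full LTO wants the module-level inline assembly concatenated, while a ThinLTO import must not duplicate it. A hedged sketch of how a caller might thread the flags through; Mover, Src, ValuesToLink, AddLazyFor and IsThinLTOImport are all assumed to be set up elsewhere:
    if (Error E = Mover.move(std::move(Src), ValuesToLink, AddLazyFor,
                             /* LinkModuleInlineAsm */ !IsThinLTOImport,
                             /* IsPerformingImport  */ IsThinLTOImport))
      logAllUnhandledErrors(std::move(E), llvm::errs(), "IR link failed: ");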
diff --git a/contrib/llvm/include/llvm/MC/ConstantPools.h b/contrib/llvm/include/llvm/MC/ConstantPools.h
index 552e1443e7d0..f0c445dbe59f 100644
--- a/contrib/llvm/include/llvm/MC/ConstantPools.h
+++ b/contrib/llvm/include/llvm/MC/ConstantPools.h
@@ -25,6 +25,7 @@ class MCExpr;
class MCSection;
class MCStreamer;
class MCSymbol;
+class MCSymbolRefExpr;
struct ConstantPoolEntry {
ConstantPoolEntry(MCSymbol *L, const MCExpr *Val, unsigned Sz, SMLoc Loc_)
@@ -40,6 +41,7 @@ struct ConstantPoolEntry {
class ConstantPool {
typedef SmallVector<ConstantPoolEntry, 4> EntryVecTy;
EntryVecTy Entries;
+ DenseMap<int64_t, const MCSymbolRefExpr *> CachedEntries;
public:
// Initialize a new empty constant pool
diff --git a/contrib/llvm/include/llvm/MC/LaneBitmask.h b/contrib/llvm/include/llvm/MC/LaneBitmask.h
new file mode 100644
index 000000000000..89e60928405d
--- /dev/null
+++ b/contrib/llvm/include/llvm/MC/LaneBitmask.h
@@ -0,0 +1,89 @@
+//===-- llvm/MC/LaneBitmask.h -----------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// A common definition of LaneBitmask for use in TableGen and CodeGen.
+///
+/// A lane mask is a bitmask representing the covering of a register with
+/// sub-registers.
+///
+/// This is typically used to track liveness at sub-register granularity.
+/// Lane masks for sub-register indices are similar to register units for
+/// physical registers. The individual bits in a lane mask can't be assigned
+/// any specific meaning. They can be used to check if two sub-register
+/// indices overlap.
+///
+/// Iff the target has a register such that:
+///
+/// getSubReg(Reg, A) overlaps getSubReg(Reg, B)
+///
+/// then:
+///
+/// (getSubRegIndexLaneMask(A) & getSubRegIndexLaneMask(B)) != 0
+
+#ifndef LLVM_MC_LANEBITMASK_H
+#define LLVM_MC_LANEBITMASK_H
+
+#include "llvm/Support/Format.h"
+#include "llvm/Support/Printable.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+ struct LaneBitmask {
+ // When changing the underlying type, change the format string as well.
+ typedef unsigned Type;
+ enum : unsigned { BitWidth = 8*sizeof(Type) };
+ constexpr static const char *const FormatStr = "%08X";
+
+ constexpr LaneBitmask() = default;
+ explicit constexpr LaneBitmask(Type V) : Mask(V) {}
+
+ constexpr bool operator== (LaneBitmask M) const { return Mask == M.Mask; }
+ constexpr bool operator!= (LaneBitmask M) const { return Mask != M.Mask; }
+ constexpr bool operator< (LaneBitmask M) const { return Mask < M.Mask; }
+ constexpr bool none() const { return Mask == 0; }
+ constexpr bool any() const { return Mask != 0; }
+ constexpr bool all() const { return ~Mask == 0; }
+
+ constexpr LaneBitmask operator~() const {
+ return LaneBitmask(~Mask);
+ }
+ constexpr LaneBitmask operator|(LaneBitmask M) const {
+ return LaneBitmask(Mask | M.Mask);
+ }
+ constexpr LaneBitmask operator&(LaneBitmask M) const {
+ return LaneBitmask(Mask & M.Mask);
+ }
+ LaneBitmask &operator|=(LaneBitmask M) {
+ Mask |= M.Mask;
+ return *this;
+ }
+ LaneBitmask &operator&=(LaneBitmask M) {
+ Mask &= M.Mask;
+ return *this;
+ }
+
+ constexpr Type getAsInteger() const { return Mask; }
+
+ static LaneBitmask getNone() { return LaneBitmask(0); }
+ static LaneBitmask getAll() { return ~LaneBitmask(0); }
+
+ private:
+ Type Mask = 0;
+ };
+
+ /// Create Printable object to print LaneBitmasks on a \ref raw_ostream.
+ static LLVM_ATTRIBUTE_UNUSED Printable PrintLaneMask(LaneBitmask LaneMask) {
+ return Printable([LaneMask](raw_ostream &OS) {
+ OS << format(LaneBitmask::FormatStr, LaneMask.getAsInteger());
+ });
+ }
+}
+
+#endif // LLVM_MC_LANEBITMASK_H
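The overlap property stated in the file comment reduces to a plain bitwise test once the masks are in hand. A small self-contained sketch; the two masks are made-up values standing in for getSubRegIndexLaneMask() results:
    #include "llvm/MC/LaneBitmask.h"
    using llvm::LaneBitmask;

    LaneBitmask SubLo = LaneBitmask(0x0003);   // lanes covered by one sub-register
    LaneBitmask SubHi = LaneBitmask(0x000C);   // lanes covered by another

    bool Overlap   = (SubLo & SubHi).any();    // false here: the lane sets are disjoint
    LaneBitmask Both = SubLo | SubHi;          // union of the covered lanes
    bool Covered   = Both.all();               // true only if every lane bit is set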
diff --git a/contrib/llvm/include/llvm/MC/MCAsmBackend.h b/contrib/llvm/include/llvm/MC/MCAsmBackend.h
index ce17a2a06758..d4bdbcd2baa3 100644
--- a/contrib/llvm/include/llvm/MC/MCAsmBackend.h
+++ b/contrib/llvm/include/llvm/MC/MCAsmBackend.h
@@ -13,7 +13,6 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Optional.h"
#include "llvm/MC/MCDirectives.h"
-#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/ErrorHandling.h"
@@ -21,6 +20,7 @@
namespace llvm {
class MCAsmLayout;
class MCAssembler;
+class MCCFIInstruction;
class MCELFObjectTargetWriter;
struct MCFixupKindInfo;
class MCFragment;
@@ -28,6 +28,7 @@ class MCInst;
class MCRelaxableFragment;
class MCObjectWriter;
class MCSection;
+class MCSubtargetInfo;
class MCValue;
class raw_pwrite_stream;
diff --git a/contrib/llvm/include/llvm/MC/MCAsmInfo.h b/contrib/llvm/include/llvm/MC/MCAsmInfo.h
index e6ed5688d18d..f898bf5288d6 100644
--- a/contrib/llvm/include/llvm/MC/MCAsmInfo.h
+++ b/contrib/llvm/include/llvm/MC/MCAsmInfo.h
@@ -18,6 +18,7 @@
#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/MCTargetOptions.h"
#include <cassert>
#include <vector>
@@ -41,14 +42,6 @@ enum class EncodingType {
};
}
-enum class ExceptionHandling {
- None, /// No exception support
- DwarfCFI, /// DWARF-like instruction based exceptions
- SjLj, /// setjmp/longjmp based exceptions
- ARM, /// ARM EHABI
- WinEH, /// Windows Exception Handling
-};
-
namespace LCOMM {
enum LCOMMType { NoAlignment, ByteAlignment, Log2Alignment };
}
@@ -92,12 +85,6 @@ protected:
/// directive for emitting thread local BSS Symbols. Default is false.
bool HasMachoTBSSDirective;
- /// True if the compiler should emit a ".reference .constructors_used" or
- /// ".reference .destructors_used" directive after the static ctor/dtor
- /// list. This directive is only emitted in Static relocation model. Default
- /// is false.
- bool HasStaticCtorDtorReferenceInStaticMode;
-
/// This is the maximum possible length of an instruction, which is needed to
/// compute the size of an inline asm. Defaults to 4.
unsigned MaxInstLength;
@@ -116,7 +103,7 @@ protected:
/// This indicates the comment character used by the assembler. Defaults to
/// "#"
- const char *CommentString;
+ StringRef CommentString;
/// This is appended to emitted labels. Defaults to ":"
const char *LabelSuffix;
@@ -130,17 +117,17 @@ protected:
/// This prefix is used for globals like constant pool entries that are
/// completely private to the .s file and should not have names in the .o
/// file. Defaults to "L"
- const char *PrivateGlobalPrefix;
+ StringRef PrivateGlobalPrefix;
/// This prefix is used for labels for basic blocks. Defaults to the same as
/// PrivateGlobalPrefix.
- const char *PrivateLabelPrefix;
+ StringRef PrivateLabelPrefix;
/// This prefix is used for symbols that should be passed through the
/// assembler but be removed by the linker. This is 'l' on Darwin, currently
/// used for some ObjC metadata. The default of "" means that for this system
/// a plain private symbol should be used. Defaults to "".
- const char *LinkerPrivateGlobalPrefix;
+ StringRef LinkerPrivateGlobalPrefix;
/// If these are nonempty, they contain a directive to emit before and after
/// an inline assembly statement. Defaults to "#APP\n", "#NO_APP\n"
@@ -206,6 +193,14 @@ protected:
/// on Alpha. Defaults to NULL.
const char *GPRel32Directive;
+ /// If non-null, directives that are used to emit a word/dword which should
+ /// be relocated as a 32/64-bit DTP/TP-relative offset, e.g. .dtprelword/
+ /// .dtpreldword/.tprelword/.tpreldword on Mips.
+ const char *DTPRel32Directive = nullptr;
+ const char *DTPRel64Directive = nullptr;
+ const char *TPRel32Directive = nullptr;
+ const char *TPRel64Directive = nullptr;
+
/// This is true if this target uses "Sun Style" syntax for section switching
/// ("#alloc,#write" etc) instead of the normal ELF syntax (,"a,w") in
/// .section directives. Defaults to false.
@@ -376,6 +371,10 @@ protected:
// X86_64 ELF.
bool RelaxELFRelocations = true;
+ // If true, then the lexer and expression parser will support %neg(),
+ // %hi(), and similar unary operators.
+ bool HasMipsExpressions = false;
+
public:
explicit MCAsmInfo();
virtual ~MCAsmInfo();
@@ -405,6 +404,10 @@ public:
const char *getData64bitsDirective() const { return Data64bitsDirective; }
const char *getGPRel64Directive() const { return GPRel64Directive; }
const char *getGPRel32Directive() const { return GPRel32Directive; }
+ const char *getDTPRel64Directive() const { return DTPRel64Directive; }
+ const char *getDTPRel32Directive() const { return DTPRel32Directive; }
+ const char *getTPRel64Directive() const { return TPRel64Directive; }
+ const char *getTPRel32Directive() const { return TPRel32Directive; }
/// Targets can implement this method to specify a section to switch to if the
/// translation unit doesn't have any trampolines that require an executable
@@ -456,9 +459,6 @@ public:
bool hasMachoZeroFillDirective() const { return HasMachoZeroFillDirective; }
bool hasMachoTBSSDirective() const { return HasMachoTBSSDirective; }
- bool hasStaticCtorDtorReferenceInStaticMode() const {
- return HasStaticCtorDtorReferenceInStaticMode;
- }
unsigned getMaxInstLength() const { return MaxInstLength; }
unsigned getMinInstAlignment() const { return MinInstAlignment; }
bool getDollarIsPC() const { return DollarIsPC; }
@@ -468,17 +468,17 @@ public:
/// printed.
unsigned getCommentColumn() const { return 40; }
- const char *getCommentString() const { return CommentString; }
+ StringRef getCommentString() const { return CommentString; }
const char *getLabelSuffix() const { return LabelSuffix; }
bool useAssignmentForEHBegin() const { return UseAssignmentForEHBegin; }
bool needsLocalForSize() const { return NeedsLocalForSize; }
- const char *getPrivateGlobalPrefix() const { return PrivateGlobalPrefix; }
- const char *getPrivateLabelPrefix() const { return PrivateLabelPrefix; }
+ StringRef getPrivateGlobalPrefix() const { return PrivateGlobalPrefix; }
+ StringRef getPrivateLabelPrefix() const { return PrivateLabelPrefix; }
bool hasLinkerPrivateGlobalPrefix() const {
return LinkerPrivateGlobalPrefix[0] != '\0';
}
- const char *getLinkerPrivateGlobalPrefix() const {
+ StringRef getLinkerPrivateGlobalPrefix() const {
if (hasLinkerPrivateGlobalPrefix())
return LinkerPrivateGlobalPrefix;
return getPrivateGlobalPrefix();
@@ -598,6 +598,7 @@ public:
bool canRelaxRelocations() const { return RelaxELFRelocations; }
void setRelaxELFRelocations(bool V) { RelaxELFRelocations = V; }
+ bool hasMipsExpressions() const { return HasMipsExpressions; }
};
}
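The new DTPRel/TPRel directive strings follow the existing GPRel pattern: they stay null unless a target's MCAsmInfo subclass fills them in, and the streamer consults them when emitting TLS-relative data. A sketch of what a Mips-style subclass might set; the class here is illustrative, only the member names come from this header:
    struct ExampleMipsAsmInfo : public llvm::MCAsmInfo {
      ExampleMipsAsmInfo() {
        DTPRel32Directive = "\t.dtprelword\t";
        DTPRel64Directive = "\t.dtpreldword\t";
        TPRel32Directive  = "\t.tprelword\t";
        TPRel64Directive  = "\t.tpreldword\t";
        HasMipsExpressions = true;   // enables %hi()/%neg()-style operators in the parser
      }
    };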
diff --git a/contrib/llvm/include/llvm/MC/MCAssembler.h b/contrib/llvm/include/llvm/MC/MCAssembler.h
index aa3b451152df..641e78994768 100644
--- a/contrib/llvm/include/llvm/MC/MCAssembler.h
+++ b/contrib/llvm/include/llvm/MC/MCAssembler.h
@@ -10,6 +10,7 @@
#ifndef LLVM_MC_MCASSEMBLER_H
#define LLVM_MC_MCASSEMBLER_H
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/ilist.h"
#include "llvm/ADT/ilist_node.h"
@@ -402,8 +403,7 @@ public:
ArrayRef<std::string> getFileNames() { return FileNames; }
void addFileName(StringRef FileName) {
- if (std::find(FileNames.begin(), FileNames.end(), FileName) ==
- FileNames.end())
+ if (!is_contained(FileNames, FileName))
FileNames.push_back(FileName);
}
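The addFileName() change is a straight swap of the std::find idiom for llvm::is_contained from STLExtras, which reads as a single predicate. The same pattern outside MCAssembler, assuming a plain std::vector<std::string> of names:
    #include "llvm/ADT/STLExtras.h"
    #include "llvm/ADT/StringRef.h"
    #include <string>
    #include <vector>

    std::vector<std::string> FileNames;

    void addFileNameOnce(llvm::StringRef FileName) {
      if (!llvm::is_contained(FileNames, FileName))   // replaces the begin()/end() find
        FileNames.push_back(FileName);
    }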
diff --git a/contrib/llvm/include/llvm/MC/MCCodeView.h b/contrib/llvm/include/llvm/MC/MCCodeView.h
index d999ff555997..41521a6549b8 100644
--- a/contrib/llvm/include/llvm/MC/MCCodeView.h
+++ b/contrib/llvm/include/llvm/MC/MCCodeView.h
@@ -25,6 +25,7 @@ namespace llvm {
class MCContext;
class MCObjectStreamer;
class MCStreamer;
+class CodeViewContext;
/// \brief Instances of this class represent the information from a
/// .cv_loc directive.
@@ -36,8 +37,8 @@ class MCCVLoc {
uint16_t PrologueEnd : 1;
uint16_t IsStmt : 1;
-private: // MCContext manages these
- friend class MCContext;
+private: // CodeViewContext manages these
+ friend class CodeViewContext;
MCCVLoc(unsigned functionid, unsigned fileNum, unsigned line, unsigned column,
bool prologueend, bool isstmt)
: FunctionId(functionid), FileNum(fileNum), Line(line), Column(column),
@@ -104,6 +105,55 @@ public:
static void Make(MCObjectStreamer *MCOS);
};
+/// Information describing a function or inlined call site introduced by
+/// .cv_func_id or .cv_inline_site_id. Accumulates information from .cv_loc
+/// directives used with this function's id or the id of an inlined call site
+/// within this function or inlined call site.
+struct MCCVFunctionInfo {
+ /// If this represents an inlined call site, then ParentFuncIdPlusOne will be
+ /// the parent function id plus one. If this represents a normal function,
+ /// then there is no parent, and ParentFuncIdPlusOne will be FunctionSentinel.
+ /// If this struct is an unallocated slot in the function info vector, then
+ /// ParentFuncIdPlusOne will be zero.
+ unsigned ParentFuncIdPlusOne = 0;
+
+ enum : unsigned { FunctionSentinel = ~0U };
+
+ struct LineInfo {
+ unsigned File;
+ unsigned Line;
+ unsigned Col;
+ };
+
+ LineInfo InlinedAt;
+
+ /// The section of the first .cv_loc directive used for this function, or null
+ /// if none has been seen yet.
+ MCSection *Section = nullptr;
+
+ /// Map from inlined call site id to the inlined at location to use for that
+ /// call site. Call chains are collapsed, so for the call chain 'f -> g -> h',
+ /// the InlinedAtMap of 'f' will contain entries for 'g' and 'h' that both
+ /// list the line info for the 'g' call site.
+ DenseMap<unsigned, LineInfo> InlinedAtMap;
+
+ /// Returns true if this function info has not yet been used in a
+ /// .cv_func_id or .cv_inline_site_id directive.
+ bool isUnallocatedFunctionInfo() const { return ParentFuncIdPlusOne == 0; }
+
+ /// Returns true if this represents an inlined call site, meaning
+ /// ParentFuncIdPlusOne is neither zero nor ~0U.
+ bool isInlinedCallSite() const {
+ return !isUnallocatedFunctionInfo() &&
+ ParentFuncIdPlusOne != FunctionSentinel;
+ }
+
+ unsigned getParentFuncId() const {
+ assert(isInlinedCallSite());
+ return ParentFuncIdPlusOne - 1;
+ }
+};
+
/// Holds state from .cv_file and .cv_loc directives for later emission.
class CodeViewContext {
public:
@@ -114,6 +164,48 @@ public:
bool addFile(unsigned FileNumber, StringRef Filename);
ArrayRef<StringRef> getFilenames() { return Filenames; }
+ /// Records the function id of a normal function. Returns false if the
+ /// function id has already been used, and true otherwise.
+ bool recordFunctionId(unsigned FuncId);
+
+ /// Records the function id of an inlined call site. Records the "inlined at"
+ /// location info of the call site, including what function or inlined call
+ /// site it was inlined into. Returns false if the function id has already
+ /// been used, and true otherwise.
+ bool recordInlinedCallSiteId(unsigned FuncId, unsigned IAFunc,
+ unsigned IAFile, unsigned IALine,
+ unsigned IACol);
+
+ /// Retrieve the function info if this is a valid function id, or nullptr.
+ MCCVFunctionInfo *getCVFunctionInfo(unsigned FuncId) {
+ if (FuncId >= Functions.size())
+ return nullptr;
+ if (Functions[FuncId].isUnallocatedFunctionInfo())
+ return nullptr;
+ return &Functions[FuncId];
+ }
+
+ /// Saves the information from the currently parsed .cv_loc directive
+ /// and sets CVLocSeen. When the next instruction is assembled an entry
+ /// in the line number table with this information and the address of the
+ /// instruction will be created.
+ void setCurrentCVLoc(unsigned FunctionId, unsigned FileNo, unsigned Line,
+ unsigned Column, bool PrologueEnd, bool IsStmt) {
+ CurrentCVLoc.setFunctionId(FunctionId);
+ CurrentCVLoc.setFileNum(FileNo);
+ CurrentCVLoc.setLine(Line);
+ CurrentCVLoc.setColumn(Column);
+ CurrentCVLoc.setPrologueEnd(PrologueEnd);
+ CurrentCVLoc.setIsStmt(IsStmt);
+ CVLocSeen = true;
+ }
+ void clearCVLocSeen() { CVLocSeen = false; }
+
+ bool getCVLocSeen() { return CVLocSeen; }
+ const MCCVLoc &getCurrentCVLoc() { return CurrentCVLoc; }
+
+ bool isValidCVFileNumber(unsigned FileNumber);
+
/// \brief Add a line entry.
void addLineEntry(const MCCVLineEntry &LineEntry) {
size_t Offset = MCCVLines.size();
@@ -157,10 +249,12 @@ public:
const MCSymbol *FuncBegin,
const MCSymbol *FuncEnd);
- void emitInlineLineTableForFunction(
- MCObjectStreamer &OS, unsigned PrimaryFunctionId, unsigned SourceFileId,
- unsigned SourceLineNum, const MCSymbol *FnStartSym,
- const MCSymbol *FnEndSym, ArrayRef<unsigned> SecondaryFunctionIds);
+ void emitInlineLineTableForFunction(MCObjectStreamer &OS,
+ unsigned PrimaryFunctionId,
+ unsigned SourceFileId,
+ unsigned SourceLineNum,
+ const MCSymbol *FnStartSym,
+ const MCSymbol *FnEndSym);
/// Encodes the binary annotations once we have a layout.
void encodeInlineLineTable(MCAsmLayout &Layout,
@@ -180,6 +274,10 @@ public:
void emitFileChecksums(MCObjectStreamer &OS);
private:
+ /// The current CodeView line information from the last .cv_loc directive.
+ MCCVLoc CurrentCVLoc = MCCVLoc(0, 0, 0, 0, false, true);
+ bool CVLocSeen = false;
+
/// Map from string to string table offset.
StringMap<unsigned> StringTable;
@@ -204,6 +302,9 @@ private:
/// A collection of MCCVLineEntry for each section.
std::vector<MCCVLineEntry> MCCVLines;
+
+ /// All known functions and inlined call sites, indexed by function id.
+ std::vector<MCCVFunctionInfo> Functions;
};
} // end namespace llvm
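MCCVFunctionInfo packs three states into ParentFuncIdPlusOne: zero for an unallocated slot, FunctionSentinel for a top-level function, and the parent id plus one for an inlined call site. A short sketch of walking an inline chain up to its enclosing function; CVC is an assumed CodeViewContext and StartId a hypothetical function id:
    unsigned Id = StartId;
    while (MCCVFunctionInfo *Info = CVC.getCVFunctionInfo(Id)) {
      if (!Info->isInlinedCallSite())
        break;                           // reached a real (non-inlined) function
      Id = Info->getParentFuncId();      // decodes ParentFuncIdPlusOne - 1
    }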
diff --git a/contrib/llvm/include/llvm/MC/MCContext.h b/contrib/llvm/include/llvm/MC/MCContext.h
index fe1377e054e8..f846b632f112 100644
--- a/contrib/llvm/include/llvm/MC/MCContext.h
+++ b/contrib/llvm/include/llvm/MC/MCContext.h
@@ -16,12 +16,12 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/MC/MCCodeView.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/SectionKind.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Dwarf.h"
#include "llvm/Support/raw_ostream.h"
#include <map>
#include <tuple>
@@ -83,9 +83,9 @@ namespace llvm {
/// Bindings of names to symbols.
SymbolTable Symbols;
- /// ELF sections can have a corresponding symbol. This maps one to the
+ /// Sections can have a corresponding symbol. This maps one to the
/// other.
- DenseMap<const MCSectionELF *, MCSymbolELF *> SectionSymbols;
+ DenseMap<const MCSection *, MCSymbol *> SectionSymbols;
/// A mapping from a local label number and an instance count to a symbol.
/// For example, in the assembly
@@ -141,10 +141,6 @@ namespace llvm {
MCDwarfLoc CurrentDwarfLoc;
bool DwarfLocSeen;
- /// The current CodeView line information from the last .cv_loc directive.
- MCCVLoc CurrentCVLoc = MCCVLoc(0, 0, 0, 0, false, true);
- bool CVLocSeen = false;
-
/// Generate dwarf debugging info for assembly source files.
bool GenDwarfForAssembly;
@@ -307,6 +303,9 @@ namespace llvm {
/// Get the symbol for \p Name, or null.
MCSymbol *lookupSymbol(const Twine &Name) const;
+ /// Set value for a symbol.
+ void setSymbolValue(MCStreamer &Streamer, StringRef Sym, uint64_t Val);
+
/// getSymbols - Get a reference for the symbol table for clients that
/// want to, for example, iterate over all symbols. 'const' because we
/// still want any modifications to the table itself to use the MCContext
@@ -528,41 +527,15 @@ namespace llvm {
void setDwarfDebugProducer(StringRef S) { DwarfDebugProducer = S; }
StringRef getDwarfDebugProducer() { return DwarfDebugProducer; }
-
+ dwarf::DwarfFormat getDwarfFormat() const {
+ // TODO: Support DWARF64
+ return dwarf::DWARF32;
+ }
void setDwarfVersion(uint16_t v) { DwarfVersion = v; }
uint16_t getDwarfVersion() const { return DwarfVersion; }
/// @}
-
- /// \name CodeView Management
- /// @{
-
- /// Creates an entry in the cv file table.
- unsigned getCVFile(StringRef FileName, unsigned FileNumber);
-
- /// Saves the information from the currently parsed .cv_loc directive
- /// and sets CVLocSeen. When the next instruction is assembled an entry
- /// in the line number table with this information and the address of the
- /// instruction will be created.
- void setCurrentCVLoc(unsigned FunctionId, unsigned FileNo, unsigned Line,
- unsigned Column, bool PrologueEnd, bool IsStmt) {
- CurrentCVLoc.setFunctionId(FunctionId);
- CurrentCVLoc.setFileNum(FileNo);
- CurrentCVLoc.setLine(Line);
- CurrentCVLoc.setColumn(Column);
- CurrentCVLoc.setPrologueEnd(PrologueEnd);
- CurrentCVLoc.setIsStmt(IsStmt);
- CVLocSeen = true;
- }
- void clearCVLocSeen() { CVLocSeen = false; }
-
- bool getCVLocSeen() { return CVLocSeen; }
- const MCCVLoc &getCurrentCVLoc() { return CurrentCVLoc; }
-
- bool isValidCVFileNumber(unsigned FileNumber);
- /// @}
-
char *getSecureLogFile() { return SecureLogFile; }
raw_fd_ostream *getSecureLog() { return SecureLog.get(); }
bool getSecureLogUsed() { return SecureLogUsed; }
@@ -612,7 +585,7 @@ namespace llvm {
/// allocator supports it).
/// \return The allocated memory. Could be NULL.
inline void *operator new(size_t Bytes, llvm::MCContext &C,
- size_t Alignment = 8) LLVM_NOEXCEPT {
+ size_t Alignment = 8) noexcept {
return C.allocate(Bytes, Alignment);
}
/// \brief Placement delete companion to the new above.
@@ -621,8 +594,7 @@ inline void *operator new(size_t Bytes, llvm::MCContext &C,
/// invoking it directly; see the new operator for more details. This operator
/// is called implicitly by the compiler if a placement new expression using
/// the MCContext throws in the object constructor.
-inline void operator delete(void *Ptr, llvm::MCContext &C,
- size_t) LLVM_NOEXCEPT {
+inline void operator delete(void *Ptr, llvm::MCContext &C, size_t) noexcept {
C.deallocate(Ptr);
}
@@ -646,7 +618,7 @@ inline void operator delete(void *Ptr, llvm::MCContext &C,
/// allocator supports it).
/// \return The allocated memory. Could be NULL.
inline void *operator new[](size_t Bytes, llvm::MCContext &C,
- size_t Alignment = 8) LLVM_NOEXCEPT {
+ size_t Alignment = 8) noexcept {
return C.allocate(Bytes, Alignment);
}
@@ -656,7 +628,7 @@ inline void *operator new[](size_t Bytes, llvm::MCContext &C,
/// invoking it directly; see the new[] operator for more details. This operator
/// is called implicitly by the compiler if a placement new[] expression using
/// the MCContext throws in the object constructor.
-inline void operator delete[](void *Ptr, llvm::MCContext &C) LLVM_NOEXCEPT {
+inline void operator delete[](void *Ptr, llvm::MCContext &C) noexcept {
C.deallocate(Ptr);
}
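The operator new/delete overloads are the usual arena-allocation pattern: objects are carved out of the MCContext's allocator, the matching operator delete only runs if a constructor throws, and nothing is freed individually. Usage is ordinary placement-new syntax; Ctx is an assumed MCContext& and Annotation a made-up type:
    struct Annotation { unsigned Kind; };

    Annotation *A = new (Ctx) Annotation{42};
    // No explicit delete: the storage is reclaimed when the MCContext is destroyed.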
diff --git a/contrib/llvm/include/llvm/MC/MCELFStreamer.h b/contrib/llvm/include/llvm/MC/MCELFStreamer.h
index b108f0df52b6..a5c263844352 100644
--- a/contrib/llvm/include/llvm/MC/MCELFStreamer.h
+++ b/contrib/llvm/include/llvm/MC/MCELFStreamer.h
@@ -57,7 +57,7 @@ public:
void EmitCOFFSymbolType(int Type) override;
void EndCOFFSymbolDef() override;
- void emitELFSize(MCSymbolELF *Symbol, const MCExpr *Value) override;
+ void emitELFSize(MCSymbol *Symbol, const MCExpr *Value) override;
void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) override;
diff --git a/contrib/llvm/include/llvm/MC/MCExpr.h b/contrib/llvm/include/llvm/MC/MCExpr.h
index b0e4736565b0..0d1e675da459 100644
--- a/contrib/llvm/include/llvm/MC/MCExpr.h
+++ b/contrib/llvm/include/llvm/MC/MCExpr.h
@@ -266,6 +266,11 @@ public:
VK_WebAssembly_FUNCTION, // Function table index, rather than virtual addr
+ VK_AMDGPU_GOTPCREL32_LO, // symbol@gotpcrel32@lo
+ VK_AMDGPU_GOTPCREL32_HI, // symbol@gotpcrel32@hi
+ VK_AMDGPU_REL32_LO, // symbol@rel32@lo
+ VK_AMDGPU_REL32_HI, // symbol@rel32@hi
+
VK_TPREL,
VK_DTPREL
};
diff --git a/contrib/llvm/include/llvm/MC/MCFixup.h b/contrib/llvm/include/llvm/MC/MCFixup.h
index 8ab477c401a1..b493ca0b0ea7 100644
--- a/contrib/llvm/include/llvm/MC/MCFixup.h
+++ b/contrib/llvm/include/llvm/MC/MCFixup.h
@@ -33,6 +33,10 @@ enum MCFixupKind {
FK_GPRel_2, ///< A two-byte gp relative fixup.
FK_GPRel_4, ///< A four-byte gp relative fixup.
FK_GPRel_8, ///< An eight-byte gp relative fixup.
+ FK_DTPRel_4, ///< A four-byte dtp relative fixup.
+ FK_DTPRel_8, ///< An eight-byte dtp relative fixup.
+ FK_TPRel_4, ///< A four-byte tp relative fixup.
+ FK_TPRel_8, ///< An eight-byte tp relative fixup.
FK_SecRel_1, ///< A one-byte section relative fixup.
FK_SecRel_2, ///< A two-byte section relative fixup.
FK_SecRel_4, ///< A four-byte section relative fixup.
diff --git a/contrib/llvm/include/llvm/MC/MCFragment.h b/contrib/llvm/include/llvm/MC/MCFragment.h
index e0a2bfc23747..edb740f36d91 100644
--- a/contrib/llvm/include/llvm/MC/MCFragment.h
+++ b/contrib/llvm/include/llvm/MC/MCFragment.h
@@ -25,6 +25,7 @@ class MCSubtargetInfo;
class MCFragment : public ilist_node_with_parent<MCFragment, MCSection> {
friend class MCAsmLayout;
+ MCFragment() = delete;
MCFragment(const MCFragment &) = delete;
void operator=(const MCFragment &) = delete;
@@ -83,11 +84,6 @@ protected:
uint8_t BundlePadding, MCSection *Parent = nullptr);
~MCFragment();
-private:
-
- // This is a friend so that the sentinal can be created.
- friend struct ilist_sentinel_traits<MCFragment>;
- MCFragment();
public:
/// Destroys the current fragment.
@@ -350,9 +346,13 @@ class MCOrgFragment : public MCFragment {
/// Value - Value to use for filling bytes.
int8_t Value;
+ /// Loc - Source location of the directive that this fragment was created for.
+ SMLoc Loc;
+
public:
- MCOrgFragment(const MCExpr &Offset, int8_t Value, MCSection *Sec = nullptr)
- : MCFragment(FT_Org, false, 0, Sec), Offset(&Offset), Value(Value) {}
+ MCOrgFragment(const MCExpr &Offset, int8_t Value, SMLoc Loc,
+ MCSection *Sec = nullptr)
+ : MCFragment(FT_Org, false, 0, Sec), Offset(&Offset), Value(Value), Loc(Loc) {}
/// \name Accessors
/// @{
@@ -361,6 +361,8 @@ public:
uint8_t getValue() const { return Value; }
+ SMLoc getLoc() const { return Loc; }
+
/// @}
static bool classof(const MCFragment *F) {
@@ -495,7 +497,6 @@ class MCCVInlineLineTableFragment : public MCFragment {
unsigned StartLineNum;
const MCSymbol *FnStartSym;
const MCSymbol *FnEndSym;
- SmallVector<unsigned, 3> SecondaryFuncs;
SmallString<8> Contents;
/// CodeViewContext has the real knowledge about this format, so let it access
@@ -506,12 +507,10 @@ public:
MCCVInlineLineTableFragment(unsigned SiteFuncId, unsigned StartFileId,
unsigned StartLineNum, const MCSymbol *FnStartSym,
const MCSymbol *FnEndSym,
- ArrayRef<unsigned> SecondaryFuncs,
MCSection *Sec = nullptr)
: MCFragment(FT_CVInlineLines, false, 0, Sec), SiteFuncId(SiteFuncId),
StartFileId(StartFileId), StartLineNum(StartLineNum),
- FnStartSym(FnStartSym), FnEndSym(FnEndSym),
- SecondaryFuncs(SecondaryFuncs.begin(), SecondaryFuncs.end()) {}
+ FnStartSym(FnStartSym), FnEndSym(FnEndSym) {}
/// \name Accessors
/// @{
diff --git a/contrib/llvm/include/llvm/MC/MCInstPrinter.h b/contrib/llvm/include/llvm/MC/MCInstPrinter.h
index 2119c5a633b4..320b280cc756 100644
--- a/contrib/llvm/include/llvm/MC/MCInstPrinter.h
+++ b/contrib/llvm/include/llvm/MC/MCInstPrinter.h
@@ -10,10 +10,11 @@
#ifndef LLVM_MC_MCINSTPRINTER_H
#define LLVM_MC_MCINSTPRINTER_H
-#include "llvm/Support/DataTypes.h"
#include "llvm/Support/Format.h"
+#include <cstdint>
namespace llvm {
+
template <typename T> class ArrayRef;
class MCInst;
class raw_ostream;
@@ -27,11 +28,13 @@ class StringRef;
void dumpBytes(ArrayRef<uint8_t> Bytes, raw_ostream &OS);
namespace HexStyle {
+
enum Style {
C, ///< 0xff
Asm ///< 0ffh
};
-}
+
+} // end namespace HexStyle
/// \brief This is an instance of a target assembly language printer that
/// converts an MCInst to valid target assembly syntax.
@@ -60,8 +63,8 @@ protected:
public:
MCInstPrinter(const MCAsmInfo &mai, const MCInstrInfo &mii,
const MCRegisterInfo &mri)
- : CommentStream(nullptr), MAI(mai), MII(mii), MRI(mri), UseMarkup(0),
- PrintImmHex(0), PrintHexStyle(HexStyle::C) {}
+ : CommentStream(nullptr), MAI(mai), MII(mii), MRI(mri), UseMarkup(false),
+ PrintImmHex(false), PrintHexStyle(HexStyle::C) {}
virtual ~MCInstPrinter();
@@ -103,6 +106,6 @@ public:
format_object<uint64_t> formatHex(uint64_t Value) const;
};
-} // namespace llvm
+} // end namespace llvm
-#endif
+#endif // LLVM_MC_MCINSTPRINTER_H
diff --git a/contrib/llvm/include/llvm/MC/MCInstrDesc.h b/contrib/llvm/include/llvm/MC/MCInstrDesc.h
index 88aab73d4058..340d8253b8c9 100644
--- a/contrib/llvm/include/llvm/MC/MCInstrDesc.h
+++ b/contrib/llvm/include/llvm/MC/MCInstrDesc.h
@@ -47,8 +47,22 @@ enum OperandType {
OPERAND_REGISTER = 2,
OPERAND_MEMORY = 3,
OPERAND_PCREL = 4,
- OPERAND_FIRST_TARGET = 5
+
+ OPERAND_FIRST_GENERIC = 6,
+ OPERAND_GENERIC_0 = 6,
+ OPERAND_GENERIC_1 = 7,
+ OPERAND_GENERIC_2 = 8,
+ OPERAND_GENERIC_3 = 9,
+ OPERAND_GENERIC_4 = 10,
+ OPERAND_GENERIC_5 = 11,
+ OPERAND_LAST_GENERIC = 11,
+
+ OPERAND_FIRST_TARGET = 12,
};
+
+enum GenericOperandType {
+};
+
}
/// \brief This holds information about one operand of a machine instruction,
@@ -83,6 +97,16 @@ public:
/// \brief Set if this operand is an optional def.
bool isOptionalDef() const { return Flags & (1 << MCOI::OptionalDef); }
+
+ bool isGenericType() const {
+ return OperandType >= MCOI::OPERAND_FIRST_GENERIC &&
+ OperandType <= MCOI::OPERAND_LAST_GENERIC;
+ }
+
+ unsigned getGenericTypeIndex() const {
+ assert(isGenericType() && "non-generic types don't have an index");
+ return OperandType - MCOI::OPERAND_FIRST_GENERIC;
+ }
};
//===----------------------------------------------------------------------===//
@@ -126,7 +150,8 @@ enum Flag {
RegSequence,
ExtractSubreg,
InsertSubreg,
- Convergent
+ Convergent,
+ Add
};
}
@@ -191,32 +216,35 @@ public:
unsigned getNumDefs() const { return NumDefs; }
/// \brief Return flags of this instruction.
- unsigned getFlags() const { return Flags; }
+ uint64_t getFlags() const { return Flags; }
/// \brief Return true if this instruction can have a variable number of
/// operands. In this case, the variable operands will be after the normal
/// operands but before the implicit definitions and uses (if any are
/// present).
- bool isVariadic() const { return Flags & (1 << MCID::Variadic); }
+ bool isVariadic() const { return Flags & (1ULL << MCID::Variadic); }
/// \brief Set if this instruction has an optional definition, e.g.
/// ARM instructions which can set condition code if 's' bit is set.
- bool hasOptionalDef() const { return Flags & (1 << MCID::HasOptionalDef); }
+ bool hasOptionalDef() const { return Flags & (1ULL << MCID::HasOptionalDef); }
/// \brief Return true if this is a pseudo instruction that doesn't
/// correspond to a real machine instruction.
- bool isPseudo() const { return Flags & (1 << MCID::Pseudo); }
+ bool isPseudo() const { return Flags & (1ULL << MCID::Pseudo); }
/// \brief Return true if the instruction is a return.
- bool isReturn() const { return Flags & (1 << MCID::Return); }
+ bool isReturn() const { return Flags & (1ULL << MCID::Return); }
+
+ /// \brief Return true if the instruction is an add instruction.
+ bool isAdd() const { return Flags & (1ULL << MCID::Add); }
/// \brief Return true if the instruction is a call.
- bool isCall() const { return Flags & (1 << MCID::Call); }
+ bool isCall() const { return Flags & (1ULL << MCID::Call); }
/// \brief Returns true if the specified instruction stops control flow
/// from executing the instruction immediately following it. Examples include
/// unconditional branches and return instructions.
- bool isBarrier() const { return Flags & (1 << MCID::Barrier); }
+ bool isBarrier() const { return Flags & (1ULL << MCID::Barrier); }
/// \brief Returns true if this instruction part of the terminator for
/// a basic block. Typically this is things like return and branch
@@ -224,17 +252,17 @@ public:
///
/// Various passes use this to insert code into the bottom of a basic block,
/// but before control flow occurs.
- bool isTerminator() const { return Flags & (1 << MCID::Terminator); }
+ bool isTerminator() const { return Flags & (1ULL << MCID::Terminator); }
/// \brief Returns true if this is a conditional, unconditional, or
/// indirect branch. Predicates below can be used to discriminate between
/// these cases, and the TargetInstrInfo::AnalyzeBranch method can be used to
/// get more information.
- bool isBranch() const { return Flags & (1 << MCID::Branch); }
+ bool isBranch() const { return Flags & (1ULL << MCID::Branch); }
/// \brief Return true if this is an indirect branch, such as a
/// branch through a register.
- bool isIndirectBranch() const { return Flags & (1 << MCID::IndirectBranch); }
+ bool isIndirectBranch() const { return Flags & (1ULL << MCID::IndirectBranch); }
/// \brief Return true if this is a branch which may fall
/// through to the next instruction or may transfer control flow to some other
@@ -261,29 +289,29 @@ public:
/// that controls execution. It may be set to 'always', or may be set to other
/// values. There are various methods in TargetInstrInfo that can be used to
/// control and modify the predicate in this instruction.
- bool isPredicable() const { return Flags & (1 << MCID::Predicable); }
+ bool isPredicable() const { return Flags & (1ULL << MCID::Predicable); }
/// \brief Return true if this instruction is a comparison.
- bool isCompare() const { return Flags & (1 << MCID::Compare); }
+ bool isCompare() const { return Flags & (1ULL << MCID::Compare); }
/// \brief Return true if this instruction is a move immediate
/// (including conditional moves) instruction.
- bool isMoveImmediate() const { return Flags & (1 << MCID::MoveImm); }
+ bool isMoveImmediate() const { return Flags & (1ULL << MCID::MoveImm); }
/// \brief Return true if this instruction is a bitcast instruction.
- bool isBitcast() const { return Flags & (1 << MCID::Bitcast); }
+ bool isBitcast() const { return Flags & (1ULL << MCID::Bitcast); }
/// \brief Return true if this is a select instruction.
- bool isSelect() const { return Flags & (1 << MCID::Select); }
+ bool isSelect() const { return Flags & (1ULL << MCID::Select); }
/// \brief Return true if this instruction cannot be safely
/// duplicated. For example, if the instruction has a unique labels attached
/// to it, duplicating it would cause multiple definition errors.
- bool isNotDuplicable() const { return Flags & (1 << MCID::NotDuplicable); }
+ bool isNotDuplicable() const { return Flags & (1ULL << MCID::NotDuplicable); }
/// \brief Returns true if the specified instruction has a delay slot which
/// must be filled by the code generator.
- bool hasDelaySlot() const { return Flags & (1 << MCID::DelaySlot); }
+ bool hasDelaySlot() const { return Flags & (1ULL << MCID::DelaySlot); }
/// \brief Return true for instructions that can be folded as memory operands
/// in other instructions. The most common use for this is instructions that
@@ -292,7 +320,7 @@ public:
/// constant-pool loads, such as V_SETALLONES on x86, to allow them to be
/// folded when it is beneficial. This should only be set on instructions
/// that return a value in their only virtual register definition.
- bool canFoldAsLoad() const { return Flags & (1 << MCID::FoldableAsLoad); }
+ bool canFoldAsLoad() const { return Flags & (1ULL << MCID::FoldableAsLoad); }
/// \brief Return true if this instruction behaves
/// the same way as the generic REG_SEQUENCE instructions.
@@ -304,7 +332,7 @@ public:
/// Note that for the optimizers to be able to take advantage of
/// this property, TargetInstrInfo::getRegSequenceLikeInputs has to be
/// overridden accordingly.
- bool isRegSequenceLike() const { return Flags & (1 << MCID::RegSequence); }
+ bool isRegSequenceLike() const { return Flags & (1ULL << MCID::RegSequence); }
/// \brief Return true if this instruction behaves
/// the same way as the generic EXTRACT_SUBREG instructions.
@@ -318,7 +346,7 @@ public:
/// this property, TargetInstrInfo::getExtractSubregLikeInputs has to be
/// overridden accordingly.
bool isExtractSubregLike() const {
- return Flags & (1 << MCID::ExtractSubreg);
+ return Flags & (1ULL << MCID::ExtractSubreg);
}
/// \brief Return true if this instruction behaves
@@ -331,14 +359,14 @@ public:
/// Note that for the optimizers to be able to take advantage of
/// this property, TargetInstrInfo::getInsertSubregLikeInputs has to be
/// overridden accordingly.
- bool isInsertSubregLike() const { return Flags & (1 << MCID::InsertSubreg); }
+ bool isInsertSubregLike() const { return Flags & (1ULL << MCID::InsertSubreg); }
/// \brief Return true if this instruction is convergent.
///
/// Convergent instructions may not be made control-dependent on any
/// additional values.
- bool isConvergent() const { return Flags & (1 << MCID::Convergent); }
+ bool isConvergent() const { return Flags & (1ULL << MCID::Convergent); }
//===--------------------------------------------------------------------===//
// Side Effect Analysis
@@ -347,13 +375,13 @@ public:
/// \brief Return true if this instruction could possibly read memory.
/// Instructions with this flag set are not necessarily simple load
/// instructions, they may load a value and modify it, for example.
- bool mayLoad() const { return Flags & (1 << MCID::MayLoad); }
+ bool mayLoad() const { return Flags & (1ULL << MCID::MayLoad); }
/// \brief Return true if this instruction could possibly modify memory.
/// Instructions with this flag set are not necessarily simple store
/// instructions, they may store a modified value based on their operands, or
/// may not actually modify anything, for example.
- bool mayStore() const { return Flags & (1 << MCID::MayStore); }
+ bool mayStore() const { return Flags & (1ULL << MCID::MayStore); }
/// \brief Return true if this instruction has side
/// effects that are not modeled by other flags. This does not return true
@@ -368,7 +396,7 @@ public:
/// a control register, flushing a cache, modifying a register invisible to
/// LLVM, etc.
bool hasUnmodeledSideEffects() const {
- return Flags & (1 << MCID::UnmodeledSideEffects);
+ return Flags & (1ULL << MCID::UnmodeledSideEffects);
}
//===--------------------------------------------------------------------===//
@@ -385,7 +413,7 @@ public:
/// sometimes. In these cases, the call to commuteInstruction will fail.
/// Also note that some instructions require non-trivial modification to
/// commute them.
- bool isCommutable() const { return Flags & (1 << MCID::Commutable); }
+ bool isCommutable() const { return Flags & (1ULL << MCID::Commutable); }
/// \brief Return true if this is a 2-address instruction which can be changed
/// into a 3-address instruction if needed. Doing this transformation can be
@@ -402,7 +430,7 @@ public:
/// instruction (e.g. shl reg, 4 on x86).
///
bool isConvertibleTo3Addr() const {
- return Flags & (1 << MCID::ConvertibleTo3Addr);
+ return Flags & (1ULL << MCID::ConvertibleTo3Addr);
}
/// \brief Return true if this instruction requires custom insertion support
@@ -414,14 +442,14 @@ public:
/// If this is true, the TargetLoweringInfo::InsertAtEndOfBasicBlock method
/// is used to insert this into the MachineBasicBlock.
bool usesCustomInsertionHook() const {
- return Flags & (1 << MCID::UsesCustomInserter);
+ return Flags & (1ULL << MCID::UsesCustomInserter);
}
/// \brief Return true if this instruction requires *adjustment* after
/// instruction selection by calling a target hook. For example, this can be
/// used to fill in ARM 's' optional operand depending on whether the
/// conditional flag register is used.
- bool hasPostISelHook() const { return Flags & (1 << MCID::HasPostISelHook); }
+ bool hasPostISelHook() const { return Flags & (1ULL << MCID::HasPostISelHook); }
/// \brief Returns true if this instruction is a candidate for remat. This
/// flag is only used in TargetInstrInfo method isTriviallyRematerializable.
@@ -430,7 +458,7 @@ public:
/// or isReallyTriviallyReMaterializableGeneric methods are called to verify
/// the instruction is really rematable.
bool isRematerializable() const {
- return Flags & (1 << MCID::Rematerializable);
+ return Flags & (1ULL << MCID::Rematerializable);
}
/// \brief Returns true if this instruction has the same cost (or less) than a
@@ -442,7 +470,7 @@ public:
///
/// This method could be called by interface TargetInstrInfo::isAsCheapAsAMove
/// for different subtargets.
- bool isAsCheapAsAMove() const { return Flags & (1 << MCID::CheapAsAMove); }
+ bool isAsCheapAsAMove() const { return Flags & (1ULL << MCID::CheapAsAMove); }
/// \brief Returns true if this instruction source operands have special
/// register allocation requirements that are not captured by the operand
@@ -451,7 +479,7 @@ public:
/// allocation passes should not attempt to change allocations for sources of
/// instructions with this flag.
bool hasExtraSrcRegAllocReq() const {
- return Flags & (1 << MCID::ExtraSrcRegAllocReq);
+ return Flags & (1ULL << MCID::ExtraSrcRegAllocReq);
}
/// \brief Returns true if this instruction def operands have special register
@@ -461,7 +489,7 @@ public:
/// allocation passes should not attempt to change allocations for definitions
/// of instructions with this flag.
bool hasExtraDefRegAllocReq() const {
- return Flags & (1 << MCID::ExtraDefRegAllocReq);
+ return Flags & (1ULL << MCID::ExtraDefRegAllocReq);
}
/// \brief Return a list of registers that are potentially read by any
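Two themes run through the MCInstrDesc changes: the flag word widens to 64 bits, so every predicate shifts 1ULL instead of a plain int, and operand descriptions gain a generic-type range with a zero-based index. A condensed sketch of both; Desc and OpInfo stand in for an MCInstrDesc and one of its MCOperandInfo entries:
    // A 32-bit shift would overflow once flag positions pass bit 31.
    bool IsAdd = Desc.getFlags() & (1ULL << llvm::MCID::Add);

    if (OpInfo.isGenericType()) {
      unsigned TypeIdx = OpInfo.getGenericTypeIndex();   // 0 for OPERAND_GENERIC_0
      (void)TypeIdx;
    }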
diff --git a/contrib/llvm/include/llvm/MC/MCInstrInfo.h b/contrib/llvm/include/llvm/MC/MCInstrInfo.h
index 70c86587b08c..80f1f320b7c2 100644
--- a/contrib/llvm/include/llvm/MC/MCInstrInfo.h
+++ b/contrib/llvm/include/llvm/MC/MCInstrInfo.h
@@ -48,9 +48,9 @@ public:
}
/// \brief Returns the name for the instructions with the given opcode.
- const char *getName(unsigned Opcode) const {
+ StringRef getName(unsigned Opcode) const {
assert(Opcode < NumOpcodes && "Invalid opcode!");
- return &InstrNameData[InstrNameIndices[Opcode]];
+ return StringRef(&InstrNameData[InstrNameIndices[Opcode]]);
}
};
diff --git a/contrib/llvm/include/llvm/MC/MCInstrItineraries.h b/contrib/llvm/include/llvm/MC/MCInstrItineraries.h
index b2871a9805e1..1fb276a302b9 100644
--- a/contrib/llvm/include/llvm/MC/MCInstrItineraries.h
+++ b/contrib/llvm/include/llvm/MC/MCInstrItineraries.h
@@ -111,7 +111,7 @@ public:
MCSchedModel SchedModel; ///< Basic machine properties.
const InstrStage *Stages; ///< Array of stages selected
const unsigned *OperandCycles; ///< Array of operand cycles selected
- const unsigned *Forwardings; ///< Array of pipeline forwarding pathes
+ const unsigned *Forwardings; ///< Array of pipeline forwarding paths
const InstrItinerary *Itineraries; ///< Array of itineraries selected
/// Ctors.
diff --git a/contrib/llvm/include/llvm/MC/MCObjectFileInfo.h b/contrib/llvm/include/llvm/MC/MCObjectFileInfo.h
index cef4e5b3eb93..9aa8812c7bb3 100644
--- a/contrib/llvm/include/llvm/MC/MCObjectFileInfo.h
+++ b/contrib/llvm/include/llvm/MC/MCObjectFileInfo.h
@@ -65,12 +65,6 @@ protected:
/// constants.
MCSection *ReadOnlySection;
- /// This section contains the static constructor pointer list.
- MCSection *StaticCtorSection;
-
- /// This section contains the static destructor pointer list.
- MCSection *StaticDtorSection;
-
/// If exception handling is supported by the target, this is the section the
/// Language Specific Data Area information is emitted to.
MCSection *LSDASection;
diff --git a/contrib/llvm/include/llvm/MC/MCObjectStreamer.h b/contrib/llvm/include/llvm/MC/MCObjectStreamer.h
index d7775f27868c..f9111b7f47ea 100644
--- a/contrib/llvm/include/llvm/MC/MCObjectStreamer.h
+++ b/contrib/llvm/include/llvm/MC/MCObjectStreamer.h
@@ -112,7 +112,8 @@ public:
unsigned MaxBytesToEmit = 0) override;
void EmitCodeAlignment(unsigned ByteAlignment,
unsigned MaxBytesToEmit = 0) override;
- void emitValueToOffset(const MCExpr *Offset, unsigned char Value) override;
+ void emitValueToOffset(const MCExpr *Offset, unsigned char Value,
+ SMLoc Loc) override;
void EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
unsigned Column, unsigned Flags,
unsigned Isa, unsigned Discriminator,
@@ -124,18 +125,23 @@ public:
const MCSymbol *Label);
void EmitCVLocDirective(unsigned FunctionId, unsigned FileNo, unsigned Line,
unsigned Column, bool PrologueEnd, bool IsStmt,
- StringRef FileName) override;
+ StringRef FileName, SMLoc Loc) override;
void EmitCVLinetableDirective(unsigned FunctionId, const MCSymbol *Begin,
const MCSymbol *End) override;
- void EmitCVInlineLinetableDirective(
- unsigned PrimaryFunctionId, unsigned SourceFileId, unsigned SourceLineNum,
- const MCSymbol *FnStartSym, const MCSymbol *FnEndSym,
- ArrayRef<unsigned> SecondaryFunctionIds) override;
+ void EmitCVInlineLinetableDirective(unsigned PrimaryFunctionId,
+ unsigned SourceFileId,
+ unsigned SourceLineNum,
+ const MCSymbol *FnStartSym,
+ const MCSymbol *FnEndSym) override;
void EmitCVDefRangeDirective(
ArrayRef<std::pair<const MCSymbol *, const MCSymbol *>> Ranges,
StringRef FixedSizePortion) override;
void EmitCVStringTableDirective() override;
void EmitCVFileChecksumsDirective() override;
+ void EmitDTPRel32Value(const MCExpr *Value) override;
+ void EmitDTPRel64Value(const MCExpr *Value) override;
+ void EmitTPRel32Value(const MCExpr *Value) override;
+ void EmitTPRel64Value(const MCExpr *Value) override;
void EmitGPRel32Value(const MCExpr *Value) override;
void EmitGPRel64Value(const MCExpr *Value) override;
bool EmitRelocDirective(const MCExpr &Offset, StringRef Name,
diff --git a/contrib/llvm/include/llvm/MC/MCParser/AsmLexer.h b/contrib/llvm/include/llvm/MC/MCParser/AsmLexer.h
index c779121b6cf0..029598c013d3 100644
--- a/contrib/llvm/include/llvm/MC/MCParser/AsmLexer.h
+++ b/contrib/llvm/include/llvm/MC/MCParser/AsmLexer.h
@@ -31,7 +31,8 @@ class AsmLexer : public MCAsmLexer {
StringRef CurBuf;
bool IsAtStartOfLine;
bool IsAtStartOfStatement;
-
+ bool IsParsingMSInlineAsm;
+ bool IsPeeking;
void operator=(const AsmLexer&) = delete;
AsmLexer(const AsmLexer&) = delete;
@@ -44,6 +45,7 @@ public:
~AsmLexer() override;
void setBuffer(StringRef Buf, const char *ptr = nullptr);
+ void setParsingMSInlineAsm(bool V) { IsParsingMSInlineAsm = V; }
StringRef LexUntilEndOfStatement() override;
diff --git a/contrib/llvm/include/llvm/MC/MCParser/MCAsmLexer.h b/contrib/llvm/include/llvm/MC/MCParser/MCAsmLexer.h
index 3dd22c93d363..56da6f85c199 100644
--- a/contrib/llvm/include/llvm/MC/MCParser/MCAsmLexer.h
+++ b/contrib/llvm/include/llvm/MC/MCParser/MCAsmLexer.h
@@ -55,7 +55,15 @@ public:
Pipe, PipePipe, Caret,
Amp, AmpAmp, Exclaim, ExclaimEqual, Percent, Hash,
Less, LessEqual, LessLess, LessGreater,
- Greater, GreaterEqual, GreaterGreater, At
+ Greater, GreaterEqual, GreaterGreater, At,
+
+ // MIPS unary expression operators such as %neg.
+ PercentCall16, PercentCall_Hi, PercentCall_Lo, PercentDtprel_Hi,
+ PercentDtprel_Lo, PercentGot, PercentGot_Disp, PercentGot_Hi, PercentGot_Lo,
+ PercentGot_Ofst, PercentGot_Page, PercentGottprel, PercentGp_Rel, PercentHi,
+ PercentHigher, PercentHighest, PercentLo, PercentNeg, PercentPcrel_Hi,
+ PercentPcrel_Lo, PercentTlsgd, PercentTlsldm, PercentTprel_Hi,
+ PercentTprel_Lo
};
private:
@@ -120,6 +128,20 @@ public:
}
};
+/// A callback class which is notified of each comment in an assembly file as
+/// it is lexed.
+class AsmCommentConsumer {
+public:
+ virtual ~AsmCommentConsumer() {};
+
+ /// Callback function for when a comment is lexed. Loc is the start of the
+ /// comment text (excluding the comment-start marker). CommentText is the text
+ /// of the comment, excluding the comment start and end markers, and the
+ /// newline for single-line comments.
+ virtual void HandleComment(SMLoc Loc, StringRef CommentText) = 0;
+};
+
+
/// Generic assembler lexer interface, for use by target specific assembly
/// lexers.
class MCAsmLexer {
@@ -136,6 +158,8 @@ protected: // Can only create subclasses.
const char *TokStart;
bool SkipSpace;
bool AllowAtInIdentifier;
+ bool IsAtStartOfStatement;
+ AsmCommentConsumer *CommentConsumer;
MCAsmLexer();
@@ -155,6 +179,8 @@ public:
/// the main input file has been reached.
const AsmToken &Lex() {
assert(!CurTok.empty());
+ // Mark whether we are parsing out an EndOfStatement.
+ IsAtStartOfStatement = CurTok.front().getKind() == AsmToken::EndOfStatement;
CurTok.erase(CurTok.begin());
// LexToken may generate multiple tokens via UnLex but will always return
// the first one. Place returned value at head of CurTok vector.
@@ -166,9 +192,12 @@ public:
}
void UnLex(AsmToken const &Token) {
+ IsAtStartOfStatement = false;
CurTok.insert(CurTok.begin(), Token);
}
+ bool isAtStartOfStatement() { return IsAtStartOfStatement; }
+
virtual StringRef LexUntilEndOfStatement() = 0;
/// Get the current source location.
@@ -220,6 +249,10 @@ public:
bool getAllowAtInIdentifier() { return AllowAtInIdentifier; }
void setAllowAtInIdentifier(bool v) { AllowAtInIdentifier = v; }
+
+ void setCommentConsumer(AsmCommentConsumer *CommentConsumer) {
+ this->CommentConsumer = CommentConsumer;
+ }
};
} // End llvm namespace
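AsmCommentConsumer is a plain observer interface: subclass it, register it on the lexer, and HandleComment fires once per lexed comment with the comment markers already stripped. A minimal sketch; Lexer is an assumed MCAsmLexer instance:
    class PrintingCommentConsumer : public llvm::AsmCommentConsumer {
    public:
      void HandleComment(llvm::SMLoc Loc, llvm::StringRef CommentText) override {
        llvm::errs() << "comment: " << CommentText << "\n";
      }
    };

    PrintingCommentConsumer Consumer;
    Lexer.setCommentConsumer(&Consumer);   // every subsequent comment reaches Consumer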
diff --git a/contrib/llvm/include/llvm/MC/MCParser/MCAsmParser.h b/contrib/llvm/include/llvm/MC/MCParser/MCAsmParser.h
index ac8706d99505..eb85a3a30963 100644
--- a/contrib/llvm/include/llvm/MC/MCParser/MCAsmParser.h
+++ b/contrib/llvm/include/llvm/MC/MCParser/MCAsmParser.h
@@ -11,7 +11,9 @@
#define LLVM_MC_MCPARSER_MCASMPARSER_H
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCParser/AsmLexer.h"
#include "llvm/Support/DataTypes.h"
@@ -67,6 +69,12 @@ public:
typedef std::pair<MCAsmParserExtension*, DirectiveHandler>
ExtensionDirectiveHandler;
+ struct MCPendingError {
+ SMLoc Loc;
+ SmallString<64> Msg;
+ SMRange Range;
+ };
+
private:
MCAsmParser(const MCAsmParser &) = delete;
void operator=(const MCAsmParser &) = delete;
@@ -78,6 +86,11 @@ private:
protected: // Can only create subclasses.
MCAsmParser();
+ /// Flag tracking whether any errors have been encountered.
+ bool HadError;
+
+ SmallVector<MCPendingError, 1> PendingErrors;
+
public:
virtual ~MCAsmParser();
@@ -122,21 +135,38 @@ public:
const MCInstPrinter *IP, MCAsmParserSemaCallback &SI) = 0;
/// \brief Emit a note at the location \p L, with the message \p Msg.
- virtual void Note(SMLoc L, const Twine &Msg,
- ArrayRef<SMRange> Ranges = None) = 0;
+ virtual void Note(SMLoc L, const Twine &Msg, SMRange Range = None) = 0;
/// \brief Emit a warning at the location \p L, with the message \p Msg.
///
/// \return The return value is true, if warnings are fatal.
- virtual bool Warning(SMLoc L, const Twine &Msg,
- ArrayRef<SMRange> Ranges = None) = 0;
+ virtual bool Warning(SMLoc L, const Twine &Msg, SMRange Range = None) = 0;
+
+ /// \brief Return an error at the location \p L, with the message \p Msg. This
+ /// may be modified before being emitted.
+ ///
+ /// \return The return value is always true, as an idiomatic convenience to
+ /// clients.
+ bool Error(SMLoc L, const Twine &Msg, SMRange Range = None);
/// \brief Emit an error at the location \p L, with the message \p Msg.
///
/// \return The return value is always true, as an idiomatic convenience to
/// clients.
- virtual bool Error(SMLoc L, const Twine &Msg,
- ArrayRef<SMRange> Ranges = None) = 0;
+ virtual bool printError(SMLoc L, const Twine &Msg, SMRange Range = None) = 0;
+
+ bool hasPendingError() { return !PendingErrors.empty(); }
+
+ bool printPendingErrors() {
+ bool rv = !PendingErrors.empty();
+ for (auto Err : PendingErrors) {
+ printError(Err.Loc, Twine(Err.Msg), Err.Range);
+ }
+ PendingErrors.clear();
+ return rv;
+ }
+
+ bool addErrorSuffix(const Twine &Suffix);
/// \brief Get the next AsmToken in the stream, possibly handling file
/// inclusion first.
@@ -146,7 +176,22 @@ public:
const AsmToken &getTok() const;
/// \brief Report an error at the current lexer location.
- bool TokError(const Twine &Msg, ArrayRef<SMRange> Ranges = None);
+ bool TokError(const Twine &Msg, SMRange Range = None);
+
+ bool parseTokenLoc(SMLoc &Loc);
+ bool parseToken(AsmToken::TokenKind T, const Twine &Msg = "unexpected token");
+ /// \brief Attempt to parse and consume token, returning true on
+ /// success.
+ bool parseOptionalToken(AsmToken::TokenKind T);
+
+ bool parseEOL(const Twine &ErrMsg);
+
+ bool parseMany(std::function<bool()> parseOne, bool hasComma = true);
+
+ bool parseIntToken(int64_t &V, const Twine &ErrMsg);
+
+ bool check(bool P, const llvm::Twine &Msg);
+ bool check(bool P, SMLoc Loc, const llvm::Twine &Msg);
/// \brief Parse an identifier or string (as a quoted identifier) and set \p
/// Res to the identifier contents.
@@ -196,7 +241,8 @@ public:
/// \brief Ensure that we have a valid section set in the streamer. Otherwise,
/// report an error and switch to .text.
- virtual void checkForValidSection() = 0;
+ /// \return - False on success.
+ virtual bool checkForValidSection() = 0;
/// \brief Parse an arbitrary expression of a specified parenthesis depth,
/// assuming that the initial '(' characters have already been consumed.
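As a usage illustration only (the directive name and handler below are hypothetical, not from this change), the new token helpers let a directive parser chain its checks and attach a suffix to whatever error was recorded:

  #include "llvm/MC/MCParser/MCAsmParser.h"
  using namespace llvm;

  // Parses a hypothetical ".mydirective <count>, <symbol>" line.
  static bool parseMyDirective(MCAsmParser &Parser) {
    int64_t Count;
    StringRef Name;
    if (Parser.parseIntToken(Count, "expected integer") ||
        Parser.parseToken(AsmToken::Comma, "expected comma") ||
        Parser.check(Parser.parseIdentifier(Name), "expected identifier") ||
        Parser.parseToken(AsmToken::EndOfStatement,
                          "expected end of statement"))
      return Parser.addErrorSuffix(" in '.mydirective' directive");
    // ... act on Count and Name ...
    return false;
  }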
diff --git a/contrib/llvm/include/llvm/MC/MCParser/MCAsmParserExtension.h b/contrib/llvm/include/llvm/MC/MCParser/MCAsmParserExtension.h
index 30b25dcfdaec..dabda0ab9485 100644
--- a/contrib/llvm/include/llvm/MC/MCParser/MCAsmParserExtension.h
+++ b/contrib/llvm/include/llvm/MC/MCParser/MCAsmParserExtension.h
@@ -68,8 +68,8 @@ public:
bool Warning(SMLoc L, const Twine &Msg) {
return getParser().Warning(L, Msg);
}
- bool Error(SMLoc L, const Twine &Msg) {
- return getParser().Error(L, Msg);
+ bool Error(SMLoc L, const Twine &Msg, SMRange Range = SMRange()) {
+ return getParser().Error(L, Msg, Range);
}
void Note(SMLoc L, const Twine &Msg) {
getParser().Note(L, Msg);
@@ -79,8 +79,31 @@ public:
}
const AsmToken &Lex() { return getParser().Lex(); }
-
const AsmToken &getTok() { return getParser().getTok(); }
+ bool parseToken(AsmToken::TokenKind T,
+ const Twine &Msg = "unexpected token") {
+ return getParser().parseToken(T, Msg);
+ }
+
+ bool parseMany(std::function<bool()> parseOne, bool hasComma = true) {
+ return getParser().parseMany(parseOne, hasComma);
+ }
+
+ bool parseOptionalToken(AsmToken::TokenKind T) {
+ return getParser().parseOptionalToken(T);
+ }
+
+ bool check(bool P, const llvm::Twine &Msg) {
+ return getParser().check(P, Msg);
+ }
+
+ bool check(bool P, SMLoc Loc, const llvm::Twine &Msg) {
+ return getParser().check(P, Loc, Msg);
+ }
+
+ bool addErrorSuffix(const Twine &Suffix) {
+ return getParser().addErrorSuffix(Suffix);
+ }
bool HasBracketExpressions() const { return BracketExpressionsSupported; }
diff --git a/contrib/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h b/contrib/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h
index 28a7b9664882..70cd60c9a112 100644
--- a/contrib/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h
+++ b/contrib/llvm/include/llvm/MC/MCParser/MCTargetAsmParser.h
@@ -82,6 +82,12 @@ struct ParseInstructionInfo {
: AsmRewrites(rewrites) {}
};
+enum OperandMatchResultTy {
+ MatchOperand_Success, // operand matched successfully
+ MatchOperand_NoMatch, // operand did not match
+ MatchOperand_ParseFail // operand matched but had errors
+};
+
/// MCTargetAsmParser - Generic interface to target specific assembly parsers.
class MCTargetAsmParser : public MCAsmParserExtension {
public:
@@ -196,6 +202,13 @@ public:
return Match_InvalidOperand;
}
+ /// Validate the instruction match against any complex target predicates
+ /// before rendering any operands to it.
+ virtual unsigned
+ checkEarlyTargetMatchPredicate(MCInst &Inst, const OperandVector &Operands) {
+ return Match_Success;
+ }
+
/// checkTargetMatchPredicate - Validate the instruction match against
/// any complex target predicates not expressible via match classes.
virtual unsigned checkTargetMatchPredicate(MCInst &Inst) {
@@ -217,6 +230,16 @@ public:
}
virtual void onLabelParsed(MCSymbol *Symbol) { }
+
+ /// Ensure that all previously parsed instructions have been emitted to the
+ /// output streamer, if the target does not emit them immediately.
+ virtual void flushPendingInstructions(MCStreamer &Out) { }
+
+ virtual const MCExpr *createTargetUnaryExpr(const MCExpr *E,
+ AsmToken::TokenKind OperatorToken,
+ MCContext &Ctx) {
+ return nullptr;
+ }
};
} // End llvm namespace
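A small sketch of the intent behind the three enumerators now defined in this header (the helper itself is hypothetical): a target operand parser reports NoMatch to let generic parsing continue, ParseFail when it owns the syntax but found it malformed, and Success otherwise.

  #include "llvm/MC/MCParser/MCTargetAsmParser.h"
  using namespace llvm;

  // Hypothetical helper mapping a parse attempt onto the result kinds.
  static OperandMatchResultTy classifyParse(bool Recognized, bool Valid) {
    if (!Recognized)
      return MatchOperand_NoMatch;    // not ours; fall back to generic parsing
    if (!Valid)
      return MatchOperand_ParseFail;  // ours, but malformed: an error was emitted
    return MatchOperand_Success;
  }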
diff --git a/contrib/llvm/include/llvm/MC/MCRegisterInfo.h b/contrib/llvm/include/llvm/MC/MCRegisterInfo.h
index 548280614e92..3dc88a298ff8 100644
--- a/contrib/llvm/include/llvm/MC/MCRegisterInfo.h
+++ b/contrib/llvm/include/llvm/MC/MCRegisterInfo.h
@@ -17,6 +17,7 @@
#define LLVM_MC_MCREGISTERINFO_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/MC/LaneBitmask.h"
#include "llvm/Support/ErrorHandling.h"
#include <cassert>
@@ -161,7 +162,7 @@ private:
unsigned NumRegUnits; // Number of regunits.
const MCPhysReg (*RegUnitRoots)[2]; // Pointer to regunit root table.
const MCPhysReg *DiffLists; // Pointer to the difflists array
- const unsigned *RegUnitMaskSequences; // Pointer to lane mask sequences
+ const LaneBitmask *RegUnitMaskSequences; // Pointer to lane mask sequences
// for register units.
const char *RegStrings; // Pointer to the string table.
const char *RegClassStrings; // Pointer to the class strings.
@@ -248,7 +249,7 @@ public:
const MCPhysReg (*RURoots)[2],
unsigned NRU,
const MCPhysReg *DL,
- const unsigned *RUMS,
+ const LaneBitmask *RUMS,
const char *Strings,
const char *ClassStrings,
const uint16_t *SubIndices,
@@ -271,6 +272,16 @@ public:
NumSubRegIndices = NumIndices;
SubRegIdxRanges = SubIdxRanges;
RegEncodingTable = RET;
+
+ // Initialize DWARF register mapping variables
+ EHL2DwarfRegs = nullptr;
+ EHL2DwarfRegsSize = 0;
+ L2DwarfRegs = nullptr;
+ L2DwarfRegsSize = 0;
+ EHDwarf2LRegs = nullptr;
+ EHDwarf2LRegsSize = 0;
+ Dwarf2LRegs = nullptr;
+ Dwarf2LRegsSize = 0;
}
/// \brief Used to initialize LLVM register to Dwarf
@@ -569,11 +580,12 @@ public:
}
};
-/// MCRegUnitIterator enumerates a list of register units and their associated
-/// lane masks for Reg. The register units are in ascending numerical order.
+/// MCRegUnitMaskIterator enumerates a list of register units and their
+/// associated lane masks for Reg. The register units are in ascending
+/// numerical order.
class MCRegUnitMaskIterator {
MCRegUnitIterator RUIter;
- const unsigned *MaskListIter;
+ const LaneBitmask *MaskListIter;
public:
MCRegUnitMaskIterator() {}
/// Constructs an iterator that traverses the register units and their
@@ -585,7 +597,7 @@ public:
}
/// Returns a (RegUnit, LaneMask) pair.
- std::pair<unsigned,unsigned> operator*() const {
+ std::pair<unsigned,LaneBitmask> operator*() const {
return std::make_pair(*RUIter, *MaskListIter);
}
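A minimal sketch of consuming the iterator with its LaneBitmask-typed element (Reg and MRI are assumed to be supplied by the caller):

  #include "llvm/MC/MCRegisterInfo.h"
  #include <utility>
  using namespace llvm;

  static void visitRegUnits(unsigned Reg, const MCRegisterInfo &MRI) {
    for (MCRegUnitMaskIterator U(Reg, &MRI); U.isValid(); ++U) {
      std::pair<unsigned, LaneBitmask> P = *U;
      // P.first is the register unit, P.second the lanes of Reg it covers.
      (void)P;
    }
  }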
diff --git a/contrib/llvm/include/llvm/MC/MCSection.h b/contrib/llvm/include/llvm/MC/MCSection.h
index a8d7af9bd651..d4c31696b40f 100644
--- a/contrib/llvm/include/llvm/MC/MCSection.h
+++ b/contrib/llvm/include/llvm/MC/MCSection.h
@@ -31,16 +31,8 @@ class MCSection;
class MCSymbol;
class raw_ostream;
-template<>
-struct ilist_node_traits<MCFragment> {
- MCFragment *createNode(const MCFragment &V);
+template <> struct ilist_alloc_traits<MCFragment> {
static void deleteNode(MCFragment *V);
-
- void addNodeToList(MCFragment *) {}
- void removeNodeFromList(MCFragment *) {}
- void transferNodesFromList(ilist_node_traits & /*SrcTraits*/,
- ilist_iterator<MCFragment> /*first*/,
- ilist_iterator<MCFragment> /*last*/) {}
};
/// Instances of this class represent a uniqued identifier for a section in the
@@ -161,25 +153,17 @@ public:
const MCDummyFragment &getDummyFragment() const { return DummyFragment; }
MCDummyFragment &getDummyFragment() { return DummyFragment; }
- MCSection::iterator begin();
- MCSection::const_iterator begin() const {
- return const_cast<MCSection *>(this)->begin();
- }
+ iterator begin() { return Fragments.begin(); }
+ const_iterator begin() const { return Fragments.begin(); }
- MCSection::iterator end();
- MCSection::const_iterator end() const {
- return const_cast<MCSection *>(this)->end();
- }
+ iterator end() { return Fragments.end(); }
+ const_iterator end() const { return Fragments.end(); }
- MCSection::reverse_iterator rbegin();
- MCSection::const_reverse_iterator rbegin() const {
- return const_cast<MCSection *>(this)->rbegin();
- }
+ reverse_iterator rbegin() { return Fragments.rbegin(); }
+ const_reverse_iterator rbegin() const { return Fragments.rbegin(); }
- MCSection::reverse_iterator rend();
- MCSection::const_reverse_iterator rend() const {
- return const_cast<MCSection *>(this)->rend();
- }
+ reverse_iterator rend() { return Fragments.rend(); }
+ const_reverse_iterator rend() const { return Fragments.rend(); }
MCSection::iterator getSubsectionInsertionPoint(unsigned Subsection);
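With begin()/end() now forwarding straight to the fragment list, a plain range-for over the section works as before; a brief sketch for illustration:

  #include "llvm/MC/MCFragment.h"
  #include "llvm/MC/MCSection.h"
  #include <cstddef>
  using namespace llvm;

  static size_t countFragments(MCSection &Sec) {
    size_t N = 0;
    for (MCFragment &F : Sec) {   // uses the inlined begin()/end()
      (void)F;
      ++N;
    }
    return N;
  }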
diff --git a/contrib/llvm/include/llvm/MC/MCSectionCOFF.h b/contrib/llvm/include/llvm/MC/MCSectionCOFF.h
index c9fd8ea1605d..7d5f9f7f3cde 100644
--- a/contrib/llvm/include/llvm/MC/MCSectionCOFF.h
+++ b/contrib/llvm/include/llvm/MC/MCSectionCOFF.h
@@ -84,6 +84,10 @@ public:
return WinCFISectionID;
}
+ static bool isImplicitlyDiscardable(StringRef Name) {
+ return Name.startswith(".debug");
+ }
+
static bool classof(const MCSection *S) { return S->getVariant() == SV_COFF; }
};
diff --git a/contrib/llvm/include/llvm/MC/MCStreamer.h b/contrib/llvm/include/llvm/MC/MCStreamer.h
index cd710ee43425..41f00a24dfbf 100644
--- a/contrib/llvm/include/llvm/MC/MCStreamer.h
+++ b/contrib/llvm/include/llvm/MC/MCStreamer.h
@@ -169,6 +169,7 @@ class MCStreamer {
MCDwarfFrameInfo *getCurrentDwarfFrameInfo();
void EnsureValidDwarfFrame();
+ MCSymbol *EmitCFILabel();
MCSymbol *EmitCFICommon();
std::vector<WinEH::FrameInfo *> WinFrameInfos;
@@ -261,7 +262,11 @@ public:
///
/// If the comment includes embedded \n's, they will each get the comment
/// prefix as appropriate. The added comment should not end with a \n.
- virtual void AddComment(const Twine &T) {}
+ /// Each comment is terminated with an end of line by default (the EOL
+ /// parameter defaults to true); pass EOL as false to leave the comment
+ /// without a trailing newline.
+ virtual void AddComment(const Twine &T, bool EOL = true) {}
/// \brief Return a raw_ostream that comments can be written to. Unlike
/// AddComment, you are required to terminate comments with \n if you use this
@@ -470,13 +475,13 @@ public:
/// \brief Emits a COFF section relative relocation.
///
/// \param Symbol - Symbol the section relative relocation should point to.
- virtual void EmitCOFFSecRel32(MCSymbol const *Symbol);
+ virtual void EmitCOFFSecRel32(MCSymbol const *Symbol, uint64_t Offset);
/// \brief Emit an ELF .size directive.
///
/// This corresponds to an assembler statement such as:
/// .size symbol, expression
- virtual void emitELFSize(MCSymbolELF *Symbol, const MCExpr *Value);
+ virtual void emitELFSize(MCSymbol *Symbol, const MCExpr *Value);
/// \brief Emit a Linker Optimization Hint (LOH) directive.
/// \param Args - Arguments of the LOH.
@@ -569,6 +574,34 @@ public:
void EmitSymbolValue(const MCSymbol *Sym, unsigned Size,
bool IsSectionRelative = false);
+ /// \brief Emit the expression \p Value into the output as a dtprel
+ /// (64-bit DTP relative) value.
+ ///
+ /// This is used to implement assembler directives such as .dtpreldword on
+ /// targets that support them.
+ virtual void EmitDTPRel64Value(const MCExpr *Value);
+
+ /// \brief Emit the expression \p Value into the output as a dtprel
+ /// (32-bit DTP relative) value.
+ ///
+ /// This is used to implement assembler directives such as .dtprelword on
+ /// targets that support them.
+ virtual void EmitDTPRel32Value(const MCExpr *Value);
+
+ /// \brief Emit the expression \p Value into the output as a tprel
+ /// (64-bit TP relative) value.
+ ///
+ /// This is used to implement assembler directives such as .tpreldword on
+ /// targets that support them.
+ virtual void EmitTPRel64Value(const MCExpr *Value);
+
+ /// \brief Emit the expression \p Value into the output as a tprel
+ /// (32-bit TP relative) value.
+ ///
+ /// This is used to implement assembler directives such as .tprelword on
+ /// targets that support them.
+ virtual void EmitTPRel32Value(const MCExpr *Value);
+
/// \brief Emit the expression \p Value into the output as a gprel64 (64-bit
/// GP relative) value.
///
@@ -655,7 +688,8 @@ public:
/// \param Offset - The offset to reach. This may be an expression, but the
/// expression must be associated with the current section.
/// \param Value - The value to use when filling bytes.
- virtual void emitValueToOffset(const MCExpr *Offset, unsigned char Value = 0);
+ virtual void emitValueToOffset(const MCExpr *Offset, unsigned char Value,
+ SMLoc Loc);
/// @}
@@ -681,14 +715,24 @@ public:
StringRef FileName);
/// \brief Associate a filename with a specified logical file number. This
- /// implements the '.cv_file 4 "foo.c"' assembler directive.
- virtual unsigned EmitCVFileDirective(unsigned FileNo, StringRef Filename);
+ /// implements the '.cv_file 4 "foo.c"' assembler directive. Returns true on
+ /// success.
+ virtual bool EmitCVFileDirective(unsigned FileNo, StringRef Filename);
+
+ /// \brief Introduces a function id for use with .cv_loc.
+ virtual bool EmitCVFuncIdDirective(unsigned FunctionId);
+
+ /// \brief Introduces an inline call site id for use with .cv_loc. Includes
+ /// extra information for inline line table generation.
+ virtual bool EmitCVInlineSiteIdDirective(unsigned FunctionId, unsigned IAFunc,
+ unsigned IAFile, unsigned IALine,
+ unsigned IACol, SMLoc Loc);
/// \brief This implements the CodeView '.cv_loc' assembler directive.
virtual void EmitCVLocDirective(unsigned FunctionId, unsigned FileNo,
unsigned Line, unsigned Column,
bool PrologueEnd, bool IsStmt,
- StringRef FileName);
+ StringRef FileName, SMLoc Loc);
/// \brief This implements the CodeView '.cv_linetable' assembler directive.
virtual void EmitCVLinetableDirective(unsigned FunctionId,
@@ -697,10 +741,11 @@ public:
/// \brief This implements the CodeView '.cv_inline_linetable' assembler
/// directive.
- virtual void EmitCVInlineLinetableDirective(
- unsigned PrimaryFunctionId, unsigned SourceFileId, unsigned SourceLineNum,
- const MCSymbol *FnStartSym, const MCSymbol *FnEndSym,
- ArrayRef<unsigned> SecondaryFunctionIds);
+ virtual void EmitCVInlineLinetableDirective(unsigned PrimaryFunctionId,
+ unsigned SourceFileId,
+ unsigned SourceLineNum,
+ const MCSymbol *FnStartSym,
+ const MCSymbol *FnEndSym);
/// \brief This implements the CodeView '.cv_def_range' assembler
/// directive.
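To illustrate the extended streamer interface (a sketch, assuming the chosen streamer and target actually support the DTP-relative directive), a caller can now suppress the comment's trailing newline and emit a DTP-relative word:

  #include "llvm/MC/MCStreamer.h"
  using namespace llvm;

  static void emitAnnotatedDtpWord(MCStreamer &Streamer, const MCExpr *Expr) {
    Streamer.AddComment("dtp-relative word", /*EOL=*/false); // no newline appended
    Streamer.AddComment(" (see .dtprelword)");               // default EOL = true
    Streamer.EmitDTPRel32Value(Expr);                        // 32-bit DTP-relative value
  }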
diff --git a/contrib/llvm/include/llvm/MC/MCTargetOptions.h b/contrib/llvm/include/llvm/MC/MCTargetOptions.h
index 1d170b757cb3..a300c4f6fb00 100644
--- a/contrib/llvm/include/llvm/MC/MCTargetOptions.h
+++ b/contrib/llvm/include/llvm/MC/MCTargetOptions.h
@@ -14,6 +14,14 @@
namespace llvm {
+enum class ExceptionHandling {
+ None, /// No exception support
+ DwarfCFI, /// DWARF-like instruction based exceptions
+ SjLj, /// setjmp/longjmp based exceptions
+ ARM, /// ARM EHABI
+ WinEH, /// Windows Exception Handling
+};
+
class StringRef;
class MCTargetOptions {
@@ -30,9 +38,11 @@ public:
bool MCNoExecStack : 1;
bool MCFatalWarnings : 1;
bool MCNoWarn : 1;
+ bool MCNoDeprecatedWarn : 1;
bool MCSaveTempLabels : 1;
bool MCUseDwarfDirectory : 1;
bool MCIncrementalLinkerCompatible : 1;
+ bool MCPIECopyRelocations : 1;
bool ShowMCEncoding : 1;
bool ShowMCInst : 1;
bool AsmVerbose : 1;
@@ -56,9 +66,11 @@ inline bool operator==(const MCTargetOptions &LHS, const MCTargetOptions &RHS) {
ARE_EQUAL(MCNoExecStack) &&
ARE_EQUAL(MCFatalWarnings) &&
ARE_EQUAL(MCNoWarn) &&
+ ARE_EQUAL(MCNoDeprecatedWarn) &&
ARE_EQUAL(MCSaveTempLabels) &&
ARE_EQUAL(MCUseDwarfDirectory) &&
ARE_EQUAL(MCIncrementalLinkerCompatible) &&
+ ARE_EQUAL(MCPIECopyRelocations) &&
ARE_EQUAL(ShowMCEncoding) &&
ARE_EQUAL(ShowMCInst) &&
ARE_EQUAL(AsmVerbose) &&
diff --git a/contrib/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h b/contrib/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h
index 5180208d33b6..96179be3b8b0 100644
--- a/contrib/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h
+++ b/contrib/llvm/include/llvm/MC/MCTargetOptionsCommandFlags.h
@@ -26,8 +26,7 @@ cl::opt<MCTargetOptions::AsmInstrumentation> AsmInstrumentation(
cl::values(clEnumValN(MCTargetOptions::AsmInstrumentationNone, "none",
"no instrumentation at all"),
clEnumValN(MCTargetOptions::AsmInstrumentationAddress, "address",
- "instrument instructions with memory arguments"),
- clEnumValEnd));
+ "instrument instructions with memory arguments")));
cl::opt<bool> RelaxAll("mc-relax-all",
cl::desc("When used with filetype=obj, "
@@ -39,6 +38,8 @@ cl::opt<bool> IncrementalLinkerCompatible(
"When used with filetype=obj, "
"emit an object file which can be used with an incremental linker"));
+cl::opt<bool> PIECopyRelocations("pie-copy-relocations", cl::desc("PIE Copy Relocations"));
+
cl::opt<int> DwarfVersion("dwarf-version", cl::desc("Dwarf version"),
cl::init(0));
@@ -52,6 +53,9 @@ cl::opt<bool> FatalWarnings("fatal-warnings",
cl::opt<bool> NoWarn("no-warn", cl::desc("Suppress all warnings"));
cl::alias NoWarnW("W", cl::desc("Alias for --no-warn"), cl::aliasopt(NoWarn));
+cl::opt<bool> NoDeprecatedWarn("no-deprecated-warn",
+ cl::desc("Suppress all deprecated warnings"));
+
cl::opt<std::string>
ABIName("target-abi", cl::Hidden,
cl::desc("The name of the ABI to be targeted from the backend."),
@@ -63,11 +67,13 @@ static inline MCTargetOptions InitMCTargetOptionsFromFlags() {
(AsmInstrumentation == MCTargetOptions::AsmInstrumentationAddress);
Options.MCRelaxAll = RelaxAll;
Options.MCIncrementalLinkerCompatible = IncrementalLinkerCompatible;
+ Options.MCPIECopyRelocations = PIECopyRelocations;
Options.DwarfVersion = DwarfVersion;
Options.ShowMCInst = ShowMCInst;
Options.ABIName = ABIName;
Options.MCFatalWarnings = FatalWarnings;
Options.MCNoWarn = NoWarn;
+ Options.MCNoDeprecatedWarn = NoDeprecatedWarn;
return Options;
}
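A sketch of how a tool picks up the two new flags after cl::opt parsing has run (this header defines the options inline, so it is meant to be included from a single translation unit):

  #include "llvm/MC/MCTargetOptionsCommandFlags.h"
  using namespace llvm;

  static MCTargetOptions optionsFromCommandLine() {
    MCTargetOptions Opts = InitMCTargetOptionsFromFlags();
    // -no-deprecated-warn and -pie-copy-relocations now arrive here:
    if (Opts.MCNoDeprecatedWarn) { /* silence deprecation diagnostics */ }
    if (Opts.MCPIECopyRelocations) { /* emit PIE copy relocations */ }
    return Opts;
  }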
diff --git a/contrib/llvm/include/llvm/MC/MCWinCOFFStreamer.h b/contrib/llvm/include/llvm/MC/MCWinCOFFStreamer.h
index fe1ada9b9e5b..63e44f2e67d6 100644
--- a/contrib/llvm/include/llvm/MC/MCWinCOFFStreamer.h
+++ b/contrib/llvm/include/llvm/MC/MCWinCOFFStreamer.h
@@ -52,7 +52,7 @@ public:
void EndCOFFSymbolDef() override;
void EmitCOFFSafeSEH(MCSymbol const *Symbol) override;
void EmitCOFFSectionIndex(MCSymbol const *Symbol) override;
- void EmitCOFFSecRel32(MCSymbol const *Symbol) override;
+ void EmitCOFFSecRel32(MCSymbol const *Symbol, uint64_t Offset) override;
void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) override;
void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
diff --git a/contrib/llvm/include/llvm/MC/SectionKind.h b/contrib/llvm/include/llvm/MC/SectionKind.h
index 02fb22623cf7..66eb9ec56d14 100644
--- a/contrib/llvm/include/llvm/MC/SectionKind.h
+++ b/contrib/llvm/include/llvm/MC/SectionKind.h
@@ -28,6 +28,9 @@ class SectionKind {
/// Text - Text section, used for functions and other executable code.
Text,
+ /// ExecuteOnly - Text section that is not readable.
+ ExecuteOnly,
+
/// ReadOnly - Data that is never written to at program runtime by the
/// program or the dynamic linker. Things in the top-level readonly
/// SectionKind are not mergeable.
@@ -112,7 +115,10 @@ class SectionKind {
public:
bool isMetadata() const { return K == Metadata; }
- bool isText() const { return K == Text; }
+
+ bool isText() const { return K == Text || K == ExecuteOnly; }
+
+ bool isExecuteOnly() const { return K == ExecuteOnly; }
bool isReadOnly() const {
return K == ReadOnly || isMergeableCString() ||
@@ -172,6 +178,7 @@ public:
static SectionKind getMetadata() { return get(Metadata); }
static SectionKind getText() { return get(Text); }
+ static SectionKind getExecuteOnly() { return get(ExecuteOnly); }
static SectionKind getReadOnly() { return get(ReadOnly); }
static SectionKind getMergeable1ByteCString() {
return get(Mergeable1ByteCString);
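A minimal sketch of the new kind's behaviour: an execute-only section still answers isText(), and isExecuteOnly() distinguishes it from ordinary text.

  #include "llvm/MC/SectionKind.h"
  using namespace llvm;

  static bool isUnreadableText(SectionKind K) {
    return K.isText() && K.isExecuteOnly();   // true for SectionKind::getExecuteOnly()
  }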
diff --git a/contrib/llvm/include/llvm/MC/StringTableBuilder.h b/contrib/llvm/include/llvm/MC/StringTableBuilder.h
index f2b8ecd2d997..7da444f7bfb1 100644
--- a/contrib/llvm/include/llvm/MC/StringTableBuilder.h
+++ b/contrib/llvm/include/llvm/MC/StringTableBuilder.h
@@ -10,11 +10,12 @@
#ifndef LLVM_MC_STRINGTABLEBUILDER_H
#define LLVM_MC_STRINGTABLEBUILDER_H
-#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/CachedHashString.h"
#include "llvm/ADT/DenseMap.h"
#include <cassert>
namespace llvm {
+class raw_ostream;
/// \brief Utility for building string tables with deduplicated suffixes.
class StringTableBuilder {
@@ -22,21 +23,24 @@ public:
enum Kind { ELF, WinCOFF, MachO, RAW };
private:
- SmallString<256> StringTable;
- DenseMap<CachedHash<StringRef>, size_t> StringIndexMap;
+ DenseMap<CachedHashStringRef, size_t> StringIndexMap;
size_t Size = 0;
Kind K;
unsigned Alignment;
+ bool Finalized = false;
void finalizeStringTable(bool Optimize);
+ void initSize();
public:
StringTableBuilder(Kind K, unsigned Alignment = 1);
+ ~StringTableBuilder();
/// \brief Add a string to the builder. Returns the position of S in the
/// table. The position will be changed if finalize is used.
/// Can only be used before the table is finalized.
- size_t add(StringRef S);
+ size_t add(CachedHashStringRef S);
+ size_t add(StringRef S) { return add(CachedHashStringRef(S)); }
/// \brief Analyze the strings and build the final table. No more strings can
/// be added after this point.
@@ -46,28 +50,21 @@ public:
/// returned by add will still be valid.
void finalizeInOrder();
- /// \brief Retrieve the string table data. Can only be used after the table
- /// is finalized.
- StringRef data() const {
- assert(isFinalized());
- return StringTable;
- }
-
/// \brief Get the offset of a string in the string table. Can only be used
/// after the table is finalized.
- size_t getOffset(StringRef S) const;
-
- const DenseMap<CachedHash<StringRef>, size_t> &getMap() const {
- return StringIndexMap;
+ size_t getOffset(CachedHashStringRef S) const;
+ size_t getOffset(StringRef S) const {
+ return getOffset(CachedHashStringRef(S));
}
size_t getSize() const { return Size; }
void clear();
+ void write(raw_ostream &OS) const;
+ void write(uint8_t *Buf) const;
+
private:
- bool isFinalized() const {
- return !StringTable.empty();
- }
+ bool isFinalized() const { return Finalized; }
};
} // end llvm namespace
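A sketch of the reworked interface: strings go in through add(), offsets are only meaningful once the table is finalized, and the bytes are now emitted through write() instead of being read back via the removed data() accessor. (finalize() belongs to the existing interface and is not shown in this hunk.)

  #include "llvm/MC/StringTableBuilder.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;

  static void buildTable(raw_ostream &OS) {
    StringTableBuilder Builder(StringTableBuilder::ELF);
    Builder.add("foo");
    Builder.add("barfoo");                    // suffixes may be merged on finalize()
    Builder.finalize();                       // no more add() calls after this
    size_t FooOffset = Builder.getOffset("foo");
    (void)FooOffset;
    Builder.write(OS);                        // replaces the removed data() accessor
  }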
diff --git a/contrib/llvm/include/llvm/Object/Archive.h b/contrib/llvm/include/llvm/Object/Archive.h
index cfba2567371a..08128b0c2515 100644
--- a/contrib/llvm/include/llvm/Object/Archive.h
+++ b/contrib/llvm/include/llvm/Object/Archive.h
@@ -18,35 +18,59 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Object/Binary.h"
+#include "llvm/Support/Chrono.h"
+#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
namespace llvm {
namespace object {
-struct ArchiveMemberHeader {
- char Name[16];
- char LastModified[12];
- char UID[6];
- char GID[6];
- char AccessMode[8];
- char Size[10]; ///< Size of data, not including header or padding.
- char Terminator[2];
+
+class Archive;
+
+class ArchiveMemberHeader {
+public:
+ friend class Archive;
+ ArchiveMemberHeader(Archive const *Parent, const char *RawHeaderPtr,
+ uint64_t Size, Error *Err);
+ // ArchiveMemberHeader() = default;
/// Get the name without looking up long names.
- llvm::StringRef getName() const;
+ Expected<llvm::StringRef> getRawName() const;
+
+ /// Get the name looking up long names.
+ Expected<llvm::StringRef> getName(uint64_t Size) const;
/// Members are not larger than 4GB.
- ErrorOr<uint32_t> getSize() const;
+ Expected<uint32_t> getSize() const;
- sys::fs::perms getAccessMode() const;
- sys::TimeValue getLastModified() const;
+ Expected<sys::fs::perms> getAccessMode() const;
+ Expected<sys::TimePoint<std::chrono::seconds>> getLastModified() const;
llvm::StringRef getRawLastModified() const {
- return StringRef(LastModified, sizeof(LastModified)).rtrim(' ');
+ return StringRef(ArMemHdr->LastModified,
+ sizeof(ArMemHdr->LastModified)).rtrim(' ');
+ }
+ Expected<unsigned> getUID() const;
+ Expected<unsigned> getGID() const;
+
+ // This returns the size of the private struct ArMemHdrType
+ uint64_t getSizeOf() const {
+ return sizeof(ArMemHdrType);
}
- unsigned getUID() const;
- unsigned getGID() const;
+
+private:
+ struct ArMemHdrType {
+ char Name[16];
+ char LastModified[12];
+ char UID[6];
+ char GID[6];
+ char AccessMode[8];
+ char Size[10]; ///< Size of data, not including header or padding.
+ char Terminator[2];
+ };
+ Archive const *Parent;
+ ArMemHdrType const *ArMemHdr;
};
class Archive : public Binary {
@@ -55,52 +79,50 @@ public:
class Child {
friend Archive;
const Archive *Parent;
+ friend ArchiveMemberHeader;
+ ArchiveMemberHeader Header;
/// \brief Includes header but not padding byte.
StringRef Data;
/// \brief Offset from Data to the start of the file.
uint16_t StartOfFile;
- const ArchiveMemberHeader *getHeader() const {
- return reinterpret_cast<const ArchiveMemberHeader *>(Data.data());
- }
-
- bool isThinMember() const;
+ Expected<bool> isThinMember() const;
public:
- Child(const Archive *Parent, const char *Start, std::error_code *EC);
+ Child(const Archive *Parent, const char *Start, Error *Err);
Child(const Archive *Parent, StringRef Data, uint16_t StartOfFile);
bool operator ==(const Child &other) const {
- assert(Parent == other.Parent);
+ assert(!Parent || !other.Parent || Parent == other.Parent);
return Data.begin() == other.Data.begin();
}
const Archive *getParent() const { return Parent; }
- ErrorOr<Child> getNext() const;
+ Expected<Child> getNext() const;
- ErrorOr<StringRef> getName() const;
- ErrorOr<std::string> getFullName() const;
- StringRef getRawName() const { return getHeader()->getName(); }
- sys::TimeValue getLastModified() const {
- return getHeader()->getLastModified();
+ Expected<StringRef> getName() const;
+ Expected<std::string> getFullName() const;
+ Expected<StringRef> getRawName() const { return Header.getRawName(); }
+ Expected<sys::TimePoint<std::chrono::seconds>> getLastModified() const {
+ return Header.getLastModified();
}
StringRef getRawLastModified() const {
- return getHeader()->getRawLastModified();
+ return Header.getRawLastModified();
}
- unsigned getUID() const { return getHeader()->getUID(); }
- unsigned getGID() const { return getHeader()->getGID(); }
- sys::fs::perms getAccessMode() const {
- return getHeader()->getAccessMode();
+ Expected<unsigned> getUID() const { return Header.getUID(); }
+ Expected<unsigned> getGID() const { return Header.getGID(); }
+ Expected<sys::fs::perms> getAccessMode() const {
+ return Header.getAccessMode();
}
/// \return the size of the archive member without the header or padding.
- ErrorOr<uint64_t> getSize() const;
+ Expected<uint64_t> getSize() const;
/// \return the size in the archive header for this member.
- ErrorOr<uint64_t> getRawSize() const;
+ Expected<uint64_t> getRawSize() const;
- ErrorOr<StringRef> getBuffer() const;
+ Expected<StringRef> getBuffer() const;
uint64_t getChildOffset() const;
- ErrorOr<MemoryBufferRef> getMemoryBufferRef() const;
+ Expected<MemoryBufferRef> getMemoryBufferRef() const;
Expected<std::unique_ptr<Binary>>
getAsBinary(LLVMContext *Context = nullptr) const;
@@ -131,12 +153,12 @@ public:
// iteration. And if there is an error break out of the loop.
child_iterator &operator++() { // Preincrement
assert(E && "Can't increment iterator with no Error attached");
+ ErrorAsOutParameter ErrAsOutParam(E);
if (auto ChildOrErr = C.getNext())
C = *ChildOrErr;
else {
- ErrorAsOutParameter ErrAsOutParam(*E);
C = C.getParent()->child_end().C;
- *E = errorCodeToError(ChildOrErr.getError());
+ *E = ChildOrErr.takeError();
E = nullptr;
}
return *this;
@@ -158,7 +180,7 @@ public:
, SymbolIndex(symi)
, StringIndex(stri) {}
StringRef getName() const;
- ErrorOr<Child> getMember() const;
+ Expected<Child> getMember() const;
Symbol getNext() const;
};
@@ -218,8 +240,10 @@ public:
// check if a symbol is in the archive
Expected<Optional<Child>> findSym(StringRef name) const;
+ bool isEmpty() const;
bool hasSymbolTable() const;
StringRef getSymbolTable() const { return SymbolTable; }
+ StringRef getStringTable() const { return StringTable; }
uint32_t getNumberOfSymbols() const;
std::vector<std::unique_ptr<MemoryBuffer>> takeThinBuffers() {
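A hedged sketch of walking archive members under the Error/Expected-based interface; it assumes the existing children(Error&) range accessor (not shown in this excerpt), and the Error passed in must be examined once the loop ends.

  #include "llvm/Object/Archive.h"
  #include "llvm/Support/Error.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;
  using namespace llvm::object;

  static Error listMemberNames(const Archive &A, raw_ostream &OS) {
    Error Err = Error::success();
    for (const Archive::Child &C : A.children(Err)) {
      Expected<StringRef> NameOrErr = C.getName();   // Expected instead of ErrorOr
      if (!NameOrErr) {
        consumeError(std::move(Err));                // Err is still unchecked; discard it
        return NameOrErr.takeError();
      }
      OS << *NameOrErr << "\n";
    }
    return Err;                                      // failure from iteration, if any
  }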
diff --git a/contrib/llvm/include/llvm/Object/ArchiveWriter.h b/contrib/llvm/include/llvm/Object/ArchiveWriter.h
index 55b58fac4f66..3e84a5814d79 100644
--- a/contrib/llvm/include/llvm/Object/ArchiveWriter.h
+++ b/contrib/llvm/include/llvm/Object/ArchiveWriter.h
@@ -22,21 +22,11 @@ namespace llvm {
struct NewArchiveMember {
std::unique_ptr<MemoryBuffer> Buf;
- sys::TimeValue ModTime = sys::TimeValue::PosixZeroTime();
+ sys::TimePoint<std::chrono::seconds> ModTime;
unsigned UID = 0, GID = 0, Perms = 0644;
+ bool IsNew = false;
NewArchiveMember() = default;
- NewArchiveMember(NewArchiveMember &&Other)
- : Buf(std::move(Other.Buf)), ModTime(Other.ModTime), UID(Other.UID),
- GID(Other.GID), Perms(Other.Perms) {}
- NewArchiveMember &operator=(NewArchiveMember &&Other) {
- Buf = std::move(Other.Buf);
- ModTime = Other.ModTime;
- UID = Other.UID;
- GID = Other.GID;
- Perms = Other.Perms;
- return *this;
- }
NewArchiveMember(MemoryBufferRef BufRef);
static Expected<NewArchiveMember>
diff --git a/contrib/llvm/include/llvm/Object/Binary.h b/contrib/llvm/include/llvm/Object/Binary.h
index 5dff5406fcdd..00d06e3c7437 100644
--- a/contrib/llvm/include/llvm/Object/Binary.h
+++ b/contrib/llvm/include/llvm/Object/Binary.h
@@ -59,6 +59,8 @@ protected:
ID_MachO64L, // MachO 64-bit, little endian
ID_MachO64B, // MachO 64-bit, big endian
+ ID_Wasm,
+
ID_EndObjects
};
@@ -115,6 +117,8 @@ public:
return TypeID == ID_COFF;
}
+ bool isWasm() const { return TypeID == ID_Wasm; }
+
bool isCOFFImportFile() const {
return TypeID == ID_COFFImportFile;
}
diff --git a/contrib/llvm/include/llvm/Object/COFF.h b/contrib/llvm/include/llvm/Object/COFF.h
index dcc58b06e228..696042d29dab 100644
--- a/contrib/llvm/include/llvm/Object/COFF.h
+++ b/contrib/llvm/include/llvm/Object/COFF.h
@@ -15,6 +15,7 @@
#define LLVM_OBJECT_COFF_H
#include "llvm/ADT/PointerUnion.h"
+#include "llvm/DebugInfo/CodeView/CVDebugRecord.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/COFF.h"
#include "llvm/Support/Endian.h"
@@ -161,14 +162,6 @@ struct data_directory {
support::ulittle32_t Size;
};
-struct import_directory_table_entry {
- support::ulittle32_t ImportLookupTableRVA;
- support::ulittle32_t TimeDateStamp;
- support::ulittle32_t ForwarderChain;
- support::ulittle32_t NameRVA;
- support::ulittle32_t ImportAddressTableRVA;
-};
-
struct debug_directory {
support::ulittle32_t Characteristics;
support::ulittle32_t TimeDateStamp;
@@ -180,15 +173,6 @@ struct debug_directory {
support::ulittle32_t PointerToRawData;
};
-/// Information that is resent in debug_directory::AddressOfRawData if Type is
-/// IMAGE_DEBUG_TYPE_CODEVIEW.
-struct debug_pdb_info {
- support::ulittle32_t Signature;
- uint8_t Guid[16];
- support::ulittle32_t Age;
- // PDBFileName: The null-terminated PDB file name follows.
-};
-
template <typename IntTy>
struct import_lookup_table_entry {
IntTy Data;
@@ -534,6 +518,10 @@ struct coff_import_directory_table_entry {
support::ulittle32_t ForwarderChain;
support::ulittle32_t NameRVA;
support::ulittle32_t ImportAddressTableRVA;
+ bool isNull() const {
+ return ImportLookupTableRVA == 0 && TimeDateStamp == 0 &&
+ ForwarderChain == 0 && NameRVA == 0 && ImportAddressTableRVA == 0;
+ }
};
template <typename IntTy>
@@ -633,7 +621,7 @@ private:
const coff_symbol32 *SymbolTable32;
const char *StringTable;
uint32_t StringTableSize;
- const import_directory_table_entry *ImportDirectory;
+ const coff_import_directory_table_entry *ImportDirectory;
const delay_import_directory_table_entry *DelayImportDirectory;
uint32_t NumberOfDelayImportDirectory;
const export_directory_table_entry *ExportDirectory;
@@ -711,17 +699,23 @@ public:
return COFFBigObjHeader->PointerToSymbolTable;
llvm_unreachable("no COFF header!");
}
- uint32_t getNumberOfSymbols() const {
+ uint32_t getRawNumberOfSymbols() const {
if (COFFHeader)
return COFFHeader->isImportLibrary() ? 0 : COFFHeader->NumberOfSymbols;
if (COFFBigObjHeader)
return COFFBigObjHeader->NumberOfSymbols;
llvm_unreachable("no COFF header!");
}
+ uint32_t getNumberOfSymbols() const {
+ if (!SymbolTable16 && !SymbolTable32)
+ return 0;
+ return getRawNumberOfSymbols();
+ }
protected:
void moveSymbolNext(DataRefImpl &Symb) const override;
Expected<StringRef> getSymbolName(DataRefImpl Symb) const override;
Expected<uint64_t> getSymbolAddress(DataRefImpl Symb) const override;
+ uint32_t getSymbolAlignment(DataRefImpl Symb) const override;
uint64_t getSymbolValueImpl(DataRefImpl Symb) const override;
uint64_t getCommonSymbolSizeImpl(DataRefImpl Symb) const override;
uint32_t getSymbolFlags(DataRefImpl Symb) const override;
@@ -752,8 +746,8 @@ protected:
public:
COFFObjectFile(MemoryBufferRef Object, std::error_code &EC);
- basic_symbol_iterator symbol_begin_impl() const override;
- basic_symbol_iterator symbol_end_impl() const override;
+ basic_symbol_iterator symbol_begin() const override;
+ basic_symbol_iterator symbol_end() const override;
section_iterator section_begin() const override;
section_iterator section_end() const override;
@@ -872,14 +866,14 @@ public:
/// Get PDB information out of a codeview debug directory entry.
std::error_code getDebugPDBInfo(const debug_directory *DebugDir,
- const debug_pdb_info *&Info,
+ const codeview::DebugInfo *&Info,
StringRef &PDBFileName) const;
/// Get PDB information from an executable. If the information is not present,
/// Info will be set to nullptr and PDBFileName will be empty. An error is
/// returned only on corrupt object files. Convenience accessor that can be
/// used if the debug directory is not already handy.
- std::error_code getDebugPDBInfo(const debug_pdb_info *&Info,
+ std::error_code getDebugPDBInfo(const codeview::DebugInfo *&Info,
StringRef &PDBFileName) const;
bool isRelocatableObject() const override;
@@ -892,8 +886,8 @@ public:
class ImportDirectoryEntryRef {
public:
ImportDirectoryEntryRef() : OwningObject(nullptr) {}
- ImportDirectoryEntryRef(const import_directory_table_entry *Table, uint32_t I,
- const COFFObjectFile *Owner)
+ ImportDirectoryEntryRef(const coff_import_directory_table_entry *Table,
+ uint32_t I, const COFFObjectFile *Owner)
: ImportTable(Table), Index(I), OwningObject(Owner) {}
bool operator==(const ImportDirectoryEntryRef &Other) const;
@@ -903,15 +897,19 @@ public:
imported_symbol_iterator imported_symbol_end() const;
iterator_range<imported_symbol_iterator> imported_symbols() const;
+ imported_symbol_iterator lookup_table_begin() const;
+ imported_symbol_iterator lookup_table_end() const;
+ iterator_range<imported_symbol_iterator> lookup_table_symbols() const;
+
std::error_code getName(StringRef &Result) const;
std::error_code getImportLookupTableRVA(uint32_t &Result) const;
std::error_code getImportAddressTableRVA(uint32_t &Result) const;
std::error_code
- getImportTableEntry(const import_directory_table_entry *&Result) const;
+ getImportTableEntry(const coff_import_directory_table_entry *&Result) const;
private:
- const import_directory_table_entry *ImportTable;
+ const coff_import_directory_table_entry *ImportTable;
uint32_t Index;
const COFFObjectFile *OwningObject;
};
diff --git a/contrib/llvm/include/llvm/Object/COFFImportFile.h b/contrib/llvm/include/llvm/Object/COFFImportFile.h
index b04a44ea60d2..4192fe7e5c90 100644
--- a/contrib/llvm/include/llvm/Object/COFFImportFile.h
+++ b/contrib/llvm/include/llvm/Object/COFFImportFile.h
@@ -47,11 +47,11 @@ public:
return SymbolRef::SF_Global;
}
- basic_symbol_iterator symbol_begin_impl() const override {
+ basic_symbol_iterator symbol_begin() const override {
return BasicSymbolRef(DataRefImpl(), this);
}
- basic_symbol_iterator symbol_end_impl() const override {
+ basic_symbol_iterator symbol_end() const override {
DataRefImpl Symb;
Symb.p = isCode() ? 2 : 1;
return BasicSymbolRef(Symb, this);
diff --git a/contrib/llvm/include/llvm/Object/ELF.h b/contrib/llvm/include/llvm/Object/ELF.h
index 80b8be03810c..aaa79ae70f01 100644
--- a/contrib/llvm/include/llvm/Object/ELF.h
+++ b/contrib/llvm/include/llvm/Object/ELF.h
@@ -33,27 +33,29 @@ getElfArchType(StringRef Object) {
(uint8_t)Object[ELF::EI_DATA]);
}
+static inline Error createError(StringRef Err) {
+ return make_error<StringError>(Err, object_error::parse_failed);
+}
+
template <class ELFT>
class ELFFile {
public:
LLVM_ELF_IMPORT_TYPES_ELFT(ELFT)
- typedef typename std::conditional<ELFT::Is64Bits,
- uint64_t, uint32_t>::type uintX_t;
-
- typedef Elf_Ehdr_Impl<ELFT> Elf_Ehdr;
- typedef Elf_Shdr_Impl<ELFT> Elf_Shdr;
- typedef Elf_Sym_Impl<ELFT> Elf_Sym;
- typedef Elf_Dyn_Impl<ELFT> Elf_Dyn;
- typedef Elf_Phdr_Impl<ELFT> Elf_Phdr;
- typedef Elf_Rel_Impl<ELFT, false> Elf_Rel;
- typedef Elf_Rel_Impl<ELFT, true> Elf_Rela;
- typedef Elf_Verdef_Impl<ELFT> Elf_Verdef;
- typedef Elf_Verdaux_Impl<ELFT> Elf_Verdaux;
- typedef Elf_Verneed_Impl<ELFT> Elf_Verneed;
- typedef Elf_Vernaux_Impl<ELFT> Elf_Vernaux;
- typedef Elf_Versym_Impl<ELFT> Elf_Versym;
- typedef Elf_Hash_Impl<ELFT> Elf_Hash;
- typedef Elf_GnuHash_Impl<ELFT> Elf_GnuHash;
+ typedef typename ELFT::uint uintX_t;
+ typedef typename ELFT::Ehdr Elf_Ehdr;
+ typedef typename ELFT::Shdr Elf_Shdr;
+ typedef typename ELFT::Sym Elf_Sym;
+ typedef typename ELFT::Dyn Elf_Dyn;
+ typedef typename ELFT::Phdr Elf_Phdr;
+ typedef typename ELFT::Rel Elf_Rel;
+ typedef typename ELFT::Rela Elf_Rela;
+ typedef typename ELFT::Verdef Elf_Verdef;
+ typedef typename ELFT::Verdaux Elf_Verdaux;
+ typedef typename ELFT::Verneed Elf_Verneed;
+ typedef typename ELFT::Vernaux Elf_Vernaux;
+ typedef typename ELFT::Versym Elf_Versym;
+ typedef typename ELFT::Hash Elf_Hash;
+ typedef typename ELFT::GnuHash Elf_GnuHash;
typedef typename ELFT::DynRange Elf_Dyn_Range;
typedef typename ELFT::ShdrRange Elf_Shdr_Range;
typedef typename ELFT::SymRange Elf_Sym_Range;
@@ -71,20 +73,24 @@ private:
StringRef Buf;
- const Elf_Ehdr *Header;
- const Elf_Shdr *SectionHeaderTable = nullptr;
- StringRef DotShstrtab; // Section header string table.
-
public:
- template<typename T>
- const T *getEntry(uint32_t Section, uint32_t Entry) const;
+ const Elf_Ehdr *getHeader() const {
+ return reinterpret_cast<const Elf_Ehdr *>(base());
+ }
+
template <typename T>
- const T *getEntry(const Elf_Shdr *Section, uint32_t Entry) const;
+ Expected<const T *> getEntry(uint32_t Section, uint32_t Entry) const;
+ template <typename T>
+ Expected<const T *> getEntry(const Elf_Shdr *Section, uint32_t Entry) const;
- ErrorOr<StringRef> getStringTable(const Elf_Shdr *Section) const;
- ErrorOr<StringRef> getStringTableForSymtab(const Elf_Shdr &Section) const;
+ Expected<StringRef> getStringTable(const Elf_Shdr *Section) const;
+ Expected<StringRef> getStringTableForSymtab(const Elf_Shdr &Section) const;
+ Expected<StringRef> getStringTableForSymtab(const Elf_Shdr &Section,
+ Elf_Shdr_Range Sections) const;
- ErrorOr<ArrayRef<Elf_Word>> getSHNDXTable(const Elf_Shdr &Section) const;
+ Expected<ArrayRef<Elf_Word>> getSHNDXTable(const Elf_Shdr &Section) const;
+ Expected<ArrayRef<Elf_Word>> getSHNDXTable(const Elf_Shdr &Section,
+ Elf_Shdr_Range Sections) const;
void VerifyStrTab(const Elf_Shdr *sh) const;
@@ -93,118 +99,66 @@ public:
SmallVectorImpl<char> &Result) const;
/// \brief Get the symbol for a given relocation.
- const Elf_Sym *getRelocationSymbol(const Elf_Rel *Rel,
- const Elf_Shdr *SymTab) const;
+ Expected<const Elf_Sym *> getRelocationSymbol(const Elf_Rel *Rel,
+ const Elf_Shdr *SymTab) const;
- ELFFile(StringRef Object, std::error_code &EC);
+ ELFFile(StringRef Object);
bool isMipsELF64() const {
- return Header->e_machine == ELF::EM_MIPS &&
- Header->getFileClass() == ELF::ELFCLASS64;
+ return getHeader()->e_machine == ELF::EM_MIPS &&
+ getHeader()->getFileClass() == ELF::ELFCLASS64;
}
bool isMips64EL() const {
- return Header->e_machine == ELF::EM_MIPS &&
- Header->getFileClass() == ELF::ELFCLASS64 &&
- Header->getDataEncoding() == ELF::ELFDATA2LSB;
+ return isMipsELF64() &&
+ getHeader()->getDataEncoding() == ELF::ELFDATA2LSB;
}
- const Elf_Shdr *section_begin() const;
- const Elf_Shdr *section_end() const;
- Elf_Shdr_Range sections() const {
- return makeArrayRef(section_begin(), section_end());
- }
+ Expected<Elf_Shdr_Range> sections() const;
- const Elf_Sym *symbol_begin(const Elf_Shdr *Sec) const {
+ Expected<Elf_Sym_Range> symbols(const Elf_Shdr *Sec) const {
if (!Sec)
- return nullptr;
- if (Sec->sh_entsize != sizeof(Elf_Sym))
- report_fatal_error("Invalid symbol size");
- return reinterpret_cast<const Elf_Sym *>(base() + Sec->sh_offset);
- }
- const Elf_Sym *symbol_end(const Elf_Shdr *Sec) const {
- if (!Sec)
- return nullptr;
- uint64_t Size = Sec->sh_size;
- if (Size % sizeof(Elf_Sym))
- report_fatal_error("Invalid symbol table size");
- return symbol_begin(Sec) + Size / sizeof(Elf_Sym);
- }
- Elf_Sym_Range symbols(const Elf_Shdr *Sec) const {
- return makeArrayRef(symbol_begin(Sec), symbol_end(Sec));
+ return makeArrayRef<Elf_Sym>(nullptr, nullptr);
+ return getSectionContentsAsArray<Elf_Sym>(Sec);
}
- const Elf_Rela *rela_begin(const Elf_Shdr *sec) const {
- if (sec->sh_entsize != sizeof(Elf_Rela))
- report_fatal_error("Invalid relocation entry size");
- return reinterpret_cast<const Elf_Rela *>(base() + sec->sh_offset);
+ Expected<Elf_Rela_Range> relas(const Elf_Shdr *Sec) const {
+ return getSectionContentsAsArray<Elf_Rela>(Sec);
}
- const Elf_Rela *rela_end(const Elf_Shdr *sec) const {
- uint64_t Size = sec->sh_size;
- if (Size % sizeof(Elf_Rela))
- report_fatal_error("Invalid relocation table size");
- return rela_begin(sec) + Size / sizeof(Elf_Rela);
- }
-
- Elf_Rela_Range relas(const Elf_Shdr *Sec) const {
- return makeArrayRef(rela_begin(Sec), rela_end(Sec));
- }
-
- const Elf_Rel *rel_begin(const Elf_Shdr *sec) const {
- if (sec->sh_entsize != sizeof(Elf_Rel))
- report_fatal_error("Invalid relocation entry size");
- return reinterpret_cast<const Elf_Rel *>(base() + sec->sh_offset);
- }
-
- const Elf_Rel *rel_end(const Elf_Shdr *sec) const {
- uint64_t Size = sec->sh_size;
- if (Size % sizeof(Elf_Rel))
- report_fatal_error("Invalid relocation table size");
- return rel_begin(sec) + Size / sizeof(Elf_Rel);
- }
-
- Elf_Rel_Range rels(const Elf_Shdr *Sec) const {
- return makeArrayRef(rel_begin(Sec), rel_end(Sec));
+ Expected<Elf_Rel_Range> rels(const Elf_Shdr *Sec) const {
+ return getSectionContentsAsArray<Elf_Rel>(Sec);
}
/// \brief Iterate over program header table.
- const Elf_Phdr *program_header_begin() const {
- if (Header->e_phnum && Header->e_phentsize != sizeof(Elf_Phdr))
- report_fatal_error("Invalid program header size");
- return reinterpret_cast<const Elf_Phdr *>(base() + Header->e_phoff);
- }
-
- const Elf_Phdr *program_header_end() const {
- return program_header_begin() + Header->e_phnum;
+ Expected<Elf_Phdr_Range> program_headers() const {
+ if (getHeader()->e_phnum && getHeader()->e_phentsize != sizeof(Elf_Phdr))
+ return createError("invalid e_phentsize");
+ auto *Begin =
+ reinterpret_cast<const Elf_Phdr *>(base() + getHeader()->e_phoff);
+ return makeArrayRef(Begin, Begin + getHeader()->e_phnum);
}
- const Elf_Phdr_Range program_headers() const {
- return makeArrayRef(program_header_begin(), program_header_end());
- }
-
- uint64_t getNumSections() const;
- uintX_t getStringTableIndex() const;
- uint32_t getExtendedSymbolTableIndex(const Elf_Sym *Sym,
- const Elf_Shdr *SymTab,
- ArrayRef<Elf_Word> ShndxTable) const;
- uint32_t getExtendedSymbolTableIndex(const Elf_Sym *Sym,
- const Elf_Sym *FirstSym,
- ArrayRef<Elf_Word> ShndxTable) const;
- const Elf_Ehdr *getHeader() const { return Header; }
- ErrorOr<const Elf_Shdr *> getSection(const Elf_Sym *Sym,
- const Elf_Shdr *SymTab,
- ArrayRef<Elf_Word> ShndxTable) const;
- ErrorOr<const Elf_Shdr *> getSection(uint32_t Index) const;
-
- const Elf_Sym *getSymbol(const Elf_Shdr *Sec, uint32_t Index) const {
- return &*(symbol_begin(Sec) + Index);
- }
-
- ErrorOr<StringRef> getSectionName(const Elf_Shdr *Section) const;
+ Expected<StringRef> getSectionStringTable(Elf_Shdr_Range Sections) const;
+ Expected<uint32_t> getSectionIndex(const Elf_Sym *Sym, Elf_Sym_Range Syms,
+ ArrayRef<Elf_Word> ShndxTable) const;
+ Expected<const Elf_Shdr *> getSection(const Elf_Sym *Sym,
+ const Elf_Shdr *SymTab,
+ ArrayRef<Elf_Word> ShndxTable) const;
+ Expected<const Elf_Shdr *> getSection(const Elf_Sym *Sym,
+ Elf_Sym_Range Symtab,
+ ArrayRef<Elf_Word> ShndxTable) const;
+ Expected<const Elf_Shdr *> getSection(uint32_t Index) const;
+
+ Expected<const Elf_Sym *> getSymbol(const Elf_Shdr *Sec,
+ uint32_t Index) const;
+
+ Expected<StringRef> getSectionName(const Elf_Shdr *Section) const;
+ Expected<StringRef> getSectionName(const Elf_Shdr *Section,
+ StringRef DotShstrtab) const;
template <typename T>
- ErrorOr<ArrayRef<T>> getSectionContentsAsArray(const Elf_Shdr *Sec) const;
- ErrorOr<ArrayRef<uint8_t> > getSectionContents(const Elf_Shdr *Sec) const;
+ Expected<ArrayRef<T>> getSectionContentsAsArray(const Elf_Shdr *Sec) const;
+ Expected<ArrayRef<uint8_t>> getSectionContents(const Elf_Shdr *Sec) const;
};
typedef ELFFile<ELFType<support::little, false>> ELF32LEFile;
@@ -213,61 +167,116 @@ typedef ELFFile<ELFType<support::big, false>> ELF32BEFile;
typedef ELFFile<ELFType<support::big, true>> ELF64BEFile;
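A hedged sketch of the Expected-based section walk, using the ELF64BEFile typedef just above; Obj is assumed to wrap an already-validated buffer.

  #include "llvm/Object/ELF.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;
  using namespace llvm::object;

  static Error printSectionNames(const ELF64BEFile &Obj, raw_ostream &OS) {
    auto SectionsOrErr = Obj.sections();           // Expected<Elf_Shdr_Range>
    if (!SectionsOrErr)
      return SectionsOrErr.takeError();
    for (const auto &Sec : *SectionsOrErr) {
      auto NameOrErr = Obj.getSectionName(&Sec);   // Expected<StringRef>
      if (!NameOrErr)
        return NameOrErr.takeError();
      OS << *NameOrErr << "\n";
    }
    return Error::success();
  }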
template <class ELFT>
-uint32_t ELFFile<ELFT>::getExtendedSymbolTableIndex(
- const Elf_Sym *Sym, const Elf_Shdr *SymTab,
- ArrayRef<Elf_Word> ShndxTable) const {
- return getExtendedSymbolTableIndex(Sym, symbol_begin(SymTab), ShndxTable);
+inline Expected<const typename ELFT::Shdr *>
+getSection(typename ELFT::ShdrRange Sections, uint32_t Index) {
+ if (Index >= Sections.size())
+ return createError("invalid section index");
+ return &Sections[Index];
}
template <class ELFT>
-uint32_t ELFFile<ELFT>::getExtendedSymbolTableIndex(
- const Elf_Sym *Sym, const Elf_Sym *FirstSym,
- ArrayRef<Elf_Word> ShndxTable) const {
+inline Expected<uint32_t>
+getExtendedSymbolTableIndex(const typename ELFT::Sym *Sym,
+ const typename ELFT::Sym *FirstSym,
+ ArrayRef<typename ELFT::Word> ShndxTable) {
assert(Sym->st_shndx == ELF::SHN_XINDEX);
unsigned Index = Sym - FirstSym;
+ if (Index >= ShndxTable.size())
+ return createError("index past the end of the symbol table");
// The size of the table was checked in getSHNDXTable.
return ShndxTable[Index];
}
template <class ELFT>
-ErrorOr<const typename ELFFile<ELFT>::Elf_Shdr *>
+Expected<uint32_t>
+ELFFile<ELFT>::getSectionIndex(const Elf_Sym *Sym, Elf_Sym_Range Syms,
+ ArrayRef<Elf_Word> ShndxTable) const {
+ uint32_t Index = Sym->st_shndx;
+ if (Index == ELF::SHN_XINDEX) {
+ auto ErrorOrIndex = object::getExtendedSymbolTableIndex<ELFT>(
+ Sym, Syms.begin(), ShndxTable);
+ if (!ErrorOrIndex)
+ return ErrorOrIndex.takeError();
+ return *ErrorOrIndex;
+ }
+ if (Index == ELF::SHN_UNDEF || Index >= ELF::SHN_LORESERVE)
+ return 0;
+ return Index;
+}
+
+template <class ELFT>
+Expected<const typename ELFT::Shdr *>
ELFFile<ELFT>::getSection(const Elf_Sym *Sym, const Elf_Shdr *SymTab,
ArrayRef<Elf_Word> ShndxTable) const {
- uint32_t Index = Sym->st_shndx;
- if (Index == ELF::SHN_XINDEX)
- return getSection(getExtendedSymbolTableIndex(Sym, SymTab, ShndxTable));
+ auto SymsOrErr = symbols(SymTab);
+ if (!SymsOrErr)
+ return SymsOrErr.takeError();
+ return getSection(Sym, *SymsOrErr, ShndxTable);
+}
- if (Index == ELF::SHN_UNDEF || Index >= ELF::SHN_LORESERVE)
+template <class ELFT>
+Expected<const typename ELFT::Shdr *>
+ELFFile<ELFT>::getSection(const Elf_Sym *Sym, Elf_Sym_Range Symbols,
+ ArrayRef<Elf_Word> ShndxTable) const {
+ auto IndexOrErr = getSectionIndex(Sym, Symbols, ShndxTable);
+ if (!IndexOrErr)
+ return IndexOrErr.takeError();
+ uint32_t Index = *IndexOrErr;
+ if (Index == 0)
return nullptr;
- return getSection(Sym->st_shndx);
+ auto SectionsOrErr = sections();
+ if (!SectionsOrErr)
+ return SectionsOrErr.takeError();
+ return object::getSection<ELFT>(*SectionsOrErr, Index);
+}
+
+template <class ELFT>
+inline Expected<const typename ELFT::Sym *>
+getSymbol(typename ELFT::SymRange Symbols, uint32_t Index) {
+ if (Index >= Symbols.size())
+ return createError("invalid symbol index");
+ return &Symbols[Index];
+}
+
+template <class ELFT>
+Expected<const typename ELFT::Sym *>
+ELFFile<ELFT>::getSymbol(const Elf_Shdr *Sec, uint32_t Index) const {
+ auto SymtabOrErr = symbols(Sec);
+ if (!SymtabOrErr)
+ return SymtabOrErr.takeError();
+ return object::getSymbol<ELFT>(*SymtabOrErr, Index);
}
template <class ELFT>
template <typename T>
-ErrorOr<ArrayRef<T>>
+Expected<ArrayRef<T>>
ELFFile<ELFT>::getSectionContentsAsArray(const Elf_Shdr *Sec) const {
+ if (Sec->sh_entsize != sizeof(T) && sizeof(T) != 1)
+ return createError("invalid sh_entsize");
+
uintX_t Offset = Sec->sh_offset;
uintX_t Size = Sec->sh_size;
if (Size % sizeof(T))
- return object_error::parse_failed;
- if (Offset + Size > Buf.size())
- return object_error::parse_failed;
+ return createError("size is not a multiple of sh_entsize");
+ if ((std::numeric_limits<uintX_t>::max() - Offset < Size) ||
+ Offset + Size > Buf.size())
+ return createError("invalid section offset");
const T *Start = reinterpret_cast<const T *>(base() + Offset);
return makeArrayRef(Start, Size / sizeof(T));
}
template <class ELFT>
-ErrorOr<ArrayRef<uint8_t>>
+Expected<ArrayRef<uint8_t>>
ELFFile<ELFT>::getSectionContents(const Elf_Shdr *Sec) const {
return getSectionContentsAsArray<uint8_t>(Sec);
}
template <class ELFT>
StringRef ELFFile<ELFT>::getRelocationTypeName(uint32_t Type) const {
- return getELFRelocationTypeName(Header->e_machine, Type);
+ return getELFRelocationTypeName(getHeader()->e_machine, Type);
}
template <class ELFT>
@@ -302,7 +311,7 @@ void ELFFile<ELFT>::getRelocationTypeName(uint32_t Type,
}
template <class ELFT>
-const typename ELFFile<ELFT>::Elf_Sym *
+Expected<const typename ELFT::Sym *>
ELFFile<ELFT>::getRelocationSymbol(const Elf_Rel *Rel,
const Elf_Shdr *SymTab) const {
uint32_t Index = Rel->getSymbol(isMips64EL());
@@ -312,193 +321,197 @@ ELFFile<ELFT>::getRelocationSymbol(const Elf_Rel *Rel,
}
template <class ELFT>
-uint64_t ELFFile<ELFT>::getNumSections() const {
- assert(Header && "Header not initialized!");
- if (Header->e_shnum == ELF::SHN_UNDEF && Header->e_shoff > 0) {
- assert(SectionHeaderTable && "SectionHeaderTable not initialized!");
- return SectionHeaderTable->sh_size;
- }
- return Header->e_shnum;
+Expected<StringRef>
+ELFFile<ELFT>::getSectionStringTable(Elf_Shdr_Range Sections) const {
+ uint32_t Index = getHeader()->e_shstrndx;
+ if (Index == ELF::SHN_XINDEX)
+ Index = Sections[0].sh_link;
+
+ if (!Index) // no section string table.
+ return "";
+ if (Index >= Sections.size())
+ return createError("invalid section index");
+ return getStringTable(&Sections[Index]);
}
template <class ELFT>
-typename ELFFile<ELFT>::uintX_t ELFFile<ELFT>::getStringTableIndex() const {
- if (Header->e_shnum == ELF::SHN_UNDEF) {
- if (Header->e_shstrndx == ELF::SHN_HIRESERVE)
- return SectionHeaderTable->sh_link;
- if (Header->e_shstrndx >= getNumSections())
- return 0;
- }
- return Header->e_shstrndx;
+ELFFile<ELFT>::ELFFile(StringRef Object) : Buf(Object) {
+ assert(sizeof(Elf_Ehdr) <= Buf.size() && "Invalid buffer");
}
template <class ELFT>
-ELFFile<ELFT>::ELFFile(StringRef Object, std::error_code &EC)
- : Buf(Object) {
- const uint64_t FileSize = Buf.size();
+static bool compareAddr(uint64_t VAddr, const Elf_Phdr_Impl<ELFT> *Phdr) {
+ return VAddr < Phdr->p_vaddr;
+}
- if (sizeof(Elf_Ehdr) > FileSize) {
- // File too short!
- EC = object_error::parse_failed;
- return;
- }
+template <class ELFT>
+Expected<typename ELFT::ShdrRange> ELFFile<ELFT>::sections() const {
+ const uintX_t SectionTableOffset = getHeader()->e_shoff;
+ if (SectionTableOffset == 0)
+ return ArrayRef<Elf_Shdr>();
- Header = reinterpret_cast<const Elf_Ehdr *>(base());
+ if (getHeader()->e_shentsize != sizeof(Elf_Shdr))
+ return createError(
+ "invalid section header entry size (e_shentsize) in ELF header");
- if (Header->e_shoff == 0)
- return;
+ const uint64_t FileSize = Buf.size();
- const uint64_t SectionTableOffset = Header->e_shoff;
+ if (SectionTableOffset + sizeof(Elf_Shdr) > FileSize)
+ return createError("section header table goes past the end of the file");
- if (SectionTableOffset + sizeof(Elf_Shdr) > FileSize) {
- // Section header table goes past end of file!
- EC = object_error::parse_failed;
- return;
- }
+ // Invalid address alignment of section headers
+ if (SectionTableOffset & (alignof(Elf_Shdr) - 1))
+ return createError("invalid alignment of section headers");
- // The getNumSections() call below depends on SectionHeaderTable being set.
- SectionHeaderTable =
- reinterpret_cast<const Elf_Shdr *>(base() + SectionTableOffset);
- const uint64_t SectionTableSize = getNumSections() * Header->e_shentsize;
+ const Elf_Shdr *First =
+ reinterpret_cast<const Elf_Shdr *>(base() + SectionTableOffset);
- if (SectionTableOffset + SectionTableSize > FileSize) {
- // Section table goes past end of file!
- EC = object_error::parse_failed;
- return;
- }
+ uintX_t NumSections = getHeader()->e_shnum;
+ if (NumSections == 0)
+ NumSections = First->sh_size;
- // Get string table sections.
- uintX_t StringTableIndex = getStringTableIndex();
- if (StringTableIndex) {
- ErrorOr<const Elf_Shdr *> StrTabSecOrErr = getSection(StringTableIndex);
- if ((EC = StrTabSecOrErr.getError()))
- return;
-
- ErrorOr<StringRef> StringTableOrErr = getStringTable(*StrTabSecOrErr);
- if ((EC = StringTableOrErr.getError()))
- return;
- DotShstrtab = *StringTableOrErr;
- }
+ if (NumSections > UINT64_MAX / sizeof(Elf_Shdr))
+ return createError("section table goes past the end of file");
- EC = std::error_code();
-}
+ const uint64_t SectionTableSize = NumSections * sizeof(Elf_Shdr);
-template <class ELFT>
-static bool compareAddr(uint64_t VAddr, const Elf_Phdr_Impl<ELFT> *Phdr) {
- return VAddr < Phdr->p_vaddr;
-}
-
-template <class ELFT>
-const typename ELFFile<ELFT>::Elf_Shdr *ELFFile<ELFT>::section_begin() const {
- if (Header->e_shentsize != sizeof(Elf_Shdr))
- report_fatal_error(
- "Invalid section header entry size (e_shentsize) in ELF header");
- return reinterpret_cast<const Elf_Shdr *>(base() + Header->e_shoff);
-}
+ // Section table goes past end of file!
+ if (SectionTableOffset + SectionTableSize > FileSize)
+ return createError("section table goes past the end of file");
-template <class ELFT>
-const typename ELFFile<ELFT>::Elf_Shdr *ELFFile<ELFT>::section_end() const {
- return section_begin() + getNumSections();
+ return makeArrayRef(First, NumSections);
}
template <class ELFT>
template <typename T>
-const T *ELFFile<ELFT>::getEntry(uint32_t Section, uint32_t Entry) const {
- ErrorOr<const Elf_Shdr *> Sec = getSection(Section);
- if (std::error_code EC = Sec.getError())
- report_fatal_error(EC.message());
- return getEntry<T>(*Sec, Entry);
+Expected<const T *> ELFFile<ELFT>::getEntry(uint32_t Section,
+ uint32_t Entry) const {
+ auto SecOrErr = getSection(Section);
+ if (!SecOrErr)
+ return SecOrErr.takeError();
+ return getEntry<T>(*SecOrErr, Entry);
}
template <class ELFT>
template <typename T>
-const T *ELFFile<ELFT>::getEntry(const Elf_Shdr *Section,
- uint32_t Entry) const {
- return reinterpret_cast<const T *>(base() + Section->sh_offset +
- (Entry * Section->sh_entsize));
+Expected<const T *> ELFFile<ELFT>::getEntry(const Elf_Shdr *Section,
+ uint32_t Entry) const {
+ if (sizeof(T) != Section->sh_entsize)
+ return createError("invalid sh_entsize");
+ size_t Pos = Section->sh_offset + Entry * sizeof(T);
+ if (Pos + sizeof(T) > Buf.size())
+ return createError("invalid section offset");
+ return reinterpret_cast<const T *>(base() + Pos);
}
template <class ELFT>
-ErrorOr<const typename ELFFile<ELFT>::Elf_Shdr *>
+Expected<const typename ELFT::Shdr *>
ELFFile<ELFT>::getSection(uint32_t Index) const {
- assert(SectionHeaderTable && "SectionHeaderTable not initialized!");
- if (Index >= getNumSections())
- return object_error::invalid_section_index;
-
- return reinterpret_cast<const Elf_Shdr *>(
- reinterpret_cast<const char *>(SectionHeaderTable) +
- (Index * Header->e_shentsize));
+ auto TableOrErr = sections();
+ if (!TableOrErr)
+ return TableOrErr.takeError();
+ return object::getSection<ELFT>(*TableOrErr, Index);
}
template <class ELFT>
-ErrorOr<StringRef>
+Expected<StringRef>
ELFFile<ELFT>::getStringTable(const Elf_Shdr *Section) const {
if (Section->sh_type != ELF::SHT_STRTAB)
- return object_error::parse_failed;
- uint64_t Offset = Section->sh_offset;
- uint64_t Size = Section->sh_size;
- if (Offset + Size > Buf.size())
- return object_error::parse_failed;
- StringRef Data((const char *)base() + Section->sh_offset, Size);
- if (Data[Size - 1] != '\0')
- return object_error::string_table_non_null_end;
- return Data;
+ return createError("invalid sh_type for string table, expected SHT_STRTAB");
+ auto V = getSectionContentsAsArray<char>(Section);
+ if (!V)
+ return V.takeError();
+ ArrayRef<char> Data = *V;
+ if (Data.empty())
+ return createError("empty string table");
+ if (Data.back() != '\0')
+ return createError("string table non-null terminated");
+ return StringRef(Data.begin(), Data.size());
}
template <class ELFT>
-ErrorOr<ArrayRef<typename ELFFile<ELFT>::Elf_Word>>
+Expected<ArrayRef<typename ELFT::Word>>
ELFFile<ELFT>::getSHNDXTable(const Elf_Shdr &Section) const {
+ auto SectionsOrErr = sections();
+ if (!SectionsOrErr)
+ return SectionsOrErr.takeError();
+ return getSHNDXTable(Section, *SectionsOrErr);
+}
+
+template <class ELFT>
+Expected<ArrayRef<typename ELFT::Word>>
+ELFFile<ELFT>::getSHNDXTable(const Elf_Shdr &Section,
+ Elf_Shdr_Range Sections) const {
assert(Section.sh_type == ELF::SHT_SYMTAB_SHNDX);
- const Elf_Word *ShndxTableBegin =
- reinterpret_cast<const Elf_Word *>(base() + Section.sh_offset);
- uintX_t Size = Section.sh_size;
- if (Size % sizeof(uint32_t))
- return object_error::parse_failed;
- uintX_t NumSymbols = Size / sizeof(uint32_t);
- const Elf_Word *ShndxTableEnd = ShndxTableBegin + NumSymbols;
- if (reinterpret_cast<const char *>(ShndxTableEnd) > Buf.end())
- return object_error::parse_failed;
- ErrorOr<const Elf_Shdr *> SymTableOrErr = getSection(Section.sh_link);
- if (std::error_code EC = SymTableOrErr.getError())
- return EC;
+ auto VOrErr = getSectionContentsAsArray<Elf_Word>(&Section);
+ if (!VOrErr)
+ return VOrErr.takeError();
+ ArrayRef<Elf_Word> V = *VOrErr;
+ auto SymTableOrErr = object::getSection<ELFT>(Sections, Section.sh_link);
+ if (!SymTableOrErr)
+ return SymTableOrErr.takeError();
const Elf_Shdr &SymTable = **SymTableOrErr;
if (SymTable.sh_type != ELF::SHT_SYMTAB &&
SymTable.sh_type != ELF::SHT_DYNSYM)
- return object_error::parse_failed;
- if (NumSymbols != (SymTable.sh_size / sizeof(Elf_Sym)))
- return object_error::parse_failed;
- return makeArrayRef(ShndxTableBegin, ShndxTableEnd);
+ return createError("invalid sh_type");
+ if (V.size() != (SymTable.sh_size / sizeof(Elf_Sym)))
+ return createError("invalid section contents size");
+ return V;
}
template <class ELFT>
-ErrorOr<StringRef>
+Expected<StringRef>
ELFFile<ELFT>::getStringTableForSymtab(const Elf_Shdr &Sec) const {
+ auto SectionsOrErr = sections();
+ if (!SectionsOrErr)
+ return SectionsOrErr.takeError();
+ return getStringTableForSymtab(Sec, *SectionsOrErr);
+}
+
+template <class ELFT>
+Expected<StringRef>
+ELFFile<ELFT>::getStringTableForSymtab(const Elf_Shdr &Sec,
+ Elf_Shdr_Range Sections) const {
+
if (Sec.sh_type != ELF::SHT_SYMTAB && Sec.sh_type != ELF::SHT_DYNSYM)
- return object_error::parse_failed;
- ErrorOr<const Elf_Shdr *> SectionOrErr = getSection(Sec.sh_link);
- if (std::error_code EC = SectionOrErr.getError())
- return EC;
+ return createError(
+ "invalid sh_type for symbol table, expected SHT_SYMTAB or SHT_DYNSYM");
+ auto SectionOrErr = object::getSection<ELFT>(Sections, Sec.sh_link);
+ if (!SectionOrErr)
+ return SectionOrErr.takeError();
return getStringTable(*SectionOrErr);
}
template <class ELFT>
-ErrorOr<StringRef>
+Expected<StringRef>
ELFFile<ELFT>::getSectionName(const Elf_Shdr *Section) const {
+ auto SectionsOrErr = sections();
+ if (!SectionsOrErr)
+ return SectionsOrErr.takeError();
+ auto Table = getSectionStringTable(*SectionsOrErr);
+ if (!Table)
+ return Table.takeError();
+ return getSectionName(Section, *Table);
+}
+
+template <class ELFT>
+Expected<StringRef> ELFFile<ELFT>::getSectionName(const Elf_Shdr *Section,
+ StringRef DotShstrtab) const {
uint32_t Offset = Section->sh_name;
if (Offset == 0)
return StringRef();
if (Offset >= DotShstrtab.size())
- return object_error::parse_failed;
+ return createError("invalid string offset");
return StringRef(DotShstrtab.data() + Offset);
}
/// This function returns the hash value for a symbol in the .dynsym section
/// Name of the API remains consistent as specified in the libelf
/// REF : http://www.sco.com/developers/gabi/latest/ch5.dynamic.html#hash
-static inline unsigned elf_hash(StringRef &symbolName) {
+inline unsigned hashSysV(StringRef SymbolName) {
unsigned h = 0, g;
- for (unsigned i = 0, j = symbolName.size(); i < j; i++) {
- h = (h << 4) + symbolName[i];
+ for (char C : SymbolName) {
+ h = (h << 4) + C;
g = h & 0xf0000000L;
if (g != 0)
h ^= g >> 24;
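
The renamed hashSysV above is the classic System V gABI symbol hash used to index DT_HASH sections. The loop shown in the hunk is the whole computation except its tail; a self-contained sketch, with the final masking step and return taken from the gABI reference code since the hunk is cut off before the end of the function:

#include <cstdint>
#include <cstdio>
#include <string>

// Standalone sketch of the SysV ELF hash; independent of the LLVM headers.
static uint32_t sysvHash(const std::string &Name) {
  uint32_t h = 0, g;
  for (unsigned char C : Name) {
    h = (h << 4) + C;
    g = h & 0xf0000000;
    if (g != 0)
      h ^= g >> 24;
    h &= ~g; // clear the nibble that was folded back into the low bits
  }
  return h;
}

int main() {
  // Dynamic linkers index the DT_HASH buckets with this value modulo nbucket.
  std::printf("%#x\n", sysvHash("printf")); // prints 0x77905a6
}
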
diff --git a/contrib/llvm/include/llvm/Object/ELFObjectFile.h b/contrib/llvm/include/llvm/Object/ELFObjectFile.h
index 07c6364a6894..69987d433e2d 100644
--- a/contrib/llvm/include/llvm/Object/ELFObjectFile.h
+++ b/contrib/llvm/include/llvm/Object/ELFObjectFile.h
@@ -59,6 +59,7 @@ protected:
virtual uint32_t getSectionType(DataRefImpl Sec) const = 0;
virtual uint64_t getSectionFlags(DataRefImpl Sec) const = 0;
+ virtual uint64_t getSectionOffset(DataRefImpl Sec) const = 0;
virtual ErrorOr<int64_t> getRelocationAddend(DataRefImpl Rel) const = 0;
@@ -90,6 +91,10 @@ public:
uint64_t getFlags() const {
return getObject()->getSectionFlags(getRawDataRefImpl());
}
+
+ uint64_t getOffset() const {
+ return getObject()->getSectionOffset(getRawDataRefImpl());
+ }
};
class elf_section_iterator : public section_iterator {
@@ -245,11 +250,15 @@ protected:
uint32_t getSectionType(DataRefImpl Sec) const override;
uint64_t getSectionFlags(DataRefImpl Sec) const override;
+ uint64_t getSectionOffset(DataRefImpl Sec) const override;
StringRef getRelocationTypeName(uint32_t Type) const;
/// \brief Get the relocation section that contains \a Rel.
const Elf_Shdr *getRelSection(DataRefImpl Rel) const {
- return *EF.getSection(Rel.d.a);
+ auto RelSecOrErr = EF.getSection(Rel.d.a);
+ if (!RelSecOrErr)
+ report_fatal_error(errorToErrorCode(RelSecOrErr.takeError()).message());
+ return *RelSecOrErr;
}
DataRefImpl toDRI(const Elf_Shdr *SymTable, unsigned SymbolNum) const {
@@ -262,7 +271,13 @@ protected:
assert(SymTable->sh_type == ELF::SHT_SYMTAB ||
SymTable->sh_type == ELF::SHT_DYNSYM);
- uintptr_t SHT = reinterpret_cast<uintptr_t>(EF.section_begin());
+ auto SectionsOrErr = EF.sections();
+ if (!SectionsOrErr) {
+ DRI.d.a = 0;
+ DRI.d.b = 0;
+ return DRI;
+ }
+ uintptr_t SHT = reinterpret_cast<uintptr_t>((*SectionsOrErr).begin());
unsigned SymTableIndex =
(reinterpret_cast<uintptr_t>(SymTable) - SHT) / sizeof(Elf_Shdr);
@@ -311,15 +326,18 @@ public:
const Elf_Rela *getRela(DataRefImpl Rela) const;
const Elf_Sym *getSymbol(DataRefImpl Sym) const {
- return EF.template getEntry<Elf_Sym>(Sym.d.a, Sym.d.b);
+ auto Ret = EF.template getEntry<Elf_Sym>(Sym.d.a, Sym.d.b);
+ if (!Ret)
+ report_fatal_error(errorToErrorCode(Ret.takeError()).message());
+ return *Ret;
}
const Elf_Shdr *getSection(DataRefImpl Sec) const {
return reinterpret_cast<const Elf_Shdr *>(Sec.p);
}
- basic_symbol_iterator symbol_begin_impl() const override;
- basic_symbol_iterator symbol_end_impl() const override;
+ basic_symbol_iterator symbol_begin() const override;
+ basic_symbol_iterator symbol_end() const override;
elf_symbol_iterator dynamic_symbol_begin() const;
elf_symbol_iterator dynamic_symbol_end() const;
@@ -364,10 +382,18 @@ void ELFObjectFile<ELFT>::moveSymbolNext(DataRefImpl &Sym) const {
template <class ELFT>
Expected<StringRef> ELFObjectFile<ELFT>::getSymbolName(DataRefImpl Sym) const {
const Elf_Sym *ESym = getSymbol(Sym);
- const Elf_Shdr *SymTableSec = *EF.getSection(Sym.d.a);
- const Elf_Shdr *StringTableSec = *EF.getSection(SymTableSec->sh_link);
- StringRef SymTable = *EF.getStringTable(StringTableSec);
- return ESym->getName(SymTable);
+ auto SymTabOrErr = EF.getSection(Sym.d.a);
+ if (!SymTabOrErr)
+ return SymTabOrErr.takeError();
+ const Elf_Shdr *SymTableSec = *SymTabOrErr;
+ auto StrTabOrErr = EF.getSection(SymTableSec->sh_link);
+ if (!StrTabOrErr)
+ return StrTabOrErr.takeError();
+ const Elf_Shdr *StringTableSec = *StrTabOrErr;
+ auto SymStrTabOrErr = EF.getStringTable(StringTableSec);
+ if (!SymStrTabOrErr)
+ return SymStrTabOrErr.takeError();
+ return ESym->getName(*SymStrTabOrErr);
}
template <class ELFT>
@@ -381,6 +407,11 @@ uint32_t ELFObjectFile<ELFT>::getSectionType(DataRefImpl Sec) const {
}
template <class ELFT>
+uint64_t ELFObjectFile<ELFT>::getSectionOffset(DataRefImpl Sec) const {
+ return getSection(Sec)->sh_offset;
+}
+
+template <class ELFT>
uint64_t ELFObjectFile<ELFT>::getSymbolValueImpl(DataRefImpl Symb) const {
const Elf_Sym *ESym = getSymbol(Symb);
uint64_t Ret = ESym->st_value;
@@ -409,13 +440,15 @@ ELFObjectFile<ELFT>::getSymbolAddress(DataRefImpl Symb) const {
}
const Elf_Ehdr *Header = EF.getHeader();
- const Elf_Shdr *SymTab = *EF.getSection(Symb.d.a);
+ auto SymTabOrErr = EF.getSection(Symb.d.a);
+ if (!SymTabOrErr)
+ return SymTabOrErr.takeError();
+ const Elf_Shdr *SymTab = *SymTabOrErr;
if (Header->e_type == ELF::ET_REL) {
- ErrorOr<const Elf_Shdr *> SectionOrErr =
- EF.getSection(ESym, SymTab, ShndxTable);
- if (std::error_code EC = SectionOrErr.getError())
- return errorCodeToError(EC);
+ auto SectionOrErr = EF.getSection(ESym, SymTab, ShndxTable);
+ if (!SectionOrErr)
+ return SectionOrErr.takeError();
const Elf_Shdr *Section = *SectionOrErr;
if (Section)
Result += Section->sh_addr;
@@ -495,9 +528,14 @@ uint32_t ELFObjectFile<ELFT>::getSymbolFlags(DataRefImpl Sym) const {
if (ESym->st_shndx == ELF::SHN_ABS)
Result |= SymbolRef::SF_Absolute;
- if (ESym->getType() == ELF::STT_FILE || ESym->getType() == ELF::STT_SECTION ||
- ESym == EF.symbol_begin(DotSymtabSec) ||
- ESym == EF.symbol_begin(DotDynSymSec))
+ if (ESym->getType() == ELF::STT_FILE || ESym->getType() == ELF::STT_SECTION)
+ Result |= SymbolRef::SF_FormatSpecific;
+
+ auto DotSymtabSecSyms = EF.symbols(DotSymtabSec);
+ if (DotSymtabSecSyms && ESym == (*DotSymtabSecSyms).begin())
+ Result |= SymbolRef::SF_FormatSpecific;
+ auto DotDynSymSecSyms = EF.symbols(DotDynSymSec);
+ if (DotDynSymSecSyms && ESym == (*DotDynSymSecSyms).begin())
Result |= SymbolRef::SF_FormatSpecific;
if (EF.getHeader()->e_machine == ELF::EM_ARM) {
@@ -533,9 +571,9 @@ template <class ELFT>
Expected<section_iterator>
ELFObjectFile<ELFT>::getSymbolSection(const Elf_Sym *ESym,
const Elf_Shdr *SymTab) const {
- ErrorOr<const Elf_Shdr *> ESecOrErr = EF.getSection(ESym, SymTab, ShndxTable);
- if (std::error_code EC = ESecOrErr.getError())
- return errorCodeToError(EC);
+ auto ESecOrErr = EF.getSection(ESym, SymTab, ShndxTable);
+ if (!ESecOrErr)
+ return ESecOrErr.takeError();
const Elf_Shdr *ESec = *ESecOrErr;
if (!ESec)
@@ -550,7 +588,10 @@ template <class ELFT>
Expected<section_iterator>
ELFObjectFile<ELFT>::getSymbolSection(DataRefImpl Symb) const {
const Elf_Sym *Sym = getSymbol(Symb);
- const Elf_Shdr *SymTab = *EF.getSection(Symb.d.a);
+ auto SymTabOrErr = EF.getSection(Symb.d.a);
+ if (!SymTabOrErr)
+ return SymTabOrErr.takeError();
+ const Elf_Shdr *SymTab = *SymTabOrErr;
return getSymbolSection(Sym, SymTab);
}
@@ -563,9 +604,9 @@ void ELFObjectFile<ELFT>::moveSectionNext(DataRefImpl &Sec) const {
template <class ELFT>
std::error_code ELFObjectFile<ELFT>::getSectionName(DataRefImpl Sec,
StringRef &Result) const {
- ErrorOr<StringRef> Name = EF.getSectionName(&*getSection(Sec));
+ auto Name = EF.getSectionName(&*getSection(Sec));
if (!Name)
- return Name.getError();
+ return errorToErrorCode(Name.takeError());
Result = *Name;
return std::error_code();
}
@@ -627,7 +668,10 @@ template <class ELFT>
relocation_iterator
ELFObjectFile<ELFT>::section_rel_begin(DataRefImpl Sec) const {
DataRefImpl RelData;
- uintptr_t SHT = reinterpret_cast<uintptr_t>(EF.section_begin());
+ auto SectionsOrErr = EF.sections();
+ if (!SectionsOrErr)
+ return relocation_iterator(RelocationRef());
+ uintptr_t SHT = reinterpret_cast<uintptr_t>((*SectionsOrErr).begin());
RelData.d.a = (Sec.p - SHT) / EF.getHeader()->e_shentsize;
RelData.d.b = 0;
return relocation_iterator(RelocationRef(RelData, this));
@@ -644,9 +688,9 @@ ELFObjectFile<ELFT>::section_rel_end(DataRefImpl Sec) const {
const Elf_Shdr *RelSec = getRelSection(RelData);
// Error check sh_link here so that getRelocationSymbol can just use it.
- ErrorOr<const Elf_Shdr *> SymSecOrErr = EF.getSection(RelSec->sh_link);
- if (std::error_code EC = SymSecOrErr.getError())
- report_fatal_error(EC.message());
+ auto SymSecOrErr = EF.getSection(RelSec->sh_link);
+ if (!SymSecOrErr)
+ report_fatal_error(errorToErrorCode(SymSecOrErr.takeError()).message());
RelData.d.b += S->sh_size / S->sh_entsize;
return relocation_iterator(RelocationRef(RelData, this));
@@ -663,9 +707,9 @@ ELFObjectFile<ELFT>::getRelocatedSection(DataRefImpl Sec) const {
if (Type != ELF::SHT_REL && Type != ELF::SHT_RELA)
return section_end();
- ErrorOr<const Elf_Shdr *> R = EF.getSection(EShdr->sh_info);
- if (std::error_code EC = R.getError())
- report_fatal_error(EC.message());
+ auto R = EF.getSection(EShdr->sh_info);
+ if (!R)
+ report_fatal_error(errorToErrorCode(R.takeError()).message());
return section_iterator(SectionRef(toDRI(*R), this));
}
@@ -738,14 +782,20 @@ template <class ELFT>
const typename ELFObjectFile<ELFT>::Elf_Rel *
ELFObjectFile<ELFT>::getRel(DataRefImpl Rel) const {
assert(getRelSection(Rel)->sh_type == ELF::SHT_REL);
- return EF.template getEntry<Elf_Rel>(Rel.d.a, Rel.d.b);
+ auto Ret = EF.template getEntry<Elf_Rel>(Rel.d.a, Rel.d.b);
+ if (!Ret)
+ report_fatal_error(errorToErrorCode(Ret.takeError()).message());
+ return *Ret;
}
template <class ELFT>
const typename ELFObjectFile<ELFT>::Elf_Rela *
ELFObjectFile<ELFT>::getRela(DataRefImpl Rela) const {
assert(getRelSection(Rela)->sh_type == ELF::SHT_RELA);
- return EF.template getEntry<Elf_Rela>(Rela.d.a, Rela.d.b);
+ auto Ret = EF.template getEntry<Elf_Rela>(Rela.d.a, Rela.d.b);
+ if (!Ret)
+ report_fatal_error(errorToErrorCode(Ret.takeError()).message());
+ return *Ret;
}
template <class ELFT>
@@ -753,10 +803,13 @@ ELFObjectFile<ELFT>::ELFObjectFile(MemoryBufferRef Object, std::error_code &EC)
: ELFObjectFileBase(
getELFType(ELFT::TargetEndianness == support::little, ELFT::Is64Bits),
Object),
- EF(Data.getBuffer(), EC) {
- if (EC)
+ EF(Data.getBuffer()) {
+ auto SectionsOrErr = EF.sections();
+ if (!SectionsOrErr) {
+ EC = errorToErrorCode(SectionsOrErr.takeError());
return;
- for (const Elf_Shdr &Sec : EF.sections()) {
+ }
+ for (const Elf_Shdr &Sec : *SectionsOrErr) {
switch (Sec.sh_type) {
case ELF::SHT_DYNSYM: {
if (DotDynSymSec) {
@@ -777,9 +830,11 @@ ELFObjectFile<ELFT>::ELFObjectFile(MemoryBufferRef Object, std::error_code &EC)
break;
}
case ELF::SHT_SYMTAB_SHNDX: {
- ErrorOr<ArrayRef<Elf_Word>> TableOrErr = EF.getSHNDXTable(Sec);
- if ((EC = TableOrErr.getError()))
+ auto TableOrErr = EF.getSHNDXTable(Sec);
+ if (!TableOrErr) {
+ EC = errorToErrorCode(TableOrErr.takeError());
return;
+ }
ShndxTable = *TableOrErr;
break;
}
@@ -788,16 +843,16 @@ ELFObjectFile<ELFT>::ELFObjectFile(MemoryBufferRef Object, std::error_code &EC)
}
template <class ELFT>
-basic_symbol_iterator ELFObjectFile<ELFT>::symbol_begin_impl() const {
+basic_symbol_iterator ELFObjectFile<ELFT>::symbol_begin() const {
DataRefImpl Sym = toDRI(DotSymtabSec, 0);
return basic_symbol_iterator(SymbolRef(Sym, this));
}
template <class ELFT>
-basic_symbol_iterator ELFObjectFile<ELFT>::symbol_end_impl() const {
+basic_symbol_iterator ELFObjectFile<ELFT>::symbol_end() const {
const Elf_Shdr *SymTab = DotSymtabSec;
if (!SymTab)
- return symbol_begin_impl();
+ return symbol_begin();
DataRefImpl Sym = toDRI(SymTab, SymTab->sh_size / sizeof(Elf_Sym));
return basic_symbol_iterator(SymbolRef(Sym, this));
}
@@ -817,12 +872,18 @@ elf_symbol_iterator ELFObjectFile<ELFT>::dynamic_symbol_end() const {
template <class ELFT>
section_iterator ELFObjectFile<ELFT>::section_begin() const {
- return section_iterator(SectionRef(toDRI(EF.section_begin()), this));
+ auto SectionsOrErr = EF.sections();
+ if (!SectionsOrErr)
+ return section_iterator(SectionRef());
+ return section_iterator(SectionRef(toDRI((*SectionsOrErr).begin()), this));
}
template <class ELFT>
section_iterator ELFObjectFile<ELFT>::section_end() const {
- return section_iterator(SectionRef(toDRI(EF.section_end()), this));
+ auto SectionsOrErr = EF.sections();
+ if (!SectionsOrErr)
+ return section_iterator(SectionRef());
+ return section_iterator(SectionRef(toDRI((*SectionsOrErr).end()), this));
}
template <class ELFT>
@@ -854,6 +915,8 @@ StringRef ELFObjectFile<ELFT>::getFileFormatName() const {
return "ELF32-mips";
case ELF::EM_PPC:
return "ELF32-ppc";
+ case ELF::EM_RISCV:
+ return "ELF32-riscv";
case ELF::EM_SPARC:
case ELF::EM_SPARC32PLUS:
return "ELF32-sparc";
@@ -874,6 +937,8 @@ StringRef ELFObjectFile<ELFT>::getFileFormatName() const {
return (IsLittleEndian ? "ELF64-aarch64-little" : "ELF64-aarch64-big");
case ELF::EM_PPC64:
return "ELF64-ppc64";
+ case ELF::EM_RISCV:
+ return "ELF64-riscv";
case ELF::EM_S390:
return "ELF64-s390";
case ELF::EM_SPARCV9:
@@ -907,7 +972,7 @@ unsigned ELFObjectFile<ELFT>::getArch() const {
case ELF::EM_X86_64:
return Triple::x86_64;
case ELF::EM_AARCH64:
- return Triple::aarch64;
+ return IsLittleEndian ? Triple::aarch64 : Triple::aarch64_be;
case ELF::EM_ARM:
return Triple::arm;
case ELF::EM_AVR:
@@ -929,6 +994,15 @@ unsigned ELFObjectFile<ELFT>::getArch() const {
return Triple::ppc;
case ELF::EM_PPC64:
return IsLittleEndian ? Triple::ppc64le : Triple::ppc64;
+ case ELF::EM_RISCV:
+ switch (EF.getHeader()->e_ident[ELF::EI_CLASS]) {
+ case ELF::ELFCLASS32:
+ return Triple::riscv32;
+ case ELF::ELFCLASS64:
+ return Triple::riscv64;
+ default:
+ report_fatal_error("Invalid ELFCLASS!");
+ }
case ELF::EM_S390:
return Triple::systemz;
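
Most of the ELFObjectFile.h changes above follow one pattern: the ELFFile accessors now return Expected<T>, and callers either propagate the failure with takeError() or, where the interface has no error channel (getRel, getRela, getRelSection), bail out through report_fatal_error. A minimal sketch of that pattern outside the ELF code; parseWidth, doubledWidth and doubledWidthOrDie are made-up helpers standing in for the real accessors:

#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;

// Expected-returning helper, standing in for ELFFile::getSection and friends.
static Expected<int> parseWidth(StringRef S) {
  int Value;
  if (S.getAsInteger(10, Value))
    return make_error<StringError>(Twine("not an integer: ") + S,
                                   inconvertibleErrorCode());
  return Value;
}

// Propagating caller, like ELFObjectFile::getSymbolName above.
static Expected<int> doubledWidth(StringRef S) {
  auto W = parseWidth(S);
  if (!W)
    return W.takeError();
  return *W * 2;
}

// Caller with no error channel, like getRel()/getRela() above.
static int doubledWidthOrDie(StringRef S) {
  auto W = doubledWidth(S);
  if (!W)
    report_fatal_error(toString(W.takeError()));
  return *W;
}
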
diff --git a/contrib/llvm/include/llvm/Object/ELFTypes.h b/contrib/llvm/include/llvm/Object/ELFTypes.h
index 55028f360dbf..3e03fd8b980e 100644
--- a/contrib/llvm/include/llvm/Object/ELFTypes.h
+++ b/contrib/llvm/include/llvm/Object/ELFTypes.h
@@ -124,20 +124,19 @@ struct ELFDataTypeTypedefHelper<ELFType<TargetEndianness, true>>
};
// I really don't like doing this, but the alternative is copypasta.
-#define LLVM_ELF_IMPORT_TYPES(E, W) \
- typedef typename ELFDataTypeTypedefHelper<ELFType<E, W>>::Elf_Addr Elf_Addr; \
- typedef typename ELFDataTypeTypedefHelper<ELFType<E, W>>::Elf_Off Elf_Off; \
- typedef typename ELFDataTypeTypedefHelper<ELFType<E, W>>::Elf_Half Elf_Half; \
- typedef typename ELFDataTypeTypedefHelper<ELFType<E, W>>::Elf_Word Elf_Word; \
- typedef \
- typename ELFDataTypeTypedefHelper<ELFType<E, W>>::Elf_Sword Elf_Sword; \
- typedef \
- typename ELFDataTypeTypedefHelper<ELFType<E, W>>::Elf_Xword Elf_Xword; \
- typedef \
- typename ELFDataTypeTypedefHelper<ELFType<E, W>>::Elf_Sxword Elf_Sxword;
#define LLVM_ELF_IMPORT_TYPES_ELFT(ELFT) \
- LLVM_ELF_IMPORT_TYPES(ELFT::TargetEndianness, ELFT::Is64Bits)
+ typedef typename ELFT::Addr Elf_Addr; \
+ typedef typename ELFT::Off Elf_Off; \
+ typedef typename ELFT::Half Elf_Half; \
+ typedef typename ELFT::Word Elf_Word; \
+ typedef typename ELFT::Sword Elf_Sword; \
+ typedef typename ELFT::Xword Elf_Xword; \
+ typedef typename ELFT::Sxword Elf_Sxword;
+
+#define LLD_ELF_COMMA ,
+#define LLVM_ELF_IMPORT_TYPES(E, W) \
+ LLVM_ELF_IMPORT_TYPES_ELFT(ELFType<E LLD_ELF_COMMA W>)
// Section header.
template <class ELFT> struct Elf_Shdr_Base;
@@ -519,7 +518,7 @@ struct Elf_Phdr_Impl<ELFType<TargetEndianness, true>> {
Elf_Xword p_align; // Segment alignment constraint
};
-// ELFT needed for endianess.
+// ELFT needed for endianness.
template <class ELFT>
struct Elf_Hash_Impl {
LLVM_ELF_IMPORT_TYPES_ELFT(ELFT)
@@ -609,10 +608,13 @@ template <class ELFT> struct Elf_Mips_Options {
// or 0 for global options
Elf_Word info; // Kind-specific information
- const Elf_Mips_RegInfo<ELFT> &getRegInfo() const {
+ Elf_Mips_RegInfo<ELFT> &getRegInfo() {
assert(kind == llvm::ELF::ODK_REGINFO);
- return *reinterpret_cast<const Elf_Mips_RegInfo<ELFT> *>(
- (const uint8_t *)this + sizeof(Elf_Mips_Options));
+ return *reinterpret_cast<Elf_Mips_RegInfo<ELFT> *>(
+ (uint8_t *)this + sizeof(Elf_Mips_Options));
+ }
+ const Elf_Mips_RegInfo<ELFT> &getRegInfo() const {
+ return const_cast<Elf_Mips_Options *>(this)->getRegInfo();
}
};
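
The Elf_Mips_Options change above uses a common C++ idiom: implement the accessor once as the non-const overload and let the const overload delegate through const_cast instead of duplicating the reinterpret_cast body. A small generic illustration of the same idiom; Buffer is a made-up class, unrelated to the ELF types:

#include <cassert>
#include <vector>

class Buffer {
  std::vector<int> Data{1, 2, 3};

public:
  int &at(unsigned I) {
    assert(I < Data.size() && "index out of range");
    return Data[I];
  }
  // Casting away const is safe here: the shared implementation does not
  // modify the object, and the result is handed back out as const.
  const int &at(unsigned I) const {
    return const_cast<Buffer *>(this)->at(I);
  }
};
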
diff --git a/contrib/llvm/include/llvm/Object/Error.h b/contrib/llvm/include/llvm/Object/Error.h
index cd55e5dc26d7..eb938338715d 100644
--- a/contrib/llvm/include/llvm/Object/Error.h
+++ b/contrib/llvm/include/llvm/Object/Error.h
@@ -34,6 +34,7 @@ enum class object_error {
string_table_non_null_end,
invalid_section_index,
bitcode_section_not_found,
+ invalid_symbol_index,
};
inline std::error_code make_error_code(object_error e) {
diff --git a/contrib/llvm/include/llvm/Object/IRObjectFile.h b/contrib/llvm/include/llvm/Object/IRObjectFile.h
index 9fe011e17d62..0ea89011e883 100644
--- a/contrib/llvm/include/llvm/Object/IRObjectFile.h
+++ b/contrib/llvm/include/llvm/Object/IRObjectFile.h
@@ -14,6 +14,8 @@
#ifndef LLVM_OBJECT_IROBJECTFILE_H
#define LLVM_OBJECT_IROBJECTFILE_H
+#include "llvm/ADT/PointerUnion.h"
+#include "llvm/Object/ModuleSymbolTable.h"
#include "llvm/Object/SymbolicFile.h"
namespace llvm {
@@ -26,31 +28,21 @@ namespace object {
class ObjectFile;
class IRObjectFile : public SymbolicFile {
- std::unique_ptr<Module> M;
- std::unique_ptr<Mangler> Mang;
- std::vector<std::pair<std::string, uint32_t>> AsmSymbols;
+ std::vector<std::unique_ptr<Module>> Mods;
+ ModuleSymbolTable SymTab;
+ IRObjectFile(MemoryBufferRef Object,
+ std::vector<std::unique_ptr<Module>> Mods);
public:
- IRObjectFile(MemoryBufferRef Object, std::unique_ptr<Module> M);
~IRObjectFile() override;
void moveSymbolNext(DataRefImpl &Symb) const override;
std::error_code printSymbolName(raw_ostream &OS,
DataRefImpl Symb) const override;
uint32_t getSymbolFlags(DataRefImpl Symb) const override;
- GlobalValue *getSymbolGV(DataRefImpl Symb);
- const GlobalValue *getSymbolGV(DataRefImpl Symb) const {
- return const_cast<IRObjectFile *>(this)->getSymbolGV(Symb);
- }
- basic_symbol_iterator symbol_begin_impl() const override;
- basic_symbol_iterator symbol_end_impl() const override;
+ basic_symbol_iterator symbol_begin() const override;
+ basic_symbol_iterator symbol_end() const override;
- const Module &getModule() const {
- return const_cast<IRObjectFile*>(this)->getModule();
- }
- Module &getModule() {
- return *M;
- }
- std::unique_ptr<Module> takeModule();
+ StringRef getTargetTriple() const;
static inline bool classof(const Binary *v) {
return v->isIR();
@@ -60,23 +52,14 @@ public:
/// error code if not found.
static ErrorOr<MemoryBufferRef> findBitcodeInObject(const ObjectFile &Obj);
- /// Parse inline ASM and collect the symbols that are not defined in
- /// the current module.
- ///
- /// For each found symbol, call \p AsmUndefinedRefs with the name of the
- /// symbol found and the associated flags.
- static void CollectAsmUndefinedRefs(
- const Triple &TheTriple, StringRef InlineAsm,
- function_ref<void(StringRef, BasicSymbolRef::Flags)> AsmUndefinedRefs);
-
/// \brief Finds and returns bitcode in the given memory buffer (which may
/// be either a bitcode file or a native object file with embedded bitcode),
/// or an error code if not found.
static ErrorOr<MemoryBufferRef>
findBitcodeInMemBuffer(MemoryBufferRef Object);
- static ErrorOr<std::unique_ptr<IRObjectFile>> create(MemoryBufferRef Object,
- LLVMContext &Context);
+ static Expected<std::unique_ptr<IRObjectFile>> create(MemoryBufferRef Object,
+ LLVMContext &Context);
};
}
}
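
IRObjectFile now holds one or more Modules behind a ModuleSymbolTable (declared later in this commit) and its factory returns Expected. A rough usage sketch under those assumptions, with the file path supplied by the caller:

#include "llvm/IR/LLVMContext.h"
#include "llvm/Object/IRObjectFile.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace llvm::object;

// List the symbols of a bitcode file through the Expected-based factory.
static Error dumpIRSymbols(StringRef Path, raw_ostream &OS) {
  auto BufOrErr = MemoryBuffer::getFile(Path);
  if (!BufOrErr)
    return errorCodeToError(BufOrErr.getError());

  LLVMContext Ctx;
  Expected<std::unique_ptr<IRObjectFile>> ObjOrErr =
      IRObjectFile::create((*BufOrErr)->getMemBufferRef(), Ctx);
  if (!ObjOrErr)
    return ObjOrErr.takeError();

  for (const BasicSymbolRef &Sym : (*ObjOrErr)->symbols()) {
    if (std::error_code EC = Sym.printName(OS)) // forwards to printSymbolName
      return errorCodeToError(EC);
    OS << "\n";
  }
  return Error::success();
}
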
diff --git a/contrib/llvm/include/llvm/Object/MachO.h b/contrib/llvm/include/llvm/Object/MachO.h
index 7906db1e8a77..8c33ec8fd603 100644
--- a/contrib/llvm/include/llvm/Object/MachO.h
+++ b/contrib/llvm/include/llvm/Object/MachO.h
@@ -194,7 +194,8 @@ public:
typedef LoadCommandList::const_iterator load_command_iterator;
static Expected<std::unique_ptr<MachOObjectFile>>
- create(MemoryBufferRef Object, bool IsLittleEndian, bool Is64Bits);
+ create(MemoryBufferRef Object, bool IsLittleEndian, bool Is64Bits,
+ uint32_t UniversalCputype = 0, uint32_t UniversalIndex = 0);
void moveSymbolNext(DataRefImpl &Symb) const override;
@@ -202,6 +203,8 @@ public:
Expected<StringRef> getSymbolName(DataRefImpl Symb) const override;
// MachO specific.
+ Error checkSymbolTable() const;
+
std::error_code getIndirectName(DataRefImpl Symb, StringRef &Res) const;
unsigned getSectionType(SectionRef Sec) const;
@@ -248,8 +251,8 @@ public:
// TODO: Would be useful to have an iterator based version
// of the load command interface too.
- basic_symbol_iterator symbol_begin_impl() const override;
- basic_symbol_iterator symbol_end_impl() const override;
+ basic_symbol_iterator symbol_begin() const override;
+ basic_symbol_iterator symbol_end() const override;
// MachO specific.
basic_symbol_iterator getSymbolByIndex(unsigned Index) const;
@@ -410,7 +413,8 @@ public:
static Triple::ArchType getArch(uint32_t CPUType);
static Triple getArchTriple(uint32_t CPUType, uint32_t CPUSubType,
- const char **McpuDefault = nullptr);
+ const char **McpuDefault = nullptr,
+ const char **ArchFlag = nullptr);
static bool isValidArch(StringRef ArchFlag);
static Triple getHostArch();
@@ -443,7 +447,8 @@ public:
private:
MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian, bool Is64Bits,
- Error &Err);
+ Error &Err, uint32_t UniversalCputype = 0,
+ uint32_t UniversalIndex = 0);
uint64_t getSymbolValueImpl(DataRefImpl Symb) const override;
diff --git a/contrib/llvm/include/llvm/Object/MachOUniversal.h b/contrib/llvm/include/llvm/Object/MachOUniversal.h
index 7eb2af944f3d..a14c4ca01223 100644
--- a/contrib/llvm/include/llvm/Object/MachOUniversal.h
+++ b/contrib/llvm/include/llvm/Object/MachOUniversal.h
@@ -89,16 +89,24 @@ public:
else // Parent->getMagic() == MachO::FAT_MAGIC_64
return Header64.reserved;
}
- std::string getArchTypeName() const {
+ std::string getArchFlagName() const {
+ const char *McpuDefault, *ArchFlag;
if (Parent->getMagic() == MachO::FAT_MAGIC) {
Triple T =
- MachOObjectFile::getArchTriple(Header.cputype, Header.cpusubtype);
- return T.getArchName();
+ MachOObjectFile::getArchTriple(Header.cputype, Header.cpusubtype,
+ &McpuDefault, &ArchFlag);
} else { // Parent->getMagic() == MachO::FAT_MAGIC_64
Triple T =
MachOObjectFile::getArchTriple(Header64.cputype,
- Header64.cpusubtype);
- return T.getArchName();
+ Header64.cpusubtype,
+ &McpuDefault, &ArchFlag);
+ }
+ if (ArchFlag) {
+ std::string ArchFlagName(ArchFlag);
+ return ArchFlagName;
+ } else {
+ std::string ArchFlagName("");
+ return ArchFlagName;
}
}
diff --git a/contrib/llvm/include/llvm/Object/ModuleSummaryIndexObjectFile.h b/contrib/llvm/include/llvm/Object/ModuleSummaryIndexObjectFile.h
index d021fb29427f..6205927039dc 100644
--- a/contrib/llvm/include/llvm/Object/ModuleSummaryIndexObjectFile.h
+++ b/contrib/llvm/include/llvm/Object/ModuleSummaryIndexObjectFile.h
@@ -50,11 +50,11 @@ public:
llvm_unreachable("not implemented");
return 0;
}
- basic_symbol_iterator symbol_begin_impl() const override {
+ basic_symbol_iterator symbol_begin() const override {
llvm_unreachable("not implemented");
return basic_symbol_iterator(BasicSymbolRef());
}
- basic_symbol_iterator symbol_end_impl() const override {
+ basic_symbol_iterator symbol_end() const override {
llvm_unreachable("not implemented");
return basic_symbol_iterator(BasicSymbolRef());
}
@@ -79,25 +79,18 @@ public:
static ErrorOr<MemoryBufferRef>
findBitcodeInMemBuffer(MemoryBufferRef Object);
- /// \brief Looks for summary sections in the given memory buffer,
- /// returns true if found, else false.
- static bool hasGlobalValueSummaryInMemBuffer(
- MemoryBufferRef Object,
- const DiagnosticHandlerFunction &DiagnosticHandler);
-
/// \brief Parse module summary index in the given memory buffer.
/// Return new ModuleSummaryIndexObjectFile instance containing parsed module
/// summary/index.
- static ErrorOr<std::unique_ptr<ModuleSummaryIndexObjectFile>>
- create(MemoryBufferRef Object,
- const DiagnosticHandlerFunction &DiagnosticHandler);
+ static Expected<std::unique_ptr<ModuleSummaryIndexObjectFile>>
+ create(MemoryBufferRef Object);
};
}
/// Parse the module summary index out of an IR file and return the module
/// summary index object if found, or nullptr if not.
-ErrorOr<std::unique_ptr<ModuleSummaryIndex>> getModuleSummaryIndexForFile(
- StringRef Path, const DiagnosticHandlerFunction &DiagnosticHandler);
+Expected<std::unique_ptr<ModuleSummaryIndex>>
+getModuleSummaryIndexForFile(StringRef Path);
}
#endif
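
getModuleSummaryIndexForFile loses its DiagnosticHandlerFunction parameter; failures now come back as an llvm::Error inside the Expected. A sketch of what a migrated caller might look like:

#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/Object/ModuleSummaryIndexObjectFile.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Load a per-module summary index, reporting any error on stderr.
static std::unique_ptr<ModuleSummaryIndex> loadIndexOrNull(StringRef Path) {
  Expected<std::unique_ptr<ModuleSummaryIndex>> IndexOrErr =
      getModuleSummaryIndexForFile(Path);
  if (!IndexOrErr) {
    logAllUnhandledErrors(IndexOrErr.takeError(), errs(),
                          "error loading summary index: ");
    return nullptr;
  }
  return std::move(*IndexOrErr);
}
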
diff --git a/contrib/llvm/include/llvm/Object/ModuleSymbolTable.h b/contrib/llvm/include/llvm/Object/ModuleSymbolTable.h
new file mode 100644
index 000000000000..70775352d977
--- /dev/null
+++ b/contrib/llvm/include/llvm/Object/ModuleSymbolTable.h
@@ -0,0 +1,61 @@
+//===- ModuleSymbolTable.h - symbol table for in-memory IR ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class represents a symbol table built from in-memory IR. It provides
+// access to GlobalValues and should only be used if such access is required
+// (e.g. in the LTO implementation).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJECT_MODULESYMBOLTABLE_H
+#define LLVM_OBJECT_MODULESYMBOLTABLE_H
+
+#include "llvm/ADT/PointerUnion.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/Object/SymbolicFile.h"
+#include <string>
+#include <utility>
+
+namespace llvm {
+
+class GlobalValue;
+
+class ModuleSymbolTable {
+public:
+ typedef std::pair<std::string, uint32_t> AsmSymbol;
+ typedef PointerUnion<GlobalValue *, AsmSymbol *> Symbol;
+
+private:
+ Module *FirstMod = nullptr;
+
+ SpecificBumpPtrAllocator<AsmSymbol> AsmSymbols;
+ std::vector<Symbol> SymTab;
+ Mangler Mang;
+
+public:
+ ArrayRef<Symbol> symbols() const { return SymTab; }
+ void addModule(Module *M);
+
+ void printSymbolName(raw_ostream &OS, Symbol S) const;
+ uint32_t getSymbolFlags(Symbol S) const;
+
+ /// Parse inline ASM and collect the symbols that are defined or referenced in
+ /// the current module.
+ ///
+ /// For each found symbol, call \p AsmSymbol with the name of the symbol found
+ /// and the associated flags.
+ static void CollectAsmSymbols(
+ const Triple &TheTriple, StringRef InlineAsm,
+ function_ref<void(StringRef, object::BasicSymbolRef::Flags)> AsmSymbol);
+};
+
+}
+
+#endif
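
A sketch of how the new ModuleSymbolTable can be driven, using only the members declared in the header above; the module is assumed to be already parsed into M:

#include "llvm/IR/Module.h"
#include "llvm/Object/ModuleSymbolTable.h"
#include "llvm/Object/SymbolicFile.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Print an nm-style listing of the symbols of one in-memory module.
static void listModuleSymbols(Module &M, raw_ostream &OS) {
  ModuleSymbolTable SymTab;
  SymTab.addModule(&M); // collects GlobalValues and inline-asm symbols

  for (ModuleSymbolTable::Symbol S : SymTab.symbols()) {
    uint32_t Flags = SymTab.getSymbolFlags(S);
    OS << ((Flags & object::BasicSymbolRef::SF_Undefined) ? "U " : "D ");
    SymTab.printSymbolName(OS, S);
    OS << "\n";
  }
}
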
diff --git a/contrib/llvm/include/llvm/Object/ObjectFile.h b/contrib/llvm/include/llvm/Object/ObjectFile.h
index 6272a5f056eb..13d5845c3a71 100644
--- a/contrib/llvm/include/llvm/Object/ObjectFile.h
+++ b/contrib/llvm/include/llvm/Object/ObjectFile.h
@@ -29,6 +29,7 @@ namespace object {
class ObjectFile;
class COFFObjectFile;
class MachOObjectFile;
+class WasmObjectFile;
class SymbolRef;
class symbol_iterator;
@@ -300,8 +301,12 @@ public:
createELFObjectFile(MemoryBufferRef Object);
static Expected<std::unique_ptr<MachOObjectFile>>
- createMachOObjectFile(MemoryBufferRef Object);
+ createMachOObjectFile(MemoryBufferRef Object,
+ uint32_t UniversalCputype = 0,
+ uint32_t UniversalIndex = 0);
+ static Expected<std::unique_ptr<WasmObjectFile>>
+ createWasmObjectFile(MemoryBufferRef Object);
};
// Inline function definitions.
diff --git a/contrib/llvm/include/llvm/Object/RelocVisitor.h b/contrib/llvm/include/llvm/Object/RelocVisitor.h
index 5e0df98d8627..3510d293d73d 100644
--- a/contrib/llvm/include/llvm/Object/RelocVisitor.h
+++ b/contrib/llvm/include/llvm/Object/RelocVisitor.h
@@ -86,6 +86,7 @@ private:
return RelocToApply();
}
case Triple::aarch64:
+ case Triple::aarch64_be:
switch (RelocType) {
case llvm::ELF::R_AARCH64_ABS32:
return visitELF_AARCH64_ABS32(R, Value);
@@ -95,6 +96,17 @@ private:
HasError = true;
return RelocToApply();
}
+ case Triple::bpfel:
+ case Triple::bpfeb:
+ switch (RelocType) {
+ case llvm::ELF::R_BPF_64_64:
+ return visitELF_BPF_64_64(R, Value);
+ case llvm::ELF::R_BPF_64_32:
+ return visitELF_BPF_64_32(R, Value);
+ default:
+ HasError = true;
+ return RelocToApply();
+ }
case Triple::mips64el:
case Triple::mips64:
switch (RelocType) {
@@ -139,6 +151,14 @@ private:
HasError = true;
return RelocToApply();
}
+ case Triple::amdgcn:
+ switch (RelocType) {
+ case llvm::ELF::R_AMDGPU_ABS32:
+ return visitELF_AMDGPU_ABS32(R, Value);
+ default:
+ HasError = true;
+ return RelocToApply();
+ }
default:
HasError = true;
return RelocToApply();
@@ -200,6 +220,14 @@ private:
HasError = true;
return RelocToApply();
}
+ case Triple::hexagon:
+ switch (RelocType) {
+ case llvm::ELF::R_HEX_32:
+ return visitELF_HEX_32(R, Value);
+ default:
+ HasError = true;
+ return RelocToApply();
+ }
default:
HasError = true;
return RelocToApply();
@@ -300,6 +328,15 @@ private:
return RelocToApply(Res, 4);
}
+ /// BPF ELF
+ RelocToApply visitELF_BPF_64_32(RelocationRef R, uint64_t Value) {
+ uint32_t Res = Value & 0xFFFFFFFF;
+ return RelocToApply(Res, 4);
+ }
+ RelocToApply visitELF_BPF_64_64(RelocationRef R, uint64_t Value) {
+ return RelocToApply(Value, 8);
+ }
+
/// PPC64 ELF
RelocToApply visitELF_PPC64_ADDR32(RelocationRef R, uint64_t Value) {
int64_t Addend = getELFAddend(R);
@@ -403,6 +440,16 @@ private:
return RelocToApply(static_cast<uint32_t>(Res), 4);
}
+ RelocToApply visitELF_HEX_32(RelocationRef R, uint64_t Value) {
+ int64_t Addend = getELFAddend(R);
+ return RelocToApply(Value + Addend, 4);
+ }
+
+ RelocToApply visitELF_AMDGPU_ABS32(RelocationRef R, uint64_t Value) {
+ int64_t Addend = getELFAddend(R);
+ return RelocToApply(Value + Addend, 4);
+ }
+
/// I386 COFF
RelocToApply visitCOFF_I386_SECREL(RelocationRef R, uint64_t Value) {
return RelocToApply(static_cast<uint32_t>(Value), /*Width=*/4);
diff --git a/contrib/llvm/include/llvm/Object/StackMapParser.h b/contrib/llvm/include/llvm/Object/StackMapParser.h
index e58162de1501..efea62bb3cb3 100644
--- a/contrib/llvm/include/llvm/Object/StackMapParser.h
+++ b/contrib/llvm/include/llvm/Object/StackMapParser.h
@@ -17,7 +17,7 @@
namespace llvm {
template <support::endianness Endianness>
-class StackMapV1Parser {
+class StackMapV2Parser {
public:
template <typename AccessorT>
@@ -47,7 +47,7 @@ public:
/// Accessor for function records.
class FunctionAccessor {
- friend class StackMapV1Parser;
+ friend class StackMapV2Parser;
public:
/// Get the function address.
@@ -56,14 +56,19 @@ public:
}
/// Get the function's stack size.
- uint32_t getStackSize() const {
+ uint64_t getStackSize() const {
return read<uint64_t>(P + sizeof(uint64_t));
}
+
+ /// Get the number of callsite records.
+ uint64_t getRecordCount() const {
+ return read<uint64_t>(P + (2 * sizeof(uint64_t)));
+ }
private:
FunctionAccessor(const uint8_t *P) : P(P) {}
- const static int FunctionAccessorSize = 2 * sizeof(uint64_t);
+ const static int FunctionAccessorSize = 3 * sizeof(uint64_t);
FunctionAccessor next() const {
return FunctionAccessor(P + FunctionAccessorSize);
@@ -74,7 +79,7 @@ public:
/// Accessor for constants.
class ConstantAccessor {
- friend class StackMapV1Parser;
+ friend class StackMapV2Parser;
public:
/// Return the value of this constant.
@@ -103,7 +108,7 @@ public:
/// Accessor for location records.
class LocationAccessor {
- friend class StackMapV1Parser;
+ friend class StackMapV2Parser;
friend class RecordAccessor;
public:
@@ -156,7 +161,7 @@ public:
/// Accessor for stackmap live-out fields.
class LiveOutAccessor {
- friend class StackMapV1Parser;
+ friend class StackMapV2Parser;
friend class RecordAccessor;
public:
@@ -188,7 +193,7 @@ public:
/// Accessor for stackmap records.
class RecordAccessor {
- friend class StackMapV1Parser;
+ friend class StackMapV2Parser;
public:
typedef AccessorIterator<LocationAccessor> location_iterator;
@@ -292,14 +297,14 @@ public:
const uint8_t *P;
};
- /// Construct a parser for a version-1 stackmap. StackMap data will be read
+ /// Construct a parser for a version-2 stackmap. StackMap data will be read
/// from the given array.
- StackMapV1Parser(ArrayRef<uint8_t> StackMapSection)
+ StackMapV2Parser(ArrayRef<uint8_t> StackMapSection)
: StackMapSection(StackMapSection) {
ConstantsListOffset = FunctionListOffset + getNumFunctions() * FunctionSize;
- assert(StackMapSection[0] == 1 &&
- "StackMapV1Parser can only parse version 1 stackmaps");
+ assert(StackMapSection[0] == 2 &&
+ "StackMapV2Parser can only parse version 2 stackmaps");
unsigned CurrentRecordOffset =
ConstantsListOffset + getNumConstants() * ConstantSize;
@@ -315,8 +320,8 @@ public:
typedef AccessorIterator<ConstantAccessor> constant_iterator;
typedef AccessorIterator<RecordAccessor> record_iterator;
- /// Get the version number of this stackmap. (Always returns 1).
- unsigned getVersion() const { return 1; }
+ /// Get the version number of this stackmap. (Always returns 2).
+ unsigned getVersion() const { return 2; }
/// Get the number of functions in the stack map.
uint32_t getNumFunctions() const {
@@ -420,7 +425,7 @@ private:
static const unsigned NumRecordsOffset = NumConstantsOffset + sizeof(uint32_t);
static const unsigned FunctionListOffset = NumRecordsOffset + sizeof(uint32_t);
- static const unsigned FunctionSize = 2 * sizeof(uint64_t);
+ static const unsigned FunctionSize = 3 * sizeof(uint64_t);
static const unsigned ConstantSize = sizeof(uint64_t);
std::size_t getFunctionOffset(unsigned FunctionIndex) const {
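
The stackmap parser moves from format version 1 to version 2: function records grow from two to three uint64_t fields, the third being the per-function callsite record count. A sketch of reading the function table back; getFunctionAddress and the functions() iteration range are assumed from the rest of StackMapParser.h, which this hunk does not show:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/Object/StackMapParser.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Dump the function table of a little-endian __llvm_stackmaps section.
static void dumpStackMapFunctions(ArrayRef<uint8_t> StackMapSection,
                                  raw_ostream &OS) {
  StackMapV2Parser<support::little> Parser(StackMapSection);
  OS << "stackmap v" << Parser.getVersion() << ", "
     << Parser.getNumFunctions() << " functions\n";
  for (const auto &F : Parser.functions())
    OS << "  addr=" << F.getFunctionAddress()
       << " stack=" << F.getStackSize()
       << " records=" << F.getRecordCount() << "\n"; // record count is new in v2
}
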
diff --git a/contrib/llvm/include/llvm/Object/SymbolSize.h b/contrib/llvm/include/llvm/Object/SymbolSize.h
index f2ce70f4208d..1a1dc8752943 100644
--- a/contrib/llvm/include/llvm/Object/SymbolSize.h
+++ b/contrib/llvm/include/llvm/Object/SymbolSize.h
@@ -15,8 +15,19 @@
namespace llvm {
namespace object {
+
+struct SymEntry {
+ symbol_iterator I;
+ uint64_t Address;
+ unsigned Number;
+ unsigned SectionID;
+};
+
+int compareAddress(const SymEntry *A, const SymEntry *B);
+
std::vector<std::pair<SymbolRef, uint64_t>>
computeSymbolSizes(const ObjectFile &O);
+
}
} // namespace llvm
diff --git a/contrib/llvm/include/llvm/Object/SymbolicFile.h b/contrib/llvm/include/llvm/Object/SymbolicFile.h
index 894c2670f265..af62e62c51d8 100644
--- a/contrib/llvm/include/llvm/Object/SymbolicFile.h
+++ b/contrib/llvm/include/llvm/Object/SymbolicFile.h
@@ -88,7 +88,6 @@ class BasicSymbolRef {
const SymbolicFile *OwningObject;
public:
- // FIXME: should we add a SF_Text?
enum Flags : unsigned {
SF_None = 0,
SF_Undefined = 1U << 0, // Symbol is defined in another object file
@@ -103,6 +102,8 @@ public:
SF_Thumb = 1U << 8, // Thumb symbol in a 32-bit ARM binary
SF_Hidden = 1U << 9, // Symbol has hidden visibility
SF_Const = 1U << 10, // Symbol value is constant
+ SF_Executable = 1U << 11, // Symbol points to an executable section
+ // (IR only)
};
BasicSymbolRef() : OwningObject(nullptr) { }
@@ -137,17 +138,11 @@ public:
virtual uint32_t getSymbolFlags(DataRefImpl Symb) const = 0;
- virtual basic_symbol_iterator symbol_begin_impl() const = 0;
+ virtual basic_symbol_iterator symbol_begin() const = 0;
- virtual basic_symbol_iterator symbol_end_impl() const = 0;
+ virtual basic_symbol_iterator symbol_end() const = 0;
// convenience wrappers.
- basic_symbol_iterator symbol_begin() const {
- return symbol_begin_impl();
- }
- basic_symbol_iterator symbol_end() const {
- return symbol_end_impl();
- }
typedef iterator_range<basic_symbol_iterator> basic_symbol_iterator_range;
basic_symbol_iterator_range symbols() const {
return basic_symbol_iterator_range(symbol_begin(), symbol_end());
diff --git a/contrib/llvm/include/llvm/Object/Wasm.h b/contrib/llvm/include/llvm/Object/Wasm.h
new file mode 100644
index 000000000000..2ece6a6c3770
--- /dev/null
+++ b/contrib/llvm/include/llvm/Object/Wasm.h
@@ -0,0 +1,99 @@
+//===- WasmObjectFile.h - Wasm object file implementation -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the WasmObjectFile class, which implements the ObjectFile
+// interface for Wasm files.
+//
+// See: https://github.com/WebAssembly/design/blob/master/BinaryEncoding.md
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_OBJECT_WASM_H
+#define LLVM_OBJECT_WASM_H
+
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/Wasm.h"
+
+namespace llvm {
+namespace object {
+
+class WasmObjectFile : public ObjectFile {
+public:
+ WasmObjectFile(MemoryBufferRef Object, Error &Err);
+ const wasm::WasmObjectHeader &getHeader() const;
+ const wasm::WasmSection *getWasmSection(const SectionRef &Section) const;
+ static bool classof(const Binary *v) { return v->isWasm(); }
+
+protected:
+ void moveSymbolNext(DataRefImpl &Symb) const override;
+
+ std::error_code printSymbolName(raw_ostream &OS,
+ DataRefImpl Symb) const override;
+
+ uint32_t getSymbolFlags(DataRefImpl Symb) const override;
+
+ basic_symbol_iterator symbol_begin() const override;
+
+ basic_symbol_iterator symbol_end() const override;
+ Expected<StringRef> getSymbolName(DataRefImpl Symb) const override;
+
+ Expected<uint64_t> getSymbolAddress(DataRefImpl Symb) const override;
+ uint64_t getSymbolValueImpl(DataRefImpl Symb) const override;
+ uint32_t getSymbolAlignment(DataRefImpl Symb) const override;
+ uint64_t getCommonSymbolSizeImpl(DataRefImpl Symb) const override;
+ Expected<SymbolRef::Type> getSymbolType(DataRefImpl Symb) const override;
+ Expected<section_iterator> getSymbolSection(DataRefImpl Symb) const override;
+
+ // Overrides from SectionRef.
+ void moveSectionNext(DataRefImpl &Sec) const override;
+ std::error_code getSectionName(DataRefImpl Sec,
+ StringRef &Res) const override;
+ uint64_t getSectionAddress(DataRefImpl Sec) const override;
+ uint64_t getSectionSize(DataRefImpl Sec) const override;
+ std::error_code getSectionContents(DataRefImpl Sec,
+ StringRef &Res) const override;
+ uint64_t getSectionAlignment(DataRefImpl Sec) const override;
+ bool isSectionCompressed(DataRefImpl Sec) const override;
+ bool isSectionText(DataRefImpl Sec) const override;
+ bool isSectionData(DataRefImpl Sec) const override;
+ bool isSectionBSS(DataRefImpl Sec) const override;
+ bool isSectionVirtual(DataRefImpl Sec) const override;
+ bool isSectionBitcode(DataRefImpl Sec) const override;
+ relocation_iterator section_rel_begin(DataRefImpl Sec) const override;
+ relocation_iterator section_rel_end(DataRefImpl Sec) const override;
+ section_iterator getRelocatedSection(DataRefImpl Sec) const override;
+
+ // Overrides from RelocationRef.
+ void moveRelocationNext(DataRefImpl &Rel) const override;
+ uint64_t getRelocationOffset(DataRefImpl Rel) const override;
+ symbol_iterator getRelocationSymbol(DataRefImpl Rel) const override;
+ uint64_t getRelocationType(DataRefImpl Rel) const override;
+ void getRelocationTypeName(DataRefImpl Rel,
+ SmallVectorImpl<char> &Result) const override;
+
+ section_iterator section_begin() const override;
+ section_iterator section_end() const override;
+ uint8_t getBytesInAddress() const override;
+ StringRef getFileFormatName() const override;
+ unsigned getArch() const override;
+ SubtargetFeatures getFeatures() const override;
+ bool isRelocatableObject() const override;
+
+private:
+ const uint8_t *getPtr(size_t Offset) const;
+ Error parseUserSection(wasm::WasmSection &Sec, const uint8_t *Ptr,
+ size_t Length);
+
+ wasm::WasmObjectHeader Header;
+ std::vector<wasm::WasmSection> Sections;
+};
+}
+}
+
+#endif
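
Together with the createWasmObjectFile factory added to ObjectFile earlier in this diff, the new header can be exercised roughly as follows; the path is supplied by the caller and the SectionRef accessors used are the generic ObjectFile ones:

#include "llvm/Object/ObjectFile.h"
#include "llvm/Object/Wasm.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace llvm::object;

// Print the name and size of every section in a wasm binary.
static Error printWasmSections(StringRef Path, raw_ostream &OS) {
  auto BufOrErr = MemoryBuffer::getFile(Path);
  if (!BufOrErr)
    return errorCodeToError(BufOrErr.getError());

  Expected<std::unique_ptr<WasmObjectFile>> ObjOrErr =
      ObjectFile::createWasmObjectFile((*BufOrErr)->getMemBufferRef());
  if (!ObjOrErr)
    return ObjOrErr.takeError();

  for (const SectionRef &Sec : (*ObjOrErr)->sections()) {
    StringRef Name;
    if (std::error_code EC = Sec.getName(Name))
      return errorCodeToError(EC);
    OS << Name << " (" << Sec.getSize() << " bytes)\n";
  }
  return Error::success();
}
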
diff --git a/contrib/llvm/include/llvm/ObjectYAML/DWARFYAML.h b/contrib/llvm/include/llvm/ObjectYAML/DWARFYAML.h
new file mode 100644
index 000000000000..222cad61a992
--- /dev/null
+++ b/contrib/llvm/include/llvm/ObjectYAML/DWARFYAML.h
@@ -0,0 +1,203 @@
+//===- DWARFYAML.h - DWARF YAMLIO implementation ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file declares classes for handling the YAML representation
+/// of DWARF Debug Info.
+///
+//===----------------------------------------------------------------------===//
+
+
+#ifndef LLVM_OBJECTYAML_DWARFYAML_H
+#define LLVM_OBJECTYAML_DWARFYAML_H
+
+#include "llvm/ObjectYAML/YAML.h"
+#include "llvm/Support/Dwarf.h"
+
+namespace llvm {
+namespace DWARFYAML {
+
+struct AttributeAbbrev {
+ llvm::dwarf::Attribute Attribute;
+ llvm::dwarf::Form Form;
+};
+
+struct Abbrev {
+ llvm::yaml::Hex32 Code;
+ llvm::dwarf::Tag Tag;
+ llvm::dwarf::Constants Children;
+ std::vector<AttributeAbbrev> Attributes;
+};
+
+struct ARangeDescriptor {
+ llvm::yaml::Hex64 Address;
+ uint64_t Length;
+};
+
+struct ARange {
+ uint32_t Length;
+ uint16_t Version;
+ uint32_t CuOffset;
+ uint8_t AddrSize;
+ uint8_t SegSize;
+ std::vector<ARangeDescriptor> Descriptors;
+};
+
+struct PubEntry {
+ llvm::yaml::Hex32 DieOffset;
+ llvm::yaml::Hex8 Descriptor;
+ StringRef Name;
+};
+
+struct PubSection {
+ PubSection() : IsGNUStyle(false) {}
+
+ uint32_t Length;
+ uint16_t Version;
+ uint32_t UnitOffset;
+ uint32_t UnitSize;
+ bool IsGNUStyle;
+ std::vector<PubEntry> Entries;
+};
+
+struct FormValue {
+ llvm::yaml::Hex64 Value;
+ StringRef CStr;
+ std::vector<llvm::yaml::Hex8> BlockData;
+};
+
+struct Entry {
+ llvm::yaml::Hex32 AbbrCode;
+ std::vector<FormValue> Values;
+};
+
+struct Unit {
+ uint32_t Length;
+ uint16_t Version;
+ uint32_t AbbrOffset;
+ uint8_t AddrSize;
+ std::vector<Entry> Entries;
+};
+
+struct Data {
+ bool IsLittleEndian;
+ std::vector<Abbrev> AbbrevDecls;
+ std::vector<StringRef> DebugStrings;
+ std::vector<ARange> ARanges;
+ PubSection PubNames;
+ PubSection PubTypes;
+
+ PubSection GNUPubNames;
+ PubSection GNUPubTypes;
+
+ std::vector<Unit> CompileUnits;
+
+ bool isEmpty() const;
+};
+
+} // namespace llvm::DWARFYAML
+} // namespace llvm
+
+LLVM_YAML_IS_SEQUENCE_VECTOR(uint8_t)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::StringRef)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::Hex8)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DWARFYAML::AttributeAbbrev)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DWARFYAML::Abbrev)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DWARFYAML::ARangeDescriptor)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DWARFYAML::ARange)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DWARFYAML::PubEntry)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DWARFYAML::Unit)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DWARFYAML::FormValue)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::DWARFYAML::Entry)
+
+namespace llvm {
+namespace yaml {
+
+template <> struct MappingTraits<DWARFYAML::Data> {
+ static void mapping(IO &IO, DWARFYAML::Data &DWARF);
+};
+
+template <> struct MappingTraits<DWARFYAML::Abbrev> {
+ static void mapping(IO &IO, DWARFYAML::Abbrev &Abbrev);
+};
+
+template <> struct MappingTraits<DWARFYAML::AttributeAbbrev> {
+ static void mapping(IO &IO, DWARFYAML::AttributeAbbrev &AttAbbrev);
+};
+
+template <> struct MappingTraits<DWARFYAML::ARangeDescriptor> {
+ static void mapping(IO &IO, DWARFYAML::ARangeDescriptor &Descriptor);
+};
+
+template <> struct MappingTraits<DWARFYAML::ARange> {
+ static void mapping(IO &IO, DWARFYAML::ARange &Range);
+};
+
+template <> struct MappingTraits<DWARFYAML::PubEntry> {
+ static void mapping(IO &IO, DWARFYAML::PubEntry &Entry);
+};
+
+template <> struct MappingTraits<DWARFYAML::PubSection> {
+ static void mapping(IO &IO, DWARFYAML::PubSection &Section);
+};
+
+template <> struct MappingTraits<DWARFYAML::Unit> {
+ static void mapping(IO &IO, DWARFYAML::Unit &Unit);
+};
+
+template <> struct MappingTraits<DWARFYAML::Entry> {
+ static void mapping(IO &IO, DWARFYAML::Entry &Entry);
+};
+
+template <> struct MappingTraits<DWARFYAML::FormValue> {
+ static void mapping(IO &IO, DWARFYAML::FormValue &FormValue);
+};
+
+#define HANDLE_DW_TAG(unused, name) \
+ io.enumCase(value, "DW_TAG_" #name, dwarf::DW_TAG_##name);
+
+template <> struct ScalarEnumerationTraits<dwarf::Tag> {
+ static void enumeration(IO &io, dwarf::Tag &value) {
+#include "llvm/Support/Dwarf.def"
+ io.enumFallback<Hex16>(value);
+ }
+};
+
+#define HANDLE_DW_AT(unused, name) \
+ io.enumCase(value, "DW_AT_" #name, dwarf::DW_AT_##name);
+
+template <> struct ScalarEnumerationTraits<dwarf::Attribute> {
+ static void enumeration(IO &io, dwarf::Attribute &value) {
+#include "llvm/Support/Dwarf.def"
+ io.enumFallback<Hex16>(value);
+ }
+};
+
+#define HANDLE_DW_FORM(unused, name) \
+ io.enumCase(value, "DW_FORM_" #name, dwarf::DW_FORM_##name);
+
+template <> struct ScalarEnumerationTraits<dwarf::Form> {
+ static void enumeration(IO &io, dwarf::Form &value) {
+#include "llvm/Support/Dwarf.def"
+ io.enumFallback<Hex16>(value);
+ }
+};
+
+template <> struct ScalarEnumerationTraits<dwarf::Constants> {
+ static void enumeration(IO &io, dwarf::Constants &value) {
+ io.enumCase(value, "DW_CHILDREN_no", dwarf::DW_CHILDREN_no);
+ io.enumCase(value, "DW_CHILDREN_yes", dwarf::DW_CHILDREN_yes);
+ io.enumFallback<Hex16>(value);
+ }
+};
+
+} // namespace llvm::yaml
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/include/llvm/ObjectYAML/MachOYAML.h b/contrib/llvm/include/llvm/ObjectYAML/MachOYAML.h
index bb15e64789d0..6b7a924f5143 100644
--- a/contrib/llvm/include/llvm/ObjectYAML/MachOYAML.h
+++ b/contrib/llvm/include/llvm/ObjectYAML/MachOYAML.h
@@ -17,6 +17,7 @@
#define LLVM_OBJECTYAML_MACHOYAML_H
#include "llvm/ObjectYAML/YAML.h"
+#include "llvm/ObjectYAML/DWARFYAML.h"
#include "llvm/Support/MachO.h"
namespace llvm {
@@ -59,7 +60,7 @@ struct LoadCommand {
struct NListEntry {
uint32_t n_strx;
- uint8_t n_type;
+ llvm::yaml::Hex8 n_type;
uint8_t n_sect;
uint16_t n_desc;
uint64_t n_value;
@@ -100,13 +101,17 @@ struct LinkEditData {
MachOYAML::ExportEntry ExportTrie;
std::vector<NListEntry> NameList;
std::vector<StringRef> StringTable;
+
+ bool isEmpty() const;
};
struct Object {
+ bool IsLittleEndian;
FileHeader Header;
std::vector<LoadCommand> LoadCommands;
std::vector<Section> Sections;
LinkEditData LinkEdit;
+ DWARFYAML::Data DWARF;
};
struct FatHeader {
@@ -134,14 +139,12 @@ struct UniversalBinary {
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MachOYAML::LoadCommand)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MachOYAML::Section)
-LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::Hex8)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::Hex64)
LLVM_YAML_IS_SEQUENCE_VECTOR(int64_t)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MachOYAML::RebaseOpcode)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MachOYAML::BindOpcode)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MachOYAML::ExportEntry)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MachOYAML::NListEntry)
-LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::StringRef)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MachOYAML::Object)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MachOYAML::FatArch)
diff --git a/contrib/llvm/include/llvm/Option/ArgList.h b/contrib/llvm/include/llvm/Option/ArgList.h
index 89771b5c3cf1..53cb0d8dec4d 100644
--- a/contrib/llvm/include/llvm/Option/ArgList.h
+++ b/contrib/llvm/include/llvm/Option/ArgList.h
@@ -259,6 +259,10 @@ public:
void AddLastArg(ArgStringList &Output, OptSpecifier Id0,
OptSpecifier Id1) const;
+ /// AddAllArgsExcept - Render all arguments matching any of the given ids
+ /// and not matching any of the excluded ids.
+ void AddAllArgsExcept(ArgStringList &Output, ArrayRef<OptSpecifier> Ids,
+ ArrayRef<OptSpecifier> ExcludeIds) const;
/// AddAllArgs - Render all arguments matching any of the given ids.
void AddAllArgs(ArgStringList &Output, ArrayRef<OptSpecifier> Ids) const;
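
AddAllArgsExcept renders every argument whose option matches one of Ids unless it also matches one of ExcludeIds. A sketch with made-up option IDs; a real driver would use the IDs from its generated Options.inc table:

#include "llvm/Option/ArgList.h"
#include "llvm/Option/OptSpecifier.h"
using namespace llvm::opt;

// Hypothetical option IDs, stand-ins for a driver's tablegen'd enumeration.
enum HypotheticalOptID : unsigned { OPT_f_Group = 1, OPT_fno_exceptions = 2 };

// Forward every -f<...> flag except -fno-exceptions to a tool command line.
static void forwardFFlags(const ArgList &Args, ArgStringList &CmdArgs) {
  Args.AddAllArgsExcept(CmdArgs, {OptSpecifier(OPT_f_Group)},
                        {OptSpecifier(OPT_fno_exceptions)});
}
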
diff --git a/contrib/llvm/include/llvm/Pass.h b/contrib/llvm/include/llvm/Pass.h
index 2a21b82876bb..e9c8ca3072c7 100644
--- a/contrib/llvm/include/llvm/Pass.h
+++ b/contrib/llvm/include/llvm/Pass.h
@@ -97,7 +97,7 @@ public:
/// implemented in terms of the name that is registered by one of the
/// Registration templates, but can be overloaded directly.
///
- virtual const char *getPassName() const;
+ virtual StringRef getPassName() const;
/// getPassID - Return the PassID number that corresponds to this pass.
AnalysisID getPassID() const {
diff --git a/contrib/llvm/include/llvm/PassInfo.h b/contrib/llvm/include/llvm/PassInfo.h
index cee4ade323e4..21ade85b682f 100644
--- a/contrib/llvm/include/llvm/PassInfo.h
+++ b/contrib/llvm/include/llvm/PassInfo.h
@@ -13,6 +13,8 @@
#ifndef LLVM_PASSINFO_H
#define LLVM_PASSINFO_H
+#include "llvm/ADT/StringRef.h"
+
#include <cassert>
#include <vector>
@@ -33,8 +35,8 @@ public:
typedef Pass *(*TargetMachineCtor_t)(TargetMachine *);
private:
- const char *const PassName; // Nice name for Pass
- const char *const PassArgument; // Command Line argument to run this pass
+ StringRef PassName; // Nice name for Pass
+ StringRef PassArgument; // Command Line argument to run this pass
const void *PassID;
const bool IsCFGOnlyPass; // Pass only looks at the CFG.
const bool IsAnalysis; // True if an analysis pass.
@@ -47,8 +49,8 @@ private:
public:
/// PassInfo ctor - Do not call this directly, this should only be invoked
/// through RegisterPass.
- PassInfo(const char *name, const char *arg, const void *pi,
- NormalCtor_t normal, bool isCFGOnly, bool is_analysis,
+ PassInfo(StringRef name, StringRef arg, const void *pi, NormalCtor_t normal,
+ bool isCFGOnly, bool is_analysis,
TargetMachineCtor_t machine = nullptr)
: PassName(name), PassArgument(arg), PassID(pi), IsCFGOnlyPass(isCFGOnly),
IsAnalysis(is_analysis), IsAnalysisGroup(false), NormalCtor(normal),
@@ -56,20 +58,20 @@ public:
/// PassInfo ctor - Do not call this directly, this should only be invoked
/// through RegisterPass. This version is for use by analysis groups; it
/// does not auto-register the pass.
- PassInfo(const char *name, const void *pi)
+ PassInfo(StringRef name, const void *pi)
: PassName(name), PassArgument(""), PassID(pi), IsCFGOnlyPass(false),
IsAnalysis(false), IsAnalysisGroup(true), NormalCtor(nullptr),
TargetMachineCtor(nullptr) {}
/// getPassName - Return the friendly name for the pass, never returns null
///
- const char *getPassName() const { return PassName; }
+ StringRef getPassName() const { return PassName; }
/// getPassArgument - Return the command line option that may be passed to
/// 'opt' that will cause this pass to be run. This will return null if there
/// is no argument.
///
- const char *getPassArgument() const { return PassArgument; }
+ StringRef getPassArgument() const { return PassArgument; }
/// getTypeInfo - Return the id object for the pass...
/// TODO : Rename
diff --git a/contrib/llvm/include/llvm/PassSupport.h b/contrib/llvm/include/llvm/PassSupport.h
index ba6d84f04ba0..e77a0b9882b2 100644
--- a/contrib/llvm/include/llvm/PassSupport.h
+++ b/contrib/llvm/include/llvm/PassSupport.h
@@ -101,7 +101,7 @@ template <typename PassName> Pass *callTargetMachineCtor(TargetMachine *TM) {
///
template <typename passName> struct RegisterPass : public PassInfo {
// Register Pass using default constructor...
- RegisterPass(const char *PassArg, const char *Name, bool CFGOnly = false,
+ RegisterPass(StringRef PassArg, StringRef Name, bool CFGOnly = false,
bool is_analysis = false)
: PassInfo(Name, PassArg, &passName::ID,
PassInfo::NormalCtor_t(callDefaultCtor<passName>), CFGOnly,
@@ -131,7 +131,7 @@ template <typename passName> struct RegisterPass : public PassInfo {
///
class RegisterAGBase : public PassInfo {
public:
- RegisterAGBase(const char *Name, const void *InterfaceID,
+ RegisterAGBase(StringRef Name, const void *InterfaceID,
const void *PassID = nullptr, bool isDefault = false);
};
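
The registration templates now take StringRef, which the string literals existing passes already use satisfy unchanged; combined with the Pass::getPassName change earlier in this diff, a minimal legacy pass built against these headers might look like:

#include "llvm/Pass.h"
using namespace llvm;

namespace {
// Toy pass, only to show the registration idiom with the StringRef overloads.
struct CountBlocks : public FunctionPass {
  static char ID;
  CountBlocks() : FunctionPass(ID) {}
  bool runOnFunction(Function &) override { return false; }
  StringRef getPassName() const override { return "Count basic blocks"; }
};
} // namespace

char CountBlocks::ID = 0;
static RegisterPass<CountBlocks> X("count-blocks", "Count basic blocks");
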
diff --git a/contrib/llvm/include/llvm/Passes/PassBuilder.h b/contrib/llvm/include/llvm/Passes/PassBuilder.h
index 9f0a9c6e1380..ba3238c86044 100644
--- a/contrib/llvm/include/llvm/Passes/PassBuilder.h
+++ b/contrib/llvm/include/llvm/Passes/PassBuilder.h
@@ -16,9 +16,11 @@
#ifndef LLVM_PASSES_PASSBUILDER_H
#define LLVM_PASSES_PASSBUILDER_H
+#include "llvm/ADT/Optional.h"
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/LoopPassManager.h"
#include "llvm/IR/PassManager.h"
+#include <vector>
namespace llvm {
class StringRef;
@@ -163,35 +165,68 @@ public:
/// additional analyses.
void registerLoopAnalyses(LoopAnalysisManager &LAM);
- /// \brief Add a per-module default optimization pipeline to a pass manager.
+ /// Construct the core LLVM function canonicalization and simplification
+ /// pipeline.
+ ///
+ /// This is a long pipeline and uses most of the per-function optimization
+ /// passes in LLVM to canonicalize and simplify the IR. It is suitable to run
+ /// repeatedly over the IR and is not expected to destroy important
+ /// information about the semantics of the IR.
+ ///
+ /// Note that \p Level cannot be `O0` here. The pipelines produced are
+ /// only intended for use when attempting to optimize code. If frontends
+ /// require some transformations for semantic reasons, they should explicitly
+ /// build them.
+ FunctionPassManager
+ buildFunctionSimplificationPipeline(OptimizationLevel Level,
+ bool DebugLogging = false);
+
+ /// Build a per-module default optimization pipeline.
///
/// This provides a good default optimization pipeline for per-module
/// optimization and code generation without any link-time optimization. It
/// typically correspond to frontend "-O[123]" options for optimization
/// levels \c O1, \c O2 and \c O3 resp.
- void addPerModuleDefaultPipeline(ModulePassManager &MPM,
- OptimizationLevel Level,
- bool DebugLogging = false);
+ ///
+ /// Note that \p Level cannot be `O0` here. The pipelines produced are
+ /// only intended for use when attempting to optimize code. If frontends
+ /// require some transformations for semantic reasons, they should explicitly
+ /// build them.
+ ModulePassManager buildPerModuleDefaultPipeline(OptimizationLevel Level,
+ bool DebugLogging = false);
- /// \brief Add a pre-link, LTO-targeting default optimization pipeline to
- /// a pass manager.
+ /// Build a pre-link, LTO-targeting default optimization pipeline for use
+ /// before a later LTO run.
///
/// This adds the pre-link optimizations tuned to work well with a later LTO
/// run. It works to minimize the IR which needs to be analyzed without
/// making irreversible decisions which could be made better during the LTO
/// run.
- void addLTOPreLinkDefaultPipeline(ModulePassManager &MPM,
- OptimizationLevel Level,
- bool DebugLogging = false);
+ ///
+ /// Note that \p Level cannot be `O0` here. The pipelines produced are
+ /// only intended for use when attempting to optimize code. If frontends
+ /// require some transformations for semantic reasons, they should explicitly
+ /// build them.
+ ModulePassManager buildLTOPreLinkDefaultPipeline(OptimizationLevel Level,
+ bool DebugLogging = false);
- /// \brief Add an LTO default optimization pipeline to a pass manager.
+ /// Build an LTO default optimization pipeline to a pass manager.
///
/// This provides a good default optimization pipeline for link-time
/// optimization and code generation. It is particularly tuned to fit well
/// when IR coming into the LTO phase was first run through \c
/// addPreLinkLTODefaultPipeline, and the two coordinate closely.
- void addLTODefaultPipeline(ModulePassManager &MPM, OptimizationLevel Level,
- bool DebugLogging = false);
+ ///
+ /// Note that \p Level cannot be `O0` here. The pipelines produced are
+ /// only intended for use when attempting to optimize code. If frontends
+ /// require some transformations for semantic reasons, they should explicitly
+ /// build them.
+ ModulePassManager buildLTODefaultPipeline(OptimizationLevel Level,
+ bool DebugLogging = false);
+
+ /// Build the default `AAManager` with the default alias analysis pipeline
+ /// registered.
+ AAManager buildDefaultAAPipeline();
/// \brief Parse a textual pass pipeline description into a \c ModulePassManager.
///
@@ -242,20 +277,36 @@ public:
bool parseAAPipeline(AAManager &AA, StringRef PipelineText);
private:
- bool parseModulePassName(ModulePassManager &MPM, StringRef Name,
- bool DebugLogging);
- bool parseCGSCCPassName(CGSCCPassManager &CGPM, StringRef Name);
- bool parseFunctionPassName(FunctionPassManager &FPM, StringRef Name);
- bool parseLoopPassName(LoopPassManager &LPM, StringRef Name);
+ /// A struct to capture parsed pass pipeline names.
+ struct PipelineElement {
+ StringRef Name;
+ std::vector<PipelineElement> InnerPipeline;
+ };
+
+ static Optional<std::vector<PipelineElement>>
+ parsePipelineText(StringRef Text);
+
+ bool parseModulePass(ModulePassManager &MPM, const PipelineElement &E,
+ bool VerifyEachPass, bool DebugLogging);
+ bool parseCGSCCPass(CGSCCPassManager &CGPM, const PipelineElement &E,
+ bool VerifyEachPass, bool DebugLogging);
+ bool parseFunctionPass(FunctionPassManager &FPM, const PipelineElement &E,
+ bool VerifyEachPass, bool DebugLogging);
+ bool parseLoopPass(LoopPassManager &LPM, const PipelineElement &E,
+ bool VerifyEachPass, bool DebugLogging);
bool parseAAPassName(AAManager &AA, StringRef Name);
- bool parseLoopPassPipeline(LoopPassManager &LPM, StringRef &PipelineText,
+
+ bool parseLoopPassPipeline(LoopPassManager &LPM,
+ ArrayRef<PipelineElement> Pipeline,
bool VerifyEachPass, bool DebugLogging);
bool parseFunctionPassPipeline(FunctionPassManager &FPM,
- StringRef &PipelineText, bool VerifyEachPass,
- bool DebugLogging);
- bool parseCGSCCPassPipeline(CGSCCPassManager &CGPM, StringRef &PipelineText,
+ ArrayRef<PipelineElement> Pipeline,
+ bool VerifyEachPass, bool DebugLogging);
+ bool parseCGSCCPassPipeline(CGSCCPassManager &CGPM,
+ ArrayRef<PipelineElement> Pipeline,
bool VerifyEachPass, bool DebugLogging);
- bool parseModulePassPipeline(ModulePassManager &MPM, StringRef &PipelineText,
+ bool parseModulePassPipeline(ModulePassManager &MPM,
+ ArrayRef<PipelineElement> Pipeline,
bool VerifyEachPass, bool DebugLogging);
};
}
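
The PassBuilder hunk above replaces the add*Pipeline mutators with build* functions that return the constructed pass manager. A minimal sketch under that new interface, assuming analysis-manager registration happens elsewhere and remembering that Level may not be O0:

    #include "llvm/Passes/PassBuilder.h"

    using namespace llvm;

    // Build (but do not run) the default -O2 module pipeline.
    ModulePassManager buildDefaultO2(bool DebugLogging = false) {
      PassBuilder PB;
      // The pipeline is now returned by value instead of being appended to a
      // ModulePassManager passed in by reference.
      return PB.buildPerModuleDefaultPipeline(PassBuilder::O2, DebugLogging);
    }
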
diff --git a/contrib/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h b/contrib/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h
index 6afde56122f0..d6051ffb3f8d 100644
--- a/contrib/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h
+++ b/contrib/llvm/include/llvm/ProfileData/Coverage/CoverageMapping.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/StringSet.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ProfileData/InstrProf.h"
@@ -75,6 +76,7 @@ class IndexedInstrProfReader;
namespace coverage {
class CoverageMappingReader;
+struct CoverageMappingRecord;
class CoverageMapping;
struct CounterExpressions;
@@ -244,22 +246,6 @@ struct CounterMappingRegion {
inline std::pair<unsigned, unsigned> endLoc() const {
return std::pair<unsigned, unsigned>(LineEnd, ColumnEnd);
}
-
- bool operator<(const CounterMappingRegion &Other) const {
- if (FileID != Other.FileID)
- return FileID < Other.FileID;
- return startLoc() < Other.startLoc();
- }
-
- bool contains(const CounterMappingRegion &Other) const {
- if (FileID != Other.FileID)
- return false;
- if (startLoc() > Other.startLoc())
- return false;
- if (endLoc() < Other.endLoc())
- return false;
- return true;
- }
};
/// \brief Associates a source range with an execution count.
@@ -305,6 +291,9 @@ struct FunctionRecord {
FunctionRecord(StringRef Name, ArrayRef<StringRef> Filenames)
: Name(Name), Filenames(Filenames.begin(), Filenames.end()) {}
+ FunctionRecord(FunctionRecord &&FR) = default;
+ FunctionRecord &operator=(FunctionRecord &&) = default;
+
void pushRegion(CounterMappingRegion Region, uint64_t Count) {
if (CountedRegions.empty())
ExecutionCount = Count;
@@ -411,19 +400,19 @@ public:
CoverageData(StringRef Filename) : Filename(Filename) {}
- CoverageData(CoverageData &&RHS)
- : Filename(std::move(RHS.Filename)), Segments(std::move(RHS.Segments)),
- Expansions(std::move(RHS.Expansions)) {}
-
/// \brief Get the name of the file this data covers.
StringRef getFilename() const { return Filename; }
- std::vector<CoverageSegment>::iterator begin() { return Segments.begin(); }
- std::vector<CoverageSegment>::iterator end() { return Segments.end(); }
- bool empty() { return Segments.empty(); }
+ std::vector<CoverageSegment>::const_iterator begin() const {
+ return Segments.begin();
+ }
+ std::vector<CoverageSegment>::const_iterator end() const {
+ return Segments.end();
+ }
+ bool empty() const { return Segments.empty(); }
/// \brief Expansions that can be further processed.
- ArrayRef<ExpansionRecord> getExpansions() { return Expansions; }
+ ArrayRef<ExpansionRecord> getExpansions() const { return Expansions; }
};
/// \brief The mapping of profile information to coverage data.
@@ -431,20 +420,38 @@ public:
/// This is the main interface to get coverage information, using a profile to
/// fill out execution counts.
class CoverageMapping {
+ StringSet<> FunctionNames;
std::vector<FunctionRecord> Functions;
unsigned MismatchedFunctionCount;
CoverageMapping() : MismatchedFunctionCount(0) {}
+ CoverageMapping(const CoverageMapping &) = delete;
+ const CoverageMapping &operator=(const CoverageMapping &) = delete;
+
+ /// \brief Add a function record corresponding to \p Record.
+ Error loadFunctionRecord(const CoverageMappingRecord &Record,
+ IndexedInstrProfReader &ProfileReader);
+
public:
/// \brief Load the coverage mapping using the given readers.
static Expected<std::unique_ptr<CoverageMapping>>
load(CoverageMappingReader &CoverageReader,
IndexedInstrProfReader &ProfileReader);
+ static Expected<std::unique_ptr<CoverageMapping>>
+ load(ArrayRef<std::unique_ptr<CoverageMappingReader>> CoverageReaders,
+ IndexedInstrProfReader &ProfileReader);
+
/// \brief Load the coverage mapping from the given files.
static Expected<std::unique_ptr<CoverageMapping>>
load(StringRef ObjectFilename, StringRef ProfileFilename,
+ StringRef Arch = StringRef()) {
+ return load(ArrayRef<StringRef>(ObjectFilename), ProfileFilename, Arch);
+ }
+
+ static Expected<std::unique_ptr<CoverageMapping>>
+ load(ArrayRef<StringRef> ObjectFilenames, StringRef ProfileFilename,
StringRef Arch = StringRef());
/// \brief The number of functions that couldn't have their profiles mapped.
@@ -453,7 +460,8 @@ public:
/// can't be associated with any coverage information.
unsigned getMismatchedCount() { return MismatchedFunctionCount; }
- /// \brief Returns the list of files that are covered.
+ /// \brief Returns a lexicographically sorted, unique list of files that are
+ /// covered.
std::vector<StringRef> getUniqueSourceFiles() const;
/// \brief Get the coverage for a particular file.
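
CoverageMapping::load gains an overload that accepts several object files for a single profile, as shown in the hunk above. A small hedged sketch; the file names are placeholders:

    #include "llvm/ADT/StringRef.h"
    #include "llvm/ProfileData/Coverage/CoverageMapping.h"

    using namespace llvm;
    using namespace llvm::coverage;

    Expected<std::unique_ptr<CoverageMapping>> loadDemoCoverage() {
      // Multiple binaries can now contribute mappings to one CoverageMapping.
      StringRef Objects[] = {"a.out", "libdemo.so"};
      return CoverageMapping::load(Objects, "default.profdata");
    }
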
diff --git a/contrib/llvm/include/llvm/ProfileData/Coverage/CoverageMappingWriter.h b/contrib/llvm/include/llvm/ProfileData/Coverage/CoverageMappingWriter.h
index 10269cc50f35..24fb94647247 100644
--- a/contrib/llvm/include/llvm/ProfileData/Coverage/CoverageMappingWriter.h
+++ b/contrib/llvm/include/llvm/ProfileData/Coverage/CoverageMappingWriter.h
@@ -49,10 +49,6 @@ public:
: VirtualFileMapping(VirtualFileMapping), Expressions(Expressions),
MappingRegions(MappingRegions) {}
- CoverageMappingWriter(ArrayRef<CounterExpression> Expressions,
- MutableArrayRef<CounterMappingRegion> MappingRegions)
- : Expressions(Expressions), MappingRegions(MappingRegions) {}
-
/// \brief Write encoded coverage mapping data to the given output stream.
void write(raw_ostream &OS);
};
diff --git a/contrib/llvm/include/llvm/ProfileData/InstrProf.h b/contrib/llvm/include/llvm/ProfileData/InstrProf.h
index 75646b761659..094f3af005d3 100644
--- a/contrib/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/contrib/llvm/include/llvm/ProfileData/InstrProf.h
@@ -55,6 +55,7 @@ inline StringRef getInstrProfNameSectionName(bool AddSegment) {
/// data.
inline StringRef getInstrProfDataSectionName(bool AddSegment) {
return AddSegment ? "__DATA," INSTR_PROF_DATA_SECT_NAME_STR
+ ",regular,live_support"
: INSTR_PROF_DATA_SECT_NAME_STR;
}
@@ -81,7 +82,7 @@ inline StringRef getInstrProfValueProfFuncName() {
/// Return the name of the section containing function coverage mapping
/// data.
inline StringRef getInstrProfCoverageSectionName(bool AddSegment) {
- return AddSegment ? "__DATA," INSTR_PROF_COVMAP_SECT_NAME_STR
+ return AddSegment ? "__LLVM_COV," INSTR_PROF_COVMAP_SECT_NAME_STR
: INSTR_PROF_COVMAP_SECT_NAME_STR;
}
@@ -155,7 +156,7 @@ inline StringRef getInstrProfInitFuncName() { return "__llvm_profile_init"; }
/// A reference to the variable causes the linker to link in the runtime
/// initialization module (which defines the hook variable).
inline StringRef getInstrProfRuntimeHookVarName() {
- return "__llvm_profile_runtime";
+ return INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_RUNTIME_VAR);
}
/// Return the name of the compiler generated function that references the
@@ -164,12 +165,6 @@ inline StringRef getInstrProfRuntimeHookVarUseFuncName() {
return "__llvm_profile_runtime_user";
}
-/// Return the name of the profile runtime interface that overrides the default
-/// profile data file name.
-inline StringRef getInstrProfFileOverriderFuncName() {
- return "__llvm_profile_override_default_filename";
-}
-
/// Return the marker used to separate PGO names during serialization.
inline StringRef getInstrProfNameSeparator() { return "\01"; }
@@ -231,7 +226,7 @@ Error collectPGOFuncNameStrings(const std::vector<GlobalVariable *> &NameVars,
std::string &Result, bool doCompression = true);
class InstrProfSymtab;
/// \c NameStrings is a string composed of one of more sub-strings encoded in
-/// the format described above. The substrings are seperated by 0 or more zero
+/// the format described above. The substrings are separated by 0 or more zero
/// bytes. This method decodes the string and populates the \c Symtab.
Error readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab);
@@ -274,6 +269,10 @@ MDNode *getPGOFuncNameMetadata(const Function &F);
/// declared by users only.
void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName);
+/// Check if we can use Comdat for profile variables. This will eliminate
+/// the duplicated profile variables for Comdat functions.
+bool needsComdatForCounter(const Function &F, const Module &M);
+
const std::error_category &instrprof_category();
enum class instrprof_error {
@@ -293,7 +292,8 @@ enum class instrprof_error {
counter_overflow,
value_site_count_mismatch,
compress_failed,
- uncompress_failed
+ uncompress_failed,
+ empty_raw_profile
};
inline std::error_code make_error_code(instrprof_error E) {
diff --git a/contrib/llvm/include/llvm/ProfileData/InstrProfData.inc b/contrib/llvm/include/llvm/ProfileData/InstrProfData.inc
index 4138e18fa22f..f7c22d10763c 100644
--- a/contrib/llvm/include/llvm/ProfileData/InstrProfData.inc
+++ b/contrib/llvm/include/llvm/ProfileData/InstrProfData.inc
@@ -603,7 +603,12 @@ serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
#define VARIANT_MASKS_ALL 0xff00000000000000ULL
#define GET_VERSION(V) ((V) & ~VARIANT_MASKS_ALL)
#define VARIANT_MASK_IR_PROF (0x1ULL << 56)
-#define IR_LEVEL_PROF_VERSION_VAR __llvm_profile_raw_version
+#define INSTR_PROF_RAW_VERSION_VAR __llvm_profile_raw_version
+#define INSTR_PROF_PROFILE_RUNTIME_VAR __llvm_profile_runtime
+
+/* The variable that holds the name of the profile data
+ * specified via command line. */
+#define INSTR_PROF_PROFILE_NAME_VAR __llvm_profile_filename
/* Runtime section names and name strings. */
#define INSTR_PROF_DATA_SECT_NAME __llvm_prf_data
diff --git a/contrib/llvm/include/llvm/ProfileData/InstrProfWriter.h b/contrib/llvm/include/llvm/ProfileData/InstrProfWriter.h
index 7d292731cccb..f7780fb45004 100644
--- a/contrib/llvm/include/llvm/ProfileData/InstrProfWriter.h
+++ b/contrib/llvm/include/llvm/ProfileData/InstrProfWriter.h
@@ -47,6 +47,8 @@ public:
/// for this function and the hash and number of counts match, each counter is
/// summed. Optionally scale counts by \p Weight.
Error addRecord(InstrProfRecord &&I, uint64_t Weight = 1);
+ /// Merge existing function counts from the given writer.
+ Error mergeRecordsFromWriter(InstrProfWriter &&IPW);
/// Write the profile to \c OS
void write(raw_fd_ostream &OS);
/// Write the profile in text format to \c OS
diff --git a/contrib/llvm/include/llvm/ProfileData/ProfileCommon.h b/contrib/llvm/include/llvm/ProfileData/ProfileCommon.h
index ecb228ca59c4..e955755e5c9a 100644
--- a/contrib/llvm/include/llvm/ProfileData/ProfileCommon.h
+++ b/contrib/llvm/include/llvm/ProfileData/ProfileCommon.h
@@ -45,22 +45,21 @@ inline const char *getUnlikelySectionPrefix() { return ".unlikely"; }
class ProfileSummaryBuilder {
private:
- // We keep track of the number of times a count (block count or samples)
- // appears in the profile. The map is kept sorted in the descending order of
- // counts.
+ /// We keep track of the number of times a count (block count or samples)
+ /// appears in the profile. The map is kept sorted in the descending order of
+ /// counts.
std::map<uint64_t, uint32_t, std::greater<uint64_t>> CountFrequencies;
std::vector<uint32_t> DetailedSummaryCutoffs;
protected:
SummaryEntryVector DetailedSummary;
ProfileSummaryBuilder(std::vector<uint32_t> Cutoffs)
- : DetailedSummaryCutoffs(std::move(Cutoffs)), TotalCount(0), MaxCount(0),
- MaxFunctionCount(0), NumCounts(0), NumFunctions(0) {}
+ : DetailedSummaryCutoffs(std::move(Cutoffs)) {}
inline void addCount(uint64_t Count);
~ProfileSummaryBuilder() = default;
void computeDetailedSummary();
- uint64_t TotalCount, MaxCount, MaxFunctionCount;
- uint32_t NumCounts, NumFunctions;
+ uint64_t TotalCount = 0, MaxCount = 0, MaxFunctionCount = 0;
+ uint32_t NumCounts = 0, NumFunctions = 0;
public:
/// \brief A vector of useful cutoff values for detailed summary.
@@ -68,13 +67,13 @@ public:
};
class InstrProfSummaryBuilder final : public ProfileSummaryBuilder {
- uint64_t MaxInternalBlockCount;
+ uint64_t MaxInternalBlockCount = 0;
inline void addEntryCount(uint64_t Count);
inline void addInternalCount(uint64_t Count);
public:
InstrProfSummaryBuilder(std::vector<uint32_t> Cutoffs)
- : ProfileSummaryBuilder(std::move(Cutoffs)), MaxInternalBlockCount(0) {}
+ : ProfileSummaryBuilder(std::move(Cutoffs)) {}
void addRecord(const InstrProfRecord &);
std::unique_ptr<ProfileSummary> getSummary();
};
@@ -88,7 +87,7 @@ public:
std::unique_ptr<ProfileSummary> getSummary();
};
-// This is called when a count is seen in the profile.
+/// This is called when a count is seen in the profile.
void ProfileSummaryBuilder::addCount(uint64_t Count) {
TotalCount += Count;
if (Count > MaxCount)
diff --git a/contrib/llvm/include/llvm/ProfileData/SampleProf.h b/contrib/llvm/include/llvm/ProfileData/SampleProf.h
index 9fefefa627b1..a96f83620f8b 100644
--- a/contrib/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/contrib/llvm/include/llvm/ProfileData/SampleProf.h
@@ -222,6 +222,21 @@ public:
return ret->second.getSamples();
}
+ /// Return the total number of call target samples collected at a given
+ /// location. Each location is specified by \p LineOffset and
+ /// \p Discriminator. If the location is not found in the profile, return an error.
+ ErrorOr<uint64_t> findCallSamplesAt(uint32_t LineOffset,
+ uint32_t Discriminator) const {
+ const auto &ret = BodySamples.find(LineLocation(LineOffset, Discriminator));
+ if (ret == BodySamples.end())
+ return std::error_code();
+ uint64_t T = 0;
+ for (const auto &t_c : ret->second.getCallTargets()) {
+ T += t_c.second;
+ }
+ return T;
+ }
+
/// Return the function samples at the given callsite location.
FunctionSamples &functionSamplesAt(const LineLocation &Loc) {
return CallsiteSamples[Loc];
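
A short usage sketch for the findCallSamplesAt helper added above; the FunctionSamples object and the location are assumed to come from the caller.

    #include "llvm/ProfileData/SampleProf.h"
    #include "llvm/Support/ErrorOr.h"

    using namespace llvm;
    using namespace llvm::sampleprof;

    // Returns the summed call-target samples at a location, or 0 when the
    // location is absent from the profile (findCallSamplesAt reports that as
    // an error value inside ErrorOr).
    uint64_t callSamplesOrZero(const FunctionSamples &FS, uint32_t LineOffset,
                               uint32_t Discriminator) {
      if (ErrorOr<uint64_t> S = FS.findCallSamplesAt(LineOffset, Discriminator))
        return *S;
      return 0;
    }
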
diff --git a/contrib/llvm/include/llvm/Support/AArch64TargetParser.def b/contrib/llvm/include/llvm/Support/AArch64TargetParser.def
index 67f981b8f2fa..c4416f099de1 100644
--- a/contrib/llvm/include/llvm/Support/AArch64TargetParser.def
+++ b/contrib/llvm/include/llvm/Support/AArch64TargetParser.def
@@ -16,19 +16,20 @@
#ifndef AARCH64_ARCH
#define AARCH64_ARCH(NAME, ID, CPU_ATTR, SUB_ARCH, ARCH_ATTR, ARCH_FPU, ARCH_BASE_EXT)
#endif
+AARCH64_ARCH("invalid", AK_INVALID, nullptr, nullptr,
+ ARMBuildAttrs::CPUArch::v8_A, FK_NONE, AArch64::AEK_NONE)
AARCH64_ARCH("armv8-a", AK_ARMV8A, "8-A", "v8", ARMBuildAttrs::CPUArch::v8_A,
FK_CRYPTO_NEON_FP_ARMV8,
(AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP |
- AArch64::AEK_SIMD | AArch64::AEK_FP16 | AArch64::AEK_PROFILE))
+ AArch64::AEK_SIMD | AArch64::AEK_LSE))
AARCH64_ARCH("armv8.1-a", AK_ARMV8_1A, "8.1-A", "v8.1a",
ARMBuildAttrs::CPUArch::v8_A, FK_CRYPTO_NEON_FP_ARMV8,
(AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP |
- AArch64::AEK_SIMD | AArch64::AEK_FP16 | AArch64::AEK_PROFILE))
+ AArch64::AEK_SIMD | AArch64::AEK_LSE))
AARCH64_ARCH("armv8.2-a", AK_ARMV8_2A, "8.2-A", "v8.2a",
ARMBuildAttrs::CPUArch::v8_A, FK_CRYPTO_NEON_FP_ARMV8,
(AArch64::AEK_CRC | AArch64::AEK_CRYPTO | AArch64::AEK_FP |
- AArch64::AEK_SIMD | AArch64::AEK_FP16 | AArch64::AEK_PROFILE |
- AArch64::AEK_RAS))
+ AArch64::AEK_SIMD | AArch64::AEK_RAS | AArch64::AEK_LSE))
#undef AARCH64_ARCH
#ifndef AARCH64_ARCH_EXT_NAME
@@ -38,6 +39,7 @@ AARCH64_ARCH("armv8.2-a", AK_ARMV8_2A, "8.2-A", "v8.2a",
AARCH64_ARCH_EXT_NAME("invalid", AArch64::AEK_INVALID, nullptr, nullptr)
AARCH64_ARCH_EXT_NAME("none", AArch64::AEK_NONE, nullptr, nullptr)
AARCH64_ARCH_EXT_NAME("crc", AArch64::AEK_CRC, "+crc", "-crc")
+AARCH64_ARCH_EXT_NAME("lse", AArch64::AEK_LSE, "+lse", "-lse")
AARCH64_ARCH_EXT_NAME("crypto", AArch64::AEK_CRYPTO, "+crypto","-crypto")
AARCH64_ARCH_EXT_NAME("fp", AArch64::AEK_FP, "+fp-armv8", "-fp-armv8")
AARCH64_ARCH_EXT_NAME("simd", AArch64::AEK_SIMD, "+neon", "-neon")
@@ -63,6 +65,12 @@ AARCH64_CPU_NAME("cyclone", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_SIMD | AArch64::AEK_CRYPTO))
AARCH64_CPU_NAME("exynos-m1", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_SIMD | AArch64::AEK_CRC | AArch64::AEK_CRYPTO))
+AARCH64_CPU_NAME("exynos-m2", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
+ (AArch64::AEK_SIMD | AArch64::AEK_CRC | AArch64::AEK_CRYPTO))
+AARCH64_CPU_NAME("exynos-m3", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
+ (AArch64::AEK_SIMD | AArch64::AEK_CRC | AArch64::AEK_CRYPTO))
+AARCH64_CPU_NAME("falkor", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
+ (AArch64::AEK_SIMD | AArch64::AEK_CRC | AArch64::AEK_CRYPTO))
AARCH64_CPU_NAME("kryo", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false,
(AArch64::AEK_SIMD | AArch64::AEK_CRC | AArch64::AEK_CRYPTO))
AARCH64_CPU_NAME("vulcan", AK_ARMV8_1A, FK_CRYPTO_NEON_FP_ARMV8, false,
diff --git a/contrib/llvm/include/llvm/Support/ARMBuildAttributes.h b/contrib/llvm/include/llvm/Support/ARMBuildAttributes.h
index f447cd072b5f..e25445790b0c 100644
--- a/contrib/llvm/include/llvm/Support/ARMBuildAttributes.h
+++ b/contrib/llvm/include/llvm/Support/ARMBuildAttributes.h
@@ -108,6 +108,7 @@ enum CPUArch {
v6S_M = 12, // v6_M with the System extensions
v7E_M = 13, // v7_M with DSP extensions
v8_A = 14, // v8_A AArch32
+ v8_R = 15, // e.g. Cortex R52
v8_M_Base= 16, // v8_M_Base AArch32
v8_M_Main= 17, // v8_M_Main AArch32
};
diff --git a/contrib/llvm/include/llvm/Support/ARMTargetParser.def b/contrib/llvm/include/llvm/Support/ARMTargetParser.def
index 195f7112d6a0..58cb6381a9ab 100644
--- a/contrib/llvm/include/llvm/Support/ARMTargetParser.def
+++ b/contrib/llvm/include/llvm/Support/ARMTargetParser.def
@@ -89,11 +89,15 @@ ARM_ARCH("armv8-a", AK_ARMV8A, "8-A", "v8", ARMBuildAttrs::CPUArch::v8_A,
ARM_ARCH("armv8.1-a", AK_ARMV8_1A, "8.1-A", "v8.1a",
ARMBuildAttrs::CPUArch::v8_A, FK_CRYPTO_NEON_FP_ARMV8,
(ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
- ARM::AEK_HWDIV | ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS))
+ ARM::AEK_HWDIV | ARM::AEK_DSP | ARM::AEK_CRC))
ARM_ARCH("armv8.2-a", AK_ARMV8_2A, "8.2-A", "v8.2a",
ARMBuildAttrs::CPUArch::v8_A, FK_CRYPTO_NEON_FP_ARMV8,
(ARM::AEK_SEC | ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM |
- ARM::AEK_HWDIV | ARM::AEK_DSP | ARM::AEK_CRC))
+ ARM::AEK_HWDIV | ARM::AEK_DSP | ARM::AEK_CRC | ARM::AEK_RAS))
+ARM_ARCH("armv8-r", AK_ARMV8R, "8-R", "v8r", ARMBuildAttrs::CPUArch::v8_R,
+ FK_NEON_FP_ARMV8,
+ (ARM::AEK_MP | ARM::AEK_VIRT | ARM::AEK_HWDIVARM | ARM::AEK_HWDIV |
+ ARM::AEK_DSP | ARM::AEK_CRC))
ARM_ARCH("armv8-m.base", AK_ARMV8MBaseline, "8-M.Baseline", "v8m.base",
ARMBuildAttrs::CPUArch::v8_M_Base, FK_NONE, ARM::AEK_HWDIV)
ARM_ARCH("armv8-m.main", AK_ARMV8MMainline, "8-M.Mainline", "v8m.main",
@@ -220,6 +224,7 @@ ARM_CPU_NAME("cortex-r7", AK_ARMV7R, FK_VFPV3_D16_FP16, false,
(ARM::AEK_MP | ARM::AEK_HWDIVARM))
ARM_CPU_NAME("cortex-r8", AK_ARMV7R, FK_VFPV3_D16_FP16, false,
(ARM::AEK_MP | ARM::AEK_HWDIVARM))
+ARM_CPU_NAME("cortex-r52", AK_ARMV8R, FK_NEON_FP_ARMV8, true, ARM::AEK_NONE)
ARM_CPU_NAME("sc300", AK_ARMV7M, FK_NONE, false, ARM::AEK_NONE)
ARM_CPU_NAME("cortex-m3", AK_ARMV7M, FK_NONE, true, ARM::AEK_NONE)
ARM_CPU_NAME("cortex-m4", AK_ARMV7EM, FK_FPV4_SP_D16, true, ARM::AEK_NONE)
@@ -232,6 +237,8 @@ ARM_CPU_NAME("cortex-a72", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_C
ARM_CPU_NAME("cortex-a73", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC)
ARM_CPU_NAME("cyclone", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC)
ARM_CPU_NAME("exynos-m1", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC)
+ARM_CPU_NAME("exynos-m2", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC)
+ARM_CPU_NAME("exynos-m3", AK_ARMV8A, FK_CRYPTO_NEON_FP_ARMV8, false, ARM::AEK_CRC)
// Non-standard Arch names.
ARM_CPU_NAME("iwmmxt", AK_IWMMXT, FK_NONE, true, ARM::AEK_NONE)
ARM_CPU_NAME("xscale", AK_XSCALE, FK_NONE, true, ARM::AEK_NONE)
diff --git a/contrib/llvm/include/llvm/Support/AlignOf.h b/contrib/llvm/include/llvm/Support/AlignOf.h
index af7f20028b6d..abd19afa22f0 100644
--- a/contrib/llvm/include/llvm/Support/AlignOf.h
+++ b/contrib/llvm/include/llvm/Support/AlignOf.h
@@ -7,8 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines the AlignOf function that computes alignments for
-// arbitrary types.
+// This file defines the AlignedCharArray and AlignedCharArrayUnion classes.
//
//===----------------------------------------------------------------------===//
@@ -17,100 +16,15 @@
#include "llvm/Support/Compiler.h"
#include <cstddef>
-#include <type_traits>
namespace llvm {
-namespace detail {
-
-// For everything other than an abstract class we can calulate alignment by
-// building a class with a single character and a member of the given type.
-template <typename T, bool = std::is_abstract<T>::value>
-struct AlignmentCalcImpl {
- char x;
-#if defined(_MSC_VER)
-// Disables "structure was padded due to __declspec(align())" warnings that are
-// generated by any class using AlignOf<T> with a manually specified alignment.
-// Although the warning is disabled in the LLVM project we need this pragma
-// as AlignOf.h is a published support header that's available for use
-// out-of-tree, and we would like that to compile cleanly at /W4.
-#pragma warning(suppress : 4324)
-#endif
- T t;
-private:
- AlignmentCalcImpl() = delete;
-};
-
-// Abstract base class helper, this will have the minimal alignment and size
-// for any abstract class. We don't even define its destructor because this
-// type should never be used in a way that requires it.
-struct AlignmentCalcImplBase {
- virtual ~AlignmentCalcImplBase() = 0;
-};
-
-// When we have an abstract class type, specialize the alignment computation
-// engine to create another abstract class that derives from both an empty
-// abstract base class and the provided type. This has the same effect as the
-// above except that it handles the fact that we can't actually create a member
-// of type T.
-template <typename T>
-struct AlignmentCalcImpl<T, true> : AlignmentCalcImplBase, T {
- ~AlignmentCalcImpl() override = 0;
-};
-
-} // End detail namespace.
-
-/// AlignOf - A templated class that contains an enum value representing
-/// the alignment of the template argument. For example,
-/// AlignOf<int>::Alignment represents the alignment of type "int". The
-/// alignment calculated is the minimum alignment, and not necessarily
-/// the "desired" alignment returned by GCC's __alignof__ (for example). Note
-/// that because the alignment is an enum value, it can be used as a
-/// compile-time constant (e.g., for template instantiation).
-template <typename T>
-struct AlignOf {
-#ifndef _MSC_VER
- // Avoid warnings from GCC like:
- // comparison between 'enum llvm::AlignOf<X>::<anonymous>' and 'enum
- // llvm::AlignOf<Y>::<anonymous>' [-Wenum-compare]
- // by using constexpr instead of enum.
- // (except on MSVC, since it doesn't support constexpr yet).
- static constexpr unsigned Alignment = static_cast<unsigned int>(
- sizeof(detail::AlignmentCalcImpl<T>) - sizeof(T));
-#else
- enum {
- Alignment = static_cast<unsigned int>(
- sizeof(::llvm::detail::AlignmentCalcImpl<T>) - sizeof(T))
- };
-#endif
- enum { Alignment_GreaterEqual_2Bytes = Alignment >= 2 ? 1 : 0 };
- enum { Alignment_GreaterEqual_4Bytes = Alignment >= 4 ? 1 : 0 };
- enum { Alignment_GreaterEqual_8Bytes = Alignment >= 8 ? 1 : 0 };
- enum { Alignment_GreaterEqual_16Bytes = Alignment >= 16 ? 1 : 0 };
-
- enum { Alignment_LessEqual_2Bytes = Alignment <= 2 ? 1 : 0 };
- enum { Alignment_LessEqual_4Bytes = Alignment <= 4 ? 1 : 0 };
- enum { Alignment_LessEqual_8Bytes = Alignment <= 8 ? 1 : 0 };
- enum { Alignment_LessEqual_16Bytes = Alignment <= 16 ? 1 : 0 };
-};
-
-#ifndef _MSC_VER
-template <typename T> constexpr unsigned AlignOf<T>::Alignment;
-#endif
-
-/// alignOf - A templated function that returns the minimum alignment of
-/// of a type. This provides no extra functionality beyond the AlignOf
-/// class besides some cosmetic cleanliness. Example usage:
-/// alignOf<int>() returns the alignment of an int.
-template <typename T>
-inline unsigned alignOf() { return AlignOf<T>::Alignment; }
-
/// \struct AlignedCharArray
/// \brief Helper for building an aligned character array type.
///
/// This template is used to explicitly build up a collection of aligned
/// character array types. We have to build these up using a macro and explicit
-/// specialization to cope with old versions of MSVC and GCC where only an
+/// specialization to cope with MSVC (at least till 2015) where only an
/// integer literal can be used to specify an alignment constraint. Once built
/// up here, we can then begin to indirect between these using normal C++
/// template parameters.
@@ -118,38 +32,11 @@ inline unsigned alignOf() { return AlignOf<T>::Alignment; }
// MSVC requires special handling here.
#ifndef _MSC_VER
-#if __has_feature(cxx_alignas)
template<std::size_t Alignment, std::size_t Size>
struct AlignedCharArray {
- alignas(Alignment) char buffer[Size];
+ LLVM_ALIGNAS(Alignment) char buffer[Size];
};
-#elif defined(__GNUC__) || defined(__IBM_ATTRIBUTES)
-/// \brief Create a type with an aligned char buffer.
-template<std::size_t Alignment, std::size_t Size>
-struct AlignedCharArray;
-
-#define LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(x) \
- template<std::size_t Size> \
- struct AlignedCharArray<x, Size> { \
- __attribute__((aligned(x))) char buffer[Size]; \
- };
-
-LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(1)
-LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(2)
-LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(4)
-LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(8)
-LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(16)
-LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(32)
-LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(64)
-LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT(128)
-
-#undef LLVM_ALIGNEDCHARARRAY_TEMPLATE_ALIGNMENT
-
-#else
-# error No supported align as directive.
-#endif
-
#else // _MSC_VER
/// \brief Create a type with an aligned char buffer.
@@ -249,8 +136,8 @@ template <typename T1,
typename T5 = char, typename T6 = char, typename T7 = char,
typename T8 = char, typename T9 = char, typename T10 = char>
struct AlignedCharArrayUnion : llvm::AlignedCharArray<
- AlignOf<llvm::detail::AlignerImpl<T1, T2, T3, T4, T5,
- T6, T7, T8, T9, T10> >::Alignment,
+ alignof(llvm::detail::AlignerImpl<T1, T2, T3, T4, T5,
+ T6, T7, T8, T9, T10>),
sizeof(::llvm::detail::SizerImpl<T1, T2, T3, T4, T5,
T6, T7, T8, T9, T10>)> {
};
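
With AlignOf<T>::Alignment and alignOf<T>() removed above, callers switch to the C++11 alignof operator while AlignedCharArrayUnion stays available. A tiny before/after sketch; MyType is a placeholder:

    #include "llvm/Support/AlignOf.h"
    #include <cstddef>

    struct MyType { long long Payload; char Tag; };

    // Old:  unsigned A = llvm::AlignOf<MyType>::Alignment;   (removed above)
    // New:  the plain language operator.
    constexpr std::size_t MyTypeAlign = alignof(MyType);

    // Uninitialized storage aligned and sized for any of the listed types.
    llvm::AlignedCharArrayUnion<MyType, double> Storage;
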
diff --git a/contrib/llvm/include/llvm/Support/Allocator.h b/contrib/llvm/include/llvm/Support/Allocator.h
index 1c9508661d6f..c71759abd7d2 100644
--- a/contrib/llvm/include/llvm/Support/Allocator.h
+++ b/contrib/llvm/include/llvm/Support/Allocator.h
@@ -22,14 +22,16 @@
#define LLVM_SUPPORT_ALLOCATOR_H
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/AlignOf.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/Memory.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
+#include <cstdint>
#include <cstdlib>
+#include <iterator>
+#include <type_traits>
+#include <utility>
namespace llvm {
@@ -74,7 +76,7 @@ public:
/// \brief Allocate space for a sequence of objects without constructing them.
template <typename T> T *Allocate(size_t Num = 1) {
- return static_cast<T *>(Allocate(Num * sizeof(T), AlignOf<T>::Alignment));
+ return static_cast<T *>(Allocate(Num * sizeof(T), alignof(T)));
}
/// \brief Deallocate space for a sequence of objects without constructing them.
@@ -114,7 +116,8 @@ namespace detail {
// printing code uses Allocator.h in its implementation.
void printBumpPtrAllocatorStats(unsigned NumSlabs, size_t BytesAllocated,
size_t TotalMemory);
-} // End namespace detail.
+
+} // end namespace detail
/// \brief Allocate memory in an ever growing pool, as if by bump-pointer.
///
@@ -366,7 +369,7 @@ template <typename T> class SpecificBumpPtrAllocator {
BumpPtrAllocator Allocator;
public:
- SpecificBumpPtrAllocator() : Allocator() {}
+ SpecificBumpPtrAllocator() = default;
SpecificBumpPtrAllocator(SpecificBumpPtrAllocator &&Old)
: Allocator(std::move(Old.Allocator)) {}
~SpecificBumpPtrAllocator() { DestroyAll(); }
@@ -381,7 +384,7 @@ public:
/// all memory allocated so far.
void DestroyAll() {
auto DestroyElements = [](char *Begin, char *End) {
- assert(Begin == (char*)alignAddr(Begin, alignOf<T>()));
+ assert(Begin == (char *)alignAddr(Begin, alignof(T)));
for (char *Ptr = Begin; Ptr + sizeof(T) <= End; Ptr += sizeof(T))
reinterpret_cast<T *>(Ptr)->~T();
};
@@ -390,7 +393,7 @@ public:
++I) {
size_t AllocatedSlabSize = BumpPtrAllocator::computeSlabSize(
std::distance(Allocator.Slabs.begin(), I));
- char *Begin = (char*)alignAddr(*I, alignOf<T>());
+ char *Begin = (char *)alignAddr(*I, alignof(T));
char *End = *I == Allocator.Slabs.back() ? Allocator.CurPtr
: (char *)*I + AllocatedSlabSize;
@@ -400,7 +403,7 @@ public:
for (auto &PtrAndSize : Allocator.CustomSizedSlabs) {
void *Ptr = PtrAndSize.first;
size_t Size = PtrAndSize.second;
- DestroyElements((char*)alignAddr(Ptr, alignOf<T>()), (char *)Ptr + Size);
+ DestroyElements((char *)alignAddr(Ptr, alignof(T)), (char *)Ptr + Size);
}
Allocator.Reset();
@@ -410,7 +413,7 @@ public:
T *Allocate(size_t num = 1) { return Allocator.Allocate<T>(num); }
};
-} // end namespace llvm
+} // end namespace llvm
template <typename AllocatorT, size_t SlabSize, size_t SizeThreshold>
void *operator new(size_t Size,
diff --git a/contrib/llvm/include/llvm/Support/ArrayRecycler.h b/contrib/llvm/include/llvm/Support/ArrayRecycler.h
index 36f644af2880..4698f12b3bbc 100644
--- a/contrib/llvm/include/llvm/Support/ArrayRecycler.h
+++ b/contrib/llvm/include/llvm/Support/ArrayRecycler.h
@@ -26,15 +26,14 @@ namespace llvm {
/// Arrays are allocated in a small number of fixed sizes. For each supported
/// array size, the ArrayRecycler keeps a free list of available arrays.
///
-template<class T, size_t Align = AlignOf<T>::Alignment>
-class ArrayRecycler {
+template <class T, size_t Align = alignof(T)> class ArrayRecycler {
// The free list for a given array size is a simple singly linked list.
// We can't use iplist or Recycler here since those classes can't be copied.
struct FreeList {
FreeList *Next;
};
- static_assert(Align >= AlignOf<FreeList>::Alignment, "Object underaligned");
+ static_assert(Align >= alignof(FreeList), "Object underaligned");
static_assert(sizeof(T) >= sizeof(FreeList), "Objects are too small");
// Keep a free list for each array size.
diff --git a/contrib/llvm/include/llvm/Support/AtomicOrdering.h b/contrib/llvm/include/llvm/Support/AtomicOrdering.h
index 8837fab19575..001804248b85 100644
--- a/contrib/llvm/include/llvm/Support/AtomicOrdering.h
+++ b/contrib/llvm/include/llvm/Support/AtomicOrdering.h
@@ -73,8 +73,8 @@ bool operator>=(AtomicOrdering, AtomicOrdering) = delete;
// Validate an integral value which isn't known to fit within the enum's range
// is a valid AtomicOrdering.
template <typename Int> static inline bool isValidAtomicOrdering(Int I) {
- return (Int)AtomicOrdering::NotAtomic <= I &&
- I <= (Int)AtomicOrdering::SequentiallyConsistent;
+ return static_cast<Int>(AtomicOrdering::NotAtomic) <= I &&
+ I <= static_cast<Int>(AtomicOrdering::SequentiallyConsistent);
}
/// String used by LLVM IR to represent atomic ordering.
@@ -82,40 +82,40 @@ static inline const char *toIRString(AtomicOrdering ao) {
static const char *names[8] = {"not_atomic", "unordered", "monotonic",
"consume", "acquire", "release",
"acq_rel", "seq_cst"};
- return names[(size_t)ao];
+ return names[static_cast<size_t>(ao)];
}
/// Returns true if ao is stronger than other as defined by the AtomicOrdering
/// lattice, which is based on C++'s definition.
static inline bool isStrongerThan(AtomicOrdering ao, AtomicOrdering other) {
static const bool lookup[8][8] = {
- // NA UN RX CO AC RE AR SC
- /* NotAtomic */ {0, 0, 0, 0, 0, 0, 0, 0},
- /* Unordered */ {1, 0, 0, 0, 0, 0, 0, 0},
- /* relaxed */ {1, 1, 0, 0, 0, 0, 0, 0},
- /* consume */ {1, 1, 1, 0, 0, 0, 0, 0},
- /* acquire */ {1, 1, 1, 1, 0, 0, 0, 0},
- /* release */ {1, 1, 1, 0, 0, 0, 0, 0},
- /* acq_rel */ {1, 1, 1, 1, 1, 1, 0, 0},
- /* seq_cst */ {1, 1, 1, 1, 1, 1, 1, 0},
+ // NA UN RX CO AC RE AR SC
+ /* NotAtomic */ {false, false, false, false, false, false, false, false},
+ /* Unordered */ { true, false, false, false, false, false, false, false},
+ /* relaxed */ { true, true, false, false, false, false, false, false},
+ /* consume */ { true, true, true, false, false, false, false, false},
+ /* acquire */ { true, true, true, true, false, false, false, false},
+ /* release */ { true, true, true, false, false, false, false, false},
+ /* acq_rel */ { true, true, true, true, true, true, false, false},
+ /* seq_cst */ { true, true, true, true, true, true, true, false},
};
- return lookup[(size_t)ao][(size_t)other];
+ return lookup[static_cast<size_t>(ao)][static_cast<size_t>(other)];
}
static inline bool isAtLeastOrStrongerThan(AtomicOrdering ao,
AtomicOrdering other) {
static const bool lookup[8][8] = {
- // NA UN RX CO AC RE AR SC
- /* NotAtomic */ {1, 0, 0, 0, 0, 0, 0, 0},
- /* Unordered */ {1, 1, 0, 0, 0, 0, 0, 0},
- /* relaxed */ {1, 1, 1, 0, 0, 0, 0, 0},
- /* consume */ {1, 1, 1, 1, 0, 0, 0, 0},
- /* acquire */ {1, 1, 1, 1, 1, 0, 0, 0},
- /* release */ {1, 1, 1, 0, 0, 1, 0, 0},
- /* acq_rel */ {1, 1, 1, 1, 1, 1, 1, 0},
- /* seq_cst */ {1, 1, 1, 1, 1, 1, 1, 1},
+ // NA UN RX CO AC RE AR SC
+ /* NotAtomic */ { true, false, false, false, false, false, false, false},
+ /* Unordered */ { true, true, false, false, false, false, false, false},
+ /* relaxed */ { true, true, true, false, false, false, false, false},
+ /* consume */ { true, true, true, true, false, false, false, false},
+ /* acquire */ { true, true, true, true, true, false, false, false},
+ /* release */ { true, true, true, false, false, true, false, false},
+ /* acq_rel */ { true, true, true, true, true, true, true, false},
+ /* seq_cst */ { true, true, true, true, true, true, true, true},
};
- return lookup[(size_t)ao][(size_t)other];
+ return lookup[static_cast<size_t>(ao)][static_cast<size_t>(other)];
}
static inline bool isStrongerThanUnordered(AtomicOrdering ao) {
@@ -145,7 +145,7 @@ static inline AtomicOrderingCABI toCABI(AtomicOrdering ao) {
/* acq_rel */ AtomicOrderingCABI::acq_rel,
/* seq_cst */ AtomicOrderingCABI::seq_cst,
};
- return lookup[(size_t)ao];
+ return lookup[static_cast<size_t>(ao)];
}
} // end namespace llvm
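
The AtomicOrdering hunks above only respell the lookup tables with true/false and static_cast; the lattice queries behave as before. A one-line sanity sketch:

    #include "llvm/Support/AtomicOrdering.h"

    using namespace llvm;

    // Acquire is stronger than Monotonic (relaxed) in the ordering lattice.
    bool acquireBeatsMonotonic() {
      return isStrongerThan(AtomicOrdering::Acquire, AtomicOrdering::Monotonic);
    }
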
diff --git a/contrib/llvm/include/llvm/Support/COFF.h b/contrib/llvm/include/llvm/Support/COFF.h
index 7dad3e82bda6..19223306bd07 100644
--- a/contrib/llvm/include/llvm/Support/COFF.h
+++ b/contrib/llvm/include/llvm/Support/COFF.h
@@ -41,6 +41,11 @@ namespace COFF {
'\xaf', '\x20', '\xfa', '\xf6', '\x6a', '\xa4', '\xdc', '\xb8',
};
+ static const char ClGlObjMagic[] = {
+ '\x38', '\xfe', '\xb3', '\x0c', '\xa5', '\xd9', '\xab', '\x4d',
+ '\xac', '\x9b', '\xd6', '\xb6', '\x22', '\x26', '\x53', '\xc2',
+ };
+
// Sizes in bytes of various things in the COFF format.
enum {
Header16Size = 20,
@@ -657,7 +662,7 @@ namespace COFF {
}
ImportNameType getNameType() const {
- return static_cast<ImportNameType>((TypeInfo & 0x1C) >> 3);
+ return static_cast<ImportNameType>((TypeInfo & 0x1C) >> 2);
}
};
diff --git a/contrib/llvm/include/llvm/Support/CachePruning.h b/contrib/llvm/include/llvm/Support/CachePruning.h
index 383414119139..954fd8ae7ffb 100644
--- a/contrib/llvm/include/llvm/Support/CachePruning.h
+++ b/contrib/llvm/include/llvm/Support/CachePruning.h
@@ -16,6 +16,7 @@
#define LLVM_SUPPORT_CACHE_PRUNING_H
#include "llvm/ADT/StringRef.h"
+#include <chrono>
namespace llvm {
@@ -29,7 +30,7 @@ public:
/// Define the pruning interval. This is intended to be used to avoid scanning
/// the directory too often. It does not impact the decision of which file to
/// prune. A value of 0 forces the scan to occurs.
- CachePruning &setPruningInterval(int PruningInterval) {
+ CachePruning &setPruningInterval(std::chrono::seconds PruningInterval) {
Interval = PruningInterval;
return *this;
}
@@ -37,7 +38,7 @@ public:
/// Define the expiration for a file. When a file hasn't been accessed for
/// \p ExpireAfter seconds, it is removed from the cache. A value of 0 disable
/// the expiration-based pruning.
- CachePruning &setEntryExpiration(unsigned ExpireAfter) {
+ CachePruning &setEntryExpiration(std::chrono::seconds ExpireAfter) {
Expiration = ExpireAfter;
return *this;
}
@@ -59,11 +60,11 @@ public:
private:
// Options that matches the setters above.
std::string Path;
- unsigned Expiration = 0;
- unsigned Interval = 0;
+ std::chrono::seconds Expiration = std::chrono::seconds::zero();
+ std::chrono::seconds Interval = std::chrono::seconds::zero();
unsigned PercentageOfAvailableSpace = 0;
};
} // namespace llvm
-#endif
\ No newline at end of file
+#endif
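
CachePruning now takes std::chrono::seconds for its interval and expiration setters. A hedged configuration sketch; the concrete durations are arbitrary examples:

    #include "llvm/Support/CachePruning.h"
    #include <chrono>

    void configurePruning(llvm::CachePruning &Pruner) {
      // Coarser duration types convert implicitly to std::chrono::seconds.
      Pruner.setPruningInterval(std::chrono::minutes(20))    // rescan at most every 20 min
            .setEntryExpiration(std::chrono::hours(24 * 7)); // expire entries after a week
    }
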
diff --git a/contrib/llvm/include/llvm/Support/Casting.h b/contrib/llvm/include/llvm/Support/Casting.h
index 6ba5efa47554..a73047b2b557 100644
--- a/contrib/llvm/include/llvm/Support/Casting.h
+++ b/contrib/llvm/include/llvm/Support/Casting.h
@@ -128,8 +128,7 @@ struct isa_impl_wrap<To, FromTy, FromTy> {
//
// if (isa<Type>(myVal)) { ... }
//
-template <class X, class Y>
-LLVM_ATTRIBUTE_UNUSED_RESULT inline bool isa(const Y &Val) {
+template <class X, class Y> LLVM_NODISCARD inline bool isa(const Y &Val) {
return isa_impl_wrap<X, const Y,
typename simplify_type<const Y>::SimpleType>::doit(Val);
}
@@ -243,9 +242,10 @@ inline typename cast_retty<X, Y *>::ret_type cast(Y *Val) {
// accepted.
//
template <class X, class Y>
-LLVM_ATTRIBUTE_UNUSED_RESULT inline typename std::enable_if<
- !is_simple_type<Y>::value, typename cast_retty<X, const Y>::ret_type>::type
-cast_or_null(const Y &Val) {
+LLVM_NODISCARD inline
+ typename std::enable_if<!is_simple_type<Y>::value,
+ typename cast_retty<X, const Y>::ret_type>::type
+ cast_or_null(const Y &Val) {
if (!Val)
return nullptr;
assert(isa<X>(Val) && "cast_or_null<Ty>() argument of incompatible type!");
@@ -253,9 +253,10 @@ cast_or_null(const Y &Val) {
}
template <class X, class Y>
-LLVM_ATTRIBUTE_UNUSED_RESULT inline typename std::enable_if<
- !is_simple_type<Y>::value, typename cast_retty<X, Y>::ret_type>::type
-cast_or_null(Y &Val) {
+LLVM_NODISCARD inline
+ typename std::enable_if<!is_simple_type<Y>::value,
+ typename cast_retty<X, Y>::ret_type>::type
+ cast_or_null(Y &Val) {
if (!Val)
return nullptr;
assert(isa<X>(Val) && "cast_or_null<Ty>() argument of incompatible type!");
@@ -263,7 +264,7 @@ cast_or_null(Y &Val) {
}
template <class X, class Y>
-LLVM_ATTRIBUTE_UNUSED_RESULT inline typename cast_retty<X, Y *>::ret_type
+LLVM_NODISCARD inline typename cast_retty<X, Y *>::ret_type
cast_or_null(Y *Val) {
if (!Val) return nullptr;
assert(isa<X>(Val) && "cast_or_null<Ty>() argument of incompatible type!");
@@ -280,21 +281,20 @@ cast_or_null(Y *Val) {
//
template <class X, class Y>
-LLVM_ATTRIBUTE_UNUSED_RESULT inline typename std::enable_if<
- !is_simple_type<Y>::value, typename cast_retty<X, const Y>::ret_type>::type
-dyn_cast(const Y &Val) {
+LLVM_NODISCARD inline
+ typename std::enable_if<!is_simple_type<Y>::value,
+ typename cast_retty<X, const Y>::ret_type>::type
+ dyn_cast(const Y &Val) {
return isa<X>(Val) ? cast<X>(Val) : nullptr;
}
template <class X, class Y>
-LLVM_ATTRIBUTE_UNUSED_RESULT inline typename cast_retty<X, Y>::ret_type
-dyn_cast(Y &Val) {
+LLVM_NODISCARD inline typename cast_retty<X, Y>::ret_type dyn_cast(Y &Val) {
return isa<X>(Val) ? cast<X>(Val) : nullptr;
}
template <class X, class Y>
-LLVM_ATTRIBUTE_UNUSED_RESULT inline typename cast_retty<X, Y *>::ret_type
-dyn_cast(Y *Val) {
+LLVM_NODISCARD inline typename cast_retty<X, Y *>::ret_type dyn_cast(Y *Val) {
return isa<X>(Val) ? cast<X>(Val) : nullptr;
}
@@ -302,21 +302,23 @@ dyn_cast(Y *Val) {
// value is accepted.
//
template <class X, class Y>
-LLVM_ATTRIBUTE_UNUSED_RESULT inline typename std::enable_if<
- !is_simple_type<Y>::value, typename cast_retty<X, const Y>::ret_type>::type
-dyn_cast_or_null(const Y &Val) {
+LLVM_NODISCARD inline
+ typename std::enable_if<!is_simple_type<Y>::value,
+ typename cast_retty<X, const Y>::ret_type>::type
+ dyn_cast_or_null(const Y &Val) {
return (Val && isa<X>(Val)) ? cast<X>(Val) : nullptr;
}
template <class X, class Y>
-LLVM_ATTRIBUTE_UNUSED_RESULT inline typename std::enable_if<
- !is_simple_type<Y>::value, typename cast_retty<X, Y>::ret_type>::type
-dyn_cast_or_null(Y &Val) {
+LLVM_NODISCARD inline
+ typename std::enable_if<!is_simple_type<Y>::value,
+ typename cast_retty<X, Y>::ret_type>::type
+ dyn_cast_or_null(Y &Val) {
return (Val && isa<X>(Val)) ? cast<X>(Val) : nullptr;
}
template <class X, class Y>
-LLVM_ATTRIBUTE_UNUSED_RESULT inline typename cast_retty<X, Y *>::ret_type
+LLVM_NODISCARD inline typename cast_retty<X, Y *>::ret_type
dyn_cast_or_null(Y *Val) {
return (Val && isa<X>(Val)) ? cast<X>(Val) : nullptr;
}
diff --git a/contrib/llvm/include/llvm/Support/Chrono.h b/contrib/llvm/include/llvm/Support/Chrono.h
new file mode 100644
index 000000000000..203439cab919
--- /dev/null
+++ b/contrib/llvm/include/llvm/Support/Chrono.h
@@ -0,0 +1,55 @@
+//===- llvm/Support/Chrono.h - Utilities for Timing Manipulation-*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_CHRONO_H
+#define LLVM_SUPPORT_CHRONO_H
+
+#include "llvm/Support/Compiler.h"
+
+#include <chrono>
+#include <ctime>
+
+namespace llvm {
+
+class raw_ostream;
+
+namespace sys {
+
+/// A time point on the system clock. This is provided for two reasons:
+/// - to insulate us against subtle differences in behavior due to differences in
+/// system clock precision (which is implementation-defined and differs between
+/// platforms).
+/// - to shorten the type name
+/// The default precision is nanoseconds. If you need a specific precision, specify
+/// it explicitly. If unsure, use the default. If you need a time point on a
+/// clock other than the system_clock, use std::chrono directly.
+template <typename D = std::chrono::nanoseconds>
+using TimePoint = std::chrono::time_point<std::chrono::system_clock, D>;
+
+/// Convert a TimePoint to std::time_t
+LLVM_ATTRIBUTE_ALWAYS_INLINE inline std::time_t toTimeT(TimePoint<> TP) {
+ using namespace std::chrono;
+ return system_clock::to_time_t(
+ time_point_cast<system_clock::time_point::duration>(TP));
+}
+
+/// Convert a std::time_t to a TimePoint
+LLVM_ATTRIBUTE_ALWAYS_INLINE inline TimePoint<std::chrono::seconds>
+toTimePoint(std::time_t T) {
+ using namespace std::chrono;
+ return time_point_cast<seconds>(system_clock::from_time_t(T));
+}
+
+} // namespace sys
+
+raw_ostream &operator<<(raw_ostream &OS, sys::TimePoint<> TP);
+
+} // namespace llvm
+
+#endif // LLVM_SUPPORT_CHRONO_H
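
A small round-trip sketch for the TimePoint alias and the conversion helpers declared in the new header above:

    #include "llvm/Support/Chrono.h"
    #include <chrono>
    #include <ctime>

    void roundTripNow() {
      // Nanosecond-precision time point on the system clock.
      llvm::sys::TimePoint<> Now = std::chrono::system_clock::now();
      std::time_t T = llvm::sys::toTimeT(Now);      // whole seconds since the epoch
      llvm::sys::TimePoint<std::chrono::seconds> Back = llvm::sys::toTimePoint(T);
      (void)Back; // Back equals Now truncated to second precision.
    }
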
diff --git a/contrib/llvm/include/llvm/Support/CodeGen.h b/contrib/llvm/include/llvm/Support/CodeGen.h
index e19abf8271eb..941c112b0dd2 100644
--- a/contrib/llvm/include/llvm/Support/CodeGen.h
+++ b/contrib/llvm/include/llvm/Support/CodeGen.h
@@ -19,7 +19,7 @@ namespace llvm {
// Relocation model types.
namespace Reloc {
- enum Model { Static, PIC_, DynamicNoPIC };
+ enum Model { Static, PIC_, DynamicNoPIC, ROPI, RWPI, ROPI_RWPI };
}
// Code model types.
diff --git a/contrib/llvm/include/llvm/Support/CommandLine.h b/contrib/llvm/include/llvm/Support/CommandLine.h
index 70465a0e3fd3..204672f88dd9 100644
--- a/contrib/llvm/include/llvm/Support/CommandLine.h
+++ b/contrib/llvm/include/llvm/Support/CommandLine.h
@@ -21,16 +21,21 @@
#define LLVM_SUPPORT_COMMANDLINE_H
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
#include <cassert>
#include <climits>
-#include <cstdarg>
-#include <utility>
+#include <cstddef>
+#include <initializer_list>
+#include <string>
+#include <type_traits>
#include <vector>
namespace llvm {
@@ -46,7 +51,7 @@ namespace cl {
// ParseCommandLineOptions - Command line option processing entry point.
//
bool ParseCommandLineOptions(int argc, const char *const *argv,
- const char *Overview = nullptr,
+ StringRef Overview = "",
bool IgnoreErrors = false);
//===----------------------------------------------------------------------===//
@@ -54,7 +59,7 @@ bool ParseCommandLineOptions(int argc, const char *const *argv,
// entry point.
//
void ParseEnvironmentOptions(const char *progName, const char *envvar,
- const char *Overview = nullptr);
+ const char *Overview = "");
///===---------------------------------------------------------------------===//
/// SetVersionPrinter - Override the default (LLVM specific) version printer
@@ -88,7 +93,7 @@ class Option;
///
/// Literal options are used by some parsers to register special option values.
/// This is how the PassNameParser registers pass names for opt.
-void AddLiteralOption(Option &O, const char *Name);
+void AddLiteralOption(Option &O, StringRef Name);
//===----------------------------------------------------------------------===//
// Flags permitted to be passed to command line arguments
@@ -156,18 +161,20 @@ enum MiscFlags { // Miscellaneous flags to adjust argument
//
class OptionCategory {
private:
- const char *const Name;
- const char *const Description;
+ StringRef const Name;
+ StringRef const Description;
+
void registerCategory();
public:
- OptionCategory(const char *const Name,
- const char *const Description = nullptr)
+ OptionCategory(StringRef const Name,
+ StringRef const Description = "")
: Name(Name), Description(Description) {
registerCategory();
}
- const char *getName() const { return Name; }
- const char *getDescription() const { return Description; }
+
+ StringRef getName() const { return Name; }
+ StringRef getDescription() const { return Description; }
};
// The general Option Category (used as default category).
@@ -178,26 +185,26 @@ extern OptionCategory GeneralCategory;
//
class SubCommand {
private:
- const char *const Name = nullptr;
- const char *const Description = nullptr;
+ StringRef Name;
+ StringRef Description;
protected:
void registerSubCommand();
void unregisterSubCommand();
public:
- SubCommand(const char *const Name, const char *const Description = nullptr)
+ SubCommand(StringRef Name, StringRef Description = "")
: Name(Name), Description(Description) {
- registerSubCommand();
+ registerSubCommand();
}
- SubCommand() {}
+ SubCommand() = default;
void reset();
operator bool() const;
- const char *getName() const { return Name; }
- const char *getDescription() const { return Description; }
+ StringRef getName() const { return Name; }
+ StringRef getDescription() const { return Description; }
SmallVector<Option *, 4> PositionalOpts;
SmallVector<Option *, 4> SinkOpts;
@@ -215,7 +222,6 @@ extern ManagedStatic<SubCommand> AllSubCommands;
//===----------------------------------------------------------------------===//
// Option Base class
//
-class alias;
class Option {
friend class alias;
@@ -257,15 +263,19 @@ public:
inline enum NumOccurrencesFlag getNumOccurrencesFlag() const {
return (enum NumOccurrencesFlag)Occurrences;
}
+
inline enum ValueExpected getValueExpectedFlag() const {
return Value ? ((enum ValueExpected)Value) : getValueExpectedFlagDefault();
}
+
inline enum OptionHidden getOptionHiddenFlag() const {
return (enum OptionHidden)HiddenFlag;
}
+
inline enum FormattingFlags getFormattingFlag() const {
return (enum FormattingFlags)Formatting;
}
+
inline unsigned getMiscFlags() const { return Misc; }
inline unsigned getPosition() const { return Position; }
inline unsigned getNumAdditionalVals() const { return AdditionalVals; }
@@ -274,11 +284,13 @@ public:
bool hasArgStr() const { return !ArgStr.empty(); }
bool isPositional() const { return getFormattingFlag() == cl::Positional; }
bool isSink() const { return getMiscFlags() & cl::Sink; }
+
bool isConsumeAfter() const {
return getNumOccurrencesFlag() == cl::ConsumeAfter;
}
+
bool isInAllSubCommands() const {
- return std::any_of(Subs.begin(), Subs.end(), [](const SubCommand *SC) {
+ return any_of(Subs, [](const SubCommand *SC) {
return SC == &*AllSubCommands;
});
}
@@ -303,12 +315,14 @@ protected:
enum OptionHidden Hidden)
: NumOccurrences(0), Occurrences(OccurrencesFlag), Value(0),
HiddenFlag(Hidden), Formatting(NormalFormatting), Misc(0), Position(0),
- AdditionalVals(0), ArgStr(""), HelpStr(""), ValueStr(""),
- Category(&GeneralCategory), FullyInitialized(false) {}
+ AdditionalVals(0), Category(&GeneralCategory), FullyInitialized(false) {
+ }
inline void setNumAdditionalVals(unsigned n) { AdditionalVals = n; }
public:
+ virtual ~Option() = default;
+
// addArgument - Register this argument with the commandline system.
//
void addArgument();
@@ -339,10 +353,8 @@ public:
// Prints option name followed by message. Always returns true.
bool error(const Twine &Message, StringRef ArgName = StringRef());
-public:
inline int getNumOccurrences() const { return NumOccurrences; }
inline void reset() { NumOccurrences = 0; }
- virtual ~Option() {}
};
//===----------------------------------------------------------------------===//
@@ -352,16 +364,20 @@ public:
// desc - Modifier to set the description shown in the -help output...
struct desc {
- const char *Desc;
- desc(const char *Str) : Desc(Str) {}
+ StringRef Desc;
+
+ desc(StringRef Str) : Desc(Str) {}
+
void apply(Option &O) const { O.setDescription(Desc); }
};
// value_desc - Modifier to set the value description shown in the -help
// output...
struct value_desc {
- const char *Desc;
- value_desc(const char *Str) : Desc(Str) {}
+ StringRef Desc;
+
+ value_desc(StringRef Str) : Desc(Str) {}
+
void apply(Option &O) const { O.setValueStr(Desc); }
};
@@ -386,6 +402,7 @@ template <class Ty> initializer<Ty> init(const Ty &Val) {
//
template <class Ty> struct LocationClass {
Ty &Loc;
+
LocationClass(Ty &L) : Loc(L) {}
template <class Opt> void apply(Opt &O) const { O.setLocation(O, Loc); }
@@ -399,6 +416,7 @@ template <class Ty> LocationClass<Ty> location(Ty &L) {
// to.
struct cat {
OptionCategory &Category;
+
cat(OptionCategory &c) : Category(c) {}
template <class Opt> void apply(Opt &O) const { O.setCategory(Category); }
@@ -407,6 +425,7 @@ struct cat {
// sub - Specify the subcommand that this option belongs to.
struct sub {
SubCommand &Sub;
+
sub(SubCommand &S) : Sub(S) {}
template <class Opt> void apply(Opt &O) const { O.addSubCommand(Sub); }
@@ -420,9 +439,9 @@ struct GenericOptionValue {
virtual bool compare(const GenericOptionValue &V) const = 0;
protected:
- ~GenericOptionValue() = default;
GenericOptionValue() = default;
GenericOptionValue(const GenericOptionValue&) = default;
+ ~GenericOptionValue() = default;
GenericOptionValue &operator=(const GenericOptionValue &) = default;
private:
@@ -458,15 +477,15 @@ protected:
// Simple copy of the option value.
template <class DataType> class OptionValueCopy : public GenericOptionValue {
DataType Value;
- bool Valid;
+ bool Valid = false;
protected:
- ~OptionValueCopy() = default;
OptionValueCopy(const OptionValueCopy&) = default;
+ ~OptionValueCopy() = default;
OptionValueCopy &operator=(const OptionValueCopy&) = default;
public:
- OptionValueCopy() : Valid(false) {}
+ OptionValueCopy() = default;
bool hasValue() const { return Valid; }
@@ -497,9 +516,9 @@ struct OptionValueBase<DataType, false> : OptionValueCopy<DataType> {
typedef DataType WrapperType;
protected:
- ~OptionValueBase() = default;
OptionValueBase() = default;
OptionValueBase(const OptionValueBase&) = default;
+ ~OptionValueBase() = default;
OptionValueBase &operator=(const OptionValueBase&) = default;
};
@@ -510,6 +529,7 @@ struct OptionValue final
OptionValue() = default;
OptionValue(const DataType &V) { this->setValue(V); }
+
// Some options may take their value from a different data type.
template <class DT> OptionValue<DataType> &operator=(const DT &V) {
this->setValue(V);
@@ -524,9 +544,10 @@ struct OptionValue<cl::boolOrDefault> final
: OptionValueCopy<cl::boolOrDefault> {
typedef cl::boolOrDefault WrapperType;
- OptionValue() {}
+ OptionValue() = default;
OptionValue(const cl::boolOrDefault &V) { this->setValue(V); }
+
OptionValue<cl::boolOrDefault> &operator=(const cl::boolOrDefault &V) {
setValue(V);
return *this;
@@ -540,9 +561,10 @@ template <>
struct OptionValue<std::string> final : OptionValueCopy<std::string> {
typedef StringRef WrapperType;
- OptionValue() {}
+ OptionValue() = default;
OptionValue(const std::string &V) { this->setValue(V); }
+
OptionValue<std::string> &operator=(const std::string &V) {
setValue(V);
return *this;
@@ -555,52 +577,43 @@ private:
//===----------------------------------------------------------------------===//
// Enum valued command line option
//
-#define clEnumVal(ENUMVAL, DESC) #ENUMVAL, int(ENUMVAL), DESC
-#define clEnumValN(ENUMVAL, FLAGNAME, DESC) FLAGNAME, int(ENUMVAL), DESC
-#define clEnumValEnd (reinterpret_cast<void *>(0))
+
+// This represents a single enum value, using "int" as the underlying type.
+struct OptionEnumValue {
+ StringRef Name;
+ int Value;
+ StringRef Description;
+};
+
+#define clEnumVal(ENUMVAL, DESC) \
+ llvm::cl::OptionEnumValue { #ENUMVAL, int(ENUMVAL), DESC }
+#define clEnumValN(ENUMVAL, FLAGNAME, DESC) \
+ llvm::cl::OptionEnumValue { FLAGNAME, int(ENUMVAL), DESC }
// values - For custom data types, allow specifying a group of values together
-// as the values that go into the mapping that the option handler uses. Note
-// that the values list must always have a 0 at the end of the list to indicate
-// that the list has ended.
+// as the values that go into the mapping that the option handler uses.
//
-template <class DataType> class ValuesClass {
+class ValuesClass {
// Use a vector instead of a map, because the lists should be short,
// the overhead is less, and most importantly, it keeps them in the order
// inserted so we can print our option out nicely.
- SmallVector<std::pair<const char *, std::pair<int, const char *>>, 4> Values;
- void processValues(va_list Vals);
+ SmallVector<OptionEnumValue, 4> Values;
public:
- ValuesClass(const char *EnumName, DataType Val, const char *Desc,
- va_list ValueArgs) {
- // Insert the first value, which is required.
- Values.push_back(std::make_pair(EnumName, std::make_pair(Val, Desc)));
-
- // Process the varargs portion of the values...
- while (const char *enumName = va_arg(ValueArgs, const char *)) {
- DataType EnumVal = static_cast<DataType>(va_arg(ValueArgs, int));
- const char *EnumDesc = va_arg(ValueArgs, const char *);
- Values.push_back(std::make_pair(enumName, // Add value to value map
- std::make_pair(EnumVal, EnumDesc)));
- }
- }
+ ValuesClass(std::initializer_list<OptionEnumValue> Options)
+ : Values(Options) {}
template <class Opt> void apply(Opt &O) const {
- for (size_t i = 0, e = Values.size(); i != e; ++i)
- O.getParser().addLiteralOption(Values[i].first, Values[i].second.first,
- Values[i].second.second);
+ for (auto Value : Values)
+ O.getParser().addLiteralOption(Value.Name, Value.Value,
+ Value.Description);
}
};
-template <class DataType>
-ValuesClass<DataType> LLVM_END_WITH_NULL
-values(const char *Arg, DataType Val, const char *Desc, ...) {
- va_list ValueArgs;
- va_start(ValueArgs, Desc);
- ValuesClass<DataType> Vals(Arg, Val, Desc, ValueArgs);
- va_end(ValueArgs);
- return Vals;
+/// Helper to build a ValuesClass by forwarding a variable number of arguments
+/// as an initializer list to the ValuesClass constructor.
+template <typename... OptsTy> ValuesClass values(OptsTy... Options) {
+ return ValuesClass({Options...});
}
//===----------------------------------------------------------------------===//
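
A usage sketch of the new initializer-list form (the enum and option names are illustrative): clEnumVal/clEnumValN now construct OptionEnumValue objects directly, and the old clEnumValEnd sentinel is no longer needed.

    enum OptLevel { Debug, O1, O2 };
    static llvm::cl::opt<OptLevel> OptimizationLevel(
        "opt-level", llvm::cl::desc("Choose optimization level:"),
        llvm::cl::values(clEnumValN(Debug, "g", "No optimizations, enable debugging"),
                         clEnumVal(O1, "Enable trivial optimizations"),
                         clEnumVal(O2, "Enable default optimizations")));
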
@@ -619,16 +632,17 @@ class generic_parser_base {
protected:
class GenericOptionInfo {
public:
- GenericOptionInfo(const char *name, const char *helpStr)
+ GenericOptionInfo(StringRef name, StringRef helpStr)
: Name(name), HelpStr(helpStr) {}
- const char *Name;
- const char *HelpStr;
+ StringRef Name;
+ StringRef HelpStr;
};
public:
generic_parser_base(Option &O) : Owner(O) {}
- virtual ~generic_parser_base() {} // Base class should have virtual-dtor
+ virtual ~generic_parser_base() = default;
+ // Base class should have a virtual destructor.
// getNumOptions - Virtual function implemented by generic subclass to
// indicate how many entries are in Values.
@@ -636,10 +650,10 @@ public:
virtual unsigned getNumOptions() const = 0;
// getOption - Return option name N.
- virtual const char *getOption(unsigned N) const = 0;
+ virtual StringRef getOption(unsigned N) const = 0;
// getDescription - Return description N
- virtual const char *getDescription(unsigned N) const = 0;
+ virtual StringRef getDescription(unsigned N) const = 0;
// Return the width of the option tag for printing...
virtual size_t getOptionWidth(const Option &O) const;
@@ -698,7 +712,7 @@ public:
// findOption - Return the option number corresponding to the specified
// argument string. If the option is not found, getNumOptions() is returned.
//
- unsigned findOption(const char *Name);
+ unsigned findOption(StringRef Name);
protected:
Option &Owner;
@@ -714,7 +728,7 @@ template <class DataType> class parser : public generic_parser_base {
protected:
class OptionInfo : public GenericOptionInfo {
public:
- OptionInfo(const char *name, DataType v, const char *helpStr)
+ OptionInfo(StringRef name, DataType v, StringRef helpStr)
: GenericOptionInfo(name, helpStr), V(v) {}
OptionValue<DataType> V;
};
@@ -726,8 +740,8 @@ public:
// Implement virtual functions needed by generic_parser_base
unsigned getNumOptions() const override { return unsigned(Values.size()); }
- const char *getOption(unsigned N) const override { return Values[N].Name; }
- const char *getDescription(unsigned N) const override {
+ StringRef getOption(unsigned N) const override { return Values[N].Name; }
+ StringRef getDescription(unsigned N) const override {
return Values[N].HelpStr;
}
@@ -756,7 +770,7 @@ public:
/// addLiteralOption - Add an entry to the mapping table.
///
template <class DT>
- void addLiteralOption(const char *Name, const DT &V, const char *HelpStr) {
+ void addLiteralOption(StringRef Name, const DT &V, StringRef HelpStr) {
assert(findOption(Name) == Values.size() && "Option already exists!");
OptionInfo X(Name, static_cast<DataType>(V), HelpStr);
Values.push_back(X);
@@ -765,7 +779,7 @@ public:
/// removeLiteralOption - Remove the specified option.
///
- void removeLiteralOption(const char *Name) {
+ void removeLiteralOption(StringRef Name) {
unsigned N = findOption(Name);
assert(N != Values.size() && "Option not found!");
Values.erase(Values.begin() + N);
@@ -779,7 +793,6 @@ class basic_parser_impl { // non-template implementation of basic_parser<t>
public:
basic_parser_impl(Option &) {}
-
enum ValueExpected getValueExpectedFlagDefault() const {
return ValueRequired;
}
@@ -801,13 +814,14 @@ public:
void printOptionNoValue(const Option &O, size_t GlobalWidth) const;
// getValueName - Overload in subclass to provide a better default value.
- virtual const char *getValueName() const { return "value"; }
+ virtual StringRef getValueName() const { return "value"; }
// An out-of-line virtual method to provide a 'home' for this class.
virtual void anchor();
protected:
~basic_parser_impl() = default;
+
// A helper for basic_parser::printOptionDiff.
void printOptionName(const Option &O, size_t GlobalWidth) const;
};
@@ -818,12 +832,12 @@ protected:
template <class DataType> class basic_parser : public basic_parser_impl {
public:
basic_parser(Option &O) : basic_parser_impl(O) {}
+
typedef DataType parser_data_type;
typedef OptionValue<DataType> OptVal;
protected:
- // Workaround Clang PR22793
- ~basic_parser() {}
+ ~basic_parser() = default;
};
//--------------------------------------------------
@@ -843,7 +857,7 @@ public:
}
// getValueName - Do not print =<value> at all.
- const char *getValueName() const override { return nullptr; }
+ StringRef getValueName() const override { return StringRef(); }
void printOptionDiff(const Option &O, bool V, OptVal Default,
size_t GlobalWidth) const;
@@ -869,7 +883,7 @@ public:
}
// getValueName - Do not print =<value> at all.
- const char *getValueName() const override { return nullptr; }
+ StringRef getValueName() const override { return StringRef(); }
void printOptionDiff(const Option &O, boolOrDefault V, OptVal Default,
size_t GlobalWidth) const;
@@ -891,7 +905,7 @@ public:
bool parse(Option &O, StringRef ArgName, StringRef Arg, int &Val);
// getValueName - Overload in subclass to provide a better default value.
- const char *getValueName() const override { return "int"; }
+ StringRef getValueName() const override { return "int"; }
void printOptionDiff(const Option &O, int V, OptVal Default,
size_t GlobalWidth) const;
@@ -913,7 +927,7 @@ public:
bool parse(Option &O, StringRef ArgName, StringRef Arg, unsigned &Val);
// getValueName - Overload in subclass to provide a better default value.
- const char *getValueName() const override { return "uint"; }
+ StringRef getValueName() const override { return "uint"; }
void printOptionDiff(const Option &O, unsigned V, OptVal Default,
size_t GlobalWidth) const;
@@ -938,7 +952,7 @@ public:
unsigned long long &Val);
// getValueName - Overload in subclass to provide a better default value.
- const char *getValueName() const override { return "uint"; }
+ StringRef getValueName() const override { return "uint"; }
void printOptionDiff(const Option &O, unsigned long long V, OptVal Default,
size_t GlobalWidth) const;
@@ -960,7 +974,7 @@ public:
bool parse(Option &O, StringRef ArgName, StringRef Arg, double &Val);
// getValueName - Overload in subclass to provide a better default value.
- const char *getValueName() const override { return "number"; }
+ StringRef getValueName() const override { return "number"; }
void printOptionDiff(const Option &O, double V, OptVal Default,
size_t GlobalWidth) const;
@@ -982,7 +996,7 @@ public:
bool parse(Option &O, StringRef ArgName, StringRef Arg, float &Val);
// getValueName - Overload in subclass to provide a better default value.
- const char *getValueName() const override { return "number"; }
+ StringRef getValueName() const override { return "number"; }
void printOptionDiff(const Option &O, float V, OptVal Default,
size_t GlobalWidth) const;
@@ -1007,7 +1021,7 @@ public:
}
// getValueName - Overload in subclass to provide a better default value.
- const char *getValueName() const override { return "string"; }
+ StringRef getValueName() const override { return "string"; }
void printOptionDiff(const Option &O, StringRef V, const OptVal &Default,
size_t GlobalWidth) const;
@@ -1032,7 +1046,7 @@ public:
}
// getValueName - Overload in subclass to provide a better default value.
- const char *getValueName() const override { return "char"; }
+ StringRef getValueName() const override { return "char"; }
void printOptionDiff(const Option &O, char V, OptVal Default,
size_t GlobalWidth) const;
@@ -1100,17 +1114,17 @@ template <class Mod> struct applicator {
// Handle const char* as a special case...
template <unsigned n> struct applicator<char[n]> {
- template <class Opt> static void opt(const char *Str, Opt &O) {
+ template <class Opt> static void opt(StringRef Str, Opt &O) {
O.setArgStr(Str);
}
};
template <unsigned n> struct applicator<const char[n]> {
- template <class Opt> static void opt(const char *Str, Opt &O) {
+ template <class Opt> static void opt(StringRef Str, Opt &O) {
O.setArgStr(Str);
}
};
-template <> struct applicator<const char *> {
- template <class Opt> static void opt(const char *Str, Opt &O) {
+template <> struct applicator<StringRef> {
+ template <class Opt> static void opt(StringRef Str, Opt &O) {
O.setArgStr(Str);
}
};
@@ -1120,15 +1134,19 @@ template <> struct applicator<NumOccurrencesFlag> {
O.setNumOccurrencesFlag(N);
}
};
+
template <> struct applicator<ValueExpected> {
static void opt(ValueExpected VE, Option &O) { O.setValueExpectedFlag(VE); }
};
+
template <> struct applicator<OptionHidden> {
static void opt(OptionHidden OH, Option &O) { O.setHiddenFlag(OH); }
};
+
template <> struct applicator<FormattingFlags> {
static void opt(FormattingFlags FF, Option &O) { O.setFormattingFlag(FF); }
};
+
template <> struct applicator<MiscFlags> {
static void opt(MiscFlags MF, Option &O) { O.setMiscFlag(MF); }
};
@@ -1153,7 +1171,7 @@ template <class Opt, class Mod> void apply(Opt *O, const Mod &M) {
//
template <class DataType, bool ExternalStorage, bool isClass>
class opt_storage {
- DataType *Location; // Where to store the object...
+ DataType *Location = nullptr; // Where to store the object...
OptionValue<DataType> Default;
void check_location() const {
@@ -1163,7 +1181,7 @@ class opt_storage {
}
public:
- opt_storage() : Location(nullptr) {}
+ opt_storage() = default;
bool setLocation(Option &O, DataType &L) {
if (Location)
@@ -1292,11 +1310,11 @@ class opt : public Option,
Parser.initialize();
}
+public:
// Command line options should not be copyable
opt(const opt &) = delete;
opt &operator=(const opt &) = delete;
-public:
// setInitialValue - Used by the cl::init modifier...
void setInitialValue(const DataType &V) { this->setValue(V, true); }
@@ -1329,10 +1347,10 @@ extern template class opt<bool>;
// cl::location(x) modifier.
//
template <class DataType, class StorageClass> class list_storage {
- StorageClass *Location; // Where to store the object...
+ StorageClass *Location = nullptr; // Where to store the object...
public:
- list_storage() : Location(0) {}
+ list_storage() = default;
bool setLocation(Option &O, StorageClass &L) {
if (Location)
@@ -1462,11 +1480,11 @@ class list : public Option, public list_storage<DataType, StorageClass> {
Parser.initialize();
}
+public:
// Command line options should not be copyable
list(const list &) = delete;
list &operator=(const list &) = delete;
-public:
ParserClass &getParser() { return Parser; }
unsigned getPosition(unsigned optnum) const {
@@ -1503,7 +1521,7 @@ struct multi_val {
// cl::location(x) modifier.
//
template <class DataType, class StorageClass> class bits_storage {
- unsigned *Location; // Where to store the bits...
+ unsigned *Location = nullptr; // Where to store the bits...
template <class T> static unsigned Bit(const T &V) {
unsigned BitPos = reinterpret_cast<unsigned>(V);
@@ -1513,7 +1531,7 @@ template <class DataType, class StorageClass> class bits_storage {
}
public:
- bits_storage() : Location(nullptr) {}
+ bits_storage() = default;
bool setLocation(Option &O, unsigned &L) {
if (Location)
@@ -1601,11 +1619,11 @@ class bits : public Option, public bits_storage<DataType, Storage> {
Parser.initialize();
}
+public:
// Command line options should not be copyable
bits(const bits &) = delete;
bits &operator=(const bits &) = delete;
-public:
ParserClass &getParser() { return Parser; }
unsigned getPosition(unsigned optnum) const {
@@ -1627,14 +1645,17 @@ public:
class alias : public Option {
Option *AliasFor;
+
bool handleOccurrence(unsigned pos, StringRef /*ArgName*/,
StringRef Arg) override {
return AliasFor->handleOccurrence(pos, AliasFor->ArgStr, Arg);
}
+
bool addOccurrence(unsigned pos, StringRef /*ArgName*/, StringRef Value,
bool MultiArg = false) override {
return AliasFor->addOccurrence(pos, AliasFor->ArgStr, Value, MultiArg);
}
+
// Handle printing stuff...
size_t getOptionWidth() const override;
void printOptionInfo(size_t GlobalWidth) const override;
@@ -1656,11 +1677,11 @@ class alias : public Option {
addArgument();
}
+public:
// Command line options should not be copyable
alias(const alias &) = delete;
alias &operator=(const alias &) = delete;
-public:
void setAliasFor(Option &O) {
if (AliasFor)
error("cl::alias must only have one cl::aliasopt(...) specified!");
@@ -1678,7 +1699,9 @@ public:
// aliasfor - Modifier to set the option an alias aliases.
struct aliasopt {
Option &Opt;
+
explicit aliasopt(Option &O) : Opt(O) {}
+
void apply(alias &A) const { A.setAliasFor(Opt); }
};
@@ -1687,8 +1710,9 @@ struct aliasopt {
// printed to stderr at the end of the regular help, just before
// exit is called.
struct extrahelp {
- const char *morehelp;
- explicit extrahelp(const char *help);
+ StringRef morehelp;
+
+ explicit extrahelp(StringRef help);
};
void PrintVersionMessage();
@@ -1730,11 +1754,33 @@ void PrintHelpMessage(bool Hidden = false, bool Categorized = false);
/// the control of the client. The options should be modified before calling
/// llvm::cl::ParseCommandLineOptions().
///
-/// Hopefully this API can be depricated soon. Any situation where options need
+/// Hopefully this API can be deprecated soon. Any situation where options need
/// to be modified by tools or libraries should be handled by sane APIs rather
/// than just handing around a global list.
StringMap<Option *> &getRegisteredOptions(SubCommand &Sub = *TopLevelSubCommand);
+/// \brief Use this to get all registered SubCommands from the provided parser.
+///
+/// \return A range of all SubCommand pointers registered with the parser.
+///
+/// Typical usage:
+/// \code
+/// int main(int argc, char* argv[]) {
+/// llvm::cl::ParseCommandLineOptions(argc, argv);
+/// for (auto* S : llvm::cl::getRegisteredSubcommands()) {
+/// if (*S) {
+/// std::cout << "Executing subcommand: " << S->getName() << std::endl;
+/// // Execute some function based on the name...
+/// }
+/// }
+/// }
+/// \endcode
+///
+/// This interface is useful for defining subcommands in libraries and
+/// dispatching them from a single point (such as the main function).
+iterator_range<typename SmallPtrSet<SubCommand *, 4>::iterator>
+getRegisteredSubcommands();
+
//===----------------------------------------------------------------------===//
// Standalone command line processing utilities.
//
@@ -1789,10 +1835,12 @@ typedef void (*TokenizerCallback)(StringRef Source, StringSaver &Saver,
/// \param [in,out] Argv Command line into which to expand response files.
/// \param [in] MarkEOLs Mark end of lines and the end of the response file
/// with nullptrs in the Argv vector.
+/// \param [in] RelativeNames true if names of nested response files must be
+/// resolved relative to including file.
/// \return true if all @files were expanded successfully or there were none.
bool ExpandResponseFiles(StringSaver &Saver, TokenizerCallback Tokenizer,
SmallVectorImpl<const char *> &Argv,
- bool MarkEOLs = false);
+ bool MarkEOLs = false, bool RelativeNames = false);
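
A minimal calling sketch for the extended signature (the Argv setup and the choice of tokenizer are assumptions, not part of the patch):

    llvm::BumpPtrAllocator Alloc;
    llvm::StringSaver Saver(Alloc);
    llvm::SmallVector<const char *, 32> Args(argv, argv + argc);
    // Resolve nested @file references relative to the including response file.
    llvm::cl::ExpandResponseFiles(Saver, llvm::cl::TokenizeGNUCommandLine, Args,
                                  /*MarkEOLs=*/false, /*RelativeNames=*/true);
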
/// \brief Mark all options not part of this category as cl::ReallyHidden.
///
@@ -1825,8 +1873,7 @@ void ResetAllOptionOccurrences();
/// where no options are supported.
void ResetCommandLineParser();
-} // End namespace cl
-
-} // End namespace llvm
+} // end namespace cl
+} // end namespace llvm
-#endif
+#endif // LLVM_SUPPORT_COMMANDLINE_H
diff --git a/contrib/llvm/include/llvm/Support/Compiler.h b/contrib/llvm/include/llvm/Support/Compiler.h
index fae0d8f4419e..55148a490c29 100644
--- a/contrib/llvm/include/llvm/Support/Compiler.h
+++ b/contrib/llvm/include/llvm/Support/Compiler.h
@@ -33,6 +33,10 @@
# define __has_attribute(x) 0
#endif
+#ifndef __has_cpp_attribute
+# define __has_cpp_attribute(x) 0
+#endif
+
#ifndef __has_builtin
# define __has_builtin(x) 0
#endif
@@ -56,26 +60,19 @@
/// \macro LLVM_MSC_PREREQ
/// \brief Is the compiler MSVC of at least the specified version?
/// The common \param version values to check for are:
-/// * 1800: Microsoft Visual Studio 2013 / 12.0
/// * 1900: Microsoft Visual Studio 2015 / 14.0
#ifdef _MSC_VER
#define LLVM_MSC_PREREQ(version) (_MSC_VER >= (version))
-// We require at least MSVC 2013.
-#if !LLVM_MSC_PREREQ(1800)
-#error LLVM requires at least MSVC 2013.
+// We require at least MSVC 2015.
+#if !LLVM_MSC_PREREQ(1900)
+#error LLVM requires at least MSVC 2015.
#endif
#else
#define LLVM_MSC_PREREQ(version) 0
#endif
-#if !defined(_MSC_VER) || defined(__clang__) || LLVM_MSC_PREREQ(1900)
-#define LLVM_NOEXCEPT noexcept
-#else
-#define LLVM_NOEXCEPT throw()
-#endif
-
/// \brief Does the compiler support ref-qualifiers for *this?
///
/// Sadly, this is separate from just rvalue reference support because GCC
@@ -96,12 +93,6 @@
#define LLVM_LVALUE_FUNCTION
#endif
-#if __has_feature(cxx_constexpr) || defined(__GXX_EXPERIMENTAL_CXX0X__) || LLVM_MSC_PREREQ(1900)
-# define LLVM_CONSTEXPR constexpr
-#else
-# define LLVM_CONSTEXPR
-#endif
-
/// LLVM_LIBRARY_VISIBILITY - If a class marked with this attribute is linked
/// into a shared library, then the class should be private to the library and
/// not accessible from outside it. Can also be used to mark variables and
@@ -114,6 +105,12 @@
#define LLVM_LIBRARY_VISIBILITY
#endif
+#if defined(__GNUC__)
+#define LLVM_PREFETCH(addr, rw, locality) __builtin_prefetch(addr, rw, locality)
+#else
+#define LLVM_PREFETCH(addr, rw, locality)
+#endif
+
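
The arguments mirror __builtin_prefetch: rw is 0 for read and 1 for write, and locality ranges from 0 (no temporal locality) to 3 (high). A sketch, with Ptr standing in for a pointer about to be dereferenced:

    LLVM_PREFETCH(Ptr, /*rw=*/0, /*locality=*/3); // expands to nothing on non-GNU compilers
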
#if __has_attribute(sentinel) || LLVM_GNUC_PREREQ(3, 0, 0)
#define LLVM_END_WITH_NULL __attribute__((sentinel))
#else
@@ -126,12 +123,17 @@
#define LLVM_ATTRIBUTE_USED
#endif
-#if __has_attribute(warn_unused_result) || LLVM_GNUC_PREREQ(3, 4, 0)
-#define LLVM_ATTRIBUTE_UNUSED_RESULT __attribute__((__warn_unused_result__))
-#elif defined(_MSC_VER)
-#define LLVM_ATTRIBUTE_UNUSED_RESULT _Check_return_
+/// LLVM_NODISCARD - Warn if a type or return value is discarded.
+#if __cplusplus > 201402L && __has_cpp_attribute(nodiscard)
+#define LLVM_NODISCARD [[nodiscard]]
+#elif !__cplusplus
+// Workaround for llvm.org/PR23435, since clang 3.6 and below emit a spurious
+// error when __has_cpp_attribute is given a scoped attribute in C mode.
+#define LLVM_NODISCARD
+#elif __has_cpp_attribute(clang::warn_unused_result)
+#define LLVM_NODISCARD [[clang::warn_unused_result]]
#else
-#define LLVM_ATTRIBUTE_UNUSED_RESULT
+#define LLVM_NODISCARD
#endif
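
Typical use is on query-style functions whose result must not be dropped; a sketch (the member is illustrative):

    LLVM_NODISCARD bool empty() const { return Size == 0; }
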
// Some compilers warn about unused functions. When a function is sometimes
@@ -228,6 +230,19 @@
#define LLVM_ATTRIBUTE_RETURNS_NOALIAS
#endif
+/// LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
+#if __cplusplus > 201402L && __has_cpp_attribute(fallthrough)
+#define LLVM_FALLTHROUGH [[fallthrough]]
+#elif !__cplusplus
+// Workaround for llvm.org/PR23435, since clang 3.6 and below emit a spurious
+// error when __has_cpp_attribute is given a scoped attribute in C mode.
+#define LLVM_FALLTHROUGH
+#elif __has_cpp_attribute(clang::fallthrough)
+#define LLVM_FALLTHROUGH [[clang::fallthrough]]
+#else
+#define LLVM_FALLTHROUGH
+#endif
+
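
A sketch of the intended use, marking a deliberate fall-through between switch cases (the case labels and helpers are illustrative):

    switch (Kind) {
    case SimpleKind:
      handleSimple();
      LLVM_FALLTHROUGH; // deliberate: shared tail handling below
    case CommonKind:
      handleCommon();
      break;
    }
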
/// LLVM_EXTENSION - Support compilers where we have a keyword to suppress
/// pedantic diagnostics.
#ifdef __GNUC__
@@ -304,15 +319,8 @@
#endif
/// \macro LLVM_ALIGNAS
-/// \brief Used to specify a minimum alignment for a structure or variable. The
-/// alignment must be a constant integer. Use LLVM_PTR_SIZE to compute
-/// alignments in terms of the size of a pointer.
-///
-/// Note that __declspec(align) has special quirks, it's not legal to pass a
-/// structure with __declspec(align) as a formal parameter.
-#ifdef _MSC_VER
-# define LLVM_ALIGNAS(x) __declspec(align(x))
-#elif __GNUC__ && !__has_feature(cxx_alignas) && !LLVM_GNUC_PREREQ(4, 8, 0)
+/// \brief Used to specify a minimum alignment for a structure or variable.
+#if __GNUC__ && !__has_feature(cxx_alignas) && !LLVM_GNUC_PREREQ(4, 8, 1)
# define LLVM_ALIGNAS(x) __attribute__((aligned(x)))
#else
# define LLVM_ALIGNAS(x) alignas(x)
@@ -362,15 +370,6 @@
# define LLVM_PTR_SIZE sizeof(void *)
#endif
-/// \macro LLVM_FUNCTION_NAME
-/// \brief Expands to __func__ on compilers which support it. Otherwise,
-/// expands to a compiler-dependent replacement.
-#if defined(_MSC_VER)
-# define LLVM_FUNCTION_NAME __FUNCTION__
-#else
-# define LLVM_FUNCTION_NAME __func__
-#endif
-
/// \macro LLVM_MEMORY_SANITIZER_BUILD
/// \brief Whether LLVM itself is built with MemorySanitizer instrumentation.
#if __has_feature(memory_sanitizer)
@@ -405,12 +404,16 @@
// Thread Sanitizer is a tool that finds races in code.
// See http://code.google.com/p/data-race-test/wiki/DynamicAnnotations .
// tsan detects these exact functions by name.
+#ifdef __cplusplus
extern "C" {
+#endif
void AnnotateHappensAfter(const char *file, int line, const volatile void *cv);
void AnnotateHappensBefore(const char *file, int line, const volatile void *cv);
void AnnotateIgnoreWritesBegin(const char *file, int line);
void AnnotateIgnoreWritesEnd(const char *file, int line);
+#ifdef __cplusplus
}
+#endif
// This marker is used to define a happens-before arc. The race detector will
// infer an arc from the begin to the end when they share the same pointer
@@ -449,6 +452,19 @@ void AnnotateIgnoreWritesEnd(const char *file, int line);
#define LLVM_DUMP_METHOD LLVM_ATTRIBUTE_NOINLINE
#endif
+/// \macro LLVM_PRETTY_FUNCTION
+/// \brief Gets a user-friendly looking function signature for the current scope
+/// using the best available method on each platform. The exact format of the
+/// resulting string is implementation specific and non-portable, so this should
+/// only be used, for example, for logging or diagnostics.
+#if defined(_MSC_VER)
+#define LLVM_PRETTY_FUNCTION __FUNCSIG__
+#elif defined(__GNUC__) || defined(__clang__)
+#define LLVM_PRETTY_FUNCTION __PRETTY_FUNCTION__
+#else
+#define LLVM_PRETTY_FUNCTION __func__
+#endif
+
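
Typical use is in logging or diagnostics, for example (the message is illustrative):

    llvm::errs() << "entering " << LLVM_PRETTY_FUNCTION << "\n";
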
/// \macro LLVM_THREAD_LOCAL
/// \brief A thread-local storage specifier which can be used with globals,
/// extern globals, and static globals.
diff --git a/contrib/llvm/include/llvm/Support/Compression.h b/contrib/llvm/include/llvm/Support/Compression.h
index 28274d67aad2..5bf7031fe9f9 100644
--- a/contrib/llvm/include/llvm/Support/Compression.h
+++ b/contrib/llvm/include/llvm/Support/Compression.h
@@ -43,6 +43,9 @@ bool isAvailable();
Status compress(StringRef InputBuffer, SmallVectorImpl<char> &CompressedBuffer,
CompressionLevel Level = DefaultCompression);
+Status uncompress(StringRef InputBuffer, char *UncompressedBuffer,
+ size_t &UncompressedSize);
+
Status uncompress(StringRef InputBuffer,
SmallVectorImpl<char> &UncompressedBuffer,
size_t UncompressedSize);
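
A sketch of the new overload, assuming the caller already knows the decompressed size (CompressedData and KnownUncompressedSize are placeholders):

    size_t Size = KnownUncompressedSize;
    std::unique_ptr<char[]> Buf(new char[Size]);
    if (llvm::zlib::uncompress(CompressedData, Buf.get(), Size) != llvm::zlib::StatusOK)
      llvm::report_fatal_error("decompression failed");
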
diff --git a/contrib/llvm/include/llvm/Support/ConvertUTF.h b/contrib/llvm/include/llvm/Support/ConvertUTF.h
index 5de5774f9db5..f714c0ed997e 100644
--- a/contrib/llvm/include/llvm/Support/ConvertUTF.h
+++ b/contrib/llvm/include/llvm/Support/ConvertUTF.h
@@ -90,6 +90,14 @@
#ifndef LLVM_SUPPORT_CONVERTUTF_H
#define LLVM_SUPPORT_CONVERTUTF_H
+#include <string>
+#include <cstddef>
+
+// Wrap everything in namespace llvm so that programs can link with llvm and
+// their own version of the unicode libraries.
+
+namespace llvm {
+
/* ---------------------------------------------------------------------
The following 4 definitions are compiler-specific.
The C standard does not guarantee that wchar_t has at least
@@ -127,11 +135,6 @@ typedef enum {
lenientConversion
} ConversionFlags;
-/* This is for C++ and does no harm in C */
-#ifdef __cplusplus
-extern "C" {
-#endif
-
ConversionResult ConvertUTF8toUTF16 (
const UTF8** sourceStart, const UTF8* sourceEnd,
UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags);
@@ -174,16 +177,9 @@ Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd);
unsigned getNumBytesForUTF8(UTF8 firstByte);
-#ifdef __cplusplus
-}
-
/*************************************************************************/
/* Below are LLVM-specific wrappers of the functions above. */
-#include <string>
-#include <cstddef>
-
-namespace llvm {
template <typename T> class ArrayRef;
template <typename T> class SmallVectorImpl;
class StringRef;
@@ -293,7 +289,3 @@ bool convertUTF8ToUTF16String(StringRef SrcUTF8,
} /* end namespace llvm */
#endif
-
-/* --------------------------------------------------------------------- */
-
-#endif
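
With the declarations now inside namespace llvm, callers reach the wrappers through the llvm:: prefix; a small sketch (the input string is illustrative):

    llvm::SmallVector<llvm::UTF16, 32> Wide;
    if (!llvm::convertUTF8ToUTF16String("héllo", Wide))
      llvm::errs() << "invalid UTF-8 input\n";
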
diff --git a/contrib/llvm/include/llvm/Support/DataExtractor.h b/contrib/llvm/include/llvm/Support/DataExtractor.h
index 3ffa9bc90dd1..2d1180c228e3 100644
--- a/contrib/llvm/include/llvm/Support/DataExtractor.h
+++ b/contrib/llvm/include/llvm/Support/DataExtractor.h
@@ -29,7 +29,7 @@ public:
/// \brief Get the data pointed to by this extractor.
StringRef getData() const { return Data; }
- /// \brief Get the endianess for this extractor.
+ /// \brief Get the endianness for this extractor.
bool isLittleEndian() const { return IsLittleEndian; }
/// \brief Get the address size for this extractor.
uint8_t getAddressSize() const { return AddressSize; }
diff --git a/contrib/llvm/include/llvm/Support/DataStream.h b/contrib/llvm/include/llvm/Support/DataStream.h
deleted file mode 100644
index a544316f430d..000000000000
--- a/contrib/llvm/include/llvm/Support/DataStream.h
+++ /dev/null
@@ -1,38 +0,0 @@
-//===---- llvm/Support/DataStream.h - Lazy bitcode streaming ----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This header defines DataStreamer, which fetches bytes of data from
-// a stream source. It provides support for streaming (lazy reading) of
-// data, e.g. bitcode
-//
-//===----------------------------------------------------------------------===//
-
-
-#ifndef LLVM_SUPPORT_DATASTREAM_H
-#define LLVM_SUPPORT_DATASTREAM_H
-
-#include <memory>
-#include <string>
-
-namespace llvm {
-
-class DataStreamer {
-public:
- /// Fetch bytes [start-end) from the stream, and write them to the
- /// buffer pointed to by buf. Returns the number of bytes actually written.
- virtual size_t GetBytes(unsigned char *buf, size_t len) = 0;
-
- virtual ~DataStreamer();
-};
-
-std::unique_ptr<DataStreamer> getDataFileStreamer(const std::string &Filename,
- std::string *Err);
-}
-
-#endif // LLVM_SUPPORT_DATASTREAM_H_
diff --git a/contrib/llvm/include/llvm/Support/Debug.h b/contrib/llvm/include/llvm/Support/Debug.h
index 6e213477d710..3465c403361f 100644
--- a/contrib/llvm/include/llvm/Support/Debug.h
+++ b/contrib/llvm/include/llvm/Support/Debug.h
@@ -29,6 +29,7 @@
#define LLVM_SUPPORT_DEBUG_H
namespace llvm {
+
class raw_ostream;
#ifndef NDEBUG
@@ -50,6 +51,12 @@ bool isCurrentDebugType(const char *Type);
///
void setCurrentDebugType(const char *Type);
+/// setCurrentDebugTypes - Set the current debug type, as if the
+/// -debug-only=X,Y,Z option were specified. Note that DebugFlag
+/// also needs to be set to true for debug output to be produced.
+///
+void setCurrentDebugTypes(const char **Types, unsigned Count);
+
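
A sketch of programmatic use, mirroring -debug-only=isel,regalloc (the debug type names are illustrative):

    static const char *DebugTypes[] = {"isel", "regalloc"};
    llvm::setCurrentDebugTypes(DebugTypes, 2);
    llvm::DebugFlag = true; // still required for any debug output
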
/// DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug
/// information. If the '-debug' option is specified on the commandline, and if
/// this is a debug build, then the code specified as the option to the macro
@@ -61,12 +68,13 @@ void setCurrentDebugType(const char *Type);
/// is not specified, or is specified as "bitset".
#define DEBUG_WITH_TYPE(TYPE, X) \
do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType(TYPE)) { X; } \
- } while (0)
+ } while (false)
#else
#define isCurrentDebugType(X) (false)
#define setCurrentDebugType(X)
-#define DEBUG_WITH_TYPE(TYPE, X) do { } while (0)
+#define setCurrentDebugTypes(X, N)
+#define DEBUG_WITH_TYPE(TYPE, X) do { } while (false)
#endif
/// EnableDebugBuffering - This defaults to false. If true, the debug
@@ -91,6 +99,6 @@ raw_ostream &dbgs();
//
#define DEBUG(X) DEBUG_WITH_TYPE(DEBUG_TYPE, X)
-} // End llvm namespace
+} // end namespace llvm
-#endif
+#endif // LLVM_SUPPORT_DEBUG_H
diff --git a/contrib/llvm/include/llvm/Support/Dwarf.def b/contrib/llvm/include/llvm/Support/Dwarf.def
index b73f2aed5311..841fc7d4ae22 100644
--- a/contrib/llvm/include/llvm/Support/Dwarf.def
+++ b/contrib/llvm/include/llvm/Support/Dwarf.def
@@ -12,9 +12,14 @@
//===----------------------------------------------------------------------===//
// TODO: Add other DW-based macros.
-#if !(defined HANDLE_DW_TAG || defined HANDLE_DW_OP || \
+#if !(defined HANDLE_DW_TAG || defined HANDLE_DW_AT || \
+ defined HANDLE_DW_FORM || defined HANDLE_DW_OP || \
defined HANDLE_DW_LANG || defined HANDLE_DW_ATE || \
- defined HANDLE_DW_VIRTUALITY || defined HANDLE_DW_CC)
+ defined HANDLE_DW_VIRTUALITY || defined HANDLE_DW_DEFAULTED || \
+ defined HANDLE_DW_CC || defined HANDLE_DW_LNS || \
+ defined HANDLE_DW_LNE || defined HANDLE_DW_LNCT || \
+ defined HANDLE_DW_MACRO || defined HANDLE_DW_RLE || \
+ defined HANDLE_DW_CFA || defined HANDLE_DW_APPLE_PROPERTY)
#error "Missing macro definition of HANDLE_DW*"
#endif
@@ -22,6 +27,14 @@
#define HANDLE_DW_TAG(ID, NAME)
#endif
+#ifndef HANDLE_DW_AT
+#define HANDLE_DW_AT(ID, NAME)
+#endif
+
+#ifndef HANDLE_DW_FORM
+#define HANDLE_DW_FORM(ID, NAME)
+#endif
+
#ifndef HANDLE_DW_OP
#define HANDLE_DW_OP(ID, NAME)
#endif
@@ -38,11 +51,43 @@
#define HANDLE_DW_VIRTUALITY(ID, NAME)
#endif
+#ifndef HANDLE_DW_DEFAULTED
+#define HANDLE_DW_DEFAULTED(ID, NAME)
+#endif
+
#ifndef HANDLE_DW_CC
#define HANDLE_DW_CC(ID, NAME)
#endif
+#ifndef HANDLE_DW_LNS
+#define HANDLE_DW_LNS(ID, NAME)
+#endif
+
+#ifndef HANDLE_DW_LNE
+#define HANDLE_DW_LNE(ID, NAME)
+#endif
+#ifndef HANDLE_DW_LNCT
+#define HANDLE_DW_LNCT(ID, NAME)
+#endif
+
+#ifndef HANDLE_DW_MACRO
+#define HANDLE_DW_MACRO(ID, NAME)
+#endif
+
+#ifndef HANDLE_DW_RLE
+#define HANDLE_DW_RLE(ID, NAME)
+#endif
+
+#ifndef HANDLE_DW_CFA
+#define HANDLE_DW_CFA(ID, NAME)
+#endif
+
+#ifndef HANDLE_DW_APPLE_PROPERTY
+#define HANDLE_DW_APPLE_PROPERTY(ID, NAME)
+#endif
+
+HANDLE_DW_TAG(0x0000, null)
HANDLE_DW_TAG(0x0001, array_type)
HANDLE_DW_TAG(0x0002, class_type)
HANDLE_DW_TAG(0x0003, entry_point)
@@ -108,6 +153,11 @@ HANDLE_DW_TAG(0x0043, template_alias)
HANDLE_DW_TAG(0x0044, coarray_type)
HANDLE_DW_TAG(0x0045, generic_subrange)
HANDLE_DW_TAG(0x0046, dynamic_type)
+HANDLE_DW_TAG(0x0047, atomic_type)
+HANDLE_DW_TAG(0x0048, call_site)
+HANDLE_DW_TAG(0x0049, call_site_parameter)
+HANDLE_DW_TAG(0x004a, skeleton_unit)
+HANDLE_DW_TAG(0x004b, immutable_type)
// User-defined tags.
HANDLE_DW_TAG(0x4081, MIPS_loop)
@@ -124,6 +174,260 @@ HANDLE_DW_TAG(0xb002, BORLAND_Delphi_dynamic_array)
HANDLE_DW_TAG(0xb003, BORLAND_Delphi_set)
HANDLE_DW_TAG(0xb004, BORLAND_Delphi_variant)
+// Attributes.
+HANDLE_DW_AT(0x01, sibling)
+HANDLE_DW_AT(0x02, location)
+HANDLE_DW_AT(0x03, name)
+HANDLE_DW_AT(0x09, ordering)
+HANDLE_DW_AT(0x0b, byte_size)
+HANDLE_DW_AT(0x0c, bit_offset)
+HANDLE_DW_AT(0x0d, bit_size)
+HANDLE_DW_AT(0x10, stmt_list)
+HANDLE_DW_AT(0x11, low_pc)
+HANDLE_DW_AT(0x12, high_pc)
+HANDLE_DW_AT(0x13, language)
+HANDLE_DW_AT(0x15, discr)
+HANDLE_DW_AT(0x16, discr_value)
+HANDLE_DW_AT(0x17, visibility)
+HANDLE_DW_AT(0x18, import)
+HANDLE_DW_AT(0x19, string_length)
+HANDLE_DW_AT(0x1a, common_reference)
+HANDLE_DW_AT(0x1b, comp_dir)
+HANDLE_DW_AT(0x1c, const_value)
+HANDLE_DW_AT(0x1d, containing_type)
+HANDLE_DW_AT(0x1e, default_value)
+HANDLE_DW_AT(0x20, inline)
+HANDLE_DW_AT(0x21, is_optional)
+HANDLE_DW_AT(0x22, lower_bound)
+HANDLE_DW_AT(0x25, producer)
+HANDLE_DW_AT(0x27, prototyped)
+HANDLE_DW_AT(0x2a, return_addr)
+HANDLE_DW_AT(0x2c, start_scope)
+HANDLE_DW_AT(0x2e, bit_stride)
+HANDLE_DW_AT(0x2f, upper_bound)
+HANDLE_DW_AT(0x31, abstract_origin)
+HANDLE_DW_AT(0x32, accessibility)
+HANDLE_DW_AT(0x33, address_class)
+HANDLE_DW_AT(0x34, artificial)
+HANDLE_DW_AT(0x35, base_types)
+HANDLE_DW_AT(0x36, calling_convention)
+HANDLE_DW_AT(0x37, count)
+HANDLE_DW_AT(0x38, data_member_location)
+HANDLE_DW_AT(0x39, decl_column)
+HANDLE_DW_AT(0x3a, decl_file)
+HANDLE_DW_AT(0x3b, decl_line)
+HANDLE_DW_AT(0x3c, declaration)
+HANDLE_DW_AT(0x3d, discr_list)
+HANDLE_DW_AT(0x3e, encoding)
+HANDLE_DW_AT(0x3f, external)
+HANDLE_DW_AT(0x40, frame_base)
+HANDLE_DW_AT(0x41, friend)
+HANDLE_DW_AT(0x42, identifier_case)
+HANDLE_DW_AT(0x43, macro_info)
+HANDLE_DW_AT(0x44, namelist_item)
+HANDLE_DW_AT(0x45, priority)
+HANDLE_DW_AT(0x46, segment)
+HANDLE_DW_AT(0x47, specification)
+HANDLE_DW_AT(0x48, static_link)
+HANDLE_DW_AT(0x49, type)
+HANDLE_DW_AT(0x4a, use_location)
+HANDLE_DW_AT(0x4b, variable_parameter)
+HANDLE_DW_AT(0x4c, virtuality)
+HANDLE_DW_AT(0x4d, vtable_elem_location)
+HANDLE_DW_AT(0x4e, allocated)
+HANDLE_DW_AT(0x4f, associated)
+HANDLE_DW_AT(0x50, data_location)
+HANDLE_DW_AT(0x51, byte_stride)
+HANDLE_DW_AT(0x52, entry_pc)
+HANDLE_DW_AT(0x53, use_UTF8)
+HANDLE_DW_AT(0x54, extension)
+HANDLE_DW_AT(0x55, ranges)
+HANDLE_DW_AT(0x56, trampoline)
+HANDLE_DW_AT(0x57, call_column)
+HANDLE_DW_AT(0x58, call_file)
+HANDLE_DW_AT(0x59, call_line)
+HANDLE_DW_AT(0x5a, description)
+HANDLE_DW_AT(0x5b, binary_scale)
+HANDLE_DW_AT(0x5c, decimal_scale)
+HANDLE_DW_AT(0x5d, small)
+HANDLE_DW_AT(0x5e, decimal_sign)
+HANDLE_DW_AT(0x5f, digit_count)
+HANDLE_DW_AT(0x60, picture_string)
+HANDLE_DW_AT(0x61, mutable)
+HANDLE_DW_AT(0x62, threads_scaled)
+HANDLE_DW_AT(0x63, explicit)
+HANDLE_DW_AT(0x64, object_pointer)
+HANDLE_DW_AT(0x65, endianity)
+HANDLE_DW_AT(0x66, elemental)
+HANDLE_DW_AT(0x67, pure)
+HANDLE_DW_AT(0x68, recursive)
+HANDLE_DW_AT(0x69, signature)
+HANDLE_DW_AT(0x6a, main_subprogram)
+HANDLE_DW_AT(0x6b, data_bit_offset)
+HANDLE_DW_AT(0x6c, const_expr)
+HANDLE_DW_AT(0x6d, enum_class)
+HANDLE_DW_AT(0x6e, linkage_name)
+
+// New in DWARF 5:
+HANDLE_DW_AT(0x6f, string_length_bit_size)
+HANDLE_DW_AT(0x70, string_length_byte_size)
+HANDLE_DW_AT(0x71, rank)
+HANDLE_DW_AT(0x72, str_offsets_base)
+HANDLE_DW_AT(0x73, addr_base)
+HANDLE_DW_AT(0x74, rnglists_base)
+HANDLE_DW_AT(0x75, dwo_id) ///< Retracted from DWARF 5.
+HANDLE_DW_AT(0x76, dwo_name)
+HANDLE_DW_AT(0x77, reference)
+HANDLE_DW_AT(0x78, rvalue_reference)
+HANDLE_DW_AT(0x79, macros)
+HANDLE_DW_AT(0x7a, call_all_calls)
+HANDLE_DW_AT(0x7b, call_all_source_calls)
+HANDLE_DW_AT(0x7c, call_all_tail_calls)
+HANDLE_DW_AT(0x7d, call_return_pc)
+HANDLE_DW_AT(0x7e, call_value)
+HANDLE_DW_AT(0x7f, call_origin)
+HANDLE_DW_AT(0x80, call_parameter)
+HANDLE_DW_AT(0x81, call_pc)
+HANDLE_DW_AT(0x82, call_tail_call)
+HANDLE_DW_AT(0x83, call_target)
+HANDLE_DW_AT(0x84, call_target_clobbered)
+HANDLE_DW_AT(0x85, call_data_location)
+HANDLE_DW_AT(0x86, call_data_value)
+HANDLE_DW_AT(0x87, noreturn)
+HANDLE_DW_AT(0x88, alignment)
+HANDLE_DW_AT(0x89, export_symbols)
+HANDLE_DW_AT(0x8a, deleted)
+HANDLE_DW_AT(0x8b, defaulted)
+HANDLE_DW_AT(0x8c, loclists_base)
+
+HANDLE_DW_AT(0x2002, MIPS_loop_begin)
+HANDLE_DW_AT(0x2003, MIPS_tail_loop_begin)
+HANDLE_DW_AT(0x2004, MIPS_epilog_begin)
+HANDLE_DW_AT(0x2005, MIPS_loop_unroll_factor)
+HANDLE_DW_AT(0x2006, MIPS_software_pipeline_depth)
+HANDLE_DW_AT(0x2007, MIPS_linkage_name)
+HANDLE_DW_AT(0x2008, MIPS_stride)
+HANDLE_DW_AT(0x2009, MIPS_abstract_name)
+HANDLE_DW_AT(0x200a, MIPS_clone_origin)
+HANDLE_DW_AT(0x200b, MIPS_has_inlines)
+HANDLE_DW_AT(0x200c, MIPS_stride_byte)
+HANDLE_DW_AT(0x200d, MIPS_stride_elem)
+HANDLE_DW_AT(0x200e, MIPS_ptr_dopetype)
+HANDLE_DW_AT(0x200f, MIPS_allocatable_dopetype)
+HANDLE_DW_AT(0x2010, MIPS_assumed_shape_dopetype)
+
+// This one appears to have only been implemented by Open64 for
+// fortran and may conflict with other extensions.
+HANDLE_DW_AT(0x2011, MIPS_assumed_size)
+
+// GNU extensions
+HANDLE_DW_AT(0x2101, sf_names)
+HANDLE_DW_AT(0x2102, src_info)
+HANDLE_DW_AT(0x2103, mac_info)
+HANDLE_DW_AT(0x2104, src_coords)
+HANDLE_DW_AT(0x2105, body_begin)
+HANDLE_DW_AT(0x2106, body_end)
+HANDLE_DW_AT(0x2107, GNU_vector)
+HANDLE_DW_AT(0x2110, GNU_template_name)
+
+HANDLE_DW_AT(0x210f, GNU_odr_signature)
+HANDLE_DW_AT(0x2119, GNU_macros)
+
+// Extensions for Fission proposal.
+HANDLE_DW_AT(0x2130, GNU_dwo_name)
+HANDLE_DW_AT(0x2131, GNU_dwo_id)
+HANDLE_DW_AT(0x2132, GNU_ranges_base)
+HANDLE_DW_AT(0x2133, GNU_addr_base)
+HANDLE_DW_AT(0x2134, GNU_pubnames)
+HANDLE_DW_AT(0x2135, GNU_pubtypes)
+HANDLE_DW_AT(0x2136, GNU_discriminator)
+
+// Borland extensions.
+HANDLE_DW_AT(0x3b11, BORLAND_property_read)
+HANDLE_DW_AT(0x3b12, BORLAND_property_write)
+HANDLE_DW_AT(0x3b13, BORLAND_property_implements)
+HANDLE_DW_AT(0x3b14, BORLAND_property_index)
+HANDLE_DW_AT(0x3b15, BORLAND_property_default)
+HANDLE_DW_AT(0x3b20, BORLAND_Delphi_unit)
+HANDLE_DW_AT(0x3b21, BORLAND_Delphi_class)
+HANDLE_DW_AT(0x3b22, BORLAND_Delphi_record)
+HANDLE_DW_AT(0x3b23, BORLAND_Delphi_metaclass)
+HANDLE_DW_AT(0x3b24, BORLAND_Delphi_constructor)
+HANDLE_DW_AT(0x3b25, BORLAND_Delphi_destructor)
+HANDLE_DW_AT(0x3b26, BORLAND_Delphi_anonymous_method)
+HANDLE_DW_AT(0x3b27, BORLAND_Delphi_interface)
+HANDLE_DW_AT(0x3b28, BORLAND_Delphi_ABI)
+HANDLE_DW_AT(0x3b29, BORLAND_Delphi_return)
+HANDLE_DW_AT(0x3b30, BORLAND_Delphi_frameptr)
+HANDLE_DW_AT(0x3b31, BORLAND_closure)
+
+// LLVM project extensions.
+HANDLE_DW_AT(0x3e00, LLVM_include_path)
+HANDLE_DW_AT(0x3e01, LLVM_config_macros)
+HANDLE_DW_AT(0x3e02, LLVM_isysroot)
+
+// Apple extensions.
+HANDLE_DW_AT(0x3fe1, APPLE_optimized)
+HANDLE_DW_AT(0x3fe2, APPLE_flags)
+HANDLE_DW_AT(0x3fe3, APPLE_isa)
+HANDLE_DW_AT(0x3fe4, APPLE_block)
+HANDLE_DW_AT(0x3fe5, APPLE_major_runtime_vers)
+HANDLE_DW_AT(0x3fe6, APPLE_runtime_class)
+HANDLE_DW_AT(0x3fe7, APPLE_omit_frame_ptr)
+HANDLE_DW_AT(0x3fe8, APPLE_property_name)
+HANDLE_DW_AT(0x3fe9, APPLE_property_getter)
+HANDLE_DW_AT(0x3fea, APPLE_property_setter)
+HANDLE_DW_AT(0x3feb, APPLE_property_attribute)
+HANDLE_DW_AT(0x3fec, APPLE_objc_complete_type)
+HANDLE_DW_AT(0x3fed, APPLE_property)
+
+// Attribute form encodings.
+HANDLE_DW_FORM(0x01, addr)
+HANDLE_DW_FORM(0x03, block2)
+HANDLE_DW_FORM(0x04, block4)
+HANDLE_DW_FORM(0x05, data2)
+HANDLE_DW_FORM(0x06, data4)
+HANDLE_DW_FORM(0x07, data8)
+HANDLE_DW_FORM(0x08, string)
+HANDLE_DW_FORM(0x09, block)
+HANDLE_DW_FORM(0x0a, block1)
+HANDLE_DW_FORM(0x0b, data1)
+HANDLE_DW_FORM(0x0c, flag)
+HANDLE_DW_FORM(0x0d, sdata)
+HANDLE_DW_FORM(0x0e, strp)
+HANDLE_DW_FORM(0x0f, udata)
+HANDLE_DW_FORM(0x10, ref_addr)
+HANDLE_DW_FORM(0x11, ref1)
+HANDLE_DW_FORM(0x12, ref2)
+HANDLE_DW_FORM(0x13, ref4)
+HANDLE_DW_FORM(0x14, ref8)
+HANDLE_DW_FORM(0x15, ref_udata)
+HANDLE_DW_FORM(0x16, indirect)
+HANDLE_DW_FORM(0x17, sec_offset)
+HANDLE_DW_FORM(0x18, exprloc)
+HANDLE_DW_FORM(0x19, flag_present)
+
+// New in DWARF v5.
+HANDLE_DW_FORM(0x1a, strx)
+HANDLE_DW_FORM(0x1b, addrx)
+HANDLE_DW_FORM(0x1c, ref_sup)
+HANDLE_DW_FORM(0x1d, strp_sup)
+HANDLE_DW_FORM(0x1e, data16)
+HANDLE_DW_FORM(0x1f, line_strp)
+HANDLE_DW_FORM(0x20, ref_sig8)
+HANDLE_DW_FORM(0x21, implicit_const)
+HANDLE_DW_FORM(0x22, loclistx)
+HANDLE_DW_FORM(0x23, rnglistx)
+
+// Extensions for Fission proposal
+HANDLE_DW_FORM(0x1f01, GNU_addr_index)
+HANDLE_DW_FORM(0x1f02, GNU_str_index)
+
+// Alternate debug sections proposal (output of "dwz" tool).
+HANDLE_DW_FORM(0x1f20, GNU_ref_alt)
+HANDLE_DW_FORM(0x1f21, GNU_strp_alt)
+
+// DWARF Expression operators.
HANDLE_DW_OP(0x03, addr)
HANDLE_DW_OP(0x06, deref)
HANDLE_DW_OP(0x08, const1u)
@@ -151,7 +455,7 @@ HANDLE_DW_OP(0x1d, mod)
HANDLE_DW_OP(0x1e, mul)
HANDLE_DW_OP(0x1f, neg)
HANDLE_DW_OP(0x20, not)
-HANDLE_DW_OP(0x21, or )
+HANDLE_DW_OP(0x21, or)
HANDLE_DW_OP(0x22, plus)
HANDLE_DW_OP(0x23, plus_uconst)
HANDLE_DW_OP(0x24, shl)
@@ -278,7 +582,18 @@ HANDLE_DW_OP(0x9c, call_frame_cfa)
HANDLE_DW_OP(0x9d, bit_piece)
HANDLE_DW_OP(0x9e, implicit_value)
HANDLE_DW_OP(0x9f, stack_value)
+HANDLE_DW_OP(0xa0, implicit_pointer)
+HANDLE_DW_OP(0xa1, addrx)
+HANDLE_DW_OP(0xa2, constx)
+HANDLE_DW_OP(0xa3, entry_value)
+HANDLE_DW_OP(0xa4, const_type)
+HANDLE_DW_OP(0xa5, regval_type)
+HANDLE_DW_OP(0xa6, deref_type)
+HANDLE_DW_OP(0xa7, xderef_type)
+HANDLE_DW_OP(0xa8, convert)
+HANDLE_DW_OP(0xa9, reinterpret)
+// Vendor extensions.
// Extensions for GNU-style thread-local storage.
HANDLE_DW_OP(0xe0, GNU_push_tls_address)
@@ -324,6 +639,9 @@ HANDLE_DW_LANG(0x0020, Dylan)
HANDLE_DW_LANG(0x0021, C_plus_plus_14)
HANDLE_DW_LANG(0x0022, Fortran03)
HANDLE_DW_LANG(0x0023, Fortran08)
+HANDLE_DW_LANG(0x0024, RenderScript)
+
+// Vendor extensions.
HANDLE_DW_LANG(0x8001, Mips_Assembler)
HANDLE_DW_LANG(0x8e57, GOOGLE_RenderScript)
HANDLE_DW_LANG(0xb000, BORLAND_Delphi)
@@ -345,16 +663,25 @@ HANDLE_DW_ATE(0x0d, signed_fixed)
HANDLE_DW_ATE(0x0e, unsigned_fixed)
HANDLE_DW_ATE(0x0f, decimal_float)
HANDLE_DW_ATE(0x10, UTF)
+HANDLE_DW_ATE(0x11, UCS)
+HANDLE_DW_ATE(0x12, ASCII)
// DWARF virtuality codes.
HANDLE_DW_VIRTUALITY(0x00, none)
HANDLE_DW_VIRTUALITY(0x01, virtual)
HANDLE_DW_VIRTUALITY(0x02, pure_virtual)
+// DWARF v5 Defaulted Member Encodings.
+HANDLE_DW_DEFAULTED(0x00, no)
+HANDLE_DW_DEFAULTED(0x01, in_class)
+HANDLE_DW_DEFAULTED(0x02, out_of_class)
+
// DWARF calling convention codes.
HANDLE_DW_CC(0x01, normal)
HANDLE_DW_CC(0x02, program)
HANDLE_DW_CC(0x03, nocall)
+HANDLE_DW_CC(0x04, pass_by_reference)
+HANDLE_DW_CC(0x05, pass_by_value)
HANDLE_DW_CC(0x41, GNU_borland_fastcall_i386)
HANDLE_DW_CC(0xb0, BORLAND_safecall)
HANDLE_DW_CC(0xb1, BORLAND_stdcall)
@@ -365,9 +692,120 @@ HANDLE_DW_CC(0xb5, BORLAND_thiscall)
HANDLE_DW_CC(0xb6, BORLAND_fastcall)
HANDLE_DW_CC(0xc0, LLVM_vectorcall)
+// Line Number Extended Opcode Encodings
+HANDLE_DW_LNE(0x01, end_sequence)
+HANDLE_DW_LNE(0x02, set_address)
+HANDLE_DW_LNE(0x03, define_file)
+HANDLE_DW_LNE(0x04, set_discriminator)
+
+// Line Number Standard Opcode Encodings.
+HANDLE_DW_LNS(0x00, extended_op)
+HANDLE_DW_LNS(0x01, copy)
+HANDLE_DW_LNS(0x02, advance_pc)
+HANDLE_DW_LNS(0x03, advance_line)
+HANDLE_DW_LNS(0x04, set_file)
+HANDLE_DW_LNS(0x05, set_column)
+HANDLE_DW_LNS(0x06, negate_stmt)
+HANDLE_DW_LNS(0x07, set_basic_block)
+HANDLE_DW_LNS(0x08, const_add_pc)
+HANDLE_DW_LNS(0x09, fixed_advance_pc)
+HANDLE_DW_LNS(0x0a, set_prologue_end)
+HANDLE_DW_LNS(0x0b, set_epilogue_begin)
+HANDLE_DW_LNS(0x0c, set_isa)
+
+// DWARF v5 Line number header entry format.
+HANDLE_DW_LNCT(0x01, path)
+HANDLE_DW_LNCT(0x02, directory_index)
+HANDLE_DW_LNCT(0x03, timestamp)
+HANDLE_DW_LNCT(0x04, size)
+HANDLE_DW_LNCT(0x05, MD5)
+
+HANDLE_DW_MACRO(0x01, define)
+HANDLE_DW_MACRO(0x02, undef)
+HANDLE_DW_MACRO(0x03, start_file)
+HANDLE_DW_MACRO(0x04, end_file)
+HANDLE_DW_MACRO(0x05, define_strp)
+HANDLE_DW_MACRO(0x06, undef_strp)
+HANDLE_DW_MACRO(0x07, import)
+HANDLE_DW_MACRO(0x08, define_sup)
+HANDLE_DW_MACRO(0x09, undef_sup)
+HANDLE_DW_MACRO(0x0a, import_sup)
+HANDLE_DW_MACRO(0x0b, define_strx)
+HANDLE_DW_MACRO(0x0c, undef_strx)
+
+// Range list entry encoding values.
+HANDLE_DW_RLE(0x00, end_of_list)
+HANDLE_DW_RLE(0x01, base_addressx)
+HANDLE_DW_RLE(0x02, startx_endx)
+HANDLE_DW_RLE(0x03, startx_length)
+HANDLE_DW_RLE(0x04, offset_pair)
+HANDLE_DW_RLE(0x05, base_address)
+HANDLE_DW_RLE(0x06, start_end)
+HANDLE_DW_RLE(0x07, start_length)
+
+// Call frame instruction encodings.
+HANDLE_DW_CFA(0x00, nop)
+HANDLE_DW_CFA(0x40, advance_loc)
+HANDLE_DW_CFA(0x80, offset)
+HANDLE_DW_CFA(0xc0, restore)
+HANDLE_DW_CFA(0x01, set_loc)
+HANDLE_DW_CFA(0x02, advance_loc1)
+HANDLE_DW_CFA(0x03, advance_loc2)
+HANDLE_DW_CFA(0x04, advance_loc4)
+HANDLE_DW_CFA(0x05, offset_extended)
+HANDLE_DW_CFA(0x06, restore_extended)
+HANDLE_DW_CFA(0x07, undefined)
+HANDLE_DW_CFA(0x08, same_value)
+HANDLE_DW_CFA(0x09, register)
+HANDLE_DW_CFA(0x0a, remember_state)
+HANDLE_DW_CFA(0x0b, restore_state)
+HANDLE_DW_CFA(0x0c, def_cfa)
+HANDLE_DW_CFA(0x0d, def_cfa_register)
+HANDLE_DW_CFA(0x0e, def_cfa_offset)
+HANDLE_DW_CFA(0x0f, def_cfa_expression)
+HANDLE_DW_CFA(0x10, expression)
+HANDLE_DW_CFA(0x11, offset_extended_sf)
+HANDLE_DW_CFA(0x12, def_cfa_sf)
+HANDLE_DW_CFA(0x13, def_cfa_offset_sf)
+HANDLE_DW_CFA(0x14, val_offset)
+HANDLE_DW_CFA(0x15, val_offset_sf)
+HANDLE_DW_CFA(0x16, val_expression)
+HANDLE_DW_CFA(0x1d, MIPS_advance_loc8)
+HANDLE_DW_CFA(0x2d, GNU_window_save)
+HANDLE_DW_CFA(0x2e, GNU_args_size)
+
+// Apple Objective-C Property Attributes.
+// Keep this list in sync with clang's DeclSpec.h ObjCPropertyAttributeKind!
+HANDLE_DW_APPLE_PROPERTY(0x01, readonly)
+HANDLE_DW_APPLE_PROPERTY(0x02, getter)
+HANDLE_DW_APPLE_PROPERTY(0x04, assign)
+HANDLE_DW_APPLE_PROPERTY(0x08, readwrite)
+HANDLE_DW_APPLE_PROPERTY(0x10, retain)
+HANDLE_DW_APPLE_PROPERTY(0x20, copy)
+HANDLE_DW_APPLE_PROPERTY(0x40, nonatomic)
+HANDLE_DW_APPLE_PROPERTY(0x80, setter)
+HANDLE_DW_APPLE_PROPERTY(0x100, atomic)
+HANDLE_DW_APPLE_PROPERTY(0x200, weak)
+HANDLE_DW_APPLE_PROPERTY(0x400, strong)
+HANDLE_DW_APPLE_PROPERTY(0x800, unsafe_unretained)
+HANDLE_DW_APPLE_PROPERTY(0x1000, nullability)
+HANDLE_DW_APPLE_PROPERTY(0x2000, null_resettable)
+HANDLE_DW_APPLE_PROPERTY(0x4000, class)
+
+
#undef HANDLE_DW_TAG
+#undef HANDLE_DW_AT
+#undef HANDLE_DW_FORM
#undef HANDLE_DW_OP
#undef HANDLE_DW_LANG
#undef HANDLE_DW_ATE
#undef HANDLE_DW_VIRTUALITY
+#undef HANDLE_DW_DEFAULTED
#undef HANDLE_DW_CC
+#undef HANDLE_DW_LNS
+#undef HANDLE_DW_LNE
+#undef HANDLE_DW_LNCT
+#undef HANDLE_DW_MACRO
+#undef HANDLE_DW_RLE
+#undef HANDLE_DW_CFA
+#undef HANDLE_DW_APPLE_PROPERTY
diff --git a/contrib/llvm/include/llvm/Support/Dwarf.h b/contrib/llvm/include/llvm/Support/Dwarf.h
index 86b19676c345..1a984037da09 100644
--- a/contrib/llvm/include/llvm/Support/Dwarf.h
+++ b/contrib/llvm/include/llvm/Support/Dwarf.h
@@ -90,241 +90,26 @@ inline bool isType(Tag T) {
}
}
+/// Attributes.
enum Attribute : uint16_t {
- // Attributes
- DW_AT_sibling = 0x01,
- DW_AT_location = 0x02,
- DW_AT_name = 0x03,
- DW_AT_ordering = 0x09,
- DW_AT_byte_size = 0x0b,
- DW_AT_bit_offset = 0x0c,
- DW_AT_bit_size = 0x0d,
- DW_AT_stmt_list = 0x10,
- DW_AT_low_pc = 0x11,
- DW_AT_high_pc = 0x12,
- DW_AT_language = 0x13,
- DW_AT_discr = 0x15,
- DW_AT_discr_value = 0x16,
- DW_AT_visibility = 0x17,
- DW_AT_import = 0x18,
- DW_AT_string_length = 0x19,
- DW_AT_common_reference = 0x1a,
- DW_AT_comp_dir = 0x1b,
- DW_AT_const_value = 0x1c,
- DW_AT_containing_type = 0x1d,
- DW_AT_default_value = 0x1e,
- DW_AT_inline = 0x20,
- DW_AT_is_optional = 0x21,
- DW_AT_lower_bound = 0x22,
- DW_AT_producer = 0x25,
- DW_AT_prototyped = 0x27,
- DW_AT_return_addr = 0x2a,
- DW_AT_start_scope = 0x2c,
- DW_AT_bit_stride = 0x2e,
- DW_AT_upper_bound = 0x2f,
- DW_AT_abstract_origin = 0x31,
- DW_AT_accessibility = 0x32,
- DW_AT_address_class = 0x33,
- DW_AT_artificial = 0x34,
- DW_AT_base_types = 0x35,
- DW_AT_calling_convention = 0x36,
- DW_AT_count = 0x37,
- DW_AT_data_member_location = 0x38,
- DW_AT_decl_column = 0x39,
- DW_AT_decl_file = 0x3a,
- DW_AT_decl_line = 0x3b,
- DW_AT_declaration = 0x3c,
- DW_AT_discr_list = 0x3d,
- DW_AT_encoding = 0x3e,
- DW_AT_external = 0x3f,
- DW_AT_frame_base = 0x40,
- DW_AT_friend = 0x41,
- DW_AT_identifier_case = 0x42,
- DW_AT_macro_info = 0x43,
- DW_AT_namelist_item = 0x44,
- DW_AT_priority = 0x45,
- DW_AT_segment = 0x46,
- DW_AT_specification = 0x47,
- DW_AT_static_link = 0x48,
- DW_AT_type = 0x49,
- DW_AT_use_location = 0x4a,
- DW_AT_variable_parameter = 0x4b,
- DW_AT_virtuality = 0x4c,
- DW_AT_vtable_elem_location = 0x4d,
- DW_AT_allocated = 0x4e,
- DW_AT_associated = 0x4f,
- DW_AT_data_location = 0x50,
- DW_AT_byte_stride = 0x51,
- DW_AT_entry_pc = 0x52,
- DW_AT_use_UTF8 = 0x53,
- DW_AT_extension = 0x54,
- DW_AT_ranges = 0x55,
- DW_AT_trampoline = 0x56,
- DW_AT_call_column = 0x57,
- DW_AT_call_file = 0x58,
- DW_AT_call_line = 0x59,
- DW_AT_description = 0x5a,
- DW_AT_binary_scale = 0x5b,
- DW_AT_decimal_scale = 0x5c,
- DW_AT_small = 0x5d,
- DW_AT_decimal_sign = 0x5e,
- DW_AT_digit_count = 0x5f,
- DW_AT_picture_string = 0x60,
- DW_AT_mutable = 0x61,
- DW_AT_threads_scaled = 0x62,
- DW_AT_explicit = 0x63,
- DW_AT_object_pointer = 0x64,
- DW_AT_endianity = 0x65,
- DW_AT_elemental = 0x66,
- DW_AT_pure = 0x67,
- DW_AT_recursive = 0x68,
- DW_AT_signature = 0x69,
- DW_AT_main_subprogram = 0x6a,
- DW_AT_data_bit_offset = 0x6b,
- DW_AT_const_expr = 0x6c,
- DW_AT_enum_class = 0x6d,
- DW_AT_linkage_name = 0x6e,
-
- // New in DWARF 5:
- DW_AT_string_length_bit_size = 0x6f,
- DW_AT_string_length_byte_size = 0x70,
- DW_AT_rank = 0x71,
- DW_AT_str_offsets_base = 0x72,
- DW_AT_addr_base = 0x73,
- DW_AT_ranges_base = 0x74,
- DW_AT_dwo_id = 0x75,
- DW_AT_dwo_name = 0x76,
- DW_AT_reference = 0x77,
- DW_AT_rvalue_reference = 0x78,
- DW_AT_macros = 0x79,
-
+#define HANDLE_DW_AT(ID, NAME) DW_AT_##NAME = ID,
+#include "llvm/Support/Dwarf.def"
DW_AT_lo_user = 0x2000,
DW_AT_hi_user = 0x3fff,
-
- DW_AT_MIPS_loop_begin = 0x2002,
- DW_AT_MIPS_tail_loop_begin = 0x2003,
- DW_AT_MIPS_epilog_begin = 0x2004,
- DW_AT_MIPS_loop_unroll_factor = 0x2005,
- DW_AT_MIPS_software_pipeline_depth = 0x2006,
- DW_AT_MIPS_linkage_name = 0x2007,
- DW_AT_MIPS_stride = 0x2008,
- DW_AT_MIPS_abstract_name = 0x2009,
- DW_AT_MIPS_clone_origin = 0x200a,
- DW_AT_MIPS_has_inlines = 0x200b,
- DW_AT_MIPS_stride_byte = 0x200c,
- DW_AT_MIPS_stride_elem = 0x200d,
- DW_AT_MIPS_ptr_dopetype = 0x200e,
- DW_AT_MIPS_allocatable_dopetype = 0x200f,
- DW_AT_MIPS_assumed_shape_dopetype = 0x2010,
-
- // This one appears to have only been implemented by Open64 for
- // fortran and may conflict with other extensions.
- DW_AT_MIPS_assumed_size = 0x2011,
-
- // GNU extensions
- DW_AT_sf_names = 0x2101,
- DW_AT_src_info = 0x2102,
- DW_AT_mac_info = 0x2103,
- DW_AT_src_coords = 0x2104,
- DW_AT_body_begin = 0x2105,
- DW_AT_body_end = 0x2106,
- DW_AT_GNU_vector = 0x2107,
- DW_AT_GNU_template_name = 0x2110,
-
- DW_AT_GNU_odr_signature = 0x210f,
- DW_AT_GNU_macros = 0x2119,
-
- // Extensions for Fission proposal.
- DW_AT_GNU_dwo_name = 0x2130,
- DW_AT_GNU_dwo_id = 0x2131,
- DW_AT_GNU_ranges_base = 0x2132,
- DW_AT_GNU_addr_base = 0x2133,
- DW_AT_GNU_pubnames = 0x2134,
- DW_AT_GNU_pubtypes = 0x2135,
- DW_AT_GNU_discriminator = 0x2136,
-
- // Borland extensions.
- DW_AT_BORLAND_property_read = 0x3b11,
- DW_AT_BORLAND_property_write = 0x3b12,
- DW_AT_BORLAND_property_implements = 0x3b13,
- DW_AT_BORLAND_property_index = 0x3b14,
- DW_AT_BORLAND_property_default = 0x3b15,
- DW_AT_BORLAND_Delphi_unit = 0x3b20,
- DW_AT_BORLAND_Delphi_class = 0x3b21,
- DW_AT_BORLAND_Delphi_record = 0x3b22,
- DW_AT_BORLAND_Delphi_metaclass = 0x3b23,
- DW_AT_BORLAND_Delphi_constructor = 0x3b24,
- DW_AT_BORLAND_Delphi_destructor = 0x3b25,
- DW_AT_BORLAND_Delphi_anonymous_method = 0x3b26,
- DW_AT_BORLAND_Delphi_interface = 0x3b27,
- DW_AT_BORLAND_Delphi_ABI = 0x3b28,
- DW_AT_BORLAND_Delphi_return = 0x3b29,
- DW_AT_BORLAND_Delphi_frameptr = 0x3b30,
- DW_AT_BORLAND_closure = 0x3b31,
-
- // LLVM project extensions.
- DW_AT_LLVM_include_path = 0x3e00,
- DW_AT_LLVM_config_macros = 0x3e01,
- DW_AT_LLVM_isysroot = 0x3e02,
-
- // Apple extensions.
- DW_AT_APPLE_optimized = 0x3fe1,
- DW_AT_APPLE_flags = 0x3fe2,
- DW_AT_APPLE_isa = 0x3fe3,
- DW_AT_APPLE_block = 0x3fe4,
- DW_AT_APPLE_major_runtime_vers = 0x3fe5,
- DW_AT_APPLE_runtime_class = 0x3fe6,
- DW_AT_APPLE_omit_frame_ptr = 0x3fe7,
- DW_AT_APPLE_property_name = 0x3fe8,
- DW_AT_APPLE_property_getter = 0x3fe9,
- DW_AT_APPLE_property_setter = 0x3fea,
- DW_AT_APPLE_property_attribute = 0x3feb,
- DW_AT_APPLE_objc_complete_type = 0x3fec,
- DW_AT_APPLE_property = 0x3fed
};
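
The HANDLE_DW_* macros follow the usual X-macro pattern: a client defines the macro, includes Dwarf.def, and gets one expansion per entry (Dwarf.def #undefs the macros at the end). Besides generating the enumerators above, the same .def file can drive string tables; a sketch with an illustrative helper name:

    const char *attributeName(unsigned Attr) {
      switch (Attr) {
    #define HANDLE_DW_AT(ID, NAME)                                               \
      case ID:                                                                   \
        return "DW_AT_" #NAME;
    #include "llvm/Support/Dwarf.def"
      default:
        return nullptr;
      }
    }
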
enum Form : uint16_t {
- // Attribute form encodings
- DW_FORM_addr = 0x01,
- DW_FORM_block2 = 0x03,
- DW_FORM_block4 = 0x04,
- DW_FORM_data2 = 0x05,
- DW_FORM_data4 = 0x06,
- DW_FORM_data8 = 0x07,
- DW_FORM_string = 0x08,
- DW_FORM_block = 0x09,
- DW_FORM_block1 = 0x0a,
- DW_FORM_data1 = 0x0b,
- DW_FORM_flag = 0x0c,
- DW_FORM_sdata = 0x0d,
- DW_FORM_strp = 0x0e,
- DW_FORM_udata = 0x0f,
- DW_FORM_ref_addr = 0x10,
- DW_FORM_ref1 = 0x11,
- DW_FORM_ref2 = 0x12,
- DW_FORM_ref4 = 0x13,
- DW_FORM_ref8 = 0x14,
- DW_FORM_ref_udata = 0x15,
- DW_FORM_indirect = 0x16,
- DW_FORM_sec_offset = 0x17,
- DW_FORM_exprloc = 0x18,
- DW_FORM_flag_present = 0x19,
- DW_FORM_ref_sig8 = 0x20,
-
- // Extensions for Fission proposal
- DW_FORM_GNU_addr_index = 0x1f01,
- DW_FORM_GNU_str_index = 0x1f02,
-
- // Alternate debug sections proposal (output of "dwz" tool).
- DW_FORM_GNU_ref_alt = 0x1f20,
- DW_FORM_GNU_strp_alt = 0x1f21
+#define HANDLE_DW_FORM(ID, NAME) DW_FORM_##NAME = ID,
+#include "llvm/Support/Dwarf.def"
+ DW_FORM_lo_user = 0x1f00, ///< Not specified by DWARF.
};
enum LocationAtom {
#define HANDLE_DW_OP(ID, NAME) DW_OP_##NAME = ID,
#include "llvm/Support/Dwarf.def"
DW_OP_lo_user = 0xe0,
- DW_OP_hi_user = 0xff
+ DW_OP_hi_user = 0xff,
+ DW_OP_LLVM_fragment = 0x1000 ///< Only used in LLVM metadata.
};
enum TypeKind {
@@ -372,6 +157,12 @@ enum VirtualityAttribute {
DW_VIRTUALITY_max = 0x02
};
+enum DefaultedMemberAttribute {
+#define HANDLE_DW_DEFAULTED(ID, NAME) DW_DEFAULTED_##NAME = ID,
+#include "llvm/Support/Dwarf.def"
+ DW_DEFAULTED_max = 0x02
+};
+
enum SourceLanguage {
#define HANDLE_DW_LANG(ID, NAME) DW_LANG_##NAME = ID,
#include "llvm/Support/Dwarf.def"
@@ -415,33 +206,27 @@ enum DiscriminantList {
DW_DSC_range = 0x01
};
+/// Line Number Standard Opcode Encodings.
enum LineNumberOps {
- // Line Number Standard Opcode Encodings
- DW_LNS_extended_op = 0x00,
- DW_LNS_copy = 0x01,
- DW_LNS_advance_pc = 0x02,
- DW_LNS_advance_line = 0x03,
- DW_LNS_set_file = 0x04,
- DW_LNS_set_column = 0x05,
- DW_LNS_negate_stmt = 0x06,
- DW_LNS_set_basic_block = 0x07,
- DW_LNS_const_add_pc = 0x08,
- DW_LNS_fixed_advance_pc = 0x09,
- DW_LNS_set_prologue_end = 0x0a,
- DW_LNS_set_epilogue_begin = 0x0b,
- DW_LNS_set_isa = 0x0c
+#define HANDLE_DW_LNS(ID, NAME) DW_LNS_##NAME = ID,
+#include "llvm/Support/Dwarf.def"
};
+/// Line Number Extended Opcode Encodings.
enum LineNumberExtendedOps {
- // Line Number Extended Opcode Encodings
- DW_LNE_end_sequence = 0x01,
- DW_LNE_set_address = 0x02,
- DW_LNE_define_file = 0x03,
- DW_LNE_set_discriminator = 0x04,
+#define HANDLE_DW_LNE(ID, NAME) DW_LNE_##NAME = ID,
+#include "llvm/Support/Dwarf.def"
DW_LNE_lo_user = 0x80,
DW_LNE_hi_user = 0xff
};
+enum LineNumberEntryFormat {
+#define HANDLE_DW_LNCT(ID, NAME) DW_LNCT_##NAME = ID,
+#include "llvm/Support/Dwarf.def"
+ DW_LNCT_lo_user = 0x2000,
+ DW_LNCT_hi_user = 0x3fff,
+};
+
enum MacinfoRecordType {
// Macinfo Type Encodings
DW_MACINFO_define = 0x01,
@@ -451,56 +236,27 @@ enum MacinfoRecordType {
DW_MACINFO_vendor_ext = 0xff
};
+/// DWARF v5 macro information entry type encodings.
enum MacroEntryType {
- // Macro Information Entry Type Encodings
- DW_MACRO_define = 0x01,
- DW_MACRO_undef = 0x02,
- DW_MACRO_start_file = 0x03,
- DW_MACRO_end_file = 0x04,
- DW_MACRO_define_indirect = 0x05,
- DW_MACRO_undef_indirect = 0x06,
- DW_MACRO_transparent_include = 0x07,
- DW_MACRO_define_indirect_sup = 0x08,
- DW_MACRO_undef_indirect_sup = 0x09,
- DW_MACRO_transparent_include_sup = 0x0a,
- DW_MACRO_define_indirectx = 0x0b,
- DW_MACRO_undef_indirectx = 0x0c,
+#define HANDLE_DW_MACRO(ID, NAME) DW_MACRO_##NAME = ID,
+#include "llvm/Support/Dwarf.def"
DW_MACRO_lo_user = 0xe0,
DW_MACRO_hi_user = 0xff
};
+/// DWARF v5 range list entry encoding values.
+enum RangeListEntries {
+#define HANDLE_DW_RLE(ID, NAME) DW_RLE_##NAME = ID,
+#include "llvm/Support/Dwarf.def"
+};
+
+/// Call frame instruction encodings.
enum CallFrameInfo {
- // Call frame instruction encodings
+#define HANDLE_DW_CFA(ID, NAME) DW_CFA_##NAME = ID,
+#include "llvm/Support/Dwarf.def"
DW_CFA_extended = 0x00,
- DW_CFA_nop = 0x00,
- DW_CFA_advance_loc = 0x40,
- DW_CFA_offset = 0x80,
- DW_CFA_restore = 0xc0,
- DW_CFA_set_loc = 0x01,
- DW_CFA_advance_loc1 = 0x02,
- DW_CFA_advance_loc2 = 0x03,
- DW_CFA_advance_loc4 = 0x04,
- DW_CFA_offset_extended = 0x05,
- DW_CFA_restore_extended = 0x06,
- DW_CFA_undefined = 0x07,
- DW_CFA_same_value = 0x08,
- DW_CFA_register = 0x09,
- DW_CFA_remember_state = 0x0a,
- DW_CFA_restore_state = 0x0b,
- DW_CFA_def_cfa = 0x0c,
- DW_CFA_def_cfa_register = 0x0d,
- DW_CFA_def_cfa_offset = 0x0e,
- DW_CFA_def_cfa_expression = 0x0f,
- DW_CFA_expression = 0x10,
- DW_CFA_offset_extended_sf = 0x11,
- DW_CFA_def_cfa_sf = 0x12,
- DW_CFA_def_cfa_offset_sf = 0x13,
- DW_CFA_val_offset = 0x14,
- DW_CFA_val_offset_sf = 0x15,
- DW_CFA_val_expression = 0x16,
- DW_CFA_MIPS_advance_loc8 = 0x1d,
- DW_CFA_GNU_window_save = 0x2d,
- DW_CFA_GNU_args_size = 0x2e,
+
DW_CFA_lo_user = 0x1c,
DW_CFA_hi_user = 0x3f
};
@@ -529,34 +285,24 @@ enum Constants {
DW_EH_PE_indirect = 0x80
};
-// Constants for debug_loc.dwo in the DWARF5 Split Debug Info Proposal
+/// Constants for location lists in DWARF v5.
enum LocationListEntry : unsigned char {
- DW_LLE_end_of_list_entry,
- DW_LLE_base_address_selection_entry,
- DW_LLE_start_end_entry,
- DW_LLE_start_length_entry,
- DW_LLE_offset_pair_entry
+ DW_LLE_end_of_list = 0x00,
+ DW_LLE_base_addressx = 0x01,
+ DW_LLE_startx_endx = 0x02,
+ DW_LLE_startx_length = 0x03,
+ DW_LLE_offset_pair = 0x04,
+ DW_LLE_default_location = 0x05,
+ DW_LLE_base_address = 0x06,
+ DW_LLE_start_end = 0x07,
+ DW_LLE_start_length = 0x08
};
/// Constants for the DW_APPLE_PROPERTY_attributes attribute.
-/// Keep this list in sync with clang's DeclSpec.h ObjCPropertyAttributeKind.
+/// Keep this list in sync with clang's DeclSpec.h ObjCPropertyAttributeKind!
enum ApplePropertyAttributes {
- // Apple Objective-C Property Attributes
- DW_APPLE_PROPERTY_readonly = 0x01,
- DW_APPLE_PROPERTY_getter = 0x02,
- DW_APPLE_PROPERTY_assign = 0x04,
- DW_APPLE_PROPERTY_readwrite = 0x08,
- DW_APPLE_PROPERTY_retain = 0x10,
- DW_APPLE_PROPERTY_copy = 0x20,
- DW_APPLE_PROPERTY_nonatomic = 0x40,
- DW_APPLE_PROPERTY_setter = 0x80,
- DW_APPLE_PROPERTY_atomic = 0x100,
- DW_APPLE_PROPERTY_weak = 0x200,
- DW_APPLE_PROPERTY_strong = 0x400,
- DW_APPLE_PROPERTY_unsafe_unretained = 0x800,
- DW_APPLE_PROPERTY_nullability = 0x1000,
- DW_APPLE_PROPERTY_null_resettable = 0x2000,
- DW_APPLE_PROPERTY_class = 0x4000
+#define HANDLE_DW_APPLE_PROPERTY(ID, NAME) DW_APPLE_PROPERTY_##NAME = ID,
+#include "llvm/Support/Dwarf.def"
};
// Constants for the DWARF5 Accelerator Table Proposal
@@ -605,31 +351,31 @@ enum GDBIndexEntryLinkage {
/// known.
///
/// @{
-const char *TagString(unsigned Tag);
-const char *ChildrenString(unsigned Children);
-const char *AttributeString(unsigned Attribute);
-const char *FormEncodingString(unsigned Encoding);
-const char *OperationEncodingString(unsigned Encoding);
-const char *AttributeEncodingString(unsigned Encoding);
-const char *DecimalSignString(unsigned Sign);
-const char *EndianityString(unsigned Endian);
-const char *AccessibilityString(unsigned Access);
-const char *VisibilityString(unsigned Visibility);
-const char *VirtualityString(unsigned Virtuality);
-const char *LanguageString(unsigned Language);
-const char *CaseString(unsigned Case);
-const char *ConventionString(unsigned Convention);
-const char *InlineCodeString(unsigned Code);
-const char *ArrayOrderString(unsigned Order);
-const char *DiscriminantString(unsigned Discriminant);
-const char *LNStandardString(unsigned Standard);
-const char *LNExtendedString(unsigned Encoding);
-const char *MacinfoString(unsigned Encoding);
-const char *CallFrameString(unsigned Encoding);
-const char *ApplePropertyString(unsigned);
-const char *AtomTypeString(unsigned Atom);
-const char *GDBIndexEntryKindString(GDBIndexEntryKind Kind);
-const char *GDBIndexEntryLinkageString(GDBIndexEntryLinkage Linkage);
+StringRef TagString(unsigned Tag);
+StringRef ChildrenString(unsigned Children);
+StringRef AttributeString(unsigned Attribute);
+StringRef FormEncodingString(unsigned Encoding);
+StringRef OperationEncodingString(unsigned Encoding);
+StringRef AttributeEncodingString(unsigned Encoding);
+StringRef DecimalSignString(unsigned Sign);
+StringRef EndianityString(unsigned Endian);
+StringRef AccessibilityString(unsigned Access);
+StringRef VisibilityString(unsigned Visibility);
+StringRef VirtualityString(unsigned Virtuality);
+StringRef LanguageString(unsigned Language);
+StringRef CaseString(unsigned Case);
+StringRef ConventionString(unsigned Convention);
+StringRef InlineCodeString(unsigned Code);
+StringRef ArrayOrderString(unsigned Order);
+StringRef DiscriminantString(unsigned Discriminant);
+StringRef LNStandardString(unsigned Standard);
+StringRef LNExtendedString(unsigned Encoding);
+StringRef MacinfoString(unsigned Encoding);
+StringRef CallFrameString(unsigned Encoding);
+StringRef ApplePropertyString(unsigned);
+StringRef AtomTypeString(unsigned Atom);
+StringRef GDBIndexEntryKindString(GDBIndexEntryKind Kind);
+StringRef GDBIndexEntryLinkageString(GDBIndexEntryLinkage Linkage);
/// @}
/// \defgroup DwarfConstantsParsing Dwarf constants parsing functions
@@ -653,7 +399,7 @@ unsigned getMacinfo(StringRef MacinfoString);
/// \brief Returns the symbolic string representing Val when used as a value
/// for attribute Attr.
-const char *AttributeValueString(uint16_t Attr, unsigned Val);
+StringRef AttributeValueString(uint16_t Attr, unsigned Val);
/// \brief Describes an entry of the various gnu_pub* debug sections.
///
@@ -677,7 +423,9 @@ struct PubIndexEntryDescriptor {
KIND_OFFSET)),
Linkage(static_cast<GDBIndexEntryLinkage>((Value & LINKAGE_MASK) >>
LINKAGE_OFFSET)) {}
- uint8_t toBits() { return Kind << KIND_OFFSET | Linkage << LINKAGE_OFFSET; }
+ uint8_t toBits() const {
+ return Kind << KIND_OFFSET | Linkage << LINKAGE_OFFSET;
+ }
private:
enum {
@@ -688,6 +436,9 @@ private:
};
};
+/// Constants that define the DWARF format as 32 or 64 bit.
+enum DwarfFormat { DWARF32, DWARF64 };
+
} // End of namespace dwarf
} // End of namespace llvm
diff --git a/contrib/llvm/include/llvm/Support/ELF.h b/contrib/llvm/include/llvm/Support/ELF.h
index 70b9daab8360..3ea4da81ad94 100644
--- a/contrib/llvm/include/llvm/Support/ELF.h
+++ b/contrib/llvm/include/llvm/Support/ELF.h
@@ -32,49 +32,49 @@ typedef uint32_t Elf32_Addr; // Program address
typedef uint32_t Elf32_Off; // File offset
typedef uint16_t Elf32_Half;
typedef uint32_t Elf32_Word;
-typedef int32_t Elf32_Sword;
+typedef int32_t Elf32_Sword;
typedef uint64_t Elf64_Addr;
typedef uint64_t Elf64_Off;
typedef uint16_t Elf64_Half;
typedef uint32_t Elf64_Word;
-typedef int32_t Elf64_Sword;
+typedef int32_t Elf64_Sword;
typedef uint64_t Elf64_Xword;
-typedef int64_t Elf64_Sxword;
+typedef int64_t Elf64_Sxword;
// Object file magic string.
-static const char ElfMagic[] = { 0x7f, 'E', 'L', 'F', '\0' };
+static const char ElfMagic[] = {0x7f, 'E', 'L', 'F', '\0'};
// e_ident size and indices.
enum {
- EI_MAG0 = 0, // File identification index.
- EI_MAG1 = 1, // File identification index.
- EI_MAG2 = 2, // File identification index.
- EI_MAG3 = 3, // File identification index.
- EI_CLASS = 4, // File class.
- EI_DATA = 5, // Data encoding.
- EI_VERSION = 6, // File version.
- EI_OSABI = 7, // OS/ABI identification.
- EI_ABIVERSION = 8, // ABI version.
- EI_PAD = 9, // Start of padding bytes.
- EI_NIDENT = 16 // Number of bytes in e_ident.
+ EI_MAG0 = 0, // File identification index.
+ EI_MAG1 = 1, // File identification index.
+ EI_MAG2 = 2, // File identification index.
+ EI_MAG3 = 3, // File identification index.
+ EI_CLASS = 4, // File class.
+ EI_DATA = 5, // Data encoding.
+ EI_VERSION = 6, // File version.
+ EI_OSABI = 7, // OS/ABI identification.
+ EI_ABIVERSION = 8, // ABI version.
+ EI_PAD = 9, // Start of padding bytes.
+ EI_NIDENT = 16 // Number of bytes in e_ident.
};
struct Elf32_Ehdr {
unsigned char e_ident[EI_NIDENT]; // ELF Identification bytes
- Elf32_Half e_type; // Type of file (see ET_* below)
- Elf32_Half e_machine; // Required architecture for this file (see EM_*)
- Elf32_Word e_version; // Must be equal to 1
- Elf32_Addr e_entry; // Address to jump to in order to start program
- Elf32_Off e_phoff; // Program header table's file offset, in bytes
- Elf32_Off e_shoff; // Section header table's file offset, in bytes
- Elf32_Word e_flags; // Processor-specific flags
- Elf32_Half e_ehsize; // Size of ELF header, in bytes
- Elf32_Half e_phentsize; // Size of an entry in the program header table
- Elf32_Half e_phnum; // Number of entries in the program header table
- Elf32_Half e_shentsize; // Size of an entry in the section header table
- Elf32_Half e_shnum; // Number of entries in the section header table
- Elf32_Half e_shstrndx; // Sect hdr table index of sect name string table
+ Elf32_Half e_type; // Type of file (see ET_* below)
+ Elf32_Half e_machine; // Required architecture for this file (see EM_*)
+ Elf32_Word e_version; // Must be equal to 1
+ Elf32_Addr e_entry; // Address to jump to in order to start program
+ Elf32_Off e_phoff; // Program header table's file offset, in bytes
+ Elf32_Off e_shoff; // Section header table's file offset, in bytes
+ Elf32_Word e_flags; // Processor-specific flags
+ Elf32_Half e_ehsize; // Size of ELF header, in bytes
+ Elf32_Half e_phentsize; // Size of an entry in the program header table
+ Elf32_Half e_phnum; // Number of entries in the program header table
+ Elf32_Half e_shentsize; // Size of an entry in the section header table
+ Elf32_Half e_shnum; // Number of entries in the section header table
+ Elf32_Half e_shstrndx; // Sect hdr table index of sect name string table
bool checkMagic() const {
return (memcmp(e_ident, ElfMagic, strlen(ElfMagic))) == 0;
}
@@ -86,19 +86,19 @@ struct Elf32_Ehdr {
// types (see above).
struct Elf64_Ehdr {
unsigned char e_ident[EI_NIDENT];
- Elf64_Half e_type;
- Elf64_Half e_machine;
- Elf64_Word e_version;
- Elf64_Addr e_entry;
- Elf64_Off e_phoff;
- Elf64_Off e_shoff;
- Elf64_Word e_flags;
- Elf64_Half e_ehsize;
- Elf64_Half e_phentsize;
- Elf64_Half e_phnum;
- Elf64_Half e_shentsize;
- Elf64_Half e_shnum;
- Elf64_Half e_shstrndx;
+ Elf64_Half e_type;
+ Elf64_Half e_machine;
+ Elf64_Word e_version;
+ Elf64_Addr e_entry;
+ Elf64_Off e_phoff;
+ Elf64_Off e_shoff;
+ Elf64_Word e_flags;
+ Elf64_Half e_ehsize;
+ Elf64_Half e_phentsize;
+ Elf64_Half e_phnum;
+ Elf64_Half e_shentsize;
+ Elf64_Half e_shnum;
+ Elf64_Half e_shstrndx;
bool checkMagic() const {
return (memcmp(e_ident, ElfMagic, strlen(ElfMagic))) == 0;
}
@@ -108,215 +108,213 @@ struct Elf64_Ehdr {
// File types
enum {
- ET_NONE = 0, // No file type
- ET_REL = 1, // Relocatable file
- ET_EXEC = 2, // Executable file
- ET_DYN = 3, // Shared object file
- ET_CORE = 4, // Core file
+ ET_NONE = 0, // No file type
+ ET_REL = 1, // Relocatable file
+ ET_EXEC = 2, // Executable file
+ ET_DYN = 3, // Shared object file
+ ET_CORE = 4, // Core file
ET_LOPROC = 0xff00, // Beginning of processor-specific codes
ET_HIPROC = 0xffff // Processor-specific
};
// Versioning
-enum {
- EV_NONE = 0,
- EV_CURRENT = 1
-};
+enum { EV_NONE = 0, EV_CURRENT = 1 };
// Machine architectures
// See current registered ELF machine architectures at:
// http://www.uxsglobal.com/developers/gabi/latest/ch4.eheader.html
enum {
- EM_NONE = 0, // No machine
- EM_M32 = 1, // AT&T WE 32100
- EM_SPARC = 2, // SPARC
- EM_386 = 3, // Intel 386
- EM_68K = 4, // Motorola 68000
- EM_88K = 5, // Motorola 88000
- EM_IAMCU = 6, // Intel MCU
- EM_860 = 7, // Intel 80860
- EM_MIPS = 8, // MIPS R3000
- EM_S370 = 9, // IBM System/370
- EM_MIPS_RS3_LE = 10, // MIPS RS3000 Little-endian
- EM_PARISC = 15, // Hewlett-Packard PA-RISC
- EM_VPP500 = 17, // Fujitsu VPP500
- EM_SPARC32PLUS = 18, // Enhanced instruction set SPARC
- EM_960 = 19, // Intel 80960
- EM_PPC = 20, // PowerPC
- EM_PPC64 = 21, // PowerPC64
- EM_S390 = 22, // IBM System/390
- EM_SPU = 23, // IBM SPU/SPC
- EM_V800 = 36, // NEC V800
- EM_FR20 = 37, // Fujitsu FR20
- EM_RH32 = 38, // TRW RH-32
- EM_RCE = 39, // Motorola RCE
- EM_ARM = 40, // ARM
- EM_ALPHA = 41, // DEC Alpha
- EM_SH = 42, // Hitachi SH
- EM_SPARCV9 = 43, // SPARC V9
- EM_TRICORE = 44, // Siemens TriCore
- EM_ARC = 45, // Argonaut RISC Core
- EM_H8_300 = 46, // Hitachi H8/300
- EM_H8_300H = 47, // Hitachi H8/300H
- EM_H8S = 48, // Hitachi H8S
- EM_H8_500 = 49, // Hitachi H8/500
- EM_IA_64 = 50, // Intel IA-64 processor architecture
- EM_MIPS_X = 51, // Stanford MIPS-X
- EM_COLDFIRE = 52, // Motorola ColdFire
- EM_68HC12 = 53, // Motorola M68HC12
- EM_MMA = 54, // Fujitsu MMA Multimedia Accelerator
- EM_PCP = 55, // Siemens PCP
- EM_NCPU = 56, // Sony nCPU embedded RISC processor
- EM_NDR1 = 57, // Denso NDR1 microprocessor
- EM_STARCORE = 58, // Motorola Star*Core processor
- EM_ME16 = 59, // Toyota ME16 processor
- EM_ST100 = 60, // STMicroelectronics ST100 processor
- EM_TINYJ = 61, // Advanced Logic Corp. TinyJ embedded processor family
- EM_X86_64 = 62, // AMD x86-64 architecture
- EM_PDSP = 63, // Sony DSP Processor
- EM_PDP10 = 64, // Digital Equipment Corp. PDP-10
- EM_PDP11 = 65, // Digital Equipment Corp. PDP-11
- EM_FX66 = 66, // Siemens FX66 microcontroller
- EM_ST9PLUS = 67, // STMicroelectronics ST9+ 8/16 bit microcontroller
- EM_ST7 = 68, // STMicroelectronics ST7 8-bit microcontroller
- EM_68HC16 = 69, // Motorola MC68HC16 Microcontroller
- EM_68HC11 = 70, // Motorola MC68HC11 Microcontroller
- EM_68HC08 = 71, // Motorola MC68HC08 Microcontroller
- EM_68HC05 = 72, // Motorola MC68HC05 Microcontroller
- EM_SVX = 73, // Silicon Graphics SVx
- EM_ST19 = 74, // STMicroelectronics ST19 8-bit microcontroller
- EM_VAX = 75, // Digital VAX
- EM_CRIS = 76, // Axis Communications 32-bit embedded processor
- EM_JAVELIN = 77, // Infineon Technologies 32-bit embedded processor
- EM_FIREPATH = 78, // Element 14 64-bit DSP Processor
- EM_ZSP = 79, // LSI Logic 16-bit DSP Processor
- EM_MMIX = 80, // Donald Knuth's educational 64-bit processor
- EM_HUANY = 81, // Harvard University machine-independent object files
- EM_PRISM = 82, // SiTera Prism
- EM_AVR = 83, // Atmel AVR 8-bit microcontroller
- EM_FR30 = 84, // Fujitsu FR30
- EM_D10V = 85, // Mitsubishi D10V
- EM_D30V = 86, // Mitsubishi D30V
- EM_V850 = 87, // NEC v850
- EM_M32R = 88, // Mitsubishi M32R
- EM_MN10300 = 89, // Matsushita MN10300
- EM_MN10200 = 90, // Matsushita MN10200
- EM_PJ = 91, // picoJava
- EM_OPENRISC = 92, // OpenRISC 32-bit embedded processor
- EM_ARC_COMPACT = 93, // ARC International ARCompact processor (old
+ EM_NONE = 0, // No machine
+ EM_M32 = 1, // AT&T WE 32100
+ EM_SPARC = 2, // SPARC
+ EM_386 = 3, // Intel 386
+ EM_68K = 4, // Motorola 68000
+ EM_88K = 5, // Motorola 88000
+ EM_IAMCU = 6, // Intel MCU
+ EM_860 = 7, // Intel 80860
+ EM_MIPS = 8, // MIPS R3000
+ EM_S370 = 9, // IBM System/370
+ EM_MIPS_RS3_LE = 10, // MIPS RS3000 Little-endian
+ EM_PARISC = 15, // Hewlett-Packard PA-RISC
+ EM_VPP500 = 17, // Fujitsu VPP500
+ EM_SPARC32PLUS = 18, // Enhanced instruction set SPARC
+ EM_960 = 19, // Intel 80960
+ EM_PPC = 20, // PowerPC
+ EM_PPC64 = 21, // PowerPC64
+ EM_S390 = 22, // IBM System/390
+ EM_SPU = 23, // IBM SPU/SPC
+ EM_V800 = 36, // NEC V800
+ EM_FR20 = 37, // Fujitsu FR20
+ EM_RH32 = 38, // TRW RH-32
+ EM_RCE = 39, // Motorola RCE
+ EM_ARM = 40, // ARM
+ EM_ALPHA = 41, // DEC Alpha
+ EM_SH = 42, // Hitachi SH
+ EM_SPARCV9 = 43, // SPARC V9
+ EM_TRICORE = 44, // Siemens TriCore
+ EM_ARC = 45, // Argonaut RISC Core
+ EM_H8_300 = 46, // Hitachi H8/300
+ EM_H8_300H = 47, // Hitachi H8/300H
+ EM_H8S = 48, // Hitachi H8S
+ EM_H8_500 = 49, // Hitachi H8/500
+ EM_IA_64 = 50, // Intel IA-64 processor architecture
+ EM_MIPS_X = 51, // Stanford MIPS-X
+ EM_COLDFIRE = 52, // Motorola ColdFire
+ EM_68HC12 = 53, // Motorola M68HC12
+ EM_MMA = 54, // Fujitsu MMA Multimedia Accelerator
+ EM_PCP = 55, // Siemens PCP
+ EM_NCPU = 56, // Sony nCPU embedded RISC processor
+ EM_NDR1 = 57, // Denso NDR1 microprocessor
+ EM_STARCORE = 58, // Motorola Star*Core processor
+ EM_ME16 = 59, // Toyota ME16 processor
+ EM_ST100 = 60, // STMicroelectronics ST100 processor
+ EM_TINYJ = 61, // Advanced Logic Corp. TinyJ embedded processor family
+ EM_X86_64 = 62, // AMD x86-64 architecture
+ EM_PDSP = 63, // Sony DSP Processor
+ EM_PDP10 = 64, // Digital Equipment Corp. PDP-10
+ EM_PDP11 = 65, // Digital Equipment Corp. PDP-11
+ EM_FX66 = 66, // Siemens FX66 microcontroller
+ EM_ST9PLUS = 67, // STMicroelectronics ST9+ 8/16 bit microcontroller
+ EM_ST7 = 68, // STMicroelectronics ST7 8-bit microcontroller
+ EM_68HC16 = 69, // Motorola MC68HC16 Microcontroller
+ EM_68HC11 = 70, // Motorola MC68HC11 Microcontroller
+ EM_68HC08 = 71, // Motorola MC68HC08 Microcontroller
+ EM_68HC05 = 72, // Motorola MC68HC05 Microcontroller
+ EM_SVX = 73, // Silicon Graphics SVx
+ EM_ST19 = 74, // STMicroelectronics ST19 8-bit microcontroller
+ EM_VAX = 75, // Digital VAX
+ EM_CRIS = 76, // Axis Communications 32-bit embedded processor
+ EM_JAVELIN = 77, // Infineon Technologies 32-bit embedded processor
+ EM_FIREPATH = 78, // Element 14 64-bit DSP Processor
+ EM_ZSP = 79, // LSI Logic 16-bit DSP Processor
+ EM_MMIX = 80, // Donald Knuth's educational 64-bit processor
+ EM_HUANY = 81, // Harvard University machine-independent object files
+ EM_PRISM = 82, // SiTera Prism
+ EM_AVR = 83, // Atmel AVR 8-bit microcontroller
+ EM_FR30 = 84, // Fujitsu FR30
+ EM_D10V = 85, // Mitsubishi D10V
+ EM_D30V = 86, // Mitsubishi D30V
+ EM_V850 = 87, // NEC v850
+ EM_M32R = 88, // Mitsubishi M32R
+ EM_MN10300 = 89, // Matsushita MN10300
+ EM_MN10200 = 90, // Matsushita MN10200
+ EM_PJ = 91, // picoJava
+ EM_OPENRISC = 92, // OpenRISC 32-bit embedded processor
+ EM_ARC_COMPACT = 93, // ARC International ARCompact processor (old
// spelling/synonym: EM_ARC_A5)
- EM_XTENSA = 94, // Tensilica Xtensa Architecture
- EM_VIDEOCORE = 95, // Alphamosaic VideoCore processor
- EM_TMM_GPP = 96, // Thompson Multimedia General Purpose Processor
- EM_NS32K = 97, // National Semiconductor 32000 series
- EM_TPC = 98, // Tenor Network TPC processor
- EM_SNP1K = 99, // Trebia SNP 1000 processor
- EM_ST200 = 100, // STMicroelectronics (www.st.com) ST200
- EM_IP2K = 101, // Ubicom IP2xxx microcontroller family
- EM_MAX = 102, // MAX Processor
- EM_CR = 103, // National Semiconductor CompactRISC microprocessor
- EM_F2MC16 = 104, // Fujitsu F2MC16
- EM_MSP430 = 105, // Texas Instruments embedded microcontroller msp430
- EM_BLACKFIN = 106, // Analog Devices Blackfin (DSP) processor
- EM_SE_C33 = 107, // S1C33 Family of Seiko Epson processors
- EM_SEP = 108, // Sharp embedded microprocessor
- EM_ARCA = 109, // Arca RISC Microprocessor
- EM_UNICORE = 110, // Microprocessor series from PKU-Unity Ltd. and MPRC
- // of Peking University
- EM_EXCESS = 111, // eXcess: 16/32/64-bit configurable embedded CPU
- EM_DXP = 112, // Icera Semiconductor Inc. Deep Execution Processor
- EM_ALTERA_NIOS2 = 113, // Altera Nios II soft-core processor
- EM_CRX = 114, // National Semiconductor CompactRISC CRX
- EM_XGATE = 115, // Motorola XGATE embedded processor
- EM_C166 = 116, // Infineon C16x/XC16x processor
- EM_M16C = 117, // Renesas M16C series microprocessors
- EM_DSPIC30F = 118, // Microchip Technology dsPIC30F Digital Signal
- // Controller
- EM_CE = 119, // Freescale Communication Engine RISC core
- EM_M32C = 120, // Renesas M32C series microprocessors
- EM_TSK3000 = 131, // Altium TSK3000 core
- EM_RS08 = 132, // Freescale RS08 embedded processor
- EM_SHARC = 133, // Analog Devices SHARC family of 32-bit DSP
- // processors
- EM_ECOG2 = 134, // Cyan Technology eCOG2 microprocessor
- EM_SCORE7 = 135, // Sunplus S+core7 RISC processor
- EM_DSP24 = 136, // New Japan Radio (NJR) 24-bit DSP Processor
- EM_VIDEOCORE3 = 137, // Broadcom VideoCore III processor
+ EM_XTENSA = 94, // Tensilica Xtensa Architecture
+ EM_VIDEOCORE = 95, // Alphamosaic VideoCore processor
+ EM_TMM_GPP = 96, // Thompson Multimedia General Purpose Processor
+ EM_NS32K = 97, // National Semiconductor 32000 series
+ EM_TPC = 98, // Tenor Network TPC processor
+ EM_SNP1K = 99, // Trebia SNP 1000 processor
+ EM_ST200 = 100, // STMicroelectronics (www.st.com) ST200
+ EM_IP2K = 101, // Ubicom IP2xxx microcontroller family
+ EM_MAX = 102, // MAX Processor
+ EM_CR = 103, // National Semiconductor CompactRISC microprocessor
+ EM_F2MC16 = 104, // Fujitsu F2MC16
+ EM_MSP430 = 105, // Texas Instruments embedded microcontroller msp430
+ EM_BLACKFIN = 106, // Analog Devices Blackfin (DSP) processor
+ EM_SE_C33 = 107, // S1C33 Family of Seiko Epson processors
+ EM_SEP = 108, // Sharp embedded microprocessor
+ EM_ARCA = 109, // Arca RISC Microprocessor
+ EM_UNICORE = 110, // Microprocessor series from PKU-Unity Ltd. and MPRC
+ // of Peking University
+ EM_EXCESS = 111, // eXcess: 16/32/64-bit configurable embedded CPU
+ EM_DXP = 112, // Icera Semiconductor Inc. Deep Execution Processor
+ EM_ALTERA_NIOS2 = 113, // Altera Nios II soft-core processor
+ EM_CRX = 114, // National Semiconductor CompactRISC CRX
+ EM_XGATE = 115, // Motorola XGATE embedded processor
+ EM_C166 = 116, // Infineon C16x/XC16x processor
+ EM_M16C = 117, // Renesas M16C series microprocessors
+ EM_DSPIC30F = 118, // Microchip Technology dsPIC30F Digital Signal
+ // Controller
+ EM_CE = 119, // Freescale Communication Engine RISC core
+ EM_M32C = 120, // Renesas M32C series microprocessors
+ EM_TSK3000 = 131, // Altium TSK3000 core
+ EM_RS08 = 132, // Freescale RS08 embedded processor
+ EM_SHARC = 133, // Analog Devices SHARC family of 32-bit DSP
+ // processors
+ EM_ECOG2 = 134, // Cyan Technology eCOG2 microprocessor
+ EM_SCORE7 = 135, // Sunplus S+core7 RISC processor
+ EM_DSP24 = 136, // New Japan Radio (NJR) 24-bit DSP Processor
+ EM_VIDEOCORE3 = 137, // Broadcom VideoCore III processor
EM_LATTICEMICO32 = 138, // RISC processor for Lattice FPGA architecture
- EM_SE_C17 = 139, // Seiko Epson C17 family
- EM_TI_C6000 = 140, // The Texas Instruments TMS320C6000 DSP family
- EM_TI_C2000 = 141, // The Texas Instruments TMS320C2000 DSP family
- EM_TI_C5500 = 142, // The Texas Instruments TMS320C55x DSP family
- EM_MMDSP_PLUS = 160, // STMicroelectronics 64bit VLIW Data Signal Processor
- EM_CYPRESS_M8C = 161, // Cypress M8C microprocessor
- EM_R32C = 162, // Renesas R32C series microprocessors
- EM_TRIMEDIA = 163, // NXP Semiconductors TriMedia architecture family
- EM_HEXAGON = 164, // Qualcomm Hexagon processor
- EM_8051 = 165, // Intel 8051 and variants
- EM_STXP7X = 166, // STMicroelectronics STxP7x family of configurable
+ EM_SE_C17 = 139, // Seiko Epson C17 family
+ EM_TI_C6000 = 140, // The Texas Instruments TMS320C6000 DSP family
+ EM_TI_C2000 = 141, // The Texas Instruments TMS320C2000 DSP family
+ EM_TI_C5500 = 142, // The Texas Instruments TMS320C55x DSP family
+ EM_MMDSP_PLUS = 160, // STMicroelectronics 64bit VLIW Data Signal Processor
+ EM_CYPRESS_M8C = 161, // Cypress M8C microprocessor
+ EM_R32C = 162, // Renesas R32C series microprocessors
+ EM_TRIMEDIA = 163, // NXP Semiconductors TriMedia architecture family
+ EM_HEXAGON = 164, // Qualcomm Hexagon processor
+ EM_8051 = 165, // Intel 8051 and variants
+ EM_STXP7X = 166, // STMicroelectronics STxP7x family of configurable
// and extensible RISC processors
- EM_NDS32 = 167, // Andes Technology compact code size embedded RISC
+ EM_NDS32 = 167, // Andes Technology compact code size embedded RISC
// processor family
- EM_ECOG1 = 168, // Cyan Technology eCOG1X family
- EM_ECOG1X = 168, // Cyan Technology eCOG1X family
- EM_MAXQ30 = 169, // Dallas Semiconductor MAXQ30 Core Micro-controllers
- EM_XIMO16 = 170, // New Japan Radio (NJR) 16-bit DSP Processor
- EM_MANIK = 171, // M2000 Reconfigurable RISC Microprocessor
- EM_CRAYNV2 = 172, // Cray Inc. NV2 vector architecture
- EM_RX = 173, // Renesas RX family
- EM_METAG = 174, // Imagination Technologies META processor
+ EM_ECOG1 = 168, // Cyan Technology eCOG1X family
+ EM_ECOG1X = 168, // Cyan Technology eCOG1X family
+ EM_MAXQ30 = 169, // Dallas Semiconductor MAXQ30 Core Micro-controllers
+ EM_XIMO16 = 170, // New Japan Radio (NJR) 16-bit DSP Processor
+ EM_MANIK = 171, // M2000 Reconfigurable RISC Microprocessor
+ EM_CRAYNV2 = 172, // Cray Inc. NV2 vector architecture
+ EM_RX = 173, // Renesas RX family
+ EM_METAG = 174, // Imagination Technologies META processor
// architecture
- EM_MCST_ELBRUS = 175, // MCST Elbrus general purpose hardware architecture
- EM_ECOG16 = 176, // Cyan Technology eCOG16 family
- EM_CR16 = 177, // National Semiconductor CompactRISC CR16 16-bit
+ EM_MCST_ELBRUS = 175, // MCST Elbrus general purpose hardware architecture
+ EM_ECOG16 = 176, // Cyan Technology eCOG16 family
+ EM_CR16 = 177, // National Semiconductor CompactRISC CR16 16-bit
// microprocessor
- EM_ETPU = 178, // Freescale Extended Time Processing Unit
- EM_SLE9X = 179, // Infineon Technologies SLE9X core
- EM_L10M = 180, // Intel L10M
- EM_K10M = 181, // Intel K10M
- EM_AARCH64 = 183, // ARM AArch64
- EM_AVR32 = 185, // Atmel Corporation 32-bit microprocessor family
- EM_STM8 = 186, // STMicroeletronics STM8 8-bit microcontroller
- EM_TILE64 = 187, // Tilera TILE64 multicore architecture family
- EM_TILEPRO = 188, // Tilera TILEPro multicore architecture family
- EM_CUDA = 190, // NVIDIA CUDA architecture
- EM_TILEGX = 191, // Tilera TILE-Gx multicore architecture family
- EM_CLOUDSHIELD = 192, // CloudShield architecture family
- EM_COREA_1ST = 193, // KIPO-KAIST Core-A 1st generation processor family
- EM_COREA_2ND = 194, // KIPO-KAIST Core-A 2nd generation processor family
- EM_ARC_COMPACT2 = 195, // Synopsys ARCompact V2
- EM_OPEN8 = 196, // Open8 8-bit RISC soft processor core
- EM_RL78 = 197, // Renesas RL78 family
- EM_VIDEOCORE5 = 198, // Broadcom VideoCore V processor
- EM_78KOR = 199, // Renesas 78KOR family
- EM_56800EX = 200, // Freescale 56800EX Digital Signal Controller (DSC)
- EM_BA1 = 201, // Beyond BA1 CPU architecture
- EM_BA2 = 202, // Beyond BA2 CPU architecture
- EM_XCORE = 203, // XMOS xCORE processor family
- EM_MCHP_PIC = 204, // Microchip 8-bit PIC(r) family
- EM_INTEL205 = 205, // Reserved by Intel
- EM_INTEL206 = 206, // Reserved by Intel
- EM_INTEL207 = 207, // Reserved by Intel
- EM_INTEL208 = 208, // Reserved by Intel
- EM_INTEL209 = 209, // Reserved by Intel
- EM_KM32 = 210, // KM211 KM32 32-bit processor
- EM_KMX32 = 211, // KM211 KMX32 32-bit processor
- EM_KMX16 = 212, // KM211 KMX16 16-bit processor
- EM_KMX8 = 213, // KM211 KMX8 8-bit processor
- EM_KVARC = 214, // KM211 KVARC processor
- EM_CDP = 215, // Paneve CDP architecture family
- EM_COGE = 216, // Cognitive Smart Memory Processor
- EM_COOL = 217, // iCelero CoolEngine
- EM_NORC = 218, // Nanoradio Optimized RISC
- EM_CSR_KALIMBA = 219, // CSR Kalimba architecture family
- EM_AMDGPU = 224, // AMD GPU architecture
- EM_LANAI = 244, // Lanai 32-bit processor
- EM_BPF = 247, // Linux kernel bpf virtual machine
+ EM_ETPU = 178, // Freescale Extended Time Processing Unit
+ EM_SLE9X = 179, // Infineon Technologies SLE9X core
+ EM_L10M = 180, // Intel L10M
+ EM_K10M = 181, // Intel K10M
+ EM_AARCH64 = 183, // ARM AArch64
+ EM_AVR32 = 185, // Atmel Corporation 32-bit microprocessor family
+  EM_STM8 = 186,          // STMicroelectronics STM8 8-bit microcontroller
+ EM_TILE64 = 187, // Tilera TILE64 multicore architecture family
+ EM_TILEPRO = 188, // Tilera TILEPro multicore architecture family
+ EM_CUDA = 190, // NVIDIA CUDA architecture
+ EM_TILEGX = 191, // Tilera TILE-Gx multicore architecture family
+ EM_CLOUDSHIELD = 192, // CloudShield architecture family
+ EM_COREA_1ST = 193, // KIPO-KAIST Core-A 1st generation processor family
+ EM_COREA_2ND = 194, // KIPO-KAIST Core-A 2nd generation processor family
+ EM_ARC_COMPACT2 = 195, // Synopsys ARCompact V2
+ EM_OPEN8 = 196, // Open8 8-bit RISC soft processor core
+ EM_RL78 = 197, // Renesas RL78 family
+ EM_VIDEOCORE5 = 198, // Broadcom VideoCore V processor
+ EM_78KOR = 199, // Renesas 78KOR family
+ EM_56800EX = 200, // Freescale 56800EX Digital Signal Controller (DSC)
+ EM_BA1 = 201, // Beyond BA1 CPU architecture
+ EM_BA2 = 202, // Beyond BA2 CPU architecture
+ EM_XCORE = 203, // XMOS xCORE processor family
+ EM_MCHP_PIC = 204, // Microchip 8-bit PIC(r) family
+ EM_INTEL205 = 205, // Reserved by Intel
+ EM_INTEL206 = 206, // Reserved by Intel
+ EM_INTEL207 = 207, // Reserved by Intel
+ EM_INTEL208 = 208, // Reserved by Intel
+ EM_INTEL209 = 209, // Reserved by Intel
+ EM_KM32 = 210, // KM211 KM32 32-bit processor
+ EM_KMX32 = 211, // KM211 KMX32 32-bit processor
+ EM_KMX16 = 212, // KM211 KMX16 16-bit processor
+ EM_KMX8 = 213, // KM211 KMX8 8-bit processor
+ EM_KVARC = 214, // KM211 KVARC processor
+ EM_CDP = 215, // Paneve CDP architecture family
+ EM_COGE = 216, // Cognitive Smart Memory Processor
+ EM_COOL = 217, // iCelero CoolEngine
+ EM_NORC = 218, // Nanoradio Optimized RISC
+ EM_CSR_KALIMBA = 219, // CSR Kalimba architecture family
+ EM_AMDGPU = 224, // AMD GPU architecture
+ EM_RISCV = 243, // RISC-V
+ EM_LANAI = 244, // Lanai 32-bit processor
+ EM_BPF = 247, // Linux kernel bpf virtual machine
// A request has been made to the maintainer of the official registry for
// such numbers for an official value for WebAssembly. As soon as one is
// allocated, this enum will be updated to use it.
- EM_WEBASSEMBLY = 0x4157, // WebAssembly architecture
+ EM_WEBASSEMBLY = 0x4157, // WebAssembly architecture
};
// Object file classes.
@@ -391,20 +389,14 @@ enum {
STO_PPC64_LOCAL_BIT = 5,
STO_PPC64_LOCAL_MASK = (7 << STO_PPC64_LOCAL_BIT)
};
-static inline int64_t
-decodePPC64LocalEntryOffset(unsigned Other) {
+static inline int64_t decodePPC64LocalEntryOffset(unsigned Other) {
unsigned Val = (Other & STO_PPC64_LOCAL_MASK) >> STO_PPC64_LOCAL_BIT;
return ((1 << Val) >> 2) << 2;
}
-static inline unsigned
-encodePPC64LocalEntryOffset(int64_t Offset) {
- unsigned Val = (Offset >= 4 * 4
- ? (Offset >= 8 * 4
- ? (Offset >= 16 * 4 ? 6 : 5)
- : 4)
- : (Offset >= 2 * 4
- ? 3
- : (Offset >= 1 * 4 ? 2 : 0)));
+static inline unsigned encodePPC64LocalEntryOffset(int64_t Offset) {
+ unsigned Val =
+ (Offset >= 4 * 4 ? (Offset >= 8 * 4 ? (Offset >= 16 * 4 ? 6 : 5) : 4)
+ : (Offset >= 2 * 4 ? 3 : (Offset >= 1 * 4 ? 2 : 0)));
return Val << STO_PPC64_LOCAL_BIT;
}
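Taken on their own, the two reformatted helpers above implement a small logarithmic code: a local-entry offset of 0, 4, 8, 16, 32 or 64 bytes is squeezed into three st_other bits and expanded again with ((1 << Val) >> 2) << 2. A self-contained round-trip sketch built only from the constants and expressions visible above (an illustration, not LLVM itself):

#include <cassert>
#include <cstdint>
#include <initializer_list>

enum {
  STO_PPC64_LOCAL_BIT = 5,
  STO_PPC64_LOCAL_MASK = (7 << STO_PPC64_LOCAL_BIT)
};

// Expand the 3-bit field back into a byte offset.
static inline int64_t decodePPC64LocalEntryOffset(unsigned Other) {
  unsigned Val = (Other & STO_PPC64_LOCAL_MASK) >> STO_PPC64_LOCAL_BIT;
  return ((1 << Val) >> 2) << 2;
}

// Map the byte offset to the 3-bit field (roughly log2 of the offset).
static inline unsigned encodePPC64LocalEntryOffset(int64_t Offset) {
  unsigned Val =
      (Offset >= 4 * 4 ? (Offset >= 8 * 4 ? (Offset >= 16 * 4 ? 6 : 5) : 4)
                       : (Offset >= 2 * 4 ? 3 : (Offset >= 1 * 4 ? 2 : 0)));
  return Val << STO_PPC64_LOCAL_BIT;
}

int main() {
  // Every representable offset survives the encode/decode round trip.
  for (int64_t Off : {0, 4, 8, 16, 32, 64})
    assert(decodePPC64LocalEntryOffset(encodePPC64LocalEntryOffset(Off)) == Off);
  return 0;
}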
@@ -420,15 +412,15 @@ enum {
// ARM Specific e_flags
enum : unsigned {
- EF_ARM_SOFT_FLOAT = 0x00000200U,
- EF_ARM_VFP_FLOAT = 0x00000400U,
- EF_ARM_EABI_UNKNOWN = 0x00000000U,
- EF_ARM_EABI_VER1 = 0x01000000U,
- EF_ARM_EABI_VER2 = 0x02000000U,
- EF_ARM_EABI_VER3 = 0x03000000U,
- EF_ARM_EABI_VER4 = 0x04000000U,
- EF_ARM_EABI_VER5 = 0x05000000U,
- EF_ARM_EABIMASK = 0xFF000000U
+ EF_ARM_SOFT_FLOAT = 0x00000200U,
+ EF_ARM_VFP_FLOAT = 0x00000400U,
+ EF_ARM_EABI_UNKNOWN = 0x00000000U,
+ EF_ARM_EABI_VER1 = 0x01000000U,
+ EF_ARM_EABI_VER2 = 0x02000000U,
+ EF_ARM_EABI_VER3 = 0x03000000U,
+ EF_ARM_EABI_VER4 = 0x04000000U,
+ EF_ARM_EABI_VER5 = 0x05000000U,
+ EF_ARM_EABIMASK = 0xFF000000U
};
// ELF Relocation types for ARM
@@ -438,24 +430,24 @@ enum {
// AVR specific e_flags
enum : unsigned {
- EF_AVR_ARCH_AVR1 = 1,
- EF_AVR_ARCH_AVR2 = 2,
- EF_AVR_ARCH_AVR25 = 25,
- EF_AVR_ARCH_AVR3 = 3,
- EF_AVR_ARCH_AVR31 = 31,
- EF_AVR_ARCH_AVR35 = 35,
- EF_AVR_ARCH_AVR4 = 4,
- EF_AVR_ARCH_AVR5 = 5,
- EF_AVR_ARCH_AVR51 = 51,
- EF_AVR_ARCH_AVR6 = 6,
+ EF_AVR_ARCH_AVR1 = 1,
+ EF_AVR_ARCH_AVR2 = 2,
+ EF_AVR_ARCH_AVR25 = 25,
+ EF_AVR_ARCH_AVR3 = 3,
+ EF_AVR_ARCH_AVR31 = 31,
+ EF_AVR_ARCH_AVR35 = 35,
+ EF_AVR_ARCH_AVR4 = 4,
+ EF_AVR_ARCH_AVR5 = 5,
+ EF_AVR_ARCH_AVR51 = 51,
+ EF_AVR_ARCH_AVR6 = 6,
EF_AVR_ARCH_AVRTINY = 100,
- EF_AVR_ARCH_XMEGA1 = 101,
- EF_AVR_ARCH_XMEGA2 = 102,
- EF_AVR_ARCH_XMEGA3 = 103,
- EF_AVR_ARCH_XMEGA4 = 104,
- EF_AVR_ARCH_XMEGA5 = 105,
- EF_AVR_ARCH_XMEGA6 = 106,
- EF_AVR_ARCH_XMEGA7 = 107
+ EF_AVR_ARCH_XMEGA1 = 101,
+ EF_AVR_ARCH_XMEGA2 = 102,
+ EF_AVR_ARCH_XMEGA3 = 103,
+ EF_AVR_ARCH_XMEGA4 = 104,
+ EF_AVR_ARCH_XMEGA5 = 105,
+ EF_AVR_ARCH_XMEGA6 = 106,
+ EF_AVR_ARCH_XMEGA7 = 107
};
// ELF Relocation types for AVR
@@ -466,65 +458,63 @@ enum {
// Mips Specific e_flags
enum : unsigned {
EF_MIPS_NOREORDER = 0x00000001, // Don't reorder instructions
- EF_MIPS_PIC = 0x00000002, // Position independent code
- EF_MIPS_CPIC = 0x00000004, // Call object with Position independent code
- EF_MIPS_ABI2 = 0x00000020, // File uses N32 ABI
+ EF_MIPS_PIC = 0x00000002, // Position independent code
+ EF_MIPS_CPIC = 0x00000004, // Call object with Position independent code
+ EF_MIPS_ABI2 = 0x00000020, // File uses N32 ABI
EF_MIPS_32BITMODE = 0x00000100, // Code compiled for a 64-bit machine
// in 32-bit mode
- EF_MIPS_FP64 = 0x00000200, // Code compiled for a 32-bit machine
+ EF_MIPS_FP64 = 0x00000200, // Code compiled for a 32-bit machine
// but uses 64-bit FP registers
- EF_MIPS_NAN2008 = 0x00000400, // Uses IEE 754-2008 NaN encoding
+  EF_MIPS_NAN2008 = 0x00000400,   // Uses IEEE 754-2008 NaN encoding
// ABI flags
- EF_MIPS_ABI_O32 = 0x00001000, // This file follows the first MIPS 32 bit ABI
- EF_MIPS_ABI_O64 = 0x00002000, // O32 ABI extended for 64-bit architecture.
+ EF_MIPS_ABI_O32 = 0x00001000, // This file follows the first MIPS 32 bit ABI
+ EF_MIPS_ABI_O64 = 0x00002000, // O32 ABI extended for 64-bit architecture.
EF_MIPS_ABI_EABI32 = 0x00003000, // EABI in 32 bit mode.
EF_MIPS_ABI_EABI64 = 0x00004000, // EABI in 64 bit mode.
- EF_MIPS_ABI = 0x0000f000, // Mask for selecting EF_MIPS_ABI_ variant.
+ EF_MIPS_ABI = 0x0000f000, // Mask for selecting EF_MIPS_ABI_ variant.
// MIPS machine variant
- EF_MIPS_MACH_NONE = 0x00000000, // A standard MIPS implementation.
- EF_MIPS_MACH_3900 = 0x00810000, // Toshiba R3900
- EF_MIPS_MACH_4010 = 0x00820000, // LSI R4010
- EF_MIPS_MACH_4100 = 0x00830000, // NEC VR4100
- EF_MIPS_MACH_4650 = 0x00850000, // MIPS R4650
- EF_MIPS_MACH_4120 = 0x00870000, // NEC VR4120
- EF_MIPS_MACH_4111 = 0x00880000, // NEC VR4111/VR4181
- EF_MIPS_MACH_SB1 = 0x008a0000, // Broadcom SB-1
- EF_MIPS_MACH_OCTEON = 0x008b0000, // Cavium Networks Octeon
- EF_MIPS_MACH_XLR = 0x008c0000, // RMI Xlr
+ EF_MIPS_MACH_NONE = 0x00000000, // A standard MIPS implementation.
+ EF_MIPS_MACH_3900 = 0x00810000, // Toshiba R3900
+ EF_MIPS_MACH_4010 = 0x00820000, // LSI R4010
+ EF_MIPS_MACH_4100 = 0x00830000, // NEC VR4100
+ EF_MIPS_MACH_4650 = 0x00850000, // MIPS R4650
+ EF_MIPS_MACH_4120 = 0x00870000, // NEC VR4120
+ EF_MIPS_MACH_4111 = 0x00880000, // NEC VR4111/VR4181
+ EF_MIPS_MACH_SB1 = 0x008a0000, // Broadcom SB-1
+ EF_MIPS_MACH_OCTEON = 0x008b0000, // Cavium Networks Octeon
+ EF_MIPS_MACH_XLR = 0x008c0000, // RMI Xlr
EF_MIPS_MACH_OCTEON2 = 0x008d0000, // Cavium Networks Octeon2
EF_MIPS_MACH_OCTEON3 = 0x008e0000, // Cavium Networks Octeon3
- EF_MIPS_MACH_5400 = 0x00910000, // NEC VR5400
- EF_MIPS_MACH_5900 = 0x00920000, // MIPS R5900
- EF_MIPS_MACH_5500 = 0x00980000, // NEC VR5500
- EF_MIPS_MACH_9000 = 0x00990000, // Unknown
- EF_MIPS_MACH_LS2E = 0x00a00000, // ST Microelectronics Loongson 2E
- EF_MIPS_MACH_LS2F = 0x00a10000, // ST Microelectronics Loongson 2F
- EF_MIPS_MACH_LS3A = 0x00a20000, // Loongson 3A
- EF_MIPS_MACH = 0x00ff0000, // EF_MIPS_MACH_xxx selection mask
+ EF_MIPS_MACH_5400 = 0x00910000, // NEC VR5400
+ EF_MIPS_MACH_5900 = 0x00920000, // MIPS R5900
+ EF_MIPS_MACH_5500 = 0x00980000, // NEC VR5500
+ EF_MIPS_MACH_9000 = 0x00990000, // Unknown
+ EF_MIPS_MACH_LS2E = 0x00a00000, // ST Microelectronics Loongson 2E
+ EF_MIPS_MACH_LS2F = 0x00a10000, // ST Microelectronics Loongson 2F
+ EF_MIPS_MACH_LS3A = 0x00a20000, // Loongson 3A
+ EF_MIPS_MACH = 0x00ff0000, // EF_MIPS_MACH_xxx selection mask
// ARCH_ASE
- EF_MIPS_MICROMIPS = 0x02000000, // microMIPS
- EF_MIPS_ARCH_ASE_M16 =
- 0x04000000, // Has Mips-16 ISA extensions
- EF_MIPS_ARCH_ASE_MDMX =
- 0x08000000, // Has MDMX multimedia extensions
- EF_MIPS_ARCH_ASE = 0x0f000000, // Mask for EF_MIPS_ARCH_ASE_xxx flags
+ EF_MIPS_MICROMIPS = 0x02000000, // microMIPS
+ EF_MIPS_ARCH_ASE_M16 = 0x04000000, // Has Mips-16 ISA extensions
+ EF_MIPS_ARCH_ASE_MDMX = 0x08000000, // Has MDMX multimedia extensions
+ EF_MIPS_ARCH_ASE = 0x0f000000, // Mask for EF_MIPS_ARCH_ASE_xxx flags
// ARCH
- EF_MIPS_ARCH_1 = 0x00000000, // MIPS1 instruction set
- EF_MIPS_ARCH_2 = 0x10000000, // MIPS2 instruction set
- EF_MIPS_ARCH_3 = 0x20000000, // MIPS3 instruction set
- EF_MIPS_ARCH_4 = 0x30000000, // MIPS4 instruction set
- EF_MIPS_ARCH_5 = 0x40000000, // MIPS5 instruction set
- EF_MIPS_ARCH_32 = 0x50000000, // MIPS32 instruction set per linux not elf.h
- EF_MIPS_ARCH_64 = 0x60000000, // MIPS64 instruction set per linux not elf.h
+ EF_MIPS_ARCH_1 = 0x00000000, // MIPS1 instruction set
+ EF_MIPS_ARCH_2 = 0x10000000, // MIPS2 instruction set
+ EF_MIPS_ARCH_3 = 0x20000000, // MIPS3 instruction set
+ EF_MIPS_ARCH_4 = 0x30000000, // MIPS4 instruction set
+ EF_MIPS_ARCH_5 = 0x40000000, // MIPS5 instruction set
+ EF_MIPS_ARCH_32 = 0x50000000, // MIPS32 instruction set per linux not elf.h
+ EF_MIPS_ARCH_64 = 0x60000000, // MIPS64 instruction set per linux not elf.h
EF_MIPS_ARCH_32R2 = 0x70000000, // mips32r2, mips32r3, mips32r5
EF_MIPS_ARCH_64R2 = 0x80000000, // mips64r2, mips64r3, mips64r5
EF_MIPS_ARCH_32R6 = 0x90000000, // mips32r6
EF_MIPS_ARCH_64R6 = 0xa0000000, // mips64r6
- EF_MIPS_ARCH = 0xf0000000 // Mask for applying EF_MIPS_ARCH_ variant
+ EF_MIPS_ARCH = 0xf0000000 // Mask for applying EF_MIPS_ARCH_ variant
};
// ELF Relocation types for Mips
@@ -534,57 +524,57 @@ enum {
// Special values for the st_other field in the symbol table entry for MIPS.
enum {
- STO_MIPS_OPTIONAL = 0x04, // Symbol whose definition is optional
- STO_MIPS_PLT = 0x08, // PLT entry related dynamic table record
- STO_MIPS_PIC = 0x20, // PIC func in an object mixes PIC/non-PIC
- STO_MIPS_MICROMIPS = 0x80, // MIPS Specific ISA for MicroMips
- STO_MIPS_MIPS16 = 0xf0 // MIPS Specific ISA for Mips16
+ STO_MIPS_OPTIONAL = 0x04, // Symbol whose definition is optional
+ STO_MIPS_PLT = 0x08, // PLT entry related dynamic table record
+ STO_MIPS_PIC = 0x20, // PIC func in an object mixes PIC/non-PIC
+ STO_MIPS_MICROMIPS = 0x80, // MIPS Specific ISA for MicroMips
+ STO_MIPS_MIPS16 = 0xf0 // MIPS Specific ISA for Mips16
};
// .MIPS.options section descriptor kinds
enum {
- ODK_NULL = 0, // Undefined
- ODK_REGINFO = 1, // Register usage information
- ODK_EXCEPTIONS = 2, // Exception processing options
- ODK_PAD = 3, // Section padding options
- ODK_HWPATCH = 4, // Hardware patches applied
- ODK_FILL = 5, // Linker fill value
- ODK_TAGS = 6, // Space for tool identification
- ODK_HWAND = 7, // Hardware AND patches applied
- ODK_HWOR = 8, // Hardware OR patches applied
- ODK_GP_GROUP = 9, // GP group to use for text/data sections
- ODK_IDENT = 10, // ID information
- ODK_PAGESIZE = 11 // Page size information
+ ODK_NULL = 0, // Undefined
+ ODK_REGINFO = 1, // Register usage information
+ ODK_EXCEPTIONS = 2, // Exception processing options
+ ODK_PAD = 3, // Section padding options
+ ODK_HWPATCH = 4, // Hardware patches applied
+ ODK_FILL = 5, // Linker fill value
+ ODK_TAGS = 6, // Space for tool identification
+ ODK_HWAND = 7, // Hardware AND patches applied
+ ODK_HWOR = 8, // Hardware OR patches applied
+ ODK_GP_GROUP = 9, // GP group to use for text/data sections
+ ODK_IDENT = 10, // ID information
+ ODK_PAGESIZE = 11 // Page size information
};
// Hexagon-specific e_flags
enum {
// Object processor version flags, bits[11:0]
- EF_HEXAGON_MACH_V2 = 0x00000001, // Hexagon V2
- EF_HEXAGON_MACH_V3 = 0x00000002, // Hexagon V3
- EF_HEXAGON_MACH_V4 = 0x00000003, // Hexagon V4
- EF_HEXAGON_MACH_V5 = 0x00000004, // Hexagon V5
- EF_HEXAGON_MACH_V55 = 0x00000005, // Hexagon V55
- EF_HEXAGON_MACH_V60 = 0x00000060, // Hexagon V60
+ EF_HEXAGON_MACH_V2 = 0x00000001, // Hexagon V2
+ EF_HEXAGON_MACH_V3 = 0x00000002, // Hexagon V3
+ EF_HEXAGON_MACH_V4 = 0x00000003, // Hexagon V4
+ EF_HEXAGON_MACH_V5 = 0x00000004, // Hexagon V5
+ EF_HEXAGON_MACH_V55 = 0x00000005, // Hexagon V55
+ EF_HEXAGON_MACH_V60 = 0x00000060, // Hexagon V60
// Highest ISA version flags
- EF_HEXAGON_ISA_MACH = 0x00000000, // Same as specified in bits[11:0]
- // of e_flags
- EF_HEXAGON_ISA_V2 = 0x00000010, // Hexagon V2 ISA
- EF_HEXAGON_ISA_V3 = 0x00000020, // Hexagon V3 ISA
- EF_HEXAGON_ISA_V4 = 0x00000030, // Hexagon V4 ISA
- EF_HEXAGON_ISA_V5 = 0x00000040, // Hexagon V5 ISA
- EF_HEXAGON_ISA_V55 = 0x00000050, // Hexagon V55 ISA
- EF_HEXAGON_ISA_V60 = 0x00000060, // Hexagon V60 ISA
+ EF_HEXAGON_ISA_MACH = 0x00000000, // Same as specified in bits[11:0]
+ // of e_flags
+ EF_HEXAGON_ISA_V2 = 0x00000010, // Hexagon V2 ISA
+ EF_HEXAGON_ISA_V3 = 0x00000020, // Hexagon V3 ISA
+ EF_HEXAGON_ISA_V4 = 0x00000030, // Hexagon V4 ISA
+ EF_HEXAGON_ISA_V5 = 0x00000040, // Hexagon V5 ISA
+ EF_HEXAGON_ISA_V55 = 0x00000050, // Hexagon V55 ISA
+ EF_HEXAGON_ISA_V60 = 0x00000060, // Hexagon V60 ISA
};
// Hexagon-specific section indexes for common small data
enum {
- SHN_HEXAGON_SCOMMON = 0xff00, // Other access sizes
- SHN_HEXAGON_SCOMMON_1 = 0xff01, // Byte-sized access
- SHN_HEXAGON_SCOMMON_2 = 0xff02, // Half-word-sized access
- SHN_HEXAGON_SCOMMON_4 = 0xff03, // Word-sized access
- SHN_HEXAGON_SCOMMON_8 = 0xff04 // Double-word-size access
+ SHN_HEXAGON_SCOMMON = 0xff00, // Other access sizes
+ SHN_HEXAGON_SCOMMON_1 = 0xff01, // Byte-sized access
+ SHN_HEXAGON_SCOMMON_2 = 0xff02, // Half-word-sized access
+ SHN_HEXAGON_SCOMMON_4 = 0xff03, // Word-sized access
+ SHN_HEXAGON_SCOMMON_8 = 0xff04 // Double-word-size access
};
// ELF Relocation types for Hexagon
@@ -597,6 +587,11 @@ enum {
#include "ELFRelocs/Lanai.def"
};
+// ELF Relocation types for RISC-V
+enum {
+#include "ELFRelocs/RISCV.def"
+};
+
// ELF Relocation types for S390/zSeries
enum {
#include "ELFRelocs/SystemZ.def"
@@ -630,7 +625,7 @@ struct Elf32_Shdr {
Elf32_Word sh_type; // Section type (SHT_*)
Elf32_Word sh_flags; // Section flags (SHF_*)
Elf32_Addr sh_addr; // Address where section is to be loaded
- Elf32_Off sh_offset; // File offset of section data, in bytes
+ Elf32_Off sh_offset; // File offset of section data, in bytes
Elf32_Word sh_size; // Size of section, in bytes
Elf32_Word sh_link; // Section type-specific header table index link
Elf32_Word sh_info; // Section type-specific extra information
@@ -640,79 +635,79 @@ struct Elf32_Shdr {
// Section header for ELF64 - same fields as ELF32, different types.
struct Elf64_Shdr {
- Elf64_Word sh_name;
- Elf64_Word sh_type;
+ Elf64_Word sh_name;
+ Elf64_Word sh_type;
Elf64_Xword sh_flags;
- Elf64_Addr sh_addr;
- Elf64_Off sh_offset;
+ Elf64_Addr sh_addr;
+ Elf64_Off sh_offset;
Elf64_Xword sh_size;
- Elf64_Word sh_link;
- Elf64_Word sh_info;
+ Elf64_Word sh_link;
+ Elf64_Word sh_info;
Elf64_Xword sh_addralign;
Elf64_Xword sh_entsize;
};
// Special section indices.
enum {
- SHN_UNDEF = 0, // Undefined, missing, irrelevant, or meaningless
+ SHN_UNDEF = 0, // Undefined, missing, irrelevant, or meaningless
SHN_LORESERVE = 0xff00, // Lowest reserved index
- SHN_LOPROC = 0xff00, // Lowest processor-specific index
- SHN_HIPROC = 0xff1f, // Highest processor-specific index
- SHN_LOOS = 0xff20, // Lowest operating system-specific index
- SHN_HIOS = 0xff3f, // Highest operating system-specific index
- SHN_ABS = 0xfff1, // Symbol has absolute value; does not need relocation
- SHN_COMMON = 0xfff2, // FORTRAN COMMON or C external global variables
- SHN_XINDEX = 0xffff, // Mark that the index is >= SHN_LORESERVE
+ SHN_LOPROC = 0xff00, // Lowest processor-specific index
+ SHN_HIPROC = 0xff1f, // Highest processor-specific index
+ SHN_LOOS = 0xff20, // Lowest operating system-specific index
+ SHN_HIOS = 0xff3f, // Highest operating system-specific index
+ SHN_ABS = 0xfff1, // Symbol has absolute value; does not need relocation
+ SHN_COMMON = 0xfff2, // FORTRAN COMMON or C external global variables
+ SHN_XINDEX = 0xffff, // Mark that the index is >= SHN_LORESERVE
SHN_HIRESERVE = 0xffff // Highest reserved index
};
// Section types.
enum : unsigned {
- SHT_NULL = 0, // No associated section (inactive entry).
- SHT_PROGBITS = 1, // Program-defined contents.
- SHT_SYMTAB = 2, // Symbol table.
- SHT_STRTAB = 3, // String table.
- SHT_RELA = 4, // Relocation entries; explicit addends.
- SHT_HASH = 5, // Symbol hash table.
- SHT_DYNAMIC = 6, // Information for dynamic linking.
- SHT_NOTE = 7, // Information about the file.
- SHT_NOBITS = 8, // Data occupies no space in the file.
- SHT_REL = 9, // Relocation entries; no explicit addends.
- SHT_SHLIB = 10, // Reserved.
- SHT_DYNSYM = 11, // Symbol table.
- SHT_INIT_ARRAY = 14, // Pointers to initialization functions.
- SHT_FINI_ARRAY = 15, // Pointers to termination functions.
- SHT_PREINIT_ARRAY = 16, // Pointers to pre-init functions.
- SHT_GROUP = 17, // Section group.
- SHT_SYMTAB_SHNDX = 18, // Indices for SHN_XINDEX entries.
- SHT_LOOS = 0x60000000, // Lowest operating system-specific type.
- SHT_GNU_ATTRIBUTES= 0x6ffffff5, // Object attributes.
- SHT_GNU_HASH = 0x6ffffff6, // GNU-style hash table.
- SHT_GNU_verdef = 0x6ffffffd, // GNU version definitions.
- SHT_GNU_verneed = 0x6ffffffe, // GNU version references.
- SHT_GNU_versym = 0x6fffffff, // GNU symbol versions table.
- SHT_HIOS = 0x6fffffff, // Highest operating system-specific type.
- SHT_LOPROC = 0x70000000, // Lowest processor arch-specific type.
+ SHT_NULL = 0, // No associated section (inactive entry).
+ SHT_PROGBITS = 1, // Program-defined contents.
+ SHT_SYMTAB = 2, // Symbol table.
+ SHT_STRTAB = 3, // String table.
+ SHT_RELA = 4, // Relocation entries; explicit addends.
+ SHT_HASH = 5, // Symbol hash table.
+ SHT_DYNAMIC = 6, // Information for dynamic linking.
+ SHT_NOTE = 7, // Information about the file.
+ SHT_NOBITS = 8, // Data occupies no space in the file.
+ SHT_REL = 9, // Relocation entries; no explicit addends.
+ SHT_SHLIB = 10, // Reserved.
+ SHT_DYNSYM = 11, // Symbol table.
+ SHT_INIT_ARRAY = 14, // Pointers to initialization functions.
+ SHT_FINI_ARRAY = 15, // Pointers to termination functions.
+ SHT_PREINIT_ARRAY = 16, // Pointers to pre-init functions.
+ SHT_GROUP = 17, // Section group.
+ SHT_SYMTAB_SHNDX = 18, // Indices for SHN_XINDEX entries.
+ SHT_LOOS = 0x60000000, // Lowest operating system-specific type.
+ SHT_GNU_ATTRIBUTES = 0x6ffffff5, // Object attributes.
+ SHT_GNU_HASH = 0x6ffffff6, // GNU-style hash table.
+ SHT_GNU_verdef = 0x6ffffffd, // GNU version definitions.
+ SHT_GNU_verneed = 0x6ffffffe, // GNU version references.
+ SHT_GNU_versym = 0x6fffffff, // GNU symbol versions table.
+ SHT_HIOS = 0x6fffffff, // Highest operating system-specific type.
+ SHT_LOPROC = 0x70000000, // Lowest processor arch-specific type.
// Fixme: All this is duplicated in MCSectionELF. Why??
// Exception Index table
- SHT_ARM_EXIDX = 0x70000001U,
+ SHT_ARM_EXIDX = 0x70000001U,
// BPABI DLL dynamic linking pre-emption map
- SHT_ARM_PREEMPTMAP = 0x70000002U,
+ SHT_ARM_PREEMPTMAP = 0x70000002U,
// Object file compatibility attributes
- SHT_ARM_ATTRIBUTES = 0x70000003U,
- SHT_ARM_DEBUGOVERLAY = 0x70000004U,
- SHT_ARM_OVERLAYSECTION = 0x70000005U,
- SHT_HEX_ORDERED = 0x70000000, // Link editor is to sort the entries in
- // this section based on their sizes
- SHT_X86_64_UNWIND = 0x70000001, // Unwind information
+ SHT_ARM_ATTRIBUTES = 0x70000003U,
+ SHT_ARM_DEBUGOVERLAY = 0x70000004U,
+ SHT_ARM_OVERLAYSECTION = 0x70000005U,
+ SHT_HEX_ORDERED = 0x70000000, // Link editor is to sort the entries in
+ // this section based on their sizes
+ SHT_X86_64_UNWIND = 0x70000001, // Unwind information
- SHT_MIPS_REGINFO = 0x70000006, // Register usage information
- SHT_MIPS_OPTIONS = 0x7000000d, // General options
- SHT_MIPS_ABIFLAGS = 0x7000002a, // ABI information.
+ SHT_MIPS_REGINFO = 0x70000006, // Register usage information
+ SHT_MIPS_OPTIONS = 0x7000000d, // General options
+ SHT_MIPS_ABIFLAGS = 0x7000002a, // ABI information.
- SHT_HIPROC = 0x7fffffff, // Highest processor arch-specific type.
- SHT_LOUSER = 0x80000000, // Lowest type reserved for applications.
- SHT_HIUSER = 0xffffffff // Highest type reserved for applications.
+ SHT_HIPROC = 0x7fffffff, // Highest processor arch-specific type.
+ SHT_LOUSER = 0x80000000, // Lowest type reserved for applications.
+ SHT_HIUSER = 0xffffffff // Highest type reserved for applications.
};
// Section flags.
@@ -766,7 +761,7 @@ enum : unsigned {
/// set to the start of the section by the boot code.
XCORE_SHF_DP_SECTION = 0x1000U,
- SHF_MASKOS = 0x0ff00000,
+ SHF_MASKOS = 0x0ff00000,
// Bits indicating processor-specific flags.
SHF_MASKPROC = 0xf0000000,
@@ -789,30 +784,33 @@ enum : unsigned {
SHF_MIPS_NODUPES = 0x01000000,
// Linker must generate implicit hidden weak names.
- SHF_MIPS_NAMES = 0x02000000,
+ SHF_MIPS_NAMES = 0x02000000,
// Section data local to process.
- SHF_MIPS_LOCAL = 0x04000000,
+ SHF_MIPS_LOCAL = 0x04000000,
// Do not strip this section.
SHF_MIPS_NOSTRIP = 0x08000000,
// Section must be part of global data area.
- SHF_MIPS_GPREL = 0x10000000,
+ SHF_MIPS_GPREL = 0x10000000,
// This section should be merged.
- SHF_MIPS_MERGE = 0x20000000,
+ SHF_MIPS_MERGE = 0x20000000,
// Address size to be inferred from section entry size.
- SHF_MIPS_ADDR = 0x40000000,
+ SHF_MIPS_ADDR = 0x40000000,
// Section data is string data by default.
- SHF_MIPS_STRING = 0x80000000,
+ SHF_MIPS_STRING = 0x80000000,
+
+ // Make code section unreadable when in execute-only mode
+ SHF_ARM_PURECODE = 0x20000000,
- SHF_AMDGPU_HSA_GLOBAL = 0x00100000,
+ SHF_AMDGPU_HSA_GLOBAL = 0x00100000,
SHF_AMDGPU_HSA_READONLY = 0x00200000,
- SHF_AMDGPU_HSA_CODE = 0x00400000,
- SHF_AMDGPU_HSA_AGENT = 0x00800000
+ SHF_AMDGPU_HSA_CODE = 0x00400000,
+ SHF_AMDGPU_HSA_AGENT = 0x00800000
};
// Section Group Flags
@@ -824,12 +822,12 @@ enum : unsigned {
// Symbol table entries for ELF32.
struct Elf32_Sym {
- Elf32_Word st_name; // Symbol name (index into string table)
- Elf32_Addr st_value; // Value or address associated with the symbol
- Elf32_Word st_size; // Size of the symbol
+ Elf32_Word st_name; // Symbol name (index into string table)
+ Elf32_Addr st_value; // Value or address associated with the symbol
+ Elf32_Word st_size; // Size of the symbol
unsigned char st_info; // Symbol's type and binding attributes
unsigned char st_other; // Must be zero; reserved
- Elf32_Half st_shndx; // Which section (header table index) it's defined in
+ Elf32_Half st_shndx; // Which section (header table index) it's defined in
// These accessors and mutators correspond to the ELF32_ST_BIND,
// ELF32_ST_TYPE, and ELF32_ST_INFO macros defined in the ELF specification:
@@ -844,12 +842,12 @@ struct Elf32_Sym {
// Symbol table entries for ELF64.
struct Elf64_Sym {
- Elf64_Word st_name; // Symbol name (index into string table)
- unsigned char st_info; // Symbol's type and binding attributes
- unsigned char st_other; // Must be zero; reserved
- Elf64_Half st_shndx; // Which section (header tbl index) it's defined in
- Elf64_Addr st_value; // Value or address associated with the symbol
- Elf64_Xword st_size; // Size of the symbol
+ Elf64_Word st_name; // Symbol name (index into string table)
+ unsigned char st_info; // Symbol's type and binding attributes
+ unsigned char st_other; // Must be zero; reserved
+ Elf64_Half st_shndx; // Which section (header tbl index) it's defined in
+ Elf64_Addr st_value; // Value or address associated with the symbol
+ Elf64_Xword st_size; // Size of the symbol
// These accessors and mutators are identical to those defined for ELF32
// symbol table entries.
@@ -870,48 +868,46 @@ enum {
// Symbol bindings.
enum {
- STB_LOCAL = 0, // Local symbol, not visible outside obj file containing def
- STB_GLOBAL = 1, // Global symbol, visible to all object files being combined
- STB_WEAK = 2, // Weak symbol, like global but lower-precedence
+ STB_LOCAL = 0, // Local symbol, not visible outside obj file containing def
+ STB_GLOBAL = 1, // Global symbol, visible to all object files being combined
+ STB_WEAK = 2, // Weak symbol, like global but lower-precedence
STB_GNU_UNIQUE = 10,
- STB_LOOS = 10, // Lowest operating system-specific binding type
- STB_HIOS = 12, // Highest operating system-specific binding type
+ STB_LOOS = 10, // Lowest operating system-specific binding type
+ STB_HIOS = 12, // Highest operating system-specific binding type
STB_LOPROC = 13, // Lowest processor-specific binding type
STB_HIPROC = 15 // Highest processor-specific binding type
};
// Symbol types.
enum {
- STT_NOTYPE = 0, // Symbol's type is not specified
- STT_OBJECT = 1, // Symbol is a data object (variable, array, etc.)
- STT_FUNC = 2, // Symbol is executable code (function, etc.)
- STT_SECTION = 3, // Symbol refers to a section
- STT_FILE = 4, // Local, absolute symbol that refers to a file
- STT_COMMON = 5, // An uninitialized common block
- STT_TLS = 6, // Thread local data object
+ STT_NOTYPE = 0, // Symbol's type is not specified
+ STT_OBJECT = 1, // Symbol is a data object (variable, array, etc.)
+ STT_FUNC = 2, // Symbol is executable code (function, etc.)
+ STT_SECTION = 3, // Symbol refers to a section
+ STT_FILE = 4, // Local, absolute symbol that refers to a file
+ STT_COMMON = 5, // An uninitialized common block
+ STT_TLS = 6, // Thread local data object
STT_GNU_IFUNC = 10, // GNU indirect function
- STT_LOOS = 10, // Lowest operating system-specific symbol type
- STT_HIOS = 12, // Highest operating system-specific symbol type
- STT_LOPROC = 13, // Lowest processor-specific symbol type
- STT_HIPROC = 15, // Highest processor-specific symbol type
+ STT_LOOS = 10, // Lowest operating system-specific symbol type
+ STT_HIOS = 12, // Highest operating system-specific symbol type
+ STT_LOPROC = 13, // Lowest processor-specific symbol type
+ STT_HIPROC = 15, // Highest processor-specific symbol type
// AMDGPU symbol types
- STT_AMDGPU_HSA_KERNEL = 10,
+ STT_AMDGPU_HSA_KERNEL = 10,
STT_AMDGPU_HSA_INDIRECT_FUNCTION = 11,
- STT_AMDGPU_HSA_METADATA = 12
+ STT_AMDGPU_HSA_METADATA = 12
};
enum {
- STV_DEFAULT = 0, // Visibility is specified by binding type
- STV_INTERNAL = 1, // Defined by processor supplements
- STV_HIDDEN = 2, // Not visible to other components
- STV_PROTECTED = 3 // Visible in other components but not preemptable
+ STV_DEFAULT = 0, // Visibility is specified by binding type
+ STV_INTERNAL = 1, // Defined by processor supplements
+ STV_HIDDEN = 2, // Not visible to other components
+ STV_PROTECTED = 3 // Visible in other components but not preemptable
};
// Symbol number.
-enum {
- STN_UNDEF = 0
-};
+enum { STN_UNDEF = 0 };
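The binding, type and visibility values above are not separate fields of the symbol record: binding and type share the single st_info byte (binding in the high four bits, type in the low four), and visibility occupies the low two bits of st_other, which is exactly what the ELF32/ELF64 accessors mentioned above compute. A minimal standalone sketch of that packing, using plain integer types rather than the ElfNN_Sym structs:

#include <cassert>
#include <cstdint>

// Pack and unpack st_info the way the ELF32_ST_* / ELF64_ST_* macros in the
// ELF specification do: binding in bits 4-7, type in bits 0-3.
constexpr uint8_t makeSymbolInfo(uint8_t Binding, uint8_t Type) {
  return static_cast<uint8_t>((Binding << 4) | (Type & 0x0f));
}
constexpr uint8_t symbolBinding(uint8_t Info) { return Info >> 4; }
constexpr uint8_t symbolType(uint8_t Info) { return Info & 0x0f; }
// Visibility lives in the low two bits of st_other.
constexpr uint8_t symbolVisibility(uint8_t Other) { return Other & 0x03; }

int main() {
  // A global function symbol with default visibility.
  const uint8_t Info = makeSymbolInfo(/*STB_GLOBAL=*/1, /*STT_FUNC=*/2);
  assert(symbolBinding(Info) == 1 && symbolType(Info) == 2);
  assert(symbolVisibility(/*st_other=*/0) == 0); // STV_DEFAULT
  return 0;
}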
// Special relocation symbols used in the MIPS64 ELF relocation entries
enum {
@@ -929,7 +925,7 @@ struct Elf32_Rel {
// These accessors and mutators correspond to the ELF32_R_SYM, ELF32_R_TYPE,
// and ELF32_R_INFO macros defined in the ELF specification:
Elf32_Word getSymbol() const { return (r_info >> 8); }
- unsigned char getType() const { return (unsigned char) (r_info & 0x0ff); }
+ unsigned char getType() const { return (unsigned char)(r_info & 0x0ff); }
void setSymbol(Elf32_Word s) { setSymbolAndType(s, getType()); }
void setType(unsigned char t) { setSymbolAndType(getSymbol(), t); }
void setSymbolAndType(Elf32_Word s, unsigned char t) {
@@ -939,14 +935,14 @@ struct Elf32_Rel {
// Relocation entry with explicit addend.
struct Elf32_Rela {
- Elf32_Addr r_offset; // Location (file byte offset, or program virtual addr)
- Elf32_Word r_info; // Symbol table index and type of relocation to apply
+ Elf32_Addr r_offset; // Location (file byte offset, or program virtual addr)
+ Elf32_Word r_info; // Symbol table index and type of relocation to apply
Elf32_Sword r_addend; // Compute value for relocatable field by adding this
// These accessors and mutators correspond to the ELF32_R_SYM, ELF32_R_TYPE,
// and ELF32_R_INFO macros defined in the ELF specification:
Elf32_Word getSymbol() const { return (r_info >> 8); }
- unsigned char getType() const { return (unsigned char) (r_info & 0x0ff); }
+ unsigned char getType() const { return (unsigned char)(r_info & 0x0ff); }
void setSymbol(Elf32_Word s) { setSymbolAndType(s, getType()); }
void setType(unsigned char t) { setSymbolAndType(getSymbol(), t); }
void setSymbolAndType(Elf32_Word s, unsigned char t) {
@@ -957,44 +953,40 @@ struct Elf32_Rela {
// Relocation entry, without explicit addend.
struct Elf64_Rel {
Elf64_Addr r_offset; // Location (file byte offset, or program virtual addr).
- Elf64_Xword r_info; // Symbol table index and type of relocation to apply.
+ Elf64_Xword r_info; // Symbol table index and type of relocation to apply.
// These accessors and mutators correspond to the ELF64_R_SYM, ELF64_R_TYPE,
// and ELF64_R_INFO macros defined in the ELF specification:
Elf64_Word getSymbol() const { return (r_info >> 32); }
- Elf64_Word getType() const {
- return (Elf64_Word) (r_info & 0xffffffffL);
- }
+ Elf64_Word getType() const { return (Elf64_Word)(r_info & 0xffffffffL); }
void setSymbol(Elf64_Word s) { setSymbolAndType(s, getType()); }
void setType(Elf64_Word t) { setSymbolAndType(getSymbol(), t); }
void setSymbolAndType(Elf64_Word s, Elf64_Word t) {
- r_info = ((Elf64_Xword)s << 32) + (t&0xffffffffL);
+ r_info = ((Elf64_Xword)s << 32) + (t & 0xffffffffL);
}
};
// Relocation entry with explicit addend.
struct Elf64_Rela {
- Elf64_Addr r_offset; // Location (file byte offset, or program virtual addr).
- Elf64_Xword r_info; // Symbol table index and type of relocation to apply.
+ Elf64_Addr r_offset; // Location (file byte offset, or program virtual addr).
+ Elf64_Xword r_info; // Symbol table index and type of relocation to apply.
Elf64_Sxword r_addend; // Compute value for relocatable field by adding this.
// These accessors and mutators correspond to the ELF64_R_SYM, ELF64_R_TYPE,
// and ELF64_R_INFO macros defined in the ELF specification:
Elf64_Word getSymbol() const { return (r_info >> 32); }
- Elf64_Word getType() const {
- return (Elf64_Word) (r_info & 0xffffffffL);
- }
+ Elf64_Word getType() const { return (Elf64_Word)(r_info & 0xffffffffL); }
void setSymbol(Elf64_Word s) { setSymbolAndType(s, getType()); }
void setType(Elf64_Word t) { setSymbolAndType(getSymbol(), t); }
void setSymbolAndType(Elf64_Word s, Elf64_Word t) {
- r_info = ((Elf64_Xword)s << 32) + (t&0xffffffffL);
+ r_info = ((Elf64_Xword)s << 32) + (t & 0xffffffffL);
}
};
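The getSymbol/getType/setSymbolAndType members above pack two values into one r_info word: ELF32 keeps the symbol index in the upper 24 bits and the relocation type in the low byte, while ELF64 splits the word 32/32. The same arithmetic as a standalone sketch (the helper names are illustrative, not the LLVM structs):

#include <cassert>
#include <cstdint>

// ELF32: r_info = (sym << 8) | (type & 0xff)
constexpr uint32_t makeRInfo32(uint32_t Sym, uint8_t Type) {
  return (Sym << 8) | Type;
}
constexpr uint32_t rSym32(uint32_t Info) { return Info >> 8; }
constexpr uint8_t rType32(uint32_t Info) {
  return static_cast<uint8_t>(Info & 0xff);
}

// ELF64: r_info = (sym << 32) | (type & 0xffffffff)
constexpr uint64_t makeRInfo64(uint64_t Sym, uint32_t Type) {
  return (Sym << 32) | Type;
}
constexpr uint32_t rSym64(uint64_t Info) {
  return static_cast<uint32_t>(Info >> 32);
}
constexpr uint32_t rType64(uint64_t Info) {
  return static_cast<uint32_t>(Info & 0xffffffff);
}

int main() {
  const uint64_t Info = makeRInfo64(/*Sym=*/7, /*Type=*/0x101); // R_AARCH64_ABS64
  assert(rSym64(Info) == 7 && rType64(Info) == 0x101);
  assert(rSym32(makeRInfo32(7, 2)) == 7 && rType32(makeRInfo32(7, 2)) == 2);
  return 0;
}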
// Program header for ELF32.
struct Elf32_Phdr {
Elf32_Word p_type; // Type of segment
- Elf32_Off p_offset; // File offset where segment is located, in bytes
+ Elf32_Off p_offset; // File offset where segment is located, in bytes
Elf32_Addr p_vaddr; // Virtual address of beginning of segment
Elf32_Addr p_paddr; // Physical address of beginning of segment (OS-specific)
Elf32_Word p_filesz; // Num. of bytes in file image of segment (may be zero)
@@ -1005,57 +997,61 @@ struct Elf32_Phdr {
// Program header for ELF64.
struct Elf64_Phdr {
- Elf64_Word p_type; // Type of segment
- Elf64_Word p_flags; // Segment flags
- Elf64_Off p_offset; // File offset where segment is located, in bytes
- Elf64_Addr p_vaddr; // Virtual address of beginning of segment
- Elf64_Addr p_paddr; // Physical addr of beginning of segment (OS-specific)
- Elf64_Xword p_filesz; // Num. of bytes in file image of segment (may be zero)
- Elf64_Xword p_memsz; // Num. of bytes in mem image of segment (may be zero)
- Elf64_Xword p_align; // Segment alignment constraint
+ Elf64_Word p_type; // Type of segment
+ Elf64_Word p_flags; // Segment flags
+ Elf64_Off p_offset; // File offset where segment is located, in bytes
+ Elf64_Addr p_vaddr; // Virtual address of beginning of segment
+ Elf64_Addr p_paddr; // Physical addr of beginning of segment (OS-specific)
+ Elf64_Xword p_filesz; // Num. of bytes in file image of segment (may be zero)
+ Elf64_Xword p_memsz; // Num. of bytes in mem image of segment (may be zero)
+ Elf64_Xword p_align; // Segment alignment constraint
};
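For a PT_LOAD entry these fields describe two related ranges: p_filesz bytes are taken from the file at p_offset and placed at p_vaddr, and anything beyond that up to p_memsz is zero-filled, which is where .bss gets its zeroes. A simplified, loader-style sketch of that rule, with a hypothetical local struct standing in for Elf64_Phdr:

#include <cstdint>
#include <cstring>
#include <vector>

// Simplified stand-in for Elf64_Phdr; only the fields the copy rule needs.
struct LoadSegment {
  uint64_t p_offset, p_vaddr, p_filesz, p_memsz;
};

// Copy a PT_LOAD segment into a flat image whose base address is ImageBase.
// Bytes past p_filesz (up to p_memsz) stay zero, giving BSS its initial zeroes.
void loadSegment(const LoadSegment &Ph, const std::vector<uint8_t> &File,
                 std::vector<uint8_t> &Image, uint64_t ImageBase) {
  const uint64_t Dest = Ph.p_vaddr - ImageBase;
  if (Dest + Ph.p_memsz > Image.size())
    Image.resize(Dest + Ph.p_memsz, 0);
  std::memcpy(Image.data() + Dest, File.data() + Ph.p_offset, Ph.p_filesz);
}

int main() {
  std::vector<uint8_t> File(0x40, 0xAB);
  std::vector<uint8_t> Image;
  loadSegment({/*p_offset=*/0, /*p_vaddr=*/0x1000, /*p_filesz=*/0x20,
               /*p_memsz=*/0x30},
              File, Image, /*ImageBase=*/0x1000);
  return (Image[0x1F] == 0xAB && Image[0x2F] == 0) ? 0 : 1;
}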
// Segment types.
enum {
- PT_NULL = 0, // Unused segment.
- PT_LOAD = 1, // Loadable segment.
- PT_DYNAMIC = 2, // Dynamic linking information.
- PT_INTERP = 3, // Interpreter pathname.
- PT_NOTE = 4, // Auxiliary information.
- PT_SHLIB = 5, // Reserved.
- PT_PHDR = 6, // The program header table itself.
- PT_TLS = 7, // The thread-local storage template.
- PT_LOOS = 0x60000000, // Lowest operating system-specific pt entry type.
- PT_HIOS = 0x6fffffff, // Highest operating system-specific pt entry type.
- PT_LOPROC = 0x70000000, // Lowest processor-specific program hdr entry type.
- PT_HIPROC = 0x7fffffff, // Highest processor-specific program hdr entry type.
+ PT_NULL = 0, // Unused segment.
+ PT_LOAD = 1, // Loadable segment.
+ PT_DYNAMIC = 2, // Dynamic linking information.
+ PT_INTERP = 3, // Interpreter pathname.
+ PT_NOTE = 4, // Auxiliary information.
+ PT_SHLIB = 5, // Reserved.
+ PT_PHDR = 6, // The program header table itself.
+ PT_TLS = 7, // The thread-local storage template.
+ PT_LOOS = 0x60000000, // Lowest operating system-specific pt entry type.
+ PT_HIOS = 0x6fffffff, // Highest operating system-specific pt entry type.
+ PT_LOPROC = 0x70000000, // Lowest processor-specific program hdr entry type.
+ PT_HIPROC = 0x7fffffff, // Highest processor-specific program hdr entry type.
// x86-64 program header types.
// These all contain stack unwind tables.
- PT_GNU_EH_FRAME = 0x6474e550,
+ PT_GNU_EH_FRAME = 0x6474e550,
PT_SUNW_EH_FRAME = 0x6474e550,
- PT_SUNW_UNWIND = 0x6464e550,
+ PT_SUNW_UNWIND = 0x6464e550,
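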
+
- PT_GNU_STACK = 0x6474e551, // Indicates stack executability.
- PT_GNU_RELRO = 0x6474e552, // Read-only after relocation.
+ PT_GNU_STACK = 0x6474e551, // Indicates stack executability.
+ PT_GNU_RELRO = 0x6474e552, // Read-only after relocation.
+ PT_OPENBSD_RANDOMIZE = 0x65a3dbe6, // Fill with random data.
+ PT_OPENBSD_WXNEEDED = 0x65a3dbe7, // Program does W^X violations.
+ PT_OPENBSD_BOOTDATA = 0x65a41be6, // Section for boot arguments.
// ARM program header types.
PT_ARM_ARCHEXT = 0x70000000, // Platform architecture compatibility info
// These all contain stack unwind tables.
- PT_ARM_EXIDX = 0x70000001,
- PT_ARM_UNWIND = 0x70000001,
+ PT_ARM_EXIDX = 0x70000001,
+ PT_ARM_UNWIND = 0x70000001,
// MIPS program header types.
- PT_MIPS_REGINFO = 0x70000000, // Register usage information.
- PT_MIPS_RTPROC = 0x70000001, // Runtime procedure table.
- PT_MIPS_OPTIONS = 0x70000002, // Options segment.
- PT_MIPS_ABIFLAGS = 0x70000003, // Abiflags segment.
+ PT_MIPS_REGINFO = 0x70000000, // Register usage information.
+ PT_MIPS_RTPROC = 0x70000001, // Runtime procedure table.
+ PT_MIPS_OPTIONS = 0x70000002, // Options segment.
+ PT_MIPS_ABIFLAGS = 0x70000003, // Abiflags segment.
// AMDGPU program header types.
PT_AMDGPU_HSA_LOAD_GLOBAL_PROGRAM = 0x60000000,
- PT_AMDGPU_HSA_LOAD_GLOBAL_AGENT = 0x60000001,
+ PT_AMDGPU_HSA_LOAD_GLOBAL_AGENT = 0x60000001,
PT_AMDGPU_HSA_LOAD_READONLY_AGENT = 0x60000002,
- PT_AMDGPU_HSA_LOAD_CODE_AGENT = 0x60000003,
+ PT_AMDGPU_HSA_LOAD_CODE_AGENT = 0x60000003,
// WebAssembly program header types.
PT_WEBASSEMBLY_FUNCTIONS = PT_LOPROC + 0, // Function definitions.
@@ -1063,267 +1059,273 @@ enum {
// Segment flag bits.
enum : unsigned {
- PF_X = 1, // Execute
- PF_W = 2, // Write
- PF_R = 4, // Read
- PF_MASKOS = 0x0ff00000,// Bits for operating system-specific semantics.
+ PF_X = 1, // Execute
+ PF_W = 2, // Write
+ PF_R = 4, // Read
+ PF_MASKOS = 0x0ff00000, // Bits for operating system-specific semantics.
PF_MASKPROC = 0xf0000000 // Bits for processor-specific semantics.
};
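p_flags is an ordinary bitmask, so segment permissions are tested with PF_X/PF_W/PF_R directly. A tiny standalone sketch that renders the flags the way readelf prints them (the constants are restated locally so the snippet compiles on its own):

#include <cstdint>
#include <cstdio>
#include <string>

namespace {
enum : uint32_t { PF_X = 1, PF_W = 2, PF_R = 4 };
}

std::string flagString(uint32_t PFlags) {
  std::string S;
  S += (PFlags & PF_R) ? 'R' : ' ';
  S += (PFlags & PF_W) ? 'W' : ' ';
  S += (PFlags & PF_X) ? 'E' : ' '; // readelf uses 'E' for execute
  return S;
}

int main() {
  std::printf("%s\n", flagString(PF_R | PF_X).c_str()); // "R E": a text segment
  return 0;
}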
// Dynamic table entry for ELF32.
-struct Elf32_Dyn
-{
- Elf32_Sword d_tag; // Type of dynamic table entry.
- union
- {
- Elf32_Word d_val; // Integer value of entry.
- Elf32_Addr d_ptr; // Pointer value of entry.
+struct Elf32_Dyn {
+ Elf32_Sword d_tag; // Type of dynamic table entry.
+ union {
+ Elf32_Word d_val; // Integer value of entry.
+ Elf32_Addr d_ptr; // Pointer value of entry.
} d_un;
};
// Dynamic table entry for ELF64.
-struct Elf64_Dyn
-{
- Elf64_Sxword d_tag; // Type of dynamic table entry.
- union
- {
- Elf64_Xword d_val; // Integer value of entry.
- Elf64_Addr d_ptr; // Pointer value of entry.
+struct Elf64_Dyn {
+ Elf64_Sxword d_tag; // Type of dynamic table entry.
+ union {
+ Elf64_Xword d_val; // Integer value of entry.
+ Elf64_Addr d_ptr; // Pointer value of entry.
} d_un;
};
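A dynamic section is an array of these entries terminated by a DT_NULL tag, and whether d_un is read as d_val or d_ptr depends on the tag: DT_NEEDED carries a string-table offset, DT_STRTAB an address, and so on. A minimal scan loop over such an array, using a simplified local struct rather than the real Elf64_Dyn:

#include <cstdint>
#include <cstdio>

struct DynEntry {  // simplified stand-in for Elf64_Dyn
  int64_t d_tag;
  uint64_t d_un;   // d_val or d_ptr, depending on d_tag
};

// Walk the dynamic array until DT_NULL (= 0) and report a couple of tags.
void scanDynamic(const DynEntry *Dyn) {
  for (; Dyn->d_tag != 0 /*DT_NULL*/; ++Dyn) {
    if (Dyn->d_tag == 1 /*DT_NEEDED*/)
      std::printf("needed library at strtab offset %llu\n",
                  (unsigned long long)Dyn->d_un);
    else if (Dyn->d_tag == 5 /*DT_STRTAB*/)
      std::printf("string table at address 0x%llx\n",
                  (unsigned long long)Dyn->d_un);
  }
}

int main() {
  const DynEntry Dyn[] = {{1, 0x10}, {5, 0x400000}, {0, 0}};
  scanDynamic(Dyn);
  return 0;
}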
// Dynamic table entry tags.
enum {
- DT_NULL = 0, // Marks end of dynamic array.
- DT_NEEDED = 1, // String table offset of needed library.
- DT_PLTRELSZ = 2, // Size of relocation entries in PLT.
- DT_PLTGOT = 3, // Address associated with linkage table.
- DT_HASH = 4, // Address of symbolic hash table.
- DT_STRTAB = 5, // Address of dynamic string table.
- DT_SYMTAB = 6, // Address of dynamic symbol table.
- DT_RELA = 7, // Address of relocation table (Rela entries).
- DT_RELASZ = 8, // Size of Rela relocation table.
- DT_RELAENT = 9, // Size of a Rela relocation entry.
- DT_STRSZ = 10, // Total size of the string table.
- DT_SYMENT = 11, // Size of a symbol table entry.
- DT_INIT = 12, // Address of initialization function.
- DT_FINI = 13, // Address of termination function.
- DT_SONAME = 14, // String table offset of a shared objects name.
- DT_RPATH = 15, // String table offset of library search path.
- DT_SYMBOLIC = 16, // Changes symbol resolution algorithm.
- DT_REL = 17, // Address of relocation table (Rel entries).
- DT_RELSZ = 18, // Size of Rel relocation table.
- DT_RELENT = 19, // Size of a Rel relocation entry.
- DT_PLTREL = 20, // Type of relocation entry used for linking.
- DT_DEBUG = 21, // Reserved for debugger.
- DT_TEXTREL = 22, // Relocations exist for non-writable segments.
- DT_JMPREL = 23, // Address of relocations associated with PLT.
- DT_BIND_NOW = 24, // Process all relocations before execution.
- DT_INIT_ARRAY = 25, // Pointer to array of initialization functions.
- DT_FINI_ARRAY = 26, // Pointer to array of termination functions.
- DT_INIT_ARRAYSZ = 27, // Size of DT_INIT_ARRAY.
- DT_FINI_ARRAYSZ = 28, // Size of DT_FINI_ARRAY.
- DT_RUNPATH = 29, // String table offset of lib search path.
- DT_FLAGS = 30, // Flags.
- DT_ENCODING = 32, // Values from here to DT_LOOS follow the rules
- // for the interpretation of the d_un union.
-
- DT_PREINIT_ARRAY = 32, // Pointer to array of preinit functions.
- DT_PREINIT_ARRAYSZ = 33, // Size of the DT_PREINIT_ARRAY array.
-
- DT_LOOS = 0x60000000, // Start of environment specific tags.
- DT_HIOS = 0x6FFFFFFF, // End of environment specific tags.
- DT_LOPROC = 0x70000000, // Start of processor specific tags.
- DT_HIPROC = 0x7FFFFFFF, // End of processor specific tags.
-
- DT_GNU_HASH = 0x6FFFFEF5, // Reference to the GNU hash table.
- DT_TLSDESC_PLT = 0x6FFFFEF6, // Location of PLT entry for TLS descriptor resolver calls.
- DT_TLSDESC_GOT = 0x6FFFFEF7, // Location of GOT entry used by TLS descriptor resolver PLT entry.
- DT_RELACOUNT = 0x6FFFFFF9, // ELF32_Rela count.
- DT_RELCOUNT = 0x6FFFFFFA, // ELF32_Rel count.
-
- DT_FLAGS_1 = 0X6FFFFFFB, // Flags_1.
- DT_VERSYM = 0x6FFFFFF0, // The address of .gnu.version section.
- DT_VERDEF = 0X6FFFFFFC, // The address of the version definition table.
- DT_VERDEFNUM = 0X6FFFFFFD, // The number of entries in DT_VERDEF.
- DT_VERNEED = 0X6FFFFFFE, // The address of the version Dependency table.
- DT_VERNEEDNUM = 0X6FFFFFFF, // The number of entries in DT_VERNEED.
+ DT_NULL = 0, // Marks end of dynamic array.
+ DT_NEEDED = 1, // String table offset of needed library.
+ DT_PLTRELSZ = 2, // Size of relocation entries in PLT.
+ DT_PLTGOT = 3, // Address associated with linkage table.
+ DT_HASH = 4, // Address of symbolic hash table.
+ DT_STRTAB = 5, // Address of dynamic string table.
+ DT_SYMTAB = 6, // Address of dynamic symbol table.
+ DT_RELA = 7, // Address of relocation table (Rela entries).
+ DT_RELASZ = 8, // Size of Rela relocation table.
+ DT_RELAENT = 9, // Size of a Rela relocation entry.
+ DT_STRSZ = 10, // Total size of the string table.
+ DT_SYMENT = 11, // Size of a symbol table entry.
+ DT_INIT = 12, // Address of initialization function.
+ DT_FINI = 13, // Address of termination function.
+ DT_SONAME = 14, // String table offset of a shared objects name.
+ DT_RPATH = 15, // String table offset of library search path.
+ DT_SYMBOLIC = 16, // Changes symbol resolution algorithm.
+ DT_REL = 17, // Address of relocation table (Rel entries).
+ DT_RELSZ = 18, // Size of Rel relocation table.
+ DT_RELENT = 19, // Size of a Rel relocation entry.
+ DT_PLTREL = 20, // Type of relocation entry used for linking.
+ DT_DEBUG = 21, // Reserved for debugger.
+ DT_TEXTREL = 22, // Relocations exist for non-writable segments.
+ DT_JMPREL = 23, // Address of relocations associated with PLT.
+ DT_BIND_NOW = 24, // Process all relocations before execution.
+ DT_INIT_ARRAY = 25, // Pointer to array of initialization functions.
+ DT_FINI_ARRAY = 26, // Pointer to array of termination functions.
+ DT_INIT_ARRAYSZ = 27, // Size of DT_INIT_ARRAY.
+ DT_FINI_ARRAYSZ = 28, // Size of DT_FINI_ARRAY.
+ DT_RUNPATH = 29, // String table offset of lib search path.
+ DT_FLAGS = 30, // Flags.
+ DT_ENCODING = 32, // Values from here to DT_LOOS follow the rules
+ // for the interpretation of the d_un union.
+
+ DT_PREINIT_ARRAY = 32, // Pointer to array of preinit functions.
+ DT_PREINIT_ARRAYSZ = 33, // Size of the DT_PREINIT_ARRAY array.
+
+ DT_LOOS = 0x60000000, // Start of environment specific tags.
+ DT_HIOS = 0x6FFFFFFF, // End of environment specific tags.
+ DT_LOPROC = 0x70000000, // Start of processor specific tags.
+ DT_HIPROC = 0x7FFFFFFF, // End of processor specific tags.
+
+ DT_GNU_HASH = 0x6FFFFEF5, // Reference to the GNU hash table.
+ DT_TLSDESC_PLT =
+ 0x6FFFFEF6, // Location of PLT entry for TLS descriptor resolver calls.
+ DT_TLSDESC_GOT = 0x6FFFFEF7, // Location of GOT entry used by TLS descriptor
+ // resolver PLT entry.
+ DT_RELACOUNT = 0x6FFFFFF9, // ELF32_Rela count.
+ DT_RELCOUNT = 0x6FFFFFFA, // ELF32_Rel count.
+
+ DT_FLAGS_1 = 0X6FFFFFFB, // Flags_1.
+ DT_VERSYM = 0x6FFFFFF0, // The address of .gnu.version section.
+ DT_VERDEF = 0X6FFFFFFC, // The address of the version definition table.
+ DT_VERDEFNUM = 0X6FFFFFFD, // The number of entries in DT_VERDEF.
+ DT_VERNEED = 0X6FFFFFFE, // The address of the version Dependency table.
+ DT_VERNEEDNUM = 0X6FFFFFFF, // The number of entries in DT_VERNEED.
+
+ // Hexagon specific dynamic table entries
+ DT_HEXAGON_SYMSZ = 0x70000000,
+ DT_HEXAGON_VER = 0x70000001,
+ DT_HEXAGON_PLT = 0x70000002,
// Mips specific dynamic table entry tags.
- DT_MIPS_RLD_VERSION = 0x70000001, // 32 bit version number for runtime
- // linker interface.
- DT_MIPS_TIME_STAMP = 0x70000002, // Time stamp.
- DT_MIPS_ICHECKSUM = 0x70000003, // Checksum of external strings
- // and common sizes.
- DT_MIPS_IVERSION = 0x70000004, // Index of version string
- // in string table.
- DT_MIPS_FLAGS = 0x70000005, // 32 bits of flags.
- DT_MIPS_BASE_ADDRESS = 0x70000006, // Base address of the segment.
- DT_MIPS_MSYM = 0x70000007, // Address of .msym section.
- DT_MIPS_CONFLICT = 0x70000008, // Address of .conflict section.
- DT_MIPS_LIBLIST = 0x70000009, // Address of .liblist section.
- DT_MIPS_LOCAL_GOTNO = 0x7000000a, // Number of local global offset
- // table entries.
- DT_MIPS_CONFLICTNO = 0x7000000b, // Number of entries
- // in the .conflict section.
- DT_MIPS_LIBLISTNO = 0x70000010, // Number of entries
- // in the .liblist section.
- DT_MIPS_SYMTABNO = 0x70000011, // Number of entries
- // in the .dynsym section.
- DT_MIPS_UNREFEXTNO = 0x70000012, // Index of first external dynamic symbol
- // not referenced locally.
- DT_MIPS_GOTSYM = 0x70000013, // Index of first dynamic symbol
- // in global offset table.
- DT_MIPS_HIPAGENO = 0x70000014, // Number of page table entries
- // in global offset table.
- DT_MIPS_RLD_MAP = 0x70000016, // Address of run time loader map,
- // used for debugging.
- DT_MIPS_DELTA_CLASS = 0x70000017, // Delta C++ class definition.
- DT_MIPS_DELTA_CLASS_NO = 0x70000018, // Number of entries
- // in DT_MIPS_DELTA_CLASS.
- DT_MIPS_DELTA_INSTANCE = 0x70000019, // Delta C++ class instances.
- DT_MIPS_DELTA_INSTANCE_NO = 0x7000001A, // Number of entries
- // in DT_MIPS_DELTA_INSTANCE.
- DT_MIPS_DELTA_RELOC = 0x7000001B, // Delta relocations.
- DT_MIPS_DELTA_RELOC_NO = 0x7000001C, // Number of entries
- // in DT_MIPS_DELTA_RELOC.
- DT_MIPS_DELTA_SYM = 0x7000001D, // Delta symbols that Delta
- // relocations refer to.
- DT_MIPS_DELTA_SYM_NO = 0x7000001E, // Number of entries
- // in DT_MIPS_DELTA_SYM.
- DT_MIPS_DELTA_CLASSSYM = 0x70000020, // Delta symbols that hold
- // class declarations.
- DT_MIPS_DELTA_CLASSSYM_NO = 0x70000021, // Number of entries
- // in DT_MIPS_DELTA_CLASSSYM.
- DT_MIPS_CXX_FLAGS = 0x70000022, // Flags indicating information
- // about C++ flavor.
- DT_MIPS_PIXIE_INIT = 0x70000023, // Pixie information.
- DT_MIPS_SYMBOL_LIB = 0x70000024, // Address of .MIPS.symlib
- DT_MIPS_LOCALPAGE_GOTIDX = 0x70000025, // The GOT index of the first PTE
- // for a segment
- DT_MIPS_LOCAL_GOTIDX = 0x70000026, // The GOT index of the first PTE
- // for a local symbol
- DT_MIPS_HIDDEN_GOTIDX = 0x70000027, // The GOT index of the first PTE
- // for a hidden symbol
- DT_MIPS_PROTECTED_GOTIDX = 0x70000028, // The GOT index of the first PTE
- // for a protected symbol
- DT_MIPS_OPTIONS = 0x70000029, // Address of `.MIPS.options'.
- DT_MIPS_INTERFACE = 0x7000002A, // Address of `.interface'.
- DT_MIPS_DYNSTR_ALIGN = 0x7000002B, // Unknown.
- DT_MIPS_INTERFACE_SIZE = 0x7000002C, // Size of the .interface section.
+ DT_MIPS_RLD_VERSION = 0x70000001, // 32 bit version number for runtime
+ // linker interface.
+ DT_MIPS_TIME_STAMP = 0x70000002, // Time stamp.
+ DT_MIPS_ICHECKSUM = 0x70000003, // Checksum of external strings
+ // and common sizes.
+ DT_MIPS_IVERSION = 0x70000004, // Index of version string
+ // in string table.
+ DT_MIPS_FLAGS = 0x70000005, // 32 bits of flags.
+ DT_MIPS_BASE_ADDRESS = 0x70000006, // Base address of the segment.
+ DT_MIPS_MSYM = 0x70000007, // Address of .msym section.
+ DT_MIPS_CONFLICT = 0x70000008, // Address of .conflict section.
+ DT_MIPS_LIBLIST = 0x70000009, // Address of .liblist section.
+ DT_MIPS_LOCAL_GOTNO = 0x7000000a, // Number of local global offset
+ // table entries.
+ DT_MIPS_CONFLICTNO = 0x7000000b, // Number of entries
+ // in the .conflict section.
+ DT_MIPS_LIBLISTNO = 0x70000010, // Number of entries
+ // in the .liblist section.
+ DT_MIPS_SYMTABNO = 0x70000011, // Number of entries
+ // in the .dynsym section.
+ DT_MIPS_UNREFEXTNO = 0x70000012, // Index of first external dynamic symbol
+ // not referenced locally.
+ DT_MIPS_GOTSYM = 0x70000013, // Index of first dynamic symbol
+ // in global offset table.
+ DT_MIPS_HIPAGENO = 0x70000014, // Number of page table entries
+ // in global offset table.
+ DT_MIPS_RLD_MAP = 0x70000016, // Address of run time loader map,
+ // used for debugging.
+ DT_MIPS_DELTA_CLASS = 0x70000017, // Delta C++ class definition.
+ DT_MIPS_DELTA_CLASS_NO = 0x70000018, // Number of entries
+ // in DT_MIPS_DELTA_CLASS.
+ DT_MIPS_DELTA_INSTANCE = 0x70000019, // Delta C++ class instances.
+ DT_MIPS_DELTA_INSTANCE_NO = 0x7000001A, // Number of entries
+ // in DT_MIPS_DELTA_INSTANCE.
+ DT_MIPS_DELTA_RELOC = 0x7000001B, // Delta relocations.
+ DT_MIPS_DELTA_RELOC_NO = 0x7000001C, // Number of entries
+ // in DT_MIPS_DELTA_RELOC.
+ DT_MIPS_DELTA_SYM = 0x7000001D, // Delta symbols that Delta
+ // relocations refer to.
+ DT_MIPS_DELTA_SYM_NO = 0x7000001E, // Number of entries
+ // in DT_MIPS_DELTA_SYM.
+ DT_MIPS_DELTA_CLASSSYM = 0x70000020, // Delta symbols that hold
+ // class declarations.
+ DT_MIPS_DELTA_CLASSSYM_NO = 0x70000021, // Number of entries
+ // in DT_MIPS_DELTA_CLASSSYM.
+ DT_MIPS_CXX_FLAGS = 0x70000022, // Flags indicating information
+ // about C++ flavor.
+ DT_MIPS_PIXIE_INIT = 0x70000023, // Pixie information.
+ DT_MIPS_SYMBOL_LIB = 0x70000024, // Address of .MIPS.symlib
+ DT_MIPS_LOCALPAGE_GOTIDX = 0x70000025, // The GOT index of the first PTE
+ // for a segment
+ DT_MIPS_LOCAL_GOTIDX = 0x70000026, // The GOT index of the first PTE
+ // for a local symbol
+ DT_MIPS_HIDDEN_GOTIDX = 0x70000027, // The GOT index of the first PTE
+ // for a hidden symbol
+ DT_MIPS_PROTECTED_GOTIDX = 0x70000028, // The GOT index of the first PTE
+ // for a protected symbol
+ DT_MIPS_OPTIONS = 0x70000029, // Address of `.MIPS.options'.
+ DT_MIPS_INTERFACE = 0x7000002A, // Address of `.interface'.
+ DT_MIPS_DYNSTR_ALIGN = 0x7000002B, // Unknown.
+ DT_MIPS_INTERFACE_SIZE = 0x7000002C, // Size of the .interface section.
DT_MIPS_RLD_TEXT_RESOLVE_ADDR = 0x7000002D, // Size of rld_text_resolve
// function stored in the GOT.
- DT_MIPS_PERF_SUFFIX = 0x7000002E, // Default suffix of DSO to be added
- // by rld on dlopen() calls.
- DT_MIPS_COMPACT_SIZE = 0x7000002F, // Size of compact relocation
- // section (O32).
- DT_MIPS_GP_VALUE = 0x70000030, // GP value for auxiliary GOTs.
- DT_MIPS_AUX_DYNAMIC = 0x70000031, // Address of auxiliary .dynamic.
- DT_MIPS_PLTGOT = 0x70000032, // Address of the base of the PLTGOT.
- DT_MIPS_RWPLT = 0x70000034, // Points to the base
- // of a writable PLT.
- DT_MIPS_RLD_MAP_REL = 0x70000035, // Relative offset of run time loader
- // map, used for debugging.
+ DT_MIPS_PERF_SUFFIX = 0x7000002E, // Default suffix of DSO to be added
+ // by rld on dlopen() calls.
+ DT_MIPS_COMPACT_SIZE = 0x7000002F, // Size of compact relocation
+ // section (O32).
+ DT_MIPS_GP_VALUE = 0x70000030, // GP value for auxiliary GOTs.
+ DT_MIPS_AUX_DYNAMIC = 0x70000031, // Address of auxiliary .dynamic.
+ DT_MIPS_PLTGOT = 0x70000032, // Address of the base of the PLTGOT.
+ DT_MIPS_RWPLT = 0x70000034, // Points to the base
+ // of a writable PLT.
+ DT_MIPS_RLD_MAP_REL = 0x70000035, // Relative offset of run time loader
+ // map, used for debugging.
// Sun machine-independent extensions.
- DT_AUXILIARY = 0x7FFFFFFD, // Shared object to load before self
- DT_FILTER = 0x7FFFFFFF // Shared object to get values from
+ DT_AUXILIARY = 0x7FFFFFFD, // Shared object to load before self
+ DT_FILTER = 0x7FFFFFFF // Shared object to get values from
};
// DT_FLAGS values.
enum {
- DF_ORIGIN = 0x01, // The object may reference $ORIGIN.
- DF_SYMBOLIC = 0x02, // Search the shared lib before searching the exe.
- DF_TEXTREL = 0x04, // Relocations may modify a non-writable segment.
- DF_BIND_NOW = 0x08, // Process all relocations on load.
- DF_STATIC_TLS = 0x10 // Reject attempts to load dynamically.
+ DF_ORIGIN = 0x01, // The object may reference $ORIGIN.
+ DF_SYMBOLIC = 0x02, // Search the shared lib before searching the exe.
+ DF_TEXTREL = 0x04, // Relocations may modify a non-writable segment.
+ DF_BIND_NOW = 0x08, // Process all relocations on load.
+ DF_STATIC_TLS = 0x10 // Reject attempts to load dynamically.
};
// State flags selectable in the `d_un.d_val' element of the DT_FLAGS_1 entry.
enum {
- DF_1_NOW = 0x00000001, // Set RTLD_NOW for this object.
- DF_1_GLOBAL = 0x00000002, // Set RTLD_GLOBAL for this object.
- DF_1_GROUP = 0x00000004, // Set RTLD_GROUP for this object.
- DF_1_NODELETE = 0x00000008, // Set RTLD_NODELETE for this object.
- DF_1_LOADFLTR = 0x00000010, // Trigger filtee loading at runtime.
- DF_1_INITFIRST = 0x00000020, // Set RTLD_INITFIRST for this object.
- DF_1_NOOPEN = 0x00000040, // Set RTLD_NOOPEN for this object.
- DF_1_ORIGIN = 0x00000080, // $ORIGIN must be handled.
- DF_1_DIRECT = 0x00000100, // Direct binding enabled.
- DF_1_TRANS = 0x00000200,
- DF_1_INTERPOSE = 0x00000400, // Object is used to interpose.
- DF_1_NODEFLIB = 0x00000800, // Ignore default lib search path.
- DF_1_NODUMP = 0x00001000, // Object can't be dldump'ed.
- DF_1_CONFALT = 0x00002000, // Configuration alternative created.
- DF_1_ENDFILTEE = 0x00004000, // Filtee terminates filters search.
+ DF_1_NOW = 0x00000001, // Set RTLD_NOW for this object.
+ DF_1_GLOBAL = 0x00000002, // Set RTLD_GLOBAL for this object.
+ DF_1_GROUP = 0x00000004, // Set RTLD_GROUP for this object.
+ DF_1_NODELETE = 0x00000008, // Set RTLD_NODELETE for this object.
+ DF_1_LOADFLTR = 0x00000010, // Trigger filtee loading at runtime.
+ DF_1_INITFIRST = 0x00000020, // Set RTLD_INITFIRST for this object.
+ DF_1_NOOPEN = 0x00000040, // Set RTLD_NOOPEN for this object.
+ DF_1_ORIGIN = 0x00000080, // $ORIGIN must be handled.
+ DF_1_DIRECT = 0x00000100, // Direct binding enabled.
+ DF_1_TRANS = 0x00000200,
+ DF_1_INTERPOSE = 0x00000400, // Object is used to interpose.
+ DF_1_NODEFLIB = 0x00000800, // Ignore default lib search path.
+ DF_1_NODUMP = 0x00001000, // Object can't be dldump'ed.
+ DF_1_CONFALT = 0x00002000, // Configuration alternative created.
+ DF_1_ENDFILTEE = 0x00004000, // Filtee terminates filters search.
DF_1_DISPRELDNE = 0x00008000, // Disp reloc applied at build time.
DF_1_DISPRELPND = 0x00010000, // Disp reloc applied at run-time.
- DF_1_NODIRECT = 0x00020000, // Object has no-direct binding.
- DF_1_IGNMULDEF = 0x00040000,
- DF_1_NOKSYMS = 0x00080000,
- DF_1_NOHDR = 0x00100000,
- DF_1_EDITED = 0x00200000, // Object is modified after built.
- DF_1_NORELOC = 0x00400000,
+ DF_1_NODIRECT = 0x00020000, // Object has no-direct binding.
+ DF_1_IGNMULDEF = 0x00040000,
+ DF_1_NOKSYMS = 0x00080000,
+ DF_1_NOHDR = 0x00100000,
+ DF_1_EDITED = 0x00200000, // Object is modified after built.
+ DF_1_NORELOC = 0x00400000,
DF_1_SYMINTPOSE = 0x00800000, // Object has individual interposers.
- DF_1_GLOBAUDIT = 0x01000000, // Global auditing required.
- DF_1_SINGLETON = 0x02000000 // Singleton symbols are used.
+ DF_1_GLOBAUDIT = 0x01000000, // Global auditing required.
+ DF_1_SINGLETON = 0x02000000 // Singleton symbols are used.
};
// DT_MIPS_FLAGS values.
enum {
- RHF_NONE = 0x00000000, // No flags.
- RHF_QUICKSTART = 0x00000001, // Uses shortcut pointers.
- RHF_NOTPOT = 0x00000002, // Hash size is not a power of two.
- RHS_NO_LIBRARY_REPLACEMENT = 0x00000004, // Ignore LD_LIBRARY_PATH.
- RHF_NO_MOVE = 0x00000008, // DSO address may not be relocated.
- RHF_SGI_ONLY = 0x00000010, // SGI specific features.
- RHF_GUARANTEE_INIT = 0x00000020, // Guarantee that .init will finish
- // executing before any non-init
- // code in DSO is called.
- RHF_DELTA_C_PLUS_PLUS = 0x00000040, // Contains Delta C++ code.
- RHF_GUARANTEE_START_INIT = 0x00000080, // Guarantee that .init will start
- // executing before any non-init
- // code in DSO is called.
- RHF_PIXIE = 0x00000100, // Generated by pixie.
- RHF_DEFAULT_DELAY_LOAD = 0x00000200, // Delay-load DSO by default.
- RHF_REQUICKSTART = 0x00000400, // Object may be requickstarted
- RHF_REQUICKSTARTED = 0x00000800, // Object has been requickstarted
- RHF_CORD = 0x00001000, // Generated by cord.
- RHF_NO_UNRES_UNDEF = 0x00002000, // Object contains no unresolved
- // undef symbols.
- RHF_RLD_ORDER_SAFE = 0x00004000 // Symbol table is in a safe order.
+ RHF_NONE = 0x00000000, // No flags.
+ RHF_QUICKSTART = 0x00000001, // Uses shortcut pointers.
+ RHF_NOTPOT = 0x00000002, // Hash size is not a power of two.
+ RHS_NO_LIBRARY_REPLACEMENT = 0x00000004, // Ignore LD_LIBRARY_PATH.
+ RHF_NO_MOVE = 0x00000008, // DSO address may not be relocated.
+ RHF_SGI_ONLY = 0x00000010, // SGI specific features.
+ RHF_GUARANTEE_INIT = 0x00000020, // Guarantee that .init will finish
+ // executing before any non-init
+ // code in DSO is called.
+ RHF_DELTA_C_PLUS_PLUS = 0x00000040, // Contains Delta C++ code.
+ RHF_GUARANTEE_START_INIT = 0x00000080, // Guarantee that .init will start
+ // executing before any non-init
+ // code in DSO is called.
+ RHF_PIXIE = 0x00000100, // Generated by pixie.
+ RHF_DEFAULT_DELAY_LOAD = 0x00000200, // Delay-load DSO by default.
+ RHF_REQUICKSTART = 0x00000400, // Object may be requickstarted
+ RHF_REQUICKSTARTED = 0x00000800, // Object has been requickstarted
+ RHF_CORD = 0x00001000, // Generated by cord.
+ RHF_NO_UNRES_UNDEF = 0x00002000, // Object contains no unresolved
+ // undef symbols.
+ RHF_RLD_ORDER_SAFE = 0x00004000 // Symbol table is in a safe order.
};
// ElfXX_VerDef structure version (GNU versioning)
-enum {
- VER_DEF_NONE = 0,
- VER_DEF_CURRENT = 1
-};
+enum { VER_DEF_NONE = 0, VER_DEF_CURRENT = 1 };
// VerDef Flags (ElfXX_VerDef::vd_flags)
-enum {
- VER_FLG_BASE = 0x1,
- VER_FLG_WEAK = 0x2,
- VER_FLG_INFO = 0x4
-};
+enum { VER_FLG_BASE = 0x1, VER_FLG_WEAK = 0x2, VER_FLG_INFO = 0x4 };
// Special constants for the version table. (SHT_GNU_versym/.gnu.version)
enum {
- VER_NDX_LOCAL = 0, // Unversioned local symbol
+ VER_NDX_LOCAL = 0, // Unversioned local symbol
VER_NDX_GLOBAL = 1, // Unversioned global symbol
VERSYM_VERSION = 0x7fff, // Version Index mask
- VERSYM_HIDDEN = 0x8000 // Hidden bit (non-default version)
+ VERSYM_HIDDEN = 0x8000 // Hidden bit (non-default version)
};
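Each 16-bit entry of the .gnu.version table is a version index with one overloaded bit: the low fifteen bits (VERSYM_VERSION) select the version definition and the top bit (VERSYM_HIDDEN) marks a non-default version. The decode, restated as a standalone sketch:

#include <cassert>
#include <cstdint>

constexpr uint16_t VERSYM_VERSION = 0x7fff;
constexpr uint16_t VERSYM_HIDDEN = 0x8000;

constexpr uint16_t versionIndex(uint16_t V) { return V & VERSYM_VERSION; }
constexpr bool isHiddenVersion(uint16_t V) { return (V & VERSYM_HIDDEN) != 0; }

int main() {
  assert(versionIndex(0x8003) == 3 && isHiddenVersion(0x8003));
  assert(versionIndex(1) == 1 && !isHiddenVersion(1)); // VER_NDX_GLOBAL
  return 0;
}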
// ElfXX_VerNeed structure version (GNU versioning)
+enum { VER_NEED_NONE = 0, VER_NEED_CURRENT = 1 };
+
+// SHT_NOTE section types
enum {
- VER_NEED_NONE = 0,
- VER_NEED_CURRENT = 1
+ NT_GNU_ABI_TAG = 1,
+ NT_GNU_HWCAP = 2,
+ NT_GNU_BUILD_ID = 3,
+ NT_GNU_GOLD_VERSION = 4,
};
-// SHT_NOTE section types
enum {
- NT_GNU_BUILD_ID = 3
+ GNU_ABI_TAG_LINUX = 0,
+ GNU_ABI_TAG_HURD = 1,
+ GNU_ABI_TAG_SOLARIS = 2,
+ GNU_ABI_TAG_FREEBSD = 3,
+ GNU_ABI_TAG_NETBSD = 4,
+ GNU_ABI_TAG_SYLLABLE = 5,
+ GNU_ABI_TAG_NACL = 6,
};
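The GNU_ABI_TAG_* values are what a .note.ABI-tag note carries in its descriptor for the NT_GNU_ABI_TAG type: by long-standing convention the descriptor is four 32-bit words, the OS tag followed by the minimum compatible kernel version. A hedged sketch of that layout (the struct and field names are illustrative, not taken from these headers):

#include <cstdint>
#include <cstdio>

// Descriptor of an NT_GNU_ABI_TAG note as emitted into .note.ABI-tag:
// the OS tag followed by the minimum compatible kernel version.
struct AbiTagDesc {
  uint32_t OS; // one of the GNU_ABI_TAG_* values, e.g. 0 for Linux
  uint32_t Major, Minor, Patch;
};

int main() {
  const AbiTagDesc D = {0 /*GNU_ABI_TAG_LINUX*/, 3, 2, 0};
  std::printf("OS tag %u, minimum kernel %u.%u.%u\n", D.OS, D.Major, D.Minor,
              D.Patch);
  return 0;
}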
// Compressed section header for ELF32.
diff --git a/contrib/llvm/include/llvm/Support/ELFRelocs/AArch64.def b/contrib/llvm/include/llvm/Support/ELFRelocs/AArch64.def
index aa0c560f3e50..c21df07d2dbc 100644
--- a/contrib/llvm/include/llvm/Support/ELFRelocs/AArch64.def
+++ b/contrib/llvm/include/llvm/Support/ELFRelocs/AArch64.def
@@ -3,145 +3,199 @@
#error "ELF_RELOC must be defined"
#endif
-// ABI release 1.0
-ELF_RELOC(R_AARCH64_NONE, 0)
-
-ELF_RELOC(R_AARCH64_ABS64, 0x101)
-ELF_RELOC(R_AARCH64_ABS32, 0x102)
-ELF_RELOC(R_AARCH64_ABS16, 0x103)
-ELF_RELOC(R_AARCH64_PREL64, 0x104)
-ELF_RELOC(R_AARCH64_PREL32, 0x105)
-ELF_RELOC(R_AARCH64_PREL16, 0x106)
-
-ELF_RELOC(R_AARCH64_MOVW_UABS_G0, 0x107)
-ELF_RELOC(R_AARCH64_MOVW_UABS_G0_NC, 0x108)
-ELF_RELOC(R_AARCH64_MOVW_UABS_G1, 0x109)
-ELF_RELOC(R_AARCH64_MOVW_UABS_G1_NC, 0x10a)
-ELF_RELOC(R_AARCH64_MOVW_UABS_G2, 0x10b)
-ELF_RELOC(R_AARCH64_MOVW_UABS_G2_NC, 0x10c)
-ELF_RELOC(R_AARCH64_MOVW_UABS_G3, 0x10d)
-ELF_RELOC(R_AARCH64_MOVW_SABS_G0, 0x10e)
-ELF_RELOC(R_AARCH64_MOVW_SABS_G1, 0x10f)
-ELF_RELOC(R_AARCH64_MOVW_SABS_G2, 0x110)
-
-ELF_RELOC(R_AARCH64_LD_PREL_LO19, 0x111)
-ELF_RELOC(R_AARCH64_ADR_PREL_LO21, 0x112)
-ELF_RELOC(R_AARCH64_ADR_PREL_PG_HI21, 0x113)
-ELF_RELOC(R_AARCH64_ADR_PREL_PG_HI21_NC, 0x114)
-ELF_RELOC(R_AARCH64_ADD_ABS_LO12_NC, 0x115)
-ELF_RELOC(R_AARCH64_LDST8_ABS_LO12_NC, 0x116)
-
-ELF_RELOC(R_AARCH64_TSTBR14, 0x117)
-ELF_RELOC(R_AARCH64_CONDBR19, 0x118)
-ELF_RELOC(R_AARCH64_JUMP26, 0x11a)
-ELF_RELOC(R_AARCH64_CALL26, 0x11b)
-
-ELF_RELOC(R_AARCH64_LDST16_ABS_LO12_NC, 0x11c)
-ELF_RELOC(R_AARCH64_LDST32_ABS_LO12_NC, 0x11d)
-ELF_RELOC(R_AARCH64_LDST64_ABS_LO12_NC, 0x11e)
-
-ELF_RELOC(R_AARCH64_MOVW_PREL_G0, 0x11f)
-ELF_RELOC(R_AARCH64_MOVW_PREL_G0_NC, 0x120)
-ELF_RELOC(R_AARCH64_MOVW_PREL_G1, 0x121)
-ELF_RELOC(R_AARCH64_MOVW_PREL_G1_NC, 0x122)
-ELF_RELOC(R_AARCH64_MOVW_PREL_G2, 0x123)
-ELF_RELOC(R_AARCH64_MOVW_PREL_G2_NC, 0x124)
-ELF_RELOC(R_AARCH64_MOVW_PREL_G3, 0x125)
-
-ELF_RELOC(R_AARCH64_LDST128_ABS_LO12_NC, 0x12b)
-
-ELF_RELOC(R_AARCH64_MOVW_GOTOFF_G0, 0x12c)
-ELF_RELOC(R_AARCH64_MOVW_GOTOFF_G0_NC, 0x12d)
-ELF_RELOC(R_AARCH64_MOVW_GOTOFF_G1, 0x12e)
-ELF_RELOC(R_AARCH64_MOVW_GOTOFF_G1_NC, 0x12f)
-ELF_RELOC(R_AARCH64_MOVW_GOTOFF_G2, 0x130)
-ELF_RELOC(R_AARCH64_MOVW_GOTOFF_G2_NC, 0x131)
-ELF_RELOC(R_AARCH64_MOVW_GOTOFF_G3, 0x132)
-
-ELF_RELOC(R_AARCH64_GOTREL64, 0x133)
-ELF_RELOC(R_AARCH64_GOTREL32, 0x134)
-
-ELF_RELOC(R_AARCH64_GOT_LD_PREL19, 0x135)
-ELF_RELOC(R_AARCH64_LD64_GOTOFF_LO15, 0x136)
-ELF_RELOC(R_AARCH64_ADR_GOT_PAGE, 0x137)
-ELF_RELOC(R_AARCH64_LD64_GOT_LO12_NC, 0x138)
-ELF_RELOC(R_AARCH64_LD64_GOTPAGE_LO15, 0x139)
-
-ELF_RELOC(R_AARCH64_TLSGD_ADR_PREL21, 0x200)
-ELF_RELOC(R_AARCH64_TLSGD_ADR_PAGE21, 0x201)
-ELF_RELOC(R_AARCH64_TLSGD_ADD_LO12_NC, 0x202)
-ELF_RELOC(R_AARCH64_TLSGD_MOVW_G1, 0x203)
-ELF_RELOC(R_AARCH64_TLSGD_MOVW_G0_NC, 0x204)
-
-ELF_RELOC(R_AARCH64_TLSLD_ADR_PREL21, 0x205)
-ELF_RELOC(R_AARCH64_TLSLD_ADR_PAGE21, 0x206)
-ELF_RELOC(R_AARCH64_TLSLD_ADD_LO12_NC, 0x207)
-ELF_RELOC(R_AARCH64_TLSLD_MOVW_G1, 0x208)
-ELF_RELOC(R_AARCH64_TLSLD_MOVW_G0_NC, 0x209)
-ELF_RELOC(R_AARCH64_TLSLD_LD_PREL19, 0x20a)
-ELF_RELOC(R_AARCH64_TLSLD_MOVW_DTPREL_G2, 0x20b)
-ELF_RELOC(R_AARCH64_TLSLD_MOVW_DTPREL_G1, 0x20c)
-ELF_RELOC(R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC, 0x20d)
-ELF_RELOC(R_AARCH64_TLSLD_MOVW_DTPREL_G0, 0x20e)
-ELF_RELOC(R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC, 0x20f)
-ELF_RELOC(R_AARCH64_TLSLD_ADD_DTPREL_HI12, 0x210)
-ELF_RELOC(R_AARCH64_TLSLD_ADD_DTPREL_LO12, 0x211)
-ELF_RELOC(R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC, 0x212)
-ELF_RELOC(R_AARCH64_TLSLD_LDST8_DTPREL_LO12, 0x213)
-ELF_RELOC(R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC, 0x214)
-ELF_RELOC(R_AARCH64_TLSLD_LDST16_DTPREL_LO12, 0x215)
-ELF_RELOC(R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC, 0x216)
-ELF_RELOC(R_AARCH64_TLSLD_LDST32_DTPREL_LO12, 0x217)
-ELF_RELOC(R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC, 0x218)
-ELF_RELOC(R_AARCH64_TLSLD_LDST64_DTPREL_LO12, 0x219)
-ELF_RELOC(R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC, 0x21a)
-
-ELF_RELOC(R_AARCH64_TLSIE_MOVW_GOTTPREL_G1, 0x21b)
-ELF_RELOC(R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC, 0x21c)
-ELF_RELOC(R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21, 0x21d)
-ELF_RELOC(R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC, 0x21e)
-ELF_RELOC(R_AARCH64_TLSIE_LD_GOTTPREL_PREL19, 0x21f)
-
-ELF_RELOC(R_AARCH64_TLSLE_MOVW_TPREL_G2, 0x220)
-ELF_RELOC(R_AARCH64_TLSLE_MOVW_TPREL_G1, 0x221)
-ELF_RELOC(R_AARCH64_TLSLE_MOVW_TPREL_G1_NC, 0x222)
-ELF_RELOC(R_AARCH64_TLSLE_MOVW_TPREL_G0, 0x223)
-ELF_RELOC(R_AARCH64_TLSLE_MOVW_TPREL_G0_NC, 0x224)
-ELF_RELOC(R_AARCH64_TLSLE_ADD_TPREL_HI12, 0x225)
-ELF_RELOC(R_AARCH64_TLSLE_ADD_TPREL_LO12, 0x226)
-ELF_RELOC(R_AARCH64_TLSLE_ADD_TPREL_LO12_NC, 0x227)
-ELF_RELOC(R_AARCH64_TLSLE_LDST8_TPREL_LO12, 0x228)
-ELF_RELOC(R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC, 0x229)
-ELF_RELOC(R_AARCH64_TLSLE_LDST16_TPREL_LO12, 0x22a)
-ELF_RELOC(R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC, 0x22b)
-ELF_RELOC(R_AARCH64_TLSLE_LDST32_TPREL_LO12, 0x22c)
-ELF_RELOC(R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC, 0x22d)
-ELF_RELOC(R_AARCH64_TLSLE_LDST64_TPREL_LO12, 0x22e)
-ELF_RELOC(R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC, 0x22f)
-
-ELF_RELOC(R_AARCH64_TLSDESC_LD_PREL19, 0x230)
-ELF_RELOC(R_AARCH64_TLSDESC_ADR_PREL21, 0x231)
-ELF_RELOC(R_AARCH64_TLSDESC_ADR_PAGE21, 0x232)
-ELF_RELOC(R_AARCH64_TLSDESC_LD64_LO12_NC, 0x233)
-ELF_RELOC(R_AARCH64_TLSDESC_ADD_LO12_NC, 0x234)
-ELF_RELOC(R_AARCH64_TLSDESC_OFF_G1, 0x235)
-ELF_RELOC(R_AARCH64_TLSDESC_OFF_G0_NC, 0x236)
-ELF_RELOC(R_AARCH64_TLSDESC_LDR, 0x237)
-ELF_RELOC(R_AARCH64_TLSDESC_ADD, 0x238)
-ELF_RELOC(R_AARCH64_TLSDESC_CALL, 0x239)
-
-ELF_RELOC(R_AARCH64_TLSLE_LDST128_TPREL_LO12, 0x23a)
-ELF_RELOC(R_AARCH64_TLSLE_LDST128_TPREL_LO12_NC, 0x23b)
-
-ELF_RELOC(R_AARCH64_TLSLD_LDST128_DTPREL_LO12, 0x23c)
-ELF_RELOC(R_AARCH64_TLSLD_LDST128_DTPREL_LO12_NC, 0x23d)
-
-ELF_RELOC(R_AARCH64_COPY, 0x400)
-ELF_RELOC(R_AARCH64_GLOB_DAT, 0x401)
-ELF_RELOC(R_AARCH64_JUMP_SLOT, 0x402)
-ELF_RELOC(R_AARCH64_RELATIVE, 0x403)
-ELF_RELOC(R_AARCH64_TLS_DTPREL64, 0x404)
-ELF_RELOC(R_AARCH64_TLS_DTPMOD64, 0x405)
-ELF_RELOC(R_AARCH64_TLS_TPREL64, 0x406)
-ELF_RELOC(R_AARCH64_TLSDESC, 0x407)
-ELF_RELOC(R_AARCH64_IRELATIVE, 0x408)
+// Based on ABI release 1.1-beta, dated 6 November 2013. NB: The cover page of
+// this document, IHI0056C_beta_aaelf64.pdf, on infocenter.arm.com, still
+// labels this as release 1.0.
+ELF_RELOC(R_AARCH64_NONE, 0)
+ELF_RELOC(R_AARCH64_ABS64, 0x101)
+ELF_RELOC(R_AARCH64_ABS32, 0x102)
+ELF_RELOC(R_AARCH64_ABS16, 0x103)
+ELF_RELOC(R_AARCH64_PREL64, 0x104)
+ELF_RELOC(R_AARCH64_PREL32, 0x105)
+ELF_RELOC(R_AARCH64_PREL16, 0x106)
+ELF_RELOC(R_AARCH64_MOVW_UABS_G0, 0x107)
+ELF_RELOC(R_AARCH64_MOVW_UABS_G0_NC, 0x108)
+ELF_RELOC(R_AARCH64_MOVW_UABS_G1, 0x109)
+ELF_RELOC(R_AARCH64_MOVW_UABS_G1_NC, 0x10a)
+ELF_RELOC(R_AARCH64_MOVW_UABS_G2, 0x10b)
+ELF_RELOC(R_AARCH64_MOVW_UABS_G2_NC, 0x10c)
+ELF_RELOC(R_AARCH64_MOVW_UABS_G3, 0x10d)
+ELF_RELOC(R_AARCH64_MOVW_SABS_G0, 0x10e)
+ELF_RELOC(R_AARCH64_MOVW_SABS_G1, 0x10f)
+ELF_RELOC(R_AARCH64_MOVW_SABS_G2, 0x110)
+ELF_RELOC(R_AARCH64_LD_PREL_LO19, 0x111)
+ELF_RELOC(R_AARCH64_ADR_PREL_LO21, 0x112)
+ELF_RELOC(R_AARCH64_ADR_PREL_PG_HI21, 0x113)
+ELF_RELOC(R_AARCH64_ADR_PREL_PG_HI21_NC, 0x114)
+ELF_RELOC(R_AARCH64_ADD_ABS_LO12_NC, 0x115)
+ELF_RELOC(R_AARCH64_LDST8_ABS_LO12_NC, 0x116)
+ELF_RELOC(R_AARCH64_TSTBR14, 0x117)
+ELF_RELOC(R_AARCH64_CONDBR19, 0x118)
+ELF_RELOC(R_AARCH64_JUMP26, 0x11a)
+ELF_RELOC(R_AARCH64_CALL26, 0x11b)
+ELF_RELOC(R_AARCH64_LDST16_ABS_LO12_NC, 0x11c)
+ELF_RELOC(R_AARCH64_LDST32_ABS_LO12_NC, 0x11d)
+ELF_RELOC(R_AARCH64_LDST64_ABS_LO12_NC, 0x11e)
+ELF_RELOC(R_AARCH64_MOVW_PREL_G0, 0x11f)
+ELF_RELOC(R_AARCH64_MOVW_PREL_G0_NC, 0x120)
+ELF_RELOC(R_AARCH64_MOVW_PREL_G1, 0x121)
+ELF_RELOC(R_AARCH64_MOVW_PREL_G1_NC, 0x122)
+ELF_RELOC(R_AARCH64_MOVW_PREL_G2, 0x123)
+ELF_RELOC(R_AARCH64_MOVW_PREL_G2_NC, 0x124)
+ELF_RELOC(R_AARCH64_MOVW_PREL_G3, 0x125)
+ELF_RELOC(R_AARCH64_LDST128_ABS_LO12_NC, 0x12b)
+ELF_RELOC(R_AARCH64_MOVW_GOTOFF_G0, 0x12c)
+ELF_RELOC(R_AARCH64_MOVW_GOTOFF_G0_NC, 0x12d)
+ELF_RELOC(R_AARCH64_MOVW_GOTOFF_G1, 0x12e)
+ELF_RELOC(R_AARCH64_MOVW_GOTOFF_G1_NC, 0x12f)
+ELF_RELOC(R_AARCH64_MOVW_GOTOFF_G2, 0x130)
+ELF_RELOC(R_AARCH64_MOVW_GOTOFF_G2_NC, 0x131)
+ELF_RELOC(R_AARCH64_MOVW_GOTOFF_G3, 0x132)
+ELF_RELOC(R_AARCH64_GOTREL64, 0x133)
+ELF_RELOC(R_AARCH64_GOTREL32, 0x134)
+ELF_RELOC(R_AARCH64_GOT_LD_PREL19, 0x135)
+ELF_RELOC(R_AARCH64_LD64_GOTOFF_LO15, 0x136)
+ELF_RELOC(R_AARCH64_ADR_GOT_PAGE, 0x137)
+ELF_RELOC(R_AARCH64_LD64_GOT_LO12_NC, 0x138)
+ELF_RELOC(R_AARCH64_LD64_GOTPAGE_LO15, 0x139)
+ELF_RELOC(R_AARCH64_TLSGD_ADR_PREL21, 0x200)
+ELF_RELOC(R_AARCH64_TLSGD_ADR_PAGE21, 0x201)
+ELF_RELOC(R_AARCH64_TLSGD_ADD_LO12_NC, 0x202)
+ELF_RELOC(R_AARCH64_TLSGD_MOVW_G1, 0x203)
+ELF_RELOC(R_AARCH64_TLSGD_MOVW_G0_NC, 0x204)
+ELF_RELOC(R_AARCH64_TLSLD_ADR_PREL21, 0x205)
+ELF_RELOC(R_AARCH64_TLSLD_ADR_PAGE21, 0x206)
+ELF_RELOC(R_AARCH64_TLSLD_ADD_LO12_NC, 0x207)
+ELF_RELOC(R_AARCH64_TLSLD_MOVW_G1, 0x208)
+ELF_RELOC(R_AARCH64_TLSLD_MOVW_G0_NC, 0x209)
+ELF_RELOC(R_AARCH64_TLSLD_LD_PREL19, 0x20a)
+ELF_RELOC(R_AARCH64_TLSLD_MOVW_DTPREL_G2, 0x20b)
+ELF_RELOC(R_AARCH64_TLSLD_MOVW_DTPREL_G1, 0x20c)
+ELF_RELOC(R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC, 0x20d)
+ELF_RELOC(R_AARCH64_TLSLD_MOVW_DTPREL_G0, 0x20e)
+ELF_RELOC(R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC, 0x20f)
+ELF_RELOC(R_AARCH64_TLSLD_ADD_DTPREL_HI12, 0x210)
+ELF_RELOC(R_AARCH64_TLSLD_ADD_DTPREL_LO12, 0x211)
+ELF_RELOC(R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC, 0x212)
+ELF_RELOC(R_AARCH64_TLSLD_LDST8_DTPREL_LO12, 0x213)
+ELF_RELOC(R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC, 0x214)
+ELF_RELOC(R_AARCH64_TLSLD_LDST16_DTPREL_LO12, 0x215)
+ELF_RELOC(R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC, 0x216)
+ELF_RELOC(R_AARCH64_TLSLD_LDST32_DTPREL_LO12, 0x217)
+ELF_RELOC(R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC, 0x218)
+ELF_RELOC(R_AARCH64_TLSLD_LDST64_DTPREL_LO12, 0x219)
+ELF_RELOC(R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC, 0x21a)
+ELF_RELOC(R_AARCH64_TLSIE_MOVW_GOTTPREL_G1, 0x21b)
+ELF_RELOC(R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC, 0x21c)
+ELF_RELOC(R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21, 0x21d)
+ELF_RELOC(R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC, 0x21e)
+ELF_RELOC(R_AARCH64_TLSIE_LD_GOTTPREL_PREL19, 0x21f)
+ELF_RELOC(R_AARCH64_TLSLE_MOVW_TPREL_G2, 0x220)
+ELF_RELOC(R_AARCH64_TLSLE_MOVW_TPREL_G1, 0x221)
+ELF_RELOC(R_AARCH64_TLSLE_MOVW_TPREL_G1_NC, 0x222)
+ELF_RELOC(R_AARCH64_TLSLE_MOVW_TPREL_G0, 0x223)
+ELF_RELOC(R_AARCH64_TLSLE_MOVW_TPREL_G0_NC, 0x224)
+ELF_RELOC(R_AARCH64_TLSLE_ADD_TPREL_HI12, 0x225)
+ELF_RELOC(R_AARCH64_TLSLE_ADD_TPREL_LO12, 0x226)
+ELF_RELOC(R_AARCH64_TLSLE_ADD_TPREL_LO12_NC, 0x227)
+ELF_RELOC(R_AARCH64_TLSLE_LDST8_TPREL_LO12, 0x228)
+ELF_RELOC(R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC, 0x229)
+ELF_RELOC(R_AARCH64_TLSLE_LDST16_TPREL_LO12, 0x22a)
+ELF_RELOC(R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC, 0x22b)
+ELF_RELOC(R_AARCH64_TLSLE_LDST32_TPREL_LO12, 0x22c)
+ELF_RELOC(R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC, 0x22d)
+ELF_RELOC(R_AARCH64_TLSLE_LDST64_TPREL_LO12, 0x22e)
+ELF_RELOC(R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC, 0x22f)
+ELF_RELOC(R_AARCH64_TLSDESC_LD_PREL19, 0x230)
+ELF_RELOC(R_AARCH64_TLSDESC_ADR_PREL21, 0x231)
+ELF_RELOC(R_AARCH64_TLSDESC_ADR_PAGE21, 0x232)
+ELF_RELOC(R_AARCH64_TLSDESC_LD64_LO12_NC, 0x233)
+ELF_RELOC(R_AARCH64_TLSDESC_ADD_LO12_NC, 0x234)
+ELF_RELOC(R_AARCH64_TLSDESC_OFF_G1, 0x235)
+ELF_RELOC(R_AARCH64_TLSDESC_OFF_G0_NC, 0x236)
+ELF_RELOC(R_AARCH64_TLSDESC_LDR, 0x237)
+ELF_RELOC(R_AARCH64_TLSDESC_ADD, 0x238)
+ELF_RELOC(R_AARCH64_TLSDESC_CALL, 0x239)
+ELF_RELOC(R_AARCH64_TLSLE_LDST128_TPREL_LO12, 0x23a)
+ELF_RELOC(R_AARCH64_TLSLE_LDST128_TPREL_LO12_NC, 0x23b)
+ELF_RELOC(R_AARCH64_TLSLD_LDST128_DTPREL_LO12, 0x23c)
+ELF_RELOC(R_AARCH64_TLSLD_LDST128_DTPREL_LO12_NC, 0x23d)
+ELF_RELOC(R_AARCH64_COPY, 0x400)
+ELF_RELOC(R_AARCH64_GLOB_DAT, 0x401)
+ELF_RELOC(R_AARCH64_JUMP_SLOT, 0x402)
+ELF_RELOC(R_AARCH64_RELATIVE, 0x403)
+ELF_RELOC(R_AARCH64_TLS_DTPREL64, 0x404)
+ELF_RELOC(R_AARCH64_TLS_DTPMOD64, 0x405)
+ELF_RELOC(R_AARCH64_TLS_TPREL64, 0x406)
+ELF_RELOC(R_AARCH64_TLSDESC, 0x407)
+ELF_RELOC(R_AARCH64_IRELATIVE, 0x408)
+
+// ELF_RELOC(R_AARCH64_P32_NONE, 0)
+ELF_RELOC(R_AARCH64_P32_ABS32, 0x001)
+ELF_RELOC(R_AARCH64_P32_ABS16, 0x002)
+ELF_RELOC(R_AARCH64_P32_PREL32, 0x003)
+ELF_RELOC(R_AARCH64_P32_PREL16, 0x004)
+ELF_RELOC(R_AARCH64_P32_MOVW_UABS_G0, 0x005)
+ELF_RELOC(R_AARCH64_P32_MOVW_UABS_G0_NC, 0x006)
+ELF_RELOC(R_AARCH64_P32_MOVW_UABS_G1, 0x007)
+ELF_RELOC(R_AARCH64_P32_MOVW_SABS_G0, 0x008)
+ELF_RELOC(R_AARCH64_P32_LD_PREL_LO19, 0x009)
+ELF_RELOC(R_AARCH64_P32_ADR_PREL_LO21, 0x00a)
+ELF_RELOC(R_AARCH64_P32_ADR_PREL_PG_HI21, 0x00b)
+ELF_RELOC(R_AARCH64_P32_ADD_ABS_LO12_NC, 0x00c)
+ELF_RELOC(R_AARCH64_P32_LDST8_ABS_LO12_NC, 0x00d)
+ELF_RELOC(R_AARCH64_P32_TSTBR14, 0x012)
+ELF_RELOC(R_AARCH64_P32_CONDBR19, 0x013)
+ELF_RELOC(R_AARCH64_P32_JUMP26, 0x014)
+ELF_RELOC(R_AARCH64_P32_CALL26, 0x015)
+ELF_RELOC(R_AARCH64_P32_LDST16_ABS_LO12_NC, 0x00e)
+ELF_RELOC(R_AARCH64_P32_LDST32_ABS_LO12_NC, 0x00f)
+ELF_RELOC(R_AARCH64_P32_LDST64_ABS_LO12_NC, 0x010)
+ELF_RELOC(R_AARCH64_P32_MOVW_PREL_G0, 0x016)
+ELF_RELOC(R_AARCH64_P32_MOVW_PREL_G0_NC, 0x017)
+ELF_RELOC(R_AARCH64_P32_MOVW_PREL_G1, 0x018)
+ELF_RELOC(R_AARCH64_P32_LDST128_ABS_LO12_NC, 0x011)
+ELF_RELOC(R_AARCH64_P32_GOT_LD_PREL19, 0x019)
+ELF_RELOC(R_AARCH64_P32_ADR_GOT_PAGE, 0x01a)
+ELF_RELOC(R_AARCH64_P32_LD64_GOT_LO12_NC, 0x01b)
+ELF_RELOC(R_AARCH64_P32_LD32_GOTPAGE_LO14, 0x01c)
+ELF_RELOC(R_AARCH64_P32_TLSLD_MOVW_DTPREL_G1, 0x057)
+ELF_RELOC(R_AARCH64_P32_TLSLD_MOVW_DTPREL_G0, 0x058)
+ELF_RELOC(R_AARCH64_P32_TLSLD_MOVW_DTPREL_G0_NC, 0x059)
+ELF_RELOC(R_AARCH64_P32_TLSLD_ADD_DTPREL_HI12, 0x05a)
+ELF_RELOC(R_AARCH64_P32_TLSLD_ADD_DTPREL_LO12, 0x05b)
+ELF_RELOC(R_AARCH64_P32_TLSLD_ADD_DTPREL_LO12_NC, 0x05c)
+ELF_RELOC(R_AARCH64_P32_TLSLD_LDST8_DTPREL_LO12, 0x05d)
+ELF_RELOC(R_AARCH64_P32_TLSLD_LDST8_DTPREL_LO12_NC, 0x05e)
+ELF_RELOC(R_AARCH64_P32_TLSLD_LDST16_DTPREL_LO12, 0x05f)
+ELF_RELOC(R_AARCH64_P32_TLSLD_LDST16_DTPREL_LO12_NC, 0x060)
+ELF_RELOC(R_AARCH64_P32_TLSLD_LDST32_DTPREL_LO12, 0x061)
+ELF_RELOC(R_AARCH64_P32_TLSLD_LDST32_DTPREL_LO12_NC, 0x062)
+ELF_RELOC(R_AARCH64_P32_TLSLD_LDST64_DTPREL_LO12, 0x063)
+ELF_RELOC(R_AARCH64_P32_TLSLD_LDST64_DTPREL_LO12_NC, 0x064)
+ELF_RELOC(R_AARCH64_P32_TLSIE_ADR_GOTTPREL_PAGE21, 0x067)
+ELF_RELOC(R_AARCH64_P32_TLSIE_LD32_GOTTPREL_LO12_NC, 0x068)
+ELF_RELOC(R_AARCH64_P32_TLSIE_LD_GOTTPREL_PREL19, 0x069)
+ELF_RELOC(R_AARCH64_P32_TLSLE_MOVW_TPREL_G1, 0x06a)
+ELF_RELOC(R_AARCH64_P32_TLSLE_MOVW_TPREL_G0, 0x06b)
+ELF_RELOC(R_AARCH64_P32_TLSLE_MOVW_TPREL_G0_NC, 0x06c)
+ELF_RELOC(R_AARCH64_P32_TLSLE_ADD_TPREL_HI12, 0x06d)
+ELF_RELOC(R_AARCH64_P32_TLSLE_ADD_TPREL_LO12, 0x06e)
+ELF_RELOC(R_AARCH64_P32_TLSLE_ADD_TPREL_LO12_NC, 0x06f)
+ELF_RELOC(R_AARCH64_P32_TLSLE_LDST8_TPREL_LO12, 0x070)
+ELF_RELOC(R_AARCH64_P32_TLSLE_LDST8_TPREL_LO12_NC, 0x071)
+ELF_RELOC(R_AARCH64_P32_TLSLE_LDST16_TPREL_LO12, 0x072)
+ELF_RELOC(R_AARCH64_P32_TLSLE_LDST16_TPREL_LO12_NC, 0x073)
+ELF_RELOC(R_AARCH64_P32_TLSLE_LDST32_TPREL_LO12, 0x074)
+ELF_RELOC(R_AARCH64_P32_TLSLE_LDST32_TPREL_LO12_NC, 0x075)
+ELF_RELOC(R_AARCH64_P32_TLSLE_LDST64_TPREL_LO12, 0x076)
+ELF_RELOC(R_AARCH64_P32_TLSLE_LDST64_TPREL_LO12_NC, 0x077)
+ELF_RELOC(R_AARCH64_P32_TLSDESC_ADR_PAGE21, 0x051)
+ELF_RELOC(R_AARCH64_P32_TLSDESC_LD32_LO12_NC, 0x07d)
+ELF_RELOC(R_AARCH64_P32_TLSDESC_ADD_LO12_NC, 0x034)
+ELF_RELOC(R_AARCH64_P32_TLSDESC_CALL, 0x07f)
+ELF_RELOC(R_AARCH64_P32_COPY, 0x0b4)
+ELF_RELOC(R_AARCH64_P32_GLOB_DAT, 0x0b5)
+ELF_RELOC(R_AARCH64_P32_JUMP_SLOT, 0x0b6)
+ELF_RELOC(R_AARCH64_P32_RELATIVE, 0x0b7)
+ELF_RELOC(R_AARCH64_P32_IRELATIVE, 0x0bc)
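These .def files follow LLVM's X-macro convention: a consumer defines ELF_RELOC(name, value) to whatever expansion it needs, includes the .def file, and undefines the macro, repeating the sequence for each different expansion. A self-contained illustration of the pattern; the two-entry LOCAL_RELOC_LIST macro stands in for including the real AArch64.def:

#include <cstdint>
#include <cstdio>

// Normally these entries would come from '#include ".../AArch64.def"'.
// They are inlined here so the example compiles on its own.
#define LOCAL_RELOC_LIST(X)                                                    \
  X(R_AARCH64_NONE, 0)                                                         \
  X(R_AARCH64_ABS64, 0x101)

// First expansion: build an enum of relocation types.
#define ELF_RELOC(name, value) name = value,
enum AArch64Reloc : uint32_t { LOCAL_RELOC_LIST(ELF_RELOC) };
#undef ELF_RELOC

// Second expansion of the same list: map a value back to its name.
const char *relocName(uint32_t Type) {
#define ELF_RELOC(name, value)                                                 \
  if (Type == value)                                                           \
    return #name;
  LOCAL_RELOC_LIST(ELF_RELOC)
#undef ELF_RELOC
  return "unknown";
}

int main() {
  std::printf("0x101 -> %s\n", relocName(0x101)); // R_AARCH64_ABS64
  return 0;
}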
diff --git a/contrib/llvm/include/llvm/Support/ELFRelocs/AMDGPU.def b/contrib/llvm/include/llvm/Support/ELFRelocs/AMDGPU.def
index c1e6797fdb0d..c66f88d14ec7 100644
--- a/contrib/llvm/include/llvm/Support/ELFRelocs/AMDGPU.def
+++ b/contrib/llvm/include/llvm/Support/ELFRelocs/AMDGPU.def
@@ -2,11 +2,15 @@
#error "ELF_RELOC must be defined"
#endif
-ELF_RELOC(R_AMDGPU_NONE, 0)
-ELF_RELOC(R_AMDGPU_ABS32_LO, 1)
-ELF_RELOC(R_AMDGPU_ABS32_HI, 2)
-ELF_RELOC(R_AMDGPU_ABS64, 3)
-ELF_RELOC(R_AMDGPU_REL32, 4)
-ELF_RELOC(R_AMDGPU_REL64, 5)
-ELF_RELOC(R_AMDGPU_ABS32, 6)
-ELF_RELOC(R_AMDGPU_GOTPCREL, 7)
+ELF_RELOC(R_AMDGPU_NONE, 0)
+ELF_RELOC(R_AMDGPU_ABS32_LO, 1)
+ELF_RELOC(R_AMDGPU_ABS32_HI, 2)
+ELF_RELOC(R_AMDGPU_ABS64, 3)
+ELF_RELOC(R_AMDGPU_REL32, 4)
+ELF_RELOC(R_AMDGPU_REL64, 5)
+ELF_RELOC(R_AMDGPU_ABS32, 6)
+ELF_RELOC(R_AMDGPU_GOTPCREL, 7)
+ELF_RELOC(R_AMDGPU_GOTPCREL32_LO, 8)
+ELF_RELOC(R_AMDGPU_GOTPCREL32_HI, 9)
+ELF_RELOC(R_AMDGPU_REL32_LO, 10)
+ELF_RELOC(R_AMDGPU_REL32_HI, 11)
diff --git a/contrib/llvm/include/llvm/Support/ELFRelocs/BPF.def b/contrib/llvm/include/llvm/Support/ELFRelocs/BPF.def
index 868974d683c7..5dd7f70b6963 100644
--- a/contrib/llvm/include/llvm/Support/ELFRelocs/BPF.def
+++ b/contrib/llvm/include/llvm/Support/ELFRelocs/BPF.def
@@ -4,6 +4,5 @@
// No relocation
ELF_RELOC(R_BPF_NONE, 0)
-// Map index in "maps" section to file descriptor
-// within ld_64 instruction.
-ELF_RELOC(R_BPF_MAP_FD, 1)
+ELF_RELOC(R_BPF_64_64, 1)
+ELF_RELOC(R_BPF_64_32, 10)
diff --git a/contrib/llvm/include/llvm/Support/ELFRelocs/RISCV.def b/contrib/llvm/include/llvm/Support/ELFRelocs/RISCV.def
new file mode 100644
index 000000000000..9ec4955d26db
--- /dev/null
+++ b/contrib/llvm/include/llvm/Support/ELFRelocs/RISCV.def
@@ -0,0 +1,50 @@
+
+#ifndef ELF_RELOC
+#error "ELF_RELOC must be defined"
+#endif
+
+ELF_RELOC(R_RISCV_NONE, 0)
+ELF_RELOC(R_RISCV_32, 1)
+ELF_RELOC(R_RISCV_64, 2)
+ELF_RELOC(R_RISCV_RELATIVE, 3)
+ELF_RELOC(R_RISCV_COPY, 4)
+ELF_RELOC(R_RISCV_JUMP_SLOT, 5)
+ELF_RELOC(R_RISCV_TLS_DTPMOD32, 6)
+ELF_RELOC(R_RISCV_TLS_DTPMOD64, 7)
+ELF_RELOC(R_RISCV_TLS_DTPREL32, 8)
+ELF_RELOC(R_RISCV_TLS_DTPREL64, 9)
+ELF_RELOC(R_RISCV_TLS_TPREL32, 10)
+ELF_RELOC(R_RISCV_TLS_TPREL64, 11)
+ELF_RELOC(R_RISCV_BRANCH, 16)
+ELF_RELOC(R_RISCV_JAL, 17)
+ELF_RELOC(R_RISCV_CALL, 18)
+ELF_RELOC(R_RISCV_CALL_PLT, 19)
+ELF_RELOC(R_RISCV_GOT_HI20, 20)
+ELF_RELOC(R_RISCV_TLS_GOT_HI20, 21)
+ELF_RELOC(R_RISCV_TLS_GD_HI20, 22)
+ELF_RELOC(R_RISCV_PCREL_HI20, 23)
+ELF_RELOC(R_RISCV_PCREL_LO12_I, 24)
+ELF_RELOC(R_RISCV_PCREL_LO12_S, 25)
+ELF_RELOC(R_RISCV_HI20, 26)
+ELF_RELOC(R_RISCV_LO12_I, 27)
+ELF_RELOC(R_RISCV_LO12_S, 28)
+ELF_RELOC(R_RISCV_TPREL_HI20, 29)
+ELF_RELOC(R_RISCV_TPREL_LO12_I, 30)
+ELF_RELOC(R_RISCV_TPREL_LO12_S, 31)
+ELF_RELOC(R_RISCV_TPREL_ADD, 32)
+ELF_RELOC(R_RISCV_ADD8, 33)
+ELF_RELOC(R_RISCV_ADD16, 34)
+ELF_RELOC(R_RISCV_ADD32, 35)
+ELF_RELOC(R_RISCV_ADD64, 36)
+ELF_RELOC(R_RISCV_SUB8, 37)
+ELF_RELOC(R_RISCV_SUB16, 38)
+ELF_RELOC(R_RISCV_SUB32, 39)
+ELF_RELOC(R_RISCV_SUB64, 40)
+ELF_RELOC(R_RISCV_GNU_VTINHERIT, 41)
+ELF_RELOC(R_RISCV_GNU_VTENTRY, 42)
+ELF_RELOC(R_RISCV_ALIGN, 43)
+ELF_RELOC(R_RISCV_RVC_BRANCH, 44)
+ELF_RELOC(R_RISCV_RVC_JUMP, 45)
+ELF_RELOC(R_RISCV_RVC_LUI, 46)
+ELF_RELOC(R_RISCV_GPREL_I, 47)
+ELF_RELOC(R_RISCV_GPREL_S, 48)
diff --git a/contrib/llvm/include/llvm/Support/ELFRelocs/SystemZ.def b/contrib/llvm/include/llvm/Support/ELFRelocs/SystemZ.def
index 711f94011f2c..d6c0b79d40ab 100644
--- a/contrib/llvm/include/llvm/Support/ELFRelocs/SystemZ.def
+++ b/contrib/llvm/include/llvm/Support/ELFRelocs/SystemZ.def
@@ -65,3 +65,7 @@ ELF_RELOC(R_390_GOT20, 58)
ELF_RELOC(R_390_GOTPLT20, 59)
ELF_RELOC(R_390_TLS_GOTIE20, 60)
ELF_RELOC(R_390_IRELATIVE, 61)
+ELF_RELOC(R_390_PC12DBL, 62)
+ELF_RELOC(R_390_PLT12DBL, 63)
+ELF_RELOC(R_390_PC24DBL, 64)
+ELF_RELOC(R_390_PLT24DBL, 65)
diff --git a/contrib/llvm/include/llvm/Support/Endian.h b/contrib/llvm/include/llvm/Support/Endian.h
index cb5cd8e511b1..cbe3d67b1f9e 100644
--- a/contrib/llvm/include/llvm/Support/Endian.h
+++ b/contrib/llvm/include/llvm/Support/Endian.h
@@ -14,7 +14,6 @@
#ifndef LLVM_SUPPORT_ENDIAN_H
#define LLVM_SUPPORT_ENDIAN_H
-#include "llvm/Support/AlignOf.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/SwapByteOrder.h"
@@ -29,7 +28,7 @@ namespace detail {
/// \brief ::value is either alignment, or alignof(T) if alignment is 0.
template<class T, int alignment>
struct PickAlignment {
- enum {value = alignment == 0 ? AlignOf<T>::Alignment : alignment};
+ enum { value = alignment == 0 ? alignof(T) : alignment };
};
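The change above swaps LLVM's old AlignOf<T>::Alignment for the language-level alignof(T); the selector itself simply prefers an explicit alignment and falls back to the natural one when the parameter is zero. Restated as a standalone sketch:

#include <cstdint>

template <class T, int alignment> struct PickAlignment {
  enum { value = alignment == 0 ? alignof(T) : alignment };
};

static_assert(PickAlignment<uint64_t, 0>::value == alignof(uint64_t),
              "0 means 'use the natural alignment'");
static_assert(PickAlignment<uint64_t, 2>::value == 2,
              "a non-zero alignment is taken as given");

int main() { return 0; }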
} // end namespace detail
diff --git a/contrib/llvm/include/llvm/Support/Error.h b/contrib/llvm/include/llvm/Support/Error.h
index 5f515a88a021..f13c9484b5fd 100644
--- a/contrib/llvm/include/llvm/Support/Error.h
+++ b/contrib/llvm/include/llvm/Support/Error.h
@@ -14,25 +14,38 @@
#ifndef LLVM_SUPPORT_ERROR_H
#define LLVM_SUPPORT_ERROR_H
-#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Config/abi-breaking.h"
+#include "llvm/Support/AlignOf.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstdlib>
+#include <functional>
+#include <memory>
+#include <new>
+#include <string>
+#include <system_error>
+#include <type_traits>
+#include <utility>
#include <vector>
namespace llvm {
-class Error;
-class ErrorList;
+class ErrorSuccess;
/// Base class for error info classes. Do not extend this directly: Extend
/// the ErrorInfo template subclass instead.
class ErrorInfoBase {
public:
- virtual ~ErrorInfoBase() {}
+ virtual ~ErrorInfoBase() = default;
/// Print an error message to an output stream.
virtual void log(raw_ostream &OS) const = 0;
@@ -67,6 +80,7 @@ public:
private:
virtual void anchor();
+
static char ID;
};
@@ -86,12 +100,14 @@ private:
/// Error instance is in. For Error instances indicating success, it
/// is sufficient to invoke the boolean conversion operator. E.g.:
///
+/// @code{.cpp}
/// Error foo(<...>);
///
/// if (auto E = foo(<...>))
/// return E; // <- Return E if it is in the error state.
/// // We have verified that E was in the success state. It can now be safely
/// // destroyed.
+/// @endcode
///
/// A success value *can not* be dropped. For example, just calling 'foo(<...>)'
/// without testing the return value will raise a runtime error, even if foo
@@ -100,6 +116,7 @@ private:
/// For Error instances representing failure, you must use either the
/// handleErrors or handleAllErrors function with a typed handler. E.g.:
///
+/// @code{.cpp}
/// class MyErrorInfo : public ErrorInfo<MyErrorInfo> {
/// // Custom error info.
/// };
@@ -122,6 +139,7 @@ private:
/// );
/// // Note - we must check or return NewE in case any of the handlers
/// // returned a new error.
+/// @endcode
///
/// The handleAllErrors function is identical to handleErrors, except
/// that it has a void return type, and requires all errors to be handled and
@@ -131,8 +149,7 @@ private:
/// *All* Error instances must be checked before destruction, even if
/// they're moved-assigned or constructed from Success values that have already
/// been checked. This enforces checking through all levels of the call stack.
-class Error {
-
+class LLVM_NODISCARD Error {
// ErrorList needs to be able to yank ErrorInfoBase pointers out of this
// class to add to the error list.
friend class ErrorList;
@@ -143,19 +160,18 @@ class Error {
// Expected<T> needs to be able to steal the payload when constructed from an
// error.
- template <typename T> class Expected;
+ template <typename T> friend class Expected;
-public:
+protected:
/// Create a success value. Prefer using 'Error::success()' for readability
- /// where possible.
- Error() {
+ Error() : Payload(nullptr) {
setPtr(nullptr);
setChecked(false);
}
- /// Create a success value. This is equivalent to calling the default
- /// constructor, but should be preferred for readability where possible.
- static Error success() { return Error(); }
+public:
+ /// Create a success value.
+ static ErrorSuccess success();
// Errors are not copy-constructable.
Error(const Error &Other) = delete;
@@ -163,7 +179,7 @@ public:
/// Move-construct an error value. The newly constructed error is considered
/// unchecked, even if the source error had been checked. The original error
/// becomes a checked Success value, regardless of its original state.
- Error(Error &&Other) {
+ Error(Error &&Other) : Payload(nullptr) {
setChecked(true);
*this = std::move(Other);
}
@@ -219,7 +235,7 @@ public:
private:
void assertIsChecked() {
-#ifndef NDEBUG
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
if (!getChecked() || getPtr()) {
dbgs() << "Program aborted due to an unhandled Error:\n";
if (getPtr())
@@ -234,33 +250,35 @@ private:
}
ErrorInfoBase *getPtr() const {
-#ifndef NDEBUG
- return PayloadAndCheckedBit.getPointer();
-#else
- return Payload;
-#endif
+ return reinterpret_cast<ErrorInfoBase*>(
+ reinterpret_cast<uintptr_t>(Payload) &
+ ~static_cast<uintptr_t>(0x1));
}
void setPtr(ErrorInfoBase *EI) {
-#ifndef NDEBUG
- PayloadAndCheckedBit.setPointer(EI);
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
+ Payload = reinterpret_cast<ErrorInfoBase*>(
+ (reinterpret_cast<uintptr_t>(EI) &
+ ~static_cast<uintptr_t>(0x1)) |
+ (reinterpret_cast<uintptr_t>(Payload) & 0x1));
#else
Payload = EI;
#endif
}
bool getChecked() const {
-#ifndef NDEBUG
- return PayloadAndCheckedBit.getInt();
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
+ return (reinterpret_cast<uintptr_t>(Payload) & 0x1) == 0;
#else
return true;
#endif
}
void setChecked(bool V) {
-#ifndef NDEBUG
- PayloadAndCheckedBit.setInt(V);
-#endif
+ Payload = reinterpret_cast<ErrorInfoBase*>(
+ (reinterpret_cast<uintptr_t>(Payload) &
+ ~static_cast<uintptr_t>(0x1)) |
+ (V ? 0 : 1));
}
std::unique_ptr<ErrorInfoBase> takePayload() {
@@ -270,13 +288,16 @@ private:
return Tmp;
}
-#ifndef NDEBUG
- PointerIntPair<ErrorInfoBase *, 1> PayloadAndCheckedBit;
-#else
ErrorInfoBase *Payload;
-#endif
};
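With the ABI-breaking checks enabled, Error no longer keeps a separate PointerIntPair: the checked/unchecked state is folded into bit 0 of the Payload pointer, which is safe because ErrorInfoBase objects are allocated with at least two-byte alignment. A standalone sketch of that low-bit tagging trick (an illustrative class, not the LLVM one):

#include <cassert>
#include <cstdint>

// Store a one-bit flag in the low bit of an aligned pointer, the way
// Error::setPtr/setChecked fold the "checked" state into Payload.
template <typename T> class TaggedPtr {
  uintptr_t Bits = 0;

public:
  void setPointer(T *P) {
    assert((reinterpret_cast<uintptr_t>(P) & 1) == 0 && "pointer not aligned");
    Bits = reinterpret_cast<uintptr_t>(P) | (Bits & 1);
  }
  void setFlag(bool F) { Bits = (Bits & ~uintptr_t(1)) | (F ? 1 : 0); }
  T *getPointer() const { return reinterpret_cast<T *>(Bits & ~uintptr_t(1)); }
  bool getFlag() const { return Bits & 1; }
};

int main() {
  static int Value = 42; // static storage is suitably aligned
  TaggedPtr<int> P;
  P.setPointer(&Value);
  P.setFlag(true);
  assert(P.getPointer() == &Value && P.getFlag());
  return 0;
}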
+/// Subclass of Error for the sole purpose of identifying the success path in
+/// the type system. This allows to catch invalid conversion to Expected<T> at
+/// compile time.
+class ErrorSuccess : public Error {};
+
+inline ErrorSuccess Error::success() { return ErrorSuccess(); }
+
/// Make a Error instance representing failure using the given error info
/// type.
template <typename ErrT, typename... ArgTs> Error make_error(ArgTs &&... Args) {
@@ -305,7 +326,6 @@ public:
/// Special ErrorInfo subclass representing a list of ErrorInfos.
/// Instances of this class are constructed by joinError.
class ErrorList final : public ErrorInfo<ErrorList> {
-
// handleErrors needs to be able to iterate the payload list of an
// ErrorList.
template <typename... HandlerTs>
@@ -570,25 +590,36 @@ inline void consumeError(Error Err) {
/// to check the result. This helper performs these actions automatically using
/// RAII:
///
-/// Result foo(Error &Err) {
-/// ErrorAsOutParameter ErrAsOutParam(Err); // 'Checked' flag set
-/// // <body of foo>
-/// // <- 'Checked' flag auto-cleared when ErrAsOutParam is destructed.
-/// }
+/// @code{.cpp}
+/// Result foo(Error &Err) {
+/// ErrorAsOutParameter ErrAsOutParam(&Err); // 'Checked' flag set
+/// // <body of foo>
+/// // <- 'Checked' flag auto-cleared when ErrAsOutParam is destructed.
+/// }
+/// @endcode
+///
+/// ErrorAsOutParameter takes an Error* rather than Error& so that it can be
+/// used with optional Errors (Error pointers that are allowed to be null). If
+/// ErrorAsOutParameter took an Error reference, an instance would have to be
+/// created inside every condition that verified that Error was non-null. By
+/// taking an Error pointer we can just create one instance at the top of the
+/// function.
class ErrorAsOutParameter {
public:
- ErrorAsOutParameter(Error &Err) : Err(Err) {
+ ErrorAsOutParameter(Error *Err) : Err(Err) {
// Raise the checked bit if Err is success.
- (void)!!Err;
+ if (Err)
+ (void)!!*Err;
}
+
~ErrorAsOutParameter() {
// Clear the checked bit.
- if (!Err)
- Err = Error::success();
+ if (Err && !*Err)
+ *Err = Error::success();
}
private:
- Error &Err;
+ Error *Err;
};
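A short sketch of the pointer-based pattern this enables; readConfig and its failure condition are hypothetical:

    // Err may be null when the caller does not care about the failure reason.
    void readConfig(llvm::StringRef Path, llvm::Error *Err) {
      llvm::ErrorAsOutParameter ErrAsOut(Err); // marks *Err as checked, if present
      if (Path.empty()) {
        if (Err)
          *Err = llvm::make_error<llvm::StringError>(
              "empty path", llvm::inconvertibleErrorCode());
        return;
      }
      // <body of readConfig>
    } // destructor clears the checked bit, so the caller must still test *Err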
/// Tagged union holding either a T or an Error.
@@ -597,7 +628,7 @@ private:
/// Error cannot be copied, this class replaces getError() with
/// takeError(). It also adds a bool errorIsA<ErrT>() method for testing the
/// error class type.
-template <class T> class Expected {
+template <class T> class LLVM_NODISCARD Expected {
template <class OtherT> friend class Expected;
static const bool isRef = std::is_reference<T>::value;
typedef ReferenceStorage<typename std::remove_reference<T>::type> wrap;
@@ -618,15 +649,20 @@ public:
/// Create an Expected<T> error value from the given Error.
Expected(Error Err)
: HasError(true)
-#ifndef NDEBUG
- ,
- Checked(false)
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
+ // Expected is unchecked upon construction in Debug builds.
+ , Unchecked(true)
#endif
{
assert(Err && "Cannot create Expected<T> from Error success value.");
- new (getErrorStorage()) Error(std::move(Err));
+ new (getErrorStorage()) error_type(Err.takePayload());
}
+ /// Forbid implicit conversion from Error::success(); this avoids having
+ /// Expected<T> foo() { return Error::success(); } compile and then trigger
+ /// the assertion above at run time.
+ Expected(ErrorSuccess) = delete;
+
/// Create an Expected<T> success value from the given OtherT value, which
/// must be convertible to T.
template <typename OtherT>
@@ -634,9 +670,9 @@ public:
typename std::enable_if<std::is_convertible<OtherT, T>::value>::type
* = nullptr)
: HasError(false)
-#ifndef NDEBUG
- ,
- Checked(false)
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
+ // Expected is unchecked upon construction in Debug builds.
+ , Unchecked(true)
#endif
{
new (getStorage()) storage_type(std::forward<OtherT>(Val));
@@ -681,8 +717,8 @@ public:
/// \brief Return false if there is an error.
explicit operator bool() {
-#ifndef NDEBUG
- Checked = !HasError;
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
+ Unchecked = HasError;
#endif
return !HasError;
}
@@ -709,8 +745,8 @@ public:
/// only be safely destructed. No further calls (besides the destructor) should
/// be made on the Expected<T> value.
Error takeError() {
-#ifndef NDEBUG
- Checked = true;
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
+ Unchecked = false;
#endif
return HasError ? Error(std::move(*getErrorStorage())) : Error::success();
}
@@ -752,10 +788,9 @@ private:
template <class OtherT> void moveConstruct(Expected<OtherT> &&Other) {
HasError = Other.HasError;
-
-#ifndef NDEBUG
- Checked = false;
- Other.Checked = true;
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
+ Unchecked = true;
+ Other.Unchecked = false;
#endif
if (!HasError)
@@ -798,8 +833,8 @@ private:
}
void assertIsChecked() {
-#ifndef NDEBUG
- if (!Checked) {
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
+ if (Unchecked) {
dbgs() << "Expected<T> must be checked before access or destruction.\n";
if (HasError) {
dbgs() << "Unchecked Expected<T> contained error:\n";
@@ -818,8 +853,8 @@ private:
AlignedCharArrayUnion<error_type> ErrorStorage;
};
bool HasError : 1;
-#ifndef NDEBUG
- bool Checked : 1;
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
+ bool Unchecked : 1;
#endif
};
@@ -830,6 +865,7 @@ private:
/// std::error_codes.
class ECError : public ErrorInfo<ECError> {
friend Error errorCodeToError(std::error_code);
+
public:
void setErrorCode(std::error_code EC) { this->EC = EC; }
std::error_code convertToErrorCode() const override { return EC; }
@@ -841,6 +877,7 @@ public:
protected:
ECError() = default;
ECError(std::error_code EC) : EC(EC) {}
+
std::error_code EC;
};
@@ -883,9 +920,12 @@ template <typename T> ErrorOr<T> expectedToErrorOr(Expected<T> &&E) {
class StringError : public ErrorInfo<StringError> {
public:
static char ID;
+
StringError(const Twine &S, std::error_code EC);
+
void log(raw_ostream &OS) const override;
std::error_code convertToErrorCode() const override;
+
private:
std::string Msg;
std::error_code EC;
@@ -945,6 +985,6 @@ private:
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err,
bool gen_crash_diag = true);
-} // namespace llvm
+} // end namespace llvm
#endif // LLVM_SUPPORT_ERROR_H
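For context, a minimal sketch of the Expected<T> checking discipline this header documents; parseInt is a stand-in helper, not part of Error.h:

    llvm::Expected<int> parseInt(llvm::StringRef S) {
      int N;
      if (S.getAsInteger(10, N)) // returns true on failure
        return llvm::make_error<llvm::StringError>(
            "not an integer", llvm::inconvertibleErrorCode());
      return N;
    }

    void demo() {
      if (llvm::Expected<int> N = parseInt("42"))
        llvm::outs() << *N << "\n"; // success path: value access is now allowed
      else
        llvm::logAllUnhandledErrors(N.takeError(), llvm::errs(), "parseInt: ");
    }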
diff --git a/contrib/llvm/include/llvm/Support/FileSystem.h b/contrib/llvm/include/llvm/Support/FileSystem.h
index 42a6180e0eb3..9d8d8c3ffb5c 100644
--- a/contrib/llvm/include/llvm/Support/FileSystem.h
+++ b/contrib/llvm/include/llvm/Support/FileSystem.h
@@ -31,10 +31,9 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Chrono.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ErrorOr.h"
-#include "llvm/Support/TimeValue.h"
#include <cassert>
#include <cstdint>
#include <ctime>
@@ -125,6 +124,7 @@ class UniqueID {
public:
UniqueID() = default;
UniqueID(uint64_t Device, uint64_t File) : Device(Device), File(File) {}
+
bool operator==(const UniqueID &Other) const {
return Device == Other.Device && File == Other.File;
}
@@ -132,6 +132,7 @@ public:
bool operator<(const UniqueID &Other) const {
return std::tie(Device, File) < std::tie(Other.Device, Other.File);
}
+
uint64_t getDevice() const { return Device; }
uint64_t getFile() const { return File; }
};
@@ -209,8 +210,8 @@ public:
// getters
file_type type() const { return Type; }
perms permissions() const { return Perms; }
- TimeValue getLastAccessedTime() const;
- TimeValue getLastModificationTime() const;
+ TimePoint<> getLastAccessedTime() const;
+ TimePoint<> getLastModificationTime() const;
UniqueID getUniqueID() const;
#if defined(LLVM_ON_UNIX)
@@ -258,10 +259,12 @@ struct file_magic {
macho_dsym_companion, ///< Mach-O dSYM companion file
macho_kext_bundle, ///< Mach-O kext bundle file
macho_universal_binary, ///< Mach-O universal binary
+ coff_cl_gl_object, ///< Microsoft cl.exe's intermediate code file
coff_object, ///< COFF object file
coff_import_library, ///< COFF import library
pecoff_executable, ///< PECOFF executable file
- windows_resource ///< Windows compiled resource file (.rc)
+ windows_resource, ///< Windows compiled resource file (.rc)
+ wasm_object ///< WebAssembly Object file
};
bool is_object() const {
@@ -339,6 +342,14 @@ std::error_code create_directory(const Twine &path, bool IgnoreExisting = true,
/// specific error_code.
std::error_code create_link(const Twine &to, const Twine &from);
+/// Create a hard link from \a from to \a to, or return an error.
+///
+/// @param to The path to hard link to.
+/// @param from The path to hard link from. This is created.
+/// @returns errc::success if the link was created, otherwise a platform
+/// specific error_code.
+std::error_code create_hard_link(const Twine &to, const Twine &from);
+
/// @brief Get the current path.
///
/// @param result Holds the current path on return.
@@ -540,7 +551,7 @@ inline std::error_code file_size(const Twine &Path, uint64_t &Result) {
/// @returns errc::success if the file times were successfully set, otherwise a
/// platform-specific error_code or errc::function_not_supported on
/// platforms where the functionality isn't available.
-std::error_code setLastModificationAndAccessTime(int FD, TimeValue Time);
+std::error_code setLastModificationAndAccessTime(int FD, TimePoint<> Time);
/// @brief Is status available?
///
@@ -672,10 +683,6 @@ ErrorOr<space_info> disk_space(const Twine &Path);
/// This class represents a memory mapped file. It is based on
/// boost::iostreams::mapped_file.
class mapped_file_region {
- mapped_file_region() = delete;
- mapped_file_region(mapped_file_region&) = delete;
- mapped_file_region &operator =(mapped_file_region&) = delete;
-
public:
enum mapmode {
readonly, ///< May only access map via const_data as read only.
@@ -691,6 +698,10 @@ private:
std::error_code init(int FD, uint64_t Offset, mapmode Mode);
public:
+ mapped_file_region() = delete;
+ mapped_file_region(mapped_file_region&) = delete;
+ mapped_file_region &operator =(mapped_file_region&) = delete;
+
/// \param fd An open file descriptor to map. mapped_file_region takes
/// ownership if closefd is true. It must have been opened in the correct
/// mode.
@@ -731,7 +742,7 @@ public:
: Path(path.str())
, Status(st) {}
- directory_entry() {}
+ directory_entry() = default;
void assign(const Twine &path, file_status st = file_status()) {
Path = path.str();
@@ -829,7 +840,7 @@ namespace detail {
: Level(0)
, HasNoPushRequest(false) {}
- std::stack<directory_iterator, std::vector<directory_iterator> > Stack;
+ std::stack<directory_iterator, std::vector<directory_iterator>> Stack;
uint16_t Level;
bool HasNoPushRequest;
};
@@ -841,13 +852,14 @@ class recursive_directory_iterator {
IntrusiveRefCntPtr<detail::RecDirIterState> State;
public:
- recursive_directory_iterator() {}
+ recursive_directory_iterator() = default;
explicit recursive_directory_iterator(const Twine &path, std::error_code &ec)
: State(new detail::RecDirIterState) {
State->Stack.push(directory_iterator(path, ec));
if (State->Stack.top() == directory_iterator())
State.reset();
}
+
// No operator++ because we need error_code.
recursive_directory_iterator &increment(std::error_code &ec) {
const directory_iterator end_itr;
diff --git a/contrib/llvm/include/llvm/Support/Format.h b/contrib/llvm/include/llvm/Support/Format.h
index d5c301cd7e2b..017b4973f1ff 100644
--- a/contrib/llvm/include/llvm/Support/Format.h
+++ b/contrib/llvm/include/llvm/Support/Format.h
@@ -23,6 +23,7 @@
#ifndef LLVM_SUPPORT_FORMAT_H
#define LLVM_SUPPORT_FORMAT_H
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/DataTypes.h"
@@ -71,10 +72,20 @@ public:
};
/// These are templated helper classes used by the format function that
-/// capture the object to be formated and the format string. When actually
+/// capture the object to be formatted and the format string. When actually
/// printed, this synthesizes the string into a temporary buffer provided and
/// returns whether or not it is big enough.
+// Helper to validate that format() parameters are scalars or pointers.
+template <typename... Args> struct validate_format_parameters;
+template <typename Arg, typename... Args>
+struct validate_format_parameters<Arg, Args...> {
+ static_assert(std::is_scalar<Arg>::value,
+ "format can't be used with non fundamental / non pointer type");
+ validate_format_parameters() { validate_format_parameters<Args...>(); }
+};
+template <> struct validate_format_parameters<> {};
+
template <typename... Ts>
class format_object final : public format_object_base {
std::tuple<Ts...> Vals;
@@ -91,7 +102,9 @@ class format_object final : public format_object_base {
public:
format_object(const char *fmt, const Ts &... vals)
- : format_object_base(fmt), Vals(vals...) {}
+ : format_object_base(fmt), Vals(vals...) {
+ validate_format_parameters<Ts...>();
+ }
int snprint(char *Buffer, unsigned BufferSize) const override {
return snprint_tuple(Buffer, BufferSize, index_sequence_for<Ts...>());
@@ -190,6 +203,46 @@ inline FormattedNumber format_decimal(int64_t N, unsigned Width) {
return FormattedNumber(0, N, Width, false, false, false);
}
+class FormattedBytes {
+ ArrayRef<uint8_t> Bytes;
+
+ // If not None, display offsets for each line relative to starting value.
+ Optional<uint64_t> FirstByteOffset;
+ uint32_t IndentLevel; // Number of characters to indent each line.
+ uint32_t NumPerLine; // Number of bytes to show per line.
+ uint8_t ByteGroupSize; // How many hex bytes are grouped without spaces
+ bool Upper; // Show offset and hex bytes as upper case.
+ bool ASCII; // Show the ASCII bytes for the hex bytes to the right.
+ friend class raw_ostream;
+
+public:
+ FormattedBytes(ArrayRef<uint8_t> B, uint32_t IL, Optional<uint64_t> O,
+ uint32_t NPL, uint8_t BGS, bool U, bool A)
+ : Bytes(B), FirstByteOffset(O), IndentLevel(IL), NumPerLine(NPL),
+ ByteGroupSize(BGS), Upper(U), ASCII(A) {
+
+ if (ByteGroupSize > NumPerLine)
+ ByteGroupSize = NumPerLine;
+ }
+};
+
+inline FormattedBytes
+format_bytes(ArrayRef<uint8_t> Bytes, Optional<uint64_t> FirstByteOffset = None,
+ uint32_t NumPerLine = 16, uint8_t ByteGroupSize = 4,
+ uint32_t IndentLevel = 0, bool Upper = false) {
+ return FormattedBytes(Bytes, IndentLevel, FirstByteOffset, NumPerLine,
+ ByteGroupSize, Upper, false);
+}
+
+inline FormattedBytes
+format_bytes_with_ascii(ArrayRef<uint8_t> Bytes,
+ Optional<uint64_t> FirstByteOffset = None,
+ uint32_t NumPerLine = 16, uint8_t ByteGroupSize = 4,
+ uint32_t IndentLevel = 0, bool Upper = false) {
+ return FormattedBytes(Bytes, IndentLevel, FirstByteOffset, NumPerLine,
+ ByteGroupSize, Upper, true);
+}
+
} // end namespace llvm
#endif
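An illustrative use of the new byte-dump helpers; the values are invented and the streaming assumes the matching raw_ostream operator<< for FormattedBytes added alongside this change:

    std::vector<uint8_t> Data = {0xDE, 0xAD, 0xBE, 0xEF, 0x00, 0x01};
    llvm::outs() << llvm::format_bytes(Data, /*FirstByteOffset=*/0x1000) << "\n";
    llvm::outs() << llvm::format_bytes_with_ascii(Data) << "\n";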
diff --git a/contrib/llvm/include/llvm/Support/FormatAdapters.h b/contrib/llvm/include/llvm/Support/FormatAdapters.h
new file mode 100644
index 000000000000..7bacd2e17135
--- /dev/null
+++ b/contrib/llvm/include/llvm/Support/FormatAdapters.h
@@ -0,0 +1,93 @@
+//===- FormatAdapters.h - Formatters for common LLVM types -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_FORMATADAPTERS_H
+#define LLVM_SUPPORT_FORMATADAPTERS_H
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/FormatCommon.h"
+#include "llvm/Support/FormatVariadicDetails.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+template <typename T> class FormatAdapter : public detail::format_adapter {
+protected:
+ explicit FormatAdapter(T &&Item) : Item(Item) {}
+
+ T Item;
+
+ static_assert(!detail::uses_missing_provider<T>::value,
+ "Item does not have a format provider!");
+};
+
+namespace detail {
+template <typename T> class AlignAdapter final : public FormatAdapter<T> {
+ AlignStyle Where;
+ size_t Amount;
+
+public:
+ AlignAdapter(T &&Item, AlignStyle Where, size_t Amount)
+ : FormatAdapter<T>(std::forward<T>(Item)), Where(Where), Amount(Amount) {}
+
+ void format(llvm::raw_ostream &Stream, StringRef Style) {
+ auto Adapter = detail::build_format_adapter(std::forward<T>(this->Item));
+ FmtAlign(Adapter, Where, Amount).format(Stream, Style);
+ }
+};
+
+template <typename T> class PadAdapter final : public FormatAdapter<T> {
+ size_t Left;
+ size_t Right;
+
+public:
+ PadAdapter(T &&Item, size_t Left, size_t Right)
+ : FormatAdapter<T>(std::forward<T>(Item)), Left(Left), Right(Right) {}
+
+ void format(llvm::raw_ostream &Stream, StringRef Style) {
+ auto Adapter = detail::build_format_adapter(std::forward<T>(this->Item));
+ Stream.indent(Left);
+ Adapter.format(Stream, Style);
+ Stream.indent(Right);
+ }
+};
+
+template <typename T> class RepeatAdapter final : public FormatAdapter<T> {
+ size_t Count;
+
+public:
+ RepeatAdapter(T &&Item, size_t Count)
+ : FormatAdapter<T>(std::forward<T>(Item)), Count(Count) {}
+
+ void format(llvm::raw_ostream &Stream, StringRef Style) {
+ auto Adapter = detail::build_format_adapter(std::forward<T>(this->Item));
+ for (size_t I = 0; I < Count; ++I) {
+ Adapter.format(Stream, Style);
+ }
+ }
+};
+}
+
+template <typename T>
+detail::AlignAdapter<T> fmt_align(T &&Item, AlignStyle Where, size_t Amount) {
+ return detail::AlignAdapter<T>(std::forward<T>(Item), Where, Amount);
+}
+
+template <typename T>
+detail::PadAdapter<T> fmt_pad(T &&Item, size_t Left, size_t Right) {
+ return detail::PadAdapter<T>(std::forward<T>(Item), Left, Right);
+}
+
+template <typename T>
+detail::RepeatAdapter<T> fmt_repeat(T &&Item, size_t Count) {
+ return detail::RepeatAdapter<T>(std::forward<T>(Item), Count);
+}
+}
+
+#endif
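A sketch of how these adapters compose with formatv(), which is added in FormatVariadic.h further down in this diff; the expected outputs are noted in the comments:

    // "[   hi   ]" : the item centered in an 8-character field.
    llvm::outs() << llvm::formatv("[{0}]",
                                  llvm::fmt_align(llvm::StringRef("hi"),
                                                  llvm::AlignStyle::Center, 8));
    // "[  42  ]"   : two spaces of padding on each side.
    llvm::outs() << llvm::formatv("[{0}]", llvm::fmt_pad(42, 2, 2));
    // "[ababab]"   : the item formatted three times.
    llvm::outs() << llvm::formatv("[{0}]", llvm::fmt_repeat(llvm::StringRef("ab"), 3));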
diff --git a/contrib/llvm/include/llvm/Support/FormatCommon.h b/contrib/llvm/include/llvm/Support/FormatCommon.h
new file mode 100644
index 000000000000..a8c5fdeb6bff
--- /dev/null
+++ b/contrib/llvm/include/llvm/Support/FormatCommon.h
@@ -0,0 +1,69 @@
+//===- FormatCommon.h - Formatters for common LLVM types -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_FORMATCOMMON_H
+#define LLVM_SUPPORT_FORMATCOMMON_H
+
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/FormatVariadicDetails.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+enum class AlignStyle { Left, Center, Right };
+
+struct FmtAlign {
+ detail::format_adapter &Adapter;
+ AlignStyle Where;
+ size_t Amount;
+
+ FmtAlign(detail::format_adapter &Adapter, AlignStyle Where, size_t Amount)
+ : Adapter(Adapter), Where(Where), Amount(Amount) {}
+
+ void format(raw_ostream &S, StringRef Options) {
+ // If we don't need to align, we can format straight into the underlying
+ // stream. Otherwise we have to go through an intermediate stream first
+ // in order to calculate how long the output is so we can align it.
+ // TODO: Make the format method return the number of bytes written, that
+ // way we can also skip the intermediate stream for left-aligned output.
+ if (Amount == 0) {
+ Adapter.format(S, Options);
+ return;
+ }
+ SmallString<64> Item;
+ raw_svector_ostream Stream(Item);
+
+ Adapter.format(Stream, Options);
+ if (Amount <= Item.size()) {
+ S << Item;
+ return;
+ }
+
+ size_t PadAmount = Amount - Item.size();
+ switch (Where) {
+ case AlignStyle::Left:
+ S << Item;
+ S.indent(PadAmount);
+ break;
+ case AlignStyle::Center: {
+ size_t X = PadAmount / 2;
+ S.indent(X);
+ S << Item;
+ S.indent(PadAmount - X);
+ break;
+ }
+ default:
+ S.indent(PadAmount);
+ S << Item;
+ break;
+ }
+ }
+};
+}
+
+#endif
diff --git a/contrib/llvm/include/llvm/Support/FormatProviders.h b/contrib/llvm/include/llvm/Support/FormatProviders.h
new file mode 100644
index 000000000000..1f0768c3ab08
--- /dev/null
+++ b/contrib/llvm/include/llvm/Support/FormatProviders.h
@@ -0,0 +1,413 @@
+//===- FormatProviders.h - Formatters for common LLVM types -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements format providers for many common LLVM types, for example
+// allowing precision and width specifiers for scalar and string types.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_FORMATPROVIDERS_H
+#define LLVM_SUPPORT_FORMATPROVIDERS_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Support/FormatVariadicDetails.h"
+#include "llvm/Support/NativeFormatting.h"
+
+#include <type_traits>
+#include <vector>
+
+namespace llvm {
+namespace detail {
+template <typename T>
+struct use_integral_formatter
+ : public std::integral_constant<
+ bool, is_one_of<T, uint8_t, int16_t, uint16_t, int32_t, uint32_t,
+ int64_t, uint64_t, int, unsigned, long, unsigned long,
+ long long, unsigned long long>::value> {};
+
+template <typename T>
+struct use_char_formatter
+ : public std::integral_constant<bool, std::is_same<T, char>::value> {};
+
+template <typename T>
+struct is_cstring
+ : public std::integral_constant<bool,
+ is_one_of<T, char *, const char *>::value> {
+};
+
+template <typename T>
+struct use_string_formatter
+ : public std::integral_constant<
+ bool, is_one_of<T, llvm::StringRef, std::string>::value ||
+ is_cstring<T>::value> {};
+
+template <typename T>
+struct use_pointer_formatter
+ : public std::integral_constant<bool, std::is_pointer<T>::value &&
+ !is_cstring<T>::value> {};
+
+template <typename T>
+struct use_double_formatter
+ : public std::integral_constant<bool, std::is_floating_point<T>::value> {};
+
+class HelperFunctions {
+protected:
+ static Optional<size_t> parseNumericPrecision(StringRef Str) {
+ size_t Prec;
+ Optional<size_t> Result;
+ if (Str.empty())
+ Result = None;
+ else if (Str.getAsInteger(10, Prec)) {
+ assert(false && "Invalid precision specifier");
+ Result = None;
+ } else {
+ assert(Prec < 100 && "Precision out of range");
+ Result = std::min<size_t>(99u, Prec);
+ }
+ return Result;
+ }
+
+ static bool consumeHexStyle(StringRef &Str, HexPrintStyle &Style) {
+ if (!Str.startswith_lower("x"))
+ return false;
+
+ if (Str.consume_front("x-"))
+ Style = HexPrintStyle::Lower;
+ else if (Str.consume_front("X-"))
+ Style = HexPrintStyle::Upper;
+ else if (Str.consume_front("x+") || Str.consume_front("x"))
+ Style = HexPrintStyle::PrefixLower;
+ else if (Str.consume_front("X+") || Str.consume_front("X"))
+ Style = HexPrintStyle::PrefixUpper;
+ return true;
+ }
+
+ static size_t consumeNumHexDigits(StringRef &Str, HexPrintStyle Style,
+ size_t Default) {
+ Str.consumeInteger(10, Default);
+ if (isPrefixedHexStyle(Style))
+ Default += 2;
+ return Default;
+ }
+};
+}
+
+/// Implementation of format_provider<T> for integral arithmetic types.
+///
+/// The options string of an integral type has the grammar:
+///
+/// integer_options :: [style][digits]
+/// style :: <see table below>
+/// digits :: <non-negative integer> 0-99
+///
+/// ==========================================================================
+/// | style | Meaning | Example | Digits Meaning |
+/// --------------------------------------------------------------------------
+/// | | | Input | Output | |
+/// ==========================================================================
+/// | x- | Hex no prefix, lower | 42 | 2a | Minimum # digits |
+/// | X- | Hex no prefix, upper | 42 | 2A | Minimum # digits |
+/// | x+ / x | Hex + prefix, lower | 42 | 0x2a | Minimum # digits |
+/// | X+ / X | Hex + prefix, upper | 42 | 0x2A | Minimum # digits |
+/// | N / n | Digit grouped number | 123456 | 123,456 | Ignored |
+/// | D / d | Integer | 100000 | 100000 | Ignored |
+/// | (empty) | Same as D / d | | | |
+/// ==========================================================================
+///
+
+template <typename T>
+struct format_provider<
+ T, typename std::enable_if<detail::use_integral_formatter<T>::value>::type>
+ : public detail::HelperFunctions {
+private:
+public:
+ static void format(const T &V, llvm::raw_ostream &Stream, StringRef Style) {
+ HexPrintStyle HS;
+ size_t Digits = 0;
+ if (consumeHexStyle(Style, HS)) {
+ Digits = consumeNumHexDigits(Style, HS, 0);
+ write_hex(Stream, V, HS, Digits);
+ return;
+ }
+
+ IntegerStyle IS = IntegerStyle::Integer;
+ if (Style.consume_front("N") || Style.consume_front("n"))
+ IS = IntegerStyle::Number;
+ else if (Style.consume_front("D") || Style.consume_front("d"))
+ IS = IntegerStyle::Integer;
+
+ Style.consumeInteger(10, Digits);
+ assert(Style.empty() && "Invalid integral format style!");
+ write_integer(Stream, V, Digits, IS);
+ }
+};
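For example, via formatv() (defined in FormatVariadic.h later in this diff):

    // -> "0xff 2A 1,000,000 7"
    llvm::outs() << llvm::formatv("{0:x} {1:X-} {2:N} {3}", 255, 42, 1000000, 7);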
+
+/// Implementation of format_provider<T> for integral pointer types.
+///
+/// The options string of a pointer type has the grammar:
+///
+/// pointer_options :: [style][precision]
+/// style :: <see table below>
+/// digits :: <non-negative integer> 0-sizeof(void*)
+///
+/// ==========================================================================
+/// | S | Meaning | Example |
+/// --------------------------------------------------------------------------
+/// | | | Input | Output |
+/// ==========================================================================
+/// | x- | Hex no prefix, lower | 0xDEADBEEF | deadbeef |
+/// | X- | Hex no prefix, upper | 0xDEADBEEF | DEADBEEF |
+/// | x+ / x | Hex + prefix, lower | 0xDEADBEEF | 0xdeadbeef |
+/// | X+ / X | Hex + prefix, upper | 0xDEADBEEF | 0xDEADBEEF |
+/// | (empty) | Same as X+ / X | | |
+/// ==========================================================================
+///
+/// The default precision is the number of nibbles in a machine word, and in all
+/// cases indicates the minimum number of nibbles to print.
+template <typename T>
+struct format_provider<
+ T, typename std::enable_if<detail::use_pointer_formatter<T>::value>::type>
+ : public detail::HelperFunctions {
+private:
+public:
+ static void format(const T &V, llvm::raw_ostream &Stream, StringRef Style) {
+ HexPrintStyle HS = HexPrintStyle::PrefixUpper;
+ consumeHexStyle(Style, HS);
+ size_t Digits = consumeNumHexDigits(Style, HS, sizeof(void *) * 2);
+ write_hex(Stream, reinterpret_cast<std::uintptr_t>(V), HS, Digits);
+ }
+};
+
+/// Implementation of format_provider<T> for c-style strings and string
+/// objects such as std::string and llvm::StringRef.
+///
+/// The options string of a string type has the grammar:
+///
+/// string_options :: [length]
+///
+/// where `length` is an optional integer specifying the maximum number of
+/// characters in the string to print. If `length` is omitted, the entire
+/// string is printed.
+
+template <typename T>
+struct format_provider<
+ T, typename std::enable_if<detail::use_string_formatter<T>::value>::type> {
+ static void format(const T &V, llvm::raw_ostream &Stream, StringRef Style) {
+ size_t N = StringRef::npos;
+ if (!Style.empty() && Style.getAsInteger(10, N)) {
+ assert(false && "Style is not a valid integer");
+ }
+ llvm::StringRef S(V);
+ Stream << S.substr(0, N);
+ }
+};
+
+/// Implementation of format_provider<T> for characters.
+///
+/// The options string of a character type has the grammar:
+///
+/// char_options :: (empty) | [integer_options]
+///
+/// If `char_options` is empty, the character is displayed as an ASCII
+/// character. Otherwise, it is treated as an integer options string.
+///
+template <typename T>
+struct format_provider<
+ T, typename std::enable_if<detail::use_char_formatter<T>::value>::type> {
+ static void format(const char &V, llvm::raw_ostream &Stream,
+ StringRef Style) {
+ if (Style.empty())
+ Stream << V;
+ else {
+ int X = static_cast<int>(V);
+ format_provider<int>::format(X, Stream, Style);
+ }
+ }
+};
+
+/// Implementation of format_provider<T> for type `bool`
+///
+/// The options string of a boolean type has the grammar:
+///
+/// bool_options :: "" | "Y" | "y" | "D" | "d" | "T" | "t"
+///
+/// ==================================
+/// | C | Meaning |
+/// ==================================
+/// | Y | YES / NO |
+/// | y | yes / no |
+/// | D / d | Integer 0 or 1 |
+/// | T | TRUE / FALSE |
+/// | t | true / false |
+/// | (empty) | Equivalent to 't' |
+/// ==================================
+template <> struct format_provider<bool> {
+ static void format(const bool &B, llvm::raw_ostream &Stream,
+ StringRef Style) {
+ Stream << StringSwitch<const char *>(Style)
+ .Case("Y", B ? "YES" : "NO")
+ .Case("y", B ? "yes" : "no")
+ .CaseLower("D", B ? "1" : "0")
+ .Case("T", B ? "TRUE" : "FALSE")
+ .Cases("t", "", B ? "true" : "false")
+ .Default(B ? "1" : "0");
+ }
+};
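For example, again through formatv():

    // -> "YES true 1"
    llvm::outs() << llvm::formatv("{0:Y} {0:t} {0:D}", true);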
+
+/// Implementation of format_provider<T> for floating point types.
+///
+/// The options string of a floating point type has the format:
+///
+/// float_options :: [style][precision]
+/// style :: <see table below>
+/// precision :: <non-negative integer> 0-99
+///
+/// =====================================================
+/// | style | Meaning | Example |
+/// -----------------------------------------------------
+/// | | | Input | Output |
+/// =====================================================
+/// | P / p | Percentage | 0.05 | 5.00% |
+/// | F / f | Fixed point | 1.0 | 1.00 |
+/// | E | Exponential with E | 100000 | 1.0E+05 |
+/// | e | Exponential with e | 100000 | 1.0e+05 |
+/// | (empty) | Same as F / f | | |
+/// =====================================================
+///
+/// The default precision is 6 for exponential (E / e) and 2 for everything
+/// else.
+
+template <typename T>
+struct format_provider<
+ T, typename std::enable_if<detail::use_double_formatter<T>::value>::type>
+ : public detail::HelperFunctions {
+ static void format(const T &V, llvm::raw_ostream &Stream, StringRef Style) {
+ FloatStyle S;
+ if (Style.consume_front("P") || Style.consume_front("p"))
+ S = FloatStyle::Percent;
+ else if (Style.consume_front("F") || Style.consume_front("f"))
+ S = FloatStyle::Fixed;
+ else if (Style.consume_front("E"))
+ S = FloatStyle::ExponentUpper;
+ else if (Style.consume_front("e"))
+ S = FloatStyle::Exponent;
+ else
+ S = FloatStyle::Fixed;
+
+ Optional<size_t> Precision = parseNumericPrecision(Style);
+ if (!Precision.hasValue())
+ Precision = getDefaultPrecision(S);
+
+ write_double(Stream, static_cast<double>(V), S, Precision);
+ }
+};
+
+namespace detail {
+template <typename IterT>
+using IterValue = typename std::iterator_traits<IterT>::value_type;
+
+template <typename IterT>
+struct range_item_has_provider
+ : public std::integral_constant<
+ bool, !uses_missing_provider<IterValue<IterT>>::value> {};
+}
+
+/// Implementation of format_provider<T> for ranges.
+///
+/// This will print an arbitrary range as a delimited sequence of items.
+///
+/// The options string of a range type has the grammar:
+///
+/// range_style ::= [separator] [element_style]
+/// separator ::= "$" delimited_expr
+/// element_style ::= "@" delimited_expr
+/// delimited_expr ::= "[" expr "]" | "(" expr ")" | "<" expr ">"
+/// expr ::= <any string not containing delimiter>
+///
+/// where the separator expression is the string to insert between consecutive
+/// items in the range and the argument expression is the Style specification to
+/// be used when formatting the underlying type. The default separator, if
+/// unspecified, is ", " (a comma followed by a space). The syntax of the
+/// argument expression follows whatever grammar is dictated by the format
+/// provider or format adapter used to format the value type.
+///
+/// Note that attempting to format an `iterator_range<T>` where no format
+/// provider can be found for T will result in a compile error.
+///
+
+template <typename IterT> class format_provider<llvm::iterator_range<IterT>> {
+ using value = typename std::iterator_traits<IterT>::value_type;
+ using reference = typename std::iterator_traits<IterT>::reference;
+
+ static StringRef consumeOneOption(StringRef &Style, char Indicator,
+ StringRef Default) {
+ if (Style.empty())
+ return Default;
+ if (Style.front() != Indicator)
+ return Default;
+ Style = Style.drop_front();
+ if (Style.empty()) {
+ assert(false && "Invalid range style");
+ return Default;
+ }
+
+ std::vector<const char *> Delims = {"[]", "<>", "()"};
+ for (const char *D : Delims) {
+ if (Style.front() != D[0])
+ continue;
+ size_t End = Style.find_first_of(D[1]);
+ if (End == StringRef::npos) {
+ assert(false && "Missing range option end delimeter!");
+ return Default;
+ }
+ StringRef Result = Style.slice(1, End);
+ Style = Style.drop_front(End + 1);
+ return Result;
+ }
+ assert(false && "Invalid range style!");
+ return Default;
+ }
+
+ static std::pair<StringRef, StringRef> parseOptions(StringRef Style) {
+ StringRef Sep = consumeOneOption(Style, '$', ", ");
+ StringRef Args = consumeOneOption(Style, '@', "");
+ assert(Style.empty() && "Unexpected text in range option string!");
+ return std::make_pair(Sep, Args);
+ }
+
+public:
+ static_assert(detail::range_item_has_provider<IterT>::value,
+ "Range value_type does not have a format provider!");
+ static void format(const llvm::iterator_range<IterT> &V,
+ llvm::raw_ostream &Stream, StringRef Style) {
+ StringRef Sep;
+ StringRef ArgStyle;
+ std::tie(Sep, ArgStyle) = parseOptions(Style);
+ auto Begin = V.begin();
+ auto End = V.end();
+ if (Begin != End) {
+ auto Adapter =
+ detail::build_format_adapter(std::forward<reference>(*Begin));
+ Adapter.format(Stream, ArgStyle);
+ ++Begin;
+ }
+ while (Begin != End) {
+ Stream << Sep;
+ auto Adapter =
+ detail::build_format_adapter(std::forward<reference>(*Begin));
+ Adapter.format(Stream, ArgStyle);
+ ++Begin;
+ }
+ }
+};
+}
+
+#endif
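A sketch of the range style in practice, using llvm::make_range from STLExtras.h:

    std::vector<int> V = {8, 9, 10};
    // Separator " + ", element style "x" (prefixed lower-case hex):
    // -> "0x8 + 0x9 + 0xa"
    llvm::outs() << llvm::formatv("{0:$[ + ]@[x]}",
                                  llvm::make_range(V.begin(), V.end()));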
diff --git a/contrib/llvm/include/llvm/Support/FormatVariadic.h b/contrib/llvm/include/llvm/Support/FormatVariadic.h
new file mode 100644
index 000000000000..e5f5c9615cb6
--- /dev/null
+++ b/contrib/llvm/include/llvm/Support/FormatVariadic.h
@@ -0,0 +1,247 @@
+//===- FormatVariadic.h - Efficient type-safe string formatting --*- C++-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the formatv() function which can be used with other LLVM
+// subsystems to provide printf-like formatting, but with improved safety and
+// flexibility. The result of `formatv` is an object which can be streamed to
+// a raw_ostream or converted to a std::string or llvm::SmallString.
+//
+// // Convert to std::string.
+// std::string S = formatv("{0} {1}", 1234.412, "test").str();
+//
+// // Convert to llvm::SmallString
+// SmallString<8> S = formatv("{0} {1}", 1234.412, "test").sstr<8>();
+//
+// // Stream to an existing raw_ostream.
+// OS << formatv("{0} {1}", 1234.412, "test");
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_FORMATVARIADIC_H
+#define LLVM_SUPPORT_FORMATVARIADIC_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/FormatCommon.h"
+#include "llvm/Support/FormatProviders.h"
+#include "llvm/Support/FormatVariadicDetails.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstddef>
+#include <string>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+namespace llvm {
+
+enum class ReplacementType { Empty, Format, Literal };
+
+struct ReplacementItem {
+ ReplacementItem() = default;
+ explicit ReplacementItem(StringRef Literal)
+ : Type(ReplacementType::Literal), Spec(Literal) {}
+ ReplacementItem(StringRef Spec, size_t Index, size_t Align, AlignStyle Where,
+ char Pad, StringRef Options)
+ : Type(ReplacementType::Format), Spec(Spec), Index(Index), Align(Align),
+ Where(Where), Pad(Pad), Options(Options) {}
+
+ ReplacementType Type = ReplacementType::Empty;
+ StringRef Spec;
+ size_t Index = 0;
+ size_t Align = 0;
+ AlignStyle Where = AlignStyle::Right;
+ char Pad;
+ StringRef Options;
+};
+
+class formatv_object_base {
+protected:
+ // The parameters are stored in a std::tuple, which does not provide runtime
+ // indexing capabilities. In order to enable runtime indexing, we use this
+ // structure to put the parameters into a std::vector. Since the parameters
+ // are not all the same type, we use some type-erasure by wrapping the
+ // parameters in a template class that derives from a non-template superclass.
+ // Essentially, we are converting a std::tuple<Derived<Ts...>> to a
+ // std::vector<Base*>.
+ struct create_adapters {
+ template <typename... Ts>
+ std::vector<detail::format_adapter *> operator()(Ts &... Items) {
+ return std::vector<detail::format_adapter *>{&Items...};
+ }
+ };
+
+ StringRef Fmt;
+ std::vector<detail::format_adapter *> Adapters;
+ std::vector<ReplacementItem> Replacements;
+
+ static bool consumeFieldLayout(StringRef &Spec, AlignStyle &Where,
+ size_t &Align, char &Pad);
+
+ static std::pair<ReplacementItem, StringRef>
+ splitLiteralAndReplacement(StringRef Fmt);
+
+public:
+ formatv_object_base(StringRef Fmt, std::size_t ParamCount)
+ : Fmt(Fmt), Replacements(parseFormatString(Fmt)) {
+ Adapters.reserve(ParamCount);
+ }
+
+ void format(raw_ostream &S) const {
+ for (auto &R : Replacements) {
+ if (R.Type == ReplacementType::Empty)
+ continue;
+ if (R.Type == ReplacementType::Literal) {
+ S << R.Spec;
+ continue;
+ }
+ if (R.Index >= Adapters.size()) {
+ S << R.Spec;
+ continue;
+ }
+
+ auto W = Adapters[R.Index];
+
+ FmtAlign Align(*W, R.Where, R.Align);
+ Align.format(S, R.Options);
+ }
+ }
+ static std::vector<ReplacementItem> parseFormatString(StringRef Fmt);
+
+ static Optional<ReplacementItem> parseReplacementItem(StringRef Spec);
+
+ std::string str() const {
+ std::string Result;
+ raw_string_ostream Stream(Result);
+ Stream << *this;
+ Stream.flush();
+ return Result;
+ }
+
+ template <unsigned N> SmallString<N> sstr() const {
+ SmallString<N> Result;
+ raw_svector_ostream Stream(Result);
+ Stream << *this;
+ return Result;
+ }
+
+ template <unsigned N> operator SmallString<N>() const { return sstr<N>(); }
+
+ operator std::string() const { return str(); }
+};
+
+template <typename Tuple> class formatv_object : public formatv_object_base {
+ // Storage for the parameter adapters. Since the base class erases the type
+ // of the parameters, we have to own the storage for the parameters here, and
+ // have the base class store type-erased pointers into this tuple.
+ Tuple Parameters;
+
+public:
+ formatv_object(StringRef Fmt, Tuple &&Params)
+ : formatv_object_base(Fmt, std::tuple_size<Tuple>::value),
+ Parameters(std::move(Params)) {
+ Adapters = apply_tuple(create_adapters(), Parameters);
+ }
+};
+
+// \brief Format text given a format string and replacement parameters.
+//
+// ===General Description===
+//
+// Formats textual output. `Fmt` is a string consisting of one or more
+// replacement sequences with the following grammar:
+//
+// rep_field ::= "{" [index] ["," layout] [":" format] "}"
+// index ::= <non-negative integer>
+// layout ::= [[[char]loc]width]
+// format ::= <any string not containing "{" or "}">
+// char ::= <any character except "{" or "}">
+// loc ::= "-" | "=" | "+"
+// width ::= <positive integer>
+//
+// index - A non-negative integer specifying the index of the item in the
+// parameter pack to print. Any other value is invalid.
+// layout - A string controlling how the field is laid out within the available
+// space.
+// format - A type-dependent string used to provide additional options to
+// the formatting operation. Refer to the documentation of the
+// various individual format providers for per-type options.
+// char - The padding character. Defaults to ' ' (space). Only valid if
+// `loc` is also specified.
+// loc - Where to print the formatted text within the field. Only valid if
+// `width` is also specified.
+// '-' : The field is left aligned within the available space.
+// '=' : The field is centered within the available space.
+// '+' : The field is right aligned within the available space (this
+// is the default).
+// width - The width of the field within which to print the formatted text.
+// If this is less than the required length then the `char` and `loc`
+// fields are ignored, and the field is printed with no leading or
+// trailing padding. If this is greater than the required length,
+// then the text is output according to the value of `loc`, and padded
+// as appropriate on the left and/or right by `char`.
+//
+// ===Special Characters===
+//
+// The characters '{' and '}' are reserved and cannot appear anywhere within a
+// replacement sequence. Outside of a replacement sequence, in order to print
+// a literal '{' or '}' it must be doubled -- "{{" to print a literal '{' and
+// "}}" to print a literal '}'.
+//
+// ===Parameter Indexing===
+// `index` specifies the index of the parameter in the parameter pack to format
+// into the output. Note that it is possible to refer to the same parameter
+// index multiple times in a given format string. This makes it possible to
+// output the same value multiple times without passing it multiple times to the
+// function. For example:
+//
+// formatv("{0} {1} {0}", "a", "bb")
+//
+// would yield the string "abba". This can be convenient when it is expensive
+// to compute the value of the parameter, and you would otherwise have had to
+// save it to a temporary.
+//
+// ===Formatter Search===
+//
+// For a given parameter of type T, the following steps are executed in order
+// until a match is found:
+//
+// 1. If the parameter is of class type, and contains a method
+// void format(raw_ostream &Stream, StringRef Options)
+// Then this method is invoked to produce the formatted output. The
+// implementation should write the formatted text into `Stream`.
+// 2. If there is a suitable template specialization of format_provider<>
+// for type T containing a method whose signature is:
+// void format(const T &Obj, raw_ostream &Stream, StringRef Options)
+// Then this method is invoked as described in Step 1.
+//
+// If a match cannot be found through either of the above methods, a compiler
+// error is generated.
+//
+// ===Invalid Format String Handling===
+//
+// In the case of a format string which does not match the grammar described
+// above, the output is undefined. With asserts enabled, LLVM will trigger an
+// assertion. Otherwise, it will try to do something reasonable, but in general
+// the details of what that is are undefined.
+//
+template <typename... Ts>
+inline auto formatv(const char *Fmt, Ts &&... Vals) -> formatv_object<decltype(
+ std::make_tuple(detail::build_format_adapter(std::forward<Ts>(Vals))...))> {
+ using ParamTuple = decltype(
+ std::make_tuple(detail::build_format_adapter(std::forward<Ts>(Vals))...));
+ return formatv_object<ParamTuple>(
+ Fmt,
+ std::make_tuple(detail::build_format_adapter(std::forward<Ts>(Vals))...));
+}
+
+} // end namespace llvm
+
+#endif // LLVM_SUPPORT_FORMATVARIADIC_H
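To make the layout grammar concrete, a small sketch; the widths and alignment characters follow the `loc`/`width` description above:

    // -> "|      item|item      |   item   |"
    llvm::outs() << llvm::formatv("|{0,10}|{0,-10}|{0,=10}|", "item");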
diff --git a/contrib/llvm/include/llvm/Support/FormatVariadicDetails.h b/contrib/llvm/include/llvm/Support/FormatVariadicDetails.h
new file mode 100644
index 000000000000..b4a564ffc26c
--- /dev/null
+++ b/contrib/llvm/include/llvm/Support/FormatVariadicDetails.h
@@ -0,0 +1,112 @@
+//===- FormatVariadicDetails.h - Helpers for FormatVariadic.h ----*- C++-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_FORMATVARIADIC_DETAILS_H
+#define LLVM_SUPPORT_FORMATVARIADIC_DETAILS_H
+
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <type_traits>
+
+namespace llvm {
+template <typename T, typename Enable = void> struct format_provider {};
+
+namespace detail {
+class format_adapter {
+protected:
+ virtual ~format_adapter() {}
+
+public:
+ virtual void format(raw_ostream &S, StringRef Options) = 0;
+};
+
+template <typename T> class provider_format_adapter : public format_adapter {
+ T Item;
+
+public:
+ explicit provider_format_adapter(T &&Item) : Item(Item) {}
+
+ void format(llvm::raw_ostream &S, StringRef Options) override {
+ format_provider<typename std::decay<T>::type>::format(Item, S, Options);
+ }
+};
+
+template <typename T> class missing_format_adapter;
+
+// Test if format_provider<T> is defined on T and contains a member function
+// with the signature:
+// static void format(const T&, raw_stream &, StringRef);
+//
+template <class T> class has_FormatProvider {
+public:
+ using Decayed = typename std::decay<T>::type;
+ typedef void (*Signature_format)(const Decayed &, llvm::raw_ostream &,
+ StringRef);
+
+ template <typename U>
+ static char test(SameType<Signature_format, &U::format> *);
+
+ template <typename U> static double test(...);
+
+ static bool const value =
+ (sizeof(test<llvm::format_provider<Decayed>>(nullptr)) == 1);
+};
+
+// Simple template that decides whether a type T should use the member-function
+// based format() invocation.
+template <typename T>
+struct uses_format_member
+ : public std::integral_constant<
+ bool,
+ std::is_base_of<format_adapter,
+ typename std::remove_reference<T>::type>::value> {};
+
+// Simple template that decides whether a type T should use the format_provider
+// based format() invocation. The member function takes priority, so this test
+// will only be true if there is not ALSO a format member.
+template <typename T>
+struct uses_format_provider
+ : public std::integral_constant<
+ bool, !uses_format_member<T>::value && has_FormatProvider<T>::value> {
+};
+
+// Simple template that decides whether a type T has neither a member-function
+// nor format_provider based implementation that it can use. Mostly used so
+// that the compiler spits out a nice diagnostic when a type with no format
+// implementation can be located.
+template <typename T>
+struct uses_missing_provider
+ : public std::integral_constant<bool,
+ !uses_format_member<T>::value &&
+ !uses_format_provider<T>::value> {};
+
+template <typename T>
+typename std::enable_if<uses_format_member<T>::value, T>::type
+build_format_adapter(T &&Item) {
+ return std::forward<T>(Item);
+}
+
+template <typename T>
+typename std::enable_if<uses_format_provider<T>::value,
+ provider_format_adapter<T>>::type
+build_format_adapter(T &&Item) {
+ return provider_format_adapter<T>(std::forward<T>(Item));
+}
+
+template <typename T>
+typename std::enable_if<uses_missing_provider<T>::value,
+ missing_format_adapter<T>>::type
+build_format_adapter(T &&Item) {
+ return missing_format_adapter<T>();
+}
+}
+}
+
+#endif
diff --git a/contrib/llvm/include/llvm/Support/GCOV.h b/contrib/llvm/include/llvm/Support/GCOV.h
index 544434f036a4..f297fe609d2a 100644
--- a/contrib/llvm/include/llvm/Support/GCOV.h
+++ b/contrib/llvm/include/llvm/Support/GCOV.h
@@ -1,4 +1,4 @@
-//===- GCOV.h - LLVM coverage tool ----------------------------------------===//
+//===- GCOV.h - LLVM coverage tool ------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -16,12 +16,20 @@
#define LLVM_SUPPORT_GCOV_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/iterator.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/iterator.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <utility>
namespace llvm {
@@ -30,6 +38,7 @@ class GCOVBlock;
class FileInfo;
namespace GCOV {
+
enum GCOVVersion { V402, V404, V704 };
/// \brief A struct for passing gcov options between functions.
@@ -47,7 +56,8 @@ struct Options {
bool LongFileNames;
bool NoOutput;
};
-} // end GCOV namespace
+
+} // end namespace GCOV
/// GCOVBuffer - A wrapper around MemoryBuffer to provide GCOV specific
/// read operations.
@@ -232,8 +242,9 @@ private:
class GCOVFile {
public:
GCOVFile()
- : GCNOInitialized(false), Checksum(0), Functions(), RunCount(0),
+ : GCNOInitialized(false), Checksum(0), RunCount(0),
ProgramCount(0) {}
+
bool readGCNO(GCOVBuffer &Buffer);
bool readGCDA(GCOVBuffer &Buffer);
uint32_t getChecksum() const { return Checksum; }
@@ -312,9 +323,9 @@ public:
typedef SmallVectorImpl<GCOVEdge *>::const_iterator EdgeIterator;
GCOVBlock(GCOVFunction &P, uint32_t N)
- : Parent(P), Number(N), Counter(0), DstEdgesAreSorted(true), SrcEdges(),
- DstEdges(), Lines() {}
+ : Parent(P), Number(N), Counter(0), DstEdgesAreSorted(true) {}
~GCOVBlock();
+
const GCOVFunction &getParent() const { return Parent; }
void addLine(uint32_t N) { Lines.push_back(N); }
uint32_t getLastLine() const { return Lines.back(); }
@@ -325,6 +336,7 @@ public:
assert(&Edge->Dst == this); // up to caller to ensure edge is valid
SrcEdges.push_back(Edge);
}
+
void addDstEdge(GCOVEdge *Edge) {
assert(&Edge->Src == this); // up to caller to ensure edge is valid
// Check if adding this edge causes list to become unsorted.
@@ -332,6 +344,7 @@ public:
DstEdgesAreSorted = false;
DstEdges.push_back(Edge);
}
+
size_t getNumSrcEdges() const { return SrcEdges.size(); }
size_t getNumDstEdges() const { return DstEdges.size(); }
void sortDstEdges();
@@ -396,19 +409,21 @@ class FileInfo {
public:
FileInfo(const GCOV::Options &Options)
- : Options(Options), LineInfo(), RunCount(0), ProgramCount(0) {}
+ : Options(Options), RunCount(0), ProgramCount(0) {}
void addBlockLine(StringRef Filename, uint32_t Line, const GCOVBlock *Block) {
if (Line > LineInfo[Filename].LastLine)
LineInfo[Filename].LastLine = Line;
LineInfo[Filename].Blocks[Line - 1].push_back(Block);
}
+
void addFunctionLine(StringRef Filename, uint32_t Line,
const GCOVFunction *Function) {
if (Line > LineInfo[Filename].LastLine)
LineInfo[Filename].LastLine = Line;
LineInfo[Filename].Functions[Line - 1].push_back(Function);
}
+
void setRunCount(uint32_t Runs) { RunCount = Runs; }
void setProgramCount(uint32_t Programs) { ProgramCount = Programs; }
void print(raw_ostream &OS, StringRef MainFilename, StringRef GCNOFile,
@@ -440,6 +455,7 @@ private:
FileCoverageList FileCoverages;
FuncCoverageMap FuncCoverages;
};
-}
-#endif
+} // end namespace llvm
+
+#endif // LLVM_SUPPORT_GCOV_H
diff --git a/contrib/llvm/include/llvm/Support/GenericDomTree.h b/contrib/llvm/include/llvm/Support/GenericDomTree.h
index 77b5ba7e3e97..07a53438085a 100644
--- a/contrib/llvm/include/llvm/Support/GenericDomTree.h
+++ b/contrib/llvm/include/llvm/Support/GenericDomTree.h
@@ -13,6 +13,12 @@
/// dominance queries on the CFG, but is fully generic w.r.t. the underlying
/// graph types.
///
+/// Unlike the ADT/* graph algorithms, the generic dominator tree places extra
+/// requirements on the graph's NodeRef. The NodeRef must be a pointer and,
+/// depending on the implementation, NodeRef->getParent() must return the
+/// parent node.
+///
+/// FIXME: Maybe GenericDomTree needs a TreeTraits, instead of GraphTraits.
+///
//===----------------------------------------------------------------------===//
#ifndef LLVM_SUPPORT_GENERICDOMTREE_H
@@ -30,6 +36,23 @@
namespace llvm {
+template <class NodeT> class DominatorTreeBase;
+
+namespace detail {
+
+template <typename GT> struct DominatorTreeBaseTraits {
+ static_assert(std::is_pointer<typename GT::NodeRef>::value,
+ "Currently NodeRef must be a pointer type.");
+ using type = DominatorTreeBase<
+ typename std::remove_pointer<typename GT::NodeRef>::type>;
+};
+
+} // End namespace detail
+
+template <typename GT>
+using DominatorTreeBaseByGraphTraits =
+ typename detail::DominatorTreeBaseTraits<GT>::type;
+
/// \brief Base class that other, more interesting dominator analyses
/// inherit from.
template <class NodeT> class DominatorBase {
@@ -62,7 +85,6 @@ public:
bool isPostDominator() const { return IsPostDominators; }
};
-template <class NodeT> class DominatorTreeBase;
struct PostDominatorTree;
/// \brief Base class for the actual dominator tree node.
@@ -126,7 +148,7 @@ public:
assert(IDom && "No immediate dominator?");
if (IDom != NewIDom) {
typename std::vector<DomTreeNodeBase<NodeT> *>::iterator I =
- std::find(IDom->Children.begin(), IDom->Children.end(), this);
+ find(IDom->Children, this);
assert(I != IDom->Children.end() &&
"Not in immediate dominator children set!");
// I am no longer your child...
@@ -177,8 +199,7 @@ void PrintDomTree(const DomTreeNodeBase<NodeT> *N, raw_ostream &o,
// The calculate routine is provided in a separate header but referenced here.
template <class FuncT, class N>
-void Calculate(DominatorTreeBase<typename GraphTraits<N>::NodeType> &DT,
- FuncT &F);
+void Calculate(DominatorTreeBaseByGraphTraits<GraphTraits<N>> &DT, FuncT &F);
/// \brief Core dominator tree base class.
///
@@ -251,14 +272,14 @@ protected:
// NewBB is split and now it has one successor. Update dominator tree to
// reflect this change.
template <class N, class GraphT>
- void Split(DominatorTreeBase<typename GraphT::NodeType> &DT,
- typename GraphT::NodeType *NewBB) {
+ void Split(DominatorTreeBaseByGraphTraits<GraphT> &DT,
+ typename GraphT::NodeRef NewBB) {
assert(std::distance(GraphT::child_begin(NewBB),
GraphT::child_end(NewBB)) == 1 &&
"NewBB should have a single successor!");
- typename GraphT::NodeType *NewBBSucc = *GraphT::child_begin(NewBB);
+ typename GraphT::NodeRef NewBBSucc = *GraphT::child_begin(NewBB);
- std::vector<typename GraphT::NodeType *> PredBlocks;
+ std::vector<typename GraphT::NodeRef> PredBlocks;
typedef GraphTraits<Inverse<N>> InvTraits;
for (typename InvTraits::ChildIteratorType
PI = InvTraits::child_begin(NewBB),
@@ -273,7 +294,7 @@ protected:
PI = InvTraits::child_begin(NewBBSucc),
E = InvTraits::child_end(NewBBSucc);
PI != E; ++PI) {
- typename InvTraits::NodeType *ND = *PI;
+ typename InvTraits::NodeRef ND = *PI;
if (ND != NewBB && !DT.dominates(NewBBSucc, ND) &&
DT.isReachableFromEntry(ND)) {
NewBBDominatesNewBBSucc = false;
@@ -588,7 +609,7 @@ public:
DomTreeNodeBase<NodeT> *IDom = Node->getIDom();
if (IDom) {
typename std::vector<DomTreeNodeBase<NodeT> *>::iterator I =
- std::find(IDom->Children.begin(), IDom->Children.end(), Node);
+ find(IDom->Children, Node);
assert(I != IDom->Children.end() &&
"Not in immediate dominator children set!");
// I am no longer your child...
@@ -627,18 +648,17 @@ public:
protected:
template <class GraphT>
- friend typename GraphT::NodeType *
- Eval(DominatorTreeBase<typename GraphT::NodeType> &DT,
- typename GraphT::NodeType *V, unsigned LastLinked);
+ friend typename GraphT::NodeRef
+ Eval(DominatorTreeBaseByGraphTraits<GraphT> &DT, typename GraphT::NodeRef V,
+ unsigned LastLinked);
template <class GraphT>
- friend unsigned DFSPass(DominatorTreeBase<typename GraphT::NodeType> &DT,
- typename GraphT::NodeType *V, unsigned N);
+ friend unsigned DFSPass(DominatorTreeBaseByGraphTraits<GraphT> &DT,
+ typename GraphT::NodeRef V, unsigned N);
template <class FuncT, class N>
- friend void
- Calculate(DominatorTreeBase<typename GraphTraits<N>::NodeType> &DT, FuncT &F);
-
+ friend void Calculate(DominatorTreeBaseByGraphTraits<GraphTraits<N>> &DT,
+ FuncT &F);
DomTreeNodeBase<NodeT> *getNodeForBlock(NodeT *BB) {
if (DomTreeNodeBase<NodeT> *Node = getNode(BB))
@@ -730,8 +750,8 @@ public:
for (typename TraitsTy::nodes_iterator I = TraitsTy::nodes_begin(&F),
E = TraitsTy::nodes_end(&F);
I != E; ++I)
- if (TraitsTy::child_begin(&*I) == TraitsTy::child_end(&*I))
- addRoot(&*I);
+ if (TraitsTy::child_begin(*I) == TraitsTy::child_end(*I))
+ addRoot(*I);
Calculate<FT, Inverse<NodeT *>>(*this, F);
}
diff --git a/contrib/llvm/include/llvm/Support/GenericDomTreeConstruction.h b/contrib/llvm/include/llvm/Support/GenericDomTreeConstruction.h
index 3e867dc6cbf1..54e55cc1a32e 100644
--- a/contrib/llvm/include/llvm/Support/GenericDomTreeConstruction.h
+++ b/contrib/llvm/include/llvm/Support/GenericDomTreeConstruction.h
@@ -29,9 +29,9 @@
namespace llvm {
-template<class GraphT>
-unsigned DFSPass(DominatorTreeBase<typename GraphT::NodeType>& DT,
- typename GraphT::NodeType* V, unsigned N) {
+template <class GraphT>
+unsigned DFSPass(DominatorTreeBaseByGraphTraits<GraphT> &DT,
+ typename GraphT::NodeRef V, unsigned N) {
// This is more understandable as a recursive algorithm, but we can't use the
// recursive algorithm due to stack depth issues. Keep it here for
// documentation purposes.
@@ -52,15 +52,16 @@ unsigned DFSPass(DominatorTreeBase<typename GraphT::NodeType>& DT,
#else
bool IsChildOfArtificialExit = (N != 0);
- SmallVector<std::pair<typename GraphT::NodeType*,
- typename GraphT::ChildIteratorType>, 32> Worklist;
+ SmallVector<
+ std::pair<typename GraphT::NodeRef, typename GraphT::ChildIteratorType>,
+ 32>
+ Worklist;
Worklist.push_back(std::make_pair(V, GraphT::child_begin(V)));
while (!Worklist.empty()) {
- typename GraphT::NodeType* BB = Worklist.back().first;
+ typename GraphT::NodeRef BB = Worklist.back().first;
typename GraphT::ChildIteratorType NextSucc = Worklist.back().second;
- typename DominatorTreeBase<typename GraphT::NodeType>::InfoRec &BBInfo =
- DT.Info[BB];
+ auto &BBInfo = DT.Info[BB];
// First time we visited this BB?
if (NextSucc == GraphT::child_begin(BB)) {
@@ -89,10 +90,9 @@ unsigned DFSPass(DominatorTreeBase<typename GraphT::NodeType>& DT,
++Worklist.back().second;
// Visit the successor next, if it isn't already visited.
- typename GraphT::NodeType* Succ = *NextSucc;
+ typename GraphT::NodeRef Succ = *NextSucc;
- typename DominatorTreeBase<typename GraphT::NodeType>::InfoRec &SuccVInfo =
- DT.Info[Succ];
+ auto &SuccVInfo = DT.Info[Succ];
if (SuccVInfo.Semi == 0) {
SuccVInfo.Parent = BBDFSNum;
Worklist.push_back(std::make_pair(Succ, GraphT::child_begin(Succ)));
@@ -103,25 +103,23 @@ unsigned DFSPass(DominatorTreeBase<typename GraphT::NodeType>& DT,
}
template <class GraphT>
-typename GraphT::NodeType *
-Eval(DominatorTreeBase<typename GraphT::NodeType> &DT,
- typename GraphT::NodeType *VIn, unsigned LastLinked) {
- typename DominatorTreeBase<typename GraphT::NodeType>::InfoRec &VInInfo =
- DT.Info[VIn];
+typename GraphT::NodeRef Eval(DominatorTreeBaseByGraphTraits<GraphT> &DT,
+ typename GraphT::NodeRef VIn,
+ unsigned LastLinked) {
+ auto &VInInfo = DT.Info[VIn];
if (VInInfo.DFSNum < LastLinked)
return VIn;
- SmallVector<typename GraphT::NodeType*, 32> Work;
- SmallPtrSet<typename GraphT::NodeType*, 32> Visited;
+ SmallVector<typename GraphT::NodeRef, 32> Work;
+ SmallPtrSet<typename GraphT::NodeRef, 32> Visited;
if (VInInfo.Parent >= LastLinked)
Work.push_back(VIn);
while (!Work.empty()) {
- typename GraphT::NodeType* V = Work.back();
- typename DominatorTreeBase<typename GraphT::NodeType>::InfoRec &VInfo =
- DT.Info[V];
- typename GraphT::NodeType* VAncestor = DT.Vertex[VInfo.Parent];
+ typename GraphT::NodeRef V = Work.back();
+ auto &VInfo = DT.Info[V];
+ typename GraphT::NodeRef VAncestor = DT.Vertex[VInfo.Parent];
// Process Ancestor first
if (Visited.insert(VAncestor).second && VInfo.Parent >= LastLinked) {
@@ -134,10 +132,9 @@ Eval(DominatorTreeBase<typename GraphT::NodeType> &DT,
if (VInfo.Parent < LastLinked)
continue;
- typename DominatorTreeBase<typename GraphT::NodeType>::InfoRec &VAInfo =
- DT.Info[VAncestor];
- typename GraphT::NodeType* VAncestorLabel = VAInfo.Label;
- typename GraphT::NodeType* VLabel = VInfo.Label;
+ auto &VAInfo = DT.Info[VAncestor];
+ typename GraphT::NodeRef VAncestorLabel = VAInfo.Label;
+ typename GraphT::NodeRef VLabel = VInfo.Label;
if (DT.Info[VAncestorLabel].Semi < DT.Info[VLabel].Semi)
VInfo.Label = VAncestorLabel;
VInfo.Parent = VAInfo.Parent;
@@ -146,16 +143,18 @@ Eval(DominatorTreeBase<typename GraphT::NodeType> &DT,
return VInInfo.Label;
}
-template<class FuncT, class NodeT>
-void Calculate(DominatorTreeBase<typename GraphTraits<NodeT>::NodeType>& DT,
- FuncT& F) {
+template <class FuncT, class NodeT>
+void Calculate(DominatorTreeBaseByGraphTraits<GraphTraits<NodeT>> &DT,
+ FuncT &F) {
typedef GraphTraits<NodeT> GraphT;
+ static_assert(std::is_pointer<typename GraphT::NodeRef>::value,
+ "NodeRef should be pointer type");
+ typedef typename std::remove_pointer<typename GraphT::NodeRef>::type NodeType;
unsigned N = 0;
bool MultipleRoots = (DT.Roots.size() > 1);
if (MultipleRoots) {
- typename DominatorTreeBase<typename GraphT::NodeType>::InfoRec &BBInfo =
- DT.Info[nullptr];
+ auto &BBInfo = DT.Info[nullptr];
BBInfo.DFSNum = BBInfo.Semi = ++N;
BBInfo.Label = nullptr;
@@ -188,14 +187,13 @@ void Calculate(DominatorTreeBase<typename GraphTraits<NodeT>::NodeType>& DT,
Buckets[i] = i;
for (unsigned i = N; i >= 2; --i) {
- typename GraphT::NodeType* W = DT.Vertex[i];
- typename DominatorTreeBase<typename GraphT::NodeType>::InfoRec &WInfo =
- DT.Info[W];
+ typename GraphT::NodeRef W = DT.Vertex[i];
+ auto &WInfo = DT.Info[W];
// Step #2: Implicitly define the immediate dominator of vertices
for (unsigned j = i; Buckets[j] != i; j = Buckets[j]) {
- typename GraphT::NodeType* V = DT.Vertex[Buckets[j]];
- typename GraphT::NodeType* U = Eval<GraphT>(DT, V, i + 1);
+ typename GraphT::NodeRef V = DT.Vertex[Buckets[j]];
+ typename GraphT::NodeRef U = Eval<GraphT>(DT, V, i + 1);
DT.IDoms[V] = DT.Info[U].Semi < i ? U : W;
}
@@ -207,7 +205,7 @@ void Calculate(DominatorTreeBase<typename GraphTraits<NodeT>::NodeType>& DT,
for (typename InvTraits::ChildIteratorType CI =
InvTraits::child_begin(W),
E = InvTraits::child_end(W); CI != E; ++CI) {
- typename InvTraits::NodeType *N = *CI;
+ typename InvTraits::NodeRef N = *CI;
if (DT.Info.count(N)) { // Only if this predecessor is reachable!
unsigned SemiU = DT.Info[Eval<GraphT>(DT, N, i + 1)].Semi;
if (SemiU < WInfo.Semi)
@@ -227,17 +225,17 @@ void Calculate(DominatorTreeBase<typename GraphTraits<NodeT>::NodeType>& DT,
}
if (N >= 1) {
- typename GraphT::NodeType* Root = DT.Vertex[1];
+ typename GraphT::NodeRef Root = DT.Vertex[1];
for (unsigned j = 1; Buckets[j] != 1; j = Buckets[j]) {
- typename GraphT::NodeType* V = DT.Vertex[Buckets[j]];
+ typename GraphT::NodeRef V = DT.Vertex[Buckets[j]];
DT.IDoms[V] = Root;
}
}
// Step #4: Explicitly define the immediate dominator of each vertex
for (unsigned i = 2; i <= N; ++i) {
- typename GraphT::NodeType* W = DT.Vertex[i];
- typename GraphT::NodeType*& WIDom = DT.IDoms[W];
+ typename GraphT::NodeRef W = DT.Vertex[i];
+ typename GraphT::NodeRef &WIDom = DT.IDoms[W];
if (WIDom != DT.Vertex[DT.Info[W].Semi])
WIDom = DT.IDoms[WIDom];
}
@@ -248,34 +246,32 @@ void Calculate(DominatorTreeBase<typename GraphTraits<NodeT>::NodeType>& DT,
// one exit block, or it may be the virtual exit (denoted by (BasicBlock *)0)
// which postdominates all real exits if there are multiple exit blocks, or
// an infinite loop.
- typename GraphT::NodeType* Root = !MultipleRoots ? DT.Roots[0] : nullptr;
+ typename GraphT::NodeRef Root = !MultipleRoots ? DT.Roots[0] : nullptr;
DT.RootNode =
(DT.DomTreeNodes[Root] =
- llvm::make_unique<DomTreeNodeBase<typename GraphT::NodeType>>(
- Root, nullptr)).get();
+ llvm::make_unique<DomTreeNodeBase<NodeType>>(Root, nullptr))
+ .get();
// Loop over all of the reachable blocks in the function...
for (unsigned i = 2; i <= N; ++i) {
- typename GraphT::NodeType* W = DT.Vertex[i];
+ typename GraphT::NodeRef W = DT.Vertex[i];
// Don't replace this with 'count', the insertion side effect is important
if (DT.DomTreeNodes[W])
continue; // Haven't calculated this node yet?
- typename GraphT::NodeType* ImmDom = DT.getIDom(W);
+ typename GraphT::NodeRef ImmDom = DT.getIDom(W);
assert(ImmDom || DT.DomTreeNodes[nullptr]);
// Get or calculate the node for the immediate dominator
- DomTreeNodeBase<typename GraphT::NodeType> *IDomNode =
- DT.getNodeForBlock(ImmDom);
+ DomTreeNodeBase<NodeType> *IDomNode = DT.getNodeForBlock(ImmDom);
// Add a new tree node for this BasicBlock, and link it as a child of
// IDomNode
DT.DomTreeNodes[W] = IDomNode->addChild(
- llvm::make_unique<DomTreeNodeBase<typename GraphT::NodeType>>(
- W, IDomNode));
+ llvm::make_unique<DomTreeNodeBase<NodeType>>(W, IDomNode));
}
// Free temporary memory used to construct idom's
diff --git a/contrib/llvm/include/llvm/Support/GlobPattern.h b/contrib/llvm/include/llvm/Support/GlobPattern.h
new file mode 100644
index 000000000000..c9436a13c1a3
--- /dev/null
+++ b/contrib/llvm/include/llvm/Support/GlobPattern.h
@@ -0,0 +1,48 @@
+//===-- GlobPattern.h - glob pattern matcher implementation -*- C++ -*-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a glob pattern matcher. Glob patterns follow the
+// matching rules used by the shell.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_GLOB_PATTERN_H
+#define LLVM_SUPPORT_GLOB_PATTERN_H
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Error.h"
+#include <vector>
+
+// This class represents a glob pattern. Supported metacharacters
+// are "*", "?", "[<chars>]" and "[^<chars>]".
+namespace llvm {
+class BitVector;
+template <typename T> class ArrayRef;
+
+class GlobPattern {
+public:
+ static Expected<GlobPattern> create(StringRef Pat);
+ bool match(StringRef S) const;
+
+private:
+ bool matchOne(ArrayRef<BitVector> Pat, StringRef S) const;
+
+ // Parsed glob pattern.
+ std::vector<BitVector> Tokens;
+
+ // The following members are for optimization.
+ Optional<StringRef> Exact;
+ Optional<StringRef> Prefix;
+ Optional<StringRef> Suffix;
+};
+}
+
+#endif // LLVM_SUPPORT_GLOB_PATTERN_H
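The header only declares create() and match(); a hedged usage sketch built from those two entry points (the pattern string and error handling are illustrative):

#include "llvm/Support/GlobPattern.h"

static bool isObjectFile(llvm::StringRef Name) {
  // create() parses the pattern and reports malformed input as an Error.
  llvm::Expected<llvm::GlobPattern> Pat = llvm::GlobPattern::create("*.o");
  if (!Pat) {
    llvm::consumeError(Pat.takeError()); // real callers would report this
    return false;
  }
  // match() tests a candidate string against the parsed pattern.
  return Pat->match(Name);
}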
diff --git a/contrib/llvm/include/llvm/Support/GraphWriter.h b/contrib/llvm/include/llvm/Support/GraphWriter.h
index 86985c569464..7555d5b31a8d 100644
--- a/contrib/llvm/include/llvm/Support/GraphWriter.h
+++ b/contrib/llvm/include/llvm/Support/GraphWriter.h
@@ -59,14 +59,19 @@ class GraphWriter {
typedef DOTGraphTraits<GraphType> DOTTraits;
typedef GraphTraits<GraphType> GTraits;
- typedef typename GTraits::NodeType NodeType;
+ typedef typename GTraits::NodeRef NodeRef;
typedef typename GTraits::nodes_iterator node_iterator;
typedef typename GTraits::ChildIteratorType child_iterator;
DOTTraits DTraits;
+ static_assert(std::is_pointer<NodeRef>::value,
+ "FIXME: Currently GraphWriter requires the NodeRef type to be "
+ "a pointer.\nThe pointer usage should be moved to "
+ "DOTGraphTraits, and removed from GraphWriter itself.");
+
// Writes the edge labels of the node to O and returns true if there are any
// edge labels not equal to the empty string "".
- bool getEdgeSourceLabels(raw_ostream &O, NodeType *Node) {
+ bool getEdgeSourceLabels(raw_ostream &O, NodeRef Node) {
child_iterator EI = GTraits::child_begin(Node);
child_iterator EE = GTraits::child_end(Node);
bool hasEdgeSourceLabels = false;
@@ -144,27 +149,11 @@ public:
writeNode(*I);
}
- bool isNodeHidden(NodeType &Node) {
- return isNodeHidden(&Node);
- }
-
- bool isNodeHidden(NodeType *const *Node) {
- return isNodeHidden(*Node);
- }
-
- bool isNodeHidden(NodeType *Node) {
+ bool isNodeHidden(NodeRef Node) {
return DTraits.isNodeHidden(Node);
}
- void writeNode(NodeType& Node) {
- writeNode(&Node);
- }
-
- void writeNode(NodeType *const *Node) {
- writeNode(*Node);
- }
-
- void writeNode(NodeType *Node) {
+ void writeNode(NodeRef Node) {
std::string NodeAttributes = DTraits.getNodeAttributes(Node, G);
O << "\tNode" << static_cast<const void*>(Node) << " [shape=record,";
@@ -237,8 +226,8 @@ public:
writeEdge(Node, 64, EI);
}
- void writeEdge(NodeType *Node, unsigned edgeidx, child_iterator EI) {
- if (NodeType *TargetNode = *EI) {
+ void writeEdge(NodeRef Node, unsigned edgeidx, child_iterator EI) {
+ if (NodeRef TargetNode = *EI) {
int DestPort = -1;
if (DTraits.edgeTargetsEdgeSource(Node, EI)) {
child_iterator TargetIt = DTraits.getEdgeTarget(Node, EI);
diff --git a/contrib/llvm/include/llvm/Support/Host.h b/contrib/llvm/include/llvm/Support/Host.h
index 8114f9bf846b..9df584c68c0d 100644
--- a/contrib/llvm/include/llvm/Support/Host.h
+++ b/contrib/llvm/include/llvm/Support/Host.h
@@ -18,6 +18,8 @@
#if defined(__linux__) || defined(__GNU__) || defined(__HAIKU__)
#include <endian.h>
+#elif defined(_AIX)
+#include <sys/machine.h>
#else
#if !defined(BYTE_ORDER) && !defined(LLVM_ON_WIN32)
#include <machine/endian.h>
@@ -68,6 +70,11 @@ namespace sys {
///
/// \return - True on success.
bool getHostCPUFeatures(StringMap<bool> &Features);
+
+ /// Get the number of physical cores (as opposed to logical cores returned
+ /// from thread::hardware_concurrency(), which includes hyperthreads).
+ /// Returns -1 if unknown for the current host system.
+ int getHostNumPhysicalCores();
}
}
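A sketch of how a caller might size a worker pool with the new query; the fallback to std::thread::hardware_concurrency() is an assumption of the example, not something this header mandates:

#include "llvm/Support/Host.h"
#include <thread>

static unsigned pickWorkerCount() {
  int Physical = llvm::sys::getHostNumPhysicalCores();
  if (Physical > 0)
    return static_cast<unsigned>(Physical); // hyperthreads excluded
  // -1 means the count is unknown on this host; fall back to logical cores.
  unsigned Logical = std::thread::hardware_concurrency();
  return Logical ? Logical : 1;
}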
diff --git a/contrib/llvm/include/llvm/Support/MD5.h b/contrib/llvm/include/llvm/Support/MD5.h
index 42d8ca8a1ebb..eb181bfe8a5c 100644
--- a/contrib/llvm/include/llvm/Support/MD5.h
+++ b/contrib/llvm/include/llvm/Support/MD5.h
@@ -31,6 +31,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/Endian.h"
+#include <array>
namespace llvm {
template <typename T> class ArrayRef;
@@ -62,6 +63,9 @@ public:
/// deposited into \p Str. The result will be of length 32.
static void stringifyResult(MD5Result &Result, SmallString<32> &Str);
+ /// \brief Computes the hash for the given bytes.
+ static std::array<uint8_t, 16> hash(ArrayRef<uint8_t> Data);
+
private:
const uint8_t *body(ArrayRef<uint8_t> Data);
};
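The one-shot hash() added here complements the existing incremental update()/final() interface; a short illustrative wrapper:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/MD5.h"
#include <array>
#include <cstdint>

static std::array<uint8_t, 16> md5Of(llvm::StringRef Data) {
  // Hash the whole buffer in one call; no MD5 object or final() needed.
  return llvm::MD5::hash(llvm::ArrayRef<uint8_t>(
      reinterpret_cast<const uint8_t *>(Data.data()), Data.size()));
}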
diff --git a/contrib/llvm/include/llvm/Support/MachO.def b/contrib/llvm/include/llvm/Support/MachO.def
index 9ca6440dd82b..57522897d0fc 100644
--- a/contrib/llvm/include/llvm/Support/MachO.def
+++ b/contrib/llvm/include/llvm/Support/MachO.def
@@ -15,27 +15,37 @@
HANDLE_LOAD_COMMAND(LC_SEGMENT, 0x00000001u, segment_command)
HANDLE_LOAD_COMMAND(LC_SYMTAB, 0x00000002u, symtab_command)
+// LC_SYMSEG is obsolete and no longer supported.
HANDLE_LOAD_COMMAND(LC_SYMSEG, 0x00000003u, symseg_command)
HANDLE_LOAD_COMMAND(LC_THREAD, 0x00000004u, thread_command)
HANDLE_LOAD_COMMAND(LC_UNIXTHREAD, 0x00000005u, thread_command)
+// LC_LOADFVMLIB is obsolete and no longer supported.
HANDLE_LOAD_COMMAND(LC_LOADFVMLIB, 0x00000006u, fvmlib_command)
+// LC_IDFVMLIB is obsolete and no longer supported.
HANDLE_LOAD_COMMAND(LC_IDFVMLIB, 0x00000007u, fvmlib_command)
+// LC_IDENT is obsolete and no longer supported.
HANDLE_LOAD_COMMAND(LC_IDENT, 0x00000008u, ident_command)
+// LC_FVMFILE is obsolete and no longer supported.
HANDLE_LOAD_COMMAND(LC_FVMFILE, 0x00000009u, fvmfile_command)
+// LC_PREPAGE is obsolete and no longer supported.
HANDLE_LOAD_COMMAND(LC_PREPAGE, 0x0000000Au, load_command)
HANDLE_LOAD_COMMAND(LC_DYSYMTAB, 0x0000000Bu, dysymtab_command)
HANDLE_LOAD_COMMAND(LC_LOAD_DYLIB, 0x0000000Cu, dylib_command)
HANDLE_LOAD_COMMAND(LC_ID_DYLIB, 0x0000000Du, dylib_command)
HANDLE_LOAD_COMMAND(LC_LOAD_DYLINKER, 0x0000000Eu, dylinker_command)
HANDLE_LOAD_COMMAND(LC_ID_DYLINKER, 0x0000000Fu, dylinker_command)
+// LC_PREBOUND_DYLIB is obsolete and no longer supported.
HANDLE_LOAD_COMMAND(LC_PREBOUND_DYLIB, 0x00000010u, prebound_dylib_command)
HANDLE_LOAD_COMMAND(LC_ROUTINES, 0x00000011u, routines_command)
HANDLE_LOAD_COMMAND(LC_SUB_FRAMEWORK, 0x00000012u, sub_framework_command)
HANDLE_LOAD_COMMAND(LC_SUB_UMBRELLA, 0x00000013u, sub_umbrella_command)
HANDLE_LOAD_COMMAND(LC_SUB_CLIENT, 0x00000014u, sub_client_command)
HANDLE_LOAD_COMMAND(LC_SUB_LIBRARY, 0x00000015u, sub_library_command)
+// LC_TWOLEVEL_HINTS is obsolete and no longer supported.
HANDLE_LOAD_COMMAND(LC_TWOLEVEL_HINTS, 0x00000016u, twolevel_hints_command)
+// LC_PREBIND_CKSUM is obsolete and no longer supported.
HANDLE_LOAD_COMMAND(LC_PREBIND_CKSUM, 0x00000017u, prebind_cksum_command)
+// LC_LOAD_WEAK_DYLIB is obsolete and no longer supported.
HANDLE_LOAD_COMMAND(LC_LOAD_WEAK_DYLIB, 0x80000018u, dylib_command)
HANDLE_LOAD_COMMAND(LC_SEGMENT_64, 0x00000019u, segment_command_64)
HANDLE_LOAD_COMMAND(LC_ROUTINES_64, 0x0000001Au, routines_command_64)
diff --git a/contrib/llvm/include/llvm/Support/MachO.h b/contrib/llvm/include/llvm/Support/MachO.h
index 9a03722d250f..2b23c0f86448 100644
--- a/contrib/llvm/include/llvm/Support/MachO.h
+++ b/contrib/llvm/include/llvm/Support/MachO.h
@@ -302,7 +302,7 @@ namespace llvm {
N_EXT = 0x01
};
- enum NListType {
+ enum NListType : uint8_t {
// Constants for the "n_type & N_TYPE" llvm::MachO::nlist and
// llvm::MachO::nlist_64
N_UNDF = 0x0u,
@@ -578,6 +578,7 @@ namespace llvm {
uint32_t header_addr;
};
+ // The fvmlib_command is obsolete and no longer supported.
struct fvmlib_command {
uint32_t cmd;
uint32_t cmdsize;
@@ -621,6 +622,7 @@ namespace llvm {
uint32_t sub_library;
};
+ // The prebound_dylib_command is obsolete and no longer supported.
struct prebound_dylib_command {
uint32_t cmd;
uint32_t cmdsize;
@@ -740,6 +742,7 @@ namespace llvm {
flags:8;
};
+ // The twolevel_hints_command is obsolete and no longer supported.
struct twolevel_hints_command {
uint32_t cmd;
uint32_t cmdsize;
@@ -747,11 +750,13 @@ namespace llvm {
uint32_t nhints;
};
+ // The twolevel_hints_command is obsolete and no longer supported.
struct twolevel_hint {
uint32_t isub_image:8,
itoc:24;
};
+ // The prebind_cksum_command is obsolete and no longer supported.
struct prebind_cksum_command {
uint32_t cmd;
uint32_t cmdsize;
@@ -835,6 +840,7 @@ namespace llvm {
uint32_t count;
};
+ // The symseg_command is obsolete and no longer supported.
struct symseg_command {
uint32_t cmd;
uint32_t cmdsize;
@@ -842,11 +848,13 @@ namespace llvm {
uint32_t size;
};
+ // The ident_command is obsolete and no longer supported.
struct ident_command {
uint32_t cmd;
uint32_t cmdsize;
};
+ // The fvmfile_command is obsolete and no longer supported.
struct fvmfile_command {
uint32_t cmd;
uint32_t cmdsize;
@@ -1268,12 +1276,14 @@ namespace llvm {
sys::swapByteOrder(C);
}
+ // The prebind_cksum_command is obsolete and no longer supported.
inline void swapStruct(prebind_cksum_command &C) {
sys::swapByteOrder(C.cmd);
sys::swapByteOrder(C.cmdsize);
sys::swapByteOrder(C.cksum);
}
+ // The twolevel_hints_command is obsolete and no longer supported.
inline void swapStruct(twolevel_hints_command &C) {
sys::swapByteOrder(C.cmd);
sys::swapByteOrder(C.cmdsize);
@@ -1281,6 +1291,7 @@ namespace llvm {
sys::swapByteOrder(C.nhints);
}
+ // The prebound_dylib_command is obsolete and no longer supported.
inline void swapStruct(prebound_dylib_command &C) {
sys::swapByteOrder(C.cmd);
sys::swapByteOrder(C.cmdsize);
@@ -1289,6 +1300,7 @@ namespace llvm {
sys::swapByteOrder(C.linked_modules);
}
+ // The fvmfile_command is obsolete and no longer supported.
inline void swapStruct(fvmfile_command &C) {
sys::swapByteOrder(C.cmd);
sys::swapByteOrder(C.cmdsize);
@@ -1296,6 +1308,7 @@ namespace llvm {
sys::swapByteOrder(C.header_addr);
}
+ // The symseg_command is obsolete and no longer supported.
inline void swapStruct(symseg_command &C) {
sys::swapByteOrder(C.cmd);
sys::swapByteOrder(C.cmdsize);
@@ -1303,6 +1316,7 @@ namespace llvm {
sys::swapByteOrder(C.size);
}
+ // The ident_command is obsolete and no longer supported.
inline void swapStruct(ident_command &C) {
sys::swapByteOrder(C.cmd);
sys::swapByteOrder(C.cmdsize);
@@ -1314,6 +1328,7 @@ namespace llvm {
sys::swapByteOrder(C.header_addr);
}
+ // The fvmlib_command is obsolete and no longer supported.
inline void swapStruct(fvmlib_command &C) {
sys::swapByteOrder(C.cmd);
sys::swapByteOrder(C.cmdsize);
@@ -1710,6 +1725,204 @@ namespace llvm {
const uint32_t x86_EXCEPTION_STATE_COUNT =
sizeof(x86_exception_state_t) / sizeof(uint32_t);
+ struct arm_thread_state32_t {
+ uint32_t r[13];
+ uint32_t sp;
+ uint32_t lr;
+ uint32_t pc;
+ uint32_t cpsr;
+ };
+
+ inline void swapStruct(arm_thread_state32_t &x) {
+ for (int i = 0; i < 13; i++)
+ sys::swapByteOrder(x.r[i]);
+ sys::swapByteOrder(x.sp);
+ sys::swapByteOrder(x.lr);
+ sys::swapByteOrder(x.pc);
+ sys::swapByteOrder(x.cpsr);
+ }
+
+ struct arm_thread_state64_t {
+ uint64_t x[29];
+ uint64_t fp;
+ uint64_t lr;
+ uint64_t sp;
+ uint64_t pc;
+ uint32_t cpsr;
+ uint32_t pad;
+ };
+
+ inline void swapStruct(arm_thread_state64_t &x) {
+ for (int i = 0; i < 29; i++)
+ sys::swapByteOrder(x.x[i]);
+ sys::swapByteOrder(x.fp);
+ sys::swapByteOrder(x.lr);
+ sys::swapByteOrder(x.sp);
+ sys::swapByteOrder(x.pc);
+ sys::swapByteOrder(x.cpsr);
+ }
+
+ struct arm_state_hdr_t {
+ uint32_t flavor;
+ uint32_t count;
+ };
+
+ struct arm_thread_state_t {
+ arm_state_hdr_t tsh;
+ union {
+ arm_thread_state32_t ts32;
+ } uts;
+ };
+
+ inline void swapStruct(arm_state_hdr_t &x) {
+ sys::swapByteOrder(x.flavor);
+ sys::swapByteOrder(x.count);
+ }
+
+ enum ARMThreadFlavors {
+ ARM_THREAD_STATE = 1,
+ ARM_VFP_STATE = 2,
+ ARM_EXCEPTION_STATE = 3,
+ ARM_DEBUG_STATE = 4,
+ ARN_THREAD_STATE_NONE = 5,
+ ARM_THREAD_STATE64 = 6,
+ ARM_EXCEPTION_STATE64 = 7
+ };
+
+ inline void swapStruct(arm_thread_state_t &x) {
+ swapStruct(x.tsh);
+ if (x.tsh.flavor == ARM_THREAD_STATE)
+ swapStruct(x.uts.ts32);
+ }
+
+ const uint32_t ARM_THREAD_STATE_COUNT =
+ sizeof(arm_thread_state32_t) / sizeof(uint32_t);
+
+ const uint32_t ARM_THREAD_STATE64_COUNT =
+ sizeof(arm_thread_state64_t) / sizeof(uint32_t);
+
+ struct ppc_thread_state32_t {
+ uint32_t srr0;
+ uint32_t srr1;
+ uint32_t r0;
+ uint32_t r1;
+ uint32_t r2;
+ uint32_t r3;
+ uint32_t r4;
+ uint32_t r5;
+ uint32_t r6;
+ uint32_t r7;
+ uint32_t r8;
+ uint32_t r9;
+ uint32_t r10;
+ uint32_t r11;
+ uint32_t r12;
+ uint32_t r13;
+ uint32_t r14;
+ uint32_t r15;
+ uint32_t r16;
+ uint32_t r17;
+ uint32_t r18;
+ uint32_t r19;
+ uint32_t r20;
+ uint32_t r21;
+ uint32_t r22;
+ uint32_t r23;
+ uint32_t r24;
+ uint32_t r25;
+ uint32_t r26;
+ uint32_t r27;
+ uint32_t r28;
+ uint32_t r29;
+ uint32_t r30;
+ uint32_t r31;
+ uint32_t ct;
+ uint32_t xer;
+ uint32_t lr;
+ uint32_t ctr;
+ uint32_t mq;
+ uint32_t vrsave;
+ };
+
+ inline void swapStruct(ppc_thread_state32_t &x) {
+ sys::swapByteOrder(x.srr0);
+ sys::swapByteOrder(x.srr1);
+ sys::swapByteOrder(x.r0);
+ sys::swapByteOrder(x.r1);
+ sys::swapByteOrder(x.r2);
+ sys::swapByteOrder(x.r3);
+ sys::swapByteOrder(x.r4);
+ sys::swapByteOrder(x.r5);
+ sys::swapByteOrder(x.r6);
+ sys::swapByteOrder(x.r7);
+ sys::swapByteOrder(x.r8);
+ sys::swapByteOrder(x.r9);
+ sys::swapByteOrder(x.r10);
+ sys::swapByteOrder(x.r11);
+ sys::swapByteOrder(x.r12);
+ sys::swapByteOrder(x.r13);
+ sys::swapByteOrder(x.r14);
+ sys::swapByteOrder(x.r15);
+ sys::swapByteOrder(x.r16);
+ sys::swapByteOrder(x.r17);
+ sys::swapByteOrder(x.r18);
+ sys::swapByteOrder(x.r19);
+ sys::swapByteOrder(x.r20);
+ sys::swapByteOrder(x.r21);
+ sys::swapByteOrder(x.r22);
+ sys::swapByteOrder(x.r23);
+ sys::swapByteOrder(x.r24);
+ sys::swapByteOrder(x.r25);
+ sys::swapByteOrder(x.r26);
+ sys::swapByteOrder(x.r27);
+ sys::swapByteOrder(x.r28);
+ sys::swapByteOrder(x.r29);
+ sys::swapByteOrder(x.r30);
+ sys::swapByteOrder(x.r31);
+ sys::swapByteOrder(x.ct);
+ sys::swapByteOrder(x.xer);
+ sys::swapByteOrder(x.lr);
+ sys::swapByteOrder(x.ctr);
+ sys::swapByteOrder(x.mq);
+ sys::swapByteOrder(x.vrsave);
+ }
+
+ struct ppc_state_hdr_t {
+ uint32_t flavor;
+ uint32_t count;
+ };
+
+ struct ppc_thread_state_t {
+ ppc_state_hdr_t tsh;
+ union {
+ ppc_thread_state32_t ts32;
+ } uts;
+ };
+
+ inline void swapStruct(ppc_state_hdr_t &x) {
+ sys::swapByteOrder(x.flavor);
+ sys::swapByteOrder(x.count);
+ }
+
+ enum PPCThreadFlavors {
+ PPC_THREAD_STATE = 1,
+ PPC_FLOAT_STATE = 2,
+ PPC_EXCEPTION_STATE = 3,
+ PPC_VECTOR_STATE = 4,
+ PPC_THREAD_STATE64 = 5,
+ PPC_EXCEPTION_STATE64 = 6,
+ PPC_THREAD_STATE_NONE = 7
+ };
+
+ inline void swapStruct(ppc_thread_state_t &x) {
+ swapStruct(x.tsh);
+ if (x.tsh.flavor == PPC_THREAD_STATE)
+ swapStruct(x.uts.ts32);
+ }
+
+ const uint32_t PPC_THREAD_STATE_COUNT =
+ sizeof(ppc_thread_state32_t) / sizeof(uint32_t);
+
// Define a union of all load command structs
#define LOAD_COMMAND_STRUCT(LCStruct) LCStruct LCStruct##_data;
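The new thread-state structs follow the existing swapStruct() convention; a sketch of how a Mach-O reader on a little-endian host might use them (how TS was read from the file is elided):

#include "llvm/Support/Host.h"
#include "llvm/Support/MachO.h"

static void fixEndianness(llvm::MachO::ppc_thread_state_t &TS) {
  // PPC Mach-O files are big-endian; swap only on little-endian hosts.
  if (llvm::sys::IsLittleEndianHost)
    llvm::MachO::swapStruct(TS); // swaps the header and, for
                                 // PPC_THREAD_STATE, the 32-bit register set
}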
diff --git a/contrib/llvm/include/llvm/Support/ManagedStatic.h b/contrib/llvm/include/llvm/Support/ManagedStatic.h
index ec8154b828e5..7ce86eee95d2 100644
--- a/contrib/llvm/include/llvm/Support/ManagedStatic.h
+++ b/contrib/llvm/include/llvm/Support/ManagedStatic.h
@@ -46,6 +46,7 @@ protected:
mutable const ManagedStaticBase *Next;
void RegisterManagedStatic(void *(*creator)(), void (*deleter)(void*)) const;
+
public:
/// isConstructed - Return true if this object has not been created yet.
bool isConstructed() const { return Ptr != nullptr; }
@@ -89,10 +90,10 @@ void llvm_shutdown();
/// llvm_shutdown_obj - This is a simple helper class that calls
/// llvm_shutdown() when it is destroyed.
struct llvm_shutdown_obj {
- llvm_shutdown_obj() { }
+ llvm_shutdown_obj() = default;
~llvm_shutdown_obj() { llvm_shutdown(); }
};
-}
+} // end namespace llvm
-#endif
+#endif // LLVM_SUPPORT_MANAGEDSTATIC_H
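Typical use of llvm_shutdown_obj is unchanged by this cleanup; the standard RAII pattern for reference:

#include "llvm/Support/ManagedStatic.h"

int main() {
  // ManagedStatic objects created during the run are destroyed by
  // llvm_shutdown() when Y goes out of scope, including on early returns.
  llvm::llvm_shutdown_obj Y;
  // ... tool logic ...
  return 0;
}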
diff --git a/contrib/llvm/include/llvm/Support/MathExtras.h b/contrib/llvm/include/llvm/Support/MathExtras.h
index 5c816ac9df92..77970f487112 100644
--- a/contrib/llvm/include/llvm/Support/MathExtras.h
+++ b/contrib/llvm/include/llvm/Support/MathExtras.h
@@ -245,44 +245,40 @@ T reverseBits(T Val) {
// ambiguity.
/// Hi_32 - This function returns the high 32 bits of a 64 bit value.
-inline uint32_t Hi_32(uint64_t Value) {
+constexpr inline uint32_t Hi_32(uint64_t Value) {
return static_cast<uint32_t>(Value >> 32);
}
/// Lo_32 - This function returns the low 32 bits of a 64 bit value.
-inline uint32_t Lo_32(uint64_t Value) {
+constexpr inline uint32_t Lo_32(uint64_t Value) {
return static_cast<uint32_t>(Value);
}
/// Make_64 - This functions makes a 64-bit integer from a high / low pair of
/// 32-bit integers.
-inline uint64_t Make_64(uint32_t High, uint32_t Low) {
+constexpr inline uint64_t Make_64(uint32_t High, uint32_t Low) {
return ((uint64_t)High << 32) | (uint64_t)Low;
}
/// isInt - Checks if an integer fits into the given bit width.
-template<unsigned N>
-inline bool isInt(int64_t x) {
+template <unsigned N> constexpr inline bool isInt(int64_t x) {
return N >= 64 || (-(INT64_C(1)<<(N-1)) <= x && x < (INT64_C(1)<<(N-1)));
}
// Template specializations to get better code for common cases.
-template<>
-inline bool isInt<8>(int64_t x) {
+template <> constexpr inline bool isInt<8>(int64_t x) {
return static_cast<int8_t>(x) == x;
}
-template<>
-inline bool isInt<16>(int64_t x) {
+template <> constexpr inline bool isInt<16>(int64_t x) {
return static_cast<int16_t>(x) == x;
}
-template<>
-inline bool isInt<32>(int64_t x) {
+template <> constexpr inline bool isInt<32>(int64_t x) {
return static_cast<int32_t>(x) == x;
}
/// isShiftedInt<N,S> - Checks if a signed integer is an N bit number shifted
/// left by S.
-template<unsigned N, unsigned S>
-inline bool isShiftedInt(int64_t x) {
+template <unsigned N, unsigned S>
+constexpr inline bool isShiftedInt(int64_t x) {
static_assert(
N > 0, "isShiftedInt<0> doesn't make sense (refers to a 0-bit number.");
static_assert(N + S <= 64, "isShiftedInt<N, S> with N + S > 64 is too wide.");
@@ -290,29 +286,39 @@ inline bool isShiftedInt(int64_t x) {
}
/// isUInt - Checks if an unsigned integer fits into the given bit width.
-template<unsigned N>
-inline bool isUInt(uint64_t x) {
- static_assert(N > 0, "isUInt<0> doesn't make sense.");
- return N >= 64 || x < (UINT64_C(1)<<(N));
+///
+/// This is written as two functions rather than as simply
+///
+/// return N >= 64 || X < (UINT64_C(1) << N);
+///
+/// to keep MSVC from (incorrectly) warning on isUInt<64> that we're shifting
+/// left too many places.
+template <unsigned N>
+constexpr inline typename std::enable_if<(N < 64), bool>::type
+isUInt(uint64_t X) {
+ static_assert(N > 0, "isUInt<0> doesn't make sense");
+ return X < (UINT64_C(1) << (N));
+}
+template <unsigned N>
+constexpr inline typename std::enable_if<N >= 64, bool>::type
+isUInt(uint64_t X) {
+ return true;
}
// Template specializations to get better code for common cases.
-template<>
-inline bool isUInt<8>(uint64_t x) {
+template <> constexpr inline bool isUInt<8>(uint64_t x) {
return static_cast<uint8_t>(x) == x;
}
-template<>
-inline bool isUInt<16>(uint64_t x) {
+template <> constexpr inline bool isUInt<16>(uint64_t x) {
return static_cast<uint16_t>(x) == x;
}
-template<>
-inline bool isUInt<32>(uint64_t x) {
+template <> constexpr inline bool isUInt<32>(uint64_t x) {
return static_cast<uint32_t>(x) == x;
}
/// Checks if an unsigned integer is an N bit number shifted left by S.
-template<unsigned N, unsigned S>
-inline bool isShiftedUInt(uint64_t x) {
+template <unsigned N, unsigned S>
+constexpr inline bool isShiftedUInt(uint64_t x) {
static_assert(
N > 0, "isShiftedUInt<0> doesn't make sense (refers to a 0-bit number)");
static_assert(N + S <= 64,
@@ -364,39 +370,39 @@ inline bool isIntN(unsigned N, int64_t x) {
/// isMask_32 - This function returns true if the argument is a non-empty
/// sequence of ones starting at the least significant bit with the remainder
/// zero (32 bit version). Ex. isMask_32(0x0000FFFFU) == true.
-inline bool isMask_32(uint32_t Value) {
+constexpr inline bool isMask_32(uint32_t Value) {
return Value && ((Value + 1) & Value) == 0;
}
/// isMask_64 - This function returns true if the argument is a non-empty
/// sequence of ones starting at the least significant bit with the remainder
/// zero (64 bit version).
-inline bool isMask_64(uint64_t Value) {
+constexpr inline bool isMask_64(uint64_t Value) {
return Value && ((Value + 1) & Value) == 0;
}
/// isShiftedMask_32 - This function returns true if the argument contains a
/// non-empty sequence of ones with the remainder zero (32 bit version.)
/// Ex. isShiftedMask_32(0x0000FF00U) == true.
-inline bool isShiftedMask_32(uint32_t Value) {
+constexpr inline bool isShiftedMask_32(uint32_t Value) {
return Value && isMask_32((Value - 1) | Value);
}
/// isShiftedMask_64 - This function returns true if the argument contains a
/// non-empty sequence of ones with the remainder zero (64 bit version.)
-inline bool isShiftedMask_64(uint64_t Value) {
+constexpr inline bool isShiftedMask_64(uint64_t Value) {
return Value && isMask_64((Value - 1) | Value);
}
/// isPowerOf2_32 - This function returns true if the argument is a power of
/// two > 0. Ex. isPowerOf2_32(0x00100000U) == true (32 bit edition.)
-inline bool isPowerOf2_32(uint32_t Value) {
+constexpr inline bool isPowerOf2_32(uint32_t Value) {
return Value && !(Value & (Value - 1));
}
/// isPowerOf2_64 - This function returns true if the argument is a power of two
/// > 0 (64 bit edition.)
-inline bool isPowerOf2_64(uint64_t Value) {
+constexpr inline bool isPowerOf2_64(uint64_t Value) {
return Value && !(Value & (Value - int64_t(1L)));
}
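Making these predicates constexpr (and splitting isUInt so isUInt<64> is well defined) lets callers move range checks to compile time; a few illustrative assertions:

#include "llvm/Support/MathExtras.h"

// All evaluated at compile time now that the predicates are constexpr.
static_assert(llvm::isUInt<8>(255), "255 fits in 8 unsigned bits");
static_assert(!llvm::isInt<8>(128), "128 does not fit in a signed 8-bit int");
static_assert(llvm::isUInt<64>(~0ULL), "the N >= 64 overload is always true");
static_assert(llvm::isPowerOf2_32(0x00100000U), "doc example from above");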
@@ -541,23 +547,19 @@ inline uint64_t GreatestCommonDivisor64(uint64_t A, uint64_t B) {
/// BitsToDouble - This function takes a 64-bit integer and returns the bit
/// equivalent double.
inline double BitsToDouble(uint64_t Bits) {
- union {
- uint64_t L;
- double D;
- } T;
- T.L = Bits;
- return T.D;
+ double D;
+ static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
+ memcpy(&D, &Bits, sizeof(Bits));
+ return D;
}
/// BitsToFloat - This function takes a 32-bit integer and returns the bit
/// equivalent float.
inline float BitsToFloat(uint32_t Bits) {
- union {
- uint32_t I;
- float F;
- } T;
- T.I = Bits;
- return T.F;
+ float F;
+ static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
+ memcpy(&F, &Bits, sizeof(Bits));
+ return F;
}
/// DoubleToBits - This function takes a double and returns the bit
@@ -565,12 +567,10 @@ inline float BitsToFloat(uint32_t Bits) {
/// changes the bits of NaNs on some hosts, notably x86, so this
/// routine cannot be used if these bits are needed.
inline uint64_t DoubleToBits(double Double) {
- union {
- uint64_t L;
- double D;
- } T;
- T.D = Double;
- return T.L;
+ uint64_t Bits;
+ static_assert(sizeof(uint64_t) == sizeof(double), "Unexpected type sizes");
+ memcpy(&Bits, &Double, sizeof(Double));
+ return Bits;
}
/// FloatToBits - This function takes a float and returns the bit
@@ -578,17 +578,15 @@ inline uint64_t DoubleToBits(double Double) {
/// changes the bits of NaNs on some hosts, notably x86, so this
/// routine cannot be used if these bits are needed.
inline uint32_t FloatToBits(float Float) {
- union {
- uint32_t I;
- float F;
- } T;
- T.F = Float;
- return T.I;
+ uint32_t Bits;
+ static_assert(sizeof(uint32_t) == sizeof(float), "Unexpected type sizes");
+ memcpy(&Bits, &Float, sizeof(Float));
+ return Bits;
}
/// MinAlign - A and B are either alignments or offsets. Return the minimum
/// alignment that may be assumed after adding the two together.
-inline uint64_t MinAlign(uint64_t A, uint64_t B) {
+constexpr inline uint64_t MinAlign(uint64_t A, uint64_t B) {
// The largest power of 2 that divides both A and B.
//
// Replace "-Value" by "1+~Value" in the following commented code to avoid
@@ -635,6 +633,14 @@ inline uint64_t PowerOf2Floor(uint64_t A) {
return 1ull << (63 - countLeadingZeros(A, ZB_Undefined));
}
+/// Returns the power of two which is greater than or equal to the given value.
+/// Essentially, it is a ceil operation across the domain of powers of two.
+inline uint64_t PowerOf2Ceil(uint64_t A) {
+ if (!A)
+ return 0;
+ return NextPowerOf2(A - 1);
+}
+
/// Returns the next integer (mod 2**64) that is greater than or equal to
/// \p Value and is a multiple of \p Align. \p Align must be non-zero.
///
@@ -656,13 +662,35 @@ inline uint64_t PowerOf2Floor(uint64_t A) {
/// alignTo(321, 255, 42) = 552
/// \endcode
inline uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
+ assert(Align != 0u && "Align can't be 0.");
Skew %= Align;
return (Value + Align - 1 - Skew) / Align * Align + Skew;
}
+/// Returns the next integer (mod 2**64) that is greater than or equal to
+/// \p Value and is a multiple of \c Align. \c Align must be non-zero.
+template <uint64_t Align> constexpr inline uint64_t alignTo(uint64_t Value) {
+ static_assert(Align != 0u, "Align must be non-zero");
+ return (Value + Align - 1) / Align * Align;
+}
+
+/// \c alignTo for contexts where a constant expression is required.
+/// \sa alignTo
+///
+/// \todo FIXME: remove when \c constexpr becomes really \c constexpr
+template <uint64_t Align>
+struct AlignTo {
+ static_assert(Align != 0u, "Align must be non-zero");
+ template <uint64_t Value>
+ struct from_value {
+ static const uint64_t value = (Value + Align - 1) / Align * Align;
+ };
+};
+
/// Returns the largest uint64_t less than or equal to \p Value that is
/// \p Skew mod \p Align. \p Align must be non-zero.
inline uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew = 0) {
+ assert(Align != 0u && "Align can't be 0.");
Skew %= Align;
return (Value - Skew) / Align * Align + Skew;
}
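The compile-time variants mirror the runtime alignTo(); an illustrative comparison (values arbitrary):

#include "llvm/Support/MathExtras.h"

static uint64_t roundUp(uint64_t Size) {
  // Runtime form; Align must be non-zero (now enforced by an assert).
  return llvm::alignTo(Size, 8); // alignTo(13, 8) == 16
}

// constexpr template form, usable directly in constant expressions.
static_assert(llvm::alignTo<8>(13) == 16, "rounds up to the next multiple of 8");

// Pure template form for contexts that need an integral constant (kept, per
// the FIXME above, until constexpr support is universal).
static_assert(llvm::AlignTo<8>::from_value<13>::value == 16, "same result");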
@@ -676,7 +704,7 @@ inline uint64_t OffsetToAlignment(uint64_t Value, uint64_t Align) {
/// Sign-extend the number in the bottom B bits of X to a 32-bit integer.
/// Requires 0 < B <= 32.
-template <unsigned B> inline int32_t SignExtend32(uint32_t X) {
+template <unsigned B> constexpr inline int32_t SignExtend32(uint32_t X) {
static_assert(B > 0, "Bit width can't be 0.");
static_assert(B <= 32, "Bit width out of range.");
return int32_t(X << (32 - B)) >> (32 - B);
@@ -692,7 +720,7 @@ inline int32_t SignExtend32(uint32_t X, unsigned B) {
/// Sign-extend the number in the bottom B bits of X to a 64-bit integer.
/// Requires 0 < B < 64.
-template <unsigned B> inline int64_t SignExtend64(uint64_t x) {
+template <unsigned B> constexpr inline int64_t SignExtend64(uint64_t x) {
static_assert(B > 0, "Bit width can't be 0.");
static_assert(B <= 64, "Bit width out of range.");
return int64_t(x << (64 - B)) >> (64 - B);
diff --git a/contrib/llvm/include/llvm/Support/MemoryBuffer.h b/contrib/llvm/include/llvm/Support/MemoryBuffer.h
index 73d643537a6f..f739d19907b0 100644
--- a/contrib/llvm/include/llvm/Support/MemoryBuffer.h
+++ b/contrib/llvm/include/llvm/Support/MemoryBuffer.h
@@ -14,13 +14,17 @@
#ifndef LLVM_SUPPORT_MEMORYBUFFER_H
#define LLVM_SUPPORT_MEMORYBUFFER_H
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/CBindingWrapping.h"
-#include "llvm/Support/DataTypes.h"
#include "llvm/Support/ErrorOr.h"
+#include "llvm-c/Types.h"
#include <memory>
+#include <cstddef>
+#include <cstdint>
namespace llvm {
+
class MemoryBufferRef;
/// This interface provides simple read-only access to a block of memory, and
@@ -37,13 +41,15 @@ class MemoryBuffer {
const char *BufferStart; // Start of the buffer.
const char *BufferEnd; // End of the buffer.
- MemoryBuffer(const MemoryBuffer &) = delete;
- MemoryBuffer &operator=(const MemoryBuffer &) = delete;
+
protected:
- MemoryBuffer() {}
+ MemoryBuffer() = default;
+
void init(const char *BufStart, const char *BufEnd,
bool RequiresNullTerminator);
public:
+ MemoryBuffer(const MemoryBuffer &) = delete;
+ MemoryBuffer &operator=(const MemoryBuffer &) = delete;
virtual ~MemoryBuffer();
const char *getBufferStart() const { return BufferStart; }
@@ -56,9 +62,7 @@ public:
/// Return an identifier for this buffer, typically the filename it was read
/// from.
- virtual const char *getBufferIdentifier() const {
- return "Unknown buffer";
- }
+ virtual StringRef getBufferIdentifier() const { return "Unknown buffer"; }
/// Open the specified file as a MemoryBuffer, returning a new MemoryBuffer
/// if successful, otherwise returning null. If FileSize is specified, this
@@ -72,6 +76,12 @@ public:
getFile(const Twine &Filename, int64_t FileSize = -1,
bool RequiresNullTerminator = true, bool IsVolatileSize = false);
+ /// Read all of the specified file into a MemoryBuffer as a stream
+ /// (i.e. until EOF is reached). This is useful for special files that
+ /// look like a regular file but have 0 size (e.g. /proc/cpuinfo on Linux).
+ static ErrorOr<std::unique_ptr<MemoryBuffer>>
+ getFileAsStream(const Twine &Filename);
+
/// Given an already-open file descriptor, map some slice of it into a
/// MemoryBuffer. The slice is specified by an \p Offset and \p MapSize.
/// Since this is in the middle of a file, the buffer is not null terminated.
@@ -150,7 +160,7 @@ class MemoryBufferRef {
StringRef Identifier;
public:
- MemoryBufferRef() {}
+ MemoryBufferRef() = default;
MemoryBufferRef(MemoryBuffer& Buffer)
: Buffer(Buffer.getBuffer()), Identifier(Buffer.getBufferIdentifier()) {}
MemoryBufferRef(StringRef Buffer, StringRef Identifier)
@@ -170,4 +180,4 @@ DEFINE_SIMPLE_CONVERSION_FUNCTIONS(MemoryBuffer, LLVMMemoryBufferRef)
} // end namespace llvm
-#endif
+#endif // LLVM_SUPPORT_MEMORYBUFFER_H
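A hedged sketch of the new getFileAsStream(), which reads to EOF and therefore works for pseudo-files whose reported size is zero; the path is only an example:

#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"

static void dumpCpuInfoSize() {
  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> BufOrErr =
      llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo");
  if (std::error_code EC = BufOrErr.getError()) {
    llvm::errs() << "cannot read: " << EC.message() << "\n";
    return;
  }
  llvm::MemoryBuffer &Buf = **BufOrErr;
  // getBufferIdentifier() now returns a StringRef instead of const char *.
  llvm::errs() << Buf.getBufferIdentifier() << ": " << Buf.getBufferSize()
               << " bytes\n";
}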
diff --git a/contrib/llvm/include/llvm/Support/MemoryObject.h b/contrib/llvm/include/llvm/Support/MemoryObject.h
deleted file mode 100644
index e0c8749da346..000000000000
--- a/contrib/llvm/include/llvm/Support/MemoryObject.h
+++ /dev/null
@@ -1,68 +0,0 @@
-//===- MemoryObject.h - Abstract memory interface ---------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_SUPPORT_MEMORYOBJECT_H
-#define LLVM_SUPPORT_MEMORYOBJECT_H
-
-#include "llvm/Support/DataTypes.h"
-
-namespace llvm {
-
-/// Interface to data which might be streamed. Streamability has 2 important
-/// implications/restrictions. First, the data might not yet exist in memory
-/// when the request is made. This just means that readByte/readBytes might have
-/// to block or do some work to get it. More significantly, the exact size of
-/// the object might not be known until it has all been fetched. This means that
-/// to return the right result, getExtent must also wait for all the data to
-/// arrive; therefore it should not be called on objects which are actually
-/// streamed (this would defeat the purpose of streaming). Instead,
-/// isValidAddress can be used to test addresses without knowing the exact size
-/// of the stream. Finally, getPointer can be used instead of readBytes to avoid
-/// extra copying.
-class MemoryObject {
-public:
- virtual ~MemoryObject();
-
- /// Returns the size of the region in bytes. (The region is contiguous, so
- /// the highest valid address of the region is getExtent() - 1).
- ///
- /// @result - The size of the region.
- virtual uint64_t getExtent() const = 0;
-
- /// Tries to read a contiguous range of bytes from the region, up to the end
- /// of the region.
- ///
- /// @param Buf - A pointer to a buffer to be filled in. Must be non-NULL
- /// and large enough to hold size bytes.
- /// @param Size - The number of bytes to copy.
- /// @param Address - The address of the first byte, in the same space as
- /// getBase().
- /// @result - The number of bytes read.
- virtual uint64_t readBytes(uint8_t *Buf, uint64_t Size,
- uint64_t Address) const = 0;
-
- /// Ensures that the requested data is in memory, and returns a pointer to it.
- /// More efficient than using readBytes if the data is already in memory. May
- /// block until (address - base + size) bytes have been read
- /// @param address - address of the byte, in the same space as getBase()
- /// @param size - amount of data that must be available on return
- /// @result - valid pointer to the requested data
- virtual const uint8_t *getPointer(uint64_t address, uint64_t size) const = 0;
-
- /// Returns true if the address is within the object (i.e. between base and
- /// base + extent - 1 inclusive). May block until (address - base) bytes have
- /// been read
- /// @param address - address of the byte, in the same space as getBase()
- /// @result - true if the address may be read with readByte()
- virtual bool isValidAddress(uint64_t address) const = 0;
-};
-
-}
-
-#endif
diff --git a/contrib/llvm/include/llvm/Support/NativeFormatting.h b/contrib/llvm/include/llvm/Support/NativeFormatting.h
new file mode 100644
index 000000000000..6d1dd7b422fe
--- /dev/null
+++ b/contrib/llvm/include/llvm/Support/NativeFormatting.h
@@ -0,0 +1,49 @@
+//===- NativeFormatting.h - Low level formatting helpers ---------*- C++-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_NATIVE_FORMATTING_H
+#define LLVM_SUPPORT_NATIVE_FORMATTING_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <cstdint>
+
+namespace llvm {
+enum class FloatStyle { Exponent, ExponentUpper, Fixed, Percent };
+enum class IntegerStyle {
+ Integer,
+ Number,
+};
+enum class HexPrintStyle { Upper, Lower, PrefixUpper, PrefixLower };
+
+size_t getDefaultPrecision(FloatStyle Style);
+
+bool isPrefixedHexStyle(HexPrintStyle S);
+
+void write_integer(raw_ostream &S, unsigned int N, size_t MinDigits,
+ IntegerStyle Style);
+void write_integer(raw_ostream &S, int N, size_t MinDigits, IntegerStyle Style);
+void write_integer(raw_ostream &S, unsigned long N, size_t MinDigits,
+ IntegerStyle Style);
+void write_integer(raw_ostream &S, long N, size_t MinDigits,
+ IntegerStyle Style);
+void write_integer(raw_ostream &S, unsigned long long N, size_t MinDigits,
+ IntegerStyle Style);
+void write_integer(raw_ostream &S, long long N, size_t MinDigits,
+ IntegerStyle Style);
+
+void write_hex(raw_ostream &S, uint64_t N, HexPrintStyle Style,
+ Optional<size_t> Width = None);
+void write_double(raw_ostream &S, double D, FloatStyle Style,
+ Optional<size_t> Precision = None);
+}
+
+#endif
+
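A sketch exercising the declarations above; the outputs noted in the comments are what the styles suggest, not verified against the implementation:

#include "llvm/Support/NativeFormatting.h"
#include "llvm/Support/raw_ostream.h"

static void demo(llvm::raw_ostream &OS) {
  // 42 written with at least five digits (MinDigits).
  llvm::write_integer(OS, 42u, 5, llvm::IntegerStyle::Integer);
  OS << ' ';
  // 255 as prefixed lower-case hex, e.g. "0xff"; the optional width is omitted.
  llvm::write_hex(OS, 255, llvm::HexPrintStyle::PrefixLower);
  OS << ' ';
  // Fixed-point double using the style's default precision.
  llvm::write_double(OS, 3.14159, llvm::FloatStyle::Fixed);
  OS << '\n';
}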
diff --git a/contrib/llvm/include/llvm/Support/OnDiskHashTable.h b/contrib/llvm/include/llvm/Support/OnDiskHashTable.h
index c28fcabe78fc..e9c28daf03b9 100644
--- a/contrib/llvm/include/llvm/Support/OnDiskHashTable.h
+++ b/contrib/llvm/include/llvm/Support/OnDiskHashTable.h
@@ -14,7 +14,6 @@
#ifndef LLVM_SUPPORT_ONDISKHASHTABLE_H
#define LLVM_SUPPORT_ONDISKHASHTABLE_H
-#include "llvm/Support/AlignOf.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/DataTypes.h"
#include "llvm/Support/EndianStream.h"
@@ -208,7 +207,7 @@ public:
// Pad with zeros so that we can start the hashtable at an aligned address.
offset_type TableOff = Out.tell();
- uint64_t N = llvm::OffsetToAlignment(TableOff, alignOf<offset_type>());
+ uint64_t N = llvm::OffsetToAlignment(TableOff, alignof(offset_type));
TableOff += N;
while (N--)
LE.write<uint8_t>(0);
diff --git a/contrib/llvm/include/llvm/Support/Options.h b/contrib/llvm/include/llvm/Support/Options.h
index 7b61b2308f57..9019804d24e0 100644
--- a/contrib/llvm/include/llvm/Support/Options.h
+++ b/contrib/llvm/include/llvm/Support/Options.h
@@ -93,7 +93,7 @@ public:
/// option stores (\p ValT), the class that will read the option (\p Base),
/// and the member that the class will store the data into (\p Mem).
template <typename ValT, typename Base, ValT(Base::*Mem)>
- static void registerOption(const char *ArgStr, const char *Desc,
+ static void registerOption(StringRef ArgStr, StringRef Desc,
const ValT &InitValue) {
cl::opt<ValT> *Option = new cl::opt<ValT>(ArgStr, cl::desc(Desc),
cl::Hidden, cl::init(InitValue));
diff --git a/contrib/llvm/include/llvm/Support/Path.h b/contrib/llvm/include/llvm/Support/Path.h
index 853f0997571c..0513350d446b 100644
--- a/contrib/llvm/include/llvm/Support/Path.h
+++ b/contrib/llvm/include/llvm/Support/Path.h
@@ -445,7 +445,8 @@ StringRef remove_leading_dotslash(StringRef path);
/// @brief In-place remove any './' and optionally '../' components from a path.
///
/// @param path processed path
-/// @param remove_dot_dot specify if '../' should be removed
+/// @param remove_dot_dot specify if '../' (except for leading "../") should be
+/// removed
/// @result True if path was changed
bool remove_dots(SmallVectorImpl<char> &path, bool remove_dot_dot = false);
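A sketch of the documented behaviour; the results in the comments follow from the clarified note about leading "../" being preserved and are illustrative rather than verified output:

#include "llvm/ADT/SmallString.h"
#include "llvm/Support/Path.h"

static void normalizeExamples() {
  llvm::SmallString<64> P("a/./b/../c");
  // With remove_dot_dot = true, "./" and interior "../" components fold away.
  llvm::sys::path::remove_dots(P, /*remove_dot_dot=*/true); // "a/c"

  llvm::SmallString<64> Q("../x/./y");
  // A leading "../" cannot be resolved and is kept, per the clarified doc.
  llvm::sys::path::remove_dots(Q, /*remove_dot_dot=*/true); // "../x/y"
}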
diff --git a/contrib/llvm/include/llvm/Support/PointerLikeTypeTraits.h b/contrib/llvm/include/llvm/Support/PointerLikeTypeTraits.h
index 96cdaed142c2..9ff894edbeb0 100644
--- a/contrib/llvm/include/llvm/Support/PointerLikeTypeTraits.h
+++ b/contrib/llvm/include/llvm/Support/PointerLikeTypeTraits.h
@@ -15,8 +15,8 @@
#ifndef LLVM_SUPPORT_POINTERLIKETYPETRAITS_H
#define LLVM_SUPPORT_POINTERLIKETYPETRAITS_H
-#include "llvm/Support/AlignOf.h"
#include "llvm/Support/DataTypes.h"
+#include <type_traits>
namespace llvm {
@@ -42,9 +42,7 @@ public:
static inline void *getAsVoidPointer(T *P) { return P; }
static inline T *getFromVoidPointer(void *P) { return static_cast<T *>(P); }
- enum {
- NumLowBitsAvailable = detail::ConstantLog2<AlignOf<T>::Alignment>::value
- };
+ enum { NumLowBitsAvailable = detail::ConstantLog2<alignof(T)>::value };
};
template <> class PointerLikeTypeTraits<void *> {
diff --git a/contrib/llvm/include/llvm/Support/PrettyStackTrace.h b/contrib/llvm/include/llvm/Support/PrettyStackTrace.h
index 62e3bbc0ddbc..4d64fe4ef727 100644
--- a/contrib/llvm/include/llvm/Support/PrettyStackTrace.h
+++ b/contrib/llvm/include/llvm/Support/PrettyStackTrace.h
@@ -16,6 +16,7 @@
#ifndef LLVM_SUPPORT_PRETTYSTACKTRACE_H
#define LLVM_SUPPORT_PRETTYSTACKTRACE_H
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Compiler.h"
namespace llvm {
@@ -55,6 +56,16 @@ namespace llvm {
void print(raw_ostream &OS) const override;
};
+ /// PrettyStackTraceFormat - This object prints a string (which may use
+ /// printf-style formatting but should not contain newlines) to the stream
+ /// as the stack trace when a crash occurs.
+ class PrettyStackTraceFormat : public PrettyStackTraceEntry {
+ llvm::SmallVector<char, 32> Str;
+ public:
+ PrettyStackTraceFormat(const char *Format, ...);
+ void print(raw_ostream &OS) const override;
+ };
+
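Usage mirrors the other PrettyStackTraceEntry helpers; a brief sketch (the names and format string are illustrative):

#include "llvm/Support/PrettyStackTrace.h"

static void processFile(const char *Path, unsigned Index) {
  // If anything below crashes, this frame's formatted message appears in the
  // pretty stack trace. The format string should not contain newlines.
  llvm::PrettyStackTraceFormat Frame("processing %s (entry #%u)", Path, Index);
  // ... work that might crash ...
}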
/// PrettyStackTraceProgram - This object prints a specified program arguments
/// to the stream as the stack trace when a crash occurs.
class PrettyStackTraceProgram : public PrettyStackTraceEntry {
diff --git a/contrib/llvm/include/llvm/Support/Printable.h b/contrib/llvm/include/llvm/Support/Printable.h
index 83b8f0998ae6..28e875e8ff5e 100644
--- a/contrib/llvm/include/llvm/Support/Printable.h
+++ b/contrib/llvm/include/llvm/Support/Printable.h
@@ -21,7 +21,7 @@ namespace llvm {
class raw_ostream;
/// Simple wrapper around std::function<void(raw_ostream&)>.
-/// This class is usefull to construct print helpers for raw_ostream.
+/// This class is useful to construct print helpers for raw_ostream.
///
/// Example:
/// Printable PrintRegister(unsigned Register) {
diff --git a/contrib/llvm/include/llvm/Support/Process.h b/contrib/llvm/include/llvm/Support/Process.h
index 06fd0af10aa4..780c7e2ddd6f 100644
--- a/contrib/llvm/include/llvm/Support/Process.h
+++ b/contrib/llvm/include/llvm/Support/Process.h
@@ -28,8 +28,8 @@
#include "llvm/ADT/Optional.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Chrono.h"
#include "llvm/Support/DataTypes.h"
-#include "llvm/Support/TimeValue.h"
#include <system_error>
namespace llvm {
@@ -55,13 +55,14 @@ public:
/// This static function will set \p user_time to the amount of CPU time
/// spent in user (non-kernel) mode and \p sys_time to the amount of CPU
/// time spent in system (kernel) mode. If the operating system does not
- /// support collection of these metrics, a zero TimeValue will be for both
+ /// support collection of these metrics, a zero duration will be returned for both
/// values.
- /// \param elapsed Returns the TimeValue::now() giving current time
+ /// \param elapsed Returns the system_clock::now() giving current time
/// \param user_time Returns the current amount of user time for the process
/// \param sys_time Returns the current amount of system time for the process
- static void GetTimeUsage(TimeValue &elapsed, TimeValue &user_time,
- TimeValue &sys_time);
+ static void GetTimeUsage(TimePoint<> &elapsed,
+ std::chrono::nanoseconds &user_time,
+ std::chrono::nanoseconds &sys_time);
/// This function makes the necessary calls to the operating system to
/// prevent core files or any other kind of large memory dumps that can
diff --git a/contrib/llvm/include/llvm/Support/RWMutex.h b/contrib/llvm/include/llvm/Support/RWMutex.h
index 4be931337765..e4736b8e24eb 100644
--- a/contrib/llvm/include/llvm/Support/RWMutex.h
+++ b/contrib/llvm/include/llvm/Support/RWMutex.h
@@ -18,10 +18,9 @@
#include "llvm/Support/Threading.h"
#include <cassert>
-namespace llvm
-{
- namespace sys
- {
+namespace llvm {
+namespace sys {
+
/// @brief Platform agnostic RWMutex class.
class RWMutexImpl
{
@@ -89,9 +88,11 @@ namespace llvm
template<bool mt_only>
class SmartRWMutex {
RWMutexImpl impl;
- unsigned readers, writers;
+ unsigned readers = 0;
+ unsigned writers = 0;
+
public:
- explicit SmartRWMutex() : impl(), readers(0), writers(0) { }
+ explicit SmartRWMutex() = default;
bool lock_shared() {
if (!mt_only || llvm_is_multithreaded())
@@ -140,6 +141,7 @@ namespace llvm
SmartRWMutex(const SmartRWMutex<mt_only> & original);
void operator=(const SmartRWMutex<mt_only> &);
};
+
typedef SmartRWMutex<false> RWMutex;
/// ScopedReader - RAII acquisition of a reader lock
@@ -155,6 +157,7 @@ namespace llvm
mutex.unlock_shared();
}
};
+
typedef SmartScopedReader<false> ScopedReader;
/// ScopedWriter - RAII acquisition of a writer lock
@@ -170,8 +173,10 @@ namespace llvm
mutex.unlock();
}
};
+
typedef SmartScopedWriter<false> ScopedWriter;
- }
-}
-#endif
+} // end namespace sys
+} // end namespace llvm
+
+#endif // LLVM_SUPPORT_RWMUTEX_H
diff --git a/contrib/llvm/include/llvm/Support/RandomNumberGenerator.h b/contrib/llvm/include/llvm/Support/RandomNumberGenerator.h
index f146e350fe62..1399dab815f8 100644
--- a/contrib/llvm/include/llvm/Support/RandomNumberGenerator.h
+++ b/contrib/llvm/include/llvm/Support/RandomNumberGenerator.h
@@ -19,6 +19,7 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/DataTypes.h" // Needed for uint64_t on Windows.
#include <random>
+#include <system_error>
namespace llvm {
class StringRef;
@@ -30,9 +31,21 @@ class StringRef;
/// Module::createRNG to create a new RNG instance for use with that
/// module.
class RandomNumberGenerator {
+
+ // 64-bit Mersenne Twister by Matsumoto and Nishimura, 2000
+ // http://en.cppreference.com/w/cpp/numeric/random/mersenne_twister_engine
+ // This RNG is deterministically portable across C++11
+ // implementations.
+ using generator_type = std::mt19937_64;
+
public:
+ using result_type = generator_type::result_type;
+
/// Returns a random number in the range [0, Max).
- uint_fast64_t operator()();
+ result_type operator()();
+
+ static constexpr result_type min() { return generator_type::min(); }
+ static constexpr result_type max() { return generator_type::max(); }
private:
/// Seeds and salts the underlying RNG engine.
@@ -41,11 +54,7 @@ private:
/// Module::createRNG to create a new RNG salted with the Module ID.
RandomNumberGenerator(StringRef Salt);
- // 64-bit Mersenne Twister by Matsumoto and Nishimura, 2000
- // http://en.cppreference.com/w/cpp/numeric/random/mersenne_twister_engine
- // This RNG is deterministically portable across C++11
- // implementations.
- std::mt19937_64 Generator;
+ generator_type Generator;
// Noncopyable.
RandomNumberGenerator(const RandomNumberGenerator &other) = delete;
@@ -53,6 +62,9 @@ private:
friend class Module;
};
+
+// Fill the given buffer with the specified number of random bytes.
+std::error_code getRandomBytes(void *Buffer, size_t Size);
}
#endif
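Exposing result_type, min() and max() lets the class satisfy the generator requirements of the standard distributions; a hedged sketch, assuming the RNG instance was obtained from Module::createRNG() as the comments above describe:

#include "llvm/Support/RandomNumberGenerator.h"
#include <cstddef>
#include <cstdint>
#include <random>

// RNG is assumed to come from Module::createRNG(); the constructor is private
// and only Module can create instances.
static uint64_t rollDie(llvm::RandomNumberGenerator &RNG) {
  std::uniform_int_distribution<uint64_t> Dist(1, 6);
  return Dist(RNG); // valid now that RNG models a uniform random bit generator
}

static bool fillNonce(void *Buf, size_t Len) {
  // New free function declared above: fills Buf with Len random bytes and
  // returns a non-zero error_code on failure.
  return !llvm::getRandomBytes(Buf, Len);
}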
diff --git a/contrib/llvm/include/llvm/Support/Recycler.h b/contrib/llvm/include/llvm/Support/Recycler.h
index a38050d81903..1523aad38d46 100644
--- a/contrib/llvm/include/llvm/Support/Recycler.h
+++ b/contrib/llvm/include/llvm/Support/Recycler.h
@@ -16,7 +16,6 @@
#define LLVM_SUPPORT_RECYCLER_H
#include "llvm/ADT/ilist.h"
-#include "llvm/Support/AlignOf.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/ErrorHandling.h"
#include <cassert>
@@ -32,7 +31,7 @@ void PrintRecyclerStats(size_t Size, size_t Align, size_t FreeListSize);
/// and facilitates reusing deallocated memory in place of allocating
/// new memory.
///
-template<class T, size_t Size = sizeof(T), size_t Align = AlignOf<T>::Alignment>
+template <class T, size_t Size = sizeof(T), size_t Align = alignof(T)>
class Recycler {
struct FreeNode {
FreeNode *Next;
@@ -80,7 +79,7 @@ public:
template<class SubClass, class AllocatorType>
SubClass *Allocate(AllocatorType &Allocator) {
- static_assert(AlignOf<SubClass>::Alignment <= Align,
+ static_assert(alignof(SubClass) <= Align,
"Recycler allocation alignment is less than object align!");
static_assert(sizeof(SubClass) <= Size,
"Recycler allocation size is less than object size!");
diff --git a/contrib/llvm/include/llvm/Support/RecyclingAllocator.h b/contrib/llvm/include/llvm/Support/RecyclingAllocator.h
index 001d1cf7c3df..32b033b17946 100644
--- a/contrib/llvm/include/llvm/Support/RecyclingAllocator.h
+++ b/contrib/llvm/include/llvm/Support/RecyclingAllocator.h
@@ -22,8 +22,8 @@ namespace llvm {
/// RecyclingAllocator - This class wraps an Allocator, adding the
/// functionality of recycling deleted objects.
///
-template<class AllocatorType, class T,
- size_t Size = sizeof(T), size_t Align = AlignOf<T>::Alignment>
+template <class AllocatorType, class T, size_t Size = sizeof(T),
+ size_t Align = alignof(T)>
class RecyclingAllocator {
private:
/// Base - Implementation details.
diff --git a/contrib/llvm/include/llvm/Support/Regex.h b/contrib/llvm/include/llvm/Support/Regex.h
index 31b35ed0cad6..83db80359ee2 100644
--- a/contrib/llvm/include/llvm/Support/Regex.h
+++ b/contrib/llvm/include/llvm/Support/Regex.h
@@ -43,6 +43,7 @@ namespace llvm {
BasicRegex=4
};
+ Regex();
/// Compiles the given regular expression \p Regex.
Regex(StringRef Regex, unsigned Flags = NoFlags);
Regex(const Regex &) = delete;
@@ -51,11 +52,7 @@ namespace llvm {
std::swap(error, regex.error);
return *this;
}
- Regex(Regex &&regex) {
- preg = regex.preg;
- error = regex.error;
- regex.preg = nullptr;
- }
+ Regex(Regex &&regex);
~Regex();
/// isValid - returns the error encountered during regex compilation, or
diff --git a/contrib/llvm/include/llvm/Support/Registry.h b/contrib/llvm/include/llvm/Support/Registry.h
index 27f025fcd080..02fd5b9354a1 100644
--- a/contrib/llvm/include/llvm/Support/Registry.h
+++ b/contrib/llvm/include/llvm/Support/Registry.h
@@ -15,6 +15,7 @@
#define LLVM_SUPPORT_REGISTRY_H
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/DynamicLibrary.h"
@@ -25,16 +26,15 @@ namespace llvm {
/// no-argument constructor.
template <typename T>
class SimpleRegistryEntry {
- const char *Name, *Desc;
+ StringRef Name, Desc;
std::unique_ptr<T> (*Ctor)();
public:
- SimpleRegistryEntry(const char *N, const char *D, std::unique_ptr<T> (*C)())
- : Name(N), Desc(D), Ctor(C)
- {}
+ SimpleRegistryEntry(StringRef N, StringRef D, std::unique_ptr<T> (*C)())
+ : Name(N), Desc(D), Ctor(C) {}
- const char *getName() const { return Name; }
- const char *getDesc() const { return Desc; }
+ StringRef getName() const { return Name; }
+ StringRef getDesc() const { return Desc; }
std::unique_ptr<T> instantiate() const { return Ctor(); }
};
@@ -44,6 +44,7 @@ namespace llvm {
template <typename T>
class Registry {
public:
+ typedef T type;
typedef SimpleRegistryEntry<T> entry;
class node;
@@ -69,13 +70,14 @@ namespace llvm {
node(const entry &V) : Next(nullptr), Val(V) {}
};
- static void add_node(node *N) {
- if (Tail)
- Tail->Next = N;
- else
- Head = N;
- Tail = N;
- }
+ /// Add a node to the Registry: this is the interface between the plugin and
+ /// the executable.
+ ///
+ /// This function is exported by the executable and called by the plugin to
+ /// add a node to the executable's registry. Therefore it's not defined here
+ /// to avoid it being instantiated in the plugin and is instead defined in
+ /// the executable (see LLVM_INSTANTIATE_REGISTRY below).
+ static void add_node(node *N);
/// Iterators for registry entries.
///
@@ -92,7 +94,9 @@ namespace llvm {
const entry *operator->() const { return &Cur->Val; }
};
- static iterator begin() { return iterator(Head); }
+ // begin is not defined here in order to avoid usage of an undefined static
+ // data member; instead it's instantiated by LLVM_INSTANTIATE_REGISTRY.
+ static iterator begin();
static iterator end() { return iterator(nullptr); }
static iterator_range<iterator> entries() {
@@ -115,66 +119,42 @@ namespace llvm {
static std::unique_ptr<T> CtorFn() { return make_unique<V>(); }
public:
- Add(const char *Name, const char *Desc)
+ Add(StringRef Name, StringRef Desc)
: Entry(Name, Desc, CtorFn), Node(Entry) {
add_node(&Node);
}
};
-
- /// A dynamic import facility. This is used on Windows to
- /// import the entries added in the plugin.
- static void import(sys::DynamicLibrary &DL, const char *RegistryName) {
- typedef void *(*GetRegistry)();
- std::string Name("LLVMGetRegistry_");
- Name.append(RegistryName);
- GetRegistry Getter =
- (GetRegistry)(intptr_t)DL.getAddressOfSymbol(Name.c_str());
- if (Getter) {
- // Call the getter function in order to get the full copy of the
- // registry defined in the plugin DLL, and copy them over to the
- // current Registry.
- typedef std::pair<const node *, const node *> Info;
- Info *I = static_cast<Info *>(Getter());
- iterator begin(I->first);
- iterator end(I->second);
- for (++end; begin != end; ++begin) {
- // This Node object needs to remain alive for the
- // duration of the program.
- add_node(new node(*begin));
- }
- }
- }
-
- /// Retrieve the data to be passed across DLL boundaries when
- /// importing registries from another DLL on Windows.
- static void *exportRegistry() {
- static std::pair<const node *, const node *> Info(Head, Tail);
- return &Info;
- }
};
-
-
- // Since these are defined in a header file, plugins must be sure to export
- // these symbols.
- template <typename T>
- typename Registry<T>::node *Registry<T>::Head;
-
- template <typename T>
- typename Registry<T>::node *Registry<T>::Tail;
} // end namespace llvm
-#ifdef LLVM_ON_WIN32
-#define LLVM_EXPORT_REGISTRY(REGISTRY_CLASS) \
- extern "C" { \
- __declspec(dllexport) void *__cdecl LLVMGetRegistry_##REGISTRY_CLASS() { \
- return REGISTRY_CLASS::exportRegistry(); \
- } \
+/// Instantiate a registry class.
+///
+/// This provides template definitions of add_node, begin, and the Head and Tail
+/// pointers, then explicitly instantiates them. We could explicitly specialize
+/// them, instead of the two-step process of define then instantiate, but
+/// strictly speaking that's not allowed by the C++ standard (we would need to
+/// have explicit specialization declarations in all translation units where the
+/// specialization is used) so we don't.
+#define LLVM_INSTANTIATE_REGISTRY(REGISTRY_CLASS) \
+ namespace llvm { \
+ template<typename T> typename Registry<T>::node *Registry<T>::Head = nullptr;\
+ template<typename T> typename Registry<T>::node *Registry<T>::Tail = nullptr;\
+ template<typename T> \
+ void Registry<T>::add_node(typename Registry<T>::node *N) { \
+ if (Tail) \
+ Tail->Next = N; \
+ else \
+ Head = N; \
+ Tail = N; \
+ } \
+ template<typename T> typename Registry<T>::iterator Registry<T>::begin() { \
+ return iterator(Head); \
+ } \
+ template REGISTRY_CLASS::node *Registry<REGISTRY_CLASS::type>::Head; \
+ template REGISTRY_CLASS::node *Registry<REGISTRY_CLASS::type>::Tail; \
+ template \
+ void Registry<REGISTRY_CLASS::type>::add_node(REGISTRY_CLASS::node*); \
+ template REGISTRY_CLASS::iterator Registry<REGISTRY_CLASS::type>::begin(); \
}
-#define LLVM_IMPORT_REGISTRY(REGISTRY_CLASS, DL) \
- REGISTRY_CLASS::import(DL, #REGISTRY_CLASS)
-#else
-#define LLVM_EXPORT_REGISTRY(REGISTRY_CLASS)
-#define LLVM_IMPORT_REGISTRY(REGISTRY_CLASS, DL)
-#endif
#endif // LLVM_SUPPORT_REGISTRY_H
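For reference, a minimal sketch of how the reworked Registry above is meant to be used; WidgetParser, FastWidgetParser, and listParsers are hypothetical names and not part of the patch. The registry type is declared in a shared header, instantiated exactly once in the executable with LLVM_INSTANTIATE_REGISTRY (which now supplies Head, Tail, add_node, and begin), and plugins register entries through Registry<T>::Add at static-initialization time.

#include "llvm/Support/Registry.h"
#include "llvm/Support/raw_ostream.h"

// Shared header: the interface plugins implement, and its registry type.
class WidgetParser {
public:
  virtual ~WidgetParser() = default;
  virtual void parse() = 0;
};
typedef llvm::Registry<WidgetParser> WidgetParserRegistry;

// Exactly one translation unit in the executable (never in a plugin)
// provides the registry's definitions:
LLVM_INSTANTIATE_REGISTRY(WidgetParserRegistry)

// In a plugin: register an implementation. Add's constructor calls add_node,
// which resolves to the definition exported by the executable.
namespace {
class FastWidgetParser : public WidgetParser {
  void parse() override {}
};
WidgetParserRegistry::Add<FastWidgetParser>
    X("fast", "A hypothetical widget parser");
} // end anonymous namespace

// Consumers iterate the merged entries.
void listParsers(llvm::raw_ostream &OS) {
  for (const auto &E : WidgetParserRegistry::entries())
    OS << E.getName() << ": " << E.getDesc() << "\n";
}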
diff --git a/contrib/llvm/include/llvm/Support/SHA1.h b/contrib/llvm/include/llvm/Support/SHA1.h
index 8347a713f272..1fc60a878f94 100644
--- a/contrib/llvm/include/llvm/Support/SHA1.h
+++ b/contrib/llvm/include/llvm/Support/SHA1.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/ArrayRef.h"
+#include <array>
#include <cstdint>
namespace llvm {
@@ -53,6 +54,9 @@ public:
/// made into update.
StringRef result();
+ /// Returns a raw 160-bit SHA1 hash for the given data.
+ static std::array<uint8_t, 20> hash(ArrayRef<uint8_t> Data);
+
private:
/// Define some constants.
/// "static constexpr" would be cleaner but MSVC does not support it yet.
@@ -61,7 +65,10 @@ private:
// Internal State
struct {
- uint32_t Buffer[BLOCK_LENGTH / 4];
+ union {
+ uint8_t C[BLOCK_LENGTH];
+ uint32_t L[BLOCK_LENGTH / 4];
+ } Buffer;
uint32_t State[HASH_LENGTH / 4];
uint32_t ByteCount;
uint8_t BufferOffset;
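A short usage sketch of the one-shot helper added above (hashBuffer is an illustrative name); the incremental init()/update()/result() interface is unchanged.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/SHA1.h"
#include <array>
#include <cstdint>

// Hash a complete buffer in one call and return the raw 20-byte digest.
std::array<uint8_t, 20> hashBuffer(llvm::ArrayRef<uint8_t> Data) {
  return llvm::SHA1::hash(Data);
}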
diff --git a/contrib/llvm/include/llvm/Support/SMLoc.h b/contrib/llvm/include/llvm/Support/SMLoc.h
index c6e9a14e82ac..eb3a1ba7db51 100644
--- a/contrib/llvm/include/llvm/Support/SMLoc.h
+++ b/contrib/llvm/include/llvm/Support/SMLoc.h
@@ -15,6 +15,7 @@
#ifndef LLVM_SUPPORT_SMLOC_H
#define LLVM_SUPPORT_SMLOC_H
+#include "llvm/ADT/None.h"
#include <cassert>
namespace llvm {
@@ -49,7 +50,8 @@ class SMRange {
public:
SMLoc Start, End;
- SMRange() {}
+ SMRange() = default;
+ SMRange(NoneType) {}
SMRange(SMLoc St, SMLoc En) : Start(St), End(En) {
assert(Start.isValid() == End.isValid() &&
"Start and end should either both be valid or both be invalid!");
@@ -60,4 +62,4 @@ public:
} // end namespace llvm
-#endif
+#endif // LLVM_SUPPORT_SMLOC_H
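The new non-explicit SMRange(NoneType) constructor lets callers pass None wherever an SMRange is expected; a minimal sketch (noRange is an illustrative name):

#include "llvm/ADT/None.h"
#include "llvm/Support/SMLoc.h"

// An invalid (empty) range can now be spelled as None at call sites.
llvm::SMRange noRange() { return llvm::None; }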
diff --git a/contrib/llvm/include/llvm/Support/SourceMgr.h b/contrib/llvm/include/llvm/Support/SourceMgr.h
index 1f8b1a01865f..bc7478e0d703 100644
--- a/contrib/llvm/include/llvm/Support/SourceMgr.h
+++ b/contrib/llvm/include/llvm/Support/SourceMgr.h
@@ -51,11 +51,6 @@ private:
/// This is the location of the parent include, or null if at the top level.
SMLoc IncludeLoc;
-
- SrcBuffer() {}
-
- SrcBuffer(SrcBuffer &&O)
- : Buffer(std::move(O.Buffer)), IncludeLoc(O.IncludeLoc) {}
};
/// This is all of the buffers that we are reading from.
diff --git a/contrib/llvm/include/llvm/Support/StreamingMemoryObject.h b/contrib/llvm/include/llvm/Support/StreamingMemoryObject.h
deleted file mode 100644
index 1ab85372cd20..000000000000
--- a/contrib/llvm/include/llvm/Support/StreamingMemoryObject.h
+++ /dev/null
@@ -1,87 +0,0 @@
-//===- StreamingMemoryObject.h - Streamable data interface -----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_SUPPORT_STREAMINGMEMORYOBJECT_H
-#define LLVM_SUPPORT_STREAMINGMEMORYOBJECT_H
-
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/DataStream.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MemoryObject.h"
-#include <memory>
-#include <vector>
-
-namespace llvm {
-
-/// Interface to data which is actually streamed from a DataStreamer. In
-/// addition to inherited members, it has the dropLeadingBytes and
-/// setKnownObjectSize methods which are not applicable to non-streamed objects.
-class StreamingMemoryObject : public MemoryObject {
-public:
- StreamingMemoryObject(std::unique_ptr<DataStreamer> Streamer);
- uint64_t getExtent() const override;
- uint64_t readBytes(uint8_t *Buf, uint64_t Size,
- uint64_t Address) const override;
- const uint8_t *getPointer(uint64_t Address, uint64_t Size) const override;
- bool isValidAddress(uint64_t address) const override;
-
- /// Drop s bytes from the front of the stream, pushing the positions of the
- /// remaining bytes down by s. This is used to skip past the bitcode header,
- /// since we don't know a priori if it's present, and we can't put bytes
- /// back into the stream once we've read them.
- bool dropLeadingBytes(size_t s);
-
- /// If the data object size is known in advance, many of the operations can
- /// be made more efficient, so this method should be called before reading
- /// starts (although it can be called anytime).
- void setKnownObjectSize(size_t size);
-
- /// The number of bytes read at a time from the data streamer.
- static const uint32_t kChunkSize = 4096 * 4;
-
-private:
- mutable std::vector<unsigned char> Bytes;
- std::unique_ptr<DataStreamer> Streamer;
- mutable size_t BytesRead; // Bytes read from stream
- size_t BytesSkipped;// Bytes skipped at start of stream (e.g. wrapper/header)
- mutable size_t ObjectSize; // 0 if unknown, set if wrapper seen or EOF reached
- mutable bool EOFReached;
-
- // Fetch enough bytes such that Pos can be read (i.e. BytesRead >
- // Pos). Returns true if Pos can be read. Unlike most of the
- // functions in BitcodeReader, returns true on success. Most of the
- // requests will be small, but we fetch at kChunkSize bytes at a
- // time to avoid making too many potentially expensive GetBytes
- // calls.
- bool fetchToPos(size_t Pos) const {
- while (Pos >= BytesRead) {
- if (EOFReached)
- return false;
- Bytes.resize(BytesRead + BytesSkipped + kChunkSize);
- size_t bytes = Streamer->GetBytes(&Bytes[BytesRead + BytesSkipped],
- kChunkSize);
- BytesRead += bytes;
- if (bytes == 0) { // reached EOF/ran out of bytes
- if (ObjectSize == 0)
- ObjectSize = BytesRead;
- EOFReached = true;
- }
- }
- return !ObjectSize || Pos < ObjectSize;
- }
-
- StreamingMemoryObject(const StreamingMemoryObject&) = delete;
- void operator=(const StreamingMemoryObject&) = delete;
-};
-
-MemoryObject *getNonStreamedMemoryObject(
- const unsigned char *Start, const unsigned char *End);
-
-}
-#endif // STREAMINGMEMORYOBJECT_H_
diff --git a/contrib/llvm/include/llvm/Support/StringSaver.h b/contrib/llvm/include/llvm/Support/StringSaver.h
index 38fb7bb38339..fcddd4cde5b6 100644
--- a/contrib/llvm/include/llvm/Support/StringSaver.h
+++ b/contrib/llvm/include/llvm/Support/StringSaver.h
@@ -16,17 +16,17 @@
namespace llvm {
-/// \brief Saves strings in the inheritor's stable storage and returns a stable
-/// raw character pointer.
+/// \brief Saves strings in the inheritor's stable storage and returns a
+/// StringRef with a stable character pointer.
class StringSaver final {
BumpPtrAllocator &Alloc;
public:
StringSaver(BumpPtrAllocator &Alloc) : Alloc(Alloc) {}
- const char *save(const char *S) { return save(StringRef(S)); }
- const char *save(StringRef S);
- const char *save(const Twine &S) { return save(StringRef(S.str())); }
- const char *save(std::string &S) { return save(StringRef(S)); }
+ StringRef save(const char *S) { return save(StringRef(S)); }
+ StringRef save(StringRef S);
+ StringRef save(const Twine &S) { return save(StringRef(S.str())); }
+ StringRef save(std::string &S) { return save(StringRef(S)); }
};
}
#endif
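With save() now returning StringRef, callers get both the pointer and the length of the persisted copy; a minimal sketch (internCopy is an illustrative name). The storage still lives in the BumpPtrAllocator, so the returned StringRef stays valid after the original string goes away.

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/StringSaver.h"
#include <string>

llvm::StringRef internCopy(llvm::BumpPtrAllocator &Alloc,
                           const std::string &Temporary) {
  llvm::StringSaver Saver(Alloc);
  // The copy is allocated from Alloc; it outlives both Saver and Temporary.
  return Saver.save(llvm::StringRef(Temporary));
}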
diff --git a/contrib/llvm/include/llvm/Support/SwapByteOrder.h b/contrib/llvm/include/llvm/Support/SwapByteOrder.h
index 91693aceb27d..71d3724950ab 100644
--- a/contrib/llvm/include/llvm/Support/SwapByteOrder.h
+++ b/contrib/llvm/include/llvm/Support/SwapByteOrder.h
@@ -18,6 +18,9 @@
#include "llvm/Support/Compiler.h"
#include "llvm/Support/DataTypes.h"
#include <cstddef>
+#if defined(_MSC_VER) && !defined(_DEBUG)
+#include <stdlib.h>
+#endif
namespace llvm {
namespace sys {
diff --git a/contrib/llvm/include/llvm/Support/TargetParser.h b/contrib/llvm/include/llvm/Support/TargetParser.h
index 0e2141f6d46f..63aeca7f4e1e 100644
--- a/contrib/llvm/include/llvm/Support/TargetParser.h
+++ b/contrib/llvm/include/llvm/Support/TargetParser.h
@@ -111,17 +111,17 @@ unsigned getFPUNeonSupportLevel(unsigned FPUKind);
unsigned getFPURestriction(unsigned FPUKind);
// FIXME: These should be moved to TargetTuple once it exists
-bool getFPUFeatures(unsigned FPUKind, std::vector<const char *> &Features);
-bool getHWDivFeatures(unsigned HWDivKind, std::vector<const char *> &Features);
+bool getFPUFeatures(unsigned FPUKind, std::vector<StringRef> &Features);
+bool getHWDivFeatures(unsigned HWDivKind, std::vector<StringRef> &Features);
bool getExtensionFeatures(unsigned Extensions,
- std::vector<const char*> &Features);
+ std::vector<StringRef> &Features);
StringRef getArchName(unsigned ArchKind);
unsigned getArchAttr(unsigned ArchKind);
StringRef getCPUAttr(unsigned ArchKind);
StringRef getSubArch(unsigned ArchKind);
StringRef getArchExtName(unsigned ArchExtKind);
-const char *getArchExtFeature(StringRef ArchExt);
+StringRef getArchExtFeature(StringRef ArchExt);
StringRef getHWDivName(unsigned HWDivKind);
// Information by Name
@@ -145,6 +145,13 @@ unsigned parseArchVersion(StringRef Arch);
// FIXME: This should be made into class design, to avoid duplication.
namespace AArch64 {
+// Arch names.
+enum class ArchKind {
+#define AARCH64_ARCH(NAME, ID, CPU_ATTR, SUB_ARCH, ARCH_ATTR, ARCH_FPU, ARCH_BASE_EXT) ID,
+#include "AArch64TargetParser.def"
+ AK_LAST
+};
+
// Arch extension modifiers for CPUs.
enum ArchExtKind : unsigned {
AEK_INVALID = 0x0,
@@ -155,7 +162,8 @@ enum ArchExtKind : unsigned {
AEK_SIMD = 0x10,
AEK_FP16 = 0x20,
AEK_PROFILE = 0x40,
- AEK_RAS = 0x80
+ AEK_RAS = 0x80,
+ AEK_LSE = 0x100
};
StringRef getCanonicalArchName(StringRef Arch);
@@ -167,17 +175,17 @@ unsigned getFPUNeonSupportLevel(unsigned FPUKind);
unsigned getFPURestriction(unsigned FPUKind);
// FIXME: These should be moved to TargetTuple once it exists
-bool getFPUFeatures(unsigned FPUKind, std::vector<const char *> &Features);
+bool getFPUFeatures(unsigned FPUKind, std::vector<StringRef> &Features);
bool getExtensionFeatures(unsigned Extensions,
- std::vector<const char*> &Features);
-bool getArchFeatures(unsigned ArchKind, std::vector<const char *> &Features);
+ std::vector<StringRef> &Features);
+bool getArchFeatures(unsigned ArchKind, std::vector<StringRef> &Features);
StringRef getArchName(unsigned ArchKind);
unsigned getArchAttr(unsigned ArchKind);
StringRef getCPUAttr(unsigned ArchKind);
StringRef getSubArch(unsigned ArchKind);
StringRef getArchExtName(unsigned ArchExtKind);
-const char *getArchExtFeature(StringRef ArchExt);
+StringRef getArchExtFeature(StringRef ArchExt);
unsigned checkArchVersion(StringRef Arch);
// Information by Name
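A sketch of the StringRef-based feature query introduced above, for the ARM side (fpuFeatures is an illustrative wrapper; the FPUKind value would normally come from the name-based parsers later in this header):

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/TargetParser.h"
#include <vector>

// Collect the backend feature strings for an FPU kind. With this change the
// entries are StringRefs into TargetParser's static tables rather than raw
// const char pointers.
std::vector<llvm::StringRef> fpuFeatures(unsigned FPUKind) {
  std::vector<llvm::StringRef> Features;
  llvm::ARM::getFPUFeatures(FPUKind, Features);
  return Features;
}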
diff --git a/contrib/llvm/include/llvm/Support/TargetRegistry.h b/contrib/llvm/include/llvm/Support/TargetRegistry.h
index 076558e4df77..954cdb13abaf 100644
--- a/contrib/llvm/include/llvm/Support/TargetRegistry.h
+++ b/contrib/llvm/include/llvm/Support/TargetRegistry.h
@@ -112,7 +112,8 @@ public:
TargetMachine &TM, std::unique_ptr<MCStreamer> &&Streamer);
typedef MCAsmBackend *(*MCAsmBackendCtorTy)(const Target &T,
const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU);
+ const Triple &TT, StringRef CPU,
+ const MCTargetOptions &Options);
typedef MCTargetAsmParser *(*MCAsmParserCtorTy)(
const MCSubtargetInfo &STI, MCAsmParser &P, const MCInstrInfo &MII,
const MCTargetOptions &Options);
@@ -279,6 +280,9 @@ public:
/// hasMCAsmBackend - Check if this target supports .o generation.
bool hasMCAsmBackend() const { return MCAsmBackendCtorFn != nullptr; }
+ /// hasMCAsmParser - Check if this target supports assembly parsing.
+ bool hasMCAsmParser() const { return MCAsmParserCtorFn != nullptr; }
+
/// @}
/// @name Feature Constructors
/// @{
@@ -365,10 +369,12 @@ public:
///
/// \param TheTriple The target triple string.
MCAsmBackend *createMCAsmBackend(const MCRegisterInfo &MRI,
- StringRef TheTriple, StringRef CPU) const {
+ StringRef TheTriple, StringRef CPU,
+ const MCTargetOptions &Options)
+ const {
if (!MCAsmBackendCtorFn)
return nullptr;
- return MCAsmBackendCtorFn(*this, MRI, Triple(TheTriple), CPU);
+ return MCAsmBackendCtorFn(*this, MRI, Triple(TheTriple), CPU, Options);
}
/// createMCAsmParser - Create a target specific assembly parser.
@@ -846,10 +852,13 @@ struct TargetRegistry {
/// target's initialization function. Usage:
///
///
-/// Target TheFooTarget; // The global target instance.
-///
+/// Target &getTheFooTarget() { // The global target instance.
+/// static Target TheFooTarget;
+/// return TheFooTarget;
+/// }
/// extern "C" void LLVMInitializeFooTargetInfo() {
-/// RegisterTarget<Triple::foo> X(TheFooTarget, "foo", "Foo description");
+/// RegisterTarget<Triple::foo> X(getTheFooTarget(), "foo", "Foo
+/// description");
/// }
template <Triple::ArchType TargetArchType = Triple::UnknownArch,
bool HasJIT = false>
@@ -1071,7 +1080,8 @@ template <class MCAsmBackendImpl> struct RegisterMCAsmBackend {
private:
static MCAsmBackend *Allocator(const Target &T, const MCRegisterInfo &MRI,
- const Triple &TheTriple, StringRef CPU) {
+ const Triple &TheTriple, StringRef CPU,
+ const MCTargetOptions &Options) {
return new MCAsmBackendImpl(T, MRI, TheTriple, CPU);
}
};
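A sketch of a caller adapting to the extra MCTargetOptions parameter threaded through createMCAsmBackend above (makeBackend is an illustrative name; a real caller would populate Options from command-line flags):

#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/Support/TargetRegistry.h"

llvm::MCAsmBackend *makeBackend(const llvm::Target &T,
                                const llvm::MCRegisterInfo &MRI,
                                llvm::StringRef TripleName,
                                llvm::StringRef CPU) {
  llvm::MCTargetOptions Options; // defaults; callers usually fill this in
  // Returns null if the target does not provide an MCAsmBackend.
  return T.createMCAsmBackend(MRI, TripleName, CPU, Options);
}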
diff --git a/contrib/llvm/include/llvm/Support/Threading.h b/contrib/llvm/include/llvm/Support/Threading.h
index fe407b725314..4bef7ec8dd3f 100644
--- a/contrib/llvm/include/llvm/Support/Threading.h
+++ b/contrib/llvm/include/llvm/Support/Threading.h
@@ -115,6 +115,13 @@ namespace llvm {
TsanHappensAfter(&flag);
#endif
}
+
+ /// Get the amount of concurrency to use for tasks requiring significant
+ /// memory or other resources. Currently based on physical cores, if
+ /// available for the host system, otherwise falls back to
+ /// thread::hardware_concurrency().
+ /// Returns 1 when LLVM is configured with LLVM_ENABLE_THREADS=OFF.
+ unsigned heavyweight_hardware_concurrency();
}
#endif
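A sketch of sizing a worker pool with the new helper (runHeavyJobs is an illustrative function, not part of the patch):

#include "llvm/Support/Threading.h"
#include <atomic>
#include <functional>
#include <thread>
#include <vector>

// Run memory-heavy jobs on at most heavyweight_hardware_concurrency() threads.
void runHeavyJobs(std::vector<std::function<void()>> Jobs) {
  unsigned NumThreads = llvm::heavyweight_hardware_concurrency();
  std::atomic<size_t> Next{0};
  std::vector<std::thread> Workers;
  for (unsigned I = 0; I != NumThreads; ++I)
    Workers.emplace_back([&] {
      // Each worker claims job indices atomically until all jobs are done.
      for (size_t J = Next++; J < Jobs.size(); J = Next++)
        Jobs[J]();
    });
  for (std::thread &W : Workers)
    W.join();
}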
diff --git a/contrib/llvm/include/llvm/Support/TimeValue.h b/contrib/llvm/include/llvm/Support/TimeValue.h
deleted file mode 100644
index 6bca58b6bc20..000000000000
--- a/contrib/llvm/include/llvm/Support/TimeValue.h
+++ /dev/null
@@ -1,386 +0,0 @@
-//===-- TimeValue.h - Declare OS TimeValue Concept --------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This header file declares the operating system TimeValue concept.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_SUPPORT_TIMEVALUE_H
-#define LLVM_SUPPORT_TIMEVALUE_H
-
-#include "llvm/Support/DataTypes.h"
-#include <string>
-
-namespace llvm {
-namespace sys {
- /// This class is used where a precise fixed point in time is required. The
- /// range of TimeValue spans many hundreds of billions of years both past and
- /// present. The precision of TimeValue is to the nanosecond. However, the
- /// actual precision of its values will be determined by the resolution of
- /// the system clock. The TimeValue class is used in conjunction with several
- /// other lib/System interfaces to specify the time at which a call should
- /// timeout, etc.
- /// @since 1.4
- /// @brief Provides an abstraction for a fixed point in time.
- class TimeValue {
-
- /// @name Constants
- /// @{
- public:
-
- /// A constant TimeValue representing the smallest time
- /// value permissible by the class. MinTime is some point
- /// in the distant past, about 300 billion years BCE.
- /// @brief The smallest possible time value.
- static TimeValue MinTime() {
- return TimeValue ( INT64_MIN,0 );
- }
-
- /// A constant TimeValue representing the largest time
- /// value permissible by the class. MaxTime is some point
- /// in the distant future, about 300 billion years AD.
- /// @brief The largest possible time value.
- static TimeValue MaxTime() {
- return TimeValue ( INT64_MAX,0 );
- }
-
- /// A constant TimeValue representing the base time,
- /// or zero time of 00:00:00 (midnight) January 1st, 2000.
- /// @brief 00:00:00 Jan 1, 2000 UTC.
- static TimeValue ZeroTime() {
- return TimeValue ( 0,0 );
- }
-
- /// A constant TimeValue for the Posix base time which is
- /// 00:00:00 (midnight) January 1st, 1970.
- /// @brief 00:00:00 Jan 1, 1970 UTC.
- static TimeValue PosixZeroTime() {
- return TimeValue ( PosixZeroTimeSeconds,0 );
- }
-
- /// A constant TimeValue for the Win32 base time which is
- /// 00:00:00 (midnight) January 1st, 1601.
- /// @brief 00:00:00 Jan 1, 1601 UTC.
- static TimeValue Win32ZeroTime() {
- return TimeValue ( Win32ZeroTimeSeconds,0 );
- }
-
- /// @}
- /// @name Types
- /// @{
- public:
- typedef int64_t SecondsType; ///< Type used for representing seconds.
- typedef int32_t NanoSecondsType;///< Type used for representing nanoseconds.
-
- enum TimeConversions {
- NANOSECONDS_PER_SECOND = 1000000000, ///< One Billion
- MICROSECONDS_PER_SECOND = 1000000, ///< One Million
- MILLISECONDS_PER_SECOND = 1000, ///< One Thousand
- NANOSECONDS_PER_MICROSECOND = 1000, ///< One Thousand
- NANOSECONDS_PER_MILLISECOND = 1000000,///< One Million
- NANOSECONDS_PER_WIN32_TICK = 100 ///< Win32 tick is 10^7 Hz (10ns)
- };
-
- /// @}
- /// @name Constructors
- /// @{
- public:
- /// \brief Default construct a time value, initializing to ZeroTime.
- TimeValue() : seconds_(0), nanos_(0) {}
-
- /// Caller provides the exact value in seconds and nanoseconds. The
- /// \p nanos argument defaults to zero for convenience.
- /// @brief Explicit constructor
- explicit TimeValue (SecondsType seconds, NanoSecondsType nanos = 0)
- : seconds_( seconds ), nanos_( nanos ) { this->normalize(); }
-
- /// Caller provides the exact value as a double in seconds with the
- /// fractional part representing nanoseconds.
- /// @brief Double Constructor.
- explicit TimeValue( double new_time )
- : seconds_( 0 ) , nanos_ ( 0 ) {
- SecondsType integer_part = static_cast<SecondsType>( new_time );
- seconds_ = integer_part;
- nanos_ = static_cast<NanoSecondsType>( (new_time -
- static_cast<double>(integer_part)) * NANOSECONDS_PER_SECOND );
- this->normalize();
- }
-
- /// This is a static constructor that returns a TimeValue that represents
- /// the current time.
- /// @brief Creates a TimeValue with the current time (UTC).
- static TimeValue now();
-
- /// @}
- /// @name Operators
- /// @{
- public:
- /// Add \p that to \p this.
- /// @returns this
- /// @brief Incrementing assignment operator.
- TimeValue& operator += (const TimeValue& that ) {
- this->seconds_ += that.seconds_ ;
- this->nanos_ += that.nanos_ ;
- this->normalize();
- return *this;
- }
-
- /// Subtract \p that from \p this.
- /// @returns this
- /// @brief Decrementing assignment operator.
- TimeValue& operator -= (const TimeValue &that ) {
- this->seconds_ -= that.seconds_ ;
- this->nanos_ -= that.nanos_ ;
- this->normalize();
- return *this;
- }
-
- /// Determine if \p this is less than \p that.
- /// @returns True iff *this < that.
- /// @brief True if this < that.
- int operator < (const TimeValue &that) const { return that > *this; }
-
- /// Determine if \p this is greather than \p that.
- /// @returns True iff *this > that.
- /// @brief True if this > that.
- int operator > (const TimeValue &that) const {
- if ( this->seconds_ > that.seconds_ ) {
- return 1;
- } else if ( this->seconds_ == that.seconds_ ) {
- if ( this->nanos_ > that.nanos_ ) return 1;
- }
- return 0;
- }
-
- /// Determine if \p this is less than or equal to \p that.
- /// @returns True iff *this <= that.
- /// @brief True if this <= that.
- int operator <= (const TimeValue &that) const { return that >= *this; }
-
- /// Determine if \p this is greater than or equal to \p that.
- /// @returns True iff *this >= that.
- int operator >= (const TimeValue &that) const {
- if ( this->seconds_ > that.seconds_ ) {
- return 1;
- } else if ( this->seconds_ == that.seconds_ ) {
- if ( this->nanos_ >= that.nanos_ ) return 1;
- }
- return 0;
- }
-
- /// Determines if two TimeValue objects represent the same moment in time.
- /// @returns True iff *this == that.
- int operator == (const TimeValue &that) const {
- return (this->seconds_ == that.seconds_) &&
- (this->nanos_ == that.nanos_);
- }
-
- /// Determines if two TimeValue objects represent times that are not the
- /// same.
- /// @returns True iff *this != that.
- int operator != (const TimeValue &that) const { return !(*this == that); }
-
- /// Adds two TimeValue objects together.
- /// @returns The sum of the two operands as a new TimeValue
- /// @brief Addition operator.
- friend TimeValue operator + (const TimeValue &tv1, const TimeValue &tv2);
-
- /// Subtracts two TimeValue objects.
- /// @returns The difference of the two operands as a new TimeValue
- /// @brief Subtraction operator.
- friend TimeValue operator - (const TimeValue &tv1, const TimeValue &tv2);
-
- /// @}
- /// @name Accessors
- /// @{
- public:
-
- /// Returns only the seconds component of the TimeValue. The nanoseconds
- /// portion is ignored. No rounding is performed.
- /// @brief Retrieve the seconds component
- SecondsType seconds() const { return seconds_; }
-
- /// Returns only the nanoseconds component of the TimeValue. The seconds
- /// portion is ignored.
- /// @brief Retrieve the nanoseconds component.
- NanoSecondsType nanoseconds() const { return nanos_; }
-
- /// Returns only the fractional portion of the TimeValue rounded down to the
- /// nearest microsecond (divide by one thousand).
- /// @brief Retrieve the fractional part as microseconds;
- uint32_t microseconds() const {
- return nanos_ / NANOSECONDS_PER_MICROSECOND;
- }
-
- /// Returns only the fractional portion of the TimeValue rounded down to the
- /// nearest millisecond (divide by one million).
- /// @brief Retrieve the fractional part as milliseconds;
- uint32_t milliseconds() const {
- return nanos_ / NANOSECONDS_PER_MILLISECOND;
- }
-
- /// Returns the TimeValue as a number of microseconds. Note that the value
- /// returned can overflow because the range of a uint64_t is smaller than
- /// the range of a TimeValue. Nevertheless, this is useful on some operating
- /// systems and is therefore provided.
- /// @brief Convert to a number of microseconds (can overflow)
- uint64_t usec() const {
- return seconds_ * MICROSECONDS_PER_SECOND +
- ( nanos_ / NANOSECONDS_PER_MICROSECOND );
- }
-
- /// Returns the TimeValue as a number of milliseconds. Note that the value
- /// returned can overflow because the range of a uint64_t is smaller than
- /// the range of a TimeValue. Nevertheless, this is useful on some operating
- /// systems and is therefore provided.
- /// @brief Convert to a number of milliseconds (can overflow)
- uint64_t msec() const {
- return seconds_ * MILLISECONDS_PER_SECOND +
- ( nanos_ / NANOSECONDS_PER_MILLISECOND );
- }
-
- /// Converts the TimeValue into the corresponding number of seconds
- /// since the epoch (00:00:00 Jan 1,1970).
- uint64_t toEpochTime() const {
- return seconds_ - PosixZeroTimeSeconds;
- }
-
- /// Converts the TimeValue into the corresponding number of "ticks" for
- /// Win32 platforms, correcting for the difference in Win32 zero time.
- /// @brief Convert to Win32's FILETIME
- /// (100ns intervals since 00:00:00 Jan 1, 1601 UTC)
- uint64_t toWin32Time() const {
- uint64_t result = (uint64_t)10000000 * (seconds_ - Win32ZeroTimeSeconds);
- result += nanos_ / NANOSECONDS_PER_WIN32_TICK;
- return result;
- }
-
- /// Provides the seconds and nanoseconds as results in its arguments after
- /// correction for the Posix zero time.
- /// @brief Convert to timespec time (ala POSIX.1b)
- void getTimespecTime( uint64_t& seconds, uint32_t& nanos ) const {
- seconds = seconds_ - PosixZeroTimeSeconds;
- nanos = nanos_;
- }
-
- /// Provides conversion of the TimeValue into a readable time & date.
- /// @returns std::string containing the readable time value
- /// @brief Convert time to a string.
- std::string str() const;
-
- /// @}
- /// @name Mutators
- /// @{
- public:
- /// The seconds component of the TimeValue is set to \p sec without
- /// modifying the nanoseconds part. This is useful for whole second
- /// arithmetic.
- /// @brief Set the seconds component.
- void seconds (SecondsType sec ) {
- this->seconds_ = sec;
- this->normalize();
- }
-
- /// The nanoseconds component of the TimeValue is set to \p nanos without
- /// modifying the seconds part. This is useful for basic computations
- /// involving just the nanoseconds portion. Note that the TimeValue will be
- /// normalized after this call so that the fractional (nanoseconds) portion
- /// will have the smallest equivalent value.
- /// @brief Set the nanoseconds component using a number of nanoseconds.
- void nanoseconds ( NanoSecondsType nanos ) {
- this->nanos_ = nanos;
- this->normalize();
- }
-
- /// The seconds component remains unchanged.
- /// @brief Set the nanoseconds component using a number of microseconds.
- void microseconds ( int32_t micros ) {
- this->nanos_ = micros * NANOSECONDS_PER_MICROSECOND;
- this->normalize();
- }
-
- /// The seconds component remains unchanged.
- /// @brief Set the nanoseconds component using a number of milliseconds.
- void milliseconds ( int32_t millis ) {
- this->nanos_ = millis * NANOSECONDS_PER_MILLISECOND;
- this->normalize();
- }
-
- /// @brief Converts from microsecond format to TimeValue format
- void usec( int64_t microseconds ) {
- this->seconds_ = microseconds / MICROSECONDS_PER_SECOND;
- this->nanos_ = NanoSecondsType(microseconds % MICROSECONDS_PER_SECOND) *
- NANOSECONDS_PER_MICROSECOND;
- this->normalize();
- }
-
- /// @brief Converts from millisecond format to TimeValue format
- void msec( int64_t milliseconds ) {
- this->seconds_ = milliseconds / MILLISECONDS_PER_SECOND;
- this->nanos_ = NanoSecondsType(milliseconds % MILLISECONDS_PER_SECOND) *
- NANOSECONDS_PER_MILLISECOND;
- this->normalize();
- }
-
- /// Converts the \p seconds argument from PosixTime to the corresponding
- /// TimeValue and assigns that value to \p this.
- /// @brief Convert seconds form PosixTime to TimeValue
- void fromEpochTime( SecondsType seconds ) {
- seconds_ = seconds + PosixZeroTimeSeconds;
- nanos_ = 0;
- this->normalize();
- }
-
- /// Converts the \p win32Time argument from Windows FILETIME to the
- /// corresponding TimeValue and assigns that value to \p this.
- /// @brief Convert seconds form Windows FILETIME to TimeValue
- void fromWin32Time( uint64_t win32Time ) {
- this->seconds_ = win32Time / 10000000 + Win32ZeroTimeSeconds;
- this->nanos_ = NanoSecondsType(win32Time % 10000000) * 100;
- }
-
- /// @}
- /// @name Implementation
- /// @{
- private:
- /// This causes the values to be represented so that the fractional
- /// part is minimized, possibly incrementing the seconds part.
- /// @brief Normalize to canonical form.
- void normalize();
-
- /// @}
- /// @name Data
- /// @{
- private:
- /// Store the values as a <timeval>.
- SecondsType seconds_;///< Stores the seconds part of the TimeVal
- NanoSecondsType nanos_; ///< Stores the nanoseconds part of the TimeVal
-
- static const SecondsType PosixZeroTimeSeconds;
- static const SecondsType Win32ZeroTimeSeconds;
- /// @}
-
- };
-
-inline TimeValue operator + (const TimeValue &tv1, const TimeValue &tv2) {
- TimeValue sum (tv1.seconds_ + tv2.seconds_, tv1.nanos_ + tv2.nanos_);
- sum.normalize ();
- return sum;
-}
-
-inline TimeValue operator - (const TimeValue &tv1, const TimeValue &tv2) {
- TimeValue difference (tv1.seconds_ - tv2.seconds_, tv1.nanos_ - tv2.nanos_ );
- difference.normalize ();
- return difference;
-}
-
-}
-}
-
-#endif
diff --git a/contrib/llvm/include/llvm/Support/Timer.h b/contrib/llvm/include/llvm/Support/Timer.h
index f0cb07599b86..80e8f13dccfe 100644
--- a/contrib/llvm/include/llvm/Support/Timer.h
+++ b/contrib/llvm/include/llvm/Support/Timer.h
@@ -24,17 +24,17 @@ class TimerGroup;
class raw_ostream;
class TimeRecord {
- double WallTime; // Wall clock time elapsed in seconds
- double UserTime; // User time elapsed
- double SystemTime; // System time elapsed
- ssize_t MemUsed; // Memory allocated (in bytes)
+ double WallTime; ///< Wall clock time elapsed in seconds.
+ double UserTime; ///< User time elapsed.
+ double SystemTime; ///< System time elapsed.
+ ssize_t MemUsed; ///< Memory allocated (in bytes).
public:
TimeRecord() : WallTime(0), UserTime(0), SystemTime(0), MemUsed(0) {}
- /// getCurrentTime - Get the current time and memory usage. If Start is true
- /// we get the memory usage before the time, otherwise we get time before
- /// memory usage. This matters if the time to get the memory usage is
- /// significant and shouldn't be counted as part of a duration.
+ /// Get the current time and memory usage. If Start is true we get the memory
+ /// usage before the time, otherwise we get time before memory usage. This
+ /// matters if the time to get the memory usage is significant and shouldn't
+ /// be counted as part of a duration.
static TimeRecord getCurrentTime(bool Start = true);
double getProcessTime() const { return UserTime + SystemTime; }
@@ -43,7 +43,6 @@ public:
double getWallTime() const { return WallTime; }
ssize_t getMemUsed() const { return MemUsed; }
- // operator< - Allow sorting.
bool operator<(const TimeRecord &T) const {
// Sort by Wall Time elapsed, as it is the only thing really accurate
return WallTime < T.WallTime;
@@ -67,27 +66,32 @@ public:
void print(const TimeRecord &Total, raw_ostream &OS) const;
};
-/// Timer - This class is used to track the amount of time spent between
-/// invocations of its startTimer()/stopTimer() methods. Given appropriate OS
-/// support it can also keep track of the RSS of the program at various points.
-/// By default, the Timer will print the amount of time it has captured to
-/// standard error when the last timer is destroyed, otherwise it is printed
-/// when its TimerGroup is destroyed. Timers do not print their information
-/// if they are never started.
-///
+/// This class is used to track the amount of time spent between invocations of
+/// its startTimer()/stopTimer() methods. Given appropriate OS support it can
+/// also keep track of the RSS of the program at various points. By default,
+/// the Timer will print the amount of time it has captured to standard error
+/// when the last timer is destroyed, otherwise it is printed when its
+/// TimerGroup is destroyed. Timers do not print their information if they are
+/// never started.
class Timer {
- TimeRecord Time; // The total time captured
- TimeRecord StartTime; // The time startTimer() was last called
- std::string Name; // The name of this time variable.
- bool Running; // Is the timer currently running?
- bool Triggered; // Has the timer ever been triggered?
- TimerGroup *TG; // The TimerGroup this Timer is in.
-
- Timer **Prev, *Next; // Doubly linked list of timers in the group.
+ TimeRecord Time; ///< The total time captured.
+ TimeRecord StartTime; ///< The time startTimer() was last called.
+ std::string Name; ///< The name of this time variable.
+ std::string Description; ///< Description of this time variable.
+ bool Running; ///< Is the timer currently running?
+ bool Triggered; ///< Has the timer ever been triggered?
+ TimerGroup *TG = nullptr; ///< The TimerGroup this Timer is in.
+
+ Timer **Prev; ///< Pointer to \p Next of previous timer in group.
+ Timer *Next; ///< Next timer in the group.
public:
- explicit Timer(StringRef N) : TG(nullptr) { init(N); }
- Timer(StringRef N, TimerGroup &tg) : TG(nullptr) { init(N, tg); }
- Timer(const Timer &RHS) : TG(nullptr) {
+ explicit Timer(StringRef Name, StringRef Description) {
+ init(Name, Description);
+ }
+ Timer(StringRef Name, StringRef Description, TimerGroup &tg) {
+ init(Name, Description, tg);
+ }
+ Timer(const Timer &RHS) {
assert(!RHS.TG && "Can only copy uninitialized timers");
}
const Timer &operator=(const Timer &T) {
@@ -96,12 +100,13 @@ public:
}
~Timer();
- // Create an uninitialized timer, client must use 'init'.
- explicit Timer() : TG(nullptr) {}
- void init(StringRef N);
- void init(StringRef N, TimerGroup &tg);
+ /// Create an uninitialized timer, client must use 'init'.
+ explicit Timer() {}
+ void init(StringRef Name, StringRef Description);
+ void init(StringRef Name, StringRef Description, TimerGroup &tg);
const std::string &getName() const { return Name; }
+ const std::string &getDescription() const { return Description; }
bool isInitialized() const { return TG != nullptr; }
/// Check if the timer is currently running.
@@ -132,7 +137,6 @@ private:
/// stopTimer() methods of the Timer class. When the object is constructed, it
/// starts the timer specified as its argument. When it is destroyed, it stops
/// the relevant timer. This makes it easy to time a region of code.
-///
class TimeRegion {
Timer *T;
TimeRegion(const TimeRegion &) = delete;
@@ -149,51 +153,77 @@ public:
}
};
-/// NamedRegionTimer - This class is basically a combination of TimeRegion and
-/// Timer. It allows you to declare a new timer, AND specify the region to
-/// time, all in one statement. All timers with the same name are merged. This
-/// is primarily used for debugging and for hunting performance problems.
-///
+/// This class is basically a combination of TimeRegion and Timer. It allows
+/// you to declare a new timer, AND specify the region to time, all in one
+/// statement. All timers with the same name are merged. This is primarily
+/// used for debugging and for hunting performance problems.
struct NamedRegionTimer : public TimeRegion {
- explicit NamedRegionTimer(StringRef Name,
- bool Enabled = true);
- explicit NamedRegionTimer(StringRef Name, StringRef GroupName,
- bool Enabled = true);
+ explicit NamedRegionTimer(StringRef Name, StringRef Description,
+ StringRef GroupName,
+ StringRef GroupDescription, bool Enabled = true);
};
/// The TimerGroup class is used to group together related timers into a single
/// report that is printed when the TimerGroup is destroyed. It is illegal to
/// destroy a TimerGroup object before all of the Timers in it are gone. A
/// TimerGroup can be specified for a newly created timer in its constructor.
-///
class TimerGroup {
+ struct PrintRecord {
+ TimeRecord Time;
+ std::string Name;
+ std::string Description;
+
+ PrintRecord(const PrintRecord &Other) = default;
+ PrintRecord(const TimeRecord &Time, const std::string &Name,
+ const std::string &Description)
+ : Time(Time), Name(Name), Description(Description) {}
+
+ bool operator <(const PrintRecord &Other) const {
+ return Time < Other.Time;
+ }
+ };
std::string Name;
- Timer *FirstTimer; // First timer in the group.
- std::vector<std::pair<TimeRecord, std::string>> TimersToPrint;
+ std::string Description;
+ Timer *FirstTimer = nullptr; ///< First timer in the group.
+ std::vector<PrintRecord> TimersToPrint;
- TimerGroup **Prev, *Next; // Doubly linked list of TimerGroup's.
+ TimerGroup **Prev; ///< Pointer to Next field of previous timergroup in list.
+ TimerGroup *Next; ///< Pointer to next timergroup in list.
TimerGroup(const TimerGroup &TG) = delete;
void operator=(const TimerGroup &TG) = delete;
public:
- explicit TimerGroup(StringRef name);
+ explicit TimerGroup(StringRef Name, StringRef Description);
~TimerGroup();
- void setName(StringRef name) { Name.assign(name.begin(), name.end()); }
+ void setName(StringRef NewName, StringRef NewDescription) {
+ Name.assign(NewName.begin(), NewName.end());
+ Description.assign(NewDescription.begin(), NewDescription.end());
+ }
- /// print - Print any started timers in this group and zero them.
+ /// Print any started timers in this group and zero them.
void print(raw_ostream &OS);
- /// printAll - This static method prints all timers and clears them all out.
+ /// This static method prints all timers and clears them all out.
static void printAll(raw_ostream &OS);
+ /// Ensure global timer group lists are initialized. This function is mostly
+ /// used by the Statistic code to influence the construction and destruction
+ /// order of the global timer lists.
+ static void ConstructTimerLists();
private:
friend class Timer;
+ friend void PrintStatisticsJSON(raw_ostream &OS);
void addTimer(Timer &T);
void removeTimer(Timer &T);
+ void prepareToPrintList();
void PrintQueuedTimers(raw_ostream &OS);
+ void printJSONValue(raw_ostream &OS, const PrintRecord &R,
+ const char *suffix, double Value);
+ const char *printJSONValues(raw_ostream &OS, const char *delim);
+ static const char *printAllJSONValues(raw_ostream &OS, const char *delim);
};
-} // End llvm namespace
+} // end namespace llvm
#endif
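A sketch of the new constructor shape: timers now take a machine-readable name plus a human-readable description, and NamedRegionTimer always names its group as well (the strings below are illustrative):

#include "llvm/Support/Timer.h"

void doTimedWork() {
  // Times the enclosing scope; all timers sharing ("isel", "codegen") merge.
  llvm::NamedRegionTimer T("isel", "Instruction Selection",
                           "codegen", "Code Generation Passes");
  // ... work to measure ...
}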
diff --git a/contrib/llvm/include/llvm/Support/TrailingObjects.h b/contrib/llvm/include/llvm/Support/TrailingObjects.h
index 5a21cddf9731..4d355724149c 100644
--- a/contrib/llvm/include/llvm/Support/TrailingObjects.h
+++ b/contrib/llvm/include/llvm/Support/TrailingObjects.h
@@ -62,7 +62,7 @@ namespace trailing_objects_internal {
template <typename First, typename... Rest> class AlignmentCalcHelper {
private:
enum {
- FirstAlignment = AlignOf<First>::Alignment,
+ FirstAlignment = alignof(First),
RestAlignment = AlignmentCalcHelper<Rest...>::Alignment,
};
@@ -74,7 +74,7 @@ public:
template <typename First> class AlignmentCalcHelper<First> {
public:
- enum { Alignment = AlignOf<First>::Alignment };
+ enum { Alignment = alignof(First) };
};
/// The base class for TrailingObjects* classes.
@@ -127,29 +127,33 @@ template <typename Ty1, typename Ty2> struct ExtractSecondType {
template <int Align, typename BaseTy, typename TopTrailingObj, typename PrevTy,
typename... MoreTys>
-struct TrailingObjectsImpl {
+class TrailingObjectsImpl {
// The main template definition is never used -- the two
// specializations cover all possibilities.
};
template <int Align, typename BaseTy, typename TopTrailingObj, typename PrevTy,
typename NextTy, typename... MoreTys>
-struct TrailingObjectsImpl<Align, BaseTy, TopTrailingObj, PrevTy, NextTy,
- MoreTys...>
+class TrailingObjectsImpl<Align, BaseTy, TopTrailingObj, PrevTy, NextTy,
+ MoreTys...>
: public TrailingObjectsImpl<Align, BaseTy, TopTrailingObj, NextTy,
MoreTys...> {
typedef TrailingObjectsImpl<Align, BaseTy, TopTrailingObj, NextTy, MoreTys...>
ParentType;
- // Ensure the methods we inherit are not hidden.
- using ParentType::getTrailingObjectsImpl;
- using ParentType::additionalSizeToAllocImpl;
+ struct RequiresRealignment {
+ static const bool value = alignof(PrevTy) < alignof(NextTy);
+ };
- static LLVM_CONSTEXPR bool requiresRealignment() {
- return llvm::AlignOf<PrevTy>::Alignment < llvm::AlignOf<NextTy>::Alignment;
+ static constexpr bool requiresRealignment() {
+ return RequiresRealignment::value;
}
+protected:
+ // Ensure the inherited getTrailingObjectsImpl is not hidden.
+ using ParentType::getTrailingObjectsImpl;
+
// These two functions are helper functions for
// TrailingObjects::getTrailingObjects. They recurse to the left --
// the result for each type in the list of trailing types depends on
@@ -169,7 +173,7 @@ struct TrailingObjectsImpl<Align, BaseTy, TopTrailingObj, PrevTy, NextTy,
if (requiresRealignment())
return reinterpret_cast<const NextTy *>(
- llvm::alignAddr(Ptr, llvm::alignOf<NextTy>()));
+ llvm::alignAddr(Ptr, alignof(NextTy)));
else
return reinterpret_cast<const NextTy *>(Ptr);
}
@@ -183,8 +187,7 @@ struct TrailingObjectsImpl<Align, BaseTy, TopTrailingObj, PrevTy, NextTy,
Obj, TrailingObjectsBase::OverloadToken<PrevTy>());
if (requiresRealignment())
- return reinterpret_cast<NextTy *>(
- llvm::alignAddr(Ptr, llvm::alignOf<NextTy>()));
+ return reinterpret_cast<NextTy *>(llvm::alignAddr(Ptr, alignof(NextTy)));
else
return reinterpret_cast<NextTy *>(Ptr);
}
@@ -192,13 +195,12 @@ struct TrailingObjectsImpl<Align, BaseTy, TopTrailingObj, PrevTy, NextTy,
// Helper function for TrailingObjects::additionalSizeToAlloc: this
// function recurses to superclasses, each of which requires one
// fewer size_t argument, and adds its own size.
- static LLVM_CONSTEXPR size_t additionalSizeToAllocImpl(
+ static constexpr size_t additionalSizeToAllocImpl(
size_t SizeSoFar, size_t Count1,
typename ExtractSecondType<MoreTys, size_t>::type... MoreCounts) {
- return additionalSizeToAllocImpl(
- (requiresRealignment()
- ? llvm::alignTo(SizeSoFar, llvm::alignOf<NextTy>())
- : SizeSoFar) +
+ return ParentType::additionalSizeToAllocImpl(
+ (requiresRealignment() ? llvm::alignTo<alignof(NextTy)>(SizeSoFar)
+ : SizeSoFar) +
sizeof(NextTy) * Count1,
MoreCounts...);
}
@@ -207,14 +209,15 @@ struct TrailingObjectsImpl<Align, BaseTy, TopTrailingObj, PrevTy, NextTy,
// The base case of the TrailingObjectsImpl inheritance recursion,
// when there's no more trailing types.
template <int Align, typename BaseTy, typename TopTrailingObj, typename PrevTy>
-struct TrailingObjectsImpl<Align, BaseTy, TopTrailingObj, PrevTy>
+class TrailingObjectsImpl<Align, BaseTy, TopTrailingObj, PrevTy>
: public TrailingObjectsAligner<Align> {
+protected:
// This is a dummy method, only here so the "using" doesn't fail --
// it will never be called, because this function recurses backwards
// up the inheritance chain to subclasses.
static void getTrailingObjectsImpl();
- static LLVM_CONSTEXPR size_t additionalSizeToAllocImpl(size_t SizeSoFar) {
+ static constexpr size_t additionalSizeToAllocImpl(size_t SizeSoFar) {
return SizeSoFar;
}
@@ -235,7 +238,7 @@ class TrailingObjects : private trailing_objects_internal::TrailingObjectsImpl<
BaseTy, TrailingTys...> {
template <int A, typename B, typename T, typename P, typename... M>
- friend struct trailing_objects_internal::TrailingObjectsImpl;
+ friend class trailing_objects_internal::TrailingObjectsImpl;
template <typename... Tys> class Foo {};
@@ -323,11 +326,10 @@ public:
/// used in the class; they are supplied here redundantly only so
/// that it's clear what the counts are counting in callers.
template <typename... Tys>
- static LLVM_CONSTEXPR typename std::enable_if<
+ static constexpr typename std::enable_if<
std::is_same<Foo<TrailingTys...>, Foo<Tys...>>::value, size_t>::type
- additionalSizeToAlloc(
- typename trailing_objects_internal::ExtractSecondType<
- TrailingTys, size_t>::type... Counts) {
+ additionalSizeToAlloc(typename trailing_objects_internal::ExtractSecondType<
+ TrailingTys, size_t>::type... Counts) {
return ParentType::additionalSizeToAllocImpl(0, Counts...);
}
@@ -336,10 +338,10 @@ public:
/// additionalSizeToAlloc, except it *does* include the size of the base
/// object.
template <typename... Tys>
- static LLVM_CONSTEXPR typename std::enable_if<
+ static constexpr typename std::enable_if<
std::is_same<Foo<TrailingTys...>, Foo<Tys...>>::value, size_t>::type
- totalSizeToAlloc(typename trailing_objects_internal::ExtractSecondType<
- TrailingTys, size_t>::type... Counts) {
+ totalSizeToAlloc(typename trailing_objects_internal::ExtractSecondType<
+ TrailingTys, size_t>::type... Counts) {
return sizeof(BaseTy) + ParentType::additionalSizeToAllocImpl(0, Counts...);
}
@@ -361,9 +363,7 @@ public:
template <typename... Tys> struct FixedSizeStorage {
template <size_t... Counts> struct with_counts {
enum { Size = totalSizeToAlloc<Tys...>(Counts...) };
- typedef llvm::AlignedCharArray<
- llvm::AlignOf<BaseTy>::Alignment, Size
- > type;
+ typedef llvm::AlignedCharArray<alignof(BaseTy), Size> type;
};
};
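For orientation, a typical (hypothetical) user of TrailingObjects, showing what the alignment bookkeeping reworked above computes: totalSizeToAlloc accounts for padding between the int and double arrays when realignment is required.

#include "llvm/Support/TrailingObjects.h"
#include <cstddef>
#include <new>

// A node followed in memory by NumInts ints and then NumDoubles doubles.
class VarLengthNode final
    : private llvm::TrailingObjects<VarLengthNode, int, double> {
  friend TrailingObjects;

  unsigned NumInts, NumDoubles;

  // Tells TrailingObjects how many ints precede the double array.
  size_t numTrailingObjects(OverloadToken<int>) const { return NumInts; }

  VarLengthNode(unsigned NumInts, unsigned NumDoubles)
      : NumInts(NumInts), NumDoubles(NumDoubles) {}

public:
  static VarLengthNode *create(unsigned NumInts, unsigned NumDoubles) {
    void *Mem =
        ::operator new(totalSizeToAlloc<int, double>(NumInts, NumDoubles));
    return new (Mem) VarLengthNode(NumInts, NumDoubles);
  }

  int *ints() { return getTrailingObjects<int>(); }
  double *doubles() { return getTrailingObjects<double>(); }
};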
diff --git a/contrib/llvm/include/llvm/Support/TrigramIndex.h b/contrib/llvm/include/llvm/Support/TrigramIndex.h
new file mode 100644
index 000000000000..da0b6daf47ed
--- /dev/null
+++ b/contrib/llvm/include/llvm/Support/TrigramIndex.h
@@ -0,0 +1,70 @@
+//===-- TrigramIndex.h - a heuristic for SpecialCaseList --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//===----------------------------------------------------------------------===//
+//
+// TrigramIndex implements a heuristic for SpecialCaseList that allows
+// filtering out ~99% of incoming queries when all regular expressions in the
+// SpecialCaseList are simple wildcards with '*' and '.'. If rules are more
+// complicated, the check is defeated and it will always pass the queries to a
+// full regex.
+//
+// The basic idea is that in order for a wildcard to match a query, the query
+// needs to have all trigrams which occur in the wildcard. We create a trigram
+// index (trigram -> list of rules with it) and then count trigrams in the query
+// for each rule. If the count for one of the rules reaches the expected value,
+// the check passes the query to a regex. If none of the rules got enough
+// trigrams, the check tells that the query is definitely not matched by any
+// of the rules, and no regex matching is needed.
+// A similar idea was used in Google Code Search as described in the blog post:
+// https://swtch.com/~rsc/regexp/regexp4.html
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_TRIGRAMINDEX_H
+#define LLVM_SUPPORT_TRIGRAMINDEX_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringMap.h"
+
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+namespace llvm {
+class StringRef;
+
+class TrigramIndex {
+ public:
+ /// Inserts a new Regex into the index.
+ void insert(std::string Regex);
+
+ /// Returns true if the special case list definitely does not have a line
+ /// that matches the query; returns false if it is not sure.
+ bool isDefinitelyOut(StringRef Query) const;
+
+ /// Returns true iff the heuristic is defeated and not useful.
+ /// In this case isDefinitelyOut always returns false.
+ bool isDefeated() { return Defeated; }
+ private:
+ // If true, the rules are too complicated for the check to work, and full
+ // regex matching is needed for every rule.
+ bool Defeated = false;
+ // The minimum number of trigrams which should match for a rule to have a
+ // chance to match the query. The number of elements equals the number of
+ // regex rules in the SpecialCaseList.
+ std::vector<unsigned> Counts;
+ // Index holds a list of rules indices for each trigram. The same indices
+ // are used in Counts to store per-rule limits.
+ // If a trigram is too common (>4 rules with it), we stop tracking it,
+ // which increases the probability for a need to match using regex, but
+ // decreases the costs in the regular case.
+ std::unordered_map<unsigned, SmallVector<size_t, 4>> Index{256};
+};
+
+} // namespace llvm
+
+#endif // LLVM_SUPPORT_TRIGRAMINDEX_H
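An illustrative sketch of how a client such as SpecialCaseList would sit the index in front of regex matching; the surrounding code is hypothetical, only the TrigramIndex calls come from the header above.

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/TrigramIndex.h"
#include <vector>

// Cheap pre-filter: consult the trigram index before running any regex.
bool matchesAnyRule(llvm::TrigramIndex &Index, std::vector<llvm::Regex> &Rules,
                    llvm::StringRef Query) {
  if (!Index.isDefeated() && Index.isDefinitelyOut(Query))
    return false; // No rule can possibly match; skip regex matching entirely.
  for (llvm::Regex &R : Rules)
    if (R.match(Query))
      return true;
  return false;
}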
diff --git a/contrib/llvm/include/llvm/Support/UnicodeCharRanges.h b/contrib/llvm/include/llvm/Support/UnicodeCharRanges.h
index 134698c3ec6b..d4d4d8eb84a4 100644
--- a/contrib/llvm/include/llvm/Support/UnicodeCharRanges.h
+++ b/contrib/llvm/include/llvm/Support/UnicodeCharRanges.h
@@ -56,7 +56,7 @@ public:
// may get rid of NDEBUG in this header. Unfortunately there are some
// problems to get this working with MSVC 2013. Change this when
// the support for MSVC 2013 is dropped.
- LLVM_CONSTEXPR UnicodeCharSet(CharRanges Ranges) : Ranges(Ranges) {}
+ constexpr UnicodeCharSet(CharRanges Ranges) : Ranges(Ranges) {}
#else
UnicodeCharSet(CharRanges Ranges) : Ranges(Ranges) {
assert(rangesAreValid());
diff --git a/contrib/llvm/include/llvm/Support/Wasm.h b/contrib/llvm/include/llvm/Support/Wasm.h
new file mode 100644
index 000000000000..8ac6b9038e91
--- /dev/null
+++ b/contrib/llvm/include/llvm/Support/Wasm.h
@@ -0,0 +1,87 @@
+//===- Wasm.h - Wasm object file format -------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines manifest constants for the wasm object file format.
+// See: https://github.com/WebAssembly/design/blob/master/BinaryEncoding.md
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_WASM_H
+#define LLVM_SUPPORT_WASM_H
+
+#include "llvm/ADT/ArrayRef.h"
+
+namespace llvm {
+namespace wasm {
+
+// Object file magic string.
+const char WasmMagic[] = {'\0', 'a', 's', 'm'};
+// Wasm binary format version
+const uint32_t WasmVersion = 0xd;
+
+struct WasmObjectHeader {
+ StringRef Magic;
+ uint32_t Version;
+};
+
+struct WasmSection {
+ uint32_t Type; // Section type (See below)
+ uint32_t Offset; // Offset with in the file
+ StringRef Name; // Section name (User-defined sections only)
+ ArrayRef<uint8_t> Content; // Section content
+};
+
+enum : unsigned {
+ WASM_SEC_USER = 0, // User-defined section
+ WASM_SEC_TYPE = 1, // Function signature declarations
+ WASM_SEC_IMPORT = 2, // Import declarations
+ WASM_SEC_FUNCTION = 3, // Function declarations
+ WASM_SEC_TABLE = 4, // Indirect function table and other tables
+ WASM_SEC_MEMORY = 5, // Memory attributes
+ WASM_SEC_GLOBAL = 6, // Global declarations
+ WASM_SEC_EXPORT = 7, // Exports
+ WASM_SEC_START = 8, // Start function declaration
+ WASM_SEC_ELEM = 9, // Elements section
+ WASM_SEC_CODE = 10, // Function bodies (code)
+ WASM_SEC_DATA = 11 // Data segments
+};
+
+// Type immediate encodings used in various contexts.
+enum : unsigned {
+ WASM_TYPE_I32 = 0x7f,
+ WASM_TYPE_I64 = 0x7e,
+ WASM_TYPE_F32 = 0x7d,
+ WASM_TYPE_F64 = 0x7c,
+ WASM_TYPE_ANYFUNC = 0x70,
+ WASM_TYPE_FUNC = 0x60,
+ WASM_TYPE_NORESULT = 0x40, // for blocks with no result values
+};
+
+// Kinds of externals (for imports and exports).
+enum : unsigned {
+ WASM_EXTERNAL_FUNCTION = 0x0,
+ WASM_EXTERNAL_TABLE = 0x1,
+ WASM_EXTERNAL_MEMORY = 0x2,
+ WASM_EXTERNAL_GLOBAL = 0x3,
+};
+
+// Opcodes used in initializer expressions.
+enum : unsigned {
+ WASM_OPCODE_END = 0x0b,
+ WASM_OPCODE_GET_GLOBAL = 0x23,
+ WASM_OPCODE_I32_CONST = 0x41,
+ WASM_OPCODE_I64_CONST = 0x42,
+ WASM_OPCODE_F32_CONST = 0x43,
+ WASM_OPCODE_F64_CONST = 0x44,
+};
+
+} // end namespace wasm
+} // end namespace llvm
+
+#endif
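A sketch of validating the header constants defined above against a raw buffer (a minimal check, not the actual object-file reader):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Wasm.h"
#include <cstdint>

// Check the 8-byte header: "\0asm" magic followed by a little-endian version.
bool looksLikeWasm(llvm::ArrayRef<uint8_t> Buf) {
  if (Buf.size() < 8)
    return false;
  llvm::StringRef Magic(reinterpret_cast<const char *>(Buf.data()), 4);
  if (Magic != llvm::StringRef(llvm::wasm::WasmMagic, 4))
    return false;
  uint32_t Version = llvm::support::endian::read32le(Buf.data() + 4);
  return Version == llvm::wasm::WasmVersion;
}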
diff --git a/contrib/llvm/include/llvm/Support/YAMLParser.h b/contrib/llvm/include/llvm/Support/YAMLParser.h
index 23014fc10a3f..b9e3fa47752c 100644
--- a/contrib/llvm/include/llvm/Support/YAMLParser.h
+++ b/contrib/llvm/include/llvm/Support/YAMLParser.h
@@ -42,6 +42,7 @@
#include "llvm/Support/Allocator.h"
#include "llvm/Support/SMLoc.h"
#include <map>
+#include <system_error>
#include <utility>
namespace llvm {
@@ -75,9 +76,11 @@ std::string escape(StringRef Input);
class Stream {
public:
/// \brief This keeps a reference to the string referenced by \p Input.
- Stream(StringRef Input, SourceMgr &, bool ShowColors = true);
+ Stream(StringRef Input, SourceMgr &, bool ShowColors = true,
+ std::error_code *EC = nullptr);
- Stream(MemoryBufferRef InputBuffer, SourceMgr &, bool ShowColors = true);
+ Stream(MemoryBufferRef InputBuffer, SourceMgr &, bool ShowColors = true,
+ std::error_code *EC = nullptr);
~Stream();
document_iterator begin();
@@ -144,12 +147,12 @@ public:
unsigned int getType() const { return TypeID; }
void *operator new(size_t Size, BumpPtrAllocator &Alloc,
- size_t Alignment = 16) LLVM_NOEXCEPT {
+ size_t Alignment = 16) noexcept {
return Alloc.Allocate(Size, Alignment);
}
void operator delete(void *Ptr, BumpPtrAllocator &Alloc,
- size_t Size) LLVM_NOEXCEPT {
+ size_t Size) noexcept {
Alloc.Deallocate(Ptr, Size);
}
@@ -157,7 +160,7 @@ protected:
std::unique_ptr<Document> &Doc;
SMRange SourceRange;
- void operator delete(void *) LLVM_NOEXCEPT = delete;
+ void operator delete(void *) noexcept = delete;
~Node() = default;
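A sketch of the new optional std::error_code out-parameter on yaml::Stream (parseable is an illustrative helper; error handling kept minimal):

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/YAMLParser.h"
#include <system_error>

// Returns true if the input parses as YAML without reporting an error.
bool parseable(llvm::StringRef Input) {
  llvm::SourceMgr SM;
  std::error_code EC;
  llvm::yaml::Stream S(Input, SM, /*ShowColors=*/false, &EC);
  S.skip();                     // Parse and validate every document.
  return !S.failed() && !EC;
}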
diff --git a/contrib/llvm/include/llvm/Support/YAMLTraits.h b/contrib/llvm/include/llvm/Support/YAMLTraits.h
index bc3fa8ad11da..38acb36942bc 100644
--- a/contrib/llvm/include/llvm/Support/YAMLTraits.h
+++ b/contrib/llvm/include/llvm/Support/YAMLTraits.h
@@ -14,19 +14,30 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/Support/Compiler.h"
+#include "llvm/Support/AlignOf.h"
+#include "llvm/Support/Allocator.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Regex.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/YAMLParser.h"
#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cctype>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <new>
+#include <string>
#include <system_error>
+#include <type_traits>
+#include <vector>
namespace llvm {
namespace yaml {
+struct EmptyContext {};
+
/// This class should be specialized by any type that needs to be converted
/// to/from a YAML mapping. For example:
///
@@ -49,6 +60,28 @@ struct MappingTraits {
// static const bool flow = true;
};
+/// This class is similar to MappingTraits<T> but allows you to pass in
+/// additional context for each map operation. For example:
+///
+/// struct MappingContextTraits<MyStruct, MyContext> {
+/// static void mapping(IO &io, MyStruct &s, MyContext &c) {
+/// io.mapRequired("name", s.name);
+/// io.mapRequired("size", s.size);
+/// io.mapOptional("age", s.age);
+/// ++c.TimesMapped;
+/// }
+/// };
+template <class T, class Context> struct MappingContextTraits {
+ // Must provide:
+ // static void mapping(IO &io, T &fields, Context &Ctx);
+ // Optionally may provide:
+ // static StringRef validate(IO &io, T &fields, Context &Ctx);
+ //
+ // The optional flow flag will cause generated YAML to use a flow mapping
+ // (e.g. { a: 0, b: 1 }):
+ // static const bool flow = true;
+};
+
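As a purely illustrative sketch of how this hooks together: the specialization below mirrors the comment above, and the enclosing MappingTraits hands it a context through the context-taking mapRequired overload added further down in this diff. MyStruct, MyContext, Wrapper, and the key names are invented.

#include "llvm/Support/YAMLTraits.h"
#include <cstdint>
#include <string>

struct MyContext { unsigned TimesMapped = 0; };
struct MyStruct  { std::string name; uint64_t size; };
struct Wrapper   { MyStruct S; };

namespace llvm {
namespace yaml {
template <> struct MappingContextTraits<MyStruct, MyContext> {
  static void mapping(IO &io, MyStruct &s, MyContext &c) {
    io.mapRequired("name", s.name);
    io.mapRequired("size", s.size);
    ++c.TimesMapped;                     // per-map bookkeeping carried by the context
  }
};
template <> struct MappingTraits<Wrapper> {
  static void mapping(IO &io, Wrapper &w) {
    MyContext Ctx;
    io.mapRequired("inner", w.S, Ctx);   // context-taking overload introduced below
  }
};
} // end namespace yaml
} // end namespace llvm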
/// This class should be specialized by any integral type that converts
/// to/from a YAML scalar where there is a one-to-one mapping between
/// in-memory values and a string in YAML. For example:
@@ -114,7 +147,6 @@ struct ScalarTraits {
//static bool mustQuote(StringRef);
};
-
/// This class should be specialized by type that requires custom conversion
/// to/from a YAML literal block scalar. For example:
///
@@ -147,7 +179,7 @@ struct BlockScalarTraits {
/// to/from a YAML sequence. For example:
///
/// template<>
-/// struct SequenceTraits< std::vector<MyType> > {
+/// struct SequenceTraits< std::vector<MyType>> {
/// static size_t size(IO &io, std::vector<MyType> &seq) {
/// return seq.size();
/// }
@@ -177,10 +209,6 @@ struct DocumentListTraits {
// static T::value_type& element(IO &io, T &seq, size_t index);
};
-// Only used by compiler if both template types are the same
-template <typename T, T>
-struct SameType;
-
// Only used for better diagnostics of missing traits
template <typename T>
struct MissingTrait;
@@ -199,7 +227,7 @@ struct has_ScalarEnumerationTraits
public:
static bool const value =
- (sizeof(test<ScalarEnumerationTraits<T> >(nullptr)) == 1);
+ (sizeof(test<ScalarEnumerationTraits<T>>(nullptr)) == 1);
};
// Test if ScalarBitSetTraits<T> is defined on type T.
@@ -215,7 +243,7 @@ struct has_ScalarBitSetTraits
static double test(...);
public:
- static bool const value = (sizeof(test<ScalarBitSetTraits<T> >(nullptr)) == 1);
+ static bool const value = (sizeof(test<ScalarBitSetTraits<T>>(nullptr)) == 1);
};
// Test if ScalarTraits<T> is defined on type T.
@@ -258,11 +286,9 @@ public:
(sizeof(test<BlockScalarTraits<T>>(nullptr, nullptr)) == 1);
};
-// Test if MappingTraits<T> is defined on type T.
-template <class T>
-struct has_MappingTraits
-{
- typedef void (*Signature_mapping)(class IO&, T&);
+// Test if MappingContextTraits<T> is defined on type T.
+template <class T, class Context> struct has_MappingTraits {
+ typedef void (*Signature_mapping)(class IO &, T &, Context &);
template <typename U>
static char test(SameType<Signature_mapping, &U::mapping>*);
@@ -271,14 +297,26 @@ struct has_MappingTraits
static double test(...);
public:
- static bool const value = (sizeof(test<MappingTraits<T> >(nullptr)) == 1);
+ static bool const value =
+ (sizeof(test<MappingContextTraits<T, Context>>(nullptr)) == 1);
};
-// Test if MappingTraits<T>::validate() is defined on type T.
-template <class T>
-struct has_MappingValidateTraits
-{
- typedef StringRef (*Signature_validate)(class IO&, T&);
+// Test if MappingTraits<T> is defined on type T.
+template <class T> struct has_MappingTraits<T, EmptyContext> {
+ typedef void (*Signature_mapping)(class IO &, T &);
+
+ template <typename U>
+ static char test(SameType<Signature_mapping, &U::mapping> *);
+
+ template <typename U> static double test(...);
+
+public:
+ static bool const value = (sizeof(test<MappingTraits<T>>(nullptr)) == 1);
+};
+
+// Test if MappingContextTraits<T>::validate() is defined on type T.
+template <class T, class Context> struct has_MappingValidateTraits {
+ typedef StringRef (*Signature_validate)(class IO &, T &, Context &);
template <typename U>
static char test(SameType<Signature_validate, &U::validate>*);
@@ -287,7 +325,21 @@ struct has_MappingValidateTraits
static double test(...);
public:
- static bool const value = (sizeof(test<MappingTraits<T> >(nullptr)) == 1);
+ static bool const value =
+ (sizeof(test<MappingContextTraits<T, Context>>(nullptr)) == 1);
+};
+
+// Test if MappingTraits<T>::validate() is defined on type T.
+template <class T> struct has_MappingValidateTraits<T, EmptyContext> {
+ typedef StringRef (*Signature_validate)(class IO &, T &);
+
+ template <typename U>
+ static char test(SameType<Signature_validate, &U::validate> *);
+
+ template <typename U> static double test(...);
+
+public:
+ static bool const value = (sizeof(test<MappingTraits<T>>(nullptr)) == 1);
};
// Test if SequenceTraits<T> is defined on type T.
@@ -303,7 +355,7 @@ struct has_SequenceMethodTraits
static double test(...);
public:
- static bool const value = (sizeof(test<SequenceTraits<T> >(nullptr)) == 1);
+ static bool const value = (sizeof(test<SequenceTraits<T>>(nullptr)) == 1);
};
// has_FlowTraits<int> will cause an error with some compilers because
@@ -353,7 +405,7 @@ struct has_DocumentListTraits
static double test(...);
public:
- static bool const value = (sizeof(test<DocumentListTraits<T> >(nullptr))==1);
+ static bool const value = (sizeof(test<DocumentListTraits<T>>(nullptr))==1);
};
inline bool isNumber(StringRef S) {
@@ -432,29 +484,32 @@ inline bool needsQuotes(StringRef S) {
return false;
}
-template<typename T>
-struct missingTraits : public std::integral_constant<bool,
- !has_ScalarEnumerationTraits<T>::value
- && !has_ScalarBitSetTraits<T>::value
- && !has_ScalarTraits<T>::value
- && !has_BlockScalarTraits<T>::value
- && !has_MappingTraits<T>::value
- && !has_SequenceTraits<T>::value
- && !has_DocumentListTraits<T>::value > {};
-
-template<typename T>
-struct validatedMappingTraits : public std::integral_constant<bool,
- has_MappingTraits<T>::value
- && has_MappingValidateTraits<T>::value> {};
+template <typename T, typename Context>
+struct missingTraits
+ : public std::integral_constant<bool,
+ !has_ScalarEnumerationTraits<T>::value &&
+ !has_ScalarBitSetTraits<T>::value &&
+ !has_ScalarTraits<T>::value &&
+ !has_BlockScalarTraits<T>::value &&
+ !has_MappingTraits<T, Context>::value &&
+ !has_SequenceTraits<T>::value &&
+ !has_DocumentListTraits<T>::value> {};
+
+template <typename T, typename Context>
+struct validatedMappingTraits
+ : public std::integral_constant<
+ bool, has_MappingTraits<T, Context>::value &&
+ has_MappingValidateTraits<T, Context>::value> {};
+
+template <typename T, typename Context>
+struct unvalidatedMappingTraits
+ : public std::integral_constant<
+ bool, has_MappingTraits<T, Context>::value &&
+ !has_MappingValidateTraits<T, Context>::value> {};
-template<typename T>
-struct unvalidatedMappingTraits : public std::integral_constant<bool,
- has_MappingTraits<T>::value
- && !has_MappingValidateTraits<T>::value> {};
// Base class for Input and Output.
class IO {
public:
-
IO(void *Ctxt=nullptr);
virtual ~IO();
@@ -512,9 +567,10 @@ public:
template <typename FBT, typename T>
void enumFallback(T &Val) {
if (matchEnumFallback()) {
+ EmptyContext Context;
// FIXME: Force integral conversion to allow strong typedefs to convert.
FBT Res = static_cast<typename FBT::BaseType>(Val);
- yamlize(*this, Res, true);
+ yamlize(*this, Res, true, Context);
Val = static_cast<T>(static_cast<typename FBT::BaseType>(Res));
}
}
@@ -550,40 +606,58 @@ public:
void *getContext();
void setContext(void *);
- template <typename T>
- void mapRequired(const char* Key, T& Val) {
- this->processKey(Key, Val, true);
+ template <typename T> void mapRequired(const char *Key, T &Val) {
+ EmptyContext Ctx;
+ this->processKey(Key, Val, true, Ctx);
+ }
+ template <typename T, typename Context>
+ void mapRequired(const char *Key, T &Val, Context &Ctx) {
+ this->processKey(Key, Val, true, Ctx);
+ }
+
+ template <typename T> void mapOptional(const char *Key, T &Val) {
+ EmptyContext Ctx;
+ mapOptionalWithContext(Key, Val, Ctx);
}
template <typename T>
- typename std::enable_if<has_SequenceTraits<T>::value,void>::type
- mapOptional(const char* Key, T& Val) {
+ void mapOptional(const char *Key, T &Val, const T &Default) {
+ EmptyContext Ctx;
+ mapOptionalWithContext(Key, Val, Default, Ctx);
+ }
+
+ template <typename T, typename Context>
+ typename std::enable_if<has_SequenceTraits<T>::value, void>::type
+ mapOptionalWithContext(const char *Key, T &Val, Context &Ctx) {
// omit key/value instead of outputting empty sequence
- if ( this->canElideEmptySequence() && !(Val.begin() != Val.end()) )
+ if (this->canElideEmptySequence() && !(Val.begin() != Val.end()))
return;
- this->processKey(Key, Val, false);
+ this->processKey(Key, Val, false, Ctx);
}
- template <typename T>
- void mapOptional(const char* Key, Optional<T> &Val) {
- processKeyWithDefault(Key, Val, Optional<T>(), /*Required=*/false);
+ template <typename T, typename Context>
+ void mapOptionalWithContext(const char *Key, Optional<T> &Val, Context &Ctx) {
+ this->processKeyWithDefault(Key, Val, Optional<T>(), /*Required=*/false,
+ Ctx);
}
- template <typename T>
- typename std::enable_if<!has_SequenceTraits<T>::value,void>::type
- mapOptional(const char* Key, T& Val) {
- this->processKey(Key, Val, false);
+ template <typename T, typename Context>
+ typename std::enable_if<!has_SequenceTraits<T>::value, void>::type
+ mapOptionalWithContext(const char *Key, T &Val, Context &Ctx) {
+ this->processKey(Key, Val, false, Ctx);
}
- template <typename T>
- void mapOptional(const char* Key, T& Val, const T& Default) {
- this->processKeyWithDefault(Key, Val, Default, false);
+ template <typename T, typename Context>
+ void mapOptionalWithContext(const char *Key, T &Val, const T &Default,
+ Context &Ctx) {
+ this->processKeyWithDefault(Key, Val, Default, false, Ctx);
}
private:
- template <typename T>
+ template <typename T, typename Context>
void processKeyWithDefault(const char *Key, Optional<T> &Val,
- const Optional<T> &DefaultValue, bool Required) {
+ const Optional<T> &DefaultValue, bool Required,
+ Context &Ctx) {
assert(DefaultValue.hasValue() == false &&
"Optional<T> shouldn't have a value!");
void *SaveInfo;
@@ -593,7 +667,7 @@ private:
Val = T();
if (this->preflightKey(Key, Required, sameAsDefault, UseDefault,
SaveInfo)) {
- yamlize(*this, Val.getValue(), Required);
+ yamlize(*this, Val.getValue(), Required, Ctx);
this->postflightKey(SaveInfo);
} else {
if (UseDefault)
@@ -601,15 +675,15 @@ private:
}
}
- template <typename T>
- void processKeyWithDefault(const char *Key, T &Val, const T& DefaultValue,
- bool Required) {
+ template <typename T, typename Context>
+ void processKeyWithDefault(const char *Key, T &Val, const T &DefaultValue,
+ bool Required, Context &Ctx) {
void *SaveInfo;
bool UseDefault;
const bool sameAsDefault = outputting() && Val == DefaultValue;
if ( this->preflightKey(Key, Required, sameAsDefault, UseDefault,
SaveInfo) ) {
- yamlize(*this, Val, Required);
+ yamlize(*this, Val, Required, Ctx);
this->postflightKey(SaveInfo);
}
else {
@@ -618,12 +692,12 @@ private:
}
}
- template <typename T>
- void processKey(const char *Key, T &Val, bool Required) {
+ template <typename T, typename Context>
+ void processKey(const char *Key, T &Val, bool Required, Context &Ctx) {
void *SaveInfo;
bool UseDefault;
if ( this->preflightKey(Key, Required, false, UseDefault, SaveInfo) ) {
- yamlize(*this, Val, Required);
+ yamlize(*this, Val, Required, Ctx);
this->postflightKey(SaveInfo);
}
}
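User code never calls the processKey* helpers directly; the visible surface is mapRequired plus the mapOptional overloads above, with or without a context and with or without a default. A small hedged sketch, with invented type and key names, of a mapping that mixes all three forms:

#include "llvm/ADT/Optional.h"
#include "llvm/Support/YAMLTraits.h"
#include <cstdint>
#include <string>

struct ToolConfig {
  std::string Triple;
  uint32_t OptLevel;
  llvm::Optional<std::string> OutputFile;     // left unset when the key is absent
};

namespace llvm {
namespace yaml {
template <> struct MappingTraits<ToolConfig> {
  static void mapping(IO &io, ToolConfig &C) {
    io.mapRequired("triple", C.Triple);
    io.mapOptional("opt-level", C.OptLevel, uint32_t(2));  // default used when missing
    io.mapOptional("output", C.OutputFile);                // Optional<T> overload
  }
};
} // end namespace yaml
} // end namespace llvm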
@@ -632,17 +706,30 @@ private:
void *Ctxt;
};
-template<typename T>
-typename std::enable_if<has_ScalarEnumerationTraits<T>::value,void>::type
-yamlize(IO &io, T &Val, bool) {
+namespace detail {
+
+template <typename T, typename Context>
+void doMapping(IO &io, T &Val, Context &Ctx) {
+ MappingContextTraits<T, Context>::mapping(io, Val, Ctx);
+}
+
+template <typename T> void doMapping(IO &io, T &Val, EmptyContext &Ctx) {
+ MappingTraits<T>::mapping(io, Val);
+}
+
+} // end namespace detail
+
+template <typename T>
+typename std::enable_if<has_ScalarEnumerationTraits<T>::value, void>::type
+yamlize(IO &io, T &Val, bool, EmptyContext &Ctx) {
io.beginEnumScalar();
ScalarEnumerationTraits<T>::enumeration(io, Val);
io.endEnumScalar();
}
-template<typename T>
-typename std::enable_if<has_ScalarBitSetTraits<T>::value,void>::type
-yamlize(IO &io, T &Val, bool) {
+template <typename T>
+typename std::enable_if<has_ScalarBitSetTraits<T>::value, void>::type
+yamlize(IO &io, T &Val, bool, EmptyContext &Ctx) {
bool DoClear;
if ( io.beginBitSetScalar(DoClear) ) {
if ( DoClear )
@@ -652,9 +739,9 @@ yamlize(IO &io, T &Val, bool) {
}
}
-template<typename T>
-typename std::enable_if<has_ScalarTraits<T>::value,void>::type
-yamlize(IO &io, T &Val, bool) {
+template <typename T>
+typename std::enable_if<has_ScalarTraits<T>::value, void>::type
+yamlize(IO &io, T &Val, bool, EmptyContext &Ctx) {
if ( io.outputting() ) {
std::string Storage;
llvm::raw_string_ostream Buffer(Storage);
@@ -674,7 +761,7 @@ yamlize(IO &io, T &Val, bool) {
template <typename T>
typename std::enable_if<has_BlockScalarTraits<T>::value, void>::type
-yamlize(IO &YamlIO, T &Val, bool) {
+yamlize(IO &YamlIO, T &Val, bool, EmptyContext &Ctx) {
if (YamlIO.outputting()) {
std::string Storage;
llvm::raw_string_ostream Buffer(Storage);
@@ -691,9 +778,9 @@ yamlize(IO &YamlIO, T &Val, bool) {
}
}
-template<typename T>
-typename std::enable_if<validatedMappingTraits<T>::value, void>::type
-yamlize(IO &io, T &Val, bool) {
+template <typename T, typename Context>
+typename std::enable_if<validatedMappingTraits<T, Context>::value, void>::type
+yamlize(IO &io, T &Val, bool, Context &Ctx) {
if (has_FlowTraits<MappingTraits<T>>::value)
io.beginFlowMapping();
else
@@ -705,7 +792,7 @@ yamlize(IO &io, T &Val, bool) {
assert(Err.empty() && "invalid struct trying to be written as yaml");
}
}
- MappingTraits<T>::mapping(io, Val);
+ detail::doMapping(io, Val, Ctx);
if (!io.outputting()) {
StringRef Err = MappingTraits<T>::validate(io, Val);
if (!Err.empty())
@@ -717,36 +804,36 @@ yamlize(IO &io, T &Val, bool) {
io.endMapping();
}
-template<typename T>
-typename std::enable_if<unvalidatedMappingTraits<T>::value, void>::type
-yamlize(IO &io, T &Val, bool) {
+template <typename T, typename Context>
+typename std::enable_if<unvalidatedMappingTraits<T, Context>::value, void>::type
+yamlize(IO &io, T &Val, bool, Context &Ctx) {
if (has_FlowTraits<MappingTraits<T>>::value) {
io.beginFlowMapping();
- MappingTraits<T>::mapping(io, Val);
+ detail::doMapping(io, Val, Ctx);
io.endFlowMapping();
} else {
io.beginMapping();
- MappingTraits<T>::mapping(io, Val);
+ detail::doMapping(io, Val, Ctx);
io.endMapping();
}
}
-template<typename T>
-typename std::enable_if<missingTraits<T>::value, void>::type
-yamlize(IO &io, T &Val, bool) {
+template <typename T>
+typename std::enable_if<missingTraits<T, EmptyContext>::value, void>::type
+yamlize(IO &io, T &Val, bool, EmptyContext &Ctx) {
char missing_yaml_trait_for_type[sizeof(MissingTrait<T>)];
}
-template<typename T>
-typename std::enable_if<has_SequenceTraits<T>::value,void>::type
-yamlize(IO &io, T &Seq, bool) {
+template <typename T, typename Context>
+typename std::enable_if<has_SequenceTraits<T>::value, void>::type
+yamlize(IO &io, T &Seq, bool, Context &Ctx) {
if ( has_FlowTraits< SequenceTraits<T> >::value ) {
unsigned incnt = io.beginFlowSequence();
unsigned count = io.outputting() ? SequenceTraits<T>::size(io, Seq) : incnt;
for(unsigned i=0; i < count; ++i) {
void *SaveInfo;
if ( io.preflightFlowElement(i, SaveInfo) ) {
- yamlize(io, SequenceTraits<T>::element(io, Seq, i), true);
+ yamlize(io, SequenceTraits<T>::element(io, Seq, i), true, Ctx);
io.postflightFlowElement(SaveInfo);
}
}
@@ -758,7 +845,7 @@ yamlize(IO &io, T &Seq, bool) {
for(unsigned i=0; i < count; ++i) {
void *SaveInfo;
if ( io.preflightElement(i, SaveInfo) ) {
- yamlize(io, SequenceTraits<T>::element(io, Seq, i), true);
+ yamlize(io, SequenceTraits<T>::element(io, Seq, i), true, Ctx);
io.postflightElement(SaveInfo);
}
}
@@ -871,6 +958,7 @@ struct ScalarTraits<support::detail::packed_endian_specific_integral<
llvm::raw_ostream &Stream) {
ScalarTraits<value_type>::output(static_cast<value_type>(E), Ctx, Stream);
}
+
static StringRef input(StringRef Str, void *Ctx, endian_type &E) {
value_type V;
auto R = ScalarTraits<value_type>::input(Str, Ctx, V);
@@ -1010,9 +1098,11 @@ private:
class HNode {
virtual void anchor();
+
public:
HNode(Node *n) : _node(n) { }
- virtual ~HNode() { }
+ virtual ~HNode() = default;
+
static inline bool classof(const HNode *) { return true; }
Node *_node;
@@ -1020,16 +1110,20 @@ private:
class EmptyHNode : public HNode {
void anchor() override;
+
public:
EmptyHNode(Node *n) : HNode(n) { }
+
static inline bool classof(const HNode *n) {
return NullNode::classof(n->_node);
}
+
static inline bool classof(const EmptyHNode *) { return true; }
};
class ScalarHNode : public HNode {
void anchor() override;
+
public:
ScalarHNode(Node *n, StringRef s) : HNode(n), _value(s) { }
@@ -1039,7 +1133,9 @@ private:
return ScalarNode::classof(n->_node) ||
BlockScalarNode::classof(n->_node);
}
+
static inline bool classof(const ScalarHNode *) { return true; }
+
protected:
StringRef _value;
};
@@ -1053,6 +1149,7 @@ private:
static inline bool classof(const HNode *n) {
return MappingNode::classof(n->_node);
}
+
static inline bool classof(const MapHNode *) { return true; }
typedef llvm::StringMap<std::unique_ptr<HNode>> NameToNode;
@@ -1072,6 +1169,7 @@ private:
static inline bool classof(const HNode *n) {
return SequenceNode::classof(n->_node);
}
+
static inline bool classof(const SequenceHNode *) { return true; }
std::vector<std::unique_ptr<HNode>> Entries;
@@ -1138,7 +1236,7 @@ public:
void blockScalarString(StringRef &) override;
void setError(const Twine &message) override;
bool canElideEmptySequence() override;
-public:
+
// These are only used by operator<<. They could be private
// if that templated operator could be made a friend.
void beginDocuments();
@@ -1185,10 +1283,10 @@ private:
/// Based on BOOST_STRONG_TYPEDEF
#define LLVM_YAML_STRONG_TYPEDEF(_base, _type) \
struct _type { \
- _type() { } \
- _type(const _base v) : value(v) { } \
- _type(const _type &v) : value(v.value) {} \
- _type &operator=(const _type &rhs) { value = rhs.value; return *this; }\
+ _type() = default; \
+ _type(const _base v) : value(v) {} \
+ _type(const _type &v) = default; \
+ _type &operator=(const _type &rhs) = default; \
_type &operator=(const _base &rhs) { value = rhs; return *this; } \
operator const _base & () const { return value; } \
bool operator==(const _type &rhs) const { return value == rhs.value; } \
@@ -1241,8 +1339,9 @@ inline
typename std::enable_if<has_DocumentListTraits<T>::value, Input &>::type
operator>>(Input &yin, T &docList) {
int i = 0;
+ EmptyContext Ctx;
while ( yin.setCurrentDocument() ) {
- yamlize(yin, DocumentListTraits<T>::element(yin, docList, i), true);
+ yamlize(yin, DocumentListTraits<T>::element(yin, docList, i), true, Ctx);
if ( yin.error() )
return yin;
yin.nextDocument();
@@ -1253,11 +1352,12 @@ operator>>(Input &yin, T &docList) {
// Define non-member operator>> so that Input can stream in a map as a document.
template <typename T>
-inline
-typename std::enable_if<has_MappingTraits<T>::value, Input &>::type
+inline typename std::enable_if<has_MappingTraits<T, EmptyContext>::value,
+ Input &>::type
operator>>(Input &yin, T &docMap) {
+ EmptyContext Ctx;
yin.setCurrentDocument();
- yamlize(yin, docMap, true);
+ yamlize(yin, docMap, true, Ctx);
return yin;
}
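From the caller's perspective nothing changes: the streaming operators manufacture the EmptyContext themselves. A hedged sketch of reading a single document into the hypothetical ToolConfig mapped earlier:

#include "llvm/Support/YAMLTraits.h"

// Sketch: parse one YAML document into ToolConfig (illustrative type from above).
static bool loadConfig(llvm::StringRef Buffer, ToolConfig &C) {
  llvm::yaml::Input Yin(Buffer);
  Yin >> C;                 // dispatches through has_MappingTraits<T, EmptyContext>
  return !Yin.error();      // error() reports parse or mapping failures
}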
@@ -1267,8 +1367,9 @@ template <typename T>
inline
typename std::enable_if<has_SequenceTraits<T>::value, Input &>::type
operator>>(Input &yin, T &docSeq) {
+ EmptyContext Ctx;
if (yin.setCurrentDocument())
- yamlize(yin, docSeq, true);
+ yamlize(yin, docSeq, true, Ctx);
return yin;
}
@@ -1277,15 +1378,16 @@ template <typename T>
inline
typename std::enable_if<has_BlockScalarTraits<T>::value, Input &>::type
operator>>(Input &In, T &Val) {
+ EmptyContext Ctx;
if (In.setCurrentDocument())
- yamlize(In, Val, true);
+ yamlize(In, Val, true, Ctx);
return In;
}
// Provide better error message about types missing a trait specialization
template <typename T>
-inline
-typename std::enable_if<missingTraits<T>::value, Input &>::type
+inline typename std::enable_if<missingTraits<T, EmptyContext>::value,
+ Input &>::type
operator>>(Input &yin, T &docSeq) {
char missing_yaml_trait_for_type[sizeof(MissingTrait<T>)];
return yin;
@@ -1296,11 +1398,13 @@ template <typename T>
inline
typename std::enable_if<has_DocumentListTraits<T>::value, Output &>::type
operator<<(Output &yout, T &docList) {
+ EmptyContext Ctx;
yout.beginDocuments();
const size_t count = DocumentListTraits<T>::size(yout, docList);
for(size_t i=0; i < count; ++i) {
if ( yout.preflightDocument(i) ) {
- yamlize(yout, DocumentListTraits<T>::element(yout, docList, i), true);
+ yamlize(yout, DocumentListTraits<T>::element(yout, docList, i), true,
+ Ctx);
yout.postflightDocument();
}
}
@@ -1310,12 +1414,13 @@ operator<<(Output &yout, T &docList) {
// Define non-member operator<< so that Output can stream out a map.
template <typename T>
-inline
-typename std::enable_if<has_MappingTraits<T>::value, Output &>::type
+inline typename std::enable_if<has_MappingTraits<T, EmptyContext>::value,
+ Output &>::type
operator<<(Output &yout, T &map) {
+ EmptyContext Ctx;
yout.beginDocuments();
if ( yout.preflightDocument(0) ) {
- yamlize(yout, map, true);
+ yamlize(yout, map, true, Ctx);
yout.postflightDocument();
}
yout.endDocuments();
@@ -1327,9 +1432,10 @@ template <typename T>
inline
typename std::enable_if<has_SequenceTraits<T>::value, Output &>::type
operator<<(Output &yout, T &seq) {
+ EmptyContext Ctx;
yout.beginDocuments();
if ( yout.preflightDocument(0) ) {
- yamlize(yout, seq, true);
+ yamlize(yout, seq, true, Ctx);
yout.postflightDocument();
}
yout.endDocuments();
@@ -1341,9 +1447,10 @@ template <typename T>
inline
typename std::enable_if<has_BlockScalarTraits<T>::value, Output &>::type
operator<<(Output &Out, T &Val) {
+ EmptyContext Ctx;
Out.beginDocuments();
if (Out.preflightDocument(0)) {
- yamlize(Out, Val, true);
+ yamlize(Out, Val, true, Ctx);
Out.postflightDocument();
}
Out.endDocuments();
@@ -1352,73 +1459,75 @@ operator<<(Output &Out, T &Val) {
// Provide better error message about types missing a trait specialization
template <typename T>
-inline
-typename std::enable_if<missingTraits<T>::value, Output &>::type
+inline typename std::enable_if<missingTraits<T, EmptyContext>::value,
+ Output &>::type
operator<<(Output &yout, T &seq) {
char missing_yaml_trait_for_type[sizeof(MissingTrait<T>)];
return yout;
}
-} // namespace yaml
-} // namespace llvm
+template <typename T> struct SequenceTraitsImpl {
+ typedef typename T::value_type _type;
+ static size_t size(IO &io, T &seq) { return seq.size(); }
+ static _type &element(IO &io, T &seq, size_t index) {
+ if (index >= seq.size())
+ seq.resize(index + 1);
+ return seq[index];
+ }
+};
+
+} // end namespace yaml
+} // end namespace llvm
/// Utility for declaring that a std::vector of a particular type
/// should be considered a YAML sequence.
-#define LLVM_YAML_IS_SEQUENCE_VECTOR(_type) \
- namespace llvm { \
- namespace yaml { \
- template<> \
- struct SequenceTraits< std::vector<_type> > { \
- static size_t size(IO &io, std::vector<_type> &seq) { \
- return seq.size(); \
- } \
- static _type& element(IO &io, std::vector<_type> &seq, size_t index) {\
- if ( index >= seq.size() ) \
- seq.resize(index+1); \
- return seq[index]; \
- } \
- }; \
- } \
+#define LLVM_YAML_IS_SEQUENCE_VECTOR(_type) \
+ namespace llvm { \
+ namespace yaml { \
+ template <> \
+ struct SequenceTraits<std::vector<_type>> \
+ : public SequenceTraitsImpl<std::vector<_type>> {}; \
+ template <unsigned N> \
+ struct SequenceTraits<SmallVector<_type, N>> \
+ : public SequenceTraitsImpl<SmallVector<_type, N>> {}; \
+ } \
}
/// Utility for declaring that a std::vector of a particular type
/// should be considered a YAML flow sequence.
-#define LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(_type) \
- namespace llvm { \
- namespace yaml { \
- template<> \
- struct SequenceTraits< std::vector<_type> > { \
- static size_t size(IO &io, std::vector<_type> &seq) { \
- return seq.size(); \
- } \
- static _type& element(IO &io, std::vector<_type> &seq, size_t index) {\
- (void)flow; /* Remove this workaround after PR17897 is fixed */ \
- if ( index >= seq.size() ) \
- seq.resize(index+1); \
- return seq[index]; \
- } \
- static const bool flow = true; \
- }; \
- } \
+/// We need to do a partial specialization on the vector version, not a full.
+/// If this is a full specialization, the compiler is a bit too "smart" and
+/// decides to warn on -Wunused-const-variable. This workaround can be
+/// removed and we can do a full specialization on std::vector<T> once
+/// PR28878 is fixed.
+#define LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(_type) \
+ namespace llvm { \
+ namespace yaml { \
+ template <unsigned N> \
+ struct SequenceTraits<SmallVector<_type, N>> \
+ : public SequenceTraitsImpl<SmallVector<_type, N>> { \
+ static const bool flow = true; \
+ }; \
+ template <typename Allocator> \
+ struct SequenceTraits<std::vector<_type, Allocator>> \
+ : public SequenceTraitsImpl<std::vector<_type, Allocator>> { \
+ static const bool flow = true; \
+ }; \
+ } \
}
/// Utility for declaring that a std::vector of a particular type
/// should be considered a YAML document list.
-#define LLVM_YAML_IS_DOCUMENT_LIST_VECTOR(_type) \
- namespace llvm { \
- namespace yaml { \
- template<> \
- struct DocumentListTraits< std::vector<_type> > { \
- static size_t size(IO &io, std::vector<_type> &seq) { \
- return seq.size(); \
- } \
- static _type& element(IO &io, std::vector<_type> &seq, size_t index) {\
- if ( index >= seq.size() ) \
- seq.resize(index+1); \
- return seq[index]; \
- } \
- }; \
- } \
+#define LLVM_YAML_IS_DOCUMENT_LIST_VECTOR(_type) \
+ namespace llvm { \
+ namespace yaml { \
+ template <unsigned N> \
+ struct DocumentListTraits<SmallVector<_type, N>> \
+ : public SequenceTraitsImpl<SmallVector<_type, N>> {}; \
+ template <> \
+ struct DocumentListTraits<std::vector<_type>> \
+ : public SequenceTraitsImpl<std::vector<_type>> {}; \
+ } \
}
#endif // LLVM_SUPPORT_YAMLTRAITS_H
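With SequenceTraitsImpl factored out, a single macro invocation now covers both std::vector and SmallVector of the element type. A hedged end-to-end sketch; Point and its field names are invented:

#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/YAMLTraits.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdint>

struct Point { int32_t x, y; };

namespace llvm {
namespace yaml {
template <> struct MappingTraits<Point> {
  static void mapping(IO &io, Point &P) {
    io.mapRequired("x", P.x);
    io.mapRequired("y", P.y);
  }
};
} // end namespace yaml
} // end namespace llvm

// One invocation provides SequenceTraits for std::vector<Point> and for
// SmallVector<Point, N> for any N.
LLVM_YAML_IS_SEQUENCE_VECTOR(Point)

void dumpPoints(llvm::raw_ostream &OS, llvm::SmallVector<Point, 8> &Pts) {
  llvm::yaml::Output Yout(OS);
  Yout << Pts;              // emitted as a YAML sequence of mappings
}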
diff --git a/contrib/llvm/include/llvm/Support/raw_ostream.h b/contrib/llvm/include/llvm/Support/raw_ostream.h
index d1e96f892a4b..e644a5bda5ef 100644
--- a/contrib/llvm/include/llvm/Support/raw_ostream.h
+++ b/contrib/llvm/include/llvm/Support/raw_ostream.h
@@ -16,20 +16,26 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/DataTypes.h"
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <string>
#include <system_error>
namespace llvm {
+
+class formatv_object_base;
class format_object_base;
class FormattedString;
class FormattedNumber;
-template <typename T> class SmallVectorImpl;
+class FormattedBytes;
namespace sys {
namespace fs {
enum OpenFlags : unsigned;
-}
-}
+} // end namespace fs
+} // end namespace sys
/// This class implements an extremely fast bulk output stream that can *only*
/// output to a stream. It does not support seeking, reopening, rewinding, line
@@ -37,9 +43,6 @@ enum OpenFlags : unsigned;
/// a chunk at a time.
class raw_ostream {
private:
- void operator=(const raw_ostream &) = delete;
- raw_ostream(const raw_ostream &) = delete;
-
/// The buffer is handled in such a way that the buffer is
/// uninitialized, unbuffered, or out of space when OutBufCur >=
/// OutBufEnd. Thus a single comparison suffices to determine if we
@@ -69,7 +72,7 @@ private:
public:
// color order matches ANSI escape sequence, don't change
enum Colors {
- BLACK=0,
+ BLACK = 0,
RED,
GREEN,
YELLOW,
@@ -86,6 +89,9 @@ public:
OutBufStart = OutBufEnd = OutBufCur = nullptr;
}
+ raw_ostream(const raw_ostream &) = delete;
+ void operator=(const raw_ostream &) = delete;
+
virtual ~raw_ostream();
/// tell - Return the current offset with the file.
@@ -184,7 +190,7 @@ public:
return write(Str.data(), Str.length());
}
- raw_ostream &operator<<(const llvm::SmallVectorImpl<char> &Str) {
+ raw_ostream &operator<<(const SmallVectorImpl<char> &Str) {
return write(Str.data(), Str.size());
}
@@ -193,6 +199,7 @@ public:
raw_ostream &operator<<(unsigned long long N);
raw_ostream &operator<<(long long N);
raw_ostream &operator<<(const void *P);
+
raw_ostream &operator<<(unsigned int N) {
return this->operator<<(static_cast<unsigned long>(N));
}
@@ -222,6 +229,12 @@ public:
// Formatted output, see the formatHex() function in Support/Format.h.
raw_ostream &operator<<(const FormattedNumber &);
+ // Formatted output, see the formatv() function in Support/FormatVariadic.h.
+ raw_ostream &operator<<(const formatv_object_base &);
+
+ // Formatted output, see the format_bytes() function in Support/Format.h.
+ raw_ostream &operator<<(const FormattedBytes &);
+
/// indent - Insert 'NumSpaces' spaces.
raw_ostream &indent(unsigned NumSpaces);
@@ -493,7 +506,8 @@ public:
explicit raw_svector_ostream(SmallVectorImpl<char> &O) : OS(O) {
SetUnbuffered();
}
- ~raw_svector_ostream() override {}
+
+ ~raw_svector_ostream() override = default;
void flush() = delete;
@@ -512,7 +526,7 @@ class raw_null_ostream : public raw_pwrite_stream {
uint64_t current_pos() const override;
public:
- explicit raw_null_ostream() {}
+ explicit raw_null_ostream() = default;
~raw_null_ostream() override;
};
@@ -525,6 +539,6 @@ public:
~buffer_ostream() override { OS << str(); }
};
-} // end llvm namespace
+} // end namespace llvm
#endif // LLVM_SUPPORT_RAW_OSTREAM_H
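The new operator<< overloads let formatv()-style objects be streamed directly. A small hedged sketch, assuming llvm/Support/FormatVariadic.h provides formatv() as it does in trees that carry this overload; the message and names are arbitrary:

#include "llvm/ADT/SmallString.h"
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"

void report(llvm::StringRef Name, unsigned Hits, unsigned Total) {
  llvm::SmallString<64> Buf;
  llvm::raw_svector_ostream OS(Buf);                 // unbuffered, writes into Buf
  OS << llvm::formatv("{0}: {1}/{2} hits\n", Name, Hits, Total);
  llvm::errs() << OS.str();
}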
diff --git a/contrib/llvm/include/llvm/Support/xxhash.h b/contrib/llvm/include/llvm/Support/xxhash.h
new file mode 100644
index 000000000000..f7ca460188a2
--- /dev/null
+++ b/contrib/llvm/include/llvm/Support/xxhash.h
@@ -0,0 +1,47 @@
+/*
+ xxHash - Extremely Fast Hash algorithm
+ Header File
+ Copyright (C) 2012-2016, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - xxHash source repository : https://github.com/Cyan4973/xxHash
+*/
+
+/* based on revision d2df04efcbef7d7f6886d345861e5dfda4edacc1 Removed
+ * everything but a simple interface for computing XXh64. */
+
+#ifndef LLVM_SUPPORT_XXHASH_H
+#define LLVM_SUPPORT_XXHASH_H
+
+#include "llvm/ADT/StringRef.h"
+
+namespace llvm {
+uint64_t xxHash64(llvm::StringRef Data);
+}
+
+#endif
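The exported surface is a single free function; a minimal usage sketch (the wrapper name is arbitrary):

#include "llvm/Support/xxhash.h"
#include <cstdint>

// Hash the raw bytes of a buffer with the 64-bit xxHash variant.
uint64_t hashBuffer(llvm::StringRef Data) {
  return llvm::xxHash64(Data);
}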
diff --git a/contrib/llvm/include/llvm/TableGen/Record.h b/contrib/llvm/include/llvm/TableGen/Record.h
index 393cafa7924a..5a100f0cba76 100644
--- a/contrib/llvm/include/llvm/TableGen/Record.h
+++ b/contrib/llvm/include/llvm/TableGen/Record.h
@@ -18,21 +18,31 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/DataTypes.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TrailingObjects.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
#include <map>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
namespace llvm {
class ListRecTy;
-struct MultiClass;
class Record;
-class RecordVal;
class RecordKeeper;
+class RecordVal;
+class StringInit;
+struct MultiClass;
//===----------------------------------------------------------------------===//
// Type Classes
@@ -54,13 +64,13 @@ public:
private:
RecTyKind Kind;
- std::unique_ptr<ListRecTy> ListTy;
+ ListRecTy *ListTy = nullptr;
public:
- RecTyKind getRecTyKind() const { return Kind; }
-
RecTy(RecTyKind K) : Kind(K) {}
- virtual ~RecTy() {}
+ virtual ~RecTy() = default;
+
+ RecTyKind getRecTyKind() const { return Kind; }
virtual std::string getAsString() const = 0;
void print(raw_ostream &OS) const { OS << getAsString(); }
@@ -83,6 +93,7 @@ inline raw_ostream &operator<<(raw_ostream &OS, const RecTy &Ty) {
///
class BitRecTy : public RecTy {
static BitRecTy Shared;
+
BitRecTy() : RecTy(BitRecTyKind) {}
public:
@@ -101,6 +112,7 @@ public:
///
class BitsRecTy : public RecTy {
unsigned Size;
+
explicit BitsRecTy(unsigned Sz) : RecTy(BitsRecTyKind), Size(Sz) {}
public:
@@ -121,6 +133,7 @@ public:
///
class CodeRecTy : public RecTy {
static CodeRecTy Shared;
+
CodeRecTy() : RecTy(CodeRecTyKind) {}
public:
@@ -137,6 +150,7 @@ public:
///
class IntRecTy : public RecTy {
static IntRecTy Shared;
+
IntRecTy() : RecTy(IntRecTyKind) {}
public:
@@ -155,6 +169,7 @@ public:
///
class StringRecTy : public RecTy {
static StringRecTy Shared;
+
StringRecTy() : RecTy(StringRecTyKind) {}
public:
@@ -173,7 +188,9 @@ public:
///
class ListRecTy : public RecTy {
RecTy *Ty;
+
explicit ListRecTy(RecTy *T) : RecTy(ListRecTyKind), Ty(T) {}
+
friend ListRecTy *RecTy::getListTy();
public:
@@ -193,6 +210,7 @@ public:
///
class DagRecTy : public RecTy {
static DagRecTy Shared;
+
DagRecTy() : RecTy(DagRecTyKind) {}
public:
@@ -210,7 +228,9 @@ public:
///
class RecordRecTy : public RecTy {
Record *Rec;
+
explicit RecordRecTy(Record *R) : RecTy(RecordRecTyKind), Rec(R) {}
+
friend class Record;
public:
@@ -276,11 +296,11 @@ protected:
private:
const InitKind Kind;
+
protected:
uint8_t Opc; // Used by UnOpInit, BinOpInit, and TernOpInit
+
private:
- Init(const Init &) = delete;
- Init &operator=(const Init &) = delete;
virtual void anchor();
public:
@@ -290,7 +310,9 @@ protected:
explicit Init(InitKind K, uint8_t Opc = 0) : Kind(K), Opc(Opc) {}
public:
- virtual ~Init() {}
+ Init(const Init &) = delete;
+ Init &operator=(const Init &) = delete;
+ virtual ~Init() = default;
/// This virtual method should be overridden by values that may
/// not be completely specified yet.
@@ -320,8 +342,7 @@ public:
/// out, returning them as a new init of bits type. If it is not legal to use
/// the bit subscript operator on this initializer, return null.
///
- virtual Init *
- convertInitializerBitRange(const std::vector<unsigned> &Bits) const {
+ virtual Init *convertInitializerBitRange(ArrayRef<unsigned> Bits) const {
return nullptr;
}
@@ -330,8 +351,7 @@ public:
/// elements, returning them as a new init of list type. If it is not legal
/// to take a slice of this, return null.
///
- virtual Init *
- convertInitListSlice(const std::vector<unsigned> &Elements) const {
+ virtual Init *convertInitListSlice(ArrayRef<unsigned> Elements) const {
return nullptr;
}
@@ -339,7 +359,7 @@ public:
/// Implementors of this method should return the type of the named field if
/// they are of record type.
///
- virtual RecTy *getFieldType(const std::string &FieldName) const {
+ virtual RecTy *getFieldType(StringInit *FieldName) const {
return nullptr;
}
@@ -348,7 +368,7 @@ public:
/// this method should return non-null, otherwise it returns null.
///
virtual Init *getFieldInit(Record &R, const RecordVal *RV,
- const std::string &FieldName) const {
+ StringInit *FieldName) const {
return nullptr;
}
@@ -384,37 +404,31 @@ inline raw_ostream &operator<<(raw_ostream &OS, const Init &I) {
class TypedInit : public Init {
RecTy *Ty;
- TypedInit(const TypedInit &Other) = delete;
- TypedInit &operator=(const TypedInit &Other) = delete;
-
protected:
explicit TypedInit(InitKind K, RecTy *T, uint8_t Opc = 0)
: Init(K, Opc), Ty(T) {}
- ~TypedInit() override {
- // If this is a DefInit we need to delete the RecordRecTy.
- if (getKind() == IK_DefInit)
- delete Ty;
- }
public:
+ TypedInit(const TypedInit &Other) = delete;
+ TypedInit &operator=(const TypedInit &Other) = delete;
+
static bool classof(const Init *I) {
return I->getKind() >= IK_FirstTypedInit &&
I->getKind() <= IK_LastTypedInit;
}
+
RecTy *getType() const { return Ty; }
Init *convertInitializerTo(RecTy *Ty) const override;
- Init *
- convertInitializerBitRange(const std::vector<unsigned> &Bits) const override;
- Init *
- convertInitListSlice(const std::vector<unsigned> &Elements) const override;
+ Init *convertInitializerBitRange(ArrayRef<unsigned> Bits) const override;
+ Init *convertInitListSlice(ArrayRef<unsigned> Elements) const override;
/// This method is used to implement the FieldInit class.
/// Implementors of this method should return the type of the named field if
/// they are of record type.
///
- RecTy *getFieldType(const std::string &FieldName) const override;
+ RecTy *getFieldType(StringInit *FieldName) const override;
/// This method is used to implement
/// VarListElementInit::resolveReferences. If the list element is resolvable
@@ -427,13 +441,15 @@ public:
///
class UnsetInit : public Init {
UnsetInit() : Init(IK_UnsetInit) {}
+
+public:
UnsetInit(const UnsetInit &) = delete;
UnsetInit &operator=(const UnsetInit &Other) = delete;
-public:
static bool classof(const Init *I) {
return I->getKind() == IK_UnsetInit;
}
+
static UnsetInit *get();
Init *convertInitializerTo(RecTy *Ty) const override;
@@ -452,13 +468,15 @@ class BitInit : public Init {
bool Value;
explicit BitInit(bool V) : Init(IK_BitInit), Value(V) {}
+
+public:
BitInit(const BitInit &Other) = delete;
BitInit &operator=(BitInit &Other) = delete;
-public:
static bool classof(const Init *I) {
return I->getKind() == IK_BitInit;
}
+
static BitInit *get(bool V);
bool getValue() const { return Value; }
@@ -483,16 +501,17 @@ class BitsInit final : public TypedInit, public FoldingSetNode,
BitsInit(unsigned N)
: TypedInit(IK_BitsInit, BitsRecTy::get(N)), NumBits(N) {}
+public:
BitsInit(const BitsInit &Other) = delete;
BitsInit &operator=(const BitsInit &Other) = delete;
-public:
// Do not use sized deallocation due to trailing objects.
void operator delete(void *p) { ::operator delete(p); }
static bool classof(const Init *I) {
return I->getKind() == IK_BitsInit;
}
+
static BitsInit *get(ArrayRef<Init *> Range);
void Profile(FoldingSetNodeID &ID) const;
@@ -500,19 +519,20 @@ public:
unsigned getNumBits() const { return NumBits; }
Init *convertInitializerTo(RecTy *Ty) const override;
- Init *
- convertInitializerBitRange(const std::vector<unsigned> &Bits) const override;
+ Init *convertInitializerBitRange(ArrayRef<unsigned> Bits) const override;
bool isComplete() const override {
for (unsigned i = 0; i != getNumBits(); ++i)
if (!getBit(i)->isComplete()) return false;
return true;
}
+
bool allInComplete() const {
for (unsigned i = 0; i != getNumBits(); ++i)
if (getBit(i)->isComplete()) return false;
return true;
}
+
std::string getAsString() const override;
/// This method is used to implement
@@ -539,20 +559,20 @@ class IntInit : public TypedInit {
explicit IntInit(int64_t V)
: TypedInit(IK_IntInit, IntRecTy::get()), Value(V) {}
+public:
IntInit(const IntInit &Other) = delete;
IntInit &operator=(const IntInit &Other) = delete;
-public:
static bool classof(const Init *I) {
return I->getKind() == IK_IntInit;
}
+
static IntInit *get(int64_t V);
int64_t getValue() const { return Value; }
Init *convertInitializerTo(RecTy *Ty) const override;
- Init *
- convertInitializerBitRange(const std::vector<unsigned> &Bits) const override;
+ Init *convertInitializerBitRange(ArrayRef<unsigned> Bits) const override;
std::string getAsString() const override;
@@ -572,25 +592,26 @@ public:
/// "foo" - Represent an initialization by a string value.
///
class StringInit : public TypedInit {
- std::string Value;
+ StringRef Value;
explicit StringInit(StringRef V)
: TypedInit(IK_StringInit, StringRecTy::get()), Value(V) {}
+public:
StringInit(const StringInit &Other) = delete;
StringInit &operator=(const StringInit &Other) = delete;
-public:
static bool classof(const Init *I) {
return I->getKind() == IK_StringInit;
}
+
static StringInit *get(StringRef);
- const std::string &getValue() const { return Value; }
+ StringRef getValue() const { return Value; }
Init *convertInitializerTo(RecTy *Ty) const override;
- std::string getAsString() const override { return "\"" + Value + "\""; }
+ std::string getAsString() const override { return "\"" + Value.str() + "\""; }
std::string getAsUnquotedString() const override { return Value; }
@@ -608,27 +629,28 @@ public:
};
class CodeInit : public TypedInit {
- std::string Value;
+ StringRef Value;
explicit CodeInit(StringRef V)
: TypedInit(IK_CodeInit, static_cast<RecTy *>(CodeRecTy::get())),
Value(V) {}
+public:
CodeInit(const StringInit &Other) = delete;
CodeInit &operator=(const StringInit &Other) = delete;
-public:
static bool classof(const Init *I) {
return I->getKind() == IK_CodeInit;
}
+
static CodeInit *get(StringRef);
- const std::string &getValue() const { return Value; }
+ StringRef getValue() const { return Value; }
Init *convertInitializerTo(RecTy *Ty) const override;
std::string getAsString() const override {
- return "[{" + Value + "}]";
+ return "[{" + Value.str() + "}]";
}
std::string getAsUnquotedString() const override { return Value; }
@@ -659,10 +681,10 @@ private:
explicit ListInit(unsigned N, RecTy *EltTy)
: TypedInit(IK_ListInit, ListRecTy::get(EltTy)), NumValues(N) {}
+public:
ListInit(const ListInit &Other) = delete;
ListInit &operator=(const ListInit &Other) = delete;
-public:
// Do not use sized deallocation due to trailing objects.
void operator delete(void *p) { ::operator delete(p); }
@@ -680,8 +702,7 @@ public:
Record *getElementAsRecord(unsigned i) const;
- Init *
- convertInitListSlice(const std::vector<unsigned> &Elements) const override;
+ Init *convertInitListSlice(ArrayRef<unsigned> Elements) const override;
Init *convertInitializerTo(RecTy *Ty) const override;
@@ -718,20 +739,21 @@ public:
/// Base class for operators
///
class OpInit : public TypedInit {
- OpInit(const OpInit &Other) = delete;
- OpInit &operator=(OpInit &Other) = delete;
-
protected:
explicit OpInit(InitKind K, RecTy *Type, uint8_t Opc)
: TypedInit(K, Type, Opc) {}
public:
+ OpInit(const OpInit &Other) = delete;
+ OpInit &operator=(OpInit &Other) = delete;
+
static bool classof(const Init *I) {
return I->getKind() >= IK_FirstOpInit &&
I->getKind() <= IK_LastOpInit;
}
+
// Clone - Clone this operator, replacing arguments with the new list
- virtual OpInit *clone(std::vector<Init *> &Operands) const = 0;
+ virtual OpInit *clone(ArrayRef<Init *> Operands) const = 0;
virtual unsigned getNumOperands() const = 0;
virtual Init *getOperand(unsigned i) const = 0;
@@ -758,25 +780,27 @@ private:
UnOpInit(UnaryOp opc, Init *lhs, RecTy *Type)
: OpInit(IK_UnOpInit, Type, opc), LHS(lhs) {}
+public:
UnOpInit(const UnOpInit &Other) = delete;
UnOpInit &operator=(const UnOpInit &Other) = delete;
-public:
static bool classof(const Init *I) {
return I->getKind() == IK_UnOpInit;
}
+
static UnOpInit *get(UnaryOp opc, Init *lhs, RecTy *Type);
void Profile(FoldingSetNodeID &ID) const;
// Clone - Clone this operator, replacing arguments with the new list
- OpInit *clone(std::vector<Init *> &Operands) const override {
+ OpInit *clone(ArrayRef<Init *> Operands) const override {
assert(Operands.size() == 1 &&
"Wrong number of operands for unary operation");
return UnOpInit::get(getOpcode(), *Operands.begin(), getType());
}
unsigned getNumOperands() const override { return 1; }
+
Init *getOperand(unsigned i) const override {
assert(i == 0 && "Invalid operand id for unary operator");
return getOperand();
@@ -798,7 +822,7 @@ public:
///
class BinOpInit : public OpInit, public FoldingSetNode {
public:
- enum BinaryOp : uint8_t { ADD, AND, SHL, SRA, SRL, LISTCONCAT,
+ enum BinaryOp : uint8_t { ADD, AND, OR, SHL, SRA, SRL, LISTCONCAT,
STRCONCAT, CONCAT, EQ };
private:
@@ -807,20 +831,21 @@ private:
BinOpInit(BinaryOp opc, Init *lhs, Init *rhs, RecTy *Type) :
OpInit(IK_BinOpInit, Type, opc), LHS(lhs), RHS(rhs) {}
+public:
BinOpInit(const BinOpInit &Other) = delete;
BinOpInit &operator=(const BinOpInit &Other) = delete;
-public:
static bool classof(const Init *I) {
return I->getKind() == IK_BinOpInit;
}
+
static BinOpInit *get(BinaryOp opc, Init *lhs, Init *rhs,
RecTy *Type);
void Profile(FoldingSetNodeID &ID) const;
// Clone - Clone this operator, replacing arguments with the new list
- OpInit *clone(std::vector<Init *> &Operands) const override {
+ OpInit *clone(ArrayRef<Init *> Operands) const override {
assert(Operands.size() == 2 &&
"Wrong number of operands for binary operation");
return BinOpInit::get(getOpcode(), Operands[0], Operands[1], getType());
@@ -861,13 +886,14 @@ private:
RecTy *Type) :
OpInit(IK_TernOpInit, Type, opc), LHS(lhs), MHS(mhs), RHS(rhs) {}
+public:
TernOpInit(const TernOpInit &Other) = delete;
TernOpInit &operator=(const TernOpInit &Other) = delete;
-public:
static bool classof(const Init *I) {
return I->getKind() == IK_TernOpInit;
}
+
static TernOpInit *get(TernaryOp opc, Init *lhs,
Init *mhs, Init *rhs,
RecTy *Type);
@@ -875,7 +901,7 @@ public:
void Profile(FoldingSetNodeID &ID) const;
// Clone - Clone this operator, replacing arguments with the new list
- OpInit *clone(std::vector<Init *> &Operands) const override {
+ OpInit *clone(ArrayRef<Init *> Operands) const override {
assert(Operands.size() == 3 &&
"Wrong number of operands for ternary operation");
return TernOpInit::get(getOpcode(), Operands[0], Operands[1], Operands[2],
@@ -916,18 +942,20 @@ class VarInit : public TypedInit {
explicit VarInit(Init *VN, RecTy *T)
: TypedInit(IK_VarInit, T), VarName(VN) {}
+public:
VarInit(const VarInit &Other) = delete;
VarInit &operator=(const VarInit &Other) = delete;
-public:
static bool classof(const Init *I) {
return I->getKind() == IK_VarInit;
}
- static VarInit *get(const std::string &VN, RecTy *T);
+
+ static VarInit *get(StringRef VN, RecTy *T);
static VarInit *get(Init *VN, RecTy *T);
- const std::string &getName() const;
+ StringRef getName() const;
Init *getNameInit() const { return VarName; }
+
std::string getNameInitAsString() const {
return getNameInit()->getAsUnquotedString();
}
@@ -935,9 +963,9 @@ public:
Init *resolveListElementReference(Record &R, const RecordVal *RV,
unsigned Elt) const override;
- RecTy *getFieldType(const std::string &FieldName) const override;
+ RecTy *getFieldType(StringInit *FieldName) const override;
Init *getFieldInit(Record &R, const RecordVal *RV,
- const std::string &FieldName) const override;
+ StringInit *FieldName) const override;
/// This method is used by classes that refer to other
/// variables which may not be defined at the time they expression is formed.
@@ -965,13 +993,14 @@ class VarBitInit : public Init {
"Illegal VarBitInit expression!");
}
+public:
VarBitInit(const VarBitInit &Other) = delete;
VarBitInit &operator=(const VarBitInit &Other) = delete;
-public:
static bool classof(const Init *I) {
return I->getKind() == IK_VarBitInit;
}
+
static VarBitInit *get(TypedInit *T, unsigned B);
Init *convertInitializerTo(RecTy *Ty) const override;
@@ -1002,13 +1031,14 @@ class VarListElementInit : public TypedInit {
"Illegal VarBitInit expression!");
}
+public:
VarListElementInit(const VarListElementInit &Other) = delete;
void operator=(const VarListElementInit &Other) = delete;
-public:
static bool classof(const Init *I) {
return I->getKind() == IK_VarListElementInit;
}
+
static VarListElementInit *get(TypedInit *T, unsigned E);
TypedInit *getVariable() const { return TI; }
@@ -1032,26 +1062,28 @@ class DefInit : public TypedInit {
Record *Def;
DefInit(Record *D, RecordRecTy *T) : TypedInit(IK_DefInit, T), Def(D) {}
+
friend class Record;
+public:
DefInit(const DefInit &Other) = delete;
DefInit &operator=(const DefInit &Other) = delete;
-public:
static bool classof(const Init *I) {
return I->getKind() == IK_DefInit;
}
+
static DefInit *get(Record*);
Init *convertInitializerTo(RecTy *Ty) const override;
Record *getDef() const { return Def; }
- //virtual Init *convertInitializerBitRange(const std::vector<unsigned> &Bits);
+ //virtual Init *convertInitializerBitRange(ArrayRef<unsigned> Bits);
- RecTy *getFieldType(const std::string &FieldName) const override;
+ RecTy *getFieldType(StringInit *FieldName) const override;
Init *getFieldInit(Record &R, const RecordVal *RV,
- const std::string &FieldName) const override;
+ StringInit *FieldName) const override;
std::string getAsString() const override;
@@ -1072,21 +1104,22 @@ public:
///
class FieldInit : public TypedInit {
Init *Rec; // Record we are referring to
- std::string FieldName; // Field we are accessing
+ StringInit *FieldName; // Field we are accessing
- FieldInit(Init *R, const std::string &FN)
+ FieldInit(Init *R, StringInit *FN)
: TypedInit(IK_FieldInit, R->getFieldType(FN)), Rec(R), FieldName(FN) {
assert(getType() && "FieldInit with non-record type!");
}
+public:
FieldInit(const FieldInit &Other) = delete;
FieldInit &operator=(const FieldInit &Other) = delete;
-public:
static bool classof(const Init *I) {
return I->getKind() == IK_FieldInit;
}
- static FieldInit *get(Init *R, const std::string &FN);
+
+ static FieldInit *get(Init *R, StringInit *FN);
Init *getBit(unsigned Bit) const override;
@@ -1096,7 +1129,7 @@ public:
Init *resolveReferences(Record &R, const RecordVal *RV) const override;
std::string getAsString() const override {
- return Rec->getAsString() + "." + FieldName;
+ return Rec->getAsString() + "." + FieldName->getValue().str();
}
};
@@ -1106,30 +1139,28 @@ public:
///
class DagInit : public TypedInit, public FoldingSetNode {
Init *Val;
- std::string ValName;
- std::vector<Init*> Args;
- std::vector<std::string> ArgNames;
+ StringInit *ValName;
+ SmallVector<Init*, 4> Args;
+ SmallVector<StringInit*, 4> ArgNames;
- DagInit(Init *V, const std::string &VN,
- ArrayRef<Init *> ArgRange,
- ArrayRef<std::string> NameRange)
+ DagInit(Init *V, StringInit *VN, ArrayRef<Init *> ArgRange,
+ ArrayRef<StringInit *> NameRange)
: TypedInit(IK_DagInit, DagRecTy::get()), Val(V), ValName(VN),
Args(ArgRange.begin(), ArgRange.end()),
ArgNames(NameRange.begin(), NameRange.end()) {}
+public:
DagInit(const DagInit &Other) = delete;
DagInit &operator=(const DagInit &Other) = delete;
-public:
static bool classof(const Init *I) {
return I->getKind() == IK_DagInit;
}
- static DagInit *get(Init *V, const std::string &VN,
- ArrayRef<Init *> ArgRange,
- ArrayRef<std::string> NameRange);
- static DagInit *get(Init *V, const std::string &VN,
- const std::vector<
- std::pair<Init*, std::string> > &args);
+
+ static DagInit *get(Init *V, StringInit *VN, ArrayRef<Init *> ArgRange,
+ ArrayRef<StringInit*> NameRange);
+ static DagInit *get(Init *V, StringInit *VN,
+ ArrayRef<std::pair<Init*, StringInit*>> Args);
void Profile(FoldingSetNodeID &ID) const;
@@ -1137,24 +1168,31 @@ public:
Init *getOperator() const { return Val; }
- const std::string &getName() const { return ValName; }
+ StringInit *getName() const { return ValName; }
+ StringRef getNameStr() const {
+ return ValName ? ValName->getValue() : StringRef();
+ }
unsigned getNumArgs() const { return Args.size(); }
Init *getArg(unsigned Num) const {
assert(Num < Args.size() && "Arg number out of range!");
return Args[Num];
}
- const std::string &getArgName(unsigned Num) const {
+ StringInit *getArgName(unsigned Num) const {
assert(Num < ArgNames.size() && "Arg number out of range!");
return ArgNames[Num];
}
+ StringRef getArgNameStr(unsigned Num) const {
+ StringInit *Init = getArgName(Num);
+ return Init ? Init->getValue() : StringRef();
+ }
Init *resolveReferences(Record &R, const RecordVal *RV) const override;
std::string getAsString() const override;
- typedef std::vector<Init*>::const_iterator const_arg_iterator;
- typedef std::vector<std::string>::const_iterator const_name_iterator;
+ typedef SmallVectorImpl<Init*>::const_iterator const_arg_iterator;
+ typedef SmallVectorImpl<StringInit*>::const_iterator const_name_iterator;
inline const_arg_iterator arg_begin() const { return Args.begin(); }
inline const_arg_iterator arg_end () const { return Args.end(); }
@@ -1183,27 +1221,29 @@ public:
//===----------------------------------------------------------------------===//
class RecordVal {
- PointerIntPair<Init *, 1, bool> NameAndPrefix;
- RecTy *Ty;
+ friend class Record;
+ Init *Name;
+ PointerIntPair<RecTy *, 1, bool> TyAndPrefix;
Init *Value;
public:
RecordVal(Init *N, RecTy *T, bool P);
- RecordVal(const std::string &N, RecTy *T, bool P);
+ RecordVal(StringRef N, RecTy *T, bool P);
+
+ StringRef getName() const;
+ Init *getNameInit() const { return Name; }
- const std::string &getName() const;
- const Init *getNameInit() const { return NameAndPrefix.getPointer(); }
std::string getNameInitAsString() const {
return getNameInit()->getAsUnquotedString();
}
- bool getPrefix() const { return NameAndPrefix.getInt(); }
- RecTy *getType() const { return Ty; }
+ bool getPrefix() const { return TyAndPrefix.getInt(); }
+ RecTy *getType() const { return TyAndPrefix.getPointer(); }
Init *getValue() const { return Value; }
bool setValue(Init *V) {
if (V) {
- Value = V->convertInitializerTo(Ty);
+ Value = V->convertInitializerTo(getType());
return Value == nullptr;
}
Value = nullptr;
@@ -1233,7 +1273,7 @@ class Record {
// Tracks Record instances. Not owned by Record.
RecordKeeper &TrackedRecords;
- std::unique_ptr<DefInit> TheInit;
+ DefInit *TheInit = nullptr;
// Unique record ID.
unsigned ID;
@@ -1263,10 +1303,10 @@ public:
ID(LastID++), IsAnonymous(Anonymous), ResolveFirst(false) {
init();
}
- explicit Record(const std::string &N, ArrayRef<SMLoc> locs,
- RecordKeeper &records, bool Anonymous = false)
- : Record(StringInit::get(N), locs, records, Anonymous) {}
+ explicit Record(StringRef N, ArrayRef<SMLoc> locs, RecordKeeper &records,
+ bool Anonymous = false)
+ : Record(StringInit::get(N), locs, records, Anonymous) {}
// When copy-constructing a Record, we must still guarantee a globally unique
// ID number. Don't copy TheInit either since it's owned by the original
@@ -1281,16 +1321,17 @@ public:
unsigned getID() const { return ID; }
- const std::string &getName() const;
+ StringRef getName() const;
Init *getNameInit() const {
return Name;
}
+
const std::string getNameInitAsString() const {
return getNameInit()->getAsUnquotedString();
}
- void setName(Init *Name); // Also updates RecordKeeper.
- void setName(const std::string &Name); // Also updates RecordKeeper.
+ void setName(Init *Name); // Also updates RecordKeeper.
+ void setName(StringRef Name); // Also updates RecordKeeper.
ArrayRef<SMLoc> getLoc() const { return Locs; }
@@ -1300,7 +1341,9 @@ public:
ArrayRef<Init *> getTemplateArgs() const {
return TemplateArgs;
}
+
ArrayRef<RecordVal> getValues() const { return Values; }
+
ArrayRef<std::pair<Record *, SMRange>> getSuperClasses() const {
return SuperClasses;
}
@@ -1310,23 +1353,27 @@ public:
if (TA == Name) return true;
return false;
}
+
bool isTemplateArg(StringRef Name) const {
return isTemplateArg(StringInit::get(Name));
}
const RecordVal *getValue(const Init *Name) const {
for (const RecordVal &Val : Values)
- if (Val.getNameInit() == Name) return &Val;
+ if (Val.Name == Name) return &Val;
return nullptr;
}
+
const RecordVal *getValue(StringRef Name) const {
return getValue(StringInit::get(Name));
}
+
RecordVal *getValue(const Init *Name) {
for (RecordVal &Val : Values)
- if (Val.getNameInit() == Name) return &Val;
+ if (Val.Name == Name) return &Val;
return nullptr;
}
+
RecordVal *getValue(StringRef Name) {
return getValue(StringInit::get(Name));
}
@@ -1335,6 +1382,7 @@ public:
assert(!isTemplateArg(Name) && "Template arg already defined!");
TemplateArgs.push_back(Name);
}
+
void addTemplateArg(StringRef Name) {
addTemplateArg(StringInit::get(Name));
}
@@ -1506,7 +1554,7 @@ struct MultiClass {
void dump() const;
- MultiClass(const std::string &Name, SMLoc Loc, RecordKeeper &Records) :
+ MultiClass(StringRef Name, SMLoc Loc, RecordKeeper &Records) :
Rec(Name, Loc, Records) {}
};
@@ -1518,20 +1566,23 @@ public:
const RecordMap &getClasses() const { return Classes; }
const RecordMap &getDefs() const { return Defs; }
- Record *getClass(const std::string &Name) const {
+ Record *getClass(StringRef Name) const {
auto I = Classes.find(Name);
return I == Classes.end() ? nullptr : I->second.get();
}
- Record *getDef(const std::string &Name) const {
+
+ Record *getDef(StringRef Name) const {
auto I = Defs.find(Name);
return I == Defs.end() ? nullptr : I->second.get();
}
+
void addClass(std::unique_ptr<Record> R) {
bool Ins = Classes.insert(std::make_pair(R->getName(),
std::move(R))).second;
(void)Ins;
assert(Ins && "Class already exists");
}
+
void addDef(std::unique_ptr<Record> R) {
bool Ins = Defs.insert(std::make_pair(R->getName(),
std::move(R))).second;
@@ -1545,8 +1596,7 @@ public:
/// This method returns all concrete definitions
/// that derive from the specified class name. A class with the specified
/// name must exist.
- std::vector<Record *>
- getAllDerivedDefinitions(const std::string &ClassName) const;
+ std::vector<Record *> getAllDerivedDefinitions(StringRef ClassName) const;
void dump() const;
};
@@ -1662,13 +1712,8 @@ raw_ostream &operator<<(raw_ostream &OS, const RecordKeeper &RK);
/// Return an Init with a qualifier prefix referring
/// to CurRec's name.
Init *QualifyName(Record &CurRec, MultiClass *CurMultiClass,
- Init *Name, const std::string &Scoper);
-
-/// Return an Init with a qualifier prefix referring
-/// to CurRec's name.
-Init *QualifyName(Record &CurRec, MultiClass *CurMultiClass,
- const std::string &Name, const std::string &Scoper);
+ Init *Name, StringRef Scoper);
-} // end llvm namespace
+} // end namespace llvm
#endif // LLVM_TABLEGEN_RECORD_H
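For illustration (not part of this diff), a minimal sketch of how a TableGen backend would use the StringRef-based RecordKeeper API changed above; the backend function and the class/def names are assumptions:

  #include "llvm/Support/raw_ostream.h"
  #include "llvm/TableGen/Record.h"

  using namespace llvm;

  // Print the names of all concrete defs deriving from a hypothetical class.
  static void emitNames(RecordKeeper &Records, raw_ostream &OS) {
    // getAllDerivedDefinitions() now accepts a StringRef directly.
    for (Record *Def : Records.getAllDerivedDefinitions("MyClass"))
      OS << Def->getName() << "\n";            // getName() returns StringRef.

    // getDef()/getClass() likewise take StringRef, so string literals work
    // without constructing a temporary std::string.
    if (Record *R = Records.getDef("SomeDef"))
      OS << "found " << R->getName() << "\n";
  }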
diff --git a/contrib/llvm/include/llvm/TableGen/SetTheory.h b/contrib/llvm/include/llvm/TableGen/SetTheory.h
index d4e0f53d3efa..818b0549b66a 100644
--- a/contrib/llvm/include/llvm/TableGen/SetTheory.h
+++ b/contrib/llvm/include/llvm/TableGen/SetTheory.h
@@ -50,8 +50,10 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Support/SMLoc.h"
#include <map>
+#include <memory>
#include <vector>
namespace llvm {
@@ -68,13 +70,14 @@ public:
/// Operator - A callback representing a DAG operator.
class Operator {
virtual void anchor();
+
public:
- virtual ~Operator() {}
+ virtual ~Operator() = default;
/// apply - Apply this operator to Expr's arguments and insert the result
/// in Elts.
virtual void apply(SetTheory&, DagInit *Expr, RecSet &Elts,
- ArrayRef<SMLoc> Loc) =0;
+ ArrayRef<SMLoc> Loc) = 0;
};
/// Expander - A callback function that can transform a Record representing a
@@ -82,10 +85,11 @@ public:
/// users to define named sets that can be used in DAG expressions.
class Expander {
virtual void anchor();
+
public:
- virtual ~Expander() {}
+ virtual ~Expander() = default;
- virtual void expand(SetTheory&, Record*, RecSet &Elts) =0;
+ virtual void expand(SetTheory&, Record*, RecSet &Elts) = 0;
};
private:
@@ -138,5 +142,4 @@ public:
} // end namespace llvm
-#endif
-
+#endif // LLVM_TABLEGEN_SETTHEORY_H
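For illustration (not part of this diff), a sketch of a client Expander written against the interface touched above; the record and field names are assumptions:

  #include "llvm/TableGen/Record.h"
  #include "llvm/TableGen/SetTheory.h"

  using namespace llvm;

  // Expands a hypothetical "MyRegisterGroup" record by evaluating its
  // "Members" field as a set expression. It would be registered with a
  // SetTheory instance via addExpander() in the usual way.
  struct MyGroupExpander : SetTheory::Expander {
    void expand(SetTheory &ST, Record *Def, SetTheory::RecSet &Elts) override {
      ST.evaluate(Def->getValueInit("Members"), Elts, Def->getLoc());
    }
  };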
diff --git a/contrib/llvm/include/llvm/Target/CostTable.h b/contrib/llvm/include/llvm/Target/CostTable.h
index 2499f5c3189c..b7d9240a91f5 100644
--- a/contrib/llvm/include/llvm/Target/CostTable.h
+++ b/contrib/llvm/include/llvm/Target/CostTable.h
@@ -16,6 +16,7 @@
#define LLVM_TARGET_COSTTABLE_H_
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineValueType.h"
namespace llvm {
@@ -30,9 +31,9 @@ struct CostTblEntry {
/// Find in cost table, TypeTy must be comparable to CompareTy by ==
inline const CostTblEntry *CostTableLookup(ArrayRef<CostTblEntry> Tbl,
int ISD, MVT Ty) {
- auto I = std::find_if(Tbl.begin(), Tbl.end(),
- [=](const CostTblEntry &Entry) {
- return ISD == Entry.ISD && Ty == Entry.Type; });
+ auto I = find_if(Tbl, [=](const CostTblEntry &Entry) {
+ return ISD == Entry.ISD && Ty == Entry.Type;
+ });
if (I != Tbl.end())
return I;
@@ -53,11 +54,9 @@ struct TypeConversionCostTblEntry {
inline const TypeConversionCostTblEntry *
ConvertCostTableLookup(ArrayRef<TypeConversionCostTblEntry> Tbl,
int ISD, MVT Dst, MVT Src) {
- auto I = std::find_if(Tbl.begin(), Tbl.end(),
- [=](const TypeConversionCostTblEntry &Entry) {
- return ISD == Entry.ISD && Src == Entry.Src &&
- Dst == Entry.Dst;
- });
+ auto I = find_if(Tbl, [=](const TypeConversionCostTblEntry &Entry) {
+ return ISD == Entry.ISD && Src == Entry.Src && Dst == Entry.Dst;
+ });
if (I != Tbl.end())
return I;
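For illustration (not part of this diff), a sketch of a target cost query using the simplified lookup above; the table entries and fallback value are made up:

  #include "llvm/CodeGen/ISDOpcodes.h"
  #include "llvm/Target/CostTable.h"

  using namespace llvm;

  static unsigned getAddCost(MVT VT) {
    // Illustrative costs only; real targets fill these in per subtarget.
    static const CostTblEntry CostTbl[] = {
      { ISD::ADD, MVT::v4i32, 1 },
      { ISD::ADD, MVT::v8i16, 2 },
    };
    if (const CostTblEntry *Entry = CostTableLookup(CostTbl, ISD::ADD, VT))
      return Entry->Cost;
    return 10; // Assumed fallback when the table has no matching entry.
  }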
diff --git a/contrib/llvm/include/llvm/Target/GenericOpcodes.td b/contrib/llvm/include/llvm/Target/GenericOpcodes.td
index b4d95508f0a5..8694eb5797d0 100644
--- a/contrib/llvm/include/llvm/Target/GenericOpcodes.td
+++ b/contrib/llvm/include/llvm/Target/GenericOpcodes.td
@@ -13,27 +13,419 @@
//===----------------------------------------------------------------------===//
//------------------------------------------------------------------------------
+// Unary ops.
+//------------------------------------------------------------------------------
+
+// Extend the underlying scalar type of an operation, leaving the high bits
+// unspecified.
+def G_ANYEXT : Instruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type1:$src);
+ let hasSideEffects = 0;
+}
+
+// Sign extend the underlying scalar type of an operation, copying the sign bit
+// into the newly-created space.
+def G_SEXT : Instruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type1:$src);
+ let hasSideEffects = 0;
+}
+
+// Zero extend the underlying scalar type of an operation, putting zero bits
+// into the newly-created space.
+def G_ZEXT : Instruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type1:$src);
+ let hasSideEffects = 0;
+}
+
+
+// Truncate the underlying scalar type of an operation. This is equivalent to
+// G_EXTRACT for scalar types, but acts elementwise on vectors.
+def G_TRUNC : Instruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type1:$src);
+ let hasSideEffects = 0;
+}
+
+def G_FRAME_INDEX : Instruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins unknown:$src2);
+ let hasSideEffects = 0;
+}
+
+def G_GLOBAL_VALUE : Instruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins unknown:$src);
+ let hasSideEffects = 0;
+}
+
+def G_INTTOPTR : Instruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type1:$src);
+ let hasSideEffects = 0;
+}
+
+def G_PTRTOINT : Instruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type1:$src);
+ let hasSideEffects = 0;
+}
+
+def G_BITCAST : Instruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type1:$src);
+ let hasSideEffects = 0;
+}
+
+def G_CONSTANT : Instruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins unknown:$imm);
+ let hasSideEffects = 0;
+}
+
+def G_FCONSTANT : Instruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins unknown:$imm);
+ let hasSideEffects = 0;
+}
+
+//------------------------------------------------------------------------------
// Binary ops.
//------------------------------------------------------------------------------
+
// Generic addition.
def G_ADD : Instruction {
- let OutOperandList = (outs unknown:$dst);
- let InOperandList = (ins unknown:$src1, unknown:$src2);
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+ let isCommutable = 1;
+}
+
+// Generic pointer offset.
+def G_GEP : Instruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type1:$src2);
+ let hasSideEffects = 0;
+}
+
+// Generic subtraction.
+def G_SUB : Instruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+ let isCommutable = 0;
+}
+
+// Generic multiplication.
+def G_MUL : Instruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+ let isCommutable = 1;
+}
+
+// Generic signed division.
+def G_SDIV : Instruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+ let isCommutable = 0;
+}
+
+// Generic unsigned division.
+def G_UDIV : Instruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+ let isCommutable = 0;
+}
+
+// Generic signed remainder.
+def G_SREM : Instruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+ let isCommutable = 0;
+}
+
+// Generic unsigned remainder.
+def G_UREM : Instruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+ let isCommutable = 0;
+}
+
+// Generic bitwise and.
+def G_AND : Instruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2);
let hasSideEffects = 0;
let isCommutable = 1;
}
// Generic bitwise or.
def G_OR : Instruction {
- let OutOperandList = (outs unknown:$dst);
- let InOperandList = (ins unknown:$src1, unknown:$src2);
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+ let isCommutable = 1;
+}
+
+// Generic bitwise xor.
+def G_XOR : Instruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+ let isCommutable = 1;
+}
+
+// Generic left-shift.
+def G_SHL : Instruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+}
+
+// Generic logical right-shift.
+def G_LSHR : Instruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+}
+
+// Generic arithmetic right-shift.
+def G_ASHR : Instruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+}
+
+// Generic integer comparison.
+def G_ICMP : Instruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins unknown:$tst, type1:$src1, type1:$src2);
+ let hasSideEffects = 0;
+}
+
+// Generic floating-point comparison.
+def G_FCMP : Instruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins unknown:$tst, type1:$src1, type1:$src2);
+ let hasSideEffects = 0;
+}
+
+// Generic select
+def G_SELECT : Instruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type1:$tst, type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+}
+
+//------------------------------------------------------------------------------
+// Overflow ops
+//------------------------------------------------------------------------------
+
+// Generic unsigned addition consuming and producing a carry flag.
+def G_UADDE : Instruction {
+ let OutOperandList = (outs type0:$dst, type1:$carry_out);
+ let InOperandList = (ins type0:$src1, type0:$src2, type1:$carry_in);
+ let hasSideEffects = 0;
+}
+
+// Generic signed addition producing a carry flag.
+def G_SADDO : Instruction {
+ let OutOperandList = (outs type0:$dst, type1:$carry_out);
+ let InOperandList = (ins type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+ let isCommutable = 1;
+}
+
+// Generic unsigned subtraction consuming and producing a carry flag.
+def G_USUBE : Instruction {
+ let OutOperandList = (outs type0:$dst, type1:$carry_out);
+ let InOperandList = (ins type0:$src1, type0:$src2, type1:$carry_in);
+ let hasSideEffects = 0;
+}
+
+// Generic signed subtraction producing a carry flag.
+def G_SSUBO : Instruction {
+ let OutOperandList = (outs type0:$dst, type1:$carry_out);
+ let InOperandList = (ins type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+}
+
+// Generic unsigned multiplication producing a carry flag.
+def G_UMULO : Instruction {
+ let OutOperandList = (outs type0:$dst, type1:$carry_out);
+ let InOperandList = (ins type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+ let isCommutable = 1;
+}
+
+// Generic signed multiplication producing a carry flag.
+def G_SMULO : Instruction {
+ let OutOperandList = (outs type0:$dst, type1:$carry_out);
+ let InOperandList = (ins type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+ let isCommutable = 1;
+}
+
+//------------------------------------------------------------------------------
+// Floating Point Unary Ops.
+//------------------------------------------------------------------------------
+
+def G_FPEXT : Instruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type1:$src);
+ let hasSideEffects = 0;
+}
+
+def G_FPTRUNC : Instruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type1:$src);
+ let hasSideEffects = 0;
+}
+
+def G_FPTOSI : Instruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type1:$src);
+ let hasSideEffects = 0;
+}
+
+def G_FPTOUI : Instruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type1:$src);
+ let hasSideEffects = 0;
+}
+
+def G_SITOFP : Instruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type1:$src);
+ let hasSideEffects = 0;
+}
+
+def G_UITOFP : Instruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type1:$src);
+ let hasSideEffects = 0;
+}
+
+//------------------------------------------------------------------------------
+// Floating Point Binary ops.
+//------------------------------------------------------------------------------
+
+// Generic FP addition.
+def G_FADD : Instruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+ let isCommutable = 1;
+}
+
+// Generic FP subtraction.
+def G_FSUB : Instruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+ let isCommutable = 0;
+}
+
+// Generic FP multiplication.
+def G_FMUL : Instruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2);
let hasSideEffects = 0;
let isCommutable = 1;
}
+// Generic FP division.
+def G_FDIV : Instruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+}
+
+// Generic FP remainder.
+def G_FREM : Instruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src1, type0:$src2);
+ let hasSideEffects = 0;
+}
+
+//------------------------------------------------------------------------------
+// Memory ops
+//------------------------------------------------------------------------------
+
+// Generic load. Expects a MachineMemOperand in addition to explicit operands.
+def G_LOAD : Instruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type1:$addr);
+ let hasSideEffects = 0;
+ let mayLoad = 1;
+}
+
+// Generic store. Expects a MachineMemOperand in addition to explicit operands.
+def G_STORE : Instruction {
+ let OutOperandList = (outs);
+ let InOperandList = (ins type0:$src, type1:$addr);
+ let hasSideEffects = 0;
+ let mayStore = 1;
+}
+
+//------------------------------------------------------------------------------
+// Variadic ops
+//------------------------------------------------------------------------------
+
+// Extract multiple registers of the specified size, starting from blocks given by
+// indexes. This will almost certainly be mapped to sub-register COPYs after
+// register banks have been selected.
+def G_EXTRACT : Instruction {
+ let OutOperandList = (outs);
+ let InOperandList = (ins variable_ops);
+ let hasSideEffects = 0;
+}
+
+// Insert a sequence of smaller registers into a larger one at the specified
+// indices (interleaved with the values in the operand list "op0, bit0, op1,
+// bit1, ...").
+def G_INSERT : Instruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins type0:$src, variable_ops);
+ let hasSideEffects = 0;
+}
+
+// Combine a sequence of generic vregs into a single larger value (starting at
+// bit 0). Essentially a G_INSERT where $src is an IMPLICIT_DEF, but it's so
+// important to legalization it probably deserves its own instruction.
+def G_SEQUENCE : Instruction {
+ let OutOperandList = (outs type0:$dst);
+ let InOperandList = (ins variable_ops);
+ let hasSideEffects = 0;
+}
+
+// Intrinsic without side effects.
+def G_INTRINSIC : Instruction {
+ let OutOperandList = (outs);
+ let InOperandList = (ins unknown:$intrin, variable_ops);
+ let hasSideEffects = 0;
+}
+
+// Intrinsic with side effects.
+def G_INTRINSIC_W_SIDE_EFFECTS : Instruction {
+ let OutOperandList = (outs);
+ let InOperandList = (ins unknown:$intrin, variable_ops);
+ let hasSideEffects = 1;
+ let mayLoad = 1;
+ let mayStore = 1;
+}
+
//------------------------------------------------------------------------------
// Branches.
//------------------------------------------------------------------------------
+
// Generic unconditional branch.
def G_BR : Instruction {
let OutOperandList = (outs);
@@ -41,6 +433,16 @@ def G_BR : Instruction {
let hasSideEffects = 0;
let isBranch = 1;
let isTerminator = 1;
+ let isBarrier = 1;
+}
+
+// Generic conditional branch.
+def G_BRCOND : Instruction {
+ let OutOperandList = (outs);
+ let InOperandList = (ins type0:$tst, unknown:$truebb);
+ let hasSideEffects = 0;
+ let isBranch = 1;
+ let isTerminator = 1;
}
// TODO: Add the other generic opcodes.
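For illustration (not part of this diff), a sketch of how IR translation code materializes one of these generic opcodes with the ordinary MachineInstr builder; the helper function and register parameters are assumptions:

  #include "llvm/CodeGen/MachineBasicBlock.h"
  #include "llvm/CodeGen/MachineInstrBuilder.h"
  #include "llvm/Target/TargetInstrInfo.h"
  #include "llvm/Target/TargetOpcodes.h"

  using namespace llvm;

  // Append DstReg = G_ADD Src0Reg, Src1Reg to the end of MBB. The operands of
  // G_ADD all share type0, so the three registers must carry the same LLT.
  static void buildGenericAdd(MachineBasicBlock &MBB, const TargetInstrInfo &TII,
                              const DebugLoc &DL, unsigned DstReg,
                              unsigned Src0Reg, unsigned Src1Reg) {
    BuildMI(&MBB, DL, TII.get(TargetOpcode::G_ADD), DstReg)
        .addReg(Src0Reg)
        .addReg(Src1Reg);
  }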
diff --git a/contrib/llvm/include/llvm/Target/Target.td b/contrib/llvm/include/llvm/Target/Target.td
index c71435a2564c..729d7669e0fa 100644
--- a/contrib/llvm/include/llvm/Target/Target.td
+++ b/contrib/llvm/include/llvm/Target/Target.td
@@ -141,7 +141,10 @@ class RegisterWithSubRegs<string n, list<Register> subregs> : Register<n> {
// of Operand's that are legal as type qualifiers in DAG patterns. This should
// only ever be used for defining multiclasses that are polymorphic over both
// RegisterClass's and other Operand's.
-class DAGOperand { }
+class DAGOperand {
+ string OperandNamespace = "MCOI";
+ string DecoderMethod = "";
+}
// RegisterClass - Now that all of the registers are defined, and aliases
// between registers are defined, specify which registers belong to which
@@ -368,6 +371,7 @@ class Instruction {
bit isSelect = 0; // Is this instruction a select instruction?
bit isBarrier = 0; // Can control flow fall through this instruction?
bit isCall = 0; // Is this instruction a call instruction?
+ bit isAdd = 0; // Is this instruction an add instruction?
bit canFoldAsLoad = 0; // Can this be folded as a simple memory operand?
bit mayLoad = ?; // Is it possible for this inst to read memory?
bit mayStore = ?; // Is it possible for this inst to write memory?
@@ -485,6 +489,12 @@ class Instruction {
/// constraint. For example, "$Rn = $Rd".
string TwoOperandAliasConstraint = "";
+ /// Assembler variant name to use for this instruction. If specified, the
+ /// instruction will be presented only in the MatchTable for this variant.
+ /// If not specified, assembler variants will be determined based on
+ /// AsmString.
+ string AsmVariantName = "";
+
///@}
/// UseNamedOperandTable - If set, the operand indices of this instruction
@@ -639,9 +649,7 @@ class Operand<ValueType ty> : DAGOperand {
ValueType Type = ty;
string PrintMethod = "printOperand";
string EncoderMethod = "";
- string DecoderMethod = "";
bit hasCompleteDecoder = 1;
- string OperandNamespace = "MCOI";
string OperandType = "OPERAND_UNKNOWN";
dag MIOperandInfo = (ops);
@@ -679,7 +687,6 @@ class RegisterOperand<RegisterClass regclass, string pm = "printOperand">
// should declare the other operand as one of its super classes.
AsmOperandClass ParserMatchClass;
- string OperandNamespace = "MCOI";
string OperandType = "OPERAND_REGISTER";
}
@@ -694,6 +701,20 @@ def f32imm : Operand<f32>;
def f64imm : Operand<f64>;
}
+// Register operands for generic instructions don't have an MVT, but do have
+// constraints linking the operands (e.g. all operands of a G_ADD must
+// have the same LLT).
+class TypedOperand<string Ty> : Operand<untyped> {
+ let OperandType = Ty;
+}
+
+def type0 : TypedOperand<"OPERAND_GENERIC_0">;
+def type1 : TypedOperand<"OPERAND_GENERIC_1">;
+def type2 : TypedOperand<"OPERAND_GENERIC_2">;
+def type3 : TypedOperand<"OPERAND_GENERIC_3">;
+def type4 : TypedOperand<"OPERAND_GENERIC_4">;
+def type5 : TypedOperand<"OPERAND_GENERIC_5">;
+
/// zero_reg definition - Special node to stand for the zero register.
///
def zero_reg;
@@ -956,8 +977,24 @@ def PATCHABLE_FUNCTION_ENTER : Instruction {
def PATCHABLE_RET : Instruction {
let OutOperandList = (outs unknown:$dst);
let InOperandList = (ins variable_ops);
+ let AsmString = "# XRay Function Patchable RET.";
+ let usesCustomInserter = 1;
+ let hasSideEffects = 1;
+ let isReturn = 1;
+}
+def PATCHABLE_FUNCTION_EXIT : Instruction {
+ let OutOperandList = (outs);
+ let InOperandList = (ins);
let AsmString = "# XRay Function Exit.";
let usesCustomInserter = 1;
+ let hasSideEffects = 0; // FIXME: is this correct?
+ let isReturn = 0; // Original return instruction will follow
+}
+def PATCHABLE_TAIL_CALL : Instruction {
+ let OutOperandList = (outs unknown:$dst);
+ let InOperandList = (ins variable_ops);
+ let AsmString = "# XRay Tail Call Exit.";
+ let usesCustomInserter = 1;
let hasSideEffects = 1;
let isReturn = 1;
}
@@ -1118,6 +1155,10 @@ class InstAlias<string Asm, dag Result, int Emit = 1> {
// defined AsmMatchConverter and instead use the function generated by the
// dag Result.
bit UseInstAsmMatchConverter = 1;
+
+ // Assembler variant name to use for this alias. If not specified then
+ // assembler variants will be determined based on AsmString.
+ string AsmVariantName = "";
}
//===----------------------------------------------------------------------===//
@@ -1299,3 +1340,8 @@ include "llvm/Target/TargetCallingConv.td"
// Pull in the common support for DAG isel generation.
//
include "llvm/Target/TargetSelectionDAG.td"
+
+//===----------------------------------------------------------------------===//
+// Pull in the common support for Global ISel generation.
+//
+include "llvm/Target/TargetGlobalISel.td"
diff --git a/contrib/llvm/include/llvm/Target/TargetCallingConv.h b/contrib/llvm/include/llvm/Target/TargetCallingConv.h
index 19d8917f17d3..be09236cdab0 100644
--- a/contrib/llvm/include/llvm/Target/TargetCallingConv.h
+++ b/contrib/llvm/include/llvm/Target/TargetCallingConv.h
@@ -51,6 +51,15 @@ namespace ISD {
static const uint64_t SwiftSelfOffs = 14;
static const uint64_t SwiftError = 1ULL<<15; ///< Swift error parameter
static const uint64_t SwiftErrorOffs = 15;
+ static const uint64_t Hva = 1ULL << 16; ///< HVA field for
+ ///< vectorcall
+ static const uint64_t HvaOffs = 16;
+ static const uint64_t HvaStart = 1ULL << 17; ///< HVA structure start
+ ///< for vectorcall
+ static const uint64_t HvaStartOffs = 17;
+ static const uint64_t SecArgPass = 1ULL << 18; ///< Second argument
+ ///< pass for vectorcall
+ static const uint64_t SecArgPassOffs = 18;
static const uint64_t OrigAlign = 0x1FULL<<27;
static const uint64_t OrigAlignOffs = 27;
static const uint64_t ByValSize = 0x3fffffffULL<<32; ///< Struct size
@@ -91,6 +100,15 @@ namespace ISD {
bool isSwiftError() const { return Flags & SwiftError; }
void setSwiftError() { Flags |= One << SwiftErrorOffs; }
+ bool isHva() const { return Flags & Hva; }
+ void setHva() { Flags |= One << HvaOffs; }
+
+ bool isHvaStart() const { return Flags & HvaStart; }
+ void setHvaStart() { Flags |= One << HvaStartOffs; }
+
+ bool isSecArgPass() const { return Flags & SecArgPass; }
+ void setSecArgPass() { Flags |= One << SecArgPassOffs; }
+
bool isNest() const { return Flags & Nest; }
void setNest() { Flags |= One << NestOffs; }
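For illustration (not part of this diff), the new vectorcall HVA bits are set and queried through ISD::ArgFlagsTy just like the existing Swift flags; a sketch of calling-convention lowering code:

  #include "llvm/Target/TargetCallingConv.h"

  using namespace llvm;

  // Mark an argument as the start of a homogeneous vector aggregate and as an
  // HVA member; later lowering can test the flags when assigning registers.
  static void markHvaStart(ISD::ArgFlagsTy &Flags) {
    Flags.setHvaStart();
    Flags.setHva();
  }

  static bool needsSecondArgPass(const ISD::ArgFlagsTy &Flags) {
    return Flags.isSecArgPass();
  }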
diff --git a/contrib/llvm/include/llvm/Target/TargetFrameLowering.h b/contrib/llvm/include/llvm/Target/TargetFrameLowering.h
index 98065aca16f3..4576f8c7582b 100644
--- a/contrib/llvm/include/llvm/Target/TargetFrameLowering.h
+++ b/contrib/llvm/include/llvm/Target/TargetFrameLowering.h
@@ -179,12 +179,6 @@ public:
virtual void adjustForHiPEPrologue(MachineFunction &MF,
MachineBasicBlock &PrologueMBB) const {}
- /// Adjust the prologue to add an allocation at a fixed offset from the frame
- /// pointer.
- virtual void
- adjustForFrameAllocatePrologue(MachineFunction &MF,
- MachineBasicBlock &PrologueMBB) const {}
-
/// spillCalleeSavedRegisters - Issues instruction(s) to spill all callee
/// saved registers and returns true if it isn't possible / profitable to do
/// so by issuing a series of store instructions via
diff --git a/contrib/llvm/include/llvm/Target/TargetGlobalISel.td b/contrib/llvm/include/llvm/Target/TargetGlobalISel.td
new file mode 100644
index 000000000000..0727c9802e5e
--- /dev/null
+++ b/contrib/llvm/include/llvm/Target/TargetGlobalISel.td
@@ -0,0 +1,29 @@
+//===- TargetGlobalISel.td - Common code for GlobalISel ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the target-independent interfaces used to support
+// SelectionDAG instruction selection patterns (specified in
+// TargetSelectionDAG.td) when generating GlobalISel instruction selectors.
+//
+// This is intended as a compatibility layer, to enable reuse of target
+// descriptions written for SelectionDAG without requiring explicit GlobalISel
+// support. It will eventually supersede SelectionDAG patterns.
+//
+//===----------------------------------------------------------------------===//
+
+// Declare that a generic Instruction is 'equivalent' to an SDNode, that is,
+// SelectionDAG patterns involving the SDNode can be transformed to match the
+// Instruction instead.
+class GINodeEquiv<Instruction i, SDNode node> {
+ Instruction I = i;
+ SDNode Node = node;
+}
+
+def : GINodeEquiv<G_ADD, add>;
+def : GINodeEquiv<G_BR, br>;
diff --git a/contrib/llvm/include/llvm/Target/TargetInstrInfo.h b/contrib/llvm/include/llvm/Target/TargetInstrInfo.h
index e0b9a22ed5d0..83515bc91841 100644
--- a/contrib/llvm/include/llvm/Target/TargetInstrInfo.h
+++ b/contrib/llvm/include/llvm/Target/TargetInstrInfo.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/MachineCombinerPattern.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -250,6 +251,12 @@ public:
unsigned &Size, unsigned &Offset,
const MachineFunction &MF) const;
+ /// Returns the size in bytes of the specified MachineInstr, or ~0U
+ /// when this function is not implemented by a target.
+ virtual unsigned getInstSizeInBytes(const MachineInstr &MI) const {
+ return ~0U;
+ }
+
/// Return true if the instruction is as cheap as a move instruction.
///
/// Targets for different archs need to override this, and different
@@ -263,11 +270,8 @@ public:
/// MachineSink determines on its own whether the instruction is safe to sink;
/// this gives the target a hook to override the default behavior with regards
/// to which instructions should be sunk.
- /// The default behavior is to not sink insert_subreg, subreg_to_reg, and
- /// reg_sequence. These are meant to be close to the source to make it easier
- /// to coalesce.
virtual bool shouldSink(const MachineInstr &MI) const {
- return !MI.isInsertSubreg() && !MI.isSubregToReg() && !MI.isRegSequence();
+ return true;
}
/// Re-issue the specified 'original' instruction at the
@@ -439,6 +443,31 @@ public:
const MachineInstr &MI1,
const MachineRegisterInfo *MRI = nullptr) const;
+ /// \returns true if a branch from an instruction with opcode \p BranchOpc
+ /// is capable of jumping to a position \p BrOffset bytes away.
+ virtual bool isBranchOffsetInRange(unsigned BranchOpc,
+ int64_t BrOffset) const {
+ llvm_unreachable("target did not implement");
+ }
+
+ /// \returns The block that branch instruction \p MI jumps to.
+ virtual MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const {
+ llvm_unreachable("target did not implement");
+ }
+
+ /// Insert an unconditional indirect branch at the end of \p MBB to \p
+ /// NewDestBB. \p BrOffset indicates the offset of \p NewDestBB relative to
+ /// the offset of the position to insert the new branch.
+ ///
+ /// \returns The number of bytes added to the block.
+ virtual unsigned insertIndirectBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock &NewDestBB,
+ const DebugLoc &DL,
+ int64_t BrOffset = 0,
+ RegScavenger *RS = nullptr) const {
+ llvm_unreachable("target did not implement");
+ }
+
/// Analyze the branching code at the end of MBB, returning
/// true if it cannot be understood (e.g. it's a switch dispatch or isn't
/// implemented for a target). Upon success, this returns false and returns
@@ -458,7 +487,7 @@ public:
/// condition. These operands can be passed to other TargetInstrInfo
/// methods to create new branches.
///
- /// Note that RemoveBranch and InsertBranch must be implemented to support
+ /// Note that removeBranch and insertBranch must be implemented to support
/// cases where this method returns success.
///
/// If AllowModify is true, then this routine is allowed to modify the basic
@@ -521,15 +550,18 @@ public:
/// Remove the branching code at the end of the specific MBB.
/// This is only invoked in cases where AnalyzeBranch returns success. It
/// returns the number of instructions that were removed.
- virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const {
- llvm_unreachable("Target didn't implement TargetInstrInfo::RemoveBranch!");
+ /// If \p BytesRemoved is non-null, report the change in code size from the
+ /// removed instructions.
+ virtual unsigned removeBranch(MachineBasicBlock &MBB,
+ int *BytesRemoved = nullptr) const {
+ llvm_unreachable("Target didn't implement TargetInstrInfo::removeBranch!");
}
- /// Insert branch code into the end of the specified MachineBasicBlock.
- /// The operands to this method are the same as those
- /// returned by AnalyzeBranch. This is only invoked in cases where
- /// AnalyzeBranch returns success. It returns the number of instructions
- /// inserted.
+ /// Insert branch code into the end of the specified MachineBasicBlock. The
+ /// operands to this method are the same as those returned by AnalyzeBranch.
+ /// This is only invoked in cases where AnalyzeBranch returns success. It
+ /// returns the number of instructions inserted. If \p BytesAdded is non-null,
+ /// report the change in code size from the added instructions.
///
/// It is also invoked by tail merging to add unconditional branches in
/// cases where AnalyzeBranch doesn't apply because there was no original
@@ -538,11 +570,40 @@ public:
///
/// The CFG information in MBB.Predecessors and MBB.Successors must be valid
/// before calling this function.
- virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ virtual unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
ArrayRef<MachineOperand> Cond,
- const DebugLoc &DL) const {
- llvm_unreachable("Target didn't implement TargetInstrInfo::InsertBranch!");
+ const DebugLoc &DL,
+ int *BytesAdded = nullptr) const {
+ llvm_unreachable("Target didn't implement TargetInstrInfo::insertBranch!");
+ }
+
+ unsigned insertUnconditionalBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *DestBB,
+ const DebugLoc &DL,
+ int *BytesAdded = nullptr) const {
+ return insertBranch(MBB, DestBB, nullptr,
+ ArrayRef<MachineOperand>(), DL, BytesAdded);
+ }
+
+ /// Analyze the loop code, return true if it cannot be understood. Upon
+ /// success, this function returns false and returns information about the
+ /// induction variable and compare instruction used at the end.
+ virtual bool analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst,
+ MachineInstr *&CmpInst) const {
+ return true;
+ }
+
+ /// Generate code to reduce the loop iteration by one and check if the loop is
+ /// finished. Return the value/register of the new loop count. We need
+ /// this function when peeling off one or more iterations of a loop. This
+ /// function assumes the nth iteration is peeled first.
+ virtual unsigned reduceLoopCount(MachineBasicBlock &MBB,
+ MachineInstr *IndVar, MachineInstr &Cmp,
+ SmallVectorImpl<MachineOperand> &Cond,
+ SmallVectorImpl<MachineInstr *> &PrevInsts,
+ unsigned Iter, unsigned MaxIter) const {
+ llvm_unreachable("Target didn't implement ReduceLoopCount");
}
/// Delete the instruction OldInst and everything after it, replacing it with
@@ -550,40 +611,6 @@ public:
virtual void ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
MachineBasicBlock *NewDest) const;
- /// Get an instruction that performs an unconditional branch to the given
- /// symbol.
- virtual void
- getUnconditionalBranch(MCInst &MI,
- const MCSymbolRefExpr *BranchTarget) const {
- llvm_unreachable("Target didn't implement "
- "TargetInstrInfo::getUnconditionalBranch!");
- }
-
- /// Get a machine trap instruction.
- virtual void getTrap(MCInst &MI) const {
- llvm_unreachable("Target didn't implement TargetInstrInfo::getTrap!");
- }
-
- /// Get a number of bytes that suffices to hold
- /// either the instruction returned by getUnconditionalBranch or the
- /// instruction returned by getTrap. This only makes sense because
- /// getUnconditionalBranch returns a single, specific instruction. This
- /// information is needed by the jumptable construction code, since it must
- /// decide how many bytes to use for a jumptable entry so it can generate the
- /// right mask.
- ///
- /// Note that if the jumptable instruction requires alignment, then that
- /// alignment should be factored into this required bound so that the
- /// resulting bound gives the right alignment for the instruction.
- virtual unsigned getJumpInstrTableEntryBound() const {
- // This method gets called by LLVMTargetMachine always, so it can't fail
- // just because there happens to be no implementation for this target.
- // Any code that tries to use a jumptable annotation without defining
- // getUnconditionalBranch on the appropriate Target will fail anyway, and
- // the value returned here won't matter in that case.
- return 0;
- }
-
/// Return true if it's legal to split the given basic
/// block at the specified instruction (i.e. instruction would be the start
/// of a new basic block).
@@ -790,6 +817,20 @@ public:
/// anything was changed.
virtual bool expandPostRAPseudo(MachineInstr &MI) const { return false; }
+ /// Check whether the target can fold a load that feeds a subreg operand
+ /// (or a subreg operand that feeds a store).
+ /// For example, X86 may want to return true if it can fold
+ /// movl (%esp), %eax
+ /// subb, %al, ...
+ /// Into:
+ /// subb (%esp), ...
+ ///
+ /// Ideally, we'd like the target implementation of foldMemoryOperand() to
+ /// reject subregs - but since this behavior used to be enforced in the
+ /// target-independent code, moving this responsibility to the targets
+ /// has the potential of causing nasty silent breakage in out-of-tree targets.
+ virtual bool isSubregFoldable() const { return false; }
+
/// Attempt to fold a load or store of the specified stack
/// slot into the specified machine instruction for the specified operand(s).
/// If this is possible, a new instruction is returned with the specified
@@ -1003,27 +1044,45 @@ public:
return false;
}
- virtual bool enableClusterLoads() const { return false; }
+ /// Return true if the instruction contains a base register and offset. If
+ /// true, the function also sets the operand position in the instruction
+ /// for the base register and offset.
+ virtual bool getBaseAndOffsetPosition(const MachineInstr &MI,
+ unsigned &BasePos,
+ unsigned &OffsetPos) const {
+ return false;
+ }
- virtual bool enableClusterStores() const { return false; }
+ /// If the instruction is an increment of a constant value, return the amount.
+ virtual bool getIncrementValue(const MachineInstr &MI, int &Value) const {
+ return false;
+ }
+ /// Returns true if the two given memory operations should be scheduled
+ /// adjacent. Note that you have to add:
+ /// DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
+ /// or
+ /// DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
+ /// to TargetPassConfig::createMachineScheduler() to have an effect.
virtual bool shouldClusterMemOps(MachineInstr &FirstLdSt,
MachineInstr &SecondLdSt,
unsigned NumLoads) const {
- return false;
+ llvm_unreachable("target did not implement shouldClusterMemOps()");
}
/// Can this target fuse the given instructions if they are scheduled
- /// adjacent.
- virtual bool shouldScheduleAdjacent(MachineInstr &First,
- MachineInstr &Second) const {
- return false;
+ /// adjacent. Note that you have to add:
+ /// DAG.addMutation(createMacroFusionDAGMutation());
+ /// to TargetPassConfig::createMachineScheduler() to have an effect.
+ virtual bool shouldScheduleAdjacent(const MachineInstr &First,
+ const MachineInstr &Second) const {
+ llvm_unreachable("target did not implement shouldScheduleAdjacent()");
}
/// Reverses the branch condition of the specified condition list,
/// returning false on success and true if it cannot be reversed.
virtual
- bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ bool reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
return true;
}
@@ -1035,6 +1094,10 @@ public:
/// Return the noop instruction to use for a noop.
virtual void getNoopForMachoTarget(MCInst &NopInst) const;
+ /// Return true for post-incremented instructions.
+ virtual bool isPostIncrement(const MachineInstr &MI) const {
+ return false;
+ }
/// Returns true if the instruction is already predicated.
virtual bool isPredicated(const MachineInstr &MI) const {
@@ -1045,6 +1108,25 @@ public:
/// terminator instruction that has not been predicated.
virtual bool isUnpredicatedTerminator(const MachineInstr &MI) const;
+ /// Returns true if MI is an unconditional tail call.
+ virtual bool isUnconditionalTailCall(const MachineInstr &MI) const {
+ return false;
+ }
+
+ /// Returns true if the tail call can be made conditional on BranchCond.
+ virtual bool
+ canMakeTailCallConditional(SmallVectorImpl<MachineOperand> &Cond,
+ const MachineInstr &TailCall) const {
+ return false;
+ }
+
+ /// Replace the conditional branch in MBB with a conditional tail call.
+ virtual void replaceBranchWithTailCall(MachineBasicBlock &MBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ const MachineInstr &TailCall) const {
+ llvm_unreachable("Target didn't implement replaceBranchWithTailCall!");
+ }
+
/// Convert the instruction into a predicated instruction.
/// It returns true if the operation was successful.
virtual bool PredicateInstruction(MachineInstr &MI,
@@ -1195,22 +1277,6 @@ public:
const MachineInstr &UseMI,
unsigned UseIdx) const;
- /// Compute and return the latency of the given data dependent def and use
- /// when the operand indices are already known. UseMI may be \c nullptr for
- /// an unknown use.
- ///
- /// FindMin may be set to get the minimum vs. expected latency. Minimum
- /// latency is used for scheduling groups, while expected latency is for
- /// instruction cost and critical path.
- ///
- /// Depending on the subtarget's itinerary properties, this may or may not
- /// need to call getOperandLatency(). For most subtargets, we don't need
- /// DefIdx or UseIdx to compute min latency.
- unsigned computeOperandLatency(const InstrItineraryData *ItinData,
- const MachineInstr &DefMI, unsigned DefIdx,
- const MachineInstr *UseMI,
- unsigned UseIdx) const;
-
/// Compute the instruction latency of a given instruction.
/// If the instruction has higher cost when predicated, it's returned via
/// PredCost.
@@ -1439,6 +1505,11 @@ public:
return None;
}
+ /// Determines whether |Inst| is a tail call instruction.
+ virtual bool isTailCall(const MachineInstr &Inst) const {
+ return false;
+ }
+
private:
unsigned CallFrameSetupOpcode, CallFrameDestroyOpcode;
unsigned CatchRetOpcode;
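For illustration (not part of this diff), a sketch of generic code using the new BytesRemoved/BytesAdded out-parameters to track the code-size effect of rewriting a block's terminators:

  #include "llvm/Target/TargetInstrInfo.h"

  using namespace llvm;

  // Replace MBB's existing branches with one unconditional branch to DestBB
  // and return the net change in code size, in bytes.
  static int retargetBlock(const TargetInstrInfo &TII, MachineBasicBlock &MBB,
                           MachineBasicBlock &DestBB, const DebugLoc &DL) {
    int Removed = 0, Added = 0;
    TII.removeBranch(MBB, &Removed);
    TII.insertUnconditionalBranch(MBB, &DestBB, DL, &Added);
    return Added - Removed;
  }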
diff --git a/contrib/llvm/include/llvm/Target/TargetIntrinsicInfo.h b/contrib/llvm/include/llvm/Target/TargetIntrinsicInfo.h
index c630f5b12a15..6a92bdee747e 100644
--- a/contrib/llvm/include/llvm/Target/TargetIntrinsicInfo.h
+++ b/contrib/llvm/include/llvm/Target/TargetIntrinsicInfo.h
@@ -14,6 +14,7 @@
#ifndef LLVM_TARGET_TARGETINTRINSICINFO_H
#define LLVM_TARGET_TARGETINTRINSICINFO_H
+#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Compiler.h"
#include <string>
@@ -47,8 +48,12 @@ public:
/// names.
virtual unsigned lookupName(const char *Name, unsigned Len) const =0;
+ unsigned lookupName(StringRef Name) const {
+ return lookupName(Name.data(), Name.size());
+ }
+
/// Return the target intrinsic ID of a function, or 0.
- virtual unsigned getIntrinsicID(Function *F) const;
+ virtual unsigned getIntrinsicID(const Function *F) const;
/// Returns true if the intrinsic can be overloaded.
virtual bool isOverloaded(unsigned IID) const = 0;
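For illustration (not part of this diff), the new StringRef overload simply forwards to the existing pointer/length variant, so callers can pass a name directly:

  #include "llvm/Target/TargetIntrinsicInfo.h"

  using namespace llvm;

  static bool isTargetIntrinsicName(const TargetIntrinsicInfo &TII,
                                    StringRef Name) {
    // Equivalent to TII.lookupName(Name.data(), Name.size()); an ID of 0
    // means the name is not a recognized target intrinsic.
    return TII.lookupName(Name) != 0;
  }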
diff --git a/contrib/llvm/include/llvm/Target/TargetItinerary.td b/contrib/llvm/include/llvm/Target/TargetItinerary.td
index a37bbf2474c5..3b1998dfb1ff 100644
--- a/contrib/llvm/include/llvm/Target/TargetItinerary.td
+++ b/contrib/llvm/include/llvm/Target/TargetItinerary.td
@@ -92,7 +92,7 @@ def NoItinerary : InstrItinClass;
//
// OperandCycles are optional "cycle counts". They specify the cycle after
// instruction issue the values which correspond to specific operand indices
-// are defined or read. Bypasses are optional "pipeline forwarding pathes", if
+// are defined or read. Bypasses are optional "pipeline forwarding paths", if
// a def by an instruction is available on a specific bypass and the use can
// read from the same bypass, then the operand use latency is reduced by one.
//
diff --git a/contrib/llvm/include/llvm/Target/TargetLowering.h b/contrib/llvm/include/llvm/Target/TargetLowering.h
index 4586a172e76b..fb43ef19a645 100644
--- a/contrib/llvm/include/llvm/Target/TargetLowering.h
+++ b/contrib/llvm/include/llvm/Target/TargetLowering.h
@@ -142,6 +142,13 @@ public:
CmpXChg, // Expand the instruction into cmpxchg; used by at least X86.
};
+ /// Enum that specifies when a multiplication should be expanded.
+ enum class MulExpansionKind {
+ Always, // Always expand the instruction.
+ OnlyLegalOrCustom, // Only expand when the resulting instructions are legal
+ // or custom.
+ };
+
static ISD::NodeType getExtendForContent(BooleanContent Content) {
switch (Content) {
case UndefinedBooleanContent:
@@ -190,9 +197,6 @@ public:
return getPointerTy(DL);
}
- /// Return true if the select operation is expensive for this target.
- bool isSelectExpensive() const { return SelectIsExpensive; }
-
virtual bool isSelectSupported(SelectSupportKind /*kind*/) const {
return true;
}
@@ -243,11 +247,43 @@ public:
return true;
}
- /// Return true if sqrt(x) is as cheap or cheaper than 1 / rsqrt(x)
- bool isFsqrtCheap() const {
- return FsqrtIsCheap;
+ /// Return true if SQRT(X) shouldn't be replaced with X*RSQRT(X).
+ virtual bool isFsqrtCheap(SDValue X, SelectionDAG &DAG) const {
+ // Default behavior is to replace SQRT(X) with X*RSQRT(X).
+ return false;
}
+ /// Reciprocal estimate status values used by the functions below.
+ enum ReciprocalEstimate : int {
+ Unspecified = -1,
+ Disabled = 0,
+ Enabled = 1
+ };
+
+ /// Return a ReciprocalEstimate enum value for a square root of the given type
+ /// based on the function's attributes. If the operation is not overridden by
+ /// the function's attributes, "Unspecified" is returned and target defaults
+ /// are expected to be used for instruction selection.
+ int getRecipEstimateSqrtEnabled(EVT VT, MachineFunction &MF) const;
+
+ /// Return a ReciprocalEstimate enum value for a division of the given type
+ /// based on the function's attributes. If the operation is not overridden by
+ /// the function's attributes, "Unspecified" is returned and target defaults
+ /// are expected to be used for instruction selection.
+ int getRecipEstimateDivEnabled(EVT VT, MachineFunction &MF) const;
+
+ /// Return the refinement step count for a square root of the given type based
+ /// on the function's attributes. If the operation is not overridden by
+ /// the function's attributes, "Unspecified" is returned and target defaults
+ /// are expected to be used for instruction selection.
+ int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const;
+
+ /// Return the refinement step count for a division of the given type based
+ /// on the function's attributes. If the operation is not overridden by
+ /// the function's attributes, "Unspecified" is returned and target defaults
+ /// are expected to be used for instruction selection.
+ int getDivRefinementSteps(EVT VT, MachineFunction &MF) const;
+
/// Returns true if target has indicated at least one type should be bypassed.
bool isSlowDivBypassed() const { return !BypassSlowDivWidths.empty(); }
@@ -321,6 +357,11 @@ public:
return false;
}
+ /// \brief Return true if ctlz instruction is fast.
+ virtual bool isCtlzFast() const {
+ return false;
+ }
+
/// Return true if it is safe to transform an integer-domain bitwise operation
/// into the equivalent floating-point operation. This should be set to true
/// if the target has IEEE-754-compliant fabs/fneg operations for the input
@@ -329,6 +370,12 @@ public:
return false;
}
+ /// \brief Return true if it is cheaper to split the store of a merged int val
+ /// from a pair of smaller values into multiple stores.
+ virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const {
+ return false;
+ }
+
/// \brief Return if the target supports combining a
/// chain like:
/// \code
@@ -360,6 +407,15 @@ public:
return false;
}
+ /// Return true if the target has a bitwise and-not operation:
+ /// X = ~A & B
+ /// This can be used to simplify select or other instructions.
+ virtual bool hasAndNot(SDValue X) const {
+ // If the target has the more complex version of this operation, assume that
+ // it has this operation too.
+ return hasAndNotCompare(X);
+ }
+
/// \brief Return true if the target wants to use the optimization that
/// turns ext(promotableInst1(...(promotableInstN(load)))) into
/// promotedInst1(...(promotedInstN(ext(load)))).
@@ -586,7 +642,7 @@ public:
/// Returns true if the operation can trap for the value type.
///
/// VT must be a legal type. By default, we optimistically assume most
- /// operations don't trap except for divide and remainder.
+ /// operations don't trap except for integer divide and remainder.
virtual bool canOpTrap(unsigned Op, EVT VT) const;
/// Similar to isShuffleMaskLegal. This is used by Targets can use this to
@@ -1010,10 +1066,15 @@ public:
return UseUnderscoreLongJmp;
}
- /// Return integer threshold on number of blocks to use jump tables rather
- /// than if sequence.
- int getMinimumJumpTableEntries() const {
- return MinimumJumpTableEntries;
+ /// Return lower limit for number of blocks in a jump table.
+ unsigned getMinimumJumpTableEntries() const;
+
+ /// Return upper limit for number of entries in a jump table.
+ /// Zero if no limit.
+ unsigned getMaximumJumpTableSize() const;
+
+ virtual bool isJumpTableRelative() const {
+ return TM.isPositionIndependent();
}
/// If a physical register, this specifies the register that
@@ -1095,8 +1156,12 @@ public:
/// Should be used only when getIRStackGuard returns nullptr.
virtual Value *getSSPStackGuardCheck(const Module &M) const;
- /// If the target has a standard location for the unsafe stack pointer,
- /// returns the address of that location. Otherwise, returns nullptr.
+protected:
+ Value *getDefaultSafeStackPointerLocation(IRBuilder<> &IRB,
+ bool UseTLS) const;
+
+public:
+ /// Returns the target-specific address of the unsafe stack pointer.
virtual Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const;
/// Returns true if a cast between SrcAS and DestAS is a noop.
@@ -1104,6 +1169,12 @@ public:
return false;
}
+ /// Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g. we
+ /// are happy to sink it into basic blocks.
+ virtual bool isCheapAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const {
+ return isNoopAddrSpaceCast(SrcAS, DestAS);
+ }
+
/// Return true if the pointer arguments to CI should be aligned by aligning
/// the object whose address is being passed. If so then MinSize is set to the
/// minimum size the object must be to be aligned and PrefAlign is set to the
@@ -1341,11 +1412,12 @@ protected:
UseUnderscoreLongJmp = Val;
}
- /// Indicate the number of blocks to generate jump tables rather than if
- /// sequence.
- void setMinimumJumpTableEntries(int Val) {
- MinimumJumpTableEntries = Val;
- }
+ /// Indicate the minimum number of blocks to generate jump tables.
+ void setMinimumJumpTableEntries(unsigned Val);
+
+ /// Indicate the maximum number of entries in jump tables.
+ /// Set to zero to generate unlimited jump tables.
+ void setMaximumJumpTableSize(unsigned);
/// If set to a physical register, this specifies the register that
/// llvm.savestack/llvm.restorestack should save and restore.
@@ -1353,12 +1425,6 @@ protected:
StackPointerRegisterToSaveRestore = R;
}
- /// Tells the code generator not to expand operations into sequences that use
- /// the select operations if possible.
- void setSelectIsExpensive(bool isExpensive = true) {
- SelectIsExpensive = isExpensive;
- }
-
/// Tells the code generator that the target has multiple (allocatable)
/// condition registers that can be used to store the results of comparisons
/// for use by selects and conditional branches. With multiple condition
@@ -1381,10 +1447,6 @@ protected:
/// control.
void setJumpIsExpensive(bool isExpensive = true);
- /// Tells the code generator that fsqrt is cheap, and should not be replaced
- /// with an alternative sequence of instructions.
- void setFsqrtIsCheap(bool isCheap = true) { FsqrtIsCheap = isCheap; }
-
/// Tells the code generator that this target supports floating point
/// exceptions and cares about preserving floating point exception behavior.
void setHasFloatingPointExceptions(bool FPExceptions = true) {
@@ -1401,21 +1463,9 @@ protected:
/// that class natively.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC) {
assert((unsigned)VT.SimpleTy < array_lengthof(RegClassForVT));
- AvailableRegClasses.push_back(std::make_pair(VT, RC));
RegClassForVT[VT.SimpleTy] = RC;
}
- /// Remove all register classes.
- void clearRegisterClasses() {
- std::fill(std::begin(RegClassForVT), std::end(RegClassForVT), nullptr);
-
- AvailableRegClasses.clear();
- }
-
- /// \brief Remove all operation actions.
- void clearOperationActions() {
- }
-
/// Return the largest legal super-reg register class of the register class
/// for the specified type and its associated "cost".
virtual std::pair<const TargetRegisterClass *, uint8_t>
@@ -1628,6 +1678,10 @@ public:
return -1;
}
+ virtual bool isFoldableMemAccessOffset(Instruction *I, int64_t Offset) const {
+ return true;
+ }
+
/// Return true if the specified immediate is legal icmp immediate, that is
/// the target has icmp instructions which can compare a register against the
/// immediate without having to materialize the immediate into a register.
@@ -1739,11 +1793,6 @@ public:
/// In other words, unless the target performs a post-isel load combining,
/// this information should not be provided because it will generate more
/// loads.
- virtual bool hasPairedLoad(Type * /*LoadedType*/,
- unsigned & /*RequiredAligment*/) const {
- return false;
- }
-
virtual bool hasPairedLoad(EVT /*LoadedType*/,
unsigned & /*RequiredAligment*/) const {
return false;
@@ -1893,10 +1942,6 @@ public:
private:
const TargetMachine &TM;
- /// Tells the code generator not to expand operations into sequences that use
- /// the select operations if possible.
- bool SelectIsExpensive;
-
/// Tells the code generator that the target has multiple (allocatable)
/// condition registers that can be used to store the results of comparisons
/// for use by selects and conditional branches. With multiple condition
@@ -1910,9 +1955,6 @@ private:
/// combined with "shift" to BitExtract instructions.
bool HasExtractBitsInsn;
- // Don't expand fsqrt with an approximation based on the inverse sqrt.
- bool FsqrtIsCheap;
-
/// Tells the code generator to bypass slow divide or remainder
/// instructions. For example, BypassSlowDivWidths[32,8] tells the code
/// generator to bypass 32-bit integer div/rem with an 8-bit unsigned integer
@@ -1938,9 +1980,6 @@ private:
/// Defaults to false.
bool UseUnderscoreLongJmp;
- /// Number of blocks threshold to use jump tables.
- int MinimumJumpTableEntries;
-
/// Information about the contents of the high-bits in boolean values held in
/// a type wider than i1. See getBooleanContents.
BooleanContent BooleanContents;
@@ -2055,7 +2094,6 @@ private:
LegalizeKind getTypeConversion(LLVMContext &Context, EVT VT) const;
private:
- std::vector<std::pair<MVT, const TargetRegisterClass*> > AvailableRegClasses;
/// Targets can specify ISD nodes that they would like PerformDAGCombine
/// callbacks for by calling setTargetDAGCombine(), which sets a bit in this
@@ -2089,7 +2127,7 @@ protected:
virtual bool isExtFreeImpl(const Instruction *I) const { return false; }
/// Depth that GatherAllAliases should continue looking for chain
- /// dependencies when trying to find a more preferrable chain. As an
+ /// dependencies when trying to find a more preferable chain. As an
/// approximation, this should be more than the number of consecutive stores
/// expected to be merged.
unsigned GatherAllAliasesMaxDepth;
@@ -2175,6 +2213,8 @@ class TargetLowering : public TargetLoweringBase {
void operator=(const TargetLowering&) = delete;
public:
+ struct DAGCombinerInfo;
+
/// NOTE: The TargetMachine owns TLOF.
explicit TargetLowering(const TargetMachine &TM);
@@ -2287,6 +2327,16 @@ public:
/// generalized for targets with other types of implicit widening casts.
bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &Demanded,
const SDLoc &dl);
+
+ /// Helper for SimplifyDemandedBits that can simplify an operation with
+ /// multiple uses. This function uses TLI.SimplifyDemandedBits to
+ /// simplify Operand \p OpIdx of \p User and then update \p User with
+ /// the simplified version. No other uses of \p OpIdx are updated.
+ /// If \p User is the only user of \p OpIdx, this function behaves exactly
+ /// like TLI.SimplifyDemandedBits except that it also updates the DAG by
+ /// calling DCI.CommitTargetLoweringOpt.
+ bool SimplifyDemandedBits(SDNode *User, unsigned OpIdx,
+ const APInt &Demanded, DAGCombinerInfo &DCI);
};
/// Look at Op. At this point, we know that only the DemandedMask bits of the
@@ -2296,9 +2346,17 @@ public:
/// expression and return a mask of KnownOne and KnownZero bits for the
/// expression (used to simplify the caller). The KnownZero/One bits may only
/// be accurate for those bits in the DemandedMask.
+ /// \p AssumeSingleUse When this parameter is true, this function will
+ /// attempt to simplify \p Op even if there are multiple uses.
+ /// Callers are responsible for correctly updating the DAG based on the
+ /// results of this function, because simply replacing TLO.Old
+ /// with TLO.New will be incorrect when this parameter is true and TLO.Old
+ /// has multiple uses.
bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask,
APInt &KnownZero, APInt &KnownOne,
- TargetLoweringOpt &TLO, unsigned Depth = 0) const;
+ TargetLoweringOpt &TLO,
+ unsigned Depth = 0,
+ bool AssumeSingleUse = false) const;
/// Determine which of the bits specified in Mask are known to be either zero
/// or one and return them in the KnownZero/KnownOne bitsets.
@@ -2333,7 +2391,6 @@ public:
bool isCalledByLegalizer() const { return CalledByLegalizer; }
void AddToWorklist(SDNode *N);
- void RemoveFromWorklist(SDNode *N);
SDValue CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo = true);
SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true);
SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo = true);
@@ -2955,32 +3012,39 @@ public:
/// Hooks for building estimates in place of slower divisions and square
/// roots.
- /// Return a reciprocal square root estimate value for the input operand.
- /// The RefinementSteps output is the number of Newton-Raphson refinement
- /// iterations required to generate a sufficient (though not necessarily
- /// IEEE-754 compliant) estimate for the value type.
+ /// Return either a square root or its reciprocal estimate value for the input
+ /// operand.
+ /// \p Enabled is a ReciprocalEstimate enum with value either 'Unspecified' or
+ /// 'Enabled' as set by a potential default override attribute.
+ /// If \p RefinementSteps is 'Unspecified', the number of Newton-Raphson
+ /// refinement iterations required to generate a sufficient (though not
+ /// necessarily IEEE-754 compliant) estimate is returned in that parameter.
/// The boolean UseOneConstNR output is used to select a Newton-Raphson
- /// algorithm implementation that uses one constant or two constants.
+ /// algorithm implementation that uses either one or two constants.
+ /// The boolean Reciprocal is used to select whether the estimate is for the
+ /// square root of the input operand or the reciprocal of its square root.
/// A target may choose to implement its own refinement within this function.
/// If that's true, then return '0' as the number of RefinementSteps to avoid
/// any further refinement of the estimate.
/// An empty SDValue return means no estimate sequence can be created.
- virtual SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI,
- unsigned &RefinementSteps,
- bool &UseOneConstNR) const {
+ virtual SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
+ int Enabled, int &RefinementSteps,
+ bool &UseOneConstNR, bool Reciprocal) const {
return SDValue();
}
/// Return a reciprocal estimate value for the input operand.
- /// The RefinementSteps output is the number of Newton-Raphson refinement
- /// iterations required to generate a sufficient (though not necessarily
- /// IEEE-754 compliant) estimate for the value type.
+ /// \p Enabled is a ReciprocalEstimate enum with value either 'Unspecified' or
+ /// 'Enabled' as set by a potential default override attribute.
+ /// If \p RefinementSteps is 'Unspecified', the number of Newton-Raphson
+ /// refinement iterations required to generate a sufficient (though not
+ /// necessarily IEEE-754 compliant) estimate is returned in that parameter.
/// A target may choose to implement its own refinement within this function.
/// If that's true, then return '0' as the number of RefinementSteps to avoid
/// any further refinement of the estimate.
/// An empty SDValue return means no estimate sequence can be created.
- virtual SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI,
- unsigned &RefinementSteps) const {
+ virtual SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
+ int Enabled, int &RefinementSteps) const {
return SDValue();
}
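A sketch of a target override of the new hook; MyTargetLowering and the MYTGTISD::FRSQRTE node are hypothetical names used only for illustration:

// Hedged sketch: provide an f32 reciprocal-square-root estimate and ask the
// caller to run two Newton-Raphson refinement steps.
SDValue MyTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
                                          int Enabled, int &RefinementSteps,
                                          bool &UseOneConstNR,
                                          bool Reciprocal) const {
  // A real implementation would also honour Enabled (ReciprocalEstimate enum).
  if (Operand.getValueType() != MVT::f32)
    return SDValue();                      // no estimate sequence for this type
  if (RefinementSteps == ReciprocalEstimate::Unspecified)
    RefinementSteps = 2;                   // request two refinement iterations
  UseOneConstNR = true;                    // one-constant Newton-Raphson variant
  // The node computes 1/sqrt(x); when Reciprocal is false the caller is
  // expected to multiply back to obtain sqrt(x).
  return DAG.getNode(MYTGTISD::FRSQRTE, SDLoc(Operand), MVT::f32, Operand);
}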
@@ -2988,6 +3052,22 @@ public:
// Legalization utility functions
//
+ /// Expand a MUL or [US]MUL_LOHI of n-bit values into two or four nodes,
+ /// respectively, each computing an n/2-bit part of the result.
+ /// \param Result A vector that will be filled with the parts of the result
+ /// in little-endian order.
+ /// \param LL Low bits of the LHS of the MUL. You can use this parameter
+ /// if you want to control how low bits are extracted from the LHS.
+ /// \param LH High bits of the LHS of the MUL. See LL for meaning.
+ /// \param RL Low bits of the RHS of the MUL. See LL for meaning.
+ /// \param RH High bits of the RHS of the MUL. See LL for meaning.
+ /// \returns true if the node has been expanded, false if it has not
+ bool expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl, SDValue LHS,
+ SDValue RHS, SmallVectorImpl<SDValue> &Result, EVT HiLoVT,
+ SelectionDAG &DAG, MulExpansionKind Kind,
+ SDValue LL = SDValue(), SDValue LH = SDValue(),
+ SDValue RL = SDValue(), SDValue RH = SDValue()) const;
+
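A scalar model of the arithmetic that expandMUL_LOHI (and expandMUL below) materialises as DAG nodes; the function name and the "i64 MUL on a 32-bit target" setting are assumptions of the example:

#include <cstdint>

// Hedged sketch: an i64 multiply built from the i32 parts LL/LH and RL/RH,
// filling Result in little-endian order exactly as the doc comment describes.
// For [US]MUL_LOHI the high half would additionally use LH*RH plus carries.
void expandMulModel(uint32_t LL, uint32_t LH, uint32_t RL, uint32_t RH,
                    uint32_t Result[2]) {
  uint64_t Low = (uint64_t)LL * RL;
  uint64_t Mid = (uint64_t)LL * RH + (uint64_t)LH * RL + (Low >> 32);
  Result[0] = (uint32_t)Low;   // bits [0, 32) of the product
  Result[1] = (uint32_t)Mid;   // bits [32, 64) of the product
}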
/// Expand a MUL into two nodes. One that computes the high bits of
/// the result and one that computes the low bits.
/// \param HiLoVT The value type to use for the Lo and Hi nodes.
@@ -2998,9 +3078,9 @@ public:
/// \param RH High bits of the RHS of the MUL. See LL for meaning.
/// \returns true if the node has been expanded, false if it has not
bool expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
- SelectionDAG &DAG, SDValue LL = SDValue(),
- SDValue LH = SDValue(), SDValue RL = SDValue(),
- SDValue RH = SDValue()) const;
+ SelectionDAG &DAG, MulExpansionKind Kind,
+ SDValue LL = SDValue(), SDValue LH = SDValue(),
+ SDValue RL = SDValue(), SDValue RH = SDValue()) const;
/// Expand float(f32) to SINT(i64) conversion
/// \param N Node to expand
@@ -3027,6 +3107,17 @@ public:
/// possibly more for vectors.
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const;
+ /// Increments memory address \p Addr according to the type of the value
+ /// \p DataVT that should be stored. If the data is stored in compressed
+ /// form, the memory address should be incremented according to the number of
+ /// stored elements. This number is equal to the number of '1' bits
+ /// in \p Mask.
+ /// \p DataVT is a vector type. \p Mask is a vector value.
+ /// \p DataVT and \p Mask have the same number of vector elements.
+ SDValue IncrementMemoryAddress(SDValue Addr, SDValue Mask, const SDLoc &DL,
+ EVT DataVT, SelectionDAG &DAG,
+ bool IsCompressedMemory) const;
+
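A scalar model of the increment this helper computes; the v8i32 vector type, element size, and function name are assumptions of the example:

#include <bitset>
#include <cstdint>

// Hedged sketch: next pointer offset after storing one v8i32 value. For a
// compressed store only the elements selected by Mask are written, so the
// address advances by popcount(Mask) elements instead of the full vector.
unsigned nextOffset(unsigned CurOffset, uint8_t Mask, bool IsCompressedMemory) {
  const unsigned NumElts = 8, EltBytes = 4;                       // v8i32
  unsigned StoredElts =
      IsCompressedMemory ? std::bitset<8>(Mask).count() : NumElts;
  return CurOffset + StoredElts * EltBytes;
}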
//===--------------------------------------------------------------------===//
// Instruction Emitting Hooks
//
@@ -3059,6 +3150,12 @@ public:
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
SelectionDAG &DAG) const;
+ // seteq(x, 0) -> truncate(srl(ctlz(zext(x)), log2(#bits)))
+ // If we're comparing for equality to zero and isCtlzFast is true, expose the
+ // fact that this can be implemented as a ctlz/srl pair, so that the dag
+ // combiner can fold the new nodes.
+ SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const;
+
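A worked instance of the fold described above, assuming a 32-bit ctlz (illustration only):

//   seteq(x, 0)                                  // x zero-extended to i32
//     ==> truncate(srl(ctlz(zext x), log2(32)))
//   x == 0 : ctlz(0) = 32 = 0b100000, srl by 5 -> 1
//   x != 0 : ctlz(x) <= 31 < 32,      srl by 5 -> 0
// The truncated i1 result is therefore exactly (x == 0), built from ctlz/srl
// nodes that the DAG combiner can fold further when isCtlzFast() is true.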
private:
SDValue simplifySetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
ISD::CondCode Cond, DAGCombinerInfo &DCI,
diff --git a/contrib/llvm/include/llvm/Target/TargetLoweringObjectFile.h b/contrib/llvm/include/llvm/Target/TargetLoweringObjectFile.h
index 189eff4f3953..72bae0a38e65 100644
--- a/contrib/llvm/include/llvm/Target/TargetLoweringObjectFile.h
+++ b/contrib/llvm/include/llvm/Target/TargetLoweringObjectFile.h
@@ -37,6 +37,9 @@ namespace llvm {
class TargetLoweringObjectFile : public MCObjectFileInfo {
MCContext *Ctx;
+ /// Name-mangler for global names.
+ Mangler *Mang = nullptr;
+
TargetLoweringObjectFile(
const TargetLoweringObjectFile&) = delete;
void operator=(const TargetLoweringObjectFile&) = delete;
@@ -45,12 +48,19 @@ protected:
bool SupportIndirectSymViaGOTPCRel;
bool SupportGOTPCRelWithOffset;
+ /// This section contains the static constructor pointer list.
+ MCSection *StaticCtorSection;
+
+ /// This section contains the static destructor pointer list.
+ MCSection *StaticDtorSection;
+
public:
MCContext &getContext() const { return *Ctx; }
+ Mangler &getMangler() const { return *Mang; }
TargetLoweringObjectFile()
- : MCObjectFileInfo(), Ctx(nullptr), SupportIndirectSymViaGOTPCRel(false),
- SupportGOTPCRelWithOffset(true) {}
+ : MCObjectFileInfo(), Ctx(nullptr), Mang(nullptr),
+ SupportIndirectSymViaGOTPCRel(false), SupportGOTPCRelWithOffset(true) {}
virtual ~TargetLoweringObjectFile();
@@ -65,7 +75,7 @@ public:
/// Emit the module flags that the platform cares about.
virtual void emitModuleFlags(MCStreamer &Streamer,
ArrayRef<Module::ModuleFlagEntry> Flags,
- Mangler &Mang, const TargetMachine &TM) const {}
+ const TargetMachine &TM) const {}
/// Given a constant with the SectionKind, return a section that it should be
/// placed in.
@@ -76,28 +86,28 @@ public:
/// Classify the specified global variable into a set of target independent
/// categories embodied in SectionKind.
- static SectionKind getKindForGlobal(const GlobalValue *GV,
+ static SectionKind getKindForGlobal(const GlobalObject *GO,
const TargetMachine &TM);
/// This method computes the appropriate section to emit the specified global
/// variable or function definition. This should not be passed external (or
/// available externally) globals.
- MCSection *SectionForGlobal(const GlobalValue *GV, SectionKind Kind,
- Mangler &Mang, const TargetMachine &TM) const;
+ MCSection *SectionForGlobal(const GlobalObject *GO, SectionKind Kind,
+ const TargetMachine &TM) const;
/// This method computes the appropriate section to emit the specified global
/// variable or function definition. This should not be passed external (or
/// available externally) globals.
- MCSection *SectionForGlobal(const GlobalValue *GV, Mangler &Mang,
+ MCSection *SectionForGlobal(const GlobalObject *GO,
const TargetMachine &TM) const {
- return SectionForGlobal(GV, getKindForGlobal(GV, TM), Mang, TM);
+ return SectionForGlobal(GO, getKindForGlobal(GO, TM), TM);
}
virtual void getNameWithPrefix(SmallVectorImpl<char> &OutName,
- const GlobalValue *GV, Mangler &Mang,
+ const GlobalValue *GV,
const TargetMachine &TM) const;
- virtual MCSection *getSectionForJumpTable(const Function &F, Mangler &Mang,
+ virtual MCSection *getSectionForJumpTable(const Function &F,
const TargetMachine &TM) const;
virtual bool shouldPutJumpTableInFunctionSection(bool UsesLabelDifference,
@@ -105,40 +115,32 @@ public:
/// Targets should implement this method to assign a section to globals with
/// an explicit section specified. The implementation of this method can
- /// assume that GV->hasSection() is true.
+ /// assume that GO->hasSection() is true.
virtual MCSection *
- getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
- Mangler &Mang, const TargetMachine &TM) const = 0;
-
- /// Allow the target to completely override section assignment of a global.
- virtual const MCSection *getSpecialCasedSectionGlobals(const GlobalValue *GV,
- SectionKind Kind,
- Mangler &Mang) const {
- return nullptr;
- }
+ getExplicitSectionGlobal(const GlobalObject *GO, SectionKind Kind,
+ const TargetMachine &TM) const = 0;
/// Return an MCExpr to use for a reference to the specified global variable
/// from exception handling information.
- virtual const MCExpr *
- getTTypeGlobalReference(const GlobalValue *GV, unsigned Encoding,
- Mangler &Mang, const TargetMachine &TM,
- MachineModuleInfo *MMI, MCStreamer &Streamer) const;
+ virtual const MCExpr *getTTypeGlobalReference(const GlobalValue *GV,
+ unsigned Encoding,
+ const TargetMachine &TM,
+ MachineModuleInfo *MMI,
+ MCStreamer &Streamer) const;
/// Return the MCSymbol for a private symbol with global value name as its
/// base, with the specified suffix.
MCSymbol *getSymbolWithGlobalValueBase(const GlobalValue *GV,
- StringRef Suffix, Mangler &Mang,
+ StringRef Suffix,
const TargetMachine &TM) const;
// The symbol that gets passed to .cfi_personality.
virtual MCSymbol *getCFIPersonalitySymbol(const GlobalValue *GV,
- Mangler &Mang,
const TargetMachine &TM,
MachineModuleInfo *MMI) const;
- const MCExpr *
- getTTypeReference(const MCSymbolRefExpr *Sym, unsigned Encoding,
- MCStreamer &Streamer) const;
+ const MCExpr *getTTypeReference(const MCSymbolRefExpr *Sym, unsigned Encoding,
+ MCStreamer &Streamer) const;
virtual MCSection *getStaticCtorSection(unsigned Priority,
const MCSymbol *KeySym) const {
@@ -154,9 +156,9 @@ public:
/// emitting the address in debug info.
virtual const MCExpr *getDebugThreadLocalSymbol(const MCSymbol *Sym) const;
- virtual const MCExpr *
- lowerRelativeReference(const GlobalValue *LHS, const GlobalValue *RHS,
- Mangler &Mang, const TargetMachine &TM) const {
+ virtual const MCExpr *lowerRelativeReference(const GlobalValue *LHS,
+ const GlobalValue *RHS,
+ const TargetMachine &TM) const {
return nullptr;
}
@@ -181,12 +183,12 @@ public:
return nullptr;
}
- virtual void emitLinkerFlagsForGlobal(raw_ostream &OS, const GlobalValue *GV,
- const Mangler &Mang) const {}
+ virtual void emitLinkerFlagsForGlobal(raw_ostream &OS,
+ const GlobalValue *GV) const {}
protected:
- virtual MCSection *SelectSectionForGlobal(const GlobalValue *GV,
- SectionKind Kind, Mangler &Mang,
+ virtual MCSection *SelectSectionForGlobal(const GlobalObject *GO,
+ SectionKind Kind,
const TargetMachine &TM) const = 0;
};
diff --git a/contrib/llvm/include/llvm/Target/TargetMachine.h b/contrib/llvm/include/llvm/Target/TargetMachine.h
index 563fef96acfc..f5493283eee6 100644
--- a/contrib/llvm/include/llvm/Target/TargetMachine.h
+++ b/contrib/llvm/include/llvm/Target/TargetMachine.h
@@ -193,12 +193,6 @@ public:
bool shouldPrintMachineCode() const { return Options.PrintMachineCode; }
- /// Returns the default value of asm verbosity.
- ///
- bool getAsmVerbosityDefault() const {
- return Options.MCOptions.AsmVerbose;
- }
-
bool getUniqueSectionNames() const { return Options.UniqueSectionNames; }
/// Return true if data objects should be emitted into their own section,
@@ -241,7 +235,8 @@ public:
virtual bool addPassesToEmitFile(
PassManagerBase &, raw_pwrite_stream &, CodeGenFileType,
bool /*DisableVerify*/ = true, AnalysisID /*StartBefore*/ = nullptr,
- AnalysisID /*StartAfter*/ = nullptr, AnalysisID /*StopAfter*/ = nullptr,
+ AnalysisID /*StartAfter*/ = nullptr, AnalysisID /*StopBefore*/ = nullptr,
+ AnalysisID /*StopAfter*/ = nullptr,
MachineFunctionInitializer * /*MFInitializer*/ = nullptr) {
return true;
}
@@ -266,7 +261,7 @@ public:
void getNameWithPrefix(SmallVectorImpl<char> &Name, const GlobalValue *GV,
Mangler &Mang, bool MayAlwaysUsePrivate = false) const;
- MCSymbol *getSymbol(const GlobalValue *GV, Mangler &Mang) const;
+ MCSymbol *getSymbol(const GlobalValue *GV) const;
/// True if the target uses physical regs at Prolog/Epilog insertion
/// time. If true (most machines), all vregs must be allocated before
@@ -302,7 +297,8 @@ public:
bool addPassesToEmitFile(
PassManagerBase &PM, raw_pwrite_stream &Out, CodeGenFileType FileType,
bool DisableVerify = true, AnalysisID StartBefore = nullptr,
- AnalysisID StartAfter = nullptr, AnalysisID StopAfter = nullptr,
+ AnalysisID StartAfter = nullptr, AnalysisID StopBefore = nullptr,
+ AnalysisID StopAfter = nullptr,
MachineFunctionInitializer *MFInitializer = nullptr) override;
/// Add passes to the specified pass manager to get machine code emitted with
@@ -312,13 +308,6 @@ public:
bool addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx,
raw_pwrite_stream &OS,
bool DisableVerify = true) override;
-
- /// Add MachineModuleInfo pass to pass manager.
- MachineModuleInfo &addMachineModuleInfo(PassManagerBase &PM) const;
-
- /// Add MachineFunctionAnalysis pass to pass manager.
- void addMachineFunctionAnalysis(PassManagerBase &PM,
- MachineFunctionInitializer *MFInitializer) const;
};
} // End llvm namespace
diff --git a/contrib/llvm/include/llvm/Target/TargetOpcodes.def b/contrib/llvm/include/llvm/Target/TargetOpcodes.def
index abab6c7a2a7c..edb9b7350ca7 100644
--- a/contrib/llvm/include/llvm/Target/TargetOpcodes.def
+++ b/contrib/llvm/include/llvm/Target/TargetOpcodes.def
@@ -27,22 +27,22 @@
/// Every instruction defined here must also appear in Target.td.
///
-HANDLE_TARGET_OPCODE(PHI, 0)
-HANDLE_TARGET_OPCODE(INLINEASM, 1)
-HANDLE_TARGET_OPCODE(CFI_INSTRUCTION, 2)
-HANDLE_TARGET_OPCODE(EH_LABEL, 3)
-HANDLE_TARGET_OPCODE(GC_LABEL, 4)
+HANDLE_TARGET_OPCODE(PHI)
+HANDLE_TARGET_OPCODE(INLINEASM)
+HANDLE_TARGET_OPCODE(CFI_INSTRUCTION)
+HANDLE_TARGET_OPCODE(EH_LABEL)
+HANDLE_TARGET_OPCODE(GC_LABEL)
/// KILL - This instruction is a noop that is used only to adjust the
/// liveness of registers. This can be useful when dealing with
/// sub-registers.
-HANDLE_TARGET_OPCODE(KILL, 5)
+HANDLE_TARGET_OPCODE(KILL)
/// EXTRACT_SUBREG - This instruction takes two operands: a register
/// that has subregisters, and a subregister index. It returns the
/// extracted subregister value. This is commonly used to implement
/// truncation operations on target architectures which support it.
-HANDLE_TARGET_OPCODE(EXTRACT_SUBREG, 6)
+HANDLE_TARGET_OPCODE(EXTRACT_SUBREG)
/// INSERT_SUBREG - This instruction takes three operands: a register that
/// has subregisters, a register providing an insert value, and a
@@ -50,16 +50,20 @@ HANDLE_TARGET_OPCODE(EXTRACT_SUBREG, 6)
/// value of the second register inserted. The first register is often
/// defined by an IMPLICIT_DEF, because it is commonly used to implement
/// anyext operations on target architectures which support it.
-HANDLE_TARGET_OPCODE(INSERT_SUBREG, 7)
+HANDLE_TARGET_OPCODE(INSERT_SUBREG)
/// IMPLICIT_DEF - This is the MachineInstr-level equivalent of undef.
-HANDLE_TARGET_OPCODE(IMPLICIT_DEF, 8)
+HANDLE_TARGET_OPCODE(IMPLICIT_DEF)
-/// SUBREG_TO_REG - This instruction is similar to INSERT_SUBREG except that
-/// the first operand is an immediate integer constant. This constant is
-/// often zero, because it is commonly used to assert that the instruction
-/// defining the register implicitly clears the high bits.
-HANDLE_TARGET_OPCODE(SUBREG_TO_REG, 9)
+/// SUBREG_TO_REG - Assert the value of bits in a super register.
+/// The result of this instruction is the value of the second operand inserted
+/// into the subregister specified by the third operand. All other bits are
+/// assumed to be equal to the bits in the immediate integer constant in the
+/// first operand. This instruction just communicates information; No code
+/// should be generated.
+/// This is typically used after an instruction where the write to a subregister
+/// implicitly cleared the bits in the super registers.
+HANDLE_TARGET_OPCODE(SUBREG_TO_REG)
/// COPY_TO_REGCLASS - This instruction is a placeholder for a plain
/// register-to-register copy into a specific register class. This is only
@@ -67,10 +71,10 @@ HANDLE_TARGET_OPCODE(SUBREG_TO_REG, 9)
/// virtual registers have been created for all the instructions, and it's
/// only needed in cases where the register classes implied by the
/// instructions are insufficient. It is emitted as a COPY MachineInstr.
-HANDLE_TARGET_OPCODE(COPY_TO_REGCLASS, 10)
+HANDLE_TARGET_OPCODE(COPY_TO_REGCLASS)
/// DBG_VALUE - a mapping of the llvm.dbg.value intrinsic
-HANDLE_TARGET_OPCODE(DBG_VALUE, 11)
+HANDLE_TARGET_OPCODE(DBG_VALUE)
/// REG_SEQUENCE - This variadic instruction is used to form a register that
/// represents a consecutive sequence of sub-registers. It's used as a
@@ -83,55 +87,55 @@ HANDLE_TARGET_OPCODE(DBG_VALUE, 11)
/// e.g. v1027 = REG_SEQUENCE v1024, 3, v1025, 4, v1026, 5
/// After register coalescing references of v1024 should be replace with
/// v1027:3, v1025 with v1027:4, etc.
-HANDLE_TARGET_OPCODE(REG_SEQUENCE, 12)
+HANDLE_TARGET_OPCODE(REG_SEQUENCE)
/// COPY - Target-independent register copy. This instruction can also be
/// used to copy between subregisters of virtual registers.
-HANDLE_TARGET_OPCODE(COPY, 13)
+HANDLE_TARGET_OPCODE(COPY)
/// BUNDLE - This instruction represents an instruction bundle. Instructions
/// which immediately follow a BUNDLE instruction which are marked with
/// 'InsideBundle' flag are inside the bundle.
-HANDLE_TARGET_OPCODE(BUNDLE, 14)
+HANDLE_TARGET_OPCODE(BUNDLE)
/// Lifetime markers.
-HANDLE_TARGET_OPCODE(LIFETIME_START, 15)
-HANDLE_TARGET_OPCODE(LIFETIME_END, 16)
+HANDLE_TARGET_OPCODE(LIFETIME_START)
+HANDLE_TARGET_OPCODE(LIFETIME_END)
/// A Stackmap instruction captures the location of live variables at its
/// position in the instruction stream. It is followed by a shadow of bytes
/// that must lie within the function and not contain another stackmap.
-HANDLE_TARGET_OPCODE(STACKMAP, 17)
+HANDLE_TARGET_OPCODE(STACKMAP)
/// Patchable call instruction - this instruction represents a call to a
/// constant address, followed by a series of NOPs. It is intended to
/// support optimizations for dynamic languages (such as javascript) that
/// rewrite calls to runtimes with more efficient code sequences.
/// This also implies a stack map.
-HANDLE_TARGET_OPCODE(PATCHPOINT, 18)
+HANDLE_TARGET_OPCODE(PATCHPOINT)
/// This pseudo-instruction loads the stack guard value. Targets which need
/// to prevent the stack guard value or address from being spilled to the
/// stack should override TargetLowering::emitLoadStackGuardNode and
/// additionally expand this pseudo after register allocation.
-HANDLE_TARGET_OPCODE(LOAD_STACK_GUARD, 19)
+HANDLE_TARGET_OPCODE(LOAD_STACK_GUARD)
/// Call instruction with associated vm state for deoptimization and list
/// of live pointers for relocation by the garbage collector. It is
/// intended to support garbage collection with fully precise relocating
/// collectors and deoptimizations in either the callee or caller.
-HANDLE_TARGET_OPCODE(STATEPOINT, 20)
+HANDLE_TARGET_OPCODE(STATEPOINT)
/// Instruction that records the offset of a local stack allocation passed to
/// llvm.localescape. It has two arguments: the symbol for the label and the
/// frame index of the local stack allocation.
-HANDLE_TARGET_OPCODE(LOCAL_ESCAPE, 21)
+HANDLE_TARGET_OPCODE(LOCAL_ESCAPE)
/// Loading instruction that may page fault, bundled with associated
/// information on how to handle such a page fault. It is intended to support
/// "zero cost" null checks in managed languages by allowing LLVM to fold
/// comparisons into existing memory operations.
-HANDLE_TARGET_OPCODE(FAULTING_LOAD_OP, 22)
+HANDLE_TARGET_OPCODE(FAULTING_LOAD_OP)
/// Wraps a machine instruction to add patchability constraints. An
/// instruction wrapped in PATCHABLE_OP has to either have a minimum
@@ -140,30 +144,220 @@ HANDLE_TARGET_OPCODE(FAULTING_LOAD_OP, 22)
/// second operand is an immediate denoting the opcode of the original
/// instruction. The rest of the operands are the operands of the
/// original instruction.
-HANDLE_TARGET_OPCODE(PATCHABLE_OP, 23)
+HANDLE_TARGET_OPCODE(PATCHABLE_OP)
/// This is a marker instruction which gets translated into a nop sled, useful
/// for inserting instrumentation instructions at runtime.
-HANDLE_TARGET_OPCODE(PATCHABLE_FUNCTION_ENTER, 24)
+HANDLE_TARGET_OPCODE(PATCHABLE_FUNCTION_ENTER)
/// Wraps a return instruction and its operands to enable adding nop sleds
/// either before or after the return. The nop sleds are useful for inserting
/// instrumentation instructions at runtime.
-HANDLE_TARGET_OPCODE(PATCHABLE_RET, 25)
+/// The patch here replaces the return instruction.
+HANDLE_TARGET_OPCODE(PATCHABLE_RET)
+
+/// This is a marker instruction which gets translated into a nop sled, useful
+/// for inserting instrumentation instructions at runtime.
+/// The patch here prepends the return instruction.
+/// The same approach as on x86_64 is not possible for ARM because it has
+/// multiple return instructions. Furthermore, the CPU allows parameterized and
+/// even conditional return instructions. The current ARM implementation relies
+/// on the fact that LLVM does not currently seem to generate conditional
+/// return instructions.
+/// On ARM, the same instruction can be used for popping multiple registers
+/// from the stack and returning (it also pops the pc register), and LLVM
+/// sometimes generates it. So we can't insert the sled between this stack
+/// adjustment and the return without splitting the original instruction into
+/// two instructions. So on ARM, rather than jumping into the exit trampoline,
+/// we call it; it does the tracing, preserves the stack, and returns.
+HANDLE_TARGET_OPCODE(PATCHABLE_FUNCTION_EXIT)
+
+/// Wraps a tail call instruction and its operands to enable adding nop sleds
+/// either before or after the tail exit. We use this as a disambiguation from
+/// PATCHABLE_RET which specifically only works for return instructions.
+HANDLE_TARGET_OPCODE(PATCHABLE_TAIL_CALL)
/// The following generic opcodes are not supposed to appear after ISel.
/// This is something we might want to relax, but for now, this is convenient
/// to produce diagnostics.
/// Generic ADD instruction. This is an integer add.
-HANDLE_TARGET_OPCODE(G_ADD, 26)
+HANDLE_TARGET_OPCODE(G_ADD)
HANDLE_TARGET_OPCODE_MARKER(PRE_ISEL_GENERIC_OPCODE_START, G_ADD)
-/// Generic Bitwise-OR instruction.
-HANDLE_TARGET_OPCODE(G_OR, 27)
+/// Generic SUB instruction. This is an integer sub.
+HANDLE_TARGET_OPCODE(G_SUB)
+
+// Generic multiply instruction.
+HANDLE_TARGET_OPCODE(G_MUL)
+
+// Generic signed division instruction.
+HANDLE_TARGET_OPCODE(G_SDIV)
+
+// Generic unsigned division instruction.
+HANDLE_TARGET_OPCODE(G_UDIV)
+
+// Generic signed remainder instruction.
+HANDLE_TARGET_OPCODE(G_SREM)
+
+// Generic unsigned remainder instruction.
+HANDLE_TARGET_OPCODE(G_UREM)
+
+/// Generic bitwise and instruction.
+HANDLE_TARGET_OPCODE(G_AND)
+
+/// Generic bitwise or instruction.
+HANDLE_TARGET_OPCODE(G_OR)
+
+/// Generic bitwise exclusive-or instruction.
+HANDLE_TARGET_OPCODE(G_XOR)
+
+
+/// Generic instruction to materialize the address of an alloca or other
+/// stack-based object.
+HANDLE_TARGET_OPCODE(G_FRAME_INDEX)
+
+/// Generic reference to global value.
+HANDLE_TARGET_OPCODE(G_GLOBAL_VALUE)
+
+/// Generic instruction to extract blocks of bits from the register given
+/// (typically a sub-register COPY after instruction selection).
+HANDLE_TARGET_OPCODE(G_EXTRACT)
+
+/// Generic instruction to insert blocks of bits from the registers given into
+/// the source.
+HANDLE_TARGET_OPCODE(G_INSERT)
+
+/// Generic instruction to paste a variable number of components together into a
+/// larger register.
+HANDLE_TARGET_OPCODE(G_SEQUENCE)
+
+/// Generic pointer to int conversion.
+HANDLE_TARGET_OPCODE(G_PTRTOINT)
+
+/// Generic int to pointer conversion.
+HANDLE_TARGET_OPCODE(G_INTTOPTR)
+
+/// Generic bitcast. The source and destination types must be different, or a
+/// COPY is the relevant instruction.
+HANDLE_TARGET_OPCODE(G_BITCAST)
+
+/// Generic load.
+HANDLE_TARGET_OPCODE(G_LOAD)
+
+/// Generic store.
+HANDLE_TARGET_OPCODE(G_STORE)
+
+/// Generic conditional branch instruction.
+HANDLE_TARGET_OPCODE(G_BRCOND)
+
+/// Generic intrinsic use (without side effects).
+HANDLE_TARGET_OPCODE(G_INTRINSIC)
+
+/// Generic intrinsic use (with side effects).
+HANDLE_TARGET_OPCODE(G_INTRINSIC_W_SIDE_EFFECTS)
+
+/// Generic extension allowing rubbish in high bits.
+HANDLE_TARGET_OPCODE(G_ANYEXT)
+
+/// Generic instruction to discard the high bits of a register. This differs
+/// from (G_EXTRACT val, 0) on its action on vectors: G_TRUNC will truncate
+/// each element individually, G_EXTRACT will typically discard the high
+/// elements of the vector.
+HANDLE_TARGET_OPCODE(G_TRUNC)
+
+/// Generic integer constant.
+HANDLE_TARGET_OPCODE(G_CONSTANT)
+
+/// Generic floating constant.
+HANDLE_TARGET_OPCODE(G_FCONSTANT)
+
+// Generic sign extend
+HANDLE_TARGET_OPCODE(G_SEXT)
+
+// Generic zero extend
+HANDLE_TARGET_OPCODE(G_ZEXT)
+
+// Generic left-shift
+HANDLE_TARGET_OPCODE(G_SHL)
+
+// Generic logical right-shift
+HANDLE_TARGET_OPCODE(G_LSHR)
+
+// Generic arithmetic right-shift
+HANDLE_TARGET_OPCODE(G_ASHR)
+
+/// Generic integer-based comparison, also applicable to vectors of integers.
+HANDLE_TARGET_OPCODE(G_ICMP)
+
+/// Generic floating-point comparison, also applicable to vectors.
+HANDLE_TARGET_OPCODE(G_FCMP)
+
+/// Generic select.
+HANDLE_TARGET_OPCODE(G_SELECT)
+
+/// Generic unsigned add instruction, consuming the normal operands plus a carry
+/// flag, and similarly producing the result and a carry flag.
+HANDLE_TARGET_OPCODE(G_UADDE)
+
+/// Generic unsigned subtract instruction, consuming the normal operands plus a
+/// carry flag, and similarly producing the result and a carry flag.
+HANDLE_TARGET_OPCODE(G_USUBE)
+
+/// Generic signed add instruction, producing the result and a signed overflow
+/// flag.
+HANDLE_TARGET_OPCODE(G_SADDO)
+
+/// Generic signed subtract instruction, producing the result and a signed
+/// overflow flag.
+HANDLE_TARGET_OPCODE(G_SSUBO)
+
+/// Generic unsigned multiply instruction, producing the result and an unsigned
+/// overflow flag.
+HANDLE_TARGET_OPCODE(G_UMULO)
+
+/// Generic signed multiply instruction, producing the result and a signed
+/// overflow flag.
+HANDLE_TARGET_OPCODE(G_SMULO)
+
+/// Generic FP addition.
+HANDLE_TARGET_OPCODE(G_FADD)
+
+/// Generic FP subtraction.
+HANDLE_TARGET_OPCODE(G_FSUB)
+
+/// Generic FP multiplication.
+HANDLE_TARGET_OPCODE(G_FMUL)
+
+/// Generic FP division.
+HANDLE_TARGET_OPCODE(G_FDIV)
+
+/// Generic FP remainder.
+HANDLE_TARGET_OPCODE(G_FREM)
+
+/// Generic floating-point extension
+HANDLE_TARGET_OPCODE(G_FPEXT)
+
+/// Generic floating-point truncation
+HANDLE_TARGET_OPCODE(G_FPTRUNC)
+
+/// Generic float to signed-int conversion
+HANDLE_TARGET_OPCODE(G_FPTOSI)
+
+/// Generic float to unsigned-int conversion
+HANDLE_TARGET_OPCODE(G_FPTOUI)
+
+/// Generic signed-int to float conversion
+HANDLE_TARGET_OPCODE(G_SITOFP)
+
+/// Generic unsigned-int to float conversion
+HANDLE_TARGET_OPCODE(G_UITOFP)
+
+/// Generic pointer offset computation (getelementptr)
+HANDLE_TARGET_OPCODE(G_GEP)
/// Generic BRANCH instruction. This is an unconditional branch.
-HANDLE_TARGET_OPCODE(G_BR, 28)
+HANDLE_TARGET_OPCODE(G_BR)
// TODO: Add more generic opcodes as we move along.
diff --git a/contrib/llvm/include/llvm/Target/TargetOpcodes.h b/contrib/llvm/include/llvm/Target/TargetOpcodes.h
index f851fc27527b..33df133a4d58 100644
--- a/contrib/llvm/include/llvm/Target/TargetOpcodes.h
+++ b/contrib/llvm/include/llvm/Target/TargetOpcodes.h
@@ -20,7 +20,7 @@ namespace llvm {
///
namespace TargetOpcode {
enum {
-#define HANDLE_TARGET_OPCODE(OPC, NUM) OPC = NUM,
+#define HANDLE_TARGET_OPCODE(OPC) OPC,
#define HANDLE_TARGET_OPCODE_MARKER(IDENT, OPC) IDENT = OPC,
#include "llvm/Target/TargetOpcodes.def"
};
@@ -32,6 +32,11 @@ static inline bool isPreISelGenericOpcode(unsigned Opcode) {
return Opcode >= TargetOpcode::PRE_ISEL_GENERIC_OPCODE_START &&
Opcode <= TargetOpcode::PRE_ISEL_GENERIC_OPCODE_END;
}
+
+/// Check whether the given Opcode is a target-specific opcode.
+static inline bool isTargetSpecificOpcode(unsigned Opcode) {
+ return Opcode > TargetOpcode::PRE_ISEL_GENERIC_OPCODE_END;
+}
} // end namespace llvm
#endif
diff --git a/contrib/llvm/include/llvm/Target/TargetOptions.h b/contrib/llvm/include/llvm/Target/TargetOptions.h
index 57873b4bd0b4..f5134d99b039 100644
--- a/contrib/llvm/include/llvm/Target/TargetOptions.h
+++ b/contrib/llvm/include/llvm/Target/TargetOptions.h
@@ -15,9 +15,7 @@
#ifndef LLVM_TARGET_TARGETOPTIONS_H
#define LLVM_TARGET_TARGETOPTIONS_H
-#include "llvm/Target/TargetRecip.h"
#include "llvm/MC/MCTargetOptions.h"
-#include "llvm/MC/MCAsmInfo.h"
namespace llvm {
class MachineFunction;
@@ -57,6 +55,15 @@ namespace llvm {
};
}
+ namespace FPDenormal {
+ enum DenormalMode {
+ IEEE, // IEEE 754 denormal numbers
+ PreserveSign, // the sign of a flushed-to-zero number is preserved in
+ // the sign of 0
+ PositiveZero // denormals are flushed to positive zero
+ };
+ }
+
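What the three modes permit for a concrete single-precision denormal; the value and behaviours below are an illustration, the enum itself carries no behaviour:

// float x = -1.0e-41f;        // a negative f32 denormal
// IEEE         : x must be kept exactly (full IEEE 754 denormal behaviour)
// PreserveSign : x may be flushed to -0.0f (the sign of the zero is kept)
// PositiveZero : x may be flushed to +0.0f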
enum class EABI {
Unknown,
Default, // Default means not specified
@@ -94,6 +101,7 @@ namespace llvm {
TargetOptions()
: PrintMachineCode(false), LessPreciseFPMADOption(false),
UnsafeFPMath(false), NoInfsFPMath(false), NoNaNsFPMath(false),
+ NoTrappingFPMath(false),
HonorSignDependentRoundingFPMathOption(false), NoZerosInBSS(false),
GuaranteedTailCallOpt(false), StackAlignmentOverride(0),
StackSymbolOrdering(true), EnableFastISel(false), UseInitArray(false),
@@ -102,9 +110,10 @@ namespace llvm {
DataSections(false), UniqueSectionNames(true), TrapUnreachable(false),
EmulatedTLS(false), EnableIPRA(false),
FloatABIType(FloatABI::Default),
- AllowFPOpFusion(FPOpFusion::Standard), Reciprocals(TargetRecip()),
- JTType(JumpTable::Single), ThreadModel(ThreadModel::POSIX),
+ AllowFPOpFusion(FPOpFusion::Standard),
+ ThreadModel(ThreadModel::POSIX),
EABIVersion(EABI::Default), DebuggerTuning(DebuggerKind::Default),
+ FPDenormalMode(FPDenormal::IEEE),
ExceptionModel(ExceptionHandling::None) {}
/// PrintMachineCode - This flag is enabled when the -print-machineinstrs
@@ -144,6 +153,11 @@ namespace llvm {
/// assume the FP arithmetic arguments and results are never NaNs.
unsigned NoNaNsFPMath : 1;
+ /// NoTrappingFPMath - This flag is enabled when the
+ /// -enable-no-trapping-fp-math is specified on the command line. This
+ /// specifies that there are no trap handlers to handle exceptions.
+ unsigned NoTrappingFPMath : 1;
+
/// HonorSignDependentRoundingFPMath - This returns true when the
/// -enable-sign-dependent-rounding-fp-math is specified. If this returns
/// false (the default), the code generator is allowed to assume that the
@@ -237,13 +251,6 @@ namespace llvm {
/// the value of this option.
FPOpFusion::FPOpFusionMode AllowFPOpFusion;
- /// This class encapsulates options for reciprocal-estimate code generation.
- TargetRecip Reciprocals;
-
- /// JTType - This flag specifies the type of jump-instruction table to
- /// create for functions that have the jumptable attribute.
- JumpTable::JumpTableType JTType;
-
/// ThreadModel - This flag specifies the type of threading model to assume
/// for things like atomics
ThreadModel::Model ThreadModel;
@@ -254,6 +261,10 @@ namespace llvm {
/// Which debugger to tune for.
DebuggerKind DebuggerTuning;
+ /// FPDenormalMode - This flag specifies which denormal numbers the code
+ /// is permitted to require.
+ FPDenormal::DenormalMode FPDenormalMode;
+
/// What exception model to use
ExceptionHandling ExceptionModel;
@@ -271,6 +282,7 @@ inline bool operator==(const TargetOptions &LHS,
ARE_EQUAL(UnsafeFPMath) &&
ARE_EQUAL(NoInfsFPMath) &&
ARE_EQUAL(NoNaNsFPMath) &&
+ ARE_EQUAL(NoTrappingFPMath) &&
ARE_EQUAL(HonorSignDependentRoundingFPMathOption) &&
ARE_EQUAL(NoZerosInBSS) &&
ARE_EQUAL(GuaranteedTailCallOpt) &&
@@ -281,11 +293,10 @@ inline bool operator==(const TargetOptions &LHS,
ARE_EQUAL(EmulatedTLS) &&
ARE_EQUAL(FloatABIType) &&
ARE_EQUAL(AllowFPOpFusion) &&
- ARE_EQUAL(Reciprocals) &&
- ARE_EQUAL(JTType) &&
ARE_EQUAL(ThreadModel) &&
ARE_EQUAL(EABIVersion) &&
ARE_EQUAL(DebuggerTuning) &&
+ ARE_EQUAL(FPDenormalMode) &&
ARE_EQUAL(ExceptionModel) &&
ARE_EQUAL(MCOptions) &&
ARE_EQUAL(EnableIPRA);
diff --git a/contrib/llvm/include/llvm/Target/TargetRecip.h b/contrib/llvm/include/llvm/Target/TargetRecip.h
deleted file mode 100644
index 309b96079131..000000000000
--- a/contrib/llvm/include/llvm/Target/TargetRecip.h
+++ /dev/null
@@ -1,74 +0,0 @@
-//===--------------------- llvm/Target/TargetRecip.h ------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class is used to customize machine-specific reciprocal estimate code
-// generation in a target-independent way.
-// If a target does not support operations in this specification, then code
-// generation will default to using supported operations.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TARGET_TARGETRECIP_H
-#define LLVM_TARGET_TARGETRECIP_H
-
-#include "llvm/ADT/StringRef.h"
-#include <cstdint>
-#include <map>
-#include <string>
-#include <vector>
-
-namespace llvm {
-
-struct TargetRecip {
-public:
- TargetRecip();
-
- /// Initialize all or part of the operations from command-line options or
- /// a front end.
- TargetRecip(const std::vector<std::string> &Args);
-
- /// Set whether a particular reciprocal operation is enabled and how many
- /// refinement steps are needed when using it. Use "all" to set enablement
- /// and refinement steps for all operations.
- void setDefaults(StringRef Key, bool Enable, unsigned RefSteps);
-
- /// Return true if the reciprocal operation has been enabled by default or
- /// from the command-line. Return false if the operation has been disabled
- /// by default or from the command-line.
- bool isEnabled(StringRef Key) const;
-
- /// Return the number of iterations necessary to refine the
- /// the result of a machine instruction for the given reciprocal operation.
- unsigned getRefinementSteps(StringRef Key) const;
-
- bool operator==(const TargetRecip &Other) const;
-
-private:
- enum {
- Uninitialized = -1
- };
-
- struct RecipParams {
- int8_t Enabled;
- int8_t RefinementSteps;
-
- RecipParams() : Enabled(Uninitialized), RefinementSteps(Uninitialized) {}
- };
-
- std::map<StringRef, RecipParams> RecipMap;
- typedef std::map<StringRef, RecipParams>::iterator RecipIter;
- typedef std::map<StringRef, RecipParams>::const_iterator ConstRecipIter;
-
- bool parseGlobalParams(const std::string &Arg);
- void parseIndividualParams(const std::vector<std::string> &Args);
-};
-
-} // end namespace llvm
-
-#endif // LLVM_TARGET_TARGETRECIP_H
diff --git a/contrib/llvm/include/llvm/Target/TargetRegisterInfo.h b/contrib/llvm/include/llvm/Target/TargetRegisterInfo.h
index e5a6c8ed2f2d..3080e9a32c3a 100644
--- a/contrib/llvm/include/llvm/Target/TargetRegisterInfo.h
+++ b/contrib/llvm/include/llvm/Target/TargetRegisterInfo.h
@@ -17,6 +17,7 @@
#define LLVM_TARGET_TARGETREGISTERINFO_H
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/IR/CallingConv.h"
@@ -35,23 +36,6 @@ class VirtRegMap;
class raw_ostream;
class LiveRegMatrix;
-/// A bitmask representing the covering of a register with sub-registers.
-///
-/// This is typically used to track liveness at sub-register granularity.
-/// Lane masks for sub-register indices are similar to register units for
-/// physical registers. The individual bits in a lane mask can't be assigned
-/// any specific meaning. They can be used to check if two sub-register
-/// indices overlap.
-///
-/// Iff the target has a register such that:
-///
-/// getSubReg(Reg, A) overlaps getSubReg(Reg, B)
-///
-/// then:
-///
-/// (getSubRegIndexLaneMask(A) & getSubRegIndexLaneMask(B)) != 0
-typedef unsigned LaneBitmask;
-
class TargetRegisterClass {
public:
typedef const MCPhysReg* iterator;
@@ -87,6 +71,11 @@ public:
/// Return the number of registers in this class.
unsigned getNumRegs() const { return MC->getNumRegs(); }
+ iterator_range<SmallVectorImpl<MCPhysReg>::const_iterator>
+ getRegisters() const {
+ return make_range(MC->begin(), MC->end());
+ }
+
/// Return the specified register in the class.
unsigned getRegister(unsigned i) const {
return MC->getRegister(i);
@@ -263,7 +252,7 @@ private:
const LaneBitmask *SubRegIndexLaneMasks;
regclass_iterator RegClassBegin, RegClassEnd; // List of regclasses
- unsigned CoveringLanes;
+ LaneBitmask CoveringLanes;
protected:
TargetRegisterInfo(const TargetRegisterInfoDesc *ID,
@@ -271,7 +260,7 @@ protected:
regclass_iterator RegClassEnd,
const char *const *SRINames,
const LaneBitmask *SRILaneMasks,
- unsigned CoveringLanes);
+ LaneBitmask CoveringLanes);
virtual ~TargetRegisterInfo();
public:
@@ -441,11 +430,6 @@ public:
virtual const MCPhysReg*
getCalleeSavedRegs(const MachineFunction *MF) const = 0;
- virtual const MCPhysReg*
- getCalleeSavedRegsViaCopy(const MachineFunction *MF) const {
- return nullptr;
- }
-
/// Return a mask of call-preserved registers for the given calling convention
/// on the current function. The mask should include all call-preserved
/// aliases. This is used by the register allocator to determine which
@@ -485,10 +469,20 @@ public:
/// Returns a bitset indexed by physical register number indicating if a
/// register is a special register that has particular uses and should be
- /// considered unavailable at all times, e.g. SP, RA. This is
- /// used by register scavenger to determine what registers are free.
+ /// considered unavailable at all times, e.g. stack pointer, return address.
+ /// A reserved register:
+ /// - is not allocatable
+ /// - is considered always live
+ /// - is ignored by liveness tracking
+ /// It is often necessary to reserve the super registers of a reserved
+ /// register as well, to avoid them getting allocated indirectly. You may use
+ /// markSuperRegs() and checkAllSuperRegsMarked() in this case.
virtual BitVector getReservedRegs(const MachineFunction &MF) const = 0;
+ /// Returns true if PhysReg is unallocatable and constant throughout the
+ /// function. Used by MachineRegisterInfo::isConstantPhysReg().
+ virtual bool isConstantPhysReg(unsigned PhysReg) const { return false; }
+
/// Prior to adding the live-out mask to a stackmap or patchpoint
/// instruction, provide the target the opportunity to adjust it (mainly to
/// remove pseudo-registers that should be ignored).
@@ -512,7 +506,7 @@ public:
// For a copy-like instruction that defines a register of class DefRC with
// subreg index DefSubReg, reading from another source with class SrcRC and
- // subregister SrcSubReg return true if this is a preferrable copy
+ // subregister SrcSubReg return true if this is a preferable copy
// instruction or an earlier use should be used.
virtual bool shouldRewriteCopySrc(const TargetRegisterClass *DefRC,
unsigned DefSubReg,
@@ -808,6 +802,13 @@ public:
return false;
}
+ /// Returns true if the target requires using the RegScavenger directly for
+ /// frame elimination despite using requiresFrameIndexScavenging.
+ virtual bool requiresFrameIndexReplacementScavenging(
+ const MachineFunction &MF) const {
+ return false;
+ }
+
/// Returns true if the target wants the LocalStackAllocation pass to be run
/// and virtual base registers used for more efficient stack access.
virtual bool requiresVirtualBaseRegisters(const MachineFunction &MF) const {
@@ -930,6 +931,14 @@ public:
/// getFrameRegister - This method should return the register used as a base
/// for values allocated in the current stack frame.
virtual unsigned getFrameRegister(const MachineFunction &MF) const = 0;
+
+ /// Mark a register and all its aliases as reserved in the given set.
+ void markSuperRegs(BitVector &RegisterSet, unsigned Reg) const;
+
+ /// Returns true if for every register in the set all super registers are part
+ /// of the set as well.
+ bool checkAllSuperRegsMarked(const BitVector &RegisterSet,
+ ArrayRef<MCPhysReg> Exceptions = ArrayRef<MCPhysReg>()) const;
};
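A sketch of a target getReservedRegs built on the two new helpers; MyRegisterInfo and the MYTGT register names are hypothetical:

BitVector MyRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  BitVector Reserved(getNumRegs());
  // Reserve the stack pointer and return address, plus every register that
  // aliases them, so nothing gets allocated "through" a reserved register.
  markSuperRegs(Reserved, MYTGT::SP);
  markSuperRegs(Reserved, MYTGT::RA);
  assert(checkAllSuperRegsMarked(Reserved) && "missing aliasing reservation");
  return Reserved;
}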
@@ -1115,9 +1124,6 @@ Printable PrintRegUnit(unsigned Unit, const TargetRegisterInfo *TRI);
/// registers on a \ref raw_ostream.
Printable PrintVRegOrUnit(unsigned VRegOrUnit, const TargetRegisterInfo *TRI);
-/// Create Printable object to print LaneBitmasks on a \ref raw_ostream.
-Printable PrintLaneMask(LaneBitmask LaneMask);
-
} // End llvm namespace
#endif
diff --git a/contrib/llvm/include/llvm/Target/TargetSelectionDAG.td b/contrib/llvm/include/llvm/Target/TargetSelectionDAG.td
index 88375f77e230..4ddf7d77a23a 100644
--- a/contrib/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/contrib/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -450,10 +450,10 @@ def ftrunc : SDNode<"ISD::FTRUNC" , SDTFPUnaryOp>;
def fceil : SDNode<"ISD::FCEIL" , SDTFPUnaryOp>;
def ffloor : SDNode<"ISD::FFLOOR" , SDTFPUnaryOp>;
def fnearbyint : SDNode<"ISD::FNEARBYINT" , SDTFPUnaryOp>;
-def frnd : SDNode<"ISD::FROUND" , SDTFPUnaryOp>;
+def fround : SDNode<"ISD::FROUND" , SDTFPUnaryOp>;
-def fround : SDNode<"ISD::FP_ROUND" , SDTFPRoundOp>;
-def fextend : SDNode<"ISD::FP_EXTEND" , SDTFPExtendOp>;
+def fpround : SDNode<"ISD::FP_ROUND" , SDTFPRoundOp>;
+def fpextend : SDNode<"ISD::FP_EXTEND" , SDTFPExtendOp>;
def fcopysign : SDNode<"ISD::FCOPYSIGN" , SDTFPSignOp>;
def sint_to_fp : SDNode<"ISD::SINT_TO_FP" , SDTIntToFPOp>;
@@ -1165,10 +1165,12 @@ class Pat<dag pattern, dag result> : Pattern<pattern, [result]>;
// e.g. X86 addressing mode - def addr : ComplexPattern<4, "SelectAddr", [add]>;
//
class ComplexPattern<ValueType ty, int numops, string fn,
- list<SDNode> roots = [], list<SDNodeProperty> props = []> {
+ list<SDNode> roots = [], list<SDNodeProperty> props = [],
+ int complexity = -1> {
ValueType Ty = ty;
int NumOperands = numops;
string SelectFunc = fn;
list<SDNode> RootNodes = roots;
list<SDNodeProperty> Properties = props;
+ int Complexity = complexity;
}
diff --git a/contrib/llvm/include/llvm/Target/TargetSubtargetInfo.h b/contrib/llvm/include/llvm/Target/TargetSubtargetInfo.h
index b929070484f9..bf4331383cb0 100644
--- a/contrib/llvm/include/llvm/Target/TargetSubtargetInfo.h
+++ b/contrib/llvm/include/llvm/Target/TargetSubtargetInfo.h
@@ -25,6 +25,8 @@ namespace llvm {
class CallLowering;
class DataLayout;
+class InstructionSelector;
+class LegalizerInfo;
class MachineFunction;
class MachineInstr;
class RegisterBankInfo;
@@ -69,6 +71,8 @@ public:
virtual ~TargetSubtargetInfo();
+ virtual bool isXRaySupported() const { return false; }
+
// Interfaces to the major aspects of target machine information:
//
// -- Instruction opcode and operand information
@@ -88,12 +92,23 @@ public:
return nullptr;
}
virtual const CallLowering *getCallLowering() const { return nullptr; }
+
+ // FIXME: This lets targets specialize the selector by subtarget (which lets
+ // us do things like a dedicated avx512 selector). However, we might want
+ // to also specialize selectors by MachineFunction, which would let us be
+ // aware of optsize/optnone and such.
+ virtual const InstructionSelector *getInstructionSelector() const {
+ return nullptr;
+ }
+
/// Target can subclass this hook to select a different DAG scheduler.
virtual RegisterScheduler::FunctionPassCtor
getDAGScheduler(CodeGenOpt::Level) const {
return nullptr;
}
+ virtual const LegalizerInfo *getLegalizerInfo() const { return nullptr; }
+
/// getRegisterInfo - If register information is available, return it. If
/// not, return null.
///
@@ -176,6 +191,12 @@ public:
std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
}
+ // \brief Provide an ordered list of schedule DAG mutations for the machine
+ // pipeliner.
+ virtual void getSMSMutations(
+ std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
+ }
+
// For use with PostRAScheduling: get the minimum optimization level needed
// to enable post-RA scheduling.
virtual CodeGenOpt::Level getOptLevelToEnablePostRAScheduler() const {
@@ -203,6 +224,8 @@ public:
}
/// Enable tracking of subregister liveness in register allocator.
+ /// Please use MachineRegisterInfo::subRegLivenessEnabled() instead where
+ /// possible.
virtual bool enableSubRegLiveness() const { return false; }
};
diff --git a/contrib/llvm/include/llvm/Transforms/Coroutines.h b/contrib/llvm/include/llvm/Transforms/Coroutines.h
new file mode 100644
index 000000000000..51beb44fdc56
--- /dev/null
+++ b/contrib/llvm/include/llvm/Transforms/Coroutines.h
@@ -0,0 +1,38 @@
+//===-- Coroutines.h - Coroutine Transformations ----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Declare accessor functions for coroutine lowering passes.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_COROUTINES_H
+#define LLVM_TRANSFORMS_COROUTINES_H
+
+namespace llvm {
+
+class Pass;
+class PassManagerBuilder;
+
+/// Add all coroutine passes to appropriate extension points.
+void addCoroutinePassesToExtensionPoints(PassManagerBuilder &Builder);
+
+/// Lower coroutine intrinsics that are not needed by later passes.
+Pass *createCoroEarlyPass();
+
+/// Split up coroutines into multiple functions driving their state machines.
+Pass *createCoroSplitPass();
+
+/// Analyze coroutine use sites, devirtualize resume/destroy calls, and elide
+/// heap allocation for the coroutine frame where possible.
+Pass *createCoroElidePass();
+
+/// Lower all remaining coroutine intrinsics.
+Pass *createCoroCleanupPass();
+
+}
+
+#endif
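One way a front end driving the legacy pipeline might consume this header; the wrapper function name is an assumption of the example:

#include "llvm/Transforms/Coroutines.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"

// Hedged sketch: register CoroEarly/CoroSplit/CoroElide/CoroCleanup at the
// extension points the coroutine lowering expects to run at.
void configureCoroutinePasses(llvm::PassManagerBuilder &Builder) {
  llvm::addCoroutinePassesToExtensionPoints(Builder);
}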
diff --git a/contrib/llvm/include/llvm/Transforms/GCOVProfiler.h b/contrib/llvm/include/llvm/Transforms/GCOVProfiler.h
index f6521901a33e..66bd75c88e24 100644
--- a/contrib/llvm/include/llvm/Transforms/GCOVProfiler.h
+++ b/contrib/llvm/include/llvm/Transforms/GCOVProfiler.h
@@ -21,7 +21,7 @@ namespace llvm {
class GCOVProfilerPass : public PassInfoMixin<GCOVProfilerPass> {
public:
GCOVProfilerPass(const GCOVOptions &Options = GCOVOptions::getDefault()) : GCOVOpts(Options) { }
- PreservedAnalyses run(Module &M, AnalysisManager<Module> &AM);
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
private:
GCOVOptions GCOVOpts;
diff --git a/contrib/llvm/include/llvm/Transforms/IPO.h b/contrib/llvm/include/llvm/Transforms/IPO.h
index f6731884870c..4bebc863b4a9 100644
--- a/contrib/llvm/include/llvm/Transforms/IPO.h
+++ b/contrib/llvm/include/llvm/Transforms/IPO.h
@@ -20,6 +20,7 @@
namespace llvm {
+struct InlineParams;
class StringRef;
class ModuleSummaryIndex;
class ModulePass;
@@ -27,6 +28,7 @@ class Pass;
class Function;
class BasicBlock;
class GlobalValue;
+class raw_ostream;
//===----------------------------------------------------------------------===//
//
@@ -42,6 +44,10 @@ ModulePass *createStripSymbolsPass(bool OnlyDebugInfo = false);
//
ModulePass *createStripNonDebugSymbolsPass();
+/// This function returns a new pass that downgrades the debug info in the
+/// module to line tables only.
+ModulePass *createStripNonLineTableDebugInfoPass();
+
//===----------------------------------------------------------------------===//
//
// These pass removes llvm.dbg.declare intrinsics.
@@ -89,7 +95,7 @@ ModulePass *createGVExtractionPass(std::vector<GlobalValue*>& GVs, bool
//===----------------------------------------------------------------------===//
/// This pass performs iterative function importing from other modules.
-Pass *createFunctionImportPass(const ModuleSummaryIndex *Index = nullptr);
+Pass *createFunctionImportPass();
//===----------------------------------------------------------------------===//
/// createFunctionInliningPass - Return a new pass object that uses a heuristic
@@ -103,12 +109,7 @@ Pass *createFunctionImportPass(const ModuleSummaryIndex *Index = nullptr);
Pass *createFunctionInliningPass();
Pass *createFunctionInliningPass(int Threshold);
Pass *createFunctionInliningPass(unsigned OptLevel, unsigned SizeOptLevel);
-
-//===----------------------------------------------------------------------===//
-/// createAlwaysInlinerPass - Return a new pass object that inlines only
-/// functions that are marked as "always_inline".
-Pass *createAlwaysInlinerPass();
-Pass *createAlwaysInlinerPass(bool InsertLifetime);
+Pass *createFunctionInliningPass(InlineParams &Params);
//===----------------------------------------------------------------------===//
/// createPruneEHPass - Return a new pass object which transforms invoke
@@ -225,12 +226,19 @@ ModulePass *createCrossDSOCFIPass();
/// metadata.
ModulePass *createWholeProgramDevirtPass();
+/// This pass splits globals into pieces for the benefit of whole-program
+/// devirtualization and control-flow integrity.
+ModulePass *createGlobalSplitPass();
+
//===----------------------------------------------------------------------===//
// SampleProfilePass - Loads sample profile data from disk and generates
// IR metadata to reflect the profile.
ModulePass *createSampleProfileLoaderPass();
ModulePass *createSampleProfileLoaderPass(StringRef Name);
+/// Write ThinLTO-ready bitcode to Str.
+ModulePass *createWriteThinLTOBitcodePass(raw_ostream &Str);
+
} // End llvm namespace
#endif
diff --git a/contrib/llvm/include/llvm/Transforms/IPO/AlwaysInliner.h b/contrib/llvm/include/llvm/Transforms/IPO/AlwaysInliner.h
new file mode 100644
index 000000000000..15c80357e4a8
--- /dev/null
+++ b/contrib/llvm/include/llvm/Transforms/IPO/AlwaysInliner.h
@@ -0,0 +1,40 @@
+//===-- AlwaysInliner.h - Pass to inline "always_inline" functions --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Provides passes for inlining "always_inline" functions.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_IPO_ALWAYSINLINER_H
+#define LLVM_TRANSFORMS_IPO_ALWAYSINLINER_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+/// Inlines functions marked as "always_inline".
+///
+/// Note that this does not inline call sites marked as always_inline and does
+/// not delete the functions even when all users are inlined. The normal
+/// inliner should be used to handle call site inlining; this pass's goal is to
+/// be the simplest possible pass to remove always_inline function definitions'
+/// uses by inlining them. The \c GlobalDCE pass can be used to remove these
+/// functions once all users are gone.
+struct AlwaysInlinerPass : PassInfoMixin<AlwaysInlinerPass> {
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &);
+};
+
+/// Create a legacy pass manager instance of a pass to inline and remove
+/// functions marked as "always_inline".
+Pass *createAlwaysInlinerLegacyPass(bool InsertLifetime = true);
+
+}
+
+#endif // LLVM_TRANSFORMS_IPO_ALWAYSINLINER_H
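A sketch of using both flavours declared above; the surrounding pass-manager setup and function name are assumptions of the example:

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"

void addAlwaysInliner(llvm::ModulePassManager &MPM,
                      llvm::legacy::PassManager &LPM) {
  MPM.addPass(llvm::AlwaysInlinerPass());                     // new pass manager
  LPM.add(llvm::createAlwaysInlinerLegacyPass(/*InsertLifetime=*/true)); // legacy
}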
diff --git a/contrib/llvm/include/llvm/Transforms/IPO/CrossDSOCFI.h b/contrib/llvm/include/llvm/Transforms/IPO/CrossDSOCFI.h
index 409604a7f330..0979f5b79e86 100644
--- a/contrib/llvm/include/llvm/Transforms/IPO/CrossDSOCFI.h
+++ b/contrib/llvm/include/llvm/Transforms/IPO/CrossDSOCFI.h
@@ -21,7 +21,7 @@
namespace llvm {
class CrossDSOCFIPass : public PassInfoMixin<CrossDSOCFIPass> {
public:
- PreservedAnalyses run(Module &M, AnalysisManager<Module> &AM);
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
};
}
#endif // LLVM_TRANSFORMS_IPO_CROSSDSOCFI_H
diff --git a/contrib/llvm/include/llvm/Transforms/IPO/FunctionAttrs.h b/contrib/llvm/include/llvm/Transforms/IPO/FunctionAttrs.h
index c44cc43fc0f6..ee45f35bf11b 100644
--- a/contrib/llvm/include/llvm/Transforms/IPO/FunctionAttrs.h
+++ b/contrib/llvm/include/llvm/Transforms/IPO/FunctionAttrs.h
@@ -30,7 +30,8 @@ namespace llvm {
/// attribute. It also discovers function arguments that are not captured by
/// the function and marks them with the nocapture attribute.
struct PostOrderFunctionAttrsPass : PassInfoMixin<PostOrderFunctionAttrsPass> {
- PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM);
+ PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
+ LazyCallGraph &CG, CGSCCUpdateResult &UR);
};
/// Create a legacy pass manager instance of a pass to compute function attrs
@@ -50,7 +51,7 @@ Pass *createPostOrderFunctionAttrsLegacyPass();
class ReversePostOrderFunctionAttrsPass
: public PassInfoMixin<ReversePostOrderFunctionAttrsPass> {
public:
- PreservedAnalyses run(Module &M, AnalysisManager<Module> &AM);
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
};
}
diff --git a/contrib/llvm/include/llvm/Transforms/IPO/FunctionImport.h b/contrib/llvm/include/llvm/Transforms/IPO/FunctionImport.h
index ba5db2b5c739..d7acbe883c5d 100644
--- a/contrib/llvm/include/llvm/Transforms/IPO/FunctionImport.h
+++ b/contrib/llvm/include/llvm/Transforms/IPO/FunctionImport.h
@@ -13,6 +13,8 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/ModuleSummaryIndex.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Support/Error.h"
#include <functional>
#include <map>
@@ -42,25 +44,34 @@ public:
/// The set contains an entry for every global value the module exports.
typedef std::unordered_set<GlobalValue::GUID> ExportSetTy;
+ /// A function of this type is used to load modules referenced by the index.
+ typedef std::function<Expected<std::unique_ptr<Module>>(StringRef Identifier)>
+ ModuleLoaderTy;
+
/// Create a Function Importer.
- FunctionImporter(
- const ModuleSummaryIndex &Index,
- std::function<std::unique_ptr<Module>(StringRef Identifier)> ModuleLoader)
+ FunctionImporter(const ModuleSummaryIndex &Index, ModuleLoaderTy ModuleLoader)
: Index(Index), ModuleLoader(std::move(ModuleLoader)) {}
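A sketch of constructing the importer with the new Expected-returning loader; the use of parseIRFile and the in-scope Index and Ctx are assumptions of the example:

// Hedged sketch: load referenced modules lazily from disk, converting parse
// failures into llvm::Error so the importer can propagate them.
FunctionImporter Importer(
    Index, [&Ctx](StringRef Identifier) -> Expected<std::unique_ptr<Module>> {
      SMDiagnostic Err;
      std::unique_ptr<Module> M = parseIRFile(Identifier, Err, Ctx);
      if (!M)
        return make_error<StringError>(Err.getMessage(),
                                       inconvertibleErrorCode());
      return std::move(M);
    });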
/// Import functions in Module \p M based on the supplied import list.
/// \p ForceImportReferencedDiscardableSymbols will set the ModuleLinker in
/// a mode where referenced discardable symbols in the source modules will be
/// imported as well even if they are not present in the ImportList.
- bool importFunctions(Module &M, const ImportMapTy &ImportList,
- bool ForceImportReferencedDiscardableSymbols = false);
+ Expected<bool>
+ importFunctions(Module &M, const ImportMapTy &ImportList,
+ bool ForceImportReferencedDiscardableSymbols = false);
private:
/// The summaries index used to trigger importing.
const ModuleSummaryIndex &Index;
/// Factory function to load a Module for a given identifier
- std::function<std::unique_ptr<Module>(StringRef Identifier)> ModuleLoader;
+ ModuleLoaderTy ModuleLoader;
+};
+
+/// The function importing pass
+class FunctionImportPass : public PassInfoMixin<FunctionImportPass> {
+public:
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
};
/// Compute all the imports and exports for every module in the Index.
@@ -102,12 +113,13 @@ void ComputeCrossModuleImportForModule(
void gatherImportedSummariesForModule(
StringRef ModulePath,
const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
- const StringMap<FunctionImporter::ImportMapTy> &ImportLists,
+ const FunctionImporter::ImportMapTy &ImportList,
std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex);
+/// Emit into \p OutputFilename the files module \p ModulePath will import from.
std::error_code
EmitImportsFiles(StringRef ModulePath, StringRef OutputFilename,
- const StringMap<FunctionImporter::ImportMapTy> &ImportLists);
+ const FunctionImporter::ImportMapTy &ModuleImports);
/// Resolve WeakForLinker values in \p TheModule based on the information
/// recorded in the summaries during global summary-based analysis.
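
The loader and importFunctions() above now propagate failures through Expected rather than a bare pointer/bool. A minimal caller-side sketch, assuming this Expected-based interface; loadModuleForImport is a hypothetical helper, not part of this header:

  // Sketch: construct a FunctionImporter and consume the Expected<bool> result.
  FunctionImporter::ModuleLoaderTy Loader =
      [&](StringRef Identifier) -> Expected<std::unique_ptr<Module>> {
    return loadModuleForImport(Identifier);  // hypothetical bitcode loader
  };
  FunctionImporter Importer(Index, std::move(Loader));
  Expected<bool> Changed = Importer.importFunctions(M, ImportList);
  if (!Changed) {
    // Report the error instead of silently dropping it.
    logAllUnhandledErrors(Changed.takeError(), errs(), "function import: ");
    return false;
  }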
diff --git a/contrib/llvm/include/llvm/Transforms/IPO/GlobalOpt.h b/contrib/llvm/include/llvm/Transforms/IPO/GlobalOpt.h
index 5a25a6db4390..ab9116810be1 100644
--- a/contrib/llvm/include/llvm/Transforms/IPO/GlobalOpt.h
+++ b/contrib/llvm/include/llvm/Transforms/IPO/GlobalOpt.h
@@ -24,7 +24,7 @@ namespace llvm {
/// Optimize globals that never have their address taken.
class GlobalOptPass : public PassInfoMixin<GlobalOptPass> {
public:
- PreservedAnalyses run(Module &M, AnalysisManager<Module> &AM);
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
};
}
diff --git a/contrib/llvm/include/llvm/Transforms/IPO/GlobalSplit.h b/contrib/llvm/include/llvm/Transforms/IPO/GlobalSplit.h
new file mode 100644
index 000000000000..fb2c2d27338e
--- /dev/null
+++ b/contrib/llvm/include/llvm/Transforms/IPO/GlobalSplit.h
@@ -0,0 +1,30 @@
+//===- GlobalSplit.h - global variable splitter -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass uses inrange annotations on GEP indices to split globals where
+// beneficial. Clang currently attaches these annotations to references to
+// virtual table globals under the Itanium ABI for the benefit of the
+// whole-program virtual call optimization and control flow integrity passes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_IPO_GLOBALSPLIT_H
+#define LLVM_TRANSFORMS_IPO_GLOBALSPLIT_H
+
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+/// Pass to perform split of global variables.
+class GlobalSplitPass : public PassInfoMixin<GlobalSplitPass> {
+public:
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+}
+#endif // LLVM_TRANSFORMS_IPO_GLOBALSPLIT_H
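
A minimal sketch of scheduling the new pass in a new-PM module pipeline; the ModuleAnalysisManager (MAM) setup is assumed to exist elsewhere:

  ModulePassManager MPM;
  MPM.addPass(GlobalSplitPass());
  MPM.run(M, MAM);  // M is the Module, MAM a configured ModuleAnalysisManager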
diff --git a/contrib/llvm/include/llvm/Transforms/IPO/InferFunctionAttrs.h b/contrib/llvm/include/llvm/Transforms/IPO/InferFunctionAttrs.h
index f5cbf9eb0613..54e1c243ae27 100644
--- a/contrib/llvm/include/llvm/Transforms/IPO/InferFunctionAttrs.h
+++ b/contrib/llvm/include/llvm/Transforms/IPO/InferFunctionAttrs.h
@@ -24,7 +24,7 @@ namespace llvm {
/// A pass which infers function attributes from the names and signatures of
/// function declarations in a module.
struct InferFunctionAttrsPass : PassInfoMixin<InferFunctionAttrsPass> {
- PreservedAnalyses run(Module &M, AnalysisManager<Module> &AM);
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
};
/// Create a legacy pass manager instance of a pass to infer function
diff --git a/contrib/llvm/include/llvm/Transforms/IPO/Inliner.h b/contrib/llvm/include/llvm/Transforms/IPO/Inliner.h
new file mode 100644
index 000000000000..b3ca5156e388
--- /dev/null
+++ b/contrib/llvm/include/llvm/Transforms/IPO/Inliner.h
@@ -0,0 +1,108 @@
+//===- Inliner.h - Inliner pass and infrastructure --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_IPO_INLINER_H
+#define LLVM_TRANSFORMS_IPO_INLINER_H
+
+#include "llvm/Analysis/CGSCCPassManager.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/LazyCallGraph.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h"
+
+namespace llvm {
+class AssumptionCacheTracker;
+class CallSite;
+class DataLayout;
+class InlineCost;
+class OptimizationRemarkEmitter;
+class ProfileSummaryInfo;
+
+/// This class contains all of the helper code which is used to perform the
+/// inlining operations that do not depend on the policy. It contains the core
+/// bottom-up inlining infrastructure that specific inliner passes use.
+struct LegacyInlinerBase : public CallGraphSCCPass {
+ explicit LegacyInlinerBase(char &ID);
+ explicit LegacyInlinerBase(char &ID, bool InsertLifetime);
+
+ /// For this class, we declare that we require and preserve the call graph.
+ /// If the derived class implements this method, it should always explicitly
+ /// call the implementation here.
+ void getAnalysisUsage(AnalysisUsage &Info) const override;
+
+ bool doInitialization(CallGraph &CG) override;
+
+ /// Main run interface method, this implements the interface required by the
+ /// Pass class.
+ bool runOnSCC(CallGraphSCC &SCC) override;
+
+ using llvm::Pass::doFinalization;
+ /// Remove now-dead linkonce functions at the end of processing to avoid
+ /// breaking the SCC traversal.
+ bool doFinalization(CallGraph &CG) override;
+
+ /// This method must be implemented by the subclass to determine the cost of
+ /// inlining the specified call site. If the cost returned is greater than
+ /// the current inline threshold, the call site is not inlined.
+ virtual InlineCost getInlineCost(CallSite CS) = 0;
+
+ /// Remove dead functions.
+ ///
+ /// This also includes a hack in the form of the 'AlwaysInlineOnly' flag
+ /// which restricts it to deleting functions with an 'AlwaysInline'
+ /// attribute. This is useful for the InlineAlways pass that only wants to
+ /// deal with that subset of the functions.
+ bool removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly = false);
+
+ /// This function performs the main work of the pass. The default
+ /// implementation of runOnSCC() calls skipSCC() before calling this method, but
+ /// derived classes which cannot be skipped can override that method and call
+ /// this function unconditionally.
+ bool inlineCalls(CallGraphSCC &SCC);
+
+private:
+ // Insert @llvm.lifetime intrinsics.
+ bool InsertLifetime;
+
+protected:
+ AssumptionCacheTracker *ACT;
+ ProfileSummaryInfo *PSI;
+ ImportedFunctionsInliningStatistics ImportedFunctionsStats;
+};
+
+/// The inliner pass for the new pass manager.
+///
+/// This pass wires together the inlining utilities and the inline cost
+/// analysis into a CGSCC pass. It considers every call in every function in
+/// the SCC and tries to inline if profitable. It can be tuned with a number of
+/// parameters to control what cost model is used and what tradeoffs are made
+/// when making the decision.
+///
+/// It should be noted that the legacy inliners do considerably more than this
+/// inliner pass does. They provide logic for manually merging allocas, and
+/// doing considerable DCE including the DCE of dead functions. This pass makes
+/// every attempt to be simpler. DCE of functions requires complex reasoning
+/// about comdat groups, etc. Instead, it is expected that other more focused
+/// passes be composed to achieve the same end result.
+class InlinerPass : public PassInfoMixin<InlinerPass> {
+public:
+ InlinerPass(InlineParams Params = getInlineParams())
+ : Params(std::move(Params)) {}
+
+ PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
+ LazyCallGraph &CG, CGSCCUpdateResult &UR);
+
+private:
+ InlineParams Params;
+};
+
+} // End llvm namespace
+
+#endif
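
Since InlinerPass is a CGSCC pass, a module pipeline has to wrap it in the CGSCC adaptor. A minimal sketch under the usual new-PM wiring (analysis managers assumed configured elsewhere; the threshold value is purely illustrative):

  InlineParams Params = getInlineParams(/*Threshold=*/325);
  ModulePassManager MPM;
  MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(InlinerPass(Params)));
  MPM.run(M, MAM);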
diff --git a/contrib/llvm/include/llvm/Transforms/IPO/InlinerPass.h b/contrib/llvm/include/llvm/Transforms/IPO/InlinerPass.h
deleted file mode 100644
index 59e10608a9ba..000000000000
--- a/contrib/llvm/include/llvm/Transforms/IPO/InlinerPass.h
+++ /dev/null
@@ -1,94 +0,0 @@
-//===- InlinerPass.h - Code common to all inliners --------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines a simple policy-based bottom-up inliner. This file
-// implements all of the boring mechanics of the bottom-up inlining, while the
-// subclass determines WHAT to inline, which is the much more interesting
-// component.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TRANSFORMS_IPO_INLINERPASS_H
-#define LLVM_TRANSFORMS_IPO_INLINERPASS_H
-
-#include "llvm/Analysis/CallGraphSCCPass.h"
-
-namespace llvm {
-class AssumptionCacheTracker;
-class CallSite;
-class DataLayout;
-class InlineCost;
-class ProfileSummaryInfo;
-template <class PtrType, unsigned SmallSize> class SmallPtrSet;
-
-/// Inliner - This class contains all of the helper code which is used to
-/// perform the inlining operations that do not depend on the policy.
-///
-struct Inliner : public CallGraphSCCPass {
- explicit Inliner(char &ID);
- explicit Inliner(char &ID, bool InsertLifetime);
-
- /// getAnalysisUsage - For this class, we declare that we require and preserve
- /// the call graph. If the derived class implements this method, it should
- /// always explicitly call the implementation here.
- void getAnalysisUsage(AnalysisUsage &Info) const override;
-
- // Main run interface method, this implements the interface required by the
- // Pass class.
- bool runOnSCC(CallGraphSCC &SCC) override;
-
- using llvm::Pass::doFinalization;
- // doFinalization - Remove now-dead linkonce functions at the end of
- // processing to avoid breaking the SCC traversal.
- bool doFinalization(CallGraph &CG) override;
-
- /// getInlineCost - This method must be implemented by the subclass to
- /// determine the cost of inlining the specified call site. If the cost
- /// returned is greater than the current inline threshold, the call site is
- /// not inlined.
- ///
- virtual InlineCost getInlineCost(CallSite CS) = 0;
-
- /// removeDeadFunctions - Remove dead functions.
- ///
- /// This also includes a hack in the form of the 'AlwaysInlineOnly' flag
- /// which restricts it to deleting functions with an 'AlwaysInline'
- /// attribute. This is useful for the InlineAlways pass that only wants to
- /// deal with that subset of the functions.
- bool removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly = false);
-
- /// This function performs the main work of the pass. The default
- /// of Inlinter::runOnSCC() calls skipSCC() before calling this method, but
- /// derived classes which cannot be skipped can override that method and
- /// call this function unconditionally.
- bool inlineCalls(CallGraphSCC &SCC);
-
-private:
- // InsertLifetime - Insert @llvm.lifetime intrinsics.
- bool InsertLifetime;
-
- /// shouldInline - Return true if the inliner should attempt to
- /// inline at the given CallSite.
- bool shouldInline(CallSite CS);
- /// Return true if inlining of CS can block the caller from being
- /// inlined which is proved to be more beneficial. \p IC is the
- /// estimated inline cost associated with callsite \p CS.
- /// \p TotalAltCost will be set to the estimated cost of inlining the caller
- /// if \p CS is suppressed for inlining.
- bool shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC,
- int &TotalAltCost);
-
-protected:
- AssumptionCacheTracker *ACT;
- ProfileSummaryInfo *PSI;
-};
-
-} // End llvm namespace
-
-#endif
diff --git a/contrib/llvm/include/llvm/Transforms/IPO/Internalize.h b/contrib/llvm/include/llvm/Transforms/IPO/Internalize.h
index ba1b06877d3a..45d676d9f77b 100644
--- a/contrib/llvm/include/llvm/Transforms/IPO/Internalize.h
+++ b/contrib/llvm/include/llvm/Transforms/IPO/Internalize.h
@@ -63,7 +63,7 @@ public:
/// internalizing a function (by removing any edge from the "external node")
bool internalizeModule(Module &TheModule, CallGraph *CG = nullptr);
- PreservedAnalyses run(Module &M, AnalysisManager<Module> &AM);
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
};
/// Helper function to internalize functions and variables in a Module.
diff --git a/contrib/llvm/include/llvm/Transforms/IPO/LowerTypeTests.h b/contrib/llvm/include/llvm/Transforms/IPO/LowerTypeTests.h
index 93d4fb94e2c4..23c59c199a3b 100644
--- a/contrib/llvm/include/llvm/Transforms/IPO/LowerTypeTests.h
+++ b/contrib/llvm/include/llvm/Transforms/IPO/LowerTypeTests.h
@@ -205,7 +205,7 @@ struct ByteArrayBuilder {
class LowerTypeTestsPass : public PassInfoMixin<LowerTypeTestsPass> {
public:
- PreservedAnalyses run(Module &M, AnalysisManager<Module> &AM);
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
};
} // end namespace llvm
diff --git a/contrib/llvm/include/llvm/Transforms/IPO/PartialInlining.h b/contrib/llvm/include/llvm/Transforms/IPO/PartialInlining.h
index 48eb1e30a191..15407fc36a22 100644
--- a/contrib/llvm/include/llvm/Transforms/IPO/PartialInlining.h
+++ b/contrib/llvm/include/llvm/Transforms/IPO/PartialInlining.h
@@ -24,9 +24,6 @@ namespace llvm {
class PartialInlinerPass : public PassInfoMixin<PartialInlinerPass> {
public:
PreservedAnalyses run(Module &M, ModuleAnalysisManager &);
-
-private:
- Function *unswitchFunction(Function *F);
};
}
#endif // LLVM_TRANSFORMS_IPO_PARTIALINLINING_H
diff --git a/contrib/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h b/contrib/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h
index 4f483deeefe5..9f9ce467337e 100644
--- a/contrib/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h
+++ b/contrib/llvm/include/llvm/Transforms/IPO/PassManagerBuilder.h
@@ -100,6 +100,11 @@ public:
/// peephole optimizations similar to the instruction combiner. These passes
/// will be inserted after each instance of the instruction combiner pass.
EP_Peephole,
+
+ /// EP_CGSCCOptimizerLate - This extension point allows adding CallGraphSCC
+ /// passes at the end of the main CallGraphSCC passes and before any
+ /// function simplification passes run by CGPassManager.
+ EP_CGSCCOptimizerLate,
};
/// The Optimization Level - Specify the basic optimization level.
@@ -119,9 +124,6 @@ public:
/// added to the per-module passes.
Pass *Inliner;
- /// The module summary index to use for function importing.
- const ModuleSummaryIndex *ModuleSummary;
-
bool DisableTailCalls;
bool DisableUnitAtATime;
bool DisableUnrollLoops;
@@ -130,6 +132,7 @@ public:
bool LoopVectorize;
bool RerollLoops;
bool LoadCombine;
+ bool NewGVN;
bool DisableGVNLoadPRE;
bool VerifyInput;
bool VerifyOutput;
@@ -138,10 +141,14 @@ public:
bool PrepareForThinLTO;
bool PerformThinLTO;
+ /// Enable profile instrumentation pass.
+ bool EnablePGOInstrGen;
/// Profile data file name that the instrumentation will be written to.
std::string PGOInstrGen;
/// Path of the profile data file.
std::string PGOInstrUse;
+ /// Path of the sample profile data file.
+ std::string PGOSampleUse;
private:
/// ExtensionList - This is the list of all of the extensions that are registered.
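
The new EP_CGSCCOptimizerLate point is registered like the existing extension points, via addExtension(). A small sketch; createMyCustomSCCPass is a hypothetical stand-in for whatever CallGraphSCC pass a frontend wants to inject:

  PassManagerBuilder PMB;
  PMB.addExtension(PassManagerBuilder::EP_CGSCCOptimizerLate,
                   [](const PassManagerBuilder &Builder,
                      legacy::PassManagerBase &PM) {
                     PM.add(createMyCustomSCCPass());  // hypothetical factory
                   });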
diff --git a/contrib/llvm/include/llvm/Transforms/IPO/SCCP.h b/contrib/llvm/include/llvm/Transforms/IPO/SCCP.h
index fab731342144..7082006f14a6 100644
--- a/contrib/llvm/include/llvm/Transforms/IPO/SCCP.h
+++ b/contrib/llvm/include/llvm/Transforms/IPO/SCCP.h
@@ -28,7 +28,7 @@ namespace llvm {
/// Pass to perform interprocedural constant propagation.
class IPSCCPPass : public PassInfoMixin<IPSCCPPass> {
public:
- PreservedAnalyses run(Module &M, AnalysisManager<Module> &AM);
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
};
}
#endif // LLVM_TRANSFORMS_IPO_SCCP_H
diff --git a/contrib/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h b/contrib/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h
index 2bd20c95702c..1aa4c6f4f559 100644
--- a/contrib/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h
+++ b/contrib/llvm/include/llvm/Transforms/IPO/WholeProgramDevirt.h
@@ -118,7 +118,7 @@ struct VirtualCallTarget {
// For testing only.
VirtualCallTarget(const TypeMemberInfo *TM, bool IsBigEndian)
- : Fn(nullptr), TM(TM), IsBigEndian(IsBigEndian) {}
+ : Fn(nullptr), TM(TM), IsBigEndian(IsBigEndian), WasDevirt(false) {}
// The function stored in the vtable.
Function *Fn;
@@ -134,6 +134,9 @@ struct VirtualCallTarget {
// Whether the target is big endian.
bool IsBigEndian;
+ // Whether at least one call site to the target was devirtualized.
+ bool WasDevirt;
+
// The minimum byte offset before the address point. This covers the bytes in
// the vtable object before the address point (e.g. RTTI, access-to-top,
// vtables for other base classes) and is equal to the offset from the start
diff --git a/contrib/llvm/include/llvm/Transforms/InstCombine/InstCombine.h b/contrib/llvm/include/llvm/Transforms/InstCombine/InstCombine.h
index d70b847c6892..6bd22dc46255 100644
--- a/contrib/llvm/include/llvm/Transforms/InstCombine/InstCombine.h
+++ b/contrib/llvm/include/llvm/Transforms/InstCombine/InstCombine.h
@@ -31,19 +31,10 @@ class InstCombinePass : public PassInfoMixin<InstCombinePass> {
public:
static StringRef name() { return "InstCombinePass"; }
- // Explicitly define constructors for MSVC.
- InstCombinePass(bool ExpensiveCombines = true)
+ explicit InstCombinePass(bool ExpensiveCombines = true)
: ExpensiveCombines(ExpensiveCombines) {}
- InstCombinePass(InstCombinePass &&Arg)
- : Worklist(std::move(Arg.Worklist)),
- ExpensiveCombines(Arg.ExpensiveCombines) {}
- InstCombinePass &operator=(InstCombinePass &&RHS) {
- Worklist = std::move(RHS.Worklist);
- ExpensiveCombines = RHS.ExpensiveCombines;
- return *this;
- }
- PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM);
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
/// \brief The legacy pass manager's instcombine pass.
diff --git a/contrib/llvm/include/llvm/Transforms/InstCombine/InstCombineWorklist.h b/contrib/llvm/include/llvm/Transforms/InstCombine/InstCombineWorklist.h
index 32af035d07d4..271e891bb45e 100644
--- a/contrib/llvm/include/llvm/Transforms/InstCombine/InstCombineWorklist.h
+++ b/contrib/llvm/include/llvm/Transforms/InstCombine/InstCombineWorklist.h
@@ -28,19 +28,11 @@ class InstCombineWorklist {
SmallVector<Instruction*, 256> Worklist;
DenseMap<Instruction*, unsigned> WorklistMap;
- void operator=(const InstCombineWorklist&RHS) = delete;
- InstCombineWorklist(const InstCombineWorklist&) = delete;
public:
- InstCombineWorklist() {}
-
- InstCombineWorklist(InstCombineWorklist &&Arg)
- : Worklist(std::move(Arg.Worklist)),
- WorklistMap(std::move(Arg.WorklistMap)) {}
- InstCombineWorklist &operator=(InstCombineWorklist &&RHS) {
- Worklist = std::move(RHS.Worklist);
- WorklistMap = std::move(RHS.WorklistMap);
- return *this;
- }
+ InstCombineWorklist() = default;
+
+ InstCombineWorklist(InstCombineWorklist &&) = default;
+ InstCombineWorklist &operator=(InstCombineWorklist &&) = default;
bool isEmpty() const { return Worklist.empty(); }
diff --git a/contrib/llvm/include/llvm/Transforms/InstrProfiling.h b/contrib/llvm/include/llvm/Transforms/InstrProfiling.h
index 9ac6d63b96ae..b7c2935f4d84 100644
--- a/contrib/llvm/include/llvm/Transforms/InstrProfiling.h
+++ b/contrib/llvm/include/llvm/Transforms/InstrProfiling.h
@@ -21,7 +21,9 @@
namespace llvm {
-/// Instrumenation based profiling lowering pass. This pass lowers
+class TargetLibraryInfo;
+
+/// Instrumentation based profiling lowering pass. This pass lowers
/// the profile instrumented code generated by FE or the IR based
/// instrumentation pass.
class InstrProfiling : public PassInfoMixin<InstrProfiling> {
@@ -29,12 +31,13 @@ public:
InstrProfiling() {}
InstrProfiling(const InstrProfOptions &Options) : Options(Options) {}
- PreservedAnalyses run(Module &M, AnalysisManager<Module> &AM);
- bool run(Module &M);
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+ bool run(Module &M, const TargetLibraryInfo &TLI);
private:
InstrProfOptions Options;
Module *M;
+ const TargetLibraryInfo *TLI;
struct PerFunctionProfileData {
uint32_t NumValueSites[IPVK_Last + 1];
GlobalVariable *RegionCounters;
@@ -44,7 +47,7 @@ private:
}
};
DenseMap<GlobalVariable *, PerFunctionProfileData> ProfileDataMap;
- std::vector<Value *> UsedVars;
+ std::vector<GlobalValue *> UsedVars;
std::vector<GlobalVariable *> ReferencedNames;
GlobalVariable *NamesVar;
size_t NamesSize;
diff --git a/contrib/llvm/include/llvm/Transforms/Instrumentation.h b/contrib/llvm/include/llvm/Transforms/Instrumentation.h
index 09eef7e0750e..7fb9a5442081 100644
--- a/contrib/llvm/include/llvm/Transforms/Instrumentation.h
+++ b/contrib/llvm/include/llvm/Transforms/Instrumentation.h
@@ -108,7 +108,8 @@ ModulePass *createAddressSanitizerModulePass(bool CompileKernel = false,
bool Recover = false);
// Insert MemorySanitizer instrumentation (detection of uninitialized reads)
-FunctionPass *createMemorySanitizerPass(int TrackOrigins = 0);
+FunctionPass *createMemorySanitizerPass(int TrackOrigins = 0,
+ bool Recover = false);
// Insert ThreadSanitizer (race detection) instrumentation
FunctionPass *createThreadSanitizerPass();
@@ -136,7 +137,8 @@ ModulePass *createEfficiencySanitizerPass(
struct SanitizerCoverageOptions {
SanitizerCoverageOptions()
: CoverageType(SCK_None), IndirectCalls(false), TraceBB(false),
- TraceCmp(false), Use8bitCounters(false), TracePC(false) {}
+ TraceCmp(false), TraceDiv(false), TraceGep(false),
+ Use8bitCounters(false), TracePC(false), TracePCGuard(false) {}
enum Type {
SCK_None = 0,
@@ -147,8 +149,11 @@ struct SanitizerCoverageOptions {
bool IndirectCalls;
bool TraceBB;
bool TraceCmp;
+ bool TraceDiv;
+ bool TraceGep;
bool Use8bitCounters;
bool TracePC;
+ bool TracePCGuard;
};
// Insert SanitizerCoverage instrumentation.
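
The added TraceDiv/TraceGep/TracePCGuard fields are plain option bits consumed by the SanitizerCoverage factory. A hedged sketch of configuring them for the legacy pass manager:

  SanitizerCoverageOptions Opts;
  Opts.CoverageType = SanitizerCoverageOptions::SCK_Edge;
  Opts.TracePCGuard = true;   // new guard-based trace-pc instrumentation
  Opts.TraceCmp = true;
  Opts.TraceDiv = true;
  legacy::PassManager PM;
  PM.add(createSanitizerCoverageModulePass(Opts));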
diff --git a/contrib/llvm/include/llvm/Transforms/PGOInstrumentation.h b/contrib/llvm/include/llvm/Transforms/PGOInstrumentation.h
index f6b5639e5aad..1b449c9abdc2 100644
--- a/contrib/llvm/include/llvm/Transforms/PGOInstrumentation.h
+++ b/contrib/llvm/include/llvm/Transforms/PGOInstrumentation.h
@@ -22,13 +22,13 @@ namespace llvm {
/// The instrumentation (profile-instr-gen) pass for IR based PGO.
class PGOInstrumentationGen : public PassInfoMixin<PGOInstrumentationGen> {
public:
- PreservedAnalyses run(Module &M, AnalysisManager<Module> &AM);
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
};
/// The profile annotation (profile-instr-use) pass for IR based PGO.
class PGOInstrumentationUse : public PassInfoMixin<PGOInstrumentationUse> {
public:
- PreservedAnalyses run(Module &M, AnalysisManager<Module> &AM);
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
PGOInstrumentationUse(std::string Filename = "");
private:
@@ -39,7 +39,7 @@ private:
class PGOIndirectCallPromotion : public PassInfoMixin<PGOIndirectCallPromotion> {
public:
PGOIndirectCallPromotion(bool IsInLTO = false) : InLTO(IsInLTO) {}
- PreservedAnalyses run(Module &M, AnalysisManager<Module> &AM);
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
private:
bool InLTO;
};
diff --git a/contrib/llvm/include/llvm/Transforms/SampleProfile.h b/contrib/llvm/include/llvm/Transforms/SampleProfile.h
index 0fdfa2f85e54..93fa9532cc3a 100644
--- a/contrib/llvm/include/llvm/Transforms/SampleProfile.h
+++ b/contrib/llvm/include/llvm/Transforms/SampleProfile.h
@@ -20,7 +20,7 @@ namespace llvm {
/// The sample profiler data loader pass.
class SampleProfileLoaderPass : public PassInfoMixin<SampleProfileLoaderPass> {
public:
- PreservedAnalyses run(Module &M, AnalysisManager<Module> &AM);
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
};
} // End llvm namespace
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar.h b/contrib/llvm/include/llvm/Transforms/Scalar.h
index 167cc94ec81f..92558937d047 100644
--- a/contrib/llvm/include/llvm/Transforms/Scalar.h
+++ b/contrib/llvm/include/llvm/Transforms/Scalar.h
@@ -140,6 +140,13 @@ Pass *createLICMPass();
//===----------------------------------------------------------------------===//
//
+// LoopSink - This pass sinks loop invariants from the preheader into those loop
+// body blocks whose frequency is lower than the preheader's.
+//
+Pass *createLoopSinkPass();
+
+//===----------------------------------------------------------------------===//
+//
// LoopInterchange - This pass interchanges loops to provide more
// cache-friendly memory access patterns.
//
@@ -169,8 +176,9 @@ Pass *createLoopInstSimplifyPass();
// LoopUnroll - This pass is a simple loop unrolling pass.
//
Pass *createLoopUnrollPass(int Threshold = -1, int Count = -1,
- int AllowPartial = -1, int Runtime = -1);
-// Create an unrolling pass for full unrolling only.
+ int AllowPartial = -1, int Runtime = -1,
+ int UpperBound = -1);
+// Create an unrolling pass for full unrolling that uses exact trip count only.
Pass *createSimpleLoopUnrollPass();
//===----------------------------------------------------------------------===//
@@ -322,7 +330,7 @@ extern char &LCSSAID;
// EarlyCSE - This pass performs a simple and fast CSE pass over the dominator
// tree.
//
-FunctionPass *createEarlyCSEPass();
+FunctionPass *createEarlyCSEPass(bool UseMemorySSA = false);
//===----------------------------------------------------------------------===//
//
@@ -340,6 +348,13 @@ FunctionPass *createMergedLoadStoreMotionPass();
//===----------------------------------------------------------------------===//
//
+// GVN - This pass performs global value numbering and redundant load
+// elimination simultaneously.
+//
+FunctionPass *createNewGVNPass();
+
+//===----------------------------------------------------------------------===//
+//
// MemCpyOpt - This pass performs optimizations related to eliminating memcpy
// calls and/or combining multiple stores into memset's.
//
@@ -471,6 +486,13 @@ ModulePass *createRewriteStatepointsForGCPass();
//===----------------------------------------------------------------------===//
//
+// StripGCRelocates - Remove GC relocates that have been inserted by
+// RewriteStatepointsForGC. The resulting IR is incorrect, but this is useful
+// for manual inspection.
+FunctionPass *createStripGCRelocatesPass();
+
+//===----------------------------------------------------------------------===//
+//
// Float2Int - Demote floats to ints where possible.
//
FunctionPass *createFloat2IntPass();
@@ -485,10 +507,7 @@ FunctionPass *createNaryReassociatePass();
//
// LoopDistribute - Distribute loops.
//
-// ProcessAllLoopsByDefault instructs the pass to look for distribution
-// opportunities in all loops unless -enable-loop-distribute or the
-// llvm.loop.distribute.enable metadata data override this default.
-FunctionPass *createLoopDistributePass(bool ProcessAllLoopsByDefault);
+FunctionPass *createLoopDistributePass();
//===----------------------------------------------------------------------===//
//
@@ -516,8 +535,14 @@ FunctionPass *createLoopVersioningPass();
FunctionPass *createLoopDataPrefetchPass();
///===---------------------------------------------------------------------===//
-ModulePass *createNameAnonFunctionPass();
+ModulePass *createNameAnonGlobalPass();
+//===----------------------------------------------------------------------===//
+//
+// LibCallsShrinkWrap - Shrink-wraps a library call if its result is not
+// used.
+//
+FunctionPass *createLibCallsShrinkWrapPass();
} // End llvm namespace
#endif
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/DCE.h b/contrib/llvm/include/llvm/Transforms/Scalar/DCE.h
index d9f921e1e7c1..273346cf81d9 100644
--- a/contrib/llvm/include/llvm/Transforms/Scalar/DCE.h
+++ b/contrib/llvm/include/llvm/Transforms/Scalar/DCE.h
@@ -22,7 +22,7 @@ namespace llvm {
/// Basic Dead Code Elimination pass.
class DCEPass : public PassInfoMixin<DCEPass> {
public:
- PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM);
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
}
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/DeadStoreElimination.h b/contrib/llvm/include/llvm/Transforms/Scalar/DeadStoreElimination.h
index 7826e29f178e..3ae999dfb542 100644
--- a/contrib/llvm/include/llvm/Transforms/Scalar/DeadStoreElimination.h
+++ b/contrib/llvm/include/llvm/Transforms/Scalar/DeadStoreElimination.h
@@ -27,7 +27,7 @@ namespace llvm {
/// only the redundant stores that are local to a single Basic Block.
class DSEPass : public PassInfoMixin<DSEPass> {
public:
- PreservedAnalyses run(Function &F, AnalysisManager<Function> &FAM);
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
};
}
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/EarlyCSE.h b/contrib/llvm/include/llvm/Transforms/Scalar/EarlyCSE.h
index 80e3c602a2b8..969ab78bfd19 100644
--- a/contrib/llvm/include/llvm/Transforms/Scalar/EarlyCSE.h
+++ b/contrib/llvm/include/llvm/Transforms/Scalar/EarlyCSE.h
@@ -27,8 +27,12 @@ namespace llvm {
/// cases so that instcombine and other passes are more effective. It is
/// expected that a later pass of GVN will catch the interesting/hard cases.
struct EarlyCSEPass : PassInfoMixin<EarlyCSEPass> {
+ EarlyCSEPass(bool UseMemorySSA = false) : UseMemorySSA(UseMemorySSA) {}
+
/// \brief Run the pass over the function.
- PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM);
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+
+ bool UseMemorySSA;
};
}
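
The flag selects the MemorySSA-backed variant at construction time, mirroring the new createEarlyCSEPass(bool) factory for the legacy PM. A minimal new-PM sketch (analysis registration is assumed to happen elsewhere):

  FunctionPassManager FPM;
  FPM.addPass(EarlyCSEPass(/*UseMemorySSA=*/true));
  FPM.run(F, FAM);  // FAM is a FunctionAnalysisManager with MemorySSA available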
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/GVN.h b/contrib/llvm/include/llvm/Transforms/Scalar/GVN.h
index 3bb5ec392272..8f05e8cdb233 100644
--- a/contrib/llvm/include/llvm/Transforms/Scalar/GVN.h
+++ b/contrib/llvm/include/llvm/Transforms/Scalar/GVN.h
@@ -22,12 +22,14 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PassManager.h"
namespace llvm {
+class OptimizationRemarkEmitter;
/// A private "module" namespace for types and utilities used by GVN. These
/// are implementation details and should not be used by clients.
@@ -45,7 +47,7 @@ class GVN : public PassInfoMixin<GVN> {
public:
/// \brief Run the pass over the function.
- PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM);
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
/// This removes the specified instruction from
/// our various maps and marks it for deletion.
@@ -109,6 +111,7 @@ private:
const TargetLibraryInfo *TLI;
AssumptionCache *AC;
SetVector<BasicBlock *> DeadBlocks;
+ OptimizationRemarkEmitter *ORE;
ValueTable VN;
@@ -134,7 +137,8 @@ private:
bool runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
const TargetLibraryInfo &RunTLI, AAResults &RunAA,
- MemoryDependenceResults *RunMD);
+ MemoryDependenceResults *RunMD, LoopInfo *LI,
+ OptimizationRemarkEmitter *ORE);
/// Push a new Value to the LeaderTable onto the list for its value number.
void addToLeaderTable(uint32_t N, Value *V, const BasicBlock *BB) {
@@ -232,7 +236,7 @@ FunctionPass *createGVNPass(bool NoLoads = false);
/// from sibling branches.
struct GVNHoistPass : PassInfoMixin<GVNHoistPass> {
/// \brief Run the pass over the function.
- PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM);
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
}
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/GVNExpression.h b/contrib/llvm/include/llvm/Transforms/Scalar/GVNExpression.h
new file mode 100644
index 000000000000..3458696e0687
--- /dev/null
+++ b/contrib/llvm/include/llvm/Transforms/Scalar/GVNExpression.h
@@ -0,0 +1,605 @@
+//======- GVNExpression.h - GVN Expression classes -------*- C++ -*-==-------=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// The header file for the GVN pass that contains expression handling
+/// classes
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_GVNEXPRESSION_H
+#define LLVM_TRANSFORMS_SCALAR_GVNEXPRESSION_H
+
+#include "llvm/ADT/Hashing.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/ArrayRecycler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/MemorySSA.h"
+#include <algorithm>
+
+namespace llvm {
+class MemoryAccess;
+
+namespace GVNExpression {
+
+enum ExpressionType {
+ ET_Base,
+ ET_Constant,
+ ET_Variable,
+ ET_Unknown,
+ ET_BasicStart,
+ ET_Basic,
+ ET_Call,
+ ET_AggregateValue,
+ ET_Phi,
+ ET_Load,
+ ET_Store,
+ ET_BasicEnd
+};
+
+class Expression {
+private:
+ ExpressionType EType;
+ unsigned Opcode;
+
+public:
+ Expression(const Expression &) = delete;
+ Expression(ExpressionType ET = ET_Base, unsigned O = ~2U)
+ : EType(ET), Opcode(O) {}
+ void operator=(const Expression &) = delete;
+ virtual ~Expression();
+
+ static unsigned getEmptyKey() { return ~0U; }
+ static unsigned getTombstoneKey() { return ~1U; }
+
+ bool operator==(const Expression &Other) const {
+ if (getOpcode() != Other.getOpcode())
+ return false;
+ if (getOpcode() == getEmptyKey() || getOpcode() == getTombstoneKey())
+ return true;
+ // Compare the expression type for anything but load and store.
+ // For load and store we set the opcode to zero.
+ // This is needed for load coercion.
+ if (getExpressionType() != ET_Load && getExpressionType() != ET_Store &&
+ getExpressionType() != Other.getExpressionType())
+ return false;
+
+ return equals(Other);
+ }
+
+ virtual bool equals(const Expression &Other) const { return true; }
+
+ unsigned getOpcode() const { return Opcode; }
+ void setOpcode(unsigned opcode) { Opcode = opcode; }
+ ExpressionType getExpressionType() const { return EType; }
+
+ virtual hash_code getHashValue() const {
+ return hash_combine(getExpressionType(), getOpcode());
+ }
+
+ //
+ // Debugging support
+ //
+ virtual void printInternal(raw_ostream &OS, bool PrintEType) const {
+ if (PrintEType)
+ OS << "etype = " << getExpressionType() << ",";
+ OS << "opcode = " << getOpcode() << ", ";
+ }
+
+ void print(raw_ostream &OS) const {
+ OS << "{ ";
+ printInternal(OS, true);
+ OS << "}";
+ }
+ void dump() const { print(dbgs()); }
+};
+
+inline raw_ostream &operator<<(raw_ostream &OS, const Expression &E) {
+ E.print(OS);
+ return OS;
+}
+
+class BasicExpression : public Expression {
+private:
+ typedef ArrayRecycler<Value *> RecyclerType;
+ typedef RecyclerType::Capacity RecyclerCapacity;
+ Value **Operands;
+ unsigned MaxOperands;
+ unsigned NumOperands;
+ Type *ValueType;
+
+public:
+ static bool classof(const Expression *EB) {
+ ExpressionType ET = EB->getExpressionType();
+ return ET > ET_BasicStart && ET < ET_BasicEnd;
+ }
+
+ BasicExpression(unsigned NumOperands)
+ : BasicExpression(NumOperands, ET_Basic) {}
+ BasicExpression(unsigned NumOperands, ExpressionType ET)
+ : Expression(ET), Operands(nullptr), MaxOperands(NumOperands),
+ NumOperands(0), ValueType(nullptr) {}
+ virtual ~BasicExpression() override;
+ void operator=(const BasicExpression &) = delete;
+ BasicExpression(const BasicExpression &) = delete;
+ BasicExpression() = delete;
+
+ /// \brief Swap two operands. Used during GVN to put commutative operands in
+ /// order.
+ void swapOperands(unsigned First, unsigned Second) {
+ std::swap(Operands[First], Operands[Second]);
+ }
+
+ Value *getOperand(unsigned N) const {
+ assert(Operands && "Operands not allocated");
+ assert(N < NumOperands && "Operand out of range");
+ return Operands[N];
+ }
+
+ void setOperand(unsigned N, Value *V) {
+ assert(Operands && "Operands not allocated before setting");
+ assert(N < NumOperands && "Operand out of range");
+ Operands[N] = V;
+ }
+
+ unsigned getNumOperands() const { return NumOperands; }
+
+ typedef Value **op_iterator;
+ typedef Value *const *const_op_iterator;
+ op_iterator op_begin() { return Operands; }
+ op_iterator op_end() { return Operands + NumOperands; }
+ const_op_iterator op_begin() const { return Operands; }
+ const_op_iterator op_end() const { return Operands + NumOperands; }
+ iterator_range<op_iterator> operands() {
+ return iterator_range<op_iterator>(op_begin(), op_end());
+ }
+ iterator_range<const_op_iterator> operands() const {
+ return iterator_range<const_op_iterator>(op_begin(), op_end());
+ }
+
+ void op_push_back(Value *Arg) {
+ assert(NumOperands < MaxOperands && "Tried to add too many operands");
+ assert(Operands && "Operands not allocated before pushing");
+ Operands[NumOperands++] = Arg;
+ }
+ bool op_empty() const { return getNumOperands() == 0; }
+
+ void allocateOperands(RecyclerType &Recycler, BumpPtrAllocator &Allocator) {
+ assert(!Operands && "Operands already allocated");
+ Operands = Recycler.allocate(RecyclerCapacity::get(MaxOperands), Allocator);
+ }
+ void deallocateOperands(RecyclerType &Recycler) {
+ Recycler.deallocate(RecyclerCapacity::get(MaxOperands), Operands);
+ }
+
+ void setType(Type *T) { ValueType = T; }
+ Type *getType() const { return ValueType; }
+
+ virtual bool equals(const Expression &Other) const override {
+ if (getOpcode() != Other.getOpcode())
+ return false;
+
+ const auto &OE = cast<BasicExpression>(Other);
+ return getType() == OE.getType() && NumOperands == OE.NumOperands &&
+ std::equal(op_begin(), op_end(), OE.op_begin());
+ }
+
+ virtual hash_code getHashValue() const override {
+ return hash_combine(getExpressionType(), getOpcode(), ValueType,
+ hash_combine_range(op_begin(), op_end()));
+ }
+
+ //
+ // Debugging support
+ //
+ virtual void printInternal(raw_ostream &OS, bool PrintEType) const override {
+ if (PrintEType)
+ OS << "ExpressionTypeBasic, ";
+
+ this->Expression::printInternal(OS, false);
+ OS << "operands = {";
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ OS << "[" << i << "] = ";
+ Operands[i]->printAsOperand(OS);
+ OS << " ";
+ }
+ OS << "} ";
+ }
+};
+class op_inserter
+ : public std::iterator<std::output_iterator_tag, void, void, void, void> {
+private:
+ typedef BasicExpression Container;
+ Container *BE;
+
+public:
+ explicit op_inserter(BasicExpression &E) : BE(&E) {}
+ explicit op_inserter(BasicExpression *E) : BE(E) {}
+
+ op_inserter &operator=(Value *val) {
+ BE->op_push_back(val);
+ return *this;
+ }
+ op_inserter &operator*() { return *this; }
+ op_inserter &operator++() { return *this; }
+ op_inserter &operator++(int) { return *this; }
+};
+
+class CallExpression final : public BasicExpression {
+private:
+ CallInst *Call;
+ MemoryAccess *DefiningAccess;
+
+public:
+ static bool classof(const Expression *EB) {
+ return EB->getExpressionType() == ET_Call;
+ }
+
+ CallExpression(unsigned NumOperands, CallInst *C, MemoryAccess *DA)
+ : BasicExpression(NumOperands, ET_Call), Call(C), DefiningAccess(DA) {}
+ void operator=(const CallExpression &) = delete;
+ CallExpression(const CallExpression &) = delete;
+ CallExpression() = delete;
+ virtual ~CallExpression() override;
+
+ virtual bool equals(const Expression &Other) const override {
+ if (!this->BasicExpression::equals(Other))
+ return false;
+ const auto &OE = cast<CallExpression>(Other);
+ return DefiningAccess == OE.DefiningAccess;
+ }
+
+ virtual hash_code getHashValue() const override {
+ return hash_combine(this->BasicExpression::getHashValue(), DefiningAccess);
+ }
+
+ //
+ // Debugging support
+ //
+ virtual void printInternal(raw_ostream &OS, bool PrintEType) const override {
+ if (PrintEType)
+ OS << "ExpressionTypeCall, ";
+ this->BasicExpression::printInternal(OS, false);
+ OS << " represents call at " << Call;
+ }
+};
+
+class LoadExpression final : public BasicExpression {
+private:
+ LoadInst *Load;
+ MemoryAccess *DefiningAccess;
+ unsigned Alignment;
+
+public:
+ static bool classof(const Expression *EB) {
+ return EB->getExpressionType() == ET_Load;
+ }
+
+ LoadExpression(unsigned NumOperands, LoadInst *L, MemoryAccess *DA)
+ : LoadExpression(ET_Load, NumOperands, L, DA) {}
+ LoadExpression(enum ExpressionType EType, unsigned NumOperands, LoadInst *L,
+ MemoryAccess *DA)
+ : BasicExpression(NumOperands, EType), Load(L), DefiningAccess(DA) {
+ Alignment = L ? L->getAlignment() : 0;
+ }
+ void operator=(const LoadExpression &) = delete;
+ LoadExpression(const LoadExpression &) = delete;
+ LoadExpression() = delete;
+ virtual ~LoadExpression() override;
+
+ LoadInst *getLoadInst() const { return Load; }
+ void setLoadInst(LoadInst *L) { Load = L; }
+
+ MemoryAccess *getDefiningAccess() const { return DefiningAccess; }
+ void setDefiningAccess(MemoryAccess *MA) { DefiningAccess = MA; }
+ unsigned getAlignment() const { return Alignment; }
+ void setAlignment(unsigned Align) { Alignment = Align; }
+
+ virtual bool equals(const Expression &Other) const override;
+
+ virtual hash_code getHashValue() const override {
+ return hash_combine(getOpcode(), getType(), DefiningAccess,
+ hash_combine_range(op_begin(), op_end()));
+ }
+
+ //
+ // Debugging support
+ //
+ virtual void printInternal(raw_ostream &OS, bool PrintEType) const override {
+ if (PrintEType)
+ OS << "ExpressionTypeLoad, ";
+ this->BasicExpression::printInternal(OS, false);
+ OS << " represents Load at " << Load;
+ OS << " with DefiningAccess " << *DefiningAccess;
+ }
+};
+
+class StoreExpression final : public BasicExpression {
+private:
+ StoreInst *Store;
+ MemoryAccess *DefiningAccess;
+
+public:
+ static bool classof(const Expression *EB) {
+ return EB->getExpressionType() == ET_Store;
+ }
+
+ StoreExpression(unsigned NumOperands, StoreInst *S, MemoryAccess *DA)
+ : BasicExpression(NumOperands, ET_Store), Store(S), DefiningAccess(DA) {}
+ void operator=(const StoreExpression &) = delete;
+ StoreExpression(const StoreExpression &) = delete;
+ StoreExpression() = delete;
+ virtual ~StoreExpression() override;
+
+ StoreInst *getStoreInst() const { return Store; }
+ MemoryAccess *getDefiningAccess() const { return DefiningAccess; }
+
+ virtual bool equals(const Expression &Other) const override;
+
+ virtual hash_code getHashValue() const override {
+ return hash_combine(getOpcode(), getType(), DefiningAccess,
+ hash_combine_range(op_begin(), op_end()));
+ }
+
+ //
+ // Debugging support
+ //
+ virtual void printInternal(raw_ostream &OS, bool PrintEType) const override {
+ if (PrintEType)
+ OS << "ExpressionTypeStore, ";
+ this->BasicExpression::printInternal(OS, false);
+ OS << " represents Store at " << Store;
+ OS << " with DefiningAccess " << *DefiningAccess;
+ }
+};
+
+class AggregateValueExpression final : public BasicExpression {
+private:
+ unsigned MaxIntOperands;
+ unsigned NumIntOperands;
+ unsigned *IntOperands;
+
+public:
+ static bool classof(const Expression *EB) {
+ return EB->getExpressionType() == ET_AggregateValue;
+ }
+
+ AggregateValueExpression(unsigned NumOperands, unsigned NumIntOperands)
+ : BasicExpression(NumOperands, ET_AggregateValue),
+ MaxIntOperands(NumIntOperands), NumIntOperands(0),
+ IntOperands(nullptr) {}
+
+ void operator=(const AggregateValueExpression &) = delete;
+ AggregateValueExpression(const AggregateValueExpression &) = delete;
+ AggregateValueExpression() = delete;
+ virtual ~AggregateValueExpression() override;
+
+ typedef unsigned *int_arg_iterator;
+ typedef const unsigned *const_int_arg_iterator;
+
+ int_arg_iterator int_op_begin() { return IntOperands; }
+ int_arg_iterator int_op_end() { return IntOperands + NumIntOperands; }
+ const_int_arg_iterator int_op_begin() const { return IntOperands; }
+ const_int_arg_iterator int_op_end() const {
+ return IntOperands + NumIntOperands;
+ }
+ unsigned int_op_size() const { return NumIntOperands; }
+ bool int_op_empty() const { return NumIntOperands == 0; }
+ void int_op_push_back(unsigned IntOperand) {
+ assert(NumIntOperands < MaxIntOperands &&
+ "Tried to add too many int operands");
+ assert(IntOperands && "Operands not allocated before pushing");
+ IntOperands[NumIntOperands++] = IntOperand;
+ }
+
+ virtual void allocateIntOperands(BumpPtrAllocator &Allocator) {
+ assert(!IntOperands && "Operands already allocated");
+ IntOperands = Allocator.Allocate<unsigned>(MaxIntOperands);
+ }
+
+ virtual bool equals(const Expression &Other) const override {
+ if (!this->BasicExpression::equals(Other))
+ return false;
+ const AggregateValueExpression &OE = cast<AggregateValueExpression>(Other);
+ return NumIntOperands == OE.NumIntOperands &&
+ std::equal(int_op_begin(), int_op_end(), OE.int_op_begin());
+ }
+
+ virtual hash_code getHashValue() const override {
+ return hash_combine(this->BasicExpression::getHashValue(),
+ hash_combine_range(int_op_begin(), int_op_end()));
+ }
+
+ //
+ // Debugging support
+ //
+ virtual void printInternal(raw_ostream &OS, bool PrintEType) const override {
+ if (PrintEType)
+ OS << "ExpressionTypeAggregateValue, ";
+ this->BasicExpression::printInternal(OS, false);
+ OS << ", intoperands = {";
+ for (unsigned i = 0, e = int_op_size(); i != e; ++i) {
+ OS << "[" << i << "] = " << IntOperands[i] << " ";
+ }
+ OS << "}";
+ }
+};
+class int_op_inserter
+ : public std::iterator<std::output_iterator_tag, void, void, void, void> {
+private:
+ typedef AggregateValueExpression Container;
+ Container *AVE;
+
+public:
+ explicit int_op_inserter(AggregateValueExpression &E) : AVE(&E) {}
+ explicit int_op_inserter(AggregateValueExpression *E) : AVE(E) {}
+ int_op_inserter &operator=(unsigned int val) {
+ AVE->int_op_push_back(val);
+ return *this;
+ }
+ int_op_inserter &operator*() { return *this; }
+ int_op_inserter &operator++() { return *this; }
+ int_op_inserter &operator++(int) { return *this; }
+};
+
+class PHIExpression final : public BasicExpression {
+private:
+ BasicBlock *BB;
+
+public:
+ static bool classof(const Expression *EB) {
+ return EB->getExpressionType() == ET_Phi;
+ }
+
+ PHIExpression(unsigned NumOperands, BasicBlock *B)
+ : BasicExpression(NumOperands, ET_Phi), BB(B) {}
+ void operator=(const PHIExpression &) = delete;
+ PHIExpression(const PHIExpression &) = delete;
+ PHIExpression() = delete;
+ virtual ~PHIExpression() override;
+
+ virtual bool equals(const Expression &Other) const override {
+ if (!this->BasicExpression::equals(Other))
+ return false;
+ const PHIExpression &OE = cast<PHIExpression>(Other);
+ return BB == OE.BB;
+ }
+
+ virtual hash_code getHashValue() const override {
+ return hash_combine(this->BasicExpression::getHashValue(), BB);
+ }
+
+ //
+ // Debugging support
+ //
+ virtual void printInternal(raw_ostream &OS, bool PrintEType) const override {
+ if (PrintEType)
+ OS << "ExpressionTypePhi, ";
+ this->BasicExpression::printInternal(OS, false);
+ OS << "bb = " << BB;
+ }
+};
+
+class VariableExpression final : public Expression {
+private:
+ Value *VariableValue;
+
+public:
+ static bool classof(const Expression *EB) {
+ return EB->getExpressionType() == ET_Variable;
+ }
+
+ VariableExpression(Value *V) : Expression(ET_Variable), VariableValue(V) {}
+ void operator=(const VariableExpression &) = delete;
+ VariableExpression(const VariableExpression &) = delete;
+ VariableExpression() = delete;
+
+ Value *getVariableValue() const { return VariableValue; }
+ void setVariableValue(Value *V) { VariableValue = V; }
+ virtual bool equals(const Expression &Other) const override {
+ const VariableExpression &OC = cast<VariableExpression>(Other);
+ return VariableValue == OC.VariableValue;
+ }
+
+ virtual hash_code getHashValue() const override {
+ return hash_combine(getExpressionType(), VariableValue->getType(),
+ VariableValue);
+ }
+
+ //
+ // Debugging support
+ //
+ virtual void printInternal(raw_ostream &OS, bool PrintEType) const override {
+ if (PrintEType)
+ OS << "ExpressionTypeVariable, ";
+ this->Expression::printInternal(OS, false);
+ OS << " variable = " << *VariableValue;
+ }
+};
+
+class ConstantExpression final : public Expression {
+private:
+ Constant *ConstantValue;
+
+public:
+ static bool classof(const Expression *EB) {
+ return EB->getExpressionType() == ET_Constant;
+ }
+
+ ConstantExpression() : Expression(ET_Constant), ConstantValue(NULL) {}
+ ConstantExpression(Constant *constantValue)
+ : Expression(ET_Constant), ConstantValue(constantValue) {}
+ void operator=(const ConstantExpression &) = delete;
+ ConstantExpression(const ConstantExpression &) = delete;
+
+ Constant *getConstantValue() const { return ConstantValue; }
+ void setConstantValue(Constant *V) { ConstantValue = V; }
+
+ virtual bool equals(const Expression &Other) const override {
+ const ConstantExpression &OC = cast<ConstantExpression>(Other);
+ return ConstantValue == OC.ConstantValue;
+ }
+
+ virtual hash_code getHashValue() const override {
+ return hash_combine(getExpressionType(), ConstantValue->getType(),
+ ConstantValue);
+ }
+
+ //
+ // Debugging support
+ //
+ virtual void printInternal(raw_ostream &OS, bool PrintEType) const override {
+ if (PrintEType)
+ OS << "ExpressionTypeConstant, ";
+ this->Expression::printInternal(OS, false);
+ OS << " constant = " << *ConstantValue;
+ }
+};
+
+class UnknownExpression final : public Expression {
+private:
+ Instruction *Inst;
+
+public:
+ static bool classof(const Expression *EB) {
+ return EB->getExpressionType() == ET_Unknown;
+ }
+
+ UnknownExpression(Instruction *I) : Expression(ET_Unknown), Inst(I) {}
+ void operator=(const UnknownExpression &) = delete;
+ UnknownExpression(const UnknownExpression &) = delete;
+ UnknownExpression() = delete;
+
+ Instruction *getInstruction() const { return Inst; }
+ void setInstruction(Instruction *I) { Inst = I; }
+ virtual bool equals(const Expression &Other) const override {
+ const auto &OU = cast<UnknownExpression>(Other);
+ return Inst == OU.Inst;
+ }
+ virtual hash_code getHashValue() const override {
+ return hash_combine(getExpressionType(), Inst);
+ }
+ //
+ // Debugging support
+ //
+ virtual void printInternal(raw_ostream &OS, bool PrintEType) const override {
+ if (PrintEType)
+ OS << "ExpressionTypeUnknown, ";
+ this->Expression::printInternal(OS, false);
+ OS << " inst = " << *Inst;
+ }
+};
+}
+}
+
+#endif
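
A hedged sketch of how a value-numbering client might build one of these expressions from an instruction; the recycler/allocator pair matches the allocateOperands() signature above, and everything outside this header is an assumption:

  using namespace llvm::GVNExpression;

  BasicExpression *createBasicExpression(Instruction *I,
                                         ArrayRecycler<Value *> &ArgRecycler,
                                         BumpPtrAllocator &Alloc) {
    auto *E = new (Alloc) BasicExpression(I->getNumOperands());
    E->allocateOperands(ArgRecycler, Alloc);
    E->setType(I->getType());
    E->setOpcode(I->getOpcode());
    // op_inserter forwards each operand to op_push_back().
    std::copy(I->op_begin(), I->op_end(), op_inserter(E));
    return E;
  }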
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/GuardWidening.h b/contrib/llvm/include/llvm/Transforms/Scalar/GuardWidening.h
index 201065cbdfb5..2bc0940ac715 100644
--- a/contrib/llvm/include/llvm/Transforms/Scalar/GuardWidening.h
+++ b/contrib/llvm/include/llvm/Transforms/Scalar/GuardWidening.h
@@ -24,7 +24,7 @@ namespace llvm {
class Function;
struct GuardWideningPass : public PassInfoMixin<GuardWideningPass> {
- PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM);
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
}
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/IndVarSimplify.h b/contrib/llvm/include/llvm/Transforms/Scalar/IndVarSimplify.h
index 325bcc7bed8f..24a31594b153 100644
--- a/contrib/llvm/include/llvm/Transforms/Scalar/IndVarSimplify.h
+++ b/contrib/llvm/include/llvm/Transforms/Scalar/IndVarSimplify.h
@@ -16,13 +16,14 @@
#define LLVM_TRANSFORMS_SCALAR_INDVARSIMPLIFY_H
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPassManager.h"
#include "llvm/IR/PassManager.h"
namespace llvm {
class IndVarSimplifyPass : public PassInfoMixin<IndVarSimplifyPass> {
public:
- PreservedAnalyses run(Loop &L, AnalysisManager<Loop> &AM);
+ PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM);
};
}
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/JumpThreading.h b/contrib/llvm/include/llvm/Transforms/Scalar/JumpThreading.h
index e38bdd03ac06..f96741c0127d 100644
--- a/contrib/llvm/include/llvm/Transforms/Scalar/JumpThreading.h
+++ b/contrib/llvm/include/llvm/Transforms/Scalar/JumpThreading.h
@@ -85,20 +85,13 @@ class JumpThreadingPass : public PassInfoMixin<JumpThreadingPass> {
public:
JumpThreadingPass(int T = -1);
- // Hack for MSVC 2013 which seems like it can't synthesize this.
- JumpThreadingPass(JumpThreadingPass &&Other)
- : TLI(Other.TLI), LVI(Other.LVI), BFI(std::move(Other.BFI)),
- BPI(std::move(Other.BPI)), HasProfileData(Other.HasProfileData),
- LoopHeaders(std::move(Other.LoopHeaders)),
- RecursionSet(std::move(Other.RecursionSet)),
- BBDupThreshold(Other.BBDupThreshold) {}
// Glue for old PM.
bool runImpl(Function &F, TargetLibraryInfo *TLI_, LazyValueInfo *LVI_,
bool HasProfileData_, std::unique_ptr<BlockFrequencyInfo> BFI_,
std::unique_ptr<BranchProbabilityInfo> BPI_);
- PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM);
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
void releaseMemory() {
BFI.reset();
@@ -134,6 +127,8 @@ private:
const char *Suffix);
void UpdateBlockFreqAndEdgeWeight(BasicBlock *PredBB, BasicBlock *BB,
BasicBlock *NewBB, BasicBlock *SuccBB);
+ /// Check if the block has profile metadata for its outgoing edges.
+ bool doesBlockHaveProfileData(BasicBlock *BB);
};
} // end namespace llvm
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/LICM.h b/contrib/llvm/include/llvm/Transforms/Scalar/LICM.h
index a050a43d6179..39bbc72f8cb4 100644
--- a/contrib/llvm/include/llvm/Transforms/Scalar/LICM.h
+++ b/contrib/llvm/include/llvm/Transforms/Scalar/LICM.h
@@ -34,6 +34,7 @@
#define LLVM_TRANSFORMS_SCALAR_LICM_H
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPassManager.h"
#include "llvm/IR/PassManager.h"
namespace llvm {
@@ -41,7 +42,7 @@ namespace llvm {
/// Performs Loop Invariant Code Motion Pass.
class LICMPass : public PassInfoMixin<LICMPass> {
public:
- PreservedAnalyses run(Loop &L, AnalysisManager<Loop> &AM);
+ PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM);
};
} // end namespace llvm
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/LoopDataPrefetch.h b/contrib/llvm/include/llvm/Transforms/Scalar/LoopDataPrefetch.h
new file mode 100644
index 000000000000..114d1bad17a5
--- /dev/null
+++ b/contrib/llvm/include/llvm/Transforms/Scalar/LoopDataPrefetch.h
@@ -0,0 +1,31 @@
+//===-------- LoopDataPrefetch.h - Loop Data Prefetching Pass ---*- C++ -*-===//
+//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file provides the interface for LLVM's Loop Data Prefetching Pass.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_LOOPDATAPREFETCH_H
+#define LLVM_TRANSFORMS_SCALAR_LOOPDATAPREFETCH_H
+
+#include "llvm/IR/Function.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+/// An optimization pass inserting data prefetches in loops.
+class LoopDataPrefetchPass : public PassInfoMixin<LoopDataPrefetchPass> {
+public:
+ LoopDataPrefetchPass() {}
+ /// \brief Run the pass over the function.
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+}
+
+#endif
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/LoopDeletion.h b/contrib/llvm/include/llvm/Transforms/Scalar/LoopDeletion.h
index ed5a9833e572..891f08faa48a 100644
--- a/contrib/llvm/include/llvm/Transforms/Scalar/LoopDeletion.h
+++ b/contrib/llvm/include/llvm/Transforms/Scalar/LoopDeletion.h
@@ -15,6 +15,7 @@
#define LLVM_TRANSFORMS_SCALAR_LOOPDELETION_H
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPassManager.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/PassManager.h"
@@ -23,7 +24,7 @@ namespace llvm {
class LoopDeletionPass : public PassInfoMixin<LoopDeletionPass> {
public:
LoopDeletionPass() {}
- PreservedAnalyses run(Loop &L, AnalysisManager<Loop> &AM);
+ PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM);
bool runImpl(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
LoopInfo &loopInfo);
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h b/contrib/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h
index cc66156fba8a..0c052ddd2fe7 100644
--- a/contrib/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h
+++ b/contrib/llvm/include/llvm/Transforms/Scalar/LoopIdiomRecognize.h
@@ -17,6 +17,7 @@
#define LLVM_TRANSFORMS_SCALAR_LOOPIDIOMRECOGNIZE_H
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPassManager.h"
#include "llvm/IR/PassManager.h"
namespace llvm {
@@ -24,7 +25,7 @@ namespace llvm {
/// Performs Loop Idiom Recognize Pass.
class LoopIdiomRecognizePass : public PassInfoMixin<LoopIdiomRecognizePass> {
public:
- PreservedAnalyses run(Loop &L, AnalysisManager<Loop> &AM);
+ PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM);
};
} // end namespace llvm
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/LoopInstSimplify.h b/contrib/llvm/include/llvm/Transforms/Scalar/LoopInstSimplify.h
index f67343f40a7c..e30f4a97b78e 100644
--- a/contrib/llvm/include/llvm/Transforms/Scalar/LoopInstSimplify.h
+++ b/contrib/llvm/include/llvm/Transforms/Scalar/LoopInstSimplify.h
@@ -15,6 +15,7 @@
#define LLVM_TRANSFORMS_SCALAR_LOOPINSTSIMPLIFY_H
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPassManager.h"
#include "llvm/IR/PassManager.h"
namespace llvm {
@@ -22,7 +23,7 @@ namespace llvm {
/// Performs Loop Inst Simplify Pass.
class LoopInstSimplifyPass : public PassInfoMixin<LoopInstSimplifyPass> {
public:
- PreservedAnalyses run(Loop &L, AnalysisManager<Loop> &AM);
+ PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM);
};
} // end namespace llvm
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/LoopRotation.h b/contrib/llvm/include/llvm/Transforms/Scalar/LoopRotation.h
index b21c7313dc4b..54b8ec545ed2 100644
--- a/contrib/llvm/include/llvm/Transforms/Scalar/LoopRotation.h
+++ b/contrib/llvm/include/llvm/Transforms/Scalar/LoopRotation.h
@@ -15,6 +15,7 @@
#define LLVM_TRANSFORMS_SCALAR_LOOPROTATION_H
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPassManager.h"
#include "llvm/IR/PassManager.h"
namespace llvm {
@@ -22,8 +23,11 @@ namespace llvm {
/// A simple loop rotation transformation.
class LoopRotatePass : public PassInfoMixin<LoopRotatePass> {
public:
- LoopRotatePass();
- PreservedAnalyses run(Loop &L, AnalysisManager<Loop> &AM);
+ LoopRotatePass(bool EnableHeaderDuplication = true);
+ PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM);
+
+private:
+ const bool EnableHeaderDuplication;
};
}
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/LoopSimplifyCFG.h b/contrib/llvm/include/llvm/Transforms/Scalar/LoopSimplifyCFG.h
index 7609bb26a1a0..2f06782052c5 100644
--- a/contrib/llvm/include/llvm/Transforms/Scalar/LoopSimplifyCFG.h
+++ b/contrib/llvm/include/llvm/Transforms/Scalar/LoopSimplifyCFG.h
@@ -18,6 +18,7 @@
#define LLVM_TRANSFORMS_SCALAR_LOOPSIMPLIFYCFG_H
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPassManager.h"
#include "llvm/IR/PassManager.h"
namespace llvm {
@@ -25,7 +26,7 @@ namespace llvm {
/// Performs basic CFG simplifications to assist other loop passes.
class LoopSimplifyCFGPass : public PassInfoMixin<LoopSimplifyCFGPass> {
public:
- PreservedAnalyses run(Loop &L, AnalysisManager<Loop> &AM);
+ PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM);
};
} // end namespace llvm
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/LoopStrengthReduce.h b/contrib/llvm/include/llvm/Transforms/Scalar/LoopStrengthReduce.h
new file mode 100644
index 000000000000..11c0d9bce85b
--- /dev/null
+++ b/contrib/llvm/include/llvm/Transforms/Scalar/LoopStrengthReduce.h
@@ -0,0 +1,38 @@
+//===- LoopStrengthReduce.h - Loop Strength Reduce Pass -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation analyzes and transforms the induction variables (and
+// computations derived from them) into forms suitable for efficient execution
+// on the target.
+//
+// This pass performs strength reduction on array references inside loops that
+// have the loop induction variable as one or more of their components. It
+// rewrites expressions to take advantage of scaled-index addressing modes
+// available on the target, and it performs a variety of other optimizations
+// related to loop induction variables.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_LOOPSTRENGTHREDUCE_H
+#define LLVM_TRANSFORMS_SCALAR_LOOPSTRENGTHREDUCE_H
+
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPassManager.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+/// Performs Loop Strength Reduce Pass.
+class LoopStrengthReducePass : public PassInfoMixin<LoopStrengthReducePass> {
+public:
+ PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM);
+};
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_LOOPSTRENGTHREDUCE_H
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h b/contrib/llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h
new file mode 100644
index 000000000000..74a7258df5fc
--- /dev/null
+++ b/contrib/llvm/include/llvm/Transforms/Scalar/LoopUnrollPass.h
@@ -0,0 +1,30 @@
+//===- LoopUnrollPass.h -----------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_LOOPUNROLLPASS_H
+#define LLVM_TRANSFORMS_SCALAR_LOOPUNROLLPASS_H
+
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPassManager.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+struct LoopUnrollPass : public PassInfoMixin<LoopUnrollPass> {
+ Optional<unsigned> ProvidedCount;
+ Optional<unsigned> ProvidedThreshold;
+ Optional<bool> ProvidedAllowPartial;
+ Optional<bool> ProvidedRuntime;
+ Optional<bool> ProvidedUpperBound;
+
+ PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM);
+};
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_LOOPUNROLLPASS_H
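
The Provided* fields above are plain public members, so a pipeline can pre-seed unrolling parameters by assignment before the pass is handed to a loop pass manager. A minimal, hypothetical configuration sketch (the surrounding pass-manager plumbing is assumed and is not part of this header):

  LoopUnrollPass Unroll;
  Unroll.ProvidedCount = 4;            // request an unroll factor of 4
  Unroll.ProvidedAllowPartial = true;  // permit partial unrolling
  Unroll.ProvidedRuntime = false;      // but no runtime trip-count version
  // LPM.addPass(std::move(Unroll));   // LPM is an assumed LoopPassManager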
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/LowerGuardIntrinsic.h b/contrib/llvm/include/llvm/Transforms/Scalar/LowerGuardIntrinsic.h
new file mode 100644
index 000000000000..a9f19f6b84b4
--- /dev/null
+++ b/contrib/llvm/include/llvm/Transforms/Scalar/LowerGuardIntrinsic.h
@@ -0,0 +1,28 @@
+//===--- LowerGuardIntrinsic.h - Lower the guard intrinsic ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass lowers the llvm.experimental.guard intrinsic to a conditional call
+// to @llvm.experimental.deoptimize. Once this happens, the guard can no longer
+// be widened.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_TRANSFORMS_SCALAR_LOWERGUARDINTRINSIC_H
+#define LLVM_TRANSFORMS_SCALAR_LOWERGUARDINTRINSIC_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+struct LowerGuardIntrinsicPass : PassInfoMixin<LowerGuardIntrinsicPass> {
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+}
+
+#endif //LLVM_TRANSFORMS_SCALAR_LOWERGUARDINTRINSIC_H
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/MergedLoadStoreMotion.h b/contrib/llvm/include/llvm/Transforms/Scalar/MergedLoadStoreMotion.h
index 47cfea489243..3cad7bb070d0 100644
--- a/contrib/llvm/include/llvm/Transforms/Scalar/MergedLoadStoreMotion.h
+++ b/contrib/llvm/include/llvm/Transforms/Scalar/MergedLoadStoreMotion.h
@@ -31,7 +31,7 @@ namespace llvm {
class MergedLoadStoreMotionPass
: public PassInfoMixin<MergedLoadStoreMotionPass> {
public:
- PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM);
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
}
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/NaryReassociate.h b/contrib/llvm/include/llvm/Transforms/Scalar/NaryReassociate.h
new file mode 100644
index 000000000000..a74bb6cc4194
--- /dev/null
+++ b/contrib/llvm/include/llvm/Transforms/Scalar/NaryReassociate.h
@@ -0,0 +1,174 @@
+//===- NaryReassociate.h - Reassociate n-ary expressions ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass reassociates n-ary add expressions and eliminates the redundancy
+// exposed by the reassociation.
+//
+// A motivating example:
+//
+// void foo(int a, int b) {
+// bar(a + b);
+// bar((a + 2) + b);
+// }
+//
+// An ideal compiler should reassociate (a + 2) + b to (a + b) + 2 and simplify
+// the above code to
+//
+// int t = a + b;
+// bar(t);
+// bar(t + 2);
+//
+// However, the Reassociate pass is unable to do that because it processes each
+// instruction individually and believes (a + 2) + b is the best form according
+// to its rank system.
+//
+// To address this limitation, NaryReassociate reassociates an expression in a
+// form that reuses existing instructions. As a result, NaryReassociate can
+// reassociate (a + 2) + b in the example to (a + b) + 2 because it detects that
+// (a + b) is computed before.
+//
+// NaryReassociate works as follows. For every instruction in the form of (a +
+// b) + c, it checks whether a + c or b + c is already computed by a dominating
+// instruction. If so, it then reassociates (a + b) + c into (a + c) + b or (b +
+// c) + a and removes the redundancy accordingly. To efficiently look up whether
+// an expression is computed before, we store each instruction seen and its SCEV
+// into an SCEV-to-instruction map.
+//
+// Although the algorithm pattern-matches only ternary additions, it
+// automatically handles many >3-ary expressions by walking through the function
+// in the depth-first order. For example, given
+//
+// (a + c) + d
+// ((a + b) + c) + d
+//
+// NaryReassociate first rewrites (a + b) + c to (a + c) + b, and then rewrites
+// ((a + c) + b) + d into ((a + c) + d) + b.
+//
+// Finally, the above dominator-based algorithm may need to be run multiple
+// iterations before emitting optimal code. One source of this need is that we
+// only split an operand when it is used only once. The above algorithm can
+// eliminate an instruction and decrease the usage count of its operands. As a
+// result, an instruction that previously had multiple uses may become a
+// single-use instruction and thus eligible for split consideration. For
+// example,
+//
+// ac = a + c
+// ab = a + b
+// abc = ab + c
+// ab2 = ab + b
+// ab2c = ab2 + c
+//
+// In the first iteration, we cannot reassociate abc to ac+b because ab is used
+// twice. However, we can reassociate ab2c to abc+b in the first iteration. As a
+// result, ab2 becomes dead and ab will be used only once in the second
+// iteration.
+//
+// Limitations and TODO items:
+//
+// 1) We only consider n-ary adds and muls for now. This should be extended
+// and generalized.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_NARYREASSOCIATE_H
+#define LLVM_TRANSFORMS_SCALAR_NARYREASSOCIATE_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+class NaryReassociatePass : public PassInfoMixin<NaryReassociatePass> {
+public:
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+
+ // Glue for old PM.
+ bool runImpl(Function &F, AssumptionCache *AC_, DominatorTree *DT_,
+ ScalarEvolution *SE_, TargetLibraryInfo *TLI_,
+ TargetTransformInfo *TTI_);
+
+private:
+ // Runs only one iteration of the dominator-based algorithm. See the header
+ // comments for why we need multiple iterations.
+ bool doOneIteration(Function &F);
+
+ // Reassociates I for better CSE.
+ Instruction *tryReassociate(Instruction *I);
+
+ // Reassociate GEP for better CSE.
+ Instruction *tryReassociateGEP(GetElementPtrInst *GEP);
+ // Try splitting GEP at the I-th index and see whether either part can be
+ // CSE'ed. This is a helper function for tryReassociateGEP.
+ //
+ // \p IndexedType The element type indexed by GEP's I-th index. This is
+ // equivalent to
+ // GEP->getIndexedType(GEP->getPointerOperand(), 0-th index,
+ // ..., i-th index).
+ GetElementPtrInst *tryReassociateGEPAtIndex(GetElementPtrInst *GEP,
+ unsigned I, Type *IndexedType);
+ // Given GEP's I-th index = LHS + RHS, see whether &Base[..][LHS][..] or
+ // &Base[..][RHS][..] can be CSE'ed and rewrite GEP accordingly.
+ GetElementPtrInst *tryReassociateGEPAtIndex(GetElementPtrInst *GEP,
+ unsigned I, Value *LHS,
+ Value *RHS, Type *IndexedType);
+
+ // Reassociate binary operators for better CSE.
+ Instruction *tryReassociateBinaryOp(BinaryOperator *I);
+
+ // A helper function for tryReassociateBinaryOp. LHS and RHS are explicitly
+ // passed.
+ Instruction *tryReassociateBinaryOp(Value *LHS, Value *RHS,
+ BinaryOperator *I);
+ // Rewrites I to (LHS op RHS) if LHS is computed already.
+ Instruction *tryReassociatedBinaryOp(const SCEV *LHS, Value *RHS,
+ BinaryOperator *I);
+
+ // Tries to match Op1 and Op2 by using V.
+ bool matchTernaryOp(BinaryOperator *I, Value *V, Value *&Op1, Value *&Op2);
+
+ // Gets SCEV for (LHS op RHS).
+ const SCEV *getBinarySCEV(BinaryOperator *I, const SCEV *LHS,
+ const SCEV *RHS);
+
+ // Returns the closest dominator of \c Dominatee that computes
+ // \c CandidateExpr. Returns null if not found.
+ Instruction *findClosestMatchingDominator(const SCEV *CandidateExpr,
+ Instruction *Dominatee);
+ // GetElementPtrInst implicitly sign-extends an index if the index is shorter
+ // than the pointer size. This function returns whether Index is shorter than
+ // GEP's pointer size, i.e., whether Index needs to be sign-extended in order
+ // to be an index of GEP.
+ bool requiresSignExtension(Value *Index, GetElementPtrInst *GEP);
+
+ AssumptionCache *AC;
+ const DataLayout *DL;
+ DominatorTree *DT;
+ ScalarEvolution *SE;
+ TargetLibraryInfo *TLI;
+ TargetTransformInfo *TTI;
+ // A lookup table quickly telling which instructions compute the given SCEV.
+ // Note that there can be multiple instructions at different locations
+ // computing to the same SCEV, so we map a SCEV to an instruction list. For
+ // example,
+ //
+ // if (p1)
+ // foo(a + b);
+ // if (p2)
+ // bar(a + b);
+ DenseMap<const SCEV *, SmallVector<WeakVH, 2>> SeenExprs;
+};
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_NARYREASSOCIATE_H
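
The redundancy lookup sketched in the header comment can be illustrated outside LLVM: remember which two-operand sums have already been computed, and when a nested sum arrives, prefer the regrouping whose inner sum is already available. A self-contained toy model (not the LLVM API; the real pass keys its map on SCEVs and consults the dominator tree):

  #include <iostream>
  #include <map>
  #include <string>
  #include <utility>

  int main() {
    using Key = std::pair<std::string, std::string>;
    std::map<Key, std::string> Seen; // canonical (lhs, rhs) -> existing value

    auto canon = [](std::string A, std::string B) {
      return A < B ? Key{A, B} : Key{B, A};
    };

    Seen[canon("a", "b")] = "t";     // "t = a + b" is already computed

    // Incoming expression: (a + 2) + b.  Each regrouping reuses one inner
    // pair; pick the grouping whose inner sum has been seen before.
    std::string Rewrite;
    if (Seen.count(canon("a", "b")))
      Rewrite = Seen[canon("a", "b")] + " + 2";
    else if (Seen.count(canon("2", "b")))
      Rewrite = Seen[canon("2", "b")] + " + a";

    std::cout << (Rewrite.empty() ? "no reuse found" : "rewrite to " + Rewrite)
              << "\n";               // prints: rewrite to t + 2
    return 0;
  }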
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/NewGVN.h b/contrib/llvm/include/llvm/Transforms/Scalar/NewGVN.h
new file mode 100644
index 000000000000..d0425aa4345f
--- /dev/null
+++ b/contrib/llvm/include/llvm/Transforms/Scalar/NewGVN.h
@@ -0,0 +1,28 @@
+//===----- NewGVN.h - Global Value Numbering Pass ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file provides the interface for LLVM's Global Value Numbering pass.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_NEWGVN_H
+#define LLVM_TRANSFORMS_SCALAR_NEWGVN_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+class NewGVNPass : public PassInfoMixin<NewGVNPass> {
+public:
+ /// \brief Run the pass over the function.
+ PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM);
+};
+}
+
+#endif // LLVM_TRANSFORMS_SCALAR_NEWGVN_H
+
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/PartiallyInlineLibCalls.h b/contrib/llvm/include/llvm/Transforms/Scalar/PartiallyInlineLibCalls.h
index 385bbb40db3f..7f73831e0eb3 100644
--- a/contrib/llvm/include/llvm/Transforms/Scalar/PartiallyInlineLibCalls.h
+++ b/contrib/llvm/include/llvm/Transforms/Scalar/PartiallyInlineLibCalls.h
@@ -23,7 +23,7 @@ namespace llvm {
class PartiallyInlineLibCallsPass
: public PassInfoMixin<PartiallyInlineLibCallsPass> {
public:
- PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM);
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
}
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/Reassociate.h b/contrib/llvm/include/llvm/Transforms/Scalar/Reassociate.h
index 2f56b9398778..7b68b4489306 100644
--- a/contrib/llvm/include/llvm/Transforms/Scalar/Reassociate.h
+++ b/contrib/llvm/include/llvm/Transforms/Scalar/Reassociate.h
@@ -65,7 +65,7 @@ public:
PreservedAnalyses run(Function &F, FunctionAnalysisManager &);
private:
- void BuildRankMap(Function &F);
+ void BuildRankMap(Function &F, ReversePostOrderTraversal<Function *> &RPOT);
unsigned getRank(Value *V);
void canonicalizeOperands(Instruction *I);
void ReassociateExpression(BinaryOperator *I);
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/SCCP.h b/contrib/llvm/include/llvm/Transforms/Scalar/SCCP.h
index 0dd90ecbedec..6e7f77fe2c50 100644
--- a/contrib/llvm/include/llvm/Transforms/Scalar/SCCP.h
+++ b/contrib/llvm/include/llvm/Transforms/Scalar/SCCP.h
@@ -29,7 +29,7 @@ namespace llvm {
/// This pass performs function-level constant propagation and merging.
class SCCPPass : public PassInfoMixin<SCCPPass> {
public:
- PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM);
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
}
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/SROA.h b/contrib/llvm/include/llvm/Transforms/Scalar/SROA.h
index 72e7d63d4df6..3e93f46dd4e5 100644
--- a/contrib/llvm/include/llvm/Transforms/Scalar/SROA.h
+++ b/contrib/llvm/include/llvm/Transforms/Scalar/SROA.h
@@ -102,7 +102,7 @@ public:
SROA() : C(nullptr), DT(nullptr), AC(nullptr) {}
/// \brief Run the pass over the function.
- PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM);
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
private:
friend class sroa::AllocaSliceRewriter;
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/SimplifyCFG.h b/contrib/llvm/include/llvm/Transforms/Scalar/SimplifyCFG.h
index 53f427a7d19a..96e1658c00b0 100644
--- a/contrib/llvm/include/llvm/Transforms/Scalar/SimplifyCFG.h
+++ b/contrib/llvm/include/llvm/Transforms/Scalar/SimplifyCFG.h
@@ -36,7 +36,7 @@ public:
SimplifyCFGPass(int BonusInstThreshold);
/// \brief Run the pass over the function.
- PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM);
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
}
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/Sink.h b/contrib/llvm/include/llvm/Transforms/Scalar/Sink.h
index 1144c62fb20c..f9b3cb0fae39 100644
--- a/contrib/llvm/include/llvm/Transforms/Scalar/Sink.h
+++ b/contrib/llvm/include/llvm/Transforms/Scalar/Sink.h
@@ -23,7 +23,7 @@ namespace llvm {
/// Move instructions into successor blocks when possible.
class SinkingPass : public PassInfoMixin<SinkingPass> {
public:
- PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM);
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
}
diff --git a/contrib/llvm/include/llvm/Transforms/Scalar/SpeculativeExecution.h b/contrib/llvm/include/llvm/Transforms/Scalar/SpeculativeExecution.h
new file mode 100644
index 000000000000..068f81776a03
--- /dev/null
+++ b/contrib/llvm/include/llvm/Transforms/Scalar/SpeculativeExecution.h
@@ -0,0 +1,92 @@
+//===- SpeculativeExecution.h -----------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass hoists instructions to enable speculative execution on
+// targets where branches are expensive. This is aimed at GPUs. It
+// currently works on simple if-then and if-then-else
+// patterns.
+//
+// Removing branches is not the only motivation for this
+// pass. E.g. consider this code and assume that there is no
+// addressing mode for multiplying by sizeof(*a):
+//
+// if (b > 0)
+// c = a[i + 1]
+// if (d > 0)
+// e = a[i + 2]
+//
+// turns into
+//
+// p = &a[i + 1];
+// if (b > 0)
+// c = *p;
+// q = &a[i + 2];
+// if (d > 0)
+// e = *q;
+//
+// which could later be optimized to
+//
+// r = &a[i];
+// if (b > 0)
+// c = r[1];
+// if (d > 0)
+// e = r[2];
+//
+// Later passes sink back much of the speculated code that did not enable
+// further optimization.
+//
+// This pass is more aggressive than the function SpeculativelyExecuteBB in
+// SimplifyCFG. SimplifyCFG will not speculate if no selects are introduced and
+// it will speculate at most one instruction. It also will not speculate if
+// there is a value defined in the if-block that is only used in the then-block.
+// These restrictions make sense since the speculation in SimplifyCFG seems
+// aimed at introducing cheap selects, while this pass is intended to do more
+// aggressive speculation while counting on later passes to either capitalize on
+// that or clean it up.
+//
+// If the pass was created by calling
+// createSpeculativeExecutionIfHasBranchDivergencePass or the
+// -spec-exec-only-if-divergent-target option is present, this pass only has an
+// effect on targets where TargetTransformInfo::hasBranchDivergence() is true;
+// on other targets, it is a nop.
+//
+// This lets you include this pass unconditionally in the IR pass pipeline, but
+// only enable it for relevant targets.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_TRANSFORMS_SCALAR_SPECULATIVEEXECUTION_H
+#define LLVM_TRANSFORMS_SCALAR_SPECULATIVEEXECUTION_H
+
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+class SpeculativeExecutionPass
+ : public PassInfoMixin<SpeculativeExecutionPass> {
+public:
+ SpeculativeExecutionPass(bool OnlyIfDivergentTarget = false);
+
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+
+ // Glue for old PM
+ bool runImpl(Function &F, TargetTransformInfo *TTI);
+
+private:
+ bool runOnBasicBlock(BasicBlock &B);
+ bool considerHoistingFromTo(BasicBlock &FromBlock, BasicBlock &ToBlock);
+
+ // If true, this pass is a nop unless the target architecture has branch
+ // divergence.
+ const bool OnlyIfDivergentTarget = false;
+
+ TargetTransformInfo *TTI = nullptr;
+};
+}
+
+#endif //LLVM_TRANSFORMS_SCALAR_SPECULATIVEEXECUTION_H
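
The divergence gating described in the comment is driven entirely by the constructor flag declared above; a hypothetical registration sketch (the function pass manager and its addPass call are assumed, not part of this header):

  SpeculativeExecutionPass RunEverywhere;        // hoists on all targets
  SpeculativeExecutionPass OnlyDivergent(true);  // nop unless the target
                                                 // reports branch divergence
  // FPM.addPass(std::move(OnlyDivergent));      // FPM assumed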
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/ASanStackFrameLayout.h b/contrib/llvm/include/llvm/Transforms/Utils/ASanStackFrameLayout.h
index 4e4f02c84ece..eaad06a10819 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/ASanStackFrameLayout.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/ASanStackFrameLayout.h
@@ -24,30 +24,31 @@ class AllocaInst;
static const int kAsanStackLeftRedzoneMagic = 0xf1;
static const int kAsanStackMidRedzoneMagic = 0xf2;
static const int kAsanStackRightRedzoneMagic = 0xf3;
+static const int kAsanStackUseAfterReturnMagic = 0xf5;
+static const int kAsanStackUseAfterScopeMagic = 0xf8;
// Input/output data struct for ComputeASanStackFrameLayout.
struct ASanStackVariableDescription {
- const char *Name; // Name of the variable that will be displayed by asan
- // if a stack-related bug is reported.
- uint64_t Size; // Size of the variable in bytes.
- size_t Alignment; // Alignment of the variable (power of 2).
- AllocaInst *AI; // The actual AllocaInst.
- size_t Offset; // Offset from the beginning of the frame;
- // set by ComputeASanStackFrameLayout.
+ const char *Name; // Name of the variable that will be displayed by asan
+ // if a stack-related bug is reported.
+ uint64_t Size; // Size of the variable in bytes.
+ size_t LifetimeSize; // Size in bytes to use for lifetime analysis check.
+ // Will be rounded up to Granularity.
+ size_t Alignment; // Alignment of the variable (power of 2).
+ AllocaInst *AI; // The actual AllocaInst.
+ size_t Offset; // Offset from the beginning of the frame;
+ // set by ComputeASanStackFrameLayout.
+ unsigned Line; // Line number.
};
// Output data struct for ComputeASanStackFrameLayout.
struct ASanStackFrameLayout {
- // Frame description, see DescribeAddressIfStack in ASan runtime.
- SmallString<64> DescriptionString;
- // The contents of the shadow memory for the stack frame that we need
- // to set at function entry.
- SmallVector<uint8_t, 64> ShadowBytes;
+ size_t Granularity; // Shadow granularity.
size_t FrameAlignment; // Alignment for the entire frame.
size_t FrameSize; // Size of the frame in bytes.
};
-void ComputeASanStackFrameLayout(
+ASanStackFrameLayout ComputeASanStackFrameLayout(
// The array of stack variables. The elements may get reordered and changed.
SmallVectorImpl<ASanStackVariableDescription> &Vars,
// AddressSanitizer's shadow granularity. Usually 8, may also be 16, 32, 64.
@@ -55,9 +56,25 @@ void ComputeASanStackFrameLayout(
// The minimal size of the left-most redzone (header).
// At least 4 pointer sizes, power of 2, and >= Granularity.
// The resulting FrameSize should be multiple of MinHeaderSize.
- size_t MinHeaderSize,
- // The result is put here.
- ASanStackFrameLayout *Layout);
+ size_t MinHeaderSize);
+
+// Compute frame description, see DescribeAddressIfStack in ASan runtime.
+SmallString<64> ComputeASanStackFrameDescription(
+ const SmallVectorImpl<ASanStackVariableDescription> &Vars);
+
+// Returns shadow bytes with marked red zones. This shadow represents the state
+// of the stack frame when all local variables are inside of their own scope.
+SmallVector<uint8_t, 64>
+GetShadowBytes(const SmallVectorImpl<ASanStackVariableDescription> &Vars,
+ const ASanStackFrameLayout &Layout);
+
+// Returns shadow bytes with marked red zones and after scope. This shadow
+// represents the state of the stack frame when all local variables are outside
+// of their own scope.
+SmallVector<uint8_t, 64> GetShadowBytesAfterScope(
+ // The array of stack variables. The elements may get reordered and changed.
+ const SmallVectorImpl<ASanStackVariableDescription> &Vars,
+ const ASanStackFrameLayout &Layout);
} // llvm namespace
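
Under the reworked interface, the layout, the frame description, and the shadow bytes are obtained through separate calls rather than a single output parameter. A usage sketch based only on the declarations above (Vars is assumed to be a SmallVectorImpl<ASanStackVariableDescription> already populated by the instrumentation code):

  // Layout first, then the human-readable description and the two shadow
  // encodings (variables in scope vs. after their scope has ended).
  ASanStackFrameLayout L = ComputeASanStackFrameLayout(
      Vars, /*Granularity=*/8, /*MinHeaderSize=*/32);
  SmallString<64> Desc = ComputeASanStackFrameDescription(Vars);
  SmallVector<uint8_t, 64> InScope = GetShadowBytes(Vars, L);
  SmallVector<uint8_t, 64> AfterScope = GetShadowBytesAfterScope(Vars, L);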
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/AddDiscriminators.h b/contrib/llvm/include/llvm/Transforms/Utils/AddDiscriminators.h
index 0b3a8add6278..a87758300992 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/AddDiscriminators.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/AddDiscriminators.h
@@ -22,7 +22,7 @@ namespace llvm {
class AddDiscriminatorsPass : public PassInfoMixin<AddDiscriminatorsPass> {
public:
- PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM);
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
} // end namespace llvm
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h b/contrib/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
index 37fd20925cba..3d41dbe2b954 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h
@@ -17,8 +17,11 @@
// FIXME: Move to this file: BasicBlock::removePredecessor, BB::splitBasicBlock
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
+#include "llvm/IR/InstrTypes.h"
+#include <cassert>
namespace llvm {
@@ -29,7 +32,6 @@ class Instruction;
class MDNode;
class ReturnInst;
class TargetLibraryInfo;
-class TerminatorInst;
/// Delete the specified block, which must have no predecessors.
void DeleteDeadBlock(BasicBlock *BB);
@@ -154,7 +156,7 @@ SplitCriticalEdge(BasicBlock *Src, BasicBlock *Dst,
CriticalEdgeSplittingOptions()) {
TerminatorInst *TI = Src->getTerminator();
unsigned i = 0;
- while (1) {
+ while (true) {
assert(i != TI->getNumSuccessors() && "Edge doesn't exist!");
if (TI->getSuccessor(i) == Dst)
return SplitCriticalEdge(TI, i, Options);
@@ -228,8 +230,8 @@ ReturnInst *FoldReturnIntoUncondBranch(ReturnInst *RI, BasicBlock *BB,
BasicBlock *Pred);
/// Split the containing block at the specified instruction - everything before
-/// and including SplitBefore stays in the old basic block, and everything after
-/// SplitBefore is moved to a new block. The two blocks are connected by a
+/// SplitBefore stays in the old basic block, and the rest of the instructions
+/// in the BB are moved to a new block. The two blocks are connected by a
/// conditional branch (with value of Cmp being the condition).
/// Before:
/// Head
@@ -282,6 +284,7 @@ void SplitBlockAndInsertIfThenElse(Value *Cond, Instruction *SplitBefore,
/// instructions in them.
Value *GetIfCondition(BasicBlock *BB, BasicBlock *&IfTrue,
BasicBlock *&IfFalse);
-} // End llvm namespace
-#endif
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_UTILS_BASICBLOCKUTILS_H
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/BreakCriticalEdges.h b/contrib/llvm/include/llvm/Transforms/Utils/BreakCriticalEdges.h
new file mode 100644
index 000000000000..9cc81a176cb6
--- /dev/null
+++ b/contrib/llvm/include/llvm/Transforms/Utils/BreakCriticalEdges.h
@@ -0,0 +1,29 @@
+//===- BreakCriticalEdges.h - Critical Edge Elimination Pass --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// BreakCriticalEdges pass - Break all of the critical edges in the CFG by
+// inserting a dummy basic block. This pass may be "required" by passes that
+// cannot deal with critical edges. For this usage, the structure type is
+// forward declared. This pass obviously invalidates the CFG, but can update
+// dominator trees.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_BREAKCRITICALEDGES_H
+#define LLVM_TRANSFORMS_UTILS_BREAKCRITICALEDGES_H
+
+#include "llvm/IR/Function.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+struct BreakCriticalEdgesPass : public PassInfoMixin<BreakCriticalEdgesPass> {
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+} // namespace llvm
+#endif // LLVM_TRANSFORMS_UTILS_BREAKCRITICALEDGES_H
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/Cloning.h b/contrib/llvm/include/llvm/Transforms/Utils/Cloning.h
index c5fb37007090..5eeb8cf30695 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/Cloning.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/Cloning.h
@@ -21,6 +21,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/IR/ValueMap.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
@@ -176,13 +177,14 @@ void CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
class InlineFunctionInfo {
public:
explicit InlineFunctionInfo(CallGraph *cg = nullptr,
- AssumptionCacheTracker *ACT = nullptr)
- : CG(cg), ACT(ACT) {}
+ std::function<AssumptionCache &(Function &)>
+ *GetAssumptionCache = nullptr)
+ : CG(cg), GetAssumptionCache(GetAssumptionCache) {}
/// CG - If non-null, InlineFunction will update the callgraph to reflect the
/// changes it makes.
CallGraph *CG;
- AssumptionCacheTracker *ACT;
+ std::function<AssumptionCache &(Function &)> *GetAssumptionCache;
/// StaticAllocas - InlineFunction fills this in with all static allocas that
/// get copied into the caller.
@@ -192,9 +194,17 @@ public:
/// inlined from the callee. This is only filled in if CG is non-null.
SmallVector<WeakVH, 8> InlinedCalls;
+ /// All of the new call sites inlined into the caller.
+ ///
+ /// 'InlineFunction' fills this in by scanning the inlined instructions, and
+ /// only if CG is null. If CG is non-null, instead the value handle
+ /// `InlinedCalls` above is used.
+ SmallVector<CallSite, 8> InlinedCallSites;
+
void reset() {
StaticAllocas.clear();
InlinedCalls.clear();
+ InlinedCallSites.clear();
}
};
@@ -208,6 +218,10 @@ public:
/// exists in the instruction stream. Similarly this will inline a recursive
/// function by one level.
///
+/// Note that while this routine is allowed to cleanup and optimize the
+/// *inlined* code to minimize the actual inserted code, it must not delete
+/// code in the caller as users of this routine may have pointers to
+/// instructions in the caller that need to remain stable.
bool InlineFunction(CallInst *C, InlineFunctionInfo &IFI,
AAResults *CalleeAAR = nullptr, bool InsertLifetime = true);
bool InlineFunction(InvokeInst *II, InlineFunctionInfo &IFI,
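
With the callback-based field above, legacy-pass-manager clients can keep using AssumptionCacheTracker by wrapping it in a std::function; note that InlineFunctionInfo stores only a pointer, so the callback object must outlive the inlining calls. A hedged sketch (ACT and CG are assumed to come from the caller's pass; the commented-out call site is illustrative only):

  std::function<AssumptionCache &(Function &)> GetAssumptionCache =
      [&](Function &F) -> AssumptionCache & {
        return ACT->getAssumptionCache(F);  // ACT: AssumptionCacheTracker *
      };
  InlineFunctionInfo IFI(CG, &GetAssumptionCache);
  // InlineFunction(CI, IFI);  // CI is an assumed CallInst* to inline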
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/CmpInstAnalysis.h b/contrib/llvm/include/llvm/Transforms/Utils/CmpInstAnalysis.h
index 73c15e42c359..5ec3888d4538 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/CmpInstAnalysis.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/CmpInstAnalysis.h
@@ -21,13 +21,13 @@ namespace llvm {
class ICmpInst;
class Value;
- /// getICmpCode - Encode a icmp predicate into a three bit mask. These bits
- /// are carefully arranged to allow folding of expressions such as:
+ /// Encode an icmp predicate into a three-bit mask. These bits are carefully
+ /// arranged to allow folding of expressions such as:
///
/// (A < B) | (A > B) --> (A != B)
///
/// Note that this is only valid if the first and second predicates have the
- /// same sign. Is illegal to do: (A u< B) | (A s> B)
+ /// same sign. It is illegal to do: (A u< B) | (A s> B)
///
/// Three bits are used to represent the condition, as follows:
/// 0 A > B
@@ -46,20 +46,25 @@ namespace llvm {
///
unsigned getICmpCode(const ICmpInst *ICI, bool InvertPred = false);
- /// getICmpValue - This is the complement of getICmpCode, which turns an
- /// opcode and two operands into either a constant true or false, or the
- /// predicate for a new ICmp instruction. The sign is passed in to determine
- /// which kind of predicate to use in the new icmp instruction.
+ /// This is the complement of getICmpCode, which turns an opcode and two
+ /// operands into either a constant true or false, or the predicate for a new
+ /// ICmp instruction. The sign is passed in to determine which kind of
+ /// predicate to use in the new icmp instruction.
/// Non-NULL return value will be a true or false constant.
- /// NULL return means a new ICmp is needed. The predicate for which is
- /// output in NewICmpPred.
+ /// NULL return means a new ICmp is needed. The predicate for which is output
+ /// in NewICmpPred.
Value *getICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS,
CmpInst::Predicate &NewICmpPred);
- /// PredicatesFoldable - Return true if both predicates match sign or if at
- /// least one of them is an equality comparison (which is signless).
+ /// Return true if both predicates match sign or if at least one of them is an
+ /// equality comparison (which is signless).
bool PredicatesFoldable(CmpInst::Predicate p1, CmpInst::Predicate p2);
+ /// Decompose an icmp into the form ((X & Y) pred Z) if possible. The returned
+ /// predicate is either == or !=. Returns false if decomposition fails.
+ bool decomposeBitTestICmp(const ICmpInst *I, CmpInst::Predicate &Pred,
+ Value *&X, Value *&Y, Value *&Z);
+
} // end namespace llvm
#endif
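
The three-bit encoding (bit 0 for "A > B", bit 1 for "A == B", bit 2 for "A < B") is what makes the folding shown in the comment a plain bitwise OR of codes. A self-contained toy check of that property (not the LLVM API itself):

  #include <cassert>
  #include <cstdio>

  // bit 0: A > B, bit 1: A == B, bit 2: A < B.
  enum : unsigned { GT = 1u << 0, EQ = 1u << 1, LT = 1u << 2 };

  static unsigned encode(int A, int B) {
    return (A > B ? GT : 0u) | (A == B ? EQ : 0u) | (A < B ? LT : 0u);
  }

  int main() {
    unsigned Folded = LT | GT;          // (A < B) | (A > B)
    for (int A = -2; A <= 2; ++A)
      for (int B = -2; B <= 2; ++B)
        assert(((encode(A, B) & Folded) != 0) == (A != B));
    std::puts("(A < B) | (A > B) folds to A != B");
    return 0;
  }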
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/contrib/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
index 30dafd045f23..a2978663a4d1 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/CodeExtractor.h
@@ -20,6 +20,9 @@
namespace llvm {
template <typename T> class ArrayRef;
class BasicBlock;
+ class BlockFrequency;
+ class BlockFrequencyInfo;
+ class BranchProbabilityInfo;
class DominatorTree;
class Function;
class Loop;
@@ -47,6 +50,8 @@ template <typename T> class ArrayRef;
// Various bits of state computed on construction.
DominatorTree *const DT;
const bool AggregateArgs;
+ BlockFrequencyInfo *BFI;
+ BranchProbabilityInfo *BPI;
// Bits of intermediate state computed at various phases of extraction.
SetVector<BasicBlock *> Blocks;
@@ -54,11 +59,19 @@ template <typename T> class ArrayRef;
Type *RetTy;
public:
+
+ /// \brief Check to see if a block is valid for extraction.
+ ///
+ /// Blocks containing EHPads, allocas, invokes, or vastarts are not valid.
+ static bool isBlockValidForExtraction(const BasicBlock &BB);
+
/// \brief Create a code extractor for a single basic block.
///
/// In this formation, we don't require a dominator tree. The given basic
/// block is set up for extraction.
- CodeExtractor(BasicBlock *BB, bool AggregateArgs = false);
+ CodeExtractor(BasicBlock *BB, bool AggregateArgs = false,
+ BlockFrequencyInfo *BFI = nullptr,
+ BranchProbabilityInfo *BPI = nullptr);
/// \brief Create a code extractor for a sequence of blocks.
///
@@ -67,20 +80,24 @@ template <typename T> class ArrayRef;
/// sequence out into its new function. When a DominatorTree is also given,
/// extra checking and transformations are enabled.
CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT = nullptr,
- bool AggregateArgs = false);
+ bool AggregateArgs = false, BlockFrequencyInfo *BFI = nullptr,
+ BranchProbabilityInfo *BPI = nullptr);
/// \brief Create a code extractor for a loop body.
///
/// Behaves just like the generic code sequence constructor, but uses the
/// block sequence of the loop.
- CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs = false);
+ CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs = false,
+ BlockFrequencyInfo *BFI = nullptr,
+ BranchProbabilityInfo *BPI = nullptr);
/// \brief Create a code extractor for a region node.
///
/// Behaves just like the generic code sequence constructor, but uses the
/// block sequence of the region node passed in.
CodeExtractor(DominatorTree &DT, const RegionNode &RN,
- bool AggregateArgs = false);
+ bool AggregateArgs = false, BlockFrequencyInfo *BFI = nullptr,
+ BranchProbabilityInfo *BPI = nullptr);
/// \brief Perform the extraction, returning the new function.
///
@@ -116,6 +133,11 @@ template <typename T> class ArrayRef;
void moveCodeToFunction(Function *newFunction);
+ void calculateNewCallTerminatorWeights(
+ BasicBlock *CodeReplacer,
+ DenseMap<BasicBlock *, BlockFrequency> &ExitWeights,
+ BranchProbabilityInfo *BPI);
+
void emitCallAndSwitchStatement(Function *newFunction,
BasicBlock *newHeader,
ValueSet &inputs,
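
The widened constructors let a caller thread block frequency and branch probability information through extraction so the new call site gets sensible weights. A hedged construction sketch using only the declarations above (DT, L, BFI, and BPI are assumed to come from the caller's analyses; the extraction entry point itself is not shown in this hunk):

  static bool canBuildLoopExtractor(DominatorTree &DT, Loop &L,
                                    BlockFrequencyInfo *BFI,
                                    BranchProbabilityInfo *BPI) {
    // The new static helper pre-screens blocks before an extractor is built.
    for (BasicBlock *BB : L.blocks())
      if (!CodeExtractor::isBlockValidForExtraction(*BB))
        return false;
    CodeExtractor CE(DT, L, /*AggregateArgs=*/false, BFI, BPI);
    return true;  // an extractor built this way is ready to run extraction
  }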
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/EscapeEnumerator.h b/contrib/llvm/include/llvm/Transforms/Utils/EscapeEnumerator.h
new file mode 100644
index 000000000000..80d16ed4cf5b
--- /dev/null
+++ b/contrib/llvm/include/llvm/Transforms/Utils/EscapeEnumerator.h
@@ -0,0 +1,49 @@
+//===-- EscapeEnumerator.h --------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines a helper class that enumerates all possible exits from a function,
+// including exception handling.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_ESCAPEENUMERATOR_H
+#define LLVM_TRANSFORMS_UTILS_ESCAPEENUMERATOR_H
+
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Function.h"
+
+namespace llvm {
+
+/// EscapeEnumerator - This is a little algorithm to find all escape points
+/// from a function so that "finally"-style code can be inserted. In addition
+/// to finding the existing return and unwind instructions, it also (if
+/// necessary) transforms any call instructions into invokes and sends them to
+/// a landing pad.
+class EscapeEnumerator {
+ Function &F;
+ const char *CleanupBBName;
+
+ Function::iterator StateBB, StateE;
+ IRBuilder<> Builder;
+ bool Done;
+ bool HandleExceptions;
+
+public:
+ EscapeEnumerator(Function &F, const char *N = "cleanup",
+ bool HandleExceptions = true)
+ : F(F), CleanupBBName(N), StateBB(F.begin()), StateE(F.end()),
+ Builder(F.getContext()), Done(false),
+ HandleExceptions(HandleExceptions) {}
+
+ IRBuilder<> *Next();
+};
+
+}
+
+#endif // LLVM_TRANSFORMS_UTILS_ESCAPEENUMERATOR_H
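
Next() hands back an IRBuilder positioned at each escape point in turn, and is expected to return null once all exits have been visited, so instrumentation code typically drives it with a simple loop. A hedged sketch (CleanupFn is an assumed cleanup callee):

  EscapeEnumerator EE(F, "finally_cleanup", /*HandleExceptions=*/true);
  while (IRBuilder<> *AtExit = EE.Next())
    AtExit->CreateCall(CleanupFn);   // insert "finally"-style code here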
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/FunctionComparator.h b/contrib/llvm/include/llvm/Transforms/Utils/FunctionComparator.h
new file mode 100644
index 000000000000..a613fc31a5e3
--- /dev/null
+++ b/contrib/llvm/include/llvm/Transforms/Utils/FunctionComparator.h
@@ -0,0 +1,376 @@
+//===- FunctionComparator.h - Function Comparator ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the FunctionComparator and GlobalNumberState classes which
+// are used by the MergeFunctions pass for comparing functions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_FUNCTIONCOMPARATOR_H
+#define LLVM_TRANSFORMS_UTILS_FUNCTIONCOMPARATOR_H
+
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/ValueMap.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/Casting.h"
+#include <cstdint>
+#include <tuple>
+
+namespace llvm {
+
+class GetElementPtrInst;
+
+/// GlobalNumberState assigns an integer to each global value in the program,
+/// which is used by the comparison routine to order references to globals. This
+/// state must be preserved throughout the pass, because Functions and other
+/// globals need to maintain their relative order. Globals are assigned a number
+/// when they are first visited. This order is deterministic, and so the
+/// assigned numbers are as well. When two functions are merged, neither number
+/// is updated. If the symbols are weak, this would be incorrect. If they are
+/// strong, then one will be replaced at all references to the other, and so
+/// direct callsites will now see one or the other symbol, and no update is
+/// necessary. Note that if we were guaranteed unique names, we could just
+/// compare those, but this would not work for stripped bitcodes or for those
+/// few symbols without a name.
+class GlobalNumberState {
+ struct Config : ValueMapConfig<GlobalValue*> {
+ enum { FollowRAUW = false };
+ };
+ // Each GlobalValue is mapped to an identifier. The Config ensures when RAUW
+ // occurs, the mapping does not change. Tracking changes is unnecessary, and
+ // also problematic for weak symbols (which may be overwritten).
+ typedef ValueMap<GlobalValue *, uint64_t, Config> ValueNumberMap;
+ ValueNumberMap GlobalNumbers;
+ // The next unused serial number to assign to a global.
+ uint64_t NextNumber = 0;
+
+public:
+ GlobalNumberState() = default;
+
+ uint64_t getNumber(GlobalValue* Global) {
+ ValueNumberMap::iterator MapIter;
+ bool Inserted;
+ std::tie(MapIter, Inserted) = GlobalNumbers.insert({Global, NextNumber});
+ if (Inserted)
+ NextNumber++;
+ return MapIter->second;
+ }
+
+ void clear() {
+ GlobalNumbers.clear();
+ }
+};
+
+/// FunctionComparator - Compares two functions to determine whether or not
+/// they will generate machine code with the same behaviour. DataLayout is
+/// used if available. The comparator always fails conservatively (erring on the
+/// side of claiming that two functions are different).
+class FunctionComparator {
+public:
+ FunctionComparator(const Function *F1, const Function *F2,
+ GlobalNumberState* GN)
+ : FnL(F1), FnR(F2), GlobalNumbers(GN) {}
+
+ /// Test whether the two functions have equivalent behaviour.
+ int compare();
+ /// Hash a function. Equivalent functions will have the same hash, and unequal
+ /// functions will have different hashes with high probability.
+ typedef uint64_t FunctionHash;
+ static FunctionHash functionHash(Function &);
+
+protected:
+ /// Start the comparison.
+ void beginCompare() {
+ sn_mapL.clear();
+ sn_mapR.clear();
+ }
+
+ /// Compares the signature and other general attributes of the two functions.
+ int compareSignature() const;
+
+ /// Test whether two basic blocks have equivalent behaviour.
+ int cmpBasicBlocks(const BasicBlock *BBL, const BasicBlock *BBR) const;
+
+ /// Constants comparison.
+ /// It is analogous to a lexicographical comparison between hypothetical
+ /// numbers of the following format:
+ /// <bitcastability-trait><raw-bit-contents>
+ ///
+ /// 1. Bitcastability.
+ /// Check whether L's type could be losslessly bitcasted to R's type.
+ /// At this stage, if a lossless bitcast is not possible, the method
+ /// returns -1 or 1, thus also defining which type is greater in the
+ /// context of bitcastability.
+ /// Stage 0: If types are equal in terms of cmpTypes, then we can go straight
+ /// to the contents comparison.
+ /// If types differ, remember types comparison result and check
+ /// whether we still can bitcast types.
+ /// Stage 1: Types that satisfy the isFirstClassType conditions are always
+ /// greater than others.
+ /// Stage 2: Vector is greater than non-vector.
+ /// If both types are vectors, then vector with greater bitwidth is
+ /// greater.
+ /// If both types are vectors with the same bitwidth, then types
+ /// are bitcastable, and we can skip other stages, and go to contents
+ /// comparison.
+ /// Stage 3: Pointer types are greater than non-pointers. If both types are
+ /// pointers of the same address space - go to contents comparison.
+ /// Different address spaces: pointer with greater address space is
+ /// greater.
+ /// Stage 4: Types are neither vectors, nor pointers. And they differ.
+ /// We don't know how to bitcast them. So, we better don't do it,
+ /// and return types comparison result (so it determines the
+ /// relationship among constants we don't know how to bitcast).
+ ///
+ /// Just for clarity, let's see how the set of constants could look
+ /// on a single-dimension axis:
+ ///
+ /// [NFCT], [FCT, "others"], [FCT, pointers], [FCT, vectors]
+ /// Where: NFCT - Not a FirstClassType
+ /// FCT - FirstClassType
+ ///
+ /// 2. Compare raw contents.
+ /// It ignores types on this stage and only compares bits from L and R.
+ /// Returns 0 if L and R have equivalent contents.
+ /// -1 or 1 if values are different.
+ /// Pretty trivial:
+ /// 2.1. If contents are numbers, compare numbers.
+ /// Ints with greater bitwidth are greater. Ints with the same bitwidth
+ /// are compared by their contents.
+ /// 2.2. "And so on". To avoid discrepancies with these comments, it is
+ /// perhaps better to read the implementation itself.
+ /// 3. And again about overall picture. Let's look back at how the ordered set
+ /// of constants will look like:
+ /// [NFCT], [FCT, "others"], [FCT, pointers], [FCT, vectors]
+ ///
+ /// Now look, what could be inside [FCT, "others"], for example:
+ /// [FCT, "others"] =
+ /// [
+ /// [double 0.1], [double 1.23],
+ /// [i32 1], [i32 2],
+ /// { double 1.0 }, ; StructTyID, NumElements = 1
+ /// { i32 1 }, ; StructTyID, NumElements = 1
+ /// { double 1, i32 1 }, ; StructTyID, NumElements = 2
+ /// { i32 1, double 1 } ; StructTyID, NumElements = 2
+ /// ]
+ ///
+ /// Let's explain the order. Float numbers will be less than integers, just
+ /// because of cmpType terms: FloatTyID < IntegerTyID.
+ /// Floats (with same fltSemantics) are sorted according to their value.
+ /// Then come the integers, which, like the floats, can easily be
+ /// sorted among each other.
+ /// The structures. Structures are grouped at the tail, again because of their
+ /// TypeID: StructTyID > IntegerTyID > FloatTyID.
+ /// Structures with a greater number of elements are greater. Among structures
+ /// of equal size, the one whose leading elements compare greater is greater.
+ /// The same logic applies to vectors, arrays and other complex types.
+ ///
+ /// Bitcastable constants.
+ /// Let's assume that some constant belongs to some group of
+ /// "so-called-equal" values with different types, and at the same time
+ /// belongs to another group of constants with equal types
+ /// and "really" equal values.
+ ///
+ /// Now, prove that this is impossible:
+ ///
+ /// If constant A with type TyA is bitcastable to B with type TyB, then:
+ /// 1. All constants with equal types to TyA, are bitcastable to B. Since
+ /// those should be vectors (if TyA is vector), pointers
+ /// (if TyA is pointer), or else (if TyA equal to TyB), those types should
+ /// be equal to TyB.
+ /// 2. All constants with non-equal, but bitcastable types to TyA, are
+ /// bitcastable to B.
+ /// Once again, just because we allow it to vectors and pointers only.
+ /// This statement could be expanded as below:
+ /// 2.1. All vectors with bitwidth equal to vector A have bitwidth equal to
+ /// vector B, and thus are bitcastable to B as well.
+ /// 2.2. All pointers of the same address space, no matter what they point to,
+ /// are bitcastable. So if C is a pointer, it can be bitcast to A and to B.
+ /// So any constant equal or bitcastable to A is equal or bitcastable to B.
+ /// QED.
+ ///
+ /// In other words, for pointers and vectors, we ignore the top-level type and
+ /// look at their particular properties (bit-width for vectors, and
+ /// address space for pointers).
+ /// If these properties are equal - compare their contents.
+ int cmpConstants(const Constant *L, const Constant *R) const;
+
+ /// Compares two global values by number. Uses the GlobalNumberState to
+ /// identify the same globals across function calls.
+ int cmpGlobalValues(GlobalValue *L, GlobalValue *R) const;
+
+ /// Assign or look up previously assigned numbers for the two values, and
+ /// return whether the numbers are equal. Numbers are assigned in the order
+ /// visited.
+ /// Comparison order:
+ /// Stage 0: A value that is the function itself is always greater than others.
+ /// If left and right values are references to their functions, then
+ /// they are equal.
+ /// Stage 1: Constants are greater than non-constants.
+ /// If both left and right are constants, then the result of
+ /// cmpConstants is used as cmpValues result.
+ /// Stage 2: InlineAsm instances are greater than others. If both left and
+ /// right are InlineAsm instances, the InlineAsm* pointers are cast to
+ /// integers and compared as numbers.
+ /// Stage 3: For all other cases we compare the order in which we meet these
+ /// values in their functions. If the right value was met first during
+ /// scanning, then the left value is greater.
+ /// In other words, we compare serial numbers; for more details
+ /// see the comments for sn_mapL and sn_mapR.
+ int cmpValues(const Value *L, const Value *R) const;
+
+ /// Compare two Instructions for equivalence, similar to
+ /// Instruction::isSameOperationAs.
+ ///
+ /// Stages are listed in "most significant stage first" order:
+ /// On each stage below, we do comparison between some left and right
+ /// operation parts. If parts are non-equal, we assign parts comparison
+ /// result to the operation comparison result and exit from method.
+ /// Otherwise we proceed to the next stage.
+ /// Stages:
+ /// 1. Operations opcodes. Compared as numbers.
+ /// 2. Number of operands.
+ /// 3. Operation types. Compared with cmpType method.
+ /// 4. Compare operation subclass optional data as stream of bytes:
+ /// just convert it to integers and call cmpNumbers.
+ /// 5. Compare operand types with cmpType in
+ /// most-significant-operand-first order.
+ /// 6. Last stage. Check operations for some specific attributes.
+ /// For example, for Load it would be:
+ /// 6.1.Load: volatile (as boolean flag)
+ /// 6.2.Load: alignment (as integer numbers)
+ /// 6.3.Load: ordering (as underlying enum class value)
+ /// 6.4.Load: synch-scope (as integer numbers)
+ /// 6.5.Load: range metadata (as integer ranges)
+ /// At this stage it is better to look at the code, since it is no more than
+ /// 10-15 lines per particular instruction, and could change over time.
+ ///
+ /// Sets \p needToCmpOperands to true if the operands of the instructions
+ /// still must be compared afterwards. In this case it's already guaranteed
+ /// that both instructions have the same number of operands.
+ int cmpOperations(const Instruction *L, const Instruction *R,
+ bool &needToCmpOperands) const;
+
+ /// cmpTypes - compares two types,
+ /// defines a total ordering over the set of types.
+ ///
+ /// Return values:
+ /// 0 if types are equal,
+ /// -1 if Left is less than Right,
+ /// +1 if Left is greater than Right.
+ ///
+ /// Description:
+ /// Comparison is broken into stages. As in a lexicographical comparison,
+ /// a stage coming earlier has higher priority.
+ /// On each explanation stage keep in mind total ordering properties.
+ ///
+ /// 0. Before comparison we coerce pointer types in address space 0 to
+ /// integer.
+ /// We also don't bother with same type at left and right, so
+ /// just return 0 in this case.
+ ///
+ /// 1. If types are of different kind (different type IDs).
+ /// Return result of type IDs comparison, treating them as numbers.
+ /// 2. If types are integers, check that they have the same width. If they
+ /// are vectors, check that they have the same count and subtype.
+ /// 3. Types have the same ID, so check whether they are one of:
+ /// * Void
+ /// * Float
+ /// * Double
+ /// * X86_FP80
+ /// * FP128
+ /// * PPC_FP128
+ /// * Label
+ /// * Metadata
+ /// We can treat these types as equal whenever their IDs are same.
+ /// 4. If Left and Right are pointers, return result of address space
+ /// comparison (numbers comparison). We can treat pointer types of same
+ /// address space as equal.
+ /// 5. If types are complex.
+ /// Then both Left and Right are to be expanded and their element types will
+ /// be checked in the same way. If we get Res != 0 on some stage, return it.
+ /// Otherwise return 0.
+ /// 6. For all other cases put llvm_unreachable.
+ int cmpTypes(Type *TyL, Type *TyR) const;
+
+ int cmpNumbers(uint64_t L, uint64_t R) const;
+ int cmpAPInts(const APInt &L, const APInt &R) const;
+ int cmpAPFloats(const APFloat &L, const APFloat &R) const;
+ int cmpMem(StringRef L, StringRef R) const;
+
+ // The two functions undergoing comparison.
+ const Function *FnL, *FnR;
+
+private:
+ int cmpOrderings(AtomicOrdering L, AtomicOrdering R) const;
+ int cmpInlineAsm(const InlineAsm *L, const InlineAsm *R) const;
+ int cmpAttrs(const AttributeSet L, const AttributeSet R) const;
+ int cmpRangeMetadata(const MDNode *L, const MDNode *R) const;
+ int cmpOperandBundlesSchema(const Instruction *L, const Instruction *R) const;
+
+ /// Compare two GEPs for equivalent pointer arithmetic.
+ /// Parts to be compared for each comparison stage,
+ /// most significant stage first:
+ /// 1. Address space. As numbers.
+ /// 2. Constant offset, (using GEPOperator::accumulateConstantOffset method).
+ /// 3. Pointer operand type (using cmpType method).
+ /// 4. Number of operands.
+ /// 5. Compare operands, using cmpValues method.
+ int cmpGEPs(const GEPOperator *GEPL, const GEPOperator *GEPR) const;
+ int cmpGEPs(const GetElementPtrInst *GEPL,
+ const GetElementPtrInst *GEPR) const {
+ return cmpGEPs(cast<GEPOperator>(GEPL), cast<GEPOperator>(GEPR));
+ }
+
+ /// Assign serial numbers to values from left function, and values from
+ /// right function.
+ /// Explanation:
+ /// When comparing functions we need to compare the values we meet on the
+ /// left and right sides.
+ /// It is easy to sort things out for external values: they simply have to be
+ /// the same value on the left and on the right.
+ /// But for local values (those introduced inside the function body)
+ /// we have to ensure they were introduced at exactly the same place
+ /// and play the same role.
+ /// We assign a serial number to each value the first time we meet it.
+ /// Values that were met at the same place will have the same serial numbers.
+ /// With this in mind, it is worth explaining a few points about values
+ /// assigned to BBs and other implementation choices (see below).
+ ///
+ /// 1. Safety of BB reordering.
+ /// It's safe to change the order of BasicBlocks in a function.
+ /// The relationship with other functions and the serial numbering will not
+ /// change in this case.
+ /// As follows from FunctionComparator::compare(), we do a CFG walk: we start
+ /// from the entry and then take each terminator. So it doesn't matter how the
+ /// BBs are in fact ordered in the function. And since cmpValues is called
+ /// during this walk, the numbering depends only on how the BBs are located
+ /// inside the CFG. So the answer is yes: we will get the same numbering.
+ ///
+ /// 2. Impossibility to use dominance properties of values.
+ /// If we compare two instruction operands, where the first is a use of local
+ /// variable AL from function FL and the second is a use of local variable AR
+ /// from FR, we could compare their origins and check whether they are
+ /// defined at the same place.
+ /// But, we are still not able to compare operands of PHI nodes, since those
+ /// could be operands from further BBs we didn't scan yet.
+ /// So it's impossible to use dominance properties in general.
+ mutable DenseMap<const Value*, int> sn_mapL, sn_mapR;
+
+ // The global state we will use
+ GlobalNumberState* GlobalNumbers;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_UTILS_FUNCTIONCOMPARATOR_H
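The comparison stages documented above compose lexicographically: each cmp* helper returns -1, 0 or 1 and the first non-zero stage result decides the ordering, while cmpValues relies on the serial numbers kept in sn_mapL/sn_mapR. A minimal editorial sketch of that scheme follows; the helper names and map types are simplified stand-ins, not the actual FunctionComparator implementation.

// Editorial sketch only; simplified types, not the real FunctionComparator.
#include <cstdint>
#include <map>

static int cmpNumbersSketch(uint64_t L, uint64_t R) {
  if (L < R) return -1;
  return L == R ? 0 : 1;
}

// Serial numbering: a value gets a number the first time it is met, so two
// functions compare equal exactly when corresponding values appear in the
// same order during the CFG walk described above.
static int cmpValuesSketch(std::map<const void *, int> &SNMapL,
                           std::map<const void *, int> &SNMapR,
                           const void *VL, const void *VR) {
  int SNL = SNMapL.emplace(VL, static_cast<int>(SNMapL.size())).first->second;
  int SNR = SNMapR.emplace(VR, static_cast<int>(SNMapR.size())).first->second;
  // Lexicographic composition: the first non-zero stage result wins.
  return cmpNumbersSketch(SNL, SNR);
}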
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h b/contrib/llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h
index 3b94ef60be5d..57b7d0fcd7cc 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/FunctionImportUtils.h
@@ -41,7 +41,7 @@ class FunctionImportGlobalProcessing {
bool HasExportedFunctions = false;
/// Check if we should promote the given local value to global scope.
- bool doPromoteLocalToGlobal(const GlobalValue *SGV);
+ bool shouldPromoteLocalToGlobal(const GlobalValue *SGV);
/// Helper methods to check if we are importing from or potentially
/// exporting from the current source module.
@@ -54,8 +54,9 @@ class FunctionImportGlobalProcessing {
/// Get the name for SGV that should be used in the linked destination
/// module. Specifically, this handles the case where we need to rename
- /// a local that is being promoted to global scope.
- std::string getName(const GlobalValue *SGV);
+ /// a local that is being promoted to global scope, which it will always
+ /// do when \p DoPromote is true (or when importing a local).
+ std::string getName(const GlobalValue *SGV, bool DoPromote);
/// Process globals so that they can be used in ThinLTO. This includes
/// promoting local variables so that they can be reference externally by
@@ -66,8 +67,9 @@ class FunctionImportGlobalProcessing {
/// Get the new linkage for SGV that should be used in the linked destination
/// module. Specifically, for ThinLTO importing or exporting it may need
- /// to be adjusted.
- GlobalValue::LinkageTypes getLinkage(const GlobalValue *SGV);
+ /// to be adjusted. When \p DoPromote is true then we must adjust the
+ /// linkage for a required promotion of a local to global scope.
+ GlobalValue::LinkageTypes getLinkage(const GlobalValue *SGV, bool DoPromote);
public:
FunctionImportGlobalProcessing(
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h b/contrib/llvm/include/llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h
new file mode 100644
index 000000000000..bb7fa523cb19
--- /dev/null
+++ b/contrib/llvm/include/llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h
@@ -0,0 +1,107 @@
+//===-- ImportedFunctionsInliningStats.h ------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Generating inliner statistics for imported functions, mostly useful for
+// ThinLTO.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_IMPORTEDFUNCTIONSINLININGSTATISTICS_H
+#define LLVM_TRANSFORMS_UTILS_IMPORTEDFUNCTIONSINLININGSTATISTICS_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringMap.h"
+#include <string>
+#include <vector>
+
+namespace llvm {
+class Module;
+class Function;
+/// \brief Calculate and dump ThinLTO specific inliner stats.
+/// The main statistics are:
+/// (1) Number of inlined imported functions,
+/// (2) Number of imported functions inlined into importing module (indirect),
+/// (3) Number of non imported functions inlined into importing module
+/// (indirect).
+ /// The difference between the first and the second is that the first stat
+ /// counts all performed inlines of imported functions, while the second one
+ /// counts only the functions that have eventually been inlined into a
+ /// function in the importing module (by a chain of inlines). Because llvm
+ /// uses a bottom-up inliner, it is possible to e.g. import functions `A` and
+ /// `B`, then inline `B` into `A`, after which `A` might be too big to be
+ /// inlined into some other function that calls it. The statistic is computed
+ /// by building a graph whose nodes are functions and whose edges are
+ /// performed inlines, and then marking the edges starting from non-imported
+ /// functions.
+///
+ /// If `Verbose` is set to true, it also dumps statistics
+ /// for each inlined function, sorted by descending inline count:
+ /// - number of performed inlines
+ /// - number of performed inlines into the importing module
+class ImportedFunctionsInliningStatistics {
+private:
+ /// InlineGraphNode represents a node in the graph of inlined functions.
+ struct InlineGraphNode {
+ // Default-constructible and movable.
+ InlineGraphNode() = default;
+ InlineGraphNode(InlineGraphNode &&) = default;
+ InlineGraphNode &operator=(InlineGraphNode &&) = default;
+
+ llvm::SmallVector<InlineGraphNode *, 8> InlinedCallees;
+ /// Incremented every direct inline.
+ int32_t NumberOfInlines = 0;
+ /// Number of inlines into non-imported functions (possibly indirect via
+ /// intermediate inlines). Computed based on graph search.
+ int32_t NumberOfRealInlines = 0;
+ bool Imported = false;
+ bool Visited = false;
+ };
+
+public:
+ ImportedFunctionsInliningStatistics() = default;
+ ImportedFunctionsInliningStatistics(
+ const ImportedFunctionsInliningStatistics &) = delete;
+
+ /// Set information like AllFunctions, ImportedFunctions, ModuleName.
+ void setModuleInfo(const Module &M);
+ /// Record the inlining of @param Callee into @param Caller for statistics.
+ void recordInline(const Function &Caller, const Function &Callee);
+ /// Dump stats computed with InlinerStatistics class.
+ /// If @param Verbose is true then separate statistics for every inlined
+ /// function will be printed.
+ void dump(bool Verbose);
+
+private:
+ /// Creates a new node in NodeMap and sets its attributes, or returns the existing one.
+ InlineGraphNode &createInlineGraphNode(const Function &);
+ void calculateRealInlines();
+ void dfs(InlineGraphNode &GraphNode);
+
+ using NodesMapTy =
+ llvm::StringMap<std::unique_ptr<InlineGraphNode>>;
+ using SortedNodesTy =
+ std::vector<const NodesMapTy::MapEntryTy*>;
+ /// Returns a vector of elements sorted by
+ /// (-NumberOfInlines, -NumberOfRealInlines, FunctionName).
+ SortedNodesTy getSortedNodes();
+
+private:
+ /// This map manages the lifetime of all InlineGraphNodes. Unique pointers to
+ /// InlineGraphNode are used because the node pointers are also saved in the
+ /// InlinedCallees vectors. If the map stored InlineGraphNode by value, the
+ /// address of a node would not be stable.
+ NodesMapTy NodesMap;
+ /// Non-external functions that have some other function inlined into them.
+ std::vector<StringRef> NonImportedCallers;
+ int AllFunctions = 0;
+ int ImportedFunctions = 0;
+ StringRef ModuleName;
+};
+
+} // llvm
+
+#endif // LLVM_TRANSFORMS_UTILS_IMPORTEDFUNCTIONSINLININGSTATISTICS_H
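The second and third statistics described above come from a walk of the inline graph starting at the non-imported callers, crediting each callee reached along InlinedCallees edges. A rough editorial sketch of that marking step follows, using standalone types instead of the real NodesMap; the actual dfs/calculateRealInlines live in the corresponding .cpp file.

// Editorial sketch only; simplified types standing in for InlineGraphNode.
#include <vector>

struct InlineNodeSketch {
  std::vector<InlineNodeSketch *> InlinedCallees;
  int NumberOfInlines = 0;     // bumped on every recorded direct inline
  int NumberOfRealInlines = 0; // filled in by the walk below
  bool Visited = false;
};

static void dfsSketch(InlineNodeSketch &Node) {
  Node.Visited = true;
  for (InlineNodeSketch *Callee : Node.InlinedCallees) {
    // Every inline of Callee eventually landed in the importing module,
    // because we reached Callee from a non-imported root.
    Callee->NumberOfRealInlines += Callee->NumberOfInlines;
    if (!Callee->Visited)
      dfsSketch(*Callee);
  }
}

// calculateRealInlines would start one walk per non-imported caller.
static void calculateRealInlinesSketch(std::vector<InlineNodeSketch *> &Roots) {
  for (InlineNodeSketch *Root : Roots)
    if (!Root->Visited)
      dfsSketch(*Root);
}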
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/LCSSA.h b/contrib/llvm/include/llvm/Transforms/Utils/LCSSA.h
index f0277d021541..fe717e5f6635 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/LCSSA.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/LCSSA.h
@@ -37,7 +37,7 @@ namespace llvm {
/// Converts loops into loop-closed SSA form.
class LCSSAPass : public PassInfoMixin<LCSSAPass> {
public:
- PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM);
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
} // end namespace llvm
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/LibCallsShrinkWrap.h b/contrib/llvm/include/llvm/Transforms/Utils/LibCallsShrinkWrap.h
new file mode 100644
index 000000000000..c9df532e5794
--- /dev/null
+++ b/contrib/llvm/include/llvm/Transforms/Utils/LibCallsShrinkWrap.h
@@ -0,0 +1,27 @@
+//===- LibCallsShrinkWrap.h - Shrink Wrap Library Calls -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_LIBCALLSSHRINKWRAP_H
+#define LLVM_TRANSFORMS_UTILS_LIBCALLSSHRINKWRAP_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class LibCallsShrinkWrapPass : public PassInfoMixin<LibCallsShrinkWrapPass> {
+public:
+ static StringRef name() { return "LibCallsShrinkWrapPass"; }
+
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
+};
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_UTILS_LIBCALLSSHRINKWRAP_H
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/Local.h b/contrib/llvm/include/llvm/Transforms/Utils/Local.h
index 43b376cf8068..490a765c3fab 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/Local.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/Local.h
@@ -32,6 +32,7 @@ class BranchInst;
class Instruction;
class CallInst;
class DbgDeclareInst;
+class DbgValueInst;
class StoreInst;
class LoadInst;
class Value;
@@ -48,6 +49,8 @@ class LazyValueInfo;
template<typename T> class SmallVectorImpl;
+typedef SmallVector<DbgValueInst *, 1> DbgValueList;
+
//===----------------------------------------------------------------------===//
// Local constant propagation.
//
@@ -161,10 +164,15 @@ AllocaInst *DemoteRegToStack(Instruction &X,
/// deleted and it returns the pointer to the alloca inserted.
AllocaInst *DemotePHIToStack(PHINode *P, Instruction *AllocaPoint = nullptr);
-/// If the specified pointer has an alignment that we can determine, return it,
-/// otherwise return 0. If PrefAlign is specified, and it is more than the
-/// alignment of the ultimate object, see if we can increase the alignment of
-/// the ultimate object, making this check succeed.
+/// Try to ensure that the alignment of \p V is at least \p PrefAlign bytes. If
+/// the owning object can be modified and has an alignment less than \p
+/// PrefAlign, it will be increased and \p PrefAlign returned. If the alignment
+/// cannot be increased, the known alignment of the value is returned.
+///
+/// It is not always possible to modify the alignment of the underlying object,
+/// so if alignment is important, a more reliable approach is to simply align
+/// all global variables and allocation instructions to their preferred
+/// alignment from the beginning.
unsigned getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
const DataLayout &DL,
const Instruction *CxtI = nullptr,
@@ -209,7 +217,7 @@ Value *EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &DL, User *GEP,
continue;
// Handle a struct index, which adds its field offset to the pointer.
- if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ if (StructType *STy = GTI.getStructTypeOrNull()) {
if (OpC->getType()->isVectorTy())
OpC = OpC->getSplatValue();
@@ -250,14 +258,19 @@ Value *EmitGEPOffset(IRBuilderTy *Builder, const DataLayout &DL, User *GEP,
/// Inserts a llvm.dbg.value intrinsic before a store to an alloca'd value
/// that has an associated llvm.dbg.decl intrinsic.
-bool ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
+void ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
StoreInst *SI, DIBuilder &Builder);
/// Inserts a llvm.dbg.value intrinsic before a load of an alloca'd value
/// that has an associated llvm.dbg.decl intrinsic.
-bool ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
+void ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
LoadInst *LI, DIBuilder &Builder);
+/// Inserts a llvm.dbg.value intrinsic after a phi of an alloca'd value
+/// that has an associated llvm.dbg.decl intrinsic.
+void ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
+ PHINode *LI, DIBuilder &Builder);
+
/// Lowers llvm.dbg.declare intrinsics into appropriate set of
/// llvm.dbg.value intrinsics.
bool LowerDbgDeclare(Function &F);
@@ -265,6 +278,9 @@ bool LowerDbgDeclare(Function &F);
/// Finds the llvm.dbg.declare intrinsic corresponding to an alloca, if any.
DbgDeclareInst *FindAllocaDbgDeclare(Value *V);
+/// Finds the llvm.dbg.value intrinsics corresponding to an alloca, if any.
+void FindAllocaDbgValues(DbgValueList &DbgValues, Value *V);
+
/// Replaces llvm.dbg.declare instruction when the address it describes
/// is replaced with a new value. If Deref is true, an additional DW_OP_deref is
/// prepended to the expression. If Offset is non-zero, a constant displacement
@@ -296,7 +312,15 @@ unsigned removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB);
/// Insert an unreachable instruction before the specified
/// instruction, making it and the rest of the code in the block dead.
-unsigned changeToUnreachable(Instruction *I, bool UseLLVMTrap);
+unsigned changeToUnreachable(Instruction *I, bool UseLLVMTrap,
+ bool PreserveLCSSA = false);
+
+/// Convert the CallInst to InvokeInst with the specified unwind edge basic
+/// block. This also splits the basic block where CI is located, because
+/// InvokeInst is a terminator instruction. Returns the newly split basic
+/// block.
+BasicBlock *changeToInvokeAndSplitBasicBlock(CallInst *CI,
+ BasicBlock *UnwindEdge);
/// Replace 'BB's terminator with one that does not have an unwind successor
/// block. Rewrites `invoke` to `call`, etc. Updates any PHIs in unwind
@@ -316,6 +340,12 @@ bool removeUnreachableBlocks(Function &F, LazyValueInfo *LVI = nullptr);
/// Metadata not listed as known via KnownIDs is removed
void combineMetadata(Instruction *K, const Instruction *J, ArrayRef<unsigned> KnownIDs);
+/// Combine the metadata of two instructions so that K can replace J. This
+/// specifically handles the case of CSE-like transformations.
+///
+/// Unknown metadata is removed.
+void combineMetadataForCSE(Instruction *K, const Instruction *J);
+
/// Replace each use of 'From' with 'To' if that use is dominated by
/// the given edge. Returns the number of replacements made.
unsigned replaceDominatedUsesWith(Value *From, Value *To, DominatorTree &DT,
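combineMetadataForCSE above is the piece a CSE-style transform would call just before replacing one instruction with an equivalent one. A hedged sketch of that call sequence; the surrounding pass logic is assumed, and the helper itself decides which metadata kinds survive.

// Editorial sketch only; assumes K and J were already proven equivalent.
#include "llvm/IR/Instruction.h"
#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;

static void replaceWithExistingValueSketch(Instruction *K, Instruction *J) {
  // Merge J's metadata into K so K stays correct as the surviving copy;
  // metadata kinds the helper does not know about are dropped.
  combineMetadataForCSE(K, J);
  J->replaceAllUsesWith(K);
  J->eraseFromParent();
}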
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/LoopSimplify.h b/contrib/llvm/include/llvm/Transforms/Utils/LoopSimplify.h
index 7cf89eaeb939..f3828bc16e2f 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/LoopSimplify.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/LoopSimplify.h
@@ -49,7 +49,7 @@ namespace llvm {
/// This pass is responsible for loop canonicalization.
class LoopSimplifyPass : public PassInfoMixin<LoopSimplifyPass> {
public:
- PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM);
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
/// \brief Simplify each loop in a loop nest recursively.
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/contrib/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index 89fea10f818a..845069d4260a 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -246,7 +246,7 @@ private:
RecurrenceKind Kind;
// If this a min/max recurrence the kind of recurrence.
MinMaxRecurrenceKind MinMaxKind;
- // First occurance of unasfe algebra in the PHI's use-chain.
+ // First occurrence of unsafe algebra in the PHI's use-chain.
Instruction *UnsafeAlgebraInst;
// The type of the recurrence.
Type *RecurrenceType;
@@ -263,13 +263,15 @@ public:
enum InductionKind {
IK_NoInduction, ///< Not an induction variable.
IK_IntInduction, ///< Integer induction variable. Step = C.
- IK_PtrInduction ///< Pointer induction var. Step = C / sizeof(elem).
+ IK_PtrInduction, ///< Pointer induction var. Step = C / sizeof(elem).
+ IK_FpInduction ///< Floating point induction variable.
};
public:
/// Default constructor - creates an invalid induction.
InductionDescriptor()
- : StartValue(nullptr), IK(IK_NoInduction), Step(nullptr) {}
+ : StartValue(nullptr), IK(IK_NoInduction), Step(nullptr),
+ InductionBinOp(nullptr) {}
/// Get the consecutive direction. Returns:
/// 0 - unknown or non-consecutive.
@@ -291,26 +293,58 @@ public:
const SCEV *getStep() const { return Step; }
ConstantInt *getConstIntStepValue() const;
- /// Returns true if \p Phi is an induction. If \p Phi is an induction,
- /// the induction descriptor \p D will contain the data describing this
- /// induction. If by some other means the caller has a better SCEV
+ /// Returns true if \p Phi is an induction in the loop \p L. If \p Phi is an
+ /// induction, the induction descriptor \p D will contain the data describing
+ /// this induction. If by some other means the caller has a better SCEV
/// expression for \p Phi than the one returned by the ScalarEvolution
/// analysis, it can be passed through \p Expr.
- static bool isInductionPHI(PHINode *Phi, ScalarEvolution *SE,
+ static bool isInductionPHI(PHINode *Phi, const Loop* L, ScalarEvolution *SE,
InductionDescriptor &D,
const SCEV *Expr = nullptr);
- /// Returns true if \p Phi is an induction, in the context associated with
- /// the run-time predicate of PSE. If \p Assume is true, this can add further
- /// SCEV predicates to \p PSE in order to prove that \p Phi is an induction.
+ /// Returns true if \p Phi is a floating point induction in the loop \p L.
+ /// If \p Phi is an induction, the induction descriptor \p D will contain
+ /// the data describing this induction.
+ static bool isFPInductionPHI(PHINode *Phi, const Loop* L,
+ ScalarEvolution *SE, InductionDescriptor &D);
+
+ /// Returns true if \p Phi is a loop \p L induction, in the context associated
+ /// with the run-time predicate of PSE. If \p Assume is true, this can add
+ /// further SCEV predicates to \p PSE in order to prove that \p Phi is an
+ /// induction.
/// If \p Phi is an induction, \p D will contain the data describing this
/// induction.
- static bool isInductionPHI(PHINode *Phi, PredicatedScalarEvolution &PSE,
+ static bool isInductionPHI(PHINode *Phi, const Loop* L,
+ PredicatedScalarEvolution &PSE,
InductionDescriptor &D, bool Assume = false);
+ /// Returns true if the induction type is FP and the binary operator does
+ /// not have the "fast-math" property. Such an operation requires a relaxed
+ /// FP mode.
+ bool hasUnsafeAlgebra() {
+ return InductionBinOp &&
+ !cast<FPMathOperator>(InductionBinOp)->hasUnsafeAlgebra();
+ }
+
+ /// Returns induction operator that does not have "fast-math" property
+ /// and requires FP unsafe mode.
+ Instruction *getUnsafeAlgebraInst() {
+ if (!InductionBinOp ||
+ cast<FPMathOperator>(InductionBinOp)->hasUnsafeAlgebra())
+ return nullptr;
+ return InductionBinOp;
+ }
+
+ /// Returns binary opcode of the induction operator.
+ Instruction::BinaryOps getInductionOpcode() const {
+ return InductionBinOp ? InductionBinOp->getOpcode() :
+ Instruction::BinaryOpsEnd;
+ }
+
private:
/// Private constructor - used by \c isInductionPHI.
- InductionDescriptor(Value *Start, InductionKind K, const SCEV *Step);
+ InductionDescriptor(Value *Start, InductionKind K, const SCEV *Step,
+ BinaryOperator *InductionBinOp = nullptr);
/// Start value.
TrackingVH<Value> StartValue;
@@ -318,6 +352,8 @@ private:
InductionKind IK;
/// Step value.
const SCEV *Step;
+ // Instruction that advances induction variable.
+ BinaryOperator *InductionBinOp;
};
BasicBlock *InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI,
@@ -425,12 +461,28 @@ Optional<const MDOperand *> findStringMetadataForLoop(Loop *TheLoop,
void addStringMetadataToLoop(Loop *TheLoop, const char *MDString,
unsigned V = 0);
+/// \brief Get a loop's estimated trip count based on branch weight metadata.
+/// Returns 0 when the count is estimated to be 0, or None when a meaningful
+/// estimate cannot be made.
+Optional<unsigned> getLoopEstimatedTripCount(Loop *L);
+
/// Helper to consistently add the set of standard passes to a loop pass's \c
/// AnalysisUsage.
///
/// All loop passes should call this as part of implementing their \c
/// getAnalysisUsage.
void getLoopAnalysisUsage(AnalysisUsage &AU);
+
+/// Returns true if the hoister and sinker can handle this instruction.
+/// If SafetyInfo is null, we are checking for sinking instructions from
+/// preheader to loop body (no speculation).
+/// If SafetyInfo is not null, we are checking for hoisting/sinking
+/// instructions from the loop body to the preheader/exit, and we check
+/// whether the instruction can be executed speculatively.
+///
+bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
+ Loop *CurLoop, AliasSetTracker *CurAST,
+ LoopSafetyInfo *SafetyInfo);
}
#endif
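The new Loop parameter on isInductionPHI and the FP-induction queries above would typically be used together. A hedged usage sketch follows; the caller is assumed to already have the loop, the PHI and ScalarEvolution at hand, and this is not taken from a real pass.

// Editorial sketch only; illustrates the updated queries, not a real pass.
#include "llvm/Transforms/Utils/LoopUtils.h"
using namespace llvm;

static void inspectLoopSketch(Loop *L, ScalarEvolution *SE, PHINode *Phi) {
  InductionDescriptor ID;
  // The loop is now passed explicitly, which also enables FP inductions.
  if (InductionDescriptor::isInductionPHI(Phi, L, SE, ID)) {
    if (ID.hasUnsafeAlgebra()) {
      // FP induction whose update op lacks fast-math flags; a client would
      // report ID.getUnsafeAlgebraInst() and either bail out or relax FP mode.
    }
  }
  // Branch-weight based estimate; None when no meaningful estimate exists.
  if (Optional<unsigned> EstimatedTC = getLoopEstimatedTripCount(L))
    (void)*EstimatedTC;
}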
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/LowerInvoke.h b/contrib/llvm/include/llvm/Transforms/Utils/LowerInvoke.h
new file mode 100644
index 000000000000..12774c7fd1f7
--- /dev/null
+++ b/contrib/llvm/include/llvm/Transforms/Utils/LowerInvoke.h
@@ -0,0 +1,30 @@
+//===- LowerInvoke.h - Eliminate Invoke instructions ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This transformation is designed for use by code generators which do not yet
+// support stack unwinding. This pass converts 'invoke' instructions to 'call'
+// instructions, so that any exception-handling 'landingpad' blocks become dead
+// code (which can be removed by running the '-simplifycfg' pass afterwards).
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_TRANSFORMS_UTILS_LOWERINVOKE_H
+#define LLVM_TRANSFORMS_UTILS_LOWERINVOKE_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class LowerInvokePass : public PassInfoMixin<LowerInvokePass> {
+public:
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+}
+
+#endif // LLVM_TRANSFORMS_UTILS_LOWERINVOKE_H
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/Mem2Reg.h b/contrib/llvm/include/llvm/Transforms/Utils/Mem2Reg.h
index f3c80edf544d..456876b520b0 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/Mem2Reg.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/Mem2Reg.h
@@ -21,7 +21,7 @@
namespace llvm {
class PromotePass : public PassInfoMixin<PromotePass> {
public:
- PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM);
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
}
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/MemorySSA.h b/contrib/llvm/include/llvm/Transforms/Utils/MemorySSA.h
index befc34cb80fc..408c6a157cd0 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/MemorySSA.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/MemorySSA.h
@@ -110,6 +110,11 @@ class Instruction;
class MemoryAccess;
class LLVMContext;
class raw_ostream;
+enum {
+ // Used to signify what the default invalid ID is for MemoryAccess's
+ // getID()
+ INVALID_MEMORYACCESS_ID = 0
+};
template <class T> class memoryaccess_def_iterator_base;
using memoryaccess_def_iterator = memoryaccess_def_iterator_base<MemoryAccess>;
@@ -157,7 +162,8 @@ protected:
friend class MemoryDef;
friend class MemoryPhi;
- /// \brief Used internally to give IDs to MemoryAccesses for printing
+ /// \brief Used for debugging and tracking things about MemoryAccesses.
+ /// Guaranteed unique among MemoryAccesses, no guarantees otherwise.
virtual unsigned getID() const = 0;
MemoryAccess(LLVMContext &C, unsigned Vty, BasicBlock *BB,
@@ -170,25 +176,6 @@ private:
BasicBlock *Block;
};
-template <>
-struct ilist_traits<MemoryAccess> : public ilist_default_traits<MemoryAccess> {
- /// See details of the instruction class for why this trick works
- // FIXME: This downcast is UB. See llvm.org/PR26753.
- LLVM_NO_SANITIZE("object-size")
- MemoryAccess *createSentinel() const {
- return static_cast<MemoryAccess *>(&Sentinel);
- }
-
- static void destroySentinel(MemoryAccess *) {}
-
- MemoryAccess *provideInitialHead() const { return createSentinel(); }
- MemoryAccess *ensureHead(MemoryAccess *) const { return createSentinel(); }
- static void noteHead(MemoryAccess *, MemoryAccess *) {}
-
-private:
- mutable ilist_half_node<MemoryAccess> Sentinel;
-};
-
inline raw_ostream &operator<<(raw_ostream &OS, const MemoryAccess &MA) {
MA.print(OS);
return OS;
@@ -254,7 +241,7 @@ public:
void *operator new(size_t s) { return User::operator new(s, 1); }
MemoryUse(LLVMContext &C, MemoryAccess *DMA, Instruction *MI, BasicBlock *BB)
- : MemoryUseOrDef(C, DMA, MemoryUseVal, MI, BB) {}
+ : MemoryUseOrDef(C, DMA, MemoryUseVal, MI, BB), OptimizedID(0) {}
static inline bool classof(const MemoryUse *) { return true; }
static inline bool classof(const Value *MA) {
@@ -262,6 +249,18 @@ public:
}
void print(raw_ostream &OS) const override;
+ void setDefiningAccess(MemoryAccess *DMA, bool Optimized = false) {
+ if (Optimized)
+ OptimizedID = DMA->getID();
+ MemoryUseOrDef::setDefiningAccess(DMA);
+ }
+ bool isOptimized() const {
+ return getDefiningAccess() && OptimizedID == getDefiningAccess()->getID();
+ }
+ /// \brief Reset the ID of what this MemoryUse was optimized to, causing it to
+ /// be rewalked by the walker if necessary.
+ /// This really should only be called by tests.
+ void resetOptimized() { OptimizedID = INVALID_MEMORYACCESS_ID; }
protected:
friend class MemorySSA;
@@ -269,6 +268,9 @@ protected:
unsigned getID() const override {
llvm_unreachable("MemoryUses do not have IDs");
}
+
+private:
+ unsigned int OptimizedID;
};
template <>
@@ -307,8 +309,6 @@ public:
protected:
friend class MemorySSA;
- // For debugging only. This gets used to give memory accesses pretty numbers
- // when printing them out
unsigned getID() const override { return ID; }
private:
@@ -387,6 +387,14 @@ public:
return block_begin() + getNumOperands();
}
+ iterator_range<block_iterator> blocks() {
+ return make_range(block_begin(), block_end());
+ }
+
+ iterator_range<const_block_iterator> blocks() const {
+ return make_range(block_begin(), block_end());
+ }
+
op_range incoming_values() { return operands(); }
const_op_range incoming_values() const { return operands(); }
@@ -465,8 +473,6 @@ protected:
User::allocHungoffUses(N, /* IsPhi */ true);
}
- /// For debugging only. This gets used to give memory accesses pretty numbers
- /// when printing them out
unsigned getID() const final { return ID; }
private:
@@ -494,7 +500,6 @@ class MemorySSAWalker;
class MemorySSA {
public:
MemorySSA(Function &, AliasAnalysis *, DominatorTree *);
- MemorySSA(MemorySSA &&);
~MemorySSA();
MemorySSAWalker *getWalker();
@@ -503,7 +508,7 @@ public:
/// access associated with it. If passed a basic block gets the memory phi
/// node that exists for that block, if there is one. Otherwise, this will get
/// a MemoryUseOrDef.
- MemoryAccess *getMemoryAccess(const Value *) const;
+ MemoryUseOrDef *getMemoryAccess(const Instruction *) const;
MemoryPhi *getMemoryAccess(const BasicBlock *BB) const;
void dump() const;
@@ -530,11 +535,12 @@ public:
///
/// This list is not modifiable by the user.
const AccessList *getBlockAccesses(const BasicBlock *BB) const {
- auto It = PerBlockAccesses.find(BB);
- return It == PerBlockAccesses.end() ? nullptr : It->second.get();
+ return getWritableBlockAccesses(BB);
}
- /// \brief Create an empty MemoryPhi in MemorySSA
+ /// \brief Create an empty MemoryPhi in MemorySSA for a given basic block.
+ /// Only one MemoryPhi for a block exists at a time, so this function will
+ /// assert if you try to create one where it already exists.
MemoryPhi *createMemoryPhi(BasicBlock *BB);
enum InsertionPlace { Beginning, End };
@@ -550,6 +556,8 @@ public:
/// will be placed. The caller is expected to keep ordering the same as
/// instructions.
/// It will return the new MemoryAccess.
+ /// Note: If a MemoryAccess already exists for I, this function will make it
+ /// inaccessible and it *must* have removeMemoryAccess called on it.
MemoryAccess *createMemoryAccessInBB(Instruction *I, MemoryAccess *Definition,
const BasicBlock *BB,
InsertionPlace Point);
@@ -561,12 +569,23 @@ public:
/// used to replace an existing memory instruction. It will *not* create PHI
/// nodes, or verify the clobbering definition. The clobbering definition
/// must be non-null.
- MemoryAccess *createMemoryAccessBefore(Instruction *I,
- MemoryAccess *Definition,
- MemoryAccess *InsertPt);
- MemoryAccess *createMemoryAccessAfter(Instruction *I,
- MemoryAccess *Definition,
- MemoryAccess *InsertPt);
+ /// Note: If a MemoryAccess already exists for I, this function will make it
+ /// inaccessible and it *must* have removeMemoryAccess called on it.
+ MemoryUseOrDef *createMemoryAccessBefore(Instruction *I,
+ MemoryAccess *Definition,
+ MemoryUseOrDef *InsertPt);
+ MemoryUseOrDef *createMemoryAccessAfter(Instruction *I,
+ MemoryAccess *Definition,
+ MemoryAccess *InsertPt);
+
+ // \brief Splice \p What to just before \p Where.
+ //
+ // In order to be efficient, the following conditions must be met:
+ // - \p Where dominates \p What,
+ // - All memory accesses in [\p Where, \p What) are no-alias with \p What.
+ //
+ // TODO: relax the MemoryDef requirement on Where.
+ void spliceMemoryAccessAbove(MemoryDef *Where, MemoryUseOrDef *What);
/// \brief Remove a MemoryAccess from MemorySSA, including updating all
/// definitions and uses.
@@ -580,6 +599,14 @@ public:
/// whether MemoryAccess \p A dominates MemoryAccess \p B.
bool locallyDominates(const MemoryAccess *A, const MemoryAccess *B) const;
+ /// \brief Given two memory accesses in potentially different blocks,
+ /// determine whether MemoryAccess \p A dominates MemoryAccess \p B.
+ bool dominates(const MemoryAccess *A, const MemoryAccess *B) const;
+
+ /// \brief Given a MemoryAccess and a Use, determine whether MemoryAccess \p A
+ /// dominates Use \p B.
+ bool dominates(const MemoryAccess *A, const Use &B) const;
+
/// \brief Verify that MemorySSA is self consistent (IE definitions dominate
/// all uses, uses appear in the right places). This is used by unit tests.
void verifyMemorySSA() const;
@@ -592,9 +619,20 @@ protected:
void verifyDomination(Function &F) const;
void verifyOrdering(Function &F) const;
+ // This is used by the use optimizer class
+ AccessList *getWritableBlockAccesses(const BasicBlock *BB) const {
+ auto It = PerBlockAccesses.find(BB);
+ return It == PerBlockAccesses.end() ? nullptr : It->second.get();
+ }
+
private:
class CachingWalker;
+ class OptimizeUses;
+
+ CachingWalker *getWalkerImpl();
void buildMemorySSA();
+ void optimizeUses();
+
void verifyUseInDefs(MemoryAccess *, MemoryAccess *) const;
using AccessMap = DenseMap<const BasicBlock *, std::unique_ptr<AccessList>>;
@@ -608,10 +646,14 @@ private:
MemoryAccess *findDominatingDef(BasicBlock *, enum InsertionPlace);
void removeFromLookups(MemoryAccess *);
+ void placePHINodes(const SmallPtrSetImpl<BasicBlock *> &,
+ const DenseMap<const BasicBlock *, unsigned int> &);
MemoryAccess *renameBlock(BasicBlock *, MemoryAccess *);
void renamePass(DomTreeNode *, MemoryAccess *IncomingVal,
SmallPtrSet<BasicBlock *, 16> &Visited);
AccessList *getOrCreateAccessList(const BasicBlock *);
+ void renumberBlock(const BasicBlock *) const;
+
AliasAnalysis *AA;
DominatorTree *DT;
Function &F;
@@ -621,6 +663,12 @@ private:
AccessMap PerBlockAccesses;
std::unique_ptr<MemoryAccess> LiveOnEntryDef;
+ // Domination mappings
+ // Note that the numbering is local to a block, even though the map is
+ // global.
+ mutable SmallPtrSet<const BasicBlock *, 16> BlockNumberingValid;
+ mutable DenseMap<const MemoryAccess *, unsigned long> BlockNumbering;
+
// Memory SSA building info
std::unique_ptr<CachingWalker> Walker;
unsigned NextID;
@@ -641,12 +689,20 @@ public:
///
class MemorySSAAnalysis : public AnalysisInfoMixin<MemorySSAAnalysis> {
friend AnalysisInfoMixin<MemorySSAAnalysis>;
- static char PassID;
+ static AnalysisKey Key;
public:
- typedef MemorySSA Result;
+ // Wrap MemorySSA result to ensure address stability of internal MemorySSA
+ // pointers after construction. Use a wrapper class instead of plain
+ // unique_ptr<MemorySSA> to avoid build breakage on MSVC.
+ struct Result {
+ Result(std::unique_ptr<MemorySSA> &&MSSA) : MSSA(std::move(MSSA)) {}
+ MemorySSA &getMSSA() { return *MSSA.get(); }
- MemorySSA run(Function &F, AnalysisManager<Function> &AM);
+ std::unique_ptr<MemorySSA> MSSA;
+ };
+
+ Result run(Function &F, FunctionAnalysisManager &AM);
};
/// \brief Printer pass for \c MemorySSA.
@@ -655,12 +711,12 @@ class MemorySSAPrinterPass : public PassInfoMixin<MemorySSAPrinterPass> {
public:
explicit MemorySSAPrinterPass(raw_ostream &OS) : OS(OS) {}
- PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM);
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
/// \brief Verifier pass for \c MemorySSA.
struct MemorySSAVerifierPass : PassInfoMixin<MemorySSAVerifierPass> {
- PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM);
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
/// \brief Legacy analysis pass which computes \c MemorySSA.
@@ -715,7 +771,7 @@ public:
/// store %a
/// } else {
/// 2 = MemoryDef(liveOnEntry)
- /// store %b
+ /// store %b
/// }
/// 3 = MemoryPhi(2, 1)
/// MemoryUse(3)
@@ -723,7 +779,15 @@ public:
///
/// calling this API on load(%a) will return the MemoryPhi, not the MemoryDef
/// in the if (a) branch.
- virtual MemoryAccess *getClobberingMemoryAccess(const Instruction *) = 0;
+ MemoryAccess *getClobberingMemoryAccess(const Instruction *I) {
+ MemoryAccess *MA = MSSA->getMemoryAccess(I);
+ assert(MA && "Handed an instruction that MemorySSA doesn't recognize?");
+ return getClobberingMemoryAccess(MA);
+ }
+
+ /// Does the same thing as getClobberingMemoryAccess(const Instruction *I),
+ /// but takes a MemoryAccess instead of an Instruction.
+ virtual MemoryAccess *getClobberingMemoryAccess(MemoryAccess *) = 0;
/// \brief Given a potentially clobbering memory access and a new location,
/// calling this will give you the nearest dominating clobbering MemoryAccess
@@ -737,7 +801,7 @@ public:
/// will return that MemoryDef, whereas the above would return the clobber
/// starting from the use side of the memory def.
virtual MemoryAccess *getClobberingMemoryAccess(MemoryAccess *,
- MemoryLocation &) = 0;
+ const MemoryLocation &) = 0;
/// \brief Given a memory access, invalidate anything this walker knows about
/// that access.
@@ -746,6 +810,8 @@ public:
/// the walker it uses or returns.
virtual void invalidateInfo(MemoryAccess *) {}
+ virtual void verify(const MemorySSA *MSSA) { assert(MSSA == this->MSSA); }
+
protected:
friend class MemorySSA; // For updating MSSA pointer in MemorySSA move
// constructor.
@@ -756,9 +822,12 @@ protected:
/// simply returns the links as they were constructed by the builder.
class DoNothingMemorySSAWalker final : public MemorySSAWalker {
public:
- MemoryAccess *getClobberingMemoryAccess(const Instruction *) override;
+ // Keep the overrides below from hiding the Instruction overload of
+ // getClobberingMemoryAccess.
+ using MemorySSAWalker::getClobberingMemoryAccess;
+ MemoryAccess *getClobberingMemoryAccess(MemoryAccess *) override;
MemoryAccess *getClobberingMemoryAccess(MemoryAccess *,
- MemoryLocation &) override;
+ const MemoryLocation &) override;
};
using MemoryAccessPair = std::pair<MemoryAccess *, MemoryLocation>;
@@ -837,29 +906,21 @@ inline const_memoryaccess_def_iterator MemoryAccess::defs_end() const {
/// \brief GraphTraits for a MemoryAccess, which walks defs in the normal case,
/// and uses in the inverse case.
template <> struct GraphTraits<MemoryAccess *> {
- using NodeType = MemoryAccess;
+ using NodeRef = MemoryAccess *;
using ChildIteratorType = memoryaccess_def_iterator;
- static NodeType *getEntryNode(NodeType *N) { return N; }
- static inline ChildIteratorType child_begin(NodeType *N) {
- return N->defs_begin();
- }
- static inline ChildIteratorType child_end(NodeType *N) {
- return N->defs_end();
- }
+ static NodeRef getEntryNode(NodeRef N) { return N; }
+ static ChildIteratorType child_begin(NodeRef N) { return N->defs_begin(); }
+ static ChildIteratorType child_end(NodeRef N) { return N->defs_end(); }
};
template <> struct GraphTraits<Inverse<MemoryAccess *>> {
- using NodeType = MemoryAccess;
+ using NodeRef = MemoryAccess *;
using ChildIteratorType = MemoryAccess::iterator;
- static NodeType *getEntryNode(NodeType *N) { return N; }
- static inline ChildIteratorType child_begin(NodeType *N) {
- return N->user_begin();
- }
- static inline ChildIteratorType child_end(NodeType *N) {
- return N->user_end();
- }
+ static NodeRef getEntryNode(NodeRef N) { return N; }
+ static ChildIteratorType child_begin(NodeRef N) { return N->user_begin(); }
+ static ChildIteratorType child_end(NodeRef N) { return N->user_end(); }
};
/// \brief Provide an iterator that walks defs, giving both the memory access,
@@ -944,6 +1005,10 @@ inline upward_defs_iterator upward_defs_begin(const MemoryAccessPair &Pair) {
inline upward_defs_iterator upward_defs_end() { return upward_defs_iterator(); }
+// Return true when MD may alias MU, return false otherwise.
+bool defClobbersUseOrDef(MemoryDef *MD, const MemoryUseOrDef *MU,
+ AliasAnalysis &AA);
+
} // end namespace llvm
#endif // LLVM_TRANSFORMS_UTILS_MEMORYSSA_H
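With the walker interface reworked above (an Instruction overload that forwards to the MemoryAccess overload, plus the cached optimized access on MemoryUse), a typical query looks roughly like the hedged sketch below; it assumes MemorySSA has already been built for the function.

// Editorial sketch only; error handling and the surrounding pass are omitted.
#include "llvm/Support/Casting.h"
#include "llvm/Transforms/Utils/MemorySSA.h"
using namespace llvm;

static void queryClobberSketch(MemorySSA &MSSA, Instruction *I) {
  MemorySSAWalker *Walker = MSSA.getWalker();
  // The Instruction overload asserts that I has a MemoryAccess and forwards
  // to the virtual MemoryAccess overload.
  MemoryAccess *Clobber = Walker->getClobberingMemoryAccess(I);
  (void)Clobber;

  // getMemoryAccess(Instruction) now returns a MemoryUseOrDef directly.
  if (auto *MU = dyn_cast_or_null<MemoryUse>(MSSA.getMemoryAccess(I))) {
    if (MU->isOptimized()) {
      // The use already records its optimized defining access; tests can
      // force a re-walk with MU->resetOptimized().
    }
  }
}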
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/ModuleUtils.h b/contrib/llvm/include/llvm/Transforms/Utils/ModuleUtils.h
index 2eb2b1363b0b..27508799f8e0 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/ModuleUtils.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/ModuleUtils.h
@@ -55,9 +55,31 @@ std::pair<Function *, Function *> createSanitizerCtorAndInitFunctions(
ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
StringRef VersionCheckName = StringRef());
-/// Rename all the anon functions in the module using a hash computed from
+/// Rename all the anon globals in the module using a hash computed from
/// the list of public globals in the module.
-bool nameUnamedFunctions(Module &M);
+bool nameUnamedGlobals(Module &M);
+
+/// \brief Adds global values to the llvm.used list.
+void appendToUsed(Module &M, ArrayRef<GlobalValue *> Values);
+
+/// \brief Adds global values to the llvm.compiler.used list.
+void appendToCompilerUsed(Module &M, ArrayRef<GlobalValue *> Values);
+
+/// Filter out potentially dead comdat functions where other entries keep the
+/// entire comdat group alive.
+///
+/// This is designed for cases where functions appear to become dead but remain
+/// alive due to other live entries in their comdat group.
+///
+/// The \p DeadComdatFunctions container should only have pointers to
+/// `Function`s which are members of a comdat group and are believed to be
+/// dead.
+///
+/// After this routine finishes, the only remaining `Function`s in \p
+/// DeadComdatFunctions are those where every member of the comdat is listed
+/// and thus removing them is safe (provided *all* are removed).
+void filterDeadComdatFunctions(
+ Module &M, SmallVectorImpl<Function *> &DeadComdatFunctions);
} // End llvm namespace
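filterDeadComdatFunctions above is meant to be run over a candidate set before deletion. A hedged sketch of that flow; it assumes the candidates are already known to be otherwise unreferenced, as the precondition in the comment requires.

// Editorial sketch only; the caller is responsible for the "believed dead"
// precondition described above.
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
using namespace llvm;

static void dropDeadComdatMembersSketch(Module &M,
                                        SmallVectorImpl<Function *> &MaybeDead) {
  // Keep only entries whose entire comdat group is listed; those can be
  // erased together without leaving a partially-deleted group behind.
  filterDeadComdatFunctions(M, MaybeDead);
  for (Function *F : MaybeDead)
    F->eraseFromParent();
}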
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/NameAnonGlobals.h b/contrib/llvm/include/llvm/Transforms/Utils/NameAnonGlobals.h
new file mode 100644
index 000000000000..4bec361674bb
--- /dev/null
+++ b/contrib/llvm/include/llvm/Transforms/Utils/NameAnonGlobals.h
@@ -0,0 +1,31 @@
+//===-- NameAnonGlobals.h - Anonymous Global Naming Pass ----*- C++ -*-=======//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements naming anonymous globals to make sure they can be
+// referred to by ThinLTO.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_UTILS_NAMEANONGLOBALSS_H
+#define LLVM_TRANSFORMS_UTILS_NAMEANONGLOBALSS_H
+
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+/// Simple pass that provides a name to every anonymous global.
+class NameAnonGlobalPass : public PassInfoMixin<NameAnonGlobalPass> {
+public:
+ NameAnonGlobalPass() {}
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+}
+
+#endif // LLVM_TRANSFORMS_UTILS_NAMEANONGLOBALS_H
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/SSAUpdaterImpl.h b/contrib/llvm/include/llvm/Transforms/Utils/SSAUpdaterImpl.h
index b5f4ac82b605..b0448fed9f4d 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/SSAUpdaterImpl.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/SSAUpdaterImpl.h
@@ -120,9 +120,8 @@ public:
if (Info->NumPreds == 0)
Info->Preds = nullptr;
else
- Info->Preds = static_cast<BBInfo**>
- (Allocator.Allocate(Info->NumPreds * sizeof(BBInfo*),
- AlignOf<BBInfo*>::Alignment));
+ Info->Preds = static_cast<BBInfo **>(Allocator.Allocate(
+ Info->NumPreds * sizeof(BBInfo *), alignof(BBInfo *)));
for (unsigned p = 0; p != Info->NumPreds; ++p) {
BlkT *Pred = Preds[p];
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/SimplifyInstructions.h b/contrib/llvm/include/llvm/Transforms/Utils/SimplifyInstructions.h
index ea491dc50587..3f838611626f 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/SimplifyInstructions.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/SimplifyInstructions.h
@@ -24,7 +24,7 @@ namespace llvm {
/// This pass removes redundant instructions.
class InstSimplifierPass : public PassInfoMixin<InstSimplifierPass> {
public:
- PreservedAnalyses run(Function &F, AnalysisManager<Function> &AM);
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
} // end namespace llvm
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h b/contrib/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
index 92ee24633950..5e217adf1987 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/SimplifyLibCalls.h
@@ -137,6 +137,7 @@ private:
// Integer Library Call Optimizations
Value *optimizeFFS(CallInst *CI, IRBuilder<> &B);
+ Value *optimizeFls(CallInst *CI, IRBuilder<> &B);
Value *optimizeAbs(CallInst *CI, IRBuilder<> &B);
Value *optimizeIsDigit(CallInst *CI, IRBuilder<> &B);
Value *optimizeIsAscii(CallInst *CI, IRBuilder<> &B);
@@ -158,7 +159,6 @@ private:
SmallVectorImpl<CallInst *> &SinCalls,
SmallVectorImpl<CallInst *> &CosCalls,
SmallVectorImpl<CallInst *> &SinCosCalls);
- void replaceTrigInsts(SmallVectorImpl<CallInst *> &Calls, Value *Res);
Value *optimizePrintFString(CallInst *CI, IRBuilder<> &B);
Value *optimizeSPrintFString(CallInst *CI, IRBuilder<> &B);
Value *optimizeFPrintFString(CallInst *CI, IRBuilder<> &B);
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/SymbolRewriter.h b/contrib/llvm/include/llvm/Transforms/Utils/SymbolRewriter.h
index 5ccee98f97e7..ff995173e126 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/SymbolRewriter.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/SymbolRewriter.h
@@ -30,12 +30,12 @@
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_TRANSFORMS_UTILS_SYMBOL_REWRITER_H
-#define LLVM_TRANSFORMS_UTILS_SYMBOL_REWRITER_H
+#ifndef LLVM_TRANSFORMS_UTILS_SYMBOLREWRITER_H
+#define LLVM_TRANSFORMS_UTILS_SYMBOLREWRITER_H
-#include "llvm/ADT/ilist.h"
-#include "llvm/ADT/ilist_node.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include <list>
namespace llvm {
class MemoryBuffer;
@@ -59,7 +59,7 @@ namespace SymbolRewriter {
/// be rewritten or providing a (posix compatible) regular expression that will
/// select the symbols to rewrite. This descriptor list is passed to the
/// SymbolRewriter pass.
-class RewriteDescriptor : public ilist_node<RewriteDescriptor> {
+class RewriteDescriptor {
RewriteDescriptor(const RewriteDescriptor &) = delete;
const RewriteDescriptor &
@@ -86,7 +86,7 @@ private:
const Type Kind;
};
-typedef iplist<RewriteDescriptor> RewriteDescriptorList;
+typedef std::list<std::unique_ptr<RewriteDescriptor>> RewriteDescriptorList;
class RewriteMapParser {
public:
@@ -110,43 +110,26 @@ private:
};
}
-template <>
-struct ilist_traits<SymbolRewriter::RewriteDescriptor>
- : public ilist_default_traits<SymbolRewriter::RewriteDescriptor> {
- mutable ilist_half_node<SymbolRewriter::RewriteDescriptor> Sentinel;
+ModulePass *createRewriteSymbolsPass();
+ModulePass *createRewriteSymbolsPass(SymbolRewriter::RewriteDescriptorList &);
+class RewriteSymbolPass : public PassInfoMixin<RewriteSymbolPass> {
public:
- // createSentinel is used to get a reference to a node marking the end of
- // the list. Because the sentinel is relative to this instance, use a
- // non-static method.
- SymbolRewriter::RewriteDescriptor *createSentinel() const {
- // since i[p] lists always publicly derive from the corresponding
- // traits, placing a data member in this class will augment the
- // i[p]list. Since the NodeTy is expected to publicly derive from
- // ilist_node<NodeTy>, there is a legal viable downcast from it to
- // NodeTy. We use this trick to superpose i[p]list with a "ghostly"
- // NodeTy, which becomes the sentinel. Dereferencing the sentinel is
- // forbidden (save the ilist_node<NodeTy>) so no one will ever notice
- // the superposition.
- return static_cast<SymbolRewriter::RewriteDescriptor *>(&Sentinel);
+ RewriteSymbolPass() { loadAndParseMapFiles(); }
+ RewriteSymbolPass(SymbolRewriter::RewriteDescriptorList &DL) {
+ Descriptors.splice(Descriptors.begin(), DL);
}
- void destroySentinel(SymbolRewriter::RewriteDescriptor *) {}
- SymbolRewriter::RewriteDescriptor *provideInitialHead() const {
- return createSentinel();
- }
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
- SymbolRewriter::RewriteDescriptor *
- ensureHead(SymbolRewriter::RewriteDescriptor *&) const {
- return createSentinel();
- }
+ // Glue for old PM
+ bool runImpl(Module &M);
- static void noteHead(SymbolRewriter::RewriteDescriptor *,
- SymbolRewriter::RewriteDescriptor *) {}
-};
+private:
+ void loadAndParseMapFiles();
-ModulePass *createRewriteSymbolsPass();
-ModulePass *createRewriteSymbolsPass(SymbolRewriter::RewriteDescriptorList &);
+ SymbolRewriter::RewriteDescriptorList Descriptors;
+};
}
-#endif
+#endif //LLVM_TRANSFORMS_UTILS_SYMBOLREWRITER_H
diff --git a/contrib/llvm/include/llvm/Transforms/Utils/UnrollLoop.h b/contrib/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
index 4d370407591d..2ea28f2d4e13 100644
--- a/contrib/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/UnrollLoop.h
@@ -16,6 +16,9 @@
#ifndef LLVM_TRANSFORMS_UTILS_UNROLLLOOP_H
#define LLVM_TRANSFORMS_UTILS_UNROLLLOOP_H
+// Needed because we can't forward-declare the nested struct
+// TargetTransformInfo::UnrollingPreferences
+#include "llvm/Analysis/TargetTransformInfo.h"
namespace llvm {
@@ -27,12 +30,15 @@ class LoopInfo;
class LPPassManager;
class MDNode;
class Pass;
+class OptimizationRemarkEmitter;
class ScalarEvolution;
bool UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
bool AllowRuntime, bool AllowExpensiveTripCount,
- unsigned TripMultiple, LoopInfo *LI, ScalarEvolution *SE,
- DominatorTree *DT, AssumptionCache *AC, bool PreserveLCSSA);
+ bool PreserveCondBr, bool PreserveOnlyFirst,
+ unsigned TripMultiple, unsigned PeelCount, LoopInfo *LI,
+ ScalarEvolution *SE, DominatorTree *DT, AssumptionCache *AC,
+ OptimizationRemarkEmitter *ORE, bool PreserveLCSSA);
bool UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
bool AllowExpensiveTripCount,
@@ -40,6 +46,12 @@ bool UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
ScalarEvolution *SE, DominatorTree *DT,
bool PreserveLCSSA);
+void computePeelCount(Loop *L, unsigned LoopSize,
+ TargetTransformInfo::UnrollingPreferences &UP);
+
+bool peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI, ScalarEvolution *SE,
+ DominatorTree *DT, bool PreserveLCSSA);
+
MDNode *GetUnrollMetadata(MDNode *LoopID, StringRef Name);
}
diff --git a/contrib/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h b/contrib/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h
index e6d3e8353307..2efc7ca4f8a1 100644
--- a/contrib/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h
+++ b/contrib/llvm/include/llvm/Transforms/Vectorize/LoopVectorize.h
@@ -58,6 +58,7 @@
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPassManager.h"
+#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Function.h"
@@ -84,6 +85,7 @@ struct LoopVectorizePass : public PassInfoMixin<LoopVectorizePass> {
AliasAnalysis *AA;
AssumptionCache *AC;
std::function<const LoopAccessInfo &(Loop &)> *GetLAA;
+ OptimizationRemarkEmitter *ORE;
BlockFrequency ColdEntryFreq;
@@ -94,7 +96,8 @@ struct LoopVectorizePass : public PassInfoMixin<LoopVectorizePass> {
TargetTransformInfo &TTI_, DominatorTree &DT_,
BlockFrequencyInfo &BFI_, TargetLibraryInfo *TLI_,
DemandedBits &DB_, AliasAnalysis &AA_, AssumptionCache &AC_,
- std::function<const LoopAccessInfo &(Loop &)> &GetLAA_);
+ std::function<const LoopAccessInfo &(Loop &)> &GetLAA_,
+ OptimizationRemarkEmitter &ORE);
bool processLoop(Loop *L);
};
diff --git a/contrib/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h b/contrib/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h
index 3a5b42411d35..d669a8e5b615 100644
--- a/contrib/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h
+++ b/contrib/llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h
@@ -80,7 +80,7 @@ private:
/// \returns true if a value was vectorized.
bool tryToVectorizeList(ArrayRef<Value *> VL, slpvectorizer::BoUpSLP &R,
ArrayRef<Value *> BuildVector = None,
- bool allowReorder = false);
+ bool AllowReorder = false);
/// \brief Try to vectorize a chain that may start at the operands of \V;
bool tryToVectorize(BinaryOperator *V, slpvectorizer::BoUpSLP &R);
@@ -92,15 +92,20 @@ private:
/// collected in GEPs.
bool vectorizeGEPIndices(BasicBlock *BB, slpvectorizer::BoUpSLP &R);
+ /// Try to find horizontal reduction or otherwise vectorize a chain of binary
+ /// operators.
+ bool vectorizeRootInstruction(PHINode *P, Value *V, BasicBlock *BB,
+ slpvectorizer::BoUpSLP &R,
+ TargetTransformInfo *TTI);
+
/// \brief Scan the basic block and look for patterns that are likely to start
/// a vectorization chain.
bool vectorizeChainsInBlock(BasicBlock *BB, slpvectorizer::BoUpSLP &R);
- bool vectorizeStoreChain(ArrayRef<Value *> Chain, int CostThreshold,
- slpvectorizer::BoUpSLP &R, unsigned VecRegSize);
+ bool vectorizeStoreChain(ArrayRef<Value *> Chain, slpvectorizer::BoUpSLP &R,
+ unsigned VecRegSize);
- bool vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold,
- slpvectorizer::BoUpSLP &R);
+ bool vectorizeStores(ArrayRef<StoreInst *> Stores, slpvectorizer::BoUpSLP &R);
/// The store instructions in a basic block organized by base pointer.
StoreListMap Stores;
diff --git a/contrib/llvm/include/llvm/module.modulemap b/contrib/llvm/include/llvm/module.modulemap
index cc1895011dc3..a86bc7e7fcbf 100644
--- a/contrib/llvm/include/llvm/module.modulemap
+++ b/contrib/llvm/include/llvm/module.modulemap
@@ -18,9 +18,6 @@ module LLVM_Backend {
umbrella "CodeGen"
module * { export * }
- // FIXME: Why is this excluded?
- exclude header "CodeGen/MachineValueType.h"
-
// Exclude these; they're intended to be included into only a single
// translation unit (or none) and aren't part of this module.
exclude header "CodeGen/CommandFlags.h"
@@ -84,6 +81,13 @@ module LLVM_DebugInfo_PDB_DIA {
module * { export * }
}
+module LLVM_DebugInfo_MSF {
+ requires cplusplus
+
+ umbrella "DebugInfo/MSF"
+ module * { export * }
+}
+
module LLVM_DebugInfo_CodeView {
requires cplusplus
@@ -109,6 +113,14 @@ module LLVM_ExecutionEngine {
exclude header "ExecutionEngine/MCJIT.h"
exclude header "ExecutionEngine/Interpreter.h"
exclude header "ExecutionEngine/OrcMCJITReplacement.h"
+
+ // FIXME: These exclude directives were added as a workaround for
+ // <rdar://problem/29247092> and should be removed once it is fixed.
+ exclude header "ExecutionEngine/Orc/RawByteChannel.h"
+ exclude header "ExecutionEngine/Orc/RPCUtils.h"
+ exclude header "ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h"
+ exclude header "ExecutionEngine/Orc/OrcRemoteTargetClient.h"
+ exclude header "ExecutionEngine/Orc/OrcRemoteTargetServer.h"
}
module LLVM_Pass {
@@ -218,9 +230,6 @@ module LLVM_Transforms {
requires cplusplus
umbrella "Transforms"
module * { export * }
-
- // FIXME: Excluded because it does bad things with the legacy pass manager.
- exclude header "Transforms/IPO/PassManagerBuilder.h"
}
// A module covering ADT/ and Support/. These are intertwined and
@@ -245,9 +254,6 @@ module LLVM_Utils {
// Exclude this; it's fundamentally non-modular.
exclude header "Support/PluginLoader.h"
- // FIXME: Mislayered?
- exclude header "Support/TargetRegistry.h"
-
// These are intended for textual inclusion.
textual header "Support/ARMTargetParser.def"
textual header "Support/AArch64TargetParser.def"
@@ -264,6 +270,7 @@ module LLVM_Utils {
textual header "Support/ELFRelocs/Mips.def"
textual header "Support/ELFRelocs/PowerPC64.def"
textual header "Support/ELFRelocs/PowerPC.def"
+ textual header "Support/ELFRelocs/RISCV.def"
textual header "Support/ELFRelocs/Sparc.def"
textual header "Support/ELFRelocs/SystemZ.def"
textual header "Support/ELFRelocs/x86_64.def"
diff --git a/contrib/llvm/include/llvm/module.modulemap.build b/contrib/llvm/include/llvm/module.modulemap.build
index 7150fe93935f..0f6f82af6e12 100644
--- a/contrib/llvm/include/llvm/module.modulemap.build
+++ b/contrib/llvm/include/llvm/module.modulemap.build
@@ -3,3 +3,7 @@ module LLVM_Support_DataTypes {
header "Support/DataTypes.h"
export *
}
+module LLVM_Config_ABI_Breaking {
+ header "Config/abi-breaking.h"
+ export *
+}
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
index f931b6fc6523..84da76be98bb 100644
--- a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
@@ -53,7 +53,8 @@ using namespace llvm;
static cl::opt<bool> DisableBasicAA("disable-basicaa", cl::Hidden,
cl::init(false));
-AAResults::AAResults(AAResults &&Arg) : TLI(Arg.TLI), AAs(std::move(Arg.AAs)) {
+AAResults::AAResults(AAResults &&Arg)
+ : TLI(Arg.TLI), AAs(std::move(Arg.AAs)), AADeps(std::move(Arg.AADeps)) {
for (auto &AA : AAs)
AA->setAAResults(this);
}
@@ -69,6 +70,22 @@ AAResults::~AAResults() {
#endif
}
+bool AAResults::invalidate(Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &Inv) {
+ // Check if the AA manager itself has been invalidated.
+ auto PAC = PA.getChecker<AAManager>();
+ if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
+ return true; // The manager needs to be blown away, clear everything.
+
+ // Check all of the dependencies registered.
+ for (AnalysisKey *ID : AADeps)
+ if (Inv.invalidate(ID, F, PA))
+ return true;
+
+ // Everything we depend on is still fine, so are we. Nothing to invalidate.
+ return false;
+}
+
//===----------------------------------------------------------------------===//
// Default chaining methods
//===----------------------------------------------------------------------===//
@@ -141,7 +158,8 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS,
// Try to refine the mod-ref info further using other API entry points to the
// aggregate set of AA results.
auto MRB = getModRefBehavior(CS);
- if (MRB == FMRB_DoesNotAccessMemory)
+ if (MRB == FMRB_DoesNotAccessMemory ||
+ MRB == FMRB_OnlyAccessesInaccessibleMem)
return MRI_NoModRef;
if (onlyReadsMemory(MRB))
@@ -149,7 +167,7 @@ ModRefInfo AAResults::getModRefInfo(ImmutableCallSite CS,
else if (doesNotReadMemory(MRB))
Result = ModRefInfo(Result & MRI_Mod);
- if (onlyAccessesArgPointees(MRB)) {
+ if (onlyAccessesArgPointees(MRB) || onlyAccessesInaccessibleOrArgMem(MRB)) {
bool DoesAlias = false;
ModRefInfo AllArgsMask = MRI_NoModRef;
if (doesAccessArgPointees(MRB)) {
@@ -459,7 +477,8 @@ ModRefInfo AAResults::callCapturesBefore(const Instruction *I,
// pointer were passed to arguments that were neither of these, then it
// couldn't be no-capture.
if (!(*CI)->getType()->isPointerTy() ||
- (!CS.doesNotCapture(ArgNo) && !CS.isByValArgument(ArgNo)))
+ (!CS.doesNotCapture(ArgNo) &&
+ ArgNo < CS.getNumArgOperands() && !CS.isByValArgument(ArgNo)))
continue;
// If this is a no-capture pointer argument, see if we can tell that it
@@ -512,7 +531,7 @@ bool AAResults::canInstructionRangeModRef(const Instruction &I1,
AAResults::Concept::~Concept() {}
// Provide a definition for the static object used to identify passes.
-char AAManager::PassID;
+AnalysisKey AAManager::Key;
namespace {
/// A wrapper pass for external alias analyses. This just squirrels away the
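Illustrative sketch, not part of the patch above: the new AAResults::invalidate and the PassID-to-AnalysisKey rename follow the general new-pass-manager protocol, in which a cached result reports itself stale unless it was preserved and every analysis it recorded as a dependency is still valid. The MyAnalysis class below is a hypothetical example of that protocol, not an LLVM class.

#include "llvm/IR/Dominators.h"
#include "llvm/IR/PassManager.h"

using namespace llvm;

class MyAnalysis : public AnalysisInfoMixin<MyAnalysis> {
  friend AnalysisInfoMixin<MyAnalysis>;
  static AnalysisKey Key; // the convention replacing "static char PassID"

public:
  struct Result {
    bool invalidate(Function &F, const PreservedAnalyses &PA,
                    FunctionAnalysisManager::Invalidator &Inv) {
      // If neither this analysis nor the all-function-analyses set was
      // preserved, the result has to be recomputed.
      auto PAC = PA.getChecker<MyAnalysis>();
      if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
        return true;
      // Otherwise it stays valid only while its inputs do, mirroring the
      // AADeps loop above (here the only input is the dominator tree).
      return Inv.invalidate<DominatorTreeAnalysis>(F, PA);
    }
  };

  Result run(Function &F, FunctionAnalysisManager &AM) {
    (void)AM.getResult<DominatorTreeAnalysis>(F); // establish the dependency
    return Result();
  }
};

AnalysisKey MyAnalysis::Key;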
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
index baf8f3f881db..4d6a6c9a30aa 100644
--- a/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/contrib/llvm/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -88,7 +88,7 @@ static inline bool isInterestingPointer(Value *V) {
&& !isa<ConstantPointerNull>(V);
}
-PreservedAnalyses AAEvaluator::run(Function &F, AnalysisManager<Function> &AM) {
+PreservedAnalyses AAEvaluator::run(Function &F, FunctionAnalysisManager &AM) {
runInternal(F, AM.getResult<AAManager>(F));
return PreservedAnalyses::all();
}
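With the signature now spelled in terms of FunctionAnalysisManager, AAEvaluator can be scheduled like any other new-pass-manager function pass. The driver below is a hypothetical sketch assuming a FunctionAnalysisManager that already has AAManager registered; it is not part of the patch.

#include "llvm/Analysis/AliasAnalysisEvaluator.h"
#include "llvm/IR/PassManager.h"

using namespace llvm;

void scheduleAAEval(FunctionPassManager &FPM) {
  FPM.addPass(AAEvaluator()); // prints alias/mod-ref statistics per function
}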
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysisSummary.cpp b/contrib/llvm/lib/Analysis/AliasAnalysisSummary.cpp
index f3f13df283db..2b4879453beb 100644
--- a/contrib/llvm/lib/Analysis/AliasAnalysisSummary.cpp
+++ b/contrib/llvm/lib/Analysis/AliasAnalysisSummary.cpp
@@ -7,25 +7,23 @@ namespace llvm {
namespace cflaa {
namespace {
-LLVM_CONSTEXPR unsigned AttrEscapedIndex = 0;
-LLVM_CONSTEXPR unsigned AttrUnknownIndex = 1;
-LLVM_CONSTEXPR unsigned AttrGlobalIndex = 2;
-LLVM_CONSTEXPR unsigned AttrCallerIndex = 3;
-LLVM_CONSTEXPR unsigned AttrFirstArgIndex = 4;
-LLVM_CONSTEXPR unsigned AttrLastArgIndex = NumAliasAttrs;
-LLVM_CONSTEXPR unsigned AttrMaxNumArgs = AttrLastArgIndex - AttrFirstArgIndex;
+const unsigned AttrEscapedIndex = 0;
+const unsigned AttrUnknownIndex = 1;
+const unsigned AttrGlobalIndex = 2;
+const unsigned AttrCallerIndex = 3;
+const unsigned AttrFirstArgIndex = 4;
+const unsigned AttrLastArgIndex = NumAliasAttrs;
+const unsigned AttrMaxNumArgs = AttrLastArgIndex - AttrFirstArgIndex;
-// NOTE: These aren't AliasAttrs because bitsets don't have a constexpr
-// ctor for some versions of MSVC that we support. We could maybe refactor,
-// but...
+// It would be *slightly* prettier if we changed these to AliasAttrs, but it
+// seems that both GCC and MSVC emit dynamic initializers for const bitsets.
using AliasAttr = unsigned;
-LLVM_CONSTEXPR AliasAttr AttrNone = 0;
-LLVM_CONSTEXPR AliasAttr AttrEscaped = 1 << AttrEscapedIndex;
-LLVM_CONSTEXPR AliasAttr AttrUnknown = 1 << AttrUnknownIndex;
-LLVM_CONSTEXPR AliasAttr AttrGlobal = 1 << AttrGlobalIndex;
-LLVM_CONSTEXPR AliasAttr AttrCaller = 1 << AttrCallerIndex;
-LLVM_CONSTEXPR AliasAttr ExternalAttrMask =
- AttrEscaped | AttrUnknown | AttrGlobal;
+const AliasAttr AttrNone = 0;
+const AliasAttr AttrEscaped = 1 << AttrEscapedIndex;
+const AliasAttr AttrUnknown = 1 << AttrUnknownIndex;
+const AliasAttr AttrGlobal = 1 << AttrGlobalIndex;
+const AliasAttr AttrCaller = 1 << AttrCallerIndex;
+const AliasAttr ExternalAttrMask = AttrEscaped | AttrUnknown | AttrGlobal;
}
AliasAttrs getAttrNone() { return AttrNone; }
@@ -91,7 +89,7 @@ instantiateExternalRelation(ExternalRelation ERelation, CallSite CS) {
auto To = instantiateInterfaceValue(ERelation.To, CS);
if (!To)
return None;
- return InstantiatedRelation{*From, *To};
+ return InstantiatedRelation{*From, *To, ERelation.Offset};
}
Optional<InstantiatedAttr> instantiateExternalAttribute(ExternalAttribute EAttr,
diff --git a/contrib/llvm/lib/Analysis/AliasAnalysisSummary.h b/contrib/llvm/lib/Analysis/AliasAnalysisSummary.h
index 43c0d4cb14f9..51a85f4e7061 100644
--- a/contrib/llvm/lib/Analysis/AliasAnalysisSummary.h
+++ b/contrib/llvm/lib/Analysis/AliasAnalysisSummary.h
@@ -99,7 +99,7 @@ AliasAttrs getExternallyVisibleAttrs(AliasAttrs);
//===----------------------------------------------------------------------===//
/// The maximum number of arguments we can put into a summary.
-LLVM_CONSTEXPR static unsigned MaxSupportedArgsInSummary = 50;
+static const unsigned MaxSupportedArgsInSummary = 50;
/// We use InterfaceValue to describe parameters/return value, as well as
/// potential memory locations that are pointed to by parameters/return value,
@@ -120,13 +120,66 @@ inline bool operator==(InterfaceValue LHS, InterfaceValue RHS) {
inline bool operator!=(InterfaceValue LHS, InterfaceValue RHS) {
return !(LHS == RHS);
}
+inline bool operator<(InterfaceValue LHS, InterfaceValue RHS) {
+ return LHS.Index < RHS.Index ||
+ (LHS.Index == RHS.Index && LHS.DerefLevel < RHS.DerefLevel);
+}
+inline bool operator>(InterfaceValue LHS, InterfaceValue RHS) {
+ return RHS < LHS;
+}
+inline bool operator<=(InterfaceValue LHS, InterfaceValue RHS) {
+ return !(RHS < LHS);
+}
+inline bool operator>=(InterfaceValue LHS, InterfaceValue RHS) {
+ return !(LHS < RHS);
+}
+
+// We use UnknownOffset to represent pointer offsets that cannot be determined
+// at compile time. Note that MemoryLocation::UnknownSize cannot be used here
+// because we require a signed value.
+static const int64_t UnknownOffset = INT64_MAX;
+
+inline int64_t addOffset(int64_t LHS, int64_t RHS) {
+ if (LHS == UnknownOffset || RHS == UnknownOffset)
+ return UnknownOffset;
+ // FIXME: Do we need to guard against integer overflow here?
+ return LHS + RHS;
+}
/// We use ExternalRelation to describe an externally visible aliasing relations
/// between parameters/return value of a function.
struct ExternalRelation {
InterfaceValue From, To;
+ int64_t Offset;
};
+inline bool operator==(ExternalRelation LHS, ExternalRelation RHS) {
+ return LHS.From == RHS.From && LHS.To == RHS.To && LHS.Offset == RHS.Offset;
+}
+inline bool operator!=(ExternalRelation LHS, ExternalRelation RHS) {
+ return !(LHS == RHS);
+}
+inline bool operator<(ExternalRelation LHS, ExternalRelation RHS) {
+ if (LHS.From < RHS.From)
+ return true;
+ if (LHS.From > RHS.From)
+ return false;
+ if (LHS.To < RHS.To)
+ return true;
+ if (LHS.To > RHS.To)
+ return false;
+ return LHS.Offset < RHS.Offset;
+}
+inline bool operator>(ExternalRelation LHS, ExternalRelation RHS) {
+ return RHS < LHS;
+}
+inline bool operator<=(ExternalRelation LHS, ExternalRelation RHS) {
+ return !(RHS < LHS);
+}
+inline bool operator>=(ExternalRelation LHS, ExternalRelation RHS) {
+ return !(LHS < RHS);
+}
+
/// We use ExternalAttribute to describe an externally visible AliasAttrs
/// for parameters/return value.
struct ExternalAttribute {
@@ -174,6 +227,7 @@ inline bool operator>=(InstantiatedValue LHS, InstantiatedValue RHS) {
/// callsite
struct InstantiatedRelation {
InstantiatedValue From, To;
+ int64_t Offset;
};
Optional<InstantiatedRelation> instantiateExternalRelation(ExternalRelation,
CallSite);
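The Offset fields added to ExternalRelation and InstantiatedRelation are combined with the saturating addOffset helper above. The standalone sketch below (plain C++, not part of the patch) shows the intended arithmetic: any operand equal to UnknownOffset makes the sum unknown, and the overflow guard is still an open FIXME.

#include <cassert>
#include <cstdint>
#include <limits>

// Mirrors the header's UnknownOffset/addOffset (names copied from the patch).
static const int64_t UnknownOffset = std::numeric_limits<int64_t>::max();

static int64_t addOffset(int64_t LHS, int64_t RHS) {
  if (LHS == UnknownOffset || RHS == UnknownOffset)
    return UnknownOffset; // an unknown component makes the whole offset unknown
  return LHS + RHS;       // the patch leaves overflow handling as a FIXME
}

int main() {
  assert(addOffset(8, 16) == 24);                       // two constant offsets
  assert(addOffset(8, UnknownOffset) == UnknownOffset); // runtime index wins
  return 0;
}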
diff --git a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
index d349ac51a9b9..701b0e1a5925 100644
--- a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
+++ b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
@@ -26,12 +26,19 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+static cl::opt<unsigned>
+ SaturationThreshold("alias-set-saturation-threshold", cl::Hidden,
+ cl::init(250),
+ cl::desc("The maximum number of pointers may-alias "
+ "sets may contain before degradation"));
+
/// mergeSetIn - Merge the specified alias set into this alias set.
///
void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) {
assert(!AS.Forward && "Alias set is already forwarding!");
assert(!Forward && "This set is a forwarding set!!");
+ bool WasMustAlias = (Alias == SetMustAlias);
// Update the alias and access types of this set...
Access |= AS.Access;
Alias |= AS.Alias;
@@ -52,6 +59,13 @@ void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) {
Alias = SetMayAlias;
}
+ if (Alias == SetMayAlias) {
+ if (WasMustAlias)
+ AST.TotalMayAliasSetSize += size();
+ if (AS.Alias == SetMustAlias)
+ AST.TotalMayAliasSetSize += AS.size();
+ }
+
bool ASHadUnknownInsts = !AS.UnknownInsts.empty();
if (UnknownInsts.empty()) { // Merge call sites...
if (ASHadUnknownInsts) {
@@ -63,11 +77,13 @@ void AliasSet::mergeSetIn(AliasSet &AS, AliasSetTracker &AST) {
AS.UnknownInsts.clear();
}
- AS.Forward = this; // Forward across AS now...
- addRef(); // AS is now pointing to us...
+ AS.Forward = this; // Forward across AS now...
+ addRef(); // AS is now pointing to us...
// Merge the list of constituent pointers...
if (AS.PtrList) {
+ SetSize += AS.size();
+ AS.SetSize = 0;
*PtrListEnd = AS.PtrList;
AS.PtrList->setPrevInList(PtrListEnd);
PtrListEnd = AS.PtrListEnd;
@@ -85,7 +101,12 @@ void AliasSetTracker::removeAliasSet(AliasSet *AS) {
Fwd->dropRef(*this);
AS->Forward = nullptr;
}
+
+ if (AS->Alias == AliasSet::SetMayAlias)
+ TotalMayAliasSetSize -= AS->size();
+
AliasSets.erase(AS);
+
}
void AliasSet::removeFromTracker(AliasSetTracker &AST) {
@@ -105,10 +126,13 @@ void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry,
AliasResult Result =
AA.alias(MemoryLocation(P->getValue(), P->getSize(), P->getAAInfo()),
MemoryLocation(Entry.getValue(), Size, AAInfo));
- if (Result != MustAlias)
+ if (Result != MustAlias) {
Alias = SetMayAlias;
- else // First entry of must alias must have maximum size!
+ AST.TotalMayAliasSetSize += size();
+ } else {
+ // First entry of must alias must have maximum size!
P->updateSizeAndAAInfo(Size, AAInfo);
+ }
assert(Result != NoAlias && "Cannot be part of must set!");
}
@@ -116,11 +140,16 @@ void AliasSet::addPointer(AliasSetTracker &AST, PointerRec &Entry,
Entry.updateSizeAndAAInfo(Size, AAInfo);
// Add it to the end of the list...
+ ++SetSize;
assert(*PtrListEnd == nullptr && "End of list is not null?");
*PtrListEnd = &Entry;
PtrListEnd = Entry.setPrevInList(PtrListEnd);
assert(*PtrListEnd == nullptr && "End of list is not null?");
- addRef(); // Entry points to alias set.
+ // Entry points to alias set.
+ addRef();
+
+ if (Alias == SetMayAlias)
+ AST.TotalMayAliasSetSize++;
}
void AliasSet::addUnknownInst(Instruction *I, AliasAnalysis &AA) {
@@ -145,6 +174,9 @@ void AliasSet::addUnknownInst(Instruction *I, AliasAnalysis &AA) {
bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size,
const AAMDNodes &AAInfo,
AliasAnalysis &AA) const {
+ if (AliasAny)
+ return true;
+
if (Alias == SetMustAlias) {
assert(UnknownInsts.empty() && "Illegal must alias set!");
@@ -177,6 +209,10 @@ bool AliasSet::aliasesPointer(const Value *Ptr, uint64_t Size,
bool AliasSet::aliasesUnknownInst(const Instruction *Inst,
AliasAnalysis &AA) const {
+
+ if (AliasAny)
+ return true;
+
if (!Inst->mayReadOrWriteMemory())
return false;
@@ -229,17 +265,6 @@ AliasSet *AliasSetTracker::mergeAliasSetsForPointer(const Value *Ptr,
return FoundSet;
}
-/// containsPointer - Return true if the specified location is represented by
-/// this alias set, false otherwise. This does not modify the AST object or
-/// alias sets.
-bool AliasSetTracker::containsPointer(const Value *Ptr, uint64_t Size,
- const AAMDNodes &AAInfo) const {
- for (const AliasSet &AS : *this)
- if (!AS.Forward && AS.aliasesPointer(Ptr, Size, AAInfo, AA))
- return true;
- return false;
-}
-
bool AliasSetTracker::containsUnknown(const Instruction *Inst) const {
for (const AliasSet &AS : *this)
if (!AS.Forward && AS.aliasesUnknownInst(Inst, AA))
@@ -261,16 +286,28 @@ AliasSet *AliasSetTracker::findAliasSetForUnknownInst(Instruction *Inst) {
return FoundSet;
}
-
-
-
/// getAliasSetForPointer - Return the alias set that the specified pointer
/// lives in.
AliasSet &AliasSetTracker::getAliasSetForPointer(Value *Pointer, uint64_t Size,
- const AAMDNodes &AAInfo,
- bool *New) {
+ const AAMDNodes &AAInfo) {
AliasSet::PointerRec &Entry = getEntryFor(Pointer);
+ if (AliasAnyAS) {
+ // At this point, the AST is saturated, so we only have one active alias
+ // set. That means we already know which alias set we want to return, and
+ // just need to add the pointer to that set to keep the data structure
+ // consistent.
+ // This, of course, means that we will never need a merge here.
+ if (Entry.hasAliasSet()) {
+ Entry.updateSizeAndAAInfo(Size, AAInfo);
+ assert(Entry.getAliasSet(*this) == AliasAnyAS &&
+ "Entry in saturated AST must belong to only alias set");
+ } else {
+ AliasAnyAS->addPointer(*this, Entry, Size, AAInfo);
+ }
+ return *AliasAnyAS;
+ }
+
// Check to see if the pointer is already known.
if (Entry.hasAliasSet()) {
// If the size changed, we may need to merge several alias sets.
@@ -290,68 +327,55 @@ AliasSet &AliasSetTracker::getAliasSetForPointer(Value *Pointer, uint64_t Size,
return *AS;
}
- if (New) *New = true;
// Otherwise create a new alias set to hold the loaded pointer.
AliasSets.push_back(new AliasSet());
AliasSets.back().addPointer(*this, Entry, Size, AAInfo);
return AliasSets.back();
}
-bool AliasSetTracker::add(Value *Ptr, uint64_t Size, const AAMDNodes &AAInfo) {
- bool NewPtr;
- addPointer(Ptr, Size, AAInfo, AliasSet::NoAccess, NewPtr);
- return NewPtr;
+void AliasSetTracker::add(Value *Ptr, uint64_t Size, const AAMDNodes &AAInfo) {
+ addPointer(Ptr, Size, AAInfo, AliasSet::NoAccess);
}
-
-bool AliasSetTracker::add(LoadInst *LI) {
+void AliasSetTracker::add(LoadInst *LI) {
if (isStrongerThanMonotonic(LI->getOrdering())) return addUnknown(LI);
AAMDNodes AAInfo;
LI->getAAMetadata(AAInfo);
AliasSet::AccessLattice Access = AliasSet::RefAccess;
- bool NewPtr;
const DataLayout &DL = LI->getModule()->getDataLayout();
AliasSet &AS = addPointer(LI->getOperand(0),
- DL.getTypeStoreSize(LI->getType()),
- AAInfo, Access, NewPtr);
+ DL.getTypeStoreSize(LI->getType()), AAInfo, Access);
if (LI->isVolatile()) AS.setVolatile();
- return NewPtr;
}
-bool AliasSetTracker::add(StoreInst *SI) {
+void AliasSetTracker::add(StoreInst *SI) {
if (isStrongerThanMonotonic(SI->getOrdering())) return addUnknown(SI);
AAMDNodes AAInfo;
SI->getAAMetadata(AAInfo);
AliasSet::AccessLattice Access = AliasSet::ModAccess;
- bool NewPtr;
const DataLayout &DL = SI->getModule()->getDataLayout();
Value *Val = SI->getOperand(0);
- AliasSet &AS = addPointer(SI->getOperand(1),
- DL.getTypeStoreSize(Val->getType()),
- AAInfo, Access, NewPtr);
+ AliasSet &AS = addPointer(
+ SI->getOperand(1), DL.getTypeStoreSize(Val->getType()), AAInfo, Access);
if (SI->isVolatile()) AS.setVolatile();
- return NewPtr;
}
-bool AliasSetTracker::add(VAArgInst *VAAI) {
+void AliasSetTracker::add(VAArgInst *VAAI) {
AAMDNodes AAInfo;
VAAI->getAAMetadata(AAInfo);
- bool NewPtr;
addPointer(VAAI->getOperand(0), MemoryLocation::UnknownSize, AAInfo,
- AliasSet::ModRefAccess, NewPtr);
- return NewPtr;
+ AliasSet::ModRefAccess);
}
-bool AliasSetTracker::add(MemSetInst *MSI) {
+void AliasSetTracker::add(MemSetInst *MSI) {
AAMDNodes AAInfo;
MSI->getAAMetadata(AAInfo);
- bool NewPtr;
uint64_t Len;
if (ConstantInt *C = dyn_cast<ConstantInt>(MSI->getLength()))
@@ -360,30 +384,61 @@ bool AliasSetTracker::add(MemSetInst *MSI) {
Len = MemoryLocation::UnknownSize;
AliasSet &AS =
- addPointer(MSI->getRawDest(), Len, AAInfo, AliasSet::ModAccess, NewPtr);
+ addPointer(MSI->getRawDest(), Len, AAInfo, AliasSet::ModAccess);
if (MSI->isVolatile())
AS.setVolatile();
- return NewPtr;
}
-bool AliasSetTracker::addUnknown(Instruction *Inst) {
- if (isa<DbgInfoIntrinsic>(Inst))
- return true; // Ignore DbgInfo Intrinsics.
+void AliasSetTracker::add(MemTransferInst *MTI) {
+ AAMDNodes AAInfo;
+ MTI->getAAMetadata(AAInfo);
+
+ uint64_t Len;
+ if (ConstantInt *C = dyn_cast<ConstantInt>(MTI->getLength()))
+ Len = C->getZExtValue();
+ else
+ Len = MemoryLocation::UnknownSize;
+
+ AliasSet &ASSrc =
+ addPointer(MTI->getRawSource(), Len, AAInfo, AliasSet::RefAccess);
+ if (MTI->isVolatile())
+ ASSrc.setVolatile();
+
+ AliasSet &ASDst =
+ addPointer(MTI->getRawDest(), Len, AAInfo, AliasSet::ModAccess);
+ if (MTI->isVolatile())
+ ASDst.setVolatile();
+}
+
+void AliasSetTracker::addUnknown(Instruction *Inst) {
+ if (isa<DbgInfoIntrinsic>(Inst))
+ return; // Ignore DbgInfo Intrinsics.
+
+ if (auto *II = dyn_cast<IntrinsicInst>(Inst)) {
+ // These intrinsics will show up as affecting memory, but they are just
+ // markers.
+ switch (II->getIntrinsicID()) {
+ default:
+ break;
+ // FIXME: Add lifetime/invariant intrinsics (See: PR30807).
+ case Intrinsic::assume:
+ return;
+ }
+ }
if (!Inst->mayReadOrWriteMemory())
- return true; // doesn't alias anything
+ return; // doesn't alias anything
AliasSet *AS = findAliasSetForUnknownInst(Inst);
if (AS) {
AS->addUnknownInst(Inst, AA);
- return false;
+ return;
}
AliasSets.push_back(new AliasSet());
AS = &AliasSets.back();
AS->addUnknownInst(Inst, AA);
- return true;
}
-bool AliasSetTracker::add(Instruction *I) {
+void AliasSetTracker::add(Instruction *I) {
// Dispatch to one of the other add methods.
if (LoadInst *LI = dyn_cast<LoadInst>(I))
return add(LI);
@@ -393,8 +448,9 @@ bool AliasSetTracker::add(Instruction *I) {
return add(VAAI);
if (MemSetInst *MSI = dyn_cast<MemSetInst>(I))
return add(MSI);
+ if (MemTransferInst *MTI = dyn_cast<MemTransferInst>(I))
+ return add(MTI);
return addUnknown(I);
- // FIXME: add support of memcpy and memmove.
}
void AliasSetTracker::add(BasicBlock &BB) {
@@ -418,134 +474,15 @@ void AliasSetTracker::add(const AliasSetTracker &AST) {
add(AS.UnknownInsts[i]);
// Loop over all of the pointers in this alias set.
- bool X;
for (AliasSet::iterator ASI = AS.begin(), E = AS.end(); ASI != E; ++ASI) {
- AliasSet &NewAS = addPointer(ASI.getPointer(), ASI.getSize(),
- ASI.getAAInfo(),
- (AliasSet::AccessLattice)AS.Access, X);
+ AliasSet &NewAS =
+ addPointer(ASI.getPointer(), ASI.getSize(), ASI.getAAInfo(),
+ (AliasSet::AccessLattice)AS.Access);
if (AS.isVolatile()) NewAS.setVolatile();
}
}
}
-/// remove - Remove the specified (potentially non-empty) alias set from the
-/// tracker.
-void AliasSetTracker::remove(AliasSet &AS) {
- // Drop all call sites.
- if (!AS.UnknownInsts.empty())
- AS.dropRef(*this);
- AS.UnknownInsts.clear();
-
- // Clear the alias set.
- unsigned NumRefs = 0;
- while (!AS.empty()) {
- AliasSet::PointerRec *P = AS.PtrList;
-
- Value *ValToRemove = P->getValue();
-
- // Unlink and delete entry from the list of values.
- P->eraseFromList();
-
- // Remember how many references need to be dropped.
- ++NumRefs;
-
- // Finally, remove the entry.
- PointerMap.erase(ValToRemove);
- }
-
- // Stop using the alias set, removing it.
- AS.RefCount -= NumRefs;
- if (AS.RefCount == 0)
- AS.removeFromTracker(*this);
-}
-
-bool
-AliasSetTracker::remove(Value *Ptr, uint64_t Size, const AAMDNodes &AAInfo) {
- AliasSet *AS = mergeAliasSetsForPointer(Ptr, Size, AAInfo);
- if (!AS) return false;
- remove(*AS);
- return true;
-}
-
-bool AliasSetTracker::remove(LoadInst *LI) {
- const DataLayout &DL = LI->getModule()->getDataLayout();
- uint64_t Size = DL.getTypeStoreSize(LI->getType());
-
- AAMDNodes AAInfo;
- LI->getAAMetadata(AAInfo);
-
- AliasSet *AS = mergeAliasSetsForPointer(LI->getOperand(0), Size, AAInfo);
- if (!AS) return false;
- remove(*AS);
- return true;
-}
-
-bool AliasSetTracker::remove(StoreInst *SI) {
- const DataLayout &DL = SI->getModule()->getDataLayout();
- uint64_t Size = DL.getTypeStoreSize(SI->getOperand(0)->getType());
-
- AAMDNodes AAInfo;
- SI->getAAMetadata(AAInfo);
-
- AliasSet *AS = mergeAliasSetsForPointer(SI->getOperand(1), Size, AAInfo);
- if (!AS) return false;
- remove(*AS);
- return true;
-}
-
-bool AliasSetTracker::remove(VAArgInst *VAAI) {
- AAMDNodes AAInfo;
- VAAI->getAAMetadata(AAInfo);
-
- AliasSet *AS = mergeAliasSetsForPointer(VAAI->getOperand(0),
- MemoryLocation::UnknownSize, AAInfo);
- if (!AS) return false;
- remove(*AS);
- return true;
-}
-
-bool AliasSetTracker::remove(MemSetInst *MSI) {
- AAMDNodes AAInfo;
- MSI->getAAMetadata(AAInfo);
- uint64_t Len;
-
- if (ConstantInt *C = dyn_cast<ConstantInt>(MSI->getLength()))
- Len = C->getZExtValue();
- else
- Len = MemoryLocation::UnknownSize;
-
- AliasSet *AS = mergeAliasSetsForPointer(MSI->getRawDest(), Len, AAInfo);
- if (!AS)
- return false;
- remove(*AS);
- return true;
-}
-
-bool AliasSetTracker::removeUnknown(Instruction *I) {
- if (!I->mayReadOrWriteMemory())
- return false; // doesn't alias anything
-
- AliasSet *AS = findAliasSetForUnknownInst(I);
- if (!AS) return false;
- remove(*AS);
- return true;
-}
-
-bool AliasSetTracker::remove(Instruction *I) {
- // Dispatch to one of the other remove methods...
- if (LoadInst *LI = dyn_cast<LoadInst>(I))
- return remove(LI);
- if (StoreInst *SI = dyn_cast<StoreInst>(I))
- return remove(SI);
- if (VAArgInst *VAAI = dyn_cast<VAArgInst>(I))
- return remove(VAAI);
- if (MemSetInst *MSI = dyn_cast<MemSetInst>(I))
- return remove(MSI);
- return removeUnknown(I);
- // FIXME: add support of memcpy and memmove.
-}
-
-
// deleteValue method - This method is used to remove a pointer value from the
// AliasSetTracker entirely. It should be used when an instruction is deleted
// from the program to update the AST. If you don't use this, you would have
@@ -575,6 +512,11 @@ void AliasSetTracker::deleteValue(Value *PtrVal) {
// Unlink and delete from the list of values.
PtrValEnt->eraseFromList();
+
+ if (AS->Alias == AliasSet::SetMayAlias) {
+ AS->SetSize--;
+ TotalMayAliasSetSize--;
+ }
// Stop using the alias set.
AS->dropRef(*this);
@@ -597,15 +539,68 @@ void AliasSetTracker::copyValue(Value *From, Value *To) {
AliasSet::PointerRec &Entry = getEntryFor(To);
if (Entry.hasAliasSet()) return; // Already in the tracker!
- // Add it to the alias set it aliases...
+ // getEntryFor above may invalidate iterator \c I, so reinitialize it.
I = PointerMap.find_as(From);
+ // Add it to the alias set it aliases...
AliasSet *AS = I->second->getAliasSet(*this);
AS->addPointer(*this, Entry, I->second->getSize(),
I->second->getAAInfo(),
true);
}
+AliasSet &AliasSetTracker::mergeAllAliasSets() {
+ assert(!AliasAnyAS && (TotalMayAliasSetSize > SaturationThreshold) &&
+ "Full merge should happen once, when the saturation threshold is "
+ "reached");
+
+ // Collect all alias sets, so that we can drop references with impunity
+ // without worrying about iterator invalidation.
+ std::vector<AliasSet *> ASVector;
+ ASVector.reserve(SaturationThreshold);
+ for (iterator I = begin(), E = end(); I != E; I++)
+ ASVector.push_back(&*I);
+
+ // Copy all instructions and pointers into a new set, and forward all other
+ // sets to it.
+ AliasSets.push_back(new AliasSet());
+ AliasAnyAS = &AliasSets.back();
+ AliasAnyAS->Alias = AliasSet::SetMayAlias;
+ AliasAnyAS->Access = AliasSet::ModRefAccess;
+ AliasAnyAS->AliasAny = true;
+
+ for (auto Cur : ASVector) {
+
+ // If Cur was already forwarding, just forward to the new AS instead.
+ AliasSet *FwdTo = Cur->Forward;
+ if (FwdTo) {
+ Cur->Forward = AliasAnyAS;
+ AliasAnyAS->addRef();
+ FwdTo->dropRef(*this);
+ continue;
+ }
+
+ // Otherwise, perform the actual merge.
+ AliasAnyAS->mergeSetIn(*Cur, *this);
+ }
+
+ return *AliasAnyAS;
+}
+
+AliasSet &AliasSetTracker::addPointer(Value *P, uint64_t Size,
+ const AAMDNodes &AAInfo,
+ AliasSet::AccessLattice E) {
+
+ AliasSet &AS = getAliasSetForPointer(P, Size, AAInfo);
+ AS.Access |= E;
+
+ if (!AliasAnyAS && (TotalMayAliasSetSize > SaturationThreshold)) {
+ // The AST is now saturated. From here on, we conservatively consider all
+ // pointers to alias each-other.
+ return mergeAllAliasSets();
+ }
+ return AS;
+}
//===----------------------------------------------------------------------===//
// AliasSet/AliasSetTracker Printing Support
@@ -700,7 +695,7 @@ namespace {
bool runOnFunction(Function &F) override {
auto &AAWP = getAnalysis<AAResultsWrapperPass>();
Tracker = new AliasSetTracker(AAWP.getAAResults());
-
+ errs() << "Alias sets for function '" << F.getName() << "':\n";
for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I)
Tracker->add(&*I);
Tracker->print(errs());
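The new -alias-set-saturation-threshold option (default 250) caps the combined size of all may-alias sets; once the running total TotalMayAliasSetSize exceeds it, mergeAllAliasSets() folds every set into a single AliasAny set that conservatively answers every aliasing query with "yes". The sketch below uses hypothetical, heavily simplified types and deliberately omits the real alias queries and the must/may distinction; it only illustrates the thresholding and fold-into-one-set policy.

#include <memory>
#include <vector>

struct Set {
  std::vector<const void *> Ptrs;
  bool AliasAny = false;
};

class Tracker {
  std::vector<std::unique_ptr<Set>> Sets;
  Set *AliasAnyAS = nullptr;          // non-null once saturated
  unsigned TotalMayAliasSetSize = 0;
  const unsigned SaturationThreshold; // mirrors -alias-set-saturation-threshold

public:
  explicit Tracker(unsigned Threshold = 250) : SaturationThreshold(Threshold) {}

  Set &addPointer(const void *P) {
    if (AliasAnyAS) {                 // saturated: one set answers everything
      AliasAnyAS->Ptrs.push_back(P);
      return *AliasAnyAS;
    }
    Sets.push_back(std::make_unique<Set>());
    Sets.back()->Ptrs.push_back(P);
    ++TotalMayAliasSetSize;           // the real tracker counts only may-alias sets
    if (TotalMayAliasSetSize > SaturationThreshold)
      return mergeAll();              // degrade to a single conservative set
    return *Sets.back();
  }

private:
  Set &mergeAll() {
    auto Merged = std::make_unique<Set>();
    Merged->AliasAny = true;          // every aliasing query now returns true
    for (auto &S : Sets)
      Merged->Ptrs.insert(Merged->Ptrs.end(), S->Ptrs.begin(), S->Ptrs.end());
    Sets.clear();
    Sets.push_back(std::move(Merged));
    AliasAnyAS = Sets.back().get();
    return *AliasAnyAS;
  }
};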
diff --git a/contrib/llvm/lib/Analysis/Analysis.cpp b/contrib/llvm/lib/Analysis/Analysis.cpp
index c04447ca58c9..0e7cf402cdb5 100644
--- a/contrib/llvm/lib/Analysis/Analysis.cpp
+++ b/contrib/llvm/lib/Analysis/Analysis.cpp
@@ -30,10 +30,10 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeCallGraphPrinterLegacyPassPass(Registry);
initializeCallGraphViewerPass(Registry);
initializeCostModelAnalysisPass(Registry);
- initializeCFGViewerPass(Registry);
- initializeCFGPrinterPass(Registry);
- initializeCFGOnlyViewerPass(Registry);
- initializeCFGOnlyPrinterPass(Registry);
+ initializeCFGViewerLegacyPassPass(Registry);
+ initializeCFGPrinterLegacyPassPass(Registry);
+ initializeCFGOnlyViewerLegacyPassPass(Registry);
+ initializeCFGOnlyPrinterLegacyPassPass(Registry);
initializeCFLAndersAAWrapperPassPass(Registry);
initializeCFLSteensAAWrapperPassPass(Registry);
initializeDependenceAnalysisWrapperPassPass(Registry);
@@ -54,6 +54,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeIVUsersWrapperPassPass(Registry);
initializeInstCountPass(Registry);
initializeIntervalPartitionPass(Registry);
+ initializeLazyBranchProbabilityInfoPassPass(Registry);
initializeLazyBlockFrequencyInfoPassPass(Registry);
initializeLazyValueInfoWrapperPassPass(Registry);
initializeLintPass(Registry);
@@ -76,6 +77,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
initializeTargetTransformInfoWrapperPassPass(Registry);
initializeTypeBasedAAWrapperPassPass(Registry);
initializeScopedNoAliasAAWrapperPassPass(Registry);
+ initializeLCSSAVerificationPassPass(Registry);
}
void LLVMInitializeAnalysis(LLVMPassRegistryRef R) {
diff --git a/contrib/llvm/lib/Analysis/AssumptionCache.cpp b/contrib/llvm/lib/Analysis/AssumptionCache.cpp
index ca71644757f0..3c518034ba62 100644
--- a/contrib/llvm/lib/Analysis/AssumptionCache.cpp
+++ b/contrib/llvm/lib/Analysis/AssumptionCache.cpp
@@ -74,10 +74,10 @@ void AssumptionCache::registerAssumption(CallInst *CI) {
#endif
}
-char AssumptionAnalysis::PassID;
+AnalysisKey AssumptionAnalysis::Key;
PreservedAnalyses AssumptionPrinterPass::run(Function &F,
- AnalysisManager<Function> &AM) {
+ FunctionAnalysisManager &AM) {
AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
OS << "Cached assumptions for function: " << F.getName() << "\n";
diff --git a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index 43d5c3ccf907..e8d86bdbca5b 100644
--- a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -63,6 +63,21 @@ const unsigned MaxNumPhiBBsValueReachabilityCheck = 20;
// depth otherwise the algorithm in aliasGEP will assert.
static const unsigned MaxLookupSearchDepth = 6;
+bool BasicAAResult::invalidate(Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &Inv) {
+ // We don't care if this analysis itself is preserved, it has no state. But
+ // we need to check that the analyses it depends on have been. Note that we
+ // may be created without handles to some analyses and in that case don't
+ // depend on them.
+ if (Inv.invalidate<AssumptionAnalysis>(F, PA) ||
+ (DT && Inv.invalidate<DominatorTreeAnalysis>(F, PA)) ||
+ (LI && Inv.invalidate<LoopAnalysis>(F, PA)))
+ return true;
+
+ // Otherwise this analysis result remains valid.
+ return false;
+}
+
//===----------------------------------------------------------------------===//
// Useful predicates
//===----------------------------------------------------------------------===//
@@ -227,7 +242,7 @@ static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL,
Offset = 0;
return V;
}
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
case Instruction::Add:
V = GetLinearExpression(BOp->getOperand(0), Scale, Offset, ZExtBits,
SExtBits, DL, Depth + 1, AC, DT, NSW, NUW);
@@ -275,7 +290,7 @@ static bool isObjectSize(const Value *V, uint64_t Size, const DataLayout &DL,
GetLinearExpression(CastOp, Scale, Offset, ZExtBits, SExtBits, DL,
Depth + 1, AC, DT, NSW, NUW);
- // zext(zext(%x)) == zext(%x), and similiarly for sext; we'll handle this
+ // zext(zext(%x)) == zext(%x), and similarly for sext; we'll handle this
// by just incrementing the number of bits we've extended by.
unsigned ExtendedBy = NewWidth - SmallWidth;
@@ -409,11 +424,13 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V,
// Walk the indices of the GEP, accumulating them into BaseOff/VarIndices.
gep_type_iterator GTI = gep_type_begin(GEPOp);
unsigned PointerSize = DL.getPointerSizeInBits(AS);
+ // Assume all GEP operands are constants until proven otherwise.
+ bool GepHasConstantOffset = true;
for (User::const_op_iterator I = GEPOp->op_begin() + 1, E = GEPOp->op_end();
- I != E; ++I) {
+ I != E; ++I, ++GTI) {
const Value *Index = *I;
// Compute the (potentially symbolic) offset in bytes for this index.
- if (StructType *STy = dyn_cast<StructType>(*GTI++)) {
+ if (StructType *STy = GTI.getStructTypeOrNull()) {
// For a struct, add the member offset.
unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
if (FieldNo == 0)
@@ -429,11 +446,13 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V,
if (CIdx->isZero())
continue;
Decomposed.OtherOffset +=
- DL.getTypeAllocSize(*GTI) * CIdx->getSExtValue();
+ DL.getTypeAllocSize(GTI.getIndexedType()) * CIdx->getSExtValue();
continue;
}
- uint64_t Scale = DL.getTypeAllocSize(*GTI);
+ GepHasConstantOffset = false;
+
+ uint64_t Scale = DL.getTypeAllocSize(GTI.getIndexedType());
unsigned ZExtBits = 0, SExtBits = 0;
// If the integer type is smaller than the pointer size, it is implicitly
@@ -458,7 +477,7 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V,
// A[x][x] -> x*16 + x*4 -> x*20
// This also ensures that 'x' only appears in the index list once.
for (unsigned i = 0, e = Decomposed.VarIndices.size(); i != e; ++i) {
- if (Decomposed.VarIndices[i].V == Index &&
+ if (Decomposed.VarIndices[i].V == Index &&
Decomposed.VarIndices[i].ZExtBits == ZExtBits &&
Decomposed.VarIndices[i].SExtBits == SExtBits) {
Scale += Decomposed.VarIndices[i].Scale;
@@ -479,10 +498,12 @@ bool BasicAAResult::DecomposeGEPExpression(const Value *V,
}
// Take care of wrap-arounds
- Decomposed.StructOffset =
- adjustToPointerSize(Decomposed.StructOffset, PointerSize);
- Decomposed.OtherOffset =
- adjustToPointerSize(Decomposed.OtherOffset, PointerSize);
+ if (GepHasConstantOffset) {
+ Decomposed.StructOffset =
+ adjustToPointerSize(Decomposed.StructOffset, PointerSize);
+ Decomposed.OtherOffset =
+ adjustToPointerSize(Decomposed.OtherOffset, PointerSize);
+ }
// Analyze the base pointer next.
V = GEPOp->getOperand(0);
@@ -603,6 +624,10 @@ FunctionModRefBehavior BasicAAResult::getModRefBehavior(const Function *F) {
if (F->onlyAccessesArgMemory())
Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesArgumentPointees);
+ else if (F->onlyAccessesInaccessibleMemory())
+ Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesInaccessibleMem);
+ else if (F->onlyAccessesInaccessibleMemOrArgMem())
+ Min = FunctionModRefBehavior(Min & FMRB_OnlyAccessesInaccessibleOrArgMem);
return Min;
}
@@ -732,7 +757,8 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
// pointer were passed to arguments that were neither of these, then it
// couldn't be no-capture.
if (!(*CI)->getType()->isPointerTy() ||
- (!CS.doesNotCapture(OperandNo) && !CS.isByValArgument(OperandNo)))
+ (!CS.doesNotCapture(OperandNo) &&
+ OperandNo < CS.getNumArgOperands() && !CS.isByValArgument(OperandNo)))
continue;
// If this is a no-capture pointer argument, see if we can tell that it
@@ -765,6 +791,31 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
return MRI_NoModRef;
}
+ // The semantics of memcpy intrinsics forbid overlap between their respective
+ // operands, i.e., source and destination of any given memcpy must no-alias.
+ // If Loc must-aliases either one of these two locations, then it necessarily
+ // no-aliases the other.
+ if (auto *Inst = dyn_cast<MemCpyInst>(CS.getInstruction())) {
+ AliasResult SrcAA, DestAA;
+
+ if ((SrcAA = getBestAAResults().alias(MemoryLocation::getForSource(Inst),
+ Loc)) == MustAlias)
+ // Loc is exactly the memcpy source thus disjoint from memcpy dest.
+ return MRI_Ref;
+ if ((DestAA = getBestAAResults().alias(MemoryLocation::getForDest(Inst),
+ Loc)) == MustAlias)
+ // The converse case.
+ return MRI_Mod;
+
+ // It's also possible for Loc to alias both src and dest, or neither.
+ ModRefInfo rv = MRI_NoModRef;
+ if (SrcAA != NoAlias)
+ rv = static_cast<ModRefInfo>(rv | MRI_Ref);
+ if (DestAA != NoAlias)
+ rv = static_cast<ModRefInfo>(rv | MRI_Mod);
+ return rv;
+ }
+
// While the assume intrinsic is marked as arbitrarily writing so that
// proper control dependencies will be maintained, it never aliases any
// particular memory location.
@@ -781,6 +832,32 @@ ModRefInfo BasicAAResult::getModRefInfo(ImmutableCallSite CS,
if (isIntrinsicCall(CS, Intrinsic::experimental_guard))
return MRI_Ref;
+ // Like assumes, invariant.start intrinsics were also marked as arbitrarily
+ // writing so that proper control dependencies are maintained but they never
+ // mod any particular memory location visible to the IR.
+ // *Unlike* assumes (which are now modeled as NoModRef), invariant.start
+ // intrinsic is now modeled as reading memory. This prevents hoisting the
+ // invariant.start intrinsic over stores. Consider:
+ // *ptr = 40;
+ // *ptr = 50;
+ // invariant_start(ptr)
+ // int val = *ptr;
+ // print(val);
+ //
+ // This cannot be transformed to:
+ //
+ // *ptr = 40;
+ // invariant_start(ptr)
+ // *ptr = 50;
+ // int val = *ptr;
+ // print(val);
+ //
+ // The transformation will cause the second store to be ignored (based on
+ // rules of invariant.start) and print 40, while the first program always
+ // prints 50.
+ if (isIntrinsicCall(CS, Intrinsic::invariant_start))
+ return MRI_Ref;
+
// The AAResultBase base class has some smarts, lets use them.
return AAResultBase::getModRefInfo(CS, Loc);
}
@@ -1114,7 +1191,8 @@ AliasResult BasicAAResult::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
return MayAlias;
AliasResult R = aliasCheck(UnderlyingV1, MemoryLocation::UnknownSize,
- AAMDNodes(), V2, V2Size, V2AAInfo);
+ AAMDNodes(), V2, V2Size, V2AAInfo,
+ nullptr, UnderlyingV2);
if (R != MustAlias)
// If V2 may alias GEP base pointer, conservatively returns MayAlias.
// If V2 is known not to alias GEP base pointer, then the two values
@@ -1251,7 +1329,8 @@ static AliasResult MergeAliasResults(AliasResult A, AliasResult B) {
AliasResult BasicAAResult::aliasSelect(const SelectInst *SI, uint64_t SISize,
const AAMDNodes &SIAAInfo,
const Value *V2, uint64_t V2Size,
- const AAMDNodes &V2AAInfo) {
+ const AAMDNodes &V2AAInfo,
+ const Value *UnderV2) {
// If the values are Selects with the same condition, we can do a more precise
// check: just check for aliases between the values on corresponding arms.
if (const SelectInst *SI2 = dyn_cast<SelectInst>(V2))
@@ -1269,12 +1348,14 @@ AliasResult BasicAAResult::aliasSelect(const SelectInst *SI, uint64_t SISize,
// If both arms of the Select node NoAlias or MustAlias V2, then returns
// NoAlias / MustAlias. Otherwise, returns MayAlias.
AliasResult Alias =
- aliasCheck(V2, V2Size, V2AAInfo, SI->getTrueValue(), SISize, SIAAInfo);
+ aliasCheck(V2, V2Size, V2AAInfo, SI->getTrueValue(),
+ SISize, SIAAInfo, UnderV2);
if (Alias == MayAlias)
return MayAlias;
AliasResult ThisAlias =
- aliasCheck(V2, V2Size, V2AAInfo, SI->getFalseValue(), SISize, SIAAInfo);
+ aliasCheck(V2, V2Size, V2AAInfo, SI->getFalseValue(), SISize, SIAAInfo,
+ UnderV2);
return MergeAliasResults(ThisAlias, Alias);
}
@@ -1282,8 +1363,8 @@ AliasResult BasicAAResult::aliasSelect(const SelectInst *SI, uint64_t SISize,
/// another.
AliasResult BasicAAResult::aliasPHI(const PHINode *PN, uint64_t PNSize,
const AAMDNodes &PNAAInfo, const Value *V2,
- uint64_t V2Size,
- const AAMDNodes &V2AAInfo) {
+ uint64_t V2Size, const AAMDNodes &V2AAInfo,
+ const Value *UnderV2) {
// Track phi nodes we have visited. We use this information when we determine
// value equivalence.
VisitedPhiBBs.insert(PN->getParent());
@@ -1362,7 +1443,8 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, uint64_t PNSize,
PNSize = MemoryLocation::UnknownSize;
AliasResult Alias =
- aliasCheck(V2, V2Size, V2AAInfo, V1Srcs[0], PNSize, PNAAInfo);
+ aliasCheck(V2, V2Size, V2AAInfo, V1Srcs[0],
+ PNSize, PNAAInfo, UnderV2);
// Early exit if the check of the first PHI source against V2 is MayAlias.
// Other results are not possible.
@@ -1375,7 +1457,7 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, uint64_t PNSize,
Value *V = V1Srcs[i];
AliasResult ThisAlias =
- aliasCheck(V2, V2Size, V2AAInfo, V, PNSize, PNAAInfo);
+ aliasCheck(V2, V2Size, V2AAInfo, V, PNSize, PNAAInfo, UnderV2);
Alias = MergeAliasResults(ThisAlias, Alias);
if (Alias == MayAlias)
break;
@@ -1388,7 +1470,8 @@ AliasResult BasicAAResult::aliasPHI(const PHINode *PN, uint64_t PNSize,
/// array references.
AliasResult BasicAAResult::aliasCheck(const Value *V1, uint64_t V1Size,
AAMDNodes V1AAInfo, const Value *V2,
- uint64_t V2Size, AAMDNodes V2AAInfo) {
+ uint64_t V2Size, AAMDNodes V2AAInfo,
+ const Value *O1, const Value *O2) {
// If either of the memory references is empty, it doesn't matter what the
// pointer values are.
if (V1Size == 0 || V2Size == 0)
@@ -1416,8 +1499,11 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, uint64_t V1Size,
return NoAlias; // Scalars cannot alias each other
// Figure out what objects these things are pointing to if we can.
- const Value *O1 = GetUnderlyingObject(V1, DL, MaxLookupSearchDepth);
- const Value *O2 = GetUnderlyingObject(V2, DL, MaxLookupSearchDepth);
+ if (O1 == nullptr)
+ O1 = GetUnderlyingObject(V1, DL, MaxLookupSearchDepth);
+
+ if (O2 == nullptr)
+ O2 = GetUnderlyingObject(V2, DL, MaxLookupSearchDepth);
// Null values in the default address space don't point to any object, so they
// don't alias any other pointer.
@@ -1500,23 +1586,26 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, uint64_t V1Size,
if (isa<PHINode>(V2) && !isa<PHINode>(V1)) {
std::swap(V1, V2);
+ std::swap(O1, O2);
std::swap(V1Size, V2Size);
std::swap(V1AAInfo, V2AAInfo);
}
if (const PHINode *PN = dyn_cast<PHINode>(V1)) {
- AliasResult Result = aliasPHI(PN, V1Size, V1AAInfo, V2, V2Size, V2AAInfo);
+ AliasResult Result = aliasPHI(PN, V1Size, V1AAInfo,
+ V2, V2Size, V2AAInfo, O2);
if (Result != MayAlias)
return AliasCache[Locs] = Result;
}
if (isa<SelectInst>(V2) && !isa<SelectInst>(V1)) {
std::swap(V1, V2);
+ std::swap(O1, O2);
std::swap(V1Size, V2Size);
std::swap(V1AAInfo, V2AAInfo);
}
if (const SelectInst *S1 = dyn_cast<SelectInst>(V1)) {
AliasResult Result =
- aliasSelect(S1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo);
+ aliasSelect(S1, V1Size, V1AAInfo, V2, V2Size, V2AAInfo, O2);
if (Result != MayAlias)
return AliasCache[Locs] = Result;
}
@@ -1667,9 +1756,9 @@ bool BasicAAResult::constantOffsetHeuristic(
// BasicAliasAnalysis Pass
//===----------------------------------------------------------------------===//
-char BasicAA::PassID;
+AnalysisKey BasicAA::Key;
-BasicAAResult BasicAA::run(Function &F, AnalysisManager<Function> &AM) {
+BasicAAResult BasicAA::run(Function &F, FunctionAnalysisManager &AM) {
return BasicAAResult(F.getParent()->getDataLayout(),
AM.getResult<TargetLibraryAnalysis>(F),
AM.getResult<AssumptionAnalysis>(F),
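Among other things, the hunks above teach BasicAA that a memcpy's source and destination cannot overlap, so a location that must-aliases the source is only read (MRI_Ref) and one that must-aliases the destination is only written (MRI_Mod). The sketch below is hypothetical client code, not part of the patch, showing where a caller would observe the sharper answer.

#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/IntrinsicInst.h"

using namespace llvm;

// Assumes AA was built for the function containing MCI.
ModRefInfo classifyAgainstMemCpy(AAResults &AA, const MemCpyInst *MCI,
                                 const MemoryLocation &Loc) {
  // After the change above, BasicAA answers MRI_Ref when Loc must-aliases the
  // memcpy source and MRI_Mod when it must-aliases the destination, instead
  // of the conservative MRI_ModRef for both.
  return AA.getModRefInfo(ImmutableCallSite(MCI), Loc);
}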
diff --git a/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp b/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp
index 1dd8f4fdfcfe..4cdbe4d0fcf6 100644
--- a/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp
+++ b/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp
@@ -39,8 +39,7 @@ static cl::opt<GVDAGType> ViewBlockFreqPropagationDAG(
"display a graph using the raw "
"integer fractional block frequency representation."),
clEnumValN(GVDT_Count, "count", "display a graph using the real "
- "profile count if available."),
- clEnumValEnd));
+ "profile count if available.")));
cl::opt<std::string>
ViewBlockFreqFuncName("view-bfi-func-name", cl::Hidden,
@@ -60,24 +59,22 @@ namespace llvm {
template <>
struct GraphTraits<BlockFrequencyInfo *> {
- typedef const BasicBlock NodeType;
+ typedef const BasicBlock *NodeRef;
typedef succ_const_iterator ChildIteratorType;
- typedef Function::const_iterator nodes_iterator;
+ typedef pointer_iterator<Function::const_iterator> nodes_iterator;
- static inline const NodeType *getEntryNode(const BlockFrequencyInfo *G) {
+ static NodeRef getEntryNode(const BlockFrequencyInfo *G) {
return &G->getFunction()->front();
}
- static ChildIteratorType child_begin(const NodeType *N) {
+ static ChildIteratorType child_begin(const NodeRef N) {
return succ_begin(N);
}
- static ChildIteratorType child_end(const NodeType *N) {
- return succ_end(N);
- }
+ static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); }
static nodes_iterator nodes_begin(const BlockFrequencyInfo *G) {
- return G->getFunction()->begin();
+ return nodes_iterator(G->getFunction()->begin());
}
static nodes_iterator nodes_end(const BlockFrequencyInfo *G) {
- return G->getFunction()->end();
+ return nodes_iterator(G->getFunction()->end());
}
};
@@ -162,6 +159,13 @@ BlockFrequencyInfo::getBlockProfileCount(const BasicBlock *BB) const {
return BFI->getBlockProfileCount(*getFunction(), BB);
}
+Optional<uint64_t>
+BlockFrequencyInfo::getProfileCountFromFreq(uint64_t Freq) const {
+ if (!BFI)
+ return None;
+ return BFI->getProfileCountFromFreq(*getFunction(), Freq);
+}
+
void BlockFrequencyInfo::setBlockFreq(const BasicBlock *BB, uint64_t Freq) {
assert(BFI && "Expected analysis to be available");
BFI->setBlockFreq(BB, Freq);
@@ -248,9 +252,9 @@ bool BlockFrequencyInfoWrapperPass::runOnFunction(Function &F) {
return false;
}
-char BlockFrequencyAnalysis::PassID;
+AnalysisKey BlockFrequencyAnalysis::Key;
BlockFrequencyInfo BlockFrequencyAnalysis::run(Function &F,
- AnalysisManager<Function> &AM) {
+ FunctionAnalysisManager &AM) {
BlockFrequencyInfo BFI;
BFI.calculate(F, AM.getResult<BranchProbabilityAnalysis>(F),
AM.getResult<LoopAnalysis>(F));
@@ -258,7 +262,7 @@ BlockFrequencyInfo BlockFrequencyAnalysis::run(Function &F,
}
PreservedAnalyses
-BlockFrequencyPrinterPass::run(Function &F, AnalysisManager<Function> &AM) {
+BlockFrequencyPrinterPass::run(Function &F, FunctionAnalysisManager &AM) {
OS << "Printing analysis results of BFI for function "
<< "'" << F.getName() << "':"
<< "\n";
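The GraphTraits specialization above is migrated to the NodeRef-based interface: NodeType* becomes NodeRef, and the node iterator must now dereference to NodeRef, hence the pointer_iterator wrapper. The specialization below shows the same shape for a hypothetical graph type; it is illustrative only.

#include "llvm/ADT/GraphTraits.h"
#include <vector>

struct MyNode { std::vector<MyNode *> Succs; };
struct MyGraph { std::vector<MyNode *> Nodes; };

namespace llvm {
template <> struct GraphTraits<MyGraph *> {
  typedef MyNode *NodeRef;                               // node handle, not NodeType*
  typedef std::vector<MyNode *>::iterator ChildIteratorType;
  typedef std::vector<MyNode *>::iterator nodes_iterator; // dereferences to NodeRef

  static NodeRef getEntryNode(MyGraph *G) { return G->Nodes.front(); }
  static ChildIteratorType child_begin(NodeRef N) { return N->Succs.begin(); }
  static ChildIteratorType child_end(NodeRef N) { return N->Succs.end(); }
  static nodes_iterator nodes_begin(MyGraph *G) { return G->Nodes.begin(); }
  static nodes_iterator nodes_end(MyGraph *G) { return G->Nodes.end(); }
};
} // namespace llvm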
diff --git a/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp b/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
index c2039e1dec2b..9850e02fca22 100644
--- a/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
+++ b/contrib/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
@@ -533,12 +533,18 @@ BlockFrequencyInfoImplBase::getBlockFreq(const BlockNode &Node) const {
Optional<uint64_t>
BlockFrequencyInfoImplBase::getBlockProfileCount(const Function &F,
const BlockNode &Node) const {
+ return getProfileCountFromFreq(F, getBlockFreq(Node).getFrequency());
+}
+
+Optional<uint64_t>
+BlockFrequencyInfoImplBase::getProfileCountFromFreq(const Function &F,
+ uint64_t Freq) const {
auto EntryCount = F.getEntryCount();
if (!EntryCount)
return None;
// Use 128 bit APInt to do the arithmetic to avoid overflow.
APInt BlockCount(128, EntryCount.getValue());
- APInt BlockFreq(128, getBlockFreq(Node).getFrequency());
+ APInt BlockFreq(128, Freq);
APInt EntryFreq(128, getEntryFreq());
BlockCount *= BlockFreq;
BlockCount = BlockCount.udiv(EntryFreq);
@@ -622,15 +628,12 @@ namespace llvm {
template <> struct GraphTraits<IrreducibleGraph> {
typedef bfi_detail::IrreducibleGraph GraphT;
- typedef const GraphT::IrrNode NodeType;
typedef const GraphT::IrrNode *NodeRef;
typedef GraphT::IrrNode::iterator ChildIteratorType;
- static const NodeType *getEntryNode(const GraphT &G) {
- return G.StartIrr;
- }
- static ChildIteratorType child_begin(NodeType *N) { return N->succ_begin(); }
- static ChildIteratorType child_end(NodeType *N) { return N->succ_end(); }
+ static NodeRef getEntryNode(const GraphT &G) { return G.StartIrr; }
+ static ChildIteratorType child_begin(NodeRef N) { return N->succ_begin(); }
+ static ChildIteratorType child_end(NodeRef N) { return N->succ_end(); }
};
} // end namespace llvm
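getProfileCountFromFreq factors the scaled-count computation out of getBlockProfileCount: blockCount = entryCount * blockFreq / entryFreq, carried out in 128-bit APInt so the multiplication cannot overflow. A standalone sketch of that arithmetic, not part of the patch:

#include "llvm/ADT/APInt.h"
#include <cstdint>

using namespace llvm;

uint64_t scaleProfileCount(uint64_t EntryCount, uint64_t BlockFreq,
                           uint64_t EntryFreq) {
  APInt Count(128, EntryCount);
  APInt Freq(128, BlockFreq);
  APInt Entry(128, EntryFreq);
  Count *= Freq;             // may exceed 64 bits; safe at 128 bits
  Count = Count.udiv(Entry); // blockCount = entryCount * freq / entryFreq
  return Count.getLimitedValue();
}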
diff --git a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp
index d802552d4e29..3eabb780398c 100644
--- a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp
@@ -162,12 +162,12 @@ bool BranchProbabilityInfo::calcUnreachableHeuristics(const BasicBlock *BB) {
return true;
}
- BranchProbability UnreachableProb(UR_TAKEN_WEIGHT,
- (UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT) *
- UnreachableEdges.size());
- BranchProbability ReachableProb(UR_NONTAKEN_WEIGHT,
- (UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT) *
- ReachableEdges.size());
+ auto UnreachableProb = BranchProbability::getBranchProbability(
+ UR_TAKEN_WEIGHT, (UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT) *
+ uint64_t(UnreachableEdges.size()));
+ auto ReachableProb = BranchProbability::getBranchProbability(
+ UR_NONTAKEN_WEIGHT,
+ (UR_TAKEN_WEIGHT + UR_NONTAKEN_WEIGHT) * uint64_t(ReachableEdges.size()));
for (unsigned SuccIdx : UnreachableEdges)
setEdgeProbability(BB, SuccIdx, UnreachableProb);
@@ -279,6 +279,16 @@ bool BranchProbabilityInfo::calcColdCallHeuristics(const BasicBlock *BB) {
}
}
+ if (auto *II = dyn_cast<InvokeInst>(TI)) {
+ // If the terminator is an InvokeInst, consider only the normal destination
+ // block.
+ if (PostDominatedByColdCall.count(II->getNormalDest()))
+ PostDominatedByColdCall.insert(BB);
+ // Return false here so that edge weights for InvokeInst could be decided
+ // in calcInvokeHeuristics().
+ return false;
+ }
+
// Skip probabilities if this block has a single successor.
if (TI->getNumSuccessors() == 1 || ColdEdges.empty())
return false;
@@ -290,12 +300,12 @@ bool BranchProbabilityInfo::calcColdCallHeuristics(const BasicBlock *BB) {
return true;
}
- BranchProbability ColdProb(CC_TAKEN_WEIGHT,
- (CC_TAKEN_WEIGHT + CC_NONTAKEN_WEIGHT) *
- ColdEdges.size());
- BranchProbability NormalProb(CC_NONTAKEN_WEIGHT,
- (CC_TAKEN_WEIGHT + CC_NONTAKEN_WEIGHT) *
- NormalEdges.size());
+ auto ColdProb = BranchProbability::getBranchProbability(
+ CC_TAKEN_WEIGHT,
+ (CC_TAKEN_WEIGHT + CC_NONTAKEN_WEIGHT) * uint64_t(ColdEdges.size()));
+ auto NormalProb = BranchProbability::getBranchProbability(
+ CC_NONTAKEN_WEIGHT,
+ (CC_TAKEN_WEIGHT + CC_NONTAKEN_WEIGHT) * uint64_t(NormalEdges.size()));
for (unsigned SuccIdx : ColdEdges)
setEdgeProbability(BB, SuccIdx, ColdProb);
@@ -701,16 +711,16 @@ void BranchProbabilityInfoWrapperPass::print(raw_ostream &OS,
BPI.print(OS);
}
-char BranchProbabilityAnalysis::PassID;
+AnalysisKey BranchProbabilityAnalysis::Key;
BranchProbabilityInfo
-BranchProbabilityAnalysis::run(Function &F, AnalysisManager<Function> &AM) {
+BranchProbabilityAnalysis::run(Function &F, FunctionAnalysisManager &AM) {
BranchProbabilityInfo BPI;
BPI.calculate(F, AM.getResult<LoopAnalysis>(F));
return BPI;
}
PreservedAnalyses
-BranchProbabilityPrinterPass::run(Function &F, AnalysisManager<Function> &AM) {
+BranchProbabilityPrinterPass::run(Function &F, FunctionAnalysisManager &AM) {
OS << "Printing analysis results of BPI for function "
<< "'" << F.getName() << "':"
<< "\n";
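The heuristics above switch from the raw BranchProbability(N, D) constructor to BranchProbability::getBranchProbability so the denominator, weight sum times edge count, is carried in 64 bits rather than truncated to 32. A minimal sketch of the same construction (illustrative, not part of the patch):

#include "llvm/Support/BranchProbability.h"
#include <cstdint>

using namespace llvm;

BranchProbability unreachableEdgeProb(uint64_t TakenWeight,
                                      uint64_t NonTakenWeight,
                                      uint64_t NumUnreachableEdges) {
  // With many edges the product below can exceed UINT32_MAX; the factory
  // function takes 64-bit operands where the raw constructor would not.
  return BranchProbability::getBranchProbability(
      TakenWeight, (TakenWeight + NonTakenWeight) * NumUnreachableEdges);
}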
diff --git a/contrib/llvm/lib/Analysis/CFGPrinter.cpp b/contrib/llvm/lib/Analysis/CFGPrinter.cpp
index c86f1f55954b..a85af6c9c93f 100644
--- a/contrib/llvm/lib/Analysis/CFGPrinter.cpp
+++ b/contrib/llvm/lib/Analysis/CFGPrinter.cpp
@@ -23,10 +23,10 @@
using namespace llvm;
namespace {
- struct CFGViewer : public FunctionPass {
+ struct CFGViewerLegacyPass : public FunctionPass {
static char ID; // Pass identifcation, replacement for typeid
- CFGViewer() : FunctionPass(ID) {
- initializeCFGOnlyViewerPass(*PassRegistry::getPassRegistry());
+ CFGViewerLegacyPass() : FunctionPass(ID) {
+ initializeCFGViewerLegacyPassPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override {
@@ -42,14 +42,21 @@ namespace {
};
}
-char CFGViewer::ID = 0;
-INITIALIZE_PASS(CFGViewer, "view-cfg", "View CFG of function", false, true)
+char CFGViewerLegacyPass::ID = 0;
+INITIALIZE_PASS(CFGViewerLegacyPass, "view-cfg", "View CFG of function", false, true)
+
+PreservedAnalyses CFGViewerPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ F.viewCFG();
+ return PreservedAnalyses::all();
+}
+
namespace {
- struct CFGOnlyViewer : public FunctionPass {
+ struct CFGOnlyViewerLegacyPass : public FunctionPass {
static char ID; // Pass identifcation, replacement for typeid
- CFGOnlyViewer() : FunctionPass(ID) {
- initializeCFGOnlyViewerPass(*PassRegistry::getPassRegistry());
+ CFGOnlyViewerLegacyPass() : FunctionPass(ID) {
+ initializeCFGOnlyViewerLegacyPassPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override {
@@ -65,29 +72,39 @@ namespace {
};
}
-char CFGOnlyViewer::ID = 0;
-INITIALIZE_PASS(CFGOnlyViewer, "view-cfg-only",
+char CFGOnlyViewerLegacyPass::ID = 0;
+INITIALIZE_PASS(CFGOnlyViewerLegacyPass, "view-cfg-only",
"View CFG of function (with no function bodies)", false, true)
+PreservedAnalyses CFGOnlyViewerPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ F.viewCFGOnly();
+ return PreservedAnalyses::all();
+}
+
+static void writeCFGToDotFile(Function &F) {
+ std::string Filename = ("cfg." + F.getName() + ".dot").str();
+ errs() << "Writing '" << Filename << "'...";
+
+ std::error_code EC;
+ raw_fd_ostream File(Filename, EC, sys::fs::F_Text);
+
+ if (!EC)
+ WriteGraph(File, (const Function*)&F);
+ else
+ errs() << " error opening file for writing!";
+ errs() << "\n";
+}
+
namespace {
- struct CFGPrinter : public FunctionPass {
+ struct CFGPrinterLegacyPass : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- CFGPrinter() : FunctionPass(ID) {
- initializeCFGPrinterPass(*PassRegistry::getPassRegistry());
+ CFGPrinterLegacyPass() : FunctionPass(ID) {
+ initializeCFGPrinterLegacyPassPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override {
- std::string Filename = ("cfg." + F.getName() + ".dot").str();
- errs() << "Writing '" << Filename << "'...";
-
- std::error_code EC;
- raw_fd_ostream File(Filename, EC, sys::fs::F_Text);
-
- if (!EC)
- WriteGraph(File, (const Function*)&F);
- else
- errs() << " error opening file for writing!";
- errs() << "\n";
+ writeCFGToDotFile(F);
return false;
}
@@ -99,29 +116,25 @@ namespace {
};
}
-char CFGPrinter::ID = 0;
-INITIALIZE_PASS(CFGPrinter, "dot-cfg", "Print CFG of function to 'dot' file",
+char CFGPrinterLegacyPass::ID = 0;
+INITIALIZE_PASS(CFGPrinterLegacyPass, "dot-cfg", "Print CFG of function to 'dot' file",
false, true)
+PreservedAnalyses CFGPrinterPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ writeCFGToDotFile(F);
+ return PreservedAnalyses::all();
+}
+
namespace {
- struct CFGOnlyPrinter : public FunctionPass {
+ struct CFGOnlyPrinterLegacyPass : public FunctionPass {
static char ID; // Pass identification, replacement for typeid
- CFGOnlyPrinter() : FunctionPass(ID) {
- initializeCFGOnlyPrinterPass(*PassRegistry::getPassRegistry());
+ CFGOnlyPrinterLegacyPass() : FunctionPass(ID) {
+ initializeCFGOnlyPrinterLegacyPassPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override {
- std::string Filename = ("cfg." + F.getName() + ".dot").str();
- errs() << "Writing '" << Filename << "'...";
-
- std::error_code EC;
- raw_fd_ostream File(Filename, EC, sys::fs::F_Text);
-
- if (!EC)
- WriteGraph(File, (const Function*)&F, true);
- else
- errs() << " error opening file for writing!";
- errs() << "\n";
+ writeCFGToDotFile(F);
return false;
}
void print(raw_ostream &OS, const Module* = nullptr) const override {}
@@ -132,11 +145,17 @@ namespace {
};
}
-char CFGOnlyPrinter::ID = 0;
-INITIALIZE_PASS(CFGOnlyPrinter, "dot-cfg-only",
+char CFGOnlyPrinterLegacyPass::ID = 0;
+INITIALIZE_PASS(CFGOnlyPrinterLegacyPass, "dot-cfg-only",
"Print CFG of function to 'dot' file (with no function bodies)",
false, true)
+PreservedAnalyses CFGOnlyPrinterPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ writeCFGToDotFile(F);
+ return PreservedAnalyses::all();
+}
+
/// viewCFG - This function is meant for use from the debugger. You can just
/// say 'call F->viewCFG()' and a ghostview window should pop up from the
/// program, displaying the CFG of the current function. This depends on there
@@ -155,11 +174,11 @@ void Function::viewCFGOnly() const {
ViewGraph(this, "cfg" + getName(), true);
}
-FunctionPass *llvm::createCFGPrinterPass () {
- return new CFGPrinter();
+FunctionPass *llvm::createCFGPrinterLegacyPassPass () {
+ return new CFGPrinterLegacyPass();
}
-FunctionPass *llvm::createCFGOnlyPrinterPass () {
- return new CFGOnlyPrinter();
+FunctionPass *llvm::createCFGOnlyPrinterLegacyPassPass () {
+ return new CFGOnlyPrinterLegacyPass();
}
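Besides renaming the legacy passes, the file gains new-pass-manager counterparts (CFGViewerPass, CFGPrinterPass, and the -only variants) that share writeCFGToDotFile. The driver below is a hypothetical sketch assuming the matching declarations this import adds to llvm/Analysis/CFGPrinter.h; it is not part of the patch.

#include "llvm/Analysis/CFGPrinter.h"
#include "llvm/IR/PassManager.h"

using namespace llvm;

void addCFGDump(FunctionPassManager &FPM) {
  // Emits cfg.<function>.dot via writeCFGToDotFile(), like the legacy
  // -dot-cfg pass, but scheduled by the new pass manager.
  FPM.addPass(CFGPrinterPass());
}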
diff --git a/contrib/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp
index 7d5bd94133a7..e48ff230f43c 100644
--- a/contrib/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/CFLAndersAliasAnalysis.cpp
@@ -27,12 +27,23 @@
// codes: all we do here is to selectively expand the transitive closure by
// discarding edges that are not recognized by the state machine.
//
-// There is one difference between our current implementation and the one
-// described in the paper: out algorithm eagerly computes all alias pairs after
-// the CFLGraph is built, while in the paper the authors did the computation in
-// a demand-driven fashion. We did not implement the demand-driven algorithm due
-// to the additional coding complexity and higher memory profile, but if we
-// found it necessary we may switch to it eventually.
+// There are two differences between our current implementation and the one
+// described in the paper:
+// - Our algorithm eagerly computes all alias pairs after the CFLGraph is built,
+// while in the paper the authors did the computation in a demand-driven
+// fashion. We did not implement the demand-driven algorithm due to the
+// additional coding complexity and higher memory profile, but if we found it
+// necessary we may switch to it eventually.
+// - In the paper the authors use a state machine that does not distinguish
+// value reads from value writes. For example, if Y is reachable from X at state
+// S3, it may be the case that X is written into Y, or it may be the case that
+// there's a third value Z that writes into both X and Y. To make that
+// distinction (which is crucial in building function summary as well as
+// retrieving mod-ref info), we choose to duplicate some of the states in the
+// paper's proposed state machine. The duplication does not change the set the
+// machine accepts. Given a pair of reachable values, it only provides more
+// detailed information on which value is being written into and which is being
+// read from.
//
//===----------------------------------------------------------------------===//
@@ -71,16 +82,65 @@ static const Function *parentFunctionOfValue(const Value *Val) {
namespace {
enum class MatchState : uint8_t {
- FlowFrom = 0, // S1 in the paper
- FlowFromMemAlias, // S2 in the paper
- FlowTo, // S3 in the paper
- FlowToMemAlias // S4 in the paper
+ // The following state represents S1 in the paper.
+ FlowFromReadOnly = 0,
+ // The following two states together represent S2 in the paper.
+ // The 'NoReadWrite' suffix indicates that there exists an alias path that
+ // does not contain assignment and reverse assignment edges.
+ // The 'ReadOnly' suffix indicates that there exists an alias path that
+ // contains reverse assignment edges only.
+ FlowFromMemAliasNoReadWrite,
+ FlowFromMemAliasReadOnly,
+ // The following two states together represent S3 in the paper.
+ // The 'WriteOnly' suffix indicates that there exists an alias path that
+ // contains assignment edges only.
+ // The 'ReadWrite' suffix indicates that there exists an alias path that
+ // contains both assignment and reverse assignment edges. Note that if X and Y
+ // are reachable at 'ReadWrite' state, it does NOT mean X is both read from
+ // and written to Y. Instead, it means that a third value Z is written to both
+ // X and Y.
+ FlowToWriteOnly,
+ FlowToReadWrite,
+ // The following two states together represent S4 in the paper.
+ FlowToMemAliasWriteOnly,
+ FlowToMemAliasReadWrite,
};
+typedef std::bitset<7> StateSet;
+const unsigned ReadOnlyStateMask =
+ (1U << static_cast<uint8_t>(MatchState::FlowFromReadOnly)) |
+ (1U << static_cast<uint8_t>(MatchState::FlowFromMemAliasReadOnly));
+const unsigned WriteOnlyStateMask =
+ (1U << static_cast<uint8_t>(MatchState::FlowToWriteOnly)) |
+ (1U << static_cast<uint8_t>(MatchState::FlowToMemAliasWriteOnly));
+
+// A pair that consists of a value and an offset
+struct OffsetValue {
+ const Value *Val;
+ int64_t Offset;
+};
+
+bool operator==(OffsetValue LHS, OffsetValue RHS) {
+ return LHS.Val == RHS.Val && LHS.Offset == RHS.Offset;
+}
+bool operator<(OffsetValue LHS, OffsetValue RHS) {
+ return std::less<const Value *>()(LHS.Val, RHS.Val) ||
+ (LHS.Val == RHS.Val && LHS.Offset < RHS.Offset);
+}
+
+// A pair that consists of an InstantiatedValue and an offset
+struct OffsetInstantiatedValue {
+ InstantiatedValue IVal;
+ int64_t Offset;
+};
+
+bool operator==(OffsetInstantiatedValue LHS, OffsetInstantiatedValue RHS) {
+ return LHS.IVal == RHS.IVal && LHS.Offset == RHS.Offset;
+}
+
// We use ReachabilitySet to keep track of value aliases (The nonterminal "V" in
// the paper) during the analysis.
class ReachabilitySet {
- typedef std::bitset<4> StateSet;
typedef DenseMap<InstantiatedValue, StateSet> ValueStateMap;
typedef DenseMap<InstantiatedValue, ValueStateMap> ValueReachMap;
ValueReachMap ReachMap;
@@ -91,6 +151,7 @@ public:
// Insert edge 'From->To' at state 'State'
bool insert(InstantiatedValue From, InstantiatedValue To, MatchState State) {
+ assert(From != To);
auto &States = ReachMap[To][From];
auto Idx = static_cast<size_t>(State);
if (!States.test(Idx)) {
@@ -150,8 +211,6 @@ public:
typedef MapType::const_iterator const_iterator;
bool add(InstantiatedValue V, AliasAttrs Attr) {
- if (Attr.none())
- return false;
auto &OldAttr = AttrMap[V];
auto NewAttr = OldAttr | Attr;
if (OldAttr == NewAttr)
@@ -178,6 +237,57 @@ struct WorkListItem {
InstantiatedValue To;
MatchState State;
};
+
+struct ValueSummary {
+ struct Record {
+ InterfaceValue IValue;
+ unsigned DerefLevel;
+ };
+ SmallVector<Record, 4> FromRecords, ToRecords;
+};
+}
+
+namespace llvm {
+// Specialize DenseMapInfo for OffsetValue.
+template <> struct DenseMapInfo<OffsetValue> {
+ static OffsetValue getEmptyKey() {
+ return OffsetValue{DenseMapInfo<const Value *>::getEmptyKey(),
+ DenseMapInfo<int64_t>::getEmptyKey()};
+ }
+ static OffsetValue getTombstoneKey() {
+ return OffsetValue{DenseMapInfo<const Value *>::getTombstoneKey(),
+ DenseMapInfo<int64_t>::getEmptyKey()};
+ }
+ static unsigned getHashValue(const OffsetValue &OVal) {
+ return DenseMapInfo<std::pair<const Value *, int64_t>>::getHashValue(
+ std::make_pair(OVal.Val, OVal.Offset));
+ }
+ static bool isEqual(const OffsetValue &LHS, const OffsetValue &RHS) {
+ return LHS == RHS;
+ }
+};
+
+// Specialize DenseMapInfo for OffsetInstantiatedValue.
+template <> struct DenseMapInfo<OffsetInstantiatedValue> {
+ static OffsetInstantiatedValue getEmptyKey() {
+ return OffsetInstantiatedValue{
+ DenseMapInfo<InstantiatedValue>::getEmptyKey(),
+ DenseMapInfo<int64_t>::getEmptyKey()};
+ }
+ static OffsetInstantiatedValue getTombstoneKey() {
+ return OffsetInstantiatedValue{
+ DenseMapInfo<InstantiatedValue>::getTombstoneKey(),
+ DenseMapInfo<int64_t>::getEmptyKey()};
+ }
+ static unsigned getHashValue(const OffsetInstantiatedValue &OVal) {
+ return DenseMapInfo<std::pair<InstantiatedValue, int64_t>>::getHashValue(
+ std::make_pair(OVal.IVal, OVal.Offset));
+ }
+ static bool isEqual(const OffsetInstantiatedValue &LHS,
+ const OffsetInstantiatedValue &RHS) {
+ return LHS == RHS;
+ }
+};
}
class CFLAndersAAResult::FunctionInfo {
@@ -185,7 +295,7 @@ class CFLAndersAAResult::FunctionInfo {
/// Since the alias relation is symmetric, to save some space we assume values
/// are properly ordered: if a and b alias each other, and a < b, then b is in
/// AliasMap[a] but not vice versa.
- DenseMap<const Value *, std::vector<const Value *>> AliasMap;
+ DenseMap<const Value *, std::vector<OffsetValue>> AliasMap;
/// Map a value to its corresponding AliasAttrs
DenseMap<const Value *, AliasAttrs> AttrMap;
@@ -193,27 +303,56 @@ class CFLAndersAAResult::FunctionInfo {
/// Summary of externally visible effects.
AliasSummary Summary;
- AliasAttrs getAttrs(const Value *) const;
+ Optional<AliasAttrs> getAttrs(const Value *) const;
public:
- FunctionInfo(const ReachabilitySet &, AliasAttrMap);
+ FunctionInfo(const Function &, const SmallVectorImpl<Value *> &,
+ const ReachabilitySet &, AliasAttrMap);
- bool mayAlias(const Value *LHS, const Value *RHS) const;
+ bool mayAlias(const Value *, uint64_t, const Value *, uint64_t) const;
const AliasSummary &getAliasSummary() const { return Summary; }
};
-CFLAndersAAResult::FunctionInfo::FunctionInfo(const ReachabilitySet &ReachSet,
- AliasAttrMap AMap) {
- // Populate AttrMap
+static bool hasReadOnlyState(StateSet Set) {
+ return (Set & StateSet(ReadOnlyStateMask)).any();
+}
+
+static bool hasWriteOnlyState(StateSet Set) {
+ return (Set & StateSet(WriteOnlyStateMask)).any();
+}
+
+static Optional<InterfaceValue>
+getInterfaceValue(InstantiatedValue IValue,
+ const SmallVectorImpl<Value *> &RetVals) {
+ auto Val = IValue.Val;
+
+ Optional<unsigned> Index;
+ if (auto Arg = dyn_cast<Argument>(Val))
+ Index = Arg->getArgNo() + 1;
+ else if (is_contained(RetVals, Val))
+ Index = 0;
+
+ if (Index)
+ return InterfaceValue{*Index, IValue.DerefLevel};
+ return None;
+}
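For reference, the numbering convention used here is index 0 for the return value and i + 1 for the i-th argument. A hypothetical analyzed function that exercises the edge case handled in populateExternalRelations below:

// Hypothetical input: the parameter is returned directly, so the same Value is
// both argument number 0 (interface index 1) and the return value (index 0),
// and the summary records a relation between InterfaceValue{1, 0} and
// InterfaceValue{0, 0}.
int *identity(int *P) { return P; }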
+
+static void populateAttrMap(DenseMap<const Value *, AliasAttrs> &AttrMap,
+ const AliasAttrMap &AMap) {
for (const auto &Mapping : AMap.mappings()) {
auto IVal = Mapping.first;
+ // Always create an entry for IVal.Val, even if no attribute bits get set
+ auto &Attr = AttrMap[IVal.Val];
// AttrMap only cares about top-level values
if (IVal.DerefLevel == 0)
- AttrMap[IVal.Val] = Mapping.second;
+ Attr |= Mapping.second;
}
+}
- // Populate AliasMap
+static void
+populateAliasMap(DenseMap<const Value *, std::vector<OffsetValue>> &AliasMap,
+ const ReachabilitySet &ReachSet) {
for (const auto &OuterMapping : ReachSet.value_mappings()) {
// AliasMap only cares about top-level values
if (OuterMapping.first.DerefLevel > 0)
@@ -224,48 +363,202 @@ CFLAndersAAResult::FunctionInfo::FunctionInfo(const ReachabilitySet &ReachSet,
for (const auto &InnerMapping : OuterMapping.second) {
// Again, AliasMap only cares about top-level values
if (InnerMapping.first.DerefLevel == 0)
- AliasList.push_back(InnerMapping.first.Val);
+ AliasList.push_back(OffsetValue{InnerMapping.first.Val, UnknownOffset});
}
// Sort AliasList for faster lookup
- std::sort(AliasList.begin(), AliasList.end(), std::less<const Value *>());
+ std::sort(AliasList.begin(), AliasList.end());
}
+}
- // TODO: Populate function summary here
+static void populateExternalRelations(
+ SmallVectorImpl<ExternalRelation> &ExtRelations, const Function &Fn,
+ const SmallVectorImpl<Value *> &RetVals, const ReachabilitySet &ReachSet) {
+ // If a function only returns one of its arguments X, then X will be both an
+ // argument and a return value at the same time. This is an edge case that
+ // needs special handling here.
+ for (const auto &Arg : Fn.args()) {
+ if (is_contained(RetVals, &Arg)) {
+ auto ArgVal = InterfaceValue{Arg.getArgNo() + 1, 0};
+ auto RetVal = InterfaceValue{0, 0};
+ ExtRelations.push_back(ExternalRelation{ArgVal, RetVal, 0});
+ }
+ }
+
+ // Below is the core summary construction logic.
+ // A naive solution of adding only the value aliases that are parameters or
+ // return values in ReachSet to the summary won't work: It is possible that a
+ // parameter P is written into an intermediate value I, and the function
+ // subsequently returns *I. In that case, *I does not value alias anything
+ // in ReachSet, and the naive solution will miss a summary edge from (P, 1) to
+ // (I, 1).
+ // To account for the aforementioned case, we need to check each non-parameter
+ // and non-return value for the possibility of acting as an intermediate.
+ // 'ValueMap' here records, for each value, which InterfaceValues read from or
+ // write into it. If both the read list and the write list of a given value
+ // are non-empty, we know that the value is an intermediate and we
+ // need to add summary edges from the writes to the reads.
+ DenseMap<Value *, ValueSummary> ValueMap;
+ for (const auto &OuterMapping : ReachSet.value_mappings()) {
+ if (auto Dst = getInterfaceValue(OuterMapping.first, RetVals)) {
+ for (const auto &InnerMapping : OuterMapping.second) {
+ // If Src is a param/return value, we get a same-level assignment.
+ if (auto Src = getInterfaceValue(InnerMapping.first, RetVals)) {
+ // This may happen if both Dst and Src are return values
+ if (*Dst == *Src)
+ continue;
+
+ if (hasReadOnlyState(InnerMapping.second))
+ ExtRelations.push_back(ExternalRelation{*Dst, *Src, UnknownOffset});
+ // No need to check for WriteOnly state, since ReachSet is symmetric
+ } else {
+ // If Src is not a param/return, add it to ValueMap
+ auto SrcIVal = InnerMapping.first;
+ if (hasReadOnlyState(InnerMapping.second))
+ ValueMap[SrcIVal.Val].FromRecords.push_back(
+ ValueSummary::Record{*Dst, SrcIVal.DerefLevel});
+ if (hasWriteOnlyState(InnerMapping.second))
+ ValueMap[SrcIVal.Val].ToRecords.push_back(
+ ValueSummary::Record{*Dst, SrcIVal.DerefLevel});
+ }
+ }
+ }
+ }
+
+ for (const auto &Mapping : ValueMap) {
+ for (const auto &FromRecord : Mapping.second.FromRecords) {
+ for (const auto &ToRecord : Mapping.second.ToRecords) {
+ auto ToLevel = ToRecord.DerefLevel;
+ auto FromLevel = FromRecord.DerefLevel;
+ // Same-level assignments should have already been processed by now
+ if (ToLevel == FromLevel)
+ continue;
+
+ auto SrcIndex = FromRecord.IValue.Index;
+ auto SrcLevel = FromRecord.IValue.DerefLevel;
+ auto DstIndex = ToRecord.IValue.Index;
+ auto DstLevel = ToRecord.IValue.DerefLevel;
+ if (ToLevel > FromLevel)
+ SrcLevel += ToLevel - FromLevel;
+ else
+ DstLevel += FromLevel - ToLevel;
+
+ ExtRelations.push_back(ExternalRelation{
+ InterfaceValue{SrcIndex, SrcLevel},
+ InterfaceValue{DstIndex, DstLevel}, UnknownOffset});
+ }
+ }
+ }
+
+ // Remove duplicates in ExtRelations
+ std::sort(ExtRelations.begin(), ExtRelations.end());
+ ExtRelations.erase(std::unique(ExtRelations.begin(), ExtRelations.end()),
+ ExtRelations.end());
+}
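The deref-level adjustment performed in the nested loop above can be replayed by hand; a minimal standalone sketch with made-up indices and levels (purely illustrative, not values from any real summary):

#include <cassert>

int main() {
  // FromRecord: read by interface value (index 1, level 0) at DerefLevel 1.
  // ToRecord:   written by interface value (index 2, level 0) at DerefLevel 2.
  unsigned FromLevel = 1, ToLevel = 2;
  unsigned SrcIndex = 1, SrcLevel = 0;
  unsigned DstIndex = 2, DstLevel = 0;
  if (ToLevel > FromLevel)
    SrcLevel += ToLevel - FromLevel; // the source side gains the difference
  else
    DstLevel += FromLevel - ToLevel;
  // The resulting summary edge is from (1, 1) to (2, 0).
  assert(SrcIndex == 1 && SrcLevel == 1 && DstIndex == 2 && DstLevel == 0);
  return 0;
}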
+
+static void populateExternalAttributes(
+ SmallVectorImpl<ExternalAttribute> &ExtAttributes, const Function &Fn,
+ const SmallVectorImpl<Value *> &RetVals, const AliasAttrMap &AMap) {
+ for (const auto &Mapping : AMap.mappings()) {
+ if (auto IVal = getInterfaceValue(Mapping.first, RetVals)) {
+ auto Attr = getExternallyVisibleAttrs(Mapping.second);
+ if (Attr.any())
+ ExtAttributes.push_back(ExternalAttribute{*IVal, Attr});
+ }
+ }
+}
+
+CFLAndersAAResult::FunctionInfo::FunctionInfo(
+ const Function &Fn, const SmallVectorImpl<Value *> &RetVals,
+ const ReachabilitySet &ReachSet, AliasAttrMap AMap) {
+ populateAttrMap(AttrMap, AMap);
+ populateExternalAttributes(Summary.RetParamAttributes, Fn, RetVals, AMap);
+ populateAliasMap(AliasMap, ReachSet);
+ populateExternalRelations(Summary.RetParamRelations, Fn, RetVals, ReachSet);
}
-AliasAttrs CFLAndersAAResult::FunctionInfo::getAttrs(const Value *V) const {
+Optional<AliasAttrs>
+CFLAndersAAResult::FunctionInfo::getAttrs(const Value *V) const {
assert(V != nullptr);
- AliasAttrs Attr;
auto Itr = AttrMap.find(V);
if (Itr != AttrMap.end())
- Attr = Itr->second;
- return Attr;
+ return Itr->second;
+ return None;
}
bool CFLAndersAAResult::FunctionInfo::mayAlias(const Value *LHS,
- const Value *RHS) const {
+ uint64_t LHSSize,
+ const Value *RHS,
+ uint64_t RHSSize) const {
assert(LHS && RHS);
+ // Check if we've seen LHS and RHS before. Sometimes LHS or RHS can be created
+ // after the analysis gets executed, and we want to be conservative in those
+ // cases.
+ auto MaybeAttrsA = getAttrs(LHS);
+ auto MaybeAttrsB = getAttrs(RHS);
+ if (!MaybeAttrsA || !MaybeAttrsB)
+ return true;
+
+ // Check AliasAttrs before AliasMap lookup since it's cheaper
+ auto AttrsA = *MaybeAttrsA;
+ auto AttrsB = *MaybeAttrsB;
+ if (hasUnknownOrCallerAttr(AttrsA))
+ return AttrsB.any();
+ if (hasUnknownOrCallerAttr(AttrsB))
+ return AttrsA.any();
+ if (isGlobalOrArgAttr(AttrsA))
+ return isGlobalOrArgAttr(AttrsB);
+ if (isGlobalOrArgAttr(AttrsB))
+ return isGlobalOrArgAttr(AttrsA);
+
+ // At this point both LHS and RHS should point to locally allocated objects
+
auto Itr = AliasMap.find(LHS);
if (Itr != AliasMap.end()) {
- if (std::binary_search(Itr->second.begin(), Itr->second.end(), RHS,
- std::less<const Value *>()))
- return true;
- }
- // Even if LHS and RHS are not reachable, they may still alias due to their
- // AliasAttrs
- auto AttrsA = getAttrs(LHS);
- auto AttrsB = getAttrs(RHS);
+ // Find out all (X, Offset) where X == RHS
+ auto Comparator = [](OffsetValue LHS, OffsetValue RHS) {
+ return std::less<const Value *>()(LHS.Val, RHS.Val);
+ };
+#ifdef EXPENSIVE_CHECKS
+ assert(std::is_sorted(Itr->second.begin(), Itr->second.end(), Comparator));
+#endif
+ auto RangePair = std::equal_range(Itr->second.begin(), Itr->second.end(),
+ OffsetValue{RHS, 0}, Comparator);
+
+ if (RangePair.first != RangePair.second) {
+ // Be conservative about UnknownSize
+ if (LHSSize == MemoryLocation::UnknownSize ||
+ RHSSize == MemoryLocation::UnknownSize)
+ return true;
+
+ for (const auto &OVal : make_range(RangePair)) {
+ // Be conservative about UnknownOffset
+ if (OVal.Offset == UnknownOffset)
+ return true;
+
+ // We know that LHS aliases (RHS + OVal.Offset) if the control flow
+ // reaches here. The may-alias query essentially becomes integer
+ // range-overlap queries over two ranges [OVal.Offset, OVal.Offset +
+ // LHSSize) and [0, RHSSize).
+
+ // Be conservative about very large sizes that would overflow the
+ // int64_t casts below
+ if (LLVM_UNLIKELY(LHSSize > INT64_MAX || RHSSize > INT64_MAX))
+ return true;
+
+ auto LHSStart = OVal.Offset;
+ // FIXME: Do we need to guard against integer overflow?
+ auto LHSEnd = OVal.Offset + static_cast<int64_t>(LHSSize);
+ auto RHSStart = 0;
+ auto RHSEnd = static_cast<int64_t>(RHSSize);
+ if (LHSEnd > RHSStart && LHSStart < RHSEnd)
+ return true;
+ }
+ }
+ }
- if (AttrsA.none() || AttrsB.none())
- return false;
- if (hasUnknownOrCallerAttr(AttrsA) || hasUnknownOrCallerAttr(AttrsB))
- return true;
- if (isGlobalOrArgAttr(AttrsA) && isGlobalOrArgAttr(AttrsB))
- return true;
return false;
}
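The interval-overlap test at the end of mayAlias can be checked with made-up numbers; a minimal standalone sketch of the same arithmetic (the offset and sizes are illustrative only):

#include <cassert>
#include <cstdint>

int main() {
  // Suppose LHS is known to alias (RHS + 4), with LHSSize == 8 and
  // RHSSize == 16 bytes.
  int64_t LHSStart = 4, LHSEnd = LHSStart + 8; // [4, 12)
  int64_t RHSStart = 0, RHSEnd = 16;           // [0, 16)
  assert(LHSEnd > RHSStart && LHSStart < RHSEnd); // ranges overlap -> MayAlias
  // With an offset of 16 instead, [16, 24) and [0, 16) would not overlap,
  // and the query could conclude NoAlias for this pair.
  return 0;
}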
@@ -292,8 +585,10 @@ static void initializeWorkList(std::vector<WorkListItem> &WorkList,
// If there's an assignment edge from X to Y, it means Y is reachable from
// X at S2 and X is reachable from Y at S1
for (auto &Edge : ValueInfo.getNodeInfoAtLevel(I).Edges) {
- propagate(Edge.Other, Src, MatchState::FlowFrom, ReachSet, WorkList);
- propagate(Src, Edge.Other, MatchState::FlowTo, ReachSet, WorkList);
+ propagate(Edge.Other, Src, MatchState::FlowFromReadOnly, ReachSet,
+ WorkList);
+ propagate(Src, Edge.Other, MatchState::FlowToWriteOnly, ReachSet,
+ WorkList);
}
}
}
@@ -328,16 +623,21 @@ static void processWorkListItem(const WorkListItem &Item, const CFLGraph &Graph,
auto ToNodeBelow = getNodeBelow(Graph, ToNode);
if (FromNodeBelow && ToNodeBelow &&
MemSet.insert(*FromNodeBelow, *ToNodeBelow)) {
- propagate(*FromNodeBelow, *ToNodeBelow, MatchState::FlowFromMemAlias,
- ReachSet, WorkList);
+ propagate(*FromNodeBelow, *ToNodeBelow,
+ MatchState::FlowFromMemAliasNoReadWrite, ReachSet, WorkList);
for (const auto &Mapping : ReachSet.reachableValueAliases(*FromNodeBelow)) {
auto Src = Mapping.first;
- if (Mapping.second.test(static_cast<size_t>(MatchState::FlowFrom)))
- propagate(Src, *ToNodeBelow, MatchState::FlowFromMemAlias, ReachSet,
- WorkList);
- if (Mapping.second.test(static_cast<size_t>(MatchState::FlowTo)))
- propagate(Src, *ToNodeBelow, MatchState::FlowToMemAlias, ReachSet,
- WorkList);
+ auto MemAliasPropagate = [&](MatchState FromState, MatchState ToState) {
+ if (Mapping.second.test(static_cast<size_t>(FromState)))
+ propagate(Src, *ToNodeBelow, ToState, ReachSet, WorkList);
+ };
+
+ MemAliasPropagate(MatchState::FlowFromReadOnly,
+ MatchState::FlowFromMemAliasReadOnly);
+ MemAliasPropagate(MatchState::FlowToWriteOnly,
+ MatchState::FlowToMemAliasWriteOnly);
+ MemAliasPropagate(MatchState::FlowToReadWrite,
+ MatchState::FlowToMemAliasReadWrite);
}
}
@@ -349,45 +649,54 @@ static void processWorkListItem(const WorkListItem &Item, const CFLGraph &Graph,
// - If *X and *Y are memory aliases, then X and Y are value aliases
// - If Y is an alias of X, then reverse assignment edges (if there are any)
// should precede any assignment edges on the path from X to Y.
- switch (Item.State) {
- case MatchState::FlowFrom: {
- for (const auto &RevAssignEdge : NodeInfo->ReverseEdges)
- propagate(FromNode, RevAssignEdge.Other, MatchState::FlowFrom, ReachSet,
- WorkList);
+ auto NextAssignState = [&](MatchState State) {
for (const auto &AssignEdge : NodeInfo->Edges)
- propagate(FromNode, AssignEdge.Other, MatchState::FlowTo, ReachSet,
- WorkList);
+ propagate(FromNode, AssignEdge.Other, State, ReachSet, WorkList);
+ };
+ auto NextRevAssignState = [&](MatchState State) {
+ for (const auto &RevAssignEdge : NodeInfo->ReverseEdges)
+ propagate(FromNode, RevAssignEdge.Other, State, ReachSet, WorkList);
+ };
+ auto NextMemState = [&](MatchState State) {
if (auto AliasSet = MemSet.getMemoryAliases(ToNode)) {
for (const auto &MemAlias : *AliasSet)
- propagate(FromNode, MemAlias, MatchState::FlowFromMemAlias, ReachSet,
- WorkList);
+ propagate(FromNode, MemAlias, State, ReachSet, WorkList);
}
+ };
+
+ switch (Item.State) {
+ case MatchState::FlowFromReadOnly: {
+ NextRevAssignState(MatchState::FlowFromReadOnly);
+ NextAssignState(MatchState::FlowToReadWrite);
+ NextMemState(MatchState::FlowFromMemAliasReadOnly);
break;
}
- case MatchState::FlowFromMemAlias: {
- for (const auto &RevAssignEdge : NodeInfo->ReverseEdges)
- propagate(FromNode, RevAssignEdge.Other, MatchState::FlowFrom, ReachSet,
- WorkList);
- for (const auto &AssignEdge : NodeInfo->Edges)
- propagate(FromNode, AssignEdge.Other, MatchState::FlowTo, ReachSet,
- WorkList);
+ case MatchState::FlowFromMemAliasNoReadWrite: {
+ NextRevAssignState(MatchState::FlowFromReadOnly);
+ NextAssignState(MatchState::FlowToWriteOnly);
break;
}
- case MatchState::FlowTo: {
- for (const auto &AssignEdge : NodeInfo->Edges)
- propagate(FromNode, AssignEdge.Other, MatchState::FlowTo, ReachSet,
- WorkList);
- if (auto AliasSet = MemSet.getMemoryAliases(ToNode)) {
- for (const auto &MemAlias : *AliasSet)
- propagate(FromNode, MemAlias, MatchState::FlowToMemAlias, ReachSet,
- WorkList);
- }
+ case MatchState::FlowFromMemAliasReadOnly: {
+ NextRevAssignState(MatchState::FlowFromReadOnly);
+ NextAssignState(MatchState::FlowToReadWrite);
break;
}
- case MatchState::FlowToMemAlias: {
- for (const auto &AssignEdge : NodeInfo->Edges)
- propagate(FromNode, AssignEdge.Other, MatchState::FlowTo, ReachSet,
- WorkList);
+ case MatchState::FlowToWriteOnly: {
+ NextAssignState(MatchState::FlowToWriteOnly);
+ NextMemState(MatchState::FlowToMemAliasWriteOnly);
+ break;
+ }
+ case MatchState::FlowToReadWrite: {
+ NextAssignState(MatchState::FlowToReadWrite);
+ NextMemState(MatchState::FlowToMemAliasReadWrite);
+ break;
+ }
+ case MatchState::FlowToMemAliasWriteOnly: {
+ NextAssignState(MatchState::FlowToWriteOnly);
+ break;
+ }
+ case MatchState::FlowToMemAliasReadWrite: {
+ NextAssignState(MatchState::FlowToReadWrite);
break;
}
}
@@ -465,7 +774,8 @@ CFLAndersAAResult::buildInfoFrom(const Function &Fn) {
// to it
auto IValueAttrMap = buildAttrMap(Graph, ReachSet);
- return FunctionInfo(ReachSet, std::move(IValueAttrMap));
+ return FunctionInfo(Fn, GraphBuilder.getReturnValues(), ReachSet,
+ std::move(IValueAttrMap));
}
void CFLAndersAAResult::scan(const Function &Fn) {
@@ -530,7 +840,7 @@ AliasResult CFLAndersAAResult::query(const MemoryLocation &LocA,
auto &FunInfo = ensureCached(*Fn);
// AliasMap lookup
- if (FunInfo->mayAlias(ValA, ValB))
+ if (FunInfo->mayAlias(ValA, LocA.Size, ValB, LocB.Size))
return MayAlias;
return NoAlias;
}
@@ -555,9 +865,9 @@ AliasResult CFLAndersAAResult::alias(const MemoryLocation &LocA,
return QueryResult;
}
-char CFLAndersAA::PassID;
+AnalysisKey CFLAndersAA::Key;
-CFLAndersAAResult CFLAndersAA::run(Function &F, AnalysisManager<Function> &AM) {
+CFLAndersAAResult CFLAndersAA::run(Function &F, FunctionAnalysisManager &AM) {
return CFLAndersAAResult(AM.getResult<TargetLibraryAnalysis>(F));
}
diff --git a/contrib/llvm/lib/Analysis/CFLGraph.h b/contrib/llvm/lib/Analysis/CFLGraph.h
index bc6e794d0b2a..e526e0e16aa7 100644
--- a/contrib/llvm/lib/Analysis/CFLGraph.h
+++ b/contrib/llvm/lib/Analysis/CFLGraph.h
@@ -40,6 +40,7 @@ public:
struct Edge {
Node Other;
+ int64_t Offset;
};
typedef std::vector<Edge> EdgeList;
@@ -107,8 +108,8 @@ public:
auto *ToInfo = getNode(To);
assert(ToInfo != nullptr);
- FromInfo->Edges.push_back(Edge{To});
- ToInfo->ReverseEdges.push_back(Edge{From});
+ FromInfo->Edges.push_back(Edge{To, Offset});
+ ToInfo->ReverseEdges.push_back(Edge{From, Offset});
}
const NodeInfo *getNode(Node N) const {
@@ -151,6 +152,7 @@ template <typename CFLAA> class CFLGraphBuilder {
/// Gets the edges our graph should have, based on an Instruction*
class GetEdgesVisitor : public InstVisitor<GetEdgesVisitor, void> {
CFLAA &AA;
+ const DataLayout &DL;
const TargetLibraryInfo &TLI;
CFLGraph &Graph;
@@ -225,8 +227,8 @@ template <typename CFLAA> class CFLGraphBuilder {
void addStoreEdge(Value *From, Value *To) { addDerefEdge(From, To, false); }
public:
- GetEdgesVisitor(CFLGraphBuilder &Builder)
- : AA(Builder.Analysis), TLI(Builder.TLI), Graph(Builder.Graph),
+ GetEdgesVisitor(CFLGraphBuilder &Builder, const DataLayout &DL)
+ : AA(Builder.Analysis), DL(DL), TLI(Builder.TLI), Graph(Builder.Graph),
ReturnValues(Builder.ReturnedValues) {}
void visitInstruction(Instruction &) {
@@ -281,9 +283,20 @@ template <typename CFLAA> class CFLGraphBuilder {
addAssignEdge(Val, &Inst);
}
+ void visitGEP(GEPOperator &GEPOp) {
+ uint64_t Offset = UnknownOffset;
+ APInt APOffset(DL.getPointerSizeInBits(GEPOp.getPointerAddressSpace()),
+ 0);
+ if (GEPOp.accumulateConstantOffset(DL, APOffset))
+ Offset = APOffset.getSExtValue();
+
+ auto *Op = GEPOp.getPointerOperand();
+ addAssignEdge(Op, &GEPOp, Offset);
+ }
+
void visitGetElementPtrInst(GetElementPtrInst &Inst) {
- auto *Op = Inst.getPointerOperand();
- addAssignEdge(Op, &Inst);
+ auto *GEPOp = cast<GEPOperator>(&Inst);
+ visitGEP(*GEPOp);
}
void visitSelectInst(SelectInst &Inst) {
@@ -321,7 +334,8 @@ template <typename CFLAA> class CFLGraphBuilder {
// For now, we'll handle this like a landingpad instruction (by placing
// the result in its own group, and having that group alias externals).
- addNode(&Inst, getAttrUnknown());
+ if (Inst.getType()->isPointerTy())
+ addNode(&Inst, getAttrUnknown());
}
static bool isFunctionExternal(Function *Fn) {
@@ -444,7 +458,8 @@ template <typename CFLAA> class CFLGraphBuilder {
// Exceptions come from "nowhere", from our analysis' perspective.
// So we place the instruction in its own group, noting that said group may
// alias externals
- addNode(&Inst, getAttrUnknown());
+ if (Inst.getType()->isPointerTy())
+ addNode(&Inst, getAttrUnknown());
}
void visitInsertValueInst(InsertValueInst &Inst) {
@@ -468,14 +483,97 @@ template <typename CFLAA> class CFLGraphBuilder {
void visitConstantExpr(ConstantExpr *CE) {
switch (CE->getOpcode()) {
+ case Instruction::GetElementPtr: {
+ auto GEPOp = cast<GEPOperator>(CE);
+ visitGEP(*GEPOp);
+ break;
+ }
+ case Instruction::PtrToInt: {
+ auto *Ptr = CE->getOperand(0);
+ addNode(Ptr, getAttrEscaped());
+ break;
+ }
+ case Instruction::IntToPtr: {
+ addNode(CE, getAttrUnknown());
+ break;
+ }
+ case Instruction::BitCast:
+ case Instruction::AddrSpaceCast:
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPExt:
+ case Instruction::FPTrunc:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI: {
+ auto *Src = CE->getOperand(0);
+ addAssignEdge(Src, CE);
+ break;
+ }
+ case Instruction::Select: {
+ auto *TrueVal = CE->getOperand(0);
+ auto *FalseVal = CE->getOperand(1);
+ addAssignEdge(TrueVal, CE);
+ addAssignEdge(FalseVal, CE);
+ break;
+ }
+ case Instruction::InsertElement: {
+ auto *Vec = CE->getOperand(0);
+ auto *Val = CE->getOperand(1);
+ addAssignEdge(Vec, CE);
+ addStoreEdge(Val, CE);
+ break;
+ }
+ case Instruction::ExtractElement: {
+ auto *Ptr = CE->getOperand(0);
+ addLoadEdge(Ptr, CE);
+ break;
+ }
+ case Instruction::InsertValue: {
+ auto *Agg = CE->getOperand(0);
+ auto *Val = CE->getOperand(1);
+ addAssignEdge(Agg, CE);
+ addStoreEdge(Val, CE);
+ break;
+ }
+ case Instruction::ExtractValue: {
+ auto *Ptr = CE->getOperand(0);
+ addLoadEdge(Ptr, CE);
+ break;
+ }
+ case Instruction::ShuffleVector: {
+ auto *From1 = CE->getOperand(0);
+ auto *From2 = CE->getOperand(1);
+ addAssignEdge(From1, CE);
+ addAssignEdge(From2, CE);
+ break;
+ }
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::ICmp:
+ case Instruction::FCmp: {
+ addAssignEdge(CE->getOperand(0), CE);
+ addAssignEdge(CE->getOperand(1), CE);
+ break;
+ }
default:
llvm_unreachable("Unknown instruction type encountered!");
-// Build the switch statement using the Instruction.def file.
-#define HANDLE_INST(NUM, OPCODE, CLASS) \
- case Instruction::OPCODE: \
- this->visit##OPCODE(*(CLASS *)CE); \
- break;
-#include "llvm/IR/Instruction.def"
}
}
};
@@ -517,7 +615,7 @@ template <typename CFLAA> class CFLGraphBuilder {
// Builds the graph needed for constructing the StratifiedSets for the given
// function
void buildGraphFrom(Function &Fn) {
- GetEdgesVisitor Visitor(*this);
+ GetEdgesVisitor Visitor(*this, Fn.getParent()->getDataLayout());
for (auto &Bb : Fn.getBasicBlockList())
for (auto &Inst : Bb.getInstList())
diff --git a/contrib/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp
index d816822aaaea..dde24ef5fdd5 100644
--- a/contrib/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/CFLSteensAliasAnalysis.cpp
@@ -153,7 +153,7 @@ CFLSteensAAResult::FunctionInfo::FunctionInfo(
if (Itr != InterfaceMap.end()) {
if (CurrValue != Itr->second)
Summary.RetParamRelations.push_back(
- ExternalRelation{CurrValue, Itr->second});
+ ExternalRelation{CurrValue, Itr->second, UnknownOffset});
break;
}
@@ -341,81 +341,9 @@ AliasResult CFLSteensAAResult::query(const MemoryLocation &LocA,
return NoAlias;
}
-ModRefInfo CFLSteensAAResult::getArgModRefInfo(ImmutableCallSite CS,
- unsigned ArgIdx) {
- if (auto CalledFunc = CS.getCalledFunction()) {
- auto &MaybeInfo = ensureCached(const_cast<Function *>(CalledFunc));
- if (!MaybeInfo.hasValue())
- return MRI_ModRef;
- auto &RetParamAttributes = MaybeInfo->getAliasSummary().RetParamAttributes;
- auto &RetParamRelations = MaybeInfo->getAliasSummary().RetParamRelations;
-
- bool ArgAttributeIsWritten =
- std::any_of(RetParamAttributes.begin(), RetParamAttributes.end(),
- [ArgIdx](const ExternalAttribute &ExtAttr) {
- return ExtAttr.IValue.Index == ArgIdx + 1;
- });
- bool ArgIsAccessed =
- std::any_of(RetParamRelations.begin(), RetParamRelations.end(),
- [ArgIdx](const ExternalRelation &ExtRelation) {
- return ExtRelation.To.Index == ArgIdx + 1 ||
- ExtRelation.From.Index == ArgIdx + 1;
- });
-
- return (!ArgIsAccessed && !ArgAttributeIsWritten) ? MRI_NoModRef
- : MRI_ModRef;
- }
-
- return MRI_ModRef;
-}
-
-FunctionModRefBehavior
-CFLSteensAAResult::getModRefBehavior(ImmutableCallSite CS) {
- // If we know the callee, try analyzing it
- if (auto CalledFunc = CS.getCalledFunction())
- return getModRefBehavior(CalledFunc);
-
- // Otherwise, be conservative
- return FMRB_UnknownModRefBehavior;
-}
-
-FunctionModRefBehavior CFLSteensAAResult::getModRefBehavior(const Function *F) {
- assert(F != nullptr);
-
- // TODO: Remove the const_cast
- auto &MaybeInfo = ensureCached(const_cast<Function *>(F));
- if (!MaybeInfo.hasValue())
- return FMRB_UnknownModRefBehavior;
- auto &RetParamAttributes = MaybeInfo->getAliasSummary().RetParamAttributes;
- auto &RetParamRelations = MaybeInfo->getAliasSummary().RetParamRelations;
-
- // First, if any argument is marked Escpaed, Unknown or Global, anything may
- // happen to them and thus we can't draw any conclusion.
- if (!RetParamAttributes.empty())
- return FMRB_UnknownModRefBehavior;
-
- // Currently we don't (and can't) distinguish reads from writes in
- // RetParamRelations. All we can say is whether there may be memory access or
- // not.
- if (RetParamRelations.empty())
- return FMRB_DoesNotAccessMemory;
-
- // Check if something beyond argmem gets touched.
- bool AccessArgMemoryOnly =
- std::all_of(RetParamRelations.begin(), RetParamRelations.end(),
- [](const ExternalRelation &ExtRelation) {
- // Both DerefLevels has to be 0, since we don't know which
- // one is a read and which is a write.
- return ExtRelation.From.DerefLevel == 0 &&
- ExtRelation.To.DerefLevel == 0;
- });
- return AccessArgMemoryOnly ? FMRB_OnlyAccessesArgumentPointees
- : FMRB_UnknownModRefBehavior;
-}
-
-char CFLSteensAA::PassID;
+AnalysisKey CFLSteensAA::Key;
-CFLSteensAAResult CFLSteensAA::run(Function &F, AnalysisManager<Function> &AM) {
+CFLSteensAAResult CFLSteensAA::run(Function &F, FunctionAnalysisManager &AM) {
return CFLSteensAAResult(AM.getResult<TargetLibraryAnalysis>(F));
}
diff --git a/contrib/llvm/lib/Analysis/CGSCCPassManager.cpp b/contrib/llvm/lib/Analysis/CGSCCPassManager.cpp
index f6f30bb927a5..054bdc45ad67 100644
--- a/contrib/llvm/lib/Analysis/CGSCCPassManager.cpp
+++ b/contrib/llvm/lib/Analysis/CGSCCPassManager.cpp
@@ -8,17 +8,506 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/CGSCCPassManager.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/InstIterator.h"
using namespace llvm;
-// Explicit instantiations for the core proxy templates.
+// Explicit template instantiations and specialization definitions for core
+// template typedefs.
namespace llvm {
-template class PassManager<LazyCallGraph::SCC>;
-template class AnalysisManager<LazyCallGraph::SCC>;
+
+// Explicit instantiations for the core proxy templates.
+template class AllAnalysesOn<LazyCallGraph::SCC>;
+template class AnalysisManager<LazyCallGraph::SCC, LazyCallGraph &>;
+template class PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager,
+ LazyCallGraph &, CGSCCUpdateResult &>;
template class InnerAnalysisManagerProxy<CGSCCAnalysisManager, Module>;
template class OuterAnalysisManagerProxy<ModuleAnalysisManager,
- LazyCallGraph::SCC>;
-template class InnerAnalysisManagerProxy<FunctionAnalysisManager,
- LazyCallGraph::SCC>;
+ LazyCallGraph::SCC, LazyCallGraph &>;
template class OuterAnalysisManagerProxy<CGSCCAnalysisManager, Function>;
+
+/// Explicitly specialize the pass manager run method to handle call graph
+/// updates.
+template <>
+PreservedAnalyses
+PassManager<LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &,
+ CGSCCUpdateResult &>::run(LazyCallGraph::SCC &InitialC,
+ CGSCCAnalysisManager &AM,
+ LazyCallGraph &G, CGSCCUpdateResult &UR) {
+ PreservedAnalyses PA = PreservedAnalyses::all();
+
+ if (DebugLogging)
+ dbgs() << "Starting CGSCC pass manager run.\n";
+
+ // The SCC may be refined while we are running passes over it, so set up
+ // a pointer that we can update.
+ LazyCallGraph::SCC *C = &InitialC;
+
+ for (auto &Pass : Passes) {
+ if (DebugLogging)
+ dbgs() << "Running pass: " << Pass->name() << " on " << *C << "\n";
+
+ PreservedAnalyses PassPA = Pass->run(*C, AM, G, UR);
+
+ // Update the SCC if necessary.
+ C = UR.UpdatedC ? UR.UpdatedC : C;
+
+ // Check that we didn't miss any update scenario.
+ assert(!UR.InvalidatedSCCs.count(C) && "Processing an invalid SCC!");
+ assert(C->begin() != C->end() && "Cannot have an empty SCC!");
+
+ // Update the analysis manager as each pass runs and potentially
+ // invalidates analyses.
+ AM.invalidate(*C, PassPA);
+
+ // Finally, we intersect the final preserved analyses to compute the
+ // aggregate preserved set for this pass manager.
+ PA.intersect(std::move(PassPA));
+
+ // FIXME: Historically, the pass managers all called the LLVM context's
+ // yield function here. We don't have a generic way to acquire the
+ // context and it isn't yet clear what the right pattern is for yielding
+ // in the new pass manager so it is currently omitted.
+ // ...getContext().yield();
+ }
+
+ // Invalidation was handled after each pass in the above loop for the current
+ // SCC. Therefore, the remaining analysis results in the AnalysisManager are
+ // preserved. We mark this with a set so that we don't need to inspect each
+ // one individually.
+ PA.preserveSet<AllAnalysesOn<LazyCallGraph::SCC>>();
+
+ if (DebugLogging)
+ dbgs() << "Finished CGSCC pass manager run.\n";
+
+ return PA;
+}
+
+bool CGSCCAnalysisManagerModuleProxy::Result::invalidate(
+ Module &M, const PreservedAnalyses &PA,
+ ModuleAnalysisManager::Invalidator &Inv) {
+ // If literally everything is preserved, we're done.
+ if (PA.areAllPreserved())
+ return false; // This is still a valid proxy.
+
+ // If this proxy or the call graph is going to be invalidated, we also need
+ // to clear all the keys coming from that analysis.
+ //
+ // We also directly invalidate the FAM's module proxy if necessary, and if
+ // that proxy isn't preserved we can't preserve this proxy either. We rely on
+ // it to handle module -> function analysis invalidation in the face of
+ // structural changes and so if it's unavailable we conservatively clear the
+ // entire SCC layer as well rather than trying to do invalidation ourselves.
+ auto PAC = PA.getChecker<CGSCCAnalysisManagerModuleProxy>();
+ if (!(PAC.preserved() || PAC.preservedSet<AllAnalysesOn<Module>>()) ||
+ Inv.invalidate<LazyCallGraphAnalysis>(M, PA) ||
+ Inv.invalidate<FunctionAnalysisManagerModuleProxy>(M, PA)) {
+ InnerAM->clear();
+
+ // And the proxy itself should be marked as invalid so that we can observe
+ // the new call graph. This isn't strictly necessary because we cheat
+ // above, but is still useful.
+ return true;
+ }
+
+ // Directly check if the relevant set is preserved so we can short circuit
+ // invalidating SCCs below.
+ bool AreSCCAnalysesPreserved =
+ PA.allAnalysesInSetPreserved<AllAnalysesOn<LazyCallGraph::SCC>>();
+
+ // Ok, we have a graph, so we can propagate the invalidation down into it.
+ for (auto &RC : G->postorder_ref_sccs())
+ for (auto &C : RC) {
+ Optional<PreservedAnalyses> InnerPA;
+
+ // Check to see whether the preserved set needs to be adjusted based on
+ // module-level analysis invalidation triggering deferred invalidation
+ // for this SCC.
+ if (auto *OuterProxy =
+ InnerAM->getCachedResult<ModuleAnalysisManagerCGSCCProxy>(C))
+ for (const auto &OuterInvalidationPair :
+ OuterProxy->getOuterInvalidations()) {
+ AnalysisKey *OuterAnalysisID = OuterInvalidationPair.first;
+ const auto &InnerAnalysisIDs = OuterInvalidationPair.second;
+ if (Inv.invalidate(OuterAnalysisID, M, PA)) {
+ if (!InnerPA)
+ InnerPA = PA;
+ for (AnalysisKey *InnerAnalysisID : InnerAnalysisIDs)
+ InnerPA->abandon(InnerAnalysisID);
+ }
+ }
+
+ // Check if we needed a custom PA set. If so we'll need to run the inner
+ // invalidation.
+ if (InnerPA) {
+ InnerAM->invalidate(C, *InnerPA);
+ continue;
+ }
+
+ // Otherwise we only need to do invalidation if the original PA set didn't
+ // preserve all SCC analyses.
+ if (!AreSCCAnalysesPreserved)
+ InnerAM->invalidate(C, PA);
+ }
+
+ // Return false to indicate that this result is still a valid proxy.
+ return false;
+}
+
+template <>
+CGSCCAnalysisManagerModuleProxy::Result
+CGSCCAnalysisManagerModuleProxy::run(Module &M, ModuleAnalysisManager &AM) {
+ // Force the Function analysis manager to also be available so that it can
+ // be accessed in an SCC analysis and proxied onward to function passes.
+ // FIXME: It is pretty awkward to just drop the result here and assert that
+ // we can find it again later.
+ (void)AM.getResult<FunctionAnalysisManagerModuleProxy>(M);
+
+ return Result(*InnerAM, AM.getResult<LazyCallGraphAnalysis>(M));
+}
+
+AnalysisKey FunctionAnalysisManagerCGSCCProxy::Key;
+
+FunctionAnalysisManagerCGSCCProxy::Result
+FunctionAnalysisManagerCGSCCProxy::run(LazyCallGraph::SCC &C,
+ CGSCCAnalysisManager &AM,
+ LazyCallGraph &CG) {
+ // Collect the FunctionAnalysisManager from the Module layer and use that to
+ // build the proxy result.
+ //
+ // This allows us to rely on the FunctionAnalysisManagerModuleProxy to
+ // invalidate the function analyses.
+ auto &MAM = AM.getResult<ModuleAnalysisManagerCGSCCProxy>(C, CG).getManager();
+ Module &M = *C.begin()->getFunction().getParent();
+ auto *FAMProxy = MAM.getCachedResult<FunctionAnalysisManagerModuleProxy>(M);
+ assert(FAMProxy && "The CGSCC pass manager requires that the FAM module "
+ "proxy is run on the module prior to entering the CGSCC "
+ "walk.");
+
+ // Note that we special-case invalidation handling of this proxy in the CGSCC
+ // analysis manager's Module proxy. This avoids the need to do anything
+ // special here to recompute all of this if ever the FAM's module proxy goes
+ // away.
+ return Result(FAMProxy->getManager());
+}
+
+bool FunctionAnalysisManagerCGSCCProxy::Result::invalidate(
+ LazyCallGraph::SCC &C, const PreservedAnalyses &PA,
+ CGSCCAnalysisManager::Invalidator &Inv) {
+ for (LazyCallGraph::Node &N : C)
+ FAM->invalidate(N.getFunction(), PA);
+
+ // This proxy doesn't need to handle invalidation itself. Instead, the
+ // module-level CGSCC proxy handles it above by ensuring that if the
+ // module-level FAM proxy becomes invalid the entire SCC layer, which
+ // includes this proxy, is cleared.
+ return false;
+}
+
+} // End llvm namespace
+
+namespace {
+/// Helper function to update both the \c CGSCCAnalysisManager \p AM and the \c
+/// CGSCCPassManager's \c CGSCCUpdateResult \p UR based on a range of newly
+/// added SCCs.
+///
+/// The range of new SCCs must be in postorder already. The SCC they were split
+/// out of must be provided as \p C. The current node being mutated and
+/// triggering updates must be passed as \p N.
+///
+/// This function returns the SCC containing \p N. This will be either \p C if
+/// no new SCCs have been split out, or it will be the new SCC containing \p N.
+template <typename SCCRangeT>
+LazyCallGraph::SCC *
+incorporateNewSCCRange(const SCCRangeT &NewSCCRange, LazyCallGraph &G,
+ LazyCallGraph::Node &N, LazyCallGraph::SCC *C,
+ CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR,
+ bool DebugLogging = false) {
+ typedef LazyCallGraph::SCC SCC;
+
+ if (NewSCCRange.begin() == NewSCCRange.end())
+ return C;
+
+ // Add the current SCC to the worklist as its shape has changed.
+ UR.CWorklist.insert(C);
+ if (DebugLogging)
+ dbgs() << "Enqueuing the existing SCC in the worklist:" << *C << "\n";
+
+ SCC *OldC = C;
+ (void)OldC;
+
+ // Update the current SCC. Note that if we have new SCCs, this must actually
+ // change the SCC.
+ assert(C != &*NewSCCRange.begin() &&
+ "Cannot insert new SCCs without changing current SCC!");
+ C = &*NewSCCRange.begin();
+ assert(G.lookupSCC(N) == C && "Failed to update current SCC!");
+
+ for (SCC &NewC :
+ reverse(make_range(std::next(NewSCCRange.begin()), NewSCCRange.end()))) {
+ assert(C != &NewC && "No need to re-visit the current SCC!");
+ assert(OldC != &NewC && "Already handled the original SCC!");
+ UR.CWorklist.insert(&NewC);
+ if (DebugLogging)
+ dbgs() << "Enqueuing a newly formed SCC:" << NewC << "\n";
+ }
+ return C;
+}
+}
+
+LazyCallGraph::SCC &llvm::updateCGAndAnalysisManagerForFunctionPass(
+ LazyCallGraph &G, LazyCallGraph::SCC &InitialC, LazyCallGraph::Node &N,
+ CGSCCAnalysisManager &AM, CGSCCUpdateResult &UR, bool DebugLogging) {
+ typedef LazyCallGraph::Node Node;
+ typedef LazyCallGraph::Edge Edge;
+ typedef LazyCallGraph::SCC SCC;
+ typedef LazyCallGraph::RefSCC RefSCC;
+
+ RefSCC &InitialRC = InitialC.getOuterRefSCC();
+ SCC *C = &InitialC;
+ RefSCC *RC = &InitialRC;
+ Function &F = N.getFunction();
+
+ // Walk the function body and build up the set of retained, promoted, and
+ // demoted edges.
+ SmallVector<Constant *, 16> Worklist;
+ SmallPtrSet<Constant *, 16> Visited;
+ SmallPtrSet<Function *, 16> RetainedEdges;
+ SmallSetVector<Function *, 4> PromotedRefTargets;
+ SmallSetVector<Function *, 4> DemotedCallTargets;
+
+ // First walk the function and handle all called functions. We do this first
+ // because if there is a single call edge, whether there are ref edges is
+ // irrelevant.
+ for (Instruction &I : instructions(F))
+ if (auto CS = CallSite(&I))
+ if (Function *Callee = CS.getCalledFunction())
+ if (Visited.insert(Callee).second && !Callee->isDeclaration()) {
+ const Edge *E = N.lookup(*Callee);
+ // FIXME: We should really handle adding new calls. While it will
+ // make downstream usage more complex, there is no fundamental
+ // limitation and it will allow passes within the CGSCC to be a bit
+ // more flexible in what transforms they can do. Until then, we
+ // verify that new calls haven't been introduced.
+ assert(E && "No function transformations should introduce *new* "
+ "call edges! Any new calls should be modeled as "
+ "promoted existing ref edges!");
+ RetainedEdges.insert(Callee);
+ if (!E->isCall())
+ PromotedRefTargets.insert(Callee);
+ }
+
+ // Now walk all references.
+ for (Instruction &I : instructions(F))
+ for (Value *Op : I.operand_values())
+ if (Constant *C = dyn_cast<Constant>(Op))
+ if (Visited.insert(C).second)
+ Worklist.push_back(C);
+
+ LazyCallGraph::visitReferences(Worklist, Visited, [&](Function &Referee) {
+ const Edge *E = N.lookup(Referee);
+ // FIXME: Similarly to new calls, we also currently preclude
+ // introducing new references. See above for details.
+ assert(E && "No function transformations should introduce *new* ref "
+ "edges! Any new ref edges would require IPO which "
+ "function passes aren't allowed to do!");
+ RetainedEdges.insert(&Referee);
+ if (E->isCall())
+ DemotedCallTargets.insert(&Referee);
+ });
+
+ // First remove all of the edges that are no longer present in this function.
+ // We have to build a list of dead targets first and then remove them as the
+ // data structures will all be invalidated by removing them.
+ SmallVector<PointerIntPair<Node *, 1, Edge::Kind>, 4> DeadTargets;
+ for (Edge &E : N)
+ if (!RetainedEdges.count(&E.getFunction()))
+ DeadTargets.push_back({E.getNode(), E.getKind()});
+ for (auto DeadTarget : DeadTargets) {
+ Node &TargetN = *DeadTarget.getPointer();
+ bool IsCall = DeadTarget.getInt() == Edge::Call;
+ SCC &TargetC = *G.lookupSCC(TargetN);
+ RefSCC &TargetRC = TargetC.getOuterRefSCC();
+
+ if (&TargetRC != RC) {
+ RC->removeOutgoingEdge(N, TargetN);
+ if (DebugLogging)
+ dbgs() << "Deleting outgoing edge from '" << N << "' to '" << TargetN
+ << "'\n";
+ continue;
+ }
+ if (DebugLogging)
+ dbgs() << "Deleting internal " << (IsCall ? "call" : "ref")
+ << " edge from '" << N << "' to '" << TargetN << "'\n";
+
+ if (IsCall) {
+ if (C != &TargetC) {
+ // For separate SCCs this is trivial.
+ RC->switchTrivialInternalEdgeToRef(N, TargetN);
+ } else {
+ // Otherwise we may end up re-structuring the call graph. First,
+ // invalidate any SCC analyses. We have to do this before we split
+ // functions into new SCCs and lose track of where their analyses are
+ // cached.
+ // FIXME: We should accept a more precise preserved set here. For
+ // example, it might be possible to preserve some function analyses
+ // even as the SCC structure is changed.
+ AM.invalidate(*C, PreservedAnalyses::none());
+ // Now update the call graph.
+ C = incorporateNewSCCRange(RC->switchInternalEdgeToRef(N, TargetN), G,
+ N, C, AM, UR, DebugLogging);
+ }
+ }
+
+ auto NewRefSCCs = RC->removeInternalRefEdge(N, TargetN);
+ if (!NewRefSCCs.empty()) {
+ // Note that we don't bother to invalidate analyses as ref-edge
+ // connectivity is not really observable in any way and is intended
+ // exclusively to be used for ordering of transforms rather than for
+ // analysis conclusions.
+
+ // The RC worklist is in reverse postorder, so we first enqueue the
+ // current RefSCC as it will remain the parent of all split RefSCCs, then
+ // we enqueue the new ones in RPO except for the one which contains the
+ // source node as that is the "bottom" we will continue processing in the
+ // bottom-up walk.
+ UR.RCWorklist.insert(RC);
+ if (DebugLogging)
+ dbgs() << "Enqueuing the existing RefSCC in the update worklist: "
+ << *RC << "\n";
+ // Update the RC to the "bottom".
+ assert(G.lookupSCC(N) == C && "Changed the SCC when splitting RefSCCs!");
+ RC = &C->getOuterRefSCC();
+ assert(G.lookupRefSCC(N) == RC && "Failed to update current RefSCC!");
+ assert(NewRefSCCs.front() == RC &&
+ "New current RefSCC not first in the returned list!");
+ for (RefSCC *NewRC : reverse(
+ make_range(std::next(NewRefSCCs.begin()), NewRefSCCs.end()))) {
+ assert(NewRC != RC && "Should not encounter the current RefSCC further "
+ "in the postorder list of new RefSCCs.");
+ UR.RCWorklist.insert(NewRC);
+ if (DebugLogging)
+ dbgs() << "Enqueuing a new RefSCC in the update worklist: " << *NewRC
+ << "\n";
+ }
+ }
+ }
+
+ // Next demote all the call edges that are now ref edges. This helps make
+ // the SCCs small which should minimize the work below as we don't want to
+ // form cycles that this would break.
+ for (Function *RefTarget : DemotedCallTargets) {
+ Node &TargetN = *G.lookup(*RefTarget);
+ SCC &TargetC = *G.lookupSCC(TargetN);
+ RefSCC &TargetRC = TargetC.getOuterRefSCC();
+
+ // The easy case is when the target RefSCC is not this RefSCC. This is
+ // only supported when the target RefSCC is a child of this RefSCC.
+ if (&TargetRC != RC) {
+ assert(RC->isAncestorOf(TargetRC) &&
+ "Cannot potentially form RefSCC cycles here!");
+ RC->switchOutgoingEdgeToRef(N, TargetN);
+ if (DebugLogging)
+ dbgs() << "Switch outgoing call edge to a ref edge from '" << N
+ << "' to '" << TargetN << "'\n";
+ continue;
+ }
+
+ // We are switching an internal call edge to a ref edge. This may split up
+ // some SCCs.
+ if (C != &TargetC) {
+ // For separate SCCs this is trivial.
+ RC->switchTrivialInternalEdgeToRef(N, TargetN);
+ continue;
+ }
+
+ // Otherwise we may end up re-structuring the call graph. First, invalidate
+ // any SCC analyses. We have to do this before we split functions into new
+ // SCCs and lose track of where their analyses are cached.
+ // FIXME: We should accept a more precise preserved set here. For example,
+ // it might be possible to preserve some function analyses even as the SCC
+ // structure is changed.
+ AM.invalidate(*C, PreservedAnalyses::none());
+ // Now update the call graph.
+ C = incorporateNewSCCRange(RC->switchInternalEdgeToRef(N, TargetN), G,
+ N, C, AM, UR, DebugLogging);
+ }
+
+ // Now promote ref edges into call edges.
+ for (Function *CallTarget : PromotedRefTargets) {
+ Node &TargetN = *G.lookup(*CallTarget);
+ SCC &TargetC = *G.lookupSCC(TargetN);
+ RefSCC &TargetRC = TargetC.getOuterRefSCC();
+
+ // The easy case is when the target RefSCC is not this RefSCC. This is
+ // only supported when the target RefSCC is a child of this RefSCC.
+ if (&TargetRC != RC) {
+ assert(RC->isAncestorOf(TargetRC) &&
+ "Cannot potentially form RefSCC cycles here!");
+ RC->switchOutgoingEdgeToCall(N, TargetN);
+ if (DebugLogging)
+ dbgs() << "Switch outgoing ref edge to a call edge from '" << N
+ << "' to '" << TargetN << "'\n";
+ continue;
+ }
+ if (DebugLogging)
+ dbgs() << "Switch an internal ref edge to a call edge from '" << N
+ << "' to '" << TargetN << "'\n";
+
+ // Otherwise we are switching an internal ref edge to a call edge. This
+ // may merge away some SCCs, and we add those to the UpdateResult. We also
+ // need to make sure to update the worklist in the event SCCs have moved
+ // before the current one in the post-order sequence.
+ auto InitialSCCIndex = RC->find(*C) - RC->begin();
+ auto InvalidatedSCCs = RC->switchInternalEdgeToCall(N, TargetN);
+ if (!InvalidatedSCCs.empty()) {
+ C = &TargetC;
+ assert(G.lookupSCC(N) == C && "Failed to update current SCC!");
+
+ // Any analyses cached for this SCC are no longer precise as the shape
+ // has changed by introducing this cycle.
+ AM.invalidate(*C, PreservedAnalyses::none());
+
+ for (SCC *InvalidatedC : InvalidatedSCCs) {
+ assert(InvalidatedC != C && "Cannot invalidate the current SCC!");
+ UR.InvalidatedSCCs.insert(InvalidatedC);
+
+ // Also clear any cached analyses for the SCCs that are dead. This
+ // isn't really necessary for correctness but can release memory.
+ AM.clear(*InvalidatedC);
+ }
+ }
+ auto NewSCCIndex = RC->find(*C) - RC->begin();
+ if (InitialSCCIndex < NewSCCIndex) {
+ // Put our current SCC back onto the worklist as we'll visit other SCCs
+ // that are now definitively ordered prior to the current one in the
+ // post-order sequence, and may end up observing more precise context to
+ // optimize the current SCC.
+ UR.CWorklist.insert(C);
+ if (DebugLogging)
+ dbgs() << "Enqueuing the existing SCC in the worklist: " << *C << "\n";
+ // Enqueue in reverse order as we pop off the back of the worklist.
+ for (SCC &MovedC : reverse(make_range(RC->begin() + InitialSCCIndex,
+ RC->begin() + NewSCCIndex))) {
+ UR.CWorklist.insert(&MovedC);
+ if (DebugLogging)
+ dbgs() << "Enqueuing a newly earlier in post-order SCC: " << MovedC
+ << "\n";
+ }
+ }
+ }
+
+ assert(!UR.InvalidatedSCCs.count(C) && "Invalidated the current SCC!");
+ assert(!UR.InvalidatedRefSCCs.count(RC) && "Invalidated the current RefSCC!");
+ assert(&C->getOuterRefSCC() == RC && "Current SCC not in current RefSCC!");
+
+ // Record the current RefSCC and SCC for higher layers of the CGSCC pass
+ // manager now that all the updates have been applied.
+ if (RC != &InitialRC)
+ UR.UpdatedRC = RC;
+ if (C != &InitialC)
+ UR.UpdatedC = C;
+
+ return *C;
}
diff --git a/contrib/llvm/lib/Analysis/CallGraph.cpp b/contrib/llvm/lib/Analysis/CallGraph.cpp
index 39cb86d2ccb1..458b7bfae959 100644
--- a/contrib/llvm/lib/Analysis/CallGraph.cpp
+++ b/contrib/llvm/lib/Analysis/CallGraph.cpp
@@ -258,10 +258,10 @@ void CallGraphNode::replaceCallEdge(CallSite CS,
}
// Provide an explicit template instantiation for the static ID.
-char CallGraphAnalysis::PassID;
+AnalysisKey CallGraphAnalysis::Key;
PreservedAnalyses CallGraphPrinterPass::run(Module &M,
- AnalysisManager<Module> &AM) {
+ ModuleAnalysisManager &AM) {
AM.getResult<CallGraphAnalysis>(M).print(OS);
return PreservedAnalyses::all();
}
diff --git a/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp b/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp
index 69d767354785..9cef78144150 100644
--- a/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp
+++ b/contrib/llvm/lib/Analysis/CallGraphSCCPass.cpp
@@ -67,9 +67,7 @@ public:
Info.setPreservesAll();
}
- const char *getPassName() const override {
- return "CallGraph Pass Manager";
- }
+ StringRef getPassName() const override { return "CallGraph Pass Manager"; }
PMDataManager *getAsPMDataManager() override { return this; }
Pass *getAsPass() override { return this; }
@@ -100,7 +98,7 @@ private:
bool RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
CallGraph &CG, bool &CallGraphUpToDate,
bool &DevirtualizedCall);
- bool RefreshCallGraph(CallGraphSCC &CurSCC, CallGraph &CG,
+ bool RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
bool IsCheckingMode);
};
@@ -175,8 +173,8 @@ bool CGPassManager::RunPassOnSCC(Pass *P, CallGraphSCC &CurSCC,
/// a function pass like GVN optimizes away stuff feeding the indirect call.
/// This never happens in checking mode.
///
-bool CGPassManager::RefreshCallGraph(CallGraphSCC &CurSCC,
- CallGraph &CG, bool CheckingMode) {
+bool CGPassManager::RefreshCallGraph(const CallGraphSCC &CurSCC, CallGraph &CG,
+ bool CheckingMode) {
DenseMap<Value*, CallGraphNode*> CallSites;
DEBUG(dbgs() << "CGSCCPASSMGR: Refreshing SCC with " << CurSCC.size()
@@ -450,7 +448,7 @@ bool CGPassManager::runOnModule(Module &M) {
// Copy the current SCC and increment past it so that the pass can hack
// on the SCC if it wants to without invalidating our iterator.
const std::vector<CallGraphNode *> &NodeVec = *CGI;
- CurSCC.initialize(NodeVec.data(), NodeVec.data() + NodeVec.size());
+ CurSCC.initialize(NodeVec);
++CGI;
// At the top level, we run all the passes in this pass manager on the
diff --git a/contrib/llvm/lib/Analysis/CodeMetrics.cpp b/contrib/llvm/lib/Analysis/CodeMetrics.cpp
index ed8370498dd0..bdffdd8eb270 100644
--- a/contrib/llvm/lib/Analysis/CodeMetrics.cpp
+++ b/contrib/llvm/lib/Analysis/CodeMetrics.cpp
@@ -27,36 +27,45 @@
using namespace llvm;
-static void completeEphemeralValues(SmallVector<const Value *, 16> &WorkSet,
- SmallPtrSetImpl<const Value*> &EphValues) {
- SmallPtrSet<const Value *, 32> Visited;
-
- // Make sure that all of the items in WorkSet are in our EphValues set.
- EphValues.insert(WorkSet.begin(), WorkSet.end());
+static void
+appendSpeculatableOperands(const Value *V,
+ SmallPtrSetImpl<const Value *> &Visited,
+ SmallVectorImpl<const Value *> &Worklist) {
+ const User *U = dyn_cast<User>(V);
+ if (!U)
+ return;
+
+ for (const Value *Operand : U->operands())
+ if (Visited.insert(Operand).second)
+ if (isSafeToSpeculativelyExecute(Operand))
+ Worklist.push_back(Operand);
+}
+static void completeEphemeralValues(SmallPtrSetImpl<const Value *> &Visited,
+ SmallVectorImpl<const Value *> &Worklist,
+ SmallPtrSetImpl<const Value *> &EphValues) {
// Note: We don't speculate PHIs here, so we'll miss instruction chains kept
// alive only by ephemeral values.
- while (!WorkSet.empty()) {
- const Value *V = WorkSet.front();
- WorkSet.erase(WorkSet.begin());
+ // Walk the worklist using an index but without caching the size so we can
+ // append more entries as we process the worklist. This forms a queue without
+ // quadratic behavior by just leaving processed nodes at the head of the
+ // worklist forever.
+ for (int i = 0; i < (int)Worklist.size(); ++i) {
+ const Value *V = Worklist[i];
- if (!Visited.insert(V).second)
- continue;
+ assert(Visited.count(V) &&
+ "Failed to add a worklist entry to our visited set!");
// If all uses of this value are ephemeral, then so is this value.
- if (!std::all_of(V->user_begin(), V->user_end(),
- [&](const User *U) { return EphValues.count(U); }))
+ if (!all_of(V->users(), [&](const User *U) { return EphValues.count(U); }))
continue;
EphValues.insert(V);
DEBUG(dbgs() << "Ephemeral Value: " << *V << "\n");
- if (const User *U = dyn_cast<User>(V))
- for (const Value *J : U->operands()) {
- if (isSafeToSpeculativelyExecute(J))
- WorkSet.push_back(J);
- }
+ // Append any more operands to consider.
+ appendSpeculatableOperands(V, Visited, Worklist);
}
}
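The index-based walk above is the usual grow-while-iterating worklist idiom; a minimal standalone sketch of the same pattern with generic types (not the pass's own data structures):

#include <cstdio>
#include <vector>

// Visit every item while appending newly discovered ones. Indexing instead of
// erasing from the front keeps the whole walk linear in the number of items.
static void drainWorklist(std::vector<int> &Worklist) {
  for (size_t I = 0; I < Worklist.size(); ++I) {
    int V = Worklist[I];
    std::printf("visited %d\n", V);
    if (V > 0)
      Worklist.push_back(V - 1); // a newly discovered "operand"
  }
}

int main() {
  std::vector<int> Worklist = {2};
  drainWorklist(Worklist); // visits 2, 1, 0
  return 0;
}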
@@ -64,29 +73,32 @@ static void completeEphemeralValues(SmallVector<const Value *, 16> &WorkSet,
void CodeMetrics::collectEphemeralValues(
const Loop *L, AssumptionCache *AC,
SmallPtrSetImpl<const Value *> &EphValues) {
- SmallVector<const Value *, 16> WorkSet;
+ SmallPtrSet<const Value *, 32> Visited;
+ SmallVector<const Value *, 16> Worklist;
for (auto &AssumeVH : AC->assumptions()) {
if (!AssumeVH)
continue;
Instruction *I = cast<Instruction>(AssumeVH);
- // Filter out call sites outside of the loop so we don't to a function's
+ // Filter out call sites outside of the loop so we don't do a function's
// worth of work for each of its loops (and, in the common case, ephemeral
// values in the loop are likely due to @llvm.assume calls in the loop).
if (!L->contains(I->getParent()))
continue;
- WorkSet.push_back(I);
+ if (EphValues.insert(I).second)
+ appendSpeculatableOperands(I, Visited, Worklist);
}
- completeEphemeralValues(WorkSet, EphValues);
+ completeEphemeralValues(Visited, Worklist, EphValues);
}
void CodeMetrics::collectEphemeralValues(
const Function *F, AssumptionCache *AC,
SmallPtrSetImpl<const Value *> &EphValues) {
- SmallVector<const Value *, 16> WorkSet;
+ SmallPtrSet<const Value *, 32> Visited;
+ SmallVector<const Value *, 16> Worklist;
for (auto &AssumeVH : AC->assumptions()) {
if (!AssumeVH)
@@ -94,17 +106,19 @@ void CodeMetrics::collectEphemeralValues(
Instruction *I = cast<Instruction>(AssumeVH);
assert(I->getParent()->getParent() == F &&
"Found assumption for the wrong function!");
- WorkSet.push_back(I);
+
+ if (EphValues.insert(I).second)
+ appendSpeculatableOperands(I, Visited, Worklist);
}
- completeEphemeralValues(WorkSet, EphValues);
+ completeEphemeralValues(Visited, Worklist, EphValues);
}
/// Fill in the current structure with information gleaned from the specified
/// block.
void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
const TargetTransformInfo &TTI,
- SmallPtrSetImpl<const Value*> &EphValues) {
+ const SmallPtrSetImpl<const Value*> &EphValues) {
++NumBlocks;
unsigned NumInstsBeforeThisBB = NumInsts;
for (const Instruction &I : *BB) {
diff --git a/contrib/llvm/lib/Analysis/ConstantFolding.cpp b/contrib/llvm/lib/Analysis/ConstantFolding.cpp
index c9adaa7b111c..73867279abe4 100644
--- a/contrib/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/contrib/llvm/lib/Analysis/ConstantFolding.cpp
@@ -17,29 +17,38 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringMap.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Config/config.h"
+#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cerrno>
#include <cfenv>
#include <cmath>
-#include <limits>
+#include <cstddef>
+#include <cstdint>
using namespace llvm;
@@ -49,6 +58,36 @@ namespace {
// Constant Folding internal helper functions
//===----------------------------------------------------------------------===//
+static Constant *foldConstVectorToAPInt(APInt &Result, Type *DestTy,
+ Constant *C, Type *SrcEltTy,
+ unsigned NumSrcElts,
+ const DataLayout &DL) {
+ // Now that we know that the input value is a vector of integers, just shift
+ // and insert them into our result.
+ unsigned BitShift = DL.getTypeSizeInBits(SrcEltTy);
+ for (unsigned i = 0; i != NumSrcElts; ++i) {
+ Constant *Element;
+ if (DL.isLittleEndian())
+ Element = C->getAggregateElement(NumSrcElts - i - 1);
+ else
+ Element = C->getAggregateElement(i);
+
+ if (Element && isa<UndefValue>(Element)) {
+ Result <<= BitShift;
+ continue;
+ }
+
+ auto *ElementCI = dyn_cast_or_null<ConstantInt>(Element);
+ if (!ElementCI)
+ return ConstantExpr::getBitCast(C, DestTy);
+
+ Result <<= BitShift;
+ Result |= ElementCI->getValue().zextOrSelf(Result.getBitWidth());
+ }
+
+ return nullptr;
+}
+
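A worked example of the packing loop above, assuming a little-endian target (values are illustrative, not from the diff): bitcasting <4 x i8> <1, 2, 3, 4> to i32 visits the elements last-to-first and yields 0x04030201, the vector's in-memory image.

#include "llvm/ADT/APInt.h"
#include <cassert>
#include <cstdint>
using namespace llvm;

int main() {
  uint8_t Elts[4] = {1, 2, 3, 4};   // <4 x i8> <1, 2, 3, 4>
  APInt Result(32, 0);
  for (int i = 3; i >= 0; --i) {    // Little-endian: last element first.
    Result <<= 8;                   // BitShift = element width in bits.
    Result |= APInt(32, Elts[i]);
  }
  assert(Result == 0x04030201u && "matches the vector's memory layout");
  return 0;
}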
/// Constant fold bitcast, symbolically evaluating it with DataLayout.
/// This always returns a non-null constant, but it may be a
/// ConstantExpr if unfoldable.
@@ -60,45 +99,33 @@ Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
!DestTy->isPtrOrPtrVectorTy()) // Don't get ones for ptr types!
return Constant::getAllOnesValue(DestTy);
- // Handle a vector->integer cast.
- if (auto *IT = dyn_cast<IntegerType>(DestTy)) {
- auto *VTy = dyn_cast<VectorType>(C->getType());
- if (!VTy)
- return ConstantExpr::getBitCast(C, DestTy);
+ if (auto *VTy = dyn_cast<VectorType>(C->getType())) {
+ // Handle a vector->scalar integer/fp cast.
+ if (isa<IntegerType>(DestTy) || DestTy->isFloatingPointTy()) {
+ unsigned NumSrcElts = VTy->getNumElements();
+ Type *SrcEltTy = VTy->getElementType();
+
+ // If the vector is a vector of floating point, convert it to vector of int
+ // to simplify things.
+ if (SrcEltTy->isFloatingPointTy()) {
+ unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
+ Type *SrcIVTy =
+ VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElts);
+ // Ask IR to do the conversion now that #elts line up.
+ C = ConstantExpr::getBitCast(C, SrcIVTy);
+ }
- unsigned NumSrcElts = VTy->getNumElements();
- Type *SrcEltTy = VTy->getElementType();
-
- // If the vector is a vector of floating point, convert it to vector of int
- // to simplify things.
- if (SrcEltTy->isFloatingPointTy()) {
- unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits();
- Type *SrcIVTy =
- VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElts);
- // Ask IR to do the conversion now that #elts line up.
- C = ConstantExpr::getBitCast(C, SrcIVTy);
- }
+ APInt Result(DL.getTypeSizeInBits(DestTy), 0);
+ if (Constant *CE = foldConstVectorToAPInt(Result, DestTy, C,
+ SrcEltTy, NumSrcElts, DL))
+ return CE;
- // Now that we know that the input value is a vector of integers, just shift
- // and insert them into our result.
- unsigned BitShift = DL.getTypeSizeInBits(SrcEltTy);
- APInt Result(IT->getBitWidth(), 0);
- for (unsigned i = 0; i != NumSrcElts; ++i) {
- Constant *Element;
- if (DL.isLittleEndian())
- Element = C->getAggregateElement(NumSrcElts-i-1);
- else
- Element = C->getAggregateElement(i);
-
- auto *ElementCI = dyn_cast_or_null<ConstantInt>(Element);
- if (!ElementCI)
- return ConstantExpr::getBitCast(C, DestTy);
+ if (isa<IntegerType>(DestTy))
+ return ConstantInt::get(DestTy, Result);
- Result <<= BitShift;
- Result |= ElementCI->getValue().zextOrSelf(IT->getBitWidth());
+ APFloat FP(DestTy->getFltSemantics(), Result);
+ return ConstantFP::get(DestTy->getContext(), FP);
}
-
- return ConstantInt::get(IT, Result);
}
// The code below only handles casts to vectors currently.
@@ -180,7 +207,11 @@ Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
Constant *Elt = Zero;
unsigned ShiftAmt = isLittleEndian ? 0 : SrcBitSize*(Ratio-1);
for (unsigned j = 0; j != Ratio; ++j) {
- Constant *Src = dyn_cast<ConstantInt>(C->getAggregateElement(SrcElt++));
+ Constant *Src = C->getAggregateElement(SrcElt++);
+ if (Src && isa<UndefValue>(Src))
+ Src = Constant::getNullValue(C->getType()->getVectorElementType());
+ else
+ Src = dyn_cast_or_null<ConstantInt>(Src);
if (!Src) // Reject constantexpr elements.
return ConstantExpr::getBitCast(C, DestTy);
@@ -206,8 +237,19 @@ Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) {
// Loop over each source value, expanding into multiple results.
for (unsigned i = 0; i != NumSrcElt; ++i) {
- auto *Src = dyn_cast<ConstantInt>(C->getAggregateElement(i));
- if (!Src) // Reject constantexpr elements.
+ auto *Element = C->getAggregateElement(i);
+
+ if (!Element) // Reject constantexpr elements.
+ return ConstantExpr::getBitCast(C, DestTy);
+
+ if (isa<UndefValue>(Element)) {
+      // Correctly propagate undef values.
+ Result.append(Ratio, UndefValue::get(DstEltTy));
+ continue;
+ }
+
+ auto *Src = dyn_cast<ConstantInt>(Element);
+ if (!Src)
return ConstantExpr::getBitCast(C, DestTy);
unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize*(Ratio-1);
@@ -333,7 +375,7 @@ bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, unsigned char *CurPtr,
uint64_t CurEltOffset = SL->getElementOffset(Index);
ByteOffset -= CurEltOffset;
- while (1) {
+ while (true) {
// If the element access is to the element itself and not to tail padding,
// read the bytes from the element.
uint64_t EltSize = DL.getTypeAllocSize(CS->getOperand(Index)->getType());
@@ -689,23 +731,27 @@ Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, Constant *Op1,
/// If array indices are not pointer-sized integers, explicitly cast them so
/// that they aren't implicitly casted by the getelementptr.
Constant *CastGEPIndices(Type *SrcElemTy, ArrayRef<Constant *> Ops,
- Type *ResultTy, const DataLayout &DL,
- const TargetLibraryInfo *TLI) {
+ Type *ResultTy, Optional<unsigned> InRangeIndex,
+ const DataLayout &DL, const TargetLibraryInfo *TLI) {
Type *IntPtrTy = DL.getIntPtrType(ResultTy);
+ Type *IntPtrScalarTy = IntPtrTy->getScalarType();
bool Any = false;
SmallVector<Constant*, 32> NewIdxs;
for (unsigned i = 1, e = Ops.size(); i != e; ++i) {
if ((i == 1 ||
- !isa<StructType>(GetElementPtrInst::getIndexedType(SrcElemTy,
- Ops.slice(1, i - 1)))) &&
- Ops[i]->getType() != IntPtrTy) {
+ !isa<StructType>(GetElementPtrInst::getIndexedType(
+ SrcElemTy, Ops.slice(1, i - 1)))) &&
+ Ops[i]->getType()->getScalarType() != IntPtrScalarTy) {
Any = true;
+ Type *NewType = Ops[i]->getType()->isVectorTy()
+ ? IntPtrTy
+ : IntPtrTy->getScalarType();
NewIdxs.push_back(ConstantExpr::getCast(CastInst::getCastOpcode(Ops[i],
true,
- IntPtrTy,
+ NewType,
true),
- Ops[i], IntPtrTy));
+ Ops[i], NewType));
} else
NewIdxs.push_back(Ops[i]);
}
@@ -713,11 +759,10 @@ Constant *CastGEPIndices(Type *SrcElemTy, ArrayRef<Constant *> Ops,
if (!Any)
return nullptr;
- Constant *C = ConstantExpr::getGetElementPtr(SrcElemTy, Ops[0], NewIdxs);
- if (auto *CE = dyn_cast<ConstantExpr>(C)) {
- if (Constant *Folded = ConstantFoldConstantExpression(CE, DL, TLI))
- C = Folded;
- }
+ Constant *C = ConstantExpr::getGetElementPtr(
+ SrcElemTy, Ops[0], NewIdxs, /*InBounds=*/false, InRangeIndex);
+ if (Constant *Folded = ConstantFoldConstant(C, DL, TLI))
+ C = Folded;
return C;
}
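A hedged sketch of the scalar-index cast CastGEPIndices now builds (the helper name is invented; the calls mirror the NewType selection above): indices narrower than the pointer-sized integer are sign-extended so later offset arithmetic happens at pointer width.

#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InstrTypes.h"
using namespace llvm;

static Constant *castIndexToIntPtr(Constant *Idx, Type *ResultTy,
                                   const DataLayout &DL) {
  // Scalar indices use the scalar form of the pointer-sized integer type.
  Type *IntPtrTy = DL.getIntPtrType(ResultTy)->getScalarType();
  if (Idx->getType() == IntPtrTy)
    return Idx;
  return ConstantExpr::getCast(
      CastInst::getCastOpcode(Idx, /*SrcIsSigned=*/true, IntPtrTy,
                              /*DstIsSigned=*/true),
      Idx, IntPtrTy);
}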
@@ -744,13 +789,17 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
ArrayRef<Constant *> Ops,
const DataLayout &DL,
const TargetLibraryInfo *TLI) {
+ const GEPOperator *InnermostGEP = GEP;
+ bool InBounds = GEP->isInBounds();
+
Type *SrcElemTy = GEP->getSourceElementType();
Type *ResElemTy = GEP->getResultElementType();
Type *ResTy = GEP->getType();
if (!SrcElemTy->isSized())
return nullptr;
- if (Constant *C = CastGEPIndices(SrcElemTy, Ops, ResTy, DL, TLI))
+ if (Constant *C = CastGEPIndices(SrcElemTy, Ops, ResTy,
+ GEP->getInRangeIndex(), DL, TLI))
return C;
Constant *Ptr = Ops[0];
@@ -775,8 +824,8 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
Constant *Res = ConstantExpr::getPtrToInt(Ptr, CE->getType());
Res = ConstantExpr::getSub(Res, CE->getOperand(1));
Res = ConstantExpr::getIntToPtr(Res, ResTy);
- if (auto *ResCE = dyn_cast<ConstantExpr>(Res))
- Res = ConstantFoldConstantExpression(ResCE, DL, TLI);
+ if (auto *FoldedRes = ConstantFoldConstant(Res, DL, TLI))
+ Res = FoldedRes;
return Res;
}
}
@@ -793,6 +842,9 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
// If this is a GEP of a GEP, fold it all into a single GEP.
while (auto *GEP = dyn_cast<GEPOperator>(Ptr)) {
+ InnermostGEP = GEP;
+ InBounds &= GEP->isInBounds();
+
SmallVector<Value *, 4> NestedOps(GEP->op_begin() + 1, GEP->op_end());
// Do not try the incorporate the sub-GEP if some index is not a number.
@@ -821,7 +873,9 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
}
}
- if (Ptr->isNullValue() || BasePtr != 0) {
+ auto *PTy = cast<PointerType>(Ptr->getType());
+ if ((Ptr->isNullValue() || BasePtr != 0) &&
+ !DL.isNonIntegralPointerType(PTy)) {
Constant *C = ConstantInt::get(Ptr->getContext(), Offset + BasePtr);
return ConstantExpr::getIntToPtr(C, ResTy);
}
@@ -830,8 +884,7 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
// we eliminate over-indexing of the notional static type array bounds.
// This makes it easy to determine if the getelementptr is "inbounds".
// Also, this helps GlobalOpt do SROA on GlobalVariables.
- Type *Ty = Ptr->getType();
- assert(Ty->isPointerTy() && "Forming regular GEP of non-pointer type");
+ Type *Ty = PTy;
SmallVector<Constant *, 32> NewIdxs;
do {
@@ -897,8 +950,23 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
if (Offset != 0)
return nullptr;
+ // Preserve the inrange index from the innermost GEP if possible. We must
+ // have calculated the same indices up to and including the inrange index.
+ Optional<unsigned> InRangeIndex;
+ if (Optional<unsigned> LastIRIndex = InnermostGEP->getInRangeIndex())
+ if (SrcElemTy == InnermostGEP->getSourceElementType() &&
+ NewIdxs.size() > *LastIRIndex) {
+ InRangeIndex = LastIRIndex;
+ for (unsigned I = 0; I <= *LastIRIndex; ++I)
+ if (NewIdxs[I] != InnermostGEP->getOperand(I + 1)) {
+ InRangeIndex = None;
+ break;
+ }
+ }
+
// Create a GEP.
- Constant *C = ConstantExpr::getGetElementPtr(SrcElemTy, Ptr, NewIdxs);
+ Constant *C = ConstantExpr::getGetElementPtr(SrcElemTy, Ptr, NewIdxs,
+ InBounds, InRangeIndex);
assert(C->getType()->getPointerElementType() == Ty &&
"Computed GetElementPtr has unexpected type!");
@@ -916,15 +984,16 @@ Constant *SymbolicallyEvaluateGEP(const GEPOperator *GEP,
/// attempting to fold instructions like loads and stores, which have no
/// constant expression form.
///
-/// TODO: This function neither utilizes nor preserves nsw/nuw/inbounds/etc
-/// information, due to only being passed an opcode and operands. Constant
+/// TODO: This function neither utilizes nor preserves nsw/nuw/inbounds/inrange
+/// etc information, due to only being passed an opcode and operands. Constant
/// folding using this function strips this information.
///
-Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, Type *DestTy,
- unsigned Opcode,
+Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, unsigned Opcode,
ArrayRef<Constant *> Ops,
const DataLayout &DL,
const TargetLibraryInfo *TLI) {
+ Type *DestTy = InstOrCE->getType();
+
// Handle easy binops first.
if (Instruction::isBinaryOp(Opcode))
return ConstantFoldBinaryOpOperands(Opcode, Ops[0], Ops[1], DL);
@@ -936,10 +1005,14 @@ Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, Type *DestTy,
if (Constant *C = SymbolicallyEvaluateGEP(GEP, Ops, DL, TLI))
return C;
- return ConstantExpr::getGetElementPtr(GEP->getSourceElementType(),
- Ops[0], Ops.slice(1));
+ return ConstantExpr::getGetElementPtr(GEP->getSourceElementType(), Ops[0],
+ Ops.slice(1), GEP->isInBounds(),
+ GEP->getInRangeIndex());
}
+ if (auto *CE = dyn_cast<ConstantExpr>(InstOrCE))
+ return CE->getWithOperands(Ops);
+
switch (Opcode) {
default: return nullptr;
case Instruction::ICmp:
@@ -966,12 +1039,58 @@ Constant *ConstantFoldInstOperandsImpl(const Value *InstOrCE, Type *DestTy,
// Constant Folding public APIs
//===----------------------------------------------------------------------===//
+namespace {
+
+Constant *
+ConstantFoldConstantImpl(const Constant *C, const DataLayout &DL,
+ const TargetLibraryInfo *TLI,
+ SmallDenseMap<Constant *, Constant *> &FoldedOps) {
+ if (!isa<ConstantVector>(C) && !isa<ConstantExpr>(C))
+ return nullptr;
+
+ SmallVector<Constant *, 8> Ops;
+ for (const Use &NewU : C->operands()) {
+ auto *NewC = cast<Constant>(&NewU);
+ // Recursively fold the ConstantExpr's operands. If we have already folded
+ // a ConstantExpr, we don't have to process it again.
+ if (isa<ConstantVector>(NewC) || isa<ConstantExpr>(NewC)) {
+ auto It = FoldedOps.find(NewC);
+ if (It == FoldedOps.end()) {
+ if (auto *FoldedC =
+ ConstantFoldConstantImpl(NewC, DL, TLI, FoldedOps)) {
+ NewC = FoldedC;
+ FoldedOps.insert({NewC, FoldedC});
+ } else {
+ FoldedOps.insert({NewC, NewC});
+ }
+ } else {
+ NewC = It->second;
+ }
+ }
+ Ops.push_back(NewC);
+ }
+
+ if (auto *CE = dyn_cast<ConstantExpr>(C)) {
+ if (CE->isCompare())
+ return ConstantFoldCompareInstOperands(CE->getPredicate(), Ops[0], Ops[1],
+ DL, TLI);
+
+ return ConstantFoldInstOperandsImpl(CE, CE->getOpcode(), Ops, DL, TLI);
+ }
+
+ assert(isa<ConstantVector>(C));
+ return ConstantVector::get(Ops);
+}
+
+} // end anonymous namespace
+
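Caller-side view of the new entry point (a sketch assuming the declarations in ConstantFolding.h; the wrapper name is invented): code that previously special-cased ConstantExpr can now hand any Constant to ConstantFoldConstant and keep the original when no folding happens, exactly as the hunks below do.

#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/Constant.h"
using namespace llvm;

static Constant *foldOrKeep(Constant *C, const DataLayout &DL,
                            const TargetLibraryInfo *TLI) {
  if (Constant *Folded = ConstantFoldConstant(C, DL, TLI))
    return Folded;     // Made progress; use the folded constant.
  return C;            // No simplification; keep the original.
}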
Constant *llvm::ConstantFoldInstruction(Instruction *I, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
// Handle PHI nodes quickly here...
if (auto *PN = dyn_cast<PHINode>(I)) {
Constant *CommonValue = nullptr;
+ SmallDenseMap<Constant *, Constant *> FoldedOps;
for (Value *Incoming : PN->incoming_values()) {
// If the incoming value is undef then skip it. Note that while we could
// skip the value if it is equal to the phi node itself we choose not to
@@ -984,8 +1103,8 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const DataLayout &DL,
if (!C)
return nullptr;
// Fold the PHI's operands.
- if (auto *NewC = dyn_cast<ConstantExpr>(C))
- C = ConstantFoldConstantExpression(NewC, DL, TLI);
+ if (auto *FoldedC = ConstantFoldConstantImpl(C, DL, TLI, FoldedOps))
+ C = FoldedC;
// If the incoming value is a different constant to
// the one we saw previously, then give up.
if (CommonValue && C != CommonValue)
@@ -993,7 +1112,6 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const DataLayout &DL,
CommonValue = C;
}
-
// If we reach here, all incoming values are the same constant or undef.
return CommonValue ? CommonValue : UndefValue::get(PN->getType());
}
@@ -1003,12 +1121,13 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const DataLayout &DL,
if (!all_of(I->operands(), [](Use &U) { return isa<Constant>(U); }))
return nullptr;
+ SmallDenseMap<Constant *, Constant *> FoldedOps;
SmallVector<Constant *, 8> Ops;
for (const Use &OpU : I->operands()) {
auto *Op = cast<Constant>(&OpU);
// Fold the Instruction's operands.
- if (auto *NewCE = dyn_cast<ConstantExpr>(Op))
- Op = ConstantFoldConstantExpression(NewCE, DL, TLI);
+ if (auto *FoldedOp = ConstantFoldConstantImpl(Op, DL, TLI, FoldedOps))
+ Op = FoldedOp;
Ops.push_back(Op);
}
@@ -1036,55 +1155,17 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, const DataLayout &DL,
return ConstantFoldInstOperands(I, Ops, DL, TLI);
}
-namespace {
-
-Constant *
-ConstantFoldConstantExpressionImpl(const ConstantExpr *CE, const DataLayout &DL,
- const TargetLibraryInfo *TLI,
- SmallPtrSetImpl<ConstantExpr *> &FoldedOps) {
- SmallVector<Constant *, 8> Ops;
- for (const Use &NewU : CE->operands()) {
- auto *NewC = cast<Constant>(&NewU);
- // Recursively fold the ConstantExpr's operands. If we have already folded
- // a ConstantExpr, we don't have to process it again.
- if (auto *NewCE = dyn_cast<ConstantExpr>(NewC)) {
- if (FoldedOps.insert(NewCE).second)
- NewC = ConstantFoldConstantExpressionImpl(NewCE, DL, TLI, FoldedOps);
- }
- Ops.push_back(NewC);
- }
-
- if (CE->isCompare())
- return ConstantFoldCompareInstOperands(CE->getPredicate(), Ops[0], Ops[1],
- DL, TLI);
-
- return ConstantFoldInstOperandsImpl(CE, CE->getType(), CE->getOpcode(), Ops,
- DL, TLI);
-}
-
-} // end anonymous namespace
-
-Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
- const DataLayout &DL,
- const TargetLibraryInfo *TLI) {
- SmallPtrSet<ConstantExpr *, 4> FoldedOps;
- return ConstantFoldConstantExpressionImpl(CE, DL, TLI, FoldedOps);
+Constant *llvm::ConstantFoldConstant(const Constant *C, const DataLayout &DL,
+ const TargetLibraryInfo *TLI) {
+ SmallDenseMap<Constant *, Constant *> FoldedOps;
+ return ConstantFoldConstantImpl(C, DL, TLI, FoldedOps);
}
Constant *llvm::ConstantFoldInstOperands(Instruction *I,
ArrayRef<Constant *> Ops,
const DataLayout &DL,
const TargetLibraryInfo *TLI) {
- return ConstantFoldInstOperandsImpl(I, I->getType(), I->getOpcode(), Ops, DL,
- TLI);
-}
-
-Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy,
- ArrayRef<Constant *> Ops,
- const DataLayout &DL,
- const TargetLibraryInfo *TLI) {
- assert(Opcode != Instruction::GetElementPtr && "Invalid for GEPs");
- return ConstantFoldInstOperandsImpl(nullptr, DestTy, Opcode, Ops, DL, TLI);
+ return ConstantFoldInstOperandsImpl(I, I->getOpcode(), Ops, DL, TLI);
}
Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
@@ -1350,6 +1431,8 @@ bool llvm::canConstantFoldCallTo(const Function *F) {
Name == "log10f";
case 'p':
return Name == "pow" || Name == "powf";
+ case 'r':
+ return Name == "round" || Name == "roundf";
case 's':
return Name == "sin" || Name == "sinh" || Name == "sqrt" ||
Name == "sinf" || Name == "sinhf" || Name == "sqrtf";
@@ -1364,7 +1447,7 @@ Constant *GetConstantFoldFPValue(double V, Type *Ty) {
if (Ty->isHalfTy()) {
APFloat APF(V);
bool unused;
- APF.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven, &unused);
+ APF.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &unused);
return ConstantFP::get(Ty->getContext(), APF);
}
if (Ty->isFloatTy())
@@ -1455,7 +1538,7 @@ double getValueAsDouble(ConstantFP *Op) {
bool unused;
APFloat APF = Op->getValueAPF();
- APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &unused);
+ APF.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &unused);
return APF.convertToDouble();
}
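These hunks track an upstream API change: the fltSemantics objects are now obtained through functions (APFloat::IEEEdouble() rather than a static data member). A minimal caller-side sketch with an invented helper name:

#include "llvm/ADT/APFloat.h"
using namespace llvm;

static double roundTripToDouble(const APFloat &In) {
  bool LosesInfo = false;
  APFloat Copy = In;
  // Same convert() call pattern as getValueAsDouble above.
  Copy.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &LosesInfo);
  return Copy.convertToDouble();
}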
@@ -1473,7 +1556,7 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
APFloat Val(Op->getValueAPF());
bool lost = false;
- Val.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven, &lost);
+ Val.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &lost);
return ConstantInt::get(Ty->getContext(), Val.bitcastToAPInt());
}
@@ -1614,6 +1697,10 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
}
}
break;
+ case 'r':
+ if ((Name == "round" && TLI->has(LibFunc::round)) ||
+ (Name == "roundf" && TLI->has(LibFunc::roundf)))
+ return ConstantFoldFP(round, V, Ty);
case 's':
if ((Name == "sin" && TLI->has(LibFunc::sin)) ||
(Name == "sinf" && TLI->has(LibFunc::sinf)))
@@ -1648,7 +1735,7 @@ Constant *ConstantFoldScalarCall(StringRef Name, unsigned IntrinsicID, Type *Ty,
case Intrinsic::bitreverse:
return ConstantInt::get(Ty->getContext(), Op->getValue().reverseBits());
case Intrinsic::convert_from_fp16: {
- APFloat Val(APFloat::IEEEhalf, Op->getValue());
+ APFloat Val(APFloat::IEEEhalf(), Op->getValue());
bool lost = false;
APFloat::opStatus status = Val.convert(
@@ -1927,3 +2014,152 @@ llvm::ConstantFoldCall(Function *F, ArrayRef<Constant *> Operands,
return ConstantFoldScalarCall(Name, F->getIntrinsicID(), Ty, Operands, TLI);
}
+
+bool llvm::isMathLibCallNoop(CallSite CS, const TargetLibraryInfo *TLI) {
+ // FIXME: Refactor this code; this duplicates logic in LibCallsShrinkWrap
+ // (and to some extent ConstantFoldScalarCall).
+ Function *F = CS.getCalledFunction();
+ if (!F)
+ return false;
+
+ LibFunc::Func Func;
+ if (!TLI || !TLI->getLibFunc(*F, Func))
+ return false;
+
+ if (CS.getNumArgOperands() == 1) {
+ if (ConstantFP *OpC = dyn_cast<ConstantFP>(CS.getArgOperand(0))) {
+ const APFloat &Op = OpC->getValueAPF();
+ switch (Func) {
+ case LibFunc::logl:
+ case LibFunc::log:
+ case LibFunc::logf:
+ case LibFunc::log2l:
+ case LibFunc::log2:
+ case LibFunc::log2f:
+ case LibFunc::log10l:
+ case LibFunc::log10:
+ case LibFunc::log10f:
+ return Op.isNaN() || (!Op.isZero() && !Op.isNegative());
+
+ case LibFunc::expl:
+ case LibFunc::exp:
+ case LibFunc::expf:
+ // FIXME: These boundaries are slightly conservative.
+ if (OpC->getType()->isDoubleTy())
+ return Op.compare(APFloat(-745.0)) != APFloat::cmpLessThan &&
+ Op.compare(APFloat(709.0)) != APFloat::cmpGreaterThan;
+ if (OpC->getType()->isFloatTy())
+ return Op.compare(APFloat(-103.0f)) != APFloat::cmpLessThan &&
+ Op.compare(APFloat(88.0f)) != APFloat::cmpGreaterThan;
+ break;
+
+ case LibFunc::exp2l:
+ case LibFunc::exp2:
+ case LibFunc::exp2f:
+ // FIXME: These boundaries are slightly conservative.
+ if (OpC->getType()->isDoubleTy())
+ return Op.compare(APFloat(-1074.0)) != APFloat::cmpLessThan &&
+ Op.compare(APFloat(1023.0)) != APFloat::cmpGreaterThan;
+ if (OpC->getType()->isFloatTy())
+ return Op.compare(APFloat(-149.0f)) != APFloat::cmpLessThan &&
+ Op.compare(APFloat(127.0f)) != APFloat::cmpGreaterThan;
+ break;
+
+ case LibFunc::sinl:
+ case LibFunc::sin:
+ case LibFunc::sinf:
+ case LibFunc::cosl:
+ case LibFunc::cos:
+ case LibFunc::cosf:
+ return !Op.isInfinity();
+
+ case LibFunc::tanl:
+ case LibFunc::tan:
+ case LibFunc::tanf: {
+ // FIXME: Stop using the host math library.
+ // FIXME: The computation isn't done in the right precision.
+ Type *Ty = OpC->getType();
+ if (Ty->isDoubleTy() || Ty->isFloatTy() || Ty->isHalfTy()) {
+ double OpV = getValueAsDouble(OpC);
+ return ConstantFoldFP(tan, OpV, Ty) != nullptr;
+ }
+ break;
+ }
+
+ case LibFunc::asinl:
+ case LibFunc::asin:
+ case LibFunc::asinf:
+ case LibFunc::acosl:
+ case LibFunc::acos:
+ case LibFunc::acosf:
+ return Op.compare(APFloat(Op.getSemantics(), "-1")) !=
+ APFloat::cmpLessThan &&
+ Op.compare(APFloat(Op.getSemantics(), "1")) !=
+ APFloat::cmpGreaterThan;
+
+ case LibFunc::sinh:
+ case LibFunc::cosh:
+ case LibFunc::sinhf:
+ case LibFunc::coshf:
+ case LibFunc::sinhl:
+ case LibFunc::coshl:
+ // FIXME: These boundaries are slightly conservative.
+ if (OpC->getType()->isDoubleTy())
+ return Op.compare(APFloat(-710.0)) != APFloat::cmpLessThan &&
+ Op.compare(APFloat(710.0)) != APFloat::cmpGreaterThan;
+ if (OpC->getType()->isFloatTy())
+ return Op.compare(APFloat(-89.0f)) != APFloat::cmpLessThan &&
+ Op.compare(APFloat(89.0f)) != APFloat::cmpGreaterThan;
+ break;
+
+ case LibFunc::sqrtl:
+ case LibFunc::sqrt:
+ case LibFunc::sqrtf:
+ return Op.isNaN() || Op.isZero() || !Op.isNegative();
+
+ // FIXME: Add more functions: sqrt_finite, atanh, expm1, log1p,
+ // maybe others?
+ default:
+ break;
+ }
+ }
+ }
+
+ if (CS.getNumArgOperands() == 2) {
+ ConstantFP *Op0C = dyn_cast<ConstantFP>(CS.getArgOperand(0));
+ ConstantFP *Op1C = dyn_cast<ConstantFP>(CS.getArgOperand(1));
+ if (Op0C && Op1C) {
+ const APFloat &Op0 = Op0C->getValueAPF();
+ const APFloat &Op1 = Op1C->getValueAPF();
+
+ switch (Func) {
+ case LibFunc::powl:
+ case LibFunc::pow:
+ case LibFunc::powf: {
+ // FIXME: Stop using the host math library.
+ // FIXME: The computation isn't done in the right precision.
+ Type *Ty = Op0C->getType();
+ if (Ty->isDoubleTy() || Ty->isFloatTy() || Ty->isHalfTy()) {
+ if (Ty == Op1C->getType()) {
+ double Op0V = getValueAsDouble(Op0C);
+ double Op1V = getValueAsDouble(Op1C);
+ return ConstantFoldBinaryFP(pow, Op0V, Op1V, Ty) != nullptr;
+ }
+ }
+ break;
+ }
+
+ case LibFunc::fmodl:
+ case LibFunc::fmod:
+ case LibFunc::fmodf:
+ return Op0.isNaN() || Op1.isNaN() ||
+ (!Op0.isInfinity() && !Op1.isZero());
+
+ default:
+ break;
+ }
+ }
+ }
+
+ return false;
+}
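Usage sketch for the new isMathLibCallNoop query (the surrounding pass logic is invented): a transform can erase an otherwise-unused libm call once the query proves it cannot set errno or trap for its constant arguments.

#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

static bool removeDeadLibCall(CallSite CS, const TargetLibraryInfo *TLI) {
  Instruction *I = CS.getInstruction();
  if (!I->use_empty() || !isMathLibCallNoop(CS, TLI))
    return false;
  I->eraseFromParent();  // Safe: the call has no users and no side effects.
  return true;
}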
diff --git a/contrib/llvm/lib/Analysis/CostModel.cpp b/contrib/llvm/lib/Analysis/CostModel.cpp
index 68a4bea96baa..67d1773f0811 100644
--- a/contrib/llvm/lib/Analysis/CostModel.cpp
+++ b/contrib/llvm/lib/Analysis/CostModel.cpp
@@ -20,6 +20,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -89,14 +90,35 @@ CostModelAnalysis::runOnFunction(Function &F) {
return false;
}
-static bool isReverseVectorMask(SmallVectorImpl<int> &Mask) {
+static bool isReverseVectorMask(ArrayRef<int> Mask) {
for (unsigned i = 0, MaskSize = Mask.size(); i < MaskSize; ++i)
- if (Mask[i] > 0 && Mask[i] != (int)(MaskSize - 1 - i))
+ if (Mask[i] >= 0 && Mask[i] != (int)(MaskSize - 1 - i))
return false;
return true;
}
-static bool isAlternateVectorMask(SmallVectorImpl<int> &Mask) {
+static bool isSingleSourceVectorMask(ArrayRef<int> Mask) {
+ bool Vec0 = false;
+ bool Vec1 = false;
+ for (unsigned i = 0, NumVecElts = Mask.size(); i < NumVecElts; ++i) {
+ if (Mask[i] >= 0) {
+ if ((unsigned)Mask[i] >= NumVecElts)
+ Vec1 = true;
+ else
+ Vec0 = true;
+ }
+ }
+ return !(Vec0 && Vec1);
+}
+
+static bool isZeroEltBroadcastVectorMask(ArrayRef<int> Mask) {
+ for (unsigned i = 0; i < Mask.size(); ++i)
+ if (Mask[i] > 0)
+ return false;
+ return true;
+}
+
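Worked examples (illustrative, not taken from the diff) of how these mask predicates classify a few 4-element shuffles for the cost query later in this file:

// Mask            Matching predicate                Shuffle kind
// <3, 2, 1, 0>    isReverseVectorMask               SK_Reverse
// <0, 0, 0, 0>    isZeroEltBroadcastVectorMask      SK_Broadcast
// <2, 0, 3, 1>    isSingleSourceVectorMask          SK_PermuteSingleSrc
// <0, 4, 1, 5>    none of the above                 SK_PermuteTwoSrc
// Negative (undef) elements are ignored by every predicate.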
+static bool isAlternateVectorMask(ArrayRef<int> Mask) {
bool isAlternate = true;
unsigned MaskSize = Mask.size();
@@ -123,7 +145,7 @@ static bool isAlternateVectorMask(SmallVectorImpl<int> &Mask) {
static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) {
TargetTransformInfo::OperandValueKind OpInfo =
- TargetTransformInfo::OK_AnyValue;
+ TargetTransformInfo::OK_AnyValue;
// Check for a splat of a constant or for a non uniform vector of constants.
if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) {
@@ -132,6 +154,12 @@ static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) {
OpInfo = TargetTransformInfo::OK_UniformConstantValue;
}
+  // Check for a splat of a uniform value. This is not loop aware, so report
+  // a uniform value only for the obviously uniform cases (argument, globalvalue).
+ const Value *Splat = getSplatValue(V);
+ if (Splat && (isa<Argument>(Splat) || isa<GlobalValue>(Splat)))
+ OpInfo = TargetTransformInfo::OK_UniformValue;
+
return OpInfo;
}
@@ -494,6 +522,17 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
if (isAlternateVectorMask(Mask))
return TTI->getShuffleCost(TargetTransformInfo::SK_Alternate,
VecTypOp0, 0, nullptr);
+
+ if (isZeroEltBroadcastVectorMask(Mask))
+ return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast,
+ VecTypOp0, 0, nullptr);
+
+ if (isSingleSourceVectorMask(Mask))
+ return TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
+ VecTypOp0, 0, nullptr);
+
+ return TTI->getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc,
+ VecTypOp0, 0, nullptr);
}
return -1;
diff --git a/contrib/llvm/lib/Analysis/DemandedBits.cpp b/contrib/llvm/lib/Analysis/DemandedBits.cpp
index a3f8b7fda08a..688c1db534c1 100644
--- a/contrib/llvm/lib/Analysis/DemandedBits.cpp
+++ b/contrib/llvm/lib/Analysis/DemandedBits.cpp
@@ -280,10 +280,8 @@ void DemandedBits::performAnalysis() {
// add their operands to the work list (for integer values operands, mark
// all bits as live).
if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) {
- if (!AliveBits.count(&I)) {
- AliveBits[&I] = APInt(IT->getBitWidth(), 0);
+ if (AliveBits.try_emplace(&I, IT->getBitWidth(), 0).second)
Worklist.push_back(&I);
- }
continue;
}
@@ -363,8 +361,9 @@ APInt DemandedBits::getDemandedBits(Instruction *I) {
performAnalysis();
const DataLayout &DL = I->getParent()->getModule()->getDataLayout();
- if (AliveBits.count(I))
- return AliveBits[I];
+ auto Found = AliveBits.find(I);
+ if (Found != AliveBits.end())
+ return Found->second;
return APInt::getAllOnesValue(DL.getTypeSizeInBits(I->getType()));
}
@@ -387,10 +386,10 @@ FunctionPass *llvm::createDemandedBitsWrapperPass() {
return new DemandedBitsWrapperPass();
}
-char DemandedBitsAnalysis::PassID;
+AnalysisKey DemandedBitsAnalysis::Key;
DemandedBits DemandedBitsAnalysis::run(Function &F,
- AnalysisManager<Function> &AM) {
+ FunctionAnalysisManager &AM) {
auto &AC = AM.getResult<AssumptionAnalysis>(F);
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
return DemandedBits(F, AC, DT);
diff --git a/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp
index eb4d925fea73..a332a07ce864 100644
--- a/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp
@@ -122,7 +122,7 @@ DependenceAnalysis::run(Function &F, FunctionAnalysisManager &FAM) {
return DependenceInfo(&F, &AA, &SE, &LI);
}
-char DependenceAnalysis::PassID;
+AnalysisKey DependenceAnalysis::Key;
INITIALIZE_PASS_BEGIN(DependenceAnalysisWrapperPass, "da",
"Dependence Analysis", true, true)
diff --git a/contrib/llvm/lib/Analysis/DominanceFrontier.cpp b/contrib/llvm/lib/Analysis/DominanceFrontier.cpp
index 4554374252a4..15856c3f8b7a 100644
--- a/contrib/llvm/lib/Analysis/DominanceFrontier.cpp
+++ b/contrib/llvm/lib/Analysis/DominanceFrontier.cpp
@@ -56,7 +56,7 @@ LLVM_DUMP_METHOD void DominanceFrontierWrapperPass::dump() const {
}
#endif
-char DominanceFrontierAnalysis::PassID;
+AnalysisKey DominanceFrontierAnalysis::Key;
DominanceFrontier DominanceFrontierAnalysis::run(Function &F,
FunctionAnalysisManager &AM) {
diff --git a/contrib/llvm/lib/Analysis/EHPersonalities.cpp b/contrib/llvm/lib/Analysis/EHPersonalities.cpp
index 5f951f5112e9..ebf0a370b0b0 100644
--- a/contrib/llvm/lib/Analysis/EHPersonalities.cpp
+++ b/contrib/llvm/lib/Analysis/EHPersonalities.cpp
@@ -40,6 +40,29 @@ EHPersonality llvm::classifyEHPersonality(const Value *Pers) {
.Default(EHPersonality::Unknown);
}
+StringRef llvm::getEHPersonalityName(EHPersonality Pers) {
+ switch (Pers) {
+ case EHPersonality::GNU_Ada: return "__gnat_eh_personality";
+ case EHPersonality::GNU_CXX: return "__gxx_personality_v0";
+ case EHPersonality::GNU_CXX_SjLj: return "__gxx_personality_sj0";
+ case EHPersonality::GNU_C: return "__gcc_personality_v0";
+ case EHPersonality::GNU_C_SjLj: return "__gcc_personality_sj0";
+ case EHPersonality::GNU_ObjC: return "__objc_personality_v0";
+ case EHPersonality::MSVC_X86SEH: return "_except_handler3";
+ case EHPersonality::MSVC_Win64SEH: return "__C_specific_handler";
+ case EHPersonality::MSVC_CXX: return "__CxxFrameHandler3";
+ case EHPersonality::CoreCLR: return "ProcessCLRException";
+ case EHPersonality::Rust: return "rust_eh_personality";
+ case EHPersonality::Unknown: llvm_unreachable("Unknown EHPersonality!");
+ }
+
+ llvm_unreachable("Invalid EHPersonality!");
+}
+
+EHPersonality llvm::getDefaultEHPersonality(const Triple &T) {
+ return EHPersonality::GNU_C;
+}
+
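Illustrative round trip (not part of the diff; the helper is invented) combining the existing classifier with the new name mapping above:

#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/IR/Function.h"
using namespace llvm;

static StringRef personalityNameFor(const Function &F) {
  if (!F.hasPersonalityFn())
    return "<none>";
  EHPersonality Pers = classifyEHPersonality(F.getPersonalityFn());
  if (Pers == EHPersonality::Unknown)
    return "<unknown>";
  return getEHPersonalityName(Pers);   // e.g. "__gxx_personality_v0"
}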
bool llvm::canSimplifyInvokeNoUnwind(const Function *F) {
EHPersonality Personality = classifyEHPersonality(F->getPersonalityFn());
// We can't simplify any invokes to nounwind functions if the personality
@@ -82,7 +105,7 @@ DenseMap<BasicBlock *, ColorVector> llvm::colorEHFunclets(Function &F) {
}
// Note that this is a member of the given color.
ColorVector &Colors = BlockColors[Visiting];
- if (std::find(Colors.begin(), Colors.end(), Color) == Colors.end())
+ if (!is_contained(Colors, Color))
Colors.push_back(Color);
else
continue;
diff --git a/contrib/llvm/lib/Analysis/GlobalsModRef.cpp b/contrib/llvm/lib/Analysis/GlobalsModRef.cpp
index a7d1e048e133..33f00cb19b26 100644
--- a/contrib/llvm/lib/Analysis/GlobalsModRef.cpp
+++ b/contrib/llvm/lib/Analysis/GlobalsModRef.cpp
@@ -78,7 +78,7 @@ class GlobalsAAResult::FunctionInfo {
return (AlignedMap *)P;
}
enum { NumLowBitsAvailable = 3 };
- static_assert(AlignOf<AlignedMap>::Alignment >= (1 << NumLowBitsAvailable),
+ static_assert(alignof(AlignedMap) >= (1 << NumLowBitsAvailable),
"AlignedMap insufficiently aligned to have enough low bits.");
};
@@ -366,6 +366,10 @@ bool GlobalsAAResult::AnalyzeUsesOfPointer(Value *V,
} else if (ICmpInst *ICI = dyn_cast<ICmpInst>(I)) {
if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
return true; // Allow comparison against null.
+ } else if (Constant *C = dyn_cast<Constant>(I)) {
+ // Ignore constants which don't have any live uses.
+ if (isa<GlobalValue>(C) || C->isConstantUsed())
+ return true;
} else {
return true;
}
@@ -521,7 +525,7 @@ void GlobalsAAResult::AnalyzeCallGraph(CallGraph &CG, Module &M) {
// Can't say anything about it. However, if it is inside our SCC,
// then nothing needs to be done.
CallGraphNode *CalleeNode = CG[Callee];
- if (std::find(SCC.begin(), SCC.end(), CalleeNode) == SCC.end())
+ if (!is_contained(SCC, CalleeNode))
KnowNothing = true;
}
} else {
@@ -857,22 +861,22 @@ ModRefInfo GlobalsAAResult::getModRefInfoForArgument(ImmutableCallSite CS,
if (CS.doesNotAccessMemory())
return MRI_NoModRef;
ModRefInfo ConservativeResult = CS.onlyReadsMemory() ? MRI_Ref : MRI_ModRef;
-
+
// Iterate through all the arguments to the called function. If any argument
// is based on GV, return the conservative result.
for (auto &A : CS.args()) {
SmallVector<Value*, 4> Objects;
GetUnderlyingObjects(A, Objects, DL);
-
+
// All objects must be identified.
- if (!std::all_of(Objects.begin(), Objects.end(), isIdentifiedObject) &&
+ if (!all_of(Objects, isIdentifiedObject) &&
// Try ::alias to see if all objects are known not to alias GV.
- !std::all_of(Objects.begin(), Objects.end(), [&](Value *V) {
+ !all_of(Objects, [&](Value *V) {
return this->alias(MemoryLocation(V), MemoryLocation(GV)) == NoAlias;
- }))
+ }))
return ConservativeResult;
- if (std::find(Objects.begin(), Objects.end(), GV) != Objects.end())
+ if (is_contained(Objects, GV))
return ConservativeResult;
}
@@ -937,9 +941,9 @@ GlobalsAAResult::analyzeModule(Module &M, const TargetLibraryInfo &TLI,
return Result;
}
-char GlobalsAA::PassID;
+AnalysisKey GlobalsAA::Key;
-GlobalsAAResult GlobalsAA::run(Module &M, AnalysisManager<Module> &AM) {
+GlobalsAAResult GlobalsAA::run(Module &M, ModuleAnalysisManager &AM) {
return GlobalsAAResult::analyzeModule(M,
AM.getResult<TargetLibraryAnalysis>(M),
AM.getResult<CallGraphAnalysis>(M));
diff --git a/contrib/llvm/lib/Analysis/IVUsers.cpp b/contrib/llvm/lib/Analysis/IVUsers.cpp
index 43c0ba17fe4a..76e2561b9da3 100644
--- a/contrib/llvm/lib/Analysis/IVUsers.cpp
+++ b/contrib/llvm/lib/Analysis/IVUsers.cpp
@@ -34,9 +34,9 @@ using namespace llvm;
#define DEBUG_TYPE "iv-users"
-char IVUsersAnalysis::PassID;
+AnalysisKey IVUsersAnalysis::Key;
-IVUsers IVUsersAnalysis::run(Loop &L, AnalysisManager<Loop> &AM) {
+IVUsers IVUsersAnalysis::run(Loop &L, LoopAnalysisManager &AM) {
const auto &FAM =
AM.getResult<FunctionAnalysisManagerLoopProxy>(L).getManager();
Function *F = L.getHeader()->getParent();
@@ -47,7 +47,7 @@ IVUsers IVUsersAnalysis::run(Loop &L, AnalysisManager<Loop> &AM) {
FAM.getCachedResult<ScalarEvolutionAnalysis>(*F));
}
-PreservedAnalyses IVUsersPrinterPass::run(Loop &L, AnalysisManager<Loop> &AM) {
+PreservedAnalyses IVUsersPrinterPass::run(Loop &L, LoopAnalysisManager &AM) {
AM.getResult<IVUsersAnalysis>(L).print(OS);
return PreservedAnalyses::all();
}
diff --git a/contrib/llvm/lib/Analysis/InlineCost.cpp b/contrib/llvm/lib/Analysis/InlineCost.cpp
index dcb724abc02d..9b9faacd354c 100644
--- a/contrib/llvm/lib/Analysis/InlineCost.cpp
+++ b/contrib/llvm/lib/Analysis/InlineCost.cpp
@@ -40,18 +40,7 @@ using namespace llvm;
STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed");
-// Threshold to use when optsize is specified (and there is no
-// -inline-threshold).
-const int OptSizeThreshold = 75;
-
-// Threshold to use when -Oz is specified (and there is no -inline-threshold).
-const int OptMinSizeThreshold = 25;
-
-// Threshold to use when -O[34] is specified (and there is no
-// -inline-threshold).
-const int OptAggressiveThreshold = 275;
-
-static cl::opt<int> DefaultInlineThreshold(
+static cl::opt<int> InlineThreshold(
"inline-threshold", cl::Hidden, cl::init(225), cl::ZeroOrMore,
cl::desc("Control the amount of inlining to perform (default = 225)"));
@@ -66,6 +55,11 @@ static cl::opt<int> ColdThreshold(
"inlinecold-threshold", cl::Hidden, cl::init(225),
cl::desc("Threshold for inlining functions with cold attribute"));
+static cl::opt<int>
+ HotCallSiteThreshold("hot-callsite-threshold", cl::Hidden, cl::init(3000),
+ cl::ZeroOrMore,
+ cl::desc("Threshold for hot callsites "));
+
namespace {
class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
@@ -75,20 +69,23 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
/// The TargetTransformInfo available for this compilation.
const TargetTransformInfo &TTI;
- /// The cache of @llvm.assume intrinsics.
- AssumptionCacheTracker *ACT;
+ /// Getter for the cache of @llvm.assume intrinsics.
+ std::function<AssumptionCache &(Function &)> &GetAssumptionCache;
/// Profile summary information.
ProfileSummaryInfo *PSI;
- // The called function.
+ /// The called function.
Function &F;
- // The candidate callsite being analyzed. Please do not use this to do
- // analysis in the caller function; we want the inline cost query to be
- // easily cacheable. Instead, use the cover function paramHasAttr.
+ /// The candidate callsite being analyzed. Please do not use this to do
+ /// analysis in the caller function; we want the inline cost query to be
+ /// easily cacheable. Instead, use the cover function paramHasAttr.
CallSite CandidateCS;
+ /// Tunable parameters that control the analysis.
+ const InlineParams &Params;
+
int Threshold;
int Cost;
@@ -107,25 +104,25 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
int FiftyPercentVectorBonus, TenPercentVectorBonus;
int VectorBonus;
- // While we walk the potentially-inlined instructions, we build up and
- // maintain a mapping of simplified values specific to this callsite. The
- // idea is to propagate any special information we have about arguments to
- // this call through the inlinable section of the function, and account for
- // likely simplifications post-inlining. The most important aspect we track
- // is CFG altering simplifications -- when we prove a basic block dead, that
- // can cause dramatic shifts in the cost of inlining a function.
+ /// While we walk the potentially-inlined instructions, we build up and
+ /// maintain a mapping of simplified values specific to this callsite. The
+ /// idea is to propagate any special information we have about arguments to
+ /// this call through the inlinable section of the function, and account for
+ /// likely simplifications post-inlining. The most important aspect we track
+ /// is CFG altering simplifications -- when we prove a basic block dead, that
+ /// can cause dramatic shifts in the cost of inlining a function.
DenseMap<Value *, Constant *> SimplifiedValues;
- // Keep track of the values which map back (through function arguments) to
- // allocas on the caller stack which could be simplified through SROA.
+ /// Keep track of the values which map back (through function arguments) to
+ /// allocas on the caller stack which could be simplified through SROA.
DenseMap<Value *, Value *> SROAArgValues;
- // The mapping of caller Alloca values to their accumulated cost savings. If
- // we have to disable SROA for one of the allocas, this tells us how much
- // cost must be added.
+ /// The mapping of caller Alloca values to their accumulated cost savings. If
+ /// we have to disable SROA for one of the allocas, this tells us how much
+ /// cost must be added.
DenseMap<Value *, int> SROAArgCosts;
- // Keep track of values which map to a pointer base and constant offset.
+ /// Keep track of values which map to a pointer base and constant offset.
DenseMap<Value *, std::pair<Value *, APInt>> ConstantOffsetPtrs;
// Custom simplification helper routines.
@@ -203,20 +200,21 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
bool visitUnreachableInst(UnreachableInst &I);
public:
- CallAnalyzer(const TargetTransformInfo &TTI, AssumptionCacheTracker *ACT,
- ProfileSummaryInfo *PSI, Function &Callee, int Threshold,
- CallSite CSArg)
- : TTI(TTI), ACT(ACT), PSI(PSI), F(Callee), CandidateCS(CSArg),
- Threshold(Threshold), Cost(0), IsCallerRecursive(false),
- IsRecursiveCall(false), ExposesReturnsTwice(false),
- HasDynamicAlloca(false), ContainsNoDuplicateCall(false),
- HasReturn(false), HasIndirectBr(false), HasFrameEscape(false),
- AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0),
- FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0),
- NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
- NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
- NumInstructionsSimplified(0), SROACostSavings(0),
- SROACostSavingsLost(0) {}
+ CallAnalyzer(const TargetTransformInfo &TTI,
+ std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
+ ProfileSummaryInfo *PSI, Function &Callee, CallSite CSArg,
+ const InlineParams &Params)
+ : TTI(TTI), GetAssumptionCache(GetAssumptionCache), PSI(PSI), F(Callee),
+ CandidateCS(CSArg), Params(Params), Threshold(Params.DefaultThreshold),
+ Cost(0), IsCallerRecursive(false), IsRecursiveCall(false),
+ ExposesReturnsTwice(false), HasDynamicAlloca(false),
+ ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
+ HasFrameEscape(false), AllocatedSize(0), NumInstructions(0),
+ NumVectorInstructions(0), FiftyPercentVectorBonus(0),
+ TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0),
+ NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0),
+ NumConstantPtrDiffs(0), NumInstructionsSimplified(0),
+ SROACostSavings(0), SROACostSavingsLost(0) {}
bool analyzeCall(CallSite CS);
@@ -320,7 +318,7 @@ bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) {
continue;
// Handle a struct index, which adds its field offset to the pointer.
- if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ if (StructType *STy = GTI.getStructTypeOrNull()) {
unsigned ElementIdx = OpC->getZExtValue();
const StructLayout *SL = DL.getStructLayout(STy);
Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx));
@@ -620,42 +618,47 @@ void CallAnalyzer::updateThreshold(CallSite CS, Function &Callee) {
}
Function *Caller = CS.getCaller();
- if (DefaultInlineThreshold.getNumOccurrences() > 0) {
- // Explicitly specified -inline-threhold overrides the threshold passed to
- // CallAnalyzer's constructor.
- Threshold = DefaultInlineThreshold;
- } else {
- // If -inline-threshold is not given, listen to the optsize and minsize
- // attributes when they would decrease the threshold.
- if (Caller->optForMinSize() && OptMinSizeThreshold < Threshold)
- Threshold = OptMinSizeThreshold;
- else if (Caller->optForSize() && OptSizeThreshold < Threshold)
- Threshold = OptSizeThreshold;
- }
+
+ // return min(A, B) if B is valid.
+ auto MinIfValid = [](int A, Optional<int> B) {
+ return B ? std::min(A, B.getValue()) : A;
+ };
+
+ // return max(A, B) if B is valid.
+ auto MaxIfValid = [](int A, Optional<int> B) {
+ return B ? std::max(A, B.getValue()) : A;
+ };
+
+ // Use the OptMinSizeThreshold or OptSizeThreshold knob if they are available
+ // and reduce the threshold if the caller has the necessary attribute.
+ if (Caller->optForMinSize())
+ Threshold = MinIfValid(Threshold, Params.OptMinSizeThreshold);
+ else if (Caller->optForSize())
+ Threshold = MinIfValid(Threshold, Params.OptSizeThreshold);
bool HotCallsite = false;
uint64_t TotalWeight;
- if (CS.getInstruction()->extractProfTotalWeight(TotalWeight) &&
- PSI->isHotCount(TotalWeight))
+ if (PSI && CS.getInstruction()->extractProfTotalWeight(TotalWeight) &&
+ PSI->isHotCount(TotalWeight)) {
HotCallsite = true;
+ }
// Listen to the inlinehint attribute or profile based hotness information
// when it would increase the threshold and the caller does not need to
// minimize its size.
bool InlineHint = Callee.hasFnAttribute(Attribute::InlineHint) ||
- PSI->isHotFunction(&Callee) ||
- HotCallsite;
- if (InlineHint && HintThreshold > Threshold && !Caller->optForMinSize())
- Threshold = HintThreshold;
-
- bool ColdCallee = PSI->isColdFunction(&Callee);
- // Command line argument for DefaultInlineThreshold will override the default
- // ColdThreshold. If we have -inline-threshold but no -inlinecold-threshold,
- // do not use the default cold threshold even if it is smaller.
- if ((DefaultInlineThreshold.getNumOccurrences() == 0 ||
- ColdThreshold.getNumOccurrences() > 0) &&
- ColdCallee && ColdThreshold < Threshold)
- Threshold = ColdThreshold;
+ (PSI && PSI->isFunctionEntryHot(&Callee));
+ if (InlineHint && !Caller->optForMinSize())
+ Threshold = MaxIfValid(Threshold, Params.HintThreshold);
+
+ if (HotCallsite && !Caller->optForMinSize())
+ Threshold = MaxIfValid(Threshold, Params.HotCallSiteThreshold);
+
+ bool ColdCallee = PSI && PSI->isFunctionEntryCold(&Callee);
+ // For cold callees, use the ColdThreshold knob if it is available and reduces
+ // the threshold.
+ if (ColdCallee)
+ Threshold = MinIfValid(Threshold, Params.ColdThreshold);
// Finally, take the target-specific inlining threshold multiplier into
// account.
@@ -957,8 +960,9 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
// during devirtualization and so we want to give it a hefty bonus for
// inlining, but cap that bonus in the event that inlining wouldn't pan
// out. Pretend to inline the function, with a custom threshold.
- CallAnalyzer CA(TTI, ACT, PSI, *F, InlineConstants::IndirectCallThreshold,
- CS);
+ auto IndirectCallParams = Params;
+ IndirectCallParams.DefaultThreshold = InlineConstants::IndirectCallThreshold;
+ CallAnalyzer CA(TTI, GetAssumptionCache, PSI, *F, CS, IndirectCallParams);
if (CA.analyzeCall(CS)) {
// We were able to inline the indirect call! Subtract the cost from the
// threshold to get the bonus we want to apply, but don't go below zero.
@@ -1251,13 +1255,15 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
Cost -= InlineConstants::InstrCost;
}
}
-
+ // The call instruction also disappears after inlining.
+ Cost -= InlineConstants::InstrCost + InlineConstants::CallPenalty;
+
// If there is only one call of the function, and it has internal linkage,
// the cost of inlining it drops dramatically.
bool OnlyOneCallAndLocalLinkage =
F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction();
if (OnlyOneCallAndLocalLinkage)
- Cost += InlineConstants::LastCallToStaticBonus;
+ Cost -= InlineConstants::LastCallToStaticBonus;
// If this function uses the coldcc calling convention, prefer not to inline
// it.
@@ -1312,8 +1318,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
// the ephemeral values multiple times (and they're completely determined by
// the callee, so this is purely duplicate work).
SmallPtrSet<const Value *, 32> EphValues;
- CodeMetrics::collectEphemeralValues(&F, &ACT->getAssumptionCache(F),
- EphValues);
+ CodeMetrics::collectEphemeralValues(&F, &GetAssumptionCache(F), EphValues);
// The worklist of live basic blocks in the callee *after* inlining. We avoid
// adding basic blocks of the callee which can be proven to be dead for this
@@ -1444,32 +1449,19 @@ static bool functionsHaveCompatibleAttributes(Function *Caller,
AttributeFuncs::areInlineCompatible(*Caller, *Callee);
}
-InlineCost llvm::getInlineCost(CallSite CS, int DefaultThreshold,
- TargetTransformInfo &CalleeTTI,
- AssumptionCacheTracker *ACT,
- ProfileSummaryInfo *PSI) {
- return getInlineCost(CS, CS.getCalledFunction(), DefaultThreshold, CalleeTTI,
- ACT, PSI);
-}
-
-int llvm::computeThresholdFromOptLevels(unsigned OptLevel,
- unsigned SizeOptLevel) {
- if (OptLevel > 2)
- return OptAggressiveThreshold;
- if (SizeOptLevel == 1) // -Os
- return OptSizeThreshold;
- if (SizeOptLevel == 2) // -Oz
- return OptMinSizeThreshold;
- return DefaultInlineThreshold;
+InlineCost llvm::getInlineCost(
+ CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
+ std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
+ ProfileSummaryInfo *PSI) {
+ return getInlineCost(CS, CS.getCalledFunction(), Params, CalleeTTI,
+ GetAssumptionCache, PSI);
}
-int llvm::getDefaultInlineThreshold() { return DefaultInlineThreshold; }
-
-InlineCost llvm::getInlineCost(CallSite CS, Function *Callee,
- int DefaultThreshold,
- TargetTransformInfo &CalleeTTI,
- AssumptionCacheTracker *ACT,
- ProfileSummaryInfo *PSI) {
+InlineCost llvm::getInlineCost(
+ CallSite CS, Function *Callee, const InlineParams &Params,
+ TargetTransformInfo &CalleeTTI,
+ std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
+ ProfileSummaryInfo *PSI) {
// Cannot inline indirect calls.
if (!Callee)
@@ -1494,7 +1486,7 @@ InlineCost llvm::getInlineCost(CallSite CS, Function *Callee,
// Don't inline functions which can be interposed at link-time. Don't inline
// functions marked noinline or call sites marked noinline.
- // Note: inlining non-exact non-interposable fucntions is fine, since we know
+ // Note: inlining non-exact non-interposable functions is fine, since we know
// we have *a* correct implementation of the source level function.
if (Callee->isInterposable() || Callee->hasFnAttribute(Attribute::NoInline) ||
CS.isNoInline())
@@ -1503,7 +1495,7 @@ InlineCost llvm::getInlineCost(CallSite CS, Function *Callee,
DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
<< "...\n");
- CallAnalyzer CA(CalleeTTI, ACT, PSI, *Callee, DefaultThreshold, CS);
+ CallAnalyzer CA(CalleeTTI, GetAssumptionCache, PSI, *Callee, CS, Params);
bool ShouldInline = CA.analyzeCall(CS);
DEBUG(CA.dump());
@@ -1551,3 +1543,67 @@ bool llvm::isInlineViable(Function &F) {
return true;
}
+
+// APIs to create InlineParams based on command line flags and/or other
+// parameters.
+
+InlineParams llvm::getInlineParams(int Threshold) {
+ InlineParams Params;
+
+ // This field is the threshold to use for a callee by default. This is
+ // derived from one or more of:
+ // * optimization or size-optimization levels,
+ // * a value passed to createFunctionInliningPass function, or
+ // * the -inline-threshold flag.
+ // If the -inline-threshold flag is explicitly specified, that is used
+ // irrespective of anything else.
+ if (InlineThreshold.getNumOccurrences() > 0)
+ Params.DefaultThreshold = InlineThreshold;
+ else
+ Params.DefaultThreshold = Threshold;
+
+ // Set the HintThreshold knob from the -inlinehint-threshold.
+ Params.HintThreshold = HintThreshold;
+
+ // Set the HotCallSiteThreshold knob from the -hot-callsite-threshold.
+ Params.HotCallSiteThreshold = HotCallSiteThreshold;
+
+  // Set the OptMinSizeThreshold and OptSizeThreshold params only if the
+  // -inline-threshold commandline option is not explicitly given. If that
+  // option is present, then its value applies even for callees with size and
+  // minsize attributes.
+  // If the -inline-threshold is not specified, set the ColdThreshold from the
+  // -inlinecold-threshold even if it is not explicitly passed. If
+  // -inline-threshold is specified, then -inlinecold-threshold needs to be
+  // explicitly specified to set the ColdThreshold knob.
+ if (InlineThreshold.getNumOccurrences() == 0) {
+ Params.OptMinSizeThreshold = InlineConstants::OptMinSizeThreshold;
+ Params.OptSizeThreshold = InlineConstants::OptSizeThreshold;
+ Params.ColdThreshold = ColdThreshold;
+ } else if (ColdThreshold.getNumOccurrences() > 0) {
+ Params.ColdThreshold = ColdThreshold;
+ }
+ return Params;
+}
+
+InlineParams llvm::getInlineParams() {
+ return getInlineParams(InlineThreshold);
+}
+
+// Compute the default threshold for inlining based on the opt level and the
+// size opt level.
+static int computeThresholdFromOptLevels(unsigned OptLevel,
+ unsigned SizeOptLevel) {
+ if (OptLevel > 2)
+ return InlineConstants::OptAggressiveThreshold;
+ if (SizeOptLevel == 1) // -Os
+ return InlineConstants::OptSizeThreshold;
+ if (SizeOptLevel == 2) // -Oz
+ return InlineConstants::OptMinSizeThreshold;
+ return InlineThreshold;
+}
+
+InlineParams llvm::getInlineParams(unsigned OptLevel, unsigned SizeOptLevel) {
+ return getInlineParams(computeThresholdFromOptLevels(OptLevel, SizeOptLevel));
+}
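A sketch (illustrative; the wrapper is invented) of how a pass pipeline would now derive inline parameters; the expected numbers follow the constants visible earlier in this file (the 225 default, and the former 75/25/275 values that moved into InlineConstants):

#include "llvm/Analysis/InlineCost.h"
using namespace llvm;

static InlineParams pickParams(unsigned OptLevel, unsigned SizeOptLevel) {
  // OptLevel > 2 -> OptAggressiveThreshold, -Os -> OptSizeThreshold,
  // -Oz -> OptMinSizeThreshold, otherwise the -inline-threshold default.
  return getInlineParams(OptLevel, SizeOptLevel);
}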
diff --git a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
index aeaf9388579c..b4686a1ff175 100644
--- a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
+++ b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -67,9 +67,12 @@ static Value *SimplifyFPBinOp(unsigned, Value *, Value *, const FastMathFlags &,
const Query &, unsigned);
static Value *SimplifyCmpInst(unsigned, Value *, Value *, const Query &,
unsigned);
+static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ const Query &Q, unsigned MaxRecurse);
static Value *SimplifyOrInst(Value *, Value *, const Query &, unsigned);
static Value *SimplifyXorInst(Value *, Value *, const Query &, unsigned);
-static Value *SimplifyTruncInst(Value *, Type *, const Query &, unsigned);
+static Value *SimplifyCastInst(unsigned, Value *, Type *,
+ const Query &, unsigned);
/// For a boolean type, or a vector of boolean type, return false, or
/// a vector with every element false, as appropriate for the type.
@@ -679,9 +682,26 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
if (Op0 == Op1)
return Constant::getNullValue(Op0->getType());
- // 0 - X -> 0 if the sub is NUW.
- if (isNUW && match(Op0, m_Zero()))
- return Op0;
+ // Is this a negation?
+ if (match(Op0, m_Zero())) {
+ // 0 - X -> 0 if the sub is NUW.
+ if (isNUW)
+ return Op0;
+
+ unsigned BitWidth = Op1->getType()->getScalarSizeInBits();
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ computeKnownBits(Op1, KnownZero, KnownOne, Q.DL, 0, Q.AC, Q.CxtI, Q.DT);
+ if (KnownZero == ~APInt::getSignBit(BitWidth)) {
+ // Op1 is either 0 or the minimum signed value. If the sub is NSW, then
+ // Op1 must be 0 because negating the minimum signed value is undefined.
+ if (isNSW)
+ return Op0;
+
+ // 0 - X -> X if X is 0 or the minimum signed value.
+ return Op1;
+ }
+ }
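An illustrative instance of the new known-bits case above (values invented):

//   %x : i8, computeKnownBits proves KnownZero == 0x7f
//   => %x is either 0 or -128 (only the sign bit may be set)
//   sub i8 0, %x        --> %x   ; -0 == 0 and -(-128) wraps back to -128
//   sub nsw i8 0, %x    --> 0    ; negating -128 would overflow, so %x == 0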
// (X + Y) - Z -> X + (Y - Z) or Y + (X - Z) if everything simplifies.
// For example, (X + Y) - Y -> X; (Y + X) - Y -> X
@@ -747,7 +767,8 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
// See if "V === X - Y" simplifies.
if (Value *V = SimplifyBinOp(Instruction::Sub, X, Y, Q, MaxRecurse-1))
// It does! Now see if "trunc V" simplifies.
- if (Value *W = SimplifyTruncInst(V, Op0->getType(), Q, MaxRecurse-1))
+ if (Value *W = SimplifyCastInst(Instruction::Trunc, V, Op0->getType(),
+ Q, MaxRecurse - 1))
// It does, return the simplified "trunc V".
return W;
@@ -1106,6 +1127,10 @@ static Value *SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF,
if (match(Op1, m_Undef()))
return Op1;
+ // X / 1.0 -> X
+ if (match(Op1, m_FPOne()))
+ return Op0;
+
// 0 / X -> 0
// Requires that NaNs are off (X could be zero) and signed zeroes are
// ignored (X could be positive or negative, so the output sign is unknown).
@@ -1497,17 +1522,45 @@ static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp,
return nullptr;
}
-static Value *SimplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
- Type *ITy = Op0->getType();
+/// Commuted variants are assumed to be handled by calling this function again
+/// with the parameters swapped.
+static Value *simplifyAndOfICmpsWithSameOperands(ICmpInst *Op0, ICmpInst *Op1) {
ICmpInst::Predicate Pred0, Pred1;
- ConstantInt *CI1, *CI2;
- Value *V;
+ Value *A ,*B;
+ if (!match(Op0, m_ICmp(Pred0, m_Value(A), m_Value(B))) ||
+ !match(Op1, m_ICmp(Pred1, m_Specific(A), m_Specific(B))))
+ return nullptr;
+
+ // We have (icmp Pred0, A, B) & (icmp Pred1, A, B).
+ // If Op1 is always implied true by Op0, then Op0 is a subset of Op1, and we
+ // can eliminate Op1 from this 'and'.
+ if (ICmpInst::isImpliedTrueByMatchingCmp(Pred0, Pred1))
+ return Op0;
+ // Check for any combination of predicates that are guaranteed to be disjoint.
+ if ((Pred0 == ICmpInst::getInversePredicate(Pred1)) ||
+ (Pred0 == ICmpInst::ICMP_EQ && ICmpInst::isFalseWhenEqual(Pred1)) ||
+ (Pred0 == ICmpInst::ICMP_SLT && Pred1 == ICmpInst::ICMP_SGT) ||
+ (Pred0 == ICmpInst::ICMP_ULT && Pred1 == ICmpInst::ICMP_UGT))
+ return getFalse(Op0->getType());
+
+ return nullptr;
+}
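
The two rules above are small enough to sanity-check by brute force: when one predicate implies the other, the 'and' collapses to the stronger compare, and when the two predicates can never hold at once, the 'and' is false. A rough standalone illustration in plain C++ using the slt/sgt and slt/sle pairs (the 8-bit domain and the chosen pairs are arbitrary choices for the demo):

    #include <cassert>

    int main() {
      for (int a = -128; a <= 127; ++a) {
        for (int b = -128; b <= 127; ++b) {
          // Disjoint predicates: (a <s b) & (a >s b) can never both hold.
          assert(!((a < b) && (a > b)));
          // Implication: (a <s b) implies (a <=s b), so the 'and' is just (a <s b).
          assert(((a < b) && (a <= b)) == (a < b));
        }
      }
      return 0;
    }
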
+
+/// Commuted variants are assumed to be handled by calling this function again
+/// with the parameters swapped.
+static Value *SimplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/true))
return X;
+ if (Value *X = simplifyAndOfICmpsWithSameOperands(Op0, Op1))
+ return X;
+
// Look for this pattern: (icmp V, C0) & (icmp V, C1)).
+ Type *ITy = Op0->getType();
+ ICmpInst::Predicate Pred0, Pred1;
const APInt *C0, *C1;
+ Value *V;
if (match(Op0, m_ICmp(Pred0, m_Value(V), m_APInt(C0))) &&
match(Op1, m_ICmp(Pred1, m_Specific(V), m_APInt(C1)))) {
// Make a constant range that's the intersection of the two icmp ranges.
@@ -1518,21 +1571,22 @@ static Value *SimplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
return getFalse(ITy);
}
- if (!match(Op0, m_ICmp(Pred0, m_Add(m_Value(V), m_ConstantInt(CI1)),
- m_ConstantInt(CI2))))
+ // (icmp (add V, C0), C1) & (icmp V, C0)
+ if (!match(Op0, m_ICmp(Pred0, m_Add(m_Value(V), m_APInt(C0)), m_APInt(C1))))
return nullptr;
- if (!match(Op1, m_ICmp(Pred1, m_Specific(V), m_Specific(CI1))))
+ if (!match(Op1, m_ICmp(Pred1, m_Specific(V), m_Value())))
return nullptr;
auto *AddInst = cast<BinaryOperator>(Op0->getOperand(0));
+ if (AddInst->getOperand(1) != Op1->getOperand(1))
+ return nullptr;
+
bool isNSW = AddInst->hasNoSignedWrap();
bool isNUW = AddInst->hasNoUnsignedWrap();
- const APInt &CI1V = CI1->getValue();
- const APInt &CI2V = CI2->getValue();
- const APInt Delta = CI2V - CI1V;
- if (CI1V.isStrictlyPositive()) {
+ const APInt Delta = *C1 - *C0;
+ if (C0->isStrictlyPositive()) {
if (Delta == 2) {
if (Pred0 == ICmpInst::ICMP_ULT && Pred1 == ICmpInst::ICMP_SGT)
return getFalse(ITy);
@@ -1546,7 +1600,7 @@ static Value *SimplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
return getFalse(ITy);
}
}
- if (CI1V.getBoolValue() && isNUW) {
+ if (C0->getBoolValue() && isNUW) {
if (Delta == 2)
if (Pred0 == ICmpInst::ICMP_ULT && Pred1 == ICmpInst::ICMP_UGT)
return getFalse(ITy);
@@ -1680,33 +1734,61 @@ Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const DataLayout &DL,
RecursionLimit);
}
-/// Simplify (or (icmp ...) (icmp ...)) to true when we can tell that the union
-/// contains all possible values.
-static Value *SimplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
+/// Commuted variants are assumed to be handled by calling this function again
+/// with the parameters swapped.
+static Value *simplifyOrOfICmpsWithSameOperands(ICmpInst *Op0, ICmpInst *Op1) {
ICmpInst::Predicate Pred0, Pred1;
- ConstantInt *CI1, *CI2;
- Value *V;
+  Value *A, *B;
+ if (!match(Op0, m_ICmp(Pred0, m_Value(A), m_Value(B))) ||
+ !match(Op1, m_ICmp(Pred1, m_Specific(A), m_Specific(B))))
+ return nullptr;
+ // We have (icmp Pred0, A, B) | (icmp Pred1, A, B).
+ // If Op1 is always implied true by Op0, then Op0 is a subset of Op1, and we
+ // can eliminate Op0 from this 'or'.
+ if (ICmpInst::isImpliedTrueByMatchingCmp(Pred0, Pred1))
+ return Op1;
+
+ // Check for any combination of predicates that cover the entire range of
+ // possibilities.
+ if ((Pred0 == ICmpInst::getInversePredicate(Pred1)) ||
+ (Pred0 == ICmpInst::ICMP_NE && ICmpInst::isTrueWhenEqual(Pred1)) ||
+ (Pred0 == ICmpInst::ICMP_SLE && Pred1 == ICmpInst::ICMP_SGE) ||
+ (Pred0 == ICmpInst::ICMP_ULE && Pred1 == ICmpInst::ICMP_UGE))
+ return getTrue(Op0->getType());
+
+ return nullptr;
+}
+
+/// Commuted variants are assumed to be handled by calling this function again
+/// with the parameters swapped.
+static Value *SimplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
if (Value *X = simplifyUnsignedRangeCheck(Op0, Op1, /*IsAnd=*/false))
return X;
- if (!match(Op0, m_ICmp(Pred0, m_Add(m_Value(V), m_ConstantInt(CI1)),
- m_ConstantInt(CI2))))
- return nullptr;
+ if (Value *X = simplifyOrOfICmpsWithSameOperands(Op0, Op1))
+ return X;
- if (!match(Op1, m_ICmp(Pred1, m_Specific(V), m_Specific(CI1))))
+ // (icmp (add V, C0), C1) | (icmp V, C0)
+ ICmpInst::Predicate Pred0, Pred1;
+ const APInt *C0, *C1;
+ Value *V;
+ if (!match(Op0, m_ICmp(Pred0, m_Add(m_Value(V), m_APInt(C0)), m_APInt(C1))))
return nullptr;
- Type *ITy = Op0->getType();
+ if (!match(Op1, m_ICmp(Pred1, m_Specific(V), m_Value())))
+ return nullptr;
auto *AddInst = cast<BinaryOperator>(Op0->getOperand(0));
+ if (AddInst->getOperand(1) != Op1->getOperand(1))
+ return nullptr;
+
+ Type *ITy = Op0->getType();
bool isNSW = AddInst->hasNoSignedWrap();
bool isNUW = AddInst->hasNoUnsignedWrap();
- const APInt &CI1V = CI1->getValue();
- const APInt &CI2V = CI2->getValue();
- const APInt Delta = CI2V - CI1V;
- if (CI1V.isStrictlyPositive()) {
+ const APInt Delta = *C1 - *C0;
+ if (C0->isStrictlyPositive()) {
if (Delta == 2) {
if (Pred0 == ICmpInst::ICMP_UGE && Pred1 == ICmpInst::ICMP_SLE)
return getTrue(ITy);
@@ -1720,7 +1802,7 @@ static Value *SimplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
return getTrue(ITy);
}
}
- if (CI1V.getBoolValue() && isNUW) {
+ if (C0->getBoolValue() && isNUW) {
if (Delta == 2)
if (Pred0 == ICmpInst::ICMP_UGE && Pred1 == ICmpInst::ICMP_ULE)
return getTrue(ITy);
@@ -2102,8 +2184,8 @@ computePointerICmp(const DataLayout &DL, const TargetLibraryInfo *TLI,
GetUnderlyingObjects(RHS, RHSUObjs, DL);
// Is the set of underlying objects all noalias calls?
- auto IsNAC = [](SmallVectorImpl<Value *> &Objects) {
- return std::all_of(Objects.begin(), Objects.end(), isNoAliasCall);
+ auto IsNAC = [](ArrayRef<Value *> Objects) {
+ return all_of(Objects, isNoAliasCall);
};
// Is the set of underlying objects all things which must be disjoint from
@@ -2112,8 +2194,8 @@ computePointerICmp(const DataLayout &DL, const TargetLibraryInfo *TLI,
// live with the compared-to allocation). For globals, we exclude symbols
  // that might be resolved lazily to symbols in another dynamically-loaded
// library (and, thus, could be malloc'ed by the implementation).
- auto IsAllocDisjoint = [](SmallVectorImpl<Value *> &Objects) {
- return std::all_of(Objects.begin(), Objects.end(), [](Value *V) {
+ auto IsAllocDisjoint = [](ArrayRef<Value *> Objects) {
+ return all_of(Objects, [](Value *V) {
if (const AllocaInst *AI = dyn_cast<AllocaInst>(V))
return AI->getParent() && AI->getFunction() && AI->isStaticAlloca();
if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
@@ -2150,470 +2232,275 @@ computePointerICmp(const DataLayout &DL, const TargetLibraryInfo *TLI,
return nullptr;
}
-/// Given operands for an ICmpInst, see if we can fold the result.
-/// If not, this returns null.
-static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
- const Query &Q, unsigned MaxRecurse) {
- CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate;
- assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!");
-
- if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
- if (Constant *CRHS = dyn_cast<Constant>(RHS))
- return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, Q.DL, Q.TLI);
-
- // If we have a constant, make sure it is on the RHS.
- std::swap(LHS, RHS);
- Pred = CmpInst::getSwappedPredicate(Pred);
- }
-
+/// Fold an icmp when its operands have i1 scalar type.
+static Value *simplifyICmpOfBools(CmpInst::Predicate Pred, Value *LHS,
+ Value *RHS, const Query &Q) {
Type *ITy = GetCompareTy(LHS); // The return type.
Type *OpTy = LHS->getType(); // The operand type.
+ if (!OpTy->getScalarType()->isIntegerTy(1))
+ return nullptr;
- // icmp X, X -> true/false
- // X icmp undef -> true/false. For example, icmp ugt %X, undef -> false
- // because X could be 0.
- if (LHS == RHS || isa<UndefValue>(RHS))
- return ConstantInt::get(ITy, CmpInst::isTrueWhenEqual(Pred));
-
- // Special case logic when the operands have i1 type.
- if (OpTy->getScalarType()->isIntegerTy(1)) {
- switch (Pred) {
- default: break;
- case ICmpInst::ICMP_EQ:
- // X == 1 -> X
- if (match(RHS, m_One()))
- return LHS;
- break;
- case ICmpInst::ICMP_NE:
- // X != 0 -> X
- if (match(RHS, m_Zero()))
- return LHS;
- break;
- case ICmpInst::ICMP_UGT:
- // X >u 0 -> X
- if (match(RHS, m_Zero()))
- return LHS;
- break;
- case ICmpInst::ICMP_UGE: {
- // X >=u 1 -> X
- if (match(RHS, m_One()))
- return LHS;
- if (isImpliedCondition(RHS, LHS, Q.DL).getValueOr(false))
- return getTrue(ITy);
- break;
- }
- case ICmpInst::ICMP_SGE: {
- /// For signed comparison, the values for an i1 are 0 and -1
- /// respectively. This maps into a truth table of:
- /// LHS | RHS | LHS >=s RHS | LHS implies RHS
- /// 0 | 0 | 1 (0 >= 0) | 1
- /// 0 | 1 | 1 (0 >= -1) | 1
- /// 1 | 0 | 0 (-1 >= 0) | 0
- /// 1 | 1 | 1 (-1 >= -1) | 1
- if (isImpliedCondition(LHS, RHS, Q.DL).getValueOr(false))
- return getTrue(ITy);
- break;
- }
- case ICmpInst::ICMP_SLT:
- // X <s 0 -> X
- if (match(RHS, m_Zero()))
- return LHS;
- break;
- case ICmpInst::ICMP_SLE:
- // X <=s -1 -> X
- if (match(RHS, m_One()))
- return LHS;
- break;
- case ICmpInst::ICMP_ULE: {
- if (isImpliedCondition(LHS, RHS, Q.DL).getValueOr(false))
- return getTrue(ITy);
- break;
- }
- }
- }
-
- // If we are comparing with zero then try hard since this is a common case.
- if (match(RHS, m_Zero())) {
- bool LHSKnownNonNegative, LHSKnownNegative;
- switch (Pred) {
- default: llvm_unreachable("Unknown ICmp predicate!");
- case ICmpInst::ICMP_ULT:
- return getFalse(ITy);
- case ICmpInst::ICMP_UGE:
+ switch (Pred) {
+ default:
+ break;
+ case ICmpInst::ICMP_EQ:
+ // X == 1 -> X
+ if (match(RHS, m_One()))
+ return LHS;
+ break;
+ case ICmpInst::ICMP_NE:
+ // X != 0 -> X
+ if (match(RHS, m_Zero()))
+ return LHS;
+ break;
+ case ICmpInst::ICMP_UGT:
+ // X >u 0 -> X
+ if (match(RHS, m_Zero()))
+ return LHS;
+ break;
+ case ICmpInst::ICMP_UGE:
+ // X >=u 1 -> X
+ if (match(RHS, m_One()))
+ return LHS;
+ if (isImpliedCondition(RHS, LHS, Q.DL).getValueOr(false))
return getTrue(ITy);
- case ICmpInst::ICMP_EQ:
- case ICmpInst::ICMP_ULE:
- if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
- return getFalse(ITy);
- break;
- case ICmpInst::ICMP_NE:
- case ICmpInst::ICMP_UGT:
- if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
- return getTrue(ITy);
- break;
- case ICmpInst::ICMP_SLT:
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC,
- Q.CxtI, Q.DT);
- if (LHSKnownNegative)
- return getTrue(ITy);
- if (LHSKnownNonNegative)
- return getFalse(ITy);
- break;
- case ICmpInst::ICMP_SLE:
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC,
- Q.CxtI, Q.DT);
- if (LHSKnownNegative)
- return getTrue(ITy);
- if (LHSKnownNonNegative &&
- isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
- return getFalse(ITy);
- break;
- case ICmpInst::ICMP_SGE:
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC,
- Q.CxtI, Q.DT);
- if (LHSKnownNegative)
- return getFalse(ITy);
- if (LHSKnownNonNegative)
- return getTrue(ITy);
- break;
- case ICmpInst::ICMP_SGT:
- ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC,
- Q.CxtI, Q.DT);
- if (LHSKnownNegative)
- return getFalse(ITy);
- if (LHSKnownNonNegative &&
- isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
- return getTrue(ITy);
- break;
- }
+ break;
+ case ICmpInst::ICMP_SGE:
+ /// For signed comparison, the values for an i1 are 0 and -1
+ /// respectively. This maps into a truth table of:
+ /// LHS | RHS | LHS >=s RHS | LHS implies RHS
+ /// 0 | 0 | 1 (0 >= 0) | 1
+ /// 0 | 1 | 1 (0 >= -1) | 1
+ /// 1 | 0 | 0 (-1 >= 0) | 0
+ /// 1 | 1 | 1 (-1 >= -1) | 1
+ if (isImpliedCondition(LHS, RHS, Q.DL).getValueOr(false))
+ return getTrue(ITy);
+ break;
+ case ICmpInst::ICMP_SLT:
+ // X <s 0 -> X
+ if (match(RHS, m_Zero()))
+ return LHS;
+ break;
+ case ICmpInst::ICMP_SLE:
+ // X <=s -1 -> X
+ if (match(RHS, m_One()))
+ return LHS;
+ break;
+ case ICmpInst::ICMP_ULE:
+ if (isImpliedCondition(LHS, RHS, Q.DL).getValueOr(false))
+ return getTrue(ITy);
+ break;
}
- // See if we are doing a comparison with a constant integer.
- if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
- // Rule out tautological comparisons (eg., ult 0 or uge 0).
- ConstantRange RHS_CR = ICmpInst::makeConstantRange(Pred, CI->getValue());
- if (RHS_CR.isEmptySet())
- return ConstantInt::getFalse(CI->getContext());
- if (RHS_CR.isFullSet())
- return ConstantInt::getTrue(CI->getContext());
-
- // Many binary operators with constant RHS have easy to compute constant
- // range. Use them to check whether the comparison is a tautology.
- unsigned Width = CI->getBitWidth();
- APInt Lower = APInt(Width, 0);
- APInt Upper = APInt(Width, 0);
- ConstantInt *CI2;
- if (match(LHS, m_URem(m_Value(), m_ConstantInt(CI2)))) {
- // 'urem x, CI2' produces [0, CI2).
- Upper = CI2->getValue();
- } else if (match(LHS, m_SRem(m_Value(), m_ConstantInt(CI2)))) {
- // 'srem x, CI2' produces (-|CI2|, |CI2|).
- Upper = CI2->getValue().abs();
- Lower = (-Upper) + 1;
- } else if (match(LHS, m_UDiv(m_ConstantInt(CI2), m_Value()))) {
- // 'udiv CI2, x' produces [0, CI2].
- Upper = CI2->getValue() + 1;
- } else if (match(LHS, m_UDiv(m_Value(), m_ConstantInt(CI2)))) {
- // 'udiv x, CI2' produces [0, UINT_MAX / CI2].
- APInt NegOne = APInt::getAllOnesValue(Width);
- if (!CI2->isZero())
- Upper = NegOne.udiv(CI2->getValue()) + 1;
- } else if (match(LHS, m_SDiv(m_ConstantInt(CI2), m_Value()))) {
- if (CI2->isMinSignedValue()) {
- // 'sdiv INT_MIN, x' produces [INT_MIN, INT_MIN / -2].
- Lower = CI2->getValue();
- Upper = Lower.lshr(1) + 1;
- } else {
- // 'sdiv CI2, x' produces [-|CI2|, |CI2|].
- Upper = CI2->getValue().abs() + 1;
- Lower = (-Upper) + 1;
- }
- } else if (match(LHS, m_SDiv(m_Value(), m_ConstantInt(CI2)))) {
- APInt IntMin = APInt::getSignedMinValue(Width);
- APInt IntMax = APInt::getSignedMaxValue(Width);
- const APInt &Val = CI2->getValue();
- if (Val.isAllOnesValue()) {
- // 'sdiv x, -1' produces [INT_MIN + 1, INT_MAX]
- // where CI2 != -1 and CI2 != 0 and CI2 != 1
- Lower = IntMin + 1;
- Upper = IntMax + 1;
- } else if (Val.countLeadingZeros() < Width - 1) {
- // 'sdiv x, CI2' produces [INT_MIN / CI2, INT_MAX / CI2]
- // where CI2 != -1 and CI2 != 0 and CI2 != 1
- Lower = IntMin.sdiv(Val);
- Upper = IntMax.sdiv(Val);
- if (Lower.sgt(Upper))
- std::swap(Lower, Upper);
- Upper = Upper + 1;
- assert(Upper != Lower && "Upper part of range has wrapped!");
- }
- } else if (match(LHS, m_NUWShl(m_ConstantInt(CI2), m_Value()))) {
- // 'shl nuw CI2, x' produces [CI2, CI2 << CLZ(CI2)]
- Lower = CI2->getValue();
- Upper = Lower.shl(Lower.countLeadingZeros()) + 1;
- } else if (match(LHS, m_NSWShl(m_ConstantInt(CI2), m_Value()))) {
- if (CI2->isNegative()) {
- // 'shl nsw CI2, x' produces [CI2 << CLO(CI2)-1, CI2]
- unsigned ShiftAmount = CI2->getValue().countLeadingOnes() - 1;
- Lower = CI2->getValue().shl(ShiftAmount);
- Upper = CI2->getValue() + 1;
- } else {
- // 'shl nsw CI2, x' produces [CI2, CI2 << CLZ(CI2)-1]
- unsigned ShiftAmount = CI2->getValue().countLeadingZeros() - 1;
- Lower = CI2->getValue();
- Upper = CI2->getValue().shl(ShiftAmount) + 1;
- }
- } else if (match(LHS, m_LShr(m_Value(), m_ConstantInt(CI2)))) {
- // 'lshr x, CI2' produces [0, UINT_MAX >> CI2].
- APInt NegOne = APInt::getAllOnesValue(Width);
- if (CI2->getValue().ult(Width))
- Upper = NegOne.lshr(CI2->getValue()) + 1;
- } else if (match(LHS, m_LShr(m_ConstantInt(CI2), m_Value()))) {
- // 'lshr CI2, x' produces [CI2 >> (Width-1), CI2].
- unsigned ShiftAmount = Width - 1;
- if (!CI2->isZero() && cast<BinaryOperator>(LHS)->isExact())
- ShiftAmount = CI2->getValue().countTrailingZeros();
- Lower = CI2->getValue().lshr(ShiftAmount);
- Upper = CI2->getValue() + 1;
- } else if (match(LHS, m_AShr(m_Value(), m_ConstantInt(CI2)))) {
- // 'ashr x, CI2' produces [INT_MIN >> CI2, INT_MAX >> CI2].
- APInt IntMin = APInt::getSignedMinValue(Width);
- APInt IntMax = APInt::getSignedMaxValue(Width);
- if (CI2->getValue().ult(Width)) {
- Lower = IntMin.ashr(CI2->getValue());
- Upper = IntMax.ashr(CI2->getValue()) + 1;
- }
- } else if (match(LHS, m_AShr(m_ConstantInt(CI2), m_Value()))) {
- unsigned ShiftAmount = Width - 1;
- if (!CI2->isZero() && cast<BinaryOperator>(LHS)->isExact())
- ShiftAmount = CI2->getValue().countTrailingZeros();
- if (CI2->isNegative()) {
- // 'ashr CI2, x' produces [CI2, CI2 >> (Width-1)]
- Lower = CI2->getValue();
- Upper = CI2->getValue().ashr(ShiftAmount) + 1;
- } else {
- // 'ashr CI2, x' produces [CI2 >> (Width-1), CI2]
- Lower = CI2->getValue().ashr(ShiftAmount);
- Upper = CI2->getValue() + 1;
- }
- } else if (match(LHS, m_Or(m_Value(), m_ConstantInt(CI2)))) {
- // 'or x, CI2' produces [CI2, UINT_MAX].
- Lower = CI2->getValue();
- } else if (match(LHS, m_And(m_Value(), m_ConstantInt(CI2)))) {
- // 'and x, CI2' produces [0, CI2].
- Upper = CI2->getValue() + 1;
- } else if (match(LHS, m_NUWAdd(m_Value(), m_ConstantInt(CI2)))) {
- // 'add nuw x, CI2' produces [CI2, UINT_MAX].
- Lower = CI2->getValue();
- }
-
- ConstantRange LHS_CR = Lower != Upper ? ConstantRange(Lower, Upper)
- : ConstantRange(Width, true);
+ return nullptr;
+}
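
The ICMP_SGE truth table above amounts to the claim that, reading an i1 as the signed values 0/-1, "LHS >=s RHS" holds exactly when LHS implies RHS. A tiny standalone check of that equivalence (plain C++, illustrative only; the 0/-1 encoding mirrors how a signed i1 is interpreted):

    #include <cassert>
    #include <initializer_list>

    int main() {
      for (bool L : {false, true}) {
        for (bool R : {false, true}) {
          int SignedL = L ? -1 : 0; // signed value of an i1
          int SignedR = R ? -1 : 0;
          bool SGE = SignedL >= SignedR;
          bool Implies = !L || R; // "L implies R"
          assert(SGE == Implies);
        }
      }
      return 0;
    }
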
- if (auto *I = dyn_cast<Instruction>(LHS))
- if (auto *Ranges = I->getMetadata(LLVMContext::MD_range))
- LHS_CR = LHS_CR.intersectWith(getConstantRangeFromMetadata(*Ranges));
+/// Try hard to fold icmp with zero RHS because this is a common case.
+static Value *simplifyICmpWithZero(CmpInst::Predicate Pred, Value *LHS,
+ Value *RHS, const Query &Q) {
+ if (!match(RHS, m_Zero()))
+ return nullptr;
- if (!LHS_CR.isFullSet()) {
- if (RHS_CR.contains(LHS_CR))
- return ConstantInt::getTrue(RHS->getContext());
- if (RHS_CR.inverse().contains(LHS_CR))
- return ConstantInt::getFalse(RHS->getContext());
- }
+ Type *ITy = GetCompareTy(LHS); // The return type.
+ bool LHSKnownNonNegative, LHSKnownNegative;
+ switch (Pred) {
+ default:
+ llvm_unreachable("Unknown ICmp predicate!");
+ case ICmpInst::ICMP_ULT:
+ return getFalse(ITy);
+ case ICmpInst::ICMP_UGE:
+ return getTrue(ITy);
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_ULE:
+ if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
+ return getFalse(ITy);
+ break;
+ case ICmpInst::ICMP_NE:
+ case ICmpInst::ICMP_UGT:
+ if (isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
+ return getTrue(ITy);
+ break;
+ case ICmpInst::ICMP_SLT:
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC,
+ Q.CxtI, Q.DT);
+ if (LHSKnownNegative)
+ return getTrue(ITy);
+ if (LHSKnownNonNegative)
+ return getFalse(ITy);
+ break;
+ case ICmpInst::ICMP_SLE:
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC,
+ Q.CxtI, Q.DT);
+ if (LHSKnownNegative)
+ return getTrue(ITy);
+ if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
+ return getFalse(ITy);
+ break;
+ case ICmpInst::ICMP_SGE:
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC,
+ Q.CxtI, Q.DT);
+ if (LHSKnownNegative)
+ return getFalse(ITy);
+ if (LHSKnownNonNegative)
+ return getTrue(ITy);
+ break;
+ case ICmpInst::ICMP_SGT:
+ ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, Q.DL, 0, Q.AC,
+ Q.CxtI, Q.DT);
+ if (LHSKnownNegative)
+ return getFalse(ITy);
+ if (LHSKnownNonNegative && isKnownNonZero(LHS, Q.DL, 0, Q.AC, Q.CxtI, Q.DT))
+ return getTrue(ITy);
+ break;
}
- // If both operands have range metadata, use the metadata
- // to simplify the comparison.
- if (isa<Instruction>(RHS) && isa<Instruction>(LHS)) {
- auto RHS_Instr = dyn_cast<Instruction>(RHS);
- auto LHS_Instr = dyn_cast<Instruction>(LHS);
-
- if (RHS_Instr->getMetadata(LLVMContext::MD_range) &&
- LHS_Instr->getMetadata(LLVMContext::MD_range)) {
- auto RHS_CR = getConstantRangeFromMetadata(
- *RHS_Instr->getMetadata(LLVMContext::MD_range));
- auto LHS_CR = getConstantRangeFromMetadata(
- *LHS_Instr->getMetadata(LLVMContext::MD_range));
+ return nullptr;
+}
- auto Satisfied_CR = ConstantRange::makeSatisfyingICmpRegion(Pred, RHS_CR);
- if (Satisfied_CR.contains(LHS_CR))
- return ConstantInt::getTrue(RHS->getContext());
+static Value *simplifyICmpWithConstant(CmpInst::Predicate Pred, Value *LHS,
+ Value *RHS) {
+ const APInt *C;
+ if (!match(RHS, m_APInt(C)))
+ return nullptr;
- auto InversedSatisfied_CR = ConstantRange::makeSatisfyingICmpRegion(
- CmpInst::getInversePredicate(Pred), RHS_CR);
- if (InversedSatisfied_CR.contains(LHS_CR))
- return ConstantInt::getFalse(RHS->getContext());
+ // Rule out tautological comparisons (eg., ult 0 or uge 0).
+ ConstantRange RHS_CR = ConstantRange::makeExactICmpRegion(Pred, *C);
+ if (RHS_CR.isEmptySet())
+ return ConstantInt::getFalse(GetCompareTy(RHS));
+ if (RHS_CR.isFullSet())
+ return ConstantInt::getTrue(GetCompareTy(RHS));
+
+ // Many binary operators with constant RHS have easy to compute constant
+ // range. Use them to check whether the comparison is a tautology.
+ unsigned Width = C->getBitWidth();
+ APInt Lower = APInt(Width, 0);
+ APInt Upper = APInt(Width, 0);
+ const APInt *C2;
+ if (match(LHS, m_URem(m_Value(), m_APInt(C2)))) {
+ // 'urem x, C2' produces [0, C2).
+ Upper = *C2;
+ } else if (match(LHS, m_SRem(m_Value(), m_APInt(C2)))) {
+ // 'srem x, C2' produces (-|C2|, |C2|).
+ Upper = C2->abs();
+ Lower = (-Upper) + 1;
+ } else if (match(LHS, m_UDiv(m_APInt(C2), m_Value()))) {
+ // 'udiv C2, x' produces [0, C2].
+ Upper = *C2 + 1;
+ } else if (match(LHS, m_UDiv(m_Value(), m_APInt(C2)))) {
+ // 'udiv x, C2' produces [0, UINT_MAX / C2].
+ APInt NegOne = APInt::getAllOnesValue(Width);
+ if (*C2 != 0)
+ Upper = NegOne.udiv(*C2) + 1;
+ } else if (match(LHS, m_SDiv(m_APInt(C2), m_Value()))) {
+ if (C2->isMinSignedValue()) {
+ // 'sdiv INT_MIN, x' produces [INT_MIN, INT_MIN / -2].
+ Lower = *C2;
+ Upper = Lower.lshr(1) + 1;
+ } else {
+ // 'sdiv C2, x' produces [-|C2|, |C2|].
+ Upper = C2->abs() + 1;
+ Lower = (-Upper) + 1;
}
- }
-
- // Compare of cast, for example (zext X) != 0 -> X != 0
- if (isa<CastInst>(LHS) && (isa<Constant>(RHS) || isa<CastInst>(RHS))) {
- Instruction *LI = cast<CastInst>(LHS);
- Value *SrcOp = LI->getOperand(0);
- Type *SrcTy = SrcOp->getType();
- Type *DstTy = LI->getType();
-
- // Turn icmp (ptrtoint x), (ptrtoint/constant) into a compare of the input
- // if the integer type is the same size as the pointer type.
- if (MaxRecurse && isa<PtrToIntInst>(LI) &&
- Q.DL.getTypeSizeInBits(SrcTy) == DstTy->getPrimitiveSizeInBits()) {
- if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
- // Transfer the cast to the constant.
- if (Value *V = SimplifyICmpInst(Pred, SrcOp,
- ConstantExpr::getIntToPtr(RHSC, SrcTy),
- Q, MaxRecurse-1))
- return V;
- } else if (PtrToIntInst *RI = dyn_cast<PtrToIntInst>(RHS)) {
- if (RI->getOperand(0)->getType() == SrcTy)
- // Compare without the cast.
- if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0),
- Q, MaxRecurse-1))
- return V;
- }
+ } else if (match(LHS, m_SDiv(m_Value(), m_APInt(C2)))) {
+ APInt IntMin = APInt::getSignedMinValue(Width);
+ APInt IntMax = APInt::getSignedMaxValue(Width);
+ if (C2->isAllOnesValue()) {
+ // 'sdiv x, -1' produces [INT_MIN + 1, INT_MAX]
+      // (C2 == -1 here; INT_MIN is excluded because INT_MIN sdiv -1 overflows.)
+ Lower = IntMin + 1;
+ Upper = IntMax + 1;
+ } else if (C2->countLeadingZeros() < Width - 1) {
+ // 'sdiv x, C2' produces [INT_MIN / C2, INT_MAX / C2]
+ // where C2 != -1 and C2 != 0 and C2 != 1
+ Lower = IntMin.sdiv(*C2);
+ Upper = IntMax.sdiv(*C2);
+ if (Lower.sgt(Upper))
+ std::swap(Lower, Upper);
+ Upper = Upper + 1;
+ assert(Upper != Lower && "Upper part of range has wrapped!");
}
-
- if (isa<ZExtInst>(LHS)) {
- // Turn icmp (zext X), (zext Y) into a compare of X and Y if they have the
- // same type.
- if (ZExtInst *RI = dyn_cast<ZExtInst>(RHS)) {
- if (MaxRecurse && SrcTy == RI->getOperand(0)->getType())
- // Compare X and Y. Note that signed predicates become unsigned.
- if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred),
- SrcOp, RI->getOperand(0), Q,
- MaxRecurse-1))
- return V;
- }
- // Turn icmp (zext X), Cst into a compare of X and Cst if Cst is extended
- // too. If not, then try to deduce the result of the comparison.
- else if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
- // Compute the constant that would happen if we truncated to SrcTy then
- // reextended to DstTy.
- Constant *Trunc = ConstantExpr::getTrunc(CI, SrcTy);
- Constant *RExt = ConstantExpr::getCast(CastInst::ZExt, Trunc, DstTy);
-
- // If the re-extended constant didn't change then this is effectively
- // also a case of comparing two zero-extended values.
- if (RExt == CI && MaxRecurse)
- if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred),
- SrcOp, Trunc, Q, MaxRecurse-1))
- return V;
-
- // Otherwise the upper bits of LHS are zero while RHS has a non-zero bit
- // there. Use this to work out the result of the comparison.
- if (RExt != CI) {
- switch (Pred) {
- default: llvm_unreachable("Unknown ICmp predicate!");
- // LHS <u RHS.
- case ICmpInst::ICMP_EQ:
- case ICmpInst::ICMP_UGT:
- case ICmpInst::ICMP_UGE:
- return ConstantInt::getFalse(CI->getContext());
-
- case ICmpInst::ICMP_NE:
- case ICmpInst::ICMP_ULT:
- case ICmpInst::ICMP_ULE:
- return ConstantInt::getTrue(CI->getContext());
-
- // LHS is non-negative. If RHS is negative then LHS >s LHS. If RHS
- // is non-negative then LHS <s RHS.
- case ICmpInst::ICMP_SGT:
- case ICmpInst::ICMP_SGE:
- return CI->getValue().isNegative() ?
- ConstantInt::getTrue(CI->getContext()) :
- ConstantInt::getFalse(CI->getContext());
-
- case ICmpInst::ICMP_SLT:
- case ICmpInst::ICMP_SLE:
- return CI->getValue().isNegative() ?
- ConstantInt::getFalse(CI->getContext()) :
- ConstantInt::getTrue(CI->getContext());
- }
- }
- }
+ } else if (match(LHS, m_NUWShl(m_APInt(C2), m_Value()))) {
+ // 'shl nuw C2, x' produces [C2, C2 << CLZ(C2)]
+ Lower = *C2;
+ Upper = Lower.shl(Lower.countLeadingZeros()) + 1;
+ } else if (match(LHS, m_NSWShl(m_APInt(C2), m_Value()))) {
+ if (C2->isNegative()) {
+ // 'shl nsw C2, x' produces [C2 << CLO(C2)-1, C2]
+ unsigned ShiftAmount = C2->countLeadingOnes() - 1;
+ Lower = C2->shl(ShiftAmount);
+ Upper = *C2 + 1;
+ } else {
+ // 'shl nsw C2, x' produces [C2, C2 << CLZ(C2)-1]
+ unsigned ShiftAmount = C2->countLeadingZeros() - 1;
+ Lower = *C2;
+ Upper = C2->shl(ShiftAmount) + 1;
}
+ } else if (match(LHS, m_LShr(m_Value(), m_APInt(C2)))) {
+ // 'lshr x, C2' produces [0, UINT_MAX >> C2].
+ APInt NegOne = APInt::getAllOnesValue(Width);
+ if (C2->ult(Width))
+ Upper = NegOne.lshr(*C2) + 1;
+ } else if (match(LHS, m_LShr(m_APInt(C2), m_Value()))) {
+ // 'lshr C2, x' produces [C2 >> (Width-1), C2].
+ unsigned ShiftAmount = Width - 1;
+ if (*C2 != 0 && cast<BinaryOperator>(LHS)->isExact())
+ ShiftAmount = C2->countTrailingZeros();
+ Lower = C2->lshr(ShiftAmount);
+ Upper = *C2 + 1;
+ } else if (match(LHS, m_AShr(m_Value(), m_APInt(C2)))) {
+ // 'ashr x, C2' produces [INT_MIN >> C2, INT_MAX >> C2].
+ APInt IntMin = APInt::getSignedMinValue(Width);
+ APInt IntMax = APInt::getSignedMaxValue(Width);
+ if (C2->ult(Width)) {
+ Lower = IntMin.ashr(*C2);
+ Upper = IntMax.ashr(*C2) + 1;
+ }
+ } else if (match(LHS, m_AShr(m_APInt(C2), m_Value()))) {
+ unsigned ShiftAmount = Width - 1;
+ if (*C2 != 0 && cast<BinaryOperator>(LHS)->isExact())
+ ShiftAmount = C2->countTrailingZeros();
+ if (C2->isNegative()) {
+ // 'ashr C2, x' produces [C2, C2 >> (Width-1)]
+ Lower = *C2;
+ Upper = C2->ashr(ShiftAmount) + 1;
+ } else {
+ // 'ashr C2, x' produces [C2 >> (Width-1), C2]
+ Lower = C2->ashr(ShiftAmount);
+ Upper = *C2 + 1;
+ }
+ } else if (match(LHS, m_Or(m_Value(), m_APInt(C2)))) {
+ // 'or x, C2' produces [C2, UINT_MAX].
+ Lower = *C2;
+ } else if (match(LHS, m_And(m_Value(), m_APInt(C2)))) {
+ // 'and x, C2' produces [0, C2].
+ Upper = *C2 + 1;
+ } else if (match(LHS, m_NUWAdd(m_Value(), m_APInt(C2)))) {
+ // 'add nuw x, C2' produces [C2, UINT_MAX].
+ Lower = *C2;
+ }
- if (isa<SExtInst>(LHS)) {
- // Turn icmp (sext X), (sext Y) into a compare of X and Y if they have the
- // same type.
- if (SExtInst *RI = dyn_cast<SExtInst>(RHS)) {
- if (MaxRecurse && SrcTy == RI->getOperand(0)->getType())
- // Compare X and Y. Note that the predicate does not change.
- if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0),
- Q, MaxRecurse-1))
- return V;
- }
- // Turn icmp (sext X), Cst into a compare of X and Cst if Cst is extended
- // too. If not, then try to deduce the result of the comparison.
- else if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
- // Compute the constant that would happen if we truncated to SrcTy then
- // reextended to DstTy.
- Constant *Trunc = ConstantExpr::getTrunc(CI, SrcTy);
- Constant *RExt = ConstantExpr::getCast(CastInst::SExt, Trunc, DstTy);
-
- // If the re-extended constant didn't change then this is effectively
- // also a case of comparing two sign-extended values.
- if (RExt == CI && MaxRecurse)
- if (Value *V = SimplifyICmpInst(Pred, SrcOp, Trunc, Q, MaxRecurse-1))
- return V;
-
- // Otherwise the upper bits of LHS are all equal, while RHS has varying
- // bits there. Use this to work out the result of the comparison.
- if (RExt != CI) {
- switch (Pred) {
- default: llvm_unreachable("Unknown ICmp predicate!");
- case ICmpInst::ICMP_EQ:
- return ConstantInt::getFalse(CI->getContext());
- case ICmpInst::ICMP_NE:
- return ConstantInt::getTrue(CI->getContext());
+ ConstantRange LHS_CR =
+ Lower != Upper ? ConstantRange(Lower, Upper) : ConstantRange(Width, true);
- // If RHS is non-negative then LHS <s RHS. If RHS is negative then
- // LHS >s RHS.
- case ICmpInst::ICMP_SGT:
- case ICmpInst::ICMP_SGE:
- return CI->getValue().isNegative() ?
- ConstantInt::getTrue(CI->getContext()) :
- ConstantInt::getFalse(CI->getContext());
- case ICmpInst::ICMP_SLT:
- case ICmpInst::ICMP_SLE:
- return CI->getValue().isNegative() ?
- ConstantInt::getFalse(CI->getContext()) :
- ConstantInt::getTrue(CI->getContext());
+ if (auto *I = dyn_cast<Instruction>(LHS))
+ if (auto *Ranges = I->getMetadata(LLVMContext::MD_range))
+ LHS_CR = LHS_CR.intersectWith(getConstantRangeFromMetadata(*Ranges));
- // If LHS is non-negative then LHS <u RHS. If LHS is negative then
- // LHS >u RHS.
- case ICmpInst::ICMP_UGT:
- case ICmpInst::ICMP_UGE:
- // Comparison is true iff the LHS <s 0.
- if (MaxRecurse)
- if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SLT, SrcOp,
- Constant::getNullValue(SrcTy),
- Q, MaxRecurse-1))
- return V;
- break;
- case ICmpInst::ICMP_ULT:
- case ICmpInst::ICMP_ULE:
- // Comparison is true iff the LHS >=s 0.
- if (MaxRecurse)
- if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SGE, SrcOp,
- Constant::getNullValue(SrcTy),
- Q, MaxRecurse-1))
- return V;
- break;
- }
- }
- }
- }
+ if (!LHS_CR.isFullSet()) {
+ if (RHS_CR.contains(LHS_CR))
+ return ConstantInt::getTrue(GetCompareTy(RHS));
+ if (RHS_CR.inverse().contains(LHS_CR))
+ return ConstantInt::getFalse(GetCompareTy(RHS));
}
- // icmp eq|ne X, Y -> false|true if X != Y
- if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) &&
- isKnownNonEqual(LHS, RHS, Q.DL, Q.AC, Q.CxtI, Q.DT)) {
- LLVMContext &Ctx = LHS->getType()->getContext();
- return Pred == ICmpInst::ICMP_NE ?
- ConstantInt::getTrue(Ctx) : ConstantInt::getFalse(Ctx);
- }
+ return nullptr;
+}
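
The range bookkeeping above is easiest to see on a concrete instance: 'urem x, C2' can only produce values in [0, C2), so any compare whose satisfying region contains that interval folds to a constant. A brute-force sketch of one such case in plain C++ (the 8-bit width and the constants 10 and 16 are arbitrary choices for the demo):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned x = 0; x <= 255; ++x) {
        uint8_t Rem = static_cast<uint8_t>(x % 10); // 'urem x, 10' lies in [0, 10)
        // Hence 'icmp ult (urem x, 10), 16' is a tautology and folds to true.
        assert(Rem < 16);
      }
      return 0;
    }
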
+
+static Value *simplifyICmpWithBinOp(CmpInst::Predicate Pred, Value *LHS,
+ Value *RHS, const Query &Q,
+ unsigned MaxRecurse) {
+ Type *ITy = GetCompareTy(LHS); // The return type.
- // Special logic for binary operators.
BinaryOperator *LBO = dyn_cast<BinaryOperator>(LHS);
BinaryOperator *RBO = dyn_cast<BinaryOperator>(RHS);
if (MaxRecurse && (LBO || RBO)) {
@@ -2622,35 +2509,39 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// LHS = A + B (or A and B are null); RHS = C + D (or C and D are null).
bool NoLHSWrapProblem = false, NoRHSWrapProblem = false;
if (LBO && LBO->getOpcode() == Instruction::Add) {
- A = LBO->getOperand(0); B = LBO->getOperand(1);
- NoLHSWrapProblem = ICmpInst::isEquality(Pred) ||
- (CmpInst::isUnsigned(Pred) && LBO->hasNoUnsignedWrap()) ||
- (CmpInst::isSigned(Pred) && LBO->hasNoSignedWrap());
+ A = LBO->getOperand(0);
+ B = LBO->getOperand(1);
+ NoLHSWrapProblem =
+ ICmpInst::isEquality(Pred) ||
+ (CmpInst::isUnsigned(Pred) && LBO->hasNoUnsignedWrap()) ||
+ (CmpInst::isSigned(Pred) && LBO->hasNoSignedWrap());
}
if (RBO && RBO->getOpcode() == Instruction::Add) {
- C = RBO->getOperand(0); D = RBO->getOperand(1);
- NoRHSWrapProblem = ICmpInst::isEquality(Pred) ||
- (CmpInst::isUnsigned(Pred) && RBO->hasNoUnsignedWrap()) ||
- (CmpInst::isSigned(Pred) && RBO->hasNoSignedWrap());
+ C = RBO->getOperand(0);
+ D = RBO->getOperand(1);
+ NoRHSWrapProblem =
+ ICmpInst::isEquality(Pred) ||
+ (CmpInst::isUnsigned(Pred) && RBO->hasNoUnsignedWrap()) ||
+ (CmpInst::isSigned(Pred) && RBO->hasNoSignedWrap());
}
// icmp (X+Y), X -> icmp Y, 0 for equalities or if there is no overflow.
if ((A == RHS || B == RHS) && NoLHSWrapProblem)
if (Value *V = SimplifyICmpInst(Pred, A == RHS ? B : A,
- Constant::getNullValue(RHS->getType()),
- Q, MaxRecurse-1))
+ Constant::getNullValue(RHS->getType()), Q,
+ MaxRecurse - 1))
return V;
// icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow.
if ((C == LHS || D == LHS) && NoRHSWrapProblem)
- if (Value *V = SimplifyICmpInst(Pred,
- Constant::getNullValue(LHS->getType()),
- C == LHS ? D : C, Q, MaxRecurse-1))
+ if (Value *V =
+ SimplifyICmpInst(Pred, Constant::getNullValue(LHS->getType()),
+ C == LHS ? D : C, Q, MaxRecurse - 1))
return V;
// icmp (X+Y), (X+Z) -> icmp Y,Z for equalities or if there is no overflow.
- if (A && C && (A == C || A == D || B == C || B == D) &&
- NoLHSWrapProblem && NoRHSWrapProblem) {
+ if (A && C && (A == C || A == D || B == C || B == D) && NoLHSWrapProblem &&
+ NoRHSWrapProblem) {
// Determine Y and Z in the form icmp (X+Y), (X+Z).
Value *Y, *Z;
if (A == C) {
@@ -2671,7 +2562,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
Y = A;
Z = C;
}
- if (Value *V = SimplifyICmpInst(Pred, Y, Z, Q, MaxRecurse-1))
+ if (Value *V = SimplifyICmpInst(Pred, Y, Z, Q, MaxRecurse - 1))
return V;
}
}
@@ -2771,7 +2662,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
Q.CxtI, Q.DT);
if (!KnownNonNegative)
break;
- // fall-through
+ LLVM_FALLTHROUGH;
case ICmpInst::ICMP_EQ:
case ICmpInst::ICMP_UGT:
case ICmpInst::ICMP_UGE:
@@ -2782,7 +2673,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
Q.CxtI, Q.DT);
if (!KnownNonNegative)
break;
- // fall-through
+ LLVM_FALLTHROUGH;
case ICmpInst::ICMP_NE:
case ICmpInst::ICMP_ULT:
case ICmpInst::ICMP_ULE:
@@ -2802,7 +2693,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
Q.CxtI, Q.DT);
if (!KnownNonNegative)
break;
- // fall-through
+ LLVM_FALLTHROUGH;
case ICmpInst::ICMP_NE:
case ICmpInst::ICMP_UGT:
case ICmpInst::ICMP_UGE:
@@ -2813,7 +2704,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
Q.CxtI, Q.DT);
if (!KnownNonNegative)
break;
- // fall-through
+ LLVM_FALLTHROUGH;
case ICmpInst::ICMP_EQ:
case ICmpInst::ICMP_ULT:
case ICmpInst::ICMP_ULE:
@@ -2832,6 +2723,17 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return getTrue(ITy);
}
+ // x >=u x >> y
+ // x >=u x udiv y.
+ if (RBO && (match(RBO, m_LShr(m_Specific(LHS), m_Value())) ||
+ match(RBO, m_UDiv(m_Specific(LHS), m_Value())))) {
+ // icmp pred X, (X op Y)
+ if (Pred == ICmpInst::ICMP_ULT)
+ return getFalse(ITy);
+ if (Pred == ICmpInst::ICMP_UGE)
+ return getTrue(ITy);
+ }
+
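
The fold just above relies on the fact that an unsigned value never grows under a logical right shift or an unsigned division, so 'icmp ult X, (X >> Y)' is always false and 'icmp uge X, (X >> Y)' is always true, and likewise for udiv. A quick exhaustive check at 8 bits (plain C++, illustrative only; division by zero is skipped since udiv by zero is undefined anyway):

    #include <cassert>

    int main() {
      for (unsigned x = 0; x <= 255; ++x) {
        for (unsigned y = 0; y < 8; ++y)
          assert(x >= (x >> y)); // lshr never increases an unsigned value
        for (unsigned y = 1; y <= 255; ++y)
          assert(x >= x / y);    // neither does udiv by a non-zero amount
      }
      return 0;
    }
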
// handle:
// CI2 << X == CI
// CI2 << X != CI
@@ -2870,18 +2772,19 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (MaxRecurse && LBO && RBO && LBO->getOpcode() == RBO->getOpcode() &&
LBO->getOperand(1) == RBO->getOperand(1)) {
switch (LBO->getOpcode()) {
- default: break;
+ default:
+ break;
case Instruction::UDiv:
case Instruction::LShr:
if (ICmpInst::isSigned(Pred))
break;
- // fall-through
+ LLVM_FALLTHROUGH;
case Instruction::SDiv:
case Instruction::AShr:
if (!LBO->isExact() || !RBO->isExact())
break;
if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
- RBO->getOperand(0), Q, MaxRecurse-1))
+ RBO->getOperand(0), Q, MaxRecurse - 1))
return V;
break;
case Instruction::Shl: {
@@ -2892,40 +2795,51 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (!NSW && ICmpInst::isSigned(Pred))
break;
if (Value *V = SimplifyICmpInst(Pred, LBO->getOperand(0),
- RBO->getOperand(0), Q, MaxRecurse-1))
+ RBO->getOperand(0), Q, MaxRecurse - 1))
return V;
break;
}
}
}
+ return nullptr;
+}
- // Simplify comparisons involving max/min.
+/// Simplify integer comparisons where at least one operand of the compare
+/// matches an integer min/max idiom.
+static Value *simplifyICmpWithMinMax(CmpInst::Predicate Pred, Value *LHS,
+ Value *RHS, const Query &Q,
+ unsigned MaxRecurse) {
+ Type *ITy = GetCompareTy(LHS); // The return type.
Value *A, *B;
CmpInst::Predicate P = CmpInst::BAD_ICMP_PREDICATE;
CmpInst::Predicate EqP; // Chosen so that "A == max/min(A,B)" iff "A EqP B".
// Signed variants on "max(a,b)>=a -> true".
if (match(LHS, m_SMax(m_Value(A), m_Value(B))) && (A == RHS || B == RHS)) {
- if (A != RHS) std::swap(A, B); // smax(A, B) pred A.
+ if (A != RHS)
+ std::swap(A, B); // smax(A, B) pred A.
EqP = CmpInst::ICMP_SGE; // "A == smax(A, B)" iff "A sge B".
// We analyze this as smax(A, B) pred A.
P = Pred;
} else if (match(RHS, m_SMax(m_Value(A), m_Value(B))) &&
(A == LHS || B == LHS)) {
- if (A != LHS) std::swap(A, B); // A pred smax(A, B).
+ if (A != LHS)
+ std::swap(A, B); // A pred smax(A, B).
EqP = CmpInst::ICMP_SGE; // "A == smax(A, B)" iff "A sge B".
// We analyze this as smax(A, B) swapped-pred A.
P = CmpInst::getSwappedPredicate(Pred);
} else if (match(LHS, m_SMin(m_Value(A), m_Value(B))) &&
(A == RHS || B == RHS)) {
- if (A != RHS) std::swap(A, B); // smin(A, B) pred A.
+ if (A != RHS)
+ std::swap(A, B); // smin(A, B) pred A.
EqP = CmpInst::ICMP_SLE; // "A == smin(A, B)" iff "A sle B".
// We analyze this as smax(-A, -B) swapped-pred -A.
// Note that we do not need to actually form -A or -B thanks to EqP.
P = CmpInst::getSwappedPredicate(Pred);
} else if (match(RHS, m_SMin(m_Value(A), m_Value(B))) &&
(A == LHS || B == LHS)) {
- if (A != LHS) std::swap(A, B); // A pred smin(A, B).
+ if (A != LHS)
+ std::swap(A, B); // A pred smin(A, B).
EqP = CmpInst::ICMP_SLE; // "A == smin(A, B)" iff "A sle B".
// We analyze this as smax(-A, -B) pred -A.
// Note that we do not need to actually form -A or -B thanks to EqP.
@@ -2946,7 +2860,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return V;
// Otherwise, see if "A EqP B" simplifies.
if (MaxRecurse)
- if (Value *V = SimplifyICmpInst(EqP, A, B, Q, MaxRecurse-1))
+ if (Value *V = SimplifyICmpInst(EqP, A, B, Q, MaxRecurse - 1))
return V;
break;
case CmpInst::ICMP_NE:
@@ -2960,7 +2874,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return V;
// Otherwise, see if "A InvEqP B" simplifies.
if (MaxRecurse)
- if (Value *V = SimplifyICmpInst(InvEqP, A, B, Q, MaxRecurse-1))
+ if (Value *V = SimplifyICmpInst(InvEqP, A, B, Q, MaxRecurse - 1))
return V;
break;
}
@@ -2976,26 +2890,30 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// Unsigned variants on "max(a,b)>=a -> true".
P = CmpInst::BAD_ICMP_PREDICATE;
if (match(LHS, m_UMax(m_Value(A), m_Value(B))) && (A == RHS || B == RHS)) {
- if (A != RHS) std::swap(A, B); // umax(A, B) pred A.
+ if (A != RHS)
+ std::swap(A, B); // umax(A, B) pred A.
EqP = CmpInst::ICMP_UGE; // "A == umax(A, B)" iff "A uge B".
// We analyze this as umax(A, B) pred A.
P = Pred;
} else if (match(RHS, m_UMax(m_Value(A), m_Value(B))) &&
(A == LHS || B == LHS)) {
- if (A != LHS) std::swap(A, B); // A pred umax(A, B).
+ if (A != LHS)
+ std::swap(A, B); // A pred umax(A, B).
EqP = CmpInst::ICMP_UGE; // "A == umax(A, B)" iff "A uge B".
// We analyze this as umax(A, B) swapped-pred A.
P = CmpInst::getSwappedPredicate(Pred);
} else if (match(LHS, m_UMin(m_Value(A), m_Value(B))) &&
(A == RHS || B == RHS)) {
- if (A != RHS) std::swap(A, B); // umin(A, B) pred A.
+ if (A != RHS)
+ std::swap(A, B); // umin(A, B) pred A.
EqP = CmpInst::ICMP_ULE; // "A == umin(A, B)" iff "A ule B".
// We analyze this as umax(-A, -B) swapped-pred -A.
// Note that we do not need to actually form -A or -B thanks to EqP.
P = CmpInst::getSwappedPredicate(Pred);
} else if (match(RHS, m_UMin(m_Value(A), m_Value(B))) &&
(A == LHS || B == LHS)) {
- if (A != LHS) std::swap(A, B); // A pred umin(A, B).
+ if (A != LHS)
+ std::swap(A, B); // A pred umin(A, B).
EqP = CmpInst::ICMP_ULE; // "A == umin(A, B)" iff "A ule B".
// We analyze this as umax(-A, -B) pred -A.
// Note that we do not need to actually form -A or -B thanks to EqP.
@@ -3016,7 +2934,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return V;
// Otherwise, see if "A EqP B" simplifies.
if (MaxRecurse)
- if (Value *V = SimplifyICmpInst(EqP, A, B, Q, MaxRecurse-1))
+ if (Value *V = SimplifyICmpInst(EqP, A, B, Q, MaxRecurse - 1))
return V;
break;
case CmpInst::ICMP_NE:
@@ -3030,7 +2948,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return V;
// Otherwise, see if "A InvEqP B" simplifies.
if (MaxRecurse)
- if (Value *V = SimplifyICmpInst(InvEqP, A, B, Q, MaxRecurse-1))
+ if (Value *V = SimplifyICmpInst(InvEqP, A, B, Q, MaxRecurse - 1))
return V;
break;
}
@@ -3087,11 +3005,254 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
return getFalse(ITy);
}
+ return nullptr;
+}
+
+/// Given operands for an ICmpInst, see if we can fold the result.
+/// If not, this returns null.
+static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ const Query &Q, unsigned MaxRecurse) {
+ CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate;
+ assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!");
+
+ if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
+ if (Constant *CRHS = dyn_cast<Constant>(RHS))
+ return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, Q.DL, Q.TLI);
+
+ // If we have a constant, make sure it is on the RHS.
+ std::swap(LHS, RHS);
+ Pred = CmpInst::getSwappedPredicate(Pred);
+ }
+
+ Type *ITy = GetCompareTy(LHS); // The return type.
+
+ // icmp X, X -> true/false
+ // X icmp undef -> true/false. For example, icmp ugt %X, undef -> false
+ // because X could be 0.
+ if (LHS == RHS || isa<UndefValue>(RHS))
+ return ConstantInt::get(ITy, CmpInst::isTrueWhenEqual(Pred));
+
+ if (Value *V = simplifyICmpOfBools(Pred, LHS, RHS, Q))
+ return V;
+
+ if (Value *V = simplifyICmpWithZero(Pred, LHS, RHS, Q))
+ return V;
+
+ if (Value *V = simplifyICmpWithConstant(Pred, LHS, RHS))
+ return V;
+
+ // If both operands have range metadata, use the metadata
+ // to simplify the comparison.
+ if (isa<Instruction>(RHS) && isa<Instruction>(LHS)) {
+ auto RHS_Instr = dyn_cast<Instruction>(RHS);
+ auto LHS_Instr = dyn_cast<Instruction>(LHS);
+
+ if (RHS_Instr->getMetadata(LLVMContext::MD_range) &&
+ LHS_Instr->getMetadata(LLVMContext::MD_range)) {
+ auto RHS_CR = getConstantRangeFromMetadata(
+ *RHS_Instr->getMetadata(LLVMContext::MD_range));
+ auto LHS_CR = getConstantRangeFromMetadata(
+ *LHS_Instr->getMetadata(LLVMContext::MD_range));
+
+ auto Satisfied_CR = ConstantRange::makeSatisfyingICmpRegion(Pred, RHS_CR);
+ if (Satisfied_CR.contains(LHS_CR))
+ return ConstantInt::getTrue(RHS->getContext());
+
+ auto InversedSatisfied_CR = ConstantRange::makeSatisfyingICmpRegion(
+ CmpInst::getInversePredicate(Pred), RHS_CR);
+ if (InversedSatisfied_CR.contains(LHS_CR))
+ return ConstantInt::getFalse(RHS->getContext());
+ }
+ }
+
+ // Compare of cast, for example (zext X) != 0 -> X != 0
+ if (isa<CastInst>(LHS) && (isa<Constant>(RHS) || isa<CastInst>(RHS))) {
+ Instruction *LI = cast<CastInst>(LHS);
+ Value *SrcOp = LI->getOperand(0);
+ Type *SrcTy = SrcOp->getType();
+ Type *DstTy = LI->getType();
+
+ // Turn icmp (ptrtoint x), (ptrtoint/constant) into a compare of the input
+ // if the integer type is the same size as the pointer type.
+ if (MaxRecurse && isa<PtrToIntInst>(LI) &&
+ Q.DL.getTypeSizeInBits(SrcTy) == DstTy->getPrimitiveSizeInBits()) {
+ if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
+ // Transfer the cast to the constant.
+ if (Value *V = SimplifyICmpInst(Pred, SrcOp,
+ ConstantExpr::getIntToPtr(RHSC, SrcTy),
+ Q, MaxRecurse-1))
+ return V;
+ } else if (PtrToIntInst *RI = dyn_cast<PtrToIntInst>(RHS)) {
+ if (RI->getOperand(0)->getType() == SrcTy)
+ // Compare without the cast.
+ if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0),
+ Q, MaxRecurse-1))
+ return V;
+ }
+ }
+
+ if (isa<ZExtInst>(LHS)) {
+ // Turn icmp (zext X), (zext Y) into a compare of X and Y if they have the
+ // same type.
+ if (ZExtInst *RI = dyn_cast<ZExtInst>(RHS)) {
+ if (MaxRecurse && SrcTy == RI->getOperand(0)->getType())
+ // Compare X and Y. Note that signed predicates become unsigned.
+ if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred),
+ SrcOp, RI->getOperand(0), Q,
+ MaxRecurse-1))
+ return V;
+ }
+ // Turn icmp (zext X), Cst into a compare of X and Cst if Cst is extended
+ // too. If not, then try to deduce the result of the comparison.
+ else if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+ // Compute the constant that would happen if we truncated to SrcTy then
+ // reextended to DstTy.
+ Constant *Trunc = ConstantExpr::getTrunc(CI, SrcTy);
+ Constant *RExt = ConstantExpr::getCast(CastInst::ZExt, Trunc, DstTy);
+
+ // If the re-extended constant didn't change then this is effectively
+ // also a case of comparing two zero-extended values.
+ if (RExt == CI && MaxRecurse)
+ if (Value *V = SimplifyICmpInst(ICmpInst::getUnsignedPredicate(Pred),
+ SrcOp, Trunc, Q, MaxRecurse-1))
+ return V;
+
+ // Otherwise the upper bits of LHS are zero while RHS has a non-zero bit
+ // there. Use this to work out the result of the comparison.
+ if (RExt != CI) {
+ switch (Pred) {
+ default: llvm_unreachable("Unknown ICmp predicate!");
+ // LHS <u RHS.
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE:
+ return ConstantInt::getFalse(CI->getContext());
+
+ case ICmpInst::ICMP_NE:
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE:
+ return ConstantInt::getTrue(CI->getContext());
+
+          // LHS is non-negative. If RHS is negative then LHS >s RHS. If RHS
+ // is non-negative then LHS <s RHS.
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE:
+ return CI->getValue().isNegative() ?
+ ConstantInt::getTrue(CI->getContext()) :
+ ConstantInt::getFalse(CI->getContext());
+
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE:
+ return CI->getValue().isNegative() ?
+ ConstantInt::getFalse(CI->getContext()) :
+ ConstantInt::getTrue(CI->getContext());
+ }
+ }
+ }
+ }
+
+ if (isa<SExtInst>(LHS)) {
+ // Turn icmp (sext X), (sext Y) into a compare of X and Y if they have the
+ // same type.
+ if (SExtInst *RI = dyn_cast<SExtInst>(RHS)) {
+ if (MaxRecurse && SrcTy == RI->getOperand(0)->getType())
+ // Compare X and Y. Note that the predicate does not change.
+ if (Value *V = SimplifyICmpInst(Pred, SrcOp, RI->getOperand(0),
+ Q, MaxRecurse-1))
+ return V;
+ }
+ // Turn icmp (sext X), Cst into a compare of X and Cst if Cst is extended
+ // too. If not, then try to deduce the result of the comparison.
+ else if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+ // Compute the constant that would happen if we truncated to SrcTy then
+ // reextended to DstTy.
+ Constant *Trunc = ConstantExpr::getTrunc(CI, SrcTy);
+ Constant *RExt = ConstantExpr::getCast(CastInst::SExt, Trunc, DstTy);
+
+ // If the re-extended constant didn't change then this is effectively
+ // also a case of comparing two sign-extended values.
+ if (RExt == CI && MaxRecurse)
+ if (Value *V = SimplifyICmpInst(Pred, SrcOp, Trunc, Q, MaxRecurse-1))
+ return V;
+
+ // Otherwise the upper bits of LHS are all equal, while RHS has varying
+ // bits there. Use this to work out the result of the comparison.
+ if (RExt != CI) {
+ switch (Pred) {
+ default: llvm_unreachable("Unknown ICmp predicate!");
+ case ICmpInst::ICMP_EQ:
+ return ConstantInt::getFalse(CI->getContext());
+ case ICmpInst::ICMP_NE:
+ return ConstantInt::getTrue(CI->getContext());
+
+ // If RHS is non-negative then LHS <s RHS. If RHS is negative then
+ // LHS >s RHS.
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE:
+ return CI->getValue().isNegative() ?
+ ConstantInt::getTrue(CI->getContext()) :
+ ConstantInt::getFalse(CI->getContext());
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE:
+ return CI->getValue().isNegative() ?
+ ConstantInt::getFalse(CI->getContext()) :
+ ConstantInt::getTrue(CI->getContext());
+
+ // If LHS is non-negative then LHS <u RHS. If LHS is negative then
+ // LHS >u RHS.
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_UGE:
+ // Comparison is true iff the LHS <s 0.
+ if (MaxRecurse)
+ if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SLT, SrcOp,
+ Constant::getNullValue(SrcTy),
+ Q, MaxRecurse-1))
+ return V;
+ break;
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_ULE:
+ // Comparison is true iff the LHS >=s 0.
+ if (MaxRecurse)
+ if (Value *V = SimplifyICmpInst(ICmpInst::ICMP_SGE, SrcOp,
+ Constant::getNullValue(SrcTy),
+ Q, MaxRecurse-1))
+ return V;
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ // icmp eq|ne X, Y -> false|true if X != Y
+ if ((Pred == ICmpInst::ICMP_EQ || Pred == ICmpInst::ICMP_NE) &&
+ isKnownNonEqual(LHS, RHS, Q.DL, Q.AC, Q.CxtI, Q.DT)) {
+ LLVMContext &Ctx = LHS->getType()->getContext();
+ return Pred == ICmpInst::ICMP_NE ?
+ ConstantInt::getTrue(Ctx) : ConstantInt::getFalse(Ctx);
+ }
+
+ if (Value *V = simplifyICmpWithBinOp(Pred, LHS, RHS, Q, MaxRecurse))
+ return V;
+
+ if (Value *V = simplifyICmpWithMinMax(Pred, LHS, RHS, Q, MaxRecurse))
+ return V;
+
// Simplify comparisons of related pointers using a powerful, recursive
  // GEP-walk when we have target data available.
if (LHS->getType()->isPointerTy())
if (auto *C = computePointerICmp(Q.DL, Q.TLI, Q.DT, Pred, Q.CxtI, LHS, RHS))
return C;
+ if (auto *CLHS = dyn_cast<PtrToIntOperator>(LHS))
+ if (auto *CRHS = dyn_cast<PtrToIntOperator>(RHS))
+ if (Q.DL.getTypeSizeInBits(CLHS->getPointerOperandType()) ==
+ Q.DL.getTypeSizeInBits(CLHS->getType()) &&
+ Q.DL.getTypeSizeInBits(CRHS->getPointerOperandType()) ==
+ Q.DL.getTypeSizeInBits(CRHS->getType()))
+ if (auto *C = computePointerICmp(Q.DL, Q.TLI, Q.DT, Pred, Q.CxtI,
+ CLHS->getPointerOperand(),
+ CRHS->getPointerOperand()))
+ return C;
if (GetElementPtrInst *GLHS = dyn_cast<GetElementPtrInst>(LHS)) {
if (GEPOperator *GRHS = dyn_cast<GEPOperator>(RHS)) {
@@ -3119,17 +3280,16 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// If a bit is known to be zero for A and known to be one for B,
// then A and B cannot be equal.
if (ICmpInst::isEquality(Pred)) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
- uint32_t BitWidth = CI->getBitWidth();
+ const APInt *RHSVal;
+ if (match(RHS, m_APInt(RHSVal))) {
+ unsigned BitWidth = RHSVal->getBitWidth();
APInt LHSKnownZero(BitWidth, 0);
APInt LHSKnownOne(BitWidth, 0);
computeKnownBits(LHS, LHSKnownZero, LHSKnownOne, Q.DL, /*Depth=*/0, Q.AC,
Q.CxtI, Q.DT);
- const APInt &RHSVal = CI->getValue();
- if (((LHSKnownZero & RHSVal) != 0) || ((LHSKnownOne & ~RHSVal) != 0))
- return Pred == ICmpInst::ICMP_EQ
- ? ConstantInt::getFalse(CI->getContext())
- : ConstantInt::getTrue(CI->getContext());
+ if (((LHSKnownZero & *RHSVal) != 0) || ((LHSKnownOne & ~(*RHSVal)) != 0))
+ return Pred == ICmpInst::ICMP_EQ ? ConstantInt::getFalse(ITy)
+ : ConstantInt::getTrue(ITy);
}
}
@@ -3175,17 +3335,18 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
}
// Fold trivial predicates.
+ Type *RetTy = GetCompareTy(LHS);
if (Pred == FCmpInst::FCMP_FALSE)
- return ConstantInt::get(GetCompareTy(LHS), 0);
+ return getFalse(RetTy);
if (Pred == FCmpInst::FCMP_TRUE)
- return ConstantInt::get(GetCompareTy(LHS), 1);
+ return getTrue(RetTy);
// UNO/ORD predicates can be trivially folded if NaNs are ignored.
if (FMF.noNaNs()) {
if (Pred == FCmpInst::FCMP_UNO)
- return ConstantInt::get(GetCompareTy(LHS), 0);
+ return getFalse(RetTy);
if (Pred == FCmpInst::FCMP_ORD)
- return ConstantInt::get(GetCompareTy(LHS), 1);
+ return getTrue(RetTy);
}
// fcmp pred x, undef and fcmp pred undef, x
@@ -3193,15 +3354,15 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS)) {
// Choosing NaN for the undef will always make unordered comparison succeed
// and ordered comparison fail.
- return ConstantInt::get(GetCompareTy(LHS), CmpInst::isUnordered(Pred));
+ return ConstantInt::get(RetTy, CmpInst::isUnordered(Pred));
}
// fcmp x,x -> true/false. Not all compares are foldable.
if (LHS == RHS) {
if (CmpInst::isTrueWhenEqual(Pred))
- return ConstantInt::get(GetCompareTy(LHS), 1);
+ return getTrue(RetTy);
if (CmpInst::isFalseWhenEqual(Pred))
- return ConstantInt::get(GetCompareTy(LHS), 0);
+ return getFalse(RetTy);
}
// Handle fcmp with constant RHS
@@ -3216,11 +3377,11 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// If the constant is a nan, see if we can fold the comparison based on it.
if (CFP->getValueAPF().isNaN()) {
if (FCmpInst::isOrdered(Pred)) // True "if ordered and foo"
- return ConstantInt::getFalse(CFP->getContext());
+ return getFalse(RetTy);
assert(FCmpInst::isUnordered(Pred) &&
"Comparison must be either ordered or unordered!");
// True if unordered.
- return ConstantInt::get(GetCompareTy(LHS), 1);
+ return getTrue(RetTy);
}
// Check whether the constant is an infinity.
if (CFP->getValueAPF().isInfinity()) {
@@ -3228,10 +3389,10 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
switch (Pred) {
case FCmpInst::FCMP_OLT:
// No value is ordered and less than negative infinity.
- return ConstantInt::get(GetCompareTy(LHS), 0);
+ return getFalse(RetTy);
case FCmpInst::FCMP_UGE:
// All values are unordered with or at least negative infinity.
- return ConstantInt::get(GetCompareTy(LHS), 1);
+ return getTrue(RetTy);
default:
break;
}
@@ -3239,10 +3400,10 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
switch (Pred) {
case FCmpInst::FCMP_OGT:
// No value is ordered and greater than infinity.
- return ConstantInt::get(GetCompareTy(LHS), 0);
+ return getFalse(RetTy);
case FCmpInst::FCMP_ULE:
// All values are unordered with and at most infinity.
- return ConstantInt::get(GetCompareTy(LHS), 1);
+ return getTrue(RetTy);
default:
break;
}
@@ -3252,12 +3413,12 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
switch (Pred) {
case FCmpInst::FCMP_UGE:
if (CannotBeOrderedLessThanZero(LHS, Q.TLI))
- return ConstantInt::get(GetCompareTy(LHS), 1);
+ return getTrue(RetTy);
break;
case FCmpInst::FCMP_OLT:
// X < 0
if (CannotBeOrderedLessThanZero(LHS, Q.TLI))
- return ConstantInt::get(GetCompareTy(LHS), 0);
+ return getFalse(RetTy);
break;
default:
break;
@@ -3371,6 +3532,150 @@ static const Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
return nullptr;
}
+/// Try to simplify a select instruction when its condition operand is an
+/// integer comparison where one operand of the compare is a constant.
+static Value *simplifySelectBitTest(Value *TrueVal, Value *FalseVal, Value *X,
+ const APInt *Y, bool TrueWhenUnset) {
+ const APInt *C;
+
+ // (X & Y) == 0 ? X & ~Y : X --> X
+ // (X & Y) != 0 ? X & ~Y : X --> X & ~Y
+ if (FalseVal == X && match(TrueVal, m_And(m_Specific(X), m_APInt(C))) &&
+ *Y == ~*C)
+ return TrueWhenUnset ? FalseVal : TrueVal;
+
+ // (X & Y) == 0 ? X : X & ~Y --> X & ~Y
+ // (X & Y) != 0 ? X : X & ~Y --> X
+ if (TrueVal == X && match(FalseVal, m_And(m_Specific(X), m_APInt(C))) &&
+ *Y == ~*C)
+ return TrueWhenUnset ? FalseVal : TrueVal;
+
+ if (Y->isPowerOf2()) {
+ // (X & Y) == 0 ? X | Y : X --> X | Y
+ // (X & Y) != 0 ? X | Y : X --> X
+ if (FalseVal == X && match(TrueVal, m_Or(m_Specific(X), m_APInt(C))) &&
+ *Y == *C)
+ return TrueWhenUnset ? TrueVal : FalseVal;
+
+ // (X & Y) == 0 ? X : X | Y --> X
+ // (X & Y) != 0 ? X : X | Y --> X | Y
+ if (TrueVal == X && match(FalseVal, m_Or(m_Specific(X), m_APInt(C))) &&
+ *Y == *C)
+ return TrueWhenUnset ? TrueVal : FalseVal;
+ }
+
+ return nullptr;
+}
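
The first pair of folds can be validated by brute force: when the tested bits Y are clear in X, clearing them again is a no-op, so '(X & Y) == 0 ? X & ~Y : X' is always just X. A standalone check in plain C++ with an arbitrary 8-bit mask (the value 0x0c is only an example, not something taken from this patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint8_t Y = 0x0c; // example mask
      for (unsigned v = 0; v <= 255; ++v) {
        uint8_t X = static_cast<uint8_t>(v);
        uint8_t Sel = ((X & Y) == 0) ? static_cast<uint8_t>(X & ~Y) : X;
        assert(Sel == X); // (X & Y) == 0 ? X & ~Y : X --> X
      }
      return 0;
    }
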
+
+/// An alternative way to test if a bit is set or not uses sgt/slt instead of
+/// eq/ne.
+static Value *simplifySelectWithFakeICmpEq(Value *CmpLHS, Value *TrueVal,
+ Value *FalseVal,
+ bool TrueWhenUnset) {
+ unsigned BitWidth = TrueVal->getType()->getScalarSizeInBits();
+ if (!BitWidth)
+ return nullptr;
+
+ APInt MinSignedValue;
+ Value *X;
+ if (match(CmpLHS, m_Trunc(m_Value(X))) && (X == TrueVal || X == FalseVal)) {
+ // icmp slt (trunc X), 0 <--> icmp ne (and X, C), 0
+ // icmp sgt (trunc X), -1 <--> icmp eq (and X, C), 0
+ unsigned DestSize = CmpLHS->getType()->getScalarSizeInBits();
+ MinSignedValue = APInt::getSignedMinValue(DestSize).zext(BitWidth);
+ } else {
+ // icmp slt X, 0 <--> icmp ne (and X, C), 0
+ // icmp sgt X, -1 <--> icmp eq (and X, C), 0
+ X = CmpLHS;
+ MinSignedValue = APInt::getSignedMinValue(BitWidth);
+ }
+
+ if (Value *V = simplifySelectBitTest(TrueVal, FalseVal, X, &MinSignedValue,
+ TrueWhenUnset))
+ return V;
+
+ return nullptr;
+}
+
+/// Try to simplify a select instruction when its condition operand is an
+/// integer comparison.
+static Value *simplifySelectWithICmpCond(Value *CondVal, Value *TrueVal,
+ Value *FalseVal, const Query &Q,
+ unsigned MaxRecurse) {
+ ICmpInst::Predicate Pred;
+ Value *CmpLHS, *CmpRHS;
+ if (!match(CondVal, m_ICmp(Pred, m_Value(CmpLHS), m_Value(CmpRHS))))
+ return nullptr;
+
+ // FIXME: This code is nearly duplicated in InstCombine. Using/refactoring
+ // decomposeBitTestICmp() might help.
+ if (ICmpInst::isEquality(Pred) && match(CmpRHS, m_Zero())) {
+ Value *X;
+ const APInt *Y;
+ if (match(CmpLHS, m_And(m_Value(X), m_APInt(Y))))
+ if (Value *V = simplifySelectBitTest(TrueVal, FalseVal, X, Y,
+ Pred == ICmpInst::ICMP_EQ))
+ return V;
+ } else if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, m_Zero())) {
+ // Comparing signed-less-than 0 checks if the sign bit is set.
+ if (Value *V = simplifySelectWithFakeICmpEq(CmpLHS, TrueVal, FalseVal,
+ false))
+ return V;
+ } else if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, m_AllOnes())) {
+ // Comparing signed-greater-than -1 checks if the sign bit is not set.
+ if (Value *V = simplifySelectWithFakeICmpEq(CmpLHS, TrueVal, FalseVal,
+ true))
+ return V;
+ }
+
+ if (CondVal->hasOneUse()) {
+ const APInt *C;
+ if (match(CmpRHS, m_APInt(C))) {
+ // X < MIN ? T : F --> F
+ if (Pred == ICmpInst::ICMP_SLT && C->isMinSignedValue())
+ return FalseVal;
+ // X < MIN ? T : F --> F
+ if (Pred == ICmpInst::ICMP_ULT && C->isMinValue())
+ return FalseVal;
+ // X > MAX ? T : F --> F
+ if (Pred == ICmpInst::ICMP_SGT && C->isMaxSignedValue())
+ return FalseVal;
+ // X > MAX ? T : F --> F
+ if (Pred == ICmpInst::ICMP_UGT && C->isMaxValue())
+ return FalseVal;
+ }
+ }
+
+ // If we have an equality comparison, then we know the value in one of the
+ // arms of the select. See if substituting this value into the arm and
+ // simplifying the result yields the same value as the other arm.
+ if (Pred == ICmpInst::ICMP_EQ) {
+ if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q, MaxRecurse) ==
+ TrueVal ||
+ SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q, MaxRecurse) ==
+ TrueVal)
+ return FalseVal;
+ if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q, MaxRecurse) ==
+ FalseVal ||
+ SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q, MaxRecurse) ==
+ FalseVal)
+ return FalseVal;
+ } else if (Pred == ICmpInst::ICMP_NE) {
+ if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q, MaxRecurse) ==
+ FalseVal ||
+ SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q, MaxRecurse) ==
+ FalseVal)
+ return TrueVal;
+ if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q, MaxRecurse) ==
+ TrueVal ||
+ SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q, MaxRecurse) ==
+ TrueVal)
+ return TrueVal;
+ }
+
+ return nullptr;
+}
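The equality-substitution rule at the end of this helper is easiest to see on a concrete select: in x == 0 ? 0 : x, substituting the known value 0 for x makes the true arm agree with the false arm, so the whole select is just x. A minimal standalone check (plain C++, not part of the patch):

#include <cassert>
#include <cstdint>

int main() {
  // "select (x == 0), 0, x" is always x: when the condition holds, both arms
  // evaluate to the same value, and otherwise the false arm is taken.
  for (int32_t X : {-5, -1, 0, 1, 7, 1 << 30})
    assert((X == 0 ? 0 : X) == X);
  return 0;
}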
+
/// Given operands for a SelectInst, see if we can fold the result.
/// If not, this returns null.
static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal,
@@ -3399,106 +3704,9 @@ static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal,
if (isa<UndefValue>(FalseVal)) // select C, X, undef -> X
return TrueVal;
- if (const auto *ICI = dyn_cast<ICmpInst>(CondVal)) {
- // FIXME: This code is nearly duplicated in InstCombine. Using/refactoring
- // decomposeBitTestICmp() might help.
- unsigned BitWidth =
- Q.DL.getTypeSizeInBits(TrueVal->getType()->getScalarType());
- ICmpInst::Predicate Pred = ICI->getPredicate();
- Value *CmpLHS = ICI->getOperand(0);
- Value *CmpRHS = ICI->getOperand(1);
- APInt MinSignedValue = APInt::getSignBit(BitWidth);
- Value *X;
- const APInt *Y;
- bool TrueWhenUnset;
- bool IsBitTest = false;
- if (ICmpInst::isEquality(Pred) &&
- match(CmpLHS, m_And(m_Value(X), m_APInt(Y))) &&
- match(CmpRHS, m_Zero())) {
- IsBitTest = true;
- TrueWhenUnset = Pred == ICmpInst::ICMP_EQ;
- } else if (Pred == ICmpInst::ICMP_SLT && match(CmpRHS, m_Zero())) {
- X = CmpLHS;
- Y = &MinSignedValue;
- IsBitTest = true;
- TrueWhenUnset = false;
- } else if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, m_AllOnes())) {
- X = CmpLHS;
- Y = &MinSignedValue;
- IsBitTest = true;
- TrueWhenUnset = true;
- }
- if (IsBitTest) {
- const APInt *C;
- // (X & Y) == 0 ? X & ~Y : X --> X
- // (X & Y) != 0 ? X & ~Y : X --> X & ~Y
- if (FalseVal == X && match(TrueVal, m_And(m_Specific(X), m_APInt(C))) &&
- *Y == ~*C)
- return TrueWhenUnset ? FalseVal : TrueVal;
- // (X & Y) == 0 ? X : X & ~Y --> X & ~Y
- // (X & Y) != 0 ? X : X & ~Y --> X
- if (TrueVal == X && match(FalseVal, m_And(m_Specific(X), m_APInt(C))) &&
- *Y == ~*C)
- return TrueWhenUnset ? FalseVal : TrueVal;
-
- if (Y->isPowerOf2()) {
- // (X & Y) == 0 ? X | Y : X --> X | Y
- // (X & Y) != 0 ? X | Y : X --> X
- if (FalseVal == X && match(TrueVal, m_Or(m_Specific(X), m_APInt(C))) &&
- *Y == *C)
- return TrueWhenUnset ? TrueVal : FalseVal;
- // (X & Y) == 0 ? X : X | Y --> X
- // (X & Y) != 0 ? X : X | Y --> X | Y
- if (TrueVal == X && match(FalseVal, m_Or(m_Specific(X), m_APInt(C))) &&
- *Y == *C)
- return TrueWhenUnset ? TrueVal : FalseVal;
- }
- }
- if (ICI->hasOneUse()) {
- const APInt *C;
- if (match(CmpRHS, m_APInt(C))) {
- // X < MIN ? T : F --> F
- if (Pred == ICmpInst::ICMP_SLT && C->isMinSignedValue())
- return FalseVal;
- // X < MIN ? T : F --> F
- if (Pred == ICmpInst::ICMP_ULT && C->isMinValue())
- return FalseVal;
- // X > MAX ? T : F --> F
- if (Pred == ICmpInst::ICMP_SGT && C->isMaxSignedValue())
- return FalseVal;
- // X > MAX ? T : F --> F
- if (Pred == ICmpInst::ICMP_UGT && C->isMaxValue())
- return FalseVal;
- }
- }
-
- // If we have an equality comparison then we know the value in one of the
- // arms of the select. See if substituting this value into the arm and
- // simplifying the result yields the same value as the other arm.
- if (Pred == ICmpInst::ICMP_EQ) {
- if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q, MaxRecurse) ==
- TrueVal ||
- SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q, MaxRecurse) ==
- TrueVal)
- return FalseVal;
- if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q, MaxRecurse) ==
- FalseVal ||
- SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q, MaxRecurse) ==
- FalseVal)
- return FalseVal;
- } else if (Pred == ICmpInst::ICMP_NE) {
- if (SimplifyWithOpReplaced(TrueVal, CmpLHS, CmpRHS, Q, MaxRecurse) ==
- FalseVal ||
- SimplifyWithOpReplaced(TrueVal, CmpRHS, CmpLHS, Q, MaxRecurse) ==
- FalseVal)
- return TrueVal;
- if (SimplifyWithOpReplaced(FalseVal, CmpLHS, CmpRHS, Q, MaxRecurse) ==
- TrueVal ||
- SimplifyWithOpReplaced(FalseVal, CmpRHS, CmpLHS, Q, MaxRecurse) ==
- TrueVal)
- return TrueVal;
- }
- }
+ if (Value *V =
+ simplifySelectWithICmpCond(CondVal, TrueVal, FalseVal, Q, MaxRecurse))
+ return V;
return nullptr;
}
@@ -3587,6 +3795,32 @@ static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops,
}
}
+ if (Q.DL.getTypeAllocSize(LastType) == 1 &&
+ all_of(Ops.slice(1).drop_back(1),
+ [](Value *Idx) { return match(Idx, m_Zero()); })) {
+ unsigned PtrWidth =
+ Q.DL.getPointerSizeInBits(Ops[0]->getType()->getPointerAddressSpace());
+ if (Q.DL.getTypeSizeInBits(Ops.back()->getType()) == PtrWidth) {
+ APInt BasePtrOffset(PtrWidth, 0);
+ Value *StrippedBasePtr =
+ Ops[0]->stripAndAccumulateInBoundsConstantOffsets(Q.DL,
+ BasePtrOffset);
+
+ // gep (gep V, C), (sub 0, V) -> C
+ if (match(Ops.back(),
+ m_Sub(m_Zero(), m_PtrToInt(m_Specific(StrippedBasePtr))))) {
+ auto *CI = ConstantInt::get(GEPTy->getContext(), BasePtrOffset);
+ return ConstantExpr::getIntToPtr(CI, GEPTy);
+ }
+ // gep (gep V, C), (xor V, -1) -> C-1
+ if (match(Ops.back(),
+ m_Xor(m_PtrToInt(m_Specific(StrippedBasePtr)), m_AllOnes()))) {
+ auto *CI = ConstantInt::get(GEPTy->getContext(), BasePtrOffset - 1);
+ return ConstantExpr::getIntToPtr(CI, GEPTy);
+ }
+ }
+ }
+
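The two new GEP folds are modular pointer arithmetic once the trailing index is a byte offset (which the getTypeAllocSize(LastType) == 1 check guarantees). A standalone sketch of the underlying identities, using uintptr_t values as stand-ins for pointers (not LLVM code):

#include <cassert>
#include <cstdint>

int main() {
  uintptr_t V = 0x1000; // hypothetical base pointer value
  uintptr_t C = 24;     // constant offset accumulated from the inner GEP
  // gep (gep V, C), (sub 0, V) --> C       because (V + C) + (0 - V) == C
  assert((V + C) + (0 - V) == C);
  // gep (gep V, C), (xor V, -1) --> C - 1  because ~V == -V - 1 in two's complement
  assert((V + C) + ~V == C - 1);
  return 0;
}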
// Check to see if this is constant foldable.
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
if (!isa<Constant>(Ops[i]))
@@ -3742,19 +3976,47 @@ static Value *SimplifyPHINode(PHINode *PN, const Query &Q) {
return CommonValue;
}
-static Value *SimplifyTruncInst(Value *Op, Type *Ty, const Query &Q, unsigned) {
- if (Constant *C = dyn_cast<Constant>(Op))
- return ConstantFoldCastOperand(Instruction::Trunc, C, Ty, Q.DL);
+static Value *SimplifyCastInst(unsigned CastOpc, Value *Op,
+ Type *Ty, const Query &Q, unsigned MaxRecurse) {
+ if (auto *C = dyn_cast<Constant>(Op))
+ return ConstantFoldCastOperand(CastOpc, C, Ty, Q.DL);
+
+ if (auto *CI = dyn_cast<CastInst>(Op)) {
+ auto *Src = CI->getOperand(0);
+ Type *SrcTy = Src->getType();
+ Type *MidTy = CI->getType();
+ Type *DstTy = Ty;
+ if (Src->getType() == Ty) {
+ auto FirstOp = static_cast<Instruction::CastOps>(CI->getOpcode());
+ auto SecondOp = static_cast<Instruction::CastOps>(CastOpc);
+ Type *SrcIntPtrTy =
+ SrcTy->isPtrOrPtrVectorTy() ? Q.DL.getIntPtrType(SrcTy) : nullptr;
+ Type *MidIntPtrTy =
+ MidTy->isPtrOrPtrVectorTy() ? Q.DL.getIntPtrType(MidTy) : nullptr;
+ Type *DstIntPtrTy =
+ DstTy->isPtrOrPtrVectorTy() ? Q.DL.getIntPtrType(DstTy) : nullptr;
+ if (CastInst::isEliminableCastPair(FirstOp, SecondOp, SrcTy, MidTy, DstTy,
+ SrcIntPtrTy, MidIntPtrTy,
+ DstIntPtrTy) == Instruction::BitCast)
+ return Src;
+ }
+ }
+
+ // bitcast x -> x
+ if (CastOpc == Instruction::BitCast)
+ if (Op->getType() == Ty)
+ return Op;
return nullptr;
}
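The cast-pair branch above is the interesting new case: when a cast of a cast lands back on the source type and CastInst::isEliminableCastPair says the pair degenerates to a bitcast, the whole chain is just the original value. A plain C++ sketch of why such round-trips are safe to drop (illustration only; whether a given pair qualifies is decided by isEliminableCastPair):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X : {0u, 1u, 0xDEADBEEFu, 0xFFFFFFFFu}) {
    uint64_t Wide = X;                           // zext i32 -> i64
    uint32_t Back = static_cast<uint32_t>(Wide); // trunc i64 -> i32
    assert(Back == X); // the round-trip is the identity on the source value
  }
  return 0;
}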
-Value *llvm::SimplifyTruncInst(Value *Op, Type *Ty, const DataLayout &DL,
- const TargetLibraryInfo *TLI,
- const DominatorTree *DT, AssumptionCache *AC,
- const Instruction *CxtI) {
- return ::SimplifyTruncInst(Op, Ty, Query(DL, TLI, DT, AC, CxtI),
- RecursionLimit);
+Value *llvm::SimplifyCastInst(unsigned CastOpc, Value *Op, Type *Ty,
+ const DataLayout &DL,
+ const TargetLibraryInfo *TLI,
+ const DominatorTree *DT, AssumptionCache *AC,
+ const Instruction *CxtI) {
+ return ::SimplifyCastInst(CastOpc, Op, Ty, Query(DL, TLI, DT, AC, CxtI),
+ RecursionLimit);
}
//=== Helper functions for higher up the class hierarchy.
@@ -3837,6 +4099,8 @@ static Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS,
return SimplifyFSubInst(LHS, RHS, FMF, Q, MaxRecurse);
case Instruction::FMul:
return SimplifyFMulInst(LHS, RHS, FMF, Q, MaxRecurse);
+ case Instruction::FDiv:
+ return SimplifyFDivInst(LHS, RHS, FMF, Q, MaxRecurse);
default:
return SimplifyBinOp(Opcode, LHS, RHS, Q, MaxRecurse);
}
@@ -4223,21 +4487,23 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL,
TLI, DT, AC, I);
break;
}
- case Instruction::Trunc:
- Result =
- SimplifyTruncInst(I->getOperand(0), I->getType(), DL, TLI, DT, AC, I);
+#define HANDLE_CAST_INST(num, opc, clas) case Instruction::opc:
+#include "llvm/IR/Instruction.def"
+#undef HANDLE_CAST_INST
+ Result = SimplifyCastInst(I->getOpcode(), I->getOperand(0), I->getType(),
+ DL, TLI, DT, AC, I);
break;
}
// In general, it is possible for computeKnownBits to determine all bits in a
// value even when the operands are not all constants.
- if (!Result && I->getType()->isIntegerTy()) {
+ if (!Result && I->getType()->isIntOrIntVectorTy()) {
unsigned BitWidth = I->getType()->getScalarSizeInBits();
APInt KnownZero(BitWidth, 0);
APInt KnownOne(BitWidth, 0);
computeKnownBits(I, KnownZero, KnownOne, DL, /*Depth*/0, AC, I, DT);
if ((KnownZero | KnownOne).isAllOnesValue())
- Result = ConstantInt::get(I->getContext(), KnownOne);
+ Result = ConstantInt::get(I->getType(), KnownOne);
}
/// If called on unreachable code, the above logic may report that the
diff --git a/contrib/llvm/lib/Analysis/IteratedDominanceFrontier.cpp b/contrib/llvm/lib/Analysis/IteratedDominanceFrontier.cpp
index 3ab6b5d60905..d1374acd963e 100644
--- a/contrib/llvm/lib/Analysis/IteratedDominanceFrontier.cpp
+++ b/contrib/llvm/lib/Analysis/IteratedDominanceFrontier.cpp
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-/// \brief Compute iterated dominance frontiers using a linear time algorithm.
+// Compute iterated dominance frontiers using a linear time algorithm.
//
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Analysis/LazyBlockFrequencyInfo.cpp b/contrib/llvm/lib/Analysis/LazyBlockFrequencyInfo.cpp
index 7debfde87d2a..596b6fc1afb5 100644
--- a/contrib/llvm/lib/Analysis/LazyBlockFrequencyInfo.cpp
+++ b/contrib/llvm/lib/Analysis/LazyBlockFrequencyInfo.cpp
@@ -15,7 +15,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
-#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/LazyBranchProbabilityInfo.h"
#include "llvm/Analysis/LoopInfo.h"
using namespace llvm;
@@ -24,7 +24,7 @@ using namespace llvm;
INITIALIZE_PASS_BEGIN(LazyBlockFrequencyInfoPass, DEBUG_TYPE,
"Lazy Block Frequency Analysis", true, true)
-INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LazyBPIPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(LazyBlockFrequencyInfoPass, DEBUG_TYPE,
"Lazy Block Frequency Analysis", true, true)
@@ -40,7 +40,7 @@ void LazyBlockFrequencyInfoPass::print(raw_ostream &OS, const Module *) const {
}
void LazyBlockFrequencyInfoPass::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<BranchProbabilityInfoWrapperPass>();
+ LazyBranchProbabilityInfoPass::getLazyBPIAnalysisUsage(AU);
AU.addRequired<LoopInfoWrapperPass>();
AU.setPreservesAll();
}
@@ -48,21 +48,20 @@ void LazyBlockFrequencyInfoPass::getAnalysisUsage(AnalysisUsage &AU) const {
void LazyBlockFrequencyInfoPass::releaseMemory() { LBFI.releaseMemory(); }
bool LazyBlockFrequencyInfoPass::runOnFunction(Function &F) {
- BranchProbabilityInfo &BPI =
- getAnalysis<BranchProbabilityInfoWrapperPass>().getBPI();
+ auto &BPIPass = getAnalysis<LazyBranchProbabilityInfoPass>();
LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- LBFI.setAnalysis(&F, &BPI, &LI);
+ LBFI.setAnalysis(&F, &BPIPass, &LI);
return false;
}
void LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage(AnalysisUsage &AU) {
- AU.addRequired<BranchProbabilityInfoWrapperPass>();
+ LazyBranchProbabilityInfoPass::getLazyBPIAnalysisUsage(AU);
AU.addRequired<LazyBlockFrequencyInfoPass>();
AU.addRequired<LoopInfoWrapperPass>();
}
void llvm::initializeLazyBFIPassPass(PassRegistry &Registry) {
- INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass);
+ initializeLazyBPIPassPass(Registry);
INITIALIZE_PASS_DEPENDENCY(LazyBlockFrequencyInfoPass);
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
}
diff --git a/contrib/llvm/lib/Analysis/LazyBranchProbabilityInfo.cpp b/contrib/llvm/lib/Analysis/LazyBranchProbabilityInfo.cpp
new file mode 100644
index 000000000000..b51c6beb7959
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/LazyBranchProbabilityInfo.cpp
@@ -0,0 +1,63 @@
+//===- LazyBranchProbabilityInfo.cpp - Lazy Branch Probability Analysis ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is an alternative analysis pass to BranchProbabilityInfoWrapperPass.
+// The difference is that with this pass the branch probabilities are not
+// computed when the analysis pass is executed but rather when the BPI result
+// is explicitly requested by the analysis client.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/LazyBranchProbabilityInfo.h"
+#include "llvm/Analysis/LoopInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "lazy-branch-prob"
+
+INITIALIZE_PASS_BEGIN(LazyBranchProbabilityInfoPass, DEBUG_TYPE,
+ "Lazy Branch Probability Analysis", true, true)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_END(LazyBranchProbabilityInfoPass, DEBUG_TYPE,
+ "Lazy Branch Probability Analysis", true, true)
+
+char LazyBranchProbabilityInfoPass::ID = 0;
+
+LazyBranchProbabilityInfoPass::LazyBranchProbabilityInfoPass()
+ : FunctionPass(ID) {
+ initializeLazyBranchProbabilityInfoPassPass(*PassRegistry::getPassRegistry());
+}
+
+void LazyBranchProbabilityInfoPass::print(raw_ostream &OS,
+ const Module *) const {
+ LBPI->getCalculated().print(OS);
+}
+
+void LazyBranchProbabilityInfoPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.setPreservesAll();
+}
+
+void LazyBranchProbabilityInfoPass::releaseMemory() { LBPI.reset(); }
+
+bool LazyBranchProbabilityInfoPass::runOnFunction(Function &F) {
+ LoopInfo &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ LBPI = llvm::make_unique<LazyBranchProbabilityInfo>(&F, &LI);
+ return false;
+}
+
+void LazyBranchProbabilityInfoPass::getLazyBPIAnalysisUsage(AnalysisUsage &AU) {
+ AU.addRequired<LazyBranchProbabilityInfoPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+}
+
+void llvm::initializeLazyBPIPassPass(PassRegistry &Registry) {
+ INITIALIZE_PASS_DEPENDENCY(LazyBranchProbabilityInfoPass);
+ INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass);
+}
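For readers new to the "lazy" wrapper idea described in this file's header comment, a language-level sketch follows (hypothetical names, not the LLVM classes): the pass only remembers its inputs at run time, and the expensive analysis is computed and cached the first time a client calls getCalculated().

struct BPIResult { /* branch probabilities would live here */ };

class LazyBPISketch {
  bool Calculated = false;
  BPIResult Result; // filled in on first request, then reused
  BPIResult computeExpensively() { return BPIResult(); }
public:
  const BPIResult &getCalculated() {
    if (!Calculated) { // defer the real work until somebody asks
      Result = computeExpensively();
      Calculated = true;
    }
    return Result;
  }
};

int main() {
  LazyBPISketch LBPI;         // cheap: nothing computed yet
  (void)LBPI.getCalculated(); // first query triggers the computation
  return 0;
}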
diff --git a/contrib/llvm/lib/Analysis/LazyCallGraph.cpp b/contrib/llvm/lib/Analysis/LazyCallGraph.cpp
index acff8529b151..f7cf8c6729f2 100644
--- a/contrib/llvm/lib/Analysis/LazyCallGraph.cpp
+++ b/contrib/llvm/lib/Analysis/LazyCallGraph.cpp
@@ -8,7 +8,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/LazyCallGraph.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instructions.h"
@@ -23,39 +26,11 @@ using namespace llvm;
static void addEdge(SmallVectorImpl<LazyCallGraph::Edge> &Edges,
DenseMap<Function *, int> &EdgeIndexMap, Function &F,
LazyCallGraph::Edge::Kind EK) {
- // Note that we consider *any* function with a definition to be a viable
- // edge. Even if the function's definition is subject to replacement by
- // some other module (say, a weak definition) there may still be
- // optimizations which essentially speculate based on the definition and
- // a way to check that the specific definition is in fact the one being
- // used. For example, this could be done by moving the weak definition to
- // a strong (internal) definition and making the weak definition be an
- // alias. Then a test of the address of the weak function against the new
- // strong definition's address would be an effective way to determine the
- // safety of optimizing a direct call edge.
- if (!F.isDeclaration() &&
- EdgeIndexMap.insert({&F, Edges.size()}).second) {
- DEBUG(dbgs() << " Added callable function: " << F.getName() << "\n");
- Edges.emplace_back(LazyCallGraph::Edge(F, EK));
- }
-}
-
-static void findReferences(SmallVectorImpl<Constant *> &Worklist,
- SmallPtrSetImpl<Constant *> &Visited,
- SmallVectorImpl<LazyCallGraph::Edge> &Edges,
- DenseMap<Function *, int> &EdgeIndexMap) {
- while (!Worklist.empty()) {
- Constant *C = Worklist.pop_back_val();
-
- if (Function *F = dyn_cast<Function>(C)) {
- addEdge(Edges, EdgeIndexMap, *F, LazyCallGraph::Edge::Ref);
- continue;
- }
+ if (!EdgeIndexMap.insert({&F, Edges.size()}).second)
+ return;
- for (Value *Op : C->operand_values())
- if (Visited.insert(cast<Constant>(Op)).second)
- Worklist.push_back(cast<Constant>(Op));
- }
+ DEBUG(dbgs() << " Added callable function: " << F.getName() << "\n");
+ Edges.emplace_back(LazyCallGraph::Edge(F, EK));
}
LazyCallGraph::Node::Node(LazyCallGraph &G, Function &F)
@@ -72,14 +47,26 @@ LazyCallGraph::Node::Node(LazyCallGraph &G, Function &F)
// are trivially added, but to accumulate the latter we walk the instructions
// and add every operand which is a constant to the worklist to process
// afterward.
+ //
+ // Note that we consider *any* function with a definition to be a viable
+ // edge. Even if the function's definition is subject to replacement by
+ // some other module (say, a weak definition) there may still be
+ // optimizations which essentially speculate based on the definition and
+ // a way to check that the specific definition is in fact the one being
+ // used. For example, this could be done by moving the weak definition to
+ // a strong (internal) definition and making the weak definition be an
+ // alias. Then a test of the address of the weak function against the new
+ // strong definition's address would be an effective way to determine the
+ // safety of optimizing a direct call edge.
for (BasicBlock &BB : F)
for (Instruction &I : BB) {
if (auto CS = CallSite(&I))
if (Function *Callee = CS.getCalledFunction())
- if (Callees.insert(Callee).second) {
- Visited.insert(Callee);
- addEdge(Edges, EdgeIndexMap, *Callee, LazyCallGraph::Edge::Call);
- }
+ if (!Callee->isDeclaration())
+ if (Callees.insert(Callee).second) {
+ Visited.insert(Callee);
+ addEdge(Edges, EdgeIndexMap, *Callee, LazyCallGraph::Edge::Call);
+ }
for (Value *Op : I.operand_values())
if (Constant *C = dyn_cast<Constant>(Op))
@@ -90,7 +77,9 @@ LazyCallGraph::Node::Node(LazyCallGraph &G, Function &F)
// We've collected all the constant (and thus potentially function or
// function containing) operands to all of the instructions in the function.
// Process them (recursively) collecting every function found.
- findReferences(Worklist, Visited, Edges, EdgeIndexMap);
+ visitReferences(Worklist, Visited, [&](Function &F) {
+ addEdge(Edges, EdgeIndexMap, F, LazyCallGraph::Edge::Ref);
+ });
}
void LazyCallGraph::Node::insertEdgeInternal(Function &Target, Edge::Kind EK) {
@@ -144,7 +133,9 @@ LazyCallGraph::LazyCallGraph(Module &M) : NextDFSNumber(0) {
DEBUG(dbgs() << " Adding functions referenced by global initializers to the "
"entry set.\n");
- findReferences(Worklist, Visited, EntryEdges, EntryIndexMap);
+ visitReferences(Worklist, Visited, [&](Function &F) {
+ addEdge(EntryEdges, EntryIndexMap, F, LazyCallGraph::Edge::Ref);
+ });
for (const Edge &E : EntryEdges)
RefSCCEntryNodes.push_back(&E.getFunction());
@@ -199,6 +190,57 @@ void LazyCallGraph::SCC::verify() {
}
#endif
+bool LazyCallGraph::SCC::isParentOf(const SCC &C) const {
+ if (this == &C)
+ return false;
+
+ for (Node &N : *this)
+ for (Edge &E : N.calls())
+ if (Node *CalleeN = E.getNode())
+ if (OuterRefSCC->G->lookupSCC(*CalleeN) == &C)
+ return true;
+
+ // No edges found.
+ return false;
+}
+
+bool LazyCallGraph::SCC::isAncestorOf(const SCC &TargetC) const {
+ if (this == &TargetC)
+ return false;
+
+ LazyCallGraph &G = *OuterRefSCC->G;
+
+ // Start with this SCC.
+ SmallPtrSet<const SCC *, 16> Visited = {this};
+ SmallVector<const SCC *, 16> Worklist = {this};
+
+ // Walk down the graph until we run out of edges or find a path to TargetC.
+ do {
+ const SCC &C = *Worklist.pop_back_val();
+ for (Node &N : C)
+ for (Edge &E : N.calls()) {
+ Node *CalleeN = E.getNode();
+ if (!CalleeN)
+ continue;
+ SCC *CalleeC = G.lookupSCC(*CalleeN);
+ if (!CalleeC)
+ continue;
+
+ // If the callee's SCC is the TargetC, we're done.
+ if (CalleeC == &TargetC)
+ return true;
+
+ // If this is the first time we've reached this SCC, put it on the
+ // worklist to recurse through.
+ if (Visited.insert(CalleeC).second)
+ Worklist.push_back(CalleeC);
+ }
+ } while (!Worklist.empty());
+
+ // No paths found.
+ return false;
+}
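isAncestorOf is a plain worklist reachability search over the DAG of SCCs formed by call edges. The same Visited/Worklist pattern on a toy adjacency list (hypothetical data, not LLVM types):

#include <cassert>
#include <set>
#include <vector>

// Worklist-driven reachability over a DAG given as adjacency lists, mirroring
// the structure of isAncestorOf above.
static bool reachable(int From, int To,
                      const std::vector<std::vector<int>> &Succ) {
  if (From == To)
    return false; // an SCC is not considered its own ancestor
  std::set<int> Visited = {From};
  std::vector<int> Worklist = {From};
  while (!Worklist.empty()) {
    int C = Worklist.back();
    Worklist.pop_back();
    for (int Callee : Succ[C]) {
      if (Callee == To)
        return true;
      if (Visited.insert(Callee).second)
        Worklist.push_back(Callee);
    }
  }
  return false;
}

int main() {
  // 0 -> 1 -> 2, and 3 is disconnected.
  std::vector<std::vector<int>> Succ = {{1}, {2}, {}, {}};
  assert(reachable(0, 2, Succ));
  assert(!reachable(2, 0, Succ));
  assert(!reachable(0, 3, Succ));
  return 0;
}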
+
LazyCallGraph::RefSCC::RefSCC(LazyCallGraph &G) : G(&G) {}
void LazyCallGraph::RefSCC::dump() const {
@@ -211,11 +253,17 @@ void LazyCallGraph::RefSCC::verify() {
assert(!SCCs.empty() && "Can't have an empty SCC!");
// Verify basic properties of the SCCs.
+ SmallPtrSet<SCC *, 4> SCCSet;
for (SCC *C : SCCs) {
assert(C && "Can't have a null SCC!");
C->verify();
assert(&C->getOuterRefSCC() == this &&
"SCC doesn't think it is inside this RefSCC!");
+ bool Inserted = SCCSet.insert(C).second;
+ assert(Inserted && "Found a duplicate SCC!");
+ auto IndexIt = SCCIndices.find(C);
+ assert(IndexIt != SCCIndices.end() &&
+ "Found an SCC that doesn't have an index!");
}
// Check that our indices map correctly.
@@ -223,6 +271,7 @@ void LazyCallGraph::RefSCC::verify() {
SCC *C = SCCIndexPair.first;
int i = SCCIndexPair.second;
assert(C && "Can't have a null SCC in the indices!");
+ assert(SCCSet.count(C) && "Found an index for an SCC not in the RefSCC!");
assert(SCCs[i] == C && "Index doesn't point to SCC!");
}
@@ -243,6 +292,20 @@ void LazyCallGraph::RefSCC::verify() {
"Edge to a RefSCC missing us in its parent set.");
}
}
+
+ // Check that our parents are actually parents.
+ for (RefSCC *ParentRC : Parents) {
+ assert(ParentRC != this && "Cannot be our own parent!");
+ auto HasConnectingEdge = [&] {
+ for (SCC &C : *ParentRC)
+ for (Node &N : C)
+ for (Edge &E : N)
+ if (G->lookupRefSCC(*E.getNode()) == this)
+ return true;
+ return false;
+ };
+ assert(HasConnectingEdge() && "No edge connects the parent to us!");
+ }
}
#endif
@@ -261,12 +324,153 @@ bool LazyCallGraph::RefSCC::isDescendantOf(const RefSCC &C) const {
return false;
}
+/// Generic helper that updates a postorder sequence of SCCs for a potentially
+/// cycle-introducing edge insertion.
+///
+/// A postorder sequence of SCCs of a directed graph has one fundamental
+/// property: all edges in the DAG of SCCs point "up" the sequence. That is,
+/// all edges in the SCC DAG point to prior SCCs in the sequence.
+///
+/// This routine both updates a postorder sequence and uses that sequence to
+/// compute the set of SCCs connected into a cycle. It should only be called to
+/// insert a "downward" edge which will require changing the sequence to
+/// restore it to a postorder.
+///
+/// When inserting an edge from an earlier SCC to a later SCC in some postorder
+/// sequence, all of the SCCs which may be impacted are in the closed range of
+/// those two within the postorder sequence. The algorithm used here to restore
+/// the state is as follows:
+///
+/// 1) Starting from the source SCC, construct a set of SCCs which reach the
+/// source SCC consisting of just the source SCC. Then scan toward the
+/// target SCC in postorder and for each SCC, if it has an edge to an SCC
+/// in the set, add it to the set. Otherwise, the source SCC is not
+/// a successor, move it in the postorder sequence to immediately before
+/// the source SCC, shifting the source SCC and all SCCs in the set one
+/// position toward the target SCC. Stop scanning after processing the
+/// target SCC.
+/// 2) If the source SCC is now past the target SCC in the postorder sequence,
+/// and thus the new edge will flow toward the start, we are done.
+/// 3) Otherwise, starting from the target SCC, walk all edges which reach an
+/// SCC between the source and the target, and add them to the set of
+/// connected SCCs, then recurse through them. Once a complete set of the
+/// SCCs the target connects to is known, hoist the remaining SCCs between
+/// the source and the target to be above the target. Note that there is no
+/// need to process the source SCC, it is already known to connect.
+/// 4) At this point, all of the SCCs in the closed range between the source
+/// SCC and the target SCC in the postorder sequence are connected,
+/// including the target SCC and the source SCC. Inserting the edge from
+/// the source SCC to the target SCC will form a cycle out of precisely
+/// these SCCs. Thus we can merge all of the SCCs in this closed range into
+/// a single SCC.
+///
+/// This process has various important properties:
+/// - Only mutates the SCCs when adding the edge actually changes the SCC
+/// structure.
+/// - Never mutates SCCs which are unaffected by the change.
+/// - Updates the postorder sequence to correctly satisfy the postorder
+/// constraint after the edge is inserted.
+/// - Only reorders SCCs in the closed postorder sequence from the source to
+/// the target, so easy to bound how much has changed even in the ordering.
+/// - Big-O is the number of edges in the closed postorder range of SCCs from
+/// source to target.
+///
+/// This helper routine, in addition to updating the postorder sequence itself
+/// will also update a map from SCCs to indices within that sequence.
+///
+/// The sequence and the map must operate on pointers to the SCC type.
+///
+/// Two callbacks must be provided. The first computes the subset of SCCs in
+/// the postorder closed range from the source to the target which connect to
+/// the source SCC via some (transitive) set of edges. The second computes the
+/// subset of the same range which the target SCC connects to via some
+/// (transitive) set of edges. Both callbacks should populate the set argument
+/// provided.
+template <typename SCCT, typename PostorderSequenceT, typename SCCIndexMapT,
+ typename ComputeSourceConnectedSetCallableT,
+ typename ComputeTargetConnectedSetCallableT>
+static iterator_range<typename PostorderSequenceT::iterator>
+updatePostorderSequenceForEdgeInsertion(
+ SCCT &SourceSCC, SCCT &TargetSCC, PostorderSequenceT &SCCs,
+ SCCIndexMapT &SCCIndices,
+ ComputeSourceConnectedSetCallableT ComputeSourceConnectedSet,
+ ComputeTargetConnectedSetCallableT ComputeTargetConnectedSet) {
+ int SourceIdx = SCCIndices[&SourceSCC];
+ int TargetIdx = SCCIndices[&TargetSCC];
+ assert(SourceIdx < TargetIdx && "Cannot have equal indices here!");
+
+ SmallPtrSet<SCCT *, 4> ConnectedSet;
+
+ // Compute the SCCs which (transitively) reach the source.
+ ComputeSourceConnectedSet(ConnectedSet);
+
+ // Partition the SCCs in this part of the post-order sequence so only SCCs
+ // connecting to the source remain between it and the target. This is
+ // a benign partition as it preserves postorder.
+ auto SourceI = std::stable_partition(
+ SCCs.begin() + SourceIdx, SCCs.begin() + TargetIdx + 1,
+ [&ConnectedSet](SCCT *C) { return !ConnectedSet.count(C); });
+ for (int i = SourceIdx, e = TargetIdx + 1; i < e; ++i)
+ SCCIndices.find(SCCs[i])->second = i;
+
+ // If the target doesn't connect to the source, then we've corrected the
+ // post-order and there are no cycles formed.
+ if (!ConnectedSet.count(&TargetSCC)) {
+ assert(SourceI > (SCCs.begin() + SourceIdx) &&
+ "Must have moved the source to fix the post-order.");
+ assert(*std::prev(SourceI) == &TargetSCC &&
+ "Last SCC to move should have bene the target.");
+
+ // Return an empty range at the target SCC indicating there is nothing to
+ // merge.
+ return make_range(std::prev(SourceI), std::prev(SourceI));
+ }
+
+ assert(SCCs[TargetIdx] == &TargetSCC &&
+ "Should not have moved target if connected!");
+ SourceIdx = SourceI - SCCs.begin();
+ assert(SCCs[SourceIdx] == &SourceSCC &&
+ "Bad updated index computation for the source SCC!");
+
+
+ // See whether there are any remaining intervening SCCs between the source
+ // and target. If so we need to make sure they all are reachable from the
+ // target.
+ if (SourceIdx + 1 < TargetIdx) {
+ ConnectedSet.clear();
+ ComputeTargetConnectedSet(ConnectedSet);
+
+ // Partition SCCs so that only SCCs reached from the target remain between
+ // the source and the target. This preserves postorder.
+ auto TargetI = std::stable_partition(
+ SCCs.begin() + SourceIdx + 1, SCCs.begin() + TargetIdx + 1,
+ [&ConnectedSet](SCCT *C) { return ConnectedSet.count(C); });
+ for (int i = SourceIdx + 1, e = TargetIdx + 1; i < e; ++i)
+ SCCIndices.find(SCCs[i])->second = i;
+ TargetIdx = std::prev(TargetI) - SCCs.begin();
+ assert(SCCs[TargetIdx] == &TargetSCC &&
+ "Should always end with the target!");
+ }
+
+ // At this point, we know that connecting source to target forms a cycle
+ // because target connects back to source, and we know that all of the SCCs
+ // between the source and target in the postorder sequence participate in that
+ // cycle.
+ return make_range(SCCs.begin() + SourceIdx, SCCs.begin() + TargetIdx);
+}
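The core move in the helper above is the stable_partition over the closed source-to-target range: SCCs not known to reach the source slide ahead (keeping their relative order), leaving the source adjacent to the connected run. A standalone sketch with ints standing in for SCC pointers (not LLVM code):

#include <algorithm>
#include <cassert>
#include <set>
#include <vector>

int main() {
  // Postorder sequence of "SCC" ids; 0 is the source and 4 is the target, so
  // the whole vector is the closed range the helper would operate on.
  std::vector<int> Seq = {0, 1, 2, 3, 4};
  // Suppose only 0, 3 and the target 4 transitively reach the source.
  std::set<int> Connected = {0, 3, 4};
  auto SourceI = std::stable_partition(
      Seq.begin(), Seq.end(),
      [&](int C) { return !Connected.count(C); });
  // Unconnected SCCs 1 and 2 now precede the connected run {0, 3, 4}; order
  // within each group is preserved, so the sequence remains a postorder.
  assert((Seq == std::vector<int>{1, 2, 0, 3, 4}));
  assert(*SourceI == 0); // SourceI points at the repositioned source SCC
  return 0;
}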
+
SmallVector<LazyCallGraph::SCC *, 1>
LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) {
assert(!SourceN[TargetN].isCall() && "Must start with a ref edge!");
-
SmallVector<SCC *, 1> DeletedSCCs;
+#ifndef NDEBUG
+ // In a debug build, verify the RefSCC is valid to start with and when this
+ // routine finishes.
+ verify();
+ auto VerifyOnExit = make_scope_exit([&]() { verify(); });
+#endif
+
SCC &SourceSCC = *G->lookupSCC(SourceN);
SCC &TargetSCC = *G->lookupSCC(TargetN);
@@ -274,10 +478,6 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) {
// we've just added more connectivity.
if (&SourceSCC == &TargetSCC) {
SourceN.setEdgeKind(TargetN.getFunction(), Edge::Call);
-#ifndef NDEBUG
- // Check that the RefSCC is still valid.
- verify();
-#endif
return DeletedSCCs;
}
@@ -291,114 +491,44 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) {
int TargetIdx = SCCIndices[&TargetSCC];
if (TargetIdx < SourceIdx) {
SourceN.setEdgeKind(TargetN.getFunction(), Edge::Call);
-#ifndef NDEBUG
- // Check that the RefSCC is still valid.
- verify();
-#endif
return DeletedSCCs;
}
- // When we do have an edge from an earlier SCC to a later SCC in the
- // postorder sequence, all of the SCCs which may be impacted are in the
- // closed range of those two within the postorder sequence. The algorithm to
- // restore the state is as follows:
- //
- // 1) Starting from the source SCC, construct a set of SCCs which reach the
- // source SCC consisting of just the source SCC. Then scan toward the
- // target SCC in postorder and for each SCC, if it has an edge to an SCC
- // in the set, add it to the set. Otherwise, the source SCC is not
- // a successor, move it in the postorder sequence to immediately before
- // the source SCC, shifting the source SCC and all SCCs in the set one
- // position toward the target SCC. Stop scanning after processing the
- // target SCC.
- // 2) If the source SCC is now past the target SCC in the postorder sequence,
- // and thus the new edge will flow toward the start, we are done.
- // 3) Otherwise, starting from the target SCC, walk all edges which reach an
- // SCC between the source and the target, and add them to the set of
- // connected SCCs, then recurse through them. Once a complete set of the
- // SCCs the target connects to is known, hoist the remaining SCCs between
- // the source and the target to be above the target. Note that there is no
- // need to process the source SCC, it is already known to connect.
- // 4) At this point, all of the SCCs in the closed range between the source
- // SCC and the target SCC in the postorder sequence are connected,
- // including the target SCC and the source SCC. Inserting the edge from
- // the source SCC to the target SCC will form a cycle out of precisely
- // these SCCs. Thus we can merge all of the SCCs in this closed range into
- // a single SCC.
- //
- // This process has various important properties:
- // - Only mutates the SCCs when adding the edge actually changes the SCC
- // structure.
- // - Never mutates SCCs which are unaffected by the change.
- // - Updates the postorder sequence to correctly satisfy the postorder
- // constraint after the edge is inserted.
- // - Only reorders SCCs in the closed postorder sequence from the source to
- // the target, so easy to bound how much has changed even in the ordering.
- // - Big-O is the number of edges in the closed postorder range of SCCs from
- // source to target.
-
- assert(SourceIdx < TargetIdx && "Cannot have equal indices here!");
- SmallPtrSet<SCC *, 4> ConnectedSet;
-
// Compute the SCCs which (transitively) reach the source.
- ConnectedSet.insert(&SourceSCC);
- auto IsConnected = [&](SCC &C) {
- for (Node &N : C)
- for (Edge &E : N.calls()) {
- assert(E.getNode() && "Must have formed a node within an SCC!");
- if (ConnectedSet.count(G->lookupSCC(*E.getNode())))
- return true;
- }
-
- return false;
- };
-
- for (SCC *C :
- make_range(SCCs.begin() + SourceIdx + 1, SCCs.begin() + TargetIdx + 1))
- if (IsConnected(*C))
- ConnectedSet.insert(C);
-
- // Partition the SCCs in this part of the port-order sequence so only SCCs
- // connecting to the source remain between it and the target. This is
- // a benign partition as it preserves postorder.
- auto SourceI = std::stable_partition(
- SCCs.begin() + SourceIdx, SCCs.begin() + TargetIdx + 1,
- [&ConnectedSet](SCC *C) { return !ConnectedSet.count(C); });
- for (int i = SourceIdx, e = TargetIdx + 1; i < e; ++i)
- SCCIndices.find(SCCs[i])->second = i;
-
- // If the target doesn't connect to the source, then we've corrected the
- // post-order and there are no cycles formed.
- if (!ConnectedSet.count(&TargetSCC)) {
- assert(SourceI > (SCCs.begin() + SourceIdx) &&
- "Must have moved the source to fix the post-order.");
- assert(*std::prev(SourceI) == &TargetSCC &&
- "Last SCC to move should have bene the target.");
- SourceN.setEdgeKind(TargetN.getFunction(), Edge::Call);
+ auto ComputeSourceConnectedSet = [&](SmallPtrSetImpl<SCC *> &ConnectedSet) {
#ifndef NDEBUG
+ // Check that the RefSCC is still valid before computing this as the
+ // results will be nonsensical if we've broken its invariants.
verify();
#endif
- return DeletedSCCs;
- }
+ ConnectedSet.insert(&SourceSCC);
+ auto IsConnected = [&](SCC &C) {
+ for (Node &N : C)
+ for (Edge &E : N.calls()) {
+ assert(E.getNode() && "Must have formed a node within an SCC!");
+ if (ConnectedSet.count(G->lookupSCC(*E.getNode())))
+ return true;
+ }
- assert(SCCs[TargetIdx] == &TargetSCC &&
- "Should not have moved target if connected!");
- SourceIdx = SourceI - SCCs.begin();
+ return false;
+ };
+
+ for (SCC *C :
+ make_range(SCCs.begin() + SourceIdx + 1, SCCs.begin() + TargetIdx + 1))
+ if (IsConnected(*C))
+ ConnectedSet.insert(C);
+ };
+ // Use a normal worklist to find which SCCs the target connects to. We still
+ // bound the search based on the range in the postorder list we care about,
+ // but because this is forward connectivity we just "recurse" through the
+ // edges.
+ auto ComputeTargetConnectedSet = [&](SmallPtrSetImpl<SCC *> &ConnectedSet) {
#ifndef NDEBUG
- // Check that the RefSCC is still valid.
- verify();
+ // Check that the RefSCC is still valid before computing this as the
+ // results will be nonsensical if we've broken its invariants.
+ verify();
#endif
-
- // See whether there are any remaining intervening SCCs between the source
- // and target. If so we need to make sure they all are reachable form the
- // target.
- if (SourceIdx + 1 < TargetIdx) {
- // Use a normal worklist to find which SCCs the target connects to. We still
- // bound the search based on the range in the postorder list we care about,
- // but because this is forward connectivity we just "recurse" through the
- // edges.
- ConnectedSet.clear();
ConnectedSet.insert(&TargetSCC);
SmallVector<SCC *, 4> Worklist;
Worklist.push_back(&TargetSCC);
@@ -421,35 +551,36 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) {
Worklist.push_back(&EdgeC);
}
} while (!Worklist.empty());
+ };
- // Partition SCCs so that only SCCs reached from the target remain between
- // the source and the target. This preserves postorder.
- auto TargetI = std::stable_partition(
- SCCs.begin() + SourceIdx + 1, SCCs.begin() + TargetIdx + 1,
- [&ConnectedSet](SCC *C) { return ConnectedSet.count(C); });
- for (int i = SourceIdx + 1, e = TargetIdx + 1; i < e; ++i)
- SCCIndices.find(SCCs[i])->second = i;
- TargetIdx = std::prev(TargetI) - SCCs.begin();
- assert(SCCs[TargetIdx] == &TargetSCC &&
- "Should always end with the target!");
+ // Use a generic helper to update the postorder sequence of SCCs and return
+ // a range of any SCCs connected into a cycle by inserting this edge. This
+ // routine will also take care of updating the indices into the postorder
+ // sequence.
+ auto MergeRange = updatePostorderSequenceForEdgeInsertion(
+ SourceSCC, TargetSCC, SCCs, SCCIndices, ComputeSourceConnectedSet,
+ ComputeTargetConnectedSet);
+
+ // If the merge range is empty, then adding the edge didn't actually form any
+ // new cycles. We're done.
+ if (MergeRange.begin() == MergeRange.end()) {
+ // Now that the SCC structure is finalized, flip the kind to call.
+ SourceN.setEdgeKind(TargetN.getFunction(), Edge::Call);
+ return DeletedSCCs;
+ }
#ifndef NDEBUG
- // Check that the RefSCC is still valid.
- verify();
+ // Before merging, check that the RefSCC remains valid after all the
+ // postorder updates.
+ verify();
#endif
- }
- // At this point, we know that connecting source to target forms a cycle
- // because target connects back to source, and we know that all of the SCCs
- // between the source and target in the postorder sequence participate in that
- // cycle. This means that we need to merge all of these SCCs into a single
+ // Otherwise we need to merge all of the SCCs in the cycle into a single
// result SCC.
//
// NB: We merge into the target because all of these functions were already
// reachable from the target, meaning any SCC-wide properties deduced about it
// other than the set of functions within it will not have changed.
- auto MergeRange =
- make_range(SCCs.begin() + SourceIdx, SCCs.begin() + TargetIdx);
for (SCC *C : MergeRange) {
assert(C != &TargetSCC &&
"We merge *into* the target and shouldn't process it here!");
@@ -471,37 +602,55 @@ LazyCallGraph::RefSCC::switchInternalEdgeToCall(Node &SourceN, Node &TargetN) {
// Now that the SCC structure is finalized, flip the kind to call.
SourceN.setEdgeKind(TargetN.getFunction(), Edge::Call);
-#ifndef NDEBUG
- // And we're done! Verify in debug builds that the RefSCC is coherent.
- verify();
-#endif
+ // And we're done!
return DeletedSCCs;
}
-void LazyCallGraph::RefSCC::switchInternalEdgeToRef(Node &SourceN,
- Node &TargetN) {
+void LazyCallGraph::RefSCC::switchTrivialInternalEdgeToRef(Node &SourceN,
+ Node &TargetN) {
assert(SourceN[TargetN].isCall() && "Must start with a call edge!");
- SCC &SourceSCC = *G->lookupSCC(SourceN);
- SCC &TargetSCC = *G->lookupSCC(TargetN);
+#ifndef NDEBUG
+ // In a debug build, verify the RefSCC is valid to start with and when this
+ // routine finishes.
+ verify();
+ auto VerifyOnExit = make_scope_exit([&]() { verify(); });
+#endif
- assert(&SourceSCC.getOuterRefSCC() == this &&
+ assert(G->lookupRefSCC(SourceN) == this &&
"Source must be in this RefSCC.");
- assert(&TargetSCC.getOuterRefSCC() == this &&
+ assert(G->lookupRefSCC(TargetN) == this &&
"Target must be in this RefSCC.");
+ assert(G->lookupSCC(SourceN) != G->lookupSCC(TargetN) &&
+ "Source and Target must be in separate SCCs for this to be trivial!");
// Set the edge kind.
SourceN.setEdgeKind(TargetN.getFunction(), Edge::Ref);
+}
+
+iterator_range<LazyCallGraph::RefSCC::iterator>
+LazyCallGraph::RefSCC::switchInternalEdgeToRef(Node &SourceN, Node &TargetN) {
+ assert(SourceN[TargetN].isCall() && "Must start with a call edge!");
- // If this call edge is just connecting two separate SCCs within this RefSCC,
- // there is nothing to do.
- if (&SourceSCC != &TargetSCC) {
#ifndef NDEBUG
- // Check that the RefSCC is still valid.
- verify();
+ // In a debug build, verify the RefSCC is valid to start with and when this
+ // routine finishes.
+ verify();
+ auto VerifyOnExit = make_scope_exit([&]() { verify(); });
#endif
- return;
- }
+
+ assert(G->lookupRefSCC(SourceN) == this &&
+ "Source must be in this RefSCC.");
+ assert(G->lookupRefSCC(TargetN) == this &&
+ "Target must be in this RefSCC.");
+
+ SCC &TargetSCC = *G->lookupSCC(TargetN);
+ assert(G->lookupSCC(SourceN) == &TargetSCC && "Source and Target must be in "
+ "the same SCC to require the "
+ "full CG update.");
+
+ // Set the edge kind.
+ SourceN.setEdgeKind(TargetN.getFunction(), Edge::Ref);
// Otherwise we are removing a call edge from a single SCC. This may break
// the cycle. In order to compute the new set of SCCs, we need to do a small
@@ -635,10 +784,9 @@ void LazyCallGraph::RefSCC::switchInternalEdgeToRef(Node &SourceN,
// root DFS number.
auto SCCNodes = make_range(
PendingSCCStack.rbegin(),
- std::find_if(PendingSCCStack.rbegin(), PendingSCCStack.rend(),
- [RootDFSNumber](Node *N) {
- return N->DFSNumber < RootDFSNumber;
- }));
+ find_if(reverse(PendingSCCStack), [RootDFSNumber](const Node *N) {
+ return N->DFSNumber < RootDFSNumber;
+ }));
// Form a new SCC out of these nodes and then clear them off our pending
// stack.
@@ -663,10 +811,8 @@ void LazyCallGraph::RefSCC::switchInternalEdgeToRef(Node &SourceN,
for (int Idx = OldIdx, Size = SCCs.size(); Idx < Size; ++Idx)
SCCIndices[SCCs[Idx]] = Idx;
-#ifndef NDEBUG
- // We're done. Check the validity on our way out.
- verify();
-#endif
+ return make_range(SCCs.begin() + OldIdx,
+ SCCs.begin() + OldIdx + NewSCCs.size());
}
void LazyCallGraph::RefSCC::switchOutgoingEdgeToCall(Node &SourceN,
@@ -746,112 +892,113 @@ void LazyCallGraph::RefSCC::insertOutgoingEdge(Node &SourceN, Node &TargetN,
SmallVector<LazyCallGraph::RefSCC *, 1>
LazyCallGraph::RefSCC::insertIncomingRefEdge(Node &SourceN, Node &TargetN) {
- assert(G->lookupRefSCC(TargetN) == this && "Target must be in this SCC.");
-
- // We store the RefSCCs found to be connected in postorder so that we can use
- // that when merging. We also return this to the caller to allow them to
- // invalidate information pertaining to these RefSCCs.
- SmallVector<RefSCC *, 1> Connected;
-
+ assert(G->lookupRefSCC(TargetN) == this && "Target must be in this RefSCC.");
RefSCC &SourceC = *G->lookupRefSCC(SourceN);
- assert(&SourceC != this && "Source must not be in this SCC.");
+ assert(&SourceC != this && "Source must not be in this RefSCC.");
assert(SourceC.isDescendantOf(*this) &&
"Source must be a descendant of the Target.");
- // The algorithm we use for merging SCCs based on the cycle introduced here
- // is to walk the RefSCC inverted DAG formed by the parent sets. The inverse
- // graph has the same cycle properties as the actual DAG of the RefSCCs, and
- // when forming RefSCCs lazily by a DFS, the bottom of the graph won't exist
- // in many cases which should prune the search space.
- //
- // FIXME: We can get this pruning behavior even after the incremental RefSCC
- // formation by leaving behind (conservative) DFS numberings in the nodes,
- // and pruning the search with them. These would need to be cleverly updated
- // during the removal of intra-SCC edges, but could be preserved
- // conservatively.
- //
- // FIXME: This operation currently creates ordering stability problems
- // because we don't use stably ordered containers for the parent SCCs.
-
- // The set of RefSCCs that are connected to the parent, and thus will
- // participate in the merged connected component.
- SmallPtrSet<RefSCC *, 8> ConnectedSet;
- ConnectedSet.insert(this);
-
- // We build up a DFS stack of the parents chains.
- SmallVector<std::pair<RefSCC *, parent_iterator>, 8> DFSStack;
- SmallPtrSet<RefSCC *, 8> Visited;
- int ConnectedDepth = -1;
- DFSStack.push_back({&SourceC, SourceC.parent_begin()});
- do {
- auto DFSPair = DFSStack.pop_back_val();
- RefSCC *C = DFSPair.first;
- parent_iterator I = DFSPair.second;
- auto E = C->parent_end();
+ SmallVector<RefSCC *, 1> DeletedRefSCCs;
- while (I != E) {
- RefSCC &Parent = *I++;
-
- // If we have already processed this parent SCC, skip it, and remember
- // whether it was connected so we don't have to check the rest of the
- // stack. This also handles when we reach a child of the 'this' SCC (the
- // callee) which terminates the search.
- if (ConnectedSet.count(&Parent)) {
- assert(ConnectedDepth < (int)DFSStack.size() &&
- "Cannot have a connected depth greater than the DFS depth!");
- ConnectedDepth = DFSStack.size();
- continue;
+#ifndef NDEBUG
+ // In a debug build, verify the RefSCC is valid to start with and when this
+ // routine finishes.
+ verify();
+ auto VerifyOnExit = make_scope_exit([&]() { verify(); });
+#endif
+
+ int SourceIdx = G->RefSCCIndices[&SourceC];
+ int TargetIdx = G->RefSCCIndices[this];
+ assert(SourceIdx < TargetIdx &&
+ "Postorder list doesn't see edge as incoming!");
+
+ // Compute the RefSCCs which (transitively) reach the source. We do this by
+ // working backwards from the source using the parent set in each RefSCC,
+ // skipping any RefSCCs that don't fall in the postorder range. This has the
+ // advantage of walking the sparser parent edge (in high fan-out graphs) but
+ // more importantly this removes examining all forward edges in all RefSCCs
+ // within the postorder range which aren't in fact connected. Only connected
+ // RefSCCs (and their edges) are visited here.
+ auto ComputeSourceConnectedSet = [&](SmallPtrSetImpl<RefSCC *> &Set) {
+ Set.insert(&SourceC);
+ SmallVector<RefSCC *, 4> Worklist;
+ Worklist.push_back(&SourceC);
+ do {
+ RefSCC &RC = *Worklist.pop_back_val();
+ for (RefSCC &ParentRC : RC.parents()) {
+ // Skip any RefSCCs outside the range of source to target in the
+ // postorder sequence.
+ int ParentIdx = G->getRefSCCIndex(ParentRC);
+ assert(ParentIdx > SourceIdx && "Parent cannot precede source in postorder!");
+ if (ParentIdx > TargetIdx)
+ continue;
+ if (Set.insert(&ParentRC).second)
+ // First edge connecting to this parent, add it to our worklist.
+ Worklist.push_back(&ParentRC);
}
- if (Visited.count(&Parent))
- continue;
+ } while (!Worklist.empty());
+ };
- // We fully explore the depth-first space, adding nodes to the connected
- // set only as we pop them off, so "recurse" by rotating to the parent.
- DFSStack.push_back({C, I});
- C = &Parent;
- I = C->parent_begin();
- E = C->parent_end();
- }
+ // Use a normal worklist to find which SCCs the target connects to. We still
+ // bound the search based on the range in the postorder list we care about,
+ // but because this is forward connectivity we just "recurse" through the
+ // edges.
+ auto ComputeTargetConnectedSet = [&](SmallPtrSetImpl<RefSCC *> &Set) {
+ Set.insert(this);
+ SmallVector<RefSCC *, 4> Worklist;
+ Worklist.push_back(this);
+ do {
+ RefSCC &RC = *Worklist.pop_back_val();
+ for (SCC &C : RC)
+ for (Node &N : C)
+ for (Edge &E : N) {
+ assert(E.getNode() && "Must have formed a node!");
+ RefSCC &EdgeRC = *G->lookupRefSCC(*E.getNode());
+ if (G->getRefSCCIndex(EdgeRC) <= SourceIdx)
+ // Not in the postorder sequence between source and target.
+ continue;
+
+ if (Set.insert(&EdgeRC).second)
+ Worklist.push_back(&EdgeRC);
+ }
+ } while (!Worklist.empty());
+ };
- // If we've found a connection anywhere below this point on the stack (and
- // thus up the parent graph from the caller), the current node needs to be
- // added to the connected set now that we've processed all of its parents.
- if ((int)DFSStack.size() == ConnectedDepth) {
- --ConnectedDepth; // We're finished with this connection.
- bool Inserted = ConnectedSet.insert(C).second;
- (void)Inserted;
- assert(Inserted && "Cannot insert a refSCC multiple times!");
- Connected.push_back(C);
- } else {
- // Otherwise remember that its parents don't ever connect.
- assert(ConnectedDepth < (int)DFSStack.size() &&
- "Cannot have a connected depth greater than the DFS depth!");
- Visited.insert(C);
- }
- } while (!DFSStack.empty());
+ // Use a generic helper to update the postorder sequence of RefSCCs and return
+ // a range of any RefSCCs connected into a cycle by inserting this edge. This
+ // routine will also take care of updating the indices into the postorder
+ // sequence.
+ iterator_range<SmallVectorImpl<RefSCC *>::iterator> MergeRange =
+ updatePostorderSequenceForEdgeInsertion(
+ SourceC, *this, G->PostOrderRefSCCs, G->RefSCCIndices,
+ ComputeSourceConnectedSet, ComputeTargetConnectedSet);
+
+ // Build a set so we can do fast tests for whether a RefSCC will end up as
+ // part of the merged RefSCC.
+ SmallPtrSet<RefSCC *, 16> MergeSet(MergeRange.begin(), MergeRange.end());
+
+ // This RefSCC will always be part of that set, so just insert it here.
+ MergeSet.insert(this);
// Now that we have identified all of the SCCs which need to be merged into
// a connected set with the inserted edge, merge all of them into this SCC.
- // We walk the newly connected RefSCCs in the reverse postorder of the parent
- // DAG walk above and merge in each of their SCC postorder lists. This
- // ensures a merged postorder SCC list.
SmallVector<SCC *, 16> MergedSCCs;
int SCCIndex = 0;
- for (RefSCC *C : reverse(Connected)) {
- assert(C != this &&
- "This RefSCC should terminate the DFS without being reached.");
+ for (RefSCC *RC : MergeRange) {
+ assert(RC != this && "We're merging into the target RefSCC, so it "
+ "shouldn't be in the range.");
// Merge the parents which aren't part of the merge into the our parents.
- for (RefSCC *ParentC : C->Parents)
- if (!ConnectedSet.count(ParentC))
- Parents.insert(ParentC);
- C->Parents.clear();
+ for (RefSCC *ParentRC : RC->Parents)
+ if (!MergeSet.count(ParentRC))
+ Parents.insert(ParentRC);
+ RC->Parents.clear();
// Walk the inner SCCs to update their up-pointer and walk all the edges to
// update any parent sets.
// FIXME: We should try to find a way to avoid this (rather expensive) edge
// walk by updating the parent sets in some other manner.
- for (SCC &InnerC : *C) {
+ for (SCC &InnerC : *RC) {
InnerC.OuterRefSCC = this;
SCCIndices[&InnerC] = SCCIndex++;
for (Node &N : InnerC) {
@@ -860,9 +1007,9 @@ LazyCallGraph::RefSCC::insertIncomingRefEdge(Node &SourceN, Node &TargetN) {
assert(E.getNode() &&
"Cannot have a null node within a visited SCC!");
RefSCC &ChildRC = *G->lookupRefSCC(*E.getNode());
- if (ConnectedSet.count(&ChildRC))
+ if (MergeSet.count(&ChildRC))
continue;
- ChildRC.Parents.erase(C);
+ ChildRC.Parents.erase(RC);
ChildRC.Parents.insert(this);
}
}
@@ -871,33 +1018,37 @@ LazyCallGraph::RefSCC::insertIncomingRefEdge(Node &SourceN, Node &TargetN) {
// Now merge in the SCCs. We can actually move here so try to reuse storage
// the first time through.
if (MergedSCCs.empty())
- MergedSCCs = std::move(C->SCCs);
+ MergedSCCs = std::move(RC->SCCs);
else
- MergedSCCs.append(C->SCCs.begin(), C->SCCs.end());
- C->SCCs.clear();
+ MergedSCCs.append(RC->SCCs.begin(), RC->SCCs.end());
+ RC->SCCs.clear();
+ DeletedRefSCCs.push_back(RC);
}
- // Finally append our original SCCs to the merged list and move it into
- // place.
+ // Append our original SCCs to the merged list and move it into place.
for (SCC &InnerC : *this)
SCCIndices[&InnerC] = SCCIndex++;
MergedSCCs.append(SCCs.begin(), SCCs.end());
SCCs = std::move(MergedSCCs);
+ // Remove the merged away RefSCCs from the post order sequence.
+ for (RefSCC *RC : MergeRange)
+ G->RefSCCIndices.erase(RC);
+ int IndexOffset = MergeRange.end() - MergeRange.begin();
+ auto EraseEnd =
+ G->PostOrderRefSCCs.erase(MergeRange.begin(), MergeRange.end());
+ for (RefSCC *RC : make_range(EraseEnd, G->PostOrderRefSCCs.end()))
+ G->RefSCCIndices[RC] -= IndexOffset;
+
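The bookkeeping just above — erase the merged RefSCCs from the global postorder vector and subtract the removed count from every later index — looks like this in isolation (toy ints standing in for RefSCC pointers, not LLVM code):

#include <cassert>
#include <map>
#include <vector>

int main() {
  std::vector<int> PostOrder = {10, 20, 30, 40, 50};
  std::map<int, int> Indices;
  for (int i = 0, e = (int)PostOrder.size(); i != e; ++i)
    Indices[PostOrder[i]] = i;

  // Pretend elements 20 and 30 were merged away (the MergeRange): drop their
  // index entries, erase them from the sequence, and renumber what follows.
  auto Begin = PostOrder.begin() + 1, End = PostOrder.begin() + 3;
  int IndexOffset = int(End - Begin);
  for (auto I = Begin; I != End; ++I)
    Indices.erase(*I);
  auto EraseEnd = PostOrder.erase(Begin, End);
  for (auto I = EraseEnd; I != PostOrder.end(); ++I)
    Indices[*I] -= IndexOffset;

  assert((PostOrder == std::vector<int>{10, 40, 50}));
  assert(Indices[40] == 1 && Indices[50] == 2);
  return 0;
}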
// At this point we have a merged RefSCC with a post-order SCCs list, just
// connect the nodes to form the new edge.
SourceN.insertEdgeInternal(TargetN, Edge::Ref);
-#ifndef NDEBUG
- // Check that the RefSCC is still valid.
- verify();
-#endif
-
// We return the list of SCCs which were merged so that callers can
// invalidate any data they have associated with those SCCs. Note that these
// SCCs are no longer in an interesting state (they are totally empty) but
// the pointers will remain stable for the life of the graph itself.
- return Connected;
+ return DeletedRefSCCs;
}
void LazyCallGraph::RefSCC::removeOutgoingEdge(Node &SourceN, Node &TargetN) {
@@ -907,10 +1058,16 @@ void LazyCallGraph::RefSCC::removeOutgoingEdge(Node &SourceN, Node &TargetN) {
RefSCC &TargetRC = *G->lookupRefSCC(TargetN);
assert(&TargetRC != this && "The target must not be a member of this RefSCC");
- assert(std::find(G->LeafRefSCCs.begin(), G->LeafRefSCCs.end(), this) ==
- G->LeafRefSCCs.end() &&
+ assert(!is_contained(G->LeafRefSCCs, this) &&
"Cannot have a leaf RefSCC source.");
+#ifndef NDEBUG
+ // In a debug build, verify the RefSCC is valid to start with and when this
+ // routine finishes.
+ verify();
+ auto VerifyOnExit = make_scope_exit([&]() { verify(); });
+#endif
+
// First remove it from the node.
SourceN.removeEdgeInternal(TargetN.getFunction());
@@ -962,6 +1119,13 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) {
assert(!SourceN[TargetN].isCall() &&
"Cannot remove a call edge, it must first be made a ref edge");
+#ifndef NDEBUG
+ // In a debug build, verify the RefSCC is valid to start with and when this
+ // routine finishes.
+ verify();
+ auto VerifyOnExit = make_scope_exit([&]() { verify(); });
+#endif
+
// First remove the actual edge.
SourceN.removeEdgeInternal(TargetN.getFunction());
@@ -972,6 +1136,13 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) {
if (&SourceN == &TargetN)
return Result;
+ // If this ref edge is within an SCC then there are sufficient other edges to
+ // form a cycle without this edge so removing it is a no-op.
+ SCC &SourceC = *G->lookupSCC(SourceN);
+ SCC &TargetC = *G->lookupSCC(TargetN);
+ if (&SourceC == &TargetC)
+ return Result;
+
// We build somewhat synthetic new RefSCCs by providing a postorder mapping
// for each inner SCC. We also store these associated with *nodes* rather
// than SCCs because this saves a round-trip through the node->SCC map and in
@@ -994,7 +1165,6 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) {
// and handle participants in that cycle without walking all the edges that
// form the connections, and instead by relying on the fundamental guarantee
// coming into this operation.
- SCC &TargetC = *G->lookupSCC(TargetN);
for (Node &N : TargetC)
PostOrderMapping[&N] = RootPostOrderNumber;
@@ -1082,9 +1252,8 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) {
}
// If this child isn't currently in this RefSCC, no need to process
- // it.
- // However, we do need to remove this RefSCC from its RefSCC's parent
- // set.
+ // it. However, we do need to remove this RefSCC from its RefSCC's
+ // parent set.
RefSCC &ChildRC = *G->lookupRefSCC(ChildN);
ChildRC.Parents.erase(this);
++I;
@@ -1121,10 +1290,9 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) {
// root DFS number.
auto RefSCCNodes = make_range(
PendingRefSCCStack.rbegin(),
- std::find_if(PendingRefSCCStack.rbegin(), PendingRefSCCStack.rend(),
- [RootDFSNumber](Node *N) {
- return N->DFSNumber < RootDFSNumber;
- }));
+ find_if(reverse(PendingRefSCCStack), [RootDFSNumber](const Node *N) {
+ return N->DFSNumber < RootDFSNumber;
+ }));
// Mark the postorder number for these nodes and clear them off the
// stack. We'll use the postorder number to pull them into RefSCCs at the
@@ -1149,6 +1317,25 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) {
for (int i = 1; i < PostOrderNumber; ++i)
Result.push_back(G->createRefSCC(*G));
+ // Insert the resulting postorder sequence into the global graph postorder
+ // sequence before the current RefSCC in that sequence. The idea being that
+ // this RefSCC is the target of the reference edge removed, and thus has
+ // a direct or indirect edge to every other RefSCC formed and so must be at
+ // the end of any postorder traversal.
+ //
+ // FIXME: It'd be nice to change the APIs so that we returned an iterator
+ // range over the global postorder sequence and generally use that sequence
+ // rather than building a separate result vector here.
+ if (!Result.empty()) {
+ int Idx = G->getRefSCCIndex(*this);
+ G->PostOrderRefSCCs.insert(G->PostOrderRefSCCs.begin() + Idx,
+ Result.begin(), Result.end());
+ for (int i : seq<int>(Idx, G->PostOrderRefSCCs.size()))
+ G->RefSCCIndices[G->PostOrderRefSCCs[i]] = i;
+ assert(G->PostOrderRefSCCs[G->getRefSCCIndex(*this)] == this &&
+ "Failed to update this RefSCC's index after insertion!");
+ }
+
for (SCC *C : SCCs) {
auto PostOrderI = PostOrderMapping.find(&*C->begin());
assert(PostOrderI != PostOrderMapping.end() &&
@@ -1166,7 +1353,7 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) {
RefSCC &RC = *Result[SCCNumber - 1];
int SCCIndex = RC.SCCs.size();
RC.SCCs.push_back(C);
- SCCIndices[C] = SCCIndex;
+ RC.SCCIndices[C] = SCCIndex;
C->OuterRefSCC = &RC;
}
@@ -1178,12 +1365,15 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) {
G->connectRefSCC(*RC);
// Now erase all but the root's SCCs.
- SCCs.erase(std::remove_if(SCCs.begin(), SCCs.end(),
- [&](SCC *C) {
- return PostOrderMapping.lookup(&*C->begin()) !=
- RootPostOrderNumber;
- }),
+ SCCs.erase(remove_if(SCCs,
+ [&](SCC *C) {
+ return PostOrderMapping.lookup(&*C->begin()) !=
+ RootPostOrderNumber;
+ }),
SCCs.end());
+ SCCIndices.clear();
+ for (int i = 0, Size = SCCs.size(); i < Size; ++i)
+ SCCIndices[SCCs[i]] = i;
#ifndef NDEBUG
// Now we need to reconnect the current (root) SCC to the graph. We do this
@@ -1207,11 +1397,24 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) {
if (!Result.empty())
assert(!IsLeaf && "This SCC cannot be a leaf as we have split out new "
"SCCs by removing this edge.");
- if (!std::any_of(G->LeafRefSCCs.begin(), G->LeafRefSCCs.end(),
- [&](RefSCC *C) { return C == this; }))
+ if (none_of(G->LeafRefSCCs, [&](RefSCC *C) { return C == this; }))
assert(!IsLeaf && "This SCC cannot be a leaf as it already had child "
"SCCs before we removed this edge.");
#endif
+ // And connect both this RefSCC and all the new ones to the correct parents.
+ // The easiest way to do this is just to re-analyze the old parent set.
+ SmallVector<RefSCC *, 4> OldParents(Parents.begin(), Parents.end());
+ Parents.clear();
+ for (RefSCC *ParentRC : OldParents)
+ for (SCC &ParentC : *ParentRC)
+ for (Node &ParentN : ParentC)
+ for (Edge &E : ParentN) {
+ assert(E.getNode() && "Cannot have a missing node in a visited SCC!");
+ RefSCC &RC = *G->lookupRefSCC(*E.getNode());
+ if (&RC != ParentRC)
+ RC.Parents.insert(ParentRC);
+ }
+
// If this SCC stopped being a leaf through this edge removal, remove it from
// the leaf SCC list. Note that this DTRT in the case where this was never
// a leaf.
@@ -1222,10 +1425,93 @@ LazyCallGraph::RefSCC::removeInternalRefEdge(Node &SourceN, Node &TargetN) {
std::remove(G->LeafRefSCCs.begin(), G->LeafRefSCCs.end(), this),
G->LeafRefSCCs.end());
+#ifndef NDEBUG
+ // Verify all of the new RefSCCs.
+ for (RefSCC *RC : Result)
+ RC->verify();
+#endif
+
// Return the new list of SCCs.
return Result;
}
+void LazyCallGraph::RefSCC::handleTrivialEdgeInsertion(Node &SourceN,
+ Node &TargetN) {
+ // The only trivial case that requires any graph updates is when we add a new
+ // ref edge and may connect different RefSCCs along that path. This is only
+ // because of the parents set. Every other part of the graph remains constant
+ // after this edge insertion.
+ assert(G->lookupRefSCC(SourceN) == this && "Source must be in this RefSCC.");
+ RefSCC &TargetRC = *G->lookupRefSCC(TargetN);
+ if (&TargetRC == this) {
+
+ return;
+ }
+
+ assert(TargetRC.isDescendantOf(*this) &&
+ "Target must be a descendant of the Source.");
+ // The only change required is to add this RefSCC to the parent set of the
+ // target. This is a set and so idempotent if the edge already existed.
+ TargetRC.Parents.insert(this);
+}
+
+void LazyCallGraph::RefSCC::insertTrivialCallEdge(Node &SourceN,
+ Node &TargetN) {
+#ifndef NDEBUG
+ // Check that the RefSCC is still valid when we finish.
+ auto ExitVerifier = make_scope_exit([this] { verify(); });
+
+ // Check that we aren't breaking some invariants of the SCC graph.
+ SCC &SourceC = *G->lookupSCC(SourceN);
+ SCC &TargetC = *G->lookupSCC(TargetN);
+ if (&SourceC != &TargetC)
+ assert(SourceC.isAncestorOf(TargetC) &&
+ "Call edge is not trivial in the SCC graph!");
+#endif
+ // First insert it into the source or find the existing edge.
+ auto InsertResult = SourceN.EdgeIndexMap.insert(
+ {&TargetN.getFunction(), SourceN.Edges.size()});
+ if (!InsertResult.second) {
+ // Already an edge, just update it.
+ Edge &E = SourceN.Edges[InsertResult.first->second];
+ if (E.isCall())
+ return; // Nothing to do!
+ E.setKind(Edge::Call);
+ } else {
+ // Create the new edge.
+ SourceN.Edges.emplace_back(TargetN, Edge::Call);
+ }
+
+ // Now that we have the edge, handle the graph fallout.
+ handleTrivialEdgeInsertion(SourceN, TargetN);
+}
+
+void LazyCallGraph::RefSCC::insertTrivialRefEdge(Node &SourceN, Node &TargetN) {
+#ifndef NDEBUG
+ // Check that the RefSCC is still valid when we finish.
+ auto ExitVerifier = make_scope_exit([this] { verify(); });
+
+ // Check that we aren't breaking some invariants of the RefSCC graph.
+ RefSCC &SourceRC = *G->lookupRefSCC(SourceN);
+ RefSCC &TargetRC = *G->lookupRefSCC(TargetN);
+ if (&SourceRC != &TargetRC)
+ assert(SourceRC.isAncestorOf(TargetRC) &&
+ "Ref edge is not trivial in the RefSCC graph!");
+#endif
+ // First insert it into the source or find the existing edge.
+ auto InsertResult = SourceN.EdgeIndexMap.insert(
+ {&TargetN.getFunction(), SourceN.Edges.size()});
+ if (!InsertResult.second)
+ // Already an edge, we're done.
+ return;
+
+ // Create the new edge.
+ SourceN.Edges.emplace_back(TargetN, Edge::Ref);
+
+ // Now that we have the edge, handle the graph fallout.
+ handleTrivialEdgeInsertion(SourceN, TargetN);
+}
+
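
Editor's note: the trivial call-edge insertion above relies on the usual insert-or-find map idiom: try to insert the target at the next edge index, and use the returned bool to decide between promoting an existing ref edge and appending a brand-new call edge. A small sketch of that idiom with plain standard containers (all names here are hypothetical):

  #include <cstddef>
  #include <unordered_map>
  #include <vector>

  enum class Kind { Ref, Call };
  struct EdgeRec { Kind K; };

  // One map lookup decides whether we append a new edge or upgrade the edge
  // already recorded at the stored index.
  void addCallEdge(std::unordered_map<int, std::size_t> &Index,
                   std::vector<EdgeRec> &Edges, int TargetId) {
    auto Result = Index.insert({TargetId, Edges.size()});
    if (!Result.second) {
      Edges[Result.first->second].K = Kind::Call; // existing ref edge: promote
      return;
    }
    Edges.push_back({Kind::Call});                // new edge at the recorded index
  }
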
void LazyCallGraph::insertEdge(Node &SourceN, Function &Target, Edge::Kind EK) {
assert(SCCMap.empty() && DFSStack.empty() &&
"This method cannot be called after SCCs have been formed!");
@@ -1240,6 +1526,93 @@ void LazyCallGraph::removeEdge(Node &SourceN, Function &Target) {
return SourceN.removeEdgeInternal(Target);
}
+void LazyCallGraph::removeDeadFunction(Function &F) {
+ // FIXME: This is unnecessarily restrictive. We should be able to remove
+ // functions which recursively call themselves.
+ assert(F.use_empty() &&
+ "This routine should only be called on trivially dead functions!");
+
+ auto EII = EntryIndexMap.find(&F);
+ if (EII != EntryIndexMap.end()) {
+ EntryEdges[EII->second] = Edge();
+ EntryIndexMap.erase(EII);
+ }
+
+ // It's safe to just remove un-visited functions from the RefSCC entry list.
+ // FIXME: This is a linear operation which could become hot and benefit from
+ // an index map.
+ auto RENI = find(RefSCCEntryNodes, &F);
+ if (RENI != RefSCCEntryNodes.end())
+ RefSCCEntryNodes.erase(RENI);
+
+ auto NI = NodeMap.find(&F);
+ if (NI == NodeMap.end())
+ // Not in the graph at all!
+ return;
+
+ Node &N = *NI->second;
+ NodeMap.erase(NI);
+
+ if (SCCMap.empty() && DFSStack.empty()) {
+ // No SCC walk has begun, so removing this is fine and there is nothing
+ // else necessary at this point but clearing out the node.
+ N.clear();
+ return;
+ }
+
+ // Check that we aren't going to break the DFS walk.
+ assert(all_of(DFSStack,
+ [&N](const std::pair<Node *, edge_iterator> &Element) {
+ return Element.first != &N;
+ }) &&
+ "Tried to remove a function currently in the DFS stack!");
+ assert(find(PendingRefSCCStack, &N) == PendingRefSCCStack.end() &&
+ "Tried to remove a function currently pending to add to a RefSCC!");
+
+ // Cannot remove a function which has yet to be visited in the DFS walk, so
+ // if we have a node at all then we must have an SCC and RefSCC.
+ auto CI = SCCMap.find(&N);
+ assert(CI != SCCMap.end() &&
+ "Tried to remove a node without an SCC after DFS walk started!");
+ SCC &C = *CI->second;
+ SCCMap.erase(CI);
+ RefSCC &RC = C.getOuterRefSCC();
+
+ // This node must be the only member of its SCC as it has no callers, and
+ // that SCC must be the only member of a RefSCC as it has no references.
+ // Validate these properties first.
+ assert(C.size() == 1 && "Dead functions must be in a singular SCC");
+ assert(RC.size() == 1 && "Dead functions must be in a singular RefSCC");
+ assert(RC.Parents.empty() && "Cannot have parents of a dead RefSCC!");
+
+ // Now remove this RefSCC from any parent sets and the leaf list.
+ for (Edge &E : N)
+ if (Node *TargetN = E.getNode())
+ if (RefSCC *TargetRC = lookupRefSCC(*TargetN))
+ TargetRC->Parents.erase(&RC);
+ // FIXME: This is a linear operation which could become hot and benefit from
+ // an index map.
+ auto LRI = find(LeafRefSCCs, &RC);
+ if (LRI != LeafRefSCCs.end())
+ LeafRefSCCs.erase(LRI);
+
+ auto RCIndexI = RefSCCIndices.find(&RC);
+ int RCIndex = RCIndexI->second;
+ PostOrderRefSCCs.erase(PostOrderRefSCCs.begin() + RCIndex);
+ RefSCCIndices.erase(RCIndexI);
+ for (int i = RCIndex, Size = PostOrderRefSCCs.size(); i < Size; ++i)
+ RefSCCIndices[PostOrderRefSCCs[i]] = i;
+
+ // Finally clear out all the data structures from the node down through the
+ // components.
+ N.clear();
+ C.clear();
+ RC.clear();
+
+ // Nothing to delete as all the objects are allocated in stable bump pointer
+ // allocators.
+}
+
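
Editor's note: removeDeadFunction only updates the graph's bookkeeping; it does not touch the IR itself. A hedged usage sketch of how a caller might combine the two steps (the helper name and the ordering are assumptions, not something this patch prescribes):

  #include "llvm/Analysis/LazyCallGraph.h"
  #include "llvm/IR/Function.h"
  #include <cassert>
  using namespace llvm;

  // Hypothetical helper: detach a trivially dead function from the graph's
  // SCC/RefSCC bookkeeping, then erase the IR definition itself.
  static void deleteDeadFunction(LazyCallGraph &CG, Function &F) {
    assert(F.use_empty() && "caller must drop all uses first");
    CG.removeDeadFunction(F); // graph bookkeeping only
    F.eraseFromParent();      // now remove the IR function
  }
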
LazyCallGraph::Node &LazyCallGraph::insertInto(Function &F, Node *&MappedN) {
return *new (MappedN = BPA.Allocate()) Node(*this, F);
}
@@ -1372,10 +1745,9 @@ void LazyCallGraph::buildSCCs(RefSCC &RC, node_stack_range Nodes) {
// root DFS number.
auto SCCNodes = make_range(
PendingSCCStack.rbegin(),
- std::find_if(PendingSCCStack.rbegin(), PendingSCCStack.rend(),
- [RootDFSNumber](Node *N) {
- return N->DFSNumber < RootDFSNumber;
- }));
+ find_if(reverse(PendingSCCStack), [RootDFSNumber](const Node *N) {
+ return N->DFSNumber < RootDFSNumber;
+ }));
// Form a new SCC out of these nodes and then clear them off our pending
// stack.
RC.SCCs.push_back(createSCC(RC, SCCNodes));
@@ -1411,19 +1783,19 @@ void LazyCallGraph::connectRefSCC(RefSCC &RC) {
IsLeaf = false;
}
- // For the SCCs where we fine no child SCCs, add them to the leaf list.
+ // For the SCCs where we find no child SCCs, add them to the leaf list.
if (IsLeaf)
LeafRefSCCs.push_back(&RC);
}
-LazyCallGraph::RefSCC *LazyCallGraph::getNextRefSCCInPostOrder() {
+bool LazyCallGraph::buildNextRefSCCInPostOrder() {
if (DFSStack.empty()) {
Node *N;
do {
// If we've handled all candidate entry nodes to the SCC forest, we're
// done.
if (RefSCCEntryNodes.empty())
- return nullptr;
+ return false;
N = &get(*RefSCCEntryNodes.pop_back_val());
} while (N->DFSNumber != 0);
@@ -1494,9 +1866,9 @@ LazyCallGraph::RefSCC *LazyCallGraph::getNextRefSCCInPostOrder() {
// root DFS number.
auto RefSCCNodes = node_stack_range(
PendingRefSCCStack.rbegin(),
- std::find_if(
- PendingRefSCCStack.rbegin(), PendingRefSCCStack.rend(),
- [RootDFSNumber](Node *N) { return N->DFSNumber < RootDFSNumber; }));
+ find_if(reverse(PendingRefSCCStack), [RootDFSNumber](const Node *N) {
+ return N->DFSNumber < RootDFSNumber;
+ }));
// Form a new RefSCC out of these nodes and then clear them off our pending
// stack.
RefSCC *NewRC = createRefSCC(*this);
@@ -1505,13 +1877,18 @@ LazyCallGraph::RefSCC *LazyCallGraph::getNextRefSCCInPostOrder() {
PendingRefSCCStack.erase(RefSCCNodes.end().base(),
PendingRefSCCStack.end());
- // We return the new node here. This essentially suspends the DFS walk
- // until another RefSCC is requested.
- return NewRC;
+ // Push the new node into the postorder list and return true indicating we
+ // successfully grew the postorder sequence by one.
+ bool Inserted =
+ RefSCCIndices.insert({NewRC, PostOrderRefSCCs.size()}).second;
+ (void)Inserted;
+ assert(Inserted && "Cannot already have this RefSCC in the index map!");
+ PostOrderRefSCCs.push_back(NewRC);
+ return true;
}
}
-char LazyCallGraphAnalysis::PassID;
+AnalysisKey LazyCallGraphAnalysis::Key;
LazyCallGraphPrinterPass::LazyCallGraphPrinterPass(raw_ostream &OS) : OS(OS) {}
diff --git a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp
index 3ce667f08037..4f6355236873 100644
--- a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp
+++ b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -26,6 +26,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/ValueHandle.h"
@@ -50,7 +51,7 @@ namespace llvm {
FunctionPass *createLazyValueInfoPass() { return new LazyValueInfoWrapperPass(); }
}
-char LazyValueAnalysis::PassID;
+AnalysisKey LazyValueAnalysis::Key;
//===----------------------------------------------------------------------===//
// LVILatticeVal
@@ -70,12 +71,14 @@ class LVILatticeVal {
/// "nothing known yet".
undefined,
- /// This Value has a specific constant value. (For integers, constantrange
- /// is used instead.)
+ /// This Value has a specific constant value. (For constant integers,
+ /// constantrange is used instead. Integer typed constantexprs can appear
+ /// as constant.)
constant,
- /// This Value is known to not have the specified value. (For integers,
- /// constantrange is used instead.)
+ /// This Value is known to not have the specified value. (For constant
+ /// integers, constantrange is used instead. As above, integer typed
+ /// constantexprs can appear here.)
notconstant,
/// The Value falls within this range. (Used only for integer typed values.)
@@ -139,37 +142,37 @@ public:
return Range;
}
- /// Return true if this is a change in status.
- bool markOverdefined() {
+private:
+ void markOverdefined() {
if (isOverdefined())
- return false;
+ return;
Tag = overdefined;
- return true;
}
- /// Return true if this is a change in status.
- bool markConstant(Constant *V) {
+ void markConstant(Constant *V) {
assert(V && "Marking constant with NULL");
- if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
- return markConstantRange(ConstantRange(CI->getValue()));
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ markConstantRange(ConstantRange(CI->getValue()));
+ return;
+ }
if (isa<UndefValue>(V))
- return false;
+ return;
assert((!isConstant() || getConstant() == V) &&
"Marking constant with different value");
assert(isUndefined());
Tag = constant;
Val = V;
- return true;
}
- /// Return true if this is a change in status.
- bool markNotConstant(Constant *V) {
+ void markNotConstant(Constant *V) {
assert(V && "Marking constant with NULL");
- if (ConstantInt *CI = dyn_cast<ConstantInt>(V))
- return markConstantRange(ConstantRange(CI->getValue()+1, CI->getValue()));
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ markConstantRange(ConstantRange(CI->getValue()+1, CI->getValue()));
+ return;
+ }
if (isa<UndefValue>(V))
- return false;
+ return;
assert((!isConstant() || getConstant() != V) &&
"Marking constant !constant with same value");
@@ -178,100 +181,70 @@ public:
assert(isUndefined() || isConstant());
Tag = notconstant;
Val = V;
- return true;
}
- /// Return true if this is a change in status.
- bool markConstantRange(ConstantRange NewR) {
+ void markConstantRange(ConstantRange NewR) {
if (isConstantRange()) {
if (NewR.isEmptySet())
- return markOverdefined();
-
- bool changed = Range != NewR;
- Range = std::move(NewR);
- return changed;
+ markOverdefined();
+ else {
+ Range = std::move(NewR);
+ }
+ return;
}
assert(isUndefined());
if (NewR.isEmptySet())
- return markOverdefined();
-
- Tag = constantrange;
- Range = std::move(NewR);
- return true;
+ markOverdefined();
+ else {
+ Tag = constantrange;
+ Range = std::move(NewR);
+ }
}
+public:
+
/// Merge the specified lattice value into this one, updating this
/// one and returning true if anything changed.
- bool mergeIn(const LVILatticeVal &RHS, const DataLayout &DL) {
- if (RHS.isUndefined() || isOverdefined()) return false;
- if (RHS.isOverdefined()) return markOverdefined();
+ void mergeIn(const LVILatticeVal &RHS, const DataLayout &DL) {
+ if (RHS.isUndefined() || isOverdefined())
+ return;
+ if (RHS.isOverdefined()) {
+ markOverdefined();
+ return;
+ }
if (isUndefined()) {
- Tag = RHS.Tag;
- Val = RHS.Val;
- Range = RHS.Range;
- return true;
+ *this = RHS;
+ return;
}
if (isConstant()) {
- if (RHS.isConstant()) {
- if (Val == RHS.Val)
- return false;
- return markOverdefined();
- }
-
- if (RHS.isNotConstant()) {
- if (Val == RHS.Val)
- return markOverdefined();
-
- // Unless we can prove that the two Constants are different, we must
- // move to overdefined.
- if (ConstantInt *Res =
- dyn_cast<ConstantInt>(ConstantFoldCompareInstOperands(
- CmpInst::ICMP_NE, getConstant(), RHS.getNotConstant(), DL)))
- if (Res->isOne())
- return markNotConstant(RHS.getNotConstant());
-
- return markOverdefined();
- }
-
- return markOverdefined();
+ if (RHS.isConstant() && Val == RHS.Val)
+ return;
+ markOverdefined();
+ return;
}
if (isNotConstant()) {
- if (RHS.isConstant()) {
- if (Val == RHS.Val)
- return markOverdefined();
-
- // Unless we can prove that the two Constants are different, we must
- // move to overdefined.
- if (ConstantInt *Res =
- dyn_cast<ConstantInt>(ConstantFoldCompareInstOperands(
- CmpInst::ICMP_NE, getNotConstant(), RHS.getConstant(), DL)))
- if (Res->isOne())
- return false;
-
- return markOverdefined();
- }
-
- if (RHS.isNotConstant()) {
- if (Val == RHS.Val)
- return false;
- return markOverdefined();
- }
-
- return markOverdefined();
+ if (RHS.isNotConstant() && Val == RHS.Val)
+ return;
+ markOverdefined();
+ return;
}
assert(isConstantRange() && "New LVILattice type?");
- if (!RHS.isConstantRange())
- return markOverdefined();
-
+ if (!RHS.isConstantRange()) {
+ // We can get here if we've encountered a constantexpr of integer type
+ // and merged it with a constantrange.
+ markOverdefined();
+ return;
+ }
ConstantRange NewR = Range.unionWith(RHS.getConstantRange());
if (NewR.isFullSet())
- return markOverdefined();
- return markConstantRange(NewR);
+ markOverdefined();
+ else
+ markConstantRange(NewR);
}
};
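
Editor's note: after this rewrite, the interesting merge case is two constant ranges: they are unioned, and only a full-set result collapses to overdefined. A small standalone sketch of that rule (the helper is illustrative, not part of the patch):

  #include "llvm/IR/ConstantRange.h"
  using namespace llvm;

  // Union two ranges; report false when the union carries no information so
  // the caller can mark the lattice value overdefined instead.
  static bool mergeRanges(ConstantRange &LHS, const ConstantRange &RHS) {
    ConstantRange U = LHS.unionWith(RHS);
    if (U.isFullSet())
      return false;
    LHS = U;
    return true;
  }
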
@@ -366,6 +339,9 @@ namespace {
/// A callback value handle updates the cache when values are erased.
class LazyValueInfoCache;
struct LVIValueHandle final : public CallbackVH {
+ // Needs to access getValPtr(), which is protected.
+ friend struct DenseMapInfo<LVIValueHandle>;
+
LazyValueInfoCache *Parent;
LVIValueHandle(Value *V, LazyValueInfoCache *P)
@@ -376,7 +352,7 @@ namespace {
deleted();
}
};
-}
+} // end anonymous namespace
namespace {
/// This is the cache kept by LazyValueInfo which
@@ -387,12 +363,15 @@ namespace {
/// entries, allowing us to do a lookup with a binary search.
/// Over-defined lattice values are recorded in OverDefinedCache to reduce
/// memory overhead.
- typedef SmallDenseMap<AssertingVH<BasicBlock>, LVILatticeVal, 4>
- ValueCacheEntryTy;
+ struct ValueCacheEntryTy {
+ ValueCacheEntryTy(Value *V, LazyValueInfoCache *P) : Handle(V, P) {}
+ LVIValueHandle Handle;
+ SmallDenseMap<AssertingVH<BasicBlock>, LVILatticeVal, 4> BlockVals;
+ };
/// This is all of the cached information for all values,
/// mapped from Value* to key information.
- std::map<LVIValueHandle, ValueCacheEntryTy> ValueCache;
+ DenseMap<Value *, std::unique_ptr<ValueCacheEntryTy>> ValueCache;
/// This tracks, on a per-block basis, the set of values that are
/// over-defined at the end of that block.
@@ -404,6 +383,183 @@ namespace {
/// don't spend time removing unused blocks from our caches.
DenseSet<AssertingVH<BasicBlock> > SeenBlocks;
+ public:
+ void insertResult(Value *Val, BasicBlock *BB, const LVILatticeVal &Result) {
+ SeenBlocks.insert(BB);
+
+ // Insert over-defined values into their own cache to reduce memory
+ // overhead.
+ if (Result.isOverdefined())
+ OverDefinedCache[BB].insert(Val);
+ else {
+ auto It = ValueCache.find_as(Val);
+ if (It == ValueCache.end()) {
+ ValueCache[Val] = make_unique<ValueCacheEntryTy>(Val, this);
+ It = ValueCache.find_as(Val);
+ assert(It != ValueCache.end() && "Val was just added to the map!");
+ }
+ It->second->BlockVals[BB] = Result;
+ }
+ }
+
+ bool isOverdefined(Value *V, BasicBlock *BB) const {
+ auto ODI = OverDefinedCache.find(BB);
+
+ if (ODI == OverDefinedCache.end())
+ return false;
+
+ return ODI->second.count(V);
+ }
+
+ bool hasCachedValueInfo(Value *V, BasicBlock *BB) const {
+ if (isOverdefined(V, BB))
+ return true;
+
+ auto I = ValueCache.find_as(V);
+ if (I == ValueCache.end())
+ return false;
+
+ return I->second->BlockVals.count(BB);
+ }
+
+ LVILatticeVal getCachedValueInfo(Value *V, BasicBlock *BB) const {
+ if (isOverdefined(V, BB))
+ return LVILatticeVal::getOverdefined();
+
+ auto I = ValueCache.find_as(V);
+ if (I == ValueCache.end())
+ return LVILatticeVal();
+ auto BBI = I->second->BlockVals.find(BB);
+ if (BBI == I->second->BlockVals.end())
+ return LVILatticeVal();
+ return BBI->second;
+ }
+
+ /// clear - Empty the cache.
+ void clear() {
+ SeenBlocks.clear();
+ ValueCache.clear();
+ OverDefinedCache.clear();
+ }
+
+ /// Inform the cache that a given value has been deleted.
+ void eraseValue(Value *V);
+
+ /// This is part of the update interface to inform the cache
+ /// that a block has been deleted.
+ void eraseBlock(BasicBlock *BB);
+
+ /// Updates the cache to remove any influence an overdefined value in
+ /// OldSucc might have (unless also overdefined in NewSucc). This just
+ /// flushes elements from the cache and does not add any.
+ void threadEdgeImpl(BasicBlock *OldSucc,BasicBlock *NewSucc);
+
+ friend struct LVIValueHandle;
+ };
+}
+
+void LazyValueInfoCache::eraseValue(Value *V) {
+ SmallVector<AssertingVH<BasicBlock>, 4> ToErase;
+ for (auto &I : OverDefinedCache) {
+ SmallPtrSetImpl<Value *> &ValueSet = I.second;
+ ValueSet.erase(V);
+ if (ValueSet.empty())
+ ToErase.push_back(I.first);
+ }
+ for (auto &BB : ToErase)
+ OverDefinedCache.erase(BB);
+
+ ValueCache.erase(V);
+}
+
+void LVIValueHandle::deleted() {
+ // This erasure deallocates *this, so it MUST happen after we're done
+ // using any and all members of *this.
+ Parent->eraseValue(*this);
+}
+
+void LazyValueInfoCache::eraseBlock(BasicBlock *BB) {
+ // Shortcut if we have never seen this block.
+ DenseSet<AssertingVH<BasicBlock> >::iterator I = SeenBlocks.find(BB);
+ if (I == SeenBlocks.end())
+ return;
+ SeenBlocks.erase(I);
+
+ auto ODI = OverDefinedCache.find(BB);
+ if (ODI != OverDefinedCache.end())
+ OverDefinedCache.erase(ODI);
+
+ for (auto &I : ValueCache)
+ I.second->BlockVals.erase(BB);
+}
+
+void LazyValueInfoCache::threadEdgeImpl(BasicBlock *OldSucc,
+ BasicBlock *NewSucc) {
+ // When an edge in the graph has been threaded, values that we could not
+ // determine a value for before (i.e. were marked overdefined) may be
+ // possible to solve now. We do NOT try to proactively update these values.
+ // Instead, we clear their entries from the cache, and allow lazy updating to
+ // recompute them when needed.
+
+ // The updating process is fairly simple: we need to drop cached info
+ // for all values that were marked overdefined in OldSucc, and for those same
+ // values in any successor of OldSucc (except NewSucc) in which they were
+ // also marked overdefined.
+ std::vector<BasicBlock*> worklist;
+ worklist.push_back(OldSucc);
+
+ auto I = OverDefinedCache.find(OldSucc);
+ if (I == OverDefinedCache.end())
+ return; // Nothing to process here.
+ SmallVector<Value *, 4> ValsToClear(I->second.begin(), I->second.end());
+
+ // Use a worklist to perform a depth-first search of OldSucc's successors.
+ // NOTE: We do not need a visited list since any blocks we have already
+ // visited will have had their overdefined markers cleared already, and we
+ // thus won't loop to their successors.
+ while (!worklist.empty()) {
+ BasicBlock *ToUpdate = worklist.back();
+ worklist.pop_back();
+
+ // Skip blocks only accessible through NewSucc.
+ if (ToUpdate == NewSucc) continue;
+
+ // If a value was marked overdefined in OldSucc, and is here too...
+ auto OI = OverDefinedCache.find(ToUpdate);
+ if (OI == OverDefinedCache.end())
+ continue;
+ SmallPtrSetImpl<Value *> &ValueSet = OI->second;
+
+ bool changed = false;
+ for (Value *V : ValsToClear) {
+ if (!ValueSet.erase(V))
+ continue;
+
+ // If we removed anything, then we potentially need to update
+ // the block's successors too.
+ changed = true;
+
+ if (ValueSet.empty()) {
+ OverDefinedCache.erase(OI);
+ break;
+ }
+ }
+
+ if (!changed) continue;
+
+ worklist.insert(worklist.end(), succ_begin(ToUpdate), succ_end(ToUpdate));
+ }
+}
+
+namespace {
+ // The actual implementation of the lazy analysis and update. Note that the
+ // inheritance from LazyValueInfoCache is intended to be temporary while
+ // splitting the code and then transitioning to a has-a relationship.
+ class LazyValueInfoImpl {
+
+ /// Cached results from previous queries
+ LazyValueInfoCache TheCache;
+
/// This stack holds the state of the value solver during a query.
/// It basically emulates the callstack of the naive
/// recursive value lookup process.
@@ -428,19 +584,6 @@ namespace {
const DataLayout &DL; ///< A mandatory DataLayout
DominatorTree *DT; ///< An optional DT pointer.
- friend struct LVIValueHandle;
-
- void insertResult(Value *Val, BasicBlock *BB, const LVILatticeVal &Result) {
- SeenBlocks.insert(BB);
-
- // Insert over-defined values into their own cache to reduce memory
- // overhead.
- if (Result.isOverdefined())
- OverDefinedCache[BB].insert(Val);
- else
- lookup(Val)[BB] = Result;
- }
-
LVILatticeVal getBlockValue(Value *Val, BasicBlock *BB);
bool getEdgeValue(Value *V, BasicBlock *F, BasicBlock *T,
LVILatticeVal &Result, Instruction *CxtI = nullptr);
@@ -450,6 +593,7 @@ namespace {
// returned means that the work item was not completely processed and must
// be revisited after going through the new items.
bool solveBlockValue(Value *Val, BasicBlock *BB);
+ bool solveBlockValueImpl(LVILatticeVal &Res, Value *Val, BasicBlock *BB);
bool solveBlockValueNonLocal(LVILatticeVal &BBLV, Value *Val, BasicBlock *BB);
bool solveBlockValuePHINode(LVILatticeVal &BBLV, PHINode *PN, BasicBlock *BB);
bool solveBlockValueSelect(LVILatticeVal &BBLV, SelectInst *S,
@@ -458,43 +602,12 @@ namespace {
BasicBlock *BB);
bool solveBlockValueCast(LVILatticeVal &BBLV, Instruction *BBI,
BasicBlock *BB);
- void intersectAssumeBlockValueConstantRange(Value *Val, LVILatticeVal &BBLV,
+ void intersectAssumeOrGuardBlockValueConstantRange(Value *Val,
+ LVILatticeVal &BBLV,
Instruction *BBI);
void solve();
- ValueCacheEntryTy &lookup(Value *V) {
- return ValueCache[LVIValueHandle(V, this)];
- }
-
- bool isOverdefined(Value *V, BasicBlock *BB) const {
- auto ODI = OverDefinedCache.find(BB);
-
- if (ODI == OverDefinedCache.end())
- return false;
-
- return ODI->second.count(V);
- }
-
- bool hasCachedValueInfo(Value *V, BasicBlock *BB) {
- if (isOverdefined(V, BB))
- return true;
-
- LVIValueHandle ValHandle(V, this);
- auto I = ValueCache.find(ValHandle);
- if (I == ValueCache.end())
- return false;
-
- return I->second.count(BB);
- }
-
- LVILatticeVal getCachedValueInfo(Value *V, BasicBlock *BB) {
- if (isOverdefined(V, BB))
- return LVILatticeVal::getOverdefined();
-
- return lookup(V)[BB];
- }
-
public:
/// This is the query interface to determine the lattice
/// value for the specified Value* at the end of the specified block.
@@ -511,60 +624,28 @@ namespace {
LVILatticeVal getValueOnEdge(Value *V, BasicBlock *FromBB,BasicBlock *ToBB,
Instruction *CxtI = nullptr);
- /// This is the update interface to inform the cache that an edge from
- /// PredBB to OldSucc has been threaded to be from PredBB to NewSucc.
- void threadEdge(BasicBlock *PredBB,BasicBlock *OldSucc,BasicBlock *NewSucc);
+ /// Completely flush all previously computed values
+ void clear() {
+ TheCache.clear();
+ }
/// This is part of the update interface to inform the cache
/// that a block has been deleted.
- void eraseBlock(BasicBlock *BB);
-
- /// clear - Empty the cache.
- void clear() {
- SeenBlocks.clear();
- ValueCache.clear();
- OverDefinedCache.clear();
+ void eraseBlock(BasicBlock *BB) {
+ TheCache.eraseBlock(BB);
}
- LazyValueInfoCache(AssumptionCache *AC, const DataLayout &DL,
+ /// This is the update interface to inform the cache that an edge from
+ /// PredBB to OldSucc has been threaded to be from PredBB to NewSucc.
+ void threadEdge(BasicBlock *PredBB,BasicBlock *OldSucc,BasicBlock *NewSucc);
+
+ LazyValueInfoImpl(AssumptionCache *AC, const DataLayout &DL,
DominatorTree *DT = nullptr)
: AC(AC), DL(DL), DT(DT) {}
};
} // end anonymous namespace
-void LVIValueHandle::deleted() {
- SmallVector<AssertingVH<BasicBlock>, 4> ToErase;
- for (auto &I : Parent->OverDefinedCache) {
- SmallPtrSetImpl<Value *> &ValueSet = I.second;
- if (ValueSet.count(getValPtr()))
- ValueSet.erase(getValPtr());
- if (ValueSet.empty())
- ToErase.push_back(I.first);
- }
- for (auto &BB : ToErase)
- Parent->OverDefinedCache.erase(BB);
-
- // This erasure deallocates *this, so it MUST happen after we're done
- // using any and all members of *this.
- Parent->ValueCache.erase(*this);
-}
-
-void LazyValueInfoCache::eraseBlock(BasicBlock *BB) {
- // Shortcut if we have never seen this block.
- DenseSet<AssertingVH<BasicBlock> >::iterator I = SeenBlocks.find(BB);
- if (I == SeenBlocks.end())
- return;
- SeenBlocks.erase(I);
-
- auto ODI = OverDefinedCache.find(BB);
- if (ODI != OverDefinedCache.end())
- OverDefinedCache.erase(ODI);
-
- for (auto &I : ValueCache)
- I.second.erase(BB);
-}
-
-void LazyValueInfoCache::solve() {
+void LazyValueInfoImpl::solve() {
while (!BlockValueStack.empty()) {
std::pair<BasicBlock*, Value*> &e = BlockValueStack.top();
assert(BlockValueSet.count(e) && "Stack value should be in BlockValueSet!");
@@ -572,11 +653,11 @@ void LazyValueInfoCache::solve() {
if (solveBlockValue(e.second, e.first)) {
// The work item was completely processed.
assert(BlockValueStack.top() == e && "Nothing should have been pushed!");
- assert(hasCachedValueInfo(e.second, e.first) &&
+ assert(TheCache.hasCachedValueInfo(e.second, e.first) &&
"Result should be in cache!");
DEBUG(dbgs() << "POP " << *e.second << " in " << e.first->getName()
- << " = " << getCachedValueInfo(e.second, e.first) << "\n");
+ << " = " << TheCache.getCachedValueInfo(e.second, e.first) << "\n");
BlockValueStack.pop();
BlockValueSet.erase(e);
@@ -587,21 +668,20 @@ void LazyValueInfoCache::solve() {
}
}
-bool LazyValueInfoCache::hasBlockValue(Value *Val, BasicBlock *BB) {
+bool LazyValueInfoImpl::hasBlockValue(Value *Val, BasicBlock *BB) {
// If already a constant, there is nothing to compute.
if (isa<Constant>(Val))
return true;
- return hasCachedValueInfo(Val, BB);
+ return TheCache.hasCachedValueInfo(Val, BB);
}
-LVILatticeVal LazyValueInfoCache::getBlockValue(Value *Val, BasicBlock *BB) {
+LVILatticeVal LazyValueInfoImpl::getBlockValue(Value *Val, BasicBlock *BB) {
// If already a constant, there is nothing to compute.
if (Constant *VC = dyn_cast<Constant>(Val))
return LVILatticeVal::get(VC);
- SeenBlocks.insert(BB);
- return getCachedValueInfo(Val, BB);
+ return TheCache.getCachedValueInfo(Val, BB);
}
static LVILatticeVal getFromRangeMetadata(Instruction *BBI) {
@@ -610,7 +690,7 @@ static LVILatticeVal getFromRangeMetadata(Instruction *BBI) {
case Instruction::Load:
case Instruction::Call:
case Instruction::Invoke:
- if (MDNode *Ranges = BBI->getMetadata(LLVMContext::MD_range))
+ if (MDNode *Ranges = BBI->getMetadata(LLVMContext::MD_range))
if (isa<IntegerType>(BBI->getType())) {
return LVILatticeVal::getRange(getConstantRangeFromMetadata(*Ranges));
}
@@ -620,14 +700,14 @@ static LVILatticeVal getFromRangeMetadata(Instruction *BBI) {
return LVILatticeVal::getOverdefined();
}
-bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) {
+bool LazyValueInfoImpl::solveBlockValue(Value *Val, BasicBlock *BB) {
if (isa<Constant>(Val))
return true;
- if (hasCachedValueInfo(Val, BB)) {
+ if (TheCache.hasCachedValueInfo(Val, BB)) {
// If we have a cached value, use that.
DEBUG(dbgs() << " reuse BB '" << BB->getName()
- << "' val=" << getCachedValueInfo(Val, BB) << '\n');
+ << "' val=" << TheCache.getCachedValueInfo(Val, BB) << '\n');
// Since we're reusing a cached value, we don't need to update the
// OverDefinedCache. The cache will have been properly updated whenever the
@@ -638,28 +718,26 @@ bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) {
// Hold off inserting this value into the Cache in case we have to return
// false and come back later.
LVILatticeVal Res;
+ if (!solveBlockValueImpl(Res, Val, BB))
+ // Work pushed, will revisit
+ return false;
+
+ TheCache.insertResult(Val, BB, Res);
+ return true;
+}
+
+bool LazyValueInfoImpl::solveBlockValueImpl(LVILatticeVal &Res,
+ Value *Val, BasicBlock *BB) {
Instruction *BBI = dyn_cast<Instruction>(Val);
- if (!BBI || BBI->getParent() != BB) {
- if (!solveBlockValueNonLocal(Res, Val, BB))
- return false;
- insertResult(Val, BB, Res);
- return true;
- }
+ if (!BBI || BBI->getParent() != BB)
+ return solveBlockValueNonLocal(Res, Val, BB);
- if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
- if (!solveBlockValuePHINode(Res, PN, BB))
- return false;
- insertResult(Val, BB, Res);
- return true;
- }
+ if (PHINode *PN = dyn_cast<PHINode>(BBI))
+ return solveBlockValuePHINode(Res, PN, BB);
- if (auto *SI = dyn_cast<SelectInst>(BBI)) {
- if (!solveBlockValueSelect(Res, SI, BB))
- return false;
- insertResult(Val, BB, Res);
- return true;
- }
+ if (auto *SI = dyn_cast<SelectInst>(BBI))
+ return solveBlockValueSelect(Res, SI, BB);
// If this value is a nonnull pointer, record it's range and bailout. Note
// that for all other pointer typed values, we terminate the search at the
@@ -673,29 +751,20 @@ bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) {
PointerType *PT = dyn_cast<PointerType>(BBI->getType());
if (PT && isKnownNonNull(BBI)) {
Res = LVILatticeVal::getNot(ConstantPointerNull::get(PT));
- insertResult(Val, BB, Res);
return true;
}
if (BBI->getType()->isIntegerTy()) {
- if (isa<CastInst>(BBI)) {
- if (!solveBlockValueCast(Res, BBI, BB))
- return false;
- insertResult(Val, BB, Res);
- return true;
- }
+ if (isa<CastInst>(BBI))
+ return solveBlockValueCast(Res, BBI, BB);
+
BinaryOperator *BO = dyn_cast<BinaryOperator>(BBI);
- if (BO && isa<ConstantInt>(BO->getOperand(1))) {
- if (!solveBlockValueBinaryOp(Res, BBI, BB))
- return false;
- insertResult(Val, BB, Res);
- return true;
- }
+ if (BO && isa<ConstantInt>(BO->getOperand(1)))
+ return solveBlockValueBinaryOp(Res, BBI, BB);
}
DEBUG(dbgs() << " compute BB '" << BB->getName()
<< "' - unknown inst def found.\n");
Res = getFromRangeMetadata(BBI);
- insertResult(Val, BB, Res);
return true;
}
@@ -748,7 +817,7 @@ static bool isObjectDereferencedInBlock(Value *Val, BasicBlock *BB) {
return false;
}
-bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV,
+bool LazyValueInfoImpl::solveBlockValueNonLocal(LVILatticeVal &BBLV,
Value *Val, BasicBlock *BB) {
LVILatticeVal Result; // Start Undefined.
@@ -763,7 +832,7 @@ bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV,
PointerType *PTy = cast<PointerType>(Val->getType());
Result = LVILatticeVal::getNot(ConstantPointerNull::get(PTy));
} else {
- Result.markOverdefined();
+ Result = LVILatticeVal::getOverdefined();
}
BBLV = Result;
return true;
@@ -785,7 +854,7 @@ bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV,
if (Result.isOverdefined()) {
DEBUG(dbgs() << " compute BB '" << BB->getName()
<< "' - overdefined because of pred (non local).\n");
- // Bofore giving up, see if we can prove the pointer non-null local to
+ // Before giving up, see if we can prove the pointer non-null local to
// this particular block.
if (Val->getType()->isPointerTy() &&
isObjectDereferencedInBlock(Val, BB)) {
@@ -806,7 +875,7 @@ bool LazyValueInfoCache::solveBlockValueNonLocal(LVILatticeVal &BBLV,
return true;
}
-bool LazyValueInfoCache::solveBlockValuePHINode(LVILatticeVal &BBLV,
+bool LazyValueInfoImpl::solveBlockValuePHINode(LVILatticeVal &BBLV,
PHINode *PN, BasicBlock *BB) {
LVILatticeVal Result; // Start Undefined.
@@ -845,15 +914,13 @@ bool LazyValueInfoCache::solveBlockValuePHINode(LVILatticeVal &BBLV,
return true;
}
-static bool getValueFromFromCondition(Value *Val, ICmpInst *ICI,
- LVILatticeVal &Result,
- bool isTrueDest = true);
+static LVILatticeVal getValueFromCondition(Value *Val, Value *Cond,
+ bool isTrueDest = true);
// If we can determine a constraint on the value given conditions assumed by
// the program, intersect those constraints with BBLV
-void LazyValueInfoCache::intersectAssumeBlockValueConstantRange(Value *Val,
- LVILatticeVal &BBLV,
- Instruction *BBI) {
+void LazyValueInfoImpl::intersectAssumeOrGuardBlockValueConstantRange(
+ Value *Val, LVILatticeVal &BBLV, Instruction *BBI) {
BBI = BBI ? BBI : dyn_cast<Instruction>(Val);
if (!BBI)
return;
@@ -865,44 +932,52 @@ void LazyValueInfoCache::intersectAssumeBlockValueConstantRange(Value *Val,
if (!isValidAssumeForContext(I, BBI, DT))
continue;
- Value *C = I->getArgOperand(0);
- if (ICmpInst *ICI = dyn_cast<ICmpInst>(C)) {
- LVILatticeVal Result;
- if (getValueFromFromCondition(Val, ICI, Result))
- BBLV = intersect(BBLV, Result);
- }
+ BBLV = intersect(BBLV, getValueFromCondition(Val, I->getArgOperand(0)));
+ }
+
+ // If guards are not used in the module, don't spend time looking for them
+ auto *GuardDecl = BBI->getModule()->getFunction(
+ Intrinsic::getName(Intrinsic::experimental_guard));
+ if (!GuardDecl || GuardDecl->use_empty())
+ return;
+
+ for (Instruction &I : make_range(BBI->getIterator().getReverse(),
+ BBI->getParent()->rend())) {
+ Value *Cond = nullptr;
+ if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(Cond))))
+ BBLV = intersect(BBLV, getValueFromCondition(Val, Cond));
}
}
-bool LazyValueInfoCache::solveBlockValueSelect(LVILatticeVal &BBLV,
+bool LazyValueInfoImpl::solveBlockValueSelect(LVILatticeVal &BBLV,
SelectInst *SI, BasicBlock *BB) {
// Recurse on our inputs if needed
if (!hasBlockValue(SI->getTrueValue(), BB)) {
if (pushBlockValue(std::make_pair(BB, SI->getTrueValue())))
return false;
- BBLV.markOverdefined();
+ BBLV = LVILatticeVal::getOverdefined();
return true;
}
LVILatticeVal TrueVal = getBlockValue(SI->getTrueValue(), BB);
// If we hit overdefined, don't ask more queries. We want to avoid poisoning
// extra slots in the table if we can.
if (TrueVal.isOverdefined()) {
- BBLV.markOverdefined();
+ BBLV = LVILatticeVal::getOverdefined();
return true;
}
if (!hasBlockValue(SI->getFalseValue(), BB)) {
if (pushBlockValue(std::make_pair(BB, SI->getFalseValue())))
return false;
- BBLV.markOverdefined();
+ BBLV = LVILatticeVal::getOverdefined();
return true;
}
LVILatticeVal FalseVal = getBlockValue(SI->getFalseValue(), BB);
// If we hit overdefined, don't ask more queries. We want to avoid poisoning
// extra slots in the table if we can.
if (FalseVal.isOverdefined()) {
- BBLV.markOverdefined();
+ BBLV = LVILatticeVal::getOverdefined();
return true;
}
@@ -916,22 +991,22 @@ bool LazyValueInfoCache::solveBlockValueSelect(LVILatticeVal &BBLV,
// ValueTracking getting smarter looking back past our immediate inputs.)
if (SelectPatternResult::isMinOrMax(SPR.Flavor) &&
LHS == SI->getTrueValue() && RHS == SI->getFalseValue()) {
- switch (SPR.Flavor) {
- default:
- llvm_unreachable("unexpected minmax type!");
- case SPF_SMIN: /// Signed minimum
- BBLV.markConstantRange(TrueCR.smin(FalseCR));
- return true;
- case SPF_UMIN: /// Unsigned minimum
- BBLV.markConstantRange(TrueCR.umin(FalseCR));
- return true;
- case SPF_SMAX: /// Signed maximum
- BBLV.markConstantRange(TrueCR.smax(FalseCR));
- return true;
- case SPF_UMAX: /// Unsigned maximum
- BBLV.markConstantRange(TrueCR.umax(FalseCR));
- return true;
- };
+ ConstantRange ResultCR = [&]() {
+ switch (SPR.Flavor) {
+ default:
+ llvm_unreachable("unexpected minmax type!");
+ case SPF_SMIN: /// Signed minimum
+ return TrueCR.smin(FalseCR);
+ case SPF_UMIN: /// Unsigned minimum
+ return TrueCR.umin(FalseCR);
+ case SPF_SMAX: /// Signed maximum
+ return TrueCR.smax(FalseCR);
+ case SPF_UMAX: /// Unsigned maximum
+ return TrueCR.umax(FalseCR);
+ };
+ }();
+ BBLV = LVILatticeVal::getRange(ResultCR);
+ return true;
}
// TODO: ABS, NABS from the SelectPatternResult
@@ -940,27 +1015,21 @@ bool LazyValueInfoCache::solveBlockValueSelect(LVILatticeVal &BBLV,
// Can we constrain the facts about the true and false values by using the
// condition itself? This shows up with idioms like e.g. select(a > 5, a, 5).
// TODO: We could potentially refine an overdefined true value above.
- if (auto *ICI = dyn_cast<ICmpInst>(SI->getCondition())) {
- LVILatticeVal TrueValTaken, FalseValTaken;
- if (!getValueFromFromCondition(SI->getTrueValue(), ICI,
- TrueValTaken, true))
- TrueValTaken.markOverdefined();
- if (!getValueFromFromCondition(SI->getFalseValue(), ICI,
- FalseValTaken, false))
- FalseValTaken.markOverdefined();
-
- TrueVal = intersect(TrueVal, TrueValTaken);
- FalseVal = intersect(FalseVal, FalseValTaken);
-
-
- // Handle clamp idioms such as:
- // %24 = constantrange<0, 17>
- // %39 = icmp eq i32 %24, 0
- // %40 = add i32 %24, -1
- // %siv.next = select i1 %39, i32 16, i32 %40
- // %siv.next = constantrange<0, 17> not <-1, 17>
- // In general, this can handle any clamp idiom which tests the edge
- // condition via an equality or inequality.
+ Value *Cond = SI->getCondition();
+ TrueVal = intersect(TrueVal,
+ getValueFromCondition(SI->getTrueValue(), Cond, true));
+ FalseVal = intersect(FalseVal,
+ getValueFromCondition(SI->getFalseValue(), Cond, false));
+
+ // Handle clamp idioms such as:
+ // %24 = constantrange<0, 17>
+ // %39 = icmp eq i32 %24, 0
+ // %40 = add i32 %24, -1
+ // %siv.next = select i1 %39, i32 16, i32 %40
+ // %siv.next = constantrange<0, 17> not <-1, 17>
+ // In general, this can handle any clamp idiom which tests the edge
+ // condition via an equality or inequality.
+ if (auto *ICI = dyn_cast<ICmpInst>(Cond)) {
ICmpInst::Predicate Pred = ICI->getPredicate();
Value *A = ICI->getOperand(0);
if (ConstantInt *CIBase = dyn_cast<ConstantInt>(ICI->getOperand(1))) {
@@ -1001,13 +1070,13 @@ bool LazyValueInfoCache::solveBlockValueSelect(LVILatticeVal &BBLV,
return true;
}
-bool LazyValueInfoCache::solveBlockValueCast(LVILatticeVal &BBLV,
+bool LazyValueInfoImpl::solveBlockValueCast(LVILatticeVal &BBLV,
Instruction *BBI,
BasicBlock *BB) {
if (!BBI->getOperand(0)->getType()->isSized()) {
// Without knowing how wide the input is, we can't analyze it in any useful
// way.
- BBLV.markOverdefined();
+ BBLV = LVILatticeVal::getOverdefined();
return true;
}
@@ -1024,7 +1093,7 @@ bool LazyValueInfoCache::solveBlockValueCast(LVILatticeVal &BBLV,
// Unhandled instructions are overdefined.
DEBUG(dbgs() << " compute BB '" << BB->getName()
<< "' - overdefined (unknown cast).\n");
- BBLV.markOverdefined();
+ BBLV = LVILatticeVal::getOverdefined();
return true;
}
@@ -1041,7 +1110,8 @@ bool LazyValueInfoCache::solveBlockValueCast(LVILatticeVal &BBLV,
ConstantRange LHSRange = ConstantRange(OperandBitWidth);
if (hasBlockValue(BBI->getOperand(0), BB)) {
LVILatticeVal LHSVal = getBlockValue(BBI->getOperand(0), BB);
- intersectAssumeBlockValueConstantRange(BBI->getOperand(0), LHSVal, BBI);
+ intersectAssumeOrGuardBlockValueConstantRange(BBI->getOperand(0), LHSVal,
+ BBI);
if (LHSVal.isConstantRange())
LHSRange = LHSVal.getConstantRange();
}
@@ -1052,31 +1122,12 @@ bool LazyValueInfoCache::solveBlockValueCast(LVILatticeVal &BBLV,
// NOTE: We're currently limited by the set of operations that ConstantRange
// can evaluate symbolically. Enhancing that set will allows us to analyze
// more definitions.
- LVILatticeVal Result;
- switch (BBI->getOpcode()) {
- case Instruction::Trunc:
- Result.markConstantRange(LHSRange.truncate(ResultBitWidth));
- break;
- case Instruction::SExt:
- Result.markConstantRange(LHSRange.signExtend(ResultBitWidth));
- break;
- case Instruction::ZExt:
- Result.markConstantRange(LHSRange.zeroExtend(ResultBitWidth));
- break;
- case Instruction::BitCast:
- Result.markConstantRange(LHSRange);
- break;
- default:
- // Should be dead if the code above is correct
- llvm_unreachable("inconsistent with above");
- break;
- }
-
- BBLV = Result;
+ auto CastOp = (Instruction::CastOps) BBI->getOpcode();
+ BBLV = LVILatticeVal::getRange(LHSRange.castOp(CastOp, ResultBitWidth));
return true;
}
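
Editor's note: the cast handling above (and the analogous binary-operator rewrite just below) now delegates the per-opcode switch to ConstantRange itself. A brief sketch of those two helpers in isolation (the wrapper functions are illustrative only):

  #include "llvm/IR/ConstantRange.h"
  #include "llvm/IR/Instruction.h"
  using namespace llvm;

  // Evaluate a cast or a binary operator symbolically over whole input ranges.
  static ConstantRange zextRange(const ConstantRange &In, unsigned DstBits) {
    return In.castOp(Instruction::ZExt, DstBits);
  }
  static ConstantRange addRanges(const ConstantRange &L, const ConstantRange &R) {
    return L.binaryOp(Instruction::Add, R);
  }
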
-bool LazyValueInfoCache::solveBlockValueBinaryOp(LVILatticeVal &BBLV,
+bool LazyValueInfoImpl::solveBlockValueBinaryOp(LVILatticeVal &BBLV,
Instruction *BBI,
BasicBlock *BB) {
@@ -1101,7 +1152,7 @@ bool LazyValueInfoCache::solveBlockValueBinaryOp(LVILatticeVal &BBLV,
// Unhandled instructions are overdefined.
DEBUG(dbgs() << " compute BB '" << BB->getName()
<< "' - overdefined (unknown binary operator).\n");
- BBLV.markOverdefined();
+ BBLV = LVILatticeVal::getOverdefined();
return true;
};
@@ -1118,7 +1169,8 @@ bool LazyValueInfoCache::solveBlockValueBinaryOp(LVILatticeVal &BBLV,
ConstantRange LHSRange = ConstantRange(OperandBitWidth);
if (hasBlockValue(BBI->getOperand(0), BB)) {
LVILatticeVal LHSVal = getBlockValue(BBI->getOperand(0), BB);
- intersectAssumeBlockValueConstantRange(BBI->getOperand(0), LHSVal, BBI);
+ intersectAssumeOrGuardBlockValueConstantRange(BBI->getOperand(0), LHSVal,
+ BBI);
if (LHSVal.isConstantRange())
LHSRange = LHSVal.getConstantRange();
}
@@ -1129,82 +1181,114 @@ bool LazyValueInfoCache::solveBlockValueBinaryOp(LVILatticeVal &BBLV,
// NOTE: We're currently limited by the set of operations that ConstantRange
// can evaluate symbolically. Enhancing that set will allows us to analyze
// more definitions.
- LVILatticeVal Result;
- switch (BBI->getOpcode()) {
- case Instruction::Add:
- Result.markConstantRange(LHSRange.add(RHSRange));
- break;
- case Instruction::Sub:
- Result.markConstantRange(LHSRange.sub(RHSRange));
- break;
- case Instruction::Mul:
- Result.markConstantRange(LHSRange.multiply(RHSRange));
- break;
- case Instruction::UDiv:
- Result.markConstantRange(LHSRange.udiv(RHSRange));
- break;
- case Instruction::Shl:
- Result.markConstantRange(LHSRange.shl(RHSRange));
- break;
- case Instruction::LShr:
- Result.markConstantRange(LHSRange.lshr(RHSRange));
- break;
- case Instruction::And:
- Result.markConstantRange(LHSRange.binaryAnd(RHSRange));
- break;
- case Instruction::Or:
- Result.markConstantRange(LHSRange.binaryOr(RHSRange));
- break;
- default:
- // Should be dead if the code above is correct
- llvm_unreachable("inconsistent with above");
- break;
- }
-
- BBLV = Result;
+ auto BinOp = (Instruction::BinaryOps) BBI->getOpcode();
+ BBLV = LVILatticeVal::getRange(LHSRange.binaryOp(BinOp, RHSRange));
return true;
}
-bool getValueFromFromCondition(Value *Val, ICmpInst *ICI,
- LVILatticeVal &Result, bool isTrueDest) {
- assert(ICI && "precondition");
- if (isa<Constant>(ICI->getOperand(1))) {
- if (ICI->isEquality() && ICI->getOperand(0) == Val) {
+static LVILatticeVal getValueFromICmpCondition(Value *Val, ICmpInst *ICI,
+ bool isTrueDest) {
+ Value *LHS = ICI->getOperand(0);
+ Value *RHS = ICI->getOperand(1);
+ CmpInst::Predicate Predicate = ICI->getPredicate();
+
+ if (isa<Constant>(RHS)) {
+ if (ICI->isEquality() && LHS == Val) {
// We know that V has the RHS constant if this is a true SETEQ or
// false SETNE.
- if (isTrueDest == (ICI->getPredicate() == ICmpInst::ICMP_EQ))
- Result = LVILatticeVal::get(cast<Constant>(ICI->getOperand(1)));
+ if (isTrueDest == (Predicate == ICmpInst::ICMP_EQ))
+ return LVILatticeVal::get(cast<Constant>(RHS));
else
- Result = LVILatticeVal::getNot(cast<Constant>(ICI->getOperand(1)));
- return true;
+ return LVILatticeVal::getNot(cast<Constant>(RHS));
}
+ }
- // Recognize the range checking idiom that InstCombine produces.
- // (X-C1) u< C2 --> [C1, C1+C2)
- ConstantInt *NegOffset = nullptr;
- if (ICI->getPredicate() == ICmpInst::ICMP_ULT)
- match(ICI->getOperand(0), m_Add(m_Specific(Val),
- m_ConstantInt(NegOffset)));
-
- ConstantInt *CI = dyn_cast<ConstantInt>(ICI->getOperand(1));
- if (CI && (ICI->getOperand(0) == Val || NegOffset)) {
- // Calculate the range of values that are allowed by the comparison
- ConstantRange CmpRange(CI->getValue());
- ConstantRange TrueValues =
- ConstantRange::makeAllowedICmpRegion(ICI->getPredicate(), CmpRange);
+ if (!Val->getType()->isIntegerTy())
+ return LVILatticeVal::getOverdefined();
+
+ // Use ConstantRange::makeAllowedICmpRegion in order to determine the possible
+ // range of Val guaranteed by the condition. Recognize comparisons in the form
+ // of:
+ // icmp <pred> Val, ...
+ // icmp <pred> (add Val, Offset), ...
+ // The latter is the range checking idiom that InstCombine produces. Subtract
+ // the offset from the allowed range for RHS in this case.
+
+ // Val or (add Val, Offset) can be on either hand of the comparison
+ if (LHS != Val && !match(LHS, m_Add(m_Specific(Val), m_ConstantInt()))) {
+ std::swap(LHS, RHS);
+ Predicate = CmpInst::getSwappedPredicate(Predicate);
+ }
- if (NegOffset) // Apply the offset from above.
- TrueValues = TrueValues.subtract(NegOffset->getValue());
+ ConstantInt *Offset = nullptr;
+ if (LHS != Val)
+ match(LHS, m_Add(m_Specific(Val), m_ConstantInt(Offset)));
+
+ if (LHS == Val || Offset) {
+ // Calculate the range of values that are allowed by the comparison
+ ConstantRange RHSRange(RHS->getType()->getIntegerBitWidth(),
+ /*isFullSet=*/true);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS))
+ RHSRange = ConstantRange(CI->getValue());
+ else if (Instruction *I = dyn_cast<Instruction>(RHS))
+ if (auto *Ranges = I->getMetadata(LLVMContext::MD_range))
+ RHSRange = getConstantRangeFromMetadata(*Ranges);
+
+ // If we're interested in the false dest, invert the condition
+ CmpInst::Predicate Pred =
+ isTrueDest ? Predicate : CmpInst::getInversePredicate(Predicate);
+ ConstantRange TrueValues =
+ ConstantRange::makeAllowedICmpRegion(Pred, RHSRange);
- // If we're interested in the false dest, invert the condition.
- if (!isTrueDest) TrueValues = TrueValues.inverse();
+ if (Offset) // Apply the offset from above.
+ TrueValues = TrueValues.subtract(Offset->getValue());
- Result = LVILatticeVal::getRange(std::move(TrueValues));
- return true;
- }
+ return LVILatticeVal::getRange(std::move(TrueValues));
}
- return false;
+ return LVILatticeVal::getOverdefined();
+}
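
Editor's note: the offset handling above recovers the range-check idiom InstCombine emits. For `icmp ult (add %x, C1), C2` the allowed region for the add is [0, C2), and subtracting the add's constant gives the range of %x itself. A sketch of just that arithmetic (helper name assumed):

  #include "llvm/ADT/APInt.h"
  #include "llvm/IR/ConstantRange.h"
  #include "llvm/IR/InstrTypes.h"
  using namespace llvm;

  // Range of %x implied on the true edge of `icmp ult (add %x, C1), C2`.
  static ConstantRange rangeFromShiftedULT(const APInt &C1, const APInt &C2) {
    ConstantRange AddRange = ConstantRange::makeAllowedICmpRegion(
        CmpInst::ICMP_ULT, ConstantRange(C2)); // the add lies in [0, C2)
    return AddRange.subtract(C1);              // undo the offset applied to %x
  }

For example, an add constant of -5 with C2 = 10 yields %x in [5, 15), matching the old "(X-C1) u< C2 --> [C1, C1+C2)" comment.
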
+
+static LVILatticeVal
+getValueFromCondition(Value *Val, Value *Cond, bool isTrueDest,
+ DenseMap<Value*, LVILatticeVal> &Visited);
+
+static LVILatticeVal
+getValueFromConditionImpl(Value *Val, Value *Cond, bool isTrueDest,
+ DenseMap<Value*, LVILatticeVal> &Visited) {
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(Cond))
+ return getValueFromICmpCondition(Val, ICI, isTrueDest);
+
+ // Handle conditions in the form of (cond1 && cond2), we know that on the
+ // true dest path both of the conditions hold.
+ if (!isTrueDest)
+ return LVILatticeVal::getOverdefined();
+
+ BinaryOperator *BO = dyn_cast<BinaryOperator>(Cond);
+ if (!BO || BO->getOpcode() != BinaryOperator::And)
+ return LVILatticeVal::getOverdefined();
+
+ auto RHS = getValueFromCondition(Val, BO->getOperand(0), isTrueDest, Visited);
+ auto LHS = getValueFromCondition(Val, BO->getOperand(1), isTrueDest, Visited);
+ return intersect(RHS, LHS);
+}
+
+static LVILatticeVal
+getValueFromCondition(Value *Val, Value *Cond, bool isTrueDest,
+ DenseMap<Value*, LVILatticeVal> &Visited) {
+ auto I = Visited.find(Cond);
+ if (I != Visited.end())
+ return I->second;
+
+ auto Result = getValueFromConditionImpl(Val, Cond, isTrueDest, Visited);
+ Visited[Cond] = Result;
+ return Result;
+}
+
+LVILatticeVal getValueFromCondition(Value *Val, Value *Cond, bool isTrueDest) {
+ assert(Cond && "precondition");
+ DenseMap<Value*, LVILatticeVal> Visited;
+ return getValueFromCondition(Val, Cond, isTrueDest, Visited);
}
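
Editor's note: on a true edge, a condition of the form (cond1 && cond2) constrains the value by both conjuncts, so the two derived lattice values are intersected. A standalone worked example of the ranges involved, using ConstantRange directly rather than the lattice (function name assumed):

  #include "llvm/ADT/APInt.h"
  #include "llvm/IR/ConstantRange.h"
  #include "llvm/IR/InstrTypes.h"
  using namespace llvm;

  // For `br i1 (and (icmp sgt %x, 0), (icmp slt %x, 10))`, taken on its true
  // edge, each conjunct yields a range for %x and the intersection is [1, 10).
  static ConstantRange exampleAndOfICmps() {
    APInt Zero(32, 0), Ten(32, 10);
    ConstantRange GreaterThanZero = ConstantRange::makeAllowedICmpRegion(
        CmpInst::ICMP_SGT, ConstantRange(Zero));
    ConstantRange LessThanTen = ConstantRange::makeAllowedICmpRegion(
        CmpInst::ICMP_SLT, ConstantRange(Ten));
    return GreaterThanZero.intersectWith(LessThanTen); // [1, 10)
  }
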
/// \brief Compute the value of Val on the edge BBFrom -> BBTo. Returns false if
@@ -1233,9 +1317,9 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom,
// If the condition of the branch is an equality comparison, we may be
// able to infer the value.
- if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition()))
- if (getValueFromFromCondition(Val, ICI, Result, isTrueDest))
- return true;
+ Result = getValueFromCondition(Val, BI->getCondition(), isTrueDest);
+ if (!Result.isOverdefined())
+ return true;
}
}
@@ -1267,7 +1351,7 @@ static bool getEdgeValueLocal(Value *Val, BasicBlock *BBFrom,
/// \brief Compute the value of Val on the edge BBFrom -> BBTo or the value at
/// the basic block if the edge does not constrain Val.
-bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom,
+bool LazyValueInfoImpl::getEdgeValue(Value *Val, BasicBlock *BBFrom,
BasicBlock *BBTo, LVILatticeVal &Result,
Instruction *CxtI) {
// If already a constant, there is nothing to compute.
@@ -1280,7 +1364,7 @@ bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom,
if (!getEdgeValueLocal(Val, BBFrom, BBTo, LocalResult))
// If we couldn't constrain the value on the edge, LocalResult doesn't
// provide any information.
- LocalResult.markOverdefined();
+ LocalResult = LVILatticeVal::getOverdefined();
if (hasSingleValue(LocalResult)) {
// Can't get any more precise here
@@ -1298,39 +1382,40 @@ bool LazyValueInfoCache::getEdgeValue(Value *Val, BasicBlock *BBFrom,
// Try to intersect ranges of the BB and the constraint on the edge.
LVILatticeVal InBlock = getBlockValue(Val, BBFrom);
- intersectAssumeBlockValueConstantRange(Val, InBlock, BBFrom->getTerminator());
+ intersectAssumeOrGuardBlockValueConstantRange(Val, InBlock,
+ BBFrom->getTerminator());
// We can use the context instruction (generically the ultimate instruction
// the calling pass is trying to simplify) here, even though the result of
// this function is generally cached when called from the solve* functions
// (and that cached result might be used with queries using a different
// context instruction), because when this function is called from the solve*
// functions, the context instruction is not provided. When called from
- // LazyValueInfoCache::getValueOnEdge, the context instruction is provided,
+ // LazyValueInfoImpl::getValueOnEdge, the context instruction is provided,
// but then the result is not cached.
- intersectAssumeBlockValueConstantRange(Val, InBlock, CxtI);
+ intersectAssumeOrGuardBlockValueConstantRange(Val, InBlock, CxtI);
Result = intersect(LocalResult, InBlock);
return true;
}
-LVILatticeVal LazyValueInfoCache::getValueInBlock(Value *V, BasicBlock *BB,
+LVILatticeVal LazyValueInfoImpl::getValueInBlock(Value *V, BasicBlock *BB,
Instruction *CxtI) {
DEBUG(dbgs() << "LVI Getting block end value " << *V << " at '"
<< BB->getName() << "'\n");
assert(BlockValueStack.empty() && BlockValueSet.empty());
if (!hasBlockValue(V, BB)) {
- pushBlockValue(std::make_pair(BB, V));
+ pushBlockValue(std::make_pair(BB, V));
solve();
}
LVILatticeVal Result = getBlockValue(V, BB);
- intersectAssumeBlockValueConstantRange(V, Result, CxtI);
+ intersectAssumeOrGuardBlockValueConstantRange(V, Result, CxtI);
DEBUG(dbgs() << " Result = " << Result << "\n");
return Result;
}
-LVILatticeVal LazyValueInfoCache::getValueAt(Value *V, Instruction *CxtI) {
+LVILatticeVal LazyValueInfoImpl::getValueAt(Value *V, Instruction *CxtI) {
DEBUG(dbgs() << "LVI Getting value " << *V << " at '"
<< CxtI->getName() << "'\n");
@@ -1340,13 +1425,13 @@ LVILatticeVal LazyValueInfoCache::getValueAt(Value *V, Instruction *CxtI) {
LVILatticeVal Result = LVILatticeVal::getOverdefined();
if (auto *I = dyn_cast<Instruction>(V))
Result = getFromRangeMetadata(I);
- intersectAssumeBlockValueConstantRange(V, Result, CxtI);
+ intersectAssumeOrGuardBlockValueConstantRange(V, Result, CxtI);
DEBUG(dbgs() << " Result = " << Result << "\n");
return Result;
}
-LVILatticeVal LazyValueInfoCache::
+LVILatticeVal LazyValueInfoImpl::
getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB,
Instruction *CxtI) {
DEBUG(dbgs() << "LVI Getting edge value " << *V << " from '"
@@ -1364,75 +1449,24 @@ getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB,
return Result;
}
-void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
- BasicBlock *NewSucc) {
- // When an edge in the graph has been threaded, values that we could not
- // determine a value for before (i.e. were marked overdefined) may be
- // possible to solve now. We do NOT try to proactively update these values.
- // Instead, we clear their entries from the cache, and allow lazy updating to
- // recompute them when needed.
-
- // The updating process is fairly simple: we need to drop cached info
- // for all values that were marked overdefined in OldSucc, and for those same
- // values in any successor of OldSucc (except NewSucc) in which they were
- // also marked overdefined.
- std::vector<BasicBlock*> worklist;
- worklist.push_back(OldSucc);
-
- auto I = OverDefinedCache.find(OldSucc);
- if (I == OverDefinedCache.end())
- return; // Nothing to process here.
- SmallVector<Value *, 4> ValsToClear(I->second.begin(), I->second.end());
-
- // Use a worklist to perform a depth-first search of OldSucc's successors.
- // NOTE: We do not need a visited list since any blocks we have already
- // visited will have had their overdefined markers cleared already, and we
- // thus won't loop to their successors.
- while (!worklist.empty()) {
- BasicBlock *ToUpdate = worklist.back();
- worklist.pop_back();
-
- // Skip blocks only accessible through NewSucc.
- if (ToUpdate == NewSucc) continue;
-
- bool changed = false;
- for (Value *V : ValsToClear) {
- // If a value was marked overdefined in OldSucc, and is here too...
- auto OI = OverDefinedCache.find(ToUpdate);
- if (OI == OverDefinedCache.end())
- continue;
- SmallPtrSetImpl<Value *> &ValueSet = OI->second;
- if (!ValueSet.count(V))
- continue;
-
- ValueSet.erase(V);
- if (ValueSet.empty())
- OverDefinedCache.erase(OI);
-
- // If we removed anything, then we potentially need to update
- // blocks successors too.
- changed = true;
- }
-
- if (!changed) continue;
-
- worklist.insert(worklist.end(), succ_begin(ToUpdate), succ_end(ToUpdate));
- }
+void LazyValueInfoImpl::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
+ BasicBlock *NewSucc) {
+ TheCache.threadEdgeImpl(OldSucc, NewSucc);
}
//===----------------------------------------------------------------------===//
// LazyValueInfo Impl
//===----------------------------------------------------------------------===//
-/// This lazily constructs the LazyValueInfoCache.
-static LazyValueInfoCache &getCache(void *&PImpl, AssumptionCache *AC,
- const DataLayout *DL,
- DominatorTree *DT = nullptr) {
+/// This lazily constructs the LazyValueInfoImpl.
+static LazyValueInfoImpl &getImpl(void *&PImpl, AssumptionCache *AC,
+ const DataLayout *DL,
+ DominatorTree *DT = nullptr) {
if (!PImpl) {
assert(DL && "getCache() called with a null DataLayout");
- PImpl = new LazyValueInfoCache(AC, *DL, DT);
+ PImpl = new LazyValueInfoImpl(AC, *DL, DT);
}
- return *static_cast<LazyValueInfoCache*>(PImpl);
+ return *static_cast<LazyValueInfoImpl*>(PImpl);
}
bool LazyValueInfoWrapperPass::runOnFunction(Function &F) {
@@ -1445,7 +1479,7 @@ bool LazyValueInfoWrapperPass::runOnFunction(Function &F) {
Info.TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
if (Info.PImpl)
- getCache(Info.PImpl, Info.AC, &DL, Info.DT).clear();
+ getImpl(Info.PImpl, Info.AC, &DL, Info.DT).clear();
// Fully lazy.
return false;
@@ -1464,7 +1498,7 @@ LazyValueInfo::~LazyValueInfo() { releaseMemory(); }
void LazyValueInfo::releaseMemory() {
// If the cache was allocated, free it.
if (PImpl) {
- delete &getCache(PImpl, AC, nullptr);
+ delete &getImpl(PImpl, AC, nullptr);
PImpl = nullptr;
}
}
@@ -1479,7 +1513,6 @@ LazyValueInfo LazyValueAnalysis::run(Function &F, FunctionAnalysisManager &FAM)
return LazyValueInfo(&AC, &TLI, DT);
}
-
/// Returns true if we can statically tell that this value will never be a
/// "useful" constant. In practice, this means we've got something like an
/// alloca or a malloc call for which a comparison against a constant can
@@ -1502,7 +1535,7 @@ Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB,
const DataLayout &DL = BB->getModule()->getDataLayout();
LVILatticeVal Result =
- getCache(PImpl, AC, &DL, DT).getValueInBlock(V, BB, CxtI);
+ getImpl(PImpl, AC, &DL, DT).getValueInBlock(V, BB, CxtI);
if (Result.isConstant())
return Result.getConstant();
@@ -1520,12 +1553,15 @@ ConstantRange LazyValueInfo::getConstantRange(Value *V, BasicBlock *BB,
unsigned Width = V->getType()->getIntegerBitWidth();
const DataLayout &DL = BB->getModule()->getDataLayout();
LVILatticeVal Result =
- getCache(PImpl, AC, &DL, DT).getValueInBlock(V, BB, CxtI);
- assert(!Result.isConstant());
+ getImpl(PImpl, AC, &DL, DT).getValueInBlock(V, BB, CxtI);
if (Result.isUndefined())
return ConstantRange(Width, /*isFullSet=*/false);
if (Result.isConstantRange())
return Result.getConstantRange();
+ // We represent ConstantInt constants as constant ranges but other kinds
+ // of integer constants, i.e. ConstantExpr will be tagged as constants
+ assert(!(Result.isConstant() && isa<ConstantInt>(Result.getConstant())) &&
+ "ConstantInt value must be represented as constantrange");
return ConstantRange(Width, /*isFullSet=*/true);
}
@@ -1536,7 +1572,7 @@ Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB,
Instruction *CxtI) {
const DataLayout &DL = FromBB->getModule()->getDataLayout();
LVILatticeVal Result =
- getCache(PImpl, AC, &DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI);
+ getImpl(PImpl, AC, &DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI);
if (Result.isConstant())
return Result.getConstant();
@@ -1583,8 +1619,8 @@ static LazyValueInfo::Tristate getPredicateResult(unsigned Pred, Constant *C,
}
// Handle more complex predicates.
- ConstantRange TrueValues =
- ICmpInst::makeConstantRange((ICmpInst::Predicate)Pred, CI->getValue());
+ ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(
+ (ICmpInst::Predicate)Pred, CI->getValue());
if (TrueValues.contains(CR))
return LazyValueInfo::True;
if (TrueValues.inverse().contains(CR))
@@ -1624,7 +1660,7 @@ LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
Instruction *CxtI) {
const DataLayout &DL = FromBB->getModule()->getDataLayout();
LVILatticeVal Result =
- getCache(PImpl, AC, &DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI);
+ getImpl(PImpl, AC, &DL, DT).getValueOnEdge(V, FromBB, ToBB, CxtI);
return getPredicateResult(Pred, C, Result, DL, TLI);
}
@@ -1644,7 +1680,7 @@ LazyValueInfo::getPredicateAt(unsigned Pred, Value *V, Constant *C,
return LazyValueInfo::True;
}
const DataLayout &DL = CxtI->getModule()->getDataLayout();
- LVILatticeVal Result = getCache(PImpl, AC, &DL, DT).getValueAt(V, CxtI);
+ LVILatticeVal Result = getImpl(PImpl, AC, &DL, DT).getValueAt(V, CxtI);
Tristate Ret = getPredicateResult(Pred, C, Result, DL, TLI);
if (Ret != Unknown)
return Ret;
@@ -1703,7 +1739,7 @@ LazyValueInfo::getPredicateAt(unsigned Pred, Value *V, Constant *C,
}
if (Baseline != Unknown)
return Baseline;
- }
+ }
// For a comparison where the V is outside this block, it's possible
// that we've branched on it before. Look to see if the value is known
@@ -1734,13 +1770,13 @@ void LazyValueInfo::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc,
BasicBlock *NewSucc) {
if (PImpl) {
const DataLayout &DL = PredBB->getModule()->getDataLayout();
- getCache(PImpl, AC, &DL, DT).threadEdge(PredBB, OldSucc, NewSucc);
+ getImpl(PImpl, AC, &DL, DT).threadEdge(PredBB, OldSucc, NewSucc);
}
}
void LazyValueInfo::eraseBlock(BasicBlock *BB) {
if (PImpl) {
const DataLayout &DL = BB->getModule()->getDataLayout();
- getCache(PImpl, AC, &DL, DT).eraseBlock(BB);
+ getImpl(PImpl, AC, &DL, DT).eraseBlock(BB);
}
}
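The getCache() to getImpl() rename above preserves the lazily constructed PImpl idiom: the void* handle owned by LazyValueInfo is populated on first use and deleted in releaseMemory(). A stripped-down sketch of that construct-on-first-use shape, with purely illustrative names:

    // Not LVI code; SolverImpl stands in for LazyValueInfoImpl.
    struct SolverImpl { /* solver and cache state */ };

    static SolverImpl &getOrCreateImpl(void *&PImpl) {
      if (!PImpl)
        PImpl = new SolverImpl();
      return *static_cast<SolverImpl *>(PImpl);
    }

    static void releaseImpl(void *&PImpl) {
      delete static_cast<SolverImpl *>(PImpl);
      PImpl = nullptr;
    }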
diff --git a/contrib/llvm/lib/Analysis/Lint.cpp b/contrib/llvm/lib/Analysis/Lint.cpp
index fdf5f55dab9f..2ca46b1fe872 100644
--- a/contrib/llvm/lib/Analysis/Lint.cpp
+++ b/contrib/llvm/lib/Analysis/Lint.cpp
@@ -35,27 +35,48 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/Lint.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <string>
+
using namespace llvm;
namespace {
@@ -64,7 +85,7 @@ namespace {
static const unsigned Write = 2;
static const unsigned Callee = 4;
static const unsigned Branchee = 8;
- }
+ } // end namespace MemRef
class Lint : public FunctionPass, public InstVisitor<Lint> {
friend class InstVisitor<Lint>;
@@ -159,7 +180,7 @@ namespace {
WriteValues({V1, Vs...});
}
};
-}
+} // end anonymous namespace
char Lint::ID = 0;
INITIALIZE_PASS_BEGIN(Lint, "lint", "Statically lint-checks LLVM IR",
@@ -173,7 +194,7 @@ INITIALIZE_PASS_END(Lint, "lint", "Statically lint-checks LLVM IR",
// Assert - We know that cond should be true, if not print an error message.
#define Assert(C, ...) \
- do { if (!(C)) { CheckFailed(__VA_ARGS__); return; } } while (0)
+ do { if (!(C)) { CheckFailed(__VA_ARGS__); return; } } while (false)
// Lint::run - This is the main Analysis entry point for a
// function.
@@ -680,9 +701,9 @@ Value *Lint::findValueImpl(Value *V, bool OffsetOk,
if (Instruction *Inst = dyn_cast<Instruction>(V)) {
if (Value *W = SimplifyInstruction(Inst, *DL, TLI, DT, AC))
return findValueImpl(W, OffsetOk, Visited);
- } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
- if (Value *W = ConstantFoldConstantExpression(CE, *DL, TLI))
- if (W != V)
+ } else if (auto *C = dyn_cast<Constant>(V)) {
+ if (Value *W = ConstantFoldConstant(C, *DL, TLI))
+ if (W && W != V)
return findValueImpl(W, OffsetOk, Visited);
}
diff --git a/contrib/llvm/lib/Analysis/Loads.cpp b/contrib/llvm/lib/Analysis/Loads.cpp
index 75426b54195a..e46541e6538d 100644
--- a/contrib/llvm/lib/Analysis/Loads.cpp
+++ b/contrib/llvm/lib/Analysis/Loads.cpp
@@ -55,6 +55,10 @@ static bool isDereferenceableAndAlignedPointer(
const Value *V, unsigned Align, const APInt &Size, const DataLayout &DL,
const Instruction *CtxI, const DominatorTree *DT,
SmallPtrSetImpl<const Value *> &Visited) {
+ // Already visited? Bail out; we've likely hit unreachable code.
+ if (!Visited.insert(V).second)
+ return false;
+
// Note that it is not safe to speculate into a malloc'd region because
// malloc may return null.
@@ -87,9 +91,11 @@ static bool isDereferenceableAndAlignedPointer(
// then the GEP (== Base + Offset == k_0 * Align + k_1 * Align) is also
// aligned to Align bytes.
- return Visited.insert(Base).second &&
- isDereferenceableAndAlignedPointer(Base, Align, Offset + Size, DL,
- CtxI, DT, Visited);
+ // Offset and Size may have different bit widths if we have visited an
+ // addrspacecast, so we can't do arithmetic directly on the APInt values.
+ return isDereferenceableAndAlignedPointer(
+ Base, Align, Offset + Size.sextOrTrunc(Offset.getBitWidth()),
+ DL, CtxI, DT, Visited);
}
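The sextOrTrunc above is needed because APInt arithmetic asserts that both operands share a bit width, and Offset and Size can end up with different widths once an addrspacecast between address spaces with different pointer sizes has been walked. A small illustrative helper (not part of this change) showing the normalization:

    #include "llvm/ADT/APInt.h"
    using namespace llvm;

    // Match Size to Offset's width before adding; APInt::operator+ would
    // otherwise assert on mismatched bit widths.
    APInt addOffsetAndSize(const APInt &Offset, const APInt &Size) {
      return Offset + Size.sextOrTrunc(Offset.getBitWidth());
    }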
// For gc.relocate, look through relocations
@@ -302,11 +308,11 @@ llvm::DefMaxInstsToScan("available-load-scan-limit", cl::init(6), cl::Hidden,
"to scan backward from a given instruction, when searching for "
"available loaded value"));
-Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB,
+Value *llvm::FindAvailableLoadedValue(LoadInst *Load,
+ BasicBlock *ScanBB,
BasicBlock::iterator &ScanFrom,
unsigned MaxInstsToScan,
- AliasAnalysis *AA, AAMDNodes *AATags,
- bool *IsLoadCSE) {
+ AliasAnalysis *AA, bool *IsLoadCSE) {
if (MaxInstsToScan == 0)
MaxInstsToScan = ~0U;
@@ -356,8 +362,6 @@ Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB,
if (LI->isAtomic() < Load->isAtomic())
return nullptr;
- if (AATags)
- LI->getAAMetadata(*AATags);
if (IsLoadCSE)
*IsLoadCSE = true;
return LI;
@@ -377,8 +381,8 @@ Value *llvm::FindAvailableLoadedValue(LoadInst *Load, BasicBlock *ScanBB,
if (SI->isAtomic() < Load->isAtomic())
return nullptr;
- if (AATags)
- SI->getAAMetadata(*AATags);
+ if (IsLoadCSE)
+ *IsLoadCSE = false;
return SI->getOperand(0);
}
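With the AATags out-parameter removed, callers of FindAvailableLoadedValue now receive only the available value and the optional IsLoadCSE flag. A hypothetical caller of the new signature (the surrounding function and the scan budget of 6 are illustrative, not taken from this change):

    #include "llvm/Analysis/AliasAnalysis.h"
    #include "llvm/Analysis/Loads.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    Value *findAvailableValueFor(LoadInst *Load, AliasAnalysis *AA) {
      BasicBlock *ScanBB = Load->getParent();
      BasicBlock::iterator ScanFrom = Load->getIterator();
      bool IsLoadCSE = false;
      // Scan backwards from the load, giving up after a few instructions.
      return FindAvailableLoadedValue(Load, ScanBB, ScanFrom,
                                      /*MaxInstsToScan=*/6, AA, &IsLoadCSE);
    }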
diff --git a/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 5214eb7c051c..2f3dca3d23fa 100644
--- a/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -12,19 +12,61 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPassManager.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstdlib>
+#include <iterator>
+#include <utility>
+#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "loop-accesses"
@@ -94,14 +136,18 @@ bool VectorizerParams::isInterleaveForced() {
}
void LoopAccessReport::emitAnalysis(const LoopAccessReport &Message,
- const Function *TheFunction,
- const Loop *TheLoop,
- const char *PassName) {
+ const Loop *TheLoop, const char *PassName,
+ OptimizationRemarkEmitter &ORE) {
DebugLoc DL = TheLoop->getStartLoc();
- if (const Instruction *I = Message.getInstr())
- DL = I->getDebugLoc();
- emitOptimizationRemarkAnalysis(TheFunction->getContext(), PassName,
- *TheFunction, DL, Message.str());
+ const Value *V = TheLoop->getHeader();
+ if (const Instruction *I = Message.getInstr()) {
+ // If there is no debug location attached to the instruction, fall back
+ // to using the loop's.
+ if (I->getDebugLoc())
+ DL = I->getDebugLoc();
+ V = I->getParent();
+ }
+ ORE.emitOptimizationRemarkAnalysis(PassName, DL, V, Message.str());
}
Value *llvm::stripIntegerCast(Value *V) {
@@ -463,6 +509,7 @@ void RuntimePointerChecking::print(raw_ostream &OS, unsigned Depth) const {
}
namespace {
+
/// \brief Analyses memory accesses in a loop.
///
/// Checks whether run time pointer checks are needed and builds sets for data
@@ -886,7 +933,7 @@ static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
/// \brief Check whether the access through \p Ptr has a constant stride.
int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
const Loop *Lp, const ValueToValueMap &StridesMap,
- bool Assume) {
+ bool Assume, bool ShouldCheckWrap) {
Type *Ty = Ptr->getType();
assert(Ty->isPointerTy() && "Unexpected non-ptr");
@@ -925,9 +972,9 @@ int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
// to access the pointer value "0" which is undefined behavior in address
// space 0, therefore we can also vectorize this case.
bool IsInBoundsGEP = isInBoundsGep(Ptr);
- bool IsNoWrapAddRec =
- PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) ||
- isNoWrapAddRec(Ptr, AR, PSE, Lp);
+ bool IsNoWrapAddRec = !ShouldCheckWrap ||
+ PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) ||
+ isNoWrapAddRec(Ptr, AR, PSE, Lp);
bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0;
if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) {
if (Assume) {
@@ -1028,8 +1075,8 @@ bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
return false;
// Make sure that A and B have the same type if required.
- if(CheckType && PtrA->getType() != PtrB->getType())
- return false;
+ if (CheckType && PtrA->getType() != PtrB->getType())
+ return false;
unsigned PtrBitWidth = DL.getPointerSizeInBits(ASA);
Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
@@ -1451,7 +1498,7 @@ MemoryDepChecker::getInstructionsForAccess(Value *Ptr, bool isWrite) const {
auto &IndexVector = Accesses.find(Access)->second;
SmallVector<Instruction *, 4> Insts;
- std::transform(IndexVector.begin(), IndexVector.end(),
+ transform(IndexVector,
std::back_inserter(Insts),
[&](unsigned Idx) { return this->InstMap[Idx]; });
return Insts;
@@ -1478,25 +1525,23 @@ bool LoopAccessInfo::canAnalyzeLoop() {
// We can only analyze innermost loops.
if (!TheLoop->empty()) {
DEBUG(dbgs() << "LAA: loop is not the innermost loop\n");
- emitAnalysis(LoopAccessReport() << "loop is not the innermost loop");
+ recordAnalysis("NotInnerMostLoop") << "loop is not the innermost loop";
return false;
}
// We must have a single backedge.
if (TheLoop->getNumBackEdges() != 1) {
DEBUG(dbgs() << "LAA: loop control flow is not understood by analyzer\n");
- emitAnalysis(
- LoopAccessReport() <<
- "loop control flow is not understood by analyzer");
+ recordAnalysis("CFGNotUnderstood")
+ << "loop control flow is not understood by analyzer";
return false;
}
// We must have a single exiting block.
if (!TheLoop->getExitingBlock()) {
DEBUG(dbgs() << "LAA: loop control flow is not understood by analyzer\n");
- emitAnalysis(
- LoopAccessReport() <<
- "loop control flow is not understood by analyzer");
+ recordAnalysis("CFGNotUnderstood")
+ << "loop control flow is not understood by analyzer";
return false;
}
@@ -1505,17 +1550,16 @@ bool LoopAccessInfo::canAnalyzeLoop() {
// instructions in the loop are executed the same number of times.
if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
DEBUG(dbgs() << "LAA: loop control flow is not understood by analyzer\n");
- emitAnalysis(
- LoopAccessReport() <<
- "loop control flow is not understood by analyzer");
+ recordAnalysis("CFGNotUnderstood")
+ << "loop control flow is not understood by analyzer";
return false;
}
// ScalarEvolution needs to be able to find the exit count.
const SCEV *ExitCount = PSE->getBackedgeTakenCount();
if (ExitCount == PSE->getSE()->getCouldNotCompute()) {
- emitAnalysis(LoopAccessReport()
- << "could not determine number of loop iterations");
+ recordAnalysis("CantComputeNumberOfIterations")
+ << "could not determine number of loop iterations";
DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n");
return false;
}
@@ -1564,8 +1608,8 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
auto *Ld = dyn_cast<LoadInst>(&I);
if (!Ld || (!Ld->isSimple() && !IsAnnotatedParallel)) {
- emitAnalysis(LoopAccessReport(Ld)
- << "read with atomic ordering or volatile read");
+ recordAnalysis("NonSimpleLoad", Ld)
+ << "read with atomic ordering or volatile read";
DEBUG(dbgs() << "LAA: Found a non-simple load.\n");
CanVecMem = false;
return;
@@ -1582,14 +1626,14 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
if (I.mayWriteToMemory()) {
auto *St = dyn_cast<StoreInst>(&I);
if (!St) {
- emitAnalysis(LoopAccessReport(St)
- << "instruction cannot be vectorized");
+ recordAnalysis("CantVectorizeInstruction", St)
+ << "instruction cannot be vectorized";
CanVecMem = false;
return;
}
if (!St->isSimple() && !IsAnnotatedParallel) {
- emitAnalysis(LoopAccessReport(St)
- << "write with atomic ordering or volatile write");
+ recordAnalysis("NonSimpleStore", St)
+ << "write with atomic ordering or volatile write";
DEBUG(dbgs() << "LAA: Found a non-simple store.\n");
CanVecMem = false;
return;
@@ -1697,7 +1741,7 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
bool CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, PSE->getSE(),
TheLoop, SymbolicStrides);
if (!CanDoRTIfNeeded) {
- emitAnalysis(LoopAccessReport() << "cannot identify array bounds");
+ recordAnalysis("CantIdentifyArrayBounds") << "cannot identify array bounds";
DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "
<< "the array bounds.\n");
CanVecMem = false;
@@ -1728,8 +1772,8 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
// Check that we found the bounds for the pointer.
if (!CanDoRTIfNeeded) {
- emitAnalysis(LoopAccessReport()
- << "cannot check memory dependencies at runtime");
+ recordAnalysis("CantCheckMemDepsAtRunTime")
+ << "cannot check memory dependencies at runtime";
DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
CanVecMem = false;
return;
@@ -1744,12 +1788,11 @@ void LoopAccessInfo::analyzeLoop(AliasAnalysis *AA, LoopInfo *LI,
<< (PtrRtChecking->Need ? "" : " don't")
<< " need runtime memory checks.\n");
else {
- emitAnalysis(
- LoopAccessReport()
+ recordAnalysis("UnsafeMemDep")
<< "unsafe dependent memory operations in loop. Use "
"#pragma loop distribute(enable) to allow loop distribution "
"to attempt to isolate the offending operations into a separate "
- "loop");
+ "loop";
DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n");
}
}
@@ -1763,13 +1806,35 @@ bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
return !DT->dominates(BB, Latch);
}
-void LoopAccessInfo::emitAnalysis(LoopAccessReport &Message) {
+OptimizationRemarkAnalysis &LoopAccessInfo::recordAnalysis(StringRef RemarkName,
+ Instruction *I) {
assert(!Report && "Multiple reports generated");
- Report = Message;
+
+ Value *CodeRegion = TheLoop->getHeader();
+ DebugLoc DL = TheLoop->getStartLoc();
+
+ if (I) {
+ CodeRegion = I->getParent();
+ // If there is no debug location attached to the instruction, fall back
+ // to using the loop's.
+ if (I->getDebugLoc())
+ DL = I->getDebugLoc();
+ }
+
+ Report = make_unique<OptimizationRemarkAnalysis>(DEBUG_TYPE, RemarkName, DL,
+ CodeRegion);
+ return *Report;
}
bool LoopAccessInfo::isUniform(Value *V) const {
- return (PSE->getSE()->isLoopInvariant(PSE->getSE()->getSCEV(V), TheLoop));
+ auto *SE = PSE->getSE();
+ // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
+ // never considered uniform.
+ // TODO: Is this really what we want? Even without FP SCEV, we may want some
+ // trivially loop-invariant FP values to be considered uniform.
+ if (!SE->isSCEVable(V->getType()))
+ return false;
+ return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop));
}
// FIXME: this function is currently a duplicate of the one in
@@ -1784,6 +1849,7 @@ static Instruction *getFirstInst(Instruction *FirstInst, Value *V,
}
namespace {
+
/// \brief IR Values for the lower and upper bounds of a pointer evolution. We
/// need to use value-handles because SCEV expansion can invalidate previously
/// expanded values. Thus expansion of a pointer can invalidate the bounds for
@@ -1792,6 +1858,7 @@ struct PointerBounds {
TrackingVH<Value> Start;
TrackingVH<Value> End;
};
+
} // end anonymous namespace
/// \brief Expand code for the lower and upper bound of the pointer group \p CG
@@ -1803,18 +1870,24 @@ expandBounds(const RuntimePointerChecking::CheckingPtrGroup *CG, Loop *TheLoop,
Value *Ptr = PtrRtChecking.Pointers[CG->Members[0]].PointerValue;
const SCEV *Sc = SE->getSCEV(Ptr);
+ unsigned AS = Ptr->getType()->getPointerAddressSpace();
+ LLVMContext &Ctx = Loc->getContext();
+
+ // Use this type for pointer arithmetic.
+ Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
+
if (SE->isLoopInvariant(Sc, TheLoop)) {
DEBUG(dbgs() << "LAA: Adding RT check for a loop invariant ptr:" << *Ptr
<< "\n");
- return {Ptr, Ptr};
+ // Ptr could be in the loop body. If so, expand a new one at the correct
+ // location.
+ Instruction *Inst = dyn_cast<Instruction>(Ptr);
+ Value *NewPtr = (Inst && TheLoop->contains(Inst))
+ ? Exp.expandCodeFor(Sc, PtrArithTy, Loc)
+ : Ptr;
+ return {NewPtr, NewPtr};
} else {
- unsigned AS = Ptr->getType()->getPointerAddressSpace();
- LLVMContext &Ctx = Loc->getContext();
-
- // Use this type for pointer arithmetic.
- Type *PtrArithTy = Type::getInt8PtrTy(Ctx, AS);
Value *Start = nullptr, *End = nullptr;
-
DEBUG(dbgs() << "LAA: Adding RT check for range:\n");
Start = Exp.expandCodeFor(CG->Low, PtrArithTy, Loc);
End = Exp.expandCodeFor(CG->High, PtrArithTy, Loc);
@@ -1833,9 +1906,8 @@ static SmallVector<std::pair<PointerBounds, PointerBounds>, 4> expandBounds(
// Here we're relying on the SCEV Expander's cache to only emit code for the
// same bounds once.
- std::transform(
- PointerChecks.begin(), PointerChecks.end(),
- std::back_inserter(ChecksWithBounds),
+ transform(
+ PointerChecks, std::back_inserter(ChecksWithBounds),
[&](const RuntimePointerChecking::PointerCheck &Check) {
PointerBounds
First = expandBounds(Check.first, L, Loc, Exp, SE, PtrRtChecking),
@@ -1967,7 +2039,7 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
}
if (Report)
- OS.indent(Depth) << "Report: " << Report->str() << "\n";
+ OS.indent(Depth) << "Report: " << Report->getMsg() << "\n";
if (auto *Dependences = DepChecker->getDependences()) {
OS.indent(Depth) << "Dependences:\n";
@@ -2046,10 +2118,10 @@ INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(LoopAccessLegacyAnalysis, LAA_NAME, laa_name, false, true)
-char LoopAccessAnalysis::PassID;
+AnalysisKey LoopAccessAnalysis::Key;
-LoopAccessInfo LoopAccessAnalysis::run(Loop &L, AnalysisManager<Loop> &AM) {
- const AnalysisManager<Function> &FAM =
+LoopAccessInfo LoopAccessAnalysis::run(Loop &L, LoopAnalysisManager &AM) {
+ const FunctionAnalysisManager &FAM =
AM.getResult<FunctionAnalysisManagerLoopProxy>(L).getManager();
Function &F = *L.getHeader()->getParent();
auto *SE = FAM.getCachedResult<ScalarEvolutionAnalysis>(F);
@@ -2070,7 +2142,7 @@ LoopAccessInfo LoopAccessAnalysis::run(Loop &L, AnalysisManager<Loop> &AM) {
}
PreservedAnalyses LoopAccessInfoPrinterPass::run(Loop &L,
- AnalysisManager<Loop> &AM) {
+ LoopAnalysisManager &AM) {
Function &F = *L.getHeader()->getParent();
auto &LAI = AM.getResult<LoopAccessAnalysis>(L);
OS << "Loop access info in function '" << F.getName() << "':\n";
@@ -2080,7 +2152,9 @@ PreservedAnalyses LoopAccessInfoPrinterPass::run(Loop &L,
}
namespace llvm {
+
Pass *createLAAPass() {
return new LoopAccessLegacyAnalysis();
}
-}
+
+} // end namespace llvm
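The char PassID to AnalysisKey Key change above follows the new-pass-manager identifier migration: analyses declared through AnalysisInfoMixin expose a static AnalysisKey rather than a char whose address identified the pass. A generic sketch of the declaration side this corresponds to (MyAnalysis and MyResult are placeholder names, not part of this commit):

    #include "llvm/IR/Function.h"
    #include "llvm/IR/PassManager.h"
    using namespace llvm;

    struct MyResult { /* analysis result data */ };

    class MyAnalysis : public AnalysisInfoMixin<MyAnalysis> {
      friend AnalysisInfoMixin<MyAnalysis>;
      static AnalysisKey Key;        // replaces the old `static char PassID`

    public:
      using Result = MyResult;
      Result run(Function &F, FunctionAnalysisManager &AM) { return {}; }
    };

    AnalysisKey MyAnalysis::Key;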
diff --git a/contrib/llvm/lib/Analysis/LoopInfo.cpp b/contrib/llvm/lib/Analysis/LoopInfo.cpp
index 30f7ef392422..19c0171740c9 100644
--- a/contrib/llvm/lib/Analysis/LoopInfo.cpp
+++ b/contrib/llvm/lib/Analysis/LoopInfo.cpp
@@ -143,42 +143,47 @@ PHINode *Loop::getCanonicalInductionVariable() const {
return nullptr;
}
-bool Loop::isLCSSAForm(DominatorTree &DT) const {
- for (BasicBlock *BB : this->blocks()) {
- for (Instruction &I : *BB) {
- // Tokens can't be used in PHI nodes and live-out tokens prevent loop
- // optimizations, so for the purposes of considered LCSSA form, we
- // can ignore them.
- if (I.getType()->isTokenTy())
- continue;
+// Check that 'BB' doesn't have any uses outside of loop 'L'.
+static bool isBlockInLCSSAForm(const Loop &L, const BasicBlock &BB,
+ DominatorTree &DT) {
+ for (const Instruction &I : BB) {
+ // Tokens can't be used in PHI nodes and live-out tokens prevent loop
+ // optimizations, so for the purposes of considered LCSSA form, we
+ // can ignore them.
+ if (I.getType()->isTokenTy())
+ continue;
- for (Use &U : I.uses()) {
- Instruction *UI = cast<Instruction>(U.getUser());
- BasicBlock *UserBB = UI->getParent();
- if (PHINode *P = dyn_cast<PHINode>(UI))
- UserBB = P->getIncomingBlock(U);
-
- // Check the current block, as a fast-path, before checking whether
- // the use is anywhere in the loop. Most values are used in the same
- // block they are defined in. Also, blocks not reachable from the
- // entry are special; uses in them don't need to go through PHIs.
- if (UserBB != BB &&
- !contains(UserBB) &&
- DT.isReachableFromEntry(UserBB))
- return false;
- }
+ for (const Use &U : I.uses()) {
+ const Instruction *UI = cast<Instruction>(U.getUser());
+ const BasicBlock *UserBB = UI->getParent();
+ if (const PHINode *P = dyn_cast<PHINode>(UI))
+ UserBB = P->getIncomingBlock(U);
+
+ // Check the current block, as a fast-path, before checking whether
+ // the use is anywhere in the loop. Most values are used in the same
+ // block they are defined in. Also, blocks not reachable from the
+ // entry are special; uses in them don't need to go through PHIs.
+ if (UserBB != &BB && !L.contains(UserBB) &&
+ DT.isReachableFromEntry(UserBB))
+ return false;
}
}
-
return true;
}
-bool Loop::isRecursivelyLCSSAForm(DominatorTree &DT) const {
- if (!isLCSSAForm(DT))
- return false;
+bool Loop::isLCSSAForm(DominatorTree &DT) const {
+ // For each block we check that it doesn't have any uses outside of this loop.
+ return all_of(this->blocks(), [&](const BasicBlock *BB) {
+ return isBlockInLCSSAForm(*this, *BB, DT);
+ });
+}
- return std::all_of(begin(), end(), [&](const Loop *L) {
- return L->isRecursivelyLCSSAForm(DT);
+bool Loop::isRecursivelyLCSSAForm(DominatorTree &DT, const LoopInfo &LI) const {
+ // For each block we check that it doesn't have any uses outside of its
+ // innermost loop. This process will transitively guarantee that the current
+ // loop and all of the nested loops are in LCSSA form.
+ return all_of(this->blocks(), [&](const BasicBlock *BB) {
+ return isBlockInLCSSAForm(*LI.getLoopFor(BB), *BB, DT);
});
}
@@ -300,23 +305,40 @@ bool Loop::isAnnotatedParallel() const {
}
DebugLoc Loop::getStartLoc() const {
+ return getLocRange().getStart();
+}
+
+Loop::LocRange Loop::getLocRange() const {
// If we have a debug location in the loop ID, then use it.
- if (MDNode *LoopID = getLoopID())
- for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i)
- if (DILocation *L = dyn_cast<DILocation>(LoopID->getOperand(i)))
- return DebugLoc(L);
+ if (MDNode *LoopID = getLoopID()) {
+ DebugLoc Start;
+ // We use the first DebugLoc in the header as the start location of the loop
+ // and, if there is a second DebugLoc in the header, we use it as the end
+ // location of the loop.
+ for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
+ if (DILocation *L = dyn_cast<DILocation>(LoopID->getOperand(i))) {
+ if (!Start)
+ Start = DebugLoc(L);
+ else
+ return LocRange(Start, DebugLoc(L));
+ }
+ }
+
+ if (Start)
+ return LocRange(Start);
+ }
// Try the pre-header first.
if (BasicBlock *PHeadBB = getLoopPreheader())
if (DebugLoc DL = PHeadBB->getTerminator()->getDebugLoc())
- return DL;
+ return LocRange(DL);
// If we have no pre-header or there are no instructions with debug
// info in it, try the header.
if (BasicBlock *HeadBB = getHeader())
- return HeadBB->getTerminator()->getDebugLoc();
+ return LocRange(HeadBB->getTerminator()->getDebugLoc());
- return DebugLoc();
+ return LocRange();
}
bool Loop::hasDedicatedExits() const {
@@ -366,8 +388,7 @@ Loop::getUniqueExitBlocks(SmallVectorImpl<BasicBlock *> &ExitBlocks) const {
// In case of multiple edges from current block to exit block, collect
// only one edge in ExitBlocks. Use switchExitBlocks to keep track of
// duplicate edges.
- if (std::find(SwitchExitBlocks.begin(), SwitchExitBlocks.end(), Successor)
- == SwitchExitBlocks.end()) {
+ if (!is_contained(SwitchExitBlocks, Successor)) {
SwitchExitBlocks.push_back(Successor);
ExitBlocks.push_back(Successor);
}
@@ -387,6 +408,10 @@ BasicBlock *Loop::getUniqueExitBlock() const {
LLVM_DUMP_METHOD void Loop::dump() const {
print(dbgs());
}
+
+LLVM_DUMP_METHOD void Loop::dumpVerbose() const {
+ print(dbgs(), /*Depth=*/ 0, /*Verbose=*/ true);
+}
#endif
//===----------------------------------------------------------------------===//
@@ -532,8 +557,7 @@ Loop *UnloopUpdater::getNearestLoop(BasicBlock *BB, Loop *BBLoop) {
assert(Subloop && "subloop is not an ancestor of the original loop");
}
// Get the current nearest parent of the Subloop exits, initially Unloop.
- NearLoop =
- SubloopParents.insert(std::make_pair(Subloop, &Unloop)).first->second;
+ NearLoop = SubloopParents.insert({Subloop, &Unloop}).first->second;
}
succ_iterator I = succ_begin(BB), E = succ_end(BB);
@@ -645,9 +669,9 @@ void LoopInfo::markAsRemoved(Loop *Unloop) {
}
}
-char LoopAnalysis::PassID;
+AnalysisKey LoopAnalysis::Key;
-LoopInfo LoopAnalysis::run(Function &F, AnalysisManager<Function> &AM) {
+LoopInfo LoopAnalysis::run(Function &F, FunctionAnalysisManager &AM) {
// FIXME: Currently we create a LoopInfo from scratch for every function.
// This may prove to be too wasteful due to deallocating and re-allocating
// memory each time for the underlying map and vector datastructures. At some
@@ -660,7 +684,7 @@ LoopInfo LoopAnalysis::run(Function &F, AnalysisManager<Function> &AM) {
}
PreservedAnalyses LoopPrinterPass::run(Function &F,
- AnalysisManager<Function> &AM) {
+ FunctionAnalysisManager &AM) {
AM.getResult<LoopAnalysis>(F).print(OS);
return PreservedAnalyses::all();
}
@@ -702,8 +726,10 @@ void LoopInfoWrapperPass::verifyAnalysis() const {
// -verify-loop-info option can enable this. In order to perform some
// checking by default, LoopPass has been taught to call verifyLoop manually
// during loop pass sequences.
- if (VerifyLoopInfo)
- LI.verify();
+ if (VerifyLoopInfo) {
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ LI.verify(DT);
+ }
}
void LoopInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
@@ -715,6 +741,14 @@ void LoopInfoWrapperPass::print(raw_ostream &OS, const Module *) const {
LI.print(OS);
}
+PreservedAnalyses LoopVerifierPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ LoopInfo &LI = AM.getResult<LoopAnalysis>(F);
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ LI.verify(DT);
+ return PreservedAnalyses::all();
+}
+
//===----------------------------------------------------------------------===//
// LoopBlocksDFS implementation
//
diff --git a/contrib/llvm/lib/Analysis/LoopPass.cpp b/contrib/llvm/lib/Analysis/LoopPass.cpp
index 222345c9a980..b5b8040984d7 100644
--- a/contrib/llvm/lib/Analysis/LoopPass.cpp
+++ b/contrib/llvm/lib/Analysis/LoopPass.cpp
@@ -15,6 +15,7 @@
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/LoopPassManager.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/OptBisect.h"
@@ -48,7 +49,7 @@ public:
[](BasicBlock *BB) { return BB; });
if (BBI != L->blocks().end() &&
isFunctionInPrintList((*BBI)->getParent()->getName())) {
- AnalysisManager<Loop> DummyLAM;
+ LoopAnalysisManager DummyLAM;
P.run(*L, DummyLAM);
}
return false;
@@ -131,8 +132,8 @@ void LPPassManager::deleteSimpleAnalysisLoop(Loop *L) {
// Recurse through all subloops and all loops into LQ.
static void addLoopIntoQueue(Loop *L, std::deque<Loop *> &LQ) {
LQ.push_back(L);
- for (Loop::reverse_iterator I = L->rbegin(), E = L->rend(); I != E; ++I)
- addLoopIntoQueue(*I, LQ);
+ for (Loop *I : reverse(*L))
+ addLoopIntoQueue(I, LQ);
}
/// Pass Manager itself does not invalidate any analysis info.
@@ -140,6 +141,7 @@ void LPPassManager::getAnalysisUsage(AnalysisUsage &Info) const {
// LPPassManager needs LoopInfo. In the long term LoopInfo class will
// become part of LPPassManager.
Info.addRequired<LoopInfoWrapperPass>();
+ Info.addRequired<DominatorTreeWrapperPass>();
Info.setPreservesAll();
}
@@ -148,6 +150,7 @@ void LPPassManager::getAnalysisUsage(AnalysisUsage &Info) const {
bool LPPassManager::runOnFunction(Function &F) {
auto &LIWP = getAnalysis<LoopInfoWrapperPass>();
LI = &LIWP.getLoopInfo();
+ DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
bool Changed = false;
// Collect inherited analysis from Module level pass manager.
@@ -162,16 +165,14 @@ bool LPPassManager::runOnFunction(Function &F) {
// Note that LoopInfo::iterator visits loops in reverse program
// order. Here, reverse_iterator gives us a forward order, and the LoopQueue
// reverses the order a third time by popping from the back.
- for (LoopInfo::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I)
- addLoopIntoQueue(*I, LQ);
+ for (Loop *L : reverse(*LI))
+ addLoopIntoQueue(L, LQ);
if (LQ.empty()) // No loops, skip calling finalizers
return false;
// Initialization
- for (std::deque<Loop *>::const_iterator I = LQ.begin(), E = LQ.end();
- I != E; ++I) {
- Loop *L = *I;
+ for (Loop *L : LQ) {
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
LoopPass *P = getContainedPass(Index);
Changed |= P->doInitialization(L, *this);
@@ -220,6 +221,12 @@ bool LPPassManager::runOnFunction(Function &F) {
TimeRegion PassTimer(getPassTimer(&LIWP));
CurrentLoop->verifyLoop();
}
+ // Here we apply the same reasoning as in the above case. The only
+ // difference is that LPPassManager might run passes which do not require
+ // LCSSA form (LoopPassPrinter, for example). We should skip verification
+ // for such passes.
+ if (mustPreserveAnalysisID(LCSSAVerificationPass::ID))
+ CurrentLoop->isRecursivelyLCSSAForm(*DT, *LI);
// Then call the regular verifyAnalysis functions.
verifyPreservedAnalysis(P);
@@ -355,3 +362,8 @@ bool LoopPass::skipLoop(const Loop *L) const {
}
return false;
}
+
+char LCSSAVerificationPass::ID = 0;
+INITIALIZE_PASS(LCSSAVerificationPass, "lcssa-verification", "LCSSA Verifier",
+ false, false)
+
diff --git a/contrib/llvm/lib/Analysis/LoopPassManager.cpp b/contrib/llvm/lib/Analysis/LoopPassManager.cpp
index 8bac19a58217..044e5d55dafd 100644
--- a/contrib/llvm/lib/Analysis/LoopPassManager.cpp
+++ b/contrib/llvm/lib/Analysis/LoopPassManager.cpp
@@ -17,12 +17,31 @@
using namespace llvm;
-// Explicit instantiations for core typedef'ed templates.
+// Explicit template instantiations and specialization definitions for core
+// template typedefs.
namespace llvm {
template class PassManager<Loop>;
template class AnalysisManager<Loop>;
template class InnerAnalysisManagerProxy<LoopAnalysisManager, Function>;
template class OuterAnalysisManagerProxy<FunctionAnalysisManager, Loop>;
+
+template <>
+bool LoopAnalysisManagerFunctionProxy::Result::invalidate(
+ Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &Inv) {
+ // If this proxy isn't marked as preserved, the set of Function objects in
+ // the module may have changed. We therefore can't call
+ // InnerAM->invalidate(), because any pointers to Functions it has may be
+ // stale.
+ auto PAC = PA.getChecker<LoopAnalysisManagerFunctionProxy>();
+ if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Loop>>())
+ InnerAM->clear();
+
+ // FIXME: Proper support for invalidation isn't yet implemented for the LPM.
+
+ // Return false to indicate that this result is still a valid proxy.
+ return false;
+}
}
PreservedAnalyses llvm::getLoopPassPreservedAnalyses() {
@@ -32,6 +51,7 @@ PreservedAnalyses llvm::getLoopPassPreservedAnalyses() {
PA.preserve<ScalarEvolutionAnalysis>();
// TODO: What we really want to do here is preserve an AA category, but that
// concept doesn't exist yet.
+ PA.preserve<AAManager>();
PA.preserve<BasicAA>();
PA.preserve<GlobalsAA>();
PA.preserve<SCEVAA>();
diff --git a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp
index f23477622bec..2d8274040d39 100644
--- a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp
+++ b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp
@@ -77,8 +77,12 @@ static const std::pair<LibFunc::Func, AllocFnsTy> AllocationFnData[] = {
// TODO: Handle "int posix_memalign(void **, size_t, size_t)"
};
+static Function *getCalledFunction(const Value *V, bool LookThroughBitCast,
+ bool &IsNoBuiltin) {
+ // Don't care about intrinsics in this case.
+ if (isa<IntrinsicInst>(V))
+ return nullptr;
-static Function *getCalledFunction(const Value *V, bool LookThroughBitCast) {
if (LookThroughBitCast)
V = V->stripPointerCasts();
@@ -86,8 +90,7 @@ static Function *getCalledFunction(const Value *V, bool LookThroughBitCast) {
if (!CS.getInstruction())
return nullptr;
- if (CS.isNoBuiltin())
- return nullptr;
+ IsNoBuiltin = CS.isNoBuiltin();
Function *Callee = CS.getCalledFunction();
if (!Callee || !Callee->isDeclaration())
@@ -98,47 +101,19 @@ static Function *getCalledFunction(const Value *V, bool LookThroughBitCast) {
/// Returns the allocation data for the given value if it's either a call to a
/// known allocation function, or a call to a function with the allocsize
/// attribute.
-static Optional<AllocFnsTy> getAllocationData(const Value *V, AllocType AllocTy,
- const TargetLibraryInfo *TLI,
- bool LookThroughBitCast = false) {
- // Skip intrinsics
- if (isa<IntrinsicInst>(V))
- return None;
-
- const Function *Callee = getCalledFunction(V, LookThroughBitCast);
- if (!Callee)
- return None;
-
- // If it has allocsize, we can skip checking if it's a known function.
- //
- // MallocLike is chosen here because allocsize makes no guarantees about the
- // nullness of the result of the function, nor does it deal with strings, nor
- // does it require that the memory returned is zeroed out.
- LLVM_CONSTEXPR auto AllocSizeAllocTy = MallocLike;
- if ((AllocTy & AllocSizeAllocTy) == AllocSizeAllocTy &&
- Callee->hasFnAttribute(Attribute::AllocSize)) {
- Attribute Attr = Callee->getFnAttribute(Attribute::AllocSize);
- std::pair<unsigned, Optional<unsigned>> Args = Attr.getAllocSizeArgs();
-
- AllocFnsTy Result;
- Result.AllocTy = AllocSizeAllocTy;
- Result.NumParams = Callee->getNumOperands();
- Result.FstParam = Args.first;
- Result.SndParam = Args.second.getValueOr(-1);
- return Result;
- }
-
+static Optional<AllocFnsTy>
+getAllocationDataForFunction(const Function *Callee, AllocType AllocTy,
+ const TargetLibraryInfo *TLI) {
// Make sure that the function is available.
StringRef FnName = Callee->getName();
LibFunc::Func TLIFn;
if (!TLI || !TLI->getLibFunc(FnName, TLIFn) || !TLI->has(TLIFn))
return None;
- const auto *Iter =
- std::find_if(std::begin(AllocationFnData), std::end(AllocationFnData),
- [TLIFn](const std::pair<LibFunc::Func, AllocFnsTy> &P) {
- return P.first == TLIFn;
- });
+ const auto *Iter = find_if(
+ AllocationFnData, [TLIFn](const std::pair<LibFunc::Func, AllocFnsTy> &P) {
+ return P.first == TLIFn;
+ });
if (Iter == std::end(AllocationFnData))
return None;
@@ -164,6 +139,48 @@ static Optional<AllocFnsTy> getAllocationData(const Value *V, AllocType AllocTy,
return None;
}
+static Optional<AllocFnsTy> getAllocationData(const Value *V, AllocType AllocTy,
+ const TargetLibraryInfo *TLI,
+ bool LookThroughBitCast = false) {
+ bool IsNoBuiltinCall;
+ if (const Function *Callee =
+ getCalledFunction(V, LookThroughBitCast, IsNoBuiltinCall))
+ if (!IsNoBuiltinCall)
+ return getAllocationDataForFunction(Callee, AllocTy, TLI);
+ return None;
+}
+
+static Optional<AllocFnsTy> getAllocationSize(const Value *V,
+ const TargetLibraryInfo *TLI) {
+ bool IsNoBuiltinCall;
+ const Function *Callee =
+ getCalledFunction(V, /*LookThroughBitCast=*/false, IsNoBuiltinCall);
+ if (!Callee)
+ return None;
+
+ // Prefer to use existing information over allocsize. This will give us an
+ // accurate AllocTy.
+ if (!IsNoBuiltinCall)
+ if (Optional<AllocFnsTy> Data =
+ getAllocationDataForFunction(Callee, AnyAlloc, TLI))
+ return Data;
+
+ Attribute Attr = Callee->getFnAttribute(Attribute::AllocSize);
+ if (Attr == Attribute())
+ return None;
+
+ std::pair<unsigned, Optional<unsigned>> Args = Attr.getAllocSizeArgs();
+
+ AllocFnsTy Result;
+ // Because allocsize only tells us how many bytes are allocated, we're not
+ // really allowed to assume anything, so we use MallocLike.
+ Result.AllocTy = MallocLike;
+ Result.NumParams = Callee->getNumOperands();
+ Result.FstParam = Args.first;
+ Result.SndParam = Args.second.getValueOr(-1);
+ return Result;
+}
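getAllocationSize above falls back to the allocsize attribute when the callee is not a recognized allocation libcall. For context, the attribute usually comes from a source-level annotation; a small hedged example (assuming Clang's alloc_size, whose 1-based argument index is lowered to the 0-based IR allocsize attribute):

    // Illustrative only: a user allocator annotated so that size queries such
    // as getObjectSize() can derive the allocation size from the argument.
    extern "C" void *my_alloc(unsigned long nbytes)
        __attribute__((alloc_size(1)));   // emitted as `allocsize(0)` in IR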
+
static bool hasNoAliasAttr(const Value *V, bool LookThroughBitCast) {
ImmutableCallSite CS(LookThroughBitCast ? V->stripPointerCasts() : V);
return CS && CS.paramHasAttr(AttributeSet::ReturnIndex, Attribute::NoAlias);
@@ -389,6 +406,36 @@ bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout &DL,
return true;
}
+ConstantInt *llvm::lowerObjectSizeCall(IntrinsicInst *ObjectSize,
+ const DataLayout &DL,
+ const TargetLibraryInfo *TLI,
+ bool MustSucceed) {
+ assert(ObjectSize->getIntrinsicID() == Intrinsic::objectsize &&
+ "ObjectSize must be a call to llvm.objectsize!");
+
+ bool MaxVal = cast<ConstantInt>(ObjectSize->getArgOperand(1))->isZero();
+ ObjSizeMode Mode;
+ // Unless we have to fold this to something, try to be as accurate as
+ // possible.
+ if (MustSucceed)
+ Mode = MaxVal ? ObjSizeMode::Max : ObjSizeMode::Min;
+ else
+ Mode = ObjSizeMode::Exact;
+
+ // FIXME: Does it make sense to just return a failure value if the size won't
+ // fit in the output and `!MustSucceed`?
+ uint64_t Size;
+ auto *ResultType = cast<IntegerType>(ObjectSize->getType());
+ if (getObjectSize(ObjectSize->getArgOperand(0), Size, DL, TLI, false, Mode) &&
+ isUIntN(ResultType->getBitWidth(), Size))
+ return ConstantInt::get(ResultType, Size);
+
+ if (!MustSucceed)
+ return nullptr;
+
+ return ConstantInt::get(ResultType, MaxVal ? -1ULL : 0);
+}
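A hypothetical consumer of the new lowerObjectSizeCall helper, sketching roughly how an instcombine-style caller could fold @llvm.objectsize (the surrounding function is illustrative, not part of this change):

    #include "llvm/Analysis/MemoryBuiltins.h"
    #include "llvm/IR/IntrinsicInst.h"
    using namespace llvm;

    static bool tryFoldObjectSize(IntrinsicInst *II, const DataLayout &DL,
                                  const TargetLibraryInfo *TLI) {
      // With MustSucceed=false a null result means the size is unknown and the
      // intrinsic should be left in place for a later lowering.
      if (ConstantInt *C = lowerObjectSizeCall(II, DL, TLI, /*MustSucceed=*/false)) {
        II->replaceAllUsesWith(C);
        II->eraseFromParent();
        return true;
      }
      return false;
    }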
+
STATISTIC(ObjectVisitorArgument,
"Number of arguments with unsolved size and offset");
STATISTIC(ObjectVisitorLoad,
@@ -476,8 +523,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitArgument(Argument &A) {
}
SizeOffsetType ObjectSizeOffsetVisitor::visitCallSite(CallSite CS) {
- Optional<AllocFnsTy> FnData =
- getAllocationData(CS.getInstruction(), AnyAlloc, TLI);
+ Optional<AllocFnsTy> FnData = getAllocationSize(CS.getInstruction(), TLI);
if (!FnData)
return unknown();
@@ -736,8 +782,7 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitAllocaInst(AllocaInst &I) {
}
SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitCallSite(CallSite CS) {
- Optional<AllocFnsTy> FnData =
- getAllocationData(CS.getInstruction(), AnyAlloc, TLI);
+ Optional<AllocFnsTy> FnData = getAllocationSize(CS.getInstruction(), TLI);
if (!FnData)
return unknown();
diff --git a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
index 33499334fefa..2746361ab4b5 100644
--- a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -15,24 +15,38 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/Analysis/OrderedBasicBlock.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/PredIteratorCache.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+
using namespace llvm;
#define DEBUG_TYPE "memdep"
@@ -166,7 +180,7 @@ MemDepResult MemoryDependenceResults::getCallSiteDependencyFrom(
BasicBlock *BB) {
unsigned Limit = BlockScanLimit;
- // Walk backwards through the block, looking for dependencies
+ // Walk backwards through the block, looking for dependencies.
while (ScanIt != BB->begin()) {
// Limit the amount of scanning we do so we don't end up with quadratic
// running time on extreme testcases.
@@ -220,26 +234,6 @@ MemDepResult MemoryDependenceResults::getCallSiteDependencyFrom(
return MemDepResult::getNonFuncLocal();
}
-/// Return true if LI is a load that would fully overlap MemLoc if done as
-/// a wider legal integer load.
-///
-/// MemLocBase, MemLocOffset are lazily computed here the first time the
-/// base/offs of memloc is needed.
-static bool isLoadLoadClobberIfExtendedToFullWidth(const MemoryLocation &MemLoc,
- const Value *&MemLocBase,
- int64_t &MemLocOffs,
- const LoadInst *LI) {
- const DataLayout &DL = LI->getModule()->getDataLayout();
-
- // If we haven't already computed the base/offset of MemLoc, do so now.
- if (!MemLocBase)
- MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, DL);
-
- unsigned Size = MemoryDependenceResults::getLoadLoadClobberFullWidthSize(
- MemLocBase, MemLocOffs, MemLoc.Size, LI);
- return Size != 0;
-}
-
unsigned MemoryDependenceResults::getLoadLoadClobberFullWidthSize(
const Value *MemLocBase, int64_t MemLocOffs, unsigned MemLocSize,
const LoadInst *LI) {
@@ -292,7 +286,7 @@ unsigned MemoryDependenceResults::getLoadLoadClobberFullWidthSize(
unsigned NewLoadByteSize = LI->getType()->getPrimitiveSizeInBits() / 8U;
NewLoadByteSize = NextPowerOf2(NewLoadByteSize);
- while (1) {
+ while (true) {
// If this load size is bigger than our known alignment or would not fit
// into a native integer register, then we fail.
if (NewLoadByteSize > LoadAlign ||
@@ -327,7 +321,7 @@ static bool isVolatile(Instruction *Inst) {
MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
- BasicBlock *BB, Instruction *QueryInst) {
+ BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
if (QueryInst != nullptr) {
if (auto *LI = dyn_cast<LoadInst>(QueryInst)) {
@@ -338,49 +332,69 @@ MemDepResult MemoryDependenceResults::getPointerDependencyFrom(
return invariantGroupDependency;
}
}
- return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst);
+ return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst,
+ Limit);
}
MemDepResult
MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
- BasicBlock *BB) {
+ BasicBlock *BB) {
+
+ auto *InvariantGroupMD = LI->getMetadata(LLVMContext::MD_invariant_group);
+ if (!InvariantGroupMD)
+ return MemDepResult::getUnknown();
+
Value *LoadOperand = LI->getPointerOperand();
   // It is not safe to walk the use list of a global value, because function
// passes aren't allowed to look outside their functions.
if (isa<GlobalValue>(LoadOperand))
return MemDepResult::getUnknown();
- auto *InvariantGroupMD = LI->getMetadata(LLVMContext::MD_invariant_group);
- if (!InvariantGroupMD)
- return MemDepResult::getUnknown();
-
- MemDepResult Result = MemDepResult::getUnknown();
- llvm::SmallSet<Value *, 14> Seen;
// Queue to process all pointers that are equivalent to load operand.
- llvm::SmallVector<Value *, 8> LoadOperandsQueue;
- LoadOperandsQueue.push_back(LoadOperand);
+ SmallVector<const Value *, 8> LoadOperandsQueue;
+ SmallSet<const Value *, 14> SeenValues;
+ auto TryInsertToQueue = [&](Value *V) {
+ if (SeenValues.insert(V).second)
+ LoadOperandsQueue.push_back(V);
+ };
+
+ TryInsertToQueue(LoadOperand);
while (!LoadOperandsQueue.empty()) {
- Value *Ptr = LoadOperandsQueue.pop_back_val();
+ const Value *Ptr = LoadOperandsQueue.pop_back_val();
+ assert(Ptr);
if (isa<GlobalValue>(Ptr))
continue;
- if (auto *BCI = dyn_cast<BitCastInst>(Ptr)) {
- if (Seen.insert(BCI->getOperand(0)).second) {
- LoadOperandsQueue.push_back(BCI->getOperand(0));
- }
- }
-
- for (Use &Us : Ptr->uses()) {
+ // Value comes from bitcast: Ptr = bitcast x. Insert x.
+ if (auto *BCI = dyn_cast<BitCastInst>(Ptr))
+ TryInsertToQueue(BCI->getOperand(0));
+ // Gep with zeros is equivalent to bitcast.
+ // FIXME: We are not sure whether a bitcast should be canonicalized to a
+ // gep 0 or a gep 0 to a bitcast because of SROA, so there are two forms.
+ // Once typeless pointers land upstream, both cases will be gone (and this
+ // BFS also won't be needed).
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(Ptr))
+ if (GEP->hasAllZeroIndices())
+ TryInsertToQueue(GEP->getOperand(0));
+
+ for (const Use &Us : Ptr->uses()) {
auto *U = dyn_cast<Instruction>(Us.getUser());
if (!U || U == LI || !DT.dominates(U, LI))
continue;
- if (auto *BCI = dyn_cast<BitCastInst>(U)) {
- if (Seen.insert(BCI).second) {
- LoadOperandsQueue.push_back(BCI);
- }
+ // A bitcast or a gep with zero indices is using Ptr. Add it to the queue
+ // to check its users. U = bitcast Ptr
+ if (isa<BitCastInst>(U)) {
+ TryInsertToQueue(U);
continue;
}
+ // U = getelementptr Ptr, 0, 0...
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(U))
+ if (GEP->hasAllZeroIndices()) {
+ TryInsertToQueue(U);
+ continue;
+ }
+
// If we hit load/store with the same invariant.group metadata (and the
// same pointer operand) we can assume that value pointed by pointer
// operand didn't change.
@@ -389,18 +403,20 @@ MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI,
return MemDepResult::getDef(U);
}
}
- return Result;
+ return MemDepResult::getUnknown();
}
MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt,
- BasicBlock *BB, Instruction *QueryInst) {
-
- const Value *MemLocBase = nullptr;
- int64_t MemLocOffset = 0;
- unsigned Limit = BlockScanLimit;
+ BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) {
bool isInvariantLoad = false;
+ if (!Limit) {
+ unsigned DefaultLimit = BlockScanLimit;
+ return getSimplePointerDependencyFrom(MemLoc, isLoad, ScanIt, BB, QueryInst,
+ &DefaultLimit);
+ }
+
// We must be careful with atomic accesses, as they may allow another thread
// to touch this location, clobbering it. We are conservative: if the
// QueryInst is not a simple (non-atomic) memory access, we automatically
@@ -474,8 +490,8 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
// Limit the amount of scanning we do so we don't end up with quadratic
// running time on extreme testcases.
- --Limit;
- if (!Limit)
+ --*Limit;
+ if (!*Limit)
return MemDepResult::getUnknown();
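getSimplePointerDependencyFrom now takes the scan budget as `unsigned *Limit`: a null pointer means "use BlockScanLimit", and the counter is decremented through the pointer so a caller issuing several queries (seeded, for example, from getDefaultBlockScanLimit) can share one budget and observe how much of it remains. A small standalone sketch of that idiom, with invented names:

```cpp
// Optional scan-budget idiom: null Limit means "use the default"; otherwise
// the caller's counter is decremented in place so the remaining budget is
// visible after the call.
#include <cstdio>

static constexpr unsigned DefaultScanLimit = 100;

static bool scanBlock(int NumInsts, unsigned *Limit = nullptr) {
  if (!Limit) {
    unsigned Default = DefaultScanLimit;
    return scanBlock(NumInsts, &Default);
  }
  for (int I = 0; I < NumInsts; ++I) {
    if (--*Limit == 0)
      return false; // budget exhausted, give up
    // ... inspect instruction I ...
  }
  return true;
}

int main() {
  unsigned Budget = 10;
  bool Done = scanBlock(25, &Budget);
  std::printf("done=%d remaining=%u\n", Done, Budget);
  // A second query can keep scanning with whatever budget is left.
}
```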
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
@@ -530,21 +546,8 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom(
AliasResult R = AA.alias(LoadLoc, MemLoc);
if (isLoad) {
- if (R == NoAlias) {
- // If this is an over-aligned integer load (for example,
- // "load i8* %P, align 4") see if it would obviously overlap with the
- // queried location if widened to a larger load (e.g. if the queried
- // location is 1 byte at P+1). If so, return it as a load/load
- // clobber result, allowing the client to decide to widen the load if
- // it wants to.
- if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) {
- if (LI->getAlignment() * 8 > ITy->getPrimitiveSizeInBits() &&
- isLoadLoadClobberIfExtendedToFullWidth(MemLoc, MemLocBase,
- MemLocOffset, LI))
- return MemDepResult::getClobber(Inst);
- }
+ if (R == NoAlias)
continue;
- }
// Must aliased loads are defs of each other.
if (R == MustAlias)
@@ -697,7 +700,7 @@ MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst) {
// Do the scan.
if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) {
- // No dependence found. If this is the entry block of the function, it is
+ // No dependence found. If this is the entry block of the function, it is
// unknown, otherwise it is non-local.
if (QueryParent != &QueryParent->getParent()->getEntryBlock())
LocalCache = MemDepResult::getNonLocal();
@@ -709,7 +712,7 @@ MemDepResult MemoryDependenceResults::getDependency(Instruction *QueryInst) {
if (MemLoc.Ptr) {
// If we can do a pointer scan, make it happen.
bool isLoad = !(MR & MRI_Mod);
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(QueryInst))
+ if (auto *II = dyn_cast<IntrinsicInst>(QueryInst))
isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start;
LocalCache = getPointerDependencyFrom(
@@ -1010,7 +1013,7 @@ SortNonLocalDepInfoCache(MemoryDependenceResults::NonLocalDepInfo &Cache,
MemoryDependenceResults::NonLocalDepInfo::iterator Entry =
std::upper_bound(Cache.begin(), Cache.end() - 1, Val);
Cache.insert(Entry, Val);
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
}
case 1:
// One new entry, Just insert the new value at the appropriate position.
@@ -1659,10 +1662,10 @@ void MemoryDependenceResults::verifyRemoved(Instruction *D) const {
#endif
}
-char MemoryDependenceAnalysis::PassID;
+AnalysisKey MemoryDependenceAnalysis::Key;
MemoryDependenceResults
-MemoryDependenceAnalysis::run(Function &F, AnalysisManager<Function> &AM) {
+MemoryDependenceAnalysis::run(Function &F, FunctionAnalysisManager &AM) {
auto &AA = AM.getResult<AAManager>(F);
auto &AC = AM.getResult<AssumptionAnalysis>(F);
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
@@ -1684,6 +1687,7 @@ INITIALIZE_PASS_END(MemoryDependenceWrapperPass, "memdep",
MemoryDependenceWrapperPass::MemoryDependenceWrapperPass() : FunctionPass(ID) {
initializeMemoryDependenceWrapperPassPass(*PassRegistry::getPassRegistry());
}
+
MemoryDependenceWrapperPass::~MemoryDependenceWrapperPass() {}
void MemoryDependenceWrapperPass::releaseMemory() {
@@ -1698,6 +1702,28 @@ void MemoryDependenceWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequiredTransitive<TargetLibraryInfoWrapperPass>();
}
+bool MemoryDependenceResults::invalidate(Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &Inv) {
+ // Check whether our analysis is preserved.
+ auto PAC = PA.getChecker<MemoryDependenceAnalysis>();
+ if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
+ // If not, give up now.
+ return true;
+
+ // Check whether the analyses we depend on became invalid for any reason.
+ if (Inv.invalidate<AAManager>(F, PA) ||
+ Inv.invalidate<AssumptionAnalysis>(F, PA) ||
+ Inv.invalidate<DominatorTreeAnalysis>(F, PA))
+ return true;
+
+ // Otherwise this analysis result remains valid.
+ return false;
+}
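The invalidate() hook above follows the new pass-manager protocol: return true (drop the cached result) unless the result itself was preserved and every analysis it was built from is still valid. A hedged sketch of the same hook for a hypothetical function analysis whose result caches AliasAnalysis queries; the MyAAQueryCache names are invented, while the invalidation calls mirror the ones used above:

```cpp
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/IR/PassManager.h"

using namespace llvm;

class MyAAQueryCache {
public:
  bool invalidate(Function &F, const PreservedAnalyses &PA,
                  FunctionAnalysisManager::Invalidator &Inv);
};

class MyAAQueryCacheAnalysis
    : public AnalysisInfoMixin<MyAAQueryCacheAnalysis> {
  friend AnalysisInfoMixin<MyAAQueryCacheAnalysis>;
  static AnalysisKey Key;

public:
  using Result = MyAAQueryCache;
  MyAAQueryCache run(Function &, FunctionAnalysisManager &) { return {}; }
};

AnalysisKey MyAAQueryCacheAnalysis::Key;

bool MyAAQueryCache::invalidate(Function &F, const PreservedAnalyses &PA,
                                FunctionAnalysisManager::Invalidator &Inv) {
  // Drop the cached result unless it was explicitly preserved (directly or
  // via the all-function-analyses set)...
  auto PAC = PA.getChecker<MyAAQueryCacheAnalysis>();
  if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
    return true;
  // ...and the AA result it was computed from is itself still valid.
  return Inv.invalidate<AAManager>(F, PA);
}
```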
+
+unsigned MemoryDependenceResults::getDefaultBlockScanLimit() const {
+ return BlockScanLimit;
+}
+
bool MemoryDependenceWrapperPass::runOnFunction(Function &F) {
auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
diff --git a/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp b/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp
index 36c47141a45f..f675830aa67d 100644
--- a/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp
+++ b/contrib/llvm/lib/Analysis/ModuleDebugInfoPrinter.cpp
@@ -74,7 +74,8 @@ void ModuleDebugInfoPrinter::print(raw_ostream &O, const Module *M) const {
// filenames), so just print a few useful things.
for (DICompileUnit *CU : Finder.compile_units()) {
O << "Compile unit: ";
- if (const char *Lang = dwarf::LanguageString(CU->getSourceLanguage()))
+ auto Lang = dwarf::LanguageString(CU->getSourceLanguage());
+ if (!Lang.empty())
O << Lang;
else
O << "unknown-language(" << CU->getSourceLanguage() << ")";
@@ -90,7 +91,8 @@ void ModuleDebugInfoPrinter::print(raw_ostream &O, const Module *M) const {
O << '\n';
}
- for (const DIGlobalVariable *GV : Finder.global_variables()) {
+ for (auto GVU : Finder.global_variables()) {
+ const auto *GV = GVU->getVariable();
O << "Global variable: " << GV->getName();
printFile(O, GV->getFilename(), GV->getDirectory(), GV->getLine());
if (!GV->getLinkageName().empty())
@@ -105,14 +107,15 @@ void ModuleDebugInfoPrinter::print(raw_ostream &O, const Module *M) const {
printFile(O, T->getFilename(), T->getDirectory(), T->getLine());
if (auto *BT = dyn_cast<DIBasicType>(T)) {
O << " ";
- if (const char *Encoding =
- dwarf::AttributeEncodingString(BT->getEncoding()))
+ auto Encoding = dwarf::AttributeEncodingString(BT->getEncoding());
+ if (!Encoding.empty())
O << Encoding;
else
O << "unknown-encoding(" << BT->getEncoding() << ')';
} else {
O << ' ';
- if (const char *Tag = dwarf::TagString(T->getTag()))
+ auto Tag = dwarf::TagString(T->getTag());
+ if (!Tag.empty())
O << Tag;
else
O << "unknown-tag(" << T->getTag() << ")";
diff --git a/contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
index c9ac2bdb7942..1d2ffc1abe1f 100644
--- a/contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -13,16 +13,22 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/IndirectCallPromotionAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/TypeMetadataUtils.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/ValueSymbolTable.h"
+#include "llvm/Object/IRObjectFile.h"
#include "llvm/Pass.h"
using namespace llvm;
@@ -31,7 +37,7 @@ using namespace llvm;
// Walk through the operands of a given User via worklist iteration and populate
// the set of GlobalValue references encountered. Invoked either on an
// Instruction or a GlobalVariable (which walks its initializer).
-static void findRefEdges(const User *CurUser, DenseSet<const Value *> &RefEdges,
+static void findRefEdges(const User *CurUser, SetVector<ValueInfo> &RefEdges,
SmallPtrSet<const User *, 8> &Visited) {
SmallVector<const User *, 32> Worklist;
Worklist.push_back(CurUser);
@@ -50,12 +56,12 @@ static void findRefEdges(const User *CurUser, DenseSet<const Value *> &RefEdges,
continue;
if (isa<BlockAddress>(Operand))
continue;
- if (isa<GlobalValue>(Operand)) {
+ if (auto *GV = dyn_cast<GlobalValue>(Operand)) {
// We have a reference to a global value. This should be added to
// the reference set unless it is a callee. Callees are handled
// specially by WriteFunction and are added to a separate list.
if (!(CS && CS.isCallee(&OI)))
- RefEdges.insert(Operand);
+ RefEdges.insert(GV);
continue;
}
Worklist.push_back(Operand);
@@ -63,98 +69,178 @@ static void findRefEdges(const User *CurUser, DenseSet<const Value *> &RefEdges,
}
}
-void ModuleSummaryIndexBuilder::computeFunctionSummary(
- const Function &F, BlockFrequencyInfo *BFI) {
- // Summary not currently supported for anonymous functions, they must
- // be renamed.
- if (!F.hasName())
- return;
+static CalleeInfo::HotnessType getHotness(uint64_t ProfileCount,
+ ProfileSummaryInfo *PSI) {
+ if (!PSI)
+ return CalleeInfo::HotnessType::Unknown;
+ if (PSI->isHotCount(ProfileCount))
+ return CalleeInfo::HotnessType::Hot;
+ if (PSI->isColdCount(ProfileCount))
+ return CalleeInfo::HotnessType::Cold;
+ return CalleeInfo::HotnessType::None;
+}
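getHotness above buckets a profile count using ProfileSummaryInfo's hot/cold thresholds. A standalone model of the same classification, with hard-coded thresholds standing in for the ones PSI derives from the module's profile summary:

```cpp
#include <cstdint>
#include <cstdio>
#include <optional>

enum class Hotness { Unknown, Cold, None, Hot };

struct Thresholds {
  uint64_t Hot;
  uint64_t Cold;
};

static Hotness classify(uint64_t Count, const std::optional<Thresholds> &T) {
  if (!T)
    return Hotness::Unknown; // no profile summary available
  if (Count >= T->Hot)
    return Hotness::Hot;
  if (Count <= T->Cold)
    return Hotness::Cold;
  return Hotness::None;
}

int main() {
  std::optional<Thresholds> T = Thresholds{10000, 10};
  for (uint64_t C : {5ull, 500ull, 50000ull})
    std::printf("count=%llu bucket=%d\n", (unsigned long long)C,
                (int)classify(C, T));
}
```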
+
+static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
+ const Function &F, BlockFrequencyInfo *BFI,
+ ProfileSummaryInfo *PSI,
+ bool HasLocalsInUsed) {
+ // Summary not currently supported for anonymous functions; they should
+ // have been named by this point.
+ assert(F.hasName());
unsigned NumInsts = 0;
// Map from callee ValueId to profile count. Used to accumulate profile
// counts for all static calls to a given callee.
- DenseMap<const Value *, CalleeInfo> CallGraphEdges;
- DenseMap<GlobalValue::GUID, CalleeInfo> IndirectCallEdges;
- DenseSet<const Value *> RefEdges;
+ MapVector<ValueInfo, CalleeInfo> CallGraphEdges;
+ SetVector<ValueInfo> RefEdges;
+ SetVector<GlobalValue::GUID> TypeTests;
ICallPromotionAnalysis ICallAnalysis;
+ bool HasInlineAsmMaybeReferencingInternal = false;
SmallPtrSet<const User *, 8> Visited;
for (const BasicBlock &BB : F)
for (const Instruction &I : BB) {
- if (!isa<DbgInfoIntrinsic>(I))
- ++NumInsts;
-
- if (auto CS = ImmutableCallSite(&I)) {
- auto *CalledFunction = CS.getCalledFunction();
- // Check if this is a direct call to a known function.
- if (CalledFunction) {
- if (CalledFunction->hasName() && !CalledFunction->isIntrinsic()) {
- auto ScaledCount = BFI ? BFI->getBlockProfileCount(&BB) : None;
- auto *CalleeId =
- M->getValueSymbolTable().lookup(CalledFunction->getName());
- CallGraphEdges[CalleeId] +=
- (ScaledCount ? ScaledCount.getValue() : 0);
- }
- } else {
- // Otherwise, check for an indirect call (call to a non-const value
- // that isn't an inline assembly call).
- const CallInst *CI = dyn_cast<CallInst>(&I);
- if (CS.getCalledValue() && !isa<Constant>(CS.getCalledValue()) &&
- !(CI && CI->isInlineAsm())) {
- uint32_t NumVals, NumCandidates;
- uint64_t TotalCount;
- auto CandidateProfileData =
- ICallAnalysis.getPromotionCandidatesForInstruction(
- &I, NumVals, TotalCount, NumCandidates);
- for (auto &Candidate : CandidateProfileData)
- IndirectCallEdges[Candidate.Value] += Candidate.Count;
+ if (isa<DbgInfoIntrinsic>(I))
+ continue;
+ ++NumInsts;
+ findRefEdges(&I, RefEdges, Visited);
+ auto CS = ImmutableCallSite(&I);
+ if (!CS)
+ continue;
+
+ const auto *CI = dyn_cast<CallInst>(&I);
+ // Since we don't know exactly which local values are referenced in inline
+ // assembly, conservatively mark the function as possibly referencing
+ // a local value from inline assembly to ensure we don't export a
+ // reference (which would require renaming and promotion of the
+ // referenced value).
+ if (HasLocalsInUsed && CI && CI->isInlineAsm())
+ HasInlineAsmMaybeReferencingInternal = true;
+
+ auto *CalledValue = CS.getCalledValue();
+ auto *CalledFunction = CS.getCalledFunction();
+ // Check if this is an alias to a function. If so, get the
+ // called aliasee for the checks below.
+ if (auto *GA = dyn_cast<GlobalAlias>(CalledValue)) {
+ assert(!CalledFunction && "Expected null called function in callsite for alias");
+ CalledFunction = dyn_cast<Function>(GA->getBaseObject());
+ }
+ // Check if this is a direct call to a known function or a known
+ // intrinsic, or an indirect call with profile data.
+ if (CalledFunction) {
+ if (CalledFunction->isIntrinsic()) {
+ if (CalledFunction->getIntrinsicID() != Intrinsic::type_test)
+ continue;
+ // Produce a summary from type.test intrinsics. We only summarize
+ // type.test intrinsics that are used other than by an llvm.assume
+ // intrinsic. Intrinsics that are assumed are relevant only to the
+ // devirtualization pass, not the type test lowering pass.
+ bool HasNonAssumeUses = llvm::any_of(CI->uses(), [](const Use &CIU) {
+ auto *AssumeCI = dyn_cast<CallInst>(CIU.getUser());
+ if (!AssumeCI)
+ return true;
+ Function *F = AssumeCI->getCalledFunction();
+ return !F || F->getIntrinsicID() != Intrinsic::assume;
+ });
+ if (HasNonAssumeUses) {
+ auto *TypeMDVal = cast<MetadataAsValue>(CI->getArgOperand(1));
+ if (auto *TypeId = dyn_cast<MDString>(TypeMDVal->getMetadata()))
+ TypeTests.insert(GlobalValue::getGUID(TypeId->getString()));
}
}
+ // We should have named any anonymous globals
+ assert(CalledFunction->hasName());
+ auto ScaledCount = BFI ? BFI->getBlockProfileCount(&BB) : None;
+ auto Hotness = ScaledCount ? getHotness(ScaledCount.getValue(), PSI)
+ : CalleeInfo::HotnessType::Unknown;
+
+ // Use the original CalledValue, in case it was an alias. We want
+ // to record the call edge to the alias in that case. Eventually
+ // an alias summary will be created to associate the alias and
+ // aliasee.
+ CallGraphEdges[cast<GlobalValue>(CalledValue)].updateHotness(Hotness);
+ } else {
+ // Skip inline assembly calls.
+ if (CI && CI->isInlineAsm())
+ continue;
+ // Skip direct calls.
+ if (!CS.getCalledValue() || isa<Constant>(CS.getCalledValue()))
+ continue;
+
+ uint32_t NumVals, NumCandidates;
+ uint64_t TotalCount;
+ auto CandidateProfileData =
+ ICallAnalysis.getPromotionCandidatesForInstruction(
+ &I, NumVals, TotalCount, NumCandidates);
+ for (auto &Candidate : CandidateProfileData)
+ CallGraphEdges[Candidate.Value].updateHotness(
+ getHotness(Candidate.Count, PSI));
}
- findRefEdges(&I, RefEdges, Visited);
}
GlobalValueSummary::GVFlags Flags(F);
- std::unique_ptr<FunctionSummary> FuncSummary =
- llvm::make_unique<FunctionSummary>(Flags, NumInsts);
- FuncSummary->addCallGraphEdges(CallGraphEdges);
- FuncSummary->addCallGraphEdges(IndirectCallEdges);
- FuncSummary->addRefEdges(RefEdges);
- Index->addGlobalValueSummary(F.getName(), std::move(FuncSummary));
+ auto FuncSummary = llvm::make_unique<FunctionSummary>(
+ Flags, NumInsts, RefEdges.takeVector(), CallGraphEdges.takeVector(),
+ TypeTests.takeVector());
+ if (HasInlineAsmMaybeReferencingInternal)
+ FuncSummary->setHasInlineAsmMaybeReferencingInternal();
+ Index.addGlobalValueSummary(F.getName(), std::move(FuncSummary));
}
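computeFunctionSummary now collects edges in SetVector/MapVector instead of DenseSet/DenseMap, so the ref, call, and type-test lists are deduplicated yet kept in deterministic insertion order, and are handed to FunctionSummary wholesale via takeVector(). A standalone sketch of that insertion-ordered container pattern (an invented class, not the ADT implementation):

```cpp
#include <cstdio>
#include <string>
#include <unordered_set>
#include <utility>
#include <vector>

template <typename T> class InsertionOrderedSet {
  std::unordered_set<T> Seen;
  std::vector<T> Order;

public:
  bool insert(const T &V) {
    if (!Seen.insert(V).second)
      return false; // already present, order unchanged
    Order.push_back(V);
    return true;
  }
  // Analogous to SetVector::takeVector(): hand the ordered contents to the
  // caller and leave the container empty.
  std::vector<T> takeVector() {
    Seen.clear();
    return std::move(Order);
  }
};

int main() {
  InsertionOrderedSet<std::string> RefEdges;
  for (const char *Name : {"gv_a", "gv_b", "gv_a", "gv_c"})
    RefEdges.insert(Name);
  for (const std::string &Name : RefEdges.takeVector())
    std::printf("ref edge -> %s\n", Name.c_str()); // gv_a, gv_b, gv_c
}
```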
-void ModuleSummaryIndexBuilder::computeVariableSummary(
- const GlobalVariable &V) {
- DenseSet<const Value *> RefEdges;
+static void computeVariableSummary(ModuleSummaryIndex &Index,
+ const GlobalVariable &V) {
+ SetVector<ValueInfo> RefEdges;
SmallPtrSet<const User *, 8> Visited;
findRefEdges(&V, RefEdges, Visited);
GlobalValueSummary::GVFlags Flags(V);
- std::unique_ptr<GlobalVarSummary> GVarSummary =
- llvm::make_unique<GlobalVarSummary>(Flags);
- GVarSummary->addRefEdges(RefEdges);
- Index->addGlobalValueSummary(V.getName(), std::move(GVarSummary));
+ auto GVarSummary =
+ llvm::make_unique<GlobalVarSummary>(Flags, RefEdges.takeVector());
+ Index.addGlobalValueSummary(V.getName(), std::move(GVarSummary));
+}
+
+static void computeAliasSummary(ModuleSummaryIndex &Index,
+ const GlobalAlias &A) {
+ GlobalValueSummary::GVFlags Flags(A);
+ auto AS = llvm::make_unique<AliasSummary>(Flags, ArrayRef<ValueInfo>{});
+ auto *Aliasee = A.getBaseObject();
+ auto *AliaseeSummary = Index.getGlobalValueSummary(*Aliasee);
+ assert(AliaseeSummary && "Alias expects aliasee summary to be parsed");
+ AS->setAliasee(AliaseeSummary);
+ Index.addGlobalValueSummary(A.getName(), std::move(AS));
}
-ModuleSummaryIndexBuilder::ModuleSummaryIndexBuilder(
- const Module *M,
- std::function<BlockFrequencyInfo *(const Function &F)> Ftor)
- : Index(llvm::make_unique<ModuleSummaryIndex>()), M(M) {
- // Check if the module can be promoted, otherwise just disable importing from
- // it by not emitting any summary.
- // FIXME: we could still import *into* it most of the time.
- if (!moduleCanBeRenamedForThinLTO(*M))
- return;
+ModuleSummaryIndex llvm::buildModuleSummaryIndex(
+ const Module &M,
+ std::function<BlockFrequencyInfo *(const Function &F)> GetBFICallback,
+ ProfileSummaryInfo *PSI) {
+ ModuleSummaryIndex Index;
+
+ // Identify the local values in the llvm.used and llvm.compiler.used sets,
+ // which should not be exported as they would then require renaming and
+ // promotion, but we may have opaque uses e.g. in inline asm. We collect them
+ // here because we use this information to mark functions containing inline
+ // assembly calls as not importable.
+ SmallPtrSet<GlobalValue *, 8> LocalsUsed;
+ SmallPtrSet<GlobalValue *, 8> Used;
+ // First collect those in the llvm.used set.
+ collectUsedGlobalVariables(M, Used, /*CompilerUsed*/ false);
+ // Next collect those in the llvm.compiler.used set.
+ collectUsedGlobalVariables(M, Used, /*CompilerUsed*/ true);
+ for (auto *V : Used) {
+ if (V->hasLocalLinkage())
+ LocalsUsed.insert(V);
+ }
// Compute summaries for all functions defined in module, and save in the
// index.
- for (auto &F : *M) {
+ for (auto &F : M) {
if (F.isDeclaration())
continue;
BlockFrequencyInfo *BFI = nullptr;
std::unique_ptr<BlockFrequencyInfo> BFIPtr;
- if (Ftor)
- BFI = Ftor(F);
+ if (GetBFICallback)
+ BFI = GetBFICallback(F);
else if (F.getEntryCount().hasValue()) {
LoopInfo LI{DominatorTree(const_cast<Function &>(F))};
BranchProbabilityInfo BPI{F, LI};
@@ -162,16 +248,89 @@ ModuleSummaryIndexBuilder::ModuleSummaryIndexBuilder(
BFI = BFIPtr.get();
}
- computeFunctionSummary(F, BFI);
+ computeFunctionSummary(Index, M, F, BFI, PSI, !LocalsUsed.empty());
}
// Compute summaries for all variables defined in module, and save in the
// index.
- for (const GlobalVariable &G : M->globals()) {
+ for (const GlobalVariable &G : M.globals()) {
if (G.isDeclaration())
continue;
- computeVariableSummary(G);
+ computeVariableSummary(Index, G);
+ }
+
+ // Compute summaries for all aliases defined in module, and save in the
+ // index.
+ for (const GlobalAlias &A : M.aliases())
+ computeAliasSummary(Index, A);
+
+ for (auto *V : LocalsUsed) {
+ auto *Summary = Index.getGlobalValueSummary(*V);
+ assert(Summary && "Missing summary for global value");
+ Summary->setNoRename();
+ }
+
+ if (!M.getModuleInlineAsm().empty()) {
+ // Collect the local values defined by module level asm, and set up
+ // summaries for these symbols so that they can be marked as NoRename,
+ // to prevent export of any use of them in regular IR that would require
+ // renaming within the module level asm. Note we don't need to create a
+ // summary for weak or global defs, as they don't need to be flagged as
+ // NoRename, and defs in module level asm can't be imported anyway.
+ // Also, any values used but not defined within module level asm should
+ // be listed on the llvm.used or llvm.compiler.used global and marked as
+ // referenced from there.
+ ModuleSymbolTable::CollectAsmSymbols(
+ Triple(M.getTargetTriple()), M.getModuleInlineAsm(),
+ [&M, &Index](StringRef Name, object::BasicSymbolRef::Flags Flags) {
+ // Symbols not marked as Weak or Global are local definitions.
+ if (Flags & (object::BasicSymbolRef::SF_Weak |
+ object::BasicSymbolRef::SF_Global))
+ return;
+ GlobalValue *GV = M.getNamedValue(Name);
+ if (!GV)
+ return;
+ assert(GV->isDeclaration() && "Def in module asm already has definition");
+ GlobalValueSummary::GVFlags GVFlags(
+ GlobalValue::InternalLinkage,
+ /* NoRename */ true,
+ /* HasInlineAsmMaybeReferencingInternal */ false,
+ /* IsNotViableToInline */ true);
+ // Create the appropriate summary type.
+ if (isa<Function>(GV)) {
+ std::unique_ptr<FunctionSummary> Summary =
+ llvm::make_unique<FunctionSummary>(
+ GVFlags, 0, ArrayRef<ValueInfo>{},
+ ArrayRef<FunctionSummary::EdgeTy>{},
+ ArrayRef<GlobalValue::GUID>{});
+ Summary->setNoRename();
+ Index.addGlobalValueSummary(Name, std::move(Summary));
+ } else {
+ std::unique_ptr<GlobalVarSummary> Summary =
+ llvm::make_unique<GlobalVarSummary>(GVFlags,
+ ArrayRef<ValueInfo>{});
+ Summary->setNoRename();
+ Index.addGlobalValueSummary(Name, std::move(Summary));
+ }
+ });
}
+
+ return Index;
+}
+
+AnalysisKey ModuleSummaryIndexAnalysis::Key;
+
+ModuleSummaryIndex
+ModuleSummaryIndexAnalysis::run(Module &M, ModuleAnalysisManager &AM) {
+ ProfileSummaryInfo &PSI = AM.getResult<ProfileSummaryAnalysis>(M);
+ auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ return buildModuleSummaryIndex(
+ M,
+ [&FAM](const Function &F) {
+ return &FAM.getResult<BlockFrequencyAnalysis>(
+ *const_cast<Function *>(&F));
+ },
+ &PSI);
}
char ModuleSummaryIndexWrapperPass::ID = 0;
@@ -191,59 +350,25 @@ ModuleSummaryIndexWrapperPass::ModuleSummaryIndexWrapperPass()
}
bool ModuleSummaryIndexWrapperPass::runOnModule(Module &M) {
- IndexBuilder = llvm::make_unique<ModuleSummaryIndexBuilder>(
- &M, [this](const Function &F) {
+ auto &PSI = *getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ Index = buildModuleSummaryIndex(
+ M,
+ [this](const Function &F) {
return &(this->getAnalysis<BlockFrequencyInfoWrapperPass>(
*const_cast<Function *>(&F))
.getBFI());
- });
+ },
+ &PSI);
return false;
}
bool ModuleSummaryIndexWrapperPass::doFinalization(Module &M) {
- IndexBuilder.reset();
+ Index.reset();
return false;
}
void ModuleSummaryIndexWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequired<BlockFrequencyInfoWrapperPass>();
-}
-
-bool llvm::moduleCanBeRenamedForThinLTO(const Module &M) {
- // We cannot currently promote or rename anything used in inline assembly,
- // which are not visible to the compiler. Detect a possible case by looking
- // for a llvm.used local value, in conjunction with an inline assembly call
- // in the module. Prevent importing of any modules containing these uses by
- // suppressing generation of the index. This also prevents importing
- // into this module, which is also necessary to avoid needing to rename
- // in case of a name clash between a local in this module and an imported
- // global.
- // FIXME: If we find we need a finer-grained approach of preventing promotion
- // and renaming of just the functions using inline assembly we will need to:
- // - Add flag in the function summaries to identify those with inline asm.
- // - Prevent importing of any functions with flag set.
- // - Prevent importing of any global function with the same name as a
- // function in current module that has the flag set.
- // - For any llvm.used value that is exported and promoted, add a private
- // alias to the original name in the current module (even if we don't
- // export the function using those values in inline asm, another function
- // with a reference could be exported).
- SmallPtrSet<GlobalValue *, 8> Used;
- collectUsedGlobalVariables(M, Used, /*CompilerUsed*/ false);
- bool LocalIsUsed =
- llvm::any_of(Used, [](GlobalValue *V) { return V->hasLocalLinkage(); });
- if (!LocalIsUsed)
- return true;
-
- // Walk all the instructions in the module and find if one is inline ASM
- auto HasInlineAsm = llvm::any_of(M, [](const Function &F) {
- return llvm::any_of(instructions(F), [](const Instruction &I) {
- const CallInst *CallI = dyn_cast<CallInst>(&I);
- if (!CallI)
- return false;
- return CallI->isInlineAsm();
- });
- });
- return !HasInlineAsm;
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
}
diff --git a/contrib/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp
index 9bb1048ea8ba..ed03406ca8c6 100644
--- a/contrib/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/ObjCARCAliasAnalysis.cpp
@@ -131,7 +131,7 @@ ModRefInfo ObjCARCAAResult::getModRefInfo(ImmutableCallSite CS,
return AAResultBase::getModRefInfo(CS, Loc);
}
-ObjCARCAAResult ObjCARCAA::run(Function &F, AnalysisManager<Function> &AM) {
+ObjCARCAAResult ObjCARCAA::run(Function &F, FunctionAnalysisManager &AM) {
return ObjCARCAAResult(F.getParent()->getDataLayout());
}
diff --git a/contrib/llvm/lib/Analysis/ObjCARCInstKind.cpp b/contrib/llvm/lib/Analysis/ObjCARCInstKind.cpp
index 3dc1463b8d8b..1e75c0824d03 100644
--- a/contrib/llvm/lib/Analysis/ObjCARCInstKind.cpp
+++ b/contrib/llvm/lib/Analysis/ObjCARCInstKind.cpp
@@ -96,43 +96,47 @@ ARCInstKind llvm::objcarc::GetFunctionClass(const Function *F) {
// One argument.
const Argument *A0 = &*AI++;
- if (AI == AE)
+ if (AI == AE) {
// Argument is a pointer.
- if (PointerType *PTy = dyn_cast<PointerType>(A0->getType())) {
- Type *ETy = PTy->getElementType();
- // Argument is i8*.
- if (ETy->isIntegerTy(8))
+ PointerType *PTy = dyn_cast<PointerType>(A0->getType());
+ if (!PTy)
+ return ARCInstKind::CallOrUser;
+
+ Type *ETy = PTy->getElementType();
+ // Argument is i8*.
+ if (ETy->isIntegerTy(8))
+ return StringSwitch<ARCInstKind>(F->getName())
+ .Case("objc_retain", ARCInstKind::Retain)
+ .Case("objc_retainAutoreleasedReturnValue", ARCInstKind::RetainRV)
+ .Case("objc_unsafeClaimAutoreleasedReturnValue", ARCInstKind::ClaimRV)
+ .Case("objc_retainBlock", ARCInstKind::RetainBlock)
+ .Case("objc_release", ARCInstKind::Release)
+ .Case("objc_autorelease", ARCInstKind::Autorelease)
+ .Case("objc_autoreleaseReturnValue", ARCInstKind::AutoreleaseRV)
+ .Case("objc_autoreleasePoolPop", ARCInstKind::AutoreleasepoolPop)
+ .Case("objc_retainedObject", ARCInstKind::NoopCast)
+ .Case("objc_unretainedObject", ARCInstKind::NoopCast)
+ .Case("objc_unretainedPointer", ARCInstKind::NoopCast)
+ .Case("objc_retain_autorelease", ARCInstKind::FusedRetainAutorelease)
+ .Case("objc_retainAutorelease", ARCInstKind::FusedRetainAutorelease)
+ .Case("objc_retainAutoreleaseReturnValue",
+ ARCInstKind::FusedRetainAutoreleaseRV)
+ .Case("objc_sync_enter", ARCInstKind::User)
+ .Case("objc_sync_exit", ARCInstKind::User)
+ .Default(ARCInstKind::CallOrUser);
+
+ // Argument is i8**
+ if (PointerType *Pte = dyn_cast<PointerType>(ETy))
+ if (Pte->getElementType()->isIntegerTy(8))
return StringSwitch<ARCInstKind>(F->getName())
- .Case("objc_retain", ARCInstKind::Retain)
- .Case("objc_retainAutoreleasedReturnValue", ARCInstKind::RetainRV)
- .Case("objc_unsafeClaimAutoreleasedReturnValue",
- ARCInstKind::ClaimRV)
- .Case("objc_retainBlock", ARCInstKind::RetainBlock)
- .Case("objc_release", ARCInstKind::Release)
- .Case("objc_autorelease", ARCInstKind::Autorelease)
- .Case("objc_autoreleaseReturnValue", ARCInstKind::AutoreleaseRV)
- .Case("objc_autoreleasePoolPop", ARCInstKind::AutoreleasepoolPop)
- .Case("objc_retainedObject", ARCInstKind::NoopCast)
- .Case("objc_unretainedObject", ARCInstKind::NoopCast)
- .Case("objc_unretainedPointer", ARCInstKind::NoopCast)
- .Case("objc_retain_autorelease",
- ARCInstKind::FusedRetainAutorelease)
- .Case("objc_retainAutorelease", ARCInstKind::FusedRetainAutorelease)
- .Case("objc_retainAutoreleaseReturnValue",
- ARCInstKind::FusedRetainAutoreleaseRV)
- .Case("objc_sync_enter", ARCInstKind::User)
- .Case("objc_sync_exit", ARCInstKind::User)
+ .Case("objc_loadWeakRetained", ARCInstKind::LoadWeakRetained)
+ .Case("objc_loadWeak", ARCInstKind::LoadWeak)
+ .Case("objc_destroyWeak", ARCInstKind::DestroyWeak)
.Default(ARCInstKind::CallOrUser);
- // Argument is i8**
- if (PointerType *Pte = dyn_cast<PointerType>(ETy))
- if (Pte->getElementType()->isIntegerTy(8))
- return StringSwitch<ARCInstKind>(F->getName())
- .Case("objc_loadWeakRetained", ARCInstKind::LoadWeakRetained)
- .Case("objc_loadWeak", ARCInstKind::LoadWeak)
- .Case("objc_destroyWeak", ARCInstKind::DestroyWeak)
- .Default(ARCInstKind::CallOrUser);
- }
+ // Anything else with one argument.
+ return ARCInstKind::CallOrUser;
+ }
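The classification above is driven by llvm::StringSwitch over the runtime function name. A minimal example of the same idiom with an invented result enum (assumes LLVM's ADT headers are available):

```cpp
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"

enum class RuntimeKind { Retain, Release, Autorelease, Other };

static RuntimeKind classifyByName(llvm::StringRef Name) {
  // classifyByName("objc_retain") == RuntimeKind::Retain
  return llvm::StringSwitch<RuntimeKind>(Name)
      .Case("objc_retain", RuntimeKind::Retain)
      .Case("objc_release", RuntimeKind::Release)
      .Case("objc_autorelease", RuntimeKind::Autorelease)
      .Default(RuntimeKind::Other);
}
```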
// Two arguments, first is i8**.
const Argument *A1 = &*AI++;
@@ -180,6 +184,7 @@ static bool isInertIntrinsic(unsigned ID) {
// TODO: Make this into a covered switch.
switch (ID) {
case Intrinsic::returnaddress:
+ case Intrinsic::addressofreturnaddress:
case Intrinsic::frameaddress:
case Intrinsic::stacksave:
case Intrinsic::stackrestore:
diff --git a/contrib/llvm/lib/Analysis/OptimizationDiagnosticInfo.cpp b/contrib/llvm/lib/Analysis/OptimizationDiagnosticInfo.cpp
index e979ba2531e4..fa8b07d61b01 100644
--- a/contrib/llvm/lib/Analysis/OptimizationDiagnosticInfo.cpp
+++ b/contrib/llvm/lib/Analysis/OptimizationDiagnosticInfo.cpp
@@ -13,30 +13,204 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/LazyBlockFrequencyInfo.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Dominators.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;
-Optional<uint64_t> OptimizationRemarkEmitter::computeHotness(Value *V) {
+OptimizationRemarkEmitter::OptimizationRemarkEmitter(Function *F)
+ : F(F), BFI(nullptr) {
+ if (!F->getContext().getDiagnosticHotnessRequested())
+ return;
+
+ // First create a dominator tree.
+ DominatorTree DT;
+ DT.recalculate(*F);
+
+ // Generate LoopInfo from it.
+ LoopInfo LI;
+ LI.analyze(DT);
+
+ // Then compute BranchProbabilityInfo.
+ BranchProbabilityInfo BPI;
+ BPI.calculate(*F, LI);
+
+ // Finally compute BFI.
+ OwnedBFI = llvm::make_unique<BlockFrequencyInfo>(*F, BPI, LI);
+ BFI = OwnedBFI.get();
+}
+
+Optional<uint64_t> OptimizationRemarkEmitter::computeHotness(const Value *V) {
if (!BFI)
return None;
return BFI->getBlockProfileCount(cast<BasicBlock>(V));
}
+namespace llvm {
+namespace yaml {
+
+template <> struct MappingTraits<DiagnosticInfoOptimizationBase *> {
+ static void mapping(IO &io, DiagnosticInfoOptimizationBase *&OptDiag) {
+ assert(io.outputting() && "input not yet implemented");
+
+ if (io.mapTag("!Passed", OptDiag->getKind() == DK_OptimizationRemark))
+ ;
+ else if (io.mapTag("!Missed",
+ OptDiag->getKind() == DK_OptimizationRemarkMissed))
+ ;
+ else if (io.mapTag("!Analysis",
+ OptDiag->getKind() == DK_OptimizationRemarkAnalysis))
+ ;
+ else if (io.mapTag("!AnalysisFPCommute",
+ OptDiag->getKind() ==
+ DK_OptimizationRemarkAnalysisFPCommute))
+ ;
+ else if (io.mapTag("!AnalysisAliasing",
+ OptDiag->getKind() ==
+ DK_OptimizationRemarkAnalysisAliasing))
+ ;
+ else
+ llvm_unreachable("todo");
+
+ // These are read-only for now.
+ DebugLoc DL = OptDiag->getDebugLoc();
+ StringRef FN = GlobalValue::getRealLinkageName(
+ OptDiag->getFunction().getName());
+
+ StringRef PassName(OptDiag->PassName);
+ io.mapRequired("Pass", PassName);
+ io.mapRequired("Name", OptDiag->RemarkName);
+ if (!io.outputting() || DL)
+ io.mapOptional("DebugLoc", DL);
+ io.mapRequired("Function", FN);
+ io.mapOptional("Hotness", OptDiag->Hotness);
+ io.mapOptional("Args", OptDiag->Args);
+ }
+};
+
+template <> struct MappingTraits<DebugLoc> {
+ static void mapping(IO &io, DebugLoc &DL) {
+ assert(io.outputting() && "input not yet implemented");
+
+ auto *Scope = cast<DIScope>(DL.getScope());
+ StringRef File = Scope->getFilename();
+ unsigned Line = DL.getLine();
+ unsigned Col = DL.getCol();
+
+ io.mapRequired("File", File);
+ io.mapRequired("Line", Line);
+ io.mapRequired("Column", Col);
+ }
+
+ static const bool flow = true;
+};
+
+// Implement this as a mapping for now to get proper quotation for the value.
+template <> struct MappingTraits<DiagnosticInfoOptimizationBase::Argument> {
+ static void mapping(IO &io, DiagnosticInfoOptimizationBase::Argument &A) {
+ assert(io.outputting() && "input not yet implemented");
+ io.mapRequired(A.Key.data(), A.Val);
+ if (A.DLoc)
+ io.mapOptional("DebugLoc", A.DLoc);
+ }
+};
+
+} // end namespace yaml
+} // end namespace llvm
+
+LLVM_YAML_IS_SEQUENCE_VECTOR(DiagnosticInfoOptimizationBase::Argument)
+
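The remark serialization above is built on LLVM's YAML I/O: a MappingTraits specialization names the fields and yaml::Output streams the object. A minimal output-only sketch for an invented record type (assumes llvm/Support/YAMLTraits.h; the field names are illustrative, not the remark schema above):

```cpp
#include "llvm/Support/YAMLTraits.h"
#include "llvm/Support/raw_ostream.h"
#include <string>

struct RemarkRecord {
  std::string Pass;
  std::string Function;
  unsigned Line;
};

namespace llvm {
namespace yaml {
template <> struct MappingTraits<RemarkRecord> {
  static void mapping(IO &io, RemarkRecord &R) {
    io.mapRequired("Pass", R.Pass);
    io.mapRequired("Function", R.Function);
    io.mapOptional("Line", R.Line);
  }
};
} // end namespace yaml
} // end namespace llvm

int main() {
  RemarkRecord R{"inline", "main", 42};
  llvm::yaml::Output Out(llvm::outs());
  Out << R; // emits a "--- ..." YAML document for R
}
```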
+void OptimizationRemarkEmitter::computeHotness(
+ DiagnosticInfoOptimizationBase &OptDiag) {
+ Value *V = OptDiag.getCodeRegion();
+ if (V)
+ OptDiag.setHotness(computeHotness(V));
+}
+
+void OptimizationRemarkEmitter::emit(DiagnosticInfoOptimizationBase &OptDiag) {
+ computeHotness(OptDiag);
+
+ yaml::Output *Out = F->getContext().getDiagnosticsOutputFile();
+ if (Out) {
+ auto *P = &const_cast<DiagnosticInfoOptimizationBase &>(OptDiag);
+ *Out << P;
+ }
+ // FIXME: now that IsVerbose is part of DI, filtering for this will be moved
+ // from here to clang.
+ if (!OptDiag.isVerbose() || shouldEmitVerbose())
+ F->getContext().diagnose(OptDiag);
+}
+
+void OptimizationRemarkEmitter::emitOptimizationRemark(const char *PassName,
+ const DebugLoc &DLoc,
+ const Value *V,
+ const Twine &Msg) {
+ LLVMContext &Ctx = F->getContext();
+ Ctx.diagnose(OptimizationRemark(PassName, *F, DLoc, Msg, computeHotness(V)));
+}
+
+void OptimizationRemarkEmitter::emitOptimizationRemark(const char *PassName,
+ Loop *L,
+ const Twine &Msg) {
+ emitOptimizationRemark(PassName, L->getStartLoc(), L->getHeader(), Msg);
+}
+
void OptimizationRemarkEmitter::emitOptimizationRemarkMissed(
- const char *PassName, const DebugLoc &DLoc, Value *V, const Twine &Msg) {
+ const char *PassName, const DebugLoc &DLoc, const Value *V,
+ const Twine &Msg, bool IsVerbose) {
LLVMContext &Ctx = F->getContext();
- Ctx.diagnose(DiagnosticInfoOptimizationRemarkMissed(PassName, *F, DLoc, Msg,
- computeHotness(V)));
+ if (!IsVerbose || shouldEmitVerbose())
+ Ctx.diagnose(
+ OptimizationRemarkMissed(PassName, *F, DLoc, Msg, computeHotness(V)));
}
void OptimizationRemarkEmitter::emitOptimizationRemarkMissed(
+ const char *PassName, Loop *L, const Twine &Msg, bool IsVerbose) {
+ emitOptimizationRemarkMissed(PassName, L->getStartLoc(), L->getHeader(), Msg,
+ IsVerbose);
+}
+
+void OptimizationRemarkEmitter::emitOptimizationRemarkAnalysis(
+ const char *PassName, const DebugLoc &DLoc, const Value *V,
+ const Twine &Msg, bool IsVerbose) {
+ LLVMContext &Ctx = F->getContext();
+ if (!IsVerbose || shouldEmitVerbose())
+ Ctx.diagnose(
+ OptimizationRemarkAnalysis(PassName, *F, DLoc, Msg, computeHotness(V)));
+}
+
+void OptimizationRemarkEmitter::emitOptimizationRemarkAnalysis(
+ const char *PassName, Loop *L, const Twine &Msg, bool IsVerbose) {
+ emitOptimizationRemarkAnalysis(PassName, L->getStartLoc(), L->getHeader(),
+ Msg, IsVerbose);
+}
+
+void OptimizationRemarkEmitter::emitOptimizationRemarkAnalysisFPCommute(
+ const char *PassName, const DebugLoc &DLoc, const Value *V,
+ const Twine &Msg) {
+ LLVMContext &Ctx = F->getContext();
+ Ctx.diagnose(OptimizationRemarkAnalysisFPCommute(PassName, *F, DLoc, Msg,
+ computeHotness(V)));
+}
+
+void OptimizationRemarkEmitter::emitOptimizationRemarkAnalysisAliasing(
+ const char *PassName, const DebugLoc &DLoc, const Value *V,
+ const Twine &Msg) {
+ LLVMContext &Ctx = F->getContext();
+ Ctx.diagnose(OptimizationRemarkAnalysisAliasing(PassName, *F, DLoc, Msg,
+ computeHotness(V)));
+}
+
+void OptimizationRemarkEmitter::emitOptimizationRemarkAnalysisAliasing(
const char *PassName, Loop *L, const Twine &Msg) {
- emitOptimizationRemarkMissed(PassName, L->getStartLoc(), L->getHeader(), Msg);
+ emitOptimizationRemarkAnalysisAliasing(PassName, L->getStartLoc(),
+ L->getHeader(), Msg);
}
OptimizationRemarkEmitterWrapperPass::OptimizationRemarkEmitterWrapperPass()
@@ -63,10 +237,11 @@ void OptimizationRemarkEmitterWrapperPass::getAnalysisUsage(
AU.setPreservesAll();
}
-char OptimizationRemarkEmitterAnalysis::PassID;
+AnalysisKey OptimizationRemarkEmitterAnalysis::Key;
OptimizationRemarkEmitter
-OptimizationRemarkEmitterAnalysis::run(Function &F, AnalysisManager<Function> &AM) {
+OptimizationRemarkEmitterAnalysis::run(Function &F,
+ FunctionAnalysisManager &AM) {
BlockFrequencyInfo *BFI;
if (F.getContext().getDiagnosticHotnessRequested())
diff --git a/contrib/llvm/lib/Analysis/PHITransAddr.cpp b/contrib/llvm/lib/Analysis/PHITransAddr.cpp
index b4aad74d50dc..84ecd4ab9809 100644
--- a/contrib/llvm/lib/Analysis/PHITransAddr.cpp
+++ b/contrib/llvm/lib/Analysis/PHITransAddr.cpp
@@ -62,8 +62,7 @@ static bool VerifySubExpr(Value *Expr,
// If it's an instruction, it is either in Tmp or its operands recursively
// are.
- SmallVectorImpl<Instruction*>::iterator Entry =
- std::find(InstInputs.begin(), InstInputs.end(), I);
+ SmallVectorImpl<Instruction *>::iterator Entry = find(InstInputs, I);
if (Entry != InstInputs.end()) {
InstInputs.erase(Entry);
return true;
@@ -126,8 +125,7 @@ static void RemoveInstInputs(Value *V,
if (!I) return;
// If the instruction is in the InstInputs list, remove it.
- SmallVectorImpl<Instruction*>::iterator Entry =
- std::find(InstInputs.begin(), InstInputs.end(), I);
+ SmallVectorImpl<Instruction *>::iterator Entry = find(InstInputs, I);
if (Entry != InstInputs.end()) {
InstInputs.erase(Entry);
return;
@@ -150,8 +148,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
if (!Inst) return V;
// Determine whether 'Inst' is an input to our PHI translatable expression.
- bool isInput =
- std::find(InstInputs.begin(), InstInputs.end(), Inst) != InstInputs.end();
+ bool isInput = is_contained(InstInputs, Inst);
// Handle inputs instructions if needed.
if (isInput) {
@@ -165,7 +162,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
// translated, we need to incorporate the value into the expression or fail.
// In either case, the instruction itself isn't an input any longer.
- InstInputs.erase(std::find(InstInputs.begin(), InstInputs.end(), Inst));
+ InstInputs.erase(find(InstInputs, Inst));
// If this is a PHI, go ahead and translate it.
if (PHINode *PN = dyn_cast<PHINode>(Inst))
@@ -272,8 +269,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
isNSW = isNUW = false;
// If the old 'LHS' was an input, add the new 'LHS' as an input.
- if (std::find(InstInputs.begin(), InstInputs.end(), BOp) !=
- InstInputs.end()) {
+ if (is_contained(InstInputs, BOp)) {
RemoveInstInputs(BOp, InstInputs);
AddAsInput(LHS);
}
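These PHITransAddr cleanups swap explicit std::find calls for the range helpers in llvm/ADT/STLExtras.h. A tiny usage example of llvm::find and llvm::is_contained, with an invented helper function (assumes the ADT headers):

```cpp
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"

static bool removeIfPresent(llvm::SmallVectorImpl<int> &Inputs, int V) {
  if (!llvm::is_contained(Inputs, V))
    return false;
  Inputs.erase(llvm::find(Inputs, V)); // erase the element the check found
  return true;
}
```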
diff --git a/contrib/llvm/lib/Analysis/PostDominators.cpp b/contrib/llvm/lib/Analysis/PostDominators.cpp
index 73550805d5ba..cb9438a2f928 100644
--- a/contrib/llvm/lib/Analysis/PostDominators.cpp
+++ b/contrib/llvm/lib/Analysis/PostDominators.cpp
@@ -44,7 +44,7 @@ FunctionPass* llvm::createPostDomTree() {
return new PostDominatorTreeWrapperPass();
}
-char PostDominatorTreeAnalysis::PassID;
+AnalysisKey PostDominatorTreeAnalysis::Key;
PostDominatorTree PostDominatorTreeAnalysis::run(Function &F,
FunctionAnalysisManager &) {
diff --git a/contrib/llvm/lib/Analysis/ProfileSummaryInfo.cpp b/contrib/llvm/lib/Analysis/ProfileSummaryInfo.cpp
index 9cf99af49581..16d3614c14c6 100644
--- a/contrib/llvm/lib/Analysis/ProfileSummaryInfo.cpp
+++ b/contrib/llvm/lib/Analysis/ProfileSummaryInfo.cpp
@@ -12,7 +12,9 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ProfileSummary.h"
@@ -63,10 +65,10 @@ void ProfileSummaryInfo::computeSummary() {
Summary.reset(ProfileSummary::getFromMD(SummaryMD));
}
-// Returns true if the function is a hot function. If it returns false, it
-// either means it is not hot or it is unknown whether F is hot or not (for
-// example, no profile data is available).
-bool ProfileSummaryInfo::isHotFunction(const Function *F) {
+/// Returns true if the function's entry is hot. If it returns false, it
+/// either means it is not hot or it is unknown whether it is hot or not (for
+/// example, no profile data is available).
+bool ProfileSummaryInfo::isFunctionEntryHot(const Function *F) {
computeSummary();
if (!F || !Summary)
return false;
@@ -74,15 +76,13 @@ bool ProfileSummaryInfo::isHotFunction(const Function *F) {
// FIXME: The heuristic used below for determining hotness is based on
// preliminary SPEC tuning for inliner. This will eventually be a
// convenience method that calls isHotCount.
- return (FunctionCount &&
- FunctionCount.getValue() >=
- (uint64_t)(0.3 * (double)Summary->getMaxFunctionCount()));
+ return FunctionCount && isHotCount(FunctionCount.getValue());
}
-// Returns true if the function is a cold function. If it returns false, it
-// either means it is not cold or it is unknown whether F is cold or not (for
-// example, no profile data is available).
-bool ProfileSummaryInfo::isColdFunction(const Function *F) {
+/// Returns true if the function's entry is cold. If it returns false, it
+/// either means it is not cold or it is unknown whether it is cold or not (for
+/// example, no profile data is available).
+bool ProfileSummaryInfo::isFunctionEntryCold(const Function *F) {
computeSummary();
if (!F)
return false;
@@ -95,12 +95,10 @@ bool ProfileSummaryInfo::isColdFunction(const Function *F) {
// FIXME: The heuristic used below for determining coldness is based on
// preliminary SPEC tuning for inliner. This will eventually be a
// convenience method that calls isHotCount.
- return (FunctionCount &&
- FunctionCount.getValue() <=
- (uint64_t)(0.01 * (double)Summary->getMaxFunctionCount()));
+ return FunctionCount && isColdCount(FunctionCount.getValue());
}
-// Compute the hot and cold thresholds.
+/// Compute the hot and cold thresholds.
void ProfileSummaryInfo::computeThresholds() {
if (!Summary)
computeSummary();
@@ -125,10 +123,22 @@ bool ProfileSummaryInfo::isColdCount(uint64_t C) {
return ColdCountThreshold && C <= ColdCountThreshold.getValue();
}
-ProfileSummaryInfo *ProfileSummaryInfoWrapperPass::getPSI(Module &M) {
- if (!PSI)
- PSI.reset(new ProfileSummaryInfo(M));
- return PSI.get();
+bool ProfileSummaryInfo::isHotBB(const BasicBlock *B, BlockFrequencyInfo *BFI) {
+ auto Count = BFI->getBlockProfileCount(B);
+ if (Count && isHotCount(*Count))
+ return true;
+ // Use extractProfTotalWeight to get the BB count.
+ // For Sample PGO, BFI may not provide an accurate BB count due to errors
+ // magnified during sample count propagation. This serves as a backup plan
+ // to ensure no hot BB is missed.
+ // The query currently has false positives because branch instruction cloning
+ // does not update/scale branch weights. Unlike false negatives, this will not
+ // cause a performance problem.
+ uint64_t TotalCount;
+ if (B->getTerminator()->extractProfTotalWeight(TotalCount) &&
+ isHotCount(TotalCount))
+ return true;
+ return false;
}
INITIALIZE_PASS(ProfileSummaryInfoWrapperPass, "profile-summary-info",
@@ -139,25 +149,33 @@ ProfileSummaryInfoWrapperPass::ProfileSummaryInfoWrapperPass()
initializeProfileSummaryInfoWrapperPassPass(*PassRegistry::getPassRegistry());
}
-char ProfileSummaryAnalysis::PassID;
+bool ProfileSummaryInfoWrapperPass::doInitialization(Module &M) {
+ PSI.reset(new ProfileSummaryInfo(M));
+ return false;
+}
+
+bool ProfileSummaryInfoWrapperPass::doFinalization(Module &M) {
+ PSI.reset();
+ return false;
+}
+
+AnalysisKey ProfileSummaryAnalysis::Key;
ProfileSummaryInfo ProfileSummaryAnalysis::run(Module &M,
ModuleAnalysisManager &) {
return ProfileSummaryInfo(M);
}
-// FIXME: This only tests isHotFunction and isColdFunction and not the
-// isHotCount and isColdCount calls.
PreservedAnalyses ProfileSummaryPrinterPass::run(Module &M,
- AnalysisManager<Module> &AM) {
+ ModuleAnalysisManager &AM) {
ProfileSummaryInfo &PSI = AM.getResult<ProfileSummaryAnalysis>(M);
OS << "Functions in " << M.getName() << " with hot/cold annotations: \n";
for (auto &F : M) {
OS << F.getName();
- if (PSI.isHotFunction(&F))
- OS << " :hot ";
- else if (PSI.isColdFunction(&F))
- OS << " :cold ";
+ if (PSI.isFunctionEntryHot(&F))
+ OS << " :hot entry ";
+ else if (PSI.isFunctionEntryCold(&F))
+ OS << " :cold entry ";
OS << "\n";
}
return PreservedAnalyses::all();
diff --git a/contrib/llvm/lib/Analysis/RegionInfo.cpp b/contrib/llvm/lib/Analysis/RegionInfo.cpp
index 6860a3e63953..8c084ddd2266 100644
--- a/contrib/llvm/lib/Analysis/RegionInfo.cpp
+++ b/contrib/llvm/lib/Analysis/RegionInfo.cpp
@@ -54,8 +54,7 @@ static cl::opt<Region::PrintStyle, true> printStyleX("print-region-style",
clEnumValN(Region::PrintBB, "bb",
"print regions in detail with block_iterator"),
clEnumValN(Region::PrintRN, "rn",
- "print regions in detail with element_iterator"),
- clEnumValEnd));
+ "print regions in detail with element_iterator")));
//===----------------------------------------------------------------------===//
@@ -182,9 +181,9 @@ namespace llvm {
// RegionInfoAnalysis implementation
//
-char RegionInfoAnalysis::PassID;
+AnalysisKey RegionInfoAnalysis::Key;
-RegionInfo RegionInfoAnalysis::run(Function &F, AnalysisManager<Function> &AM) {
+RegionInfo RegionInfoAnalysis::run(Function &F, FunctionAnalysisManager &AM) {
RegionInfo RI;
auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
auto *PDT = &AM.getResult<PostDominatorTreeAnalysis>(F);
@@ -206,7 +205,7 @@ PreservedAnalyses RegionInfoPrinterPass::run(Function &F,
}
PreservedAnalyses RegionInfoVerifierPass::run(Function &F,
- AnalysisManager<Function> &AM) {
+ FunctionAnalysisManager &AM) {
AM.getResult<RegionInfoAnalysis>(F).verifyAnalysis();
return PreservedAnalyses::all();
diff --git a/contrib/llvm/lib/Analysis/RegionPass.cpp b/contrib/llvm/lib/Analysis/RegionPass.cpp
index 5e1cdd48a78e..7358aa6810a1 100644
--- a/contrib/llvm/lib/Analysis/RegionPass.cpp
+++ b/contrib/llvm/lib/Analysis/RegionPass.cpp
@@ -10,7 +10,7 @@
// This file implements RegionPass and RGPassManager. All region optimization
// and transformation passes are derived from RegionPass. RGPassManager is
// responsible for managing RegionPasses.
-// most of these codes are COPY from LoopPass.cpp
+// Most of this code has been COPIED from LoopPass.cpp
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/RegionPass.h"
@@ -64,9 +64,7 @@ bool RGPassManager::runOnFunction(Function &F) {
return false;
// Initialization
- for (std::deque<Region *>::const_iterator I = RQ.begin(), E = RQ.end();
- I != E; ++I) {
- Region *R = *I;
+ for (Region *R : RQ) {
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
RegionPass *RP = (RegionPass *)getContainedPass(Index);
Changed |= RP->doInitialization(R, *this);
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
index e42a4b574d90..5e566bcdaff4 100644
--- a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -61,6 +61,8 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ScopeExit.h"
+#include "llvm/ADT/Sequence.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
@@ -120,6 +122,16 @@ static cl::opt<bool>
cl::desc("Verify no dangling value in ScalarEvolution's "
"ExprValueMap (slow)"));
+static cl::opt<unsigned> MulOpsInlineThreshold(
+ "scev-mulops-inline-threshold", cl::Hidden,
+ cl::desc("Threshold for inlining multiplication operands into a SCEV"),
+ cl::init(1000));
+
+static cl::opt<unsigned>
+ MaxCompareDepth("scalar-evolution-max-compare-depth", cl::Hidden,
+ cl::desc("Maximum depth of recursive compare complexity"),
+ cl::init(32));
+
//===----------------------------------------------------------------------===//
// SCEV class definitions
//===----------------------------------------------------------------------===//
@@ -447,180 +459,233 @@ bool SCEVUnknown::isOffsetOf(Type *&CTy, Constant *&FieldNo) const {
// SCEV Utilities
//===----------------------------------------------------------------------===//
-namespace {
-/// SCEVComplexityCompare - Return true if the complexity of the LHS is less
-/// than the complexity of the RHS. This comparator is used to canonicalize
-/// expressions.
-class SCEVComplexityCompare {
- const LoopInfo *const LI;
-public:
- explicit SCEVComplexityCompare(const LoopInfo *li) : LI(li) {}
+/// Compare the two values \p LV and \p RV in terms of their "complexity" where
+/// "complexity" is a partial (and somewhat ad-hoc) relation used to order
+/// operands in SCEV expressions. \p EqCache is a set of pairs of values that
+/// have been previously deemed to be "equally complex" by this routine. It is
+/// intended to avoid exponential time complexity in cases like:
+///
+/// %a = f(%x, %y)
+/// %b = f(%a, %a)
+/// %c = f(%b, %b)
+///
+/// %d = f(%x, %y)
+/// %e = f(%d, %d)
+/// %f = f(%e, %e)
+///
+/// CompareValueComplexity(%f, %c)
+///
+/// Since we do not continue running this routine on expression trees once we
+/// have seen unequal values, there is no need to track them in the cache.
+static int
+CompareValueComplexity(SmallSet<std::pair<Value *, Value *>, 8> &EqCache,
+ const LoopInfo *const LI, Value *LV, Value *RV,
+ unsigned Depth) {
+ if (Depth > MaxCompareDepth || EqCache.count({LV, RV}))
+ return 0;
+
+ // Order pointer values after integer values. This helps SCEVExpander form
+ // GEPs.
+ bool LIsPointer = LV->getType()->isPointerTy(),
+ RIsPointer = RV->getType()->isPointerTy();
+ if (LIsPointer != RIsPointer)
+ return (int)LIsPointer - (int)RIsPointer;
- // Return true or false if LHS is less than, or at least RHS, respectively.
- bool operator()(const SCEV *LHS, const SCEV *RHS) const {
- return compare(LHS, RHS) < 0;
+ // Compare getValueID values.
+ unsigned LID = LV->getValueID(), RID = RV->getValueID();
+ if (LID != RID)
+ return (int)LID - (int)RID;
+
+ // Sort arguments by their position.
+ if (const auto *LA = dyn_cast<Argument>(LV)) {
+ const auto *RA = cast<Argument>(RV);
+ unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo();
+ return (int)LArgNo - (int)RArgNo;
}
- // Return negative, zero, or positive, if LHS is less than, equal to, or
- // greater than RHS, respectively. A three-way result allows recursive
- // comparisons to be more efficient.
- int compare(const SCEV *LHS, const SCEV *RHS) const {
- // Fast-path: SCEVs are uniqued so we can do a quick equality check.
- if (LHS == RHS)
- return 0;
-
- // Primarily, sort the SCEVs by their getSCEVType().
- unsigned LType = LHS->getSCEVType(), RType = RHS->getSCEVType();
- if (LType != RType)
- return (int)LType - (int)RType;
-
- // Aside from the getSCEVType() ordering, the particular ordering
- // isn't very important except that it's beneficial to be consistent,
- // so that (a + b) and (b + a) don't end up as different expressions.
- switch (static_cast<SCEVTypes>(LType)) {
- case scUnknown: {
- const SCEVUnknown *LU = cast<SCEVUnknown>(LHS);
- const SCEVUnknown *RU = cast<SCEVUnknown>(RHS);
-
- // Sort SCEVUnknown values with some loose heuristics. TODO: This is
- // not as complete as it could be.
- const Value *LV = LU->getValue(), *RV = RU->getValue();
-
- // Order pointer values after integer values. This helps SCEVExpander
- // form GEPs.
- bool LIsPointer = LV->getType()->isPointerTy(),
- RIsPointer = RV->getType()->isPointerTy();
- if (LIsPointer != RIsPointer)
- return (int)LIsPointer - (int)RIsPointer;
-
- // Compare getValueID values.
- unsigned LID = LV->getValueID(),
- RID = RV->getValueID();
- if (LID != RID)
- return (int)LID - (int)RID;
-
- // Sort arguments by their position.
- if (const Argument *LA = dyn_cast<Argument>(LV)) {
- const Argument *RA = cast<Argument>(RV);
- unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo();
- return (int)LArgNo - (int)RArgNo;
- }
+ if (const auto *LGV = dyn_cast<GlobalValue>(LV)) {
+ const auto *RGV = cast<GlobalValue>(RV);
- // For instructions, compare their loop depth, and their operand
- // count. This is pretty loose.
- if (const Instruction *LInst = dyn_cast<Instruction>(LV)) {
- const Instruction *RInst = cast<Instruction>(RV);
-
- // Compare loop depths.
- const BasicBlock *LParent = LInst->getParent(),
- *RParent = RInst->getParent();
- if (LParent != RParent) {
- unsigned LDepth = LI->getLoopDepth(LParent),
- RDepth = LI->getLoopDepth(RParent);
- if (LDepth != RDepth)
- return (int)LDepth - (int)RDepth;
- }
+ const auto IsGVNameSemantic = [&](const GlobalValue *GV) {
+ auto LT = GV->getLinkage();
+ return !(GlobalValue::isPrivateLinkage(LT) ||
+ GlobalValue::isInternalLinkage(LT));
+ };
- // Compare the number of operands.
- unsigned LNumOps = LInst->getNumOperands(),
- RNumOps = RInst->getNumOperands();
- return (int)LNumOps - (int)RNumOps;
- }
+ // Use the names to distinguish the two values, but only if the
+ // names are semantically important.
+ if (IsGVNameSemantic(LGV) && IsGVNameSemantic(RGV))
+ return LGV->getName().compare(RGV->getName());
+ }
+
+ // For instructions, compare their loop depth, and their operand count. This
+ // is pretty loose.
+ if (const auto *LInst = dyn_cast<Instruction>(LV)) {
+ const auto *RInst = cast<Instruction>(RV);
- return 0;
+ // Compare loop depths.
+ const BasicBlock *LParent = LInst->getParent(),
+ *RParent = RInst->getParent();
+ if (LParent != RParent) {
+ unsigned LDepth = LI->getLoopDepth(LParent),
+ RDepth = LI->getLoopDepth(RParent);
+ if (LDepth != RDepth)
+ return (int)LDepth - (int)RDepth;
}
- case scConstant: {
- const SCEVConstant *LC = cast<SCEVConstant>(LHS);
- const SCEVConstant *RC = cast<SCEVConstant>(RHS);
+ // Compare the number of operands.
+ unsigned LNumOps = LInst->getNumOperands(),
+ RNumOps = RInst->getNumOperands();
+ if (LNumOps != RNumOps)
+ return (int)LNumOps - (int)RNumOps;
- // Compare constant values.
- const APInt &LA = LC->getAPInt();
- const APInt &RA = RC->getAPInt();
- unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth();
- if (LBitWidth != RBitWidth)
- return (int)LBitWidth - (int)RBitWidth;
- return LA.ult(RA) ? -1 : 1;
+ for (unsigned Idx : seq(0u, LNumOps)) {
+ int Result =
+ CompareValueComplexity(EqCache, LI, LInst->getOperand(Idx),
+ RInst->getOperand(Idx), Depth + 1);
+ if (Result != 0)
+ return Result;
}
+ }
- case scAddRecExpr: {
- const SCEVAddRecExpr *LA = cast<SCEVAddRecExpr>(LHS);
- const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS);
+ EqCache.insert({LV, RV});
+ return 0;
+}
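CompareValueComplexity above guards against exponential blow-up with two mechanisms: a recursion-depth cap and a cache of value pairs already proven equally complex, so shared subtrees in a DAG are compared once instead of once per path. A standalone model of those guards with an invented Expr type, built in the f(%x, %y) shape from the comment:

```cpp
#include <cstdio>
#include <set>
#include <utility>
#include <vector>

struct Expr {
  int Kind;
  std::vector<const Expr *> Ops;
};

static const unsigned MaxDepth = 32;

static int compareExpr(std::set<std::pair<const Expr *, const Expr *>> &EqCache,
                       const Expr *L, const Expr *R, unsigned Depth = 0) {
  if (L == R || Depth > MaxDepth || EqCache.count({L, R}))
    return 0; // identical, too deep to keep comparing, or already proven equal
  if (L->Kind != R->Kind)
    return L->Kind - R->Kind;
  if (L->Ops.size() != R->Ops.size())
    return (int)L->Ops.size() - (int)R->Ops.size();
  for (size_t I = 0, N = L->Ops.size(); I != N; ++I)
    if (int X = compareExpr(EqCache, L->Ops[I], R->Ops[I], Depth + 1))
      return X;
  EqCache.insert({L, R}); // remember the verdict for other paths in the DAG
  return 0;
}

int main() {
  Expr X{0, {}}, Y{1, {}};
  Expr A{2, {&X, &Y}}, B{2, {&A, &A}}, C{2, {&B, &B}};
  Expr D{2, {&X, &Y}}, E{2, {&D, &D}}, F{2, {&E, &E}};
  std::set<std::pair<const Expr *, const Expr *>> EqCache;
  std::printf("compare(F, C) = %d\n", compareExpr(EqCache, &F, &C));
}
```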
- // Compare addrec loop depths.
- const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop();
- if (LLoop != RLoop) {
- unsigned LDepth = LLoop->getLoopDepth(),
- RDepth = RLoop->getLoopDepth();
- if (LDepth != RDepth)
- return (int)LDepth - (int)RDepth;
- }
+// Return negative, zero, or positive, if LHS is less than, equal to, or greater
+// than RHS, respectively. A three-way result allows recursive comparisons to be
+// more efficient.
+static int CompareSCEVComplexity(
+ SmallSet<std::pair<const SCEV *, const SCEV *>, 8> &EqCacheSCEV,
+ const LoopInfo *const LI, const SCEV *LHS, const SCEV *RHS,
+ unsigned Depth = 0) {
+ // Fast-path: SCEVs are uniqued so we can do a quick equality check.
+ if (LHS == RHS)
+ return 0;
- // Addrec complexity grows with operand count.
- unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands();
- if (LNumOps != RNumOps)
- return (int)LNumOps - (int)RNumOps;
+ // Primarily, sort the SCEVs by their getSCEVType().
+ unsigned LType = LHS->getSCEVType(), RType = RHS->getSCEVType();
+ if (LType != RType)
+ return (int)LType - (int)RType;
- // Lexicographically compare.
- for (unsigned i = 0; i != LNumOps; ++i) {
- long X = compare(LA->getOperand(i), RA->getOperand(i));
- if (X != 0)
- return X;
- }
+ if (Depth > MaxCompareDepth || EqCacheSCEV.count({LHS, RHS}))
+ return 0;
+ // Aside from the getSCEVType() ordering, the particular ordering
+ // isn't very important except that it's beneficial to be consistent,
+ // so that (a + b) and (b + a) don't end up as different expressions.
+ switch (static_cast<SCEVTypes>(LType)) {
+ case scUnknown: {
+ const SCEVUnknown *LU = cast<SCEVUnknown>(LHS);
+ const SCEVUnknown *RU = cast<SCEVUnknown>(RHS);
+
+ SmallSet<std::pair<Value *, Value *>, 8> EqCache;
+ int X = CompareValueComplexity(EqCache, LI, LU->getValue(), RU->getValue(),
+ Depth + 1);
+ if (X == 0)
+ EqCacheSCEV.insert({LHS, RHS});
+ return X;
+ }
- return 0;
+ case scConstant: {
+ const SCEVConstant *LC = cast<SCEVConstant>(LHS);
+ const SCEVConstant *RC = cast<SCEVConstant>(RHS);
+
+ // Compare constant values.
+ const APInt &LA = LC->getAPInt();
+ const APInt &RA = RC->getAPInt();
+ unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth();
+ if (LBitWidth != RBitWidth)
+ return (int)LBitWidth - (int)RBitWidth;
+ return LA.ult(RA) ? -1 : 1;
+ }
+
+ case scAddRecExpr: {
+ const SCEVAddRecExpr *LA = cast<SCEVAddRecExpr>(LHS);
+ const SCEVAddRecExpr *RA = cast<SCEVAddRecExpr>(RHS);
+
+ // Compare addrec loop depths.
+ const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop();
+ if (LLoop != RLoop) {
+ unsigned LDepth = LLoop->getLoopDepth(), RDepth = RLoop->getLoopDepth();
+ if (LDepth != RDepth)
+ return (int)LDepth - (int)RDepth;
}
- case scAddExpr:
- case scMulExpr:
- case scSMaxExpr:
- case scUMaxExpr: {
- const SCEVNAryExpr *LC = cast<SCEVNAryExpr>(LHS);
- const SCEVNAryExpr *RC = cast<SCEVNAryExpr>(RHS);
-
- // Lexicographically compare n-ary expressions.
- unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands();
- if (LNumOps != RNumOps)
- return (int)LNumOps - (int)RNumOps;
-
- for (unsigned i = 0; i != LNumOps; ++i) {
- if (i >= RNumOps)
- return 1;
- long X = compare(LC->getOperand(i), RC->getOperand(i));
- if (X != 0)
- return X;
- }
+ // Addrec complexity grows with operand count.
+ unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands();
+ if (LNumOps != RNumOps)
return (int)LNumOps - (int)RNumOps;
+
+ // Lexicographically compare.
+ for (unsigned i = 0; i != LNumOps; ++i) {
+ int X = CompareSCEVComplexity(EqCacheSCEV, LI, LA->getOperand(i),
+ RA->getOperand(i), Depth + 1);
+ if (X != 0)
+ return X;
}
+ EqCacheSCEV.insert({LHS, RHS});
+ return 0;
+ }
- case scUDivExpr: {
- const SCEVUDivExpr *LC = cast<SCEVUDivExpr>(LHS);
- const SCEVUDivExpr *RC = cast<SCEVUDivExpr>(RHS);
+ case scAddExpr:
+ case scMulExpr:
+ case scSMaxExpr:
+ case scUMaxExpr: {
+ const SCEVNAryExpr *LC = cast<SCEVNAryExpr>(LHS);
+ const SCEVNAryExpr *RC = cast<SCEVNAryExpr>(RHS);
+
+ // Lexicographically compare n-ary expressions.
+ unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands();
+ if (LNumOps != RNumOps)
+ return (int)LNumOps - (int)RNumOps;
- // Lexicographically compare udiv expressions.
- long X = compare(LC->getLHS(), RC->getLHS());
+ for (unsigned i = 0; i != LNumOps; ++i) {
+ if (i >= RNumOps)
+ return 1;
+ int X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getOperand(i),
+ RC->getOperand(i), Depth + 1);
if (X != 0)
return X;
- return compare(LC->getRHS(), RC->getRHS());
}
+ EqCacheSCEV.insert({LHS, RHS});
+ return 0;
+ }
- case scTruncate:
- case scZeroExtend:
- case scSignExtend: {
- const SCEVCastExpr *LC = cast<SCEVCastExpr>(LHS);
- const SCEVCastExpr *RC = cast<SCEVCastExpr>(RHS);
+ case scUDivExpr: {
+ const SCEVUDivExpr *LC = cast<SCEVUDivExpr>(LHS);
+ const SCEVUDivExpr *RC = cast<SCEVUDivExpr>(RHS);
- // Compare cast expressions by operand.
- return compare(LC->getOperand(), RC->getOperand());
- }
+ // Lexicographically compare udiv expressions.
+ int X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getLHS(), RC->getLHS(),
+ Depth + 1);
+ if (X != 0)
+ return X;
+ X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getRHS(), RC->getRHS(),
+ Depth + 1);
+ if (X == 0)
+ EqCacheSCEV.insert({LHS, RHS});
+ return X;
+ }
- case scCouldNotCompute:
- llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
- }
- llvm_unreachable("Unknown SCEV kind!");
+ case scTruncate:
+ case scZeroExtend:
+ case scSignExtend: {
+ const SCEVCastExpr *LC = cast<SCEVCastExpr>(LHS);
+ const SCEVCastExpr *RC = cast<SCEVCastExpr>(RHS);
+
+ // Compare cast expressions by operand.
+ int X = CompareSCEVComplexity(EqCacheSCEV, LI, LC->getOperand(),
+ RC->getOperand(), Depth + 1);
+ if (X == 0)
+ EqCacheSCEV.insert({LHS, RHS});
+ return X;
}
-};
-} // end anonymous namespace
+
+ case scCouldNotCompute:
+ llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
+ }
+ llvm_unreachable("Unknown SCEV kind!");
+}
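The comparator above follows a common pattern: a memoized, depth-capped three-way comparison that recurses into operands and records pairs already proven equal so later queries short-circuit. A minimal standalone C++ sketch of that pattern over a toy expression tree (simplified types, not the LLVM API; the cache, the depth cap, and the stable_sort wrapper are the point):

#include <algorithm>
#include <cstddef>
#include <set>
#include <utility>
#include <vector>

struct Expr {
  int Kind;                              // primary sort key, like getSCEVType()
  int Payload;                           // e.g. a constant value
  std::vector<const Expr *> Ops;         // operands
};

static constexpr unsigned MaxDepth = 32;

// Three-way compare: negative, zero, or positive. Pairs proven equal are
// cached so repeated comparisons of large shared subtrees stay cheap.
int compareExpr(std::set<std::pair<const Expr *, const Expr *>> &EqCache,
                const Expr *L, const Expr *R, unsigned Depth = 0) {
  if (L == R)
    return 0;
  if (L->Kind != R->Kind)
    return L->Kind - R->Kind;
  if (Depth > MaxDepth || EqCache.count({L, R}))
    return 0;                            // give up / already proven equal
  if (L->Payload != R->Payload)
    return L->Payload - R->Payload;
  if (L->Ops.size() != R->Ops.size())
    return (int)L->Ops.size() - (int)R->Ops.size();
  for (std::size_t I = 0; I != L->Ops.size(); ++I)
    if (int X = compareExpr(EqCache, L->Ops[I], R->Ops[I], Depth + 1))
      return X;
  EqCache.insert({L, R});                // remember the pair compared equal
  return 0;
}

// Usage mirroring GroupByComplexity below: wrap the three-way result in a
// strict weak ordering for std::stable_sort.
void sortByComplexity(std::vector<const Expr *> &Ops) {
  std::set<std::pair<const Expr *, const Expr *>> EqCache;
  std::stable_sort(Ops.begin(), Ops.end(), [&](const Expr *A, const Expr *B) {
    return compareExpr(EqCache, A, B) < 0;
  });
}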
/// Given a list of SCEV objects, order them by their complexity, and group
/// objects of the same complexity together by value. When this routine is
@@ -635,17 +700,22 @@ public:
static void GroupByComplexity(SmallVectorImpl<const SCEV *> &Ops,
LoopInfo *LI) {
if (Ops.size() < 2) return; // Noop
+
+ SmallSet<std::pair<const SCEV *, const SCEV *>, 8> EqCache;
if (Ops.size() == 2) {
// This is the common case, which also happens to be trivially simple.
// Special case it.
const SCEV *&LHS = Ops[0], *&RHS = Ops[1];
- if (SCEVComplexityCompare(LI)(RHS, LHS))
+ if (CompareSCEVComplexity(EqCache, LI, RHS, LHS) < 0)
std::swap(LHS, RHS);
return;
}
// Do the rough sort by complexity.
- std::stable_sort(Ops.begin(), Ops.end(), SCEVComplexityCompare(LI));
+ std::stable_sort(Ops.begin(), Ops.end(),
+ [&EqCache, LI](const SCEV *LHS, const SCEV *RHS) {
+ return CompareSCEVComplexity(EqCache, LI, LHS, RHS) < 0;
+ });
// Now that we are sorted by complexity, group elements of the same
// complexity. Note that this is, at worst, N^2, but the vector is likely to
@@ -2518,6 +2588,8 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
if (Idx < Ops.size()) {
bool DeletedMul = false;
while (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Ops[Idx])) {
+ if (Ops.size() > MulOpsInlineThreshold)
+ break;
// If we have an mul, expand the mul operands onto the end of the operands
// list.
Ops.erase(Ops.begin()+Idx);
@@ -2970,9 +3042,9 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
}
const SCEV *
-ScalarEvolution::getGEPExpr(Type *PointeeType, const SCEV *BaseExpr,
- const SmallVectorImpl<const SCEV *> &IndexExprs,
- bool InBounds) {
+ScalarEvolution::getGEPExpr(GEPOperator *GEP,
+ const SmallVectorImpl<const SCEV *> &IndexExprs) {
+ const SCEV *BaseExpr = getSCEV(GEP->getPointerOperand());
// getSCEV(Base)->getType() has the same address space as Base->getType()
// because SCEV::getType() preserves the address space.
Type *IntPtrTy = getEffectiveSCEVType(BaseExpr->getType());
@@ -2981,12 +3053,13 @@ ScalarEvolution::getGEPExpr(Type *PointeeType, const SCEV *BaseExpr,
// flow and the no-overflow bits may not be valid for the expression in any
// context. This can be fixed similarly to how these flags are handled for
// adds.
- SCEV::NoWrapFlags Wrap = InBounds ? SCEV::FlagNSW : SCEV::FlagAnyWrap;
+ SCEV::NoWrapFlags Wrap = GEP->isInBounds() ? SCEV::FlagNSW
+ : SCEV::FlagAnyWrap;
const SCEV *TotalOffset = getZero(IntPtrTy);
- // The address space is unimportant. The first thing we do on CurTy is getting
+ // The array size is unimportant. The first thing we do on CurTy is getting
// its element type.
- Type *CurTy = PointerType::getUnqual(PointeeType);
+ Type *CurTy = ArrayType::get(GEP->getSourceElementType(), 0);
for (const SCEV *IndexExpr : IndexExprs) {
// Compute the (potentially symbolic) offset in bytes for this index.
if (StructType *STy = dyn_cast<StructType>(CurTy)) {
@@ -3311,75 +3384,47 @@ const SCEV *ScalarEvolution::getCouldNotCompute() {
return CouldNotCompute.get();
}
-
bool ScalarEvolution::checkValidity(const SCEV *S) const {
- // Helper class working with SCEVTraversal to figure out if a SCEV contains
- // a SCEVUnknown with null value-pointer. FindInvalidSCEVUnknown::FindOne
- // is set iff if find such SCEVUnknown.
- //
- struct FindInvalidSCEVUnknown {
- bool FindOne;
- FindInvalidSCEVUnknown() { FindOne = false; }
- bool follow(const SCEV *S) {
- switch (static_cast<SCEVTypes>(S->getSCEVType())) {
- case scConstant:
- return false;
- case scUnknown:
- if (!cast<SCEVUnknown>(S)->getValue())
- FindOne = true;
- return false;
- default:
- return true;
- }
- }
- bool isDone() const { return FindOne; }
- };
-
- FindInvalidSCEVUnknown F;
- SCEVTraversal<FindInvalidSCEVUnknown> ST(F);
- ST.visitAll(S);
+ bool ContainsNulls = SCEVExprContains(S, [](const SCEV *S) {
+ auto *SU = dyn_cast<SCEVUnknown>(S);
+ return SU && SU->getValue() == nullptr;
+ });
- return !F.FindOne;
-}
-
-namespace {
-// Helper class working with SCEVTraversal to figure out if a SCEV contains
-// a sub SCEV of scAddRecExpr type. FindInvalidSCEVUnknown::FoundOne is set
-// iff if such sub scAddRecExpr type SCEV is found.
-struct FindAddRecurrence {
- bool FoundOne;
- FindAddRecurrence() : FoundOne(false) {}
-
- bool follow(const SCEV *S) {
- switch (static_cast<SCEVTypes>(S->getSCEVType())) {
- case scAddRecExpr:
- FoundOne = true;
- case scConstant:
- case scUnknown:
- case scCouldNotCompute:
- return false;
- default:
- return true;
- }
- }
- bool isDone() const { return FoundOne; }
-};
+ return !ContainsNulls;
}
bool ScalarEvolution::containsAddRecurrence(const SCEV *S) {
- HasRecMapType::iterator I = HasRecMap.find_as(S);
+ HasRecMapType::iterator I = HasRecMap.find(S);
if (I != HasRecMap.end())
return I->second;
- FindAddRecurrence F;
- SCEVTraversal<FindAddRecurrence> ST(F);
- ST.visitAll(S);
- HasRecMap.insert({S, F.FoundOne});
- return F.FoundOne;
+ bool FoundAddRec = SCEVExprContains(S, isa<SCEVAddRecExpr, const SCEV *>);
+ HasRecMap.insert({S, FoundAddRec});
+ return FoundAddRec;
+}
+
+/// Try to split a SCEVAddExpr into a pair of {SCEV, ConstantInt}.
+/// If \p S is a SCEVAddExpr and is composed of a sub SCEV S' and an
+/// offset I, then return {S', I}, else return {\p S, nullptr}.
+static std::pair<const SCEV *, ConstantInt *> splitAddExpr(const SCEV *S) {
+ const auto *Add = dyn_cast<SCEVAddExpr>(S);
+ if (!Add)
+ return {S, nullptr};
+
+ if (Add->getNumOperands() != 2)
+ return {S, nullptr};
+
+ auto *ConstOp = dyn_cast<SCEVConstant>(Add->getOperand(0));
+ if (!ConstOp)
+ return {S, nullptr};
+
+ return {Add->getOperand(1), ConstOp->getValue()};
}
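splitAddExpr only peels a two-operand add whose first operand is a constant, so an expression such as (5 + %x) yields {%x, 5} and anything else comes back unchanged with a null offset. A standalone sketch of the same split over a toy term list (hypothetical types, not the SCEV classes), including the std::tie call-site pattern used further down:

#include <optional>
#include <tuple>
#include <utility>
#include <vector>

// A toy "add expression": a list of symbolic terms plus one constant term.
struct ToyAdd {
  std::vector<int> SymbolIds;   // stand-ins for non-constant operands
  long Constant = 0;
};

// Split Expr into {stripped symbol, constant offset} when it is exactly
// "constant + one symbol"; otherwise return it unchanged with no offset.
std::pair<std::optional<int>, long> splitToyAdd(const ToyAdd &Expr) {
  if (Expr.SymbolIds.size() != 1 || Expr.Constant == 0)
    return {std::nullopt, 0};
  return {Expr.SymbolIds.front(), Expr.Constant};
}

// Call-site pattern, mirroring the std::tie uses later in this file.
long offsetOrZero(const ToyAdd &Expr) {
  std::optional<int> Stripped;
  long Offset = 0;
  std::tie(Stripped, Offset) = splitToyAdd(Expr);
  return Stripped ? Offset : 0;
}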
-/// Return the Value set from S.
-SetVector<Value *> *ScalarEvolution::getSCEVValues(const SCEV *S) {
+/// Return the ValueOffsetPair set for \p S. \p S can be represented
+/// by the value and offset from any ValueOffsetPair in the set.
+SetVector<ScalarEvolution::ValueOffsetPair> *
+ScalarEvolution::getSCEVValues(const SCEV *S) {
ExprValueMapType::iterator SI = ExprValueMap.find_as(S);
if (SI == ExprValueMap.end())
return nullptr;
@@ -3387,24 +3432,31 @@ SetVector<Value *> *ScalarEvolution::getSCEVValues(const SCEV *S) {
if (VerifySCEVMap) {
// Check there is no dangling Value in the set returned.
for (const auto &VE : SI->second)
- assert(ValueExprMap.count(VE));
+ assert(ValueExprMap.count(VE.first));
}
#endif
return &SI->second;
}
-/// Erase Value from ValueExprMap and ExprValueMap. If ValueExprMap.erase(V) is
-/// not used together with forgetMemoizedResults(S), eraseValueFromMap should be
-/// used instead to ensure whenever V->S is removed from ValueExprMap, V is also
-/// removed from the set of ExprValueMap[S].
+/// Erase Value from ValueExprMap and ExprValueMap. ValueExprMap.erase(V)
+/// cannot be used separately. eraseValueFromMap should be used to remove
+/// V from ValueExprMap and ExprValueMap at the same time.
void ScalarEvolution::eraseValueFromMap(Value *V) {
ValueExprMapType::iterator I = ValueExprMap.find_as(V);
if (I != ValueExprMap.end()) {
const SCEV *S = I->second;
- SetVector<Value *> *SV = getSCEVValues(S);
- // Remove V from the set of ExprValueMap[S]
- if (SV)
- SV->remove(V);
+ // Remove {V, 0} from the set of ExprValueMap[S]
+ if (SetVector<ValueOffsetPair> *SV = getSCEVValues(S))
+ SV->remove({V, nullptr});
+
+ // Remove {V, Offset} from the set of ExprValueMap[Stripped]
+ const SCEV *Stripped;
+ ConstantInt *Offset;
+ std::tie(Stripped, Offset) = splitAddExpr(S);
+ if (Offset != nullptr) {
+ if (SetVector<ValueOffsetPair> *SV = getSCEVValues(Stripped))
+ SV->remove({V, Offset});
+ }
ValueExprMap.erase(V);
}
}
@@ -3419,11 +3471,26 @@ const SCEV *ScalarEvolution::getSCEV(Value *V) {
S = createSCEV(V);
// During PHI resolution, it is possible to create two SCEVs for the same
// V, so it is needed to double check whether V->S is inserted into
- // ValueExprMap before insert S->V into ExprValueMap.
+ // ValueExprMap before inserting S->{V, 0} into ExprValueMap.
std::pair<ValueExprMapType::iterator, bool> Pair =
ValueExprMap.insert({SCEVCallbackVH(V, this), S});
- if (Pair.second)
- ExprValueMap[S].insert(V);
+ if (Pair.second) {
+ ExprValueMap[S].insert({V, nullptr});
+
+ // If S == Stripped + Offset, add Stripped -> {V, Offset} into
+ // ExprValueMap.
+ const SCEV *Stripped = S;
+ ConstantInt *Offset = nullptr;
+ std::tie(Stripped, Offset) = splitAddExpr(S);
+ // If stripped is SCEVUnknown, don't bother to save
+ // Stripped -> {V, offset}. It doesn't simplify and sometimes even
+ // increases the complexity of the expansion code.
+ // If V is a GetElementPtrInst, don't save Stripped -> {V, offset}
+ // because it may generate add/sub instead of GEP in SCEV expansion.
+ if (Offset != nullptr && !isa<SCEVUnknown>(Stripped) &&
+ !isa<GetElementPtrInst>(V))
+ ExprValueMap[Stripped].insert({V, Offset});
+ }
}
return S;
}
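The net effect of the block above is that each expression S remembers every value V that computes it as {V, 0}, and, when S == Stripped + Offset, the stripped expression additionally remembers {V, Offset}, meaning Stripped can be recovered as V minus Offset at expansion time. A miniature of that bookkeeping with plain containers (hypothetical names, not the real maps):

#include <map>
#include <set>
#include <string>
#include <utility>

// Key: a printed expression. Value: the (IR value, constant offset) pairs that
// can reproduce it; an entry {V, Off} means "this expression equals V - Off".
using ToyExprValueMap =
    std::map<std::string, std::set<std::pair<std::string, long>>>;

// Record that value V computes expression S, where S == Stripped + Offset.
void recordMapping(ToyExprValueMap &M, const std::string &S,
                   const std::string &Stripped, long Offset,
                   const std::string &V) {
  M[S].insert({V, 0});                 // S itself is available directly as V.
  if (Offset != 0)
    M[Stripped].insert({V, Offset});   // Stripped is available as V - Offset.
}

// Removal must mirror both insertions, which is why the patch routes every
// erasure through eraseValueFromMap instead of a bare ValueExprMap.erase(V).
void forgetValue(ToyExprValueMap &M, const std::string &S,
                 const std::string &Stripped, long Offset,
                 const std::string &V) {
  M[S].erase({V, 0});
  if (Offset != 0)
    M[Stripped].erase({V, Offset});
}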
@@ -3436,8 +3503,8 @@ const SCEV *ScalarEvolution::getExistingSCEV(Value *V) {
const SCEV *S = I->second;
if (checkValidity(S))
return S;
+ eraseValueFromMap(V);
forgetMemoizedResults(S);
- ValueExprMap.erase(I);
}
return nullptr;
}
@@ -3675,8 +3742,8 @@ void ScalarEvolution::forgetSymbolicName(Instruction *PN, const SCEV *SymName) {
if (!isa<PHINode>(I) ||
!isa<SCEVUnknown>(Old) ||
(I != PN && Old == SymName)) {
+ eraseValueFromMap(It->first);
forgetMemoizedResults(Old);
- ValueExprMap.erase(It);
}
}
@@ -4055,7 +4122,7 @@ const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) {
// to create an AddRecExpr for this PHI node. We can not keep this temporary
// as it will prevent later (possibly simpler) SCEV expressions to be added
// to the ValueExprMap.
- ValueExprMap.erase(PN);
+ eraseValueFromMap(PN);
}
return nullptr;
@@ -4168,7 +4235,9 @@ static bool BrPHIToSelect(DominatorTree &DT, BranchInst *BI, PHINode *Merge,
}
const SCEV *ScalarEvolution::createNodeFromSelectLikePHI(PHINode *PN) {
- if (PN->getNumIncomingValues() == 2) {
+ auto IsReachable =
+ [&](BasicBlock *BB) { return DT.isReachableFromEntry(BB); };
+ if (PN->getNumIncomingValues() == 2 && all_of(PN->blocks(), IsReachable)) {
const Loop *L = LI.getLoopFor(PN->getParent());
// We don't want to break LCSSA, even in a SCEV expression tree.
@@ -4244,7 +4313,7 @@ const SCEV *ScalarEvolution::createNodeForSelectOrPHI(Instruction *I,
case ICmpInst::ICMP_SLT:
case ICmpInst::ICMP_SLE:
std::swap(LHS, RHS);
- // fall through
+ LLVM_FALLTHROUGH;
case ICmpInst::ICMP_SGT:
case ICmpInst::ICMP_SGE:
// a >s b ? a+x : b+x -> smax(a, b)+x
@@ -4267,7 +4336,7 @@ const SCEV *ScalarEvolution::createNodeForSelectOrPHI(Instruction *I,
case ICmpInst::ICMP_ULT:
case ICmpInst::ICMP_ULE:
std::swap(LHS, RHS);
- // fall through
+ LLVM_FALLTHROUGH;
case ICmpInst::ICMP_UGT:
case ICmpInst::ICMP_UGE:
// a >u b ? a+x : b+x -> umax(a, b)+x
@@ -4332,9 +4401,7 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
SmallVector<const SCEV *, 4> IndexExprs;
for (auto Index = GEP->idx_begin(); Index != GEP->idx_end(); ++Index)
IndexExprs.push_back(getSCEV(*Index));
- return getGEPExpr(GEP->getSourceElementType(),
- getSCEV(GEP->getPointerOperand()),
- IndexExprs, GEP->isInBounds());
+ return getGEPExpr(GEP, IndexExprs);
}
uint32_t
@@ -4612,19 +4679,18 @@ ConstantRange ScalarEvolution::getRangeForAffineAR(const SCEV *Start,
MaxBECount = getNoopOrZeroExtend(MaxBECount, Start->getType());
ConstantRange MaxBECountRange = getUnsignedRange(MaxBECount);
- ConstantRange ZExtMaxBECountRange =
- MaxBECountRange.zextOrTrunc(BitWidth * 2 + 1);
+ ConstantRange ZExtMaxBECountRange = MaxBECountRange.zextOrTrunc(BitWidth * 2);
ConstantRange StepSRange = getSignedRange(Step);
- ConstantRange SExtStepSRange = StepSRange.sextOrTrunc(BitWidth * 2 + 1);
+ ConstantRange SExtStepSRange = StepSRange.sextOrTrunc(BitWidth * 2);
ConstantRange StartURange = getUnsignedRange(Start);
ConstantRange EndURange =
StartURange.add(MaxBECountRange.multiply(StepSRange));
// Check for unsigned overflow.
- ConstantRange ZExtStartURange = StartURange.zextOrTrunc(BitWidth * 2 + 1);
- ConstantRange ZExtEndURange = EndURange.zextOrTrunc(BitWidth * 2 + 1);
+ ConstantRange ZExtStartURange = StartURange.zextOrTrunc(BitWidth * 2);
+ ConstantRange ZExtEndURange = EndURange.zextOrTrunc(BitWidth * 2);
if (ZExtStartURange.add(ZExtMaxBECountRange.multiply(SExtStepSRange)) ==
ZExtEndURange) {
APInt Min = APIntOps::umin(StartURange.getUnsignedMin(),
@@ -4644,8 +4710,8 @@ ConstantRange ScalarEvolution::getRangeForAffineAR(const SCEV *Start,
// Check for signed overflow. This must be done with ConstantRange
// arithmetic because we could be called from within the ScalarEvolution
// overflow checking code.
- ConstantRange SExtStartSRange = StartSRange.sextOrTrunc(BitWidth * 2 + 1);
- ConstantRange SExtEndSRange = EndSRange.sextOrTrunc(BitWidth * 2 + 1);
+ ConstantRange SExtStartSRange = StartSRange.sextOrTrunc(BitWidth * 2);
+ ConstantRange SExtEndSRange = EndSRange.sextOrTrunc(BitWidth * 2);
if (SExtStartSRange.add(ZExtMaxBECountRange.multiply(SExtStepSRange)) ==
SExtEndSRange) {
APInt Min =
@@ -4909,17 +4975,33 @@ bool ScalarEvolution::isAddRecNeverPoison(const Instruction *I, const Loop *L) {
return LatchControlDependentOnPoison && loopHasNoAbnormalExits(L);
}
-bool ScalarEvolution::loopHasNoAbnormalExits(const Loop *L) {
- auto Itr = LoopHasNoAbnormalExits.find(L);
- if (Itr == LoopHasNoAbnormalExits.end()) {
- auto NoAbnormalExitInBB = [&](BasicBlock *BB) {
- return all_of(*BB, [](Instruction &I) {
- return isGuaranteedToTransferExecutionToSuccessor(&I);
- });
+ScalarEvolution::LoopProperties
+ScalarEvolution::getLoopProperties(const Loop *L) {
+ typedef ScalarEvolution::LoopProperties LoopProperties;
+
+ auto Itr = LoopPropertiesCache.find(L);
+ if (Itr == LoopPropertiesCache.end()) {
+ auto HasSideEffects = [](Instruction *I) {
+ if (auto *SI = dyn_cast<StoreInst>(I))
+ return !SI->isSimple();
+
+ return I->mayHaveSideEffects();
};
- auto InsertPair = LoopHasNoAbnormalExits.insert(
- {L, all_of(L->getBlocks(), NoAbnormalExitInBB)});
+ LoopProperties LP = {/* HasNoAbnormalExits */ true,
+ /*HasNoSideEffects*/ true};
+
+ for (auto *BB : L->getBlocks())
+ for (auto &I : *BB) {
+ if (!isGuaranteedToTransferExecutionToSuccessor(&I))
+ LP.HasNoAbnormalExits = false;
+ if (HasSideEffects(&I))
+ LP.HasNoSideEffects = false;
+ if (!LP.HasNoAbnormalExits && !LP.HasNoSideEffects)
+ break; // We're already as pessimistic as we can get.
+ }
+
+ auto InsertPair = LoopPropertiesCache.insert({L, LP});
assert(InsertPair.second && "We just checked!");
Itr = InsertPair.first;
}
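getLoopProperties computes two monotone flags in one walk over the loop body, caches them per loop, and stops scanning a block once both flags have already gone pessimistic. A standalone sketch of that compute-once-and-cache shape (toy block and instruction types, not the LLVM ones):

#include <map>
#include <vector>

struct ToyInstr { bool Transfers; bool SideEffects; };
using ToyBlock = std::vector<ToyInstr>;
using ToyLoop  = std::vector<ToyBlock>;

struct LoopProps { bool HasNoAbnormalExits = true; bool HasNoSideEffects = true; };

// Compute both properties in a single pass and memoize per loop, as the
// LoopPropertiesCache above does.
LoopProps getProps(std::map<const ToyLoop *, LoopProps> &Cache, const ToyLoop &L) {
  auto It = Cache.find(&L);
  if (It != Cache.end())
    return It->second;

  LoopProps P;
  for (const ToyBlock &BB : L) {
    for (const ToyInstr &I : BB) {
      if (!I.Transfers)
        P.HasNoAbnormalExits = false;
      if (I.SideEffects)
        P.HasNoSideEffects = false;
      if (!P.HasNoAbnormalExits && !P.HasNoSideEffects)
        break;                       // already as pessimistic as possible
    }
    if (!P.HasNoAbnormalExits && !P.HasNoSideEffects)
      break;
  }
  Cache.emplace(&L, P);
  return P;
}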
@@ -5247,6 +5329,20 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
// Iteration Count Computation Code
//
+static unsigned getConstantTripCount(const SCEVConstant *ExitCount) {
+ if (!ExitCount)
+ return 0;
+
+ ConstantInt *ExitConst = ExitCount->getValue();
+
+ // Guard against huge trip counts.
+ if (ExitConst->getValue().getActiveBits() > 32)
+ return 0;
+
+ // In case of integer overflow, this returns 0, which is correct.
+ return ((unsigned)ExitConst->getZExtValue()) + 1;
+}
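The helper turns a constant exit (backedge-taken) count into a small trip count: trip count = exit count + 1, with 0 doubling as "unknown" both when the count does not fit in 32 bits and when the +1 wraps. A worked standalone version (plain integers instead of SCEV constants):

#include <cstdint>

// Backedge-taken count -> trip count, mirroring getConstantTripCount above.
// Returns 0 when the count is unknown or too large to be useful.
unsigned smallTripCount(bool Known, uint64_t ExitCount) {
  if (!Known)
    return 0;
  if (ExitCount >= (uint64_t{1} << 32))   // more than 32 active bits: too big
    return 0;
  // ExitCount == 0xFFFFFFFF wraps to 0 here, which conveniently reports
  // "unknown" rather than a bogus count.
  return (unsigned)ExitCount + 1;
}
// Example: a loop whose backedge is taken 9 times has a trip count of 10.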
+
unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L) {
if (BasicBlock *ExitingBB = L->getExitingBlock())
return getSmallConstantTripCount(L, ExitingBB);
@@ -5262,17 +5358,13 @@ unsigned ScalarEvolution::getSmallConstantTripCount(Loop *L,
"Exiting block must actually branch out of the loop!");
const SCEVConstant *ExitCount =
dyn_cast<SCEVConstant>(getExitCount(L, ExitingBlock));
- if (!ExitCount)
- return 0;
-
- ConstantInt *ExitConst = ExitCount->getValue();
-
- // Guard against huge trip counts.
- if (ExitConst->getValue().getActiveBits() > 32)
- return 0;
+ return getConstantTripCount(ExitCount);
+}
- // In case of integer overflow, this returns 0, which is correct.
- return ((unsigned)ExitConst->getZExtValue()) + 1;
+unsigned ScalarEvolution::getSmallConstantMaxTripCount(Loop *L) {
+ const auto *MaxExitCount =
+ dyn_cast<SCEVConstant>(getMaxBackedgeTakenCount(L));
+ return getConstantTripCount(MaxExitCount);
}
unsigned ScalarEvolution::getSmallConstantTripMultiple(Loop *L) {
@@ -5351,6 +5443,10 @@ const SCEV *ScalarEvolution::getMaxBackedgeTakenCount(const Loop *L) {
return getBackedgeTakenInfo(L).getMax(this);
}
+bool ScalarEvolution::isBackedgeTakenCountMaxOrZero(const Loop *L) {
+ return getBackedgeTakenInfo(L).isMaxOrZero(this);
+}
+
/// Push PHI nodes in the header of the given loop onto the given Worklist.
static void
PushLoopPHIs(const Loop *L, SmallVectorImpl<Instruction *> &Worklist) {
@@ -5376,7 +5472,7 @@ ScalarEvolution::getPredicatedBackedgeTakenInfo(const Loop *L) {
BackedgeTakenInfo Result =
computeBackedgeTakenCount(L, /*AllowPredicates=*/true);
- return PredicatedBackedgeTakenCounts.find(L)->second = Result;
+ return PredicatedBackedgeTakenCounts.find(L)->second = std::move(Result);
}
const ScalarEvolution::BackedgeTakenInfo &
@@ -5435,8 +5531,8 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
// case, createNodeForPHI will perform the necessary updates on its
// own when it gets to that point.
if (!isa<PHINode>(I) || !isa<SCEVUnknown>(Old)) {
+ eraseValueFromMap(It->first);
forgetMemoizedResults(Old);
- ValueExprMap.erase(It);
}
if (PHINode *PN = dyn_cast<PHINode>(I))
ConstantEvolutionLoopExitValue.erase(PN);
@@ -5451,7 +5547,7 @@ ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
// recursive call to getBackedgeTakenInfo (on a different
// loop), which would invalidate the iterator computed
// earlier.
- return BackedgeTakenCounts.find(L)->second = Result;
+ return BackedgeTakenCounts.find(L)->second = std::move(Result);
}
void ScalarEvolution::forgetLoop(const Loop *L) {
@@ -5481,8 +5577,8 @@ void ScalarEvolution::forgetLoop(const Loop *L) {
ValueExprMapType::iterator It =
ValueExprMap.find_as(static_cast<Value *>(I));
if (It != ValueExprMap.end()) {
+ eraseValueFromMap(It->first);
forgetMemoizedResults(It->second);
- ValueExprMap.erase(It);
if (PHINode *PN = dyn_cast<PHINode>(I))
ConstantEvolutionLoopExitValue.erase(PN);
}
@@ -5495,7 +5591,7 @@ void ScalarEvolution::forgetLoop(const Loop *L) {
for (Loop *I : *L)
forgetLoop(I);
- LoopHasNoAbnormalExits.erase(L);
+ LoopPropertiesCache.erase(L);
}
void ScalarEvolution::forgetValue(Value *V) {
@@ -5515,8 +5611,8 @@ void ScalarEvolution::forgetValue(Value *V) {
ValueExprMapType::iterator It =
ValueExprMap.find_as(static_cast<Value *>(I));
if (It != ValueExprMap.end()) {
+ eraseValueFromMap(It->first);
forgetMemoizedResults(It->second);
- ValueExprMap.erase(It);
if (PHINode *PN = dyn_cast<PHINode>(I))
ConstantEvolutionLoopExitValue.erase(PN);
}
@@ -5534,14 +5630,11 @@ void ScalarEvolution::forgetValue(Value *V) {
/// caller's responsibility to specify the relevant loop exit using
/// getExact(ExitingBlock, SE).
const SCEV *
-ScalarEvolution::BackedgeTakenInfo::getExact(
- ScalarEvolution *SE, SCEVUnionPredicate *Preds) const {
+ScalarEvolution::BackedgeTakenInfo::getExact(ScalarEvolution *SE,
+ SCEVUnionPredicate *Preds) const {
// If any exits were not computable, the loop is not computable.
- if (!ExitNotTaken.isCompleteList()) return SE->getCouldNotCompute();
-
- // We need exactly one computable exit.
- if (!ExitNotTaken.ExitingBlock) return SE->getCouldNotCompute();
- assert(ExitNotTaken.ExactNotTaken && "uninitialized not-taken info");
+ if (!isComplete() || ExitNotTaken.empty())
+ return SE->getCouldNotCompute();
const SCEV *BECount = nullptr;
for (auto &ENT : ExitNotTaken) {
@@ -5551,10 +5644,10 @@ ScalarEvolution::BackedgeTakenInfo::getExact(
BECount = ENT.ExactNotTaken;
else if (BECount != ENT.ExactNotTaken)
return SE->getCouldNotCompute();
- if (Preds && ENT.getPred())
- Preds->add(ENT.getPred());
+ if (Preds && !ENT.hasAlwaysTruePredicate())
+ Preds->add(ENT.Predicate.get());
- assert((Preds || ENT.hasAlwaysTruePred()) &&
+ assert((Preds || ENT.hasAlwaysTruePredicate()) &&
"Predicate should be always true!");
}
@@ -5567,7 +5660,7 @@ const SCEV *
ScalarEvolution::BackedgeTakenInfo::getExact(BasicBlock *ExitingBlock,
ScalarEvolution *SE) const {
for (auto &ENT : ExitNotTaken)
- if (ENT.ExitingBlock == ExitingBlock && ENT.hasAlwaysTruePred())
+ if (ENT.ExitingBlock == ExitingBlock && ENT.hasAlwaysTruePredicate())
return ENT.ExactNotTaken;
return SE->getCouldNotCompute();
@@ -5576,21 +5669,29 @@ ScalarEvolution::BackedgeTakenInfo::getExact(BasicBlock *ExitingBlock,
/// getMax - Get the max backedge taken count for the loop.
const SCEV *
ScalarEvolution::BackedgeTakenInfo::getMax(ScalarEvolution *SE) const {
- for (auto &ENT : ExitNotTaken)
- if (!ENT.hasAlwaysTruePred())
- return SE->getCouldNotCompute();
+ auto PredicateNotAlwaysTrue = [](const ExitNotTakenInfo &ENT) {
+ return !ENT.hasAlwaysTruePredicate();
+ };
- return Max ? Max : SE->getCouldNotCompute();
+ if (any_of(ExitNotTaken, PredicateNotAlwaysTrue) || !getMax())
+ return SE->getCouldNotCompute();
+
+ return getMax();
+}
+
+bool ScalarEvolution::BackedgeTakenInfo::isMaxOrZero(ScalarEvolution *SE) const {
+ auto PredicateNotAlwaysTrue = [](const ExitNotTakenInfo &ENT) {
+ return !ENT.hasAlwaysTruePredicate();
+ };
+ return MaxOrZero && !any_of(ExitNotTaken, PredicateNotAlwaysTrue);
}
bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S,
ScalarEvolution *SE) const {
- if (Max && Max != SE->getCouldNotCompute() && SE->hasOperand(Max, S))
+ if (getMax() && getMax() != SE->getCouldNotCompute() &&
+ SE->hasOperand(getMax(), S))
return true;
- if (!ExitNotTaken.ExitingBlock)
- return false;
-
for (auto &ENT : ExitNotTaken)
if (ENT.ExactNotTaken != SE->getCouldNotCompute() &&
SE->hasOperand(ENT.ExactNotTaken, S))
@@ -5602,62 +5703,31 @@ bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S,
/// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each
/// computable exit into a persistent ExitNotTakenInfo array.
ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo(
- SmallVectorImpl<EdgeInfo> &ExitCounts, bool Complete, const SCEV *MaxCount)
- : Max(MaxCount) {
-
- if (!Complete)
- ExitNotTaken.setIncomplete();
-
- unsigned NumExits = ExitCounts.size();
- if (NumExits == 0) return;
-
- ExitNotTaken.ExitingBlock = ExitCounts[0].ExitBlock;
- ExitNotTaken.ExactNotTaken = ExitCounts[0].Taken;
-
- // Determine the number of ExitNotTakenExtras structures that we need.
- unsigned ExtraInfoSize = 0;
- if (NumExits > 1)
- ExtraInfoSize = 1 + std::count_if(std::next(ExitCounts.begin()),
- ExitCounts.end(), [](EdgeInfo &Entry) {
- return !Entry.Pred.isAlwaysTrue();
- });
- else if (!ExitCounts[0].Pred.isAlwaysTrue())
- ExtraInfoSize = 1;
-
- ExitNotTakenExtras *ENT = nullptr;
-
- // Allocate the ExitNotTakenExtras structures and initialize the first
- // element (ExitNotTaken).
- if (ExtraInfoSize > 0) {
- ENT = new ExitNotTakenExtras[ExtraInfoSize];
- ExitNotTaken.ExtraInfo = &ENT[0];
- *ExitNotTaken.getPred() = std::move(ExitCounts[0].Pred);
- }
-
- if (NumExits == 1)
- return;
-
- assert(ENT && "ExitNotTakenExtras is NULL while having more than one exit");
-
- auto &Exits = ExitNotTaken.ExtraInfo->Exits;
-
- // Handle the rare case of multiple computable exits.
- for (unsigned i = 1, PredPos = 1; i < NumExits; ++i) {
- ExitNotTakenExtras *Ptr = nullptr;
- if (!ExitCounts[i].Pred.isAlwaysTrue()) {
- Ptr = &ENT[PredPos++];
- Ptr->Pred = std::move(ExitCounts[i].Pred);
- }
-
- Exits.emplace_back(ExitCounts[i].ExitBlock, ExitCounts[i].Taken, Ptr);
- }
+ SmallVectorImpl<ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo>
+ &&ExitCounts,
+ bool Complete, const SCEV *MaxCount, bool MaxOrZero)
+ : MaxAndComplete(MaxCount, Complete), MaxOrZero(MaxOrZero) {
+ typedef ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo EdgeExitInfo;
+ ExitNotTaken.reserve(ExitCounts.size());
+ std::transform(
+ ExitCounts.begin(), ExitCounts.end(), std::back_inserter(ExitNotTaken),
+ [&](const EdgeExitInfo &EEI) {
+ BasicBlock *ExitBB = EEI.first;
+ const ExitLimit &EL = EEI.second;
+ if (EL.Predicates.empty())
+ return ExitNotTakenInfo(ExitBB, EL.ExactNotTaken, nullptr);
+
+ std::unique_ptr<SCEVUnionPredicate> Predicate(new SCEVUnionPredicate);
+ for (auto *Pred : EL.Predicates)
+ Predicate->add(Pred);
+
+ return ExitNotTakenInfo(ExitBB, EL.ExactNotTaken, std::move(Predicate));
+ });
}
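The rewritten constructor replaces the hand-rolled ExitNotTakenExtras allocation with the standard reserve + std::transform + std::back_inserter idiom, building one owning record per exit and allocating a predicate object only when the exit actually carries predicates. The same idiom in isolation (toy record types, assumed for illustration):

#include <algorithm>
#include <iterator>
#include <memory>
#include <string>
#include <utility>
#include <vector>

struct ExitRecord {
  std::string Block;
  long NotTakenCount;
  std::unique_ptr<std::vector<std::string>> Predicates;  // null when always true
};

std::vector<ExitRecord> buildExitRecords(
    const std::vector<std::pair<std::string, std::vector<std::string>>> &Exits) {
  std::vector<ExitRecord> Out;
  Out.reserve(Exits.size());                 // one record per computable exit
  std::transform(Exits.begin(), Exits.end(), std::back_inserter(Out),
                 [](const auto &E) {
                   ExitRecord R{E.first, /*NotTakenCount=*/0, nullptr};
                   if (!E.second.empty())    // allocate only when needed
                     R.Predicates =
                         std::make_unique<std::vector<std::string>>(E.second);
                   return R;
                 });
  return Out;
}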
/// Invalidate this result and free the ExitNotTakenInfo array.
void ScalarEvolution::BackedgeTakenInfo::clear() {
- ExitNotTaken.ExitingBlock = nullptr;
- ExitNotTaken.ExactNotTaken = nullptr;
- delete[] ExitNotTaken.ExtraInfo;
+ ExitNotTaken.clear();
}
/// Compute the number of times the backedge of the specified loop will execute.
@@ -5667,11 +5737,14 @@ ScalarEvolution::computeBackedgeTakenCount(const Loop *L,
SmallVector<BasicBlock *, 8> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
- SmallVector<EdgeInfo, 4> ExitCounts;
+ typedef ScalarEvolution::BackedgeTakenInfo::EdgeExitInfo EdgeExitInfo;
+
+ SmallVector<EdgeExitInfo, 4> ExitCounts;
bool CouldComputeBECount = true;
BasicBlock *Latch = L->getLoopLatch(); // may be NULL.
const SCEV *MustExitMaxBECount = nullptr;
const SCEV *MayExitMaxBECount = nullptr;
+ bool MustExitMaxOrZero = false;
// Compute the ExitLimit for each loop exit. Use this to populate ExitCounts
// and compute maxBECount.
@@ -5680,17 +5753,17 @@ ScalarEvolution::computeBackedgeTakenCount(const Loop *L,
BasicBlock *ExitBB = ExitingBlocks[i];
ExitLimit EL = computeExitLimit(L, ExitBB, AllowPredicates);
- assert((AllowPredicates || EL.Pred.isAlwaysTrue()) &&
+ assert((AllowPredicates || EL.Predicates.empty()) &&
"Predicated exit limit when predicates are not allowed!");
// 1. For each exit that can be computed, add an entry to ExitCounts.
// CouldComputeBECount is true only if all exits can be computed.
- if (EL.Exact == getCouldNotCompute())
+ if (EL.ExactNotTaken == getCouldNotCompute())
// We couldn't compute an exact value for this exit, so
// we won't be able to compute an exact value for the loop.
CouldComputeBECount = false;
else
- ExitCounts.emplace_back(EdgeInfo(ExitBB, EL.Exact, EL.Pred));
+ ExitCounts.emplace_back(ExitBB, EL);
// 2. Derive the loop's MaxBECount from each exit's max number of
// non-exiting iterations. Partition the loop exits into two kinds:
@@ -5698,29 +5771,35 @@ ScalarEvolution::computeBackedgeTakenCount(const Loop *L,
//
// If the exit dominates the loop latch, it is a LoopMustExit otherwise it
// is a LoopMayExit. If any computable LoopMustExit is found, then
- // MaxBECount is the minimum EL.Max of computable LoopMustExits. Otherwise,
- // MaxBECount is conservatively the maximum EL.Max, where CouldNotCompute is
- // considered greater than any computable EL.Max.
- if (EL.Max != getCouldNotCompute() && Latch &&
+ // MaxBECount is the minimum EL.MaxNotTaken of computable
+ // LoopMustExits. Otherwise, MaxBECount is conservatively the maximum
+ // EL.MaxNotTaken, where CouldNotCompute is considered greater than any
+ // computable EL.MaxNotTaken.
+ if (EL.MaxNotTaken != getCouldNotCompute() && Latch &&
DT.dominates(ExitBB, Latch)) {
- if (!MustExitMaxBECount)
- MustExitMaxBECount = EL.Max;
- else {
+ if (!MustExitMaxBECount) {
+ MustExitMaxBECount = EL.MaxNotTaken;
+ MustExitMaxOrZero = EL.MaxOrZero;
+ } else {
MustExitMaxBECount =
- getUMinFromMismatchedTypes(MustExitMaxBECount, EL.Max);
+ getUMinFromMismatchedTypes(MustExitMaxBECount, EL.MaxNotTaken);
}
} else if (MayExitMaxBECount != getCouldNotCompute()) {
- if (!MayExitMaxBECount || EL.Max == getCouldNotCompute())
- MayExitMaxBECount = EL.Max;
+ if (!MayExitMaxBECount || EL.MaxNotTaken == getCouldNotCompute())
+ MayExitMaxBECount = EL.MaxNotTaken;
else {
MayExitMaxBECount =
- getUMaxFromMismatchedTypes(MayExitMaxBECount, EL.Max);
+ getUMaxFromMismatchedTypes(MayExitMaxBECount, EL.MaxNotTaken);
}
}
}
const SCEV *MaxBECount = MustExitMaxBECount ? MustExitMaxBECount :
(MayExitMaxBECount ? MayExitMaxBECount : getCouldNotCompute());
- return BackedgeTakenInfo(ExitCounts, CouldComputeBECount, MaxBECount);
+ // The loop backedge will be taken the maximum or zero times if there's
+ // a single exit that must be taken the maximum or zero times.
+ bool MaxOrZero = (MustExitMaxOrZero && ExitingBlocks.size() == 1);
+ return BackedgeTakenInfo(std::move(ExitCounts), CouldComputeBECount,
+ MaxBECount, MaxOrZero);
}
ScalarEvolution::ExitLimit
@@ -5825,39 +5904,40 @@ ScalarEvolution::computeExitLimitFromCond(const Loop *L,
if (EitherMayExit) {
// Both conditions must be true for the loop to continue executing.
// Choose the less conservative count.
- if (EL0.Exact == getCouldNotCompute() ||
- EL1.Exact == getCouldNotCompute())
+ if (EL0.ExactNotTaken == getCouldNotCompute() ||
+ EL1.ExactNotTaken == getCouldNotCompute())
BECount = getCouldNotCompute();
else
- BECount = getUMinFromMismatchedTypes(EL0.Exact, EL1.Exact);
- if (EL0.Max == getCouldNotCompute())
- MaxBECount = EL1.Max;
- else if (EL1.Max == getCouldNotCompute())
- MaxBECount = EL0.Max;
+ BECount =
+ getUMinFromMismatchedTypes(EL0.ExactNotTaken, EL1.ExactNotTaken);
+ if (EL0.MaxNotTaken == getCouldNotCompute())
+ MaxBECount = EL1.MaxNotTaken;
+ else if (EL1.MaxNotTaken == getCouldNotCompute())
+ MaxBECount = EL0.MaxNotTaken;
else
- MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max);
+ MaxBECount =
+ getUMinFromMismatchedTypes(EL0.MaxNotTaken, EL1.MaxNotTaken);
} else {
// Both conditions must be true at the same time for the loop to exit.
// For now, be conservative.
assert(L->contains(FBB) && "Loop block has no successor in loop!");
- if (EL0.Max == EL1.Max)
- MaxBECount = EL0.Max;
- if (EL0.Exact == EL1.Exact)
- BECount = EL0.Exact;
+ if (EL0.MaxNotTaken == EL1.MaxNotTaken)
+ MaxBECount = EL0.MaxNotTaken;
+ if (EL0.ExactNotTaken == EL1.ExactNotTaken)
+ BECount = EL0.ExactNotTaken;
}
- SCEVUnionPredicate NP;
- NP.add(&EL0.Pred);
- NP.add(&EL1.Pred);
// There are cases (e.g. PR26207) where computeExitLimitFromCond is able
// to be more aggressive when computing BECount than when computing
- // MaxBECount. In these cases it is possible for EL0.Exact and EL1.Exact
- // to match, but for EL0.Max and EL1.Max to not.
+ // MaxBECount. In these cases it is possible for EL0.ExactNotTaken and
+ // EL1.ExactNotTaken to match, but for EL0.MaxNotTaken and EL1.MaxNotTaken
+ // to not.
if (isa<SCEVCouldNotCompute>(MaxBECount) &&
!isa<SCEVCouldNotCompute>(BECount))
MaxBECount = BECount;
- return ExitLimit(BECount, MaxBECount, NP);
+ return ExitLimit(BECount, MaxBECount, false,
+ {&EL0.Predicates, &EL1.Predicates});
}
if (BO->getOpcode() == Instruction::Or) {
// Recurse on the operands of the or.
@@ -5873,31 +5953,31 @@ ScalarEvolution::computeExitLimitFromCond(const Loop *L,
if (EitherMayExit) {
// Both conditions must be false for the loop to continue executing.
// Choose the less conservative count.
- if (EL0.Exact == getCouldNotCompute() ||
- EL1.Exact == getCouldNotCompute())
+ if (EL0.ExactNotTaken == getCouldNotCompute() ||
+ EL1.ExactNotTaken == getCouldNotCompute())
BECount = getCouldNotCompute();
else
- BECount = getUMinFromMismatchedTypes(EL0.Exact, EL1.Exact);
- if (EL0.Max == getCouldNotCompute())
- MaxBECount = EL1.Max;
- else if (EL1.Max == getCouldNotCompute())
- MaxBECount = EL0.Max;
+ BECount =
+ getUMinFromMismatchedTypes(EL0.ExactNotTaken, EL1.ExactNotTaken);
+ if (EL0.MaxNotTaken == getCouldNotCompute())
+ MaxBECount = EL1.MaxNotTaken;
+ else if (EL1.MaxNotTaken == getCouldNotCompute())
+ MaxBECount = EL0.MaxNotTaken;
else
- MaxBECount = getUMinFromMismatchedTypes(EL0.Max, EL1.Max);
+ MaxBECount =
+ getUMinFromMismatchedTypes(EL0.MaxNotTaken, EL1.MaxNotTaken);
} else {
// Both conditions must be false at the same time for the loop to exit.
// For now, be conservative.
assert(L->contains(TBB) && "Loop block has no successor in loop!");
- if (EL0.Max == EL1.Max)
- MaxBECount = EL0.Max;
- if (EL0.Exact == EL1.Exact)
- BECount = EL0.Exact;
+ if (EL0.MaxNotTaken == EL1.MaxNotTaken)
+ MaxBECount = EL0.MaxNotTaken;
+ if (EL0.ExactNotTaken == EL1.ExactNotTaken)
+ BECount = EL0.ExactNotTaken;
}
- SCEVUnionPredicate NP;
- NP.add(&EL0.Pred);
- NP.add(&EL1.Pred);
- return ExitLimit(BECount, MaxBECount, NP);
+ return ExitLimit(BECount, MaxBECount, false,
+ {&EL0.Predicates, &EL1.Predicates});
}
}
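Both branches above follow the same recipe for combining two exit limits: when either condition alone can end the loop, the exact counts combine with an unsigned minimum (and CouldNotCompute poisons the result), while the max counts prefer whichever side is known. A compact sketch of the "and" case using std::optional as the CouldNotCompute stand-in:

#include <algorithm>
#include <cstdint>
#include <optional>

struct ToyExitLimit {
  std::optional<uint64_t> Exact;   // nullopt plays the role of CouldNotCompute
  std::optional<uint64_t> Max;
};

// Combine the limits of `condA && condB` when either failing condition exits:
// the loop runs until the *first* condition goes false, hence the minimum.
ToyExitLimit combineAnd(const ToyExitLimit &A, const ToyExitLimit &B) {
  ToyExitLimit Out;
  if (A.Exact && B.Exact)
    Out.Exact = std::min(*A.Exact, *B.Exact);
  if (A.Max && B.Max)
    Out.Max = std::min(*A.Max, *B.Max);
  else
    Out.Max = A.Max ? A.Max : B.Max;         // take whichever side is known
  // As in the code above (PR26207): never report a known exact count with an
  // unknown max; fall back to the exact count.
  if (!Out.Max && Out.Exact)
    Out.Max = Out.Exact;
  return Out;
}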
@@ -5979,8 +6059,8 @@ ScalarEvolution::computeExitLimitFromICmp(const Loop *L,
if (const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(LHS))
if (AddRec->getLoop() == L) {
// Form the constant range.
- ConstantRange CompRange(
- ICmpInst::makeConstantRange(Cond, RHSC->getAPInt()));
+ ConstantRange CompRange =
+ ConstantRange::makeExactICmpRegion(Cond, RHSC->getAPInt());
const SCEV *Ret = AddRec->getNumIterationsInRange(CompRange, *this);
if (!isa<SCEVCouldNotCompute>(Ret)) return Ret;
@@ -6184,7 +6264,7 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeShiftCompareExitLimit(
// %iv = phi i32 [ %iv.shifted, %loop ], [ %val, %preheader ]
// %iv.shifted = lshr i32 %iv, <positive constant>
//
- // Return true on a succesful match. Return the corresponding PHI node (%iv
+ // Return true on a successful match. Return the corresponding PHI node (%iv
// above) in PNOut and the opcode of the shift operation in OpCodeOut.
auto MatchShiftRecurrence =
[&](Value *V, PHINode *&PNOut, Instruction::BinaryOps &OpCodeOut) {
@@ -6282,8 +6362,7 @@ ScalarEvolution::ExitLimit ScalarEvolution::computeShiftCompareExitLimit(
unsigned BitWidth = getTypeSizeInBits(RHS->getType());
const SCEV *UpperBound =
getConstant(getEffectiveSCEVType(RHS->getType()), BitWidth);
- SCEVUnionPredicate P;
- return ExitLimit(getCouldNotCompute(), UpperBound, P);
+ return ExitLimit(getCouldNotCompute(), UpperBound, false);
}
return getCouldNotCompute();
@@ -7044,7 +7123,7 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
// effectively V != 0. We know and take advantage of the fact that this
// expression only being used in a comparison by zero context.
- SCEVUnionPredicate P;
+ SmallPtrSet<const SCEVPredicate *, 4> Predicates;
// If the value is a constant
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
// If the value is already zero, the branch will execute zero times.
@@ -7057,7 +7136,7 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
// Try to make this an AddRec using runtime tests, in the first X
// iterations of this loop, where X is the SCEV expression found by the
// algorithm below.
- AddRec = convertSCEVToAddRecWithPredicates(V, L, P);
+ AddRec = convertSCEVToAddRecWithPredicates(V, L, Predicates);
if (!AddRec || AddRec->getLoop() != L)
return getCouldNotCompute();
@@ -7079,7 +7158,8 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
// should not accept a root of 2.
const SCEV *Val = AddRec->evaluateAtIteration(R1, *this);
if (Val->isZero())
- return ExitLimit(R1, R1, P); // We found a quadratic root!
+ // We found a quadratic root!
+ return ExitLimit(R1, R1, false, Predicates);
}
}
return getCouldNotCompute();
@@ -7136,7 +7216,7 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
else
MaxBECount = getConstant(CountDown ? CR.getUnsignedMax()
: -CR.getUnsignedMin());
- return ExitLimit(Distance, MaxBECount, P);
+ return ExitLimit(Distance, MaxBECount, false, Predicates);
}
// As a special case, handle the instance where Step is a positive power of
@@ -7191,7 +7271,7 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
const SCEV *Limit =
getZeroExtendExpr(getTruncateExpr(ModuloResult, NarrowTy), WideTy);
- return ExitLimit(Limit, Limit, P);
+ return ExitLimit(Limit, Limit, false, Predicates);
}
}
@@ -7204,14 +7284,14 @@ ScalarEvolution::howFarToZero(const SCEV *V, const Loop *L, bool ControlsExit,
loopHasNoAbnormalExits(AddRec->getLoop())) {
const SCEV *Exact =
getUDivExpr(Distance, CountDown ? getNegativeSCEV(Step) : Step);
- return ExitLimit(Exact, Exact, P);
+ return ExitLimit(Exact, Exact, false, Predicates);
}
// Then, try to solve the above equation provided that Start is constant.
if (const SCEVConstant *StartC = dyn_cast<SCEVConstant>(Start)) {
const SCEV *E = SolveLinEquationWithOverflow(
StepC->getValue()->getValue(), -StartC->getValue()->getValue(), *this);
- return ExitLimit(E, E, P);
+ return ExitLimit(E, E, false, Predicates);
}
return getCouldNotCompute();
}
@@ -7323,149 +7403,77 @@ bool ScalarEvolution::SimplifyICmpOperands(ICmpInst::Predicate &Pred,
// cases, and canonicalize *-or-equal comparisons to regular comparisons.
if (const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS)) {
const APInt &RA = RC->getAPInt();
- switch (Pred) {
- default: llvm_unreachable("Unexpected ICmpInst::Predicate value!");
- case ICmpInst::ICMP_EQ:
- case ICmpInst::ICMP_NE:
- // Fold ((-1) * %a) + %b == 0 (equivalent to %b-%a == 0) into %a == %b.
- if (!RA)
- if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(LHS))
- if (const SCEVMulExpr *ME = dyn_cast<SCEVMulExpr>(AE->getOperand(0)))
- if (AE->getNumOperands() == 2 && ME->getNumOperands() == 2 &&
- ME->getOperand(0)->isAllOnesValue()) {
- RHS = AE->getOperand(1);
- LHS = ME->getOperand(1);
- Changed = true;
- }
- break;
- case ICmpInst::ICMP_UGE:
- if ((RA - 1).isMinValue()) {
- Pred = ICmpInst::ICMP_NE;
- RHS = getConstant(RA - 1);
- Changed = true;
- break;
- }
- if (RA.isMaxValue()) {
- Pred = ICmpInst::ICMP_EQ;
- Changed = true;
- break;
- }
- if (RA.isMinValue()) goto trivially_true;
- Pred = ICmpInst::ICMP_UGT;
- RHS = getConstant(RA - 1);
- Changed = true;
- break;
- case ICmpInst::ICMP_ULE:
- if ((RA + 1).isMaxValue()) {
- Pred = ICmpInst::ICMP_NE;
- RHS = getConstant(RA + 1);
- Changed = true;
- break;
- }
- if (RA.isMinValue()) {
- Pred = ICmpInst::ICMP_EQ;
- Changed = true;
- break;
- }
- if (RA.isMaxValue()) goto trivially_true;
+ bool SimplifiedByConstantRange = false;
- Pred = ICmpInst::ICMP_ULT;
- RHS = getConstant(RA + 1);
- Changed = true;
- break;
- case ICmpInst::ICMP_SGE:
- if ((RA - 1).isMinSignedValue()) {
- Pred = ICmpInst::ICMP_NE;
- RHS = getConstant(RA - 1);
- Changed = true;
- break;
- }
- if (RA.isMaxSignedValue()) {
- Pred = ICmpInst::ICMP_EQ;
- Changed = true;
- break;
+ if (!ICmpInst::isEquality(Pred)) {
+ ConstantRange ExactCR = ConstantRange::makeExactICmpRegion(Pred, RA);
+ if (ExactCR.isFullSet())
+ goto trivially_true;
+ else if (ExactCR.isEmptySet())
+ goto trivially_false;
+
+ APInt NewRHS;
+ CmpInst::Predicate NewPred;
+ if (ExactCR.getEquivalentICmp(NewPred, NewRHS) &&
+ ICmpInst::isEquality(NewPred)) {
+ // We were able to convert an inequality to an equality.
+ Pred = NewPred;
+ RHS = getConstant(NewRHS);
+ Changed = SimplifiedByConstantRange = true;
}
- if (RA.isMinSignedValue()) goto trivially_true;
+ }
- Pred = ICmpInst::ICMP_SGT;
- RHS = getConstant(RA - 1);
- Changed = true;
- break;
- case ICmpInst::ICMP_SLE:
- if ((RA + 1).isMaxSignedValue()) {
- Pred = ICmpInst::ICMP_NE;
- RHS = getConstant(RA + 1);
- Changed = true;
+ if (!SimplifiedByConstantRange) {
+ switch (Pred) {
+ default:
break;
- }
- if (RA.isMinSignedValue()) {
- Pred = ICmpInst::ICMP_EQ;
- Changed = true;
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_NE:
+ // Fold ((-1) * %a) + %b == 0 (equivalent to %b-%a == 0) into %a == %b.
+ if (!RA)
+ if (const SCEVAddExpr *AE = dyn_cast<SCEVAddExpr>(LHS))
+ if (const SCEVMulExpr *ME =
+ dyn_cast<SCEVMulExpr>(AE->getOperand(0)))
+ if (AE->getNumOperands() == 2 && ME->getNumOperands() == 2 &&
+ ME->getOperand(0)->isAllOnesValue()) {
+ RHS = AE->getOperand(1);
+ LHS = ME->getOperand(1);
+ Changed = true;
+ }
break;
- }
- if (RA.isMaxSignedValue()) goto trivially_true;
- Pred = ICmpInst::ICMP_SLT;
- RHS = getConstant(RA + 1);
- Changed = true;
- break;
- case ICmpInst::ICMP_UGT:
- if (RA.isMinValue()) {
- Pred = ICmpInst::ICMP_NE;
+
+ // The "Should have been caught earlier!" messages refer to the fact
+ // that the ExactCR.isFullSet() or ExactCR.isEmptySet() check above
+ // should have fired on the corresponding cases, and canonicalized the
+ // check to trivially_true or trivially_false.
+
+ case ICmpInst::ICMP_UGE:
+ assert(!RA.isMinValue() && "Should have been caught earlier!");
+ Pred = ICmpInst::ICMP_UGT;
+ RHS = getConstant(RA - 1);
Changed = true;
break;
- }
- if ((RA + 1).isMaxValue()) {
- Pred = ICmpInst::ICMP_EQ;
+ case ICmpInst::ICMP_ULE:
+ assert(!RA.isMaxValue() && "Should have been caught earlier!");
+ Pred = ICmpInst::ICMP_ULT;
RHS = getConstant(RA + 1);
Changed = true;
break;
- }
- if (RA.isMaxValue()) goto trivially_false;
- break;
- case ICmpInst::ICMP_ULT:
- if (RA.isMaxValue()) {
- Pred = ICmpInst::ICMP_NE;
- Changed = true;
- break;
- }
- if ((RA - 1).isMinValue()) {
- Pred = ICmpInst::ICMP_EQ;
+ case ICmpInst::ICMP_SGE:
+ assert(!RA.isMinSignedValue() && "Should have been caught earlier!");
+ Pred = ICmpInst::ICMP_SGT;
RHS = getConstant(RA - 1);
Changed = true;
break;
- }
- if (RA.isMinValue()) goto trivially_false;
- break;
- case ICmpInst::ICMP_SGT:
- if (RA.isMinSignedValue()) {
- Pred = ICmpInst::ICMP_NE;
- Changed = true;
- break;
- }
- if ((RA + 1).isMaxSignedValue()) {
- Pred = ICmpInst::ICMP_EQ;
+ case ICmpInst::ICMP_SLE:
+ assert(!RA.isMaxSignedValue() && "Should have been caught earlier!");
+ Pred = ICmpInst::ICMP_SLT;
RHS = getConstant(RA + 1);
Changed = true;
break;
}
- if (RA.isMaxSignedValue()) goto trivially_false;
- break;
- case ICmpInst::ICMP_SLT:
- if (RA.isMaxSignedValue()) {
- Pred = ICmpInst::ICMP_NE;
- Changed = true;
- break;
- }
- if ((RA - 1).isMinSignedValue()) {
- Pred = ICmpInst::ICMP_EQ;
- RHS = getConstant(RA - 1);
- Changed = true;
- break;
- }
- if (RA.isMinSignedValue()) goto trivially_false;
- break;
}
}
@@ -8025,34 +8033,16 @@ ScalarEvolution::isLoopEntryGuardedByCond(const Loop *L,
return false;
}
-namespace {
-/// RAII wrapper to prevent recursive application of isImpliedCond.
-/// ScalarEvolution's PendingLoopPredicates set must be empty unless we are
-/// currently evaluating isImpliedCond.
-struct MarkPendingLoopPredicate {
- Value *Cond;
- DenseSet<Value*> &LoopPreds;
- bool Pending;
-
- MarkPendingLoopPredicate(Value *C, DenseSet<Value*> &LP)
- : Cond(C), LoopPreds(LP) {
- Pending = !LoopPreds.insert(Cond).second;
- }
- ~MarkPendingLoopPredicate() {
- if (!Pending)
- LoopPreds.erase(Cond);
- }
-};
-} // end anonymous namespace
-
bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS,
Value *FoundCondValue,
bool Inverse) {
- MarkPendingLoopPredicate Mark(FoundCondValue, PendingLoopPredicates);
- if (Mark.Pending)
+ if (!PendingLoopPredicates.insert(FoundCondValue).second)
return false;
+ auto ClearOnExit =
+ make_scope_exit([&]() { PendingLoopPredicates.erase(FoundCondValue); });
+
// Recursively handle And and Or conditions.
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(FoundCondValue)) {
if (BO->getOpcode() == Instruction::And) {
@@ -8237,9 +8227,8 @@ bool ScalarEvolution::splitBinaryAdd(const SCEV *Expr,
return true;
}
-bool ScalarEvolution::computeConstantDifference(const SCEV *Less,
- const SCEV *More,
- APInt &C) {
+Optional<APInt> ScalarEvolution::computeConstantDifference(const SCEV *More,
+ const SCEV *Less) {
// We avoid subtracting expressions here because this function is usually
// fairly deep in the call stack (i.e. is called many times).
@@ -8248,15 +8237,15 @@ bool ScalarEvolution::computeConstantDifference(const SCEV *Less,
const auto *MAR = cast<SCEVAddRecExpr>(More);
if (LAR->getLoop() != MAR->getLoop())
- return false;
+ return None;
// We look at affine expressions only; not for correctness but to keep
// getStepRecurrence cheap.
if (!LAR->isAffine() || !MAR->isAffine())
- return false;
+ return None;
if (LAR->getStepRecurrence(*this) != MAR->getStepRecurrence(*this))
- return false;
+ return None;
Less = LAR->getStart();
More = MAR->getStart();
@@ -8267,27 +8256,22 @@ bool ScalarEvolution::computeConstantDifference(const SCEV *Less,
if (isa<SCEVConstant>(Less) && isa<SCEVConstant>(More)) {
const auto &M = cast<SCEVConstant>(More)->getAPInt();
const auto &L = cast<SCEVConstant>(Less)->getAPInt();
- C = M - L;
- return true;
+ return M - L;
}
const SCEV *L, *R;
SCEV::NoWrapFlags Flags;
if (splitBinaryAdd(Less, L, R, Flags))
if (const auto *LC = dyn_cast<SCEVConstant>(L))
- if (R == More) {
- C = -(LC->getAPInt());
- return true;
- }
+ if (R == More)
+ return -(LC->getAPInt());
if (splitBinaryAdd(More, L, R, Flags))
if (const auto *LC = dyn_cast<SCEVConstant>(L))
- if (R == Less) {
- C = LC->getAPInt();
- return true;
- }
+ if (R == Less)
+ return LC->getAPInt();
- return false;
+ return None;
}
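computeConstantDifference now reports its result as Optional<APInt> rather than a bool plus an out-parameter, which lets call sites such as isImpliedCondOperandsViaNoOverflow test and dereference in one expression. A toy version of the same shape for affine recurrences {Start,+,Step}:

#include <cstdint>
#include <optional>

struct ToyAffine {
  int64_t Start;
  int64_t Step;
  int Loop;        // identifies which loop the recurrence belongs to
};

// More - Less, but only when both recurrences run in the same loop with the
// same step, so the difference reduces to a difference of the starts.
std::optional<int64_t> constantDifference(const ToyAffine &More,
                                          const ToyAffine &Less) {
  if (More.Loop != Less.Loop || More.Step != Less.Step)
    return std::nullopt;
  return More.Start - Less.Start;
}

// Call-site pattern used above: both differences must exist and match.
bool sameOffset(const ToyAffine &L, const ToyAffine &FL,
                const ToyAffine &R, const ToyAffine &FR) {
  std::optional<int64_t> LDiff = constantDifference(L, FL);
  std::optional<int64_t> RDiff = constantDifference(R, FR);
  return LDiff && RDiff && *LDiff == *RDiff;
}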
bool ScalarEvolution::isImpliedCondOperandsViaNoOverflow(
@@ -8344,22 +8328,21 @@ bool ScalarEvolution::isImpliedCondOperandsViaNoOverflow(
// neither necessary nor sufficient to prove "(FoundLHS + C) s< (FoundRHS +
// C)".
- APInt LDiff, RDiff;
- if (!computeConstantDifference(FoundLHS, LHS, LDiff) ||
- !computeConstantDifference(FoundRHS, RHS, RDiff) ||
- LDiff != RDiff)
+ Optional<APInt> LDiff = computeConstantDifference(LHS, FoundLHS);
+ Optional<APInt> RDiff = computeConstantDifference(RHS, FoundRHS);
+ if (!LDiff || !RDiff || *LDiff != *RDiff)
return false;
- if (LDiff == 0)
+ if (LDiff->isMinValue())
return true;
APInt FoundRHSLimit;
if (Pred == CmpInst::ICMP_ULT) {
- FoundRHSLimit = -RDiff;
+ FoundRHSLimit = -(*RDiff);
} else {
assert(Pred == CmpInst::ICMP_SLT && "Checked above!");
- FoundRHSLimit = APInt::getSignedMinValue(getTypeSizeInBits(RHS->getType())) - RDiff;
+ FoundRHSLimit = APInt::getSignedMinValue(getTypeSizeInBits(RHS->getType())) - *RDiff;
}
// Try to prove (1) or (2), as needed.
@@ -8469,7 +8452,7 @@ static bool IsKnownPredicateViaMinOrMax(ScalarEvolution &SE,
case ICmpInst::ICMP_SGE:
std::swap(LHS, RHS);
- // fall through
+ LLVM_FALLTHROUGH;
case ICmpInst::ICMP_SLE:
return
// min(A, ...) <= A
@@ -8479,7 +8462,7 @@ static bool IsKnownPredicateViaMinOrMax(ScalarEvolution &SE,
case ICmpInst::ICMP_UGE:
std::swap(LHS, RHS);
- // fall through
+ LLVM_FALLTHROUGH;
case ICmpInst::ICMP_ULE:
return
// min(A, ...) <= A
@@ -8550,9 +8533,8 @@ bool ScalarEvolution::isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred,
// reduce the compile time impact of this optimization.
return false;
- const SCEVAddExpr *AddLHS = dyn_cast<SCEVAddExpr>(LHS);
- if (!AddLHS || AddLHS->getOperand(1) != FoundLHS ||
- !isa<SCEVConstant>(AddLHS->getOperand(0)))
+ Optional<APInt> Addend = computeConstantDifference(LHS, FoundLHS);
+ if (!Addend)
return false;
APInt ConstFoundRHS = cast<SCEVConstant>(FoundRHS)->getAPInt();
@@ -8562,10 +8544,8 @@ bool ScalarEvolution::isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred,
ConstantRange FoundLHSRange =
ConstantRange::makeAllowedICmpRegion(Pred, ConstFoundRHS);
- // Since `LHS` is `FoundLHS` + `AddLHS->getOperand(0)`, we can compute a range
- // for `LHS`:
- APInt Addend = cast<SCEVConstant>(AddLHS->getOperand(0))->getAPInt();
- ConstantRange LHSRange = FoundLHSRange.add(ConstantRange(Addend));
+ // Since `LHS` is `FoundLHS` + `Addend`, we can compute a range for `LHS`:
+ ConstantRange LHSRange = FoundLHSRange.add(ConstantRange(*Addend));
// We can also compute the range of values for `LHS` that satisfy the
// consequent, "`LHS` `Pred` `RHS`":
@@ -8580,6 +8560,8 @@ bool ScalarEvolution::isImpliedCondOperandsViaRanges(ICmpInst::Predicate Pred,
bool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride,
bool IsSigned, bool NoWrap) {
+ assert(isKnownPositive(Stride) && "Positive stride expected!");
+
if (NoWrap) return false;
unsigned BitWidth = getTypeSizeInBits(RHS->getType());
@@ -8642,17 +8624,21 @@ ScalarEvolution::ExitLimit
ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
const Loop *L, bool IsSigned,
bool ControlsExit, bool AllowPredicates) {
- SCEVUnionPredicate P;
+ SmallPtrSet<const SCEVPredicate *, 4> Predicates;
// We handle only IV < Invariant
if (!isLoopInvariant(RHS, L))
return getCouldNotCompute();
const SCEVAddRecExpr *IV = dyn_cast<SCEVAddRecExpr>(LHS);
- if (!IV && AllowPredicates)
+ bool PredicatedIV = false;
+
+ if (!IV && AllowPredicates) {
// Try to make this an AddRec using runtime tests, in the first X
// iterations of this loop, where X is the SCEV expression found by the
// algorithm below.
- IV = convertSCEVToAddRecWithPredicates(LHS, L, P);
+ IV = convertSCEVToAddRecWithPredicates(LHS, L, Predicates);
+ PredicatedIV = true;
+ }
// Avoid weird loops
if (!IV || IV->getLoop() != L || !IV->isAffine())
@@ -8663,61 +8649,144 @@ ScalarEvolution::howManyLessThans(const SCEV *LHS, const SCEV *RHS,
const SCEV *Stride = IV->getStepRecurrence(*this);
- // Avoid negative or zero stride values
- if (!isKnownPositive(Stride))
- return getCouldNotCompute();
+ bool PositiveStride = isKnownPositive(Stride);
- // Avoid proven overflow cases: this will ensure that the backedge taken count
- // will not generate any unsigned overflow. Relaxed no-overflow conditions
- // exploit NoWrapFlags, allowing to optimize in presence of undefined
- // behaviors like the case of C language.
- if (!Stride->isOne() && doesIVOverflowOnLT(RHS, Stride, IsSigned, NoWrap))
+ // Avoid negative or zero stride values.
+ if (!PositiveStride) {
+ // We can compute the correct backedge taken count for loops with unknown
+ // strides if we can prove that the loop is not an infinite loop with side
+ // effects. Here's the loop structure we are trying to handle -
+ //
+ // i = start
+ // do {
+ // A[i] = i;
+ // i += s;
+ // } while (i < end);
+ //
+ // The backedge taken count for such loops is evaluated as -
+ // (max(end, start + stride) - start - 1) /u stride
+ //
+ // The additional preconditions that we need to check to prove correctness
+ // of the above formula are as follows -
+ //
+ // a) IV is either nuw or nsw depending upon signedness (indicated by the
+ // NoWrap flag).
+ // b) loop is single exit with no side effects.
+ //
+ //
+ // Precondition a) implies that if the stride is negative, this is a single
+ // trip loop. The backedge taken count formula reduces to zero in this case.
+ //
+ // Precondition b) implies that the unknown stride cannot be zero otherwise
+ // we have UB.
+ //
+ // The positive stride case is the same as isKnownPositive(Stride) returning
+ // true (original behavior of the function).
+ //
+ // We want to make sure that the stride is truly unknown as there are edge
+ // cases where ScalarEvolution propagates no wrap flags to the
+ // post-increment/decrement IV even though the increment/decrement operation
+ // itself is wrapping. The computed backedge taken count may be wrong in
+ // such cases. This is prevented by checking that the stride is not known to
+ // be either positive or non-positive. For example, no wrap flags are
+ // propagated to the post-increment IV of this loop with a trip count of 2 -
+ //
+ // unsigned char i;
+ // for(i=127; i<128; i+=129)
+ // A[i] = i;
+ //
+ if (PredicatedIV || !NoWrap || isKnownNonPositive(Stride) ||
+ !loopHasNoSideEffects(L))
+ return getCouldNotCompute();
+
+ } else if (!Stride->isOne() &&
+ doesIVOverflowOnLT(RHS, Stride, IsSigned, NoWrap))
+ // Avoid proven overflow cases: this will ensure that the backedge taken
+ // count will not generate any unsigned overflow. Relaxed no-overflow
+ // conditions exploit NoWrapFlags, allowing to optimize in presence of
+ // undefined behaviors like the case of C language.
return getCouldNotCompute();
ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SLT
: ICmpInst::ICMP_ULT;
const SCEV *Start = IV->getStart();
const SCEV *End = RHS;
- if (!isLoopEntryGuardedByCond(L, Cond, getMinusSCEV(Start, Stride), RHS))
+ // If the backedge is taken at least once, then it will be taken
+ // (End-Start)/Stride times (rounded up to a multiple of Stride), where Start
+ // is the LHS value of the less-than comparison the first time it is evaluated
+ // and End is the RHS.
+ const SCEV *BECountIfBackedgeTaken =
+ computeBECount(getMinusSCEV(End, Start), Stride, false);
+ // If the loop entry is guarded by the result of the backedge test of the
+ // first loop iteration, then we know the backedge will be taken at least
+ // once and so the backedge taken count is as above. If not then we use the
+ // expression (max(End,Start)-Start)/Stride to describe the backedge count,
+ // as if the backedge is taken at least once max(End,Start) is End and so the
+ // result is as above, and if not max(End,Start) is Start so we get a backedge
+ // count of zero.
+ const SCEV *BECount;
+ if (isLoopEntryGuardedByCond(L, Cond, getMinusSCEV(Start, Stride), RHS))
+ BECount = BECountIfBackedgeTaken;
+ else {
End = IsSigned ? getSMaxExpr(RHS, Start) : getUMaxExpr(RHS, Start);
+ BECount = computeBECount(getMinusSCEV(End, Start), Stride, false);
+ }
- const SCEV *BECount = computeBECount(getMinusSCEV(End, Start), Stride, false);
+ const SCEV *MaxBECount;
+ bool MaxOrZero = false;
+ if (isa<SCEVConstant>(BECount))
+ MaxBECount = BECount;
+ else if (isa<SCEVConstant>(BECountIfBackedgeTaken)) {
+ // If we know exactly how many times the backedge will be taken if it's
+ // taken at least once, then the backedge count will either be that or
+ // zero.
+ MaxBECount = BECountIfBackedgeTaken;
+ MaxOrZero = true;
+ } else {
+ // Calculate the maximum backedge count based on the range of values
+ // permitted by Start, End, and Stride.
+ APInt MinStart = IsSigned ? getSignedRange(Start).getSignedMin()
+ : getUnsignedRange(Start).getUnsignedMin();
- APInt MinStart = IsSigned ? getSignedRange(Start).getSignedMin()
- : getUnsignedRange(Start).getUnsignedMin();
+ unsigned BitWidth = getTypeSizeInBits(LHS->getType());
- APInt MinStride = IsSigned ? getSignedRange(Stride).getSignedMin()
- : getUnsignedRange(Stride).getUnsignedMin();
+ APInt StrideForMaxBECount;
- unsigned BitWidth = getTypeSizeInBits(LHS->getType());
- APInt Limit = IsSigned ? APInt::getSignedMaxValue(BitWidth) - (MinStride - 1)
- : APInt::getMaxValue(BitWidth) - (MinStride - 1);
+ if (PositiveStride)
+ StrideForMaxBECount =
+ IsSigned ? getSignedRange(Stride).getSignedMin()
+ : getUnsignedRange(Stride).getUnsignedMin();
+ else
+ // Using a stride of 1 is safe when computing max backedge taken count for
+ // a loop with unknown stride.
+ StrideForMaxBECount = APInt(BitWidth, 1, IsSigned);
- // Although End can be a MAX expression we estimate MaxEnd considering only
- // the case End = RHS. This is safe because in the other case (End - Start)
- // is zero, leading to a zero maximum backedge taken count.
- APInt MaxEnd =
- IsSigned ? APIntOps::smin(getSignedRange(RHS).getSignedMax(), Limit)
- : APIntOps::umin(getUnsignedRange(RHS).getUnsignedMax(), Limit);
+ APInt Limit =
+ IsSigned ? APInt::getSignedMaxValue(BitWidth) - (StrideForMaxBECount - 1)
+ : APInt::getMaxValue(BitWidth) - (StrideForMaxBECount - 1);
+
+ // Although End can be a MAX expression we estimate MaxEnd considering only
+ // the case End = RHS. This is safe because in the other case (End - Start)
+ // is zero, leading to a zero maximum backedge taken count.
+ APInt MaxEnd =
+ IsSigned ? APIntOps::smin(getSignedRange(RHS).getSignedMax(), Limit)
+ : APIntOps::umin(getUnsignedRange(RHS).getUnsignedMax(), Limit);
- const SCEV *MaxBECount;
- if (isa<SCEVConstant>(BECount))
- MaxBECount = BECount;
- else
MaxBECount = computeBECount(getConstant(MaxEnd - MinStart),
- getConstant(MinStride), false);
+ getConstant(StrideForMaxBECount), false);
+ }
if (isa<SCEVCouldNotCompute>(MaxBECount))
MaxBECount = BECount;
- return ExitLimit(BECount, MaxBECount, P);
+ return ExitLimit(BECount, MaxBECount, MaxOrZero, Predicates);
}
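A quick worked illustration of the rounded-up division described in the comments above; the loop bounds below are invented for the example and this is a sketch, not code from the patch.

// Sketch only: mirrors the ceil((End - Start) / Stride) computation that the
// comments above describe for computeBECount; all values are made up.
#include <cassert>
#include <cstdint>

static uint64_t backedgeTakenCount(uint64_t Start, uint64_t End, uint64_t Stride) {
  assert(Start < End && Stride != 0 && "assumes the backedge is taken at least once");
  // Round (End - Start) up to a multiple of Stride, then divide.
  return (End - Start + Stride - 1) / Stride;
}

int main() {
  // If the exit test "IV < 20" first sees IV == 3 and the stride is 4, the test
  // succeeds for IV = 3, 7, 11, 15, 19 and fails at 23, so the backedge is
  // taken 5 times.
  assert(backedgeTakenCount(3, 20, 4) == 5);
  return 0;
}

When the loop-entry guard cannot be established, the same computation is applied with End replaced by max(End, Start), which collapses to zero when Start is already past the bound.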
ScalarEvolution::ExitLimit
ScalarEvolution::howManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
const Loop *L, bool IsSigned,
bool ControlsExit, bool AllowPredicates) {
- SCEVUnionPredicate P;
+ SmallPtrSet<const SCEVPredicate *, 4> Predicates;
// We handle only IV > Invariant
if (!isLoopInvariant(RHS, L))
return getCouldNotCompute();
@@ -8727,7 +8796,7 @@ ScalarEvolution::howManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
// Try to make this an AddRec using runtime tests, in the first X
// iterations of this loop, where X is the SCEV expression found by the
// algorithm below.
- IV = convertSCEVToAddRecWithPredicates(LHS, L, P);
+ IV = convertSCEVToAddRecWithPredicates(LHS, L, Predicates);
// Avoid weird loops
if (!IV || IV->getLoop() != L || !IV->isAffine())
@@ -8787,7 +8856,7 @@ ScalarEvolution::howManyGreaterThans(const SCEV *LHS, const SCEV *RHS,
if (isa<SCEVCouldNotCompute>(MaxBECount))
MaxBECount = BECount;
- return ExitLimit(BECount, MaxBECount, P);
+ return ExitLimit(BECount, MaxBECount, false, Predicates);
}
const SCEV *SCEVAddRecExpr::getNumIterationsInRange(const ConstantRange &Range,
@@ -8859,9 +8928,7 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(const ConstantRange &Range,
// Range.getUpper() is crossed.
SmallVector<const SCEV *, 4> NewOps(op_begin(), op_end());
NewOps[0] = SE.getNegativeSCEV(SE.getConstant(Range.getUpper()));
- const SCEV *NewAddRec = SE.getAddRecExpr(NewOps, getLoop(),
- // getNoWrapFlags(FlagNW)
- FlagAnyWrap);
+ const SCEV *NewAddRec = SE.getAddRecExpr(NewOps, getLoop(), FlagAnyWrap);
// Next, solve the constructed addrec
if (auto Roots =
@@ -8905,38 +8972,15 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(const ConstantRange &Range,
return SE.getCouldNotCompute();
}
-namespace {
-struct FindUndefs {
- bool Found;
- FindUndefs() : Found(false) {}
-
- bool follow(const SCEV *S) {
- if (const SCEVUnknown *C = dyn_cast<SCEVUnknown>(S)) {
- if (isa<UndefValue>(C->getValue()))
- Found = true;
- } else if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
- if (isa<UndefValue>(C->getValue()))
- Found = true;
- }
-
- // Keep looking if we haven't found it yet.
- return !Found;
- }
- bool isDone() const {
- // Stop recursion if we have found an undef.
- return Found;
- }
-};
-}
-
// Return true when S contains at least an undef value.
-static inline bool
-containsUndefs(const SCEV *S) {
- FindUndefs F;
- SCEVTraversal<FindUndefs> ST(F);
- ST.visitAll(S);
-
- return F.Found;
+static inline bool containsUndefs(const SCEV *S) {
+ return SCEVExprContains(S, [](const SCEV *S) {
+ if (const auto *SU = dyn_cast<SCEVUnknown>(S))
+ return isa<UndefValue>(SU->getValue());
+ else if (const auto *SC = dyn_cast<SCEVConstant>(S))
+ return isa<UndefValue>(SC->getValue());
+ return false;
+ });
}
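For readers who have not seen the helper used in the rewritten containsUndefs above: SCEVExprContains visits the expression tree and stops as soon as the predicate returns true. A minimal usage sketch; the helper name mentionsValue is invented and this is not part of the patch.

// Sketch only: does the expression tree rooted at Root refer to Target
// through a SCEVUnknown leaf?
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
using namespace llvm;

static bool mentionsValue(const SCEV *Root, const Value *Target) {
  return SCEVExprContains(Root, [Target](const SCEV *S) {
    if (const auto *U = dyn_cast<SCEVUnknown>(S))
      return U->getValue() == Target;
    return false;
  });
}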
namespace {
@@ -8964,7 +9008,8 @@ struct SCEVCollectTerms {
: Terms(T) {}
bool follow(const SCEV *S) {
- if (isa<SCEVUnknown>(S) || isa<SCEVMulExpr>(S)) {
+ if (isa<SCEVUnknown>(S) || isa<SCEVMulExpr>(S) ||
+ isa<SCEVSignExtendExpr>(S)) {
if (!containsUndefs(S))
Terms.push_back(S);
@@ -9116,10 +9161,9 @@ static bool findArrayDimensionsRec(ScalarEvolution &SE,
}
// Remove all SCEVConstants.
- Terms.erase(std::remove_if(Terms.begin(), Terms.end(), [](const SCEV *E) {
- return isa<SCEVConstant>(E);
- }),
- Terms.end());
+ Terms.erase(
+ remove_if(Terms, [](const SCEV *E) { return isa<SCEVConstant>(E); }),
+ Terms.end());
if (Terms.size() > 0)
if (!findArrayDimensionsRec(SE, Terms, Sizes))
@@ -9129,40 +9173,11 @@ static bool findArrayDimensionsRec(ScalarEvolution &SE,
return true;
}
-// Returns true when S contains at least a SCEVUnknown parameter.
-static inline bool
-containsParameters(const SCEV *S) {
- struct FindParameter {
- bool FoundParameter;
- FindParameter() : FoundParameter(false) {}
-
- bool follow(const SCEV *S) {
- if (isa<SCEVUnknown>(S)) {
- FoundParameter = true;
- // Stop recursion: we found a parameter.
- return false;
- }
- // Keep looking.
- return true;
- }
- bool isDone() const {
- // Stop recursion if we have found a parameter.
- return FoundParameter;
- }
- };
-
- FindParameter F;
- SCEVTraversal<FindParameter> ST(F);
- ST.visitAll(S);
-
- return F.FoundParameter;
-}
// Returns true when one of the SCEVs of Terms contains a SCEVUnknown parameter.
-static inline bool
-containsParameters(SmallVectorImpl<const SCEV *> &Terms) {
+static inline bool containsParameters(SmallVectorImpl<const SCEV *> &Terms) {
for (const SCEV *T : Terms)
- if (containsParameters(T))
+ if (SCEVExprContains(T, isa<SCEVUnknown, const SCEV *>))
return true;
return false;
}
@@ -9493,6 +9508,7 @@ ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg)
: F(Arg.F), HasGuards(Arg.HasGuards), TLI(Arg.TLI), AC(Arg.AC), DT(Arg.DT),
LI(Arg.LI), CouldNotCompute(std::move(Arg.CouldNotCompute)),
ValueExprMap(std::move(Arg.ValueExprMap)),
+ PendingLoopPredicates(std::move(Arg.PendingLoopPredicates)),
WalkingBEDominatingConds(false), ProvingSplitPredicate(false),
BackedgeTakenCounts(std::move(Arg.BackedgeTakenCounts)),
PredicatedBackedgeTakenCounts(
@@ -9501,6 +9517,7 @@ ScalarEvolution::ScalarEvolution(ScalarEvolution &&Arg)
std::move(Arg.ConstantEvolutionLoopExitValue)),
ValuesAtScopes(std::move(Arg.ValuesAtScopes)),
LoopDispositions(std::move(Arg.LoopDispositions)),
+ LoopPropertiesCache(std::move(Arg.LoopPropertiesCache)),
BlockDispositions(std::move(Arg.BlockDispositions)),
UnsignedRanges(std::move(Arg.UnsignedRanges)),
SignedRanges(std::move(Arg.SignedRanges)),
@@ -9569,6 +9586,8 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
if (!isa<SCEVCouldNotCompute>(SE->getMaxBackedgeTakenCount(L))) {
OS << "max backedge-taken count is " << *SE->getMaxBackedgeTakenCount(L);
+ if (SE->isBackedgeTakenCountMaxOrZero(L))
+ OS << ", actual taken count either this or zero.";
} else {
OS << "Unpredictable max backedge-taken count. ";
}
@@ -9829,8 +9848,10 @@ ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(S);
if (!DT.dominates(AR->getLoop()->getHeader(), BB))
return DoesNotDominateBlock;
+
+ // Fall through into SCEVNAryExpr handling.
+ LLVM_FALLTHROUGH;
}
- // FALL THROUGH into SCEVNAryExpr handling.
case scAddExpr:
case scMulExpr:
case scUMaxExpr:
@@ -9883,24 +9904,7 @@ bool ScalarEvolution::properlyDominates(const SCEV *S, const BasicBlock *BB) {
}
bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const {
- // Search for a SCEV expression node within an expression tree.
- // Implements SCEVTraversal::Visitor.
- struct SCEVSearch {
- const SCEV *Node;
- bool IsFound;
-
- SCEVSearch(const SCEV *N): Node(N), IsFound(false) {}
-
- bool follow(const SCEV *S) {
- IsFound |= (S == Node);
- return !IsFound;
- }
- bool isDone() const { return IsFound; }
- };
-
- SCEVSearch Search(Op);
- visitAll(S, Search);
- return Search.IsFound;
+ return SCEVExprContains(S, [&](const SCEV *Expr) { return Expr == Op; });
}
void ScalarEvolution::forgetMemoizedResults(const SCEV *S) {
@@ -10008,10 +10012,10 @@ void ScalarEvolution::verify() const {
// TODO: Verify more things.
}
-char ScalarEvolutionAnalysis::PassID;
+AnalysisKey ScalarEvolutionAnalysis::Key;
ScalarEvolution ScalarEvolutionAnalysis::run(Function &F,
- AnalysisManager<Function> &AM) {
+ FunctionAnalysisManager &AM) {
return ScalarEvolution(F, AM.getResult<TargetLibraryAnalysis>(F),
AM.getResult<AssumptionAnalysis>(F),
AM.getResult<DominatorTreeAnalysis>(F),
@@ -10019,7 +10023,7 @@ ScalarEvolution ScalarEvolutionAnalysis::run(Function &F,
}
PreservedAnalyses
-ScalarEvolutionPrinterPass::run(Function &F, AnalysisManager<Function> &AM) {
+ScalarEvolutionPrinterPass::run(Function &F, FunctionAnalysisManager &AM) {
AM.getResult<ScalarEvolutionAnalysis>(F).print(OS);
return PreservedAnalyses::all();
}
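The PassID-to-AnalysisKey renames in this hunk follow the new pass manager's analysis registration scheme. A minimal sketch of what an analysis declared this way looks like; the class name and result contents are invented, only the AnalysisKey/AnalysisInfoMixin pattern is taken from the surrounding code.

// Sketch only: the general shape of a new-pass-manager function analysis.
#include "llvm/IR/Function.h"
#include "llvm/IR/PassManager.h"
using namespace llvm;

class MyToyAnalysis : public AnalysisInfoMixin<MyToyAnalysis> {
  friend AnalysisInfoMixin<MyToyAnalysis>;
  static AnalysisKey Key; // replaces the old "static char PassID"

public:
  struct Result { unsigned NumBlocks = 0; };

  Result run(Function &F, FunctionAnalysisManager &AM) {
    Result R;
    R.NumBlocks = static_cast<unsigned>(F.size()); // basic block count
    return R;
  }
};

AnalysisKey MyToyAnalysis::Key; // single out-of-line definition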
@@ -10106,25 +10110,34 @@ namespace {
class SCEVPredicateRewriter : public SCEVRewriteVisitor<SCEVPredicateRewriter> {
public:
- // Rewrites \p S in the context of a loop L and the predicate A.
- // If Assume is true, rewrite is free to add further predicates to A
- // such that the result will be an AddRecExpr.
+ /// Rewrites \p S in the context of a loop L and the SCEV predication
+ /// infrastructure.
+ ///
+ /// If \p Pred is non-null, the SCEV expression is rewritten to respect the
+ /// equivalences present in \p Pred.
+ ///
+ /// If \p NewPreds is non-null, rewrite is free to add further predicates to
+ /// \p NewPreds such that the result will be an AddRecExpr.
static const SCEV *rewrite(const SCEV *S, const Loop *L, ScalarEvolution &SE,
- SCEVUnionPredicate &A, bool Assume) {
- SCEVPredicateRewriter Rewriter(L, SE, A, Assume);
+ SmallPtrSetImpl<const SCEVPredicate *> *NewPreds,
+ SCEVUnionPredicate *Pred) {
+ SCEVPredicateRewriter Rewriter(L, SE, NewPreds, Pred);
return Rewriter.visit(S);
}
SCEVPredicateRewriter(const Loop *L, ScalarEvolution &SE,
- SCEVUnionPredicate &P, bool Assume)
- : SCEVRewriteVisitor(SE), P(P), L(L), Assume(Assume) {}
+ SmallPtrSetImpl<const SCEVPredicate *> *NewPreds,
+ SCEVUnionPredicate *Pred)
+ : SCEVRewriteVisitor(SE), NewPreds(NewPreds), Pred(Pred), L(L) {}
const SCEV *visitUnknown(const SCEVUnknown *Expr) {
- auto ExprPreds = P.getPredicatesForExpr(Expr);
- for (auto *Pred : ExprPreds)
- if (const auto *IPred = dyn_cast<SCEVEqualPredicate>(Pred))
- if (IPred->getLHS() == Expr)
- return IPred->getRHS();
+ if (Pred) {
+ auto ExprPreds = Pred->getPredicatesForExpr(Expr);
+ for (auto *Pred : ExprPreds)
+ if (const auto *IPred = dyn_cast<SCEVEqualPredicate>(Pred))
+ if (IPred->getLHS() == Expr)
+ return IPred->getRHS();
+ }
return Expr;
}
@@ -10165,32 +10178,31 @@ private:
bool addOverflowAssumption(const SCEVAddRecExpr *AR,
SCEVWrapPredicate::IncrementWrapFlags AddedFlags) {
auto *A = SE.getWrapPredicate(AR, AddedFlags);
- if (!Assume) {
+ if (!NewPreds) {
// Check if we've already made this assumption.
- if (P.implies(A))
- return true;
- return false;
+ return Pred && Pred->implies(A);
}
- P.add(A);
+ NewPreds->insert(A);
return true;
}
- SCEVUnionPredicate &P;
+ SmallPtrSetImpl<const SCEVPredicate *> *NewPreds;
+ SCEVUnionPredicate *Pred;
const Loop *L;
- bool Assume;
};
} // end anonymous namespace
const SCEV *ScalarEvolution::rewriteUsingPredicate(const SCEV *S, const Loop *L,
SCEVUnionPredicate &Preds) {
- return SCEVPredicateRewriter::rewrite(S, L, *this, Preds, false);
+ return SCEVPredicateRewriter::rewrite(S, L, *this, nullptr, &Preds);
}
-const SCEVAddRecExpr *
-ScalarEvolution::convertSCEVToAddRecWithPredicates(const SCEV *S, const Loop *L,
- SCEVUnionPredicate &Preds) {
- SCEVUnionPredicate TransformPreds;
- S = SCEVPredicateRewriter::rewrite(S, L, *this, TransformPreds, true);
+const SCEVAddRecExpr *ScalarEvolution::convertSCEVToAddRecWithPredicates(
+ const SCEV *S, const Loop *L,
+ SmallPtrSetImpl<const SCEVPredicate *> &Preds) {
+
+ SmallPtrSet<const SCEVPredicate *, 4> TransformPreds;
+ S = SCEVPredicateRewriter::rewrite(S, L, *this, &TransformPreds, nullptr);
auto *AddRec = dyn_cast<SCEVAddRecExpr>(S);
if (!AddRec)
@@ -10198,7 +10210,9 @@ ScalarEvolution::convertSCEVToAddRecWithPredicates(const SCEV *S, const Loop *L,
// Since the transformation was successful, we can now transfer the SCEV
// predicates.
- Preds.add(&TransformPreds);
+ for (auto *P : TransformPreds)
+ Preds.insert(P);
+
return AddRec;
}
@@ -10351,7 +10365,7 @@ const SCEV *PredicatedScalarEvolution::getSCEV(Value *V) {
return Entry.second;
// We found an entry but it's stale. Rewrite the stale entry
- // acording to the current predicate.
+ // according to the current predicate.
if (Entry.second)
Expr = Entry.second;
@@ -10425,11 +10439,15 @@ bool PredicatedScalarEvolution::hasNoOverflow(
const SCEVAddRecExpr *PredicatedScalarEvolution::getAsAddRec(Value *V) {
const SCEV *Expr = this->getSCEV(V);
- auto *New = SE.convertSCEVToAddRecWithPredicates(Expr, &L, Preds);
+ SmallPtrSet<const SCEVPredicate *, 4> NewPreds;
+ auto *New = SE.convertSCEVToAddRecWithPredicates(Expr, &L, NewPreds);
if (!New)
return nullptr;
+ for (auto *P : NewPreds)
+ Preds.add(P);
+
updateGeneration();
RewriteMap[SE.getSCEV(V)] = {Generation, New};
return New;
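For context on how the predicate plumbing reworked above is consumed, a hedged usage sketch. The caller, its names, and the getUnionPredicate accessor reflect my reading of the surrounding headers and should be treated as assumptions, not as code from this patch.

// Sketch only: asking PredicatedScalarEvolution for an add-recurrence, which
// may add runtime predicates to the accumulated set.
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
using namespace llvm;

static void illustrate(PredicatedScalarEvolution &PSE, Value *Ptr) {
  if (const SCEVAddRecExpr *AR = PSE.getAsAddRec(Ptr)) {
    // AR is only valid under the predicates gathered so far.
    const SCEVUnionPredicate &Preds = PSE.getUnionPredicate();
    (void)AR;
    (void)Preds;
  }
}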
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
index 61fb411d3150..7bea994121c8 100644
--- a/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
@@ -110,9 +110,9 @@ Value *SCEVAAResult::GetBaseValue(const SCEV *S) {
return nullptr;
}
-char SCEVAA::PassID;
+AnalysisKey SCEVAA::Key;
-SCEVAAResult SCEVAA::run(Function &F, AnalysisManager<Function> &AM) {
+SCEVAAResult SCEVAA::run(Function &F, FunctionAnalysisManager &AM) {
return SCEVAAResult(AM.getResult<ScalarEvolutionAnalysis>(F));
}
diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
index 2e45bb840946..d15a7dbd20e6 100644
--- a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -549,9 +549,8 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
if (!L->isLoopInvariant(V)) break;
- bool AnyIndexNotLoopInvariant =
- std::any_of(GepIndices.begin(), GepIndices.end(),
- [L](Value *Op) { return !L->isLoopInvariant(Op); });
+ bool AnyIndexNotLoopInvariant = any_of(
+ GepIndices, [L](Value *Op) { return !L->isLoopInvariant(Op); });
if (AnyIndexNotLoopInvariant)
break;
@@ -1183,11 +1182,14 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
PostIncLoopSet SavedPostIncLoops = PostIncLoops;
PostIncLoops.clear();
- // Expand code for the start value.
- Value *StartV =
- expandCodeFor(Normalized->getStart(), ExpandTy, &L->getHeader()->front());
+ // Expand code for the start value into the loop preheader.
+ assert(L->getLoopPreheader() &&
+ "Can't expand add recurrences without a loop preheader!");
+ Value *StartV = expandCodeFor(Normalized->getStart(), ExpandTy,
+ L->getLoopPreheader()->getTerminator());
- // StartV must be hoisted into L's preheader to dominate the new phi.
+  // StartV must have been inserted into L's preheader to dominate the new
+  // phi.
assert(!isa<Instruction>(StartV) ||
SE.DT.properlyDominates(cast<Instruction>(StartV)->getParent(),
L->getHeader()));
@@ -1625,9 +1627,10 @@ Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty) {
return V;
}
-Value *SCEVExpander::FindValueInExprValueMap(const SCEV *S,
- const Instruction *InsertPt) {
- SetVector<Value *> *Set = SE.getSCEVValues(S);
+ScalarEvolution::ValueOffsetPair
+SCEVExpander::FindValueInExprValueMap(const SCEV *S,
+ const Instruction *InsertPt) {
+ SetVector<ScalarEvolution::ValueOffsetPair> *Set = SE.getSCEVValues(S);
// If the expansion is not in CanonicalMode, and the SCEV contains any
// sub scAddRecExpr type SCEV, it is required to expand the SCEV literally.
if (CanonicalMode || !SE.containsAddRecurrence(S)) {
@@ -1636,21 +1639,21 @@ Value *SCEVExpander::FindValueInExprValueMap(const SCEV *S,
// Choose a Value from the set which dominates the insertPt.
// insertPt should be inside the Value's parent loop so as not to break
// the LCSSA form.
- for (auto const &Ent : *Set) {
+ for (auto const &VOPair : *Set) {
+ Value *V = VOPair.first;
+ ConstantInt *Offset = VOPair.second;
Instruction *EntInst = nullptr;
- if (Ent && isa<Instruction>(Ent) &&
- (EntInst = cast<Instruction>(Ent)) &&
- S->getType() == Ent->getType() &&
+ if (V && isa<Instruction>(V) && (EntInst = cast<Instruction>(V)) &&
+ S->getType() == V->getType() &&
EntInst->getFunction() == InsertPt->getFunction() &&
SE.DT.dominates(EntInst, InsertPt) &&
(SE.LI.getLoopFor(EntInst->getParent()) == nullptr ||
- SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt))) {
- return Ent;
- }
+ SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt)))
+ return {V, Offset};
}
}
}
- return nullptr;
+ return {nullptr, nullptr};
}
// The expansion of SCEV will either reuse a previous Value in ExprValueMap,
@@ -1698,11 +1701,33 @@ Value *SCEVExpander::expand(const SCEV *S) {
Builder.SetInsertPoint(InsertPt);
// Expand the expression into instructions.
- Value *V = FindValueInExprValueMap(S, InsertPt);
+ ScalarEvolution::ValueOffsetPair VO = FindValueInExprValueMap(S, InsertPt);
+ Value *V = VO.first;
if (!V)
V = visit(S);
-
+ else if (VO.second) {
+ if (PointerType *Vty = dyn_cast<PointerType>(V->getType())) {
+ Type *Ety = Vty->getPointerElementType();
+ int64_t Offset = VO.second->getSExtValue();
+ int64_t ESize = SE.getTypeSizeInBits(Ety);
+ if ((Offset * 8) % ESize == 0) {
+ ConstantInt *Idx =
+ ConstantInt::getSigned(VO.second->getType(), -(Offset * 8) / ESize);
+ V = Builder.CreateGEP(Ety, V, Idx, "scevgep");
+ } else {
+ ConstantInt *Idx =
+ ConstantInt::getSigned(VO.second->getType(), -Offset);
+ unsigned AS = Vty->getAddressSpace();
+ V = Builder.CreateBitCast(V, Type::getInt8PtrTy(SE.getContext(), AS));
+ V = Builder.CreateGEP(Type::getInt8Ty(SE.getContext()), V, Idx,
+ "uglygep");
+ V = Builder.CreateBitCast(V, Vty);
+ }
+ } else {
+ V = Builder.CreateSub(V, VO.second);
+ }
+ }
// Remember the expanded value for this SCEV at this location.
//
// This is independent of PostIncLoops. The mapped value simply materializes
@@ -1887,8 +1912,18 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
return NumElim;
}
-Value *SCEVExpander::findExistingExpansion(const SCEV *S,
- const Instruction *At, Loop *L) {
+Value *SCEVExpander::getExactExistingExpansion(const SCEV *S,
+ const Instruction *At, Loop *L) {
+ Optional<ScalarEvolution::ValueOffsetPair> VO =
+ getRelatedExistingExpansion(S, At, L);
+ if (VO && VO.getValue().second == nullptr)
+ return VO.getValue().first;
+ return nullptr;
+}
+
+Optional<ScalarEvolution::ValueOffsetPair>
+SCEVExpander::getRelatedExistingExpansion(const SCEV *S, const Instruction *At,
+ Loop *L) {
using namespace llvm::PatternMatch;
SmallVector<BasicBlock *, 4> ExitingBlocks;
@@ -1906,31 +1941,32 @@ Value *SCEVExpander::findExistingExpansion(const SCEV *S,
continue;
if (SE.getSCEV(LHS) == S && SE.DT.dominates(LHS, At))
- return LHS;
+ return ScalarEvolution::ValueOffsetPair(LHS, nullptr);
if (SE.getSCEV(RHS) == S && SE.DT.dominates(RHS, At))
- return RHS;
+ return ScalarEvolution::ValueOffsetPair(RHS, nullptr);
}
// Use expand's logic which is used for reusing a previous Value in
// ExprValueMap.
- if (Value *Val = FindValueInExprValueMap(S, At))
- return Val;
+ ScalarEvolution::ValueOffsetPair VO = FindValueInExprValueMap(S, At);
+ if (VO.first)
+ return VO;
// There is potential to make this significantly smarter, but this simple
// heuristic already gets some interesting cases.
// Cannot find a suitable value.
- return nullptr;
+ return None;
}
bool SCEVExpander::isHighCostExpansionHelper(
const SCEV *S, Loop *L, const Instruction *At,
SmallPtrSetImpl<const SCEV *> &Processed) {
- // If we can find an existing value for this scev avaliable at the point "At"
+ // If we can find an existing value for this scev available at the point "At"
// then consider the expression cheap.
- if (At && findExistingExpansion(S, At, L) != nullptr)
+ if (At && getRelatedExistingExpansion(S, At, L))
return false;
// Zero/One operand expressions
@@ -1978,7 +2014,7 @@ bool SCEVExpander::isHighCostExpansionHelper(
// involving division. This is just a simple search heuristic.
if (!At)
At = &ExitingBB->back();
- if (!findExistingExpansion(
+ if (!getRelatedExistingExpansion(
SE.getAddExpr(S, SE.getConstant(S->getType(), 1)), At, L))
return true;
}
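A worked example of the pointer-offset compensation added in the expand() hunk above (the types and numbers are invented): if the cached value V has type double* (element size 64 bits) and the recorded offset is 16 bytes, then (Offset * 8) % ESize == 0 and the reused value becomes a GEP over V with index -(16 * 8) / 64 = -2, stepping the pointer back two elements; an offset that the element size does not divide falls back to the i8 GEP path instead. The arithmetic, as a standalone sketch:

// Sketch only: index used when the element size evenly divides the offset;
// names are invented, the formula mirrors the hunk above.
#include <cassert>
#include <cstdint>

static int64_t gepIndexForOffset(int64_t OffsetBytes, int64_t ElemSizeBits) {
  assert((OffsetBytes * 8) % ElemSizeBits == 0 && "use the i8 fallback otherwise");
  return -(OffsetBytes * 8) / ElemSizeBits;
}

int main() {
  assert(gepIndexForOffset(16, 64) == -2); // step back two doubles = 16 bytes
  return 0;
}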
diff --git a/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp b/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp
index 82e65a1f2088..833c6e09f6fd 100644
--- a/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp
+++ b/contrib/llvm/lib/Analysis/ScopedNoAliasAA.cpp
@@ -127,9 +127,8 @@ ModRefInfo ScopedNoAliasAAResult::getModRefInfo(ImmutableCallSite CS1,
return AAResultBase::getModRefInfo(CS1, CS2);
}
-void ScopedNoAliasAAResult::collectMDInDomain(
- const MDNode *List, const MDNode *Domain,
- SmallPtrSetImpl<const MDNode *> &Nodes) const {
+static void collectMDInDomain(const MDNode *List, const MDNode *Domain,
+ SmallPtrSetImpl<const MDNode *> &Nodes) {
for (const MDOperand &MDOp : List->operands())
if (const MDNode *MD = dyn_cast<MDNode>(MDOp))
if (AliasScopeNode(MD).getDomain() == Domain)
@@ -151,12 +150,14 @@ bool ScopedNoAliasAAResult::mayAliasInScopes(const MDNode *Scopes,
// We alias unless, for some domain, the set of noalias scopes in that domain
// is a superset of the set of alias scopes in that domain.
for (const MDNode *Domain : Domains) {
- SmallPtrSet<const MDNode *, 16> NANodes, ScopeNodes;
- collectMDInDomain(NoAlias, Domain, NANodes);
+ SmallPtrSet<const MDNode *, 16> ScopeNodes;
collectMDInDomain(Scopes, Domain, ScopeNodes);
- if (!ScopeNodes.size())
+ if (ScopeNodes.empty())
continue;
+ SmallPtrSet<const MDNode *, 16> NANodes;
+ collectMDInDomain(NoAlias, Domain, NANodes);
+
// To not alias, all of the nodes in ScopeNodes must be in NANodes.
bool FoundAll = true;
for (const MDNode *SMD : ScopeNodes)
@@ -172,10 +173,10 @@ bool ScopedNoAliasAAResult::mayAliasInScopes(const MDNode *Scopes,
return true;
}
-char ScopedNoAliasAA::PassID;
+AnalysisKey ScopedNoAliasAA::Key;
ScopedNoAliasAAResult ScopedNoAliasAA::run(Function &F,
- AnalysisManager<Function> &AM) {
+ FunctionAnalysisManager &AM) {
return ScopedNoAliasAAResult();
}
diff --git a/contrib/llvm/lib/Analysis/StratifiedSets.h b/contrib/llvm/lib/Analysis/StratifiedSets.h
index fd3a241d79c1..772df175b384 100644
--- a/contrib/llvm/lib/Analysis/StratifiedSets.h
+++ b/contrib/llvm/lib/Analysis/StratifiedSets.h
@@ -85,17 +85,8 @@ struct StratifiedLink {
template <typename T> class StratifiedSets {
public:
StratifiedSets() = default;
-
- // TODO: Figure out how to make MSVC not call the copy ctor here, and delete
- // it.
-
- // Can't default these due to compile errors in MSVC2013
- StratifiedSets(StratifiedSets &&Other) { *this = std::move(Other); }
- StratifiedSets &operator=(StratifiedSets &&Other) {
- Values = std::move(Other.Values);
- Links = std::move(Other.Links);
- return *this;
- }
+ StratifiedSets(StratifiedSets &&) = default;
+ StratifiedSets &operator=(StratifiedSets &&) = default;
StratifiedSets(DenseMap<T, StratifiedInfo> Map,
std::vector<StratifiedLink> Links)
diff --git a/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp b/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp
index 93d537ad3abb..112118ab77eb 100644
--- a/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp
+++ b/contrib/llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -23,9 +23,10 @@ static cl::opt<TargetLibraryInfoImpl::VectorLibrary> ClVectorLibrary(
"No vector functions library"),
clEnumValN(TargetLibraryInfoImpl::Accelerate, "Accelerate",
"Accelerate framework"),
- clEnumValEnd));
+ clEnumValN(TargetLibraryInfoImpl::SVML, "SVML",
+ "Intel SVML library")));
-const char *const TargetLibraryInfoImpl::StandardNames[LibFunc::NumLibFuncs] = {
+StringRef const TargetLibraryInfoImpl::StandardNames[LibFunc::NumLibFuncs] = {
#define TLI_DEFINE_STRING
#include "llvm/Analysis/TargetLibraryInfo.def"
};
@@ -52,14 +53,33 @@ static bool hasSinCosPiStret(const Triple &T) {
/// specified target triple. This should be carefully written so that a missing
/// target triple gets a sane set of defaults.
static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
- ArrayRef<const char *> StandardNames) {
+ ArrayRef<StringRef> StandardNames) {
// Verify that the StandardNames array is in alphabetical order.
assert(std::is_sorted(StandardNames.begin(), StandardNames.end(),
- [](const char *LHS, const char *RHS) {
- return strcmp(LHS, RHS) < 0;
+ [](StringRef LHS, StringRef RHS) {
+ return LHS < RHS;
}) &&
"TargetLibraryInfoImpl function names must be sorted");
+ bool ShouldExtI32Param = false, ShouldExtI32Return = false,
+ ShouldSignExtI32Param = false;
+ // PowerPC64, Sparc64, SystemZ need signext/zeroext on i32 parameters and
+ // returns corresponding to C-level ints and unsigned ints.
+ if (T.getArch() == Triple::ppc64 || T.getArch() == Triple::ppc64le ||
+ T.getArch() == Triple::sparcv9 || T.getArch() == Triple::systemz) {
+ ShouldExtI32Param = true;
+ ShouldExtI32Return = true;
+ }
+ // Mips, on the other hand, needs signext on i32 parameters corresponding
+ // to both signed and unsigned ints.
+ if (T.getArch() == Triple::mips || T.getArch() == Triple::mipsel ||
+ T.getArch() == Triple::mips64 || T.getArch() == Triple::mips64el) {
+ ShouldSignExtI32Param = true;
+ }
+ TLI.setShouldExtI32Param(ShouldExtI32Param);
+ TLI.setShouldExtI32Return(ShouldExtI32Return);
+ TLI.setShouldSignExtI32Param(ShouldSignExtI32Param);
+
if (T.getArch() == Triple::r600 ||
T.getArch() == Triple::amdgcn) {
TLI.setUnavailable(LibFunc::ldexp);
@@ -322,6 +342,7 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
// on Linux.
//
// Fall through to disable all of them.
+ LLVM_FALLTHROUGH;
default:
TLI.setUnavailable(LibFunc::exp10);
TLI.setUnavailable(LibFunc::exp10f);
@@ -429,14 +450,19 @@ TargetLibraryInfoImpl::TargetLibraryInfoImpl(const Triple &T) {
}
TargetLibraryInfoImpl::TargetLibraryInfoImpl(const TargetLibraryInfoImpl &TLI)
- : CustomNames(TLI.CustomNames) {
+ : CustomNames(TLI.CustomNames), ShouldExtI32Param(TLI.ShouldExtI32Param),
+ ShouldExtI32Return(TLI.ShouldExtI32Return),
+ ShouldSignExtI32Param(TLI.ShouldSignExtI32Param) {
memcpy(AvailableArray, TLI.AvailableArray, sizeof(AvailableArray));
VectorDescs = TLI.VectorDescs;
ScalarDescs = TLI.ScalarDescs;
}
TargetLibraryInfoImpl::TargetLibraryInfoImpl(TargetLibraryInfoImpl &&TLI)
- : CustomNames(std::move(TLI.CustomNames)) {
+ : CustomNames(std::move(TLI.CustomNames)),
+ ShouldExtI32Param(TLI.ShouldExtI32Param),
+ ShouldExtI32Return(TLI.ShouldExtI32Return),
+ ShouldSignExtI32Param(TLI.ShouldSignExtI32Param) {
std::move(std::begin(TLI.AvailableArray), std::end(TLI.AvailableArray),
AvailableArray);
VectorDescs = TLI.VectorDescs;
@@ -445,12 +471,18 @@ TargetLibraryInfoImpl::TargetLibraryInfoImpl(TargetLibraryInfoImpl &&TLI)
TargetLibraryInfoImpl &TargetLibraryInfoImpl::operator=(const TargetLibraryInfoImpl &TLI) {
CustomNames = TLI.CustomNames;
+ ShouldExtI32Param = TLI.ShouldExtI32Param;
+ ShouldExtI32Return = TLI.ShouldExtI32Return;
+ ShouldSignExtI32Param = TLI.ShouldSignExtI32Param;
memcpy(AvailableArray, TLI.AvailableArray, sizeof(AvailableArray));
return *this;
}
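Related to the StringRef-based StandardNames table in the surrounding hunks: because the table is kept sorted (see the is_sorted assertion above), getLibFunc can locate a name with a plain binary search. A small standalone sketch with an invented table, not code from the patch.

// Sketch only: binary search over a sorted StringRef table.
#include "llvm/ADT/StringRef.h"
#include <algorithm>
#include <iterator>
using llvm::StringRef;

static const StringRef Names[] = {"cos", "exp", "log", "pow", "sin"}; // must stay sorted

static bool isKnownName(StringRef S) {
  const StringRef *I = std::lower_bound(std::begin(Names), std::end(Names), S);
  return I != std::end(Names) && *I == S;
}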
TargetLibraryInfoImpl &TargetLibraryInfoImpl::operator=(TargetLibraryInfoImpl &&TLI) {
CustomNames = std::move(TLI.CustomNames);
+ ShouldExtI32Param = TLI.ShouldExtI32Param;
+ ShouldExtI32Return = TLI.ShouldExtI32Return;
+ ShouldSignExtI32Param = TLI.ShouldSignExtI32Param;
std::move(std::begin(TLI.AvailableArray), std::end(TLI.AvailableArray),
AvailableArray);
return *this;
@@ -469,16 +501,16 @@ static StringRef sanitizeFunctionName(StringRef funcName) {
bool TargetLibraryInfoImpl::getLibFunc(StringRef funcName,
LibFunc::Func &F) const {
- const char *const *Start = &StandardNames[0];
- const char *const *End = &StandardNames[LibFunc::NumLibFuncs];
+ StringRef const *Start = &StandardNames[0];
+ StringRef const *End = &StandardNames[LibFunc::NumLibFuncs];
funcName = sanitizeFunctionName(funcName);
if (funcName.empty())
return false;
- const char *const *I = std::lower_bound(
- Start, End, funcName, [](const char *LHS, StringRef RHS) {
- return std::strncmp(LHS, RHS.data(), RHS.size()) < 0;
+ StringRef const *I = std::lower_bound(
+ Start, End, funcName, [](StringRef LHS, StringRef RHS) {
+ return LHS < RHS;
});
if (I != End && *I == funcName) {
F = (LibFunc::Func)(I - Start);
@@ -535,7 +567,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
--NumParams;
if (!IsSizeTTy(FTy.getParamType(NumParams)))
return false;
- // fallthrough
+ LLVM_FALLTHROUGH;
case LibFunc::strcpy:
case LibFunc::stpcpy:
return (NumParams == 2 && FTy.getReturnType() == FTy.getParamType(0) &&
@@ -547,7 +579,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
--NumParams;
if (!IsSizeTTy(FTy.getParamType(NumParams)))
return false;
- // fallthrough
+ LLVM_FALLTHROUGH;
case LibFunc::strncpy:
case LibFunc::stpncpy:
return (NumParams == 3 && FTy.getReturnType() == FTy.getParamType(0) &&
@@ -640,8 +672,9 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
--NumParams;
if (!IsSizeTTy(FTy.getParamType(NumParams)))
return false;
- // fallthrough
+ LLVM_FALLTHROUGH;
case LibFunc::memcpy:
+ case LibFunc::mempcpy:
case LibFunc::memmove:
return (NumParams == 3 && FTy.getReturnType() == FTy.getParamType(0) &&
FTy.getParamType(0)->isPointerTy() &&
@@ -652,7 +685,7 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
--NumParams;
if (!IsSizeTTy(FTy.getParamType(NumParams)))
return false;
- // fallthrough
+ LLVM_FALLTHROUGH;
case LibFunc::memset:
return (NumParams == 3 && FTy.getReturnType() == FTy.getParamType(0) &&
FTy.getParamType(0)->isPointerTy() &&
@@ -843,10 +876,10 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
case LibFunc::stat64:
case LibFunc::lstat64:
case LibFunc::statvfs64:
- return (NumParams >= 1 && FTy.getParamType(0)->isPointerTy() &&
+ return (NumParams == 2 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
case LibFunc::dunder_isoc99_sscanf:
- return (NumParams >= 1 && FTy.getParamType(0)->isPointerTy() &&
+ return (NumParams >= 2 && FTy.getParamType(0)->isPointerTy() &&
FTy.getParamType(1)->isPointerTy());
case LibFunc::fopen64:
return (NumParams == 2 && FTy.getReturnType()->isPointerTy() &&
@@ -953,15 +986,18 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy,
case LibFunc::ffs:
case LibFunc::ffsl:
case LibFunc::ffsll:
+ case LibFunc::fls:
+ case LibFunc::flsl:
+ case LibFunc::flsll:
+ return (NumParams == 1 && FTy.getReturnType()->isIntegerTy(32) &&
+ FTy.getParamType(0)->isIntegerTy());
+
case LibFunc::isdigit:
case LibFunc::isascii:
case LibFunc::toascii:
return (NumParams == 1 && FTy.getReturnType()->isIntegerTy(32) &&
- FTy.getParamType(0)->isIntegerTy());
+ FTy.getReturnType() == FTy.getParamType(0));
- case LibFunc::fls:
- case LibFunc::flsl:
- case LibFunc::flsll:
case LibFunc::abs:
case LibFunc::labs:
case LibFunc::llabs:
@@ -1004,21 +1040,19 @@ void TargetLibraryInfoImpl::disableAllFunctions() {
}
static bool compareByScalarFnName(const VecDesc &LHS, const VecDesc &RHS) {
- return std::strncmp(LHS.ScalarFnName, RHS.ScalarFnName,
- std::strlen(RHS.ScalarFnName)) < 0;
+ return LHS.ScalarFnName < RHS.ScalarFnName;
}
static bool compareByVectorFnName(const VecDesc &LHS, const VecDesc &RHS) {
- return std::strncmp(LHS.VectorFnName, RHS.VectorFnName,
- std::strlen(RHS.VectorFnName)) < 0;
+ return LHS.VectorFnName < RHS.VectorFnName;
}
static bool compareWithScalarFnName(const VecDesc &LHS, StringRef S) {
- return std::strncmp(LHS.ScalarFnName, S.data(), S.size()) < 0;
+ return LHS.ScalarFnName < S;
}
static bool compareWithVectorFnName(const VecDesc &LHS, StringRef S) {
- return std::strncmp(LHS.VectorFnName, S.data(), S.size()) < 0;
+ return LHS.VectorFnName < S;
}
void TargetLibraryInfoImpl::addVectorizableFunctions(ArrayRef<VecDesc> Fns) {
@@ -1074,6 +1108,75 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib(
addVectorizableFunctions(VecFuncs);
break;
}
+ case SVML: {
+ const VecDesc VecFuncs[] = {
+ {"sin", "__svml_sin2", 2},
+ {"sin", "__svml_sin4", 4},
+ {"sin", "__svml_sin8", 8},
+
+ {"sinf", "__svml_sinf4", 4},
+ {"sinf", "__svml_sinf8", 8},
+ {"sinf", "__svml_sinf16", 16},
+
+ {"cos", "__svml_cos2", 2},
+ {"cos", "__svml_cos4", 4},
+ {"cos", "__svml_cos8", 8},
+
+ {"cosf", "__svml_cosf4", 4},
+ {"cosf", "__svml_cosf8", 8},
+ {"cosf", "__svml_cosf16", 16},
+
+ {"pow", "__svml_pow2", 2},
+ {"pow", "__svml_pow4", 4},
+ {"pow", "__svml_pow8", 8},
+
+ {"powf", "__svml_powf4", 4},
+ {"powf", "__svml_powf8", 8},
+ {"powf", "__svml_powf16", 16},
+
+ {"llvm.pow.f64", "__svml_pow2", 2},
+ {"llvm.pow.f64", "__svml_pow4", 4},
+ {"llvm.pow.f64", "__svml_pow8", 8},
+
+ {"llvm.pow.f32", "__svml_powf4", 4},
+ {"llvm.pow.f32", "__svml_powf8", 8},
+ {"llvm.pow.f32", "__svml_powf16", 16},
+
+ {"exp", "__svml_exp2", 2},
+ {"exp", "__svml_exp4", 4},
+ {"exp", "__svml_exp8", 8},
+
+ {"expf", "__svml_expf4", 4},
+ {"expf", "__svml_expf8", 8},
+ {"expf", "__svml_expf16", 16},
+
+ {"llvm.exp.f64", "__svml_exp2", 2},
+ {"llvm.exp.f64", "__svml_exp4", 4},
+ {"llvm.exp.f64", "__svml_exp8", 8},
+
+ {"llvm.exp.f32", "__svml_expf4", 4},
+ {"llvm.exp.f32", "__svml_expf8", 8},
+ {"llvm.exp.f32", "__svml_expf16", 16},
+
+ {"log", "__svml_log2", 2},
+ {"log", "__svml_log4", 4},
+ {"log", "__svml_log8", 8},
+
+ {"logf", "__svml_logf4", 4},
+ {"logf", "__svml_logf8", 8},
+ {"logf", "__svml_logf16", 16},
+
+ {"llvm.log.f64", "__svml_log2", 2},
+ {"llvm.log.f64", "__svml_log4", 4},
+ {"llvm.log.f64", "__svml_log8", 8},
+
+ {"llvm.log.f32", "__svml_logf4", 4},
+ {"llvm.log.f32", "__svml_logf8", 8},
+ {"llvm.log.f32", "__svml_logf16", 16},
+ };
+ addVectorizableFunctions(VecFuncs);
+ break;
+ }
case NoLibrary:
break;
}
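A hedged sketch of how the SVML table above is expected to be consumed; getVectorizedFunction is the existing TargetLibraryInfo query for scalar-to-vector name mappings, and the expected result simply restates an entry from the table (illustrative only, not code from this patch).

// Sketch only: querying the mapping installed by
// addVectorizableFunctionsFromVecLib(SVML).
#include "llvm/Analysis/TargetLibraryInfo.h"
using namespace llvm;

static void illustrate(const TargetLibraryInfo &TLI) {
  // A scalar call to pow, vectorized by a factor of 4, should map to
  // __svml_pow4 according to the table above.
  StringRef VecName = TLI.getVectorizedFunction("pow", /*VF=*/4);
  (void)VecName;
}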
@@ -1162,7 +1265,7 @@ TargetLibraryInfoWrapperPass::TargetLibraryInfoWrapperPass(
initializeTargetLibraryInfoWrapperPassPass(*PassRegistry::getPassRegistry());
}
-char TargetLibraryAnalysis::PassID;
+AnalysisKey TargetLibraryAnalysis::Key;
// Register the basic pass.
INITIALIZE_PASS(TargetLibraryInfoWrapperPass, "targetlibinfo",
diff --git a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp
index 52013f796c56..2a15b9b264e3 100644
--- a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -150,6 +150,11 @@ int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
return Cost;
}
+bool TargetTransformInfo::isFoldableMemAccessOffset(Instruction *I,
+ int64_t Offset) const {
+ return TTIImpl->isFoldableMemAccessOffset(I, Offset);
+}
+
bool TargetTransformInfo::isTruncateFree(Type *Ty1, Type *Ty2) const {
return TTIImpl->isTruncateFree(Ty1, Ty2);
}
@@ -173,6 +178,9 @@ unsigned TargetTransformInfo::getJumpBufSize() const {
bool TargetTransformInfo::shouldBuildLookupTables() const {
return TTIImpl->shouldBuildLookupTables();
}
+bool TargetTransformInfo::shouldBuildLookupTablesForConstant(Constant *C) const {
+ return TTIImpl->shouldBuildLookupTablesForConstant(C);
+}
bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) const {
return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);
@@ -186,11 +194,12 @@ bool TargetTransformInfo::isFPVectorizationPotentiallyUnsafe() const {
return TTIImpl->isFPVectorizationPotentiallyUnsafe();
}
-bool TargetTransformInfo::allowsMisalignedMemoryAccesses(unsigned BitWidth,
+bool TargetTransformInfo::allowsMisalignedMemoryAccesses(LLVMContext &Context,
+ unsigned BitWidth,
unsigned AddressSpace,
unsigned Alignment,
bool *Fast) const {
- return TTIImpl->allowsMisalignedMemoryAccesses(BitWidth, AddressSpace,
+ return TTIImpl->allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
Alignment, Fast);
}
@@ -245,10 +254,6 @@ unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const {
return TTIImpl->getRegisterBitWidth(Vector);
}
-unsigned TargetTransformInfo::getLoadStoreVecRegBitWidth(unsigned AS) const {
- return TTIImpl->getLoadStoreVecRegBitWidth(AS);
-}
-
unsigned TargetTransformInfo::getCacheLineSize() const {
return TTIImpl->getCacheLineSize();
}
@@ -417,6 +422,44 @@ bool TargetTransformInfo::areInlineCompatible(const Function *Caller,
return TTIImpl->areInlineCompatible(Caller, Callee);
}
+unsigned TargetTransformInfo::getLoadStoreVecRegBitWidth(unsigned AS) const {
+ return TTIImpl->getLoadStoreVecRegBitWidth(AS);
+}
+
+bool TargetTransformInfo::isLegalToVectorizeLoad(LoadInst *LI) const {
+ return TTIImpl->isLegalToVectorizeLoad(LI);
+}
+
+bool TargetTransformInfo::isLegalToVectorizeStore(StoreInst *SI) const {
+ return TTIImpl->isLegalToVectorizeStore(SI);
+}
+
+bool TargetTransformInfo::isLegalToVectorizeLoadChain(
+ unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const {
+ return TTIImpl->isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
+ AddrSpace);
+}
+
+bool TargetTransformInfo::isLegalToVectorizeStoreChain(
+ unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const {
+ return TTIImpl->isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
+ AddrSpace);
+}
+
+unsigned TargetTransformInfo::getLoadVectorFactor(unsigned VF,
+ unsigned LoadSize,
+ unsigned ChainSizeInBytes,
+ VectorType *VecTy) const {
+ return TTIImpl->getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
+}
+
+unsigned TargetTransformInfo::getStoreVectorFactor(unsigned VF,
+ unsigned StoreSize,
+ unsigned ChainSizeInBytes,
+ VectorType *VecTy) const {
+ return TTIImpl->getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
+}
+
TargetTransformInfo::Concept::~Concept() {}
TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}
@@ -426,11 +469,11 @@ TargetIRAnalysis::TargetIRAnalysis(
: TTICallback(std::move(TTICallback)) {}
TargetIRAnalysis::Result TargetIRAnalysis::run(const Function &F,
- AnalysisManager<Function> &) {
+ FunctionAnalysisManager &) {
return TTICallback(F);
}
-char TargetIRAnalysis::PassID;
+AnalysisKey TargetIRAnalysis::Key;
TargetIRAnalysis::Result TargetIRAnalysis::getDefaultTTI(const Function &F) {
return Result(F.getParent()->getDataLayout());
@@ -457,7 +500,7 @@ TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass(
}
TargetTransformInfo &TargetTransformInfoWrapperPass::getTTI(const Function &F) {
- AnalysisManager<Function> DummyFAM;
+ FunctionAnalysisManager DummyFAM;
TTI = TIRA.run(F, DummyFAM);
return *TTI;
}
diff --git a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
index 20d162a03c30..e920c4c4e6b2 100644
--- a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -135,34 +135,35 @@ using namespace llvm;
static cl::opt<bool> EnableTBAA("enable-tbaa", cl::init(true));
namespace {
-/// TBAANode - This is a simple wrapper around an MDNode which provides a
-/// higher-level interface by hiding the details of how alias analysis
-/// information is encoded in its operands.
-class TBAANode {
- const MDNode *Node;
+/// This is a simple wrapper around an MDNode which provides a higher-level
+/// interface by hiding the details of how alias analysis information is encoded
+/// in its operands.
+template<typename MDNodeTy>
+class TBAANodeImpl {
+ MDNodeTy *Node;
public:
- TBAANode() : Node(nullptr) {}
- explicit TBAANode(const MDNode *N) : Node(N) {}
+ TBAANodeImpl() : Node(nullptr) {}
+ explicit TBAANodeImpl(MDNodeTy *N) : Node(N) {}
/// getNode - Get the MDNode for this TBAANode.
- const MDNode *getNode() const { return Node; }
+ MDNodeTy *getNode() const { return Node; }
/// getParent - Get this TBAANode's Alias tree parent.
- TBAANode getParent() const {
+ TBAANodeImpl<MDNodeTy> getParent() const {
if (Node->getNumOperands() < 2)
- return TBAANode();
- MDNode *P = dyn_cast_or_null<MDNode>(Node->getOperand(1));
+ return TBAANodeImpl<MDNodeTy>();
+ MDNodeTy *P = dyn_cast_or_null<MDNodeTy>(Node->getOperand(1));
if (!P)
- return TBAANode();
+ return TBAANodeImpl<MDNodeTy>();
// Ok, this node has a valid parent. Return it.
- return TBAANode(P);
+ return TBAANodeImpl<MDNodeTy>(P);
}
- /// TypeIsImmutable - Test if this TBAANode represents a type for objects
- /// which are not modified (by any means) in the context where this
+ /// Test if this TBAANode represents a type for objects which are
+ /// not modified (by any means) in the context where this
/// AliasAnalysis is relevant.
- bool TypeIsImmutable() const {
+ bool isTypeImmutable() const {
if (Node->getNumOperands() < 3)
return false;
ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(2));
@@ -172,32 +173,40 @@ public:
}
};
+/// \name Specializations of \c TBAANodeImpl for const and non const qualified
+/// \c MDNode.
+/// @{
+typedef TBAANodeImpl<const MDNode> TBAANode;
+typedef TBAANodeImpl<MDNode> MutableTBAANode;
+/// @}
+
/// This is a simple wrapper around an MDNode which provides a
/// higher-level interface by hiding the details of how alias analysis
/// information is encoded in its operands.
-class TBAAStructTagNode {
+template<typename MDNodeTy>
+class TBAAStructTagNodeImpl {
/// This node should be created with createTBAAStructTagNode.
- const MDNode *Node;
+ MDNodeTy *Node;
public:
- explicit TBAAStructTagNode(const MDNode *N) : Node(N) {}
+ explicit TBAAStructTagNodeImpl(MDNodeTy *N) : Node(N) {}
/// Get the MDNode for this TBAAStructTagNode.
- const MDNode *getNode() const { return Node; }
+ MDNodeTy *getNode() const { return Node; }
- const MDNode *getBaseType() const {
+ MDNodeTy *getBaseType() const {
return dyn_cast_or_null<MDNode>(Node->getOperand(0));
}
- const MDNode *getAccessType() const {
+ MDNodeTy *getAccessType() const {
return dyn_cast_or_null<MDNode>(Node->getOperand(1));
}
uint64_t getOffset() const {
return mdconst::extract<ConstantInt>(Node->getOperand(2))->getZExtValue();
}
- /// TypeIsImmutable - Test if this TBAAStructTagNode represents a type for
- /// objects which are not modified (by any means) in the context where this
+ /// Test if this TBAAStructTagNode represents a type for objects
+ /// which are not modified (by any means) in the context where this
/// AliasAnalysis is relevant.
- bool TypeIsImmutable() const {
+ bool isTypeImmutable() const {
if (Node->getNumOperands() < 4)
return false;
ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(3));
@@ -207,6 +216,13 @@ public:
}
};
+/// \name Specializations of \c TBAAStructTagNodeImpl for const and non const
+/// qualified \c MDNodes.
+/// @{
+typedef TBAAStructTagNodeImpl<const MDNode> TBAAStructTagNode;
+typedef TBAAStructTagNodeImpl<MDNode> MutableTBAAStructTagNode;
+/// @}
+
/// This is a simple wrapper around an MDNode which provides a
/// higher-level interface by hiding the details of how alias analysis
/// information is encoded in its operands.
@@ -311,8 +327,8 @@ bool TypeBasedAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
// If this is an "immutable" type, we can assume the pointer is pointing
// to constant memory.
- if ((!isStructPathTBAA(M) && TBAANode(M).TypeIsImmutable()) ||
- (isStructPathTBAA(M) && TBAAStructTagNode(M).TypeIsImmutable()))
+ if ((!isStructPathTBAA(M) && TBAANode(M).isTypeImmutable()) ||
+ (isStructPathTBAA(M) && TBAAStructTagNode(M).isTypeImmutable()))
return true;
return AAResultBase::pointsToConstantMemory(Loc, OrLocal);
@@ -328,8 +344,8 @@ TypeBasedAAResult::getModRefBehavior(ImmutableCallSite CS) {
// If this is an "immutable" type, we can assume the call doesn't write
// to memory.
if (const MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa))
- if ((!isStructPathTBAA(M) && TBAANode(M).TypeIsImmutable()) ||
- (isStructPathTBAA(M) && TBAAStructTagNode(M).TypeIsImmutable()))
+ if ((!isStructPathTBAA(M) && TBAANode(M).isTypeImmutable()) ||
+ (isStructPathTBAA(M) && TBAAStructTagNode(M).isTypeImmutable()))
Min = FMRB_OnlyReadsMemory;
return FunctionModRefBehavior(AAResultBase::getModRefBehavior(CS) & Min);
@@ -401,34 +417,31 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
return A;
// For struct-path aware TBAA, we use the access type of the tag.
- bool StructPath = isStructPathTBAA(A) && isStructPathTBAA(B);
- if (StructPath) {
- A = cast_or_null<MDNode>(A->getOperand(1));
- if (!A)
- return nullptr;
- B = cast_or_null<MDNode>(B->getOperand(1));
- if (!B)
- return nullptr;
- }
+ assert(isStructPathTBAA(A) && isStructPathTBAA(B) &&
+ "Auto upgrade should have taken care of this!");
+ A = cast_or_null<MDNode>(MutableTBAAStructTagNode(A).getAccessType());
+ if (!A)
+ return nullptr;
+ B = cast_or_null<MDNode>(MutableTBAAStructTagNode(B).getAccessType());
+ if (!B)
+ return nullptr;
SmallSetVector<MDNode *, 4> PathA;
- MDNode *T = A;
- while (T) {
- if (PathA.count(T))
+ MutableTBAANode TA(A);
+ while (TA.getNode()) {
+ if (PathA.count(TA.getNode()))
report_fatal_error("Cycle found in TBAA metadata.");
- PathA.insert(T);
- T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1))
- : nullptr;
+ PathA.insert(TA.getNode());
+ TA = TA.getParent();
}
SmallSetVector<MDNode *, 4> PathB;
- T = B;
- while (T) {
- if (PathB.count(T))
+ MutableTBAANode TB(B);
+ while (TB.getNode()) {
+ if (PathB.count(TB.getNode()))
report_fatal_error("Cycle found in TBAA metadata.");
- PathB.insert(T);
- T = T->getNumOperands() >= 2 ? cast_or_null<MDNode>(T->getOperand(1))
- : nullptr;
+ PathB.insert(TB.getNode());
+ TB = TB.getParent();
}
int IA = PathA.size() - 1;
@@ -443,11 +456,13 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
--IA;
--IB;
}
- if (!StructPath)
- return Ret;
- if (!Ret)
+ // We either did not find a match, or the only common base "type" is
+ // the root node. In either case, we don't have any useful TBAA
+ // metadata to attach.
+ if (!Ret || Ret->getNumOperands() < 2)
return nullptr;
+
// We need to convert from a type node to a tag node.
Type *Int64 = IntegerType::get(A->getContext(), 64);
Metadata *Ops[3] = {Ret, Ret,
@@ -478,52 +493,8 @@ void Instruction::getAAMetadata(AAMDNodes &N, bool Merge) const {
/// Aliases - Test whether the type represented by A may alias the
/// type represented by B.
bool TypeBasedAAResult::Aliases(const MDNode *A, const MDNode *B) const {
- // Make sure that both MDNodes are struct-path aware.
- if (isStructPathTBAA(A) && isStructPathTBAA(B))
- return PathAliases(A, B);
-
- // Keep track of the root node for A and B.
- TBAANode RootA, RootB;
-
- // Climb the tree from A to see if we reach B.
- for (TBAANode T(A);;) {
- if (T.getNode() == B)
- // B is an ancestor of A.
- return true;
-
- RootA = T;
- T = T.getParent();
- if (!T.getNode())
- break;
- }
-
- // Climb the tree from B to see if we reach A.
- for (TBAANode T(B);;) {
- if (T.getNode() == A)
- // A is an ancestor of B.
- return true;
-
- RootB = T;
- T = T.getParent();
- if (!T.getNode())
- break;
- }
-
- // Neither node is an ancestor of the other.
-
- // If they have different roots, they're part of different potentially
- // unrelated type systems, so we must be conservative.
- if (RootA.getNode() != RootB.getNode())
- return true;
-
- // If they have the same root, then we've proved there's no alias.
- return false;
-}
-
-/// Test whether the struct-path tag represented by A may alias the
-/// struct-path tag represented by B.
-bool TypeBasedAAResult::PathAliases(const MDNode *A, const MDNode *B) const {
- // Verify that both input nodes are struct-path aware.
+ // Verify that both input nodes are struct-path aware. Auto-upgrade should
+ // have taken care of this.
assert(isStructPathTBAA(A) && "MDNode A is not struct-path aware.");
assert(isStructPathTBAA(B) && "MDNode B is not struct-path aware.");
@@ -583,9 +554,9 @@ bool TypeBasedAAResult::PathAliases(const MDNode *A, const MDNode *B) const {
return false;
}
-char TypeBasedAA::PassID;
+AnalysisKey TypeBasedAA::Key;
-TypeBasedAAResult TypeBasedAA::run(Function &F, AnalysisManager<Function> &AM) {
+TypeBasedAAResult TypeBasedAA::run(Function &F, FunctionAnalysisManager &AM) {
return TypeBasedAAResult();
}
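The getMostGenericTBAA hunk above walks both access paths toward the root and keeps the deepest node the two paths share. A language-level sketch of that idea on a plain parent-pointer tree; the Node type is invented and this is not the LLVM metadata API.

// Sketch only: deepest common ancestor via comparison of the two
// root-to-node paths from the root end.
#include <vector>

struct Node { Node *Parent = nullptr; };

static Node *deepestCommonAncestor(Node *A, Node *B) {
  std::vector<Node *> PathA, PathB;
  for (Node *N = A; N; N = N->Parent) PathA.push_back(N);
  for (Node *N = B; N; N = N->Parent) PathB.push_back(N);
  Node *Common = nullptr;
  auto IA = PathA.rbegin(), IB = PathB.rbegin();
  while (IA != PathA.rend() && IB != PathB.rend() && *IA == *IB) {
    Common = *IA;
    ++IA;
    ++IB;
  }
  return Common; // null when the nodes live under different roots
}

The version in the patch additionally reports a fatal error on cycles and gives up when the only shared node is the root, since that carries no useful aliasing information.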
diff --git a/contrib/llvm/lib/Analysis/TypeMetadataUtils.cpp b/contrib/llvm/lib/Analysis/TypeMetadataUtils.cpp
index 31e2b42075d6..f56754167360 100644
--- a/contrib/llvm/lib/Analysis/TypeMetadataUtils.cpp
+++ b/contrib/llvm/lib/Analysis/TypeMetadataUtils.cpp
@@ -69,8 +69,7 @@ void llvm::findDevirtualizableCallsForTypeTest(
// Find llvm.assume intrinsics for this llvm.type.test call.
for (const Use &CIU : CI->uses()) {
- auto AssumeCI = dyn_cast<CallInst>(CIU.getUser());
- if (AssumeCI) {
+ if (auto *AssumeCI = dyn_cast<CallInst>(CIU.getUser())) {
Function *F = AssumeCI->getCalledFunction();
if (F && F->getIntrinsicID() == Intrinsic::assume)
Assumes.push_back(AssumeCI);
diff --git a/contrib/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm/lib/Analysis/ValueTracking.cpp
index f2b40787443a..2a77baec6c36 100644
--- a/contrib/llvm/lib/Analysis/ValueTracking.cpp
+++ b/contrib/llvm/lib/Analysis/ValueTracking.cpp
@@ -51,6 +51,12 @@ const unsigned MaxDepth = 6;
static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
cl::Hidden, cl::init(20));
+// This optimization is known to cause performance regressions in some cases,
+// so keep it under a temporary flag for now.
+static cl::opt<bool>
+DontImproveNonNegativePhiBits("dont-improve-non-negative-phi-bits",
+ cl::Hidden, cl::init(true));
+
/// Returns the bitwidth of the given scalar or pointer type (if unknown returns
/// 0). For vector types, returns the element type's bitwidth.
static unsigned getBitWidth(Type *Ty, const DataLayout &DL) {
@@ -80,7 +86,7 @@ struct Query {
/// isKnownNonZero, which calls computeKnownBits and ComputeSignBit and
/// isKnownToBeAPowerOfTwo (all of which can call computeKnownBits), and so
/// on.
- std::array<const Value*, MaxDepth> Excluded;
+ std::array<const Value *, MaxDepth> Excluded;
unsigned NumExcluded;
Query(const DataLayout &DL, AssumptionCache *AC, const Instruction *CxtI,
@@ -119,10 +125,10 @@ static const Instruction *safeCxtI(const Value *V, const Instruction *CxtI) {
return nullptr;
}
-static void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
+static void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne,
unsigned Depth, const Query &Q);
-void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
+void llvm::computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne,
const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT) {
@@ -130,7 +136,8 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
Query(DL, AC, safeCxtI(V, CxtI), DT));
}
-bool llvm::haveNoCommonBitsSet(Value *LHS, Value *RHS, const DataLayout &DL,
+bool llvm::haveNoCommonBitsSet(const Value *LHS, const Value *RHS,
+ const DataLayout &DL,
AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT) {
assert(LHS->getType() == RHS->getType() &&
@@ -145,10 +152,10 @@ bool llvm::haveNoCommonBitsSet(Value *LHS, Value *RHS, const DataLayout &DL,
return (LHSKnownZero | RHSKnownZero).isAllOnesValue();
}
-static void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
+static void ComputeSignBit(const Value *V, bool &KnownZero, bool &KnownOne,
unsigned Depth, const Query &Q);
-void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
+void llvm::ComputeSignBit(const Value *V, bool &KnownZero, bool &KnownOne,
const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT) {
@@ -156,10 +163,11 @@ void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
Query(DL, AC, safeCxtI(V, CxtI), DT));
}
-static bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth,
+static bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
const Query &Q);
-bool llvm::isKnownToBeAPowerOfTwo(Value *V, const DataLayout &DL, bool OrZero,
+bool llvm::isKnownToBeAPowerOfTwo(const Value *V, const DataLayout &DL,
+ bool OrZero,
unsigned Depth, AssumptionCache *AC,
const Instruction *CxtI,
const DominatorTree *DT) {
@@ -167,15 +175,16 @@ bool llvm::isKnownToBeAPowerOfTwo(Value *V, const DataLayout &DL, bool OrZero,
Query(DL, AC, safeCxtI(V, CxtI), DT));
}
-static bool isKnownNonZero(Value *V, unsigned Depth, const Query &Q);
+static bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q);
-bool llvm::isKnownNonZero(Value *V, const DataLayout &DL, unsigned Depth,
+bool llvm::isKnownNonZero(const Value *V, const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT) {
return ::isKnownNonZero(V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT));
}
-bool llvm::isKnownNonNegative(Value *V, const DataLayout &DL, unsigned Depth,
+bool llvm::isKnownNonNegative(const Value *V, const DataLayout &DL,
+ unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT) {
bool NonNegative, Negative;
@@ -183,7 +192,7 @@ bool llvm::isKnownNonNegative(Value *V, const DataLayout &DL, unsigned Depth,
return NonNegative;
}
-bool llvm::isKnownPositive(Value *V, const DataLayout &DL, unsigned Depth,
+bool llvm::isKnownPositive(const Value *V, const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT) {
if (auto *CI = dyn_cast<ConstantInt>(V))
@@ -195,7 +204,7 @@ bool llvm::isKnownPositive(Value *V, const DataLayout &DL, unsigned Depth,
isKnownNonZero(V, DL, Depth, AC, CxtI, DT);
}
-bool llvm::isKnownNegative(Value *V, const DataLayout &DL, unsigned Depth,
+bool llvm::isKnownNegative(const Value *V, const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT) {
bool NonNegative, Negative;
@@ -203,41 +212,45 @@ bool llvm::isKnownNegative(Value *V, const DataLayout &DL, unsigned Depth,
return Negative;
}
-static bool isKnownNonEqual(Value *V1, Value *V2, const Query &Q);
+static bool isKnownNonEqual(const Value *V1, const Value *V2, const Query &Q);
-bool llvm::isKnownNonEqual(Value *V1, Value *V2, const DataLayout &DL,
- AssumptionCache *AC, const Instruction *CxtI,
- const DominatorTree *DT) {
+bool llvm::isKnownNonEqual(const Value *V1, const Value *V2,
+ const DataLayout &DL,
+ AssumptionCache *AC, const Instruction *CxtI,
+ const DominatorTree *DT) {
return ::isKnownNonEqual(V1, V2, Query(DL, AC,
safeCxtI(V1, safeCxtI(V2, CxtI)),
DT));
}
-static bool MaskedValueIsZero(Value *V, const APInt &Mask, unsigned Depth,
+static bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth,
const Query &Q);
-bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask, const DataLayout &DL,
+bool llvm::MaskedValueIsZero(const Value *V, const APInt &Mask,
+ const DataLayout &DL,
unsigned Depth, AssumptionCache *AC,
const Instruction *CxtI, const DominatorTree *DT) {
return ::MaskedValueIsZero(V, Mask, Depth,
Query(DL, AC, safeCxtI(V, CxtI), DT));
}
-static unsigned ComputeNumSignBits(Value *V, unsigned Depth, const Query &Q);
+static unsigned ComputeNumSignBits(const Value *V, unsigned Depth,
+ const Query &Q);
-unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout &DL,
+unsigned llvm::ComputeNumSignBits(const Value *V, const DataLayout &DL,
unsigned Depth, AssumptionCache *AC,
const Instruction *CxtI,
const DominatorTree *DT) {
return ::ComputeNumSignBits(V, Depth, Query(DL, AC, safeCxtI(V, CxtI), DT));
}
-static void computeKnownBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
+static void computeKnownBitsAddSub(bool Add, const Value *Op0, const Value *Op1,
+ bool NSW,
APInt &KnownZero, APInt &KnownOne,
APInt &KnownZero2, APInt &KnownOne2,
unsigned Depth, const Query &Q) {
if (!Add) {
- if (ConstantInt *CLHS = dyn_cast<ConstantInt>(Op0)) {
+ if (const ConstantInt *CLHS = dyn_cast<ConstantInt>(Op0)) {
// We know that the top bits of C-X are clear if X contains less bits
// than C (i.e. no wrap-around can happen). For example, 20-X is
// positive if we can prove that X is >= 0 and < 16.
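[Not part of the diff: a small standalone C++ sketch to sanity-check the comment above. With X known to lie in [0, 16), the subtraction 20 - X cannot wrap, so every bit above the five low bits of the result is provably zero.]

#include <cassert>
#include <cstdint>

int main() {
  // C - X with 0 <= X < 16 stays in (4, 20], so no wrap-around happens
  // and all bits above bit 4 of the result remain clear.
  const uint32_t C = 20;
  const uint32_t HighMask = ~0x1Fu; // everything above the low 5 bits
  for (uint32_t X = 0; X < 16; ++X)
    assert(((C - X) & HighMask) == 0);
  return 0;
}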
@@ -311,7 +324,7 @@ static void computeKnownBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
}
}
-static void computeKnownBitsMul(Value *Op0, Value *Op1, bool NSW,
+static void computeKnownBitsMul(const Value *Op0, const Value *Op1, bool NSW,
APInt &KnownZero, APInt &KnownOne,
APInt &KnownZero2, APInt &KnownOne2,
unsigned Depth, const Query &Q) {
@@ -398,7 +411,7 @@ void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges,
}
}
-static bool isEphemeralValueOf(Instruction *I, const Value *E) {
+static bool isEphemeralValueOf(const Instruction *I, const Value *E) {
SmallVector<const Value *, 16> WorkSet(1, I);
SmallPtrSet<const Value *, 32> Visited;
SmallPtrSet<const Value *, 16> EphValues;
@@ -406,7 +419,7 @@ static bool isEphemeralValueOf(Instruction *I, const Value *E) {
// The instruction defining an assumption's condition itself is always
// considered ephemeral to that assumption (even if it has other
// non-ephemeral users). See r246696's test case for an example.
- if (std::find(I->op_begin(), I->op_end(), E) != I->op_end())
+ if (is_contained(I->operands(), E))
return true;
while (!WorkSet.empty()) {
@@ -415,8 +428,7 @@ static bool isEphemeralValueOf(Instruction *I, const Value *E) {
continue;
// If all uses of this value are ephemeral, then so is this value.
- if (std::all_of(V->user_begin(), V->user_end(),
- [&](const User *U) { return EphValues.count(U); })) {
+ if (all_of(V->users(), [&](const User *U) { return EphValues.count(U); })) {
if (V == E)
return true;
@@ -456,9 +468,9 @@ static bool isAssumeLikeIntrinsic(const Instruction *I) {
return false;
}
-static bool isValidAssumeForContext(Value *V, const Instruction *CxtI,
- const DominatorTree *DT) {
- Instruction *Inv = cast<Instruction>(V);
+bool llvm::isValidAssumeForContext(const Instruction *Inv,
+ const Instruction *CxtI,
+ const DominatorTree *DT) {
// There are two restrictions on the use of an assume:
// 1. The assume must dominate the context (or the control flow must
@@ -469,54 +481,42 @@ static bool isValidAssumeForContext(Value *V, const Instruction *CxtI,
// the assume).
if (DT) {
- if (DT->dominates(Inv, CxtI)) {
+ if (DT->dominates(Inv, CxtI))
return true;
- } else if (Inv->getParent() == CxtI->getParent()) {
- // The context comes first, but they're both in the same block. Make sure
- // there is nothing in between that might interrupt the control flow.
- for (BasicBlock::const_iterator I =
- std::next(BasicBlock::const_iterator(CxtI)),
- IE(Inv); I != IE; ++I)
- if (!isSafeToSpeculativelyExecute(&*I) && !isAssumeLikeIntrinsic(&*I))
- return false;
-
- return !isEphemeralValueOf(Inv, CxtI);
- }
+ } else if (Inv->getParent() == CxtI->getParent()->getSinglePredecessor()) {
+ // We don't have a DT, but this trivially dominates.
+ return true;
+ }
+ // With or without a DT, the only remaining case we will check is if the
+ // instructions are in the same BB. Give up if that is not the case.
+ if (Inv->getParent() != CxtI->getParent())
return false;
- }
- // When we don't have a DT, we do a limited search...
- if (Inv->getParent() == CxtI->getParent()->getSinglePredecessor()) {
- return true;
- } else if (Inv->getParent() == CxtI->getParent()) {
+  // If we have a dom tree, then we now know that the assume doesn't dominate

+ // the other instruction. If we don't have a dom tree then we can check if
+ // the assume is first in the BB.
+ if (!DT) {
// Search forward from the assume until we reach the context (or the end
// of the block); the common case is that the assume will come first.
- for (BasicBlock::iterator I = std::next(BasicBlock::iterator(Inv)),
+ for (auto I = std::next(BasicBlock::const_iterator(Inv)),
IE = Inv->getParent()->end(); I != IE; ++I)
if (&*I == CxtI)
return true;
-
- // The context must come first...
- for (BasicBlock::const_iterator I =
- std::next(BasicBlock::const_iterator(CxtI)),
- IE(Inv); I != IE; ++I)
- if (!isSafeToSpeculativelyExecute(&*I) && !isAssumeLikeIntrinsic(&*I))
- return false;
-
- return !isEphemeralValueOf(Inv, CxtI);
}
- return false;
-}
+ // The context comes first, but they're both in the same block. Make sure
+ // there is nothing in between that might interrupt the control flow.
+ for (BasicBlock::const_iterator I =
+ std::next(BasicBlock::const_iterator(CxtI)), IE(Inv);
+ I != IE; ++I)
+ if (!isSafeToSpeculativelyExecute(&*I) && !isAssumeLikeIntrinsic(&*I))
+ return false;
-bool llvm::isValidAssumeForContext(const Instruction *I,
- const Instruction *CxtI,
- const DominatorTree *DT) {
- return ::isValidAssumeForContext(const_cast<Instruction *>(I), CxtI, DT);
+ return !isEphemeralValueOf(Inv, CxtI);
}
-static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
+static void computeKnownBitsFromAssume(const Value *V, APInt &KnownZero,
APInt &KnownOne, unsigned Depth,
const Query &Q) {
// Use of assumptions is context-sensitive. If we don't have a context, we
@@ -788,11 +788,11 @@ static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
// shift amount, compute the implied known-zero or known-one bits of the shift
// operator's result respectively for that shift amount. The results from calling
// KZF and KOF are conservatively combined for all permitted shift amounts.
-template <typename KZFunctor, typename KOFunctor>
-static void computeKnownBitsFromShiftOperator(Operator *I,
- APInt &KnownZero, APInt &KnownOne,
- APInt &KnownZero2, APInt &KnownOne2,
- unsigned Depth, const Query &Q, KZFunctor KZF, KOFunctor KOF) {
+static void computeKnownBitsFromShiftOperator(
+ const Operator *I, APInt &KnownZero, APInt &KnownOne, APInt &KnownZero2,
+ APInt &KnownOne2, unsigned Depth, const Query &Q,
+ function_ref<APInt(const APInt &, unsigned)> KZF,
+ function_ref<APInt(const APInt &, unsigned)> KOF) {
unsigned BitWidth = KnownZero.getBitWidth();
if (auto *SA = dyn_cast<ConstantInt>(I->getOperand(1))) {
@@ -801,6 +801,14 @@ static void computeKnownBitsFromShiftOperator(Operator *I,
computeKnownBits(I->getOperand(0), KnownZero, KnownOne, Depth + 1, Q);
KnownZero = KZF(KnownZero, ShiftAmt);
KnownOne = KOF(KnownOne, ShiftAmt);
+    // If there is a conflict between KnownZero and KnownOne, this must be an
+ // overflowing left shift, so the shift result is undefined. Clear KnownZero
+ // and KnownOne bits so that other code could propagate this undef.
+ if ((KnownZero & KnownOne) != 0) {
+ KnownZero.clearAllBits();
+ KnownOne.clearAllBits();
+ }
+
return;
}
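[Not part of the diff: a standalone sketch of when the new conflict check fires, using plain uint8_t masks instead of APInt. It relies on the nsw-aware shift functors added further down for Instruction::Shl: for "shl nsw i8 1, 7" the nsw rule wants the sign bit known zero (the operand is provably non-negative) while the plain shift wants it known one, so the shift must signed-overflow and both masks get cleared.]

#include <cassert>
#include <cstdint>

int main() {
  // Operand is exactly i8 1, so every bit of it is known.
  const uint8_t KnownOne = 0x01, KnownZero = 0xFE;
  const unsigned ShiftAmt = 7;

  // KZF sketch: shifted zeros plus the freshly cleared low bits; with nsw
  // and a provably non-negative operand the result's sign bit stays zero.
  uint8_t KZ = (uint8_t)(KnownZero << ShiftAmt) | (uint8_t)((1u << ShiftAmt) - 1);
  if (KnownZero & 0x80)
    KZ |= 0x80;

  // KOF sketch: shifted ones; shifting the set low bit into position 7
  // claims the sign bit is one.
  uint8_t KO = (uint8_t)(KnownOne << ShiftAmt);
  if (KnownOne & 0x80)
    KO |= 0x80;

  // Bit 7 is claimed both zero and one: shl nsw i8 1, 7 signed-overflows,
  // so the analysis clears both masks rather than propagate nonsense.
  assert((KZ & KO) == 0x80);
  return 0;
}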
@@ -866,7 +874,7 @@ static void computeKnownBitsFromShiftOperator(Operator *I,
}
}
-static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
+static void computeKnownBitsFromOperator(const Operator *I, APInt &KnownZero,
APInt &KnownOne, unsigned Depth,
const Query &Q) {
unsigned BitWidth = KnownZero.getBitWidth();
@@ -950,14 +958,64 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ);
break;
}
- case Instruction::Select:
+ case Instruction::Select: {
computeKnownBits(I->getOperand(2), KnownZero, KnownOne, Depth + 1, Q);
computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, Depth + 1, Q);
+ const Value *LHS;
+ const Value *RHS;
+ SelectPatternFlavor SPF = matchSelectPattern(I, LHS, RHS).Flavor;
+ if (SelectPatternResult::isMinOrMax(SPF)) {
+ computeKnownBits(RHS, KnownZero, KnownOne, Depth + 1, Q);
+ computeKnownBits(LHS, KnownZero2, KnownOne2, Depth + 1, Q);
+ } else {
+ computeKnownBits(I->getOperand(2), KnownZero, KnownOne, Depth + 1, Q);
+ computeKnownBits(I->getOperand(1), KnownZero2, KnownOne2, Depth + 1, Q);
+ }
+
+ unsigned MaxHighOnes = 0;
+ unsigned MaxHighZeros = 0;
+ if (SPF == SPF_SMAX) {
+ // If both sides are negative, the result is negative.
+ if (KnownOne[BitWidth - 1] && KnownOne2[BitWidth - 1])
+ // We can derive a lower bound on the result by taking the max of the
+ // leading one bits.
+ MaxHighOnes =
+ std::max(KnownOne.countLeadingOnes(), KnownOne2.countLeadingOnes());
+ // If either side is non-negative, the result is non-negative.
+ else if (KnownZero[BitWidth - 1] || KnownZero2[BitWidth - 1])
+ MaxHighZeros = 1;
+ } else if (SPF == SPF_SMIN) {
+ // If both sides are non-negative, the result is non-negative.
+ if (KnownZero[BitWidth - 1] && KnownZero2[BitWidth - 1])
+ // We can derive an upper bound on the result by taking the max of the
+ // leading zero bits.
+ MaxHighZeros = std::max(KnownZero.countLeadingOnes(),
+ KnownZero2.countLeadingOnes());
+ // If either side is negative, the result is negative.
+ else if (KnownOne[BitWidth - 1] || KnownOne2[BitWidth - 1])
+ MaxHighOnes = 1;
+ } else if (SPF == SPF_UMAX) {
+ // We can derive a lower bound on the result by taking the max of the
+ // leading one bits.
+ MaxHighOnes =
+ std::max(KnownOne.countLeadingOnes(), KnownOne2.countLeadingOnes());
+ } else if (SPF == SPF_UMIN) {
+ // We can derive an upper bound on the result by taking the max of the
+ // leading zero bits.
+ MaxHighZeros =
+ std::max(KnownZero.countLeadingOnes(), KnownZero2.countLeadingOnes());
+ }
+
// Only known if known in both the LHS and RHS.
KnownOne &= KnownOne2;
KnownZero &= KnownZero2;
+ if (MaxHighOnes > 0)
+ KnownOne |= APInt::getHighBitsSet(BitWidth, MaxHighOnes);
+ if (MaxHighZeros > 0)
+ KnownZero |= APInt::getHighBitsSet(BitWidth, MaxHighZeros);
break;
+ }
case Instruction::FPTrunc:
case Instruction::FPExt:
case Instruction::FPToUI:
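[Not part of the diff: an exhaustive i8 check, in plain C++ rather than APInt, of the SPF_SMAX reasoning above, taken in the simplest case where both inputs are fully known. When both inputs are negative, their signed maximum is negative too and keeps at least as many leading one bits as the better-known input, which is exactly the MaxHighOnes bound.]

#include <algorithm>
#include <cassert>
#include <cstdint>

// Number of consecutive set bits starting at the top of an 8-bit pattern.
static unsigned leadingOnes(uint8_t v) {
  unsigned n = 0;
  for (int bit = 7; bit >= 0 && ((v >> bit) & 1); --bit)
    ++n;
  return n;
}

int main() {
  for (int a = -128; a < 0; ++a)
    for (int b = -128; b < 0; ++b) {
      int m = std::max(a, b); // smax of two negative values
      unsigned lower =
          std::max(leadingOnes((uint8_t)a), leadingOnes((uint8_t)b));
      // The result keeps at least that many leading ones.
      assert(leadingOnes((uint8_t)m) >= lower);
    }
  return 0;
}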
@@ -967,8 +1025,8 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
break; // Can't work with floating point.
case Instruction::PtrToInt:
case Instruction::IntToPtr:
- case Instruction::AddrSpaceCast: // Pointers could be different sizes.
- // FALL THROUGH and handle them the same as zext/trunc.
+ // Fall through and handle them the same as zext/trunc.
+ LLVM_FALLTHROUGH;
case Instruction::ZExt:
case Instruction::Trunc: {
Type *SrcTy = I->getOperand(0)->getType();
@@ -1020,13 +1078,23 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
}
case Instruction::Shl: {
// (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0
- auto KZF = [BitWidth](const APInt &KnownZero, unsigned ShiftAmt) {
- return (KnownZero << ShiftAmt) |
- APInt::getLowBitsSet(BitWidth, ShiftAmt); // Low bits known 0.
+ bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
+ auto KZF = [BitWidth, NSW](const APInt &KnownZero, unsigned ShiftAmt) {
+ APInt KZResult =
+ (KnownZero << ShiftAmt) |
+ APInt::getLowBitsSet(BitWidth, ShiftAmt); // Low bits known 0.
+ // If this shift has "nsw" keyword, then the result is either a poison
+      // If this shift has the "nsw" keyword, then the result is either a poison
+ if (NSW && KnownZero.isNegative())
+ KZResult.setBit(BitWidth - 1);
+ return KZResult;
};
- auto KOF = [BitWidth](const APInt &KnownOne, unsigned ShiftAmt) {
- return KnownOne << ShiftAmt;
+ auto KOF = [BitWidth, NSW](const APInt &KnownOne, unsigned ShiftAmt) {
+ APInt KOResult = KnownOne << ShiftAmt;
+ if (NSW && KnownOne.isNegative())
+ KOResult.setBit(BitWidth - 1);
+ return KOResult;
};
computeKnownBitsFromShiftOperator(I, KnownZero, KnownOne,
@@ -1143,7 +1211,7 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
}
case Instruction::Alloca: {
- AllocaInst *AI = cast<AllocaInst>(I);
+ const AllocaInst *AI = cast<AllocaInst>(I);
unsigned Align = AI->getAlignment();
if (Align == 0)
Align = Q.DL.getABITypeAlignment(AI->getAllocatedType());
@@ -1163,7 +1231,7 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
gep_type_iterator GTI = gep_type_begin(I);
for (unsigned i = 1, e = I->getNumOperands(); i != e; ++i, ++GTI) {
Value *Index = I->getOperand(i);
- if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ if (StructType *STy = GTI.getStructTypeOrNull()) {
// Handle struct member offset arithmetic.
// Handle case when index is vector zeroinitializer
@@ -1200,7 +1268,7 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
break;
}
case Instruction::PHI: {
- PHINode *P = cast<PHINode>(I);
+ const PHINode *P = cast<PHINode>(I);
// Handle the case of a simple two-predecessor recurrence PHI.
// There's a lot more that could theoretically be done here, but
// this is sufficient to catch some interesting cases.
@@ -1237,9 +1305,46 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
APInt KnownZero3(KnownZero), KnownOne3(KnownOne);
computeKnownBits(L, KnownZero3, KnownOne3, Depth + 1, Q);
- KnownZero = APInt::getLowBitsSet(BitWidth,
- std::min(KnownZero2.countTrailingOnes(),
- KnownZero3.countTrailingOnes()));
+ KnownZero = APInt::getLowBitsSet(
+ BitWidth, std::min(KnownZero2.countTrailingOnes(),
+ KnownZero3.countTrailingOnes()));
+
+ if (DontImproveNonNegativePhiBits)
+ break;
+
+ auto *OverflowOp = dyn_cast<OverflowingBinaryOperator>(LU);
+ if (OverflowOp && OverflowOp->hasNoSignedWrap()) {
+ // If initial value of recurrence is nonnegative, and we are adding
+ // a nonnegative number with nsw, the result can only be nonnegative
+ // or poison value regardless of the number of times we execute the
+ // add in phi recurrence. If initial value is negative and we are
+ // adding a negative number with nsw, the result can only be
+ // negative or poison value. Similar arguments apply to sub and mul.
+ //
+ // (add non-negative, non-negative) --> non-negative
+ // (add negative, negative) --> negative
+ if (Opcode == Instruction::Add) {
+ if (KnownZero2.isNegative() && KnownZero3.isNegative())
+ KnownZero.setBit(BitWidth - 1);
+ else if (KnownOne2.isNegative() && KnownOne3.isNegative())
+ KnownOne.setBit(BitWidth - 1);
+ }
+
+ // (sub nsw non-negative, negative) --> non-negative
+ // (sub nsw negative, non-negative) --> negative
+ else if (Opcode == Instruction::Sub && LL == I) {
+ if (KnownZero2.isNegative() && KnownOne3.isNegative())
+ KnownZero.setBit(BitWidth - 1);
+ else if (KnownOne2.isNegative() && KnownZero3.isNegative())
+ KnownOne.setBit(BitWidth - 1);
+ }
+
+ // (mul nsw non-negative, non-negative) --> non-negative
+ else if (Opcode == Instruction::Mul && KnownZero2.isNegative() &&
+ KnownZero3.isNegative())
+ KnownZero.setBit(BitWidth - 1);
+ }
+
break;
}
}
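[Not part of the diff: a standalone sketch of the "(add non-negative, non-negative) --> non-negative" rule for the phi recurrence, done on int8_t ranges rather than known-bits masks. Any iterate that would leave the i8 range corresponds to a poison-producing nsw add, so the analysis may ignore that path.]

#include <cassert>

int main() {
  for (int init = 0; init <= 127; ++init)
    for (int step = 0; step <= 127; ++step) {
      int x = init;
      for (int iter = 0; iter < 8; ++iter) {
        int next = x + step;  // compute the add in a wider type
        if (next > 127)       // would signed-overflow i8: nsw makes it poison
          break;
        x = next;
        assert(x >= 0);       // the sign bit provably stays clear
      }
    }
  return 0;
}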
@@ -1284,12 +1389,12 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
// function.
if (MDNode *MD = cast<Instruction>(I)->getMetadata(LLVMContext::MD_range))
computeKnownBitsFromRangeMetadata(*MD, KnownZero, KnownOne);
- if (Value *RV = CallSite(I).getReturnedArgOperand()) {
+ if (const Value *RV = ImmutableCallSite(I).getReturnedArgOperand()) {
computeKnownBits(RV, KnownZero2, KnownOne2, Depth + 1, Q);
KnownZero |= KnownZero2;
KnownOne |= KnownOne2;
}
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
default: break;
case Intrinsic::bswap:
@@ -1326,9 +1431,16 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
}
}
break;
+ case Instruction::ExtractElement:
+ // Look through extract element. At the moment we keep this simple and skip
+ // tracking the specific element. But at least we might find information
+ // valid for all elements of the vector (for example if vector is sign
+ // extended, shifted, etc).
+ computeKnownBits(I->getOperand(0), KnownZero, KnownOne, Depth + 1, Q);
+ break;
case Instruction::ExtractValue:
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I->getOperand(0))) {
- ExtractValueInst *EVI = cast<ExtractValueInst>(I);
+ const ExtractValueInst *EVI = cast<ExtractValueInst>(I);
if (EVI->getNumIndices() != 1) break;
if (EVI->getIndices()[0] == 0) {
switch (II->getIntrinsicID()) {
@@ -1372,7 +1484,7 @@ static void computeKnownBitsFromOperator(Operator *I, APInt &KnownZero,
/// where V is a vector, known zero, and known one values are the
/// same width as the vector element, and the bit is set only if it is true
/// for all of the elements in the vector.
-void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
+void computeKnownBits(const Value *V, APInt &KnownZero, APInt &KnownOne,
unsigned Depth, const Query &Q) {
assert(V && "No Value?");
assert(Depth <= MaxDepth && "Limit Search Depth");
@@ -1388,9 +1500,10 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
KnownOne.getBitWidth() == BitWidth &&
"V, KnownOne and KnownZero should have same BitWidth");
- if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
- // We know all of the bits for a constant!
- KnownOne = CI->getValue();
+ const APInt *C;
+ if (match(V, m_APInt(C))) {
+ // We know all of the bits for a scalar constant or a splat vector constant!
+ KnownOne = *C;
KnownZero = ~KnownOne;
return;
}
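[Not part of the diff: a trivial standalone illustration of the invariant this branch establishes, with uint8_t standing in for APInt. For a scalar or splat constant, KnownOne is the constant and KnownZero its complement, so the masks are disjoint and together cover every bit.]

#include <cassert>
#include <cstdint>

int main() {
  const uint8_t C = 0x2A;                 // any constant works here
  const uint8_t KnownOne = C;
  const uint8_t KnownZero = (uint8_t)~C;
  assert((KnownOne & KnownZero) == 0);              // no bit claimed both ways
  assert((uint8_t)(KnownOne | KnownZero) == 0xFF);  // every bit is known
  return 0;
}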
@@ -1402,7 +1515,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
}
// Handle a constant vector by taking the intersection of the known bits of
// each element.
- if (ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(V)) {
+ if (const ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(V)) {
// We know that CDS must be a vector of integers. Take the intersection of
// each element.
KnownZero.setAllBits(); KnownOne.setAllBits();
@@ -1415,7 +1528,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
return;
}
- if (auto *CV = dyn_cast<ConstantVector>(V)) {
+ if (const auto *CV = dyn_cast<ConstantVector>(V)) {
// We know that CV must be a vector of integers. Take the intersection of
// each element.
KnownZero.setAllBits(); KnownOne.setAllBits();
@@ -1438,6 +1551,14 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// Start out not knowing anything.
KnownZero.clearAllBits(); KnownOne.clearAllBits();
+ // We can't imply anything about undefs.
+ if (isa<UndefValue>(V))
+ return;
+
+ // There's no point in looking through other users of ConstantData for
+ // assumptions. Confirm that we've handled them all.
+ assert(!isa<ConstantData>(V) && "Unhandled constant data!");
+
// Limit search depth.
// All recursive calls that increase depth must come after this.
if (Depth == MaxDepth)
@@ -1445,13 +1566,13 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
// A weak GlobalAlias is totally unknown. A non-weak GlobalAlias has
// the bits of its aliasee.
- if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) {
if (!GA->isInterposable())
computeKnownBits(GA->getAliasee(), KnownZero, KnownOne, Depth + 1, Q);
return;
}
- if (Operator *I = dyn_cast<Operator>(V))
+ if (const Operator *I = dyn_cast<Operator>(V))
computeKnownBitsFromOperator(I, KnownZero, KnownOne, Depth, Q);
// Aligned pointers have trailing zeros - refine KnownZero set
@@ -1472,7 +1593,7 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
/// Determine whether the sign bit is known to be zero or one.
/// Convenience wrapper around computeKnownBits.
-void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
+void ComputeSignBit(const Value *V, bool &KnownZero, bool &KnownOne,
unsigned Depth, const Query &Q) {
unsigned BitWidth = getBitWidth(V->getType(), Q.DL);
if (!BitWidth) {
@@ -1491,9 +1612,9 @@ void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
/// bit set when defined. For vectors return true if every element is known to
/// be a power of two when defined. Supports values with integer or pointer
/// types and vectors of integers.
-bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth,
+bool isKnownToBeAPowerOfTwo(const Value *V, bool OrZero, unsigned Depth,
const Query &Q) {
- if (Constant *C = dyn_cast<Constant>(V)) {
+ if (const Constant *C = dyn_cast<Constant>(V)) {
if (C->isNullValue())
return OrZero;
@@ -1523,10 +1644,10 @@ bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth,
match(V, m_LShr(m_Value(X), m_Value()))))
return isKnownToBeAPowerOfTwo(X, /*OrZero*/ true, Depth, Q);
- if (ZExtInst *ZI = dyn_cast<ZExtInst>(V))
+ if (const ZExtInst *ZI = dyn_cast<ZExtInst>(V))
return isKnownToBeAPowerOfTwo(ZI->getOperand(0), OrZero, Depth, Q);
- if (SelectInst *SI = dyn_cast<SelectInst>(V))
+ if (const SelectInst *SI = dyn_cast<SelectInst>(V))
return isKnownToBeAPowerOfTwo(SI->getTrueValue(), OrZero, Depth, Q) &&
isKnownToBeAPowerOfTwo(SI->getFalseValue(), OrZero, Depth, Q);
@@ -1544,7 +1665,7 @@ bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth,
// Adding a power-of-two or zero to the same power-of-two or zero yields
// either the original power-of-two, a larger power-of-two or zero.
if (match(V, m_Add(m_Value(X), m_Value(Y)))) {
- OverflowingBinaryOperator *VOBO = cast<OverflowingBinaryOperator>(V);
+ const OverflowingBinaryOperator *VOBO = cast<OverflowingBinaryOperator>(V);
if (OrZero || VOBO->hasNoUnsignedWrap() || VOBO->hasNoSignedWrap()) {
if (match(X, m_And(m_Specific(Y), m_Value())) ||
match(X, m_And(m_Value(), m_Specific(Y))))
@@ -1590,7 +1711,7 @@ bool isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth,
/// to be non-null.
///
/// Currently this routine does not support vector GEPs.
-static bool isGEPKnownNonNull(GEPOperator *GEP, unsigned Depth,
+static bool isGEPKnownNonNull(const GEPOperator *GEP, unsigned Depth,
const Query &Q) {
if (!GEP->isInBounds() || GEP->getPointerAddressSpace() != 0)
return false;
@@ -1609,7 +1730,7 @@ static bool isGEPKnownNonNull(GEPOperator *GEP, unsigned Depth,
for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP);
GTI != GTE; ++GTI) {
// Struct types are easy -- they must always be indexed by a constant.
- if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ if (StructType *STy = GTI.getStructTypeOrNull()) {
ConstantInt *OpC = cast<ConstantInt>(GTI.getOperand());
unsigned ElementIdx = OpC->getZExtValue();
const StructLayout *SL = Q.DL.getStructLayout(STy);
@@ -1649,7 +1770,7 @@ static bool isGEPKnownNonNull(GEPOperator *GEP, unsigned Depth,
/// Does the 'Range' metadata (which must be a valid MD_range operand list)
/// ensure that the value it's attached to is never Value? 'RangeType' is
/// the type of the value described by the range.
-static bool rangeMetadataExcludesValue(MDNode* Ranges, const APInt& Value) {
+static bool rangeMetadataExcludesValue(const MDNode* Ranges, const APInt& Value) {
const unsigned NumRanges = Ranges->getNumOperands() / 2;
assert(NumRanges >= 1);
for (unsigned i = 0; i < NumRanges; ++i) {
@@ -1668,7 +1789,7 @@ static bool rangeMetadataExcludesValue(MDNode* Ranges, const APInt& Value) {
/// For vectors return true if every element is known to be non-zero when
/// defined. Supports values with integer or pointer type and vectors of
/// integers.
-bool isKnownNonZero(Value *V, unsigned Depth, const Query &Q) {
+bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) {
if (auto *C = dyn_cast<Constant>(V)) {
if (C->isNullValue())
return false;
@@ -1712,7 +1833,7 @@ bool isKnownNonZero(Value *V, unsigned Depth, const Query &Q) {
if (V->getType()->isPointerTy()) {
if (isKnownNonNull(V))
return true;
- if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
+ if (const GEPOperator *GEP = dyn_cast<GEPOperator>(V))
if (isGEPKnownNonNull(GEP, Depth, Q))
return true;
}
@@ -1732,7 +1853,7 @@ bool isKnownNonZero(Value *V, unsigned Depth, const Query &Q) {
// if the lowest bit is shifted off the end.
if (BitWidth && match(V, m_Shl(m_Value(X), m_Value(Y)))) {
// shl nuw can't remove any non-zero bits.
- OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V);
+ const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V);
if (BO->hasNoUnsignedWrap())
return isKnownNonZero(X, Depth, Q);
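[Not part of the diff: an exhaustive i8 check of the comment above, with the nuw condition modelled as "no set bit is shifted out".]

#include <cassert>
#include <cstdint>

int main() {
  for (int x = 1; x < 256; ++x)          // X is non-zero
    for (unsigned s = 0; s < 8; ++s) {
      uint32_t wide = (uint32_t)x << s;  // do the shift without truncation
      if (wide > 0xFF)
        continue;                        // a set bit was shifted out: not nuw
      assert((uint8_t)wide != 0);        // shl nuw of a non-zero X is non-zero
    }
  return 0;
}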
@@ -1746,7 +1867,7 @@ bool isKnownNonZero(Value *V, unsigned Depth, const Query &Q) {
// defined if the sign bit is shifted off the end.
else if (match(V, m_Shr(m_Value(X), m_Value(Y)))) {
// shr exact can only shift out zero bits.
- PossiblyExactOperator *BO = cast<PossiblyExactOperator>(V);
+ const PossiblyExactOperator *BO = cast<PossiblyExactOperator>(V);
if (BO->isExact())
return isKnownNonZero(X, Depth, Q);
@@ -1817,7 +1938,7 @@ bool isKnownNonZero(Value *V, unsigned Depth, const Query &Q) {
}
// X * Y.
else if (match(V, m_Mul(m_Value(X), m_Value(Y)))) {
- OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V);
+ const OverflowingBinaryOperator *BO = cast<OverflowingBinaryOperator>(V);
// If X and Y are non-zero then so is X * Y as long as the multiplication
// does not overflow.
if ((BO->hasNoSignedWrap() || BO->hasNoUnsignedWrap()) &&
@@ -1825,13 +1946,13 @@ bool isKnownNonZero(Value *V, unsigned Depth, const Query &Q) {
return true;
}
// (C ? X : Y) != 0 if X != 0 and Y != 0.
- else if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
+ else if (const SelectInst *SI = dyn_cast<SelectInst>(V)) {
if (isKnownNonZero(SI->getTrueValue(), Depth, Q) &&
isKnownNonZero(SI->getFalseValue(), Depth, Q))
return true;
}
// PHI
- else if (PHINode *PN = dyn_cast<PHINode>(V)) {
+ else if (const PHINode *PN = dyn_cast<PHINode>(V)) {
// Try and detect a recurrence that monotonically increases from a
// starting value, as these are common as induction variables.
if (PN->getNumIncomingValues() == 2) {
@@ -1865,8 +1986,8 @@ bool isKnownNonZero(Value *V, unsigned Depth, const Query &Q) {
}
/// Return true if V2 == V1 + X, where X is known non-zero.
-static bool isAddOfNonZero(Value *V1, Value *V2, const Query &Q) {
- BinaryOperator *BO = dyn_cast<BinaryOperator>(V1);
+static bool isAddOfNonZero(const Value *V1, const Value *V2, const Query &Q) {
+ const BinaryOperator *BO = dyn_cast<BinaryOperator>(V1);
if (!BO || BO->getOpcode() != Instruction::Add)
return false;
Value *Op = nullptr;
@@ -1880,7 +2001,7 @@ static bool isAddOfNonZero(Value *V1, Value *V2, const Query &Q) {
}
/// Return true if it is known that V1 != V2.
-static bool isKnownNonEqual(Value *V1, Value *V2, const Query &Q) {
+static bool isKnownNonEqual(const Value *V1, const Value *V2, const Query &Q) {
if (V1->getType()->isVectorTy() || V1 == V2)
return false;
if (V1->getType() != V2->getType())
@@ -1916,7 +2037,7 @@ static bool isKnownNonEqual(Value *V1, Value *V2, const Query &Q) {
/// where V is a vector, the mask, known zero, and known one values are the
/// same width as the vector element, and the bit is set only if it is true
/// for all of the elements in the vector.
-bool MaskedValueIsZero(Value *V, const APInt &Mask, unsigned Depth,
+bool MaskedValueIsZero(const Value *V, const APInt &Mask, unsigned Depth,
const Query &Q) {
APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0);
computeKnownBits(V, KnownZero, KnownOne, Depth, Q);
@@ -1927,8 +2048,9 @@ bool MaskedValueIsZero(Value *V, const APInt &Mask, unsigned Depth,
/// minimum number of sign bits. Return 0 if the value is not a vector constant
/// or if any element was not analyzed; otherwise, return the count for the
/// element with the minimum number of sign bits.
-static unsigned computeNumSignBitsVectorConstant(Value *V, unsigned TyBits) {
- auto *CV = dyn_cast<Constant>(V);
+static unsigned computeNumSignBitsVectorConstant(const Value *V,
+ unsigned TyBits) {
+ const auto *CV = dyn_cast<Constant>(V);
if (!CV || !CV->getType()->isVectorTy())
return 0;
@@ -1956,7 +2078,7 @@ static unsigned computeNumSignBitsVectorConstant(Value *V, unsigned TyBits) {
/// after an "ashr X, 2", we know that the top 3 bits are all equal to each
/// other, so we return 3. For vectors, return the number of sign bits for the
/// vector element with the minimum number of known sign bits.
-unsigned ComputeNumSignBits(Value *V, unsigned Depth, const Query &Q) {
+unsigned ComputeNumSignBits(const Value *V, unsigned Depth, const Query &Q) {
unsigned TyBits = Q.DL.getTypeSizeInBits(V->getType()->getScalarType());
unsigned Tmp, Tmp2;
unsigned FirstAnswer = 1;
@@ -1964,10 +2086,10 @@ unsigned ComputeNumSignBits(Value *V, unsigned Depth, const Query &Q) {
// Note that ConstantInt is handled by the general computeKnownBits case
// below.
- if (Depth == 6)
+ if (Depth == MaxDepth)
return 1; // Limit search depth.
- Operator *U = dyn_cast<Operator>(V);
+ const Operator *U = dyn_cast<Operator>(V);
switch (Operator::getOpcode(V)) {
default: break;
case Instruction::SExt:
@@ -2125,7 +2247,7 @@ unsigned ComputeNumSignBits(Value *V, unsigned Depth, const Query &Q) {
return std::min(Tmp, Tmp2)-1;
case Instruction::PHI: {
- PHINode *PN = cast<PHINode>(U);
+ const PHINode *PN = cast<PHINode>(U);
unsigned NumIncomingValues = PN->getNumIncomingValues();
// Don't analyze large in-degree PHIs.
if (NumIncomingValues > 4) break;
@@ -2147,6 +2269,13 @@ unsigned ComputeNumSignBits(Value *V, unsigned Depth, const Query &Q) {
// FIXME: it's tricky to do anything useful for this, but it is an important
// case for targets like X86.
break;
+
+ case Instruction::ExtractElement:
+ // Look through extract element. At the moment we keep this simple and skip
+ // tracking the specific element. But at least we might find information
+ // valid for all elements of the vector (for example if vector is sign
+ // extended, shifted, etc).
+ return ComputeNumSignBits(U->getOperand(0), Depth + 1, Q);
}
// Finally, if we can prove that the top bits of the result are 0's or 1's,
@@ -2416,7 +2545,7 @@ bool llvm::CannotBeNegativeZero(const Value *V, const TargetLibraryInfo *TLI,
// FIXME: Magic number! At the least, this should be given a name because it's
// used similarly in CannotBeOrderedLessThanZero(). A better fix may be to
// expose it as a parameter, so it can be used for testing / experimenting.
- if (Depth == 6)
+ if (Depth == MaxDepth)
return false; // Limit search depth.
const Operator *I = dyn_cast<Operator>(V);
@@ -2463,7 +2592,7 @@ bool llvm::CannotBeOrderedLessThanZero(const Value *V,
// FIXME: Magic number! At the least, this should be given a name because it's
// used similarly in CannotBeNegativeZero(). A better fix may be to
// expose it as a parameter, so it can be used for testing / experimenting.
- if (Depth == 6)
+ if (Depth == MaxDepth)
return false; // Limit search depth.
const Operator *I = dyn_cast<Operator>(V);
@@ -2478,7 +2607,7 @@ bool llvm::CannotBeOrderedLessThanZero(const Value *V,
// x*x is always non-negative or a NaN.
if (I->getOperand(0) == I->getOperand(1))
return true;
- // Fall through
+ LLVM_FALLTHROUGH;
case Instruction::FAdd:
case Instruction::FDiv:
case Instruction::FRem:
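[Not part of the diff: a quick standalone check of the "x*x is always non-negative or a NaN" comment over a handful of representative doubles, including signed zero and the infinities.]

#include <cassert>
#include <cmath>
#include <limits>

int main() {
  const double inf = std::numeric_limits<double>::infinity();
  const double nan = std::numeric_limits<double>::quiet_NaN();
  const double samples[] = {0.0, -0.0, 1.5, -3.25, inf, -inf, nan};
  for (double x : samples) {
    double sq = x * x;
    // Squaring never yields a negative value; only a NaN input yields NaN,
    // which is unordered and therefore not "ordered less than zero".
    assert(sq >= 0.0 || std::isnan(sq));
  }
  return 0;
}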
@@ -2768,11 +2897,17 @@ Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
break;
if (GEPOperator *GEP = dyn_cast<GEPOperator>(Ptr)) {
- APInt GEPOffset(BitWidth, 0);
+ // If one of the values we have visited is an addrspacecast, then
+ // the pointer type of this GEP may be different from the type
+ // of the Ptr parameter which was passed to this function. This
+ // means when we construct GEPOffset, we need to use the size
+ // of GEP's pointer type rather than the size of the original
+ // pointer type.
+ APInt GEPOffset(DL.getPointerTypeSizeInBits(Ptr->getType()), 0);
if (!GEP->accumulateConstantOffset(DL, GEPOffset))
break;
- ByteOffset += GEPOffset;
+ ByteOffset += GEPOffset.getSExtValue();
Ptr = GEP->getPointerOperand();
} else if (Operator::getOpcode(Ptr) == Instruction::BitCast ||
@@ -2886,13 +3021,14 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str,
/// If we can compute the length of the string pointed to by
/// the specified pointer, return 'len+1'. If we can't, return 0.
-static uint64_t GetStringLengthH(Value *V, SmallPtrSetImpl<PHINode*> &PHIs) {
+static uint64_t GetStringLengthH(const Value *V,
+ SmallPtrSetImpl<const PHINode*> &PHIs) {
// Look through noop bitcast instructions.
V = V->stripPointerCasts();
// If this is a PHI node, there are two cases: either we have already seen it
// or we haven't.
- if (PHINode *PN = dyn_cast<PHINode>(V)) {
+ if (const PHINode *PN = dyn_cast<PHINode>(V)) {
if (!PHIs.insert(PN).second)
return ~0ULL; // already in the set.
@@ -2914,7 +3050,7 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSetImpl<PHINode*> &PHIs) {
}
// strlen(select(c,x,y)) -> strlen(x) ^ strlen(y)
- if (SelectInst *SI = dyn_cast<SelectInst>(V)) {
+ if (const SelectInst *SI = dyn_cast<SelectInst>(V)) {
uint64_t Len1 = GetStringLengthH(SI->getTrueValue(), PHIs);
if (Len1 == 0) return 0;
uint64_t Len2 = GetStringLengthH(SI->getFalseValue(), PHIs);
@@ -2935,10 +3071,10 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSetImpl<PHINode*> &PHIs) {
/// If we can compute the length of the string pointed to by
/// the specified pointer, return 'len+1'. If we can't, return 0.
-uint64_t llvm::GetStringLength(Value *V) {
+uint64_t llvm::GetStringLength(const Value *V) {
if (!V->getType()->isPointerTy()) return 0;
- SmallPtrSet<PHINode*, 32> PHIs;
+ SmallPtrSet<const PHINode*, 32> PHIs;
uint64_t Len = GetStringLengthH(V, PHIs);
// If Len is ~0ULL, we had an infinite phi cycle: this is dead code, so return
// an empty string as a length.
@@ -2947,7 +3083,8 @@ uint64_t llvm::GetStringLength(Value *V) {
/// \brief \p PN defines a loop-variant pointer to an object. Check if the
/// previous iteration of the loop was referring to the same object as \p PN.
-static bool isSameUnderlyingObjectInLoop(PHINode *PN, LoopInfo *LI) {
+static bool isSameUnderlyingObjectInLoop(const PHINode *PN,
+ const LoopInfo *LI) {
// Find the loop-defined value.
Loop *L = LI->getLoopFor(PN->getParent());
if (PN->getNumIncomingValues() != 2)
@@ -3208,11 +3345,11 @@ bool llvm::isKnownNonNull(const Value *V) {
if (const Argument *A = dyn_cast<Argument>(V))
return A->hasByValOrInAllocaAttr() || A->hasNonNullAttr();
- // A global variable in address space 0 is non null unless extern weak.
- // Other address spaces may have null as a valid address for a global,
- // so we can't assume anything.
+ // A global variable in address space 0 is non null unless extern weak
+ // or an absolute symbol reference. Other address spaces may have null as a
+ // valid address for a global, so we can't assume anything.
if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
- return !GV->hasExternalWeakLinkage() &&
+ return !GV->isAbsoluteSymbolRef() && !GV->hasExternalWeakLinkage() &&
GV->getType()->getAddressSpace() == 0;
// A Load tagged with nonnull metadata is never null.
@@ -3230,6 +3367,9 @@ static bool isKnownNonNullFromDominatingCondition(const Value *V,
const Instruction *CtxI,
const DominatorTree *DT) {
assert(V->getType()->isPointerTy() && "V must be pointer type");
+ assert(!isa<ConstantData>(V) && "Did not expect ConstantPointerNull");
+ assert(CtxI && "Context instruction required for analysis");
+ assert(DT && "Dominator tree required for analysis");
unsigned NumUsesExplored = 0;
for (auto *U : V->users()) {
@@ -3266,13 +3406,20 @@ static bool isKnownNonNullFromDominatingCondition(const Value *V,
bool llvm::isKnownNonNullAt(const Value *V, const Instruction *CtxI,
const DominatorTree *DT) {
+ if (isa<ConstantPointerNull>(V) || isa<UndefValue>(V))
+ return false;
+
if (isKnownNonNull(V))
return true;
- return CtxI ? ::isKnownNonNullFromDominatingCondition(V, CtxI, DT) : false;
+ if (!CtxI || !DT)
+ return false;
+
+ return ::isKnownNonNullFromDominatingCondition(V, CtxI, DT);
}
-OverflowResult llvm::computeOverflowForUnsignedMul(Value *LHS, Value *RHS,
+OverflowResult llvm::computeOverflowForUnsignedMul(const Value *LHS,
+ const Value *RHS,
const DataLayout &DL,
AssumptionCache *AC,
const Instruction *CxtI,
@@ -3322,7 +3469,8 @@ OverflowResult llvm::computeOverflowForUnsignedMul(Value *LHS, Value *RHS,
return OverflowResult::MayOverflow;
}
-OverflowResult llvm::computeOverflowForUnsignedAdd(Value *LHS, Value *RHS,
+OverflowResult llvm::computeOverflowForUnsignedAdd(const Value *LHS,
+ const Value *RHS,
const DataLayout &DL,
AssumptionCache *AC,
const Instruction *CxtI,
@@ -3351,9 +3499,13 @@ OverflowResult llvm::computeOverflowForUnsignedAdd(Value *LHS, Value *RHS,
return OverflowResult::MayOverflow;
}
-static OverflowResult computeOverflowForSignedAdd(
- Value *LHS, Value *RHS, AddOperator *Add, const DataLayout &DL,
- AssumptionCache *AC, const Instruction *CxtI, const DominatorTree *DT) {
+static OverflowResult computeOverflowForSignedAdd(const Value *LHS,
+ const Value *RHS,
+ const AddOperator *Add,
+ const DataLayout &DL,
+ AssumptionCache *AC,
+ const Instruction *CxtI,
+ const DominatorTree *DT) {
if (Add && Add->hasNoSignedWrap()) {
return OverflowResult::NeverOverflows;
}
@@ -3395,7 +3547,8 @@ static OverflowResult computeOverflowForSignedAdd(
return OverflowResult::MayOverflow;
}
-bool llvm::isOverflowIntrinsicNoWrap(IntrinsicInst *II, DominatorTree &DT) {
+bool llvm::isOverflowIntrinsicNoWrap(const IntrinsicInst *II,
+ const DominatorTree &DT) {
#ifndef NDEBUG
auto IID = II->getIntrinsicID();
assert((IID == Intrinsic::sadd_with_overflow ||
@@ -3407,11 +3560,11 @@ bool llvm::isOverflowIntrinsicNoWrap(IntrinsicInst *II, DominatorTree &DT) {
"Not an overflow intrinsic!");
#endif
- SmallVector<BranchInst *, 2> GuardingBranches;
- SmallVector<ExtractValueInst *, 2> Results;
+ SmallVector<const BranchInst *, 2> GuardingBranches;
+ SmallVector<const ExtractValueInst *, 2> Results;
- for (User *U : II->users()) {
- if (auto *EVI = dyn_cast<ExtractValueInst>(U)) {
+ for (const User *U : II->users()) {
+ if (const auto *EVI = dyn_cast<ExtractValueInst>(U)) {
assert(EVI->getNumIndices() == 1 && "Obvious from CI's type");
if (EVI->getIndices()[0] == 0)
@@ -3419,8 +3572,8 @@ bool llvm::isOverflowIntrinsicNoWrap(IntrinsicInst *II, DominatorTree &DT) {
else {
assert(EVI->getIndices()[0] == 1 && "Obvious from CI's type");
- for (auto *U : EVI->users())
- if (auto *B = dyn_cast<BranchInst>(U)) {
+ for (const auto *U : EVI->users())
+ if (const auto *B = dyn_cast<BranchInst>(U)) {
assert(B->isConditional() && "How else is it using an i1?");
GuardingBranches.push_back(B);
}
@@ -3432,13 +3585,13 @@ bool llvm::isOverflowIntrinsicNoWrap(IntrinsicInst *II, DominatorTree &DT) {
}
}
- auto AllUsesGuardedByBranch = [&](BranchInst *BI) {
+ auto AllUsesGuardedByBranch = [&](const BranchInst *BI) {
BasicBlockEdge NoWrapEdge(BI->getParent(), BI->getSuccessor(1));
if (!NoWrapEdge.isSingleEdge())
return false;
// Check if all users of the add are provably no-wrap.
- for (auto *Result : Results) {
+ for (const auto *Result : Results) {
      // If the extractvalue itself is not executed on overflow, then we don't
// need to check each use separately, since domination is transitive.
if (DT.dominates(NoWrapEdge, Result->getParent()))
@@ -3456,7 +3609,7 @@ bool llvm::isOverflowIntrinsicNoWrap(IntrinsicInst *II, DominatorTree &DT) {
}
-OverflowResult llvm::computeOverflowForSignedAdd(AddOperator *Add,
+OverflowResult llvm::computeOverflowForSignedAdd(const AddOperator *Add,
const DataLayout &DL,
AssumptionCache *AC,
const Instruction *CxtI,
@@ -3465,7 +3618,8 @@ OverflowResult llvm::computeOverflowForSignedAdd(AddOperator *Add,
Add, DL, AC, CxtI, DT);
}
-OverflowResult llvm::computeOverflowForSignedAdd(Value *LHS, Value *RHS,
+OverflowResult llvm::computeOverflowForSignedAdd(const Value *LHS,
+ const Value *RHS,
const DataLayout &DL,
AssumptionCache *AC,
const Instruction *CxtI,
@@ -3502,12 +3656,27 @@ bool llvm::isGuaranteedToTransferExecutionToSuccessor(const Instruction *I) {
return false;
// Calls can throw, or contain an infinite loop, or kill the process.
- if (CallSite CS = CallSite(const_cast<Instruction*>(I))) {
- // Calls which don't write to arbitrary memory are safe.
- // FIXME: Ignoring infinite loops without any side-effects is too aggressive,
- // but it's consistent with other passes. See http://llvm.org/PR965 .
- // FIXME: This isn't aggressive enough; a call which only writes to a
- // global is guaranteed to return.
+ if (auto CS = ImmutableCallSite(I)) {
+ // Call sites that throw have implicit non-local control flow.
+ if (!CS.doesNotThrow())
+ return false;
+
+ // Non-throwing call sites can loop infinitely, call exit/pthread_exit
+ // etc. and thus not return. However, LLVM already assumes that
+ //
+ // - Thread exiting actions are modeled as writes to memory invisible to
+ // the program.
+ //
+ // - Loops that don't have side effects (side effects are volatile/atomic
+ // stores and IO) always terminate (see http://llvm.org/PR965).
+ // Furthermore IO itself is also modeled as writes to memory invisible to
+ // the program.
+ //
+ // We rely on those assumptions here, and use the memory effects of the call
+ // target as a proxy for checking that it always returns.
+
+ // FIXME: This isn't aggressive enough; a call which only writes to a global
+ // is guaranteed to return.
return CS.onlyReadsMemory() || CS.onlyAccessesArgMemory() ||
match(I, m_Intrinsic<Intrinsic::assume>());
}
@@ -3688,7 +3857,7 @@ bool llvm::isKnownNotFullPoison(const Instruction *PoisonI) {
return false;
}
-static bool isKnownNonNaN(Value *V, FastMathFlags FMF) {
+static bool isKnownNonNaN(const Value *V, FastMathFlags FMF) {
if (FMF.noNaNs())
return true;
@@ -3697,12 +3866,90 @@ static bool isKnownNonNaN(Value *V, FastMathFlags FMF) {
return false;
}
-static bool isKnownNonZero(Value *V) {
+static bool isKnownNonZero(const Value *V) {
if (auto *C = dyn_cast<ConstantFP>(V))
return !C->isZero();
return false;
}
+/// Match non-obvious integer minimum and maximum sequences.
+static SelectPatternResult matchMinMax(CmpInst::Predicate Pred,
+ Value *CmpLHS, Value *CmpRHS,
+ Value *TrueVal, Value *FalseVal,
+ Value *&LHS, Value *&RHS) {
+ if (Pred != CmpInst::ICMP_SGT && Pred != CmpInst::ICMP_SLT)
+ return {SPF_UNKNOWN, SPNB_NA, false};
+
+ // Z = X -nsw Y
+ // (X >s Y) ? 0 : Z ==> (Z >s 0) ? 0 : Z ==> SMIN(Z, 0)
+ // (X <s Y) ? 0 : Z ==> (Z <s 0) ? 0 : Z ==> SMAX(Z, 0)
+ if (match(TrueVal, m_Zero()) &&
+ match(FalseVal, m_NSWSub(m_Specific(CmpLHS), m_Specific(CmpRHS)))) {
+ LHS = TrueVal;
+ RHS = FalseVal;
+ return {Pred == CmpInst::ICMP_SGT ? SPF_SMIN : SPF_SMAX, SPNB_NA, false};
+ }
+
+ // Z = X -nsw Y
+ // (X >s Y) ? Z : 0 ==> (Z >s 0) ? Z : 0 ==> SMAX(Z, 0)
+ // (X <s Y) ? Z : 0 ==> (Z <s 0) ? Z : 0 ==> SMIN(Z, 0)
+ if (match(FalseVal, m_Zero()) &&
+ match(TrueVal, m_NSWSub(m_Specific(CmpLHS), m_Specific(CmpRHS)))) {
+ LHS = TrueVal;
+ RHS = FalseVal;
+ return {Pred == CmpInst::ICMP_SGT ? SPF_SMAX : SPF_SMIN, SPNB_NA, false};
+ }
+
+ const APInt *C1;
+ if (!match(CmpRHS, m_APInt(C1)))
+ return {SPF_UNKNOWN, SPNB_NA, false};
+
+ // An unsigned min/max can be written with a signed compare.
+ const APInt *C2;
+ if ((CmpLHS == TrueVal && match(FalseVal, m_APInt(C2))) ||
+ (CmpLHS == FalseVal && match(TrueVal, m_APInt(C2)))) {
+ // Is the sign bit set?
+ // (X <s 0) ? X : MAXVAL ==> (X >u MAXVAL) ? X : MAXVAL ==> UMAX
+ // (X <s 0) ? MAXVAL : X ==> (X >u MAXVAL) ? MAXVAL : X ==> UMIN
+ if (Pred == CmpInst::ICMP_SLT && *C1 == 0 && C2->isMaxSignedValue()) {
+ LHS = TrueVal;
+ RHS = FalseVal;
+ return {CmpLHS == TrueVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false};
+ }
+
+ // Is the sign bit clear?
+ // (X >s -1) ? MINVAL : X ==> (X <u MINVAL) ? MINVAL : X ==> UMAX
+ // (X >s -1) ? X : MINVAL ==> (X <u MINVAL) ? X : MINVAL ==> UMIN
+ if (Pred == CmpInst::ICMP_SGT && C1->isAllOnesValue() &&
+ C2->isMinSignedValue()) {
+ LHS = TrueVal;
+ RHS = FalseVal;
+ return {CmpLHS == FalseVal ? SPF_UMAX : SPF_UMIN, SPNB_NA, false};
+ }
+ }
+
+ // Look through 'not' ops to find disguised signed min/max.
+ // (X >s C) ? ~X : ~C ==> (~X <s ~C) ? ~X : ~C ==> SMIN(~X, ~C)
+ // (X <s C) ? ~X : ~C ==> (~X >s ~C) ? ~X : ~C ==> SMAX(~X, ~C)
+ if (match(TrueVal, m_Not(m_Specific(CmpLHS))) &&
+ match(FalseVal, m_APInt(C2)) && ~(*C1) == *C2) {
+ LHS = TrueVal;
+ RHS = FalseVal;
+ return {Pred == CmpInst::ICMP_SGT ? SPF_SMIN : SPF_SMAX, SPNB_NA, false};
+ }
+
+ // (X >s C) ? ~C : ~X ==> (~X <s ~C) ? ~C : ~X ==> SMAX(~C, ~X)
+ // (X <s C) ? ~C : ~X ==> (~X >s ~C) ? ~C : ~X ==> SMIN(~C, ~X)
+ if (match(FalseVal, m_Not(m_Specific(CmpLHS))) &&
+ match(TrueVal, m_APInt(C2)) && ~(*C1) == *C2) {
+ LHS = TrueVal;
+ RHS = FalseVal;
+ return {Pred == CmpInst::ICMP_SGT ? SPF_SMAX : SPF_SMIN, SPNB_NA, false};
+ }
+
+ return {SPF_UNKNOWN, SPNB_NA, false};
+}
+
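[Not part of the diff: an exhaustive i8 check of one of the identities matchMinMax now recognises, namely that "(X <s 0) ? X : MAXVAL" computes the unsigned maximum of X and MAXVAL; the remaining patterns can be checked the same way.]

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  for (int i = 0; i < 256; ++i) {
    uint8_t X = (uint8_t)i;
    // "X <s 0" for i8 is exactly "sign bit set", i.e. "X >u 127".
    uint8_t Sel = (X & 0x80) ? X : (uint8_t)127;  // (X <s 0) ? X : MAXVAL
    uint8_t UMax = std::max<uint8_t>(X, 127);     // UMAX(X, MAXVAL)
    assert(Sel == UMax);
  }
  return 0;
}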
static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred,
FastMathFlags FMF,
Value *CmpLHS, Value *CmpRHS,
@@ -3801,39 +4048,26 @@ static SelectPatternResult matchSelectPattern(CmpInst::Predicate Pred,
}
}
- if (ConstantInt *C1 = dyn_cast<ConstantInt>(CmpRHS)) {
+ const APInt *C1;
+ if (match(CmpRHS, m_APInt(C1))) {
if ((CmpLHS == TrueVal && match(FalseVal, m_Neg(m_Specific(CmpLHS)))) ||
(CmpLHS == FalseVal && match(TrueVal, m_Neg(m_Specific(CmpLHS))))) {
// ABS(X) ==> (X >s 0) ? X : -X and (X >s -1) ? X : -X
// NABS(X) ==> (X >s 0) ? -X : X and (X >s -1) ? -X : X
- if (Pred == ICmpInst::ICMP_SGT && (C1->isZero() || C1->isMinusOne())) {
+ if (Pred == ICmpInst::ICMP_SGT && (*C1 == 0 || C1->isAllOnesValue())) {
return {(CmpLHS == TrueVal) ? SPF_ABS : SPF_NABS, SPNB_NA, false};
}
// ABS(X) ==> (X <s 0) ? -X : X and (X <s 1) ? -X : X
// NABS(X) ==> (X <s 0) ? X : -X and (X <s 1) ? X : -X
- if (Pred == ICmpInst::ICMP_SLT && (C1->isZero() || C1->isOne())) {
+ if (Pred == ICmpInst::ICMP_SLT && (*C1 == 0 || *C1 == 1)) {
return {(CmpLHS == FalseVal) ? SPF_ABS : SPF_NABS, SPNB_NA, false};
}
}
-
- // Y >s C ? ~Y : ~C == ~Y <s ~C ? ~Y : ~C = SMIN(~Y, ~C)
- if (const auto *C2 = dyn_cast<ConstantInt>(FalseVal)) {
- if (Pred == ICmpInst::ICMP_SGT && C1->getType() == C2->getType() &&
- ~C1->getValue() == C2->getValue() &&
- (match(TrueVal, m_Not(m_Specific(CmpLHS))) ||
- match(CmpLHS, m_Not(m_Specific(TrueVal))))) {
- LHS = TrueVal;
- RHS = FalseVal;
- return {SPF_SMIN, SPNB_NA, false};
- }
- }
}
- // TODO: (X > 4) ? X : 5 --> (X >= 5) ? X : 5 --> MAX(X, 5)
-
- return {SPF_UNKNOWN, SPNB_NA, false};
+ return matchMinMax(Pred, CmpLHS, CmpRHS, TrueVal, FalseVal, LHS, RHS);
}
static Value *lookThroughCast(CmpInst *CmpI, Value *V1, Value *V2,
@@ -3932,30 +4166,9 @@ SelectPatternResult llvm::matchSelectPattern(Value *V, Value *&LHS, Value *&RHS,
LHS, RHS);
}
-ConstantRange llvm::getConstantRangeFromMetadata(MDNode &Ranges) {
- const unsigned NumRanges = Ranges.getNumOperands() / 2;
- assert(NumRanges >= 1 && "Must have at least one range!");
- assert(Ranges.getNumOperands() % 2 == 0 && "Must be a sequence of pairs");
-
- auto *FirstLow = mdconst::extract<ConstantInt>(Ranges.getOperand(0));
- auto *FirstHigh = mdconst::extract<ConstantInt>(Ranges.getOperand(1));
-
- ConstantRange CR(FirstLow->getValue(), FirstHigh->getValue());
-
- for (unsigned i = 1; i < NumRanges; ++i) {
- auto *Low = mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 0));
- auto *High = mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 1));
-
- // Note: unionWith will potentially create a range that contains values not
- // contained in any of the original N ranges.
- CR = CR.unionWith(ConstantRange(Low->getValue(), High->getValue()));
- }
-
- return CR;
-}
-
/// Return true if "icmp Pred LHS RHS" is always true.
-static bool isTruePredicate(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
+static bool isTruePredicate(CmpInst::Predicate Pred,
+ const Value *LHS, const Value *RHS,
const DataLayout &DL, unsigned Depth,
AssumptionCache *AC, const Instruction *CxtI,
const DominatorTree *DT) {
@@ -3984,7 +4197,8 @@ static bool isTruePredicate(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
return true;
// Match A to (X +_{nuw} CA) and B to (X +_{nuw} CB)
- auto MatchNUWAddsToSameValue = [&](Value *A, Value *B, Value *&X,
+ auto MatchNUWAddsToSameValue = [&](const Value *A, const Value *B,
+ const Value *&X,
const APInt *&CA, const APInt *&CB) {
if (match(A, m_NUWAdd(m_Value(X), m_APInt(CA))) &&
match(B, m_NUWAdd(m_Specific(X), m_APInt(CB))))
@@ -4004,7 +4218,7 @@ static bool isTruePredicate(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
return false;
};
- Value *X;
+ const Value *X;
const APInt *CLHS, *CRHS;
if (MatchNUWAddsToSameValue(LHS, RHS, X, CLHS, CRHS))
return CLHS->ule(*CRHS);
@@ -4017,8 +4231,9 @@ static bool isTruePredicate(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
/// Return true if "icmp Pred BLHS BRHS" is true whenever "icmp Pred
/// ALHS ARHS" is true. Otherwise, return None.
static Optional<bool>
-isImpliedCondOperands(CmpInst::Predicate Pred, Value *ALHS, Value *ARHS,
- Value *BLHS, Value *BRHS, const DataLayout &DL,
+isImpliedCondOperands(CmpInst::Predicate Pred, const Value *ALHS,
+ const Value *ARHS, const Value *BLHS,
+ const Value *BRHS, const DataLayout &DL,
unsigned Depth, AssumptionCache *AC,
const Instruction *CxtI, const DominatorTree *DT) {
switch (Pred) {
@@ -4045,7 +4260,8 @@ isImpliedCondOperands(CmpInst::Predicate Pred, Value *ALHS, Value *ARHS,
/// Return true if the operands of the two compares match. IsSwappedOps is true
/// when the operands match, but are swapped.
-static bool isMatchingOps(Value *ALHS, Value *ARHS, Value *BLHS, Value *BRHS,
+static bool isMatchingOps(const Value *ALHS, const Value *ARHS,
+ const Value *BLHS, const Value *BRHS,
bool &IsSwappedOps) {
bool IsMatchingOps = (ALHS == BLHS && ARHS == BRHS);
@@ -4057,9 +4273,11 @@ static bool isMatchingOps(Value *ALHS, Value *ARHS, Value *BLHS, Value *BRHS,
/// true. Return false if "icmp1 APred ALHS ARHS" implies "icmp2 BPred BLHS
/// BRHS" is false. Otherwise, return None if we can't infer anything.
static Optional<bool> isImpliedCondMatchingOperands(CmpInst::Predicate APred,
- Value *ALHS, Value *ARHS,
+ const Value *ALHS,
+ const Value *ARHS,
CmpInst::Predicate BPred,
- Value *BLHS, Value *BRHS,
+ const Value *BLHS,
+ const Value *BRHS,
bool IsSwappedOps) {
// Canonicalize the operands so they're matching.
if (IsSwappedOps) {
@@ -4078,9 +4296,10 @@ static Optional<bool> isImpliedCondMatchingOperands(CmpInst::Predicate APred,
/// true. Return false if "icmp1 APred ALHS C1" implies "icmp2 BPred BLHS
/// C2" is false. Otherwise, return None if we can't infer anything.
static Optional<bool>
-isImpliedCondMatchingImmOperands(CmpInst::Predicate APred, Value *ALHS,
- ConstantInt *C1, CmpInst::Predicate BPred,
- Value *BLHS, ConstantInt *C2) {
+isImpliedCondMatchingImmOperands(CmpInst::Predicate APred, const Value *ALHS,
+ const ConstantInt *C1,
+ CmpInst::Predicate BPred,
+ const Value *BLHS, const ConstantInt *C2) {
assert(ALHS == BLHS && "LHS operands must match.");
ConstantRange DomCR =
ConstantRange::makeExactICmpRegion(APred, C1->getValue());
@@ -4095,7 +4314,7 @@ isImpliedCondMatchingImmOperands(CmpInst::Predicate APred, Value *ALHS,
return None;
}
-Optional<bool> llvm::isImpliedCondition(Value *LHS, Value *RHS,
+Optional<bool> llvm::isImpliedCondition(const Value *LHS, const Value *RHS,
const DataLayout &DL, bool InvertAPred,
unsigned Depth, AssumptionCache *AC,
const Instruction *CxtI,
diff --git a/contrib/llvm/lib/Analysis/VectorUtils.cpp b/contrib/llvm/lib/Analysis/VectorUtils.cpp
index 53e7153a350f..7e598f435ff5 100644
--- a/contrib/llvm/lib/Analysis/VectorUtils.cpp
+++ b/contrib/llvm/lib/Analysis/VectorUtils.cpp
@@ -107,11 +107,11 @@ unsigned llvm::getGEPInductionOperand(const GetElementPtrInst *Gep) {
while (LastOperand > 1 && match(Gep->getOperand(LastOperand), m_Zero())) {
// Find the type we're currently indexing into.
gep_type_iterator GEPTI = gep_type_begin(Gep);
- std::advance(GEPTI, LastOperand - 1);
+ std::advance(GEPTI, LastOperand - 2);
// If it's a type with the same allocation size as the result of the GEP we
// can peel off the zero index.
- if (DL.getTypeAllocSize(*GEPTI) != GEPAllocSize)
+ if (DL.getTypeAllocSize(GEPTI.getIndexedType()) != GEPAllocSize)
break;
--LastOperand;
}
@@ -454,9 +454,10 @@ Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) {
SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
I0->getAllMetadataOtherThanDebugLoc(Metadata);
- for (auto Kind : { LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
- LLVMContext::MD_noalias, LLVMContext::MD_fpmath,
- LLVMContext::MD_nontemporal }) {
+ for (auto Kind :
+ {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
+ LLVMContext::MD_noalias, LLVMContext::MD_fpmath,
+ LLVMContext::MD_nontemporal, LLVMContext::MD_invariant_load}) {
MDNode *MD = I0->getMetadata(Kind);
for (int J = 1, E = VL.size(); MD && J != E; ++J) {
@@ -469,13 +470,12 @@ Instruction *llvm::propagateMetadata(Instruction *Inst, ArrayRef<Value *> VL) {
case LLVMContext::MD_alias_scope:
MD = MDNode::getMostGenericAliasScope(MD, IMD);
break;
- case LLVMContext::MD_noalias:
- MD = MDNode::intersect(MD, IMD);
- break;
case LLVMContext::MD_fpmath:
MD = MDNode::getMostGenericFPMath(MD, IMD);
break;
+ case LLVMContext::MD_noalias:
case LLVMContext::MD_nontemporal:
+ case LLVMContext::MD_invariant_load:
MD = MDNode::intersect(MD, IMD);
break;
default:
diff --git a/contrib/llvm/lib/AsmParser/LLLexer.cpp b/contrib/llvm/lib/AsmParser/LLLexer.cpp
index 507e7e76ecd2..752942fc9fcc 100644
--- a/contrib/llvm/lib/AsmParser/LLLexer.cpp
+++ b/contrib/llvm/lib/AsmParser/LLLexer.cpp
@@ -12,21 +12,18 @@
//===----------------------------------------------------------------------===//
#include "LLLexer.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/AsmParser/Parser.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instruction.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/raw_ostream.h"
+#include <cassert>
#include <cctype>
#include <cstdio>
-#include <cstdlib>
-#include <cstring>
+
using namespace llvm;
bool LLLexer::Error(LocTy ErrorLoc, const Twine &Msg) const {
@@ -147,18 +144,15 @@ static bool isLabelChar(char C) {
C == '.' || C == '_';
}
-
/// isLabelTail - Return true if this pointer points to a valid end of a label.
static const char *isLabelTail(const char *CurPtr) {
- while (1) {
+ while (true) {
if (CurPtr[0] == ':') return CurPtr+1;
if (!isLabelChar(CurPtr[0])) return nullptr;
++CurPtr;
}
}
-
-
//===----------------------------------------------------------------------===//
// Lexer definition.
//===----------------------------------------------------------------------===//
@@ -185,68 +179,69 @@ int LLLexer::getNextChar() {
}
}
-
lltok::Kind LLLexer::LexToken() {
- TokStart = CurPtr;
+ while (true) {
+ TokStart = CurPtr;
- int CurChar = getNextChar();
- switch (CurChar) {
- default:
- // Handle letters: [a-zA-Z_]
- if (isalpha(static_cast<unsigned char>(CurChar)) || CurChar == '_')
- return LexIdentifier();
+ int CurChar = getNextChar();
+ switch (CurChar) {
+ default:
+ // Handle letters: [a-zA-Z_]
+ if (isalpha(static_cast<unsigned char>(CurChar)) || CurChar == '_')
+ return LexIdentifier();
- return lltok::Error;
- case EOF: return lltok::Eof;
- case 0:
- case ' ':
- case '\t':
- case '\n':
- case '\r':
- // Ignore whitespace.
- return LexToken();
- case '+': return LexPositive();
- case '@': return LexAt();
- case '$': return LexDollar();
- case '%': return LexPercent();
- case '"': return LexQuote();
- case '.':
- if (const char *Ptr = isLabelTail(CurPtr)) {
- CurPtr = Ptr;
- StrVal.assign(TokStart, CurPtr-1);
- return lltok::LabelStr;
- }
- if (CurPtr[0] == '.' && CurPtr[1] == '.') {
- CurPtr += 2;
- return lltok::dotdotdot;
+ return lltok::Error;
+ case EOF: return lltok::Eof;
+ case 0:
+ case ' ':
+ case '\t':
+ case '\n':
+ case '\r':
+ // Ignore whitespace.
+ continue;
+ case '+': return LexPositive();
+ case '@': return LexAt();
+ case '$': return LexDollar();
+ case '%': return LexPercent();
+ case '"': return LexQuote();
+ case '.':
+ if (const char *Ptr = isLabelTail(CurPtr)) {
+ CurPtr = Ptr;
+ StrVal.assign(TokStart, CurPtr-1);
+ return lltok::LabelStr;
+ }
+ if (CurPtr[0] == '.' && CurPtr[1] == '.') {
+ CurPtr += 2;
+ return lltok::dotdotdot;
+ }
+ return lltok::Error;
+ case ';':
+ SkipLineComment();
+ continue;
+ case '!': return LexExclaim();
+ case '#': return LexHash();
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ case '-':
+ return LexDigitOrNegative();
+ case '=': return lltok::equal;
+ case '[': return lltok::lsquare;
+ case ']': return lltok::rsquare;
+ case '{': return lltok::lbrace;
+ case '}': return lltok::rbrace;
+ case '<': return lltok::less;
+ case '>': return lltok::greater;
+ case '(': return lltok::lparen;
+ case ')': return lltok::rparen;
+ case ',': return lltok::comma;
+ case '*': return lltok::star;
+ case '|': return lltok::bar;
}
- return lltok::Error;
- case ';':
- SkipLineComment();
- return LexToken();
- case '!': return LexExclaim();
- case '#': return LexHash();
- case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':
- case '-':
- return LexDigitOrNegative();
- case '=': return lltok::equal;
- case '[': return lltok::lsquare;
- case ']': return lltok::rsquare;
- case '{': return lltok::lbrace;
- case '}': return lltok::rbrace;
- case '<': return lltok::less;
- case '>': return lltok::greater;
- case '(': return lltok::lparen;
- case ')': return lltok::rparen;
- case ',': return lltok::comma;
- case '*': return lltok::star;
- case '|': return lltok::bar;
}
}
void LLLexer::SkipLineComment() {
- while (1) {
+ while (true) {
if (CurPtr[0] == '\n' || CurPtr[0] == '\r' || getNextChar() == EOF)
return;
}
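
The hunk above wraps the token dispatch in a single `while (true)` loop so that whitespace and line comments `continue` instead of tail-calling `LexToken()` again, which avoids growing the stack on long runs of skipped input. A minimal standalone sketch of the same loop-instead-of-recursion pattern, using hypothetical names rather than the LLLexer API:

    #include <cctype>
    #include <string>

    enum class Tok { End, Ident, Number, Error };

    // Illustrative only: a scan loop that skips whitespace and ';' comments
    // by looping instead of recursing into itself.
    Tok nextToken(const std::string &Src, size_t &Pos) {
      while (true) {
        if (Pos >= Src.size())
          return Tok::End;
        char C = Src[Pos++];
        if (std::isspace(static_cast<unsigned char>(C)))
          continue;                      // was: return nextToken(Src, Pos);
        if (C == ';') {                  // line comment
          while (Pos < Src.size() && Src[Pos] != '\n')
            ++Pos;
          continue;
        }
        if (std::isalpha(static_cast<unsigned char>(C)))
          return Tok::Ident;
        if (std::isdigit(static_cast<unsigned char>(C)))
          return Tok::Number;
        return Tok::Error;
      }
    }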
@@ -271,7 +266,7 @@ lltok::Kind LLLexer::LexDollar() {
if (CurPtr[0] == '"') {
++CurPtr;
- while (1) {
+ while (true) {
int CurChar = getNextChar();
if (CurChar == EOF) {
@@ -300,7 +295,7 @@ lltok::Kind LLLexer::LexDollar() {
/// ReadString - Read a string until the closing quote.
lltok::Kind LLLexer::ReadString(lltok::Kind kind) {
const char *Start = CurPtr;
- while (1) {
+ while (true) {
int CurChar = getNextChar();
if (CurChar == EOF) {
@@ -338,7 +333,7 @@ lltok::Kind LLLexer::LexVar(lltok::Kind Var, lltok::Kind VarID) {
if (CurPtr[0] == '"') {
++CurPtr;
- while (1) {
+ while (true) {
int CurChar = getNextChar();
if (CurChar == EOF) {
@@ -488,11 +483,12 @@ lltok::Kind LLLexer::LexIdentifier() {
CurPtr = KeywordEnd;
--StartChar;
StringRef Keyword(StartChar, CurPtr - StartChar);
+
#define KEYWORD(STR) \
do { \
if (Keyword == #STR) \
return lltok::kw_##STR; \
- } while (0)
+ } while (false)
KEYWORD(true); KEYWORD(false);
KEYWORD(declare); KEYWORD(define);
@@ -557,6 +553,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(nsw);
KEYWORD(exact);
KEYWORD(inbounds);
+ KEYWORD(inrange);
KEYWORD(align);
KEYWORD(addrspace);
KEYWORD(section);
@@ -591,6 +588,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(intel_ocl_bicc);
KEYWORD(x86_64_sysvcc);
KEYWORD(x86_64_win64cc);
+ KEYWORD(x86_regcallcc);
KEYWORD(webkit_jscc);
KEYWORD(swiftcc);
KEYWORD(anyregcc);
@@ -697,6 +695,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(cleanup);
KEYWORD(catch);
KEYWORD(filter);
+
#undef KEYWORD
// Keywords for types.
@@ -707,6 +706,7 @@ lltok::Kind LLLexer::LexIdentifier() {
return lltok::Type; \
} \
} while (false)
+
TYPEKEYWORD("void", Type::getVoidTy(Context));
TYPEKEYWORD("half", Type::getHalfTy(Context));
TYPEKEYWORD("float", Type::getFloatTy(Context));
@@ -718,6 +718,7 @@ lltok::Kind LLLexer::LexIdentifier() {
TYPEKEYWORD("metadata", Type::getMetadataTy(Context));
TYPEKEYWORD("x86_mmx", Type::getX86_MMXTy(Context));
TYPEKEYWORD("token", Type::getTokenTy(Context));
+
#undef TYPEKEYWORD
// Keywords for instructions.
@@ -782,6 +783,7 @@ lltok::Kind LLLexer::LexIdentifier() {
INSTKEYWORD(catchswitch, CatchSwitch);
INSTKEYWORD(catchpad, CatchPad);
INSTKEYWORD(cleanuppad, CleanupPad);
+
#undef INSTKEYWORD
#define DWKEYWORD(TYPE, TOKEN) \
@@ -791,6 +793,7 @@ lltok::Kind LLLexer::LexIdentifier() {
return lltok::TOKEN; \
} \
} while (false)
+
DWKEYWORD(TAG, DwarfTag);
DWKEYWORD(ATE, DwarfAttEncoding);
DWKEYWORD(VIRTUALITY, DwarfVirtuality);
@@ -798,11 +801,19 @@ lltok::Kind LLLexer::LexIdentifier() {
DWKEYWORD(CC, DwarfCC);
DWKEYWORD(OP, DwarfOp);
DWKEYWORD(MACINFO, DwarfMacinfo);
+
#undef DWKEYWORD
+
if (Keyword.startswith("DIFlag")) {
StrVal.assign(Keyword.begin(), Keyword.end());
return lltok::DIFlag;
}
+
+ if (Keyword.startswith("CSK_")) {
+ StrVal.assign(Keyword.begin(), Keyword.end());
+ return lltok::ChecksumKind;
+ }
+
if (Keyword == "NoDebug" || Keyword == "FullDebug" ||
Keyword == "LineTablesOnly") {
StrVal.assign(Keyword.begin(), Keyword.end());
@@ -817,7 +828,7 @@ lltok::Kind LLLexer::LexIdentifier() {
int len = CurPtr-TokStart-3;
uint32_t bits = len * 4;
StringRef HexStr(TokStart + 3, len);
- if (!std::all_of(HexStr.begin(), HexStr.end(), isxdigit)) {
+ if (!all_of(HexStr, isxdigit)) {
// Bad token, return it as an error.
CurPtr = TokStart+3;
return lltok::Error;
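
Here `std::all_of(HexStr.begin(), HexStr.end(), isxdigit)` becomes the range form `all_of(HexStr, isxdigit)` from `llvm/ADT/STLExtras.h`, which is now included at the top of the file. A tiny sketch of the same helper on a StringRef (the lambda is just a portable stand-in for the predicate):

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/ADT/StringRef.h"
    #include <cctype>
    using namespace llvm;

    // Range form of std::all_of from STLExtras: no begin()/end() boilerplate.
    bool isAllHex(StringRef S) {
      return all_of(S, [](char C) {
        return std::isxdigit(static_cast<unsigned char>(C)) != 0;
      });
    }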
@@ -871,7 +882,8 @@ lltok::Kind LLLexer::Lex0x() {
// HexFPConstant - Floating point constant represented in IEEE format as a
// hexadecimal number for when exponential notation is not precise enough.
// Half, Float, and double only.
- APFloatVal = APFloat(BitsToDouble(HexIntToVal(TokStart+2, CurPtr)));
+ APFloatVal = APFloat(APFloat::IEEEdouble(),
+ APInt(64, HexIntToVal(TokStart + 2, CurPtr)));
return lltok::APFloat;
}
@@ -881,20 +893,20 @@ lltok::Kind LLLexer::Lex0x() {
case 'K':
// F80HexFPConstant - x87 long double in hexadecimal format (10 bytes)
FP80HexToIntPair(TokStart+3, CurPtr, Pair);
- APFloatVal = APFloat(APFloat::x87DoubleExtended, APInt(80, Pair));
+ APFloatVal = APFloat(APFloat::x87DoubleExtended(), APInt(80, Pair));
return lltok::APFloat;
case 'L':
// F128HexFPConstant - IEEE 128-bit in hexadecimal format (16 bytes)
HexToIntPair(TokStart+3, CurPtr, Pair);
- APFloatVal = APFloat(APFloat::IEEEquad, APInt(128, Pair));
+ APFloatVal = APFloat(APFloat::IEEEquad(), APInt(128, Pair));
return lltok::APFloat;
case 'M':
// PPC128HexFPConstant - PowerPC 128-bit in hexadecimal format (16 bytes)
HexToIntPair(TokStart+3, CurPtr, Pair);
- APFloatVal = APFloat(APFloat::PPCDoubleDouble, APInt(128, Pair));
+ APFloatVal = APFloat(APFloat::PPCDoubleDouble(), APInt(128, Pair));
return lltok::APFloat;
case 'H':
- APFloatVal = APFloat(APFloat::IEEEhalf,
+ APFloatVal = APFloat(APFloat::IEEEhalf(),
APInt(16,HexIntToVal(TokStart+3, CurPtr)));
return lltok::APFloat;
}
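
These hunks track an LLVM API change: the `fltSemantics` objects are now obtained through accessor functions (`APFloat::IEEEdouble()`, `APFloat::IEEEhalf()`, and so on) rather than static data members, and the hex double constant is built from an `APInt` instead of going through `BitsToDouble`. A small sketch of constructing an APFloat from raw bits under the new accessors, assuming LLVM headers are available:

    #include "llvm/ADT/APFloat.h"
    #include "llvm/ADT/APInt.h"
    #include <cstdint>
    using namespace llvm;

    // Build an IEEE double from its 64-bit encoding; the payload never
    // round-trips through a host 'double'.
    APFloat doubleFromBits(uint64_t Bits) {
      return APFloat(APFloat::IEEEdouble(), APInt(64, Bits));
    }

    // Half precision works the same way with a 16-bit payload.
    APFloat halfFromBits(uint16_t Bits) {
      return APFloat(APFloat::IEEEhalf(), APInt(16, Bits));
    }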
@@ -961,7 +973,7 @@ lltok::Kind LLLexer::LexDigitOrNegative() {
}
}
- APFloatVal = APFloat(APFloat::IEEEdouble,
+ APFloatVal = APFloat(APFloat::IEEEdouble(),
StringRef(TokStart, CurPtr - TokStart));
return lltok::APFloat;
}
@@ -998,7 +1010,7 @@ lltok::Kind LLLexer::LexPositive() {
}
}
- APFloatVal = APFloat(APFloat::IEEEdouble,
+ APFloatVal = APFloat(APFloat::IEEEdouble(),
StringRef(TokStart, CurPtr - TokStart));
return lltok::APFloat;
}
diff --git a/contrib/llvm/lib/AsmParser/LLParser.cpp b/contrib/llvm/lib/AsmParser/LLParser.cpp
index a2fcbf41204d..4cd986e143b6 100644
--- a/contrib/llvm/lib/AsmParser/LLParser.cpp
+++ b/contrib/llvm/lib/AsmParser/LLParser.cpp
@@ -12,29 +12,46 @@
//===----------------------------------------------------------------------===//
#include "LLParser.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/AsmParser/SlotMapping.h"
+#include "llvm/IR/Argument.h"
#include "llvm/IR/AutoUpgrade.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallingConv.h"
-#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Comdat.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalIFunc.h"
+#include "llvm/IR/GlobalObject.h"
#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
#include "llvm/IR/ValueSymbolTable.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SaveAndRestore.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstring>
+#include <iterator>
+#include <vector>
+
using namespace llvm;
static std::string getTypeString(Type *T) {
@@ -104,16 +121,13 @@ void LLParser::restoreParsingState(const SlotMapping *Slots) {
/// module.
bool LLParser::ValidateEndOfModule() {
// Handle any function attribute group forward references.
- for (std::map<Value*, std::vector<unsigned> >::iterator
- I = ForwardRefAttrGroups.begin(), E = ForwardRefAttrGroups.end();
- I != E; ++I) {
- Value *V = I->first;
- std::vector<unsigned> &Vec = I->second;
+ for (const auto &RAG : ForwardRefAttrGroups) {
+ Value *V = RAG.first;
+ const std::vector<unsigned> &Attrs = RAG.second;
AttrBuilder B;
- for (std::vector<unsigned>::iterator VI = Vec.begin(), VE = Vec.end();
- VI != VE; ++VI)
- B.merge(NumberedAttrBuilders[*VI]);
+ for (const auto &Attr : Attrs)
+ B.merge(NumberedAttrBuilders[Attr]);
if (Function *Fn = dyn_cast<Function>(V)) {
AttributeSet AS = Fn->getAttributes();
@@ -205,8 +219,13 @@ bool LLParser::ValidateEndOfModule() {
N.second->resolveCycles();
}
- for (unsigned I = 0, E = InstsWithTBAATag.size(); I < E; I++)
- UpgradeInstWithTBAATag(InstsWithTBAATag[I]);
+ for (auto *Inst : InstsWithTBAATag) {
+ MDNode *MD = Inst->getMetadata(LLVMContext::MD_tbaa);
+ assert(MD && "UpgradeInstWithTBAATag should have a TBAA tag");
+ auto *UpgradedMD = UpgradeTBAANode(*MD);
+ if (MD != UpgradedMD)
+ Inst->setMetadata(LLVMContext::MD_tbaa, UpgradedMD);
+ }
// Look for intrinsic functions and CallInst that need to be upgraded
for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; )
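
The TBAA upgrade above now reads the `!tbaa` attachment directly, runs it through `UpgradeTBAANode`, and writes it back only when the node actually changed. A generic sketch of that read-upgrade-write pattern on an instruction's metadata (the upgrade callback here is hypothetical):

    #include "llvm/IR/Instruction.h"
    #include "llvm/IR/Metadata.h"
    using namespace llvm;

    // Illustrative only: re-attach a metadata node of the given kind if an
    // upgrade callback produced a different node, otherwise leave it alone.
    void upgradeAttachment(Instruction &I, unsigned KindID,
                           MDNode *(*UpgradeNode)(MDNode &)) {
      MDNode *MD = I.getMetadata(KindID);
      if (!MD)
        return;
      MDNode *Upgraded = UpgradeNode(*MD);
      if (Upgraded != MD)
        I.setMetadata(KindID, Upgraded);
    }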
@@ -247,7 +266,7 @@ bool LLParser::ValidateEndOfModule() {
//===----------------------------------------------------------------------===//
bool LLParser::ParseTopLevelEntities() {
- while (1) {
+ while (true) {
switch (Lex.getKind()) {
default: return TokError("expected top-level entity");
case lltok::Eof: return false;
@@ -270,12 +289,13 @@ bool LLParser::ParseTopLevelEntities() {
case lltok::kw_attributes: if (ParseUnnamedAttrGrp()) return true; break;
case lltok::kw_uselistorder: if (ParseUseListOrder()) return true; break;
case lltok::kw_uselistorder_bb:
- if (ParseUseListOrderBB()) return true; break;
+ if (ParseUseListOrderBB())
+ return true;
+ break;
}
}
}
-
/// toplevelentity
/// ::= 'module' 'asm' STRINGCONSTANT
bool LLParser::ParseModuleAsm() {
@@ -376,7 +396,6 @@ bool LLParser::ParseUnnamedType() {
return false;
}
-
/// toplevelentity
/// ::= LocalVar '=' 'type' type
bool LLParser::ParseNamedType() {
@@ -403,7 +422,6 @@ bool LLParser::ParseNamedType() {
return false;
}
-
/// toplevelentity
/// ::= 'declare' FunctionHeader
bool LLParser::ParseDeclare() {
@@ -1213,7 +1231,6 @@ GlobalValue *LLParser::GetGlobalVal(unsigned ID, Type *Ty, LocTy Loc) {
return FwdVal;
}
-
//===----------------------------------------------------------------------===//
// Comdat Reference/Resolution Routines.
//===----------------------------------------------------------------------===//
@@ -1231,7 +1248,6 @@ Comdat *LLParser::getComdat(const std::string &Name, LocTy Loc) {
return C;
}
-
//===----------------------------------------------------------------------===//
// Helper Routines.
//===----------------------------------------------------------------------===//
@@ -1257,7 +1273,7 @@ bool LLParser::ParseStringConstant(std::string &Result) {
/// ParseUInt32
/// ::= uint32
-bool LLParser::ParseUInt32(unsigned &Val) {
+bool LLParser::ParseUInt32(uint32_t &Val) {
if (Lex.getKind() != lltok::APSInt || Lex.getAPSIntVal().isSigned())
return TokError("expected integer");
uint64_t Val64 = Lex.getAPSIntVal().getLimitedValue(0xFFFFFFFFULL+1);
@@ -1350,7 +1366,7 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) {
B.clear();
- while (1) {
+ while (true) {
lltok::Kind Token = Lex.getKind();
switch (Token) {
default: // End of attributes.
@@ -1439,7 +1455,7 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) {
B.clear();
- while (1) {
+ while (true) {
lltok::Kind Token = Lex.getKind();
switch (Token) {
default: // End of attributes.
@@ -1676,6 +1692,7 @@ bool LLParser::ParseOptionalCallingConv(unsigned &CC) {
case lltok::kw_coldcc: CC = CallingConv::Cold; break;
case lltok::kw_x86_stdcallcc: CC = CallingConv::X86_StdCall; break;
case lltok::kw_x86_fastcallcc: CC = CallingConv::X86_FastCall; break;
+ case lltok::kw_x86_regcallcc: CC = CallingConv::X86_RegCall; break;
case lltok::kw_x86_thiscallcc: CC = CallingConv::X86_ThisCall; break;
case lltok::kw_x86_vectorcallcc:CC = CallingConv::X86_VectorCall; break;
case lltok::kw_arm_apcscc: CC = CallingConv::ARM_APCS; break;
@@ -2024,7 +2041,7 @@ bool LLParser::ParseType(Type *&Result, const Twine &Msg, bool AllowVoid) {
}
// Parse the type suffixes.
- while (1) {
+ while (true) {
switch (Lex.getKind()) {
// End of type.
default:
@@ -2357,7 +2374,6 @@ bool LLParser::ParseStructDefinition(SMLoc TypeLoc, StringRef Name,
return false;
}
-
/// ParseStructType: Handles packed and unpacked types. </> parsed elsewhere.
/// StructType
/// ::= '{' '}'
@@ -2480,14 +2496,13 @@ bool LLParser::PerFunctionState::FinishFunction() {
return false;
}
-
/// GetVal - Get a value with the specified name or ID, creating a
/// forward reference record if needed. This can return null if the value
/// exists but does not have the right type.
Value *LLParser::PerFunctionState::GetVal(const std::string &Name, Type *Ty,
LocTy Loc) {
// Look this name up in the normal function symbol table.
- Value *Val = F.getValueSymbolTable().lookup(Name);
+ Value *Val = F.getValueSymbolTable()->lookup(Name);
// If this is a forward reference for the value, see if we already created a
// forward ref record.
@@ -2905,7 +2920,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
return Error(Label.Loc, "cannot take address of numeric label after "
"the function is defined");
BB = dyn_cast_or_null<BasicBlock>(
- F->getValueSymbolTable().lookup(Label.StrVal));
+ F->getValueSymbolTable()->lookup(Label.StrVal));
if (!BB)
return Error(Label.Loc, "referenced value is not a basic block");
}
@@ -3157,7 +3172,9 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
return true;
}
- if (ParseGlobalValueVector(Elts) ||
+ Optional<unsigned> InRangeOp;
+ if (ParseGlobalValueVector(
+ Elts, Opc == Instruction::GetElementPtr ? &InRangeOp : nullptr) ||
ParseToken(lltok::rparen, "expected ')' in constantexpr"))
return true;
@@ -3173,20 +3190,23 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
ExplicitTypeLoc,
"explicit pointee type doesn't match operand's pointee type");
+ unsigned GEPWidth =
+ BaseType->isVectorTy() ? BaseType->getVectorNumElements() : 0;
+
ArrayRef<Constant *> Indices(Elts.begin() + 1, Elts.end());
for (Constant *Val : Indices) {
Type *ValTy = Val->getType();
if (!ValTy->getScalarType()->isIntegerTy())
return Error(ID.Loc, "getelementptr index must be an integer");
- if (ValTy->isVectorTy() != BaseType->isVectorTy())
- return Error(ID.Loc, "getelementptr index type missmatch");
if (ValTy->isVectorTy()) {
unsigned ValNumEl = ValTy->getVectorNumElements();
- unsigned PtrNumEl = BaseType->getVectorNumElements();
- if (ValNumEl != PtrNumEl)
+ if (GEPWidth && (ValNumEl != GEPWidth))
return Error(
ID.Loc,
"getelementptr vector index has a wrong number of elements");
+ // GEPWidth may have been unknown because the base is a scalar,
+ // but it is known now.
+ GEPWidth = ValNumEl;
}
}
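
The rewritten index check above lets a scalar base take vector indices: `GEPWidth` starts at 0 for a scalar base, the first vector index pins it to that index's element count, and every later vector index must then agree. A standalone sketch of that consistency rule, independent of the LLVM type classes:

    #include <vector>

    // Illustrative only: 0 means "width not fixed yet"; the first vector
    // index fixes it, and every later vector index must match.
    bool indexWidthsConsistent(unsigned BaseWidth,
                               const std::vector<unsigned> &IndexWidths) {
      unsigned GEPWidth = BaseWidth; // 0 for a scalar base
      for (unsigned W : IndexWidths) {
        if (W == 0)                  // scalar index is always fine
          continue;
        if (GEPWidth && W != GEPWidth)
          return false;              // mismatched vector widths
        GEPWidth = W;                // width is known from here on
      }
      return true;
    }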
@@ -3196,8 +3216,16 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
if (!GetElementPtrInst::getIndexedType(Ty, Indices))
return Error(ID.Loc, "invalid getelementptr indices");
- ID.ConstantVal =
- ConstantExpr::getGetElementPtr(Ty, Elts[0], Indices, InBounds);
+
+ if (InRangeOp) {
+ if (*InRangeOp == 0)
+ return Error(ID.Loc,
+ "inrange keyword may not appear on pointer operand");
+ --*InRangeOp;
+ }
+
+ ID.ConstantVal = ConstantExpr::getGetElementPtr(Ty, Elts[0], Indices,
+ InBounds, InRangeOp);
} else if (Opc == Instruction::Select) {
if (Elts.size() != 3)
return Error(ID.Loc, "expected three operands to select");
@@ -3280,8 +3308,9 @@ bool LLParser::parseOptionalComdat(StringRef GlobalName, Comdat *&C) {
/// ParseGlobalValueVector
/// ::= /*empty*/
-/// ::= TypeAndValue (',' TypeAndValue)*
-bool LLParser::ParseGlobalValueVector(SmallVectorImpl<Constant *> &Elts) {
+/// ::= [inrange] TypeAndValue (',' [inrange] TypeAndValue)*
+bool LLParser::ParseGlobalValueVector(SmallVectorImpl<Constant *> &Elts,
+ Optional<unsigned> *InRangeOp) {
// Empty list.
if (Lex.getKind() == lltok::rbrace ||
Lex.getKind() == lltok::rsquare ||
@@ -3289,14 +3318,14 @@ bool LLParser::ParseGlobalValueVector(SmallVectorImpl<Constant *> &Elts) {
Lex.getKind() == lltok::rparen)
return false;
- Constant *C;
- if (ParseGlobalTypeAndValue(C)) return true;
- Elts.push_back(C);
+ do {
+ if (InRangeOp && !*InRangeOp && EatIfPresent(lltok::kw_inrange))
+ *InRangeOp = Elts.size();
- while (EatIfPresent(lltok::comma)) {
+ Constant *C;
if (ParseGlobalTypeAndValue(C)) return true;
Elts.push_back(C);
- }
+ } while (EatIfPresent(lltok::comma));
return false;
}
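
The rewrite above folds the "first element, then comma-separated rest" shape into one do/while and records an optional `inrange` keyword as the index of the element it precedes, keeping only the first occurrence and only when the caller asked for it. A generic sketch of the same comma-list-with-marker parsing over a hypothetical token stream:

    #include <string>
    #include <vector>

    // Illustrative only: parse "item [, item]*" where any item may be
    // preceded by a marker keyword; record the index of the first marked item.
    struct ListParse {
      std::vector<std::string> Items;
      int MarkedIndex = -1; // -1 means "no marker seen"
    };

    ListParse parseList(const std::vector<std::string> &Toks) {
      ListParse Result;
      size_t Pos = 0;
      while (true) {
        if (Result.MarkedIndex < 0 && Pos < Toks.size() &&
            Toks[Pos] == "inrange") {
          Result.MarkedIndex = static_cast<int>(Result.Items.size());
          ++Pos;
        }
        if (Pos >= Toks.size())
          break;
        Result.Items.push_back(Toks[Pos++]);
        if (Pos >= Toks.size() || Toks[Pos] != ",")
          break;                     // no comma: the list is done
        ++Pos;                       // eat the comma and parse the next item
      }
      return Result;
    }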
@@ -3354,40 +3383,49 @@ struct MDUnsignedField : public MDFieldImpl<uint64_t> {
MDUnsignedField(uint64_t Default = 0, uint64_t Max = UINT64_MAX)
: ImplTy(Default), Max(Max) {}
};
+
struct LineField : public MDUnsignedField {
LineField() : MDUnsignedField(0, UINT32_MAX) {}
};
+
struct ColumnField : public MDUnsignedField {
ColumnField() : MDUnsignedField(0, UINT16_MAX) {}
};
+
struct DwarfTagField : public MDUnsignedField {
DwarfTagField() : MDUnsignedField(0, dwarf::DW_TAG_hi_user) {}
DwarfTagField(dwarf::Tag DefaultTag)
: MDUnsignedField(DefaultTag, dwarf::DW_TAG_hi_user) {}
};
+
struct DwarfMacinfoTypeField : public MDUnsignedField {
DwarfMacinfoTypeField() : MDUnsignedField(0, dwarf::DW_MACINFO_vendor_ext) {}
DwarfMacinfoTypeField(dwarf::MacinfoRecordType DefaultType)
: MDUnsignedField(DefaultType, dwarf::DW_MACINFO_vendor_ext) {}
};
+
struct DwarfAttEncodingField : public MDUnsignedField {
DwarfAttEncodingField() : MDUnsignedField(0, dwarf::DW_ATE_hi_user) {}
};
+
struct DwarfVirtualityField : public MDUnsignedField {
DwarfVirtualityField() : MDUnsignedField(0, dwarf::DW_VIRTUALITY_max) {}
};
+
struct DwarfLangField : public MDUnsignedField {
DwarfLangField() : MDUnsignedField(0, dwarf::DW_LANG_hi_user) {}
};
+
struct DwarfCCField : public MDUnsignedField {
DwarfCCField() : MDUnsignedField(0, dwarf::DW_CC_hi_user) {}
};
+
struct EmissionKindField : public MDUnsignedField {
EmissionKindField() : MDUnsignedField(0, DICompileUnit::LastEmissionKind) {}
};
-struct DIFlagField : public MDUnsignedField {
- DIFlagField() : MDUnsignedField(0, UINT32_MAX) {}
+struct DIFlagField : public MDFieldImpl<DINode::DIFlags> {
+ DIFlagField() : MDFieldImpl(DINode::FlagZero) {}
};
struct MDSignedField : public MDFieldImpl<int64_t> {
@@ -3403,24 +3441,33 @@ struct MDSignedField : public MDFieldImpl<int64_t> {
struct MDBoolField : public MDFieldImpl<bool> {
MDBoolField(bool Default = false) : ImplTy(Default) {}
};
+
struct MDField : public MDFieldImpl<Metadata *> {
bool AllowNull;
MDField(bool AllowNull = true) : ImplTy(nullptr), AllowNull(AllowNull) {}
};
+
struct MDConstant : public MDFieldImpl<ConstantAsMetadata *> {
MDConstant() : ImplTy(nullptr) {}
};
+
struct MDStringField : public MDFieldImpl<MDString *> {
bool AllowEmpty;
MDStringField(bool AllowEmpty = true)
: ImplTy(nullptr), AllowEmpty(AllowEmpty) {}
};
+
struct MDFieldList : public MDFieldImpl<SmallVector<Metadata *, 4>> {
MDFieldList() : ImplTy(SmallVector<Metadata *, 4>()) {}
};
-} // end namespace
+struct ChecksumKindField : public MDFieldImpl<DIFile::ChecksumKind> {
+ ChecksumKindField() : ImplTy(DIFile::CSK_None) {}
+ ChecksumKindField(DIFile::ChecksumKind CSKind) : ImplTy(CSKind) {}
+};
+
+} // end anonymous namespace
namespace llvm {
@@ -3585,12 +3632,15 @@ bool LLParser::ParseMDField(LocTy Loc, StringRef Name,
/// ::= DIFlagVector '|' DIFlagFwdDecl '|' uint32 '|' DIFlagPublic
template <>
bool LLParser::ParseMDField(LocTy Loc, StringRef Name, DIFlagField &Result) {
- assert(Result.Max == UINT32_MAX && "Expected only 32-bits");
// Parser for a single flag.
- auto parseFlag = [&](unsigned &Val) {
- if (Lex.getKind() == lltok::APSInt && !Lex.getAPSIntVal().isSigned())
- return ParseUInt32(Val);
+ auto parseFlag = [&](DINode::DIFlags &Val) {
+ if (Lex.getKind() == lltok::APSInt && !Lex.getAPSIntVal().isSigned()) {
+ uint32_t TempVal = static_cast<uint32_t>(Val);
+ bool Res = ParseUInt32(TempVal);
+ Val = static_cast<DINode::DIFlags>(TempVal);
+ return Res;
+ }
if (Lex.getKind() != lltok::DIFlag)
return TokError("expected debug info flag");
@@ -3604,9 +3654,9 @@ bool LLParser::ParseMDField(LocTy Loc, StringRef Name, DIFlagField &Result) {
};
// Parse the flags and combine them together.
- unsigned Combined = 0;
+ DINode::DIFlags Combined = DINode::FlagZero;
do {
- unsigned Val;
+ DINode::DIFlags Val;
if (parseFlag(Val))
return true;
Combined |= Val;
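
The parser now accumulates debug-info flags into the typed `DINode::DIFlags` bitmask rather than a raw `unsigned`, so the '|' folding stays inside that enum type. A small self-contained sketch of parsing '|'-separated flag names into a typed bitmask, with hypothetical flag names and lookup table:

    #include <cstdint>
    #include <map>
    #include <sstream>
    #include <string>

    enum class DemoFlags : uint32_t { Zero = 0, Artificial = 1, FwdDecl = 2, Public = 4 };

    inline DemoFlags operator|(DemoFlags A, DemoFlags B) {
      return static_cast<DemoFlags>(static_cast<uint32_t>(A) |
                                    static_cast<uint32_t>(B));
    }

    // Parse "FlagA | FlagB | ..." into one combined bitmask; unknown names fail.
    bool parseFlags(const std::string &Text, DemoFlags &Out) {
      static const std::map<std::string, DemoFlags> Table = {
          {"Artificial", DemoFlags::Artificial},
          {"FwdDecl", DemoFlags::FwdDecl},
          {"Public", DemoFlags::Public}};
      DemoFlags Combined = DemoFlags::Zero;
      std::istringstream In(Text);
      std::string Name;
      while (std::getline(In, Name, '|')) {
        size_t B = Name.find_first_not_of(' '), E = Name.find_last_not_of(' ');
        if (B == std::string::npos)
          return false;              // empty segment between '|' separators
        auto It = Table.find(Name.substr(B, E - B + 1));
        if (It == Table.end())
          return false;              // unknown flag name
        Combined = Combined | It->second;
      }
      Out = Combined;
      return true;
    }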
@@ -3671,16 +3721,6 @@ bool LLParser::ParseMDField(LocTy Loc, StringRef Name, MDField &Result) {
}
template <>
-bool LLParser::ParseMDField(LocTy Loc, StringRef Name, MDConstant &Result) {
- Metadata *MD;
- if (ParseValueAsMetadata(MD, "expected constant", nullptr))
- return true;
-
- Result.assign(cast<ConstantAsMetadata>(MD));
- return false;
-}
-
-template <>
bool LLParser::ParseMDField(LocTy Loc, StringRef Name, MDStringField &Result) {
LocTy ValueLoc = Lex.getLoc();
std::string S;
@@ -3704,6 +3744,20 @@ bool LLParser::ParseMDField(LocTy Loc, StringRef Name, MDFieldList &Result) {
return false;
}
+template <>
+bool LLParser::ParseMDField(LocTy Loc, StringRef Name,
+ ChecksumKindField &Result) {
+ if (Lex.getKind() != lltok::ChecksumKind)
+ return TokError(
+ "invalid checksum kind" + Twine(" '") + Lex.getStrVal() + "'");
+
+ DIFile::ChecksumKind CSKind = DIFile::getChecksumKind(Lex.getStrVal());
+
+ Result.assign(CSKind);
+ Lex.Lex();
+ return false;
+}
+
} // end namespace llvm
template <class ParserTy>
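
The new path above lexes `CSK_`-prefixed identifiers into a dedicated `ChecksumKind` token and maps the spelling to a `DIFile::ChecksumKind` when the `checksumkind:` field is parsed. A sketch of that string-to-enum step using `llvm::StringSwitch`, with hypothetical enum and kind names standing in for the real mapping in `DIFile::getChecksumKind`:

    #include "llvm/ADT/StringRef.h"
    #include "llvm/ADT/StringSwitch.h"
    using namespace llvm;

    enum class DemoChecksumKind { None, MD5, SHA1 };

    // Map the keyword spelled in the IR text onto an enum; unknown spellings
    // fall back to None so the caller can report "invalid checksum kind".
    DemoChecksumKind demoChecksumKind(StringRef Keyword) {
      return StringSwitch<DemoChecksumKind>(Keyword)
          .Case("CSK_MD5", DemoChecksumKind::MD5)
          .Case("CSK_SHA1", DemoChecksumKind::SHA1)
          .Default(DemoChecksumKind::None);
    }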
@@ -3841,7 +3895,7 @@ bool LLParser::ParseDIBasicType(MDNode *&Result, bool IsDistinct) {
OPTIONAL(tag, DwarfTagField, (dwarf::DW_TAG_base_type)); \
OPTIONAL(name, MDStringField, ); \
OPTIONAL(size, MDUnsignedField, (0, UINT64_MAX)); \
- OPTIONAL(align, MDUnsignedField, (0, UINT64_MAX)); \
+ OPTIONAL(align, MDUnsignedField, (0, UINT32_MAX)); \
OPTIONAL(encoding, DwarfAttEncodingField, );
PARSE_MD_FIELDS();
#undef VISIT_MD_FIELDS
@@ -3864,7 +3918,7 @@ bool LLParser::ParseDIDerivedType(MDNode *&Result, bool IsDistinct) {
OPTIONAL(scope, MDField, ); \
REQUIRED(baseType, MDField, ); \
OPTIONAL(size, MDUnsignedField, (0, UINT64_MAX)); \
- OPTIONAL(align, MDUnsignedField, (0, UINT64_MAX)); \
+ OPTIONAL(align, MDUnsignedField, (0, UINT32_MAX)); \
OPTIONAL(offset, MDUnsignedField, (0, UINT64_MAX)); \
OPTIONAL(flags, DIFlagField, ); \
OPTIONAL(extraData, MDField, );
@@ -3887,7 +3941,7 @@ bool LLParser::ParseDICompositeType(MDNode *&Result, bool IsDistinct) {
OPTIONAL(scope, MDField, ); \
OPTIONAL(baseType, MDField, ); \
OPTIONAL(size, MDUnsignedField, (0, UINT64_MAX)); \
- OPTIONAL(align, MDUnsignedField, (0, UINT64_MAX)); \
+ OPTIONAL(align, MDUnsignedField, (0, UINT32_MAX)); \
OPTIONAL(offset, MDUnsignedField, (0, UINT64_MAX)); \
OPTIONAL(flags, DIFlagField, ); \
OPTIONAL(elements, MDField, ); \
@@ -3933,15 +3987,20 @@ bool LLParser::ParseDISubroutineType(MDNode *&Result, bool IsDistinct) {
}
/// ParseDIFileType:
-/// ::= !DIFileType(filename: "path/to/file", directory: "/path/to/dir")
+/// ::= !DIFileType(filename: "path/to/file", directory: "/path/to/dir"
+/// checksumkind: CSK_MD5,
+/// checksum: "000102030405060708090a0b0c0d0e0f")
bool LLParser::ParseDIFile(MDNode *&Result, bool IsDistinct) {
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
REQUIRED(filename, MDStringField, ); \
- REQUIRED(directory, MDStringField, );
+ REQUIRED(directory, MDStringField, ); \
+ OPTIONAL(checksumkind, ChecksumKindField, ); \
+ OPTIONAL(checksum, MDStringField, );
PARSE_MD_FIELDS();
#undef VISIT_MD_FIELDS
- Result = GET_OR_DISTINCT(DIFile, (Context, filename.Val, directory.Val));
+ Result = GET_OR_DISTINCT(DIFile, (Context, filename.Val, directory.Val,
+ checksumkind.Val, checksum.Val));
return false;
}
@@ -3969,14 +4028,16 @@ bool LLParser::ParseDICompileUnit(MDNode *&Result, bool IsDistinct) {
OPTIONAL(globals, MDField, ); \
OPTIONAL(imports, MDField, ); \
OPTIONAL(macros, MDField, ); \
- OPTIONAL(dwoId, MDUnsignedField, );
+ OPTIONAL(dwoId, MDUnsignedField, ); \
+ OPTIONAL(splitDebugInlining, MDBoolField, = true);
PARSE_MD_FIELDS();
#undef VISIT_MD_FIELDS
Result = DICompileUnit::getDistinct(
Context, language.Val, file.Val, producer.Val, isOptimized.Val, flags.Val,
runtimeVersion.Val, splitDebugFilename.Val, emissionKind.Val, enums.Val,
- retainedTypes.Val, globals.Val, imports.Val, macros.Val, dwoId.Val);
+ retainedTypes.Val, globals.Val, imports.Val, macros.Val, dwoId.Val,
+ splitDebugInlining.Val);
return false;
}
@@ -4066,12 +4127,13 @@ bool LLParser::ParseDINamespace(MDNode *&Result, bool IsDistinct) {
REQUIRED(scope, MDField, ); \
OPTIONAL(file, MDField, ); \
OPTIONAL(name, MDStringField, ); \
- OPTIONAL(line, LineField, );
+ OPTIONAL(line, LineField, ); \
+ OPTIONAL(exportSymbols, MDBoolField, );
PARSE_MD_FIELDS();
#undef VISIT_MD_FIELDS
Result = GET_OR_DISTINCT(DINamespace,
- (Context, scope.Val, file.Val, name.Val, line.Val));
+ (Context, scope.Val, file.Val, name.Val, line.Val, exportSymbols.Val));
return false;
}
@@ -4080,7 +4142,7 @@ bool LLParser::ParseDINamespace(MDNode *&Result, bool IsDistinct) {
bool LLParser::ParseDIMacro(MDNode *&Result, bool IsDistinct) {
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
REQUIRED(type, DwarfMacinfoTypeField, ); \
- REQUIRED(line, LineField, ); \
+ OPTIONAL(line, LineField, ); \
REQUIRED(name, MDStringField, ); \
OPTIONAL(value, MDStringField, );
PARSE_MD_FIELDS();
@@ -4096,7 +4158,7 @@ bool LLParser::ParseDIMacro(MDNode *&Result, bool IsDistinct) {
bool LLParser::ParseDIMacroFile(MDNode *&Result, bool IsDistinct) {
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
OPTIONAL(type, DwarfMacinfoTypeField, (dwarf::DW_MACINFO_start_file)); \
- REQUIRED(line, LineField, ); \
+ OPTIONAL(line, LineField, ); \
REQUIRED(file, MDField, ); \
OPTIONAL(nodes, MDField, );
PARSE_MD_FIELDS();
@@ -4107,7 +4169,6 @@ bool LLParser::ParseDIMacroFile(MDNode *&Result, bool IsDistinct) {
return false;
}
-
/// ParseDIModule:
/// ::= !DIModule(scope: !0, name: "SomeModule", configMacros: "-DNDEBUG",
/// includePath: "/usr/include", isysroot: "/")
@@ -4160,8 +4221,7 @@ bool LLParser::ParseDITemplateValueParameter(MDNode *&Result, bool IsDistinct) {
/// ParseDIGlobalVariable:
/// ::= !DIGlobalVariable(scope: !0, name: "foo", linkageName: "foo",
/// file: !1, line: 7, type: !2, isLocal: false,
-/// isDefinition: true, variable: i32* @foo,
-/// declaration: !3)
+/// isDefinition: true, declaration: !3, align: 8)
bool LLParser::ParseDIGlobalVariable(MDNode *&Result, bool IsDistinct) {
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
REQUIRED(name, MDStringField, (/* AllowEmpty */ false)); \
@@ -4172,23 +4232,25 @@ bool LLParser::ParseDIGlobalVariable(MDNode *&Result, bool IsDistinct) {
OPTIONAL(type, MDField, ); \
OPTIONAL(isLocal, MDBoolField, ); \
OPTIONAL(isDefinition, MDBoolField, (true)); \
- OPTIONAL(variable, MDConstant, ); \
- OPTIONAL(declaration, MDField, );
+ OPTIONAL(declaration, MDField, ); \
+ OPTIONAL(align, MDUnsignedField, (0, UINT32_MAX));
PARSE_MD_FIELDS();
#undef VISIT_MD_FIELDS
Result = GET_OR_DISTINCT(DIGlobalVariable,
(Context, scope.Val, name.Val, linkageName.Val,
file.Val, line.Val, type.Val, isLocal.Val,
- isDefinition.Val, variable.Val, declaration.Val));
+ isDefinition.Val, declaration.Val, align.Val));
return false;
}
/// ParseDILocalVariable:
/// ::= !DILocalVariable(arg: 7, scope: !0, name: "foo",
-/// file: !1, line: 7, type: !2, arg: 2, flags: 7)
+/// file: !1, line: 7, type: !2, arg: 2, flags: 7,
+/// align: 8)
/// ::= !DILocalVariable(scope: !0, name: "foo",
-/// file: !1, line: 7, type: !2, arg: 2, flags: 7)
+/// file: !1, line: 7, type: !2, arg: 2, flags: 7,
+/// align: 8)
bool LLParser::ParseDILocalVariable(MDNode *&Result, bool IsDistinct) {
#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
REQUIRED(scope, MDField, (/* AllowNull */ false)); \
@@ -4197,13 +4259,14 @@ bool LLParser::ParseDILocalVariable(MDNode *&Result, bool IsDistinct) {
OPTIONAL(file, MDField, ); \
OPTIONAL(line, LineField, ); \
OPTIONAL(type, MDField, ); \
- OPTIONAL(flags, DIFlagField, );
+ OPTIONAL(flags, DIFlagField, ); \
+ OPTIONAL(align, MDUnsignedField, (0, UINT32_MAX));
PARSE_MD_FIELDS();
#undef VISIT_MD_FIELDS
Result = GET_OR_DISTINCT(DILocalVariable,
(Context, scope.Val, name.Val, file.Val, line.Val,
- type.Val, arg.Val, flags.Val));
+ type.Val, arg.Val, flags.Val, align.Val));
return false;
}
@@ -4245,6 +4308,21 @@ bool LLParser::ParseDIExpression(MDNode *&Result, bool IsDistinct) {
return false;
}
+/// ParseDIGlobalVariableExpression:
+/// ::= !DIGlobalVariableExpression(var: !0, expr: !1)
+bool LLParser::ParseDIGlobalVariableExpression(MDNode *&Result,
+ bool IsDistinct) {
+#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \
+ REQUIRED(var, MDField, ); \
+ OPTIONAL(expr, MDField, );
+ PARSE_MD_FIELDS();
+#undef VISIT_MD_FIELDS
+
+ Result =
+ GET_OR_DISTINCT(DIGlobalVariableExpression, (Context, var.Val, expr.Val));
+ return false;
+}
+
/// ParseDIObjCProperty:
/// ::= !DIObjCProperty(name: "foo", file: !1, line: 7, setter: "setFoo",
/// getter: "getFoo", attributes: 7, type: !2)
@@ -4373,7 +4451,6 @@ bool LLParser::ParseMetadata(Metadata *&MD, PerFunctionState *PFS) {
return false;
}
-
//===----------------------------------------------------------------------===//
// Function Parsing.
//===----------------------------------------------------------------------===//
@@ -4419,13 +4496,13 @@ bool LLParser::ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V,
// The lexer has no type info, so builds all half, float, and double FP
// constants as double. Fix this here. Long double does not need this.
- if (&ID.APFloatVal.getSemantics() == &APFloat::IEEEdouble) {
+ if (&ID.APFloatVal.getSemantics() == &APFloat::IEEEdouble()) {
bool Ignored;
if (Ty->isHalfTy())
- ID.APFloatVal.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven,
+ ID.APFloatVal.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven,
&Ignored);
else if (Ty->isFloatTy())
- ID.APFloatVal.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven,
+ ID.APFloatVal.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
&Ignored);
}
V = ConstantFP::get(Context, ID.APFloatVal);
@@ -4540,7 +4617,6 @@ bool LLParser::ParseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc,
return false;
}
-
/// FunctionHeader
/// ::= OptionalLinkage OptionalVisibility OptionalCallingConv OptRetAttrs
/// OptUnnamedAddr Type GlobalName '(' ArgList ')' OptFuncAttrs OptSection
@@ -5121,7 +5197,6 @@ bool LLParser::ParseRet(Instruction *&Inst, BasicBlock *BB,
return false;
}
-
/// ParseBr
/// ::= 'br' TypeAndValue
/// ::= 'br' TypeAndValue ',' TypeAndValue ',' TypeAndValue
@@ -5236,7 +5311,6 @@ bool LLParser::ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS) {
return false;
}
-
/// ParseInvoke
/// ::= 'invoke' OptionalCallingConv OptionalAttrs Type Value ParamList
/// OptionalAttrs 'to' TypeAndValue 'unwind' TypeAndValue
@@ -5584,7 +5658,6 @@ bool LLParser::ParseLogical(Instruction *&Inst, PerFunctionState &PFS,
return false;
}
-
/// ParseCompare
/// ::= 'icmp' IPredicates TypeAndValue ',' Value
/// ::= 'fcmp' FPredicates TypeAndValue ',' Value
@@ -5749,7 +5822,8 @@ int LLParser::ParsePHI(Instruction *&Inst, PerFunctionState &PFS) {
bool AteExtraComma = false;
SmallVector<std::pair<Value*, BasicBlock*>, 16> PHIVals;
- while (1) {
+
+ while (true) {
PHIVals.push_back(std::make_pair(Op0, cast<BasicBlock>(Op1)));
if (!EatIfPresent(lltok::comma))
@@ -6477,7 +6551,7 @@ bool LLParser::ParseUseListOrderBB() {
return Error(Label.Loc, "invalid numeric label in uselistorder_bb");
if (Label.Kind != ValID::t_LocalName)
return Error(Label.Loc, "expected basic block name in uselistorder_bb");
- Value *V = F->getValueSymbolTable().lookup(Label.StrVal);
+ Value *V = F->getValueSymbolTable()->lookup(Label.StrVal);
if (!V)
return Error(Label.Loc, "invalid basic block in uselistorder_bb");
if (!isa<BasicBlock>(V))
diff --git a/contrib/llvm/lib/AsmParser/LLParser.h b/contrib/llvm/lib/AsmParser/LLParser.h
index 479ff96bc8a3..16d4e8b5baa0 100644
--- a/contrib/llvm/lib/AsmParser/LLParser.h
+++ b/contrib/llvm/lib/AsmParser/LLParser.h
@@ -411,7 +411,8 @@ namespace llvm {
bool ParseValID(ValID &ID, PerFunctionState *PFS = nullptr);
bool ParseGlobalValue(Type *Ty, Constant *&V);
bool ParseGlobalTypeAndValue(Constant *&V);
- bool ParseGlobalValueVector(SmallVectorImpl<Constant *> &Elts);
+ bool ParseGlobalValueVector(SmallVectorImpl<Constant *> &Elts,
+ Optional<unsigned> *InRangeOp = nullptr);
bool parseOptionalComdat(StringRef GlobalName, Comdat *&C);
bool ParseMetadataAsValue(Value *&V, PerFunctionState &PFS);
bool ParseValueAsMetadata(Metadata *&MD, const Twine &TypeMsg,
diff --git a/contrib/llvm/lib/AsmParser/LLToken.h b/contrib/llvm/lib/AsmParser/LLToken.h
index 37998e879503..048aeee90b35 100644
--- a/contrib/llvm/lib/AsmParser/LLToken.h
+++ b/contrib/llvm/lib/AsmParser/LLToken.h
@@ -103,6 +103,7 @@ enum Kind {
kw_nsw,
kw_exact,
kw_inbounds,
+ kw_inrange,
kw_align,
kw_addrspace,
kw_section,
@@ -127,6 +128,7 @@ enum Kind {
kw_x86_fastcallcc,
kw_x86_thiscallcc,
kw_x86_vectorcallcc,
+ kw_x86_regcallcc,
kw_arm_apcscc,
kw_arm_aapcscc,
kw_arm_aapcs_vfpcc,
@@ -351,6 +353,7 @@ enum Kind {
DwarfOp, // DW_OP_foo
DIFlag, // DIFlagFoo
DwarfMacinfo, // DW_MACINFO_foo
+ ChecksumKind, // CSK_foo
// Type valued tokens (TyVal).
Type,
diff --git a/contrib/llvm/lib/Bitcode/Reader/BitReader.cpp b/contrib/llvm/lib/Bitcode/Reader/BitReader.cpp
index 9ac3cb95d086..f64785b3ad92 100644
--- a/contrib/llvm/lib/Bitcode/Reader/BitReader.cpp
+++ b/contrib/llvm/lib/Bitcode/Reader/BitReader.cpp
@@ -9,7 +9,7 @@
#include "llvm-c/BitReader.h"
#include "llvm-c/Core.h"
-#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
@@ -34,13 +34,6 @@ LLVMBool LLVMParseBitcode2(LLVMMemoryBufferRef MemBuf,
return LLVMParseBitcodeInContext2(LLVMGetGlobalContext(), MemBuf, OutModule);
}
-static void diagnosticHandler(const DiagnosticInfo &DI, void *C) {
- auto *Message = reinterpret_cast<std::string *>(C);
- raw_string_ostream Stream(*Message);
- DiagnosticPrinterRawOStream DP(Stream);
- DI.print(DP);
-}
-
LLVMBool LLVMParseBitcodeInContext(LLVMContextRef ContextRef,
LLVMMemoryBufferRef MemBuf,
LLVMModuleRef *OutModule,
@@ -48,17 +41,12 @@ LLVMBool LLVMParseBitcodeInContext(LLVMContextRef ContextRef,
MemoryBufferRef Buf = unwrap(MemBuf)->getMemBufferRef();
LLVMContext &Ctx = *unwrap(ContextRef);
- LLVMContext::DiagnosticHandlerTy OldDiagnosticHandler =
- Ctx.getDiagnosticHandler();
- void *OldDiagnosticContext = Ctx.getDiagnosticContext();
- std::string Message;
- Ctx.setDiagnosticHandler(diagnosticHandler, &Message, true);
-
- ErrorOr<std::unique_ptr<Module>> ModuleOrErr = parseBitcodeFile(Buf, Ctx);
-
- Ctx.setDiagnosticHandler(OldDiagnosticHandler, OldDiagnosticContext, true);
-
- if (ModuleOrErr.getError()) {
+ Expected<std::unique_ptr<Module>> ModuleOrErr = parseBitcodeFile(Buf, Ctx);
+ if (Error Err = ModuleOrErr.takeError()) {
+ std::string Message;
+ handleAllErrors(std::move(Err), [&](ErrorInfoBase &EIB) {
+ Message = EIB.message();
+ });
if (OutMessage)
*OutMessage = strdup(Message.c_str());
*OutModule = wrap((Module *)nullptr);
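
The C API wrapper now consumes the `Expected<T>`/`Error` interface directly: `takeError()` pulls the error out, and `handleAllErrors` turns it into a printable message while marking it handled. A minimal sketch of that pattern against a hypothetical `Expected<int>`-returning function:

    #include "llvm/Support/Error.h"
    #include <string>
    using namespace llvm;

    // Hypothetical producer of an Expected value.
    Expected<int> mightFail(bool Fail) {
      if (Fail)
        return make_error<StringError>("something went wrong",
                                       inconvertibleErrorCode());
      return 42;
    }

    // Returns true on success; on failure the message is captured and the
    // Error is consumed (an unhandled Error aborts in assertion builds).
    bool runAndReport(bool Fail, std::string &Message) {
      Expected<int> R = mightFail(Fail);
      if (Error Err = R.takeError()) {
        handleAllErrors(std::move(Err), [&](ErrorInfoBase &EIB) {
          Message = EIB.message();
        });
        return false;
      }
      // Safe to dereference now that we know there is no error.
      return *R == 42;
    }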
@@ -75,7 +63,8 @@ LLVMBool LLVMParseBitcodeInContext2(LLVMContextRef ContextRef,
MemoryBufferRef Buf = unwrap(MemBuf)->getMemBufferRef();
LLVMContext &Ctx = *unwrap(ContextRef);
- ErrorOr<std::unique_ptr<Module>> ModuleOrErr = parseBitcodeFile(Buf, Ctx);
+ ErrorOr<std::unique_ptr<Module>> ModuleOrErr =
+ expectedToErrorOrAndEmitErrors(Ctx, parseBitcodeFile(Buf, Ctx));
if (ModuleOrErr.getError()) {
*OutModule = wrap((Module *)nullptr);
return 1;
@@ -92,23 +81,21 @@ LLVMBool LLVMGetBitcodeModuleInContext(LLVMContextRef ContextRef,
LLVMMemoryBufferRef MemBuf,
LLVMModuleRef *OutM, char **OutMessage) {
LLVMContext &Ctx = *unwrap(ContextRef);
- LLVMContext::DiagnosticHandlerTy OldDiagnosticHandler =
- Ctx.getDiagnosticHandler();
- void *OldDiagnosticContext = Ctx.getDiagnosticContext();
-
- std::string Message;
- Ctx.setDiagnosticHandler(diagnosticHandler, &Message, true);
std::unique_ptr<MemoryBuffer> Owner(unwrap(MemBuf));
-
- ErrorOr<std::unique_ptr<Module>> ModuleOrErr =
- getLazyBitcodeModule(std::move(Owner), Ctx);
- Owner.release();
- Ctx.setDiagnosticHandler(OldDiagnosticHandler, OldDiagnosticContext, true);
-
- if (ModuleOrErr.getError()) {
- *OutM = wrap((Module *)nullptr);
+ Expected<std::unique_ptr<Module>> ModuleOrErr =
+ getOwningLazyBitcodeModule(std::move(Owner), Ctx);
+ // Release the buffer if we didn't take ownership of it since we never owned
+ // it anyway.
+ (void)Owner.release();
+
+ if (Error Err = ModuleOrErr.takeError()) {
+ std::string Message;
+ handleAllErrors(std::move(Err), [&](ErrorInfoBase &EIB) {
+ Message = EIB.message();
+ });
if (OutMessage)
*OutMessage = strdup(Message.c_str());
+ *OutM = wrap((Module *)nullptr);
return 1;
}
@@ -123,8 +110,8 @@ LLVMBool LLVMGetBitcodeModuleInContext2(LLVMContextRef ContextRef,
LLVMContext &Ctx = *unwrap(ContextRef);
std::unique_ptr<MemoryBuffer> Owner(unwrap(MemBuf));
- ErrorOr<std::unique_ptr<Module>> ModuleOrErr =
- getLazyBitcodeModule(std::move(Owner), Ctx);
+ ErrorOr<std::unique_ptr<Module>> ModuleOrErr = expectedToErrorOrAndEmitErrors(
+ Ctx, getOwningLazyBitcodeModule(std::move(Owner), Ctx));
Owner.release();
if (ModuleOrErr.getError()) {
diff --git a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 73a30c61ecaa..03aefcf57118 100644
--- a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -7,38 +7,84 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Bitcode/BitcodeReader.h"
+#include "MetadataLoader.h"
+#include "ValueList.h"
+
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/Bitcode/BitstreamReader.h"
#include "llvm/Bitcode/LLVMBitCodes.h"
-#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
#include "llvm/IR/AutoUpgrade.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CallingConv.h"
#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Comdat.h"
+#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalIFunc.h"
+#include "llvm/IR/GlobalIndirectSymbol.h"
+#include "llvm/IR/GlobalObject.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/GVMaterializer.h"
#include "llvm/IR/InlineAsm.h"
-#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/IR/OperandTraits.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/TrackingMDRef.h"
+#include "llvm/IR/Type.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/DataStream.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/MathExtras.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
#include <deque>
+#include <limits>
+#include <map>
+#include <memory>
+#include <string>
+#include <system_error>
+#include <tuple>
#include <utility>
+#include <vector>
using namespace llvm;
@@ -48,159 +94,320 @@ static cl::opt<bool> PrintSummaryGUIDs(
"Print the global id for each value when reading the module summary"));
namespace {
+
enum {
SWITCH_INST_MAGIC = 0x4B5 // May 2012 => 1205 => Hex
};
-class BitcodeReaderValueList {
- std::vector<WeakVH> ValuePtrs;
-
- /// As we resolve forward-referenced constants, we add information about them
- /// to this vector. This allows us to resolve them in bulk instead of
- /// resolving each reference at a time. See the code in
- /// ResolveConstantForwardRefs for more information about this.
- ///
- /// The key of this vector is the placeholder constant, the value is the slot
- /// number that holds the resolved value.
- typedef std::vector<std::pair<Constant*, unsigned> > ResolveConstantsTy;
- ResolveConstantsTy ResolveConstants;
- LLVMContext &Context;
-public:
- BitcodeReaderValueList(LLVMContext &C) : Context(C) {}
- ~BitcodeReaderValueList() {
- assert(ResolveConstants.empty() && "Constants not resolved?");
- }
+Error error(const Twine &Message) {
+ return make_error<StringError>(
+ Message, make_error_code(BitcodeError::CorruptedBitcode));
+}
+
+/// Helper to read the header common to all bitcode files.
+bool hasValidBitcodeHeader(BitstreamCursor &Stream) {
+ // Sniff for the signature.
+ if (!Stream.canSkipToPos(4) ||
+ Stream.Read(8) != 'B' ||
+ Stream.Read(8) != 'C' ||
+ Stream.Read(4) != 0x0 ||
+ Stream.Read(4) != 0xC ||
+ Stream.Read(4) != 0xE ||
+ Stream.Read(4) != 0xD)
+ return false;
+ return true;
+}
+
+Expected<BitstreamCursor> initStream(MemoryBufferRef Buffer) {
+ const unsigned char *BufPtr = (const unsigned char *)Buffer.getBufferStart();
+ const unsigned char *BufEnd = BufPtr + Buffer.getBufferSize();
+
+ if (Buffer.getBufferSize() & 3)
+ return error("Invalid bitcode signature");
+
+ // If we have a wrapper header, parse it and ignore the non-bc file contents.
+ // The magic number is 0x0B17C0DE stored in little endian.
+ if (isBitcodeWrapper(BufPtr, BufEnd))
+ if (SkipBitcodeWrapperHeader(BufPtr, BufEnd, true))
+ return error("Invalid bitcode wrapper header");
+
+ BitstreamCursor Stream(ArrayRef<uint8_t>(BufPtr, BufEnd));
+ if (!hasValidBitcodeHeader(Stream))
+ return error("Invalid bitcode signature");
+
+ return std::move(Stream);
+}
- // vector compatibility methods
- unsigned size() const { return ValuePtrs.size(); }
- void resize(unsigned N) { ValuePtrs.resize(N); }
- void push_back(Value *V) { ValuePtrs.emplace_back(V); }
+/// Convert a string from a record into an std::string, return true on failure.
+template <typename StrTy>
+static bool convertToString(ArrayRef<uint64_t> Record, unsigned Idx,
+ StrTy &Result) {
+ if (Idx > Record.size())
+ return true;
- void clear() {
- assert(ResolveConstants.empty() && "Constants not resolved?");
- ValuePtrs.clear();
+ for (unsigned i = Idx, e = Record.size(); i != e; ++i)
+ Result += (char)Record[i];
+ return false;
+}
+
+// Strip all the TBAA attachment for the module.
+void stripTBAA(Module *M) {
+ for (auto &F : *M) {
+ if (F.isMaterializable())
+ continue;
+ for (auto &I : instructions(F))
+ I.setMetadata(LLVMContext::MD_tbaa, nullptr);
}
+}
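
The new free function `stripTBAA` above uses `instructions(F)` from `llvm/IR/InstIterator.h` to visit every instruction without hand-written nested loops over blocks. A sketch of the same traversal clearing an arbitrary metadata kind (the helper name is hypothetical):

    #include "llvm/IR/Function.h"
    #include "llvm/IR/InstIterator.h"
    #include "llvm/IR/Instruction.h"
    #include "llvm/IR/Module.h"
    using namespace llvm;

    // instructions(F) yields every instruction in the function in order;
    // here we simply drop the attachment of the requested metadata kind.
    void clearKindInModule(Module &M, unsigned KindID) {
      for (Function &F : M)
        for (Instruction &I : instructions(F))
          I.setMetadata(KindID, nullptr);
    }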
+
+/// Read the "IDENTIFICATION_BLOCK_ID" block, do some basic enforcement on the
+/// "epoch" encoded in the bitcode, and return the producer name if any.
+Expected<std::string> readIdentificationBlock(BitstreamCursor &Stream) {
+ if (Stream.EnterSubBlock(bitc::IDENTIFICATION_BLOCK_ID))
+ return error("Invalid record");
- Value *operator[](unsigned i) const {
- assert(i < ValuePtrs.size());
- return ValuePtrs[i];
+ // Read all the records.
+ SmallVector<uint64_t, 64> Record;
+
+ std::string ProducerIdentification;
+
+ while (true) {
+ BitstreamEntry Entry = Stream.advance();
+
+ switch (Entry.Kind) {
+ default:
+ case BitstreamEntry::Error:
+ return error("Malformed block");
+ case BitstreamEntry::EndBlock:
+ return ProducerIdentification;
+ case BitstreamEntry::Record:
+ // The interesting case.
+ break;
+ }
+
+ // Read a record.
+ Record.clear();
+ unsigned BitCode = Stream.readRecord(Entry.ID, Record);
+ switch (BitCode) {
+ default: // Default behavior: reject
+ return error("Invalid value");
+ case bitc::IDENTIFICATION_CODE_STRING: // IDENTIFICATION: [strchr x N]
+ convertToString(Record, 0, ProducerIdentification);
+ break;
+ case bitc::IDENTIFICATION_CODE_EPOCH: { // EPOCH: [epoch#]
+ unsigned epoch = (unsigned)Record[0];
+ if (epoch != bitc::BITCODE_CURRENT_EPOCH) {
+ return error(
+ Twine("Incompatible epoch: Bitcode '") + Twine(epoch) +
+ "' vs current: '" + Twine(bitc::BITCODE_CURRENT_EPOCH) + "'");
+ }
+ }
+ }
}
+}
+
+Expected<std::string> readIdentificationCode(BitstreamCursor &Stream) {
+ // We expect a number of well-defined blocks, though we don't necessarily
+ // need to understand them all.
+ while (true) {
+ if (Stream.AtEndOfStream())
+ return "";
+
+ BitstreamEntry Entry = Stream.advance();
+ switch (Entry.Kind) {
+ case BitstreamEntry::EndBlock:
+ case BitstreamEntry::Error:
+ return error("Malformed block");
- Value *back() const { return ValuePtrs.back(); }
- void pop_back() { ValuePtrs.pop_back(); }
- bool empty() const { return ValuePtrs.empty(); }
- void shrinkTo(unsigned N) {
- assert(N <= size() && "Invalid shrinkTo request!");
- ValuePtrs.resize(N);
+ case BitstreamEntry::SubBlock:
+ if (Entry.ID == bitc::IDENTIFICATION_BLOCK_ID)
+ return readIdentificationBlock(Stream);
+
+ // Ignore other sub-blocks.
+ if (Stream.SkipBlock())
+ return error("Malformed block");
+ continue;
+ case BitstreamEntry::Record:
+ Stream.skipRecord(Entry.ID);
+ continue;
+ }
}
+}
- Constant *getConstantFwdRef(unsigned Idx, Type *Ty);
- Value *getValueFwdRef(unsigned Idx, Type *Ty);
+Expected<bool> hasObjCCategoryInModule(BitstreamCursor &Stream) {
+ if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
+ return error("Invalid record");
- void assignValue(Value *V, unsigned Idx);
+ SmallVector<uint64_t, 64> Record;
+ // Read all the records for this module.
- /// Once all constants are read, this method bulk resolves any forward
- /// references.
- void resolveConstantForwardRefs();
-};
+ while (true) {
+ BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
-class BitcodeReaderMetadataList {
- unsigned NumFwdRefs;
- bool AnyFwdRefs;
- unsigned MinFwdRef;
- unsigned MaxFwdRef;
-
- /// Array of metadata references.
- ///
- /// Don't use std::vector here. Some versions of libc++ copy (instead of
- /// move) on resize, and TrackingMDRef is very expensive to copy.
- SmallVector<TrackingMDRef, 1> MetadataPtrs;
-
- /// Structures for resolving old type refs.
- struct {
- SmallDenseMap<MDString *, TempMDTuple, 1> Unknown;
- SmallDenseMap<MDString *, DICompositeType *, 1> Final;
- SmallDenseMap<MDString *, DICompositeType *, 1> FwdDecls;
- SmallVector<std::pair<TrackingMDRef, TempMDTuple>, 1> Arrays;
- } OldTypeRefs;
+ switch (Entry.Kind) {
+ case BitstreamEntry::SubBlock: // Handled for us already.
+ case BitstreamEntry::Error:
+ return error("Malformed block");
+ case BitstreamEntry::EndBlock:
+ return false;
+ case BitstreamEntry::Record:
+ // The interesting case.
+ break;
+ }
- LLVMContext &Context;
-public:
- BitcodeReaderMetadataList(LLVMContext &C)
- : NumFwdRefs(0), AnyFwdRefs(false), Context(C) {}
-
- // vector compatibility methods
- unsigned size() const { return MetadataPtrs.size(); }
- void resize(unsigned N) { MetadataPtrs.resize(N); }
- void push_back(Metadata *MD) { MetadataPtrs.emplace_back(MD); }
- void clear() { MetadataPtrs.clear(); }
- Metadata *back() const { return MetadataPtrs.back(); }
- void pop_back() { MetadataPtrs.pop_back(); }
- bool empty() const { return MetadataPtrs.empty(); }
-
- Metadata *operator[](unsigned i) const {
- assert(i < MetadataPtrs.size());
- return MetadataPtrs[i];
+ // Read a record.
+ switch (Stream.readRecord(Entry.ID, Record)) {
+ default:
+ break; // Default behavior, ignore unknown content.
+ case bitc::MODULE_CODE_SECTIONNAME: { // SECTIONNAME: [strchr x N]
+ std::string S;
+ if (convertToString(Record, 0, S))
+ return error("Invalid record");
+ // Check for the i386 and other (x86_64, ARM) conventions
+ if (S.find("__DATA, __objc_catlist") != std::string::npos ||
+ S.find("__OBJC,__category") != std::string::npos)
+ return true;
+ break;
+ }
+ }
+ Record.clear();
}
+ llvm_unreachable("Exit infinite loop");
+}
- Metadata *lookup(unsigned I) const {
- if (I < MetadataPtrs.size())
- return MetadataPtrs[I];
- return nullptr;
+Expected<bool> hasObjCCategory(BitstreamCursor &Stream) {
+ // We expect a number of well-defined blocks, though we don't necessarily
+ // need to understand them all.
+ while (true) {
+ BitstreamEntry Entry = Stream.advance();
+
+ switch (Entry.Kind) {
+ case BitstreamEntry::Error:
+ return error("Malformed block");
+ case BitstreamEntry::EndBlock:
+ return false;
+
+ case BitstreamEntry::SubBlock:
+ if (Entry.ID == bitc::MODULE_BLOCK_ID)
+ return hasObjCCategoryInModule(Stream);
+
+ // Ignore other sub-blocks.
+ if (Stream.SkipBlock())
+ return error("Malformed block");
+ continue;
+
+ case BitstreamEntry::Record:
+ Stream.skipRecord(Entry.ID);
+ continue;
+ }
}
+}
+
+Expected<std::string> readModuleTriple(BitstreamCursor &Stream) {
+ if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
+ return error("Invalid record");
+
+ SmallVector<uint64_t, 64> Record;
+
+ std::string Triple;
+
+ // Read all the records for this module.
+ while (true) {
+ BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
- void shrinkTo(unsigned N) {
- assert(N <= size() && "Invalid shrinkTo request!");
- assert(!AnyFwdRefs && "Unexpected forward refs");
- MetadataPtrs.resize(N);
+ switch (Entry.Kind) {
+ case BitstreamEntry::SubBlock: // Handled for us already.
+ case BitstreamEntry::Error:
+ return error("Malformed block");
+ case BitstreamEntry::EndBlock:
+ return Triple;
+ case BitstreamEntry::Record:
+ // The interesting case.
+ break;
+ }
+
+ // Read a record.
+ switch (Stream.readRecord(Entry.ID, Record)) {
+ default: break; // Default behavior, ignore unknown content.
+ case bitc::MODULE_CODE_TRIPLE: { // TRIPLE: [strchr x N]
+ std::string S;
+ if (convertToString(Record, 0, S))
+ return error("Invalid record");
+ Triple = S;
+ break;
+ }
+ }
+ Record.clear();
}
+ llvm_unreachable("Exit infinite loop");
+}
- /// Return the given metadata, creating a replaceable forward reference if
- /// necessary.
- Metadata *getMetadataFwdRef(unsigned Idx);
+Expected<std::string> readTriple(BitstreamCursor &Stream) {
+ // We expect a number of well-defined blocks, though we don't necessarily
+ // need to understand them all.
+ while (true) {
+ BitstreamEntry Entry = Stream.advance();
- /// Return the the given metadata only if it is fully resolved.
- ///
- /// Gives the same result as \a lookup(), unless \a MDNode::isResolved()
- /// would give \c false.
- Metadata *getMetadataIfResolved(unsigned Idx);
+ switch (Entry.Kind) {
+ case BitstreamEntry::Error:
+ return error("Malformed block");
+ case BitstreamEntry::EndBlock:
+ return "";
- MDNode *getMDNodeFwdRefOrNull(unsigned Idx);
- void assignValue(Metadata *MD, unsigned Idx);
- void tryToResolveCycles();
- bool hasFwdRefs() const { return AnyFwdRefs; }
+ case BitstreamEntry::SubBlock:
+ if (Entry.ID == bitc::MODULE_BLOCK_ID)
+ return readModuleTriple(Stream);
- /// Upgrade a type that had an MDString reference.
- void addTypeRef(MDString &UUID, DICompositeType &CT);
+ // Ignore other sub-blocks.
+ if (Stream.SkipBlock())
+ return error("Malformed block");
+ continue;
- /// Upgrade a type that had an MDString reference.
- Metadata *upgradeTypeRef(Metadata *MaybeUUID);
+ case BitstreamEntry::Record:
+ Stream.skipRecord(Entry.ID);
+ continue;
+ }
+ }
+}
- /// Upgrade a type ref array that may have MDString references.
- Metadata *upgradeTypeRefArray(Metadata *MaybeTuple);
+class BitcodeReaderBase {
+protected:
+ BitcodeReaderBase(BitstreamCursor Stream) : Stream(std::move(Stream)) {
+ this->Stream.setBlockInfo(&BlockInfo);
+ }
-private:
- Metadata *resolveTypeRefArray(Metadata *MaybeTuple);
+ BitstreamBlockInfo BlockInfo;
+ BitstreamCursor Stream;
+
+ bool readBlockInfo();
+
+ // Contains an arbitrary and optional string identifying the bitcode producer
+ std::string ProducerIdentification;
+
+ Error error(const Twine &Message);
};
-class BitcodeReader : public GVMaterializer {
+Error BitcodeReaderBase::error(const Twine &Message) {
+ std::string FullMsg = Message.str();
+ if (!ProducerIdentification.empty())
+ FullMsg += " (Producer: '" + ProducerIdentification + "' Reader: 'LLVM " +
+ LLVM_VERSION_STRING "')";
+ return ::error(FullMsg);
+}
+
+class BitcodeReader : public BitcodeReaderBase, public GVMaterializer {
LLVMContext &Context;
Module *TheModule = nullptr;
- std::unique_ptr<MemoryBuffer> Buffer;
- std::unique_ptr<BitstreamReader> StreamFile;
- BitstreamCursor Stream;
// Next offset to start scanning for lazy parsing of function bodies.
uint64_t NextUnreadBit = 0;
// Last function offset found in the VST.
uint64_t LastFunctionBlockBit = 0;
bool SeenValueSymbolTable = false;
uint64_t VSTOffset = 0;
- // Contains an arbitrary and optional string identifying the bitcode producer
- std::string ProducerIdentification;
std::vector<Type*> TypeList;
BitcodeReaderValueList ValueList;
- BitcodeReaderMetadataList MetadataList;
+ Optional<MetadataLoader> MDLoader;
std::vector<Comdat *> ComdatList;
SmallVector<Instruction *, 64> InstructionList;
@@ -210,10 +417,6 @@ class BitcodeReader : public GVMaterializer {
std::vector<std::pair<Function*, unsigned> > FunctionPrologues;
std::vector<std::pair<Function*, unsigned> > FunctionPersonalityFns;
- SmallVector<Instruction*, 64> InstsWithTBAATag;
-
- bool HasSeenOldLoopTags = false;
-
/// The set of attributes by index. Index zero in the file is for null, and
/// is thus not represented here. As such all indices are off by one.
std::vector<AttributeSet> MAttributes;
@@ -236,9 +439,6 @@ class BitcodeReader : public GVMaterializer {
// Intrinsics which were remangled because of types rename
UpdatedIntrinsicMap RemangledIntrinsics;
- // Map the bitcode's custom MDKind ID to the Module's MDKind ID.
- DenseMap<unsigned, unsigned> MDKindMap;
-
// Several operations happen after the module header has been read, but
// before function bodies are processed. This keeps track of whether
// we've done this yet.
@@ -271,82 +471,55 @@ class BitcodeReader : public GVMaterializer {
/// (e.g.) blockaddress forward references.
bool WillMaterializeAllForwardRefs = false;
- /// True if any Metadata block has been materialized.
- bool IsMetadataMaterialized = false;
-
bool StripDebugInfo = false;
-
- /// Functions that need to be matched with subprograms when upgrading old
- /// metadata.
- SmallDenseMap<Function *, DISubprogram *, 16> FunctionsWithSPs;
+ TBAAVerifier TBAAVerifyHelper;
std::vector<std::string> BundleTags;
public:
- std::error_code error(BitcodeError E, const Twine &Message);
- std::error_code error(const Twine &Message);
-
- BitcodeReader(MemoryBuffer *Buffer, LLVMContext &Context);
- BitcodeReader(LLVMContext &Context);
- ~BitcodeReader() override { freeState(); }
-
- std::error_code materializeForwardReferencedFunctions();
-
- void freeState();
+ BitcodeReader(BitstreamCursor Stream, StringRef ProducerIdentification,
+ LLVMContext &Context);
- void releaseBuffer();
+ Error materializeForwardReferencedFunctions();
- std::error_code materialize(GlobalValue *GV) override;
- std::error_code materializeModule() override;
+ Error materialize(GlobalValue *GV) override;
+ Error materializeModule() override;
std::vector<StructType *> getIdentifiedStructTypes() const override;
/// \brief Main interface to parsing a bitcode buffer.
/// \returns true if an error occurred.
- std::error_code parseBitcodeInto(std::unique_ptr<DataStreamer> Streamer,
- Module *M,
- bool ShouldLazyLoadMetadata = false);
-
- /// \brief Cheap mechanism to just extract module triple
- /// \returns true if an error occurred.
- ErrorOr<std::string> parseTriple();
-
- /// Cheap mechanism to just extract the identification block out of bitcode.
- ErrorOr<std::string> parseIdentificationBlock();
-
- /// Peak at the module content and return true if any ObjC category or class
- /// is found.
- ErrorOr<bool> hasObjCCategory();
+ Error parseBitcodeInto(Module *M, bool ShouldLazyLoadMetadata = false,
+ bool IsImporting = false);
static uint64_t decodeSignRotatedValue(uint64_t V);
/// Materialize any deferred Metadata block.
- std::error_code materializeMetadata() override;
+ Error materializeMetadata() override;
void setStripDebugInfo() override;
private:
- /// Parse the "IDENTIFICATION_BLOCK_ID" block, populate the
- // ProducerIdentification data member, and do some basic enforcement on the
- // "epoch" encoded in the bitcode.
- std::error_code parseBitcodeVersion();
-
std::vector<StructType *> IdentifiedStructTypes;
StructType *createIdentifiedStructType(LLVMContext &Context, StringRef Name);
StructType *createIdentifiedStructType(LLVMContext &Context);
Type *getTypeByID(unsigned ID);
+
Value *getFnValueByID(unsigned ID, Type *Ty) {
if (Ty && Ty->isMetadataTy())
return MetadataAsValue::get(Ty->getContext(), getFnMetadataByID(ID));
return ValueList.getValueFwdRef(ID, Ty);
}
+
Metadata *getFnMetadataByID(unsigned ID) {
- return MetadataList.getMetadataFwdRef(ID);
+ return MDLoader->getMetadataFwdRef(ID);
}
+
BasicBlock *getBasicBlock(unsigned ID) const {
if (ID >= FunctionBBs.size()) return nullptr; // Invalid ID
return FunctionBBs[ID];
}
+
AttributeSet getAttributes(unsigned i) const {
if (i-1 < MAttributes.size())
return MAttributes[i-1];
@@ -422,68 +595,41 @@ private:
/// Converts alignment exponent (i.e. power of two (or zero)) to the
/// corresponding alignment to use. If alignment is too large, returns
/// a corresponding error code.
- std::error_code parseAlignmentValue(uint64_t Exponent, unsigned &Alignment);
- std::error_code parseAttrKind(uint64_t Code, Attribute::AttrKind *Kind);
- std::error_code parseModule(uint64_t ResumeBit,
- bool ShouldLazyLoadMetadata = false);
- std::error_code parseAttributeBlock();
- std::error_code parseAttributeGroupBlock();
- std::error_code parseTypeTable();
- std::error_code parseTypeTableBody();
- std::error_code parseOperandBundleTags();
-
- ErrorOr<Value *> recordValue(SmallVectorImpl<uint64_t> &Record,
- unsigned NameIndex, Triple &TT);
- std::error_code parseValueSymbolTable(uint64_t Offset = 0);
- std::error_code parseConstants();
- std::error_code rememberAndSkipFunctionBodies();
- std::error_code rememberAndSkipFunctionBody();
+ Error parseAlignmentValue(uint64_t Exponent, unsigned &Alignment);
+ Error parseAttrKind(uint64_t Code, Attribute::AttrKind *Kind);
+ Error parseModule(uint64_t ResumeBit, bool ShouldLazyLoadMetadata = false);
+ Error parseAttributeBlock();
+ Error parseAttributeGroupBlock();
+ Error parseTypeTable();
+ Error parseTypeTableBody();
+ Error parseOperandBundleTags();
+
+ Expected<Value *> recordValue(SmallVectorImpl<uint64_t> &Record,
+ unsigned NameIndex, Triple &TT);
+ Error parseValueSymbolTable(uint64_t Offset = 0);
+ Error parseConstants();
+ Error rememberAndSkipFunctionBodies();
+ Error rememberAndSkipFunctionBody();
/// Save the positions of the Metadata blocks and skip parsing the blocks.
- std::error_code rememberAndSkipMetadata();
- std::error_code parseFunctionBody(Function *F);
- std::error_code globalCleanup();
- std::error_code resolveGlobalAndIndirectSymbolInits();
- std::error_code parseMetadata(bool ModuleLevel = false);
- std::error_code parseMetadataStrings(ArrayRef<uint64_t> Record,
- StringRef Blob,
- unsigned &NextMetadataNo);
- std::error_code parseMetadataKinds();
- std::error_code parseMetadataKindRecord(SmallVectorImpl<uint64_t> &Record);
- std::error_code
- parseGlobalObjectAttachment(GlobalObject &GO,
- ArrayRef<uint64_t> Record);
- std::error_code parseMetadataAttachment(Function &F);
- ErrorOr<std::string> parseModuleTriple();
- ErrorOr<bool> hasObjCCategoryInModule();
- std::error_code parseUseLists();
- std::error_code initStream(std::unique_ptr<DataStreamer> Streamer);
- std::error_code initStreamFromBuffer();
- std::error_code initLazyStream(std::unique_ptr<DataStreamer> Streamer);
- std::error_code findFunctionInStream(
+ Error rememberAndSkipMetadata();
+ Error typeCheckLoadStoreInst(Type *ValType, Type *PtrType);
+ Error parseFunctionBody(Function *F);
+ Error globalCleanup();
+ Error resolveGlobalAndIndirectSymbolInits();
+ Error parseUseLists();
+ Error findFunctionInStream(
Function *F,
DenseMap<Function *, uint64_t>::iterator DeferredFunctionInfoIterator);
};
/// Class to manage reading and parsing function summary index bitcode
/// files/sections.
-class ModuleSummaryIndexBitcodeReader {
- DiagnosticHandlerFunction DiagnosticHandler;
-
- /// Eventually points to the module index built during parsing.
- ModuleSummaryIndex *TheIndex = nullptr;
-
- std::unique_ptr<MemoryBuffer> Buffer;
- std::unique_ptr<BitstreamReader> StreamFile;
- BitstreamCursor Stream;
-
- /// Used to indicate whether caller only wants to check for the presence
- /// of the global value summary bitcode section. All blocks are skipped,
- /// but the SeenGlobalValSummary boolean is set.
- bool CheckGlobalValSummaryPresenceOnly = false;
+class ModuleSummaryIndexBitcodeReader : public BitcodeReaderBase {
+ /// The module index built during parsing.
+ ModuleSummaryIndex &TheIndex;
/// Indicates whether we have encountered a global value summary section
- /// yet during parsing, used when checking if file contains global value
- /// summary section.
+ /// yet during parsing.
bool SeenGlobalValSummary = false;
/// Indicates whether we have already parsed the VST, used for error checking.
@@ -513,95 +659,52 @@ class ModuleSummaryIndexBitcodeReader {
std::string SourceFileName;
public:
- std::error_code error(const Twine &Message);
-
ModuleSummaryIndexBitcodeReader(
- MemoryBuffer *Buffer, DiagnosticHandlerFunction DiagnosticHandler,
- bool CheckGlobalValSummaryPresenceOnly = false);
- ~ModuleSummaryIndexBitcodeReader() { freeState(); }
-
- void freeState();
+ BitstreamCursor Stream, ModuleSummaryIndex &TheIndex);
- void releaseBuffer();
-
- /// Check if the parser has encountered a summary section.
- bool foundGlobalValSummary() { return SeenGlobalValSummary; }
-
- /// \brief Main interface to parsing a bitcode buffer.
- /// \returns true if an error occurred.
- std::error_code parseSummaryIndexInto(std::unique_ptr<DataStreamer> Streamer,
- ModuleSummaryIndex *I);
+ Error parseModule(StringRef ModulePath);
private:
- std::error_code parseModule();
- std::error_code parseValueSymbolTable(
+ Error parseValueSymbolTable(
uint64_t Offset,
DenseMap<unsigned, GlobalValue::LinkageTypes> &ValueIdToLinkageMap);
- std::error_code parseEntireSummary();
- std::error_code parseModuleStringTable();
- std::error_code initStream(std::unique_ptr<DataStreamer> Streamer);
- std::error_code initStreamFromBuffer();
- std::error_code initLazyStream(std::unique_ptr<DataStreamer> Streamer);
+ std::vector<ValueInfo> makeRefList(ArrayRef<uint64_t> Record);
+ std::vector<FunctionSummary::EdgeTy> makeCallList(ArrayRef<uint64_t> Record,
+ bool IsOldProfileFormat,
+ bool HasProfile);
+ Error parseEntireSummary(StringRef ModulePath);
+ Error parseModuleStringTable();
+
std::pair<GlobalValue::GUID, GlobalValue::GUID>
getGUIDFromValueId(unsigned ValueId);
};
-} // end anonymous namespace
-BitcodeDiagnosticInfo::BitcodeDiagnosticInfo(std::error_code EC,
- DiagnosticSeverity Severity,
- const Twine &Msg)
- : DiagnosticInfo(DK_Bitcode, Severity), Msg(Msg), EC(EC) {}
-
-void BitcodeDiagnosticInfo::print(DiagnosticPrinter &DP) const { DP << Msg; }
-
-static std::error_code error(const DiagnosticHandlerFunction &DiagnosticHandler,
- std::error_code EC, const Twine &Message) {
- BitcodeDiagnosticInfo DI(EC, DS_Error, Message);
- DiagnosticHandler(DI);
- return EC;
-}
-
-static std::error_code error(LLVMContext &Context, std::error_code EC,
- const Twine &Message) {
- return error([&](const DiagnosticInfo &DI) { Context.diagnose(DI); }, EC,
- Message);
-}
-
-static std::error_code error(LLVMContext &Context, const Twine &Message) {
- return error(Context, make_error_code(BitcodeError::CorruptedBitcode),
- Message);
-}
+} // end anonymous namespace
-std::error_code BitcodeReader::error(BitcodeError E, const Twine &Message) {
- if (!ProducerIdentification.empty()) {
- return ::error(Context, make_error_code(E),
- Message + " (Producer: '" + ProducerIdentification +
- "' Reader: 'LLVM " + LLVM_VERSION_STRING "')");
+std::error_code llvm::errorToErrorCodeAndEmitErrors(LLVMContext &Ctx,
+ Error Err) {
+ if (Err) {
+ std::error_code EC;
+ handleAllErrors(std::move(Err), [&](ErrorInfoBase &EIB) {
+ EC = EIB.convertToErrorCode();
+ Ctx.emitError(EIB.message());
+ });
+ return EC;
}
- return ::error(Context, make_error_code(E), Message);
+ return std::error_code();
}
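
// Editorial sketch (not part of this patch): with the move from
// std::error_code to llvm::Error, a caller that cannot propagate the Error
// further might drain it the same way the helper above does, assuming the
// usual facilities from llvm/Support/Error.h and llvm/Support/raw_ostream.h.
static bool reportBitcodeError(llvm::Error Err) {
  if (!Err)
    return false;                                   // success, nothing to report
  llvm::handleAllErrors(std::move(Err), [](llvm::ErrorInfoBase &EIB) {
    llvm::errs() << "bitcode error: " << EIB.message() << '\n';
  });
  return true;                                      // error consumed and reported
}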
-std::error_code BitcodeReader::error(const Twine &Message) {
- if (!ProducerIdentification.empty()) {
- return ::error(Context, make_error_code(BitcodeError::CorruptedBitcode),
- Message + " (Producer: '" + ProducerIdentification +
- "' Reader: 'LLVM " + LLVM_VERSION_STRING "')");
- }
- return ::error(Context, make_error_code(BitcodeError::CorruptedBitcode),
- Message);
+BitcodeReader::BitcodeReader(BitstreamCursor Stream,
+ StringRef ProducerIdentification,
+ LLVMContext &Context)
+ : BitcodeReaderBase(std::move(Stream)), Context(Context),
+ ValueList(Context) {
+ this->ProducerIdentification = ProducerIdentification;
}
-BitcodeReader::BitcodeReader(MemoryBuffer *Buffer, LLVMContext &Context)
- : Context(Context), Buffer(Buffer), ValueList(Context),
- MetadataList(Context) {}
-
-BitcodeReader::BitcodeReader(LLVMContext &Context)
- : Context(Context), Buffer(nullptr), ValueList(Context),
- MetadataList(Context) {}
-
-std::error_code BitcodeReader::materializeForwardReferencedFunctions() {
+Error BitcodeReader::materializeForwardReferencedFunctions() {
if (WillMaterializeAllForwardRefs)
- return std::error_code();
+ return Error::success();
// Prevent recursion.
WillMaterializeAllForwardRefs = true;
@@ -622,50 +725,20 @@ std::error_code BitcodeReader::materializeForwardReferencedFunctions() {
return error("Never resolved function from blockaddress");
// Try to materialize F.
- if (std::error_code EC = materialize(F))
- return EC;
+ if (Error Err = materialize(F))
+ return Err;
}
assert(BasicBlockFwdRefs.empty() && "Function missing from queue");
// Reset state.
WillMaterializeAllForwardRefs = false;
- return std::error_code();
-}
-
-void BitcodeReader::freeState() {
- Buffer = nullptr;
- std::vector<Type*>().swap(TypeList);
- ValueList.clear();
- MetadataList.clear();
- std::vector<Comdat *>().swap(ComdatList);
-
- std::vector<AttributeSet>().swap(MAttributes);
- std::vector<BasicBlock*>().swap(FunctionBBs);
- std::vector<Function*>().swap(FunctionsWithBodies);
- DeferredFunctionInfo.clear();
- DeferredMetadataInfo.clear();
- MDKindMap.clear();
-
- assert(BasicBlockFwdRefs.empty() && "Unresolved blockaddress fwd references");
- BasicBlockFwdRefQueue.clear();
+ return Error::success();
}
//===----------------------------------------------------------------------===//
// Helper functions to implement forward reference resolution, etc.
//===----------------------------------------------------------------------===//
-/// Convert a string from a record into an std::string, return true on failure.
-template <typename StrTy>
-static bool convertToString(ArrayRef<uint64_t> Record, unsigned Idx,
- StrTy &Result) {
- if (Idx > Record.size())
- return true;
-
- for (unsigned i = Idx, e = Record.size(); i != e; ++i)
- Result += (char)Record[i];
- return false;
-}
-
static bool hasImplicitComdat(size_t Val) {
switch (Val) {
default:
@@ -720,7 +793,7 @@ static GlobalValue::LinkageTypes getDecodedLinkage(unsigned Val) {
}
}
-// Decode the flags for GlobalValue in the summary
+/// Decode the flags for GlobalValue in the summary.
static GlobalValueSummary::GVFlags getDecodedGVSummaryFlags(uint64_t RawFlags,
uint64_t Version) {
  // Summaries were not emitted before LLVM 3.9, so we don't need to upgrade Linkage
@@ -728,8 +801,12 @@ static GlobalValueSummary::GVFlags getDecodedGVSummaryFlags(uint64_t RawFlags,
// to getDecodedLinkage() will need to be taken into account here as above.
auto Linkage = GlobalValue::LinkageTypes(RawFlags & 0xF); // 4 bits
RawFlags = RawFlags >> 4;
- auto HasSection = RawFlags & 0x1; // bool
- return GlobalValueSummary::GVFlags(Linkage, HasSection);
+ bool NoRename = RawFlags & 0x1;
+ bool IsNotViableToInline = RawFlags & 0x2;
+ bool HasInlineAsmMaybeReferencingInternal = RawFlags & 0x4;
+ return GlobalValueSummary::GVFlags(Linkage, NoRename,
+ HasInlineAsmMaybeReferencingInternal,
+ IsNotViableToInline);
}
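
// Editorial sketch (not part of this patch): the raw summary flags word
// decoded above packs a 4-bit linkage code in bits 0-3 and the three new
// boolean flags in bits 4-6.  The struct below is an illustrative stand-in
// for GlobalValueSummary::GVFlags.
#include <cstdint>

struct RawSummaryFlags {
  unsigned Linkage;
  bool NoRename, IsNotViableToInline, HasInlineAsmMaybeReferencingInternal;
};

static RawSummaryFlags decodeRawSummaryFlags(uint64_t Raw) {
  RawSummaryFlags F;
  F.Linkage = Raw & 0xF;                             // 4 bits of linkage
  Raw >>= 4;
  F.NoRename = Raw & 0x1;
  F.IsNotViableToInline = Raw & 0x2;
  F.HasInlineAsmMaybeReferencingInternal = Raw & 0x4;
  return F;
}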
static GlobalValue::VisibilityTypes getDecodedVisibility(unsigned Val) {
@@ -897,364 +974,13 @@ static FastMathFlags getDecodedFastMathFlags(unsigned Val) {
return FMF;
}
-static void upgradeDLLImportExportLinkage(llvm::GlobalValue *GV, unsigned Val) {
+static void upgradeDLLImportExportLinkage(GlobalValue *GV, unsigned Val) {
switch (Val) {
case 5: GV->setDLLStorageClass(GlobalValue::DLLImportStorageClass); break;
case 6: GV->setDLLStorageClass(GlobalValue::DLLExportStorageClass); break;
}
}
-namespace llvm {
-namespace {
-/// \brief A class for maintaining the slot number definition
-/// as a placeholder for the actual definition for forward constants defs.
-class ConstantPlaceHolder : public ConstantExpr {
- void operator=(const ConstantPlaceHolder &) = delete;
-
-public:
- // allocate space for exactly one operand
- void *operator new(size_t s) { return User::operator new(s, 1); }
- explicit ConstantPlaceHolder(Type *Ty, LLVMContext &Context)
- : ConstantExpr(Ty, Instruction::UserOp1, &Op<0>(), 1) {
- Op<0>() = UndefValue::get(Type::getInt32Ty(Context));
- }
-
- /// \brief Methods to support type inquiry through isa, cast, and dyn_cast.
- static bool classof(const Value *V) {
- return isa<ConstantExpr>(V) &&
- cast<ConstantExpr>(V)->getOpcode() == Instruction::UserOp1;
- }
-
- /// Provide fast operand accessors
- DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
-};
-} // end anonymous namespace
-
-// FIXME: can we inherit this from ConstantExpr?
-template <>
-struct OperandTraits<ConstantPlaceHolder> :
- public FixedNumOperandTraits<ConstantPlaceHolder, 1> {
-};
-DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ConstantPlaceHolder, Value)
-} // end namespace llvm
-
-void BitcodeReaderValueList::assignValue(Value *V, unsigned Idx) {
- if (Idx == size()) {
- push_back(V);
- return;
- }
-
- if (Idx >= size())
- resize(Idx+1);
-
- WeakVH &OldV = ValuePtrs[Idx];
- if (!OldV) {
- OldV = V;
- return;
- }
-
- // Handle constants and non-constants (e.g. instrs) differently for
- // efficiency.
- if (Constant *PHC = dyn_cast<Constant>(&*OldV)) {
- ResolveConstants.push_back(std::make_pair(PHC, Idx));
- OldV = V;
- } else {
- // If there was a forward reference to this value, replace it.
- Value *PrevVal = OldV;
- OldV->replaceAllUsesWith(V);
- delete PrevVal;
- }
-}
-
-Constant *BitcodeReaderValueList::getConstantFwdRef(unsigned Idx,
- Type *Ty) {
- if (Idx >= size())
- resize(Idx + 1);
-
- if (Value *V = ValuePtrs[Idx]) {
- if (Ty != V->getType())
- report_fatal_error("Type mismatch in constant table!");
- return cast<Constant>(V);
- }
-
- // Create and return a placeholder, which will later be RAUW'd.
- Constant *C = new ConstantPlaceHolder(Ty, Context);
- ValuePtrs[Idx] = C;
- return C;
-}
-
-Value *BitcodeReaderValueList::getValueFwdRef(unsigned Idx, Type *Ty) {
- // Bail out for a clearly invalid value. This would make us call resize(0)
- if (Idx == UINT_MAX)
- return nullptr;
-
- if (Idx >= size())
- resize(Idx + 1);
-
- if (Value *V = ValuePtrs[Idx]) {
- // If the types don't match, it's invalid.
- if (Ty && Ty != V->getType())
- return nullptr;
- return V;
- }
-
- // No type specified, must be invalid reference.
- if (!Ty) return nullptr;
-
- // Create and return a placeholder, which will later be RAUW'd.
- Value *V = new Argument(Ty);
- ValuePtrs[Idx] = V;
- return V;
-}
-
-/// Once all constants are read, this method bulk resolves any forward
-/// references. The idea behind this is that we sometimes get constants (such
-/// as large arrays) which reference *many* forward ref constants. Replacing
-/// each of these causes a lot of thrashing when building/reuniquing the
-/// constant. Instead of doing this, we look at all the uses and rewrite all
-/// the placeholders at once for any constant that uses a placeholder.
-void BitcodeReaderValueList::resolveConstantForwardRefs() {
- // Sort the values by-pointer so that they are efficient to look up with a
- // binary search.
- std::sort(ResolveConstants.begin(), ResolveConstants.end());
-
- SmallVector<Constant*, 64> NewOps;
-
- while (!ResolveConstants.empty()) {
- Value *RealVal = operator[](ResolveConstants.back().second);
- Constant *Placeholder = ResolveConstants.back().first;
- ResolveConstants.pop_back();
-
- // Loop over all users of the placeholder, updating them to reference the
- // new value. If they reference more than one placeholder, update them all
- // at once.
- while (!Placeholder->use_empty()) {
- auto UI = Placeholder->user_begin();
- User *U = *UI;
-
- // If the using object isn't uniqued, just update the operands. This
- // handles instructions and initializers for global variables.
- if (!isa<Constant>(U) || isa<GlobalValue>(U)) {
- UI.getUse().set(RealVal);
- continue;
- }
-
- // Otherwise, we have a constant that uses the placeholder. Replace that
- // constant with a new constant that has *all* placeholder uses updated.
- Constant *UserC = cast<Constant>(U);
- for (User::op_iterator I = UserC->op_begin(), E = UserC->op_end();
- I != E; ++I) {
- Value *NewOp;
- if (!isa<ConstantPlaceHolder>(*I)) {
- // Not a placeholder reference.
- NewOp = *I;
- } else if (*I == Placeholder) {
- // Common case is that it just references this one placeholder.
- NewOp = RealVal;
- } else {
- // Otherwise, look up the placeholder in ResolveConstants.
- ResolveConstantsTy::iterator It =
- std::lower_bound(ResolveConstants.begin(), ResolveConstants.end(),
- std::pair<Constant*, unsigned>(cast<Constant>(*I),
- 0));
- assert(It != ResolveConstants.end() && It->first == *I);
- NewOp = operator[](It->second);
- }
-
- NewOps.push_back(cast<Constant>(NewOp));
- }
-
- // Make the new constant.
- Constant *NewC;
- if (ConstantArray *UserCA = dyn_cast<ConstantArray>(UserC)) {
- NewC = ConstantArray::get(UserCA->getType(), NewOps);
- } else if (ConstantStruct *UserCS = dyn_cast<ConstantStruct>(UserC)) {
- NewC = ConstantStruct::get(UserCS->getType(), NewOps);
- } else if (isa<ConstantVector>(UserC)) {
- NewC = ConstantVector::get(NewOps);
- } else {
- assert(isa<ConstantExpr>(UserC) && "Must be a ConstantExpr.");
- NewC = cast<ConstantExpr>(UserC)->getWithOperands(NewOps);
- }
-
- UserC->replaceAllUsesWith(NewC);
- UserC->destroyConstant();
- NewOps.clear();
- }
-
- // Update all ValueHandles, they should be the only users at this point.
- Placeholder->replaceAllUsesWith(RealVal);
- delete Placeholder;
- }
-}
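
// Editorial sketch (not part of this patch) of the bulk-resolution idea the
// removed helper implements: sort the (placeholder, real value) pairs so
// lookups become a binary search, then rewrite each user once instead of
// RAUW'ing every placeholder individually.  Types here are hypothetical
// stand-ins, not the reader's real classes.
#include <algorithm>
#include <utility>
#include <vector>

struct ToyNode { std::vector<ToyNode *> Ops; };

static void bulkResolve(std::vector<std::pair<ToyNode *, ToyNode *>> Pending,
                        std::vector<ToyNode *> &Users) {
  std::sort(Pending.begin(), Pending.end());   // sorted by placeholder address
  auto Resolve = [&](ToyNode *Op) -> ToyNode * {
    auto It = std::lower_bound(
        Pending.begin(), Pending.end(),
        std::make_pair(Op, static_cast<ToyNode *>(nullptr)));
    return (It != Pending.end() && It->first == Op) ? It->second : Op;
  };
  for (ToyNode *U : Users)                     // each user is rewritten once
    for (ToyNode *&Op : U->Ops)
      Op = Resolve(Op);
}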
-
-void BitcodeReaderMetadataList::assignValue(Metadata *MD, unsigned Idx) {
- if (Idx == size()) {
- push_back(MD);
- return;
- }
-
- if (Idx >= size())
- resize(Idx+1);
-
- TrackingMDRef &OldMD = MetadataPtrs[Idx];
- if (!OldMD) {
- OldMD.reset(MD);
- return;
- }
-
- // If there was a forward reference to this value, replace it.
- TempMDTuple PrevMD(cast<MDTuple>(OldMD.get()));
- PrevMD->replaceAllUsesWith(MD);
- --NumFwdRefs;
-}
-
-Metadata *BitcodeReaderMetadataList::getMetadataFwdRef(unsigned Idx) {
- if (Idx >= size())
- resize(Idx + 1);
-
- if (Metadata *MD = MetadataPtrs[Idx])
- return MD;
-
- // Track forward refs to be resolved later.
- if (AnyFwdRefs) {
- MinFwdRef = std::min(MinFwdRef, Idx);
- MaxFwdRef = std::max(MaxFwdRef, Idx);
- } else {
- AnyFwdRefs = true;
- MinFwdRef = MaxFwdRef = Idx;
- }
- ++NumFwdRefs;
-
- // Create and return a placeholder, which will later be RAUW'd.
- Metadata *MD = MDNode::getTemporary(Context, None).release();
- MetadataPtrs[Idx].reset(MD);
- return MD;
-}
-
-Metadata *BitcodeReaderMetadataList::getMetadataIfResolved(unsigned Idx) {
- Metadata *MD = lookup(Idx);
- if (auto *N = dyn_cast_or_null<MDNode>(MD))
- if (!N->isResolved())
- return nullptr;
- return MD;
-}
-
-MDNode *BitcodeReaderMetadataList::getMDNodeFwdRefOrNull(unsigned Idx) {
- return dyn_cast_or_null<MDNode>(getMetadataFwdRef(Idx));
-}
-
-void BitcodeReaderMetadataList::tryToResolveCycles() {
- if (NumFwdRefs)
- // Still forward references... can't resolve cycles.
- return;
-
- bool DidReplaceTypeRefs = false;
-
- // Give up on finding a full definition for any forward decls that remain.
- for (const auto &Ref : OldTypeRefs.FwdDecls)
- OldTypeRefs.Final.insert(Ref);
- OldTypeRefs.FwdDecls.clear();
-
- // Upgrade from old type ref arrays. In strange cases, this could add to
- // OldTypeRefs.Unknown.
- for (const auto &Array : OldTypeRefs.Arrays) {
- DidReplaceTypeRefs = true;
- Array.second->replaceAllUsesWith(resolveTypeRefArray(Array.first.get()));
- }
- OldTypeRefs.Arrays.clear();
-
- // Replace old string-based type refs with the resolved node, if possible.
- // If we haven't seen the node, leave it to the verifier to complain about
- // the invalid string reference.
- for (const auto &Ref : OldTypeRefs.Unknown) {
- DidReplaceTypeRefs = true;
- if (DICompositeType *CT = OldTypeRefs.Final.lookup(Ref.first))
- Ref.second->replaceAllUsesWith(CT);
- else
- Ref.second->replaceAllUsesWith(Ref.first);
- }
- OldTypeRefs.Unknown.clear();
-
- // Make sure all the upgraded types are resolved.
- if (DidReplaceTypeRefs) {
- AnyFwdRefs = true;
- MinFwdRef = 0;
- MaxFwdRef = MetadataPtrs.size() - 1;
- }
-
- if (!AnyFwdRefs)
- // Nothing to do.
- return;
-
- // Resolve any cycles.
- for (unsigned I = MinFwdRef, E = MaxFwdRef + 1; I != E; ++I) {
- auto &MD = MetadataPtrs[I];
- auto *N = dyn_cast_or_null<MDNode>(MD);
- if (!N)
- continue;
-
- assert(!N->isTemporary() && "Unexpected forward reference");
- N->resolveCycles();
- }
-
- // Make sure we return early again until there's another forward ref.
- AnyFwdRefs = false;
-}
-
-void BitcodeReaderMetadataList::addTypeRef(MDString &UUID,
- DICompositeType &CT) {
- assert(CT.getRawIdentifier() == &UUID && "Mismatched UUID");
- if (CT.isForwardDecl())
- OldTypeRefs.FwdDecls.insert(std::make_pair(&UUID, &CT));
- else
- OldTypeRefs.Final.insert(std::make_pair(&UUID, &CT));
-}
-
-Metadata *BitcodeReaderMetadataList::upgradeTypeRef(Metadata *MaybeUUID) {
- auto *UUID = dyn_cast_or_null<MDString>(MaybeUUID);
- if (LLVM_LIKELY(!UUID))
- return MaybeUUID;
-
- if (auto *CT = OldTypeRefs.Final.lookup(UUID))
- return CT;
-
- auto &Ref = OldTypeRefs.Unknown[UUID];
- if (!Ref)
- Ref = MDNode::getTemporary(Context, None);
- return Ref.get();
-}
-
-Metadata *BitcodeReaderMetadataList::upgradeTypeRefArray(Metadata *MaybeTuple) {
- auto *Tuple = dyn_cast_or_null<MDTuple>(MaybeTuple);
- if (!Tuple || Tuple->isDistinct())
- return MaybeTuple;
-
- // Look through the array immediately if possible.
- if (!Tuple->isTemporary())
- return resolveTypeRefArray(Tuple);
-
- // Create and return a placeholder to use for now. Eventually
- // resolveTypeRefArrays() will resolve this forward reference.
- OldTypeRefs.Arrays.emplace_back(
- std::piecewise_construct, std::forward_as_tuple(Tuple),
- std::forward_as_tuple(MDTuple::getTemporary(Context, None)));
- return OldTypeRefs.Arrays.back().second.get();
-}
-
-Metadata *BitcodeReaderMetadataList::resolveTypeRefArray(Metadata *MaybeTuple) {
- auto *Tuple = dyn_cast_or_null<MDTuple>(MaybeTuple);
- if (!Tuple || Tuple->isDistinct())
- return MaybeTuple;
-
- // Look through the DITypeRefArray, upgrading each DITypeRef.
- SmallVector<Metadata *, 32> Ops;
- Ops.reserve(Tuple->getNumOperands());
- for (Metadata *MD : Tuple->operands())
- Ops.push_back(upgradeTypeRef(MD));
-
- return MDTuple::get(Context, Ops);
-}
Type *BitcodeReader::getTypeByID(unsigned ID) {
// The type table size is always specified correctly.
@@ -1286,6 +1012,97 @@ StructType *BitcodeReader::createIdentifiedStructType(LLVMContext &Context) {
// Functions for parsing blocks from the bitcode file
//===----------------------------------------------------------------------===//
+static uint64_t getRawAttributeMask(Attribute::AttrKind Val) {
+ switch (Val) {
+ case Attribute::EndAttrKinds:
+ llvm_unreachable("Synthetic enumerators which should never get here");
+
+ case Attribute::None: return 0;
+ case Attribute::ZExt: return 1 << 0;
+ case Attribute::SExt: return 1 << 1;
+ case Attribute::NoReturn: return 1 << 2;
+ case Attribute::InReg: return 1 << 3;
+ case Attribute::StructRet: return 1 << 4;
+ case Attribute::NoUnwind: return 1 << 5;
+ case Attribute::NoAlias: return 1 << 6;
+ case Attribute::ByVal: return 1 << 7;
+ case Attribute::Nest: return 1 << 8;
+ case Attribute::ReadNone: return 1 << 9;
+ case Attribute::ReadOnly: return 1 << 10;
+ case Attribute::NoInline: return 1 << 11;
+ case Attribute::AlwaysInline: return 1 << 12;
+ case Attribute::OptimizeForSize: return 1 << 13;
+ case Attribute::StackProtect: return 1 << 14;
+ case Attribute::StackProtectReq: return 1 << 15;
+ case Attribute::Alignment: return 31 << 16;
+ case Attribute::NoCapture: return 1 << 21;
+ case Attribute::NoRedZone: return 1 << 22;
+ case Attribute::NoImplicitFloat: return 1 << 23;
+ case Attribute::Naked: return 1 << 24;
+ case Attribute::InlineHint: return 1 << 25;
+ case Attribute::StackAlignment: return 7 << 26;
+ case Attribute::ReturnsTwice: return 1 << 29;
+ case Attribute::UWTable: return 1 << 30;
+ case Attribute::NonLazyBind: return 1U << 31;
+ case Attribute::SanitizeAddress: return 1ULL << 32;
+ case Attribute::MinSize: return 1ULL << 33;
+ case Attribute::NoDuplicate: return 1ULL << 34;
+ case Attribute::StackProtectStrong: return 1ULL << 35;
+ case Attribute::SanitizeThread: return 1ULL << 36;
+ case Attribute::SanitizeMemory: return 1ULL << 37;
+ case Attribute::NoBuiltin: return 1ULL << 38;
+ case Attribute::Returned: return 1ULL << 39;
+ case Attribute::Cold: return 1ULL << 40;
+ case Attribute::Builtin: return 1ULL << 41;
+ case Attribute::OptimizeNone: return 1ULL << 42;
+ case Attribute::InAlloca: return 1ULL << 43;
+ case Attribute::NonNull: return 1ULL << 44;
+ case Attribute::JumpTable: return 1ULL << 45;
+ case Attribute::Convergent: return 1ULL << 46;
+ case Attribute::SafeStack: return 1ULL << 47;
+ case Attribute::NoRecurse: return 1ULL << 48;
+ case Attribute::InaccessibleMemOnly: return 1ULL << 49;
+ case Attribute::InaccessibleMemOrArgMemOnly: return 1ULL << 50;
+ case Attribute::SwiftSelf: return 1ULL << 51;
+ case Attribute::SwiftError: return 1ULL << 52;
+ case Attribute::WriteOnly: return 1ULL << 53;
+ case Attribute::Dereferenceable:
+ llvm_unreachable("dereferenceable attribute not supported in raw format");
+ break;
+ case Attribute::DereferenceableOrNull:
+ llvm_unreachable("dereferenceable_or_null attribute not supported in raw "
+ "format");
+ break;
+ case Attribute::ArgMemOnly:
+ llvm_unreachable("argmemonly attribute not supported in raw format");
+ break;
+ case Attribute::AllocSize:
+ llvm_unreachable("allocsize not supported in raw format");
+ break;
+ }
+ llvm_unreachable("Unsupported attribute type");
+}
+
+static void addRawAttributeValue(AttrBuilder &B, uint64_t Val) {
+ if (!Val) return;
+
+ for (Attribute::AttrKind I = Attribute::None; I != Attribute::EndAttrKinds;
+ I = Attribute::AttrKind(I + 1)) {
+ if (I == Attribute::Dereferenceable ||
+ I == Attribute::DereferenceableOrNull ||
+ I == Attribute::ArgMemOnly ||
+ I == Attribute::AllocSize)
+ continue;
+ if (uint64_t A = (Val & getRawAttributeMask(I))) {
+ if (I == Attribute::Alignment)
+ B.addAlignmentAttr(1ULL << ((A >> 16) - 1));
+ else if (I == Attribute::StackAlignment)
+ B.addStackAlignmentAttr(1ULL << ((A >> 26)-1));
+ else
+ B.addAttribute(I);
+ }
+ }
+}
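
// Editorial worked example (not part of this patch) of the raw mask layout
// decoded by addRawAttributeValue above: single-bit attributes plus a 5-bit
// alignment field at bits 16-20 that stores log2(alignment) + 1.
#include <cassert>
#include <cstdint>

static void rawMaskExample() {
  uint64_t Raw = (1ULL << 0)      // Attribute::ZExt per getRawAttributeMask
               | (4ULL << 16);    // alignment field: decodes to 1 << (4 - 1) == 8
  uint64_t AlignField = Raw & (31ULL << 16);
  unsigned Alignment = 1u << ((AlignField >> 16) - 1);
  assert(Alignment == 8 && (Raw & 1) /* zext present */);
}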
/// \brief This fills an AttrBuilder object with the LLVM attributes that have
/// been decoded from the given integer. This function must stay in sync with
@@ -1302,11 +1119,11 @@ static void decodeLLVMAttributesForBitcode(AttrBuilder &B,
if (Alignment)
B.addAlignmentAttr(Alignment);
- B.addRawValue(((EncodedAttrs & (0xfffffULL << 32)) >> 11) |
- (EncodedAttrs & 0xffff));
+ addRawAttributeValue(B, ((EncodedAttrs & (0xfffffULL << 32)) >> 11) |
+ (EncodedAttrs & 0xffff));
}
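
// Editorial worked example (not part of this patch) of the legacy 64-bit
// encoding handled above: attribute bits 0-15 live in the low 16 bits of the
// encoded word and attribute bits 21-40 live in bits 32-51, hence the shift
// right by 11 before merging.
#include <cassert>
#include <cstdint>

static void encodedAttrsExample() {
  uint64_t EncodedAttrs = (1ULL << 3)    // attribute bit 3 (inreg in the table above)
                        | (1ULL << 32);  // maps to attribute bit 21 (nocapture)
  uint64_t Merged = ((EncodedAttrs & (0xfffffULL << 32)) >> 11) |
                    (EncodedAttrs & 0xffff);
  assert(Merged == ((1ULL << 3) | (1ULL << 21)));
}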
-std::error_code BitcodeReader::parseAttributeBlock() {
+Error BitcodeReader::parseAttributeBlock() {
if (Stream.EnterSubBlock(bitc::PARAMATTR_BLOCK_ID))
return error("Invalid record");
@@ -1318,7 +1135,7 @@ std::error_code BitcodeReader::parseAttributeBlock() {
SmallVector<AttributeSet, 8> Attrs;
// Read all the records.
- while (1) {
+ while (true) {
BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
switch (Entry.Kind) {
@@ -1326,7 +1143,7 @@ std::error_code BitcodeReader::parseAttributeBlock() {
case BitstreamEntry::Error:
return error("Malformed block");
case BitstreamEntry::EndBlock:
- return std::error_code();
+ return Error::success();
case BitstreamEntry::Record:
// The interesting case.
break;
@@ -1476,26 +1293,24 @@ static Attribute::AttrKind getAttrFromCode(uint64_t Code) {
}
}
-std::error_code BitcodeReader::parseAlignmentValue(uint64_t Exponent,
- unsigned &Alignment) {
+Error BitcodeReader::parseAlignmentValue(uint64_t Exponent,
+ unsigned &Alignment) {
// Note: Alignment in bitcode files is incremented by 1, so that zero
// can be used for default alignment.
if (Exponent > Value::MaxAlignmentExponent + 1)
return error("Invalid alignment value");
Alignment = (1 << static_cast<unsigned>(Exponent)) >> 1;
- return std::error_code();
+ return Error::success();
}
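
// Editorial worked example (not part of this patch) of the alignment encoding
// handled above: the file stores log2(alignment) + 1 so that a stored zero
// means "default alignment".
//   Exponent 0 -> (1 << 0) >> 1 == 0  (default)
//   Exponent 1 -> (1 << 1) >> 1 == 1
//   Exponent 4 -> (1 << 4) >> 1 == 8
static unsigned decodeAlignmentExponent(uint64_t Exponent) {
  return (1u << static_cast<unsigned>(Exponent)) >> 1;  // mirrors parseAlignmentValue
}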
-std::error_code BitcodeReader::parseAttrKind(uint64_t Code,
- Attribute::AttrKind *Kind) {
+Error BitcodeReader::parseAttrKind(uint64_t Code, Attribute::AttrKind *Kind) {
*Kind = getAttrFromCode(Code);
if (*Kind == Attribute::None)
- return error(BitcodeError::CorruptedBitcode,
- "Unknown attribute kind (" + Twine(Code) + ")");
- return std::error_code();
+ return error("Unknown attribute kind (" + Twine(Code) + ")");
+ return Error::success();
}
-std::error_code BitcodeReader::parseAttributeGroupBlock() {
+Error BitcodeReader::parseAttributeGroupBlock() {
if (Stream.EnterSubBlock(bitc::PARAMATTR_GROUP_BLOCK_ID))
return error("Invalid record");
@@ -1505,7 +1320,7 @@ std::error_code BitcodeReader::parseAttributeGroupBlock() {
SmallVector<uint64_t, 64> Record;
// Read all the records.
- while (1) {
+ while (true) {
BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
switch (Entry.Kind) {
@@ -1513,7 +1328,7 @@ std::error_code BitcodeReader::parseAttributeGroupBlock() {
case BitstreamEntry::Error:
return error("Malformed block");
case BitstreamEntry::EndBlock:
- return std::error_code();
+ return Error::success();
case BitstreamEntry::Record:
// The interesting case.
break;
@@ -1535,14 +1350,14 @@ std::error_code BitcodeReader::parseAttributeGroupBlock() {
for (unsigned i = 2, e = Record.size(); i != e; ++i) {
if (Record[i] == 0) { // Enum attribute
Attribute::AttrKind Kind;
- if (std::error_code EC = parseAttrKind(Record[++i], &Kind))
- return EC;
+ if (Error Err = parseAttrKind(Record[++i], &Kind))
+ return Err;
B.addAttribute(Kind);
} else if (Record[i] == 1) { // Integer attribute
Attribute::AttrKind Kind;
- if (std::error_code EC = parseAttrKind(Record[++i], &Kind))
- return EC;
+ if (Error Err = parseAttrKind(Record[++i], &Kind))
+ return Err;
if (Kind == Attribute::Alignment)
B.addAlignmentAttr(Record[++i]);
else if (Kind == Attribute::StackAlignment)
@@ -1583,14 +1398,14 @@ std::error_code BitcodeReader::parseAttributeGroupBlock() {
}
}
-std::error_code BitcodeReader::parseTypeTable() {
+Error BitcodeReader::parseTypeTable() {
if (Stream.EnterSubBlock(bitc::TYPE_BLOCK_ID_NEW))
return error("Invalid record");
return parseTypeTableBody();
}
-std::error_code BitcodeReader::parseTypeTableBody() {
+Error BitcodeReader::parseTypeTableBody() {
if (!TypeList.empty())
return error("Invalid multiple blocks");
@@ -1600,7 +1415,7 @@ std::error_code BitcodeReader::parseTypeTableBody() {
SmallString<64> TypeName;
// Read all the records for this type table.
- while (1) {
+ while (true) {
BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
switch (Entry.Kind) {
@@ -1610,7 +1425,7 @@ std::error_code BitcodeReader::parseTypeTableBody() {
case BitstreamEntry::EndBlock:
if (NumRecords != TypeList.size())
return error("Malformed block");
- return std::error_code();
+ return Error::success();
case BitstreamEntry::Record:
// The interesting case.
break;
@@ -1826,7 +1641,7 @@ std::error_code BitcodeReader::parseTypeTableBody() {
}
}
-std::error_code BitcodeReader::parseOperandBundleTags() {
+Error BitcodeReader::parseOperandBundleTags() {
if (Stream.EnterSubBlock(bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID))
return error("Invalid record");
@@ -1835,7 +1650,7 @@ std::error_code BitcodeReader::parseOperandBundleTags() {
SmallVector<uint64_t, 64> Record;
- while (1) {
+ while (true) {
BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
switch (Entry.Kind) {
@@ -1843,7 +1658,7 @@ std::error_code BitcodeReader::parseOperandBundleTags() {
case BitstreamEntry::Error:
return error("Malformed block");
case BitstreamEntry::EndBlock:
- return std::error_code();
+ return Error::success();
case BitstreamEntry::Record:
// The interesting case.
break;
@@ -1863,8 +1678,8 @@ std::error_code BitcodeReader::parseOperandBundleTags() {
}
/// Associate a value with its name from the given index in the provided record.
-ErrorOr<Value *> BitcodeReader::recordValue(SmallVectorImpl<uint64_t> &Record,
- unsigned NameIndex, Triple &TT) {
+Expected<Value *> BitcodeReader::recordValue(SmallVectorImpl<uint64_t> &Record,
+ unsigned NameIndex, Triple &TT) {
SmallString<128> ValueName;
if (convertToString(Record, NameIndex, ValueName))
return error("Invalid record");
@@ -1912,7 +1727,7 @@ static uint64_t jumpToValueSymbolTable(uint64_t Offset,
/// Parse the value symbol table at either the current parsing location or
/// at the given bit offset if provided.
-std::error_code BitcodeReader::parseValueSymbolTable(uint64_t Offset) {
+Error BitcodeReader::parseValueSymbolTable(uint64_t Offset) {
uint64_t CurrentBit;
// Pass in the Offset to distinguish between calling for the module-level
// VST (where we want to jump to the VST offset) and the function-level
@@ -1943,7 +1758,8 @@ std::error_code BitcodeReader::parseValueSymbolTable(uint64_t Offset) {
// Read all the records for this value table.
SmallString<128> ValueName;
- while (1) {
+
+ while (true) {
BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
switch (Entry.Kind) {
@@ -1953,7 +1769,7 @@ std::error_code BitcodeReader::parseValueSymbolTable(uint64_t Offset) {
case BitstreamEntry::EndBlock:
if (Offset > 0)
Stream.JumpToBit(CurrentBit);
- return std::error_code();
+ return Error::success();
case BitstreamEntry::Record:
// The interesting case.
break;
@@ -1965,17 +1781,17 @@ std::error_code BitcodeReader::parseValueSymbolTable(uint64_t Offset) {
default: // Default behavior: unknown type.
break;
case bitc::VST_CODE_ENTRY: { // VST_CODE_ENTRY: [valueid, namechar x N]
- ErrorOr<Value *> ValOrErr = recordValue(Record, 1, TT);
- if (std::error_code EC = ValOrErr.getError())
- return EC;
+ Expected<Value *> ValOrErr = recordValue(Record, 1, TT);
+ if (Error Err = ValOrErr.takeError())
+ return Err;
ValOrErr.get();
break;
}
case bitc::VST_CODE_FNENTRY: {
// VST_CODE_FNENTRY: [valueid, offset, namechar x N]
- ErrorOr<Value *> ValOrErr = recordValue(Record, 2, TT);
- if (std::error_code EC = ValOrErr.getError())
- return EC;
+ Expected<Value *> ValOrErr = recordValue(Record, 2, TT);
+ if (Error Err = ValOrErr.takeError())
+ return Err;
Value *V = ValOrErr.get();
auto *GO = dyn_cast<GlobalObject>(V);
@@ -1988,7 +1804,10 @@ std::error_code BitcodeReader::parseValueSymbolTable(uint64_t Offset) {
assert(GO);
}
- uint64_t FuncWordOffset = Record[1];
+ // Note that we subtract 1 here because the offset is relative to one word
+ // before the start of the identification or module block, which was
+ // historically always the start of the regular bitcode header.
+ uint64_t FuncWordOffset = Record[1] - 1;
Function *F = dyn_cast<Function>(GO);
assert(F);
uint64_t FuncBitOffset = FuncWordOffset * 32;
@@ -2015,748 +1834,6 @@ std::error_code BitcodeReader::parseValueSymbolTable(uint64_t Offset) {
}
}
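
// Editorial worked example (not part of this patch) of the VST_CODE_FNENTRY
// offset arithmetic above: the record stores a 32-bit-word offset measured
// from one word before the identification/module block, so the reader
// subtracts one word and scales to a bit offset.
static uint64_t vstWordOffsetToBit(uint64_t RecordWordOffset) {
  return (RecordWordOffset - 1) * 32;   // e.g. a stored offset of 10 -> bit 288
}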
-/// Parse a single METADATA_KIND record, inserting result in MDKindMap.
-std::error_code
-BitcodeReader::parseMetadataKindRecord(SmallVectorImpl<uint64_t> &Record) {
- if (Record.size() < 2)
- return error("Invalid record");
-
- unsigned Kind = Record[0];
- SmallString<8> Name(Record.begin() + 1, Record.end());
-
- unsigned NewKind = TheModule->getMDKindID(Name.str());
- if (!MDKindMap.insert(std::make_pair(Kind, NewKind)).second)
- return error("Conflicting METADATA_KIND records");
- return std::error_code();
-}
-
-static int64_t unrotateSign(uint64_t U) { return U & 1 ? ~(U >> 1) : U >> 1; }
-
-std::error_code BitcodeReader::parseMetadataStrings(ArrayRef<uint64_t> Record,
- StringRef Blob,
- unsigned &NextMetadataNo) {
- // All the MDStrings in the block are emitted together in a single
- // record. The strings are concatenated and stored in a blob along with
- // their sizes.
- if (Record.size() != 2)
- return error("Invalid record: metadata strings layout");
-
- unsigned NumStrings = Record[0];
- unsigned StringsOffset = Record[1];
- if (!NumStrings)
- return error("Invalid record: metadata strings with no strings");
- if (StringsOffset > Blob.size())
- return error("Invalid record: metadata strings corrupt offset");
-
- StringRef Lengths = Blob.slice(0, StringsOffset);
- SimpleBitstreamCursor R(*StreamFile);
- R.jumpToPointer(Lengths.begin());
-
- // Ensure that Blob doesn't get invalidated, even if this is reading from
- // a StreamingMemoryObject with corrupt data.
- R.setArtificialByteLimit(R.getCurrentByteNo() + StringsOffset);
-
- StringRef Strings = Blob.drop_front(StringsOffset);
- do {
- if (R.AtEndOfStream())
- return error("Invalid record: metadata strings bad length");
-
- unsigned Size = R.ReadVBR(6);
- if (Strings.size() < Size)
- return error("Invalid record: metadata strings truncated chars");
-
- MetadataList.assignValue(MDString::get(Context, Strings.slice(0, Size)),
- NextMetadataNo++);
- Strings = Strings.drop_front(Size);
- } while (--NumStrings);
-
- return std::error_code();
-}
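
// Editorial sketch (not part of this patch) of the METADATA_STRINGS blob
// shape handled by the removed helper (now owned by the MetadataLoader):
// Record = [NumStrings, StringsOffset]; the first StringsOffset bytes of the
// blob hold the string lengths (VBR6-encoded in the real format), the rest is
// the concatenated string data.  Plain one-byte lengths are used below purely
// for illustration.
#include <string>
#include <vector>

static std::vector<std::string> splitStringsBlob(const std::string &Blob,
                                                 size_t StringsOffset,
                                                 size_t NumStrings) {
  std::vector<std::string> Out;
  size_t LenPos = 0, StrPos = StringsOffset;
  for (size_t I = 0; I != NumStrings; ++I) {
    size_t Size = static_cast<unsigned char>(Blob[LenPos++]); // real reader: ReadVBR(6)
    Out.push_back(Blob.substr(StrPos, Size));
    StrPos += Size;
  }
  return Out;
}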
-
-namespace {
-class PlaceholderQueue {
- // Placeholders would thrash around when moved, so store in a std::deque
- // instead of some sort of vector.
- std::deque<DistinctMDOperandPlaceholder> PHs;
-
-public:
- DistinctMDOperandPlaceholder &getPlaceholderOp(unsigned ID);
- void flush(BitcodeReaderMetadataList &MetadataList);
-};
-} // end namespace
-
-DistinctMDOperandPlaceholder &PlaceholderQueue::getPlaceholderOp(unsigned ID) {
- PHs.emplace_back(ID);
- return PHs.back();
-}
-
-void PlaceholderQueue::flush(BitcodeReaderMetadataList &MetadataList) {
- while (!PHs.empty()) {
- PHs.front().replaceUseWith(
- MetadataList.getMetadataFwdRef(PHs.front().getID()));
- PHs.pop_front();
- }
-}
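
// Editorial usage sketch (not part of this patch), mirroring how the removed
// metadata parser used the queue above: hand out a placeholder while a
// distinct node's operand is still unresolved, then flush once the whole
// block has been read so every placeholder is replaced with the real node.
static llvm::Metadata *deferredOperand(PlaceholderQueue &Placeholders,
                                       unsigned ID) {
  // DistinctMDOperandPlaceholder is itself a Metadata node; it records ID so
  // that flush() can later swap in getMetadataFwdRef(ID).
  return &Placeholders.getPlaceholderOp(ID);
}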
-
-/// Parse a METADATA_BLOCK. If ModuleLevel is true then we are parsing
-/// module level metadata.
-std::error_code BitcodeReader::parseMetadata(bool ModuleLevel) {
- assert((ModuleLevel || DeferredMetadataInfo.empty()) &&
- "Must read all module-level metadata before function-level");
-
- IsMetadataMaterialized = true;
- unsigned NextMetadataNo = MetadataList.size();
-
- if (!ModuleLevel && MetadataList.hasFwdRefs())
- return error("Invalid metadata: fwd refs into function blocks");
-
- if (Stream.EnterSubBlock(bitc::METADATA_BLOCK_ID))
- return error("Invalid record");
-
- std::vector<std::pair<DICompileUnit *, Metadata *>> CUSubprograms;
- SmallVector<uint64_t, 64> Record;
-
- PlaceholderQueue Placeholders;
- bool IsDistinct;
- auto getMD = [&](unsigned ID) -> Metadata * {
- if (!IsDistinct)
- return MetadataList.getMetadataFwdRef(ID);
- if (auto *MD = MetadataList.getMetadataIfResolved(ID))
- return MD;
- return &Placeholders.getPlaceholderOp(ID);
- };
- auto getMDOrNull = [&](unsigned ID) -> Metadata * {
- if (ID)
- return getMD(ID - 1);
- return nullptr;
- };
- auto getMDOrNullWithoutPlaceholders = [&](unsigned ID) -> Metadata * {
- if (ID)
- return MetadataList.getMetadataFwdRef(ID - 1);
- return nullptr;
- };
- auto getMDString = [&](unsigned ID) -> MDString *{
- // This requires that the ID is not really a forward reference. In
- // particular, the MDString must already have been resolved.
- return cast_or_null<MDString>(getMDOrNull(ID));
- };
-
- // Support for old type refs.
- auto getDITypeRefOrNull = [&](unsigned ID) {
- return MetadataList.upgradeTypeRef(getMDOrNull(ID));
- };
-
-#define GET_OR_DISTINCT(CLASS, ARGS) \
- (IsDistinct ? CLASS::getDistinct ARGS : CLASS::get ARGS)
-
- // Read all the records.
- while (1) {
- BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
-
- switch (Entry.Kind) {
- case BitstreamEntry::SubBlock: // Handled for us already.
- case BitstreamEntry::Error:
- return error("Malformed block");
- case BitstreamEntry::EndBlock:
- // Upgrade old-style CU <-> SP pointers to point from SP to CU.
- for (auto CU_SP : CUSubprograms)
- if (auto *SPs = dyn_cast_or_null<MDTuple>(CU_SP.second))
- for (auto &Op : SPs->operands())
- if (auto *SP = dyn_cast_or_null<MDNode>(Op))
- SP->replaceOperandWith(7, CU_SP.first);
-
- MetadataList.tryToResolveCycles();
- Placeholders.flush(MetadataList);
- return std::error_code();
- case BitstreamEntry::Record:
- // The interesting case.
- break;
- }
-
- // Read a record.
- Record.clear();
- StringRef Blob;
- unsigned Code = Stream.readRecord(Entry.ID, Record, &Blob);
- IsDistinct = false;
- switch (Code) {
- default: // Default behavior: ignore.
- break;
- case bitc::METADATA_NAME: {
- // Read name of the named metadata.
- SmallString<8> Name(Record.begin(), Record.end());
- Record.clear();
- Code = Stream.ReadCode();
-
- unsigned NextBitCode = Stream.readRecord(Code, Record);
- if (NextBitCode != bitc::METADATA_NAMED_NODE)
- return error("METADATA_NAME not followed by METADATA_NAMED_NODE");
-
- // Read named metadata elements.
- unsigned Size = Record.size();
- NamedMDNode *NMD = TheModule->getOrInsertNamedMetadata(Name);
- for (unsigned i = 0; i != Size; ++i) {
- MDNode *MD = MetadataList.getMDNodeFwdRefOrNull(Record[i]);
- if (!MD)
- return error("Invalid record");
- NMD->addOperand(MD);
- }
- break;
- }
- case bitc::METADATA_OLD_FN_NODE: {
- // FIXME: Remove in 4.0.
- // This is a LocalAsMetadata record, the only type of function-local
- // metadata.
- if (Record.size() % 2 == 1)
- return error("Invalid record");
-
- // If this isn't a LocalAsMetadata record, we're dropping it. This used
- // to be legal, but there's no upgrade path.
- auto dropRecord = [&] {
- MetadataList.assignValue(MDNode::get(Context, None), NextMetadataNo++);
- };
- if (Record.size() != 2) {
- dropRecord();
- break;
- }
-
- Type *Ty = getTypeByID(Record[0]);
- if (Ty->isMetadataTy() || Ty->isVoidTy()) {
- dropRecord();
- break;
- }
-
- MetadataList.assignValue(
- LocalAsMetadata::get(ValueList.getValueFwdRef(Record[1], Ty)),
- NextMetadataNo++);
- break;
- }
- case bitc::METADATA_OLD_NODE: {
- // FIXME: Remove in 4.0.
- if (Record.size() % 2 == 1)
- return error("Invalid record");
-
- unsigned Size = Record.size();
- SmallVector<Metadata *, 8> Elts;
- for (unsigned i = 0; i != Size; i += 2) {
- Type *Ty = getTypeByID(Record[i]);
- if (!Ty)
- return error("Invalid record");
- if (Ty->isMetadataTy())
- Elts.push_back(getMD(Record[i + 1]));
- else if (!Ty->isVoidTy()) {
- auto *MD =
- ValueAsMetadata::get(ValueList.getValueFwdRef(Record[i + 1], Ty));
- assert(isa<ConstantAsMetadata>(MD) &&
- "Expected non-function-local metadata");
- Elts.push_back(MD);
- } else
- Elts.push_back(nullptr);
- }
- MetadataList.assignValue(MDNode::get(Context, Elts), NextMetadataNo++);
- break;
- }
- case bitc::METADATA_VALUE: {
- if (Record.size() != 2)
- return error("Invalid record");
-
- Type *Ty = getTypeByID(Record[0]);
- if (Ty->isMetadataTy() || Ty->isVoidTy())
- return error("Invalid record");
-
- MetadataList.assignValue(
- ValueAsMetadata::get(ValueList.getValueFwdRef(Record[1], Ty)),
- NextMetadataNo++);
- break;
- }
- case bitc::METADATA_DISTINCT_NODE:
- IsDistinct = true;
- // fallthrough...
- case bitc::METADATA_NODE: {
- SmallVector<Metadata *, 8> Elts;
- Elts.reserve(Record.size());
- for (unsigned ID : Record)
- Elts.push_back(getMDOrNull(ID));
- MetadataList.assignValue(IsDistinct ? MDNode::getDistinct(Context, Elts)
- : MDNode::get(Context, Elts),
- NextMetadataNo++);
- break;
- }
- case bitc::METADATA_LOCATION: {
- if (Record.size() != 5)
- return error("Invalid record");
-
- IsDistinct = Record[0];
- unsigned Line = Record[1];
- unsigned Column = Record[2];
- Metadata *Scope = getMD(Record[3]);
- Metadata *InlinedAt = getMDOrNull(Record[4]);
- MetadataList.assignValue(
- GET_OR_DISTINCT(DILocation,
- (Context, Line, Column, Scope, InlinedAt)),
- NextMetadataNo++);
- break;
- }
- case bitc::METADATA_GENERIC_DEBUG: {
- if (Record.size() < 4)
- return error("Invalid record");
-
- IsDistinct = Record[0];
- unsigned Tag = Record[1];
- unsigned Version = Record[2];
-
- if (Tag >= 1u << 16 || Version != 0)
- return error("Invalid record");
-
- auto *Header = getMDString(Record[3]);
- SmallVector<Metadata *, 8> DwarfOps;
- for (unsigned I = 4, E = Record.size(); I != E; ++I)
- DwarfOps.push_back(getMDOrNull(Record[I]));
- MetadataList.assignValue(
- GET_OR_DISTINCT(GenericDINode, (Context, Tag, Header, DwarfOps)),
- NextMetadataNo++);
- break;
- }
- case bitc::METADATA_SUBRANGE: {
- if (Record.size() != 3)
- return error("Invalid record");
-
- IsDistinct = Record[0];
- MetadataList.assignValue(
- GET_OR_DISTINCT(DISubrange,
- (Context, Record[1], unrotateSign(Record[2]))),
- NextMetadataNo++);
- break;
- }
- case bitc::METADATA_ENUMERATOR: {
- if (Record.size() != 3)
- return error("Invalid record");
-
- IsDistinct = Record[0];
- MetadataList.assignValue(
- GET_OR_DISTINCT(DIEnumerator, (Context, unrotateSign(Record[1]),
- getMDString(Record[2]))),
- NextMetadataNo++);
- break;
- }
- case bitc::METADATA_BASIC_TYPE: {
- if (Record.size() != 6)
- return error("Invalid record");
-
- IsDistinct = Record[0];
- MetadataList.assignValue(
- GET_OR_DISTINCT(DIBasicType,
- (Context, Record[1], getMDString(Record[2]),
- Record[3], Record[4], Record[5])),
- NextMetadataNo++);
- break;
- }
- case bitc::METADATA_DERIVED_TYPE: {
- if (Record.size() != 12)
- return error("Invalid record");
-
- IsDistinct = Record[0];
- MetadataList.assignValue(
- GET_OR_DISTINCT(
- DIDerivedType,
- (Context, Record[1], getMDString(Record[2]),
- getMDOrNull(Record[3]), Record[4], getDITypeRefOrNull(Record[5]),
- getDITypeRefOrNull(Record[6]), Record[7], Record[8], Record[9],
- Record[10], getDITypeRefOrNull(Record[11]))),
- NextMetadataNo++);
- break;
- }
- case bitc::METADATA_COMPOSITE_TYPE: {
- if (Record.size() != 16)
- return error("Invalid record");
-
- // If we have a UUID and this is not a forward declaration, lookup the
- // mapping.
- IsDistinct = Record[0] & 0x1;
- bool IsNotUsedInTypeRef = Record[0] >= 2;
- unsigned Tag = Record[1];
- MDString *Name = getMDString(Record[2]);
- Metadata *File = getMDOrNull(Record[3]);
- unsigned Line = Record[4];
- Metadata *Scope = getDITypeRefOrNull(Record[5]);
- Metadata *BaseType = getDITypeRefOrNull(Record[6]);
- uint64_t SizeInBits = Record[7];
- uint64_t AlignInBits = Record[8];
- uint64_t OffsetInBits = Record[9];
- unsigned Flags = Record[10];
- Metadata *Elements = getMDOrNull(Record[11]);
- unsigned RuntimeLang = Record[12];
- Metadata *VTableHolder = getDITypeRefOrNull(Record[13]);
- Metadata *TemplateParams = getMDOrNull(Record[14]);
- auto *Identifier = getMDString(Record[15]);
- DICompositeType *CT = nullptr;
- if (Identifier)
- CT = DICompositeType::buildODRType(
- Context, *Identifier, Tag, Name, File, Line, Scope, BaseType,
- SizeInBits, AlignInBits, OffsetInBits, Flags, Elements, RuntimeLang,
- VTableHolder, TemplateParams);
-
- // Create a node if we didn't get a lazy ODR type.
- if (!CT)
- CT = GET_OR_DISTINCT(DICompositeType,
- (Context, Tag, Name, File, Line, Scope, BaseType,
- SizeInBits, AlignInBits, OffsetInBits, Flags,
- Elements, RuntimeLang, VTableHolder,
- TemplateParams, Identifier));
- if (!IsNotUsedInTypeRef && Identifier)
- MetadataList.addTypeRef(*Identifier, *cast<DICompositeType>(CT));
-
- MetadataList.assignValue(CT, NextMetadataNo++);
- break;
- }
- case bitc::METADATA_SUBROUTINE_TYPE: {
- if (Record.size() < 3 || Record.size() > 4)
- return error("Invalid record");
- bool IsOldTypeRefArray = Record[0] < 2;
- unsigned CC = (Record.size() > 3) ? Record[3] : 0;
-
- IsDistinct = Record[0] & 0x1;
- Metadata *Types = getMDOrNull(Record[2]);
- if (LLVM_UNLIKELY(IsOldTypeRefArray))
- Types = MetadataList.upgradeTypeRefArray(Types);
-
- MetadataList.assignValue(
- GET_OR_DISTINCT(DISubroutineType, (Context, Record[1], CC, Types)),
- NextMetadataNo++);
- break;
- }
-
- case bitc::METADATA_MODULE: {
- if (Record.size() != 6)
- return error("Invalid record");
-
- IsDistinct = Record[0];
- MetadataList.assignValue(
- GET_OR_DISTINCT(DIModule,
- (Context, getMDOrNull(Record[1]),
- getMDString(Record[2]), getMDString(Record[3]),
- getMDString(Record[4]), getMDString(Record[5]))),
- NextMetadataNo++);
- break;
- }
-
- case bitc::METADATA_FILE: {
- if (Record.size() != 3)
- return error("Invalid record");
-
- IsDistinct = Record[0];
- MetadataList.assignValue(
- GET_OR_DISTINCT(DIFile, (Context, getMDString(Record[1]),
- getMDString(Record[2]))),
- NextMetadataNo++);
- break;
- }
- case bitc::METADATA_COMPILE_UNIT: {
- if (Record.size() < 14 || Record.size() > 16)
- return error("Invalid record");
-
- // Ignore Record[0], which indicates whether this compile unit is
- // distinct. It's always distinct.
- IsDistinct = true;
- auto *CU = DICompileUnit::getDistinct(
- Context, Record[1], getMDOrNull(Record[2]), getMDString(Record[3]),
- Record[4], getMDString(Record[5]), Record[6], getMDString(Record[7]),
- Record[8], getMDOrNull(Record[9]), getMDOrNull(Record[10]),
- getMDOrNull(Record[12]), getMDOrNull(Record[13]),
- Record.size() <= 15 ? nullptr : getMDOrNull(Record[15]),
- Record.size() <= 14 ? 0 : Record[14]);
-
- MetadataList.assignValue(CU, NextMetadataNo++);
-
- // Upgrade the list of subprograms.
- if (Metadata *SPs = getMDOrNullWithoutPlaceholders(Record[11]))
- CUSubprograms.push_back({CU, SPs});
- break;
- }
- case bitc::METADATA_SUBPROGRAM: {
- if (Record.size() < 18 || Record.size() > 20)
- return error("Invalid record");
-
- IsDistinct =
- (Record[0] & 1) || Record[8]; // All definitions should be distinct.
- // Version 1 has a Function as Record[15].
- // Version 2 has removed Record[15].
- // Version 3 has the Unit as Record[15].
- // Version 4 added thisAdjustment.
- bool HasUnit = Record[0] >= 2;
- if (HasUnit && Record.size() < 19)
- return error("Invalid record");
- Metadata *CUorFn = getMDOrNull(Record[15]);
- unsigned Offset = Record.size() >= 19 ? 1 : 0;
- bool HasFn = Offset && !HasUnit;
- bool HasThisAdj = Record.size() >= 20;
- DISubprogram *SP = GET_OR_DISTINCT(
- DISubprogram, (Context,
- getDITypeRefOrNull(Record[1]), // scope
- getMDString(Record[2]), // name
- getMDString(Record[3]), // linkageName
- getMDOrNull(Record[4]), // file
- Record[5], // line
- getMDOrNull(Record[6]), // type
- Record[7], // isLocal
- Record[8], // isDefinition
- Record[9], // scopeLine
- getDITypeRefOrNull(Record[10]), // containingType
- Record[11], // virtuality
- Record[12], // virtualIndex
- HasThisAdj ? Record[19] : 0, // thisAdjustment
- Record[13], // flags
- Record[14], // isOptimized
- HasUnit ? CUorFn : nullptr, // unit
- getMDOrNull(Record[15 + Offset]), // templateParams
- getMDOrNull(Record[16 + Offset]), // declaration
- getMDOrNull(Record[17 + Offset]) // variables
- ));
- MetadataList.assignValue(SP, NextMetadataNo++);
-
- // Upgrade sp->function mapping to function->sp mapping.
- if (HasFn) {
- if (auto *CMD = dyn_cast_or_null<ConstantAsMetadata>(CUorFn))
- if (auto *F = dyn_cast<Function>(CMD->getValue())) {
- if (F->isMaterializable())
- // Defer until materialized; unmaterialized functions may not have
- // metadata.
- FunctionsWithSPs[F] = SP;
- else if (!F->empty())
- F->setSubprogram(SP);
- }
- }
- break;
- }
- case bitc::METADATA_LEXICAL_BLOCK: {
- if (Record.size() != 5)
- return error("Invalid record");
-
- IsDistinct = Record[0];
- MetadataList.assignValue(
- GET_OR_DISTINCT(DILexicalBlock,
- (Context, getMDOrNull(Record[1]),
- getMDOrNull(Record[2]), Record[3], Record[4])),
- NextMetadataNo++);
- break;
- }
- case bitc::METADATA_LEXICAL_BLOCK_FILE: {
- if (Record.size() != 4)
- return error("Invalid record");
-
- IsDistinct = Record[0];
- MetadataList.assignValue(
- GET_OR_DISTINCT(DILexicalBlockFile,
- (Context, getMDOrNull(Record[1]),
- getMDOrNull(Record[2]), Record[3])),
- NextMetadataNo++);
- break;
- }
- case bitc::METADATA_NAMESPACE: {
- if (Record.size() != 5)
- return error("Invalid record");
-
- IsDistinct = Record[0];
- MetadataList.assignValue(
- GET_OR_DISTINCT(DINamespace, (Context, getMDOrNull(Record[1]),
- getMDOrNull(Record[2]),
- getMDString(Record[3]), Record[4])),
- NextMetadataNo++);
- break;
- }
- case bitc::METADATA_MACRO: {
- if (Record.size() != 5)
- return error("Invalid record");
-
- IsDistinct = Record[0];
- MetadataList.assignValue(
- GET_OR_DISTINCT(DIMacro,
- (Context, Record[1], Record[2],
- getMDString(Record[3]), getMDString(Record[4]))),
- NextMetadataNo++);
- break;
- }
- case bitc::METADATA_MACRO_FILE: {
- if (Record.size() != 5)
- return error("Invalid record");
-
- IsDistinct = Record[0];
- MetadataList.assignValue(
- GET_OR_DISTINCT(DIMacroFile,
- (Context, Record[1], Record[2],
- getMDOrNull(Record[3]), getMDOrNull(Record[4]))),
- NextMetadataNo++);
- break;
- }
- case bitc::METADATA_TEMPLATE_TYPE: {
- if (Record.size() != 3)
- return error("Invalid record");
-
- IsDistinct = Record[0];
- MetadataList.assignValue(GET_OR_DISTINCT(DITemplateTypeParameter,
- (Context, getMDString(Record[1]),
- getDITypeRefOrNull(Record[2]))),
- NextMetadataNo++);
- break;
- }
- case bitc::METADATA_TEMPLATE_VALUE: {
- if (Record.size() != 5)
- return error("Invalid record");
-
- IsDistinct = Record[0];
- MetadataList.assignValue(
- GET_OR_DISTINCT(DITemplateValueParameter,
- (Context, Record[1], getMDString(Record[2]),
- getDITypeRefOrNull(Record[3]),
- getMDOrNull(Record[4]))),
- NextMetadataNo++);
- break;
- }
- case bitc::METADATA_GLOBAL_VAR: {
- if (Record.size() != 11)
- return error("Invalid record");
-
- IsDistinct = Record[0];
- MetadataList.assignValue(
- GET_OR_DISTINCT(DIGlobalVariable,
- (Context, getMDOrNull(Record[1]),
- getMDString(Record[2]), getMDString(Record[3]),
- getMDOrNull(Record[4]), Record[5],
- getDITypeRefOrNull(Record[6]), Record[7], Record[8],
- getMDOrNull(Record[9]), getMDOrNull(Record[10]))),
- NextMetadataNo++);
- break;
- }
- case bitc::METADATA_LOCAL_VAR: {
- // 10th field is for the obsoleted 'inlinedAt:' field.
- if (Record.size() < 8 || Record.size() > 10)
- return error("Invalid record");
-
- // 2nd field used to be an artificial tag, either DW_TAG_auto_variable or
- // DW_TAG_arg_variable.
- IsDistinct = Record[0];
- bool HasTag = Record.size() > 8;
- MetadataList.assignValue(
- GET_OR_DISTINCT(DILocalVariable,
- (Context, getMDOrNull(Record[1 + HasTag]),
- getMDString(Record[2 + HasTag]),
- getMDOrNull(Record[3 + HasTag]), Record[4 + HasTag],
- getDITypeRefOrNull(Record[5 + HasTag]),
- Record[6 + HasTag], Record[7 + HasTag])),
- NextMetadataNo++);
- break;
- }
- case bitc::METADATA_EXPRESSION: {
- if (Record.size() < 1)
- return error("Invalid record");
-
- IsDistinct = Record[0];
- MetadataList.assignValue(
- GET_OR_DISTINCT(DIExpression,
- (Context, makeArrayRef(Record).slice(1))),
- NextMetadataNo++);
- break;
- }
- case bitc::METADATA_OBJC_PROPERTY: {
- if (Record.size() != 8)
- return error("Invalid record");
-
- IsDistinct = Record[0];
- MetadataList.assignValue(
- GET_OR_DISTINCT(DIObjCProperty,
- (Context, getMDString(Record[1]),
- getMDOrNull(Record[2]), Record[3],
- getMDString(Record[4]), getMDString(Record[5]),
- Record[6], getDITypeRefOrNull(Record[7]))),
- NextMetadataNo++);
- break;
- }
- case bitc::METADATA_IMPORTED_ENTITY: {
- if (Record.size() != 6)
- return error("Invalid record");
-
- IsDistinct = Record[0];
- MetadataList.assignValue(
- GET_OR_DISTINCT(DIImportedEntity,
- (Context, Record[1], getMDOrNull(Record[2]),
- getDITypeRefOrNull(Record[3]), Record[4],
- getMDString(Record[5]))),
- NextMetadataNo++);
- break;
- }
- case bitc::METADATA_STRING_OLD: {
- std::string String(Record.begin(), Record.end());
-
- // Test for upgrading !llvm.loop.
- HasSeenOldLoopTags |= mayBeOldLoopAttachmentTag(String);
-
- Metadata *MD = MDString::get(Context, String);
- MetadataList.assignValue(MD, NextMetadataNo++);
- break;
- }
- case bitc::METADATA_STRINGS:
- if (std::error_code EC =
- parseMetadataStrings(Record, Blob, NextMetadataNo))
- return EC;
- break;
- case bitc::METADATA_GLOBAL_DECL_ATTACHMENT: {
- if (Record.size() % 2 == 0)
- return error("Invalid record");
- unsigned ValueID = Record[0];
- if (ValueID >= ValueList.size())
- return error("Invalid record");
- if (auto *GO = dyn_cast<GlobalObject>(ValueList[ValueID]))
- parseGlobalObjectAttachment(*GO, ArrayRef<uint64_t>(Record).slice(1));
- break;
- }
- case bitc::METADATA_KIND: {
- // Support older bitcode files that had METADATA_KIND records in a
- // block with METADATA_BLOCK_ID.
- if (std::error_code EC = parseMetadataKindRecord(Record))
- return EC;
- break;
- }
- }
- }
-#undef GET_OR_DISTINCT
-}
-
-/// Parse the metadata kinds out of the METADATA_KIND_BLOCK.
-std::error_code BitcodeReader::parseMetadataKinds() {
- if (Stream.EnterSubBlock(bitc::METADATA_KIND_BLOCK_ID))
- return error("Invalid record");
-
- SmallVector<uint64_t, 64> Record;
-
- // Read all the records.
- while (1) {
- BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
-
- switch (Entry.Kind) {
- case BitstreamEntry::SubBlock: // Handled for us already.
- case BitstreamEntry::Error:
- return error("Malformed block");
- case BitstreamEntry::EndBlock:
- return std::error_code();
- case BitstreamEntry::Record:
- // The interesting case.
- break;
- }
-
- // Read a record.
- Record.clear();
- unsigned Code = Stream.readRecord(Entry.ID, Record);
- switch (Code) {
- default: // Default behavior: ignore.
- break;
- case bitc::METADATA_KIND: {
- if (std::error_code EC = parseMetadataKindRecord(Record))
- return EC;
- break;
- }
- }
- }
-}
-
/// Decode a signed value stored with the sign bit in the LSB for dense VBR
/// encoding.
uint64_t BitcodeReader::decodeSignRotatedValue(uint64_t V) {
@@ -2769,7 +1846,7 @@ uint64_t BitcodeReader::decodeSignRotatedValue(uint64_t V) {
}
/// Resolve all of the initializers for global values and aliases that we can.
-std::error_code BitcodeReader::resolveGlobalAndIndirectSymbolInits() {
+Error BitcodeReader::resolveGlobalAndIndirectSymbolInits() {
std::vector<std::pair<GlobalVariable*, unsigned> > GlobalInitWorklist;
std::vector<std::pair<GlobalIndirectSymbol*, unsigned> >
IndirectSymbolInitWorklist;
@@ -2852,18 +1929,18 @@ std::error_code BitcodeReader::resolveGlobalAndIndirectSymbolInits() {
FunctionPersonalityFnWorklist.pop_back();
}
- return std::error_code();
+ return Error::success();
}
static APInt readWideAPInt(ArrayRef<uint64_t> Vals, unsigned TypeBits) {
SmallVector<uint64_t, 8> Words(Vals.size());
- std::transform(Vals.begin(), Vals.end(), Words.begin(),
+ transform(Vals, Words.begin(),
BitcodeReader::decodeSignRotatedValue);
return APInt(TypeBits, Words);
}
-std::error_code BitcodeReader::parseConstants() {
+Error BitcodeReader::parseConstants() {
if (Stream.EnterSubBlock(bitc::CONSTANTS_BLOCK_ID))
return error("Invalid record");
@@ -2872,7 +1949,8 @@ std::error_code BitcodeReader::parseConstants() {
// Read all the records for this value table.
Type *CurTy = Type::getInt32Ty(Context);
unsigned NextCstNo = ValueList.size();
- while (1) {
+
+ while (true) {
BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
switch (Entry.Kind) {
@@ -2886,7 +1964,7 @@ std::error_code BitcodeReader::parseConstants() {
// Once all the constants have been read, go through and resolve forward
// references.
ValueList.resolveConstantForwardRefs();
- return std::error_code();
+ return Error::success();
case BitstreamEntry::Record:
// The interesting case.
break;
@@ -2933,26 +2011,26 @@ std::error_code BitcodeReader::parseConstants() {
if (Record.empty())
return error("Invalid record");
if (CurTy->isHalfTy())
- V = ConstantFP::get(Context, APFloat(APFloat::IEEEhalf,
+ V = ConstantFP::get(Context, APFloat(APFloat::IEEEhalf(),
APInt(16, (uint16_t)Record[0])));
else if (CurTy->isFloatTy())
- V = ConstantFP::get(Context, APFloat(APFloat::IEEEsingle,
+ V = ConstantFP::get(Context, APFloat(APFloat::IEEEsingle(),
APInt(32, (uint32_t)Record[0])));
else if (CurTy->isDoubleTy())
- V = ConstantFP::get(Context, APFloat(APFloat::IEEEdouble,
+ V = ConstantFP::get(Context, APFloat(APFloat::IEEEdouble(),
APInt(64, Record[0])));
else if (CurTy->isX86_FP80Ty()) {
// Bits are not stored the same way as a normal i80 APInt, compensate.
uint64_t Rearrange[2];
Rearrange[0] = (Record[1] & 0xffffLL) | (Record[0] << 16);
Rearrange[1] = Record[0] >> 48;
- V = ConstantFP::get(Context, APFloat(APFloat::x87DoubleExtended,
+ V = ConstantFP::get(Context, APFloat(APFloat::x87DoubleExtended(),
APInt(80, Rearrange)));
} else if (CurTy->isFP128Ty())
- V = ConstantFP::get(Context, APFloat(APFloat::IEEEquad,
+ V = ConstantFP::get(Context, APFloat(APFloat::IEEEquad(),
APInt(128, Record)));
else if (CurTy->isPPC_FP128Ty())
- V = ConstantFP::get(Context, APFloat(APFloat::PPCDoubleDouble,
+ V = ConstantFP::get(Context, APFloat(APFloat::PPCDoubleDouble(),
APInt(128, Record)));
else
V = UndefValue::get(CurTy);
@@ -3095,12 +2173,25 @@ std::error_code BitcodeReader::parseConstants() {
}
break;
}
- case bitc::CST_CODE_CE_INBOUNDS_GEP:
- case bitc::CST_CODE_CE_GEP: { // CE_GEP: [n x operands]
+ case bitc::CST_CODE_CE_INBOUNDS_GEP: // [ty, n x operands]
+ case bitc::CST_CODE_CE_GEP: // [ty, n x operands]
+ case bitc::CST_CODE_CE_GEP_WITH_INRANGE_INDEX: { // [ty, flags, n x
+ // operands]
unsigned OpNum = 0;
Type *PointeeType = nullptr;
- if (Record.size() % 2)
+ if (BitCode == bitc::CST_CODE_CE_GEP_WITH_INRANGE_INDEX ||
+ Record.size() % 2)
PointeeType = getTypeByID(Record[OpNum++]);
+
+ bool InBounds = false;
+ Optional<unsigned> InRangeIndex;
+ if (BitCode == bitc::CST_CODE_CE_GEP_WITH_INRANGE_INDEX) {
+ uint64_t Op = Record[OpNum++];
+ InBounds = Op & 1;
+ InRangeIndex = Op >> 1;
+ } else if (BitCode == bitc::CST_CODE_CE_INBOUNDS_GEP)
+ InBounds = true;
+
SmallVector<Constant*, 16> Elts;
while (OpNum != Record.size()) {
Type *ElTy = getTypeByID(Record[OpNum++]);
@@ -3111,7 +2202,7 @@ std::error_code BitcodeReader::parseConstants() {
if (PointeeType &&
PointeeType !=
- cast<SequentialType>(Elts[0]->getType()->getScalarType())
+ cast<PointerType>(Elts[0]->getType()->getScalarType())
->getElementType())
return error("Explicit gep operator type does not match pointee type "
"of pointer operand");
@@ -3121,8 +2212,7 @@ std::error_code BitcodeReader::parseConstants() {
ArrayRef<Constant *> Indices(Elts.begin() + 1, Elts.end());
V = ConstantExpr::getGetElementPtr(PointeeType, Elts[0], Indices,
- BitCode ==
- bitc::CST_CODE_CE_INBOUNDS_GEP);
+ InBounds, InRangeIndex);
break;
}
case bitc::CST_CODE_CE_SELECT: { // CE_SELECT: [opval#, opval#, opval#]
@@ -3326,13 +2416,14 @@ std::error_code BitcodeReader::parseConstants() {
}
}
-std::error_code BitcodeReader::parseUseLists() {
+Error BitcodeReader::parseUseLists() {
if (Stream.EnterSubBlock(bitc::USELIST_BLOCK_ID))
return error("Invalid record");
// Read all the records.
SmallVector<uint64_t, 64> Record;
- while (1) {
+
+ while (true) {
BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
switch (Entry.Kind) {
@@ -3340,7 +2431,7 @@ std::error_code BitcodeReader::parseUseLists() {
case BitstreamEntry::Error:
return error("Malformed block");
case BitstreamEntry::EndBlock:
- return std::error_code();
+ return Error::success();
case BitstreamEntry::Record:
// The interesting case.
break;
@@ -3354,7 +2445,7 @@ std::error_code BitcodeReader::parseUseLists() {
break;
case bitc::USELIST_CODE_BB:
IsBB = true;
- // fallthrough
+ LLVM_FALLTHROUGH;
case bitc::USELIST_CODE_DEFAULT: {
unsigned RecordLength = Record.size();
if (RecordLength < 3)
@@ -3392,7 +2483,7 @@ std::error_code BitcodeReader::parseUseLists() {
/// When we see the block for metadata, remember where it is and then skip it.
/// This lets us lazily deserialize the metadata.
-std::error_code BitcodeReader::rememberAndSkipMetadata() {
+Error BitcodeReader::rememberAndSkipMetadata() {
// Save the current stream state.
uint64_t CurBit = Stream.GetCurrentBitNo();
DeferredMetadataInfo.push_back(CurBit);
@@ -3400,25 +2491,25 @@ std::error_code BitcodeReader::rememberAndSkipMetadata() {
// Skip over the block for now.
if (Stream.SkipBlock())
return error("Invalid record");
- return std::error_code();
+ return Error::success();
}
-std::error_code BitcodeReader::materializeMetadata() {
+Error BitcodeReader::materializeMetadata() {
for (uint64_t BitPos : DeferredMetadataInfo) {
// Move the bit stream to the saved position.
Stream.JumpToBit(BitPos);
- if (std::error_code EC = parseMetadata(true))
- return EC;
+ if (Error Err = MDLoader->parseModuleMetadata())
+ return Err;
}
DeferredMetadataInfo.clear();
- return std::error_code();
+ return Error::success();
}
void BitcodeReader::setStripDebugInfo() { StripDebugInfo = true; }
/// When we see the block for a function body, remember where it is and then
/// skip it. This lets us lazily deserialize the functions.
-std::error_code BitcodeReader::rememberAndSkipFunctionBody() {
+Error BitcodeReader::rememberAndSkipFunctionBody() {
// Get the function we are talking about.
if (FunctionsWithBodies.empty())
return error("Insufficient function protos");
@@ -3436,12 +2527,13 @@ std::error_code BitcodeReader::rememberAndSkipFunctionBody() {
// Skip over the function block for now.
if (Stream.SkipBlock())
return error("Invalid record");
- return std::error_code();
+ return Error::success();
}
-std::error_code BitcodeReader::globalCleanup() {
+Error BitcodeReader::globalCleanup() {
// Patch the initializers for globals and aliases up.
- resolveGlobalAndIndirectSymbolInits();
+ if (Error Err = resolveGlobalAndIndirectSymbolInits())
+ return Err;
if (!GlobalInits.empty() || !IndirectSymbolInits.empty())
return error("Malformed global initializer set");
@@ -3466,14 +2558,14 @@ std::error_code BitcodeReader::globalCleanup() {
std::vector<std::pair<GlobalVariable*, unsigned> >().swap(GlobalInits);
std::vector<std::pair<GlobalIndirectSymbol*, unsigned> >().swap(
IndirectSymbolInits);
- return std::error_code();
+ return Error::success();
}
/// Support for lazy parsing of function bodies. This is required if we
/// either have an old bitcode file without a VST forward declaration record,
/// or if we have an anonymous function being materialized, since anonymous
/// functions do not have a name and are therefore not in the VST.
-std::error_code BitcodeReader::rememberAndSkipFunctionBodies() {
+Error BitcodeReader::rememberAndSkipFunctionBodies() {
Stream.JumpToBit(NextUnreadBit);
if (Stream.AtEndOfStream())
@@ -3488,7 +2580,7 @@ std::error_code BitcodeReader::rememberAndSkipFunctionBodies() {
SmallVector<uint64_t, 64> Record;
- while (1) {
+ while (true) {
BitstreamEntry Entry = Stream.advance();
switch (Entry.Kind) {
default:
@@ -3498,60 +2590,25 @@ std::error_code BitcodeReader::rememberAndSkipFunctionBodies() {
default:
return error("Expect function block");
case bitc::FUNCTION_BLOCK_ID:
- if (std::error_code EC = rememberAndSkipFunctionBody())
- return EC;
+ if (Error Err = rememberAndSkipFunctionBody())
+ return Err;
NextUnreadBit = Stream.GetCurrentBitNo();
- return std::error_code();
+ return Error::success();
}
}
}
}
-std::error_code BitcodeReader::parseBitcodeVersion() {
- if (Stream.EnterSubBlock(bitc::IDENTIFICATION_BLOCK_ID))
- return error("Invalid record");
-
- // Read all the records.
- SmallVector<uint64_t, 64> Record;
- while (1) {
- BitstreamEntry Entry = Stream.advance();
-
- switch (Entry.Kind) {
- default:
- case BitstreamEntry::Error:
- return error("Malformed block");
- case BitstreamEntry::EndBlock:
- return std::error_code();
- case BitstreamEntry::Record:
- // The interesting case.
- break;
- }
-
- // Read a record.
- Record.clear();
- unsigned BitCode = Stream.readRecord(Entry.ID, Record);
- switch (BitCode) {
- default: // Default behavior: reject
- return error("Invalid value");
- case bitc::IDENTIFICATION_CODE_STRING: { // IDENTIFICATION: [strchr x
- // N]
- convertToString(Record, 0, ProducerIdentification);
- break;
- }
- case bitc::IDENTIFICATION_CODE_EPOCH: { // EPOCH: [epoch#]
- unsigned epoch = (unsigned)Record[0];
- if (epoch != bitc::BITCODE_CURRENT_EPOCH) {
- return error(
- Twine("Incompatible epoch: Bitcode '") + Twine(epoch) +
- "' vs current: '" + Twine(bitc::BITCODE_CURRENT_EPOCH) + "'");
- }
- }
- }
- }
+bool BitcodeReaderBase::readBlockInfo() {
+ Optional<BitstreamBlockInfo> NewBlockInfo = Stream.ReadBlockInfoBlock();
+ if (!NewBlockInfo)
+ return true;
+ BlockInfo = std::move(*NewBlockInfo);
+ return false;
}
-std::error_code BitcodeReader::parseModule(uint64_t ResumeBit,
- bool ShouldLazyLoadMetadata) {
+Error BitcodeReader::parseModule(uint64_t ResumeBit,
+ bool ShouldLazyLoadMetadata) {
if (ResumeBit)
Stream.JumpToBit(ResumeBit);
else if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
@@ -3562,7 +2619,7 @@ std::error_code BitcodeReader::parseModule(uint64_t ResumeBit,
std::vector<std::string> GCTable;
// Read all the records for this module.
- while (1) {
+ while (true) {
BitstreamEntry Entry = Stream.advance();
switch (Entry.Kind) {
@@ -3578,20 +2635,20 @@ std::error_code BitcodeReader::parseModule(uint64_t ResumeBit,
return error("Invalid record");
break;
case bitc::BLOCKINFO_BLOCK_ID:
- if (Stream.ReadBlockInfoBlock())
+ if (readBlockInfo())
return error("Malformed block");
break;
case bitc::PARAMATTR_BLOCK_ID:
- if (std::error_code EC = parseAttributeBlock())
- return EC;
+ if (Error Err = parseAttributeBlock())
+ return Err;
break;
case bitc::PARAMATTR_GROUP_BLOCK_ID:
- if (std::error_code EC = parseAttributeGroupBlock())
- return EC;
+ if (Error Err = parseAttributeGroupBlock())
+ return Err;
break;
case bitc::TYPE_BLOCK_ID_NEW:
- if (std::error_code EC = parseTypeTable())
- return EC;
+ if (Error Err = parseTypeTable())
+ return Err;
break;
case bitc::VALUE_SYMTAB_BLOCK_ID:
if (!SeenValueSymbolTable) {
@@ -3601,8 +2658,8 @@ std::error_code BitcodeReader::parseModule(uint64_t ResumeBit,
// normally in the stream), or there were no function blocks to
// trigger an earlier parsing of the VST.
assert(VSTOffset == 0 || FunctionsWithBodies.empty());
- if (std::error_code EC = parseValueSymbolTable())
- return EC;
+ if (Error Err = parseValueSymbolTable())
+ return Err;
SeenValueSymbolTable = true;
} else {
// We must have had a VST forward declaration record, which caused
@@ -3613,32 +2670,32 @@ std::error_code BitcodeReader::parseModule(uint64_t ResumeBit,
}
break;
case bitc::CONSTANTS_BLOCK_ID:
- if (std::error_code EC = parseConstants())
- return EC;
- if (std::error_code EC = resolveGlobalAndIndirectSymbolInits())
- return EC;
+ if (Error Err = parseConstants())
+ return Err;
+ if (Error Err = resolveGlobalAndIndirectSymbolInits())
+ return Err;
break;
case bitc::METADATA_BLOCK_ID:
- if (ShouldLazyLoadMetadata && !IsMetadataMaterialized) {
- if (std::error_code EC = rememberAndSkipMetadata())
- return EC;
+ if (ShouldLazyLoadMetadata) {
+ if (Error Err = rememberAndSkipMetadata())
+ return Err;
break;
}
assert(DeferredMetadataInfo.empty() && "Unexpected deferred metadata");
- if (std::error_code EC = parseMetadata(true))
- return EC;
+ if (Error Err = MDLoader->parseModuleMetadata())
+ return Err;
break;
case bitc::METADATA_KIND_BLOCK_ID:
- if (std::error_code EC = parseMetadataKinds())
- return EC;
+ if (Error Err = MDLoader->parseMetadataKinds())
+ return Err;
break;
case bitc::FUNCTION_BLOCK_ID:
// If this is the first function body we've seen, reverse the
// FunctionsWithBodies list.
if (!SeenFirstFunctionBody) {
std::reverse(FunctionsWithBodies.begin(), FunctionsWithBodies.end());
- if (std::error_code EC = globalCleanup())
- return EC;
+ if (Error Err = globalCleanup())
+ return Err;
SeenFirstFunctionBody = true;
}
@@ -3647,9 +2704,8 @@ std::error_code BitcodeReader::parseModule(uint64_t ResumeBit,
// parse the VST now if we haven't already. It is needed to
// set up the DeferredFunctionInfo vector for lazy reading.
if (!SeenValueSymbolTable) {
- if (std::error_code EC =
- BitcodeReader::parseValueSymbolTable(VSTOffset))
- return EC;
+ if (Error Err = BitcodeReader::parseValueSymbolTable(VSTOffset))
+ return Err;
SeenValueSymbolTable = true;
// Fall through so that we record the NextUnreadBit below.
// This is necessary in case we have an anonymous function that
@@ -3672,8 +2728,8 @@ std::error_code BitcodeReader::parseModule(uint64_t ResumeBit,
// index in the VST, nor a VST forward declaration record, as
// well as anonymous functions that do not have VST entries.
// Build the DeferredFunctionInfo vector on the fly.
- if (std::error_code EC = rememberAndSkipFunctionBody())
- return EC;
+ if (Error Err = rememberAndSkipFunctionBody())
+ return Err;
// Suspend parsing when we reach the function bodies. Subsequent
// materialization calls will resume it when necessary. If the bitcode
@@ -3681,16 +2737,18 @@ std::error_code BitcodeReader::parseModule(uint64_t ResumeBit,
// have been seen yet. In this case, just finish the parse now.
if (SeenValueSymbolTable) {
NextUnreadBit = Stream.GetCurrentBitNo();
- return std::error_code();
+          // After the VST has been parsed, we need to make sure intrinsic names
+ // are auto-upgraded.
+ return globalCleanup();
}
break;
case bitc::USELIST_BLOCK_ID:
- if (std::error_code EC = parseUseLists())
- return EC;
+ if (Error Err = parseUseLists())
+ return Err;
break;
case bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID:
- if (std::error_code EC = parseOperandBundleTags())
- return EC;
+ if (Error Err = parseOperandBundleTags())
+ return Err;
break;
}
continue;
@@ -3803,8 +2861,8 @@ std::error_code BitcodeReader::parseModule(uint64_t ResumeBit,
uint64_t RawLinkage = Record[3];
GlobalValue::LinkageTypes Linkage = getDecodedLinkage(RawLinkage);
unsigned Alignment;
- if (std::error_code EC = parseAlignmentValue(Record[4], Alignment))
- return EC;
+ if (Error Err = parseAlignmentValue(Record[4], Alignment))
+ return Err;
std::string Section;
if (Record[5]) {
if (Record[5]-1 >= SectionTable.size())
@@ -3889,8 +2947,8 @@ std::error_code BitcodeReader::parseModule(uint64_t ResumeBit,
Func->setAttributes(getAttributes(Record[4]));
unsigned Alignment;
- if (std::error_code EC = parseAlignmentValue(Record[5], Alignment))
- return EC;
+ if (Error Err = parseAlignmentValue(Record[5], Alignment))
+ return Err;
Func->setAlignment(Alignment);
if (Record[6]) {
if (Record[6]-1 >= SectionTable.size())
@@ -4011,7 +3069,10 @@ std::error_code BitcodeReader::parseModule(uint64_t ResumeBit,
case bitc::MODULE_CODE_VSTOFFSET:
if (Record.size() < 1)
return error("Invalid record");
- VSTOffset = Record[0];
+ // Note that we subtract 1 here because the offset is relative to one word
+ // before the start of the identification or module block, which was
+ // historically always the start of the regular bitcode header.
+ VSTOffset = Record[0] - 1;
break;
/// MODULE_CODE_SOURCE_FILENAME: [namechar x N]
case bitc::MODULE_CODE_SOURCE_FILENAME:
@@ -4025,350 +3086,40 @@ std::error_code BitcodeReader::parseModule(uint64_t ResumeBit,
}
}
-/// Helper to read the header common to all bitcode files.
-static bool hasValidBitcodeHeader(BitstreamCursor &Stream) {
- // Sniff for the signature.
- if (Stream.Read(8) != 'B' ||
- Stream.Read(8) != 'C' ||
- Stream.Read(4) != 0x0 ||
- Stream.Read(4) != 0xC ||
- Stream.Read(4) != 0xE ||
- Stream.Read(4) != 0xD)
- return false;
- return true;
-}
-
-std::error_code
-BitcodeReader::parseBitcodeInto(std::unique_ptr<DataStreamer> Streamer,
- Module *M, bool ShouldLazyLoadMetadata) {
+Error BitcodeReader::parseBitcodeInto(Module *M, bool ShouldLazyLoadMetadata,
+ bool IsImporting) {
TheModule = M;
-
- if (std::error_code EC = initStream(std::move(Streamer)))
- return EC;
-
- // Sniff for the signature.
- if (!hasValidBitcodeHeader(Stream))
- return error("Invalid bitcode signature");
-
- // We expect a number of well-defined blocks, though we don't necessarily
- // need to understand them all.
- while (1) {
- if (Stream.AtEndOfStream()) {
- // We didn't really read a proper Module.
- return error("Malformed IR file");
- }
-
- BitstreamEntry Entry =
- Stream.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs);
-
- if (Entry.Kind != BitstreamEntry::SubBlock)
- return error("Malformed block");
-
- if (Entry.ID == bitc::IDENTIFICATION_BLOCK_ID) {
- parseBitcodeVersion();
- continue;
- }
-
- if (Entry.ID == bitc::MODULE_BLOCK_ID)
- return parseModule(0, ShouldLazyLoadMetadata);
-
- if (Stream.SkipBlock())
- return error("Invalid record");
- }
-}
-
-ErrorOr<std::string> BitcodeReader::parseModuleTriple() {
- if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
- return error("Invalid record");
-
- SmallVector<uint64_t, 64> Record;
-
- std::string Triple;
- // Read all the records for this module.
- while (1) {
- BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
-
- switch (Entry.Kind) {
- case BitstreamEntry::SubBlock: // Handled for us already.
- case BitstreamEntry::Error:
- return error("Malformed block");
- case BitstreamEntry::EndBlock:
- return Triple;
- case BitstreamEntry::Record:
- // The interesting case.
- break;
- }
-
- // Read a record.
- switch (Stream.readRecord(Entry.ID, Record)) {
- default: break; // Default behavior, ignore unknown content.
- case bitc::MODULE_CODE_TRIPLE: { // TRIPLE: [strchr x N]
- std::string S;
- if (convertToString(Record, 0, S))
- return error("Invalid record");
- Triple = S;
- break;
- }
- }
- Record.clear();
- }
- llvm_unreachable("Exit infinite loop");
+ MDLoader = MetadataLoader(Stream, *M, ValueList, IsImporting,
+ [&](unsigned ID) { return getTypeByID(ID); });
+ return parseModule(0, ShouldLazyLoadMetadata);
}
-ErrorOr<std::string> BitcodeReader::parseTriple() {
- if (std::error_code EC = initStream(nullptr))
- return EC;
- // Sniff for the signature.
- if (!hasValidBitcodeHeader(Stream))
- return error("Invalid bitcode signature");
-
- // We expect a number of well-defined blocks, though we don't necessarily
- // need to understand them all.
- while (1) {
- BitstreamEntry Entry = Stream.advance();
-
- switch (Entry.Kind) {
- case BitstreamEntry::Error:
- return error("Malformed block");
- case BitstreamEntry::EndBlock:
- return std::error_code();
-
- case BitstreamEntry::SubBlock:
- if (Entry.ID == bitc::MODULE_BLOCK_ID)
- return parseModuleTriple();
-
- // Ignore other sub-blocks.
- if (Stream.SkipBlock())
- return error("Malformed block");
- continue;
-
- case BitstreamEntry::Record:
- Stream.skipRecord(Entry.ID);
- continue;
- }
- }
-}
-
-ErrorOr<std::string> BitcodeReader::parseIdentificationBlock() {
- if (std::error_code EC = initStream(nullptr))
- return EC;
-
- // Sniff for the signature.
- if (!hasValidBitcodeHeader(Stream))
- return error("Invalid bitcode signature");
-
- // We expect a number of well-defined blocks, though we don't necessarily
- // need to understand them all.
- while (1) {
- BitstreamEntry Entry = Stream.advance();
- switch (Entry.Kind) {
- case BitstreamEntry::Error:
- return error("Malformed block");
- case BitstreamEntry::EndBlock:
- return std::error_code();
-
- case BitstreamEntry::SubBlock:
- if (Entry.ID == bitc::IDENTIFICATION_BLOCK_ID) {
- if (std::error_code EC = parseBitcodeVersion())
- return EC;
- return ProducerIdentification;
- }
- // Ignore other sub-blocks.
- if (Stream.SkipBlock())
- return error("Malformed block");
- continue;
- case BitstreamEntry::Record:
- Stream.skipRecord(Entry.ID);
- continue;
- }
- }
-}
-
-std::error_code BitcodeReader::parseGlobalObjectAttachment(
- GlobalObject &GO, ArrayRef<uint64_t> Record) {
- assert(Record.size() % 2 == 0);
- for (unsigned I = 0, E = Record.size(); I != E; I += 2) {
- auto K = MDKindMap.find(Record[I]);
- if (K == MDKindMap.end())
- return error("Invalid ID");
- MDNode *MD = MetadataList.getMDNodeFwdRefOrNull(Record[I + 1]);
- if (!MD)
- return error("Invalid metadata attachment");
- GO.addMetadata(K->second, *MD);
- }
- return std::error_code();
-}
-
-ErrorOr<bool> BitcodeReader::hasObjCCategory() {
- if (std::error_code EC = initStream(nullptr))
- return EC;
-
- // Sniff for the signature.
- if (!hasValidBitcodeHeader(Stream))
- return error("Invalid bitcode signature");
-
- // We expect a number of well-defined blocks, though we don't necessarily
- // need to understand them all.
- while (1) {
- BitstreamEntry Entry = Stream.advance();
-
- switch (Entry.Kind) {
- case BitstreamEntry::Error:
- return error("Malformed block");
- case BitstreamEntry::EndBlock:
- return std::error_code();
-
- case BitstreamEntry::SubBlock:
- if (Entry.ID == bitc::MODULE_BLOCK_ID)
- return hasObjCCategoryInModule();
-
- // Ignore other sub-blocks.
- if (Stream.SkipBlock())
- return error("Malformed block");
- continue;
-
- case BitstreamEntry::Record:
- Stream.skipRecord(Entry.ID);
- continue;
- }
- }
-}
-
-ErrorOr<bool> BitcodeReader::hasObjCCategoryInModule() {
- if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
- return error("Invalid record");
-
- SmallVector<uint64_t, 64> Record;
- // Read all the records for this module.
- while (1) {
- BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
-
- switch (Entry.Kind) {
- case BitstreamEntry::SubBlock: // Handled for us already.
- case BitstreamEntry::Error:
- return error("Malformed block");
- case BitstreamEntry::EndBlock:
- return false;
- case BitstreamEntry::Record:
- // The interesting case.
- break;
- }
-
- // Read a record.
- switch (Stream.readRecord(Entry.ID, Record)) {
- default:
- break; // Default behavior, ignore unknown content.
- case bitc::MODULE_CODE_SECTIONNAME: { // SECTIONNAME: [strchr x N]
- std::string S;
- if (convertToString(Record, 0, S))
- return error("Invalid record");
- // Check for the i386 and other (x86_64, ARM) conventions
- if (S.find("__DATA, __objc_catlist") != std::string::npos ||
- S.find("__OBJC,__category") != std::string::npos)
- return true;
- break;
- }
- }
- Record.clear();
- }
- llvm_unreachable("Exit infinite loop");
-}
-
-/// Parse metadata attachments.
-std::error_code BitcodeReader::parseMetadataAttachment(Function &F) {
- if (Stream.EnterSubBlock(bitc::METADATA_ATTACHMENT_ID))
- return error("Invalid record");
-
- SmallVector<uint64_t, 64> Record;
- while (1) {
- BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
-
- switch (Entry.Kind) {
- case BitstreamEntry::SubBlock: // Handled for us already.
- case BitstreamEntry::Error:
- return error("Malformed block");
- case BitstreamEntry::EndBlock:
- return std::error_code();
- case BitstreamEntry::Record:
- // The interesting case.
- break;
- }
-
- // Read a metadata attachment record.
- Record.clear();
- switch (Stream.readRecord(Entry.ID, Record)) {
- default: // Default behavior: ignore.
- break;
- case bitc::METADATA_ATTACHMENT: {
- unsigned RecordLength = Record.size();
- if (Record.empty())
- return error("Invalid record");
- if (RecordLength % 2 == 0) {
- // A function attachment.
- if (std::error_code EC = parseGlobalObjectAttachment(F, Record))
- return EC;
- continue;
- }
-
- // An instruction attachment.
- Instruction *Inst = InstructionList[Record[0]];
- for (unsigned i = 1; i != RecordLength; i = i+2) {
- unsigned Kind = Record[i];
- DenseMap<unsigned, unsigned>::iterator I =
- MDKindMap.find(Kind);
- if (I == MDKindMap.end())
- return error("Invalid ID");
- Metadata *Node = MetadataList.getMetadataFwdRef(Record[i + 1]);
- if (isa<LocalAsMetadata>(Node))
- // Drop the attachment. This used to be legal, but there's no
- // upgrade path.
- break;
- MDNode *MD = dyn_cast_or_null<MDNode>(Node);
- if (!MD)
- return error("Invalid metadata attachment");
-
- if (HasSeenOldLoopTags && I->second == LLVMContext::MD_loop)
- MD = upgradeInstructionLoopAttachment(*MD);
-
- Inst->setMetadata(I->second, MD);
- if (I->second == LLVMContext::MD_tbaa) {
- InstsWithTBAATag.push_back(Inst);
- continue;
- }
- }
- break;
- }
- }
- }
-}
-
-static std::error_code typeCheckLoadStoreInst(Type *ValType, Type *PtrType) {
- LLVMContext &Context = PtrType->getContext();
+Error BitcodeReader::typeCheckLoadStoreInst(Type *ValType, Type *PtrType) {
if (!isa<PointerType>(PtrType))
- return error(Context, "Load/Store operand is not a pointer type");
+ return error("Load/Store operand is not a pointer type");
Type *ElemType = cast<PointerType>(PtrType)->getElementType();
if (ValType && ValType != ElemType)
- return error(Context, "Explicit load/store type does not match pointee "
- "type of pointer operand");
+ return error("Explicit load/store type does not match pointee "
+ "type of pointer operand");
if (!PointerType::isLoadableOrStorableType(ElemType))
- return error(Context, "Cannot load/store from pointer");
- return std::error_code();
+ return error("Cannot load/store from pointer");
+ return Error::success();
}
/// Lazily parse the specified function body block.
-std::error_code BitcodeReader::parseFunctionBody(Function *F) {
+Error BitcodeReader::parseFunctionBody(Function *F) {
if (Stream.EnterSubBlock(bitc::FUNCTION_BLOCK_ID))
return error("Invalid record");
// Unexpected unresolved metadata when parsing function.
- if (MetadataList.hasFwdRefs())
+ if (MDLoader->hasFwdRefs())
return error("Invalid function metadata: incoming forward references");
InstructionList.clear();
unsigned ModuleValueListSize = ValueList.size();
- unsigned ModuleMetadataListSize = MetadataList.size();
+ unsigned ModuleMDLoaderSize = MDLoader->size();
// Add all the function arguments to the value table.
for (Argument &I : F->args())
@@ -4392,7 +3143,8 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) {
// Read all the records.
SmallVector<uint64_t, 64> Record;
- while (1) {
+
+ while (true) {
BitstreamEntry Entry = Stream.advance();
switch (Entry.Kind) {
@@ -4408,25 +3160,27 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) {
return error("Invalid record");
break;
case bitc::CONSTANTS_BLOCK_ID:
- if (std::error_code EC = parseConstants())
- return EC;
+ if (Error Err = parseConstants())
+ return Err;
NextValueNo = ValueList.size();
break;
case bitc::VALUE_SYMTAB_BLOCK_ID:
- if (std::error_code EC = parseValueSymbolTable())
- return EC;
+ if (Error Err = parseValueSymbolTable())
+ return Err;
break;
case bitc::METADATA_ATTACHMENT_ID:
- if (std::error_code EC = parseMetadataAttachment(*F))
- return EC;
+ if (Error Err = MDLoader->parseMetadataAttachment(*F, InstructionList))
+ return Err;
break;
case bitc::METADATA_BLOCK_ID:
- if (std::error_code EC = parseMetadata())
- return EC;
+ assert(DeferredMetadataInfo.empty() &&
+ "Must read all module-level metadata before function-level");
+ if (Error Err = MDLoader->parseFunctionMetadata())
+ return Err;
break;
case bitc::USELIST_BLOCK_ID:
- if (std::error_code EC = parseUseLists())
- return EC;
+ if (Error Err = parseUseLists())
+ return Err;
break;
}
continue;
@@ -4499,12 +3253,12 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) {
MDNode *Scope = nullptr, *IA = nullptr;
if (ScopeID) {
- Scope = MetadataList.getMDNodeFwdRefOrNull(ScopeID - 1);
+ Scope = MDLoader->getMDNodeFwdRefOrNull(ScopeID - 1);
if (!Scope)
return error("Invalid record");
}
if (IAID) {
- IA = MetadataList.getMDNodeFwdRefOrNull(IAID - 1);
+ IA = MDLoader->getMDNodeFwdRefOrNull(IAID - 1);
if (!IA)
return error("Invalid record");
}
@@ -4598,10 +3352,10 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) {
return error("Invalid record");
if (!Ty)
- Ty = cast<SequentialType>(BasePtr->getType()->getScalarType())
+ Ty = cast<PointerType>(BasePtr->getType()->getScalarType())
->getElementType();
else if (Ty !=
- cast<SequentialType>(BasePtr->getType()->getScalarType())
+ cast<PointerType>(BasePtr->getType()->getScalarType())
->getElementType())
return error(
"Explicit gep type does not match pointee type of pointer operand");
@@ -5258,9 +4012,8 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) {
Type *OpTy = getTypeByID(Record[1]);
Value *Size = getFnValueByID(Record[2], OpTy);
unsigned Align;
- if (std::error_code EC =
- parseAlignmentValue(AlignRecord & ~FlagMask, Align)) {
- return EC;
+ if (Error Err = parseAlignmentValue(AlignRecord & ~FlagMask, Align)) {
+ return Err;
}
if (!Ty || !Size)
return error("Invalid record");
@@ -5281,14 +4034,14 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) {
Type *Ty = nullptr;
if (OpNum + 3 == Record.size())
Ty = getTypeByID(Record[OpNum++]);
- if (std::error_code EC = typeCheckLoadStoreInst(Ty, Op->getType()))
- return EC;
+ if (Error Err = typeCheckLoadStoreInst(Ty, Op->getType()))
+ return Err;
if (!Ty)
Ty = cast<PointerType>(Op->getType())->getElementType();
unsigned Align;
- if (std::error_code EC = parseAlignmentValue(Record[OpNum], Align))
- return EC;
+ if (Error Err = parseAlignmentValue(Record[OpNum], Align))
+ return Err;
I = new LoadInst(Ty, Op, "", Record[OpNum + 1], Align);
InstructionList.push_back(I);
@@ -5305,8 +4058,8 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) {
Type *Ty = nullptr;
if (OpNum + 5 == Record.size())
Ty = getTypeByID(Record[OpNum++]);
- if (std::error_code EC = typeCheckLoadStoreInst(Ty, Op->getType()))
- return EC;
+ if (Error Err = typeCheckLoadStoreInst(Ty, Op->getType()))
+ return Err;
if (!Ty)
Ty = cast<PointerType>(Op->getType())->getElementType();
@@ -5320,8 +4073,8 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) {
SynchronizationScope SynchScope = getDecodedSynchScope(Record[OpNum + 3]);
unsigned Align;
- if (std::error_code EC = parseAlignmentValue(Record[OpNum], Align))
- return EC;
+ if (Error Err = parseAlignmentValue(Record[OpNum], Align))
+ return Err;
I = new LoadInst(Op, "", Record[OpNum+1], Align, Ordering, SynchScope);
InstructionList.push_back(I);
@@ -5340,12 +4093,11 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) {
OpNum + 2 != Record.size())
return error("Invalid record");
- if (std::error_code EC =
- typeCheckLoadStoreInst(Val->getType(), Ptr->getType()))
- return EC;
+ if (Error Err = typeCheckLoadStoreInst(Val->getType(), Ptr->getType()))
+ return Err;
unsigned Align;
- if (std::error_code EC = parseAlignmentValue(Record[OpNum], Align))
- return EC;
+ if (Error Err = parseAlignmentValue(Record[OpNum], Align))
+ return Err;
I = new StoreInst(Val, Ptr, Record[OpNum+1], Align);
InstructionList.push_back(I);
break;
@@ -5365,9 +4117,8 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) {
OpNum + 4 != Record.size())
return error("Invalid record");
- if (std::error_code EC =
- typeCheckLoadStoreInst(Val->getType(), Ptr->getType()))
- return EC;
+ if (Error Err = typeCheckLoadStoreInst(Val->getType(), Ptr->getType()))
+ return Err;
AtomicOrdering Ordering = getDecodedOrdering(Record[OpNum + 2]);
if (Ordering == AtomicOrdering::NotAtomic ||
Ordering == AtomicOrdering::Acquire ||
@@ -5378,8 +4129,8 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) {
return error("Invalid record");
unsigned Align;
- if (std::error_code EC = parseAlignmentValue(Record[OpNum], Align))
- return EC;
+ if (Error Err = parseAlignmentValue(Record[OpNum], Align))
+ return Err;
I = new StoreInst(Val, Ptr, Record[OpNum+1], Align, Ordering, SynchScope);
InstructionList.push_back(I);
break;
@@ -5405,9 +4156,8 @@ std::error_code BitcodeReader::parseFunctionBody(Function *F) {
return error("Invalid record");
SynchronizationScope SynchScope = getDecodedSynchScope(Record[OpNum + 2]);
- if (std::error_code EC =
- typeCheckLoadStoreInst(Cmp->getType(), Ptr->getType()))
- return EC;
+ if (Error Err = typeCheckLoadStoreInst(Cmp->getType(), Ptr->getType()))
+ return Err;
AtomicOrdering FailureOrdering;
if (Record.size() < 7)
FailureOrdering =
@@ -5633,18 +4383,18 @@ OutOfRecordLoop:
}
// Unexpected unresolved metadata about to be dropped.
- if (MetadataList.hasFwdRefs())
+ if (MDLoader->hasFwdRefs())
return error("Invalid function metadata: outgoing forward refs");
// Trim the value list down to the size it was before we parsed this function.
ValueList.shrinkTo(ModuleValueListSize);
- MetadataList.shrinkTo(ModuleMetadataListSize);
+ MDLoader->shrinkTo(ModuleMDLoaderSize);
std::vector<BasicBlock*>().swap(FunctionBBs);
- return std::error_code();
+ return Error::success();
}
/// Find the function body in the bitcode stream
-std::error_code BitcodeReader::findFunctionInStream(
+Error BitcodeReader::findFunctionInStream(
Function *F,
DenseMap<Function *, uint64_t>::iterator DeferredFunctionInfoIterator) {
while (DeferredFunctionInfoIterator->second == 0) {
@@ -5655,41 +4405,39 @@ std::error_code BitcodeReader::findFunctionInStream(
assert(VSTOffset == 0 || !F->hasName());
// Parse the next body in the stream and set its position in the
// DeferredFunctionInfo map.
- if (std::error_code EC = rememberAndSkipFunctionBodies())
- return EC;
+ if (Error Err = rememberAndSkipFunctionBodies())
+ return Err;
}
- return std::error_code();
+ return Error::success();
}
//===----------------------------------------------------------------------===//
// GVMaterializer implementation
//===----------------------------------------------------------------------===//
-void BitcodeReader::releaseBuffer() { Buffer.release(); }
-
-std::error_code BitcodeReader::materialize(GlobalValue *GV) {
+Error BitcodeReader::materialize(GlobalValue *GV) {
Function *F = dyn_cast<Function>(GV);
// If it's not a function or is already material, ignore the request.
if (!F || !F->isMaterializable())
- return std::error_code();
+ return Error::success();
DenseMap<Function*, uint64_t>::iterator DFII = DeferredFunctionInfo.find(F);
assert(DFII != DeferredFunctionInfo.end() && "Deferred function not found!");
// If its position is recorded as 0, its body is somewhere in the stream
// but we haven't seen it yet.
if (DFII->second == 0)
- if (std::error_code EC = findFunctionInStream(F, DFII))
- return EC;
+ if (Error Err = findFunctionInStream(F, DFII))
+ return Err;
// Materialize metadata before parsing any function bodies.
- if (std::error_code EC = materializeMetadata())
- return EC;
+ if (Error Err = materializeMetadata())
+ return Err;
// Move the bit stream to the saved position of the deferred function body.
Stream.JumpToBit(DFII->second);
- if (std::error_code EC = parseFunctionBody(F))
- return EC;
+ if (Error Err = parseFunctionBody(F))
+ return Err;
F->setIsMaterializable(false);
if (StripDebugInfo)
@@ -5714,17 +4462,28 @@ std::error_code BitcodeReader::materialize(GlobalValue *GV) {
CallSite(*UI++).setCalledFunction(I.second);
// Finish fn->subprogram upgrade for materialized functions.
- if (DISubprogram *SP = FunctionsWithSPs.lookup(F))
+ if (DISubprogram *SP = MDLoader->lookupSubprogramForFunction(F))
F->setSubprogram(SP);
+  // Check if the TBAA metadata are valid; otherwise we will need to strip them.
+ if (!MDLoader->isStrippingTBAA()) {
+ for (auto &I : instructions(F)) {
+ MDNode *TBAA = I.getMetadata(LLVMContext::MD_tbaa);
+ if (!TBAA || TBAAVerifyHelper.visitTBAAMetadata(I, TBAA))
+ continue;
+ MDLoader->setStripTBAA(true);
+ stripTBAA(F->getParent());
+ }
+ }
+
// Bring in any functions that this function forward-referenced via
// blockaddresses.
return materializeForwardReferencedFunctions();
}
-std::error_code BitcodeReader::materializeModule() {
- if (std::error_code EC = materializeMetadata())
- return EC;
+Error BitcodeReader::materializeModule() {
+ if (Error Err = materializeMetadata())
+ return Err;
// Promise to materialize all forward references.
WillMaterializeAllForwardRefs = true;
@@ -5732,26 +4491,23 @@ std::error_code BitcodeReader::materializeModule() {
// Iterate over the module, deserializing any functions that are still on
// disk.
for (Function &F : *TheModule) {
- if (std::error_code EC = materialize(&F))
- return EC;
+ if (Error Err = materialize(&F))
+ return Err;
}
// At this point, if there are any function bodies, parse the rest of
// the bits in the module past the last function block we have recorded
// through either lazy scanning or the VST.
if (LastFunctionBlockBit || NextUnreadBit)
- parseModule(LastFunctionBlockBit > NextUnreadBit ? LastFunctionBlockBit
- : NextUnreadBit);
+ if (Error Err = parseModule(LastFunctionBlockBit > NextUnreadBit
+ ? LastFunctionBlockBit
+ : NextUnreadBit))
+ return Err;
// Check that all block address forward references got resolved (as we
// promised above).
if (!BasicBlockFwdRefs.empty())
return error("Never resolved function from blockaddress");
- // Upgrading intrinsic calls before TBAA can cause TBAA metadata to be lost,
- // to prevent this instructions with TBAA tags should be upgraded first.
- for (unsigned I = 0, E = InstsWithTBAATag.size(); I < E; I++)
- UpgradeInstWithTBAATag(InstsWithTBAATag[I]);
-
// Upgrade any intrinsic calls that slipped through (should not happen!) and
// delete the old functions to clean up. We can't do this unless the entire
// module is materialized because there could always be another function body
@@ -5776,80 +4532,16 @@ std::error_code BitcodeReader::materializeModule() {
UpgradeDebugInfo(*TheModule);
UpgradeModuleFlags(*TheModule);
- return std::error_code();
+ return Error::success();
}
std::vector<StructType *> BitcodeReader::getIdentifiedStructTypes() const {
return IdentifiedStructTypes;
}
-std::error_code
-BitcodeReader::initStream(std::unique_ptr<DataStreamer> Streamer) {
- if (Streamer)
- return initLazyStream(std::move(Streamer));
- return initStreamFromBuffer();
-}
-
-std::error_code BitcodeReader::initStreamFromBuffer() {
- const unsigned char *BufPtr = (const unsigned char*)Buffer->getBufferStart();
- const unsigned char *BufEnd = BufPtr+Buffer->getBufferSize();
-
- if (Buffer->getBufferSize() & 3)
- return error("Invalid bitcode signature");
-
- // If we have a wrapper header, parse it and ignore the non-bc file contents.
- // The magic number is 0x0B17C0DE stored in little endian.
- if (isBitcodeWrapper(BufPtr, BufEnd))
- if (SkipBitcodeWrapperHeader(BufPtr, BufEnd, true))
- return error("Invalid bitcode wrapper header");
-
- StreamFile.reset(new BitstreamReader(BufPtr, BufEnd));
- Stream.init(&*StreamFile);
-
- return std::error_code();
-}
-
-std::error_code
-BitcodeReader::initLazyStream(std::unique_ptr<DataStreamer> Streamer) {
- // Check and strip off the bitcode wrapper; BitstreamReader expects never to
- // see it.
- auto OwnedBytes =
- llvm::make_unique<StreamingMemoryObject>(std::move(Streamer));
- StreamingMemoryObject &Bytes = *OwnedBytes;
- StreamFile = llvm::make_unique<BitstreamReader>(std::move(OwnedBytes));
- Stream.init(&*StreamFile);
-
- unsigned char buf[16];
- if (Bytes.readBytes(buf, 16, 0) != 16)
- return error("Invalid bitcode signature");
-
- if (!isBitcode(buf, buf + 16))
- return error("Invalid bitcode signature");
-
- if (isBitcodeWrapper(buf, buf + 4)) {
- const unsigned char *bitcodeStart = buf;
- const unsigned char *bitcodeEnd = buf + 16;
- SkipBitcodeWrapperHeader(bitcodeStart, bitcodeEnd, false);
- Bytes.dropLeadingBytes(bitcodeStart - buf);
- Bytes.setKnownObjectSize(bitcodeEnd - bitcodeStart);
- }
- return std::error_code();
-}
-
-std::error_code ModuleSummaryIndexBitcodeReader::error(const Twine &Message) {
- return ::error(DiagnosticHandler,
- make_error_code(BitcodeError::CorruptedBitcode), Message);
-}
-
ModuleSummaryIndexBitcodeReader::ModuleSummaryIndexBitcodeReader(
- MemoryBuffer *Buffer, DiagnosticHandlerFunction DiagnosticHandler,
- bool CheckGlobalValSummaryPresenceOnly)
- : DiagnosticHandler(std::move(DiagnosticHandler)), Buffer(Buffer),
- CheckGlobalValSummaryPresenceOnly(CheckGlobalValSummaryPresenceOnly) {}
-
-void ModuleSummaryIndexBitcodeReader::freeState() { Buffer = nullptr; }
-
-void ModuleSummaryIndexBitcodeReader::releaseBuffer() { Buffer.release(); }
+ BitstreamCursor Cursor, ModuleSummaryIndex &TheIndex)
+ : BitcodeReaderBase(std::move(Cursor)), TheIndex(TheIndex) {}
std::pair<GlobalValue::GUID, GlobalValue::GUID>
ModuleSummaryIndexBitcodeReader::getGUIDFromValueId(unsigned ValueId) {
@@ -5861,7 +4553,7 @@ ModuleSummaryIndexBitcodeReader::getGUIDFromValueId(unsigned ValueId) {
// Specialized value symbol table parser used when reading module index
// blocks where we don't actually create global values. The parsed information
// is saved in the bitcode reader for use when later parsing summaries.
-std::error_code ModuleSummaryIndexBitcodeReader::parseValueSymbolTable(
+Error ModuleSummaryIndexBitcodeReader::parseValueSymbolTable(
uint64_t Offset,
DenseMap<unsigned, GlobalValue::LinkageTypes> &ValueIdToLinkageMap) {
assert(Offset > 0 && "Expected non-zero VST offset");
@@ -5874,7 +4566,8 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseValueSymbolTable(
// Read all the records for this value table.
SmallString<128> ValueName;
- while (1) {
+
+ while (true) {
BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
switch (Entry.Kind) {
@@ -5884,7 +4577,7 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseValueSymbolTable(
case BitstreamEntry::EndBlock:
// Done parsing VST, jump back to wherever we came from.
Stream.JumpToBit(CurrentBit);
- return std::error_code();
+ return Error::success();
case BitstreamEntry::Record:
// The interesting case.
break;
@@ -5959,7 +4652,7 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseValueSymbolTable(
// Parse just the blocks needed for building the index out of the module.
// At the end of this routine the module Index is populated with a map
// from global value id to GlobalValueSummary objects.
-std::error_code ModuleSummaryIndexBitcodeReader::parseModule() {
+Error ModuleSummaryIndexBitcodeReader::parseModule(StringRef ModulePath) {
if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
return error("Invalid record");
@@ -5968,26 +4661,16 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseModule() {
unsigned ValueId = 0;
// Read the index for this module.
- while (1) {
+ while (true) {
BitstreamEntry Entry = Stream.advance();
switch (Entry.Kind) {
case BitstreamEntry::Error:
return error("Malformed block");
case BitstreamEntry::EndBlock:
- return std::error_code();
+ return Error::success();
case BitstreamEntry::SubBlock:
- if (CheckGlobalValSummaryPresenceOnly) {
- if (Entry.ID == bitc::GLOBALVAL_SUMMARY_BLOCK_ID) {
- SeenGlobalValSummary = true;
- // No need to parse the rest since we found the summary.
- return std::error_code();
- }
- if (Stream.SkipBlock())
- return error("Invalid record");
- continue;
- }
switch (Entry.ID) {
default: // Skip unknown content.
if (Stream.SkipBlock())
@@ -5995,7 +4678,7 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseModule() {
break;
case bitc::BLOCKINFO_BLOCK_ID:
// Need to parse these to get abbrev ids (e.g. for VST)
- if (Stream.ReadBlockInfoBlock())
+ if (readBlockInfo())
return error("Malformed block");
break;
case bitc::VALUE_SYMTAB_BLOCK_ID:
@@ -6008,20 +4691,24 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseModule() {
return error("Invalid record");
break;
case bitc::GLOBALVAL_SUMMARY_BLOCK_ID:
- assert(VSTOffset > 0 && "Expected non-zero VST offset");
assert(!SeenValueSymbolTable &&
"Already read VST when parsing summary block?");
- if (std::error_code EC =
- parseValueSymbolTable(VSTOffset, ValueIdToLinkageMap))
- return EC;
- SeenValueSymbolTable = true;
+ // We might not have a VST if there were no values in the
+      // summary. An empty summary block is generated when we are
+      // performing ThinLTO compiles, so we don't later invoke
+ // the regular LTO process on them.
+ if (VSTOffset > 0) {
+ if (Error Err = parseValueSymbolTable(VSTOffset, ValueIdToLinkageMap))
+ return Err;
+ SeenValueSymbolTable = true;
+ }
SeenGlobalValSummary = true;
- if (std::error_code EC = parseEntireSummary())
- return EC;
+ if (Error Err = parseEntireSummary(ModulePath))
+ return Err;
break;
case bitc::MODULE_STRTAB_BLOCK_ID:
- if (std::error_code EC = parseModuleStringTable())
- return EC;
+ if (Error Err = parseModuleStringTable())
+ return Err;
break;
}
continue;
@@ -6044,14 +4731,12 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseModule() {
case bitc::MODULE_CODE_HASH: {
if (Record.size() != 5)
return error("Invalid hash length " + Twine(Record.size()).str());
- if (!TheIndex)
- break;
- if (TheIndex->modulePaths().empty())
- // Does not have any summary emitted.
- break;
- if (TheIndex->modulePaths().size() != 1)
+ if (TheIndex.modulePaths().empty())
+ // We always seed the index with the module.
+ TheIndex.addModulePath(ModulePath, 0);
+ if (TheIndex.modulePaths().size() != 1)
return error("Don't expect multiple modules defined?");
- auto &Hash = TheIndex->modulePaths().begin()->second.second;
+ auto &Hash = TheIndex.modulePaths().begin()->second.second;
int Pos = 0;
for (auto &Val : Record) {
assert(!(Val >> 32) && "Unexpected high bits set");
@@ -6063,7 +4748,10 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseModule() {
case bitc::MODULE_CODE_VSTOFFSET:
if (Record.size() < 1)
return error("Invalid record");
- VSTOffset = Record[0];
+ // Note that we subtract 1 here because the offset is relative to one
+ // word before the start of the identification or module block, which
+ // was historically always the start of the regular bitcode header.
+ VSTOffset = Record[0] - 1;
break;
// GLOBALVAR: [pointer type, isconst, initid,
// linkage, alignment, section, visibility, threadlocal,
@@ -6105,9 +4793,37 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseModule() {
}
}
+std::vector<ValueInfo>
+ModuleSummaryIndexBitcodeReader::makeRefList(ArrayRef<uint64_t> Record) {
+ std::vector<ValueInfo> Ret;
+ Ret.reserve(Record.size());
+ for (uint64_t RefValueId : Record)
+ Ret.push_back(getGUIDFromValueId(RefValueId).first);
+ return Ret;
+}
+
+std::vector<FunctionSummary::EdgeTy> ModuleSummaryIndexBitcodeReader::makeCallList(
+ ArrayRef<uint64_t> Record, bool IsOldProfileFormat, bool HasProfile) {
+ std::vector<FunctionSummary::EdgeTy> Ret;
+ Ret.reserve(Record.size());
+ for (unsigned I = 0, E = Record.size(); I != E; ++I) {
+ CalleeInfo::HotnessType Hotness = CalleeInfo::HotnessType::Unknown;
+ GlobalValue::GUID CalleeGUID = getGUIDFromValueId(Record[I]).first;
+ if (IsOldProfileFormat) {
+ I += 1; // Skip old callsitecount field
+ if (HasProfile)
+ I += 1; // Skip old profilecount field
+ } else if (HasProfile)
+ Hotness = static_cast<CalleeInfo::HotnessType>(Record[++I]);
+ Ret.push_back(FunctionSummary::EdgeTy{CalleeGUID, CalleeInfo{Hotness}});
+ }
+ return Ret;
+}
+
// Eagerly parse the entire summary block. This populates the GlobalValueSummary
// objects in the index.
-std::error_code ModuleSummaryIndexBitcodeReader::parseEntireSummary() {
+Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(
+ StringRef ModulePath) {
if (Stream.EnterSubBlock(bitc::GLOBALVAL_SUMMARY_BLOCK_ID))
return error("Invalid record");
SmallVector<uint64_t, 64> Record;
@@ -6121,15 +4837,19 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseEntireSummary() {
return error("Invalid Summary Block: version expected");
}
const uint64_t Version = Record[0];
- if (Version != 1)
- return error("Invalid summary version " + Twine(Version) + ", 1 expected");
+ const bool IsOldProfileFormat = Version == 1;
+ if (!IsOldProfileFormat && Version != 2)
+ return error("Invalid summary version " + Twine(Version) +
+ ", 1 or 2 expected");
Record.clear();
// Keep around the last seen summary to be used when we see an optional
// "OriginalName" attachement.
GlobalValueSummary *LastSeenSummary = nullptr;
bool Combined = false;
- while (1) {
+ std::vector<GlobalValue::GUID> PendingTypeTests;
+
+ while (true) {
BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
switch (Entry.Kind) {
@@ -6146,8 +4866,8 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseEntireSummary() {
// to clean them up (especially since that may not run for the first
// module's index if we merge into that).
if (!Combined)
- TheIndex->removeEmptySummaryEntries();
- return std::error_code();
+ TheIndex.removeEmptySummaryEntries();
+ return Error::success();
case BitstreamEntry::Record:
// The interesting case.
break;
@@ -6166,10 +4886,10 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseEntireSummary() {
default: // Default behavior: ignore.
break;
// FS_PERMODULE: [valueid, flags, instcount, numrefs, numrefs x valueid,
- // n x (valueid, callsitecount)]
+ // n x (valueid)]
// FS_PERMODULE_PROFILE: [valueid, flags, instcount, numrefs,
// numrefs x valueid,
- // n x (valueid, callsitecount, profilecount)]
+ // n x (valueid, hotness)]
case bitc::FS_PERMODULE:
case bitc::FS_PERMODULE_PROFILE: {
unsigned ValueID = Record[0];
@@ -6177,37 +4897,29 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseEntireSummary() {
unsigned InstCount = Record[2];
unsigned NumRefs = Record[3];
auto Flags = getDecodedGVSummaryFlags(RawFlags, Version);
- std::unique_ptr<FunctionSummary> FS =
- llvm::make_unique<FunctionSummary>(Flags, InstCount);
// The module path string ref set in the summary must be owned by the
// index's module string table. Since we don't have a module path
// string table section in the per-module index, we create a single
// module path string table entry with an empty (0) ID to take
// ownership.
- FS->setModulePath(
- TheIndex->addModulePath(Buffer->getBufferIdentifier(), 0)->first());
static int RefListStartIndex = 4;
int CallGraphEdgeStartIndex = RefListStartIndex + NumRefs;
assert(Record.size() >= RefListStartIndex + NumRefs &&
"Record size inconsistent with number of references");
- for (unsigned I = 4, E = CallGraphEdgeStartIndex; I != E; ++I) {
- unsigned RefValueId = Record[I];
- GlobalValue::GUID RefGUID = getGUIDFromValueId(RefValueId).first;
- FS->addRefEdge(RefGUID);
- }
+ std::vector<ValueInfo> Refs = makeRefList(
+ ArrayRef<uint64_t>(Record).slice(RefListStartIndex, NumRefs));
bool HasProfile = (BitCode == bitc::FS_PERMODULE_PROFILE);
- for (unsigned I = CallGraphEdgeStartIndex, E = Record.size(); I != E;
- ++I) {
- unsigned CalleeValueId = Record[I];
- unsigned CallsiteCount = Record[++I];
- uint64_t ProfileCount = HasProfile ? Record[++I] : 0;
- GlobalValue::GUID CalleeGUID = getGUIDFromValueId(CalleeValueId).first;
- FS->addCallGraphEdge(CalleeGUID,
- CalleeInfo(CallsiteCount, ProfileCount));
- }
+ std::vector<FunctionSummary::EdgeTy> Calls = makeCallList(
+ ArrayRef<uint64_t>(Record).slice(CallGraphEdgeStartIndex),
+ IsOldProfileFormat, HasProfile);
+ auto FS = llvm::make_unique<FunctionSummary>(
+ Flags, InstCount, std::move(Refs), std::move(Calls),
+ std::move(PendingTypeTests));
+ PendingTypeTests.clear();
auto GUID = getGUIDFromValueId(ValueID);
+ FS->setModulePath(TheIndex.addModulePath(ModulePath, 0)->first());
FS->setOriginalName(GUID.second);
- TheIndex->addGlobalValueSummary(GUID.first, std::move(FS));
+ TheIndex.addGlobalValueSummary(GUID.first, std::move(FS));
break;
}
// FS_ALIAS: [valueid, flags, valueid]
@@ -6218,24 +4930,24 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseEntireSummary() {
uint64_t RawFlags = Record[1];
unsigned AliaseeID = Record[2];
auto Flags = getDecodedGVSummaryFlags(RawFlags, Version);
- std::unique_ptr<AliasSummary> AS = llvm::make_unique<AliasSummary>(Flags);
+ auto AS =
+ llvm::make_unique<AliasSummary>(Flags, std::vector<ValueInfo>{});
// The module path string ref set in the summary must be owned by the
// index's module string table. Since we don't have a module path
// string table section in the per-module index, we create a single
// module path string table entry with an empty (0) ID to take
// ownership.
- AS->setModulePath(
- TheIndex->addModulePath(Buffer->getBufferIdentifier(), 0)->first());
+ AS->setModulePath(TheIndex.addModulePath(ModulePath, 0)->first());
GlobalValue::GUID AliaseeGUID = getGUIDFromValueId(AliaseeID).first;
- auto *AliaseeSummary = TheIndex->getGlobalValueSummary(AliaseeGUID);
+ auto *AliaseeSummary = TheIndex.getGlobalValueSummary(AliaseeGUID);
if (!AliaseeSummary)
return error("Alias expects aliasee summary to be parsed");
AS->setAliasee(AliaseeSummary);
auto GUID = getGUIDFromValueId(ValueID);
AS->setOriginalName(GUID.second);
- TheIndex->addGlobalValueSummary(GUID.first, std::move(AS));
+ TheIndex.addGlobalValueSummary(GUID.first, std::move(AS));
break;
}
// FS_PERMODULE_GLOBALVAR_INIT_REFS: [valueid, flags, n x valueid]
@@ -6243,25 +4955,19 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseEntireSummary() {
unsigned ValueID = Record[0];
uint64_t RawFlags = Record[1];
auto Flags = getDecodedGVSummaryFlags(RawFlags, Version);
- std::unique_ptr<GlobalVarSummary> FS =
- llvm::make_unique<GlobalVarSummary>(Flags);
- FS->setModulePath(
- TheIndex->addModulePath(Buffer->getBufferIdentifier(), 0)->first());
- for (unsigned I = 2, E = Record.size(); I != E; ++I) {
- unsigned RefValueId = Record[I];
- GlobalValue::GUID RefGUID = getGUIDFromValueId(RefValueId).first;
- FS->addRefEdge(RefGUID);
- }
+ std::vector<ValueInfo> Refs =
+ makeRefList(ArrayRef<uint64_t>(Record).slice(2));
+ auto FS = llvm::make_unique<GlobalVarSummary>(Flags, std::move(Refs));
+ FS->setModulePath(TheIndex.addModulePath(ModulePath, 0)->first());
auto GUID = getGUIDFromValueId(ValueID);
FS->setOriginalName(GUID.second);
- TheIndex->addGlobalValueSummary(GUID.first, std::move(FS));
+ TheIndex.addGlobalValueSummary(GUID.first, std::move(FS));
break;
}
// FS_COMBINED: [valueid, modid, flags, instcount, numrefs,
- // numrefs x valueid, n x (valueid, callsitecount)]
+ // numrefs x valueid, n x (valueid)]
// FS_COMBINED_PROFILE: [valueid, modid, flags, instcount, numrefs,
- // numrefs x valueid,
- // n x (valueid, callsitecount, profilecount)]
+ // numrefs x valueid, n x (valueid, hotness)]
case bitc::FS_COMBINED:
case bitc::FS_COMBINED_PROFILE: {
unsigned ValueID = Record[0];
@@ -6270,32 +4976,24 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseEntireSummary() {
unsigned InstCount = Record[3];
unsigned NumRefs = Record[4];
auto Flags = getDecodedGVSummaryFlags(RawFlags, Version);
- std::unique_ptr<FunctionSummary> FS =
- llvm::make_unique<FunctionSummary>(Flags, InstCount);
- LastSeenSummary = FS.get();
- FS->setModulePath(ModuleIdMap[ModuleId]);
static int RefListStartIndex = 5;
int CallGraphEdgeStartIndex = RefListStartIndex + NumRefs;
assert(Record.size() >= RefListStartIndex + NumRefs &&
"Record size inconsistent with number of references");
- for (unsigned I = RefListStartIndex, E = CallGraphEdgeStartIndex; I != E;
- ++I) {
- unsigned RefValueId = Record[I];
- GlobalValue::GUID RefGUID = getGUIDFromValueId(RefValueId).first;
- FS->addRefEdge(RefGUID);
- }
+ std::vector<ValueInfo> Refs = makeRefList(
+ ArrayRef<uint64_t>(Record).slice(RefListStartIndex, NumRefs));
bool HasProfile = (BitCode == bitc::FS_COMBINED_PROFILE);
- for (unsigned I = CallGraphEdgeStartIndex, E = Record.size(); I != E;
- ++I) {
- unsigned CalleeValueId = Record[I];
- unsigned CallsiteCount = Record[++I];
- uint64_t ProfileCount = HasProfile ? Record[++I] : 0;
- GlobalValue::GUID CalleeGUID = getGUIDFromValueId(CalleeValueId).first;
- FS->addCallGraphEdge(CalleeGUID,
- CalleeInfo(CallsiteCount, ProfileCount));
- }
+ std::vector<FunctionSummary::EdgeTy> Edges = makeCallList(
+ ArrayRef<uint64_t>(Record).slice(CallGraphEdgeStartIndex),
+ IsOldProfileFormat, HasProfile);
GlobalValue::GUID GUID = getGUIDFromValueId(ValueID).first;
- TheIndex->addGlobalValueSummary(GUID, std::move(FS));
+ auto FS = llvm::make_unique<FunctionSummary>(
+ Flags, InstCount, std::move(Refs), std::move(Edges),
+ std::move(PendingTypeTests));
+ PendingTypeTests.clear();
+ LastSeenSummary = FS.get();
+ FS->setModulePath(ModuleIdMap[ModuleId]);
+ TheIndex.addGlobalValueSummary(GUID, std::move(FS));
Combined = true;
break;
}
@@ -6308,19 +5006,19 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseEntireSummary() {
uint64_t RawFlags = Record[2];
unsigned AliaseeValueId = Record[3];
auto Flags = getDecodedGVSummaryFlags(RawFlags, Version);
- std::unique_ptr<AliasSummary> AS = llvm::make_unique<AliasSummary>(Flags);
+ auto AS = llvm::make_unique<AliasSummary>(Flags, std::vector<ValueInfo>{});
LastSeenSummary = AS.get();
AS->setModulePath(ModuleIdMap[ModuleId]);
auto AliaseeGUID = getGUIDFromValueId(AliaseeValueId).first;
auto AliaseeInModule =
- TheIndex->findSummaryInModule(AliaseeGUID, AS->modulePath());
+ TheIndex.findSummaryInModule(AliaseeGUID, AS->modulePath());
if (!AliaseeInModule)
return error("Alias expects aliasee summary to be parsed");
AS->setAliasee(AliaseeInModule);
GlobalValue::GUID GUID = getGUIDFromValueId(ValueID).first;
- TheIndex->addGlobalValueSummary(GUID, std::move(AS));
+ TheIndex.addGlobalValueSummary(GUID, std::move(AS));
Combined = true;
break;
}
@@ -6330,17 +5028,13 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseEntireSummary() {
uint64_t ModuleId = Record[1];
uint64_t RawFlags = Record[2];
auto Flags = getDecodedGVSummaryFlags(RawFlags, Version);
- std::unique_ptr<GlobalVarSummary> FS =
- llvm::make_unique<GlobalVarSummary>(Flags);
+ std::vector<ValueInfo> Refs =
+ makeRefList(ArrayRef<uint64_t>(Record).slice(3));
+ auto FS = llvm::make_unique<GlobalVarSummary>(Flags, std::move(Refs));
LastSeenSummary = FS.get();
FS->setModulePath(ModuleIdMap[ModuleId]);
- for (unsigned I = 3, E = Record.size(); I != E; ++I) {
- unsigned RefValueId = Record[I];
- GlobalValue::GUID RefGUID = getGUIDFromValueId(RefValueId).first;
- FS->addRefEdge(RefGUID);
- }
GlobalValue::GUID GUID = getGUIDFromValueId(ValueID).first;
- TheIndex->addGlobalValueSummary(GUID, std::move(FS));
+ TheIndex.addGlobalValueSummary(GUID, std::move(FS));
Combined = true;
break;
}
@@ -6352,6 +5046,13 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseEntireSummary() {
LastSeenSummary->setOriginalName(OriginalName);
// Reset the LastSeenSummary
LastSeenSummary = nullptr;
+ break;
+ }
+ case bitc::FS_TYPE_TESTS: {
+ assert(PendingTypeTests.empty());
+ PendingTypeTests.insert(PendingTypeTests.end(), Record.begin(),
+ Record.end());
+ break;
}
}
}
@@ -6360,7 +5061,7 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseEntireSummary() {
// Parse the module string table block into the Index.
// This populates the ModulePathStringTable map in the index.
-std::error_code ModuleSummaryIndexBitcodeReader::parseModuleStringTable() {
+Error ModuleSummaryIndexBitcodeReader::parseModuleStringTable() {
if (Stream.EnterSubBlock(bitc::MODULE_STRTAB_BLOCK_ID))
return error("Invalid record");
@@ -6368,7 +5069,8 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseModuleStringTable() {
SmallString<128> ModulePath;
ModulePathStringTableTy::iterator LastSeenModulePath;
- while (1) {
+
+ while (true) {
BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
switch (Entry.Kind) {
@@ -6376,7 +5078,7 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseModuleStringTable() {
case BitstreamEntry::Error:
return error("Malformed block");
case BitstreamEntry::EndBlock:
- return std::error_code();
+ return Error::success();
case BitstreamEntry::Record:
// The interesting case.
break;
@@ -6393,7 +5095,7 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseModuleStringTable() {
if (convertToString(Record, 1, ModulePath))
return error("Invalid record");
- LastSeenModulePath = TheIndex->addModulePath(ModulePath, ModuleId);
+ LastSeenModulePath = TheIndex.addModulePath(ModulePath, ModuleId);
ModuleIdMap[ModuleId] = LastSeenModulePath->first();
ModulePath.clear();
@@ -6403,7 +5105,7 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseModuleStringTable() {
case bitc::MST_CODE_HASH: {
if (Record.size() != 5)
return error("Invalid hash length " + Twine(Record.size()).str());
- if (LastSeenModulePath == TheIndex->modulePaths().end())
+ if (LastSeenModulePath == TheIndex.modulePaths().end())
return error("Invalid hash that does not follow a module path");
int Pos = 0;
for (auto &Val : Record) {
@@ -6411,7 +5113,7 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseModuleStringTable() {
LastSeenModulePath->second.second[Pos++] = Val;
}
// Reset LastSeenModulePath to avoid overriding the hash unexpectedly.
- LastSeenModulePath = TheIndex->modulePaths().end();
+ LastSeenModulePath = TheIndex.modulePaths().end();
break;
}
}
@@ -6419,114 +5121,25 @@ std::error_code ModuleSummaryIndexBitcodeReader::parseModuleStringTable() {
llvm_unreachable("Exit infinite loop");
}
-// Parse the function info index from the bitcode streamer into the given index.
-std::error_code ModuleSummaryIndexBitcodeReader::parseSummaryIndexInto(
- std::unique_ptr<DataStreamer> Streamer, ModuleSummaryIndex *I) {
- TheIndex = I;
-
- if (std::error_code EC = initStream(std::move(Streamer)))
- return EC;
-
- // Sniff for the signature.
- if (!hasValidBitcodeHeader(Stream))
- return error("Invalid bitcode signature");
-
- // We expect a number of well-defined blocks, though we don't necessarily
- // need to understand them all.
- while (1) {
- if (Stream.AtEndOfStream()) {
- // We didn't really read a proper Module block.
- return error("Malformed block");
- }
-
- BitstreamEntry Entry =
- Stream.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs);
-
- if (Entry.Kind != BitstreamEntry::SubBlock)
- return error("Malformed block");
-
- // If we see a MODULE_BLOCK, parse it to find the blocks needed for
- // building the function summary index.
- if (Entry.ID == bitc::MODULE_BLOCK_ID)
- return parseModule();
-
- if (Stream.SkipBlock())
- return error("Invalid record");
- }
-}
-
-std::error_code ModuleSummaryIndexBitcodeReader::initStream(
- std::unique_ptr<DataStreamer> Streamer) {
- if (Streamer)
- return initLazyStream(std::move(Streamer));
- return initStreamFromBuffer();
-}
-
-std::error_code ModuleSummaryIndexBitcodeReader::initStreamFromBuffer() {
- const unsigned char *BufPtr = (const unsigned char *)Buffer->getBufferStart();
- const unsigned char *BufEnd = BufPtr + Buffer->getBufferSize();
-
- if (Buffer->getBufferSize() & 3)
- return error("Invalid bitcode signature");
-
- // If we have a wrapper header, parse it and ignore the non-bc file contents.
- // The magic number is 0x0B17C0DE stored in little endian.
- if (isBitcodeWrapper(BufPtr, BufEnd))
- if (SkipBitcodeWrapperHeader(BufPtr, BufEnd, true))
- return error("Invalid bitcode wrapper header");
-
- StreamFile.reset(new BitstreamReader(BufPtr, BufEnd));
- Stream.init(&*StreamFile);
-
- return std::error_code();
-}
-
-std::error_code ModuleSummaryIndexBitcodeReader::initLazyStream(
- std::unique_ptr<DataStreamer> Streamer) {
- // Check and strip off the bitcode wrapper; BitstreamReader expects never to
- // see it.
- auto OwnedBytes =
- llvm::make_unique<StreamingMemoryObject>(std::move(Streamer));
- StreamingMemoryObject &Bytes = *OwnedBytes;
- StreamFile = llvm::make_unique<BitstreamReader>(std::move(OwnedBytes));
- Stream.init(&*StreamFile);
-
- unsigned char buf[16];
- if (Bytes.readBytes(buf, 16, 0) != 16)
- return error("Invalid bitcode signature");
-
- if (!isBitcode(buf, buf + 16))
- return error("Invalid bitcode signature");
-
- if (isBitcodeWrapper(buf, buf + 4)) {
- const unsigned char *bitcodeStart = buf;
- const unsigned char *bitcodeEnd = buf + 16;
- SkipBitcodeWrapperHeader(bitcodeStart, bitcodeEnd, false);
- Bytes.dropLeadingBytes(bitcodeStart - buf);
- Bytes.setKnownObjectSize(bitcodeEnd - bitcodeStart);
- }
- return std::error_code();
-}
-
namespace {
+
// FIXME: This class is only here to support the transition to llvm::Error. It
// will be removed once this transition is complete. Clients should prefer to
// deal with the Error value directly, rather than converting to error_code.
class BitcodeErrorCategoryType : public std::error_category {
- const char *name() const LLVM_NOEXCEPT override {
+ const char *name() const noexcept override {
return "llvm.bitcode";
}
std::string message(int IE) const override {
BitcodeError E = static_cast<BitcodeError>(IE);
switch (E) {
- case BitcodeError::InvalidBitcodeSignature:
- return "Invalid bitcode signature";
case BitcodeError::CorruptedBitcode:
return "Corrupted bitcode";
}
llvm_unreachable("Unknown error type!");
}
};
+
} // end anonymous namespace
static ManagedStatic<BitcodeErrorCategoryType> ErrorCategory;
@@ -6539,151 +5152,251 @@ const std::error_category &llvm::BitcodeErrorCategory() {
// External interface
//===----------------------------------------------------------------------===//
-static ErrorOr<std::unique_ptr<Module>>
-getBitcodeModuleImpl(std::unique_ptr<DataStreamer> Streamer, StringRef Name,
- BitcodeReader *R, LLVMContext &Context,
- bool MaterializeAll, bool ShouldLazyLoadMetadata) {
- std::unique_ptr<Module> M = make_unique<Module>(Name, Context);
- M->setMaterializer(R);
+Expected<std::vector<BitcodeModule>>
+llvm::getBitcodeModuleList(MemoryBufferRef Buffer) {
+ Expected<BitstreamCursor> StreamOrErr = initStream(Buffer);
+ if (!StreamOrErr)
+ return StreamOrErr.takeError();
+ BitstreamCursor &Stream = *StreamOrErr;
- auto cleanupOnError = [&](std::error_code EC) {
- R->releaseBuffer(); // Never take ownership on error.
- return EC;
- };
+ std::vector<BitcodeModule> Modules;
+ while (true) {
+ uint64_t BCBegin = Stream.getCurrentByteNo();
- // Delay parsing Metadata if ShouldLazyLoadMetadata is true.
- if (std::error_code EC = R->parseBitcodeInto(std::move(Streamer), M.get(),
- ShouldLazyLoadMetadata))
- return cleanupOnError(EC);
+ // We may be consuming bitcode from a client that leaves garbage at the end
+ // of the bitcode stream (e.g. Apple's ar tool). If we are close enough to
+ // the end that there cannot possibly be another module, stop looking.
+ if (BCBegin + 8 >= Stream.getBitcodeBytes().size())
+ return Modules;
- if (MaterializeAll) {
- // Read in the entire module, and destroy the BitcodeReader.
- if (std::error_code EC = M->materializeAll())
- return cleanupOnError(EC);
- } else {
- // Resolve forward references from blockaddresses.
- if (std::error_code EC = R->materializeForwardReferencedFunctions())
- return cleanupOnError(EC);
+ BitstreamEntry Entry = Stream.advance();
+ switch (Entry.Kind) {
+ case BitstreamEntry::EndBlock:
+ case BitstreamEntry::Error:
+ return error("Malformed block");
+
+ case BitstreamEntry::SubBlock: {
+ uint64_t IdentificationBit = -1ull;
+ if (Entry.ID == bitc::IDENTIFICATION_BLOCK_ID) {
+ IdentificationBit = Stream.GetCurrentBitNo() - BCBegin * 8;
+ if (Stream.SkipBlock())
+ return error("Malformed block");
+
+ Entry = Stream.advance();
+ if (Entry.Kind != BitstreamEntry::SubBlock ||
+ Entry.ID != bitc::MODULE_BLOCK_ID)
+ return error("Malformed block");
+ }
+
+ if (Entry.ID == bitc::MODULE_BLOCK_ID) {
+ uint64_t ModuleBit = Stream.GetCurrentBitNo() - BCBegin * 8;
+ if (Stream.SkipBlock())
+ return error("Malformed block");
+
+ Modules.push_back({Stream.getBitcodeBytes().slice(
+ BCBegin, Stream.getCurrentByteNo() - BCBegin),
+ Buffer.getBufferIdentifier(), IdentificationBit,
+ ModuleBit});
+ continue;
+ }
+
+ if (Stream.SkipBlock())
+ return error("Malformed block");
+ continue;
+ }
+ case BitstreamEntry::Record:
+ Stream.skipRecord(Entry.ID);
+ continue;
+ }
}
- return std::move(M);
}
/// \brief Get a lazy one-at-a-time loading module from bitcode.
///
/// This isn't always used in a lazy context. In particular, it's also used by
-/// \a parseBitcodeFile(). If this is truly lazy, then we need to eagerly pull
+/// \a parseModule(). If this is truly lazy, then we need to eagerly pull
/// in forward-referenced functions from block address references.
///
/// \param[in] MaterializeAll Set to \c true if we should materialize
/// everything.
-static ErrorOr<std::unique_ptr<Module>>
-getLazyBitcodeModuleImpl(std::unique_ptr<MemoryBuffer> &&Buffer,
- LLVMContext &Context, bool MaterializeAll,
- bool ShouldLazyLoadMetadata = false) {
- BitcodeReader *R = new BitcodeReader(Buffer.get(), Context);
-
- ErrorOr<std::unique_ptr<Module>> Ret =
- getBitcodeModuleImpl(nullptr, Buffer->getBufferIdentifier(), R, Context,
- MaterializeAll, ShouldLazyLoadMetadata);
- if (!Ret)
- return Ret;
-
- Buffer.release(); // The BitcodeReader owns it now.
- return Ret;
+Expected<std::unique_ptr<Module>>
+BitcodeModule::getModuleImpl(LLVMContext &Context, bool MaterializeAll,
+ bool ShouldLazyLoadMetadata, bool IsImporting) {
+ BitstreamCursor Stream(Buffer);
+
+ std::string ProducerIdentification;
+ if (IdentificationBit != -1ull) {
+ Stream.JumpToBit(IdentificationBit);
+ Expected<std::string> ProducerIdentificationOrErr =
+ readIdentificationBlock(Stream);
+ if (!ProducerIdentificationOrErr)
+ return ProducerIdentificationOrErr.takeError();
+
+ ProducerIdentification = *ProducerIdentificationOrErr;
+ }
+
+ Stream.JumpToBit(ModuleBit);
+ auto *R =
+ new BitcodeReader(std::move(Stream), ProducerIdentification, Context);
+
+ std::unique_ptr<Module> M =
+ llvm::make_unique<Module>(ModuleIdentifier, Context);
+ M->setMaterializer(R);
+
+ // Delay parsing Metadata if ShouldLazyLoadMetadata is true.
+ if (Error Err =
+ R->parseBitcodeInto(M.get(), ShouldLazyLoadMetadata, IsImporting))
+ return std::move(Err);
+
+ if (MaterializeAll) {
+ // Read in the entire module, and destroy the BitcodeReader.
+ if (Error Err = M->materializeAll())
+ return std::move(Err);
+ } else {
+ // Resolve forward references from blockaddresses.
+ if (Error Err = R->materializeForwardReferencedFunctions())
+ return std::move(Err);
+ }
+ return std::move(M);
}
-ErrorOr<std::unique_ptr<Module>>
-llvm::getLazyBitcodeModule(std::unique_ptr<MemoryBuffer> &&Buffer,
- LLVMContext &Context, bool ShouldLazyLoadMetadata) {
- return getLazyBitcodeModuleImpl(std::move(Buffer), Context, false,
- ShouldLazyLoadMetadata);
+Expected<std::unique_ptr<Module>>
+BitcodeModule::getLazyModule(LLVMContext &Context, bool ShouldLazyLoadMetadata,
+ bool IsImporting) {
+ return getModuleImpl(Context, false, ShouldLazyLoadMetadata, IsImporting);
}
-ErrorOr<std::unique_ptr<Module>>
-llvm::getStreamedBitcodeModule(StringRef Name,
- std::unique_ptr<DataStreamer> Streamer,
- LLVMContext &Context) {
- std::unique_ptr<Module> M = make_unique<Module>(Name, Context);
- BitcodeReader *R = new BitcodeReader(Context);
+// Parse the specified bitcode buffer, returning the module summary index.
+Expected<std::unique_ptr<ModuleSummaryIndex>> BitcodeModule::getSummary() {
+ BitstreamCursor Stream(Buffer);
+ Stream.JumpToBit(ModuleBit);
- return getBitcodeModuleImpl(std::move(Streamer), Name, R, Context, false,
- false);
+ auto Index = llvm::make_unique<ModuleSummaryIndex>();
+ ModuleSummaryIndexBitcodeReader R(std::move(Stream), *Index);
+
+ if (Error Err = R.parseModule(ModuleIdentifier))
+ return std::move(Err);
+
+ return std::move(Index);
}
-ErrorOr<std::unique_ptr<Module>> llvm::parseBitcodeFile(MemoryBufferRef Buffer,
- LLVMContext &Context) {
- std::unique_ptr<MemoryBuffer> Buf = MemoryBuffer::getMemBuffer(Buffer, false);
- return getLazyBitcodeModuleImpl(std::move(Buf), Context, true);
- // TODO: Restore the use-lists to the in-memory state when the bitcode was
- // written. We must defer until the Module has been fully materialized.
+// Check if the given bitcode buffer contains a global value summary block.
+Expected<bool> BitcodeModule::hasSummary() {
+ BitstreamCursor Stream(Buffer);
+ Stream.JumpToBit(ModuleBit);
+
+ if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID))
+ return error("Invalid record");
+
+ while (true) {
+ BitstreamEntry Entry = Stream.advance();
+
+ switch (Entry.Kind) {
+ case BitstreamEntry::Error:
+ return error("Malformed block");
+ case BitstreamEntry::EndBlock:
+ return false;
+
+ case BitstreamEntry::SubBlock:
+ if (Entry.ID == bitc::GLOBALVAL_SUMMARY_BLOCK_ID)
+ return true;
+
+ // Ignore other sub-blocks.
+ if (Stream.SkipBlock())
+ return error("Malformed block");
+ continue;
+
+ case BitstreamEntry::Record:
+ Stream.skipRecord(Entry.ID);
+ continue;
+ }
+ }
}
-std::string llvm::getBitcodeTargetTriple(MemoryBufferRef Buffer,
- LLVMContext &Context) {
- std::unique_ptr<MemoryBuffer> Buf = MemoryBuffer::getMemBuffer(Buffer, false);
- auto R = llvm::make_unique<BitcodeReader>(Buf.release(), Context);
- ErrorOr<std::string> Triple = R->parseTriple();
- if (Triple.getError())
- return "";
- return Triple.get();
+static Expected<BitcodeModule> getSingleModule(MemoryBufferRef Buffer) {
+ Expected<std::vector<BitcodeModule>> MsOrErr = getBitcodeModuleList(Buffer);
+ if (!MsOrErr)
+ return MsOrErr.takeError();
+
+ if (MsOrErr->size() != 1)
+ return error("Expected a single module");
+
+ return (*MsOrErr)[0];
}
-bool llvm::isBitcodeContainingObjCCategory(MemoryBufferRef Buffer,
- LLVMContext &Context) {
- std::unique_ptr<MemoryBuffer> Buf = MemoryBuffer::getMemBuffer(Buffer, false);
- auto R = llvm::make_unique<BitcodeReader>(Buf.release(), Context);
- ErrorOr<bool> hasObjCCategory = R->hasObjCCategory();
- if (hasObjCCategory.getError())
- return false;
- return hasObjCCategory.get();
+Expected<std::unique_ptr<Module>>
+llvm::getLazyBitcodeModule(MemoryBufferRef Buffer, LLVMContext &Context,
+ bool ShouldLazyLoadMetadata, bool IsImporting) {
+ Expected<BitcodeModule> BM = getSingleModule(Buffer);
+ if (!BM)
+ return BM.takeError();
+
+ return BM->getLazyModule(Context, ShouldLazyLoadMetadata, IsImporting);
}
-std::string llvm::getBitcodeProducerString(MemoryBufferRef Buffer,
- LLVMContext &Context) {
- std::unique_ptr<MemoryBuffer> Buf = MemoryBuffer::getMemBuffer(Buffer, false);
- BitcodeReader R(Buf.release(), Context);
- ErrorOr<std::string> ProducerString = R.parseIdentificationBlock();
- if (ProducerString.getError())
- return "";
- return ProducerString.get();
+Expected<std::unique_ptr<Module>> llvm::getOwningLazyBitcodeModule(
+ std::unique_ptr<MemoryBuffer> &&Buffer, LLVMContext &Context,
+ bool ShouldLazyLoadMetadata, bool IsImporting) {
+ auto MOrErr = getLazyBitcodeModule(*Buffer, Context, ShouldLazyLoadMetadata,
+ IsImporting);
+ if (MOrErr)
+ (*MOrErr)->setOwnedMemoryBuffer(std::move(Buffer));
+ return MOrErr;
}
-// Parse the specified bitcode buffer, returning the function info index.
-ErrorOr<std::unique_ptr<ModuleSummaryIndex>> llvm::getModuleSummaryIndex(
- MemoryBufferRef Buffer,
- const DiagnosticHandlerFunction &DiagnosticHandler) {
- std::unique_ptr<MemoryBuffer> Buf = MemoryBuffer::getMemBuffer(Buffer, false);
- ModuleSummaryIndexBitcodeReader R(Buf.get(), DiagnosticHandler);
+Expected<std::unique_ptr<Module>>
+BitcodeModule::parseModule(LLVMContext &Context) {
+ return getModuleImpl(Context, true, false, false);
+ // TODO: Restore the use-lists to the in-memory state when the bitcode was
+ // written. We must defer until the Module has been fully materialized.
+}
- auto Index = llvm::make_unique<ModuleSummaryIndex>();
+Expected<std::unique_ptr<Module>> llvm::parseBitcodeFile(MemoryBufferRef Buffer,
+ LLVMContext &Context) {
+ Expected<BitcodeModule> BM = getSingleModule(Buffer);
+ if (!BM)
+ return BM.takeError();
- auto cleanupOnError = [&](std::error_code EC) {
- R.releaseBuffer(); // Never take ownership on error.
- return EC;
- };
+ return BM->parseModule(Context);
+}
- if (std::error_code EC = R.parseSummaryIndexInto(nullptr, Index.get()))
- return cleanupOnError(EC);
+Expected<std::string> llvm::getBitcodeTargetTriple(MemoryBufferRef Buffer) {
+ Expected<BitstreamCursor> StreamOrErr = initStream(Buffer);
+ if (!StreamOrErr)
+ return StreamOrErr.takeError();
- Buf.release(); // The ModuleSummaryIndexBitcodeReader owns it now.
- return std::move(Index);
+ return readTriple(*StreamOrErr);
}
-// Check if the given bitcode buffer contains a global value summary block.
-bool llvm::hasGlobalValueSummary(
- MemoryBufferRef Buffer,
- const DiagnosticHandlerFunction &DiagnosticHandler) {
- std::unique_ptr<MemoryBuffer> Buf = MemoryBuffer::getMemBuffer(Buffer, false);
- ModuleSummaryIndexBitcodeReader R(Buf.get(), DiagnosticHandler, true);
-
- auto cleanupOnError = [&](std::error_code EC) {
- R.releaseBuffer(); // Never take ownership on error.
- return false;
- };
+Expected<bool> llvm::isBitcodeContainingObjCCategory(MemoryBufferRef Buffer) {
+ Expected<BitstreamCursor> StreamOrErr = initStream(Buffer);
+ if (!StreamOrErr)
+ return StreamOrErr.takeError();
+
+ return hasObjCCategory(*StreamOrErr);
+}
+
+Expected<std::string> llvm::getBitcodeProducerString(MemoryBufferRef Buffer) {
+ Expected<BitstreamCursor> StreamOrErr = initStream(Buffer);
+ if (!StreamOrErr)
+ return StreamOrErr.takeError();
+
+ return readIdentificationCode(*StreamOrErr);
+}
+
+Expected<std::unique_ptr<ModuleSummaryIndex>>
+llvm::getModuleSummaryIndex(MemoryBufferRef Buffer) {
+ Expected<BitcodeModule> BM = getSingleModule(Buffer);
+ if (!BM)
+ return BM.takeError();
+
+ return BM->getSummary();
+}
- if (std::error_code EC = R.parseSummaryIndexInto(nullptr, nullptr))
- return cleanupOnError(EC);
+Expected<bool> llvm::hasGlobalValueSummary(MemoryBufferRef Buffer) {
+ Expected<BitcodeModule> BM = getSingleModule(Buffer);
+ if (!BM)
+ return BM.takeError();
- Buf.release(); // The ModuleSummaryIndexBitcodeReader owns it now.
- return R.foundGlobalValSummary();
+ return BM->hasSummary();
}
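The hunks above replace the std::error_code plumbing with llvm::Expected and introduce the BitcodeModule entry points. A minimal caller-side sketch of that new interface follows; it is illustrative only, assumes the post-change llvm/Bitcode/BitcodeReader.h, and the helper name loadFirstModule is hypothetical.

#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

// Hypothetical helper: load and fully materialize the first module in a
// (possibly multi-module) bitcode buffer using the Expected-based API above.
static std::unique_ptr<Module> loadFirstModule(MemoryBufferRef Buf,
                                               LLVMContext &Ctx) {
  Expected<std::vector<BitcodeModule>> Mods = getBitcodeModuleList(Buf);
  if (!Mods) {
    logAllUnhandledErrors(Mods.takeError(), errs(), "bitcode: ");
    return nullptr;
  }
  if (Mods->empty())
    return nullptr;
  Expected<std::unique_ptr<Module>> MOrErr = Mods->front().parseModule(Ctx);
  if (!MOrErr) {
    logAllUnhandledErrors(MOrErr.takeError(), errs(), "bitcode: ");
    return nullptr;
  }
  return std::move(*MOrErr);
}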
diff --git a/contrib/llvm/lib/Bitcode/Reader/BitstreamReader.cpp b/contrib/llvm/lib/Bitcode/Reader/BitstreamReader.cpp
index 60360d2ef78f..43c9aebd79ef 100644
--- a/contrib/llvm/lib/Bitcode/Reader/BitstreamReader.cpp
+++ b/contrib/llvm/lib/Bitcode/Reader/BitstreamReader.cpp
@@ -8,6 +8,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/Bitcode/BitstreamReader.h"
+#include "llvm/ADT/StringRef.h"
+#include <cassert>
+#include <string>
using namespace llvm;
@@ -15,14 +18,6 @@ using namespace llvm;
// BitstreamCursor implementation
//===----------------------------------------------------------------------===//
-void BitstreamCursor::freeState() {
- // Free all the Abbrevs.
- CurAbbrevs.clear();
-
- // Free all the Abbrevs in the block scope.
- BlockScope.clear();
-}
-
/// EnterSubBlock - Having read the ENTER_SUBBLOCK abbrevid, enter
/// the block, and return true if the block has an error.
bool BitstreamCursor::EnterSubBlock(unsigned BlockID, unsigned *NumWordsP) {
@@ -31,10 +26,12 @@ bool BitstreamCursor::EnterSubBlock(unsigned BlockID, unsigned *NumWordsP) {
BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
// Add the abbrevs specific to this block to the CurAbbrevs list.
- if (const BitstreamReader::BlockInfo *Info =
- getBitStreamReader()->getBlockInfo(BlockID)) {
- CurAbbrevs.insert(CurAbbrevs.end(), Info->Abbrevs.begin(),
- Info->Abbrevs.end());
+ if (BlockInfo) {
+ if (const BitstreamBlockInfo::BlockInfo *Info =
+ BlockInfo->getBlockInfo(BlockID)) {
+ CurAbbrevs.insert(CurAbbrevs.end(), Info->Abbrevs.begin(),
+ Info->Abbrevs.end());
+ }
}
// Get the codesize of this block.
@@ -95,8 +92,6 @@ static void skipAbbreviatedField(BitstreamCursor &Cursor,
}
}
-
-
/// skipRecord - Read the current record and discard it.
void BitstreamCursor::skipRecord(unsigned AbbrevID) {
// Skip unabbreviated records by reading past their entries.
@@ -136,18 +131,16 @@ void BitstreamCursor::skipRecord(unsigned AbbrevID) {
default:
report_fatal_error("Array element type can't be an Array or a Blob");
case BitCodeAbbrevOp::Fixed:
- assert((unsigned)Op.getEncodingData() <= MaxChunkSize);
- for (; NumElts; --NumElts)
- Read((unsigned)EltEnc.getEncodingData());
+ assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize);
+ JumpToBit(GetCurrentBitNo() + NumElts * EltEnc.getEncodingData());
break;
case BitCodeAbbrevOp::VBR:
- assert((unsigned)Op.getEncodingData() <= MaxChunkSize);
+ assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize);
for (; NumElts; --NumElts)
ReadVBR64((unsigned)EltEnc.getEncodingData());
break;
case BitCodeAbbrevOp::Char6:
- for (; NumElts; --NumElts)
- Read(6);
+ JumpToBit(GetCurrentBitNo() + NumElts * 6);
break;
}
continue;
@@ -279,7 +272,6 @@ unsigned BitstreamCursor::readRecord(unsigned AbbrevID,
return Code;
}
-
void BitstreamCursor::ReadAbbrevRecord() {
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
unsigned NumOpInfo = ReadVBR(5);
@@ -318,26 +310,25 @@ void BitstreamCursor::ReadAbbrevRecord() {
CurAbbrevs.push_back(Abbv);
}
-bool BitstreamCursor::ReadBlockInfoBlock() {
- // If this is the second stream to get to the block info block, skip it.
- if (getBitStreamReader()->hasBlockInfoRecords())
- return SkipBlock();
+Optional<BitstreamBlockInfo>
+BitstreamCursor::ReadBlockInfoBlock(bool ReadBlockInfoNames) {
+ if (EnterSubBlock(bitc::BLOCKINFO_BLOCK_ID)) return None;
- if (EnterSubBlock(bitc::BLOCKINFO_BLOCK_ID)) return true;
+ BitstreamBlockInfo NewBlockInfo;
SmallVector<uint64_t, 64> Record;
- BitstreamReader::BlockInfo *CurBlockInfo = nullptr;
+ BitstreamBlockInfo::BlockInfo *CurBlockInfo = nullptr;
// Read all the records for this module.
- while (1) {
+ while (true) {
BitstreamEntry Entry = advanceSkippingSubblocks(AF_DontAutoprocessAbbrevs);
switch (Entry.Kind) {
case llvm::BitstreamEntry::SubBlock: // Handled for us already.
case llvm::BitstreamEntry::Error:
- return true;
+ return None;
case llvm::BitstreamEntry::EndBlock:
- return false;
+ return std::move(NewBlockInfo);
case llvm::BitstreamEntry::Record:
// The interesting case.
break;
@@ -345,7 +336,7 @@ bool BitstreamCursor::ReadBlockInfoBlock() {
// Read abbrev records, associate them with CurBID.
if (Entry.ID == bitc::DEFINE_ABBREV) {
- if (!CurBlockInfo) return true;
+ if (!CurBlockInfo) return None;
ReadAbbrevRecord();
// ReadAbbrevRecord installs the abbrev in CurAbbrevs. Move it to the
@@ -360,13 +351,12 @@ bool BitstreamCursor::ReadBlockInfoBlock() {
switch (readRecord(Entry.ID, Record)) {
default: break; // Default behavior, ignore unknown content.
case bitc::BLOCKINFO_CODE_SETBID:
- if (Record.size() < 1) return true;
- CurBlockInfo =
- &getBitStreamReader()->getOrCreateBlockInfo((unsigned)Record[0]);
+ if (Record.size() < 1) return None;
+ CurBlockInfo = &NewBlockInfo.getOrCreateBlockInfo((unsigned)Record[0]);
break;
case bitc::BLOCKINFO_CODE_BLOCKNAME: {
- if (!CurBlockInfo) return true;
- if (getBitStreamReader()->isIgnoringBlockInfoNames())
+ if (!CurBlockInfo) return None;
+ if (!ReadBlockInfoNames)
break; // Ignore name.
std::string Name;
for (unsigned i = 0, e = Record.size(); i != e; ++i)
@@ -375,8 +365,8 @@ bool BitstreamCursor::ReadBlockInfoBlock() {
break;
}
case bitc::BLOCKINFO_CODE_SETRECORDNAME: {
- if (!CurBlockInfo) return true;
- if (getBitStreamReader()->isIgnoringBlockInfoNames())
+ if (!CurBlockInfo) return None;
+ if (!ReadBlockInfoNames)
break; // Ignore name.
std::string Name;
for (unsigned i = 1, e = Record.size(); i != e; ++i)
@@ -388,4 +378,3 @@ bool BitstreamCursor::ReadBlockInfoBlock() {
}
}
}
-
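The BitstreamReader.cpp hunks above make BLOCKINFO parsing side-effect free: ReadBlockInfoBlock now returns an Optional<BitstreamBlockInfo> instead of mutating state shared through BitstreamReader. A minimal caller-side sketch, illustrative only and assuming the post-change llvm/Bitcode/BitstreamReader.h (the helper name readBlockInfo is hypothetical):

#include "llvm/ADT/Optional.h"
#include "llvm/Bitcode/BitstreamReader.h"

using namespace llvm;

// Call after Stream.advance() has just returned a SubBlock entry whose ID is
// bitc::BLOCKINFO_BLOCK_ID. Returns false if the block is malformed.
static bool readBlockInfo(BitstreamCursor &Stream,
                          BitstreamBlockInfo &BlockInfo) {
  Optional<BitstreamBlockInfo> NewInfo =
      Stream.ReadBlockInfoBlock(/*ReadBlockInfoNames=*/false);
  if (!NewInfo)
    return false;
  BlockInfo = std::move(*NewInfo);
  // Hand the freshly parsed table back to the cursor so later EnterSubBlock()
  // calls can find the per-block abbreviations registered in BLOCKINFO.
  Stream.setBlockInfo(&BlockInfo);
  return true;
}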
diff --git a/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.cpp b/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
new file mode 100644
index 000000000000..cd08268d47b5
--- /dev/null
+++ b/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.cpp
@@ -0,0 +1,1398 @@
+//===- MetadataLoader.cpp - Internal BitcodeReader implementation ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MetadataLoader.h"
+#include "ValueList.h"
+
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Bitcode/BitcodeReader.h"
+#include "llvm/Bitcode/BitstreamReader.h"
+#include "llvm/Bitcode/LLVMBitCodes.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/AutoUpgrade.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Comdat.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/DiagnosticPrinter.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GVMaterializer.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalIFunc.h"
+#include "llvm/IR/GlobalIndirectSymbol.h"
+#include "llvm/IR/GlobalObject.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/ModuleSummaryIndex.h"
+#include "llvm/IR/OperandTraits.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/IR/TrackingMDRef.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <deque>
+#include <limits>
+#include <map>
+#include <memory>
+#include <string>
+#include <system_error>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+
+/// Flag whether we need to import full type definitions for ThinLTO.
+/// Currently needed for Darwin and LLDB.
+static cl::opt<bool> ImportFullTypeDefinitions(
+ "import-full-type-definitions", cl::init(false), cl::Hidden,
+ cl::desc("Import full type definitions for ThinLTO."));
+
+namespace {
+
+static int64_t unrotateSign(uint64_t U) { return U & 1 ? ~(U >> 1) : U >> 1; }
+
+class BitcodeReaderMetadataList {
+ /// Array of metadata references.
+ ///
+ /// Don't use std::vector here. Some versions of libc++ copy (instead of
+ /// move) on resize, and TrackingMDRef is very expensive to copy.
+ SmallVector<TrackingMDRef, 1> MetadataPtrs;
+
+ /// The set of indices in MetadataPtrs above of forward references that were
+ /// generated.
+ SmallDenseSet<unsigned, 1> ForwardReference;
+
+ /// The set of indices in MetadataPtrs above of Metadata that need to be
+ /// resolved.
+ SmallDenseSet<unsigned, 1> UnresolvedNodes;
+
+ /// Structures for resolving old type refs.
+ struct {
+ SmallDenseMap<MDString *, TempMDTuple, 1> Unknown;
+ SmallDenseMap<MDString *, DICompositeType *, 1> Final;
+ SmallDenseMap<MDString *, DICompositeType *, 1> FwdDecls;
+ SmallVector<std::pair<TrackingMDRef, TempMDTuple>, 1> Arrays;
+ } OldTypeRefs;
+
+ LLVMContext &Context;
+
+public:
+ BitcodeReaderMetadataList(LLVMContext &C) : Context(C) {}
+
+ // vector compatibility methods
+ unsigned size() const { return MetadataPtrs.size(); }
+ void resize(unsigned N) { MetadataPtrs.resize(N); }
+ void push_back(Metadata *MD) { MetadataPtrs.emplace_back(MD); }
+ void clear() { MetadataPtrs.clear(); }
+ Metadata *back() const { return MetadataPtrs.back(); }
+ void pop_back() { MetadataPtrs.pop_back(); }
+ bool empty() const { return MetadataPtrs.empty(); }
+
+ Metadata *operator[](unsigned i) const {
+ assert(i < MetadataPtrs.size());
+ return MetadataPtrs[i];
+ }
+
+ Metadata *lookup(unsigned I) const {
+ if (I < MetadataPtrs.size())
+ return MetadataPtrs[I];
+ return nullptr;
+ }
+
+ void shrinkTo(unsigned N) {
+ assert(N <= size() && "Invalid shrinkTo request!");
+ assert(ForwardReference.empty() && "Unexpected forward refs");
+ assert(UnresolvedNodes.empty() && "Unexpected unresolved node");
+ MetadataPtrs.resize(N);
+ }
+
+ /// Return the given metadata, creating a replaceable forward reference if
+ /// necessary.
+ Metadata *getMetadataFwdRef(unsigned Idx);
+
+  /// Return the given metadata only if it is fully resolved.
+ ///
+ /// Gives the same result as \a lookup(), unless \a MDNode::isResolved()
+ /// would give \c false.
+ Metadata *getMetadataIfResolved(unsigned Idx);
+
+ MDNode *getMDNodeFwdRefOrNull(unsigned Idx);
+ void assignValue(Metadata *MD, unsigned Idx);
+ void tryToResolveCycles();
+ bool hasFwdRefs() const { return !ForwardReference.empty(); }
+
+ /// Upgrade a type that had an MDString reference.
+ void addTypeRef(MDString &UUID, DICompositeType &CT);
+
+ /// Upgrade a type that had an MDString reference.
+ Metadata *upgradeTypeRef(Metadata *MaybeUUID);
+
+ /// Upgrade a type ref array that may have MDString references.
+ Metadata *upgradeTypeRefArray(Metadata *MaybeTuple);
+
+private:
+ Metadata *resolveTypeRefArray(Metadata *MaybeTuple);
+};
+
+void BitcodeReaderMetadataList::assignValue(Metadata *MD, unsigned Idx) {
+ if (auto *MDN = dyn_cast<MDNode>(MD))
+ if (!MDN->isResolved())
+ UnresolvedNodes.insert(Idx);
+
+ if (Idx == size()) {
+ push_back(MD);
+ return;
+ }
+
+ if (Idx >= size())
+ resize(Idx + 1);
+
+ TrackingMDRef &OldMD = MetadataPtrs[Idx];
+ if (!OldMD) {
+ OldMD.reset(MD);
+ return;
+ }
+
+ // If there was a forward reference to this value, replace it.
+ TempMDTuple PrevMD(cast<MDTuple>(OldMD.get()));
+ PrevMD->replaceAllUsesWith(MD);
+ ForwardReference.erase(Idx);
+}
+
+Metadata *BitcodeReaderMetadataList::getMetadataFwdRef(unsigned Idx) {
+ if (Idx >= size())
+ resize(Idx + 1);
+
+ if (Metadata *MD = MetadataPtrs[Idx])
+ return MD;
+
+ // Track forward refs to be resolved later.
+ ForwardReference.insert(Idx);
+
+ // Create and return a placeholder, which will later be RAUW'd.
+ Metadata *MD = MDNode::getTemporary(Context, None).release();
+ MetadataPtrs[Idx].reset(MD);
+ return MD;
+}
+
+Metadata *BitcodeReaderMetadataList::getMetadataIfResolved(unsigned Idx) {
+ Metadata *MD = lookup(Idx);
+ if (auto *N = dyn_cast_or_null<MDNode>(MD))
+ if (!N->isResolved())
+ return nullptr;
+ return MD;
+}
+
+MDNode *BitcodeReaderMetadataList::getMDNodeFwdRefOrNull(unsigned Idx) {
+ return dyn_cast_or_null<MDNode>(getMetadataFwdRef(Idx));
+}
+
+void BitcodeReaderMetadataList::tryToResolveCycles() {
+ if (!ForwardReference.empty())
+ // Still forward references... can't resolve cycles.
+ return;
+
+ // Give up on finding a full definition for any forward decls that remain.
+ for (const auto &Ref : OldTypeRefs.FwdDecls)
+ OldTypeRefs.Final.insert(Ref);
+ OldTypeRefs.FwdDecls.clear();
+
+ // Upgrade from old type ref arrays. In strange cases, this could add to
+ // OldTypeRefs.Unknown.
+ for (const auto &Array : OldTypeRefs.Arrays)
+ Array.second->replaceAllUsesWith(resolveTypeRefArray(Array.first.get()));
+ OldTypeRefs.Arrays.clear();
+
+ // Replace old string-based type refs with the resolved node, if possible.
+ // If we haven't seen the node, leave it to the verifier to complain about
+ // the invalid string reference.
+ for (const auto &Ref : OldTypeRefs.Unknown) {
+ if (DICompositeType *CT = OldTypeRefs.Final.lookup(Ref.first))
+ Ref.second->replaceAllUsesWith(CT);
+ else
+ Ref.second->replaceAllUsesWith(Ref.first);
+ }
+ OldTypeRefs.Unknown.clear();
+
+ if (UnresolvedNodes.empty())
+ // Nothing to do.
+ return;
+
+ // Resolve any cycles.
+ for (unsigned I : UnresolvedNodes) {
+ auto &MD = MetadataPtrs[I];
+ auto *N = dyn_cast_or_null<MDNode>(MD);
+ if (!N)
+ continue;
+
+ assert(!N->isTemporary() && "Unexpected forward reference");
+ N->resolveCycles();
+ }
+
+ // Make sure we return early again until there's another unresolved ref.
+ UnresolvedNodes.clear();
+}
+
+void BitcodeReaderMetadataList::addTypeRef(MDString &UUID,
+ DICompositeType &CT) {
+ assert(CT.getRawIdentifier() == &UUID && "Mismatched UUID");
+ if (CT.isForwardDecl())
+ OldTypeRefs.FwdDecls.insert(std::make_pair(&UUID, &CT));
+ else
+ OldTypeRefs.Final.insert(std::make_pair(&UUID, &CT));
+}
+
+Metadata *BitcodeReaderMetadataList::upgradeTypeRef(Metadata *MaybeUUID) {
+ auto *UUID = dyn_cast_or_null<MDString>(MaybeUUID);
+ if (LLVM_LIKELY(!UUID))
+ return MaybeUUID;
+
+ if (auto *CT = OldTypeRefs.Final.lookup(UUID))
+ return CT;
+
+ auto &Ref = OldTypeRefs.Unknown[UUID];
+ if (!Ref)
+ Ref = MDNode::getTemporary(Context, None);
+ return Ref.get();
+}
+
+Metadata *BitcodeReaderMetadataList::upgradeTypeRefArray(Metadata *MaybeTuple) {
+ auto *Tuple = dyn_cast_or_null<MDTuple>(MaybeTuple);
+ if (!Tuple || Tuple->isDistinct())
+ return MaybeTuple;
+
+ // Look through the array immediately if possible.
+ if (!Tuple->isTemporary())
+ return resolveTypeRefArray(Tuple);
+
+ // Create and return a placeholder to use for now. Eventually
+  // resolveTypeRefArray() will resolve this forward reference.
+ OldTypeRefs.Arrays.emplace_back(
+ std::piecewise_construct, std::forward_as_tuple(Tuple),
+ std::forward_as_tuple(MDTuple::getTemporary(Context, None)));
+ return OldTypeRefs.Arrays.back().second.get();
+}
+
+Metadata *BitcodeReaderMetadataList::resolveTypeRefArray(Metadata *MaybeTuple) {
+ auto *Tuple = dyn_cast_or_null<MDTuple>(MaybeTuple);
+ if (!Tuple || Tuple->isDistinct())
+ return MaybeTuple;
+
+ // Look through the DITypeRefArray, upgrading each DITypeRef.
+ SmallVector<Metadata *, 32> Ops;
+ Ops.reserve(Tuple->getNumOperands());
+ for (Metadata *MD : Tuple->operands())
+ Ops.push_back(upgradeTypeRef(MD));
+
+ return MDTuple::get(Context, Ops);
+}
+
+namespace {
+
+class PlaceholderQueue {
+ // Placeholders would thrash around when moved, so store in a std::deque
+ // instead of some sort of vector.
+ std::deque<DistinctMDOperandPlaceholder> PHs;
+
+public:
+ DistinctMDOperandPlaceholder &getPlaceholderOp(unsigned ID);
+ void flush(BitcodeReaderMetadataList &MetadataList);
+};
+
+} // end anonymous namespace
+
+DistinctMDOperandPlaceholder &PlaceholderQueue::getPlaceholderOp(unsigned ID) {
+ PHs.emplace_back(ID);
+ return PHs.back();
+}
+
+void PlaceholderQueue::flush(BitcodeReaderMetadataList &MetadataList) {
+ while (!PHs.empty()) {
+ auto *MD = MetadataList.lookup(PHs.front().getID());
+ assert(MD && "Flushing placeholder on unassigned MD");
+#ifndef NDEBUG
+ if (auto *MDN = dyn_cast<MDNode>(MD))
+ assert(MDN->isResolved() &&
+ "Flushing Placeholder while cycles aren't resolved");
+#endif
+ PHs.front().replaceUseWith(MD);
+ PHs.pop_front();
+ }
+}
+
+} // end anonymous namespace
+
+class MetadataLoader::MetadataLoaderImpl {
+ BitcodeReaderMetadataList MetadataList;
+ BitcodeReaderValueList &ValueList;
+ BitstreamCursor &Stream;
+ LLVMContext &Context;
+ Module &TheModule;
+ std::function<Type *(unsigned)> getTypeByID;
+
+  // Keep a mapping of seen pairs of old-style CU <-> SP, and update pointers
+  // to point from SP to CU after a block is completely parsed.
+ std::vector<std::pair<DICompileUnit *, Metadata *>> CUSubprograms;
+
+ /// Functions that need to be matched with subprograms when upgrading old
+ /// metadata.
+ SmallDenseMap<Function *, DISubprogram *, 16> FunctionsWithSPs;
+
+ // Map the bitcode's custom MDKind ID to the Module's MDKind ID.
+ DenseMap<unsigned, unsigned> MDKindMap;
+
+ bool StripTBAA = false;
+ bool HasSeenOldLoopTags = false;
+
+ /// True if metadata is being parsed for a module being ThinLTO imported.
+ bool IsImporting = false;
+
+ Error parseOneMetadata(SmallVectorImpl<uint64_t> &Record, unsigned Code,
+ PlaceholderQueue &Placeholders, StringRef Blob,
+ bool ModuleLevel, unsigned &NextMetadataNo);
+ Error parseMetadataStrings(ArrayRef<uint64_t> Record, StringRef Blob,
+ unsigned &NextMetadataNo);
+ Error parseGlobalObjectAttachment(GlobalObject &GO,
+ ArrayRef<uint64_t> Record);
+ Error parseMetadataKindRecord(SmallVectorImpl<uint64_t> &Record);
+
+public:
+ MetadataLoaderImpl(BitstreamCursor &Stream, Module &TheModule,
+ BitcodeReaderValueList &ValueList,
+ std::function<Type *(unsigned)> getTypeByID,
+ bool IsImporting)
+ : MetadataList(TheModule.getContext()), ValueList(ValueList),
+ Stream(Stream), Context(TheModule.getContext()), TheModule(TheModule),
+ getTypeByID(getTypeByID), IsImporting(IsImporting) {}
+
+ Error parseMetadata(bool ModuleLevel);
+
+ bool hasFwdRefs() const { return MetadataList.hasFwdRefs(); }
+ Metadata *getMetadataFwdRef(unsigned Idx) {
+ return MetadataList.getMetadataFwdRef(Idx);
+ }
+
+ MDNode *getMDNodeFwdRefOrNull(unsigned Idx) {
+ return MetadataList.getMDNodeFwdRefOrNull(Idx);
+ }
+
+ DISubprogram *lookupSubprogramForFunction(Function *F) {
+ return FunctionsWithSPs.lookup(F);
+ }
+
+ bool hasSeenOldLoopTags() { return HasSeenOldLoopTags; }
+
+ Error parseMetadataAttachment(
+ Function &F, const SmallVectorImpl<Instruction *> &InstructionList);
+
+ Error parseMetadataKinds();
+
+ void setStripTBAA(bool Value) { StripTBAA = Value; }
+ bool isStrippingTBAA() { return StripTBAA; }
+
+ unsigned size() const { return MetadataList.size(); }
+ void shrinkTo(unsigned N) { MetadataList.shrinkTo(N); }
+};
+
+Error error(const Twine &Message) {
+ return make_error<StringError>(
+ Message, make_error_code(BitcodeError::CorruptedBitcode));
+}
+
+/// Parse a METADATA_BLOCK. If ModuleLevel is true then we are parsing
+/// module level metadata.
+Error MetadataLoader::MetadataLoaderImpl::parseMetadata(bool ModuleLevel) {
+ if (!ModuleLevel && MetadataList.hasFwdRefs())
+ return error("Invalid metadata: fwd refs into function blocks");
+
+ if (Stream.EnterSubBlock(bitc::METADATA_BLOCK_ID))
+ return error("Invalid record");
+
+ unsigned NextMetadataNo = MetadataList.size();
+ SmallVector<uint64_t, 64> Record;
+
+ PlaceholderQueue Placeholders;
+
+ // Read all the records.
+ while (true) {
+ BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+
+ switch (Entry.Kind) {
+ case BitstreamEntry::SubBlock: // Handled for us already.
+ case BitstreamEntry::Error:
+ return error("Malformed block");
+ case BitstreamEntry::EndBlock:
+ // Upgrade old-style CU <-> SP pointers to point from SP to CU.
+ for (auto CU_SP : CUSubprograms)
+ if (auto *SPs = dyn_cast_or_null<MDTuple>(CU_SP.second))
+ for (auto &Op : SPs->operands())
+ if (auto *SP = dyn_cast_or_null<MDNode>(Op))
+ SP->replaceOperandWith(7, CU_SP.first);
+ CUSubprograms.clear();
+
+ MetadataList.tryToResolveCycles();
+ Placeholders.flush(MetadataList);
+ return Error::success();
+ case BitstreamEntry::Record:
+ // The interesting case.
+ break;
+ }
+
+ // Read a record.
+ Record.clear();
+ StringRef Blob;
+ unsigned Code = Stream.readRecord(Entry.ID, Record, &Blob);
+ if (Error Err = parseOneMetadata(Record, Code, Placeholders, Blob,
+ ModuleLevel, NextMetadataNo))
+ return Err;
+ }
+}
+
+Error MetadataLoader::MetadataLoaderImpl::parseOneMetadata(
+ SmallVectorImpl<uint64_t> &Record, unsigned Code,
+ PlaceholderQueue &Placeholders, StringRef Blob, bool ModuleLevel,
+ unsigned &NextMetadataNo) {
+
+ bool IsDistinct = false;
+ auto getMD = [&](unsigned ID) -> Metadata * {
+ if (!IsDistinct)
+ return MetadataList.getMetadataFwdRef(ID);
+ if (auto *MD = MetadataList.getMetadataIfResolved(ID))
+ return MD;
+ return &Placeholders.getPlaceholderOp(ID);
+ };
+ auto getMDOrNull = [&](unsigned ID) -> Metadata * {
+ if (ID)
+ return getMD(ID - 1);
+ return nullptr;
+ };
+ auto getMDOrNullWithoutPlaceholders = [&](unsigned ID) -> Metadata * {
+ if (ID)
+ return MetadataList.getMetadataFwdRef(ID - 1);
+ return nullptr;
+ };
+ auto getMDString = [&](unsigned ID) -> MDString * {
+ // This requires that the ID is not really a forward reference. In
+ // particular, the MDString must already have been resolved.
+ return cast_or_null<MDString>(getMDOrNull(ID));
+ };
+
+ // Support for old type refs.
+ auto getDITypeRefOrNull = [&](unsigned ID) {
+ return MetadataList.upgradeTypeRef(getMDOrNull(ID));
+ };
+
+#define GET_OR_DISTINCT(CLASS, ARGS) \
+ (IsDistinct ? CLASS::getDistinct ARGS : CLASS::get ARGS)
+
+ switch (Code) {
+ default: // Default behavior: ignore.
+ break;
+ case bitc::METADATA_NAME: {
+ // Read name of the named metadata.
+ SmallString<8> Name(Record.begin(), Record.end());
+ Record.clear();
+ Code = Stream.ReadCode();
+
+ unsigned NextBitCode = Stream.readRecord(Code, Record);
+ if (NextBitCode != bitc::METADATA_NAMED_NODE)
+ return error("METADATA_NAME not followed by METADATA_NAMED_NODE");
+
+ // Read named metadata elements.
+ unsigned Size = Record.size();
+ NamedMDNode *NMD = TheModule.getOrInsertNamedMetadata(Name);
+ for (unsigned i = 0; i != Size; ++i) {
+ MDNode *MD = MetadataList.getMDNodeFwdRefOrNull(Record[i]);
+ if (!MD)
+ return error("Invalid record");
+ NMD->addOperand(MD);
+ }
+ break;
+ }
+ case bitc::METADATA_OLD_FN_NODE: {
+ // FIXME: Remove in 4.0.
+ // This is a LocalAsMetadata record, the only type of function-local
+ // metadata.
+ if (Record.size() % 2 == 1)
+ return error("Invalid record");
+
+ // If this isn't a LocalAsMetadata record, we're dropping it. This used
+ // to be legal, but there's no upgrade path.
+ auto dropRecord = [&] {
+ MetadataList.assignValue(MDNode::get(Context, None), NextMetadataNo++);
+ };
+ if (Record.size() != 2) {
+ dropRecord();
+ break;
+ }
+
+ Type *Ty = getTypeByID(Record[0]);
+ if (Ty->isMetadataTy() || Ty->isVoidTy()) {
+ dropRecord();
+ break;
+ }
+
+ MetadataList.assignValue(
+ LocalAsMetadata::get(ValueList.getValueFwdRef(Record[1], Ty)),
+ NextMetadataNo++);
+ break;
+ }
+ case bitc::METADATA_OLD_NODE: {
+ // FIXME: Remove in 4.0.
+ if (Record.size() % 2 == 1)
+ return error("Invalid record");
+
+ unsigned Size = Record.size();
+ SmallVector<Metadata *, 8> Elts;
+ for (unsigned i = 0; i != Size; i += 2) {
+ Type *Ty = getTypeByID(Record[i]);
+ if (!Ty)
+ return error("Invalid record");
+ if (Ty->isMetadataTy())
+ Elts.push_back(getMD(Record[i + 1]));
+ else if (!Ty->isVoidTy()) {
+ auto *MD =
+ ValueAsMetadata::get(ValueList.getValueFwdRef(Record[i + 1], Ty));
+ assert(isa<ConstantAsMetadata>(MD) &&
+ "Expected non-function-local metadata");
+ Elts.push_back(MD);
+ } else
+ Elts.push_back(nullptr);
+ }
+ MetadataList.assignValue(MDNode::get(Context, Elts), NextMetadataNo++);
+ break;
+ }
+ case bitc::METADATA_VALUE: {
+ if (Record.size() != 2)
+ return error("Invalid record");
+
+ Type *Ty = getTypeByID(Record[0]);
+ if (Ty->isMetadataTy() || Ty->isVoidTy())
+ return error("Invalid record");
+
+ MetadataList.assignValue(
+ ValueAsMetadata::get(ValueList.getValueFwdRef(Record[1], Ty)),
+ NextMetadataNo++);
+ break;
+ }
+ case bitc::METADATA_DISTINCT_NODE:
+ IsDistinct = true;
+ LLVM_FALLTHROUGH;
+ case bitc::METADATA_NODE: {
+ SmallVector<Metadata *, 8> Elts;
+ Elts.reserve(Record.size());
+ for (unsigned ID : Record)
+ Elts.push_back(getMDOrNull(ID));
+ MetadataList.assignValue(IsDistinct ? MDNode::getDistinct(Context, Elts)
+ : MDNode::get(Context, Elts),
+ NextMetadataNo++);
+ break;
+ }
+ case bitc::METADATA_LOCATION: {
+ if (Record.size() != 5)
+ return error("Invalid record");
+
+ IsDistinct = Record[0];
+ unsigned Line = Record[1];
+ unsigned Column = Record[2];
+ Metadata *Scope = getMD(Record[3]);
+ Metadata *InlinedAt = getMDOrNull(Record[4]);
+ MetadataList.assignValue(
+ GET_OR_DISTINCT(DILocation, (Context, Line, Column, Scope, InlinedAt)),
+ NextMetadataNo++);
+ break;
+ }
+ case bitc::METADATA_GENERIC_DEBUG: {
+ if (Record.size() < 4)
+ return error("Invalid record");
+
+ IsDistinct = Record[0];
+ unsigned Tag = Record[1];
+ unsigned Version = Record[2];
+
+ if (Tag >= 1u << 16 || Version != 0)
+ return error("Invalid record");
+
+ auto *Header = getMDString(Record[3]);
+ SmallVector<Metadata *, 8> DwarfOps;
+ for (unsigned I = 4, E = Record.size(); I != E; ++I)
+ DwarfOps.push_back(getMDOrNull(Record[I]));
+ MetadataList.assignValue(
+ GET_OR_DISTINCT(GenericDINode, (Context, Tag, Header, DwarfOps)),
+ NextMetadataNo++);
+ break;
+ }
+ case bitc::METADATA_SUBRANGE: {
+ if (Record.size() != 3)
+ return error("Invalid record");
+
+ IsDistinct = Record[0];
+ MetadataList.assignValue(
+ GET_OR_DISTINCT(DISubrange,
+ (Context, Record[1], unrotateSign(Record[2]))),
+ NextMetadataNo++);
+ break;
+ }
+ case bitc::METADATA_ENUMERATOR: {
+ if (Record.size() != 3)
+ return error("Invalid record");
+
+ IsDistinct = Record[0];
+ MetadataList.assignValue(
+ GET_OR_DISTINCT(DIEnumerator, (Context, unrotateSign(Record[1]),
+ getMDString(Record[2]))),
+ NextMetadataNo++);
+ break;
+ }
+ case bitc::METADATA_BASIC_TYPE: {
+ if (Record.size() != 6)
+ return error("Invalid record");
+
+ IsDistinct = Record[0];
+ MetadataList.assignValue(
+ GET_OR_DISTINCT(DIBasicType,
+ (Context, Record[1], getMDString(Record[2]), Record[3],
+ Record[4], Record[5])),
+ NextMetadataNo++);
+ break;
+ }
+ case bitc::METADATA_DERIVED_TYPE: {
+ if (Record.size() != 12)
+ return error("Invalid record");
+
+ IsDistinct = Record[0];
+ DINode::DIFlags Flags = static_cast<DINode::DIFlags>(Record[10]);
+ MetadataList.assignValue(
+ GET_OR_DISTINCT(DIDerivedType,
+ (Context, Record[1], getMDString(Record[2]),
+ getMDOrNull(Record[3]), Record[4],
+ getDITypeRefOrNull(Record[5]),
+ getDITypeRefOrNull(Record[6]), Record[7], Record[8],
+ Record[9], Flags, getDITypeRefOrNull(Record[11]))),
+ NextMetadataNo++);
+ break;
+ }
+ case bitc::METADATA_COMPOSITE_TYPE: {
+ if (Record.size() != 16)
+ return error("Invalid record");
+
+    // If we have a UUID and this is not a forward declaration, look up the
+    // mapping.
+ IsDistinct = Record[0] & 0x1;
+ bool IsNotUsedInTypeRef = Record[0] >= 2;
+ unsigned Tag = Record[1];
+ MDString *Name = getMDString(Record[2]);
+ Metadata *File = getMDOrNull(Record[3]);
+ unsigned Line = Record[4];
+ Metadata *Scope = getDITypeRefOrNull(Record[5]);
+ Metadata *BaseType = nullptr;
+ uint64_t SizeInBits = Record[7];
+ if (Record[8] > (uint64_t)std::numeric_limits<uint32_t>::max())
+ return error("Alignment value is too large");
+ uint32_t AlignInBits = Record[8];
+ uint64_t OffsetInBits = 0;
+ DINode::DIFlags Flags = static_cast<DINode::DIFlags>(Record[10]);
+ Metadata *Elements = nullptr;
+ unsigned RuntimeLang = Record[12];
+ Metadata *VTableHolder = nullptr;
+ Metadata *TemplateParams = nullptr;
+ auto *Identifier = getMDString(Record[15]);
+ // If this module is being parsed so that it can be ThinLTO imported
+ // into another module, composite types only need to be imported
+    // as type declarations (unless full type definitions are requested).
+ // Create type declarations up front to save memory. Also, buildODRType
+ // handles the case where this is type ODRed with a definition needed
+ // by the importing module, in which case the existing definition is
+ // used.
+ if (IsImporting && !ImportFullTypeDefinitions &&
+ (Tag == dwarf::DW_TAG_enumeration_type ||
+ Tag == dwarf::DW_TAG_class_type ||
+ Tag == dwarf::DW_TAG_structure_type ||
+ Tag == dwarf::DW_TAG_union_type)) {
+ Flags = Flags | DINode::FlagFwdDecl;
+ } else {
+ BaseType = getDITypeRefOrNull(Record[6]);
+ OffsetInBits = Record[9];
+ Elements = getMDOrNull(Record[11]);
+ VTableHolder = getDITypeRefOrNull(Record[13]);
+ TemplateParams = getMDOrNull(Record[14]);
+ }
+ DICompositeType *CT = nullptr;
+ if (Identifier)
+ CT = DICompositeType::buildODRType(
+ Context, *Identifier, Tag, Name, File, Line, Scope, BaseType,
+ SizeInBits, AlignInBits, OffsetInBits, Flags, Elements, RuntimeLang,
+ VTableHolder, TemplateParams);
+
+ // Create a node if we didn't get a lazy ODR type.
+ if (!CT)
+ CT = GET_OR_DISTINCT(DICompositeType,
+ (Context, Tag, Name, File, Line, Scope, BaseType,
+ SizeInBits, AlignInBits, OffsetInBits, Flags,
+ Elements, RuntimeLang, VTableHolder, TemplateParams,
+ Identifier));
+ if (!IsNotUsedInTypeRef && Identifier)
+ MetadataList.addTypeRef(*Identifier, *cast<DICompositeType>(CT));
+
+ MetadataList.assignValue(CT, NextMetadataNo++);
+ break;
+ }
+ case bitc::METADATA_SUBROUTINE_TYPE: {
+ if (Record.size() < 3 || Record.size() > 4)
+ return error("Invalid record");
+ bool IsOldTypeRefArray = Record[0] < 2;
+ unsigned CC = (Record.size() > 3) ? Record[3] : 0;
+
+ IsDistinct = Record[0] & 0x1;
+ DINode::DIFlags Flags = static_cast<DINode::DIFlags>(Record[1]);
+ Metadata *Types = getMDOrNull(Record[2]);
+ if (LLVM_UNLIKELY(IsOldTypeRefArray))
+ Types = MetadataList.upgradeTypeRefArray(Types);
+
+ MetadataList.assignValue(
+ GET_OR_DISTINCT(DISubroutineType, (Context, Flags, CC, Types)),
+ NextMetadataNo++);
+ break;
+ }
+
+ case bitc::METADATA_MODULE: {
+ if (Record.size() != 6)
+ return error("Invalid record");
+
+ IsDistinct = Record[0];
+ MetadataList.assignValue(
+ GET_OR_DISTINCT(DIModule,
+ (Context, getMDOrNull(Record[1]),
+ getMDString(Record[2]), getMDString(Record[3]),
+ getMDString(Record[4]), getMDString(Record[5]))),
+ NextMetadataNo++);
+ break;
+ }
+
+ case bitc::METADATA_FILE: {
+ if (Record.size() != 3 && Record.size() != 5)
+ return error("Invalid record");
+
+ IsDistinct = Record[0];
+ MetadataList.assignValue(
+ GET_OR_DISTINCT(
+ DIFile,
+ (Context, getMDString(Record[1]), getMDString(Record[2]),
+ Record.size() == 3 ? DIFile::CSK_None
+ : static_cast<DIFile::ChecksumKind>(Record[3]),
+ Record.size() == 3 ? nullptr : getMDString(Record[4]))),
+ NextMetadataNo++);
+ break;
+ }
+ case bitc::METADATA_COMPILE_UNIT: {
+ if (Record.size() < 14 || Record.size() > 17)
+ return error("Invalid record");
+
+ // Ignore Record[0], which indicates whether this compile unit is
+ // distinct. It's always distinct.
+ IsDistinct = true;
+ auto *CU = DICompileUnit::getDistinct(
+ Context, Record[1], getMDOrNull(Record[2]), getMDString(Record[3]),
+ Record[4], getMDString(Record[5]), Record[6], getMDString(Record[7]),
+ Record[8], getMDOrNull(Record[9]), getMDOrNull(Record[10]),
+ getMDOrNull(Record[12]), getMDOrNull(Record[13]),
+ Record.size() <= 15 ? nullptr : getMDOrNull(Record[15]),
+ Record.size() <= 14 ? 0 : Record[14],
+ Record.size() <= 16 ? true : Record[16]);
+
+ MetadataList.assignValue(CU, NextMetadataNo++);
+
+    // Upgrade the list of subprograms.
+ if (Metadata *SPs = getMDOrNullWithoutPlaceholders(Record[11]))
+ CUSubprograms.push_back({CU, SPs});
+ break;
+ }
+ case bitc::METADATA_SUBPROGRAM: {
+ if (Record.size() < 18 || Record.size() > 20)
+ return error("Invalid record");
+
+ IsDistinct =
+ (Record[0] & 1) || Record[8]; // All definitions should be distinct.
+ // Version 1 has a Function as Record[15].
+ // Version 2 has removed Record[15].
+ // Version 3 has the Unit as Record[15].
+ // Version 4 added thisAdjustment.
+ bool HasUnit = Record[0] >= 2;
+ if (HasUnit && Record.size() < 19)
+ return error("Invalid record");
+ Metadata *CUorFn = getMDOrNull(Record[15]);
+ unsigned Offset = Record.size() >= 19 ? 1 : 0;
+ bool HasFn = Offset && !HasUnit;
+ bool HasThisAdj = Record.size() >= 20;
+ DISubprogram *SP = GET_OR_DISTINCT(
+ DISubprogram, (Context,
+ getDITypeRefOrNull(Record[1]), // scope
+ getMDString(Record[2]), // name
+ getMDString(Record[3]), // linkageName
+ getMDOrNull(Record[4]), // file
+ Record[5], // line
+ getMDOrNull(Record[6]), // type
+ Record[7], // isLocal
+ Record[8], // isDefinition
+ Record[9], // scopeLine
+ getDITypeRefOrNull(Record[10]), // containingType
+ Record[11], // virtuality
+ Record[12], // virtualIndex
+ HasThisAdj ? Record[19] : 0, // thisAdjustment
+ static_cast<DINode::DIFlags>(Record[13] // flags
+ ),
+ Record[14], // isOptimized
+ HasUnit ? CUorFn : nullptr, // unit
+ getMDOrNull(Record[15 + Offset]), // templateParams
+ getMDOrNull(Record[16 + Offset]), // declaration
+ getMDOrNull(Record[17 + Offset]) // variables
+ ));
+ MetadataList.assignValue(SP, NextMetadataNo++);
+
+ // Upgrade sp->function mapping to function->sp mapping.
+ if (HasFn) {
+ if (auto *CMD = dyn_cast_or_null<ConstantAsMetadata>(CUorFn))
+ if (auto *F = dyn_cast<Function>(CMD->getValue())) {
+ if (F->isMaterializable())
+ // Defer until materialized; unmaterialized functions may not have
+ // metadata.
+ FunctionsWithSPs[F] = SP;
+ else if (!F->empty())
+ F->setSubprogram(SP);
+ }
+ }
+ break;
+ }
+ case bitc::METADATA_LEXICAL_BLOCK: {
+ if (Record.size() != 5)
+ return error("Invalid record");
+
+ IsDistinct = Record[0];
+ MetadataList.assignValue(
+ GET_OR_DISTINCT(DILexicalBlock,
+ (Context, getMDOrNull(Record[1]),
+ getMDOrNull(Record[2]), Record[3], Record[4])),
+ NextMetadataNo++);
+ break;
+ }
+ case bitc::METADATA_LEXICAL_BLOCK_FILE: {
+ if (Record.size() != 4)
+ return error("Invalid record");
+
+ IsDistinct = Record[0];
+ MetadataList.assignValue(
+ GET_OR_DISTINCT(DILexicalBlockFile,
+ (Context, getMDOrNull(Record[1]),
+ getMDOrNull(Record[2]), Record[3])),
+ NextMetadataNo++);
+ break;
+ }
+ case bitc::METADATA_NAMESPACE: {
+ if (Record.size() != 5)
+ return error("Invalid record");
+
+ IsDistinct = Record[0] & 1;
+ bool ExportSymbols = Record[0] & 2;
+ MetadataList.assignValue(
+ GET_OR_DISTINCT(DINamespace,
+ (Context, getMDOrNull(Record[1]),
+ getMDOrNull(Record[2]), getMDString(Record[3]),
+ Record[4], ExportSymbols)),
+ NextMetadataNo++);
+ break;
+ }
+ case bitc::METADATA_MACRO: {
+ if (Record.size() != 5)
+ return error("Invalid record");
+
+ IsDistinct = Record[0];
+ MetadataList.assignValue(
+ GET_OR_DISTINCT(DIMacro,
+ (Context, Record[1], Record[2], getMDString(Record[3]),
+ getMDString(Record[4]))),
+ NextMetadataNo++);
+ break;
+ }
+ case bitc::METADATA_MACRO_FILE: {
+ if (Record.size() != 5)
+ return error("Invalid record");
+
+ IsDistinct = Record[0];
+ MetadataList.assignValue(
+ GET_OR_DISTINCT(DIMacroFile,
+ (Context, Record[1], Record[2], getMDOrNull(Record[3]),
+ getMDOrNull(Record[4]))),
+ NextMetadataNo++);
+ break;
+ }
+ case bitc::METADATA_TEMPLATE_TYPE: {
+ if (Record.size() != 3)
+ return error("Invalid record");
+
+ IsDistinct = Record[0];
+ MetadataList.assignValue(GET_OR_DISTINCT(DITemplateTypeParameter,
+ (Context, getMDString(Record[1]),
+ getDITypeRefOrNull(Record[2]))),
+ NextMetadataNo++);
+ break;
+ }
+ case bitc::METADATA_TEMPLATE_VALUE: {
+ if (Record.size() != 5)
+ return error("Invalid record");
+
+ IsDistinct = Record[0];
+ MetadataList.assignValue(
+ GET_OR_DISTINCT(DITemplateValueParameter,
+ (Context, Record[1], getMDString(Record[2]),
+ getDITypeRefOrNull(Record[3]),
+ getMDOrNull(Record[4]))),
+ NextMetadataNo++);
+ break;
+ }
+ case bitc::METADATA_GLOBAL_VAR: {
+ if (Record.size() < 11 || Record.size() > 12)
+ return error("Invalid record");
+
+ IsDistinct = Record[0] & 1;
+ unsigned Version = Record[0] >> 1;
+
+ if (Version == 1) {
+ MetadataList.assignValue(
+ GET_OR_DISTINCT(DIGlobalVariable,
+ (Context, getMDOrNull(Record[1]),
+ getMDString(Record[2]), getMDString(Record[3]),
+ getMDOrNull(Record[4]), Record[5],
+ getDITypeRefOrNull(Record[6]), Record[7], Record[8],
+ getMDOrNull(Record[10]), Record[11])),
+ NextMetadataNo++);
+ } else if (Version == 0) {
+ // Upgrade old metadata, which stored a global variable reference or a
+ // ConstantInt here.
+ Metadata *Expr = getMDOrNull(Record[9]);
+ uint32_t AlignInBits = 0;
+ if (Record.size() > 11) {
+ if (Record[11] > (uint64_t)std::numeric_limits<uint32_t>::max())
+ return error("Alignment value is too large");
+ AlignInBits = Record[11];
+ }
+ GlobalVariable *Attach = nullptr;
+ if (auto *CMD = dyn_cast_or_null<ConstantAsMetadata>(Expr)) {
+ if (auto *GV = dyn_cast<GlobalVariable>(CMD->getValue())) {
+ Attach = GV;
+ Expr = nullptr;
+ } else if (auto *CI = dyn_cast<ConstantInt>(CMD->getValue())) {
+ Expr = DIExpression::get(Context,
+ {dwarf::DW_OP_constu, CI->getZExtValue(),
+ dwarf::DW_OP_stack_value});
+ } else {
+ Expr = nullptr;
+ }
+ }
+ DIGlobalVariable *DGV = GET_OR_DISTINCT(
+ DIGlobalVariable,
+ (Context, getMDOrNull(Record[1]), getMDString(Record[2]),
+ getMDString(Record[3]), getMDOrNull(Record[4]), Record[5],
+ getDITypeRefOrNull(Record[6]), Record[7], Record[8],
+ getMDOrNull(Record[10]), AlignInBits));
+
+ auto *DGVE = DIGlobalVariableExpression::getDistinct(Context, DGV, Expr);
+ MetadataList.assignValue(DGVE, NextMetadataNo++);
+ if (Attach)
+ Attach->addDebugInfo(DGVE);
+ } else
+ return error("Invalid record");
+
+ break;
+ }
+ case bitc::METADATA_LOCAL_VAR: {
+ // 10th field is for the obsoleted 'inlinedAt:' field.
+ if (Record.size() < 8 || Record.size() > 10)
+ return error("Invalid record");
+
+ IsDistinct = Record[0] & 1;
+ bool HasAlignment = Record[0] & 2;
+ // The 2nd field used to be an artificial tag, either DW_TAG_auto_variable
+ // or DW_TAG_arg_variable. If the alignment flag is encoded, this is a newer
+ // version of the record which doesn't have the artificial tag.
+ bool HasTag = !HasAlignment && Record.size() > 8;
+ DINode::DIFlags Flags = static_cast<DINode::DIFlags>(Record[7 + HasTag]);
+ uint32_t AlignInBits = 0;
+ if (HasAlignment) {
+ if (Record[8 + HasTag] > (uint64_t)std::numeric_limits<uint32_t>::max())
+ return error("Alignment value is too large");
+ AlignInBits = Record[8 + HasTag];
+ }
+ MetadataList.assignValue(
+ GET_OR_DISTINCT(DILocalVariable,
+ (Context, getMDOrNull(Record[1 + HasTag]),
+ getMDString(Record[2 + HasTag]),
+ getMDOrNull(Record[3 + HasTag]), Record[4 + HasTag],
+ getDITypeRefOrNull(Record[5 + HasTag]),
+ Record[6 + HasTag], Flags, AlignInBits)),
+ NextMetadataNo++);
+ break;
+ }
+ case bitc::METADATA_EXPRESSION: {
+ if (Record.size() < 1)
+ return error("Invalid record");
+
+ IsDistinct = Record[0] & 1;
+ bool HasOpFragment = Record[0] & 2;
+ auto Elts = MutableArrayRef<uint64_t>(Record).slice(1);
+ if (!HasOpFragment)
+ if (unsigned N = Elts.size())
+ if (N >= 3 && Elts[N - 3] == dwarf::DW_OP_bit_piece)
+ Elts[N - 3] = dwarf::DW_OP_LLVM_fragment;
+
+ MetadataList.assignValue(
+ GET_OR_DISTINCT(DIExpression, (Context, makeArrayRef(Record).slice(1))),
+ NextMetadataNo++);
+ break;
+ }
+ case bitc::METADATA_GLOBAL_VAR_EXPR: {
+ if (Record.size() != 3)
+ return error("Invalid record");
+
+ IsDistinct = Record[0];
+ MetadataList.assignValue(GET_OR_DISTINCT(DIGlobalVariableExpression,
+ (Context, getMDOrNull(Record[1]),
+ getMDOrNull(Record[2]))),
+ NextMetadataNo++);
+ break;
+ }
+ case bitc::METADATA_OBJC_PROPERTY: {
+ if (Record.size() != 8)
+ return error("Invalid record");
+
+ IsDistinct = Record[0];
+ MetadataList.assignValue(
+ GET_OR_DISTINCT(DIObjCProperty,
+ (Context, getMDString(Record[1]),
+ getMDOrNull(Record[2]), Record[3],
+ getMDString(Record[4]), getMDString(Record[5]),
+ Record[6], getDITypeRefOrNull(Record[7]))),
+ NextMetadataNo++);
+ break;
+ }
+ case bitc::METADATA_IMPORTED_ENTITY: {
+ if (Record.size() != 6)
+ return error("Invalid record");
+
+ IsDistinct = Record[0];
+ MetadataList.assignValue(
+ GET_OR_DISTINCT(DIImportedEntity,
+ (Context, Record[1], getMDOrNull(Record[2]),
+ getDITypeRefOrNull(Record[3]), Record[4],
+ getMDString(Record[5]))),
+ NextMetadataNo++);
+ break;
+ }
+ case bitc::METADATA_STRING_OLD: {
+ std::string String(Record.begin(), Record.end());
+
+ // Test for upgrading !llvm.loop.
+ HasSeenOldLoopTags |= mayBeOldLoopAttachmentTag(String);
+
+ Metadata *MD = MDString::get(Context, String);
+ MetadataList.assignValue(MD, NextMetadataNo++);
+ break;
+ }
+ case bitc::METADATA_STRINGS:
+ if (Error Err = parseMetadataStrings(Record, Blob, NextMetadataNo))
+ return Err;
+ break;
+ case bitc::METADATA_GLOBAL_DECL_ATTACHMENT: {
+ if (Record.size() % 2 == 0)
+ return error("Invalid record");
+ unsigned ValueID = Record[0];
+ if (ValueID >= ValueList.size())
+ return error("Invalid record");
+ if (auto *GO = dyn_cast<GlobalObject>(ValueList[ValueID]))
+ if (Error Err = parseGlobalObjectAttachment(
+ *GO, ArrayRef<uint64_t>(Record).slice(1)))
+ return Err;
+ break;
+ }
+ case bitc::METADATA_KIND: {
+ // Support older bitcode files that had METADATA_KIND records in a
+ // block with METADATA_BLOCK_ID.
+ if (Error Err = parseMetadataKindRecord(Record))
+ return Err;
+ break;
+ }
+ }
+#undef GET_OR_DISTINCT
+ return Error::success();
+}
+
+Error MetadataLoader::MetadataLoaderImpl::parseMetadataStrings(
+ ArrayRef<uint64_t> Record, StringRef Blob, unsigned &NextMetadataNo) {
+ // All the MDStrings in the block are emitted together in a single
+ // record. The strings are concatenated and stored in a blob along with
+ // their sizes.
+ if (Record.size() != 2)
+ return error("Invalid record: metadata strings layout");
+
+ unsigned NumStrings = Record[0];
+ unsigned StringsOffset = Record[1];
+ if (!NumStrings)
+ return error("Invalid record: metadata strings with no strings");
+ if (StringsOffset > Blob.size())
+ return error("Invalid record: metadata strings corrupt offset");
+
+ StringRef Lengths = Blob.slice(0, StringsOffset);
+ SimpleBitstreamCursor R(Lengths);
+
+ StringRef Strings = Blob.drop_front(StringsOffset);
+ do {
+ if (R.AtEndOfStream())
+ return error("Invalid record: metadata strings bad length");
+
+ unsigned Size = R.ReadVBR(6);
+ if (Strings.size() < Size)
+ return error("Invalid record: metadata strings truncated chars");
+
+ MetadataList.assignValue(MDString::get(Context, Strings.slice(0, Size)),
+ NextMetadataNo++);
+ Strings = Strings.drop_front(Size);
+ } while (--NumStrings);
+
+ return Error::success();
+}
+
+Error MetadataLoader::MetadataLoaderImpl::parseGlobalObjectAttachment(
+ GlobalObject &GO, ArrayRef<uint64_t> Record) {
+ assert(Record.size() % 2 == 0);
+ for (unsigned I = 0, E = Record.size(); I != E; I += 2) {
+ auto K = MDKindMap.find(Record[I]);
+ if (K == MDKindMap.end())
+ return error("Invalid ID");
+ MDNode *MD = MetadataList.getMDNodeFwdRefOrNull(Record[I + 1]);
+ if (!MD)
+ return error("Invalid metadata attachment");
+ GO.addMetadata(K->second, *MD);
+ }
+ return Error::success();
+}
+
+/// Parse metadata attachments.
+Error MetadataLoader::MetadataLoaderImpl::parseMetadataAttachment(
+ Function &F, const SmallVectorImpl<Instruction *> &InstructionList) {
+ if (Stream.EnterSubBlock(bitc::METADATA_ATTACHMENT_ID))
+ return error("Invalid record");
+
+ SmallVector<uint64_t, 64> Record;
+
+ while (true) {
+ BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+
+ switch (Entry.Kind) {
+ case BitstreamEntry::SubBlock: // Handled for us already.
+ case BitstreamEntry::Error:
+ return error("Malformed block");
+ case BitstreamEntry::EndBlock:
+ return Error::success();
+ case BitstreamEntry::Record:
+ // The interesting case.
+ break;
+ }
+
+ // Read a metadata attachment record.
+ Record.clear();
+ switch (Stream.readRecord(Entry.ID, Record)) {
+ default: // Default behavior: ignore.
+ break;
+ case bitc::METADATA_ATTACHMENT: {
+ unsigned RecordLength = Record.size();
+ if (Record.empty())
+ return error("Invalid record");
+ if (RecordLength % 2 == 0) {
+ // A function attachment.
+ if (Error Err = parseGlobalObjectAttachment(F, Record))
+ return Err;
+ continue;
+ }
+
+ // An instruction attachment.
+ Instruction *Inst = InstructionList[Record[0]];
+ for (unsigned i = 1; i != RecordLength; i = i + 2) {
+ unsigned Kind = Record[i];
+ DenseMap<unsigned, unsigned>::iterator I = MDKindMap.find(Kind);
+ if (I == MDKindMap.end())
+ return error("Invalid ID");
+ if (I->second == LLVMContext::MD_tbaa && StripTBAA)
+ continue;
+
+ Metadata *Node = MetadataList.getMetadataFwdRef(Record[i + 1]);
+ if (isa<LocalAsMetadata>(Node))
+ // Drop the attachment. This used to be legal, but there's no
+ // upgrade path.
+ break;
+ MDNode *MD = dyn_cast_or_null<MDNode>(Node);
+ if (!MD)
+ return error("Invalid metadata attachment");
+
+ if (HasSeenOldLoopTags && I->second == LLVMContext::MD_loop)
+ MD = upgradeInstructionLoopAttachment(*MD);
+
+ if (I->second == LLVMContext::MD_tbaa) {
+ assert(!MD->isTemporary() && "should load MDs before attachments");
+ MD = UpgradeTBAANode(*MD);
+ }
+ Inst->setMetadata(I->second, MD);
+ }
+ break;
+ }
+ }
+ }
+}
+
+/// Parse a single METADATA_KIND record, inserting result in MDKindMap.
+Error MetadataLoader::MetadataLoaderImpl::parseMetadataKindRecord(
+ SmallVectorImpl<uint64_t> &Record) {
+ if (Record.size() < 2)
+ return error("Invalid record");
+
+ unsigned Kind = Record[0];
+ SmallString<8> Name(Record.begin() + 1, Record.end());
+
+ unsigned NewKind = TheModule.getMDKindID(Name.str());
+ if (!MDKindMap.insert(std::make_pair(Kind, NewKind)).second)
+ return error("Conflicting METADATA_KIND records");
+ return Error::success();
+}
+
+/// Parse the metadata kinds out of the METADATA_KIND_BLOCK.
+Error MetadataLoader::MetadataLoaderImpl::parseMetadataKinds() {
+ if (Stream.EnterSubBlock(bitc::METADATA_KIND_BLOCK_ID))
+ return error("Invalid record");
+
+ SmallVector<uint64_t, 64> Record;
+
+ // Read all the records.
+ while (true) {
+ BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
+
+ switch (Entry.Kind) {
+ case BitstreamEntry::SubBlock: // Handled for us already.
+ case BitstreamEntry::Error:
+ return error("Malformed block");
+ case BitstreamEntry::EndBlock:
+ return Error::success();
+ case BitstreamEntry::Record:
+ // The interesting case.
+ break;
+ }
+
+ // Read a record.
+ Record.clear();
+ unsigned Code = Stream.readRecord(Entry.ID, Record);
+ switch (Code) {
+ default: // Default behavior: ignore.
+ break;
+ case bitc::METADATA_KIND: {
+ if (Error Err = parseMetadataKindRecord(Record))
+ return Err;
+ break;
+ }
+ }
+ }
+}
+
+MetadataLoader &MetadataLoader::operator=(MetadataLoader &&RHS) {
+ Pimpl = std::move(RHS.Pimpl);
+ return *this;
+}
+MetadataLoader::MetadataLoader(MetadataLoader &&RHS)
+ : Pimpl(std::move(RHS.Pimpl)) {}
+
+MetadataLoader::~MetadataLoader() = default;
+MetadataLoader::MetadataLoader(BitstreamCursor &Stream, Module &TheModule,
+ BitcodeReaderValueList &ValueList,
+ bool IsImporting,
+ std::function<Type *(unsigned)> getTypeByID)
+ : Pimpl(llvm::make_unique<MetadataLoaderImpl>(Stream, TheModule, ValueList,
+ getTypeByID, IsImporting)) {}
+
+Error MetadataLoader::parseMetadata(bool ModuleLevel) {
+ return Pimpl->parseMetadata(ModuleLevel);
+}
+
+bool MetadataLoader::hasFwdRefs() const { return Pimpl->hasFwdRefs(); }
+
+/// Return the given metadata, creating a replaceable forward reference if
+/// necessary.
+Metadata *MetadataLoader::getMetadataFwdRef(unsigned Idx) {
+ return Pimpl->getMetadataFwdRef(Idx);
+}
+
+MDNode *MetadataLoader::getMDNodeFwdRefOrNull(unsigned Idx) {
+ return Pimpl->getMDNodeFwdRefOrNull(Idx);
+}
+
+DISubprogram *MetadataLoader::lookupSubprogramForFunction(Function *F) {
+ return Pimpl->lookupSubprogramForFunction(F);
+}
+
+Error MetadataLoader::parseMetadataAttachment(
+ Function &F, const SmallVectorImpl<Instruction *> &InstructionList) {
+ return Pimpl->parseMetadataAttachment(F, InstructionList);
+}
+
+Error MetadataLoader::parseMetadataKinds() {
+ return Pimpl->parseMetadataKinds();
+}
+
+void MetadataLoader::setStripTBAA(bool StripTBAA) {
+ return Pimpl->setStripTBAA(StripTBAA);
+}
+
+bool MetadataLoader::isStrippingTBAA() { return Pimpl->isStrippingTBAA(); }
+
+unsigned MetadataLoader::size() const { return Pimpl->size(); }
+void MetadataLoader::shrinkTo(unsigned N) { return Pimpl->shrinkTo(N); }
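Every record case above funnels through the GET_OR_DISTINCT macro that is #undef'd at the end of the record-parsing switch; its definition sits earlier in MetadataLoader.cpp and is not part of this hunk. A minimal sketch of the pattern it implements, assuming it simply dispatches on the IsDistinct bit decoded from Record[0] (the argument names below are illustrative):

// Sketch only: pick uniqued vs. distinct node creation from the IsDistinct
// flag that each record case decodes out of Record[0].
#define GET_OR_DISTINCT(CLASS, ARGS)                                           \
  (IsDistinct ? CLASS::getDistinct ARGS : CLASS::get ARGS)

// The METADATA_NAMESPACE case above then expands roughly to:
DINamespace *NS =
    IsDistinct ? DINamespace::getDistinct(Context, Scope, File, Name, Line,
                                          ExportSymbols)
               : DINamespace::get(Context, Scope, File, Name, Line,
                                  ExportSymbols);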
diff --git a/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.h b/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.h
new file mode 100644
index 000000000000..753846c6eadf
--- /dev/null
+++ b/contrib/llvm/lib/Bitcode/Reader/MetadataLoader.h
@@ -0,0 +1,85 @@
+//===-- Bitcode/Reader/MetadataLoader.h - Load Metadatas -------*- C++ -*-====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class handles loading Metadatas.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_BITCODE_READER_METADATALOADER_H
+#define LLVM_LIB_BITCODE_READER_METADATALOADER_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Error.h"
+
+#include <functional>
+#include <memory>
+
+namespace llvm {
+class BitcodeReaderValueList;
+class BitstreamCursor;
+class DISubprogram;
+class Error;
+class Function;
+class Instruction;
+class Metadata;
+class MDNode;
+class Module;
+class Type;
+
+/// Helper class that handles loading Metadatas and keeping them available.
+class MetadataLoader {
+ class MetadataLoaderImpl;
+ std::unique_ptr<MetadataLoaderImpl> Pimpl;
+ Error parseMetadata(bool ModuleLevel);
+
+public:
+ ~MetadataLoader();
+ MetadataLoader(BitstreamCursor &Stream, Module &TheModule,
+ BitcodeReaderValueList &ValueList, bool IsImporting,
+ std::function<Type *(unsigned)> getTypeByID);
+ MetadataLoader &operator=(MetadataLoader &&);
+ MetadataLoader(MetadataLoader &&);
+
+ // Parse a module metadata block
+ Error parseModuleMetadata() { return parseMetadata(true); }
+
+ // Parse a function metadata block
+ Error parseFunctionMetadata() { return parseMetadata(false); }
+
+ /// Set the mode to strip TBAA metadata on load.
+ void setStripTBAA(bool StripTBAA = true);
+
+ /// Return true if the Loader is stripping TBAA metadata.
+ bool isStrippingTBAA();
+
+ // Return true if there are remaining unresolved forward references.
+ bool hasFwdRefs() const;
+
+ /// Return the given metadata, creating a replaceable forward reference if
+ /// necessary.
+ Metadata *getMetadataFwdRef(unsigned Idx);
+
+ MDNode *getMDNodeFwdRefOrNull(unsigned Idx);
+
+ /// Return the DISubprogram metadata for a Function if any, null otherwise.
+ DISubprogram *lookupSubprogramForFunction(Function *F);
+
+ /// Parse a `METADATA_ATTACHMENT` block for a function.
+ Error parseMetadataAttachment(
+ Function &F, const SmallVectorImpl<Instruction *> &InstructionList);
+
+ /// Parse a `METADATA_KIND` block for the current module.
+ Error parseMetadataKinds();
+
+ unsigned size() const;
+ void shrinkTo(unsigned N);
+};
+}
+
+#endif // LLVM_LIB_BITCODE_READER_METADATALOADER_H
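A hedged usage sketch of the interface declared above; the surrounding reader state (Stream, M, ValueList, getTypeByID, F, InstructionList) is assumed to be set up by the enclosing BitcodeReader, and the call order simply mirrors the parse*() entry points exposed here:

// Sketch only: drive the loader the way a bitcode reader would.
MetadataLoader MDLoader(Stream, M, ValueList, /*IsImporting=*/false,
                        [&](unsigned ID) { return getTypeByID(ID); });

// Module-level metadata first ...
if (Error Err = MDLoader.parseModuleMetadata())
  return Err;

// ... then, per function, its metadata block and instruction attachments.
if (Error Err = MDLoader.parseFunctionMetadata())
  return Err;
if (Error Err = MDLoader.parseMetadataAttachment(F, InstructionList))
  return Err;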
diff --git a/contrib/llvm/lib/Bitcode/Reader/ValueList.cpp b/contrib/llvm/lib/Bitcode/Reader/ValueList.cpp
new file mode 100644
index 000000000000..7152a51cea6e
--- /dev/null
+++ b/contrib/llvm/lib/Bitcode/Reader/ValueList.cpp
@@ -0,0 +1,199 @@
+//===----- ValueList.cpp - Internal BitcodeReader implementation ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ValueList.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Instructions.h"
+
+using namespace llvm;
+
+namespace llvm {
+namespace {
+
+/// \brief A class for maintaining the slot number definition
+/// as a placeholder for the actual definition of forward-referenced constants.
+class ConstantPlaceHolder : public ConstantExpr {
+ void operator=(const ConstantPlaceHolder &) = delete;
+
+public:
+ // allocate space for exactly one operand
+ void *operator new(size_t s) { return User::operator new(s, 1); }
+ explicit ConstantPlaceHolder(Type *Ty, LLVMContext &Context)
+ : ConstantExpr(Ty, Instruction::UserOp1, &Op<0>(), 1) {
+ Op<0>() = UndefValue::get(Type::getInt32Ty(Context));
+ }
+
+ /// \brief Methods to support type inquiry through isa, cast, and dyn_cast.
+ static bool classof(const Value *V) {
+ return isa<ConstantExpr>(V) &&
+ cast<ConstantExpr>(V)->getOpcode() == Instruction::UserOp1;
+ }
+
+ /// Provide fast operand accessors
+ DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
+};
+
+} // end anonymous namespace
+
+// FIXME: can we inherit this from ConstantExpr?
+template <>
+struct OperandTraits<ConstantPlaceHolder>
+ : public FixedNumOperandTraits<ConstantPlaceHolder, 1> {};
+DEFINE_TRANSPARENT_OPERAND_ACCESSORS(ConstantPlaceHolder, Value)
+
+} // end namespace llvm
+
+void BitcodeReaderValueList::assignValue(Value *V, unsigned Idx) {
+ if (Idx == size()) {
+ push_back(V);
+ return;
+ }
+
+ if (Idx >= size())
+ resize(Idx + 1);
+
+ WeakVH &OldV = ValuePtrs[Idx];
+ if (!OldV) {
+ OldV = V;
+ return;
+ }
+
+ // Handle constants and non-constants (e.g. instrs) differently for
+ // efficiency.
+ if (Constant *PHC = dyn_cast<Constant>(&*OldV)) {
+ ResolveConstants.push_back(std::make_pair(PHC, Idx));
+ OldV = V;
+ } else {
+ // If there was a forward reference to this value, replace it.
+ Value *PrevVal = OldV;
+ OldV->replaceAllUsesWith(V);
+ delete PrevVal;
+ }
+}
+
+Constant *BitcodeReaderValueList::getConstantFwdRef(unsigned Idx, Type *Ty) {
+ if (Idx >= size())
+ resize(Idx + 1);
+
+ if (Value *V = ValuePtrs[Idx]) {
+ if (Ty != V->getType())
+ report_fatal_error("Type mismatch in constant table!");
+ return cast<Constant>(V);
+ }
+
+ // Create and return a placeholder, which will later be RAUW'd.
+ Constant *C = new ConstantPlaceHolder(Ty, Context);
+ ValuePtrs[Idx] = C;
+ return C;
+}
+
+Value *BitcodeReaderValueList::getValueFwdRef(unsigned Idx, Type *Ty) {
+ // Bail out for a clearly invalid value. This would make us call resize(0).
+ if (Idx == std::numeric_limits<unsigned>::max())
+ return nullptr;
+
+ if (Idx >= size())
+ resize(Idx + 1);
+
+ if (Value *V = ValuePtrs[Idx]) {
+ // If the types don't match, it's invalid.
+ if (Ty && Ty != V->getType())
+ return nullptr;
+ return V;
+ }
+
+ // No type specified, must be invalid reference.
+ if (!Ty)
+ return nullptr;
+
+ // Create and return a placeholder, which will later be RAUW'd.
+ Value *V = new Argument(Ty);
+ ValuePtrs[Idx] = V;
+ return V;
+}
+
+/// Once all constants are read, this method bulk resolves any forward
+/// references. The idea behind this is that we sometimes get constants (such
+/// as large arrays) which reference *many* forward ref constants. Replacing
+/// each of these causes a lot of thrashing when building/reuniquing the
+/// constant. Instead of doing this, we look at all the uses and rewrite all
+/// the placeholders at once for any constant that uses a placeholder.
+void BitcodeReaderValueList::resolveConstantForwardRefs() {
+ // Sort the values by-pointer so that they are efficient to look up with a
+ // binary search.
+ std::sort(ResolveConstants.begin(), ResolveConstants.end());
+
+ SmallVector<Constant *, 64> NewOps;
+
+ while (!ResolveConstants.empty()) {
+ Value *RealVal = operator[](ResolveConstants.back().second);
+ Constant *Placeholder = ResolveConstants.back().first;
+ ResolveConstants.pop_back();
+
+ // Loop over all users of the placeholder, updating them to reference the
+ // new value. If they reference more than one placeholder, update them all
+ // at once.
+ while (!Placeholder->use_empty()) {
+ auto UI = Placeholder->user_begin();
+ User *U = *UI;
+
+ // If the using object isn't uniqued, just update the operands. This
+ // handles instructions and initializers for global variables.
+ if (!isa<Constant>(U) || isa<GlobalValue>(U)) {
+ UI.getUse().set(RealVal);
+ continue;
+ }
+
+ // Otherwise, we have a constant that uses the placeholder. Replace that
+ // constant with a new constant that has *all* placeholder uses updated.
+ Constant *UserC = cast<Constant>(U);
+ for (User::op_iterator I = UserC->op_begin(), E = UserC->op_end(); I != E;
+ ++I) {
+ Value *NewOp;
+ if (!isa<ConstantPlaceHolder>(*I)) {
+ // Not a placeholder reference.
+ NewOp = *I;
+ } else if (*I == Placeholder) {
+ // Common case is that it just references this one placeholder.
+ NewOp = RealVal;
+ } else {
+ // Otherwise, look up the placeholder in ResolveConstants.
+ ResolveConstantsTy::iterator It = std::lower_bound(
+ ResolveConstants.begin(), ResolveConstants.end(),
+ std::pair<Constant *, unsigned>(cast<Constant>(*I), 0));
+ assert(It != ResolveConstants.end() && It->first == *I);
+ NewOp = operator[](It->second);
+ }
+
+ NewOps.push_back(cast<Constant>(NewOp));
+ }
+
+ // Make the new constant.
+ Constant *NewC;
+ if (ConstantArray *UserCA = dyn_cast<ConstantArray>(UserC)) {
+ NewC = ConstantArray::get(UserCA->getType(), NewOps);
+ } else if (ConstantStruct *UserCS = dyn_cast<ConstantStruct>(UserC)) {
+ NewC = ConstantStruct::get(UserCS->getType(), NewOps);
+ } else if (isa<ConstantVector>(UserC)) {
+ NewC = ConstantVector::get(NewOps);
+ } else {
+ assert(isa<ConstantExpr>(UserC) && "Must be a ConstantExpr.");
+ NewC = cast<ConstantExpr>(UserC)->getWithOperands(NewOps);
+ }
+
+ UserC->replaceAllUsesWith(NewC);
+ UserC->destroyConstant();
+ NewOps.clear();
+ }
+
+ // Update all ValueHandles, they should be the only users at this point.
+ Placeholder->replaceAllUsesWith(RealVal);
+ delete Placeholder;
+ }
+}
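A minimal sketch of how the forward-reference machinery above is typically exercised; the slot index, type, and constant value are illustrative, and Context is assumed to be the reader's LLVMContext:

// Sketch only: a constant operand can be referenced before its own record has
// been parsed, so the list hands out a ConstantPlaceHolder for that slot.
BitcodeReaderValueList ValueList(Context);
Type *Int32Ty = Type::getInt32Ty(Context);

Constant *Fwd = ValueList.getConstantFwdRef(/*Idx=*/5, Int32Ty); // placeholder

// ... later, once slot 5 is actually read from the CONSTANTS block:
ValueList.assignValue(ConstantInt::get(Int32Ty, 42), /*Idx=*/5);

// Bulk-rewrite every use of outstanding placeholders in a single pass.
ValueList.resolveConstantForwardRefs();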
diff --git a/contrib/llvm/lib/Bitcode/Reader/ValueList.h b/contrib/llvm/lib/Bitcode/Reader/ValueList.h
new file mode 100644
index 000000000000..3119d7735e22
--- /dev/null
+++ b/contrib/llvm/lib/Bitcode/Reader/ValueList.h
@@ -0,0 +1,76 @@
+//===-- Bitcode/Reader/ValueList.h - Number values --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class maintains the list of values parsed from bitcode.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/ValueHandle.h"
+
+#include <vector>
+
+namespace llvm {
+class Constant;
+
+class BitcodeReaderValueList {
+ std::vector<WeakVH> ValuePtrs;
+
+ /// As we resolve forward-referenced constants, we add information about them
+ /// to this vector. This allows us to resolve them in bulk instead of
+ /// resolving each reference one at a time. See the code in
+ /// resolveConstantForwardRefs for more information about this.
+ ///
+ /// The key of this vector is the placeholder constant; the value is the slot
+ /// number that holds the resolved value.
+ typedef std::vector<std::pair<Constant *, unsigned>> ResolveConstantsTy;
+ ResolveConstantsTy ResolveConstants;
+ LLVMContext &Context;
+
+public:
+ BitcodeReaderValueList(LLVMContext &C) : Context(C) {}
+ ~BitcodeReaderValueList() {
+ assert(ResolveConstants.empty() && "Constants not resolved?");
+ }
+
+ // vector compatibility methods
+ unsigned size() const { return ValuePtrs.size(); }
+ void resize(unsigned N) { ValuePtrs.resize(N); }
+ void push_back(Value *V) { ValuePtrs.emplace_back(V); }
+
+ void clear() {
+ assert(ResolveConstants.empty() && "Constants not resolved?");
+ ValuePtrs.clear();
+ }
+
+ Value *operator[](unsigned i) const {
+ assert(i < ValuePtrs.size());
+ return ValuePtrs[i];
+ }
+
+ Value *back() const { return ValuePtrs.back(); }
+ void pop_back() { ValuePtrs.pop_back(); }
+ bool empty() const { return ValuePtrs.empty(); }
+
+ void shrinkTo(unsigned N) {
+ assert(N <= size() && "Invalid shrinkTo request!");
+ ValuePtrs.resize(N);
+ }
+
+ Constant *getConstantFwdRef(unsigned Idx, Type *Ty);
+ Value *getValueFwdRef(unsigned Idx, Type *Ty);
+
+ void assignValue(Value *V, unsigned Idx);
+
+ /// Once all constants are read, this method bulk resolves any forward
+ /// references.
+ void resolveConstantForwardRefs();
+};
+
+} // namespace llvm
diff --git a/contrib/llvm/lib/Bitcode/Writer/BitWriter.cpp b/contrib/llvm/lib/Bitcode/Writer/BitWriter.cpp
index 7218ea05bb5b..e0388418a3d9 100644
--- a/contrib/llvm/lib/Bitcode/Writer/BitWriter.cpp
+++ b/contrib/llvm/lib/Bitcode/Writer/BitWriter.cpp
@@ -8,9 +8,10 @@
//===----------------------------------------------------------------------===//
#include "llvm-c/BitWriter.h"
-#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
diff --git a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index dcb8b58cd7b3..c10ba2399e71 100644
--- a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -11,12 +11,12 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Bitcode/BitcodeWriter.h"
#include "ValueEnumerator.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Bitcode/BitstreamWriter.h"
#include "llvm/Bitcode/LLVMBitCodes.h"
-#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
@@ -38,6 +38,11 @@
using namespace llvm;
namespace {
+
+cl::opt<unsigned>
+ IndexThreshold("bitcode-mdindex-threshold", cl::Hidden, cl::init(25),
+ cl::desc("Number of metadatas above which we emit an index "
+ "to enable lazy-loading"));
/// These are manifest constants used by the bitcode writer. They do not need to
/// be kept in sync with the reader, but need to be consistent within this file.
enum {
@@ -65,36 +70,20 @@ enum {
};
/// Abstract class to manage the bitcode writing, subclassed for each bitcode
-/// file type. Owns the BitstreamWriter, and includes the main entry point for
-/// writing.
-class BitcodeWriter {
+/// file type.
+class BitcodeWriterBase {
protected:
- /// Pointer to the buffer allocated by caller for bitcode writing.
- const SmallVectorImpl<char> &Buffer;
-
- /// The stream created and owned by the BitodeWriter.
- BitstreamWriter Stream;
+ /// The stream created and owned by the client.
+ BitstreamWriter &Stream;
/// Saves the offset of the VSTOffset record that must eventually be
/// backpatched with the offset of the actual VST.
uint64_t VSTOffsetPlaceholder = 0;
public:
- /// Constructs a BitcodeWriter object, and initializes a BitstreamRecord,
- /// writing to the provided \p Buffer.
- BitcodeWriter(SmallVectorImpl<char> &Buffer)
- : Buffer(Buffer), Stream(Buffer) {}
-
- virtual ~BitcodeWriter() = default;
-
- /// Main entry point to write the bitcode file, which writes the bitcode
- /// header and will then invoke the virtual writeBlocks() method.
- void write();
-
-private:
- /// Derived classes must implement this to write the corresponding blocks for
- /// that bitcode file type.
- virtual void writeBlocks() = 0;
+ /// Constructs a BitcodeWriterBase object that writes to the provided
+ /// \p Stream.
+ BitcodeWriterBase(BitstreamWriter &Stream) : Stream(Stream) {}
protected:
bool hasVSTOffsetPlaceholder() { return VSTOffsetPlaceholder != 0; }
@@ -103,7 +92,10 @@ protected:
};
/// Class to manage the bitcode writing for a module.
-class ModuleBitcodeWriter : public BitcodeWriter {
+class ModuleBitcodeWriter : public BitcodeWriterBase {
+ /// Pointer to the buffer allocated by caller for bitcode writing.
+ const SmallVectorImpl<char> &Buffer;
+
/// The Module to write to bitcode.
const Module &M;
@@ -116,8 +108,8 @@ class ModuleBitcodeWriter : public BitcodeWriter {
/// True if a module hash record should be written.
bool GenerateHash;
- /// The start bit of the module block, for use in generating a module hash
- uint64_t BitcodeStartBit = 0;
+ /// The start bit of the identification block.
+ uint64_t BitcodeStartBit;
/// Map that holds the correspondence between GUIDs in the summary index,
/// that came from indirect call profiles, and a value id generated by this
@@ -131,51 +123,38 @@ public:
/// Constructs a ModuleBitcodeWriter object for the given Module,
/// writing to the provided \p Buffer.
ModuleBitcodeWriter(const Module *M, SmallVectorImpl<char> &Buffer,
- bool ShouldPreserveUseListOrder,
+ BitstreamWriter &Stream, bool ShouldPreserveUseListOrder,
const ModuleSummaryIndex *Index, bool GenerateHash)
- : BitcodeWriter(Buffer), M(*M), VE(*M, ShouldPreserveUseListOrder),
- Index(Index), GenerateHash(GenerateHash) {
- // Save the start bit of the actual bitcode, in case there is space
- // saved at the start for the darwin header above. The reader stream
- // will start at the bitcode, and we need the offset of the VST
- // to line up.
- BitcodeStartBit = Stream.GetCurrentBitNo();
-
+ : BitcodeWriterBase(Stream), Buffer(Buffer), M(*M),
+ VE(*M, ShouldPreserveUseListOrder), Index(Index),
+ GenerateHash(GenerateHash), BitcodeStartBit(Stream.GetCurrentBitNo()) {
// Assign ValueIds to any callee values in the index that came from
// indirect call profiles and were recorded as a GUID not a Value*
// (which would have been assigned an ID by the ValueEnumerator).
// The starting ValueId is just after the number of values in the
// ValueEnumerator, so that they can be emitted in the VST.
GlobalValueId = VE.getValues().size();
- if (Index)
- for (const auto &GUIDSummaryLists : *Index)
- // Examine all summaries for this GUID.
- for (auto &Summary : GUIDSummaryLists.second)
- if (auto FS = dyn_cast<FunctionSummary>(Summary.get()))
- // For each call in the function summary, see if the call
- // is to a GUID (which means it is for an indirect call,
- // otherwise we would have a Value for it). If so, synthesize
- // a value id.
- for (auto &CallEdge : FS->calls())
- if (CallEdge.first.isGUID())
- assignValueId(CallEdge.first.getGUID());
+ if (!Index)
+ return;
+ for (const auto &GUIDSummaryLists : *Index)
+ // Examine all summaries for this GUID.
+ for (auto &Summary : GUIDSummaryLists.second)
+ if (auto FS = dyn_cast<FunctionSummary>(Summary.get()))
+ // For each call in the function summary, see if the call
+ // is to a GUID (which means it is for an indirect call,
+ // otherwise we would have a Value for it). If so, synthesize
+ // a value id.
+ for (auto &CallEdge : FS->calls())
+ if (CallEdge.first.isGUID())
+ assignValueId(CallEdge.first.getGUID());
}
-private:
- /// Main entry point for writing a module to bitcode, invoked by
- /// BitcodeWriter::write() after it writes the header.
- void writeBlocks() override;
-
- /// Create the "IDENTIFICATION_BLOCK_ID" containing a single string with the
- /// current llvm version, and a record for the epoch number.
- void writeIdentificationBlock();
-
/// Emit the current module to the bitstream.
- void writeModule();
+ void write();
+private:
uint64_t bitcodeStartBit() { return BitcodeStartBit; }
- void writeStringRecord(unsigned Code, StringRef Str, unsigned AbbrevToUse);
void writeAttributeGroupTable();
void writeAttributeTable();
void writeTypeTable();
@@ -236,6 +215,9 @@ private:
SmallVectorImpl<uint64_t> &Record, unsigned Abbrev);
void writeDIExpression(const DIExpression *N,
SmallVectorImpl<uint64_t> &Record, unsigned Abbrev);
+ void writeDIGlobalVariableExpression(const DIGlobalVariableExpression *N,
+ SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev);
void writeDIObjCProperty(const DIObjCProperty *N,
SmallVectorImpl<uint64_t> &Record, unsigned Abbrev);
void writeDIImportedEntity(const DIImportedEntity *N,
@@ -247,7 +229,9 @@ private:
void writeMetadataStrings(ArrayRef<const Metadata *> Strings,
SmallVectorImpl<uint64_t> &Record);
void writeMetadataRecords(ArrayRef<const Metadata *> MDs,
- SmallVectorImpl<uint64_t> &Record);
+ SmallVectorImpl<uint64_t> &Record,
+ std::vector<unsigned> *MDAbbrevs = nullptr,
+ std::vector<uint64_t> *IndexPos = nullptr);
void writeModuleMetadata();
void writeFunctionMetadata(const Function &F);
void writeFunctionMetadataAttachment(const Function &F);
@@ -293,7 +277,10 @@ private:
}
unsigned getValueId(GlobalValue::GUID ValGUID) {
const auto &VMI = GUIDToValueIdMap.find(ValGUID);
- assert(VMI != GUIDToValueIdMap.end());
+ // Expect that any GUID value had a value Id assigned by an
+ // earlier call to assignValueId.
+ assert(VMI != GUIDToValueIdMap.end() &&
+ "GUID does not have assigned value Id");
return VMI->second;
}
// Helper to get the valueId for the type of value recorded in VI.
@@ -306,13 +293,13 @@ private:
};
/// Class to manage the bitcode writing for a combined index.
-class IndexBitcodeWriter : public BitcodeWriter {
+class IndexBitcodeWriter : public BitcodeWriterBase {
/// The combined index to write to bitcode.
const ModuleSummaryIndex &Index;
/// When writing a subset of the index for distributed backends, client
/// provides a map of modules to the corresponding GUIDs/summaries to write.
- std::map<std::string, GVSummaryMapTy> *ModuleToSummariesForIndex;
+ const std::map<std::string, GVSummaryMapTy> *ModuleToSummariesForIndex;
/// Map that holds the correspondence between the GUID used in the combined
/// index and a value id generated by this class to use in references.
@@ -325,11 +312,10 @@ public:
/// Constructs a IndexBitcodeWriter object for the given combined index,
/// writing to the provided \p Buffer. When writing a subset of the index
/// for a distributed backend, provide a \p ModuleToSummariesForIndex map.
- IndexBitcodeWriter(SmallVectorImpl<char> &Buffer,
- const ModuleSummaryIndex &Index,
- std::map<std::string, GVSummaryMapTy>
+ IndexBitcodeWriter(BitstreamWriter &Stream, const ModuleSummaryIndex &Index,
+ const std::map<std::string, GVSummaryMapTy>
*ModuleToSummariesForIndex = nullptr)
- : BitcodeWriter(Buffer), Index(Index),
+ : BitcodeWriterBase(Stream), Index(Index),
ModuleToSummariesForIndex(ModuleToSummariesForIndex) {
// Assign unique value ids to all summaries to be written, for use
// in writing out the call graph edges. Save the mapping from GUID
@@ -355,11 +341,11 @@ public:
// ModuleToSummariesForIndex map:
/// Points to the last element in outer ModuleToSummariesForIndex map.
- std::map<std::string, GVSummaryMapTy>::iterator ModuleSummariesBack;
+ std::map<std::string, GVSummaryMapTy>::const_iterator ModuleSummariesBack;
/// Iterator on outer ModuleToSummariesForIndex map.
- std::map<std::string, GVSummaryMapTy>::iterator ModuleSummariesIter;
+ std::map<std::string, GVSummaryMapTy>::const_iterator ModuleSummariesIter;
/// Iterator on an inner global variable summary map.
- GVSummaryMapTy::iterator ModuleGVSummariesIter;
+ GVSummaryMapTy::const_iterator ModuleGVSummariesIter;
// Iterators used when writing all summaries in the index:
@@ -476,11 +462,10 @@ public:
/// Obtain the end iterator over the summaries to be written.
iterator end() { return iterator(*this, /*IsAtEnd=*/true); }
-private:
- /// Main entry point for writing a combined index to bitcode, invoked by
- /// BitcodeWriter::write() after it writes the header.
- void writeBlocks() override;
+ /// Main entry point for writing a combined index to bitcode.
+ void write();
+private:
void writeIndex();
void writeModStrings();
void writeCombinedValueSymbolTable();
@@ -593,8 +578,8 @@ static unsigned getEncodedSynchScope(SynchronizationScope SynchScope) {
llvm_unreachable("Invalid synch scope");
}
-void ModuleBitcodeWriter::writeStringRecord(unsigned Code, StringRef Str,
- unsigned AbbrevToUse) {
+static void writeStringRecord(BitstreamWriter &Stream, unsigned Code,
+ StringRef Str, unsigned AbbrevToUse) {
SmallVector<unsigned, 64> Vals;
// Code: [strchar x N]
@@ -918,7 +903,7 @@ void ModuleBitcodeWriter::writeTypeTable() {
// Emit the name if it is present.
if (!ST->getName().empty())
- writeStringRecord(bitc::TYPE_CODE_STRUCT_NAME, ST->getName(),
+ writeStringRecord(Stream, bitc::TYPE_CODE_STRUCT_NAME, ST->getName(),
StructNameAbbrev);
}
break;
@@ -986,8 +971,9 @@ static unsigned getEncodedLinkage(const GlobalValue &GV) {
static uint64_t getEncodedGVSummaryFlags(GlobalValueSummary::GVFlags Flags) {
uint64_t RawFlags = 0;
- RawFlags |= Flags.HasSection; // bool
-
+ RawFlags |= Flags.NoRename; // bool
+ RawFlags |= (Flags.IsNotViableToInline << 1);
+ RawFlags |= (Flags.HasInlineAsmMaybeReferencingInternal << 2);
// Linkage don't need to be remapped at that time for the summary. Any future
// change to the getEncodedLinkage() function will need to be taken into
// account here as well.
@@ -1068,7 +1054,7 @@ void ModuleBitcodeWriter::writeComdats() {
/// Write a record that will eventually hold the word offset of the
/// module-level VST. For now the offset is 0, which will be backpatched
/// after the real VST is written. Saves the bit offset to backpatch.
-void BitcodeWriter::writeValueSymbolTableForwardDecl() {
+void BitcodeWriterBase::writeValueSymbolTableForwardDecl() {
// Write a placeholder value in for the offset of the real VST,
// which is written after the function blocks so that it can include
// the offset of each function. The placeholder offset will be
@@ -1115,13 +1101,13 @@ static StringEncoding getStringEncoding(const char *Str, unsigned StrLen) {
void ModuleBitcodeWriter::writeModuleInfo() {
// Emit various pieces of data attached to a module.
if (!M.getTargetTriple().empty())
- writeStringRecord(bitc::MODULE_CODE_TRIPLE, M.getTargetTriple(),
+ writeStringRecord(Stream, bitc::MODULE_CODE_TRIPLE, M.getTargetTriple(),
0 /*TODO*/);
const std::string &DL = M.getDataLayoutStr();
if (!DL.empty())
- writeStringRecord(bitc::MODULE_CODE_DATALAYOUT, DL, 0 /*TODO*/);
+ writeStringRecord(Stream, bitc::MODULE_CODE_DATALAYOUT, DL, 0 /*TODO*/);
if (!M.getModuleInlineAsm().empty())
- writeStringRecord(bitc::MODULE_CODE_ASM, M.getModuleInlineAsm(),
+ writeStringRecord(Stream, bitc::MODULE_CODE_ASM, M.getModuleInlineAsm(),
0 /*TODO*/);
// Emit information about sections and GC, computing how many there are. Also
@@ -1137,7 +1123,7 @@ void ModuleBitcodeWriter::writeModuleInfo() {
// Give section names unique ID's.
unsigned &Entry = SectionMap[GV.getSection()];
if (!Entry) {
- writeStringRecord(bitc::MODULE_CODE_SECTIONNAME, GV.getSection(),
+ writeStringRecord(Stream, bitc::MODULE_CODE_SECTIONNAME, GV.getSection(),
0 /*TODO*/);
Entry = SectionMap.size();
}
@@ -1149,7 +1135,7 @@ void ModuleBitcodeWriter::writeModuleInfo() {
// Give section names unique ID's.
unsigned &Entry = SectionMap[F.getSection()];
if (!Entry) {
- writeStringRecord(bitc::MODULE_CODE_SECTIONNAME, F.getSection(),
+ writeStringRecord(Stream, bitc::MODULE_CODE_SECTIONNAME, F.getSection(),
0 /*TODO*/);
Entry = SectionMap.size();
}
@@ -1158,7 +1144,8 @@ void ModuleBitcodeWriter::writeModuleInfo() {
// Same for GC names.
unsigned &Entry = GCMap[F.getGC()];
if (!Entry) {
- writeStringRecord(bitc::MODULE_CODE_GCNAME, F.getGC(), 0 /*TODO*/);
+ writeStringRecord(Stream, bitc::MODULE_CODE_GCNAME, F.getGC(),
+ 0 /*TODO*/);
Entry = GCMap.size();
}
}
@@ -1535,6 +1522,8 @@ void ModuleBitcodeWriter::writeDIFile(const DIFile *N,
Record.push_back(N->isDistinct());
Record.push_back(VE.getMetadataOrNullID(N->getRawFilename()));
Record.push_back(VE.getMetadataOrNullID(N->getRawDirectory()));
+ Record.push_back(N->getChecksumKind());
+ Record.push_back(VE.getMetadataOrNullID(N->getRawChecksum()));
Stream.EmitRecord(bitc::METADATA_FILE, Record, Abbrev);
Record.clear();
@@ -1560,6 +1549,7 @@ void ModuleBitcodeWriter::writeDICompileUnit(const DICompileUnit *N,
Record.push_back(VE.getMetadataOrNullID(N->getImportedEntities().get()));
Record.push_back(N->getDWOId());
Record.push_back(VE.getMetadataOrNullID(N->getMacros().get()));
+ Record.push_back(N->getSplitDebugInlining());
Stream.EmitRecord(bitc::METADATA_COMPILE_UNIT, Record, Abbrev);
Record.clear();
@@ -1622,7 +1612,7 @@ void ModuleBitcodeWriter::writeDILexicalBlockFile(
void ModuleBitcodeWriter::writeDINamespace(const DINamespace *N,
SmallVectorImpl<uint64_t> &Record,
unsigned Abbrev) {
- Record.push_back(N->isDistinct());
+ Record.push_back(N->isDistinct() | N->getExportSymbols() << 1);
Record.push_back(VE.getMetadataOrNullID(N->getScope()));
Record.push_back(VE.getMetadataOrNullID(N->getFile()));
Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
@@ -1696,7 +1686,8 @@ void ModuleBitcodeWriter::writeDITemplateValueParameter(
void ModuleBitcodeWriter::writeDIGlobalVariable(
const DIGlobalVariable *N, SmallVectorImpl<uint64_t> &Record,
unsigned Abbrev) {
- Record.push_back(N->isDistinct());
+ const uint64_t Version = 1 << 1;
+ Record.push_back((uint64_t)N->isDistinct() | Version);
Record.push_back(VE.getMetadataOrNullID(N->getScope()));
Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
Record.push_back(VE.getMetadataOrNullID(N->getRawLinkageName()));
@@ -1705,8 +1696,9 @@ void ModuleBitcodeWriter::writeDIGlobalVariable(
Record.push_back(VE.getMetadataOrNullID(N->getType()));
Record.push_back(N->isLocalToUnit());
Record.push_back(N->isDefinition());
- Record.push_back(VE.getMetadataOrNullID(N->getRawVariable()));
+ Record.push_back(/* expr */ 0);
Record.push_back(VE.getMetadataOrNullID(N->getStaticDataMemberDeclaration()));
+ Record.push_back(N->getAlignInBits());
Stream.EmitRecord(bitc::METADATA_GLOBAL_VAR, Record, Abbrev);
Record.clear();
@@ -1715,7 +1707,21 @@ void ModuleBitcodeWriter::writeDIGlobalVariable(
void ModuleBitcodeWriter::writeDILocalVariable(
const DILocalVariable *N, SmallVectorImpl<uint64_t> &Record,
unsigned Abbrev) {
- Record.push_back(N->isDistinct());
+ // In order to support all possible bitcode formats in BitcodeReader we need
+ // to distinguish the following cases:
+ // 1) Record has no artificial tag (Record[1]),
+ // has no obsolete inlinedAt field (Record[9]).
+ // In this case Record size will be 8, HasAlignment flag is false.
+ // 2) Record has artificial tag (Record[1]),
+ // has no obsolete inlinedAt field (Record[9]).
+ // In this case Record size will be 9, HasAlignment flag is false.
+ // 3) Record has both artificial tag (Record[1]) and
+ // obsolete inlinedAt field (Record[9]).
+ // In this case Record size will be 10, HasAlignment flag is false.
+ // 4) Record has neither artificial tag nor inlinedAt field, but
+ // HasAlignment flag is true and Record[8] contains alignment value.
+ const uint64_t HasAlignmentFlag = 1 << 1;
+ Record.push_back((uint64_t)N->isDistinct() | HasAlignmentFlag);
Record.push_back(VE.getMetadataOrNullID(N->getScope()));
Record.push_back(VE.getMetadataOrNullID(N->getRawName()));
Record.push_back(VE.getMetadataOrNullID(N->getFile()));
@@ -1723,6 +1729,7 @@ void ModuleBitcodeWriter::writeDILocalVariable(
Record.push_back(VE.getMetadataOrNullID(N->getType()));
Record.push_back(N->getArg());
Record.push_back(N->getFlags());
+ Record.push_back(N->getAlignInBits());
Stream.EmitRecord(bitc::METADATA_LOCAL_VAR, Record, Abbrev);
Record.clear();
@@ -1733,13 +1740,25 @@ void ModuleBitcodeWriter::writeDIExpression(const DIExpression *N,
unsigned Abbrev) {
Record.reserve(N->getElements().size() + 1);
- Record.push_back(N->isDistinct());
+ const uint64_t HasOpFragmentFlag = 1 << 1;
+ Record.push_back((uint64_t)N->isDistinct() | HasOpFragmentFlag);
Record.append(N->elements_begin(), N->elements_end());
Stream.EmitRecord(bitc::METADATA_EXPRESSION, Record, Abbrev);
Record.clear();
}
+void ModuleBitcodeWriter::writeDIGlobalVariableExpression(
+ const DIGlobalVariableExpression *N, SmallVectorImpl<uint64_t> &Record,
+ unsigned Abbrev) {
+ Record.push_back(N->isDistinct());
+ Record.push_back(VE.getMetadataOrNullID(N->getVariable()));
+ Record.push_back(VE.getMetadataOrNullID(N->getExpression()));
+
+ Stream.EmitRecord(bitc::METADATA_GLOBAL_VAR_EXPR, Record, Abbrev);
+ Record.clear();
+}
+
void ModuleBitcodeWriter::writeDIObjCProperty(const DIObjCProperty *N,
SmallVectorImpl<uint64_t> &Record,
unsigned Abbrev) {
@@ -1842,8 +1861,16 @@ void ModuleBitcodeWriter::writeMetadataStrings(
Record.clear();
}
+// Generates an enum to use as an index into the Abbrev array for Metadata records.
+enum MetadataAbbrev : unsigned {
+#define HANDLE_MDNODE_LEAF(CLASS) CLASS##AbbrevID,
+#include "llvm/IR/Metadata.def"
+ LastPlusOne
+};
+
void ModuleBitcodeWriter::writeMetadataRecords(
- ArrayRef<const Metadata *> MDs, SmallVectorImpl<uint64_t> &Record) {
+ ArrayRef<const Metadata *> MDs, SmallVectorImpl<uint64_t> &Record,
+ std::vector<unsigned> *MDAbbrevs, std::vector<uint64_t> *IndexPos) {
if (MDs.empty())
return;
@@ -1852,6 +1879,8 @@ void ModuleBitcodeWriter::writeMetadataRecords(
#include "llvm/IR/Metadata.def"
for (const Metadata *MD : MDs) {
+ if (IndexPos)
+ IndexPos->push_back(Stream.GetCurrentBitNo());
if (const MDNode *N = dyn_cast<MDNode>(MD)) {
assert(N->isResolved() && "Expected forward references to be resolved");
@@ -1860,7 +1889,11 @@ void ModuleBitcodeWriter::writeMetadataRecords(
llvm_unreachable("Invalid MDNode subclass");
#define HANDLE_MDNODE_LEAF(CLASS) \
case Metadata::CLASS##Kind: \
- write##CLASS(cast<CLASS>(N), Record, CLASS##Abbrev); \
+ if (MDAbbrevs) \
+ write##CLASS(cast<CLASS>(N), Record, \
+ (*MDAbbrevs)[MetadataAbbrev::CLASS##AbbrevID]); \
+ else \
+ write##CLASS(cast<CLASS>(N), Record, CLASS##Abbrev); \
continue;
#include "llvm/IR/Metadata.def"
}
@@ -1873,10 +1906,77 @@ void ModuleBitcodeWriter::writeModuleMetadata() {
if (!VE.hasMDs() && M.named_metadata_empty())
return;
- Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3);
+ Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 4);
SmallVector<uint64_t, 64> Record;
+
+ // Emit all abbrevs upfront, so that the reader can jump into the middle of the
+ // block and load any metadata.
+ std::vector<unsigned> MDAbbrevs;
+
+ MDAbbrevs.resize(MetadataAbbrev::LastPlusOne);
+ MDAbbrevs[MetadataAbbrev::DILocationAbbrevID] = createDILocationAbbrev();
+ MDAbbrevs[MetadataAbbrev::GenericDINodeAbbrevID] =
+ createGenericDINodeAbbrev();
+
+ BitCodeAbbrev *Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_INDEX_OFFSET));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32));
+ unsigned OffsetAbbrev = Stream.EmitAbbrev(Abbv);
+
+ Abbv = new BitCodeAbbrev();
+ Abbv->Add(BitCodeAbbrevOp(bitc::METADATA_INDEX));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6));
+ unsigned IndexAbbrev = Stream.EmitAbbrev(Abbv);
+
+ // Emit MDStrings together upfront.
writeMetadataStrings(VE.getMDStrings(), Record);
- writeMetadataRecords(VE.getNonMDStrings(), Record);
+
+ // We only emit an index for the metadata record if we have more than a given
+ // (naive) threshold of metadatas, otherwise it is not worth it.
+ if (VE.getNonMDStrings().size() > IndexThreshold) {
+ // Write a placeholder value in for the offset of the metadata index,
+ // which is written after the records, so that it can include
+ // the offset of each entry. The placeholder offset will be
+ // updated after all records are emitted.
+ uint64_t Vals[] = {0, 0};
+ Stream.EmitRecord(bitc::METADATA_INDEX_OFFSET, Vals, OffsetAbbrev);
+ }
+
+ // Compute and save the bit offset to the current position, which will be
+ // patched when we emit the index later. We can simply subtract the 64-bit
+ // fixed size from the current bit number to get the location to backpatch.
+ uint64_t IndexOffsetRecordBitPos = Stream.GetCurrentBitNo();
+
+ // This index will contain the bitpos for each individual record.
+ std::vector<uint64_t> IndexPos;
+ IndexPos.reserve(VE.getNonMDStrings().size());
+
+ // Write all the records
+ writeMetadataRecords(VE.getNonMDStrings(), Record, &MDAbbrevs, &IndexPos);
+
+ if (VE.getNonMDStrings().size() > IndexThreshold) {
+ // Now that we have emitted all the records we will emit the index. But
+ // first
+ // backpatch the forward reference so that the reader can skip the records
+ // efficiently.
+ Stream.BackpatchWord64(IndexOffsetRecordBitPos - 64,
+ Stream.GetCurrentBitNo() - IndexOffsetRecordBitPos);
+
+ // Delta encode the index.
+ uint64_t PreviousValue = IndexOffsetRecordBitPos;
+ for (auto &Elt : IndexPos) {
+ auto EltDelta = Elt - PreviousValue;
+ PreviousValue = Elt;
+ Elt = EltDelta;
+ }
+ // Emit the index record.
+ Stream.EmitRecord(bitc::METADATA_INDEX, IndexPos, IndexAbbrev);
+ IndexPos.clear();
+ }
+
+ // Write the named metadata now.
writeNamedMetadata(Record);
auto AddDeclAttachedMetadata = [&](const GlobalObject &GO) {
@@ -2196,9 +2296,12 @@ void ModuleBitcodeWriter::writeConstants(unsigned FirstVal, unsigned LastVal,
case Instruction::GetElementPtr: {
Code = bitc::CST_CODE_CE_GEP;
const auto *GO = cast<GEPOperator>(C);
- if (GO->isInBounds())
- Code = bitc::CST_CODE_CE_INBOUNDS_GEP;
Record.push_back(VE.getTypeID(GO->getSourceElementType()));
+ if (Optional<unsigned> Idx = GO->getInRangeIndex()) {
+ Code = bitc::CST_CODE_CE_GEP_WITH_INRANGE_INDEX;
+ Record.push_back((*Idx << 1) | GO->isInBounds());
+ } else if (GO->isInBounds())
+ Code = bitc::CST_CODE_CE_INBOUNDS_GEP;
for (unsigned i = 0, e = CE->getNumOperands(); i != e; ++i) {
Record.push_back(VE.getTypeID(C->getOperand(i)->getType()));
Record.push_back(VE.getValueID(C->getOperand(i)));
@@ -2495,7 +2598,7 @@ void ModuleBitcodeWriter::writeInstruction(const Instruction &I,
// Emit type/value pairs for varargs params.
if (FTy->isVarArg()) {
- for (unsigned i = FTy->getNumParams(), e = I.getNumOperands()-3;
+ for (unsigned i = FTy->getNumParams(), e = II->getNumArgOperands();
i != e; ++i)
pushValueAndType(I.getOperand(i), InstID, Vals); // vararg
}
@@ -2736,11 +2839,13 @@ void ModuleBitcodeWriter::writeValueSymbolTable(
// Get the offset of the VST we are writing, and backpatch it into
// the VST forward declaration record.
uint64_t VSTOffset = Stream.GetCurrentBitNo();
- // The BitcodeStartBit was the stream offset of the actual bitcode
- // (e.g. excluding any initial darwin header).
+ // The BitcodeStartBit was the stream offset of the identification block.
VSTOffset -= bitcodeStartBit();
assert((VSTOffset & 31) == 0 && "VST block not 32-bit aligned");
- Stream.BackpatchWord(VSTOffsetPlaceholder, VSTOffset / 32);
+ // Note that we add 1 here because the offset is relative to one word
+ // before the start of the identification block, which was historically
+ // always the start of the regular bitcode header.
+ Stream.BackpatchWord(VSTOffsetPlaceholder, VSTOffset / 32 + 1);
}
Stream.EnterSubblock(bitc::VALUE_SYMTAB_BLOCK_ID, 4);
@@ -2828,7 +2933,10 @@ void ModuleBitcodeWriter::writeValueSymbolTable(
// actual bitcode written to the stream).
uint64_t BitcodeIndex = (*FunctionToBitcodeIndex)[F] - bitcodeStartBit();
assert((BitcodeIndex & 31) == 0 && "function block not 32-bit aligned");
- NameVals.push_back(BitcodeIndex / 32);
+ // Note that we add 1 here because the offset is relative to one word
+ // before the start of the identification block, which was historically
+ // always the start of the regular bitcode header.
+ NameVals.push_back(BitcodeIndex / 32 + 1);
Code = bitc::VST_CODE_FNENTRY;
AbbrevToUse = FnEntry8BitAbbrev;
@@ -2994,7 +3102,8 @@ void ModuleBitcodeWriter::writeFunction(
}
// Emit names for all the instructions etc.
- writeValueSymbolTable(F.getValueSymbolTable());
+ if (auto *Symtab = F.getValueSymbolTable())
+ writeValueSymbolTable(*Symtab);
if (NeedsMetadataAttachment)
writeFunctionMetadataAttachment(F);
@@ -3009,7 +3118,7 @@ void ModuleBitcodeWriter::writeBlockInfo() {
// We only want to emit block info records for blocks that have multiple
// instances: CONSTANTS_BLOCK, FUNCTION_BLOCK and VALUE_SYMTAB_BLOCK.
// Other blocks can define their abbrevs inline.
- Stream.EnterBlockInfoBlock(2);
+ Stream.EnterBlockInfoBlock();
{ // 8-bit fixed-width VST_CODE_ENTRY/VST_CODE_BBENTRY strings.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
@@ -3267,30 +3376,21 @@ void ModuleBitcodeWriter::writePerModuleFunctionSummaryRecord(
NameVals.push_back(ValueID);
FunctionSummary *FS = cast<FunctionSummary>(Summary);
+ if (!FS->type_tests().empty())
+ Stream.EmitRecord(bitc::FS_TYPE_TESTS, FS->type_tests());
+
NameVals.push_back(getEncodedGVSummaryFlags(FS->flags()));
NameVals.push_back(FS->instCount());
NameVals.push_back(FS->refs().size());
- unsigned SizeBeforeRefs = NameVals.size();
for (auto &RI : FS->refs())
NameVals.push_back(VE.getValueID(RI.getValue()));
- // Sort the refs for determinism output, the vector returned by FS->refs() has
- // been initialized from a DenseSet.
- std::sort(NameVals.begin() + SizeBeforeRefs, NameVals.end());
- std::vector<FunctionSummary::EdgeTy> Calls = FS->calls();
- std::sort(Calls.begin(), Calls.end(),
- [this](const FunctionSummary::EdgeTy &L,
- const FunctionSummary::EdgeTy &R) {
- return getValueId(L.first) < getValueId(R.first);
- });
bool HasProfileData = F.getEntryCount().hasValue();
- for (auto &ECI : Calls) {
+ for (auto &ECI : FS->calls()) {
NameVals.push_back(getValueId(ECI.first));
- assert(ECI.second.CallsiteCount > 0 && "Expected at least one callsite");
- NameVals.push_back(ECI.second.CallsiteCount);
if (HasProfileData)
- NameVals.push_back(ECI.second.ProfileCount);
+ NameVals.push_back(static_cast<uint8_t>(ECI.second.Hotness));
}
unsigned FSAbbrev = (HasProfileData ? FSCallsProfileAbbrev : FSCallsAbbrev);
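A small sketch of the per-edge encoding this hunk switches to. The HotnessType enumerators and their values below are assumptions for illustration; only the cast-to-byte layout follows the change above.

    #include <cstdint>
    #include <vector>

    // Stand-in for CalleeInfo::HotnessType; the real enumerators and values may differ.
    enum class HotnessType : uint8_t { Unknown = 0, Cold = 1, None = 2, Hot = 3 };

    // With profile data each call edge contributes (valueid, hotness); without it
    // only the callee's value id is pushed, since callsite counts are gone.
    static void pushCallEdge(std::vector<uint64_t> &NameVals, uint64_t CalleeId,
                             HotnessType Hotness, bool HasProfileData) {
      NameVals.push_back(CalleeId);
      if (HasProfileData)
        NameVals.push_back(static_cast<uint8_t>(Hotness));
    }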
@@ -3307,13 +3407,18 @@ void ModuleBitcodeWriter::writePerModuleFunctionSummaryRecord(
void ModuleBitcodeWriter::writeModuleLevelReferences(
const GlobalVariable &V, SmallVector<uint64_t, 64> &NameVals,
unsigned FSModRefsAbbrev) {
- // Only interested in recording variable defs in the summary.
- if (V.isDeclaration())
+ auto Summaries =
+ Index->findGlobalValueSummaryList(GlobalValue::getGUID(V.getName()));
+ if (Summaries == Index->end()) {
+ // Only declarations should not have a summary (a declaration might however
+ // have a summary if the def was in module level asm).
+ assert(V.isDeclaration());
return;
+ }
+ auto *Summary = Summaries->second.front().get();
NameVals.push_back(VE.getValueID(&V));
- NameVals.push_back(getEncodedGVSummaryFlags(V));
- auto *Summary = Index->getGlobalValueSummary(V);
GlobalVarSummary *VS = cast<GlobalVarSummary>(Summary);
+ NameVals.push_back(getEncodedGVSummaryFlags(VS->flags()));
unsigned SizeBeforeRefs = NameVals.size();
for (auto &RI : VS->refs())
@@ -3330,18 +3435,20 @@ void ModuleBitcodeWriter::writeModuleLevelReferences(
// Current version for the summary.
// This is bumped whenever we introduce changes in the way some record are
// interpreted, like flags for instance.
-static const uint64_t INDEX_VERSION = 1;
+static const uint64_t INDEX_VERSION = 2;
/// Emit the per-module summary section alongside the rest of
/// the module's bitcode.
void ModuleBitcodeWriter::writePerModuleGlobalValueSummary() {
- if (Index->begin() == Index->end())
- return;
-
Stream.EnterSubblock(bitc::GLOBALVAL_SUMMARY_BLOCK_ID, 4);
Stream.EmitRecord(bitc::FS_VERSION, ArrayRef<uint64_t>{INDEX_VERSION});
+ if (Index->begin() == Index->end()) {
+ Stream.ExitBlock();
+ return;
+ }
+
// Abbrev for FS_PERMODULE.
BitCodeAbbrev *Abbv = new BitCodeAbbrev();
Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE));
@@ -3349,7 +3456,7 @@ void ModuleBitcodeWriter::writePerModuleGlobalValueSummary() {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs
- // numrefs x valueid, n x (valueid, callsitecount)
+ // numrefs x valueid, n x (valueid)
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
unsigned FSCallsAbbrev = Stream.EmitAbbrev(Abbv);
@@ -3361,7 +3468,7 @@ void ModuleBitcodeWriter::writePerModuleGlobalValueSummary() {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs
- // numrefs x valueid, n x (valueid, callsitecount, profilecount)
+ // numrefs x valueid, n x (valueid, hotness)
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
unsigned FSCallsProfileAbbrev = Stream.EmitAbbrev(Abbv);
@@ -3387,14 +3494,20 @@ void ModuleBitcodeWriter::writePerModuleGlobalValueSummary() {
// Iterate over the list of functions instead of the Index to
// ensure the ordering is stable.
for (const Function &F : M) {
- if (F.isDeclaration())
- continue;
    // Summary emission does not support anonymous functions, they have to
    // be renamed using the anonymous function renaming pass.
if (!F.hasName())
report_fatal_error("Unexpected anonymous function when writing summary");
- auto *Summary = Index->getGlobalValueSummary(F);
+ auto Summaries =
+ Index->findGlobalValueSummaryList(GlobalValue::getGUID(F.getName()));
+ if (Summaries == Index->end()) {
+ // Only declarations should not have a summary (a declaration might
+ // however have a summary if the def was in module level asm).
+ assert(F.isDeclaration());
+ continue;
+ }
+ auto *Summary = Summaries->second.front().get();
writePerModuleFunctionSummaryRecord(NameVals, Summary, VE.getValueID(&F),
FSCallsAbbrev, FSCallsProfileAbbrev, F);
}
@@ -3412,7 +3525,9 @@ void ModuleBitcodeWriter::writePerModuleGlobalValueSummary() {
auto AliasId = VE.getValueID(&A);
auto AliaseeId = VE.getValueID(Aliasee);
NameVals.push_back(AliasId);
- NameVals.push_back(getEncodedGVSummaryFlags(A));
+ auto *Summary = Index->getGlobalValueSummary(A);
+ AliasSummary *AS = cast<AliasSummary>(Summary);
+ NameVals.push_back(getEncodedGVSummaryFlags(AS->flags()));
NameVals.push_back(AliaseeId);
Stream.EmitRecord(bitc::FS_ALIAS, NameVals, FSAliasAbbrev);
NameVals.clear();
@@ -3434,7 +3549,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs
- // numrefs x valueid, n x (valueid, callsitecount)
+ // numrefs x valueid, n x (valueid)
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
unsigned FSCallsAbbrev = Stream.EmitAbbrev(Abbv);
@@ -3447,7 +3562,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // flags
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // instcount
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numrefs
- // numrefs x valueid, n x (valueid, callsitecount, profilecount)
+ // numrefs x valueid, n x (valueid, hotness)
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
unsigned FSCallsProfileAbbrev = Stream.EmitAbbrev(Abbv);
@@ -3522,6 +3637,9 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
}
auto *FS = cast<FunctionSummary>(S);
+ if (!FS->type_tests().empty())
+ Stream.EmitRecord(bitc::FS_TYPE_TESTS, FS->type_tests());
+
NameVals.push_back(ValueId);
NameVals.push_back(Index.getModuleId(FS->modulePath()));
NameVals.push_back(getEncodedGVSummaryFlags(FS->flags()));
@@ -3534,7 +3652,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
bool HasProfileData = false;
for (auto &EI : FS->calls()) {
- HasProfileData |= EI.second.ProfileCount != 0;
+ HasProfileData |= EI.second.Hotness != CalleeInfo::HotnessType::Unknown;
if (HasProfileData)
break;
}
@@ -3545,10 +3663,8 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
if (!hasValueId(EI.first.getGUID()))
continue;
NameVals.push_back(getValueId(EI.first.getGUID()));
- assert(EI.second.CallsiteCount > 0 && "Expected at least one callsite");
- NameVals.push_back(EI.second.CallsiteCount);
if (HasProfileData)
- NameVals.push_back(EI.second.ProfileCount);
+ NameVals.push_back(static_cast<uint8_t>(EI.second.Hotness));
}
unsigned FSAbbrev = (HasProfileData ? FSCallsProfileAbbrev : FSCallsAbbrev);
@@ -3580,7 +3696,9 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
Stream.ExitBlock();
}
-void ModuleBitcodeWriter::writeIdentificationBlock() {
+/// Create the "IDENTIFICATION_BLOCK_ID" containing a single string with the
+/// current llvm version, and a record for the epoch number.
+void writeIdentificationBlock(BitstreamWriter &Stream) {
Stream.EnterSubblock(bitc::IDENTIFICATION_BLOCK_ID, 5);
// Write the "user readable" string identifying the bitcode producer
@@ -3589,7 +3707,7 @@ void ModuleBitcodeWriter::writeIdentificationBlock() {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Char6));
auto StringAbbrev = Stream.EmitAbbrev(Abbv);
- writeStringRecord(bitc::IDENTIFICATION_CODE_STRING,
+ writeStringRecord(Stream, bitc::IDENTIFICATION_CODE_STRING,
"LLVM" LLVM_VERSION_STRING, StringAbbrev);
// Write the epoch version
@@ -3608,39 +3726,19 @@ void ModuleBitcodeWriter::writeModuleHash(size_t BlockStartPos) {
SHA1 Hasher;
Hasher.update(ArrayRef<uint8_t>((const uint8_t *)&(Buffer)[BlockStartPos],
Buffer.size() - BlockStartPos));
- auto Hash = Hasher.result();
- SmallVector<uint64_t, 20> Vals;
- auto LShift = [&](unsigned char Val, unsigned Amount)
- -> uint64_t { return ((uint64_t)Val) << Amount; };
+ StringRef Hash = Hasher.result();
+ uint32_t Vals[5];
for (int Pos = 0; Pos < 20; Pos += 4) {
- uint32_t SubHash = LShift(Hash[Pos + 0], 24);
- SubHash |= LShift(Hash[Pos + 1], 16) | LShift(Hash[Pos + 2], 8) |
- (unsigned)(unsigned char)Hash[Pos + 3];
- Vals.push_back(SubHash);
+ Vals[Pos / 4] = support::endian::read32be(Hash.data() + Pos);
}
// Emit the finished record.
Stream.EmitRecord(bitc::MODULE_CODE_HASH, Vals);
}
-void BitcodeWriter::write() {
- // Emit the file header first.
- writeBitcodeHeader();
+void ModuleBitcodeWriter::write() {
+ writeIdentificationBlock(Stream);
- writeBlocks();
-}
-
-void ModuleBitcodeWriter::writeBlocks() {
- writeIdentificationBlock();
- writeModule();
-}
-
-void IndexBitcodeWriter::writeBlocks() {
- // Index contains only a single outer (module) block.
- writeIndex();
-}
-
-void ModuleBitcodeWriter::writeModule() {
Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3);
size_t BlockStartPos = Buffer.size();
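The hash portion of this hunk replaces manual shift-and-or packing with support::endian::read32be; a self-contained sketch of what each record word ends up containing:

    #include <cstdint>

    // Read four bytes big-endian; equivalent to support::endian::read32be here.
    static uint32_t read32be(const uint8_t *P) {
      return (uint32_t(P[0]) << 24) | (uint32_t(P[1]) << 16) |
             (uint32_t(P[2]) << 8) | uint32_t(P[3]);
    }

    // A 20-byte SHA1 digest becomes five big-endian 32-bit record values.
    static void hashToRecord(const uint8_t Digest[20], uint32_t Vals[5]) {
      for (int Pos = 0; Pos < 20; Pos += 4)
        Vals[Pos / 4] = read32be(Digest + Pos);
    }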
@@ -3769,7 +3867,7 @@ static void emitDarwinBCHeaderAndTrailer(SmallVectorImpl<char> &Buffer,
}
/// Helper to write the header common to all bitcode files.
-void BitcodeWriter::writeBitcodeHeader() {
+static void writeBitcodeHeader(BitstreamWriter &Stream) {
// Emit the file header.
Stream.Emit((unsigned)'B', 8);
Stream.Emit((unsigned)'C', 8);
@@ -3779,6 +3877,22 @@ void BitcodeWriter::writeBitcodeHeader() {
Stream.Emit(0xD, 4);
}
+BitcodeWriter::BitcodeWriter(SmallVectorImpl<char> &Buffer)
+ : Buffer(Buffer), Stream(new BitstreamWriter(Buffer)) {
+ writeBitcodeHeader(*Stream);
+}
+
+BitcodeWriter::~BitcodeWriter() = default;
+
+void BitcodeWriter::writeModule(const Module *M,
+ bool ShouldPreserveUseListOrder,
+ const ModuleSummaryIndex *Index,
+ bool GenerateHash) {
+ ModuleBitcodeWriter ModuleWriter(
+ M, Buffer, *Stream, ShouldPreserveUseListOrder, Index, GenerateHash);
+ ModuleWriter.write();
+}
+
/// WriteBitcodeToFile - Write the specified module to the specified output
/// stream.
void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out,
@@ -3794,10 +3908,8 @@ void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out,
if (TT.isOSDarwin() || TT.isOSBinFormatMachO())
Buffer.insert(Buffer.begin(), BWH_HeaderSize, 0);
- // Emit the module into the buffer.
- ModuleBitcodeWriter ModuleWriter(M, Buffer, ShouldPreserveUseListOrder, Index,
- GenerateHash);
- ModuleWriter.write();
+ BitcodeWriter Writer(Buffer);
+ Writer.writeModule(M, ShouldPreserveUseListOrder, Index, GenerateHash);
if (TT.isOSDarwin() || TT.isOSBinFormatMachO())
emitDarwinBCHeaderAndTrailer(Buffer, TT);
@@ -3806,7 +3918,7 @@ void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out,
Out.write((char*)&Buffer.front(), Buffer.size());
}
-void IndexBitcodeWriter::writeIndex() {
+void IndexBitcodeWriter::write() {
Stream.EnterSubblock(bitc::MODULE_BLOCK_ID, 3);
SmallVector<unsigned, 1> Vals;
@@ -3836,11 +3948,14 @@ void IndexBitcodeWriter::writeIndex() {
// index for a distributed backend, provide a \p ModuleToSummariesForIndex map.
void llvm::WriteIndexToFile(
const ModuleSummaryIndex &Index, raw_ostream &Out,
- std::map<std::string, GVSummaryMapTy> *ModuleToSummariesForIndex) {
+ const std::map<std::string, GVSummaryMapTy> *ModuleToSummariesForIndex) {
SmallVector<char, 0> Buffer;
Buffer.reserve(256 * 1024);
- IndexBitcodeWriter IndexWriter(Buffer, Index, ModuleToSummariesForIndex);
+ BitstreamWriter Stream(Buffer);
+ writeBitcodeHeader(Stream);
+
+ IndexBitcodeWriter IndexWriter(Stream, Index, ModuleToSummariesForIndex);
IndexWriter.write();
Out.write((char *)&Buffer.front(), Buffer.size());
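Taken together, the hunks in this file split header emission (now done in the BitcodeWriter constructor) from block emission. A usage sketch along the lines of the rewritten WriteBitcodeToFile, assuming an existing Module *M and a raw_ostream Out:

    SmallVector<char, 0> Buffer;
    BitcodeWriter Writer(Buffer);        // constructor emits the 'BC' 0xC0DE header
    Writer.writeModule(M, /*ShouldPreserveUseListOrder=*/false,
                       /*Index=*/nullptr, /*GenerateHash=*/false);
    Out.write(Buffer.data(), Buffer.size());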
diff --git a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp
index 3e89ade424a2..80cab762a68c 100644
--- a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp
+++ b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp
@@ -13,18 +13,17 @@
#include "llvm/Bitcode/BitcodeWriterPass.h"
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
-#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
using namespace llvm;
-PreservedAnalyses BitcodeWriterPass::run(Module &M, ModuleAnalysisManager &) {
- std::unique_ptr<ModuleSummaryIndex> Index;
- if (EmitSummaryIndex)
- Index = ModuleSummaryIndexBuilder(&M).takeIndex();
- WriteBitcodeToFile(&M, OS, ShouldPreserveUseListOrder, Index.get(),
- EmitModuleHash);
+PreservedAnalyses BitcodeWriterPass::run(Module &M, ModuleAnalysisManager &AM) {
+ const ModuleSummaryIndex *Index =
+ EmitSummaryIndex ? &(AM.getResult<ModuleSummaryIndexAnalysis>(M))
+ : nullptr;
+ WriteBitcodeToFile(&M, OS, ShouldPreserveUseListOrder, Index, EmitModuleHash);
return PreservedAnalyses::all();
}
@@ -49,7 +48,7 @@ namespace {
initializeWriteBitcodePassPass(*PassRegistry::getPassRegistry());
}
- const char *getPassName() const override { return "Bitcode Writer"; }
+ StringRef getPassName() const override { return "Bitcode Writer"; }
bool runOnModule(Module &M) override {
const ModuleSummaryIndex *Index =
diff --git a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index a736884be672..bb908618b679 100644
--- a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -161,8 +161,8 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
// Mark live-out callee-saved registers. In a return block this is
// all callee-saved registers. In non-return this is any
// callee-saved register that is not saved in the prolog.
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- BitVector Pristine = MFI->getPristineRegs(MF);
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ BitVector Pristine = MFI.getPristineRegs(MF);
for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) {
unsigned Reg = *I;
if (!IsReturnBlock && !Pristine.test(Reg)) continue;
diff --git a/contrib/llvm/lib/CodeGen/AllocationOrder.cpp b/contrib/llvm/lib/CodeGen/AllocationOrder.cpp
index 40451c0d6c19..d840a2f69ab3 100644
--- a/contrib/llvm/lib/CodeGen/AllocationOrder.cpp
+++ b/contrib/llvm/lib/CodeGen/AllocationOrder.cpp
@@ -48,7 +48,7 @@ AllocationOrder::AllocationOrder(unsigned VirtReg,
});
#ifndef NDEBUG
for (unsigned I = 0, E = Hints.size(); I != E; ++I)
- assert(std::find(Order.begin(), Order.end(), Hints[I]) != Order.end() &&
+ assert(is_contained(Order, Hints[I]) &&
"Target hint is outside allocation order.");
#endif
}
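llvm::is_contained (pulled in from ADT/STLExtras.h) is just a find-against-end wrapper; a generic sketch of the equivalence this replacement relies on:

    #include <algorithm>
    #include <iterator>

    // What an is_contained-style helper reduces to: std::find compared
    // against the end iterator.
    template <typename Range, typename T>
    bool isContained(const Range &R, const T &Value) {
      return std::find(std::begin(R), std::end(R), Value) != std::end(R);
    }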
diff --git a/contrib/llvm/lib/CodeGen/AllocationOrder.h b/contrib/llvm/lib/CodeGen/AllocationOrder.h
index 2aee3a63a2b1..8223a52e333b 100644
--- a/contrib/llvm/lib/CodeGen/AllocationOrder.h
+++ b/contrib/llvm/lib/CodeGen/AllocationOrder.h
@@ -18,6 +18,7 @@
#define LLVM_LIB_CODEGEN_ALLOCATIONORDER_H
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/MC/MCRegisterInfo.h"
namespace llvm {
@@ -79,9 +80,7 @@ public:
bool isHint() const { return Pos <= 0; }
/// Return true if PhysReg is a preferred register.
- bool isHint(unsigned PhysReg) const {
- return std::find(Hints.begin(), Hints.end(), PhysReg) != Hints.end();
- }
+ bool isHint(unsigned PhysReg) const { return is_contained(Hints, PhysReg); }
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm/lib/CodeGen/Analysis.cpp
index d69073458cdf..0678bce449ed 100644
--- a/contrib/llvm/lib/CodeGen/Analysis.cpp
+++ b/contrib/llvm/lib/CodeGen/Analysis.cpp
@@ -525,19 +525,15 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, const TargetMachine &TM) {
F, I, Ret, *TM.getSubtargetImpl(*F)->getTargetLowering());
}
-bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
- const Instruction *I,
- const ReturnInst *Ret,
- const TargetLoweringBase &TLI) {
- // If the block ends with a void return or unreachable, it doesn't matter
- // what the call's return type is.
- if (!Ret || Ret->getNumOperands() == 0) return true;
+bool llvm::attributesPermitTailCall(const Function *F, const Instruction *I,
+ const ReturnInst *Ret,
+ const TargetLoweringBase &TLI,
+ bool *AllowDifferingSizes) {
+ // ADS may be null, so don't write to it directly.
+ bool DummyADS;
+ bool &ADS = AllowDifferingSizes ? *AllowDifferingSizes : DummyADS;
+ ADS = true;
- // If the return value is undef, it doesn't matter what the call's
- // return type is.
- if (isa<UndefValue>(Ret->getOperand(0))) return true;
-
- // Make sure the attributes attached to each return are compatible.
AttrBuilder CallerAttrs(F->getAttributes(),
AttributeSet::ReturnIndex);
AttrBuilder CalleeAttrs(cast<CallInst>(I)->getAttributes(),
@@ -545,22 +541,21 @@ bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
// Noalias is completely benign as far as calling convention goes, it
// shouldn't affect whether the call is a tail call.
- CallerAttrs = CallerAttrs.removeAttribute(Attribute::NoAlias);
- CalleeAttrs = CalleeAttrs.removeAttribute(Attribute::NoAlias);
+ CallerAttrs.removeAttribute(Attribute::NoAlias);
+ CalleeAttrs.removeAttribute(Attribute::NoAlias);
- bool AllowDifferingSizes = true;
if (CallerAttrs.contains(Attribute::ZExt)) {
if (!CalleeAttrs.contains(Attribute::ZExt))
return false;
- AllowDifferingSizes = false;
+ ADS = false;
CallerAttrs.removeAttribute(Attribute::ZExt);
CalleeAttrs.removeAttribute(Attribute::ZExt);
} else if (CallerAttrs.contains(Attribute::SExt)) {
if (!CalleeAttrs.contains(Attribute::SExt))
return false;
- AllowDifferingSizes = false;
+ ADS = false;
CallerAttrs.removeAttribute(Attribute::SExt);
CalleeAttrs.removeAttribute(Attribute::SExt);
}
@@ -568,7 +563,24 @@ bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
// If they're still different, there's some facet we don't understand
// (currently only "inreg", but in future who knows). It may be OK but the
// only safe option is to reject the tail call.
- if (CallerAttrs != CalleeAttrs)
+ return CallerAttrs == CalleeAttrs;
+}
+
+bool llvm::returnTypeIsEligibleForTailCall(const Function *F,
+ const Instruction *I,
+ const ReturnInst *Ret,
+ const TargetLoweringBase &TLI) {
+ // If the block ends with a void return or unreachable, it doesn't matter
+ // what the call's return type is.
+ if (!Ret || Ret->getNumOperands() == 0) return true;
+
+ // If the return value is undef, it doesn't matter what the call's
+ // return type is.
+ if (isa<UndefValue>(Ret->getOperand(0))) return true;
+
+ // Make sure the attributes attached to each return are compatible.
+ bool AllowDifferingSizes;
+ if (!attributesPermitTailCall(F, I, Ret, TLI, &AllowDifferingSizes))
return false;
const Value *RetVal = Ret->getOperand(0), *CallVal = I;
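The split above lets callers opt out of the AllowDifferingSizes result by passing nullptr; a minimal sketch of that optional out-parameter idiom (the surrounding attribute analysis is elided):

    // If the caller passes nullptr, writes are redirected to a throwaway local.
    static bool analyzeAttrs(bool *AllowDifferingSizes) {
      bool DummyADS;
      bool &ADS = AllowDifferingSizes ? *AllowDifferingSizes : DummyADS;
      ADS = true;          // safe whether or not the caller wants the result
      // ... attribute comparison would go here and may clear ADS ...
      return true;
    }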
@@ -672,7 +684,7 @@ llvm::getFuncletMembership(const MachineFunction &MF) {
DenseMap<const MachineBasicBlock *, int> FuncletMembership;
// We don't have anything to do if there aren't any EH pads.
- if (!MF.getMMI().hasEHFunclets())
+ if (!MF.hasEHFunclets())
return FuncletMembership;
int EntryBBNumber = MF.front().getNumber();
@@ -694,9 +706,10 @@ llvm::getFuncletMembership(const MachineFunction &MF) {
}
MachineBasicBlock::const_iterator MBBI = MBB.getFirstTerminator();
+
// CatchPads are not funclets for SEH so do not consider CatchRet to
// transfer control to another funclet.
- if (MBBI->getOpcode() != TII->getCatchReturnOpcode())
+ if (MBBI == MBB.end() || MBBI->getOpcode() != TII->getCatchReturnOpcode())
continue;
// FIXME: SEH CatchPads are not necessarily in the parent function:
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
index 5294c98e314d..0c79def87933 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -43,13 +43,6 @@ ARMTargetStreamer &ARMException::getTargetStreamer() {
return static_cast<ARMTargetStreamer &>(TS);
}
-/// endModule - Emit all exception information that should come after the
-/// content.
-void ARMException::endModule() {
- if (shouldEmitCFI)
- Asm->OutStreamer->EmitCFISections(false, true);
-}
-
void ARMException::beginFunction(const MachineFunction *MF) {
if (Asm->MAI->getExceptionHandlingType() == ExceptionHandling::ARM)
getTargetStreamer().emitFnStart();
@@ -57,7 +50,13 @@ void ARMException::beginFunction(const MachineFunction *MF) {
AsmPrinter::CFIMoveType MoveType = Asm->needsCFIMoves();
assert(MoveType != AsmPrinter::CFI_M_EH &&
"non-EH CFI not yet supported in prologue with EHABI lowering");
+
if (MoveType == AsmPrinter::CFI_M_Debug) {
+ if (!hasEmittedCFISections) {
+ Asm->OutStreamer->EmitCFISections(false, true);
+ hasEmittedCFISections = true;
+ }
+
shouldEmitCFI = true;
Asm->OutStreamer->EmitCFIStartProc(false);
}
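A sketch of the emit-once change above: the .cfi_sections directive now goes out lazily, the first time a function needs debug-frame CFI, guarded by a flag rather than being emitted from endModule.

    // Generic once-only emitter; the real code keeps hasEmittedCFISections as a
    // member of the exception-handling handler.
    struct CFISectionsEmitter {
      bool hasEmittedCFISections = false;
      template <typename EmitFn> void emitOnce(EmitFn Emit) {
        if (hasEmittedCFISections)
          return;
        Emit();                       // e.g. EmitCFISections(false, true)
        hasEmittedCFISections = true;
      }
    };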
@@ -75,7 +74,7 @@ void ARMException::endFunction(const MachineFunction *MF) {
F->hasPersonalityFn() && !isNoOpWithoutInvoke(classifyEHPersonality(Per)) &&
F->needsUnwindTableEntry();
bool shouldEmitPersonality = forceEmitPersonality ||
- !MMI->getLandingPads().empty();
+ !MF->getLandingPads().empty();
if (!Asm->MF->getFunction()->needsUnwindTableEntry() &&
!shouldEmitPersonality)
ATS.emitCantUnwind();
@@ -99,8 +98,9 @@ void ARMException::endFunction(const MachineFunction *MF) {
}
void ARMException::emitTypeInfos(unsigned TTypeEncoding) {
- const std::vector<const GlobalValue *> &TypeInfos = MMI->getTypeInfos();
- const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
+ const MachineFunction *MF = Asm->MF;
+ const std::vector<const GlobalValue *> &TypeInfos = MF->getTypeInfos();
+ const std::vector<unsigned> &FilterIds = MF->getFilterIds();
bool VerboseAsm = Asm->OutStreamer->isVerboseAsm();
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp
index 8c6838394ac9..ec552e0640e9 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AddressPool.cpp
@@ -14,8 +14,6 @@
using namespace llvm;
-class MCExpr;
-
unsigned AddressPool::getIndex(const MCSymbol *Sym, bool TLS) {
HasBeenUsed = true;
auto IterBool =
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 272baceeed89..de0a4f0befa1 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -55,10 +55,15 @@ using namespace llvm;
#define DEBUG_TYPE "asm-printer"
-static const char *const DWARFGroupName = "DWARF Emission";
-static const char *const DbgTimerName = "Debug Info Emission";
-static const char *const EHTimerName = "DWARF Exception Writer";
-static const char *const CodeViewLineTablesGroupName = "CodeView Line Tables";
+static const char *const DWARFGroupName = "dwarf";
+static const char *const DWARFGroupDescription = "DWARF Emission";
+static const char *const DbgTimerName = "emit";
+static const char *const DbgTimerDescription = "Debug Info Emission";
+static const char *const EHTimerName = "write_exception";
+static const char *const EHTimerDescription = "DWARF Exception Writer";
+static const char *const CodeViewLineTablesGroupName = "linetables";
+static const char *const CodeViewLineTablesGroupDescription =
+ "CodeView Line Tables";
STATISTIC(EmittedInsts, "Number of machine instrs printed");
@@ -143,7 +148,7 @@ const DataLayout &AsmPrinter::getDataLayout() const {
}
// Do not use the cached DataLayout because some client use it without a Module
-// (llmv-dsymutil, llvm-dwarfdump).
+// (llvm-dsymutil, llvm-dwarfdump).
unsigned AsmPrinter::getPointerSize() const { return TM.getPointerSize(); }
const MCSubtargetInfo &AsmPrinter::getSubtargetInfo() const {
@@ -155,17 +160,11 @@ void AsmPrinter::EmitToStreamer(MCStreamer &S, const MCInst &Inst) {
S.EmitInstruction(Inst, getSubtargetInfo());
}
-StringRef AsmPrinter::getTargetTriple() const {
- return TM.getTargetTriple().str();
-}
-
/// getCurrentSection() - Return the current section we are emitting to.
const MCSection *AsmPrinter::getCurrentSection() const {
- return OutStreamer->getCurrentSection().first;
+ return OutStreamer->getCurrentSectionOnly();
}
-
-
void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
@@ -184,8 +183,6 @@ bool AsmPrinter::doInitialization(Module &M) {
OutStreamer->InitSections(false);
- Mang = new Mangler();
-
// Emit the version-min deployment target directive if needed.
//
// FIXME: If we end up with a collection of these sorts of Darwin-specific
@@ -194,7 +191,7 @@ bool AsmPrinter::doInitialization(Module &M) {
// alternative is duplicated code in each of the target asm printers that
// use the directive, where it would need the same conditionalization
// anyway.
- Triple TT(getTargetTriple());
+ const Triple &TT = TM.getTargetTriple();
// If there is a version specified, Major will be non-zero.
if (TT.isOSDarwin() && TT.getOSMajorVersion() != 0) {
unsigned Major, Minor, Update;
@@ -250,15 +247,18 @@ bool AsmPrinter::doInitialization(Module &M) {
if (MAI->doesSupportDebugInformation()) {
bool EmitCodeView = MMI->getModule()->getCodeViewFlag();
- if (EmitCodeView && TM.getTargetTriple().isKnownWindowsMSVCEnvironment()) {
+ if (EmitCodeView && (TM.getTargetTriple().isKnownWindowsMSVCEnvironment() ||
+ TM.getTargetTriple().isWindowsItaniumEnvironment())) {
Handlers.push_back(HandlerInfo(new CodeViewDebug(this),
- DbgTimerName,
- CodeViewLineTablesGroupName));
+ DbgTimerName, DbgTimerDescription,
+ CodeViewLineTablesGroupName,
+ CodeViewLineTablesGroupDescription));
}
if (!EmitCodeView || MMI->getModule()->getDwarfVersion()) {
DD = new DwarfDebug(this, &M);
DD->beginModule();
- Handlers.push_back(HandlerInfo(DD, DbgTimerName, DWARFGroupName));
+ Handlers.push_back(HandlerInfo(DD, DbgTimerName, DbgTimerDescription,
+ DWARFGroupName, DWARFGroupDescription));
}
}
@@ -286,7 +286,8 @@ bool AsmPrinter::doInitialization(Module &M) {
break;
}
if (ES)
- Handlers.push_back(HandlerInfo(ES, EHTimerName, DWARFGroupName));
+ Handlers.push_back(HandlerInfo(ES, EHTimerName, EHTimerDescription,
+ DWARFGroupName, DWARFGroupDescription));
return false;
}
@@ -340,11 +341,11 @@ void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const {
void AsmPrinter::getNameWithPrefix(SmallVectorImpl<char> &Name,
const GlobalValue *GV) const {
- TM.getNameWithPrefix(Name, GV, *Mang);
+ TM.getNameWithPrefix(Name, GV, getObjFileLowering().getMangler());
}
MCSymbol *AsmPrinter::getSymbol(const GlobalValue *GV) const {
- return TM.getSymbol(GV, *Mang);
+ return TM.getSymbol(GV);
}
/// EmitGlobalVariable - Emit the specified global variable to the .s file.
@@ -407,7 +408,9 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
unsigned AlignLog = getGVAlignmentLog2(GV, DL);
for (const HandlerInfo &HI : Handlers) {
- NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled);
+ NamedRegionTimer T(HI.TimerName, HI.TimerDescription,
+ HI.TimerGroupName, HI.TimerGroupDescription,
+ TimePassesIsEnabled);
HI.Handler->setSymbolSize(GVSym, Size);
}
@@ -424,8 +427,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
}
// Determine to which section this global should be emitted.
- MCSection *TheSection =
- getObjFileLowering().SectionForGlobal(GV, GVKind, *Mang, TM);
+ MCSection *TheSection = getObjFileLowering().SectionForGlobal(GV, GVKind, TM);
// If we have a bss global going to a section that supports the
// zerofill directive, do so here.
@@ -483,7 +485,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
if (GVKind.isThreadLocal() && MAI->hasMachoTBSSDirective()) {
// Emit the .tbss symbol
MCSymbol *MangSym =
- OutContext.getOrCreateSymbol(GVSym->getName() + Twine("$tlv$init"));
+ OutContext.getOrCreateSymbol(GVSym->getName() + Twine("$tlv$init"));
if (GVKind.isThreadBSS()) {
TheSection = getObjFileLowering().getTLSBSSSection();
@@ -535,7 +537,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
if (MAI->hasDotTypeDotSizeDirective())
// .size foo, 42
- OutStreamer->emitELFSize(cast<MCSymbolELF>(EmittedInitSym),
+ OutStreamer->emitELFSize(EmittedInitSym,
MCConstantExpr::create(Size, OutContext));
OutStreamer->AddBlankLine();
@@ -550,8 +552,7 @@ void AsmPrinter::EmitFunctionHeader() {
// Print the 'header' of function.
const Function *F = MF->getFunction();
- OutStreamer->SwitchSection(
- getObjFileLowering().SectionForGlobal(F, *Mang, TM));
+ OutStreamer->SwitchSection(getObjFileLowering().SectionForGlobal(F, TM));
EmitVisibility(CurrentFnSym, F->getVisibility());
EmitLinkage(F, CurrentFnSym);
@@ -598,7 +599,8 @@ void AsmPrinter::EmitFunctionHeader() {
// Emit pre-function debug and/or EH information.
for (const HandlerInfo &HI : Handlers) {
- NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled);
+ NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
+ HI.TimerGroupDescription, TimePassesIsEnabled);
HI.Handler->beginFunction(MF);
}
@@ -632,26 +634,26 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
// Check for spills and reloads
int FI;
- const MachineFrameInfo *FrameInfo = MF->getFrameInfo();
+ const MachineFrameInfo &MFI = MF->getFrameInfo();
// We assume a single instruction only has a spill or reload, not
// both.
const MachineMemOperand *MMO;
if (TII->isLoadFromStackSlotPostFE(MI, FI)) {
- if (FrameInfo->isSpillSlotObjectIndex(FI)) {
+ if (MFI.isSpillSlotObjectIndex(FI)) {
MMO = *MI.memoperands_begin();
CommentOS << MMO->getSize() << "-byte Reload\n";
}
} else if (TII->hasLoadFromStackSlot(MI, MMO, FI)) {
- if (FrameInfo->isSpillSlotObjectIndex(FI))
+ if (MFI.isSpillSlotObjectIndex(FI))
CommentOS << MMO->getSize() << "-byte Folded Reload\n";
} else if (TII->isStoreToStackSlotPostFE(MI, FI)) {
- if (FrameInfo->isSpillSlotObjectIndex(FI)) {
+ if (MFI.isSpillSlotObjectIndex(FI)) {
MMO = *MI.memoperands_begin();
CommentOS << MMO->getSize() << "-byte Spill\n";
}
} else if (TII->hasStoreToStackSlot(MI, MMO, FI)) {
- if (FrameInfo->isSpillSlotObjectIndex(FI))
+ if (MFI.isSpillSlotObjectIndex(FI))
CommentOS << MMO->getSize() << "-byte Folded Spill\n";
}
@@ -711,9 +713,10 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
OS << V->getName();
const DIExpression *Expr = MI->getDebugExpression();
- if (Expr->isBitPiece())
- OS << " [bit_piece offset=" << Expr->getBitPieceOffset()
- << " size=" << Expr->getBitPieceSize() << "]";
+ auto Fragment = Expr->getFragmentInfo();
+ if (Fragment)
+ OS << " [fragment offset=" << Fragment->OffsetInBits
+ << " size=" << Fragment->SizeInBits << "]";
OS << " <- ";
// The second operand is only an offset if it's an immediate.
@@ -721,21 +724,21 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
int64_t Offset = Deref ? MI->getOperand(1).getImm() : 0;
for (unsigned i = 0; i < Expr->getNumElements(); ++i) {
- if (Deref) {
+ uint64_t Op = Expr->getElement(i);
+ if (Op == dwarf::DW_OP_LLVM_fragment) {
+ // There can't be any operands after this in a valid expression
+ break;
+ } else if (Deref) {
// We currently don't support extra Offsets or derefs after the first
// one. Bail out early instead of emitting an incorrect comment
OS << " [complex expression]";
AP.OutStreamer->emitRawComment(OS.str());
return true;
- }
- uint64_t Op = Expr->getElement(i);
- if (Op == dwarf::DW_OP_deref) {
+ } else if (Op == dwarf::DW_OP_deref) {
Deref = true;
continue;
- } else if (Op == dwarf::DW_OP_bit_piece) {
- // There can't be any operands after this in a valid expression
- break;
}
+
uint64_t ExtraOffset = Expr->getElement(i++);
if (Op == dwarf::DW_OP_plus)
Offset += ExtraOffset;
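A toy model of the expression walk above (the opcode values are placeholders, not the real DWARF constants): plus and minus fold their operand into a running offset, and a fragment operator ends the expression.

    #include <cstdint>
    #include <vector>

    enum : uint64_t { OP_PLUS = 1, OP_MINUS = 2, OP_FRAGMENT = 3 };

    // Fold a flat (op, operand) element list into a byte offset.
    static int64_t foldOffset(const std::vector<uint64_t> &Elems) {
      int64_t Offset = 0;
      size_t I = 0;
      while (I < Elems.size()) {
        uint64_t Op = Elems[I++];
        if (Op == OP_FRAGMENT)
          break;                          // nothing may follow a fragment operator
        if (I == Elems.size())
          break;                          // malformed: op without an operand
        uint64_t Operand = Elems[I++];
        if (Op == OP_PLUS)
          Offset += static_cast<int64_t>(Operand);
        else if (Op == OP_MINUS)
          Offset -= static_cast<int64_t>(Operand);
      }
      return Offset;
    }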
@@ -756,7 +759,7 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) {
// There is no good way to print long double. Convert a copy to
// double. Ah well, it's only a comment.
bool ignored;
- APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
+ APF.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
&ignored);
OS << "(long double) " << APF.convertToDouble();
}
@@ -819,8 +822,7 @@ void AsmPrinter::emitCFIInstruction(const MachineInstr &MI) {
if (needsCFIMoves() == CFI_M_None)
return;
- const MachineModuleInfo &MMI = MF->getMMI();
- const std::vector<MCCFIInstruction> &Instrs = MMI.getFrameInstructions();
+ const std::vector<MCCFIInstruction> &Instrs = MF->getFrameInstructions();
unsigned CFIIndex = MI.getOperand(0).getCFIIndex();
const MCCFIInstruction &CFI = Instrs[CFIIndex];
emitCFIInstruction(CFI);
@@ -862,7 +864,8 @@ void AsmPrinter::EmitFunctionBody() {
if (ShouldPrintDebugScopes) {
for (const HandlerInfo &HI : Handlers) {
- NamedRegionTimer T(HI.TimerName, HI.TimerGroupName,
+ NamedRegionTimer T(HI.TimerName, HI.TimerDescription,
+ HI.TimerGroupName, HI.TimerGroupDescription,
TimePassesIsEnabled);
HI.Handler->beginInstruction(&MI);
}
@@ -906,7 +909,8 @@ void AsmPrinter::EmitFunctionBody() {
if (ShouldPrintDebugScopes) {
for (const HandlerInfo &HI : Handlers) {
- NamedRegionTimer T(HI.TimerName, HI.TimerGroupName,
+ NamedRegionTimer T(HI.TimerName, HI.TimerDescription,
+ HI.TimerGroupName, HI.TimerGroupDescription,
TimePassesIsEnabled);
HI.Handler->endInstruction();
}
@@ -944,8 +948,8 @@ void AsmPrinter::EmitFunctionBody() {
// Emit target-specific gunk after the function body.
EmitFunctionBodyEnd();
- if (!MMI->getLandingPads().empty() || MMI->hasDebugInfo() ||
- MMI->hasEHFunclets() || MAI->hasDotTypeDotSizeDirective()) {
+ if (!MF->getLandingPads().empty() || MMI->hasDebugInfo() ||
+ MF->hasEHFunclets() || MAI->hasDotTypeDotSizeDirective()) {
// Create a symbol for the end of function.
CurrentFnEnd = createTempSymbol("func_end");
OutStreamer->EmitLabel(CurrentFnEnd);
@@ -959,12 +963,12 @@ void AsmPrinter::EmitFunctionBody() {
const MCExpr *SizeExp = MCBinaryExpr::createSub(
MCSymbolRefExpr::create(CurrentFnEnd, OutContext),
MCSymbolRefExpr::create(CurrentFnSymForSize, OutContext), OutContext);
- if (auto Sym = dyn_cast<MCSymbolELF>(CurrentFnSym))
- OutStreamer->emitELFSize(Sym, SizeExp);
+ OutStreamer->emitELFSize(CurrentFnSym, SizeExp);
}
for (const HandlerInfo &HI : Handlers) {
- NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled);
+ NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
+ HI.TimerGroupDescription, TimePassesIsEnabled);
HI.Handler->markFunctionEnd();
}
@@ -973,10 +977,10 @@ void AsmPrinter::EmitFunctionBody() {
// Emit post-function debug and/or EH information.
for (const HandlerInfo &HI : Handlers) {
- NamedRegionTimer T(HI.TimerName, HI.TimerGroupName, TimePassesIsEnabled);
+ NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
+ HI.TimerGroupDescription, TimePassesIsEnabled);
HI.Handler->endFunction(MF);
}
- MMI->EndFunction();
OutStreamer->AddBlankLine();
}
@@ -1100,8 +1104,7 @@ void AsmPrinter::emitGlobalIndirectSymbol(Module &M,
(!BaseObject || BaseObject->hasPrivateLinkage())) {
const DataLayout &DL = M.getDataLayout();
uint64_t Size = DL.getTypeAllocSize(GA->getValueType());
- OutStreamer->emitELFSize(cast<MCSymbolELF>(Name),
- MCConstantExpr::create(Size, OutContext));
+ OutStreamer->emitELFSize(Name, MCConstantExpr::create(Size, OutContext));
}
}
}
@@ -1143,7 +1146,7 @@ bool AsmPrinter::doFinalization(Module &M) {
SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags;
M.getModuleFlagsMetadata(ModuleFlags);
if (!ModuleFlags.empty())
- TLOF.emitModuleFlags(*OutStreamer, ModuleFlags, *Mang, TM);
+ TLOF.emitModuleFlags(*OutStreamer, ModuleFlags, TM);
if (TM.getTargetTriple().isOSBinFormatELF()) {
MachineModuleInfoELF &MMIELF = MMI->getObjFileInfo<MachineModuleInfoELF>();
@@ -1164,8 +1167,8 @@ bool AsmPrinter::doFinalization(Module &M) {
// Finalize debug and EH information.
for (const HandlerInfo &HI : Handlers) {
- NamedRegionTimer T(HI.TimerName, HI.TimerGroupName,
- TimePassesIsEnabled);
+ NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
+ HI.TimerGroupDescription, TimePassesIsEnabled);
HI.Handler->endModule();
delete HI.Handler;
}
@@ -1246,7 +1249,6 @@ bool AsmPrinter::doFinalization(Module &M) {
// after everything else has gone out.
EmitEndOfAsmFile(M);
- delete Mang; Mang = nullptr;
MMI = nullptr;
OutStreamer->Finish();
@@ -1269,8 +1271,8 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
CurrentFnBegin = nullptr;
CurExceptionSym = nullptr;
bool NeedsLocalForSize = MAI->needsLocalForSize();
- if (!MMI->getLandingPads().empty() || MMI->hasDebugInfo() ||
- MMI->hasEHFunclets() || NeedsLocalForSize) {
+ if (!MF.getLandingPads().empty() || MMI->hasDebugInfo() ||
+ MF.hasEHFunclets() || NeedsLocalForSize) {
CurrentFnBegin = createTempSymbol("func_begin");
if (NeedsLocalForSize)
CurrentFnSymForSize = CurrentFnBegin;
@@ -1392,7 +1394,7 @@ void AsmPrinter::EmitJumpTableInfo() {
*F);
if (JTInDiffSection) {
// Drop it in the readonly section.
- MCSection *ReadOnlySection = TLOF.getSectionForJumpTable(*F, *Mang, TM);
+ MCSection *ReadOnlySection = TLOF.getSectionForJumpTable(*F, TM);
OutStreamer->SwitchSection(ReadOnlySection);
}
@@ -1536,12 +1538,6 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
EmitXXStructorList(GV->getParent()->getDataLayout(), GV->getInitializer(),
/* isCtor */ true);
- if (TM.getRelocationModel() == Reloc::Static &&
- MAI->hasStaticCtorDtorReferenceInStaticMode()) {
- StringRef Sym(".constructors_used");
- OutStreamer->EmitSymbolAttribute(OutContext.getOrCreateSymbol(Sym),
- MCSA_Reference);
- }
return true;
}
@@ -1549,12 +1545,6 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
EmitXXStructorList(GV->getParent()->getDataLayout(), GV->getInitializer(),
/* isCtor */ false);
- if (TM.getRelocationModel() == Reloc::Static &&
- MAI->hasStaticCtorDtorReferenceInStaticMode()) {
- StringRef Sym(".destructors_used");
- OutStreamer->EmitSymbolAttribute(OutContext.getOrCreateSymbol(Sym),
- MCSA_Reference);
- }
return true;
}
@@ -1699,7 +1689,9 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
unsigned Size,
bool IsSectionRelative) const {
if (MAI->needsDwarfSectionOffsetDirective() && IsSectionRelative) {
- OutStreamer->EmitCOFFSecRel32(Label);
+ OutStreamer->EmitCOFFSecRel32(Label, Offset);
+ if (Size > 4)
+ OutStreamer->EmitZeros(Size - 4);
return;
}
@@ -1764,7 +1756,7 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
// If the code isn't optimized, there may be outstanding folding
// opportunities. Attempt to fold the expression using DataLayout as a
// last resort before giving up.
- if (Constant *C = ConstantFoldConstantExpression(CE, getDataLayout()))
+ if (Constant *C = ConstantFoldConstant(CE, getDataLayout()))
if (C != CE)
return lowerConstant(C);
@@ -1796,7 +1788,7 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
// expression properly. This is important for differences between
// blockaddress labels. Since the two labels are in the same function, it
// is reasonable to treat their delta as a 32-bit value.
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
case Instruction::BitCast:
return lowerConstant(CE->getOperand(0));
@@ -1843,8 +1835,8 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
APInt RHSOffset;
if (IsConstantOffsetFromGlobal(CE->getOperand(1), RHSGV, RHSOffset,
getDataLayout())) {
- const MCExpr *RelocExpr = getObjFileLowering().lowerRelativeReference(
- LHSGV, RHSGV, *Mang, TM);
+ const MCExpr *RelocExpr =
+ getObjFileLowering().lowerRelativeReference(LHSGV, RHSGV, TM);
if (!RelocExpr)
RelocExpr = MCBinaryExpr::createSub(
MCSymbolRefExpr::create(getSymbol(LHSGV), Ctx),
@@ -2299,7 +2291,7 @@ static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *CV,
// If the constant expression's size is greater than 64-bits, then we have
// to emit the value in chunks. Try to constant fold the value and emit it
// that way.
- Constant *New = ConstantFoldConstantExpression(CE, DL);
+ Constant *New = ConstantFoldConstant(CE, DL);
if (New && New != CE)
return emitGlobalConstantImpl(DL, New, AP);
}
@@ -2385,8 +2377,7 @@ MCSymbol *AsmPrinter::GetJTSetSymbol(unsigned UID, unsigned MBBID) const {
MCSymbol *AsmPrinter::getSymbolWithGlobalValueBase(const GlobalValue *GV,
StringRef Suffix) const {
- return getObjFileLowering().getSymbolWithGlobalValueBase(GV, Suffix, *Mang,
- TM);
+ return getObjFileLowering().getSymbolWithGlobalValueBase(GV, Suffix, TM);
}
/// Return the MCSymbol for the specified ExternalSymbol.
@@ -2599,12 +2590,12 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy &S) {
if (GCPI != GCMap.end())
return GCPI->second.get();
- const char *Name = S.getName().c_str();
+ auto Name = S.getName();
for (GCMetadataPrinterRegistry::iterator
I = GCMetadataPrinterRegistry::begin(),
E = GCMetadataPrinterRegistry::end(); I != E; ++I)
- if (strcmp(Name, I->getName()) == 0) {
+ if (Name == I->getName()) {
std::unique_ptr<GCMetadataPrinter> GMP = I->instantiate();
GMP->S = &S;
auto IterBool = GCMap.insert(std::make_pair(&S, std::move(GMP)));
@@ -2618,3 +2609,21 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy &S) {
AsmPrinterHandler::~AsmPrinterHandler() {}
void AsmPrinterHandler::markFunctionEnd() {}
+
+void AsmPrinter::recordSled(MCSymbol *Sled, const MachineInstr &MI,
+ SledKind Kind) {
+ auto Fn = MI.getParent()->getParent()->getFunction();
+ auto Attr = Fn->getFnAttribute("function-instrument");
+ bool AlwaysInstrument =
+ Attr.isStringAttribute() && Attr.getValueAsString() == "xray-always";
+ Sleds.emplace_back(
+ XRayFunctionEntry{ Sled, CurrentFnSym, Kind, AlwaysInstrument, Fn });
+}
+
+uint16_t AsmPrinter::getDwarfVersion() const {
+ return OutStreamer->getContext().getDwarfVersion();
+}
+
+void AsmPrinter::setDwarfVersion(uint16_t Version) {
+ OutStreamer->getContext().setDwarfVersion(Version);
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index 60f40d063cc8..0185c380cc39 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -138,8 +138,7 @@ void AsmPrinter::EmitTTypeReference(const GlobalValue *GV,
const TargetLoweringObjectFile &TLOF = getObjFileLowering();
const MCExpr *Exp =
- TLOF.getTTypeGlobalReference(GV, Encoding, *Mang, TM, MMI,
- *OutStreamer);
+ TLOF.getTTypeGlobalReference(GV, Encoding, TM, MMI, *OutStreamer);
OutStreamer->EmitValue(Exp, GetSizeOfEncodedValue(Encoding));
} else
OutStreamer->EmitIntValue(0, GetSizeOfEncodedValue(Encoding));
@@ -150,7 +149,7 @@ void AsmPrinter::emitDwarfSymbolReference(const MCSymbol *Label,
if (!ForceOffset) {
// On COFF targets, we have to emit the special .secrel32 directive.
if (MAI->needsDwarfSectionOffsetDirective()) {
- OutStreamer->EmitCOFFSecRel32(Label);
+ OutStreamer->EmitCOFFSecRel32(Label, /*Offset=*/0);
return;
}
@@ -175,36 +174,6 @@ void AsmPrinter::emitDwarfStringOffset(DwarfStringPoolEntryRef S) const {
EmitInt32(S.getOffset());
}
-/// EmitDwarfRegOp - Emit dwarf register operation.
-void AsmPrinter::EmitDwarfRegOp(ByteStreamer &Streamer,
- const MachineLocation &MLoc) const {
- DebugLocDwarfExpression Expr(getDwarfDebug()->getDwarfVersion(), Streamer);
- const MCRegisterInfo *MRI = MMI->getContext().getRegisterInfo();
- int Reg = MRI->getDwarfRegNum(MLoc.getReg(), false);
- if (Reg < 0) {
- // We assume that pointers are always in an addressable register.
- if (MLoc.isIndirect())
- // FIXME: We have no reasonable way of handling errors in here. The
- // caller might be in the middle of a dwarf expression. We should
- // probably assert that Reg >= 0 once debug info generation is more
- // mature.
- return Expr.EmitOp(dwarf::DW_OP_nop,
- "nop (could not find a dwarf register number)");
-
- // Attempt to find a valid super- or sub-register.
- if (!Expr.AddMachineRegPiece(*MF->getSubtarget().getRegisterInfo(),
- MLoc.getReg()))
- Expr.EmitOp(dwarf::DW_OP_nop,
- "nop (could not find a dwarf register number)");
- return;
- }
-
- if (MLoc.isIndirect())
- Expr.AddRegIndirect(Reg, MLoc.getOffset());
- else
- Expr.AddReg(Reg);
-}
-
//===----------------------------------------------------------------------===//
// Dwarf Lowering Routines
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 2ce6c182235f..20075e41977f 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -193,6 +193,23 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
}
if (Done) break;
+ // If we have ${:foo}, then this is not a real operand reference, it is a
+ // "magic" string reference, just like in .td files. Arrange to call
+ // PrintSpecial.
+ if (LastEmitted[0] == '{' && LastEmitted[1] == ':') {
+ LastEmitted += 2;
+ const char *StrStart = LastEmitted;
+ const char *StrEnd = strchr(StrStart, '}');
+ if (!StrEnd)
+ report_fatal_error("Unterminated ${:foo} operand in inline asm"
+ " string: '" + Twine(AsmStr) + "'");
+
+ std::string Val(StrStart, StrEnd);
+ AP->PrintSpecial(MI, OS, Val.c_str());
+ LastEmitted = StrEnd+1;
+ break;
+ }
+
const char *IDStart = LastEmitted;
const char *IDEnd = IDStart;
while (*IDEnd >= '0' && *IDEnd <= '9') ++IDEnd;
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index ebf80dea2c4b..408b34a3cdc0 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -13,17 +13,18 @@
#include "CodeViewDebug.h"
#include "llvm/ADT/TinyPtrVector.h"
-#include "llvm/DebugInfo/CodeView/ByteStream.h"
#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
-#include "llvm/DebugInfo/CodeView/FieldListRecordBuilder.h"
#include "llvm/DebugInfo/CodeView/Line.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
#include "llvm/DebugInfo/CodeView/TypeDumper.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h"
+#include "llvm/DebugInfo/MSF/ByteStream.h"
+#include "llvm/DebugInfo/MSF/StreamReader.h"
#include "llvm/IR/Constants.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSymbol.h"
@@ -35,9 +36,11 @@
using namespace llvm;
using namespace llvm::codeview;
+using namespace llvm::msf;
CodeViewDebug::CodeViewDebug(AsmPrinter *AP)
- : DebugHandlerBase(AP), OS(*Asm->OutStreamer), CurFn(nullptr) {
+ : DebugHandlerBase(AP), OS(*Asm->OutStreamer), Allocator(),
+ TypeTable(Allocator), CurFn(nullptr) {
// If module doesn't have named metadata anchors or COFF debug section
// is not available, skip any debug info related stuff.
if (!MMI->getModule()->getNamedMetadata("llvm.dbg.cu") ||
@@ -108,8 +111,9 @@ unsigned CodeViewDebug::maybeRecordFile(const DIFile *F) {
if (Insertion.second) {
// We have to compute the full filepath and emit a .cv_file directive.
StringRef FullPath = getFullFilepath(F);
- NextId = OS.EmitCVFileDirective(NextId, FullPath);
- assert(NextId == FileIdMap.size() && ".cv_file directive failed");
+ bool Success = OS.EmitCVFileDirective(NextId, FullPath);
+ (void)Success;
+ assert(Success && ".cv_file directive failed");
}
return Insertion.first->second;
}
@@ -120,7 +124,16 @@ CodeViewDebug::getInlineSite(const DILocation *InlinedAt,
auto SiteInsertion = CurFn->InlineSites.insert({InlinedAt, InlineSite()});
InlineSite *Site = &SiteInsertion.first->second;
if (SiteInsertion.second) {
+ unsigned ParentFuncId = CurFn->FuncId;
+ if (const DILocation *OuterIA = InlinedAt->getInlinedAt())
+ ParentFuncId =
+ getInlineSite(OuterIA, InlinedAt->getScope()->getSubprogram())
+ .SiteFuncId;
+
Site->SiteFuncId = NextFuncId++;
+ OS.EmitCVInlineSiteIdDirective(
+ Site->SiteFuncId, ParentFuncId, maybeRecordFile(InlinedAt->getFile()),
+ InlinedAt->getLine(), InlinedAt->getColumn(), SMLoc());
Site->Inlinee = Inlinee;
InlinedSubprograms.insert(Inlinee);
getFuncIdForSubprogram(Inlinee);
@@ -208,8 +221,8 @@ TypeIndex CodeViewDebug::getScopeIndex(const DIScope *Scope) {
// Build the fully qualified name of the scope.
std::string ScopeName = getFullyQualifiedName(Scope);
- TypeIndex TI =
- TypeTable.writeStringId(StringIdRecord(TypeIndex(), ScopeName));
+ StringIdRecord SID(TypeIndex(), ScopeName);
+ auto TI = TypeTable.writeKnownType(SID);
return recordTypeIndexForDINode(Scope, TI);
}
@@ -234,12 +247,12 @@ TypeIndex CodeViewDebug::getFuncIdForSubprogram(const DISubprogram *SP) {
TypeIndex ClassType = getTypeIndex(Class);
MemberFuncIdRecord MFuncId(ClassType, getMemberFunctionType(SP, Class),
DisplayName);
- TI = TypeTable.writeMemberFuncId(MFuncId);
+ TI = TypeTable.writeKnownType(MFuncId);
} else {
// Otherwise, this must be a free function.
TypeIndex ParentScope = getScopeIndex(Scope);
FuncIdRecord FuncId(ParentScope, getTypeIndex(SP->getType()), DisplayName);
- TI = TypeTable.writeFuncId(FuncId);
+ TI = TypeTable.writeKnownType(FuncId);
}
return recordTypeIndexForDINode(SP, TI);
@@ -353,8 +366,8 @@ void CodeViewDebug::maybeRecordLocation(const DebugLoc &DL,
}
OS.EmitCVLocDirective(FuncId, FileId, DL.getLine(), DL.getCol(),
- /*PrologueEnd=*/false,
- /*IsStmt=*/false, DL->getFilename());
+ /*PrologueEnd=*/false, /*IsStmt=*/false,
+ DL->getFilename(), SMLoc());
}
void CodeViewDebug::emitCodeViewMagicVersion() {
@@ -377,6 +390,11 @@ void CodeViewDebug::endModule() {
// Use the generic .debug$S section, and make a subsection for all the inlined
// subprograms.
switchToDebugSectionForSymbol(nullptr);
+
+ MCSymbol *CompilerInfo = beginCVSubsection(ModuleSubstreamKind::Symbols);
+ emitCompilerInformation();
+ endCVSubsection(CompilerInfo);
+
emitInlineeLinesSubsection();
// Emit per-function debug information.
@@ -418,10 +436,13 @@ void CodeViewDebug::endModule() {
}
static void emitNullTerminatedSymbolName(MCStreamer &OS, StringRef S) {
- // Microsoft's linker seems to have trouble with symbol names longer than
- // 0xffd8 bytes.
- S = S.substr(0, 0xffd8);
- SmallString<32> NullTerminatedString(S);
+ // The maximum CV record length is 0xFF00. Most of the strings we emit appear
+ // after a fixed length portion of the record. The fixed length portion should
+ // always be less than 0xF00 (3840) bytes, so truncate the string so that the
+ // overall record size is less than the maximum allowed.
+ unsigned MaxFixedRecordLength = 0xF00;
+ SmallString<32> NullTerminatedString(
+ S.take_front(MaxRecordLength - MaxFixedRecordLength - 1));
NullTerminatedString.push_back('\0');
OS.EmitBytes(NullTerminatedString);
}
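A sketch of the truncation rule spelled out in the new comment, using the constants it states (MaxRecordLength being the 0xFF00 limit it refers to):

    #include <string>

    // Keep the string short enough that the fixed record fields, the string
    // itself, and its terminating NUL all fit under the CodeView record limit.
    static std::string truncateForRecord(std::string S) {
      const unsigned MaxRecordLength = 0xFF00;
      const unsigned MaxFixedRecordLength = 0xF00;
      if (S.size() > MaxRecordLength - MaxFixedRecordLength - 1)
        S.resize(MaxRecordLength - MaxFixedRecordLength - 1);
      S.push_back('\0');
      return S;
    }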
@@ -447,47 +468,173 @@ void CodeViewDebug::emitTypeInformation() {
}
CVTypeDumper CVTD(nullptr, /*PrintRecordBytes=*/false);
- TypeTable.ForEachRecord(
- [&](TypeIndex Index, StringRef Record) {
- if (OS.isVerboseAsm()) {
- // Emit a block comment describing the type record for readability.
- SmallString<512> CommentBlock;
- raw_svector_ostream CommentOS(CommentBlock);
- ScopedPrinter SP(CommentOS);
- SP.setPrefix(CommentPrefix);
- CVTD.setPrinter(&SP);
- Error E = CVTD.dump({Record.bytes_begin(), Record.bytes_end()});
- if (E) {
- logAllUnhandledErrors(std::move(E), errs(), "error: ");
- llvm_unreachable("produced malformed type record");
- }
- // emitRawComment will insert its own tab and comment string before
- // the first line, so strip off our first one. It also prints its own
- // newline.
- OS.emitRawComment(
- CommentOS.str().drop_front(CommentPrefix.size() - 1).rtrim());
- } else {
+ TypeTable.ForEachRecord([&](TypeIndex Index, ArrayRef<uint8_t> Record) {
+ if (OS.isVerboseAsm()) {
+ // Emit a block comment describing the type record for readability.
+ SmallString<512> CommentBlock;
+ raw_svector_ostream CommentOS(CommentBlock);
+ ScopedPrinter SP(CommentOS);
+ SP.setPrefix(CommentPrefix);
+ CVTD.setPrinter(&SP);
+ Error E = CVTD.dump(Record);
+ if (E) {
+ logAllUnhandledErrors(std::move(E), errs(), "error: ");
+ llvm_unreachable("produced malformed type record");
+ }
+ // emitRawComment will insert its own tab and comment string before
+ // the first line, so strip off our first one. It also prints its own
+ // newline.
+ OS.emitRawComment(
+ CommentOS.str().drop_front(CommentPrefix.size() - 1).rtrim());
+ } else {
#ifndef NDEBUG
- // Assert that the type data is valid even if we aren't dumping
- // comments. The MSVC linker doesn't do much type record validation,
- // so the first link of an invalid type record can succeed while
- // subsequent links will fail with LNK1285.
- ByteStream<> Stream({Record.bytes_begin(), Record.bytes_end()});
- CVTypeArray Types;
- StreamReader Reader(Stream);
- Error E = Reader.readArray(Types, Reader.getLength());
- if (!E) {
- TypeVisitorCallbacks C;
- E = CVTypeVisitor(C).visitTypeStream(Types);
- }
- if (E) {
- logAllUnhandledErrors(std::move(E), errs(), "error: ");
- llvm_unreachable("produced malformed type record");
- }
+ // Assert that the type data is valid even if we aren't dumping
+ // comments. The MSVC linker doesn't do much type record validation,
+ // so the first link of an invalid type record can succeed while
+ // subsequent links will fail with LNK1285.
+ ByteStream Stream(Record);
+ CVTypeArray Types;
+ StreamReader Reader(Stream);
+ Error E = Reader.readArray(Types, Reader.getLength());
+ if (!E) {
+ TypeVisitorCallbacks C;
+ E = CVTypeVisitor(C).visitTypeStream(Types);
+ }
+ if (E) {
+ logAllUnhandledErrors(std::move(E), errs(), "error: ");
+ llvm_unreachable("produced malformed type record");
+ }
#endif
- }
- OS.EmitBinaryData(Record);
- });
+ }
+ StringRef S(reinterpret_cast<const char *>(Record.data()), Record.size());
+ OS.EmitBinaryData(S);
+ });
+}
+
+namespace {
+
+static SourceLanguage MapDWLangToCVLang(unsigned DWLang) {
+ switch (DWLang) {
+ case dwarf::DW_LANG_C:
+ case dwarf::DW_LANG_C89:
+ case dwarf::DW_LANG_C99:
+ case dwarf::DW_LANG_C11:
+ case dwarf::DW_LANG_ObjC:
+ return SourceLanguage::C;
+ case dwarf::DW_LANG_C_plus_plus:
+ case dwarf::DW_LANG_C_plus_plus_03:
+ case dwarf::DW_LANG_C_plus_plus_11:
+ case dwarf::DW_LANG_C_plus_plus_14:
+ return SourceLanguage::Cpp;
+ case dwarf::DW_LANG_Fortran77:
+ case dwarf::DW_LANG_Fortran90:
+ case dwarf::DW_LANG_Fortran03:
+ case dwarf::DW_LANG_Fortran08:
+ return SourceLanguage::Fortran;
+ case dwarf::DW_LANG_Pascal83:
+ return SourceLanguage::Pascal;
+ case dwarf::DW_LANG_Cobol74:
+ case dwarf::DW_LANG_Cobol85:
+ return SourceLanguage::Cobol;
+ case dwarf::DW_LANG_Java:
+ return SourceLanguage::Java;
+ default:
+ // There's no CodeView representation for this language, and CV doesn't
+ // have an "unknown" option for the language field, so we'll use MASM,
+ // as it's very low level.
+ return SourceLanguage::Masm;
+ }
+}
+
+struct Version {
+ int Part[4];
+};
+
+// Takes a StringRef like "clang 4.0.0.0 (other nonsense 123)" and parses out
+// the version number.
+static Version parseVersion(StringRef Name) {
+ Version V = {{0}};
+ int N = 0;
+ for (const char C : Name) {
+ if (isdigit(C)) {
+ V.Part[N] *= 10;
+ V.Part[N] += C - '0';
+ } else if (C == '.') {
+ ++N;
+ if (N >= 4)
+ return V;
+ } else if (N > 0)
+ return V;
+ }
+ return V;
+}
+
+static CPUType mapArchToCVCPUType(Triple::ArchType Type) {
+ switch (Type) {
+ case Triple::ArchType::x86:
+ return CPUType::Pentium3;
+ case Triple::ArchType::x86_64:
+ return CPUType::X64;
+ case Triple::ArchType::thumb:
+ return CPUType::Thumb;
+ default:
+ report_fatal_error("target architecture doesn't map to a CodeView "
+ "CPUType");
+ }
+}
+
+} // anonymous namespace
+
+void CodeViewDebug::emitCompilerInformation() {
+ MCContext &Context = MMI->getContext();
+ MCSymbol *CompilerBegin = Context.createTempSymbol(),
+ *CompilerEnd = Context.createTempSymbol();
+ OS.AddComment("Record length");
+ OS.emitAbsoluteSymbolDiff(CompilerEnd, CompilerBegin, 2);
+ OS.EmitLabel(CompilerBegin);
+ OS.AddComment("Record kind: S_COMPILE3");
+ OS.EmitIntValue(SymbolKind::S_COMPILE3, 2);
+ uint32_t Flags = 0;
+
+ NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu");
+ const MDNode *Node = *CUs->operands().begin();
+ const auto *CU = cast<DICompileUnit>(Node);
+
+ // The low byte of the flags indicates the source language.
+ Flags = MapDWLangToCVLang(CU->getSourceLanguage());
+ // TODO: Figure out which other flags need to be set.
+
+ OS.AddComment("Flags and language");
+ OS.EmitIntValue(Flags, 4);
+
+ OS.AddComment("CPUType");
+ CPUType CPU =
+ mapArchToCVCPUType(Triple(MMI->getModule()->getTargetTriple()).getArch());
+ OS.EmitIntValue(static_cast<uint64_t>(CPU), 2);
+
+ StringRef CompilerVersion = CU->getProducer();
+ Version FrontVer = parseVersion(CompilerVersion);
+ OS.AddComment("Frontend version");
+ for (int N = 0; N < 4; ++N)
+ OS.EmitIntValue(FrontVer.Part[N], 2);
+
+ // Some Microsoft tools, like Binscope, expect a backend version number of at
+ // least 8.something, so we'll coerce the LLVM version into a form that
+ // guarantees it'll be big enough without really lying about the version.
+ int Major = 1000 * LLVM_VERSION_MAJOR +
+ 10 * LLVM_VERSION_MINOR +
+ LLVM_VERSION_PATCH;
+ // Clamp it for builds that use unusually large version numbers.
+ Major = std::min<int>(Major, std::numeric_limits<uint16_t>::max());
+ Version BackVer = {{ Major, 0, 0, 0 }};
+ OS.AddComment("Backend version");
+ for (int N = 0; N < 4; ++N)
+ OS.EmitIntValue(BackVer.Part[N], 2);
+
+ OS.AddComment("Null-terminated compiler version string");
+ emitNullTerminatedSymbolName(OS, CompilerVersion);
+
+ OS.EmitLabel(CompilerEnd);
}
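A worked example of the backend-version coercion above (LLVM version numbers assumed):
  // LLVM 4.0.0  -> Major = 1000*4 + 10*0 + 0 = 4000; BackVer = {4000, 0, 0, 0}
  // LLVM 40.1.2 -> Major = 40000 + 10 + 2 = 40012, still under the 65535 clamp
  // Each of the four parts is emitted as a 2-byte value, after the frontend version.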
void CodeViewDebug::emitInlineeLinesSubsection() {
@@ -525,17 +672,6 @@ void CodeViewDebug::emitInlineeLinesSubsection() {
endCVSubsection(InlineEnd);
}
-void CodeViewDebug::collectInlineSiteChildren(
- SmallVectorImpl<unsigned> &Children, const FunctionInfo &FI,
- const InlineSite &Site) {
- for (const DILocation *ChildSiteLoc : Site.ChildSites) {
- auto I = FI.InlineSites.find(ChildSiteLoc);
- const InlineSite &ChildSite = I->second;
- Children.push_back(ChildSite.SiteFuncId);
- collectInlineSiteChildren(Children, FI, ChildSite);
- }
-}
-
void CodeViewDebug::emitInlinedCallSite(const FunctionInfo &FI,
const DILocation *InlinedAt,
const InlineSite &Site) {
@@ -561,11 +697,9 @@ void CodeViewDebug::emitInlinedCallSite(const FunctionInfo &FI,
unsigned FileId = maybeRecordFile(Site.Inlinee->getFile());
unsigned StartLineNum = Site.Inlinee->getLine();
- SmallVector<unsigned, 3> SecondaryFuncIds;
- collectInlineSiteChildren(SecondaryFuncIds, FI, Site);
OS.EmitCVInlineLinetableDirective(Site.SiteFuncId, FileId, StartLineNum,
- FI.Begin, FI.End, SecondaryFuncIds);
+ FI.Begin, FI.End);
OS.EmitLabel(InlineEnd);
@@ -641,13 +775,13 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
OS.emitAbsoluteSymbolDiff(ProcRecordEnd, ProcRecordBegin, 2);
OS.EmitLabel(ProcRecordBegin);
- if (GV->hasLocalLinkage()) {
- OS.AddComment("Record kind: S_LPROC32_ID");
- OS.EmitIntValue(unsigned(SymbolKind::S_LPROC32_ID), 2);
- } else {
- OS.AddComment("Record kind: S_GPROC32_ID");
- OS.EmitIntValue(unsigned(SymbolKind::S_GPROC32_ID), 2);
- }
+ if (GV->hasLocalLinkage()) {
+ OS.AddComment("Record kind: S_LPROC32_ID");
+ OS.EmitIntValue(unsigned(SymbolKind::S_LPROC32_ID), 2);
+ } else {
+ OS.AddComment("Record kind: S_GPROC32_ID");
+ OS.EmitIntValue(unsigned(SymbolKind::S_GPROC32_ID), 2);
+ }
// These fields are filled in by tools like CVPACK which run after the fact.
OS.AddComment("PtrParent");
@@ -667,7 +801,7 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
OS.AddComment("Function type index");
OS.EmitIntValue(getFuncIdForSubprogram(GV->getSubprogram()).getIndex(), 4);
OS.AddComment("Function section relative address");
- OS.EmitCOFFSecRel32(Fn);
+ OS.EmitCOFFSecRel32(Fn, /*Offset=*/0);
OS.AddComment("Function section index");
OS.EmitCOFFSectionIndex(Fn);
OS.AddComment("Flags");
@@ -711,29 +845,33 @@ CodeViewDebug::createDefRangeMem(uint16_t CVRegister, int Offset) {
DR.InMemory = -1;
DR.DataOffset = Offset;
assert(DR.DataOffset == Offset && "truncation");
+ DR.IsSubfield = 0;
DR.StructOffset = 0;
DR.CVRegister = CVRegister;
return DR;
}
CodeViewDebug::LocalVarDefRange
-CodeViewDebug::createDefRangeReg(uint16_t CVRegister) {
+CodeViewDebug::createDefRangeGeneral(uint16_t CVRegister, bool InMemory,
+ int Offset, bool IsSubfield,
+ uint16_t StructOffset) {
LocalVarDefRange DR;
- DR.InMemory = 0;
- DR.DataOffset = 0;
- DR.StructOffset = 0;
+ DR.InMemory = InMemory;
+ DR.DataOffset = Offset;
+ DR.IsSubfield = IsSubfield;
+ DR.StructOffset = StructOffset;
DR.CVRegister = CVRegister;
return DR;
}
-void CodeViewDebug::collectVariableInfoFromMMITable(
+void CodeViewDebug::collectVariableInfoFromMFTable(
DenseSet<InlinedVariable> &Processed) {
- const TargetSubtargetInfo &TSI = Asm->MF->getSubtarget();
+ const MachineFunction &MF = *Asm->MF;
+ const TargetSubtargetInfo &TSI = MF.getSubtarget();
const TargetFrameLowering *TFI = TSI.getFrameLowering();
const TargetRegisterInfo *TRI = TSI.getRegisterInfo();
- for (const MachineModuleInfo::VariableDbgInfo &VI :
- MMI->getVariableDbgInfo()) {
+ for (const MachineFunction::VariableDbgInfo &VI : MF.getVariableDbgInfo()) {
if (!VI.Var)
continue;
assert(VI.Var->isValidLocationForIntrinsic(VI.Loc) &&
@@ -770,7 +908,7 @@ void CodeViewDebug::collectVariableInfoFromMMITable(
void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) {
DenseSet<InlinedVariable> Processed;
// Grab the variable info that was squirreled away in the MMI side-table.
- collectVariableInfoFromMMITable(Processed);
+ collectVariableInfoFromMFTable(Processed);
const TargetRegisterInfo *TRI = Asm->MF->getSubtarget().getRegisterInfo();
@@ -802,10 +940,17 @@ void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) {
const MachineInstr *DVInst = Range.first;
assert(DVInst->isDebugValue() && "Invalid History entry");
const DIExpression *DIExpr = DVInst->getDebugExpression();
-
- // Bail if there is a complex DWARF expression for now.
- if (DIExpr && DIExpr->getNumElements() > 0)
- continue;
+ bool IsSubfield = false;
+ unsigned StructOffset = 0;
+
+ // Handle fragments.
+ auto Fragment = DIExpr->getFragmentInfo();
+ if (DIExpr && Fragment) {
+ IsSubfield = true;
+ StructOffset = Fragment->OffsetInBits / 8;
+ } else if (DIExpr && DIExpr->getNumElements() > 0) {
+ continue; // Ignore unrecognized exprs.
+ }
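A sketch of how a fragment expression maps onto the subfield fields above (fragment values hypothetical):
  // DIExpr->getFragmentInfo() == {OffsetInBits: 64, SizeInBits: 32}
  //   -> IsSubfield = true, StructOffset = 64 / 8 = 8 (byte offset into the parent variable)
  // A non-empty expression with no fragment info still falls into the `continue` above.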
// Bail if operand 0 is not a valid register. This means the variable is a
// simple constant, or is described by a complex expression.
@@ -817,19 +962,20 @@ void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) {
continue;
// Handle the two cases we can handle: indirect in memory and in register.
- bool IsIndirect = DVInst->getOperand(1).isImm();
- unsigned CVReg = TRI->getCodeViewRegNum(DVInst->getOperand(0).getReg());
+ unsigned CVReg = TRI->getCodeViewRegNum(Reg);
+ bool InMemory = DVInst->getOperand(1).isImm();
+ int Offset = InMemory ? DVInst->getOperand(1).getImm() : 0;
{
- LocalVarDefRange DefRange;
- if (IsIndirect) {
- int64_t Offset = DVInst->getOperand(1).getImm();
- DefRange = createDefRangeMem(CVReg, Offset);
- } else {
- DefRange = createDefRangeReg(CVReg);
- }
+ LocalVarDefRange DR;
+ DR.CVRegister = CVReg;
+ DR.InMemory = InMemory;
+ DR.DataOffset = Offset;
+ DR.IsSubfield = IsSubfield;
+ DR.StructOffset = StructOffset;
+
if (Var.DefRanges.empty() ||
- Var.DefRanges.back().isDifferentLocation(DefRange)) {
- Var.DefRanges.emplace_back(std::move(DefRange));
+ Var.DefRanges.back().isDifferentLocation(DR)) {
+ Var.DefRanges.emplace_back(std::move(DR));
}
}
@@ -837,8 +983,14 @@ void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) {
const MCSymbol *Begin = getLabelBeforeInsn(Range.first);
const MCSymbol *End = getLabelAfterInsn(Range.second);
if (!End) {
- if (std::next(I) != E)
- End = getLabelBeforeInsn(std::next(I)->first);
+ // This range is valid until the next overlapping bitpiece. In the
+ // common case, ranges will not be bitpieces, so they will overlap.
+ auto J = std::next(I);
+ while (J != E &&
+ !fragmentsOverlap(DIExpr, J->first->getDebugExpression()))
+ ++J;
+ if (J != E)
+ End = getLabelBeforeInsn(J->first);
else
End = Asm->getFunctionEnd();
}
@@ -873,6 +1025,8 @@ void CodeViewDebug::beginFunction(const MachineFunction *MF) {
CurFn->FuncId = NextFuncId++;
CurFn->Begin = Asm->getFunctionBegin();
+ OS.EmitCVFuncIdDirective(CurFn->FuncId);
+
// Find the end of the function prolog. First known non-DBG_VALUE and
// non-frame setup location marks the beginning of the function body.
// FIXME: is there a simpler a way to do this? Can we just search
@@ -933,6 +1087,9 @@ TypeIndex CodeViewDebug::lowerType(const DIType *Ty, const DIType *ClassTy) {
case dwarf::DW_TAG_base_type:
return lowerTypeBasic(cast<DIBasicType>(Ty));
case dwarf::DW_TAG_pointer_type:
+ if (cast<DIDerivedType>(Ty)->getName() == "__vtbl_ptr_type")
+ return lowerTypeVFTableShape(cast<DIDerivedType>(Ty));
+ LLVM_FALLTHROUGH;
case dwarf::DW_TAG_reference_type:
case dwarf::DW_TAG_rvalue_reference_type:
return lowerTypePointer(cast<DIDerivedType>(Ty));
@@ -940,6 +1097,7 @@ TypeIndex CodeViewDebug::lowerType(const DIType *Ty, const DIType *ClassTy) {
return lowerTypeMemberPointer(cast<DIDerivedType>(Ty));
case dwarf::DW_TAG_const_type:
case dwarf::DW_TAG_volatile_type:
+ // TODO: add support for DW_TAG_atomic_type here
return lowerTypeModifier(cast<DIDerivedType>(Ty));
case dwarf::DW_TAG_subroutine_type:
if (ClassTy) {
@@ -989,20 +1147,25 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) {
uint64_t ElementSize = getBaseTypeSize(ElementTypeRef) / 8;
- bool UndefinedSubrange = false;
- // FIXME:
- // There is a bug in the front-end where an array of a structure, which was
- // declared as incomplete structure first, ends up not getting a size assigned
- // to it. (PR28303)
+ // We want to assert that the element type multiplied by the array lengths
+ // match the size of the overall array. However, if we don't have complete
+ // type information for the base type, we can't make this assertion. This
+ // happens if limited debug info is enabled in this case:
+ // struct VTableOptzn { VTableOptzn(); virtual ~VTableOptzn(); };
+ // VTableOptzn array[3];
+ // The DICompositeType of VTableOptzn will have size zero, and the array will
+ // have size 3 * sizeof(void*), and we should avoid asserting.
+ //
+ // There is a related bug in the front-end where an array of a structure,
+ // which was declared as incomplete structure first, ends up not getting a
+ // size assigned to it. (PR28303)
// Example:
// struct A(*p)[3];
// struct A { int f; } a[3];
- //
- // This needs to be fixed in the front-end, but in the meantime we don't want
- // to trigger an assertion because of this.
- if (Ty->getSizeInBits() == 0) {
- UndefinedSubrange = true;
+ bool PartiallyIncomplete = false;
+ if (Ty->getSizeInBits() == 0 || ElementSize == 0) {
+ PartiallyIncomplete = true;
}
// Add subranges to array type.
@@ -1021,18 +1184,24 @@ TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) {
// FIXME: Make front-end support VLA subrange and emit LF_DIMVARLU.
if (Count == -1) {
Count = 1;
- UndefinedSubrange = true;
+ PartiallyIncomplete = true;
}
- StringRef Name = (i == 0) ? Ty->getName() : "";
// Update the element size and element type index for subsequent subranges.
ElementSize *= Count;
- ElementTypeIndex = TypeTable.writeArray(
- ArrayRecord(ElementTypeIndex, IndexType, ElementSize, Name));
+
+ // If this is the outermost array, use the size from the array. It will be
+ // more accurate if PartiallyIncomplete is true.
+ uint64_t ArraySize =
+ (i == 0 && ElementSize == 0) ? Ty->getSizeInBits() / 8 : ElementSize;
+
+ StringRef Name = (i == 0) ? Ty->getName() : "";
+ ArrayRecord AR(ElementTypeIndex, IndexType, ArraySize, Name);
+ ElementTypeIndex = TypeTable.writeKnownType(AR);
}
- (void)UndefinedSubrange;
- assert(UndefinedSubrange || ElementSize == (Ty->getSizeInBits() / 8));
+ (void)PartiallyIncomplete;
+ assert(PartiallyIncomplete || ElementSize == (Ty->getSizeInBits() / 8));
return ElementTypeIndex;
}
@@ -1080,20 +1249,20 @@ TypeIndex CodeViewDebug::lowerTypeBasic(const DIBasicType *Ty) {
break;
case dwarf::DW_ATE_signed:
switch (ByteSize) {
- case 1: STK = SimpleTypeKind::SByte; break;
- case 2: STK = SimpleTypeKind::Int16Short; break;
- case 4: STK = SimpleTypeKind::Int32; break;
- case 8: STK = SimpleTypeKind::Int64Quad; break;
- case 16: STK = SimpleTypeKind::Int128Oct; break;
+ case 1: STK = SimpleTypeKind::SignedCharacter; break;
+ case 2: STK = SimpleTypeKind::Int16Short; break;
+ case 4: STK = SimpleTypeKind::Int32; break;
+ case 8: STK = SimpleTypeKind::Int64Quad; break;
+ case 16: STK = SimpleTypeKind::Int128Oct; break;
}
break;
case dwarf::DW_ATE_unsigned:
switch (ByteSize) {
- case 1: STK = SimpleTypeKind::Byte; break;
- case 2: STK = SimpleTypeKind::UInt16Short; break;
- case 4: STK = SimpleTypeKind::UInt32; break;
- case 8: STK = SimpleTypeKind::UInt64Quad; break;
- case 16: STK = SimpleTypeKind::UInt128Oct; break;
+ case 1: STK = SimpleTypeKind::UnsignedCharacter; break;
+ case 2: STK = SimpleTypeKind::UInt16Short; break;
+ case 4: STK = SimpleTypeKind::UInt32; break;
+ case 8: STK = SimpleTypeKind::UInt64Quad; break;
+ case 16: STK = SimpleTypeKind::UInt128Oct; break;
}
break;
case dwarf::DW_ATE_UTF:
@@ -1133,13 +1302,6 @@ TypeIndex CodeViewDebug::lowerTypeBasic(const DIBasicType *Ty) {
TypeIndex CodeViewDebug::lowerTypePointer(const DIDerivedType *Ty) {
TypeIndex PointeeTI = getTypeIndex(Ty->getBaseType());
- // While processing the type being pointed to it is possible we already
- // created this pointer type. If so, we check here and return the existing
- // pointer type.
- auto I = TypeIndices.find({Ty, nullptr});
- if (I != TypeIndices.end())
- return I->second;
-
// Pointers to simple types can use SimpleTypeMode, rather than having a
// dedicated pointer type record.
if (PointeeTI.isSimple() &&
@@ -1171,7 +1333,7 @@ TypeIndex CodeViewDebug::lowerTypePointer(const DIDerivedType *Ty) {
// do.
PointerOptions PO = PointerOptions::None;
PointerRecord PR(PointeeTI, PK, PM, PO, Ty->getSizeInBits() / 8);
- return TypeTable.writePointer(PR);
+ return TypeTable.writeKnownType(PR);
}
static PointerToMemberRepresentation
@@ -1222,7 +1384,7 @@ TypeIndex CodeViewDebug::lowerTypeMemberPointer(const DIDerivedType *Ty) {
MemberPointerInfo MPI(
ClassTI, translatePtrToMemberRep(SizeInBytes, IsPMF, Ty->getFlags()));
PointerRecord PR(PointeeTI, PK, PM, PO, SizeInBytes, MPI);
- return TypeTable.writePointer(PR);
+ return TypeTable.writeKnownType(PR);
}
/// Given a DWARF calling convention, get the CodeView equivalent. If we don't
@@ -1244,7 +1406,7 @@ TypeIndex CodeViewDebug::lowerTypeModifier(const DIDerivedType *Ty) {
bool IsModifier = true;
const DIType *BaseTy = Ty;
while (IsModifier && BaseTy) {
- // FIXME: Need to add DWARF tag for __unaligned.
+ // FIXME: Need to add DWARF tags for __unaligned and _Atomic
switch (BaseTy->getTag()) {
case dwarf::DW_TAG_const_type:
Mods |= ModifierOptions::Const;
@@ -1260,16 +1422,8 @@ TypeIndex CodeViewDebug::lowerTypeModifier(const DIDerivedType *Ty) {
BaseTy = cast<DIDerivedType>(BaseTy)->getBaseType().resolve();
}
TypeIndex ModifiedTI = getTypeIndex(BaseTy);
-
- // While processing the type being pointed to, it is possible we already
- // created this modifier type. If so, we check here and return the existing
- // modifier type.
- auto I = TypeIndices.find({Ty, nullptr});
- if (I != TypeIndices.end())
- return I->second;
-
ModifierRecord MR(ModifiedTI, Mods);
- return TypeTable.writeModifier(MR);
+ return TypeTable.writeKnownType(MR);
}
TypeIndex CodeViewDebug::lowerTypeFunction(const DISubroutineType *Ty) {
@@ -1286,13 +1440,13 @@ TypeIndex CodeViewDebug::lowerTypeFunction(const DISubroutineType *Ty) {
}
ArgListRecord ArgListRec(TypeRecordKind::ArgList, ArgTypeIndices);
- TypeIndex ArgListIndex = TypeTable.writeArgList(ArgListRec);
+ TypeIndex ArgListIndex = TypeTable.writeKnownType(ArgListRec);
CallingConvention CC = dwarfCCToCodeView(Ty->getCC());
ProcedureRecord Procedure(ReturnTypeIndex, CC, FunctionOptions::None,
ArgTypeIndices.size(), ArgListIndex);
- return TypeTable.writeProcedure(Procedure);
+ return TypeTable.writeKnownType(Procedure);
}
TypeIndex CodeViewDebug::lowerTypeMemberFunction(const DISubroutineType *Ty,
@@ -1319,20 +1473,29 @@ TypeIndex CodeViewDebug::lowerTypeMemberFunction(const DISubroutineType *Ty,
}
ArgListRecord ArgListRec(TypeRecordKind::ArgList, ArgTypeIndices);
- TypeIndex ArgListIndex = TypeTable.writeArgList(ArgListRec);
+ TypeIndex ArgListIndex = TypeTable.writeKnownType(ArgListRec);
CallingConvention CC = dwarfCCToCodeView(Ty->getCC());
// TODO: Need to use the correct values for:
// FunctionOptions
// ThisPointerAdjustment.
- TypeIndex TI = TypeTable.writeMemberFunction(MemberFunctionRecord(
- ReturnTypeIndex, ClassType, ThisTypeIndex, CC, FunctionOptions::None,
- ArgTypeIndices.size(), ArgListIndex, ThisAdjustment));
+ MemberFunctionRecord MFR(ReturnTypeIndex, ClassType, ThisTypeIndex, CC,
+ FunctionOptions::None, ArgTypeIndices.size(),
+ ArgListIndex, ThisAdjustment);
+ TypeIndex TI = TypeTable.writeKnownType(MFR);
return TI;
}
+TypeIndex CodeViewDebug::lowerTypeVFTableShape(const DIDerivedType *Ty) {
+ unsigned VSlotCount = Ty->getSizeInBits() / (8 * Asm->MAI->getPointerSize());
+ SmallVector<VFTableSlotKind, 4> Slots(VSlotCount, VFTableSlotKind::Near);
+
+ VFTableShapeRecord VFTSR(Slots);
+ return TypeTable.writeKnownType(VFTSR);
+}
+
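Example of the slot-count computation in lowerTypeVFTableShape (target pointer size assumed):
  // 64-bit target: __vtbl_ptr_type of 192 bits -> 192 / (8 * 8) = 3 Near slots
  // 32-bit target: the same record would use 8 * 4 = 32 bits per slot instead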
static MemberAccess translateAccessFlags(unsigned RecordTag, unsigned Flags) {
switch (Flags & DINode::FlagAccessibility) {
case DINode::FlagPrivate: return MemberAccess::Private;
@@ -1420,25 +1583,28 @@ TypeIndex CodeViewDebug::lowerTypeEnum(const DICompositeType *Ty) {
if (Ty->isForwardDecl()) {
CO |= ClassOptions::ForwardReference;
} else {
- FieldListRecordBuilder Fields;
+ FieldListRecordBuilder FLRB(TypeTable);
+
+ FLRB.begin();
for (const DINode *Element : Ty->getElements()) {
// We assume that the frontend provides all members in source declaration
// order, which is what MSVC does.
if (auto *Enumerator = dyn_cast_or_null<DIEnumerator>(Element)) {
- Fields.writeEnumerator(EnumeratorRecord(
- MemberAccess::Public, APSInt::getUnsigned(Enumerator->getValue()),
- Enumerator->getName()));
+ EnumeratorRecord ER(MemberAccess::Public,
+ APSInt::getUnsigned(Enumerator->getValue()),
+ Enumerator->getName());
+ FLRB.writeMemberType(ER);
EnumeratorCount++;
}
}
- FTI = TypeTable.writeFieldList(Fields);
+ FTI = FLRB.end();
}
std::string FullName = getFullyQualifiedName(Ty);
- return TypeTable.writeEnum(EnumRecord(EnumeratorCount, CO, FTI, FullName,
- Ty->getIdentifier(),
- getTypeIndex(Ty->getBaseType())));
+ EnumRecord ER(EnumeratorCount, CO, FTI, FullName, Ty->getIdentifier(),
+ getTypeIndex(Ty->getBaseType()));
+ return TypeTable.writeKnownType(ER);
}
//===----------------------------------------------------------------------===//
@@ -1465,6 +1631,8 @@ struct llvm::ClassInfo {
// Direct overloaded methods gathered by name.
MethodsMap Methods;
+ TypeIndex VShapeTI;
+
std::vector<const DICompositeType *> NestedClasses;
};
@@ -1513,11 +1681,13 @@ ClassInfo CodeViewDebug::collectClassInfo(const DICompositeType *Ty) {
collectMemberInfo(Info, DDTy);
} else if (DDTy->getTag() == dwarf::DW_TAG_inheritance) {
Info.Inheritance.push_back(DDTy);
+ } else if (DDTy->getTag() == dwarf::DW_TAG_pointer_type &&
+ DDTy->getName() == "__vtbl_ptr_type") {
+ Info.VShapeTI = getTypeIndex(DDTy);
} else if (DDTy->getTag() == dwarf::DW_TAG_friend) {
// Ignore friend members. It appears that MSVC emitted info about
// friends in the past, but modern versions do not.
}
- // FIXME: Get Clang to emit function virtual table here and handle it.
} else if (auto *Composite = dyn_cast<DICompositeType>(Element)) {
Info.NestedClasses.push_back(Composite);
}
@@ -1533,9 +1703,9 @@ TypeIndex CodeViewDebug::lowerTypeClass(const DICompositeType *Ty) {
ClassOptions CO =
ClassOptions::ForwardReference | getCommonClassOptions(Ty);
std::string FullName = getFullyQualifiedName(Ty);
- TypeIndex FwdDeclTI = TypeTable.writeClass(ClassRecord(
- Kind, 0, CO, HfaKind::None, WindowsRTClassKind::None, TypeIndex(),
- TypeIndex(), TypeIndex(), 0, FullName, Ty->getIdentifier()));
+ ClassRecord CR(Kind, 0, CO, TypeIndex(), TypeIndex(), TypeIndex(), 0,
+ FullName, Ty->getIdentifier());
+ TypeIndex FwdDeclTI = TypeTable.writeKnownType(CR);
if (!Ty->isForwardDecl())
DeferredCompleteTypes.push_back(Ty);
return FwdDeclTI;
@@ -1559,14 +1729,14 @@ TypeIndex CodeViewDebug::lowerCompleteTypeClass(const DICompositeType *Ty) {
uint64_t SizeInBytes = Ty->getSizeInBits() / 8;
- TypeIndex ClassTI = TypeTable.writeClass(ClassRecord(
- Kind, FieldCount, CO, HfaKind::None, WindowsRTClassKind::None, FieldTI,
- TypeIndex(), VShapeTI, SizeInBytes, FullName, Ty->getIdentifier()));
+ ClassRecord CR(Kind, FieldCount, CO, FieldTI, TypeIndex(), VShapeTI,
+ SizeInBytes, FullName, Ty->getIdentifier());
+ TypeIndex ClassTI = TypeTable.writeKnownType(CR);
- TypeTable.writeUdtSourceLine(UdtSourceLineRecord(
- ClassTI, TypeTable.writeStringId(StringIdRecord(
- TypeIndex(0x0), getFullFilepath(Ty->getFile()))),
- Ty->getLine()));
+ StringIdRecord SIDR(TypeIndex(0x0), getFullFilepath(Ty->getFile()));
+ TypeIndex SIDI = TypeTable.writeKnownType(SIDR);
+ UdtSourceLineRecord USLR(ClassTI, SIDI, Ty->getLine());
+ TypeTable.writeKnownType(USLR);
addToUDTs(Ty, ClassTI);
@@ -1577,9 +1747,8 @@ TypeIndex CodeViewDebug::lowerTypeUnion(const DICompositeType *Ty) {
ClassOptions CO =
ClassOptions::ForwardReference | getCommonClassOptions(Ty);
std::string FullName = getFullyQualifiedName(Ty);
- TypeIndex FwdDeclTI =
- TypeTable.writeUnion(UnionRecord(0, CO, HfaKind::None, TypeIndex(), 0,
- FullName, Ty->getIdentifier()));
+ UnionRecord UR(0, CO, TypeIndex(), 0, FullName, Ty->getIdentifier());
+ TypeIndex FwdDeclTI = TypeTable.writeKnownType(UR);
if (!Ty->isForwardDecl())
DeferredCompleteTypes.push_back(Ty);
return FwdDeclTI;
@@ -1599,14 +1768,14 @@ TypeIndex CodeViewDebug::lowerCompleteTypeUnion(const DICompositeType *Ty) {
uint64_t SizeInBytes = Ty->getSizeInBits() / 8;
std::string FullName = getFullyQualifiedName(Ty);
- TypeIndex UnionTI = TypeTable.writeUnion(
- UnionRecord(FieldCount, CO, HfaKind::None, FieldTI, SizeInBytes, FullName,
- Ty->getIdentifier()));
+ UnionRecord UR(FieldCount, CO, FieldTI, SizeInBytes, FullName,
+ Ty->getIdentifier());
+ TypeIndex UnionTI = TypeTable.writeKnownType(UR);
- TypeTable.writeUdtSourceLine(UdtSourceLineRecord(
- UnionTI, TypeTable.writeStringId(StringIdRecord(
- TypeIndex(0x0), getFullFilepath(Ty->getFile()))),
- Ty->getLine()));
+ StringIdRecord SIR(TypeIndex(0x0), getFullFilepath(Ty->getFile()));
+ TypeIndex SIRI = TypeTable.writeKnownType(SIR);
+ UdtSourceLineRecord USLR(UnionTI, SIRI, Ty->getLine());
+ TypeTable.writeKnownType(USLR);
addToUDTs(Ty, UnionTI);
@@ -1621,7 +1790,8 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) {
// list record.
unsigned MemberCount = 0;
ClassInfo Info = collectClassInfo(Ty);
- FieldListRecordBuilder Fields;
+ FieldListRecordBuilder FLBR(TypeTable);
+ FLBR.begin();
// Create base classes.
for (const DIDerivedType *I : Info.Inheritance) {
@@ -1631,16 +1801,22 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) {
unsigned VBPtrOffset = 0;
// FIXME: Despite the accessor name, the offset is really in bytes.
unsigned VBTableIndex = I->getOffsetInBits() / 4;
- Fields.writeVirtualBaseClass(VirtualBaseClassRecord(
- translateAccessFlags(Ty->getTag(), I->getFlags()),
+ auto RecordKind = (I->getFlags() & DINode::FlagIndirectVirtualBase) == DINode::FlagIndirectVirtualBase
+ ? TypeRecordKind::IndirectVirtualBaseClass
+ : TypeRecordKind::VirtualBaseClass;
+ VirtualBaseClassRecord VBCR(
+ RecordKind, translateAccessFlags(Ty->getTag(), I->getFlags()),
getTypeIndex(I->getBaseType()), getVBPTypeIndex(), VBPtrOffset,
- VBTableIndex));
+ VBTableIndex);
+
+ FLBR.writeMemberType(VBCR);
} else {
assert(I->getOffsetInBits() % 8 == 0 &&
"bases must be on byte boundaries");
- Fields.writeBaseClass(BaseClassRecord(
- translateAccessFlags(Ty->getTag(), I->getFlags()),
- getTypeIndex(I->getBaseType()), I->getOffsetInBits() / 8));
+ BaseClassRecord BCR(translateAccessFlags(Ty->getTag(), I->getFlags()),
+ getTypeIndex(I->getBaseType()),
+ I->getOffsetInBits() / 8);
+ FLBR.writeMemberType(BCR);
}
}
@@ -1653,8 +1829,17 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) {
translateAccessFlags(Ty->getTag(), Member->getFlags());
if (Member->isStaticMember()) {
- Fields.writeStaticDataMember(
- StaticDataMemberRecord(Access, MemberBaseType, MemberName));
+ StaticDataMemberRecord SDMR(Access, MemberBaseType, MemberName);
+ FLBR.writeMemberType(SDMR);
+ MemberCount++;
+ continue;
+ }
+
+ // Virtual function pointer member.
+ if ((Member->getFlags() & DINode::FlagArtificial) &&
+ Member->getName().startswith("_vptr$")) {
+ VFPtrRecord VFPR(getTypeIndex(Member->getBaseType()));
+ FLBR.writeMemberType(VFPR);
MemberCount++;
continue;
}
@@ -1669,12 +1854,14 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) {
MemberOffsetInBits = CI->getZExtValue() + MemberInfo.BaseOffset;
}
StartBitOffset -= MemberOffsetInBits;
- MemberBaseType = TypeTable.writeBitField(BitFieldRecord(
- MemberBaseType, Member->getSizeInBits(), StartBitOffset));
+ BitFieldRecord BFR(MemberBaseType, Member->getSizeInBits(),
+ StartBitOffset);
+ MemberBaseType = TypeTable.writeKnownType(BFR);
}
uint64_t MemberOffsetInBytes = MemberOffsetInBits / 8;
- Fields.writeDataMember(DataMemberRecord(Access, MemberBaseType,
- MemberOffsetInBytes, MemberName));
+ DataMemberRecord DMR(Access, MemberBaseType, MemberOffsetInBytes,
+ MemberName);
+ FLBR.writeMemberType(DMR);
MemberCount++;
}
@@ -1691,33 +1878,32 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) {
if (Introduced)
VFTableOffset = SP->getVirtualIndex() * getPointerSizeInBytes();
- Methods.push_back(
- OneMethodRecord(MethodType, translateMethodKindFlags(SP, Introduced),
- translateMethodOptionFlags(SP),
- translateAccessFlags(Ty->getTag(), SP->getFlags()),
- VFTableOffset, Name));
+ Methods.push_back(OneMethodRecord(
+ MethodType, translateAccessFlags(Ty->getTag(), SP->getFlags()),
+ translateMethodKindFlags(SP, Introduced),
+ translateMethodOptionFlags(SP), VFTableOffset, Name));
MemberCount++;
}
assert(Methods.size() > 0 && "Empty methods map entry");
if (Methods.size() == 1)
- Fields.writeOneMethod(Methods[0]);
+ FLBR.writeMemberType(Methods[0]);
else {
- TypeIndex MethodList =
- TypeTable.writeMethodOverloadList(MethodOverloadListRecord(Methods));
- Fields.writeOverloadedMethod(
- OverloadedMethodRecord(Methods.size(), MethodList, Name));
+ MethodOverloadListRecord MOLR(Methods);
+ TypeIndex MethodList = TypeTable.writeKnownType(MOLR);
+ OverloadedMethodRecord OMR(Methods.size(), MethodList, Name);
+ FLBR.writeMemberType(OMR);
}
}
// Create nested classes.
for (const DICompositeType *Nested : Info.NestedClasses) {
NestedTypeRecord R(getTypeIndex(DITypeRef(Nested)), Nested->getName());
- Fields.writeNestedType(R);
+ FLBR.writeMemberType(R);
MemberCount++;
}
- TypeIndex FieldTI = TypeTable.writeFieldList(Fields);
- return std::make_tuple(FieldTI, TypeIndex(), MemberCount,
+ TypeIndex FieldTI = FLBR.end();
+ return std::make_tuple(FieldTI, Info.VShapeTI, MemberCount,
!Info.NestedClasses.empty());
}
@@ -1725,7 +1911,7 @@ TypeIndex CodeViewDebug::getVBPTypeIndex() {
if (!VBPType.getIndex()) {
// Make a 'const int *' type.
ModifierRecord MR(TypeIndex::Int32(), ModifierOptions::Const);
- TypeIndex ModifiedTI = TypeTable.writeModifier(MR);
+ TypeIndex ModifiedTI = TypeTable.writeKnownType(MR);
PointerKind PK = getPointerSizeInBytes() == 8 ? PointerKind::Near64
: PointerKind::Near32;
@@ -1733,7 +1919,7 @@ TypeIndex CodeViewDebug::getVBPTypeIndex() {
PointerOptions PO = PointerOptions::None;
PointerRecord PR(ModifiedTI, PK, PM, PO, getPointerSizeInBytes());
- VBPType = TypeTable.writePointer(PR);
+ VBPType = TypeTable.writeKnownType(PR);
}
return VBPType;
@@ -1880,30 +2066,47 @@ void CodeViewDebug::emitLocalVariable(const LocalVariable &Var) {
SmallString<20> BytePrefix;
for (const LocalVarDefRange &DefRange : Var.DefRanges) {
BytePrefix.clear();
- // FIXME: Handle bitpieces.
- if (DefRange.StructOffset != 0)
- continue;
-
if (DefRange.InMemory) {
- DefRangeRegisterRelSym Sym(DefRange.CVRegister, 0, DefRange.DataOffset, 0,
- 0, 0, ArrayRef<LocalVariableAddrGap>());
+ uint16_t RegRelFlags = 0;
+ if (DefRange.IsSubfield) {
+ RegRelFlags = DefRangeRegisterRelSym::IsSubfieldFlag |
+ (DefRange.StructOffset
+ << DefRangeRegisterRelSym::OffsetInParentShift);
+ }
+ DefRangeRegisterRelSym Sym(S_DEFRANGE_REGISTER_REL);
+ Sym.Hdr.Register = DefRange.CVRegister;
+ Sym.Hdr.Flags = RegRelFlags;
+ Sym.Hdr.BasePointerOffset = DefRange.DataOffset;
ulittle16_t SymKind = ulittle16_t(S_DEFRANGE_REGISTER_REL);
BytePrefix +=
StringRef(reinterpret_cast<const char *>(&SymKind), sizeof(SymKind));
BytePrefix +=
- StringRef(reinterpret_cast<const char *>(&Sym.Header),
- sizeof(Sym.Header) - sizeof(LocalVariableAddrRange));
+ StringRef(reinterpret_cast<const char *>(&Sym.Hdr), sizeof(Sym.Hdr));
} else {
assert(DefRange.DataOffset == 0 && "unexpected offset into register");
- // Unclear what matters here.
- DefRangeRegisterSym Sym(DefRange.CVRegister, 0, 0, 0, 0,
- ArrayRef<LocalVariableAddrGap>());
- ulittle16_t SymKind = ulittle16_t(S_DEFRANGE_REGISTER);
- BytePrefix +=
- StringRef(reinterpret_cast<const char *>(&SymKind), sizeof(SymKind));
- BytePrefix +=
- StringRef(reinterpret_cast<const char *>(&Sym.Header),
- sizeof(Sym.Header) - sizeof(LocalVariableAddrRange));
+ if (DefRange.IsSubfield) {
+ // Unclear what matters here.
+ DefRangeSubfieldRegisterSym Sym(S_DEFRANGE_SUBFIELD_REGISTER);
+ Sym.Hdr.Register = DefRange.CVRegister;
+ Sym.Hdr.MayHaveNoName = 0;
+ Sym.Hdr.OffsetInParent = DefRange.StructOffset;
+
+ ulittle16_t SymKind = ulittle16_t(S_DEFRANGE_SUBFIELD_REGISTER);
+ BytePrefix += StringRef(reinterpret_cast<const char *>(&SymKind),
+ sizeof(SymKind));
+ BytePrefix += StringRef(reinterpret_cast<const char *>(&Sym.Hdr),
+ sizeof(Sym.Hdr));
+ } else {
+ // Unclear what matters here.
+ DefRangeRegisterSym Sym(S_DEFRANGE_REGISTER);
+ Sym.Hdr.Register = DefRange.CVRegister;
+ Sym.Hdr.MayHaveNoName = 0;
+ ulittle16_t SymKind = ulittle16_t(S_DEFRANGE_REGISTER);
+ BytePrefix += StringRef(reinterpret_cast<const char *>(&SymKind),
+ sizeof(SymKind));
+ BytePrefix += StringRef(reinterpret_cast<const char *>(&Sym.Hdr),
+ sizeof(Sym.Hdr));
+ }
}
OS.EmitCVDefRangeDirective(DefRange.Ranges, BytePrefix);
}
@@ -1983,6 +2186,15 @@ void CodeViewDebug::emitDebugInfoForUDTs(
}
void CodeViewDebug::emitDebugInfoForGlobals() {
+ DenseMap<const DIGlobalVariableExpression *, const GlobalVariable *>
+ GlobalMap;
+ for (const GlobalVariable &GV : MMI->getModule()->globals()) {
+ SmallVector<DIGlobalVariableExpression *, 1> GVEs;
+ GV.getDebugInfo(GVEs);
+ for (const auto *GVE : GVEs)
+ GlobalMap[GVE] = &GV;
+ }
+
NamedMDNode *CUs = MMI->getModule()->getNamedMetadata("llvm.dbg.cu");
for (const MDNode *Node : CUs->operands()) {
const auto *CU = cast<DICompileUnit>(Node);
@@ -1992,31 +2204,32 @@ void CodeViewDebug::emitDebugInfoForGlobals() {
// it if we have at least one global to emit.
switchToDebugSectionForSymbol(nullptr);
MCSymbol *EndLabel = nullptr;
- for (const DIGlobalVariable *G : CU->getGlobalVariables()) {
- if (const auto *GV = dyn_cast_or_null<GlobalVariable>(G->getVariable())) {
+ for (const auto *GVE : CU->getGlobalVariables()) {
+ if (const auto *GV = GlobalMap.lookup(GVE))
if (!GV->hasComdat() && !GV->isDeclarationForLinker()) {
if (!EndLabel) {
OS.AddComment("Symbol subsection for globals");
EndLabel = beginCVSubsection(ModuleSubstreamKind::Symbols);
}
- emitDebugInfoForGlobal(G, Asm->getSymbol(GV));
+ // FIXME: emitDebugInfoForGlobal() doesn't handle DIExpressions.
+ emitDebugInfoForGlobal(GVE->getVariable(), GV, Asm->getSymbol(GV));
}
- }
}
if (EndLabel)
endCVSubsection(EndLabel);
// Second, emit each global that is in a comdat into its own .debug$S
// section along with its own symbol substream.
- for (const DIGlobalVariable *G : CU->getGlobalVariables()) {
- if (const auto *GV = dyn_cast_or_null<GlobalVariable>(G->getVariable())) {
+ for (const auto *GVE : CU->getGlobalVariables()) {
+ if (const auto *GV = GlobalMap.lookup(GVE)) {
if (GV->hasComdat()) {
MCSymbol *GVSym = Asm->getSymbol(GV);
OS.AddComment("Symbol subsection for " +
Twine(GlobalValue::getRealLinkageName(GV->getName())));
switchToDebugSectionForSymbol(GVSym);
EndLabel = beginCVSubsection(ModuleSubstreamKind::Symbols);
- emitDebugInfoForGlobal(G, GVSym);
+ // FIXME: emitDebugInfoForGlobal() doesn't handle DIExpressions.
+ emitDebugInfoForGlobal(GVE->getVariable(), GV, GVSym);
endCVSubsection(EndLabel);
}
}
@@ -2037,6 +2250,7 @@ void CodeViewDebug::emitDebugInfoForRetainedTypes() {
}
void CodeViewDebug::emitDebugInfoForGlobal(const DIGlobalVariable *DIGV,
+ const GlobalVariable *GV,
MCSymbol *GVSym) {
// DataSym record, see SymbolRecord.h for more info.
// FIXME: Thread local data, etc
@@ -2045,7 +2259,6 @@ void CodeViewDebug::emitDebugInfoForGlobal(const DIGlobalVariable *DIGV,
OS.AddComment("Record length");
OS.emitAbsoluteSymbolDiff(DataEnd, DataBegin, 2);
OS.EmitLabel(DataBegin);
- const auto *GV = cast<GlobalVariable>(DIGV->getVariable());
if (DIGV->isLocalToUnit()) {
if (GV->isThreadLocal()) {
OS.AddComment("Record kind: S_LTHREAD32");
@@ -2066,7 +2279,7 @@ void CodeViewDebug::emitDebugInfoForGlobal(const DIGlobalVariable *DIGV,
OS.AddComment("Type");
OS.EmitIntValue(getCompleteTypeIndex(DIGV->getType()).getIndex(), 4);
OS.AddComment("DataOffset");
- OS.EmitCOFFSecRel32(GVSym);
+ OS.EmitCOFFSecRel32(GVSym, /*Offset=*/0);
OS.AddComment("Segment");
OS.EmitCOFFSectionIndex(GVSym);
OS.AddComment("Name");
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
index e4bbd61d4ce0..3dd4315e4c2f 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h
@@ -20,8 +20,8 @@
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/DebugInfo/CodeView/MemoryTypeTableBuilder.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
+#include "llvm/DebugInfo/CodeView/TypeTableBuilder.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCStreamer.h"
@@ -36,7 +36,8 @@ struct ClassInfo;
/// \brief Collects and handles line tables information in a CodeView format.
class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
MCStreamer &OS;
- codeview::MemoryTypeTableBuilder TypeTable;
+ llvm::BumpPtrAllocator Allocator;
+ codeview::TypeTableBuilder TypeTable;
/// Represents the most general definition range.
struct LocalVarDefRange {
@@ -47,9 +48,11 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
/// Offset of variable data in memory.
int DataOffset : 31;
- /// Offset of the data into the user level struct. If zero, no splitting
- /// occurred.
- uint16_t StructOffset;
+ /// Non-zero if this is a piece of an aggregate.
+ uint16_t IsSubfield : 1;
+
+ /// Offset into aggregate.
+ uint16_t StructOffset : 15;
/// Register containing the data or the register base of the memory
/// location containing the data.
@@ -59,14 +62,18 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
/// ranges.
bool isDifferentLocation(LocalVarDefRange &O) {
return InMemory != O.InMemory || DataOffset != O.DataOffset ||
- StructOffset != O.StructOffset || CVRegister != O.CVRegister;
+ IsSubfield != O.IsSubfield || StructOffset != O.StructOffset ||
+ CVRegister != O.CVRegister;
}
SmallVector<std::pair<const MCSymbol *, const MCSymbol *>, 1> Ranges;
};
static LocalVarDefRange createDefRangeMem(uint16_t CVRegister, int Offset);
- static LocalVarDefRange createDefRangeReg(uint16_t CVRegister);
+ static LocalVarDefRange createDefRangeGeneral(uint16_t CVRegister,
+ bool InMemory, int Offset,
+ bool IsSubfield,
+ uint16_t StructOffset);
/// Similar to DbgVariable in DwarfDebug, but not dwarf-specific.
struct LocalVariable {
@@ -190,6 +197,8 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
void emitTypeInformation();
+ void emitCompilerInformation();
+
void emitInlineeLinesSubsection();
void emitDebugInfoForFunction(const Function *GV, FunctionInfo &FI);
@@ -201,7 +210,8 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
void emitDebugInfoForUDTs(
ArrayRef<std::pair<std::string, codeview::TypeIndex>> UDTs);
- void emitDebugInfoForGlobal(const DIGlobalVariable *DIGV, MCSymbol *GVSym);
+ void emitDebugInfoForGlobal(const DIGlobalVariable *DIGV,
+ const GlobalVariable *GV, MCSymbol *GVSym);
/// Opens a subsection of the given kind in a .debug$S codeview section.
/// Returns an end label for use with endCVSubsection when the subsection is
@@ -217,7 +227,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
void collectVariableInfo(const DISubprogram *SP);
- void collectVariableInfoFromMMITable(DenseSet<InlinedVariable> &Processed);
+ void collectVariableInfoFromMFTable(DenseSet<InlinedVariable> &Processed);
/// Records information about a local variable in the appropriate scope. In
/// particular, locals from inlined code live inside the inlining site.
@@ -251,6 +261,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
codeview::TypeIndex lowerTypeMemberPointer(const DIDerivedType *Ty);
codeview::TypeIndex lowerTypeModifier(const DIDerivedType *Ty);
codeview::TypeIndex lowerTypeFunction(const DISubroutineType *Ty);
+ codeview::TypeIndex lowerTypeVFTableShape(const DIDerivedType *Ty);
codeview::TypeIndex lowerTypeMemberFunction(const DISubroutineType *Ty,
const DIType *ClassTy,
int ThisAdjustment);
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
index 2aaa85a58094..8ae2f2487cad 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -63,10 +63,10 @@ void DIEAbbrev::Profile(FoldingSetNodeID &ID) const {
///
void DIEAbbrev::Emit(const AsmPrinter *AP) const {
// Emit its Dwarf tag type.
- AP->EmitULEB128(Tag, dwarf::TagString(Tag));
+ AP->EmitULEB128(Tag, dwarf::TagString(Tag).data());
// Emit whether it has children DIEs.
- AP->EmitULEB128((unsigned)Children, dwarf::ChildrenString(Children));
+ AP->EmitULEB128((unsigned)Children, dwarf::ChildrenString(Children).data());
// For each attribute description.
for (unsigned i = 0, N = Data.size(); i < N; ++i) {
@@ -74,11 +74,11 @@ void DIEAbbrev::Emit(const AsmPrinter *AP) const {
// Emit attribute type.
AP->EmitULEB128(AttrData.getAttribute(),
- dwarf::AttributeString(AttrData.getAttribute()));
+ dwarf::AttributeString(AttrData.getAttribute()).data());
// Emit form type.
AP->EmitULEB128(AttrData.getForm(),
- dwarf::FormEncodingString(AttrData.getForm()));
+ dwarf::FormEncodingString(AttrData.getForm()).data());
}
// Mark end of abbreviation.
@@ -108,6 +108,55 @@ void DIEAbbrev::print(raw_ostream &O) {
LLVM_DUMP_METHOD
void DIEAbbrev::dump() { print(dbgs()); }
+//===----------------------------------------------------------------------===//
+// DIEAbbrevSet Implementation
+//===----------------------------------------------------------------------===//
+
+DIEAbbrevSet::~DIEAbbrevSet() {
+ for (DIEAbbrev *Abbrev : Abbreviations)
+ Abbrev->~DIEAbbrev();
+}
+
+DIEAbbrev &DIEAbbrevSet::uniqueAbbreviation(DIE &Die) {
+
+ FoldingSetNodeID ID;
+ DIEAbbrev Abbrev = Die.generateAbbrev();
+ Abbrev.Profile(ID);
+
+ void *InsertPos;
+ if (DIEAbbrev *Existing =
+ AbbreviationsSet.FindNodeOrInsertPos(ID, InsertPos)) {
+ Die.setAbbrevNumber(Existing->getNumber());
+ return *Existing;
+ }
+
+ // Move the abbreviation to the heap and assign a number.
+ DIEAbbrev *New = new (Alloc) DIEAbbrev(std::move(Abbrev));
+ Abbreviations.push_back(New);
+ New->setNumber(Abbreviations.size());
+ Die.setAbbrevNumber(Abbreviations.size());
+
+ // Store it for lookup.
+ AbbreviationsSet.InsertNode(New, InsertPos);
+ return *New;
+}
+
+void DIEAbbrevSet::Emit(const AsmPrinter *AP, MCSection *Section) const {
+ if (!Abbreviations.empty()) {
+ // Start the debug abbrev section.
+ AP->OutStreamer->SwitchSection(Section);
+ AP->emitDwarfAbbrevs(Abbreviations);
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// DIE Implementation
+//===----------------------------------------------------------------------===//
+
+DIE *DIE::getParent() const {
+ return Owner.dyn_cast<DIE*>();
+}
+
DIEAbbrev DIE::generateAbbrev() const {
DIEAbbrev Abbrev(Tag, hasChildren());
for (const DIEValue &V : values())
@@ -115,17 +164,13 @@ DIEAbbrev DIE::generateAbbrev() const {
return Abbrev;
}
-/// Climb up the parent chain to get the unit DIE to which this DIE
-/// belongs.
-const DIE *DIE::getUnit() const {
- const DIE *Cu = getUnitOrNull();
- assert(Cu && "We should not have orphaned DIEs.");
- return Cu;
+unsigned DIE::getDebugSectionOffset() const {
+ const DIEUnit *Unit = getUnit();
+ assert(Unit && "DIE must be owned by a DIEUnit to get its absolute offset");
+ return Unit->getDebugSectionOffset() + getOffset();
}
-/// Climb up the parent chain to get the unit DIE this DIE belongs
-/// to. Return NULL if DIE is not added to an owner yet.
-const DIE *DIE::getUnitOrNull() const {
+const DIE *DIE::getUnitDie() const {
const DIE *p = this;
while (p) {
if (p->getTag() == dwarf::DW_TAG_compile_unit ||
@@ -136,6 +181,13 @@ const DIE *DIE::getUnitOrNull() const {
return nullptr;
}
+const DIEUnit *DIE::getUnit() const {
+ const DIE *UnitDie = getUnitDie();
+ if (UnitDie)
+ return UnitDie->Owner.dyn_cast<DIEUnit*>();
+ return nullptr;
+}
+
DIEValue DIE::findAttribute(dwarf::Attribute Attribute) const {
// Iterate through all the attributes until we find the one we're
// looking for, if we can't find it return NULL.
@@ -191,6 +243,55 @@ void DIE::dump() {
print(dbgs());
}
+unsigned DIE::computeOffsetsAndAbbrevs(const AsmPrinter *AP,
+ DIEAbbrevSet &AbbrevSet,
+ unsigned CUOffset) {
+ // Unique the abbreviation and fill in the abbreviation number so this DIE
+ // can be emitted.
+ const DIEAbbrev &Abbrev = AbbrevSet.uniqueAbbreviation(*this);
+
+ // Set compile/type unit relative offset of this DIE.
+ setOffset(CUOffset);
+
+ // Add the byte size of the abbreviation code.
+ CUOffset += getULEB128Size(getAbbrevNumber());
+
+ // Add the byte size of all the DIE attribute values.
+ for (const auto &V : values())
+ CUOffset += V.SizeOf(AP);
+
+ // Let the children compute their offsets and abbreviation numbers.
+ if (hasChildren()) {
+ (void)Abbrev;
+ assert(Abbrev.hasChildren() && "Children flag not set");
+
+ for (auto &Child : children())
+ CUOffset = Child.computeOffsetsAndAbbrevs(AP, AbbrevSet, CUOffset);
+
+ // Each child chain is terminated with a zero byte, adjust the offset.
+ CUOffset += sizeof(int8_t);
+ }
+
+ // Compute the byte size of this DIE and all of its children correctly. This
+ // is needed so that top level DIE can help the compile unit set its length
+ // correctly.
+ setSize(CUOffset - getOffset());
+ return CUOffset;
+}
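A hypothetical size computation for one DIE, to make the offset bookkeeping above concrete (all byte counts illustrative):
  // abbrev code 3          -> getULEB128Size(3) = 1 byte
  // attribute values       -> 10 bytes
  // two children           -> 6 + 8 = 14 bytes (computed by the recursive calls)
  // end-of-children marker -> 1 byte
  // setSize(1 + 10 + 14 + 1) = 26, and the returned CUOffset advances by 26.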
+
+//===----------------------------------------------------------------------===//
+// DIEUnit Implementation
+//===----------------------------------------------------------------------===//
+DIEUnit::DIEUnit(uint16_t V, uint8_t A, dwarf::Tag UnitTag)
+ : Die(UnitTag), Section(nullptr), Offset(0), Length(0), Version(V),
+ AddrSize(A)
+{
+ Die.Owner = this;
+ assert((UnitTag == dwarf::DW_TAG_compile_unit ||
+ UnitTag == dwarf::DW_TAG_type_unit ||
+ UnitTag == dwarf::DW_TAG_partial_unit) && "expected a unit TAG");
+}
+
void DIEValue::EmitValue(const AsmPrinter *AP) const {
switch (Ty) {
case isNone:
@@ -240,38 +341,65 @@ void DIEValue::dump() const {
/// EmitValue - Emit integer of appropriate size.
///
void DIEInteger::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
- unsigned Size = ~0U;
switch (Form) {
case dwarf::DW_FORM_flag_present:
// Emit something to keep the lines and comments in sync.
// FIXME: Is there a better way to do this?
Asm->OutStreamer->AddBlankLine();
return;
- case dwarf::DW_FORM_flag: // Fall thru
- case dwarf::DW_FORM_ref1: // Fall thru
- case dwarf::DW_FORM_data1: Size = 1; break;
- case dwarf::DW_FORM_ref2: // Fall thru
- case dwarf::DW_FORM_data2: Size = 2; break;
- case dwarf::DW_FORM_sec_offset: // Fall thru
- case dwarf::DW_FORM_strp: // Fall thru
- case dwarf::DW_FORM_ref4: // Fall thru
- case dwarf::DW_FORM_data4: Size = 4; break;
- case dwarf::DW_FORM_ref8: // Fall thru
- case dwarf::DW_FORM_ref_sig8: // Fall thru
- case dwarf::DW_FORM_data8: Size = 8; break;
- case dwarf::DW_FORM_GNU_str_index: Asm->EmitULEB128(Integer); return;
- case dwarf::DW_FORM_GNU_addr_index: Asm->EmitULEB128(Integer); return;
- case dwarf::DW_FORM_udata: Asm->EmitULEB128(Integer); return;
- case dwarf::DW_FORM_sdata: Asm->EmitSLEB128(Integer); return;
+ case dwarf::DW_FORM_flag:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_ref1:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_data1:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_ref2:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_data2:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_strp:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_ref4:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_data4:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_ref8:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_ref_sig8:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_data8:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_GNU_ref_alt:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_GNU_strp_alt:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_line_strp:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_sec_offset:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_strp_sup:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_ref_sup:
+ LLVM_FALLTHROUGH;
case dwarf::DW_FORM_addr:
- Size = Asm->getPointerSize();
- break;
+ LLVM_FALLTHROUGH;
case dwarf::DW_FORM_ref_addr:
- Size = SizeOf(Asm, dwarf::DW_FORM_ref_addr);
- break;
+ Asm->OutStreamer->EmitIntValue(Integer, SizeOf(Asm, Form));
+ return;
+ case dwarf::DW_FORM_GNU_str_index:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_GNU_addr_index:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_ref_udata:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_udata:
+ Asm->EmitULEB128(Integer);
+ return;
+ case dwarf::DW_FORM_sdata:
+ Asm->EmitSLEB128(Integer);
+ return;
default: llvm_unreachable("DIE Value form not supported yet");
}
- Asm->OutStreamer->EmitIntValue(Integer, Size);
}
/// SizeOf - Determine size of integer value in bytes.
@@ -279,28 +407,52 @@ void DIEInteger::EmitValue(const AsmPrinter *Asm, dwarf::Form Form) const {
unsigned DIEInteger::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
switch (Form) {
case dwarf::DW_FORM_flag_present: return 0;
- case dwarf::DW_FORM_flag: // Fall thru
- case dwarf::DW_FORM_ref1: // Fall thru
+ case dwarf::DW_FORM_flag: LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_ref1: LLVM_FALLTHROUGH;
case dwarf::DW_FORM_data1: return sizeof(int8_t);
- case dwarf::DW_FORM_ref2: // Fall thru
+ case dwarf::DW_FORM_ref2: LLVM_FALLTHROUGH;
case dwarf::DW_FORM_data2: return sizeof(int16_t);
- case dwarf::DW_FORM_sec_offset: // Fall thru
- case dwarf::DW_FORM_strp: // Fall thru
- case dwarf::DW_FORM_ref4: // Fall thru
+ case dwarf::DW_FORM_ref4: LLVM_FALLTHROUGH;
case dwarf::DW_FORM_data4: return sizeof(int32_t);
- case dwarf::DW_FORM_ref8: // Fall thru
- case dwarf::DW_FORM_ref_sig8: // Fall thru
+ case dwarf::DW_FORM_ref8: LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_ref_sig8: LLVM_FALLTHROUGH;
case dwarf::DW_FORM_data8: return sizeof(int64_t);
- case dwarf::DW_FORM_GNU_str_index: return getULEB128Size(Integer);
- case dwarf::DW_FORM_GNU_addr_index: return getULEB128Size(Integer);
- case dwarf::DW_FORM_udata: return getULEB128Size(Integer);
- case dwarf::DW_FORM_sdata: return getSLEB128Size(Integer);
- case dwarf::DW_FORM_addr:
- return AP->getPointerSize();
case dwarf::DW_FORM_ref_addr:
- if (AP->OutStreamer->getContext().getDwarfVersion() == 2)
+ if (AP->getDwarfVersion() == 2)
return AP->getPointerSize();
- return sizeof(int32_t);
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_strp:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_GNU_ref_alt:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_GNU_strp_alt:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_line_strp:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_sec_offset:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_strp_sup:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_ref_sup:
+ switch (AP->OutStreamer->getContext().getDwarfFormat()) {
+ case dwarf::DWARF32:
+ return 4;
+ case dwarf::DWARF64:
+ return 8;
+ }
+ llvm_unreachable("Invalid DWARF format");
+ case dwarf::DW_FORM_GNU_str_index:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_GNU_addr_index:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_ref_udata:
+ LLVM_FALLTHROUGH;
+ case dwarf::DW_FORM_udata:
+ return getULEB128Size(Integer);
+ case dwarf::DW_FORM_sdata:
+ return getSLEB128Size(Integer);
+ case dwarf::DW_FORM_addr:
+ return AP->getPointerSize();
default: llvm_unreachable("DIE Value form not supported yet");
}
}
@@ -343,7 +495,8 @@ void DIELabel::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
AP->EmitLabelReference(Label, SizeOf(AP, Form),
Form == dwarf::DW_FORM_strp ||
Form == dwarf::DW_FORM_sec_offset ||
- Form == dwarf::DW_FORM_ref_addr);
+ Form == dwarf::DW_FORM_ref_addr ||
+ Form == dwarf::DW_FORM_data4);
}
/// SizeOf - Determine size of label value in bytes.
@@ -435,6 +588,29 @@ void DIEString::print(raw_ostream &O) const {
}
//===----------------------------------------------------------------------===//
+// DIEInlineString Implementation
+//===----------------------------------------------------------------------===//
+void DIEInlineString::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
+ if (Form == dwarf::DW_FORM_string) {
+ for (char ch : S)
+ AP->EmitInt8(ch);
+ AP->EmitInt8(0);
+ return;
+ }
+ llvm_unreachable("Expected valid string form");
+}
+
+unsigned DIEInlineString::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
+ // Emit string bytes + NULL byte.
+ return S.size() + 1;
+}
+
+LLVM_DUMP_METHOD
+void DIEInlineString::print(raw_ostream &O) const {
+ O << "InlineString: " << S;
+}
+
+//===----------------------------------------------------------------------===//
// DIEEntry Implementation
//===----------------------------------------------------------------------===//
@@ -442,35 +618,69 @@ void DIEString::print(raw_ostream &O) const {
///
void DIEEntry::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
- if (Form == dwarf::DW_FORM_ref_addr) {
- const DwarfDebug *DD = AP->getDwarfDebug();
- unsigned Addr = Entry->getOffset();
- assert(!DD->useSplitDwarf() && "TODO: dwo files can't have relocations.");
- // For DW_FORM_ref_addr, output the offset from beginning of debug info
- // section. Entry->getOffset() returns the offset from start of the
- // compile unit.
- DwarfCompileUnit *CU = DD->lookupUnit(Entry->getUnit());
- assert(CU && "CUDie should belong to a CU.");
- Addr += CU->getDebugInfoOffset();
- if (AP->MAI->doesDwarfUseRelocationsAcrossSections())
- AP->EmitLabelPlusOffset(CU->getSectionSym(), Addr,
- DIEEntry::getRefAddrSize(AP));
- else
- AP->OutStreamer->EmitIntValue(Addr, DIEEntry::getRefAddrSize(AP));
- } else
- AP->EmitInt32(Entry->getOffset());
-}
-
-unsigned DIEEntry::getRefAddrSize(const AsmPrinter *AP) {
- // DWARF4: References that use the attribute form DW_FORM_ref_addr are
- // specified to be four bytes in the DWARF 32-bit format and eight bytes
- // in the DWARF 64-bit format, while DWARF Version 2 specifies that such
- // references have the same size as an address on the target system.
- const DwarfDebug *DD = AP->getDwarfDebug();
- assert(DD && "Expected Dwarf Debug info to be available");
- if (DD->getDwarfVersion() == 2)
- return AP->getPointerSize();
- return sizeof(int32_t);
+ switch (Form) {
+ case dwarf::DW_FORM_ref1:
+ case dwarf::DW_FORM_ref2:
+ case dwarf::DW_FORM_ref4:
+ case dwarf::DW_FORM_ref8:
+ AP->OutStreamer->EmitIntValue(Entry->getOffset(), SizeOf(AP, Form));
+ return;
+
+ case dwarf::DW_FORM_ref_udata:
+ AP->EmitULEB128(Entry->getOffset());
+ return;
+
+ case dwarf::DW_FORM_ref_addr: {
+ // Get the absolute offset for this DIE within the debug info/types section.
+ unsigned Addr = Entry->getDebugSectionOffset();
+ if (AP->MAI->doesDwarfUseRelocationsAcrossSections()) {
+ const DwarfDebug *DD = AP->getDwarfDebug();
+ if (DD)
+ assert(!DD->useSplitDwarf() &&
+ "TODO: dwo files can't have relocations.");
+ const DIEUnit *Unit = Entry->getUnit();
+ assert(Unit && "CUDie should belong to a CU.");
+ MCSection *Section = Unit->getSection();
+ if (Section) {
+ const MCSymbol *SectionSym = Section->getBeginSymbol();
+ AP->EmitLabelPlusOffset(SectionSym, Addr, SizeOf(AP, Form), true);
+ return;
+ }
+ }
+ AP->OutStreamer->EmitIntValue(Addr, SizeOf(AP, Form));
+ return;
+ }
+ default:
+ llvm_unreachable("Improper form for DIE reference");
+ }
+}
+
+unsigned DIEEntry::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
+ switch (Form) {
+ case dwarf::DW_FORM_ref1:
+ return 1;
+ case dwarf::DW_FORM_ref2:
+ return 2;
+ case dwarf::DW_FORM_ref4:
+ return 4;
+ case dwarf::DW_FORM_ref8:
+ return 8;
+ case dwarf::DW_FORM_ref_udata:
+ return getULEB128Size(Entry->getOffset());
+ case dwarf::DW_FORM_ref_addr:
+ if (AP->getDwarfVersion() == 2)
+ return AP->getPointerSize();
+ switch (AP->OutStreamer->getContext().getDwarfFormat()) {
+ case dwarf::DWARF32:
+ return 4;
+ case dwarf::DWARF64:
+ return 8;
+ }
+ llvm_unreachable("Invalid DWARF format");
+
+ default:
+ llvm_unreachable("Improper form for DIE reference");
+ }
}
LLVM_DUMP_METHOD
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
index 74c47d151c62..d8ecc7ccfb9b 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
@@ -330,6 +330,12 @@ void DIEHash::hashAttribute(const DIEValue &Value, dwarf::Tag Tag) {
addULEB128(dwarf::DW_FORM_string);
addString(Value.getDIEString().getString());
break;
+ case DIEValue::isInlineString:
+ addULEB128('A');
+ addULEB128(Attribute);
+ addULEB128(dwarf::DW_FORM_string);
+ addString(Value.getDIEInlineString().getString());
+ break;
case DIEValue::isBlock:
case DIEValue::isLoc:
case DIEValue::isLocList:
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
index adc536f1add8..22fd7bb46056 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
@@ -31,7 +31,7 @@ static unsigned isDescribedByReg(const MachineInstr &MI) {
assert(MI.isDebugValue());
assert(MI.getNumOperands() == 4);
// If location of variable is described using a register (directly or
- // indirecltly), this register is always a first operand.
+ // indirectly), this register is always a first operand.
return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : 0;
}
@@ -83,7 +83,7 @@ static void dropRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo,
const auto &I = RegVars.find(RegNo);
assert(RegNo != 0U && I != RegVars.end());
auto &VarSet = I->second;
- const auto &VarPos = std::find(VarSet.begin(), VarSet.end(), Var);
+ const auto &VarPos = find(VarSet, Var);
assert(VarPos != VarSet.end());
VarSet.erase(VarPos);
// Don't keep empty sets in a map to keep it as small as possible.
@@ -96,7 +96,7 @@ static void addRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo,
InlinedVariable Var) {
assert(RegNo != 0U);
auto &VarSet = RegVars[RegNo];
- assert(std::find(VarSet.begin(), VarSet.end(), Var) == VarSet.end());
+ assert(!is_contained(VarSet, Var));
VarSet.push_back(Var);
}
@@ -134,8 +134,8 @@ static const MachineInstr *getFirstEpilogueInst(const MachineBasicBlock &MBB) {
// as the return instruction.
DebugLoc LastLoc = LastMI->getDebugLoc();
auto Res = LastMI;
- for (MachineBasicBlock::const_reverse_iterator I(std::next(LastMI)),
- E = MBB.rend();
+ for (MachineBasicBlock::const_reverse_iterator I = LastMI.getReverse(),
+ E = MBB.rend();
I != E; ++I) {
if (I->getDebugLoc() != LastLoc)
return &*Res;
@@ -164,7 +164,9 @@ static void collectChangingRegs(const MachineFunction *MF,
// Look for register defs and register masks. Register masks are
// typically on calls and they clobber everything not in the mask.
for (const MachineOperand &MO : MI.operands()) {
- if (MO.isReg() && MO.isDef() && MO.getReg()) {
+ // Skip virtual registers since they are handled by the parent.
+ if (MO.isReg() && MO.isDef() && MO.getReg() &&
+ !TRI->isVirtualRegister(MO.getReg())) {
for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid();
++AI)
Regs.set(*AI);
@@ -192,12 +194,18 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF,
// some variables.
for (const MachineOperand &MO : MI.operands()) {
if (MO.isReg() && MO.isDef() && MO.getReg()) {
+ // If this is a virtual register, only clobber it since it doesn't
+ // have aliases.
+ if (TRI->isVirtualRegister(MO.getReg()))
+ clobberRegisterUses(RegVars, MO.getReg(), Result, MI);
// If this is a register def operand, it may end a debug value
// range.
- for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid();
- ++AI)
- if (ChangingRegs.test(*AI))
- clobberRegisterUses(RegVars, *AI, Result, MI);
+ else {
+ for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid();
+ ++AI)
+ if (ChangingRegs.test(*AI))
+ clobberRegisterUses(RegVars, *AI, Result, MI);
+ }
} else if (MO.isRegMask()) {
// If this is a register mask operand, clobber all debug values in
// non-CSRs.
@@ -238,7 +246,8 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF,
if (!MBB.empty() && &MBB != &MF->back()) {
for (auto I = RegVars.begin(), E = RegVars.end(); I != E;) {
auto CurElem = I++; // CurElem can be erased below.
- if (ChangingRegs.test(CurElem->first))
+ if (TRI->isVirtualRegister(CurElem->first) ||
+ ChangingRegs.test(CurElem->first))
clobberRegisterUses(RegVars, CurElem, Result, MBB.back());
}
}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
index 16ffe2e15acd..94190981e88e 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
@@ -18,6 +18,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/IR/DebugInfo.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -62,14 +63,14 @@ MCSymbol *DebugHandlerBase::getLabelAfterInsn(const MachineInstr *MI) {
return LabelsAfterInsn.lookup(MI);
}
-// Determine the relative position of the pieces described by P1 and P2.
-// Returns -1 if P1 is entirely before P2, 0 if P1 and P2 overlap,
-// 1 if P1 is entirely after P2.
-int DebugHandlerBase::pieceCmp(const DIExpression *P1, const DIExpression *P2) {
- unsigned l1 = P1->getBitPieceOffset();
- unsigned l2 = P2->getBitPieceOffset();
- unsigned r1 = l1 + P1->getBitPieceSize();
- unsigned r2 = l2 + P2->getBitPieceSize();
+int DebugHandlerBase::fragmentCmp(const DIExpression *P1,
+ const DIExpression *P2) {
+ auto Fragment1 = *P1->getFragmentInfo();
+ auto Fragment2 = *P2->getFragmentInfo();
+ unsigned l1 = Fragment1.OffsetInBits;
+ unsigned l2 = Fragment2.OffsetInBits;
+ unsigned r1 = l1 + Fragment1.SizeInBits;
+ unsigned r2 = l2 + Fragment2.SizeInBits;
if (r1 <= l2)
return -1;
else if (r2 <= l1)
@@ -78,11 +79,11 @@ int DebugHandlerBase::pieceCmp(const DIExpression *P1, const DIExpression *P2) {
return 0;
}
-/// Determine whether two variable pieces overlap.
-bool DebugHandlerBase::piecesOverlap(const DIExpression *P1, const DIExpression *P2) {
- if (!P1->isBitPiece() || !P2->isBitPiece())
+bool DebugHandlerBase::fragmentsOverlap(const DIExpression *P1,
+ const DIExpression *P2) {
+ if (!P1->isFragment() || !P2->isFragment())
return true;
- return pieceCmp(P1, P2) == 0;
+ return fragmentCmp(P1, P2) == 0;
}
/// If this type is derived from a base type then return base type size.
@@ -97,7 +98,7 @@ uint64_t DebugHandlerBase::getBaseTypeSize(const DITypeRef TyRef) {
if (Tag != dwarf::DW_TAG_member && Tag != dwarf::DW_TAG_typedef &&
Tag != dwarf::DW_TAG_const_type && Tag != dwarf::DW_TAG_volatile_type &&
- Tag != dwarf::DW_TAG_restrict_type)
+ Tag != dwarf::DW_TAG_restrict_type && Tag != dwarf::DW_TAG_atomic_type)
return DDTy->getSizeInBits();
DIType *BaseType = DDTy->getBaseType().resolve();
@@ -141,14 +142,15 @@ void DebugHandlerBase::beginFunction(const MachineFunction *MF) {
if (DIVar->isParameter() &&
getDISubprogram(DIVar->getScope())->describes(MF->getFunction())) {
LabelsBeforeInsn[Ranges.front().first] = Asm->getFunctionBegin();
- if (Ranges.front().first->getDebugExpression()->isBitPiece()) {
- // Mark all non-overlapping initial pieces.
+ if (Ranges.front().first->getDebugExpression()->isFragment()) {
+ // Mark all non-overlapping initial fragments.
for (auto I = Ranges.begin(); I != Ranges.end(); ++I) {
- const DIExpression *Piece = I->first->getDebugExpression();
+ const DIExpression *Fragment = I->first->getDebugExpression();
if (std::all_of(Ranges.begin(), I,
[&](DbgValueHistoryMap::InstrRange Pred) {
- return !piecesOverlap(Piece, Pred.first->getDebugExpression());
- }))
+ return !fragmentsOverlap(
+ Fragment, Pred.first->getDebugExpression());
+ }))
LabelsBeforeInsn[I->first] = Asm->getFunctionBegin();
else
break;
@@ -200,8 +202,10 @@ void DebugHandlerBase::endInstruction() {
assert(CurMI != nullptr);
// Don't create a new label after DBG_VALUE instructions.
// They don't generate code.
- if (!CurMI->isDebugValue())
+ if (!CurMI->isDebugValue()) {
PrevLabel = nullptr;
+ PrevInstBB = CurMI->getParent();
+ }
DenseMap<const MachineInstr *, MCSymbol *>::iterator I =
LabelsAfterInsn.find(CurMI);
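
The pieceCmp-to-fragmentCmp rename above keeps the interval arithmetic unchanged and only switches the offset/size source to DIExpression::getFragmentInfo(). A standalone sketch of that comparison, using a stand-in FragmentInfo struct with the same OffsetInBits/SizeInBits fields:

    // Stand-in for the offset/size pair returned by getFragmentInfo().
    struct FragmentInfo {
      unsigned OffsetInBits;
      unsigned SizeInBits;
    };

    // Returns -1 if A is entirely before B, 1 if A is entirely after B,
    // and 0 if the two bit ranges overlap.
    static int fragmentCmp(FragmentInfo A, FragmentInfo B) {
      unsigned L1 = A.OffsetInBits, R1 = L1 + A.SizeInBits;
      unsigned L2 = B.OffsetInBits, R2 = L2 + B.SizeInBits;
      if (R1 <= L2)
        return -1;
      if (R2 <= L1)
        return 1;
      return 0;
    }

    // Two fragments of a variable conflict only when their ranges overlap.
    static bool fragmentsOverlap(FragmentInfo A, FragmentInfo B) {
      return fragmentCmp(A, B) == 0;
    }
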
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h
index b8bbcec133fd..c00fa189d94a 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.h
@@ -38,10 +38,12 @@ protected:
MachineModuleInfo *MMI;
/// Previous instruction's location information. This is used to
- /// determine label location to indicate scope boundries in dwarf
- /// debug info.
+ /// determine label location to indicate scope boundaries in debug info.
+ /// We track the previous instruction's source location (if not line 0),
+ /// whether it was a label, and its parent BB.
DebugLoc PrevInstLoc;
MCSymbol *PrevLabel = nullptr;
+ const MachineBasicBlock *PrevInstBB = nullptr;
/// This location indicates end of function prologue and beginning of
/// function body.
@@ -92,13 +94,13 @@ public:
/// Return Label immediately following the instruction.
MCSymbol *getLabelAfterInsn(const MachineInstr *MI);
- /// Determine the relative position of the pieces described by P1 and P2.
- /// Returns -1 if P1 is entirely before P2, 0 if P1 and P2 overlap,
- /// 1 if P1 is entirely after P2.
- static int pieceCmp(const DIExpression *P1, const DIExpression *P2);
+ /// Determine the relative position of the fragments described by P1 and P2.
+ /// Returns -1 if P1 is entirely before P2, 0 if P1 and P2 overlap, 1 if P1 is
+ /// entirely after P2.
+ static int fragmentCmp(const DIExpression *P1, const DIExpression *P2);
- /// Determine whether two variable pieces overlap.
- static bool piecesOverlap(const DIExpression *P1, const DIExpression *P2);
+ /// Determine whether two variable fragments overlap.
+ static bool fragmentsOverlap(const DIExpression *P1, const DIExpression *P2);
/// If this type is derived from a base type then return base type size.
static uint64_t getBaseTypeSize(const DITypeRef TyRef);
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
index 20acd45e5720..36fb1507ddc6 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DebugLocEntry.h
@@ -72,7 +72,7 @@ public:
const ConstantFP *getConstantFP() const { return Constant.CFP; }
const ConstantInt *getConstantInt() const { return Constant.CIP; }
MachineLocation getLoc() const { return Loc; }
- bool isBitPiece() const { return getExpression()->isBitPiece(); }
+ bool isFragment() const { return getExpression()->isFragment(); }
const DIExpression *getExpression() const { return Expression; }
friend bool operator==(const Value &, const Value &);
friend bool operator<(const Value &, const Value &);
@@ -128,8 +128,8 @@ public:
void addValues(ArrayRef<DebugLocEntry::Value> Vals) {
Values.append(Vals.begin(), Vals.end());
sortUniqueValues();
- assert(std::all_of(Values.begin(), Values.end(), [](DebugLocEntry::Value V){
- return V.isBitPiece();
+ assert(all_of(Values, [](DebugLocEntry::Value V) {
+ return V.isFragment();
}) && "value must be a piece");
}
@@ -172,11 +172,11 @@ inline bool operator==(const DebugLocEntry::Value &A,
llvm_unreachable("unhandled EntryKind");
}
-/// \brief Compare two pieces based on their offset.
+/// Compare two fragments based on their offset.
inline bool operator<(const DebugLocEntry::Value &A,
const DebugLocEntry::Value &B) {
- return A.getExpression()->getBitPieceOffset() <
- B.getExpression()->getBitPieceOffset();
+ return A.getExpression()->getFragmentInfo()->OffsetInBits <
+ B.getExpression()->getFragmentInfo()->OffsetInBits;
}
}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
index 4ad3e1867328..9c324ea26ac8 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
@@ -221,9 +221,7 @@ void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D) {
Asm->EmitInt32((*HI)->Data.Values.size());
for (HashDataContents *HD : (*HI)->Data.Values) {
// Emit the DIE offset
- DwarfCompileUnit *CU = D->lookupUnit(HD->Die->getUnit());
- assert(CU && "Accelerated DIE should belong to a CU.");
- Asm->EmitInt32(HD->Die->getOffset() + CU->getDebugInfoOffset());
+ Asm->EmitInt32(HD->Die->getDebugSectionOffset());
// If we have multiple Atoms emit that info too.
// FIXME: A bit of a hack, we either emit only one atom or all info.
if (HeaderData.Atoms.size() > 1) {
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
index 4d81441f6a72..05ac1cb02f76 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
@@ -126,8 +126,7 @@ public:
uint16_t type; // enum AtomType
uint16_t form; // DWARF DW_FORM_ defines
- LLVM_CONSTEXPR Atom(uint16_t type, uint16_t form)
- : type(type), form(form) {}
+ constexpr Atom(uint16_t type, uint16_t form) : type(type), form(form) {}
#ifndef NDEBUG
void print(raw_ostream &O) {
O << "Type: " << dwarf::AtomTypeString(type) << "\n"
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index 2eae1b234473..ef30e279aed2 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -39,16 +39,16 @@
using namespace llvm;
DwarfCFIExceptionBase::DwarfCFIExceptionBase(AsmPrinter *A)
- : EHStreamer(A), shouldEmitCFI(false) {}
+ : EHStreamer(A), shouldEmitCFI(false), hasEmittedCFISections(false) {}
void DwarfCFIExceptionBase::markFunctionEnd() {
endFragment();
- if (MMI->getLandingPads().empty())
- return;
-
// Map all labels and get rid of any dead landing pads.
- MMI->TidyLandingPads();
+ if (!Asm->MF->getLandingPads().empty()) {
+ MachineFunction *NonConstMF = const_cast<MachineFunction*>(Asm->MF);
+ NonConstMF->tidyLandingPads();
+ }
}
void DwarfCFIExceptionBase::endFragment() {
@@ -59,7 +59,7 @@ void DwarfCFIExceptionBase::endFragment() {
DwarfCFIException::DwarfCFIException(AsmPrinter *A)
: DwarfCFIExceptionBase(A), shouldEmitPersonality(false),
forceEmitPersonality(false), shouldEmitLSDA(false),
- shouldEmitMoves(false), moveTypeModule(AsmPrinter::CFI_M_None) {}
+ shouldEmitMoves(false) {}
DwarfCFIException::~DwarfCFIException() {}
@@ -70,9 +70,6 @@ void DwarfCFIException::endModule() {
if (!Asm->MAI->usesCFIForEH())
return;
- if (moveTypeModule == AsmPrinter::CFI_M_Debug)
- Asm->OutStreamer->EmitCFISections(false, true);
-
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
unsigned PerEncoding = TLOF.getPersonalityEncoding();
@@ -98,14 +95,10 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) {
const Function *F = MF->getFunction();
// If any landing pads survive, we need an EH table.
- bool hasLandingPads = !MMI->getLandingPads().empty();
+ bool hasLandingPads = !MF->getLandingPads().empty();
// See if we need frame move info.
AsmPrinter::CFIMoveType MoveType = Asm->needsCFIMoves();
- if (MoveType == AsmPrinter::CFI_M_EH ||
- (MoveType == AsmPrinter::CFI_M_Debug &&
- moveTypeModule == AsmPrinter::CFI_M_None))
- moveTypeModule = MoveType;
shouldEmitMoves = MoveType != AsmPrinter::CFI_M_None;
@@ -143,6 +136,12 @@ void DwarfCFIException::beginFragment(const MachineBasicBlock *MBB,
if (!shouldEmitCFI)
return;
+ if (!hasEmittedCFISections) {
+ if (Asm->needsCFIMoves() == AsmPrinter::CFI_M_Debug)
+ Asm->OutStreamer->EmitCFISections(false, true);
+ hasEmittedCFISections = true;
+ }
+
Asm->OutStreamer->EmitCFIStartProc(/*IsSimple=*/false);
// Indicate personality routine, if any.
@@ -160,8 +159,7 @@ void DwarfCFIException::beginFragment(const MachineBasicBlock *MBB,
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
unsigned PerEncoding = TLOF.getPersonalityEncoding();
- const MCSymbol *Sym =
- TLOF.getCFIPersonalitySymbol(P, *Asm->Mang, Asm->TM, MMI);
+ const MCSymbol *Sym = TLOF.getCFIPersonalitySymbol(P, Asm->TM, MMI);
Asm->OutStreamer->EmitCFIPersonality(Sym, PerEncoding);
// Provide LSDA information.
@@ -171,7 +169,7 @@ void DwarfCFIException::beginFragment(const MachineBasicBlock *MBB,
/// endFunction - Gather and emit post-function exception information.
///
-void DwarfCFIException::endFunction(const MachineFunction *) {
+void DwarfCFIException::endFunction(const MachineFunction *MF) {
if (!shouldEmitPersonality)
return;
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 7822814c7a0f..0db623bbc29a 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -73,36 +73,8 @@ unsigned DwarfCompileUnit::getOrCreateSourceID(StringRef FileName,
Asm->OutStreamer->hasRawTextSupport() ? 0 : getUniqueID());
}
-// Return const expression if value is a GEP to access merged global
-// constant. e.g.
-// i8* getelementptr ({ i8, i8, i8, i8 }* @_MergedGlobals, i32 0, i32 0)
-static const ConstantExpr *getMergedGlobalExpr(const Value *V) {
- const ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(V);
- if (!CE || CE->getNumOperands() != 3 ||
- CE->getOpcode() != Instruction::GetElementPtr)
- return nullptr;
-
- // First operand points to a global struct.
- Value *Ptr = CE->getOperand(0);
- GlobalValue *GV = dyn_cast<GlobalValue>(Ptr);
- if (!GV || !isa<StructType>(GV->getValueType()))
- return nullptr;
-
- // Second operand is zero.
- const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(CE->getOperand(1));
- if (!CI || !CI->isZero())
- return nullptr;
-
- // Third operand is offset.
- if (!isa<ConstantInt>(CE->getOperand(2)))
- return nullptr;
-
- return CE;
-}
-
-/// getOrCreateGlobalVariableDIE - get or create global variable DIE.
DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
- const DIGlobalVariable *GV) {
+ const DIGlobalVariable *GV, ArrayRef<GlobalExpr> GlobalExprs) {
// Check for pre-existence.
if (DIE *Die = getDIE(GV))
return Die;
@@ -126,6 +98,10 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
// We need the declaration DIE that is in the static member's class.
DIE *VariableSpecDIE = getOrCreateStaticMemberDIE(SDMDecl);
addDIEEntry(*VariableDIE, dwarf::DW_AT_specification, *VariableSpecDIE);
+ // If the global variable's type is different from the one in the class
+ // member type, assume that it's more specific and also emit it.
+ if (GTy != DD->resolve(SDMDecl->getBaseType()))
+ addType(*VariableDIE, GTy);
} else {
DeclContext = GV->getScope();
// Add name and type.
@@ -145,73 +121,82 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
else
addGlobalName(GV->getName(), *VariableDIE, DeclContext);
+ if (uint32_t AlignInBytes = GV->getAlignInBytes())
+ addUInt(*VariableDIE, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata,
+ AlignInBytes);
+
// Add location.
bool addToAccelTable = false;
- if (auto *Global = dyn_cast_or_null<GlobalVariable>(GV->getVariable())) {
- // We cannot describe the location of dllimport'd variables: the computation
- // of their address requires loads from the IAT.
- if (!Global->hasDLLImportStorageClass()) {
+ DIELoc *Loc = nullptr;
+ std::unique_ptr<DIEDwarfExpression> DwarfExpr;
+ bool AllConstant = std::all_of(
+ GlobalExprs.begin(), GlobalExprs.end(),
+ [&](const GlobalExpr GE) {
+ return GE.Expr && GE.Expr->isConstant();
+ });
+
+ for (const auto &GE : GlobalExprs) {
+ const GlobalVariable *Global = GE.Var;
+ const DIExpression *Expr = GE.Expr;
+ // For compatibility with DWARF 3 and earlier,
+ // DW_AT_location(DW_OP_constu, X, DW_OP_stack_value) becomes
+ // DW_AT_const_value(X).
+ if (GlobalExprs.size() == 1 && Expr && Expr->isConstant()) {
+ addConstantValue(*VariableDIE, /*Unsigned=*/true, Expr->getElement(1));
+ // We cannot describe the location of dllimport'd variables: the
+ // computation of their address requires loads from the IAT.
+ } else if ((Global && !Global->hasDLLImportStorageClass()) || AllConstant) {
+ if (!Loc) {
+ Loc = new (DIEValueAllocator) DIELoc;
+ DwarfExpr = llvm::make_unique<DIEDwarfExpression>(*Asm, *this, *Loc);
+ }
addToAccelTable = true;
- DIELoc *Loc = new (DIEValueAllocator) DIELoc;
- const MCSymbol *Sym = Asm->getSymbol(Global);
- if (Global->isThreadLocal()) {
- if (Asm->TM.Options.EmulatedTLS) {
- // TODO: add debug info for emulated thread local mode.
- } else {
- // FIXME: Make this work with -gsplit-dwarf.
- unsigned PointerSize = Asm->getDataLayout().getPointerSize();
- assert((PointerSize == 4 || PointerSize == 8) &&
- "Add support for other sizes if necessary");
- // Based on GCC's support for TLS:
- if (!DD->useSplitDwarf()) {
- // 1) Start with a constNu of the appropriate pointer size
- addUInt(*Loc, dwarf::DW_FORM_data1, PointerSize == 4
- ? dwarf::DW_OP_const4u
- : dwarf::DW_OP_const8u);
- // 2) containing the (relocated) offset of the TLS variable
- // within the module's TLS block.
- addExpr(*Loc, dwarf::DW_FORM_udata,
- Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym));
+ if (Global) {
+ const MCSymbol *Sym = Asm->getSymbol(Global);
+ if (Global->isThreadLocal()) {
+ if (Asm->TM.Options.EmulatedTLS) {
+ // TODO: add debug info for emulated thread local mode.
} else {
- addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index);
- addUInt(*Loc, dwarf::DW_FORM_udata,
- DD->getAddressPool().getIndex(Sym, /* TLS */ true));
+ // FIXME: Make this work with -gsplit-dwarf.
+ unsigned PointerSize = Asm->getDataLayout().getPointerSize();
+ assert((PointerSize == 4 || PointerSize == 8) &&
+ "Add support for other sizes if necessary");
+ // Based on GCC's support for TLS:
+ if (!DD->useSplitDwarf()) {
+ // 1) Start with a constNu of the appropriate pointer size
+ addUInt(*Loc, dwarf::DW_FORM_data1,
+ PointerSize == 4 ? dwarf::DW_OP_const4u
+ : dwarf::DW_OP_const8u);
+ // 2) containing the (relocated) offset of the TLS variable
+ // within the module's TLS block.
+ addExpr(*Loc, dwarf::DW_FORM_udata,
+ Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym));
+ } else {
+ addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index);
+ addUInt(*Loc, dwarf::DW_FORM_udata,
+ DD->getAddressPool().getIndex(Sym, /* TLS */ true));
+ }
+ // 3) followed by an OP to make the debugger do a TLS lookup.
+ addUInt(*Loc, dwarf::DW_FORM_data1,
+ DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address
+ : dwarf::DW_OP_form_tls_address);
}
- // 3) followed by an OP to make the debugger do a TLS lookup.
- addUInt(*Loc, dwarf::DW_FORM_data1,
- DD->useGNUTLSOpcode() ? dwarf::DW_OP_GNU_push_tls_address
- : dwarf::DW_OP_form_tls_address);
+ } else {
+ DD->addArangeLabel(SymbolCU(this, Sym));
+ addOpAddress(*Loc, Sym);
}
- } else {
- DD->addArangeLabel(SymbolCU(this, Sym));
- addOpAddress(*Loc, Sym);
}
-
- addBlock(*VariableDIE, dwarf::DW_AT_location, Loc);
- if (DD->useAllLinkageNames())
- addLinkageName(*VariableDIE, GV->getLinkageName());
- }
- } else if (const ConstantInt *CI =
- dyn_cast_or_null<ConstantInt>(GV->getVariable())) {
- addConstantValue(*VariableDIE, CI, GTy);
- } else if (const ConstantExpr *CE = getMergedGlobalExpr(GV->getVariable())) {
- auto *Ptr = cast<GlobalValue>(CE->getOperand(0));
- if (!Ptr->hasDLLImportStorageClass()) {
- addToAccelTable = true;
- // GV is a merged global.
- DIELoc *Loc = new (DIEValueAllocator) DIELoc;
- MCSymbol *Sym = Asm->getSymbol(Ptr);
- DD->addArangeLabel(SymbolCU(this, Sym));
- addOpAddress(*Loc, Sym);
- addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
- SmallVector<Value *, 3> Idx(CE->op_begin() + 1, CE->op_end());
- addUInt(*Loc, dwarf::DW_FORM_udata,
- Asm->getDataLayout().getIndexedOffsetInType(Ptr->getValueType(),
- Idx));
- addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
- addBlock(*VariableDIE, dwarf::DW_AT_location, Loc);
+ if (Expr) {
+ DwarfExpr->addFragmentOffset(Expr);
+ DwarfExpr->AddExpression(Expr);
+ }
}
}
+ if (Loc)
+ addBlock(*VariableDIE, dwarf::DW_AT_location, DwarfExpr->finalize());
+
+ if (DD->useAllLinkageNames())
+ addLinkageName(*VariableDIE, GV->getLinkageName());
if (addToAccelTable) {
DD->addAccelName(GV->getName(), *VariableDIE);
@@ -265,7 +250,7 @@ void DwarfCompileUnit::initStmtList() {
// is not okay to use line_table_start here.
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
StmtListValue =
- addSectionLabel(UnitDie, dwarf::DW_AT_stmt_list, LineTableStartSym,
+ addSectionLabel(getUnitDie(), dwarf::DW_AT_stmt_list, LineTableStartSym,
TLOF.getDwarfLineSection()->getBeginSymbol());
}
@@ -450,7 +435,7 @@ DIE *DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) {
addUInt(*ScopeDIE, dwarf::DW_AT_call_file, None,
getOrCreateSourceID(IA->getFilename(), IA->getDirectory()));
addUInt(*ScopeDIE, dwarf::DW_AT_call_line, None, IA->getLine());
- if (IA->getDiscriminator())
+ if (IA->getDiscriminator() && DD->getDwarfVersion() >= 4)
addUInt(*ScopeDIE, dwarf::DW_AT_GNU_discriminator, None,
IA->getDiscriminator());
@@ -521,9 +506,10 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
// If there is an expression, emit raw unsigned bytes.
+ DwarfExpr.addFragmentOffset(Expr);
DwarfExpr.AddUnsignedConstant(DVInsn->getOperand(0).getImm());
- DwarfExpr.AddExpression(Expr->expr_op_begin(), Expr->expr_op_end());
- addBlock(*VariableDie, dwarf::DW_AT_location, Loc);
+ DwarfExpr.AddExpression(Expr);
+ addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
} else
addConstantValue(*VariableDie, DVInsn->getOperand(0), DV.getType());
} else if (DVInsn->getOperand(0).isFPImm())
@@ -547,12 +533,13 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering();
int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg);
assert(Expr != DV.getExpression().end() && "Wrong number of expressions");
+ DwarfExpr.addFragmentOffset(*Expr);
DwarfExpr.AddMachineRegIndirect(*Asm->MF->getSubtarget().getRegisterInfo(),
FrameReg, Offset);
- DwarfExpr.AddExpression((*Expr)->expr_op_begin(), (*Expr)->expr_op_end());
+ DwarfExpr.AddExpression(*Expr);
++Expr;
}
- addBlock(*VariableDie, dwarf::DW_AT_location, Loc);
+ addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
return VariableDie;
}
@@ -585,25 +572,22 @@ DIE *DwarfCompileUnit::createScopeChildrenDIE(LexicalScope *Scope,
return ObjectPointer;
}
-void DwarfCompileUnit::constructSubprogramScopeDIE(LexicalScope *Scope) {
- assert(Scope && Scope->getScopeNode());
- assert(!Scope->getInlinedAt());
- assert(!Scope->isAbstractScope());
- auto *Sub = cast<DISubprogram>(Scope->getScopeNode());
-
- DD->getProcessedSPNodes().insert(Sub);
-
+void DwarfCompileUnit::constructSubprogramScopeDIE(const DISubprogram *Sub, LexicalScope *Scope) {
DIE &ScopeDIE = updateSubprogramScopeDIE(Sub);
+ if (Scope) {
+ assert(!Scope->getInlinedAt());
+ assert(!Scope->isAbstractScope());
+ // Collect lexical scope children first.
+ // ObjectPointer might be a local (non-argument) local variable if it's a
+ // block's synthetic this pointer.
+ if (DIE *ObjectPointer = createAndAddScopeChildren(Scope, ScopeDIE))
+ addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer, *ObjectPointer);
+ }
+
// If this is a variadic function, add an unspecified parameter.
DITypeRefArray FnArgs = Sub->getType()->getTypeArray();
- // Collect lexical scope children first.
- // ObjectPointer might be a local (non-argument) local variable if it's a
- // block's synthetic this pointer.
- if (DIE *ObjectPointer = createAndAddScopeChildren(Scope, ScopeDIE))
- addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer, *ObjectPointer);
-
// If we have a single element of null, it is a function that returns void.
// If we have more than one elements and the last one is null, it is a
// variadic function.
@@ -674,7 +658,7 @@ DIE *DwarfCompileUnit::constructImportedEntityDIE(
else if (auto *T = dyn_cast<DIType>(Entity))
EntityDie = getOrCreateTypeDIE(T);
else if (auto *GV = dyn_cast<DIGlobalVariable>(Entity))
- EntityDie = getOrCreateGlobalVariableDIE(GV);
+ EntityDie = getOrCreateGlobalVariableDIE(GV, {});
else
EntityDie = getDIE(Entity);
assert(EntityDie);
@@ -695,11 +679,7 @@ void DwarfCompileUnit::finishSubprogramDefinition(const DISubprogram *SP) {
// If this subprogram has an abstract definition, reference that
addDIEEntry(*D, dwarf::DW_AT_abstract_origin, *AbsSPDIE);
} else {
- if (!D && !includeMinimalInlineScopes())
- // Lazily construct the subprogram if we didn't see either concrete or
- // inlined versions during codegen. (except in -gmlt ^ where we want
- // to omit these entirely)
- D = getOrCreateSubprogramDIE(SP);
+ assert(D || includeMinimalInlineScopes());
if (D)
// And attach the attributes
applySubprogramAttributesToDefinition(SP, *D);
@@ -750,18 +730,22 @@ void DwarfCompileUnit::addVariableAddress(const DbgVariable &DV, DIE &Die,
void DwarfCompileUnit::addAddress(DIE &Die, dwarf::Attribute Attribute,
const MachineLocation &Location) {
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ DIEDwarfExpression Expr(*Asm, *this, *Loc);
bool validReg;
if (Location.isReg())
- validReg = addRegisterOpPiece(*Loc, Location.getReg());
+ validReg = Expr.AddMachineReg(*Asm->MF->getSubtarget().getRegisterInfo(),
+ Location.getReg());
else
- validReg = addRegisterOffset(*Loc, Location.getReg(), Location.getOffset());
+ validReg =
+ Expr.AddMachineRegIndirect(*Asm->MF->getSubtarget().getRegisterInfo(),
+ Location.getReg(), Location.getOffset());
if (!validReg)
return;
// Now attach the location information to the DIE.
- addBlock(Die, Attribute, Loc);
+ addBlock(Die, Attribute, Expr.finalize());
}
/// Start with the address based on the location provided, and generate the
@@ -774,19 +758,22 @@ void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die,
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
const DIExpression *Expr = DV.getSingleExpression();
- bool ValidReg;
+ DIExpressionCursor ExprCursor(Expr);
const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo();
- if (Location.getOffset()) {
- ValidReg = DwarfExpr.AddMachineRegIndirect(TRI, Location.getReg(),
- Location.getOffset());
- if (ValidReg)
- DwarfExpr.AddExpression(Expr->expr_op_begin(), Expr->expr_op_end());
- } else
- ValidReg = DwarfExpr.AddMachineRegExpression(TRI, Expr, Location.getReg());
+ auto Reg = Location.getReg();
+ DwarfExpr.addFragmentOffset(Expr);
+ bool ValidReg =
+ Location.getOffset()
+ ? DwarfExpr.AddMachineRegIndirect(TRI, Reg, Location.getOffset())
+ : DwarfExpr.AddMachineRegExpression(TRI, ExprCursor, Reg);
+
+ if (!ValidReg)
+ return;
+
+ DwarfExpr.AddExpression(std::move(ExprCursor));
// Now attach the location information to the DIE.
- if (ValidReg)
- addBlock(Die, Attribute, Loc);
+ addBlock(Die, Attribute, Loc);
}
/// Add a Dwarf loclistptr attribute data and value.
@@ -802,7 +789,13 @@ void DwarfCompileUnit::applyVariableAttributes(const DbgVariable &Var,
StringRef Name = Var.getName();
if (!Name.empty())
addString(VariableDie, dwarf::DW_AT_name, Name);
- addSourceLine(VariableDie, Var.getVariable());
+ const auto *DIVar = Var.getVariable();
+ if (DIVar)
+ if (uint32_t AlignInBytes = DIVar->getAlignInBytes())
+ addUInt(VariableDie, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata,
+ AlignInBytes);
+
+ addSourceLine(VariableDie, DIVar);
addType(VariableDie, Var.getType());
if (Var.isArtificial())
addFlag(VariableDie, dwarf::DW_AT_artificial);
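
For the constant-expression case added to getOrCreateGlobalVariableDIE above: an expression of the shape {DW_OP_constu, X, DW_OP_stack_value} is lowered to DW_AT_const_value(X) for DWARF 3 and earlier consumers, which is why the code reads X with getElement(1). A self-contained sketch of that shape check (the real isConstant() predicate may accept more forms; this covers only the three-element case used here):

    #include <cstdint>
    #include <vector>

    // DWARF opcode values used in the shape check (from the DWARF spec).
    static const uint64_t DW_OP_constu = 0x10;
    static const uint64_t DW_OP_stack_value = 0x9f;

    // Returns true and sets X when Elements is
    // {DW_OP_constu, X, DW_OP_stack_value}, i.e. the case lowered to
    // DW_AT_const_value(X) instead of a location block.
    static bool getConstantFromExpr(const std::vector<uint64_t> &Elements,
                                    uint64_t &X) {
      if (Elements.size() != 3 || Elements[0] != DW_OP_constu ||
          Elements[2] != DW_OP_stack_value)
        return false;
      X = Elements[1];
      return true;
    }
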
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 90f74a3686ea..a8025f1d1521 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -32,9 +32,6 @@ class DwarfCompileUnit : public DwarfUnit {
/// A numeric ID unique among all CUs in the module
unsigned UniqueID;
- /// Offset of the UnitDie from beginning of debug info section.
- unsigned DebugInfoOffset = 0;
-
/// The attribute index of DW_AT_stmt_list in the compile unit DIE, avoiding
/// the need to search for it in applyStmtList.
DIE::value_iterator StmtListValue;
@@ -84,8 +81,6 @@ public:
DwarfDebug *DW, DwarfFile *DWU);
unsigned getUniqueID() const { return UniqueID; }
- unsigned getDebugInfoOffset() const { return DebugInfoOffset; }
- void setDebugInfoOffset(unsigned DbgInfoOff) { DebugInfoOffset = DbgInfoOff; }
DwarfCompileUnit *getSkeleton() const {
return Skeleton;
@@ -96,8 +91,16 @@ public:
/// Apply the DW_AT_stmt_list from this compile unit to the specified DIE.
void applyStmtList(DIE &D);
- /// getOrCreateGlobalVariableDIE - get or create global variable DIE.
- DIE *getOrCreateGlobalVariableDIE(const DIGlobalVariable *GV);
+ /// A pair of GlobalVariable and DIExpression.
+ struct GlobalExpr {
+ const GlobalVariable *Var;
+ const DIExpression *Expr;
+ };
+
+ /// Get or create global variable DIE.
+ DIE *
+ getOrCreateGlobalVariableDIE(const DIGlobalVariable *GV,
+ ArrayRef<GlobalExpr> GlobalExprs);
/// addLabelAddress - Add a dwarf label attribute data and value using
/// either DW_FORM_addr or DW_FORM_GNU_addr_index.
@@ -176,7 +179,7 @@ public:
unsigned *ChildScopeCount = nullptr);
/// \brief Construct a DIE for this subprogram scope.
- void constructSubprogramScopeDIE(LexicalScope *Scope);
+ void constructSubprogramScopeDIE(const DISubprogram *Sub, LexicalScope *Scope);
DIE *createAndAddScopeChildren(LexicalScope *Scope, DIE &ScopeDIE);
@@ -190,20 +193,15 @@ public:
/// Set the skeleton unit associated with this unit.
void setSkeleton(DwarfCompileUnit &Skel) { Skeleton = &Skel; }
- const MCSymbol *getSectionSym() const {
- assert(Section);
- return Section->getBeginSymbol();
- }
-
unsigned getLength() {
return sizeof(uint32_t) + // Length field
- getHeaderSize() + UnitDie.getSize();
+ getHeaderSize() + getUnitDie().getSize();
}
void emitHeader(bool UseOffsets) override;
MCSymbol *getLabelBegin() const {
- assert(Section);
+ assert(getSection());
return LabelBegin;
}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 7fba7688f7fb..b465b98f368a 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -62,11 +62,6 @@ static cl::opt<bool>
DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden,
cl::desc("Disable debug info printing"));
-static cl::opt<bool> UnknownLocations(
- "use-unknown-locations", cl::Hidden,
- cl::desc("Make an absence of debug location information explicit."),
- cl::init(false));
-
static cl::opt<bool>
GenerateGnuPubSections("generate-gnu-dwarf-pub-sections", cl::Hidden,
cl::desc("Generate GNU-style pubnames and pubtypes"),
@@ -81,12 +76,19 @@ namespace {
enum DefaultOnOff { Default, Enable, Disable };
}
+static cl::opt<DefaultOnOff> UnknownLocations(
+ "use-unknown-locations", cl::Hidden,
+ cl::desc("Make an absence of debug location information explicit."),
+ cl::values(clEnumVal(Default, "At top of block or after label"),
+ clEnumVal(Enable, "In all cases"), clEnumVal(Disable, "Never")),
+ cl::init(Default));
+
static cl::opt<DefaultOnOff>
DwarfAccelTables("dwarf-accel-tables", cl::Hidden,
cl::desc("Output prototype dwarf accelerator tables."),
cl::values(clEnumVal(Default, "Default for platform"),
clEnumVal(Enable, "Enabled"),
- clEnumVal(Disable, "Disabled"), clEnumValEnd),
+ clEnumVal(Disable, "Disabled")),
cl::init(Default));
static cl::opt<DefaultOnOff>
@@ -94,7 +96,7 @@ SplitDwarf("split-dwarf", cl::Hidden,
cl::desc("Output DWARF5 split debug info."),
cl::values(clEnumVal(Default, "Default for platform"),
clEnumVal(Enable, "Enabled"),
- clEnumVal(Disable, "Disabled"), clEnumValEnd),
+ clEnumVal(Disable, "Disabled")),
cl::init(Default));
static cl::opt<DefaultOnOff>
@@ -102,7 +104,7 @@ DwarfPubSections("generate-dwarf-pub-sections", cl::Hidden,
cl::desc("Generate DWARF pubnames and pubtypes sections"),
cl::values(clEnumVal(Default, "Default for platform"),
clEnumVal(Enable, "Enabled"),
- clEnumVal(Disable, "Disabled"), clEnumValEnd),
+ clEnumVal(Disable, "Disabled")),
cl::init(Default));
enum LinkageNameOption {
@@ -117,12 +119,13 @@ static cl::opt<LinkageNameOption>
"Default for platform"),
clEnumValN(AllLinkageNames, "All", "All"),
clEnumValN(AbstractLinkageNames, "Abstract",
- "Abstract subprograms"),
- clEnumValEnd),
+ "Abstract subprograms")),
cl::init(DefaultLinkageNames));
-static const char *const DWARFGroupName = "DWARF Emission";
-static const char *const DbgTimerName = "DWARF Debug Writer";
+static const char *const DWARFGroupName = "dwarf";
+static const char *const DWARFGroupDescription = "DWARF Emission";
+static const char *const DbgTimerName = "writer";
+static const char *const DbgTimerDescription = "DWARF Debug Writer";
void DebugLocDwarfExpression::EmitOp(uint8_t Op, const char *Comment) {
BS.EmitInt8(
@@ -196,7 +199,7 @@ const DIType *DbgVariable::getType() const {
return Ty;
}
-static LLVM_CONSTEXPR DwarfAccelTable::Atom TypeAtoms[] = {
+static const DwarfAccelTable::Atom TypeAtoms[] = {
DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4),
DwarfAccelTable::Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2),
DwarfAccelTable::Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1)};
@@ -205,7 +208,7 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
: DebugHandlerBase(A), DebugLocs(A->OutStreamer->isVerboseAsm()),
InfoHolder(A, "info_string", DIEValueAllocator),
SkeletonHolder(A, "skel_string", DIEValueAllocator),
- IsDarwin(Triple(A->getTargetTriple()).isOSDarwin()),
+ IsDarwin(A->TM.getTargetTriple().isOSDarwin()),
AccelNames(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
dwarf::DW_FORM_data4)),
AccelObjC(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset,
@@ -215,7 +218,7 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
AccelTypes(TypeAtoms), DebuggerTuning(DebuggerKind::Default) {
CurFn = nullptr;
- Triple TT(Asm->getTargetTriple());
+ const Triple &TT = Asm->TM.getTargetTriple();
// Make sure we know our "debugger tuning." The target option takes
// precedence; fall back to triple-based defaults.
@@ -255,7 +258,7 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
UseAllLinkageNames = DwarfLinkageNames == AllLinkageNames;
unsigned DwarfVersionNumber = Asm->TM.Options.MCOptions.DwarfVersion;
- DwarfVersion = DwarfVersionNumber ? DwarfVersionNumber
+ unsigned DwarfVersion = DwarfVersionNumber ? DwarfVersionNumber
: MMI->getModule()->getDwarfVersion();
// Use dwarf 4 by default if nothing is requested.
DwarfVersion = DwarfVersion ? DwarfVersion : dwarf::DWARF_VERSION;
@@ -349,10 +352,11 @@ bool DwarfDebug::isLexicalScopeDIENull(LexicalScope *Scope) {
return !getLabelAfterInsn(Ranges.front().second);
}
-template <typename Func> void forBothCUs(DwarfCompileUnit &CU, Func F) {
+template <typename Func> static void forBothCUs(DwarfCompileUnit &CU, Func F) {
F(CU);
if (auto *SkelCU = CU.getSkeleton())
- F(*SkelCU);
+ if (CU.getCUNode()->getSplitDebugInlining())
+ F(*SkelCU);
}
void DwarfDebug::constructAbstractSubprogramScopeDIE(LexicalScope *Scope) {
@@ -360,13 +364,13 @@ void DwarfDebug::constructAbstractSubprogramScopeDIE(LexicalScope *Scope) {
assert(Scope->isAbstractScope());
assert(!Scope->getInlinedAt());
- const MDNode *SP = Scope->getScopeNode();
+ auto *SP = cast<DISubprogram>(Scope->getScopeNode());
ProcessedSPNodes.insert(SP);
// Find the subprogram's DwarfCompileUnit in the SPMap in case the subprogram
// was inlined from another compile unit.
- auto &CU = *CUMap.lookup(cast<DISubprogram>(SP)->getUnit());
+ auto &CU = *CUMap.lookup(SP->getUnit());
forBothCUs(CU, [&](DwarfCompileUnit &CU) {
CU.constructAbstractSubprogramScopeDIE(Scope);
});
@@ -435,9 +439,9 @@ DwarfDebug::constructDwarfCompileUnit(const DICompileUnit *DIUnit) {
}
if (useSplitDwarf())
- NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoDWOSection());
+ NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoDWOSection());
else
- NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection());
+ NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection());
if (DIUnit->getDWOId()) {
// This CU is either a clang module DWO or a skeleton CU.
@@ -449,8 +453,8 @@ DwarfDebug::constructDwarfCompileUnit(const DICompileUnit *DIUnit) {
DIUnit->getSplitDebugFilename());
}
- CUMap.insert(std::make_pair(DIUnit, &NewCU));
- CUDieMap.insert(std::make_pair(&Die, &NewCU));
+ CUMap.insert({DIUnit, &NewCU});
+ CUDieMap.insert({&Die, &NewCU});
return NewCU;
}
@@ -460,11 +464,34 @@ void DwarfDebug::constructAndAddImportedEntityDIE(DwarfCompileUnit &TheCU,
D->addChild(TheCU.constructImportedEntityDIE(N));
}
+/// Sort and unique GVEs by comparing their fragment offset.
+static SmallVectorImpl<DwarfCompileUnit::GlobalExpr> &
+sortGlobalExprs(SmallVectorImpl<DwarfCompileUnit::GlobalExpr> &GVEs) {
+ std::sort(GVEs.begin(), GVEs.end(),
+ [](DwarfCompileUnit::GlobalExpr A, DwarfCompileUnit::GlobalExpr B) {
+ if (A.Expr != B.Expr && A.Expr && B.Expr) {
+ auto FragmentA = A.Expr->getFragmentInfo();
+ auto FragmentB = B.Expr->getFragmentInfo();
+ if (FragmentA && FragmentB)
+ return FragmentA->OffsetInBits < FragmentB->OffsetInBits;
+ }
+ return false;
+ });
+ GVEs.erase(std::unique(GVEs.begin(), GVEs.end(),
+ [](DwarfCompileUnit::GlobalExpr A,
+ DwarfCompileUnit::GlobalExpr B) {
+ return A.Expr == B.Expr;
+ }),
+ GVEs.end());
+ return GVEs;
+}
+
// Emit all Dwarf sections that should come prior to the content. Create
// global DIEs and emit initial debug info sections. This is invoked by
// the target AsmPrinter.
void DwarfDebug::beginModule() {
- NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
+ NamedRegionTimer T(DbgTimerName, DbgTimerDescription, DWARFGroupName,
+ DWARFGroupDescription, TimePassesIsEnabled);
if (DisableDebugInfoPrinting)
return;
@@ -475,13 +502,30 @@ void DwarfDebug::beginModule() {
// Tell MMI whether we have debug info.
MMI->setDebugInfoAvailability(NumDebugCUs > 0);
SingleCU = NumDebugCUs == 1;
+ DenseMap<DIGlobalVariable *, SmallVector<DwarfCompileUnit::GlobalExpr, 1>>
+ GVMap;
+ for (const GlobalVariable &Global : M->globals()) {
+ SmallVector<DIGlobalVariableExpression *, 1> GVs;
+ Global.getDebugInfo(GVs);
+ for (auto *GVE : GVs)
+ GVMap[GVE->getVariable()].push_back({&Global, GVE->getExpression()});
+ }
for (DICompileUnit *CUNode : M->debug_compile_units()) {
DwarfCompileUnit &CU = constructDwarfCompileUnit(CUNode);
for (auto *IE : CUNode->getImportedEntities())
CU.addImportedEntity(IE);
- for (auto *GV : CUNode->getGlobalVariables())
- CU.getOrCreateGlobalVariableDIE(GV);
+
+ // Global Variables.
+ for (auto *GVE : CUNode->getGlobalVariables())
+ GVMap[GVE->getVariable()].push_back({nullptr, GVE->getExpression()});
+ DenseSet<DIGlobalVariable *> Processed;
+ for (auto *GVE : CUNode->getGlobalVariables()) {
+ DIGlobalVariable *GV = GVE->getVariable();
+ if (Processed.insert(GV).second)
+ CU.getOrCreateGlobalVariableDIE(GV, sortGlobalExprs(GVMap[GV]));
+ }
+
for (auto *Ty : CUNode->getEnumTypes()) {
// The enum types array by design contains pointers to
// MDNodes rather than DIRefs. Unique them here.
@@ -509,7 +553,7 @@ void DwarfDebug::finishVariableDefinitions() {
// FIXME: Consider the time-space tradeoff of just storing the unit pointer
// in the ConcreteVariables list, rather than looking it up again here.
// DIE::getUnit isn't simple - it walks parent pointers, etc.
- DwarfCompileUnit *Unit = lookupUnit(VariableDie->getUnit());
+ DwarfCompileUnit *Unit = CUDieMap.lookup(VariableDie->getUnitDie());
assert(Unit);
DbgVariable *AbsVar = getExistingAbstractVariable(
InlinedVariable(Var->getVariable(), Var->getInlinedAt()));
@@ -522,13 +566,11 @@ void DwarfDebug::finishVariableDefinitions() {
}
void DwarfDebug::finishSubprogramDefinitions() {
- for (auto &F : MMI->getModule()->functions())
- if (auto *SP = F.getSubprogram())
- if (ProcessedSPNodes.count(SP) &&
- SP->getUnit()->getEmissionKind() != DICompileUnit::NoDebug)
- forBothCUs(*CUMap.lookup(SP->getUnit()), [&](DwarfCompileUnit &CU) {
- CU.finishSubprogramDefinition(SP);
- });
+ for (const DISubprogram *SP : ProcessedSPNodes)
+ if (SP->getUnit()->getEmissionKind() != DICompileUnit::NoDebug)
+ forBothCUs(*CUMap.lookup(SP->getUnit()), [&](DwarfCompileUnit &CU) {
+ CU.finishSubprogramDefinition(SP);
+ });
}
void DwarfDebug::finalizeModuleInfo() {
@@ -715,10 +757,10 @@ void DwarfDebug::ensureAbstractVariableIsCreatedIfScoped(
createAbstractVariable(Cleansed, Scope);
}
-// Collect variable information from side table maintained by MMI.
-void DwarfDebug::collectVariableInfoFromMMITable(
+// Collect variable information from side table maintained by MF.
+void DwarfDebug::collectVariableInfoFromMFTable(
DenseSet<InlinedVariable> &Processed) {
- for (const auto &VI : MMI->getVariableDbgInfo()) {
+ for (const auto &VI : Asm->MF->getVariableDbgInfo()) {
if (!VI.Var)
continue;
assert(VI.Var->isValidLocationForIntrinsic(VI.Loc) &&
@@ -765,7 +807,7 @@ static DebugLocEntry::Value getDebugLocValue(const MachineInstr *MI) {
llvm_unreachable("Unexpected 4-operand DBG_VALUE instruction!");
}
-/// \brief If this and Next are describing different pieces of the same
+/// \brief If this and Next are describing different fragments of the same
/// variable, merge them by appending Next's values to the current
/// list of values.
/// Return true if the merge was successful.
@@ -773,15 +815,15 @@ bool DebugLocEntry::MergeValues(const DebugLocEntry &Next) {
if (Begin == Next.Begin) {
auto *FirstExpr = cast<DIExpression>(Values[0].Expression);
auto *FirstNextExpr = cast<DIExpression>(Next.Values[0].Expression);
- if (!FirstExpr->isBitPiece() || !FirstNextExpr->isBitPiece())
+ if (!FirstExpr->isFragment() || !FirstNextExpr->isFragment())
return false;
- // We can only merge entries if none of the pieces overlap any others.
+ // We can only merge entries if none of the fragments overlap any others.
// In doing so, we can take advantage of the fact that both lists are
// sorted.
for (unsigned i = 0, j = 0; i < Values.size(); ++i) {
for (; j < Next.Values.size(); ++j) {
- int res = DebugHandlerBase::pieceCmp(
+ int res = DebugHandlerBase::fragmentCmp(
cast<DIExpression>(Values[i].Expression),
cast<DIExpression>(Next.Values[j].Expression));
if (res == 0) // The two expressions overlap, we can't merge.
@@ -804,27 +846,27 @@ bool DebugLocEntry::MergeValues(const DebugLocEntry &Next) {
/// Build the location list for all DBG_VALUEs in the function that
/// describe the same variable. If the ranges of several independent
-/// pieces of the same variable overlap partially, split them up and
+/// fragments of the same variable overlap partially, split them up and
/// combine the ranges. The resulting DebugLocEntries are will have
/// strict monotonically increasing begin addresses and will never
/// overlap.
//
// Input:
//
-// Ranges History [var, loc, piece ofs size]
-// 0 | [x, (reg0, piece 0, 32)]
-// 1 | | [x, (reg1, piece 32, 32)] <- IsPieceOfPrevEntry
+// Ranges History [var, loc, fragment ofs size]
+// 0 | [x, (reg0, fragment 0, 32)]
+// 1 | | [x, (reg1, fragment 32, 32)] <- IsFragmentOfPrevEntry
// 2 | | ...
// 3 | [clobber reg0]
-// 4 [x, (mem, piece 0, 64)] <- overlapping with both previous pieces of
+// 4 [x, (mem, fragment 0, 64)] <- overlapping with both previous fragments of
// x.
//
// Output:
//
-// [0-1] [x, (reg0, piece 0, 32)]
-// [1-3] [x, (reg0, piece 0, 32), (reg1, piece 32, 32)]
-// [3-4] [x, (reg1, piece 32, 32)]
-// [4- ] [x, (mem, piece 0, 64)]
+// [0-1] [x, (reg0, fragment 0, 32)]
+// [1-3] [x, (reg0, fragment 0, 32), (reg1, fragment 32, 32)]
+// [3-4] [x, (reg1, fragment 32, 32)]
+// [4- ] [x, (mem, fragment 0, 64)]
void
DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
const DbgValueHistoryMap::InstrRanges &Ranges) {
@@ -842,11 +884,10 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
continue;
}
- // If this piece overlaps with any open ranges, truncate them.
+ // If this fragment overlaps with any open ranges, truncate them.
const DIExpression *DIExpr = Begin->getDebugExpression();
- auto Last = std::remove_if(OpenRanges.begin(), OpenRanges.end(),
- [&](DebugLocEntry::Value R) {
- return piecesOverlap(DIExpr, R.getExpression());
+ auto Last = remove_if(OpenRanges, [&](DebugLocEntry::Value R) {
+ return fragmentsOverlap(DIExpr, R.getExpression());
});
OpenRanges.erase(Last, OpenRanges.end());
@@ -868,12 +909,12 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
DebugLocEntry Loc(StartLabel, EndLabel, Value);
bool couldMerge = false;
- // If this is a piece, it may belong to the current DebugLocEntry.
- if (DIExpr->isBitPiece()) {
+ // If this is a fragment, it may belong to the current DebugLocEntry.
+ if (DIExpr->isFragment()) {
// Add this value to the list of open ranges.
OpenRanges.push_back(Value);
- // Attempt to add the piece to the last entry.
+ // Attempt to add the fragment to the last entry.
if (!DebugLoc.empty())
if (DebugLoc.back().MergeValues(Loc))
couldMerge = true;
@@ -881,7 +922,7 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
if (!couldMerge) {
// Need to add a new DebugLocEntry. Add all values from still
- // valid non-overlapping pieces.
+ // valid non-overlapping fragments.
if (OpenRanges.size())
Loc.addValues(OpenRanges);
@@ -929,7 +970,7 @@ void DwarfDebug::collectVariableInfo(DwarfCompileUnit &TheCU,
const DISubprogram *SP,
DenseSet<InlinedVariable> &Processed) {
// Grab the variable info that was squirreled away in the MMI side-table.
- collectVariableInfoFromMMITable(Processed);
+ collectVariableInfoFromMFTable(Processed);
for (const auto &I : DbgValues) {
InlinedVariable IV = I.first;
@@ -996,30 +1037,82 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
DebugHandlerBase::beginInstruction(MI);
assert(CurMI);
- // Check if source location changes, but ignore DBG_VALUE locations.
- if (!MI->isDebugValue()) {
- const DebugLoc &DL = MI->getDebugLoc();
- if (DL != PrevInstLoc) {
- if (DL) {
- unsigned Flags = 0;
- PrevInstLoc = DL;
- if (DL == PrologEndLoc) {
- Flags |= DWARF2_FLAG_PROLOGUE_END;
- PrologEndLoc = DebugLoc();
- Flags |= DWARF2_FLAG_IS_STMT;
- }
- if (DL.getLine() !=
- Asm->OutStreamer->getContext().getCurrentDwarfLoc().getLine())
- Flags |= DWARF2_FLAG_IS_STMT;
-
- const MDNode *Scope = DL.getScope();
- recordSourceLine(DL.getLine(), DL.getCol(), Scope, Flags);
- } else if (UnknownLocations) {
- PrevInstLoc = DL;
- recordSourceLine(0, 0, nullptr, 0);
+ // Check if source location changes, but ignore DBG_VALUE and CFI locations.
+ if (MI->isDebugValue() || MI->isCFIInstruction())
+ return;
+ const DebugLoc &DL = MI->getDebugLoc();
+ // When we emit a line-0 record, we don't update PrevInstLoc; so look at
+ // the last line number actually emitted, to see if it was line 0.
+ unsigned LastAsmLine =
+ Asm->OutStreamer->getContext().getCurrentDwarfLoc().getLine();
+
+ if (DL == PrevInstLoc) {
+ // If we have an ongoing unspecified location, nothing to do here.
+ if (!DL)
+ return;
+ // We have an explicit location, same as the previous location.
+ // But we might be coming back to it after a line 0 record.
+ if (LastAsmLine == 0 && DL.getLine() != 0) {
+ // Reinstate the source location but not marked as a statement.
+ const MDNode *Scope = DL.getScope();
+ recordSourceLine(DL.getLine(), DL.getCol(), Scope, /*Flags=*/0);
+ }
+ return;
+ }
+
+ if (!DL) {
+ // We have an unspecified location, which might want to be line 0.
+ // If we have already emitted a line-0 record, don't repeat it.
+ if (LastAsmLine == 0)
+ return;
+ // If user said Don't Do That, don't do that.
+ if (UnknownLocations == Disable)
+ return;
+ // See if we have a reason to emit a line-0 record now.
+ // Reasons to emit a line-0 record include:
+ // - User asked for it (UnknownLocations).
+ // - Instruction has a label, so it's referenced from somewhere else,
+ // possibly debug information; we want it to have a source location.
+ // - Instruction is at the top of a block; we don't want to inherit the
+ // location from the physically previous (maybe unrelated) block.
+ if (UnknownLocations == Enable || PrevLabel ||
+ (PrevInstBB && PrevInstBB != MI->getParent())) {
+ // Preserve the file and column numbers, if we can, to save space in
+ // the encoded line table.
+ // Do not update PrevInstLoc, it remembers the last non-0 line.
+ const MDNode *Scope = nullptr;
+ unsigned Column = 0;
+ if (PrevInstLoc) {
+ Scope = PrevInstLoc.getScope();
+ Column = PrevInstLoc.getCol();
}
+ recordSourceLine(/*Line=*/0, Column, Scope, /*Flags=*/0);
}
+ return;
}
+
+ // We have an explicit location, different from the previous location.
+ // Don't repeat a line-0 record, but otherwise emit the new location.
+ // (The new location might be an explicit line 0, which we do emit.)
+ if (PrevInstLoc && DL.getLine() == 0 && LastAsmLine == 0)
+ return;
+ unsigned Flags = 0;
+ if (DL == PrologEndLoc) {
+ Flags |= DWARF2_FLAG_PROLOGUE_END | DWARF2_FLAG_IS_STMT;
+ PrologEndLoc = DebugLoc();
+ }
+ // If the line changed, we call that a new statement; unless we went to
+ // line 0 and came back, in which case it is not a new statement.
+ unsigned OldLine = PrevInstLoc ? PrevInstLoc.getLine() : LastAsmLine;
+ if (DL.getLine() && DL.getLine() != OldLine)
+ Flags |= DWARF2_FLAG_IS_STMT;
+
+ const MDNode *Scope = DL.getScope();
+ recordSourceLine(DL.getLine(), DL.getCol(), Scope, Flags);
+
+ // If we're not at line 0, remember this location.
+ if (DL.getLine())
+ PrevInstLoc = DL;
}
static DebugLoc findPrologueEndLoc(const MachineFunction *MF) {
@@ -1093,18 +1186,14 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
"endFunction should be called with the same function as beginFunction");
const DISubprogram *SP = MF->getFunction()->getSubprogram();
- if (!MMI->hasDebugInfo() || LScopes.empty() || !SP ||
+ if (!MMI->hasDebugInfo() || !SP ||
SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug) {
- // If we don't have a lexical scope for this function then there will
- // be a hole in the range information. Keep note of this by setting the
- // previously used section to nullptr.
+ // If we don't have a subprogram for this function then there will be a hole
+ // in the range information. Keep note of this by setting the previously
+ // used section to nullptr.
PrevCU = nullptr;
CurFn = nullptr;
DebugHandlerBase::endFunction(MF);
- // Mark functions with no debug info on any instructions, but a
- // valid DISubprogram as processed.
- if (SP)
- ProcessedSPNodes.insert(SP);
return;
}
@@ -1112,7 +1201,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
Asm->OutStreamer->getContext().setDwarfCompileUnitID(0);
LexicalScope *FnScope = LScopes.getCurrentFunctionScope();
- SP = cast<DISubprogram>(FnScope->getScopeNode());
+ assert(!FnScope || SP == FnScope->getScopeNode());
DwarfCompileUnit &TheCU = *CUMap.lookup(SP->getUnit());
DenseSet<InlinedVariable> ProcessedVars;
@@ -1154,10 +1243,12 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
constructAbstractSubprogramScopeDIE(AScope);
}
- TheCU.constructSubprogramScopeDIE(FnScope);
+ ProcessedSPNodes.insert(SP);
+ TheCU.constructSubprogramScopeDIE(SP, FnScope);
if (auto *SkelCU = TheCU.getSkeleton())
- if (!LScopes.getAbstractScopesList().empty())
- SkelCU->constructSubprogramScopeDIE(FnScope);
+ if (!LScopes.getAbstractScopesList().empty() &&
+ TheCU.getCUNode()->getSplitDebugInlining())
+ SkelCU->constructSubprogramScopeDIE(SP, FnScope);
// Clear debug info
// Ownership of DbgVariables is a bit subtle - ScopeVariables owns all the
@@ -1181,7 +1272,8 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S,
Fn = Scope->getFilename();
Dir = Scope->getDirectory();
if (auto *LBF = dyn_cast<DILexicalBlockFile>(Scope))
- Discriminator = LBF->getDiscriminator();
+ if (getDwarfVersion() >= 4)
+ Discriminator = LBF->getDiscriminator();
unsigned CUID = Asm->OutStreamer->getContext().getDwarfCompileUnitID();
Src = static_cast<DwarfCompileUnit &>(*InfoHolder.getUnits()[CUID])
@@ -1396,9 +1488,9 @@ void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer,
static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
ByteStreamer &Streamer,
const DebugLocEntry::Value &Value,
- unsigned PieceOffsetInBits) {
- DebugLocDwarfExpression DwarfExpr(AP.getDwarfDebug()->getDwarfVersion(),
- Streamer);
+ DwarfExpression &DwarfExpr) {
+ DIExpressionCursor ExprCursor(Value.getExpression());
+ DwarfExpr.addFragmentOffset(Value.getExpression());
// Regular entry.
if (Value.isInt()) {
if (BT && (BT->getEncoding() == dwarf::DW_ATE_signed ||
@@ -1408,25 +1500,16 @@ static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
DwarfExpr.AddUnsignedConstant(Value.getInt());
} else if (Value.isLocation()) {
MachineLocation Loc = Value.getLoc();
- const DIExpression *Expr = Value.getExpression();
- if (!Expr || !Expr->getNumElements())
- // Regular entry.
- AP.EmitDwarfRegOp(Streamer, Loc);
- else {
- // Complex address entry.
- const TargetRegisterInfo &TRI = *AP.MF->getSubtarget().getRegisterInfo();
- if (Loc.getOffset()) {
- DwarfExpr.AddMachineRegIndirect(TRI, Loc.getReg(), Loc.getOffset());
- DwarfExpr.AddExpression(Expr->expr_op_begin(), Expr->expr_op_end(),
- PieceOffsetInBits);
- } else
- DwarfExpr.AddMachineRegExpression(TRI, Expr, Loc.getReg(),
- PieceOffsetInBits);
- }
+ const TargetRegisterInfo &TRI = *AP.MF->getSubtarget().getRegisterInfo();
+ if (Loc.getOffset())
+ DwarfExpr.AddMachineRegIndirect(TRI, Loc.getReg(), Loc.getOffset());
+ else
+ DwarfExpr.AddMachineRegExpression(TRI, ExprCursor, Loc.getReg());
} else if (Value.isConstantFP()) {
APInt RawBytes = Value.getConstantFP()->getValueAPF().bitcastToAPInt();
DwarfExpr.AddUnsignedConstant(RawBytes);
}
+ DwarfExpr.AddExpression(std::move(ExprCursor));
}
void DebugLocEntry::finalize(const AsmPrinter &AP,
@@ -1434,36 +1517,24 @@ void DebugLocEntry::finalize(const AsmPrinter &AP,
const DIBasicType *BT) {
DebugLocStream::EntryBuilder Entry(List, Begin, End);
BufferByteStreamer Streamer = Entry.getStreamer();
+ DebugLocDwarfExpression DwarfExpr(AP.getDwarfVersion(), Streamer);
const DebugLocEntry::Value &Value = Values[0];
- if (Value.isBitPiece()) {
- // Emit all pieces that belong to the same variable and range.
- assert(std::all_of(Values.begin(), Values.end(), [](DebugLocEntry::Value P) {
- return P.isBitPiece();
- }) && "all values are expected to be pieces");
+ if (Value.isFragment()) {
+ // Emit all fragments that belong to the same variable and range.
+ assert(all_of(Values, [](DebugLocEntry::Value P) {
+ return P.isFragment();
+ }) && "all values are expected to be fragments");
assert(std::is_sorted(Values.begin(), Values.end()) &&
- "pieces are expected to be sorted");
-
- unsigned Offset = 0;
- for (auto Piece : Values) {
- const DIExpression *Expr = Piece.getExpression();
- unsigned PieceOffset = Expr->getBitPieceOffset();
- unsigned PieceSize = Expr->getBitPieceSize();
- assert(Offset <= PieceOffset && "overlapping or duplicate pieces");
- if (Offset < PieceOffset) {
- // The DWARF spec seriously mandates pieces with no locations for gaps.
- DebugLocDwarfExpression Expr(AP.getDwarfDebug()->getDwarfVersion(),
- Streamer);
- Expr.AddOpPiece(PieceOffset-Offset, 0);
- Offset += PieceOffset-Offset;
- }
- Offset += PieceSize;
+ "fragments are expected to be sorted");
+
+ for (auto Fragment : Values)
+ emitDebugLocValue(AP, BT, Streamer, Fragment, DwarfExpr);
- emitDebugLocValue(AP, BT, Streamer, Piece, PieceOffset);
- }
} else {
- assert(Values.size() == 1 && "only pieces may have >1 value");
- emitDebugLocValue(AP, BT, Streamer, Value, 0);
+ assert(Values.size() == 1 && "only fragments may have >1 value");
+ emitDebugLocValue(AP, BT, Streamer, Value, DwarfExpr);
}
+ DwarfExpr.finalize();
}
void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry) {
@@ -1514,14 +1585,14 @@ void DwarfDebug::emitDebugLocDWO() {
// rather than two. We could get fancier and try to, say, reuse an
// address we know we've emitted elsewhere (the start of the function?
// The start of the CU or CU subrange that encloses this range?)
- Asm->EmitInt8(dwarf::DW_LLE_start_length_entry);
+ Asm->EmitInt8(dwarf::DW_LLE_startx_length);
unsigned idx = AddrPool.getIndex(Entry.BeginSym);
Asm->EmitULEB128(idx);
Asm->EmitLabelDifference(Entry.EndSym, Entry.BeginSym, 4);
emitDebugLocEntryLocation(Entry);
}
- Asm->EmitInt8(dwarf::DW_LLE_end_of_list_entry);
+ Asm->EmitInt8(dwarf::DW_LLE_end_of_list);
}
}
@@ -1807,7 +1878,7 @@ DwarfCompileUnit &DwarfDebug::constructSkeletonCU(const DwarfCompileUnit &CU) {
auto OwnedUnit = make_unique<DwarfCompileUnit>(
CU.getUniqueID(), CU.getCUNode(), Asm, this, &SkeletonHolder);
DwarfCompileUnit &NewCU = *OwnedUnit;
- NewCU.initSection(Asm->getObjFileLowering().getDwarfInfoSection());
+ NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection());
NewCU.initStmtList();
@@ -1889,8 +1960,7 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
getDwoLineTable(CU));
DwarfTypeUnit &NewTU = *OwnedUnit;
DIE &UnitDie = NewTU.getUnitDie();
- TypeUnitsUnderConstruction.push_back(
- std::make_pair(std::move(OwnedUnit), CTy));
+ TypeUnitsUnderConstruction.emplace_back(std::move(OwnedUnit), CTy);
NewTU.addUInt(UnitDie, dwarf::DW_AT_language, dwarf::DW_FORM_data2,
CU.getLanguage());
@@ -1900,11 +1970,10 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
Ins.first->second = Signature;
if (useSplitDwarf())
- NewTU.initSection(Asm->getObjFileLowering().getDwarfTypesDWOSection());
+ NewTU.setSection(Asm->getObjFileLowering().getDwarfTypesDWOSection());
else {
CU.applyStmtList(UnitDie);
- NewTU.initSection(
- Asm->getObjFileLowering().getDwarfTypesSection(Signature));
+ NewTU.setSection(Asm->getObjFileLowering().getDwarfTypesSection(Signature));
}
NewTU.setType(NewTU.createTypeDIE(CTy));
@@ -1968,3 +2037,7 @@ void DwarfDebug::addAccelType(StringRef Name, const DIE &Die, char Flags) {
return;
AccelTypes.AddName(InfoHolder.getStringPool().getEntry(*Asm, Name), &Die);
}
+
+uint16_t DwarfDebug::getDwarfVersion() const {
+ return Asm->OutStreamer->getContext().getDwarfVersion();
+}
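// A minimal standalone sketch of the ULEB128 encoding that the
// DW_LLE_startx_length hunk above relies on when it writes the address-pool
// index with EmitULEB128(idx). This assumes only the C++ standard library and
// is not LLVM code.
#include <cstdint>
#include <vector>

// Encode an unsigned value as ULEB128: 7 payload bits per byte, high bit set
// while more bytes follow.
static std::vector<uint8_t> encodeULEB128(uint64_t Value) {
  std::vector<uint8_t> Bytes;
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (Value != 0)
      Byte |= 0x80; // more bytes follow
    Bytes.push_back(Byte);
  } while (Value != 0);
  return Bytes;
}
// For example, encodeULEB128(300) yields {0xAC, 0x02}; the 4-byte length that
// follows the index in the entry is emitted separately as a label difference.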
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 6b06757628b6..e5bf33db81fb 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -22,6 +22,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/CodeGen/DIE.h"
@@ -134,8 +135,8 @@ public:
Expr.append(V.Expr.begin(), V.Expr.end());
FrameIndex.append(V.FrameIndex.begin(), V.FrameIndex.end());
- assert(std::all_of(Expr.begin(), Expr.end(), [](const DIExpression *E) {
- return E && E->isBitPiece();
+ assert(all_of(Expr, [](const DIExpression *E) {
+ return E && E->isFragment();
}) && "conflicting locations for variable");
}
@@ -216,7 +217,9 @@ class DwarfDebug : public DebugHandlerBase {
/// This is a collection of subprogram MDNodes that are processed to
/// create DIEs.
- SmallPtrSet<const MDNode *, 16> ProcessedSPNodes;
+ SetVector<const DISubprogram *, SmallVector<const DISubprogram *, 16>,
+ SmallPtrSet<const DISubprogram *, 16>>
+ ProcessedSPNodes;
/// If nonnull, stores the current machine function we're processing.
const MachineFunction *CurFn;
@@ -254,9 +257,6 @@ class DwarfDebug : public DebugHandlerBase {
/// Whether to emit all linkage names, or just abstract subprograms.
bool UseAllLinkageNames;
- /// Version of dwarf we're emitting.
- unsigned DwarfVersion;
-
/// DWARF5 Experimental Options
/// @{
bool HasDwarfAccelTables;
@@ -443,9 +443,8 @@ class DwarfDebug : public DebugHandlerBase {
void buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
const DbgValueHistoryMap::InstrRanges &Ranges);
- /// Collect variable information from the side table maintained
- /// by MMI.
- void collectVariableInfoFromMMITable(DenseSet<InlinedVariable> &P);
+ /// Collect variable information from the side table maintained by MF.
+ void collectVariableInfoFromMFTable(DenseSet<InlinedVariable> &P);
public:
//===--------------------------------------------------------------------===//
@@ -515,7 +514,7 @@ public:
bool useSplitDwarf() const { return HasSplitDwarf; }
/// Returns the Dwarf Version.
- unsigned getDwarfVersion() const { return DwarfVersion; }
+ uint16_t getDwarfVersion() const;
/// Returns the previous CU that was being updated
const DwarfCompileUnit *getPrevCU() const { return PrevCU; }
@@ -537,11 +536,6 @@ public:
return Ref.resolve();
}
- /// Find the DwarfCompileUnit for the given CU Die.
- DwarfCompileUnit *lookupUnit(const DIE *CU) const {
- return CUDieMap.lookup(CU);
- }
-
void addSubprogramNames(const DISubprogram *SP, DIE &Die);
AddressPool &getAddressPool() { return AddrPool; }
@@ -559,12 +553,6 @@ public:
/// A helper function to check whether the DIE for a given Scope is
/// going to be null.
bool isLexicalScopeDIENull(LexicalScope *Scope);
-
- // FIXME: Sink these functions down into DwarfFile/Dwarf*Unit.
-
- SmallPtrSet<const MDNode *, 16> &getProcessedSPNodes() {
- return ProcessedSPNodes;
- }
};
} // End of namespace llvm
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
index 8287f289f22b..80d5bd208ed8 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -28,6 +28,8 @@ protected:
/// Per-function flag to indicate if frame CFI info should be emitted.
bool shouldEmitCFI;
+  /// Per-module flag to indicate if .cfi_sections has been emitted.
+ bool hasEmittedCFISections;
void markFunctionEnd() override;
void endFragment() override;
@@ -46,8 +48,6 @@ class LLVM_LIBRARY_VISIBILITY DwarfCFIException : public DwarfCFIExceptionBase {
/// Per-function flag to indicate if frame moves info should be emitted.
bool shouldEmitMoves;
- AsmPrinter::CFIMoveType moveTypeModule;
-
public:
//===--------------------------------------------------------------------===//
// Main entry points.
@@ -81,7 +81,7 @@ public:
~ARMException() override;
/// Emit all exception information that should come after the content.
- void endModule() override;
+ void endModule() override {}
/// Gather pre-function exception information. Assumes being emitted
/// immediately after the function entry point.
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index 7dbc6cb39951..61b2c7e65842 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -46,7 +46,9 @@ void DwarfExpression::AddRegIndirect(int DwarfReg, int Offset, bool Deref) {
}
void DwarfExpression::AddOpPiece(unsigned SizeInBits, unsigned OffsetInBits) {
- assert(SizeInBits > 0 && "piece has size zero");
+ if (!SizeInBits)
+ return;
+
const unsigned SizeOfByte = 8;
if (OffsetInBits > 0 || SizeInBits % SizeOfByte) {
EmitOp(dwarf::DW_OP_bit_piece);
@@ -57,6 +59,7 @@ void DwarfExpression::AddOpPiece(unsigned SizeInBits, unsigned OffsetInBits) {
unsigned ByteSize = SizeInBits / SizeOfByte;
EmitUnsigned(ByteSize);
}
+ this->OffsetInBits += SizeInBits;
}
void DwarfExpression::AddShr(unsigned ShiftBy) {
@@ -82,10 +85,8 @@ bool DwarfExpression::AddMachineRegIndirect(const TargetRegisterInfo &TRI,
return true;
}
-bool DwarfExpression::AddMachineRegPiece(const TargetRegisterInfo &TRI,
- unsigned MachineReg,
- unsigned PieceSizeInBits,
- unsigned PieceOffsetInBits) {
+bool DwarfExpression::AddMachineReg(const TargetRegisterInfo &TRI,
+ unsigned MachineReg, unsigned MaxSize) {
if (!TRI.isPhysicalRegister(MachineReg))
return false;
@@ -94,13 +95,11 @@ bool DwarfExpression::AddMachineRegPiece(const TargetRegisterInfo &TRI,
// If this is a valid register number, emit it.
if (Reg >= 0) {
AddReg(Reg);
- if (PieceSizeInBits)
- AddOpPiece(PieceSizeInBits, PieceOffsetInBits);
return true;
}
// Walk up the super-register chain until we find a valid number.
- // For example, EAX on x86_64 is a 32-bit piece of RAX with offset 0.
+ // For example, EAX on x86_64 is a 32-bit fragment of RAX with offset 0.
for (MCSuperRegIterator SR(MachineReg, &TRI); SR.isValid(); ++SR) {
Reg = TRI.getDwarfRegNum(*SR, false);
if (Reg >= 0) {
@@ -108,27 +107,15 @@ bool DwarfExpression::AddMachineRegPiece(const TargetRegisterInfo &TRI,
unsigned Size = TRI.getSubRegIdxSize(Idx);
unsigned RegOffset = TRI.getSubRegIdxOffset(Idx);
AddReg(Reg, "super-register");
- if (PieceOffsetInBits == RegOffset) {
- AddOpPiece(Size, RegOffset);
- } else {
- // If this is part of a variable in a sub-register at a
- // non-zero offset, we need to manually shift the value into
- // place, since the DW_OP_piece describes the part of the
- // variable, not the position of the subregister.
- if (RegOffset)
- AddShr(RegOffset);
- AddOpPiece(Size, PieceOffsetInBits);
- }
+ // Use a DW_OP_bit_piece to describe the sub-register.
+ setSubRegisterPiece(Size, RegOffset);
return true;
}
}
// Otherwise, attempt to find a covering set of sub-register numbers.
// For example, Q0 on ARM is a composition of D0+D1.
- //
- // Keep track of the current position so we can emit the more
- // efficient DW_OP_piece.
- unsigned CurPos = PieceOffsetInBits;
+ unsigned CurPos = 0;
// The size of the register in bits, assuming 8 bits per byte.
unsigned RegSize = TRI.getMinimalPhysRegClass(MachineReg)->getSize() * 8;
// Keep track of the bits in the register we already emitted, so we
@@ -150,7 +137,12 @@ bool DwarfExpression::AddMachineRegPiece(const TargetRegisterInfo &TRI,
// its range, emit a DWARF piece for it.
if (Reg >= 0 && Intersection.any()) {
AddReg(Reg, "sub-register");
- AddOpPiece(Size, Offset == CurPos ? 0 : Offset);
+ if (Offset >= MaxSize)
+ break;
+      // Emit a piece for any gap in the coverage.
+ if (Offset > CurPos)
+ AddOpPiece(Offset - CurPos);
+ AddOpPiece(std::min<unsigned>(Size, MaxSize - Offset));
CurPos = Offset + Size;
// Mark it as emitted.
@@ -158,7 +150,7 @@ bool DwarfExpression::AddMachineRegPiece(const TargetRegisterInfo &TRI,
}
}
- return CurPos > PieceOffsetInBits;
+ return CurPos;
}
void DwarfExpression::AddStackValue() {
@@ -194,92 +186,114 @@ void DwarfExpression::AddUnsignedConstant(const APInt &Value) {
}
}
-static unsigned getOffsetOrZero(unsigned OffsetInBits,
- unsigned PieceOffsetInBits) {
- if (OffsetInBits == PieceOffsetInBits)
- return 0;
- assert(OffsetInBits >= PieceOffsetInBits && "overlapping pieces");
- return OffsetInBits;
-}
-
bool DwarfExpression::AddMachineRegExpression(const TargetRegisterInfo &TRI,
- const DIExpression *Expr,
+ DIExpressionCursor &ExprCursor,
unsigned MachineReg,
- unsigned PieceOffsetInBits) {
- auto I = Expr->expr_op_begin();
- auto E = Expr->expr_op_end();
- if (I == E)
- return AddMachineRegPiece(TRI, MachineReg);
+ unsigned FragmentOffsetInBits) {
+ if (!ExprCursor)
+ return AddMachineReg(TRI, MachineReg);
// Pattern-match combinations for which more efficient representations exist
// first.
bool ValidReg = false;
- switch (I->getOp()) {
- case dwarf::DW_OP_bit_piece: {
- unsigned OffsetInBits = I->getArg(0);
- unsigned SizeInBits = I->getArg(1);
- // Piece always comes at the end of the expression.
- return AddMachineRegPiece(TRI, MachineReg, SizeInBits,
- getOffsetOrZero(OffsetInBits, PieceOffsetInBits));
+ auto Op = ExprCursor.peek();
+ switch (Op->getOp()) {
+ default: {
+ auto Fragment = ExprCursor.getFragmentInfo();
+ ValidReg = AddMachineReg(TRI, MachineReg,
+ Fragment ? Fragment->SizeInBits : ~1U);
+ break;
}
case dwarf::DW_OP_plus:
case dwarf::DW_OP_minus: {
// [DW_OP_reg,Offset,DW_OP_plus, DW_OP_deref] --> [DW_OP_breg, Offset].
// [DW_OP_reg,Offset,DW_OP_minus,DW_OP_deref] --> [DW_OP_breg,-Offset].
- auto N = I.getNext();
- if (N != E && N->getOp() == dwarf::DW_OP_deref) {
- unsigned Offset = I->getArg(0);
+ auto N = ExprCursor.peekNext();
+ if (N && N->getOp() == dwarf::DW_OP_deref) {
+ unsigned Offset = Op->getArg(0);
ValidReg = AddMachineRegIndirect(
- TRI, MachineReg, I->getOp() == dwarf::DW_OP_plus ? Offset : -Offset);
- std::advance(I, 2);
- break;
+ TRI, MachineReg, Op->getOp() == dwarf::DW_OP_plus ? Offset : -Offset);
+ ExprCursor.consume(2);
} else
- ValidReg = AddMachineRegPiece(TRI, MachineReg);
- }
- case dwarf::DW_OP_deref: {
- // [DW_OP_reg,DW_OP_deref] --> [DW_OP_breg].
- ValidReg = AddMachineRegIndirect(TRI, MachineReg);
- ++I;
- break;
+ ValidReg = AddMachineReg(TRI, MachineReg);
+ break;
}
- default:
- llvm_unreachable("unsupported operand");
+ case dwarf::DW_OP_deref:
+ // [DW_OP_reg,DW_OP_deref] --> [DW_OP_breg].
+ ValidReg = AddMachineRegIndirect(TRI, MachineReg);
+ ExprCursor.take();
+ break;
}
- if (!ValidReg)
- return false;
-
- // Emit remaining elements of the expression.
- AddExpression(I, E, PieceOffsetInBits);
- return true;
+ return ValidReg;
}
-void DwarfExpression::AddExpression(DIExpression::expr_op_iterator I,
- DIExpression::expr_op_iterator E,
- unsigned PieceOffsetInBits) {
- for (; I != E; ++I) {
- switch (I->getOp()) {
- case dwarf::DW_OP_bit_piece: {
- unsigned OffsetInBits = I->getArg(0);
- unsigned SizeInBits = I->getArg(1);
- AddOpPiece(SizeInBits, getOffsetOrZero(OffsetInBits, PieceOffsetInBits));
+void DwarfExpression::AddExpression(DIExpressionCursor &&ExprCursor,
+ unsigned FragmentOffsetInBits) {
+ while (ExprCursor) {
+ auto Op = ExprCursor.take();
+ switch (Op->getOp()) {
+ case dwarf::DW_OP_LLVM_fragment: {
+ unsigned SizeInBits = Op->getArg(1);
+ unsigned FragmentOffset = Op->getArg(0);
+ // The fragment offset must have already been adjusted by emitting an
+ // empty DW_OP_piece / DW_OP_bit_piece before we emitted the base
+ // location.
+ assert(OffsetInBits >= FragmentOffset && "fragment offset not added?");
+
+ // If \a AddMachineReg already emitted DW_OP_piece operations to represent
+ // a super-register by splicing together sub-registers, subtract the size
+ // of the pieces that was already emitted.
+ SizeInBits -= OffsetInBits - FragmentOffset;
+
+ // If \a AddMachineReg requested a DW_OP_bit_piece to stencil out a
+ // sub-register that is smaller than the current fragment's size, use it.
+ if (SubRegisterSizeInBits)
+ SizeInBits = std::min<unsigned>(SizeInBits, SubRegisterSizeInBits);
+
+ AddOpPiece(SizeInBits, SubRegisterOffsetInBits);
+ setSubRegisterPiece(0, 0);
break;
}
case dwarf::DW_OP_plus:
EmitOp(dwarf::DW_OP_plus_uconst);
- EmitUnsigned(I->getArg(0));
+ EmitUnsigned(Op->getArg(0));
break;
case dwarf::DW_OP_minus:
// There is no OP_minus_uconst.
EmitOp(dwarf::DW_OP_constu);
- EmitUnsigned(I->getArg(0));
+ EmitUnsigned(Op->getArg(0));
EmitOp(dwarf::DW_OP_minus);
break;
case dwarf::DW_OP_deref:
EmitOp(dwarf::DW_OP_deref);
break;
+ case dwarf::DW_OP_constu:
+ EmitOp(dwarf::DW_OP_constu);
+ EmitUnsigned(Op->getArg(0));
+ break;
+ case dwarf::DW_OP_stack_value:
+ AddStackValue();
+ break;
default:
llvm_unreachable("unhandled opcode found in expression");
}
}
}
+
+void DwarfExpression::finalize() {
+ if (SubRegisterSizeInBits)
+ AddOpPiece(SubRegisterSizeInBits, SubRegisterOffsetInBits);
+}
+
+void DwarfExpression::addFragmentOffset(const DIExpression *Expr) {
+ if (!Expr || !Expr->isFragment())
+ return;
+
+ uint64_t FragmentOffset = Expr->getFragmentInfo()->OffsetInBits;
+ assert(FragmentOffset >= OffsetInBits &&
+ "overlapping or duplicate fragments");
+ if (FragmentOffset > OffsetInBits)
+ AddOpPiece(FragmentOffset - OffsetInBits);
+ OffsetInBits = FragmentOffset;
+}
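// A minimal standalone model of the bookkeeping that addFragmentOffset above
// performs: any gap between the bits described so far and the next fragment's
// start is padded with an empty DW_OP_piece. Plain integers stand in for the
// DWARF operations; this is a sketch of the idea, not the LLVM implementation.
#include <cassert>
#include <cstdint>
#include <cstdio>

struct FragmentTracker {
  uint64_t OffsetInBits = 0; // bits of the variable described so far

  // Returns the size in bits of the padding piece that would be emitted
  // before a fragment starting at FragmentOffset (0 when there is no gap).
  uint64_t advanceTo(uint64_t FragmentOffset) {
    assert(FragmentOffset >= OffsetInBits && "overlapping or duplicate fragments");
    uint64_t Gap = FragmentOffset - OffsetInBits;
    OffsetInBits = FragmentOffset;
    return Gap;
  }
};

int main() {
  FragmentTracker T;
  std::printf("%llu\n", (unsigned long long)T.advanceTo(0));  // 0: no padding
  T.OffsetInBits += 32;                                        // a 32-bit fragment was emitted
  std::printf("%llu\n", (unsigned long long)T.advanceTo(64));  // 32: pad bits 32..63
}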
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
index 5fff28d8a13c..fd90fa05bc32 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h
@@ -25,17 +25,86 @@ class TargetRegisterInfo;
class DwarfUnit;
class DIELoc;
+/// Holds a DIExpression and keeps track of how many operands have been consumed
+/// so far.
+class DIExpressionCursor {
+ DIExpression::expr_op_iterator Start, End;
+public:
+ DIExpressionCursor(const DIExpression *Expr) {
+ if (!Expr) {
+ assert(Start == End);
+ return;
+ }
+ Start = Expr->expr_op_begin();
+ End = Expr->expr_op_end();
+ }
+
+ DIExpressionCursor(ArrayRef<uint64_t> Expr)
+ : Start(Expr.begin()), End(Expr.end()) {}
+
+ /// Consume one operation.
+ Optional<DIExpression::ExprOperand> take() {
+ if (Start == End)
+ return None;
+ return *(Start++);
+ }
+
+ /// Consume N operations.
+ void consume(unsigned N) { std::advance(Start, N); }
+
+ /// Return the current operation.
+ Optional<DIExpression::ExprOperand> peek() const {
+ if (Start == End)
+ return None;
+ return *(Start);
+ }
+
+ /// Return the next operation.
+ Optional<DIExpression::ExprOperand> peekNext() const {
+ if (Start == End)
+ return None;
+
+ auto Next = Start.getNext();
+ if (Next == End)
+ return None;
+
+ return *Next;
+ }
+ /// Determine whether there are any operations left in this expression.
+ operator bool() const { return Start != End; }
+
+ /// Retrieve the fragment information, if any.
+ Optional<DIExpression::FragmentInfo> getFragmentInfo() const {
+ return DIExpression::getFragmentInfo(Start, End);
+ }
+};
+
/// Base class containing the logic for constructing DWARF expressions
/// independently of whether they are emitted into a DIE or into a .debug_loc
/// entry.
class DwarfExpression {
protected:
- // Various convenience accessors that extract things out of AsmPrinter.
unsigned DwarfVersion;
+ /// Current Fragment Offset in Bits.
+ uint64_t OffsetInBits = 0;
+
+ /// Sometimes we need to add a DW_OP_bit_piece to describe a subregister.
+ unsigned SubRegisterSizeInBits = 0;
+ unsigned SubRegisterOffsetInBits = 0;
+
+ /// Push a DW_OP_piece / DW_OP_bit_piece for emitting later, if one is needed
+ /// to represent a subregister.
+ void setSubRegisterPiece(unsigned SizeInBits, unsigned OffsetInBits) {
+ SubRegisterSizeInBits = SizeInBits;
+ SubRegisterOffsetInBits = OffsetInBits;
+ }
public:
DwarfExpression(unsigned DwarfVersion) : DwarfVersion(DwarfVersion) {}
- virtual ~DwarfExpression() {}
+ virtual ~DwarfExpression() {};
+
+ /// This needs to be called last to commit any pending changes.
+ void finalize();
/// Output a dwarf operand and an optional assembler comment.
virtual void EmitOp(uint8_t Op, const char *Comment = nullptr) = 0;
@@ -52,24 +121,25 @@ public:
/// Emit an (double-)indirect dwarf register operation.
void AddRegIndirect(int DwarfReg, int Offset, bool Deref = false);
- /// Emit a dwarf register operation for describing
- /// - a small value occupying only part of a register or
- /// - a register representing only part of a value.
+ /// Emit a DW_OP_piece or DW_OP_bit_piece operation for a variable fragment.
+ /// \param OffsetInBits This is an optional offset into the location that
+ /// is at the top of the DWARF stack.
void AddOpPiece(unsigned SizeInBits, unsigned OffsetInBits = 0);
+
/// Emit a shift-right dwarf expression.
void AddShr(unsigned ShiftBy);
+
/// Emit a DW_OP_stack_value, if supported.
///
- /// The proper way to describe a constant value is
- /// DW_OP_constu <const>, DW_OP_stack_value.
- /// Unfortunately, DW_OP_stack_value was not available until DWARF-4,
- /// so we will continue to generate DW_OP_constu <const> for DWARF-2
- /// and DWARF-3. Technically, this is incorrect since DW_OP_const <const>
- /// actually describes a value at a constant addess, not a constant value.
- /// However, in the past there was no better way to describe a constant
- /// value, so the producers and consumers started to rely on heuristics
- /// to disambiguate the value vs. location status of the expression.
- /// See PR21176 for more details.
+ /// The proper way to describe a constant value is DW_OP_constu <const>,
+ /// DW_OP_stack_value. Unfortunately, DW_OP_stack_value was not available
+ /// until DWARF 4, so we will continue to generate DW_OP_constu <const> for
+ /// DWARF 2 and DWARF 3. Technically, this is incorrect since DW_OP_const
+  /// <const> actually describes a value at a constant address, not a constant
+ /// value. However, in the past there was no better way to describe a
+ /// constant value, so the producers and consumers started to rely on
+ /// heuristics to disambiguate the value vs. location status of the
+ /// expression. See PR21176 for more details.
void AddStackValue();
/// Emit an indirect dwarf register operation for the given machine register.
@@ -77,23 +147,23 @@ public:
bool AddMachineRegIndirect(const TargetRegisterInfo &TRI, unsigned MachineReg,
int Offset = 0);
- /// \brief Emit a partial DWARF register operation.
- /// \param MachineReg the register
- /// \param PieceSizeInBits size and
- /// \param PieceOffsetInBits offset of the piece in bits, if this is one
- /// piece of an aggregate value.
+ /// Emit a partial DWARF register operation.
+ ///
+ /// \param MachineReg The register number.
+ /// \param MaxSize If the register must be composed from
+ /// sub-registers this is an upper bound
+ /// for how many bits the emitted DW_OP_piece
+ /// may cover.
///
- /// If size and offset is zero an operation for the entire
- /// register is emitted: Some targets do not provide a DWARF
- /// register number for every register. If this is the case, this
- /// function will attempt to emit a DWARF register by emitting a
- /// piece of a super-register or by piecing together multiple
- /// subregisters that alias the register.
+ /// If size and offset is zero an operation for the entire register is
+ /// emitted: Some targets do not provide a DWARF register number for every
+ /// register. If this is the case, this function will attempt to emit a DWARF
+ /// register by emitting a fragment of a super-register or by piecing together
+ /// multiple subregisters that alias the register.
///
/// \return false if no DWARF register exists for MachineReg.
- bool AddMachineRegPiece(const TargetRegisterInfo &TRI, unsigned MachineReg,
- unsigned PieceSizeInBits = 0,
- unsigned PieceOffsetInBits = 0);
+ bool AddMachineReg(const TargetRegisterInfo &TRI, unsigned MachineReg,
+ unsigned MaxSize = ~1U);
/// Emit a signed constant.
void AddSignedConstant(int64_t Value);
@@ -102,20 +172,29 @@ public:
/// Emit an unsigned constant.
void AddUnsignedConstant(const APInt &Value);
- /// \brief Emit an entire expression on top of a machine register location.
+ /// Emit a machine register location. As an optimization this may also consume
+ /// the prefix of a DwarfExpression if a more efficient representation for
+ /// combining the register location and the first operation exists.
///
- /// \param PieceOffsetInBits If this is one piece out of a fragmented
- /// location, this is the offset of the piece inside the entire variable.
- /// \return false if no DWARF register exists for MachineReg.
+ /// \param FragmentOffsetInBits If this is one fragment out of a fragmented
+ /// location, this is the offset of the
+ /// fragment inside the entire variable.
+ /// \return false if no DWARF register exists
+ /// for MachineReg.
bool AddMachineRegExpression(const TargetRegisterInfo &TRI,
- const DIExpression *Expr, unsigned MachineReg,
- unsigned PieceOffsetInBits = 0);
- /// Emit a the operations remaining the DIExpressionIterator I.
- /// \param PieceOffsetInBits If this is one piece out of a fragmented
- /// location, this is the offset of the piece inside the entire variable.
- void AddExpression(DIExpression::expr_op_iterator I,
- DIExpression::expr_op_iterator E,
- unsigned PieceOffsetInBits = 0);
+ DIExpressionCursor &Expr, unsigned MachineReg,
+ unsigned FragmentOffsetInBits = 0);
+ /// Emit all remaining operations in the DIExpressionCursor.
+ ///
+ /// \param FragmentOffsetInBits If this is one fragment out of multiple
+ /// locations, this is the offset of the
+ /// fragment inside the entire variable.
+ void AddExpression(DIExpressionCursor &&Expr,
+ unsigned FragmentOffsetInBits = 0);
+
+ /// If applicable, emit an empty DW_OP_piece / DW_OP_bit_piece to advance to
+ /// the fragment described by \c Expr.
+ void addFragmentOffset(const DIExpression *Expr);
};
/// DwarfExpression implementation for .debug_loc entries.
@@ -146,6 +225,10 @@ public:
void EmitUnsigned(uint64_t Value) override;
bool isFrameRegister(const TargetRegisterInfo &TRI,
unsigned MachineReg) override;
+ DIELoc *finalize() {
+ DwarfExpression::finalize();
+ return &DIE;
+ }
};
}
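// A minimal standalone sketch of the single-pass peek/take/consume cursor
// pattern that DIExpressionCursor above provides. It iterates a plain vector
// and assumes nothing about the real DIExpression operand type; it is an
// illustration, not the LLVM class.
#include <cstddef>
#include <optional>
#include <vector>

template <typename Op> class Cursor {
  const std::vector<Op> &Ops;
  std::size_t Pos = 0;

public:
  explicit Cursor(const std::vector<Op> &Ops) : Ops(Ops) {}

  // Look at the current operand without consuming it.
  std::optional<Op> peek() const {
    return Pos < Ops.size() ? std::optional<Op>(Ops[Pos]) : std::nullopt;
  }
  // Consume and return the current operand.
  std::optional<Op> take() {
    return Pos < Ops.size() ? std::optional<Op>(Ops[Pos++]) : std::nullopt;
  }
  // Skip N operands.
  void consume(std::size_t N) { Pos += N; }
  // True while operands remain.
  explicit operator bool() const { return Pos < Ops.size(); }
};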
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
index e9fe98ab3cf9..595f1d91c4bf 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.cpp
@@ -19,37 +19,7 @@
namespace llvm {
DwarfFile::DwarfFile(AsmPrinter *AP, StringRef Pref, BumpPtrAllocator &DA)
- : Asm(AP), StrPool(DA, *Asm, Pref) {}
-
-DwarfFile::~DwarfFile() {
- for (DIEAbbrev *Abbrev : Abbreviations)
- Abbrev->~DIEAbbrev();
-}
-
-// Define a unique number for the abbreviation.
-//
-DIEAbbrev &DwarfFile::assignAbbrevNumber(DIE &Die) {
- FoldingSetNodeID ID;
- DIEAbbrev Abbrev = Die.generateAbbrev();
- Abbrev.Profile(ID);
-
- void *InsertPos;
- if (DIEAbbrev *Existing =
- AbbreviationsSet.FindNodeOrInsertPos(ID, InsertPos)) {
- Die.setAbbrevNumber(Existing->getNumber());
- return *Existing;
- }
-
- // Move the abbreviation to the heap and assign a number.
- DIEAbbrev *New = new (AbbrevAllocator) DIEAbbrev(std::move(Abbrev));
- Abbreviations.push_back(New);
- New->setNumber(Abbreviations.size());
- Die.setAbbrevNumber(Abbreviations.size());
-
- // Store it for lookup.
- AbbreviationsSet.InsertNode(New, InsertPos);
- return *New;
-}
+ : Asm(AP), Abbrevs(AbbrevAllocator), StrPool(DA, *Asm, Pref) {}
void DwarfFile::addUnit(std::unique_ptr<DwarfCompileUnit> U) {
CUs.push_back(std::move(U));
@@ -80,7 +50,7 @@ void DwarfFile::computeSizeAndOffsets() {
// Iterate over each compile unit and set the size and offsets for each
// DIE within each compile unit. All offsets are CU relative.
for (const auto &TheU : CUs) {
- TheU->setDebugInfoOffset(SecOffset);
+ TheU->setDebugSectionOffset(SecOffset);
SecOffset += computeSizeAndOffsetsForUnit(TheU.get());
}
}
@@ -98,44 +68,10 @@ unsigned DwarfFile::computeSizeAndOffsetsForUnit(DwarfUnit *TheU) {
// Compute the size and offset of a DIE. The offset is relative to start of the
// CU. It returns the offset after laying out the DIE.
unsigned DwarfFile::computeSizeAndOffset(DIE &Die, unsigned Offset) {
- // Record the abbreviation.
- const DIEAbbrev &Abbrev = assignAbbrevNumber(Die);
-
- // Set DIE offset
- Die.setOffset(Offset);
-
- // Start the size with the size of abbreviation code.
- Offset += getULEB128Size(Die.getAbbrevNumber());
-
- // Size the DIE attribute values.
- for (const auto &V : Die.values())
- // Size attribute value.
- Offset += V.SizeOf(Asm);
-
- // Size the DIE children if any.
- if (Die.hasChildren()) {
- (void)Abbrev;
- assert(Abbrev.hasChildren() && "Children flag not set");
-
- for (auto &Child : Die.children())
- Offset = computeSizeAndOffset(Child, Offset);
-
- // End of children marker.
- Offset += sizeof(int8_t);
- }
-
- Die.setSize(Offset - Die.getOffset());
- return Offset;
+ return Die.computeOffsetsAndAbbrevs(Asm, Abbrevs, Offset);
}
-void DwarfFile::emitAbbrevs(MCSection *Section) {
- // Check to see if it is worth the effort.
- if (!Abbreviations.empty()) {
- // Start the debug abbrev section.
- Asm->OutStreamer->SwitchSection(Section);
- Asm->emitDwarfAbbrevs(Abbreviations);
- }
-}
+void DwarfFile::emitAbbrevs(MCSection *Section) { Abbrevs.Emit(Asm, Section); }
// Emit strings into a string section.
void DwarfFile::emitStrings(MCSection *StrSection, MCSection *OffsetSection) {
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
index b73d89b0e499..d4d2ed277274 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfFile.h
@@ -16,10 +16,10 @@
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/CodeGen/DIE.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/Allocator.h"
#include <memory>
-#include <vector>
namespace llvm {
class AsmPrinter;
@@ -41,10 +41,7 @@ class DwarfFile {
BumpPtrAllocator AbbrevAllocator;
// Used to uniquely define abbreviations.
- FoldingSet<DIEAbbrev> AbbreviationsSet;
-
- // A list of all the unique abbreviations in use.
- std::vector<DIEAbbrev *> Abbreviations;
+ DIEAbbrevSet Abbrevs;
// A pointer to all units in the section.
SmallVector<std::unique_ptr<DwarfCompileUnit>, 1> CUs;
@@ -65,8 +62,6 @@ class DwarfFile {
public:
DwarfFile(AsmPrinter *AP, StringRef Pref, BumpPtrAllocator &DA);
- ~DwarfFile();
-
const SmallVectorImpl<std::unique_ptr<DwarfCompileUnit>> &getUnits() {
return CUs;
}
@@ -81,12 +76,6 @@ public:
/// \returns The size of the root DIE.
unsigned computeSizeAndOffsetsForUnit(DwarfUnit *TheU);
- /// Define a unique number for the abbreviation.
- ///
- /// Compute the abbreviation for \c Die, look up its unique number, and
- /// return a reference to it in the uniquing table.
- DIEAbbrev &assignAbbrevNumber(DIE &Die);
-
/// \brief Add a unit to the list of CUs.
void addUnit(std::unique_ptr<DwarfCompileUnit> U);
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 4100d728a53b..4f90245c6d49 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -12,28 +12,33 @@
//===----------------------------------------------------------------------===//
#include "DwarfUnit.h"
-#include "DwarfAccelTable.h"
+#include "AddressPool.h"
#include "DwarfCompileUnit.h"
#include "DwarfDebug.h"
#include "DwarfExpression.h"
#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/ADT/None.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Mangler.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/MC/MCContext.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/MC/MachineLocation.h"
+#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+#include <cstdint>
+#include <string>
+#include <utility>
using namespace llvm;
@@ -46,18 +51,21 @@ GenerateDwarfTypeUnits("generate-type-units", cl::Hidden,
DIEDwarfExpression::DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU,
DIELoc &DIE)
- : DwarfExpression(AP.getDwarfDebug()->getDwarfVersion()), AP(AP), DU(DU),
+ : DwarfExpression(AP.getDwarfVersion()), AP(AP), DU(DU),
DIE(DIE) {}
void DIEDwarfExpression::EmitOp(uint8_t Op, const char* Comment) {
DU.addUInt(DIE, dwarf::DW_FORM_data1, Op);
}
+
void DIEDwarfExpression::EmitSigned(int64_t Value) {
DU.addSInt(DIE, dwarf::DW_FORM_sdata, Value);
}
+
void DIEDwarfExpression::EmitUnsigned(uint64_t Value) {
DU.addUInt(DIE, dwarf::DW_FORM_udata, Value);
}
+
bool DIEDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI,
unsigned MachineReg) {
return MachineReg == TRI.getFrameRegister(*AP.MF);
@@ -65,10 +73,8 @@ bool DIEDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI,
DwarfUnit::DwarfUnit(dwarf::Tag UnitTag, const DICompileUnit *Node,
AsmPrinter *A, DwarfDebug *DW, DwarfFile *DWU)
- : CUNode(Node), UnitDie(*DIE::get(DIEValueAllocator, UnitTag)), Asm(A),
- DD(DW), DU(DWU), IndexTyDie(nullptr), Section(nullptr) {
- assert(UnitTag == dwarf::DW_TAG_compile_unit ||
- UnitTag == dwarf::DW_TAG_type_unit);
+ : DIEUnit(A->getDwarfVersion(), A->getPointerSize(), UnitTag), CUNode(Node),
+ Asm(A), DD(DW), DU(DWU), IndexTyDie(nullptr) {
}
DwarfTypeUnit::DwarfTypeUnit(DwarfCompileUnit &CU, AsmPrinter *A,
@@ -77,7 +83,7 @@ DwarfTypeUnit::DwarfTypeUnit(DwarfCompileUnit &CU, AsmPrinter *A,
: DwarfUnit(dwarf::DW_TAG_type_unit, CU.getCUNode(), A, DW, DWU), CU(CU),
SplitLineTable(SplitLineTable) {
if (SplitLineTable)
- addSectionOffset(UnitDie, dwarf::DW_AT_stmt_list, 0);
+ addSectionOffset(getUnitDie(), dwarf::DW_AT_stmt_list, 0);
}
DwarfUnit::~DwarfUnit() {
@@ -286,15 +292,15 @@ void DwarfUnit::addDIETypeSignature(DIE &Die, dwarf::Attribute Attribute,
void DwarfUnit::addDIEEntry(DIE &Die, dwarf::Attribute Attribute,
DIEEntry Entry) {
- const DIE *DieCU = Die.getUnitOrNull();
- const DIE *EntryCU = Entry.getEntry().getUnitOrNull();
- if (!DieCU)
+ const DIEUnit *CU = Die.getUnit();
+ const DIEUnit *EntryCU = Entry.getEntry().getUnit();
+ if (!CU)
// We assume that Die belongs to this CU, if it is not linked to any CU yet.
- DieCU = &getUnitDie();
+ CU = getUnitDie().getUnit();
if (!EntryCU)
- EntryCU = &getUnitDie();
+ EntryCU = getUnitDie().getUnit();
Die.addValue(DIEValueAllocator, Attribute,
- EntryCU == DieCU ? dwarf::DW_FORM_ref4 : dwarf::DW_FORM_ref_addr,
+ EntryCU == CU ? dwarf::DW_FORM_ref4 : dwarf::DW_FORM_ref_addr,
Entry);
}
@@ -365,21 +371,6 @@ void DwarfUnit::addSourceLine(DIE &Die, const DINamespace *NS) {
addSourceLine(Die, NS->getLine(), NS->getFilename(), NS->getDirectory());
}
-bool DwarfUnit::addRegisterOpPiece(DIELoc &TheDie, unsigned Reg,
- unsigned SizeInBits, unsigned OffsetInBits) {
- DIEDwarfExpression Expr(*Asm, *this, TheDie);
- Expr.AddMachineRegPiece(*Asm->MF->getSubtarget().getRegisterInfo(), Reg,
- SizeInBits, OffsetInBits);
- return true;
-}
-
-bool DwarfUnit::addRegisterOffset(DIELoc &TheDie, unsigned Reg,
- int64_t Offset) {
- DIEDwarfExpression Expr(*Asm, *this, TheDie);
- return Expr.AddMachineRegIndirect(*Asm->MF->getSubtarget().getRegisterInfo(),
- Reg, Offset);
-}
-
/* Byref variables, in Blocks, are declared by the programmer as "SomeType
VarName;", but the compiler creates a __Block_byref_x_VarName struct, and
gives the variable VarName either the struct, or a pointer to the struct, as
@@ -472,12 +463,17 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die,
// Decode the original location, and use that as the start of the byref
// variable's location.
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ SmallVector<uint64_t, 6> DIExpr;
+ DIEDwarfExpression Expr(*Asm, *this, *Loc);
bool validReg;
if (Location.isReg())
- validReg = addRegisterOpPiece(*Loc, Location.getReg());
+ validReg = Expr.AddMachineReg(*Asm->MF->getSubtarget().getRegisterInfo(),
+ Location.getReg());
else
- validReg = addRegisterOffset(*Loc, Location.getReg(), Location.getOffset());
+ validReg =
+ Expr.AddMachineRegIndirect(*Asm->MF->getSubtarget().getRegisterInfo(),
+ Location.getReg(), Location.getOffset());
if (!validReg)
return;
@@ -485,27 +481,29 @@ void DwarfUnit::addBlockByrefAddress(const DbgVariable &DV, DIE &Die,
// If we started with a pointer to the __Block_byref... struct, then
// the first thing we need to do is dereference the pointer (DW_OP_deref).
if (isPointer)
- addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+ DIExpr.push_back(dwarf::DW_OP_deref);
// Next add the offset for the '__forwarding' field:
// DW_OP_plus_uconst ForwardingFieldOffset. Note there's no point in
// adding the offset if it's 0.
if (forwardingFieldOffset > 0) {
- addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
- addUInt(*Loc, dwarf::DW_FORM_udata, forwardingFieldOffset);
+ DIExpr.push_back(dwarf::DW_OP_plus);
+ DIExpr.push_back(forwardingFieldOffset);
}
// Now dereference the __forwarding field to get to the real __Block_byref
// struct: DW_OP_deref.
- addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_deref);
+ DIExpr.push_back(dwarf::DW_OP_deref);
// Now that we've got the real __Block_byref... struct, add the offset
// for the variable's field to get to the location of the actual variable:
// DW_OP_plus_uconst varFieldOffset. Again, don't add if it's 0.
if (varFieldOffset > 0) {
- addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst);
- addUInt(*Loc, dwarf::DW_FORM_udata, varFieldOffset);
+ DIExpr.push_back(dwarf::DW_OP_plus);
+ DIExpr.push_back(varFieldOffset);
}
+ Expr.AddExpression(makeArrayRef(DIExpr));
+ Expr.finalize();
// Now attach the location information to the DIE.
addBlock(Die, Attribute, Loc);
@@ -538,7 +536,7 @@ static bool isUnsignedDIType(DwarfDebug *DD, const DIType *Ty) {
return true;
assert(T == dwarf::DW_TAG_typedef || T == dwarf::DW_TAG_const_type ||
T == dwarf::DW_TAG_volatile_type ||
- T == dwarf::DW_TAG_restrict_type);
+ T == dwarf::DW_TAG_restrict_type || T == dwarf::DW_TAG_atomic_type);
DITypeRef Deriv = DTy->getBaseType();
assert(Deriv && "Expected valid base type");
return isUnsignedDIType(DD, DD->resolve(Deriv));
@@ -699,6 +697,10 @@ DIE *DwarfUnit::getOrCreateTypeDIE(const MDNode *TyNode) {
if (Ty->getTag() == dwarf::DW_TAG_restrict_type && DD->getDwarfVersion() <= 2)
return getOrCreateTypeDIE(resolve(cast<DIDerivedType>(Ty)->getBaseType()));
+ // DW_TAG_atomic_type is not supported in DWARF < 5
+ if (Ty->getTag() == dwarf::DW_TAG_atomic_type && DD->getDwarfVersion() < 5)
+ return getOrCreateTypeDIE(resolve(cast<DIDerivedType>(Ty)->getBaseType()));
+
// Construct the context before querying for the existence of the DIE in case
// such construction creates the DIE.
auto *Context = resolve(Ty->getScope());
@@ -735,7 +737,7 @@ DIE *DwarfUnit::getOrCreateTypeDIE(const MDNode *TyNode) {
void DwarfUnit::updateAcceleratorTables(const DIScope *Context,
const DIType *Ty, const DIE &TyDIE) {
if (!Ty->getName().empty() && !Ty->isForwardDecl()) {
- bool IsImplementation = 0;
+ bool IsImplementation = false;
if (auto *CT = dyn_cast<DICompositeType>(Ty)) {
// A runtime language of 0 actually means C/C++ and that any
// non-negative value is some version of Objective-C/C++.
@@ -999,6 +1001,11 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
if (RLang)
addUInt(Buffer, dwarf::DW_AT_APPLE_runtime_class, dwarf::DW_FORM_data1,
RLang);
+
+ // Add align info if available.
+ if (uint32_t AlignInBytes = CTy->getAlignInBytes())
+ addUInt(Buffer, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata,
+ AlignInBytes);
}
}
@@ -1066,6 +1073,8 @@ DIE *DwarfUnit::getOrCreateNameSpace(const DINamespace *NS) {
DD->addAccelNamespace(Name, NDie);
addGlobalName(Name, NDie, NS->getScope());
addSourceLine(NDie, NS);
+ if (NS->getExportSymbols())
+ addFlag(NDie, dwarf::DW_AT_export_symbols);
return &NDie;
}
@@ -1133,7 +1142,9 @@ bool DwarfUnit::applySubprogramDefinitionAttributes(const DISubprogram *SP,
assert(DeclDie && "This DIE should've already been constructed when the "
"definition DIE was created in "
"getOrCreateSubprogramDIE");
- DeclLinkageName = SPDecl->getLinkageName();
+ // Look at the Decl's linkage name only if we emitted it.
+ if (DD->useAllLinkageNames())
+ DeclLinkageName = SPDecl->getLinkageName();
unsigned DeclID =
getOrCreateSourceID(SPDecl->getFilename(), SPDecl->getDirectory());
unsigned DefID = getOrCreateSourceID(SP->getFilename(), SP->getDirectory());
@@ -1248,6 +1259,9 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
if (SP->isRValueReference())
addFlag(SPDie, dwarf::DW_AT_rvalue_reference);
+ if (SP->isNoReturn())
+ addFlag(SPDie, dwarf::DW_AT_noreturn);
+
if (SP->isProtected())
addUInt(SPDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1,
dwarf::DW_ACCESS_protected);
@@ -1260,6 +1274,9 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
if (SP->isExplicit())
addFlag(SPDie, dwarf::DW_AT_explicit);
+
+ if (SP->isMainSubprogram())
+ addFlag(SPDie, dwarf::DW_AT_main_subprogram);
}
void DwarfUnit::constructSubrangeDIE(DIE &Buffer, const DISubrange *SR,
@@ -1288,7 +1305,7 @@ DIE *DwarfUnit::getIndexTyDie() {
if (IndexTyDie)
return IndexTyDie;
// Construct an integer type to use for indexes.
- IndexTyDie = &createAndAddDIE(dwarf::DW_TAG_base_type, UnitDie);
+ IndexTyDie = &createAndAddDIE(dwarf::DW_TAG_base_type, getUnitDie());
addString(*IndexTyDie, dwarf::DW_AT_name, "sizetype");
addUInt(*IndexTyDie, dwarf::DW_AT_byte_size, None, sizeof(int64_t));
addUInt(*IndexTyDie, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
@@ -1383,6 +1400,7 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) {
} else {
uint64_t Size = DT->getSizeInBits();
uint64_t FieldSize = DD->getBaseTypeSize(DT);
+ uint32_t AlignInBytes = DT->getAlignInBytes();
uint64_t OffsetInBytes;
bool IsBitfield = FieldSize && Size != FieldSize;
@@ -1393,8 +1411,11 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) {
addUInt(MemberDie, dwarf::DW_AT_bit_size, None, Size);
uint64_t Offset = DT->getOffsetInBits();
- uint64_t Align = DT->getAlignInBits() ? DT->getAlignInBits() : FieldSize;
- uint64_t AlignMask = ~(Align - 1);
+ // We can't use DT->getAlignInBits() here: AlignInBits for member type
+ // is non-zero if and only if alignment was forced (e.g. _Alignas()),
+ // which can't be done with bitfields. Thus we use FieldSize here.
+ uint32_t AlignInBits = FieldSize;
+ uint32_t AlignMask = ~(AlignInBits - 1);
// The bits from the start of the storage unit to the start of the field.
uint64_t StartBitOffset = Offset - (Offset & AlignMask);
// The byte offset of the field's aligned storage unit inside the struct.
@@ -1417,6 +1438,9 @@ void DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) {
} else {
// This is not a bitfield.
OffsetInBytes = DT->getOffsetInBits() / 8;
+ if (AlignInBytes)
+ addUInt(MemberDie, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata,
+ AlignInBytes);
}
if (DD->getDwarfVersion() <= 2) {
@@ -1493,13 +1517,17 @@ DIE *DwarfUnit::getOrCreateStaticMemberDIE(const DIDerivedType *DT) {
if (const ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(DT->getConstant()))
addConstantFPValue(StaticMemberDIE, CFP);
+ if (uint32_t AlignInBytes = DT->getAlignInBytes())
+ addUInt(StaticMemberDIE, dwarf::DW_AT_alignment, dwarf::DW_FORM_udata,
+ AlignInBytes);
+
return &StaticMemberDIE;
}
void DwarfUnit::emitHeader(bool UseOffsets) {
// Emit size of content not including length itself
Asm->OutStreamer->AddComment("Length of Unit");
- Asm->EmitInt32(getHeaderSize() + UnitDie.getSize());
+ Asm->EmitInt32(getHeaderSize() + getUnitDie().getSize());
Asm->OutStreamer->AddComment("DWARF version number");
Asm->EmitInt16(DD->getDwarfVersion());
@@ -1519,11 +1547,6 @@ void DwarfUnit::emitHeader(bool UseOffsets) {
Asm->EmitInt8(Asm->getDataLayout().getPointerSize());
}
-void DwarfUnit::initSection(MCSection *Section) {
- assert(!this->Section);
- this->Section = Section;
-}
-
void DwarfTypeUnit::emitHeader(bool UseOffsets) {
DwarfUnit::emitHeader(UseOffsets);
Asm->OutStreamer->AddComment("Type Signature");
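// A standalone recomputation of the bitfield storage-unit arithmetic from the
// constructMemberDIE hunk above. FieldSize stands in for the alignment because
// forced alignment is impossible on bitfields; the concrete numbers are
// made-up sample values, and this is a sketch rather than LLVM code.
#include <cstdint>
#include <cstdio>

int main() {
  uint64_t FieldSize = 32;              // bit size of the underlying storage unit
  uint64_t Offset = 37;                 // DT->getOffsetInBits() for the member
  uint32_t AlignInBits = FieldSize;     // see the comment in the hunk above
  uint32_t AlignMask = ~(AlignInBits - 1);
  uint64_t StartBitOffset = Offset - (Offset & AlignMask);      // 5 bits into the unit
  uint64_t StorageUnitOffsetInBytes = (Offset & AlignMask) / 8; // byte 4
  std::printf("storage unit at byte %llu, field starts %llu bits in\n",
              (unsigned long long)StorageUnitOffsetInBytes,
              (unsigned long long)StartBitOffset);
}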
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
index e225f92116d4..8654d6f0caf4 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -65,7 +65,7 @@ public:
//===----------------------------------------------------------------------===//
/// This dwarf writer support class manages information associated with a
/// source file.
-class DwarfUnit {
+ class DwarfUnit : public DIEUnit {
protected:
/// MDNode for the compile unit.
const DICompileUnit *CUNode;
@@ -73,9 +73,6 @@ protected:
// All DIEValues are allocated through this allocator.
BumpPtrAllocator DIEValueAllocator;
- /// Unit debug information entry.
- DIE &UnitDie;
-
/// Target of Dwarf emission.
AsmPrinter *Asm;
@@ -83,7 +80,7 @@ protected:
DwarfDebug *DD;
DwarfFile *DU;
- /// An anonymous type for index type. Owned by UnitDie.
+ /// An anonymous type for index type. Owned by DIEUnit.
DIE *IndexTyDie;
/// Tracks the mapping of unit level debug information variables to debug
@@ -101,9 +98,6 @@ protected:
/// corresponds to the MDNode mapped with the subprogram DIE.
DenseMap<DIE *, const DINode *> ContainingTypeMap;
- /// The section this unit will be emitted in.
- MCSection *Section;
-
DwarfUnit(dwarf::Tag, const DICompileUnit *CU, AsmPrinter *A, DwarfDebug *DW,
DwarfFile *DWU);
@@ -112,21 +106,13 @@ protected:
public:
virtual ~DwarfUnit();
- void initSection(MCSection *Section);
-
- MCSection *getSection() const {
- assert(Section);
- return Section;
- }
-
// Accessors.
AsmPrinter* getAsmPrinter() const { return Asm; }
uint16_t getLanguage() const { return CUNode->getSourceLanguage(); }
const DICompileUnit *getCUNode() const { return CUNode; }
- DIE &getUnitDie() { return UnitDie; }
/// Return true if this compile unit has something to write out.
- bool hasContent() const { return UnitDie.hasChildren(); }
+ bool hasContent() const { return getUnitDie().hasChildren(); }
/// Get string containing language specific context for a global name.
///
@@ -249,17 +235,6 @@ public:
/// Add template parameters in buffer.
void addTemplateParams(DIE &Buffer, DINodeArray TParams);
- /// Add register operand.
- /// \returns false if the register does not exist, e.g., because it was never
- /// materialized.
- bool addRegisterOpPiece(DIELoc &TheDie, unsigned Reg,
- unsigned SizeInBits = 0, unsigned OffsetInBits = 0);
-
- /// Add register offset.
- /// \returns false if the register does not exist, e.g., because it was never
- /// materialized.
- bool addRegisterOffset(DIELoc &TheDie, unsigned Reg, int64_t Offset);
-
// FIXME: Should be reformulated in terms of addComplexAddress.
/// Start with the address based on the location provided, and generate the
/// DWARF information necessary to find the actual Block variable (navigating
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
index e24dcb1bffd4..0a4a7a06cb2e 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/EHStreamer.cpp
@@ -74,7 +74,7 @@ computeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads,
// output using a fixed width encoding. FilterOffsets[i] holds the byte
// offset corresponding to FilterIds[i].
- const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
+ const std::vector<unsigned> &FilterIds = Asm->MF->getFilterIds();
SmallVector<int, 16> FilterOffsets;
FilterOffsets.reserve(FilterIds.size());
int Offset = -1;
@@ -296,7 +296,7 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
else {
// SjLj EH must maintain the call sites in the order assigned
// to them by the SjLjPrepare pass.
- unsigned SiteNo = MMI->getCallSiteBeginLabel(BeginLabel);
+ unsigned SiteNo = Asm->MF->getCallSiteBeginLabel(BeginLabel);
if (CallSites.size() < SiteNo)
CallSites.resize(SiteNo);
CallSites[SiteNo - 1] = Site;
@@ -336,9 +336,10 @@ computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
/// 3. Type ID table contains references to all the C++ typeinfo for all
/// catches in the function. This tables is reverse indexed base 1.
void EHStreamer::emitExceptionTable() {
- const std::vector<const GlobalValue *> &TypeInfos = MMI->getTypeInfos();
- const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
- const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads();
+ const MachineFunction *MF = Asm->MF;
+ const std::vector<const GlobalValue *> &TypeInfos = MF->getTypeInfos();
+ const std::vector<unsigned> &FilterIds = MF->getFilterIds();
+ const std::vector<LandingPadInfo> &PadInfos = MF->getLandingPads();
// Sort the landing pads in order of their type ids. This is used to fold
// duplicate actions.
@@ -649,8 +650,9 @@ void EHStreamer::emitExceptionTable() {
}
void EHStreamer::emitTypeInfos(unsigned TTypeEncoding) {
- const std::vector<const GlobalValue *> &TypeInfos = MMI->getTypeInfos();
- const std::vector<unsigned> &FilterIds = MMI->getFilterIds();
+ const MachineFunction *MF = Asm->MF;
+ const std::vector<const GlobalValue *> &TypeInfos = MF->getTypeInfos();
+ const std::vector<unsigned> &FilterIds = MF->getFilterIds();
bool VerboseAsm = Asm->OutStreamer->isVerboseAsm();
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index c09ef6adea69..8baee4db772e 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -50,7 +50,7 @@ static void EmitCamlGlobal(const Module &M, AsmPrinter &AP, const char *Id) {
std::string SymName;
SymName += "caml";
size_t Letter = SymName.size();
- SymName.append(MId.begin(), std::find(MId.begin(), MId.end(), '.'));
+ SymName.append(MId.begin(), find(MId, '.'));
SymName += "__";
SymName += Id;
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
index e5933d8d4160..9d7c96a1b8ef 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.cpp
@@ -63,8 +63,8 @@ void WinException::beginFunction(const MachineFunction *MF) {
shouldEmitMoves = shouldEmitPersonality = shouldEmitLSDA = false;
// If any landing pads survive, we need an EH table.
- bool hasLandingPads = !MMI->getLandingPads().empty();
- bool hasEHFunclets = MMI->hasEHFunclets();
+ bool hasLandingPads = !MF->getLandingPads().empty();
+ bool hasEHFunclets = MF->hasEHFunclets();
const Function *F = MF->getFunction();
@@ -72,17 +72,21 @@ void WinException::beginFunction(const MachineFunction *MF) {
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
unsigned PerEncoding = TLOF.getPersonalityEncoding();
- const Function *Per = nullptr;
- if (F->hasPersonalityFn())
- Per = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts());
- bool forceEmitPersonality =
- F->hasPersonalityFn() && !isNoOpWithoutInvoke(classifyEHPersonality(Per)) &&
- F->needsUnwindTableEntry();
+ EHPersonality Per = EHPersonality::Unknown;
+ const Function *PerFn = nullptr;
+ if (F->hasPersonalityFn()) {
+ PerFn = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts());
+ Per = classifyEHPersonality(PerFn);
+ }
+
+ bool forceEmitPersonality = F->hasPersonalityFn() &&
+ !isNoOpWithoutInvoke(Per) &&
+ F->needsUnwindTableEntry();
shouldEmitPersonality =
forceEmitPersonality || ((hasLandingPads || hasEHFunclets) &&
- PerEncoding != dwarf::DW_EH_PE_omit && Per);
+ PerEncoding != dwarf::DW_EH_PE_omit && PerFn);
unsigned LSDAEncoding = TLOF.getLSDAEncoding();
shouldEmitLSDA = shouldEmitPersonality &&
@@ -90,7 +94,16 @@ void WinException::beginFunction(const MachineFunction *MF) {
// If we're not using CFI, we don't want the CFI or the personality, but we
// might want EH tables if we had EH pads.
- if (!Asm->MAI->usesWindowsCFI()) {
+ if (!Asm->MAI->usesWindowsCFI() || (!MF->hasWinCFI() && !PerFn)) {
+ if (Per == EHPersonality::MSVC_X86SEH && !hasEHFunclets) {
+ // If this is 32-bit SEH and we don't have any funclets (really invokes),
+ // make sure we emit the parent offset label. Some unreferenced filter
+ // functions may still refer to it.
+ const WinEHFuncInfo &FuncInfo = *MF->getWinEHFuncInfo();
+ StringRef FLinkageName =
+ GlobalValue::getRealLinkageName(MF->getFunction()->getName());
+ emitEHRegistrationOffsetLabel(FuncInfo, FLinkageName);
+ }
shouldEmitLSDA = hasEHFunclets;
shouldEmitPersonality = false;
return;
@@ -108,18 +121,20 @@ void WinException::endFunction(const MachineFunction *MF) {
const Function *F = MF->getFunction();
EHPersonality Per = EHPersonality::Unknown;
if (F->hasPersonalityFn())
- Per = classifyEHPersonality(F->getPersonalityFn());
+ Per = classifyEHPersonality(F->getPersonalityFn()->stripPointerCasts());
// Get rid of any dead landing pads if we're not using funclets. In funclet
// schemes, the landing pad is not actually reachable. It only exists so
// that we can emit the right table data.
- if (!isFuncletEHPersonality(Per))
- MMI->TidyLandingPads();
+ if (!isFuncletEHPersonality(Per)) {
+ MachineFunction *NonConstMF = const_cast<MachineFunction*>(MF);
+ NonConstMF->tidyLandingPads();
+ }
endFunclet();
// endFunclet will emit the necessary .xdata tables for x64 SEH.
- if (Per == EHPersonality::MSVC_Win64SEH && MMI->hasEHFunclets())
+ if (Per == EHPersonality::MSVC_Win64SEH && MF->hasEHFunclets())
return;
if (shouldEmitPersonality || shouldEmitLSDA) {
@@ -147,7 +162,7 @@ void WinException::endFunction(const MachineFunction *MF) {
}
}
-/// Retreive the MCSymbol for a GlobalValue or MachineBasicBlock.
+/// Retrieve the MCSymbol for a GlobalValue or MachineBasicBlock.
static MCSymbol *getMCSymbolForMBB(AsmPrinter *Asm,
const MachineBasicBlock *MBB) {
if (!MBB)
@@ -193,8 +208,10 @@ void WinException::beginFunclet(const MachineBasicBlock &MBB,
}
// Mark 'Sym' as starting our funclet.
- if (shouldEmitMoves || shouldEmitPersonality)
+ if (shouldEmitMoves || shouldEmitPersonality) {
+ CurrentFuncletTextSection = Asm->OutStreamer->getCurrentSectionOnly();
Asm->OutStreamer->EmitWinCFIStartProc(Sym);
+ }
if (shouldEmitPersonality) {
const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
@@ -204,16 +221,14 @@ void WinException::beginFunclet(const MachineBasicBlock &MBB,
if (F->hasPersonalityFn())
PerFn = dyn_cast<Function>(F->getPersonalityFn()->stripPointerCasts());
const MCSymbol *PersHandlerSym =
- TLOF.getCFIPersonalitySymbol(PerFn, *Asm->Mang, Asm->TM, MMI);
-
- // Classify the personality routine so that we may reason about it.
- EHPersonality Per = EHPersonality::Unknown;
- if (F->hasPersonalityFn())
- Per = classifyEHPersonality(F->getPersonalityFn());
-
- // Do not emit a .seh_handler directive if it is a C++ cleanup funclet.
- if (Per != EHPersonality::MSVC_CXX ||
- !CurrentFuncletEntry->isCleanupFuncletEntry())
+ TLOF.getCFIPersonalitySymbol(PerFn, Asm->TM, MMI);
+
+    // Do not emit a .seh_handler directive for cleanup funclets.
+ // FIXME: This means cleanup funclets cannot handle exceptions. Given that
+ // Clang doesn't produce EH constructs inside cleanup funclets and LLVM's
+ // inliner doesn't allow inlining them, this isn't a major problem in
+ // practice.
+ if (!CurrentFuncletEntry->isCleanupFuncletEntry())
Asm->OutStreamer->EmitWinEHHandler(PersHandlerSym, true, true);
}
}
@@ -223,15 +238,12 @@ void WinException::endFunclet() {
if (!CurrentFuncletEntry)
return;
+ const MachineFunction *MF = Asm->MF;
if (shouldEmitMoves || shouldEmitPersonality) {
- const Function *F = Asm->MF->getFunction();
+ const Function *F = MF->getFunction();
EHPersonality Per = EHPersonality::Unknown;
if (F->hasPersonalityFn())
- Per = classifyEHPersonality(F->getPersonalityFn());
-
- // The .seh_handlerdata directive implicitly switches section, push the
- // current section so that we may return to it.
- Asm->OutStreamer->PushSection();
+ Per = classifyEHPersonality(F->getPersonalityFn()->stripPointerCasts());
// Emit an UNWIND_INFO struct describing the prologue.
Asm->OutStreamer->EmitWinEHHandlerData();
@@ -244,18 +256,17 @@ void WinException::endFunclet() {
MCSymbol *FuncInfoXData = Asm->OutContext.getOrCreateSymbol(
Twine("$cppxdata$", FuncLinkageName));
Asm->OutStreamer->EmitValue(create32bitRef(FuncInfoXData), 4);
- } else if (Per == EHPersonality::MSVC_Win64SEH && MMI->hasEHFunclets() &&
+ } else if (Per == EHPersonality::MSVC_Win64SEH && MF->hasEHFunclets() &&
!CurrentFuncletEntry->isEHFuncletEntry()) {
// If this is the parent function in Win64 SEH, emit the LSDA immediately
// following .seh_handlerdata.
- emitCSpecificHandlerTable(Asm->MF);
+ emitCSpecificHandlerTable(MF);
}
- // Switch back to the previous section now that we are done writing to
- // .xdata.
- Asm->OutStreamer->PopSection();
-
- // Emit a .seh_endproc directive to mark the end of the function.
+ // Switch back to the funclet start .text section now that we are done
+ // writing to .xdata, and emit an .seh_endproc directive to mark the end of
+ // the function.
+ Asm->OutStreamer->SwitchSection(CurrentFuncletTextSection);
Asm->OutStreamer->EmitWinCFIEndProc();
}
@@ -905,15 +916,24 @@ void WinException::emitEHRegistrationOffsetLabel(const WinEHFuncInfo &FuncInfo,
// registration in order to recover the parent frame pointer. Now that we know
// we've code generated the parent, we can emit the label assignment that
// those helpers use to get the offset of the registration node.
+
+ // Compute the parent frame offset. The EHRegNodeFrameIndex will be invalid if
+ // after optimization all the invokes were eliminated. We still need to emit
+ // the parent frame offset label, but it should be garbage and should never be
+ // used.
+ int64_t Offset = 0;
+ int FI = FuncInfo.EHRegNodeFrameIndex;
+ if (FI != INT_MAX) {
+ const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering();
+ unsigned UnusedReg;
+ Offset = TFI->getFrameIndexReference(*Asm->MF, FI, UnusedReg);
+ }
+
MCContext &Ctx = Asm->OutContext;
MCSymbol *ParentFrameOffset =
Ctx.getOrCreateParentFrameOffsetSymbol(FLinkageName);
- unsigned UnusedReg;
- const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering();
- int64_t Offset = TFI->getFrameIndexReference(
- *Asm->MF, FuncInfo.EHRegNodeFrameIndex, UnusedReg);
- const MCExpr *MCOffset = MCConstantExpr::create(Offset, Ctx);
- Asm->OutStreamer->EmitAssignment(ParentFrameOffset, MCOffset);
+ Asm->OutStreamer->EmitAssignment(ParentFrameOffset,
+ MCConstantExpr::create(Offset, Ctx));
}
/// Emit the language-specific data that _except_handler3 and 4 expect. This is
@@ -966,11 +986,11 @@ void WinException::emitExceptHandlerTable(const MachineFunction *MF) {
// Retrieve the Guard Stack slot.
int GSCookieOffset = -2;
- const MachineFrameInfo *MFI = MF->getFrameInfo();
- if (MFI->hasStackProtectorIndex()) {
+ const MachineFrameInfo &MFI = MF->getFrameInfo();
+ if (MFI.hasStackProtectorIndex()) {
unsigned UnusedReg;
const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
- int SSPIdx = MFI->getStackProtectorIndex();
+ int SSPIdx = MFI.getStackProtectorIndex();
GSCookieOffset = TFI->getFrameIndexReference(*MF, SSPIdx, UnusedReg);
}
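
For illustration only, a minimal self-contained sketch of the defensive pattern both hunks above use: translate a frame index into an offset only when the index is valid, otherwise fall back to a harmless default (0 for the parent-frame offset, -2 for the GS cookie). The lookup callback is a stand-in for TargetFrameLowering::getFrameIndexReference; INT_MAX as the invalid-index sentinel is taken from the hunk.

    #include <climits>
    #include <cstdint>
    #include <functional>

    // Return the frame offset for FI, or Default when FI is the invalid
    // sentinel (e.g. because optimization removed every invoke).
    int64_t frameOffsetOrDefault(int FI, int64_t Default,
                                 const std::function<int64_t(int)> &Lookup) {
      if (FI == INT_MAX)
        return Default;
      return Lookup(FI);
    }
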
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h
index acb301016910..371061c2c2ec 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/WinException.h
@@ -21,6 +21,7 @@ class Function;
class GlobalValue;
class MachineFunction;
class MCExpr;
+class MCSection;
class Value;
struct WinEHFuncInfo;
@@ -40,6 +41,9 @@ class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer {
/// Pointer to the current funclet entry BB.
const MachineBasicBlock *CurrentFuncletEntry = nullptr;
+ /// The section of the last funclet start.
+ MCSection *CurrentFuncletTextSection = nullptr;
+
void emitCSpecificHandlerTable(const MachineFunction *MF);
void emitSEHActionsForRange(const WinEHFuncInfo &FuncInfo,
diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm/lib/CodeGen/BranchFolding.cpp
index 5dacbf9e6b02..a898e327ccc2 100644
--- a/contrib/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/contrib/llvm/lib/CodeGen/BranchFolding.cpp
@@ -49,6 +49,7 @@ STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
STATISTIC(NumBranchOpts, "Number of branches optimized");
STATISTIC(NumTailMerge , "Number of block tails merged");
STATISTIC(NumHoist , "Number of times common instructions are hoisted");
+STATISTIC(NumTailCalls, "Number of tail calls optimized");
static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge",
cl::init(cl::BOU_UNSET), cl::Hidden);
@@ -110,9 +111,12 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist,
MBFIWrapper &FreqInfo,
- const MachineBranchProbabilityInfo &ProbInfo)
- : EnableHoistCommonCode(CommonHoist), MBBFreqInfo(FreqInfo),
- MBPI(ProbInfo) {
+ const MachineBranchProbabilityInfo &ProbInfo,
+ unsigned MinTailLength)
+ : EnableHoistCommonCode(CommonHoist), MinCommonTailLength(MinTailLength),
+ MBBFreqInfo(FreqInfo), MBPI(ProbInfo) {
+ if (MinCommonTailLength == 0)
+ MinCommonTailLength = TailMergeSize;
switch (FlagEnableTailMerge) {
case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break;
case cl::BOU_TRUE: EnableTailMerge = true; break;
@@ -141,59 +145,6 @@ void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {
MLI->removeBlock(MBB);
}
-/// OptimizeImpDefsBlock - If a basic block is just a bunch of implicit_def
-/// followed by terminators, and if the implicitly defined registers are not
-/// used by the terminators, remove those implicit_def's. e.g.
-/// BB1:
-/// r0 = implicit_def
-/// r1 = implicit_def
-/// br
-/// This block can be optimized away later if the implicit instructions are
-/// removed.
-bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) {
- SmallSet<unsigned, 4> ImpDefRegs;
- MachineBasicBlock::iterator I = MBB->begin();
- while (I != MBB->end()) {
- if (!I->isImplicitDef())
- break;
- unsigned Reg = I->getOperand(0).getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
- SubRegs.isValid(); ++SubRegs)
- ImpDefRegs.insert(*SubRegs);
- } else {
- ImpDefRegs.insert(Reg);
- }
- ++I;
- }
- if (ImpDefRegs.empty())
- return false;
-
- MachineBasicBlock::iterator FirstTerm = I;
- while (I != MBB->end()) {
- if (!TII->isUnpredicatedTerminator(*I))
- return false;
- // See if it uses any of the implicitly defined registers.
- for (const MachineOperand &MO : I->operands()) {
- if (!MO.isReg() || !MO.isUse())
- continue;
- unsigned Reg = MO.getReg();
- if (ImpDefRegs.count(Reg))
- return false;
- }
- ++I;
- }
-
- I = MBB->begin();
- while (I != FirstTerm) {
- MachineInstr *ImpDefMI = &*I;
- ++I;
- MBB->erase(ImpDefMI);
- }
-
- return true;
-}
-
/// OptimizeFunction - Perform branch folding, tail merging and other
/// CFG optimizations on the given function. Block placement changes the layout
/// and may create new tail merging opportunities.
@@ -224,7 +175,6 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
SmallVector<MachineOperand, 4> Cond;
if (!TII->analyzeBranch(MBB, TBB, FBB, Cond, true))
MadeChange |= MBB.CorrectExtraCFGEdges(TBB, FBB, !Cond.empty());
- MadeChange |= OptimizeImpDefsBlock(&MBB);
}
// Recalculate funclet membership.
@@ -399,37 +349,16 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
return TailLen;
}
-void BranchFolder::computeLiveIns(MachineBasicBlock &MBB) {
- if (!UpdateLiveIns)
- return;
-
- LiveRegs.init(TRI);
- LiveRegs.addLiveOutsNoPristines(MBB);
- for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend()))
- LiveRegs.stepBackward(MI);
-
- for (unsigned Reg : LiveRegs) {
- // Skip the register if we are about to add one of its super registers.
- bool ContainsSuperReg = false;
- for (MCSuperRegIterator SReg(Reg, TRI); SReg.isValid(); ++SReg) {
- if (LiveRegs.contains(*SReg)) {
- ContainsSuperReg = true;
- break;
- }
- }
- if (ContainsSuperReg)
- continue;
- MBB.addLiveIn(Reg);
- }
-}
-
/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
/// after it, replacing it with an unconditional branch to NewDest.
void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
MachineBasicBlock *NewDest) {
TII->ReplaceTailWithBranchTo(OldInst, NewDest);
- computeLiveIns(*NewDest);
+ if (UpdateLiveIns) {
+ NewDest->clearLiveIns();
+ computeLiveIns(LiveRegs, *TRI, *NewDest);
+ }
++NumTailMerge;
}
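
The replacement delegates live-in recomputation to the shared computeLiveIns() helper. As a self-contained sketch of the underlying idea, using plain integer register ids instead of LLVM's LivePhysRegs and ignoring sub/super-register handling:

    #include <set>
    #include <vector>

    struct Instr {
      std::vector<int> Defs, Uses;
    };

    // Backward liveness over one block: start from the block's live-outs and
    // step backward over each instruction, killing defs and adding uses.
    std::set<int> computeBlockLiveIns(std::set<int> LiveOut,
                                      const std::vector<Instr> &Block) {
      std::set<int> Live = std::move(LiveOut);
      for (auto I = Block.rbegin(); I != Block.rend(); ++I) {
        for (int D : I->Defs)
          Live.erase(D);       // a def ends liveness above this instruction
        for (int U : I->Uses)
          Live.insert(U);      // a use makes the register live on entry
      }
      return Live;
    }
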
@@ -467,7 +396,8 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
// NewMBB inherits CurMBB's block frequency.
MBBFreqInfo.setBlockFreq(NewMBB, MBBFreqInfo.getBlockFreq(&CurMBB));
- computeLiveIns(*NewMBB);
+ if (UpdateLiveIns)
+ computeLiveIns(LiveRegs, *TRI, *NewMBB);
// Add the new block to the funclet.
const auto &FuncletI = FuncletMembership.find(&CurMBB);
@@ -511,14 +441,14 @@ static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB,
if (I != MF->end() && !TII->analyzeBranch(*CurMBB, TBB, FBB, Cond, true)) {
MachineBasicBlock *NextBB = &*I;
if (TBB == NextBB && !Cond.empty() && !FBB) {
- if (!TII->ReverseBranchCondition(Cond)) {
- TII->RemoveBranch(*CurMBB);
- TII->InsertBranch(*CurMBB, SuccBB, nullptr, Cond, dl);
+ if (!TII->reverseBranchCondition(Cond)) {
+ TII->removeBranch(*CurMBB);
+ TII->insertBranch(*CurMBB, SuccBB, nullptr, Cond, dl);
return;
}
}
}
- TII->InsertBranch(*CurMBB, SuccBB, nullptr,
+ TII->insertBranch(*CurMBB, SuccBB, nullptr,
SmallVector<MachineOperand, 0>(), dl);
}
@@ -591,13 +521,26 @@ static unsigned CountTerminators(MachineBasicBlock *MBB,
/// and decide if it would be profitable to merge those tails. Return the
/// length of the common tail and iterators to the first common instruction
/// in each block.
+/// MBB1, MBB2 The blocks to check
+/// MinCommonTailLength Minimum size of tail block to be merged.
+/// CommonTailLen Out parameter to record the size of the shared tail between
+/// MBB1 and MBB2
+/// I1, I2 Iterator references that will be changed to point to the first
+/// instruction in the common tail shared by MBB1,MBB2
+/// SuccBB A common successor of MBB1, MBB2 which are in a canonical form
+/// relative to SuccBB
+/// PredBB The layout predecessor of SuccBB, if any.
+/// FuncletMembership map from block to funclet #.
+/// AfterPlacement True if we are merging blocks after layout. Stricter
+/// thresholds apply to prevent undoing tail-duplication.
static bool
ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
- unsigned minCommonTailLength, unsigned &CommonTailLen,
+ unsigned MinCommonTailLength, unsigned &CommonTailLen,
MachineBasicBlock::iterator &I1,
MachineBasicBlock::iterator &I2, MachineBasicBlock *SuccBB,
MachineBasicBlock *PredBB,
- DenseMap<const MachineBasicBlock *, int> &FuncletMembership) {
+ DenseMap<const MachineBasicBlock *, int> &FuncletMembership,
+ bool AfterPlacement) {
// It is never profitable to tail-merge blocks from two different funclets.
if (!FuncletMembership.empty()) {
auto Funclet1 = FuncletMembership.find(MBB1);
@@ -617,7 +560,11 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
// It's almost always profitable to merge any number of non-terminator
// instructions with the block that falls through into the common successor.
- if (MBB1 == PredBB || MBB2 == PredBB) {
+ // This is true only for a single successor. For multiple successors, we are
+ // trading a conditional branch for an unconditional one.
+ // TODO: Re-visit successor size for non-layout tail merging.
+ if ((MBB1 == PredBB || MBB2 == PredBB) &&
+ (!AfterPlacement || MBB1->succ_size() == 1)) {
MachineBasicBlock::iterator I;
unsigned NumTerms = CountTerminators(MBB1 == PredBB ? MBB2 : MBB1, I);
if (CommonTailLen > NumTerms)
@@ -635,15 +582,18 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
// If both blocks have an unconditional branch temporarily stripped out,
// count that as an additional common instruction for the following
- // heuristics.
+ // heuristics. This heuristic is only accurate for single-successor blocks,
+ // so to make sure that merging and duplicating during layout don't crash,
+ // we check for that when merging during layout.
unsigned EffectiveTailLen = CommonTailLen;
if (SuccBB && MBB1 != PredBB && MBB2 != PredBB &&
+ (MBB1->succ_size() == 1 || !AfterPlacement) &&
!MBB1->back().isBarrier() &&
!MBB2->back().isBarrier())
++EffectiveTailLen;
// Check if the common tail is long enough to be worthwhile.
- if (EffectiveTailLen >= minCommonTailLength)
+ if (EffectiveTailLen >= MinCommonTailLength)
return true;
// If we are optimizing for code size, 2 instructions in common is enough if
@@ -666,7 +616,7 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
/// those blocks appear in MergePotentials (where they are not necessarily
/// consecutive).
unsigned BranchFolder::ComputeSameTails(unsigned CurHash,
- unsigned minCommonTailLength,
+ unsigned MinCommonTailLength,
MachineBasicBlock *SuccBB,
MachineBasicBlock *PredBB) {
unsigned maxCommonTailLength = 0U;
@@ -679,10 +629,11 @@ unsigned BranchFolder::ComputeSameTails(unsigned CurHash,
for (MPIterator I = std::prev(CurMPIter); I->getHash() == CurHash; --I) {
unsigned CommonTailLen;
if (ProfitableToMerge(CurMPIter->getBlock(), I->getBlock(),
- minCommonTailLength,
+ MinCommonTailLength,
CommonTailLen, TrialBBI1, TrialBBI2,
SuccBB, PredBB,
- FuncletMembership)) {
+ FuncletMembership,
+ AfterBlockPlacement)) {
if (CommonTailLen > maxCommonTailLength) {
SameTails.clear();
maxCommonTailLength = CommonTailLen;
@@ -749,8 +700,6 @@ bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
SameTails[commonTailIndex].getTailStartPos();
MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock();
- // If the common tail includes any debug info we will take it pretty
- // randomly from one of the inputs. Might be better to remove it?
DEBUG(dbgs() << "\nSplitting BB#" << MBB->getNumber() << ", size "
<< maxCommonTailLength);
@@ -832,14 +781,13 @@ mergeOperations(MachineBasicBlock::iterator MBBIStartPos,
// branch to Succ added (but the predecessor/successor lists need no
// adjustment). The lone predecessor of Succ that falls through into Succ,
// if any, is given in PredBB.
+// MinCommonTailLength - Except for the special cases below, tail-merge if
+// there are at least this many instructions in common.
bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
- MachineBasicBlock *PredBB) {
+ MachineBasicBlock *PredBB,
+ unsigned MinCommonTailLength) {
bool MadeChange = false;
- // Except for the special cases below, tail-merge if there are at least
- // this many instructions in common.
- unsigned minCommonTailLength = TailMergeSize;
-
DEBUG(dbgs() << "\nTryTailMergeBlocks: ";
for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
dbgs() << "BB#" << MergePotentials[i].getBlock()->getNumber()
@@ -852,8 +800,8 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
<< PredBB->getNumber() << "\n";
}
dbgs() << "Looking for common tails of at least "
- << minCommonTailLength << " instruction"
- << (minCommonTailLength == 1 ? "" : "s") << '\n';
+ << MinCommonTailLength << " instruction"
+ << (MinCommonTailLength == 1 ? "" : "s") << '\n';
);
// Sort by hash value so that blocks with identical end sequences sort
@@ -867,10 +815,10 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
// Build SameTails, identifying the set of blocks with this hash code
// and with the maximum number of instructions in common.
unsigned maxCommonTailLength = ComputeSameTails(CurHash,
- minCommonTailLength,
+ MinCommonTailLength,
SuccBB, PredBB);
- // If we didn't find any pair that has at least minCommonTailLength
+ // If we didn't find any pair that has at least MinCommonTailLength
// instructions in common, remove all blocks with this hash code and retry.
if (SameTails.empty()) {
RemoveBlocksWithHash(CurHash, SuccBB, PredBB);
@@ -928,6 +876,11 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
// Recompute common tail MBB's edge weights and block frequency.
setCommonTailEdgeWeights(*MBB);
+ // Remove the original debug location from the common tail.
+ for (auto &MI : *MBB)
+ if (!MI.isDebugValue())
+ MI.setDebugLoc(DebugLoc());
+
// MBB is common tail. Adjust all other BB's to jump to this one.
// Traversal must be forwards so erases work.
DEBUG(dbgs() << "\nUsing common tail in BB#" << MBB->getNumber()
@@ -976,7 +929,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
// See if we can do any tail merging on those.
if (MergePotentials.size() >= 2)
- MadeChange |= TryTailMergeBlocks(nullptr, nullptr);
+ MadeChange |= TryTailMergeBlocks(nullptr, nullptr, MinCommonTailLength);
}
// Look at blocks (IBB) with multiple predecessors (PBB).
@@ -1056,7 +1009,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
// branch.
SmallVector<MachineOperand, 4> NewCond(Cond);
if (!Cond.empty() && TBB == IBB) {
- if (TII->ReverseBranchCondition(NewCond))
+ if (TII->reverseBranchCondition(NewCond))
continue;
// This is the QBB case described above
if (!FBB) {
@@ -1092,10 +1045,10 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
// Remove the unconditional branch at the end, if any.
if (TBB && (Cond.empty() || FBB)) {
DebugLoc dl; // FIXME: this is nowhere
- TII->RemoveBranch(*PBB);
+ TII->removeBranch(*PBB);
if (!Cond.empty())
// reinsert conditional branch only, for now
- TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, nullptr,
+ TII->insertBranch(*PBB, (TBB == IBB) ? FBB : TBB, nullptr,
NewCond, dl);
}
@@ -1110,7 +1063,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
TriedMerging.insert(MergePotentials[i].getBlock());
if (MergePotentials.size() >= 2)
- MadeChange |= TryTailMergeBlocks(IBB, PredBB);
+ MadeChange |= TryTailMergeBlocks(IBB, PredBB, MinCommonTailLength);
// Reinsert an unconditional branch if needed. The 1 below can occur as a
// result of removing blocks in TryTailMergeBlocks.
@@ -1311,10 +1264,10 @@ ReoptimizeBlock:
// a fall-through.
if (PriorTBB && PriorTBB == PriorFBB) {
DebugLoc dl = getBranchDebugLoc(PrevBB);
- TII->RemoveBranch(PrevBB);
+ TII->removeBranch(PrevBB);
PriorCond.clear();
if (PriorTBB != MBB)
- TII->InsertBranch(PrevBB, PriorTBB, nullptr, PriorCond, dl);
+ TII->insertBranch(PrevBB, PriorTBB, nullptr, PriorCond, dl);
MadeChange = true;
++NumBranchOpts;
goto ReoptimizeBlock;
@@ -1359,7 +1312,7 @@ ReoptimizeBlock:
// If the previous branch *only* branches to *this* block (conditional or
// not) remove the branch.
if (PriorTBB == MBB && !PriorFBB) {
- TII->RemoveBranch(PrevBB);
+ TII->removeBranch(PrevBB);
MadeChange = true;
++NumBranchOpts;
goto ReoptimizeBlock;
@@ -1369,8 +1322,8 @@ ReoptimizeBlock:
// the condition is false, remove the uncond second branch.
if (PriorFBB == MBB) {
DebugLoc dl = getBranchDebugLoc(PrevBB);
- TII->RemoveBranch(PrevBB);
- TII->InsertBranch(PrevBB, PriorTBB, nullptr, PriorCond, dl);
+ TII->removeBranch(PrevBB);
+ TII->insertBranch(PrevBB, PriorTBB, nullptr, PriorCond, dl);
MadeChange = true;
++NumBranchOpts;
goto ReoptimizeBlock;
@@ -1381,10 +1334,10 @@ ReoptimizeBlock:
// fall-through.
if (PriorTBB == MBB) {
SmallVector<MachineOperand, 4> NewPriorCond(PriorCond);
- if (!TII->ReverseBranchCondition(NewPriorCond)) {
+ if (!TII->reverseBranchCondition(NewPriorCond)) {
DebugLoc dl = getBranchDebugLoc(PrevBB);
- TII->RemoveBranch(PrevBB);
- TII->InsertBranch(PrevBB, PriorFBB, nullptr, NewPriorCond, dl);
+ TII->removeBranch(PrevBB);
+ TII->insertBranch(PrevBB, PriorFBB, nullptr, NewPriorCond, dl);
MadeChange = true;
++NumBranchOpts;
goto ReoptimizeBlock;
@@ -1416,13 +1369,13 @@ ReoptimizeBlock:
if (DoTransform) {
// Reverse the branch so we will fall through on the previous true cond.
SmallVector<MachineOperand, 4> NewPriorCond(PriorCond);
- if (!TII->ReverseBranchCondition(NewPriorCond)) {
+ if (!TII->reverseBranchCondition(NewPriorCond)) {
DEBUG(dbgs() << "\nMoving MBB: " << *MBB
<< "To make fallthrough to: " << *PriorTBB << "\n");
DebugLoc dl = getBranchDebugLoc(PrevBB);
- TII->RemoveBranch(PrevBB);
- TII->InsertBranch(PrevBB, MBB, nullptr, NewPriorCond, dl);
+ TII->removeBranch(PrevBB);
+ TII->insertBranch(PrevBB, MBB, nullptr, NewPriorCond, dl);
// Move this block to the end of the function.
MBB->moveAfter(&MF.back());
@@ -1434,6 +1387,42 @@ ReoptimizeBlock:
}
}
+ if (!IsEmptyBlock(MBB) && MBB->pred_size() == 1 &&
+ MF.getFunction()->optForSize()) {
+ // Changing "Jcc foo; foo: jmp bar;" into "Jcc bar;" might change the branch
+ // direction, thereby defeating careful block placement and regressing
+ // performance. Therefore, only consider this for optsize functions.
+ MachineInstr &TailCall = *MBB->getFirstNonDebugInstr();
+ if (TII->isUnconditionalTailCall(TailCall)) {
+ MachineBasicBlock *Pred = *MBB->pred_begin();
+ MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
+ SmallVector<MachineOperand, 4> PredCond;
+ bool PredAnalyzable =
+ !TII->analyzeBranch(*Pred, PredTBB, PredFBB, PredCond, true);
+
+ if (PredAnalyzable && !PredCond.empty() && PredTBB == MBB) {
+ // The predecessor has a conditional branch to this block which consists
+ // of only a tail call. Try to fold the tail call into the conditional
+ // branch.
+ if (TII->canMakeTailCallConditional(PredCond, TailCall)) {
+ // TODO: It would be nice if analyzeBranch() could provide a pointer
+ // to the branch instruction so replaceBranchWithTailCall() doesn't
+ // have to search for it.
+ TII->replaceBranchWithTailCall(*Pred, PredCond, TailCall);
+ ++NumTailCalls;
+ Pred->removeSuccessor(MBB);
+ MadeChange = true;
+ return MadeChange;
+ }
+ }
+ // If the predecessor is falling through to this block, we could reverse
+ // the branch condition and fold the tail call into that. However, after
+ // that we might have to re-arrange the CFG to fall through to the other
+ // block and there is a high risk of regressing code size rather than
+ // improving it.
+ }
+ }
+
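
A self-contained sketch of the conditions the newly added block checks before folding; the struct and field names here are illustrative, not LLVM API:

    // A block that consists only of an unconditional tail call and is reached
    // by a conditional branch from its single predecessor can be folded into a
    // conditional tail call in that predecessor. The fold is restricted to
    // optsize functions because it may flip the branch direction chosen by
    // block placement.
    struct TailCallBlockInfo {
      bool OnlyContainsTailCall;
      unsigned NumPredecessors;
      bool PredCondBranchTargetsThis;
      bool TargetSupportsCondTailCall;
    };

    bool shouldFoldTailCall(const TailCallBlockInfo &B, bool OptForSize) {
      return OptForSize && B.NumPredecessors == 1 && B.OnlyContainsTailCall &&
             B.PredCondBranchTargetsThis && B.TargetSupportsCondTailCall;
    }
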
// Analyze the branch in the current block.
MachineBasicBlock *CurTBB = nullptr, *CurFBB = nullptr;
SmallVector<MachineOperand, 4> CurCond;
@@ -1450,10 +1439,10 @@ ReoptimizeBlock:
// Loop: xxx; jncc Loop; jmp Out
if (CurTBB && CurFBB && CurFBB == MBB && CurTBB != MBB) {
SmallVector<MachineOperand, 4> NewCond(CurCond);
- if (!TII->ReverseBranchCondition(NewCond)) {
+ if (!TII->reverseBranchCondition(NewCond)) {
DebugLoc dl = getBranchDebugLoc(*MBB);
- TII->RemoveBranch(*MBB);
- TII->InsertBranch(*MBB, CurFBB, CurTBB, NewCond, dl);
+ TII->removeBranch(*MBB);
+ TII->insertBranch(*MBB, CurFBB, CurTBB, NewCond, dl);
MadeChange = true;
++NumBranchOpts;
goto ReoptimizeBlock;
@@ -1469,7 +1458,7 @@ ReoptimizeBlock:
// This block may contain just an unconditional branch. Because there can
// be 'non-branch terminators' in the block, try removing the branch and
// then seeing if the block is empty.
- TII->RemoveBranch(*MBB);
+ TII->removeBranch(*MBB);
// If the only things remaining in the block are debug info, remove these
// as well, so this will behave the same as an empty block in non-debug
// mode.
@@ -1500,8 +1489,8 @@ ReoptimizeBlock:
PriorFBB = MBB;
}
DebugLoc pdl = getBranchDebugLoc(PrevBB);
- TII->RemoveBranch(PrevBB);
- TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, pdl);
+ TII->removeBranch(PrevBB);
+ TII->insertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, pdl);
}
// Iterate through all the predecessors, revectoring each in-turn.
@@ -1526,9 +1515,9 @@ ReoptimizeBlock:
*PMBB, NewCurTBB, NewCurFBB, NewCurCond, true);
if (!NewCurUnAnalyzable && NewCurTBB && NewCurTBB == NewCurFBB) {
DebugLoc pdl = getBranchDebugLoc(*PMBB);
- TII->RemoveBranch(*PMBB);
+ TII->removeBranch(*PMBB);
NewCurCond.clear();
- TII->InsertBranch(*PMBB, NewCurTBB, nullptr, NewCurCond, pdl);
+ TII->insertBranch(*PMBB, NewCurTBB, nullptr, NewCurCond, pdl);
MadeChange = true;
++NumBranchOpts;
PMBB->CorrectExtraCFGEdges(NewCurTBB, nullptr, false);
@@ -1548,7 +1537,7 @@ ReoptimizeBlock:
}
// Add the branch back if the block is more than just an uncond branch.
- TII->InsertBranch(*MBB, CurTBB, nullptr, CurCond, dl);
+ TII->insertBranch(*MBB, CurTBB, nullptr, CurCond, dl);
}
}
@@ -1585,7 +1574,7 @@ ReoptimizeBlock:
if (CurFallsThru) {
MachineBasicBlock *NextBB = &*std::next(MBB->getIterator());
CurCond.clear();
- TII->InsertBranch(*MBB, NextBB, nullptr, CurCond, DebugLoc());
+ TII->insertBranch(*MBB, NextBB, nullptr, CurCond, DebugLoc());
}
MBB->moveAfter(PredBB);
MadeChange = true;
@@ -1615,18 +1604,22 @@ ReoptimizeBlock:
// Okay, there is no really great place to put this block. If, however,
// the block before this one would be a fall-through if this block were
- // removed, move this block to the end of the function.
+ // removed, move this block to the end of the function. There is no real
+ // advantage in "falling through" to an EH block, so we don't want to
+ // perform this transformation for that case.
+ //
+ // Also, Windows EH introduced the possibility of an arbitrary number of
+ // successors to a given block. The analyzeBranch call does not consider
+ // exception handling and so we can get in a state where a block
+ // containing a call is followed by multiple EH blocks that would be
+ // rotated infinitely at the end of the function if the transformation
+ // below were performed for EH "FallThrough" blocks. Therefore, even if
+ // that appears not to be happening anymore, we should assume that it is
+ // possible and not remove the "!FallThrough->isEHPad()" condition below.
MachineBasicBlock *PrevTBB = nullptr, *PrevFBB = nullptr;
SmallVector<MachineOperand, 4> PrevCond;
- // We're looking for cases where PrevBB could possibly fall through to
- // FallThrough, but if FallThrough is an EH pad that wouldn't be useful
- // so here we skip over any EH pads so we might have a chance to find
- // a branch target from PrevBB.
- while (FallThrough != MF.end() && FallThrough->isEHPad())
- ++FallThrough;
- // Now check to see if the current block is sitting between PrevBB and
- // a block to which it could fall through.
if (FallThrough != MF.end() &&
+ !FallThrough->isEHPad() &&
!TII->analyzeBranch(PrevBB, PrevTBB, PrevFBB, PrevCond, true) &&
PrevBB.isSuccessor(&*FallThrough)) {
MBB->moveAfter(&MF.back());
@@ -1720,10 +1713,8 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
// The terminator is probably a conditional branch, try not to separate the
// branch from condition setting instruction.
- MachineBasicBlock::iterator PI = Loc;
- --PI;
- while (PI != MBB->begin() && PI->isDebugValue())
- --PI;
+ MachineBasicBlock::iterator PI =
+ skipDebugInstructionsBackward(std::prev(Loc), MBB->begin());
bool IsDef = false;
for (const MachineOperand &MO : PI->operands()) {
@@ -1817,18 +1808,11 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
MachineBasicBlock::iterator FIE = FBB->end();
while (TIB != TIE && FIB != FIE) {
// Skip dbg_value instructions. These do not count.
- if (TIB->isDebugValue()) {
- while (TIB != TIE && TIB->isDebugValue())
- ++TIB;
- if (TIB == TIE)
- break;
- }
- if (FIB->isDebugValue()) {
- while (FIB != FIE && FIB->isDebugValue())
- ++FIB;
- if (FIB == FIE)
- break;
- }
+ TIB = skipDebugInstructionsForward(TIB, TIE);
+ FIB = skipDebugInstructionsForward(FIB, FIE);
+ if (TIB == TIE || FIB == FIE)
+ break;
+
if (!TIB->isIdenticalTo(*FIB, MachineInstr::CheckKillDead))
break;
@@ -1929,14 +1913,21 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
FBB->erase(FBB->begin(), FIB);
// Update livein's.
+ bool AddedLiveIns = false;
for (unsigned i = 0, e = LocalDefs.size(); i != e; ++i) {
unsigned Def = LocalDefs[i];
if (LocalDefsSet.count(Def)) {
TBB->addLiveIn(Def);
FBB->addLiveIn(Def);
+ AddedLiveIns = true;
}
}
+ if (AddedLiveIns) {
+ TBB->sortUniqueLiveIns();
+ FBB->sortUniqueLiveIns();
+ }
+
++NumHoist;
return true;
}
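
As a self-contained sketch of the profitability test reshaped by the hunks above (simplified: the funclet-membership, fallthrough-predecessor and code-size special cases are omitted):

    // A pair of blocks is worth tail-merging when the shared tail, counting a
    // temporarily stripped unconditional branch as one extra instruction, is
    // at least MinCommonTailLength long. After block placement the extra
    // instruction is only counted for single-successor blocks, per the comment
    // in the hunk above.
    bool profitableToMerge(unsigned CommonTailLen, unsigned MinCommonTailLength,
                           bool BothEndedInStrippedUncondBranch,
                           bool SingleSuccessor, bool AfterPlacement) {
      unsigned EffectiveTailLen = CommonTailLen;
      if (BothEndedInStrippedUncondBranch &&
          (SingleSuccessor || !AfterPlacement))
        ++EffectiveTailLen;
      return EffectiveTailLen >= MinCommonTailLength;
    }
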
diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.h b/contrib/llvm/lib/CodeGen/BranchFolding.h
index 36a5a2e2c97c..fc48e484292d 100644
--- a/contrib/llvm/lib/CodeGen/BranchFolding.h
+++ b/contrib/llvm/lib/CodeGen/BranchFolding.h
@@ -29,9 +29,13 @@ namespace llvm {
public:
class MBFIWrapper;
- explicit BranchFolder(bool defaultEnableTailMerge, bool CommonHoist,
+ explicit BranchFolder(bool defaultEnableTailMerge,
+ bool CommonHoist,
MBFIWrapper &MBFI,
- const MachineBranchProbabilityInfo &MBPI);
+ const MachineBranchProbabilityInfo &MBPI,
+ // Min tail length to merge. Defaults to the command-line
+ // flag. Ignored for optsize.
+ unsigned MinCommonTailLength = 0);
bool OptimizeFunction(MachineFunction &MF, const TargetInstrInfo *tii,
const TargetRegisterInfo *tri, MachineModuleInfo *mmi,
@@ -99,6 +103,7 @@ namespace llvm {
bool EnableTailMerge;
bool EnableHoistCommonCode;
bool UpdateLiveIns;
+ unsigned MinCommonTailLength;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
MachineModuleInfo *MMI;
@@ -129,9 +134,9 @@ namespace llvm {
bool TailMergeBlocks(MachineFunction &MF);
bool TryTailMergeBlocks(MachineBasicBlock* SuccBB,
- MachineBasicBlock* PredBB);
+ MachineBasicBlock* PredBB,
+ unsigned MinCommonTailLength);
void setCommonTailEdgeWeights(MachineBasicBlock &TailMBB);
- void computeLiveIns(MachineBasicBlock &MBB);
void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
MachineBasicBlock *NewDest);
MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB,
@@ -150,7 +155,6 @@ namespace llvm {
bool OptimizeBranches(MachineFunction &MF);
bool OptimizeBlock(MachineBasicBlock *MBB);
void RemoveDeadBlock(MachineBasicBlock *MBB);
- bool OptimizeImpDefsBlock(MachineBasicBlock *MBB);
bool HoistCommonCode(MachineFunction &MF);
bool HoistCommonCodeInSuccs(MachineBasicBlock *MBB);
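
A hypothetical caller of the widened constructor, not part of the patch; FreqInfo and ProbInfo are assumed to be the MBFIWrapper and MachineBranchProbabilityInfo objects the calling pass already owns:

    // Passing 0 (the default) keeps the pass's usual TailMergeSize threshold;
    // a non-zero value, e.g. 4 here, requests a stricter minimum tail length.
    BranchFolder BF(/*defaultEnableTailMerge=*/true, /*CommonHoist=*/false,
                    FreqInfo, ProbInfo, /*MinCommonTailLength=*/4);
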
diff --git a/contrib/llvm/lib/CodeGen/BranchRelaxation.cpp b/contrib/llvm/lib/CodeGen/BranchRelaxation.cpp
new file mode 100644
index 000000000000..8b27570a17f4
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/BranchRelaxation.cpp
@@ -0,0 +1,510 @@
+//===-- BranchRelaxation.cpp ----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "branch-relaxation"
+
+STATISTIC(NumSplit, "Number of basic blocks split");
+STATISTIC(NumConditionalRelaxed, "Number of conditional branches relaxed");
+STATISTIC(NumUnconditionalRelaxed, "Number of unconditional branches relaxed");
+
+#define BRANCH_RELAX_NAME "Branch relaxation pass"
+
+namespace {
+class BranchRelaxation : public MachineFunctionPass {
+ /// BasicBlockInfo - Information about the offset and size of a single
+ /// basic block.
+ struct BasicBlockInfo {
+ /// Offset - Distance from the beginning of the function to the beginning
+ /// of this basic block.
+ ///
+ /// The offset is always aligned as required by the basic block.
+ unsigned Offset;
+
+ /// Size - Size of the basic block in bytes. If the block contains
+ /// inline assembly, this is a worst case estimate.
+ ///
+ /// The size does not include any alignment padding whether from the
+ /// beginning of the block, or from an aligned jump table at the end.
+ unsigned Size;
+
+ BasicBlockInfo() : Offset(0), Size(0) {}
+
+ /// Compute the offset immediately following this block. \p MBB is the next
+ /// block.
+ unsigned postOffset(const MachineBasicBlock &MBB) const {
+ unsigned PO = Offset + Size;
+ unsigned Align = MBB.getAlignment();
+ if (Align == 0)
+ return PO;
+
+ unsigned AlignAmt = 1 << Align;
+ unsigned ParentAlign = MBB.getParent()->getAlignment();
+ if (Align <= ParentAlign)
+ return PO + OffsetToAlignment(PO, AlignAmt);
+
+ // The alignment of this MBB is larger than the function's alignment, so we
+ // can't tell whether or not it will insert nops. Assume that it will.
+ return PO + AlignAmt + OffsetToAlignment(PO, AlignAmt);
+ }
+ };
+
+ SmallVector<BasicBlockInfo, 16> BlockInfo;
+ std::unique_ptr<RegScavenger> RS;
+ LivePhysRegs LiveRegs;
+
+ MachineFunction *MF;
+ const TargetRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+
+ bool relaxBranchInstructions();
+ void scanFunction();
+
+ MachineBasicBlock *createNewBlockAfter(MachineBasicBlock &BB);
+
+ MachineBasicBlock *splitBlockBeforeInstr(MachineInstr &MI,
+ MachineBasicBlock *DestBB);
+ void adjustBlockOffsets(MachineBasicBlock &MBB);
+ bool isBlockInRange(const MachineInstr &MI, const MachineBasicBlock &BB) const;
+
+ bool fixupConditionalBranch(MachineInstr &MI);
+ bool fixupUnconditionalBranch(MachineInstr &MI);
+ uint64_t computeBlockSize(const MachineBasicBlock &MBB) const;
+ unsigned getInstrOffset(const MachineInstr &MI) const;
+ void dumpBBs();
+ void verify();
+
+public:
+ static char ID;
+ BranchRelaxation() : MachineFunctionPass(ID) { }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ StringRef getPassName() const override {
+ return BRANCH_RELAX_NAME;
+ }
+};
+
+}
+
+char BranchRelaxation::ID = 0;
+char &llvm::BranchRelaxationPassID = BranchRelaxation::ID;
+
+INITIALIZE_PASS(BranchRelaxation, DEBUG_TYPE, BRANCH_RELAX_NAME, false, false)
+
+/// verify - check BBOffsets, BBSizes and block alignment
+void BranchRelaxation::verify() {
+#ifndef NDEBUG
+ unsigned PrevNum = MF->begin()->getNumber();
+ for (MachineBasicBlock &MBB : *MF) {
+ unsigned Align = MBB.getAlignment();
+ unsigned Num = MBB.getNumber();
+ assert(BlockInfo[Num].Offset % (1u << Align) == 0);
+ assert(!Num || BlockInfo[PrevNum].postOffset(MBB) <= BlockInfo[Num].Offset);
+ assert(BlockInfo[Num].Size == computeBlockSize(MBB));
+ PrevNum = Num;
+ }
+#endif
+}
+
+/// print block size and offset information - debugging
+void BranchRelaxation::dumpBBs() {
+ for (auto &MBB : *MF) {
+ const BasicBlockInfo &BBI = BlockInfo[MBB.getNumber()];
+ dbgs() << format("BB#%u\toffset=%08x\t", MBB.getNumber(), BBI.Offset)
+ << format("size=%#x\n", BBI.Size);
+ }
+}
+
+/// scanFunction - Do the initial scan of the function, building up
+/// information about each block.
+void BranchRelaxation::scanFunction() {
+ BlockInfo.clear();
+ BlockInfo.resize(MF->getNumBlockIDs());
+
+ // First thing, compute the size of all basic blocks, and see if the function
+ // has any inline assembly in it. If so, we have to be conservative about
+ // alignment assumptions, as we don't know for sure the size of any
+ // instructions in the inline assembly.
+ for (MachineBasicBlock &MBB : *MF)
+ BlockInfo[MBB.getNumber()].Size = computeBlockSize(MBB);
+
+ // Compute block offsets and known bits.
+ adjustBlockOffsets(*MF->begin());
+}
+
+/// computeBlockSize - Compute the size for MBB.
+uint64_t BranchRelaxation::computeBlockSize(const MachineBasicBlock &MBB) const {
+ uint64_t Size = 0;
+ for (const MachineInstr &MI : MBB)
+ Size += TII->getInstSizeInBytes(MI);
+ return Size;
+}
+
+/// getInstrOffset - Return the current offset of the specified machine
+/// instruction from the start of the function. This offset changes as stuff is
+/// moved around inside the function.
+unsigned BranchRelaxation::getInstrOffset(const MachineInstr &MI) const {
+ const MachineBasicBlock *MBB = MI.getParent();
+
+ // The offset is composed of two things: the sum of the sizes of all MBB's
+ // before this instruction's block, and the offset from the start of the block
+ // it is in.
+ unsigned Offset = BlockInfo[MBB->getNumber()].Offset;
+
+ // Sum instructions before MI in MBB.
+ for (MachineBasicBlock::const_iterator I = MBB->begin(); &*I != &MI; ++I) {
+ assert(I != MBB->end() && "Didn't find MI in its own basic block?");
+ Offset += TII->getInstSizeInBytes(*I);
+ }
+
+ return Offset;
+}
+
+void BranchRelaxation::adjustBlockOffsets(MachineBasicBlock &Start) {
+ unsigned PrevNum = Start.getNumber();
+ for (auto &MBB : make_range(MachineFunction::iterator(Start), MF->end())) {
+ unsigned Num = MBB.getNumber();
+ if (!Num) // block zero is never changed from offset zero.
+ continue;
+ // Get the offset and known bits at the end of the layout predecessor.
+ // Include the alignment of the current block.
+ BlockInfo[Num].Offset = BlockInfo[PrevNum].postOffset(MBB);
+
+ PrevNum = Num;
+ }
+}
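
A worked, self-contained version of the rounding that BasicBlockInfo::postOffset performs above when the block's alignment does not exceed the function's (the conservative extra AlignAmt added for over-aligned blocks is left out):

    #include <cstdint>

    // Round the end of the previous block (Offset + Size) up to the next
    // 2^LogAlign byte boundary, e.g. alignedPostOffset(10, 3, 2) == 16.
    uint64_t alignedPostOffset(uint64_t Offset, uint64_t Size,
                               unsigned LogAlign) {
      uint64_t PO = Offset + Size;
      if (LogAlign == 0)
        return PO;
      uint64_t AlignAmt = uint64_t(1) << LogAlign;
      return PO + (AlignAmt - PO % AlignAmt) % AlignAmt;
    }
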
+
+/// Insert a new empty basic block after \p BB and return it.
+MachineBasicBlock *BranchRelaxation::createNewBlockAfter(MachineBasicBlock &BB) {
+ // Create a new MBB for the code after the OrigBB.
+ MachineBasicBlock *NewBB =
+ MF->CreateMachineBasicBlock(BB.getBasicBlock());
+ MF->insert(++BB.getIterator(), NewBB);
+
+ // Insert an entry into BlockInfo to align it properly with the block numbers.
+ BlockInfo.insert(BlockInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
+
+ return NewBB;
+}
+
+/// Split the basic block containing MI into two blocks, which are joined by
+/// an unconditional branch. Update data structures and renumber blocks to
+/// account for this change and returns the newly created block.
+MachineBasicBlock *BranchRelaxation::splitBlockBeforeInstr(MachineInstr &MI,
+ MachineBasicBlock *DestBB) {
+ MachineBasicBlock *OrigBB = MI.getParent();
+
+ // Create a new MBB for the code after the OrigBB.
+ MachineBasicBlock *NewBB =
+ MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
+ MF->insert(++OrigBB->getIterator(), NewBB);
+
+ // Splice the instructions starting with MI over to NewBB.
+ NewBB->splice(NewBB->end(), OrigBB, MI.getIterator(), OrigBB->end());
+
+ // Add an unconditional branch from OrigBB to NewBB.
+ // Note the new unconditional branch is not being recorded.
+ // There doesn't seem to be meaningful DebugInfo available; this doesn't
+ // correspond to anything in the source.
+ TII->insertUnconditionalBranch(*OrigBB, NewBB, DebugLoc());
+
+ // Insert an entry into BlockInfo to align it properly with the block numbers.
+ BlockInfo.insert(BlockInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
+
+
+ NewBB->transferSuccessors(OrigBB);
+ OrigBB->addSuccessor(NewBB);
+ OrigBB->addSuccessor(DestBB);
+
+ // Cleanup potential unconditional branch to successor block.
+ // Note that updateTerminator may change the size of the blocks.
+ NewBB->updateTerminator();
+ OrigBB->updateTerminator();
+
+ // Figure out how large the OrigBB is. As the first half of the original
+ // block, it cannot contain a tablejump. The size includes
+ // the new jump we added. (It should be possible to do this without
+ // recounting everything, but it's very confusing, and this is rarely
+ // executed.)
+ BlockInfo[OrigBB->getNumber()].Size = computeBlockSize(*OrigBB);
+
+ // Figure out how large the NewMBB is. As the second half of the original
+ // block, it may contain a tablejump.
+ BlockInfo[NewBB->getNumber()].Size = computeBlockSize(*NewBB);
+
+ // All BBOffsets following these blocks must be modified.
+ adjustBlockOffsets(*OrigBB);
+
+ // Need to fix live-in lists if we track liveness.
+ if (TRI->trackLivenessAfterRegAlloc(*MF))
+ computeLiveIns(LiveRegs, *TRI, *NewBB);
+
+ ++NumSplit;
+
+ return NewBB;
+}
+
+/// isBlockInRange - Returns true if the distance between the specified MI and
+/// the specified BB can fit in MI's displacement field.
+bool BranchRelaxation::isBlockInRange(
+ const MachineInstr &MI, const MachineBasicBlock &DestBB) const {
+ int64_t BrOffset = getInstrOffset(MI);
+ int64_t DestOffset = BlockInfo[DestBB.getNumber()].Offset;
+
+ if (TII->isBranchOffsetInRange(MI.getOpcode(), DestOffset - BrOffset))
+ return true;
+
+ DEBUG(
+ dbgs() << "Out of range branch to destination BB#" << DestBB.getNumber()
+ << " from BB#" << MI.getParent()->getNumber()
+ << " to " << DestOffset
+ << " offset " << DestOffset - BrOffset
+ << '\t' << MI
+ );
+
+ return false;
+}
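
The target hook isBranchOffsetInRange() hides the per-opcode encoding details; purely as an illustration, a self-contained range check for a branch with an N-bit signed, Scale-byte-granular displacement (the 19-bit, 4-byte-scaled case resembling an AArch64 conditional branch is an assumption used only as an example):

    #include <cstdint>

    // True if ByteOffset can be encoded as a signed Bits-bit immediate that is
    // implicitly multiplied by Scale, e.g. Bits = 19, Scale = 4 gives +/-1 MiB.
    bool fitsSignedScaledImm(int64_t ByteOffset, unsigned Bits, unsigned Scale) {
      if (ByteOffset % Scale != 0)
        return false;
      int64_t Units = ByteOffset / Scale;
      int64_t Max = (int64_t(1) << (Bits - 1)) - 1;
      int64_t Min = -(int64_t(1) << (Bits - 1));
      return Units >= Min && Units <= Max;
    }
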
+
+/// fixupConditionalBranch - Fix up a conditional branch whose destination is
+/// too far away to fit in its displacement field. It is converted to an inverse
+/// conditional branch + an unconditional branch to the destination.
+bool BranchRelaxation::fixupConditionalBranch(MachineInstr &MI) {
+ DebugLoc DL = MI.getDebugLoc();
+ MachineBasicBlock *MBB = MI.getParent();
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+ SmallVector<MachineOperand, 4> Cond;
+
+ bool Fail = TII->analyzeBranch(*MBB, TBB, FBB, Cond);
+ assert(!Fail && "branches to be relaxed must be analyzable");
+ (void)Fail;
+
+ // Add an unconditional branch to the destination and invert the branch
+ // condition to jump over it:
+ // tbz L1
+ // =>
+ // tbnz L2
+ // b L1
+ // L2:
+
+ if (FBB && isBlockInRange(MI, *FBB)) {
+ // Last MI in the BB is an unconditional branch. We can simply invert the
+ // condition and swap destinations:
+ // beq L1
+ // b L2
+ // =>
+ // bne L2
+ // b L1
+ DEBUG(dbgs() << " Invert condition and swap "
+ "its destination with " << MBB->back());
+
+ TII->reverseBranchCondition(Cond);
+ int OldSize = 0, NewSize = 0;
+ TII->removeBranch(*MBB, &OldSize);
+ TII->insertBranch(*MBB, FBB, TBB, Cond, DL, &NewSize);
+
+ BlockInfo[MBB->getNumber()].Size += (NewSize - OldSize);
+ return true;
+ } else if (FBB) {
+ // We need to split the basic block here to obtain two long-range
+ // unconditional branches.
+ auto &NewBB = *MF->CreateMachineBasicBlock(MBB->getBasicBlock());
+ MF->insert(++MBB->getIterator(), &NewBB);
+
+ // Insert an entry into BlockInfo to align it properly with the block
+ // numbers.
+ BlockInfo.insert(BlockInfo.begin() + NewBB.getNumber(), BasicBlockInfo());
+
+ unsigned &NewBBSize = BlockInfo[NewBB.getNumber()].Size;
+ int NewBrSize;
+ TII->insertUnconditionalBranch(NewBB, FBB, DL, &NewBrSize);
+ NewBBSize += NewBrSize;
+
+ // Update the successor lists according to the transformation to follow.
+ // Do it here since if there's no split, no update is needed.
+ MBB->replaceSuccessor(FBB, &NewBB);
+ NewBB.addSuccessor(FBB);
+ }
+
+ // We now have an appropriate fall-through block in place (either naturally or
+ // just created), so we can invert the condition.
+ MachineBasicBlock &NextBB = *std::next(MachineFunction::iterator(MBB));
+
+ DEBUG(dbgs() << " Insert B to BB#" << TBB->getNumber()
+ << ", invert condition and change dest. to BB#"
+ << NextBB.getNumber() << '\n');
+
+ unsigned &MBBSize = BlockInfo[MBB->getNumber()].Size;
+
+ // Insert a new conditional branch and a new unconditional branch.
+ int RemovedSize = 0;
+ TII->reverseBranchCondition(Cond);
+ TII->removeBranch(*MBB, &RemovedSize);
+ MBBSize -= RemovedSize;
+
+ int AddedSize = 0;
+ TII->insertBranch(*MBB, &NextBB, TBB, Cond, DL, &AddedSize);
+ MBBSize += AddedSize;
+
+ // Finally, keep the block offsets up to date.
+ adjustBlockOffsets(*MBB);
+ return true;
+}
+
+bool BranchRelaxation::fixupUnconditionalBranch(MachineInstr &MI) {
+ MachineBasicBlock *MBB = MI.getParent();
+
+ unsigned OldBrSize = TII->getInstSizeInBytes(MI);
+ MachineBasicBlock *DestBB = TII->getBranchDestBlock(MI);
+
+ int64_t DestOffset = BlockInfo[DestBB->getNumber()].Offset;
+ int64_t SrcOffset = getInstrOffset(MI);
+
+ assert(!TII->isBranchOffsetInRange(MI.getOpcode(), DestOffset - SrcOffset));
+
+ BlockInfo[MBB->getNumber()].Size -= OldBrSize;
+
+ MachineBasicBlock *BranchBB = MBB;
+
+ // If this was an expanded conditional branch, there is already a single
+ // unconditional branch in a block.
+ if (!MBB->empty()) {
+ BranchBB = createNewBlockAfter(*MBB);
+
+ // Add live outs.
+ for (const MachineBasicBlock *Succ : MBB->successors()) {
+ for (const MachineBasicBlock::RegisterMaskPair &LiveIn : Succ->liveins())
+ BranchBB->addLiveIn(LiveIn);
+ }
+
+ BranchBB->sortUniqueLiveIns();
+ BranchBB->addSuccessor(DestBB);
+ MBB->replaceSuccessor(DestBB, BranchBB);
+ }
+
+ DebugLoc DL = MI.getDebugLoc();
+ MI.eraseFromParent();
+ BlockInfo[BranchBB->getNumber()].Size += TII->insertIndirectBranch(
+ *BranchBB, *DestBB, DL, DestOffset - SrcOffset, RS.get());
+
+ adjustBlockOffsets(*MBB);
+ return true;
+}
+
+bool BranchRelaxation::relaxBranchInstructions() {
+ bool Changed = false;
+
+ // Relaxing branches involves creating new basic blocks, so re-eval
+ // end() for termination.
+ for (MachineFunction::iterator I = MF->begin(); I != MF->end(); ++I) {
+ MachineBasicBlock &MBB = *I;
+
+ // Empty block?
+ MachineBasicBlock::iterator Last = MBB.getLastNonDebugInstr();
+ if (Last == MBB.end())
+ continue;
+
+ // Expand the unconditional branch first if necessary. If there is a
+ // conditional branch, this will end up changing its branch destination to be
+ // over the newly inserted indirect branch block, which may avoid
+ // the need to try expanding the conditional branch first, saving an extra
+ // jump.
+ if (Last->isUnconditionalBranch()) {
+ // Unconditional branch destination might be unanalyzable, assume these
+ // are OK.
+ if (MachineBasicBlock *DestBB = TII->getBranchDestBlock(*Last)) {
+ if (!isBlockInRange(*Last, *DestBB)) {
+ fixupUnconditionalBranch(*Last);
+ ++NumUnconditionalRelaxed;
+ Changed = true;
+ }
+ }
+ }
+
+ // Loop over the conditional branches.
+ MachineBasicBlock::iterator Next;
+ for (MachineBasicBlock::iterator J = MBB.getFirstTerminator();
+ J != MBB.end(); J = Next) {
+ Next = std::next(J);
+ MachineInstr &MI = *J;
+
+ if (MI.isConditionalBranch()) {
+ MachineBasicBlock *DestBB = TII->getBranchDestBlock(MI);
+ if (!isBlockInRange(MI, *DestBB)) {
+ if (Next != MBB.end() && Next->isConditionalBranch()) {
+ // If there are multiple conditional branches, this isn't an
+ // analyzable block. Split later terminators into a new block so
+ // each one will be analyzable.
+
+ splitBlockBeforeInstr(*Next, DestBB);
+ } else {
+ fixupConditionalBranch(MI);
+ ++NumConditionalRelaxed;
+ }
+
+ Changed = true;
+
+ // This may have modified all of the terminators, so start over.
+ Next = MBB.getFirstTerminator();
+ }
+ }
+ }
+ }
+
+ return Changed;
+}
+
+bool BranchRelaxation::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+
+ DEBUG(dbgs() << "***** BranchRelaxation *****\n");
+
+ const TargetSubtargetInfo &ST = MF->getSubtarget();
+ TII = ST.getInstrInfo();
+
+ TRI = ST.getRegisterInfo();
+ if (TRI->trackLivenessAfterRegAlloc(*MF))
+ RS.reset(new RegScavenger());
+
+ // Renumber all of the machine basic blocks in the function, guaranteeing that
+ // the numbers agree with the position of the block in the function.
+ MF->RenumberBlocks();
+
+ // Do the initial scan of the function, building up information about the
+ // sizes of each block.
+ scanFunction();
+
+ DEBUG(dbgs() << " Basic blocks before relaxation\n"; dumpBBs(););
+
+ bool MadeChange = false;
+ while (relaxBranchInstructions())
+ MadeChange = true;
+
+ // After a while, this might be made debug-only, but it is not expensive.
+ verify();
+
+ DEBUG(dbgs() << " Basic blocks after relaxation\n\n"; dumpBBs());
+
+ BlockInfo.clear();
+
+ return MadeChange;
+}
diff --git a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp
index 7d67bcfe5469..2e33f14c7ee3 100644
--- a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp
+++ b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp
@@ -23,6 +23,8 @@
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+
using namespace llvm;
CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf,
@@ -64,6 +66,22 @@ void CCState::MarkAllocated(unsigned Reg) {
UsedRegs[*AI/32] |= 1 << (*AI&31);
}
+bool CCState::IsShadowAllocatedReg(unsigned Reg) const {
+ if (!isAllocated(Reg))
+ return false;
+
+ for (auto const &ValAssign : Locs) {
+ if (ValAssign.isRegLoc()) {
+ for (MCRegAliasIterator AI(ValAssign.getLocReg(), &TRI, true);
+ AI.isValid(); ++AI) {
+ if (*AI == Reg)
+ return false;
+ }
+ }
+ }
+ return true;
+}
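
A self-contained sketch of what the new IsShadowAllocatedReg() query expresses, with register aliasing deliberately ignored to keep it short:

    #include <set>
    #include <vector>

    // A register is "shadow allocated" when it was marked as used (for example
    // because a prior argument shadowed it) but no argument value was actually
    // assigned to it.
    bool isShadowAllocatedReg(unsigned Reg, const std::set<unsigned> &Allocated,
                              const std::vector<unsigned> &AssignedRegs) {
      if (!Allocated.count(Reg))
        return false;                  // never reserved at all
      for (unsigned R : AssignedRegs)
        if (R == Reg)
          return false;                // a real assignment, not just a shadow
      return true;
    }
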
+
/// Analyze an array of argument values,
/// incorporating info about the formals into this state.
void
diff --git a/contrib/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm/lib/CodeGen/CodeGen.cpp
index 6679819fdef6..4cf9b138f10d 100644
--- a/contrib/llvm/lib/CodeGen/CodeGen.cpp
+++ b/contrib/llvm/lib/CodeGen/CodeGen.cpp
@@ -22,7 +22,9 @@ using namespace llvm;
void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeAtomicExpandPass(Registry);
initializeBranchFolderPassPass(Registry);
+ initializeBranchRelaxationPass(Registry);
initializeCodeGenPreparePass(Registry);
+ initializeCountingFunctionInserterPass(Registry);
initializeDeadMachineInstructionElimPass(Registry);
initializeDetectDeadLanesPass(Registry);
initializeDwarfEHPreparePass(Registry);
@@ -53,6 +55,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeMachineLICMPass(Registry);
initializeMachineLoopInfoPass(Registry);
initializeMachineModuleInfoPass(Registry);
+ initializeMachinePipelinerPass(Registry);
initializeMachinePostDominatorTreePass(Registry);
initializeMachineSchedulerPass(Registry);
initializeMachineSinkingPass(Registry);
@@ -68,6 +71,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializePostRASchedulerPass(Registry);
initializePreISelIntrinsicLoweringLegacyPassPass(Registry);
initializeProcessImplicitDefsPass(Registry);
+ initializeRAGreedyPass(Registry);
initializeRegisterCoalescerPass(Registry);
initializeRenameIndependentSubregsPass(Registry);
initializeShrinkWrapPass(Registry);
diff --git a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp
index ede404149a1c..934b470f13b5 100644
--- a/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/contrib/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -17,12 +17,16 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -118,6 +122,19 @@ static cl::opt<bool> DisablePreheaderProtect(
"disable-preheader-prot", cl::Hidden, cl::init(false),
cl::desc("Disable protection against removing loop preheaders"));
+static cl::opt<bool> ProfileGuidedSectionPrefix(
+ "profile-guided-section-prefix", cl::Hidden, cl::init(true),
+ cl::desc("Use profile info to add section prefix for hot/cold functions"));
+
+static cl::opt<unsigned> FreqRatioToSkipMerge(
+ "cgp-freq-ratio-to-skip-merge", cl::Hidden, cl::init(2),
+ cl::desc("Skip merging empty blocks if (frequency of empty block) / "
+ "(frequency of destination block) is greater than this ratio"));
+
+static cl::opt<bool> ForceSplitStore(
+ "force-split-store", cl::Hidden, cl::init(false),
+ cl::desc("Force store splitting no matter what the target query says."));
+
namespace {
typedef SmallPtrSet<Instruction *, 16> SetOfInstrs;
typedef PointerIntPair<Type *, 1, bool> TypeIsSExt;
@@ -130,6 +147,8 @@ class TypePromotionTransaction;
const TargetTransformInfo *TTI;
const TargetLibraryInfo *TLInfo;
const LoopInfo *LI;
+ std::unique_ptr<BlockFrequencyInfo> BFI;
+ std::unique_ptr<BranchProbabilityInfo> BPI;
/// As we scan instructions optimizing them, this is the next instruction
/// to optimize. Transforms that can invalidate this should update it.
@@ -163,10 +182,11 @@ class TypePromotionTransaction;
}
bool runOnFunction(Function &F) override;
- const char *getPassName() const override { return "CodeGen Prepare"; }
+ StringRef getPassName() const override { return "CodeGen Prepare"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
// FIXME: When we can selectively preserve passes, preserve the domtree.
+ AU.addRequired<ProfileSummaryInfoWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
@@ -175,8 +195,11 @@ class TypePromotionTransaction;
private:
bool eliminateFallThrough(Function &F);
bool eliminateMostlyEmptyBlocks(Function &F);
+ BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
bool canMergeBlocks(const BasicBlock *BB, const BasicBlock *DestBB) const;
void eliminateMostlyEmptyBlock(BasicBlock *BB);
+ bool isMergingEmptyBlockProfitable(BasicBlock *BB, BasicBlock *DestBB,
+ bool isPreheader);
bool optimizeBlock(BasicBlock &BB, bool& ModifiedDT);
bool optimizeInst(Instruction *I, bool& ModifiedDT);
bool optimizeMemoryInst(Instruction *I, Value *Addr,
@@ -199,13 +222,15 @@ class TypePromotionTransaction;
unsigned CreatedInstCost);
bool splitBranchCondition(Function &F);
bool simplifyOffsetableRelocate(Instruction &I);
- void stripInvariantGroupMetadata(Instruction &I);
};
}
char CodeGenPrepare::ID = 0;
-INITIALIZE_TM_PASS(CodeGenPrepare, "codegenprepare",
- "Optimize for code generation", false, false)
+INITIALIZE_TM_PASS_BEGIN(CodeGenPrepare, "codegenprepare",
+ "Optimize for code generation", false, false)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
+INITIALIZE_TM_PASS_END(CodeGenPrepare, "codegenprepare",
+ "Optimize for code generation", false, false)
FunctionPass *llvm::createCodeGenPreparePass(const TargetMachine *TM) {
return new CodeGenPrepare(TM);
@@ -221,6 +246,8 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
// Clear per function information.
InsertedInsts.clear();
PromotedInsts.clear();
+ BFI.reset();
+ BPI.reset();
ModifiedDT = false;
if (TM)
@@ -230,6 +257,15 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
OptSize = F.optForSize();
+ if (ProfileGuidedSectionPrefix) {
+ ProfileSummaryInfo *PSI =
+ getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ if (PSI->isFunctionEntryHot(&F))
+ F.setSectionPrefix(".hot");
+ else if (PSI->isFunctionEntryCold(&F))
+ F.setSectionPrefix(".cold");
+ }
+
/// This optimization identifies DIV instructions that can be
/// profitably bypassed and carried out with a shorter, faster divide.
if (!OptSize && TLI && TLI->isSlowDivBypassed()) {
@@ -364,6 +400,38 @@ bool CodeGenPrepare::eliminateFallThrough(Function &F) {
return Changed;
}
+/// Find a destination block from BB if BB is a mergeable empty block.
+BasicBlock *CodeGenPrepare::findDestBlockOfMergeableEmptyBlock(BasicBlock *BB) {
+ // If this block doesn't end with an uncond branch, ignore it.
+ BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
+ if (!BI || !BI->isUnconditional())
+ return nullptr;
+
+ // If the instruction before the branch (skipping debug info) isn't a phi
+ // node, then other stuff is happening here.
+ BasicBlock::iterator BBI = BI->getIterator();
+ if (BBI != BB->begin()) {
+ --BBI;
+ while (isa<DbgInfoIntrinsic>(BBI)) {
+ if (BBI == BB->begin())
+ break;
+ --BBI;
+ }
+ if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
+ return nullptr;
+ }
+
+ // Do not break infinite loops.
+ BasicBlock *DestBB = BI->getSuccessor(0);
+ if (DestBB == BB)
+ return nullptr;
+
+ if (!canMergeBlocks(BB, DestBB))
+ DestBB = nullptr;
+
+ return DestBB;
+}
+
/// Eliminate blocks that contain only PHI nodes, debug info directives, and an
/// unconditional branch. Passes before isel (e.g. LSR/loopsimplify) often split
/// edges in ways that are non-optimal for isel. Start by eliminating these
@@ -382,46 +450,106 @@ bool CodeGenPrepare::eliminateMostlyEmptyBlocks(Function &F) {
// Note that this intentionally skips the entry block.
for (Function::iterator I = std::next(F.begin()), E = F.end(); I != E;) {
BasicBlock *BB = &*I++;
+ BasicBlock *DestBB = findDestBlockOfMergeableEmptyBlock(BB);
+ if (!DestBB ||
+ !isMergingEmptyBlockProfitable(BB, DestBB, Preheaders.count(BB)))
+ continue;
+
+ eliminateMostlyEmptyBlock(BB);
+ MadeChange = true;
+ }
+ return MadeChange;
+}
+
+bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
+ BasicBlock *DestBB,
+ bool isPreheader) {
+ // Do not delete loop preheaders if doing so would create a critical edge.
+ // Loop preheaders can be good locations to spill registers. If the
+ // preheader is deleted and we create a critical edge, registers may be
+ // spilled in the loop body instead.
+ if (!DisablePreheaderProtect && isPreheader &&
+ !(BB->getSinglePredecessor() &&
+ BB->getSinglePredecessor()->getSingleSuccessor()))
+ return false;
+
+ // Try to skip merging if the unique predecessor of BB is terminated by a
+ // switch or indirect branch instruction, and BB is used as an incoming block
+ // of PHIs in DestBB. In such a case, merging BB and DestBB would cause ISel
+ // to add COPY instructions in the predecessor of BB instead of BB (if it is
+ // not merged). Note that the critical edge created by merging such blocks
+ // won't be split in MachineSink because the jump table is not analyzable. By
+ // keeping such an empty block (BB), ISel will place COPY instructions in BB,
+ // not in the predecessor of BB.
+ BasicBlock *Pred = BB->getUniquePredecessor();
+ if (!Pred ||
+ !(isa<SwitchInst>(Pred->getTerminator()) ||
+ isa<IndirectBrInst>(Pred->getTerminator())))
+ return true;
+
+ if (BB->getTerminator() != BB->getFirstNonPHI())
+ return true;
+
+ // We use a simple cost heuristic which determines that skipping merging is
+ // profitable if the cost of skipping merging is less than the cost of
+ // merging: Cost(skipping merging) < Cost(merging BB), where the
+ // Cost(skipping merging) is Freq(BB) * (Cost(Copy) + Cost(Branch)), and
+ // the Cost(merging BB) is Freq(Pred) * Cost(Copy).
+ // Assuming Cost(Copy) == Cost(Branch), we could simplify it to :
+ // Freq(Pred) / Freq(BB) > 2.
+ // Note that if there are multiple empty blocks sharing the same incoming
+ // value for the PHIs in the DestBB, we consider them together. In such
+ // case, Cost(merging BB) will be the sum of their frequencies.
+
+ if (!isa<PHINode>(DestBB->begin()))
+ return true;
- // If this block doesn't end with an uncond branch, ignore it.
- BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
- if (!BI || !BI->isUnconditional())
+ SmallPtrSet<BasicBlock *, 16> SameIncomingValueBBs;
+
+ // Find all other incoming blocks from which incoming values of all PHIs in
+ // DestBB are the same as the ones from BB.
+ for (pred_iterator PI = pred_begin(DestBB), E = pred_end(DestBB); PI != E;
+ ++PI) {
+ BasicBlock *DestBBPred = *PI;
+ if (DestBBPred == BB)
continue;
- // If the instruction before the branch (skipping debug info) isn't a phi
- // node, then other stuff is happening here.
- BasicBlock::iterator BBI = BI->getIterator();
- if (BBI != BB->begin()) {
- --BBI;
- while (isa<DbgInfoIntrinsic>(BBI)) {
- if (BBI == BB->begin())
- break;
- --BBI;
+ bool HasAllSameValue = true;
+ BasicBlock::const_iterator DestBBI = DestBB->begin();
+ while (const PHINode *DestPN = dyn_cast<PHINode>(DestBBI++)) {
+ if (DestPN->getIncomingValueForBlock(BB) !=
+ DestPN->getIncomingValueForBlock(DestBBPred)) {
+ HasAllSameValue = false;
+ break;
}
- if (!isa<DbgInfoIntrinsic>(BBI) && !isa<PHINode>(BBI))
- continue;
}
+ if (HasAllSameValue)
+ SameIncomingValueBBs.insert(DestBBPred);
+ }
- // Do not break infinite loops.
- BasicBlock *DestBB = BI->getSuccessor(0);
- if (DestBB == BB)
- continue;
+  // See if all of BB's incoming values are the same as the value from Pred. In
+  // this case, there is no reason to skip merging because COPYs are expected
+  // to be placed in Pred already.
+ if (SameIncomingValueBBs.count(Pred))
+ return true;
- if (!canMergeBlocks(BB, DestBB))
- continue;
+ if (!BFI) {
+ Function &F = *BB->getParent();
+ LoopInfo LI{DominatorTree(F)};
+ BPI.reset(new BranchProbabilityInfo(F, LI));
+ BFI.reset(new BlockFrequencyInfo(F, *BPI, LI));
+ }
- // Do not delete loop preheaders if doing so would create a critical edge.
- // Loop preheaders can be good locations to spill registers. If the
- // preheader is deleted and we create a critical edge, registers may be
- // spilled in the loop body instead.
- if (!DisablePreheaderProtect && Preheaders.count(BB) &&
- !(BB->getSinglePredecessor() && BB->getSinglePredecessor()->getSingleSuccessor()))
- continue;
+ BlockFrequency PredFreq = BFI->getBlockFreq(Pred);
+ BlockFrequency BBFreq = BFI->getBlockFreq(BB);
- eliminateMostlyEmptyBlock(BB);
- MadeChange = true;
- }
- return MadeChange;
+ for (auto SameValueBB : SameIncomingValueBBs)
+ if (SameValueBB->getUniquePredecessor() == Pred &&
+ DestBB == findDestBlockOfMergeableEmptyBlock(SameValueBB))
+ BBFreq += BFI->getBlockFreq(SameValueBB);
+
+ return PredFreq.getFrequency() <=
+ BBFreq.getFrequency() * FreqRatioToSkipMerge;
}
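
The profitability check above boils down to one frequency comparison. A minimal standalone sketch of that final comparison, using plain integers in place of BlockFrequencyInfo results and an assumed ratio of 2 (the value the comment derives from Cost(Copy) == Cost(Branch)); the names and numbers here are illustrative, not taken from the patch:

#include <cstdint>

// Returns true when merging the empty block is still considered profitable,
// mirroring "PredFreq <= BBFreq * FreqRatioToSkipMerge" from the code above.
static bool isMergingProfitable(uint64_t PredFreq, uint64_t BBFreq,
                                uint64_t FreqRatioToSkipMerge = 2) {
  // Skipping the merge costs Freq(BB) * (Cost(Copy) + Cost(Branch)); merging
  // costs Freq(Pred) * Cost(Copy). With Cost(Copy) == Cost(Branch), merging
  // wins unless Pred runs more than twice as often as BB.
  return PredFreq <= BBFreq * FreqRatioToSkipMerge;
}

For example, with PredFreq = 1000 and BBFreq = 200 the predecessor is five times hotter than BB, so the sketch returns false and the empty block is kept to keep the COPYs out of Pred.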
/// Return true if we can merge BB into DestBB if there is a single
@@ -805,6 +933,14 @@ static bool SinkCast(CastInst *CI) {
///
static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
const DataLayout &DL) {
+ // Sink only "cheap" (or nop) address-space casts. This is a weaker condition
+ // than sinking only nop casts, but is helpful on some platforms.
+ if (auto *ASC = dyn_cast<AddrSpaceCastInst>(CI)) {
+ if (!TLI.isCheapAddrSpaceCast(ASC->getSrcAddressSpace(),
+ ASC->getDestAddressSpace()))
+ return false;
+ }
+
// If this is a noop copy,
EVT SrcVT = TLI.getValueType(DL, CI->getOperand(0)->getType());
EVT DstVT = TLI.getValueType(DL, CI->getType());
@@ -925,6 +1061,8 @@ static bool SinkCmpExpression(CmpInst *CI, const TargetLowering *TLI) {
InsertedCmp =
CmpInst::Create(CI->getOpcode(), CI->getPredicate(),
CI->getOperand(0), CI->getOperand(1), "", &*InsertPt);
+ // Propagate the debug info.
+ InsertedCmp->setDebugLoc(CI->getDebugLoc());
}
// Replace a use of the cmp with a use of the new cmp.
@@ -1814,18 +1952,8 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool& ModifiedDT) {
default: break;
case Intrinsic::objectsize: {
// Lower all uses of llvm.objectsize.*
- uint64_t Size;
- Type *ReturnTy = CI->getType();
- Constant *RetVal = nullptr;
- ConstantInt *Op1 = cast<ConstantInt>(II->getArgOperand(1));
- ObjSizeMode Mode = Op1->isZero() ? ObjSizeMode::Max : ObjSizeMode::Min;
- if (getObjectSize(II->getArgOperand(0),
- Size, *DL, TLInfo, false, Mode)) {
- RetVal = ConstantInt::get(ReturnTy, Size);
- } else {
- RetVal = ConstantInt::get(ReturnTy,
- Mode == ObjSizeMode::Min ? 0 : -1ULL);
- }
+ ConstantInt *RetVal =
+ lowerObjectSizeCall(II, *DL, TLInfo, /*MustSucceed=*/true);
// Substituting this can cause recursive simplifications, which can
// invalidate our iterator. Use a WeakVH to hold onto it in case this
// happens.
@@ -1963,13 +2091,13 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) {
if (!TLI)
return false;
- ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator());
- if (!RI)
+ ReturnInst *RetI = dyn_cast<ReturnInst>(BB->getTerminator());
+ if (!RetI)
return false;
PHINode *PN = nullptr;
BitCastInst *BCI = nullptr;
- Value *V = RI->getReturnValue();
+ Value *V = RetI->getReturnValue();
if (V) {
BCI = dyn_cast<BitCastInst>(V);
if (BCI)
@@ -1983,14 +2111,6 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) {
if (PN && PN->getParent() != BB)
return false;
- // It's not safe to eliminate the sign / zero extension of the return value.
- // See llvm::isInTailCallPosition().
- const Function *F = BB->getParent();
- AttributeSet CallerAttrs = F->getAttributes();
- if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) ||
- CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt))
- return false;
-
// Make sure there are no instructions between the PHI and return, or that the
// return is the first instruction in the block.
if (PN) {
@@ -1999,24 +2119,26 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) {
if (&*BI == BCI)
// Also skip over the bitcast.
++BI;
- if (&*BI != RI)
+ if (&*BI != RetI)
return false;
} else {
BasicBlock::iterator BI = BB->begin();
while (isa<DbgInfoIntrinsic>(BI)) ++BI;
- if (&*BI != RI)
+ if (&*BI != RetI)
return false;
}
/// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
/// call.
+ const Function *F = BB->getParent();
SmallVector<CallInst*, 4> TailCalls;
if (PN) {
for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I));
// Make sure the phi value is indeed produced by the tail call.
if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) &&
- TLI->mayBeEmittedAsTailCall(CI))
+ TLI->mayBeEmittedAsTailCall(CI) &&
+ attributesPermitTailCall(F, CI, RetI, *TLI))
TailCalls.push_back(CI);
}
} else {
@@ -2033,7 +2155,8 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) {
continue;
CallInst *CI = dyn_cast<CallInst>(&*RI);
- if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI))
+ if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) &&
+ attributesPermitTailCall(F, CI, RetI, *TLI))
TailCalls.push_back(CI);
}
}
@@ -2060,7 +2183,7 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) {
continue;
// Duplicate the return into CallBB.
- (void)FoldReturnIntoUncondBranch(RI, BB, CallBB);
+ (void)FoldReturnIntoUncondBranch(RetI, BB, CallBB);
ModifiedDT = Changed = true;
++NumRetsDup;
}
@@ -3237,7 +3360,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
int64_t ConstantOffset = 0;
gep_type_iterator GTI = gep_type_begin(AddrInst);
for (unsigned i = 1, e = AddrInst->getNumOperands(); i != e; ++i, ++GTI) {
- if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ if (StructType *STy = GTI.getStructTypeOrNull()) {
const StructLayout *SL = DL.getStructLayout(STy);
unsigned Idx =
cast<ConstantInt>(AddrInst->getOperand(i))->getZExtValue();
@@ -3665,8 +3788,7 @@ isProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
TPT.rollback(LastKnownGood);
// If the match didn't cover I, then it won't be shared by it.
- if (std::find(MatchedAddrModeInsts.begin(), MatchedAddrModeInsts.end(),
- I) == MatchedAddrModeInsts.end())
+ if (!is_contained(MatchedAddrModeInsts, I))
return false;
MatchedAddrModeInsts.clear();
@@ -3791,18 +3913,10 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
}
TPT.commit();
- // Check to see if any of the instructions supersumed by this addr mode are
- // non-local to I's BB.
- bool AnyNonLocal = false;
- for (unsigned i = 0, e = AddrModeInsts.size(); i != e; ++i) {
- if (IsNonLocalValue(AddrModeInsts[i], MemoryInst->getParent())) {
- AnyNonLocal = true;
- break;
- }
- }
-
// If all the instructions matched are already in this BB, don't do anything.
- if (!AnyNonLocal) {
+ if (none_of(AddrModeInsts, [&](Value *V) {
+ return IsNonLocalValue(V, MemoryInst->getParent());
+ })) {
DEBUG(dbgs() << "CGP: Found local addrmode: " << AddrMode << "\n");
return false;
}
@@ -4217,6 +4331,10 @@ bool CodeGenPrepare::extLdPromotion(TypePromotionTransaction &TPT,
/// promotions apply.
///
bool CodeGenPrepare::moveExtToFormExtLoad(Instruction *&I) {
+ // ExtLoad formation infrastructure requires TLI to be effective.
+ if (!TLI)
+ return false;
+
// Try to promote a chain of computation if it allows to form
// an extended load.
TypePromotionTransaction TPT;
@@ -4246,7 +4364,7 @@ bool CodeGenPrepare::moveExtToFormExtLoad(Instruction *&I) {
// If the load has other users and the truncate is not free, this probably
// isn't worthwhile.
- if (!LI->hasOneUse() && TLI &&
+ if (!LI->hasOneUse() &&
(TLI->isTypeLegal(LoadVT) || !TLI->isTypeLegal(VT)) &&
!TLI->isTruncateFree(I->getType(), LI->getType())) {
I = OldExt;
@@ -4262,7 +4380,7 @@ bool CodeGenPrepare::moveExtToFormExtLoad(Instruction *&I) {
assert(isa<SExtInst>(I) && "Unexpected ext type!");
LType = ISD::SEXTLOAD;
}
- if (TLI && !TLI->isLoadExtLegal(LType, VT, LoadVT)) {
+ if (!TLI->isLoadExtLegal(LType, VT, LoadVT)) {
I = OldExt;
TPT.rollback(LastKnownGood);
return false;
@@ -4273,6 +4391,14 @@ bool CodeGenPrepare::moveExtToFormExtLoad(Instruction *&I) {
TPT.commit();
I->removeFromParent();
I->insertAfter(LI);
+ // CGP does not check if the zext would be speculatively executed when moved
+ // to the same basic block as the load. Preserving its original location would
+ // pessimize the debugging experience, as well as negatively impact the
+ // quality of sample pgo. We don't want to use "line 0" as that has a
+ // size cost in the line-table section and logically the zext can be seen as
+ // part of the load. Therefore we conservatively reuse the same debug location
+ // for the load and the zext.
+ I->setDebugLoc(LI->getDebugLoc());
++NumExtsMoved;
return true;
}
@@ -4583,10 +4709,45 @@ static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI,
return false;
}
+/// If \p isTrue is true, return the true value of \p SI, otherwise return the
+/// false value of \p SI. If the true/false value of \p SI is defined by any
+/// select instruction in \p Selects, look through the defining select
+/// instructions until the true/false value is not defined in \p Selects.
+static Value *getTrueOrFalseValue(
+ SelectInst *SI, bool isTrue,
+ const SmallPtrSet<const Instruction *, 2> &Selects) {
+ Value *V;
+
+ for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI);
+ DefSI = dyn_cast<SelectInst>(V)) {
+ assert(DefSI->getCondition() == SI->getCondition() &&
+ "The condition of DefSI does not match with SI");
+ V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());
+ }
+ return V;
+}
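
To see why the helper above has to look through the other selects in the group, consider a hypothetical C++ source pattern (illustrative only; whether both ternaries survive as select instructions depends on earlier passes) in which the second select chains into the first:

// Roughly: s1 = select c, a, b ; s2 = select c, s1, d -- same condition c.
int pick(bool c, int a, int b, int d) {
  int s1 = c ? a : b;
  int s2 = c ? s1 : d;  // true value of s2 is s1, itself a select on c
  return s2;
}

When the group is lowered to a branch, s1 is erased, so the PHI built for s2 must use a (found by walking the chain of selects) as its true-edge operand rather than s1.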
/// If we have a SelectInst that will likely profit from branch prediction,
/// turn it into a branch.
bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
+ // Find all consecutive select instructions that share the same condition.
+ SmallVector<SelectInst *, 2> ASI;
+ ASI.push_back(SI);
+ for (BasicBlock::iterator It = ++BasicBlock::iterator(SI);
+ It != SI->getParent()->end(); ++It) {
+ SelectInst *I = dyn_cast<SelectInst>(&*It);
+ if (I && SI->getCondition() == I->getCondition()) {
+ ASI.push_back(I);
+ } else {
+ break;
+ }
+ }
+
+ SelectInst *LastSI = ASI.back();
+  // Advance the current iterator past the remaining select instructions,
+  // because either none of them or all of them will be lowered to a branch.
+ CurInstIterator = std::next(LastSI->getIterator());
+
bool VectorCond = !SI->getCondition()->getType()->isIntegerTy(1);
// Can we convert the 'select' to CF ?
@@ -4633,7 +4794,7 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
// First, we split the block containing the select into 2 blocks.
BasicBlock *StartBlock = SI->getParent();
- BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(SI));
+ BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI));
BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
// Delete the unconditional branch that was just created by the split.
@@ -4643,22 +4804,30 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
// At least one will become an actual new basic block.
BasicBlock *TrueBlock = nullptr;
BasicBlock *FalseBlock = nullptr;
+ BranchInst *TrueBranch = nullptr;
+ BranchInst *FalseBranch = nullptr;
// Sink expensive instructions into the conditional blocks to avoid executing
// them speculatively.
- if (sinkSelectOperand(TTI, SI->getTrueValue())) {
- TrueBlock = BasicBlock::Create(SI->getContext(), "select.true.sink",
- EndBlock->getParent(), EndBlock);
- auto *TrueBranch = BranchInst::Create(EndBlock, TrueBlock);
- auto *TrueInst = cast<Instruction>(SI->getTrueValue());
- TrueInst->moveBefore(TrueBranch);
- }
- if (sinkSelectOperand(TTI, SI->getFalseValue())) {
- FalseBlock = BasicBlock::Create(SI->getContext(), "select.false.sink",
- EndBlock->getParent(), EndBlock);
- auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
- auto *FalseInst = cast<Instruction>(SI->getFalseValue());
- FalseInst->moveBefore(FalseBranch);
+ for (SelectInst *SI : ASI) {
+ if (sinkSelectOperand(TTI, SI->getTrueValue())) {
+ if (TrueBlock == nullptr) {
+ TrueBlock = BasicBlock::Create(SI->getContext(), "select.true.sink",
+ EndBlock->getParent(), EndBlock);
+ TrueBranch = BranchInst::Create(EndBlock, TrueBlock);
+ }
+ auto *TrueInst = cast<Instruction>(SI->getTrueValue());
+ TrueInst->moveBefore(TrueBranch);
+ }
+ if (sinkSelectOperand(TTI, SI->getFalseValue())) {
+ if (FalseBlock == nullptr) {
+ FalseBlock = BasicBlock::Create(SI->getContext(), "select.false.sink",
+ EndBlock->getParent(), EndBlock);
+ FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
+ }
+ auto *FalseInst = cast<Instruction>(SI->getFalseValue());
+ FalseInst->moveBefore(FalseBranch);
+ }
}
// If there was nothing to sink, then arbitrarily choose the 'false' side
@@ -4677,28 +4846,42 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
// of the condition, it means that side of the branch goes to the end block
// directly and the path originates from the start block from the point of
// view of the new PHI.
+ BasicBlock *TT, *FT;
if (TrueBlock == nullptr) {
- BranchInst::Create(EndBlock, FalseBlock, SI->getCondition(), SI);
+ TT = EndBlock;
+ FT = FalseBlock;
TrueBlock = StartBlock;
} else if (FalseBlock == nullptr) {
- BranchInst::Create(TrueBlock, EndBlock, SI->getCondition(), SI);
+ TT = TrueBlock;
+ FT = EndBlock;
FalseBlock = StartBlock;
} else {
- BranchInst::Create(TrueBlock, FalseBlock, SI->getCondition(), SI);
+ TT = TrueBlock;
+ FT = FalseBlock;
+ }
+ IRBuilder<>(SI).CreateCondBr(SI->getCondition(), TT, FT, SI);
+
+ SmallPtrSet<const Instruction *, 2> INS;
+ INS.insert(ASI.begin(), ASI.end());
+  // Use a reverse iterator because a later select may use the value of an
+  // earlier select, and we need to propagate the value through the earlier
+  // select to get the PHI operand.
+ for (auto It = ASI.rbegin(); It != ASI.rend(); ++It) {
+ SelectInst *SI = *It;
+ // The select itself is replaced with a PHI Node.
+ PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front());
+ PN->takeName(SI);
+ PN->addIncoming(getTrueOrFalseValue(SI, true, INS), TrueBlock);
+ PN->addIncoming(getTrueOrFalseValue(SI, false, INS), FalseBlock);
+
+ SI->replaceAllUsesWith(PN);
+ SI->eraseFromParent();
+ INS.erase(SI);
+ ++NumSelectsExpanded;
}
-
- // The select itself is replaced with a PHI Node.
- PHINode *PN = PHINode::Create(SI->getType(), 2, "", &EndBlock->front());
- PN->takeName(SI);
- PN->addIncoming(SI->getTrueValue(), TrueBlock);
- PN->addIncoming(SI->getFalseValue(), FalseBlock);
-
- SI->replaceAllUsesWith(PN);
- SI->eraseFromParent();
// Instruct OptimizeBlock to skip to the next block.
CurInstIterator = StartBlock->end();
- ++NumSelectsExpanded;
return true;
}
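
As a hedged illustration of the grouped lowering, assume a source function whose two ternaries share one condition and whose false operands are expensive (fdiv is the kind of operand sinkSelectOperand tends to flag); whether selects are actually formed here depends on earlier passes:

// Illustrative only: if both ternaries survive as selects on the same
// condition, CGP may emit a single conditional branch, sink both divisions
// into one "select.false.sink" block, and replace each select with a PHI in
// "select.end".
double f(bool c, double a, double b, double x, double y) {
  double s1 = c ? a : a / x;
  double s2 = c ? b : b / y;
  return s1 + s2;
}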
@@ -5179,6 +5362,117 @@ bool CodeGenPrepare::optimizeExtractElementInst(Instruction *Inst) {
return false;
}
+/// In the store instruction sequence below, the F and I values
+/// are bundled together into an i64 value before being stored to memory.
+/// Sometimes it is more efficient to generate separate stores for F and I,
+/// which can remove the bitwise instructions or sink them to colder places.
+///
+/// (store (or (zext (bitcast F to i32) to i64),
+/// (shl (zext I to i64), 32)), addr) -->
+/// (store F, addr) and (store I, addr+4)
+///
+/// Similarly, splitting other merged stores can also be beneficial, e.g.:
+/// For pair of {i32, i32}, i64 store --> two i32 stores.
+/// For pair of {i32, i16}, i64 store --> two i32 stores.
+/// For pair of {i16, i16}, i32 store --> two i16 stores.
+/// For pair of {i16, i8}, i32 store --> two i16 stores.
+/// For pair of {i8, i8}, i16 store --> two i8 stores.
+///
+/// We allow each target to determine specifically which kind of splitting is
+/// supported.
+///
+/// The store patterns are commonly seen in code like the simple snippet below
+/// if only std::make_pair(...) is SROA-transformed before being inlined into
+/// hoo.
+/// void goo(const std::pair<int, float> &);
+/// hoo() {
+/// ...
+/// goo(std::make_pair(tmp, ftmp));
+/// ...
+/// }
+///
+/// Although we already have similar splitting in DAG Combine, we duplicate
+/// it in CodeGenPrepare to catch the case in which the pattern spans
+/// multiple BBs. The logic in DAG Combine is kept to catch cases generated
+/// during code expansion.
+static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,
+ const TargetLowering &TLI) {
+ // Handle simple but common cases only.
+ Type *StoreType = SI.getValueOperand()->getType();
+ if (DL.getTypeStoreSizeInBits(StoreType) != DL.getTypeSizeInBits(StoreType) ||
+ DL.getTypeSizeInBits(StoreType) == 0)
+ return false;
+
+ unsigned HalfValBitSize = DL.getTypeSizeInBits(StoreType) / 2;
+ Type *SplitStoreType = Type::getIntNTy(SI.getContext(), HalfValBitSize);
+ if (DL.getTypeStoreSizeInBits(SplitStoreType) !=
+ DL.getTypeSizeInBits(SplitStoreType))
+ return false;
+
+ // Match the following patterns:
+  // (store (or (zext LValue to i64),
+  //            (shl (zext HValue to i64), HalfValBitSize)), addr)
+  // or
+  // (store (or (shl (zext HValue to i64), HalfValBitSize),
+  //            (zext LValue to i64)), addr)
+  // Expect both operands of OR and the first operand of SHL to have only
+  // one use.
+ Value *LValue, *HValue;
+ if (!match(SI.getValueOperand(),
+ m_c_Or(m_OneUse(m_ZExt(m_Value(LValue))),
+ m_OneUse(m_Shl(m_OneUse(m_ZExt(m_Value(HValue))),
+ m_SpecificInt(HalfValBitSize))))))
+ return false;
+
+  // Check that LValue and HValue are integers with size no larger than
+  // HalfValBitSize.
+ if (!LValue->getType()->isIntegerTy() ||
+ DL.getTypeSizeInBits(LValue->getType()) > HalfValBitSize ||
+ !HValue->getType()->isIntegerTy() ||
+ DL.getTypeSizeInBits(HValue->getType()) > HalfValBitSize)
+ return false;
+
+ // If LValue/HValue is a bitcast instruction, use the EVT before bitcast
+ // as the input of target query.
+ auto *LBC = dyn_cast<BitCastInst>(LValue);
+ auto *HBC = dyn_cast<BitCastInst>(HValue);
+ EVT LowTy = LBC ? EVT::getEVT(LBC->getOperand(0)->getType())
+ : EVT::getEVT(LValue->getType());
+ EVT HighTy = HBC ? EVT::getEVT(HBC->getOperand(0)->getType())
+ : EVT::getEVT(HValue->getType());
+ if (!ForceSplitStore && !TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
+ return false;
+
+ // Start to split store.
+ IRBuilder<> Builder(SI.getContext());
+ Builder.SetInsertPoint(&SI);
+
+  // If LValue/HValue is a bitcast in another BB, create a new one in the
+  // current BB so it may be merged with the split stores by the DAG combiner.
+ if (LBC && LBC->getParent() != SI.getParent())
+ LValue = Builder.CreateBitCast(LBC->getOperand(0), LBC->getType());
+ if (HBC && HBC->getParent() != SI.getParent())
+ HValue = Builder.CreateBitCast(HBC->getOperand(0), HBC->getType());
+
+ auto CreateSplitStore = [&](Value *V, bool Upper) {
+ V = Builder.CreateZExtOrBitCast(V, SplitStoreType);
+ Value *Addr = Builder.CreateBitCast(
+ SI.getOperand(1),
+ SplitStoreType->getPointerTo(SI.getPointerAddressSpace()));
+ if (Upper)
+ Addr = Builder.CreateGEP(
+ SplitStoreType, Addr,
+ ConstantInt::get(Type::getInt32Ty(SI.getContext()), 1));
+ Builder.CreateAlignedStore(
+ V, Addr, Upper ? SI.getAlignment() / 2 : SI.getAlignment());
+ };
+
+ CreateSplitStore(LValue, false);
+ CreateSplitStore(HValue, true);
+
+ // Delete the old store.
+ SI.eraseFromParent();
+ return true;
+}
+
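
For reference, here is a self-contained version of the source pattern the comment block above alludes to; goo, hoo and the parameter names are illustrative, exactly as in the comment:

#include <utility>

void goo(const std::pair<int, float> &);

// If std::make_pair is SROA'd before being inlined into hoo, the pair may be
// rebuilt with zext/shl/or into a single i64 store, which splitMergedValStore
// can split back into two half-width stores.
void hoo(int tmp, float ftmp) {
  goo(std::make_pair(tmp, ftmp));
}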
bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) {
// Bail out if we inserted the instruction to prevent optimizations from
// stepping on each other's toes.
@@ -5232,7 +5526,7 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) {
return OptimizeCmpExpression(CI, TLI);
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
- stripInvariantGroupMetadata(*LI);
+ LI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
if (TLI) {
bool Modified = optimizeLoadExt(LI);
unsigned AS = LI->getPointerAddressSpace();
@@ -5243,7 +5537,9 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool& ModifiedDT) {
}
if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
- stripInvariantGroupMetadata(*SI);
+ if (TLI && splitMergedValStore(*SI, *DL, *TLI))
+ return true;
+ SI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
if (TLI) {
unsigned AS = SI->getPointerAddressSpace();
return optimizeMemoryInst(I, SI->getOperand(1),
@@ -5542,7 +5838,7 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {
// incoming edge to the PHI nodes, because both branch instructions target
// now the same successor. Depending on the original branch condition
// (and/or) we have to swap the successors (TrueDest, FalseDest), so that
- // we perfrom the correct update for the PHI nodes.
+ // we perform the correct update for the PHI nodes.
// This doesn't change the successor order of the just created branch
// instruction (or any other instruction).
if (Opc == Instruction::Or)
@@ -5649,8 +5945,3 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {
}
return MadeChange;
}
-
-void CodeGenPrepare::stripInvariantGroupMetadata(Instruction &I) {
- if (auto *InvariantMD = I.getMetadata(LLVMContext::MD_invariant_group))
- I.dropUnknownNonDebugMetadata(InvariantMD->getMetadataID());
-}
diff --git a/contrib/llvm/lib/CodeGen/CountingFunctionInserter.cpp b/contrib/llvm/lib/CodeGen/CountingFunctionInserter.cpp
new file mode 100644
index 000000000000..1e46a7a99e7e
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/CountingFunctionInserter.cpp
@@ -0,0 +1,62 @@
+//===- CountingFunctionInserter.cpp - Insert mcount-like function calls ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Insert calls to counter functions, such as mcount, intended to be called
+// once per function, at the beginning of each function.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Pass.h"
+using namespace llvm;
+
+namespace {
+ struct CountingFunctionInserter : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ CountingFunctionInserter() : FunctionPass(ID) {
+ initializeCountingFunctionInserterPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ }
+
+ bool runOnFunction(Function &F) override {
+ std::string CountingFunctionName =
+ F.getFnAttribute("counting-function").getValueAsString();
+ if (CountingFunctionName.empty())
+ return false;
+
+ Type *VoidTy = Type::getVoidTy(F.getContext());
+ Constant *CountingFn =
+ F.getParent()->getOrInsertFunction(CountingFunctionName,
+ VoidTy, nullptr);
+ CallInst::Create(CountingFn, "", &*F.begin()->getFirstInsertionPt());
+ return true;
+ }
+ };
+
+ char CountingFunctionInserter::ID = 0;
+}
+
+INITIALIZE_PASS(CountingFunctionInserter, "cfinserter",
+ "Inserts calls to mcount-like functions", false, false)
+
+//===----------------------------------------------------------------------===//
+//
+// createCountingFunctionInserterPass - Insert calls to an mcount-like counting
+// function at the entry of each function that requests one.
+//
+FunctionPass *llvm::createCountingFunctionInserterPass() {
+ return new CountingFunctionInserter();
+}
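
The pass reads a per-function "counting-function" string attribute and inserts a single call to the named void function at the function's first insertion point. Conceptually (illustrative C++ only; the real transformation happens on LLVM IR), a function tagged with "counting-function"="mcount" behaves as if it had been written as:

extern "C" void mcount();  // the counting function named by the attribute

void foo() {
  mcount();                // inserted once, at function entry
  // ... original body of foo ...
}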
diff --git a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
index a0189a172bfc..5d60c3055456 100644
--- a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -69,8 +69,8 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
// Mark live-out callee-saved registers. In a return block this is
// all callee-saved registers. In non-return this is any
// callee-saved register that is not saved in the prolog.
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- BitVector Pristine = MFI->getPristineRegs(MF);
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ BitVector Pristine = MFI.getPristineRegs(MF);
for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I) {
if (!IsReturnBlock && !Pristine.test(*I)) continue;
for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) {
diff --git a/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp
index 2386af9e6877..7b1b2d64fccc 100644
--- a/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp
+++ b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp
@@ -31,9 +31,14 @@
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
using namespace llvm;
+static cl::opt<unsigned> InstrLimit("dfa-instr-limit", cl::Hidden,
+ cl::init(0), cl::desc("If present, stops packetizing after N instructions"));
+static unsigned InstrCount = 0;
+
// --------------------------------------------------------------------
// Definitions shared between DFAPacketizer.cpp and DFAPacketizerEmitter.cpp
@@ -218,6 +223,13 @@ VLIWPacketizerList::~VLIWPacketizerList() {
// End the current packet, bundle packet instructions and reset DFA state.
void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB,
MachineBasicBlock::iterator MI) {
+ DEBUG({
+ if (!CurrentPacketMIs.empty()) {
+ dbgs() << "Finalizing packet:\n";
+ for (MachineInstr *MI : CurrentPacketMIs)
+ dbgs() << " * " << *MI;
+ }
+ });
if (CurrentPacketMIs.size() > 1) {
MachineInstr &MIFirst = *CurrentPacketMIs.front();
finalizeBundle(*MBB, MIFirst.getIterator(), MI.getInstrIterator());
@@ -249,8 +261,17 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB,
for (SUnit &SU : VLIWScheduler->SUnits)
MIToSUnit[SU.getInstr()] = &SU;
+ bool LimitPresent = InstrLimit.getPosition();
+
// The main packetizer loop.
for (; BeginItr != EndItr; ++BeginItr) {
+ if (LimitPresent) {
+ if (InstrCount >= InstrLimit) {
+ EndItr = BeginItr;
+ break;
+ }
+ InstrCount++;
+ }
MachineInstr &MI = *BeginItr;
initPacketizerState();
diff --git a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
index 0b8dc7a86ada..17c229a216ae 100644
--- a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -122,7 +122,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
// liveness as we go.
for (MachineBasicBlock::reverse_iterator MII = MBB.rbegin(),
MIE = MBB.rend(); MII != MIE; ) {
- MachineInstr *MI = &*MII;
+ MachineInstr *MI = &*MII++;
// If the instruction is dead, delete it!
if (isDead(MI)) {
@@ -133,9 +133,6 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
MI->eraseFromParentAndMarkDBGValuesForRemoval();
AnyChanges = true;
++NumDeletes;
- MIE = MBB.rend();
- // MII is now pointing to the next instruction to process,
- // so don't increment it.
continue;
}
@@ -169,10 +166,6 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
}
}
}
-
- // We didn't delete the current instruction, so increment MII to
- // the next one.
- ++MII;
}
}
diff --git a/contrib/llvm/lib/CodeGen/DetectDeadLanes.cpp b/contrib/llvm/lib/CodeGen/DetectDeadLanes.cpp
index 1d9e79c055e0..a7ba694c144d 100644
--- a/contrib/llvm/lib/CodeGen/DetectDeadLanes.cpp
+++ b/contrib/llvm/lib/CodeGen/DetectDeadLanes.cpp
@@ -63,7 +63,7 @@ public:
static char ID;
DetectDeadLanes() : MachineFunctionPass(ID) {}
- const char *getPassName() const override { return "Detect Dead Lanes"; }
+ StringRef getPassName() const override { return "Detect Dead Lanes"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
@@ -210,7 +210,7 @@ void DetectDeadLanes::addUsedLanesOnOperand(const MachineOperand &MO,
VRegInfo &MORegInfo = VRegInfos[MORegIdx];
LaneBitmask PrevUsedLanes = MORegInfo.UsedLanes;
// Any change at all?
- if ((UsedLanes & ~PrevUsedLanes) == 0)
+ if ((UsedLanes & ~PrevUsedLanes).none())
return;
// Set UsedLanes and remember instruction for further propagation.
@@ -303,7 +303,7 @@ void DetectDeadLanes::transferDefinedLanesStep(const MachineOperand &Use,
VRegInfo &RegInfo = VRegInfos[DefRegIdx];
LaneBitmask PrevDefinedLanes = RegInfo.DefinedLanes;
// Any change at all?
- if ((DefinedLanes & ~PrevDefinedLanes) == 0)
+ if ((DefinedLanes & ~PrevDefinedLanes).none())
return;
RegInfo.DefinedLanes = PrevDefinedLanes | DefinedLanes;
@@ -356,7 +356,7 @@ LaneBitmask DetectDeadLanes::determineInitialDefinedLanes(unsigned Reg) {
// Live-In or unused registers have no definition but are considered fully
// defined.
if (!MRI->hasOneDef(Reg))
- return ~0u;
+ return LaneBitmask::getAll();
const MachineOperand &Def = *MRI->def_begin(Reg);
const MachineInstr &DefMI = *Def.getParent();
@@ -368,7 +368,7 @@ LaneBitmask DetectDeadLanes::determineInitialDefinedLanes(unsigned Reg) {
PutInWorklist(RegIdx);
if (Def.isDead())
- return 0;
+ return LaneBitmask::getNone();
// COPY/PHI can copy across unrelated register classes (example: float/int)
// with incompatible subregister structure. Do not include these in the
@@ -376,7 +376,7 @@ LaneBitmask DetectDeadLanes::determineInitialDefinedLanes(unsigned Reg) {
const TargetRegisterClass *DefRC = MRI->getRegClass(Reg);
// Determine initially DefinedLanes.
- LaneBitmask DefinedLanes = 0;
+ LaneBitmask DefinedLanes;
for (const MachineOperand &MO : DefMI.uses()) {
if (!MO.isReg() || !MO.readsReg())
continue;
@@ -386,9 +386,9 @@ LaneBitmask DetectDeadLanes::determineInitialDefinedLanes(unsigned Reg) {
LaneBitmask MODefinedLanes;
if (TargetRegisterInfo::isPhysicalRegister(MOReg)) {
- MODefinedLanes = ~0u;
+ MODefinedLanes = LaneBitmask::getAll();
} else if (isCrossCopy(*MRI, DefMI, DefRC, MO)) {
- MODefinedLanes = ~0u;
+ MODefinedLanes = LaneBitmask::getAll();
} else {
assert(TargetRegisterInfo::isVirtualRegister(MOReg));
if (MRI->hasOneDef(MOReg)) {
@@ -410,7 +410,7 @@ LaneBitmask DetectDeadLanes::determineInitialDefinedLanes(unsigned Reg) {
return DefinedLanes;
}
if (DefMI.isImplicitDef() || Def.isDead())
- return 0;
+ return LaneBitmask::getNone();
assert(Def.getSubReg() == 0 &&
"Should not have subregister defs in machine SSA phase");
@@ -418,7 +418,7 @@ LaneBitmask DetectDeadLanes::determineInitialDefinedLanes(unsigned Reg) {
}
LaneBitmask DetectDeadLanes::determineInitialUsedLanes(unsigned Reg) {
- LaneBitmask UsedLanes = 0;
+ LaneBitmask UsedLanes = LaneBitmask::getNone();
for (const MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
if (!MO.readsReg())
continue;
@@ -462,7 +462,7 @@ bool DetectDeadLanes::isUndefRegAtInput(const MachineOperand &MO,
const VRegInfo &RegInfo) const {
unsigned SubReg = MO.getSubReg();
LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubReg);
- return (RegInfo.DefinedLanes & RegInfo.UsedLanes & Mask) == 0;
+ return (RegInfo.DefinedLanes & RegInfo.UsedLanes & Mask).none();
}
bool DetectDeadLanes::isUndefInput(const MachineOperand &MO,
@@ -482,7 +482,7 @@ bool DetectDeadLanes::isUndefInput(const MachineOperand &MO,
const VRegInfo &DefRegInfo = VRegInfos[DefRegIdx];
LaneBitmask UsedLanes = transferUsedLanes(MI, DefRegInfo.UsedLanes, MO);
- if (UsedLanes != 0)
+ if (UsedLanes.any())
return false;
unsigned MOReg = MO.getReg();
@@ -546,7 +546,7 @@ bool DetectDeadLanes::runOnce(MachineFunction &MF) {
continue;
unsigned RegIdx = TargetRegisterInfo::virtReg2Index(Reg);
const VRegInfo &RegInfo = VRegInfos[RegIdx];
- if (MO.isDef() && !MO.isDead() && RegInfo.UsedLanes == 0) {
+ if (MO.isDef() && !MO.isDead() && RegInfo.UsedLanes.none()) {
DEBUG(dbgs() << "Marking operand '" << MO << "' as dead in " << MI);
MO.setIsDead();
}
@@ -577,12 +577,12 @@ bool DetectDeadLanes::runOnMachineFunction(MachineFunction &MF) {
// register coalescer cannot deal with hidden dead defs. However without
// subregister liveness enabled, the expected benefits of this pass are small
  // so we save compile time.
- if (!MF.getSubtarget().enableSubRegLiveness()) {
+ MRI = &MF.getRegInfo();
+ if (!MRI->subRegLivenessEnabled()) {
DEBUG(dbgs() << "Skipping Detect dead lanes pass\n");
return false;
}
- MRI = &MF.getRegInfo();
TRI = MRI->getTargetRegisterInfo();
unsigned NumVirtRegs = MRI->getNumVirtRegs();
diff --git a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp
index eae78a950d9a..38af19a04448 100644
--- a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp
@@ -71,7 +71,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override;
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "Exception handling preparation";
}
};
diff --git a/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp
index 8c96124451f3..729172796453 100644
--- a/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp
+++ b/contrib/llvm/lib/CodeGen/EarlyIfConversion.cpp
@@ -547,7 +547,7 @@ void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) {
// Fix up Head's terminators.
// It should become a single branch or a fallthrough.
DebugLoc HeadDL = Head->getFirstTerminator()->getDebugLoc();
- TII->RemoveBranch(*Head);
+ TII->removeBranch(*Head);
// Erase the now empty conditional blocks. It is likely that Head can fall
// through to Tail, and we can join the two blocks.
@@ -574,7 +574,7 @@ void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) {
// We need a branch to Tail, let code placement work it out later.
DEBUG(dbgs() << "Converting to unconditional branch.\n");
SmallVector<MachineOperand, 0> EmptyCond;
- TII->InsertBranch(*Head, Tail, nullptr, EmptyCond, HeadDL);
+ TII->insertBranch(*Head, Tail, nullptr, EmptyCond, HeadDL);
Head->addSuccessor(Tail);
}
DEBUG(dbgs() << *Head);
@@ -602,7 +602,7 @@ public:
EarlyIfConverter() : MachineFunctionPass(ID) {}
void getAnalysisUsage(AnalysisUsage &AU) const override;
bool runOnMachineFunction(MachineFunction &MF) override;
- const char *getPassName() const override { return "Early If-Conversion"; }
+ StringRef getPassName() const override { return "Early If-Conversion"; }
private:
bool tryConvertIf(MachineBasicBlock*);
diff --git a/contrib/llvm/lib/CodeGen/EdgeBundles.cpp b/contrib/llvm/lib/CodeGen/EdgeBundles.cpp
index aea7c31ba316..b3a25544be39 100644
--- a/contrib/llvm/lib/CodeGen/EdgeBundles.cpp
+++ b/contrib/llvm/lib/CodeGen/EdgeBundles.cpp
@@ -17,6 +17,7 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -57,8 +58,8 @@ bool EdgeBundles::runOnMachineFunction(MachineFunction &mf) {
Blocks.resize(getNumBundles());
for (unsigned i = 0, e = MF->getNumBlockIDs(); i != e; ++i) {
- unsigned b0 = getBundle(i, 0);
- unsigned b1 = getBundle(i, 1);
+ unsigned b0 = getBundle(i, false);
+ unsigned b1 = getBundle(i, true);
Blocks[b0].push_back(i);
if (b1 != b0)
Blocks[b1].push_back(i);
@@ -69,6 +70,7 @@ bool EdgeBundles::runOnMachineFunction(MachineFunction &mf) {
/// Specialize WriteGraph, the standard implementation won't work.
namespace llvm {
+
template<>
raw_ostream &WriteGraph<>(raw_ostream &O, const EdgeBundles &G,
bool ShortNames,
@@ -89,7 +91,8 @@ raw_ostream &WriteGraph<>(raw_ostream &O, const EdgeBundles &G,
O << "}\n";
return O;
}
-}
+
+} // end namespace llvm
/// view - Visualize the annotated bipartite CFG with Graphviz.
void EdgeBundles::view() const {
diff --git a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp
index 566b8d507b2b..e7c6b03f1a49 100644
--- a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp
+++ b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp
@@ -26,6 +26,7 @@
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -137,6 +138,7 @@ class ExeDepsFix : public MachineFunctionPass {
MachineFunction *MF;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
+ RegisterClassInfo RegClassInfo;
std::vector<SmallVector<int, 1>> AliasMap;
const unsigned NumRegs;
LiveReg *LiveRegs;
@@ -170,12 +172,10 @@ public:
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
- const char *getPassName() const override {
- return "Execution dependency fix";
- }
+ StringRef getPassName() const override { return "Execution dependency fix"; }
private:
iterator_range<SmallVectorImpl<int>::const_iterator>
@@ -203,6 +203,8 @@ private:
void processDefs(MachineInstr*, bool Kill);
void visitSoftInstr(MachineInstr*, unsigned mask);
void visitHardInstr(MachineInstr*, unsigned domain);
+ void pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
+ unsigned Pref);
bool shouldBreakDependence(MachineInstr*, unsigned OpIdx, unsigned Pref);
void processUndefReads(MachineBasicBlock*);
};
@@ -473,6 +475,60 @@ void ExeDepsFix::visitInstr(MachineInstr *MI) {
processDefs(MI, !DomP.first);
}
+/// \brief Helps avoid false dependencies on undef registers by updating the
+/// machine instruction's undef operand to use a register that the instruction
+/// truly depends on, or a register with clearance higher than Pref.
+void ExeDepsFix::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
+ unsigned Pref) {
+ MachineOperand &MO = MI->getOperand(OpIdx);
+ assert(MO.isUndef() && "Expected undef machine operand");
+
+ unsigned OriginalReg = MO.getReg();
+
+ // Update only undef operands that are mapped to one register.
+ if (AliasMap[OriginalReg].size() != 1)
+ return;
+
+ // Get the undef operand's register class
+ const TargetRegisterClass *OpRC =
+ TII->getRegClass(MI->getDesc(), OpIdx, TRI, *MF);
+
+  // If the instruction has a true dependency, we can hide the false dependency
+  // behind it.
+ for (MachineOperand &CurrMO : MI->operands()) {
+ if (!CurrMO.isReg() || CurrMO.isDef() || CurrMO.isUndef() ||
+ !OpRC->contains(CurrMO.getReg()))
+ continue;
+ // We found a true dependency - replace the undef register with the true
+ // dependency.
+ MO.setReg(CurrMO.getReg());
+ return;
+ }
+
+ // Go over all registers in the register class and find the register with
+ // max clearance or clearance higher than Pref.
+ unsigned MaxClearance = 0;
+ unsigned MaxClearanceReg = OriginalReg;
+ ArrayRef<MCPhysReg> Order = RegClassInfo.getOrder(OpRC);
+ for (auto Reg : Order) {
+ assert(AliasMap[Reg].size() == 1 &&
+ "Reg is expected to be mapped to a single index");
+ int RCrx = *regIndices(Reg).begin();
+ unsigned Clearance = CurInstr - LiveRegs[RCrx].Def;
+ if (Clearance <= MaxClearance)
+ continue;
+ MaxClearance = Clearance;
+ MaxClearanceReg = Reg;
+
+ if (MaxClearance > Pref)
+ break;
+ }
+
+ // Update the operand if we found a register with better clearance.
+ if (MaxClearanceReg != OriginalReg)
+ MO.setReg(MaxClearanceReg);
+}
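
A hedged example of the false dependency this targets: on x86, int-to-float conversions typically lower to CVTSI2SS, which writes only part of its destination XMM register, so an undef destination operand makes the conversion wait on whatever last wrote that register even though that value is never read. Steering the undef operand toward a register the instruction already reads, or one with high clearance, hides that stall. Illustrative source only:

// Illustrative: the conversion below may lower to CVTSI2SS on x86. If its
// destination register was just written by a long-latency instruction, the
// false dependency stalls the conversion; pickBestRegisterForUndef chooses a
// destination with more clearance (or a true dependency) to avoid this.
float convert(int x) { return static_cast<float>(x); }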
+
/// \brief Return true if it makes sense to break dependence on a partial def
/// or undef use.
bool ExeDepsFix::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx,
@@ -510,6 +566,7 @@ void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) {
unsigned OpNum;
unsigned Pref = TII->getUndefRegClearance(*MI, OpNum, TRI);
if (Pref) {
+ pickBestRegisterForUndef(MI, OpNum, Pref);
if (shouldBreakDependence(MI, OpNum, Pref))
UndefReads.push_back(std::make_pair(MI, OpNum));
}
@@ -520,8 +577,6 @@ void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg())
continue;
- if (MO.isImplicit())
- break;
if (MO.isUse())
continue;
for (int rx : regIndices(MO.getReg())) {
@@ -557,7 +612,7 @@ void ExeDepsFix::processUndefReads(MachineBasicBlock *MBB) {
return;
// Collect this block's live out register units.
- LiveRegSet.init(TRI);
+ LiveRegSet.init(*TRI);
// We do not need to care about pristine registers as they are just preserved
// but not actually used in the function.
LiveRegSet.addLiveOutsNoPristines(*MBB);
@@ -731,6 +786,7 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
MF = &mf;
TII = MF->getSubtarget().getInstrInfo();
TRI = MF->getSubtarget().getRegisterInfo();
+ RegClassInfo.runOnMachineFunction(mf);
LiveRegs = nullptr;
assert(NumRegs == RC->getNumRegs() && "Bad regclass");
diff --git a/contrib/llvm/lib/CodeGen/FuncletLayout.cpp b/contrib/llvm/lib/CodeGen/FuncletLayout.cpp
index b16f81c728d0..d61afad4db57 100644
--- a/contrib/llvm/lib/CodeGen/FuncletLayout.cpp
+++ b/contrib/llvm/lib/CodeGen/FuncletLayout.cpp
@@ -30,7 +30,7 @@ public:
bool runOnMachineFunction(MachineFunction &F) override;
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
};
}
diff --git a/contrib/llvm/lib/CodeGen/GCMetadata.cpp b/contrib/llvm/lib/CodeGen/GCMetadata.cpp
index c8116a453d2d..be21c7306da1 100644
--- a/contrib/llvm/lib/CodeGen/GCMetadata.cpp
+++ b/contrib/llvm/lib/CodeGen/GCMetadata.cpp
@@ -32,7 +32,7 @@ class Printer : public FunctionPass {
public:
explicit Printer(raw_ostream &OS) : FunctionPass(ID), OS(OS) {}
- const char *getPassName() const override;
+ StringRef getPassName() const override;
void getAnalysisUsage(AnalysisUsage &AU) const override;
bool runOnFunction(Function &F) override;
@@ -87,7 +87,7 @@ FunctionPass *llvm::createGCInfoPrinter(raw_ostream &OS) {
return new Printer(OS);
}
-const char *Printer::getPassName() const {
+StringRef Printer::getPassName() const {
return "Print Garbage Collector Information";
}
diff --git a/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp b/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp
index bb8cfa1cc809..d183c7f2980b 100644
--- a/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/GCMetadataPrinter.cpp
@@ -14,6 +14,8 @@
#include "llvm/CodeGen/GCMetadataPrinter.h"
using namespace llvm;
+LLVM_INSTANTIATE_REGISTRY(GCMetadataPrinterRegistry)
+
GCMetadataPrinter::GCMetadataPrinter() {}
GCMetadataPrinter::~GCMetadataPrinter() {}
diff --git a/contrib/llvm/lib/CodeGen/GCRootLowering.cpp b/contrib/llvm/lib/CodeGen/GCRootLowering.cpp
index 326adab2ba64..35246545ca91 100644
--- a/contrib/llvm/lib/CodeGen/GCRootLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/GCRootLowering.cpp
@@ -45,7 +45,7 @@ public:
static char ID;
LowerIntrinsics();
- const char *getPassName() const override;
+ StringRef getPassName() const override;
void getAnalysisUsage(AnalysisUsage &AU) const override;
bool doInitialization(Module &M) override;
@@ -93,7 +93,7 @@ LowerIntrinsics::LowerIntrinsics() : FunctionPass(ID) {
initializeLowerIntrinsicsPass(*PassRegistry::getPassRegistry());
}
-const char *LowerIntrinsics::getPassName() const {
+StringRef LowerIntrinsics::getPassName() const {
return "Lower Garbage Collection Instructions";
}
@@ -316,7 +316,7 @@ void GCMachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) {
for (GCFunctionInfo::roots_iterator RI = FI->roots_begin();
RI != FI->roots_end();) {
// If the root references a dead object, no need to keep it.
- if (MF.getFrameInfo()->isDeadObjectIndex(RI->Num)) {
+ if (MF.getFrameInfo().isDeadObjectIndex(RI->Num)) {
RI = FI->removeStackRoot(RI);
} else {
unsigned FrameReg; // FIXME: surely GCRoot ought to store the
@@ -338,11 +338,11 @@ bool GCMachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) {
// Find the size of the stack frame. There may be no correct static frame
// size, we use UINT64_MAX to represent this.
- const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
- const bool DynamicFrameSize = MFI->hasVarSizedObjects() ||
+ const bool DynamicFrameSize = MFI.hasVarSizedObjects() ||
RegInfo->needsStackRealignment(MF);
- FI->setFrameSize(DynamicFrameSize ? UINT64_MAX : MFI->getStackSize());
+ FI->setFrameSize(DynamicFrameSize ? UINT64_MAX : MFI.getStackSize());
// Find all safe points.
if (FI->getStrategy().needsSafePoints())
diff --git a/contrib/llvm/lib/CodeGen/GCStrategy.cpp b/contrib/llvm/lib/CodeGen/GCStrategy.cpp
index 554d326942e9..31ab86fdf276 100644
--- a/contrib/llvm/lib/CodeGen/GCStrategy.cpp
+++ b/contrib/llvm/lib/CodeGen/GCStrategy.cpp
@@ -16,6 +16,8 @@
using namespace llvm;
+LLVM_INSTANTIATE_REGISTRY(GCRegistry)
+
GCStrategy::GCStrategy()
: UseStatepoints(false), NeededSafePoints(0), CustomReadBarriers(false),
CustomWriteBarriers(false), CustomRoots(false), InitRoots(true),
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
new file mode 100644
index 000000000000..13212212fa01
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -0,0 +1,170 @@
+//===-- lib/CodeGen/GlobalISel/CallLowering.cpp - Call lowering -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements some simple delegations needed for call lowering.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Target/TargetLowering.h"
+
+using namespace llvm;
+
+bool CallLowering::lowerCall(
+ MachineIRBuilder &MIRBuilder, const CallInst &CI, unsigned ResReg,
+ ArrayRef<unsigned> ArgRegs, std::function<unsigned()> GetCalleeReg) const {
+ auto &DL = CI.getParent()->getParent()->getParent()->getDataLayout();
+
+ // First step is to marshall all the function's parameters into the correct
+ // physregs and memory locations. Gather the sequence of argument types that
+ // we'll pass to the assigner function.
+ SmallVector<ArgInfo, 8> OrigArgs;
+ unsigned i = 0;
+ for (auto &Arg : CI.arg_operands()) {
+ ArgInfo OrigArg{ArgRegs[i], Arg->getType(), ISD::ArgFlagsTy{}};
+ setArgFlags(OrigArg, i + 1, DL, CI);
+ OrigArgs.push_back(OrigArg);
+ ++i;
+ }
+
+ MachineOperand Callee = MachineOperand::CreateImm(0);
+ if (Function *F = CI.getCalledFunction())
+ Callee = MachineOperand::CreateGA(F, 0);
+ else
+ Callee = MachineOperand::CreateReg(GetCalleeReg(), false);
+
+ ArgInfo OrigRet{ResReg, CI.getType(), ISD::ArgFlagsTy{}};
+ if (!OrigRet.Ty->isVoidTy())
+ setArgFlags(OrigRet, AttributeSet::ReturnIndex, DL, CI);
+
+ return lowerCall(MIRBuilder, Callee, OrigRet, OrigArgs);
+}
+
+template <typename FuncInfoTy>
+void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx,
+ const DataLayout &DL,
+ const FuncInfoTy &FuncInfo) const {
+ const AttributeSet &Attrs = FuncInfo.getAttributes();
+ if (Attrs.hasAttribute(OpIdx, Attribute::ZExt))
+ Arg.Flags.setZExt();
+ if (Attrs.hasAttribute(OpIdx, Attribute::SExt))
+ Arg.Flags.setSExt();
+ if (Attrs.hasAttribute(OpIdx, Attribute::InReg))
+ Arg.Flags.setInReg();
+ if (Attrs.hasAttribute(OpIdx, Attribute::StructRet))
+ Arg.Flags.setSRet();
+ if (Attrs.hasAttribute(OpIdx, Attribute::SwiftSelf))
+ Arg.Flags.setSwiftSelf();
+ if (Attrs.hasAttribute(OpIdx, Attribute::SwiftError))
+ Arg.Flags.setSwiftError();
+ if (Attrs.hasAttribute(OpIdx, Attribute::ByVal))
+ Arg.Flags.setByVal();
+ if (Attrs.hasAttribute(OpIdx, Attribute::InAlloca))
+ Arg.Flags.setInAlloca();
+
+ if (Arg.Flags.isByVal() || Arg.Flags.isInAlloca()) {
+ Type *ElementTy = cast<PointerType>(Arg.Ty)->getElementType();
+ Arg.Flags.setByValSize(DL.getTypeAllocSize(ElementTy));
+ // For ByVal, alignment should be passed from FE. BE will guess if
+ // this info is not there but there are cases it cannot get right.
+ unsigned FrameAlign;
+ if (FuncInfo.getParamAlignment(OpIdx))
+ FrameAlign = FuncInfo.getParamAlignment(OpIdx);
+ else
+ FrameAlign = getTLI()->getByValTypeAlignment(ElementTy, DL);
+ Arg.Flags.setByValAlign(FrameAlign);
+ }
+ if (Attrs.hasAttribute(OpIdx, Attribute::Nest))
+ Arg.Flags.setNest();
+ Arg.Flags.setOrigAlign(DL.getABITypeAlignment(Arg.Ty));
+}
+
+template void
+CallLowering::setArgFlags<Function>(CallLowering::ArgInfo &Arg, unsigned OpIdx,
+ const DataLayout &DL,
+ const Function &FuncInfo) const;
+
+template void
+CallLowering::setArgFlags<CallInst>(CallLowering::ArgInfo &Arg, unsigned OpIdx,
+ const DataLayout &DL,
+ const CallInst &FuncInfo) const;
+
+bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder,
+ CCAssignFn *AssignFn,
+ ArrayRef<ArgInfo> Args,
+ ValueHandler &Handler) const {
+ MachineFunction &MF = MIRBuilder.getMF();
+ const Function &F = *MF.getFunction();
+ const DataLayout &DL = F.getParent()->getDataLayout();
+
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
+
+ unsigned NumArgs = Args.size();
+ for (unsigned i = 0; i != NumArgs; ++i) {
+ MVT CurVT = MVT::getVT(Args[i].Ty);
+ if (AssignFn(i, CurVT, CurVT, CCValAssign::Full, Args[i].Flags, CCInfo))
+ return false;
+ }
+
+ for (unsigned i = 0, e = Args.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+
+ if (VA.isRegLoc())
+ Handler.assignValueToReg(Args[i].Reg, VA.getLocReg(), VA);
+ else if (VA.isMemLoc()) {
+ unsigned Size = VA.getValVT() == MVT::iPTR
+ ? DL.getPointerSize()
+ : alignTo(VA.getValVT().getSizeInBits(), 8) / 8;
+ unsigned Offset = VA.getLocMemOffset();
+ MachinePointerInfo MPO;
+ unsigned StackAddr = Handler.getStackAddress(Size, Offset, MPO);
+ Handler.assignValueToAddress(Args[i].Reg, StackAddr, Size, MPO, VA);
+ } else {
+ // FIXME: Support byvals and other weirdness
+ return false;
+ }
+ }
+ return true;
+}
+
+unsigned CallLowering::ValueHandler::extendRegister(unsigned ValReg,
+ CCValAssign &VA) {
+ LLT LocTy{VA.getLocVT()};
+ switch (VA.getLocInfo()) {
+ default: break;
+ case CCValAssign::Full:
+ case CCValAssign::BCvt:
+ // FIXME: bitconverting between vector types may or may not be a
+ // nop in big-endian situations.
+ return ValReg;
+ case CCValAssign::AExt:
+ assert(!VA.getLocVT().isVector() && "unexpected vector extend");
+ // Otherwise, it's a nop.
+ return ValReg;
+ case CCValAssign::SExt: {
+ unsigned NewReg = MRI.createGenericVirtualRegister(LocTy);
+ MIRBuilder.buildSExt(NewReg, ValReg);
+ return NewReg;
+ }
+ case CCValAssign::ZExt: {
+ unsigned NewReg = MRI.createGenericVirtualRegister(LocTy);
+ MIRBuilder.buildZExt(NewReg, ValReg);
+ return NewReg;
+ }
+ }
+ llvm_unreachable("unable to extend register");
+}
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp
index 231e5ac82bec..fcd2722f1c2f 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/GlobalISel.cpp
@@ -25,6 +25,8 @@ void llvm::initializeGlobalISel(PassRegistry &Registry) {
void llvm::initializeGlobalISel(PassRegistry &Registry) {
initializeIRTranslatorPass(Registry);
+ initializeLegalizerPass(Registry);
initializeRegBankSelectPass(Registry);
+ initializeInstructionSelectPass(Registry);
}
#endif // LLVM_BUILD_GLOBAL_ISEL
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index b8a960cfac76..cf35afbc6e5f 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -14,12 +14,19 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/GetElementPtrTypeIterator.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetLowering.h"
#define DEBUG_TYPE "irtranslator"
@@ -27,13 +34,29 @@
using namespace llvm;
char IRTranslator::ID = 0;
-INITIALIZE_PASS(IRTranslator, "irtranslator", "IRTranslator LLVM IR -> MI",
- false, false);
+INITIALIZE_PASS_BEGIN(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_END(IRTranslator, DEBUG_TYPE, "IRTranslator LLVM IR -> MI",
+ false, false)
+
+static void reportTranslationError(const Value &V, const Twine &Message) {
+ std::string ErrStorage;
+ raw_string_ostream Err(ErrStorage);
+ Err << Message << ": " << V << '\n';
+ report_fatal_error(Err.str());
+}
IRTranslator::IRTranslator() : MachineFunctionPass(ID), MRI(nullptr) {
initializeIRTranslatorPass(*PassRegistry::getPassRegistry());
}
+void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetPassConfig>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+
unsigned IRTranslator::getOrCreateVReg(const Value &Val) {
unsigned &ValReg = ValToVReg[&Val];
// Check if this is the first time we see Val.
@@ -42,56 +65,129 @@ unsigned IRTranslator::getOrCreateVReg(const Value &Val) {
// we need to concat together to produce the value.
assert(Val.getType()->isSized() &&
"Don't know how to create an empty vreg");
- assert(!Val.getType()->isAggregateType() && "Not yet implemented");
- unsigned Size = Val.getType()->getPrimitiveSizeInBits();
- unsigned VReg = MRI->createGenericVirtualRegister(Size);
+ unsigned VReg = MRI->createGenericVirtualRegister(LLT{*Val.getType(), *DL});
ValReg = VReg;
- assert(!isa<Constant>(Val) && "Not yet implemented");
+
+ if (auto CV = dyn_cast<Constant>(&Val)) {
+ bool Success = translate(*CV, VReg);
+ if (!Success) {
+ if (!TPC->isGlobalISelAbortEnabled()) {
+ MF->getProperties().set(
+ MachineFunctionProperties::Property::FailedISel);
+ return VReg;
+ }
+ reportTranslationError(Val, "unable to translate constant");
+ }
+ }
}
return ValReg;
}
+int IRTranslator::getOrCreateFrameIndex(const AllocaInst &AI) {
+ if (FrameIndices.find(&AI) != FrameIndices.end())
+ return FrameIndices[&AI];
+
+ unsigned ElementSize = DL->getTypeStoreSize(AI.getAllocatedType());
+ unsigned Size =
+ ElementSize * cast<ConstantInt>(AI.getArraySize())->getZExtValue();
+
+ // Always allocate at least one byte.
+ Size = std::max(Size, 1u);
+
+ unsigned Alignment = AI.getAlignment();
+ if (!Alignment)
+ Alignment = DL->getABITypeAlignment(AI.getAllocatedType());
+
+ int &FI = FrameIndices[&AI];
+ FI = MF->getFrameInfo().CreateStackObject(Size, Alignment, false, &AI);
+ return FI;
+}
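
A small worked example of the size computation above, using assumed numbers (a 12-byte allocated type and a constant array size of 3):

// Sketch of the arithmetic in getOrCreateFrameIndex; the values are assumptions.
unsigned allocaObjectSize(unsigned ElementSize /* e.g. 12 */,
                          unsigned ArrayCount /* e.g. 3 */) {
  unsigned Size = ElementSize * ArrayCount;  // 12 * 3 == 36 bytes
  return Size ? Size : 1;                    // always allocate at least 1 byte
}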
+
+unsigned IRTranslator::getMemOpAlignment(const Instruction &I) {
+ unsigned Alignment = 0;
+ Type *ValTy = nullptr;
+ if (const StoreInst *SI = dyn_cast<StoreInst>(&I)) {
+ Alignment = SI->getAlignment();
+ ValTy = SI->getValueOperand()->getType();
+ } else if (const LoadInst *LI = dyn_cast<LoadInst>(&I)) {
+ Alignment = LI->getAlignment();
+ ValTy = LI->getType();
+ } else if (!TPC->isGlobalISelAbortEnabled()) {
+ MF->getProperties().set(
+ MachineFunctionProperties::Property::FailedISel);
+ return 1;
+ } else
+ llvm_unreachable("unhandled memory instruction");
+
+ return Alignment ? Alignment : DL->getABITypeAlignment(ValTy);
+}
+
MachineBasicBlock &IRTranslator::getOrCreateBB(const BasicBlock &BB) {
MachineBasicBlock *&MBB = BBToMBB[&BB];
if (!MBB) {
- MachineFunction &MF = MIRBuilder.getMF();
- MBB = MF.CreateMachineBasicBlock();
- MF.push_back(MBB);
+ MBB = MF->CreateMachineBasicBlock();
+ MF->push_back(MBB);
}
return *MBB;
}
-bool IRTranslator::translateBinaryOp(unsigned Opcode, const Instruction &Inst) {
+bool IRTranslator::translateBinaryOp(unsigned Opcode, const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ // FIXME: handle signed/unsigned wrapping flags.
+
// Get or create a virtual register for each value.
// Unless the value is a Constant => loadimm cst?
// or inline constant each time?
// Creation of a virtual register needs to have a size.
- unsigned Op0 = getOrCreateVReg(*Inst.getOperand(0));
- unsigned Op1 = getOrCreateVReg(*Inst.getOperand(1));
- unsigned Res = getOrCreateVReg(Inst);
- MIRBuilder.buildInstr(Opcode, Inst.getType(), Res, Op0, Op1);
+ unsigned Op0 = getOrCreateVReg(*U.getOperand(0));
+ unsigned Op1 = getOrCreateVReg(*U.getOperand(1));
+ unsigned Res = getOrCreateVReg(U);
+ MIRBuilder.buildInstr(Opcode).addDef(Res).addUse(Op0).addUse(Op1);
+ return true;
+}
+
+bool IRTranslator::translateCompare(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ const CmpInst *CI = dyn_cast<CmpInst>(&U);
+ unsigned Op0 = getOrCreateVReg(*U.getOperand(0));
+ unsigned Op1 = getOrCreateVReg(*U.getOperand(1));
+ unsigned Res = getOrCreateVReg(U);
+ CmpInst::Predicate Pred =
+ CI ? CI->getPredicate() : static_cast<CmpInst::Predicate>(
+ cast<ConstantExpr>(U).getPredicate());
+
+ if (CmpInst::isIntPredicate(Pred))
+ MIRBuilder.buildICmp(Pred, Res, Op0, Op1);
+ else
+ MIRBuilder.buildFCmp(Pred, Res, Op0, Op1);
+
return true;
}
-bool IRTranslator::translateReturn(const Instruction &Inst) {
- assert(isa<ReturnInst>(Inst) && "Return expected");
- const Value *Ret = cast<ReturnInst>(Inst).getReturnValue();
+bool IRTranslator::translateRet(const User &U, MachineIRBuilder &MIRBuilder) {
+ const ReturnInst &RI = cast<ReturnInst>(U);
+ const Value *Ret = RI.getReturnValue();
// The target may mess up with the insertion point, but
// this is not important as a return is the last instruction
// of the block anyway.
return CLI->lowerReturn(MIRBuilder, Ret, !Ret ? 0 : getOrCreateVReg(*Ret));
}
-bool IRTranslator::translateBr(const Instruction &Inst) {
- assert(isa<BranchInst>(Inst) && "Branch expected");
- const BranchInst &BrInst = *cast<BranchInst>(&Inst);
- if (BrInst.isUnconditional()) {
- const BasicBlock &BrTgt = *cast<BasicBlock>(BrInst.getOperand(0));
- MachineBasicBlock &TgtBB = getOrCreateBB(BrTgt);
- MIRBuilder.buildInstr(TargetOpcode::G_BR, BrTgt.getType(), TgtBB);
- } else {
- assert(0 && "Not yet implemented");
+bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
+ const BranchInst &BrInst = cast<BranchInst>(U);
+ unsigned Succ = 0;
+ if (!BrInst.isUnconditional()) {
+ // We want a G_BRCOND to the true BB followed by an unconditional branch.
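+    // i.e. "br i1 %c, label %t, label %f" becomes a G_BRCOND %c to %t here,
+    // followed by an unconditional G_BR to %f below; both targets are also
+    // recorded as MBB successors at the end of this function.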
+ unsigned Tst = getOrCreateVReg(*BrInst.getCondition());
+ const BasicBlock &TrueTgt = *cast<BasicBlock>(BrInst.getSuccessor(Succ++));
+ MachineBasicBlock &TrueBB = getOrCreateBB(TrueTgt);
+ MIRBuilder.buildBrCond(Tst, TrueBB);
}
+
+ const BasicBlock &BrTgt = *cast<BasicBlock>(BrInst.getSuccessor(Succ));
+ MachineBasicBlock &TgtBB = getOrCreateBB(BrTgt);
+ MIRBuilder.buildBr(TgtBB);
+
// Link successors.
MachineBasicBlock &CurBB = MIRBuilder.getMBB();
for (const BasicBlock *Succ : BrInst.successors())
@@ -99,66 +195,655 @@ bool IRTranslator::translateBr(const Instruction &Inst) {
return true;
}
+bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
+ const LoadInst &LI = cast<LoadInst>(U);
+
+ if (!TPC->isGlobalISelAbortEnabled() && LI.isAtomic())
+ return false;
+
+ assert(!LI.isAtomic() && "only non-atomic loads are supported at the moment");
+ auto Flags = LI.isVolatile() ? MachineMemOperand::MOVolatile
+ : MachineMemOperand::MONone;
+ Flags |= MachineMemOperand::MOLoad;
+
+ unsigned Res = getOrCreateVReg(LI);
+ unsigned Addr = getOrCreateVReg(*LI.getPointerOperand());
+ LLT VTy{*LI.getType(), *DL}, PTy{*LI.getPointerOperand()->getType(), *DL};
+ MIRBuilder.buildLoad(
+ Res, Addr,
+ *MF->getMachineMemOperand(MachinePointerInfo(LI.getPointerOperand()),
+ Flags, DL->getTypeStoreSize(LI.getType()),
+ getMemOpAlignment(LI)));
+ return true;
+}
+
+bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) {
+ const StoreInst &SI = cast<StoreInst>(U);
+
+ if (!TPC->isGlobalISelAbortEnabled() && SI.isAtomic())
+ return false;
+
+ assert(!SI.isAtomic() && "only non-atomic stores supported at the moment");
+ auto Flags = SI.isVolatile() ? MachineMemOperand::MOVolatile
+ : MachineMemOperand::MONone;
+ Flags |= MachineMemOperand::MOStore;
+
+ unsigned Val = getOrCreateVReg(*SI.getValueOperand());
+ unsigned Addr = getOrCreateVReg(*SI.getPointerOperand());
+ LLT VTy{*SI.getValueOperand()->getType(), *DL},
+ PTy{*SI.getPointerOperand()->getType(), *DL};
+
+ MIRBuilder.buildStore(
+ Val, Addr,
+ *MF->getMachineMemOperand(
+ MachinePointerInfo(SI.getPointerOperand()), Flags,
+ DL->getTypeStoreSize(SI.getValueOperand()->getType()),
+ getMemOpAlignment(SI)));
+ return true;
+}
+
+bool IRTranslator::translateExtractValue(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ const Value *Src = U.getOperand(0);
+ Type *Int32Ty = Type::getInt32Ty(U.getContext());
+ SmallVector<Value *, 1> Indices;
+
+ // getIndexedOffsetInType is designed for GEPs, so the first index is the
+ // usual array element rather than looking into the actual aggregate.
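+  // e.g. for "extractvalue {i8, i64} %agg, 1" the list becomes {0, 1}; the
+  // resulting byte offset of member 1 is scaled by 8 into the bit offset
+  // handed to G_EXTRACT below.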
+ Indices.push_back(ConstantInt::get(Int32Ty, 0));
+
+ if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(&U)) {
+ for (auto Idx : EVI->indices())
+ Indices.push_back(ConstantInt::get(Int32Ty, Idx));
+ } else {
+ for (unsigned i = 1; i < U.getNumOperands(); ++i)
+ Indices.push_back(U.getOperand(i));
+ }
+
+ uint64_t Offset = 8 * DL->getIndexedOffsetInType(Src->getType(), Indices);
+
+ unsigned Res = getOrCreateVReg(U);
+ MIRBuilder.buildExtract(Res, Offset, getOrCreateVReg(*Src));
+
+ return true;
+}
+
+bool IRTranslator::translateInsertValue(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ const Value *Src = U.getOperand(0);
+ Type *Int32Ty = Type::getInt32Ty(U.getContext());
+ SmallVector<Value *, 1> Indices;
+
+ // getIndexedOffsetInType is designed for GEPs, so the first index is the
+ // usual array element rather than looking into the actual aggregate.
+ Indices.push_back(ConstantInt::get(Int32Ty, 0));
+
+ if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(&U)) {
+ for (auto Idx : IVI->indices())
+ Indices.push_back(ConstantInt::get(Int32Ty, Idx));
+ } else {
+ for (unsigned i = 2; i < U.getNumOperands(); ++i)
+ Indices.push_back(U.getOperand(i));
+ }
+
+ uint64_t Offset = 8 * DL->getIndexedOffsetInType(Src->getType(), Indices);
+
+ unsigned Res = getOrCreateVReg(U);
+ const Value &Inserted = *U.getOperand(1);
+ MIRBuilder.buildInsert(Res, getOrCreateVReg(*Src), getOrCreateVReg(Inserted),
+ Offset);
+
+ return true;
+}
+
+bool IRTranslator::translateSelect(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ MIRBuilder.buildSelect(getOrCreateVReg(U), getOrCreateVReg(*U.getOperand(0)),
+ getOrCreateVReg(*U.getOperand(1)),
+ getOrCreateVReg(*U.getOperand(2)));
+ return true;
+}
+
+bool IRTranslator::translateBitCast(const User &U,
+ MachineIRBuilder &MIRBuilder) {
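+  // A bitcast between types with the same LLT representation is a no-op at
+  // the generic MI level: if the result already has a vreg, emit a COPY into
+  // it, otherwise just reuse the source's vreg.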
+ if (LLT{*U.getOperand(0)->getType(), *DL} == LLT{*U.getType(), *DL}) {
+ unsigned &Reg = ValToVReg[&U];
+ if (Reg)
+ MIRBuilder.buildCopy(Reg, getOrCreateVReg(*U.getOperand(0)));
+ else
+ Reg = getOrCreateVReg(*U.getOperand(0));
+ return true;
+ }
+ return translateCast(TargetOpcode::G_BITCAST, U, MIRBuilder);
+}
+
+bool IRTranslator::translateCast(unsigned Opcode, const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ unsigned Op = getOrCreateVReg(*U.getOperand(0));
+ unsigned Res = getOrCreateVReg(U);
+ MIRBuilder.buildInstr(Opcode).addDef(Res).addUse(Op);
+ return true;
+}
+
+bool IRTranslator::translateGetElementPtr(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ // FIXME: support vector GEPs.
+ if (U.getType()->isVectorTy())
+ return false;
+
+ Value &Op0 = *U.getOperand(0);
+ unsigned BaseReg = getOrCreateVReg(Op0);
+ LLT PtrTy{*Op0.getType(), *DL};
+ unsigned PtrSize = DL->getPointerSizeInBits(PtrTy.getAddressSpace());
+ LLT OffsetTy = LLT::scalar(PtrSize);
+
+ int64_t Offset = 0;
+ for (gep_type_iterator GTI = gep_type_begin(&U), E = gep_type_end(&U);
+ GTI != E; ++GTI) {
+ const Value *Idx = GTI.getOperand();
+ if (StructType *StTy = GTI.getStructTypeOrNull()) {
+ unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue();
+ Offset += DL->getStructLayout(StTy)->getElementOffset(Field);
+ continue;
+ } else {
+ uint64_t ElementSize = DL->getTypeAllocSize(GTI.getIndexedType());
+
+ // If this is a scalar constant or a splat vector of constants,
+ // handle it quickly.
+ if (const auto *CI = dyn_cast<ConstantInt>(Idx)) {
+ Offset += ElementSize * CI->getSExtValue();
+ continue;
+ }
+
+ if (Offset != 0) {
+ unsigned NewBaseReg = MRI->createGenericVirtualRegister(PtrTy);
+ unsigned OffsetReg = MRI->createGenericVirtualRegister(OffsetTy);
+ MIRBuilder.buildConstant(OffsetReg, Offset);
+ MIRBuilder.buildGEP(NewBaseReg, BaseReg, OffsetReg);
+
+ BaseReg = NewBaseReg;
+ Offset = 0;
+ }
+
+ // N = N + Idx * ElementSize;
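+      // A non-constant index is sign-extended or truncated to the
+      // pointer-sized offset type, scaled by the element's alloc size, and
+      // folded into the running base pointer with G_GEP.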
+ unsigned ElementSizeReg = MRI->createGenericVirtualRegister(OffsetTy);
+ MIRBuilder.buildConstant(ElementSizeReg, ElementSize);
+
+ unsigned IdxReg = getOrCreateVReg(*Idx);
+ if (MRI->getType(IdxReg) != OffsetTy) {
+ unsigned NewIdxReg = MRI->createGenericVirtualRegister(OffsetTy);
+ MIRBuilder.buildSExtOrTrunc(NewIdxReg, IdxReg);
+ IdxReg = NewIdxReg;
+ }
+
+ unsigned OffsetReg = MRI->createGenericVirtualRegister(OffsetTy);
+ MIRBuilder.buildMul(OffsetReg, ElementSizeReg, IdxReg);
+
+ unsigned NewBaseReg = MRI->createGenericVirtualRegister(PtrTy);
+ MIRBuilder.buildGEP(NewBaseReg, BaseReg, OffsetReg);
+ BaseReg = NewBaseReg;
+ }
+ }
+
+ if (Offset != 0) {
+ unsigned OffsetReg = MRI->createGenericVirtualRegister(OffsetTy);
+ MIRBuilder.buildConstant(OffsetReg, Offset);
+ MIRBuilder.buildGEP(getOrCreateVReg(U), BaseReg, OffsetReg);
+ return true;
+ }
+
+ MIRBuilder.buildCopy(getOrCreateVReg(U), BaseReg);
+ return true;
+}
+
+bool IRTranslator::translateMemcpy(const CallInst &CI,
+ MachineIRBuilder &MIRBuilder) {
+ LLT SizeTy{*CI.getArgOperand(2)->getType(), *DL};
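+  // Only lower the simple case where both pointers are in address space 0 and
+  // the length operand is pointer-sized, so the operands line up with the C
+  // library memcpy signature.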
+ if (cast<PointerType>(CI.getArgOperand(0)->getType())->getAddressSpace() !=
+ 0 ||
+ cast<PointerType>(CI.getArgOperand(1)->getType())->getAddressSpace() !=
+ 0 ||
+ SizeTy.getSizeInBits() != DL->getPointerSizeInBits(0))
+ return false;
+
+ SmallVector<CallLowering::ArgInfo, 8> Args;
+ for (int i = 0; i < 3; ++i) {
+ const auto &Arg = CI.getArgOperand(i);
+ Args.emplace_back(getOrCreateVReg(*Arg), Arg->getType());
+ }
+
+ MachineOperand Callee = MachineOperand::CreateES("memcpy");
+
+ return CLI->lowerCall(MIRBuilder, Callee,
+ CallLowering::ArgInfo(0, CI.getType()), Args);
+}
+
+void IRTranslator::getStackGuard(unsigned DstReg,
+ MachineIRBuilder &MIRBuilder) {
+ auto MIB = MIRBuilder.buildInstr(TargetOpcode::LOAD_STACK_GUARD);
+ MIB.addDef(DstReg);
+
+ auto &TLI = *MF->getSubtarget().getTargetLowering();
+ Value *Global = TLI.getSDagStackGuard(*MF->getFunction()->getParent());
+ if (!Global)
+ return;
+
+ MachinePointerInfo MPInfo(Global);
+ MachineInstr::mmo_iterator MemRefs = MF->allocateMemRefsArray(1);
+ auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
+ MachineMemOperand::MODereferenceable;
+ *MemRefs =
+ MF->getMachineMemOperand(MPInfo, Flags, DL->getPointerSizeInBits() / 8,
+ DL->getPointerABIAlignment());
+ MIB.setMemRefs(MemRefs, MemRefs + 1);
+}
+
+bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
+ MachineIRBuilder &MIRBuilder) {
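+  // The *.with.overflow intrinsics return a {result, i1} pair: the generic op
+  // defines both vregs, and G_SEQUENCE below packs them into the aggregate
+  // result (value at bit 0, overflow flag at bit Width).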
+ LLT Ty{*CI.getOperand(0)->getType(), *DL};
+ LLT s1 = LLT::scalar(1);
+ unsigned Width = Ty.getSizeInBits();
+ unsigned Res = MRI->createGenericVirtualRegister(Ty);
+ unsigned Overflow = MRI->createGenericVirtualRegister(s1);
+ auto MIB = MIRBuilder.buildInstr(Op)
+ .addDef(Res)
+ .addDef(Overflow)
+ .addUse(getOrCreateVReg(*CI.getOperand(0)))
+ .addUse(getOrCreateVReg(*CI.getOperand(1)));
+
+ if (Op == TargetOpcode::G_UADDE || Op == TargetOpcode::G_USUBE) {
+ unsigned Zero = MRI->createGenericVirtualRegister(s1);
+ EntryBuilder.buildConstant(Zero, 0);
+ MIB.addUse(Zero);
+ }
+
+ MIRBuilder.buildSequence(getOrCreateVReg(CI), Res, 0, Overflow, Width);
+ return true;
+}
+
+bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
+ MachineIRBuilder &MIRBuilder) {
+ switch (ID) {
+ default:
+ break;
+ case Intrinsic::dbg_declare:
+ case Intrinsic::dbg_value:
+ // FIXME: these obviously need to be supported properly.
+ MF->getProperties().set(
+ MachineFunctionProperties::Property::FailedISel);
+ return true;
+ case Intrinsic::uadd_with_overflow:
+ return translateOverflowIntrinsic(CI, TargetOpcode::G_UADDE, MIRBuilder);
+ case Intrinsic::sadd_with_overflow:
+ return translateOverflowIntrinsic(CI, TargetOpcode::G_SADDO, MIRBuilder);
+ case Intrinsic::usub_with_overflow:
+ return translateOverflowIntrinsic(CI, TargetOpcode::G_USUBE, MIRBuilder);
+ case Intrinsic::ssub_with_overflow:
+ return translateOverflowIntrinsic(CI, TargetOpcode::G_SSUBO, MIRBuilder);
+ case Intrinsic::umul_with_overflow:
+ return translateOverflowIntrinsic(CI, TargetOpcode::G_UMULO, MIRBuilder);
+ case Intrinsic::smul_with_overflow:
+ return translateOverflowIntrinsic(CI, TargetOpcode::G_SMULO, MIRBuilder);
+ case Intrinsic::memcpy:
+ return translateMemcpy(CI, MIRBuilder);
+ case Intrinsic::eh_typeid_for: {
+ GlobalValue *GV = ExtractTypeInfo(CI.getArgOperand(0));
+ unsigned Reg = getOrCreateVReg(CI);
+ unsigned TypeID = MF->getTypeIDFor(GV);
+ MIRBuilder.buildConstant(Reg, TypeID);
+ return true;
+ }
+ case Intrinsic::objectsize: {
+ // If we don't know by now, we're never going to know.
+ const ConstantInt *Min = cast<ConstantInt>(CI.getArgOperand(1));
+
+ MIRBuilder.buildConstant(getOrCreateVReg(CI), Min->isZero() ? -1ULL : 0);
+ return true;
+ }
+ case Intrinsic::stackguard:
+ getStackGuard(getOrCreateVReg(CI), MIRBuilder);
+ return true;
+ case Intrinsic::stackprotector: {
+ LLT PtrTy{*CI.getArgOperand(0)->getType(), *DL};
+ unsigned GuardVal = MRI->createGenericVirtualRegister(PtrTy);
+ getStackGuard(GuardVal, MIRBuilder);
+
+ AllocaInst *Slot = cast<AllocaInst>(CI.getArgOperand(1));
+ MIRBuilder.buildStore(
+ GuardVal, getOrCreateVReg(*Slot),
+ *MF->getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(*MF,
+ getOrCreateFrameIndex(*Slot)),
+ MachineMemOperand::MOStore | MachineMemOperand::MOVolatile,
+ PtrTy.getSizeInBits() / 8, 8));
+ return true;
+ }
+ }
+ return false;
+}
+
+bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
+ const CallInst &CI = cast<CallInst>(U);
+ auto TII = MF->getTarget().getIntrinsicInfo();
+ const Function *F = CI.getCalledFunction();
+
+ if (!F || !F->isIntrinsic()) {
+ unsigned Res = CI.getType()->isVoidTy() ? 0 : getOrCreateVReg(CI);
+ SmallVector<unsigned, 8> Args;
+ for (auto &Arg: CI.arg_operands())
+ Args.push_back(getOrCreateVReg(*Arg));
+
+ return CLI->lowerCall(MIRBuilder, CI, Res, Args, [&]() {
+ return getOrCreateVReg(*CI.getCalledValue());
+ });
+ }
+
+ Intrinsic::ID ID = F->getIntrinsicID();
+ if (TII && ID == Intrinsic::not_intrinsic)
+ ID = static_cast<Intrinsic::ID>(TII->getIntrinsicID(F));
+
+ assert(ID != Intrinsic::not_intrinsic && "unknown intrinsic");
+
+ if (translateKnownIntrinsic(CI, ID, MIRBuilder))
+ return true;
+
+ unsigned Res = CI.getType()->isVoidTy() ? 0 : getOrCreateVReg(CI);
+ MachineInstrBuilder MIB =
+ MIRBuilder.buildIntrinsic(ID, Res, !CI.doesNotAccessMemory());
+
+ for (auto &Arg : CI.arg_operands()) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Arg))
+ MIB.addImm(CI->getSExtValue());
+ else
+ MIB.addUse(getOrCreateVReg(*Arg));
+ }
+ return true;
+}
+
+bool IRTranslator::translateInvoke(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ const InvokeInst &I = cast<InvokeInst>(U);
+ MCContext &Context = MF->getContext();
+
+ const BasicBlock *ReturnBB = I.getSuccessor(0);
+ const BasicBlock *EHPadBB = I.getSuccessor(1);
+
+ const Value *Callee(I.getCalledValue());
+ const Function *Fn = dyn_cast<Function>(Callee);
+ if (isa<InlineAsm>(Callee))
+ return false;
+
+ // FIXME: support invoking patchpoint and statepoint intrinsics.
+ if (Fn && Fn->isIntrinsic())
+ return false;
+
+ // FIXME: support whatever these are.
+ if (I.countOperandBundlesOfType(LLVMContext::OB_deopt))
+ return false;
+
+ // FIXME: support Windows exception handling.
+ if (!isa<LandingPadInst>(EHPadBB->front()))
+ return false;
+
+ // Emit the actual call, bracketed by EH_LABELs so that the MF knows about
+ // the region covered by the try.
+ MCSymbol *BeginSymbol = Context.createTempSymbol();
+ MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(BeginSymbol);
+
+ unsigned Res = I.getType()->isVoidTy() ? 0 : getOrCreateVReg(I);
+ SmallVector<CallLowering::ArgInfo, 8> Args;
+ for (auto &Arg: I.arg_operands())
+ Args.emplace_back(getOrCreateVReg(*Arg), Arg->getType());
+
+ if (!CLI->lowerCall(MIRBuilder, MachineOperand::CreateGA(Fn, 0),
+ CallLowering::ArgInfo(Res, I.getType()), Args))
+ return false;
+
+ MCSymbol *EndSymbol = Context.createTempSymbol();
+ MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(EndSymbol);
+
+ // FIXME: track probabilities.
+ MachineBasicBlock &EHPadMBB = getOrCreateBB(*EHPadBB),
+ &ReturnMBB = getOrCreateBB(*ReturnBB);
+ MF->addInvoke(&EHPadMBB, BeginSymbol, EndSymbol);
+ MIRBuilder.getMBB().addSuccessor(&ReturnMBB);
+ MIRBuilder.getMBB().addSuccessor(&EHPadMBB);
+
+ return true;
+}
+
+bool IRTranslator::translateLandingPad(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ const LandingPadInst &LP = cast<LandingPadInst>(U);
+
+ MachineBasicBlock &MBB = MIRBuilder.getMBB();
+ addLandingPadInfo(LP, MBB);
+
+ MBB.setIsEHPad();
+
+ // If there aren't registers to copy the values into (e.g., during SjLj
+ // exceptions), then don't bother.
+ auto &TLI = *MF->getSubtarget().getTargetLowering();
+ const Constant *PersonalityFn = MF->getFunction()->getPersonalityFn();
+ if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 &&
+ TLI.getExceptionSelectorRegister(PersonalityFn) == 0)
+ return true;
+
+ // If landingpad's return type is token type, we don't create DAG nodes
+ // for its exception pointer and selector value. The extraction of exception
+ // pointer or selector value from token type landingpads is not currently
+ // supported.
+ if (LP.getType()->isTokenTy())
+ return true;
+
+ // Add a label to mark the beginning of the landing pad. Deletion of the
+ // landing pad can thus be detected via the MachineModuleInfo.
+ MIRBuilder.buildInstr(TargetOpcode::EH_LABEL)
+ .addSym(MF->addLandingPad(&MBB));
+
+ // Mark exception register as live in.
+ SmallVector<unsigned, 2> Regs;
+ SmallVector<uint64_t, 2> Offsets;
+ LLT p0 = LLT::pointer(0, DL->getPointerSizeInBits());
+ if (unsigned Reg = TLI.getExceptionPointerRegister(PersonalityFn)) {
+ unsigned VReg = MRI->createGenericVirtualRegister(p0);
+ MIRBuilder.buildCopy(VReg, Reg);
+ Regs.push_back(VReg);
+ Offsets.push_back(0);
+ }
+
+ if (unsigned Reg = TLI.getExceptionSelectorRegister(PersonalityFn)) {
+ unsigned VReg = MRI->createGenericVirtualRegister(p0);
+ MIRBuilder.buildCopy(VReg, Reg);
+ Regs.push_back(VReg);
+ Offsets.push_back(p0.getSizeInBits());
+ }
+
+ MIRBuilder.buildSequence(getOrCreateVReg(LP), Regs, Offsets);
+ return true;
+}
+
+bool IRTranslator::translateStaticAlloca(const AllocaInst &AI,
+ MachineIRBuilder &MIRBuilder) {
+ if (!TPC->isGlobalISelAbortEnabled() && !AI.isStaticAlloca())
+ return false;
+
+ assert(AI.isStaticAlloca() && "only handle static allocas now");
+ unsigned Res = getOrCreateVReg(AI);
+ int FI = getOrCreateFrameIndex(AI);
+ MIRBuilder.buildFrameIndex(Res, FI);
+ return true;
+}
+
+bool IRTranslator::translatePHI(const User &U, MachineIRBuilder &MIRBuilder) {
+ const PHINode &PI = cast<PHINode>(U);
+ auto MIB = MIRBuilder.buildInstr(TargetOpcode::PHI);
+ MIB.addDef(getOrCreateVReg(PI));
+
+ PendingPHIs.emplace_back(&PI, MIB.getInstr());
+ return true;
+}
+
+void IRTranslator::finishPendingPhis() {
+ for (std::pair<const PHINode *, MachineInstr *> &Phi : PendingPHIs) {
+ const PHINode *PI = Phi.first;
+ MachineInstrBuilder MIB(*MF, Phi.second);
+
+ // All MachineBasicBlocks exist, add them to the PHI. We assume IRTranslator
+ // won't create extra control flow here, otherwise we need to find the
+ // dominating predecessor here (or perhaps force the weirder IRTranslators
+ // to provide a simple boundary).
+ for (unsigned i = 0; i < PI->getNumIncomingValues(); ++i) {
+ assert(BBToMBB[PI->getIncomingBlock(i)]->isSuccessor(MIB->getParent()) &&
+ "I appear to have misunderstood Machine PHIs");
+ MIB.addUse(getOrCreateVReg(*PI->getIncomingValue(i)));
+ MIB.addMBB(BBToMBB[PI->getIncomingBlock(i)]);
+ }
+ }
+}
+
bool IRTranslator::translate(const Instruction &Inst) {
- MIRBuilder.setDebugLoc(Inst.getDebugLoc());
+ CurBuilder.setDebugLoc(Inst.getDebugLoc());
switch(Inst.getOpcode()) {
- case Instruction::Add:
- return translateBinaryOp(TargetOpcode::G_ADD, Inst);
- case Instruction::Or:
- return translateBinaryOp(TargetOpcode::G_OR, Inst);
- case Instruction::Br:
- return translateBr(Inst);
- case Instruction::Ret:
- return translateReturn(Inst);
-
+#define HANDLE_INST(NUM, OPCODE, CLASS) \
+ case Instruction::OPCODE: return translate##OPCODE(Inst, CurBuilder);
+#include "llvm/IR/Instruction.def"
default:
- llvm_unreachable("Opcode not supported");
+ if (!TPC->isGlobalISelAbortEnabled())
+ return false;
+ llvm_unreachable("unknown opcode");
}
}
+bool IRTranslator::translate(const Constant &C, unsigned Reg) {
+ if (auto CI = dyn_cast<ConstantInt>(&C))
+ EntryBuilder.buildConstant(Reg, *CI);
+ else if (auto CF = dyn_cast<ConstantFP>(&C))
+ EntryBuilder.buildFConstant(Reg, *CF);
+ else if (isa<UndefValue>(C))
+ EntryBuilder.buildInstr(TargetOpcode::IMPLICIT_DEF).addDef(Reg);
+ else if (isa<ConstantPointerNull>(C))
+ EntryBuilder.buildConstant(Reg, 0);
+ else if (auto GV = dyn_cast<GlobalValue>(&C))
+ EntryBuilder.buildGlobalValue(Reg, GV);
+ else if (auto CE = dyn_cast<ConstantExpr>(&C)) {
+ switch(CE->getOpcode()) {
+#define HANDLE_INST(NUM, OPCODE, CLASS) \
+ case Instruction::OPCODE: return translate##OPCODE(*CE, EntryBuilder);
+#include "llvm/IR/Instruction.def"
+ default:
+ if (!TPC->isGlobalISelAbortEnabled())
+ return false;
+ llvm_unreachable("unknown opcode");
+ }
+ } else if (!TPC->isGlobalISelAbortEnabled())
+ return false;
+ else
+ llvm_unreachable("unhandled constant kind");
+
+ return true;
+}
-void IRTranslator::finalize() {
+void IRTranslator::finalizeFunction() {
// Release the memory used by the different maps we
// needed during the translation.
+ PendingPHIs.clear();
ValToVReg.clear();
+ FrameIndices.clear();
Constants.clear();
}
-bool IRTranslator::runOnMachineFunction(MachineFunction &MF) {
- const Function &F = *MF.getFunction();
+bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
+ MF = &CurMF;
+ const Function &F = *MF->getFunction();
if (F.empty())
return false;
- CLI = MF.getSubtarget().getCallLowering();
- MIRBuilder.setMF(MF);
- MRI = &MF.getRegInfo();
- // Setup the arguments.
- MachineBasicBlock &MBB = getOrCreateBB(F.front());
- MIRBuilder.setMBB(MBB);
+ CLI = MF->getSubtarget().getCallLowering();
+ CurBuilder.setMF(*MF);
+ EntryBuilder.setMF(*MF);
+ MRI = &MF->getRegInfo();
+ DL = &F.getParent()->getDataLayout();
+ TPC = &getAnalysis<TargetPassConfig>();
+
+ assert(PendingPHIs.empty() && "stale PHIs");
+
+  // Set up a separate basic-block for the arguments and constants, falling
+ // through to the IR-level Function's entry block.
+ MachineBasicBlock *EntryBB = MF->CreateMachineBasicBlock();
+ MF->push_back(EntryBB);
+ EntryBB->addSuccessor(&getOrCreateBB(F.front()));
+ EntryBuilder.setMBB(*EntryBB);
+
+ // Lower the actual args into this basic block.
SmallVector<unsigned, 8> VRegArgs;
for (const Argument &Arg: F.args())
VRegArgs.push_back(getOrCreateVReg(Arg));
- bool Succeeded =
- CLI->lowerFormalArguments(MIRBuilder, F.getArgumentList(), VRegArgs);
- if (!Succeeded)
+ bool Succeeded = CLI->lowerFormalArguments(EntryBuilder, F, VRegArgs);
+ if (!Succeeded) {
+ if (!TPC->isGlobalISelAbortEnabled()) {
+ MF->getProperties().set(
+ MachineFunctionProperties::Property::FailedISel);
+ finalizeFunction();
+ return false;
+ }
report_fatal_error("Unable to lower arguments");
+ }
+ // And translate the function!
for (const BasicBlock &BB: F) {
MachineBasicBlock &MBB = getOrCreateBB(BB);
// Set the insertion point of all the following translations to
// the end of this basic block.
- MIRBuilder.setMBB(MBB);
+ CurBuilder.setMBB(MBB);
+
for (const Instruction &Inst: BB) {
- bool Succeeded = translate(Inst);
+ Succeeded &= translate(Inst);
if (!Succeeded) {
- DEBUG(dbgs() << "Cannot translate: " << Inst << '\n');
- report_fatal_error("Unable to translate instruction");
+ if (TPC->isGlobalISelAbortEnabled())
+ reportTranslationError(Inst, "unable to translate instruction");
+ MF->getProperties().set(
+ MachineFunctionProperties::Property::FailedISel);
+ break;
}
}
}
- // Now that the MachineFrameInfo has been configured, no further changes to
- // the reserved registers are possible.
- MRI->freezeReservedRegs(MF);
+ if (Succeeded) {
+ finishPendingPhis();
+
+ // Now that the MachineFrameInfo has been configured, no further changes to
+ // the reserved registers are possible.
+ MRI->freezeReservedRegs(*MF);
+
+ // Merge the argument lowering and constants block with its single
+ // successor, the LLVM-IR entry block. We want the basic block to
+ // be maximal.
+ assert(EntryBB->succ_size() == 1 &&
+ "Custom BB used for lowering should have only one successor");
+ // Get the successor of the current entry block.
+ MachineBasicBlock &NewEntryBB = **EntryBB->succ_begin();
+ assert(NewEntryBB.pred_size() == 1 &&
+ "LLVM-IR entry block has a predecessor!?");
+ // Move all the instruction from the current entry block to the
+ // new entry block.
+ NewEntryBB.splice(NewEntryBB.begin(), EntryBB, EntryBB->begin(),
+ EntryBB->end());
+
+ // Update the live-in information for the new entry block.
+ for (const MachineBasicBlock::RegisterMaskPair &LiveIn : EntryBB->liveins())
+ NewEntryBB.addLiveIn(LiveIn);
+ NewEntryBB.sortUniqueLiveIns();
+
+ // Get rid of the now empty basic block.
+ EntryBB->removeSuccessor(&NewEntryBB);
+ MF->remove(EntryBB);
+
+ assert(&MF->front() == &NewEntryBB &&
+           "New entry wasn't next in the list of basic blocks!");
+ }
+
+ finalizeFunction();
return false;
}
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
new file mode 100644
index 000000000000..1d205cd6c9c8
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelect.cpp
@@ -0,0 +1,175 @@
+//===- llvm/CodeGen/GlobalISel/InstructionSelect.cpp - InstructionSelect ---==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the InstructionSelect class.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+#define DEBUG_TYPE "instruction-select"
+
+using namespace llvm;
+
+char InstructionSelect::ID = 0;
+INITIALIZE_PASS_BEGIN(InstructionSelect, DEBUG_TYPE,
+ "Select target instructions out of generic instructions",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_END(InstructionSelect, DEBUG_TYPE,
+ "Select target instructions out of generic instructions",
+ false, false)
+
+InstructionSelect::InstructionSelect() : MachineFunctionPass(ID) {
+ initializeInstructionSelectPass(*PassRegistry::getPassRegistry());
+}
+
+void InstructionSelect::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetPassConfig>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+static void reportSelectionError(const MachineInstr *MI, const Twine &Message) {
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ std::string ErrStorage;
+ raw_string_ostream Err(ErrStorage);
+ Err << Message << ":\nIn function: " << MF.getName() << '\n';
+ if (MI)
+ Err << *MI << '\n';
+ report_fatal_error(Err.str());
+}
+
+bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
+  // If the ISel pipeline failed, do not bother running this pass.
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailedISel))
+ return false;
+
+ DEBUG(dbgs() << "Selecting function: " << MF.getName() << '\n');
+
+ const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
+ const InstructionSelector *ISel = MF.getSubtarget().getInstructionSelector();
+ assert(ISel && "Cannot work without InstructionSelector");
+
+ // FIXME: freezeReservedRegs is now done in IRTranslator, but there are many
+ // other MF/MFI fields we need to initialize.
+
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+
+#ifndef NDEBUG
+ // Check that our input is fully legal: we require the function to have the
+ // Legalized property, so it should be.
+ // FIXME: This should be in the MachineVerifier, but it can't use the
+ // LegalizerInfo as it's currently in the separate GlobalISel library.
+ // The RegBankSelected property is already checked in the verifier. Note
+ // that it has the same layering problem, but we only use inline methods so
+ // end up not needing to link against the GlobalISel library.
+ if (const LegalizerInfo *MLI = MF.getSubtarget().getLegalizerInfo())
+ for (const MachineBasicBlock &MBB : MF)
+ for (const MachineInstr &MI : MBB)
+ if (isPreISelGenericOpcode(MI.getOpcode()) && !MLI->isLegal(MI, MRI))
+ reportSelectionError(&MI, "Instruction is not legal");
+
+#endif
+ // FIXME: We could introduce new blocks and will need to fix the outer loop.
+ // Until then, keep track of the number of blocks to assert that we don't.
+ const size_t NumBlocks = MF.size();
+
+ bool Failed = false;
+ for (MachineBasicBlock *MBB : post_order(&MF)) {
+ if (MBB->empty())
+ continue;
+
+ // Select instructions in reverse block order. We permit erasing so have
+ // to resort to manually iterating and recognizing the begin (rend) case.
+ bool ReachedBegin = false;
+ for (auto MII = std::prev(MBB->end()), Begin = MBB->begin();
+ !ReachedBegin;) {
+#ifndef NDEBUG
+ // Keep track of the insertion range for debug printing.
+ const auto AfterIt = std::next(MII);
+#endif
+ // Select this instruction.
+ MachineInstr &MI = *MII;
+
+ // And have our iterator point to the next instruction, if there is one.
+ if (MII == Begin)
+ ReachedBegin = true;
+ else
+ --MII;
+
+ DEBUG(dbgs() << "Selecting: \n " << MI);
+
+ if (!ISel->select(MI)) {
+ if (TPC.isGlobalISelAbortEnabled())
+          // FIXME: It would be nice to dump all inserted instructions. It's
+          // not obvious how, esp. considering select() can insert after MI.
+ reportSelectionError(&MI, "Cannot select");
+ Failed = true;
+ break;
+ }
+
+ // Dump the range of instructions that MI expanded into.
+ DEBUG({
+ auto InsertedBegin = ReachedBegin ? MBB->begin() : std::next(MII);
+ dbgs() << "Into:\n";
+ for (auto &InsertedMI : make_range(InsertedBegin, AfterIt))
+ dbgs() << " " << InsertedMI;
+ dbgs() << '\n';
+ });
+ }
+ }
+
+ // Now that selection is complete, there are no more generic vregs. Verify
+ // that the size of the now-constrained vreg is unchanged and that it has a
+ // register class.
+ for (auto &VRegToType : MRI.getVRegToType()) {
+ unsigned VReg = VRegToType.first;
+ auto *RC = MRI.getRegClassOrNull(VReg);
+ auto *MI = MRI.def_instr_begin(VReg) == MRI.def_instr_end()
+ ? nullptr
+ : &*MRI.def_instr_begin(VReg);
+ if (!RC) {
+ if (TPC.isGlobalISelAbortEnabled())
+        reportSelectionError(MI, "VReg has no regclass after selection");
+ Failed = true;
+ break;
+ }
+
+ if (VRegToType.second.isValid() &&
+ VRegToType.second.getSizeInBits() > (RC->getSize() * 8)) {
+ if (TPC.isGlobalISelAbortEnabled())
+ reportSelectionError(
+ MI, "VReg has explicit size different from class size");
+ Failed = true;
+ break;
+ }
+ }
+
+ MRI.getVRegToType().clear();
+
+ if (!TPC.isGlobalISelAbortEnabled() && (Failed || MF.size() != NumBlocks)) {
+ MF.getProperties().set(MachineFunctionProperties::Property::FailedISel);
+ return false;
+ }
+ assert(MF.size() == NumBlocks && "Inserting blocks is not supported yet");
+
+ // FIXME: Should we accurately track changes?
+ return true;
+}
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
new file mode 100644
index 000000000000..5c34da0dc557
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/InstructionSelector.cpp
@@ -0,0 +1,60 @@
+//===- llvm/CodeGen/GlobalISel/InstructionSelector.cpp -----------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the InstructionSelector class.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define DEBUG_TYPE "instructionselector"
+
+using namespace llvm;
+
+InstructionSelector::InstructionSelector() {}
+
+bool InstructionSelector::constrainSelectedInstRegOperands(
+ MachineInstr &I, const TargetInstrInfo &TII, const TargetRegisterInfo &TRI,
+ const RegisterBankInfo &RBI) const {
+ MachineBasicBlock &MBB = *I.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ for (unsigned OpI = 0, OpE = I.getNumExplicitOperands(); OpI != OpE; ++OpI) {
+ MachineOperand &MO = I.getOperand(OpI);
+
+ // There's nothing to be done on non-register operands.
+ if (!MO.isReg())
+ continue;
+
+ DEBUG(dbgs() << "Converting operand: " << MO << '\n');
+ assert(MO.isReg() && "Unsupported non-reg operand");
+
+ unsigned Reg = MO.getReg();
+ // Physical registers don't need to be constrained.
+ if (TRI.isPhysicalRegister(Reg))
+ continue;
+
+ // Register operands with a value of 0 (e.g. predicate operands) don't need
+ // to be constrained.
+ if (Reg == 0)
+ continue;
+
+ // If the operand is a vreg, we should constrain its regclass, and only
+ // insert COPYs if that's impossible.
+ // constrainOperandRegClass does that for us.
+ MO.setReg(constrainOperandRegClass(MF, TRI, MRI, TII, RBI, I, I.getDesc(),
+ Reg, OpI));
+ }
+ return true;
+}
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
new file mode 100644
index 000000000000..e86356880e99
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp
@@ -0,0 +1,180 @@
+//===-- llvm/CodeGen/GlobalISel/Legalizer.cpp -----------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file implements the Legalizer pass, which walks a function's
+/// Machine IR and uses the LegalizerHelper to legalize individual
+/// instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/Legalizer.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
+#include "llvm/CodeGen/GlobalISel/Legalizer.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+#define DEBUG_TYPE "legalizer"
+
+using namespace llvm;
+
+char Legalizer::ID = 0;
+INITIALIZE_PASS_BEGIN(Legalizer, DEBUG_TYPE,
+                      "Legalize a function's Machine IR", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_END(Legalizer, DEBUG_TYPE,
+                    "Legalize a function's Machine IR", false,
+ false)
+
+Legalizer::Legalizer() : MachineFunctionPass(ID) {
+ initializeLegalizerPass(*PassRegistry::getPassRegistry());
+}
+
+void Legalizer::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetPassConfig>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void Legalizer::init(MachineFunction &MF) {
+}
+
+bool Legalizer::combineExtracts(MachineInstr &MI, MachineRegisterInfo &MRI,
+ const TargetInstrInfo &TII) {
+ bool Changed = false;
+ if (MI.getOpcode() != TargetOpcode::G_EXTRACT)
+ return Changed;
+
+ unsigned NumDefs = (MI.getNumOperands() - 1) / 2;
+ unsigned SrcReg = MI.getOperand(NumDefs).getReg();
+ MachineInstr &SeqI = *MRI.def_instr_begin(SrcReg);
+ if (SeqI.getOpcode() != TargetOpcode::G_SEQUENCE)
+ return Changed;
+
+ unsigned NumSeqSrcs = (SeqI.getNumOperands() - 1) / 2;
+ bool AllDefsReplaced = true;
+
+ // Try to match each register extracted with a corresponding insertion formed
+ // by the G_SEQUENCE.
+ for (unsigned Idx = 0, SeqIdx = 0; Idx < NumDefs; ++Idx) {
+ MachineOperand &ExtractMO = MI.getOperand(Idx);
+ assert(ExtractMO.isReg() && ExtractMO.isDef() &&
+ "unexpected extract operand");
+
+ unsigned ExtractReg = ExtractMO.getReg();
+ unsigned ExtractPos = MI.getOperand(NumDefs + Idx + 1).getImm();
+
+ while (SeqIdx < NumSeqSrcs &&
+ SeqI.getOperand(2 * SeqIdx + 2).getImm() < ExtractPos)
+ ++SeqIdx;
+
+ if (SeqIdx == NumSeqSrcs) {
+ AllDefsReplaced = false;
+ continue;
+ }
+
+ unsigned OrigReg = SeqI.getOperand(2 * SeqIdx + 1).getReg();
+ if (SeqI.getOperand(2 * SeqIdx + 2).getImm() != ExtractPos ||
+ MRI.getType(OrigReg) != MRI.getType(ExtractReg)) {
+ AllDefsReplaced = false;
+ continue;
+ }
+
+ assert(!TargetRegisterInfo::isPhysicalRegister(OrigReg) &&
+ "unexpected physical register in G_SEQUENCE");
+
+ // Finally we can replace the uses.
+ for (auto &Use : MRI.use_operands(ExtractReg)) {
+ Changed = true;
+ Use.setReg(OrigReg);
+ }
+ }
+
+ if (AllDefsReplaced) {
+ // If SeqI was the next instruction in the BB and we removed it, we'd break
+ // the outer iteration.
+ assert(std::next(MachineBasicBlock::iterator(MI)) != SeqI &&
+ "G_SEQUENCE does not dominate G_EXTRACT");
+
+ MI.eraseFromParent();
+
+ if (MRI.use_empty(SrcReg))
+ SeqI.eraseFromParent();
+ Changed = true;
+ }
+
+ return Changed;
+}
+
+bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
+  // If the ISel pipeline failed, do not bother running this pass.
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailedISel))
+ return false;
+ DEBUG(dbgs() << "Legalize Machine IR for: " << MF.getName() << '\n');
+ init(MF);
+ const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
+ const LegalizerInfo &LegalizerInfo = *MF.getSubtarget().getLegalizerInfo();
+ LegalizerHelper Helper(MF);
+
+ // FIXME: an instruction may need more than one pass before it is legal. For
+ // example on most architectures <3 x i3> is doubly-illegal. It would
+ // typically proceed along a path like: <3 x i3> -> <3 x i8> -> <8 x i8>. We
+ // probably want a worklist of instructions rather than naive iterate until
+ // convergence for performance reasons.
+ bool Changed = false;
+ MachineBasicBlock::iterator NextMI;
+ for (auto &MBB : MF)
+ for (auto MI = MBB.begin(); MI != MBB.end(); MI = NextMI) {
+ // Get the next Instruction before we try to legalize, because there's a
+ // good chance MI will be deleted.
+ NextMI = std::next(MI);
+
+ // Only legalize pre-isel generic instructions: others don't have types
+ // and are assumed to be legal.
+ if (!isPreISelGenericOpcode(MI->getOpcode()))
+ continue;
+
+ auto Res = Helper.legalizeInstr(*MI, LegalizerInfo);
+
+ // Error out if we couldn't legalize this instruction. We may want to fall
+ // back to DAG ISel instead in the future.
+ if (Res == LegalizerHelper::UnableToLegalize) {
+ if (!TPC.isGlobalISelAbortEnabled()) {
+ MF.getProperties().set(
+ MachineFunctionProperties::Property::FailedISel);
+ return false;
+ }
+ std::string Msg;
+ raw_string_ostream OS(Msg);
+ OS << "unable to legalize instruction: ";
+ MI->print(OS);
+ report_fatal_error(OS.str());
+ }
+
+ Changed |= Res == LegalizerHelper::Legalized;
+ }
+
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ for (auto &MBB : MF) {
+ for (auto MI = MBB.begin(); MI != MBB.end(); MI = NextMI) {
+ // Get the next Instruction before we try to legalize, because there's a
+ // good chance MI will be deleted.
+ NextMI = std::next(MI);
+
+ Changed |= combineExtracts(*MI, MRI, TII);
+ }
+ }
+
+ return Changed;
+}
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
new file mode 100644
index 000000000000..eb25b6ca268f
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -0,0 +1,354 @@
+//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file implements the LegalizerHelper class to legalize
+/// individual instructions; the Legalizer pass that drives it lives in
+/// Legalizer.cpp.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
+#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+#include <sstream>
+
+#define DEBUG_TYPE "legalize-mir"
+
+using namespace llvm;
+
+LegalizerHelper::LegalizerHelper(MachineFunction &MF)
+ : MRI(MF.getRegInfo()) {
+ MIRBuilder.setMF(MF);
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
+ const LegalizerInfo &LegalizerInfo) {
+ auto Action = LegalizerInfo.getAction(MI, MRI);
+ switch (std::get<0>(Action)) {
+ case LegalizerInfo::Legal:
+ return AlreadyLegal;
+ case LegalizerInfo::Libcall:
+ return libcall(MI);
+ case LegalizerInfo::NarrowScalar:
+ return narrowScalar(MI, std::get<1>(Action), std::get<2>(Action));
+ case LegalizerInfo::WidenScalar:
+ return widenScalar(MI, std::get<1>(Action), std::get<2>(Action));
+ case LegalizerInfo::Lower:
+ return lower(MI, std::get<1>(Action), std::get<2>(Action));
+ case LegalizerInfo::FewerElements:
+ return fewerElementsVector(MI, std::get<1>(Action), std::get<2>(Action));
+ default:
+ return UnableToLegalize;
+ }
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::legalizeInstr(MachineInstr &MI,
+ const LegalizerInfo &LegalizerInfo) {
+ SmallVector<MachineInstr *, 4> WorkList;
+ MIRBuilder.recordInsertions(
+ [&](MachineInstr *MI) { WorkList.push_back(MI); });
+ WorkList.push_back(&MI);
+
+ bool Changed = false;
+ LegalizeResult Res;
+ unsigned Idx = 0;
+ do {
+ Res = legalizeInstrStep(*WorkList[Idx], LegalizerInfo);
+ if (Res == UnableToLegalize) {
+ MIRBuilder.stopRecordingInsertions();
+ return UnableToLegalize;
+ }
+ Changed |= Res == Legalized;
+ ++Idx;
+ } while (Idx < WorkList.size());
+
+ MIRBuilder.stopRecordingInsertions();
+
+ return Changed ? Legalized : AlreadyLegal;
+}
+
+void LegalizerHelper::extractParts(unsigned Reg, LLT Ty, int NumParts,
+ SmallVectorImpl<unsigned> &VRegs) {
+ unsigned Size = Ty.getSizeInBits();
+ SmallVector<uint64_t, 4> Indexes;
+ for (int i = 0; i < NumParts; ++i) {
+ VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
+ Indexes.push_back(i * Size);
+ }
+ MIRBuilder.buildExtract(VRegs, Indexes, Reg);
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::libcall(MachineInstr &MI) {
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ unsigned Size = Ty.getSizeInBits();
+ MIRBuilder.setInstr(MI);
+
+ switch (MI.getOpcode()) {
+ default:
+ return UnableToLegalize;
+ case TargetOpcode::G_FREM: {
+ auto &Ctx = MIRBuilder.getMF().getFunction()->getContext();
+ Type *Ty = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
+ auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
+ auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
+ const char *Name =
+ TLI.getLibcallName(Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32);
+
+ CLI.lowerCall(
+ MIRBuilder, MachineOperand::CreateES(Name),
+ {MI.getOperand(0).getReg(), Ty},
+ {{MI.getOperand(1).getReg(), Ty}, {MI.getOperand(2).getReg(), Ty}});
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ }
+}
+
+LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
+ unsigned TypeIdx,
+ LLT NarrowTy) {
+ // FIXME: Don't know how to handle secondary types yet.
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+ switch (MI.getOpcode()) {
+ default:
+ return UnableToLegalize;
+ case TargetOpcode::G_ADD: {
+ // Expand in terms of carry-setting/consuming G_ADDE instructions.
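+    // e.g. narrowing an s64 G_ADD to s32 yields two G_UADDEs: the low part
+    // consumes a constant-0 carry-in, the high part consumes the low part's
+    // carry-out, and the partial sums are recombined with G_SEQUENCE.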
+ unsigned NarrowSize = NarrowTy.getSizeInBits();
+ int NumParts = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits() /
+ NarrowTy.getSizeInBits();
+
+ MIRBuilder.setInstr(MI);
+
+ SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
+ SmallVector<uint64_t, 2> Indexes;
+ extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
+ extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
+
+ unsigned CarryIn = MRI.createGenericVirtualRegister(LLT::scalar(1));
+ MIRBuilder.buildConstant(CarryIn, 0);
+
+ for (int i = 0; i < NumParts; ++i) {
+ unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
+ unsigned CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
+
+ MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i],
+ Src2Regs[i], CarryIn);
+
+ DstRegs.push_back(DstReg);
+ Indexes.push_back(i * NarrowSize);
+ CarryIn = CarryOut;
+ }
+ unsigned DstReg = MI.getOperand(0).getReg();
+ MIRBuilder.buildSequence(DstReg, DstRegs, Indexes);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ }
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
+ MIRBuilder.setInstr(MI);
+
+ switch (MI.getOpcode()) {
+ default:
+ return UnableToLegalize;
+ case TargetOpcode::G_ADD:
+ case TargetOpcode::G_AND:
+ case TargetOpcode::G_MUL:
+ case TargetOpcode::G_OR:
+ case TargetOpcode::G_XOR:
+ case TargetOpcode::G_SUB: {
+ // Perform operation at larger width (any extension is fine here, high bits
+ // don't affect the result) and then truncate the result back to the
+ // original type.
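+    // e.g. an s1 G_ADD widened to s8: G_ANYEXT both sources, do the G_ADD at
+    // s8, then G_TRUNC the result back to s1; the low bits are unaffected by
+    // whatever the anyext leaves in the high bits.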
+ unsigned Src1Ext = MRI.createGenericVirtualRegister(WideTy);
+ unsigned Src2Ext = MRI.createGenericVirtualRegister(WideTy);
+ MIRBuilder.buildAnyExt(Src1Ext, MI.getOperand(1).getReg());
+ MIRBuilder.buildAnyExt(Src2Ext, MI.getOperand(2).getReg());
+
+ unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
+ MIRBuilder.buildInstr(MI.getOpcode())
+ .addDef(DstExt)
+ .addUse(Src1Ext)
+ .addUse(Src2Ext);
+
+ MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), DstExt);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_SDIV:
+ case TargetOpcode::G_UDIV: {
+ unsigned ExtOp = MI.getOpcode() == TargetOpcode::G_SDIV
+ ? TargetOpcode::G_SEXT
+ : TargetOpcode::G_ZEXT;
+
+ unsigned LHSExt = MRI.createGenericVirtualRegister(WideTy);
+ MIRBuilder.buildInstr(ExtOp).addDef(LHSExt).addUse(
+ MI.getOperand(1).getReg());
+
+ unsigned RHSExt = MRI.createGenericVirtualRegister(WideTy);
+ MIRBuilder.buildInstr(ExtOp).addDef(RHSExt).addUse(
+ MI.getOperand(2).getReg());
+
+ unsigned ResExt = MRI.createGenericVirtualRegister(WideTy);
+ MIRBuilder.buildInstr(MI.getOpcode())
+ .addDef(ResExt)
+ .addUse(LHSExt)
+ .addUse(RHSExt);
+
+ MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), ResExt);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_LOAD: {
+ assert(alignTo(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(), 8) ==
+ WideTy.getSizeInBits() &&
+ "illegal to increase number of bytes loaded");
+
+ unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
+ MIRBuilder.buildLoad(DstExt, MI.getOperand(1).getReg(),
+ **MI.memoperands_begin());
+ MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), DstExt);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_STORE: {
+ assert(alignTo(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(), 8) ==
+ WideTy.getSizeInBits() &&
+ "illegal to increase number of bytes modified by a store");
+
+ unsigned SrcExt = MRI.createGenericVirtualRegister(WideTy);
+ MIRBuilder.buildAnyExt(SrcExt, MI.getOperand(0).getReg());
+ MIRBuilder.buildStore(SrcExt, MI.getOperand(1).getReg(),
+ **MI.memoperands_begin());
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_CONSTANT: {
+ unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
+ MIRBuilder.buildConstant(DstExt, *MI.getOperand(1).getCImm());
+ MIRBuilder.buildTrunc(MI.getOperand(0).getReg(), DstExt);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_FCONSTANT: {
+ unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
+ MIRBuilder.buildFConstant(DstExt, *MI.getOperand(1).getFPImm());
+ MIRBuilder.buildFPTrunc(MI.getOperand(0).getReg(), DstExt);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_BRCOND: {
+ unsigned TstExt = MRI.createGenericVirtualRegister(WideTy);
+ MIRBuilder.buildAnyExt(TstExt, MI.getOperand(0).getReg());
+ MIRBuilder.buildBrCond(TstExt, *MI.getOperand(1).getMBB());
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_ICMP: {
+ assert(TypeIdx == 1 && "unable to legalize predicate");
+ bool IsSigned = CmpInst::isSigned(
+ static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()));
+ unsigned Op0Ext = MRI.createGenericVirtualRegister(WideTy);
+ unsigned Op1Ext = MRI.createGenericVirtualRegister(WideTy);
+ if (IsSigned) {
+ MIRBuilder.buildSExt(Op0Ext, MI.getOperand(2).getReg());
+ MIRBuilder.buildSExt(Op1Ext, MI.getOperand(3).getReg());
+ } else {
+ MIRBuilder.buildZExt(Op0Ext, MI.getOperand(2).getReg());
+ MIRBuilder.buildZExt(Op1Ext, MI.getOperand(3).getReg());
+ }
+ MIRBuilder.buildICmp(
+ static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate()),
+ MI.getOperand(0).getReg(), Op0Ext, Op1Ext);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_GEP: {
+ assert(TypeIdx == 1 && "unable to legalize pointer of GEP");
+ unsigned OffsetExt = MRI.createGenericVirtualRegister(WideTy);
+ MIRBuilder.buildSExt(OffsetExt, MI.getOperand(2).getReg());
+ MI.getOperand(2).setReg(OffsetExt);
+ return Legalized;
+ }
+ }
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
+ using namespace TargetOpcode;
+ MIRBuilder.setInstr(MI);
+
+ switch(MI.getOpcode()) {
+ default:
+ return UnableToLegalize;
+ case TargetOpcode::G_SREM:
+ case TargetOpcode::G_UREM: {
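+    // Rewrite the remainder as a - (a / b) * b, using G_SDIV or G_UDIV to
+    // match the signedness of the original opcode.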
+ unsigned QuotReg = MRI.createGenericVirtualRegister(Ty);
+ MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV)
+ .addDef(QuotReg)
+ .addUse(MI.getOperand(1).getReg())
+ .addUse(MI.getOperand(2).getReg());
+
+ unsigned ProdReg = MRI.createGenericVirtualRegister(Ty);
+ MIRBuilder.buildMul(ProdReg, QuotReg, MI.getOperand(2).getReg());
+ MIRBuilder.buildSub(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
+ ProdReg);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ }
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ // FIXME: Don't know how to handle secondary types yet.
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+ switch (MI.getOpcode()) {
+ default:
+ return UnableToLegalize;
+ case TargetOpcode::G_ADD: {
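+    // Split the wide vector add into NumParts adds on NarrowTy and recombine
+    // the partial results with G_SEQUENCE; unlike scalar narrowing, no carry
+    // chaining is needed because the elements are independent.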
+ unsigned NarrowSize = NarrowTy.getSizeInBits();
+ unsigned DstReg = MI.getOperand(0).getReg();
+ int NumParts = MRI.getType(DstReg).getSizeInBits() / NarrowSize;
+
+ MIRBuilder.setInstr(MI);
+
+ SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
+ SmallVector<uint64_t, 2> Indexes;
+ extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
+ extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
+
+ for (int i = 0; i < NumParts; ++i) {
+ unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
+ MIRBuilder.buildAdd(DstReg, Src1Regs[i], Src2Regs[i]);
+ DstRegs.push_back(DstReg);
+ Indexes.push_back(i * NarrowSize);
+ }
+
+ MIRBuilder.buildSequence(DstReg, DstRegs, Indexes);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ }
+}
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
new file mode 100644
index 000000000000..e49662075ed5
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
@@ -0,0 +1,182 @@
+//===---- lib/CodeGen/GlobalISel/LegalizerInfo.cpp - Legalizer -------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implement an interface to specify and query how an illegal operation on a
+// given type should be expanded.
+//
+// Issues to be resolved:
+// + Make it fast.
+// + Support weird types like i3, <7 x i3>, ...
+// + Operations with more than one type (ICMP, CMPXCHG, intrinsics, ...)
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+
+#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Target/TargetOpcodes.h"
+using namespace llvm;
+
+LegalizerInfo::LegalizerInfo() : TablesInitialized(false) {
+  // FIXME: these two can be legalized to the fundamental load/store that
+  // Jakob proposed, once loads & stores are supported.
+ DefaultActions[TargetOpcode::G_ANYEXT] = Legal;
+ DefaultActions[TargetOpcode::G_TRUNC] = Legal;
+
+ DefaultActions[TargetOpcode::G_INTRINSIC] = Legal;
+ DefaultActions[TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS] = Legal;
+
+ DefaultActions[TargetOpcode::G_ADD] = NarrowScalar;
+ DefaultActions[TargetOpcode::G_LOAD] = NarrowScalar;
+ DefaultActions[TargetOpcode::G_STORE] = NarrowScalar;
+
+ DefaultActions[TargetOpcode::G_BRCOND] = WidenScalar;
+}
+
+void LegalizerInfo::computeTables() {
+ for (unsigned Opcode = 0; Opcode <= LastOp - FirstOp; ++Opcode) {
+ for (unsigned Idx = 0; Idx != Actions[Opcode].size(); ++Idx) {
+ for (auto &Action : Actions[Opcode][Idx]) {
+ LLT Ty = Action.first;
+ if (!Ty.isVector())
+ continue;
+
+ auto &Entry = MaxLegalVectorElts[std::make_pair(Opcode + FirstOp,
+ Ty.getElementType())];
+ Entry = std::max(Entry, Ty.getNumElements());
+ }
+ }
+ }
+
+ TablesInitialized = true;
+}
+
+// FIXME: inefficient implementation for now. Without ComputeValueVTs we're
+// probably going to need specialized lookup structures for various types before
+// we have any hope of doing well with something like <13 x i3>. Even the common
+// cases should do better than what we have now.
+std::pair<LegalizerInfo::LegalizeAction, LLT>
+LegalizerInfo::getAction(const InstrAspect &Aspect) const {
+ assert(TablesInitialized && "backend forgot to call computeTables");
+ // These *have* to be implemented for now, they're the fundamental basis of
+ // how everything else is transformed.
+
+ // Nothing is going to go well with types that aren't a power of 2 yet, so
+ // don't even try because we might make things worse.
+ if (!isPowerOf2_64(Aspect.Type.getSizeInBits()))
+ return std::make_pair(Unsupported, LLT());
+
+ // FIXME: the long-term plan calls for expansion in terms of load/store (if
+ // they're not legal).
+ if (Aspect.Opcode == TargetOpcode::G_SEQUENCE ||
+ Aspect.Opcode == TargetOpcode::G_EXTRACT)
+ return std::make_pair(Legal, Aspect.Type);
+
+ LegalizeAction Action = findInActions(Aspect);
+ if (Action != NotFound)
+ return findLegalAction(Aspect, Action);
+
+ unsigned Opcode = Aspect.Opcode;
+ LLT Ty = Aspect.Type;
+ if (!Ty.isVector()) {
+ auto DefaultAction = DefaultActions.find(Aspect.Opcode);
+ if (DefaultAction != DefaultActions.end() && DefaultAction->second == Legal)
+ return std::make_pair(Legal, Ty);
+
+ if (DefaultAction == DefaultActions.end() ||
+ DefaultAction->second != NarrowScalar)
+ return std::make_pair(Unsupported, LLT());
+ return findLegalAction(Aspect, NarrowScalar);
+ }
+
+ LLT EltTy = Ty.getElementType();
+ int NumElts = Ty.getNumElements();
+
+ auto ScalarAction = ScalarInVectorActions.find(std::make_pair(Opcode, EltTy));
+ if (ScalarAction != ScalarInVectorActions.end() &&
+ ScalarAction->second != Legal)
+ return findLegalAction(Aspect, ScalarAction->second);
+
+ // The element type is legal in principle, but the number of elements is
+ // wrong.
+ auto MaxLegalElts = MaxLegalVectorElts.lookup(std::make_pair(Opcode, EltTy));
+ if (MaxLegalElts > NumElts)
+ return findLegalAction(Aspect, MoreElements);
+
+ if (MaxLegalElts == 0) {
+ // Scalarize if there's no legal vector type, which is just a special case
+ // of FewerElements.
+ return std::make_pair(FewerElements, EltTy);
+ }
+
+ return findLegalAction(Aspect, FewerElements);
+}
+
+std::tuple<LegalizerInfo::LegalizeAction, unsigned, LLT>
+LegalizerInfo::getAction(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI) const {
+ SmallBitVector SeenTypes(8);
+ const MCOperandInfo *OpInfo = MI.getDesc().OpInfo;
+ for (unsigned i = 0; i < MI.getDesc().getNumOperands(); ++i) {
+ if (!OpInfo[i].isGenericType())
+ continue;
+
+ // We don't want to repeatedly check the same operand index; that
+ // could get expensive.
+ unsigned TypeIdx = OpInfo[i].getGenericTypeIndex();
+ if (SeenTypes[TypeIdx])
+ continue;
+
+ SeenTypes.set(TypeIdx);
+
+ LLT Ty = MRI.getType(MI.getOperand(i).getReg());
+ auto Action = getAction({MI.getOpcode(), TypeIdx, Ty});
+ if (Action.first != Legal)
+ return std::make_tuple(Action.first, TypeIdx, Action.second);
+ }
+ return std::make_tuple(Legal, 0, LLT{});
+}
+
+bool LegalizerInfo::isLegal(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI) const {
+ return std::get<0>(getAction(MI, MRI)) == Legal;
+}
+
+LLT LegalizerInfo::findLegalType(const InstrAspect &Aspect,
+ LegalizeAction Action) const {
+ switch(Action) {
+ default:
+ llvm_unreachable("Cannot find legal type");
+ case Legal:
+ case Lower:
+ case Libcall:
+ return Aspect.Type;
+ case NarrowScalar: {
+ return findLegalType(Aspect,
+ [&](LLT Ty) -> LLT { return Ty.halfScalarSize(); });
+ }
+ case WidenScalar: {
+ return findLegalType(Aspect, [&](LLT Ty) -> LLT {
+ return Ty.getSizeInBits() < 8 ? LLT::scalar(8) : Ty.doubleScalarSize();
+ });
+ }
+ case FewerElements: {
+ return findLegalType(Aspect,
+ [&](LLT Ty) -> LLT { return Ty.halfElements(); });
+ }
+ case MoreElements: {
+ return findLegalType(Aspect,
+ [&](LLT Ty) -> LLT { return Ty.doubleElements(); });
+ }
+ }
+}
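For orientation, the tables this file builds are populated by a target-specific subclass before computeTables() runs. The following is a minimal, illustrative sketch and is not part of this change: it assumes the setAction() mutator declared next to these queries in LegalizerInfo.h, and the opcodes and types chosen are arbitrary.

  #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
  #include "llvm/Target/TargetOpcodes.h"
  using namespace llvm;

  namespace {
  // Hypothetical target legalizer info; real targets expose this through
  // their TargetSubtargetInfo::getLegalizerInfo().
  class MyTargetLegalizerInfo : public LegalizerInfo {
  public:
    MyTargetLegalizerInfo() {
      const LLT s1 = LLT::scalar(1);
      const LLT s32 = LLT::scalar(32);
      const LLT s64 = LLT::scalar(64);

      // 32/64-bit adds are handled natively; wider adds fall back to the
      // G_ADD default installed in the base constructor (NarrowScalar).
      setAction({TargetOpcode::G_ADD, s32}, Legal);
      setAction({TargetOpcode::G_ADD, s64}, Legal);

      // i1 branch conditions get widened, matching the G_BRCOND default.
      setAction({TargetOpcode::G_BRCOND, s1}, WidenScalar);

      computeTables(); // must run before any getAction() query
    }
  };
  } // end anonymous namespace

With such a setup, getAction({TargetOpcode::G_ADD, 0, LLT::scalar(128)}) would report NarrowScalar with a 64-bit narrow type, which is the case the G_ADD expansion in the Legalizer hunk above handles.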
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 2f19bcf1e68b..c04f6e4ae897 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -14,6 +14,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOpcodes.h"
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -23,82 +24,408 @@ using namespace llvm;
void MachineIRBuilder::setMF(MachineFunction &MF) {
this->MF = &MF;
this->MBB = nullptr;
+ this->MRI = &MF.getRegInfo();
this->TII = MF.getSubtarget().getInstrInfo();
this->DL = DebugLoc();
- this->MI = nullptr;
+ this->II = MachineBasicBlock::iterator();
+ this->InsertedInstr = nullptr;
}
-void MachineIRBuilder::setMBB(MachineBasicBlock &MBB, bool Beginning) {
+void MachineIRBuilder::setMBB(MachineBasicBlock &MBB) {
this->MBB = &MBB;
- Before = Beginning;
+ this->II = MBB.end();
assert(&getMF() == MBB.getParent() &&
"Basic block is in a different function");
}
-void MachineIRBuilder::setInstr(MachineInstr &MI, bool Before) {
+void MachineIRBuilder::setInstr(MachineInstr &MI) {
assert(MI.getParent() && "Instruction is not part of a basic block");
setMBB(*MI.getParent());
- this->MI = &MI;
- this->Before = Before;
+ this->II = MI.getIterator();
}
-MachineBasicBlock::iterator MachineIRBuilder::getInsertPt() {
- if (MI) {
- if (Before)
- return MI;
- if (!MI->getNextNode())
- return getMBB().end();
- return MI->getNextNode();
- }
- return Before ? getMBB().begin() : getMBB().end();
+void MachineIRBuilder::setInsertPt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator II) {
+ assert(MBB.getParent() == &getMF() &&
+ "Basic block is in a different function");
+ this->MBB = &MBB;
+ this->II = II;
+}
+
+void MachineIRBuilder::recordInsertions(
+ std::function<void(MachineInstr *)> Inserted) {
+ InsertedInstr = Inserted;
+}
+
+void MachineIRBuilder::stopRecordingInsertions() {
+ InsertedInstr = nullptr;
}
//------------------------------------------------------------------------------
// Build instruction variants.
//------------------------------------------------------------------------------
-MachineInstr *MachineIRBuilder::buildInstr(unsigned Opcode, Type *Ty) {
- MachineInstr *NewMI = BuildMI(getMF(), DL, getTII().get(Opcode));
- if (Ty) {
- assert(isPreISelGenericOpcode(Opcode) &&
- "Only generic instruction can have a type");
- NewMI->setType(Ty);
- } else
- assert(!isPreISelGenericOpcode(Opcode) &&
- "Generic instruction must have a type");
- getMBB().insert(getInsertPt(), NewMI);
- return NewMI;
+
+MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opcode) {
+ return insertInstr(buildInstrNoInsert(Opcode));
}
-MachineInstr *MachineIRBuilder::buildInstr(unsigned Opcode, unsigned Res,
- unsigned Op0, unsigned Op1) {
- return buildInstr(Opcode, nullptr, Res, Op0, Op1);
+MachineInstrBuilder MachineIRBuilder::buildInstrNoInsert(unsigned Opcode) {
+ MachineInstrBuilder MIB = BuildMI(getMF(), DL, getTII().get(Opcode));
+ return MIB;
}
-MachineInstr *MachineIRBuilder::buildInstr(unsigned Opcode, Type *Ty,
- unsigned Res, unsigned Op0,
- unsigned Op1) {
- MachineInstr *NewMI = buildInstr(Opcode, Ty);
- MachineInstrBuilder(getMF(), NewMI)
- .addReg(Res, RegState::Define)
- .addReg(Op0)
- .addReg(Op1);
- return NewMI;
+
+MachineInstrBuilder MachineIRBuilder::insertInstr(MachineInstrBuilder MIB) {
+ getMBB().insert(getInsertPt(), MIB);
+ if (InsertedInstr)
+ InsertedInstr(MIB);
+ return MIB;
}
-MachineInstr *MachineIRBuilder::buildInstr(unsigned Opcode, unsigned Res,
- unsigned Op0) {
- MachineInstr *NewMI = buildInstr(Opcode, nullptr);
- MachineInstrBuilder(getMF(), NewMI).addReg(Res, RegState::Define).addReg(Op0);
- return NewMI;
+MachineInstrBuilder MachineIRBuilder::buildFrameIndex(unsigned Res, int Idx) {
+ assert(MRI->getType(Res).isPointer() && "invalid operand type");
+ return buildInstr(TargetOpcode::G_FRAME_INDEX)
+ .addDef(Res)
+ .addFrameIndex(Idx);
}
-MachineInstr *MachineIRBuilder::buildInstr(unsigned Opcode) {
- return buildInstr(Opcode, nullptr);
+MachineInstrBuilder MachineIRBuilder::buildGlobalValue(unsigned Res,
+ const GlobalValue *GV) {
+ assert(MRI->getType(Res).isPointer() && "invalid operand type");
+ assert(MRI->getType(Res).getAddressSpace() ==
+ GV->getType()->getAddressSpace() &&
+ "address space mismatch");
+
+ return buildInstr(TargetOpcode::G_GLOBAL_VALUE)
+ .addDef(Res)
+ .addGlobalAddress(GV);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildAdd(unsigned Res, unsigned Op0,
+ unsigned Op1) {
+ assert((MRI->getType(Res).isScalar() || MRI->getType(Res).isVector()) &&
+ "invalid operand type");
+ assert(MRI->getType(Res) == MRI->getType(Op0) &&
+ MRI->getType(Res) == MRI->getType(Op1) && "type mismatch");
+
+ return buildInstr(TargetOpcode::G_ADD)
+ .addDef(Res)
+ .addUse(Op0)
+ .addUse(Op1);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildGEP(unsigned Res, unsigned Op0,
+ unsigned Op1) {
+ assert(MRI->getType(Res).isPointer() &&
+ MRI->getType(Res) == MRI->getType(Op0) && "type mismatch");
+ assert(MRI->getType(Op1).isScalar() && "invalid offset type");
+
+ return buildInstr(TargetOpcode::G_GEP)
+ .addDef(Res)
+ .addUse(Op0)
+ .addUse(Op1);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildSub(unsigned Res, unsigned Op0,
+ unsigned Op1) {
+ assert((MRI->getType(Res).isScalar() || MRI->getType(Res).isVector()) &&
+ "invalid operand type");
+ assert(MRI->getType(Res) == MRI->getType(Op0) &&
+ MRI->getType(Res) == MRI->getType(Op1) && "type mismatch");
+
+ return buildInstr(TargetOpcode::G_SUB)
+ .addDef(Res)
+ .addUse(Op0)
+ .addUse(Op1);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildMul(unsigned Res, unsigned Op0,
+ unsigned Op1) {
+ assert((MRI->getType(Res).isScalar() || MRI->getType(Res).isVector()) &&
+ "invalid operand type");
+ assert(MRI->getType(Res) == MRI->getType(Op0) &&
+ MRI->getType(Res) == MRI->getType(Op1) && "type mismatch");
+
+ return buildInstr(TargetOpcode::G_MUL)
+ .addDef(Res)
+ .addUse(Op0)
+ .addUse(Op1);
}
-MachineInstr *MachineIRBuilder::buildInstr(unsigned Opcode, Type *Ty,
- MachineBasicBlock &BB) {
- MachineInstr *NewMI = buildInstr(Opcode, Ty);
- MachineInstrBuilder(getMF(), NewMI).addMBB(&BB);
- return NewMI;
+MachineInstrBuilder MachineIRBuilder::buildBr(MachineBasicBlock &Dest) {
+ return buildInstr(TargetOpcode::G_BR).addMBB(&Dest);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildCopy(unsigned Res, unsigned Op) {
+ return buildInstr(TargetOpcode::COPY).addDef(Res).addUse(Op);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildConstant(unsigned Res,
+ const ConstantInt &Val) {
+ LLT Ty = MRI->getType(Res);
+
+ assert((Ty.isScalar() || Ty.isPointer()) && "invalid operand type");
+
+ const ConstantInt *NewVal = &Val;
+ if (Ty.getSizeInBits() != Val.getBitWidth())
+ NewVal = ConstantInt::get(MF->getFunction()->getContext(),
+ Val.getValue().sextOrTrunc(Ty.getSizeInBits()));
+
+ return buildInstr(TargetOpcode::G_CONSTANT).addDef(Res).addCImm(NewVal);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildConstant(unsigned Res,
+ int64_t Val) {
+ auto IntN = IntegerType::get(MF->getFunction()->getContext(),
+ MRI->getType(Res).getSizeInBits());
+ ConstantInt *CI = ConstantInt::get(IntN, Val, true);
+ return buildConstant(Res, *CI);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildFConstant(unsigned Res,
+ const ConstantFP &Val) {
+ assert(MRI->getType(Res).isScalar() && "invalid operand type");
+
+ return buildInstr(TargetOpcode::G_FCONSTANT).addDef(Res).addFPImm(&Val);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildBrCond(unsigned Tst,
+ MachineBasicBlock &Dest) {
+ assert(MRI->getType(Tst).isScalar() && "invalid operand type");
+
+ return buildInstr(TargetOpcode::G_BRCOND).addUse(Tst).addMBB(&Dest);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildLoad(unsigned Res, unsigned Addr,
+ MachineMemOperand &MMO) {
+ assert(MRI->getType(Res).isValid() && "invalid operand type");
+ assert(MRI->getType(Addr).isPointer() && "invalid operand type");
+
+ return buildInstr(TargetOpcode::G_LOAD)
+ .addDef(Res)
+ .addUse(Addr)
+ .addMemOperand(&MMO);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildStore(unsigned Val, unsigned Addr,
+ MachineMemOperand &MMO) {
+ assert(MRI->getType(Val).isValid() && "invalid operand type");
+ assert(MRI->getType(Addr).isPointer() && "invalid operand type");
+
+ return buildInstr(TargetOpcode::G_STORE)
+ .addUse(Val)
+ .addUse(Addr)
+ .addMemOperand(&MMO);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildUAdde(unsigned Res,
+ unsigned CarryOut,
+ unsigned Op0, unsigned Op1,
+ unsigned CarryIn) {
+ assert(MRI->getType(Res).isScalar() && "invalid operand type");
+ assert(MRI->getType(Res) == MRI->getType(Op0) &&
+ MRI->getType(Res) == MRI->getType(Op1) && "type mismatch");
+ assert(MRI->getType(CarryOut).isScalar() && "invalid operand type");
+ assert(MRI->getType(CarryOut) == MRI->getType(CarryIn) && "type mismatch");
+
+ return buildInstr(TargetOpcode::G_UADDE)
+ .addDef(Res)
+ .addDef(CarryOut)
+ .addUse(Op0)
+ .addUse(Op1)
+ .addUse(CarryIn);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildAnyExt(unsigned Res, unsigned Op) {
+ validateTruncExt(Res, Op, true);
+ return buildInstr(TargetOpcode::G_ANYEXT).addDef(Res).addUse(Op);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildSExt(unsigned Res, unsigned Op) {
+ validateTruncExt(Res, Op, true);
+ return buildInstr(TargetOpcode::G_SEXT).addDef(Res).addUse(Op);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildZExt(unsigned Res, unsigned Op) {
+ validateTruncExt(Res, Op, true);
+ return buildInstr(TargetOpcode::G_ZEXT).addDef(Res).addUse(Op);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildSExtOrTrunc(unsigned Res,
+ unsigned Op) {
+ unsigned Opcode = TargetOpcode::COPY;
+ if (MRI->getType(Res).getSizeInBits() > MRI->getType(Op).getSizeInBits())
+ Opcode = TargetOpcode::G_SEXT;
+ else if (MRI->getType(Res).getSizeInBits() < MRI->getType(Op).getSizeInBits())
+ Opcode = TargetOpcode::G_TRUNC;
+
+ return buildInstr(Opcode).addDef(Res).addUse(Op);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildExtract(ArrayRef<unsigned> Results,
+ ArrayRef<uint64_t> Indices,
+ unsigned Src) {
+#ifndef NDEBUG
+ assert(Results.size() == Indices.size() && "inconsistent number of regs");
+ assert(!Results.empty() && "invalid trivial extract");
+ assert(std::is_sorted(Indices.begin(), Indices.end()) &&
+ "extract offsets must be in ascending order");
+
+ assert(MRI->getType(Src).isValid() && "invalid operand type");
+ for (auto Res : Results)
+ assert(MRI->getType(Res).isValid() && "invalid operand type");
+#endif
+
+ auto MIB = BuildMI(getMF(), DL, getTII().get(TargetOpcode::G_EXTRACT));
+ for (auto Res : Results)
+ MIB.addDef(Res);
+
+ MIB.addUse(Src);
+
+ for (auto Idx : Indices)
+ MIB.addImm(Idx);
+
+ getMBB().insert(getInsertPt(), MIB);
+ if (InsertedInstr)
+ InsertedInstr(MIB);
+
+ return MIB;
+}
+
+MachineInstrBuilder
+MachineIRBuilder::buildSequence(unsigned Res,
+ ArrayRef<unsigned> Ops,
+ ArrayRef<uint64_t> Indices) {
+#ifndef NDEBUG
+ assert(Ops.size() == Indices.size() && "incompatible args");
+ assert(!Ops.empty() && "invalid trivial sequence");
+ assert(std::is_sorted(Indices.begin(), Indices.end()) &&
+ "sequence offsets must be in ascending order");
+
+ assert(MRI->getType(Res).isValid() && "invalid operand type");
+ for (auto Op : Ops)
+ assert(MRI->getType(Op).isValid() && "invalid operand type");
+#endif
+
+ MachineInstrBuilder MIB = buildInstr(TargetOpcode::G_SEQUENCE);
+ MIB.addDef(Res);
+ for (unsigned i = 0; i < Ops.size(); ++i) {
+ MIB.addUse(Ops[i]);
+ MIB.addImm(Indices[i]);
+ }
+ return MIB;
+}
+
+MachineInstrBuilder MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID,
+ unsigned Res,
+ bool HasSideEffects) {
+ auto MIB =
+ buildInstr(HasSideEffects ? TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS
+ : TargetOpcode::G_INTRINSIC);
+ if (Res)
+ MIB.addDef(Res);
+ MIB.addIntrinsicID(ID);
+ return MIB;
+}
+
+MachineInstrBuilder MachineIRBuilder::buildTrunc(unsigned Res, unsigned Op) {
+ validateTruncExt(Res, Op, false);
+ return buildInstr(TargetOpcode::G_TRUNC).addDef(Res).addUse(Op);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildFPTrunc(unsigned Res, unsigned Op) {
+ validateTruncExt(Res, Op, false);
+ return buildInstr(TargetOpcode::G_FPTRUNC).addDef(Res).addUse(Op);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildICmp(CmpInst::Predicate Pred,
+ unsigned Res, unsigned Op0,
+ unsigned Op1) {
+#ifndef NDEBUG
+ assert(MRI->getType(Op0) == MRI->getType(Op1) && "type mismatch");
+ assert(CmpInst::isIntPredicate(Pred) && "invalid predicate");
+ if (MRI->getType(Op0).isScalar() || MRI->getType(Op0).isPointer())
+ assert(MRI->getType(Res).isScalar() && "type mismatch");
+ else
+ assert(MRI->getType(Res).isVector() &&
+ MRI->getType(Res).getNumElements() ==
+ MRI->getType(Op0).getNumElements() &&
+ "type mismatch");
+#endif
+
+ return buildInstr(TargetOpcode::G_ICMP)
+ .addDef(Res)
+ .addPredicate(Pred)
+ .addUse(Op0)
+ .addUse(Op1);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildFCmp(CmpInst::Predicate Pred,
+ unsigned Res, unsigned Op0,
+ unsigned Op1) {
+#ifndef NDEBUG
+ assert((MRI->getType(Op0).isScalar() || MRI->getType(Op0).isVector()) &&
+ "invalid operand type");
+ assert(MRI->getType(Op0) == MRI->getType(Op1) && "type mismatch");
+ assert(CmpInst::isFPPredicate(Pred) && "invalid predicate");
+ if (MRI->getType(Op0).isScalar())
+ assert(MRI->getType(Res).isScalar() && "type mismatch");
+ else
+ assert(MRI->getType(Res).isVector() &&
+ MRI->getType(Res).getNumElements() ==
+ MRI->getType(Op0).getNumElements() &&
+ "type mismatch");
+#endif
+
+ return buildInstr(TargetOpcode::G_FCMP)
+ .addDef(Res)
+ .addPredicate(Pred)
+ .addUse(Op0)
+ .addUse(Op1);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildSelect(unsigned Res, unsigned Tst,
+ unsigned Op0, unsigned Op1) {
+#ifndef NDEBUG
+ LLT ResTy = MRI->getType(Res);
+ assert((ResTy.isScalar() || ResTy.isVector() || ResTy.isPointer()) &&
+ "invalid operand type");
+ assert(ResTy == MRI->getType(Op0) && ResTy == MRI->getType(Op1) &&
+ "type mismatch");
+ if (ResTy.isScalar() || ResTy.isPointer())
+ assert(MRI->getType(Tst).isScalar() && "type mismatch");
+ else
+ assert(MRI->getType(Tst).isVector() &&
+ MRI->getType(Tst).getNumElements() ==
+ MRI->getType(Op0).getNumElements() &&
+ "type mismatch");
+#endif
+
+ return buildInstr(TargetOpcode::G_SELECT)
+ .addDef(Res)
+ .addUse(Tst)
+ .addUse(Op0)
+ .addUse(Op1);
+}
+
+void MachineIRBuilder::validateTruncExt(unsigned Dst, unsigned Src,
+ bool IsExtend) {
+#ifndef NDEBUG
+ LLT SrcTy = MRI->getType(Src);
+ LLT DstTy = MRI->getType(Dst);
+
+ if (DstTy.isVector()) {
+ assert(SrcTy.isVector() && "mismatched cast between vector and non-vector");
+ assert(SrcTy.getNumElements() == DstTy.getNumElements() &&
+ "different number of elements in a trunc/ext");
+ } else
+ assert(DstTy.isScalar() && SrcTy.isScalar() && "invalid extend/trunc");
+
+ if (IsExtend)
+ assert(DstTy.getSizeInBits() > SrcTy.getSizeInBits() &&
+ "invalid narrowing extend");
+ else
+ assert(DstTy.getSizeInBits() < SrcTy.getSizeInBits() &&
+ "invalid widening trunc");
+#endif
}
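As a quick illustration of the new builder interface (not part of this patch; the function, register names, and types are invented), a client such as the IRTranslator or the Legalizer drives it roughly like this:

  #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
  #include "llvm/CodeGen/MachineRegisterInfo.h"
  using namespace llvm;

  // Append "Sum = SrcReg + 1" at the end of MBB. SrcReg is assumed to already
  // be a 64-bit generic virtual register.
  static void emitAddOne(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                         MachineBasicBlock &MBB, unsigned SrcReg) {
    MIRBuilder.setMBB(MBB);                 // insertion point = end of MBB
    const LLT s64 = LLT::scalar(64);
    unsigned One = MRI.createGenericVirtualRegister(s64);
    unsigned Sum = MRI.createGenericVirtualRegister(s64);
    MIRBuilder.buildConstant(One, 1);       // G_CONSTANT i64 1
    MIRBuilder.buildAdd(Sum, SrcReg, One);  // G_ADD s64
  }

The buildConstant/buildAdd asserts above fire if the register types do not line up, which is why the generic vregs are created with an explicit LLT first.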
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
index 419e270c9127..04bb7ca5ba9e 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp
@@ -12,10 +12,12 @@
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/CommandLine.h"
@@ -31,18 +33,18 @@ static cl::opt<RegBankSelect::Mode> RegBankSelectMode(
cl::values(clEnumValN(RegBankSelect::Mode::Fast, "regbankselect-fast",
"Run the Fast mode (default mapping)"),
clEnumValN(RegBankSelect::Mode::Greedy, "regbankselect-greedy",
- "Use the Greedy mode (best local mapping)"),
- clEnumValEnd));
+ "Use the Greedy mode (best local mapping)")));
char RegBankSelect::ID = 0;
-INITIALIZE_PASS_BEGIN(RegBankSelect, "regbankselect",
+INITIALIZE_PASS_BEGIN(RegBankSelect, DEBUG_TYPE,
"Assign register bank of generic virtual registers",
false, false);
INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
-INITIALIZE_PASS_END(RegBankSelect, "regbankselect",
+INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE,
"Assign register bank of generic virtual registers", false,
- false);
+ false)
RegBankSelect::RegBankSelect(Mode RunningMode)
: MachineFunctionPass(ID), RBI(nullptr), MRI(nullptr), TRI(nullptr),
@@ -60,6 +62,7 @@ void RegBankSelect::init(MachineFunction &MF) {
assert(RBI && "Cannot work without RegisterBankInfo");
MRI = &MF.getRegInfo();
TRI = MF.getSubtarget().getRegisterInfo();
+ TPC = &getAnalysis<TargetPassConfig>();
if (OptMode != Mode::Fast) {
MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
@@ -77,6 +80,7 @@ void RegBankSelect::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<MachineBlockFrequencyInfo>();
AU.addRequired<MachineBranchProbabilityInfo>();
}
+ AU.addRequired<TargetPassConfig>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -87,7 +91,7 @@ bool RegBankSelect::assignmentMatch(
OnlyAssign = false;
// Each part of a break down needs to end up in a different register.
// In other words, Reg assignment does not match.
- if (ValMapping.BreakDown.size() > 1)
+ if (ValMapping.NumBreakDowns > 1)
return false;
const RegisterBank *CurRegBank = RBI->getRegBank(Reg, *MRI, *TRI);
@@ -103,11 +107,13 @@ bool RegBankSelect::assignmentMatch(
return CurRegBank == DesiredRegBrank;
}
-void RegBankSelect::repairReg(
+bool RegBankSelect::repairReg(
MachineOperand &MO, const RegisterBankInfo::ValueMapping &ValMapping,
RegBankSelect::RepairingPlacement &RepairPt,
const iterator_range<SmallVectorImpl<unsigned>::const_iterator> &NewVRegs) {
- assert(ValMapping.BreakDown.size() == 1 && "Not yet implemented");
+ if (ValMapping.NumBreakDowns != 1 && !TPC->isGlobalISelAbortEnabled())
+ return false;
+ assert(ValMapping.NumBreakDowns == 1 && "Not yet implemented");
// An empty range of new register means no repairing.
assert(NewVRegs.begin() != NewVRegs.end() && "We should not have to repair");
@@ -126,7 +132,7 @@ void RegBankSelect::repairReg(
"We are about to create several defs for Dst");
// Build the instruction used to repair, then clone it at the right places.
- MachineInstr *MI = MIRBuilder.buildInstr(TargetOpcode::COPY, Dst, Src);
+ MachineInstr *MI = MIRBuilder.buildCopy(Dst, Src);
MI->removeFromParent();
DEBUG(dbgs() << "Copy: " << PrintReg(Src) << " to: " << PrintReg(Dst)
<< '\n');
@@ -149,15 +155,16 @@ void RegBankSelect::repairReg(
}
// TODO:
// Legalize NewInstrs if need be.
+ return true;
}
uint64_t RegBankSelect::getRepairCost(
const MachineOperand &MO,
const RegisterBankInfo::ValueMapping &ValMapping) const {
assert(MO.isReg() && "We should only repair register operand");
- assert(!ValMapping.BreakDown.empty() && "Nothing to map??");
+ assert(ValMapping.NumBreakDowns && "Nothing to map??");
- bool IsSameNumOfValues = ValMapping.BreakDown.size() == 1;
+ bool IsSameNumOfValues = ValMapping.NumBreakDowns == 1;
const RegisterBank *CurRegBank = RBI->getRegBank(MO.getReg(), *MRI, *TRI);
// If MO does not have a register bank, we should have just been
// able to set one unless we have to break the value down.
@@ -195,16 +202,20 @@ uint64_t RegBankSelect::getRepairCost(
// TODO: use a dedicated constant for ImpossibleCost.
if (Cost != UINT_MAX)
return Cost;
- assert(false && "Legalization not available yet");
+ assert(!TPC->isGlobalISelAbortEnabled() &&
+ "Legalization not available yet");
// Return the legalization cost of that repairing.
}
- assert(false && "Complex repairing not implemented yet");
- return 1;
+ assert(!TPC->isGlobalISelAbortEnabled() &&
+ "Complex repairing not implemented yet");
+ return UINT_MAX;
}
RegisterBankInfo::InstructionMapping &RegBankSelect::findBestMapping(
MachineInstr &MI, RegisterBankInfo::InstructionMappings &PossibleMappings,
SmallVectorImpl<RepairingPlacement> &RepairPts) {
+ assert(!PossibleMappings.empty() &&
+ "Do not know how to map this instruction");
RegisterBankInfo::InstructionMapping *BestMapping = nullptr;
MappingCost Cost = MappingCost::ImpossibleCost();
@@ -219,7 +230,15 @@ RegisterBankInfo::InstructionMapping &RegBankSelect::findBestMapping(
RepairPts.emplace_back(std::move(RepairPt));
}
}
- assert(BestMapping && "No suitable mapping for instruction");
+ if (!BestMapping && !TPC->isGlobalISelAbortEnabled()) {
+ // If none of the mappings worked, that means they are all impossible.
+ // Thus, pick the first one and set an impossible repairing point.
+ // It will trigger the failed isel mode.
+ BestMapping = &(*PossibleMappings.begin());
+ RepairPts.emplace_back(
+ RepairingPlacement(MI, 0, *TRI, *this, RepairingPlacement::Impossible));
+ } else
+ assert(BestMapping && "No suitable mapping for instruction");
return *BestMapping;
}
@@ -250,7 +269,7 @@ void RegBankSelect::tryAvoidingSplit(
// For the PHI case, the split may not be actually required.
// In the copy case, a phi is already a copy on the incoming edge,
// therefore there is no need to split.
- if (ValMapping.BreakDown.size() == 1)
+ if (ValMapping.NumBreakDowns == 1)
// This is already a copy; there is nothing to do.
RepairPt.switchTo(RepairingPlacement::RepairingKind::Reassign);
}
@@ -327,7 +346,7 @@ void RegBankSelect::tryAvoidingSplit(
// We will split all the edges and repair there.
} else {
// This is a virtual register defined by a terminator.
- if (ValMapping.BreakDown.size() == 1) {
+ if (ValMapping.NumBreakDowns == 1) {
// There is nothing to repair, but we may actually lie about
// the repairing cost because of the PHIs already processed,
// as already stated.
@@ -348,6 +367,9 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping(
const RegBankSelect::MappingCost *BestCost) {
assert((MBFI || !BestCost) && "Costs comparison require MBFI");
+ if (!InstrMapping.isValid())
+ return MappingCost::ImpossibleCost();
+
// If mapped with InstrMapping, MI will have the recorded cost.
MappingCost Cost(MBFI ? MBFI->getBlockFreq(MI.getParent()) : 1);
bool Saturated = Cost.addLocalCost(InstrMapping.getCost());
@@ -362,8 +384,8 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping(
// match this mapping. In other words, we may need to locally reassign the
// register banks. Account for that repairing cost as well.
// In this context, local means in the surrounding of MI.
- for (unsigned OpIdx = 0, EndOpIdx = MI.getNumOperands(); OpIdx != EndOpIdx;
- ++OpIdx) {
+ for (unsigned OpIdx = 0, EndOpIdx = InstrMapping.getNumOperands();
+ OpIdx != EndOpIdx; ++OpIdx) {
const MachineOperand &MO = MI.getOperand(OpIdx);
if (!MO.isReg())
continue;
@@ -466,7 +488,7 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping(
return Cost;
}
-void RegBankSelect::applyMapping(
+bool RegBankSelect::applyMapping(
MachineInstr &MI, const RegisterBankInfo::InstructionMapping &InstrMapping,
SmallVectorImpl<RegBankSelect::RepairingPlacement> &RepairPts) {
// OpdMapper will hold all the information needed for the rewriting.
@@ -474,28 +496,27 @@ void RegBankSelect::applyMapping(
// First, place the repairing code.
for (RepairingPlacement &RepairPt : RepairPts) {
- assert(RepairPt.canMaterialize() &&
- RepairPt.getKind() != RepairingPlacement::Impossible &&
- "This mapping is impossible");
+ if (!RepairPt.canMaterialize() ||
+ RepairPt.getKind() == RepairingPlacement::Impossible)
+ return false;
assert(RepairPt.getKind() != RepairingPlacement::None &&
"This should not make its way in the list");
unsigned OpIdx = RepairPt.getOpIdx();
MachineOperand &MO = MI.getOperand(OpIdx);
const RegisterBankInfo::ValueMapping &ValMapping =
InstrMapping.getOperandMapping(OpIdx);
- unsigned BreakDownSize = ValMapping.BreakDown.size();
- (void)BreakDownSize;
unsigned Reg = MO.getReg();
switch (RepairPt.getKind()) {
case RepairingPlacement::Reassign:
- assert(BreakDownSize == 1 &&
+ assert(ValMapping.NumBreakDowns == 1 &&
"Reassignment should only be for simple mapping");
MRI->setRegBank(Reg, *ValMapping.BreakDown[0].RegBank);
break;
case RepairingPlacement::Insert:
OpdMapper.createVRegs(OpIdx);
- repairReg(MO, ValMapping, RepairPt, OpdMapper.getVRegs(OpIdx));
+ if (!repairReg(MO, ValMapping, RepairPt, OpdMapper.getVRegs(OpIdx)))
+ return false;
break;
default:
llvm_unreachable("Other kind should not happen");
@@ -504,9 +525,10 @@ void RegBankSelect::applyMapping(
// Second, rewrite the instruction.
DEBUG(dbgs() << "Actual mapping of the operands: " << OpdMapper << '\n');
RBI->applyMapping(OpdMapper);
+ return true;
}
-void RegBankSelect::assignInstr(MachineInstr &MI) {
+bool RegBankSelect::assignInstr(MachineInstr &MI) {
DEBUG(dbgs() << "Assign: " << MI);
// Remember the repairing placement for all the operands.
SmallVector<RepairingPlacement, 4> RepairPts;
@@ -516,13 +538,13 @@ void RegBankSelect::assignInstr(MachineInstr &MI) {
BestMapping = RBI->getInstrMapping(MI);
MappingCost DefaultCost = computeMapping(MI, BestMapping, RepairPts);
(void)DefaultCost;
- assert(DefaultCost != MappingCost::ImpossibleCost() &&
- "Default mapping is not suited");
+ if (DefaultCost == MappingCost::ImpossibleCost())
+ return false;
} else {
RegisterBankInfo::InstructionMappings PossibleMappings =
RBI->getInstrPossibleMappings(MI);
- assert(!PossibleMappings.empty() &&
- "Do not know how to map this instruction");
+ if (PossibleMappings.empty())
+ return false;
BestMapping = std::move(findBestMapping(MI, PossibleMappings, RepairPts));
}
// Make sure the mapping is valid for MI.
@@ -532,16 +554,47 @@ void RegBankSelect::assignInstr(MachineInstr &MI) {
// After this call, MI may not be valid anymore.
// Do not use it.
- applyMapping(MI, BestMapping, RepairPts);
+ return applyMapping(MI, BestMapping, RepairPts);
}
bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) {
+ // If the ISel pipeline failed, do not bother running that pass.
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailedISel))
+ return false;
+
DEBUG(dbgs() << "Assign register banks for: " << MF.getName() << '\n');
const Function *F = MF.getFunction();
Mode SaveOptMode = OptMode;
if (F->hasFnAttribute(Attribute::OptimizeNone))
OptMode = Mode::Fast;
init(MF);
+
+#ifndef NDEBUG
+ // Check that our input is fully legal: we require the function to have the
+ // Legalized property, so it should be.
+ // FIXME: This should be in the MachineVerifier, but it can't use the
+ // LegalizerInfo as it's currently in the separate GlobalISel library.
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ if (const LegalizerInfo *MLI = MF.getSubtarget().getLegalizerInfo()) {
+ for (const MachineBasicBlock &MBB : MF) {
+ for (const MachineInstr &MI : MBB) {
+ if (isPreISelGenericOpcode(MI.getOpcode()) && !MLI->isLegal(MI, MRI)) {
+ if (!TPC->isGlobalISelAbortEnabled()) {
+ MF.getProperties().set(
+ MachineFunctionProperties::Property::FailedISel);
+ return false;
+ }
+ std::string ErrStorage;
+ raw_string_ostream Err(ErrStorage);
+ Err << "Instruction is not legal: " << MI << '\n';
+ report_fatal_error(Err.str());
+ }
+ }
+ }
+ }
+#endif
+
// Walk the function and assign register banks to all operands.
// Use a RPOT to make sure all registers are assigned before we choose
// the best mapping of the current instruction.
@@ -554,7 +607,18 @@ bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) {
MII != End;) {
// MI might be invalidated by the assignment, so move the
// iterator before hand.
- assignInstr(*MII++);
+ MachineInstr &MI = *MII++;
+
+ // Ignore target-specific instructions: they should use proper regclasses.
+ if (isTargetSpecificOpcode(MI.getOpcode()))
+ continue;
+
+ if (!assignInstr(MI)) {
+ if (TPC->isGlobalISelAbortEnabled())
+ report_fatal_error("Unable to map instruction");
+ MF.getProperties().set(MachineFunctionProperties::Property::FailedISel);
+ return false;
+ }
}
}
OptMode = SaveOptMode;
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp
index a911225b5af5..0ffc08188ead 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBank.cpp
@@ -72,7 +72,7 @@ bool RegisterBank::operator==(const RegisterBank &OtherRB) const {
return &OtherRB == this;
}
-void RegisterBank::dump(const TargetRegisterInfo *TRI) const {
+LLVM_DUMP_METHOD void RegisterBank::dump(const TargetRegisterInfo *TRI) const {
print(dbgs(), /* IsForDebug */ true, TRI);
}
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
index ef8e4f6d6851..a6c93bc0f3d7 100644
--- a/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
@@ -13,6 +13,7 @@
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -32,15 +33,40 @@
using namespace llvm;
+STATISTIC(NumPartialMappingsCreated,
+ "Number of partial mappings dynamically created");
+STATISTIC(NumPartialMappingsAccessed,
+ "Number of partial mappings dynamically accessed");
+STATISTIC(NumValueMappingsCreated,
+ "Number of value mappings dynamically created");
+STATISTIC(NumValueMappingsAccessed,
+ "Number of value mappings dynamically accessed");
+STATISTIC(NumOperandsMappingsCreated,
+ "Number of operands mappings dynamically created");
+STATISTIC(NumOperandsMappingsAccessed,
+ "Number of operands mappings dynamically accessed");
+
const unsigned RegisterBankInfo::DefaultMappingID = UINT_MAX;
const unsigned RegisterBankInfo::InvalidMappingID = UINT_MAX - 1;
//------------------------------------------------------------------------------
// RegisterBankInfo implementation.
//------------------------------------------------------------------------------
-RegisterBankInfo::RegisterBankInfo(unsigned NumRegBanks)
- : NumRegBanks(NumRegBanks) {
- RegBanks.reset(new RegisterBank[NumRegBanks]);
+RegisterBankInfo::RegisterBankInfo(RegisterBank **RegBanks,
+ unsigned NumRegBanks)
+ : RegBanks(RegBanks), NumRegBanks(NumRegBanks) {
+ DEBUG(for (unsigned Idx = 0, End = getNumRegBanks(); Idx != End; ++Idx) {
+ assert(RegBanks[Idx] != nullptr && "Invalid RegisterBank");
+ assert(!RegBanks[Idx]->isValid() &&
+ "RegisterBank should be invalid before initialization");
+ });
+}
+
+RegisterBankInfo::~RegisterBankInfo() {
+ for (auto It : MapOfPartialMappings)
+ delete It.second;
+ for (auto It : MapOfValueMappings)
+ delete It.second;
}
bool RegisterBankInfo::verify(const TargetRegisterInfo &TRI) const {
@@ -65,8 +91,7 @@ void RegisterBankInfo::createRegisterBank(unsigned ID, const char *Name) {
}
void RegisterBankInfo::addRegBankCoverage(unsigned ID, unsigned RCId,
- const TargetRegisterInfo &TRI,
- bool AddTypeMapping) {
+ const TargetRegisterInfo &TRI) {
RegisterBank &RB = getRegBank(ID);
unsigned NbOfRegClasses = TRI.getNumRegClasses();
@@ -98,13 +123,6 @@ void RegisterBankInfo::addRegBankCoverage(unsigned ID, unsigned RCId,
// Remember the biggest size in bits.
MaxSize = std::max(MaxSize, CurRC.getSize() * 8);
- // If we have been asked to record the type supported by this
- // register bank, do it now.
- if (AddTypeMapping)
- for (MVT::SimpleValueType SVT :
- make_range(CurRC.vt_begin(), CurRC.vt_end()))
- recordRegBankForType(getRegBank(ID), SVT);
-
// Walk through all sub register classes and push them into the worklist.
bool First = true;
for (BitMaskClassIterator It(CurRC.getSubClassMask(), TRI); It.isValid();
@@ -173,11 +191,9 @@ RegisterBankInfo::getRegBank(unsigned Reg, const MachineRegisterInfo &MRI,
assert(Reg && "NoRegister does not have a register bank");
const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
- if (RegClassOrBank.is<const RegisterBank *>())
- return RegClassOrBank.get<const RegisterBank *>();
- const TargetRegisterClass *RC =
- RegClassOrBank.get<const TargetRegisterClass *>();
- if (RC)
+ if (auto *RB = RegClassOrBank.dyn_cast<const RegisterBank *>())
+ return RB;
+ if (auto *RC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>())
return &getRegBankFromRegClass(*RC);
return nullptr;
}
@@ -199,10 +215,37 @@ const RegisterBank *RegisterBankInfo::getRegBankFromConstraints(
return &RegBank;
}
+const TargetRegisterClass *RegisterBankInfo::constrainGenericRegister(
+ unsigned Reg, const TargetRegisterClass &RC, MachineRegisterInfo &MRI) {
+
+ // If the register already has a class, fallback to MRI::constrainRegClass.
+ auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
+ if (RegClassOrBank.is<const TargetRegisterClass *>())
+ return MRI.constrainRegClass(Reg, &RC);
+
+ const RegisterBank *RB = RegClassOrBank.get<const RegisterBank *>();
+ // Otherwise, all we can do is ensure the bank covers the class, and set it.
+ if (RB && !RB->covers(RC))
+ return nullptr;
+
+ // If nothing was set or the class is simply compatible, set it.
+ MRI.setRegClass(Reg, &RC);
+ return &RC;
+}
+
RegisterBankInfo::InstructionMapping
RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const {
+ // For copies we want to walk over the operands and try to find one
+ // that has a register bank since the instruction itself will not get
+ // us any constraint.
+ bool isCopyLike = MI.isCopy() || MI.isPHI();
+ // For copy like instruction, only the mapping of the definition
+ // is important. The rest is not constrained.
+ unsigned NumOperandsForMapping = isCopyLike ? 1 : MI.getNumOperands();
+
RegisterBankInfo::InstructionMapping Mapping(DefaultMappingID, /*Cost*/ 1,
- MI.getNumOperands());
+ /*OperandsMapping*/ nullptr,
+ NumOperandsForMapping);
const MachineFunction &MF = *MI.getParent()->getParent();
const TargetSubtargetInfo &STI = MF.getSubtarget();
const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
@@ -213,14 +256,10 @@ RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const {
// Before doing anything complicated check if the mapping is not
// directly available.
bool CompleteMapping = true;
- // For copies we want to walk over the operands and try to find one
- // that has a register bank.
- bool isCopyLike = MI.isCopy() || MI.isPHI();
- // Remember the register bank for reuse for copy-like instructions.
- const RegisterBank *RegBank = nullptr;
- // Remember the size of the register for reuse for copy-like instructions.
- unsigned RegSize = 0;
- for (unsigned OpIdx = 0, End = MI.getNumOperands(); OpIdx != End; ++OpIdx) {
+
+ SmallVector<const ValueMapping *, 8> OperandsMapping(NumOperandsForMapping);
+ for (unsigned OpIdx = 0, EndIdx = MI.getNumOperands(); OpIdx != EndIdx;
+ ++OpIdx) {
const MachineOperand &MO = MI.getOperand(OpIdx);
if (!MO.isReg())
continue;
@@ -242,71 +281,147 @@ RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const {
// the register bank from the encoding constraints.
CurRegBank = getRegBankFromConstraints(MI, OpIdx, TII, TRI);
if (!CurRegBank) {
- // Check if we can deduce the register bank from the type of
- // the instruction.
- Type *MITy = MI.getType();
- if (MITy)
- CurRegBank = getRegBankForType(
- MVT::getVT(MITy, /*HandleUnknown*/ true).SimpleTy);
- if (!CurRegBank)
- // Use the current assigned register bank.
- // That may not make much sense though.
- CurRegBank = AltRegBank;
- if (!CurRegBank) {
- // All our attempts failed, give up.
- CompleteMapping = false;
-
- if (!isCopyLike)
- // MI does not carry enough information to guess the mapping.
- return InstructionMapping();
-
- // For copies, we want to keep interating to find a register
- // bank for the other operands if we did not find one yet.
- if (RegBank)
- break;
- continue;
- }
+ // All our attempts failed, give up.
+ CompleteMapping = false;
+
+ if (!isCopyLike)
+ // MI does not carry enough information to guess the mapping.
+ return InstructionMapping();
+ continue;
}
}
- RegBank = CurRegBank;
- RegSize = getSizeInBits(Reg, MRI, TRI);
- Mapping.setOperandMapping(OpIdx, RegSize, *CurRegBank);
+ const ValueMapping *ValMapping =
+ &getValueMapping(0, getSizeInBits(Reg, MRI, TRI), *CurRegBank);
+ if (isCopyLike) {
+ OperandsMapping[0] = ValMapping;
+ CompleteMapping = true;
+ break;
+ }
+ OperandsMapping[OpIdx] = ValMapping;
}
- if (CompleteMapping)
- return Mapping;
-
- assert(isCopyLike && "We should have bailed on non-copies at this point");
- // For copy like instruction, if none of the operand has a register
- // bank avialable, there is nothing we can propagate.
- if (!RegBank)
+ if (isCopyLike && !CompleteMapping)
+ // No way to deduce the type from what we have.
return InstructionMapping();
- // This is a copy-like instruction.
- // Propagate RegBank to all operands that do not have a
- // mapping yet.
- for (unsigned OpIdx = 0, End = MI.getNumOperands(); OpIdx != End; ++OpIdx) {
- const MachineOperand &MO = MI.getOperand(OpIdx);
- // Don't assign a mapping for non-reg operands.
- if (!MO.isReg())
- continue;
+ assert(CompleteMapping && "Setting an incomplete mapping");
+ Mapping.setOperandsMapping(getOperandsMapping(OperandsMapping));
+ return Mapping;
+}
- // If a mapping already exists, do not touch it.
- if (!static_cast<const InstructionMapping *>(&Mapping)
- ->getOperandMapping(OpIdx)
- .BreakDown.empty())
- continue;
+/// Hashing function for PartialMapping.
+static hash_code hashPartialMapping(unsigned StartIdx, unsigned Length,
+ const RegisterBank *RegBank) {
+ return hash_combine(StartIdx, Length, RegBank ? RegBank->getID() : 0);
+}
- Mapping.setOperandMapping(OpIdx, RegSize, *RegBank);
+/// Overloaded version of hash_value for a PartialMapping.
+hash_code
+llvm::hash_value(const RegisterBankInfo::PartialMapping &PartMapping) {
+ return hashPartialMapping(PartMapping.StartIdx, PartMapping.Length,
+ PartMapping.RegBank);
+}
+
+const RegisterBankInfo::PartialMapping &
+RegisterBankInfo::getPartialMapping(unsigned StartIdx, unsigned Length,
+ const RegisterBank &RegBank) const {
+ ++NumPartialMappingsAccessed;
+
+ hash_code Hash = hashPartialMapping(StartIdx, Length, &RegBank);
+ const auto &It = MapOfPartialMappings.find(Hash);
+ if (It != MapOfPartialMappings.end())
+ return *It->second;
+
+ ++NumPartialMappingsCreated;
+
+ const PartialMapping *&PartMapping = MapOfPartialMappings[Hash];
+ PartMapping = new PartialMapping{StartIdx, Length, RegBank};
+ return *PartMapping;
+}
+
+const RegisterBankInfo::ValueMapping &
+RegisterBankInfo::getValueMapping(unsigned StartIdx, unsigned Length,
+ const RegisterBank &RegBank) const {
+ return getValueMapping(&getPartialMapping(StartIdx, Length, RegBank), 1);
+}
+
+static hash_code
+hashValueMapping(const RegisterBankInfo::PartialMapping *BreakDown,
+ unsigned NumBreakDowns) {
+ if (LLVM_LIKELY(NumBreakDowns == 1))
+ return hash_value(*BreakDown);
+ SmallVector<size_t, 8> Hashes(NumBreakDowns);
+ for (unsigned Idx = 0; Idx != NumBreakDowns; ++Idx)
+ Hashes.push_back(hash_value(BreakDown[Idx]));
+ return hash_combine_range(Hashes.begin(), Hashes.end());
+}
+
+const RegisterBankInfo::ValueMapping &
+RegisterBankInfo::getValueMapping(const PartialMapping *BreakDown,
+ unsigned NumBreakDowns) const {
+ ++NumValueMappingsAccessed;
+
+ hash_code Hash = hashValueMapping(BreakDown, NumBreakDowns);
+ const auto &It = MapOfValueMappings.find(Hash);
+ if (It != MapOfValueMappings.end())
+ return *It->second;
+
+ ++NumValueMappingsCreated;
+
+ const ValueMapping *&ValMapping = MapOfValueMappings[Hash];
+ ValMapping = new ValueMapping{BreakDown, NumBreakDowns};
+ return *ValMapping;
+}
+
+template <typename Iterator>
+const RegisterBankInfo::ValueMapping *
+RegisterBankInfo::getOperandsMapping(Iterator Begin, Iterator End) const {
+
+ ++NumOperandsMappingsAccessed;
+
+ // The addresses of the value mapping are unique.
+ // Therefore, we can use them directly to hash the operand mapping.
+ hash_code Hash = hash_combine_range(Begin, End);
+ const auto &It = MapOfOperandsMappings.find(Hash);
+ if (It != MapOfOperandsMappings.end())
+ return It->second;
+
+ ++NumOperandsMappingsCreated;
+
+ // Create the array of ValueMapping.
+ // Note: this array will not hash to this instance of operands
+ // mapping, because we hash the pointers of the ValueMappings and
+ // expect them to uniquely identify an instance of value mapping.
+ ValueMapping *&Res = MapOfOperandsMappings[Hash];
+ Res = new ValueMapping[std::distance(Begin, End)];
+ unsigned Idx = 0;
+ for (Iterator It = Begin; It != End; ++It, ++Idx) {
+ const ValueMapping *ValMap = *It;
+ if (!ValMap)
+ continue;
+ Res[Idx] = *ValMap;
}
- return Mapping;
+ return Res;
+}
+
+const RegisterBankInfo::ValueMapping *RegisterBankInfo::getOperandsMapping(
+ const SmallVectorImpl<const RegisterBankInfo::ValueMapping *> &OpdsMapping)
+ const {
+ return getOperandsMapping(OpdsMapping.begin(), OpdsMapping.end());
+}
+
+const RegisterBankInfo::ValueMapping *RegisterBankInfo::getOperandsMapping(
+ std::initializer_list<const RegisterBankInfo::ValueMapping *> OpdsMapping)
+ const {
+ return getOperandsMapping(OpdsMapping.begin(), OpdsMapping.end());
}
RegisterBankInfo::InstructionMapping
RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
- RegisterBankInfo::InstructionMapping Mapping = getInstrMappingImpl(MI);
- if (Mapping.isValid())
- return Mapping;
+ RegisterBankInfo::InstructionMapping Mapping = getInstrMappingImpl(MI);
+ if (Mapping.isValid())
+ return Mapping;
llvm_unreachable("The target must implement this");
}
@@ -335,18 +450,18 @@ RegisterBankInfo::getInstrAlternativeMappings(const MachineInstr &MI) const {
void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) {
MachineInstr &MI = OpdMapper.getMI();
DEBUG(dbgs() << "Applying default-like mapping\n");
- for (unsigned OpIdx = 0, EndIdx = MI.getNumOperands(); OpIdx != EndIdx;
- ++OpIdx) {
+ for (unsigned OpIdx = 0,
+ EndIdx = OpdMapper.getInstrMapping().getNumOperands();
+ OpIdx != EndIdx; ++OpIdx) {
DEBUG(dbgs() << "OpIdx " << OpIdx);
MachineOperand &MO = MI.getOperand(OpIdx);
if (!MO.isReg()) {
DEBUG(dbgs() << " is not a register, nothing to be done\n");
continue;
}
- assert(
- OpdMapper.getInstrMapping().getOperandMapping(OpIdx).BreakDown.size() ==
- 1 &&
- "This mapping is too complex for this function");
+ assert(OpdMapper.getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns ==
+ 1 &&
+ "This mapping is too complex for this function");
iterator_range<SmallVectorImpl<unsigned>::const_iterator> NewRegs =
OpdMapper.getVRegs(OpIdx);
if (NewRegs.begin() == NewRegs.end()) {
@@ -369,7 +484,8 @@ unsigned RegisterBankInfo::getSizeInBits(unsigned Reg,
// get the size of that register class.
RC = TRI.getMinimalPhysRegClass(Reg);
} else {
- unsigned RegSize = MRI.getSize(Reg);
+ LLT Ty = MRI.getType(Reg);
+ unsigned RegSize = Ty.isValid() ? Ty.getSizeInBits() : 0;
// If Reg is not a generic register, query the register class to
// get its size.
if (RegSize)
@@ -384,7 +500,7 @@ unsigned RegisterBankInfo::getSizeInBits(unsigned Reg,
//------------------------------------------------------------------------------
// Helper classes implementation.
//------------------------------------------------------------------------------
-void RegisterBankInfo::PartialMapping::dump() const {
+LLVM_DUMP_METHOD void RegisterBankInfo::PartialMapping::dump() const {
print(dbgs());
dbgs() << '\n';
}
@@ -392,7 +508,7 @@ void RegisterBankInfo::PartialMapping::dump() const {
bool RegisterBankInfo::PartialMapping::verify() const {
assert(RegBank && "Register bank not set");
assert(Length && "Empty mapping");
- assert((StartIdx < getHighBitIdx()) && "Overflow, switch to APInt?");
+ assert((StartIdx <= getHighBitIdx()) && "Overflow, switch to APInt?");
// Check if the minimum width fits into RegBank.
assert(RegBank->getSize() >= Length && "Register bank too small for Mask");
return true;
@@ -406,10 +522,10 @@ void RegisterBankInfo::PartialMapping::print(raw_ostream &OS) const {
OS << "nullptr";
}
-bool RegisterBankInfo::ValueMapping::verify(unsigned ExpectedBitWidth) const {
- assert(!BreakDown.empty() && "Value mapped nowhere?!");
+bool RegisterBankInfo::ValueMapping::verify(unsigned MeaningfulBitWidth) const {
+ assert(NumBreakDowns && "Value mapped nowhere?!");
unsigned OrigValueBitWidth = 0;
- for (const RegisterBankInfo::PartialMapping &PartMap : BreakDown) {
+ for (const RegisterBankInfo::PartialMapping &PartMap : *this) {
// Check that each register bank is big enough to hold the partial value:
// this check is done by PartialMapping::verify
assert(PartMap.verify() && "Partial mapping is invalid");
@@ -418,9 +534,10 @@ bool RegisterBankInfo::ValueMapping::verify(unsigned ExpectedBitWidth) const {
OrigValueBitWidth =
std::max(OrigValueBitWidth, PartMap.getHighBitIdx() + 1);
}
- assert(OrigValueBitWidth == ExpectedBitWidth && "BitWidth does not match");
+ assert(OrigValueBitWidth >= MeaningfulBitWidth &&
+ "Meaningful bits not covered by the mapping");
APInt ValueMask(OrigValueBitWidth, 0);
- for (const RegisterBankInfo::PartialMapping &PartMap : BreakDown) {
+ for (const RegisterBankInfo::PartialMapping &PartMap : *this) {
// Check that the union of the partial mappings covers the whole value,
// without overlaps.
// The high bit is exclusive in the APInt API, thus getHighBitIdx + 1.
@@ -434,15 +551,15 @@ bool RegisterBankInfo::ValueMapping::verify(unsigned ExpectedBitWidth) const {
return true;
}
-void RegisterBankInfo::ValueMapping::dump() const {
+LLVM_DUMP_METHOD void RegisterBankInfo::ValueMapping::dump() const {
print(dbgs());
dbgs() << '\n';
}
void RegisterBankInfo::ValueMapping::print(raw_ostream &OS) const {
- OS << "#BreakDown: " << BreakDown.size() << " ";
+ OS << "#BreakDown: " << NumBreakDowns << " ";
bool IsFirst = true;
- for (const PartialMapping &PartMap : BreakDown) {
+ for (const PartialMapping &PartMap : *this) {
if (!IsFirst)
OS << ", ";
OS << '[' << PartMap << ']';
@@ -450,21 +567,13 @@ void RegisterBankInfo::ValueMapping::print(raw_ostream &OS) const {
}
}
-void RegisterBankInfo::InstructionMapping::setOperandMapping(
- unsigned OpIdx, unsigned MaskSize, const RegisterBank &RegBank) {
- // Build the value mapping.
- assert(MaskSize <= RegBank.getSize() && "Register bank is too small");
-
- // Create the mapping object.
- getOperandMapping(OpIdx).BreakDown.push_back(
- PartialMapping(0, MaskSize, RegBank));
-}
-
bool RegisterBankInfo::InstructionMapping::verify(
const MachineInstr &MI) const {
// Check that all the register operands are properly mapped.
// Check the constructor invariant.
- assert(NumOperands == MI.getNumOperands() &&
+ // For PHI, we only care about mapping the definition.
+ assert(NumOperands ==
+ ((MI.isCopy() || MI.isPHI()) ? 1 : MI.getNumOperands()) &&
"NumOperands must match, see constructor");
assert(MI.getParent() && MI.getParent()->getParent() &&
"MI must be connected to a MachineFunction");
@@ -473,16 +582,18 @@ bool RegisterBankInfo::InstructionMapping::verify(
for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
const MachineOperand &MO = MI.getOperand(Idx);
- const RegisterBankInfo::ValueMapping &MOMapping = getOperandMapping(Idx);
- (void)MOMapping;
if (!MO.isReg()) {
- assert(MOMapping.BreakDown.empty() &&
+ assert(!getOperandMapping(Idx).isValid() &&
"We should not care about non-reg mapping");
continue;
}
unsigned Reg = MO.getReg();
if (!Reg)
continue;
+ assert(getOperandMapping(Idx).isValid() &&
+ "We must have a mapping for reg operands");
+ const RegisterBankInfo::ValueMapping &MOMapping = getOperandMapping(Idx);
+ (void)MOMapping;
// Register size in bits.
// This size must match what the mapping expects.
assert(MOMapping.verify(getSizeInBits(
@@ -492,7 +603,7 @@ bool RegisterBankInfo::InstructionMapping::verify(
return true;
}
-void RegisterBankInfo::InstructionMapping::dump() const {
+LLVM_DUMP_METHOD void RegisterBankInfo::InstructionMapping::dump() const {
print(dbgs());
dbgs() << '\n';
}
@@ -514,18 +625,16 @@ RegisterBankInfo::OperandsMapper::OperandsMapper(
MachineInstr &MI, const InstructionMapping &InstrMapping,
MachineRegisterInfo &MRI)
: MRI(MRI), MI(MI), InstrMapping(InstrMapping) {
- unsigned NumOpds = MI.getNumOperands();
- OpToNewVRegIdx.reset(new int[NumOpds]);
- std::fill(&OpToNewVRegIdx[0], &OpToNewVRegIdx[NumOpds],
- OperandsMapper::DontKnowIdx);
+ unsigned NumOpds = InstrMapping.getNumOperands();
+ OpToNewVRegIdx.resize(NumOpds, OperandsMapper::DontKnowIdx);
assert(InstrMapping.verify(MI) && "Invalid mapping for MI");
}
iterator_range<SmallVectorImpl<unsigned>::iterator>
RegisterBankInfo::OperandsMapper::getVRegsMem(unsigned OpIdx) {
- assert(OpIdx < getMI().getNumOperands() && "Out-of-bound access");
+ assert(OpIdx < getInstrMapping().getNumOperands() && "Out-of-bound access");
unsigned NumPartialVal =
- getInstrMapping().getOperandMapping(OpIdx).BreakDown.size();
+ getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns;
int StartIdx = OpToNewVRegIdx[OpIdx];
if (StartIdx == OperandsMapper::DontKnowIdx) {
@@ -559,16 +668,15 @@ RegisterBankInfo::OperandsMapper::getNewVRegsEnd(unsigned StartIdx,
}
void RegisterBankInfo::OperandsMapper::createVRegs(unsigned OpIdx) {
- assert(OpIdx < getMI().getNumOperands() && "Out-of-bound access");
+ assert(OpIdx < getInstrMapping().getNumOperands() && "Out-of-bound access");
iterator_range<SmallVectorImpl<unsigned>::iterator> NewVRegsForOpIdx =
getVRegsMem(OpIdx);
- const SmallVectorImpl<PartialMapping> &PartMapList =
- getInstrMapping().getOperandMapping(OpIdx).BreakDown;
- SmallVectorImpl<PartialMapping>::const_iterator PartMap = PartMapList.begin();
+ const ValueMapping &ValMapping = getInstrMapping().getOperandMapping(OpIdx);
+ const PartialMapping *PartMap = ValMapping.begin();
for (unsigned &NewVReg : NewVRegsForOpIdx) {
- assert(PartMap != PartMapList.end() && "Out-of-bound access");
+ assert(PartMap != ValMapping.end() && "Out-of-bound access");
assert(NewVReg == 0 && "Register has already been created");
- NewVReg = MRI.createGenericVirtualRegister(PartMap->Length);
+ NewVReg = MRI.createGenericVirtualRegister(LLT::scalar(PartMap->Length));
MRI.setRegBank(NewVReg, *PartMap->RegBank);
++PartMap;
}
@@ -577,8 +685,8 @@ void RegisterBankInfo::OperandsMapper::createVRegs(unsigned OpIdx) {
void RegisterBankInfo::OperandsMapper::setVRegs(unsigned OpIdx,
unsigned PartialMapIdx,
unsigned NewVReg) {
- assert(OpIdx < getMI().getNumOperands() && "Out-of-bound access");
- assert(getInstrMapping().getOperandMapping(OpIdx).BreakDown.size() >
+ assert(OpIdx < getInstrMapping().getNumOperands() && "Out-of-bound access");
+ assert(getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns >
PartialMapIdx &&
"Out-of-bound access for partial mapping");
// Make sure the memory is initialized for that operand.
@@ -592,14 +700,14 @@ iterator_range<SmallVectorImpl<unsigned>::const_iterator>
RegisterBankInfo::OperandsMapper::getVRegs(unsigned OpIdx,
bool ForDebug) const {
(void)ForDebug;
- assert(OpIdx < getMI().getNumOperands() && "Out-of-bound access");
+ assert(OpIdx < getInstrMapping().getNumOperands() && "Out-of-bound access");
int StartIdx = OpToNewVRegIdx[OpIdx];
if (StartIdx == OperandsMapper::DontKnowIdx)
return make_range(NewVRegs.end(), NewVRegs.end());
unsigned PartMapSize =
- getInstrMapping().getOperandMapping(OpIdx).BreakDown.size();
+ getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns;
SmallVectorImpl<unsigned>::const_iterator End =
getNewVRegsEnd(StartIdx, PartMapSize);
iterator_range<SmallVectorImpl<unsigned>::const_iterator> Res =
@@ -611,14 +719,14 @@ RegisterBankInfo::OperandsMapper::getVRegs(unsigned OpIdx,
return Res;
}
-void RegisterBankInfo::OperandsMapper::dump() const {
+LLVM_DUMP_METHOD void RegisterBankInfo::OperandsMapper::dump() const {
print(dbgs(), true);
dbgs() << '\n';
}
void RegisterBankInfo::OperandsMapper::print(raw_ostream &OS,
bool ForDebug) const {
- unsigned NumOpds = getMI().getNumOperands();
+ unsigned NumOpds = getInstrMapping().getNumOperands();
if (ForDebug) {
OS << "Mapping for " << getMI() << "\nwith " << getInstrMapping() << '\n';
// Print out the internal state of the index table.
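To make the new uniquing scheme concrete: a target's getInstrMapping() override no longer fills per-operand BreakDown vectors; it asks the base class for pooled ValueMapping objects and hands a single operands array to the InstructionMapping constructor. The fragment below is a hedged sketch of that pattern (the bank name and sizes are invented), written as it would appear inside a hypothetical target RegisterBankInfo subclass:

  // Map a three-register-operand instruction entirely onto a 32-bit
  // general-purpose bank "GPRBank". Identical ValueMapping pointers keep the
  // operands-mapping hash in getOperandsMapping() cheap.
  const ValueMapping &GPR32 = getValueMapping(/*StartIdx=*/0, /*Length=*/32,
                                              GPRBank);
  const ValueMapping *OpdsMapping =
      getOperandsMapping({&GPR32, &GPR32, &GPR32});
  return InstructionMapping(DefaultMappingID, /*Cost=*/1, OpdsMapping,
                            /*NumOperands=*/3);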
diff --git a/contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp
new file mode 100644
index 000000000000..e50091833c26
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -0,0 +1,45 @@
+//===- llvm/CodeGen/GlobalISel/Utils.cpp -------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file This file implements the utility functions used by the GlobalISel
+/// pipeline.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+#define DEBUG_TYPE "globalisel-utils"
+
+using namespace llvm;
+
+unsigned llvm::constrainOperandRegClass(
+ const MachineFunction &MF, const TargetRegisterInfo &TRI,
+ MachineRegisterInfo &MRI, const TargetInstrInfo &TII,
+ const RegisterBankInfo &RBI, MachineInstr &InsertPt, const MCInstrDesc &II,
+ unsigned Reg, unsigned OpIdx) {
+ // Assume physical registers are properly constrained.
+ assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
+ "PhysReg not implemented");
+
+ const TargetRegisterClass *RegClass = TII.getRegClass(II, OpIdx, &TRI, MF);
+
+ if (!RBI.constrainGenericRegister(Reg, *RegClass, MRI)) {
+ unsigned NewReg = MRI.createVirtualRegister(RegClass);
+ BuildMI(*InsertPt.getParent(), InsertPt, InsertPt.getDebugLoc(),
+ TII.get(TargetOpcode::COPY), NewReg)
+ .addReg(Reg);
+ return NewReg;
+ }
+
+ return Reg;
+}
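
A minimal usage sketch for the new helper (not part of the patch; the wrapper name and the choice of operand index are assumptions for illustration). It constrains the vreg feeding a use operand to the class required by the instruction's MCInstrDesc; when constrainGenericRegister fails, the helper inserts a COPY before the instruction and hands back a fresh vreg to use instead:

    #include "llvm/CodeGen/GlobalISel/Utils.h"
    #include "llvm/CodeGen/MachineInstr.h"
    #include "llvm/CodeGen/MachineRegisterInfo.h"

    // Constrain the register used by operand 1 of MI, rewriting the operand
    // if constrainOperandRegClass had to insert a COPY.
    static void constrainUseOperand1(llvm::MachineFunction &MF,
                                     const llvm::TargetRegisterInfo &TRI,
                                     llvm::MachineRegisterInfo &MRI,
                                     const llvm::TargetInstrInfo &TII,
                                     const llvm::RegisterBankInfo &RBI,
                                     llvm::MachineInstr &MI) {
      unsigned OldReg = MI.getOperand(1).getReg();
      unsigned NewReg = llvm::constrainOperandRegClass(
          MF, TRI, MRI, TII, RBI, MI, MI.getDesc(), OldReg, /*OpIdx=*/1);
      if (NewReg != OldReg)
        MI.getOperand(1).setReg(NewReg);
    }
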
diff --git a/contrib/llvm/lib/CodeGen/GlobalMerge.cpp b/contrib/llvm/lib/CodeGen/GlobalMerge.cpp
index 8c760b724d13..1ea534939948 100644
--- a/contrib/llvm/lib/CodeGen/GlobalMerge.cpp
+++ b/contrib/llvm/lib/CodeGen/GlobalMerge.cpp
@@ -182,9 +182,7 @@ namespace {
bool runOnFunction(Function &F) override;
bool doFinalization(Module &M) override;
- const char *getPassName() const override {
- return "Merge internal globals";
- }
+ StringRef getPassName() const override { return "Merge internal globals"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
@@ -434,6 +432,8 @@ bool GlobalMerge::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
std::vector<Type*> Tys;
std::vector<Constant*> Inits;
+ bool HasExternal = false;
+ StringRef FirstExternalName;
for (j = i; j != -1; j = GlobalSet.find_next(j)) {
Type *Ty = Globals[j]->getValueType();
MergedSize += DL.getTypeAllocSize(Ty);
@@ -442,19 +442,46 @@ bool GlobalMerge::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
}
Tys.push_back(Ty);
Inits.push_back(Globals[j]->getInitializer());
+
+ if (Globals[j]->hasExternalLinkage() && !HasExternal) {
+ HasExternal = true;
+ FirstExternalName = Globals[j]->getName();
+ }
}
+ // If the merged variables don't have external linkage, there is no need to
+ // expose the symbol after merging.
+ GlobalValue::LinkageTypes Linkage = HasExternal
+ ? GlobalValue::ExternalLinkage
+ : GlobalValue::InternalLinkage;
StructType *MergedTy = StructType::get(M.getContext(), Tys);
Constant *MergedInit = ConstantStruct::get(MergedTy, Inits);
- GlobalVariable *MergedGV = new GlobalVariable(
- M, MergedTy, isConst, GlobalValue::PrivateLinkage, MergedInit,
- "_MergedGlobals", nullptr, GlobalVariable::NotThreadLocal, AddrSpace);
+ // On Darwin external linkage needs to be preserved, otherwise
+ // dsymutil cannot preserve the debug info for the merged
+ // variables. If they have external linkage, use the symbol name
+ // of the first variable merged as the suffix of the global symbol
+ // name. This avoids a link-time naming conflict for the
+ // _MergedGlobals symbols.
+ Twine MergedName =
+ (IsMachO && HasExternal)
+ ? "_MergedGlobals_" + FirstExternalName
+ : "_MergedGlobals";
+ auto MergedLinkage = IsMachO ? Linkage : GlobalValue::PrivateLinkage;
+ auto *MergedGV = new GlobalVariable(
+ M, MergedTy, isConst, MergedLinkage, MergedInit, MergedName, nullptr,
+ GlobalVariable::NotThreadLocal, AddrSpace);
+
+ const StructLayout *MergedLayout = DL.getStructLayout(MergedTy);
for (ssize_t k = i, idx = 0; k != j; k = GlobalSet.find_next(k), ++idx) {
GlobalValue::LinkageTypes Linkage = Globals[k]->getLinkage();
std::string Name = Globals[k]->getName();
+ // Copy metadata while adjusting any debug info metadata by the original
+ // global's offset within the merged global.
+ MergedGV->copyMetadata(Globals[k], MergedLayout->getElementOffset(idx));
+
Constant *Idx[2] = {
ConstantInt::get(Int32Ty, 0),
ConstantInt::get(Int32Ty, idx),
@@ -498,22 +525,18 @@ void GlobalMerge::collectUsedGlobalVariables(Module &M) {
void GlobalMerge::setMustKeepGlobalVariables(Module &M) {
collectUsedGlobalVariables(M);
- for (Module::iterator IFn = M.begin(), IEndFn = M.end(); IFn != IEndFn;
- ++IFn) {
- for (Function::iterator IBB = IFn->begin(), IEndBB = IFn->end();
- IBB != IEndBB; ++IBB) {
- // Follow the invoke link to find the landing pad instruction
- const InvokeInst *II = dyn_cast<InvokeInst>(IBB->getTerminator());
- if (!II) continue;
-
- const LandingPadInst *LPInst = II->getUnwindDest()->getLandingPadInst();
- // Look for globals in the clauses of the landing pad instruction
- for (unsigned Idx = 0, NumClauses = LPInst->getNumClauses();
- Idx != NumClauses; ++Idx)
+ for (Function &F : M) {
+ for (BasicBlock &BB : F) {
+ Instruction *Pad = BB.getFirstNonPHI();
+ if (!Pad->isEHPad())
+ continue;
+
+ // Keep globals used by landingpads and catchpads.
+ for (const Use &U : Pad->operands()) {
if (const GlobalVariable *GV =
- dyn_cast<GlobalVariable>(LPInst->getClause(Idx)
- ->stripPointerCasts()))
+ dyn_cast<GlobalVariable>(U->stripPointerCasts()))
MustKeepGlobalVariables.insert(GV);
+ }
}
}
}
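
Restating the naming and linkage decision from the doMerge hunk above in isolation (a sketch only; the real code builds the name inline as a Twine and the helper below is hypothetical): on Mach-O, external linkage is kept so that dsymutil can still find the merged variables, and the first external global's name disambiguates the merged symbol; everywhere else the old private "_MergedGlobals" is used.

    #include "llvm/ADT/StringRef.h"
    #include "llvm/ADT/Twine.h"
    #include "llvm/IR/GlobalValue.h"
    #include <string>
    #include <utility>

    // Mirrors the decision in GlobalMerge::doMerge above.
    static std::pair<std::string, llvm::GlobalValue::LinkageTypes>
    mergedSymbol(bool IsMachO, bool HasExternal,
                 llvm::StringRef FirstExternalName) {
      if (IsMachO && HasExternal)
        return {("_MergedGlobals_" + FirstExternalName).str(),
                llvm::GlobalValue::ExternalLinkage};
      return {"_MergedGlobals", IsMachO ? llvm::GlobalValue::InternalLinkage
                                        : llvm::GlobalValue::PrivateLinkage};
    }

For example, merging an external global x with an internal y on Mach-O yields the symbol _MergedGlobals_x with external linkage, while the same pair on a non-Mach-O target still produces a private _MergedGlobals.
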
diff --git a/contrib/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm/lib/CodeGen/IfConversion.cpp
index d225162860c2..0cac7b71e241 100644
--- a/contrib/llvm/lib/CodeGen/IfConversion.cpp
+++ b/contrib/llvm/lib/CodeGen/IfConversion.cpp
@@ -15,6 +15,7 @@
#include "llvm/CodeGen/Passes.h"
#include "BranchFolding.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LivePhysRegs.h"
@@ -58,6 +59,8 @@ static cl::opt<bool> DisableTriangleFR("disable-ifcvt-triangle-false-rev",
cl::init(false), cl::Hidden);
static cl::opt<bool> DisableDiamond("disable-ifcvt-diamond",
cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableForkedDiamond("disable-ifcvt-forked-diamond",
+ cl::init(false), cl::Hidden);
static cl::opt<bool> IfCvtBranchFold("ifcvt-branch-fold",
cl::init(true), cl::Hidden);
@@ -68,6 +71,7 @@ STATISTIC(NumTriangleRev, "Number of triangle (R) if-conversions performed");
STATISTIC(NumTriangleFalse,"Number of triangle (F) if-conversions performed");
STATISTIC(NumTriangleFRev, "Number of triangle (F/R) if-conversions performed");
STATISTIC(NumDiamonds, "Number of diamond if-conversions performed");
+STATISTIC(NumForkedDiamonds, "Number of forked-diamond if-conversions performed");
STATISTIC(NumIfConvBBs, "Number of if-converted blocks");
STATISTIC(NumDupBBs, "Number of duplicated blocks");
STATISTIC(NumUnpred, "Number of true blocks of diamonds unpredicated");
@@ -82,10 +86,12 @@ namespace {
ICTriangleRev, // Same as ICTriangle, but true path rev condition.
ICTriangleFalse, // Same as ICTriangle, but on the false path.
ICTriangle, // BB is entry of a triangle sub-CFG.
- ICDiamond // BB is entry of a diamond sub-CFG.
+ ICDiamond, // BB is entry of a diamond sub-CFG.
+ ICForkedDiamond // BB is entry of an almost diamond sub-CFG, with a
+ // common tail that can be shared.
};
- /// BBInfo - One per MachineBasicBlock, this is used to cache the result
+ /// One per MachineBasicBlock, this is used to cache the result of
/// if-conversion feasibility analysis. This includes results from
/// TargetInstrInfo::analyzeBranch() (i.e. TBB, FBB, and Cond), and its
/// classification, and common tail block of its successors (if it's a
@@ -114,6 +120,7 @@ namespace {
bool IsAnalyzed : 1;
bool IsEnqueued : 1;
bool IsBrAnalyzable : 1;
+ bool IsBrReversible : 1;
bool HasFallThrough : 1;
bool IsUnpredicable : 1;
bool CannotBeCopied : 1;
@@ -128,13 +135,14 @@ namespace {
SmallVector<MachineOperand, 4> Predicate;
BBInfo() : IsDone(false), IsBeingAnalyzed(false),
IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false),
- HasFallThrough(false), IsUnpredicable(false),
- CannotBeCopied(false), ClobbersPred(false), NonPredSize(0),
- ExtraCost(0), ExtraCost2(0), BB(nullptr), TrueBB(nullptr),
+ IsBrReversible(false), HasFallThrough(false),
+ IsUnpredicable(false), CannotBeCopied(false),
+ ClobbersPred(false), NonPredSize(0), ExtraCost(0),
+ ExtraCost2(0), BB(nullptr), TrueBB(nullptr),
FalseBB(nullptr) {}
};
- /// IfcvtToken - Record information about pending if-conversions to attempt:
+ /// Record information about pending if-conversions to attempt:
/// BBI - Corresponding BBInfo.
/// Kind - Type of block. See IfcvtKind.
/// NeedSubsumption - True if the to-be-predicated BB has already been
@@ -148,15 +156,19 @@ namespace {
struct IfcvtToken {
BBInfo &BBI;
IfcvtKind Kind;
- bool NeedSubsumption;
unsigned NumDups;
unsigned NumDups2;
- IfcvtToken(BBInfo &b, IfcvtKind k, bool s, unsigned d, unsigned d2 = 0)
- : BBI(b), Kind(k), NeedSubsumption(s), NumDups(d), NumDups2(d2) {}
+ bool NeedSubsumption : 1;
+ bool TClobbersPred : 1;
+ bool FClobbersPred : 1;
+ IfcvtToken(BBInfo &b, IfcvtKind k, bool s, unsigned d, unsigned d2 = 0,
+ bool tc = false, bool fc = false)
+ : BBI(b), Kind(k), NumDups(d), NumDups2(d2), NeedSubsumption(s),
+ TClobbersPred(tc), FClobbersPred(fc) {}
};
- /// BBAnalysis - Results of if-conversion feasibility analysis indexed by
- /// basic block number.
+ /// Results of if-conversion feasibility analysis indexed by basic block
+ /// number.
std::vector<BBInfo> BBAnalysis;
TargetSchedModel SchedModel;
@@ -172,11 +184,11 @@ namespace {
bool PreRegAlloc;
bool MadeChange;
int FnNum;
- std::function<bool(const Function &)> PredicateFtor;
+ std::function<bool(const MachineFunction &)> PredicateFtor;
public:
static char ID;
- IfConverter(std::function<bool(const Function &)> Ftor = nullptr)
+ IfConverter(std::function<bool(const MachineFunction &)> Ftor = nullptr)
: MachineFunctionPass(ID), FnNum(-1), PredicateFtor(std::move(Ftor)) {
initializeIfConverterPass(*PassRegistry::getPassRegistry());
}
@@ -191,31 +203,58 @@ namespace {
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
private:
- bool ReverseBranchCondition(BBInfo &BBI);
+ bool reverseBranchCondition(BBInfo &BBI) const;
bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups,
BranchProbability Prediction) const;
bool ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
bool FalseBranch, unsigned &Dups,
BranchProbability Prediction) const;
+ bool CountDuplicatedInstructions(
+ MachineBasicBlock::iterator &TIB, MachineBasicBlock::iterator &FIB,
+ MachineBasicBlock::iterator &TIE, MachineBasicBlock::iterator &FIE,
+ unsigned &Dups1, unsigned &Dups2,
+ MachineBasicBlock &TBB, MachineBasicBlock &FBB,
+ bool SkipUnconditionalBranches) const;
bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
- unsigned &Dups1, unsigned &Dups2) const;
- void ScanInstructions(BBInfo &BBI);
- void AnalyzeBlock(MachineBasicBlock *MBB,
+ unsigned &Dups1, unsigned &Dups2,
+ BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const;
+ bool ValidForkedDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
+ unsigned &Dups1, unsigned &Dups2,
+ BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const;
+ void AnalyzeBranches(BBInfo &BBI);
+ void ScanInstructions(BBInfo &BBI,
+ MachineBasicBlock::iterator &Begin,
+ MachineBasicBlock::iterator &End,
+ bool BranchUnpredicable = false) const;
+ bool RescanInstructions(
+ MachineBasicBlock::iterator &TIB, MachineBasicBlock::iterator &FIB,
+ MachineBasicBlock::iterator &TIE, MachineBasicBlock::iterator &FIE,
+ BBInfo &TrueBBI, BBInfo &FalseBBI) const;
+ void AnalyzeBlock(MachineBasicBlock &MBB,
std::vector<std::unique_ptr<IfcvtToken>> &Tokens);
bool FeasibilityAnalysis(BBInfo &BBI, SmallVectorImpl<MachineOperand> &Cond,
- bool isTriangle = false, bool RevBranch = false);
+ bool isTriangle = false, bool RevBranch = false,
+ bool hasCommonTail = false);
void AnalyzeBlocks(MachineFunction &MF,
std::vector<std::unique_ptr<IfcvtToken>> &Tokens);
- void InvalidatePreds(MachineBasicBlock *BB);
+ void InvalidatePreds(MachineBasicBlock &MBB);
void RemoveExtraEdges(BBInfo &BBI);
bool IfConvertSimple(BBInfo &BBI, IfcvtKind Kind);
bool IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind);
+ bool IfConvertDiamondCommon(BBInfo &BBI, BBInfo &TrueBBI, BBInfo &FalseBBI,
+ unsigned NumDups1, unsigned NumDups2,
+ bool TClobbersPred, bool FClobbersPred,
+ bool RemoveBranch, bool MergeAddEdges);
bool IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
- unsigned NumDups1, unsigned NumDups2);
+ unsigned NumDups1, unsigned NumDups2,
+ bool TClobbers, bool FClobbers);
+ bool IfConvertForkedDiamond(BBInfo &BBI, IfcvtKind Kind,
+ unsigned NumDups1, unsigned NumDups2,
+ bool TClobbers, bool FClobbers);
void PredicateBlock(BBInfo &BBI,
MachineBasicBlock::iterator E,
SmallVectorImpl<MachineOperand> &Cond,
@@ -242,12 +281,12 @@ namespace {
Prediction);
}
- // blockAlwaysFallThrough - Block ends without a terminator.
+ /// Returns true if Block ends without a terminator.
bool blockAlwaysFallThrough(BBInfo &BBI) const {
return BBI.IsBrAnalyzable && BBI.TrueBB == nullptr;
}
- // IfcvtTokenCmp - Used to sort if-conversion candidates.
+ /// Used to sort if-conversion candidates.
static bool IfcvtTokenCmp(const std::unique_ptr<IfcvtToken> &C1,
const std::unique_ptr<IfcvtToken> &C2) {
int Incr1 = (C1->Kind == ICDiamond)
@@ -282,8 +321,7 @@ INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_END(IfConverter, "if-converter", "If Converter", false, false)
bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
- if (skipFunction(*MF.getFunction()) ||
- (PredicateFtor && !PredicateFtor(*MF.getFunction())))
+ if (skipFunction(*MF.getFunction()) || (PredicateFtor && !PredicateFtor(MF)))
return false;
const TargetSubtargetInfo &ST = MF.getSubtarget();
@@ -402,11 +440,26 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "Ifcvt (Diamond): BB#" << BBI.BB->getNumber() << " (T:"
<< BBI.TrueBB->getNumber() << ",F:"
<< BBI.FalseBB->getNumber() << ") ");
- RetVal = IfConvertDiamond(BBI, Kind, NumDups, NumDups2);
+ RetVal = IfConvertDiamond(BBI, Kind, NumDups, NumDups2,
+ Token->TClobbersPred,
+ Token->FClobbersPred);
DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
if (RetVal) ++NumDiamonds;
break;
}
+ case ICForkedDiamond: {
+ if (DisableForkedDiamond) break;
+ DEBUG(dbgs() << "Ifcvt (Forked Diamond): BB#"
+ << BBI.BB->getNumber() << " (T:"
+ << BBI.TrueBB->getNumber() << ",F:"
+ << BBI.FalseBB->getNumber() << ") ");
+ RetVal = IfConvertForkedDiamond(BBI, Kind, NumDups, NumDups2,
+ Token->TClobbersPred,
+ Token->FClobbersPred);
+ DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
+ if (RetVal) ++NumForkedDiamonds;
+ break;
+ }
}
Change |= RetVal;
@@ -435,46 +488,42 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
return MadeChange;
}
-/// findFalseBlock - BB has a fallthrough. Find its 'false' successor given
-/// its 'true' successor.
+/// BB has a fallthrough. Find its 'false' successor given its 'true' successor.
static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB,
MachineBasicBlock *TrueBB) {
- for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
- E = BB->succ_end(); SI != E; ++SI) {
- MachineBasicBlock *SuccBB = *SI;
+ for (MachineBasicBlock *SuccBB : BB->successors()) {
if (SuccBB != TrueBB)
return SuccBB;
}
return nullptr;
}
-/// ReverseBranchCondition - Reverse the condition of the end of the block
-/// branch. Swap block's 'true' and 'false' successors.
-bool IfConverter::ReverseBranchCondition(BBInfo &BBI) {
+/// Reverse the condition of the end of the block branch. Swap block's 'true'
+/// and 'false' successors.
+bool IfConverter::reverseBranchCondition(BBInfo &BBI) const {
DebugLoc dl; // FIXME: this is nowhere
- if (!TII->ReverseBranchCondition(BBI.BrCond)) {
- TII->RemoveBranch(*BBI.BB);
- TII->InsertBranch(*BBI.BB, BBI.FalseBB, BBI.TrueBB, BBI.BrCond, dl);
+ if (!TII->reverseBranchCondition(BBI.BrCond)) {
+ TII->removeBranch(*BBI.BB);
+ TII->insertBranch(*BBI.BB, BBI.FalseBB, BBI.TrueBB, BBI.BrCond, dl);
std::swap(BBI.TrueBB, BBI.FalseBB);
return true;
}
return false;
}
-/// getNextBlock - Returns the next block in the function blocks ordering. If
-/// it is the end, returns NULL.
-static inline MachineBasicBlock *getNextBlock(MachineBasicBlock *BB) {
- MachineFunction::iterator I = BB->getIterator();
- MachineFunction::iterator E = BB->getParent()->end();
+/// Returns the next block in the function blocks ordering. If it is the end,
+/// returns NULL.
+static inline MachineBasicBlock *getNextBlock(MachineBasicBlock &MBB) {
+ MachineFunction::iterator I = MBB.getIterator();
+ MachineFunction::iterator E = MBB.getParent()->end();
if (++I == E)
return nullptr;
return &*I;
}
-/// ValidSimple - Returns true if the 'true' block (along with its
-/// predecessor) forms a valid simple shape for ifcvt. It also returns the
-/// number of instructions that the ifcvt would need to duplicate if performed
-/// in Dups.
+/// Returns true if the 'true' block (along with its predecessor) forms a valid
+/// simple shape for ifcvt. It also returns the number of instructions that the
+/// ifcvt would need to duplicate if performed in Dups.
bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups,
BranchProbability Prediction) const {
Dups = 0;
@@ -495,12 +544,11 @@ bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups,
return true;
}
-/// ValidTriangle - Returns true if the 'true' and 'false' blocks (along
-/// with their common predecessor) forms a valid triangle shape for ifcvt.
-/// If 'FalseBranch' is true, it checks if 'true' block's false branch
-/// branches to the 'false' block rather than the other way around. It also
-/// returns the number of instructions that the ifcvt would need to duplicate
-/// if performed in 'Dups'.
+/// Returns true if the 'true' and 'false' blocks (along with their common
+/// predecessor) form a valid triangle shape for ifcvt. If 'FalseBranch' is
+/// true, it checks if 'true' block's false branch branches to the 'false' block
+/// rather than the other way around. It also returns the number of instructions
+/// that the ifcvt would need to duplicate if performed in 'Dups'.
bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
bool FalseBranch, unsigned &Dups,
BranchProbability Prediction) const {
@@ -540,122 +588,353 @@ bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
return TExit && TExit == FalseBBI.BB;
}
-/// ValidDiamond - Returns true if the 'true' and 'false' blocks (along
-/// with their common predecessor) forms a valid diamond shape for ifcvt.
-bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
- unsigned &Dups1, unsigned &Dups2) const {
- Dups1 = Dups2 = 0;
- if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone ||
- FalseBBI.IsBeingAnalyzed || FalseBBI.IsDone)
- return false;
-
- MachineBasicBlock *TT = TrueBBI.TrueBB;
- MachineBasicBlock *FT = FalseBBI.TrueBB;
-
- if (!TT && blockAlwaysFallThrough(TrueBBI))
- TT = getNextBlock(TrueBBI.BB);
- if (!FT && blockAlwaysFallThrough(FalseBBI))
- FT = getNextBlock(FalseBBI.BB);
- if (TT != FT)
- return false;
- if (!TT && (TrueBBI.IsBrAnalyzable || FalseBBI.IsBrAnalyzable))
- return false;
- if (TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1)
- return false;
+/// Shrink the provided inclusive range by one instruction.
+/// If the range was one instruction (\p It == \p Begin), It is not modified,
+/// but \p Empty is set to true.
+static inline void shrinkInclusiveRange(
+ MachineBasicBlock::iterator &Begin,
+ MachineBasicBlock::iterator &It,
+ bool &Empty) {
+ if (It == Begin)
+ Empty = true;
+ else
+ It--;
+}
- // FIXME: Allow true block to have an early exit?
- if (TrueBBI.FalseBB || FalseBBI.FalseBB ||
- (TrueBBI.ClobbersPred && FalseBBI.ClobbersPred))
- return false;
+/// Count duplicated instructions and move the iterators to show where they
+/// are.
+/// @param TIB True Iterator Begin
+/// @param FIB False Iterator Begin
+/// These two iterators initially point to the first instruction of the two
+/// blocks, and finally point to the first non-shared instruction.
+/// @param TIE True Iterator End
+/// @param FIE False Iterator End
+/// These two iterators initially point to end() of the two blocks and
+/// finally point to the first shared instruction in the tail.
+/// Upon return, [TIB, TIE) and [FIB, FIE) mark the un-duplicated portions of
+/// the two blocks.
+/// @param Dups1 count of duplicated instructions at the beginning of the 2
+/// blocks.
+/// @param Dups2 count of duplicated instructions at the end of the 2 blocks.
+/// @param SkipUnconditionalBranches if true, do not require the unconditional
+/// branches at the end of the two blocks to be identical. True is passed when
+/// the blocks are analyzable, so that fallthrough can be handled.
+/// @return false if the shared portion prevents if conversion.
+bool IfConverter::CountDuplicatedInstructions(
+ MachineBasicBlock::iterator &TIB,
+ MachineBasicBlock::iterator &FIB,
+ MachineBasicBlock::iterator &TIE,
+ MachineBasicBlock::iterator &FIE,
+ unsigned &Dups1, unsigned &Dups2,
+ MachineBasicBlock &TBB, MachineBasicBlock &FBB,
+ bool SkipUnconditionalBranches) const {
- // Count duplicate instructions at the beginning of the true and false blocks.
- MachineBasicBlock::iterator TIB = TrueBBI.BB->begin();
- MachineBasicBlock::iterator FIB = FalseBBI.BB->begin();
- MachineBasicBlock::iterator TIE = TrueBBI.BB->end();
- MachineBasicBlock::iterator FIE = FalseBBI.BB->end();
while (TIB != TIE && FIB != FIE) {
// Skip dbg_value instructions. These do not count.
- if (TIB->isDebugValue()) {
- while (TIB != TIE && TIB->isDebugValue())
- ++TIB;
- if (TIB == TIE)
- break;
- }
- if (FIB->isDebugValue()) {
- while (FIB != FIE && FIB->isDebugValue())
- ++FIB;
- if (FIB == FIE)
- break;
- }
+ TIB = skipDebugInstructionsForward(TIB, TIE);
+ if (TIB == TIE)
+ break;
+ FIB = skipDebugInstructionsForward(FIB, FIE);
+ if (FIB == FIE)
+ break;
if (!TIB->isIdenticalTo(*FIB))
break;
- ++Dups1;
+ // A pred-clobbering instruction in the shared portion prevents
+ // if-conversion.
+ std::vector<MachineOperand> PredDefs;
+ if (TII->DefinesPredicate(*TIB, PredDefs))
+ return false;
+ // If we get all the way to the branch instructions, don't count them.
+ if (!TIB->isBranch())
+ ++Dups1;
++TIB;
++FIB;
}
- // Now, in preparation for counting duplicate instructions at the ends of the
- // blocks, move the end iterators up past any branch instructions.
- // If both blocks are returning don't skip the branches, since they will
- // likely be both identical return instructions. In such cases the return
- // can be left unpredicated.
// Check for already containing all of the block.
if (TIB == TIE || FIB == FIE)
return true;
+ // Now, in preparation for counting duplicate instructions at the ends of the
+ // blocks, move the end iterators up past any branch instructions.
--TIE;
--FIE;
- if (!TrueBBI.BB->succ_empty() || !FalseBBI.BB->succ_empty()) {
- while (TIE != TIB && TIE->isBranch())
- --TIE;
- while (FIE != FIB && FIE->isBranch())
- --FIE;
+
+ // After this point TIB and TIE define an inclusive range, which means that
+ // TIB == TIE is true when there is one more instruction to consider, not at
+ // the end. Because we may not be able to go before TIB, we need a flag to
+ // indicate a completely empty range.
+ bool TEmpty = false, FEmpty = false;
+
+ // Upon exit TIE and FIE will both point at the last non-shared instruction.
+ // They need to be moved forward to point past the last non-shared
+ // instruction if the range they delimit is non-empty.
+ auto IncrementEndIteratorsOnExit = make_scope_exit([&]() {
+ if (!TEmpty)
+ ++TIE;
+ if (!FEmpty)
+ ++FIE;
+ });
+
+ if (!TBB.succ_empty() || !FBB.succ_empty()) {
+ if (SkipUnconditionalBranches) {
+ while (!TEmpty && TIE->isUnconditionalBranch())
+ shrinkInclusiveRange(TIB, TIE, TEmpty);
+ while (!FEmpty && FIE->isUnconditionalBranch())
+ shrinkInclusiveRange(FIB, FIE, FEmpty);
+ }
}
// If Dups1 includes all of a block, then don't count duplicate
// instructions at the end of the blocks.
- if (TIB == TIE || FIB == FIE)
+ if (TEmpty || FEmpty)
return true;
// Count duplicate instructions at the ends of the blocks.
- while (TIE != TIB && FIE != FIB) {
+ while (!TEmpty && !FEmpty) {
// Skip dbg_value instructions. These do not count.
- if (TIE->isDebugValue()) {
- while (TIE != TIB && TIE->isDebugValue())
- --TIE;
- if (TIE == TIB)
- break;
+ TIE = skipDebugInstructionsBackward(TIE, TIB);
+ FIE = skipDebugInstructionsBackward(FIE, FIB);
+ TEmpty = TIE == TIB && TIE->isDebugValue();
+ FEmpty = FIE == FIB && FIE->isDebugValue();
+ if (TEmpty || FEmpty)
+ break;
+ if (!TIE->isIdenticalTo(*FIE))
+ break;
+ // We have to verify that any branch instructions are the same, and then we
+ // don't count them toward the # of duplicate instructions.
+ if (!TIE->isBranch())
+ ++Dups2;
+ shrinkInclusiveRange(TIB, TIE, TEmpty);
+ shrinkInclusiveRange(FIB, FIE, FEmpty);
+ }
+ return true;
+}
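
The inclusive-range walk above is the subtle part: TIE and FIE always point at a real instruction, and a separate flag records when a range has been exhausted, because the iterators cannot step before begin(). A toy restatement on plain vectors (illustration only; it ignores debug values and the branch-matching rule) may make the protocol easier to follow:

    #include <string>
    #include <vector>

    // Count identical trailing elements of two sequences using an inclusive
    // end position plus an Empty flag, mirroring shrinkInclusiveRange.
    static unsigned countSharedTail(const std::vector<std::string> &T,
                                    const std::vector<std::string> &F) {
      if (T.empty() || F.empty())
        return 0;
      size_t TI = T.size() - 1, FI = F.size() - 1; // inclusive end positions
      bool TEmpty = false, FEmpty = false;
      unsigned Dups2 = 0;
      while (!TEmpty && !FEmpty) {
        if (T[TI] != F[FI])
          break;
        ++Dups2;
        // shrinkInclusiveRange: flag emptiness instead of stepping past begin.
        if (TI == 0) TEmpty = true; else --TI;
        if (FI == 0) FEmpty = true; else --FI;
      }
      return Dups2;
    }

For example, countSharedTail({"a", "x", "y", "ret"}, {"a", "z", "ret"}) returns 1: only the trailing "ret" is shared.
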
+
+/// RescanInstructions - Run ScanInstructions on a pair of blocks.
+/// @param TIB - True Iterator Begin, points to first non-shared instruction
+/// @param FIB - False Iterator Begin, points to first non-shared instruction
+/// @param TIE - True Iterator End, points past last non-shared instruction
+/// @param FIE - False Iterator End, points past last non-shared instruction
+/// @param TrueBBI - BBInfo to update for the true block.
+/// @param FalseBBI - BBInfo to update for the false block.
+/// @returns - false if either block cannot be predicated or if both blocks end
+/// with a predicate-clobbering instruction.
+bool IfConverter::RescanInstructions(
+ MachineBasicBlock::iterator &TIB, MachineBasicBlock::iterator &FIB,
+ MachineBasicBlock::iterator &TIE, MachineBasicBlock::iterator &FIE,
+ BBInfo &TrueBBI, BBInfo &FalseBBI) const {
+ bool BranchUnpredicable = true;
+ TrueBBI.IsUnpredicable = FalseBBI.IsUnpredicable = false;
+ ScanInstructions(TrueBBI, TIB, TIE, BranchUnpredicable);
+ if (TrueBBI.IsUnpredicable)
+ return false;
+ ScanInstructions(FalseBBI, FIB, FIE, BranchUnpredicable);
+ if (FalseBBI.IsUnpredicable)
+ return false;
+ if (TrueBBI.ClobbersPred && FalseBBI.ClobbersPred)
+ return false;
+ return true;
+}
+
+#ifndef NDEBUG
+static void verifySameBranchInstructions(
+ MachineBasicBlock *MBB1,
+ MachineBasicBlock *MBB2) {
+ MachineBasicBlock::iterator B1 = MBB1->begin();
+ MachineBasicBlock::iterator B2 = MBB2->begin();
+ MachineBasicBlock::iterator E1 = std::prev(MBB1->end());
+ MachineBasicBlock::iterator E2 = std::prev(MBB2->end());
+ bool Empty1 = false, Empty2 = false;
+ while (!Empty1 && !Empty2) {
+ E1 = skipDebugInstructionsBackward(E1, B1);
+ E2 = skipDebugInstructionsBackward(E2, B2);
+ Empty1 = E1 == B1 && E1->isDebugValue();
+ Empty2 = E2 == B2 && E2->isDebugValue();
+
+ if (Empty1 && Empty2)
+ break;
+
+ if (Empty1) {
+ assert(!E2->isBranch() && "Branch mis-match, one block is empty.");
+ break;
}
- if (FIE->isDebugValue()) {
- while (FIE != FIB && FIE->isDebugValue())
- --FIE;
- if (FIE == FIB)
- break;
+ if (Empty2) {
+ assert(!E1->isBranch() && "Branch mis-match, one block is empty.");
+ break;
}
- if (!TIE->isIdenticalTo(*FIE))
+
+ if (E1->isBranch() || E2->isBranch())
+ assert(E1->isIdenticalTo(*E2) &&
+ "Branch mis-match, branch instructions don't match.");
+ else
break;
- ++Dups2;
- --TIE;
- --FIE;
+ shrinkInclusiveRange(B1, E1, Empty1);
+ shrinkInclusiveRange(B2, E2, Empty2);
+ }
+}
+#endif
+
+/// ValidForkedDiamond - Returns true if the 'true' and 'false' blocks (along
+/// with their common predecessor) form a diamond if a common tail block is
+/// extracted.
+/// While not strictly a diamond, this pattern would form a diamond if
+/// tail-merging had merged the shared tails.
+///          EBB
+///        _/   \_
+///       |       |
+///      TBB     FBB
+///      / \     / \
+/// FalseBB TrueBB FalseBB
+/// Currently only handles analyzable branches.
+/// Specifically excludes actual diamonds to avoid overlap.
+bool IfConverter::ValidForkedDiamond(
+ BBInfo &TrueBBI, BBInfo &FalseBBI,
+ unsigned &Dups1, unsigned &Dups2,
+ BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const {
+ Dups1 = Dups2 = 0;
+ if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone ||
+ FalseBBI.IsBeingAnalyzed || FalseBBI.IsDone)
+ return false;
+
+ if (!TrueBBI.IsBrAnalyzable || !FalseBBI.IsBrAnalyzable)
+ return false;
+ // Don't IfConvert blocks that can't be folded into their predecessor.
+ if (TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1)
+ return false;
+
+ // This function is specifically looking for conditional tails, as
+ // unconditional tails are already handled by the standard diamond case.
+ if (TrueBBI.BrCond.size() == 0 ||
+ FalseBBI.BrCond.size() == 0)
+ return false;
+
+ MachineBasicBlock *TT = TrueBBI.TrueBB;
+ MachineBasicBlock *TF = TrueBBI.FalseBB;
+ MachineBasicBlock *FT = FalseBBI.TrueBB;
+ MachineBasicBlock *FF = FalseBBI.FalseBB;
+
+ if (!TT)
+ TT = getNextBlock(*TrueBBI.BB);
+ if (!TF)
+ TF = getNextBlock(*TrueBBI.BB);
+ if (!FT)
+ FT = getNextBlock(*FalseBBI.BB);
+ if (!FF)
+ FF = getNextBlock(*FalseBBI.BB);
+
+ if (!TT || !TF)
+ return false;
+
+ // Check successors. If they don't match, bail.
+ if (!((TT == FT && TF == FF) || (TF == FT && TT == FF)))
+ return false;
+
+ bool FalseReversed = false;
+ if (TF == FT && TT == FF) {
+ // If the branches are opposing, but we can't reverse, don't do it.
+ if (!FalseBBI.IsBrReversible)
+ return false;
+ FalseReversed = true;
+ reverseBranchCondition(FalseBBI);
}
+ auto UnReverseOnExit = make_scope_exit([&]() {
+ if (FalseReversed)
+ reverseBranchCondition(FalseBBI);
+ });
+ // Count duplicate instructions at the beginning of the true and false blocks.
+ MachineBasicBlock::iterator TIB = TrueBBI.BB->begin();
+ MachineBasicBlock::iterator FIB = FalseBBI.BB->begin();
+ MachineBasicBlock::iterator TIE = TrueBBI.BB->end();
+ MachineBasicBlock::iterator FIE = FalseBBI.BB->end();
+ if (!CountDuplicatedInstructions(TIB, FIB, TIE, FIE, Dups1, Dups2,
+ *TrueBBI.BB, *FalseBBI.BB,
+ /* SkipUnconditionalBranches */ true))
+ return false;
+
+ TrueBBICalc.BB = TrueBBI.BB;
+ FalseBBICalc.BB = FalseBBI.BB;
+ if (!RescanInstructions(TIB, FIB, TIE, FIE, TrueBBICalc, FalseBBICalc))
+ return false;
+
+ // The size is used to decide whether to if-convert, and the shared portions
+ // are subtracted off. Because of the subtraction, we just use the size that
+ // was calculated by the original ScanInstructions, as it is correct.
+ TrueBBICalc.NonPredSize = TrueBBI.NonPredSize;
+ FalseBBICalc.NonPredSize = FalseBBI.NonPredSize;
return true;
}
-/// ScanInstructions - Scan all the instructions in the block to determine if
-/// the block is predicable. In most cases, that means all the instructions
-/// in the block are isPredicable(). Also checks if the block contains any
-/// instruction which can clobber a predicate (e.g. condition code register).
-/// If so, the block is not predicable unless it's the last instruction.
-void IfConverter::ScanInstructions(BBInfo &BBI) {
+/// ValidDiamond - Returns true if the 'true' and 'false' blocks (along
+/// with their common predecessor) form a valid diamond shape for ifcvt.
+bool IfConverter::ValidDiamond(
+ BBInfo &TrueBBI, BBInfo &FalseBBI,
+ unsigned &Dups1, unsigned &Dups2,
+ BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const {
+ Dups1 = Dups2 = 0;
+ if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone ||
+ FalseBBI.IsBeingAnalyzed || FalseBBI.IsDone)
+ return false;
+
+ MachineBasicBlock *TT = TrueBBI.TrueBB;
+ MachineBasicBlock *FT = FalseBBI.TrueBB;
+
+ if (!TT && blockAlwaysFallThrough(TrueBBI))
+ TT = getNextBlock(*TrueBBI.BB);
+ if (!FT && blockAlwaysFallThrough(FalseBBI))
+ FT = getNextBlock(*FalseBBI.BB);
+ if (TT != FT)
+ return false;
+ if (!TT && (TrueBBI.IsBrAnalyzable || FalseBBI.IsBrAnalyzable))
+ return false;
+ if (TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1)
+ return false;
+
+ // FIXME: Allow true block to have an early exit?
+ if (TrueBBI.FalseBB || FalseBBI.FalseBB)
+ return false;
+
+ // Count duplicate instructions at the beginning and end of the true and
+ // false blocks.
+ // Skip unconditional branches only if we are considering an analyzable
+ // diamond. Otherwise the branches must be the same.
+ bool SkipUnconditionalBranches =
+ TrueBBI.IsBrAnalyzable && FalseBBI.IsBrAnalyzable;
+ MachineBasicBlock::iterator TIB = TrueBBI.BB->begin();
+ MachineBasicBlock::iterator FIB = FalseBBI.BB->begin();
+ MachineBasicBlock::iterator TIE = TrueBBI.BB->end();
+ MachineBasicBlock::iterator FIE = FalseBBI.BB->end();
+ if (!CountDuplicatedInstructions(TIB, FIB, TIE, FIE, Dups1, Dups2,
+ *TrueBBI.BB, *FalseBBI.BB,
+ SkipUnconditionalBranches))
+ return false;
+
+ TrueBBICalc.BB = TrueBBI.BB;
+ FalseBBICalc.BB = FalseBBI.BB;
+ if (!RescanInstructions(TIB, FIB, TIE, FIE, TrueBBICalc, FalseBBICalc))
+ return false;
+ // The size is used to decide whether to if-convert, and the shared portions
+ // are subtracted off. Because of the subtraction, we just use the size that
+ // was calculated by the original ScanInstructions, as it is correct.
+ TrueBBICalc.NonPredSize = TrueBBI.NonPredSize;
+ FalseBBICalc.NonPredSize = FalseBBI.NonPredSize;
+ return true;
+}
+
+/// AnalyzeBranches - Look at the branches at the end of a block to determine if
+/// the block is predicable.
+void IfConverter::AnalyzeBranches(BBInfo &BBI) {
if (BBI.IsDone)
return;
- bool AlreadyPredicated = !BBI.Predicate.empty();
- // First analyze the end of BB branches.
BBI.TrueBB = BBI.FalseBB = nullptr;
BBI.BrCond.clear();
BBI.IsBrAnalyzable =
!TII->analyzeBranch(*BBI.BB, BBI.TrueBB, BBI.FalseBB, BBI.BrCond);
+ SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end());
+ BBI.IsBrReversible = (RevCond.size() == 0) ||
+ !TII->reverseBranchCondition(RevCond);
BBI.HasFallThrough = BBI.IsBrAnalyzable && BBI.FalseBB == nullptr;
if (BBI.BrCond.size()) {
@@ -666,16 +945,29 @@ void IfConverter::ScanInstructions(BBInfo &BBI) {
if (!BBI.FalseBB) {
// Malformed bcc? True and false blocks are the same?
BBI.IsUnpredicable = true;
- return;
}
}
+}
+
+/// ScanInstructions - Scan all the instructions in the block to determine if
+/// the block is predicable. In most cases, that means all the instructions
+/// in the block are isPredicable(). Also checks if the block contains any
+/// instruction which can clobber a predicate (e.g. condition code register).
+/// If so, the block is not predicable unless it's the last instruction.
+void IfConverter::ScanInstructions(BBInfo &BBI,
+ MachineBasicBlock::iterator &Begin,
+ MachineBasicBlock::iterator &End,
+ bool BranchUnpredicable) const {
+ if (BBI.IsDone || BBI.IsUnpredicable)
+ return;
+
+ bool AlreadyPredicated = !BBI.Predicate.empty();
- // Then scan all the instructions.
BBI.NonPredSize = 0;
BBI.ExtraCost = 0;
BBI.ExtraCost2 = 0;
BBI.ClobbersPred = false;
- for (auto &MI : *BBI.BB) {
+ for (MachineInstr &MI : make_range(Begin, End)) {
if (MI.isDebugValue())
continue;
@@ -715,6 +1007,11 @@ void IfConverter::ScanInstructions(BBInfo &BBI) {
bool isPredicated = TII->isPredicated(MI);
bool isCondBr = BBI.IsBrAnalyzable && MI.isConditionalBranch();
+ if (BranchUnpredicable && MI.isBranch()) {
+ BBI.IsUnpredicable = true;
+ return;
+ }
+
// A conditional branch is not predicable, but it may be eliminated.
if (isCondBr)
continue;
@@ -756,13 +1053,24 @@ void IfConverter::ScanInstructions(BBInfo &BBI) {
}
}
-/// FeasibilityAnalysis - Determine if the block is a suitable candidate to be
-/// predicated by the specified predicate.
+/// Determine if the block is a suitable candidate to be predicated by the
+/// specified predicate.
+/// @param BBI BBInfo for the block to check
+/// @param Pred Predicate array for the branch that leads to BBI
+/// @param isTriangle true if the analysis is for a triangle
+/// @param RevBranch true if Reverse(Pred) leads to BBI (e.g. BBI is the false
+/// case)
+/// @param hasCommonTail true if BBI shares a tail with a sibling block that
+/// contains any instruction that would make the block unpredicable.
bool IfConverter::FeasibilityAnalysis(BBInfo &BBI,
SmallVectorImpl<MachineOperand> &Pred,
- bool isTriangle, bool RevBranch) {
+ bool isTriangle, bool RevBranch,
+ bool hasCommonTail) {
// If the block is dead or unpredicable, then it cannot be predicated.
- if (BBI.IsDone || BBI.IsUnpredicable)
+ // Two blocks may share a common unpredicable tail, but this doesn't prevent
+ // them from being if-converted. The non-shared portion is assumed to have
+ // been checked.
+ if (BBI.IsDone || (BBI.IsUnpredicable && !hasCommonTail))
return false;
// If it is already predicated but we couldn't analyze its terminator, the
@@ -776,7 +1084,7 @@ bool IfConverter::FeasibilityAnalysis(BBInfo &BBI,
if (BBI.Predicate.size() && !TII->SubsumesPredicate(Pred, BBI.Predicate))
return false;
- if (BBI.BrCond.size()) {
+ if (!hasCommonTail && BBI.BrCond.size()) {
if (!isTriangle)
return false;
@@ -784,10 +1092,10 @@ bool IfConverter::FeasibilityAnalysis(BBInfo &BBI,
SmallVector<MachineOperand, 4> RevPred(Pred.begin(), Pred.end());
SmallVector<MachineOperand, 4> Cond(BBI.BrCond.begin(), BBI.BrCond.end());
if (RevBranch) {
- if (TII->ReverseBranchCondition(Cond))
+ if (TII->reverseBranchCondition(Cond))
return false;
}
- if (TII->ReverseBranchCondition(RevPred) ||
+ if (TII->reverseBranchCondition(RevPred) ||
!TII->SubsumesPredicate(Cond, RevPred))
return false;
}
@@ -795,13 +1103,12 @@ bool IfConverter::FeasibilityAnalysis(BBInfo &BBI,
return true;
}
-/// AnalyzeBlock - Analyze the structure of the sub-CFG starting from
-/// the specified block. Record its successors and whether it looks like an
-/// if-conversion candidate.
+/// Analyze the structure of the sub-CFG starting from the specified block.
+/// Record its successors and whether it looks like an if-conversion candidate.
void IfConverter::AnalyzeBlock(
- MachineBasicBlock *MBB, std::vector<std::unique_ptr<IfcvtToken>> &Tokens) {
+ MachineBasicBlock &MBB, std::vector<std::unique_ptr<IfcvtToken>> &Tokens) {
struct BBState {
- BBState(MachineBasicBlock *BB) : MBB(BB), SuccsAnalyzed(false) {}
+ BBState(MachineBasicBlock &MBB) : MBB(&MBB), SuccsAnalyzed(false) {}
MachineBasicBlock *MBB;
/// This flag is true if MBB's successors have been analyzed.
@@ -825,7 +1132,10 @@ void IfConverter::AnalyzeBlock(
BBI.BB = BB;
BBI.IsBeingAnalyzed = true;
- ScanInstructions(BBI);
+ AnalyzeBranches(BBI);
+ MachineBasicBlock::iterator Begin = BBI.BB->begin();
+ MachineBasicBlock::iterator End = BBI.BB->end();
+ ScanInstructions(BBI, Begin, End);
// Unanalyzable or ends with fallthrough or unconditional branch, or if is
// not considered for ifcvt anymore.
@@ -854,8 +1164,8 @@ void IfConverter::AnalyzeBlock(
// Push the False and True blocks to the stack.
State.SuccsAnalyzed = true;
- BBStack.push_back(BBI.FalseBB);
- BBStack.push_back(BBI.TrueBB);
+ BBStack.push_back(*BBI.FalseBB);
+ BBStack.push_back(*BBI.TrueBB);
continue;
}
@@ -871,7 +1181,7 @@ void IfConverter::AnalyzeBlock(
SmallVector<MachineOperand, 4>
RevCond(BBI.BrCond.begin(), BBI.BrCond.end());
- bool CanRevCond = !TII->ReverseBranchCondition(RevCond);
+ bool CanRevCond = !TII->reverseBranchCondition(RevCond);
unsigned Dups = 0;
unsigned Dups2 = 0;
@@ -881,25 +1191,59 @@ void IfConverter::AnalyzeBlock(
BranchProbability Prediction = MBPI->getEdgeProbability(BB, TrueBBI.BB);
- if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) &&
- MeetIfcvtSizeLimit(*TrueBBI.BB, (TrueBBI.NonPredSize - (Dups + Dups2) +
- TrueBBI.ExtraCost), TrueBBI.ExtraCost2,
- *FalseBBI.BB, (FalseBBI.NonPredSize - (Dups + Dups2) +
- FalseBBI.ExtraCost),FalseBBI.ExtraCost2,
- Prediction) &&
- FeasibilityAnalysis(TrueBBI, BBI.BrCond) &&
- FeasibilityAnalysis(FalseBBI, RevCond)) {
- // Diamond:
- // EBB
- // / \_
- // | |
- // TBB FBB
- // \ /
- // TailBB
- // Note TailBB can be empty.
- Tokens.push_back(llvm::make_unique<IfcvtToken>(
- BBI, ICDiamond, TNeedSub | FNeedSub, Dups, Dups2));
- Enqueued = true;
+ if (CanRevCond) {
+ BBInfo TrueBBICalc, FalseBBICalc;
+ auto feasibleDiamond = [&]() {
+ bool MeetsSize = MeetIfcvtSizeLimit(
+ *TrueBBI.BB, (TrueBBICalc.NonPredSize - (Dups + Dups2) +
+ TrueBBICalc.ExtraCost), TrueBBICalc.ExtraCost2,
+ *FalseBBI.BB, (FalseBBICalc.NonPredSize - (Dups + Dups2) +
+ FalseBBICalc.ExtraCost), FalseBBICalc.ExtraCost2,
+ Prediction);
+ bool TrueFeasible = FeasibilityAnalysis(TrueBBI, BBI.BrCond,
+ /* IsTriangle */ false, /* RevCond */ false,
+ /* hasCommonTail */ true);
+ bool FalseFeasible = FeasibilityAnalysis(FalseBBI, RevCond,
+ /* IsTriangle */ false, /* RevCond */ false,
+ /* hasCommonTail */ true);
+ return MeetsSize && TrueFeasible && FalseFeasible;
+ };
+
+ if (ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2,
+ TrueBBICalc, FalseBBICalc)) {
+ if (feasibleDiamond()) {
+ // Diamond:
+ //    EBB
+ //    / \_
+ //   |   |
+ //  TBB FBB
+ //    \ /
+ //   TailBB
+ // Note TailBB can be empty.
+ Tokens.push_back(llvm::make_unique<IfcvtToken>(
+ BBI, ICDiamond, TNeedSub | FNeedSub, Dups, Dups2,
+ (bool) TrueBBICalc.ClobbersPred, (bool) FalseBBICalc.ClobbersPred));
+ Enqueued = true;
+ }
+ } else if (ValidForkedDiamond(TrueBBI, FalseBBI, Dups, Dups2,
+ TrueBBICalc, FalseBBICalc)) {
+ if (feasibleDiamond()) {
+ // ForkedDiamond:
+ // if TBB and FBB have a common tail that includes their conditional
+ // branch instructions, then we can If Convert this pattern.
+ //       EBB
+ //     _/   \_
+ //    |       |
+ //   TBB     FBB
+ //   / \     / \
+ // FalseBB TrueBB FalseBB
+ //
+ Tokens.push_back(llvm::make_unique<IfcvtToken>(
+ BBI, ICForkedDiamond, TNeedSub | FNeedSub, Dups, Dups2,
+ (bool) TrueBBICalc.ClobbersPred, (bool) FalseBBICalc.ClobbersPred));
+ Enqueued = true;
+ }
+ }
}
if (ValidTriangle(TrueBBI, FalseBBI, false, Dups, Prediction) &&
@@ -985,25 +1329,23 @@ void IfConverter::AnalyzeBlock(
}
}
-/// AnalyzeBlocks - Analyze all blocks and find entries for all if-conversion
-/// candidates.
+/// Analyze all blocks and find entries for all if-conversion candidates.
void IfConverter::AnalyzeBlocks(
MachineFunction &MF, std::vector<std::unique_ptr<IfcvtToken>> &Tokens) {
- for (auto &BB : MF)
- AnalyzeBlock(&BB, Tokens);
+ for (MachineBasicBlock &MBB : MF)
+ AnalyzeBlock(MBB, Tokens);
// Sort to favor more complex ifcvt scheme.
std::stable_sort(Tokens.begin(), Tokens.end(), IfcvtTokenCmp);
}
-/// canFallThroughTo - Returns true either if ToBB is the next block after BB or
-/// that all the intervening blocks are empty (given BB can fall through to its
-/// next block).
-static bool canFallThroughTo(MachineBasicBlock *BB, MachineBasicBlock *ToBB) {
- MachineFunction::iterator PI = BB->getIterator();
+/// Returns true either if ToMBB is the next block after MBB or that all the
+/// intervening blocks are empty (given MBB can fall through to its next block).
+static bool canFallThroughTo(MachineBasicBlock &MBB, MachineBasicBlock &ToMBB) {
+ MachineFunction::iterator PI = MBB.getIterator();
MachineFunction::iterator I = std::next(PI);
- MachineFunction::iterator TI = ToBB->getIterator();
- MachineFunction::iterator E = BB->getParent()->end();
+ MachineFunction::iterator TI = ToMBB.getIterator();
+ MachineFunction::iterator E = MBB.getParent()->end();
while (I != TI) {
// Check isSuccessor to avoid case where the next block is empty, but
// it's not a successor.
@@ -1014,30 +1356,27 @@ static bool canFallThroughTo(MachineBasicBlock *BB, MachineBasicBlock *ToBB) {
return true;
}
-/// InvalidatePreds - Invalidate predecessor BB info so it would be re-analyzed
-/// to determine if it can be if-converted. If predecessor is already enqueued,
-/// dequeue it!
-void IfConverter::InvalidatePreds(MachineBasicBlock *BB) {
- for (const auto &Predecessor : BB->predecessors()) {
+/// Invalidate predecessor BB info so it would be re-analyzed to determine if it
+/// can be if-converted. If predecessor is already enqueued, dequeue it!
+void IfConverter::InvalidatePreds(MachineBasicBlock &MBB) {
+ for (const MachineBasicBlock *Predecessor : MBB.predecessors()) {
BBInfo &PBBI = BBAnalysis[Predecessor->getNumber()];
- if (PBBI.IsDone || PBBI.BB == BB)
+ if (PBBI.IsDone || PBBI.BB == &MBB)
continue;
PBBI.IsAnalyzed = false;
PBBI.IsEnqueued = false;
}
}
-/// InsertUncondBranch - Inserts an unconditional branch from BB to ToBB.
-///
-static void InsertUncondBranch(MachineBasicBlock *BB, MachineBasicBlock *ToBB,
+/// Inserts an unconditional branch from \p MBB to \p ToMBB.
+static void InsertUncondBranch(MachineBasicBlock &MBB, MachineBasicBlock &ToMBB,
const TargetInstrInfo *TII) {
DebugLoc dl; // FIXME: this is nowhere
SmallVector<MachineOperand, 0> NoCond;
- TII->InsertBranch(*BB, ToBB, nullptr, NoCond, dl);
+ TII->insertBranch(MBB, &ToMBB, nullptr, NoCond, dl);
}
-/// RemoveExtraEdges - Remove true / false edges if either / both are no longer
-/// successors.
+/// Remove true / false edges if either / both are no longer successors.
void IfConverter::RemoveExtraEdges(BBInfo &BBI) {
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
@@ -1046,29 +1385,42 @@ void IfConverter::RemoveExtraEdges(BBInfo &BBI) {
}
/// Behaves like LiveRegUnits::StepForward() but also adds implicit uses to all
-/// values defined in MI which are not live/used by MI.
+/// values defined in MI which are also live/used by MI.
static void UpdatePredRedefs(MachineInstr &MI, LivePhysRegs &Redefs) {
+ const TargetRegisterInfo *TRI = MI.getParent()->getParent()
+ ->getSubtarget().getRegisterInfo();
+
+ // Before stepping forward past MI, remember which regs were live
+ // before MI. This is needed to set the Undef flag only when reg is
+ // dead.
+ SparseSet<unsigned> LiveBeforeMI;
+ LiveBeforeMI.setUniverse(TRI->getNumRegs());
+ for (unsigned Reg : Redefs)
+ LiveBeforeMI.insert(Reg);
+
SmallVector<std::pair<unsigned, const MachineOperand*>, 4> Clobbers;
Redefs.stepForward(MI, Clobbers);
// Now add the implicit uses for each of the clobbered values.
- for (auto Reg : Clobbers) {
+ for (auto Clobber : Clobbers) {
// FIXME: Const cast here is nasty, but better than making StepForward
// take a mutable instruction instead of const.
- MachineOperand &Op = const_cast<MachineOperand&>(*Reg.second);
+ unsigned Reg = Clobber.first;
+ MachineOperand &Op = const_cast<MachineOperand&>(*Clobber.second);
MachineInstr *OpMI = Op.getParent();
MachineInstrBuilder MIB(*OpMI->getParent()->getParent(), OpMI);
if (Op.isRegMask()) {
// First handle regmasks. They clobber any entries in the mask which
// means that we need a def for those registers.
- MIB.addReg(Reg.first, RegState::Implicit | RegState::Undef);
+ if (LiveBeforeMI.count(Reg))
+ MIB.addReg(Reg, RegState::Implicit);
// We also need to add an implicit def of this register for the later
// use to read from.
// For the register allocator to have allocated a register clobbered
// by the call which is used later, it must be the case that
// the call doesn't return.
- MIB.addReg(Reg.first, RegState::Implicit | RegState::Define);
+ MIB.addReg(Reg, RegState::Implicit | RegState::Define);
continue;
}
assert(Op.isReg() && "Register operand required");
@@ -1078,13 +1430,23 @@ static void UpdatePredRedefs(MachineInstr &MI, LivePhysRegs &Redefs) {
if (Redefs.contains(Op.getReg()))
Op.setIsDead(false);
}
- MIB.addReg(Reg.first, RegState::Implicit | RegState::Undef);
+ if (LiveBeforeMI.count(Reg))
+ MIB.addReg(Reg, RegState::Implicit);
+ else {
+ bool HasLiveSubReg = false;
+ for (MCSubRegIterator S(Reg, TRI); S.isValid(); ++S) {
+ if (!LiveBeforeMI.count(*S))
+ continue;
+ HasLiveSubReg = true;
+ break;
+ }
+ if (HasLiveSubReg)
+ MIB.addReg(Reg, RegState::Implicit);
+ }
}
}
-/**
- * Remove kill flags from operands with a registers in the @p DontKill set.
- */
+/// Remove kill flags from operands with registers in the \p DontKill set.
static void RemoveKills(MachineInstr &MI, const LivePhysRegs &DontKill) {
for (MIBundleOperands O(MI); O.isValid(); ++O) {
if (!O->isReg() || !O->isKill())
@@ -1094,20 +1456,17 @@ static void RemoveKills(MachineInstr &MI, const LivePhysRegs &DontKill) {
}
}
-/**
- * Walks a range of machine instructions and removes kill flags for registers
- * in the @p DontKill set.
- */
+/// Walks a range of machine instructions and removes kill flags for registers
+/// in the \p DontKill set.
static void RemoveKills(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator E,
const LivePhysRegs &DontKill,
const MCRegisterInfo &MCRI) {
- for ( ; I != E; ++I)
- RemoveKills(*I, DontKill);
+ for (MachineInstr &MI : make_range(I, E))
+ RemoveKills(MI, DontKill);
}
-/// IfConvertSimple - If convert a simple (split, no rejoin) sub-CFG.
-///
+/// If convert a simple (split, no rejoin) sub-CFG.
bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
@@ -1118,54 +1477,56 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
if (Kind == ICSimpleFalse)
std::swap(CvtBBI, NextBBI);
+ MachineBasicBlock &CvtMBB = *CvtBBI->BB;
+ MachineBasicBlock &NextMBB = *NextBBI->BB;
if (CvtBBI->IsDone ||
- (CvtBBI->CannotBeCopied && CvtBBI->BB->pred_size() > 1)) {
+ (CvtBBI->CannotBeCopied && CvtMBB.pred_size() > 1)) {
// Something has changed. It's no longer safe to predicate this block.
BBI.IsAnalyzed = false;
CvtBBI->IsAnalyzed = false;
return false;
}
- if (CvtBBI->BB->hasAddressTaken())
+ if (CvtMBB.hasAddressTaken())
// Conservatively abort if-conversion if BB's address is taken.
return false;
if (Kind == ICSimpleFalse)
- if (TII->ReverseBranchCondition(Cond))
+ if (TII->reverseBranchCondition(Cond))
llvm_unreachable("Unable to reverse branch condition!");
 // Initialize liveins to the first BB. These are potentially redefined by
// predicated instructions.
- Redefs.init(TRI);
- Redefs.addLiveIns(*CvtBBI->BB);
- Redefs.addLiveIns(*NextBBI->BB);
+ Redefs.init(*TRI);
+ Redefs.addLiveIns(CvtMBB);
+ Redefs.addLiveIns(NextMBB);
// Compute a set of registers which must not be killed by instructions in
// BB1: This is everything live-in to BB2.
- DontKill.init(TRI);
- DontKill.addLiveIns(*NextBBI->BB);
+ DontKill.init(*TRI);
+ DontKill.addLiveIns(NextMBB);
- if (CvtBBI->BB->pred_size() > 1) {
- BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ if (CvtMBB.pred_size() > 1) {
+ BBI.NonPredSize -= TII->removeBranch(*BBI.BB);
// Copy instructions in the true block, predicate them, and add them to
// the entry block.
CopyAndPredicateBlock(BBI, *CvtBBI, Cond);
// RemoveExtraEdges won't work if the block has an unanalyzable branch, so
// explicitly remove CvtBBI as a successor.
- BBI.BB->removeSuccessor(CvtBBI->BB, true);
+ BBI.BB->removeSuccessor(&CvtMBB, true);
} else {
- RemoveKills(CvtBBI->BB->begin(), CvtBBI->BB->end(), DontKill, *TRI);
- PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond);
+ RemoveKills(CvtMBB.begin(), CvtMBB.end(), DontKill, *TRI);
+ PredicateBlock(*CvtBBI, CvtMBB.end(), Cond);
// Merge converted block into entry block.
- BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ BBI.NonPredSize -= TII->removeBranch(*BBI.BB);
MergeBlocks(BBI, *CvtBBI);
}
bool IterIfcvt = true;
- if (!canFallThroughTo(BBI.BB, NextBBI->BB)) {
- InsertUncondBranch(BBI.BB, NextBBI->BB, TII);
+ if (!canFallThroughTo(*BBI.BB, NextMBB)) {
+ InsertUncondBranch(*BBI.BB, NextMBB, TII);
BBI.HasFallThrough = false;
// Now ifcvt'd block will look like this:
// BB:
@@ -1185,15 +1546,14 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) {
// Update block info. BB can be iteratively if-converted.
if (!IterIfcvt)
BBI.IsDone = true;
- InvalidatePreds(BBI.BB);
+ InvalidatePreds(*BBI.BB);
CvtBBI->IsDone = true;
// FIXME: Must maintain LiveIns.
return true;
}
-/// IfConvertTriangle - If convert a triangle sub-CFG.
-///
+/// If convert a triangle sub-CFG.
bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
@@ -1205,29 +1565,29 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
if (Kind == ICTriangleFalse || Kind == ICTriangleFRev)
std::swap(CvtBBI, NextBBI);
+ MachineBasicBlock &CvtMBB = *CvtBBI->BB;
+ MachineBasicBlock &NextMBB = *NextBBI->BB;
if (CvtBBI->IsDone ||
- (CvtBBI->CannotBeCopied && CvtBBI->BB->pred_size() > 1)) {
+ (CvtBBI->CannotBeCopied && CvtMBB.pred_size() > 1)) {
// Something has changed. It's no longer safe to predicate this block.
BBI.IsAnalyzed = false;
CvtBBI->IsAnalyzed = false;
return false;
}
- if (CvtBBI->BB->hasAddressTaken())
+ if (CvtMBB.hasAddressTaken())
// Conservatively abort if-conversion if BB's address is taken.
return false;
if (Kind == ICTriangleFalse || Kind == ICTriangleFRev)
- if (TII->ReverseBranchCondition(Cond))
+ if (TII->reverseBranchCondition(Cond))
llvm_unreachable("Unable to reverse branch condition!");
if (Kind == ICTriangleRev || Kind == ICTriangleFRev) {
- if (ReverseBranchCondition(*CvtBBI)) {
+ if (reverseBranchCondition(*CvtBBI)) {
// BB has been changed, modify its predecessors (except for this
// one) so they don't get ifcvt'ed based on bad intel.
- for (MachineBasicBlock::pred_iterator PI = CvtBBI->BB->pred_begin(),
- E = CvtBBI->BB->pred_end(); PI != E; ++PI) {
- MachineBasicBlock *PBB = *PI;
+ for (MachineBasicBlock *PBB : CvtMBB.predecessors()) {
if (PBB == BBI.BB)
continue;
BBInfo &PBBI = BBAnalysis[PBB->getNumber()];
@@ -1241,9 +1601,9 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
// Initialize liveins to the first BB. These are potentially redefined by
// predicated instructions.
- Redefs.init(TRI);
- Redefs.addLiveIns(*CvtBBI->BB);
- Redefs.addLiveIns(*NextBBI->BB);
+ Redefs.init(*TRI);
+ Redefs.addLiveIns(CvtMBB);
+ Redefs.addLiveIns(NextMBB);
DontKill.clear();
@@ -1251,29 +1611,29 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
BranchProbability CvtNext, CvtFalse, BBNext, BBCvt;
if (HasEarlyExit) {
- // Get probabilities before modifying CvtBBI->BB and BBI.BB.
- CvtNext = MBPI->getEdgeProbability(CvtBBI->BB, NextBBI->BB);
- CvtFalse = MBPI->getEdgeProbability(CvtBBI->BB, CvtBBI->FalseBB);
- BBNext = MBPI->getEdgeProbability(BBI.BB, NextBBI->BB);
- BBCvt = MBPI->getEdgeProbability(BBI.BB, CvtBBI->BB);
+ // Get probabilities before modifying CvtMBB and BBI.BB.
+ CvtNext = MBPI->getEdgeProbability(&CvtMBB, &NextMBB);
+ CvtFalse = MBPI->getEdgeProbability(&CvtMBB, CvtBBI->FalseBB);
+ BBNext = MBPI->getEdgeProbability(BBI.BB, &NextMBB);
+ BBCvt = MBPI->getEdgeProbability(BBI.BB, &CvtMBB);
}
- if (CvtBBI->BB->pred_size() > 1) {
- BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ if (CvtMBB.pred_size() > 1) {
+ BBI.NonPredSize -= TII->removeBranch(*BBI.BB);
// Copy instructions in the true block, predicate them, and add them to
// the entry block.
CopyAndPredicateBlock(BBI, *CvtBBI, Cond, true);
// RemoveExtraEdges won't work if the block has an unanalyzable branch, so
// explicitly remove CvtBBI as a successor.
- BBI.BB->removeSuccessor(CvtBBI->BB, true);
+ BBI.BB->removeSuccessor(&CvtMBB, true);
} else {
// Predicate the 'true' block after removing its branch.
- CvtBBI->NonPredSize -= TII->RemoveBranch(*CvtBBI->BB);
- PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond);
+ CvtBBI->NonPredSize -= TII->removeBranch(CvtMBB);
+ PredicateBlock(*CvtBBI, CvtMBB.end(), Cond);
// Now merge the entry of the triangle with the true block.
- BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ BBI.NonPredSize -= TII->removeBranch(*BBI.BB);
MergeBlocks(BBI, *CvtBBI, false);
}
@@ -1281,24 +1641,23 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
if (HasEarlyExit) {
SmallVector<MachineOperand, 4> RevCond(CvtBBI->BrCond.begin(),
CvtBBI->BrCond.end());
- if (TII->ReverseBranchCondition(RevCond))
+ if (TII->reverseBranchCondition(RevCond))
llvm_unreachable("Unable to reverse branch condition!");
// Update the edge probability for both CvtBBI->FalseBB and NextBBI.
- // NewNext = New_Prob(BBI.BB, NextBBI->BB) =
- // Prob(BBI.BB, NextBBI->BB) +
- // Prob(BBI.BB, CvtBBI->BB) * Prob(CvtBBI->BB, NextBBI->BB)
+ // NewNext = New_Prob(BBI.BB, NextMBB) =
+ // Prob(BBI.BB, NextMBB) +
+ // Prob(BBI.BB, CvtMBB) * Prob(CvtMBB, NextMBB)
// NewFalse = New_Prob(BBI.BB, CvtBBI->FalseBB) =
- // Prob(BBI.BB, CvtBBI->BB) * Prob(CvtBBI->BB, CvtBBI->FalseBB)
- auto NewTrueBB = getNextBlock(BBI.BB);
+ // Prob(BBI.BB, CvtMBB) * Prob(CvtMBB, CvtBBI->FalseBB)
+ auto NewTrueBB = getNextBlock(*BBI.BB);
auto NewNext = BBNext + BBCvt * CvtNext;
- auto NewTrueBBIter =
- std::find(BBI.BB->succ_begin(), BBI.BB->succ_end(), NewTrueBB);
+ auto NewTrueBBIter = find(BBI.BB->successors(), NewTrueBB);
if (NewTrueBBIter != BBI.BB->succ_end())
BBI.BB->setSuccProbability(NewTrueBBIter, NewNext);
auto NewFalse = BBCvt * CvtFalse;
- TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, nullptr, RevCond, dl);
+ TII->insertBranch(*BBI.BB, CvtBBI->FalseBB, nullptr, RevCond, dl);
BBI.BB->addSuccessor(CvtBBI->FalseBB, NewFalse);
}
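[Editorial note: a minimal standalone sketch of the probability update above, using made-up numbers and plain doubles in place of llvm::BranchProbability; it is an illustration, not part of the patch.]

  // NewNext = Prob(BBI.BB, NextMBB) + Prob(BBI.BB, CvtMBB) * Prob(CvtMBB, NextMBB)
  // NewFalse = Prob(BBI.BB, CvtMBB) * Prob(CvtMBB, CvtBBI->FalseBB)
  #include <cstdio>

  int main() {
    double BBNext   = 0.30;  // hypothetical Prob(BBI.BB, NextMBB)
    double BBCvt    = 0.70;  // hypothetical Prob(BBI.BB, CvtMBB)
    double CvtNext  = 0.80;  // hypothetical Prob(CvtMBB, NextMBB)
    double CvtFalse = 0.20;  // hypothetical Prob(CvtMBB, CvtBBI->FalseBB)

    double NewNext  = BBNext + BBCvt * CvtNext;  // 0.30 + 0.70*0.80 = 0.86
    double NewFalse = BBCvt * CvtFalse;          // 0.70*0.20        = 0.14
    std::printf("NewNext=%.2f NewFalse=%.2f\n", NewNext, NewFalse);
    return 0;
  }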
@@ -1306,18 +1665,18 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
// predecessors. Otherwise, add an unconditional branch to 'false'.
bool FalseBBDead = false;
bool IterIfcvt = true;
- bool isFallThrough = canFallThroughTo(BBI.BB, NextBBI->BB);
+ bool isFallThrough = canFallThroughTo(*BBI.BB, NextMBB);
if (!isFallThrough) {
// Only merge them if the true block does not fallthrough to the false
// block. By not merging them, we make it possible to iteratively
// ifcvt the blocks.
if (!HasEarlyExit &&
- NextBBI->BB->pred_size() == 1 && !NextBBI->HasFallThrough &&
- !NextBBI->BB->hasAddressTaken()) {
+ NextMBB.pred_size() == 1 && !NextBBI->HasFallThrough &&
+ !NextMBB.hasAddressTaken()) {
MergeBlocks(BBI, *NextBBI);
FalseBBDead = true;
} else {
- InsertUncondBranch(BBI.BB, NextBBI->BB, TII);
+ InsertUncondBranch(*BBI.BB, NextMBB, TII);
BBI.HasFallThrough = false;
}
// Mixed predicated and unpredicated code. This cannot be iteratively
@@ -1330,7 +1689,7 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
// Update block info. BB can be iteratively if-converted.
if (!IterIfcvt)
BBI.IsDone = true;
- InvalidatePreds(BBI.BB);
+ InvalidatePreds(*BBI.BB);
CvtBBI->IsDone = true;
if (FalseBBDead)
NextBBI->IsDone = true;
@@ -1339,23 +1698,25 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) {
return true;
}
-/// IfConvertDiamond - If convert a diamond sub-CFG.
-///
-bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
- unsigned NumDups1, unsigned NumDups2) {
- BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
- BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
- MachineBasicBlock *TailBB = TrueBBI.TrueBB;
- // True block must fall through or end with an unanalyzable terminator.
- if (!TailBB) {
- if (blockAlwaysFallThrough(TrueBBI))
- TailBB = FalseBBI.TrueBB;
- assert((TailBB || !TrueBBI.IsBrAnalyzable) && "Unexpected!");
- }
+/// Common code shared between diamond conversions.
+/// \p BBI, \p TrueBBI, and \p FalseBBI form the diamond shape.
+/// \p NumDups1 - number of shared instructions at the beginning of \p TrueBBI
+/// and FalseBBI
+/// \p NumDups2 - number of shared instructions at the end of \p TrueBBI
+/// and \p FalseBBI
+/// \p RemoveBranch - Remove the common branch of the two blocks before
+/// predicating. Only false for unanalyzable fallthrough
+/// cases. The caller will replace the branch if necessary.
+/// \p MergeAddEdges - Add successor edges when merging blocks. Only false for
+/// unanalyzable fallthrough
+bool IfConverter::IfConvertDiamondCommon(
+ BBInfo &BBI, BBInfo &TrueBBI, BBInfo &FalseBBI,
+ unsigned NumDups1, unsigned NumDups2,
+ bool TClobbersPred, bool FClobbersPred,
+ bool RemoveBranch, bool MergeAddEdges) {
if (TrueBBI.IsDone || FalseBBI.IsDone ||
- TrueBBI.BB->pred_size() > 1 ||
- FalseBBI.BB->pred_size() > 1) {
+ TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1) {
// Something has changed. It's no longer safe to predicate these blocks.
BBI.IsAnalyzed = false;
TrueBBI.IsAnalyzed = false;
@@ -1373,36 +1734,45 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
BBInfo *BBI1 = &TrueBBI;
BBInfo *BBI2 = &FalseBBI;
SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end());
- if (TII->ReverseBranchCondition(RevCond))
+ if (TII->reverseBranchCondition(RevCond))
llvm_unreachable("Unable to reverse branch condition!");
SmallVector<MachineOperand, 4> *Cond1 = &BBI.BrCond;
SmallVector<MachineOperand, 4> *Cond2 = &RevCond;
// Figure out the more profitable ordering.
bool DoSwap = false;
- if (TrueBBI.ClobbersPred && !FalseBBI.ClobbersPred)
+ if (TClobbersPred && !FClobbersPred)
DoSwap = true;
- else if (TrueBBI.ClobbersPred == FalseBBI.ClobbersPred) {
+ else if (!TClobbersPred && !FClobbersPred) {
if (TrueBBI.NonPredSize > FalseBBI.NonPredSize)
DoSwap = true;
- }
+ } else if (TClobbersPred && FClobbersPred)
+ llvm_unreachable("Predicate info cannot be clobbered by both sides.");
if (DoSwap) {
std::swap(BBI1, BBI2);
std::swap(Cond1, Cond2);
}
// Remove the conditional branch from entry to the blocks.
- BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB);
+ BBI.NonPredSize -= TII->removeBranch(*BBI.BB);
- // Initialize liveins to the first BB. These are potentially redefined by
- // predicated instructions.
- Redefs.init(TRI);
- Redefs.addLiveIns(*BBI1->BB);
+ MachineBasicBlock &MBB1 = *BBI1->BB;
+ MachineBasicBlock &MBB2 = *BBI2->BB;
+
+ // Initialize the Redefs:
+ // - BB2 live-in regs need implicit uses before being redefined by BB1
+ // instructions.
+ // - BB1 live-out regs need implicit uses before being redefined by BB2
+ // instructions. We start with BB1 live-ins so we have the live-out regs
+ // after tracking the BB1 instructions.
+ Redefs.init(*TRI);
+ Redefs.addLiveIns(MBB1);
+ Redefs.addLiveIns(MBB2);
// Remove the duplicated instructions at the beginnings of both paths.
// Skip dbg_value instructions
- MachineBasicBlock::iterator DI1 = BBI1->BB->getFirstNonDebugInstr();
- MachineBasicBlock::iterator DI2 = BBI2->BB->getFirstNonDebugInstr();
+ MachineBasicBlock::iterator DI1 = MBB1.getFirstNonDebugInstr();
+ MachineBasicBlock::iterator DI2 = MBB2.getFirstNonDebugInstr();
BBI1->NonPredSize -= NumDups1;
BBI2->NonPredSize -= NumDups1;
@@ -1421,52 +1791,58 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
// Compute a set of registers which must not be killed by instructions in BB1:
// This is everything used+live in BB2 after the duplicated instructions. We
// can compute this set by simulating liveness backwards from the end of BB2.
- DontKill.init(TRI);
- for (MachineBasicBlock::reverse_iterator I = BBI2->BB->rbegin(),
- E = MachineBasicBlock::reverse_iterator(DI2); I != E; ++I) {
- DontKill.stepBackward(*I);
- }
+ DontKill.init(*TRI);
+ for (const MachineInstr &MI : make_range(MBB2.rbegin(), ++DI2.getReverse()))
+ DontKill.stepBackward(MI);
- for (MachineBasicBlock::const_iterator I = BBI1->BB->begin(), E = DI1; I != E;
- ++I) {
+ for (const MachineInstr &MI : make_range(MBB1.begin(), DI1)) {
SmallVector<std::pair<unsigned, const MachineOperand*>, 4> IgnoredClobbers;
- Redefs.stepForward(*I, IgnoredClobbers);
+ Redefs.stepForward(MI, IgnoredClobbers);
}
- BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1);
- BBI2->BB->erase(BBI2->BB->begin(), DI2);
-
- // Remove branch from the 'true' block, unless it was not analyzable.
- // Non-analyzable branches need to be preserved, since in such cases,
- // the CFG structure is not an actual diamond (the join block may not
- // be present).
- if (BBI1->IsBrAnalyzable)
- BBI1->NonPredSize -= TII->RemoveBranch(*BBI1->BB);
+ BBI.BB->splice(BBI.BB->end(), &MBB1, MBB1.begin(), DI1);
+ MBB2.erase(MBB2.begin(), DI2);
+
+ // The branches have been checked to match, so it is safe to remove the branch
+ // in BB1 and rely on the copy in BB2
+#ifndef NDEBUG
+ // Unanalyzable branches must match exactly. Check that now.
+ if (!BBI1->IsBrAnalyzable)
+ verifySameBranchInstructions(&MBB1, &MBB2);
+#endif
+ BBI1->NonPredSize -= TII->removeBranch(*BBI1->BB);
// Remove duplicated instructions.
- DI1 = BBI1->BB->end();
+ DI1 = MBB1.end();
for (unsigned i = 0; i != NumDups2; ) {
// NumDups2 only counted non-dbg_value instructions, so this won't
// run off the head of the list.
- assert (DI1 != BBI1->BB->begin());
+ assert(DI1 != MBB1.begin());
--DI1;
// skip dbg_value instructions
if (!DI1->isDebugValue())
++i;
}
- BBI1->BB->erase(DI1, BBI1->BB->end());
+ MBB1.erase(DI1, MBB1.end());
// Kill flags in the true block for registers living into the false block
// must be removed.
- RemoveKills(BBI1->BB->begin(), BBI1->BB->end(), DontKill, *TRI);
+ RemoveKills(MBB1.begin(), MBB1.end(), DontKill, *TRI);
- // Remove 'false' block branch (unless it was not analyzable), and find
- // the last instruction to predicate.
- if (BBI2->IsBrAnalyzable)
- BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB);
DI2 = BBI2->BB->end();
+ // The branches have been checked to match. Skip over the branch in the false
+ // block so that we don't try to predicate it.
+ if (RemoveBranch)
+ BBI2->NonPredSize -= TII->removeBranch(*BBI2->BB);
+ else {
+ do {
+ assert(DI2 != MBB2.begin());
+ DI2--;
+ } while (DI2->isBranch() || DI2->isDebugValue());
+ DI2++;
+ }
while (NumDups2 != 0) {
// NumDups2 only counted non-dbg_value instructions, so this won't
// run off the head of the list.
- assert (DI2 != BBI2->BB->begin());
+ assert(DI2 != MBB2.begin());
--DI2;
// skip dbg_value instructions
if (!DI2->isDebugValue())
@@ -1483,13 +1859,12 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
// addne r0, r1, #1
SmallSet<unsigned, 4> RedefsByFalse;
SmallSet<unsigned, 4> ExtUses;
- if (TII->isProfitableToUnpredicate(*BBI1->BB, *BBI2->BB)) {
- for (MachineBasicBlock::iterator FI = BBI2->BB->begin(); FI != DI2; ++FI) {
- if (FI->isDebugValue())
+ if (TII->isProfitableToUnpredicate(MBB1, MBB2)) {
+ for (const MachineInstr &FI : make_range(MBB2.begin(), DI2)) {
+ if (FI.isDebugValue())
continue;
SmallVector<unsigned, 4> Defs;
- for (unsigned i = 0, e = FI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = FI->getOperand(i);
+ for (const MachineOperand &MO : FI.operands()) {
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
@@ -1506,8 +1881,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
}
}
- for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
- unsigned Reg = Defs[i];
+ for (unsigned Reg : Defs) {
if (!ExtUses.count(Reg)) {
for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
SubRegs.isValid(); ++SubRegs)
@@ -1518,17 +1892,17 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
}
// Predicate the 'true' block.
- PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1, &RedefsByFalse);
+ PredicateBlock(*BBI1, MBB1.end(), *Cond1, &RedefsByFalse);
// After predicating BBI1, if there is a predicated terminator in BBI1 and
// a non-predicated in BBI2, then we don't want to predicate the one from
// BBI2. The reason is that if we merged these blocks, we would end up with
// two predicated terminators in the same block.
- if (!BBI2->BB->empty() && (DI2 == BBI2->BB->end())) {
- MachineBasicBlock::iterator BBI1T = BBI1->BB->getFirstTerminator();
- MachineBasicBlock::iterator BBI2T = BBI2->BB->getFirstTerminator();
- if (BBI1T != BBI1->BB->end() && TII->isPredicated(*BBI1T) &&
- BBI2T != BBI2->BB->end() && !TII->isPredicated(*BBI2T))
+ if (!MBB2.empty() && (DI2 == MBB2.end())) {
+ MachineBasicBlock::iterator BBI1T = MBB1.getFirstTerminator();
+ MachineBasicBlock::iterator BBI2T = MBB2.getFirstTerminator();
+ if (BBI1T != MBB1.end() && TII->isPredicated(*BBI1T) &&
+ BBI2T != MBB2.end() && !TII->isPredicated(*BBI2T))
--DI2;
}
@@ -1536,8 +1910,72 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
PredicateBlock(*BBI2, DI2, *Cond2);
// Merge the true block into the entry of the diamond.
- MergeBlocks(BBI, *BBI1, TailBB == nullptr);
- MergeBlocks(BBI, *BBI2, TailBB == nullptr);
+ MergeBlocks(BBI, *BBI1, MergeAddEdges);
+ MergeBlocks(BBI, *BBI2, MergeAddEdges);
+ return true;
+}
+
+/// If convert an almost-diamond sub-CFG where the true
+/// and false blocks share a common tail.
+bool IfConverter::IfConvertForkedDiamond(
+ BBInfo &BBI, IfcvtKind Kind,
+ unsigned NumDups1, unsigned NumDups2,
+ bool TClobbersPred, bool FClobbersPred) {
+ BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
+ BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+
+ // Save the debug location for later.
+ DebugLoc dl;
+ MachineBasicBlock::iterator TIE = TrueBBI.BB->getFirstTerminator();
+ if (TIE != TrueBBI.BB->end())
+ dl = TIE->getDebugLoc();
+ // Removing branches from both blocks is safe, because we have already
+ // determined that both blocks have the same branch instructions. The branch
+ // will be added back at the end, unpredicated.
+ if (!IfConvertDiamondCommon(
+ BBI, TrueBBI, FalseBBI,
+ NumDups1, NumDups2,
+ TClobbersPred, FClobbersPred,
+ /* RemoveBranch */ true, /* MergeAddEdges */ true))
+ return false;
+
+ // Add back the branch.
+ // Debug location saved above when removing the branch from BBI2
+ TII->insertBranch(*BBI.BB, TrueBBI.TrueBB, TrueBBI.FalseBB,
+ TrueBBI.BrCond, dl);
+
+ RemoveExtraEdges(BBI);
+
+ // Update block info.
+ BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true;
+ InvalidatePreds(*BBI.BB);
+
+ // FIXME: Must maintain LiveIns.
+ return true;
+}
+
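[Editorial note: a shorthand picture of the two CFG shapes handled by the function above and the one below; this is an editorial sketch consistent with the patch comments, not text from the patch.]

// Diamond:        EBB -> {TBB, FBB}; both TBB and FBB reach a common TailBB,
//                 so the tail can be merged or branched to after conversion.
// Forked diamond: EBB -> {TBB, FBB}; TBB and FBB end with identical
//                 conditional branches to the same pair of successors, so
//                 there is no single join block and the shared branch is
//                 re-inserted, unpredicated, after conversion.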
+/// If convert a diamond sub-CFG.
+bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
+ unsigned NumDups1, unsigned NumDups2,
+ bool TClobbersPred, bool FClobbersPred) {
+ BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
+ BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+ MachineBasicBlock *TailBB = TrueBBI.TrueBB;
+
+ // True block must fall through or end with an unanalyzable terminator.
+ if (!TailBB) {
+ if (blockAlwaysFallThrough(TrueBBI))
+ TailBB = FalseBBI.TrueBB;
+ assert((TailBB || !TrueBBI.IsBrAnalyzable) && "Unexpected!");
+ }
+
+ if (!IfConvertDiamondCommon(
+ BBI, TrueBBI, FalseBBI,
+ NumDups1, NumDups2,
+ TClobbersPred, FClobbersPred,
+ /* RemoveBranch */ TrueBBI.IsBrAnalyzable,
+ /* MergeAddEdges */ TailBB == nullptr))
+ return false;
// If the if-converted block falls through or unconditionally branches into
// the tail block, and the tail block does not have other predecessors, then
@@ -1560,7 +1998,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
CanMergeTail = false;
else if (NumPreds == 1 && CanMergeTail) {
MachineBasicBlock::pred_iterator PI = TailBB->pred_begin();
- if (*PI != BBI1->BB && *PI != BBI2->BB)
+ if (*PI != TrueBBI.BB && *PI != FalseBBI.BB)
CanMergeTail = false;
}
if (CanMergeTail) {
@@ -1568,7 +2006,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
TailBBI.IsDone = true;
} else {
BBI.BB->addSuccessor(TailBB, BranchProbability::getOne());
- InsertUncondBranch(BBI.BB, TailBB, TII);
+ InsertUncondBranch(*BBI.BB, *TailBB, TII);
BBI.HasFallThrough = false;
}
}
@@ -1576,13 +2014,13 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
// RemoveExtraEdges won't work if the block has an unanalyzable branch,
// which can happen here if TailBB is unanalyzable and is merged, so
// explicitly remove BBI1 and BBI2 as successors.
- BBI.BB->removeSuccessor(BBI1->BB);
- BBI.BB->removeSuccessor(BBI2->BB, true);
+ BBI.BB->removeSuccessor(TrueBBI.BB);
+ BBI.BB->removeSuccessor(FalseBBI.BB, /* NormalizeSuccessProbs */ true);
RemoveExtraEdges(BBI);
// Update block info.
BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true;
- InvalidatePreds(BBI.BB);
+ InvalidatePreds(*BBI.BB);
// FIXME: Must maintain LiveIns.
return true;
@@ -1594,8 +2032,7 @@ static bool MaySpeculate(const MachineInstr &MI,
if (!MI.isSafeToMove(nullptr, SawStore))
return false;
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
@@ -1608,15 +2045,15 @@ static bool MaySpeculate(const MachineInstr &MI,
return true;
}
-/// PredicateBlock - Predicate instructions from the start of the block to the
-/// specified end with the specified condition.
+/// Predicate instructions from the start of the block to the specified end with
+/// the specified condition.
void IfConverter::PredicateBlock(BBInfo &BBI,
MachineBasicBlock::iterator E,
SmallVectorImpl<MachineOperand> &Cond,
SmallSet<unsigned, 4> *LaterRedefs) {
bool AnyUnpred = false;
bool MaySpec = LaterRedefs != nullptr;
- for (MachineInstr &I : llvm::make_range(BBI.BB->begin(), E)) {
+ for (MachineInstr &I : make_range(BBI.BB->begin(), E)) {
if (I.isDebugValue() || TII->isPredicated(I))
continue;
// It may be possible not to predicate an instruction if it's the 'true'
@@ -1651,14 +2088,15 @@ void IfConverter::PredicateBlock(BBInfo &BBI,
++NumUnpred;
}
-/// CopyAndPredicateBlock - Copy and predicate instructions from source BB to
-/// the destination block. Skip end of block branches if IgnoreBr is true.
+/// Copy and predicate instructions from source BB to the destination block.
+/// Skip end of block branches if IgnoreBr is true.
void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
SmallVectorImpl<MachineOperand> &Cond,
bool IgnoreBr) {
MachineFunction &MF = *ToBBI.BB->getParent();
- for (auto &I : *FromBBI.BB) {
+ MachineBasicBlock &FromMBB = *FromBBI.BB;
+ for (MachineInstr &I : FromMBB) {
// Do not copy the end of the block branches.
if (IgnoreBr && I.isBranch())
break;
@@ -1691,13 +2129,12 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
}
if (!IgnoreBr) {
- std::vector<MachineBasicBlock *> Succs(FromBBI.BB->succ_begin(),
- FromBBI.BB->succ_end());
- MachineBasicBlock *NBB = getNextBlock(FromBBI.BB);
+ std::vector<MachineBasicBlock *> Succs(FromMBB.succ_begin(),
+ FromMBB.succ_end());
+ MachineBasicBlock *NBB = getNextBlock(FromMBB);
MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : nullptr;
- for (unsigned i = 0, e = Succs.size(); i != e; ++i) {
- MachineBasicBlock *Succ = Succs[i];
+ for (MachineBasicBlock *Succ : Succs) {
// Fallthrough edge can't be transferred.
if (Succ == FallThrough)
continue;
@@ -1714,25 +2151,25 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
++NumDupBBs;
}
-/// MergeBlocks - Move all instructions from FromBB to the end of ToBB.
-/// This will leave FromBB as an empty block, so remove all of its
-/// successor edges except for the fall-through edge. If AddEdges is true,
-/// i.e., when FromBBI's branch is being moved, add those successor edges to
-/// ToBBI.
+/// Move all instructions from FromBB to the end of ToBB. This will leave
+/// FromBB as an empty block, so remove all of its successor edges except for
+/// the fall-through edge. If AddEdges is true, i.e., when FromBBI's branch is
+/// being moved, add those successor edges to ToBBI.
void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
- assert(!FromBBI.BB->hasAddressTaken() &&
+ MachineBasicBlock &FromMBB = *FromBBI.BB;
+ assert(!FromMBB.hasAddressTaken() &&
"Removing a BB whose address is taken!");
- // In case FromBBI.BB contains terminators (e.g. return instruction),
+ // In case FromMBB contains terminators (e.g. return instruction),
// first move the non-terminator instructions, then the terminators.
- MachineBasicBlock::iterator FromTI = FromBBI.BB->getFirstTerminator();
+ MachineBasicBlock::iterator FromTI = FromMBB.getFirstTerminator();
MachineBasicBlock::iterator ToTI = ToBBI.BB->getFirstTerminator();
- ToBBI.BB->splice(ToTI, FromBBI.BB, FromBBI.BB->begin(), FromTI);
+ ToBBI.BB->splice(ToTI, &FromMBB, FromMBB.begin(), FromTI);
// If FromBB has non-predicated terminator we should copy it at the end.
- if (FromTI != FromBBI.BB->end() && !TII->isPredicated(*FromTI))
+ if (FromTI != FromMBB.end() && !TII->isPredicated(*FromTI))
ToTI = ToBBI.BB->end();
- ToBBI.BB->splice(ToTI, FromBBI.BB, FromTI, FromBBI.BB->end());
+ ToBBI.BB->splice(ToTI, &FromMBB, FromTI, FromMBB.end());
// Force normalizing the successors' probabilities of ToBBI.BB to convert all
// unknown probabilities into known ones.
@@ -1740,25 +2177,23 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
// eliminate all unknown probabilities in MBB.
ToBBI.BB->normalizeSuccProbs();
- SmallVector<MachineBasicBlock *, 4> FromSuccs(FromBBI.BB->succ_begin(),
- FromBBI.BB->succ_end());
- MachineBasicBlock *NBB = getNextBlock(FromBBI.BB);
+ SmallVector<MachineBasicBlock *, 4> FromSuccs(FromMBB.succ_begin(),
+ FromMBB.succ_end());
+ MachineBasicBlock *NBB = getNextBlock(FromMBB);
MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : nullptr;
- // The edge probability from ToBBI.BB to FromBBI.BB, which is only needed when
- // AddEdges is true and FromBBI.BB is a successor of ToBBI.BB.
+ // The edge probability from ToBBI.BB to FromMBB, which is only needed when
+ // AddEdges is true and FromMBB is a successor of ToBBI.BB.
auto To2FromProb = BranchProbability::getZero();
- if (AddEdges && ToBBI.BB->isSuccessor(FromBBI.BB)) {
- To2FromProb = MBPI->getEdgeProbability(ToBBI.BB, FromBBI.BB);
- // Set the edge probability from ToBBI.BB to FromBBI.BB to zero to avoid the
+ if (AddEdges && ToBBI.BB->isSuccessor(&FromMBB)) {
+ To2FromProb = MBPI->getEdgeProbability(ToBBI.BB, &FromMBB);
+ // Set the edge probability from ToBBI.BB to FromMBB to zero to avoid the
// edge probability being merged to other edges when this edge is removed
// later.
- ToBBI.BB->setSuccProbability(
- std::find(ToBBI.BB->succ_begin(), ToBBI.BB->succ_end(), FromBBI.BB),
- BranchProbability::getZero());
+ ToBBI.BB->setSuccProbability(find(ToBBI.BB->successors(), &FromMBB),
+ BranchProbability::getZero());
}
- for (unsigned i = 0, e = FromSuccs.size(); i != e; ++i) {
- MachineBasicBlock *Succ = FromSuccs[i];
+ for (MachineBasicBlock *Succ : FromSuccs) {
// Fallthrough edge can't be transferred.
if (Succ == FallThrough)
continue;
@@ -1766,26 +2201,26 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
auto NewProb = BranchProbability::getZero();
if (AddEdges) {
// Calculate the edge probability for the edge from ToBBI.BB to Succ,
- // which is a portion of the edge probability from FromBBI.BB to Succ. The
- // portion ratio is the edge probability from ToBBI.BB to FromBBI.BB (if
+ // which is a portion of the edge probability from FromMBB to Succ. The
+ // portion ratio is the edge probability from ToBBI.BB to FromMBB (if
// FromBBI is a successor of ToBBI.BB. See comment below for exception).
- NewProb = MBPI->getEdgeProbability(FromBBI.BB, Succ);
+ NewProb = MBPI->getEdgeProbability(&FromMBB, Succ);
- // To2FromProb is 0 when FromBBI.BB is not a successor of ToBBI.BB. This
- // only happens when if-converting a diamond CFG and FromBBI.BB is the
- // tail BB. In this case FromBBI.BB post-dominates ToBBI.BB and hence we
- // could just use the probabilities on FromBBI.BB's out-edges when adding
+ // To2FromProb is 0 when FromMBB is not a successor of ToBBI.BB. This
+ // only happens when if-converting a diamond CFG and FromMBB is the
+ // tail BB. In this case FromMBB post-dominates ToBBI.BB and hence we
+ // could just use the probabilities on FromMBB's out-edges when adding
// new successors.
if (!To2FromProb.isZero())
NewProb *= To2FromProb;
}
- FromBBI.BB->removeSuccessor(Succ);
+ FromMBB.removeSuccessor(Succ);
if (AddEdges) {
// If the edge from ToBBI.BB to Succ already exists, update the
// probability of this edge by adding NewProb to it. An example is shown
- // below, in which A is ToBBI.BB and B is FromBBI.BB. In this case we
+ // below, in which A is ToBBI.BB and B is FromMBB. In this case we
// don't have to set C as A's successor as it already is. We only need to
// update the edge probability on A->C. Note that B will not be
// immediately removed from A's successors. It is possible that B->D is
@@ -1807,7 +2242,7 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
//
if (ToBBI.BB->isSuccessor(Succ))
ToBBI.BB->setSuccProbability(
- std::find(ToBBI.BB->succ_begin(), ToBBI.BB->succ_end(), Succ),
+ find(ToBBI.BB->successors(), Succ),
MBPI->getEdgeProbability(ToBBI.BB, Succ) + NewProb);
else
ToBBI.BB->addSuccessor(Succ, NewProb);
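[Editorial note: a standalone sketch of the redistribution above, with hypothetical numbers and doubles standing in for llvm::BranchProbability; A is ToBBI.BB, B is FromMBB, C is a shared successor. Not part of the patch.]

  #include <cstdio>

  int main() {
    double AtoB = 0.40;  // To2FromProb, the edge A->B before merging
    double AtoC = 0.60;  // existing edge A->C
    double BtoC = 1.00;  // FromMBB's only successor is C

    double NewProb = BtoC * AtoB;  // portion of A->B mass that flows to C
    AtoC += NewProb;               // A->C becomes 0.60 + 0.40 = 1.00
    std::printf("A->C after merge: %.2f\n", AtoC);
    return 0;
  }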
@@ -1815,8 +2250,8 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
}
// Now FromBBI always falls through to the next block!
- if (NBB && !FromBBI.BB->isSuccessor(NBB))
- FromBBI.BB->addSuccessor(NBB);
+ if (NBB && !FromMBB.isSuccessor(NBB))
+ FromMBB.addSuccessor(NBB);
// Normalize the probabilities of ToBBI.BB's successors with all adjustment
// we've done above.
@@ -1839,6 +2274,6 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
}
FunctionPass *
-llvm::createIfConverter(std::function<bool(const Function &)> Ftor) {
+llvm::createIfConverter(std::function<bool(const MachineFunction &)> Ftor) {
return new IfConverter(std::move(Ftor));
}
diff --git a/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp b/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp
index 31d6bd0b6dc6..9588dfb72058 100644
--- a/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp
+++ b/contrib/llvm/lib/CodeGen/ImplicitNullChecks.cpp
@@ -51,6 +51,12 @@ static cl::opt<int> PageSize("imp-null-check-page-size",
cl::desc("The page size of the target in bytes"),
cl::init(4096));
+static cl::opt<unsigned> MaxInstsToConsider(
+ "imp-null-max-insts-to-consider",
+ cl::desc("The max number of instructions to consider hoisting loads over "
+ "(the algorithm is quadratic over this number)"),
+ cl::init(8));
+
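[Editorial note: the quadratic bound in the option description comes from each candidate instruction being checked against every instruction already in InstsSeenSoFar; a rough standalone cost sketch, not the pass itself.]

  #include <cstdio>
  #include <vector>

  int main() {
    const unsigned MaxInstsToConsider = 8;  // mirrors the option's default
    unsigned PairwiseChecks = 0;
    std::vector<int> InstsSeenSoFar;
    for (unsigned I = 0; I < MaxInstsToConsider; ++I) {
      PairwiseChecks += InstsSeenSoFar.size();  // one check per prior inst
      InstsSeenSoFar.push_back(I);
    }
    std::printf("checks=%u\n", PairwiseChecks);  // 0+1+...+7 = 28
    return 0;
  }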
#define DEBUG_TYPE "implicit-null-checks"
STATISTIC(NumImplicitNullChecks,
@@ -59,6 +65,44 @@ STATISTIC(NumImplicitNullChecks,
namespace {
class ImplicitNullChecks : public MachineFunctionPass {
+ /// Return true if \c computeDependence can process \p MI.
+ static bool canHandle(const MachineInstr *MI);
+
+ /// Helper function for \c computeDependence. Return true if \p A
+ /// and \p B do not have any dependences between them, and can be
+ /// re-ordered without changing program semantics.
+ bool canReorder(const MachineInstr *A, const MachineInstr *B);
+
+ /// A data type for representing the result computed by \c
+ /// computeDependence. States whether it is okay to reorder the
+ /// instruction passed to \c computeDependence with at most one
+ /// dependency.
+ struct DependenceResult {
+ /// Can we actually re-order \p MI with \p Insts (see \c
+ /// computeDependence).
+ bool CanReorder;
+
+ /// If non-None, then an instruction in \p Insts that also must be
+ /// hoisted.
+ Optional<ArrayRef<MachineInstr *>::iterator> PotentialDependence;
+
+ /*implicit*/ DependenceResult(
+ bool CanReorder,
+ Optional<ArrayRef<MachineInstr *>::iterator> PotentialDependence)
+ : CanReorder(CanReorder), PotentialDependence(PotentialDependence) {
+ assert((!PotentialDependence || CanReorder) &&
+ "!CanReorder && PotentialDependence.hasValue() not allowed!");
+ }
+ };
+
+ /// Compute a result for the following question: can \p MI be
+ /// re-ordered from after \p Insts to before it.
+ ///
+ /// \c canHandle should return true for all instructions in \p
+ /// Insts.
+ DependenceResult computeDependence(const MachineInstr *MI,
+ ArrayRef<MachineInstr *> Insts);
+
/// Represents one null check that can be made implicit.
class NullCheck {
// The memory operation the null check can be folded into.
@@ -114,6 +158,19 @@ class ImplicitNullChecks : public MachineFunctionPass {
MachineBasicBlock *HandlerMBB);
void rewriteNullChecks(ArrayRef<NullCheck> NullCheckList);
+ /// Is \p MI a memory operation that can be used to implicitly null check the
+ /// value in \p PointerReg? \p PrevInsts is the set of instructions seen since
+ /// the explicit null check on \p PointerReg.
+ bool isSuitableMemoryOp(MachineInstr &MI, unsigned PointerReg,
+ ArrayRef<MachineInstr *> PrevInsts);
+
+ /// Return true if \p FaultingMI can be hoisted from after the
+ /// instructions in \p InstsSeenSoFar to before them. Set \p Dependence to a
+ /// non-null value if we also need to (and legally can) hoist a dependency.
+ bool canHoistLoadInst(MachineInstr *FaultingMI, unsigned PointerReg,
+ ArrayRef<MachineInstr *> InstsSeenSoFar,
+ MachineBasicBlock *NullSucc, MachineInstr *&Dependence);
+
public:
static char ID;
@@ -129,160 +186,70 @@ public:
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
};
-/// \brief Detect re-ordering hazards and dependencies.
-///
-/// This class keeps track of defs and uses, and can be queried if a given
-/// machine instruction can be re-ordered from after the machine instructions
-/// seen so far to before them.
-class HazardDetector {
- static MachineInstr *getUnknownMI() {
- return DenseMapInfo<MachineInstr *>::getTombstoneKey();
- }
-
- // Maps physical registers to the instruction defining them. If there has
- // been more than one def of an specific register, that register is mapped to
- // getUnknownMI().
- DenseMap<unsigned, MachineInstr *> RegDefs;
- DenseSet<unsigned> RegUses;
- const TargetRegisterInfo &TRI;
- bool hasSeenClobber;
- AliasAnalysis &AA;
-
-public:
- explicit HazardDetector(const TargetRegisterInfo &TRI, AliasAnalysis &AA)
- : TRI(TRI), hasSeenClobber(false), AA(AA) {}
+}
- /// \brief Make a note of \p MI for later queries to isSafeToHoist.
- ///
- /// May clobber this HazardDetector instance. \see isClobbered.
- void rememberInstruction(MachineInstr *MI);
+bool ImplicitNullChecks::canHandle(const MachineInstr *MI) {
+ if (MI->isCall() || MI->mayStore() || MI->hasUnmodeledSideEffects())
+ return false;
+ auto IsRegMask = [](const MachineOperand &MO) { return MO.isRegMask(); };
+ (void)IsRegMask;
- /// \brief Return true if it is safe to hoist \p MI from after all the
- /// instructions seen so far (via rememberInstruction) to before it. If \p MI
- /// has one and only one transitive dependency, set \p Dependency to that
- /// instruction. If there are more dependencies, return false.
- bool isSafeToHoist(MachineInstr *MI, MachineInstr *&Dependency);
+ assert(!llvm::any_of(MI->operands(), IsRegMask) &&
+ "Calls were filtered out above!");
- /// \brief Return true if this instance of HazardDetector has been clobbered
- /// (i.e. has no more useful information).
- ///
- /// A HazardDetecter is clobbered when it sees a construct it cannot
- /// understand, and it would have to return a conservative answer for all
- /// future queries. Having a separate clobbered state lets the client code
- /// bail early, without making queries about all of the future instructions
- /// (which would have returned the most conservative answer anyway).
- ///
- /// Calling rememberInstruction or isSafeToHoist on a clobbered HazardDetector
- /// is an error.
- bool isClobbered() { return hasSeenClobber; }
-};
+ auto IsUnordered = [](MachineMemOperand *MMO) { return MMO->isUnordered(); };
+ return llvm::all_of(MI->memoperands(), IsUnordered);
}
+ImplicitNullChecks::DependenceResult
+ImplicitNullChecks::computeDependence(const MachineInstr *MI,
+ ArrayRef<MachineInstr *> Block) {
+ assert(llvm::all_of(Block, canHandle) && "Check this first!");
+ assert(!llvm::is_contained(Block, MI) && "Block must be exclusive of MI!");
-void HazardDetector::rememberInstruction(MachineInstr *MI) {
- assert(!isClobbered() &&
- "Don't add instructions to a clobbered hazard detector");
+ Optional<ArrayRef<MachineInstr *>::iterator> Dep;
- if (MI->mayStore() || MI->hasUnmodeledSideEffects()) {
- hasSeenClobber = true;
- return;
- }
+ for (auto I = Block.begin(), E = Block.end(); I != E; ++I) {
+ if (canReorder(*I, MI))
+ continue;
- for (auto *MMO : MI->memoperands()) {
- // Right now we don't want to worry about LLVM's memory model.
- if (!MMO->isUnordered()) {
- hasSeenClobber = true;
- return;
+ if (Dep == None) {
+ // Found one possible dependency, keep track of it.
+ Dep = I;
+ } else {
+ // We found two dependencies, so bail out.
+ return {false, None};
}
}
- for (auto &MO : MI->operands()) {
- if (!MO.isReg() || !MO.getReg())
- continue;
-
- if (MO.isDef()) {
- auto It = RegDefs.find(MO.getReg());
- if (It == RegDefs.end())
- RegDefs.insert({MO.getReg(), MI});
- else {
- assert(It->second && "Found null MI?");
- It->second = getUnknownMI();
- }
- } else
- RegUses.insert(MO.getReg());
- }
+ return {true, Dep};
}
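[Editorial note: a standalone sketch of the same single-dependence scan, using toy register sets instead of MachineInstrs; the assumption that any overlapping register blocks reordering mirrors canReorder but is simplified. Not part of the patch.]

  #include <cstddef>
  #include <cstdio>
  #include <optional>
  #include <set>
  #include <utility>
  #include <vector>

  using Regs = std::set<int>;  // toy instruction: the registers it touches

  static bool canReorder(const Regs &A, const Regs &B) {
    for (int R : A)
      if (B.count(R))
        return false;  // overlapping register: keep original order
    return true;
  }

  // Returns {CanReorder, index of the single blocking instruction, if any}.
  static std::pair<bool, std::optional<size_t>>
  computeDependence(const Regs &MI, const std::vector<Regs> &Block) {
    std::optional<size_t> Dep;
    for (size_t I = 0; I != Block.size(); ++I) {
      if (canReorder(Block[I], MI))
        continue;
      if (!Dep)
        Dep = I;                        // first (and only allowed) dependence
      else
        return {false, std::nullopt};   // two dependences: give up
    }
    return {true, Dep};
  }

  int main() {
    std::vector<Regs> Block = {{1}, {2, 3}, {4}};
    auto R = computeDependence(/*MI=*/{3, 5}, Block);
    std::printf("CanReorder=%d Dep=%d\n", R.first ? 1 : 0,
                R.second ? (int)*R.second : -1);  // CanReorder=1 Dep=1
    return 0;
  }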
-bool HazardDetector::isSafeToHoist(MachineInstr *MI,
- MachineInstr *&Dependency) {
- assert(!isClobbered() && "isSafeToHoist cannot do anything useful!");
- Dependency = nullptr;
+bool ImplicitNullChecks::canReorder(const MachineInstr *A,
+ const MachineInstr *B) {
+ assert(canHandle(A) && canHandle(B) && "Precondition!");
- // Right now we don't want to worry about LLVM's memory model. This can be
- // made more precise later.
- for (auto *MMO : MI->memoperands())
- if (!MMO->isUnordered())
- return false;
+ // canHandle makes sure that we _can_ correctly analyze the dependencies
+ // between A and B here -- for instance, we should not be dealing with heap
+ // load-store dependencies here.
- for (auto &MO : MI->operands()) {
- if (MO.isReg() && MO.getReg()) {
- for (auto &RegDef : RegDefs) {
- unsigned Reg = RegDef.first;
- MachineInstr *MI = RegDef.second;
- if (!TRI.regsOverlap(Reg, MO.getReg()))
- continue;
+ for (auto MOA : A->operands()) {
+ if (!(MOA.isReg() && MOA.getReg()))
+ continue;
- // We found a write-after-write or read-after-write, see if the
- // instruction causing this dependency can be hoisted too.
-
- if (MI == getUnknownMI())
- // We don't have precise dependency information.
- return false;
-
- if (Dependency) {
- if (Dependency == MI)
- continue;
- // We already have one dependency, and we can track only one.
- return false;
- }
-
- // Now check if MI is actually a dependency that can be hoisted.
-
- // We don't want to track transitive dependencies. We already know that
- // MI is the only instruction that defines Reg, but we need to be sure
- // that it does not use any registers that have been defined (trivially
- // checked below by ensuring that there are no register uses), and that
- // it is the only def for every register it defines (otherwise we could
- // violate a write after write hazard).
- auto IsMIOperandSafe = [&](MachineOperand &MO) {
- if (!MO.isReg() || !MO.getReg())
- return true;
- if (MO.isUse())
- return false;
- assert((!MO.isDef() || RegDefs.count(MO.getReg())) &&
- "All defs must be tracked in RegDefs by now!");
- return !MO.isDef() || RegDefs.find(MO.getReg())->second == MI;
- };
-
- if (!all_of(MI->operands(), IsMIOperandSafe))
- return false;
-
- // Now check for speculation safety:
- bool SawStore = true;
- if (!MI->isSafeToMove(&AA, SawStore) || MI->mayLoad())
- return false;
-
- Dependency = MI;
- }
+ unsigned RegA = MOA.getReg();
+ for (auto MOB : B->operands()) {
+ if (!(MOB.isReg() && MOB.getReg()))
+ continue;
- if (MO.isDef())
- for (unsigned Reg : RegUses)
- if (TRI.regsOverlap(Reg, MO.getReg()))
- return false; // We found a write-after-read
+ unsigned RegB = MOB.getReg();
+
+ if (TRI->regsOverlap(RegA, RegB))
+ return false;
}
}
@@ -316,6 +283,96 @@ static bool AnyAliasLiveIn(const TargetRegisterInfo *TRI,
return false;
}
+bool ImplicitNullChecks::isSuitableMemoryOp(
+ MachineInstr &MI, unsigned PointerReg, ArrayRef<MachineInstr *> PrevInsts) {
+ int64_t Offset;
+ unsigned BaseReg;
+
+ if (!TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI) ||
+ BaseReg != PointerReg)
+ return false;
+
+ // We want the load to be issued at a sane offset from PointerReg, so that
+ // if PointerReg is null then the load reliably page faults.
+ if (!(MI.mayLoad() && !MI.isPredicable() && Offset < PageSize))
+ return false;
+
+ // Finally, we need to make sure that the load instruction actually is
+ // loading from PointerReg, and there isn't some re-definition of PointerReg
+ // between the compare and the load.
+ for (auto *PrevMI : PrevInsts)
+ for (auto &PrevMO : PrevMI->operands())
+ if (PrevMO.isReg() && PrevMO.getReg() &&
+ TRI->regsOverlap(PrevMO.getReg(), PointerReg))
+ return false;
+
+ return true;
+}
+
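[Editorial note: the offset check above relies on the usual assumption that the zero page is unmapped, so a null-based access faults only when the offset stays inside the first page. A trivial standalone sketch of that predicate; the 4096 default mirrors the option, and the non-negative-offset requirement is an extra assumption of this sketch, not a check the pass performs here.]

  #include <cstdint>
  #include <cstdio>

  // A null-based access faults only if it stays inside the (unmapped) zero page.
  static bool faultsWhenBaseIsNull(int64_t Offset, int64_t PageSize = 4096) {
    return Offset >= 0 && Offset < PageSize;
  }

  int main() {
    std::printf("%d %d\n", faultsWhenBaseIsNull(16), faultsWhenBaseIsNull(8192));
    return 0;
  }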
+bool ImplicitNullChecks::canHoistLoadInst(
+ MachineInstr *FaultingMI, unsigned PointerReg,
+ ArrayRef<MachineInstr *> InstsSeenSoFar, MachineBasicBlock *NullSucc,
+ MachineInstr *&Dependence) {
+ auto DepResult = computeDependence(FaultingMI, InstsSeenSoFar);
+ if (!DepResult.CanReorder)
+ return false;
+
+ if (!DepResult.PotentialDependence) {
+ Dependence = nullptr;
+ return true;
+ }
+
+ auto DependenceItr = *DepResult.PotentialDependence;
+ auto *DependenceMI = *DependenceItr;
+
+ // We don't want to reason about speculating loads. Note -- at this point
+ // we should have already filtered out all of the other non-speculatable
+ // things, like calls and stores.
+ assert(canHandle(DependenceMI) && "Should never have reached here!");
+ if (DependenceMI->mayLoad())
+ return false;
+
+ for (auto &DependenceMO : DependenceMI->operands()) {
+ if (!(DependenceMO.isReg() && DependenceMO.getReg()))
+ continue;
+
+ // Make sure that we won't clobber any live ins to the sibling block by
+ // hoisting Dependency. For instance, we can't hoist INST to before the
+ // null check (even if it is safe, and does not violate any dependencies in
+ // the non_null_block) if %rdx is live in to _null_block.
+ //
+ // test %rcx, %rcx
+ // je _null_block
+ // _non_null_block:
+ // %rdx<def> = INST
+ // ...
+ //
+ // This restriction does not apply to the faulting load inst because in
+ // case the pointer loaded from is in the null page, the load will not
+ // semantically execute and will not affect the machine state. That is, if
+ // the load was loading into %rax and it faults, the value of %rax should
+ // stay the same as it would have been had the load not executed, and we'd
+ // have branched to NullSucc directly.
+ if (AnyAliasLiveIn(TRI, NullSucc, DependenceMO.getReg()))
+ return false;
+
+ // The Dependency can't be re-defining the base register -- then we won't
+ // get the memory operation on the address we want. This is already
+ // checked in \c isSuitableMemoryOp.
+ assert(!TRI->regsOverlap(DependenceMO.getReg(), PointerReg) &&
+ "Should have been checked before!");
+ }
+
+ auto DepDepResult =
+ computeDependence(DependenceMI, {InstsSeenSoFar.begin(), DependenceItr});
+
+ if (!DepDepResult.CanReorder || DepDepResult.PotentialDependence)
+ return false;
+
+ Dependence = DependenceMI;
+ return true;
+}
+
/// Analyze MBB to check if its terminating branch can be turned into an
/// implicit null check. If yes, append a description of the said null check to
/// NullCheckList and return true, else return false.
@@ -415,63 +472,24 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks(
// ptr could be some non-null invalid reference that never gets loaded from
// because some_cond is always true.
- unsigned PointerReg = MBP.LHS.getReg();
-
- HazardDetector HD(*TRI, *AA);
-
- for (auto MII = NotNullSucc->begin(), MIE = NotNullSucc->end(); MII != MIE;
- ++MII) {
- MachineInstr &MI = *MII;
- unsigned BaseReg;
- int64_t Offset;
- MachineInstr *Dependency = nullptr;
- if (TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI))
- if (MI.mayLoad() && !MI.isPredicable() && BaseReg == PointerReg &&
- Offset < PageSize && MI.getDesc().getNumDefs() <= 1 &&
- HD.isSafeToHoist(&MI, Dependency)) {
-
- auto DependencyOperandIsOk = [&](MachineOperand &MO) {
- assert(!(MO.isReg() && MO.isUse()) &&
- "No transitive dependendencies please!");
- if (!MO.isReg() || !MO.getReg() || !MO.isDef())
- return true;
-
- // Make sure that we won't clobber any live ins to the sibling block
- // by hoisting Dependency. For instance, we can't hoist INST to
- // before the null check (even if it safe, and does not violate any
- // dependencies in the non_null_block) if %rdx is live in to
- // _null_block.
- //
- // test %rcx, %rcx
- // je _null_block
- // _non_null_block:
- // %rdx<def> = INST
- // ...
- if (AnyAliasLiveIn(TRI, NullSucc, MO.getReg()))
- return false;
-
- // Make sure Dependency isn't re-defining the base register. Then we
- // won't get the memory operation on the address we want.
- if (TRI->regsOverlap(MO.getReg(), BaseReg))
- return false;
-
- return true;
- };
-
- bool DependencyOperandsAreOk =
- !Dependency ||
- all_of(Dependency->operands(), DependencyOperandIsOk);
-
- if (DependencyOperandsAreOk) {
- NullCheckList.emplace_back(&MI, MBP.ConditionDef, &MBB, NotNullSucc,
- NullSucc, Dependency);
- return true;
- }
- }
+ const unsigned PointerReg = MBP.LHS.getReg();
- HD.rememberInstruction(&MI);
- if (HD.isClobbered())
+ SmallVector<MachineInstr *, 8> InstsSeenSoFar;
+
+ for (auto &MI : *NotNullSucc) {
+ if (!canHandle(&MI) || InstsSeenSoFar.size() >= MaxInstsToConsider)
return false;
+
+ MachineInstr *Dependence;
+ if (isSuitableMemoryOp(MI, PointerReg, InstsSeenSoFar) &&
+ canHoistLoadInst(&MI, PointerReg, InstsSeenSoFar, NullSucc,
+ Dependence)) {
+ NullCheckList.emplace_back(&MI, MBP.ConditionDef, &MBB, NotNullSucc,
+ NullSucc, Dependence);
+ return true;
+ }
+
+ InstsSeenSoFar.push_back(&MI);
}
return false;
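[Editorial note: for readers unfamiliar with the transformation, a minimal source-level example of the pattern this loop recognizes; the names are hypothetical and the shape is simplified from the pass comments.]

  // Before the pass: an explicit null check guards the load through 'Ptr'.
  // After the pass: the branch is removed and the load itself becomes the
  // check -- if 'Ptr' is null the load faults, and the fault handler
  // transfers control to the code the null block used to run.
  struct Node { int Value; };

  int getValueOrDefault(const Node *Ptr) {
    if (Ptr == nullptr)   // null block: explicit check the pass removes
      return -1;
    return Ptr->Value;    // not-null block: load folded into the check
  }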
@@ -518,7 +536,7 @@ void ImplicitNullChecks::rewriteNullChecks(
for (auto &NC : NullCheckList) {
// Remove the conditional branch dependent on the null check.
- unsigned BranchesRemoved = TII->RemoveBranch(*NC.getCheckBlock());
+ unsigned BranchesRemoved = TII->removeBranch(*NC.getCheckBlock());
(void)BranchesRemoved;
assert(BranchesRemoved > 0 && "expected at least one branch!");
@@ -560,13 +578,14 @@ void ImplicitNullChecks::rewriteNullChecks(
NC.getCheckOperation()->eraseFromParent();
// Insert an *unconditional* branch to not-null successor.
- TII->InsertBranch(*NC.getCheckBlock(), NC.getNotNullSucc(), nullptr,
+ TII->insertBranch(*NC.getCheckBlock(), NC.getNotNullSucc(), nullptr,
/*Cond=*/None, DL);
NumImplicitNullChecks++;
}
}
+
char ImplicitNullChecks::ID = 0;
char &llvm::ImplicitNullChecksID = ImplicitNullChecks::ID;
INITIALIZE_PASS_BEGIN(ImplicitNullChecks, "implicit-null-checks",
diff --git a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
index 197db777dd2c..422f2dc2f2fb 100644
--- a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
+++ b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp
@@ -114,7 +114,7 @@ public:
AA(&pass.getAnalysis<AAResultsWrapperPass>().getAAResults()),
MDT(pass.getAnalysis<MachineDominatorTree>()),
Loops(pass.getAnalysis<MachineLoopInfo>()), VRM(vrm),
- MFI(*mf.getFrameInfo()), MRI(mf.getRegInfo()),
+ MFI(mf.getFrameInfo()), MRI(mf.getRegInfo()),
TII(*mf.getSubtarget().getInstrInfo()),
TRI(*mf.getSubtarget().getRegisterInfo()),
MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()),
@@ -172,7 +172,7 @@ public:
AA(&pass.getAnalysis<AAResultsWrapperPass>().getAAResults()),
MDT(pass.getAnalysis<MachineDominatorTree>()),
Loops(pass.getAnalysis<MachineLoopInfo>()), VRM(vrm),
- MFI(*mf.getFrameInfo()), MRI(mf.getRegInfo()),
+ MFI(mf.getFrameInfo()), MRI(mf.getRegInfo()),
TII(*mf.getSubtarget().getInstrInfo()),
TRI(*mf.getSubtarget().getRegisterInfo()),
MBFI(pass.getAnalysis<MachineBlockFrequencyInfo>()),
@@ -185,10 +185,7 @@ private:
bool isSnippet(const LiveInterval &SnipLI);
void collectRegsToSpill();
- bool isRegToSpill(unsigned Reg) {
- return std::find(RegsToSpill.begin(),
- RegsToSpill.end(), Reg) != RegsToSpill.end();
- }
+ bool isRegToSpill(unsigned Reg) { return is_contained(RegsToSpill, Reg); }
bool isSibling(unsigned Reg);
bool hoistSpillInsideBB(LiveInterval &SpillLI, MachineInstr &CopyMI);
@@ -380,7 +377,7 @@ bool InlineSpiller::hoistSpillInsideBB(LiveInterval &SpillLI,
MachineBasicBlock *MBB = LIS.getMBBFromIndex(SrcVNI->def);
MachineBasicBlock::iterator MII;
if (SrcVNI->isPHIDef())
- MII = MBB->SkipPHIsAndLabels(MBB->begin());
+ MII = MBB->SkipPHIsLabelsAndDebug(MBB->begin());
else {
MachineInstr *DefMI = LIS.getInstructionFromIndex(SrcVNI->def);
assert(DefMI && "Defining instruction disappeared");
@@ -553,12 +550,18 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
return true;
}
- // Alocate a new register for the remat.
+ // Allocate a new register for the remat.
unsigned NewVReg = Edit->createFrom(Original);
// Finally we can rematerialize OrigMI before MI.
SlotIndex DefIdx =
Edit->rematerializeAt(*MI.getParent(), MI, NewVReg, RM, TRI);
+
+ // We take the DebugLoc from MI, since OrigMI may be attributed to a
+ // different source location.
+ auto *NewMI = LIS.getInstructionFromIndex(DefIdx);
+ NewMI->setDebugLoc(MI.getDebugLoc());
+
(void)DefIdx;
DEBUG(dbgs() << "\tremat: " << DefIdx << '\t'
<< *LIS.getInstructionFromIndex(DefIdx));
@@ -736,9 +739,12 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops,
bool WasCopy = MI->isCopy();
unsigned ImpReg = 0;
- bool SpillSubRegs = (MI->getOpcode() == TargetOpcode::STATEPOINT ||
- MI->getOpcode() == TargetOpcode::PATCHPOINT ||
- MI->getOpcode() == TargetOpcode::STACKMAP);
+ // Spill subregs if the target allows it.
+ // We always want to spill subregs for stackmap/patchpoint pseudos.
+ bool SpillSubRegs = TII.isSubregFoldable() ||
+ MI->getOpcode() == TargetOpcode::STATEPOINT ||
+ MI->getOpcode() == TargetOpcode::PATCHPOINT ||
+ MI->getOpcode() == TargetOpcode::STACKMAP;
// TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied
// operands.
@@ -751,7 +757,7 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops,
ImpReg = MO.getReg();
continue;
}
- // FIXME: Teach targets to deal with subregs.
+
if (!SpillSubRegs && MO.getSubReg())
return false;
// We cannot fold a load instruction into a def.
@@ -762,6 +768,11 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr*, unsigned> > Ops,
FoldOps.push_back(Idx);
}
+ // If we only have implicit uses, we won't be able to fold that.
+ // Moreover, TargetInstrInfo::foldMemoryOperand will assert if we try!
+ if (FoldOps.empty())
+ return false;
+
MachineInstrSpan MIS(MI);
MachineInstr *FoldMI =
diff --git a/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp b/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp
index 3f1111976852..c8f79d7fb71c 100644
--- a/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/contrib/llvm/lib/CodeGen/InterleavedAccessPass.cpp
@@ -29,6 +29,9 @@
// It could be transformed into a ld2 intrinsic in AArch64 backend or a vld2
// intrinsic in ARM backend.
//
+// In X86, this can be further optimized into a set of target
+// specific loads followed by an optimized sequence of shuffles.
+//
// E.g. An interleaved store (Factor = 3):
// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
@@ -37,6 +40,8 @@
// It could be transformed into a st3 intrinsic in AArch64 backend or a vst3
// intrinsic in ARM backend.
//
+// Similarly, a set of interleaved stores can be transformed into an optimized
+// sequence of shuffles followed by a set of target specific stores for X86.
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/Passes.h"
@@ -57,8 +62,6 @@ static cl::opt<bool> LowerInterleavedAccesses(
cl::desc("Enable lowering interleaved accesses to intrinsics"),
cl::init(true), cl::Hidden);
-static unsigned MaxFactor; // The maximum supported interleave factor.
-
namespace {
class InterleavedAccess : public FunctionPass {
@@ -70,7 +73,7 @@ public:
initializeInterleavedAccessPass(*PassRegistry::getPassRegistry());
}
- const char *getPassName() const override { return "Interleaved Access Pass"; }
+ StringRef getPassName() const override { return "Interleaved Access Pass"; }
bool runOnFunction(Function &F) override;
@@ -84,6 +87,9 @@ private:
const TargetMachine *TM;
const TargetLowering *TLI;
+ /// The maximum supported interleave factor.
+ unsigned MaxFactor;
+
/// \brief Transform an interleaved load into target specific intrinsics.
bool lowerInterleavedLoad(LoadInst *LI,
SmallVector<Instruction *, 32> &DeadInsts);
@@ -144,7 +150,7 @@ static bool isDeInterleaveMaskOfFactor(ArrayRef<int> Mask, unsigned Factor,
/// <0, 2, 4, 6> (mask of index 0 to extract even elements)
/// <1, 3, 5, 7> (mask of index 1 to extract odd elements)
static bool isDeInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
- unsigned &Index) {
+ unsigned &Index, unsigned MaxFactor) {
if (Mask.size() < 2)
return false;
@@ -156,13 +162,19 @@ static bool isDeInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
return false;
}
-/// \brief Check if the mask is RE-interleave mask for an interleaved store.
-///
-/// I.e. <0, NumSubElts, ... , NumSubElts*(Factor - 1), 1, NumSubElts + 1, ...>
+/// \brief Check if the mask can be used in an interleaved store.
+///
+/// It checks for a more general pattern than the RE-interleave mask.
+/// I.e. <x, y, ... z, x+1, y+1, ...z+1, x+2, y+2, ...z+2, ...>
+/// E.g. For a Factor of 2 (LaneLen=4): <4, 32, 5, 33, 6, 34, 7, 35>
+/// E.g. For a Factor of 3 (LaneLen=4): <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
+/// E.g. For a Factor of 4 (LaneLen=2): <8, 2, 12, 4, 9, 3, 13, 5>
///
-/// E.g. The RE-interleave mask (Factor = 2) could be:
-/// <0, 4, 1, 5, 2, 6, 3, 7>
-static bool isReInterleaveMask(ArrayRef<int> Mask, unsigned &Factor) {
+/// The particular case of an RE-interleave mask is:
+/// I.e. <0, LaneLen, ... , LaneLen*(Factor - 1), 1, LaneLen + 1, ...>
+/// E.g. For a Factor of 2 (LaneLen=4): <0, 4, 1, 5, 2, 6, 3, 7>
+static bool isReInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
+ unsigned MaxFactor) {
unsigned NumElts = Mask.size();
if (NumElts < 4)
return false;
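[Editorial note: to make the expected shape concrete, a small standalone generator for the plain RE-interleave case described in the comment above; it only prints a mask and is not part of the pass.]

  #include <cstdio>
  #include <vector>

  // Builds <0, LaneLen, ..., LaneLen*(Factor-1), 1, LaneLen+1, ...>.
  static std::vector<int> makeReInterleaveMask(unsigned Factor, unsigned LaneLen) {
    std::vector<int> Mask;
    for (unsigned I = 0; I != LaneLen; ++I)
      for (unsigned F = 0; F != Factor; ++F)
        Mask.push_back(F * LaneLen + I);
    return Mask;
  }

  int main() {
    for (int M : makeReInterleaveMask(/*Factor=*/2, /*LaneLen=*/4))
      std::printf("%d ", M);  // prints: 0 4 1 5 2 6 3 7
    std::printf("\n");
    return 0;
  }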
@@ -172,21 +184,72 @@ static bool isReInterleaveMask(ArrayRef<int> Mask, unsigned &Factor) {
if (NumElts % Factor)
continue;
- unsigned NumSubElts = NumElts / Factor;
- if (!isPowerOf2_32(NumSubElts))
+ unsigned LaneLen = NumElts / Factor;
+ if (!isPowerOf2_32(LaneLen))
continue;
- // Check whether each element matchs the RE-interleaved rule. Ignore undef
- // elements.
- unsigned i = 0;
- for (; i < NumElts; i++)
- if (Mask[i] >= 0 &&
- static_cast<unsigned>(Mask[i]) !=
- (i % Factor) * NumSubElts + i / Factor)
+ // Check whether each element matches the general interleaved rule.
+ // Ignore undef elements, as long as the defined elements match the rule.
+ // Outer loop processes all factors (x, y, z in the above example)
+ unsigned I = 0, J;
+ for (; I < Factor; I++) {
+ unsigned SavedLaneValue;
+ unsigned SavedNoUndefs = 0;
+
+ // Inner loop processes consecutive accesses (x, x+1... in the example)
+ for (J = 0; J < LaneLen - 1; J++) {
+ // Lane computes x's position in the Mask
+ unsigned Lane = J * Factor + I;
+ unsigned NextLane = Lane + Factor;
+ int LaneValue = Mask[Lane];
+ int NextLaneValue = Mask[NextLane];
+
+ // If both are defined, values must be sequential
+ if (LaneValue >= 0 && NextLaneValue >= 0 &&
+ LaneValue + 1 != NextLaneValue)
+ break;
+
+ // If the next value is undef, save the current one as reference
+ if (LaneValue >= 0 && NextLaneValue < 0) {
+ SavedLaneValue = LaneValue;
+ SavedNoUndefs = 1;
+ }
+
+ // Undefs are allowed, but defined elements must still be consecutive:
+ // i.e.: x,..., undef,..., x + 2,..., undef,..., undef,..., x + 5, ....
+ // Verify this by remembering the last non-undef value that precedes a run
+ // of undefs, and checking that later non-undef mask values are incremented
+ // by the corresponding distance.
+ if (SavedNoUndefs > 0 && LaneValue < 0) {
+ SavedNoUndefs++;
+ if (NextLaneValue >= 0 &&
+ SavedLaneValue + SavedNoUndefs != (unsigned)NextLaneValue)
+ break;
+ }
+ }
+
+ if (J < LaneLen - 1)
+ break;
+
+ int StartMask = 0;
+ if (Mask[I] >= 0) {
+ // The first element of the lane (J=0) is defined; use it as StartMask.
+ StartMask = Mask[I];
+ } else if (Mask[(LaneLen - 1) * Factor + I] >= 0) {
+ // StartMask defined by the last value in lane
+ StartMask = Mask[(LaneLen - 1) * Factor + I] - J;
+ } else if (SavedNoUndefs > 0) {
+ // StartMask derived from the non-undef value saved in the J loop
+ StartMask = SavedLaneValue - (LaneLen - 1 - SavedNoUndefs);
+ }
+ // else StartMask remains set to 0, i.e. all elements are undefs
+
+ if (StartMask < 0)
break;
+ }
- // Find a RE-interleaved mask of current factor.
- if (i == NumElts)
+ // Found an interleaved mask of current factor.
+ if (I == Factor)
return true;
}
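[Editorial note: a simplified standalone version of the lane check above; undef bookkeeping and the StartMask logic are omitted for brevity, so this is a sketch of the core consecutiveness rule, not the full algorithm.]

  #include <cstdio>
  #include <vector>

  // Core rule: within lane I, element (J+1)*Factor+I must be exactly one more
  // than element J*Factor+I. Undef elements (negative values) are skipped.
  static bool lanesAreConsecutive(const std::vector<int> &Mask, unsigned Factor) {
    unsigned LaneLen = Mask.size() / Factor;
    for (unsigned I = 0; I != Factor; ++I)
      for (unsigned J = 0; J + 1 != LaneLen; ++J) {
        int Cur = Mask[J * Factor + I], Next = Mask[(J + 1) * Factor + I];
        if (Cur >= 0 && Next >= 0 && Cur + 1 != Next)
          return false;
      }
    return true;
  }

  int main() {
    // Factor 2, LaneLen 4 example from the comment: <4, 32, 5, 33, 6, 34, 7, 35>.
    std::printf("%d\n", lanesAreConsecutive({4, 32, 5, 33, 6, 34, 7, 35}, 2)); // 1
    std::printf("%d\n", lanesAreConsecutive({4, 32, 6, 33, 7, 34, 8, 35}, 2)); // 0
    return 0;
  }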
@@ -224,7 +287,8 @@ bool InterleavedAccess::lowerInterleavedLoad(
unsigned Factor, Index;
// Check if the first shufflevector is DE-interleave shuffle.
- if (!isDeInterleaveMask(Shuffles[0]->getShuffleMask(), Factor, Index))
+ if (!isDeInterleaveMask(Shuffles[0]->getShuffleMask(), Factor, Index,
+ MaxFactor))
return false;
// Holds the corresponding index for each DE-interleave shuffle.
@@ -342,7 +406,7 @@ bool InterleavedAccess::lowerInterleavedStore(
// Check if the shufflevector is RE-interleave shuffle.
unsigned Factor;
- if (!isReInterleaveMask(SVI->getShuffleMask(), Factor))
+ if (!isReInterleaveMask(SVI->getShuffleMask(), Factor, MaxFactor))
return false;
DEBUG(dbgs() << "IA: Found an interleaved store: " << *SI << "\n");
diff --git a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp
index 2962f8701625..afd24067ace7 100644
--- a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp
@@ -436,8 +436,14 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
errs() << "WARNING: this target does not support the llvm."
<< (Callee->getIntrinsicID() == Intrinsic::returnaddress ?
"return" : "frame") << "address intrinsic.\n";
- CI->replaceAllUsesWith(ConstantPointerNull::get(
- cast<PointerType>(CI->getType())));
+ CI->replaceAllUsesWith(
+ ConstantPointerNull::get(cast<PointerType>(CI->getType())));
+ break;
+ case Intrinsic::addressofreturnaddress:
+ errs() << "WARNING: this target does not support the "
+ "llvm.addressofreturnaddress intrinsic.\n";
+ CI->replaceAllUsesWith(
+ ConstantPointerNull::get(cast<PointerType>(CI->getType())));
break;
case Intrinsic::prefetch:
diff --git a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp
index 9eb43d2bec10..26794e28020e 100644
--- a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp
@@ -15,7 +15,6 @@
#include "llvm/Analysis/Passes.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -102,25 +101,12 @@ TargetIRAnalysis LLVMTargetMachine::getTargetIRAnalysis() {
});
}
-MachineModuleInfo &
-LLVMTargetMachine::addMachineModuleInfo(PassManagerBase &PM) const {
- MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo(),
- *getMCRegisterInfo(),
- getObjFileLowering());
- PM.add(MMI);
- return *MMI;
-}
-
-void LLVMTargetMachine::addMachineFunctionAnalysis(PassManagerBase &PM,
- MachineFunctionInitializer *MFInitializer) const {
- PM.add(new MachineFunctionAnalysis(*this, MFInitializer));
-}
-
/// addPassesToX helper drives creation and initialization of TargetPassConfig.
static MCContext *
addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM,
bool DisableVerify, AnalysisID StartBefore,
- AnalysisID StartAfter, AnalysisID StopAfter,
+ AnalysisID StartAfter, AnalysisID StopBefore,
+ AnalysisID StopAfter,
MachineFunctionInitializer *MFInitializer = nullptr) {
// When in emulated TLS mode, add the LowerEmuTLS pass.
@@ -135,7 +121,8 @@ addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM,
// Targets may override createPassConfig to provide a target-specific
// subclass.
TargetPassConfig *PassConfig = TM->createPassConfig(PM);
- PassConfig->setStartStopPasses(StartBefore, StartAfter, StopAfter);
+ PassConfig->setStartStopPasses(StartBefore, StartAfter, StopBefore,
+ StopAfter);
// Set PassConfig options provided by TargetMachine.
PassConfig->setDisableVerify(DisableVerify);
@@ -150,8 +137,9 @@ addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM,
PassConfig->addISelPrepare();
- MachineModuleInfo &MMI = TM->addMachineModuleInfo(PM);
- TM->addMachineFunctionAnalysis(PM, MFInitializer);
+ MachineModuleInfo *MMI = new MachineModuleInfo(TM);
+ MMI->setMachineFunctionInitializer(MFInitializer);
+ PM.add(MMI);
// Enable FastISel with -fast, but allow that to be overridden.
TM->setO0WantsFastISel(EnableFastISelOption != cl::BOU_FALSE);
@@ -165,6 +153,11 @@ addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM,
if (PassConfig->addIRTranslator())
return nullptr;
+ PassConfig->addPreLegalizeMachineIR();
+
+ if (PassConfig->addLegalizeMachineIR())
+ return nullptr;
+
// Before running the register bank selector, ask the target if it
// wants to run some passes.
PassConfig->addPreRegBankSelect();
@@ -172,6 +165,21 @@ addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM,
if (PassConfig->addRegBankSelect())
return nullptr;
+ PassConfig->addPreGlobalInstructionSelect();
+
+ if (PassConfig->addGlobalInstructionSelect())
+ return nullptr;
+
+ // Pass to reset the MachineFunction if the ISel failed.
+ PM.add(createResetMachineFunctionPass(
+ PassConfig->reportDiagnosticWhenGlobalISelFallback()));
+
+ // Provide a fallback path when we do not want to abort on
+ // not-yet-supported input.
+ if (LLVM_UNLIKELY(!PassConfig->isGlobalISelAbortEnabled()) &&
+ PassConfig->addInstSelector())
+ return nullptr;
+
} else if (PassConfig->addInstSelector())
return nullptr;
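[Editorial note: reading the additions in this hunk together, the GlobalISel path now runs the following stages in order; the one-line descriptions are an editorial summary of the calls shown above, not new behavior.]

  // 1. addIRTranslator()               - LLVM IR -> generic MachineInstrs
  // 2. addPreLegalizeMachineIR()       - target hook before legalization
  // 3. addLegalizeMachineIR()          - legalize generic operations
  // 4. addPreRegBankSelect()           - target hook before bank assignment
  // 5. addRegBankSelect()              - assign register banks
  // 6. addPreGlobalInstructionSelect() - target hook before selection
  // 7. addGlobalInstructionSelect()    - select target instructions
  // 8. createResetMachineFunctionPass  - reset the function if selection failed
  // 9. addInstSelector()               - SelectionDAG fallback when abort is off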
@@ -179,21 +187,22 @@ addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM,
PassConfig->setInitialized();
- return &MMI.getContext();
+ return &MMI->getContext();
}
bool LLVMTargetMachine::addPassesToEmitFile(
PassManagerBase &PM, raw_pwrite_stream &Out, CodeGenFileType FileType,
bool DisableVerify, AnalysisID StartBefore, AnalysisID StartAfter,
- AnalysisID StopAfter, MachineFunctionInitializer *MFInitializer) {
+ AnalysisID StopBefore, AnalysisID StopAfter,
+ MachineFunctionInitializer *MFInitializer) {
// Add common CodeGen passes.
MCContext *Context =
addPassesToGenerateCode(this, PM, DisableVerify, StartBefore, StartAfter,
- StopAfter, MFInitializer);
+ StopBefore, StopAfter, MFInitializer);
if (!Context)
return true;
- if (StopAfter) {
+ if (StopBefore || StopAfter) {
PM.add(createPrintMIRPass(Out));
return false;
}
@@ -219,7 +228,8 @@ bool LLVMTargetMachine::addPassesToEmitFile(
MCE = getTarget().createMCCodeEmitter(MII, MRI, *Context);
MCAsmBackend *MAB =
- getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU);
+ getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU,
+ Options.MCOptions);
auto FOut = llvm::make_unique<formatted_raw_ostream>(Out);
MCStreamer *S = getTarget().createAsmStreamer(
*Context, std::move(FOut), Options.MCOptions.AsmVerbose,
@@ -233,7 +243,8 @@ bool LLVMTargetMachine::addPassesToEmitFile(
// emission fails.
MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, MRI, *Context);
MCAsmBackend *MAB =
- getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU);
+ getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU,
+ Options.MCOptions);
if (!MCE || !MAB)
return true;
@@ -261,6 +272,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(
return true;
PM.add(Printer);
+ PM.add(createFreeMachineFunctionPass());
return false;
}
@@ -275,7 +287,7 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx,
bool DisableVerify) {
// Add common CodeGen passes.
Ctx = addPassesToGenerateCode(this, PM, DisableVerify, nullptr, nullptr,
- nullptr);
+ nullptr, nullptr);
if (!Ctx)
return true;
@@ -288,7 +300,8 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx,
MCCodeEmitter *MCE =
getTarget().createMCCodeEmitter(*getMCInstrInfo(), MRI, *Ctx);
MCAsmBackend *MAB =
- getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU);
+ getTarget().createMCAsmBackend(MRI, getTargetTriple().str(), TargetCPU,
+ Options.MCOptions);
if (!MCE || !MAB)
return true;
@@ -306,6 +319,7 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx,
return true;
PM.add(Printer);
+ PM.add(createFreeMachineFunctionPass());
return false; // success!
}
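
The LLVMTargetMachine hunks above thread a new StopBefore point through addPassesToGenerateCode alongside the existing StartBefore/StartAfter/StopAfter handles, and extend the GlobalISel path with legalization, register-bank selection, global instruction selection, and a reset-plus-SelectionDAG fallback when GlobalISel aborts are disabled. As a minimal standalone sketch of the start/stop bookkeeping only (this is not the real TargetPassConfig API; every name below is invented for illustration):

    #include <functional>
    #include <iostream>
    #include <string>
    #include <utility>
    #include <vector>

    // Toy pipeline runner with the four markers used above:
    // start-before, start-after, stop-before, stop-after.
    struct ToyPipeline {
      std::string StartBefore, StartAfter, StopBefore, StopAfter;

      void run(const std::vector<std::pair<std::string,
                                           std::function<void()>>> &Passes) {
        bool Started = StartBefore.empty() && StartAfter.empty();
        bool Stopped = false;
        for (const auto &P : Passes) {
          if (!Started && P.first == StartBefore)
            Started = true;              // begin running at this pass
          if (!Stopped && P.first == StopBefore)
            Stopped = true;              // stop just before this pass
          if (Started && !Stopped)
            P.second();
          if (!Started && P.first == StartAfter)
            Started = true;              // begin running after this pass
          if (!Stopped && P.first == StopAfter)
            Stopped = true;              // stop just after this pass
        }
      }
    };

    int main() {
      ToyPipeline PL;
      PL.StopBefore = "regalloc";        // analogous to the new StopBefore ID
      PL.run({{"isel",     [] { std::cout << "isel\n"; }},
              {"regalloc", [] { std::cout << "regalloc\n"; }},
              {"emit",     [] { std::cout << "emit\n"; }}});
      return 0;
    }

In the real code the markers are AnalysisID pass identifiers, and when either stop point is set the remainder of the pipeline is replaced by the MIR printer, as the StopBefore || StopAfter check above shows.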
diff --git a/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp b/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp
index 43218492ed1c..86ef898932a7 100644
--- a/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp
+++ b/contrib/llvm/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -133,7 +133,7 @@ SUnit *LatencyPriorityQueue::pop() {
void LatencyPriorityQueue::remove(SUnit *SU) {
assert(!Queue.empty() && "Queue is empty!");
- std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), SU);
+ std::vector<SUnit *>::iterator I = find(Queue, SU);
if (I != std::prev(Queue.end()))
std::swap(*I, Queue.back());
Queue.pop_back();
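
The change above is part of a wider cleanup that replaces explicit std::find(X.begin(), X.end(), ...) calls with the range-based helpers from llvm/ADT/STLExtras.h (llvm::find, llvm::is_contained). A rough stand-in for those helpers, with deliberately different names so as not to claim the exact upstream signatures:

    #include <algorithm>
    #include <cassert>
    #include <iterator>
    #include <vector>

    // Simplified equivalents of the range helpers used in the diff;
    // the names here are illustrative only.
    template <typename Range, typename T>
    auto find_in(Range &&R, const T &Value) {
      return std::find(std::begin(R), std::end(R), Value);
    }

    template <typename Range, typename T>
    bool contains(Range &&R, const T &Value) {
      return find_in(R, Value) != std::end(R);
    }

    int main() {
      std::vector<int> Queue{10, 20, 30};
      auto I = find_in(Queue, 20);       // same as std::find(begin, end, 20)
      assert(I != Queue.end() && *I == 20);
      assert(contains(Queue, 30));
      assert(!contains(Queue, 40));
      return 0;
    }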
diff --git a/contrib/llvm/lib/CodeGen/LexicalScopes.cpp b/contrib/llvm/lib/CodeGen/LexicalScopes.cpp
index b810176e6a18..834ed5f06c94 100644
--- a/contrib/llvm/lib/CodeGen/LexicalScopes.cpp
+++ b/contrib/llvm/lib/CodeGen/LexicalScopes.cpp
@@ -222,17 +222,13 @@ void LexicalScopes::constructScopeNest(LexicalScope *Scope) {
LexicalScope *WS = WorkStack.back();
const SmallVectorImpl<LexicalScope *> &Children = WS->getChildren();
bool visitedChildren = false;
- for (SmallVectorImpl<LexicalScope *>::const_iterator SI = Children.begin(),
- SE = Children.end();
- SI != SE; ++SI) {
- LexicalScope *ChildScope = *SI;
+ for (auto &ChildScope : Children)
if (!ChildScope->getDFSOut()) {
WorkStack.push_back(ChildScope);
visitedChildren = true;
ChildScope->setDFSIn(++Counter);
break;
}
- }
if (!visitedChildren) {
WorkStack.pop_back();
WS->setDFSOut(++Counter);
@@ -247,10 +243,7 @@ void LexicalScopes::assignInstructionRanges(
DenseMap<const MachineInstr *, LexicalScope *> &MI2ScopeMap) {
LexicalScope *PrevLexicalScope = nullptr;
- for (SmallVectorImpl<InsnRange>::const_iterator RI = MIRanges.begin(),
- RE = MIRanges.end();
- RI != RE; ++RI) {
- const InsnRange &R = *RI;
+ for (const auto &R : MIRanges) {
LexicalScope *S = MI2ScopeMap.lookup(R.first);
assert(S && "Lost LexicalScope for a machine instruction!");
if (PrevLexicalScope && !PrevLexicalScope->dominates(S))
@@ -281,12 +274,8 @@ void LexicalScopes::getMachineBasicBlocks(
}
SmallVectorImpl<InsnRange> &InsnRanges = Scope->getRanges();
- for (SmallVectorImpl<InsnRange>::iterator I = InsnRanges.begin(),
- E = InsnRanges.end();
- I != E; ++I) {
- InsnRange &R = *I;
+ for (auto &R : InsnRanges)
MBBs.insert(R.first->getParent());
- }
}
/// dominates - Return true if DebugLoc's lexical scope dominates at least one
@@ -301,9 +290,8 @@ bool LexicalScopes::dominates(const DILocation *DL, MachineBasicBlock *MBB) {
return true;
bool Result = false;
- for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
- ++I) {
- if (const DILocation *IDL = I->getDebugLoc())
+ for (auto &I : *MBB) {
+ if (const DILocation *IDL = I.getDebugLoc())
if (LexicalScope *IScope = getOrCreateLexicalScope(IDL))
if (Scope->dominates(IScope))
return true;
diff --git a/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp b/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp
index 4ff88d528108..c945376560f7 100644
--- a/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveDebugValues.cpp
@@ -23,6 +23,7 @@
#include "llvm/ADT/SparseBitVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/UniqueVector.h"
+#include "llvm/CodeGen/LexicalScopes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -60,6 +61,26 @@ class LiveDebugValues : public MachineFunctionPass {
private:
const TargetRegisterInfo *TRI;
const TargetInstrInfo *TII;
+ LexicalScopes LS;
+
+ /// Keeps track of lexical scopes associated with a user value's source
+ /// location.
+ class UserValueScopes {
+ DebugLoc DL;
+ LexicalScopes &LS;
+ SmallPtrSet<const MachineBasicBlock *, 4> LBlocks;
+
+ public:
+ UserValueScopes(DebugLoc D, LexicalScopes &L) : DL(std::move(D)), LS(L) {}
+
+ /// Return true if the current scope dominates at least one machine
+ /// instruction in a given machine basic block.
+ bool dominates(MachineBasicBlock *MBB) {
+ if (LBlocks.empty())
+ LS.getMachineBasicBlocks(DL, LBlocks);
+ return LBlocks.count(MBB) != 0 || LS.dominates(DL, MBB);
+ }
+ };
/// Based on std::pair so it can be used as an index into a DenseMap.
typedef std::pair<const DILocalVariable *, const DILocation *>
@@ -83,7 +104,7 @@ private:
struct VarLoc {
const DebugVariable Var;
const MachineInstr &MI; ///< Only used for cloning a new DBG_VALUE.
-
+ mutable UserValueScopes UVS;
enum { InvalidKind = 0, RegisterKind } Kind;
/// The value location. Stored separately to avoid repeatedly
@@ -96,9 +117,9 @@ private:
uint64_t Hash;
} Loc;
- VarLoc(const MachineInstr &MI)
+ VarLoc(const MachineInstr &MI, LexicalScopes &LS)
: Var(MI.getDebugVariable(), MI.getDebugLoc()->getInlinedAt()), MI(MI),
- Kind(InvalidKind) {
+ UVS(MI.getDebugLoc(), LS), Kind(InvalidKind) {
static_assert((sizeof(Loc) == sizeof(uint64_t)),
"hash does not cover all members of Loc");
assert(MI.isDebugValue() && "not a DBG_VALUE");
@@ -125,6 +146,10 @@ private:
return 0;
}
+ /// Determine whether the lexical scope of this value's debug location
+ /// dominates MBB.
+ bool dominates(MachineBasicBlock &MBB) const { return UVS.dominates(&MBB); }
+
void dump() const { MI.dump(); }
bool operator==(const VarLoc &Other) const {
@@ -201,7 +226,8 @@ private:
VarLocInMBB &OutLocs, VarLocMap &VarLocIDs);
bool join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs,
- const VarLocMap &VarLocIDs);
+ const VarLocMap &VarLocIDs,
+ SmallPtrSet<const MachineBasicBlock *, 16> &Visited);
bool ExtendRanges(MachineFunction &MF);
@@ -217,7 +243,7 @@ public:
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
/// Print to ostream with a message.
@@ -228,6 +254,7 @@ public:
/// Calculate the liveness information for the given machine function.
bool runOnMachineFunction(MachineFunction &MF) override;
};
+
} // namespace
//===----------------------------------------------------------------------===//
@@ -260,6 +287,7 @@ void LiveDebugValues::printVarLocInMBB(const MachineFunction &MF,
const VarLocMap &VarLocIDs,
const char *msg,
raw_ostream &Out) const {
+ Out << '\n' << msg << '\n';
for (const MachineBasicBlock &BB : MF) {
const auto &L = V.lookup(&BB);
Out << "MBB: " << BB.getName() << ":\n";
@@ -268,7 +296,6 @@ void LiveDebugValues::printVarLocInMBB(const MachineFunction &MF,
Out << " Var: " << VL.Var.getVar()->getName();
Out << " MI: ";
VL.dump();
- Out << "\n";
}
}
Out << "\n";
@@ -294,7 +321,7 @@ void LiveDebugValues::transferDebugValue(const MachineInstr &MI,
// Add the VarLoc to OpenRanges from this DBG_VALUE.
// TODO: Currently handles DBG_VALUE which has only reg as location.
if (isDbgValueDescribedByReg(MI)) {
- VarLoc VL(MI);
+ VarLoc VL(MI, LS);
unsigned ID = VarLocIDs.insert(VL);
OpenRanges.insert(ID, VL.Var);
}
@@ -368,7 +395,8 @@ bool LiveDebugValues::transfer(MachineInstr &MI, OpenRangesSet &OpenRanges,
/// inserting a new DBG_VALUE instruction at the start of the @MBB - if the same
/// source variable in all the predecessors of @MBB reside in the same location.
bool LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs,
- VarLocInMBB &InLocs, const VarLocMap &VarLocIDs) {
+ VarLocInMBB &InLocs, const VarLocMap &VarLocIDs,
+ SmallPtrSet<const MachineBasicBlock *, 16> &Visited) {
DEBUG(dbgs() << "join MBB: " << MBB.getName() << "\n");
bool Changed = false;
@@ -376,21 +404,39 @@ bool LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs,
// For all predecessors of this MBB, find the set of VarLocs that
// can be joined.
+ int NumVisited = 0;
for (auto p : MBB.predecessors()) {
+ // Ignore unvisited predecessor blocks. As we are processing
+ // the blocks in reverse post-order any unvisited block can
+ // be considered to not remove any incoming values.
+ if (!Visited.count(p))
+ continue;
auto OL = OutLocs.find(p);
// Join is null in case of empty OutLocs from any of the predecessors.
if (OL == OutLocs.end())
return false;
- // Just copy over the Out locs to incoming locs for the first predecessor.
- if (p == *MBB.pred_begin()) {
+ // Just copy over the Out locs to incoming locs for the first visited
+ // predecessor, and for all other predecessors join the Out locs.
+ if (!NumVisited)
InLocsT = OL->second;
- continue;
- }
- // Join with this predecessor.
- InLocsT &= OL->second;
+ else
+ InLocsT &= OL->second;
+ NumVisited++;
}
+ // Filter out DBG_VALUES that are out of scope.
+ VarLocSet KillSet;
+ for (auto ID : InLocsT)
+ if (!VarLocIDs[ID].dominates(MBB))
+ KillSet.set(ID);
+ InLocsT.intersectWithComplement(KillSet);
+
+ // As we are processing blocks in reverse post-order we
+ // should have processed at least one predecessor, unless it
+ // is the entry block which has no predecessor.
+ assert((NumVisited || MBB.pred_empty()) &&
+ "Should have processed at least one predecessor");
if (InLocsT.empty())
return false;
@@ -463,16 +509,18 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
// To solve it, we perform join() and transfer() using the two worklist method
// until the ranges converge.
// Ranges have converged when both worklists are empty.
+ SmallPtrSet<const MachineBasicBlock *, 16> Visited;
while (!Worklist.empty() || !Pending.empty()) {
// We track what is on the pending worklist to avoid inserting the same
// thing twice. We could avoid this with a custom priority queue, but this
// is probably not worth it.
SmallPtrSet<MachineBasicBlock *, 16> OnPending;
+ DEBUG(dbgs() << "Processing Worklist\n");
while (!Worklist.empty()) {
MachineBasicBlock *MBB = OrderToBB[Worklist.top()];
Worklist.pop();
- MBBJoined = join(*MBB, OutLocs, InLocs, VarLocIDs);
-
+ MBBJoined = join(*MBB, OutLocs, InLocs, VarLocIDs, Visited);
+ Visited.insert(MBB);
if (MBBJoined) {
MBBJoined = false;
Changed = true;
@@ -505,12 +553,14 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
}
bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) {
+ if (!MF.getFunction()->getSubprogram())
+ // LiveDebugValues will already have removed all DBG_VALUEs.
+ return false;
+
TRI = MF.getSubtarget().getRegisterInfo();
TII = MF.getSubtarget().getInstrInfo();
+ LS.initialize(MF);
- bool Changed = false;
-
- Changed |= ExtendRanges(MF);
-
+ bool Changed = ExtendRanges(MF);
return Changed;
}
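
The LiveDebugValues changes above make join() aware of which blocks have already been processed: iterating in reverse post-order, the first visited predecessor seeds the incoming location set, every other visited predecessor is intersected into it, unvisited predecessors are skipped, and locations whose lexical scope does not dominate the block are filtered out afterwards via the new UserValueScopes member. A simplified standalone model of that meet step over plain bitsets (VarLocSet, lexical scopes, and the real data structures are not modeled):

    #include <bitset>
    #include <cstddef>
    #include <iostream>
    #include <vector>

    using LocSet = std::bitset<8>;   // stand-in for a set of variable locations

    // Meet over the visited predecessors only: seed from the first visited
    // one, then intersect the rest. Unvisited predecessors (not yet reached
    // in reverse post-order) cannot invalidate any location, so they are
    // simply ignored.
    LocSet joinPreds(const std::vector<LocSet> &PredOut,
                     const std::vector<bool> &Visited) {
      LocSet In;
      bool Seeded = false;
      for (std::size_t P = 0; P != PredOut.size(); ++P) {
        if (!Visited[P])
          continue;
        if (!Seeded) {
          In = PredOut[P];
          Seeded = true;
        } else {
          In &= PredOut[P];
        }
      }
      return In;
    }

    int main() {
      std::vector<LocSet> PredOut{LocSet("00001111"), LocSet("00000110")};
      std::cout << joinPreds(PredOut, {true, true})  << "\n";  // 00000110
      std::cout << joinPreds(PredOut, {true, false}) << "\n";  // 00001111
      return 0;
    }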
diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp
index 966b4f1f4e4d..0934d8cfeaa1 100644
--- a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp
@@ -22,7 +22,6 @@
#include "LiveDebugVariables.h"
#include "llvm/ADT/IntervalMap.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/LexicalScopes.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -76,27 +75,6 @@ LiveDebugVariables::LiveDebugVariables() : MachineFunctionPass(ID), pImpl(nullpt
/// LocMap - Map of where a user value is live, and its location.
typedef IntervalMap<SlotIndex, unsigned, 4> LocMap;
-namespace {
-/// UserValueScopes - Keeps track of lexical scopes associated with a
-/// user value's source location.
-class UserValueScopes {
- DebugLoc DL;
- LexicalScopes &LS;
- SmallPtrSet<const MachineBasicBlock *, 4> LBlocks;
-
-public:
- UserValueScopes(DebugLoc D, LexicalScopes &L) : DL(std::move(D)), LS(L) {}
-
- /// dominates - Return true if current scope dominates at least one machine
- /// instruction in a given machine basic block.
- bool dominates(MachineBasicBlock *MBB) {
- if (LBlocks.empty())
- LS.getMachineBasicBlocks(DL, LBlocks);
- return LBlocks.count(MBB) != 0 || LS.dominates(DL, MBB);
- }
-};
-} // end anonymous namespace
-
/// UserValue - A user value is a part of a debug info user variable.
///
/// A DBG_VALUE instruction notes that (a sub-register of) a virtual register
@@ -221,8 +199,8 @@ public:
I.setValue(getLocationNo(LocMO));
}
- /// extendDef - Extend the current definition as far as possible down the
- /// dominator tree. Stop when meeting an existing def or when leaving the live
+ /// extendDef - Extend the current definition as far down as possible.
+ /// Stop when meeting an existing def or when leaving the live
/// range of VNI.
/// End points where VNI is no longer live are added to Kills.
/// @param Idx Starting point for the definition.
@@ -231,12 +209,10 @@ public:
/// @param VNI When LR is not null, this is the value to restrict to.
/// @param Kills Append end points of VNI's live range to Kills.
/// @param LIS Live intervals analysis.
- /// @param MDT Dominator tree.
void extendDef(SlotIndex Idx, unsigned LocNo,
LiveRange *LR, const VNInfo *VNI,
SmallVectorImpl<SlotIndex> *Kills,
- LiveIntervals &LIS, MachineDominatorTree &MDT,
- UserValueScopes &UVS);
+ LiveIntervals &LIS);
/// addDefsFromCopies - The value in LI/LocNo may be copies to other
/// registers. Determine if any of the copies are available at the kill
@@ -254,8 +230,7 @@ public:
/// computeIntervals - Compute the live intervals of all locations after
/// collecting all their def points.
void computeIntervals(MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
- LiveIntervals &LIS, MachineDominatorTree &MDT,
- UserValueScopes &UVS);
+ LiveIntervals &LIS);
/// splitRegister - Replace OldReg ranges with NewRegs ranges where NewRegs is
/// live. Returns true if any changes were made.
@@ -283,8 +258,6 @@ class LDVImpl {
LocMap::Allocator allocator;
MachineFunction *MF;
LiveIntervals *LIS;
- LexicalScopes LS;
- MachineDominatorTree *MDT;
const TargetRegisterInfo *TRI;
/// Whether emitDebugValues is called.
@@ -342,7 +315,6 @@ public:
"Dbg values are not emitted in LDV");
EmitDone = false;
ModifiedMF = false;
- LS.reset();
}
/// mapVirtReg - Map virtual register to an equivalence class.
@@ -541,8 +513,7 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) {
/// data-flow analysis to propagate them beyond basic block boundaries.
void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, LiveRange *LR,
const VNInfo *VNI, SmallVectorImpl<SlotIndex> *Kills,
- LiveIntervals &LIS, MachineDominatorTree &MDT,
- UserValueScopes &UVS) {
+ LiveIntervals &LIS) {
SlotIndex Start = Idx;
MachineBasicBlock *MBB = LIS.getMBBFromIndex(Start);
SlotIndex Stop = LIS.getMBBEndIdx(MBB);
@@ -660,9 +631,7 @@ UserValue::addDefsFromCopies(LiveInterval *LI, unsigned LocNo,
void
UserValue::computeIntervals(MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI,
- LiveIntervals &LIS,
- MachineDominatorTree &MDT,
- UserValueScopes &UVS) {
+ LiveIntervals &LIS) {
SmallVector<std::pair<SlotIndex, unsigned>, 16> Defs;
// Collect all defs to be extended (Skipping undefs).
@@ -677,7 +646,7 @@ UserValue::computeIntervals(MachineRegisterInfo &MRI,
const MachineOperand &Loc = locations[LocNo];
if (!Loc.isReg()) {
- extendDef(Idx, LocNo, nullptr, nullptr, nullptr, LIS, MDT, UVS);
+ extendDef(Idx, LocNo, nullptr, nullptr, nullptr, LIS);
continue;
}
@@ -690,7 +659,7 @@ UserValue::computeIntervals(MachineRegisterInfo &MRI,
VNI = LI->getVNInfoAt(Idx);
}
SmallVector<SlotIndex, 16> Kills;
- extendDef(Idx, LocNo, LI, VNI, &Kills, LIS, MDT, UVS);
+ extendDef(Idx, LocNo, LI, VNI, &Kills, LIS);
if (LI)
addDefsFromCopies(LI, LocNo, Kills, Defs, MRI, LIS);
continue;
@@ -701,7 +670,7 @@ UserValue::computeIntervals(MachineRegisterInfo &MRI,
LiveRange *LR = &LIS.getRegUnit(Unit);
const VNInfo *VNI = LR->getVNInfoAt(Idx);
// Don't track copies from physregs, it is too expensive.
- extendDef(Idx, LocNo, LR, VNI, nullptr, LIS, MDT, UVS);
+ extendDef(Idx, LocNo, LR, VNI, nullptr, LIS);
}
// Finally, erase all the undefs.
@@ -714,8 +683,7 @@ UserValue::computeIntervals(MachineRegisterInfo &MRI,
void LDVImpl::computeIntervals() {
for (unsigned i = 0, e = userValues.size(); i != e; ++i) {
- UserValueScopes UVS(userValues[i]->getDebugLoc(), LS);
- userValues[i]->computeIntervals(MF->getRegInfo(), *TRI, *LIS, *MDT, UVS);
+ userValues[i]->computeIntervals(MF->getRegInfo(), *TRI, *LIS);
userValues[i]->mapVirtRegs(this);
}
}
@@ -724,9 +692,7 @@ bool LDVImpl::runOnMachineFunction(MachineFunction &mf) {
clear();
MF = &mf;
LIS = &pass.getAnalysis<LiveIntervals>();
- MDT = &pass.getAnalysis<MachineDominatorTree>();
TRI = mf.getSubtarget().getRegisterInfo();
- LS.initialize(mf);
DEBUG(dbgs() << "********** COMPUTING LIVE DEBUG VARIABLES: "
<< mf.getName() << " **********\n");
@@ -951,7 +917,7 @@ findInsertLocation(MachineBasicBlock *MBB, SlotIndex Idx,
while (!(MI = LIS.getInstructionFromIndex(Idx))) {
// We've reached the beginning of MBB.
if (Idx == Start) {
- MachineBasicBlock::iterator I = MBB->SkipPHIsAndLabels(MBB->begin());
+ MachineBasicBlock::iterator I = MBB->SkipPHIsLabelsAndDebug(MBB->begin());
return I;
}
Idx = Idx.getPrevIndex();
diff --git a/contrib/llvm/lib/CodeGen/LiveInterval.cpp b/contrib/llvm/lib/CodeGen/LiveInterval.cpp
index 93c5ca785ac9..623af492fcd4 100644
--- a/contrib/llvm/lib/CodeGen/LiveInterval.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveInterval.cpp
@@ -59,18 +59,32 @@ public:
typedef LiveRange::Segment Segment;
typedef IteratorT iterator;
- VNInfo *createDeadDef(SlotIndex Def, VNInfo::Allocator &VNInfoAllocator) {
+ /// A counterpart of LiveRange::createDeadDef: Make sure the range has a
+ /// value defined at @p Def.
+ /// If @p ForVNI is null, and there is no value defined at @p Def, a new
+ /// value will be allocated using @p VNInfoAllocator.
+ /// If @p ForVNI is null, the return value is the value defined at @p Def,
+ /// either a pre-existing one, or the one newly created.
+ /// If @p ForVNI is not null, then @p Def should be the location where
+ /// @p ForVNI is defined. If the range does not have a value defined at
+ /// @p Def, the value @p ForVNI will be used instead of allocating a new
+ /// one. If the range already has a value defined at @p Def, it must be the
+ /// same as @p ForVNI. In either case, @p ForVNI will be the return value.
+ VNInfo *createDeadDef(SlotIndex Def, VNInfo::Allocator *VNInfoAllocator,
+ VNInfo *ForVNI) {
assert(!Def.isDead() && "Cannot define a value at the dead slot");
-
+ assert((!ForVNI || ForVNI->def == Def) &&
+ "If ForVNI is specified, it must match Def");
iterator I = impl().find(Def);
if (I == segments().end()) {
- VNInfo *VNI = LR->getNextValue(Def, VNInfoAllocator);
+ VNInfo *VNI = ForVNI ? ForVNI : LR->getNextValue(Def, *VNInfoAllocator);
impl().insertAtEnd(Segment(Def, Def.getDeadSlot(), VNI));
return VNI;
}
Segment *S = segmentAt(I);
if (SlotIndex::isSameInstr(Def, S->start)) {
+ assert((!ForVNI || ForVNI == S->valno) && "Value number mismatch");
assert(S->valno->def == S->start && "Inconsistent existing value def");
// It is possible to have both normal and early-clobber defs of the same
@@ -84,7 +98,7 @@ public:
return S->valno;
}
assert(SlotIndex::isEarlierInstr(Def, S->start) && "Already live at def");
- VNInfo *VNI = LR->getNextValue(Def, VNInfoAllocator);
+ VNInfo *VNI = ForVNI ? ForVNI : LR->getNextValue(Def, *VNInfoAllocator);
segments().insert(I, Segment(Def, Def.getDeadSlot(), VNI));
return VNI;
}
@@ -93,7 +107,7 @@ public:
if (segments().empty())
return nullptr;
iterator I =
- impl().findInsertPos(Segment(Use.getPrevSlot(), Use, nullptr));
+ impl().findInsertPos(Segment(Use.getPrevSlot(), Use, nullptr));
if (I == segments().begin())
return nullptr;
--I;
@@ -104,6 +118,25 @@ public:
return I->valno;
}
+ std::pair<VNInfo*,bool> extendInBlock(ArrayRef<SlotIndex> Undefs,
+ SlotIndex StartIdx, SlotIndex Use) {
+ if (segments().empty())
+ return std::make_pair(nullptr, false);
+ SlotIndex BeforeUse = Use.getPrevSlot();
+ iterator I = impl().findInsertPos(Segment(BeforeUse, Use, nullptr));
+ if (I == segments().begin())
+ return std::make_pair(nullptr, LR->isUndefIn(Undefs, StartIdx, BeforeUse));
+ --I;
+ if (I->end <= StartIdx)
+ return std::make_pair(nullptr, LR->isUndefIn(Undefs, StartIdx, BeforeUse));
+ if (I->end < Use) {
+ if (LR->isUndefIn(Undefs, I->end, BeforeUse))
+ return std::make_pair(nullptr, true);
+ extendSegmentEndTo(I, Use);
+ }
+ return std::make_pair(I->valno, false);
+ }
+
/// This method is used when we want to extend the segment specified
/// by I to end at the specified endpoint. To do this, we should
/// merge and eliminate all segments that this will overlap
@@ -320,13 +353,20 @@ LiveRange::iterator LiveRange::find(SlotIndex Pos) {
return I;
}
-VNInfo *LiveRange::createDeadDef(SlotIndex Def,
- VNInfo::Allocator &VNInfoAllocator) {
+VNInfo *LiveRange::createDeadDef(SlotIndex Def, VNInfo::Allocator &VNIAlloc) {
+ // Use the segment set, if it is available.
+ if (segmentSet != nullptr)
+ return CalcLiveRangeUtilSet(this).createDeadDef(Def, &VNIAlloc, nullptr);
+ // Otherwise use the segment vector.
+ return CalcLiveRangeUtilVector(this).createDeadDef(Def, &VNIAlloc, nullptr);
+}
+
+VNInfo *LiveRange::createDeadDef(VNInfo *VNI) {
// Use the segment set, if it is available.
if (segmentSet != nullptr)
- return CalcLiveRangeUtilSet(this).createDeadDef(Def, VNInfoAllocator);
+ return CalcLiveRangeUtilSet(this).createDeadDef(VNI->def, nullptr, VNI);
// Otherwise use the segment vector.
- return CalcLiveRangeUtilVector(this).createDeadDef(Def, VNInfoAllocator);
+ return CalcLiveRangeUtilVector(this).createDeadDef(VNI->def, nullptr, VNI);
}
// overlaps - Return true if the intersection of the two live ranges is
@@ -507,9 +547,15 @@ void LiveRange::append(const Segment S) {
segments.push_back(S);
}
-/// extendInBlock - If this range is live before Kill in the basic
-/// block that starts at StartIdx, extend it to be live up to Kill and return
-/// the value. If there is no live range before Kill, return NULL.
+std::pair<VNInfo*,bool> LiveRange::extendInBlock(ArrayRef<SlotIndex> Undefs,
+ SlotIndex StartIdx, SlotIndex Kill) {
+ // Use the segment set, if it is available.
+ if (segmentSet != nullptr)
+ return CalcLiveRangeUtilSet(this).extendInBlock(Undefs, StartIdx, Kill);
+ // Otherwise use the segment vector.
+ return CalcLiveRangeUtilVector(this).extendInBlock(Undefs, StartIdx, Kill);
+}
+
VNInfo *LiveRange::extendInBlock(SlotIndex StartIdx, SlotIndex Kill) {
// Use the segment set, if it is available.
if (segmentSet != nullptr)
@@ -571,7 +617,7 @@ void LiveRange::removeSegment(SlotIndex Start, SlotIndex End,
/// Also remove the value# from value# list.
void LiveRange::removeValNo(VNInfo *ValNo) {
if (empty()) return;
- segments.erase(std::remove_if(begin(), end(), [ValNo](const Segment &S) {
+ segments.erase(remove_if(*this, [ValNo](const Segment &S) {
return S.valno == ValNo;
}), end());
// Now that ValNo is dead, remove it.
@@ -824,6 +870,30 @@ unsigned LiveInterval::getSize() const {
return Sum;
}
+void LiveInterval::computeSubRangeUndefs(SmallVectorImpl<SlotIndex> &Undefs,
+ LaneBitmask LaneMask,
+ const MachineRegisterInfo &MRI,
+ const SlotIndexes &Indexes) const {
+ assert(TargetRegisterInfo::isVirtualRegister(reg));
+ LaneBitmask VRegMask = MRI.getMaxLaneMaskForVReg(reg);
+ assert((VRegMask & LaneMask).any());
+ const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
+ for (const MachineOperand &MO : MRI.def_operands(reg)) {
+ if (!MO.isUndef())
+ continue;
+ unsigned SubReg = MO.getSubReg();
+ assert(SubReg != 0 && "Undef should only be set on subreg defs");
+ LaneBitmask DefMask = TRI.getSubRegIndexLaneMask(SubReg);
+ LaneBitmask UndefMask = VRegMask & ~DefMask;
+ if ((UndefMask & LaneMask).any()) {
+ const MachineInstr &MI = *MO.getParent();
+ bool EarlyClobber = MO.isEarlyClobber();
+ SlotIndex Pos = Indexes.getInstructionIndex(MI).getRegSlot(EarlyClobber);
+ Undefs.push_back(Pos);
+ }
+ }
+}
+
raw_ostream& llvm::operator<<(raw_ostream& os, const LiveRange::Segment &S) {
return os << '[' << S.start << ',' << S.end << ':' << S.valno->id << ')';
}
@@ -912,15 +982,16 @@ void LiveInterval::verify(const MachineRegisterInfo *MRI) const {
super::verify();
// Make sure SubRanges are fine and LaneMasks are disjoint.
- LaneBitmask Mask = 0;
- LaneBitmask MaxMask = MRI != nullptr ? MRI->getMaxLaneMaskForVReg(reg) : ~0u;
+ LaneBitmask Mask;
+ LaneBitmask MaxMask = MRI != nullptr ? MRI->getMaxLaneMaskForVReg(reg)
+ : LaneBitmask::getAll();
for (const SubRange &SR : subranges()) {
// Subrange lanemask should be disjoint from any previous subrange masks.
- assert((Mask & SR.LaneMask) == 0);
+ assert((Mask & SR.LaneMask).none());
Mask |= SR.LaneMask;
// Subrange masks should be contained in the maximum lane mask for the vreg.
- assert((Mask & ~MaxMask) == 0);
+ assert((Mask & ~MaxMask).none());
// empty subranges must be removed.
assert(!SR.empty());
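
The LiveInterval.cpp hunks above extend the dead-def and extendInBlock helpers so that subregister liveness can reuse an existing VNInfo and can be stopped by explicit undef points recorded for other lanes. A much simplified standalone model of the new extendInBlock overload, using plain integers for slot indexes (the real SlotIndex, VNInfo, and segment-set machinery are not modeled):

    #include <iostream>
    #include <utility>
    #include <vector>

    struct Seg { int Start, End, ValNo; };   // toy live segment

    static bool isUndefIn(const std::vector<int> &Undefs, int Lo, int Hi) {
      for (int U : Undefs)
        if (Lo <= U && U < Hi)
          return true;
      return false;
    }

    // Returns {value number or -1, hit-an-undef}. A segment live before Use
    // is extended up to Use unless an explicit undef point intervenes.
    std::pair<int, bool> extendInBlock(std::vector<Seg> &Segs,
                                       const std::vector<int> &Undefs,
                                       int BlockStart, int Use) {
      for (Seg &S : Segs) {
        if (S.Start >= Use || S.End <= BlockStart)
          continue;                          // segment does not reach this use
        if (S.End < Use) {
          if (isUndefIn(Undefs, S.End, Use))
            return {-1, true};               // extension blocked by an undef
          S.End = Use;                       // grow the segment to the use
        }
        return {S.ValNo, false};
      }
      return {-1, isUndefIn(Undefs, BlockStart, Use)};
    }

    int main() {
      std::vector<Seg> Segs{{0, 4, 7}};
      auto R = extendInBlock(Segs, /*Undefs=*/{6}, /*BlockStart=*/0, /*Use=*/10);
      std::cout << R.first << " " << R.second << "\n";    // -1 1: blocked at 6
      R = extendInBlock(Segs, /*Undefs=*/{}, 0, 10);
      std::cout << R.first << " " << Segs[0].End << "\n"; // 7 10: extended
      return 0;
    }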
diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
index 5f3281f6771d..70d34838b237 100644
--- a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -58,10 +58,6 @@ static cl::opt<bool> EnablePrecomputePhysRegs(
static bool EnablePrecomputePhysRegs = false;
#endif // NDEBUG
-static cl::opt<bool> EnableSubRegLiveness(
- "enable-subreg-liveness", cl::Hidden, cl::init(true),
- cl::desc("Enable subregister liveness tracking."));
-
namespace llvm {
cl::opt<bool> UseSegmentSetForPhysRegs(
"use-segment-set-for-physregs", cl::Hidden, cl::init(true),
@@ -119,9 +115,6 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) {
Indexes = &getAnalysis<SlotIndexes>();
DomTree = &getAnalysis<MachineDominatorTree>();
- if (EnableSubRegLiveness && MF->getSubtarget().enableSubRegLiveness())
- MRI->enableSubRegLiveness(true);
-
if (!LRCalc)
LRCalc = new LiveRangeCalc();
@@ -504,8 +497,7 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI,
return MayHaveSplitComponents;
}
-void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg)
-{
+void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg) {
DEBUG(dbgs() << "Shrink: " << SR << '\n');
assert(TargetRegisterInfo::isVirtualRegister(Reg)
&& "Can only shrink virtual registers");
@@ -514,18 +506,19 @@ void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg)
// Visit all instructions reading Reg.
SlotIndex LastIdx;
- for (MachineOperand &MO : MRI->reg_operands(Reg)) {
- MachineInstr *UseMI = MO.getParent();
- if (UseMI->isDebugValue())
+ for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
+ // Skip "undef" uses.
+ if (!MO.readsReg())
continue;
// Maybe the operand is for a subregister we don't care about.
unsigned SubReg = MO.getSubReg();
if (SubReg != 0) {
LaneBitmask LaneMask = TRI->getSubRegIndexLaneMask(SubReg);
- if ((LaneMask & SR.LaneMask) == 0)
+ if ((LaneMask & SR.LaneMask).none())
continue;
}
// We only need to visit each instruction once.
+ MachineInstr *UseMI = MO.getParent();
SlotIndex Idx = getInstructionIndex(*UseMI).getRegSlot();
if (Idx == LastIdx)
continue;
@@ -574,11 +567,12 @@ void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg)
}
void LiveIntervals::extendToIndices(LiveRange &LR,
- ArrayRef<SlotIndex> Indices) {
+ ArrayRef<SlotIndex> Indices,
+ ArrayRef<SlotIndex> Undefs) {
assert(LRCalc && "LRCalc not initialized.");
LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator());
for (unsigned i = 0, e = Indices.size(); i != e; ++i)
- LRCalc->extend(LR, Indices[i]);
+ LRCalc->extend(LR, Indices[i], /*PhysReg=*/0, Undefs);
}
void LiveIntervals::pruneValue(LiveRange &LR, SlotIndex Kill,
@@ -605,7 +599,7 @@ void LiveIntervals::pruneValue(LiveRange &LR, SlotIndex Kill,
// Find all blocks that are reachable from KillMBB without leaving VNI's live
// range. It is possible that KillMBB itself is reachable, so start a DFS
// from each successor.
- typedef SmallPtrSet<MachineBasicBlock*, 9> VisitedTy;
+ typedef df_iterator_default_set<MachineBasicBlock*,9> VisitedTy;
VisitedTy Visited;
for (MachineBasicBlock::succ_iterator
SuccI = KillMBB->succ_begin(), SuccE = KillMBB->succ_end();
@@ -724,7 +718,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
LaneBitmask DefinedLanesMask;
if (!SRs.empty()) {
// Compute a mask of lanes that are defined.
- DefinedLanesMask = 0;
+ DefinedLanesMask = LaneBitmask::getNone();
for (auto &SRP : SRs) {
const LiveInterval::SubRange &SR = *SRP.first;
LiveRange::const_iterator &I = SRP.second;
@@ -737,7 +731,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
DefinedLanesMask |= SR.LaneMask;
}
} else
- DefinedLanesMask = ~0u;
+ DefinedLanesMask = LaneBitmask::getAll();
bool IsFullWrite = false;
for (const MachineOperand &MO : MI->operands()) {
@@ -746,7 +740,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
if (MO.isUse()) {
// Reading any undefined lanes?
LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg());
- if ((UseMask & ~DefinedLanesMask) != 0)
+ if ((UseMask & ~DefinedLanesMask).any())
goto CancelKill;
} else if (MO.getSubReg() == 0) {
// Writing to the full register?
@@ -954,14 +948,15 @@ public:
LiveInterval &LI = LIS.getInterval(Reg);
if (LI.hasSubRanges()) {
unsigned SubReg = MO.getSubReg();
- LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubReg);
+ LaneBitmask LaneMask = SubReg ? TRI.getSubRegIndexLaneMask(SubReg)
+ : MRI.getMaxLaneMaskForVReg(Reg);
for (LiveInterval::SubRange &S : LI.subranges()) {
- if ((S.LaneMask & LaneMask) == 0)
+ if ((S.LaneMask & LaneMask).none())
continue;
updateRange(S, Reg, S.LaneMask);
}
}
- updateRange(LI, Reg, 0);
+ updateRange(LI, Reg, LaneBitmask::getNone());
continue;
}
@@ -969,7 +964,7 @@ public:
// precomputed live range.
for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units)
if (LiveRange *LR = getRegUnitLI(*Units))
- updateRange(*LR, *Units, 0);
+ updateRange(*LR, *Units, LaneBitmask::getNone());
}
if (hasRegMask)
updateRegMaskSlots();
@@ -985,7 +980,7 @@ private:
dbgs() << " ";
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
dbgs() << PrintReg(Reg);
- if (LaneMask != 0)
+ if (LaneMask.any())
dbgs() << " L" << PrintLaneMask(LaneMask);
} else {
dbgs() << PrintRegUnit(Reg, &TRI);
@@ -1039,6 +1034,8 @@ private:
LiveRange::iterator Prev = std::prev(NewIdxIn);
Prev->end = NewIdx.getRegSlot();
}
+ // Extend OldIdxIn.
+ OldIdxIn->end = Next->start;
return;
}
@@ -1317,8 +1314,8 @@ private:
if (MO.isUndef())
continue;
unsigned SubReg = MO.getSubReg();
- if (SubReg != 0 && LaneMask != 0
- && (TRI.getSubRegIndexLaneMask(SubReg) & LaneMask) == 0)
+ if (SubReg != 0 && LaneMask.any()
+ && (TRI.getSubRegIndexLaneMask(SubReg) & LaneMask).none())
continue;
const MachineInstr &MI = *MO.getParent();
@@ -1394,6 +1391,11 @@ void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin,
LaneBitmask LaneMask) {
LiveInterval::iterator LII = LR.find(endIdx);
SlotIndex lastUseIdx;
+ if (LII == LR.begin()) {
+ // This happens when the function is called for a subregister that only
+ // occurs _after_ the range that is to be repaired.
+ return;
+ }
if (LII != LR.end() && LII->start < endIdx)
lastUseIdx = LII->end;
else
@@ -1420,7 +1422,7 @@ void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin,
unsigned SubReg = MO.getSubReg();
LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubReg);
- if ((Mask & LaneMask) == 0)
+ if ((Mask & LaneMask).none())
continue;
if (MO.isDef()) {
@@ -1538,15 +1540,19 @@ void LiveIntervals::removePhysRegDefAt(unsigned Reg, SlotIndex Pos) {
}
void LiveIntervals::removeVRegDefAt(LiveInterval &LI, SlotIndex Pos) {
+ // LI may not have the main range computed yet, but its subranges may
+ // be present.
VNInfo *VNI = LI.getVNInfoAt(Pos);
- if (VNI == nullptr)
- return;
- LI.removeValNo(VNI);
+ if (VNI != nullptr) {
+ assert(VNI->def.getBaseIndex() == Pos.getBaseIndex());
+ LI.removeValNo(VNI);
+ }
- // Also remove the value in subranges.
+ // Also remove the value defined in subranges.
for (LiveInterval::SubRange &S : LI.subranges()) {
if (VNInfo *SVNI = S.getVNInfoAt(Pos))
- S.removeValNo(SVNI);
+ if (SVNI->def.getBaseIndex() == Pos.getBaseIndex())
+ S.removeValNo(SVNI);
}
LI.removeEmptySubRanges();
}
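
Many of the mechanical changes above come from LaneBitmask turning into a proper class: raw comparisons such as (Mask & Other) == 0 or Mask != 0 become .none() / .any(), and the 0 / ~0u literals become getNone() / getAll(). A tiny stand-in type showing the idea (the real LaneBitmask class in this import, llvm/MC/LaneBitmask.h, has more operations):

    #include <cassert>
    #include <cstdint>

    // Minimal lane-mask wrapper in the spirit of the type the diff migrates
    // to; named queries keep callers independent of the underlying width.
    class ToyLaneMask {
      std::uint32_t Mask;
    public:
      constexpr explicit ToyLaneMask(std::uint32_t M) : Mask(M) {}
      constexpr bool any()  const { return Mask != 0; }
      constexpr bool none() const { return Mask == 0; }
      constexpr bool all()  const { return ~Mask == 0; }
      constexpr ToyLaneMask operator&(ToyLaneMask O) const {
        return ToyLaneMask(Mask & O.Mask);
      }
      constexpr ToyLaneMask operator~() const { return ToyLaneMask(~Mask); }
      static constexpr ToyLaneMask getNone() { return ToyLaneMask(0); }
      static constexpr ToyLaneMask getAll() {
        return ToyLaneMask(~std::uint32_t(0));
      }
    };

    int main() {
      ToyLaneMask SubRange(0x3), Def(0x1);
      assert((SubRange & Def).any());    // the def touches this subrange
      assert((Def & ~SubRange).none());  // nothing outside the subrange
      assert(ToyLaneMask::getAll().all());
      assert(ToyLaneMask::getNone().none());
      return 0;
    }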
diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp
index 025d99ce7881..fc2f233f6d68 100644
--- a/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveIntervalUnion.cpp
@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/LiveIntervalUnion.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SparseBitVector.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -102,9 +103,7 @@ void LiveIntervalUnion::verify(LiveVirtRegBitSet& VisitedVRegs) {
// Scan the vector of interfering virtual registers in this union. Assume it's
// quite small.
bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const {
- SmallVectorImpl<LiveInterval*>::const_iterator I =
- std::find(InterferingVRegs.begin(), InterferingVRegs.end(), VirtReg);
- return I != InterferingVRegs.end();
+ return is_contained(InterferingVRegs, VirtReg);
}
// Collect virtual registers in this union that interfere with this
diff --git a/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp b/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp
index 4e2528f47568..dcc41c1718a6 100644
--- a/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp
+++ b/contrib/llvm/lib/CodeGen/LivePhysRegs.cpp
@@ -49,7 +49,7 @@ void LivePhysRegs::stepBackward(const MachineInstr &MI) {
if (!O->isDef())
continue;
unsigned Reg = O->getReg();
- if (Reg == 0)
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg))
continue;
removeReg(Reg);
} else if (O->isRegMask())
@@ -61,7 +61,7 @@ void LivePhysRegs::stepBackward(const MachineInstr &MI) {
if (!O->isReg() || !O->readsReg())
continue;
unsigned Reg = O->getReg();
- if (Reg == 0)
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg))
continue;
addReg(Reg);
}
@@ -77,7 +77,7 @@ void LivePhysRegs::stepForward(const MachineInstr &MI,
for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
if (O->isReg()) {
unsigned Reg = O->getReg();
- if (Reg == 0)
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg))
continue;
if (O->isDef()) {
// Note, dead defs are still recorded. The caller should decide how to
@@ -141,9 +141,19 @@ bool LivePhysRegs::available(const MachineRegisterInfo &MRI,
}
/// Add live-in registers of basic block \p MBB to \p LiveRegs.
-static void addLiveIns(LivePhysRegs &LiveRegs, const MachineBasicBlock &MBB) {
- for (const auto &LI : MBB.liveins())
- LiveRegs.addReg(LI.PhysReg);
+void LivePhysRegs::addBlockLiveIns(const MachineBasicBlock &MBB) {
+ for (const auto &LI : MBB.liveins()) {
+ MCSubRegIndexIterator S(LI.PhysReg, TRI);
+ if (LI.LaneMask.all() || (LI.LaneMask.any() && !S.isValid())) {
+ addReg(LI.PhysReg);
+ continue;
+ }
+ for (; S.isValid(); ++S) {
+ unsigned SI = S.getSubRegIndex();
+ if ((LI.LaneMask & TRI->getSubRegIndexLaneMask(SI)).any())
+ addReg(S.getSubReg());
+ }
+ }
}
/// Add pristine registers to the given \p LiveRegs. This function removes
@@ -160,12 +170,12 @@ static void addPristines(LivePhysRegs &LiveRegs, const MachineFunction &MF,
void LivePhysRegs::addLiveOutsNoPristines(const MachineBasicBlock &MBB) {
// To get the live-outs we simply merge the live-ins of all successors.
for (const MachineBasicBlock *Succ : MBB.successors())
- ::addLiveIns(*this, *Succ);
+ addBlockLiveIns(*Succ);
}
void LivePhysRegs::addLiveOuts(const MachineBasicBlock &MBB) {
const MachineFunction &MF = *MBB.getParent();
- const MachineFrameInfo &MFI = *MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
if (MFI.isCalleeSavedInfoValid()) {
if (MBB.isReturnBlock()) {
// The return block has no successors whose live-ins we could merge
@@ -182,8 +192,31 @@ void LivePhysRegs::addLiveOuts(const MachineBasicBlock &MBB) {
void LivePhysRegs::addLiveIns(const MachineBasicBlock &MBB) {
const MachineFunction &MF = *MBB.getParent();
- const MachineFrameInfo &MFI = *MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
if (MFI.isCalleeSavedInfoValid())
addPristines(*this, MF, MFI, *TRI);
- ::addLiveIns(*this, MBB);
+ addBlockLiveIns(MBB);
+}
+
+void llvm::computeLiveIns(LivePhysRegs &LiveRegs, const TargetRegisterInfo &TRI,
+ MachineBasicBlock &MBB) {
+ assert(MBB.livein_empty());
+ LiveRegs.init(TRI);
+ LiveRegs.addLiveOutsNoPristines(MBB);
+ for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend()))
+ LiveRegs.stepBackward(MI);
+
+ for (unsigned Reg : LiveRegs) {
+ // Skip the register if we are about to add one of its super registers.
+ bool ContainsSuperReg = false;
+ for (MCSuperRegIterator SReg(Reg, &TRI); SReg.isValid(); ++SReg) {
+ if (LiveRegs.contains(*SReg)) {
+ ContainsSuperReg = true;
+ break;
+ }
+ }
+ if (ContainsSuperReg)
+ continue;
+ MBB.addLiveIn(Reg);
+ }
}
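
The new computeLiveIns helper above recomputes a block's live-in list from scratch: it starts with the block's live-outs, steps backwards over every instruction (defs kill, uses revive), and finally records the surviving registers, skipping any register whose super-register is already being added. A sketch of the backward walk with toy instructions (register aliasing and lane masks are left out):

    #include <iostream>
    #include <set>
    #include <vector>

    struct ToyInstr {
      std::vector<int> Defs;
      std::vector<int> Uses;
    };

    // Walk the block backwards from its live-outs: a def ends liveness above
    // the instruction, a use starts it. Whatever is still live at the top of
    // the block is the live-in set.
    std::set<int> computeLiveIns(const std::set<int> &LiveOuts,
                                 const std::vector<ToyInstr> &Block) {
      std::set<int> Live = LiveOuts;
      for (auto I = Block.rbegin(); I != Block.rend(); ++I) {
        for (int D : I->Defs)
          Live.erase(D);
        for (int U : I->Uses)
          Live.insert(U);
      }
      return Live;
    }

    int main() {
      // r2 = r1 + r3; r4 = r2; with {r4} live out => {r1, r3} live in.
      std::vector<ToyInstr> Block{{{2}, {1, 3}}, {{4}, {2}}};
      for (int R : computeLiveIns({4}, Block))
        std::cout << "r" << R << " ";
      std::cout << "\n";
      return 0;
    }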
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp
index db91ca113dc1..012837608628 100644
--- a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "LiveRangeCalc.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -23,6 +24,7 @@ void LiveRangeCalc::resetLiveOutMap() {
unsigned NumBlocks = MF->getNumBlockIDs();
Seen.clear();
Seen.resize(NumBlocks);
+ EntryInfoMap.clear();
Map.resize(NumBlocks);
}
@@ -64,9 +66,8 @@ void LiveRangeCalc::calculate(LiveInterval &LI, bool TrackSubRegs) {
unsigned SubReg = MO.getSubReg();
if (LI.hasSubRanges() || (SubReg != 0 && TrackSubRegs)) {
- LaneBitmask Mask = SubReg != 0 ? TRI.getSubRegIndexLaneMask(SubReg)
- : MRI->getMaxLaneMaskForVReg(Reg);
-
+ LaneBitmask SubMask = SubReg != 0 ? TRI.getSubRegIndexLaneMask(SubReg)
+ : MRI->getMaxLaneMaskForVReg(Reg);
// If this is the first time we see a subregister def, initialize
// subranges by creating a copy of the main range.
if (!LI.hasSubRanges() && !LI.empty()) {
@@ -74,17 +75,19 @@ void LiveRangeCalc::calculate(LiveInterval &LI, bool TrackSubRegs) {
LI.createSubRangeFrom(*Alloc, ClassMask, LI);
}
+ LaneBitmask Mask = SubMask;
for (LiveInterval::SubRange &S : LI.subranges()) {
// A Mask for subregs common to the existing subrange and current def.
LaneBitmask Common = S.LaneMask & Mask;
- if (Common == 0)
+ if (Common.none())
continue;
- // A Mask for subregs covered by the subrange but not the current def.
- LaneBitmask LRest = S.LaneMask & ~Mask;
LiveInterval::SubRange *CommonRange;
- if (LRest != 0) {
- // Split current subrange into Common and LRest ranges.
- S.LaneMask = LRest;
+ // A Mask for subregs covered by the subrange but not the current def.
+ LaneBitmask RM = S.LaneMask & ~Mask;
+ if (RM.any()) {
+ // Split the subrange S into two parts: one covered by the current
+ // def (CommonRange), and the one not affected by it (updated S).
+ S.LaneMask = RM;
CommonRange = LI.createSubRangeFrom(*Alloc, Common, S);
} else {
assert(Common == S.LaneMask);
@@ -95,7 +98,7 @@ void LiveRangeCalc::calculate(LiveInterval &LI, bool TrackSubRegs) {
Mask &= ~Common;
}
// Create a new SubRange for subregs we did not cover yet.
- if (Mask != 0) {
+ if (Mask.any()) {
LiveInterval::SubRange *NewRange = LI.createSubRange(*Alloc, Mask);
if (MO.isDef())
createDeadDef(*Indexes, *Alloc, *NewRange, MO);
@@ -116,14 +119,15 @@ void LiveRangeCalc::calculate(LiveInterval &LI, bool TrackSubRegs) {
// necessary.
if (LI.hasSubRanges()) {
for (LiveInterval::SubRange &S : LI.subranges()) {
- resetLiveOutMap();
- extendToUses(S, Reg, S.LaneMask);
+ LiveRangeCalc SubLRC;
+ SubLRC.reset(MF, Indexes, DomTree, Alloc);
+ SubLRC.extendToUses(S, Reg, S.LaneMask, &LI);
}
LI.clear();
constructMainRangeFromSubranges(LI);
} else {
resetLiveOutMap();
- extendToUses(LI, Reg, ~0u);
+ extendToUses(LI, Reg, LaneBitmask::getAll());
}
}
@@ -139,9 +143,8 @@ void LiveRangeCalc::constructMainRangeFromSubranges(LiveInterval &LI) {
MainRange.createDeadDef(VNI->def, *Alloc);
}
}
-
resetLiveOutMap();
- extendToUses(MainRange, LI.reg);
+ extendToUses(MainRange, LI.reg, LaneBitmask::getAll(), &LI);
}
void LiveRangeCalc::createDeadDefs(LiveRange &LR, unsigned Reg) {
@@ -154,29 +157,34 @@ void LiveRangeCalc::createDeadDefs(LiveRange &LR, unsigned Reg) {
}
-void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg,
- LaneBitmask Mask) {
+void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg, LaneBitmask Mask,
+ LiveInterval *LI) {
+ SmallVector<SlotIndex, 4> Undefs;
+ if (LI != nullptr)
+ LI->computeSubRangeUndefs(Undefs, Mask, *MRI, *Indexes);
+
// Visit all operands that read Reg. This may include partial defs.
+ bool IsSubRange = !Mask.all();
const TargetRegisterInfo &TRI = *MRI->getTargetRegisterInfo();
for (MachineOperand &MO : MRI->reg_nodbg_operands(Reg)) {
// Clear all kill flags. They will be reinserted after register allocation
// by LiveIntervalAnalysis::addKillFlags().
if (MO.isUse())
MO.setIsKill(false);
- else {
- // We only care about uses, but on the main range (mask ~0u) this includes
- // the "virtual" reads happening for subregister defs.
- if (Mask != ~0u)
- continue;
- }
-
- if (!MO.readsReg())
+ // MO::readsReg returns "true" for subregister defs. This is for keeping
+ // liveness of the entire register (i.e. for the main range of the live
+ // interval). For subranges, definitions of non-overlapping subregisters
+ // do not count as uses.
+ if (!MO.readsReg() || (IsSubRange && MO.isDef()))
continue;
+
unsigned SubReg = MO.getSubReg();
if (SubReg != 0) {
- LaneBitmask SubRegMask = TRI.getSubRegIndexLaneMask(SubReg);
- // Ignore uses not covering the current subrange.
- if ((SubRegMask & Mask) == 0)
+ LaneBitmask SLM = TRI.getSubRegIndexLaneMask(SubReg);
+ if (MO.isDef())
+ SLM = ~SLM;
+ // Ignore uses not reading the current (sub)range.
+ if ((SLM & Mask).none())
continue;
}
@@ -205,7 +213,7 @@ void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg,
// MI is reading Reg. We may have visited MI before if it happens to be
// reading Reg multiple times. That is OK, extend() is idempotent.
- extend(LR, UseIdx, Reg);
+ extend(LR, UseIdx, Reg, Undefs);
}
}
@@ -235,8 +243,8 @@ void LiveRangeCalc::updateFromLiveIns() {
LiveIn.clear();
}
-
-void LiveRangeCalc::extend(LiveRange &LR, SlotIndex Use, unsigned PhysReg) {
+void LiveRangeCalc::extend(LiveRange &LR, SlotIndex Use, unsigned PhysReg,
+ ArrayRef<SlotIndex> Undefs) {
assert(Use.isValid() && "Invalid SlotIndex");
assert(Indexes && "Missing SlotIndexes");
assert(DomTree && "Missing dominator tree");
@@ -245,14 +253,15 @@ void LiveRangeCalc::extend(LiveRange &LR, SlotIndex Use, unsigned PhysReg) {
assert(UseMBB && "No MBB at Use");
// Is there a def in the same MBB we can extend?
- if (LR.extendInBlock(Indexes->getMBBStartIdx(UseMBB), Use))
+ auto EP = LR.extendInBlock(Undefs, Indexes->getMBBStartIdx(UseMBB), Use);
+ if (EP.first != nullptr || EP.second)
return;
// Find the single reaching def, or determine if Use is jointly dominated by
// multiple values, and we may need to create even more phi-defs to preserve
// VNInfo SSA form. Perform a search for all predecessor blocks where we
// know the dominating VNInfo.
- if (findReachingDefs(LR, *UseMBB, Use, PhysReg))
+ if (findReachingDefs(LR, *UseMBB, Use, PhysReg, Undefs))
return;
// When there were multiple different values, we may need new PHIs.
@@ -271,8 +280,72 @@ void LiveRangeCalc::calculateValues() {
}
+bool LiveRangeCalc::isDefOnEntry(LiveRange &LR, ArrayRef<SlotIndex> Undefs,
+ MachineBasicBlock &MBB, BitVector &DefOnEntry,
+ BitVector &UndefOnEntry) {
+ unsigned BN = MBB.getNumber();
+ if (DefOnEntry[BN])
+ return true;
+ if (UndefOnEntry[BN])
+ return false;
+
+ auto MarkDefined =
+ [this,BN,&DefOnEntry,&UndefOnEntry] (MachineBasicBlock &B) -> bool {
+ for (MachineBasicBlock *S : B.successors())
+ DefOnEntry[S->getNumber()] = true;
+ DefOnEntry[BN] = true;
+ return true;
+ };
+
+ SetVector<unsigned> WorkList;
+ // Checking if the entry of MBB is reached by some def: add all predecessors
+ // that are potentially defined-on-exit to the work list.
+ for (MachineBasicBlock *P : MBB.predecessors())
+ WorkList.insert(P->getNumber());
+
+ for (unsigned i = 0; i != WorkList.size(); ++i) {
+ // Determine if the exit from the block is reached by some def.
+ unsigned N = WorkList[i];
+ MachineBasicBlock &B = *MF->getBlockNumbered(N);
+ if (Seen[N] && Map[&B].first != nullptr)
+ return MarkDefined(B);
+ SlotIndex Begin, End;
+ std::tie(Begin, End) = Indexes->getMBBRange(&B);
+ LiveRange::iterator UB = std::upper_bound(LR.begin(), LR.end(), End);
+ if (UB != LR.begin()) {
+ LiveRange::Segment &Seg = *std::prev(UB);
+ if (Seg.end > Begin) {
+ // There is a segment that overlaps B. If the range is not explicitly
+ // undefined between the end of the segment and the end of the block,
+ // treat the block as defined on exit. If it is, go to the next block
+ // on the work list.
+ if (LR.isUndefIn(Undefs, Seg.end, End))
+ continue;
+ return MarkDefined(B);
+ }
+ }
+
+ // No segment overlaps with this block. If this block is not defined on
+ // entry, or it undefines the range, do not process its predecessors.
+ if (UndefOnEntry[N] || LR.isUndefIn(Undefs, Begin, End)) {
+ UndefOnEntry[N] = true;
+ continue;
+ }
+ if (DefOnEntry[N])
+ return MarkDefined(B);
+
+ // Still don't know: add all predecessors to the work list.
+ for (MachineBasicBlock *P : B.predecessors())
+ WorkList.insert(P->getNumber());
+ }
+
+ UndefOnEntry[BN] = true;
+ return false;
+}
+
bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
- SlotIndex Use, unsigned PhysReg) {
+ SlotIndex Use, unsigned PhysReg,
+ ArrayRef<SlotIndex> Undefs) {
unsigned UseMBBNum = UseMBB.getNumber();
// Block numbers where LR should be live-in.
@@ -282,6 +355,8 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
bool UniqueVNI = true;
VNInfo *TheVNI = nullptr;
+ bool FoundUndef = false;
+
// Using Seen as a visited set, perform a BFS for all reaching defs.
for (unsigned i = 0; i != WorkList.size(); ++i) {
MachineBasicBlock *MBB = MF->getBlockNumbered(WorkList[i]);
@@ -294,18 +369,20 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
const MachineInstr *MI = Indexes->getInstructionFromIndex(Use);
if (MI != nullptr)
errs() << Use << " " << *MI;
- llvm_unreachable("Use not jointly dominated by defs.");
+ report_fatal_error("Use not jointly dominated by defs.");
}
if (TargetRegisterInfo::isPhysicalRegister(PhysReg) &&
!MBB->isLiveIn(PhysReg)) {
MBB->getParent()->verify();
- errs() << "The register " << PrintReg(PhysReg)
+ const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
+ errs() << "The register " << PrintReg(PhysReg, TRI)
<< " needs to be live in to BB#" << MBB->getNumber()
<< ", but is missing from the live-in list.\n";
- llvm_unreachable("Invalid global physical register");
+ report_fatal_error("Invalid global physical register");
}
#endif
+ FoundUndef |= MBB->pred_empty();
for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
PE = MBB->pred_end(); PI != PE; ++PI) {
@@ -326,18 +403,21 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
// First time we see Pred. Try to determine the live-out value, but set
// it as null if Pred is live-through with an unknown value.
- VNInfo *VNI = LR.extendInBlock(Start, End);
+ auto EP = LR.extendInBlock(Undefs, Start, End);
+ VNInfo *VNI = EP.first;
+ FoundUndef |= EP.second;
setLiveOutValue(Pred, VNI);
if (VNI) {
if (TheVNI && TheVNI != VNI)
UniqueVNI = false;
TheVNI = VNI;
- continue;
}
+ if (VNI || EP.second)
+ continue;
// No, we need a live-in value for Pred as well
if (Pred != &UseMBB)
- WorkList.push_back(Pred->getNumber());
+ WorkList.push_back(Pred->getNumber());
else
// Loopback to UseMBB, so value is really live through.
Use = SlotIndex();
@@ -345,6 +425,9 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
}
LiveIn.clear();
+ FoundUndef |= (TheVNI == nullptr);
+ if (Undefs.size() > 0 && FoundUndef)
+ UniqueVNI = false;
// Both updateSSA() and LiveRangeUpdater benefit from ordered blocks, but
// neither require it. Skip the sorting overhead for small updates.
@@ -353,27 +436,39 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
// If a unique reaching def was found, blit in the live ranges immediately.
if (UniqueVNI) {
+ assert(TheVNI != nullptr);
LiveRangeUpdater Updater(&LR);
- for (SmallVectorImpl<unsigned>::const_iterator I = WorkList.begin(),
- E = WorkList.end(); I != E; ++I) {
- SlotIndex Start, End;
- std::tie(Start, End) = Indexes->getMBBRange(*I);
- // Trim the live range in UseMBB.
- if (*I == UseMBBNum && Use.isValid())
- End = Use;
- else
- Map[MF->getBlockNumbered(*I)] = LiveOutPair(TheVNI, nullptr);
- Updater.add(Start, End, TheVNI);
+ for (unsigned BN : WorkList) {
+ SlotIndex Start, End;
+ std::tie(Start, End) = Indexes->getMBBRange(BN);
+ // Trim the live range in UseMBB.
+ if (BN == UseMBBNum && Use.isValid())
+ End = Use;
+ else
+ Map[MF->getBlockNumbered(BN)] = LiveOutPair(TheVNI, nullptr);
+ Updater.add(Start, End, TheVNI);
}
return true;
}
+ // Prepare the defined/undefined bit vectors.
+ auto EF = EntryInfoMap.find(&LR);
+ if (EF == EntryInfoMap.end()) {
+ unsigned N = MF->getNumBlockIDs();
+ EF = EntryInfoMap.insert({&LR, {BitVector(), BitVector()}}).first;
+ EF->second.first.resize(N);
+ EF->second.second.resize(N);
+ }
+ BitVector &DefOnEntry = EF->second.first;
+ BitVector &UndefOnEntry = EF->second.second;
+
// Multiple values were found, so transfer the work list to the LiveIn array
// where UpdateSSA will use it as a work list.
LiveIn.reserve(WorkList.size());
- for (SmallVectorImpl<unsigned>::const_iterator
- I = WorkList.begin(), E = WorkList.end(); I != E; ++I) {
- MachineBasicBlock *MBB = MF->getBlockNumbered(*I);
+ for (unsigned BN : WorkList) {
+ MachineBasicBlock *MBB = MF->getBlockNumbered(BN);
+ if (Undefs.size() > 0 && !isDefOnEntry(LR, Undefs, *MBB, DefOnEntry, UndefOnEntry))
+ continue;
addLiveInBlock(LR, DomTree->getNode(MBB));
if (MBB == &UseMBB)
LiveIn.back().Kill = Use;
@@ -458,10 +553,12 @@ void LiveRangeCalc::updateSSA() {
I.DomNode = nullptr;
// Add liveness since updateFromLiveIns now skips this node.
- if (I.Kill.isValid())
- LR.addSegment(LiveInterval::Segment(Start, I.Kill, VNI));
- else {
- LR.addSegment(LiveInterval::Segment(Start, End, VNI));
+ if (I.Kill.isValid()) {
+ if (VNI)
+ LR.addSegment(LiveInterval::Segment(Start, I.Kill, VNI));
+ } else {
+ if (VNI)
+ LR.addSegment(LiveInterval::Segment(Start, End, VNI));
LOP = LiveOutPair(VNI, Node);
}
} else if (IDomValue.first) {
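
isDefOnEntry, added above, answers whether any def of the live range reaches the entry of a block, memoizing both positive and negative answers in the per-range DefOnEntry/UndefOnEntry bit vectors kept in EntryInfoMap. A condensed standalone version of the worklist search (undef slots, the live-out map, and the marking of successors are simplified away):

    #include <cstddef>
    #include <iostream>
    #include <vector>

    // Worklist search over predecessors: return true as soon as a block that
    // contains (or is known to be entered by) a def is found, otherwise mark
    // the queried block undefined-on-entry so later queries are O(1).
    bool isDefOnEntry(int BN, const std::vector<std::vector<int>> &Preds,
                      const std::vector<bool> &DefInBlock,
                      std::vector<bool> &DefOnEntry,
                      std::vector<bool> &UndefOnEntry) {
      if (DefOnEntry[BN])
        return true;
      if (UndefOnEntry[BN])
        return false;

      std::vector<int> WorkList(Preds[BN]);
      std::vector<bool> InList(Preds.size(), false);
      for (int P : WorkList)
        InList[P] = true;

      for (std::size_t i = 0; i != WorkList.size(); ++i) {
        int N = WorkList[i];
        if (DefInBlock[N] || DefOnEntry[N]) {
          DefOnEntry[BN] = true;         // cache the positive answer
          return true;
        }
        if (UndefOnEntry[N])
          continue;                      // known dead end, do not expand
        for (int P : Preds[N])
          if (!InList[P]) {
            InList[P] = true;
            WorkList.push_back(P);
          }
      }
      UndefOnEntry[BN] = true;           // cache the negative answer
      return false;
    }

    int main() {
      // Blocks 0 -> 1 -> 2, with the only def in block 0.
      std::vector<std::vector<int>> Preds{{}, {0}, {1}};
      std::vector<bool> DefInBlock{true, false, false};
      std::vector<bool> DefOnEntry(3, false), UndefOnEntry(3, false);
      std::cout << isDefOnEntry(2, Preds, DefInBlock, DefOnEntry, UndefOnEntry)
                << "\n";                 // 1: the def in block 0 reaches block 2
      return 0;
    }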
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.h b/contrib/llvm/lib/CodeGen/LiveRangeCalc.h
index 9de48b722881..1a7598f8044a 100644
--- a/contrib/llvm/lib/CodeGen/LiveRangeCalc.h
+++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.h
@@ -22,6 +22,7 @@
#ifndef LLVM_LIB_CODEGEN_LIVERANGECALC_H
#define LLVM_LIB_CODEGEN_LIVERANGECALC_H
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/IndexedMap.h"
#include "llvm/CodeGen/LiveInterval.h"
@@ -53,6 +54,19 @@ class LiveRangeCalc {
/// when switching live ranges.
BitVector Seen;
+ /// For each LiveRange, track the sets of blocks (represented by bit
+ /// vectors) where the range is defined on entry and undefined on entry.
+ /// A block is defined on entry if there is a path from at least one of
+ /// the defs in the live range to the entry of the block, and conversely,
+ /// a block is undefined on entry if there is no such path (i.e. no
+ /// definition reaches the entry of the block). A single LiveRangeCalc
+ /// object is used to track live-out information for multiple registers
+ /// in live range splitting (which is ok, since the live ranges of these
+ /// registers do not overlap), but the defined/undefined information must
+ /// be kept separate for each individual range.
+ /// By convention, EntryInfoMap[&LR] = { Defined, Undefined }.
+ std::map<LiveRange*,std::pair<BitVector,BitVector>> EntryInfoMap;
+
/// Map each basic block where a live range is live out to the live-out value
/// and its defining block.
///
@@ -101,18 +115,31 @@ class LiveRangeCalc {
/// used to add entries directly.
SmallVector<LiveInBlock, 16> LiveIn;
- /// Assuming that @p LR is live-in to @p UseMBB, find the set of defs that can
- /// reach it.
+ /// Check if the entry to block @p MBB can be reached by any of the defs
+ /// in @p LR. Return true if any of the defs reaches the entry to @p MBB.
+ bool isDefOnEntry(LiveRange &LR, ArrayRef<SlotIndex> Undefs,
+ MachineBasicBlock &MBB, BitVector &DefOnEntry,
+ BitVector &UndefOnEntry);
+
+ /// Find the set of defs that can reach @p Kill. @p Kill must belong to
+ /// @p UseMBB.
///
- /// If only one def can reach @p UseMBB, all paths from the def to @p UseMBB
- /// are added to @p LR, and the function returns true.
+ /// If exactly one def can reach @p UseMBB, and the def dominates @p Kill,
+ /// all paths from the def to @p UseMBB are added to @p LR, and the function
+ /// returns true.
///
/// If multiple values can reach @p UseMBB, the blocks that need @p LR to be
/// live in are added to the LiveIn array, and the function returns false.
///
+ /// The array @p Undefs provides the locations where the range @p LR becomes
+ /// undefined by <def,read-undef> operands on other subranges. If @p Undefs
+ /// is non-empty and @p Kill is jointly dominated only by the entries of
+ /// @p Undefs, the function returns false.
+ ///
/// PhysReg, when set, is used to verify live-in lists on basic blocks.
bool findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
- SlotIndex Kill, unsigned PhysReg);
+ SlotIndex Kill, unsigned PhysReg,
+ ArrayRef<SlotIndex> Undefs);
/// updateSSA - Compute the values that will be live in to all requested
/// blocks in LiveIn. Create PHI-def values as required to preserve SSA form.
@@ -127,9 +154,16 @@ class LiveRangeCalc {
/// Extend the live range of @p LR to reach all uses of Reg.
///
- /// All uses must be jointly dominated by existing liveness. PHI-defs are
- /// inserted as needed to preserve SSA form.
- void extendToUses(LiveRange &LR, unsigned Reg, LaneBitmask LaneMask);
+ /// If @p LR is a main range, or if @p LI is null, then all uses must be
+ /// jointly dominated by the definitions from @p LR. If @p LR is a subrange
+ /// of the live interval @p LI, corresponding to lane mask @p LaneMask,
+ /// all uses must be jointly dominated by the definitions from @p LR
+ /// together with definitions of other lanes where @p LR becomes undefined
+ /// (via <def,read-undef> operands).
+ /// If @p LR is a main range, the @p LaneMask should be set to ~0, i.e.
+ /// LaneBitmask::getAll().
+ void extendToUses(LiveRange &LR, unsigned Reg, LaneBitmask LaneMask,
+ LiveInterval *LI = nullptr);
/// Reset Map and Seen fields.
void resetLiveOutMap();
@@ -169,7 +203,8 @@ public:
/// inserted as required to preserve SSA form.
///
/// PhysReg, when set, is used to verify live-in lists on basic blocks.
- void extend(LiveRange &LR, SlotIndex Use, unsigned PhysReg = 0);
+ void extend(LiveRange &LR, SlotIndex Use, unsigned PhysReg,
+ ArrayRef<SlotIndex> Undefs);
/// createDeadDefs - Create a dead def in LI for every def operand of Reg.
/// Each instruction defining Reg gets a new VNInfo with a corresponding
@@ -181,7 +216,7 @@ public:
/// All uses must be jointly dominated by existing liveness. PHI-defs are
/// inserted as needed to preserve SSA form.
void extendToUses(LiveRange &LR, unsigned PhysReg) {
- extendToUses(LR, PhysReg, ~0u);
+ extendToUses(LR, PhysReg, LaneBitmask::getAll());
}
/// Calculates liveness for the register specified in live interval @p LI.
diff --git a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp
index b35c0adfacad..7f1c69c0b4a2 100644
--- a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp
@@ -37,6 +37,13 @@ LiveInterval &LiveRangeEdit::createEmptyIntervalFrom(unsigned OldReg) {
VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg));
}
LiveInterval &LI = LIS.createEmptyInterval(VReg);
+ // Create empty subranges if the OldReg's interval has them. Do not create
+ // the main range here---it will be constructed later after the subranges
+ // have been finalized.
+ LiveInterval &OldLI = LIS.getInterval(OldReg);
+ VNInfo::Allocator &Alloc = LIS.getVNInfoAllocator();
+ for (LiveInterval::SubRange &S : OldLI.subranges())
+ LI.createSubRange(Alloc, S.LaneMask);
return LI;
}
@@ -66,6 +73,8 @@ void LiveRangeEdit::scanRemattable(AliasAnalysis *aa) {
unsigned Original = VRM->getOriginal(getReg());
LiveInterval &OrigLI = LIS.getInterval(Original);
VNInfo *OrigVNI = OrigLI.getVNInfoAt(VNI->def);
+ if (!OrigVNI)
+ continue;
MachineInstr *DefMI = LIS.getInstructionFromIndex(OrigVNI->def);
if (!DefMI)
continue;
@@ -94,7 +103,7 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
// We can't remat physreg uses, unless it is a constant.
if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
- if (MRI.isConstantPhysReg(MO.getReg(), *OrigMI->getParent()->getParent()))
+ if (MRI.isConstantPhysReg(MO.getReg()))
continue;
return false;
}
@@ -227,7 +236,7 @@ bool LiveRangeEdit::useIsKill(const LiveInterval &LI,
unsigned SubReg = MO.getSubReg();
LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubReg);
for (const LiveInterval::SubRange &S : LI.subranges()) {
- if ((S.LaneMask & LaneMask) != 0 && S.Query(Idx).isKill())
+ if ((S.LaneMask & LaneMask).any() && S.Query(Idx).isKill())
return true;
}
return false;
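The (S.LaneMask & Mask).any() rewrites in this and the surrounding hunks reflect LaneBitmask becoming a small wrapper class instead of a plain integer, so intersection tests are spelled any() rather than != 0. A hedged standalone sketch of such a wrapper; ToyLaneBitmask is a made-up name, not llvm::LaneBitmask:

    // Simplified bitmask wrapper whose emptiness test is any().
    #include <cstdint>

    class ToyLaneBitmask {
      uint32_t Mask = 0;
    public:
      constexpr explicit ToyLaneBitmask(uint32_t M) : Mask(M) {}
      constexpr bool any() const { return Mask != 0; }
      constexpr ToyLaneBitmask operator&(ToyLaneBitmask O) const {
        return ToyLaneBitmask(Mask & O.Mask);
      }
      static constexpr ToyLaneBitmask getAll() { return ToyLaneBitmask(~0u); }
    };

    // Usage mirroring the hunk above:
    //   if ((S.LaneMask & LaneMask).any()) { ... }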
@@ -263,7 +272,11 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink,
bool ReadsPhysRegs = false;
bool isOrigDef = false;
unsigned Dest;
- if (VRM && MI->getOperand(0).isReg()) {
+ // Only optimize the rematerialization case when the instruction has a single
+ // def, since otherwise we could leave some dead defs in the code. This case
+ // is extremely rare.
+ if (VRM && MI->getOperand(0).isReg() && MI->getOperand(0).isDef() &&
+ MI->getDesc().getNumDefs() == 1) {
Dest = MI->getOperand(0).getReg();
unsigned Original = VRM->getOriginal(Dest);
LiveInterval &OrigLI = LIS.getInterval(Original);
@@ -335,6 +348,7 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink,
// allocations of the func are done.
if (isOrigDef && DeadRemats && TII.isTriviallyReMaterializable(*MI, AA)) {
LiveInterval &NewLI = createEmptyIntervalFrom(Dest);
+ NewLI.removeEmptySubRanges();
VNInfo *VNI = NewLI.getNextValue(Idx, LIS.getVNInfoAllocator());
NewLI.addSegment(LiveInterval::Segment(Idx, Idx.getDeadSlot(), VNI));
pop_back();
@@ -428,6 +442,9 @@ LiveRangeEdit::MRI_NoteNewVirtualRegister(unsigned VReg)
if (VRM)
VRM->grow();
+ if (Parent && !Parent->isSpillable())
+ LIS.getInterval(VReg).markNotSpillable();
+
NewRegs.push_back(VReg);
}
diff --git a/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp b/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp
index 7ee87c1e650f..7a51386aa9ca 100644
--- a/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp
@@ -70,15 +70,16 @@ void LiveRegMatrix::releaseMemory() {
}
}
-template<typename Callable>
-bool foreachUnit(const TargetRegisterInfo *TRI, LiveInterval &VRegInterval,
- unsigned PhysReg, Callable Func) {
+template <typename Callable>
+static bool foreachUnit(const TargetRegisterInfo *TRI,
+ LiveInterval &VRegInterval, unsigned PhysReg,
+ Callable Func) {
if (VRegInterval.hasSubRanges()) {
for (MCRegUnitMaskIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
unsigned Unit = (*Units).first;
LaneBitmask Mask = (*Units).second;
for (LiveInterval::SubRange &S : VRegInterval.subranges()) {
- if (S.LaneMask & Mask) {
+ if ((S.LaneMask & Mask).any()) {
if (Func(Unit, S))
return true;
break;
diff --git a/contrib/llvm/lib/CodeGen/LiveVariables.cpp b/contrib/llvm/lib/CodeGen/LiveVariables.cpp
index dd87216f5e6b..269b990a3149 100644
--- a/contrib/llvm/lib/CodeGen/LiveVariables.cpp
+++ b/contrib/llvm/lib/CodeGen/LiveVariables.cpp
@@ -643,7 +643,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
// register before its uses due to dominance properties of SSA (except for PHI
// nodes, which are treated as a special case).
MachineBasicBlock *Entry = &MF->front();
- SmallPtrSet<MachineBasicBlock*,16> Visited;
+ df_iterator_default_set<MachineBasicBlock*,16> Visited;
for (MachineBasicBlock *MBB : depth_first_ext(Entry, Visited)) {
runOnBlock(MBB, NumRegs);
diff --git a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
index af7392f4435b..e189fb0dd89d 100644
--- a/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/contrib/llvm/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -51,12 +51,21 @@ namespace {
MachineBasicBlock::iterator MI; // Instr referencing the frame
int64_t LocalOffset; // Local offset of the frame idx referenced
int FrameIdx; // The frame index
+
+ // Order in which the reference instruction appears in the program. Used to
+ // ensure a deterministic order when multiple instructions may reference the
+ // same location.
+ unsigned Order;
+
public:
- FrameRef(MachineBasicBlock::iterator I, int64_t Offset, int Idx) :
- MI(I), LocalOffset(Offset), FrameIdx(Idx) {}
+ FrameRef(MachineInstr *I, int64_t Offset, int Idx, unsigned Ord) :
+ MI(I), LocalOffset(Offset), FrameIdx(Idx), Order(Ord) {}
+
bool operator<(const FrameRef &RHS) const {
- return LocalOffset < RHS.LocalOffset;
+ return std::tie(LocalOffset, FrameIdx, Order) <
+ std::tie(RHS.LocalOffset, RHS.FrameIdx, RHS.Order);
}
+
MachineBasicBlock::iterator getMachineInstr() const { return MI; }
int64_t getLocalOffset() const { return LocalOffset; }
int getFrameIndex() const { return FrameIdx; }
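The new Order field and the std::tie comparison make the sort of frame references fully deterministic even when two references share the same offset and frame index. A small self-contained sketch of the same ordering idiom; ToyFrameRef is a hypothetical stand-in for the FrameRef class above:

    // Deterministic ordering via lexicographic std::tie comparison.
    #include <algorithm>
    #include <tuple>
    #include <vector>

    struct ToyFrameRef {
      long long LocalOffset;
      int FrameIdx;
      unsigned Order;   // position in program order, assigned while scanning

      bool operator<(const ToyFrameRef &RHS) const {
        return std::tie(LocalOffset, FrameIdx, Order) <
               std::tie(RHS.LocalOffset, RHS.FrameIdx, RHS.Order);
      }
    };

    void sortRefs(std::vector<ToyFrameRef> &Refs) {
      // Equal offsets and frame indexes still sort deterministically,
      // because Order breaks the remaining ties.
      std::sort(Refs.begin(), Refs.end());
    }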
@@ -67,17 +76,17 @@ namespace {
/// StackObjSet - A set of stack object indexes
typedef SmallSetVector<int, 8> StackObjSet;
- void AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx, int64_t &Offset,
+ void AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx, int64_t &Offset,
bool StackGrowsDown, unsigned &MaxAlign);
void AssignProtectedObjSet(const StackObjSet &UnassignedObjs,
SmallSet<int, 16> &ProtectedObjs,
- MachineFrameInfo *MFI, bool StackGrowsDown,
+ MachineFrameInfo &MFI, bool StackGrowsDown,
int64_t &Offset, unsigned &MaxAlign);
void calculateFrameObjectOffsets(MachineFunction &Fn);
bool insertFrameReferenceRegisters(MachineFunction &Fn);
public:
static char ID; // Pass identification, replacement for typeid
- explicit LocalStackSlotPass() : MachineFunctionPass(ID) {
+ explicit LocalStackSlotPass() : MachineFunctionPass(ID) {
initializeLocalStackSlotPassPass(*PassRegistry::getPassRegistry());
}
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -102,9 +111,9 @@ INITIALIZE_PASS_END(LocalStackSlotPass, "localstackalloc",
bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) {
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- unsigned LocalObjectCount = MFI->getObjectIndexEnd();
+ unsigned LocalObjectCount = MFI.getObjectIndexEnd();
// If the target doesn't want/need this pass, or if there are no locals
// to consider, early exit.
@@ -112,7 +121,7 @@ bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) {
return true;
// Make sure we have enough space to store the local offsets.
- LocalOffsets.resize(MFI->getObjectIndexEnd());
+ LocalOffsets.resize(MFI.getObjectIndexEnd());
// Lay out the local blob.
calculateFrameObjectOffsets(MF);
@@ -125,21 +134,21 @@ bool LocalStackSlotPass::runOnMachineFunction(MachineFunction &MF) {
// Otherwise, PEI can do a bit better job of getting the alignment right
// without a hole at the start since it knows the alignment of the stack
// at the start of local allocation, and this pass doesn't.
- MFI->setUseLocalStackAllocationBlock(UsedBaseRegs);
+ MFI.setUseLocalStackAllocationBlock(UsedBaseRegs);
return true;
}
/// AdjustStackOffset - Helper function used to adjust the stack frame offset.
-void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo *MFI,
+void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo &MFI,
int FrameIdx, int64_t &Offset,
bool StackGrowsDown,
unsigned &MaxAlign) {
// If the stack grows down, add the object size to find the lowest address.
if (StackGrowsDown)
- Offset += MFI->getObjectSize(FrameIdx);
+ Offset += MFI.getObjectSize(FrameIdx);
- unsigned Align = MFI->getObjectAlignment(FrameIdx);
+ unsigned Align = MFI.getObjectAlignment(FrameIdx);
// If the alignment of this object is greater than that of the stack, then
// increase the stack alignment to match.
@@ -154,10 +163,10 @@ void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo *MFI,
// Keep the offset available for base register allocation
LocalOffsets[FrameIdx] = LocalOffset;
// And tell MFI about it for PEI to use later
- MFI->mapLocalFrameObject(FrameIdx, LocalOffset);
+ MFI.mapLocalFrameObject(FrameIdx, LocalOffset);
if (!StackGrowsDown)
- Offset += MFI->getObjectSize(FrameIdx);
+ Offset += MFI.getObjectSize(FrameIdx);
++NumAllocations;
}
@@ -166,7 +175,7 @@ void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo *MFI,
/// those required to be close to the Stack Protector) to stack offsets.
void LocalStackSlotPass::AssignProtectedObjSet(const StackObjSet &UnassignedObjs,
SmallSet<int, 16> &ProtectedObjs,
- MachineFrameInfo *MFI,
+ MachineFrameInfo &MFI,
bool StackGrowsDown, int64_t &Offset,
unsigned &MaxAlign) {
@@ -183,7 +192,7 @@ void LocalStackSlotPass::AssignProtectedObjSet(const StackObjSet &UnassignedObjs
///
void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
// Loop over all of the stack objects, assigning sequential addresses...
- MachineFrameInfo *MFI = Fn.getFrameInfo();
+ MachineFrameInfo &MFI = Fn.getFrameInfo();
const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
bool StackGrowsDown =
TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
@@ -194,22 +203,22 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
// Make sure that the stack protector comes before the local variables on the
// stack.
SmallSet<int, 16> ProtectedObjs;
- if (MFI->getStackProtectorIndex() >= 0) {
+ if (MFI.getStackProtectorIndex() >= 0) {
StackObjSet LargeArrayObjs;
StackObjSet SmallArrayObjs;
StackObjSet AddrOfObjs;
- AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), Offset,
+ AdjustStackOffset(MFI, MFI.getStackProtectorIndex(), Offset,
StackGrowsDown, MaxAlign);
// Assign large stack objects first.
- for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
- if (MFI->isDeadObjectIndex(i))
+ for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
+ if (MFI.isDeadObjectIndex(i))
continue;
- if (MFI->getStackProtectorIndex() == (int)i)
+ if (MFI.getStackProtectorIndex() == (int)i)
continue;
- switch (SP->getSSPLayout(MFI->getObjectAllocation(i))) {
+ switch (SP->getSSPLayout(MFI.getObjectAllocation(i))) {
case StackProtector::SSPLK_None:
continue;
case StackProtector::SSPLK_SmallArray:
@@ -235,10 +244,10 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
// Then assign frame offsets to stack objects that are not used to spill
// callee saved registers.
- for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
- if (MFI->isDeadObjectIndex(i))
+ for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
+ if (MFI.isDeadObjectIndex(i))
continue;
- if (MFI->getStackProtectorIndex() == (int)i)
+ if (MFI.getStackProtectorIndex() == (int)i)
continue;
if (ProtectedObjs.count(i))
continue;
@@ -247,8 +256,8 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
}
// Remember how big this blob of stack space is
- MFI->setLocalFrameSize(Offset);
- MFI->setLocalFrameMaxAlign(MaxAlign);
+ MFI.setLocalFrameSize(Offset);
+ MFI.setLocalFrameMaxAlign(MaxAlign);
}
static inline bool
@@ -273,7 +282,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
// and ask the target to create a defining instruction for it.
bool UsedBaseReg = false;
- MachineFrameInfo *MFI = Fn.getFrameInfo();
+ MachineFrameInfo &MFI = Fn.getFrameInfo();
const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
const TargetFrameLowering &TFI = *Fn.getSubtarget().getFrameLowering();
bool StackGrowsDown =
@@ -285,6 +294,8 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
// choose the first one).
SmallVector<FrameRef, 64> FrameReferenceInsns;
+ unsigned Order = 0;
+
for (MachineBasicBlock &BB : Fn) {
for (MachineInstr &MI : BB) {
// Debug value, stackmap and patchpoint instructions can't be out of
@@ -305,21 +316,22 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
// an object allocated in the local block.
if (MI.getOperand(i).isFI()) {
// Don't try this with values not in the local block.
- if (!MFI->isObjectPreAllocated(MI.getOperand(i).getIndex()))
+ if (!MFI.isObjectPreAllocated(MI.getOperand(i).getIndex()))
break;
int Idx = MI.getOperand(i).getIndex();
int64_t LocalOffset = LocalOffsets[Idx];
if (!TRI->needsFrameBaseReg(&MI, LocalOffset))
break;
- FrameReferenceInsns.push_back(FrameRef(&MI, LocalOffset, Idx));
+ FrameReferenceInsns.push_back(FrameRef(&MI, LocalOffset, Idx, Order++));
break;
}
}
}
}
- // Sort the frame references by local offset
- array_pod_sort(FrameReferenceInsns.begin(), FrameReferenceInsns.end());
+ // Sort the frame references by local offset.
+ // Use the frame index and program order as tie-breakers when MIs have the same offset.
+ std::sort(FrameReferenceInsns.begin(), FrameReferenceInsns.end());
MachineBasicBlock *Entry = &Fn.front();
@@ -332,7 +344,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
MachineInstr &MI = *FR.getMachineInstr();
int64_t LocalOffset = FR.getLocalOffset();
int FrameIdx = FR.getFrameIndex();
- assert(MFI->isObjectPreAllocated(FrameIdx) &&
+ assert(MFI.isObjectPreAllocated(FrameIdx) &&
"Only pre-allocated locals expected!");
DEBUG(dbgs() << "Considering: " << MI);
@@ -349,7 +361,7 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
assert(idx < MI.getNumOperands() && "Cannot find FI operand");
int64_t Offset = 0;
- int64_t FrameSizeAdjust = StackGrowsDown ? MFI->getLocalFrameSize() : 0;
+ int64_t FrameSizeAdjust = StackGrowsDown ? MFI.getLocalFrameSize() : 0;
DEBUG(dbgs() << " Replacing FI in: " << MI);
diff --git a/contrib/llvm/lib/CodeGen/LowLevelType.cpp b/contrib/llvm/lib/CodeGen/LowLevelType.cpp
new file mode 100644
index 000000000000..d74b7306e0f4
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/LowLevelType.cpp
@@ -0,0 +1,71 @@
+//===-- llvm/CodeGen/GlobalISel/LowLevelType.cpp --------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file This file implements the more header-heavy bits of the LLT class to
+/// avoid polluting users' namespaces.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+LLT::LLT(Type &Ty, const DataLayout &DL) {
+ if (auto VTy = dyn_cast<VectorType>(&Ty)) {
+ SizeInBits = VTy->getElementType()->getPrimitiveSizeInBits();
+ ElementsOrAddrSpace = VTy->getNumElements();
+ Kind = ElementsOrAddrSpace == 1 ? Scalar : Vector;
+ } else if (auto PTy = dyn_cast<PointerType>(&Ty)) {
+ Kind = Pointer;
+ SizeInBits = DL.getTypeSizeInBits(&Ty);
+ ElementsOrAddrSpace = PTy->getAddressSpace();
+ } else if (Ty.isSized()) {
+ // Aggregates are no different from real scalars as far as GlobalISel is
+ // concerned.
+ Kind = Scalar;
+ SizeInBits = DL.getTypeSizeInBits(&Ty);
+ ElementsOrAddrSpace = 1;
+ assert(SizeInBits != 0 && "invalid zero-sized type");
+ } else {
+ Kind = Invalid;
+ SizeInBits = ElementsOrAddrSpace = 0;
+ }
+}
+
+LLT::LLT(MVT VT) {
+ if (VT.isVector()) {
+ SizeInBits = VT.getVectorElementType().getSizeInBits();
+ ElementsOrAddrSpace = VT.getVectorNumElements();
+ Kind = ElementsOrAddrSpace == 1 ? Scalar : Vector;
+ } else if (VT.isValid()) {
+ // Aggregates are no different from real scalars as far as GlobalISel is
+ // concerned.
+ Kind = Scalar;
+ SizeInBits = VT.getSizeInBits();
+ ElementsOrAddrSpace = 1;
+ assert(SizeInBits != 0 && "invalid zero-sized type");
+ } else {
+ Kind = Invalid;
+ SizeInBits = ElementsOrAddrSpace = 0;
+ }
+}
+
+void LLT::print(raw_ostream &OS) const {
+ if (isVector())
+ OS << "<" << ElementsOrAddrSpace << " x s" << SizeInBits << ">";
+ else if (isPointer())
+ OS << "p" << getAddressSpace();
+ else if (isValid()) {
+ assert(isScalar() && "unexpected type");
+ OS << "s" << getScalarSizeInBits();
+ } else
+ llvm_unreachable("trying to print an invalid type");
+}
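The new LowLevelType.cpp prints scalars as sN, pointers as pN (N being the address space), and vectors as <N x sM>. The following standalone sketch mirrors only that printing scheme; ToyLLT is a made-up type, not llvm::LLT:

    // Minimal mirror of the LLT printing scheme above.
    #include <iostream>
    #include <string>

    struct ToyLLT {
      enum { Scalar, Pointer, Vector } Kind;
      unsigned SizeInBits;            // scalar or element size in bits
      unsigned ElementsOrAddrSpace;   // lane count or address space

      std::string str() const {
        switch (Kind) {
        case Vector:
          return "<" + std::to_string(ElementsOrAddrSpace) + " x s" +
                 std::to_string(SizeInBits) + ">";
        case Pointer:
          return "p" + std::to_string(ElementsOrAddrSpace);
        case Scalar:
          return "s" + std::to_string(SizeInBits);
        }
        return "";
      }
    };

    int main() {
      std::cout << ToyLLT{ToyLLT::Scalar, 32, 1}.str() << "\n";   // s32
      std::cout << ToyLLT{ToyLLT::Pointer, 64, 0}.str() << "\n";  // p0
      std::cout << ToyLLT{ToyLLT::Vector, 32, 4}.str() << "\n";   // <4 x s32>
    }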
diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp
index 6e3de52f1a9c..1f1ce6e8d725 100644
--- a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp
+++ b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.cpp
@@ -173,14 +173,20 @@ static Cursor lexName(Cursor C, MIToken &Token, MIToken::TokenKind Type,
return C;
}
-static Cursor maybeLexIntegerType(Cursor C, MIToken &Token) {
- if (C.peek() != 'i' || !isdigit(C.peek(1)))
+static Cursor maybeLexIntegerOrScalarType(Cursor C, MIToken &Token) {
+ if ((C.peek() != 'i' && C.peek() != 's' && C.peek() != 'p') ||
+ !isdigit(C.peek(1)))
return None;
+ char Kind = C.peek();
auto Range = C;
- C.advance(); // Skip 'i'
+ C.advance(); // Skip 'i', 's', or 'p'
while (isdigit(C.peek()))
C.advance();
- Token.reset(MIToken::IntegerType, Range.upto(C));
+
+ Token.reset(Kind == 'i'
+ ? MIToken::IntegerType
+ : (Kind == 's' ? MIToken::ScalarType : MIToken::PointerType),
+ Range.upto(C));
return C;
}
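maybeLexIntegerOrScalarType now accepts 'i', 's', or 'p' followed by digits and picks the token kind from the leading character. A self-contained sketch of the same classification applied to a whole token string (hypothetical names, no MILexer dependencies):

    // Classify "i32" / "s64" / "p0" style tokens by their leading character.
    #include <cctype>
    #include <string>

    enum class ToyTokenKind { IntegerType, ScalarType, PointerType, Other };

    ToyTokenKind classifyTypeToken(const std::string &Tok) {
      if (Tok.size() < 2 || !std::isdigit((unsigned char)Tok[1]))
        return ToyTokenKind::Other;
      for (size_t I = 2; I < Tok.size(); ++I)
        if (!std::isdigit((unsigned char)Tok[I]))
          return ToyTokenKind::Other;
      switch (Tok[0]) {
      case 'i': return ToyTokenKind::IntegerType;   // e.g. "i32"
      case 's': return ToyTokenKind::ScalarType;    // e.g. "s64"
      case 'p': return ToyTokenKind::PointerType;   // e.g. "p0"
      default:  return ToyTokenKind::Other;
      }
    }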
@@ -199,12 +205,13 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("tied-def", MIToken::kw_tied_def)
.Case("frame-setup", MIToken::kw_frame_setup)
.Case("debug-location", MIToken::kw_debug_location)
- .Case(".cfi_same_value", MIToken::kw_cfi_same_value)
- .Case(".cfi_offset", MIToken::kw_cfi_offset)
- .Case(".cfi_def_cfa_register", MIToken::kw_cfi_def_cfa_register)
- .Case(".cfi_def_cfa_offset", MIToken::kw_cfi_def_cfa_offset)
- .Case(".cfi_def_cfa", MIToken::kw_cfi_def_cfa)
+ .Case("same_value", MIToken::kw_cfi_same_value)
+ .Case("offset", MIToken::kw_cfi_offset)
+ .Case("def_cfa_register", MIToken::kw_cfi_def_cfa_register)
+ .Case("def_cfa_offset", MIToken::kw_cfi_def_cfa_offset)
+ .Case("def_cfa", MIToken::kw_cfi_def_cfa)
.Case("blockaddress", MIToken::kw_blockaddress)
+ .Case("intrinsic", MIToken::kw_intrinsic)
.Case("target-index", MIToken::kw_target_index)
.Case("half", MIToken::kw_half)
.Case("float", MIToken::kw_float)
@@ -215,6 +222,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("target-flags", MIToken::kw_target_flags)
.Case("volatile", MIToken::kw_volatile)
.Case("non-temporal", MIToken::kw_non_temporal)
+ .Case("dereferenceable", MIToken::kw_dereferenceable)
.Case("invariant", MIToken::kw_invariant)
.Case("align", MIToken::kw_align)
.Case("stack", MIToken::kw_stack)
@@ -227,11 +235,13 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("landing-pad", MIToken::kw_landing_pad)
.Case("liveins", MIToken::kw_liveins)
.Case("successors", MIToken::kw_successors)
+ .Case("floatpred", MIToken::kw_floatpred)
+ .Case("intpred", MIToken::kw_intpred)
.Default(MIToken::Identifier);
}
static Cursor maybeLexIdentifier(Cursor C, MIToken &Token) {
- if (!isalpha(C.peek()) && C.peek() != '_' && C.peek() != '.')
+ if (!isalpha(C.peek()) && C.peek() != '_')
return None;
auto Range = C;
while (isIdentifierChar(C.peek()))
@@ -366,6 +376,11 @@ static Cursor lexVirtualRegister(Cursor C, MIToken &Token) {
return C;
}
+/// Returns true for a character allowed in a register name.
+static bool isRegisterChar(char C) {
+ return isIdentifierChar(C) && C != '.';
+}
+
static Cursor maybeLexRegister(Cursor C, MIToken &Token) {
if (C.peek() != '%')
return None;
@@ -373,7 +388,7 @@ static Cursor maybeLexRegister(Cursor C, MIToken &Token) {
return lexVirtualRegister(C, Token);
auto Range = C;
C.advance(); // Skip '%'
- while (isIdentifierChar(C.peek()))
+ while (isRegisterChar(C.peek()))
C.advance();
Token.reset(MIToken::NamedRegister, Range.upto(C))
.setStringValue(Range.upto(C).drop_front(1)); // Drop the '%'
@@ -409,19 +424,6 @@ static bool isValidHexFloatingPointPrefix(char C) {
return C == 'H' || C == 'K' || C == 'L' || C == 'M';
}
-static Cursor maybeLexHexFloatingPointLiteral(Cursor C, MIToken &Token) {
- if (C.peek() != '0' || C.peek(1) != 'x')
- return None;
- Cursor Range = C;
- C.advance(2); // Skip '0x'
- if (isValidHexFloatingPointPrefix(C.peek()))
- C.advance();
- while (isxdigit(C.peek()))
- C.advance();
- Token.reset(MIToken::FloatingPointLiteral, Range.upto(C));
- return C;
-}
-
static Cursor lexFloatingPointLiteral(Cursor Range, Cursor C, MIToken &Token) {
C.advance();
// Skip over [0-9]*([eE][-+]?[0-9]+)?
@@ -438,6 +440,28 @@ static Cursor lexFloatingPointLiteral(Cursor Range, Cursor C, MIToken &Token) {
return C;
}
+static Cursor maybeLexHexadecimalLiteral(Cursor C, MIToken &Token) {
+ if (C.peek() != '0' || (C.peek(1) != 'x' && C.peek(1) != 'X'))
+ return None;
+ Cursor Range = C;
+ C.advance(2);
+ unsigned PrefLen = 2;
+ if (isValidHexFloatingPointPrefix(C.peek())) {
+ C.advance();
+ PrefLen++;
+ }
+ while (isxdigit(C.peek()))
+ C.advance();
+ StringRef StrVal = Range.upto(C);
+ if (StrVal.size() <= PrefLen)
+ return None;
+ if (PrefLen == 2)
+ Token.reset(MIToken::HexLiteral, Range.upto(C));
+ else // It must be 3, which means that there was a floating-point prefix.
+ Token.reset(MIToken::FloatingPointLiteral, Range.upto(C));
+ return C;
+}
+
static Cursor maybeLexNumericalLiteral(Cursor C, MIToken &Token) {
if (!isdigit(C.peek()) && (C.peek() != '-' || !isdigit(C.peek(1))))
return None;
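maybeLexHexadecimalLiteral distinguishes a plain hex literal (0x followed by hex digits) from a hexadecimal floating-point literal (0x followed by one of the H/K/L/M prefixes). A standalone sketch of that distinction, assuming the whole literal has already been isolated into a string:

    // Classify "0x..." literals: plain hex vs. hex floating point.
    #include <cctype>
    #include <string>

    enum class ToyHexKind { None, HexLiteral, FloatLiteral };

    ToyHexKind classifyHex(const std::string &S) {
      if (S.size() < 3 || S[0] != '0' || std::tolower((unsigned char)S[1]) != 'x')
        return ToyHexKind::None;
      size_t Pos = 2;
      bool HasFloatPrefix = S[Pos] == 'H' || S[Pos] == 'K' ||
                            S[Pos] == 'L' || S[Pos] == 'M';
      if (HasFloatPrefix)
        ++Pos;
      if (Pos == S.size())                    // prefix but no digits
        return ToyHexKind::None;
      for (size_t I = Pos; I < S.size(); ++I)
        if (!std::isxdigit((unsigned char)S[I]))
          return ToyHexKind::None;
      return HasFloatPrefix ? ToyHexKind::FloatLiteral : ToyHexKind::HexLiteral;
    }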
@@ -485,6 +509,8 @@ static MIToken::TokenKind symbolToken(char C) {
switch (C) {
case ',':
return MIToken::comma;
+ case '.':
+ return MIToken::dot;
case '=':
return MIToken::equal;
case ':':
@@ -566,7 +592,7 @@ StringRef llvm::lexMIToken(StringRef Source, MIToken &Token,
return C.remaining();
}
- if (Cursor R = maybeLexIntegerType(C, Token))
+ if (Cursor R = maybeLexIntegerOrScalarType(C, Token))
return R.remaining();
if (Cursor R = maybeLexMachineBasicBlock(C, Token, ErrorCallback))
return R.remaining();
@@ -592,7 +618,7 @@ StringRef llvm::lexMIToken(StringRef Source, MIToken &Token,
return R.remaining();
if (Cursor R = maybeLexExternalSymbol(C, Token, ErrorCallback))
return R.remaining();
- if (Cursor R = maybeLexHexFloatingPointLiteral(C, Token))
+ if (Cursor R = maybeLexHexadecimalLiteral(C, Token))
return R.remaining();
if (Cursor R = maybeLexNumericalLiteral(C, Token))
return R.remaining();
diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h
index 32fc8ab271e6..edba749b5fce 100644
--- a/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h
+++ b/contrib/llvm/lib/CodeGen/MIRParser/MILexer.h
@@ -38,6 +38,7 @@ struct MIToken {
underscore,
colon,
coloncolon,
+ dot,
exclaim,
lparen,
rparen,
@@ -53,6 +54,7 @@ struct MIToken {
kw_implicit_define,
kw_def,
kw_dead,
+ kw_dereferenceable,
kw_killed,
kw_undef,
kw_internal,
@@ -67,6 +69,7 @@ struct MIToken {
kw_cfi_def_cfa_offset,
kw_cfi_def_cfa,
kw_blockaddress,
+ kw_intrinsic,
kw_target_index,
kw_half,
kw_float,
@@ -89,6 +92,8 @@ struct MIToken {
kw_landing_pad,
kw_liveins,
kw_successors,
+ kw_floatpred,
+ kw_intpred,
// Named metadata keywords
md_tbaa,
@@ -102,6 +107,8 @@ struct MIToken {
NamedRegister,
MachineBasicBlockLabel,
MachineBasicBlock,
+ PointerType,
+ ScalarType,
StackObject,
FixedStackObject,
NamedGlobalValue,
@@ -111,6 +118,7 @@ struct MIToken {
// Other tokens
IntegerLiteral,
FloatingPointLiteral,
+ HexLiteral,
VirtualRegister,
ConstantPoolItem,
JumpTableIndex,
@@ -160,7 +168,7 @@ public:
bool isMemoryOperandFlag() const {
return Kind == kw_volatile || Kind == kw_non_temporal ||
- Kind == kw_invariant;
+ Kind == kw_dereferenceable || Kind == kw_invariant;
}
bool is(TokenKind K) const { return Kind == K; }
diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp
index b3fd16f15889..c8bed0890dd6 100644
--- a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.cpp
@@ -14,6 +14,7 @@
#include "MIParser.h"
#include "MILexer.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/AsmParser/Parser.h"
#include "llvm/AsmParser/SlotMapping.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -26,13 +27,16 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSlotTracker.h"
#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cctype>
using namespace llvm;
@@ -41,6 +45,17 @@ PerFunctionMIParsingState::PerFunctionMIParsingState(MachineFunction &MF,
: MF(MF), SM(&SM), IRSlots(IRSlots) {
}
+VRegInfo &PerFunctionMIParsingState::getVRegInfo(unsigned Num) {
+ auto I = VRegInfos.insert(std::make_pair(Num, nullptr));
+ if (I.second) {
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ VRegInfo *Info = new (Allocator) VRegInfo;
+ Info->VReg = MRI.createIncompleteVirtualRegister();
+ I.first->second = Info;
+ }
+ return *I.first->second;
+}
+
namespace {
/// A wrapper struct around the 'MachineOperand' struct that includes a source
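getVRegInfo creates the per-number VRegInfo record lazily: the map insertion reports whether the entry is new, and only then is a placeholder virtual register allocated. A hedged standalone sketch of the same pattern with standard containers; ToyVRegInfo and ToyParserState are made-up names:

    // Lazy creation of per-number records keyed in a map.
    #include <memory>
    #include <unordered_map>

    struct ToyVRegInfo {
      unsigned VReg = 0;
    };

    class ToyParserState {
      std::unordered_map<unsigned, std::unique_ptr<ToyVRegInfo>> VRegInfos;
      unsigned NextVReg = 0;
    public:
      ToyVRegInfo &getVRegInfo(unsigned Num) {
        auto I = VRegInfos.emplace(Num, nullptr);
        if (I.second) {                        // first time this number is seen
          I.first->second = std::make_unique<ToyVRegInfo>();
          // Stand-in for MRI.createIncompleteVirtualRegister().
          I.first->second->VReg = NextVReg++;
        }
        return *I.first->second;
      }
    };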
@@ -65,7 +80,7 @@ class MIParser {
SMDiagnostic &Error;
StringRef Source, CurrentSource;
MIToken Token;
- const PerFunctionMIParsingState &PFS;
+ PerFunctionMIParsingState &PFS;
/// Maps from instruction names to op codes.
StringMap<unsigned> Names2InstrOpCodes;
/// Maps from register names to registers.
@@ -86,7 +101,7 @@ class MIParser {
StringMap<unsigned> Names2BitmaskTargetFlags;
public:
- MIParser(const PerFunctionMIParsingState &PFS, SMDiagnostic &Error,
+ MIParser(PerFunctionMIParsingState &PFS, SMDiagnostic &Error,
StringRef Source);
/// \p SkipChar gives the number of characters to skip before looking
@@ -109,7 +124,8 @@ public:
bool parse(MachineInstr *&MI);
bool parseStandaloneMBB(MachineBasicBlock *&MBB);
bool parseStandaloneNamedRegister(unsigned &Reg);
- bool parseStandaloneVirtualRegister(unsigned &Reg);
+ bool parseStandaloneVirtualRegister(VRegInfo *&Info);
+ bool parseStandaloneRegister(unsigned &Reg);
bool parseStandaloneStackObject(int &FI);
bool parseStandaloneMDNode(MDNode *&Node);
@@ -119,21 +135,19 @@ public:
bool parseBasicBlockLiveins(MachineBasicBlock &MBB);
bool parseBasicBlockSuccessors(MachineBasicBlock &MBB);
- bool parseRegister(unsigned &Reg);
+ bool parseNamedRegister(unsigned &Reg);
+ bool parseVirtualRegister(VRegInfo *&Info);
+ bool parseRegister(unsigned &Reg, VRegInfo *&VRegInfo);
bool parseRegisterFlag(unsigned &Flags);
bool parseSubRegisterIndex(unsigned &SubReg);
bool parseRegisterTiedDefIndex(unsigned &TiedDefIdx);
- bool parseSize(unsigned &Size);
bool parseRegisterOperand(MachineOperand &Dest,
Optional<unsigned> &TiedDefIdx, bool IsDef = false);
bool parseImmediateOperand(MachineOperand &Dest);
bool parseIRConstant(StringRef::iterator Loc, StringRef Source,
const Constant *&C);
bool parseIRConstant(StringRef::iterator Loc, const Constant *&C);
- bool parseIRType(StringRef::iterator Loc, StringRef Source, unsigned &Read,
- Type *&Ty);
- // \p MustBeSized defines whether or not \p Ty must be sized.
- bool parseIRType(StringRef::iterator Loc, Type *&Ty, bool MustBeSized = true);
+ bool parseLowLevelType(StringRef::iterator Loc, LLT &Ty);
bool parseTypedImmediateOperand(MachineOperand &Dest);
bool parseFPImmediateOperand(MachineOperand &Dest);
bool parseMBBReference(MachineBasicBlock *&MBB);
@@ -155,6 +169,8 @@ public:
bool parseCFIOperand(MachineOperand &Dest);
bool parseIRBlock(BasicBlock *&BB, const Function &F);
bool parseBlockAddressOperand(MachineOperand &Dest);
+ bool parseIntrinsicOperand(MachineOperand &Dest);
+ bool parsePredicateOperand(MachineOperand &Dest);
bool parseTargetIndexOperand(MachineOperand &Dest);
bool parseLiveoutRegisterMaskOperand(MachineOperand &Dest);
bool parseMachineOperand(MachineOperand &Dest,
@@ -181,6 +197,12 @@ private:
/// Return true if an error occurred.
bool getUint64(uint64_t &Result);
+ /// Convert the hexadecimal literal in the current token into an unsigned
+ /// APInt with a minimum bitwidth required to represent the value.
+ ///
+ /// Return true if the literal does not represent an integer value.
+ bool getHexUint(APInt &Result);
+
/// If the current token is of the given kind, consume it and return false.
/// Otherwise report an error and return true.
bool expectAndConsume(MIToken::TokenKind TokenKind);
@@ -254,7 +276,7 @@ private:
} // end anonymous namespace
-MIParser::MIParser(const PerFunctionMIParsingState &PFS, SMDiagnostic &Error,
+MIParser::MIParser(PerFunctionMIParsingState &PFS, SMDiagnostic &Error,
StringRef Source)
: MF(PFS.MF), Error(Error), Source(Source), CurrentSource(Source), PFS(PFS)
{}
@@ -362,7 +384,7 @@ bool MIParser::parseBasicBlockDefinition(
if (!Name.empty()) {
BB = dyn_cast_or_null<BasicBlock>(
- MF.getFunction()->getValueSymbolTable().lookup(Name));
+ MF.getFunction()->getValueSymbolTable()->lookup(Name));
if (!BB)
return error(Loc, Twine("basic block '") + Name +
"' is not defined in the function '" +
@@ -437,10 +459,24 @@ bool MIParser::parseBasicBlockLiveins(MachineBasicBlock &MBB) {
if (Token.isNot(MIToken::NamedRegister))
return error("expected a named register");
unsigned Reg = 0;
- if (parseRegister(Reg))
+ if (parseNamedRegister(Reg))
return true;
- MBB.addLiveIn(Reg);
lex();
+ LaneBitmask Mask = LaneBitmask::getAll();
+ if (consumeIfPresent(MIToken::colon)) {
+ // Parse lane mask.
+ if (Token.isNot(MIToken::IntegerLiteral) &&
+ Token.isNot(MIToken::HexLiteral))
+ return error("expected a lane mask");
+ static_assert(sizeof(LaneBitmask::Type) == sizeof(unsigned),
+ "Use correct get-function for lane mask");
+ LaneBitmask::Type V;
+ if (getUnsigned(V))
+ return error("invalid lane mask value");
+ Mask = LaneBitmask(V);
+ lex();
+ }
+ MBB.addLiveIn(Reg, Mask);
} while (consumeIfPresent(MIToken::comma));
return false;
}
@@ -461,7 +497,8 @@ bool MIParser::parseBasicBlockSuccessors(MachineBasicBlock &MBB) {
lex();
unsigned Weight = 0;
if (consumeIfPresent(MIToken::lparen)) {
- if (Token.isNot(MIToken::IntegerLiteral))
+ if (Token.isNot(MIToken::IntegerLiteral) &&
+ Token.isNot(MIToken::HexLiteral))
return error("expected an integer literal after '('");
if (getUnsigned(Weight))
return true;
@@ -597,14 +634,6 @@ bool MIParser::parse(MachineInstr *&MI) {
if (Token.isError() || parseInstruction(OpCode, Flags))
return true;
- Type *Ty = nullptr;
- if (isPreISelGenericOpcode(OpCode)) {
- // For generic opcode, a type is mandatory.
- auto Loc = Token.location();
- if (parseIRType(Loc, Ty))
- return true;
- }
-
// Parse the remaining machine operands.
while (!Token.isNewlineOrEOF() && Token.isNot(MIToken::kw_debug_location) &&
Token.isNot(MIToken::coloncolon) && Token.isNot(MIToken::lbrace)) {
@@ -660,8 +689,6 @@ bool MIParser::parse(MachineInstr *&MI) {
// TODO: Check for extraneous machine operands.
MI = MF.CreateMachineInstr(MCID, DebugLocation, /*NoImplicit=*/true);
MI->setFlags(Flags);
- if (Ty)
- MI->setType(Ty);
for (const auto &Operand : Operands)
MI->addOperand(MF, Operand.Operand);
if (assignRegisterTies(*MI, Operands))
@@ -692,7 +719,7 @@ bool MIParser::parseStandaloneNamedRegister(unsigned &Reg) {
lex();
if (Token.isNot(MIToken::NamedRegister))
return error("expected a named register");
- if (parseRegister(Reg))
+ if (parseNamedRegister(Reg))
return true;
lex();
if (Token.isNot(MIToken::Eof))
@@ -700,12 +727,28 @@ bool MIParser::parseStandaloneNamedRegister(unsigned &Reg) {
return false;
}
-bool MIParser::parseStandaloneVirtualRegister(unsigned &Reg) {
+bool MIParser::parseStandaloneVirtualRegister(VRegInfo *&Info) {
lex();
if (Token.isNot(MIToken::VirtualRegister))
return error("expected a virtual register");
- if (parseRegister(Reg))
+ if (parseVirtualRegister(Info))
+ return true;
+ lex();
+ if (Token.isNot(MIToken::Eof))
+ return error("expected end of string after the register reference");
+ return false;
+}
+
+bool MIParser::parseStandaloneRegister(unsigned &Reg) {
+ lex();
+ if (Token.isNot(MIToken::NamedRegister) &&
+ Token.isNot(MIToken::VirtualRegister))
+ return error("expected either a named or virtual register");
+
+ VRegInfo *Info;
+ if (parseRegister(Reg, Info))
return true;
+
lex();
if (Token.isNot(MIToken::Eof))
return error("expected end of string after the register reference");
@@ -800,33 +843,39 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) {
return false;
}
-bool MIParser::parseRegister(unsigned &Reg) {
+bool MIParser::parseNamedRegister(unsigned &Reg) {
+ assert(Token.is(MIToken::NamedRegister) && "Needs NamedRegister token");
+ StringRef Name = Token.stringValue();
+ if (getRegisterByName(Name, Reg))
+ return error(Twine("unknown register name '") + Name + "'");
+ return false;
+}
+
+bool MIParser::parseVirtualRegister(VRegInfo *&Info) {
+ assert(Token.is(MIToken::VirtualRegister) && "Needs VirtualRegister token");
+ unsigned ID;
+ if (getUnsigned(ID))
+ return true;
+ Info = &PFS.getVRegInfo(ID);
+ return false;
+}
+
+bool MIParser::parseRegister(unsigned &Reg, VRegInfo *&Info) {
switch (Token.kind()) {
case MIToken::underscore:
Reg = 0;
- break;
- case MIToken::NamedRegister: {
- StringRef Name = Token.stringValue();
- if (getRegisterByName(Name, Reg))
- return error(Twine("unknown register name '") + Name + "'");
- break;
- }
- case MIToken::VirtualRegister: {
- unsigned ID;
- if (getUnsigned(ID))
+ return false;
+ case MIToken::NamedRegister:
+ return parseNamedRegister(Reg);
+ case MIToken::VirtualRegister:
+ if (parseVirtualRegister(Info))
return true;
- const auto RegInfo = PFS.VirtualRegisterSlots.find(ID);
- if (RegInfo == PFS.VirtualRegisterSlots.end())
- return error(Twine("use of undefined virtual register '%") + Twine(ID) +
- "'");
- Reg = RegInfo->second;
- break;
- }
+ Reg = Info->VReg;
+ return false;
// TODO: Parse other register kinds.
default:
llvm_unreachable("The current token should be a register");
}
- return false;
}
bool MIParser::parseRegisterFlag(unsigned &Flags) {
@@ -871,10 +920,10 @@ bool MIParser::parseRegisterFlag(unsigned &Flags) {
}
bool MIParser::parseSubRegisterIndex(unsigned &SubReg) {
- assert(Token.is(MIToken::colon));
+ assert(Token.is(MIToken::dot));
lex();
if (Token.isNot(MIToken::Identifier))
- return error("expected a subregister index after ':'");
+ return error("expected a subregister index after '.'");
auto Name = Token.stringValue();
SubReg = getSubRegIndex(Name);
if (!SubReg)
@@ -885,7 +934,7 @@ bool MIParser::parseSubRegisterIndex(unsigned &SubReg) {
bool MIParser::parseRegisterTiedDefIndex(unsigned &TiedDefIdx) {
if (!consumeIfPresent(MIToken::kw_tied_def))
- return error("expected 'tied-def' after '('");
+ return true;
if (Token.isNot(MIToken::IntegerLiteral))
return error("expected an integer literal after 'tied-def'");
if (getUnsigned(TiedDefIdx))
@@ -896,17 +945,6 @@ bool MIParser::parseRegisterTiedDefIndex(unsigned &TiedDefIdx) {
return false;
}
-bool MIParser::parseSize(unsigned &Size) {
- if (Token.isNot(MIToken::IntegerLiteral))
- return error("expected an integer literal for the size");
- if (getUnsigned(Size))
- return true;
- lex();
- if (expectAndConsume(MIToken::rparen))
- return true;
- return false;
-}
-
bool MIParser::assignRegisterTies(MachineInstr &MI,
ArrayRef<ParsedMachineOperand> Operands) {
SmallVector<std::pair<unsigned, unsigned>, 4> TiedRegisterPairs;
@@ -947,7 +985,6 @@ bool MIParser::assignRegisterTies(MachineInstr &MI,
bool MIParser::parseRegisterOperand(MachineOperand &Dest,
Optional<unsigned> &TiedDefIdx,
bool IsDef) {
- unsigned Reg;
unsigned Flags = IsDef ? RegState::Define : 0;
while (Token.isRegisterFlag()) {
if (parseRegisterFlag(Flags))
@@ -955,38 +992,62 @@ bool MIParser::parseRegisterOperand(MachineOperand &Dest,
}
if (!Token.isRegister())
return error("expected a register after register flags");
- if (parseRegister(Reg))
+ unsigned Reg;
+ VRegInfo *RegInfo;
+ if (parseRegister(Reg, RegInfo))
return true;
lex();
unsigned SubReg = 0;
- if (Token.is(MIToken::colon)) {
+ if (Token.is(MIToken::dot)) {
if (parseSubRegisterIndex(SubReg))
return true;
if (!TargetRegisterInfo::isVirtualRegister(Reg))
return error("subregister index expects a virtual register");
}
+ MachineRegisterInfo &MRI = MF.getRegInfo();
if ((Flags & RegState::Define) == 0) {
if (consumeIfPresent(MIToken::lparen)) {
unsigned Idx;
- if (parseRegisterTiedDefIndex(Idx))
- return true;
- TiedDefIdx = Idx;
+ if (!parseRegisterTiedDefIndex(Idx))
+ TiedDefIdx = Idx;
+ else {
+ // Try a redundant low-level type.
+ LLT Ty;
+ if (parseLowLevelType(Token.location(), Ty))
+ return error("expected tied-def or low-level type after '('");
+
+ if (expectAndConsume(MIToken::rparen))
+ return true;
+
+ if (MRI.getType(Reg).isValid() && MRI.getType(Reg) != Ty)
+ return error("inconsistent type for generic virtual register");
+
+ MRI.setType(Reg, Ty);
+ }
}
} else if (consumeIfPresent(MIToken::lparen)) {
- // Virtual registers may have a size with GlobalISel.
+ // Virtual registers may have a type with GlobalISel.
if (!TargetRegisterInfo::isVirtualRegister(Reg))
- return error("unexpected size on physical register");
- unsigned Size;
- if (parseSize(Size))
+ return error("unexpected type on physical register");
+
+ LLT Ty;
+ if (parseLowLevelType(Token.location(), Ty))
return true;
- MachineRegisterInfo &MRI = MF.getRegInfo();
- MRI.setSize(Reg, Size);
- } else if (PFS.GenericVRegs.count(Reg)) {
- // Generic virtual registers must have a size.
- // If we end up here this means the size hasn't been specified and
+ if (expectAndConsume(MIToken::rparen))
+ return true;
+
+ if (MRI.getType(Reg).isValid() && MRI.getType(Reg) != Ty)
+ return error("inconsistent type for generic virtual register");
+
+ MRI.setType(Reg, Ty);
+ } else if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ // Generic virtual registers must have a type.
+ // If we end up here this means the type hasn't been specified and
// this is bad!
- return error("generic virtual registers must have a size");
+ if (RegInfo->Kind == VRegInfo::GENERIC ||
+ RegInfo->Kind == VRegInfo::REGBANK)
+ return error("generic virtual registers must have a type");
}
Dest = MachineOperand::CreateReg(
Reg, Flags & RegState::Define, Flags & RegState::Implicit,
@@ -1010,7 +1071,7 @@ bool MIParser::parseIRConstant(StringRef::iterator Loc, StringRef StringValue,
const Constant *&C) {
auto Source = StringValue.str(); // The source has to be null terminated.
SMDiagnostic Err;
- C = parseConstantValue(Source.c_str(), Err, *MF.getFunction()->getParent(),
+ C = parseConstantValue(Source, Err, *MF.getFunction()->getParent(),
&PFS.IRSlots);
if (!C)
return error(Loc + Err.getColumnNo(), Err.getMessage());
@@ -1024,35 +1085,45 @@ bool MIParser::parseIRConstant(StringRef::iterator Loc, const Constant *&C) {
return false;
}
-bool MIParser::parseIRType(StringRef::iterator Loc, StringRef StringValue,
- unsigned &Read, Type *&Ty) {
- auto Source = StringValue.str(); // The source has to be null terminated.
- SMDiagnostic Err;
- Ty = parseTypeAtBeginning(Source.c_str(), Read, Err,
- *MF.getFunction()->getParent(), &PFS.IRSlots);
- if (!Ty)
- return error(Loc + Err.getColumnNo(), Err.getMessage());
- return false;
-}
+bool MIParser::parseLowLevelType(StringRef::iterator Loc, LLT &Ty) {
+ if (Token.is(MIToken::ScalarType)) {
+ Ty = LLT::scalar(APSInt(Token.range().drop_front()).getZExtValue());
+ lex();
+ return false;
+ } else if (Token.is(MIToken::PointerType)) {
+ const DataLayout &DL = MF.getFunction()->getParent()->getDataLayout();
+ unsigned AS = APSInt(Token.range().drop_front()).getZExtValue();
+ Ty = LLT::pointer(AS, DL.getPointerSizeInBits(AS));
+ lex();
+ return false;
+ }
-bool MIParser::parseIRType(StringRef::iterator Loc, Type *&Ty,
- bool MustBeSized) {
- // At this point we enter in the IR world, i.e., to get the correct type,
- // we need to hand off the whole string, not just the current token.
- // E.g., <4 x i64> would give '<' as a token and there is not much
- // the IR parser can do with that.
- unsigned Read = 0;
- if (parseIRType(Loc, StringRef(Loc), Read, Ty))
- return true;
- // The type must be sized, otherwise there is not much the backend
- // can do with it.
- if (MustBeSized && !Ty->isSized())
- return error("expected a sized type");
- // The next token is Read characters from the Loc.
- // However, the current location is not Loc, but Loc + the length of Token.
- // Therefore, subtract the length of Token (range().end() - Loc) to the
- // number of characters to skip before the next token.
- lex(Read - (Token.range().end() - Loc));
+ // Now we're looking for a vector.
+ if (Token.isNot(MIToken::less))
+ return error(Loc,
+ "expected unsized, pN, sN or <N x sM> for GlobalISel type");
+
+ lex();
+
+ if (Token.isNot(MIToken::IntegerLiteral))
+ return error(Loc, "expected <N x sM> for vctor type");
+ uint64_t NumElements = Token.integerValue().getZExtValue();
+ lex();
+
+ if (Token.isNot(MIToken::Identifier) || Token.stringValue() != "x")
+ return error(Loc, "expected '<N x sM>' for vector type");
+ lex();
+
+ if (Token.isNot(MIToken::ScalarType))
+ return error(Loc, "expected '<N x sM>' for vector type");
+ uint64_t ScalarSize = APSInt(Token.range().drop_front()).getZExtValue();
+ lex();
+
+ if (Token.isNot(MIToken::greater))
+ return error(Loc, "expected '<N x sM>' for vector type");
+ lex();
+
+ Ty = LLT::vector(NumElements, ScalarSize);
return false;
}
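parseLowLevelType accepts the spellings sN (scalar), pN (pointer in address space N), and <N x sM> (vector). The sketch below parses the same spellings from a plain string rather than from MIR tokens; the fixed 64-bit pointer size is an assumption for illustration only:

    // Parse "s32", "p0", or "<4 x s32>" spellings into a small type record.
    #include <cstdio>
    #include <string>

    struct ToyTy {
      enum { Invalid, Scalar, Pointer, Vector } Kind;
      unsigned Size;          // bit size of the scalar or vector element
      unsigned ElementsOrAS;  // lane count or address space
    };

    bool parseToyType(const std::string &S, ToyTy &Ty) {
      unsigned A = 0, B = 0;
      if (std::sscanf(S.c_str(), "s%u", &A) == 1) {
        Ty = {ToyTy::Scalar, A, 1};
        return true;
      }
      if (std::sscanf(S.c_str(), "p%u", &A) == 1) {
        Ty = {ToyTy::Pointer, 64, A};          // assume 64-bit pointers
        return true;
      }
      if (std::sscanf(S.c_str(), "<%u x s%u>", &A, &B) == 2) {
        Ty = {ToyTy::Vector, B, A};
        return true;
      }
      return false;                            // e.g. "i32" is not an LLT here
    }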
@@ -1072,7 +1143,8 @@ bool MIParser::parseTypedImmediateOperand(MachineOperand &Dest) {
bool MIParser::parseFPImmediateOperand(MachineOperand &Dest) {
auto Loc = Token.location();
lex();
- if (Token.isNot(MIToken::FloatingPointLiteral))
+ if (Token.isNot(MIToken::FloatingPointLiteral) &&
+ Token.isNot(MIToken::HexLiteral))
return error("expected a floating point literal");
const Constant *C = nullptr;
if (parseIRConstant(Loc, C))
@@ -1082,13 +1154,24 @@ bool MIParser::parseFPImmediateOperand(MachineOperand &Dest) {
}
bool MIParser::getUnsigned(unsigned &Result) {
- assert(Token.hasIntegerValue() && "Expected a token with an integer value");
- const uint64_t Limit = uint64_t(std::numeric_limits<unsigned>::max()) + 1;
- uint64_t Val64 = Token.integerValue().getLimitedValue(Limit);
- if (Val64 == Limit)
- return error("expected 32-bit integer (too large)");
- Result = Val64;
- return false;
+ if (Token.hasIntegerValue()) {
+ const uint64_t Limit = uint64_t(std::numeric_limits<unsigned>::max()) + 1;
+ uint64_t Val64 = Token.integerValue().getLimitedValue(Limit);
+ if (Val64 == Limit)
+ return error("expected 32-bit integer (too large)");
+ Result = Val64;
+ return false;
+ }
+ if (Token.is(MIToken::HexLiteral)) {
+ APInt A;
+ if (getHexUint(A))
+ return true;
+ if (A.getBitWidth() > 32)
+ return error("expected 32-bit integer (too large)");
+ Result = A.getZExtValue();
+ return false;
+ }
+ return true;
}
bool MIParser::parseMBBReference(MachineBasicBlock *&MBB) {
@@ -1128,7 +1211,7 @@ bool MIParser::parseStackFrameIndex(int &FI) {
"'");
StringRef Name;
if (const auto *Alloca =
- MF.getFrameInfo()->getObjectAllocation(ObjectInfo->second))
+ MF.getFrameInfo().getObjectAllocation(ObjectInfo->second))
Name = Alloca->getName();
if (!Token.stringValue().empty() && Token.stringValue() != Name)
return error(Twine("the name of the stack object '%stack.") + Twine(ID) +
@@ -1293,7 +1376,7 @@ bool MIParser::parseCFIRegister(unsigned &Reg) {
if (Token.isNot(MIToken::NamedRegister))
return error("expected a cfi register");
unsigned LLVMReg;
- if (parseRegister(LLVMReg))
+ if (parseNamedRegister(LLVMReg))
return true;
const auto *TRI = MF.getSubtarget().getRegisterInfo();
assert(TRI && "Expected target register info");
@@ -1308,7 +1391,6 @@ bool MIParser::parseCFIRegister(unsigned &Reg) {
bool MIParser::parseCFIOperand(MachineOperand &Dest) {
auto Kind = Token.kind();
lex();
- auto &MMI = MF.getMMI();
int Offset;
unsigned Reg;
unsigned CFIIndex;
@@ -1316,27 +1398,26 @@ bool MIParser::parseCFIOperand(MachineOperand &Dest) {
case MIToken::kw_cfi_same_value:
if (parseCFIRegister(Reg))
return true;
- CFIIndex =
- MMI.addFrameInst(MCCFIInstruction::createSameValue(nullptr, Reg));
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(nullptr, Reg));
break;
case MIToken::kw_cfi_offset:
if (parseCFIRegister(Reg) || expectAndConsume(MIToken::comma) ||
parseCFIOffset(Offset))
return true;
CFIIndex =
- MMI.addFrameInst(MCCFIInstruction::createOffset(nullptr, Reg, Offset));
+ MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, Reg, Offset));
break;
case MIToken::kw_cfi_def_cfa_register:
if (parseCFIRegister(Reg))
return true;
CFIIndex =
- MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
+ MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
break;
case MIToken::kw_cfi_def_cfa_offset:
if (parseCFIOffset(Offset))
return true;
// NB: MCCFIInstruction::createDefCfaOffset negates the offset.
- CFIIndex = MMI.addFrameInst(
+ CFIIndex = MF.addFrameInst(
MCCFIInstruction::createDefCfaOffset(nullptr, -Offset));
break;
case MIToken::kw_cfi_def_cfa:
@@ -1345,7 +1426,7 @@ bool MIParser::parseCFIOperand(MachineOperand &Dest) {
return true;
// NB: MCCFIInstruction::createDefCfa negates the offset.
CFIIndex =
- MMI.addFrameInst(MCCFIInstruction::createDefCfa(nullptr, Reg, -Offset));
+ MF.addFrameInst(MCCFIInstruction::createDefCfa(nullptr, Reg, -Offset));
break;
default:
// TODO: Parse the other CFI operands.
@@ -1359,7 +1440,7 @@ bool MIParser::parseIRBlock(BasicBlock *&BB, const Function &F) {
switch (Token.kind()) {
case MIToken::NamedIRBlock: {
BB = dyn_cast_or_null<BasicBlock>(
- F.getValueSymbolTable().lookup(Token.stringValue()));
+ F.getValueSymbolTable()->lookup(Token.stringValue()));
if (!BB)
return error(Twine("use of undefined IR block '") + Token.range() + "'");
break;
@@ -1411,6 +1492,93 @@ bool MIParser::parseBlockAddressOperand(MachineOperand &Dest) {
return false;
}
+bool MIParser::parseIntrinsicOperand(MachineOperand &Dest) {
+ assert(Token.is(MIToken::kw_intrinsic));
+ lex();
+ if (expectAndConsume(MIToken::lparen))
+ return error("expected syntax intrinsic(@llvm.whatever)");
+
+ if (Token.isNot(MIToken::NamedGlobalValue))
+ return error("expected syntax intrinsic(@llvm.whatever)");
+
+ std::string Name = Token.stringValue();
+ lex();
+
+ if (expectAndConsume(MIToken::rparen))
+ return error("expected ')' to terminate intrinsic name");
+
+ // Find out what intrinsic we're dealing with, first try the global namespace
+ // and then the target's private intrinsics if that fails.
+ const TargetIntrinsicInfo *TII = MF.getTarget().getIntrinsicInfo();
+ Intrinsic::ID ID = Function::lookupIntrinsicID(Name);
+ if (ID == Intrinsic::not_intrinsic && TII)
+ ID = static_cast<Intrinsic::ID>(TII->lookupName(Name));
+
+ if (ID == Intrinsic::not_intrinsic)
+ return error("unknown intrinsic name");
+ Dest = MachineOperand::CreateIntrinsicID(ID);
+
+ return false;
+}
+
+bool MIParser::parsePredicateOperand(MachineOperand &Dest) {
+ assert(Token.is(MIToken::kw_intpred) || Token.is(MIToken::kw_floatpred));
+ bool IsFloat = Token.is(MIToken::kw_floatpred);
+ lex();
+
+ if (expectAndConsume(MIToken::lparen))
+ return error("expected syntax intpred(whatever) or floatpred(whatever");
+
+ if (Token.isNot(MIToken::Identifier))
+ return error("whatever");
+
+ CmpInst::Predicate Pred;
+ if (IsFloat) {
+ Pred = StringSwitch<CmpInst::Predicate>(Token.stringValue())
+ .Case("false", CmpInst::FCMP_FALSE)
+ .Case("oeq", CmpInst::FCMP_OEQ)
+ .Case("ogt", CmpInst::FCMP_OGT)
+ .Case("oge", CmpInst::FCMP_OGE)
+ .Case("olt", CmpInst::FCMP_OLT)
+ .Case("ole", CmpInst::FCMP_OLE)
+ .Case("one", CmpInst::FCMP_ONE)
+ .Case("ord", CmpInst::FCMP_ORD)
+ .Case("uno", CmpInst::FCMP_UNO)
+ .Case("ueq", CmpInst::FCMP_UEQ)
+ .Case("ugt", CmpInst::FCMP_UGT)
+ .Case("uge", CmpInst::FCMP_UGE)
+ .Case("ult", CmpInst::FCMP_ULT)
+ .Case("ule", CmpInst::FCMP_ULE)
+ .Case("une", CmpInst::FCMP_UNE)
+ .Case("true", CmpInst::FCMP_TRUE)
+ .Default(CmpInst::BAD_FCMP_PREDICATE);
+ if (!CmpInst::isFPPredicate(Pred))
+ return error("invalid floating-point predicate");
+ } else {
+ Pred = StringSwitch<CmpInst::Predicate>(Token.stringValue())
+ .Case("eq", CmpInst::ICMP_EQ)
+ .Case("ne", CmpInst::ICMP_NE)
+ .Case("sgt", CmpInst::ICMP_SGT)
+ .Case("sge", CmpInst::ICMP_SGE)
+ .Case("slt", CmpInst::ICMP_SLT)
+ .Case("sle", CmpInst::ICMP_SLE)
+ .Case("ugt", CmpInst::ICMP_UGT)
+ .Case("uge", CmpInst::ICMP_UGE)
+ .Case("ult", CmpInst::ICMP_ULT)
+ .Case("ule", CmpInst::ICMP_ULE)
+ .Default(CmpInst::BAD_ICMP_PREDICATE);
+ if (!CmpInst::isIntPredicate(Pred))
+ return error("invalid integer predicate");
+ }
+
+ lex();
+ Dest = MachineOperand::CreatePredicate(Pred);
+ if (expectAndConsume(MIToken::rparen))
+ return error("predicate should be terminated by ')'.");
+
+ return false;
+}
+
bool MIParser::parseTargetIndexOperand(MachineOperand &Dest) {
assert(Token.is(MIToken::kw_target_index));
lex();
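parsePredicateOperand above maps predicate names such as "oeq" or "sgt" to CmpInst predicates with a StringSwitch. A standalone sketch of the integer half of that lookup using a plain std::map; ToyPred is a made-up enum, not CmpInst::Predicate:

    // Name-to-predicate lookup, analogous to the StringSwitch above.
    #include <map>
    #include <string>

    enum class ToyPred { BAD, EQ, NE, SGT, SGE, SLT, SLE, UGT, UGE, ULT, ULE };

    ToyPred parseIntPred(const std::string &Name) {
      static const std::map<std::string, ToyPred> Preds = {
          {"eq", ToyPred::EQ},   {"ne", ToyPred::NE},   {"sgt", ToyPred::SGT},
          {"sge", ToyPred::SGE}, {"slt", ToyPred::SLT}, {"sle", ToyPred::SLE},
          {"ugt", ToyPred::UGT}, {"uge", ToyPred::UGE}, {"ult", ToyPred::ULT},
          {"ule", ToyPred::ULE}};
      auto It = Preds.find(Name);
      return It == Preds.end() ? ToyPred::BAD : It->second;  // BAD -> parse error
    }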
@@ -1441,8 +1609,8 @@ bool MIParser::parseLiveoutRegisterMaskOperand(MachineOperand &Dest) {
while (true) {
if (Token.isNot(MIToken::NamedRegister))
return error("expected a named register");
- unsigned Reg = 0;
- if (parseRegister(Reg))
+ unsigned Reg;
+ if (parseNamedRegister(Reg))
return true;
lex();
Mask[Reg / 32] |= 1U << (Reg % 32);
@@ -1511,10 +1679,15 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest,
return parseCFIOperand(Dest);
case MIToken::kw_blockaddress:
return parseBlockAddressOperand(Dest);
+ case MIToken::kw_intrinsic:
+ return parseIntrinsicOperand(Dest);
case MIToken::kw_target_index:
return parseTargetIndexOperand(Dest);
case MIToken::kw_liveout:
return parseLiveoutRegisterMaskOperand(Dest);
+ case MIToken::kw_floatpred:
+ case MIToken::kw_intpred:
+ return parsePredicateOperand(Dest);
case MIToken::Error:
return true;
case MIToken::Identifier:
@@ -1523,7 +1696,7 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest,
lex();
break;
}
- // fallthrough
+ LLVM_FALLTHROUGH;
default:
// FIXME: Parse the MCSymbol machine operand.
return error("expected a machine operand");
@@ -1613,7 +1786,7 @@ bool MIParser::parseOperandsOffset(MachineOperand &Op) {
bool MIParser::parseIRValue(const Value *&V) {
switch (Token.kind()) {
case MIToken::NamedIRValue: {
- V = MF.getFunction()->getValueSymbolTable().lookup(Token.stringValue());
+ V = MF.getFunction()->getValueSymbolTable()->lookup(Token.stringValue());
break;
}
case MIToken::IRValue: {
@@ -1647,10 +1820,35 @@ bool MIParser::parseIRValue(const Value *&V) {
}
bool MIParser::getUint64(uint64_t &Result) {
- assert(Token.hasIntegerValue());
- if (Token.integerValue().getActiveBits() > 64)
- return error("expected 64-bit integer (too large)");
- Result = Token.integerValue().getZExtValue();
+ if (Token.hasIntegerValue()) {
+ if (Token.integerValue().getActiveBits() > 64)
+ return error("expected 64-bit integer (too large)");
+ Result = Token.integerValue().getZExtValue();
+ return false;
+ }
+ if (Token.is(MIToken::HexLiteral)) {
+ APInt A;
+ if (getHexUint(A))
+ return true;
+ if (A.getBitWidth() > 64)
+ return error("expected 64-bit integer (too large)");
+ Result = A.getZExtValue();
+ return false;
+ }
+ return true;
+}
+
+bool MIParser::getHexUint(APInt &Result) {
+ assert(Token.is(MIToken::HexLiteral));
+ StringRef S = Token.range();
+ assert(S[0] == '0' && tolower(S[1]) == 'x');
+ // This could be a floating point literal with a special prefix.
+ if (!isxdigit(S[2]))
+ return true;
+ StringRef V = S.substr(2);
+ APInt A(V.size()*4, V, 16);
+ Result = APInt(A.getActiveBits(),
+ ArrayRef<uint64_t>(A.getRawData(), A.getNumWords()));
return false;
}
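getHexUint parses the hex literal into an APInt sized from the digit count and then shrinks it to the active bit width. A simplified standalone sketch of the same shrink-to-minimum-width idea, restricted to values that fit in 64 bits and assuming the digits after "0x" are valid hex:

    // Parse "0x..." and report the minimum bit width of the value.
    #include <cstdint>
    #include <string>

    bool parseHexMinWidth(const std::string &S, uint64_t &Value, unsigned &Bits) {
      if (S.size() < 3 || S[0] != '0' || (S[1] != 'x' && S[1] != 'X'))
        return false;
      Value = std::stoull(S.substr(2), nullptr, 16);
      Bits = 0;
      for (uint64_t V = Value; V; V >>= 1)
        ++Bits;               // number of active bits; 0 for the value 0
      return true;
    }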
@@ -1663,6 +1861,9 @@ bool MIParser::parseMemoryOperandFlag(MachineMemOperand::Flags &Flags) {
case MIToken::kw_non_temporal:
Flags |= MachineMemOperand::MONonTemporal;
break;
+ case MIToken::kw_dereferenceable:
+ Flags |= MachineMemOperand::MODereferenceable;
+ break;
case MIToken::kw_invariant:
Flags |= MachineMemOperand::MOInvariant;
break;
@@ -2059,36 +2260,42 @@ bool llvm::parseMachineBasicBlockDefinitions(PerFunctionMIParsingState &PFS,
return MIParser(PFS, Error, Src).parseBasicBlockDefinitions(PFS.MBBSlots);
}
-bool llvm::parseMachineInstructions(const PerFunctionMIParsingState &PFS,
+bool llvm::parseMachineInstructions(PerFunctionMIParsingState &PFS,
StringRef Src, SMDiagnostic &Error) {
return MIParser(PFS, Error, Src).parseBasicBlocks();
}
-bool llvm::parseMBBReference(const PerFunctionMIParsingState &PFS,
+bool llvm::parseMBBReference(PerFunctionMIParsingState &PFS,
MachineBasicBlock *&MBB, StringRef Src,
SMDiagnostic &Error) {
return MIParser(PFS, Error, Src).parseStandaloneMBB(MBB);
}
-bool llvm::parseNamedRegisterReference(const PerFunctionMIParsingState &PFS,
+bool llvm::parseRegisterReference(PerFunctionMIParsingState &PFS,
+ unsigned &Reg, StringRef Src,
+ SMDiagnostic &Error) {
+ return MIParser(PFS, Error, Src).parseStandaloneRegister(Reg);
+}
+
+bool llvm::parseNamedRegisterReference(PerFunctionMIParsingState &PFS,
unsigned &Reg, StringRef Src,
SMDiagnostic &Error) {
return MIParser(PFS, Error, Src).parseStandaloneNamedRegister(Reg);
}
-bool llvm::parseVirtualRegisterReference(const PerFunctionMIParsingState &PFS,
- unsigned &Reg, StringRef Src,
+bool llvm::parseVirtualRegisterReference(PerFunctionMIParsingState &PFS,
+ VRegInfo *&Info, StringRef Src,
SMDiagnostic &Error) {
- return MIParser(PFS, Error, Src).parseStandaloneVirtualRegister(Reg);
+ return MIParser(PFS, Error, Src).parseStandaloneVirtualRegister(Info);
}
-bool llvm::parseStackObjectReference(const PerFunctionMIParsingState &PFS,
+bool llvm::parseStackObjectReference(PerFunctionMIParsingState &PFS,
int &FI, StringRef Src,
SMDiagnostic &Error) {
return MIParser(PFS, Error, Src).parseStandaloneStackObject(FI);
}
-bool llvm::parseMDNode(const PerFunctionMIParsingState &PFS,
+bool llvm::parseMDNode(PerFunctionMIParsingState &PFS,
MDNode *&Node, StringRef Src, SMDiagnostic &Error) {
return MIParser(PFS, Error, Src).parseStandaloneMDNode(Node);
}
diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h
index 18895b9e54eb..93a4d84ba62f 100644
--- a/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h
+++ b/contrib/llvm/lib/CodeGen/MIRParser/MIParser.h
@@ -26,26 +26,42 @@ class MachineFunction;
class MachineInstr;
class MachineRegisterInfo;
class MDNode;
+class RegisterBank;
struct SlotMapping;
class SMDiagnostic;
class SourceMgr;
+class TargetRegisterClass;
+
+struct VRegInfo {
+ enum uint8_t {
+ UNKNOWN, NORMAL, GENERIC, REGBANK
+ } Kind = UNKNOWN;
+ bool Explicit = false; ///< VReg was explicitly specified in the .mir file.
+ union {
+ const TargetRegisterClass *RC;
+ const RegisterBank *RegBank;
+ } D;
+ unsigned VReg;
+ unsigned PreferredReg = 0;
+};
struct PerFunctionMIParsingState {
+ BumpPtrAllocator Allocator;
MachineFunction &MF;
SourceMgr *SM;
const SlotMapping &IRSlots;
DenseMap<unsigned, MachineBasicBlock *> MBBSlots;
- DenseMap<unsigned, unsigned> VirtualRegisterSlots;
+ DenseMap<unsigned, VRegInfo*> VRegInfos;
DenseMap<unsigned, int> FixedStackObjectSlots;
DenseMap<unsigned, int> StackObjectSlots;
DenseMap<unsigned, unsigned> ConstantPoolSlots;
DenseMap<unsigned, unsigned> JumpTableSlots;
- /// Hold the generic virtual registers.
- SmallSet<unsigned, 8> GenericVRegs;
PerFunctionMIParsingState(MachineFunction &MF, SourceMgr &SM,
const SlotMapping &IRSlots);
+
+ VRegInfo &getVRegInfo(unsigned VReg);
};
/// Parse the machine basic block definitions, and skip the machine
@@ -73,26 +89,29 @@ bool parseMachineBasicBlockDefinitions(PerFunctionMIParsingState &PFS,
/// on the given source string.
///
/// Return true if an error occurred.
-bool parseMachineInstructions(const PerFunctionMIParsingState &PFS,
- StringRef Src, SMDiagnostic &Error);
+bool parseMachineInstructions(PerFunctionMIParsingState &PFS, StringRef Src,
+ SMDiagnostic &Error);
-bool parseMBBReference(const PerFunctionMIParsingState &PFS,
+bool parseMBBReference(PerFunctionMIParsingState &PFS,
MachineBasicBlock *&MBB, StringRef Src,
SMDiagnostic &Error);
-bool parseNamedRegisterReference(const PerFunctionMIParsingState &PFS,
- unsigned &Reg, StringRef Src,
- SMDiagnostic &Error);
+bool parseRegisterReference(PerFunctionMIParsingState &PFS,
+ unsigned &Reg, StringRef Src,
+ SMDiagnostic &Error);
+
+bool parseNamedRegisterReference(PerFunctionMIParsingState &PFS, unsigned &Reg,
+ StringRef Src, SMDiagnostic &Error);
-bool parseVirtualRegisterReference(const PerFunctionMIParsingState &PFS,
- unsigned &Reg, StringRef Src,
+bool parseVirtualRegisterReference(PerFunctionMIParsingState &PFS,
+ VRegInfo *&Info, StringRef Src,
SMDiagnostic &Error);
-bool parseStackObjectReference(const PerFunctionMIParsingState &PFS,
- int &FI, StringRef Src, SMDiagnostic &Error);
+bool parseStackObjectReference(PerFunctionMIParsingState &PFS, int &FI,
+ StringRef Src, SMDiagnostic &Error);
-bool parseMDNode(const PerFunctionMIParsingState &PFS, MDNode *&Node,
- StringRef Src, SMDiagnostic &Error);
+bool parseMDNode(PerFunctionMIParsingState &PFS, MDNode *&Node, StringRef Src,
+ SMDiagnostic &Error);
} // end namespace llvm
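
The reworked parsing state above replaces the VirtualRegisterSlots and GenericVRegs maps with bump-allocated VRegInfo records that are filled in lazily, so a virtual register's class or bank can be named before or after its first use and applied in one place later. The header only declares getVRegInfo(); a hypothetical sketch of the lookup-or-create shape it suggests (the real body lives in MIParser.cpp and also has to create the underlying MachineFunction virtual register, which is omitted here):

  // Sketch only: lazily create a record the first time a vreg number is seen.
  VRegInfo &PerFunctionMIParsingState::getVRegInfo(unsigned Num) {
    VRegInfo *&Entry = VRegInfos[Num];
    if (!Entry) {
      // Bump-pointer allocated; released wholesale with the parsing state.
      Entry = new (Allocator) VRegInfo;
      // Kind stays UNKNOWN until the register block or an operand names a
      // class or register bank for this vreg.
    }
    return *Entry;
  }
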
diff --git a/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp b/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
index 4aa3df6326e9..3dff1147631b 100644
--- a/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/contrib/llvm/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -102,10 +102,10 @@ public:
/// Return true if error occurred.
bool initializeMachineFunction(MachineFunction &MF);
- bool initializeRegisterInfo(PerFunctionMIParsingState &PFS,
- const yaml::MachineFunction &YamlMF);
+ bool parseRegisterInfo(PerFunctionMIParsingState &PFS,
+ const yaml::MachineFunction &YamlMF);
- void inferRegisterInfo(const PerFunctionMIParsingState &PFS,
+ bool setupRegisterInfo(const PerFunctionMIParsingState &PFS,
const yaml::MachineFunction &YamlMF);
bool initializeFrameInfo(PerFunctionMIParsingState &PFS,
@@ -128,10 +128,10 @@ public:
const yaml::MachineJumpTable &YamlJTI);
private:
- bool parseMDNode(const PerFunctionMIParsingState &PFS, MDNode *&Node,
+ bool parseMDNode(PerFunctionMIParsingState &PFS, MDNode *&Node,
const yaml::StringValue &Source);
- bool parseMBBReference(const PerFunctionMIParsingState &PFS,
+ bool parseMBBReference(PerFunctionMIParsingState &PFS,
MachineBasicBlock *&MBB,
const yaml::StringValue &Source);
@@ -160,6 +160,8 @@ private:
///
/// Return null if the name isn't a register bank.
const RegisterBank *getRegBank(const MachineFunction &MF, StringRef Name);
+
+ void computeFunctionProperties(MachineFunction &MF);
};
} // end namespace llvm
@@ -255,7 +257,8 @@ std::unique_ptr<Module> MIRParserImpl::parse() {
bool MIRParserImpl::parseMachineFunction(yaml::Input &In, Module &M,
bool NoLLVMIR) {
auto MF = llvm::make_unique<yaml::MachineFunction>();
- yaml::yamlize(In, *MF, false);
+ yaml::EmptyContext Ctx;
+ yaml::yamlize(In, *MF, false, Ctx);
if (In.error())
return true;
auto FunctionName = MF->Name;
@@ -279,6 +282,43 @@ void MIRParserImpl::createDummyFunction(StringRef Name, Module &M) {
new UnreachableInst(Context, BB);
}
+static bool isSSA(const MachineFunction &MF) {
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(I);
+ if (!MRI.hasOneDef(Reg) && !MRI.def_empty(Reg))
+ return false;
+ }
+ return true;
+}
+
+void MIRParserImpl::computeFunctionProperties(MachineFunction &MF) {
+ MachineFunctionProperties &Properties = MF.getProperties();
+
+ bool HasPHI = false;
+ bool HasInlineAsm = false;
+ for (const MachineBasicBlock &MBB : MF) {
+ for (const MachineInstr &MI : MBB) {
+ if (MI.isPHI())
+ HasPHI = true;
+ if (MI.isInlineAsm())
+ HasInlineAsm = true;
+ }
+ }
+ if (!HasPHI)
+ Properties.set(MachineFunctionProperties::Property::NoPHIs);
+ MF.setHasInlineAsm(HasInlineAsm);
+
+ if (isSSA(MF))
+ Properties.set(MachineFunctionProperties::Property::IsSSA);
+ else
+ Properties.reset(MachineFunctionProperties::Property::IsSSA);
+
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ if (MRI.getNumVirtRegs() == 0)
+ Properties.set(MachineFunctionProperties::Property::NoVRegs);
+}
+
bool MIRParserImpl::initializeMachineFunction(MachineFunction &MF) {
auto It = Functions.find(MF.getName());
if (It == Functions.end())
@@ -289,11 +329,17 @@ bool MIRParserImpl::initializeMachineFunction(MachineFunction &MF) {
if (YamlMF.Alignment)
MF.setAlignment(YamlMF.Alignment);
MF.setExposesReturnsTwice(YamlMF.ExposesReturnsTwice);
- MF.setHasInlineAsm(YamlMF.HasInlineAsm);
- if (YamlMF.AllVRegsAllocated)
- MF.getProperties().set(MachineFunctionProperties::Property::AllVRegsAllocated);
+
+ if (YamlMF.Legalized)
+ MF.getProperties().set(MachineFunctionProperties::Property::Legalized);
+ if (YamlMF.RegBankSelected)
+ MF.getProperties().set(
+ MachineFunctionProperties::Property::RegBankSelected);
+ if (YamlMF.Selected)
+ MF.getProperties().set(MachineFunctionProperties::Property::Selected);
+
PerFunctionMIParsingState PFS(MF, SM, IRSlots);
- if (initializeRegisterInfo(PFS, YamlMF))
+ if (parseRegisterInfo(PFS, YamlMF))
return true;
if (!YamlMF.Constants.empty()) {
auto *ConstantPool = MF.getConstantPool();
@@ -343,62 +389,60 @@ bool MIRParserImpl::initializeMachineFunction(MachineFunction &MF) {
}
PFS.SM = &SM;
- inferRegisterInfo(PFS, YamlMF);
- // FIXME: This is a temporary workaround until the reserved registers can be
- // serialized.
- MF.getRegInfo().freezeReservedRegs(MF);
+ if (setupRegisterInfo(PFS, YamlMF))
+ return true;
+
+ computeFunctionProperties(MF);
+
MF.verify();
return false;
}
-bool MIRParserImpl::initializeRegisterInfo(PerFunctionMIParsingState &PFS,
- const yaml::MachineFunction &YamlMF) {
+bool MIRParserImpl::parseRegisterInfo(PerFunctionMIParsingState &PFS,
+ const yaml::MachineFunction &YamlMF) {
MachineFunction &MF = PFS.MF;
MachineRegisterInfo &RegInfo = MF.getRegInfo();
- assert(RegInfo.isSSA());
- if (!YamlMF.IsSSA)
- RegInfo.leaveSSA();
assert(RegInfo.tracksLiveness());
if (!YamlMF.TracksRegLiveness)
RegInfo.invalidateLiveness();
- RegInfo.enableSubRegLiveness(YamlMF.TracksSubRegLiveness);
SMDiagnostic Error;
// Parse the virtual register information.
for (const auto &VReg : YamlMF.VirtualRegisters) {
- unsigned Reg;
+ VRegInfo &Info = PFS.getVRegInfo(VReg.ID.Value);
+ if (Info.Explicit)
+ return error(VReg.ID.SourceRange.Start,
+ Twine("redefinition of virtual register '%") +
+ Twine(VReg.ID.Value) + "'");
+ Info.Explicit = true;
+
if (StringRef(VReg.Class.Value).equals("_")) {
- // This is a generic virtual register.
- // The size will be set appropriately when we reach the definition.
- Reg = RegInfo.createGenericVirtualRegister(/*Size*/ 1);
- PFS.GenericVRegs.insert(Reg);
+ Info.Kind = VRegInfo::GENERIC;
} else {
const auto *RC = getRegClass(MF, VReg.Class.Value);
if (RC) {
- Reg = RegInfo.createVirtualRegister(RC);
+ Info.Kind = VRegInfo::NORMAL;
+ Info.D.RC = RC;
} else {
- const auto *RegBank = getRegBank(MF, VReg.Class.Value);
+ const RegisterBank *RegBank = getRegBank(MF, VReg.Class.Value);
if (!RegBank)
return error(
VReg.Class.SourceRange.Start,
Twine("use of undefined register class or register bank '") +
VReg.Class.Value + "'");
- Reg = RegInfo.createGenericVirtualRegister(/*Size*/ 1);
- RegInfo.setRegBank(Reg, *RegBank);
- PFS.GenericVRegs.insert(Reg);
+ Info.Kind = VRegInfo::REGBANK;
+ Info.D.RegBank = RegBank;
}
}
- if (!PFS.VirtualRegisterSlots.insert(std::make_pair(VReg.ID.Value, Reg))
- .second)
- return error(VReg.ID.SourceRange.Start,
- Twine("redefinition of virtual register '%") +
- Twine(VReg.ID.Value) + "'");
+
if (!VReg.PreferredRegister.Value.empty()) {
- unsigned PreferredReg = 0;
- if (parseNamedRegisterReference(PFS, PreferredReg,
- VReg.PreferredRegister.Value, Error))
+ if (Info.Kind != VRegInfo::NORMAL)
+ return error(VReg.Class.SourceRange.Start,
+ Twine("preferred register can only be set for normal vregs"));
+
+ if (parseRegisterReference(PFS, Info.PreferredReg,
+ VReg.PreferredRegister.Value, Error))
return error(Error, VReg.PreferredRegister.SourceRange);
- RegInfo.setSimpleHint(Reg, PreferredReg);
}
}
@@ -409,9 +453,11 @@ bool MIRParserImpl::initializeRegisterInfo(PerFunctionMIParsingState &PFS,
return error(Error, LiveIn.Register.SourceRange);
unsigned VReg = 0;
if (!LiveIn.VirtualRegister.Value.empty()) {
- if (parseVirtualRegisterReference(PFS, VReg, LiveIn.VirtualRegister.Value,
+ VRegInfo *Info;
+ if (parseVirtualRegisterReference(PFS, Info, LiveIn.VirtualRegister.Value,
Error))
return error(Error, LiveIn.VirtualRegister.SourceRange);
+ VReg = Info->VReg;
}
RegInfo.addLiveIn(Reg, VReg);
}
@@ -430,26 +476,57 @@ bool MIRParserImpl::initializeRegisterInfo(PerFunctionMIParsingState &PFS,
return false;
}
-void MIRParserImpl::inferRegisterInfo(const PerFunctionMIParsingState &PFS,
+bool MIRParserImpl::setupRegisterInfo(const PerFunctionMIParsingState &PFS,
const yaml::MachineFunction &YamlMF) {
- if (YamlMF.CalleeSavedRegisters)
- return;
- MachineRegisterInfo &MRI = PFS.MF.getRegInfo();
- for (const MachineBasicBlock &MBB : PFS.MF) {
- for (const MachineInstr &MI : MBB) {
- for (const MachineOperand &MO : MI.operands()) {
- if (!MO.isRegMask())
- continue;
- MRI.addPhysRegsUsedFromRegMask(MO.getRegMask());
+ MachineFunction &MF = PFS.MF;
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ bool Error = false;
+ // Create VRegs
+ for (auto P : PFS.VRegInfos) {
+ const VRegInfo &Info = *P.second;
+ unsigned Reg = Info.VReg;
+ switch (Info.Kind) {
+ case VRegInfo::UNKNOWN:
+ error(Twine("Cannot determine class/bank of virtual register ") +
+ Twine(P.first) + " in function '" + MF.getName() + "'");
+ Error = true;
+ break;
+ case VRegInfo::NORMAL:
+ MRI.setRegClass(Reg, Info.D.RC);
+ if (Info.PreferredReg != 0)
+ MRI.setSimpleHint(Reg, Info.PreferredReg);
+ break;
+ case VRegInfo::GENERIC:
+ break;
+ case VRegInfo::REGBANK:
+ MRI.setRegBank(Reg, *Info.D.RegBank);
+ break;
+ }
+ }
+
+ // Compute MachineRegisterInfo::UsedPhysRegMask
+ if (!YamlMF.CalleeSavedRegisters) {
+ for (const MachineBasicBlock &MBB : MF) {
+ for (const MachineInstr &MI : MBB) {
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isRegMask())
+ continue;
+ MRI.addPhysRegsUsedFromRegMask(MO.getRegMask());
+ }
}
}
}
+
+ // FIXME: This is a temporary workaround until the reserved registers can be
+ // serialized.
+ MRI.freezeReservedRegs(MF);
+ return Error;
}
bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS,
const yaml::MachineFunction &YamlMF) {
MachineFunction &MF = PFS.MF;
- MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
const Function &F = *MF.getFunction();
const yaml::MachineFrameInfo &YamlMFI = YamlMF.FrameInfo;
MFI.setFrameAddressIsTaken(YamlMFI.IsFrameAddressTaken);
@@ -507,7 +584,7 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS,
const yaml::StringValue &Name = Object.Name;
if (!Name.Value.empty()) {
Alloca = dyn_cast_or_null<AllocaInst>(
- F.getValueSymbolTable().lookup(Name.Value));
+ F.getValueSymbolTable()->lookup(Name.Value));
if (!Alloca)
return error(Name.SourceRange.Start,
"alloca instruction named '" + Name.Value +
@@ -597,11 +674,11 @@ bool MIRParserImpl::parseStackObjectsDebugInfo(PerFunctionMIParsingState &PFS,
typecheckMDNode(DIExpr, Expr, Object.DebugExpr, "DIExpression", *this) ||
typecheckMDNode(DILoc, Loc, Object.DebugLoc, "DILocation", *this))
return true;
- PFS.MF.getMMI().setVariableDbgInfo(DIVar, DIExpr, unsigned(FrameIdx), DILoc);
+ PFS.MF.setVariableDbgInfo(DIVar, DIExpr, unsigned(FrameIdx), DILoc);
return false;
}
-bool MIRParserImpl::parseMDNode(const PerFunctionMIParsingState &PFS,
+bool MIRParserImpl::parseMDNode(PerFunctionMIParsingState &PFS,
MDNode *&Node, const yaml::StringValue &Source) {
if (Source.Value.empty())
return false;
@@ -657,7 +734,7 @@ bool MIRParserImpl::initializeJumpTableInfo(PerFunctionMIParsingState &PFS,
return false;
}
-bool MIRParserImpl::parseMBBReference(const PerFunctionMIParsingState &PFS,
+bool MIRParserImpl::parseMBBReference(PerFunctionMIParsingState &PFS,
MachineBasicBlock *&MBB,
const yaml::StringValue &Source) {
SMDiagnostic Error;
@@ -784,6 +861,14 @@ std::unique_ptr<MIRParser>
llvm::createMIRParser(std::unique_ptr<MemoryBuffer> Contents,
LLVMContext &Context) {
auto Filename = Contents->getBufferIdentifier();
+ if (Context.shouldDiscardValueNames()) {
+ Context.diagnose(DiagnosticInfoMIRParser(
+ DS_Error,
+ SMDiagnostic(
+ Filename, SourceMgr::DK_Error,
+ "Can't read MIR with a Context that discards named Values")));
+ return nullptr;
+ }
return llvm::make_unique<MIRParser>(
llvm::make_unique<MIRParserImpl>(std::move(Contents), Filename, Context));
}
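
With this change the parser derives NoPHIs, IsSSA and NoVRegs from the instructions it just built instead of trusting dedicated YAML keys, and the Legalized/RegBankSelected/Selected flags replace the old AllVRegsAllocated field. Downstream machine passes consume these flags through MachineFunctionProperties; a small sketch of that consumer side (the pass itself is invented):

  #include "llvm/CodeGen/MachineFunction.h"
  #include "llvm/CodeGen/MachineFunctionPass.h"

  // A made-up pass that declares it relies on the IsSSA property the MIR
  // parser now computes; the machine verifier checks declared properties
  // against the function's actual ones.
  struct HypotheticalSSAOnlyPass : public llvm::MachineFunctionPass {
    static char ID;
    HypotheticalSSAOnlyPass() : llvm::MachineFunctionPass(ID) {}

    llvm::MachineFunctionProperties getRequiredProperties() const override {
      return llvm::MachineFunctionProperties().set(
          llvm::MachineFunctionProperties::Property::IsSSA);
    }

    bool runOnMachineFunction(llvm::MachineFunction &MF) override {
      return false; // analysis-style pass; nothing to change
    }
  };
  char HypotheticalSSAOnlyPass::ID = 0;
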
diff --git a/contrib/llvm/lib/CodeGen/MIRPrinter.cpp b/contrib/llvm/lib/CodeGen/MIRPrinter.cpp
index 703c99d9edd3..eb13d2d3ec0c 100644
--- a/contrib/llvm/lib/CodeGen/MIRPrinter.cpp
+++ b/contrib/llvm/lib/CodeGen/MIRPrinter.cpp
@@ -14,6 +14,7 @@
#include "MIRPrinter.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallBitVector.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/MIRYamlMapping.h"
#include "llvm/CodeGen/MachineConstantPool.h"
@@ -27,13 +28,16 @@
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSlotTracker.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/YAMLTraits.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
@@ -86,10 +90,8 @@ public:
const MachineConstantPool &ConstantPool);
void convert(ModuleSlotTracker &MST, yaml::MachineJumpTable &YamlJTI,
const MachineJumpTableInfo &JTI);
- void convertStackObjects(yaml::MachineFunction &MF,
- const MachineFrameInfo &MFI, MachineModuleInfo &MMI,
- ModuleSlotTracker &MST,
- const TargetRegisterInfo *TRI);
+ void convertStackObjects(yaml::MachineFunction &YMF,
+ const MachineFunction &MF, ModuleSlotTracker &MST);
private:
void initRegisterMaskIds(const MachineFunction &MF);
@@ -121,7 +123,7 @@ public:
void printTargetFlags(const MachineOperand &Op);
void print(const MachineOperand &Op, const TargetRegisterInfo *TRI,
unsigned I, bool ShouldPrintRegisterTies,
- const MachineRegisterInfo *MRI = nullptr, bool IsDef = false);
+ LLT TypeToPrint, bool IsDef = false);
void print(const MachineMemOperand &Op);
void print(const MCCFIInstruction &CFI, const TargetRegisterInfo *TRI);
@@ -172,16 +174,19 @@ void MIRPrinter::print(const MachineFunction &MF) {
YamlMF.Name = MF.getName();
YamlMF.Alignment = MF.getAlignment();
YamlMF.ExposesReturnsTwice = MF.exposesReturnsTwice();
- YamlMF.HasInlineAsm = MF.hasInlineAsm();
- YamlMF.AllVRegsAllocated = MF.getProperties().hasProperty(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+
+ YamlMF.Legalized = MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::Legalized);
+ YamlMF.RegBankSelected = MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::RegBankSelected);
+ YamlMF.Selected = MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::Selected);
convert(YamlMF, MF.getRegInfo(), MF.getSubtarget().getRegisterInfo());
ModuleSlotTracker MST(MF.getFunction()->getParent());
MST.incorporateFunction(*MF.getFunction());
- convert(MST, YamlMF.FrameInfo, *MF.getFrameInfo());
- convertStackObjects(YamlMF, *MF.getFrameInfo(), MF.getMMI(), MST,
- MF.getSubtarget().getRegisterInfo());
+ convert(MST, YamlMF.FrameInfo, MF.getFrameInfo());
+ convertStackObjects(YamlMF, MF, MST);
if (const auto *ConstantPool = MF.getConstantPool())
convert(YamlMF, *ConstantPool);
if (const auto *JumpTableInfo = MF.getJumpTableInfo())
@@ -203,9 +208,7 @@ void MIRPrinter::print(const MachineFunction &MF) {
void MIRPrinter::convert(yaml::MachineFunction &MF,
const MachineRegisterInfo &RegInfo,
const TargetRegisterInfo *TRI) {
- MF.IsSSA = RegInfo.isSSA();
MF.TracksRegLiveness = RegInfo.tracksLiveness();
- MF.TracksSubRegLiveness = RegInfo.subRegLivenessEnabled();
// Print the virtual register definitions.
for (unsigned I = 0, E = RegInfo.getNumVirtRegs(); I < E; ++I) {
@@ -219,7 +222,8 @@ void MIRPrinter::convert(yaml::MachineFunction &MF,
VReg.Class = StringRef(RegInfo.getRegBankOrNull(Reg)->getName()).lower();
else {
VReg.Class = std::string("_");
- assert(RegInfo.getSize(Reg) && "Generic registers must have a size");
+ assert((RegInfo.def_empty(Reg) || RegInfo.getType(Reg).isValid()) &&
+ "Generic registers must have a valid type");
}
unsigned PreferredReg = RegInfo.getSimpleHint(Reg);
if (PreferredReg)
@@ -279,11 +283,11 @@ void MIRPrinter::convert(ModuleSlotTracker &MST,
}
}
-void MIRPrinter::convertStackObjects(yaml::MachineFunction &MF,
- const MachineFrameInfo &MFI,
- MachineModuleInfo &MMI,
- ModuleSlotTracker &MST,
- const TargetRegisterInfo *TRI) {
+void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF,
+ const MachineFunction &MF,
+ ModuleSlotTracker &MST) {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
// Process fixed stack objects.
unsigned ID = 0;
for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
@@ -300,7 +304,7 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &MF,
YamlObject.Alignment = MFI.getObjectAlignment(I);
YamlObject.IsImmutable = MFI.isImmutableObjectIndex(I);
YamlObject.IsAliased = MFI.isAliasedObjectIndex(I);
- MF.FixedStackObjects.push_back(YamlObject);
+ YMF.FixedStackObjects.push_back(YamlObject);
StackObjectOperandMapping.insert(
std::make_pair(I, FrameIndexOperand::createFixed(ID++)));
}
@@ -325,7 +329,7 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &MF,
YamlObject.Size = MFI.getObjectSize(I);
YamlObject.Alignment = MFI.getObjectAlignment(I);
- MF.StackObjects.push_back(YamlObject);
+ YMF.StackObjects.push_back(YamlObject);
StackObjectOperandMapping.insert(std::make_pair(
I, FrameIndexOperand::create(YamlObject.Name.Value, ID++)));
}
@@ -338,9 +342,9 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &MF,
"Invalid stack object index");
const FrameIndexOperand &StackObject = StackObjectInfo->second;
if (StackObject.IsFixed)
- MF.FixedStackObjects[StackObject.ID].CalleeSavedRegister = Reg;
+ YMF.FixedStackObjects[StackObject.ID].CalleeSavedRegister = Reg;
else
- MF.StackObjects[StackObject.ID].CalleeSavedRegister = Reg;
+ YMF.StackObjects[StackObject.ID].CalleeSavedRegister = Reg;
}
for (unsigned I = 0, E = MFI.getLocalFrameObjectCount(); I < E; ++I) {
auto LocalObject = MFI.getLocalFrameObjectMap(I);
@@ -349,26 +353,26 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &MF,
"Invalid stack object index");
const FrameIndexOperand &StackObject = StackObjectInfo->second;
assert(!StackObject.IsFixed && "Expected a locally mapped stack object");
- MF.StackObjects[StackObject.ID].LocalOffset = LocalObject.second;
+ YMF.StackObjects[StackObject.ID].LocalOffset = LocalObject.second;
}
// Print the stack object references in the frame information class after
// converting the stack objects.
if (MFI.hasStackProtectorIndex()) {
- raw_string_ostream StrOS(MF.FrameInfo.StackProtector.Value);
+ raw_string_ostream StrOS(YMF.FrameInfo.StackProtector.Value);
MIPrinter(StrOS, MST, RegisterMaskIds, StackObjectOperandMapping)
.printStackObjectReference(MFI.getStackProtectorIndex());
}
// Print the debug variable information.
- for (MachineModuleInfo::VariableDbgInfo &DebugVar :
- MMI.getVariableDbgInfo()) {
+ for (const MachineFunction::VariableDbgInfo &DebugVar :
+ MF.getVariableDbgInfo()) {
auto StackObjectInfo = StackObjectOperandMapping.find(DebugVar.Slot);
assert(StackObjectInfo != StackObjectOperandMapping.end() &&
"Invalid stack object index");
const FrameIndexOperand &StackObject = StackObjectInfo->second;
assert(!StackObject.IsFixed && "Expected a non-fixed stack object");
- auto &Object = MF.StackObjects[StackObject.ID];
+ auto &Object = YMF.StackObjects[StackObject.ID];
{
raw_string_ostream StrOS(Object.DebugVar.Value);
DebugVar.Var->printAsOperand(StrOS, MST);
@@ -475,7 +479,9 @@ void MIPrinter::print(const MachineBasicBlock &MBB) {
OS << ", ";
printMBBReference(**I);
if (MBB.hasSuccessorProbabilities())
- OS << '(' << MBB.getSuccProbability(I) << ')';
+ OS << '('
+ << format("0x%08" PRIx32, MBB.getSuccProbability(I).getNumerator())
+ << ')';
}
OS << "\n";
HasLineAttributes = true;
@@ -492,8 +498,8 @@ void MIPrinter::print(const MachineBasicBlock &MBB) {
OS << ", ";
First = false;
printReg(LI.PhysReg, OS, TRI);
- if (LI.LaneMask != ~0u)
- OS << ':' << PrintLaneMask(LI.LaneMask);
+ if (!LI.LaneMask.all())
+ OS << ":0x" << PrintLaneMask(LI.LaneMask);
}
OS << "\n";
HasLineAttributes = true;
@@ -537,6 +543,27 @@ static bool hasComplexRegisterTies(const MachineInstr &MI) {
return false;
}
+static LLT getTypeToPrint(const MachineInstr &MI, unsigned OpIdx,
+ SmallBitVector &PrintedTypes,
+ const MachineRegisterInfo &MRI) {
+ const MachineOperand &Op = MI.getOperand(OpIdx);
+ if (!Op.isReg())
+ return LLT{};
+
+ if (MI.isVariadic() || OpIdx >= MI.getNumExplicitOperands())
+ return MRI.getType(Op.getReg());
+
+ auto &OpInfo = MI.getDesc().OpInfo[OpIdx];
+ if (!OpInfo.isGenericType())
+ return MRI.getType(Op.getReg());
+
+ if (PrintedTypes[OpInfo.getGenericTypeIndex()])
+ return LLT{};
+
+ PrintedTypes.set(OpInfo.getGenericTypeIndex());
+ return MRI.getType(Op.getReg());
+}
+
void MIPrinter::print(const MachineInstr &MI) {
const auto *MF = MI.getParent()->getParent();
const auto &MRI = MF->getRegInfo();
@@ -548,6 +575,7 @@ void MIPrinter::print(const MachineInstr &MI) {
if (MI.isCFIInstruction())
assert(MI.getNumOperands() == 1 && "Expected 1 operand in CFI instruction");
+ SmallBitVector PrintedTypes(8);
bool ShouldPrintRegisterTies = hasComplexRegisterTies(MI);
unsigned I = 0, E = MI.getNumOperands();
for (; I < E && MI.getOperand(I).isReg() && MI.getOperand(I).isDef() &&
@@ -555,7 +583,8 @@ void MIPrinter::print(const MachineInstr &MI) {
++I) {
if (I)
OS << ", ";
- print(MI.getOperand(I), TRI, I, ShouldPrintRegisterTies, &MRI,
+ print(MI.getOperand(I), TRI, I, ShouldPrintRegisterTies,
+ getTypeToPrint(MI, I, PrintedTypes, MRI),
/*IsDef=*/true);
}
@@ -564,11 +593,6 @@ void MIPrinter::print(const MachineInstr &MI) {
if (MI.getFlag(MachineInstr::FrameSetup))
OS << "frame-setup ";
OS << TII->getName(MI.getOpcode());
- if (isPreISelGenericOpcode(MI.getOpcode())) {
- assert(MI.getType() && "Generic instructions must have a type");
- OS << ' ';
- MI.getType()->print(OS, /*IsForDebug*/ false, /*NoDetails*/ true);
- }
if (I < E)
OS << ' ';
@@ -576,7 +600,8 @@ void MIPrinter::print(const MachineInstr &MI) {
for (; I < E; ++I) {
if (NeedComma)
OS << ", ";
- print(MI.getOperand(I), TRI, I, ShouldPrintRegisterTies);
+ print(MI.getOperand(I), TRI, I, ShouldPrintRegisterTies,
+ getTypeToPrint(MI, I, PrintedTypes, MRI));
NeedComma = true;
}
@@ -748,8 +773,8 @@ static const char *getTargetIndexName(const MachineFunction &MF, int Index) {
}
void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI,
- unsigned I, bool ShouldPrintRegisterTies,
- const MachineRegisterInfo *MRI, bool IsDef) {
+ unsigned I, bool ShouldPrintRegisterTies, LLT TypeToPrint,
+ bool IsDef) {
printTargetFlags(Op);
switch (Op.getType()) {
case MachineOperand::MO_Register:
@@ -773,12 +798,11 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI,
printReg(Op.getReg(), OS, TRI);
// Print the sub register.
if (Op.getSubReg() != 0)
- OS << ':' << TRI->getSubRegIndexName(Op.getSubReg());
+ OS << '.' << TRI->getSubRegIndexName(Op.getSubReg());
if (ShouldPrintRegisterTies && Op.isTied() && !Op.isDef())
OS << "(tied-def " << Op.getParent()->findTiedOperandIdx(I) << ")";
- assert((!IsDef || MRI) && "for IsDef, MRI must be provided");
- if (IsDef && MRI->getSize(Op.getReg()))
- OS << '(' << MRI->getSize(Op.getReg()) << ')';
+ if (TypeToPrint.isValid())
+ OS << '(' << TypeToPrint << ')';
break;
case MachineOperand::MO_Immediate:
OS << Op.getImm();
@@ -861,8 +885,25 @@ void MIPrinter::print(const MachineOperand &Op, const TargetRegisterInfo *TRI,
OS << "<mcsymbol " << *Op.getMCSymbol() << ">";
break;
case MachineOperand::MO_CFIIndex: {
- const auto &MMI = Op.getParent()->getParent()->getParent()->getMMI();
- print(MMI.getFrameInstructions()[Op.getCFIIndex()], TRI);
+ const MachineFunction &MF = *Op.getParent()->getParent()->getParent();
+ print(MF.getFrameInstructions()[Op.getCFIIndex()], TRI);
+ break;
+ }
+ case MachineOperand::MO_IntrinsicID: {
+ Intrinsic::ID ID = Op.getIntrinsicID();
+ if (ID < Intrinsic::num_intrinsics)
+ OS << "intrinsic(@" << Intrinsic::getName(ID, None) << ')';
+ else {
+ const MachineFunction &MF = *Op.getParent()->getParent()->getParent();
+ const TargetIntrinsicInfo *TII = MF.getTarget().getIntrinsicInfo();
+ OS << "intrinsic(@" << TII->getName(ID) << ')';
+ }
+ break;
+ }
+ case MachineOperand::MO_Predicate: {
+ auto Pred = static_cast<CmpInst::Predicate>(Op.getPredicate());
+ OS << (CmpInst::isIntPredicate(Pred) ? "int" : "float") << "pred("
+ << CmpInst::getPredicateName(Pred) << ')';
break;
}
}
@@ -875,6 +916,8 @@ void MIPrinter::print(const MachineMemOperand &Op) {
OS << "volatile ";
if (Op.isNonTemporal())
OS << "non-temporal ";
+ if (Op.isDereferenceable())
+ OS << "dereferenceable ";
if (Op.isInvariant())
OS << "invariant ";
if (Op.isLoad())
@@ -917,6 +960,9 @@ void MIPrinter::print(const MachineMemOperand &Op) {
printLLVMNameWithoutPrefix(
OS, cast<ExternalSymbolPseudoSourceValue>(PVal)->getSymbol());
break;
+ case PseudoSourceValue::TargetCustom:
+ llvm_unreachable("TargetCustom pseudo source values are not supported");
+ break;
}
}
printOffset(Op.getOffset());
@@ -956,32 +1002,32 @@ void MIPrinter::print(const MCCFIInstruction &CFI,
const TargetRegisterInfo *TRI) {
switch (CFI.getOperation()) {
case MCCFIInstruction::OpSameValue:
- OS << ".cfi_same_value ";
+ OS << "same_value ";
if (CFI.getLabel())
OS << "<mcsymbol> ";
printCFIRegister(CFI.getRegister(), OS, TRI);
break;
case MCCFIInstruction::OpOffset:
- OS << ".cfi_offset ";
+ OS << "offset ";
if (CFI.getLabel())
OS << "<mcsymbol> ";
printCFIRegister(CFI.getRegister(), OS, TRI);
OS << ", " << CFI.getOffset();
break;
case MCCFIInstruction::OpDefCfaRegister:
- OS << ".cfi_def_cfa_register ";
+ OS << "def_cfa_register ";
if (CFI.getLabel())
OS << "<mcsymbol> ";
printCFIRegister(CFI.getRegister(), OS, TRI);
break;
case MCCFIInstruction::OpDefCfaOffset:
- OS << ".cfi_def_cfa_offset ";
+ OS << "def_cfa_offset ";
if (CFI.getLabel())
OS << "<mcsymbol> ";
OS << CFI.getOffset();
break;
case MCCFIInstruction::OpDefCfa:
- OS << ".cfi_def_cfa ";
+ OS << "def_cfa ";
if (CFI.getLabel())
OS << "<mcsymbol> ";
printCFIRegister(CFI.getRegister(), OS, TRI);
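
Successor probabilities are now printed as the raw 32-bit numerator in fixed-width hex rather than a decimal rendering, which keeps the serialized value exact. BranchProbability is a fixed-point fraction over a 2^31 denominator, so the 0x40000000 seen in dumps is one half. A small sketch of the printing side using only BranchProbability's public API (helper name invented):

  #include "llvm/Support/BranchProbability.h"
  #include "llvm/Support/Format.h"
  #include "llvm/Support/raw_ostream.h"
  #include <cinttypes>

  // Print a probability the way the updated printer does: raw numerator,
  // zero-padded hex. A 1/2 probability comes out as "(0x40000000)".
  static void printProb(llvm::raw_ostream &OS, llvm::BranchProbability Prob) {
    OS << '(' << llvm::format("0x%08" PRIx32, Prob.getNumerator()) << ')';
  }
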
diff --git a/contrib/llvm/lib/CodeGen/MIRPrintingPass.cpp b/contrib/llvm/lib/CodeGen/MIRPrintingPass.cpp
index 8e7566a4e46b..c690bcfad567 100644
--- a/contrib/llvm/lib/CodeGen/MIRPrintingPass.cpp
+++ b/contrib/llvm/lib/CodeGen/MIRPrintingPass.cpp
@@ -33,7 +33,7 @@ struct MIRPrintingPass : public MachineFunctionPass {
MIRPrintingPass() : MachineFunctionPass(ID), OS(dbgs()) {}
MIRPrintingPass(raw_ostream &OS) : MachineFunctionPass(ID), OS(OS) {}
- const char *getPassName() const override { return "MIR Printing Pass"; }
+ StringRef getPassName() const override { return "MIR Printing Pass"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
diff --git a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
index 689dd0764ce0..549424d257fe 100644
--- a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -51,7 +51,7 @@ MCSymbol *MachineBasicBlock::getSymbol() const {
if (!CachedMCSymbol) {
const MachineFunction *MF = getParent();
MCContext &Ctx = MF->getContext();
- const char *Prefix = Ctx.getAsmInfo()->getPrivateLabelPrefix();
+ auto Prefix = Ctx.getAsmInfo()->getPrivateLabelPrefix();
assert(getNumber() >= 0 && "cannot get label for unreachable MBB");
CachedMCSymbol = Ctx.getOrCreateSymbol(Twine(Prefix) + "BB" +
Twine(MF->getFunctionNumber()) +
@@ -74,7 +74,8 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineBasicBlock &MBB) {
/// MBBs start out as #-1. When a MBB is added to a MachineFunction, it
/// gets the next available unique MBB number. If it is removed from a
/// MachineFunction, it goes back to being #-1.
-void ilist_traits<MachineBasicBlock>::addNodeToList(MachineBasicBlock *N) {
+void ilist_callback_traits<MachineBasicBlock>::addNodeToList(
+ MachineBasicBlock *N) {
MachineFunction &MF = *N->getParent();
N->Number = MF.addToMBBNumbering(N);
@@ -85,7 +86,8 @@ void ilist_traits<MachineBasicBlock>::addNodeToList(MachineBasicBlock *N) {
I->AddRegOperandsToUseLists(RegInfo);
}
-void ilist_traits<MachineBasicBlock>::removeNodeFromList(MachineBasicBlock *N) {
+void ilist_callback_traits<MachineBasicBlock>::removeNodeFromList(
+ MachineBasicBlock *N) {
N->getParent()->removeFromMBBNumbering(N->Number);
N->Number = -1;
}
@@ -116,15 +118,13 @@ void ilist_traits<MachineInstr>::removeNodeFromList(MachineInstr *N) {
/// When moving a range of instructions from one MBB list to another, we need to
/// update the parent pointers and the use/def lists.
-void ilist_traits<MachineInstr>::
-transferNodesFromList(ilist_traits<MachineInstr> &FromList,
- ilist_iterator<MachineInstr> First,
- ilist_iterator<MachineInstr> Last) {
+void ilist_traits<MachineInstr>::transferNodesFromList(ilist_traits &FromList,
+ instr_iterator First,
+ instr_iterator Last) {
assert(Parent->getParent() == FromList.Parent->getParent() &&
"MachineInstr parent mismatch!");
-
- // Splice within the same MBB -> no change.
- if (Parent == FromList.Parent) return;
+ assert(this != &FromList && "Called without a real transfer...");
+ assert(Parent != FromList.Parent && "Two lists have the same parent?");
// If splicing between two blocks within the same function, just update the
// parent pointers.
@@ -132,7 +132,7 @@ transferNodesFromList(ilist_traits<MachineInstr> &FromList,
First->setParent(Parent);
}
-void ilist_traits<MachineInstr>::deleteNode(MachineInstr* MI) {
+void ilist_traits<MachineInstr>::deleteNode(MachineInstr *MI) {
assert(!MI->getParent() && "MI is still in a block!");
Parent->getParent()->DeleteMachineInstr(MI);
}
@@ -149,12 +149,25 @@ MachineBasicBlock::iterator MachineBasicBlock::getFirstNonPHI() {
MachineBasicBlock::iterator
MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) {
iterator E = end();
+ while (I != E && (I->isPHI() || I->isPosition()))
+ ++I;
+ // FIXME: This needs to change if we wish to bundle labels
+ // inside the bundle.
+ assert((I == E || !I->isInsideBundle()) &&
+ "First non-phi / non-label instruction is inside a bundle!");
+ return I;
+}
+
+MachineBasicBlock::iterator
+MachineBasicBlock::SkipPHIsLabelsAndDebug(MachineBasicBlock::iterator I) {
+ iterator E = end();
while (I != E && (I->isPHI() || I->isPosition() || I->isDebugValue()))
++I;
// FIXME: This needs to change if we wish to bundle labels / dbg_values
// inside the bundle.
assert((I == E || !I->isInsideBundle()) &&
- "First non-phi / non-label instruction is inside a bundle!");
+ "First non-phi / non-label / non-debug "
+ "instruction is inside a bundle!");
return I;
}
@@ -178,10 +191,7 @@ MachineBasicBlock::instr_iterator MachineBasicBlock::getFirstInstrTerminator() {
MachineBasicBlock::iterator MachineBasicBlock::getFirstNonDebugInstr() {
// Skip over begin-of-block dbg_value instructions.
- iterator I = begin(), E = end();
- while (I != E && I->isDebugValue())
- ++I;
- return I;
+ return skipDebugInstructionsForward(begin(), end());
}
MachineBasicBlock::iterator MachineBasicBlock::getLastNonDebugInstr() {
@@ -278,7 +288,7 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
OS << " Live Ins:";
for (const auto &LI : make_range(livein_begin(), livein_end())) {
OS << ' ' << PrintReg(LI.PhysReg, TRI);
- if (LI.LaneMask != ~0u)
+ if (!LI.LaneMask.all())
OS << ':' << PrintLaneMask(LI.LaneMask);
}
OS << '\n';
@@ -323,22 +333,20 @@ void MachineBasicBlock::printAsOperand(raw_ostream &OS,
}
void MachineBasicBlock::removeLiveIn(MCPhysReg Reg, LaneBitmask LaneMask) {
- LiveInVector::iterator I = std::find_if(
- LiveIns.begin(), LiveIns.end(),
- [Reg] (const RegisterMaskPair &LI) { return LI.PhysReg == Reg; });
+ LiveInVector::iterator I = find_if(
+ LiveIns, [Reg](const RegisterMaskPair &LI) { return LI.PhysReg == Reg; });
if (I == LiveIns.end())
return;
I->LaneMask &= ~LaneMask;
- if (I->LaneMask == 0)
+ if (I->LaneMask.none())
LiveIns.erase(I);
}
bool MachineBasicBlock::isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask) const {
- livein_iterator I = std::find_if(
- LiveIns.begin(), LiveIns.end(),
- [Reg] (const RegisterMaskPair &LI) { return LI.PhysReg == Reg; });
- return I != livein_end() && (I->LaneMask & LaneMask) != 0;
+ livein_iterator I = find_if(
+ LiveIns, [Reg](const RegisterMaskPair &LI) { return LI.PhysReg == Reg; });
+ return I != livein_end() && (I->LaneMask & LaneMask).any();
}
void MachineBasicBlock::sortUniqueLiveIns() {
@@ -418,7 +426,7 @@ void MachineBasicBlock::updateTerminator() {
// The block has an unconditional branch. If its successor is now its
// layout successor, delete the branch.
if (isLayoutSuccessor(TBB))
- TII->RemoveBranch(*this);
+ TII->removeBranch(*this);
} else {
// The block has an unconditional fallthrough. If its successor is not its
// layout successor, insert a branch. First we have to locate the only
@@ -438,7 +446,7 @@ void MachineBasicBlock::updateTerminator() {
// Finally update the unconditional successor to be reached via a branch
// if it would not be reached by fallthrough.
if (!isLayoutSuccessor(TBB))
- TII->InsertBranch(*this, TBB, nullptr, Cond, DL);
+ TII->insertBranch(*this, TBB, nullptr, Cond, DL);
}
return;
}
@@ -448,13 +456,13 @@ void MachineBasicBlock::updateTerminator() {
// successors is its layout successor, rewrite it to a fallthrough
// conditional branch.
if (isLayoutSuccessor(TBB)) {
- if (TII->ReverseBranchCondition(Cond))
+ if (TII->reverseBranchCondition(Cond))
return;
- TII->RemoveBranch(*this);
- TII->InsertBranch(*this, FBB, nullptr, Cond, DL);
+ TII->removeBranch(*this);
+ TII->insertBranch(*this, FBB, nullptr, Cond, DL);
} else if (isLayoutSuccessor(FBB)) {
- TII->RemoveBranch(*this);
- TII->InsertBranch(*this, TBB, nullptr, Cond, DL);
+ TII->removeBranch(*this);
+ TII->insertBranch(*this, TBB, nullptr, Cond, DL);
}
return;
}
@@ -476,37 +484,37 @@ void MachineBasicBlock::updateTerminator() {
// Remove the conditional jump, leaving unconditional fallthrough.
// FIXME: This does not seem like a reasonable pattern to support, but it
// has been seen in the wild coming out of degenerate ARM test cases.
- TII->RemoveBranch(*this);
+ TII->removeBranch(*this);
// Finally update the unconditional successor to be reached via a branch if
// it would not be reached by fallthrough.
if (!isLayoutSuccessor(TBB))
- TII->InsertBranch(*this, TBB, nullptr, Cond, DL);
+ TII->insertBranch(*this, TBB, nullptr, Cond, DL);
return;
}
// We enter here iff exactly one successor is TBB which cannot fallthrough
// and the rest successors if any are EHPads. In this case, we need to
// change the conditional branch into unconditional branch.
- TII->RemoveBranch(*this);
+ TII->removeBranch(*this);
Cond.clear();
- TII->InsertBranch(*this, TBB, nullptr, Cond, DL);
+ TII->insertBranch(*this, TBB, nullptr, Cond, DL);
return;
}
// The block has a fallthrough conditional branch.
if (isLayoutSuccessor(TBB)) {
- if (TII->ReverseBranchCondition(Cond)) {
+ if (TII->reverseBranchCondition(Cond)) {
// We can't reverse the condition, add an unconditional branch.
Cond.clear();
- TII->InsertBranch(*this, FallthroughBB, nullptr, Cond, DL);
+ TII->insertBranch(*this, FallthroughBB, nullptr, Cond, DL);
return;
}
- TII->RemoveBranch(*this);
- TII->InsertBranch(*this, FallthroughBB, nullptr, Cond, DL);
+ TII->removeBranch(*this);
+ TII->insertBranch(*this, FallthroughBB, nullptr, Cond, DL);
} else if (!isLayoutSuccessor(FallthroughBB)) {
- TII->RemoveBranch(*this);
- TII->InsertBranch(*this, TBB, FallthroughBB, Cond, DL);
+ TII->removeBranch(*this);
+ TII->insertBranch(*this, TBB, FallthroughBB, Cond, DL);
}
}
@@ -545,7 +553,7 @@ void MachineBasicBlock::addSuccessorWithoutProb(MachineBasicBlock *Succ) {
void MachineBasicBlock::removeSuccessor(MachineBasicBlock *Succ,
bool NormalizeSuccProbs) {
- succ_iterator I = std::find(Successors.begin(), Successors.end(), Succ);
+ succ_iterator I = find(Successors, Succ);
removeSuccessor(I, NormalizeSuccProbs);
}
@@ -611,7 +619,7 @@ void MachineBasicBlock::addPredecessor(MachineBasicBlock *Pred) {
}
void MachineBasicBlock::removePredecessor(MachineBasicBlock *Pred) {
- pred_iterator I = std::find(Predecessors.begin(), Predecessors.end(), Pred);
+ pred_iterator I = find(Predecessors, Pred);
assert(I != Predecessors.end() && "Pred is not a predecessor of this block!");
Predecessors.erase(I);
}
@@ -661,11 +669,11 @@ MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB) {
}
bool MachineBasicBlock::isPredecessor(const MachineBasicBlock *MBB) const {
- return std::find(pred_begin(), pred_end(), MBB) != pred_end();
+ return is_contained(predecessors(), MBB);
}
bool MachineBasicBlock::isSuccessor(const MachineBasicBlock *MBB) const {
- return std::find(succ_begin(), succ_end(), MBB) != succ_end();
+ return is_contained(successors(), MBB);
}
bool MachineBasicBlock::isLayoutSuccessor(const MachineBasicBlock *MBB) const {
@@ -775,7 +783,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ,
continue;
unsigned Reg = OI->getReg();
- if (std::find(UsedRegs.begin(), UsedRegs.end(), Reg) == UsedRegs.end())
+ if (!is_contained(UsedRegs, Reg))
UsedRegs.push_back(Reg);
}
}
@@ -802,9 +810,8 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ,
for (SmallVectorImpl<MachineInstr*>::iterator I = Terminators.begin(),
E = Terminators.end(); I != E; ++I) {
- if (std::find(NewTerminators.begin(), NewTerminators.end(), *I) ==
- NewTerminators.end())
- Indexes->removeMachineInstrFromMaps(**I);
+ if (!is_contained(NewTerminators, *I))
+ Indexes->removeMachineInstrFromMaps(**I);
}
}
@@ -813,7 +820,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ,
if (!NMBB->isLayoutSuccessor(Succ)) {
SmallVector<MachineOperand, 4> Cond;
const TargetInstrInfo *TII = getParent()->getSubtarget().getInstrInfo();
- TII->InsertBranch(*NMBB, Succ, nullptr, Cond, DL);
+ TII->insertBranch(*NMBB, Succ, nullptr, Cond, DL);
if (Indexes) {
for (MachineInstr &MI : NMBB->instrs()) {
@@ -1090,16 +1097,16 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA,
bool Changed = false;
- MachineFunction::iterator FallThru = std::next(getIterator());
+ MachineBasicBlock *FallThru = getNextNode();
if (!DestA && !DestB) {
// Block falls through to successor.
- DestA = &*FallThru;
- DestB = &*FallThru;
+ DestA = FallThru;
+ DestB = FallThru;
} else if (DestA && !DestB) {
if (IsCond)
// Block ends in conditional jump that falls through to successor.
- DestB = &*FallThru;
+ DestB = FallThru;
} else {
assert(DestA && DestB && IsCond &&
"CFG in a bad state. Cannot correct CFG edges");
@@ -1130,17 +1137,11 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA,
/// instructions. Return UnknownLoc if there is none.
DebugLoc
MachineBasicBlock::findDebugLoc(instr_iterator MBBI) {
- DebugLoc DL;
- instr_iterator E = instr_end();
- if (MBBI == E)
- return DL;
-
// Skip debug declarations, we don't want a DebugLoc from them.
- while (MBBI != E && MBBI->isDebugValue())
- MBBI++;
- if (MBBI != E)
- DL = MBBI->getDebugLoc();
- return DL;
+ MBBI = skipDebugInstructionsForward(MBBI, instr_end());
+ if (MBBI != instr_end())
+ return MBBI->getDebugLoc();
+ return {};
}
/// Return probability of the edge from this block to MBB.
@@ -1287,3 +1288,7 @@ MachineBasicBlock::getEndClobberMask(const TargetRegisterInfo *TRI) const {
// care what kind of return it is, putting a mask after it is a no-op.
return isReturnBlock() && !succ_empty() ? TRI->getNoPreservedMask() : nullptr;
}
+
+void MachineBasicBlock::clearLiveIns() {
+ LiveIns.clear();
+}
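
Several hunks above swap explicit std::find/std::find_if calls over begin()/end() pairs for the range helpers in llvm/ADT/STLExtras.h (find, find_if, is_contained), and compare LaneBitmask values through its any()/all()/none() predicates instead of against raw integers. A short sketch of the range-helper pattern on an invented container:

  #include "llvm/ADT/STLExtras.h"
  #include "llvm/ADT/SmallVector.h"

  // The range forms take the container once instead of an iterator pair.
  static bool hasSmallValue(const llvm::SmallVectorImpl<int> &Values) {
    // Was: std::find_if(Values.begin(), Values.end(), Pred) != Values.end()
    return llvm::find_if(Values, [](int V) { return V < 16; }) != Values.end();
  }

  static bool containsZero(const llvm::SmallVectorImpl<int> &Values) {
    // Was: std::find(Values.begin(), Values.end(), 0) != Values.end()
    return llvm::is_contained(Values, 0);
  }
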
diff --git a/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
index 6c0f99fa111e..7d5124d30a04 100644
--- a/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp
@@ -42,9 +42,7 @@ static cl::opt<GVDAGType> ViewMachineBlockFreqPropagationDAG(
"display a graph using the raw "
"integer fractional block frequency representation."),
clEnumValN(GVDT_Count, "count", "display a graph using the real "
- "profile count if available."),
-
- clEnumValEnd));
+ "profile count if available.")));
extern cl::opt<std::string> ViewBlockFreqFuncName;
extern cl::opt<unsigned> ViewHotFreqPercent;
@@ -52,29 +50,26 @@ extern cl::opt<unsigned> ViewHotFreqPercent;
namespace llvm {
template <> struct GraphTraits<MachineBlockFrequencyInfo *> {
- typedef const MachineBasicBlock NodeType;
+ typedef const MachineBasicBlock *NodeRef;
typedef MachineBasicBlock::const_succ_iterator ChildIteratorType;
- typedef MachineFunction::const_iterator nodes_iterator;
+ typedef pointer_iterator<MachineFunction::const_iterator> nodes_iterator;
- static inline const NodeType *
- getEntryNode(const MachineBlockFrequencyInfo *G) {
+ static NodeRef getEntryNode(const MachineBlockFrequencyInfo *G) {
return &G->getFunction()->front();
}
- static ChildIteratorType child_begin(const NodeType *N) {
+ static ChildIteratorType child_begin(const NodeRef N) {
return N->succ_begin();
}
- static ChildIteratorType child_end(const NodeType *N) {
- return N->succ_end();
- }
+ static ChildIteratorType child_end(const NodeRef N) { return N->succ_end(); }
static nodes_iterator nodes_begin(const MachineBlockFrequencyInfo *G) {
- return G->getFunction()->begin();
+ return nodes_iterator(G->getFunction()->begin());
}
static nodes_iterator nodes_end(const MachineBlockFrequencyInfo *G) {
- return G->getFunction()->end();
+ return nodes_iterator(G->getFunction()->end());
}
};
@@ -175,6 +170,12 @@ Optional<uint64_t> MachineBlockFrequencyInfo::getBlockProfileCount(
return MBFI ? MBFI->getBlockProfileCount(*F, MBB) : None;
}
+Optional<uint64_t>
+MachineBlockFrequencyInfo::getProfileCountFromFreq(uint64_t Freq) const {
+ const Function *F = MBFI->getFunction()->getFunction();
+ return MBFI ? MBFI->getProfileCountFromFreq(*F, Freq) : None;
+}
+
const MachineFunction *MachineBlockFrequencyInfo::getFunction() const {
return MBFI ? MBFI->getFunction() : nullptr;
}
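
The GraphTraits specialization above follows the NodeType-to-NodeRef migration: the node handle is now the NodeRef typedef itself (a pointer), child_begin/child_end take it by value, and pointer_iterator adapts the function's block iterator into an iterator over pointers. For reference, the general post-migration shape on a toy graph (all types here are invented):

  #include "llvm/ADT/GraphTraits.h"
  #include <vector>

  struct ToyNode {
    std::vector<ToyNode *> Succs;
  };
  struct ToyGraph {
    ToyNode *Entry;
  };

  namespace llvm {
  template <> struct GraphTraits<ToyGraph *> {
    typedef ToyNode *NodeRef;
    typedef std::vector<ToyNode *>::const_iterator ChildIteratorType;

    static NodeRef getEntryNode(ToyGraph *G) { return G->Entry; }
    static ChildIteratorType child_begin(NodeRef N) { return N->Succs.begin(); }
    static ChildIteratorType child_end(NodeRef N) { return N->Succs.end(); }
  };
  } // end namespace llvm
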
diff --git a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index 03dda8b36a71..40e3840e6b0b 100644
--- a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -40,6 +40,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/TailDuplicator.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -78,10 +79,14 @@ static cl::opt<unsigned> ExitBlockBias(
"over the original exit to be considered the new exit."),
cl::init(0), cl::Hidden);
+// Definition:
+// - Outlining: placement of a basic block outside the chain or hot path.
+
static cl::opt<bool> OutlineOptionalBranches(
"outline-optional-branches",
- cl::desc("Put completely optional branches, i.e. branches with a common "
- "post dominator, out of line."),
+ cl::desc("Outlining optional branches will place blocks that are optional "
+ "branches, i.e. branches with a common post dominator, outside "
+ "the hot path or chain"),
cl::init(false), cl::Hidden);
static cl::opt<unsigned> OutlineOptionalThreshold(
@@ -117,6 +122,12 @@ static cl::opt<unsigned> MisfetchCost(
static cl::opt<unsigned> JumpInstCost("jump-inst-cost",
cl::desc("Cost of jump instructions."),
cl::init(1), cl::Hidden);
+static cl::opt<bool>
+TailDupPlacement("tail-dup-placement",
+ cl::desc("Perform tail duplication during placement. "
+ "Creates more fallthrough opportunites in "
+ "outline branches."),
+ cl::init(true), cl::Hidden);
static cl::opt<bool>
BranchFoldPlacement("branch-fold-placement",
@@ -124,6 +135,14 @@ BranchFoldPlacement("branch-fold-placement",
"Reduces code size."),
cl::init(true), cl::Hidden);
+// Heuristic for tail duplication.
+static cl::opt<unsigned> TailDuplicatePlacementThreshold(
+ "tail-dup-placement-threshold",
+ cl::desc("Instruction cutoff for tail duplication during layout. "
+ "Tail merging during layout is forced to have a threshold "
+ "that won't conflict."), cl::init(2),
+ cl::Hidden);
+
extern cl::opt<unsigned> StaticLikelyProb;
extern cl::opt<unsigned> ProfileLikelyProb;
@@ -181,6 +200,16 @@ public:
/// \brief End of blocks within the chain.
iterator end() { return Blocks.end(); }
+ bool remove(MachineBasicBlock* BB) {
+ for(iterator i = begin(); i != end(); ++i) {
+ if (*i == BB) {
+ Blocks.erase(i);
+ return true;
+ }
+ }
+ return false;
+ }
+
/// \brief Merge a block chain into this one.
///
/// This routine merges a block chain into this one. It takes care of forming
@@ -235,7 +264,7 @@ public:
namespace {
class MachineBlockPlacement : public MachineFunctionPass {
/// \brief A typedef for a block filter set.
- typedef SmallPtrSet<MachineBasicBlock *, 16> BlockFilterSet;
+ typedef SmallSetVector<MachineBasicBlock *, 16> BlockFilterSet;
/// \brief work lists of blocks that are ready to be laid out
SmallVector<MachineBasicBlock *, 16> BlockWorkList;
@@ -253,6 +282,11 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// \brief A handle to the loop info.
MachineLoopInfo *MLI;
+ /// \brief Preferred loop exit.
+ /// Member variable for convenience. It may be removed by duplication deep
+ /// in the call stack.
+ MachineBasicBlock *PreferredLoopExit;
+
/// \brief A handle to the target's instruction info.
const TargetInstrInfo *TII;
@@ -262,6 +296,13 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// \brief A handle to the post dominator tree.
MachineDominatorTree *MDT;
+ /// \brief Duplicator used to duplicate tails during placement.
+ ///
+ /// Placement decisions can open up new tail duplication opportunities, but
+ /// since tail duplication affects placement decisions of later blocks, it
+ /// must be done inline.
+ TailDuplicator TailDup;
+
/// \brief A set of blocks that are unavoidably execute, i.e. they dominate
/// all terminators of the MachineFunction.
SmallPtrSet<MachineBasicBlock *, 4> UnavoidableBlocks;
@@ -283,8 +324,26 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// between basic blocks.
DenseMap<MachineBasicBlock *, BlockChain *> BlockToChain;
+#ifndef NDEBUG
+ /// The set of basic blocks that have terminators that cannot be fully
+ /// analyzed. These basic blocks cannot be re-ordered safely by
+ /// MachineBlockPlacement, and we must preserve physical layout of these
+ /// blocks and their successors through the pass.
+ SmallPtrSet<MachineBasicBlock *, 4> BlocksWithUnanalyzableExits;
+#endif
+
+ /// Decrease the UnscheduledPredecessors count for all blocks in chain, and
+ /// if the count goes to 0, add them to the appropriate work list.
void markChainSuccessors(BlockChain &Chain, MachineBasicBlock *LoopHeaderBB,
const BlockFilterSet *BlockFilter = nullptr);
+
+ /// Decrease the UnscheduledPredecessors count for a single block, and
+ /// if the count goes to 0, add them to the appropriate work list.
+ void markBlockSuccessors(
+ BlockChain &Chain, MachineBasicBlock *BB, MachineBasicBlock *LoopHeaderBB,
+ const BlockFilterSet *BlockFilter = nullptr);
+
+
BranchProbability
collectViableSuccessors(MachineBasicBlock *BB, BlockChain &Chain,
const BlockFilterSet *BlockFilter,
@@ -294,6 +353,16 @@ class MachineBlockPlacement : public MachineFunctionPass {
const BlockFilterSet *BlockFilter,
BranchProbability SuccProb,
BranchProbability HotProb);
+ bool repeatedlyTailDuplicateBlock(
+ MachineBasicBlock *BB, MachineBasicBlock *&LPred,
+ MachineBasicBlock *LoopHeaderBB,
+ BlockChain &Chain, BlockFilterSet *BlockFilter,
+ MachineFunction::iterator &PrevUnplacedBlockIt);
+ bool maybeTailDuplicateBlock(MachineBasicBlock *BB, MachineBasicBlock *LPred,
+ const BlockChain &Chain,
+ BlockFilterSet *BlockFilter,
+ MachineFunction::iterator &PrevUnplacedBlockIt,
+ bool &DuplicatedToPred);
bool
hasBetterLayoutPredecessor(MachineBasicBlock *BB, MachineBasicBlock *Succ,
BlockChain &SuccChain, BranchProbability SuccProb,
@@ -319,7 +388,7 @@ class MachineBlockPlacement : public MachineFunctionPass {
SmallPtrSetImpl<BlockChain *> &UpdatedPreds,
const BlockFilterSet *BlockFilter);
void buildChain(MachineBasicBlock *BB, BlockChain &Chain,
- const BlockFilterSet *BlockFilter = nullptr);
+ BlockFilterSet *BlockFilter = nullptr);
MachineBasicBlock *findBestLoopTop(MachineLoop &L,
const BlockFilterSet &LoopBlockSet);
MachineBasicBlock *findBestLoopExit(MachineLoop &L,
@@ -384,37 +453,49 @@ static std::string getBlockName(MachineBasicBlock *BB) {
/// When a chain is being merged into the "placed" chain, this routine will
/// quickly walk the successors of each block in the chain and mark them as
/// having one fewer active predecessor. It also adds any successors of this
-/// chain which reach the zero-predecessor state to the worklist passed in.
+/// chain which reach the zero-predecessor state to the appropriate worklist.
void MachineBlockPlacement::markChainSuccessors(
BlockChain &Chain, MachineBasicBlock *LoopHeaderBB,
const BlockFilterSet *BlockFilter) {
// Walk all the blocks in this chain, marking their successors as having
// a predecessor placed.
for (MachineBasicBlock *MBB : Chain) {
- // Add any successors for which this is the only un-placed in-loop
- // predecessor to the worklist as a viable candidate for CFG-neutral
- // placement. No subsequent placement of this block will violate the CFG
- // shape, so we get to use heuristics to choose a favorable placement.
- for (MachineBasicBlock *Succ : MBB->successors()) {
- if (BlockFilter && !BlockFilter->count(Succ))
- continue;
- BlockChain &SuccChain = *BlockToChain[Succ];
- // Disregard edges within a fixed chain, or edges to the loop header.
- if (&Chain == &SuccChain || Succ == LoopHeaderBB)
- continue;
+ markBlockSuccessors(Chain, MBB, LoopHeaderBB, BlockFilter);
+ }
+}
- // This is a cross-chain edge that is within the loop, so decrement the
- // loop predecessor count of the destination chain.
- if (SuccChain.UnscheduledPredecessors == 0 ||
- --SuccChain.UnscheduledPredecessors > 0)
- continue;
+/// \brief Mark a single block's successors as having one fewer preds.
+///
+/// Under normal circumstances, this is only called by markChainSuccessors,
+/// but if a block that was to be placed is completely tail-duplicated away,
+/// and was duplicated into the chain end, we need to redo markBlockSuccessors
+/// for just that block.
+void MachineBlockPlacement::markBlockSuccessors(
+ BlockChain &Chain, MachineBasicBlock *MBB, MachineBasicBlock *LoopHeaderBB,
+ const BlockFilterSet *BlockFilter) {
+ // Add any successors for which this is the only un-placed in-loop
+ // predecessor to the worklist as a viable candidate for CFG-neutral
+ // placement. No subsequent placement of this block will violate the CFG
+ // shape, so we get to use heuristics to choose a favorable placement.
+ for (MachineBasicBlock *Succ : MBB->successors()) {
+ if (BlockFilter && !BlockFilter->count(Succ))
+ continue;
+ BlockChain &SuccChain = *BlockToChain[Succ];
+ // Disregard edges within a fixed chain, or edges to the loop header.
+ if (&Chain == &SuccChain || Succ == LoopHeaderBB)
+ continue;
- auto *MBB = *SuccChain.begin();
- if (MBB->isEHPad())
- EHPadWorkList.push_back(MBB);
- else
- BlockWorkList.push_back(MBB);
- }
+ // This is a cross-chain edge that is within the loop, so decrement the
+ // loop predecessor count of the destination chain.
+ if (SuccChain.UnscheduledPredecessors == 0 ||
+ --SuccChain.UnscheduledPredecessors > 0)
+ continue;
+
+ auto *NewBB = *SuccChain.begin();
+ if (NewBB->isEHPad())
+ EHPadWorkList.push_back(NewBB);
+ else
+ BlockWorkList.push_back(NewBB);
}
}
@@ -627,16 +708,46 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor(
// BB->Succ. This is equivalent to looking the CFG backward with backward
// edge: Prob(Succ->BB) needs to >= HotProb in order to be selected (without
// profile data).
-
+ // --------------------------------------------------------------------------
+ // Case 3: forked diamond
+ //         S
+ //        / \
+ //       /   \
+ //      BB    Pred
+ //      | \   / |
+ //      |  \ /  |
+ //      |   X   |
+ //      |  / \  |
+ //      | /   \ |
+ //      S1     S2
+ //
+ // The current block is BB and edge BB->S1 is now being evaluated.
+ // As above S->BB was already selected because
+ // prob(S->BB) > prob(S->Pred). Assume that prob(BB->S1) >= prob(BB->S2).
+ //
+ // topo-order:
+ //
+ //      S-------|                        ---S
+ //      |       |                        |  |
+ //   ---BB      |                        |  BB
+ //   |   |      |                        |  |
+ //   |  Pred----|                        |  S1----
+ //   |  |                                |       |
+ //   --(S1 or S2)                        ---Pred--
+ //
+ // topo-cost = freq(S->Pred) + freq(BB->S1) + freq(BB->S2)
+ // + min(freq(Pred->S1), freq(Pred->S2))
+ // Non-topo-order cost:
+ // In the worst case, S2 will not get laid out after Pred.
+ // non-topo-cost = 2 * freq(S->Pred) + freq(BB->S2).
+ // To be conservative, we can assume that min(freq(Pred->S1), freq(Pred->S2))
+ // is 0. Then the non topo layout is better when
+ // freq(S->Pred) < freq(BB->S1).
+ // This is exactly what is checked below.
+ // Note there are other shapes that apply (Pred may not be a single block,
+ // but they all fit this general pattern.)
BranchProbability HotProb = getLayoutSuccessorProbThreshold(BB);
- // Forward checking. For case 2, SuccProb will be 1.
- if (SuccProb < HotProb) {
- DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb
- << " (prob) (CFG conflict)\n");
- return true;
- }
-
// Make sure that a hot successor doesn't have a globally more
// important predecessor.
BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq(BB) * RealSuccProb;
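
A quick numeric check of the cost comparison derived in the new comment above, with made-up edge frequencies: let freq(S->Pred) = 40, freq(BB->S1) = 100, freq(BB->S2) = 30, and bound min(freq(Pred->S1), freq(Pred->S2)) by 0 as the comment does. Then topo-cost >= 40 + 100 + 30 = 170 while non-topo-cost = 2 * 40 + 30 = 110, so the non-topological layout is cheaper; and indeed freq(S->Pred) = 40 < 100 = freq(BB->S1), which is the condition the comment reduces the comparison to.
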
@@ -647,11 +758,11 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor(
(BlockFilter && !BlockFilter->count(Pred)) ||
BlockToChain[Pred] == &Chain)
continue;
- // Do backward checking. For case 1, it is actually redundant check. For
- // case 2 above, we need a backward checking to filter out edges that are
- // not 'strongly' biased. With profile data available, the check is mostly
- // redundant too (when threshold prob is set at 50%) unless S has more than
- // two successors.
+ // Do backward checking.
+ // For all cases above, we need a backward checking to filter out edges that
+ // are not 'strongly' biased. With profile data available, the check is
+ // mostly redundant for case 2 (when threshold prob is set at 50%) unless S
+ // has more than two successors.
// BB Pred
// \ /
// Succ
@@ -660,6 +771,8 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor(
// i.e. freq(BB->Succ) > freq(BB->Succ) * HotProb + freq(Pred->Succ) *
// HotProb
// i.e. freq((BB->Succ) * (1 - HotProb) > freq(Pred->Succ) * HotProb
+ // Case 1 is covered too, because the first equation reduces to:
+ // prob(BB->Succ) > HotProb. (freq(Succ) = freq(BB) for a triangle)
BlockFrequency PredEdgeFreq =
MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, Succ);
if (PredEdgeFreq * HotProb >= CandidateEdgeFreq * HotProb.getCompl()) {
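
Writing the backward check out as an inequality makes the "case 1 is covered" claim easy to verify; the derivation below is editorial and only restates the comments above, with p standing for HotProb and f(.) for edge frequency:

    \text{no CFG conflict} \iff f(BB\to Succ)\,(1-p) > f(Pred\to Succ)\,p
                           \iff f(BB\to Succ) > p\,\bigl(f(BB\to Succ)+f(Pred\to Succ)\bigr)

    \text{Case 1 (triangle): } f(BB\to Succ)+f(Pred\to Succ)=f(Succ)=f(BB)
                           \;\Rightarrow\; \mathrm{prob}(BB\to Succ) > p
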
@@ -669,7 +782,7 @@ bool MachineBlockPlacement::hasBetterLayoutPredecessor(
}
if (BadCFGConflict) {
- DEBUG(dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb
+ DEBUG(dbgs() << " Not a candidate: " << getBlockName(Succ) << " -> " << SuccProb
<< " (prob) (non-cold CFG conflict)\n");
return true;
}
@@ -699,7 +812,7 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
auto AdjustedSumProb =
collectViableSuccessors(BB, Chain, BlockFilter, Successors);
- DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n");
+ DEBUG(dbgs() << "Selecting best successor for: " << getBlockName(BB) << "\n");
for (MachineBasicBlock *Succ : Successors) {
auto RealSuccProb = MBPI->getEdgeProbability(BB, Succ);
BranchProbability SuccProb =
@@ -718,15 +831,23 @@ MachineBlockPlacement::selectBestSuccessor(MachineBasicBlock *BB,
continue;
DEBUG(
- dbgs() << " " << getBlockName(Succ) << " -> " << SuccProb
- << " (prob)"
+ dbgs() << " Candidate: " << getBlockName(Succ) << ", probability: "
+ << SuccProb
<< (SuccChain.UnscheduledPredecessors != 0 ? " (CFG break)" : "")
<< "\n");
- if (BestSucc && BestProb >= SuccProb)
+
+ if (BestSucc && BestProb >= SuccProb) {
+ DEBUG(dbgs() << " Not the best candidate, continuing\n");
continue;
+ }
+
+ DEBUG(dbgs() << " Setting it as best candidate\n");
BestSucc = Succ;
BestProb = SuccProb;
}
+ if (BestSucc)
+ DEBUG(dbgs() << " Selected: " << getBlockName(BestSucc) << "\n");
+
return BestSucc;
}
@@ -746,10 +867,10 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock(
// worklist of already placed entries.
// FIXME: If this shows up on profiles, it could be folded (at the cost of
// some code complexity) into the loop below.
- WorkList.erase(std::remove_if(WorkList.begin(), WorkList.end(),
- [&](MachineBasicBlock *BB) {
- return BlockToChain.lookup(BB) == &Chain;
- }),
+ WorkList.erase(remove_if(WorkList,
+ [&](MachineBasicBlock *BB) {
+ return BlockToChain.lookup(BB) == &Chain;
+ }),
WorkList.end());
if (WorkList.empty())
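
The change above only swaps std::remove_if with LLVM's range-based remove_if from STLExtras.h; the behaviour is the ordinary erase-remove idiom. A self-contained equivalent using just the standard library (illustrative names):

    #include <algorithm>
    #include <vector>

    // remove_if compacts the kept elements to the front and returns the new
    // logical end; erase() then drops the tail in a single call.
    void dropPlacedBlocks(std::vector<int> &WorkList, int PlacedChain,
                          const std::vector<int> &BlockToChain) {
      WorkList.erase(std::remove_if(WorkList.begin(), WorkList.end(),
                                    [&](int BB) {
                                      return BlockToChain[BB] == PlacedChain;
                                    }),
                     WorkList.end());
    }
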
@@ -858,7 +979,7 @@ void MachineBlockPlacement::fillWorkLists(
void MachineBlockPlacement::buildChain(
MachineBasicBlock *BB, BlockChain &Chain,
- const BlockFilterSet *BlockFilter) {
+ BlockFilterSet *BlockFilter) {
assert(BB && "BB must not be null.\n");
assert(BlockToChain[BB] == &Chain && "BlockToChainMap mis-match.\n");
MachineFunction::iterator PrevUnplacedBlockIt = F->begin();
@@ -893,6 +1014,17 @@ void MachineBlockPlacement::buildChain(
"layout successor until the CFG reduces\n");
}
+ // Placement may have changed tail duplication opportunities.
+ // Check for that now.
+ if (TailDupPlacement && BestSucc) {
+ // If the chosen successor was duplicated into all its predecessors,
+ // don't bother laying it out, just go round the loop again with BB as
+ // the chain end.
+ if (repeatedlyTailDuplicateBlock(BestSucc, BB, LoopHeaderBB, Chain,
+ BlockFilter, PrevUnplacedBlockIt))
+ continue;
+ }
+
// Place this block, updating the datastructures to reflect its placement.
BlockChain &SuccChain = *BlockToChain[BestSucc];
// Zero out UnscheduledPredecessors for the successor we're about to merge in case
@@ -922,6 +1054,16 @@ void MachineBlockPlacement::buildChain(
MachineBasicBlock *
MachineBlockPlacement::findBestLoopTop(MachineLoop &L,
const BlockFilterSet &LoopBlockSet) {
+ // Placing the latch block before the header may introduce an extra branch
+ // that skips this block the first time the loop is executed, which we want
+ // to avoid when optimising for size.
+ // FIXME: in theory there is a case that does not introduce a new branch,
+ // i.e. when the layout predecessor does not fallthrough to the loop header.
+ // In practice this never happens though: there always seems to be a preheader
+ // that can fallthrough and that is also placed before the header.
+ if (F->getFunction()->optForSize())
+ return L.getHeader();
+
// Check that the header hasn't been fused with a preheader block due to
// crazy branches. If it has, we need to start with the header at the top to
// prevent pulling the preheader into the loop body.
@@ -937,7 +1079,7 @@ MachineBlockPlacement::findBestLoopTop(MachineLoop &L,
for (MachineBasicBlock *Pred : L.getHeader()->predecessors()) {
if (!LoopBlockSet.count(Pred))
continue;
- DEBUG(dbgs() << " header pred: " << getBlockName(Pred) << ", "
+ DEBUG(dbgs() << " header pred: " << getBlockName(Pred) << ", has "
<< Pred->succ_size() << " successors, ";
MBFI->printBlockFreq(dbgs(), Pred) << " freq\n");
if (Pred->succ_size() > 1)
@@ -1066,8 +1208,14 @@ MachineBlockPlacement::findBestLoopExit(MachineLoop &L,
}
// Without a candidate exiting block or with only a single block in the
// loop, just use the loop header to layout the loop.
- if (!ExitingBB || L.getNumBlocks() == 1)
+ if (!ExitingBB) {
+ DEBUG(dbgs() << " No other candidate exit blocks, using loop header\n");
return nullptr;
+ }
+ if (L.getNumBlocks() == 1) {
+ DEBUG(dbgs() << " Loop has 1 block, using loop header as exit\n");
+ return nullptr;
+ }
// Also, if we have exit blocks which lead to outer loops but didn't select
// one of them as the exiting block we are rotating toward, disable loop
@@ -1116,8 +1264,7 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
}
}
- BlockChain::iterator ExitIt =
- std::find(LoopChain.begin(), LoopChain.end(), ExitingBB);
+ BlockChain::iterator ExitIt = find(LoopChain, ExitingBB);
if (ExitIt == LoopChain.end())
return;
@@ -1140,7 +1287,7 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
void MachineBlockPlacement::rotateLoopWithProfile(
BlockChain &LoopChain, MachineLoop &L, const BlockFilterSet &LoopBlockSet) {
auto HeaderBB = L.getHeader();
- auto HeaderIter = std::find(LoopChain.begin(), LoopChain.end(), HeaderBB);
+ auto HeaderIter = find(LoopChain, HeaderBB);
auto RotationPos = LoopChain.end();
BlockFrequency SmallestRotationCost = BlockFrequency::getMaxFrequency();
@@ -1340,9 +1487,8 @@ void MachineBlockPlacement::buildLoopChains(MachineLoop &L) {
// If we selected just the header for the loop top, look for a potentially
// profitable exit block in the event that rotating the loop can eliminate
// branches by placing an exit edge at the bottom.
- MachineBasicBlock *ExitingBB = nullptr;
if (!RotateLoopWithProfile && LoopTop == L.getHeader())
- ExitingBB = findBestLoopExit(L, LoopBlockSet);
+ PreferredLoopExit = findBestLoopExit(L, LoopBlockSet);
BlockChain &LoopChain = *BlockToChain[LoopTop];
@@ -1361,7 +1507,7 @@ void MachineBlockPlacement::buildLoopChains(MachineLoop &L) {
if (RotateLoopWithProfile)
rotateLoopWithProfile(LoopChain, L, LoopBlockSet);
else
- rotateLoop(LoopChain, ExitingBB, LoopBlockSet);
+ rotateLoop(LoopChain, PreferredLoopExit, LoopBlockSet);
DEBUG({
// Crash at the end so we get all of the debugging output first.
@@ -1374,7 +1520,7 @@ void MachineBlockPlacement::buildLoopChains(MachineLoop &L) {
}
for (MachineBasicBlock *ChainBB : LoopChain) {
dbgs() << " ... " << getBlockName(ChainBB) << "\n";
- if (!LoopBlockSet.erase(ChainBB)) {
+ if (!LoopBlockSet.remove(ChainBB)) {
// We don't mark the loop as bad here because there are real situations
// where this can occur. For example, with an unanalyzable fallthrough
// from a loop block to a non-loop block or vice versa.
@@ -1451,6 +1597,9 @@ void MachineBlockPlacement::buildCFGChains() {
<< getBlockName(BB) << " -> " << getBlockName(NextBB)
<< "\n");
Chain->merge(NextBB, nullptr);
+#ifndef NDEBUG
+ BlocksWithUnanalyzableExits.insert(&*BB);
+#endif
FI = NextFI;
BB = NextBB;
}
@@ -1460,6 +1609,7 @@ void MachineBlockPlacement::buildCFGChains() {
collectMustExecuteBBs();
// Build any loop-based chains.
+ PreferredLoopExit = nullptr;
for (MachineLoop *L : *MLI)
buildLoopChains(*L);
@@ -1522,6 +1672,19 @@ void MachineBlockPlacement::buildCFGChains() {
Cond.clear();
MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For AnalyzeBranch.
+#ifndef NDEBUG
+ if (!BlocksWithUnanalyzableExits.count(PrevBB)) {
+ // Given the exact block placement we chose, we may actually not _need_ to
+ // be able to edit PrevBB's terminator sequence, but not being _able_ to
+ // do that at this point is a bug.
+ assert((!TII->analyzeBranch(*PrevBB, TBB, FBB, Cond) ||
+ !PrevBB->canFallThrough()) &&
+ "Unexpected block with un-analyzable fallthrough!");
+ Cond.clear();
+ TBB = FBB = nullptr;
+ }
+#endif
+
// The "PrevBB" is not yet updated to reflect current code layout, so,
// o. it may fall-through to a block without explicit "goto" instruction
// before layout, and no longer fall-through it after layout; or
@@ -1576,15 +1739,15 @@ void MachineBlockPlacement::optimizeBranches() {
if (TBB && !Cond.empty() && FBB &&
MBPI->getEdgeProbability(ChainBB, FBB) >
MBPI->getEdgeProbability(ChainBB, TBB) &&
- !TII->ReverseBranchCondition(Cond)) {
+ !TII->reverseBranchCondition(Cond)) {
DEBUG(dbgs() << "Reverse order of the two branches: "
<< getBlockName(ChainBB) << "\n");
DEBUG(dbgs() << " Edge probability: "
<< MBPI->getEdgeProbability(ChainBB, FBB) << " vs "
<< MBPI->getEdgeProbability(ChainBB, TBB) << "\n");
DebugLoc dl; // FIXME: this is nowhere
- TII->RemoveBranch(*ChainBB);
- TII->InsertBranch(*ChainBB, FBB, TBB, Cond, dl);
+ TII->removeBranch(*ChainBB);
+ TII->insertBranch(*ChainBB, FBB, TBB, Cond, dl);
ChainBB->updateTerminator();
}
}
@@ -1659,6 +1822,175 @@ void MachineBlockPlacement::alignBlocks() {
}
}
+/// Tail duplicate \p BB into (some) predecessors if profitable, repeating if
+/// it was duplicated into its chain predecessor and removed.
+/// \p BB - Basic block that may be duplicated.
+///
+/// \p LPred - Chosen layout predecessor of \p BB.
+/// Updated to be the chain end if LPred is removed.
+/// \p Chain - Chain to which \p LPred belongs, and \p BB will belong.
+/// \p BlockFilter - Set of blocks that belong to the loop being laid out.
+/// Used to identify which blocks to update predecessor
+/// counts.
+/// \p PrevUnplacedBlockIt - Iterator pointing to the last block that was
+/// chosen in the given order due to unnatural CFG
+/// only needed if \p BB is removed and
+/// \p PrevUnplacedBlockIt pointed to \p BB.
+/// @return true if \p BB was removed.
+bool MachineBlockPlacement::repeatedlyTailDuplicateBlock(
+ MachineBasicBlock *BB, MachineBasicBlock *&LPred,
+ MachineBasicBlock *LoopHeaderBB,
+ BlockChain &Chain, BlockFilterSet *BlockFilter,
+ MachineFunction::iterator &PrevUnplacedBlockIt) {
+ bool Removed, DuplicatedToLPred;
+ bool DuplicatedToOriginalLPred;
+ Removed = maybeTailDuplicateBlock(BB, LPred, Chain, BlockFilter,
+ PrevUnplacedBlockIt,
+ DuplicatedToLPred);
+ if (!Removed)
+ return false;
+ DuplicatedToOriginalLPred = DuplicatedToLPred;
+ // Iteratively try to duplicate again. It can happen that a block that is
+ // duplicated into is still small enough to be duplicated again.
+ // No need to call markBlockSuccessors in this case, as the blocks being
+ // duplicated from here on are already scheduled.
+ // Note that DuplicatedToLPred always implies Removed.
+ while (DuplicatedToLPred) {
+    assert(Removed && "Block must have been removed to be duplicated into its "
+ "layout predecessor.");
+ MachineBasicBlock *DupBB, *DupPred;
+ // The removal callback causes Chain.end() to be updated when a block is
+ // removed. On the first pass through the loop, the chain end should be the
+ // same as it was on function entry. On subsequent passes, because we are
+ // duplicating the block at the end of the chain, if it is removed the
+ // chain will have shrunk by one block.
+ BlockChain::iterator ChainEnd = Chain.end();
+ DupBB = *(--ChainEnd);
+ // Now try to duplicate again.
+ if (ChainEnd == Chain.begin())
+ break;
+ DupPred = *std::prev(ChainEnd);
+ Removed = maybeTailDuplicateBlock(DupBB, DupPred, Chain, BlockFilter,
+ PrevUnplacedBlockIt,
+ DuplicatedToLPred);
+ }
+ // If BB was duplicated into LPred, it is now scheduled. But because it was
+ // removed, markChainSuccessors won't be called for its chain. Instead we
+ // call markBlockSuccessors for LPred to achieve the same effect. This must go
+ // at the end because repeating the tail duplication can increase the number
+ // of unscheduled predecessors.
+ LPred = *std::prev(Chain.end());
+ if (DuplicatedToOriginalLPred)
+ markBlockSuccessors(Chain, LPred, LoopHeaderBB, BlockFilter);
+ return true;
+}
+
+/// Tail duplicate \p BB into (some) predecessors if profitable.
+/// \p BB - Basic block that may be duplicated
+/// \p LPred - Chosen layout predecessor of \p BB
+/// \p Chain - Chain to which \p LPred belongs, and \p BB will belong.
+/// \p BlockFilter - Set of blocks that belong to the loop being laid out.
+/// Used to identify which blocks to update predecessor
+/// counts.
+/// \p PrevUnplacedBlockIt - Iterator pointing to the last block that was
+/// chosen in the given order due to unnatural CFG
+/// only needed if \p BB is removed and
+/// \p PrevUnplacedBlockIt pointed to \p BB.
+/// \p DuplicatedToLPred - True if the block was duplicated into LPred. Will
+/// only be true if the block was removed.
+/// \return - True if the block was duplicated into all preds and removed.
+bool MachineBlockPlacement::maybeTailDuplicateBlock(
+ MachineBasicBlock *BB, MachineBasicBlock *LPred,
+ const BlockChain &Chain, BlockFilterSet *BlockFilter,
+ MachineFunction::iterator &PrevUnplacedBlockIt,
+ bool &DuplicatedToLPred) {
+
+ DuplicatedToLPred = false;
+ DEBUG(dbgs() << "Redoing tail duplication for Succ#"
+ << BB->getNumber() << "\n");
+ bool IsSimple = TailDup.isSimpleBB(BB);
+ // Blocks with single successors don't create additional fallthrough
+ // opportunities. Don't duplicate them. TODO: When conditional exits are
+ // analyzable, allow them to be duplicated.
+ if (!IsSimple && BB->succ_size() == 1)
+ return false;
+ if (!TailDup.shouldTailDuplicate(IsSimple, *BB))
+ return false;
+ // This has to be a callback because none of it can be done after
+ // BB is deleted.
+ bool Removed = false;
+ auto RemovalCallback =
+ [&](MachineBasicBlock *RemBB) {
+ // Signal to outer function
+ Removed = true;
+
+ // Conservative default.
+ bool InWorkList = true;
+ // Remove from the Chain and Chain Map
+ if (BlockToChain.count(RemBB)) {
+ BlockChain *Chain = BlockToChain[RemBB];
+ InWorkList = Chain->UnscheduledPredecessors == 0;
+ Chain->remove(RemBB);
+ BlockToChain.erase(RemBB);
+ }
+
+ // Handle the unplaced block iterator
+ if (&(*PrevUnplacedBlockIt) == RemBB) {
+ PrevUnplacedBlockIt++;
+ }
+
+ // Handle the Work Lists
+ if (InWorkList) {
+ SmallVectorImpl<MachineBasicBlock *> &RemoveList = BlockWorkList;
+ if (RemBB->isEHPad())
+ RemoveList = EHPadWorkList;
+ RemoveList.erase(
+ remove_if(RemoveList,
+ [RemBB](MachineBasicBlock *BB) {return BB == RemBB;}),
+ RemoveList.end());
+ }
+
+ // Handle the filter set
+ if (BlockFilter) {
+ BlockFilter->remove(RemBB);
+ }
+
+ // Remove the block from loop info.
+ MLI->removeBlock(RemBB);
+ if (RemBB == PreferredLoopExit)
+ PreferredLoopExit = nullptr;
+
+ DEBUG(dbgs() << "TailDuplicator deleted block: "
+ << getBlockName(RemBB) << "\n");
+ };
+ auto RemovalCallbackRef =
+ llvm::function_ref<void(MachineBasicBlock*)>(RemovalCallback);
+
+ SmallVector<MachineBasicBlock *, 8> DuplicatedPreds;
+ TailDup.tailDuplicateAndUpdate(IsSimple, BB, LPred,
+ &DuplicatedPreds, &RemovalCallbackRef);
+
+ // Update UnscheduledPredecessors to reflect tail-duplication.
+ DuplicatedToLPred = false;
+ for (MachineBasicBlock *Pred : DuplicatedPreds) {
+ // We're only looking for unscheduled predecessors that match the filter.
+ BlockChain* PredChain = BlockToChain[Pred];
+ if (Pred == LPred)
+ DuplicatedToLPred = true;
+ if (Pred == LPred || (BlockFilter && !BlockFilter->count(Pred))
+ || PredChain == &Chain)
+ continue;
+ for (MachineBasicBlock *NewSucc : Pred->successors()) {
+ if (BlockFilter && !BlockFilter->count(NewSucc))
+ continue;
+ BlockChain *NewChain = BlockToChain[NewSucc];
+ if (NewChain != &Chain && NewChain != PredChain)
+ NewChain->UnscheduledPredecessors++;
+ }
+ }
+ return Removed;
+}
+
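
maybeTailDuplicateBlock has to patch up its side tables while a deleted block is still valid, which is why the clean-up is handed to the tail duplicator as a callback (wrapped in llvm::function_ref, a non-owning callable reference) rather than being run afterwards. A minimal standalone sketch of the same pattern, using std::function and hypothetical names for portability:

    #include <functional>
    #include <string>
    #include <vector>

    // The utility notifies the caller about every node it deletes, while the
    // node is still alive; afterwards the caller's fix-ups would be impossible.
    void deleteNodes(std::vector<std::string> &Nodes,
                     const std::function<void(const std::string &)> &OnRemoval) {
      for (const std::string &N : Nodes)
        OnRemoval(N); // caller-side bookkeeping happens here
      Nodes.clear();  // only now is the storage released
    }

    void caller() {
      std::vector<std::string> Nodes = {"bb.1", "bb.2"};
      std::vector<std::string> Log;
      // The lambda captures caller state by reference, just as RemovalCallback
      // above captures the pass's work lists and chain map.
      deleteNodes(Nodes, [&](const std::string &N) { Log.push_back(N); });
    }
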
bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(*MF.getFunction()))
return false;
@@ -1675,6 +2007,18 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
TII = MF.getSubtarget().getInstrInfo();
TLI = MF.getSubtarget().getTargetLowering();
MDT = &getAnalysis<MachineDominatorTree>();
+
+ // Initialize PreferredLoopExit to nullptr here since it may never be set if
+ // there are no MachineLoops.
+ PreferredLoopExit = nullptr;
+
+ if (TailDupPlacement) {
+ unsigned TailDupSize = TailDuplicatePlacementThreshold;
+ if (MF.getFunction()->optForSize())
+ TailDupSize = 1;
+ TailDup.initMF(MF, MBPI, /* LayoutMode */ true, TailDupSize);
+ }
+
assert(BlockToChain.empty());
buildCFGChains();
@@ -1688,14 +2032,17 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
BranchFoldPlacement;
// No tail merging opportunities if the block number is less than four.
if (MF.size() > 3 && EnableTailMerge) {
+ unsigned TailMergeSize = TailDuplicatePlacementThreshold + 1;
BranchFolder BF(/*EnableTailMerge=*/true, /*CommonHoist=*/false, *MBFI,
- *MBPI);
+ *MBPI, TailMergeSize);
if (BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(),
getAnalysisIfAvailable<MachineModuleInfo>(), MLI,
/*AfterBlockPlacement=*/true)) {
// Redo the layout if tail merging creates/removes/moves blocks.
BlockToChain.clear();
+ // Must redo the dominator tree if blocks were changed.
+ MDT->runOnMachineFunction(MF);
ChainAllocator.DestroyAll();
buildCFGChains();
}
diff --git a/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
index fe7340618374..21eff9dfff9c 100644
--- a/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineBranchProbabilityInfo.cpp
@@ -50,8 +50,7 @@ BranchProbability MachineBranchProbabilityInfo::getEdgeProbability(
const MachineBasicBlock *Src, const MachineBasicBlock *Dst) const {
// This is a linear search. Try to use the const_succ_iterator version when
// possible.
- return getEdgeProbability(Src,
- std::find(Src->succ_begin(), Src->succ_end(), Dst));
+ return getEdgeProbability(Src, find(Src->successors(), Dst));
}
bool MachineBranchProbabilityInfo::isEdgeHot(
diff --git a/contrib/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm/lib/CodeGen/MachineCSE.cpp
index 1209f73d9601..0766f465456c 100644
--- a/contrib/llvm/lib/CodeGen/MachineCSE.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineCSE.cpp
@@ -177,8 +177,7 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
unsigned LookAheadLeft = LookAheadLimit;
while (LookAheadLeft) {
// Skip over dbg_value's.
- while (I != E && I->isDebugValue())
- ++I;
+ I = skipDebugInstructionsForward(I, E);
if (I == E)
// Reached end of block, register is obviously dead.
@@ -227,7 +226,7 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
if (TargetRegisterInfo::isVirtualRegister(Reg))
continue;
// Reading constant physregs is ok.
- if (!MRI->isConstantPhysReg(Reg, *MBB->getParent()))
+ if (!MRI->isConstantPhysReg(Reg))
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
PhysRefs.insert(*AI);
}
@@ -346,7 +345,7 @@ bool MachineCSE::isCSECandidate(MachineInstr *MI) {
// Okay, this instruction does a load. As a refinement, we allow the target
// to decide whether the loaded value is actually a constant. If so, we can
// actually use it as a load.
- if (!MI->isInvariantLoad(AA))
+ if (!MI->isDereferenceableInvariantLoad(AA))
// FIXME: we should be able to hoist loads with no other side effects if
// there are no other instructions which can change memory in this loop.
// This is a trivial form of alias analysis.
diff --git a/contrib/llvm/lib/CodeGen/MachineCombiner.cpp b/contrib/llvm/lib/CodeGen/MachineCombiner.cpp
index 6b5c6ba82506..5beed5f5dd08 100644
--- a/contrib/llvm/lib/CodeGen/MachineCombiner.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineCombiner.cpp
@@ -56,7 +56,7 @@ public:
}
void getAnalysisUsage(AnalysisUsage &AU) const override;
bool runOnMachineFunction(MachineFunction &MF) override;
- const char *getPassName() const override { return "Machine InstCombiner"; }
+ StringRef getPassName() const override { return "Machine InstCombiner"; }
private:
bool doSubstitute(unsigned NewSize, unsigned OldSize);
@@ -71,6 +71,7 @@ private:
improvesCriticalPathLen(MachineBasicBlock *MBB, MachineInstr *Root,
MachineTraceMetrics::Trace BlockTrace,
SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
MachineCombinerPattern Pattern);
bool preservesResourceLen(MachineBasicBlock *MBB,
@@ -134,7 +135,7 @@ MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
// are tracked in the InstrIdxForVirtReg map depth is looked up in InstrDepth
for (auto *InstrPtr : InsInstrs) { // for each Use
unsigned IDepth = 0;
- DEBUG(dbgs() << "NEW INSTR "; InstrPtr->dump(); dbgs() << "\n";);
+ DEBUG(dbgs() << "NEW INSTR "; InstrPtr->dump(TII); dbgs() << "\n";);
for (const MachineOperand &MO : InstrPtr->operands()) {
// Check for virtual register operand.
if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())))
@@ -242,6 +243,7 @@ bool MachineCombiner::improvesCriticalPathLen(
MachineBasicBlock *MBB, MachineInstr *Root,
MachineTraceMetrics::Trace BlockTrace,
SmallVectorImpl<MachineInstr *> &InsInstrs,
+ SmallVectorImpl<MachineInstr *> &DelInstrs,
DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
MachineCombinerPattern Pattern) {
assert(TSchedModel.hasInstrSchedModelOrItineraries() &&
@@ -269,8 +271,13 @@ bool MachineCombiner::improvesCriticalPathLen(
// A more flexible cost calculation for the critical path includes the slack
// of the original code sequence. This may allow the transform to proceed
// even if the instruction depths (data dependency cycles) become worse.
+
unsigned NewRootLatency = getLatency(Root, NewRoot, BlockTrace);
- unsigned RootLatency = TSchedModel.computeInstrLatency(Root);
+ unsigned RootLatency = 0;
+
+ for (auto I : DelInstrs)
+ RootLatency += TSchedModel.computeInstrLatency(I);
+
unsigned RootSlack = BlockTrace.getInstrSlack(*Root);
DEBUG(dbgs() << " NewRootLatency: " << NewRootLatency << "\n";
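
The change replaces the latency of the single root instruction with the summed latency of every instruction the pattern deletes, so a pattern that folds several instructions is credited for everything it removes. A toy sketch of the resulting comparison (made-up numbers; the real pass also folds in instruction depths from the trace):

    #include <cstdio>
    #include <vector>

    int main() {
      // Latencies of the instructions a combine would delete, e.g. mul + add
      // being folded into a single fused multiply-add.
      std::vector<unsigned> DelInstrLatencies = {3, 1};
      unsigned NewRootLatency = 4, RootSlack = 0;

      unsigned RootLatency = 0;
      for (unsigned L : DelInstrLatencies)
        RootLatency += L; // old cost: everything the pattern removes

      // Simplified decision: accept when the new root is no slower than the
      // code it replaces, after accounting for slack on the original sequence.
      bool Improves = NewRootLatency <= RootLatency + RootSlack;
      std::printf("new=%u old=%u improves=%d\n", NewRootLatency, RootLatency,
                  Improves);
    }
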
@@ -421,7 +428,7 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
// resource pressure.
if (SubstituteAlways || doSubstitute(NewInstCount, OldInstCount) ||
(improvesCriticalPathLen(MBB, &MI, BlockTrace, InsInstrs,
- InstrIdxForVirtReg, P) &&
+ DelInstrs, InstrIdxForVirtReg, P) &&
preservesResourceLen(MBB, BlockTrace, InsInstrs, DelInstrs))) {
for (auto *InstrPtr : InsInstrs)
MBB->insert((MachineBasicBlock::iterator) &MI, InstrPtr);
diff --git a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp
index 8fdf39d54bd0..92d043df26b8 100644
--- a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp
@@ -56,7 +56,7 @@ namespace {
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
private:
@@ -245,7 +245,7 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
// Remember source that's copied to Def. Once it's clobbered, then
// it's no longer available for copy propagation.
RegList &DestList = SrcMap[Src];
- if (std::find(DestList.begin(), DestList.end(), Def) == DestList.end())
+ if (!is_contained(DestList, Def))
DestList.push_back(Def);
continue;
diff --git a/contrib/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm/lib/CodeGen/MachineFunction.cpp
index a7c63ef4c852..c1d5ea96cd17 100644
--- a/contrib/llvm/lib/CodeGen/MachineFunction.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineFunction.cpp
@@ -54,28 +54,29 @@ static cl::opt<unsigned>
void MachineFunctionInitializer::anchor() {}
-void MachineFunctionProperties::print(raw_ostream &ROS, bool OnlySet) const {
- // Leave this function even in NDEBUG as an out-of-line anchor.
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- for (BitVector::size_type i = 0; i < Properties.size(); ++i) {
- bool HasProperty = Properties[i];
- if (OnlySet && !HasProperty)
+static const char *getPropertyName(MachineFunctionProperties::Property Prop) {
+ typedef MachineFunctionProperties::Property P;
+ switch(Prop) {
+ case P::FailedISel: return "FailedISel";
+ case P::IsSSA: return "IsSSA";
+ case P::Legalized: return "Legalized";
+ case P::NoPHIs: return "NoPHIs";
+ case P::NoVRegs: return "NoVRegs";
+ case P::RegBankSelected: return "RegBankSelected";
+ case P::Selected: return "Selected";
+ case P::TracksLiveness: return "TracksLiveness";
+ }
+ llvm_unreachable("Invalid machine function property");
+}
+
+void MachineFunctionProperties::print(raw_ostream &OS) const {
+ const char *Separator = "";
+ for (BitVector::size_type I = 0; I < Properties.size(); ++I) {
+ if (!Properties[I])
continue;
- switch(static_cast<Property>(i)) {
- case Property::IsSSA:
- ROS << (HasProperty ? "SSA, " : "Post SSA, ");
- break;
- case Property::TracksLiveness:
- ROS << (HasProperty ? "" : "not ") << "tracking liveness, ";
- break;
- case Property::AllVRegsAllocated:
- ROS << (HasProperty ? "AllVRegsAllocated" : "HasVRegs");
- break;
- default:
- break;
- }
+ OS << Separator << getPropertyName(static_cast<Property>(I));
+ Separator = ", ";
}
-#endif
}
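
The rewritten print() maps each property bit to a fixed name and joins only the set bits with ", ", swapping in the separator after the first item, so the output reads like "IsSSA, TracksLiveness". A standalone illustration of that joining idiom (names and bit positions are made up):

    #include <bitset>
    #include <cstdio>

    int main() {
      const char *Names[] = {"IsSSA", "TracksLiveness", "NoVRegs"};
      std::bitset<3> Properties("011"); // bit 0 and bit 1 set

      const char *Separator = "";
      for (std::size_t I = 0; I < Properties.size(); ++I) {
        if (!Properties[I])
          continue;
        std::printf("%s%s", Separator, Names[I]);
        Separator = ", "; // every item after the first gets a leading comma
      }
      std::printf("\n"); // prints: IsSSA, TracksLiveness
    }
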
//===----------------------------------------------------------------------===//
@@ -85,7 +86,7 @@ void MachineFunctionProperties::print(raw_ostream &ROS, bool OnlySet) const {
// Out-of-line virtual method.
MachineFunctionInfo::~MachineFunctionInfo() {}
-void ilist_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) {
+void ilist_alloc_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) {
MBB->getParent()->DeleteMachineBasicBlock(MBB);
}
@@ -100,6 +101,11 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM,
unsigned FunctionNum, MachineModuleInfo &mmi)
: Fn(F), Target(TM), STI(TM.getSubtargetImpl(*F)), Ctx(mmi.getContext()),
MMI(mmi) {
+ FunctionNumber = FunctionNum;
+ init();
+}
+
+void MachineFunction::init() {
// Assume the function starts in SSA form with correct liveness.
Properties.set(MachineFunctionProperties::Property::IsSSA);
Properties.set(MachineFunctionProperties::Property::TracksLiveness);
@@ -112,11 +118,11 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM,
// We can realign the stack if the target supports it and the user hasn't
// explicitly asked us not to.
bool CanRealignSP = STI->getFrameLowering()->isStackRealignable() &&
- !F->hasFnAttribute("no-realign-stack");
+ !Fn->hasFnAttribute("no-realign-stack");
FrameInfo = new (Allocator) MachineFrameInfo(
getFnStackAlignment(STI, Fn), /*StackRealignable=*/CanRealignSP,
/*ForceRealign=*/CanRealignSP &&
- F->hasFnAttribute(Attribute::StackAlignment));
+ Fn->hasFnAttribute(Attribute::StackAlignment));
if (Fn->hasFnAttribute(Attribute::StackAlignment))
FrameInfo->ensureMaxAlignment(Fn->getFnStackAlignment());
@@ -133,15 +139,14 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM,
if (AlignAllFunctions)
Alignment = AlignAllFunctions;
- FunctionNumber = FunctionNum;
JumpTableInfo = nullptr;
if (isFuncletEHPersonality(classifyEHPersonality(
- F->hasPersonalityFn() ? F->getPersonalityFn() : nullptr))) {
+ Fn->hasPersonalityFn() ? Fn->getPersonalityFn() : nullptr))) {
WinEHInfo = new (Allocator) WinEHFuncInfo();
}
- assert(TM.isCompatibleDataLayout(getDataLayout()) &&
+ assert(Target.isCompatibleDataLayout(getDataLayout()) &&
"Can't create a MachineFunction using a Module with a "
"Target-incompatible DataLayout attached\n");
@@ -149,6 +154,11 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM,
}
MachineFunction::~MachineFunction() {
+ clear();
+}
+
+void MachineFunction::clear() {
+ Properties.reset();
// Don't call destructors on MachineInstr and MachineOperand. All of their
// memory comes from the BumpPtrAllocator which is about to be purged.
//
@@ -296,9 +306,12 @@ MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) {
MachineMemOperand *MachineFunction::getMachineMemOperand(
MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s,
- unsigned base_alignment, const AAMDNodes &AAInfo, const MDNode *Ranges) {
+ unsigned base_alignment, const AAMDNodes &AAInfo, const MDNode *Ranges,
+ SynchronizationScope SynchScope, AtomicOrdering Ordering,
+ AtomicOrdering FailureOrdering) {
return new (Allocator)
- MachineMemOperand(PtrInfo, f, s, base_alignment, AAInfo, Ranges);
+ MachineMemOperand(PtrInfo, f, s, base_alignment, AAInfo, Ranges,
+ SynchScope, Ordering, FailureOrdering);
}
MachineMemOperand *
@@ -308,13 +321,15 @@ MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
return new (Allocator)
MachineMemOperand(MachinePointerInfo(MMO->getValue(),
MMO->getOffset()+Offset),
- MMO->getFlags(), Size,
- MMO->getBaseAlignment());
+ MMO->getFlags(), Size, MMO->getBaseAlignment(),
+ AAMDNodes(), nullptr, MMO->getSynchScope(),
+ MMO->getOrdering(), MMO->getFailureOrdering());
return new (Allocator)
MachineMemOperand(MachinePointerInfo(MMO->getPseudoValue(),
MMO->getOffset()+Offset),
- MMO->getFlags(), Size,
- MMO->getBaseAlignment());
+ MMO->getFlags(), Size, MMO->getBaseAlignment(),
+ AAMDNodes(), nullptr, MMO->getSynchScope(),
+ MMO->getOrdering(), MMO->getFailureOrdering());
}
MachineInstr::mmo_iterator
@@ -345,7 +360,9 @@ MachineFunction::extractLoadMemRefs(MachineInstr::mmo_iterator Begin,
getMachineMemOperand((*I)->getPointerInfo(),
(*I)->getFlags() & ~MachineMemOperand::MOStore,
(*I)->getSize(), (*I)->getBaseAlignment(),
- (*I)->getAAInfo());
+ (*I)->getAAInfo(), nullptr,
+ (*I)->getSynchScope(), (*I)->getOrdering(),
+ (*I)->getFailureOrdering());
Result[Index] = JustLoad;
}
++Index;
@@ -377,7 +394,9 @@ MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin,
getMachineMemOperand((*I)->getPointerInfo(),
(*I)->getFlags() & ~MachineMemOperand::MOLoad,
(*I)->getSize(), (*I)->getBaseAlignment(),
- (*I)->getAAInfo());
+ (*I)->getAAInfo(), nullptr,
+ (*I)->getSynchScope(), (*I)->getOrdering(),
+ (*I)->getFailureOrdering());
Result[Index] = JustStore;
}
++Index;
@@ -406,9 +425,8 @@ StringRef MachineFunction::getName() const {
void MachineFunction::print(raw_ostream &OS, const SlotIndexes *Indexes) const {
OS << "# Machine code for function " << getName() << ": ";
- OS << "Properties: <";
getProperties().print(OS);
- OS << ">\n";
+ OS << '\n';
// Print Frame Information
FrameInfo->print(*this, OS);
@@ -535,8 +553,8 @@ MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx,
assert(JumpTableInfo && "No jump tables");
assert(JTI < JumpTableInfo->getJumpTables().size() && "Invalid JTI!");
- const char *Prefix = isLinkerPrivate ? DL.getLinkerPrivateGlobalPrefix()
- : DL.getPrivateGlobalPrefix();
+ StringRef Prefix = isLinkerPrivate ? DL.getLinkerPrivateGlobalPrefix()
+ : DL.getPrivateGlobalPrefix();
SmallString<60> Name;
raw_svector_ostream(Name)
<< Prefix << "JTI" << getFunctionNumber() << '_' << JTI;
@@ -550,6 +568,193 @@ MCSymbol *MachineFunction::getPICBaseSymbol() const {
Twine(getFunctionNumber()) + "$pb");
}
+/// \name Exception Handling
+/// \{
+
+LandingPadInfo &
+MachineFunction::getOrCreateLandingPadInfo(MachineBasicBlock *LandingPad) {
+ unsigned N = LandingPads.size();
+ for (unsigned i = 0; i < N; ++i) {
+ LandingPadInfo &LP = LandingPads[i];
+ if (LP.LandingPadBlock == LandingPad)
+ return LP;
+ }
+
+ LandingPads.push_back(LandingPadInfo(LandingPad));
+ return LandingPads[N];
+}
+
+void MachineFunction::addInvoke(MachineBasicBlock *LandingPad,
+ MCSymbol *BeginLabel, MCSymbol *EndLabel) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ LP.BeginLabels.push_back(BeginLabel);
+ LP.EndLabels.push_back(EndLabel);
+}
+
+MCSymbol *MachineFunction::addLandingPad(MachineBasicBlock *LandingPad) {
+ MCSymbol *LandingPadLabel = Ctx.createTempSymbol();
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ LP.LandingPadLabel = LandingPadLabel;
+ return LandingPadLabel;
+}
+
+void MachineFunction::addCatchTypeInfo(MachineBasicBlock *LandingPad,
+ ArrayRef<const GlobalValue *> TyInfo) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ for (unsigned N = TyInfo.size(); N; --N)
+ LP.TypeIds.push_back(getTypeIDFor(TyInfo[N - 1]));
+}
+
+void MachineFunction::addFilterTypeInfo(MachineBasicBlock *LandingPad,
+ ArrayRef<const GlobalValue *> TyInfo) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ std::vector<unsigned> IdsInFilter(TyInfo.size());
+ for (unsigned I = 0, E = TyInfo.size(); I != E; ++I)
+ IdsInFilter[I] = getTypeIDFor(TyInfo[I]);
+ LP.TypeIds.push_back(getFilterIDFor(IdsInFilter));
+}
+
+void MachineFunction::tidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) {
+ for (unsigned i = 0; i != LandingPads.size(); ) {
+ LandingPadInfo &LandingPad = LandingPads[i];
+ if (LandingPad.LandingPadLabel &&
+ !LandingPad.LandingPadLabel->isDefined() &&
+ (!LPMap || (*LPMap)[LandingPad.LandingPadLabel] == 0))
+ LandingPad.LandingPadLabel = nullptr;
+
+ // Special case: we *should* emit LPs with null LP MBB. This indicates
+ // "nounwind" case.
+ if (!LandingPad.LandingPadLabel && LandingPad.LandingPadBlock) {
+ LandingPads.erase(LandingPads.begin() + i);
+ continue;
+ }
+
+ for (unsigned j = 0, e = LandingPads[i].BeginLabels.size(); j != e; ++j) {
+ MCSymbol *BeginLabel = LandingPad.BeginLabels[j];
+ MCSymbol *EndLabel = LandingPad.EndLabels[j];
+ if ((BeginLabel->isDefined() ||
+ (LPMap && (*LPMap)[BeginLabel] != 0)) &&
+ (EndLabel->isDefined() ||
+ (LPMap && (*LPMap)[EndLabel] != 0))) continue;
+
+ LandingPad.BeginLabels.erase(LandingPad.BeginLabels.begin() + j);
+ LandingPad.EndLabels.erase(LandingPad.EndLabels.begin() + j);
+ --j;
+ --e;
+ }
+
+ // Remove landing pads with no try-ranges.
+ if (LandingPads[i].BeginLabels.empty()) {
+ LandingPads.erase(LandingPads.begin() + i);
+ continue;
+ }
+
+ // If there is no landing pad, ensure that the list of typeids is empty.
+ // If the only typeid is a cleanup, this is the same as having no typeids.
+ if (!LandingPad.LandingPadBlock ||
+ (LandingPad.TypeIds.size() == 1 && !LandingPad.TypeIds[0]))
+ LandingPad.TypeIds.clear();
+ ++i;
+ }
+}
+
+void MachineFunction::addCleanup(MachineBasicBlock *LandingPad) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ LP.TypeIds.push_back(0);
+}
+
+void MachineFunction::addSEHCatchHandler(MachineBasicBlock *LandingPad,
+ const Function *Filter,
+ const BlockAddress *RecoverBA) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ SEHHandler Handler;
+ Handler.FilterOrFinally = Filter;
+ Handler.RecoverBA = RecoverBA;
+ LP.SEHHandlers.push_back(Handler);
+}
+
+void MachineFunction::addSEHCleanupHandler(MachineBasicBlock *LandingPad,
+ const Function *Cleanup) {
+ LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
+ SEHHandler Handler;
+ Handler.FilterOrFinally = Cleanup;
+ Handler.RecoverBA = nullptr;
+ LP.SEHHandlers.push_back(Handler);
+}
+
+void MachineFunction::setCallSiteLandingPad(MCSymbol *Sym,
+ ArrayRef<unsigned> Sites) {
+ LPadToCallSiteMap[Sym].append(Sites.begin(), Sites.end());
+}
+
+unsigned MachineFunction::getTypeIDFor(const GlobalValue *TI) {
+ for (unsigned i = 0, N = TypeInfos.size(); i != N; ++i)
+ if (TypeInfos[i] == TI) return i + 1;
+
+ TypeInfos.push_back(TI);
+ return TypeInfos.size();
+}
+
+int MachineFunction::getFilterIDFor(std::vector<unsigned> &TyIds) {
+ // If the new filter coincides with the tail of an existing filter, then
+ // re-use the existing filter. Folding filters more than this requires
+ // re-ordering filters and/or their elements - probably not worth it.
+ for (std::vector<unsigned>::iterator I = FilterEnds.begin(),
+ E = FilterEnds.end(); I != E; ++I) {
+ unsigned i = *I, j = TyIds.size();
+
+ while (i && j)
+ if (FilterIds[--i] != TyIds[--j])
+ goto try_next;
+
+ if (!j)
+ // The new filter coincides with range [i, end) of the existing filter.
+ return -(1 + i);
+
+try_next:;
+ }
+
+ // Add the new filter.
+ int FilterID = -(1 + FilterIds.size());
+ FilterIds.reserve(FilterIds.size() + TyIds.size() + 1);
+ FilterIds.insert(FilterIds.end(), TyIds.begin(), TyIds.end());
+ FilterEnds.push_back(FilterIds.size());
+ FilterIds.push_back(0); // terminator
+ return FilterID;
+}
+
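
getFilterIDFor packs every exception filter into one flat FilterIds array, each filter followed by a 0 terminator, records the end offset of each filter in FilterEnds, and hands out negative IDs; a new filter whose elements match the tail of an existing one reuses that storage. The standalone re-implementation below is for illustration only and checks the encoding on a small example:

    #include <cassert>
    #include <vector>

    int getFilterIDFor(std::vector<unsigned> &FilterIds,
                       std::vector<unsigned> &FilterEnds,
                       const std::vector<unsigned> &TyIds) {
      // Try to find an existing filter whose tail equals TyIds.
      for (unsigned End : FilterEnds) {
        unsigned i = End, j = TyIds.size();
        bool Match = true;
        while (i && j)
          if (FilterIds[--i] != TyIds[--j]) {
            Match = false;
            break;
          }
        if (Match && j == 0)
          return -(1 + (int)i); // reuse: TyIds is a suffix ending at End
      }
      // Otherwise append the new filter followed by its terminator.
      int FilterID = -(1 + (int)FilterIds.size());
      FilterIds.insert(FilterIds.end(), TyIds.begin(), TyIds.end());
      FilterEnds.push_back(FilterIds.size());
      FilterIds.push_back(0); // terminator
      return FilterID;
    }

    int main() {
      std::vector<unsigned> Ids, Ends;
      assert(getFilterIDFor(Ids, Ends, {5, 7}) == -1);    // Ids = [5,7,0]
      assert(getFilterIDFor(Ids, Ends, {7}) == -2);       // reuses the {7} tail
      assert(getFilterIDFor(Ids, Ends, {2, 5, 7}) == -4); // longer, appended
    }
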
+void llvm::addLandingPadInfo(const LandingPadInst &I, MachineBasicBlock &MBB) {
+ MachineFunction &MF = *MBB.getParent();
+ if (const auto *PF = dyn_cast<Function>(
+ I.getParent()->getParent()->getPersonalityFn()->stripPointerCasts()))
+ MF.getMMI().addPersonality(PF);
+
+ if (I.isCleanup())
+ MF.addCleanup(&MBB);
+
+ // FIXME: New EH - Add the clauses in reverse order. This isn't 100% correct,
+ // but we need to do it this way because of how the DWARF EH emitter
+ // processes the clauses.
+ for (unsigned i = I.getNumClauses(); i != 0; --i) {
+ Value *Val = I.getClause(i - 1);
+ if (I.isCatch(i - 1)) {
+ MF.addCatchTypeInfo(&MBB,
+ dyn_cast<GlobalValue>(Val->stripPointerCasts()));
+ } else {
+ // Add filters in a list.
+ Constant *CVal = cast<Constant>(Val);
+ SmallVector<const GlobalValue *, 4> FilterList;
+ for (User::op_iterator II = CVal->op_begin(), IE = CVal->op_end();
+ II != IE; ++II)
+ FilterList.push_back(cast<GlobalValue>((*II)->stripPointerCasts()));
+
+ MF.addFilterTypeInfo(&MBB, FilterList);
+ }
+ }
+}
+
+/// \}
+
//===----------------------------------------------------------------------===//
// MachineFrameInfo implementation
//===----------------------------------------------------------------------===//
@@ -634,11 +839,11 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
/// Create a spill slot at a fixed location on the stack.
/// Returns an index with a negative value.
int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size,
- int64_t SPOffset) {
+ int64_t SPOffset,
+ bool Immutable) {
unsigned Align = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment);
Align = clampStackAlignment(!StackRealignable, Align, StackAlignment);
- Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset,
- /*Immutable*/ true,
+ Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable,
/*isSS*/ true,
/*Alloca*/ nullptr,
/*isAliased*/ false));
@@ -890,13 +1095,20 @@ MachineConstantPoolEntry::getSectionKind(const DataLayout *DL) const {
}
MachineConstantPool::~MachineConstantPool() {
+ // A constant may be a member of both Constants and MachineCPVsSharingEntries,
+ // so keep track of which we've deleted to avoid double deletions.
+ DenseSet<MachineConstantPoolValue*> Deleted;
for (unsigned i = 0, e = Constants.size(); i != e; ++i)
- if (Constants[i].isMachineConstantPoolEntry())
+ if (Constants[i].isMachineConstantPoolEntry()) {
+ Deleted.insert(Constants[i].Val.MachineCPVal);
delete Constants[i].Val.MachineCPVal;
+ }
for (DenseSet<MachineConstantPoolValue*>::iterator I =
MachineCPVsSharingEntries.begin(), E = MachineCPVsSharingEntries.end();
- I != E; ++I)
- delete *I;
+ I != E; ++I) {
+ if (Deleted.count(*I) == 0)
+ delete *I;
+ }
}
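
The destructor change guards against freeing a MachineConstantPoolValue twice when it is reachable from both containers; the fix is simply to remember what has already been deleted. A standalone sketch of the pattern with standard containers and an illustrative value type:

    #include <set>
    #include <vector>

    struct CPV { int Tag; };

    void destroyAll(std::vector<CPV *> &Constants, std::set<CPV *> &Sharing) {
      std::set<CPV *> Deleted; // everything freed through the first container
      for (CPV *V : Constants) {
        Deleted.insert(V);
        delete V;
      }
      for (CPV *V : Sharing)
        if (Deleted.count(V) == 0) // skip values already freed above
          delete V;
    }
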
/// Test whether the given two constants can be allocated the same constant pool
diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp
deleted file mode 100644
index 338cd1e22032..000000000000
--- a/contrib/llvm/lib/CodeGen/MachineFunctionAnalysis.cpp
+++ /dev/null
@@ -1,60 +0,0 @@
-//===-- MachineFunctionAnalysis.cpp ---------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file contains the definitions of the MachineFunctionAnalysis members.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
-#include "llvm/CodeGen/GCMetadata.h"
-#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/CodeGen/MachineFunctionInitializer.h"
-using namespace llvm;
-
-char MachineFunctionAnalysis::ID = 0;
-
-MachineFunctionAnalysis::MachineFunctionAnalysis(
- const TargetMachine &tm, MachineFunctionInitializer *MFInitializer)
- : FunctionPass(ID), TM(tm), MF(nullptr), MFInitializer(MFInitializer) {
- initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry());
-}
-
-MachineFunctionAnalysis::~MachineFunctionAnalysis() {
- releaseMemory();
- assert(!MF && "MachineFunctionAnalysis left initialized!");
-}
-
-void MachineFunctionAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesAll();
- AU.addRequired<MachineModuleInfo>();
-}
-
-bool MachineFunctionAnalysis::doInitialization(Module &M) {
- MachineModuleInfo *MMI = getAnalysisIfAvailable<MachineModuleInfo>();
- assert(MMI && "MMI not around yet??");
- MMI->setModule(&M);
- NextFnNum = 0;
- return false;
-}
-
-
-bool MachineFunctionAnalysis::runOnFunction(Function &F) {
- assert(!MF && "MachineFunctionAnalysis already initialized!");
- MF = new MachineFunction(&F, TM, NextFnNum++,
- getAnalysis<MachineModuleInfo>());
- if (MFInitializer)
- MFInitializer->initializeMachineFunction(*MF);
- return false;
-}
-
-void MachineFunctionAnalysis::releaseMemory() {
- delete MF;
- MF = nullptr;
-}
diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp
index 228fe170ab46..2265676ff8b1 100644
--- a/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineFunctionPass.cpp
@@ -22,7 +22,7 @@
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/StackProtector.h"
#include "llvm/IR/Dominators.h"
@@ -41,7 +41,9 @@ bool MachineFunctionPass::runOnFunction(Function &F) {
if (F.hasAvailableExternallyLinkage())
return false;
- MachineFunction &MF = getAnalysis<MachineFunctionAnalysis>().getMF();
+ MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>();
+ MachineFunction &MF = MMI.getMachineFunction(F);
+
MachineFunctionProperties &MFProps = MF.getProperties();
#ifndef NDEBUG
@@ -49,7 +51,7 @@ bool MachineFunctionPass::runOnFunction(Function &F) {
errs() << "MachineFunctionProperties required by " << getPassName()
<< " pass are not met by function " << F.getName() << ".\n"
<< "Required properties: ";
- RequiredProperties.print(errs(), /*OnlySet=*/true);
+ RequiredProperties.print(errs());
errs() << "\nCurrent properties: ";
MFProps.print(errs());
errs() << "\n";
@@ -60,13 +62,13 @@ bool MachineFunctionPass::runOnFunction(Function &F) {
bool RV = runOnMachineFunction(MF);
MFProps.set(SetProperties);
- MFProps.clear(ClearedProperties);
+ MFProps.reset(ClearedProperties);
return RV;
}
void MachineFunctionPass::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<MachineFunctionAnalysis>();
- AU.addPreserved<MachineFunctionAnalysis>();
+ AU.addRequired<MachineModuleInfo>();
+ AU.addPreserved<MachineModuleInfo>();
// MachineFunctionPass preserves all LLVM IR passes, but there's no
// high-level way to express this. Instead, just list a bunch of
diff --git a/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp b/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
index 4f424ff292cc..0d533c3f4f23 100644
--- a/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineFunctionPrinterPass.cpp
@@ -34,7 +34,7 @@ struct MachineFunctionPrinterPass : public MachineFunctionPass {
MachineFunctionPrinterPass(raw_ostream &os, const std::string &banner)
: MachineFunctionPass(ID), OS(os), Banner(banner) {}
- const char *getPassName() const override { return "MachineFunction Printer"; }
+ StringRef getPassName() const override { return "MachineFunction Printer"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
diff --git a/contrib/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm/lib/CodeGen/MachineInstr.cpp
index 3cdf8d2941d3..d2ce001103df 100644
--- a/contrib/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineInstr.cpp
@@ -26,6 +26,7 @@
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
@@ -40,6 +41,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -91,6 +93,8 @@ void MachineOperand::substPhysReg(unsigned Reg, const TargetRegisterInfo &TRI) {
// Note that getSubReg() may return 0 if the sub-register doesn't exist.
// That won't happen in legal code.
setSubReg(0);
+ if (isDef())
+ setIsUndef(false);
}
setReg(Reg);
}
@@ -171,6 +175,16 @@ void MachineOperand::ChangeToMCSymbol(MCSymbol *Sym) {
Contents.Sym = Sym;
}
+void MachineOperand::ChangeToFrameIndex(int Idx) {
+ assert((!isReg() || !isTied()) &&
+ "Cannot change a tied operand into a FrameIndex");
+
+ removeRegFromUses();
+
+ OpKind = MO_FrameIndex;
+ setIndex(Idx);
+}
+
/// ChangeToRegister - Replace this operand with a new register operand of
/// the specified value. If an operand is known to be an register already,
/// the setReg method should be used.
@@ -256,6 +270,10 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
return getCFIIndex() == Other.getCFIIndex();
case MachineOperand::MO_Metadata:
return getMetadata() == Other.getMetadata();
+ case MachineOperand::MO_IntrinsicID:
+ return getIntrinsicID() == Other.getIntrinsicID();
+ case MachineOperand::MO_Predicate:
+ return getPredicate() == Other.getPredicate();
}
llvm_unreachable("Invalid machine operand type");
}
@@ -300,18 +318,23 @@ hash_code llvm::hash_value(const MachineOperand &MO) {
return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getMCSymbol());
case MachineOperand::MO_CFIIndex:
return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getCFIIndex());
+ case MachineOperand::MO_IntrinsicID:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIntrinsicID());
+ case MachineOperand::MO_Predicate:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getPredicate());
}
llvm_unreachable("Invalid machine operand type");
}
-void MachineOperand::print(raw_ostream &OS,
- const TargetRegisterInfo *TRI) const {
+void MachineOperand::print(raw_ostream &OS, const TargetRegisterInfo *TRI,
+ const TargetIntrinsicInfo *IntrinsicInfo) const {
ModuleSlotTracker DummyMST(nullptr);
- print(OS, DummyMST, TRI);
+ print(OS, DummyMST, TRI, IntrinsicInfo);
}
void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
- const TargetRegisterInfo *TRI) const {
+ const TargetRegisterInfo *TRI,
+ const TargetIntrinsicInfo *IntrinsicInfo) const {
switch (getType()) {
case MachineOperand::MO_Register:
OS << PrintReg(getReg(), TRI, getSubReg());
@@ -378,7 +401,7 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
} else if (getFPImm()->getType()->isHalfTy()) {
APFloat APF = getFPImm()->getValueAPF();
bool Unused;
- APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &Unused);
+ APF.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &Unused);
OS << "half " << APF.convertToFloat();
} else {
OS << getFPImm()->getValueAPF().convertToDouble();
@@ -454,12 +477,32 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
case MachineOperand::MO_CFIIndex:
OS << "<call frame instruction>";
break;
+ case MachineOperand::MO_IntrinsicID: {
+ Intrinsic::ID ID = getIntrinsicID();
+ if (ID < Intrinsic::num_intrinsics)
+ OS << "<intrinsic:@" << Intrinsic::getName(ID, None) << '>';
+ else if (IntrinsicInfo)
+ OS << "<intrinsic:@" << IntrinsicInfo->getName(ID) << '>';
+ else
+ OS << "<intrinsic:" << ID << '>';
+ break;
+ }
+ case MachineOperand::MO_Predicate: {
+ auto Pred = static_cast<CmpInst::Predicate>(getPredicate());
+ OS << '<' << (CmpInst::isIntPredicate(Pred) ? "intpred" : "floatpred")
+ << CmpInst::getPredicateName(Pred) << '>';
+ }
}
-
if (unsigned TF = getTargetFlags())
OS << "[TF=" << TF << ']';
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void MachineOperand::dump() const {
+ dbgs() << *this << '\n';
+}
+#endif
+
//===----------------------------------------------------------------------===//
// MachineMemOperand Implementation
//===----------------------------------------------------------------------===//
@@ -500,7 +543,10 @@ MachinePointerInfo MachinePointerInfo::getStack(MachineFunction &MF,
MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f,
uint64_t s, unsigned int a,
const AAMDNodes &AAInfo,
- const MDNode *Ranges)
+ const MDNode *Ranges,
+ SynchronizationScope SynchScope,
+ AtomicOrdering Ordering,
+ AtomicOrdering FailureOrdering)
: PtrInfo(ptrinfo), Size(s), FlagVals(f), BaseAlignLog2(Log2_32(a) + 1),
AAInfo(AAInfo), Ranges(Ranges) {
assert((PtrInfo.V.isNull() || PtrInfo.V.is<const PseudoSourceValue*>() ||
@@ -508,6 +554,13 @@ MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f,
"invalid pointer value");
assert(getBaseAlignment() == a && "Alignment is not a power of 2!");
assert((isLoad() || isStore()) && "Not a load/store!");
+
+ AtomicInfo.SynchScope = static_cast<unsigned>(SynchScope);
+ assert(getSynchScope() == SynchScope && "Value truncated");
+ AtomicInfo.Ordering = static_cast<unsigned>(Ordering);
+ assert(getOrdering() == Ordering && "Value truncated");
+ AtomicInfo.FailureOrdering = static_cast<unsigned>(FailureOrdering);
+ assert(getFailureOrdering() == FailureOrdering && "Value truncated");
}
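
The constructor packs the synchronization scope and the two atomic orderings into narrow bitfields and immediately asserts that each stored value reads back unchanged. A small standalone sketch of that store-then-check-the-round-trip idiom (the enum and field width are made up):

    #include <cassert>
    #include <cstdint>

    enum class Ordering : uint8_t { NotAtomic, Monotonic, Acquire, Release, SeqCst };

    struct PackedInfo {
      uint8_t OrderingBits : 3; // narrow storage: must hold every enumerator
    };

    void setOrdering(PackedInfo &Info, Ordering O) {
      Info.OrderingBits = static_cast<uint8_t>(O);
      // A too-narrow bitfield would silently truncate, so verify the round
      // trip, mirroring the "Value truncated" asserts above.
      assert(static_cast<Ordering>(Info.OrderingBits) == O && "Value truncated");
    }
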
/// Profile - Gather unique data for the object.
@@ -623,10 +676,10 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST) const {
OS << ")";
}
- // Print nontemporal info.
if (isNonTemporal())
OS << "(nontemporal)";
-
+ if (isDereferenceable())
+ OS << "(dereferenceable)";
if (isInvariant())
OS << "(invariant)";
}
@@ -653,12 +706,7 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid,
DebugLoc dl, bool NoImp)
: MCID(&tid), Parent(nullptr), Operands(nullptr), NumOperands(0), Flags(0),
AsmPrinterFlags(0), NumMemRefs(0), MemRefs(nullptr),
- debugLoc(std::move(dl))
-#ifdef LLVM_BUILD_GLOBAL_ISEL
- ,
- Ty(nullptr)
-#endif
-{
+ debugLoc(std::move(dl)) {
assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor");
// Reserve space for the expected number of operands.
@@ -677,12 +725,7 @@ MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid,
MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
: MCID(&MI.getDesc()), Parent(nullptr), Operands(nullptr), NumOperands(0),
Flags(0), AsmPrinterFlags(0), NumMemRefs(MI.NumMemRefs),
- MemRefs(MI.MemRefs), debugLoc(MI.getDebugLoc())
-#ifdef LLVM_BUILD_GLOBAL_ISEL
- ,
- Ty(nullptr)
-#endif
-{
+ MemRefs(MI.MemRefs), debugLoc(MI.getDebugLoc()) {
assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor");
CapOperands = OperandCapacity::get(MI.getNumOperands());
@@ -705,25 +748,6 @@ MachineRegisterInfo *MachineInstr::getRegInfo() {
return nullptr;
}
-// Implement dummy setter and getter for type when
-// global-isel is not built.
-// The proper implementation is WIP and is tracked here:
-// PR26576.
-#ifndef LLVM_BUILD_GLOBAL_ISEL
-void MachineInstr::setType(Type *Ty) {}
-
-Type *MachineInstr::getType() const { return nullptr; }
-
-#else
-void MachineInstr::setType(Type *Ty) {
- assert((!Ty || isPreISelGenericOpcode(getOpcode())) &&
- "Non generic instructions are not supposed to be typed");
- this->Ty = Ty;
-}
-
-Type *MachineInstr::getType() const { return Ty; }
-#endif // LLVM_BUILD_GLOBAL_ISEL
-
/// RemoveRegOperandsFromUseLists - Unlink all of the register operands in
/// this instruction from their respective use lists. This requires that the
/// operands already be on their use lists.
@@ -976,16 +1000,24 @@ bool MachineInstr::isIdenticalTo(const MachineInstr &Other,
return false;
if (isBundle()) {
- // Both instructions are bundles, compare MIs inside the bundle.
+ // We have passed the test above that both instructions have the same
+ // opcode, so we know that both instructions are bundles here. Let's compare
+ // MIs inside the bundle.
+ assert(Other.isBundle() && "Expected that both instructions are bundles.");
MachineBasicBlock::const_instr_iterator I1 = getIterator();
- MachineBasicBlock::const_instr_iterator E1 = getParent()->instr_end();
MachineBasicBlock::const_instr_iterator I2 = Other.getIterator();
- MachineBasicBlock::const_instr_iterator E2 = Other.getParent()->instr_end();
- while (++I1 != E1 && I1->isInsideBundle()) {
+    // Loop until we have analyzed the last instruction inside at least one of the
+ // bundles.
+ while (I1->isBundledWithSucc() && I2->isBundledWithSucc()) {
+ ++I1;
++I2;
- if (I2 == E2 || !I2->isInsideBundle() || !I1->isIdenticalTo(*I2, Check))
+ if (!I1->isIdenticalTo(*I2, Check))
return false;
}
+ // If we've reached the end of just one of the two bundles, but not both,
+ // the instructions are not identical.
+ if (I1->isBundledWithSucc() || I2->isBundledWithSucc())
+ return false;
}
// Check operands to make sure they match.
@@ -1287,8 +1319,8 @@ bool MachineInstr::hasRegisterImplicitUseOperand(unsigned Reg) const {
/// findRegisterUseOperandIdx() - Returns the MachineOperand that is a use of
/// the specific register or -1 if it is not found. It further tightens
/// the search criteria to a use that kills the register if isKill is true.
-int MachineInstr::findRegisterUseOperandIdx(unsigned Reg, bool isKill,
- const TargetRegisterInfo *TRI) const {
+int MachineInstr::findRegisterUseOperandIdx(
+ unsigned Reg, bool isKill, const TargetRegisterInfo *TRI) const {
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
const MachineOperand &MO = getOperand(i);
if (!MO.isReg() || !MO.isUse())
@@ -1296,11 +1328,9 @@ int MachineInstr::findRegisterUseOperandIdx(unsigned Reg, bool isKill,
unsigned MOReg = MO.getReg();
if (!MOReg)
continue;
- if (MOReg == Reg ||
- (TRI &&
- TargetRegisterInfo::isPhysicalRegister(MOReg) &&
- TargetRegisterInfo::isPhysicalRegister(Reg) &&
- TRI->isSubRegister(MOReg, Reg)))
+ if (MOReg == Reg || (TRI && TargetRegisterInfo::isPhysicalRegister(MOReg) &&
+ TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ TRI->isSubRegister(MOReg, Reg)))
if (!isKill || MO.isKill())
return i;
}
@@ -1533,7 +1563,7 @@ bool MachineInstr::isSafeToMove(AliasAnalysis *AA, bool &SawStore) const {
// destination. The check for isInvariantLoad gives the targe the chance to
// classify the load as always returning a constant, e.g. a constant pool
// load.
- if (mayLoad() && !isInvariantLoad(AA))
+ if (mayLoad() && !isDereferenceableInvariantLoad(AA))
// Otherwise, this is a real load. If there is a store between the load and
// end of block, we can't move it.
return !SawStore;
@@ -1564,12 +1594,10 @@ bool MachineInstr::hasOrderedMemoryRef() const {
});
}
-/// isInvariantLoad - Return true if this instruction is loading from a
-/// location whose value is invariant across the function. For example,
-/// loading a value from the constant pool or from the argument area
-/// of a function if it does not change. This should only return true of
-/// *all* loads the instruction does are invariant (if it does multiple loads).
-bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const {
+/// isDereferenceableInvariantLoad - Return true if this instruction will never
+/// trap and is loading from a location whose value is invariant across a run of
+/// this function.
+bool MachineInstr::isDereferenceableInvariantLoad(AliasAnalysis *AA) const {
// If the instruction doesn't load at all, it isn't an invariant load.
if (!mayLoad())
return false;
@@ -1579,16 +1607,17 @@ bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const {
if (memoperands_empty())
return false;
- const MachineFrameInfo *MFI = getParent()->getParent()->getFrameInfo();
+ const MachineFrameInfo &MFI = getParent()->getParent()->getFrameInfo();
for (MachineMemOperand *MMO : memoperands()) {
if (MMO->isVolatile()) return false;
if (MMO->isStore()) return false;
- if (MMO->isInvariant()) continue;
+ if (MMO->isInvariant() && MMO->isDereferenceable())
+ continue;
// A load from a constant PseudoSourceValue is invariant.
if (const PseudoSourceValue *PSV = MMO->getPseudoValue())
- if (PSV->isConstant(MFI))
+ if (PSV->isConstant(&MFI))
continue;
if (const Value *V = MMO->getValue()) {
@@ -1663,35 +1692,40 @@ void MachineInstr::copyImplicitOps(MachineFunction &MF,
}
}
-LLVM_DUMP_METHOD void MachineInstr::dump() const {
+LLVM_DUMP_METHOD void MachineInstr::dump(const TargetInstrInfo *TII) const {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- dbgs() << " " << *this;
+ dbgs() << " ";
+ print(dbgs(), false /* SkipOpers */, TII);
#endif
}
-void MachineInstr::print(raw_ostream &OS, bool SkipOpers) const {
+void MachineInstr::print(raw_ostream &OS, bool SkipOpers,
+ const TargetInstrInfo *TII) const {
const Module *M = nullptr;
if (const MachineBasicBlock *MBB = getParent())
if (const MachineFunction *MF = MBB->getParent())
M = MF->getFunction()->getParent();
ModuleSlotTracker MST(M);
- print(OS, MST, SkipOpers);
+ print(OS, MST, SkipOpers, TII);
}
void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
- bool SkipOpers) const {
+ bool SkipOpers, const TargetInstrInfo *TII) const {
// We can be a bit tidier if we know the MachineFunction.
const MachineFunction *MF = nullptr;
const TargetRegisterInfo *TRI = nullptr;
const MachineRegisterInfo *MRI = nullptr;
- const TargetInstrInfo *TII = nullptr;
+ const TargetIntrinsicInfo *IntrinsicInfo = nullptr;
+
if (const MachineBasicBlock *MBB = getParent()) {
MF = MBB->getParent();
if (MF) {
MRI = &MF->getRegInfo();
TRI = MF->getSubtarget().getRegisterInfo();
- TII = MF->getSubtarget().getInstrInfo();
+ if (!TII)
+ TII = MF->getSubtarget().getInstrInfo();
+ IntrinsicInfo = MF->getTarget().getIntrinsicInfo();
}
}
@@ -1705,13 +1739,13 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
!getOperand(StartOp).isImplicit();
++StartOp) {
if (StartOp != 0) OS << ", ";
- getOperand(StartOp).print(OS, MST, TRI);
+ getOperand(StartOp).print(OS, MST, TRI, IntrinsicInfo);
unsigned Reg = getOperand(StartOp).getReg();
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
VirtRegs.push_back(Reg);
- unsigned Size;
- if (MRI && (Size = MRI->getSize(Reg)))
- OS << '(' << Size << ')';
+ LLT Ty = MRI ? MRI->getType(Reg) : LLT{};
+ if (Ty.isValid())
+ OS << '(' << Ty << ')';
}
}
@@ -1724,12 +1758,6 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
else
OS << "UNKNOWN";
- if (getType()) {
- OS << ' ';
- getType()->print(OS, /*IsForDebug*/ false, /*NoDetails*/ true);
- OS << ' ';
- }
-
if (SkipOpers)
return;
@@ -2145,8 +2173,8 @@ void MachineInstr::setPhysRegsDeadExcept(ArrayRef<unsigned> UsedRegs,
unsigned Reg = MO.getReg();
if (!TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
// If there are no uses, including partial uses, the def is dead.
- if (std::none_of(UsedRegs.begin(), UsedRegs.end(),
- [&](unsigned Use) { return TRI.regsOverlap(Use, Reg); }))
+ if (none_of(UsedRegs,
+ [&](unsigned Use) { return TRI.regsOverlap(Use, Reg); }))
MO.setIsDead();
}
diff --git a/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp b/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp
index e4686b3c5c4e..b5621a09c6ff 100644
--- a/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineInstrBundle.cpp
@@ -24,7 +24,8 @@ namespace {
class UnpackMachineBundles : public MachineFunctionPass {
public:
static char ID; // Pass identification
- UnpackMachineBundles(std::function<bool(const Function &)> Ftor = nullptr)
+ UnpackMachineBundles(
+ std::function<bool(const MachineFunction &)> Ftor = nullptr)
: MachineFunctionPass(ID), PredicateFtor(std::move(Ftor)) {
initializeUnpackMachineBundlesPass(*PassRegistry::getPassRegistry());
}
@@ -32,7 +33,7 @@ namespace {
bool runOnMachineFunction(MachineFunction &MF) override;
private:
- std::function<bool(const Function &)> PredicateFtor;
+ std::function<bool(const MachineFunction &)> PredicateFtor;
};
} // end anonymous namespace
@@ -42,7 +43,7 @@ INITIALIZE_PASS(UnpackMachineBundles, "unpack-mi-bundles",
"Unpack machine instruction bundles", false, false)
bool UnpackMachineBundles::runOnMachineFunction(MachineFunction &MF) {
- if (PredicateFtor && !PredicateFtor(*MF.getFunction()))
+ if (PredicateFtor && !PredicateFtor(MF))
return false;
bool Changed = false;
@@ -78,7 +79,8 @@ bool UnpackMachineBundles::runOnMachineFunction(MachineFunction &MF) {
}
FunctionPass *
-llvm::createUnpackMachineBundles(std::function<bool(const Function &)> Ftor) {
+llvm::createUnpackMachineBundles(
+ std::function<bool(const MachineFunction &)> Ftor) {
return new UnpackMachineBundles(std::move(Ftor));
}
diff --git a/contrib/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm/lib/CodeGen/MachineLICM.cpp
index 119751b17f56..b3d18435985e 100644
--- a/contrib/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineLICM.cpp
@@ -92,8 +92,7 @@ namespace {
SmallVector<MachineBasicBlock*, 8> ExitBlocks;
bool isExitBlock(const MachineBasicBlock *MBB) const {
- return std::find(ExitBlocks.begin(), ExitBlocks.end(), MBB) !=
- ExitBlocks.end();
+ return is_contained(ExitBlocks, MBB);
}
// Track 'estimated' register pressure.
@@ -268,7 +267,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
TII = ST.getInstrInfo();
TLI = ST.getTargetLowering();
TRI = ST.getRegisterInfo();
- MFI = MF.getFrameInfo();
+ MFI = &MF.getFrameInfo();
MRI = &MF.getRegInfo();
SchedModel.init(ST.getSchedModel(), &ST, TII);
@@ -896,7 +895,7 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
// If the physreg has no defs anywhere, it's just an ambient register
// and we can freely move its uses. Alternatively, if it's allocatable,
// it could get allocated to something with a def during allocation.
- if (!MRI->isConstantPhysReg(Reg, *I.getParent()->getParent()))
+ if (!MRI->isConstantPhysReg(Reg))
return false;
// Otherwise it's safe to move.
continue;
@@ -1139,7 +1138,8 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
// High register pressure situation, only hoist if the instruction is going
// to be remat'ed.
- if (!TII->isTriviallyReMaterializable(MI, AA) && !MI.isInvariantLoad(AA)) {
+ if (!TII->isTriviallyReMaterializable(MI, AA) &&
+ !MI.isDereferenceableInvariantLoad(AA)) {
DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI);
return false;
}
@@ -1158,7 +1158,7 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
// If not, we may be able to unfold a load and hoist that.
// First test whether the instruction is loading from an amenable
// memory location.
- if (!MI->isInvariantLoad(AA))
+ if (!MI->isDereferenceableInvariantLoad(AA))
return nullptr;
// Next determine the register class for a temporary register.
@@ -1336,6 +1336,11 @@ bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
// Otherwise, splice the instruction to the preheader.
Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI);
+ // Since we are moving the instruction out of its basic block, we do not
+ // retain its debug location. Doing so would degrade the debugging
+ // experience and adversely affect the accuracy of profiling information.
+ MI->setDebugLoc(DebugLoc());
+
// Update register pressure for BBs from header to this block.
UpdateBackTraceRegPressure(MI);
diff --git a/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp
index 376f78fda1c4..fdeaf7b71161 100644
--- a/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineLoopInfo.cpp
@@ -77,6 +77,51 @@ MachineBasicBlock *MachineLoop::getBottomBlock() {
return BotMBB;
}
+MachineBasicBlock *MachineLoop::findLoopControlBlock() {
+ if (MachineBasicBlock *Latch = getLoopLatch()) {
+ if (isLoopExiting(Latch))
+ return Latch;
+ else
+ return getExitingBlock();
+ }
+ return nullptr;
+}
+
+MachineBasicBlock *
+MachineLoopInfo::findLoopPreheader(MachineLoop *L,
+ bool SpeculativePreheader) const {
+ if (MachineBasicBlock *PB = L->getLoopPreheader())
+ return PB;
+
+ if (!SpeculativePreheader)
+ return nullptr;
+
+ MachineBasicBlock *HB = L->getHeader(), *LB = L->getLoopLatch();
+ if (HB->pred_size() != 2 || HB->hasAddressTaken())
+ return nullptr;
+ // Find the predecessor of the header that is not the latch block.
+ MachineBasicBlock *Preheader = nullptr;
+ for (MachineBasicBlock *P : HB->predecessors()) {
+ if (P == LB)
+ continue;
+    // Sanity check: there should be exactly one predecessor besides the latch.
+ if (Preheader)
+ return nullptr;
+ Preheader = P;
+ }
+
+ // Check if the preheader candidate is a successor of any other loop
+ // headers. We want to avoid having two loop setups in the same block.
+ for (MachineBasicBlock *S : Preheader->successors()) {
+ if (S == HB)
+ continue;
+ MachineLoop *T = getLoopFor(S);
+ if (T && T->getHeader() == S)
+ return nullptr;
+ }
+ return Preheader;
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void MachineLoop::dump() const {
print(dbgs());
diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
index 244e3fbc4e8f..6618857477ed 100644
--- a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp
@@ -9,26 +9,31 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/ADT/PointerUnion.h"
+#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/TinyPtrVector.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionInitializer.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
using namespace llvm;
using namespace llvm::dwarf;
// Handle the Pass registration stuff necessary to use DataLayout's.
-INITIALIZE_PASS(MachineModuleInfo, "machinemoduleinfo",
- "Machine Module Information", false, false)
+INITIALIZE_TM_PASS(MachineModuleInfo, "machinemoduleinfo",
+ "Machine Module Information", false, false)
char MachineModuleInfo::ID = 0;
// Out of line virtual method.
@@ -54,7 +59,7 @@ public:
class MMIAddrLabelMap {
MCContext &Context;
struct AddrLabelSymEntry {
- /// Symbols - The symbols for the label.
+ /// The symbols for the label.
TinyPtrVector<MCSymbol *> Symbols;
Function *Fn; // The containing function of the BasicBlock.
@@ -63,14 +68,13 @@ class MMIAddrLabelMap {
DenseMap<AssertingVH<BasicBlock>, AddrLabelSymEntry> AddrLabelSymbols;
- /// BBCallbacks - Callbacks for the BasicBlock's that we have entries for. We
- /// use this so we get notified if a block is deleted or RAUWd.
+ /// Callbacks for the BasicBlock's that we have entries for. We use this so
+ /// we get notified if a block is deleted or RAUWd.
std::vector<MMIAddrLabelMapCallbackPtr> BBCallbacks;
- /// DeletedAddrLabelsNeedingEmission - This is a per-function list of symbols
- /// whose corresponding BasicBlock got deleted. These symbols need to be
- /// emitted at some point in the file, so AsmPrinter emits them after the
- /// function body.
+ /// This is a per-function list of symbols whose corresponding BasicBlock got
+ /// deleted. These symbols need to be emitted at some point in the file, so
+ /// AsmPrinter emits them after the function body.
DenseMap<AssertingVH<Function>, std::vector<MCSymbol*> >
DeletedAddrLabelsNeedingEmission;
public:
@@ -112,8 +116,7 @@ ArrayRef<MCSymbol *> MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) {
return Entry.Symbols;
}
-/// takeDeletedSymbolsForFunction - If we have any deleted symbols for F, return
-/// them.
+/// If we have any deleted symbols for F, return them.
void MMIAddrLabelMap::
takeDeletedSymbolsForFunction(Function *F, std::vector<MCSymbol*> &Result) {
DenseMap<AssertingVH<Function>, std::vector<MCSymbol*> >::iterator I =
@@ -186,20 +189,13 @@ void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) {
//===----------------------------------------------------------------------===//
-MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI,
- const MCRegisterInfo &MRI,
- const MCObjectFileInfo *MOFI)
- : ImmutablePass(ID), Context(&MAI, &MRI, MOFI, nullptr, false) {
+MachineModuleInfo::MachineModuleInfo(const TargetMachine *TM)
+ : ImmutablePass(ID), TM(*TM),
+ Context(TM->getMCAsmInfo(), TM->getMCRegisterInfo(),
+ TM->getObjFileLowering(), nullptr, false) {
initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry());
}
-MachineModuleInfo::MachineModuleInfo()
- : ImmutablePass(ID), Context(nullptr, nullptr, nullptr) {
- llvm_unreachable("This MachineModuleInfo constructor should never be called, "
- "MMI should always be explicitly constructed by "
- "LLVMTargetMachine");
-}
-
MachineModuleInfo::~MachineModuleInfo() {
}
@@ -207,13 +203,9 @@ bool MachineModuleInfo::doInitialization(Module &M) {
ObjFileMMI = nullptr;
CurCallSite = 0;
- CallsEHReturn = false;
- CallsUnwindInit = false;
- HasEHFunclets = false;
DbgInfoAvailable = UsesVAFloatArgument = UsesMorestackAddr = false;
- PersonalityTypeCache = EHPersonality::Unknown;
AddrLabelSymbols = nullptr;
- TheModule = nullptr;
+ TheModule = &M;
return false;
}
@@ -233,30 +225,8 @@ bool MachineModuleInfo::doFinalization(Module &M) {
return false;
}
-/// EndFunction - Discard function meta information.
-///
-void MachineModuleInfo::EndFunction() {
- // Clean up frame info.
- FrameInstructions.clear();
-
- // Clean up exception info.
- LandingPads.clear();
- PersonalityTypeCache = EHPersonality::Unknown;
- CallSiteMap.clear();
- TypeInfos.clear();
- FilterIds.clear();
- FilterEnds.clear();
- CallsEHReturn = false;
- CallsUnwindInit = false;
- HasEHFunclets = false;
- VariableDbgInfos.clear();
-}
-
//===- Address of Block Management ----------------------------------------===//
-/// getAddrLabelSymbolToEmit - Return the symbol to be used for the specified
-/// basic block when its address is taken. If other blocks were RAUW'd to
-/// this one, we may have to emit them as well, return the whole set.
ArrayRef<MCSymbol *>
MachineModuleInfo::getAddrLabelSymbolToEmit(const BasicBlock *BB) {
// Lazily create AddrLabelSymbols.
@@ -265,11 +235,6 @@ MachineModuleInfo::getAddrLabelSymbolToEmit(const BasicBlock *BB) {
return AddrLabelSymbols->getAddrLabelSymbolToEmit(const_cast<BasicBlock*>(BB));
}
-
-/// takeDeletedSymbolsForFunction - If the specified function has had any
-/// references to address-taken blocks generated, but the block got deleted,
-/// return the symbol now so we can emit it. This prevents emitting a
-/// reference to a symbol that has no definition.
void MachineModuleInfo::
takeDeletedSymbolsForFunction(const Function *F,
std::vector<MCSymbol*> &Result) {
@@ -279,40 +244,8 @@ takeDeletedSymbolsForFunction(const Function *F,
takeDeletedSymbolsForFunction(const_cast<Function*>(F), Result);
}
-//===- EH -----------------------------------------------------------------===//
-
-/// getOrCreateLandingPadInfo - Find or create an LandingPadInfo for the
-/// specified MachineBasicBlock.
-LandingPadInfo &MachineModuleInfo::getOrCreateLandingPadInfo
- (MachineBasicBlock *LandingPad) {
- unsigned N = LandingPads.size();
- for (unsigned i = 0; i < N; ++i) {
- LandingPadInfo &LP = LandingPads[i];
- if (LP.LandingPadBlock == LandingPad)
- return LP;
- }
-
- LandingPads.push_back(LandingPadInfo(LandingPad));
- return LandingPads[N];
-}
-
-/// addInvoke - Provide the begin and end labels of an invoke style call and
-/// associate it with a try landing pad block.
-void MachineModuleInfo::addInvoke(MachineBasicBlock *LandingPad,
- MCSymbol *BeginLabel, MCSymbol *EndLabel) {
- LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
- LP.BeginLabels.push_back(BeginLabel);
- LP.EndLabels.push_back(EndLabel);
-}
-
-/// addLandingPad - Provide the label of a try LandingPad block.
-///
-MCSymbol *MachineModuleInfo::addLandingPad(MachineBasicBlock *LandingPad) {
- MCSymbol *LandingPadLabel = Context.createTempSymbol();
- LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
- LP.LandingPadLabel = LandingPadLabel;
- return LandingPadLabel;
-}
+/// \name Exception Handling
+/// \{
void MachineModuleInfo::addPersonality(const Function *Personality) {
for (unsigned i = 0; i < Personalities.size(); ++i)
@@ -321,143 +254,83 @@ void MachineModuleInfo::addPersonality(const Function *Personality) {
Personalities.push_back(Personality);
}
-/// addCatchTypeInfo - Provide the catch typeinfo for a landing pad.
-///
-void MachineModuleInfo::
-addCatchTypeInfo(MachineBasicBlock *LandingPad,
- ArrayRef<const GlobalValue *> TyInfo) {
- LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
- for (unsigned N = TyInfo.size(); N; --N)
- LP.TypeIds.push_back(getTypeIDFor(TyInfo[N - 1]));
-}
-
-/// addFilterTypeInfo - Provide the filter typeinfo for a landing pad.
-///
-void MachineModuleInfo::
-addFilterTypeInfo(MachineBasicBlock *LandingPad,
- ArrayRef<const GlobalValue *> TyInfo) {
- LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
- std::vector<unsigned> IdsInFilter(TyInfo.size());
- for (unsigned I = 0, E = TyInfo.size(); I != E; ++I)
- IdsInFilter[I] = getTypeIDFor(TyInfo[I]);
- LP.TypeIds.push_back(getFilterIDFor(IdsInFilter));
-}
-
-/// addCleanup - Add a cleanup action for a landing pad.
-///
-void MachineModuleInfo::addCleanup(MachineBasicBlock *LandingPad) {
- LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
- LP.TypeIds.push_back(0);
-}
+/// \}
+
+MachineFunction &MachineModuleInfo::getMachineFunction(const Function &F) {
+ // Shortcut for the common case where a sequence of MachineFunctionPasses
+ // all query for the same Function.
+ if (LastRequest == &F)
+ return *LastResult;
+
+ auto I = MachineFunctions.insert(
+ std::make_pair(&F, std::unique_ptr<MachineFunction>()));
+ MachineFunction *MF;
+ if (I.second) {
+ // No pre-existing machine function, create a new one.
+ MF = new MachineFunction(&F, TM, NextFnNum++, *this);
+ // Update the set entry.
+ I.first->second.reset(MF);
+
+ if (MFInitializer)
+ if (MFInitializer->initializeMachineFunction(*MF))
+ report_fatal_error("Unable to initialize machine function");
+ } else {
+ MF = I.first->second.get();
+ }
-void MachineModuleInfo::addSEHCatchHandler(MachineBasicBlock *LandingPad,
- const Function *Filter,
- const BlockAddress *RecoverBA) {
- LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
- SEHHandler Handler;
- Handler.FilterOrFinally = Filter;
- Handler.RecoverBA = RecoverBA;
- LP.SEHHandlers.push_back(Handler);
+ LastRequest = &F;
+ LastResult = MF;
+ return *MF;
}
-void MachineModuleInfo::addSEHCleanupHandler(MachineBasicBlock *LandingPad,
- const Function *Cleanup) {
- LandingPadInfo &LP = getOrCreateLandingPadInfo(LandingPad);
- SEHHandler Handler;
- Handler.FilterOrFinally = Cleanup;
- Handler.RecoverBA = nullptr;
- LP.SEHHandlers.push_back(Handler);
+void MachineModuleInfo::deleteMachineFunctionFor(Function &F) {
+ MachineFunctions.erase(&F);
+ LastRequest = nullptr;
+ LastResult = nullptr;
}
-/// TidyLandingPads - Remap landing pad labels and remove any deleted landing
-/// pads.
-void MachineModuleInfo::TidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) {
- for (unsigned i = 0; i != LandingPads.size(); ) {
- LandingPadInfo &LandingPad = LandingPads[i];
- if (LandingPad.LandingPadLabel &&
- !LandingPad.LandingPadLabel->isDefined() &&
- (!LPMap || (*LPMap)[LandingPad.LandingPadLabel] == 0))
- LandingPad.LandingPadLabel = nullptr;
-
- // Special case: we *should* emit LPs with null LP MBB. This indicates
- // "nounwind" case.
- if (!LandingPad.LandingPadLabel && LandingPad.LandingPadBlock) {
- LandingPads.erase(LandingPads.begin() + i);
- continue;
- }
-
- for (unsigned j = 0, e = LandingPads[i].BeginLabels.size(); j != e; ++j) {
- MCSymbol *BeginLabel = LandingPad.BeginLabels[j];
- MCSymbol *EndLabel = LandingPad.EndLabels[j];
- if ((BeginLabel->isDefined() ||
- (LPMap && (*LPMap)[BeginLabel] != 0)) &&
- (EndLabel->isDefined() ||
- (LPMap && (*LPMap)[EndLabel] != 0))) continue;
-
- LandingPad.BeginLabels.erase(LandingPad.BeginLabels.begin() + j);
- LandingPad.EndLabels.erase(LandingPad.EndLabels.begin() + j);
- --j;
- --e;
- }
-
- // Remove landing pads with no try-ranges.
- if (LandingPads[i].BeginLabels.empty()) {
- LandingPads.erase(LandingPads.begin() + i);
- continue;
- }
+namespace {
+/// This pass frees the MachineFunction object associated with a Function.
+class FreeMachineFunction : public FunctionPass {
+public:
+ static char ID;
+ FreeMachineFunction() : FunctionPass(ID) {}
- // If there is no landing pad, ensure that the list of typeids is empty.
- // If the only typeid is a cleanup, this is the same as having no typeids.
- if (!LandingPad.LandingPadBlock ||
- (LandingPad.TypeIds.size() == 1 && !LandingPad.TypeIds[0]))
- LandingPad.TypeIds.clear();
- ++i;
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineModuleInfo>();
+ AU.addPreserved<MachineModuleInfo>();
}
-}
-/// setCallSiteLandingPad - Map the landing pad's EH symbol to the call site
-/// indexes.
-void MachineModuleInfo::setCallSiteLandingPad(MCSymbol *Sym,
- ArrayRef<unsigned> Sites) {
- LPadToCallSiteMap[Sym].append(Sites.begin(), Sites.end());
-}
-
-/// getTypeIDFor - Return the type id for the specified typeinfo. This is
-/// function wide.
-unsigned MachineModuleInfo::getTypeIDFor(const GlobalValue *TI) {
- for (unsigned i = 0, N = TypeInfos.size(); i != N; ++i)
- if (TypeInfos[i] == TI) return i + 1;
+ bool runOnFunction(Function &F) override {
+ MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>();
+ MMI.deleteMachineFunctionFor(F);
+ return true;
+ }
+};
+char FreeMachineFunction::ID;
+} // end anonymous namespace
- TypeInfos.push_back(TI);
- return TypeInfos.size();
+namespace llvm {
+FunctionPass *createFreeMachineFunctionPass() {
+ return new FreeMachineFunction();
}
-
-/// getFilterIDFor - Return the filter id for the specified typeinfos. This is
-/// function wide.
-int MachineModuleInfo::getFilterIDFor(std::vector<unsigned> &TyIds) {
- // If the new filter coincides with the tail of an existing filter, then
- // re-use the existing filter. Folding filters more than this requires
- // re-ordering filters and/or their elements - probably not worth it.
- for (std::vector<unsigned>::iterator I = FilterEnds.begin(),
- E = FilterEnds.end(); I != E; ++I) {
- unsigned i = *I, j = TyIds.size();
-
- while (i && j)
- if (FilterIds[--i] != TyIds[--j])
- goto try_next;
-
- if (!j)
- // The new filter coincides with range [i, end) of the existing filter.
- return -(1 + i);
-
-try_next:;
+} // end namespace llvm
+
+//===- MMI building helpers -----------------------------------------------===//
+
+void llvm::computeUsesVAFloatArgument(const CallInst &I,
+ MachineModuleInfo &MMI) {
+ FunctionType *FT =
+ cast<FunctionType>(I.getCalledValue()->getType()->getContainedType(0));
+ if (FT->isVarArg() && !MMI.usesVAFloatArgument()) {
+ for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
+ Type *T = I.getArgOperand(i)->getType();
+ for (auto i : post_order(T)) {
+ if (i->isFloatingPointTy()) {
+ MMI.setUsesVAFloatArgument(true);
+ return;
+ }
+ }
+ }
}
-
- // Add the new filter.
- int FilterID = -(1 + FilterIds.size());
- FilterIds.reserve(FilterIds.size() + TyIds.size() + 1);
- FilterIds.insert(FilterIds.end(), TyIds.begin(), TyIds.end());
- FilterEnds.push_back(FilterIds.size());
- FilterIds.push_back(0); // terminator
- return FilterID;
}
diff --git a/contrib/llvm/lib/CodeGen/MachinePipeliner.cpp b/contrib/llvm/lib/CodeGen/MachinePipeliner.cpp
new file mode 100644
index 000000000000..43a18099d39a
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -0,0 +1,3984 @@
+//===-- MachinePipeliner.cpp - Machine Software Pipeliner Pass ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// An implementation of the Swing Modulo Scheduling (SMS) software pipeliner.
+//
+// Software pipelining (SWP) is an instruction scheduling technique for loops
+// that overlaps loop iterations and exploits ILP via a compiler transformation.
+//
+// Swing Modulo Scheduling is an implementation of software pipelining
+// that generates schedules that are near optimal in terms of initiation
+// interval, register requirements, and stage count. See the papers:
+//
+// "Swing Modulo Scheduling: A Lifetime-Sensitive Approach", by J. Llosa,
+// A. Gonzalez, E. Ayguade, and M. Valero. In PACT '96 Proceedings of the 1996
+// Conference on Parallel Architectures and Compilation Techniques.
+//
+// "Lifetime-Sensitive Modulo Scheduling in a Production Environment", by J.
+// Llosa, E. Ayguade, A. Gonzalez, M. Valero, and J. Eckhardt. In IEEE
+// Transactions on Computers, Vol. 50, No. 3, 2001.
+//
+// "An Implementation of Swing Modulo Scheduling With Extensions for
+// Superblocks", by T. Lattner, Master's Thesis, University of Illinois at
+// Urbana-Champaign, 2005.
+//
+//
+// The SMS algorithm consists of three main steps after computing the minimal
+// initiation interval (MII).
+// 1) Analyze the dependence graph and compute information about each
+// instruction in the graph.
+// 2) Order the nodes (instructions) by priority based upon the heuristics
+// described in the algorithm.
+// 3) Attempt to schedule the nodes in the specified order using the MII.
+//
+// This SMS implementation is a target-independent back-end pass. When enabled,
+// the pass runs just prior to the register allocation pass, while the machine
+// IR is in SSA form. If software pipelining is successful, then the original
+// loop is replaced by the optimized loop. The optimized loop contains one or
+// more prolog blocks, the pipelined kernel, and one or more epilog blocks. If
+// the instructions cannot be scheduled in a given MII, we increase the MII by
+// one and try again.
+//
+// The SMS implementation is an extension of the ScheduleDAGInstrs class. We
+// represent loop carried dependences in the DAG as order edges to the Phi
+// nodes. We also perform several passes over the DAG to eliminate unnecessary
+// edges that inhibit the ability to pipeline. The implementation uses the
+// DFAPacketizer class to compute the minimum initiation interval and to check
+// where an instruction may be inserted in the pipelined schedule.
+//
+// In order for the SMS pass to work, several target specific hooks need to be
+// implemented to get information about the loop structure and to rewrite
+// instructions.
+//
+//===----------------------------------------------------------------------===//
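A minimal standalone sketch of the initiation-interval search described in the header above, assuming the caller supplies the per-II scheduling attempt; findFeasibleII, TrySchedule, and MaxII are illustrative names, not entities from this patch:

#include <algorithm>
#include <functional>

// Sketch only: search for the smallest feasible initiation interval, starting
// from the computed MII and growing it by one per failed attempt, as the
// header comment above describes. All names here are hypothetical.
unsigned findFeasibleII(unsigned ResMII, unsigned RecMII, unsigned MaxII,
                        const std::function<bool(unsigned)> &TrySchedule) {
  for (unsigned II = std::max(ResMII, RecMII); II <= MaxII; ++II)
    if (TrySchedule(II))  // try to place every node within II cycles
      return II;          // success: prolog, kernel, and epilog can be emitted
  return 0;               // no feasible II within the limit
}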
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PriorityQueue.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/DFAPacketizer.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+#include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
+#include "llvm/CodeGen/ScheduleDAGMutation.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/PassAnalysisSupport.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/PassSupport.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <climits>
+#include <cstdint>
+#include <deque>
+#include <functional>
+#include <iterator>
+#include <map>
+#include <tuple>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "pipeliner"
+
+STATISTIC(NumTrytoPipeline, "Number of loops that we attempt to pipeline");
+STATISTIC(NumPipelined, "Number of loops software pipelined");
+
+/// A command line option to turn software pipelining on or off.
+static cl::opt<bool> EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true),
+ cl::ZeroOrMore,
+ cl::desc("Enable Software Pipelining"));
+
+/// A command line option to enable SWP at -Os.
+static cl::opt<bool> EnableSWPOptSize("enable-pipeliner-opt-size",
+ cl::desc("Enable SWP at Os."), cl::Hidden,
+ cl::init(false));
+
+/// A command line argument to limit the minimum initiation interval for pipelining.
+static cl::opt<int> SwpMaxMii("pipeliner-max-mii",
+                              cl::desc("Size limit for the MII."),
+ cl::Hidden, cl::init(27));
+
+/// A command line argument to limit the number of stages in the pipeline.
+static cl::opt<int>
+ SwpMaxStages("pipeliner-max-stages",
+             cl::desc("Maximum stages allowed in the generated schedule."),
+ cl::Hidden, cl::init(3));
+
+/// A command line option to disable the pruning of chain dependences due to
+/// an unrelated Phi.
+static cl::opt<bool>
+ SwpPruneDeps("pipeliner-prune-deps",
+ cl::desc("Prune dependences between unrelated Phi nodes."),
+ cl::Hidden, cl::init(true));
+
+/// A command line option to disable the pruning of loop carried order
+/// dependences.
+static cl::opt<bool>
+ SwpPruneLoopCarried("pipeliner-prune-loop-carried",
+ cl::desc("Prune loop carried order dependences."),
+ cl::Hidden, cl::init(true));
+
+#ifndef NDEBUG
+static cl::opt<int> SwpLoopLimit("pipeliner-max", cl::Hidden, cl::init(-1));
+#endif
+
+static cl::opt<bool> SwpIgnoreRecMII("pipeliner-ignore-recmii",
+ cl::ReallyHidden, cl::init(false),
+ cl::ZeroOrMore, cl::desc("Ignore RecMII"));
+
+namespace {
+
+class NodeSet;
+class SMSchedule;
+class SwingSchedulerDAG;
+
+/// The main class in the implementation of the target independent
+/// software pipeliner pass.
+class MachinePipeliner : public MachineFunctionPass {
+public:
+ MachineFunction *MF = nullptr;
+ const MachineLoopInfo *MLI = nullptr;
+ const MachineDominatorTree *MDT = nullptr;
+ const InstrItineraryData *InstrItins;
+ const TargetInstrInfo *TII = nullptr;
+ RegisterClassInfo RegClassInfo;
+
+#ifndef NDEBUG
+ static int NumTries;
+#endif
+ /// Cache the target analysis information about the loop.
+ struct LoopInfo {
+ MachineBasicBlock *TBB = nullptr;
+ MachineBasicBlock *FBB = nullptr;
+ SmallVector<MachineOperand, 4> BrCond;
+ MachineInstr *LoopInductionVar = nullptr;
+ MachineInstr *LoopCompare = nullptr;
+ };
+ LoopInfo LI;
+
+ static char ID;
+ MachinePipeliner() : MachineFunctionPass(ID) {
+ initializeMachinePipelinerPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AAResultsWrapperPass>();
+ AU.addPreserved<AAResultsWrapperPass>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<LiveIntervals>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+private:
+ bool canPipelineLoop(MachineLoop &L);
+ bool scheduleLoop(MachineLoop &L);
+ bool swingModuloScheduler(MachineLoop &L);
+};
+
+/// This class builds the dependence graph for the instructions in a loop,
+/// and attempts to schedule the instructions using the SMS algorithm.
+class SwingSchedulerDAG : public ScheduleDAGInstrs {
+ MachinePipeliner &Pass;
+ /// The minimum initiation interval between iterations for this schedule.
+ unsigned MII;
+ /// Set to true if a valid pipelined schedule is found for the loop.
+ bool Scheduled;
+ MachineLoop &Loop;
+ LiveIntervals &LIS;
+ const RegisterClassInfo &RegClassInfo;
+
+  /// A topological ordering of the SUnits, which is needed for changing
+ /// dependences and iterating over the SUnits.
+ ScheduleDAGTopologicalSort Topo;
+
+ struct NodeInfo {
+ int ASAP;
+ int ALAP;
+ NodeInfo() : ASAP(0), ALAP(0) {}
+ };
+ /// Computed properties for each node in the graph.
+ std::vector<NodeInfo> ScheduleInfo;
+
+ enum OrderKind { BottomUp = 0, TopDown = 1 };
+ /// Computed node ordering for scheduling.
+ SetVector<SUnit *> NodeOrder;
+
+ typedef SmallVector<NodeSet, 8> NodeSetType;
+ typedef DenseMap<unsigned, unsigned> ValueMapTy;
+ typedef SmallVectorImpl<MachineBasicBlock *> MBBVectorTy;
+ typedef DenseMap<MachineInstr *, MachineInstr *> InstrMapTy;
+
+ /// Instructions to change when emitting the final schedule.
+ DenseMap<SUnit *, std::pair<unsigned, int64_t>> InstrChanges;
+
+ /// We may create a new instruction, so remember it because it
+ /// must be deleted when the pass is finished.
+ SmallPtrSet<MachineInstr *, 4> NewMIs;
+
+ /// Ordered list of DAG postprocessing steps.
+ std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
+
+ /// Helper class to implement Johnson's circuit finding algorithm.
+ class Circuits {
+ std::vector<SUnit> &SUnits;
+ SetVector<SUnit *> Stack;
+ BitVector Blocked;
+ SmallVector<SmallPtrSet<SUnit *, 4>, 10> B;
+ SmallVector<SmallVector<int, 4>, 16> AdjK;
+ unsigned NumPaths;
+ static unsigned MaxPaths;
+
+ public:
+ Circuits(std::vector<SUnit> &SUs)
+ : SUnits(SUs), Stack(), Blocked(SUs.size()), B(SUs.size()),
+ AdjK(SUs.size()) {}
+ /// Reset the data structures used in the circuit algorithm.
+ void reset() {
+ Stack.clear();
+ Blocked.reset();
+ B.assign(SUnits.size(), SmallPtrSet<SUnit *, 4>());
+ NumPaths = 0;
+ }
+ void createAdjacencyStructure(SwingSchedulerDAG *DAG);
+ bool circuit(int V, int S, NodeSetType &NodeSets, bool HasBackedge = false);
+ void unblock(int U);
+ };
+
+public:
+ SwingSchedulerDAG(MachinePipeliner &P, MachineLoop &L, LiveIntervals &lis,
+ const RegisterClassInfo &rci)
+ : ScheduleDAGInstrs(*P.MF, P.MLI, false), Pass(P), MII(0),
+ Scheduled(false), Loop(L), LIS(lis), RegClassInfo(rci),
+ Topo(SUnits, &ExitSU) {
+ P.MF->getSubtarget().getSMSMutations(Mutations);
+ }
+
+ void schedule() override;
+ void finishBlock() override;
+
+ /// Return true if the loop kernel has been scheduled.
+ bool hasNewSchedule() { return Scheduled; }
+
+ /// Return the earliest time an instruction may be scheduled.
+ int getASAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ASAP; }
+
+  /// Return the latest time an instruction may be scheduled.
+ int getALAP(SUnit *Node) { return ScheduleInfo[Node->NodeNum].ALAP; }
+
+  /// The mobility function, which is the number of slots in which
+ /// an instruction may be scheduled.
+ int getMOV(SUnit *Node) { return getALAP(Node) - getASAP(Node); }
+
+ /// The depth, in the dependence graph, for a node.
+ int getDepth(SUnit *Node) { return Node->getDepth(); }
+
+ /// The height, in the dependence graph, for a node.
+ int getHeight(SUnit *Node) { return Node->getHeight(); }
+
+ /// Return true if the dependence is a back-edge in the data dependence graph.
+ /// Since the DAG doesn't contain cycles, we represent a cycle in the graph
+ /// using an anti dependence from a Phi to an instruction.
+ bool isBackedge(SUnit *Source, const SDep &Dep) {
+ if (Dep.getKind() != SDep::Anti)
+ return false;
+ return Source->getInstr()->isPHI() || Dep.getSUnit()->getInstr()->isPHI();
+ }
+
+ /// Return true if the dependence is an order dependence between non-Phis.
+ static bool isOrder(SUnit *Source, const SDep &Dep) {
+ if (Dep.getKind() != SDep::Order)
+ return false;
+ return (!Source->getInstr()->isPHI() &&
+ !Dep.getSUnit()->getInstr()->isPHI());
+ }
+
+ bool isLoopCarriedOrder(SUnit *Source, const SDep &Dep, bool isSucc = true);
+
+ /// The latency of the dependence.
+ unsigned getLatency(SUnit *Source, const SDep &Dep) {
+ // Anti dependences represent recurrences, so use the latency of the
+ // instruction on the back-edge.
+ if (Dep.getKind() == SDep::Anti) {
+ if (Source->getInstr()->isPHI())
+ return Dep.getSUnit()->Latency;
+ if (Dep.getSUnit()->getInstr()->isPHI())
+ return Source->Latency;
+ return Dep.getLatency();
+ }
+ return Dep.getLatency();
+ }
+
+ /// The distance function, which indicates that operation V of iteration I
+  /// depends on operation U of iteration I-distance.
+ unsigned getDistance(SUnit *U, SUnit *V, const SDep &Dep) {
+ // Instructions that feed a Phi have a distance of 1. Computing larger
+ // values for arrays requires data dependence information.
+ if (V->getInstr()->isPHI() && Dep.getKind() == SDep::Anti)
+ return 1;
+ return 0;
+ }
+
+ /// Set the Minimum Initiation Interval for this schedule attempt.
+ void setMII(unsigned mii) { MII = mii; }
+
+ MachineInstr *applyInstrChange(MachineInstr *MI, SMSchedule &Schedule,
+ bool UpdateDAG = false);
+
+ /// Return the new base register that was stored away for the changed
+ /// instruction.
+ unsigned getInstrBaseReg(SUnit *SU) {
+ DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It =
+ InstrChanges.find(SU);
+ if (It != InstrChanges.end())
+ return It->second.first;
+ return 0;
+ }
+
+ void addMutation(std::unique_ptr<ScheduleDAGMutation> Mutation) {
+ Mutations.push_back(std::move(Mutation));
+ }
+
+private:
+ void addLoopCarriedDependences(AliasAnalysis *AA);
+ void updatePhiDependences();
+ void changeDependences();
+ unsigned calculateResMII();
+ unsigned calculateRecMII(NodeSetType &RecNodeSets);
+ void findCircuits(NodeSetType &NodeSets);
+ void fuseRecs(NodeSetType &NodeSets);
+ void removeDuplicateNodes(NodeSetType &NodeSets);
+ void computeNodeFunctions(NodeSetType &NodeSets);
+ void registerPressureFilter(NodeSetType &NodeSets);
+ void colocateNodeSets(NodeSetType &NodeSets);
+ void checkNodeSets(NodeSetType &NodeSets);
+ void groupRemainingNodes(NodeSetType &NodeSets);
+ void addConnectedNodes(SUnit *SU, NodeSet &NewSet,
+ SetVector<SUnit *> &NodesAdded);
+ void computeNodeOrder(NodeSetType &NodeSets);
+ bool schedulePipeline(SMSchedule &Schedule);
+ void generatePipelinedLoop(SMSchedule &Schedule);
+ void generateProlog(SMSchedule &Schedule, unsigned LastStage,
+ MachineBasicBlock *KernelBB, ValueMapTy *VRMap,
+ MBBVectorTy &PrologBBs);
+ void generateEpilog(SMSchedule &Schedule, unsigned LastStage,
+ MachineBasicBlock *KernelBB, ValueMapTy *VRMap,
+ MBBVectorTy &EpilogBBs, MBBVectorTy &PrologBBs);
+ void generateExistingPhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
+ MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
+ SMSchedule &Schedule, ValueMapTy *VRMap,
+ InstrMapTy &InstrMap, unsigned LastStageNum,
+ unsigned CurStageNum, bool IsLast);
+ void generatePhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
+ MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
+ SMSchedule &Schedule, ValueMapTy *VRMap,
+ InstrMapTy &InstrMap, unsigned LastStageNum,
+ unsigned CurStageNum, bool IsLast);
+ void removeDeadInstructions(MachineBasicBlock *KernelBB,
+ MBBVectorTy &EpilogBBs);
+ void splitLifetimes(MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs,
+ SMSchedule &Schedule);
+ void addBranches(MBBVectorTy &PrologBBs, MachineBasicBlock *KernelBB,
+ MBBVectorTy &EpilogBBs, SMSchedule &Schedule,
+ ValueMapTy *VRMap);
+ bool computeDelta(MachineInstr &MI, unsigned &Delta);
+ void updateMemOperands(MachineInstr &NewMI, MachineInstr &OldMI,
+ unsigned Num);
+ MachineInstr *cloneInstr(MachineInstr *OldMI, unsigned CurStageNum,
+ unsigned InstStageNum);
+ MachineInstr *cloneAndChangeInstr(MachineInstr *OldMI, unsigned CurStageNum,
+ unsigned InstStageNum,
+ SMSchedule &Schedule);
+ void updateInstruction(MachineInstr *NewMI, bool LastDef,
+ unsigned CurStageNum, unsigned InstStageNum,
+ SMSchedule &Schedule, ValueMapTy *VRMap);
+ MachineInstr *findDefInLoop(unsigned Reg);
+ unsigned getPrevMapVal(unsigned StageNum, unsigned PhiStage, unsigned LoopVal,
+ unsigned LoopStage, ValueMapTy *VRMap,
+ MachineBasicBlock *BB);
+ void rewritePhiValues(MachineBasicBlock *NewBB, unsigned StageNum,
+ SMSchedule &Schedule, ValueMapTy *VRMap,
+ InstrMapTy &InstrMap);
+ void rewriteScheduledInstr(MachineBasicBlock *BB, SMSchedule &Schedule,
+ InstrMapTy &InstrMap, unsigned CurStageNum,
+ unsigned PhiNum, MachineInstr *Phi,
+ unsigned OldReg, unsigned NewReg,
+ unsigned PrevReg = 0);
+ bool canUseLastOffsetValue(MachineInstr *MI, unsigned &BasePos,
+ unsigned &OffsetPos, unsigned &NewBase,
+ int64_t &NewOffset);
+ void postprocessDAG();
+};
+
+/// A NodeSet contains a set of SUnit DAG nodes with additional information
+/// that assigns a priority to the set.
+class NodeSet {
+ SetVector<SUnit *> Nodes;
+ bool HasRecurrence;
+ unsigned RecMII = 0;
+ int MaxMOV = 0;
+ int MaxDepth = 0;
+ unsigned Colocate = 0;
+ SUnit *ExceedPressure = nullptr;
+
+public:
+ typedef SetVector<SUnit *>::const_iterator iterator;
+
+ NodeSet() : Nodes(), HasRecurrence(false) {}
+
+ NodeSet(iterator S, iterator E) : Nodes(S, E), HasRecurrence(true) {}
+
+ bool insert(SUnit *SU) { return Nodes.insert(SU); }
+
+ void insert(iterator S, iterator E) { Nodes.insert(S, E); }
+
+ template <typename UnaryPredicate> bool remove_if(UnaryPredicate P) {
+ return Nodes.remove_if(P);
+ }
+
+ unsigned count(SUnit *SU) const { return Nodes.count(SU); }
+
+ bool hasRecurrence() { return HasRecurrence; };
+
+ unsigned size() const { return Nodes.size(); }
+
+ bool empty() const { return Nodes.empty(); }
+
+ SUnit *getNode(unsigned i) const { return Nodes[i]; };
+
+ void setRecMII(unsigned mii) { RecMII = mii; };
+
+ void setColocate(unsigned c) { Colocate = c; };
+
+ void setExceedPressure(SUnit *SU) { ExceedPressure = SU; }
+
+ bool isExceedSU(SUnit *SU) { return ExceedPressure == SU; }
+
+ int compareRecMII(NodeSet &RHS) { return RecMII - RHS.RecMII; }
+
+ int getRecMII() { return RecMII; }
+
+ /// Summarize node functions for the entire node set.
+ void computeNodeSetInfo(SwingSchedulerDAG *SSD) {
+ for (SUnit *SU : *this) {
+ MaxMOV = std::max(MaxMOV, SSD->getMOV(SU));
+ MaxDepth = std::max(MaxDepth, SSD->getDepth(SU));
+ }
+ }
+
+ void clear() {
+ Nodes.clear();
+ RecMII = 0;
+ HasRecurrence = false;
+ MaxMOV = 0;
+ MaxDepth = 0;
+ Colocate = 0;
+ ExceedPressure = nullptr;
+ }
+
+ operator SetVector<SUnit *> &() { return Nodes; }
+
+ /// Sort the node sets by importance. First, rank them by recurrence MII,
+ /// then by mobility (least mobile done first), and finally by depth.
+ /// Each node set may contain a colocate value which is used as the first
+ /// tie breaker, if it's set.
+ bool operator>(const NodeSet &RHS) const {
+ if (RecMII == RHS.RecMII) {
+ if (Colocate != 0 && RHS.Colocate != 0 && Colocate != RHS.Colocate)
+ return Colocate < RHS.Colocate;
+ if (MaxMOV == RHS.MaxMOV)
+ return MaxDepth > RHS.MaxDepth;
+ return MaxMOV < RHS.MaxMOV;
+ }
+ return RecMII > RHS.RecMII;
+ }
+
+ bool operator==(const NodeSet &RHS) const {
+ return RecMII == RHS.RecMII && MaxMOV == RHS.MaxMOV &&
+ MaxDepth == RHS.MaxDepth;
+ }
+
+ bool operator!=(const NodeSet &RHS) const { return !operator==(RHS); }
+
+ iterator begin() { return Nodes.begin(); }
+ iterator end() { return Nodes.end(); }
+
+ void print(raw_ostream &os) const {
+ os << "Num nodes " << size() << " rec " << RecMII << " mov " << MaxMOV
+ << " depth " << MaxDepth << " col " << Colocate << "\n";
+ for (const auto &I : Nodes)
+ os << " SU(" << I->NodeNum << ") " << *(I->getInstr());
+ os << "\n";
+ }
+
+ void dump() const { print(dbgs()); }
+};
+
+/// This class represents the scheduled code. The main data structure is a
+/// map from scheduled cycle to instructions. During scheduling, the
+/// data structure explicitly represents all stages/iterations. When
+/// the algorithm finishes, the schedule is collapsed into a single stage,
+/// which represents instructions from different loop iterations.
+///
+/// The SMS algorithm allows negative values for cycles, so the first cycle
+/// in the schedule is the smallest cycle value.
+class SMSchedule {
+private:
+ /// Map from execution cycle to instructions.
+ DenseMap<int, std::deque<SUnit *>> ScheduledInstrs;
+
+ /// Map from instruction to execution cycle.
+ std::map<SUnit *, int> InstrToCycle;
+
+ /// Map for each register and the max difference between its uses and def.
+ /// The first element in the pair is the max difference in stages. The
+ /// second is true if the register defines a Phi value and loop value is
+ /// scheduled before the Phi.
+ std::map<unsigned, std::pair<unsigned, bool>> RegToStageDiff;
+
+ /// Keep track of the first cycle value in the schedule. It starts
+ /// as zero, but the algorithm allows negative values.
+ int FirstCycle;
+
+ /// Keep track of the last cycle value in the schedule.
+ int LastCycle;
+
+ /// The initiation interval (II) for the schedule.
+ int InitiationInterval;
+
+ /// Target machine information.
+ const TargetSubtargetInfo &ST;
+
+ /// Virtual register information.
+ MachineRegisterInfo &MRI;
+
+ DFAPacketizer *Resources;
+
+public:
+ SMSchedule(MachineFunction *mf)
+ : ST(mf->getSubtarget()), MRI(mf->getRegInfo()),
+ Resources(ST.getInstrInfo()->CreateTargetScheduleState(ST)) {
+ FirstCycle = 0;
+ LastCycle = 0;
+ InitiationInterval = 0;
+ }
+
+ ~SMSchedule() {
+ ScheduledInstrs.clear();
+ InstrToCycle.clear();
+ RegToStageDiff.clear();
+ delete Resources;
+ }
+
+ void reset() {
+ ScheduledInstrs.clear();
+ InstrToCycle.clear();
+ RegToStageDiff.clear();
+ FirstCycle = 0;
+ LastCycle = 0;
+ InitiationInterval = 0;
+ }
+
+ /// Set the initiation interval for this schedule.
+ void setInitiationInterval(int ii) { InitiationInterval = ii; }
+
+ /// Return the first cycle in the completed schedule. This
+ /// can be a negative value.
+ int getFirstCycle() const { return FirstCycle; }
+
+ /// Return the last cycle in the finalized schedule.
+ int getFinalCycle() const { return FirstCycle + InitiationInterval - 1; }
+
+ /// Return the cycle of the earliest scheduled instruction in the dependence
+ /// chain.
+ int earliestCycleInChain(const SDep &Dep);
+
+ /// Return the cycle of the latest scheduled instruction in the dependence
+ /// chain.
+ int latestCycleInChain(const SDep &Dep);
+
+ void computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
+ int *MinEnd, int *MaxStart, int II, SwingSchedulerDAG *DAG);
+ bool insert(SUnit *SU, int StartCycle, int EndCycle, int II);
+
+ /// Iterators for the cycle to instruction map.
+ typedef DenseMap<int, std::deque<SUnit *>>::iterator sched_iterator;
+ typedef DenseMap<int, std::deque<SUnit *>>::const_iterator
+ const_sched_iterator;
+
+ /// Return true if the instruction is scheduled at the specified stage.
+ bool isScheduledAtStage(SUnit *SU, unsigned StageNum) {
+ return (stageScheduled(SU) == (int)StageNum);
+ }
+
+ /// Return the stage for a scheduled instruction. Return -1 if
+ /// the instruction has not been scheduled.
+ int stageScheduled(SUnit *SU) const {
+ std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU);
+ if (it == InstrToCycle.end())
+ return -1;
+ return (it->second - FirstCycle) / InitiationInterval;
+ }
+
+ /// Return the cycle for a scheduled instruction. This function normalizes
+ /// the first cycle to be 0.
+ unsigned cycleScheduled(SUnit *SU) const {
+ std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SU);
+ assert(it != InstrToCycle.end() && "Instruction hasn't been scheduled.");
+ return (it->second - FirstCycle) % InitiationInterval;
+ }
+
+ /// Return the maximum stage count needed for this schedule.
+ unsigned getMaxStageCount() {
+ return (LastCycle - FirstCycle) / InitiationInterval;
+ }
+
+ /// Return the max. number of stages/iterations that can occur between a
+ /// register definition and its uses.
+ unsigned getStagesForReg(int Reg, unsigned CurStage) {
+ std::pair<unsigned, bool> Stages = RegToStageDiff[Reg];
+ if (CurStage > getMaxStageCount() && Stages.first == 0 && Stages.second)
+ return 1;
+ return Stages.first;
+ }
+
+ /// The number of stages for a Phi is a little different than other
+ /// instructions. The minimum value computed in RegToStageDiff is 1
+ /// because we assume the Phi is needed for at least 1 iteration.
+ /// This is not the case if the loop value is scheduled prior to the
+ /// Phi in the same stage. This function returns the number of stages
+ /// or iterations needed between the Phi definition and any uses.
+ unsigned getStagesForPhi(int Reg) {
+ std::pair<unsigned, bool> Stages = RegToStageDiff[Reg];
+ if (Stages.second)
+ return Stages.first;
+ return Stages.first - 1;
+ }
+
+ /// Return the instructions that are scheduled at the specified cycle.
+ std::deque<SUnit *> &getInstructions(int cycle) {
+ return ScheduledInstrs[cycle];
+ }
+
+ bool isValidSchedule(SwingSchedulerDAG *SSD);
+ void finalizeSchedule(SwingSchedulerDAG *SSD);
+ bool orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
+ std::deque<SUnit *> &Insts);
+ bool isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi);
+ bool isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD, MachineInstr *Inst,
+ MachineOperand &MO);
+ void print(raw_ostream &os) const;
+ void dump() const;
+};
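As a quick worked example of the stage/cycle arithmetic in stageScheduled and cycleScheduled above (the concrete values are illustrative, not taken from this patch): with FirstCycle = -2 and InitiationInterval = 3, an instruction placed at cycle 4 falls in stage (4 - (-2)) / 3 = 2 with normalized cycle (4 - (-2)) % 3 = 0, which is how the flat schedule collapses into stages.

// Standalone restatement of the same arithmetic, illustrative values only.
int FirstCycle = -2, II = 3, Cycle = 4;
int Stage = (Cycle - FirstCycle) / II;            // 6 / 3 == 2
int NormalizedCycle = (Cycle - FirstCycle) % II;  // 6 % 3 == 0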
+
+} // end anonymous namespace
+
+unsigned SwingSchedulerDAG::Circuits::MaxPaths = 5;
+char MachinePipeliner::ID = 0;
+#ifndef NDEBUG
+int MachinePipeliner::NumTries = 0;
+#endif
+char &llvm::MachinePipelinerID = MachinePipeliner::ID;
+INITIALIZE_PASS_BEGIN(MachinePipeliner, "pipeliner",
+ "Modulo Software Pipelining", false, false)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(MachinePipeliner, "pipeliner",
+ "Modulo Software Pipelining", false, false)
+
+/// The "main" function for implementing Swing Modulo Scheduling.
+bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) {
+ if (skipFunction(*mf.getFunction()))
+ return false;
+
+ if (!EnableSWP)
+ return false;
+
+ if (mf.getFunction()->getAttributes().hasAttribute(
+ AttributeSet::FunctionIndex, Attribute::OptimizeForSize) &&
+ !EnableSWPOptSize.getPosition())
+ return false;
+
+ MF = &mf;
+ MLI = &getAnalysis<MachineLoopInfo>();
+ MDT = &getAnalysis<MachineDominatorTree>();
+ TII = MF->getSubtarget().getInstrInfo();
+ RegClassInfo.runOnMachineFunction(*MF);
+
+ for (auto &L : *MLI)
+ scheduleLoop(*L);
+
+ return false;
+}
+
+/// Attempt to perform the SMS algorithm on the specified loop. This function is
+/// the main entry point for the algorithm. The function identifies candidate
+/// loops, calculates the minimum initiation interval, and attempts to schedule
+/// the loop.
+bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
+ bool Changed = false;
+ for (auto &InnerLoop : L)
+ Changed |= scheduleLoop(*InnerLoop);
+
+#ifndef NDEBUG
+ // Stop trying after reaching the limit (if any).
+ int Limit = SwpLoopLimit;
+ if (Limit >= 0) {
+ if (NumTries >= SwpLoopLimit)
+ return Changed;
+ NumTries++;
+ }
+#endif
+
+ if (!canPipelineLoop(L))
+ return Changed;
+
+ ++NumTrytoPipeline;
+
+ Changed = swingModuloScheduler(L);
+
+ return Changed;
+}
+
+/// Return true if the loop can be software pipelined. The algorithm is
+/// restricted to loops with a single basic block. Make sure that the
+/// branch in the loop can be analyzed.
+bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
+ if (L.getNumBlocks() != 1)
+ return false;
+
+  // If the branch can't be analyzed, then we can't pipeline this loop.
+ LI.TBB = nullptr;
+ LI.FBB = nullptr;
+ LI.BrCond.clear();
+ if (TII->analyzeBranch(*L.getHeader(), LI.TBB, LI.FBB, LI.BrCond))
+ return false;
+
+ LI.LoopInductionVar = nullptr;
+ LI.LoopCompare = nullptr;
+ if (TII->analyzeLoop(L, LI.LoopInductionVar, LI.LoopCompare))
+ return false;
+
+ if (!L.getLoopPreheader())
+ return false;
+
+ // If any of the Phis contain subregs, then we can't pipeline
+ // because we don't know how to maintain subreg information in the
+ // VMap structure.
+ MachineBasicBlock *MBB = L.getHeader();
+ for (MachineBasicBlock::iterator BBI = MBB->instr_begin(),
+ BBE = MBB->getFirstNonPHI();
+ BBI != BBE; ++BBI)
+ for (unsigned i = 1; i != BBI->getNumOperands(); i += 2)
+ if (BBI->getOperand(i).getSubReg() != 0)
+ return false;
+
+ return true;
+}
+
+/// The SMS algorithm consists of the following main steps:
+/// 1. Computation and analysis of the dependence graph.
+/// 2. Ordering of the nodes (instructions).
+/// 3. Attempt to Schedule the loop.
+bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) {
+ assert(L.getBlocks().size() == 1 && "SMS works on single blocks only.");
+
+ SwingSchedulerDAG SMS(*this, L, getAnalysis<LiveIntervals>(), RegClassInfo);
+
+ MachineBasicBlock *MBB = L.getHeader();
+ // The kernel should not include any terminator instructions. These
+ // will be added back later.
+ SMS.startBlock(MBB);
+
+ // Compute the number of 'real' instructions in the basic block by
+ // ignoring terminators.
+ unsigned size = MBB->size();
+ for (MachineBasicBlock::iterator I = MBB->getFirstTerminator(),
+ E = MBB->instr_end();
+ I != E; ++I, --size)
+ ;
+
+ SMS.enterRegion(MBB, MBB->begin(), MBB->getFirstTerminator(), size);
+ SMS.schedule();
+ SMS.exitRegion();
+
+ SMS.finishBlock();
+ return SMS.hasNewSchedule();
+}
+
+/// We override the schedule function in ScheduleDAGInstrs to implement the
+/// scheduling part of the Swing Modulo Scheduling algorithm.
+void SwingSchedulerDAG::schedule() {
+ AliasAnalysis *AA = &Pass.getAnalysis<AAResultsWrapperPass>().getAAResults();
+ buildSchedGraph(AA);
+ addLoopCarriedDependences(AA);
+ updatePhiDependences();
+ Topo.InitDAGTopologicalSorting();
+ postprocessDAG();
+ changeDependences();
+ DEBUG({
+ for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+ SUnits[su].dumpAll(this);
+ });
+
+ NodeSetType NodeSets;
+ findCircuits(NodeSets);
+
+ // Calculate the MII.
+ unsigned ResMII = calculateResMII();
+ unsigned RecMII = calculateRecMII(NodeSets);
+
+ fuseRecs(NodeSets);
+
+ // This flag is used for testing and can cause correctness problems.
+ if (SwpIgnoreRecMII)
+ RecMII = 0;
+
+ MII = std::max(ResMII, RecMII);
+ DEBUG(dbgs() << "MII = " << MII << " (rec=" << RecMII << ", res=" << ResMII
+ << ")\n");
+
+ // Can't schedule a loop without a valid MII.
+ if (MII == 0)
+ return;
+
+ // Don't pipeline large loops.
+ if (SwpMaxMii != -1 && (int)MII > SwpMaxMii)
+ return;
+
+ computeNodeFunctions(NodeSets);
+
+ registerPressureFilter(NodeSets);
+
+ colocateNodeSets(NodeSets);
+
+ checkNodeSets(NodeSets);
+
+ DEBUG({
+ for (auto &I : NodeSets) {
+ dbgs() << " Rec NodeSet ";
+ I.dump();
+ }
+ });
+
+ std::sort(NodeSets.begin(), NodeSets.end(), std::greater<NodeSet>());
+
+ groupRemainingNodes(NodeSets);
+
+ removeDuplicateNodes(NodeSets);
+
+ DEBUG({
+ for (auto &I : NodeSets) {
+ dbgs() << " NodeSet ";
+ I.dump();
+ }
+ });
+
+ computeNodeOrder(NodeSets);
+
+ SMSchedule Schedule(Pass.MF);
+ Scheduled = schedulePipeline(Schedule);
+
+ if (!Scheduled)
+ return;
+
+ unsigned numStages = Schedule.getMaxStageCount();
+ // No need to generate pipeline if there are no overlapped iterations.
+ if (numStages == 0)
+ return;
+
+  // Check that the maximum stage count is less than the user-defined limit.
+ if (SwpMaxStages > -1 && (int)numStages > SwpMaxStages)
+ return;
+
+ generatePipelinedLoop(Schedule);
+ ++NumPipelined;
+}
+
+/// Clean up after the software pipeliner runs.
+void SwingSchedulerDAG::finishBlock() {
+ for (MachineInstr *I : NewMIs)
+ MF.DeleteMachineInstr(I);
+ NewMIs.clear();
+
+ // Call the superclass.
+ ScheduleDAGInstrs::finishBlock();
+}
+
+/// Return the register values for the operands of a Phi instruction.
+/// This function assumes the instruction is a Phi.
+static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop,
+ unsigned &InitVal, unsigned &LoopVal) {
+ assert(Phi.isPHI() && "Expecting a Phi.");
+
+ InitVal = 0;
+ LoopVal = 0;
+ for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
+ if (Phi.getOperand(i + 1).getMBB() != Loop)
+ InitVal = Phi.getOperand(i).getReg();
+ else if (Phi.getOperand(i + 1).getMBB() == Loop)
+ LoopVal = Phi.getOperand(i).getReg();
+
+ assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
+}
+
+/// Return the Phi register value that comes from the incoming block.
+static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
+ for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
+ if (Phi.getOperand(i + 1).getMBB() != LoopBB)
+ return Phi.getOperand(i).getReg();
+ return 0;
+}
+
+/// Return the Phi register value that comes from the loop block.
+static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
+ for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
+ if (Phi.getOperand(i + 1).getMBB() == LoopBB)
+ return Phi.getOperand(i).getReg();
+ return 0;
+}
+
+/// Return true if SUb can be reached from SUa following the chain edges.
+static bool isSuccOrder(SUnit *SUa, SUnit *SUb) {
+ SmallPtrSet<SUnit *, 8> Visited;
+ SmallVector<SUnit *, 8> Worklist;
+ Worklist.push_back(SUa);
+ while (!Worklist.empty()) {
+ const SUnit *SU = Worklist.pop_back_val();
+ for (auto &SI : SU->Succs) {
+ SUnit *SuccSU = SI.getSUnit();
+ if (SI.getKind() == SDep::Order) {
+ if (Visited.count(SuccSU))
+ continue;
+ if (SuccSU == SUb)
+ return true;
+ Worklist.push_back(SuccSU);
+ Visited.insert(SuccSU);
+ }
+ }
+ }
+ return false;
+}
+
+/// Return true if the instruction causes a chain between memory
+/// references before and after it.
+static bool isDependenceBarrier(MachineInstr &MI, AliasAnalysis *AA) {
+ return MI.isCall() || MI.hasUnmodeledSideEffects() ||
+ (MI.hasOrderedMemoryRef() &&
+ (!MI.mayLoad() || !MI.isDereferenceableInvariantLoad(AA)));
+}
+
+/// Return the underlying objects for the memory references of an instruction.
+/// This function calls the code in ValueTracking, but first checks that the
+/// instruction has a memory operand.
+static void getUnderlyingObjects(MachineInstr *MI,
+ SmallVectorImpl<Value *> &Objs,
+ const DataLayout &DL) {
+ if (!MI->hasOneMemOperand())
+ return;
+ MachineMemOperand *MM = *MI->memoperands_begin();
+ if (!MM->getValue())
+ return;
+ GetUnderlyingObjects(const_cast<Value *>(MM->getValue()), Objs, DL);
+}
+
+/// Add a chain edge between a load and store if the store can be an
+/// alias of the load on a subsequent iteration, i.e., a loop carried
+/// dependence. This code is very similar to the code in ScheduleDAGInstrs
+/// but that code doesn't create loop carried dependences.
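+/// As an illustration, consider a loop body that loads a[i] and later stores
+/// to a[i+1]: the store in iteration i writes the location that the load
+/// reads in iteration i+1, so a chain edge is added between the load and the
+/// store to keep them ordered when iterations are overlapped.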
+void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
+ MapVector<Value *, SmallVector<SUnit *, 4>> PendingLoads;
+ for (auto &SU : SUnits) {
+ MachineInstr &MI = *SU.getInstr();
+ if (isDependenceBarrier(MI, AA))
+ PendingLoads.clear();
+ else if (MI.mayLoad()) {
+ SmallVector<Value *, 4> Objs;
+ getUnderlyingObjects(&MI, Objs, MF.getDataLayout());
+ for (auto V : Objs) {
+ SmallVector<SUnit *, 4> &SUs = PendingLoads[V];
+ SUs.push_back(&SU);
+ }
+ } else if (MI.mayStore()) {
+ SmallVector<Value *, 4> Objs;
+ getUnderlyingObjects(&MI, Objs, MF.getDataLayout());
+ for (auto V : Objs) {
+ MapVector<Value *, SmallVector<SUnit *, 4>>::iterator I =
+ PendingLoads.find(V);
+ if (I == PendingLoads.end())
+ continue;
+ for (auto Load : I->second) {
+ if (isSuccOrder(Load, &SU))
+ continue;
+ MachineInstr &LdMI = *Load->getInstr();
+ // First, perform the cheaper check that compares the base registers.
+ // If they are the same and the load offset is less than the store
+ // offset, then the dependence is potentially loop carried.
+ unsigned BaseReg1, BaseReg2;
+ int64_t Offset1, Offset2;
+ if (!TII->getMemOpBaseRegImmOfs(LdMI, BaseReg1, Offset1, TRI) ||
+ !TII->getMemOpBaseRegImmOfs(MI, BaseReg2, Offset2, TRI)) {
+ SU.addPred(SDep(Load, SDep::Barrier));
+ continue;
+ }
+ if (BaseReg1 == BaseReg2 && (int)Offset1 < (int)Offset2) {
+ assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI, AA) &&
+ "What happened to the chain edge?");
+ SU.addPred(SDep(Load, SDep::Barrier));
+ continue;
+ }
+ // Second, perform the more expensive check that uses alias analysis
+ // on the base registers. If they alias, and the load offset is less
+ // than the store offset, then mark the dependence as loop carried.
+ if (!AA) {
+ SU.addPred(SDep(Load, SDep::Barrier));
+ continue;
+ }
+ MachineMemOperand *MMO1 = *LdMI.memoperands_begin();
+ MachineMemOperand *MMO2 = *MI.memoperands_begin();
+ if (!MMO1->getValue() || !MMO2->getValue()) {
+ SU.addPred(SDep(Load, SDep::Barrier));
+ continue;
+ }
+ if (MMO1->getValue() == MMO2->getValue() &&
+ MMO1->getOffset() <= MMO2->getOffset()) {
+ SU.addPred(SDep(Load, SDep::Barrier));
+ continue;
+ }
+ AliasResult AAResult = AA->alias(
+ MemoryLocation(MMO1->getValue(), MemoryLocation::UnknownSize,
+ MMO1->getAAInfo()),
+ MemoryLocation(MMO2->getValue(), MemoryLocation::UnknownSize,
+ MMO2->getAAInfo()));
+
+ if (AAResult != NoAlias)
+ SU.addPred(SDep(Load, SDep::Barrier));
+ }
+ }
+ }
+ }
+}
+
+/// Update the phi dependences to the DAG because ScheduleDAGInstrs no longer
+/// processes dependences for PHIs. This function adds true dependences
+/// from a PHI to a use, and a loop carried dependence from the use to the
+/// PHI. The loop carried dependence is represented as an anti dependence
+/// edge. This function also removes chain dependences between unrelated
+/// PHIs.
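+/// For example, given v1 = PHI(v0, v2) and v2 = ADD v1, 1 in the loop, the
+/// ADD gets a true dependence on the PHI for its use of v1, and the loop
+/// carried dependence of the PHI on v2 is recorded as an anti dependence
+/// edge between the PHI and the ADD.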
+void SwingSchedulerDAG::updatePhiDependences() {
+ SmallVector<SDep, 4> RemoveDeps;
+ const TargetSubtargetInfo &ST = MF.getSubtarget<TargetSubtargetInfo>();
+
+ // Iterate over each DAG node.
+ for (SUnit &I : SUnits) {
+ RemoveDeps.clear();
+ // Record the register of an operand that is defined by or used in a Phi.
+ unsigned HasPhiUse = 0;
+ unsigned HasPhiDef = 0;
+ MachineInstr *MI = I.getInstr();
+ // Iterate over each operand and process the register definitions and uses.
+ for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
+ MOE = MI->operands_end();
+ MOI != MOE; ++MOI) {
+ if (!MOI->isReg())
+ continue;
+ unsigned Reg = MOI->getReg();
+ if (MOI->isDef()) {
+ // If the register is used by a Phi, then create an anti dependence.
+ for (MachineRegisterInfo::use_instr_iterator
+ UI = MRI.use_instr_begin(Reg),
+ UE = MRI.use_instr_end();
+ UI != UE; ++UI) {
+ MachineInstr *UseMI = &*UI;
+ SUnit *SU = getSUnit(UseMI);
+ if (SU != nullptr && UseMI->isPHI()) {
+ if (!MI->isPHI()) {
+ SDep Dep(SU, SDep::Anti, Reg);
+ I.addPred(Dep);
+ } else {
+ HasPhiDef = Reg;
+ // Add a chain edge to a dependent Phi that isn't an existing
+ // predecessor.
+ if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
+ I.addPred(SDep(SU, SDep::Barrier));
+ }
+ }
+ }
+ } else if (MOI->isUse()) {
+ // If the register is defined by a Phi, then create a true dependence.
+ MachineInstr *DefMI = MRI.getUniqueVRegDef(Reg);
+ if (DefMI == nullptr)
+ continue;
+ SUnit *SU = getSUnit(DefMI);
+ if (SU != nullptr && DefMI->isPHI()) {
+ if (!MI->isPHI()) {
+ SDep Dep(SU, SDep::Data, Reg);
+ Dep.setLatency(0);
+ ST.adjustSchedDependency(SU, &I, Dep);
+ I.addPred(Dep);
+ } else {
+ HasPhiUse = Reg;
+ // Add a chain edge to a dependent Phi that isn't an existing
+ // predecessor.
+ if (SU->NodeNum < I.NodeNum && !I.isPred(SU))
+ I.addPred(SDep(SU, SDep::Barrier));
+ }
+ }
+ }
+ }
+ // Remove order dependences from an unrelated Phi.
+ if (!SwpPruneDeps)
+ continue;
+ for (auto &PI : I.Preds) {
+ MachineInstr *PMI = PI.getSUnit()->getInstr();
+ if (PMI->isPHI() && PI.getKind() == SDep::Order) {
+ if (I.getInstr()->isPHI()) {
+ if (PMI->getOperand(0).getReg() == HasPhiUse)
+ continue;
+ if (getLoopPhiReg(*PMI, PMI->getParent()) == HasPhiDef)
+ continue;
+ }
+ RemoveDeps.push_back(PI);
+ }
+ }
+ for (int i = 0, e = RemoveDeps.size(); i != e; ++i)
+ I.removePred(RemoveDeps[i]);
+ }
+}
+
+/// Iterate over each DAG node and see if we can change any dependences
+/// in order to reduce the recurrence MII.
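+/// A typical case is a post-increment address: a memory instruction whose
+/// base register is produced by an increment in the same iteration may
+/// instead use the base value from the previous iteration with an adjusted
+/// offset, removing the intra-iteration dependence.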
+void SwingSchedulerDAG::changeDependences() {
+ // See if an instruction can use a value from the previous iteration.
+ // If so, we update the base and offset of the instruction and change
+ // the dependences.
+ for (SUnit &I : SUnits) {
+ unsigned BasePos = 0, OffsetPos = 0, NewBase = 0;
+ int64_t NewOffset = 0;
+ if (!canUseLastOffsetValue(I.getInstr(), BasePos, OffsetPos, NewBase,
+ NewOffset))
+ continue;
+
+ // Get the MI and SUnit for the instruction that defines the original base.
+ unsigned OrigBase = I.getInstr()->getOperand(BasePos).getReg();
+ MachineInstr *DefMI = MRI.getUniqueVRegDef(OrigBase);
+ if (!DefMI)
+ continue;
+ SUnit *DefSU = getSUnit(DefMI);
+ if (!DefSU)
+ continue;
+ // Get the MI and SUnit for the instruction that defines the new base.
+ MachineInstr *LastMI = MRI.getUniqueVRegDef(NewBase);
+ if (!LastMI)
+ continue;
+ SUnit *LastSU = getSUnit(LastMI);
+ if (!LastSU)
+ continue;
+
+ if (Topo.IsReachable(&I, LastSU))
+ continue;
+
+ // Remove the dependence. The value now depends on a prior iteration.
+ SmallVector<SDep, 4> Deps;
+ for (SUnit::pred_iterator P = I.Preds.begin(), E = I.Preds.end(); P != E;
+ ++P)
+ if (P->getSUnit() == DefSU)
+ Deps.push_back(*P);
+ for (int i = 0, e = Deps.size(); i != e; i++) {
+ Topo.RemovePred(&I, Deps[i].getSUnit());
+ I.removePred(Deps[i]);
+ }
+ // Remove the chain dependence between the instructions.
+ Deps.clear();
+ for (auto &P : LastSU->Preds)
+ if (P.getSUnit() == &I && P.getKind() == SDep::Order)
+ Deps.push_back(P);
+ for (int i = 0, e = Deps.size(); i != e; i++) {
+ Topo.RemovePred(LastSU, Deps[i].getSUnit());
+ LastSU->removePred(Deps[i]);
+ }
+
+ // Add a dependence between the new instruction and the instruction
+ // that defines the new base.
+ SDep Dep(&I, SDep::Anti, NewBase);
+ LastSU->addPred(Dep);
+
+ // Remember the base and offset information so that we can update the
+ // instruction during code generation.
+ InstrChanges[&I] = std::make_pair(NewBase, NewOffset);
+ }
+}
+
+namespace {
+
+// FuncUnitSorter - Comparison operator used to sort instructions by
+// the number of functional unit choices.
+struct FuncUnitSorter {
+ const InstrItineraryData *InstrItins;
+ DenseMap<unsigned, unsigned> Resources;
+
+ // Compute the number of functional unit alternatives needed
+ // at each stage, and take the minimum value. We prioritize the
+ // instructions by the least number of choices first.
+ unsigned minFuncUnits(const MachineInstr *Inst, unsigned &F) const {
+ unsigned schedClass = Inst->getDesc().getSchedClass();
+ unsigned min = UINT_MAX;
+ for (const InstrStage *IS = InstrItins->beginStage(schedClass),
+ *IE = InstrItins->endStage(schedClass);
+ IS != IE; ++IS) {
+ unsigned funcUnits = IS->getUnits();
+ unsigned numAlternatives = countPopulation(funcUnits);
+ if (numAlternatives < min) {
+ min = numAlternatives;
+ F = funcUnits;
+ }
+ }
+ return min;
+ }
+
+ // Compute the critical resources needed by the instruction. This
+ // function records the functional units needed by instructions that
+ // must use only one functional unit. We use this as a tie breaker
+ // for computing the resource MII. The instructions that require
+ // the same, highly used, functional unit have high priority.
+ void calcCriticalResources(MachineInstr &MI) {
+ unsigned SchedClass = MI.getDesc().getSchedClass();
+ for (const InstrStage *IS = InstrItins->beginStage(SchedClass),
+ *IE = InstrItins->endStage(SchedClass);
+ IS != IE; ++IS) {
+ unsigned FuncUnits = IS->getUnits();
+ if (countPopulation(FuncUnits) == 1)
+ Resources[FuncUnits]++;
+ }
+ }
+
+ FuncUnitSorter(const InstrItineraryData *IID) : InstrItins(IID) {}
+ /// Return true if IS1 has lower priority than IS2.
+ bool operator()(const MachineInstr *IS1, const MachineInstr *IS2) const {
+ unsigned F1 = 0, F2 = 0;
+ unsigned MFUs1 = minFuncUnits(IS1, F1);
+ unsigned MFUs2 = minFuncUnits(IS2, F2);
+ if (MFUs1 == 1 && MFUs2 == 1)
+ return Resources.lookup(F1) < Resources.lookup(F2);
+ return MFUs1 > MFUs2;
+ }
+};
+
+} // end anonymous namespace
+
+/// Calculate the resource constrained minimum initiation interval for the
+/// specified loop. We use the DFA to model the resources needed for
+/// each instruction, and we ignore dependences. A different DFA is created
+/// for each cycle that is required. When adding a new instruction, we attempt
+/// to add it to each existing DFA, until a legal space is found. If the
+/// instruction cannot be reserved in an existing DFA, we create a new one.
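+/// For example, if the loop contains ten memory instructions and the target
+/// can issue only two of them per cycle, at least five DFAs are needed, so
+/// the resource MII is at least five.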
+unsigned SwingSchedulerDAG::calculateResMII() {
+ SmallVector<DFAPacketizer *, 8> Resources;
+ MachineBasicBlock *MBB = Loop.getHeader();
+ Resources.push_back(TII->CreateTargetScheduleState(MF.getSubtarget()));
+
+ // Sort the instructions by the number of available choices for scheduling,
+ // least to most. Use the number of critical resources as the tie breaker.
+ FuncUnitSorter FUS =
+ FuncUnitSorter(MF.getSubtarget().getInstrItineraryData());
+ for (MachineBasicBlock::iterator I = MBB->getFirstNonPHI(),
+ E = MBB->getFirstTerminator();
+ I != E; ++I)
+ FUS.calcCriticalResources(*I);
+ PriorityQueue<MachineInstr *, std::vector<MachineInstr *>, FuncUnitSorter>
+ FuncUnitOrder(FUS);
+
+ for (MachineBasicBlock::iterator I = MBB->getFirstNonPHI(),
+ E = MBB->getFirstTerminator();
+ I != E; ++I)
+ FuncUnitOrder.push(&*I);
+
+ while (!FuncUnitOrder.empty()) {
+ MachineInstr *MI = FuncUnitOrder.top();
+ FuncUnitOrder.pop();
+ if (TII->isZeroCost(MI->getOpcode()))
+ continue;
+ // Attempt to reserve the instruction in an existing DFA. At least one
+ // DFA is needed for each cycle.
+ unsigned NumCycles = getSUnit(MI)->Latency;
+ unsigned ReservedCycles = 0;
+ SmallVectorImpl<DFAPacketizer *>::iterator RI = Resources.begin();
+ SmallVectorImpl<DFAPacketizer *>::iterator RE = Resources.end();
+ for (unsigned C = 0; C < NumCycles; ++C)
+ while (RI != RE) {
+ if ((*RI++)->canReserveResources(*MI)) {
+ ++ReservedCycles;
+ break;
+ }
+ }
+ // Start reserving resources using existing DFAs.
+ for (unsigned C = 0; C < ReservedCycles; ++C) {
+ --RI;
+ (*RI)->reserveResources(*MI);
+ }
+ // Add new DFAs, if needed, to reserve resources.
+ for (unsigned C = ReservedCycles; C < NumCycles; ++C) {
+ DFAPacketizer *NewResource =
+ TII->CreateTargetScheduleState(MF.getSubtarget());
+ assert(NewResource->canReserveResources(*MI) && "Reserve error.");
+ NewResource->reserveResources(*MI);
+ Resources.push_back(NewResource);
+ }
+ }
+ int Resmii = Resources.size();
+ // Delete the memory for each of the DFAs that were created earlier.
+ for (DFAPacketizer *RI : Resources) {
+ DFAPacketizer *D = RI;
+ delete D;
+ }
+ Resources.clear();
+ return Resmii;
+}
+
+/// Calculate the recurrence-constrained minimum initiation interval.
+/// Iterate over each circuit. Compute the delay(c) and distance(c)
+/// for each circuit. The II needs to satisfy the inequality
+/// delay(c) - II*distance(c) <= 0. For each circuit, choose the smallest
+/// II that satisfies the inequality, and the RecMII is the maximum
+/// of those values.
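+/// For example, a recurrence circuit containing three nodes has delay = 2
+/// and distance = 1 here, so the smallest II satisfying 2 - II*1 <= 0 is 2.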
+unsigned SwingSchedulerDAG::calculateRecMII(NodeSetType &NodeSets) {
+ unsigned RecMII = 0;
+
+ for (NodeSet &Nodes : NodeSets) {
+ if (Nodes.size() == 0)
+ continue;
+
+ unsigned Delay = Nodes.size() - 1;
+ unsigned Distance = 1;
+
+ // ii = ceil(delay / distance)
+ unsigned CurMII = (Delay + Distance - 1) / Distance;
+ Nodes.setRecMII(CurMII);
+ if (CurMII > RecMII)
+ RecMII = CurMII;
+ }
+
+ return RecMII;
+}
+
+/// Swap all the anti dependences in the DAG. That means it is no longer a DAG,
+/// but we do this to find the circuits, and then change them back.
+static void swapAntiDependences(std::vector<SUnit> &SUnits) {
+ SmallVector<std::pair<SUnit *, SDep>, 8> DepsAdded;
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ SUnit *SU = &SUnits[i];
+ for (SUnit::pred_iterator IP = SU->Preds.begin(), EP = SU->Preds.end();
+ IP != EP; ++IP) {
+ if (IP->getKind() != SDep::Anti)
+ continue;
+ DepsAdded.push_back(std::make_pair(SU, *IP));
+ }
+ }
+ for (SmallVector<std::pair<SUnit *, SDep>, 8>::iterator I = DepsAdded.begin(),
+ E = DepsAdded.end();
+ I != E; ++I) {
+ // Remove this anti dependency and add one in the reverse direction.
+ SUnit *SU = I->first;
+ SDep &D = I->second;
+ SUnit *TargetSU = D.getSUnit();
+ unsigned Reg = D.getReg();
+ unsigned Lat = D.getLatency();
+ SU->removePred(D);
+ SDep Dep(SU, SDep::Anti, Reg);
+ Dep.setLatency(Lat);
+ TargetSU->addPred(Dep);
+ }
+}
+
+/// Create the adjacency structure of the nodes in the graph.
+void SwingSchedulerDAG::Circuits::createAdjacencyStructure(
+ SwingSchedulerDAG *DAG) {
+ BitVector Added(SUnits.size());
+ for (int i = 0, e = SUnits.size(); i != e; ++i) {
+ Added.reset();
+ // Add any successor to the adjacency matrix and exclude duplicates.
+ for (auto &SI : SUnits[i].Succs) {
+ // Do not process a boundary node; a back-edge is processed only
+ // if it goes to a Phi.
+ if (SI.getSUnit()->isBoundaryNode() ||
+ (SI.getKind() == SDep::Anti && !SI.getSUnit()->getInstr()->isPHI()))
+ continue;
+ int N = SI.getSUnit()->NodeNum;
+ if (!Added.test(N)) {
+ AdjK[i].push_back(N);
+ Added.set(N);
+ }
+ }
+ // A chain edge between a store and a load is treated as a back-edge in the
+ // adjacency matrix.
+ for (auto &PI : SUnits[i].Preds) {
+ if (!SUnits[i].getInstr()->mayStore() ||
+ !DAG->isLoopCarriedOrder(&SUnits[i], PI, false))
+ continue;
+ if (PI.getKind() == SDep::Order && PI.getSUnit()->getInstr()->mayLoad()) {
+ int N = PI.getSUnit()->NodeNum;
+ if (!Added.test(N)) {
+ AdjK[i].push_back(N);
+ Added.set(N);
+ }
+ }
+ }
+ }
+}
+
+/// Identify an elementary circuit in the dependence graph starting at the
+/// specified node.
+bool SwingSchedulerDAG::Circuits::circuit(int V, int S, NodeSetType &NodeSets,
+ bool HasBackedge) {
+ SUnit *SV = &SUnits[V];
+ bool F = false;
+ Stack.insert(SV);
+ Blocked.set(V);
+
+ for (auto W : AdjK[V]) {
+ if (NumPaths > MaxPaths)
+ break;
+ if (W < S)
+ continue;
+ if (W == S) {
+ if (!HasBackedge)
+ NodeSets.push_back(NodeSet(Stack.begin(), Stack.end()));
+ F = true;
+ ++NumPaths;
+ break;
+ } else if (!Blocked.test(W)) {
+ if (circuit(W, S, NodeSets, W < V ? true : HasBackedge))
+ F = true;
+ }
+ }
+
+ if (F)
+ unblock(V);
+ else {
+ for (auto W : AdjK[V]) {
+ if (W < S)
+ continue;
+ if (B[W].count(SV) == 0)
+ B[W].insert(SV);
+ }
+ }
+ Stack.pop_back();
+ return F;
+}
+
+/// Unblock a node in the circuit finding algorithm.
+void SwingSchedulerDAG::Circuits::unblock(int U) {
+ Blocked.reset(U);
+ SmallPtrSet<SUnit *, 4> &BU = B[U];
+ while (!BU.empty()) {
+ SmallPtrSet<SUnit *, 4>::iterator SI = BU.begin();
+ assert(SI != BU.end() && "Invalid B set.");
+ SUnit *W = *SI;
+ BU.erase(W);
+ if (Blocked.test(W->NodeNum))
+ unblock(W->NodeNum);
+ }
+}
+
+/// Identify all the elementary circuits in the dependence graph using
+/// Johnson's circuit algorithm.
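+/// (D. B. Johnson, "Finding All the Elementary Circuits of a Directed Graph",
+/// SIAM Journal on Computing, 1975.)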
+void SwingSchedulerDAG::findCircuits(NodeSetType &NodeSets) {
+ // Swap all the anti dependences in the DAG. That means it is no longer a DAG,
+ // but we do this to find the circuits, and then change them back.
+ swapAntiDependences(SUnits);
+
+ Circuits Cir(SUnits);
+ // Create the adjacency structure.
+ Cir.createAdjacencyStructure(this);
+ for (int i = 0, e = SUnits.size(); i != e; ++i) {
+ Cir.reset();
+ Cir.circuit(i, i, NodeSets);
+ }
+
+ // Change the dependences back so that we've created a DAG again.
+ swapAntiDependences(SUnits);
+}
+
+/// Return true for DAG nodes that we ignore when computing the cost functions.
+/// We ignore the back-edge recurrence in order to avoid unbounded recursion
+/// in the calculation of the ASAP, ALAP, etc. functions.
+static bool ignoreDependence(const SDep &D, bool isPred) {
+ if (D.isArtificial())
+ return true;
+ return D.getKind() == SDep::Anti && isPred;
+}
+
+/// Compute several functions needed to order the nodes for scheduling.
+/// ASAP - Earliest time to schedule a node.
+/// ALAP - Latest time to schedule a node.
+/// MOV - Mobility function, difference between ALAP and ASAP.
+/// D - Depth of each node.
+/// H - Height of each node.
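+/// These follow the standard SMS recurrences, as computed below:
+///   ASAP(u) = max over preds v of ASAP(v) + latency(v,u) - distance(v,u)*MII
+///   ALAP(u) = min over succs w of ALAP(w) - latency(u,w) + distance(u,w)*MII
+///   MOV(u)  = ALAP(u) - ASAP(u)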
+void SwingSchedulerDAG::computeNodeFunctions(NodeSetType &NodeSets) {
+
+ ScheduleInfo.resize(SUnits.size());
+
+ DEBUG({
+ for (ScheduleDAGTopologicalSort::const_iterator I = Topo.begin(),
+ E = Topo.end();
+ I != E; ++I) {
+ SUnit *SU = &SUnits[*I];
+ SU->dump(this);
+ }
+ });
+
+ int maxASAP = 0;
+ // Compute ASAP.
+ for (ScheduleDAGTopologicalSort::const_iterator I = Topo.begin(),
+ E = Topo.end();
+ I != E; ++I) {
+ int asap = 0;
+ SUnit *SU = &SUnits[*I];
+ for (SUnit::const_pred_iterator IP = SU->Preds.begin(),
+ EP = SU->Preds.end();
+ IP != EP; ++IP) {
+ if (ignoreDependence(*IP, true))
+ continue;
+ SUnit *pred = IP->getSUnit();
+ asap = std::max(asap, (int)(getASAP(pred) + getLatency(SU, *IP) -
+ getDistance(pred, SU, *IP) * MII));
+ }
+ maxASAP = std::max(maxASAP, asap);
+ ScheduleInfo[*I].ASAP = asap;
+ }
+
+ // Compute ALAP and MOV.
+ for (ScheduleDAGTopologicalSort::const_reverse_iterator I = Topo.rbegin(),
+ E = Topo.rend();
+ I != E; ++I) {
+ int alap = maxASAP;
+ SUnit *SU = &SUnits[*I];
+ for (SUnit::const_succ_iterator IS = SU->Succs.begin(),
+ ES = SU->Succs.end();
+ IS != ES; ++IS) {
+ if (ignoreDependence(*IS, true))
+ continue;
+ SUnit *succ = IS->getSUnit();
+ alap = std::min(alap, (int)(getALAP(succ) - getLatency(SU, *IS) +
+ getDistance(SU, succ, *IS) * MII));
+ }
+
+ ScheduleInfo[*I].ALAP = alap;
+ }
+
+ // After computing the node functions, compute the summary for each node set.
+ for (NodeSet &I : NodeSets)
+ I.computeNodeSetInfo(this);
+
+ DEBUG({
+ for (unsigned i = 0; i < SUnits.size(); i++) {
+ dbgs() << "\tNode " << i << ":\n";
+ dbgs() << "\t ASAP = " << getASAP(&SUnits[i]) << "\n";
+ dbgs() << "\t ALAP = " << getALAP(&SUnits[i]) << "\n";
+ dbgs() << "\t MOV = " << getMOV(&SUnits[i]) << "\n";
+ dbgs() << "\t D = " << getDepth(&SUnits[i]) << "\n";
+ dbgs() << "\t H = " << getHeight(&SUnits[i]) << "\n";
+ }
+ });
+}
+
+/// Compute the Pred_L(O) set, as defined in the paper. The set is defined
+/// as the predecessors of the elements of NodeOrder that are not also in
+/// NodeOrder.
+static bool pred_L(SetVector<SUnit *> &NodeOrder,
+ SmallSetVector<SUnit *, 8> &Preds,
+ const NodeSet *S = nullptr) {
+ Preds.clear();
+ for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end();
+ I != E; ++I) {
+ for (SUnit::pred_iterator PI = (*I)->Preds.begin(), PE = (*I)->Preds.end();
+ PI != PE; ++PI) {
+ if (S && S->count(PI->getSUnit()) == 0)
+ continue;
+ if (ignoreDependence(*PI, true))
+ continue;
+ if (NodeOrder.count(PI->getSUnit()) == 0)
+ Preds.insert(PI->getSUnit());
+ }
+ // Back-edges are predecessors with an anti-dependence.
+ for (SUnit::const_succ_iterator IS = (*I)->Succs.begin(),
+ ES = (*I)->Succs.end();
+ IS != ES; ++IS) {
+ if (IS->getKind() != SDep::Anti)
+ continue;
+ if (S && S->count(IS->getSUnit()) == 0)
+ continue;
+ if (NodeOrder.count(IS->getSUnit()) == 0)
+ Preds.insert(IS->getSUnit());
+ }
+ }
+ return Preds.size() > 0;
+}
+
+/// Compute the Succ_L(O) set, as defined in the paper. The set is defined
+/// as the successors of the elements of NodeOrder that are not also in
+/// NodeOrder.
+static bool succ_L(SetVector<SUnit *> &NodeOrder,
+ SmallSetVector<SUnit *, 8> &Succs,
+ const NodeSet *S = nullptr) {
+ Succs.clear();
+ for (SetVector<SUnit *>::iterator I = NodeOrder.begin(), E = NodeOrder.end();
+ I != E; ++I) {
+ for (SUnit::succ_iterator SI = (*I)->Succs.begin(), SE = (*I)->Succs.end();
+ SI != SE; ++SI) {
+ if (S && S->count(SI->getSUnit()) == 0)
+ continue;
+ if (ignoreDependence(*SI, false))
+ continue;
+ if (NodeOrder.count(SI->getSUnit()) == 0)
+ Succs.insert(SI->getSUnit());
+ }
+ for (SUnit::const_pred_iterator PI = (*I)->Preds.begin(),
+ PE = (*I)->Preds.end();
+ PI != PE; ++PI) {
+ if (PI->getKind() != SDep::Anti)
+ continue;
+ if (S && S->count(PI->getSUnit()) == 0)
+ continue;
+ if (NodeOrder.count(PI->getSUnit()) == 0)
+ Succs.insert(PI->getSUnit());
+ }
+ }
+ return Succs.size() > 0;
+}
+
+/// Return true if there is a path from the specified node to any of the nodes
+/// in DestNodes. Keep track of, and return, the nodes in any path.
+static bool computePath(SUnit *Cur, SetVector<SUnit *> &Path,
+ SetVector<SUnit *> &DestNodes,
+ SetVector<SUnit *> &Exclude,
+ SmallPtrSet<SUnit *, 8> &Visited) {
+ if (Cur->isBoundaryNode())
+ return false;
+ if (Exclude.count(Cur) != 0)
+ return false;
+ if (DestNodes.count(Cur) != 0)
+ return true;
+ if (!Visited.insert(Cur).second)
+ return Path.count(Cur) != 0;
+ bool FoundPath = false;
+ for (auto &SI : Cur->Succs)
+ FoundPath |= computePath(SI.getSUnit(), Path, DestNodes, Exclude, Visited);
+ for (auto &PI : Cur->Preds)
+ if (PI.getKind() == SDep::Anti)
+ FoundPath |=
+ computePath(PI.getSUnit(), Path, DestNodes, Exclude, Visited);
+ if (FoundPath)
+ Path.insert(Cur);
+ return FoundPath;
+}
+
+/// Return true if Set1 is a subset of Set2.
+template <class S1Ty, class S2Ty> static bool isSubset(S1Ty &Set1, S2Ty &Set2) {
+ for (typename S1Ty::iterator I = Set1.begin(), E = Set1.end(); I != E; ++I)
+ if (Set2.count(*I) == 0)
+ return false;
+ return true;
+}
+
+/// Compute the live-out registers for the instructions in a node-set.
+/// The live-out registers are those that are defined in the node-set,
+/// but not used; uses that appear in Phi instructions do not count.
+static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker,
+ NodeSet &NS) {
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ SmallVector<RegisterMaskPair, 8> LiveOutRegs;
+ SmallSet<unsigned, 4> Uses;
+ for (SUnit *SU : NS) {
+ const MachineInstr *MI = SU->getInstr();
+ if (MI->isPHI())
+ continue;
+ for (const MachineOperand &MO : MI->operands())
+ if (MO.isReg() && MO.isUse()) {
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ Uses.insert(Reg);
+ else if (MRI.isAllocatable(Reg))
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
+ Uses.insert(*Units);
+ }
+ }
+ for (SUnit *SU : NS)
+ for (const MachineOperand &MO : SU->getInstr()->operands())
+ if (MO.isReg() && MO.isDef() && !MO.isDead()) {
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (!Uses.count(Reg))
+ LiveOutRegs.push_back(RegisterMaskPair(Reg,
+ LaneBitmask::getNone()));
+ } else if (MRI.isAllocatable(Reg)) {
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
+ if (!Uses.count(*Units))
+ LiveOutRegs.push_back(RegisterMaskPair(*Units,
+ LaneBitmask::getNone()));
+ }
+ }
+ RPTracker.addLiveRegs(LiveOutRegs);
+}
+
+/// A heuristic to filter nodes in recurrent node-sets if the register
+/// pressure of a set is too high.
+void SwingSchedulerDAG::registerPressureFilter(NodeSetType &NodeSets) {
+ for (auto &NS : NodeSets) {
+ // Skip small node-sets since they won't cause register pressure problems.
+ if (NS.size() <= 2)
+ continue;
+ IntervalPressure RecRegPressure;
+ RegPressureTracker RecRPTracker(RecRegPressure);
+ RecRPTracker.init(&MF, &RegClassInfo, &LIS, BB, BB->end(), false, true);
+ computeLiveOuts(MF, RecRPTracker, NS);
+ RecRPTracker.closeBottom();
+
+ std::vector<SUnit *> SUnits(NS.begin(), NS.end());
+ std::sort(SUnits.begin(), SUnits.end(), [](const SUnit *A, const SUnit *B) {
+ return A->NodeNum > B->NodeNum;
+ });
+
+ for (auto &SU : SUnits) {
+ // Since we're computing the register pressure for a subset of the
+ // instructions in a block, we need to set the tracker for each
+ // instruction in the node-set. The tracker is set to the instruction
+ // just after the one we're interested in.
+ MachineBasicBlock::const_iterator CurInstI = SU->getInstr();
+ RecRPTracker.setPos(std::next(CurInstI));
+
+ RegPressureDelta RPDelta;
+ ArrayRef<PressureChange> CriticalPSets;
+ RecRPTracker.getMaxUpwardPressureDelta(SU->getInstr(), nullptr, RPDelta,
+ CriticalPSets,
+ RecRegPressure.MaxSetPressure);
+ if (RPDelta.Excess.isValid()) {
+ DEBUG(dbgs() << "Excess register pressure: SU(" << SU->NodeNum << ") "
+ << TRI->getRegPressureSetName(RPDelta.Excess.getPSet())
+ << ":" << RPDelta.Excess.getUnitInc());
+ NS.setExceedPressure(SU);
+ break;
+ }
+ RecRPTracker.recede();
+ }
+ }
+}
+
+/// A heuristic to colocate node sets that have the same set of
+/// successors.
+void SwingSchedulerDAG::colocateNodeSets(NodeSetType &NodeSets) {
+ unsigned Colocate = 0;
+ for (int i = 0, e = NodeSets.size(); i < e; ++i) {
+ NodeSet &N1 = NodeSets[i];
+ SmallSetVector<SUnit *, 8> S1;
+ if (N1.empty() || !succ_L(N1, S1))
+ continue;
+ for (int j = i + 1; j < e; ++j) {
+ NodeSet &N2 = NodeSets[j];
+ if (N1.compareRecMII(N2) != 0)
+ continue;
+ SmallSetVector<SUnit *, 8> S2;
+ if (N2.empty() || !succ_L(N2, S2))
+ continue;
+ if (isSubset(S1, S2) && S1.size() == S2.size()) {
+ N1.setColocate(++Colocate);
+ N2.setColocate(Colocate);
+ break;
+ }
+ }
+ }
+}
+
+/// Check if the existing node-sets are profitable. If not, then ignore the
+/// recurrent node-sets, and attempt to schedule all nodes together. This is
+/// a heuristic. If the MII is large and there is a non-recurrent node with
+/// a large depth compared to the MII, then it's best to try and schedule
+/// all instructions together instead of starting with the recurrent node-sets.
+void SwingSchedulerDAG::checkNodeSets(NodeSetType &NodeSets) {
+ // Look for loops with a large MII.
+ if (MII <= 20)
+ return;
+ // Check if the node-set contains only a simple add recurrence.
+ for (auto &NS : NodeSets)
+ if (NS.size() > 2)
+ return;
+ // If the depth of any instruction is significantly larger than the MII, then
+ // ignore the recurrent node-sets and treat all instructions equally.
+ for (auto &SU : SUnits)
+ if (SU.getDepth() > MII * 1.5) {
+ NodeSets.clear();
+ DEBUG(dbgs() << "Clear recurrence node-sets\n");
+ return;
+ }
+}
+
+/// Add the nodes that do not belong to a recurrence set into groups
+/// based upon connected components.
+void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
+ SetVector<SUnit *> NodesAdded;
+ SmallPtrSet<SUnit *, 8> Visited;
+ // Add the nodes that are on a path between the previous node sets and
+ // the current node set.
+ for (NodeSet &I : NodeSets) {
+ SmallSetVector<SUnit *, 8> N;
+ // Add the nodes from the current node set to the previous node set.
+ if (succ_L(I, N)) {
+ SetVector<SUnit *> Path;
+ for (SUnit *NI : N) {
+ Visited.clear();
+ computePath(NI, Path, NodesAdded, I, Visited);
+ }
+ if (Path.size() > 0)
+ I.insert(Path.begin(), Path.end());
+ }
+ // Add the nodes from the previous node set to the current node set.
+ N.clear();
+ if (succ_L(NodesAdded, N)) {
+ SetVector<SUnit *> Path;
+ for (SUnit *NI : N) {
+ Visited.clear();
+ computePath(NI, Path, I, NodesAdded, Visited);
+ }
+ if (Path.size() > 0)
+ I.insert(Path.begin(), Path.end());
+ }
+ NodesAdded.insert(I.begin(), I.end());
+ }
+
+ // Create a new node set with the connected nodes of any successor of a node
+ // in a recurrent set.
+ NodeSet NewSet;
+ SmallSetVector<SUnit *, 8> N;
+ if (succ_L(NodesAdded, N))
+ for (SUnit *I : N)
+ addConnectedNodes(I, NewSet, NodesAdded);
+ if (NewSet.size() > 0)
+ NodeSets.push_back(NewSet);
+
+ // Create a new node set with the connected nodes of any predecessor of a node
+ // in a recurrent set.
+ NewSet.clear();
+ if (pred_L(NodesAdded, N))
+ for (SUnit *I : N)
+ addConnectedNodes(I, NewSet, NodesAdded);
+ if (NewSet.size() > 0)
+ NodeSets.push_back(NewSet);
+
+ // Create new node sets with the connected nodes of any remaining node that
+ // has no predecessor.
+ for (unsigned i = 0; i < SUnits.size(); ++i) {
+ SUnit *SU = &SUnits[i];
+ if (NodesAdded.count(SU) == 0) {
+ NewSet.clear();
+ addConnectedNodes(SU, NewSet, NodesAdded);
+ if (NewSet.size() > 0)
+ NodeSets.push_back(NewSet);
+ }
+ }
+}
+
+/// Add the node to the set, and add all of its connected nodes to the set.
+void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet,
+ SetVector<SUnit *> &NodesAdded) {
+ NewSet.insert(SU);
+ NodesAdded.insert(SU);
+ for (auto &SI : SU->Succs) {
+ SUnit *Successor = SI.getSUnit();
+ if (!SI.isArtificial() && NodesAdded.count(Successor) == 0)
+ addConnectedNodes(Successor, NewSet, NodesAdded);
+ }
+ for (auto &PI : SU->Preds) {
+ SUnit *Predecessor = PI.getSUnit();
+ if (!PI.isArtificial() && NodesAdded.count(Predecessor) == 0)
+ addConnectedNodes(Predecessor, NewSet, NodesAdded);
+ }
+}
+
+/// Return true if Set1 contains elements in Set2. The elements in common
+/// are returned in a different container.
+static bool isIntersect(SmallSetVector<SUnit *, 8> &Set1, const NodeSet &Set2,
+ SmallSetVector<SUnit *, 8> &Result) {
+ Result.clear();
+ for (unsigned i = 0, e = Set1.size(); i != e; ++i) {
+ SUnit *SU = Set1[i];
+ if (Set2.count(SU) != 0)
+ Result.insert(SU);
+ }
+ return !Result.empty();
+}
+
+/// Merge the recurrence node sets that have the same initial node.
+void SwingSchedulerDAG::fuseRecs(NodeSetType &NodeSets) {
+ for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
+ ++I) {
+ NodeSet &NI = *I;
+ for (NodeSetType::iterator J = I + 1; J != E;) {
+ NodeSet &NJ = *J;
+ if (NI.getNode(0)->NodeNum == NJ.getNode(0)->NodeNum) {
+ if (NJ.compareRecMII(NI) > 0)
+ NI.setRecMII(NJ.getRecMII());
+ for (NodeSet::iterator NII = J->begin(), ENI = J->end(); NII != ENI;
+ ++NII)
+ I->insert(*NII);
+ NodeSets.erase(J);
+ E = NodeSets.end();
+ } else {
+ ++J;
+ }
+ }
+ }
+}
+
+/// Remove nodes that have been scheduled in previous NodeSets.
+void SwingSchedulerDAG::removeDuplicateNodes(NodeSetType &NodeSets) {
+ for (NodeSetType::iterator I = NodeSets.begin(), E = NodeSets.end(); I != E;
+ ++I)
+ for (NodeSetType::iterator J = I + 1; J != E;) {
+ J->remove_if([&](SUnit *SUJ) { return I->count(SUJ); });
+
+ if (J->size() == 0) {
+ NodeSets.erase(J);
+ E = NodeSets.end();
+ } else {
+ ++J;
+ }
+ }
+}
+
+/// Return true if Inst1 defines a value that is used in Inst2.
+static bool hasDataDependence(SUnit *Inst1, SUnit *Inst2) {
+ for (auto &SI : Inst1->Succs)
+ if (SI.getSUnit() == Inst2 && SI.getKind() == SDep::Data)
+ return true;
+ return false;
+}
+
+/// Compute an ordered list of the dependence graph nodes, which
+/// indicates the order that the nodes will be scheduled. This is a
+/// two-level algorithm. First, a partial order is created, which
+/// consists of a list of sets ordered from highest to lowest priority.
+/// Then, the nodes within each set are ordered by alternating between
+/// top-down and bottom-up traversals, preferring the node with the
+/// greatest height or depth and breaking ties with the mobility value.
+void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
+ SmallSetVector<SUnit *, 8> R;
+ NodeOrder.clear();
+
+ for (auto &Nodes : NodeSets) {
+ DEBUG(dbgs() << "NodeSet size " << Nodes.size() << "\n");
+ OrderKind Order;
+ SmallSetVector<SUnit *, 8> N;
+ if (pred_L(NodeOrder, N) && isSubset(N, Nodes)) {
+ R.insert(N.begin(), N.end());
+ Order = BottomUp;
+ DEBUG(dbgs() << " Bottom up (preds) ");
+ } else if (succ_L(NodeOrder, N) && isSubset(N, Nodes)) {
+ R.insert(N.begin(), N.end());
+ Order = TopDown;
+ DEBUG(dbgs() << " Top down (succs) ");
+ } else if (isIntersect(N, Nodes, R)) {
+ // If some of the successors are in the existing node-set, then use the
+ // top-down ordering.
+ Order = TopDown;
+ DEBUG(dbgs() << " Top down (intersect) ");
+ } else if (NodeSets.size() == 1) {
+ for (auto &N : Nodes)
+ if (N->Succs.size() == 0)
+ R.insert(N);
+ Order = BottomUp;
+ DEBUG(dbgs() << " Bottom up (all) ");
+ } else {
+ // Find the node with the highest ASAP.
+ SUnit *maxASAP = nullptr;
+ for (SUnit *SU : Nodes) {
+ if (maxASAP == nullptr || getASAP(SU) >= getASAP(maxASAP))
+ maxASAP = SU;
+ }
+ R.insert(maxASAP);
+ Order = BottomUp;
+ DEBUG(dbgs() << " Bottom up (default) ");
+ }
+
+ while (!R.empty()) {
+ if (Order == TopDown) {
+ // Choose the node with the maximum height. If more than one, choose
+ // the node with the lowest MOV. If still more than one, check if there
+ // is a dependence between the instructions.
+ while (!R.empty()) {
+ SUnit *maxHeight = nullptr;
+ for (SUnit *I : R) {
+ if (maxHeight == nullptr || getHeight(I) > getHeight(maxHeight))
+ maxHeight = I;
+ else if (getHeight(I) == getHeight(maxHeight) &&
+ getMOV(I) < getMOV(maxHeight) &&
+ !hasDataDependence(maxHeight, I))
+ maxHeight = I;
+ else if (hasDataDependence(I, maxHeight))
+ maxHeight = I;
+ }
+ NodeOrder.insert(maxHeight);
+ DEBUG(dbgs() << maxHeight->NodeNum << " ");
+ R.remove(maxHeight);
+ for (const auto &I : maxHeight->Succs) {
+ if (Nodes.count(I.getSUnit()) == 0)
+ continue;
+ if (NodeOrder.count(I.getSUnit()) != 0)
+ continue;
+ if (ignoreDependence(I, false))
+ continue;
+ R.insert(I.getSUnit());
+ }
+ // Back-edges are predecessors with an anti-dependence.
+ for (const auto &I : maxHeight->Preds) {
+ if (I.getKind() != SDep::Anti)
+ continue;
+ if (Nodes.count(I.getSUnit()) == 0)
+ continue;
+ if (NodeOrder.count(I.getSUnit()) != 0)
+ continue;
+ R.insert(I.getSUnit());
+ }
+ }
+ Order = BottomUp;
+ DEBUG(dbgs() << "\n Switching order to bottom up ");
+ SmallSetVector<SUnit *, 8> N;
+ if (pred_L(NodeOrder, N, &Nodes))
+ R.insert(N.begin(), N.end());
+ } else {
+ // Choose the node with the maximum depth. If more than one, choose
+ // the node with the lowest MOV. If there is still more than one, check
+ // for a dependence between the instructions.
+ while (!R.empty()) {
+ SUnit *maxDepth = nullptr;
+ for (SUnit *I : R) {
+ if (maxDepth == nullptr || getDepth(I) > getDepth(maxDepth))
+ maxDepth = I;
+ else if (getDepth(I) == getDepth(maxDepth) &&
+ getMOV(I) < getMOV(maxDepth) &&
+ !hasDataDependence(I, maxDepth))
+ maxDepth = I;
+ else if (hasDataDependence(maxDepth, I))
+ maxDepth = I;
+ }
+ NodeOrder.insert(maxDepth);
+ DEBUG(dbgs() << maxDepth->NodeNum << " ");
+ R.remove(maxDepth);
+ if (Nodes.isExceedSU(maxDepth)) {
+ Order = TopDown;
+ R.clear();
+ R.insert(Nodes.getNode(0));
+ break;
+ }
+ for (const auto &I : maxDepth->Preds) {
+ if (Nodes.count(I.getSUnit()) == 0)
+ continue;
+ if (NodeOrder.count(I.getSUnit()) != 0)
+ continue;
+ if (I.getKind() == SDep::Anti)
+ continue;
+ R.insert(I.getSUnit());
+ }
+ // Back-edges are predecessors with an anti-dependence.
+ for (const auto &I : maxDepth->Succs) {
+ if (I.getKind() != SDep::Anti)
+ continue;
+ if (Nodes.count(I.getSUnit()) == 0)
+ continue;
+ if (NodeOrder.count(I.getSUnit()) != 0)
+ continue;
+ R.insert(I.getSUnit());
+ }
+ }
+ Order = TopDown;
+ DEBUG(dbgs() << "\n Switching order to top down ");
+ SmallSetVector<SUnit *, 8> N;
+ if (succ_L(NodeOrder, N, &Nodes))
+ R.insert(N.begin(), N.end());
+ }
+ }
+ DEBUG(dbgs() << "\nDone with Nodeset\n");
+ }
+
+ DEBUG({
+ dbgs() << "Node order: ";
+ for (SUnit *I : NodeOrder)
+ dbgs() << " " << I->NodeNum << " ";
+ dbgs() << "\n";
+ });
+}
+
+/// Process the nodes in the computed order and create the pipelined schedule
+/// of the instructions, if possible. Return true if a schedule is found.
+bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
+
+ if (NodeOrder.size() == 0)
+ return false;
+
+ bool scheduleFound = false;
+ // Keep increasing II until a valid schedule is found.
+ for (unsigned II = MII; II < MII + 10 && !scheduleFound; ++II) {
+ Schedule.reset();
+ Schedule.setInitiationInterval(II);
+ DEBUG(dbgs() << "Try to schedule with " << II << "\n");
+
+ SetVector<SUnit *>::iterator NI = NodeOrder.begin();
+ SetVector<SUnit *>::iterator NE = NodeOrder.end();
+ do {
+ SUnit *SU = *NI;
+
+ // Compute the schedule time for the instruction, which is based
+ // upon the scheduled time for any predecessors/successors.
+ int EarlyStart = INT_MIN;
+ int LateStart = INT_MAX;
+ // These values are set when the size of the schedule window is limited
+ // due to chain dependences.
+ int SchedEnd = INT_MAX;
+ int SchedStart = INT_MIN;
+ Schedule.computeStart(SU, &EarlyStart, &LateStart, &SchedEnd, &SchedStart,
+ II, this);
+ DEBUG({
+ dbgs() << "Inst (" << SU->NodeNum << ") ";
+ SU->getInstr()->dump();
+ dbgs() << "\n";
+ });
+ DEBUG({
+ dbgs() << "\tes: " << EarlyStart << " ls: " << LateStart
+ << " me: " << SchedEnd << " ms: " << SchedStart << "\n";
+ });
+
+ if (EarlyStart > LateStart || SchedEnd < EarlyStart ||
+ SchedStart > LateStart)
+ scheduleFound = false;
+ else if (EarlyStart != INT_MIN && LateStart == INT_MAX) {
+ SchedEnd = std::min(SchedEnd, EarlyStart + (int)II - 1);
+ scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
+ } else if (EarlyStart == INT_MIN && LateStart != INT_MAX) {
+ SchedStart = std::max(SchedStart, LateStart - (int)II + 1);
+ scheduleFound = Schedule.insert(SU, LateStart, SchedStart, II);
+ } else if (EarlyStart != INT_MIN && LateStart != INT_MAX) {
+ SchedEnd =
+ std::min(SchedEnd, std::min(LateStart, EarlyStart + (int)II - 1));
+ // When scheduling a Phi, it is better to start at the latest cycle and go
+ // backwards. The default order may insert the Phi too far away from
+ // its first dependence.
+ if (SU->getInstr()->isPHI())
+ scheduleFound = Schedule.insert(SU, SchedEnd, EarlyStart, II);
+ else
+ scheduleFound = Schedule.insert(SU, EarlyStart, SchedEnd, II);
+ } else {
+ int FirstCycle = Schedule.getFirstCycle();
+ scheduleFound = Schedule.insert(SU, FirstCycle + getASAP(SU),
+ FirstCycle + getASAP(SU) + II - 1, II);
+ }
+ // Even if we find a schedule, make sure the schedule doesn't exceed the
+ // allowable number of stages. We keep trying if this happens.
+ if (scheduleFound)
+ if (SwpMaxStages > -1 &&
+ Schedule.getMaxStageCount() > (unsigned)SwpMaxStages)
+ scheduleFound = false;
+
+ DEBUG({
+ if (!scheduleFound)
+ dbgs() << "\tCan't schedule\n";
+ });
+ } while (++NI != NE && scheduleFound);
+
+ // If a schedule is found, check if it is a valid schedule too.
+ if (scheduleFound)
+ scheduleFound = Schedule.isValidSchedule(this);
+ }
+
+ DEBUG(dbgs() << "Schedule Found? " << scheduleFound << "\n");
+
+ if (scheduleFound)
+ Schedule.finalizeSchedule(this);
+ else
+ Schedule.reset();
+
+ return scheduleFound && Schedule.getMaxStageCount() > 0;
+}
+
+/// Given a schedule for the loop, generate a new version of the loop,
+/// and replace the old version. This function generates a prolog
+/// that contains the initial iterations in the pipeline, the kernel
+/// loop, and the epilog that contains the code for the final
+/// iterations.
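+/// For example, with two stages (MaxStageCount == 1), the generated code is:
+///   prolog: stage 0 of iteration 0
+///   kernel: stage 1 of iteration i together with stage 0 of iteration i+1
+///   epilog: stage 1 of the final iteration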
+void SwingSchedulerDAG::generatePipelinedLoop(SMSchedule &Schedule) {
+ // Create a new basic block for the kernel and add it to the CFG.
+ MachineBasicBlock *KernelBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
+
+ unsigned MaxStageCount = Schedule.getMaxStageCount();
+
+ // Remember the registers that are used in different stages. The index is
+ // the iteration, or stage, that the instruction is scheduled in. This is
+ // a map between register names in the original block and the names created
+ // in each stage of the pipelined loop.
+ ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];
+ InstrMapTy InstrMap;
+
+ SmallVector<MachineBasicBlock *, 4> PrologBBs;
+ // Generate the prolog instructions that set up the pipeline.
+ generateProlog(Schedule, MaxStageCount, KernelBB, VRMap, PrologBBs);
+ MF.insert(BB->getIterator(), KernelBB);
+
+ // Rearrange the instructions to generate the new, pipelined loop,
+ // and update register names as needed.
+ for (int Cycle = Schedule.getFirstCycle(),
+ LastCycle = Schedule.getFinalCycle();
+ Cycle <= LastCycle; ++Cycle) {
+ std::deque<SUnit *> &CycleInstrs = Schedule.getInstructions(Cycle);
+ // This inner loop schedules each instruction in the cycle.
+ for (SUnit *CI : CycleInstrs) {
+ if (CI->getInstr()->isPHI())
+ continue;
+ unsigned StageNum = Schedule.stageScheduled(getSUnit(CI->getInstr()));
+ MachineInstr *NewMI = cloneInstr(CI->getInstr(), MaxStageCount, StageNum);
+ updateInstruction(NewMI, false, MaxStageCount, StageNum, Schedule, VRMap);
+ KernelBB->push_back(NewMI);
+ InstrMap[NewMI] = CI->getInstr();
+ }
+ }
+
+ // Copy any terminator instructions to the new kernel, and update
+ // names as needed.
+ for (MachineBasicBlock::iterator I = BB->getFirstTerminator(),
+ E = BB->instr_end();
+ I != E; ++I) {
+ MachineInstr *NewMI = MF.CloneMachineInstr(&*I);
+ updateInstruction(NewMI, false, MaxStageCount, 0, Schedule, VRMap);
+ KernelBB->push_back(NewMI);
+ InstrMap[NewMI] = &*I;
+ }
+
+ KernelBB->transferSuccessors(BB);
+ KernelBB->replaceSuccessor(BB, KernelBB);
+
+ generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, Schedule,
+ VRMap, InstrMap, MaxStageCount, MaxStageCount, false);
+ generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, Schedule, VRMap,
+ InstrMap, MaxStageCount, MaxStageCount, false);
+
+ DEBUG(dbgs() << "New block\n"; KernelBB->dump(););
+
+ SmallVector<MachineBasicBlock *, 4> EpilogBBs;
+ // Generate the epilog instructions to complete the pipeline.
+ generateEpilog(Schedule, MaxStageCount, KernelBB, VRMap, EpilogBBs,
+ PrologBBs);
+
+ // We need this step because the register allocation doesn't handle some
+ // situations well, so we insert copies to help out.
+ splitLifetimes(KernelBB, EpilogBBs, Schedule);
+
+ // Remove dead instructions due to loop induction variables.
+ removeDeadInstructions(KernelBB, EpilogBBs);
+
+ // Add branches between prolog and epilog blocks.
+ addBranches(PrologBBs, KernelBB, EpilogBBs, Schedule, VRMap);
+
+ // Remove the original loop since it's no longer referenced.
+ BB->clear();
+ BB->eraseFromParent();
+
+ delete[] VRMap;
+}
+
+/// Generate the pipeline prolog code.
+void SwingSchedulerDAG::generateProlog(SMSchedule &Schedule, unsigned LastStage,
+ MachineBasicBlock *KernelBB,
+ ValueMapTy *VRMap,
+ MBBVectorTy &PrologBBs) {
+ MachineBasicBlock *PreheaderBB = MLI->getLoopFor(BB)->getLoopPreheader();
+ assert(PreheaderBB != NULL &&
+ "Need to add code to handle loops w/o preheader");
+ MachineBasicBlock *PredBB = PreheaderBB;
+ InstrMapTy InstrMap;
+
+ // Generate a basic block for each stage, not including the last stage,
+ // which will be generated in the kernel. Each basic block may contain
+ // instructions from multiple stages/iterations.
+ for (unsigned i = 0; i < LastStage; ++i) {
+ // Create and insert the prolog basic block prior to the original loop
+ // basic block. The original loop is removed later.
+ MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
+ PrologBBs.push_back(NewBB);
+ MF.insert(BB->getIterator(), NewBB);
+ NewBB->transferSuccessors(PredBB);
+ PredBB->addSuccessor(NewBB);
+ PredBB = NewBB;
+
+ // Generate instructions for each appropriate stage. Process instructions
+ // in original program order.
+ for (int StageNum = i; StageNum >= 0; --StageNum) {
+ for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
+ BBE = BB->getFirstTerminator();
+ BBI != BBE; ++BBI) {
+ if (Schedule.isScheduledAtStage(getSUnit(&*BBI), (unsigned)StageNum)) {
+ if (BBI->isPHI())
+ continue;
+ MachineInstr *NewMI =
+ cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum, Schedule);
+ updateInstruction(NewMI, false, i, (unsigned)StageNum, Schedule,
+ VRMap);
+ NewBB->push_back(NewMI);
+ InstrMap[NewMI] = &*BBI;
+ }
+ }
+ }
+ rewritePhiValues(NewBB, i, Schedule, VRMap, InstrMap);
+ DEBUG({
+ dbgs() << "prolog:\n";
+ NewBB->dump();
+ });
+ }
+
+ PredBB->replaceSuccessor(BB, KernelBB);
+
+ // Check if we need to remove the branch from the preheader to the original
+ // loop, and replace it with a branch to the new loop.
+ unsigned numBranches = TII->removeBranch(*PreheaderBB);
+ if (numBranches) {
+ SmallVector<MachineOperand, 0> Cond;
+ TII->insertBranch(*PreheaderBB, PrologBBs[0], nullptr, Cond, DebugLoc());
+ }
+}
+
+/// Generate the pipeline epilog code. The epilog code finishes the iterations
+/// that were started in either the prolog or the kernel. We create a basic
+/// block for each stage that needs to complete.
+void SwingSchedulerDAG::generateEpilog(SMSchedule &Schedule, unsigned LastStage,
+ MachineBasicBlock *KernelBB,
+ ValueMapTy *VRMap,
+ MBBVectorTy &EpilogBBs,
+ MBBVectorTy &PrologBBs) {
+ // We need to change the branch from the kernel to the first epilog block, so
+ // this call to analyze branch uses the kernel rather than the original BB.
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+ SmallVector<MachineOperand, 4> Cond;
+ bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond);
+ assert(!checkBranch && "generateEpilog must be able to analyze the branch");
+ if (checkBranch)
+ return;
+
+ MachineBasicBlock::succ_iterator LoopExitI = KernelBB->succ_begin();
+ if (*LoopExitI == KernelBB)
+ ++LoopExitI;
+ assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor");
+ MachineBasicBlock *LoopExitBB = *LoopExitI;
+
+ MachineBasicBlock *PredBB = KernelBB;
+ MachineBasicBlock *EpilogStart = LoopExitBB;
+ InstrMapTy InstrMap;
+
+ // Generate a basic block for each stage, not including the last stage,
+ // which was generated for the kernel. Each basic block may contain
+ // instructions from multiple stages/iterations.
+ int EpilogStage = LastStage + 1;
+ for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) {
+ MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock();
+ EpilogBBs.push_back(NewBB);
+ MF.insert(BB->getIterator(), NewBB);
+
+ PredBB->replaceSuccessor(LoopExitBB, NewBB);
+ NewBB->addSuccessor(LoopExitBB);
+
+ if (EpilogStart == LoopExitBB)
+ EpilogStart = NewBB;
+
+ // Add instructions to the epilog depending on the current block.
+ // Process instructions in original program order.
+ for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) {
+ for (auto &BBI : *BB) {
+ if (BBI.isPHI())
+ continue;
+ MachineInstr *In = &BBI;
+ if (Schedule.isScheduledAtStage(getSUnit(In), StageNum)) {
+ MachineInstr *NewMI = cloneInstr(In, EpilogStage - LastStage, 0);
+ updateInstruction(NewMI, i == 1, EpilogStage, 0, Schedule, VRMap);
+ NewBB->push_back(NewMI);
+ InstrMap[NewMI] = In;
+ }
+ }
+ }
+ generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, Schedule,
+ VRMap, InstrMap, LastStage, EpilogStage, i == 1);
+ generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, Schedule, VRMap,
+ InstrMap, LastStage, EpilogStage, i == 1);
+ PredBB = NewBB;
+
+ DEBUG({
+ dbgs() << "epilog:\n";
+ NewBB->dump();
+ });
+ }
+
+ // Fix any Phi nodes in the loop exit block.
+ for (MachineInstr &MI : *LoopExitBB) {
+ if (!MI.isPHI())
+ break;
+ for (unsigned i = 2, e = MI.getNumOperands() + 1; i != e; i += 2) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (MO.getMBB() == BB)
+ MO.setMBB(PredBB);
+ }
+ }
+
+ // Create a branch to the new epilog from the kernel.
+ // Remove the original branch and add a new branch to the epilog.
+ TII->removeBranch(*KernelBB);
+ TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc());
+ // Add a branch to the loop exit.
+ if (EpilogBBs.size() > 0) {
+ MachineBasicBlock *LastEpilogBB = EpilogBBs.back();
+ SmallVector<MachineOperand, 4> Cond1;
+ TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc());
+ }
+}
+
+/// Replace all uses of FromReg that appear outside the specified
+/// basic block with ToReg.
+static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg,
+ MachineBasicBlock *MBB,
+ MachineRegisterInfo &MRI,
+ LiveIntervals &LIS) {
+ for (MachineRegisterInfo::use_iterator I = MRI.use_begin(FromReg),
+ E = MRI.use_end();
+ I != E;) {
+ MachineOperand &O = *I;
+ ++I;
+ if (O.getParent()->getParent() != MBB)
+ O.setReg(ToReg);
+ }
+ if (!LIS.hasInterval(ToReg))
+ LIS.createEmptyInterval(ToReg);
+}
+
+/// Return true if the register has a use that occurs outside the
+/// specified loop.
+static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB,
+ MachineRegisterInfo &MRI) {
+ for (MachineRegisterInfo::use_iterator I = MRI.use_begin(Reg),
+ E = MRI.use_end();
+ I != E; ++I)
+ if (I->getParent()->getParent() != BB)
+ return true;
+ return false;
+}
+
+/// Generate Phis for the specific block in the generated pipelined code.
+/// This function looks at the Phis from the original code to guide the
+/// creation of new Phis.
+void SwingSchedulerDAG::generateExistingPhis(
+ MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
+ MachineBasicBlock *KernelBB, SMSchedule &Schedule, ValueMapTy *VRMap,
+ InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
+ bool IsLast) {
+ // Compute the stage number for the initial value of the Phi, which
+ // comes from the prolog. The prolog to use depends on the kernel or
+ // epilog block to which we're adding the Phi.
+ unsigned PrologStage = 0;
+ unsigned PrevStage = 0;
+ bool InKernel = (LastStageNum == CurStageNum);
+ if (InKernel) {
+ PrologStage = LastStageNum - 1;
+ PrevStage = CurStageNum;
+ } else {
+ PrologStage = LastStageNum - (CurStageNum - LastStageNum);
+ PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;
+ }
+
+ for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
+ BBE = BB->getFirstNonPHI();
+ BBI != BBE; ++BBI) {
+ unsigned Def = BBI->getOperand(0).getReg();
+
+ unsigned InitVal = 0;
+ unsigned LoopVal = 0;
+ getPhiRegs(*BBI, BB, InitVal, LoopVal);
+
+ unsigned PhiOp1 = 0;
+ // The Phi value from the loop body typically is defined in the loop, but
+ // not always. So, we need to check if the value is defined in the loop.
+ unsigned PhiOp2 = LoopVal;
+ if (VRMap[LastStageNum].count(LoopVal))
+ PhiOp2 = VRMap[LastStageNum][LoopVal];
+
+ int StageScheduled = Schedule.stageScheduled(getSUnit(&*BBI));
+ int LoopValStage =
+ Schedule.stageScheduled(getSUnit(MRI.getVRegDef(LoopVal)));
+ unsigned NumStages = Schedule.getStagesForReg(Def, CurStageNum);
+ if (NumStages == 0) {
+ // We don't need to generate a Phi anymore, but we need to rename any uses
+ // of the Phi value.
+ unsigned NewReg = VRMap[PrevStage][LoopVal];
+ rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, 0, &*BBI,
+ Def, NewReg);
+ if (VRMap[CurStageNum].count(LoopVal))
+ VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal];
+ }
+ // Adjust the number of Phis needed depending on the number of prologs left,
+ // and the distance from where the Phi is first scheduled.
+ unsigned NumPhis = NumStages;
+ if (!InKernel && (int)PrologStage < LoopValStage)
+ // The NumPhis is the maximum number of new Phis needed during the steady
+ // state. If the Phi has not been scheduled in the current prolog, then
+ // we need to generate fewer Phis.
+ NumPhis = std::max((int)NumPhis - (int)(LoopValStage - PrologStage), 1);
+ // The number of Phis cannot exceed the number of prolog stages. Each
+ // stage can potentially define two values.
+ NumPhis = std::min(NumPhis, PrologStage + 2);
+
+ unsigned NewReg = 0;
+
+ unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;
+ // In the epilog, we may need to look back one stage to get the correct
+ // Phi name because the epilog and prolog blocks execute the same stage.
+ // The correct name is from the previous block only when the Phi has
+ // been completely scheduled prior to the epilog, and the Phi value is not
+ // needed in multiple stages.
+ int StageDiff = 0;
+ if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 &&
+ NumPhis == 1)
+ StageDiff = 1;
+ // Adjust the computations below when the phi and the loop definition
+ // are scheduled in different stages.
+ if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage)
+ StageDiff = StageScheduled - LoopValStage;
+ for (unsigned np = 0; np < NumPhis; ++np) {
+ // If the Phi hasn't been scheduled, then use the initial Phi operand
+ // value. Otherwise, use the scheduled version of the instruction. This
+ // is a little complicated when a Phi references another Phi.
+ if (np > PrologStage || StageScheduled >= (int)LastStageNum)
+ PhiOp1 = InitVal;
+ // Check if the Phi has already been scheduled in a prolog stage.
+ else if (PrologStage >= AccessStage + StageDiff + np &&
+ VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0)
+ PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
+ // Check if the Phi has already been scheduled, but the loop instruction
+ // is either another Phi, or doesn't occur in the loop.
+ else if (PrologStage >= AccessStage + StageDiff + np) {
+ // If the Phi references another Phi, we need to examine the other
+ // Phi to get the correct value.
+ PhiOp1 = LoopVal;
+ MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1);
+ int Indirects = 1;
+ while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) {
+ int PhiStage = Schedule.stageScheduled(getSUnit(InstOp1));
+ if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects)
+ PhiOp1 = getInitPhiReg(*InstOp1, BB);
+ else
+ PhiOp1 = getLoopPhiReg(*InstOp1, BB);
+ InstOp1 = MRI.getVRegDef(PhiOp1);
+ int PhiOpStage = Schedule.stageScheduled(getSUnit(InstOp1));
+ int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0);
+ if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np &&
+ VRMap[PrologStage - StageAdj - Indirects - np].count(PhiOp1)) {
+ PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1];
+ break;
+ }
+ ++Indirects;
+ }
+ } else
+ PhiOp1 = InitVal;
+ // If this references a generated Phi in the kernel, get the Phi operand
+ // from the incoming block.
+ if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1))
+ if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
+ PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
+
+ MachineInstr *PhiInst = MRI.getVRegDef(LoopVal);
+ bool LoopDefIsPhi = PhiInst && PhiInst->isPHI();
+ // In the epilog, a map lookup is needed to get the value from the kernel,
+ // or the previous epilog block. How this is done depends on whether the
+ // instruction is scheduled in the previous block.
+ if (!InKernel) {
+ int StageDiffAdj = 0;
+ if (LoopValStage != -1 && StageScheduled > LoopValStage)
+ StageDiffAdj = StageScheduled - LoopValStage;
+ // Use the loop value defined in the kernel, unless the kernel
+ // contains the last definition of the Phi.
+ if (np == 0 && PrevStage == LastStageNum &&
+ (StageScheduled != 0 || LoopValStage != 0) &&
+ VRMap[PrevStage - StageDiffAdj].count(LoopVal))
+ PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];
+ // Use the value defined by the Phi. We add one because we switch
+ // from looking at the loop value to the Phi definition.
+ else if (np > 0 && PrevStage == LastStageNum &&
+ VRMap[PrevStage - np + 1].count(Def))
+ PhiOp2 = VRMap[PrevStage - np + 1][Def];
+ // Use the loop value defined in the kernel.
+ else if ((unsigned)LoopValStage + StageDiffAdj > PrologStage + 1 &&
+ VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))
+ PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
+ // Use the value defined by the Phi, unless we're generating the first
+ // epilog and the Phi refers to a Phi in a different stage.
+ else if (VRMap[PrevStage - np].count(Def) &&
+ (!LoopDefIsPhi || PrevStage != LastStageNum))
+ PhiOp2 = VRMap[PrevStage - np][Def];
+ }
+
+ // Check if we can reuse an existing Phi. This occurs when a Phi
+ // references another Phi, and the other Phi is scheduled in an
+ // earlier stage. We can try to reuse an existing Phi up until the last
+ // stage of the current Phi.
+ if (LoopDefIsPhi && (int)PrologStage >= StageScheduled) {
+ int LVNumStages = Schedule.getStagesForPhi(LoopVal);
+ int StageDiff = (StageScheduled - LoopValStage);
+ LVNumStages -= StageDiff;
+ if (LVNumStages > (int)np) {
+ NewReg = PhiOp2;
+ unsigned ReuseStage = CurStageNum;
+ if (Schedule.isLoopCarried(this, *PhiInst))
+ ReuseStage -= LVNumStages;
+ // Check if the Phi to reuse has been generated yet. If not, then
+ // there is nothing to reuse.
+ if (VRMap[ReuseStage].count(LoopVal)) {
+ NewReg = VRMap[ReuseStage][LoopVal];
+
+ rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
+ &*BBI, Def, NewReg);
+ // Update the map with the new Phi name.
+ VRMap[CurStageNum - np][Def] = NewReg;
+ PhiOp2 = NewReg;
+ if (VRMap[LastStageNum - np - 1].count(LoopVal))
+ PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
+
+ if (IsLast && np == NumPhis - 1)
+ replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
+ continue;
+ }
+ } else if (InKernel && StageDiff > 0 &&
+ VRMap[CurStageNum - StageDiff - np].count(LoopVal))
+ PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
+ }
+
+ const TargetRegisterClass *RC = MRI.getRegClass(Def);
+ NewReg = MRI.createVirtualRegister(RC);
+
+ MachineInstrBuilder NewPhi =
+ BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
+ TII->get(TargetOpcode::PHI), NewReg);
+ NewPhi.addReg(PhiOp1).addMBB(BB1);
+ NewPhi.addReg(PhiOp2).addMBB(BB2);
+ if (np == 0)
+ InstrMap[NewPhi] = &*BBI;
+
+ // We define the Phis after creating the new pipelined code, so
+ // we need to rename the Phi values in scheduled instructions.
+
+ unsigned PrevReg = 0;
+ if (InKernel && VRMap[PrevStage - np].count(LoopVal))
+ PrevReg = VRMap[PrevStage - np][LoopVal];
+ rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, &*BBI,
+ Def, NewReg, PrevReg);
+ // If the Phi has been scheduled, use the new name for rewriting.
+ if (VRMap[CurStageNum - np].count(Def)) {
+ unsigned R = VRMap[CurStageNum - np][Def];
+ rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, &*BBI,
+ R, NewReg);
+ }
+
+ // Check if we need to rename any uses that occur after the loop. The
+ // register to replace depends on whether the Phi is scheduled in the
+ // epilog.
+ if (IsLast && np == NumPhis - 1)
+ replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
+
+ // In the kernel, a dependent Phi uses the value from this Phi.
+ if (InKernel)
+ PhiOp2 = NewReg;
+
+ // Update the map with the new Phi name.
+ VRMap[CurStageNum - np][Def] = NewReg;
+ }
+
+ while (NumPhis++ < NumStages) {
+ rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, NumPhis,
+ &*BBI, Def, NewReg, 0);
+ }
+
+ // Check if we need to rename a Phi that has been eliminated due to
+ // scheduling.
+ if (NumStages == 0 && IsLast && VRMap[CurStageNum].count(LoopVal))
+ replaceRegUsesAfterLoop(Def, VRMap[CurStageNum][LoopVal], BB, MRI, LIS);
+ }
+}
+
+/// Generate Phis for the specified block in the generated pipelined code.
+/// These are new Phis needed because the definition is scheduled after the
+/// use in the pipelined sequence.
+void SwingSchedulerDAG::generatePhis(
+ MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
+ MachineBasicBlock *KernelBB, SMSchedule &Schedule, ValueMapTy *VRMap,
+ InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
+ bool IsLast) {
+ // Compute the stage number that contains the initial Phi value, and
+ // the Phi from the previous stage.
+ unsigned PrologStage = 0;
+ unsigned PrevStage = 0;
+ unsigned StageDiff = CurStageNum - LastStageNum;
+ bool InKernel = (StageDiff == 0);
+ if (InKernel) {
+ PrologStage = LastStageNum - 1;
+ PrevStage = CurStageNum;
+ } else {
+ PrologStage = LastStageNum - StageDiff;
+ PrevStage = LastStageNum + StageDiff - 1;
+ }
+
+ for (MachineBasicBlock::iterator BBI = BB->getFirstNonPHI(),
+ BBE = BB->instr_end();
+ BBI != BBE; ++BBI) {
+ for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = BBI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef() ||
+ !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ continue;
+
+ int StageScheduled = Schedule.stageScheduled(getSUnit(&*BBI));
+ assert(StageScheduled != -1 && "Expecting scheduled instruction.");
+ unsigned Def = MO.getReg();
+ unsigned NumPhis = Schedule.getStagesForReg(Def, CurStageNum);
+ // An instruction that is scheduled in stage 0 and used after the loop
+ // requires a phi in the epilog for the last definition from either
+ // the kernel or prolog.
+ if (!InKernel && NumPhis == 0 && StageScheduled == 0 &&
+ hasUseAfterLoop(Def, BB, MRI))
+ NumPhis = 1;
+ if (!InKernel && (unsigned)StageScheduled > PrologStage)
+ continue;
+
+ unsigned PhiOp2 = VRMap[PrevStage][Def];
+ if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2))
+ if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
+ PhiOp2 = getLoopPhiReg(*InstOp2, BB2);
+ // The number of Phis can't exceed the number of prolog stages. The
+ // prolog stage number is zero based.
+ if (NumPhis > PrologStage + 1 - StageScheduled)
+ NumPhis = PrologStage + 1 - StageScheduled;
+ for (unsigned np = 0; np < NumPhis; ++np) {
+ unsigned PhiOp1 = VRMap[PrologStage][Def];
+ if (np <= PrologStage)
+ PhiOp1 = VRMap[PrologStage - np][Def];
+ if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1)) {
+ if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
+ PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
+ if (InstOp1->isPHI() && InstOp1->getParent() == NewBB)
+ PhiOp1 = getInitPhiReg(*InstOp1, NewBB);
+ }
+ if (!InKernel)
+ PhiOp2 = VRMap[PrevStage - np][Def];
+
+ const TargetRegisterClass *RC = MRI.getRegClass(Def);
+ unsigned NewReg = MRI.createVirtualRegister(RC);
+
+ MachineInstrBuilder NewPhi =
+ BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
+ TII->get(TargetOpcode::PHI), NewReg);
+ NewPhi.addReg(PhiOp1).addMBB(BB1);
+ NewPhi.addReg(PhiOp2).addMBB(BB2);
+ if (np == 0)
+ InstrMap[NewPhi] = &*BBI;
+
+ // Rewrite uses and update the map. The actions depend upon whether
+ // we are generating code for the kernel or epilog blocks.
+ if (InKernel) {
+ rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
+ &*BBI, PhiOp1, NewReg);
+ rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
+ &*BBI, PhiOp2, NewReg);
+
+ PhiOp2 = NewReg;
+ VRMap[PrevStage - np - 1][Def] = NewReg;
+ } else {
+ VRMap[CurStageNum - np][Def] = NewReg;
+ if (np == NumPhis - 1)
+ rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
+ &*BBI, Def, NewReg);
+ }
+ if (IsLast && np == NumPhis - 1)
+ replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
+ }
+ }
+ }
+}
+
+/// Remove instructions that generate values with no uses.
+/// Typically, these are induction variable operations that generate values
+/// used in the loop itself. A dead instruction has a definition with
+/// no uses, or uses that occur in the original loop only.
+void SwingSchedulerDAG::removeDeadInstructions(MachineBasicBlock *KernelBB,
+ MBBVectorTy &EpilogBBs) {
+ // For each epilog block, check that the value defined by each instruction
+ // is used. If not, delete it.
+ for (MBBVectorTy::reverse_iterator MBB = EpilogBBs.rbegin(),
+ MBE = EpilogBBs.rend();
+ MBB != MBE; ++MBB)
+ for (MachineBasicBlock::reverse_instr_iterator MI = (*MBB)->instr_rbegin(),
+ ME = (*MBB)->instr_rend();
+ MI != ME;) {
+ // From DeadMachineInstructionElim. Don't delete inline assembly.
+ if (MI->isInlineAsm()) {
+ ++MI;
+ continue;
+ }
+ bool SawStore = false;
+ // Check if it's safe to remove the instruction due to side effects.
+ // We can, and want to, remove Phis here.
+ if (!MI->isSafeToMove(nullptr, SawStore) && !MI->isPHI()) {
+ ++MI;
+ continue;
+ }
+ bool used = true;
+ for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
+ MOE = MI->operands_end();
+ MOI != MOE; ++MOI) {
+ if (!MOI->isReg() || !MOI->isDef())
+ continue;
+ unsigned reg = MOI->getReg();
+ unsigned realUses = 0;
+ for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(reg),
+ EI = MRI.use_end();
+ UI != EI; ++UI) {
+ // Check if there are any uses that occur only in the original
+ // loop. If so, that's not a real use.
+ if (UI->getParent()->getParent() != BB) {
+ realUses++;
+ used = true;
+ break;
+ }
+ }
+ if (realUses > 0)
+ break;
+ used = false;
+ }
+ if (!used) {
+ MI++->eraseFromParent();
+ continue;
+ }
+ ++MI;
+ }
+ // In the kernel block, check if we can remove a Phi that generates a value
+ // used in an instruction removed in the epilog block.
+ for (MachineBasicBlock::iterator BBI = KernelBB->instr_begin(),
+ BBE = KernelBB->getFirstNonPHI();
+ BBI != BBE;) {
+ MachineInstr *MI = &*BBI;
+ ++BBI;
+ unsigned reg = MI->getOperand(0).getReg();
+ if (MRI.use_begin(reg) == MRI.use_end()) {
+ MI->eraseFromParent();
+ }
+ }
+}
+
+/// For loop carried definitions, we split the lifetime of a virtual register
+/// that has uses past the definition in the next iteration. A copy with a new
+/// virtual register is inserted before the definition, which helps with
+/// generating a better register assignment.
+///
+/// v1 = phi(a, v2) v1 = phi(a, v2)
+/// v2 = phi(b, v3) v2 = phi(b, v3)
+/// v3 = .. v4 = copy v1
+/// .. = V1 v3 = ..
+/// .. = v4
+void SwingSchedulerDAG::splitLifetimes(MachineBasicBlock *KernelBB,
+ MBBVectorTy &EpilogBBs,
+ SMSchedule &Schedule) {
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ for (MachineBasicBlock::iterator BBI = KernelBB->instr_begin(),
+ BBF = KernelBB->getFirstNonPHI();
+ BBI != BBF; ++BBI) {
+ unsigned Def = BBI->getOperand(0).getReg();
+ // Check for any Phi definition that is used as an operand of another Phi
+ // in the same block.
+ for (MachineRegisterInfo::use_instr_iterator I = MRI.use_instr_begin(Def),
+ E = MRI.use_instr_end();
+ I != E; ++I) {
+ if (I->isPHI() && I->getParent() == KernelBB) {
+ // Get the loop carried definition.
+ unsigned LCDef = getLoopPhiReg(*BBI, KernelBB);
+ if (!LCDef)
+ continue;
+ MachineInstr *MI = MRI.getVRegDef(LCDef);
+ if (!MI || MI->getParent() != KernelBB || MI->isPHI())
+ continue;
+ // Search through the rest of the block looking for uses of the Phi
+ // definition. If one occurs, then split the lifetime.
+ unsigned SplitReg = 0;
+ for (auto &BBJ : make_range(MachineBasicBlock::instr_iterator(MI),
+ KernelBB->instr_end()))
+ if (BBJ.readsRegister(Def)) {
+ // We split the lifetime when we find the first use.
+ if (SplitReg == 0) {
+ SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
+ BuildMI(*KernelBB, MI, MI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), SplitReg)
+ .addReg(Def);
+ }
+ BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
+ }
+ if (!SplitReg)
+ continue;
+ // Search through each of the epilog blocks for any uses to be renamed.
+ for (auto &Epilog : EpilogBBs)
+ for (auto &I : *Epilog)
+ if (I.readsRegister(Def))
+ I.substituteRegister(Def, SplitReg, 0, *TRI);
+ break;
+ }
+ }
+ }
+}
+
+/// Remove the incoming block from the Phis in a basic block.
+static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming) {
+ for (MachineInstr &MI : *BB) {
+ if (!MI.isPHI())
+ break;
+ for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2)
+ if (MI.getOperand(i + 1).getMBB() == Incoming) {
+ MI.RemoveOperand(i + 1);
+ MI.RemoveOperand(i);
+ break;
+ }
+ }
+}
+
+/// Create branches from each prolog basic block to the appropriate epilog
+/// block. These edges are needed if the loop ends before reaching the
+/// kernel.
+void SwingSchedulerDAG::addBranches(MBBVectorTy &PrologBBs,
+ MachineBasicBlock *KernelBB,
+ MBBVectorTy &EpilogBBs,
+ SMSchedule &Schedule, ValueMapTy *VRMap) {
+ assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch");
+ MachineInstr *IndVar = Pass.LI.LoopInductionVar;
+ MachineInstr *Cmp = Pass.LI.LoopCompare;
+ MachineBasicBlock *LastPro = KernelBB;
+ MachineBasicBlock *LastEpi = KernelBB;
+
+ // Start from the blocks connected to the kernel and work "out"
+ // to the first prolog and the last epilog blocks.
+ SmallVector<MachineInstr *, 4> PrevInsts;
+ unsigned MaxIter = PrologBBs.size() - 1;
+ unsigned LC = UINT_MAX;
+ unsigned LCMin = UINT_MAX;
+ for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) {
+ // Add branches to the prolog that go to the corresponding
+ // epilog, and the fall-thru prolog/kernel block.
+ MachineBasicBlock *Prolog = PrologBBs[j];
+ MachineBasicBlock *Epilog = EpilogBBs[i];
+ // We've executed one iteration, so decrement the loop count and check for
+ // the loop end.
+ SmallVector<MachineOperand, 4> Cond;
+ // Check if the LOOP0 has already been removed. If so, then there is no need
+ // to reduce the trip count.
+ if (LC != 0)
+ LC = TII->reduceLoopCount(*Prolog, IndVar, *Cmp, Cond, PrevInsts, j,
+ MaxIter);
+
+ // Record the value of the first trip count, which is used to determine if
+ // branches and blocks can be removed for constant trip counts.
+ if (LCMin == UINT_MAX)
+ LCMin = LC;
+
+ unsigned numAdded = 0;
+ if (TargetRegisterInfo::isVirtualRegister(LC)) {
+ Prolog->addSuccessor(Epilog);
+ numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc());
+ } else if (j >= LCMin) {
+ Prolog->addSuccessor(Epilog);
+ Prolog->removeSuccessor(LastPro);
+ LastEpi->removeSuccessor(Epilog);
+ numAdded = TII->insertBranch(*Prolog, Epilog, nullptr, Cond, DebugLoc());
+ removePhis(Epilog, LastEpi);
+ // Remove the blocks that are no longer referenced.
+ if (LastPro != LastEpi) {
+ LastEpi->clear();
+ LastEpi->eraseFromParent();
+ }
+ LastPro->clear();
+ LastPro->eraseFromParent();
+ } else {
+ numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc());
+ removePhis(Epilog, Prolog);
+ }
+ LastPro = Prolog;
+ LastEpi = Epilog;
+ for (MachineBasicBlock::reverse_instr_iterator I = Prolog->instr_rbegin(),
+ E = Prolog->instr_rend();
+ I != E && numAdded > 0; ++I, --numAdded)
+ updateInstruction(&*I, false, j, 0, Schedule, VRMap);
+ }
+}
+
+/// Return true if we can compute the amount the instruction changes
+/// during each iteration. Set Delta to the amount of the change.
+bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) {
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ unsigned BaseReg;
+ int64_t Offset;
+ if (!TII->getMemOpBaseRegImmOfs(MI, BaseReg, Offset, TRI))
+ return false;
+
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ // Check if there is a Phi. If so, get the definition in the loop.
+ MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
+ if (BaseDef && BaseDef->isPHI()) {
+ BaseReg = getLoopPhiReg(*BaseDef, MI.getParent());
+ BaseDef = MRI.getVRegDef(BaseReg);
+ }
+ if (!BaseDef)
+ return false;
+
+ int D = 0;
+ if (!TII->getIncrementValue(*BaseDef, D) && D >= 0)
+ return false;
+
+ Delta = D;
+ return true;
+}
+
+/// Update the memory operand with a new offset when the pipeliner
+/// generates a new copy of the instruction that refers to a
+/// different memory location.
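+/// For example, if a copy of the instruction is emitted two stages past the
+/// stage in which it was originally scheduled, and the loop advances the base
+/// address by 8 each iteration, the memory operand offset is adjusted by 16
+/// to reflect the new location (an illustrative case; the actual increment is
+/// obtained from computeDelta).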
+void SwingSchedulerDAG::updateMemOperands(MachineInstr &NewMI,
+ MachineInstr &OldMI, unsigned Num) {
+ if (Num == 0)
+ return;
+ // If the instruction has memory operands, then adjust the offset
+ // when the instruction appears in different stages.
+ unsigned NumRefs = NewMI.memoperands_end() - NewMI.memoperands_begin();
+ if (NumRefs == 0)
+ return;
+ MachineInstr::mmo_iterator NewMemRefs = MF.allocateMemRefsArray(NumRefs);
+ unsigned Refs = 0;
+ for (MachineMemOperand *MMO : NewMI.memoperands()) {
+ if (MMO->isVolatile() || (MMO->isInvariant() && MMO->isDereferenceable()) ||
+ (!MMO->getValue())) {
+ NewMemRefs[Refs++] = MMO;
+ continue;
+ }
+ unsigned Delta;
+ if (computeDelta(OldMI, Delta)) {
+ int64_t AdjOffset = Delta * Num;
+ NewMemRefs[Refs++] =
+ MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize());
+ } else
+ NewMemRefs[Refs++] = MF.getMachineMemOperand(MMO, 0, UINT64_MAX);
+ }
+ NewMI.setMemRefs(NewMemRefs, NewMemRefs + NumRefs);
+}
+
+/// Clone the instruction for the new pipelined loop and update the
+/// memory operands, if needed.
+MachineInstr *SwingSchedulerDAG::cloneInstr(MachineInstr *OldMI,
+ unsigned CurStageNum,
+ unsigned InstStageNum) {
+ MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
+ // Check for tied operands in inline asm instructions. This should be handled
+ // elsewhere, but I'm not sure of the best solution.
+ if (OldMI->isInlineAsm())
+ for (unsigned i = 0, e = OldMI->getNumOperands(); i != e; ++i) {
+ const auto &MO = OldMI->getOperand(i);
+ if (MO.isReg() && MO.isUse())
+ break;
+ unsigned UseIdx;
+ if (OldMI->isRegTiedToUseOperand(i, &UseIdx))
+ NewMI->tieOperands(i, UseIdx);
+ }
+ updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
+ return NewMI;
+}
+
+/// Clone the instruction for the new pipelined loop. If needed, this
+/// function updates the instruction using the values saved in the
+/// InstrChanges structure.
+MachineInstr *SwingSchedulerDAG::cloneAndChangeInstr(MachineInstr *OldMI,
+ unsigned CurStageNum,
+ unsigned InstStageNum,
+ SMSchedule &Schedule) {
+ MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
+ DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It =
+ InstrChanges.find(getSUnit(OldMI));
+ if (It != InstrChanges.end()) {
+ std::pair<unsigned, int64_t> RegAndOffset = It->second;
+ unsigned BasePos, OffsetPos;
+ if (!TII->getBaseAndOffsetPosition(*OldMI, BasePos, OffsetPos))
+ return nullptr;
+ int64_t NewOffset = OldMI->getOperand(OffsetPos).getImm();
+ MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);
+ if (Schedule.stageScheduled(getSUnit(LoopDef)) > (signed)InstStageNum)
+ NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);
+ NewMI->getOperand(OffsetPos).setImm(NewOffset);
+ }
+ updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
+ return NewMI;
+}
+
+/// Update the machine instruction with new virtual registers. This
+/// function may change the definitions and/or uses.
+void SwingSchedulerDAG::updateInstruction(MachineInstr *NewMI, bool LastDef,
+ unsigned CurStageNum,
+ unsigned InstrStageNum,
+ SMSchedule &Schedule,
+ ValueMapTy *VRMap) {
+ for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = NewMI->getOperand(i);
+ if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ continue;
+ unsigned reg = MO.getReg();
+ if (MO.isDef()) {
+ // Create a new virtual register for the definition.
+ const TargetRegisterClass *RC = MRI.getRegClass(reg);
+ unsigned NewReg = MRI.createVirtualRegister(RC);
+ MO.setReg(NewReg);
+ VRMap[CurStageNum][reg] = NewReg;
+ if (LastDef)
+ replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS);
+ } else if (MO.isUse()) {
+ MachineInstr *Def = MRI.getVRegDef(reg);
+ // Compute the stage that contains the last definition for the instruction.
+ int DefStageNum = Schedule.stageScheduled(getSUnit(Def));
+ unsigned StageNum = CurStageNum;
+ if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) {
+ // Compute the difference in stages between the definition and the use.
+ unsigned StageDiff = (InstrStageNum - DefStageNum);
+ // Make an adjustment to get the last definition.
+ StageNum -= StageDiff;
+ }
+ if (VRMap[StageNum].count(reg))
+ MO.setReg(VRMap[StageNum][reg]);
+ }
+ }
+}
+
+/// Return the instruction in the loop that defines the register.
+/// If the definition is a Phi, then follow the Phi operand to
+/// the instruction in the loop.
+MachineInstr *SwingSchedulerDAG::findDefInLoop(unsigned Reg) {
+ SmallPtrSet<MachineInstr *, 8> Visited;
+ MachineInstr *Def = MRI.getVRegDef(Reg);
+ while (Def->isPHI()) {
+ if (!Visited.insert(Def).second)
+ break;
+ for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
+ if (Def->getOperand(i + 1).getMBB() == BB) {
+ Def = MRI.getVRegDef(Def->getOperand(i).getReg());
+ break;
+ }
+ }
+ return Def;
+}
+
+/// Return the new name for the value from the previous stage.
+unsigned SwingSchedulerDAG::getPrevMapVal(unsigned StageNum, unsigned PhiStage,
+ unsigned LoopVal, unsigned LoopStage,
+ ValueMapTy *VRMap,
+ MachineBasicBlock *BB) {
+ unsigned PrevVal = 0;
+ if (StageNum > PhiStage) {
+ MachineInstr *LoopInst = MRI.getVRegDef(LoopVal);
+ if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal))
+ // The name is defined in the previous stage.
+ PrevVal = VRMap[StageNum - 1][LoopVal];
+ else if (VRMap[StageNum].count(LoopVal))
+ // The previous name is defined in the current stage when the instruction
+ // order is swapped.
+ PrevVal = VRMap[StageNum][LoopVal];
+ else if (!LoopInst->isPHI() || LoopInst->getParent() != BB)
+ // The loop value hasn't yet been scheduled.
+ PrevVal = LoopVal;
+ else if (StageNum == PhiStage + 1)
+ // The loop value is another phi, which has not been scheduled.
+ PrevVal = getInitPhiReg(*LoopInst, BB);
+ else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB)
+ // The loop value is another phi, which has been scheduled.
+ PrevVal =
+ getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB),
+ LoopStage, VRMap, BB);
+ }
+ return PrevVal;
+}
+
+/// Rewrite the Phi values in the specified block to use the mappings
+/// from the initial operand. Once the Phi is scheduled, we switch
+/// to using the loop value instead of the Phi value, so those names
+/// do not need to be rewritten.
+void SwingSchedulerDAG::rewritePhiValues(MachineBasicBlock *NewBB,
+ unsigned StageNum,
+ SMSchedule &Schedule,
+ ValueMapTy *VRMap,
+ InstrMapTy &InstrMap) {
+ for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
+ BBE = BB->getFirstNonPHI();
+ BBI != BBE; ++BBI) {
+ unsigned InitVal = 0;
+ unsigned LoopVal = 0;
+ getPhiRegs(*BBI, BB, InitVal, LoopVal);
+ unsigned PhiDef = BBI->getOperand(0).getReg();
+
+ unsigned PhiStage =
+ (unsigned)Schedule.stageScheduled(getSUnit(MRI.getVRegDef(PhiDef)));
+ unsigned LoopStage =
+ (unsigned)Schedule.stageScheduled(getSUnit(MRI.getVRegDef(LoopVal)));
+ unsigned NumPhis = Schedule.getStagesForPhi(PhiDef);
+ if (NumPhis > StageNum)
+ NumPhis = StageNum;
+ for (unsigned np = 0; np <= NumPhis; ++np) {
+ unsigned NewVal =
+ getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);
+ if (!NewVal)
+ NewVal = InitVal;
+ rewriteScheduledInstr(NewBB, Schedule, InstrMap, StageNum - np, np, &*BBI,
+ PhiDef, NewVal);
+ }
+ }
+}
+
+/// Rewrite a previously scheduled instruction to use the register value
+/// from the new instruction. Make sure the instruction occurs in the
+/// basic block, and we don't change the uses in the new instruction.
+void SwingSchedulerDAG::rewriteScheduledInstr(
+ MachineBasicBlock *BB, SMSchedule &Schedule, InstrMapTy &InstrMap,
+ unsigned CurStageNum, unsigned PhiNum, MachineInstr *Phi, unsigned OldReg,
+ unsigned NewReg, unsigned PrevReg) {
+ bool InProlog = (CurStageNum < Schedule.getMaxStageCount());
+ int StagePhi = Schedule.stageScheduled(getSUnit(Phi)) + PhiNum;
+ // Rewrite uses that have been scheduled already to use the new
+ // Phi register.
+ for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(OldReg),
+ EI = MRI.use_end();
+ UI != EI;) {
+ MachineOperand &UseOp = *UI;
+ MachineInstr *UseMI = UseOp.getParent();
+ ++UI;
+ if (UseMI->getParent() != BB)
+ continue;
+ if (UseMI->isPHI()) {
+ if (!Phi->isPHI() && UseMI->getOperand(0).getReg() == NewReg)
+ continue;
+ if (getLoopPhiReg(*UseMI, BB) != OldReg)
+ continue;
+ }
+ InstrMapTy::iterator OrigInstr = InstrMap.find(UseMI);
+ assert(OrigInstr != InstrMap.end() && "Instruction not scheduled.");
+ SUnit *OrigMISU = getSUnit(OrigInstr->second);
+ int StageSched = Schedule.stageScheduled(OrigMISU);
+ int CycleSched = Schedule.cycleScheduled(OrigMISU);
+ unsigned ReplaceReg = 0;
+ // This is the stage for the scheduled instruction.
+ if (StagePhi == StageSched && Phi->isPHI()) {
+ int CyclePhi = Schedule.cycleScheduled(getSUnit(Phi));
+ if (PrevReg && InProlog)
+ ReplaceReg = PrevReg;
+ else if (PrevReg && !Schedule.isLoopCarried(this, *Phi) &&
+ (CyclePhi <= CycleSched || OrigMISU->getInstr()->isPHI()))
+ ReplaceReg = PrevReg;
+ else
+ ReplaceReg = NewReg;
+ }
+ // The scheduled instruction occurs before the scheduled Phi, and the
+ // Phi is not loop carried.
+ if (!InProlog && StagePhi + 1 == StageSched &&
+ !Schedule.isLoopCarried(this, *Phi))
+ ReplaceReg = NewReg;
+ if (StagePhi > StageSched && Phi->isPHI())
+ ReplaceReg = NewReg;
+ if (!InProlog && !Phi->isPHI() && StagePhi < StageSched)
+ ReplaceReg = NewReg;
+ if (ReplaceReg) {
+ MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));
+ UseOp.setReg(ReplaceReg);
+ }
+ }
+}
+
+/// Check if we can change the instruction to use an offset value from the
+/// previous iteration. If so, return true and set the base and offset values
+/// so that we can rewrite the load, if necessary.
+/// v1 = Phi(v0, v3)
+/// v2 = load v1, 0
+/// v3 = post_store v1, 4, x
+/// This function enables the load to be rewritten as v2 = load v3, 4.
+bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *MI,
+ unsigned &BasePos,
+ unsigned &OffsetPos,
+ unsigned &NewBase,
+ int64_t &Offset) {
+ // Get the load instruction.
+ if (TII->isPostIncrement(*MI))
+ return false;
+ unsigned BasePosLd, OffsetPosLd;
+ if (!TII->getBaseAndOffsetPosition(*MI, BasePosLd, OffsetPosLd))
+ return false;
+ unsigned BaseReg = MI->getOperand(BasePosLd).getReg();
+
+ // Look for the Phi instruction.
+ MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();
+ MachineInstr *Phi = MRI.getVRegDef(BaseReg);
+ if (!Phi || !Phi->isPHI())
+ return false;
+ // Get the register defined in the loop block.
+ unsigned PrevReg = getLoopPhiReg(*Phi, MI->getParent());
+ if (!PrevReg)
+ return false;
+
+ // Check for the post-increment load/store instruction.
+ MachineInstr *PrevDef = MRI.getVRegDef(PrevReg);
+ if (!PrevDef || PrevDef == MI)
+ return false;
+
+ if (!TII->isPostIncrement(*PrevDef))
+ return false;
+
+ unsigned BasePos1 = 0, OffsetPos1 = 0;
+ if (!TII->getBaseAndOffsetPosition(*PrevDef, BasePos1, OffsetPos1))
+ return false;
+
+ // Make sure offset values are both positive or both negative.
+ int64_t LoadOffset = MI->getOperand(OffsetPosLd).getImm();
+ int64_t StoreOffset = PrevDef->getOperand(OffsetPos1).getImm();
+ if ((LoadOffset >= 0) != (StoreOffset >= 0))
+ return false;
+
+ // Set the return value once we determine that we return true.
+ BasePos = BasePosLd;
+ OffsetPos = OffsetPosLd;
+ NewBase = PrevReg;
+ Offset = StoreOffset;
+ return true;
+}
+
+/// Apply changes to the instruction if needed. The changes are needed
+/// to improve the scheduling and depend upon the final schedule.
+MachineInstr *SwingSchedulerDAG::applyInstrChange(MachineInstr *MI,
+ SMSchedule &Schedule,
+ bool UpdateDAG) {
+ SUnit *SU = getSUnit(MI);
+ DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It =
+ InstrChanges.find(SU);
+ if (It != InstrChanges.end()) {
+ std::pair<unsigned, int64_t> RegAndOffset = It->second;
+ unsigned BasePos, OffsetPos;
+ if (!TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
+ return nullptr;
+ unsigned BaseReg = MI->getOperand(BasePos).getReg();
+ MachineInstr *LoopDef = findDefInLoop(BaseReg);
+ int DefStageNum = Schedule.stageScheduled(getSUnit(LoopDef));
+ int DefCycleNum = Schedule.cycleScheduled(getSUnit(LoopDef));
+ int BaseStageNum = Schedule.stageScheduled(SU);
+ int BaseCycleNum = Schedule.cycleScheduled(SU);
+ if (BaseStageNum < DefStageNum) {
+ MachineInstr *NewMI = MF.CloneMachineInstr(MI);
+ int OffsetDiff = DefStageNum - BaseStageNum;
+ if (DefCycleNum < BaseCycleNum) {
+ NewMI->getOperand(BasePos).setReg(RegAndOffset.first);
+ if (OffsetDiff > 0)
+ --OffsetDiff;
+ }
+ int64_t NewOffset =
+ MI->getOperand(OffsetPos).getImm() + RegAndOffset.second * OffsetDiff;
+ NewMI->getOperand(OffsetPos).setImm(NewOffset);
+ if (UpdateDAG) {
+ SU->setInstr(NewMI);
+ MISUnitMap[NewMI] = SU;
+ }
+ NewMIs.insert(NewMI);
+ return NewMI;
+ }
+ }
+ return nullptr;
+}
+
+/// Return true for an order dependence that is potentially loop carried.
+/// An order dependence is loop carried if the destination defines a value
+/// that may be used by the source in a subsequent iteration.
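+/// For example, a store in one iteration whose location is read by a load in
+/// the next iteration creates such a dependence.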
+bool SwingSchedulerDAG::isLoopCarriedOrder(SUnit *Source, const SDep &Dep,
+ bool isSucc) {
+ if (!isOrder(Source, Dep) || Dep.isArtificial())
+ return false;
+
+ if (!SwpPruneLoopCarried)
+ return true;
+
+ MachineInstr *SI = Source->getInstr();
+ MachineInstr *DI = Dep.getSUnit()->getInstr();
+ if (!isSucc)
+ std::swap(SI, DI);
+ assert(SI != nullptr && DI != nullptr && "Expecting SUnit with an MI.");
+
+ // Assume ordered loads and stores may have a loop carried dependence.
+ if (SI->hasUnmodeledSideEffects() || DI->hasUnmodeledSideEffects() ||
+ SI->hasOrderedMemoryRef() || DI->hasOrderedMemoryRef())
+ return true;
+
+ // Only chain dependences between a load and store can be loop carried.
+ if (!DI->mayStore() || !SI->mayLoad())
+ return false;
+
+ unsigned DeltaS, DeltaD;
+ if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD))
+ return true;
+
+ unsigned BaseRegS, BaseRegD;
+ int64_t OffsetS, OffsetD;
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ if (!TII->getMemOpBaseRegImmOfs(*SI, BaseRegS, OffsetS, TRI) ||
+ !TII->getMemOpBaseRegImmOfs(*DI, BaseRegD, OffsetD, TRI))
+ return true;
+
+ if (BaseRegS != BaseRegD)
+ return true;
+
+ uint64_t AccessSizeS = (*SI->memoperands_begin())->getSize();
+ uint64_t AccessSizeD = (*DI->memoperands_begin())->getSize();
+
+ // This is the main test, which checks the offset values and the loop
+ // increment value to determine if the accesses may be loop carried.
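+ // For example, with equal offsets, 4-byte accesses, and a loop increment
+ // (Delta) of 4, OffsetS + AccessSizeS equals DeltaS, so the accesses from
+ // successive iterations do not overlap and the dependence is not treated
+ // as loop carried.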
+ if (OffsetS >= OffsetD)
+ return OffsetS + AccessSizeS > DeltaS;
+ else if (OffsetS < OffsetD)
+ return OffsetD + AccessSizeD > DeltaD;
+
+ return true;
+}
+
+void SwingSchedulerDAG::postprocessDAG() {
+ for (auto &M : Mutations)
+ M->apply(this);
+}
+
+/// Try to schedule the node at the specified StartCycle and continue
+/// until the node is scheduled or the EndCycle is reached. This function
+/// returns true if the node is scheduled. This routine may search either
+/// forward or backward for a place to insert the instruction based upon
+/// the relative values of StartCycle and EndCycle.
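+/// For example, with StartCycle = 5 and EndCycle = 2, the search visits
+/// cycles 5, 4, 3, and 2 in that order.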
+bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) {
+ bool forward = true;
+ if (StartCycle > EndCycle)
+ forward = false;
+
+ // The terminating condition depends on the direction.
+ int termCycle = forward ? EndCycle + 1 : EndCycle - 1;
+ for (int curCycle = StartCycle; curCycle != termCycle;
+ forward ? ++curCycle : --curCycle) {
+
+ // Add the already scheduled instructions at the specified cycle to the DFA.
+ Resources->clearResources();
+ for (int checkCycle = FirstCycle + ((curCycle - FirstCycle) % II);
+ checkCycle <= LastCycle; checkCycle += II) {
+ std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[checkCycle];
+
+ for (std::deque<SUnit *>::iterator I = cycleInstrs.begin(),
+ E = cycleInstrs.end();
+ I != E; ++I) {
+ if (ST.getInstrInfo()->isZeroCost((*I)->getInstr()->getOpcode()))
+ continue;
+ assert(Resources->canReserveResources(*(*I)->getInstr()) &&
+ "These instructions have already been scheduled.");
+ Resources->reserveResources(*(*I)->getInstr());
+ }
+ }
+ if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()) ||
+ Resources->canReserveResources(*SU->getInstr())) {
+ DEBUG({
+ dbgs() << "\tinsert at cycle " << curCycle << " ";
+ SU->getInstr()->dump();
+ });
+
+ ScheduledInstrs[curCycle].push_back(SU);
+ InstrToCycle.insert(std::make_pair(SU, curCycle));
+ if (curCycle > LastCycle)
+ LastCycle = curCycle;
+ if (curCycle < FirstCycle)
+ FirstCycle = curCycle;
+ return true;
+ }
+ DEBUG({
+ dbgs() << "\tfailed to insert at cycle " << curCycle << " ";
+ SU->getInstr()->dump();
+ });
+ }
+ return false;
+}
+
+// Return the cycle of the earliest scheduled instruction in the chain.
+int SMSchedule::earliestCycleInChain(const SDep &Dep) {
+ SmallPtrSet<SUnit *, 8> Visited;
+ SmallVector<SDep, 8> Worklist;
+ Worklist.push_back(Dep);
+ int EarlyCycle = INT_MAX;
+ while (!Worklist.empty()) {
+ const SDep &Cur = Worklist.pop_back_val();
+ SUnit *PrevSU = Cur.getSUnit();
+ if (Visited.count(PrevSU))
+ continue;
+ std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(PrevSU);
+ if (it == InstrToCycle.end())
+ continue;
+ EarlyCycle = std::min(EarlyCycle, it->second);
+ for (const auto &PI : PrevSU->Preds)
+ if (SwingSchedulerDAG::isOrder(PrevSU, PI))
+ Worklist.push_back(PI);
+ Visited.insert(PrevSU);
+ }
+ return EarlyCycle;
+}
+
+// Return the cycle of the latest scheduled instruction in the chain.
+int SMSchedule::latestCycleInChain(const SDep &Dep) {
+ SmallPtrSet<SUnit *, 8> Visited;
+ SmallVector<SDep, 8> Worklist;
+ Worklist.push_back(Dep);
+ int LateCycle = INT_MIN;
+ while (!Worklist.empty()) {
+ const SDep &Cur = Worklist.pop_back_val();
+ SUnit *SuccSU = Cur.getSUnit();
+ if (Visited.count(SuccSU))
+ continue;
+ std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU);
+ if (it == InstrToCycle.end())
+ continue;
+ LateCycle = std::max(LateCycle, it->second);
+ for (const auto &SI : SuccSU->Succs)
+ if (SwingSchedulerDAG::isOrder(SuccSU, SI))
+ Worklist.push_back(SI);
+ Visited.insert(SuccSU);
+ }
+ return LateCycle;
+}
+
+/// If an instruction has a use that spans multiple iterations, then
+/// return true. These instructions are characterized by having a back-edge
+/// to a Phi, which contains a reference to another Phi.
+static SUnit *multipleIterations(SUnit *SU, SwingSchedulerDAG *DAG) {
+ for (auto &P : SU->Preds)
+ if (DAG->isBackedge(SU, P) && P.getSUnit()->getInstr()->isPHI())
+ for (auto &S : P.getSUnit()->Succs)
+ if (S.getKind() == SDep::Order && S.getSUnit()->getInstr()->isPHI())
+ return P.getSUnit();
+ return nullptr;
+}
+
+/// Compute the scheduling start slot for the instruction. The start slot
+/// depends on any predecessor or successor nodes scheduled already.
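+/// For example, a scheduled non-back-edge predecessor at cycle c with latency
+/// L and dependence distance D gives an early start bound of c + L - D * II
+/// for this instruction.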
+void SMSchedule::computeStart(SUnit *SU, int *MaxEarlyStart, int *MinLateStart,
+ int *MinEnd, int *MaxStart, int II,
+ SwingSchedulerDAG *DAG) {
+ // Iterate over each instruction that has been scheduled already. The start
+ // slot computation depends on whether the previously scheduled instruction
+ // is a predecessor or successor of the specified instruction.
+ for (int cycle = getFirstCycle(); cycle <= LastCycle; ++cycle) {
+
+ // Iterate over each instruction in the current cycle.
+ for (SUnit *I : getInstructions(cycle)) {
+ // Because we're processing a DAG for the dependences, we recognize
+ // the back-edge in recurrences by anti dependences.
+ for (unsigned i = 0, e = (unsigned)SU->Preds.size(); i != e; ++i) {
+ const SDep &Dep = SU->Preds[i];
+ if (Dep.getSUnit() == I) {
+ if (!DAG->isBackedge(SU, Dep)) {
+ int EarlyStart = cycle + DAG->getLatency(SU, Dep) -
+ DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
+ *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
+ if (DAG->isLoopCarriedOrder(SU, Dep, false)) {
+ int End = earliestCycleInChain(Dep) + (II - 1);
+ *MinEnd = std::min(*MinEnd, End);
+ }
+ } else {
+ int LateStart = cycle - DAG->getLatency(SU, Dep) +
+ DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
+ *MinLateStart = std::min(*MinLateStart, LateStart);
+ }
+ }
+ // For an instruction that requires multiple iterations, make sure that
+ // the dependent instruction is not scheduled past the definition.
+ SUnit *BE = multipleIterations(I, DAG);
+ if (BE && Dep.getSUnit() == BE && !SU->getInstr()->isPHI() &&
+ !SU->isPred(I))
+ *MinLateStart = std::min(*MinLateStart, cycle);
+ }
+ for (unsigned i = 0, e = (unsigned)SU->Succs.size(); i != e; ++i)
+ if (SU->Succs[i].getSUnit() == I) {
+ const SDep &Dep = SU->Succs[i];
+ if (!DAG->isBackedge(SU, Dep)) {
+ int LateStart = cycle - DAG->getLatency(SU, Dep) +
+ DAG->getDistance(SU, Dep.getSUnit(), Dep) * II;
+ *MinLateStart = std::min(*MinLateStart, LateStart);
+ if (DAG->isLoopCarriedOrder(SU, Dep)) {
+ int Start = latestCycleInChain(Dep) + 1 - II;
+ *MaxStart = std::max(*MaxStart, Start);
+ }
+ } else {
+ int EarlyStart = cycle + DAG->getLatency(SU, Dep) -
+ DAG->getDistance(Dep.getSUnit(), SU, Dep) * II;
+ *MaxEarlyStart = std::max(*MaxEarlyStart, EarlyStart);
+ }
+ }
+ }
+ }
+}
+
+/// Order the instructions within a cycle so that the definitions occur
+/// before the uses. Returns true if the instruction is added to the start
+/// of the list, or false if added to the end.
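+/// For example, when an instruction that defines a value is inserted into a
+/// cycle that already contains a use of that value in the same or an earlier
+/// stage, the definition is placed at the front of the list so the use
+/// follows it.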
+bool SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
+ std::deque<SUnit *> &Insts) {
+ MachineInstr *MI = SU->getInstr();
+ bool OrderBeforeUse = false;
+ bool OrderAfterDef = false;
+ bool OrderBeforeDef = false;
+ unsigned MoveDef = 0;
+ unsigned MoveUse = 0;
+ int StageInst1 = stageScheduled(SU);
+
+ unsigned Pos = 0;
+ for (std::deque<SUnit *>::iterator I = Insts.begin(), E = Insts.end(); I != E;
+ ++I, ++Pos) {
+ // Relative order of Phis does not matter.
+ if (MI->isPHI() && (*I)->getInstr()->isPHI())
+ continue;
+ for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ continue;
+ unsigned Reg = MO.getReg();
+ unsigned BasePos, OffsetPos;
+ if (ST.getInstrInfo()->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
+ if (MI->getOperand(BasePos).getReg() == Reg)
+ if (unsigned NewReg = SSD->getInstrBaseReg(SU))
+ Reg = NewReg;
+ bool Reads, Writes;
+ std::tie(Reads, Writes) =
+ (*I)->getInstr()->readsWritesVirtualRegister(Reg);
+ if (MO.isDef() && Reads && stageScheduled(*I) <= StageInst1) {
+ OrderBeforeUse = true;
+ MoveUse = Pos;
+ } else if (MO.isDef() && Reads && stageScheduled(*I) > StageInst1) {
+ // Add the instruction after the scheduled instruction.
+ OrderAfterDef = true;
+ MoveDef = Pos;
+ } else if (MO.isUse() && Writes && stageScheduled(*I) == StageInst1) {
+ if (cycleScheduled(*I) == cycleScheduled(SU) && !(*I)->isSucc(SU)) {
+ OrderBeforeUse = true;
+ MoveUse = Pos;
+ } else {
+ OrderAfterDef = true;
+ MoveDef = Pos;
+ }
+ } else if (MO.isUse() && Writes && stageScheduled(*I) > StageInst1) {
+ OrderBeforeUse = true;
+ MoveUse = Pos;
+ if (MoveUse != 0) {
+ OrderAfterDef = true;
+ MoveDef = Pos - 1;
+ }
+ } else if (MO.isUse() && Writes && stageScheduled(*I) < StageInst1) {
+ // Add the instruction before the scheduled instruction.
+ OrderBeforeUse = true;
+ MoveUse = Pos;
+ } else if (MO.isUse() && stageScheduled(*I) == StageInst1 &&
+ isLoopCarriedDefOfUse(SSD, (*I)->getInstr(), MO)) {
+ OrderBeforeDef = true;
+ MoveUse = Pos;
+ }
+ }
+ // Check for order dependences between instructions. Make sure the source
+ // is ordered before the destination.
+ for (auto &S : SU->Succs)
+ if (S.getKind() == SDep::Order) {
+ if (S.getSUnit() == *I && stageScheduled(*I) == StageInst1) {
+ OrderBeforeUse = true;
+ MoveUse = Pos;
+ }
+ } else if (TargetRegisterInfo::isPhysicalRegister(S.getReg())) {
+ if (cycleScheduled(SU) != cycleScheduled(S.getSUnit())) {
+ if (S.isAssignedRegDep()) {
+ OrderAfterDef = true;
+ MoveDef = Pos;
+ }
+ } else {
+ OrderBeforeUse = true;
+ MoveUse = Pos;
+ }
+ }
+ for (auto &P : SU->Preds)
+ if (P.getKind() == SDep::Order) {
+ if (P.getSUnit() == *I && stageScheduled(*I) == StageInst1) {
+ OrderAfterDef = true;
+ MoveDef = Pos;
+ }
+ } else if (TargetRegisterInfo::isPhysicalRegister(P.getReg())) {
+ if (cycleScheduled(SU) != cycleScheduled(P.getSUnit())) {
+ if (P.isAssignedRegDep()) {
+ OrderBeforeUse = true;
+ MoveUse = Pos;
+ }
+ } else {
+ OrderAfterDef = true;
+ MoveDef = Pos;
+ }
+ }
+ }
+
+ // A circular dependence.
+ if (OrderAfterDef && OrderBeforeUse && MoveUse == MoveDef)
+ OrderBeforeUse = false;
+
+ // OrderAfterDef takes precedence over OrderBeforeDef. The latter is due
+ // to a loop-carried dependence.
+ if (OrderBeforeDef)
+ OrderBeforeUse = !OrderAfterDef || (MoveUse > MoveDef);
+
+ // The uncommon case when the instruction order needs to be updated because
+ // there is both a use and def.
+ if (OrderBeforeUse && OrderAfterDef) {
+ SUnit *UseSU = Insts.at(MoveUse);
+ SUnit *DefSU = Insts.at(MoveDef);
+ if (MoveUse > MoveDef) {
+ Insts.erase(Insts.begin() + MoveUse);
+ Insts.erase(Insts.begin() + MoveDef);
+ } else {
+ Insts.erase(Insts.begin() + MoveDef);
+ Insts.erase(Insts.begin() + MoveUse);
+ }
+ if (orderDependence(SSD, UseSU, Insts)) {
+ Insts.push_front(SU);
+ orderDependence(SSD, DefSU, Insts);
+ return true;
+ }
+ Insts.pop_back();
+ Insts.push_back(SU);
+ Insts.push_back(UseSU);
+ orderDependence(SSD, DefSU, Insts);
+ return false;
+ }
+ // Put the new instruction first if there is a use in the list. Otherwise,
+ // put it at the end of the list.
+ if (OrderBeforeUse)
+ Insts.push_front(SU);
+ else
+ Insts.push_back(SU);
+ return OrderBeforeUse;
+}
+
+/// Return true if the scheduled Phi has a loop carried operand.
+bool SMSchedule::isLoopCarried(SwingSchedulerDAG *SSD, MachineInstr &Phi) {
+ if (!Phi.isPHI())
+ return false;
+ assert(Phi.isPHI() && "Expecting a Phi.");
+ SUnit *DefSU = SSD->getSUnit(&Phi);
+ unsigned DefCycle = cycleScheduled(DefSU);
+ int DefStage = stageScheduled(DefSU);
+
+ unsigned InitVal = 0;
+ unsigned LoopVal = 0;
+ getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
+ SUnit *UseSU = SSD->getSUnit(MRI.getVRegDef(LoopVal));
+ if (!UseSU)
+ return true;
+ if (UseSU->getInstr()->isPHI())
+ return true;
+ unsigned LoopCycle = cycleScheduled(UseSU);
+ int LoopStage = stageScheduled(UseSU);
+ return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
+}
+
+/// Return true if the instruction is a definition that is loop carried
+/// and defines the use on the next iteration.
+/// v1 = phi(v2, v3)
+/// (Def) v3 = op v1
+/// (MO) = v1
+/// If MO appears before Def, then v1 and v3 may get assigned to the same
+/// register.
+bool SMSchedule::isLoopCarriedDefOfUse(SwingSchedulerDAG *SSD,
+ MachineInstr *Def, MachineOperand &MO) {
+ if (!MO.isReg())
+ return false;
+ if (Def->isPHI())
+ return false;
+ MachineInstr *Phi = MRI.getVRegDef(MO.getReg());
+ if (!Phi || !Phi->isPHI() || Phi->getParent() != Def->getParent())
+ return false;
+ if (!isLoopCarried(SSD, *Phi))
+ return false;
+ unsigned LoopReg = getLoopPhiReg(*Phi, Phi->getParent());
+ for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) {
+ MachineOperand &DMO = Def->getOperand(i);
+ if (!DMO.isReg() || !DMO.isDef())
+ continue;
+ if (DMO.getReg() == LoopReg)
+ return true;
+ }
+ return false;
+}
+
+// Check if the generated schedule is valid. This function checks if
+// an instruction that uses a physical register is scheduled in a
+// different stage than the definition. The pipeliner does not handle
+// physical register values that may cross a basic block boundary.
+bool SMSchedule::isValidSchedule(SwingSchedulerDAG *SSD) {
+ for (int i = 0, e = SSD->SUnits.size(); i < e; ++i) {
+ SUnit &SU = SSD->SUnits[i];
+ if (!SU.hasPhysRegDefs)
+ continue;
+ int StageDef = stageScheduled(&SU);
+ assert(StageDef != -1 && "Instruction should have been scheduled.");
+ for (auto &SI : SU.Succs)
+ if (SI.isAssignedRegDep())
+ if (ST.getRegisterInfo()->isPhysicalRegister(SI.getReg()))
+ if (stageScheduled(SI.getSUnit()) != StageDef)
+ return false;
+ }
+ return true;
+}
+
+/// After the schedule has been formed, call this function to combine
+/// the instructions from the different stages/cycles. That is, this
+/// function creates a schedule that represents a single iteration.
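+/// For example, assuming the schedule starts at cycle 0 with an initiation
+/// interval of 2 and two stages, the stage-1 instructions placed at cycle 3
+/// are moved in front of the stage-0 instructions at cycle 1.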
+void SMSchedule::finalizeSchedule(SwingSchedulerDAG *SSD) {
+ // Move all instructions to the first stage from later stages.
+ for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
+ for (int stage = 1, lastStage = getMaxStageCount(); stage <= lastStage;
+ ++stage) {
+ std::deque<SUnit *> &cycleInstrs =
+ ScheduledInstrs[cycle + (stage * InitiationInterval)];
+ for (std::deque<SUnit *>::reverse_iterator I = cycleInstrs.rbegin(),
+ E = cycleInstrs.rend();
+ I != E; ++I)
+ ScheduledInstrs[cycle].push_front(*I);
+ }
+ }
+ // Iterate over the definitions in each instruction, and compute the
+ // stage difference for each use. Keep the maximum value.
+ for (auto &I : InstrToCycle) {
+ int DefStage = stageScheduled(I.first);
+ MachineInstr *MI = I.first->getInstr();
+ for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
+ MachineOperand &Op = MI->getOperand(i);
+ if (!Op.isReg() || !Op.isDef())
+ continue;
+
+ unsigned Reg = Op.getReg();
+ unsigned MaxDiff = 0;
+ bool PhiIsSwapped = false;
+ for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(Reg),
+ EI = MRI.use_end();
+ UI != EI; ++UI) {
+ MachineOperand &UseOp = *UI;
+ MachineInstr *UseMI = UseOp.getParent();
+ SUnit *SUnitUse = SSD->getSUnit(UseMI);
+ int UseStage = stageScheduled(SUnitUse);
+ unsigned Diff = 0;
+ if (UseStage != -1 && UseStage >= DefStage)
+ Diff = UseStage - DefStage;
+ if (MI->isPHI()) {
+ if (isLoopCarried(SSD, *MI))
+ ++Diff;
+ else
+ PhiIsSwapped = true;
+ }
+ MaxDiff = std::max(Diff, MaxDiff);
+ }
+ RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped);
+ }
+ }
+
+ // Erase all the elements in the later stages. Only one iteration should
+ // remain in the scheduled list, and it contains all the instructions.
+ for (int cycle = getFinalCycle() + 1; cycle <= LastCycle; ++cycle)
+ ScheduledInstrs.erase(cycle);
+
+ // Change the registers in the instructions as specified in the InstrChanges
+ // map. We need to use the new registers to create the correct order.
+ for (int i = 0, e = SSD->SUnits.size(); i != e; ++i) {
+ SUnit *SU = &SSD->SUnits[i];
+ SSD->applyInstrChange(SU->getInstr(), *this, true);
+ }
+
+ // Reorder the instructions in each cycle to fix and improve the
+ // generated code.
+ for (int Cycle = getFirstCycle(), E = getFinalCycle(); Cycle <= E; ++Cycle) {
+ std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[Cycle];
+ std::deque<SUnit *> newOrderZC;
+ // Put the zero-cost, pseudo instructions at the start of the cycle.
+ for (unsigned i = 0, e = cycleInstrs.size(); i < e; ++i) {
+ SUnit *SU = cycleInstrs[i];
+ if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()))
+ orderDependence(SSD, SU, newOrderZC);
+ }
+ std::deque<SUnit *> newOrderI;
+ // Then, add the regular instructions back.
+ for (unsigned i = 0, e = cycleInstrs.size(); i < e; ++i) {
+ SUnit *SU = cycleInstrs[i];
+ if (!ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()))
+ orderDependence(SSD, SU, newOrderI);
+ }
+ // Replace the old order with the new order.
+ cycleInstrs.swap(newOrderZC);
+ cycleInstrs.insert(cycleInstrs.end(), newOrderI.begin(), newOrderI.end());
+ }
+
+ DEBUG(dump(););
+}
+
+/// Print the schedule information to the given output.
+void SMSchedule::print(raw_ostream &os) const {
+ // Iterate over each cycle.
+ for (int cycle = getFirstCycle(); cycle <= getFinalCycle(); ++cycle) {
+ // Iterate over each instruction in the cycle.
+ const_sched_iterator cycleInstrs = ScheduledInstrs.find(cycle);
+ for (SUnit *CI : cycleInstrs->second) {
+ os << "cycle " << cycle << " (" << stageScheduled(CI) << ") ";
+ os << "(" << CI->NodeNum << ") ";
+ CI->getInstr()->print(os);
+ os << "\n";
+ }
+ }
+}
+
+/// Utility function used for debugging to print the schedule.
+void SMSchedule::dump() const { print(dbgs()); }
diff --git a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
index 613598dbe215..242cb0b80953 100644
--- a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp
@@ -21,11 +21,16 @@
using namespace llvm;
+static cl::opt<bool> EnableSubRegLiveness("enable-subreg-liveness", cl::Hidden,
+ cl::init(true), cl::desc("Enable subregister liveness tracking."));
+
// Pin the vtable to this file.
void MachineRegisterInfo::Delegate::anchor() {}
MachineRegisterInfo::MachineRegisterInfo(MachineFunction *MF)
- : MF(MF), TheDelegate(nullptr), TracksSubRegLiveness(false) {
+ : MF(MF), TheDelegate(nullptr),
+ TracksSubRegLiveness(MF->getSubtarget().enableSubRegLiveness() &&
+ EnableSubRegLiveness) {
unsigned NumRegs = getTargetRegisterInfo()->getNumRegs();
VRegInfo.reserve(256);
RegAllocHints.reserve(256);
@@ -88,6 +93,13 @@ MachineRegisterInfo::recomputeRegClass(unsigned Reg) {
return true;
}
+unsigned MachineRegisterInfo::createIncompleteVirtualRegister() {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(getNumVirtRegs());
+ VRegInfo.grow(Reg);
+ RegAllocHints.grow(Reg);
+ return Reg;
+}
+
/// createVirtualRegister - Create and return a new virtual register in the
/// function with the specified register class.
///
@@ -98,41 +110,42 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){
"Virtual register RegClass must be allocatable.");
// New virtual register number.
- unsigned Reg = TargetRegisterInfo::index2VirtReg(getNumVirtRegs());
- VRegInfo.grow(Reg);
+ unsigned Reg = createIncompleteVirtualRegister();
VRegInfo[Reg].first = RegClass;
- RegAllocHints.grow(Reg);
if (TheDelegate)
TheDelegate->MRI_NoteNewVirtualRegister(Reg);
return Reg;
}
-unsigned
-MachineRegisterInfo::getSize(unsigned VReg) const {
- VRegToSizeMap::const_iterator SizeIt = getVRegToSize().find(VReg);
- return SizeIt != getVRegToSize().end() ? SizeIt->second : 0;
+LLT MachineRegisterInfo::getType(unsigned VReg) const {
+ VRegToTypeMap::const_iterator TypeIt = getVRegToType().find(VReg);
+ return TypeIt != getVRegToType().end() ? TypeIt->second : LLT{};
}
-void MachineRegisterInfo::setSize(unsigned VReg, unsigned Size) {
- getVRegToSize()[VReg] = Size;
+void MachineRegisterInfo::setType(unsigned VReg, LLT Ty) {
+ // Check that VReg doesn't have a class.
+ assert((getRegClassOrRegBank(VReg).isNull() ||
+ !getRegClassOrRegBank(VReg).is<const TargetRegisterClass *>()) &&
+ "Can't set the size of a non-generic virtual register");
+ getVRegToType()[VReg] = Ty;
}
unsigned
-MachineRegisterInfo::createGenericVirtualRegister(unsigned Size) {
- assert(Size && "Cannot create empty virtual register");
-
+MachineRegisterInfo::createGenericVirtualRegister(LLT Ty) {
// New virtual register number.
- unsigned Reg = TargetRegisterInfo::index2VirtReg(getNumVirtRegs());
- VRegInfo.grow(Reg);
+ unsigned Reg = createIncompleteVirtualRegister();
// FIXME: Should we use a dummy register class?
- VRegInfo[Reg].first = static_cast<TargetRegisterClass *>(nullptr);
- getVRegToSize()[Reg] = Size;
- RegAllocHints.grow(Reg);
+ VRegInfo[Reg].first = static_cast<RegisterBank *>(nullptr);
+ getVRegToType()[Reg] = Ty;
if (TheDelegate)
TheDelegate->MRI_NoteNewVirtualRegister(Reg);
return Reg;
}
+void MachineRegisterInfo::clearVirtRegTypes() {
+ getVRegToType().clear();
+}
+
/// clearVirtRegs - Remove all virtual registers (after physreg assignment).
void MachineRegisterInfo::clearVirtRegs() {
#ifndef NDEBUG
@@ -444,13 +457,16 @@ void MachineRegisterInfo::freezeReservedRegs(const MachineFunction &MF) {
"Invalid ReservedRegs vector from target");
}
-bool MachineRegisterInfo::isConstantPhysReg(unsigned PhysReg,
- const MachineFunction &MF) const {
+bool MachineRegisterInfo::isConstantPhysReg(unsigned PhysReg) const {
assert(TargetRegisterInfo::isPhysicalRegister(PhysReg));
+ const TargetRegisterInfo *TRI = getTargetRegisterInfo();
+ if (TRI->isConstantPhysReg(PhysReg))
+ return true;
+
// Check if any overlapping register is modified, or allocatable so it may be
// used later.
- for (MCRegAliasIterator AI(PhysReg, getTargetRegisterInfo(), true);
+ for (MCRegAliasIterator AI(PhysReg, TRI, true);
AI.isValid(); ++AI)
if (!def_empty(*AI) || isAllocatable(*AI))
return false;
diff --git a/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp b/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp
index 47ad60c5dd56..e9b47559309f 100644
--- a/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp
@@ -18,7 +18,6 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Support/AlignOf.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
diff --git a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
index d921e2977cc7..e06bc517fa91 100644
--- a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -230,11 +230,6 @@ static cl::opt<bool> EnablePostRAMachineSched(
cl::desc("Enable the post-ra machine instruction scheduling pass."),
cl::init(true), cl::Hidden);
-/// Forward declare the standard machine scheduler. This will be used as the
-/// default scheduler if the target does not set a default.
-static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C);
-static ScheduleDAGInstrs *createGenericSchedPostRA(MachineSchedContext *C);
-
/// Decrement this iterator until reaching the top or a non-debug instr.
static MachineBasicBlock::const_iterator
priorNonDebug(MachineBasicBlock::const_iterator I,
@@ -251,8 +246,8 @@ priorNonDebug(MachineBasicBlock::const_iterator I,
static MachineBasicBlock::iterator
priorNonDebug(MachineBasicBlock::iterator I,
MachineBasicBlock::const_iterator Beg) {
- return const_cast<MachineInstr*>(
- &*priorNonDebug(MachineBasicBlock::const_iterator(I), Beg));
+ return priorNonDebug(MachineBasicBlock::const_iterator(I), Beg)
+ .getNonConstIterator();
}
/// If this iterator is a debug value, increment until reaching the End or a
@@ -271,12 +266,8 @@ nextIfDebug(MachineBasicBlock::const_iterator I,
static MachineBasicBlock::iterator
nextIfDebug(MachineBasicBlock::iterator I,
MachineBasicBlock::const_iterator End) {
- // Cast the return value to nonconst MachineInstr, then cast to an
- // instr_iterator, which does not check for null, finally return a
- // bundle_iterator.
- return MachineBasicBlock::instr_iterator(
- const_cast<MachineInstr*>(
- &*nextIfDebug(MachineBasicBlock::const_iterator(I), End)));
+ return nextIfDebug(MachineBasicBlock::const_iterator(I), End)
+ .getNonConstIterator();
}
/// Instantiate a ScheduleDAGInstrs that will be owned by the caller.
@@ -458,9 +449,10 @@ void MachineSchedulerBase::scheduleRegions(ScheduleDAGInstrs &Scheduler,
unsigned NumRegionInstrs = 0;
MachineBasicBlock::iterator I = RegionEnd;
for (;I != MBB->begin(); --I) {
- if (isSchedBoundary(&*std::prev(I), &*MBB, MF, TII))
+ MachineInstr &MI = *std::prev(I);
+ if (isSchedBoundary(&MI, &*MBB, MF, TII))
break;
- if (!I->isDebugValue())
+ if (!MI.isDebugValue())
++NumRegionInstrs;
}
// Notify the scheduler of the region, even if we may skip scheduling
@@ -692,8 +684,14 @@ void ScheduleDAGMI::schedule() {
// This may initialize a DFSResult to be used for queue priority.
SchedImpl->initialize(this);
- DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
- SUnits[su].dumpAll(this));
+ DEBUG(
+ if (EntrySU.getInstr() != nullptr)
+ EntrySU.dumpAll(this);
+ for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
+ SUnits[su].dumpAll(this);
+ if (ExitSU.getInstr() != nullptr)
+ ExitSU.dumpAll(this);
+ );
if (ViewMISchedDAGs) viewGraph();
// Initialize ready queues now that the DAG and priority data are finalized.
@@ -862,6 +860,44 @@ ScheduleDAGMILive::~ScheduleDAGMILive() {
delete DFSResult;
}
+void ScheduleDAGMILive::collectVRegUses(SUnit &SU) {
+ const MachineInstr &MI = *SU.getInstr();
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+ if (!MO.readsReg())
+ continue;
+ if (TrackLaneMasks && !MO.isUse())
+ continue;
+
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+
+ // Ignore re-defs.
+ if (TrackLaneMasks) {
+ bool FoundDef = false;
+ for (const MachineOperand &MO2 : MI.operands()) {
+ if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) {
+ FoundDef = true;
+ break;
+ }
+ }
+ if (FoundDef)
+ continue;
+ }
+
+ // Record this local VReg use.
+ VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg);
+ for (; UI != VRegUses.end(); ++UI) {
+ if (UI->SU == &SU)
+ break;
+ }
+ if (UI == VRegUses.end())
+ VRegUses.insert(VReg2SUnit(Reg, LaneBitmask::getNone(), &SU));
+ }
+}
+
/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
/// crossing a scheduling boundary. [begin, end) includes all instructions in
/// the region, including the boundary itself and single-instruction regions
@@ -889,6 +925,11 @@ void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
// Setup the register pressure trackers for the top scheduled top and bottom
// scheduled regions.
void ScheduleDAGMILive::initRegPressure() {
+ VRegUses.clear();
+ VRegUses.setUniverse(MRI.getNumVirtRegs());
+ for (SUnit &SU : SUnits)
+ collectVRegUses(SU);
+
TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin,
ShouldTrackLaneMasks, false);
BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd,
@@ -999,7 +1040,7 @@ void ScheduleDAGMILive::updatePressureDiffs(
// this fact anymore => decrement pressure.
// If the register has just become dead then other uses make it come
// back to life => increment pressure.
- bool Decrement = P.LaneMask != 0;
+ bool Decrement = P.LaneMask.any();
for (const VReg2SUnit &V2SU
: make_range(VRegUses.find(Reg), VRegUses.end())) {
@@ -1018,7 +1059,7 @@ void ScheduleDAGMILive::updatePressureDiffs(
);
}
} else {
- assert(P.LaneMask != 0);
+ assert(P.LaneMask.any());
DEBUG(dbgs() << " LiveReg: " << PrintVRegOrUnit(Reg, TRI) << "\n");
// This may be called before CurrentBottom has been initialized. However,
// BotRPTracker must have a valid position. We want the value live into the
@@ -1087,6 +1128,8 @@ void ScheduleDAGMILive::schedule() {
SchedImpl->initialize(this);
DEBUG(
+ if (EntrySU.getInstr() != nullptr)
+ EntrySU.dumpAll(this);
for (const SUnit &SU : SUnits) {
SU.dumpAll(this);
if (ShouldTrackPressure) {
@@ -1095,6 +1138,8 @@ void ScheduleDAGMILive::schedule() {
}
dbgs() << '\n';
}
+ if (ExitSU.getInstr() != nullptr)
+ ExitSU.dumpAll(this);
);
if (ViewMISchedDAGs) viewGraph();
@@ -1362,7 +1407,8 @@ class BaseMemOpClusterMutation : public ScheduleDAGMutation {
: SU(su), BaseReg(reg), Offset(ofs) {}
bool operator<(const MemOpInfo&RHS) const {
- return std::tie(BaseReg, Offset) < std::tie(RHS.BaseReg, RHS.Offset);
+ return std::tie(BaseReg, Offset, SU->NodeNum) <
+ std::tie(RHS.BaseReg, RHS.Offset, RHS.SU->NodeNum);
}
};
@@ -1395,6 +1441,24 @@ public:
};
} // anonymous
+namespace llvm {
+
+std::unique_ptr<ScheduleDAGMutation>
+createLoadClusterDAGMutation(const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI) {
+ return EnableMemOpCluster ? make_unique<LoadClusterMutation>(TII, TRI)
+ : nullptr;
+}
+
+std::unique_ptr<ScheduleDAGMutation>
+createStoreClusterDAGMutation(const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI) {
+ return EnableMemOpCluster ? make_unique<StoreClusterMutation>(TII, TRI)
+ : nullptr;
+}
+
+} // namespace llvm
+
void BaseMemOpClusterMutation::clusterNeighboringMemOps(
ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG) {
SmallVector<MemOpInfo, 32> MemOpRecords;
@@ -1487,29 +1551,23 @@ namespace {
/// that may be fused by the processor into a single operation.
class MacroFusion : public ScheduleDAGMutation {
const TargetInstrInfo &TII;
- const TargetRegisterInfo &TRI;
public:
- MacroFusion(const TargetInstrInfo &TII, const TargetRegisterInfo &TRI)
- : TII(TII), TRI(TRI) {}
+ MacroFusion(const TargetInstrInfo &TII)
+ : TII(TII) {}
void apply(ScheduleDAGInstrs *DAGInstrs) override;
};
} // anonymous
-/// Returns true if \p MI reads a register written by \p Other.
-static bool HasDataDep(const TargetRegisterInfo &TRI, const MachineInstr &MI,
- const MachineInstr &Other) {
- for (const MachineOperand &MO : MI.uses()) {
- if (!MO.isReg() || !MO.readsReg())
- continue;
+namespace llvm {
- unsigned Reg = MO.getReg();
- if (Other.modifiesRegister(Reg, &TRI))
- return true;
- }
- return false;
+std::unique_ptr<ScheduleDAGMutation>
+createMacroFusionDAGMutation(const TargetInstrInfo *TII) {
+ return EnableMacroFusion ? make_unique<MacroFusion>(*TII) : nullptr;
}
+} // namespace llvm
+
/// \brief Callback from DAG postProcessing to create cluster edges to encourage
/// fused operations.
void MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) {
@@ -1521,16 +1579,12 @@ void MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) {
if (!Branch)
return;
- for (SUnit &SU : DAG->SUnits) {
- // SUnits with successors can't be schedule in front of the ExitSU.
- if (!SU.Succs.empty())
- continue;
- // We only care if the node writes to a register that the branch reads.
- MachineInstr *Pred = SU.getInstr();
- if (!HasDataDep(TRI, *Branch, *Pred))
+ for (SDep &PredDep : ExitSU.Preds) {
+ if (PredDep.isWeak())
continue;
-
- if (!TII.shouldScheduleAdjacent(*Pred, *Branch))
+ SUnit &SU = *PredDep.getSUnit();
+ MachineInstr &Pred = *SU.getInstr();
+ if (!TII.shouldScheduleAdjacent(Pred, *Branch))
continue;
// Create a single weak edge from SU to ExitSU. The only effect is to cause
@@ -1543,6 +1597,16 @@ void MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) {
(void)Success;
assert(Success && "No DAG nodes should be reachable from ExitSU");
+ // Adjust latency of data deps between the nodes.
+ for (SDep &PredDep : ExitSU.Preds) {
+ if (PredDep.getSUnit() == &SU)
+ PredDep.setLatency(0);
+ }
+ for (SDep &SuccDep : SU.Succs) {
+ if (SuccDep.getSUnit() == &ExitSU)
+ SuccDep.setLatency(0);
+ }
+
DEBUG(dbgs() << "Macro Fuse SU(" << SU.NodeNum << ")\n");
break;
}
@@ -1572,6 +1636,16 @@ protected:
};
} // anonymous
+namespace llvm {
+
+std::unique_ptr<ScheduleDAGMutation>
+createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
+ const TargetRegisterInfo *TRI) {
+ return make_unique<CopyConstrain>(TII, TRI);
+}
+
+} // namespace llvm
+
/// constrainLocalCopy handles two possibilities:
/// 1) Local src:
/// I0: = dst
@@ -1760,7 +1834,6 @@ void SchedBoundary::reset() {
Available.clear();
Pending.clear();
CheckPending = false;
- NextSUs.clear();
CurrCycle = 0;
CurrMOps = 0;
MinReadyCycle = UINT_MAX;
@@ -1961,23 +2034,6 @@ void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) {
Pending.push(SU);
else
Available.push(SU);
-
- // Record this node as an immediate dependent of the scheduled node.
- NextSUs.insert(SU);
-}
-
-void SchedBoundary::releaseTopNode(SUnit *SU) {
- if (SU->isScheduled)
- return;
-
- releaseNode(SU, SU->TopReadyCycle);
-}
-
-void SchedBoundary::releaseBottomNode(SUnit *SU) {
- if (SU->isScheduled)
- return;
-
- releaseNode(SU, SU->BotReadyCycle);
}
/// Move the boundary of scheduled code by one cycle.
@@ -2828,9 +2884,8 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand,
bool SameBoundary = Zone != nullptr;
if (SameBoundary) {
// For loops that are acyclic path limited, aggressively schedule for
- // latency. This can result in very long dependence chains scheduled in
- // sequence, so once every cycle (when CurrMOps == 0), switch to normal
- // heuristics.
+ // latency. Within a single cycle, whenever CurrMOps > 0, allow normal
+ // heuristics to take precedence.
if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() &&
tryLatency(TryCand, Cand, *Zone))
return;
@@ -2888,13 +2943,6 @@ void GenericScheduler::tryCandidate(SchedCandidate &Cand,
!Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone))
return;
- // Prefer immediate defs/users of the last scheduled instruction. This is a
- // local pressure avoidance strategy that also makes the machine code
- // readable.
- if (tryGreater(Zone->isNextSU(TryCand.SU), Zone->isNextSU(Cand.SU),
- TryCand, Cand, NextDefUse))
- return;
-
// Fall through to original instruction order.
if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum)
|| (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) {
@@ -3105,28 +3153,24 @@ void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
/// Create the standard converging machine scheduler. This will be used as the
/// default scheduler if the target does not set a default.
-static ScheduleDAGInstrs *createGenericSchedLive(MachineSchedContext *C) {
+ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
ScheduleDAGMILive *DAG = new ScheduleDAGMILive(C, make_unique<GenericScheduler>(C));
// Register DAG post-processors.
//
// FIXME: extend the mutation API to allow earlier mutations to instantiate
// data and pass it to later mutations. Have a single mutation that gathers
// the interesting nodes in one pass.
- DAG->addMutation(make_unique<CopyConstrain>(DAG->TII, DAG->TRI));
- if (EnableMemOpCluster) {
- if (DAG->TII->enableClusterLoads())
- DAG->addMutation(make_unique<LoadClusterMutation>(DAG->TII, DAG->TRI));
- if (DAG->TII->enableClusterStores())
- DAG->addMutation(make_unique<StoreClusterMutation>(DAG->TII, DAG->TRI));
- }
- if (EnableMacroFusion)
- DAG->addMutation(make_unique<MacroFusion>(*DAG->TII, *DAG->TRI));
+ DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
return DAG;
}
+static ScheduleDAGInstrs *createConveringSched(MachineSchedContext *C) {
+ return createGenericSchedLive(C);
+}
+
static MachineSchedRegistry
GenericSchedRegistry("converge", "Standard converging scheduler.",
- createGenericSchedLive);
+ createConveringSched);
//===----------------------------------------------------------------------===//
// PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy.
@@ -3257,9 +3301,9 @@ void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
Top.bumpNode(SU);
}
-/// Create a generic scheduler with no vreg liveness or DAG mutation passes.
-static ScheduleDAGInstrs *createGenericSchedPostRA(MachineSchedContext *C) {
- return new ScheduleDAGMI(C, make_unique<PostGenericScheduler>(C), /*IsPostRA=*/true);
+ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
+ return new ScheduleDAGMI(C, make_unique<PostGenericScheduler>(C),
+ /*RemoveKillFlags=*/true);
}
//===----------------------------------------------------------------------===//
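
The MachineScheduler.cpp hunks above export the formerly file-local scheduler constructors and move the built-in DAG mutations behind factory functions that honor the existing cl::opts. A minimal sketch, assuming those factories are declared in MachineScheduler.h as this import suggests, of how a target-specific hook (the function name here is made up) could assemble a scheduler from them:

#include "llvm/CodeGen/MachineScheduler.h"
#include <utility>
using namespace llvm;

// Hypothetical target hook: start from the generic live-interval scheduler
// and opt back in to the mutations this revision factored out.
static ScheduleDAGInstrs *createMyTargetSchedLive(MachineSchedContext *C) {
  ScheduleDAGMILive *DAG = createGenericSchedLive(C);
  // Each factory may return null when its cl::opt is disabled, so guard.
  if (auto LoadCluster = createLoadClusterDAGMutation(DAG->TII, DAG->TRI))
    DAG->addMutation(std::move(LoadCluster));
  if (auto StoreCluster = createStoreClusterDAGMutation(DAG->TII, DAG->TRI))
    DAG->addMutation(std::move(StoreCluster));
  if (auto Fusion = createMacroFusionDAGMutation(DAG->TII))
    DAG->addMutation(std::move(Fusion));
  return DAG;
}
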
diff --git a/contrib/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm/lib/CodeGen/MachineSink.cpp
index 571a5c1d8005..5f87b68123f1 100644
--- a/contrib/llvm/lib/CodeGen/MachineSink.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineSink.cpp
@@ -22,9 +22,15 @@
#include "llvm/ADT/SparseBitVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/LLVMContext.h"
@@ -34,6 +40,13 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <map>
+#include <utility>
+#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "machine-sink"
@@ -48,12 +61,21 @@ UseBlockFreqInfo("machine-sink-bfi",
cl::desc("Use block frequency info to find successors to sink"),
cl::init(true), cl::Hidden);
+static cl::opt<unsigned> SplitEdgeProbabilityThreshold(
+ "machine-sink-split-probability-threshold",
+ cl::desc(
+ "Percentage threshold for splitting single-instruction critical edge. "
+ "If the branch threshold is higher than this threshold, we allow "
+ "speculative execution of up to 1 instruction to avoid branching to "
+ "splitted critical edge"),
+ cl::init(40), cl::Hidden);
STATISTIC(NumSunk, "Number of machine instructions sunk");
STATISTIC(NumSplit, "Number of critical edges split");
STATISTIC(NumCoalesces, "Number of copies coalesced");
namespace {
+
class MachineSinking : public MachineFunctionPass {
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
@@ -62,15 +84,16 @@ namespace {
MachinePostDominatorTree *PDT; // Machine post dominator tree
MachineLoopInfo *LI;
const MachineBlockFrequencyInfo *MBFI;
+ const MachineBranchProbabilityInfo *MBPI;
AliasAnalysis *AA;
// Remember which edges have been considered for breaking.
- SmallSet<std::pair<MachineBasicBlock*,MachineBasicBlock*>, 8>
+ SmallSet<std::pair<MachineBasicBlock*, MachineBasicBlock*>, 8>
CEBCandidates;
// Remember which edges we are about to split.
// This is different from CEBCandidates since those edges
// will be split.
- SetVector<std::pair<MachineBasicBlock*,MachineBasicBlock*> > ToSplit;
+ SetVector<std::pair<MachineBasicBlock*, MachineBasicBlock*> > ToSplit;
SparseBitVector<> RegsToClearKillFlags;
@@ -79,6 +102,7 @@ namespace {
public:
static char ID; // Pass identification
+
MachineSinking() : MachineFunctionPass(ID) {
initializeMachineSinkingPass(*PassRegistry::getPassRegistry());
}
@@ -92,6 +116,7 @@ namespace {
AU.addRequired<MachineDominatorTree>();
AU.addRequired<MachinePostDominatorTree>();
AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<MachineBranchProbabilityInfo>();
AU.addPreserved<MachineDominatorTree>();
AU.addPreserved<MachinePostDominatorTree>();
AU.addPreserved<MachineLoopInfo>();
@@ -143,12 +168,14 @@ namespace {
GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
AllSuccsCache &AllSuccessors) const;
};
+
} // end anonymous namespace
char MachineSinking::ID = 0;
char &llvm::MachineSinkingID = MachineSinking::ID;
INITIALIZE_PASS_BEGIN(MachineSinking, "machine-sink",
"Machine code sinking", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
@@ -269,11 +296,12 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
PDT = &getAnalysis<MachinePostDominatorTree>();
LI = &getAnalysis<MachineLoopInfo>();
MBFI = UseBlockFreqInfo ? &getAnalysis<MachineBlockFrequencyInfo>() : nullptr;
+ MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
bool EverMadeChange = false;
- while (1) {
+ while (true) {
bool MadeChange = false;
// Process all basic blocks.
@@ -369,6 +397,10 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI,
if (!MI.isCopy() && !TII->isAsCheapAsAMove(MI))
return true;
+ if (From->isSuccessor(To) && MBPI->getEdgeProbability(From, To) <=
+ BranchProbability(SplitEdgeProbabilityThreshold, 100))
+ return true;
+
// MI is cheap, we probably don't want to break the critical edge for it.
// However, if this would allow some definitions of its source operands
// to be sunk then it's probably worth it.
@@ -604,7 +636,7 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
// If the physreg has no defs anywhere, it's just an ambient register
// and we can freely move its uses. Alternatively, if it's allocatable,
// it could get allocated to something with a def during allocation.
- if (!MRI->isConstantPhysReg(Reg, *MBB->getParent()))
+ if (!MRI->isConstantPhysReg(Reg))
return nullptr;
} else if (!MO.isDead()) {
// A def that isn't dead. We can't move it.
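
The MachineSink.cpp hunks above add a MachineBranchProbabilityInfo dependency and a percentage threshold that decides when a single-instruction critical edge is worth splitting. A small self-contained sketch of the comparison the new option drives; the helper name is an assumption, only the API calls come from the hunks:

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/Support/BranchProbability.h"
using namespace llvm;

// True when the From->To edge is taken at most ThresholdPercent of the
// time, which the pass treats as cheap enough to justify splitting.
static bool edgeBelowSplitThreshold(const MachineBranchProbabilityInfo &MBPI,
                                    const MachineBasicBlock *From,
                                    const MachineBasicBlock *To,
                                    unsigned ThresholdPercent) {
  return MBPI.getEdgeProbability(From, To) <=
         BranchProbability(ThresholdPercent, 100);
}
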
diff --git a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
index 86332c8a93a5..ef7e525e8165 100644
--- a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
@@ -430,16 +430,17 @@ public:
po_iterator_storage(LoopBounds &lb) : LB(lb) {}
void finishPostorder(const MachineBasicBlock*) {}
- bool insertEdge(const MachineBasicBlock *From, const MachineBasicBlock *To) {
+ bool insertEdge(Optional<const MachineBasicBlock *> From,
+ const MachineBasicBlock *To) {
// Skip already visited To blocks.
MachineTraceMetrics::TraceBlockInfo &TBI = LB.Blocks[To->getNumber()];
if (LB.Downward ? TBI.hasValidHeight() : TBI.hasValidDepth())
return false;
// From is null once when To is the trace center block.
if (From) {
- if (const MachineLoop *FromLoop = LB.Loops->getLoopFor(From)) {
+ if (const MachineLoop *FromLoop = LB.Loops->getLoopFor(*From)) {
// Don't follow backedges, don't leave FromLoop when going upwards.
- if ((LB.Downward ? To : From) == FromLoop->getHeader())
+ if ((LB.Downward ? To : *From) == FromLoop->getHeader())
return false;
// Don't leave FromLoop.
if (isExitingLoop(FromLoop, LB.Loops->getLoopFor(To)))
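
The MachineTraceMetrics.cpp hunk adapts insertEdge to the new po_iterator_storage interface, where the source block arrives as an Optional that is empty only for the traversal root. A tiny illustrative sketch of that test-before-dereference pattern (the function below is not part of the pass):

#include "llvm/ADT/Optional.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
using namespace llvm;

// From is empty exactly once, for the root of the traversal, so it is
// tested before dereferencing; this mirrors the insertEdge change above.
static bool acceptEdge(Optional<const MachineBasicBlock *> From,
                       const MachineBasicBlock *To) {
  if (!From)
    return true;        // root block: nothing to compare against
  return *From != To;   // illustrative predicate, not the real filter
}
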
diff --git a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
index a70adb050455..426a4666c649 100644
--- a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp
@@ -70,6 +70,10 @@ namespace {
unsigned foundErrors;
+ // Avoid querying the MachineFunctionProperties for each operand.
+ bool isFunctionRegBankSelected;
+ bool isFunctionSelected;
+
typedef SmallVector<unsigned, 16> RegVector;
typedef SmallVector<const uint32_t*, 4> RegMaskVector;
typedef DenseSet<unsigned> RegSet;
@@ -204,16 +208,13 @@ namespace {
void visitMachineBasicBlockAfter(const MachineBasicBlock *MBB);
void visitMachineFunctionAfter();
- template <typename T> void report(const char *msg, ilist_iterator<T> I) {
- report(msg, &*I);
- }
void report(const char *msg, const MachineFunction *MF);
void report(const char *msg, const MachineBasicBlock *MBB);
void report(const char *msg, const MachineInstr *MI);
void report(const char *msg, const MachineOperand *MO, unsigned MONum);
void report_context(const LiveInterval &LI) const;
- void report_context(const LiveRange &LR, unsigned Reg,
+ void report_context(const LiveRange &LR, unsigned VRegUnit,
LaneBitmask LaneMask) const;
void report_context(const LiveRange::Segment &S) const;
void report_context(const VNInfo &VNI) const;
@@ -228,10 +229,10 @@ namespace {
void checkLiveness(const MachineOperand *MO, unsigned MONum);
void checkLivenessAtUse(const MachineOperand *MO, unsigned MONum,
SlotIndex UseIdx, const LiveRange &LR, unsigned Reg,
- LaneBitmask LaneMask = 0);
+ LaneBitmask LaneMask = LaneBitmask::getNone());
void checkLivenessAtDef(const MachineOperand *MO, unsigned MONum,
SlotIndex DefIdx, const LiveRange &LR, unsigned Reg,
- LaneBitmask LaneMask = 0);
+ LaneBitmask LaneMask = LaneBitmask::getNone());
void markReachable(const MachineBasicBlock *MBB);
void calcRegsPassed();
@@ -242,11 +243,12 @@ namespace {
void verifyLiveIntervals();
void verifyLiveInterval(const LiveInterval&);
void verifyLiveRangeValue(const LiveRange&, const VNInfo*, unsigned,
- unsigned);
+ LaneBitmask);
void verifyLiveRangeSegment(const LiveRange&,
const LiveRange::const_iterator I, unsigned,
- unsigned);
- void verifyLiveRange(const LiveRange&, unsigned, LaneBitmask LaneMask = 0);
+ LaneBitmask);
+ void verifyLiveRange(const LiveRange&, unsigned,
+ LaneBitmask LaneMask = LaneBitmask::getNone());
void verifyStackFrame();
@@ -310,15 +312,12 @@ void MachineVerifier::verifySlotIndexes() const {
void MachineVerifier::verifyProperties(const MachineFunction &MF) {
// If a pass has introduced virtual registers without clearing the
- // AllVRegsAllocated property (or set it without allocating the vregs)
+ // NoVRegs property (or set it without allocating the vregs)
// then report an error.
if (MF.getProperties().hasProperty(
- MachineFunctionProperties::Property::AllVRegsAllocated) &&
- MRI->getNumVirtRegs()) {
- report(
- "Function has AllVRegsAllocated property but there are VReg operands",
- &MF);
- }
+ MachineFunctionProperties::Property::NoVRegs) &&
+ MRI->getNumVirtRegs())
+ report("Function has NoVRegs property but there are VReg operands", &MF);
}
unsigned MachineVerifier::verify(MachineFunction &MF) {
@@ -330,6 +329,11 @@ unsigned MachineVerifier::verify(MachineFunction &MF) {
TRI = MF.getSubtarget().getRegisterInfo();
MRI = &MF.getRegInfo();
+ isFunctionRegBankSelected = MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::RegBankSelected);
+ isFunctionSelected = MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::Selected);
+
LiveVars = nullptr;
LiveInts = nullptr;
LiveStks = nullptr;
@@ -359,7 +363,7 @@ unsigned MachineVerifier::verify(MachineFunction &MF) {
for (MachineBasicBlock::const_instr_iterator MBBI = MFI->instr_begin(),
MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI) {
if (MBBI->getParent() != &*MFI) {
- report("Bad instruction parent pointer", MFI);
+ report("Bad instruction parent pointer", &*MFI);
errs() << "Instruction: " << *MBBI;
continue;
}
@@ -381,7 +385,7 @@ unsigned MachineVerifier::verify(MachineFunction &MF) {
CurBundle = &*MBBI;
visitMachineBundleBefore(CurBundle);
} else if (!CurBundle)
- report("No bundle header", MBBI);
+ report("No bundle header", &*MBBI);
visitMachineInstrBefore(&*MBBI);
for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
const MachineInstr &MI = *MBBI;
@@ -474,11 +478,11 @@ void MachineVerifier::report_context(const LiveInterval &LI) const {
errs() << "- interval: " << LI << '\n';
}
-void MachineVerifier::report_context(const LiveRange &LR, unsigned Reg,
+void MachineVerifier::report_context(const LiveRange &LR, unsigned VRegUnit,
LaneBitmask LaneMask) const {
report_context_liverange(LR);
- errs() << "- register: " << PrintReg(Reg, TRI) << '\n';
- if (LaneMask != 0)
+ report_context_vreg_regunit(VRegUnit);
+ if (LaneMask.any())
report_context_lanemask(LaneMask);
}
@@ -524,16 +528,6 @@ void MachineVerifier::visitMachineFunctionBefore() {
lastIndex = SlotIndex();
regsReserved = MRI->getReservedRegs();
- // A sub-register of a reserved register is also reserved
- for (int Reg = regsReserved.find_first(); Reg>=0;
- Reg = regsReserved.find_next(Reg)) {
- for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
- // FIXME: This should probably be:
- // assert(regsReserved.test(*SubRegs) && "Non-reserved sub-register");
- regsReserved.set(*SubRegs);
- }
- }
-
markReachable(&MF->front());
// Build a set of the basic blocks in the function.
@@ -571,7 +565,8 @@ void
MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
FirstTerminator = nullptr;
- if (MRI->isSSA()) {
+ if (!MF->getProperties().hasProperty(
+ MachineFunctionProperties::Property::NoPHIs)) {
// If this block has allocatable physical registers live-in, check that
// it is an entry block or landing pad.
for (const auto &LI : MBB->liveins()) {
@@ -757,9 +752,8 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
}
regsLiveInButUnused = regsLive;
- const MachineFrameInfo *MFI = MF->getFrameInfo();
- assert(MFI && "Function has no frame info");
- BitVector PR = MFI->getPristineRegs(*MF);
+ const MachineFrameInfo &MFI = MF->getFrameInfo();
+ BitVector PR = MFI.getPristineRegs(*MF);
for (int I = PR.find_first(); I>0; I = PR.find_next(I)) {
for (MCSubRegIterator SubRegs(I, TRI, /*IncludeSelf=*/true);
SubRegs.isValid(); ++SubRegs)
@@ -850,6 +844,10 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
<< MI->getNumOperands() << " given.\n";
}
+ if (MI->isPHI() && MF->getProperties().hasProperty(
+ MachineFunctionProperties::Property::NoPHIs))
+ report("Found PHI instruction with NoPHIs property set", MI);
+
// Check the tied operands.
if (MI->isInlineAsm())
verifyInlineAsm(MI);
@@ -879,6 +877,35 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
}
}
+ // Check types.
+ if (isPreISelGenericOpcode(MCID.getOpcode())) {
+ if (isFunctionSelected)
+ report("Unexpected generic instruction in a Selected function", MI);
+
+ // Generic instructions specify equality constraints between some
+ // of their operands. Make sure these are consistent.
+ SmallVector<LLT, 4> Types;
+ for (unsigned i = 0; i < MCID.getNumOperands(); ++i) {
+ if (!MCID.OpInfo[i].isGenericType())
+ continue;
+ size_t TypeIdx = MCID.OpInfo[i].getGenericTypeIndex();
+ Types.resize(std::max(TypeIdx + 1, Types.size()));
+
+ LLT OpTy = MRI->getType(MI->getOperand(i).getReg());
+ if (Types[TypeIdx].isValid() && Types[TypeIdx] != OpTy)
+ report("type mismatch in generic instruction", MI);
+ Types[TypeIdx] = OpTy;
+ }
+ }
+
+ // Generic opcodes must not have physical register operands.
+ if (isPreISelGenericOpcode(MCID.getOpcode())) {
+ for (auto &Op : MI->operands()) {
+ if (Op.isReg() && TargetRegisterInfo::isPhysicalRegister(Op.getReg()))
+ report("Generic instruction cannot have physical register", MI);
+ }
+ }
+
StringRef ErrorInfo;
if (!TII->verifyInstruction(*MI, ErrorInfo))
report(ErrorInfo.data(), MI);
@@ -988,25 +1015,62 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
const TargetRegisterClass *RC = MRI->getRegClassOrNull(Reg);
if (!RC) {
// This is a generic virtual register.
- // It must have a size and it must not have a SubIdx.
- unsigned Size = MRI->getSize(Reg);
- if (!Size) {
- report("Generic virtual register must have a size", MO, MONum);
+
+ // If we're post-Select, we can't have gvregs anymore.
+ if (isFunctionSelected) {
+ report("Generic virtual register invalid in a Selected function",
+ MO, MONum);
return;
}
- // Make sure the register fits into its register bank if any.
+
+ // The gvreg must have a type and it must not have a SubIdx.
+ LLT Ty = MRI->getType(Reg);
+ if (!Ty.isValid()) {
+ report("Generic virtual register must have a valid type", MO,
+ MONum);
+ return;
+ }
+
const RegisterBank *RegBank = MRI->getRegBankOrNull(Reg);
- if (RegBank && RegBank->getSize() < Size) {
+
+ // If we're post-RegBankSelect, the gvreg must have a bank.
+ if (!RegBank && isFunctionRegBankSelected) {
+ report("Generic virtual register must have a bank in a "
+ "RegBankSelected function",
+ MO, MONum);
+ return;
+ }
+
+ // Make sure the register fits into its register bank if any.
+ if (RegBank && Ty.isValid() &&
+ RegBank->getSize() < Ty.getSizeInBits()) {
report("Register bank is too small for virtual register", MO,
MONum);
errs() << "Register bank " << RegBank->getName() << " too small("
- << RegBank->getSize() << ") to fit " << Size << "-bits\n";
+ << RegBank->getSize() << ") to fit " << Ty.getSizeInBits()
+ << "-bits\n";
return;
}
if (SubIdx) {
- report("Generic virtual register does not subregister index", MO, MONum);
+ report("Generic virtual register does not subregister index", MO,
+ MONum);
+ return;
+ }
+
+ // If this is a target specific instruction and this operand
+ // has register class constraint, the virtual register must
+ // comply to it.
+ if (!isPreISelGenericOpcode(MCID.getOpcode()) &&
+ TII->getRegClass(MCID, MONum, TRI, *MF)) {
+ report("Virtual register does not match instruction constraint", MO,
+ MONum);
+ errs() << "Expect register class "
+ << TRI->getRegClassName(
+ TII->getRegClass(MCID, MONum, TRI, *MF))
+ << " but got nothing\n";
return;
}
+
break;
}
if (SubIdx) {
@@ -1113,7 +1177,7 @@ void MachineVerifier::checkLivenessAtUse(const MachineOperand *MO,
LiveQueryResult LRQ = LR.Query(UseIdx);
// Check if we have a segment at the use, note however that we only need one
// live subregister range, the others may be dead.
- if (!LRQ.valueIn() && LaneMask == 0) {
+ if (!LRQ.valueIn() && LaneMask.none()) {
report("No live segment at use", MO, MONum);
report_context_liverange(LR);
report_context_vreg_regunit(VRegOrUnit);
@@ -1123,7 +1187,7 @@ void MachineVerifier::checkLivenessAtUse(const MachineOperand *MO,
report("Live range continues after kill flag", MO, MONum);
report_context_liverange(LR);
report_context_vreg_regunit(VRegOrUnit);
- if (LaneMask != 0)
+ if (LaneMask.any())
report_context_lanemask(LaneMask);
report_context(UseIdx);
}
@@ -1138,7 +1202,7 @@ void MachineVerifier::checkLivenessAtDef(const MachineOperand *MO,
report("Inconsistent valno->def", MO, MONum);
report_context_liverange(LR);
report_context_vreg_regunit(VRegOrUnit);
- if (LaneMask != 0)
+ if (LaneMask.any())
report_context_lanemask(LaneMask);
report_context(*VNI);
report_context(DefIdx);
@@ -1147,7 +1211,7 @@ void MachineVerifier::checkLivenessAtDef(const MachineOperand *MO,
report("No live segment at def", MO, MONum);
report_context_liverange(LR);
report_context_vreg_regunit(VRegOrUnit);
- if (LaneMask != 0)
+ if (LaneMask.any())
report_context_lanemask(LaneMask);
report_context(DefIdx);
}
@@ -1177,7 +1241,7 @@ void MachineVerifier::checkLivenessAtDef(const MachineOperand *MO,
report("Live range continues after dead def flag", MO, MONum);
report_context_liverange(LR);
report_context_vreg_regunit(VRegOrUnit);
- if (LaneMask != 0)
+ if (LaneMask.any())
report_context_lanemask(LaneMask);
}
}
@@ -1199,7 +1263,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
if (LiveVars && TargetRegisterInfo::isVirtualRegister(Reg) &&
MO->isKill()) {
LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg);
- if (std::find(VI.Kills.begin(), VI.Kills.end(), MI) == VI.Kills.end())
+ if (!is_contained(VI.Kills, MI))
report("Kill missing from LiveVariables", MO, MONum);
}
@@ -1225,9 +1289,9 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
LaneBitmask MOMask = SubRegIdx != 0
? TRI->getSubRegIndexLaneMask(SubRegIdx)
: MRI->getMaxLaneMaskForVReg(Reg);
- LaneBitmask LiveInMask = 0;
+ LaneBitmask LiveInMask;
for (const LiveInterval::SubRange &SR : LI.subranges()) {
- if ((MOMask & SR.LaneMask) == 0)
+ if ((MOMask & SR.LaneMask).none())
continue;
checkLivenessAtUse(MO, MONum, UseIdx, SR, Reg, SR.LaneMask);
LiveQueryResult LRQ = SR.Query(UseIdx);
@@ -1235,7 +1299,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
LiveInMask |= SR.LaneMask;
}
// At least parts of the register has to be live at the use.
- if ((LiveInMask & MOMask) == 0) {
+ if ((LiveInMask & MOMask).none()) {
report("No live subrange at use", MO, MONum);
report_context(LI);
report_context(UseIdx);
@@ -1327,7 +1391,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
? TRI->getSubRegIndexLaneMask(SubRegIdx)
: MRI->getMaxLaneMaskForVReg(Reg);
for (const LiveInterval::SubRange &SR : LI.subranges()) {
- if ((SR.LaneMask & MOMask) == 0)
+ if ((SR.LaneMask & MOMask).none())
continue;
checkLivenessAtDef(MO, MONum, DefIdx, SR, Reg, SR.LaneMask);
}
@@ -1640,8 +1704,8 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR,
!TRI->hasRegUnit(MOI->getReg(), Reg))
continue;
}
- if (LaneMask != 0 &&
- (TRI->getSubRegIndexLaneMask(MOI->getSubReg()) & LaneMask) == 0)
+ if (LaneMask.any() &&
+ (TRI->getSubRegIndexLaneMask(MOI->getSubReg()) & LaneMask).none())
continue;
hasDef = true;
if (MOI->isEarlyClobber())
@@ -1772,15 +1836,22 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
for (ConstMIBundleOperands MOI(*MI); MOI.isValid(); ++MOI) {
if (!MOI->isReg() || MOI->getReg() != Reg)
continue;
- if (LaneMask != 0 &&
- (LaneMask & TRI->getSubRegIndexLaneMask(MOI->getSubReg())) == 0)
- continue;
+ unsigned Sub = MOI->getSubReg();
+ LaneBitmask SLM = Sub != 0 ? TRI->getSubRegIndexLaneMask(Sub)
+ : LaneBitmask::getAll();
if (MOI->isDef()) {
- if (MOI->getSubReg() != 0)
+ if (Sub != 0) {
hasSubRegDef = true;
+ // An operand vreg0:sub0<def> reads vreg0:sub1..n. Invert the lane
+ // mask for subregister defs. Read-undef defs will be handled by
+ // readsReg below.
+ SLM = ~SLM;
+ }
if (MOI->isDead())
hasDeadDef = true;
}
+ if (LaneMask.any() && (LaneMask & SLM).none())
+ continue;
if (MOI->readsReg())
hasRead = true;
}
@@ -1788,7 +1859,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
// Make sure that the corresponding machine operand for a "dead" live
// range has the dead flag. We cannot perform this check for subregister
// liveranges as partially dead values are allowed.
- if (LaneMask == 0 && !hasDeadDef) {
+ if (LaneMask.none() && !hasDeadDef) {
report("Instruction ending live segment on dead slot has no dead flag",
MI);
report_context(LR, Reg, LaneMask);
@@ -1798,7 +1869,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
if (!hasRead) {
// When tracking subregister liveness, the main range must start new
// values on partial register writes, even if there is no read.
- if (!MRI->shouldTrackSubRegLiveness(Reg) || LaneMask != 0 ||
+ if (!MRI->shouldTrackSubRegLiveness(Reg) || LaneMask.any() ||
!hasSubRegDef) {
report("Instruction ending live segment doesn't read the register",
MI);
@@ -1842,7 +1913,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
// All predecessors must have a live-out value if this is not a
// subregister liverange.
- if (!PVNI && LaneMask == 0) {
+ if (!PVNI && LaneMask.none()) {
report("Register not marked live out of predecessor", *PI);
report_context(LR, Reg, LaneMask);
report_context(*VNI);
@@ -1882,14 +1953,14 @@ void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) {
assert(TargetRegisterInfo::isVirtualRegister(Reg));
verifyLiveRange(LI, Reg);
- LaneBitmask Mask = 0;
+ LaneBitmask Mask;
LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(Reg);
for (const LiveInterval::SubRange &SR : LI.subranges()) {
- if ((Mask & SR.LaneMask) != 0) {
+ if ((Mask & SR.LaneMask).any()) {
report("Lane masks of sub ranges overlap in live interval", MF);
report_context(LI);
}
- if ((SR.LaneMask & ~MaxMask) != 0) {
+ if ((SR.LaneMask & ~MaxMask).any()) {
report("Subrange lanemask is invalid", MF);
report_context(LI);
}
@@ -1950,11 +2021,11 @@ void MachineVerifier::verifyStackFrame() {
SmallVector<StackStateOfBB, 8> SPState;
SPState.resize(MF->getNumBlockIDs());
- SmallPtrSet<const MachineBasicBlock*, 8> Reachable;
+ df_iterator_default_set<const MachineBasicBlock*> Reachable;
// Visit the MBBs in DFS order.
for (df_ext_iterator<const MachineFunction*,
- SmallPtrSet<const MachineBasicBlock*, 8> >
+ df_iterator_default_set<const MachineBasicBlock*> >
DFI = df_ext_begin(MF, Reachable), DFE = df_ext_end(MF, Reachable);
DFI != DFE; ++DFI) {
const MachineBasicBlock *MBB = *DFI;
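
Many MachineVerifier.cpp hunks above migrate lane masks from raw unsigned values to the LaneBitmask type, replacing `== 0` / `!= 0` tests with .none() / .any() and zero initializers with default construction. A compact sketch of the idiom, using only operations that appear in the hunks; the header path is taken from the imported LLVM sources:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/MC/LaneBitmask.h"
using namespace llvm;

// Accumulate the sub-range lanes that overlap the operand's lane mask.
static LaneBitmask liveLanes(LaneBitmask MOMask,
                             ArrayRef<LaneBitmask> SubRangeMasks) {
  LaneBitmask LiveIn;                // default-constructed: no lanes set
  for (LaneBitmask SR : SubRangeMasks) {
    if ((MOMask & SR).none())        // was: (MOMask & SR) == 0
      continue;
    LiveIn |= SR;
  }
  return LiveIn;
}
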
diff --git a/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp b/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp
index 0177e414f8d9..2a8531f337a0 100644
--- a/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp
+++ b/contrib/llvm/lib/CodeGen/OptimizePHIs.cpp
@@ -184,7 +184,7 @@ bool OptimizePHIs::OptimizeBB(MachineBasicBlock &MBB) {
for (InstrSetIterator PI = PHIsInCycle.begin(), PE = PHIsInCycle.end();
PI != PE; ++PI) {
MachineInstr *PhiMI = *PI;
- if (&*MII == PhiMI)
+ if (MII == PhiMI)
++MII;
PhiMI->eraseFromParent();
}
diff --git a/contrib/llvm/lib/CodeGen/PHIElimination.cpp b/contrib/llvm/lib/CodeGen/PHIElimination.cpp
index b8d54315d148..c67a25b888bf 100644
--- a/contrib/llvm/lib/CodeGen/PHIElimination.cpp
+++ b/contrib/llvm/lib/CodeGen/PHIElimination.cpp
@@ -175,6 +175,8 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) {
ImpDefs.clear();
VRegPHIUseCount.clear();
+ MF.getProperties().set(MachineFunctionProperties::Property::NoPHIs);
+
return Changed;
}
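
PHIElimination now records the NoPHIs machine-function property once it finishes, and the MachineVerifier hunk earlier rejects PHI instructions when that property is set. A short sketch of how a later pass might rely on it; the pass name is hypothetical:

#include "llvm/CodeGen/MachineFunctionPass.h"
using namespace llvm;

namespace {
struct MyPostPHIPass : public MachineFunctionPass {
  static char ID;
  MyPostPHIPass() : MachineFunctionPass(ID) {}

  // Requiring NoPHIs lets the pass manager and verifier catch stray PHIs
  // before this pass ever runs.
  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoPHIs);
  }

  bool runOnMachineFunction(MachineFunction &MF) override { return false; }
};
char MyPostPHIPass::ID = 0;
} // end anonymous namespace
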
diff --git a/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp
index 4cabc3a8c1fd..4e67ff2e5088 100644
--- a/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp
+++ b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.cpp
@@ -54,6 +54,7 @@ llvm::findPHICopyInsertPoint(MachineBasicBlock* MBB, MachineBasicBlock* SuccMBB,
++InsertPoint;
}
- // Make sure the copy goes after any phi nodes however.
+ // Make sure the copy goes after any phi nodes but before
+ // any debug nodes.
return MBB->SkipPHIsAndLabels(InsertPoint);
}
diff --git a/contrib/llvm/lib/CodeGen/ParallelCG.cpp b/contrib/llvm/lib/CodeGen/ParallelCG.cpp
index ccdaec1bc180..50dd44fa659f 100644
--- a/contrib/llvm/lib/CodeGen/ParallelCG.cpp
+++ b/contrib/llvm/lib/CodeGen/ParallelCG.cpp
@@ -12,7 +12,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/ParallelCG.h"
-#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Bitcode/BitcodeReader.h"
+#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
@@ -78,7 +79,7 @@ std::unique_ptr<Module> llvm::splitCodeGen(
CodegenThreadPool.async(
[TMFactory, FileType, ThreadOS](const SmallString<0> &BC) {
LLVMContext Ctx;
- ErrorOr<std::unique_ptr<Module>> MOrErr = parseBitcodeFile(
+ Expected<std::unique_ptr<Module>> MOrErr = parseBitcodeFile(
MemoryBufferRef(StringRef(BC.data(), BC.size()),
"<split-module>"),
Ctx);
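
ParallelCG.cpp switches parseBitcodeFile from ErrorOr to the Expected error-handling scheme, so a failure now carries an llvm::Error that must be consumed. A brief sketch of handling that result; the wrapper function is assumed, the API calls are the standard Expected idiom:

#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

static std::unique_ptr<Module> loadBitcodeOrNull(MemoryBufferRef Buf,
                                                 LLVMContext &Ctx) {
  Expected<std::unique_ptr<Module>> MOrErr = parseBitcodeFile(Buf, Ctx);
  if (!MOrErr) {
    // The error must be consumed; otherwise Expected asserts on destruction.
    handleAllErrors(MOrErr.takeError(), [](const ErrorInfoBase &EIB) {
      errs() << "bitcode parsing failed: " << EIB.message() << '\n';
    });
    return nullptr;
  }
  return std::move(*MOrErr);
}
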
diff --git a/contrib/llvm/lib/CodeGen/PatchableFunction.cpp b/contrib/llvm/lib/CodeGen/PatchableFunction.cpp
index 32468c90b864..ad9166f1ed23 100644
--- a/contrib/llvm/lib/CodeGen/PatchableFunction.cpp
+++ b/contrib/llvm/lib/CodeGen/PatchableFunction.cpp
@@ -32,7 +32,7 @@ struct PatchableFunction : public MachineFunctionPass {
bool runOnMachineFunction(MachineFunction &F) override;
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
};
}
diff --git a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
index 60b27dd75a89..11af50fe577c 100644
--- a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -70,17 +70,28 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <cassert>
+#include <cstdint>
+#include <memory>
#include <utility>
+
using namespace llvm;
#define DEBUG_TYPE "peephole-opt"
@@ -118,6 +129,7 @@ STATISTIC(NumRewrittenCopies, "Number of copies rewritten");
STATISTIC(NumNAPhysCopies, "Number of non-allocatable physical copies removed");
namespace {
+
class ValueTrackerResult;
class PeepholeOptimizer : public MachineFunctionPass {
@@ -128,6 +140,7 @@ namespace {
public:
static char ID; // Pass identification
+
PeepholeOptimizer() : MachineFunctionPass(ID) {
initializePeepholeOptimizerPass(*PassRegistry::getPassRegistry());
}
@@ -390,10 +403,12 @@ namespace {
/// register of the last source.
unsigned getReg() const { return Reg; }
};
-}
+
+} // end anonymous namespace
char PeepholeOptimizer::ID = 0;
char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID;
+
INITIALIZE_PASS_BEGIN(PeepholeOptimizer, DEBUG_TYPE,
"Peephole Optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
@@ -737,6 +752,7 @@ insertPHI(MachineRegisterInfo *MRI, const TargetInstrInfo *TII,
}
namespace {
+
/// \brief Helper class to rewrite the arguments of a copy-like instruction.
class CopyRewriter {
protected:
@@ -820,7 +836,6 @@ public:
TargetInstrInfo::RegSubRegPair Def,
PeepholeOptimizer::RewriteMapTy &RewriteMap,
bool HandleMultipleSources = true) {
-
TargetInstrInfo::RegSubRegPair LookupSrc(Def.Reg, Def.SubReg);
do {
ValueTrackerResult Res = RewriteMap.lookup(LookupSrc);
@@ -859,7 +874,7 @@ public:
const MachineOperand &MODef = NewPHI->getOperand(0);
return TargetInstrInfo::RegSubRegPair(MODef.getReg(), MODef.getSubReg());
- } while (1);
+ } while (true);
return TargetInstrInfo::RegSubRegPair(0, 0);
}
@@ -1001,6 +1016,7 @@ public:
TrackSubReg = (unsigned)CopyLike.getOperand(3).getImm();
return true;
}
+
bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override {
if (CurrentSrcIdx != 2)
return false;
@@ -1141,7 +1157,8 @@ public:
return true;
}
};
-} // End namespace.
+
+} // end anonymous namespace
/// \brief Get the appropriated CopyRewriter for \p MI.
/// \return A pointer to a dynamically allocated CopyRewriter or nullptr
@@ -1523,11 +1540,6 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
if (MI->isDebugValue())
continue;
- // If we run into an instruction we can't fold across, discard
- // the load candidates.
- if (MI->isLoadFoldBarrier())
- FoldAsLoadDefCandidates.clear();
-
if (MI->isPosition() || MI->isPHI())
continue;
@@ -1571,7 +1583,6 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "NAPhysCopy: blowing away all info due to " << *MI
<< '\n');
NAPhysToVirtMIs.clear();
- continue;
}
if ((isUncoalescableCopy(*MI) &&
@@ -1622,8 +1633,14 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
// earlier load into MI.
if (!isLoadFoldable(MI, FoldAsLoadDefCandidates) &&
!FoldAsLoadDefCandidates.empty()) {
+
+ // We visit each operand even after successfully folding a previous
+ // one. This allows us to fold multiple loads into a single
+ // instruction. We do assume that optimizeLoadInstr doesn't insert
+ // foldable uses earlier in the argument list. Since we don't restart
+ // iteration, we'd miss such cases.
const MCInstrDesc &MIDesc = MI->getDesc();
- for (unsigned i = MIDesc.getNumDefs(); i != MIDesc.getNumOperands();
+ for (unsigned i = MIDesc.getNumDefs(); i != MI->getNumOperands();
++i) {
const MachineOperand &MOp = MI->getOperand(i);
if (!MOp.isReg())
@@ -1650,13 +1667,23 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
MRI->markUsesInDebugValueAsUndef(FoldedReg);
FoldAsLoadDefCandidates.erase(FoldedReg);
++NumLoadFold;
- // MI is replaced with FoldMI.
+
+ // MI is replaced with FoldMI so we can continue trying to fold
Changed = true;
- break;
+ MI = FoldMI;
}
}
}
}
+
+ // If we run into an instruction we can't fold across, discard
+ // the load candidates. Note: We might be able to fold *into* this
+ // instruction, so this needs to be after the folding logic.
+ if (MI->isLoadFoldBarrier()) {
+ DEBUG(dbgs() << "Encountered load fold barrier on " << *MI << "\n");
+ FoldAsLoadDefCandidates.clear();
+ }
+
}
}
@@ -1806,8 +1833,8 @@ ValueTrackerResult ValueTracker::getNextSourceFromInsertSubreg() {
// sub-register we are tracking.
const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
if (!TRI ||
- (TRI->getSubRegIndexLaneMask(DefSubReg) &
- TRI->getSubRegIndexLaneMask(InsertedReg.SubIdx)) != 0)
+ !(TRI->getSubRegIndexLaneMask(DefSubReg) &
+ TRI->getSubRegIndexLaneMask(InsertedReg.SubIdx)).none())
return ValueTrackerResult();
// At this point, the value is available in v0 via the same subreg
// we used for Def.
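
The PeepholeOptimizer.cpp hunks reorder the main loop so pending load-fold candidates are only discarded after an attempt to fold them into the current instruction, and a successful fold continues on the replacement instruction instead of breaking out. A toy model of that ordering, not the pass itself:

#include <vector>

struct Item { bool IsBarrier; bool CanUseCandidate; };

// Try to use pending candidates on the current item first; only afterwards
// drop them if the item is a barrier, so a barrier can still be a target.
static int processItems(const std::vector<Item> &Items) {
  int Folded = 0;
  std::vector<int> Candidates;
  for (const Item &I : Items) {
    if (!Candidates.empty() && I.CanUseCandidate) {
      ++Folded;                  // "fold" using a pending candidate
      Candidates.pop_back();
    }
    if (I.IsBarrier)
      Candidates.clear();        // barrier handled after the fold attempt
    Candidates.push_back(0);     // every item may become a new candidate
  }
  return Folded;
}
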
diff --git a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
index 3fce307f3dd4..6081916a6a82 100644
--- a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
+++ b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp
@@ -98,7 +98,7 @@ namespace {
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
bool runOnMachineFunction(MachineFunction &Fn) override;
diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
index 20a9a394ebd0..5fca7fa5536b 100644
--- a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp
@@ -80,7 +80,7 @@ public:
MachineFunctionProperties getRequiredProperties() const override {
MachineFunctionProperties MFP;
if (UsesCalleeSaves)
- MFP.set(MachineFunctionProperties::Property::AllVRegsAllocated);
+ MFP.set(MachineFunctionProperties::Property::NoVRegs);
return MFP;
}
@@ -117,6 +117,10 @@ private:
// TRI->requiresFrameIndexScavenging() for the current function.
bool FrameIndexVirtualScavenging;
+ // Flag to control whether the scavenger should be passed even though
+ // FrameIndexVirtualScavenging is used.
+ bool FrameIndexEliminationScavenging;
+
void calculateCallFrameInfo(MachineFunction &Fn);
void calculateSaveRestoreBlocks(MachineFunction &Fn);
@@ -176,6 +180,8 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : nullptr;
FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn);
+ FrameIndexEliminationScavenging = (RS && !FrameIndexVirtualScavenging) ||
+ TRI->requiresFrameIndexReplacementScavenging(Fn);
// Calculate the MaxCallFrameSize and AdjustsStack variables for the
// function's frame information. Also eliminates call frame pseudo
@@ -221,8 +227,8 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
}
// Warn on stack size when it exceeds the given limit.
- MachineFrameInfo *MFI = Fn.getFrameInfo();
- uint64_t StackSize = MFI->getStackSize();
+ MachineFrameInfo &MFI = Fn.getFrameInfo();
+ uint64_t StackSize = MFI.getStackSize();
if (WarnStackSize.getNumOccurrences() > 0 && WarnStackSize < StackSize) {
DiagnosticInfoStackSize DiagStackSize(*F, StackSize);
F->getContext().diagnose(DiagStackSize);
@@ -231,8 +237,8 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
delete RS;
SaveBlocks.clear();
RestoreBlocks.clear();
- MFI->setSavePoint(nullptr);
- MFI->setRestorePoint(nullptr);
+ MFI.setSavePoint(nullptr);
+ MFI.setRestorePoint(nullptr);
return true;
}
@@ -242,10 +248,10 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
void PEI::calculateCallFrameInfo(MachineFunction &Fn) {
const TargetInstrInfo &TII = *Fn.getSubtarget().getInstrInfo();
const TargetFrameLowering *TFI = Fn.getSubtarget().getFrameLowering();
- MachineFrameInfo *MFI = Fn.getFrameInfo();
+ MachineFrameInfo &MFI = Fn.getFrameInfo();
unsigned MaxCallFrameSize = 0;
- bool AdjustsStack = MFI->adjustsStack();
+ bool AdjustsStack = MFI.adjustsStack();
// Get the function call frame set-up and tear-down instruction opcode
unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode();
@@ -274,8 +280,8 @@ void PEI::calculateCallFrameInfo(MachineFunction &Fn) {
AdjustsStack = true;
}
- MFI->setAdjustsStack(AdjustsStack);
- MFI->setMaxCallFrameSize(MaxCallFrameSize);
+ MFI.setAdjustsStack(AdjustsStack);
+ MFI.setMaxCallFrameSize(MaxCallFrameSize);
for (std::vector<MachineBasicBlock::iterator>::iterator
i = FrameSDOps.begin(), e = FrameSDOps.end(); i != e; ++i) {
@@ -293,17 +299,17 @@ void PEI::calculateCallFrameInfo(MachineFunction &Fn) {
/// Compute the sets of entry and return blocks for saving and restoring
/// callee-saved registers, and placing prolog and epilog code.
void PEI::calculateSaveRestoreBlocks(MachineFunction &Fn) {
- const MachineFrameInfo *MFI = Fn.getFrameInfo();
+ const MachineFrameInfo &MFI = Fn.getFrameInfo();
// Even when we do not change any CSR, we still want to insert the
// prologue and epilogue of the function.
// So set the save points for those.
// Use the points found by shrink-wrapping, if any.
- if (MFI->getSavePoint()) {
- SaveBlocks.push_back(MFI->getSavePoint());
- assert(MFI->getRestorePoint() && "Both restore and save must be set");
- MachineBasicBlock *RestoreBlock = MFI->getRestorePoint();
+ if (MFI.getSavePoint()) {
+ SaveBlocks.push_back(MFI.getSavePoint());
+ assert(MFI.getRestorePoint() && "Both restore and save must be set");
+ MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
// If RestoreBlock does not have any successor and is not a return block
// then the end point is unreachable and we do not need to insert any
// epilogue.
@@ -340,7 +346,7 @@ static void assignCalleeSavedSpillSlots(MachineFunction &F,
}
const TargetFrameLowering *TFI = F.getSubtarget().getFrameLowering();
- MachineFrameInfo *MFI = F.getFrameInfo();
+ MachineFrameInfo &MFI = F.getFrameInfo();
if (!TFI->assignCalleeSavedSpillSlots(F, RegInfo, CSI)) {
// If target doesn't implement this, use generic code.
@@ -379,26 +385,26 @@ static void assignCalleeSavedSpillSlots(MachineFunction &F,
// the TargetRegisterClass if the stack alignment is smaller. Use the
// min.
Align = std::min(Align, StackAlign);
- FrameIdx = MFI->CreateStackObject(RC->getSize(), Align, true);
+ FrameIdx = MFI.CreateStackObject(RC->getSize(), Align, true);
if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx;
if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
} else {
// Spill it to the stack where we must.
FrameIdx =
- MFI->CreateFixedSpillStackObject(RC->getSize(), FixedSlot->Offset);
+ MFI.CreateFixedSpillStackObject(RC->getSize(), FixedSlot->Offset);
}
CS.setFrameIdx(FrameIdx);
}
}
- MFI->setCalleeSavedInfo(CSI);
+ MFI.setCalleeSavedInfo(CSI);
}
/// Helper function to update the liveness information for the callee-saved
/// registers.
static void updateLiveness(MachineFunction &MF) {
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
// Visited will contain all the basic blocks that are in the region
// where the callee saved registers are alive:
// - Anything that is not Save or Restore -> LiveThrough.
@@ -409,7 +415,7 @@ static void updateLiveness(MachineFunction &MF) {
SmallPtrSet<MachineBasicBlock *, 8> Visited;
SmallVector<MachineBasicBlock *, 8> WorkList;
MachineBasicBlock *Entry = &MF.front();
- MachineBasicBlock *Save = MFI->getSavePoint();
+ MachineBasicBlock *Save = MFI.getSavePoint();
if (!Save)
Save = Entry;
@@ -420,7 +426,7 @@ static void updateLiveness(MachineFunction &MF) {
}
Visited.insert(Save);
- MachineBasicBlock *Restore = MFI->getRestorePoint();
+ MachineBasicBlock *Restore = MFI.getRestorePoint();
if (Restore)
// By construction Restore cannot be visited, otherwise it
// means there exists a path to Restore that does not go
@@ -440,7 +446,7 @@ static void updateLiveness(MachineFunction &MF) {
WorkList.push_back(SuccBB);
}
- const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
for (MachineBasicBlock *MBB : Visited) {
@@ -460,10 +466,10 @@ static void insertCSRSpillsAndRestores(MachineFunction &Fn,
const MBBVector &SaveBlocks,
const MBBVector &RestoreBlocks) {
// Get callee saved register information.
- MachineFrameInfo *MFI = Fn.getFrameInfo();
- const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ MachineFrameInfo &MFI = Fn.getFrameInfo();
+ const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
- MFI->setCalleeSavedInfoValid(true);
+ MFI.setCalleeSavedInfoValid(true);
// Early exit if no callee saved registers are modified!
if (CSI.empty())
@@ -551,14 +557,14 @@ static void doSpillCalleeSavedRegs(MachineFunction &Fn, RegScavenger *RS,
/// AdjustStackOffset - Helper function used to adjust the stack frame offset.
static inline void
-AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx,
+AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx,
bool StackGrowsDown, int64_t &Offset,
unsigned &MaxAlign, unsigned Skew) {
// If the stack grows down, add the object size to find the lowest address.
if (StackGrowsDown)
- Offset += MFI->getObjectSize(FrameIdx);
+ Offset += MFI.getObjectSize(FrameIdx);
- unsigned Align = MFI->getObjectAlignment(FrameIdx);
+ unsigned Align = MFI.getObjectAlignment(FrameIdx);
// If the alignment of this object is greater than that of the stack, then
// increase the stack alignment to match.
@@ -569,11 +575,11 @@ AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx,
if (StackGrowsDown) {
DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset << "]\n");
- MFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset
+ MFI.setObjectOffset(FrameIdx, -Offset); // Set the computed offset
} else {
DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << Offset << "]\n");
- MFI->setObjectOffset(FrameIdx, Offset);
- Offset += MFI->getObjectSize(FrameIdx);
+ MFI.setObjectOffset(FrameIdx, Offset);
+ Offset += MFI.getObjectSize(FrameIdx);
}
}
@@ -581,7 +587,7 @@ AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx,
/// track of them in StackBytesFree.
///
static inline void
-computeFreeStackSlots(MachineFrameInfo *MFI, bool StackGrowsDown,
+computeFreeStackSlots(MachineFrameInfo &MFI, bool StackGrowsDown,
unsigned MinCSFrameIndex, unsigned MaxCSFrameIndex,
int64_t FixedCSEnd, BitVector &StackBytesFree) {
// Avoid undefined int64_t -> int conversion below in extreme case.
@@ -592,7 +598,7 @@ computeFreeStackSlots(MachineFrameInfo *MFI, bool StackGrowsDown,
SmallVector<int, 16> AllocatedFrameSlots;
// Add fixed objects.
- for (int i = MFI->getObjectIndexBegin(); i != 0; ++i)
+ for (int i = MFI.getObjectIndexBegin(); i != 0; ++i)
AllocatedFrameSlots.push_back(i);
// Add callee-save objects.
for (int i = MinCSFrameIndex; i <= (int)MaxCSFrameIndex; ++i)
@@ -601,8 +607,8 @@ computeFreeStackSlots(MachineFrameInfo *MFI, bool StackGrowsDown,
for (int i : AllocatedFrameSlots) {
// These are converted from int64_t, but they should always fit in int
// because of the FixedCSEnd check above.
- int ObjOffset = MFI->getObjectOffset(i);
- int ObjSize = MFI->getObjectSize(i);
+ int ObjOffset = MFI.getObjectOffset(i);
+ int ObjSize = MFI.getObjectSize(i);
int ObjStart, ObjEnd;
if (StackGrowsDown) {
// ObjOffset is negative when StackGrowsDown is true.
@@ -621,10 +627,10 @@ computeFreeStackSlots(MachineFrameInfo *MFI, bool StackGrowsDown,
/// Assign frame object to an unused portion of the stack in the fixed stack
/// object range. Return true if the allocation was successful.
///
-static inline bool scavengeStackSlot(MachineFrameInfo *MFI, int FrameIdx,
+static inline bool scavengeStackSlot(MachineFrameInfo &MFI, int FrameIdx,
bool StackGrowsDown, unsigned MaxAlign,
BitVector &StackBytesFree) {
- if (MFI->isVariableSizedObjectIndex(FrameIdx))
+ if (MFI.isVariableSizedObjectIndex(FrameIdx))
return false;
if (StackBytesFree.none()) {
@@ -634,11 +640,11 @@ static inline bool scavengeStackSlot(MachineFrameInfo *MFI, int FrameIdx,
return false;
}
- unsigned ObjAlign = MFI->getObjectAlignment(FrameIdx);
+ unsigned ObjAlign = MFI.getObjectAlignment(FrameIdx);
if (ObjAlign > MaxAlign)
return false;
- int64_t ObjSize = MFI->getObjectSize(FrameIdx);
+ int64_t ObjSize = MFI.getObjectSize(FrameIdx);
int FreeStart;
for (FreeStart = StackBytesFree.find_first(); FreeStart != -1;
FreeStart = StackBytesFree.find_next(FreeStart)) {
@@ -668,11 +674,11 @@ static inline bool scavengeStackSlot(MachineFrameInfo *MFI, int FrameIdx,
int ObjStart = -(FreeStart + ObjSize);
DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") scavenged at SP[" << ObjStart
<< "]\n");
- MFI->setObjectOffset(FrameIdx, ObjStart);
+ MFI.setObjectOffset(FrameIdx, ObjStart);
} else {
DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") scavenged at SP[" << FreeStart
<< "]\n");
- MFI->setObjectOffset(FrameIdx, FreeStart);
+ MFI.setObjectOffset(FrameIdx, FreeStart);
}
StackBytesFree.reset(FreeStart, FreeStart + ObjSize);
@@ -684,7 +690,7 @@ static inline bool scavengeStackSlot(MachineFrameInfo *MFI, int FrameIdx,
static void
AssignProtectedObjSet(const StackObjSet &UnassignedObjs,
SmallSet<int, 16> &ProtectedObjs,
- MachineFrameInfo *MFI, bool StackGrowsDown,
+ MachineFrameInfo &MFI, bool StackGrowsDown,
int64_t &Offset, unsigned &MaxAlign, unsigned Skew) {
for (StackObjSet::const_iterator I = UnassignedObjs.begin(),
@@ -706,7 +712,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
// Loop over all of the stack objects, assigning sequential addresses...
- MachineFrameInfo *MFI = Fn.getFrameInfo();
+ MachineFrameInfo &MFI = Fn.getFrameInfo();
// Start at the beginning of the local area.
// The Offset is the distance from the stack top in the direction
@@ -725,17 +731,17 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
// non-fixed objects can't be allocated right at the start of local area.
// Adjust 'Offset' to point to the end of last fixed sized preallocated
// object.
- for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) {
+ for (int i = MFI.getObjectIndexBegin(); i != 0; ++i) {
int64_t FixedOff;
if (StackGrowsDown) {
// The maximum distance from the stack pointer is at lower address of
// the object -- which is given by offset. For down growing stack
// the offset is negative, so we negate the offset to get the distance.
- FixedOff = -MFI->getObjectOffset(i);
+ FixedOff = -MFI.getObjectOffset(i);
} else {
// The maximum distance from the start pointer is at the upper
// address of the object.
- FixedOff = MFI->getObjectOffset(i) + MFI->getObjectSize(i);
+ FixedOff = MFI.getObjectOffset(i) + MFI.getObjectSize(i);
}
if (FixedOff > Offset) Offset = FixedOff;
}
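Note: the fixed-object scan above only changes MFI from a pointer to a reference; the offset arithmetic is untouched. A stand-alone sketch of that arithmetic, with illustrative offsets and sizes rather than values from any real frame:

// Minimal sketch of the fixed-object distance computation, independent of LLVM.
#include <cassert>
#include <cstdint>

static int64_t fixedObjectExtent(int64_t ObjOffset, int64_t ObjSize,
                                 bool StackGrowsDown) {
  // Down-growing stack: offsets are negative, so the distance from the
  // local-area start is -ObjOffset. Up-growing stack: the far edge of the
  // object is ObjOffset + ObjSize.
  return StackGrowsDown ? -ObjOffset : ObjOffset + ObjSize;
}

int main() {
  assert(fixedObjectExtent(-24, 8, /*StackGrowsDown=*/true) == 24);
  assert(fixedObjectExtent(16, 8, /*StackGrowsDown=*/false) == 24);
  return 0;
}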
@@ -746,32 +752,32 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) {
// If the stack grows down, we need to add the size to find the lowest
// address of the object.
- Offset += MFI->getObjectSize(i);
+ Offset += MFI.getObjectSize(i);
- unsigned Align = MFI->getObjectAlignment(i);
+ unsigned Align = MFI.getObjectAlignment(i);
// Adjust to alignment boundary
Offset = alignTo(Offset, Align, Skew);
DEBUG(dbgs() << "alloc FI(" << i << ") at SP[" << -Offset << "]\n");
- MFI->setObjectOffset(i, -Offset); // Set the computed offset
+ MFI.setObjectOffset(i, -Offset); // Set the computed offset
}
} else if (MaxCSFrameIndex >= MinCSFrameIndex) {
// Be careful about underflow in comparisons against MinCSFrameIndex.
for (unsigned i = MaxCSFrameIndex; i != MinCSFrameIndex - 1; --i) {
- unsigned Align = MFI->getObjectAlignment(i);
+ unsigned Align = MFI.getObjectAlignment(i);
// Adjust to alignment boundary
Offset = alignTo(Offset, Align, Skew);
DEBUG(dbgs() << "alloc FI(" << i << ") at SP[" << Offset << "]\n");
- MFI->setObjectOffset(i, Offset);
- Offset += MFI->getObjectSize(i);
+ MFI.setObjectOffset(i, Offset);
+ Offset += MFI.getObjectSize(i);
}
}
// FixedCSEnd is the stack offset to the end of the fixed and callee-save
// stack area.
int64_t FixedCSEnd = Offset;
- unsigned MaxAlign = MFI->getMaxAlignment();
+ unsigned MaxAlign = MFI.getMaxAlignment();
// Make sure the special register scavenging spill slot is closest to the
// incoming stack pointer if a frame pointer is required and is closer
@@ -793,8 +799,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
// check for whether the frame is large enough to want to use virtual
// frame index registers. Functions which don't want/need this optimization
// will continue to use the existing code path.
- if (MFI->getUseLocalStackAllocationBlock()) {
- unsigned Align = MFI->getLocalFrameMaxAlign();
+ if (MFI.getUseLocalStackAllocationBlock()) {
+ unsigned Align = MFI.getLocalFrameMaxAlign();
// Adjust to alignment boundary.
Offset = alignTo(Offset, Align, Skew);
@@ -802,15 +808,15 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n");
// Resolve offsets for objects in the local block.
- for (unsigned i = 0, e = MFI->getLocalFrameObjectCount(); i != e; ++i) {
- std::pair<int, int64_t> Entry = MFI->getLocalFrameObjectMap(i);
+ for (unsigned i = 0, e = MFI.getLocalFrameObjectCount(); i != e; ++i) {
+ std::pair<int, int64_t> Entry = MFI.getLocalFrameObjectMap(i);
int64_t FIOffset = (StackGrowsDown ? -Offset : Offset) + Entry.second;
DEBUG(dbgs() << "alloc FI(" << Entry.first << ") at SP[" <<
FIOffset << "]\n");
- MFI->setObjectOffset(Entry.first, FIOffset);
+ MFI.setObjectOffset(Entry.first, FIOffset);
}
// Allocate the local block
- Offset += MFI->getLocalFrameSize();
+ Offset += MFI.getLocalFrameSize();
MaxAlign = std::max(Align, MaxAlign);
}
@@ -823,30 +829,30 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
// Make sure that the stack protector comes before the local variables on the
// stack.
SmallSet<int, 16> ProtectedObjs;
- if (MFI->getStackProtectorIndex() >= 0) {
+ if (MFI.getStackProtectorIndex() >= 0) {
StackObjSet LargeArrayObjs;
StackObjSet SmallArrayObjs;
StackObjSet AddrOfObjs;
- AdjustStackOffset(MFI, MFI->getStackProtectorIndex(), StackGrowsDown,
+ AdjustStackOffset(MFI, MFI.getStackProtectorIndex(), StackGrowsDown,
Offset, MaxAlign, Skew);
// Assign large stack objects first.
- for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
- if (MFI->isObjectPreAllocated(i) &&
- MFI->getUseLocalStackAllocationBlock())
+ for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
+ if (MFI.isObjectPreAllocated(i) &&
+ MFI.getUseLocalStackAllocationBlock())
continue;
if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
continue;
if (RS && RS->isScavengingFrameIndex((int)i))
continue;
- if (MFI->isDeadObjectIndex(i))
+ if (MFI.isDeadObjectIndex(i))
continue;
- if (MFI->getStackProtectorIndex() == (int)i ||
+ if (MFI.getStackProtectorIndex() == (int)i ||
EHRegNodeFrameIndex == (int)i)
continue;
- switch (SP->getSSPLayout(MFI->getObjectAllocation(i))) {
+ switch (SP->getSSPLayout(MFI.getObjectAllocation(i))) {
case StackProtector::SSPLK_None:
continue;
case StackProtector::SSPLK_SmallArray:
@@ -874,17 +880,16 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
// Then prepare to assign frame offsets to stack objects that are not used to
// spill callee saved registers.
- for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
- if (MFI->isObjectPreAllocated(i) &&
- MFI->getUseLocalStackAllocationBlock())
+ for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
+ if (MFI.isObjectPreAllocated(i) && MFI.getUseLocalStackAllocationBlock())
continue;
if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
continue;
if (RS && RS->isScavengingFrameIndex((int)i))
continue;
- if (MFI->isDeadObjectIndex(i))
+ if (MFI.isDeadObjectIndex(i))
continue;
- if (MFI->getStackProtectorIndex() == (int)i ||
+ if (MFI.getStackProtectorIndex() == (int)i ||
EHRegNodeFrameIndex == (int)i)
continue;
if (ProtectedObjs.count(i))
@@ -911,7 +916,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
BitVector StackBytesFree;
if (!ObjectsToAllocate.empty() &&
Fn.getTarget().getOptLevel() != CodeGenOpt::None &&
- MFI->getStackProtectorIndex() < 0 && TFI.enableStackSlotScavenging(Fn))
+ MFI.getStackProtectorIndex() < 0 && TFI.enableStackSlotScavenging(Fn))
computeFreeStackSlots(MFI, StackGrowsDown, MinCSFrameIndex, MaxCSFrameIndex,
FixedCSEnd, StackBytesFree);
@@ -935,8 +940,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
// If we have reserved argument space for call sites in the function
// immediately on entry to the current function, count it as part of the
// overall stack size.
- if (MFI->adjustsStack() && TFI.hasReservedCallFrame(Fn))
- Offset += MFI->getMaxCallFrameSize();
+ if (MFI.adjustsStack() && TFI.hasReservedCallFrame(Fn))
+ Offset += MFI.getMaxCallFrameSize();
// Round up the size to a multiple of the alignment. If the function has
// any calls or alloca's, align to the target's StackAlignment value to
@@ -944,8 +949,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
// otherwise, for leaf functions, align to the TransientStackAlignment
// value.
unsigned StackAlign;
- if (MFI->adjustsStack() || MFI->hasVarSizedObjects() ||
- (RegInfo->needsStackRealignment(Fn) && MFI->getObjectIndexEnd() != 0))
+ if (MFI.adjustsStack() || MFI.hasVarSizedObjects() ||
+ (RegInfo->needsStackRealignment(Fn) && MFI.getObjectIndexEnd() != 0))
StackAlign = TFI.getStackAlignment();
else
StackAlign = TFI.getTransientStackAlignment();
@@ -958,7 +963,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
// Update frame info to pretend that this is part of the stack...
int64_t StackSize = Offset - LocalAreaOffset;
- MFI->setStackSize(StackSize);
+ MFI.setStackSize(StackSize);
NumBytesStackSpace += StackSize;
}
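Note: throughout calculateFrameObjectOffsets the running Offset is rounded with alignTo(Offset, Align, Skew). As a stand-alone sketch mirroring the llvm::alignTo(Value, Align, Skew) semantics from Support/MathExtras.h (not quoting it), skewed alignment rounds Value up to the next point congruent to Skew modulo Align:

// Stand-alone sketch of the skewed alignment used by the frame layout above.
#include <cassert>
#include <cstdint>

static uint64_t alignToSkewed(uint64_t Value, uint64_t Align, uint64_t Skew) {
  Skew %= Align;
  return (Value + Align - 1 - Skew) / Align * Align + Skew;
}

int main() {
  assert(alignToSkewed(17, 8, 0) == 24); // plain alignment to 8 bytes
  assert(alignToSkewed(17, 8, 4) == 20); // 20 % 8 == 4, the requested skew
  return 0;
}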
@@ -1009,7 +1014,7 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) {
// Store SPAdj at exit of a basic block.
SmallVector<int, 8> SPState;
SPState.resize(Fn.getNumBlockIDs());
- SmallPtrSet<MachineBasicBlock*, 8> Reachable;
+ df_iterator_default_set<MachineBasicBlock*> Reachable;
// Iterate over the reachable blocks in DFS order.
for (auto DFI = df_ext_begin(&Fn, Reachable), DFE = df_ext_end(&Fn, Reachable);
@@ -1047,7 +1052,8 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode();
unsigned FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
- if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(*BB);
+ if (RS && FrameIndexEliminationScavenging)
+ RS->enterBasicBlock(*BB);
bool InsideCallSequence = false;
@@ -1116,7 +1122,7 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
// use that target machine register info object to eliminate
// it.
TRI.eliminateFrameIndex(MI, SPAdj, i,
- FrameIndexVirtualScavenging ? nullptr : RS);
+ FrameIndexEliminationScavenging ? RS : nullptr);
// Reset the iterator if we were at the beginning of the BB.
if (AtBeginning) {
@@ -1132,7 +1138,7 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
// the SP adjustment made by each instruction in the sequence.
// This includes both the frame setup/destroy pseudos (handled above),
// as well as other instructions that have side effects w.r.t the SP.
- // Note that this must come after eliminateFrameIndex, because
+ // Note that this must come after eliminateFrameIndex, because
// if I itself referred to a frame index, we shouldn't count its own
// adjustment.
if (DidFinishLoop && InsideCallSequence)
@@ -1141,7 +1147,7 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
if (DoIncr && I != BB->end()) ++I;
// Update register states.
- if (RS && !FrameIndexVirtualScavenging && DidFinishLoop)
+ if (RS && FrameIndexEliminationScavenging && DidFinishLoop)
RS->forward(MI);
}
}
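Note: most edits in this file follow mechanically from MachineFunction::getFrameInfo() returning a MachineFrameInfo reference instead of a pointer in this import: helper functions take MachineFrameInfo& and call sites switch from -> to '.'. A simplified stand-in showing the by-reference calling pattern (FrameInfo and adjustOffset are illustrative, not LLVM types):

// Simplified stand-in; the real MachineFrameInfo lives in
// llvm/CodeGen/MachineFrameInfo.h and has many more members.
#include <cstdint>
#include <iostream>
#include <vector>

struct FrameInfo {
  std::vector<int64_t> Offsets;
  int64_t getObjectOffset(int FI) const { return Offsets[FI]; }
  void setObjectOffset(int FI, int64_t Off) { Offsets[FI] = Off; }
};

// After the change, helpers take the frame info by reference ...
static void adjustOffset(FrameInfo &MFI, int FI, int64_t Delta) {
  MFI.setObjectOffset(FI, MFI.getObjectOffset(FI) + Delta);
}

int main() {
  FrameInfo MFI{{-8, -16}};
  adjustOffset(MFI, 1, -8);                    // ... and are called with MFI, not &MFI
  std::cout << MFI.getObjectOffset(1) << '\n'; // prints -24
  return 0;
}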
diff --git a/contrib/llvm/lib/CodeGen/RegAllocBase.cpp b/contrib/llvm/lib/CodeGen/RegAllocBase.cpp
index 93eeb9cba457..fb49a934431c 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocBase.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocBase.cpp
@@ -41,7 +41,8 @@ static cl::opt<bool, true>
VerifyRegAlloc("verify-regalloc", cl::location(RegAllocBase::VerifyEnabled),
cl::desc("Verify during register allocation"));
-const char RegAllocBase::TimerGroupName[] = "Register Allocation";
+const char RegAllocBase::TimerGroupName[] = "regalloc";
+const char RegAllocBase::TimerGroupDescription[] = "Register Allocation";
bool RegAllocBase::VerifyEnabled = false;
//===----------------------------------------------------------------------===//
@@ -67,7 +68,8 @@ void RegAllocBase::init(VirtRegMap &vrm,
// register, unify them with the corresponding LiveIntervalUnion, otherwise push
// them on the priority queue for later assignment.
void RegAllocBase::seedLiveRegs() {
- NamedRegionTimer T("Seed Live Regs", TimerGroupName, TimePassesIsEnabled);
+ NamedRegionTimer T("seed", "Seed Live Regs", TimerGroupName,
+ TimerGroupDescription, TimePassesIsEnabled);
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
if (MRI->reg_nodbg_empty(Reg))
@@ -143,6 +145,7 @@ void RegAllocBase::allocatePhysRegs() {
continue;
}
DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n");
+ assert(!SplitVirtReg->empty() && "expecting non-empty interval");
assert(TargetRegisterInfo::isVirtualRegister(SplitVirtReg->reg) &&
"expect split value in virtual register");
enqueue(SplitVirtReg);
diff --git a/contrib/llvm/lib/CodeGen/RegAllocBase.h b/contrib/llvm/lib/CodeGen/RegAllocBase.h
index 296ffe8692c6..d8921b5ce6db 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocBase.h
+++ b/contrib/llvm/lib/CodeGen/RegAllocBase.h
@@ -105,6 +105,7 @@ protected:
// Use this group name for NamedRegionTimer.
static const char TimerGroupName[];
+ static const char TimerGroupDescription[];
/// Method called when the allocator is about to remove a LiveInterval.
virtual void aboutToRemoveInterval(LiveInterval &LI) {}
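Note: the timer changes above split each region into a short key plus a display string, and the group into TimerGroupName ("regalloc") plus TimerGroupDescription ("Register Allocation"). A self-contained toy showing the same key/description split (ScopedTimer is a stand-in, not the LLVM NamedRegionTimer):

// Toy RAII timer mirroring the ("seed", "Seed Live Regs", "regalloc",
// "Register Allocation") argument order used in the hunks above.
#include <chrono>
#include <iostream>
#include <string>

struct ScopedTimer {
  std::string Key, Desc, GroupKey, GroupDesc;
  std::chrono::steady_clock::time_point Start;
  ScopedTimer(std::string K, std::string D, std::string GK, std::string GD)
      : Key(std::move(K)), Desc(std::move(D)), GroupKey(std::move(GK)),
        GroupDesc(std::move(GD)), Start(std::chrono::steady_clock::now()) {}
  ~ScopedTimer() {
    auto US = std::chrono::duration_cast<std::chrono::microseconds>(
                  std::chrono::steady_clock::now() - Start).count();
    std::cout << GroupKey << '/' << Key << " (" << Desc << "): " << US << "us\n";
  }
};

int main() {
  ScopedTimer T("seed", "Seed Live Regs", "regalloc", "Register Allocation");
  // ... work being timed ...
  return 0;
}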
diff --git a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp
index 11dfda67377f..a558e371ad4c 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp
@@ -76,9 +76,7 @@ public:
RABasic();
/// Return the pass name.
- const char* getPassName() const override {
- return "Basic Register Allocator";
- }
+ StringRef getPassName() const override { return "Basic Register Allocator"; }
/// RABasic analysis usage.
void getAnalysisUsage(AnalysisUsage &AU) const override;
@@ -105,6 +103,11 @@ public:
/// Perform register allocation.
bool runOnMachineFunction(MachineFunction &mf) override;
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoPHIs);
+ }
+
// Helper for spilling all live virtual registers currently unified under preg
// that interfere with the most recently queried lvr. Return true if spilling
// was successful, and append any new spilled/split intervals to splitLVRs.
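Note: two patterns recur across the allocator passes in this diff: getPassName() now returns StringRef, and each allocator declares the NoPHIs machine-function property as required so the verifier can check that PHI elimination already ran. A sketch of both overrides in a hypothetical pass (ToyRegAlloc is illustrative; it assumes the LLVM CodeGen headers of this import):

// Hypothetical allocator pass showing just the two overridden hooks.
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineFunctionPass.h"

namespace {
class ToyRegAlloc : public llvm::MachineFunctionPass {
public:
  static char ID;
  ToyRegAlloc() : MachineFunctionPass(ID) {}

  // getPassName() now returns StringRef rather than const char *.
  llvm::StringRef getPassName() const override { return "Toy Register Allocator"; }

  // Require PHI-free machine IR before this pass runs.
  llvm::MachineFunctionProperties getRequiredProperties() const override {
    return llvm::MachineFunctionProperties().set(
        llvm::MachineFunctionProperties::Property::NoPHIs);
  }

  bool runOnMachineFunction(llvm::MachineFunction &MF) override { return false; }
};
} // end anonymous namespace

char ToyRegAlloc::ID = 0;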
diff --git a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
index 55fb33edd720..fd759bc372b2 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -149,18 +149,21 @@ namespace {
spillImpossible = ~0u
};
public:
- const char *getPassName() const override {
- return "Fast Register Allocator";
- }
+ StringRef getPassName() const override { return "Fast Register Allocator"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoPHIs);
+ }
+
MachineFunctionProperties getSetProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
private:
@@ -209,8 +212,8 @@ int RAFast::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) {
return SS; // Already has space allocated?
// Allocate a new stack object for this spill location...
- int FrameIdx = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(),
- RC->getAlignment());
+ int FrameIdx = MF->getFrameInfo().CreateSpillStackObject(RC->getSize(),
+ RC->getAlignment());
// Assign the slot.
StackSlotForVirtReg[VirtReg] = FrameIdx;
@@ -360,7 +363,7 @@ void RAFast::usePhysReg(MachineOperand &MO) {
break;
case regReserved:
PhysRegState[PhysReg] = regFree;
- // Fall through
+ LLVM_FALLTHROUGH;
case regFree:
MO.setIsKill();
return;
@@ -389,7 +392,7 @@ void RAFast::usePhysReg(MachineOperand &MO) {
assert((TRI->isSuperRegister(PhysReg, Alias) ||
TRI->isSuperRegister(Alias, PhysReg)) &&
"Instruction is not using a subregister of a reserved register");
- // Fall through.
+ LLVM_FALLTHROUGH;
case regFree:
if (TRI->isSuperRegister(PhysReg, Alias)) {
// Leave the superregister in the working set.
@@ -421,7 +424,7 @@ void RAFast::definePhysReg(MachineInstr &MI, unsigned PhysReg,
break;
default:
spillVirtReg(MI, VirtReg);
- // Fall through.
+ LLVM_FALLTHROUGH;
case regFree:
case regReserved:
PhysRegState[PhysReg] = NewState;
@@ -437,7 +440,7 @@ void RAFast::definePhysReg(MachineInstr &MI, unsigned PhysReg,
break;
default:
spillVirtReg(MI, VirtReg);
- // Fall through.
+ LLVM_FALLTHROUGH;
case regFree:
case regReserved:
PhysRegState[Alias] = regDisabled;
@@ -1093,8 +1096,6 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) {
UsedInInstr.clear();
UsedInInstr.setUniverse(TRI->getNumRegUnits());
- assert(!MRI->isSSA() && "regalloc requires leaving SSA");
-
// initialize the virtual->physical register map to have a 'null'
// mapping for all virtual registers
StackSlotForVirtReg.resize(MRI->getNumVirtRegs());
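Note: the "// Fall through" comments in this file become LLVM_FALLTHROUGH (llvm/Support/Compiler.h), which expands to a fallthrough attribute where available so -Wimplicit-fallthrough can verify the intent. A stand-alone sketch using the standard C++17 spelling of the same attribute:

// Deliberate switch fall-through made explicit for the compiler to check.
#include <cstdio>

static const char *classify(int State) {
  switch (State) {
  case 0:
    return "free";
  case 1:
    std::puts("spilling first");
    [[fallthrough]]; // plays the role of LLVM_FALLTHROUGH here
  case 2:
    return "now free";
  default:
    return "disabled";
  }
}

int main() { std::printf("%s\n", classify(1)); return 0; }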
diff --git a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
index c4d4b1eadf3e..c47cfb1b986f 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -61,8 +61,7 @@ static cl::opt<SplitEditor::ComplementSpillMode> SplitSpillMode(
cl::desc("Spill mode for splitting live ranges"),
cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"),
clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"),
- clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed"),
- clEnumValEnd),
+ clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed")),
cl::init(SplitEditor::SM_Speed));
static cl::opt<unsigned>
@@ -318,9 +317,7 @@ public:
RAGreedy();
/// Return the pass name.
- const char* getPassName() const override {
- return "Greedy Register Allocator";
- }
+ StringRef getPassName() const override { return "Greedy Register Allocator"; }
/// RAGreedy analysis usage.
void getAnalysisUsage(AnalysisUsage &AU) const override;
@@ -334,6 +331,11 @@ public:
/// Perform register allocation.
bool runOnMachineFunction(MachineFunction &mf) override;
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoPHIs);
+ }
+
static char ID;
private:
@@ -421,6 +423,24 @@ private:
} // end anonymous namespace
char RAGreedy::ID = 0;
+char &llvm::RAGreedyID = RAGreedy::ID;
+
+INITIALIZE_PASS_BEGIN(RAGreedy, "greedy",
+ "Greedy Register Allocator", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer)
+INITIALIZE_PASS_DEPENDENCY(MachineScheduler)
+INITIALIZE_PASS_DEPENDENCY(LiveStacks)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
+INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
+INITIALIZE_PASS_DEPENDENCY(EdgeBundles)
+INITIALIZE_PASS_DEPENDENCY(SpillPlacement)
+INITIALIZE_PASS_END(RAGreedy, "greedy",
+ "Greedy Register Allocator", false, false)
#ifndef NDEBUG
const char *const RAGreedy::StageName[] = {
@@ -444,19 +464,6 @@ FunctionPass* llvm::createGreedyRegisterAllocator() {
}
RAGreedy::RAGreedy(): MachineFunctionPass(ID) {
- initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
- initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
- initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
- initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
- initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
- initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
- initializeLiveStacksPass(*PassRegistry::getPassRegistry());
- initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
- initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
- initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
- initializeLiveRegMatrixPass(*PassRegistry::getPassRegistry());
- initializeEdgeBundlesPass(*PassRegistry::getPassRegistry());
- initializeSpillPlacementPass(*PassRegistry::getPassRegistry());
}
void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
@@ -639,6 +646,9 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
evictInterference(VirtReg, Hint, NewVRegs);
return Hint;
}
+ // Record the missed hint, we may be able to recover
+ // at the end if the surrounding allocation changed.
+ SetOfBrokenHints.insert(&VirtReg);
}
// Try to evict interference from a cheaper alternative.
@@ -859,7 +869,8 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
AllocationOrder &Order,
SmallVectorImpl<unsigned> &NewVRegs,
unsigned CostPerUseLimit) {
- NamedRegionTimer T("Evict", TimerGroupName, TimePassesIsEnabled);
+ NamedRegionTimer T("evict", "Evict", TimerGroupName, TimerGroupDescription,
+ TimePassesIsEnabled);
// Keep track of the cheapest interference seen so far.
EvictionCost BestCost;
@@ -1957,7 +1968,8 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
// Local intervals are handled separately.
if (LIS->intervalIsInOneMBB(VirtReg)) {
- NamedRegionTimer T("Local Splitting", TimerGroupName, TimePassesIsEnabled);
+ NamedRegionTimer T("local_split", "Local Splitting", TimerGroupName,
+ TimerGroupDescription, TimePassesIsEnabled);
SA->analyze(&VirtReg);
unsigned PhysReg = tryLocalSplit(VirtReg, Order, NewVRegs);
if (PhysReg || !NewVRegs.empty())
@@ -1965,7 +1977,8 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
return tryInstructionSplit(VirtReg, Order, NewVRegs);
}
- NamedRegionTimer T("Global Splitting", TimerGroupName, TimePassesIsEnabled);
+ NamedRegionTimer T("global_split", "Global Splitting", TimerGroupName,
+ TimerGroupDescription, TimePassesIsEnabled);
SA->analyze(&VirtReg);
@@ -2103,6 +2116,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
// Mark VirtReg as fixed, i.e., it will not be recolored pass this point in
// this recoloring "session".
FixedRegisters.insert(VirtReg.reg);
+ SmallVector<unsigned, 4> CurrentNewVRegs;
Order.rewind();
while (unsigned PhysReg = Order.next()) {
@@ -2110,6 +2124,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
<< PrintReg(PhysReg, TRI) << '\n');
RecoloringCandidates.clear();
VirtRegToPhysReg.clear();
+ CurrentNewVRegs.clear();
// It is only possible to recolor virtual register interference.
if (Matrix->checkInterference(VirtReg, PhysReg) >
@@ -2154,8 +2169,11 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
// If we cannot recolor all the interferences, we will have to start again
// at this point for the next physical register.
SmallVirtRegSet SaveFixedRegisters(FixedRegisters);
- if (tryRecoloringCandidates(RecoloringQueue, NewVRegs, FixedRegisters,
- Depth)) {
+ if (tryRecoloringCandidates(RecoloringQueue, CurrentNewVRegs,
+ FixedRegisters, Depth)) {
+ // Push the queued vregs into the main queue.
+ for (unsigned NewVReg : CurrentNewVRegs)
+ NewVRegs.push_back(NewVReg);
// Do not mess up with the global assignment process.
// I.e., VirtReg must be unassigned.
Matrix->unassign(VirtReg);
@@ -2169,6 +2187,18 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
FixedRegisters = SaveFixedRegisters;
Matrix->unassign(VirtReg);
+ // For a newly created vreg which is also in RecoloringCandidates,
+ // don't add it to NewVRegs because its physical register will be restored
+ // below. Other vregs in CurrentNewVRegs are created by calling
+ // selectOrSplit and should be added into NewVRegs.
+ for (SmallVectorImpl<unsigned>::iterator Next = CurrentNewVRegs.begin(),
+ End = CurrentNewVRegs.end();
+ Next != End; ++Next) {
+ if (RecoloringCandidates.count(&LIS->getInterval(*Next)))
+ continue;
+ NewVRegs.push_back(*Next);
+ }
+
for (SmallLISet::iterator It = RecoloringCandidates.begin(),
EndIt = RecoloringCandidates.end();
It != EndIt; ++It) {
@@ -2201,10 +2231,21 @@ bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue,
DEBUG(dbgs() << "Try to recolor: " << *LI << '\n');
unsigned PhysReg;
PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters, Depth + 1);
- if (PhysReg == ~0u || !PhysReg)
+ // When splitting happens, the live-range may actually be empty.
+ // In that case, this is okay to continue the recoloring even
+ // if we did not find an alternative color for it. Indeed,
+ // there will not be anything to color for LI in the end.
+ if (PhysReg == ~0u || (!PhysReg && !LI->empty()))
return false;
+
+ if (!PhysReg) {
+ assert(LI->empty() && "Only empty live-ranges do not require a register");
+ DEBUG(dbgs() << "Recoloring of " << *LI << " succeeded. Empty LI.\n");
+ continue;
+ }
DEBUG(dbgs() << "Recoloring of " << *LI
<< " succeeded with: " << PrintReg(PhysReg, TRI) << '\n');
+
Matrix->assign(*LI, PhysReg);
FixedRegisters.insert(LI->reg);
}
@@ -2519,7 +2560,7 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
return PhysReg;
}
- assert(NewVRegs.empty() && "Cannot append to existing NewVRegs");
+ assert((NewVRegs.empty() || Depth) && "Cannot append to existing NewVRegs");
// The first time we see a live range, don't try to split or spill.
// Wait until the second time, when all smaller ranges have been allocated.
@@ -2531,17 +2572,20 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
return 0;
}
+ if (Stage < RS_Spill) {
+ // Try splitting VirtReg or interferences.
+ unsigned NewVRegSizeBefore = NewVRegs.size();
+ unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs);
+ if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore))
+ return PhysReg;
+ }
+
// If we couldn't allocate a register from spilling, there is probably some
// invalid inline assembly. The base class will report it.
if (Stage >= RS_Done || !VirtReg.isSpillable())
return tryLastChanceRecoloring(VirtReg, Order, NewVRegs, FixedRegisters,
Depth);
- // Try splitting VirtReg or interferences.
- unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs);
- if (PhysReg || !NewVRegs.empty())
- return PhysReg;
-
// Finally spill VirtReg itself.
if (EnableDeferredSpilling && getStage(VirtReg) < RS_Memory) {
// TODO: This is experimental and in particular, we do not model
@@ -2552,7 +2596,8 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
DEBUG(dbgs() << "Do as if this register is in memory\n");
NewVRegs.push_back(VirtReg.reg);
} else {
- NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled);
+ NamedRegionTimer T("spill", "Spiller", TimerGroupName,
+ TimerGroupDescription, TimePassesIsEnabled);
LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats);
spiller().spill(LRE);
setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done);
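Note: RAGreedy's constructor no longer calls initialize<Pass>Pass() for every dependency; the INITIALIZE_PASS_BEGIN/INITIALIZE_PASS_DEPENDENCY/INITIALIZE_PASS_END macros at file scope register the pass and its dependencies with the PassRegistry, and RAGreedyID is exposed so the pass can be referenced by identity. A trimmed sketch of that idiom for a hypothetical ToyRA pass (dependency list shortened to one entry; the macros are the real ones from llvm/PassSupport.h):

// File-scope registration replacing per-constructor initialize calls.
INITIALIZE_PASS_BEGIN(ToyRA, "toy-ra", "Toy Register Allocator", false, false)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_END(ToyRA, "toy-ra", "Toy Register Allocator", false, false)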
diff --git a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
index d1221ec59bd4..101b30bf3b65 100644
--- a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
+++ b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp
@@ -99,9 +99,7 @@ public:
}
/// Return the pass name.
- const char* getPassName() const override {
- return "PBQP Register Allocator";
- }
+ StringRef getPassName() const override { return "PBQP Register Allocator"; }
/// PBQP analysis usage.
void getAnalysisUsage(AnalysisUsage &au) const override;
@@ -109,6 +107,11 @@ public:
/// Perform register allocation
bool runOnMachineFunction(MachineFunction &MF) override;
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoPHIs);
+ }
+
private:
typedef std::map<const LiveInterval*, unsigned> LI2NodeMap;
diff --git a/contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp b/contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
index 50b885423375..ece44c28e9ed 100644
--- a/contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
+++ b/contrib/llvm/lib/CodeGen/RegUsageInfoCollector.cpp
@@ -48,7 +48,7 @@ public:
initializeRegUsageInfoCollectorPass(Registry);
}
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "Register Usage Information Collector Pass";
}
@@ -57,10 +57,6 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
static char ID;
-
-private:
- void markRegClobbered(const TargetRegisterInfo *TRI, uint32_t *RegMask,
- unsigned PReg);
};
} // end of anonymous namespace
@@ -76,13 +72,6 @@ FunctionPass *llvm::createRegUsageInfoCollector() {
return new RegUsageInfoCollector();
}
-void RegUsageInfoCollector::markRegClobbered(const TargetRegisterInfo *TRI,
- uint32_t *RegMask, unsigned PReg) {
- // If PReg is clobbered then all of its alias are also clobbered.
- for (MCRegAliasIterator AI(PReg, TRI, true); AI.isValid(); ++AI)
- RegMask[*AI / 32] &= ~(1u << *AI % 32);
-}
-
void RegUsageInfoCollector::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<PhysicalRegisterUsageInfo>();
AU.setPreservesAll();
@@ -116,7 +105,7 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg)
if (MRI->isPhysRegModified(PReg, true))
- markRegClobbered(TRI, &RegMask[0], PReg);
+ RegMask[PReg / 32] &= ~(1u << PReg % 32);
if (!TargetFrameLowering::isSafeForNoCSROpt(F)) {
const uint32_t *CallPreservedMask =
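Note: markRegClobbered is dropped because MRI->isPhysRegModified(PReg, true) already folds in aliases, so only PReg's own bit in the regmask needs clearing. A stand-alone sketch of that one-bit-per-register mask manipulation (register numbers are made up):

// Clearing one register's bit in a regmask, matching the
// RegMask[PReg / 32] &= ~(1u << PReg % 32) line above.
#include <cassert>
#include <cstdint>
#include <vector>

int main() {
  const unsigned NumRegs = 96;                              // hypothetical target
  std::vector<uint32_t> RegMask((NumRegs + 31) / 32, ~0u);  // all preserved
  unsigned PReg = 37;                                       // register to mark clobbered
  RegMask[PReg / 32] &= ~(1u << (PReg % 32));
  assert((RegMask[1] & (1u << 5)) == 0);                    // bit 37 = word 1, bit 5
  return 0;
}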
diff --git a/contrib/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp b/contrib/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp
index 759566147cc7..5cc35bfeca63 100644
--- a/contrib/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp
+++ b/contrib/llvm/lib/CodeGen/RegUsageInfoPropagate.cpp
@@ -52,7 +52,7 @@ public:
initializeRegUsageInfoPropagationPassPass(Registry);
}
- const char *getPassName() const override { return RUIP_NAME; }
+ StringRef getPassName() const override { return RUIP_NAME; }
bool runOnMachineFunction(MachineFunction &MF) override;
diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 617ece902e0e..0f4bb59c49a5 100644
--- a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -815,14 +815,14 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
for (LiveInterval::SubRange &SB : IntB.subranges()) {
LaneBitmask BMask = SB.LaneMask;
LaneBitmask Common = BMask & AMask;
- if (Common == 0)
+ if (Common.none())
continue;
DEBUG( dbgs() << "\t\tCopy_Merge " << PrintLaneMask(BMask)
<< " into " << PrintLaneMask(Common) << '\n');
LaneBitmask BRest = BMask & ~AMask;
LiveInterval::SubRange *CommonRange;
- if (BRest != 0) {
+ if (BRest.any()) {
SB.LaneMask = BRest;
DEBUG(dbgs() << "\t\tReduce Lane to " << PrintLaneMask(BRest)
<< '\n');
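Note: the dominant change in RegisterCoalescer.cpp (and in RegisterPressure.cpp further down) is that LaneBitmask is now a class rather than a raw unsigned, so tests against 0 and ~0u become none(), any() and all(), with LaneBitmask::getNone()/getAll() for the constants. A tiny stand-in type showing the intent of those predicates (simplified relative to llvm/MC/LaneBitmask.h):

// Minimal LaneBitmask-style wrapper, for illustration only.
#include <cassert>
#include <cstdint>

struct LaneMask {
  uint32_t Bits;
  bool any() const { return Bits != 0; }
  bool none() const { return Bits == 0; }
  bool all() const { return Bits == ~uint32_t(0); }
  static LaneMask getNone() { return {0}; }
  static LaneMask getAll() { return {~uint32_t(0)}; }
  LaneMask operator&(LaneMask O) const { return {Bits & O.Bits}; }
  LaneMask operator~() const { return {~Bits}; }
};

int main() {
  LaneMask A{0xC}, B{0x6};
  assert((A & B).any());            // old code: (A & B) != 0
  assert((A & ~A).none());          // old code: (A & ~A) == 0
  assert(LaneMask::getAll().all()); // old code: Mask == ~0u
  return 0;
}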
@@ -841,7 +841,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
addSegmentsWithValNo(*CommonRange, BSubValNo, SA, ASubValNo);
AMask &= ~BMask;
}
- if (AMask != 0) {
+ if (AMask.any()) {
DEBUG(dbgs() << "\t\tNew Lane " << PrintLaneMask(AMask) << '\n');
LiveRange *NewRange = IntB.createSubRange(Allocator, AMask);
VNInfo *BSubValNo = NewRange->getNextValue(CopyIdx, Allocator);
@@ -975,6 +975,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
NewRC = CommonRC;
DstIdx = 0;
DefMO.setSubReg(0);
+ DefMO.setIsUndef(false); // Only subregs can have def+undef.
}
}
}
@@ -1060,7 +1061,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
SR.createDeadDef(DefIndex, Alloc);
MaxMask &= ~SR.LaneMask;
}
- if (MaxMask != 0) {
+ if (MaxMask.any()) {
LiveInterval::SubRange *SR = DstInt.createSubRange(Alloc, MaxMask);
SR->createDeadDef(DefIndex, Alloc);
}
@@ -1153,7 +1154,7 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
if (SrcSubIdx != 0 && SrcLI.hasSubRanges()) {
LaneBitmask SrcMask = TRI->getSubRegIndexLaneMask(SrcSubIdx);
for (const LiveInterval::SubRange &SR : SrcLI.subranges()) {
- if ((SR.LaneMask & SrcMask) == 0)
+ if ((SR.LaneMask & SrcMask).none())
continue;
if (SR.liveAt(Idx))
return false;
@@ -1174,7 +1175,7 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
// The affected subregister segments can be removed.
LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(DstSubIdx);
for (LiveInterval::SubRange &SR : DstLI.subranges()) {
- if ((SR.LaneMask & DstMask) == 0)
+ if ((SR.LaneMask & DstMask).none())
continue;
VNInfo *SVNI = SR.getVNInfoAt(RegIndex);
@@ -1193,10 +1194,10 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
SlotIndex UseIdx = LIS->getInstructionIndex(MI);
LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg());
bool isLive;
- if (UseMask != ~0u && DstLI.hasSubRanges()) {
+ if (!UseMask.all() && DstLI.hasSubRanges()) {
isLive = false;
for (const LiveInterval::SubRange &SR : DstLI.subranges()) {
- if ((SR.LaneMask & UseMask) == 0)
+ if ((SR.LaneMask & UseMask).none())
continue;
if (SR.liveAt(UseIdx)) {
isLive = true;
@@ -1210,6 +1211,17 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
MO.setIsUndef(true);
DEBUG(dbgs() << "\tnew undef: " << UseIdx << '\t' << MI);
}
+
+ // A def of a subregister may be a use of the other subregisters, so
+ // deleting a def of a subregister may also remove uses. Since CopyMI
+ // is still part of the function (but about to be erased), mark all
+ // defs of DstReg in it as <undef>, so that shrinkToUses would
+ // ignore them.
+ for (MachineOperand &MO : CopyMI->operands())
+ if (MO.isReg() && MO.isDef() && MO.getReg() == DstReg)
+ MO.setIsUndef(true);
+ LIS->shrinkToUses(&DstLI);
+
return true;
}
@@ -1220,7 +1232,7 @@ void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
Mask = ~Mask;
bool IsUndef = true;
for (const LiveInterval::SubRange &S : Int.subranges()) {
- if ((S.LaneMask & Mask) == 0)
+ if ((S.LaneMask & Mask).none())
continue;
if (S.liveAt(UseIdx)) {
IsUndef = false;
@@ -1446,7 +1458,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
});
}
- ShrinkMask = 0;
+ ShrinkMask = LaneBitmask::getNone();
ShrinkMainRange = false;
// Okay, attempt to join these two intervals. On failure, this returns false.
@@ -1504,10 +1516,10 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx());
// Shrink subregister ranges if necessary.
- if (ShrinkMask != 0) {
+ if (ShrinkMask.any()) {
LiveInterval &LI = LIS->getInterval(CP.getDstReg());
for (LiveInterval::SubRange &S : LI.subranges()) {
- if ((S.LaneMask & ShrinkMask) == 0)
+ if ((S.LaneMask & ShrinkMask).none())
continue;
DEBUG(dbgs() << "Shrink LaneUses (Lane " << PrintLaneMask(S.LaneMask)
<< ")\n");
@@ -1558,11 +1570,19 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
// Deny any overlapping intervals. This depends on all the reserved
// register live ranges to look like dead defs.
- for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI)
- if (RHS.overlaps(LIS->getRegUnit(*UI))) {
- DEBUG(dbgs() << "\t\tInterference: " << PrintRegUnit(*UI, TRI) << '\n');
- return false;
+ if (!MRI->isConstantPhysReg(DstReg)) {
+ for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI) {
+ // Abort if not all the regunits are reserved.
+ for (MCRegUnitRootIterator RI(*UI, TRI); RI.isValid(); ++RI) {
+ if (!MRI->isReserved(*RI))
+ return false;
+ }
+ if (RHS.overlaps(LIS->getRegUnit(*UI))) {
+ DEBUG(dbgs() << "\t\tInterference: " << PrintRegUnit(*UI, TRI) << '\n');
+ return false;
+ }
}
+ }
// Skip any value computations, we are not adding new values to the
// reserved register. Also skip merging the live ranges, the reserved
@@ -1584,24 +1604,26 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
const SlotIndex CopyRegIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();
const SlotIndex DestRegIdx = LIS->getInstructionIndex(*DestMI).getRegSlot();
- // We checked above that there are no interfering defs of the physical
- // register. However, for this case, where we intent to move up the def of
- // the physical register, we also need to check for interfering uses.
- SlotIndexes *Indexes = LIS->getSlotIndexes();
- for (SlotIndex SI = Indexes->getNextNonNullIndex(DestRegIdx);
- SI != CopyRegIdx; SI = Indexes->getNextNonNullIndex(SI)) {
- MachineInstr *MI = LIS->getInstructionFromIndex(SI);
- if (MI->readsRegister(DstReg, TRI)) {
- DEBUG(dbgs() << "\t\tInterference (read): " << *MI);
- return false;
- }
-
- // We must also check for clobbers caused by regmasks.
- for (const auto &MO : MI->operands()) {
- if (MO.isRegMask() && MO.clobbersPhysReg(DstReg)) {
- DEBUG(dbgs() << "\t\tInterference (regmask clobber): " << *MI);
+ if (!MRI->isConstantPhysReg(DstReg)) {
+ // We checked above that there are no interfering defs of the physical
+ // register. However, for this case, where we intend to move up the def of
+ // the physical register, we also need to check for interfering uses.
+ SlotIndexes *Indexes = LIS->getSlotIndexes();
+ for (SlotIndex SI = Indexes->getNextNonNullIndex(DestRegIdx);
+ SI != CopyRegIdx; SI = Indexes->getNextNonNullIndex(SI)) {
+ MachineInstr *MI = LIS->getInstructionFromIndex(SI);
+ if (MI->readsRegister(DstReg, TRI)) {
+ DEBUG(dbgs() << "\t\tInterference (read): " << *MI);
return false;
}
+
+ // We must also check for clobbers caused by regmasks.
+ for (const auto &MO : MI->operands()) {
+ if (MO.isRegMask() && MO.clobbersPhysReg(DstReg)) {
+ DEBUG(dbgs() << "\t\tInterference (regmask clobber): " << *MI);
+ return false;
+ }
+ }
}
}
@@ -1795,11 +1817,11 @@ class JoinVals {
/// True once Pruned above has been computed.
bool PrunedComputed;
- Val() : Resolution(CR_Keep), WriteLanes(0), ValidLanes(0),
+ Val() : Resolution(CR_Keep), WriteLanes(), ValidLanes(),
RedefVNI(nullptr), OtherVNI(nullptr), ErasableImplicitDef(false),
Pruned(false), PrunedComputed(false) {}
- bool isAnalyzed() const { return WriteLanes != 0; }
+ bool isAnalyzed() const { return WriteLanes.any(); }
};
/// One entry per value number in LI.
@@ -1889,12 +1911,22 @@ public:
/// no useful information and can be removed.
void pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask);
+ /// Pruning values in subranges can lead to removing segments in these
+ /// subranges started by IMPLICIT_DEFs. The corresponding segments in
+ /// the main range also need to be removed. This function will mark
+ /// the corresponding values in the main range as pruned, so that
+ /// eraseInstrs can do the final cleanup.
+ /// The parameter @p LI must be the interval whose main range is the
+ /// live range LR.
+ void pruneMainSegments(LiveInterval &LI, bool &ShrinkMainRange);
+
/// Erase any machine instructions that have been coalesced away.
/// Add erased instructions to ErasedInstrs.
/// Add foreign virtual registers to ShrinkRegs if their live range ended at
/// the erased instrs.
void eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
- SmallVectorImpl<unsigned> &ShrinkRegs);
+ SmallVectorImpl<unsigned> &ShrinkRegs,
+ LiveInterval *LI = nullptr);
/// Remove liverange defs at places where implicit defs will be removed.
void removeImplicitDefs();
@@ -1906,7 +1938,7 @@ public:
LaneBitmask JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef)
const {
- LaneBitmask L = 0;
+ LaneBitmask L;
for (const MachineOperand &MO : DefMI->operands()) {
if (!MO.isReg() || MO.getReg() != Reg || !MO.isDef())
continue;
@@ -1944,7 +1976,7 @@ std::pair<const VNInfo*, unsigned> JoinVals::followCopyChain(
for (const LiveInterval::SubRange &S : LI.subranges()) {
// Transform lanemask to a mask in the joined live interval.
LaneBitmask SMask = TRI->composeSubRegIndexLaneMask(SubIdx, S.LaneMask);
- if ((SMask & LaneMask) == 0)
+ if ((SMask & LaneMask).none())
continue;
LiveQueryResult LRQ = S.Query(Def);
ValueIn = LRQ.valueIn();
@@ -1984,7 +2016,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
assert(!V.isAnalyzed() && "Value has already been analyzed!");
VNInfo *VNI = LR.getValNumInfo(ValNo);
if (VNI->isUnused()) {
- V.WriteLanes = ~0u;
+ V.WriteLanes = LaneBitmask::getAll();
return CR_Keep;
}
@@ -1992,16 +2024,17 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
const MachineInstr *DefMI = nullptr;
if (VNI->isPHIDef()) {
// Conservatively assume that all lanes in a PHI are valid.
- LaneBitmask Lanes = SubRangeJoin ? 1 : TRI->getSubRegIndexLaneMask(SubIdx);
+ LaneBitmask Lanes = SubRangeJoin ? LaneBitmask(1)
+ : TRI->getSubRegIndexLaneMask(SubIdx);
V.ValidLanes = V.WriteLanes = Lanes;
} else {
DefMI = Indexes->getInstructionFromIndex(VNI->def);
assert(DefMI != nullptr);
if (SubRangeJoin) {
// We don't care about the lanes when joining subregister ranges.
- V.WriteLanes = V.ValidLanes = 1;
+ V.WriteLanes = V.ValidLanes = LaneBitmask(1);
if (DefMI->isImplicitDef()) {
- V.ValidLanes = 0;
+ V.ValidLanes = LaneBitmask::getNone();
V.ErasableImplicitDef = true;
}
} else {
@@ -2074,7 +2107,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
// predecessor, the PHI itself can't introduce any conflicts.
if (VNI->isPHIDef())
return CR_Merge;
- if (V.ValidLanes & OtherV.ValidLanes)
+ if ((V.ValidLanes & OtherV.ValidLanes).any())
// Overlapping lanes can't be resolved.
return CR_Impossible;
else
@@ -2119,7 +2152,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
// We need the def for the subregister if there is nothing else live at the
// subrange at this point.
if (TrackSubRegLiveness
- && (V.WriteLanes & (OtherV.ValidLanes | OtherV.WriteLanes)) == 0)
+ && (V.WriteLanes & (OtherV.ValidLanes | OtherV.WriteLanes)).none())
return CR_Replace;
return CR_Erase;
}
@@ -2159,7 +2192,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
//
// Here OtherVNI will map to itself in [1;2), but to VNI in [2;5). CR_Replace
// handles this complex value mapping.
- if ((V.WriteLanes & OtherV.ValidLanes) == 0)
+ if ((V.WriteLanes & OtherV.ValidLanes).none())
return CR_Replace;
// If the other live range is killed by DefMI and the live ranges are still
@@ -2180,7 +2213,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
// possibility that no instructions actually read the clobbered lanes.
// If we're clobbering all the lanes in OtherVNI, at least one must be read.
// Otherwise Other.RI wouldn't be live here.
- if ((TRI->getSubRegIndexLaneMask(Other.SubIdx) & ~V.WriteLanes) == 0)
+ if ((TRI->getSubRegIndexLaneMask(Other.SubIdx) & ~V.WriteLanes).none())
return CR_Impossible;
// We need to verify that no instructions are reading the clobbered lanes. To
@@ -2228,11 +2261,11 @@ void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) {
Val &OtherV = Other.Vals[V.OtherVNI->id];
// We cannot erase an IMPLICIT_DEF if we don't have valid values for all
// its lanes.
- if ((OtherV.WriteLanes & ~V.ValidLanes) != 0 && TrackSubRegLiveness)
+ if ((OtherV.WriteLanes & ~V.ValidLanes).any() && TrackSubRegLiveness)
OtherV.ErasableImplicitDef = false;
OtherV.Pruned = true;
+ LLVM_FALLTHROUGH;
}
- // Fall through.
default:
// This value number needs to go in the final joined live range.
Assignments[ValNo] = NewVNInfo.size();
@@ -2289,7 +2322,7 @@ taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other,
TaintedLanes &= ~OV.WriteLanes;
if (!OV.RedefVNI)
break;
- } while (TaintedLanes);
+ } while (TaintedLanes.any());
return true;
}
@@ -2302,8 +2335,8 @@ bool JoinVals::usesLanes(const MachineInstr &MI, unsigned Reg, unsigned SubIdx,
continue;
if (!MO.readsReg())
continue;
- if (Lanes & TRI->getSubRegIndexLaneMask(
- TRI->composeSubRegIndices(SubIdx, MO.getSubReg())))
+ unsigned S = TRI->composeSubRegIndices(SubIdx, MO.getSubReg());
+ if ((Lanes & TRI->getSubRegIndexLaneMask(S)).any())
return true;
}
return false;
@@ -2350,7 +2383,7 @@ bool JoinVals::resolveConflicts(JoinVals &Other) {
Indexes->getInstructionFromIndex(TaintExtent.front().first);
assert(LastMI && "Range must end at a proper instruction");
unsigned TaintNum = 0;
- for(;;) {
+ for (;;) {
assert(MI != MBB->end() && "Bad LastMI");
if (usesLanes(*MI, Other.Reg, Other.SubIdx, TaintedLanes)) {
DEBUG(dbgs() << "\t\ttainted lanes used by: " << *MI);
@@ -2415,7 +2448,8 @@ void JoinVals::pruneValues(JoinVals &Other,
for (MachineOperand &MO :
Indexes->getInstructionFromIndex(Def)->operands()) {
if (MO.isReg() && MO.isDef() && MO.getReg() == Reg) {
- MO.setIsUndef(EraseImpDef);
+ if (MO.getSubReg() != 0)
+ MO.setIsUndef(EraseImpDef);
MO.setIsDead(false);
}
}
@@ -2448,8 +2482,7 @@ void JoinVals::pruneValues(JoinVals &Other,
}
}
-void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask)
-{
+void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) {
// Look for values being erased.
bool DidPrune = false;
for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
@@ -2486,6 +2519,30 @@ void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask)
LI.removeEmptySubRanges();
}
+/// Check if any of the subranges of @p LI contain a definition at @p Def.
+static bool isDefInSubRange(LiveInterval &LI, SlotIndex Def) {
+ for (LiveInterval::SubRange &SR : LI.subranges()) {
+ if (VNInfo *VNI = SR.Query(Def).valueOutOrDead())
+ if (VNI->def == Def)
+ return true;
+ }
+ return false;
+}
+
+void JoinVals::pruneMainSegments(LiveInterval &LI, bool &ShrinkMainRange) {
+ assert(&static_cast<LiveRange&>(LI) == &LR);
+
+ for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
+ if (Vals[i].Resolution != CR_Keep)
+ continue;
+ VNInfo *VNI = LR.getValNumInfo(i);
+ if (VNI->isUnused() || VNI->isPHIDef() || isDefInSubRange(LI, VNI->def))
+ continue;
+ Vals[i].Pruned = true;
+ ShrinkMainRange = true;
+ }
+}
+
void JoinVals::removeImplicitDefs() {
for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
Val &V = Vals[i];
@@ -2499,7 +2556,8 @@ void JoinVals::removeImplicitDefs() {
}
void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
- SmallVectorImpl<unsigned> &ShrinkRegs) {
+ SmallVectorImpl<unsigned> &ShrinkRegs,
+ LiveInterval *LI) {
for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
// Get the def location before markUnused() below invalidates it.
SlotIndex Def = LR.getValNumInfo(i)->def;
@@ -2511,13 +2569,65 @@ void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
if (!Vals[i].ErasableImplicitDef || !Vals[i].Pruned)
break;
// Remove value number i from LR.
+ // For intervals with subranges, removing a segment from the main range
+ // may require extending the previous segment: for each definition of
+ // a subregister, there will be a corresponding def in the main range.
+ // That def may fall in the middle of a segment from another subrange.
+ // In such cases, removing this def from the main range must be
+ // complemented by extending the main range to account for the liveness
+ // of the other subrange.
VNInfo *VNI = LR.getValNumInfo(i);
+ SlotIndex Def = VNI->def;
+ // The new end point of the main range segment to be extended.
+ SlotIndex NewEnd;
+ if (LI != nullptr) {
+ LiveRange::iterator I = LR.FindSegmentContaining(Def);
+ assert(I != LR.end());
+ // Do not extend beyond the end of the segment being removed.
+ // The segment may have been pruned in preparation for joining
+ // live ranges.
+ NewEnd = I->end;
+ }
+
LR.removeValNo(VNI);
// Note that this VNInfo is reused and still referenced in NewVNInfo,
// make it appear like an unused value number.
VNI->markUnused();
- DEBUG(dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LR << '\n');
- // FALL THROUGH.
+
+ if (LI != nullptr && LI->hasSubRanges()) {
+ assert(static_cast<LiveRange*>(LI) == &LR);
+ // Determine the end point based on the subrange information:
+ // minimum of (earliest def of next segment,
+ // latest end point of containing segment)
+ SlotIndex ED, LE;
+ for (LiveInterval::SubRange &SR : LI->subranges()) {
+ LiveRange::iterator I = SR.find(Def);
+ if (I == SR.end())
+ continue;
+ if (I->start > Def)
+ ED = ED.isValid() ? std::min(ED, I->start) : I->start;
+ else
+ LE = LE.isValid() ? std::max(LE, I->end) : I->end;
+ }
+ if (LE.isValid())
+ NewEnd = std::min(NewEnd, LE);
+ if (ED.isValid())
+ NewEnd = std::min(NewEnd, ED);
+
+ // We only want to do the extension if there was a subrange that
+ // was live across Def.
+ if (LE.isValid()) {
+ LiveRange::iterator S = LR.find(Def);
+ if (S != LR.begin())
+ std::prev(S)->end = NewEnd;
+ }
+ }
+ DEBUG({
+ dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LR << '\n';
+ if (LI != nullptr)
+ dbgs() << "\t\t LHS = " << *LI << '\n';
+ });
+ LLVM_FALLTHROUGH;
}
case CR_Erase: {
@@ -2591,8 +2701,15 @@ void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
// Recompute the parts of the live range we had to remove because of
// CR_Replace conflicts.
- DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size()
- << " points: " << LRange << '\n');
+ DEBUG({
+ dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: ";
+ for (unsigned i = 0, n = EndPoints.size(); i != n; ++i) {
+ dbgs() << EndPoints[i];
+ if (i != n-1)
+ dbgs() << ',';
+ }
+ dbgs() << ": " << LRange << '\n';
+ });
LIS->extendToIndices(LRange, EndPoints);
}
@@ -2606,7 +2723,7 @@ void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
// LaneMask of subregisters common to subrange R and ToMerge.
LaneBitmask Common = RMask & LaneMask;
// There is nothing to do without common subregs.
- if (Common == 0)
+ if (Common.none())
continue;
DEBUG(dbgs() << "\t\tCopy+Merge " << PrintLaneMask(RMask) << " into "
@@ -2615,7 +2732,7 @@ void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
// they have to split into their own subrange.
LaneBitmask LRest = RMask & ~LaneMask;
LiveInterval::SubRange *CommonRange;
- if (LRest != 0) {
+ if (LRest.any()) {
R.LaneMask = LRest;
DEBUG(dbgs() << "\t\tReduce Lane to " << PrintLaneMask(LRest) << '\n');
// Duplicate SubRange for newly merged common stuff.
@@ -2630,7 +2747,7 @@ void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
LaneMask &= ~RMask;
}
- if (LaneMask != 0) {
+ if (LaneMask.any()) {
DEBUG(dbgs() << "\t\tNew Lane " << PrintLaneMask(LaneMask) << '\n');
LI.createSubRangeFrom(Allocator, LaneMask, ToMerge);
}
@@ -2641,10 +2758,10 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
LiveInterval &LHS = LIS->getInterval(CP.getDstReg());
bool TrackSubRegLiveness = MRI->shouldTrackSubRegLiveness(*CP.getNewRC());
- JoinVals RHSVals(RHS, CP.getSrcReg(), CP.getSrcIdx(), 0, NewVNInfo, CP, LIS,
- TRI, false, TrackSubRegLiveness);
- JoinVals LHSVals(LHS, CP.getDstReg(), CP.getDstIdx(), 0, NewVNInfo, CP, LIS,
- TRI, false, TrackSubRegLiveness);
+ JoinVals RHSVals(RHS, CP.getSrcReg(), CP.getSrcIdx(), LaneBitmask::getNone(),
+ NewVNInfo, CP, LIS, TRI, false, TrackSubRegLiveness);
+ JoinVals LHSVals(LHS, CP.getDstReg(), CP.getDstIdx(), LaneBitmask::getNone(),
+ NewVNInfo, CP, LIS, TRI, false, TrackSubRegLiveness);
DEBUG(dbgs() << "\t\tRHS = " << RHS
<< "\n\t\tLHS = " << LHS
@@ -2670,7 +2787,7 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
LaneBitmask Mask = DstIdx == 0 ? CP.getNewRC()->getLaneMask()
: TRI->getSubRegIndexLaneMask(DstIdx);
// LHS must support subregs or we wouldn't be in this codepath.
- assert(Mask != 0);
+ assert(Mask.any());
LHS.createSubRangeFrom(Allocator, Mask, LHS);
} else if (DstIdx != 0) {
// Transform LHS lanemasks to new register class if necessary.
@@ -2697,6 +2814,10 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
}
DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n");
+ // Pruning implicit defs from subranges may result in the main range
+ // having stale segments.
+ LHSVals.pruneMainSegments(LHS, ShrinkMainRange);
+
LHSVals.pruneSubRegValues(LHS, ShrinkMask);
RHSVals.pruneSubRegValues(LHS, ShrinkMask);
}
@@ -2712,7 +2833,7 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
// Erase COPY and IMPLICIT_DEF instructions. This may cause some external
// registers to require trimming.
SmallVector<unsigned, 8> ShrinkRegs;
- LHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs);
+ LHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs, &LHS);
RHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs);
while (!ShrinkRegs.empty())
shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val()));
@@ -2729,8 +2850,15 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
if (!EndPoints.empty()) {
// Recompute the parts of the live range we had to remove because of
// CR_Replace conflicts.
- DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size()
- << " points: " << LHS << '\n');
+ DEBUG({
+ dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: ";
+ for (unsigned i = 0, n = EndPoints.size(); i != n; ++i) {
+ dbgs() << EndPoints[i];
+ if (i != n-1)
+ dbgs() << ',';
+ }
+ dbgs() << ": " << LHS << '\n';
+ });
LIS->extendToIndices((LiveRange&)LHS, EndPoints);
}
@@ -3039,7 +3167,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
// If subranges are still supported, then the same subregs
// should still be supported.
for (LiveInterval::SubRange &S : LI.subranges()) {
- assert((S.LaneMask & ~MaxMask) == 0);
+ assert((S.LaneMask & ~MaxMask).none());
}
#endif
}
diff --git a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
index a21d6c1d4d63..fc84aebb14d7 100644
--- a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
@@ -26,8 +26,8 @@ using namespace llvm;
static void increaseSetPressure(std::vector<unsigned> &CurrSetPressure,
const MachineRegisterInfo &MRI, unsigned Reg,
LaneBitmask PrevMask, LaneBitmask NewMask) {
- assert((PrevMask & ~NewMask) == 0 && "Must not remove bits");
- if (PrevMask != 0 || NewMask == 0)
+ assert((PrevMask & ~NewMask).none() && "Must not remove bits");
+ if (PrevMask.any() || NewMask.none())
return;
PSetIterator PSetI = MRI.getPressureSets(Reg);
@@ -40,8 +40,8 @@ static void increaseSetPressure(std::vector<unsigned> &CurrSetPressure,
static void decreaseSetPressure(std::vector<unsigned> &CurrSetPressure,
const MachineRegisterInfo &MRI, unsigned Reg,
LaneBitmask PrevMask, LaneBitmask NewMask) {
- assert((NewMask & !PrevMask) == 0 && "Must not add bits");
- if (NewMask != 0 || PrevMask == 0)
+ //assert((NewMask & !PrevMask) == 0 && "Must not add bits");
+ if (NewMask.any() || PrevMask.none())
return;
PSetIterator PSetI = MRI.getPressureSets(Reg);
@@ -73,7 +73,7 @@ void RegisterPressure::dump(const TargetRegisterInfo *TRI) const {
dbgs() << "Live In: ";
for (const RegisterMaskPair &P : LiveInRegs) {
dbgs() << PrintVRegOrUnit(P.RegUnit, TRI);
- if (P.LaneMask != ~0u)
+ if (!P.LaneMask.all())
dbgs() << ':' << PrintLaneMask(P.LaneMask);
dbgs() << ' ';
}
@@ -81,7 +81,7 @@ void RegisterPressure::dump(const TargetRegisterInfo *TRI) const {
dbgs() << "Live Out: ";
for (const RegisterMaskPair &P : LiveOutRegs) {
dbgs() << PrintVRegOrUnit(P.RegUnit, TRI);
- if (P.LaneMask != ~0u)
+ if (!P.LaneMask.all())
dbgs() << ':' << PrintLaneMask(P.LaneMask);
dbgs() << ' ';
}
@@ -112,7 +112,7 @@ void PressureDiff::dump(const TargetRegisterInfo &TRI) const {
void RegPressureTracker::increaseRegPressure(unsigned RegUnit,
LaneBitmask PreviousMask,
LaneBitmask NewMask) {
- if (PreviousMask != 0 || NewMask == 0)
+ if (PreviousMask.any() || NewMask.none())
return;
PSetIterator PSetI = MRI->getPressureSets(RegUnit);
@@ -266,9 +266,8 @@ bool RegPressureTracker::isBottomClosed() const {
SlotIndex RegPressureTracker::getCurrSlot() const {
- MachineBasicBlock::const_iterator IdxPos = CurrPos;
- while (IdxPos != MBB->end() && IdxPos->isDebugValue())
- ++IdxPos;
+ MachineBasicBlock::const_iterator IdxPos =
+ skipDebugInstructionsForward(CurrPos, MBB->end());
if (IdxPos == MBB->end())
return LIS->getMBBEndIdx(MBB);
return LIS->getInstructionIndex(*IdxPos).getRegSlot();
@@ -322,29 +321,28 @@ void RegPressureTracker::initLiveThru(const RegPressureTracker &RPTracker) {
unsigned RegUnit = Pair.RegUnit;
if (TargetRegisterInfo::isVirtualRegister(RegUnit)
&& !RPTracker.hasUntiedDef(RegUnit))
- increaseSetPressure(LiveThruPressure, *MRI, RegUnit, 0, Pair.LaneMask);
+ increaseSetPressure(LiveThruPressure, *MRI, RegUnit,
+ LaneBitmask::getNone(), Pair.LaneMask);
}
}
static LaneBitmask getRegLanes(ArrayRef<RegisterMaskPair> RegUnits,
unsigned RegUnit) {
- auto I = std::find_if(RegUnits.begin(), RegUnits.end(),
- [RegUnit](const RegisterMaskPair Other) {
- return Other.RegUnit == RegUnit;
- });
+ auto I = find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) {
+ return Other.RegUnit == RegUnit;
+ });
if (I == RegUnits.end())
- return 0;
+ return LaneBitmask::getNone();
return I->LaneMask;
}
static void addRegLanes(SmallVectorImpl<RegisterMaskPair> &RegUnits,
RegisterMaskPair Pair) {
unsigned RegUnit = Pair.RegUnit;
- assert(Pair.LaneMask != 0);
- auto I = std::find_if(RegUnits.begin(), RegUnits.end(),
- [RegUnit](const RegisterMaskPair Other) {
- return Other.RegUnit == RegUnit;
- });
+ assert(Pair.LaneMask.any());
+ auto I = find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) {
+ return Other.RegUnit == RegUnit;
+ });
if (I == RegUnits.end()) {
RegUnits.push_back(Pair);
} else {
@@ -354,28 +352,26 @@ static void addRegLanes(SmallVectorImpl<RegisterMaskPair> &RegUnits,
static void setRegZero(SmallVectorImpl<RegisterMaskPair> &RegUnits,
unsigned RegUnit) {
- auto I = std::find_if(RegUnits.begin(), RegUnits.end(),
- [RegUnit](const RegisterMaskPair Other) {
- return Other.RegUnit == RegUnit;
- });
+ auto I = find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) {
+ return Other.RegUnit == RegUnit;
+ });
if (I == RegUnits.end()) {
- RegUnits.push_back(RegisterMaskPair(RegUnit, 0));
+ RegUnits.push_back(RegisterMaskPair(RegUnit, LaneBitmask::getNone()));
} else {
- I->LaneMask = 0;
+ I->LaneMask = LaneBitmask::getNone();
}
}
static void removeRegLanes(SmallVectorImpl<RegisterMaskPair> &RegUnits,
RegisterMaskPair Pair) {
unsigned RegUnit = Pair.RegUnit;
- assert(Pair.LaneMask != 0);
- auto I = std::find_if(RegUnits.begin(), RegUnits.end(),
- [RegUnit](const RegisterMaskPair Other) {
- return Other.RegUnit == RegUnit;
- });
+ assert(Pair.LaneMask.any());
+ auto I = find_if(RegUnits, [RegUnit](const RegisterMaskPair Other) {
+ return Other.RegUnit == RegUnit;
+ });
if (I != RegUnits.end()) {
I->LaneMask &= ~Pair.LaneMask;
- if (I->LaneMask == 0)
+ if (I->LaneMask.none())
RegUnits.erase(I);
}
}
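Note: the helpers above drop std::find_if over explicit begin()/end() pairs in favour of llvm::find_if, which takes the range directly (an ADT/STLExtras.h wrapper). An equivalent stand-alone version of that range helper applied to the RegisterMaskPair lookup pattern (types simplified):

// Range-based find_if, mirroring llvm::find_if(Range, Pred).
#include <algorithm>
#include <cassert>
#include <iterator>
#include <vector>

template <typename R, typename Pred>
auto find_in(R &&Range, Pred P) -> decltype(std::begin(Range)) {
  return std::find_if(std::begin(Range), std::end(Range), P);
}

struct RegisterMaskPair { unsigned RegUnit; unsigned LaneMask; };

int main() {
  std::vector<RegisterMaskPair> RegUnits = {{3, 0x1}, {7, 0x3}};
  unsigned RegUnit = 7;
  auto I = find_in(RegUnits, [RegUnit](const RegisterMaskPair &Other) {
    return Other.RegUnit == RegUnit;
  });
  assert(I != RegUnits.end() && I->LaneMask == 0x3);
  return 0;
}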
@@ -386,14 +382,15 @@ static LaneBitmask getLanesWithProperty(const LiveIntervals &LIS,
bool(*Property)(const LiveRange &LR, SlotIndex Pos)) {
if (TargetRegisterInfo::isVirtualRegister(RegUnit)) {
const LiveInterval &LI = LIS.getInterval(RegUnit);
- LaneBitmask Result = 0;
+ LaneBitmask Result;
if (TrackLaneMasks && LI.hasSubRanges()) {
for (const LiveInterval::SubRange &SR : LI.subranges()) {
if (Property(SR, Pos))
Result |= SR.LaneMask;
}
} else if (Property(LI, Pos)) {
- Result = TrackLaneMasks ? MRI.getMaxLaneMaskForVReg(RegUnit) : ~0u;
+ Result = TrackLaneMasks ? MRI.getMaxLaneMaskForVReg(RegUnit)
+ : LaneBitmask::getAll();
}
return Result;
@@ -403,7 +400,7 @@ static LaneBitmask getLanesWithProperty(const LiveIntervals &LIS,
// for physical registers on targets with many registers (GPUs).
if (LR == nullptr)
return SafeDefault;
- return Property(*LR, Pos) ? ~0u : 0;
+ return Property(*LR, Pos) ? LaneBitmask::getAll() : LaneBitmask::getNone();
}
}
@@ -411,7 +408,8 @@ static LaneBitmask getLiveLanesAt(const LiveIntervals &LIS,
const MachineRegisterInfo &MRI,
bool TrackLaneMasks, unsigned RegUnit,
SlotIndex Pos) {
- return getLanesWithProperty(LIS, MRI, TrackLaneMasks, RegUnit, Pos, ~0u,
+ return getLanesWithProperty(LIS, MRI, TrackLaneMasks, RegUnit, Pos,
+ LaneBitmask::getAll(),
[](const LiveRange &LR, SlotIndex Pos) {
return LR.liveAt(Pos);
});
@@ -478,10 +476,10 @@ class RegisterOperandsCollector {
void pushReg(unsigned Reg,
SmallVectorImpl<RegisterMaskPair> &RegUnits) const {
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
- addRegLanes(RegUnits, RegisterMaskPair(Reg, ~0u));
+ addRegLanes(RegUnits, RegisterMaskPair(Reg, LaneBitmask::getAll()));
} else if (MRI.isAllocatable(Reg)) {
for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units)
- addRegLanes(RegUnits, RegisterMaskPair(*Units, ~0u));
+ addRegLanes(RegUnits, RegisterMaskPair(*Units, LaneBitmask::getAll()));
}
}
@@ -516,7 +514,7 @@ class RegisterOperandsCollector {
addRegLanes(RegUnits, RegisterMaskPair(Reg, LaneMask));
} else if (MRI.isAllocatable(Reg)) {
for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units)
- addRegLanes(RegUnits, RegisterMaskPair(*Units, ~0u));
+ addRegLanes(RegUnits, RegisterMaskPair(*Units, LaneBitmask::getAll()));
}
}
@@ -567,11 +565,11 @@ void RegisterOperands::adjustLaneLiveness(const LiveIntervals &LIS,
// of a subregister def we need a read-undef flag.
unsigned RegUnit = I->RegUnit;
if (TargetRegisterInfo::isVirtualRegister(RegUnit) &&
- AddFlagsMI != nullptr && (LiveAfter & ~I->LaneMask) == 0)
+ AddFlagsMI != nullptr && (LiveAfter & ~I->LaneMask).none())
AddFlagsMI->setRegisterDefReadUndef(RegUnit);
LaneBitmask ActualDef = I->LaneMask & LiveAfter;
- if (ActualDef == 0) {
+ if (ActualDef.none()) {
I = Defs.erase(I);
} else {
I->LaneMask = ActualDef;
@@ -582,7 +580,7 @@ void RegisterOperands::adjustLaneLiveness(const LiveIntervals &LIS,
LaneBitmask LiveBefore = getLiveLanesAt(LIS, MRI, true, I->RegUnit,
Pos.getBaseIndex());
LaneBitmask LaneMask = I->LaneMask & LiveBefore;
- if (LaneMask == 0) {
+ if (LaneMask.none()) {
I = Uses.erase(I);
} else {
I->LaneMask = LaneMask;
@@ -596,7 +594,7 @@ void RegisterOperands::adjustLaneLiveness(const LiveIntervals &LIS,
continue;
LaneBitmask LiveAfter = getLiveLanesAt(LIS, MRI, true, RegUnit,
Pos.getDeadSlot());
- if (LiveAfter == 0)
+ if (LiveAfter.none())
AddFlagsMI->setRegisterDefReadUndef(RegUnit);
}
}
@@ -673,17 +671,16 @@ void RegPressureTracker::addLiveRegs(ArrayRef<RegisterMaskPair> Regs) {
void RegPressureTracker::discoverLiveInOrOut(RegisterMaskPair Pair,
SmallVectorImpl<RegisterMaskPair> &LiveInOrOut) {
- assert(Pair.LaneMask != 0);
+ assert(Pair.LaneMask.any());
unsigned RegUnit = Pair.RegUnit;
- auto I = std::find_if(LiveInOrOut.begin(), LiveInOrOut.end(),
- [RegUnit](const RegisterMaskPair &Other) {
- return Other.RegUnit == RegUnit;
- });
+ auto I = find_if(LiveInOrOut, [RegUnit](const RegisterMaskPair &Other) {
+ return Other.RegUnit == RegUnit;
+ });
LaneBitmask PrevMask;
LaneBitmask NewMask;
if (I == LiveInOrOut.end()) {
- PrevMask = 0;
+ PrevMask = LaneBitmask::getNone();
NewMask = Pair.LaneMask;
LiveInOrOut.push_back(Pair);
} else {
@@ -738,14 +735,15 @@ void RegPressureTracker::recede(const RegisterOperands &RegOpers,
LaneBitmask NewMask = PreviousMask & ~Def.LaneMask;
LaneBitmask LiveOut = Def.LaneMask & ~PreviousMask;
- if (LiveOut != 0) {
+ if (LiveOut.any()) {
discoverLiveOut(RegisterMaskPair(Reg, LiveOut));
// Retroactively model effects on pressure of the live out lanes.
- increaseSetPressure(CurrSetPressure, *MRI, Reg, 0, LiveOut);
+ increaseSetPressure(CurrSetPressure, *MRI, Reg, LaneBitmask::getNone(),
+ LiveOut);
PreviousMask = LiveOut;
}
- if (NewMask == 0) {
+ if (NewMask.none()) {
// Add a 0 entry to LiveUses as a marker that the complete vreg has become
// dead.
if (TrackLaneMasks && LiveUses != nullptr)
@@ -762,26 +760,25 @@ void RegPressureTracker::recede(const RegisterOperands &RegOpers,
// Generate liveness for uses.
for (const RegisterMaskPair &Use : RegOpers.Uses) {
unsigned Reg = Use.RegUnit;
- assert(Use.LaneMask != 0);
+ assert(Use.LaneMask.any());
LaneBitmask PreviousMask = LiveRegs.insert(Use);
LaneBitmask NewMask = PreviousMask | Use.LaneMask;
if (NewMask == PreviousMask)
continue;
// Did the register just become live?
- if (PreviousMask == 0) {
+ if (PreviousMask.none()) {
if (LiveUses != nullptr) {
if (!TrackLaneMasks) {
addRegLanes(*LiveUses, RegisterMaskPair(Reg, NewMask));
} else {
- auto I = std::find_if(LiveUses->begin(), LiveUses->end(),
- [Reg](const RegisterMaskPair Other) {
- return Other.RegUnit == Reg;
- });
+ auto I = find_if(*LiveUses, [Reg](const RegisterMaskPair Other) {
+ return Other.RegUnit == Reg;
+ });
bool IsRedef = I != LiveUses->end();
if (IsRedef) {
// ignore re-defs here...
- assert(I->LaneMask == 0);
+ assert(I->LaneMask.none());
removeRegLanes(*LiveUses, RegisterMaskPair(Reg, NewMask));
} else {
addRegLanes(*LiveUses, RegisterMaskPair(Reg, NewMask));
@@ -792,7 +789,7 @@ void RegPressureTracker::recede(const RegisterOperands &RegOpers,
// Discover live outs if this may be the first occurrence of this register.
if (RequireIntervals) {
LaneBitmask LiveOut = getLiveThroughAt(Reg, SlotIdx);
- if (LiveOut != 0)
+ if (LiveOut.any())
discoverLiveOut(RegisterMaskPair(Reg, LiveOut));
}
}
@@ -803,7 +800,7 @@ void RegPressureTracker::recede(const RegisterOperands &RegOpers,
for (const RegisterMaskPair &Def : RegOpers.Defs) {
unsigned RegUnit = Def.RegUnit;
if (TargetRegisterInfo::isVirtualRegister(RegUnit) &&
- (LiveRegs.contains(RegUnit) & Def.LaneMask) == 0)
+ (LiveRegs.contains(RegUnit) & Def.LaneMask).none())
UntiedDefs.insert(RegUnit);
}
}
@@ -819,9 +816,7 @@ void RegPressureTracker::recedeSkipDebugValues() {
static_cast<RegionPressure&>(P).openTop(CurrPos);
// Find the previous instruction.
- do
- --CurrPos;
- while (CurrPos != MBB->begin() && CurrPos->isDebugValue());
+ CurrPos = skipDebugInstructionsBackward(std::prev(CurrPos), MBB->begin());
SlotIndex SlotIdx;
if (RequireIntervals)
@@ -871,7 +866,7 @@ void RegPressureTracker::advance(const RegisterOperands &RegOpers) {
unsigned Reg = Use.RegUnit;
LaneBitmask LiveMask = LiveRegs.contains(Reg);
LaneBitmask LiveIn = Use.LaneMask & ~LiveMask;
- if (LiveIn != 0) {
+ if (LiveIn.any()) {
discoverLiveIn(RegisterMaskPair(Reg, LiveIn));
increaseRegPressure(Reg, LiveMask, LiveMask | LiveIn);
LiveRegs.insert(RegisterMaskPair(Reg, LiveIn));
@@ -879,7 +874,7 @@ void RegPressureTracker::advance(const RegisterOperands &RegOpers) {
// Kill liveness at last uses.
if (RequireIntervals) {
LaneBitmask LastUseMask = getLastUsedLanes(Reg, SlotIdx);
- if (LastUseMask != 0) {
+ if (LastUseMask.any()) {
LiveRegs.erase(RegisterMaskPair(Reg, LastUseMask));
decreaseRegPressure(Reg, LiveMask, LiveMask & ~LastUseMask);
}
@@ -897,9 +892,7 @@ void RegPressureTracker::advance(const RegisterOperands &RegOpers) {
bumpDeadDefs(RegOpers.DeadDefs);
// Find the next instruction.
- do
- ++CurrPos;
- while (CurrPos != MBB->end() && CurrPos->isDebugValue());
+ CurrPos = skipDebugInstructionsForward(std::next(CurrPos), MBB->end());
}
void RegPressureTracker::advance() {
@@ -1192,8 +1185,8 @@ static LaneBitmask findUseBetween(unsigned Reg, LaneBitmask LastUseMask,
unsigned SubRegIdx = MO.getSubReg();
LaneBitmask UseMask = TRI.getSubRegIndexLaneMask(SubRegIdx);
LastUseMask &= ~UseMask;
- if (LastUseMask == 0)
- return 0;
+ if (LastUseMask.none())
+ return LaneBitmask::getNone();
}
}
return LastUseMask;
@@ -1202,7 +1195,8 @@ static LaneBitmask findUseBetween(unsigned Reg, LaneBitmask LastUseMask,
LaneBitmask RegPressureTracker::getLiveLanesAt(unsigned RegUnit,
SlotIndex Pos) const {
assert(RequireIntervals);
- return getLanesWithProperty(*LIS, *MRI, TrackLaneMasks, RegUnit, Pos, ~0u,
+ return getLanesWithProperty(*LIS, *MRI, TrackLaneMasks, RegUnit, Pos,
+ LaneBitmask::getAll(),
[](const LiveRange &LR, SlotIndex Pos) {
return LR.liveAt(Pos);
});
@@ -1212,7 +1206,7 @@ LaneBitmask RegPressureTracker::getLastUsedLanes(unsigned RegUnit,
SlotIndex Pos) const {
assert(RequireIntervals);
return getLanesWithProperty(*LIS, *MRI, TrackLaneMasks, RegUnit,
- Pos.getBaseIndex(), 0,
+ Pos.getBaseIndex(), LaneBitmask::getNone(),
[](const LiveRange &LR, SlotIndex Pos) {
const LiveRange::Segment *S = LR.getSegmentContaining(Pos);
return S != nullptr && S->end == Pos.getRegSlot();
@@ -1222,7 +1216,8 @@ LaneBitmask RegPressureTracker::getLastUsedLanes(unsigned RegUnit,
LaneBitmask RegPressureTracker::getLiveThroughAt(unsigned RegUnit,
SlotIndex Pos) const {
assert(RequireIntervals);
- return getLanesWithProperty(*LIS, *MRI, TrackLaneMasks, RegUnit, Pos, 0u,
+ return getLanesWithProperty(*LIS, *MRI, TrackLaneMasks, RegUnit, Pos,
+ LaneBitmask::getNone(),
[](const LiveRange &LR, SlotIndex Pos) {
const LiveRange::Segment *S = LR.getSegmentContaining(Pos);
return S != nullptr && S->start < Pos.getRegSlot(true) &&
@@ -1253,7 +1248,7 @@ void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) {
for (const RegisterMaskPair &Use : RegOpers.Uses) {
unsigned Reg = Use.RegUnit;
LaneBitmask LastUseMask = getLastUsedLanes(Reg, SlotIdx);
- if (LastUseMask == 0)
+ if (LastUseMask.none())
continue;
// The LastUseMask is queried from the liveness information of instruction
// which may be further down the schedule. Some lanes may actually not be
@@ -1263,7 +1258,7 @@ void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) {
SlotIndex CurrIdx = getCurrSlot();
LastUseMask
= findUseBetween(Reg, LastUseMask, CurrIdx, SlotIdx, *MRI, LIS);
- if (LastUseMask == 0)
+ if (LastUseMask.none())
continue;
LaneBitmask LiveMask = LiveRegs.contains(Reg);
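The hunks above migrate raw `unsigned` lane masks to the LaneBitmask class and its predicates. As a minimal stand-in for that interface (illustrative only; the real class lives in llvm/MC/LaneBitmask.h and differs in detail), the pattern being adopted is:

#include <cstdint>

// Simplified stand-in for llvm::LaneBitmask: one bit per register lane.
struct LaneBitmaskSketch {
  uint32_t Mask;

  static LaneBitmaskSketch getNone() { return {0}; }
  static LaneBitmaskSketch getAll()  { return {~0u}; }

  bool any()  const { return Mask != 0; }    // replaces the old `!= 0` checks
  bool none() const { return Mask == 0; }    // replaces the old `== 0` checks
  bool all()  const { return Mask == ~0u; }  // replaces the old `== ~0u` checks

  LaneBitmaskSketch operator&(LaneBitmaskSketch O) const { return {Mask & O.Mask}; }
  LaneBitmaskSketch operator|(LaneBitmaskSketch O) const { return {Mask | O.Mask}; }
  LaneBitmaskSketch operator~() const { return {~Mask}; }
};

Under such an interface a check like (LiveAfter & ~I->LaneMask) == 0 becomes (LiveAfter & ~I->LaneMask).none(), which is exactly the rewrite applied throughout the hunks above.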
diff --git a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp
index 6b80179190db..de1c35caa1a0 100644
--- a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp
@@ -34,33 +34,12 @@ using namespace llvm;
void RegScavenger::setRegUsed(unsigned Reg, LaneBitmask LaneMask) {
for (MCRegUnitMaskIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) {
LaneBitmask UnitMask = (*RUI).second;
- if (UnitMask == 0 || (LaneMask & UnitMask) != 0)
+ if (UnitMask.none() || (LaneMask & UnitMask).any())
RegUnitsAvailable.reset((*RUI).first);
}
}
-void RegScavenger::initRegState() {
- for (SmallVectorImpl<ScavengedInfo>::iterator I = Scavenged.begin(),
- IE = Scavenged.end(); I != IE; ++I) {
- I->Reg = 0;
- I->Restore = nullptr;
- }
-
- // All register units start out unused.
- RegUnitsAvailable.set();
-
- // Live-in registers are in use.
- for (const auto &LI : MBB->liveins())
- setRegUsed(LI.PhysReg, LI.LaneMask);
-
- // Pristine CSRs are also unavailable.
- const MachineFunction &MF = *MBB->getParent();
- BitVector PR = MF.getFrameInfo()->getPristineRegs(MF);
- for (int I = PR.find_first(); I>0; I = PR.find_next(I))
- setRegUsed(I);
-}
-
-void RegScavenger::enterBasicBlock(MachineBasicBlock &MBB) {
+void RegScavenger::init(MachineBasicBlock &MBB) {
MachineFunction &MF = *MBB.getParent();
TII = MF.getSubtarget().getInstrInfo();
TRI = MF.getSubtarget().getRegisterInfo();
@@ -84,16 +63,56 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock &MBB) {
}
this->MBB = &MBB;
- initRegState();
+ for (SmallVectorImpl<ScavengedInfo>::iterator I = Scavenged.begin(),
+ IE = Scavenged.end(); I != IE; ++I) {
+ I->Reg = 0;
+ I->Restore = nullptr;
+ }
+
+ // All register units start out unused.
+ RegUnitsAvailable.set();
+
+ // Pristine CSRs are not available.
+ BitVector PR = MF.getFrameInfo().getPristineRegs(MF);
+ for (int I = PR.find_first(); I>0; I = PR.find_next(I))
+ setRegUsed(I);
Tracking = false;
}
+void RegScavenger::setLiveInsUsed(const MachineBasicBlock &MBB) {
+ for (const auto &LI : MBB.liveins())
+ setRegUsed(LI.PhysReg, LI.LaneMask);
+}
+
+void RegScavenger::enterBasicBlock(MachineBasicBlock &MBB) {
+ init(MBB);
+ setLiveInsUsed(MBB);
+}
+
+void RegScavenger::enterBasicBlockEnd(MachineBasicBlock &MBB) {
+ init(MBB);
+ // Merge live-ins of successors to get live-outs.
+ for (const MachineBasicBlock *Succ : MBB.successors())
+ setLiveInsUsed(*Succ);
+
+ // Move the internal iterator to the last instruction of the block.
+ if (MBB.begin() != MBB.end()) {
+ MBBI = std::prev(MBB.end());
+ Tracking = true;
+ }
+}
+
void RegScavenger::addRegUnits(BitVector &BV, unsigned Reg) {
for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI)
BV.set(*RUI);
}
+void RegScavenger::removeRegUnits(BitVector &BV, unsigned Reg) {
+ for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI)
+ BV.reset(*RUI);
+}
+
void RegScavenger::determineKillsAndDefs() {
assert(Tracking && "Must be tracking to determine kills and defs");
@@ -245,6 +264,48 @@ void RegScavenger::forward() {
setUsed(DefRegUnits);
}
+void RegScavenger::backward() {
+ assert(Tracking && "Must be tracking to determine kills and defs");
+
+ const MachineInstr &MI = *MBBI;
+ // Defined or clobbered registers are available now.
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isRegMask()) {
+ for (unsigned RU = 0, RUEnd = TRI->getNumRegUnits(); RU != RUEnd;
+ ++RU) {
+ for (MCRegUnitRootIterator RURI(RU, TRI); RURI.isValid(); ++RURI) {
+ if (MO.clobbersPhysReg(*RURI)) {
+ RegUnitsAvailable.set(RU);
+ break;
+ }
+ }
+ }
+ } else if (MO.isReg() && MO.isDef()) {
+ unsigned Reg = MO.getReg();
+ if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) ||
+ isReserved(Reg))
+ continue;
+ addRegUnits(RegUnitsAvailable, Reg);
+ }
+ }
+ // Mark read registers as unavailable.
+ for (const MachineOperand &MO : MI.uses()) {
+ if (MO.isReg() && MO.readsReg()) {
+ unsigned Reg = MO.getReg();
+ if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) ||
+ isReserved(Reg))
+ continue;
+ removeRegUnits(RegUnitsAvailable, Reg);
+ }
+ }
+
+ if (MBBI == MBB->begin()) {
+ MBBI = MachineBasicBlock::iterator(nullptr);
+ Tracking = false;
+ } else
+ --MBBI;
+}
+
bool RegScavenger::isRegUsed(unsigned Reg, bool includeReserved) const {
if (includeReserved && isReserved(Reg))
return true;
@@ -358,7 +419,8 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
for (const MachineOperand &MO : MI.operands()) {
if (MO.isReg() && MO.getReg() != 0 && !(MO.isUse() && MO.isUndef()) &&
!TargetRegisterInfo::isVirtualRegister(MO.getReg()))
- Candidates.reset(MO.getReg());
+ for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI)
+ Candidates.reset(*AI);
}
// Try to find a register that's unused if there is one, as then we won't
@@ -380,7 +442,7 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
// Find an available scavenging slot with size and alignment matching
// the requirements of the class RC.
- const MachineFrameInfo &MFI = *MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
unsigned NeedSize = RC->getSize();
unsigned NeedAlign = RC->getAlignment();
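The new enterBasicBlockEnd()/backward() pair added above enables bottom-up liveness tracking in the scavenger. A hedged usage sketch follows; the driver loop is an assumption for illustration, not code from this patch, and it relies only on the members shown above plus MachineBasicBlock::size():

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/RegisterScavenging.h"

// Illustrative driver for the bottom-up API: seed availability from successor
// live-ins, then step toward the block start one instruction at a time. Each
// backward() call processes the instruction under the internal iterator and
// then moves that iterator up.
static void scanBlockBottomUp(llvm::RegScavenger &RS,
                              llvm::MachineBasicBlock &MBB) {
  if (MBB.empty())
    return;
  RS.enterBasicBlockEnd(MBB);          // tracker now sits on the last instruction
  for (unsigned N = MBB.size(); N-- > 0;)
    RS.backward();                     // after the loop, state reflects block entry
}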
diff --git a/contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp b/contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp
index 5cf3e57eb3d3..66f196678dea 100644
--- a/contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterUsageInfo.cpp
@@ -22,7 +22,7 @@ using namespace llvm;
#define DEBUG_TYPE "ip-regalloc"
-cl::opt<bool> DumpRegUsage(
+static cl::opt<bool> DumpRegUsage(
"print-regusage", cl::init(false), cl::Hidden,
cl::desc("print register usage details collected for analysis."));
diff --git a/contrib/llvm/lib/CodeGen/RenameIndependentSubregs.cpp b/contrib/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
index ea952d9088fc..2f7ee8bf414c 100644
--- a/contrib/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
+++ b/contrib/llvm/lib/CodeGen/RenameIndependentSubregs.cpp
@@ -48,7 +48,7 @@ public:
static char ID;
RenameIndependentSubregs() : MachineFunctionPass(ID) {}
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "Rename Disconnected Subregister Components";
}
@@ -184,7 +184,7 @@ bool RenameIndependentSubregs::findComponents(IntEqClasses &Classes,
unsigned MergedID = ~0u;
for (RenameIndependentSubregs::SubRangeInfo &SRInfo : SubRangeInfos) {
const LiveInterval::SubRange &SR = *SRInfo.SR;
- if ((SR.LaneMask & LaneMask) == 0)
+ if ((SR.LaneMask & LaneMask).none())
continue;
SlotIndex Pos = LIS->getInstructionIndex(*MO.getParent());
Pos = MO.isDef() ? Pos.getRegSlot(MO.isEarlyClobber())
@@ -219,24 +219,23 @@ void RenameIndependentSubregs::rewriteOperands(const IntEqClasses &Classes,
if (!MO.isDef() && !MO.readsReg())
continue;
- MachineInstr &MI = *MO.getParent();
-
- SlotIndex Pos = LIS->getInstructionIndex(MI);
+ SlotIndex Pos = LIS->getInstructionIndex(*MO.getParent());
+ Pos = MO.isDef() ? Pos.getRegSlot(MO.isEarlyClobber())
+ : Pos.getBaseIndex();
unsigned SubRegIdx = MO.getSubReg();
LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubRegIdx);
unsigned ID = ~0u;
for (const SubRangeInfo &SRInfo : SubRangeInfos) {
const LiveInterval::SubRange &SR = *SRInfo.SR;
- if ((SR.LaneMask & LaneMask) == 0)
+ if ((SR.LaneMask & LaneMask).none())
continue;
- LiveRange::const_iterator I = SR.find(Pos);
- if (I == SR.end())
+ const VNInfo *VNI = SR.getVNInfoAt(Pos);
+ if (VNI == nullptr)
continue;
- const VNInfo &VNI = *I->valno;
// Map to local representative ID.
- unsigned LocalID = SRInfo.ConEQ.getEqClass(&VNI);
+ unsigned LocalID = SRInfo.ConEQ.getEqClass(VNI);
// Global ID
ID = Classes[LocalID + SRInfo.Index];
break;
@@ -354,19 +353,24 @@ void RenameIndependentSubregs::computeMainRangesFixFlags(
if (I == 0)
LI.clear();
LIS->constructMainRangeFromSubranges(LI);
+ // A def of a subregister may be a use of other register lanes. Replacing
+ // such a def with a def of a different register will eliminate the use,
+ // and may cause the recorded live range to be larger than the actual
+ // liveness in the program IR.
+ LIS->shrinkToUses(&LI);
}
}
bool RenameIndependentSubregs::runOnMachineFunction(MachineFunction &MF) {
// Skip renaming if liveness of subregister is not tracked.
- if (!MF.getSubtarget().enableSubRegLiveness())
+ MRI = &MF.getRegInfo();
+ if (!MRI->subRegLivenessEnabled())
return false;
DEBUG(dbgs() << "Renaming independent subregister live ranges in "
<< MF.getName() << '\n');
LIS = &getAnalysis<LiveIntervals>();
- MRI = &MF.getRegInfo();
TII = MF.getSubtarget().getInstrInfo();
// Iterate over all vregs. Note that we query getNumVirtRegs() the newly
diff --git a/contrib/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp b/contrib/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp
new file mode 100644
index 000000000000..451964199ba5
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/ResetMachineFunctionPass.cpp
@@ -0,0 +1,67 @@
+//===-- ResetMachineFunctionPass.cpp - Reset Machine Function ----*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements a pass that will conditionally reset a machine
+/// function as if it was just created. This is used to provide a fallback
+/// mechanism when GlobalISel fails, thus the condition for the reset to
+/// happen is that the MachineFunction has the FailedISel property.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "reset-machine-function"
+
+STATISTIC(NumFunctionsReset, "Number of functions reset");
+
+namespace {
+ class ResetMachineFunction : public MachineFunctionPass {
+ /// Tells whether or not this pass should emit a fallback
+ /// diagnostic when it resets a function.
+ bool EmitFallbackDiag;
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ ResetMachineFunction(bool EmitFallbackDiag = false)
+ : MachineFunctionPass(ID), EmitFallbackDiag(EmitFallbackDiag) {}
+
+ StringRef getPassName() const override { return "ResetMachineFunction"; }
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ if (MF.getProperties().hasProperty(
+ MachineFunctionProperties::Property::FailedISel)) {
+ DEBUG(dbgs() << "Resetting: " << MF.getName() << '\n');
+ ++NumFunctionsReset;
+ MF.reset();
+ if (EmitFallbackDiag) {
+ const Function &F = *MF.getFunction();
+ DiagnosticInfoISelFallback DiagFallback(F);
+ F.getContext().diagnose(DiagFallback);
+ }
+ return true;
+ }
+ return false;
+ }
+
+ };
+} // end anonymous namespace
+
+char ResetMachineFunction::ID = 0;
+INITIALIZE_PASS(ResetMachineFunction, DEBUG_TYPE,
+ "reset machine function if ISel failed", false, false)
+
+MachineFunctionPass *
+llvm::createResetMachineFunctionPass(bool EmitFallbackDiag) {
+ return new ResetMachineFunction(EmitFallbackDiag);
+}
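A sketch of how the new pass might be scheduled by a GlobalISel-enabled pipeline; the pass-manager plumbing here is an assumption for illustration, and only createResetMachineFunctionPass() comes from the file above:

#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/LegacyPassManager.h"

// Illustrative only: after GlobalISel runs, reset any function whose selection
// failed so a SelectionDAG-based fallback path can retry it from scratch.
static void addFallbackReset(llvm::legacy::PassManagerBase &PM,
                             bool EmitFallbackDiag) {
  PM.add(llvm::createResetMachineFunctionPass(EmitFallbackDiag));
  // ... SelectionDAG instruction selection passes would follow here ...
}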
diff --git a/contrib/llvm/lib/CodeGen/SafeStack.cpp b/contrib/llvm/lib/CodeGen/SafeStack.cpp
index 4a1b9958a5b5..2b82df293c14 100644
--- a/contrib/llvm/lib/CodeGen/SafeStack.cpp
+++ b/contrib/llvm/lib/CodeGen/SafeStack.cpp
@@ -52,17 +52,6 @@ using namespace llvm::safestack;
#define DEBUG_TYPE "safestack"
-enum UnsafeStackPtrStorageVal { ThreadLocalUSP, SingleThreadUSP };
-
-static cl::opt<UnsafeStackPtrStorageVal> USPStorage("safe-stack-usp-storage",
- cl::Hidden, cl::init(ThreadLocalUSP),
- cl::desc("Type of storage for the unsafe stack pointer"),
- cl::values(clEnumValN(ThreadLocalUSP, "thread-local",
- "Thread-local storage"),
- clEnumValN(SingleThreadUSP, "single-thread",
- "Non-thread-local storage"),
- clEnumValEnd));
-
namespace llvm {
STATISTIC(NumFunctions, "Total number of functions");
@@ -124,9 +113,6 @@ class SafeStack : public FunctionPass {
/// might expect to appear on the stack on most common targets.
enum { StackAlignment = 16 };
- /// \brief Build a value representing a pointer to the unsafe stack pointer.
- Value *getOrCreateUnsafeStackPtr(IRBuilder<> &IRB, Function &F);
-
/// \brief Return the value of the stack canary.
Value *getStackGuard(IRBuilder<> &IRB, Function &F);
@@ -356,46 +342,8 @@ bool SafeStack::IsSafeStackAlloca(const Value *AllocaPtr, uint64_t AllocaSize) {
return true;
}
-Value *SafeStack::getOrCreateUnsafeStackPtr(IRBuilder<> &IRB, Function &F) {
- // Check if there is a target-specific location for the unsafe stack pointer.
- if (TL)
- if (Value *V = TL->getSafeStackPointerLocation(IRB))
- return V;
-
- // Otherwise, assume the target links with compiler-rt, which provides a
- // thread-local variable with a magic name.
- Module &M = *F.getParent();
- const char *UnsafeStackPtrVar = "__safestack_unsafe_stack_ptr";
- auto UnsafeStackPtr =
- dyn_cast_or_null<GlobalVariable>(M.getNamedValue(UnsafeStackPtrVar));
-
- bool UseTLS = USPStorage == ThreadLocalUSP;
-
- if (!UnsafeStackPtr) {
- auto TLSModel = UseTLS ?
- GlobalValue::InitialExecTLSModel :
- GlobalValue::NotThreadLocal;
- // The global variable is not defined yet, define it ourselves.
- // We use the initial-exec TLS model because we do not support the
- // variable living anywhere other than in the main executable.
- UnsafeStackPtr = new GlobalVariable(
- M, StackPtrTy, false, GlobalValue::ExternalLinkage, nullptr,
- UnsafeStackPtrVar, nullptr, TLSModel);
- } else {
- // The variable exists, check its type and attributes.
- if (UnsafeStackPtr->getValueType() != StackPtrTy)
- report_fatal_error(Twine(UnsafeStackPtrVar) + " must have void* type");
- if (UseTLS != UnsafeStackPtr->isThreadLocal())
- report_fatal_error(Twine(UnsafeStackPtrVar) + " must " +
- (UseTLS ? "" : "not ") + "be thread-local");
- }
- return UnsafeStackPtr;
-}
-
Value *SafeStack::getStackGuard(IRBuilder<> &IRB, Function &F) {
- Value *StackGuardVar = nullptr;
- if (TL)
- StackGuardVar = TL->getIRStackGuard(IRB);
+ Value *StackGuardVar = TL->getIRStackGuard(IRB);
if (!StackGuardVar)
StackGuardVar =
F.getParent()->getOrInsertGlobal("__stack_chk_guard", StackPtrTy);
@@ -752,7 +700,9 @@ bool SafeStack::runOnFunction(Function &F) {
return false;
}
- TL = TM ? TM->getSubtargetImpl(F)->getTargetLowering() : nullptr;
+ if (!TM)
+ report_fatal_error("Target machine is required");
+ TL = TM->getSubtargetImpl(F)->getTargetLowering();
SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
++NumFunctions;
@@ -764,7 +714,7 @@ bool SafeStack::runOnFunction(Function &F) {
// Collect all points where stack gets unwound and needs to be restored
// This is only necessary because the runtime (setjmp and unwind code) is
- // not aware of the unsafe stack and won't unwind/restore it prorerly.
+ // not aware of the unsafe stack and won't unwind/restore it properly.
// To work around this problem without changing the runtime, we insert
// instrumentation to restore the unsafe stack pointer when necessary.
SmallVector<Instruction *, 4> StackRestorePoints;
@@ -786,7 +736,7 @@ bool SafeStack::runOnFunction(Function &F) {
++NumUnsafeStackRestorePointsFunctions;
IRBuilder<> IRB(&F.front(), F.begin()->getFirstInsertionPt());
- UnsafeStackPtr = getOrCreateUnsafeStackPtr(IRB, F);
+ UnsafeStackPtr = TL->getSafeStackPointerLocation(IRB);
// Load the current stack pointer (we'll also use it as a base pointer).
// FIXME: use a dedicated register for it ?
diff --git a/contrib/llvm/lib/CodeGen/SafeStackColoring.cpp b/contrib/llvm/lib/CodeGen/SafeStackColoring.cpp
index 795eb8d27191..7fbeaddb38e8 100644
--- a/contrib/llvm/lib/CodeGen/SafeStackColoring.cpp
+++ b/contrib/llvm/lib/CodeGen/SafeStackColoring.cpp
@@ -214,10 +214,12 @@ void StackColoring::calculateLiveIntervals() {
unsigned AllocaNo = It.second.AllocaNo;
if (IsStart) {
- assert(!Started.test(AllocaNo));
- Started.set(AllocaNo);
- Ended.reset(AllocaNo);
- Start[AllocaNo] = InstNo;
+ assert(!Started.test(AllocaNo) || Start[AllocaNo] == BBStart);
+ if (!Started.test(AllocaNo)) {
+ Started.set(AllocaNo);
+ Ended.reset(AllocaNo);
+ Start[AllocaNo] = InstNo;
+ }
} else {
assert(!Ended.test(AllocaNo));
if (Started.test(AllocaNo)) {
diff --git a/contrib/llvm/lib/CodeGen/SafeStackLayout.cpp b/contrib/llvm/lib/CodeGen/SafeStackLayout.cpp
index fb433c1856a6..7d4dbd13abf4 100644
--- a/contrib/llvm/lib/CodeGen/SafeStackLayout.cpp
+++ b/contrib/llvm/lib/CodeGen/SafeStackLayout.cpp
@@ -132,6 +132,14 @@ void StackLayout::computeLayout() {
// If this is replaced with something smarter, it must preserve the property
// that the first object is always at the offset 0 in the stack frame (for
// StackProtectorSlot), or handle stack protector in some other way.
+
+ // Sort objects by size (largest first) to reduce fragmentation.
+ if (StackObjects.size() > 2)
+ std::stable_sort(StackObjects.begin() + 1, StackObjects.end(),
+ [](const StackObject &a, const StackObject &b) {
+ return a.Size > b.Size;
+ });
+
for (auto &Obj : StackObjects)
layoutObject(Obj);
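The added stable_sort orders everything after the first object by decreasing size while keeping the first slot pinned for the stack protector. The same idea in self-contained form; StackObjectSketch is a stand-in struct, not the real SafeStackLayout type:

#include <algorithm>
#include <vector>

struct StackObjectSketch { unsigned Size; unsigned Alignment; };

// Keep element 0 in place (reserved for the stack protector slot) and sort the
// remaining objects largest-first to reduce fragmentation during layout.
static void orderForLayout(std::vector<StackObjectSketch> &Objects) {
  if (Objects.size() > 2)
    std::stable_sort(Objects.begin() + 1, Objects.end(),
                     [](const StackObjectSketch &A, const StackObjectSketch &B) {
                       return A.Size > B.Size;
                     });
}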
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
index efde61ece639..1f0c3283ceb1 100644
--- a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
@@ -139,8 +139,7 @@ void SUnit::removePred(const SDep &D) {
SDep P = D;
P.setSUnit(this);
SUnit *N = D.getSUnit();
- SmallVectorImpl<SDep>::iterator Succ = std::find(N->Succs.begin(),
- N->Succs.end(), P);
+ SmallVectorImpl<SDep>::iterator Succ = find(N->Succs, P);
assert(Succ != N->Succs.end() && "Mismatching preds / succs lists!");
N->Succs.erase(Succ);
Preds.erase(I);
@@ -311,10 +310,20 @@ void SUnit::biasCriticalPath() {
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+static void dumpSUIdentifier(const ScheduleDAG &DAG, const SUnit &SU) {
+ if (&SU == &DAG.ExitSU)
+ dbgs() << "ExitSU";
+ else if (&SU == &DAG.EntrySU)
+ dbgs() << "EntrySU";
+ else
+ dbgs() << "SU(" << SU.NodeNum << ")";
+}
+
/// SUnit - Scheduling unit. It's a wrapper around either a single SDNode or
/// a group of nodes flagged together.
void SUnit::dump(const ScheduleDAG *G) const {
- dbgs() << "SU(" << NodeNum << "): ";
+ dumpSUIdentifier(*G, *this);
+ dbgs() << ": ";
G->dumpNode(this);
}
@@ -338,12 +347,12 @@ void SUnit::dumpAll(const ScheduleDAG *G) const {
I != E; ++I) {
dbgs() << " ";
switch (I->getKind()) {
- case SDep::Data: dbgs() << "val "; break;
- case SDep::Anti: dbgs() << "anti"; break;
- case SDep::Output: dbgs() << "out "; break;
- case SDep::Order: dbgs() << "ch "; break;
+ case SDep::Data: dbgs() << "data "; break;
+ case SDep::Anti: dbgs() << "anti "; break;
+ case SDep::Output: dbgs() << "out "; break;
+ case SDep::Order: dbgs() << "ord "; break;
}
- dbgs() << "SU(" << I->getSUnit()->NodeNum << ")";
+ dumpSUIdentifier(*G, *I->getSUnit());
if (I->isArtificial())
dbgs() << " *";
dbgs() << ": Latency=" << I->getLatency();
@@ -358,12 +367,12 @@ void SUnit::dumpAll(const ScheduleDAG *G) const {
I != E; ++I) {
dbgs() << " ";
switch (I->getKind()) {
- case SDep::Data: dbgs() << "val "; break;
- case SDep::Anti: dbgs() << "anti"; break;
- case SDep::Output: dbgs() << "out "; break;
- case SDep::Order: dbgs() << "ch "; break;
+ case SDep::Data: dbgs() << "data "; break;
+ case SDep::Anti: dbgs() << "anti "; break;
+ case SDep::Output: dbgs() << "out "; break;
+ case SDep::Order: dbgs() << "ord "; break;
}
- dbgs() << "SU(" << I->getSUnit()->NodeNum << ")";
+ dumpSUIdentifier(*G, *I->getSUnit());
if (I->isArtificial())
dbgs() << " *";
dbgs() << ": Latency=" << I->getLatency();
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index 22bfd4d1964c..611c5a71bd5a 100644
--- a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -77,7 +77,7 @@ static unsigned getReductionSize() {
static void dumpSUList(ScheduleDAGInstrs::SUList &L) {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
dbgs() << "{ ";
- for (auto *su : L) {
+ for (const SUnit *su : L) {
dbgs() << "SU(" << su->NodeNum << ")";
if (su != L.back())
dbgs() << ", ";
@@ -142,9 +142,7 @@ static void getUnderlyingObjects(const Value *V,
SmallVector<Value *, 4> Objs;
GetUnderlyingObjects(const_cast<Value *>(V), Objs, DL);
- for (SmallVectorImpl<Value *>::iterator I = Objs.begin(), IE = Objs.end();
- I != IE; ++I) {
- V = *I;
+ for (Value *V : Objs) {
if (!Visited.insert(V).second)
continue;
if (Operator::getOpcode(V) == Instruction::IntToPtr) {
@@ -164,7 +162,7 @@ static void getUnderlyingObjects(const Value *V,
/// information and it can be tracked to a normal reference to a known
/// object, return the Value for that object.
static void getUnderlyingObjectsForInstr(const MachineInstr *MI,
- const MachineFrameInfo *MFI,
+ const MachineFrameInfo &MFI,
UnderlyingObjectsVector &Objects,
const DataLayout &DL) {
auto allMMOsOkay = [&]() {
@@ -178,16 +176,16 @@ static void getUnderlyingObjectsForInstr(const MachineInstr *MI,
// overlapping locations. The client code calling this function assumes
// this is not the case. So return a conservative answer of no known
// object.
- if (MFI->hasTailCall())
+ if (MFI.hasTailCall())
return false;
// For now, ignore PseudoSourceValues which may alias LLVM IR values
// because the code that uses this function has no way to cope with
// such aliases.
- if (PSV->isAliased(MFI))
+ if (PSV->isAliased(&MFI))
return false;
- bool MayAlias = PSV->mayAlias(MFI);
+ bool MayAlias = PSV->mayAlias(&MFI);
Objects.push_back(UnderlyingObjectsVector::value_type(PSV, MayAlias));
} else if (const Value *V = MMO->getValue()) {
SmallVector<Value *, 4> Objs;
@@ -249,32 +247,27 @@ void ScheduleDAGInstrs::exitRegion() {
void ScheduleDAGInstrs::addSchedBarrierDeps() {
MachineInstr *ExitMI = RegionEnd != BB->end() ? &*RegionEnd : nullptr;
ExitSU.setInstr(ExitMI);
- bool AllDepKnown = ExitMI &&
- (ExitMI->isCall() || ExitMI->isBarrier());
- if (ExitMI && AllDepKnown) {
- // If it's a call or a barrier, add dependencies on the defs and uses of
- // instruction.
- for (unsigned i = 0, e = ExitMI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = ExitMI->getOperand(i);
+ // Add dependencies on the defs and uses of the instruction.
+ if (ExitMI) {
+ for (const MachineOperand &MO : ExitMI->operands()) {
if (!MO.isReg() || MO.isDef()) continue;
unsigned Reg = MO.getReg();
- if (Reg == 0) continue;
-
- if (TRI->isPhysicalRegister(Reg))
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg));
- else if (MO.readsReg()) // ignore undef operands
- addVRegUseDeps(&ExitSU, i);
+ } else if (TargetRegisterInfo::isVirtualRegister(Reg) && MO.readsReg()) {
+ addVRegUseDeps(&ExitSU, ExitMI->getOperandNo(&MO));
+ }
}
- } else {
+ }
+ if (!ExitMI || (!ExitMI->isCall() && !ExitMI->isBarrier())) {
// For others, e.g. fallthrough, conditional branch, assume the exit
// uses all the registers that are livein to the successor blocks.
- assert(Uses.empty() && "Uses in set before adding deps?");
- for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
- SE = BB->succ_end(); SI != SE; ++SI)
- for (const auto &LI : (*SI)->liveins()) {
+ for (const MachineBasicBlock *Succ : BB->successors()) {
+ for (const auto &LI : Succ->liveins()) {
if (!Uses.contains(LI.PhysReg))
Uses.insert(PhysRegSUOper(&ExitSU, -1, LI.PhysReg));
}
+ }
}
}
@@ -326,6 +319,10 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
MachineInstr *MI = SU->getInstr();
MachineOperand &MO = MI->getOperand(OperIdx);
+ unsigned Reg = MO.getReg();
+ // We do not need to track any dependencies for constant registers.
+ if (MRI.isConstantPhysReg(Reg))
+ return;
// Optionally add output and anti dependencies. For anti
// dependencies we use a latency of 0 because for a multi-issue
@@ -334,8 +331,7 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
// TODO: Using a latency of 1 here for output dependencies assumes
// there's no cost for reusing registers.
SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
- for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
- Alias.isValid(); ++Alias) {
+ for (MCRegAliasIterator Alias(Reg, TRI, true); Alias.isValid(); ++Alias) {
if (!Defs.contains(*Alias))
continue;
for (Reg2SUnitsMap::iterator I = Defs.find(*Alias); I != Defs.end(); ++I) {
@@ -362,13 +358,11 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
// Either insert a new Reg2SUnits entry with an empty SUnits list, or
// retrieve the existing SUnits list for this register's uses.
// Push this SUnit on the use list.
- Uses.insert(PhysRegSUOper(SU, OperIdx, MO.getReg()));
+ Uses.insert(PhysRegSUOper(SU, OperIdx, Reg));
if (RemoveKillFlags)
MO.setIsKill(false);
- }
- else {
+ } else {
addPhysRegDataDeps(SU, OperIdx);
- unsigned Reg = MO.getReg();
// clear this register's use list
if (Uses.contains(Reg))
@@ -404,7 +398,7 @@ LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const
// No point in tracking lanemasks if we don't have interesting subregisters.
const TargetRegisterClass &RC = *MRI.getRegClass(Reg);
if (!RC.HasDisjunctSubRegs)
- return ~0u;
+ return LaneBitmask::getAll();
unsigned SubReg = MO.getSubReg();
if (SubReg == 0)
@@ -430,14 +424,14 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
DefLaneMask = getLaneMaskForMO(MO);
// If we have a <read-undef> flag, none of the lane values comes from an
// earlier instruction.
- KillLaneMask = IsKill ? ~0u : DefLaneMask;
+ KillLaneMask = IsKill ? LaneBitmask::getAll() : DefLaneMask;
// Clear undef flag, we'll re-add it later once we know which subregister
// Def is first.
MO.setIsUndef(false);
} else {
- DefLaneMask = ~0u;
- KillLaneMask = ~0u;
+ DefLaneMask = LaneBitmask::getAll();
+ KillLaneMask = LaneBitmask::getAll();
}
if (MO.isDead()) {
@@ -450,12 +444,12 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
E = CurrentVRegUses.end(); I != E; /*empty*/) {
LaneBitmask LaneMask = I->LaneMask;
// Ignore uses of other lanes.
- if ((LaneMask & KillLaneMask) == 0) {
+ if ((LaneMask & KillLaneMask).none()) {
++I;
continue;
}
- if ((LaneMask & DefLaneMask) != 0) {
+ if ((LaneMask & DefLaneMask).any()) {
SUnit *UseSU = I->SU;
MachineInstr *Use = UseSU->getInstr();
SDep Dep(SU, SDep::Data, Reg);
@@ -467,7 +461,7 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
LaneMask &= ~KillLaneMask;
// If we found a Def for all lanes of this use, remove it from the list.
- if (LaneMask != 0) {
+ if (LaneMask.any()) {
I->LaneMask = LaneMask;
++I;
} else
@@ -490,7 +484,7 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg),
CurrentVRegDefs.end())) {
// Ignore defs for other lanes.
- if ((V2SU.LaneMask & LaneMask) == 0)
+ if ((V2SU.LaneMask & LaneMask).none())
continue;
// Add an output dependence.
SUnit *DefSU = V2SU.SU;
@@ -513,11 +507,11 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
LaneBitmask NonOverlapMask = V2SU.LaneMask & ~LaneMask;
V2SU.SU = SU;
V2SU.LaneMask = OverlapMask;
- if (NonOverlapMask != 0)
+ if (NonOverlapMask.any())
CurrentVRegDefs.insert(VReg2SUnit(Reg, NonOverlapMask, DefSU));
}
// If there was no CurrentVRegDefs entry for some lanes yet, create one.
- if (LaneMask != 0)
+ if (LaneMask.any())
CurrentVRegDefs.insert(VReg2SUnit(Reg, LaneMask, SU));
}
@@ -533,7 +527,8 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
unsigned Reg = MO.getReg();
// Remember the use. Data dependencies will be added when we find the def.
- LaneBitmask LaneMask = TrackLaneMasks ? getLaneMaskForMO(MO) : ~0u;
+ LaneBitmask LaneMask = TrackLaneMasks ? getLaneMaskForMO(MO)
+ : LaneBitmask::getAll();
CurrentVRegUses.insert(VReg2SUnitOperIdx(Reg, LaneMask, OperIdx, SU));
// Add antidependences to the following defs of the vreg.
@@ -541,7 +536,7 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
CurrentVRegDefs.end())) {
// Ignore defs for unrelated lanes.
LaneBitmask PrevDefLaneMask = V2SU.LaneMask;
- if ((PrevDefLaneMask & LaneMask) == 0)
+ if ((PrevDefLaneMask & LaneMask).none())
continue;
if (V2SU.SU == SU)
continue;
@@ -554,7 +549,7 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
/// (like a call or something with unmodeled side effects).
static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) {
return MI->isCall() || MI->hasUnmodeledSideEffects() ||
- (MI->hasOrderedMemoryRef() && !MI->isInvariantLoad(AA));
+ (MI->hasOrderedMemoryRef() && !MI->isDereferenceableInvariantLoad(AA));
}
/// This returns true if the two MIs need a chain edge between them.
@@ -621,8 +616,8 @@ static bool MIsNeedChainEdge(AliasAnalysis *AA, const MachineFrameInfo *MFI,
/// Check whether two objects need a chain edge and add it if needed.
void ScheduleDAGInstrs::addChainDependency (SUnit *SUa, SUnit *SUb,
unsigned Latency) {
- if (MIsNeedChainEdge(AAForDep, MFI, MF.getDataLayout(), SUa->getInstr(),
- SUb->getInstr())) {
+ if (MIsNeedChainEdge(AAForDep, &MFI, MF.getDataLayout(), SUa->getInstr(),
+ SUb->getInstr())) {
SDep Dep(SUa, SDep::MayAliasMem);
Dep.setLatency(Latency);
SUb->addPred(Dep);
@@ -668,10 +663,10 @@ void ScheduleDAGInstrs::initSUnits() {
// within an out-of-order core. These are identified by BufferSize=1.
if (SchedModel.hasInstrSchedModel()) {
const MCSchedClassDesc *SC = getSchedClass(SU);
- for (TargetSchedModel::ProcResIter
- PI = SchedModel.getWriteProcResBegin(SC),
- PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) {
- switch (SchedModel.getProcResource(PI->ProcResourceIdx)->BufferSize) {
+ for (const MCWriteProcResEntry &PRE :
+ make_range(SchedModel.getWriteProcResBegin(SC),
+ SchedModel.getWriteProcResEnd(SC))) {
+ switch (SchedModel.getProcResource(PRE.ProcResourceIdx)->BufferSize) {
case 0:
SU->hasReservedResource = true;
break;
@@ -686,44 +681,6 @@ void ScheduleDAGInstrs::initSUnits() {
}
}
-void ScheduleDAGInstrs::collectVRegUses(SUnit *SU) {
- const MachineInstr *MI = SU->getInstr();
- for (const MachineOperand &MO : MI->operands()) {
- if (!MO.isReg())
- continue;
- if (!MO.readsReg())
- continue;
- if (TrackLaneMasks && !MO.isUse())
- continue;
-
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
- continue;
-
- // Ignore re-defs.
- if (TrackLaneMasks) {
- bool FoundDef = false;
- for (const MachineOperand &MO2 : MI->operands()) {
- if (MO2.isReg() && MO2.isDef() && MO2.getReg() == Reg && !MO2.isDead()) {
- FoundDef = true;
- break;
- }
- }
- if (FoundDef)
- continue;
- }
-
- // Record this local VReg use.
- VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg);
- for (; UI != VRegUses.end(); ++UI) {
- if (UI->SU == SU)
- break;
- }
- if (UI == VRegUses.end())
- VRegUses.insert(VReg2SUnit(Reg, 0, SU));
- }
-}
-
class ScheduleDAGInstrs::Value2SUsMap : public MapVector<ValueType, SUList> {
/// Current total number of SUs in map.
@@ -901,9 +858,6 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
CurrentVRegDefs.setUniverse(NumVirtRegs);
CurrentVRegUses.setUniverse(NumVirtRegs);
- VRegUses.clear();
- VRegUses.setUniverse(NumVirtRegs);
-
// Model data dependencies between instructions being scheduled and the
// ExitSU.
addSchedBarrierDeps();
@@ -926,8 +880,6 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
assert(SU && "No SUnit mapped to this MI");
if (RPTracker) {
- collectVRegUses(SU);
-
RegisterOperands RegOpers;
RegOpers.collect(MI, *TRI, MRI, TrackLaneMasks, false);
if (TrackLaneMasks) {
@@ -957,12 +909,9 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
if (!MO.isReg() || !MO.isDef())
continue;
unsigned Reg = MO.getReg();
- if (Reg == 0)
- continue;
-
- if (TRI->isPhysicalRegister(Reg))
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
addPhysRegDeps(SU, j);
- else {
+ } else if (TargetRegisterInfo::isVirtualRegister(Reg)) {
HasVRegDef = true;
addVRegDefDeps(SU, j);
}
@@ -977,13 +926,11 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
if (!MO.isReg() || !MO.isUse())
continue;
unsigned Reg = MO.getReg();
- if (Reg == 0)
- continue;
-
- if (TRI->isPhysicalRegister(Reg))
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
addPhysRegDeps(SU, j);
- else if (MO.readsReg()) // ignore undef operands
+ } else if (TargetRegisterInfo::isVirtualRegister(Reg) && MO.readsReg()) {
addVRegUseDeps(SU, j);
+ }
}
// If we haven't seen any uses in this scheduling region, create a
@@ -1023,7 +970,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
}
// If it's not a store or a variant load, we're done.
- if (!MI.mayStore() && !(MI.mayLoad() && !MI.isInvariantLoad(AA)))
+ if (!MI.mayStore() &&
+ !(MI.mayLoad() && !MI.isDereferenceableInvariantLoad(AA)))
continue;
// Always add a dependency edge to BarrierChain if present.
@@ -1200,9 +1148,8 @@ void ScheduleDAGInstrs::startBlockForKills(MachineBasicBlock *BB) {
LiveRegs.reset();
// Examine the live-in regs of all successors.
- for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
- SE = BB->succ_end(); SI != SE; ++SI) {
- for (const auto &LI : (*SI)->liveins()) {
+ for (const MachineBasicBlock *Succ : BB->successors()) {
+ for (const auto &LI : Succ->liveins()) {
// Repeat, for reg and all subregs.
for (MCSubRegIterator SubRegs(LI.PhysReg, TRI, /*IncludeSelf=*/true);
SubRegs.isValid(); ++SubRegs)
@@ -1225,7 +1172,7 @@ static void toggleBundleKillFlag(MachineInstr *MI, unsigned Reg,
// might set it on too many operands. We will clear as many flags as we
// can though.
MachineBasicBlock::instr_iterator Begin = MI->getIterator();
- MachineBasicBlock::instr_iterator End = getBundleEnd(*MI);
+ MachineBasicBlock::instr_iterator End = getBundleEnd(Begin);
while (Begin != End) {
if (NewKillState) {
if ((--End)->addRegisterKilled(Reg, TRI, /* addIfNotFound= */ false))
@@ -1312,6 +1259,11 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) {
// register is used multiple times we only set the kill flag on
// the first use. Don't set kill flags on undef operands.
killedRegs.reset();
+
+ // toggleKillFlag can append new operands (implicit defs), so using
+ // a range-based loop is not safe. The new operands will be appended
+ // at the end of the operand list and they don't need to be visited,
+ // so iterating until the currently last operand is ok.
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue;
@@ -1337,13 +1289,12 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) {
if (MO.isKill() != kill) {
DEBUG(dbgs() << "Fixing " << MO << " in ");
- // Warning: toggleKillFlag may invalidate MO.
toggleKillFlag(&MI, MO);
DEBUG(MI.dump());
DEBUG({
if (MI.getOpcode() == TargetOpcode::BUNDLE) {
MachineBasicBlock::instr_iterator Begin = MI.getIterator();
- MachineBasicBlock::instr_iterator End = getBundleEnd(MI);
+ MachineBasicBlock::instr_iterator End = getBundleEnd(Begin);
while (++Begin != End)
DEBUG(Begin->dump());
}
@@ -1355,8 +1306,7 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock *MBB) {
// Mark any used register (that is not using undef) and subregs as
// now live...
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
+ for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue;
unsigned Reg = MO.getReg();
if ((Reg == 0) || MRI.isReserved(Reg)) continue;
@@ -1457,13 +1407,12 @@ public:
// the subtree limit, then try to join it now since splitting subtrees is
// only useful if multiple high-pressure paths are possible.
unsigned InstrCount = R.DFSNodeData[SU->NodeNum].InstrCount;
- for (SUnit::const_pred_iterator
- PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) {
- if (PI->getKind() != SDep::Data)
+ for (const SDep &PredDep : SU->Preds) {
+ if (PredDep.getKind() != SDep::Data)
continue;
- unsigned PredNum = PI->getSUnit()->NodeNum;
+ unsigned PredNum = PredDep.getSUnit()->NodeNum;
if ((InstrCount - R.DFSNodeData[PredNum].InstrCount) < R.SubtreeLimit)
- joinPredSubtree(*PI, SU, /*CheckLimit=*/false);
+ joinPredSubtree(PredDep, SU, /*CheckLimit=*/false);
// Either link or merge the TreeData entry from the child to the parent.
if (R.DFSNodeData[PredNum].SubtreeID == PredNum) {
@@ -1505,12 +1454,11 @@ public:
R.DFSTreeData.resize(SubtreeClasses.getNumClasses());
assert(SubtreeClasses.getNumClasses() == RootSet.size()
&& "number of roots should match trees");
- for (SparseSet<RootData>::const_iterator
- RI = RootSet.begin(), RE = RootSet.end(); RI != RE; ++RI) {
- unsigned TreeID = SubtreeClasses[RI->NodeID];
- if (RI->ParentNodeID != SchedDFSResult::InvalidSubtreeID)
- R.DFSTreeData[TreeID].ParentTreeID = SubtreeClasses[RI->ParentNodeID];
- R.DFSTreeData[TreeID].SubInstrCount = RI->SubInstrCount;
+ for (const RootData &Root : RootSet) {
+ unsigned TreeID = SubtreeClasses[Root.NodeID];
+ if (Root.ParentNodeID != SchedDFSResult::InvalidSubtreeID)
+ R.DFSTreeData[TreeID].ParentTreeID = SubtreeClasses[Root.ParentNodeID];
+ R.DFSTreeData[TreeID].SubInstrCount = Root.SubInstrCount;
// Note that SubInstrCount may be greater than InstrCount if we joined
// subtrees across a cross edge. InstrCount will be attributed to the
// original parent, while SubInstrCount will be attributed to the joined
@@ -1524,14 +1472,12 @@ public:
DEBUG(dbgs() << " SU(" << Idx << ") in tree "
<< R.DFSNodeData[Idx].SubtreeID << '\n');
}
- for (std::vector<std::pair<const SUnit*, const SUnit*> >::const_iterator
- I = ConnectionPairs.begin(), E = ConnectionPairs.end();
- I != E; ++I) {
- unsigned PredTree = SubtreeClasses[I->first->NodeNum];
- unsigned SuccTree = SubtreeClasses[I->second->NodeNum];
+ for (const std::pair<const SUnit*, const SUnit*> &P : ConnectionPairs) {
+ unsigned PredTree = SubtreeClasses[P.first->NodeNum];
+ unsigned SuccTree = SubtreeClasses[P.second->NodeNum];
if (PredTree == SuccTree)
continue;
- unsigned Depth = I->first->getDepth();
+ unsigned Depth = P.first->getDepth();
addConnection(PredTree, SuccTree, Depth);
addConnection(SuccTree, PredTree, Depth);
}
@@ -1553,9 +1499,8 @@ protected:
// Four is the magic number of successors before a node is considered a
// pinch point.
unsigned NumDataSucs = 0;
- for (SUnit::const_succ_iterator SI = PredSU->Succs.begin(),
- SE = PredSU->Succs.end(); SI != SE; ++SI) {
- if (SI->getKind() == SDep::Data) {
+ for (const SDep &SuccDep : PredSU->Succs) {
+ if (SuccDep.getKind() == SDep::Data) {
if (++NumDataSucs >= 4)
return false;
}
@@ -1575,10 +1520,9 @@ protected:
do {
SmallVectorImpl<SchedDFSResult::Connection> &Connections =
R.SubtreeConnections[FromTree];
- for (SmallVectorImpl<SchedDFSResult::Connection>::iterator
- I = Connections.begin(), E = Connections.end(); I != E; ++I) {
- if (I->TreeID == ToTree) {
- I->Level = std::max(I->Level, Depth);
+ for (SchedDFSResult::Connection &C : Connections) {
+ if (C.TreeID == ToTree) {
+ C.Level = std::max(C.Level, Depth);
return;
}
}
@@ -1617,9 +1561,9 @@ public:
} // anonymous
static bool hasDataSucc(const SUnit *SU) {
- for (SUnit::const_succ_iterator
- SI = SU->Succs.begin(), SE = SU->Succs.end(); SI != SE; ++SI) {
- if (SI->getKind() == SDep::Data && !SI->getSUnit()->isBoundaryNode())
+ for (const SDep &SuccDep : SU->Succs) {
+ if (SuccDep.getKind() == SDep::Data &&
+ !SuccDep.getSUnit()->isBoundaryNode())
return true;
}
return false;
@@ -1632,15 +1576,13 @@ void SchedDFSResult::compute(ArrayRef<SUnit> SUnits) {
llvm_unreachable("Top-down ILP metric is unimplemnted");
SchedDFSImpl Impl(*this);
- for (ArrayRef<SUnit>::const_iterator
- SI = SUnits.begin(), SE = SUnits.end(); SI != SE; ++SI) {
- const SUnit *SU = &*SI;
- if (Impl.isVisited(SU) || hasDataSucc(SU))
+ for (const SUnit &SU : SUnits) {
+ if (Impl.isVisited(&SU) || hasDataSucc(&SU))
continue;
SchedDAGReverseDFS DFS;
- Impl.visitPreorder(SU);
- DFS.follow(SU);
+ Impl.visitPreorder(&SU);
+ DFS.follow(&SU);
for (;;) {
// Traverse the leftmost path as far as possible.
while (DFS.getPred() != DFS.getPredEnd()) {
@@ -1676,13 +1618,11 @@ void SchedDFSResult::compute(ArrayRef<SUnit> SUnits) {
/// connected to this tree, record the depth of the connection so that the
/// nearest connected subtrees can be prioritized.
void SchedDFSResult::scheduleTree(unsigned SubtreeID) {
- for (SmallVectorImpl<Connection>::const_iterator
- I = SubtreeConnections[SubtreeID].begin(),
- E = SubtreeConnections[SubtreeID].end(); I != E; ++I) {
- SubtreeConnectLevels[I->TreeID] =
- std::max(SubtreeConnectLevels[I->TreeID], I->Level);
- DEBUG(dbgs() << " Tree: " << I->TreeID
- << " @" << SubtreeConnectLevels[I->TreeID] << '\n');
+ for (const Connection &C : SubtreeConnections[SubtreeID]) {
+ SubtreeConnectLevels[C.TreeID] =
+ std::max(SubtreeConnectLevels[C.TreeID], C.Level);
+ DEBUG(dbgs() << " Tree: " << C.TreeID
+ << " @" << SubtreeConnectLevels[C.TreeID] << '\n');
}
}
diff --git a/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp
index 69c487033015..83bc1ba7beb9 100644
--- a/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp
+++ b/contrib/llvm/lib/CodeGen/ScoreboardHazardRecognizer.cpp
@@ -145,7 +145,7 @@ ScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
case InstrStage::Required:
// Required FUs conflict with both reserved and required ones
freeUnits &= ~ReservedScoreboard[StageCycle];
- // FALLTHROUGH
+ LLVM_FALLTHROUGH;
case InstrStage::Reserved:
// Reserved FUs can conflict only with required ones.
freeUnits &= ~RequiredScoreboard[StageCycle];
@@ -197,7 +197,7 @@ void ScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) {
case InstrStage::Required:
// Required FUs conflict with both reserved and required ones
freeUnits &= ~ReservedScoreboard[cycle + i];
- // FALLTHROUGH
+ LLVM_FALLTHROUGH;
case InstrStage::Reserved:
// Reserved FUs can conflict only with required ones.
freeUnits &= ~RequiredScoreboard[cycle + i];
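The two hunks above swap the informal // FALLTHROUGH comments for the LLVM_FALLTHROUGH macro from llvm/Support/Compiler.h, which expands to a fallthrough attribute when the compiler provides one and to nothing otherwise, keeping -Wimplicit-fallthrough quiet on intentional fall-through. A minimal stand-in definition and usage pattern (the attribute selection in Compiler.h is more involved):

// Minimal stand-in; the real macro probes __has_cpp_attribute for
// clang::fallthrough / fallthrough.
#if __cplusplus >= 201703L
#define FALLTHROUGH_SKETCH [[fallthrough]]
#else
#define FALLTHROUGH_SKETCH
#endif

unsigned accumulateConflicts(unsigned Kind, unsigned FreeUnits,
                             unsigned Reserved, unsigned Required) {
  switch (Kind) {
  case 0:                       // "required" stage
    FreeUnits &= ~Reserved;     // conflicts with reserved units...
    FALLTHROUGH_SKETCH;         // ...and intentionally also with required ones
  case 1:                       // "reserved" stage
    FreeUnits &= ~Required;
    break;
  }
  return FreeUnits;
}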
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5ecc6da32144..b4b41c3d0011 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16,14 +16,15 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
@@ -181,7 +182,7 @@ namespace {
/// if things it uses can be simplified by bit propagation.
/// If so, return true.
bool SimplifyDemandedBits(SDValue Op) {
- unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
+ unsigned BitWidth = Op.getScalarValueSizeInBits();
APInt Demanded = APInt::getAllOnesValue(BitWidth);
return SimplifyDemandedBits(Op, Demanded);
}
@@ -326,7 +327,7 @@ namespace {
SDValue visitFADDForFMACombine(SDNode *N);
SDValue visitFSUBForFMACombine(SDNode *N);
- SDValue visitFMULForFMACombine(SDNode *N);
+ SDValue visitFMULForFMADistributiveCombine(SDNode *N);
SDValue XformToShuffleWithZero(SDNode *N);
SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue LHS,
@@ -334,12 +335,15 @@ namespace {
SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
+ SDValue foldSelectOfConstants(SDNode *N);
bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
SDValue N2, SDValue N3, ISD::CondCode CC,
bool NotExtCompare = false);
+ SDValue foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0, SDValue N1,
+ SDValue N2, SDValue N3, ISD::CondCode CC);
SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
const SDLoc &DL, bool foldBooleans = true);
@@ -356,6 +360,7 @@ namespace {
SDValue BuildSDIV(SDNode *N);
SDValue BuildSDIVPow2(SDNode *N);
SDValue BuildUDIV(SDNode *N);
+ SDValue BuildLogBase2(SDValue Op, const SDLoc &DL);
SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags);
SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags);
SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags *Flags);
@@ -374,9 +379,14 @@ namespace {
SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
SDValue ReduceLoadWidth(SDNode *N);
SDValue ReduceLoadOpStoreWidth(SDNode *N);
+ SDValue splitMergedValStore(StoreSDNode *ST);
SDValue TransformFPLoadStorePair(SDNode *N);
SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
+ SDValue reduceBuildVecToShuffle(SDNode *N);
+ SDValue createBuildVecShuffle(SDLoc DL, SDNode *N, ArrayRef<int> VectorMask,
+ SDValue VecIn1, SDValue VecIn2,
+ unsigned LeftIdx);
SDValue GetDemandedBits(SDValue V, const APInt &Mask);
@@ -444,10 +454,11 @@ namespace {
/// This is a helper function for MergeConsecutiveStores. When the source
/// elements of the consecutive stores are all constants or all extracted
/// vector elements, try to merge them into one larger store.
- /// \return True if a merged store was created.
- bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
- EVT MemVT, unsigned NumStores,
- bool IsConstantSrc, bool UseVector);
+ /// \return number of stores that were merged into a merged store (always
+ /// a prefix of \p StoreNode).
+ bool MergeStoresOfConstantsOrVecElts(
+ SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT, unsigned NumStores,
+ bool IsConstantSrc, bool UseVector);
/// This is a helper function for MergeConsecutiveStores.
/// Stores that may be merged are placed in StoreNodes.
@@ -464,8 +475,10 @@ namespace {
/// Merge consecutive store operations into a wide store.
/// This optimization uses wide integers or vectors when possible.
- /// \return True if some memory operations were changed.
- bool MergeConsecutiveStores(StoreSDNode *N);
+ /// \return True if some memory operations were merged (the affected nodes
+ /// are stored as a prefix in \p StoreNodes).
+ bool MergeConsecutiveStores(StoreSDNode *N,
+ SmallVectorImpl<MemOpLink> &StoreNodes);
/// \brief Try to transform a truncation where C is a constant:
/// (trunc (and X, C)) -> (and (trunc X), (trunc C))
@@ -536,10 +549,6 @@ void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
((DAGCombiner*)DC)->AddToWorklist(N);
}
-void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) {
- ((DAGCombiner*)DC)->removeFromWorklist(N);
-}
-
SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
@@ -620,7 +629,8 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
Depth + 1);
case ISD::FSUB:
// We can't turn -(A-B) into B-A when we honor signed zeros.
- if (!Options->UnsafeFPMath) return 0;
+ if (!Options->UnsafeFPMath && !Op.getNode()->getFlags()->hasNoSignedZeros())
+ return 0;
// fold (fneg (fsub A, B)) -> (fsub B, A)
return 1;
@@ -683,9 +693,6 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
LegalOperations, Depth+1),
Op.getOperand(0), Flags);
case ISD::FSUB:
- // We can't turn -(A-B) into B-A when we honor signed zeros.
- assert(Options.UnsafeFPMath);
-
// fold (fneg (fsub 0, B)) -> B
if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
if (N0CFP->isZero())
@@ -726,6 +733,15 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
}
}
+// APInts must be the same size for most operations; this helper
+// function zero extends the shorter of the pair so that they match.
+// We provide an Offset so that we can create bitwidths that won't overflow.
+static void zeroExtendToMatch(APInt &LHS, APInt &RHS, unsigned Offset = 0) {
+ unsigned Bits = Offset + std::max(LHS.getBitWidth(), RHS.getBitWidth());
+ LHS = LHS.zextOrSelf(Bits);
+ RHS = RHS.zextOrSelf(Bits);
+}
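A minimal standalone sketch of the width-matching idea behind zeroExtendToMatch; the addShiftAmounts wrapper is an illustrative name and not part of the patch, which uses the helper inline on shift-amount constants:

    // Illustrative only: widen two shift amounts of different widths before
    // adding them, reserving one extra bit so the sum cannot overflow. This
    // mirrors zeroExtendToMatch(C1, C2, 1 /* Overflow Bit */).
    #include <algorithm>
    #include "llvm/ADT/APInt.h"

    static llvm::APInt addShiftAmounts(llvm::APInt C1, llvm::APInt C2) {
      unsigned Bits = 1 + std::max(C1.getBitWidth(), C2.getBitWidth());
      C1 = C1.zextOrSelf(Bits); // no-op if C1 is already Bits wide
      C2 = C2.zextOrSelf(Bits);
      return C1 + C2;           // safe: the reserved bit absorbs any carry
    }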
+
// Return true if this node is a setcc, or is a select_cc
// that selects between the target values used for true and false, making it
// equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
@@ -775,42 +791,61 @@ static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
return nullptr;
}
-// \brief Returns the SDNode if it is a constant splat BuildVector or constant
-// int.
-static ConstantSDNode *isConstOrConstSplat(SDValue N) {
- if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
- return CN;
-
- if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
- BitVector UndefElements;
- ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements);
-
- // BuildVectors can truncate their operands. Ignore that case here.
- // FIXME: We blindly ignore splats which include undef which is overly
- // pessimistic.
- if (CN && UndefElements.none() &&
- CN->getValueType(0) == N.getValueType().getScalarType())
- return CN;
+// Determines if it is a constant integer or a build vector of constant
+// integers (and undefs).
+// Do not permit build vector implicit truncation.
+static bool isConstantOrConstantVector(SDValue N, bool NoOpaques = false) {
+ if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N))
+ return !(Const->isOpaque() && NoOpaques);
+ if (N.getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+ unsigned BitWidth = N.getScalarValueSizeInBits();
+ for (const SDValue &Op : N->op_values()) {
+ if (Op.isUndef())
+ continue;
+ ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Op);
+ if (!Const || Const->getAPIntValue().getBitWidth() != BitWidth ||
+ (Const->isOpaque() && NoOpaques))
+ return false;
}
-
- return nullptr;
+ return true;
}
-// \brief Returns the SDNode if it is a constant splat BuildVector or constant
-// float.
-static ConstantFPSDNode *isConstOrConstSplatFP(SDValue N) {
- if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
- return CN;
+// Determines if it is a constant null integer or a splatted vector of a
+// constant null integer (with no undefs).
+// Build vector implicit truncation is not an issue for null values.
+static bool isNullConstantOrNullSplatConstant(SDValue N) {
+ if (ConstantSDNode *Splat = isConstOrConstSplat(N))
+ return Splat->isNullValue();
+ return false;
+}
- if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
- BitVector UndefElements;
- ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements);
+// Determines if it is a constant integer of one or a splatted vector of a
+// constant integer of one (with no undefs).
+// Do not permit build vector implicit truncation.
+static bool isOneConstantOrOneSplatConstant(SDValue N) {
+ unsigned BitWidth = N.getScalarValueSizeInBits();
+ if (ConstantSDNode *Splat = isConstOrConstSplat(N))
+ return Splat->isOne() && Splat->getAPIntValue().getBitWidth() == BitWidth;
+ return false;
+}
- if (CN && UndefElements.none())
- return CN;
- }
+// Determines if it is a constant integer of all ones or a splatted vector of a
+// constant integer of all ones (with no undefs).
+// Do not permit build vector implicit truncation.
+static bool isAllOnesConstantOrAllOnesSplatConstant(SDValue N) {
+ unsigned BitWidth = N.getScalarValueSizeInBits();
+ if (ConstantSDNode *Splat = isConstOrConstSplat(N))
+ return Splat->isAllOnesValue() &&
+ Splat->getAPIntValue().getBitWidth() == BitWidth;
+ return false;
+}
- return nullptr;
+// Determines if a BUILD_VECTOR is composed of all-constants possibly mixed with
+// undef's.
+static bool isAnyConstantBuildVector(const SDNode *N) {
+ return ISD::isBuildVectorOfConstantSDNodes(N) ||
+ ISD::isBuildVectorOfConstantFPSDNodes(N);
}
SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
@@ -935,9 +970,9 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
}
void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
- SDLoc dl(Load);
+ SDLoc DL(Load);
EVT VT = Load->getValueType(0);
- SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, SDValue(ExtLoad, 0));
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, SDValue(ExtLoad, 0));
DEBUG(dbgs() << "\nReplacing.9 ";
Load->dump(&DAG);
@@ -953,7 +988,7 @@ void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
Replace = false;
- SDLoc dl(Op);
+ SDLoc DL(Op);
if (ISD::isUNINDEXEDLoad(Op.getNode())) {
LoadSDNode *LD = cast<LoadSDNode>(Op);
EVT MemVT = LD->getMemoryVT();
@@ -962,7 +997,7 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
: ISD::EXTLOAD)
: LD->getExtensionType();
Replace = true;
- return DAG.getExtLoad(ExtType, dl, PVT,
+ return DAG.getExtLoad(ExtType, DL, PVT,
LD->getChain(), LD->getBasePtr(),
MemVT, LD->getMemOperand());
}
@@ -971,30 +1006,30 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
switch (Opc) {
default: break;
case ISD::AssertSext:
- return DAG.getNode(ISD::AssertSext, dl, PVT,
+ return DAG.getNode(ISD::AssertSext, DL, PVT,
SExtPromoteOperand(Op.getOperand(0), PVT),
Op.getOperand(1));
case ISD::AssertZext:
- return DAG.getNode(ISD::AssertZext, dl, PVT,
+ return DAG.getNode(ISD::AssertZext, DL, PVT,
ZExtPromoteOperand(Op.getOperand(0), PVT),
Op.getOperand(1));
case ISD::Constant: {
unsigned ExtOpc =
Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
- return DAG.getNode(ExtOpc, dl, PVT, Op);
+ return DAG.getNode(ExtOpc, DL, PVT, Op);
}
}
if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
return SDValue();
- return DAG.getNode(ISD::ANY_EXTEND, dl, PVT, Op);
+ return DAG.getNode(ISD::ANY_EXTEND, DL, PVT, Op);
}
SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
return SDValue();
EVT OldVT = Op.getValueType();
- SDLoc dl(Op);
+ SDLoc DL(Op);
bool Replace = false;
SDValue NewOp = PromoteOperand(Op, PVT, Replace);
if (!NewOp.getNode())
@@ -1003,13 +1038,13 @@ SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
if (Replace)
ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
- return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NewOp.getValueType(), NewOp,
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, NewOp.getValueType(), NewOp,
DAG.getValueType(OldVT));
}
SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
EVT OldVT = Op.getValueType();
- SDLoc dl(Op);
+ SDLoc DL(Op);
bool Replace = false;
SDValue NewOp = PromoteOperand(Op, PVT, Replace);
if (!NewOp.getNode())
@@ -1018,7 +1053,7 @@ SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
if (Replace)
ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
- return DAG.getZeroExtendInReg(NewOp, dl, OldVT);
+ return DAG.getZeroExtendInReg(NewOp, DL, OldVT);
}
/// Promote the specified integer binary operation if the target indicates it is
@@ -1072,9 +1107,9 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
DEBUG(dbgs() << "\nPromoting ";
Op.getNode()->dump(&DAG));
- SDLoc dl(Op);
- return DAG.getNode(ISD::TRUNCATE, dl, VT,
- DAG.getNode(Opc, dl, PVT, NN0, NN1));
+ SDLoc DL(Op);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT,
+ DAG.getNode(Opc, DL, PVT, NN0, NN1));
}
return SDValue();
}
@@ -1119,9 +1154,9 @@ SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
DEBUG(dbgs() << "\nPromoting ";
Op.getNode()->dump(&DAG));
- SDLoc dl(Op);
- return DAG.getNode(ISD::TRUNCATE, dl, VT,
- DAG.getNode(Opc, dl, PVT, N0, Op.getOperand(1)));
+ SDLoc DL(Op);
+ return DAG.getNode(ISD::TRUNCATE, DL, VT,
+ DAG.getNode(Opc, DL, PVT, N0, Op.getOperand(1)));
}
return SDValue();
}
@@ -1178,7 +1213,7 @@ bool DAGCombiner::PromoteLoad(SDValue Op) {
if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
assert(PVT != VT && "Don't know what type to promote to!");
- SDLoc dl(Op);
+ SDLoc DL(Op);
SDNode *N = Op.getNode();
LoadSDNode *LD = cast<LoadSDNode>(N);
EVT MemVT = LD->getMemoryVT();
@@ -1186,10 +1221,10 @@ bool DAGCombiner::PromoteLoad(SDValue Op) {
? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
: ISD::EXTLOAD)
: LD->getExtensionType();
- SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT,
+ SDValue NewLD = DAG.getExtLoad(ExtType, DL, PVT,
LD->getChain(), LD->getBasePtr(),
MemVT, LD->getMemOperand());
- SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD);
+ SDValue Result = DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
DEBUG(dbgs() << "\nPromoting ";
N->dump(&DAG);
@@ -1315,7 +1350,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
continue;
assert(N->getOpcode() != ISD::DELETED_NODE &&
- RV.getNode()->getOpcode() != ISD::DELETED_NODE &&
+ RV.getOpcode() != ISD::DELETED_NODE &&
"Node was deleted but visit returned new node!");
DEBUG(dbgs() << " ... into: ";
@@ -1562,8 +1597,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
break;
case ISD::TokenFactor:
- if (Op.hasOneUse() &&
- std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) {
+ if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
// Queue up for processing.
TFs.push_back(Op.getNode());
// Clean up in case the token factor is removed.
@@ -1571,7 +1605,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
Changed = true;
break;
}
- // Fall thru
+ LLVM_FALLTHROUGH;
default:
// Only add if it isn't already in the list.
@@ -1634,6 +1668,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
+ SDLoc DL(N);
// fold vector ops
if (VT.isVector()) {
@@ -1650,61 +1685,73 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
// fold (add x, undef) -> undef
if (N0.isUndef())
return N0;
+
if (N1.isUndef())
return N1;
+
if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
// canonicalize constant to RHS
if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
- return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0);
+ return DAG.getNode(ISD::ADD, DL, VT, N1, N0);
// fold (add c1, c2) -> c1+c2
- return DAG.FoldConstantArithmetic(ISD::ADD, SDLoc(N), VT,
- N0.getNode(), N1.getNode());
+ return DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N0.getNode(),
+ N1.getNode());
}
+
// fold (add x, 0) -> x
if (isNullConstant(N1))
return N0;
+
// fold ((c1-A)+c2) -> (c1+c2)-A
- if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1)) {
+ if (isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
if (N0.getOpcode() == ISD::SUB)
- if (ConstantSDNode *N0C = getAsNonOpaqueConstant(N0.getOperand(0))) {
- SDLoc DL(N);
+ if (isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) {
return DAG.getNode(ISD::SUB, DL, VT,
- DAG.getConstant(N1C->getAPIntValue()+
- N0C->getAPIntValue(), DL, VT),
+ DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
N0.getOperand(1));
}
}
+
// reassociate add
- if (SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1))
+ if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1))
return RADD;
+
// fold ((0-A) + B) -> B-A
- if (N0.getOpcode() == ISD::SUB && isNullConstant(N0.getOperand(0)))
- return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, N0.getOperand(1));
+ if (N0.getOpcode() == ISD::SUB &&
+ isNullConstantOrNullSplatConstant(N0.getOperand(0)))
+ return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
+
// fold (A + (0-B)) -> A-B
- if (N1.getOpcode() == ISD::SUB && isNullConstant(N1.getOperand(0)))
- return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1.getOperand(1));
+ if (N1.getOpcode() == ISD::SUB &&
+ isNullConstantOrNullSplatConstant(N1.getOperand(0)))
+ return DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(1));
+
// fold (A+(B-A)) -> B
if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
return N1.getOperand(0);
+
// fold ((B-A)+A) -> B
if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
return N0.getOperand(0);
+
// fold (A+(B-(A+C))) to (B-C)
if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
N0 == N1.getOperand(1).getOperand(0))
- return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0),
+ return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
N1.getOperand(1).getOperand(1));
+
// fold (A+(B-(C+A))) to (B-C)
if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
N0 == N1.getOperand(1).getOperand(1))
- return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0),
+ return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
N1.getOperand(1).getOperand(0));
+
// fold (A+((B-A)+or-C)) to (B+or-C)
if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
N1.getOperand(0).getOpcode() == ISD::SUB &&
N0 == N1.getOperand(0).getOperand(1))
- return DAG.getNode(N1.getOpcode(), SDLoc(N), VT,
- N1.getOperand(0).getOperand(0), N1.getOperand(1));
+ return DAG.getNode(N1.getOpcode(), DL, VT, N1.getOperand(0).getOperand(0),
+ N1.getOperand(1));
// fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
@@ -1713,52 +1760,50 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
SDValue N10 = N1.getOperand(0);
SDValue N11 = N1.getOperand(1);
- if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10))
- return DAG.getNode(ISD::SUB, SDLoc(N), VT,
+ if (isConstantOrConstantVector(N00) || isConstantOrConstantVector(N10))
+ return DAG.getNode(ISD::SUB, DL, VT,
DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
}
- if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
+ if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
// fold (a+b) -> (a|b) iff a and b share no bits.
if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
- VT.isInteger() && !VT.isVector() && DAG.haveNoCommonBitsSet(N0, N1))
- return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1);
+ VT.isInteger() && DAG.haveNoCommonBitsSet(N0, N1))
+ return DAG.getNode(ISD::OR, DL, VT, N0, N1);
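The disjoint-bits identity behind this fold (a + b == a | b whenever a and b share no set bits) can be spot-checked exhaustively on 8-bit values; the small program below is illustrative only:

    #include <cassert>

    int main() {
      for (unsigned A = 0; A < 256; ++A)
        for (unsigned B = 0; B < 256; ++B)
          if ((A & B) == 0)              // operands share no set bits
            assert((A + B) == (A | B));  // addition carries nothing, so it is OR
      return 0;
    }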
// fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
- isNullConstant(N1.getOperand(0).getOperand(0)))
- return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0,
- DAG.getNode(ISD::SHL, SDLoc(N), VT,
+ isNullConstantOrNullSplatConstant(N1.getOperand(0).getOperand(0)))
+ return DAG.getNode(ISD::SUB, DL, VT, N0,
+ DAG.getNode(ISD::SHL, DL, VT,
N1.getOperand(0).getOperand(1),
N1.getOperand(1)));
if (N0.getOpcode() == ISD::SHL && N0.getOperand(0).getOpcode() == ISD::SUB &&
- isNullConstant(N0.getOperand(0).getOperand(0)))
- return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1,
- DAG.getNode(ISD::SHL, SDLoc(N), VT,
+ isNullConstantOrNullSplatConstant(N0.getOperand(0).getOperand(0)))
+ return DAG.getNode(ISD::SUB, DL, VT, N1,
+ DAG.getNode(ISD::SHL, DL, VT,
N0.getOperand(0).getOperand(1),
N0.getOperand(1)));
if (N1.getOpcode() == ISD::AND) {
SDValue AndOp0 = N1.getOperand(0);
unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
- unsigned DestBits = VT.getScalarType().getSizeInBits();
+ unsigned DestBits = VT.getScalarSizeInBits();
// (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
// and similar xforms where the inner op is either ~0 or 0.
- if (NumSignBits == DestBits && isOneConstant(N1->getOperand(1))) {
- SDLoc DL(N);
+ if (NumSignBits == DestBits &&
+ isOneConstantOrOneSplatConstant(N1->getOperand(1)))
return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0);
- }
}
// add (sext i1), X -> sub X, (zext i1)
if (N0.getOpcode() == ISD::SIGN_EXTEND &&
N0.getOperand(0).getValueType() == MVT::i1 &&
!TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
- SDLoc DL(N);
SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
}
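The identity used here is that sign-extending a boolean is the negation of zero-extending it, so add X, sext(b) equals sub X, zext(b); an illustrative check, not part of the patch:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t B : {0u, 1u}) {            // B models zext(bool X)
        uint32_t SExt = B ? 0xFFFFFFFFu : 0u;  // sext(bool X): 0 or -1
        for (uint32_t X : {0u, 7u, 0x80000000u, 0xFFFFFFFFu})
          assert(X + SExt == X - B);           // add X, sext(b) == sub X, zext(b)
      }
      return 0;
    }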
@@ -1767,7 +1812,6 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
if (TN->getVT() == MVT::i1) {
- SDLoc DL(N);
SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
DAG.getConstant(1, DL, VT));
return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
@@ -1853,6 +1897,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
+ SDLoc DL(N);
// fold vector ops
if (VT.isVector()) {
@@ -1867,62 +1912,97 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
// fold (sub x, x) -> 0
// FIXME: Refactor this and xor and other similar operations together.
if (N0 == N1)
- return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
+ return tryFoldToZero(DL, TLI, VT, DAG, LegalOperations, LegalTypes);
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
// fold (sub c1, c2) -> c1-c2
- return DAG.FoldConstantArithmetic(ISD::SUB, SDLoc(N), VT,
- N0.getNode(), N1.getNode());
+ return DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
+ N1.getNode());
}
- ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
+
ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
+
// fold (sub x, c) -> (add x, -c)
if (N1C) {
- SDLoc DL(N);
return DAG.getNode(ISD::ADD, DL, VT, N0,
DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
}
+
+ if (isNullConstantOrNullSplatConstant(N0)) {
+ unsigned BitWidth = VT.getScalarSizeInBits();
+ // Right-shifting everything out but the sign bit followed by negation is
+ // the same as flipping arithmetic/logical shift type without the negation:
+ // -(X >>u 31) -> (X >>s 31)
+ // -(X >>s 31) -> (X >>u 31)
+ if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
+ ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
+ if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) {
+ auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
+ if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
+ return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
+ }
+ }
+
+ // 0 - X --> 0 if the sub is NUW.
+ if (N->getFlags()->hasNoUnsignedWrap())
+ return N0;
+
+ if (DAG.MaskedValueIsZero(N1, ~APInt::getSignBit(BitWidth))) {
+ // N1 is either 0 or the minimum signed value. If the sub is NSW, then
+ // N1 must be 0 because negating the minimum signed value is undefined.
+ if (N->getFlags()->hasNoSignedWrap())
+ return N0;
+
+ // 0 - X --> X if X is 0 or the minimum signed value.
+ return N1;
+ }
+ }
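The shift-flip part of this block relies on the identity that negating the logical shift of the sign bit equals the arithmetic shift, and vice versa; the following exhaustive 8-bit check is illustrative only:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (unsigned V = 0; V <= 0xFF; ++V) {
        uint8_t X = static_cast<uint8_t>(V);
        uint8_t Lsr = X >> 7;                          // X >>u 7: 0 or 1
        uint8_t Asr = (X & 0x80) ? 0xFF : 0x00;        // X >>s 7: 0 or -1
        assert(static_cast<uint8_t>(0 - Lsr) == Asr);  // -(X >>u 7) == X >>s 7
        assert(static_cast<uint8_t>(0 - Asr) == Lsr);  // -(X >>s 7) == X >>u 7
      }
      return 0;
    }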
+
// Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
- if (isAllOnesConstant(N0))
- return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
+ if (isAllOnesConstantOrAllOnesSplatConstant(N0))
+ return DAG.getNode(ISD::XOR, DL, VT, N1, N0);
+
// fold A-(A-B) -> B
if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
return N1.getOperand(1);
+
// fold (A+B)-A -> B
if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
return N0.getOperand(1);
+
// fold (A+B)-B -> A
if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
return N0.getOperand(0);
+
// fold C2-(A+C1) -> (C2-C1)-A
- ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr :
- dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode());
- if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
- SDLoc DL(N);
- SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(),
- DL, VT);
- return DAG.getNode(ISD::SUB, DL, VT, NewC,
- N1.getOperand(0));
+ if (N1.getOpcode() == ISD::ADD) {
+ SDValue N11 = N1.getOperand(1);
+ if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
+ isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
+ SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11);
+ return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
+ }
}
+
// fold ((A+(B+or-C))-B) -> A+or-C
if (N0.getOpcode() == ISD::ADD &&
(N0.getOperand(1).getOpcode() == ISD::SUB ||
N0.getOperand(1).getOpcode() == ISD::ADD) &&
N0.getOperand(1).getOperand(0) == N1)
- return DAG.getNode(N0.getOperand(1).getOpcode(), SDLoc(N), VT,
- N0.getOperand(0), N0.getOperand(1).getOperand(1));
+ return DAG.getNode(N0.getOperand(1).getOpcode(), DL, VT, N0.getOperand(0),
+ N0.getOperand(1).getOperand(1));
+
// fold ((A+(C+B))-B) -> A+C
- if (N0.getOpcode() == ISD::ADD &&
- N0.getOperand(1).getOpcode() == ISD::ADD &&
+ if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD &&
N0.getOperand(1).getOperand(1) == N1)
- return DAG.getNode(ISD::ADD, SDLoc(N), VT,
- N0.getOperand(0), N0.getOperand(1).getOperand(0));
+ return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0),
+ N0.getOperand(1).getOperand(0));
+
// fold ((A-(B-C))-C) -> A-B
- if (N0.getOpcode() == ISD::SUB &&
- N0.getOperand(1).getOpcode() == ISD::SUB &&
+ if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB &&
N0.getOperand(1).getOperand(1) == N1)
- return DAG.getNode(ISD::SUB, SDLoc(N), VT,
- N0.getOperand(0), N0.getOperand(1).getOperand(0));
+ return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
+ N0.getOperand(1).getOperand(0));
// If either operand of a sub is undef, the result is undef
if (N0.isUndef())
@@ -1937,19 +2017,18 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (N1C && GA->getOpcode() == ISD::GlobalAddress)
return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
GA->getOffset() -
- (uint64_t)N1C->getSExtValue());
+ (uint64_t)N1C->getSExtValue());
// fold (sub Sym+c1, Sym+c2) -> c1-c2
if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
if (GA->getGlobal() == GB->getGlobal())
return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
- SDLoc(N), VT);
+ DL, VT);
}
// sub X, (sextinreg Y i1) -> add X, (and Y 1)
if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
if (TN->getVT() == MVT::i1) {
- SDLoc DL(N);
SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
DAG.getConstant(1, DL, VT));
return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
@@ -2048,7 +2127,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// We require a splat of the entire scalar bit width for non-contiguous
// bit patterns.
bool IsFullSplat =
- ConstValue1.getBitWidth() == VT.getScalarType().getSizeInBits();
+ ConstValue1.getBitWidth() == VT.getScalarSizeInBits();
// fold (mul x, 1) -> x
if (N1IsConst && ConstValue1 == 1 && IsFullSplat)
return N0;
@@ -2080,28 +2159,27 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
getShiftAmountTy(N0.getValueType()))));
}
- APInt Val;
// (mul (shl X, c1), c2) -> (mul X, c2 << c1)
- if (N1IsConst && N0.getOpcode() == ISD::SHL &&
- (ISD::isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
- isa<ConstantSDNode>(N0.getOperand(1)))) {
+ if (N0.getOpcode() == ISD::SHL &&
+ isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
+ isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
- AddToWorklist(C3.getNode());
- return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
+ if (isConstantOrConstantVector(C3))
+ return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
}
// Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
// use.
{
SDValue Sh(nullptr, 0), Y(nullptr, 0);
+
// Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
if (N0.getOpcode() == ISD::SHL &&
- (ISD::isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
- isa<ConstantSDNode>(N0.getOperand(1))) &&
+ isConstantOrConstantVector(N0.getOperand(1)) &&
N0.getNode()->hasOneUse()) {
Sh = N0; Y = N1;
} else if (N1.getOpcode() == ISD::SHL &&
- isa<ConstantSDNode>(N1.getOperand(1)) &&
+ isConstantOrConstantVector(N1.getOperand(1)) &&
N1.getNode()->hasOneUse()) {
Sh = N1; Y = N0;
}
@@ -2188,8 +2266,8 @@ SDValue DAGCombiner::useDivRem(SDNode *Node) {
SDValue Op1 = Node->getOperand(1);
SDValue combined;
for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
- UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
- SDNode *User = *UI;
+ UE = Op0.getNode()->use_end(); UI != UE;) {
+ SDNode *User = *UI++;
if (User == Node || User->use_empty())
continue;
// Convert the other matching node(s), too;
@@ -2246,10 +2324,8 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
// If we know the sign bits of both operands are zero, strength reduce to a
// udiv instead. Handles (X&15) /s 4 -> X&15 >> 2
- if (!VT.isVector()) {
- if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
- return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
- }
+ if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
// fold (sdiv X, pow2) -> simple ops after legalize
// FIXME: We check for the exact bit here because the generic lowering gives
@@ -2302,8 +2378,8 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) {
return Op;
// sdiv, srem -> sdivrem
- // If the divisor is constant, then return DIVREM only if isIntDivCheap() is true.
- // Otherwise, we break the simplification logic in visitREM().
+ // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
+ // true. Otherwise, we break the simplification logic in visitREM().
if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
if (SDValue DivRem = useDivRem(N))
return DivRem;
@@ -2337,25 +2413,33 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
N0C, N1C))
return Folded;
+
// fold (udiv x, (1 << c)) -> x >>u c
- if (N1C && !N1C->isOpaque() && N1C->getAPIntValue().isPowerOf2())
- return DAG.getNode(ISD::SRL, DL, VT, N0,
- DAG.getConstant(N1C->getAPIntValue().logBase2(), DL,
- getShiftAmountTy(N0.getValueType())));
+ if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
+ DAG.isKnownToBeAPowerOfTwo(N1)) {
+ SDValue LogBase2 = BuildLogBase2(N1, DL);
+ AddToWorklist(LogBase2.getNode());
+
+ EVT ShiftVT = getShiftAmountTy(N0.getValueType());
+ SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ShiftVT);
+ AddToWorklist(Trunc.getNode());
+ return DAG.getNode(ISD::SRL, DL, VT, N0, Trunc);
+ }
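The rewritten fold relies on udiv x, 2^c == srl x, c, with BuildLogBase2 recovering c even for splat vectors; a small illustrative check for c == 4:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t X : {0u, 1u, 15u, 16u, 12345u, 0xFFFFFFFFu})
        assert(X / 16u == (X >> 4));   // udiv x, 2^4 == srl x, 4
      return 0;
    }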
// fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
if (N1.getOpcode() == ISD::SHL) {
- if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) {
- if (SHC->getAPIntValue().isPowerOf2()) {
- EVT ADDVT = N1.getOperand(1).getValueType();
- SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT,
- N1.getOperand(1),
- DAG.getConstant(SHC->getAPIntValue()
- .logBase2(),
- DL, ADDVT));
- AddToWorklist(Add.getNode());
- return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
- }
+ SDValue N10 = N1.getOperand(0);
+ if (isConstantOrConstantVector(N10, /*NoOpaques*/ true) &&
+ DAG.isKnownToBeAPowerOfTwo(N10)) {
+ SDValue LogBase2 = BuildLogBase2(N10, DL);
+ AddToWorklist(LogBase2.getNode());
+
+ EVT ADDVT = N1.getOperand(1).getValueType();
+ SDValue Trunc = DAG.getZExtOrTrunc(LogBase2, DL, ADDVT);
+ AddToWorklist(Trunc.getNode());
+ SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, N1.getOperand(1), Trunc);
+ AddToWorklist(Add.getNode());
+ return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
}
}
@@ -2366,8 +2450,8 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) {
return Op;
// sdiv, srem -> sdivrem
- // If the divisor is constant, then return DIVREM only if isIntDivCheap() is true.
- // Otherwise, we break the simplification logic in visitREM().
+ // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
+ // true. Otherwise, we break the simplification logic in visitREM().
if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
if (SDValue DivRem = useDivRem(N))
return DivRem;
@@ -2401,27 +2485,25 @@ SDValue DAGCombiner::visitREM(SDNode *N) {
if (isSigned) {
// If we know the sign bits of both operands are zero, strength reduce to a
// urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15
- if (!VT.isVector()) {
- if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
- return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
- }
+ if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
+ return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
} else {
// fold (urem x, pow2) -> (and x, pow2-1)
- if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
- N1C->getAPIntValue().isPowerOf2()) {
- return DAG.getNode(ISD::AND, DL, VT, N0,
- DAG.getConstant(N1C->getAPIntValue() - 1, DL, VT));
+ if (DAG.isKnownToBeAPowerOfTwo(N1)) {
+ APInt NegOne = APInt::getAllOnesValue(VT.getScalarSizeInBits());
+ SDValue Add =
+ DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getConstant(NegOne, DL, VT));
+ AddToWorklist(Add.getNode());
+ return DAG.getNode(ISD::AND, DL, VT, N0, Add);
}
// fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
- if (N1.getOpcode() == ISD::SHL) {
- ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0));
- if (SHC && SHC->getAPIntValue().isPowerOf2()) {
- APInt NegOne = APInt::getAllOnesValue(VT.getSizeInBits());
- SDValue Add =
- DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getConstant(NegOne, DL, VT));
- AddToWorklist(Add.getNode());
- return DAG.getNode(ISD::AND, DL, VT, N0, Add);
- }
+ if (N1.getOpcode() == ISD::SHL &&
+ DAG.isKnownToBeAPowerOfTwo(N1.getOperand(0))) {
+ APInt NegOne = APInt::getAllOnesValue(VT.getScalarSizeInBits());
+ SDValue Add =
+ DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getConstant(NegOne, DL, VT));
+ AddToWorklist(Add.getNode());
+ return DAG.getNode(ISD::AND, DL, VT, N0, Add);
}
}
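Both urem folds above use the identity urem x, 2^k == and x, 2^k - 1; an illustrative check:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t Pow2 = 1u << 5;            // 32
      for (uint32_t X : {0u, 31u, 32u, 33u, 1000u, 0xDEADBEEFu})
        assert(X % Pow2 == (X & (Pow2 - 1)));   // urem x, pow2 == and x, pow2-1
      return 0;
    }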
@@ -2477,8 +2559,7 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
if (isOneConstant(N1)) {
SDLoc DL(N);
return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
- DAG.getConstant(N0.getValueType().getSizeInBits() - 1,
- DL,
+ DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
getShiftAmountTy(N0.getValueType())));
}
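mulhs(x, 1) is the high half of the sign-extended product x * 1, i.e. the sign bit of x broadcast, which is exactly sra x, bits-1; an illustrative check on i32 values (not part of the patch):

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int32_t X : {0, 1, -1, 42, INT32_MIN, INT32_MAX}) {
        uint64_t Prod = static_cast<uint64_t>(static_cast<int64_t>(X)) * 1u;
        int32_t High = static_cast<int32_t>(Prod >> 32);  // mulhs(x, 1)
        int32_t Expect = (X < 0) ? -1 : 0;                // x >>s 31
        assert(High == Expect);
      }
      return 0;
    }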
// fold (mulhs x, undef) -> 0
@@ -2706,7 +2787,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
// Bail early if none of these transforms apply.
- if (N0.getNode()->getNumOperands() == 0) return SDValue();
+ if (N0.getNumOperands() == 0) return SDValue();
// For each of OP in AND/OR/XOR:
// fold (OP (zext x), (zext y)) -> (zext (OP x, y))
@@ -2872,25 +2953,34 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1,
LL.getValueType().isInteger()) {
// fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0)
if (isNullConstant(LR) && Op1 == ISD::SETEQ) {
- SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
- LR.getValueType(), LL, RL);
- AddToWorklist(ORNode.getNode());
- return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
+ EVT CCVT = getSetCCResultType(LR.getValueType());
+ if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
+ SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
+ LR.getValueType(), LL, RL);
+ AddToWorklist(ORNode.getNode());
+ return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
+ }
}
if (isAllOnesConstant(LR)) {
// fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1)
if (Op1 == ISD::SETEQ) {
- SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0),
- LR.getValueType(), LL, RL);
- AddToWorklist(ANDNode.getNode());
- return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
+ EVT CCVT = getSetCCResultType(LR.getValueType());
+ if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
+ SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0),
+ LR.getValueType(), LL, RL);
+ AddToWorklist(ANDNode.getNode());
+ return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
+ }
}
// fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1)
if (Op1 == ISD::SETGT) {
- SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
- LR.getValueType(), LL, RL);
- AddToWorklist(ORNode.getNode());
- return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
+ EVT CCVT = getSetCCResultType(LR.getValueType());
+ if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
+ SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
+ LR.getValueType(), LL, RL);
+ AddToWorklist(ORNode.getNode());
+ return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
+ }
}
}
}
@@ -2899,14 +2989,17 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1,
Op0 == Op1 && LL.getValueType().isInteger() &&
Op0 == ISD::SETNE && ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
(isAllOnesConstant(LR) && isNullConstant(RR)))) {
- SDLoc DL(N0);
- SDValue ADDNode = DAG.getNode(ISD::ADD, DL, LL.getValueType(),
- LL, DAG.getConstant(1, DL,
- LL.getValueType()));
- AddToWorklist(ADDNode.getNode());
- return DAG.getSetCC(SDLoc(LocReference), VT, ADDNode,
- DAG.getConstant(2, DL, LL.getValueType()),
- ISD::SETUGE);
+ EVT CCVT = getSetCCResultType(LL.getValueType());
+ if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
+ SDLoc DL(N0);
+ SDValue ADDNode = DAG.getNode(ISD::ADD, DL, LL.getValueType(),
+ LL, DAG.getConstant(1, DL,
+ LL.getValueType()));
+ AddToWorklist(ADDNode.getNode());
+ return DAG.getSetCC(SDLoc(LocReference), VT, ADDNode,
+ DAG.getConstant(2, DL, LL.getValueType()),
+ ISD::SETUGE);
+ }
}
// canonicalize equivalent to ll == rl
if (LL == RR && LR == RL) {
@@ -2967,6 +3060,11 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1,
unsigned Size = VT.getSizeInBits();
const APInt &AndMask = CAnd->getAPIntValue();
unsigned ShiftBits = CShift->getZExtValue();
+
+ // Bail out; this node will probably disappear anyway.
+ if (ShiftBits == 0)
+ return SDValue();
+
unsigned MaskBits = AndMask.countTrailingOnes();
EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
@@ -2985,7 +3083,7 @@ SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1,
// extended to handle extensions mixed in.
SDValue SL(N0);
- assert(ShiftBits != 0 && MaskBits <= Size);
+ assert(MaskBits <= Size);
// Extracting the highest bit of the low half.
EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
@@ -3050,6 +3148,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
SDValue N1 = N->getOperand(1);
EVT VT = N1.getValueType();
+ // x & x --> x
+ if (N0 == N1)
+ return N0;
+
// fold vector ops
if (VT.isVector()) {
if (SDValue FoldedVOp = SimplifyVBinOp(N))
@@ -3058,16 +3160,12 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// fold (and x, 0) -> 0, vector edition
if (ISD::isBuildVectorAllZeros(N0.getNode()))
// do not return N0, because undef node may exist in N0
- return DAG.getConstant(
- APInt::getNullValue(
- N0.getValueType().getScalarType().getSizeInBits()),
- SDLoc(N), N0.getValueType());
+ return DAG.getConstant(APInt::getNullValue(N0.getScalarValueSizeInBits()),
+ SDLoc(N), N0.getValueType());
if (ISD::isBuildVectorAllZeros(N1.getNode()))
// do not return N1, because undef node may exist in N1
- return DAG.getConstant(
- APInt::getNullValue(
- N1.getValueType().getScalarType().getSizeInBits()),
- SDLoc(N), N1.getValueType());
+ return DAG.getConstant(APInt::getNullValue(N1.getScalarValueSizeInBits()),
+ SDLoc(N), N1.getValueType());
// fold (and x, -1) -> x, vector edition
if (ISD::isBuildVectorAllOnes(N0.getNode()))
@@ -3078,7 +3176,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// fold (and c1, c2) -> c1&c2
ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
if (N0C && N1C && !N1C->isOpaque())
return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
// canonicalize constant to RHS
@@ -3089,7 +3187,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (isAllOnesConstant(N1))
return N0;
// if (and x, c) is known to be zero, return 0
- unsigned BitWidth = VT.getScalarType().getSizeInBits();
+ unsigned BitWidth = VT.getScalarSizeInBits();
if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
APInt::getAllOnesValue(BitWidth)))
return DAG.getConstant(0, SDLoc(N), VT);
@@ -3098,14 +3196,14 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return RAND;
// fold (and (or x, C), D) -> D if (C & D) == D
if (N1C && N0.getOpcode() == ISD::OR)
- if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
+ if (ConstantSDNode *ORI = isConstOrConstSplat(N0.getOperand(1)))
if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue())
return N1;
// fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
SDValue N0Op0 = N0.getOperand(0);
APInt Mask = ~N1C->getAPIntValue();
- Mask = Mask.trunc(N0Op0.getValueSizeInBits());
+ Mask = Mask.trunc(N0Op0.getScalarValueSizeInBits());
if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
N0.getValueType(), N0Op0);
@@ -3156,7 +3254,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// that will apply equally to all members of the vector, so AND all the
// lanes of the constant together.
EVT VT = Vector->getValueType(0);
- unsigned BitWidth = VT.getVectorElementType().getSizeInBits();
+ unsigned BitWidth = VT.getScalarSizeInBits();
// If the splat value has been compressed to a bitlength lower
// than the size of the vector lane, we need to re-expand it to
@@ -3187,8 +3285,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// Resize the constant to the same size as the original memory access before
// extension. If it is still the AllOnesValue then this AND is completely
// unneeded.
- Constant =
- Constant.zextOrTrunc(Load->getMemoryVT().getScalarType().getSizeInBits());
+ Constant = Constant.zextOrTrunc(Load->getMemoryVT().getScalarSizeInBits());
bool B;
switch (Load->getExtensionType()) {
@@ -3230,9 +3327,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// fold (and (load x), 255) -> (zextload x, i8)
// fold (and (extload x, i16), 255) -> (zextload x, i8)
// fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
- if (N1C && (N0.getOpcode() == ISD::LOAD ||
- (N0.getOpcode() == ISD::ANY_EXTEND &&
- N0.getOperand(0).getOpcode() == ISD::LOAD))) {
+ if (!VT.isVector() && N1C && (N0.getOpcode() == ISD::LOAD ||
+ (N0.getOpcode() == ISD::ANY_EXTEND &&
+ N0.getOperand(0).getOpcode() == ISD::LOAD))) {
bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND;
LoadSDNode *LN0 = HasAnyExt
? cast<LoadSDNode>(N0.getOperand(0))
@@ -3293,10 +3390,29 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
return Tmp;
+ // Masking the negated extension of a boolean is just the zero-extended
+ // boolean:
+ // and (sub 0, zext(bool X)), 1 --> zext(bool X)
+ // and (sub 0, sext(bool X)), 1 --> zext(bool X)
+ //
+ // Note: the SimplifyDemandedBits fold below can make an information-losing
+ // transform, and then we have no way to find this better fold.
+ if (N1C && N1C->isOne() && N0.getOpcode() == ISD::SUB) {
+ ConstantSDNode *SubLHS = isConstOrConstSplat(N0.getOperand(0));
+ SDValue SubRHS = N0.getOperand(1);
+ if (SubLHS && SubLHS->isNullValue()) {
+ if (SubRHS.getOpcode() == ISD::ZERO_EXTEND &&
+ SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
+ return SubRHS;
+ if (SubRHS.getOpcode() == ISD::SIGN_EXTEND &&
+ SubRHS.getOperand(0).getScalarValueSizeInBits() == 1)
+ return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, SubRHS.getOperand(0));
+ }
+ }
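The two identities behind this fold, (0 - zext(b)) & 1 == b and (0 - sext(b)) & 1 == b for a boolean b, can be checked directly; illustrative only:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t B : {0u, 1u}) {            // B models zext(bool X)
        assert(((0u - B) & 1u) == B);          // and (sub 0, zext X), 1 --> zext X
        uint32_t SExt = B ? 0xFFFFFFFFu : 0u;  // sext(bool X)
        assert(((0u - SExt) & 1u) == B);       // and (sub 0, sext X), 1 --> zext X
      }
      return 0;
    }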
+
// fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
// fold (and (sra)) -> (and (srl)) when possible.
- if (!VT.isVector() &&
- SimplifyDemandedBits(SDValue(N, 0)))
+ if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
// fold (zext_inreg (extload x)) -> (zextload x)
@@ -3305,9 +3421,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
EVT MemVT = LN0->getMemoryVT();
// If we zero all the possible extended bits, then we can turn this into
// a zextload if we are running before legalize or the operation is legal.
- unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
+ unsigned BitWidth = N1.getScalarValueSizeInBits();
if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
- BitWidth - MemVT.getScalarType().getSizeInBits())) &&
+ BitWidth - MemVT.getScalarSizeInBits())) &&
((!LegalOperations && !LN0->isVolatile()) ||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
@@ -3325,9 +3441,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
EVT MemVT = LN0->getMemoryVT();
// If we zero all the possible extended bits, then we can turn this into
// a zextload if we are running before legalize or the operation is legal.
- unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
+ unsigned BitWidth = N1.getScalarValueSizeInBits();
if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
- BitWidth - MemVT.getScalarType().getSizeInBits())) &&
+ BitWidth - MemVT.getScalarSizeInBits())) &&
((!LegalOperations && !LN0->isVolatile()) ||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
@@ -3391,8 +3507,7 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
std::swap(N0, N1);
if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
return SDValue();
- if (!N0.getNode()->hasOneUse() ||
- !N1.getNode()->hasOneUse())
+ if (!N0.getNode()->hasOneUse() || !N1.getNode()->hasOneUse())
return SDValue();
ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
@@ -3627,18 +3742,24 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) {
// fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0)
// fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0)
if (isNullConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
- SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR),
- LR.getValueType(), LL, RL);
- AddToWorklist(ORNode.getNode());
- return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
+ EVT CCVT = getSetCCResultType(LR.getValueType());
+ if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
+ SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR),
+ LR.getValueType(), LL, RL);
+ AddToWorklist(ORNode.getNode());
+ return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
+ }
}
// fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
// fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1)
if (isAllOnesConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
- SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR),
- LR.getValueType(), LL, RL);
- AddToWorklist(ANDNode.getNode());
- return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
+ EVT CCVT = getSetCCResultType(LR.getValueType());
+ if (VT == CCVT || (!LegalOperations && VT == MVT::i1)) {
+ SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR),
+ LR.getValueType(), LL, RL);
+ AddToWorklist(ANDNode.getNode());
+ return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
+ }
}
}
// canonicalize equivalent to ll == rl
@@ -3708,6 +3829,10 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
SDValue N1 = N->getOperand(1);
EVT VT = N1.getValueType();
+ // x | x --> x
+ if (N0 == N1)
+ return N0;
+
// fold vector ops
if (VT.isVector()) {
if (SDValue FoldedVOp = SimplifyVBinOp(N))
@@ -3723,15 +3848,13 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
if (ISD::isBuildVectorAllOnes(N0.getNode()))
// do not return N0, because undef node may exist in N0
return DAG.getConstant(
- APInt::getAllOnesValue(
- N0.getValueType().getScalarType().getSizeInBits()),
- SDLoc(N), N0.getValueType());
+ APInt::getAllOnesValue(N0.getScalarValueSizeInBits()), SDLoc(N),
+ N0.getValueType());
if (ISD::isBuildVectorAllOnes(N1.getNode()))
// do not return N1, because undef node may exist in N1
return DAG.getConstant(
- APInt::getAllOnesValue(
- N1.getValueType().getScalarType().getSizeInBits()),
- SDLoc(N), N1.getValueType());
+ APInt::getAllOnesValue(N1.getScalarValueSizeInBits()), SDLoc(N),
+ N1.getValueType());
// fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
// Do this only if the resulting shuffle is legal.
@@ -4122,6 +4245,110 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
return nullptr;
}
+namespace {
+/// Helper struct to parse and store a memory address as base + index + offset.
+/// We ignore sign extensions when it is safe to do so.
+/// The following two expressions are not equivalent. To differentiate we need
+/// to store whether there was a sign extension involved in the index
+/// computation.
+/// (load (i64 add (i64 copyfromreg %c)
+/// (i64 signextend (add (i8 load %index)
+/// (i8 1))))
+/// vs
+///
+/// (load (i64 add (i64 copyfromreg %c)
+/// (i64 signextend (i32 add (i32 signextend (i8 load %index))
+/// (i32 1)))))
+struct BaseIndexOffset {
+ SDValue Base;
+ SDValue Index;
+ int64_t Offset;
+ bool IsIndexSignExt;
+
+ BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {}
+
+ BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset,
+ bool IsIndexSignExt) :
+ Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {}
+
+ bool equalBaseIndex(const BaseIndexOffset &Other) {
+ return Other.Base == Base && Other.Index == Index &&
+ Other.IsIndexSignExt == IsIndexSignExt;
+ }
+
+ /// Parses tree in Ptr for base, index, offset addresses.
+ static BaseIndexOffset match(SDValue Ptr, SelectionDAG &DAG) {
+ bool IsIndexSignExt = false;
+
+ // Split up a folded GlobalAddress+Offset into its component parts.
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ptr))
+ if (GA->getOpcode() == ISD::GlobalAddress && GA->getOffset() != 0) {
+ return BaseIndexOffset(DAG.getGlobalAddress(GA->getGlobal(),
+ SDLoc(GA),
+ GA->getValueType(0),
+ /*Offset=*/0,
+ /*isTargetGA=*/false,
+ GA->getTargetFlags()),
+ SDValue(),
+ GA->getOffset(),
+ IsIndexSignExt);
+ }
+
+ // We can only pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD
+ // instruction, then it could be just the BASE or something else we don't
+ // know how to handle. Just use Ptr as BASE and give up.
+ if (Ptr->getOpcode() != ISD::ADD)
+ return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
+
+ // We know that we have at least an ADD instruction. Try to pattern match
+ // the simple case of BASE + OFFSET.
+ if (isa<ConstantSDNode>(Ptr->getOperand(1))) {
+ int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
+ return BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset,
+ IsIndexSignExt);
+ }
+
+ // Inside a loop the current BASE pointer is calculated using an ADD and a
+ // MUL instruction. In this case Ptr is the actual BASE pointer.
+ // (i64 add (i64 %array_ptr)
+ // (i64 mul (i64 %induction_var)
+ // (i64 %element_size)))
+ if (Ptr->getOperand(1)->getOpcode() == ISD::MUL)
+ return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
+
+ // Look at Base + Index + Offset cases.
+ SDValue Base = Ptr->getOperand(0);
+ SDValue IndexOffset = Ptr->getOperand(1);
+
+ // Skip signextends.
+ if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) {
+ IndexOffset = IndexOffset->getOperand(0);
+ IsIndexSignExt = true;
+ }
+
+ // Either the case of Base + Index (no offset) or something else.
+ if (IndexOffset->getOpcode() != ISD::ADD)
+ return BaseIndexOffset(Base, IndexOffset, 0, IsIndexSignExt);
+
+ // Now we have the case of Base + Index + offset.
+ SDValue Index = IndexOffset->getOperand(0);
+ SDValue Offset = IndexOffset->getOperand(1);
+
+ if (!isa<ConstantSDNode>(Offset))
+ return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
+
+ // Ignore signextends.
+ if (Index->getOpcode() == ISD::SIGN_EXTEND) {
+ Index = Index->getOperand(0);
+ IsIndexSignExt = true;
+ } else IsIndexSignExt = false;
+
+ int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue();
+ return BaseIndexOffset(Base, Index, Off, IsIndexSignExt);
+ }
+};
+} // namespace
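A hedged sketch of how store-merging code might use the new helper; the accessesSameObject wrapper and the Distance out-parameter are hypothetical names used only for illustration:

    // Hypothetical helper, illustrative only: decide whether two pointers
    // share the same base/index decomposition and, if so, report their
    // byte distance.
    static bool accessesSameObject(SDValue PtrA, SDValue PtrB,
                                   SelectionDAG &DAG, int64_t &Distance) {
      BaseIndexOffset A = BaseIndexOffset::match(PtrA, DAG);
      BaseIndexOffset B = BaseIndexOffset::match(PtrB, DAG);
      if (!A.equalBaseIndex(B))
        return false;                 // different base or index: give up
      Distance = B.Offset - A.Offset; // offset difference between the accesses
      return true;
    }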
+
SDValue DAGCombiner::visitXOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -4317,16 +4544,20 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
if (!BinOpCst) return SDValue();
- // FIXME: disable this unless the input to the binop is a shift by a constant.
- // If it is not a shift, it pessimizes some common cases like:
- //
- // void foo(int *X, int i) { X[i & 1235] = 1; }
- // int bar(int *X, int i) { return X[i & 255]; }
+ // FIXME: disable this unless the input to the binop is a shift by a constant
+ // or is a copy/select. Enable this in other cases once known to be profitable.
SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
- if ((BinOpLHSVal->getOpcode() != ISD::SHL &&
- BinOpLHSVal->getOpcode() != ISD::SRA &&
- BinOpLHSVal->getOpcode() != ISD::SRL) ||
- !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1)))
+ bool isShift = BinOpLHSVal->getOpcode() == ISD::SHL ||
+ BinOpLHSVal->getOpcode() == ISD::SRA ||
+ BinOpLHSVal->getOpcode() == ISD::SRL;
+ bool isCopyOrSelect = BinOpLHSVal->getOpcode() == ISD::CopyFromReg ||
+ BinOpLHSVal->getOpcode() == ISD::SELECT;
+
+ if ((!isShift || !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) &&
+ !isCopyOrSelect)
+ return SDValue();
+
+ if (isCopyOrSelect && N->hasOneUse())
return SDValue();
EVT VT = N->getValueType(0);
@@ -4366,19 +4597,15 @@ SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
// (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
SDValue N01 = N->getOperand(0).getOperand(1);
-
- if (ConstantSDNode *N01C = isConstOrConstSplat(N01)) {
- if (!N01C->isOpaque()) {
- EVT TruncVT = N->getValueType(0);
- SDValue N00 = N->getOperand(0).getOperand(0);
- APInt TruncC = N01C->getAPIntValue();
- TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits());
- SDLoc DL(N);
-
- return DAG.getNode(ISD::AND, DL, TruncVT,
- DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00),
- DAG.getConstant(TruncC, DL, TruncVT));
- }
+ if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
+ SDLoc DL(N);
+ EVT TruncVT = N->getValueType(0);
+ SDValue N00 = N->getOperand(0).getOperand(0);
+ SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
+ SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
+ AddToWorklist(Trunc00.getNode());
+ AddToWorklist(Trunc01.getNode());
+ return DAG.getNode(ISD::AND, DL, TruncVT, Trunc00, Trunc01);
}
}
@@ -4404,7 +4631,6 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
unsigned OpSizeInBits = VT.getScalarSizeInBits();
// fold vector ops
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
if (VT.isVector()) {
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
@@ -4425,12 +4651,12 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
N01CV, N1CV))
return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
}
- } else {
- N1C = isConstOrConstSplat(N1);
}
}
}
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
+
// fold (shl c1, c2) -> c1<<c2
ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
if (N0C && N1C && !N1C->isOpaque())
@@ -4464,13 +4690,18 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
if (N1C && N0.getOpcode() == ISD::SHL) {
if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
- uint64_t c1 = N0C1->getZExtValue();
- uint64_t c2 = N1C->getZExtValue();
SDLoc DL(N);
- if (c1 + c2 >= OpSizeInBits)
+ APInt c1 = N0C1->getAPIntValue();
+ APInt c2 = N1C->getAPIntValue();
+ zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+
+ APInt Sum = c1 + c2;
+ if (Sum.uge(OpSizeInBits))
return DAG.getConstant(0, DL, VT);
- return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
- DAG.getConstant(c1 + c2, DL, N1.getValueType()));
+
+ return DAG.getNode(
+ ISD::SHL, DL, VT, N0.getOperand(0),
+ DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
}
}
@@ -4485,18 +4716,22 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
N0.getOperand(0).getOpcode() == ISD::SHL) {
SDValue N0Op0 = N0.getOperand(0);
if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
- uint64_t c1 = N0Op0C1->getZExtValue();
- uint64_t c2 = N1C->getZExtValue();
+ APInt c1 = N0Op0C1->getAPIntValue();
+ APInt c2 = N1C->getAPIntValue();
+ zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+
EVT InnerShiftVT = N0Op0.getValueType();
uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
- if (c2 >= OpSizeInBits - InnerShiftSize) {
+ if (c2.uge(OpSizeInBits - InnerShiftSize)) {
SDLoc DL(N0);
- if (c1 + c2 >= OpSizeInBits)
+ APInt Sum = c1 + c2;
+ if (Sum.uge(OpSizeInBits))
return DAG.getConstant(0, DL, VT);
- return DAG.getNode(ISD::SHL, DL, VT,
- DAG.getNode(N0.getOpcode(), DL, VT,
- N0Op0->getOperand(0)),
- DAG.getConstant(c1 + c2, DL, N1.getValueType()));
+
+ return DAG.getNode(
+ ISD::SHL, DL, VT,
+ DAG.getNode(N0.getOpcode(), DL, VT, N0Op0->getOperand(0)),
+ DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
}
}
}
@@ -4508,8 +4743,8 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
N0.getOperand(0).getOpcode() == ISD::SRL) {
SDValue N0Op0 = N0.getOperand(0);
if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
- uint64_t c1 = N0Op0C1->getZExtValue();
- if (c1 < VT.getScalarSizeInBits()) {
+ if (N0Op0C1->getAPIntValue().ult(VT.getScalarSizeInBits())) {
+ uint64_t c1 = N0Op0C1->getZExtValue();
uint64_t c2 = N1C->getZExtValue();
if (c1 == c2) {
SDValue NewOp0 = N0.getOperand(0);
@@ -4569,37 +4804,37 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
}
}
}
+
// fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
- if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) {
+ if (N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1) &&
+ isConstantOrConstantVector(N1, /* No Opaques */ true)) {
unsigned BitSize = VT.getScalarSizeInBits();
SDLoc DL(N);
- SDValue HiBitsMask =
- DAG.getConstant(APInt::getHighBitsSet(BitSize,
- BitSize - N1C->getZExtValue()),
- DL, VT);
- return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0),
- HiBitsMask);
+ SDValue AllBits = DAG.getConstant(APInt::getAllOnesValue(BitSize), DL, VT);
+ SDValue HiBitsMask = DAG.getNode(ISD::SHL, DL, VT, AllBits, N1);
+ return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), HiBitsMask);
}
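The rewritten fold replaces the precomputed high-bits mask with and x, (shl -1, c1); the underlying identity (x >>s c) << c == x & (-1 << c) can be checked exhaustively for 8-bit values (illustrative only; the arithmetic shift is emulated explicitly to stay portable):

    #include <cassert>
    #include <cstdint>

    int main() {
      const unsigned C = 3;
      for (unsigned V = 0; V <= 0xFF; ++V) {
        uint8_t X = static_cast<uint8_t>(V);
        // Emulate (i8)X >>s 3 without relying on signed-shift semantics.
        uint8_t Sra = (X >> C) | ((X & 0x80) ? 0xE0 : 0x00);
        uint8_t Lhs = static_cast<uint8_t>(Sra << C);       // shl (sra x, 3), 3
        uint8_t Rhs = X & static_cast<uint8_t>(0xFF << C);  // and x, (shl -1, 3)
        assert(Lhs == Rhs);
      }
      return 0;
    }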
// fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
// Variant of version done on multiply, except mul by a power of 2 is turned
// into a shift.
- APInt Val;
- if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
- (isa<ConstantSDNode>(N0.getOperand(1)) ||
- ISD::isConstantSplatVector(N0.getOperand(1).getNode(), Val))) {
+ if (N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
+ isConstantOrConstantVector(N1, /* No Opaques */ true) &&
+ isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
+ AddToWorklist(Shl0.getNode());
+ AddToWorklist(Shl1.getNode());
return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1);
}
// fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
- if (N1C && N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse()) {
- if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
- if (SDValue Folded =
- DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, N0C1, N1C))
- return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Folded);
- }
+ if (N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse() &&
+ isConstantOrConstantVector(N1, /* No Opaques */ true) &&
+ isConstantOrConstantVector(N0.getOperand(1), /* No Opaques */ true)) {
+ SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
+ if (isConstantOrConstantVector(Shl))
+ return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Shl);
}
if (N1C && !N1C->isOpaque())
@@ -4613,16 +4848,18 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
- unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
+ unsigned OpSizeInBits = VT.getScalarSizeInBits();
+
+ // Arithmetic shifting an all-sign-bit value is a no-op.
+ if (DAG.ComputeNumSignBits(N0) == OpSizeInBits)
+ return N0;
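// Scalar model of the new early-out above: when ComputeNumSignBits(x) equals
// the bit width, every bit of x is a copy of its sign bit (x is 0 or -1 per
// lane), so an arithmetic shift right by any in-range amount is a no-op.
#include <cassert>
#include <cstdint>
#include <initializer_list>

int main() {
  for (int32_t X : {0, -1})
    for (uint32_t C = 0; C < 32; ++C)
      assert((X >> C) == X);
  return 0;
}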
// fold vector ops
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- if (VT.isVector()) {
+ if (VT.isVector())
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
- N1C = isConstOrConstSplat(N1);
- }
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
// fold (sra c1, c2) -> (sra c1, c2)
ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
@@ -4634,8 +4871,8 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
// fold (sra -1, x) -> -1
if (isAllOnesConstant(N0))
return N0;
- // fold (sra x, (setge c, size(x))) -> undef
- if (N1C && N1C->getZExtValue() >= OpSizeInBits)
+ // fold (sra x, c >= size(x)) -> undef
+ if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
return DAG.getUNDEF(VT);
// fold (sra x, 0) -> x
if (N1C && N1C->isNullValue())
@@ -4656,13 +4893,19 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
// fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
if (N1C && N0.getOpcode() == ISD::SRA) {
- if (ConstantSDNode *C1 = isConstOrConstSplat(N0.getOperand(1))) {
- unsigned Sum = N1C->getZExtValue() + C1->getZExtValue();
- if (Sum >= OpSizeInBits)
- Sum = OpSizeInBits - 1;
+ if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
SDLoc DL(N);
- return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0),
- DAG.getConstant(Sum, DL, N1.getValueType()));
+ APInt c1 = N0C1->getAPIntValue();
+ APInt c2 = N1C->getAPIntValue();
+ zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+
+ APInt Sum = c1 + c2;
+ if (Sum.uge(OpSizeInBits))
+ Sum = APInt(OpSizeInBits, OpSizeInBits - 1);
+
+ return DAG.getNode(
+ ISD::SRA, DL, VT, N0.getOperand(0),
+ DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
}
}
@@ -4759,16 +5002,14 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
- unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
+ unsigned OpSizeInBits = VT.getScalarSizeInBits();
// fold vector ops
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- if (VT.isVector()) {
+ if (VT.isVector())
if (SDValue FoldedVOp = SimplifyVBinOp(N))
return FoldedVOp;
- N1C = isConstOrConstSplat(N1);
- }
+ ConstantSDNode *N1C = isConstOrConstSplat(N1);
// fold (srl c1, c2) -> c1 >>u c2
ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
@@ -4778,7 +5019,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
if (isNullConstant(N0))
return N0;
// fold (srl x, c >= size(x)) -> undef
- if (N1C && N1C->getZExtValue() >= OpSizeInBits)
+ if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
return DAG.getUNDEF(VT);
// fold (srl x, 0) -> x
if (N1C && N1C->isNullValue())
@@ -4790,14 +5031,19 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
if (N1C && N0.getOpcode() == ISD::SRL) {
- if (ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1))) {
- uint64_t c1 = N01C->getZExtValue();
- uint64_t c2 = N1C->getZExtValue();
+ if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
SDLoc DL(N);
- if (c1 + c2 >= OpSizeInBits)
+ APInt c1 = N0C1->getAPIntValue();
+ APInt c2 = N1C->getAPIntValue();
+ zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+
+ APInt Sum = c1 + c2;
+ if (Sum.uge(OpSizeInBits))
return DAG.getConstant(0, DL, VT);
- return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
- DAG.getConstant(c1 + c2, DL, N1.getValueType()));
+
+ return DAG.getNode(
+ ISD::SRL, DL, VT, N0.getOperand(0),
+ DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
}
}
@@ -4810,7 +5056,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
uint64_t c2 = N1C->getZExtValue();
EVT InnerShiftVT = N0.getOperand(0).getValueType();
EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType();
- uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
+ uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
// This is only valid if the OpSizeInBits + c1 = size of inner shift.
if (c1 + OpSizeInBits == InnerShiftSize) {
SDLoc DL(N0);
@@ -4825,14 +5071,14 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
}
// fold (srl (shl x, c), c) -> (and x, cst2)
- if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1) {
- unsigned BitSize = N0.getScalarValueSizeInBits();
- if (BitSize <= 64) {
- uint64_t ShAmt = N1C->getZExtValue() + 64 - BitSize;
- SDLoc DL(N);
- return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0),
- DAG.getConstant(~0ULL >> ShAmt, DL, VT));
- }
+ if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
+ isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
+ SDLoc DL(N);
+ APInt AllBits = APInt::getAllOnesValue(N0.getScalarValueSizeInBits());
+ SDValue Mask =
+ DAG.getNode(ISD::SRL, DL, VT, DAG.getConstant(AllBits, DL, VT), N1);
+ AddToWorklist(Mask.getNode());
+ return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), Mask);
}
// fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
@@ -5065,6 +5311,41 @@ static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
}
}
+// TODO: We should handle other cases of selecting between {-1,0,1} here.
+SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
+ SDValue Cond = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ EVT VT = N->getValueType(0);
+ EVT CondVT = Cond.getValueType();
+ SDLoc DL(N);
+
+ // fold (select Cond, 0, 1) -> (xor Cond, 1)
+ // We can't do this reliably if integer based booleans have different contents
+ // to floating point based booleans. This is because we can't tell whether we
+ // have an integer-based boolean or a floating-point-based boolean unless we
+ // can find the SETCC that produced it and inspect its operands. This is
+ // fairly easy if C is the SETCC node, but it can potentially be
+ // undiscoverable (or not reasonably discoverable). For example, it could be
+ // in another basic block or it could require searching a complicated
+ // expression.
+ if (VT.isInteger() &&
+ (CondVT == MVT::i1 || (CondVT.isInteger() &&
+ TLI.getBooleanContents(false, true) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ TLI.getBooleanContents(false, false) ==
+ TargetLowering::ZeroOrOneBooleanContent)) &&
+ isNullConstant(N1) && isOneConstant(N2)) {
+ SDValue NotCond = DAG.getNode(ISD::XOR, DL, CondVT, Cond,
+ DAG.getConstant(1, DL, CondVT));
+ if (VT.bitsEq(CondVT))
+ return NotCond;
+ return DAG.getZExtOrTrunc(NotCond, DL, VT);
+ }
+
+ return SDValue();
+}
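// Scalar model of foldSelectOfConstants, assuming the ZeroOrOne boolean
// representation the guard above requires: with Cond in {0, 1},
// select(Cond, 0, 1) is exactly Cond ^ 1, and the result is then
// zero-extended (or truncated) to the select's value type.
#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t Cond = 0; Cond <= 1; ++Cond) {
    uint32_t Selected = Cond ? 0u : 1u;           // (select Cond, 0, 1)
    uint64_t Widened  = (uint64_t)(Cond ^ 1u);    // (zext (xor Cond, 1))
    assert(Selected == Widened);
  }
  return 0;
}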
+
SDValue DAGCombiner::visitSELECT(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -5083,36 +5364,10 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
// fold (select C, 1, X) -> (or C, X)
if (VT == MVT::i1 && isOneConstant(N1))
return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
- // fold (select C, 0, 1) -> (xor C, 1)
- // We can't do this reliably if integer based booleans have different contents
- // to floating point based booleans. This is because we can't tell whether we
- // have an integer-based boolean or a floating-point-based boolean unless we
- // can find the SETCC that produced it and inspect its operands. This is
- // fairly easy if C is the SETCC node, but it can potentially be
- // undiscoverable (or not reasonably discoverable). For example, it could be
- // in another basic block or it could require searching a complicated
- // expression.
- if (VT.isInteger() &&
- (VT0 == MVT::i1 || (VT0.isInteger() &&
- TLI.getBooleanContents(false, false) ==
- TLI.getBooleanContents(false, true) &&
- TLI.getBooleanContents(false, false) ==
- TargetLowering::ZeroOrOneBooleanContent)) &&
- isNullConstant(N1) && isOneConstant(N2)) {
- SDValue XORNode;
- if (VT == VT0) {
- SDLoc DL(N);
- return DAG.getNode(ISD::XOR, DL, VT0,
- N0, DAG.getConstant(1, DL, VT0));
- }
- SDLoc DL0(N0);
- XORNode = DAG.getNode(ISD::XOR, DL0, VT0,
- N0, DAG.getConstant(1, DL0, VT0));
- AddToWorklist(XORNode.getNode());
- if (VT.bitsGT(VT0))
- return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, XORNode);
- return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, XORNode);
- }
+
+ if (SDValue V = foldSelectOfConstants(N))
+ return V;
+
// fold (select C, 0, X) -> (and (not C), X)
if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
@@ -5145,7 +5400,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
// The code in this block deals with the following 2 equivalences:
// select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
// select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
- // The target can specify its prefered form with the
+ // The target can specify its preferred form with the
// shouldNormalizeToSelectSequence() callback. However we always transform
// to the right anyway if we find the inner select exists in the DAG anyway
// and we always transform to the left side if we know that we can further
@@ -5214,6 +5469,22 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
}
}
+ // select (xor Cond, 1), X, Y -> select Cond, Y, X
+ // select (xor Cond, 0), X, Y -> select Cond, X, Y
+ if (VT0 == MVT::i1) {
+ if (N0->getOpcode() == ISD::XOR) {
+ if (auto *C = dyn_cast<ConstantSDNode>(N0->getOperand(1))) {
+ SDValue Cond0 = N0->getOperand(0);
+ if (C->isOne())
+ return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(),
+ Cond0, N2, N1);
+ else
+ return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(),
+ Cond0, N1, N2);
+ }
+ }
+ }
+
// fold selects based on a setcc into other things, such as min/max/abs
if (N0.getOpcode() == ISD::SETCC) {
// select x, y (fcmp lt x, y) -> fminnum x, y
@@ -5269,7 +5540,7 @@ std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
// This function assumes all the vselect's arguments are CONCAT_VECTOR
// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
- SDLoc dl(N);
+ SDLoc DL(N);
SDValue Cond = N->getOperand(0);
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
@@ -5316,7 +5587,7 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
"One half of the selector was all UNDEFs and the other was all the "
"same value. This should have been addressed before this function.");
return DAG.getNode(
- ISD::CONCAT_VECTORS, dl, VT,
+ ISD::CONCAT_VECTORS, DL, VT,
BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
}
@@ -5390,6 +5661,7 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N);
SDValue Mask = MST->getMask();
SDValue Data = MST->getValue();
+ EVT VT = Data.getValueType();
SDLoc DL(N);
// If the MSTORE data type requires splitting and the mask is provided by a
@@ -5399,16 +5671,13 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
if (Mask.getOpcode() == ISD::SETCC) {
// Check if any splitting is required.
- if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
+ if (TLI.getTypeAction(*DAG.getContext(), VT) !=
TargetLowering::TypeSplitVector)
return SDValue();
SDValue MaskLo, MaskHi, Lo, Hi;
std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
- EVT LoVT, HiVT;
- std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MST->getValueType(0));
-
SDValue Chain = MST->getChain();
SDValue Ptr = MST->getBasePtr();
@@ -5418,8 +5687,7 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
// if Alignment is equal to the vector size,
// take the half of it for the second part
unsigned SecondHalfAlignment =
- (Alignment == Data->getValueType(0).getSizeInBits()/8) ?
- Alignment/2 : Alignment;
+ (Alignment == VT.getSizeInBits() / 8) ? Alignment / 2 : Alignment;
EVT LoMemVT, HiMemVT;
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
@@ -5433,11 +5701,11 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
Alignment, MST->getAAInfo(), MST->getRanges());
Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
- MST->isTruncatingStore());
+ MST->isTruncatingStore(),
+ MST->isCompressingStore());
- unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
- Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, DL, Ptr.getValueType()));
+ Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
+ MST->isCompressingStore());
MMO = DAG.getMachineFunction().
getMachineMemOperand(MST->getPointerInfo(),
@@ -5446,7 +5714,8 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
MST->getRanges());
Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
- MST->isTruncatingStore());
+ MST->isTruncatingStore(),
+ MST->isCompressingStore());
AddToWorklist(Lo.getNode());
AddToWorklist(Hi.getNode());
@@ -5585,11 +5854,10 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
Alignment, MLD->getAAInfo(), MLD->getRanges());
Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
- ISD::NON_EXTLOAD);
+ ISD::NON_EXTLOAD, MLD->isExpandingLoad());
- unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
- Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, DL, Ptr.getValueType()));
+ Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
+ MLD->isExpandingLoad());
MMO = DAG.getMachineFunction().
getMachineMemOperand(MLD->getPointerInfo(),
@@ -5597,7 +5865,7 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
- ISD::NON_EXTLOAD);
+ ISD::NON_EXTLOAD, MLD->isExpandingLoad());
AddToWorklist(Lo.getNode());
AddToWorklist(Hi.getNode());
@@ -5625,6 +5893,10 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
SDValue N2 = N->getOperand(2);
SDLoc DL(N);
+ // fold (vselect C, X, X) -> X
+ if (N1 == N2)
+ return N1;
+
// Canonicalize integer abs.
// vselect (setg[te] X, 0), X, -X ->
// vselect (setgt X, -1), X, -X ->
@@ -5648,7 +5920,7 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
EVT VT = LHS.getValueType();
SDValue Shift = DAG.getNode(
ISD::SRA, DL, VT, LHS,
- DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, DL, VT));
+ DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT));
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
AddToWorklist(Shift.getNode());
AddToWorklist(Add.getNode());
@@ -5803,7 +6075,7 @@ static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
// We can fold this node into a build_vector.
unsigned VTBits = SVT.getSizeInBits();
- unsigned EVTBits = N0->getValueType(0).getScalarType().getSizeInBits();
+ unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
SmallVector<SDValue, 8> Elts;
unsigned NumElts = VT.getVectorNumElements();
SDLoc DL(N);
@@ -6026,7 +6298,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// fold (sext (truncate (load x))) -> (sext (smaller load x))
// fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
- SDNode* oye = N0.getNode()->getOperand(0).getNode();
+ SDNode *oye = N0.getOperand(0).getNode();
if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
// CombineTo deleted the truncate, if needed, but not what's under it.
@@ -6038,9 +6310,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// See if the value being truncated is already sign extended. If so, just
// eliminate the trunc/sext pair.
SDValue Op = N0.getOperand(0);
- unsigned OpBits = Op.getValueType().getScalarType().getSizeInBits();
- unsigned MidBits = N0.getValueType().getScalarType().getSizeInBits();
- unsigned DestBits = VT.getScalarType().getSizeInBits();
+ unsigned OpBits = Op.getScalarValueSizeInBits();
+ unsigned MidBits = N0.getScalarValueSizeInBits();
+ unsigned DestBits = VT.getScalarSizeInBits();
unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
if (OpBits == DestBits) {
@@ -6201,7 +6473,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
// sext(setcc x, y, cc) -> (select (setcc x, y, cc), T, 0)
// Here, T can be 1 or -1, depending on the type of the setcc and
// getBooleanContents().
- unsigned SetCCWidth = N0.getValueType().getScalarSizeInBits();
+ unsigned SetCCWidth = N0.getScalarValueSizeInBits();
SDLoc DL(N);
// To determine the "true" side of the select, we need to know the high bit
@@ -6323,7 +6595,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
// fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
if (N0.getOpcode() == ISD::TRUNCATE) {
if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
- SDNode* oye = N0.getNode()->getOperand(0).getNode();
+ SDNode *oye = N0.getOperand(0).getNode();
if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
// CombineTo deleted the truncate, if needed, but not what's under it.
@@ -6338,7 +6610,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
// fold (zext (truncate (load x))) -> (zext (smaller load x))
// fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
- SDNode *oye = N0.getNode()->getOperand(0).getNode();
+ SDNode *oye = N0.getOperand(0).getNode();
if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
// CombineTo deleted the truncate, if needed, but not what's under it.
@@ -6528,7 +6800,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
// elements we can use a matching integer vector type and then
// truncate/sign extend.
EVT MatchingElementType = EVT::getIntegerVT(
- *DAG.getContext(), N00VT.getScalarType().getSizeInBits());
+ *DAG.getContext(), N00VT.getScalarSizeInBits());
EVT MatchingVectorType = EVT::getVectorVT(
*DAG.getContext(), MatchingElementType, N00VT.getVectorNumElements());
SDValue VsetCC =
@@ -6558,8 +6830,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
SDValue InnerZExt = N0.getOperand(0);
// If the original shl may be shifting out bits, do not perform this
// transformation.
- unsigned KnownZeroBits = InnerZExt.getValueType().getSizeInBits() -
- InnerZExt.getOperand(0).getValueType().getSizeInBits();
+ unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
+ InnerZExt.getOperand(0).getValueSizeInBits();
if (ShAmtVal > KnownZeroBits)
return SDValue();
}
@@ -6598,7 +6870,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
// fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
if (N0.getOpcode() == ISD::TRUNCATE) {
if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
- SDNode* oye = N0.getNode()->getOperand(0).getNode();
+ SDNode *oye = N0.getOperand(0).getNode();
if (NarrowLoad.getNode() != N0.getNode()) {
CombineTo(N0.getNode(), NarrowLoad);
// CombineTo deleted the truncate, if needed, but not what's under it.
@@ -6625,15 +6897,15 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
N0.getOperand(1).getOpcode() == ISD::Constant &&
!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
N0.getValueType())) {
+ SDLoc DL(N);
SDValue X = N0.getOperand(0).getOperand(0);
if (X.getValueType().bitsLT(VT)) {
- X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, X);
+ X = DAG.getNode(ISD::ANY_EXTEND, DL, VT, X);
} else if (X.getValueType().bitsGT(VT)) {
- X = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, X);
+ X = DAG.getNode(ISD::TRUNCATE, DL, VT, X);
}
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
Mask = Mask.zext(VT.getSizeInBits());
- SDLoc DL(N);
return DAG.getNode(ISD::AND, DL, VT,
X, DAG.getConstant(Mask, DL, VT));
}
@@ -6820,7 +7092,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
if ((ShAmt & (EVTBits-1)) == 0) {
N0 = N0.getOperand(0);
// Is the load width a multiple of size of VT?
- if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0)
+ if ((N0.getValueSizeInBits() & (EVTBits-1)) != 0)
return SDValue();
}
@@ -6952,8 +7224,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
EVT EVT = cast<VTSDNode>(N1)->getVT();
- unsigned VTBits = VT.getScalarType().getSizeInBits();
- unsigned EVTBits = EVT.getScalarType().getSizeInBits();
+ unsigned VTBits = VT.getScalarSizeInBits();
+ unsigned EVTBits = EVT.getScalarSizeInBits();
if (N0.isUndef())
return DAG.getUNDEF(VT);
@@ -6977,14 +7249,23 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
// if x is small enough.
if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
SDValue N00 = N0.getOperand(0);
- if (N00.getValueType().getScalarType().getSizeInBits() <= EVTBits &&
+ if (N00.getScalarValueSizeInBits() <= EVTBits &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
+ return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
+ }
+
+ // fold (sext_in_reg (zext x)) -> (sext x)
+ // iff we are extending the source sign bit.
+ if (N0.getOpcode() == ISD::ZERO_EXTEND) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getScalarValueSizeInBits() == EVTBits &&
(!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
}
// fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits)))
- return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT);
+ return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT.getScalarType());
// fold operands of sext_in_reg based on knowledge that the top bits are not
// demanded.
@@ -7111,6 +7392,10 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
return N0.getOperand(0);
}
+ // If this is anyext(trunc), don't fold it, allow ourselves to be folded.
+ if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ANY_EXTEND))
+ return SDValue();
+
// Fold extract-and-trunc into a narrow extract. For example:
// i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
// i32 y = TRUNCATE(i64 x)
@@ -7148,7 +7433,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
}
// trunc (select c, a, b) -> select c, (trunc a), (trunc b)
- if (N0.getOpcode() == ISD::SELECT) {
+ if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
EVT SrcVT = N0.getValueType();
if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
TLI.isTruncateFree(SrcVT, VT)) {
@@ -7160,15 +7445,15 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
}
}
- // trunc (shl x, K) -> shl (trunc x), K => K < vt.size / 2
+ // trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
if (const ConstantSDNode *CAmt = isConstOrConstSplat(N0.getOperand(1))) {
uint64_t Amt = CAmt->getZExtValue();
- unsigned Size = VT.getSizeInBits();
+ unsigned Size = VT.getScalarSizeInBits();
- if (Amt < Size / 2) {
+ if (Amt < Size) {
SDLoc SL(N);
EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
@@ -7525,7 +7810,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
isa<ConstantFPSDNode>(N0.getOperand(0)) &&
VT.isInteger() && !VT.isVector()) {
- unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits();
+ unsigned OrigXWidth = N0.getOperand(1).getValueSizeInBits();
EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
if (isTypeLegal(IntXVT)) {
SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
@@ -8221,8 +8506,10 @@ SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
return SDValue();
}
-/// Try to perform FMA combining on a given FMUL node.
-SDValue DAGCombiner::visitFMULForFMACombine(SDNode *N) {
+/// Try to perform FMA combining on a given FMUL node based on the distributive
+/// law x * (y + 1) = x * y + x and variants thereof (commuted versions,
+/// subtraction instead of addition).
+SDValue DAGCombiner::visitFMULForFMADistributiveCombine(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
@@ -8231,17 +8518,23 @@ SDValue DAGCombiner::visitFMULForFMACombine(SDNode *N) {
assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
const TargetOptions &Options = DAG.getTarget().Options;
- bool AllowFusion =
- (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
- // Floating-point multiply-add with intermediate rounding.
- bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
+ // The transforms below are incorrect when x == 0 and y == inf, because the
+ // intermediate multiplication produces a nan.
+ if (!Options.NoInfsFPMath)
+ return SDValue();
// Floating-point multiply-add without intermediate rounding.
bool HasFMA =
- AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) &&
+ (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
+ TLI.isFMAFasterThanFMulAndFAdd(VT) &&
(!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
+ // Floating-point multiply-add with intermediate rounding. This can result
+ // in a less precise result due to the changed rounding order.
+ bool HasFMAD = Options.UnsafeFPMath &&
+ (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
+
// No valid opcode, do not combine.
if (!HasFMAD && !HasFMA)
return SDValue();
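// Why the distributive FMUL -> FMA combine above is gated on NoInfsFPMath:
// rewriting, say, (x + 1) * y into the fused form x * y + y introduces an
// intermediate multiplication that can produce a NaN the original expression
// never sees. A minimal sketch with x == 0 and y == inf:
#include <cassert>
#include <cmath>
#include <limits>

int main() {
  double X = 0.0;
  double Y = std::numeric_limits<double>::infinity();
  double Original = (X + 1.0) * Y;       // 1 * inf == inf
  double Combined = std::fma(X, Y, Y);   // 0 * inf + inf == NaN
  assert(std::isinf(Original) && !std::isnan(Original));
  assert(std::isnan(Combined));
  return 0;
}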
@@ -8338,17 +8631,20 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
return DAG.getNode(ISD::FSUB, DL, VT, N1,
GetNegatedExpression(N0, DAG, LegalOperations), Flags);
+ // FIXME: Auto-upgrade the target/function-level option.
+ if (Options.UnsafeFPMath || N->getFlags()->hasNoSignedZeros()) {
+ // fold (fadd A, 0) -> A
+ if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1))
+ if (N1C->isZero())
+ return N0;
+ }
+
// If 'unsafe math' is enabled, fold lots of things.
if (Options.UnsafeFPMath) {
// No FP constant should be created after legalization as Instruction
// Selection pass has a hard time dealing with FP constants.
bool AllowNewConst = (Level < AfterLegalizeDAG);
- // fold (fadd A, 0) -> A
- if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1))
- if (N1C->isZero())
- return N0;
-
// fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)))
@@ -8457,7 +8753,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
EVT VT = N->getValueType(0);
- SDLoc dl(N);
+ SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
@@ -8468,30 +8764,33 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
// fold (fsub c1, c2) -> c1-c2
if (N0CFP && N1CFP)
- return DAG.getNode(ISD::FSUB, dl, VT, N0, N1, Flags);
+ return DAG.getNode(ISD::FSUB, DL, VT, N0, N1, Flags);
// fold (fsub A, (fneg B)) -> (fadd A, B)
if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
- return DAG.getNode(ISD::FADD, dl, VT, N0,
+ return DAG.getNode(ISD::FADD, DL, VT, N0,
GetNegatedExpression(N1, DAG, LegalOperations), Flags);
- // If 'unsafe math' is enabled, fold lots of things.
- if (Options.UnsafeFPMath) {
- // (fsub A, 0) -> A
- if (N1CFP && N1CFP->isZero())
- return N0;
-
+ // FIXME: Auto-upgrade the target/function-level option.
+ if (Options.UnsafeFPMath || N->getFlags()->hasNoSignedZeros()) {
// (fsub 0, B) -> -B
if (N0CFP && N0CFP->isZero()) {
if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
return GetNegatedExpression(N1, DAG, LegalOperations);
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
- return DAG.getNode(ISD::FNEG, dl, VT, N1);
+ return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
}
+ }
+
+ // If 'unsafe math' is enabled, fold lots of things.
+ if (Options.UnsafeFPMath) {
+ // (fsub A, 0) -> A
+ if (N1CFP && N1CFP->isZero())
+ return N0;
// (fsub x, x) -> 0.0
if (N0 == N1)
- return DAG.getConstantFP(0.0f, dl, VT);
+ return DAG.getConstantFP(0.0f, DL, VT);
// (fsub x, (fadd x, y)) -> (fneg y)
// (fsub x, (fadd y, x)) -> (fneg y)
@@ -8611,7 +8910,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
}
// FMUL -> FMA combines:
- if (SDValue Fused = visitFMULForFMACombine(N)) {
+ if (SDValue Fused = visitFMULForFMADistributiveCombine(N)) {
AddToWorklist(Fused.getNode());
return Fused;
}
@@ -8626,14 +8925,14 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
- SDLoc dl(N);
+ SDLoc DL(N);
const TargetOptions &Options = DAG.getTarget().Options;
// Constant fold FMA.
if (isa<ConstantFPSDNode>(N0) &&
isa<ConstantFPSDNode>(N1) &&
isa<ConstantFPSDNode>(N2)) {
- return DAG.getNode(ISD::FMA, dl, VT, N0, N1, N2);
+ return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
}
if (Options.UnsafeFPMath) {
@@ -8663,8 +8962,8 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
isConstantFPBuildVectorOrConstantFP(N1) &&
isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
- return DAG.getNode(ISD::FMUL, dl, VT, N0,
- DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1),
+ return DAG.getNode(ISD::FMUL, DL, VT, N0,
+ DAG.getNode(ISD::FADD, DL, VT, N1, N2.getOperand(1),
&Flags), &Flags);
}
@@ -8672,9 +8971,9 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
if (N0.getOpcode() == ISD::FMUL &&
isConstantFPBuildVectorOrConstantFP(N1) &&
isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
- return DAG.getNode(ISD::FMA, dl, VT,
+ return DAG.getNode(ISD::FMA, DL, VT,
N0.getOperand(0),
- DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1),
+ DAG.getNode(ISD::FMUL, DL, VT, N1, N0.getOperand(1),
&Flags),
N2);
}
@@ -8685,32 +8984,32 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
if (N1CFP) {
if (N1CFP->isExactlyValue(1.0))
// TODO: The FMA node should have flags that propagate to this node.
- return DAG.getNode(ISD::FADD, dl, VT, N0, N2);
+ return DAG.getNode(ISD::FADD, DL, VT, N0, N2);
if (N1CFP->isExactlyValue(-1.0) &&
(!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
- SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0);
+ SDValue RHSNeg = DAG.getNode(ISD::FNEG, DL, VT, N0);
AddToWorklist(RHSNeg.getNode());
// TODO: The FMA node should have flags that propagate to this node.
- return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg);
+ return DAG.getNode(ISD::FADD, DL, VT, N2, RHSNeg);
}
}
if (Options.UnsafeFPMath) {
// (fma x, c, x) -> (fmul x, (c+1))
if (N1CFP && N0 == N2) {
- return DAG.getNode(ISD::FMUL, dl, VT, N0,
- DAG.getNode(ISD::FADD, dl, VT,
- N1, DAG.getConstantFP(1.0, dl, VT),
- &Flags), &Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0,
+ DAG.getNode(ISD::FADD, DL, VT, N1,
+ DAG.getConstantFP(1.0, DL, VT), &Flags),
+ &Flags);
}
// (fma x, c, (fneg x)) -> (fmul x, (c-1))
if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
- return DAG.getNode(ISD::FMUL, dl, VT, N0,
- DAG.getNode(ISD::FADD, dl, VT,
- N1, DAG.getConstantFP(-1.0, dl, VT),
- &Flags), &Flags);
+ return DAG.getNode(ISD::FMUL, DL, VT, N0,
+ DAG.getNode(ISD::FADD, DL, VT, N1,
+ DAG.getConstantFP(-1.0, DL, VT), &Flags),
+ &Flags);
}
}
@@ -8720,7 +9019,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
// Combine multiple FDIVs with the same divisor into multiple FMULs by the
// reciprocal.
// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
-// Notice that this is not always beneficial. One reason is different target
+// Notice that this is not always beneficial. One reason is different targets
// may have different costs for FDIV and FMUL, so sometimes the cost of two
// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
// is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
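// A standalone sketch of the reciprocal rewrite described above: one FDIV and
// two FMULs instead of two FDIVs sharing the divisor D. The two forms are
// only treated as interchangeable under unsafe-math, since multiplying by the
// rounded reciprocal can differ from the exact divisions in the last bit.
#include <cstdio>

int main() {
  double A = 3.0, B = 5.0, D = 7.0;
  double Q1 = A / D, Q2 = B / D;           // two FDIVs
  double Recip = 1.0 / D;                  // one FDIV ...
  double R1 = A * Recip, R2 = B * Recip;   // ... plus two FMULs
  std::printf("%.17g %.17g vs %.17g %.17g\n", Q1, Q2, R1, R2);
  return 0;
}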
@@ -8907,14 +9206,18 @@ SDValue DAGCombiner::visitFREM(SDNode *N) {
}
SDValue DAGCombiner::visitFSQRT(SDNode *N) {
- if (!DAG.getTarget().Options.UnsafeFPMath || TLI.isFsqrtCheap())
+ if (!DAG.getTarget().Options.UnsafeFPMath)
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+ if (TLI.isFsqrtCheap(N0, DAG))
return SDValue();
// TODO: FSQRT nodes should have flags that propagate to the created nodes.
// For now, create a Flags object for use with all unsafe math transforms.
SDNodeFlags Flags;
Flags.setUnsafeAlgebra(true);
- return buildSqrtEstimate(N->getOperand(0), &Flags);
+ return buildSqrtEstimate(N0, &Flags);
}
/// copysign(x, fp_extend(y)) -> copysign(x, y)
@@ -8941,11 +9244,11 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
EVT VT = N->getValueType(0);
- if (N0CFP && N1CFP) // Constant fold
+ if (N0CFP && N1CFP) // Constant fold
return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
if (N1CFP) {
- const APFloat& V = N1CFP->getValueAPF();
+ const APFloat &V = N1CFP->getValueAPF();
// copysign(x, c1) -> fabs(x) iff ispos(c1)
// copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
if (!V.isNegative()) {
@@ -8963,8 +9266,7 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
// copysign(copysign(x,z), y) -> copysign(x, y)
if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
N0.getOpcode() == ISD::FCOPYSIGN)
- return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
- N0.getOperand(0), N1);
+ return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1);
// copysign(x, abs(y)) -> abs(x)
if (N1.getOpcode() == ISD::FABS)
@@ -8972,14 +9274,12 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
// copysign(x, copysign(y,z)) -> copysign(x, z)
if (N1.getOpcode() == ISD::FCOPYSIGN)
- return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
- N0, N1.getOperand(1));
+ return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1));
// copysign(x, fp_extend(y)) -> copysign(x, y)
// copysign(x, fp_round(y)) -> copysign(x, y)
if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
- return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
- N0, N1.getOperand(0));
+ return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0));
return SDValue();
}
@@ -9159,7 +9459,7 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
// fold (fp_round (fp_round x)) -> (fp_round x)
if (N0.getOpcode() == ISD::FP_ROUND) {
const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
- const bool N0IsTrunc = N0.getNode()->getConstantOperandVal(1) == 1;
+ const bool N0IsTrunc = N0.getConstantOperandVal(1) == 1;
// Skip this folding if it results in an fp_round from f80 to f16.
//
@@ -9232,7 +9532,7 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
// Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
// value of X.
if (N0.getOpcode() == ISD::FP_ROUND
- && N0.getNode()->getConstantOperandVal(1) == 1) {
+ && N0.getConstantOperandVal(1) == 1) {
SDValue In = N0.getOperand(0);
if (In.getValueType() == VT) return In;
if (VT.bitsLT(In.getValueType()))
@@ -9319,7 +9619,7 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
if (N0.getValueType().isVector()) {
// For a vector, get a mask such as 0x80... per scalar element
// and splat it.
- SignMask = APInt::getSignBit(N0.getValueType().getScalarSizeInBits());
+ SignMask = APInt::getSignBit(N0.getScalarValueSizeInBits());
SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
} else {
// For a scalar, just generate 0x80...
@@ -9424,7 +9724,7 @@ SDValue DAGCombiner::visitFABS(SDNode *N) {
if (N0.getValueType().isVector()) {
// For a vector, get a mask such as 0x7f... per scalar element
// and splat it.
- SignMask = ~APInt::getSignBit(N0.getValueType().getScalarSizeInBits());
+ SignMask = ~APInt::getSignBit(N0.getScalarValueSizeInBits());
SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
} else {
// For a scalar, just generate 0x7f...
@@ -10103,7 +10403,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
// value.
// TODO: Handle store large -> read small portion.
// TODO: Handle TRUNCSTORE/LOADEXT
- if (ISD::isNormalLoad(N) && !LD->isVolatile()) {
+ if (OptLevel != CodeGenOpt::None &&
+ ISD::isNormalLoad(N) && !LD->isVolatile()) {
if (ISD::isNON_TRUNCStore(Chain.getNode())) {
StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
if (PrevST->getBasePtr() == Ptr &&
@@ -10405,7 +10706,7 @@ struct LoadedSlice {
assert(Inst && Origin && "Unable to replace a non-existing slice.");
const SDValue &OldBaseAddr = Origin->getBasePtr();
SDValue BaseAddr = OldBaseAddr;
- // Get the offset in that chunk of bytes w.r.t. the endianess.
+ // Get the offset in that chunk of bytes w.r.t. the endianness.
int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
assert(Offset >= 0 && "Offset too big to fit in int64_t!");
if (Offset) {
@@ -10705,7 +11006,7 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) {
LSIt != LSItEnd; ++LSIt) {
SDValue SliceInst = LSIt->loadSlice();
CombineTo(LSIt->Inst, SliceInst, true);
- if (SliceInst.getNode()->getOpcode() != ISD::LOAD)
+ if (SliceInst.getOpcode() != ISD::LOAD)
SliceInst = SliceInst.getOperand(0);
assert(SliceInst->getOpcode() == ISD::LOAD &&
"It takes more than a zext to get to the loaded slice!!");
@@ -11033,110 +11334,6 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
return SDValue();
}
-namespace {
-/// Helper struct to parse and store a memory address as base + index + offset.
-/// We ignore sign extensions when it is safe to do so.
-/// The following two expressions are not equivalent. To differentiate we need
-/// to store whether there was a sign extension involved in the index
-/// computation.
-/// (load (i64 add (i64 copyfromreg %c)
-/// (i64 signextend (add (i8 load %index)
-/// (i8 1))))
-/// vs
-///
-/// (load (i64 add (i64 copyfromreg %c)
-/// (i64 signextend (i32 add (i32 signextend (i8 load %index))
-/// (i32 1)))))
-struct BaseIndexOffset {
- SDValue Base;
- SDValue Index;
- int64_t Offset;
- bool IsIndexSignExt;
-
- BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {}
-
- BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset,
- bool IsIndexSignExt) :
- Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {}
-
- bool equalBaseIndex(const BaseIndexOffset &Other) {
- return Other.Base == Base && Other.Index == Index &&
- Other.IsIndexSignExt == IsIndexSignExt;
- }
-
- /// Parses tree in Ptr for base, index, offset addresses.
- static BaseIndexOffset match(SDValue Ptr, SelectionDAG &DAG) {
- bool IsIndexSignExt = false;
-
- // Split up a folded GlobalAddress+Offset into its component parts.
- if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ptr))
- if (GA->getOpcode() == ISD::GlobalAddress && GA->getOffset() != 0) {
- return BaseIndexOffset(DAG.getGlobalAddress(GA->getGlobal(),
- SDLoc(GA),
- GA->getValueType(0),
- /*Offset=*/0,
- /*isTargetGA=*/false,
- GA->getTargetFlags()),
- SDValue(),
- GA->getOffset(),
- IsIndexSignExt);
- }
-
- // We only can pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD
- // instruction, then it could be just the BASE or everything else we don't
- // know how to handle. Just use Ptr as BASE and give up.
- if (Ptr->getOpcode() != ISD::ADD)
- return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
-
- // We know that we have at least an ADD instruction. Try to pattern match
- // the simple case of BASE + OFFSET.
- if (isa<ConstantSDNode>(Ptr->getOperand(1))) {
- int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
- return BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset,
- IsIndexSignExt);
- }
-
- // Inside a loop the current BASE pointer is calculated using an ADD and a
- // MUL instruction. In this case Ptr is the actual BASE pointer.
- // (i64 add (i64 %array_ptr)
- // (i64 mul (i64 %induction_var)
- // (i64 %element_size)))
- if (Ptr->getOperand(1)->getOpcode() == ISD::MUL)
- return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
-
- // Look at Base + Index + Offset cases.
- SDValue Base = Ptr->getOperand(0);
- SDValue IndexOffset = Ptr->getOperand(1);
-
- // Skip signextends.
- if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) {
- IndexOffset = IndexOffset->getOperand(0);
- IsIndexSignExt = true;
- }
-
- // Either the case of Base + Index (no offset) or something else.
- if (IndexOffset->getOpcode() != ISD::ADD)
- return BaseIndexOffset(Base, IndexOffset, 0, IsIndexSignExt);
-
- // Now we have the case of Base + Index + offset.
- SDValue Index = IndexOffset->getOperand(0);
- SDValue Offset = IndexOffset->getOperand(1);
-
- if (!isa<ConstantSDNode>(Offset))
- return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
-
- // Ignore signextends.
- if (Index->getOpcode() == ISD::SIGN_EXTEND) {
- Index = Index->getOperand(0);
- IsIndexSignExt = true;
- } else IsIndexSignExt = false;
-
- int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue();
- return BaseIndexOffset(Base, Index, Off, IsIndexSignExt);
- }
-};
-} // namespace
-
// This is a helper function for visitMUL to check the profitability
// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
// MulNode is the original multiply, AddNode is (add x, c1),
@@ -11351,6 +11548,7 @@ bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
}
}
+ StoreNodes.erase(StoreNodes.begin() + NumStores, StoreNodes.end());
return true;
}
@@ -11493,7 +11691,8 @@ bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
return true;
}
-bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
+bool DAGCombiner::MergeConsecutiveStores(
+ StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes) {
if (OptLevel == CodeGenOpt::None)
return false;
@@ -11537,16 +11736,13 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
// any of the store nodes.
SmallVector<LSBaseSDNode*, 8> AliasLoadNodes;
- // Save the StoreSDNodes that we find in the chain.
- SmallVector<MemOpLink, 8> StoreNodes;
-
getStoreMergeAndAliasCandidates(St, StoreNodes, AliasLoadNodes);
// Check if there is anything to merge.
if (StoreNodes.size() < 2)
return false;
- // only do dep endence check in AA case
+ // only do dependence check in AA case
bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
: DAG.getSubtarget().useAA();
if (UseAA && !checkMergeStoreCandidatesForDependencies(StoreNodes))
@@ -11582,10 +11778,9 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
// Check if this store interferes with any of the loads that we found.
// If we find a load that alias with this store. Stop the sequence.
- if (std::any_of(AliasLoadNodes.begin(), AliasLoadNodes.end(),
- [&](LSBaseSDNode* Ldn) {
- return isAlias(Ldn, StoreNodes[i].MemNode);
- }))
+ if (any_of(AliasLoadNodes, [&](LSBaseSDNode *Ldn) {
+ return isAlias(Ldn, StoreNodes[i].MemNode);
+ }))
break;
// Mark this node as useful.
@@ -11899,6 +12094,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
}
}
+ StoreNodes.erase(StoreNodes.begin() + NumElem, StoreNodes.end());
return true;
}
@@ -12088,11 +12284,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// See if we can simplify the input to this truncstore with knowledge that
// only the low bits are being used. For example:
// "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
- SDValue Shorter =
- GetDemandedBits(Value,
- APInt::getLowBitsSet(
- Value.getValueType().getScalarType().getSizeInBits(),
- ST->getMemoryVT().getScalarType().getSizeInBits()));
+ SDValue Shorter = GetDemandedBits(
+ Value, APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
+ ST->getMemoryVT().getScalarSizeInBits()));
AddToWorklist(Value.getNode());
if (Shorter.getNode())
return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
@@ -12100,10 +12294,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// Otherwise, see if we can simplify the operation with
// SimplifyDemandedBits, which only works if the value has a single use.
- if (SimplifyDemandedBits(Value,
- APInt::getLowBitsSet(
- Value.getValueType().getScalarType().getSizeInBits(),
- ST->getMemoryVT().getScalarType().getSizeInBits())))
+ if (SimplifyDemandedBits(
+ Value,
+ APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
+ ST->getMemoryVT().getScalarSizeInBits())))
return SDValue(N, 0);
}
@@ -12144,19 +12338,20 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// Only perform this optimization before the types are legal, because we
// don't want to perform this optimization on every DAGCombine invocation.
if (!LegalTypes) {
- bool EverChanged = false;
-
- do {
+ for (;;) {
// There can be multiple store sequences on the same chain.
// Keep trying to merge store sequences until we are unable to do so
// or until we merge the last store on the chain.
- bool Changed = MergeConsecutiveStores(ST);
- EverChanged |= Changed;
+ SmallVector<MemOpLink, 8> StoreNodes;
+ bool Changed = MergeConsecutiveStores(ST, StoreNodes);
if (!Changed) break;
- } while (ST->getOpcode() != ISD::DELETED_NODE);
- if (EverChanged)
- return SDValue(N, 0);
+ if (any_of(StoreNodes,
+ [ST](const MemOpLink &Link) { return Link.MemNode == ST; })) {
+ // ST has been merged and no longer exists.
+ return SDValue(N, 0);
+ }
+ }
}
// Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
@@ -12169,14 +12364,123 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
return NewSt;
}
+ if (SDValue NewSt = splitMergedValStore(ST))
+ return NewSt;
+
return ReduceLoadOpStoreWidth(N);
}
+/// For the instruction sequence of store below, F and I values
+/// are bundled together as an i64 value before being stored into memory.
+/// Sometimes it is more efficient to generate separate stores for F and I,
+/// which can remove the bitwise instructions or sink them to colder places.
+///
+/// (store (or (zext (bitcast F to i32) to i64),
+/// (shl (zext I to i64), 32)), addr) -->
+/// (store F, addr) and (store I, addr+4)
+///
+/// Similarly, splitting other merged stores can also be beneficial, like:
+/// For pair of {i32, i32}, i64 store --> two i32 stores.
+/// For pair of {i32, i16}, i64 store --> two i32 stores.
+/// For pair of {i16, i16}, i32 store --> two i16 stores.
+/// For pair of {i16, i8}, i32 store --> two i16 stores.
+/// For pair of {i8, i8}, i16 store --> two i8 stores.
+///
+/// We allow each target to determine specifically which kind of splitting is
+/// supported.
+///
+/// The store patterns are commonly seen from the simple code snippet below
+/// if only std::make_pair(...) is SROA-transformed before being inlined into hoo.
+/// void goo(const std::pair<int, float> &);
+/// hoo() {
+/// ...
+/// goo(std::make_pair(tmp, ftmp));
+/// ...
+/// }
+///
+SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
+ if (OptLevel == CodeGenOpt::None)
+ return SDValue();
+
+ SDValue Val = ST->getValue();
+ SDLoc DL(ST);
+
+ // Match OR operand.
+ if (!Val.getValueType().isScalarInteger() || Val.getOpcode() != ISD::OR)
+ return SDValue();
+
+ // Match SHL operand and get Lower and Higher parts of Val.
+ SDValue Op1 = Val.getOperand(0);
+ SDValue Op2 = Val.getOperand(1);
+ SDValue Lo, Hi;
+ if (Op1.getOpcode() != ISD::SHL) {
+ std::swap(Op1, Op2);
+ if (Op1.getOpcode() != ISD::SHL)
+ return SDValue();
+ }
+ Lo = Op2;
+ Hi = Op1.getOperand(0);
+ if (!Op1.hasOneUse())
+ return SDValue();
+
+ // Match shift amount to HalfValBitSize.
+ unsigned HalfValBitSize = Val.getValueSizeInBits() / 2;
+ ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Op1.getOperand(1));
+ if (!ShAmt || ShAmt->getAPIntValue() != HalfValBitSize)
+ return SDValue();
+
+ // Lo and Hi are zero-extended from int with size less equal than 32
+ // to i64.
+ if (Lo.getOpcode() != ISD::ZERO_EXTEND || !Lo.hasOneUse() ||
+ !Lo.getOperand(0).getValueType().isScalarInteger() ||
+ Lo.getOperand(0).getValueSizeInBits() > HalfValBitSize ||
+ Hi.getOpcode() != ISD::ZERO_EXTEND || !Hi.hasOneUse() ||
+ !Hi.getOperand(0).getValueType().isScalarInteger() ||
+ Hi.getOperand(0).getValueSizeInBits() > HalfValBitSize)
+ return SDValue();
+
+ // Use the EVTs of the low and high parts before any bitcast as the inputs
+ // to the target query.
+ EVT LowTy = (Lo.getOperand(0).getOpcode() == ISD::BITCAST)
+ ? Lo.getOperand(0).getValueType()
+ : Lo.getValueType();
+ EVT HighTy = (Hi.getOperand(0).getOpcode() == ISD::BITCAST)
+ ? Hi.getOperand(0).getValueType()
+ : Hi.getValueType();
+ if (!TLI.isMultiStoresCheaperThanBitsMerge(LowTy, HighTy))
+ return SDValue();
+
+ // Start to split store.
+ unsigned Alignment = ST->getAlignment();
+ MachineMemOperand::Flags MMOFlags = ST->getMemOperand()->getFlags();
+ AAMDNodes AAInfo = ST->getAAInfo();
+
+ // Change the sizes of Lo and Hi's value types to HalfValBitSize.
+ EVT VT = EVT::getIntegerVT(*DAG.getContext(), HalfValBitSize);
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Lo.getOperand(0));
+ Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Hi.getOperand(0));
+
+ SDValue Chain = ST->getChain();
+ SDValue Ptr = ST->getBasePtr();
+ // Lower value store.
+ SDValue St0 = DAG.getStore(Chain, DL, Lo, Ptr, ST->getPointerInfo(),
+ ST->getAlignment(), MMOFlags, AAInfo);
+ Ptr =
+ DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
+ DAG.getConstant(HalfValBitSize / 8, DL, Ptr.getValueType()));
+ // Higher value store.
+ SDValue St1 =
+ DAG.getStore(St0, DL, Hi, Ptr,
+ ST->getPointerInfo().getWithOffset(HalfValBitSize / 8),
+ Alignment / 2, MMOFlags, AAInfo);
+ return St1;
+}
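// A little-endian model of the split performed by splitMergedValStore above:
// storing the i64 built with or/shl/zext is byte-for-byte the same as storing
// F at addr and I at addr+4, which is what allows the bit-merging
// instructions to be dropped (profitability is still left to the
// isMultiStoresCheaperThanBitsMerge target hook). Assumes a little-endian
// host and a 4-byte float.
#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  float F = 1.5f;
  uint32_t I = 42;

  uint32_t FBits;
  std::memcpy(&FBits, &F, 4);                 // (bitcast F to i32)
  uint64_t Merged = (uint64_t)FBits |         // (zext ... to i64)
                    ((uint64_t)I << 32);      // (shl (zext I to i64), 32)

  unsigned char One[8], Two[8];
  std::memcpy(One, &Merged, 8);               // single i64 store
  std::memcpy(Two, &F, 4);                    // (store F, addr)
  std::memcpy(Two + 4, &I, 4);                // (store I, addr+4)
  assert(std::memcmp(One, Two, 8) == 0);
  return 0;
}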
+
SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
SDValue InVec = N->getOperand(0);
SDValue InVal = N->getOperand(1);
SDValue EltNo = N->getOperand(2);
- SDLoc dl(N);
+ SDLoc DL(N);
// If the inserted element is an UNDEF, just use the input vector.
if (InVal.isUndef())
@@ -12206,7 +12510,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
cast<ConstantSDNode>(InVec.getOperand(2))->getZExtValue();
if (Elt < OtherElt) {
// Swap nodes.
- SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VT,
+ SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT,
InVec.getOperand(0), InVal, EltNo);
AddToWorklist(NewOp.getNode());
return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
@@ -12237,13 +12541,13 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
EVT OpVT = Ops[0].getValueType();
if (InVal.getValueType() != OpVT)
InVal = OpVT.bitsGT(InVal.getValueType()) ?
- DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
- DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
+ DAG.getNode(ISD::ANY_EXTEND, DL, OpVT, InVal) :
+ DAG.getNode(ISD::TRUNCATE, DL, OpVT, InVal);
Ops[Elt] = InVal;
}
// Return the new vector
- return DAG.getBuildVector(VT, dl, Ops);
+ return DAG.getBuildVector(VT, DL, Ops);
}
SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
@@ -12544,7 +12848,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
return SDValue();
unsigned NumInScalars = N->getNumOperands();
- SDLoc dl(N);
+ SDLoc DL(N);
EVT VT = N->getValueType(0);
// Check to see if this is a BUILD_VECTOR of a bunch of values
@@ -12603,7 +12907,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
assert(ElemRatio > 1 && "Invalid element size ratio");
SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
- DAG.getConstant(0, SDLoc(N), SourceType);
+ DAG.getConstant(0, DL, SourceType);
unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
@@ -12634,7 +12938,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
if (!isTypeLegal(VecVT)) return SDValue();
// Make the new BUILD_VECTOR.
- SDValue BV = DAG.getBuildVector(VecVT, dl, Ops);
+ SDValue BV = DAG.getBuildVector(VecVT, DL, Ops);
// The new BUILD_VECTOR node has the potential to be further optimized.
AddToWorklist(BV.getNode());
@@ -12646,7 +12950,7 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
EVT VT = N->getValueType(0);
unsigned NumInScalars = N->getNumOperands();
- SDLoc dl(N);
+ SDLoc DL(N);
EVT SrcVT = MVT::Other;
unsigned Opcode = ISD::DELETED_NODE;
@@ -12707,30 +13011,120 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
else
Opnds.push_back(In.getOperand(0));
}
- SDValue BV = DAG.getBuildVector(NVT, dl, Opnds);
+ SDValue BV = DAG.getBuildVector(NVT, DL, Opnds);
AddToWorklist(BV.getNode());
- return DAG.getNode(Opcode, dl, VT, BV);
+ return DAG.getNode(Opcode, DL, VT, BV);
}
-SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
- unsigned NumInScalars = N->getNumOperands();
- SDLoc dl(N);
+SDValue DAGCombiner::createBuildVecShuffle(SDLoc DL, SDNode *N,
+ ArrayRef<int> VectorMask,
+ SDValue VecIn1, SDValue VecIn2,
+ unsigned LeftIdx) {
+ MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
+ SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);
+
EVT VT = N->getValueType(0);
+ EVT InVT1 = VecIn1.getValueType();
+ EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
+
+ unsigned Vec2Offset = InVT1.getVectorNumElements();
+ unsigned NumElems = VT.getVectorNumElements();
+ unsigned ShuffleNumElems = NumElems;
+
+ // We can't generate a shuffle node with mismatched input and output types.
+ // Try to make the types match the type of the output.
+ if (InVT1 != VT || InVT2 != VT) {
+ if ((VT.getSizeInBits() % InVT1.getSizeInBits() == 0) && InVT1 == InVT2) {
+ // If the output vector length is a multiple of both input lengths,
+ // we can concatenate them and pad the rest with undefs.
+ unsigned NumConcats = VT.getSizeInBits() / InVT1.getSizeInBits();
+ assert(NumConcats >= 2 && "Concat needs at least two inputs!");
+ SmallVector<SDValue, 2> ConcatOps(NumConcats, DAG.getUNDEF(InVT1));
+ ConcatOps[0] = VecIn1;
+ ConcatOps[1] = VecIn2 ? VecIn2 : DAG.getUNDEF(InVT1);
+ VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
+ VecIn2 = SDValue();
+ } else if (InVT1.getSizeInBits() == VT.getSizeInBits() * 2) {
+ if (!TLI.isExtractSubvectorCheap(VT, NumElems))
+ return SDValue();
- // A vector built entirely of undefs is undef.
- if (ISD::allOperandsUndef(N))
- return DAG.getUNDEF(VT);
+ if (!VecIn2.getNode()) {
+ // If we only have one input vector, and it's twice the size of the
+ // output, split it in two.
+ VecIn2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1,
+ DAG.getConstant(NumElems, DL, IdxTy));
+ VecIn1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, VecIn1, ZeroIdx);
+ // Since we now have shorter input vectors, adjust the offset of the
+ // second vector's start.
+ Vec2Offset = NumElems;
+ } else if (InVT2.getSizeInBits() <= InVT1.getSizeInBits()) {
+ // VecIn1 is wider than the output, and we have another, possibly
+ // smaller input. Pad the smaller input with undefs, shuffle at the
+ // input vector width, and extract the output.
+ // The shuffle type is different than VT, so check legality again.
+ if (LegalOperations &&
+ !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, InVT1))
+ return SDValue();
- if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
- return V;
+ if (InVT1 != InVT2)
+ VecIn2 = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT1,
+ DAG.getUNDEF(InVT1), VecIn2, ZeroIdx);
+ ShuffleNumElems = NumElems * 2;
+ } else {
+ // Both VecIn1 and VecIn2 are wider than the output, and VecIn2 is wider
+ // than VecIn1. We can't handle this for now - this case will disappear
+ // when we start sorting the vectors by type.
+ return SDValue();
+ }
+ } else {
+ // TODO: Support cases where the length mismatch isn't exactly by a
+ // factor of 2.
+ // TODO: Move this check upwards, so that if we have bad type
+ // mismatches, we don't create any DAG nodes.
+ return SDValue();
+ }
+ }
- if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N))
- return V;
+ // Initialize mask to undef.
+ SmallVector<int, 8> Mask(ShuffleNumElems, -1);
+
+ // Only need to run up to the number of elements actually used, not the
+ // total number of elements in the shuffle - if we are shuffling a wider
+ // vector, the high lanes should be set to undef.
+ for (unsigned i = 0; i != NumElems; ++i) {
+ if (VectorMask[i] <= 0)
+ continue;
+
+ unsigned ExtIndex = N->getOperand(i).getConstantOperandVal(1);
+ if (VectorMask[i] == (int)LeftIdx) {
+ Mask[i] = ExtIndex;
+ } else if (VectorMask[i] == (int)LeftIdx + 1) {
+ Mask[i] = Vec2Offset + ExtIndex;
+ }
+ }
+
+ // The type the input vectors may have changed above.
+ InVT1 = VecIn1.getValueType();
+
+ // If we already have a VecIn2, it should have the same type as VecIn1.
+ // If we don't, get an undef/zero vector of the appropriate type.
+ VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(InVT1);
+ assert(InVT1 == VecIn2.getValueType() && "Unexpected second input type.");
- // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
- // operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
- // at most two distinct vectors, turn this into a shuffle node.
+ SDValue Shuffle = DAG.getVectorShuffle(InVT1, DL, VecIn1, VecIn2, Mask);
+ if (ShuffleNumElems > NumElems)
+ Shuffle = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuffle, ZeroIdx);
+
+ return Shuffle;
+}
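As an aside on the mask construction just above: an element keeps its extract index when it comes from the left input, and gets Vec2Offset added when it comes from the right input. A minimal standalone sketch of that remapping with invented scalar values instead of SDValues (illustration only, not part of the patch):

    #include <cstdio>
    #include <vector>

    int main() {
      // Build a 4-element vector {A[0], B[0], A[1], undef} from inputs
      // A (vector number 1) and B (vector number 2), both 4 elements wide.
      std::vector<int> VectorMask = {1, 2, 1, -1}; // source vector per element
      std::vector<int> ExtIndex   = {0, 0, 1, 0};  // extract index per element
      unsigned LeftIdx = 1, Vec2Offset = 4;        // invented for this example
      std::vector<int> Mask(4, -1);
      for (unsigned i = 0; i != 4; ++i) {
        if (VectorMask[i] == (int)LeftIdx)
          Mask[i] = ExtIndex[i];
        else if (VectorMask[i] == (int)LeftIdx + 1)
          Mask[i] = Vec2Offset + ExtIndex[i];
      }
      for (int M : Mask)
        std::printf("%d ", M); // prints: 0 4 1 -1
      return 0;
    }

The printed mask corresponds to vector_shuffle<0,4,1,u> over the two inputs.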
+
+// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
+// operations. If the types of the vectors we're extracting from allow it,
+// turn this into a vector_shuffle node.
+SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
// Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
if (!isTypeLegal(VT))
@@ -12740,149 +13134,169 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
return SDValue();
- SDValue VecIn1, VecIn2;
bool UsesZeroVector = false;
- for (unsigned i = 0; i != NumInScalars; ++i) {
+ unsigned NumElems = N->getNumOperands();
+
+ // Record, for each element of the newly built vector, which input vector
+ // that element comes from. -1 stands for undef, 0 for the zero vector,
+ // and positive values for the input vectors.
+ // VectorMask maps each element to its vector number, and VecIn maps vector
+ // numbers to their initial SDValues.
+
+ SmallVector<int, 8> VectorMask(NumElems, -1);
+ SmallVector<SDValue, 8> VecIn;
+ VecIn.push_back(SDValue());
+
+ for (unsigned i = 0; i != NumElems; ++i) {
SDValue Op = N->getOperand(i);
- // Ignore undef inputs.
- if (Op.isUndef()) continue;
- // See if we can combine this build_vector into a blend with a zero vector.
- if (!VecIn2.getNode() && (isNullConstant(Op) || isNullFPConstant(Op))) {
+ if (Op.isUndef())
+ continue;
+
+ // See if we can use a blend with a zero vector.
+ // TODO: Should we generalize this to a blend with an arbitrary constant
+ // vector?
+ if (isNullConstant(Op) || isNullFPConstant(Op)) {
UsesZeroVector = true;
+ VectorMask[i] = 0;
continue;
}
- // If this input is something other than a EXTRACT_VECTOR_ELT with a
- // constant index, bail out.
+ // Not an undef or zero. If the input is something other than an
+ // EXTRACT_VECTOR_ELT with a constant index, bail out.
if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
- !isa<ConstantSDNode>(Op.getOperand(1))) {
- VecIn1 = VecIn2 = SDValue(nullptr, 0);
- break;
- }
+ !isa<ConstantSDNode>(Op.getOperand(1)))
+ return SDValue();
- // We allow up to two distinct input vectors.
SDValue ExtractedFromVec = Op.getOperand(0);
- if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2)
- continue;
- if (!VecIn1.getNode()) {
- VecIn1 = ExtractedFromVec;
- } else if (!VecIn2.getNode() && !UsesZeroVector) {
- VecIn2 = ExtractedFromVec;
- } else {
- // Too many inputs.
- VecIn1 = VecIn2 = SDValue(nullptr, 0);
- break;
- }
- }
-
- // If everything is good, we can make a shuffle operation.
- if (VecIn1.getNode()) {
- unsigned InNumElements = VecIn1.getValueType().getVectorNumElements();
- SmallVector<int, 8> Mask;
- for (unsigned i = 0; i != NumInScalars; ++i) {
- unsigned Opcode = N->getOperand(i).getOpcode();
- if (Opcode == ISD::UNDEF) {
- Mask.push_back(-1);
- continue;
- }
+ // All inputs must have the same element type as the output.
+ if (VT.getVectorElementType() !=
+ ExtractedFromVec.getValueType().getVectorElementType())
+ return SDValue();
- // Operands can also be zero.
- if (Opcode != ISD::EXTRACT_VECTOR_ELT) {
- assert(UsesZeroVector &&
- (Opcode == ISD::Constant || Opcode == ISD::ConstantFP) &&
- "Unexpected node found!");
- Mask.push_back(NumInScalars+i);
- continue;
- }
+ // Have we seen this input vector before?
+    // The list of source vectors is expected to be tiny (usually one or two
+    // entries), so a map back from SDValues to numbers isn't worth it.
+ unsigned Idx = std::distance(
+ VecIn.begin(), std::find(VecIn.begin(), VecIn.end(), ExtractedFromVec));
+ if (Idx == VecIn.size())
+ VecIn.push_back(ExtractedFromVec);
- // If extracting from the first vector, just use the index directly.
- SDValue Extract = N->getOperand(i);
- SDValue ExtVal = Extract.getOperand(1);
- unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue();
- if (Extract.getOperand(0) == VecIn1) {
- Mask.push_back(ExtIndex);
- continue;
- }
+ VectorMask[i] = Idx;
+ }
- // Otherwise, use InIdx + InputVecSize
- Mask.push_back(InNumElements + ExtIndex);
- }
+ // If we didn't find at least one input vector, bail out.
+ if (VecIn.size() < 2)
+ return SDValue();
- // Avoid introducing illegal shuffles with zero.
- if (UsesZeroVector && !TLI.isVectorClearMaskLegal(Mask, VT))
+ // TODO: We want to sort the vectors by descending length, so that adjacent
+ // pairs have similar length, and the longer vector is always first in the
+ // pair.
+
+  // TODO: Should this fire if some of the input vectors have illegal types
+  // (like it does now), or should we let legalization run its course first?
+
+ // Shuffle phase:
+ // Take pairs of vectors, and shuffle them so that the result has elements
+ // from these vectors in the correct places.
+ // For example, given:
+ // t10: i32 = extract_vector_elt t1, Constant:i64<0>
+ // t11: i32 = extract_vector_elt t2, Constant:i64<0>
+ // t12: i32 = extract_vector_elt t3, Constant:i64<0>
+ // t13: i32 = extract_vector_elt t1, Constant:i64<1>
+ // t14: v4i32 = BUILD_VECTOR t10, t11, t12, t13
+ // We will generate:
+ // t20: v4i32 = vector_shuffle<0,4,u,1> t1, t2
+ // t21: v4i32 = vector_shuffle<u,u,0,u> t3, undef
+ SmallVector<SDValue, 4> Shuffles;
+ for (unsigned In = 0, Len = (VecIn.size() / 2); In < Len; ++In) {
+ unsigned LeftIdx = 2 * In + 1;
+ SDValue VecLeft = VecIn[LeftIdx];
+ SDValue VecRight =
+ (LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
+
+ if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
+ VecRight, LeftIdx))
+ Shuffles.push_back(Shuffle);
+ else
return SDValue();
+ }
- // We can't generate a shuffle node with mismatched input and output types.
- // Attempt to transform a single input vector to the correct type.
- if ((VT != VecIn1.getValueType())) {
- // If the input vector type has a different base type to the output
- // vector type, bail out.
- EVT VTElemType = VT.getVectorElementType();
- if ((VecIn1.getValueType().getVectorElementType() != VTElemType) ||
- (VecIn2.getNode() &&
- (VecIn2.getValueType().getVectorElementType() != VTElemType)))
- return SDValue();
+ // If we need the zero vector as an "ingredient" in the blend tree, add it
+ // to the list of shuffles.
+ if (UsesZeroVector)
+ Shuffles.push_back(VT.isInteger() ? DAG.getConstant(0, DL, VT)
+ : DAG.getConstantFP(0.0, DL, VT));
- // If the input vector is too small, widen it.
- // We only support widening of vectors which are half the size of the
- // output registers. For example XMM->YMM widening on X86 with AVX.
- EVT VecInT = VecIn1.getValueType();
- if (VecInT.getSizeInBits() * 2 == VT.getSizeInBits()) {
- // If we only have one small input, widen it by adding undef values.
- if (!VecIn2.getNode())
- VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1,
- DAG.getUNDEF(VecIn1.getValueType()));
- else if (VecIn1.getValueType() == VecIn2.getValueType()) {
- // If we have two small inputs of the same type, try to concat them.
- VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1, VecIn2);
- VecIn2 = SDValue(nullptr, 0);
- } else
- return SDValue();
- } else if (VecInT.getSizeInBits() == VT.getSizeInBits() * 2) {
- // If the input vector is too large, try to split it.
- // We don't support having two input vectors that are too large.
- // If the zero vector was used, we can not split the vector,
- // since we'd need 3 inputs.
- if (UsesZeroVector || VecIn2.getNode())
- return SDValue();
+ // If we only have one shuffle, we're done.
+ if (Shuffles.size() == 1)
+ return Shuffles[0];
- if (!TLI.isExtractSubvectorCheap(VT, VT.getVectorNumElements()))
- return SDValue();
+ // Update the vector mask to point to the post-shuffle vectors.
+ for (int &Vec : VectorMask)
+ if (Vec == 0)
+ Vec = Shuffles.size() - 1;
+ else
+ Vec = (Vec - 1) / 2;
+
+ // More than one shuffle. Generate a binary tree of blends, e.g. if from
+ // the previous step we got the set of shuffles t10, t11, t12, t13, we will
+ // generate:
+ // t10: v8i32 = vector_shuffle<0,8,u,u,u,u,u,u> t1, t2
+ // t11: v8i32 = vector_shuffle<u,u,0,8,u,u,u,u> t3, t4
+ // t12: v8i32 = vector_shuffle<u,u,u,u,0,8,u,u> t5, t6
+ // t13: v8i32 = vector_shuffle<u,u,u,u,u,u,0,8> t7, t8
+ // t20: v8i32 = vector_shuffle<0,1,10,11,u,u,u,u> t10, t11
+ // t21: v8i32 = vector_shuffle<u,u,u,u,4,5,14,15> t12, t13
+ // t30: v8i32 = vector_shuffle<0,1,2,3,12,13,14,15> t20, t21
+
+ // Make sure the initial size of the shuffle list is even.
+ if (Shuffles.size() % 2)
+ Shuffles.push_back(DAG.getUNDEF(VT));
+
+ for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
+ if (CurSize % 2) {
+ Shuffles[CurSize] = DAG.getUNDEF(VT);
+ CurSize++;
+ }
+ for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
+ int Left = 2 * In;
+ int Right = 2 * In + 1;
+ SmallVector<int, 8> Mask(NumElems, -1);
+ for (unsigned i = 0; i != NumElems; ++i) {
+ if (VectorMask[i] == Left) {
+ Mask[i] = i;
+ VectorMask[i] = In;
+ } else if (VectorMask[i] == Right) {
+ Mask[i] = i + NumElems;
+ VectorMask[i] = In;
+ }
+ }
- // Try to replace VecIn1 with two extract_subvectors
- // No need to update the masks, they should still be correct.
- VecIn2 = DAG.getNode(
- ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
- DAG.getConstant(VT.getVectorNumElements(), dl,
- TLI.getVectorIdxTy(DAG.getDataLayout())));
- VecIn1 = DAG.getNode(
- ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
- DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
- } else
- return SDValue();
+ Shuffles[In] =
+ DAG.getVectorShuffle(VT, DL, Shuffles[Left], Shuffles[Right], Mask);
}
+ }
- if (UsesZeroVector)
- VecIn2 = VT.isInteger() ? DAG.getConstant(0, dl, VT) :
- DAG.getConstantFP(0.0, dl, VT);
- else
- // If VecIn2 is unused then change it to undef.
- VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
+ return Shuffles[0];
+}
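The pairwise blend loop above can be modeled on plain arrays to see how VectorMask steers each element into the final vector. A small self-contained sketch (illustration only; the four-shuffle setup and element values are invented, and lanes not selected are simply left at 0 where the real code uses undef):

    #include <cstdio>
    #include <vector>

    int main() {
      const unsigned NumElems = 4;
      // Four "shuffles" from the previous phase; values are arbitrary labels.
      std::vector<std::vector<int>> Shuffles = {
          {10, 0, 0, 0}, {0, 11, 0, 0}, {0, 0, 12, 0}, {0, 0, 0, 13}};
      std::vector<int> VectorMask = {0, 1, 2, 3}; // element i comes from shuffle i
      for (unsigned CurSize = Shuffles.size(); CurSize > 1; CurSize /= 2) {
        for (unsigned In = 0, Len = CurSize / 2; In < Len; ++In) {
          int Left = 2 * In, Right = 2 * In + 1;
          std::vector<int> Blend(NumElems, 0);
          for (unsigned i = 0; i != NumElems; ++i) {
            if (VectorMask[i] == Left) {
              Blend[i] = Shuffles[Left][i];
              VectorMask[i] = In;
            } else if (VectorMask[i] == Right) {
              Blend[i] = Shuffles[Right][i];
              VectorMask[i] = In;
            }
          }
          Shuffles[In] = Blend;
        }
      }
      for (int V : Shuffles[0])
        std::printf("%d ", V); // prints: 10 11 12 13
      return 0;
    }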
- // Check that we were able to transform all incoming values to the same
- // type.
- if (VecIn2.getValueType() != VecIn1.getValueType() ||
- VecIn1.getValueType() != VT)
- return SDValue();
+SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
+ EVT VT = N->getValueType(0);
- // Return the new VECTOR_SHUFFLE node.
- SDValue Ops[2];
- Ops[0] = VecIn1;
- Ops[1] = VecIn2;
- return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], Mask);
- }
+ // A vector built entirely of undefs is undef.
+ if (ISD::allOperandsUndef(N))
+ return DAG.getUNDEF(VT);
+
+ if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
+ return V;
+
+ if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N))
+ return V;
+
+ if (SDValue V = reduceBuildVecToShuffle(N))
+ return V;
return SDValue();
}
@@ -13071,8 +13485,7 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
return SDValue();
- SDLoc dl = SDLoc(N);
- SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NVT, Scalar);
+ SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), NVT, Scalar);
return DAG.getBitcast(VT, Res);
}
}
@@ -13208,7 +13621,6 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
V = V.getOperand(0);
if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
- SDLoc dl(N);
// Handle only simple case where vector being inserted and vector
// being extracted are of same type, and are half size of larger vectors.
EVT BigVT = V->getOperand(0).getValueType();
@@ -13228,11 +13640,11 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
// Into:
// indices are equal or bit offsets are equal => V1
// otherwise => (extract_subvec V1, ExtIdx)
- if (InsIdx->getZExtValue() * SmallVT.getScalarType().getSizeInBits() ==
- ExtIdx->getZExtValue() * NVT.getScalarType().getSizeInBits())
+ if (InsIdx->getZExtValue() * SmallVT.getScalarSizeInBits() ==
+ ExtIdx->getZExtValue() * NVT.getScalarSizeInBits())
return DAG.getBitcast(NVT, V->getOperand(1));
return DAG.getNode(
- ISD::EXTRACT_SUBVECTOR, dl, NVT,
+ ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
DAG.getBitcast(N->getOperand(0).getValueType(), V->getOperand(0)),
N->getOperand(1));
}
@@ -13391,6 +13803,84 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
}
+// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
+// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
+//
+// SHUFFLE(BUILD_VECTOR(), BUILD_VECTOR()) -> BUILD_VECTOR() is always
+// a simplification in some sense, but it isn't appropriate in general: some
+// BUILD_VECTORs are substantially cheaper than others. The general case
+// of a BUILD_VECTOR requires inserting each element individually (or
+// performing the equivalent in a temporary stack variable). A BUILD_VECTOR of
+// all constants is a single constant pool load. A BUILD_VECTOR where each
+// element is identical is a splat. A BUILD_VECTOR where most of the operands
+// are undef lowers to a small number of element insertions.
+//
+// To deal with this, we currently use a bunch of mostly arbitrary heuristics.
+// We don't fold shuffles where one side is a non-zero constant, and we don't
+// fold shuffles if the resulting BUILD_VECTOR would have duplicate
+// non-constant operands. This seems to work out reasonably well in practice.
+static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
+ SelectionDAG &DAG,
+ const TargetLowering &TLI) {
+ EVT VT = SVN->getValueType(0);
+ unsigned NumElts = VT.getVectorNumElements();
+ SDValue N0 = SVN->getOperand(0);
+ SDValue N1 = SVN->getOperand(1);
+
+ if (!N0->hasOneUse() || !N1->hasOneUse())
+ return SDValue();
+  // If only one of N0,N1 is a constant build vector, bail out unless it is
+  // all-zeros, as discussed above.
+ if (!N1.isUndef()) {
+ bool N0AnyConst = isAnyConstantBuildVector(N0.getNode());
+ bool N1AnyConst = isAnyConstantBuildVector(N1.getNode());
+ if (N0AnyConst && !N1AnyConst && !ISD::isBuildVectorAllZeros(N0.getNode()))
+ return SDValue();
+ if (!N0AnyConst && N1AnyConst && !ISD::isBuildVectorAllZeros(N1.getNode()))
+ return SDValue();
+ }
+
+ SmallVector<SDValue, 8> Ops;
+ SmallSet<SDValue, 16> DuplicateOps;
+ for (int M : SVN->getMask()) {
+ SDValue Op = DAG.getUNDEF(VT.getScalarType());
+ if (M >= 0) {
+ int Idx = M < (int)NumElts ? M : M - NumElts;
+ SDValue &S = (M < (int)NumElts ? N0 : N1);
+ if (S.getOpcode() == ISD::BUILD_VECTOR) {
+ Op = S.getOperand(Idx);
+ } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ if (Idx == 0)
+ Op = S.getOperand(0);
+ } else {
+ // Operand can't be combined - bail out.
+ return SDValue();
+ }
+ }
+
+ // Don't duplicate a non-constant BUILD_VECTOR operand; semantically, this is
+ // fine, but it's likely to generate low-quality code if the target can't
+ // reconstruct an appropriate shuffle.
+ if (!Op.isUndef() && !isa<ConstantSDNode>(Op) && !isa<ConstantFPSDNode>(Op))
+ if (!DuplicateOps.insert(Op).second)
+ return SDValue();
+
+ Ops.push_back(Op);
+ }
+  // BUILD_VECTOR requires all inputs to be of the same type; find the
+  // maximum type and extend them all.
+ EVT SVT = VT.getScalarType();
+ if (SVT.isInteger())
+ for (SDValue &Op : Ops)
+ SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
+ if (SVT != VT.getScalarType())
+ for (SDValue &Op : Ops)
+ Op = TLI.isZExtFree(Op.getValueType(), SVT)
+ ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT)
+ : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT);
+ return DAG.getBuildVector(VT, SDLoc(SVN), Ops);
+}
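At the scalar level, the fold above amounts to re-selecting BUILD_VECTOR operands through the shuffle mask. A minimal sketch with invented values (plain C++, not the DAG API):

    #include <array>
    #include <cstdio>

    int main() {
      std::array<int, 4> N0 = {1, 2, 3, 4};   // operands of the first build_vector
      std::array<int, 4> N1 = {5, 6, 7, 8};   // operands of the second build_vector
      std::array<int, 4> Mask = {0, 4, 1, 5}; // vector_shuffle<0,4,1,5>
      std::array<int, 4> Ops;
      for (unsigned i = 0; i != 4; ++i) {
        int M = Mask[i];
        Ops[i] = M < 4 ? N0[M] : N1[M - 4];   // pick the scalar the mask names
      }
      for (int V : Ops)
        std::printf("%d ", V); // prints: 1 5 2 6
      return 0;
    }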
+
SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
EVT VT = N->getValueType(0);
unsigned NumElts = VT.getVectorNumElements();
@@ -13506,40 +13996,9 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
// Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
// BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
- if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
- SmallVector<SDValue, 8> Ops;
- for (int M : SVN->getMask()) {
- SDValue Op = DAG.getUNDEF(VT.getScalarType());
- if (M >= 0) {
- int Idx = M % NumElts;
- SDValue &S = (M < (int)NumElts ? N0 : N1);
- if (S.getOpcode() == ISD::BUILD_VECTOR && S.hasOneUse()) {
- Op = S.getOperand(Idx);
- } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR && S.hasOneUse()) {
- if (Idx == 0)
- Op = S.getOperand(0);
- } else {
- // Operand can't be combined - bail out.
- break;
- }
- }
- Ops.push_back(Op);
- }
- if (Ops.size() == VT.getVectorNumElements()) {
- // BUILD_VECTOR requires all inputs to be of the same type, find the
- // maximum type and extend them all.
- EVT SVT = VT.getScalarType();
- if (SVT.isInteger())
- for (SDValue &Op : Ops)
- SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
- if (SVT != VT.getScalarType())
- for (SDValue &Op : Ops)
- Op = TLI.isZExtFree(Op.getValueType(), SVT)
- ? DAG.getZExtOrTrunc(Op, SDLoc(N), SVT)
- : DAG.getSExtOrTrunc(Op, SDLoc(N), SVT);
- return DAG.getBuildVector(VT, SDLoc(N), Ops);
- }
- }
+ if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
+ if (SDValue Res = combineShuffleOfScalars(SVN, DAG, TLI))
+ return Res;
// If this shuffle only has a single input that is a bitcasted shuffle,
// attempt to merge the 2 shuffles and suitably bitcast the inputs/output
@@ -13647,6 +14106,11 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
+ // Don't try to fold splats; they're likely to simplify somehow, or they
+ // might be free.
+ if (OtherSV->isSplat())
+ return SDValue();
+
// The incoming shuffle must be of the same type as the result of the
// current shuffle.
assert(OtherSV->getOperand(0).getValueType() == VT &&
@@ -13773,10 +14237,20 @@ SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
}
SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
+ EVT VT = N->getValueType(0);
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
+ // Combine INSERT_SUBVECTORs where we are inserting to the same index.
+ // INSERT_SUBVECTOR( INSERT_SUBVECTOR( Vec, SubOld, Idx ), SubNew, Idx )
+ // --> INSERT_SUBVECTOR( Vec, SubNew, Idx )
+ if (N0.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ N0.getOperand(1).getValueType() == N1.getValueType() &&
+ N0.getOperand(2) == N2)
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
+ N1, N2);
+
if (N0.getValueType() != N1.getValueType())
return SDValue();
@@ -13785,7 +14259,6 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0->getNumOperands() == 2 &&
N2.getOpcode() == ISD::Constant) {
APInt InsIdx = cast<ConstantSDNode>(N2)->getAPIntValue();
- EVT VT = N->getValueType(0);
// Lower half: fold (insert_subvector (concat_vectors X, Y), Z) ->
// (concat_vectors Z, Y)
@@ -13836,7 +14309,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
EVT VT = N->getValueType(0);
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
- SDLoc dl(N);
+ SDLoc DL(N);
// Make sure we're not running after operation legalization where it
// may have custom lowered the vector shuffles.
@@ -13904,8 +14377,8 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
return SDValue();
- SDValue Zero = DAG.getConstant(0, dl, ClearVT);
- return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, dl,
+ SDValue Zero = DAG.getConstant(0, DL, ClearVT);
+ return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, DL,
DAG.getBitcast(ClearVT, LHS),
Zero, Indices));
};
@@ -14119,6 +14592,8 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
MachineMemOperand::Flags MMOFlags = LLD->getMemOperand()->getFlags();
if (!RLD->isInvariant())
MMOFlags &= ~MachineMemOperand::MOInvariant;
+ if (!RLD->isDereferenceable())
+ MMOFlags &= ~MachineMemOperand::MODereferenceable;
if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
// FIXME: Discards pointer and AA info.
Load = DAG.getLoad(TheSelect->getValueType(0), SDLoc(TheSelect),
@@ -14146,6 +14621,73 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
return false;
}
+/// Try to fold an expression of the form (N0 cond N1) ? N2 : N3 to a shift and
+/// bitwise 'and'.
+SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
+ SDValue N1, SDValue N2, SDValue N3,
+ ISD::CondCode CC) {
+ // If this is a select where the false operand is zero and the compare is a
+ // check of the sign bit, see if we can perform the "gzip trick":
+ // select_cc setlt X, 0, A, 0 -> and (sra X, size(X)-1), A
+ // select_cc setgt X, 0, A, 0 -> and (not (sra X, size(X)-1)), A
+ EVT XType = N0.getValueType();
+ EVT AType = N2.getValueType();
+ if (!isNullConstant(N3) || !XType.bitsGE(AType))
+ return SDValue();
+
+ // If the comparison is testing for a positive value, we have to invert
+ // the sign bit mask, so only do that transform if the target has a bitwise
+ // 'and not' instruction (the invert is free).
+ if (CC == ISD::SETGT && TLI.hasAndNot(N2)) {
+ // (X > -1) ? A : 0
+ // (X > 0) ? X : 0 <-- This is canonical signed max.
+ if (!(isAllOnesConstant(N1) || (isNullConstant(N1) && N0 == N2)))
+ return SDValue();
+ } else if (CC == ISD::SETLT) {
+ // (X < 0) ? A : 0
+ // (X < 1) ? X : 0 <-- This is un-canonicalized signed min.
+ if (!(isNullConstant(N1) || (isOneConstant(N1) && N0 == N2)))
+ return SDValue();
+ } else {
+ return SDValue();
+ }
+
+ // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a single-bit
+ // constant.
+ EVT ShiftAmtTy = getShiftAmountTy(N0.getValueType());
+ auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
+ if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
+ unsigned ShCt = XType.getSizeInBits() - N2C->getAPIntValue().logBase2() - 1;
+ SDValue ShiftAmt = DAG.getConstant(ShCt, DL, ShiftAmtTy);
+ SDValue Shift = DAG.getNode(ISD::SRL, DL, XType, N0, ShiftAmt);
+ AddToWorklist(Shift.getNode());
+
+ if (XType.bitsGT(AType)) {
+ Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
+ AddToWorklist(Shift.getNode());
+ }
+
+ if (CC == ISD::SETGT)
+ Shift = DAG.getNOT(DL, Shift, AType);
+
+ return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
+ }
+
+ SDValue ShiftAmt = DAG.getConstant(XType.getSizeInBits() - 1, DL, ShiftAmtTy);
+ SDValue Shift = DAG.getNode(ISD::SRA, DL, XType, N0, ShiftAmt);
+ AddToWorklist(Shift.getNode());
+
+ if (XType.bitsGT(AType)) {
+ Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
+ AddToWorklist(Shift.getNode());
+ }
+
+ if (CC == ISD::SETGT)
+ Shift = DAG.getNOT(DL, Shift, AType);
+
+ return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
+}
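For reference, a scalar form of the transform implemented above, assuming the usual arithmetic right shift for negative signed values; the single-bit-constant variant uses a logical shift that lands the sign bit on A's bit. __builtin_ctz is a GCC/Clang builtin used here only for the illustration:

    #include <cassert>
    #include <cstdint>

    int32_t selectLtZero(int32_t X, int32_t A) {
      // (X < 0) ? A : 0, as in the SETLT path above.
      return (X >> 31) & A;
    }

    uint32_t selectLtZeroPow2(int32_t X, uint32_t A) {
      // A must be a single-bit (power-of-two) constant.
      unsigned ShCt = 31 - __builtin_ctz(A);  // size(X) - logBase2(A) - 1
      return (uint32_t(X) >> ShCt) & A;       // logical shift, then mask
    }

    int main() {
      assert(selectLtZero(-7, 42) == 42 && selectLtZero(7, 42) == 0);
      assert(selectLtZeroPow2(-7, 8) == 8 && selectLtZeroPow2(7, 8) == 0);
      return 0;
    }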
+
/// Simplify an expression of the form (N0 cond N1) ? N2 : N3
/// where 'cond' is the comparison specified by CC.
SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
@@ -14242,48 +14784,8 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
}
}
- // Check to see if we can perform the "gzip trick", transforming
- // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A)
- if (isNullConstant(N3) && CC == ISD::SETLT &&
- (isNullConstant(N1) || // (a < 0) ? b : 0
- (isOneConstant(N1) && N0 == N2))) { // (a < 1) ? a : 0
- EVT XType = N0.getValueType();
- EVT AType = N2.getValueType();
- if (XType.bitsGE(AType)) {
- // and (sra X, size(X)-1, A) -> "and (srl X, C2), A" iff A is a
- // single-bit constant.
- if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
- unsigned ShCtV = N2C->getAPIntValue().logBase2();
- ShCtV = XType.getSizeInBits() - ShCtV - 1;
- SDValue ShCt = DAG.getConstant(ShCtV, SDLoc(N0),
- getShiftAmountTy(N0.getValueType()));
- SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0),
- XType, N0, ShCt);
- AddToWorklist(Shift.getNode());
-
- if (XType.bitsGT(AType)) {
- Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
- AddToWorklist(Shift.getNode());
- }
-
- return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
- }
-
- SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0),
- XType, N0,
- DAG.getConstant(XType.getSizeInBits() - 1,
- SDLoc(N0),
- getShiftAmountTy(N0.getValueType())));
- AddToWorklist(Shift.getNode());
-
- if (XType.bitsGT(AType)) {
- Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
- AddToWorklist(Shift.getNode());
- }
-
- return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
- }
- }
+ if (SDValue V = foldSelectCCToShiftAnd(DL, N0, N1, N2, N3, CC))
+ return V;
// fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
// where y is has a single bit set.
@@ -14511,30 +15013,51 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) {
return S;
}
+/// Determines the LogBase2 value for a non-zero input value using the
+/// transform: LogBase2(V) = (EltBits - 1) - ctlz(V).
+SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
+ EVT VT = V.getValueType();
+ unsigned EltBits = VT.getScalarSizeInBits();
+ SDValue Ctlz = DAG.getNode(ISD::CTLZ, DL, VT, V);
+ SDValue Base = DAG.getConstant(EltBits - 1, DL, VT);
+ SDValue LogBase2 = DAG.getNode(ISD::SUB, DL, VT, Base, Ctlz);
+ return LogBase2;
+}
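The same computation on a 32-bit scalar, for reference (illustration only; __builtin_clz is the GCC/Clang counterpart of ISD::CTLZ and requires a non-zero argument):

    #include <cassert>
    #include <cstdint>

    unsigned logBase2(uint32_t V) {
      // LogBase2(V) = (EltBits - 1) - ctlz(V); exact when V is a power of two.
      return 31u - __builtin_clz(V);
    }

    int main() {
      assert(logBase2(1) == 0);
      assert(logBase2(8) == 3);
      assert(logBase2(1u << 20) == 20);
      return 0;
    }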
+
+/// Newton iteration for finding a zero of a function F(X):
+///   X_{i+1} = X_i - F(X_i)/F'(X_i)
+/// For the reciprocal, we need to find the zero of the function:
+/// F(X) = A X - 1 [which has a zero at X = 1/A]
+/// =>
+/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
+/// does not require additional intermediate precision]
SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags) {
if (Level >= AfterLegalizeDAG)
return SDValue();
- // Expose the DAG combiner to the target combiner implementations.
- TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this);
+ // TODO: Handle half and/or extended types?
+ EVT VT = Op.getValueType();
+ if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
+ return SDValue();
+
+ // If estimates are explicitly disabled for this function, we're done.
+ MachineFunction &MF = DAG.getMachineFunction();
+ int Enabled = TLI.getRecipEstimateDivEnabled(VT, MF);
+ if (Enabled == TLI.ReciprocalEstimate::Disabled)
+ return SDValue();
+
+ // Estimates may be explicitly enabled for this type with a custom number of
+ // refinement steps.
+ int Iterations = TLI.getDivRefinementSteps(VT, MF);
+ if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
+ AddToWorklist(Est.getNode());
- unsigned Iterations = 0;
- if (SDValue Est = TLI.getRecipEstimate(Op, DCI, Iterations)) {
if (Iterations) {
- // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
- // For the reciprocal, we need to find the zero of the function:
- // F(X) = A X - 1 [which has a zero at X = 1/A]
- // =>
- // X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
- // does not require additional intermediate precision]
EVT VT = Op.getValueType();
SDLoc DL(Op);
SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
- AddToWorklist(Est.getNode());
-
// Newton iterations: Est = Est + Est (1 - Arg * Est)
- for (unsigned i = 0; i < Iterations; ++i) {
+ for (int i = 0; i < Iterations; ++i) {
SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
AddToWorklist(NewEst.getNode());
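To see the refinement loop converge numerically, here is a scalar model of the iteration Est = Est + Est * (1 - A * Est) (illustration only; the starting value 0.3 stands in for whatever the target's reciprocal-estimate instruction would return for A = 3):

    #include <cstdio>

    int main() {
      float A = 3.0f;
      float Est = 0.3f; // rough initial estimate of 1/3
      for (int i = 0; i < 3; ++i) {
        float NewEst = A * Est;      // A * Est
        NewEst = 1.0f - NewEst;      // 1 - A * Est
        NewEst = Est * NewEst;       // Est * (1 - A * Est)
        Est = Est + NewEst;          // Est + Est * (1 - A * Est)
        std::printf("iter %d: %.9f\n", i, Est); // 0.33, 0.3333, 0.333333...
      }
      return 0;
    }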
@@ -14656,16 +15179,47 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags *Flags,
if (Level >= AfterLegalizeDAG)
return SDValue();
- // Expose the DAG combiner to the target combiner implementations.
- TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this);
- unsigned Iterations = 0;
+ // TODO: Handle half and/or extended types?
+ EVT VT = Op.getValueType();
+ if (VT.getScalarType() != MVT::f32 && VT.getScalarType() != MVT::f64)
+ return SDValue();
+
+ // If estimates are explicitly disabled for this function, we're done.
+ MachineFunction &MF = DAG.getMachineFunction();
+ int Enabled = TLI.getRecipEstimateSqrtEnabled(VT, MF);
+ if (Enabled == TLI.ReciprocalEstimate::Disabled)
+ return SDValue();
+
+ // Estimates may be explicitly enabled for this type with a custom number of
+ // refinement steps.
+ int Iterations = TLI.getSqrtRefinementSteps(VT, MF);
+
bool UseOneConstNR = false;
- if (SDValue Est = TLI.getRsqrtEstimate(Op, DCI, Iterations, UseOneConstNR)) {
+ if (SDValue Est =
+ TLI.getSqrtEstimate(Op, DAG, Enabled, Iterations, UseOneConstNR,
+ Reciprocal)) {
AddToWorklist(Est.getNode());
+
if (Iterations) {
Est = UseOneConstNR
- ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
- : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
+ ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
+ : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
+
+ if (!Reciprocal) {
+ // Unfortunately, Est is now NaN if the input was exactly 0.0.
+ // Select out this case and force the answer to 0.0.
+ EVT VT = Op.getValueType();
+ SDLoc DL(Op);
+
+ SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
+ EVT CCVT = getSetCCResultType(VT);
+ SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
+ AddToWorklist(ZeroCmp.getNode());
+
+ Est = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
+ ZeroCmp, FPZero, Est);
+ AddToWorklist(Est.getNode());
+ }
}
return Est;
}
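A scalar illustration of why that select is needed: when the square root is formed from a reciprocal-square-root estimate, an input of exactly 0.0 multiplies 0 by infinity and yields NaN, so the combiner forces the answer to 0.0 for that case (illustration only):

    #include <cmath>
    #include <cstdio>

    int main() {
      float X = 0.0f;
      float Rsqrt = 1.0f / std::sqrt(X);        // +inf at X == 0
      float Naive = X * Rsqrt;                  // 0 * inf -> NaN
      float Fixed = (X == 0.0f) ? 0.0f : Naive; // the select above
      std::printf("naive=%f fixed=%f\n", Naive, Fixed);
      return 0;
    }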
@@ -14678,23 +15232,7 @@ SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags) {
}
SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags *Flags) {
- SDValue Est = buildSqrtEstimateImpl(Op, Flags, false);
- if (!Est)
- return SDValue();
-
- // Unfortunately, Est is now NaN if the input was exactly 0.
- // Select out this case and force the answer to 0.
- EVT VT = Est.getValueType();
- SDLoc DL(Op);
- SDValue Zero = DAG.getConstantFP(0.0, DL, VT);
- EVT CCVT = getSetCCResultType(VT);
- SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, Op, Zero, ISD::SETEQ);
- AddToWorklist(ZeroCmp.getNode());
-
- Est = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT, ZeroCmp,
- Zero, Est);
- AddToWorklist(Est.getNode());
- return Est;
+ return buildSqrtEstimateImpl(Op, Flags, false);
}
/// Return true if base is a frame index, which is known not to alias with
@@ -14771,9 +15309,9 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
// To catch this case, look up the actual index of frame indices to compute
// the real alias relationship.
if (isFrameIndex1 && isFrameIndex2) {
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
- Offset1 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex());
- Offset2 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex());
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ Offset1 += MFI.getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex());
+ Offset2 += MFI.getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex());
return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 ||
(Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1);
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index b10da002fcfe..e2f33bb433ba 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -353,8 +353,8 @@ void FastISel::recomputeInsertPt() {
void FastISel::removeDeadCode(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator E) {
- assert(static_cast<MachineInstr *>(I) && static_cast<MachineInstr *>(E) &&
- std::distance(I, E) > 0 && "Invalid iterator!");
+ assert(I.isValid() && E.isValid() && std::distance(I, E) > 0 &&
+ "Invalid iterator!");
while (I != E) {
MachineInstr *Dead = &*I;
++I;
@@ -455,17 +455,6 @@ bool FastISel::selectBinaryOp(const User *I, unsigned ISDOpcode) {
return true;
}
- // Check if the second operand is a constant float.
- if (const auto *CF = dyn_cast<ConstantFP>(I->getOperand(1))) {
- unsigned ResultReg = fastEmit_rf(VT.getSimpleVT(), VT.getSimpleVT(),
- ISDOpcode, Op0, Op0IsKill, CF);
- if (ResultReg) {
- // We successfully emitted code for the given LLVM Instruction.
- updateValueMap(I, ResultReg);
- return true;
- }
- }
-
unsigned Op1 = getRegForValue(I->getOperand(1));
if (!Op1) // Unhandled operand. Halt "fast" selection and bail.
return false;
@@ -499,7 +488,7 @@ bool FastISel::selectGetElementPtr(const User *I) {
for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
GTI != E; ++GTI) {
const Value *Idx = GTI.getOperand();
- if (auto *StTy = dyn_cast<StructType>(*GTI)) {
+ if (StructType *StTy = GTI.getStructTypeOrNull()) {
uint64_t Field = cast<ConstantInt>(Idx)->getZExtValue();
if (Field) {
// N = N + Offset
@@ -581,7 +570,7 @@ bool FastISel::addStackMapLiveVars(SmallVectorImpl<MachineOperand> &Ops,
Ops.push_back(MachineOperand::CreateImm(StackMaps::ConstantOp));
Ops.push_back(MachineOperand::CreateImm(0));
} else if (auto *AI = dyn_cast<AllocaInst>(Val)) {
- // Values coming from a stack location also require a sepcial encoding,
+ // Values coming from a stack location also require a special encoding,
// but that is added later on by the target specific frame index
// elimination implementation.
auto SI = FuncInfo.StaticAllocaMap.find(AI);
@@ -666,7 +655,7 @@ bool FastISel::selectStackmap(const CallInst *I) {
.addImm(0);
// Inform the Frame Information that we have a stackmap in this function.
- FuncInfo.MF->getFrameInfo()->setHasStackMap();
+ FuncInfo.MF->getFrameInfo().setHasStackMap();
return true;
}
@@ -707,7 +696,7 @@ bool FastISel::lowerCallOperands(const CallInst *CI, unsigned ArgIdx,
FastISel::CallLoweringInfo &FastISel::CallLoweringInfo::setCallee(
const DataLayout &DL, MCContext &Ctx, CallingConv::ID CC, Type *ResultTy,
- const char *Target, ArgListTy &&ArgsList, unsigned FixedArgs) {
+ StringRef Target, ArgListTy &&ArgsList, unsigned FixedArgs) {
SmallString<32> MangledName;
Mangler::getNameWithPrefix(MangledName, Target, DL);
MCSymbol *Sym = Ctx.getOrCreateSymbol(MangledName);
@@ -845,7 +834,7 @@ bool FastISel::selectPatchpoint(const CallInst *I) {
CLI.Call->eraseFromParent();
// Inform the Frame Information that we have a patchpoint in this function.
- FuncInfo.MF->getFrameInfo()->setHasPatchPoint();
+ FuncInfo.MF->getFrameInfo().setHasPatchPoint();
if (CLI.NumResultRegs)
updateValueMap(I, CLI.ResultReg, CLI.NumResultRegs);
@@ -1077,7 +1066,7 @@ bool FastISel::selectCall(const User *I) {
}
MachineModuleInfo &MMI = FuncInfo.MF->getMMI();
- ComputeUsesVAFloatArgument(*Call, &MMI);
+ computeUsesVAFloatArgument(*Call, MMI);
// Handle intrinsic function calls.
if (const auto *II = dyn_cast<IntrinsicInst>(Call))
@@ -1104,6 +1093,8 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
case Intrinsic::lifetime_end:
// The donothing intrinsic does, well, nothing.
case Intrinsic::donothing:
+ // Neither does the assume intrinsic; it's also OK not to codegen its operand.
+ case Intrinsic::assume:
return true;
case Intrinsic::dbg_declare: {
const DbgDeclareInst *DI = cast<DbgDeclareInst>(II);
@@ -1225,6 +1216,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
updateValueMap(II, ResultReg);
return true;
}
+ case Intrinsic::invariant_group_barrier:
case Intrinsic::expect: {
unsigned ResultReg = getRegForValue(II->getArgOperand(0));
if (!ResultReg)
@@ -1324,15 +1316,6 @@ bool FastISel::selectBitCast(const User *I) {
return true;
}
-// Return true if we should copy from swift error to the final vreg as specified
-// by SwiftErrorWorklist.
-static bool shouldCopySwiftErrorsToFinalVRegs(const TargetLowering &TLI,
- FunctionLoweringInfo &FuncInfo) {
- if (!TLI.supportSwiftError())
- return false;
- return FuncInfo.SwiftErrorWorklist.count(FuncInfo.MBB);
-}
-
// Remove local value instructions starting from the instruction after
// SavedLastLocalValue to the current function insert point.
void FastISel::removeDeadLocalValueCode(MachineInstr *SavedLastLocalValue)
@@ -1357,10 +1340,6 @@ bool FastISel::selectInstruction(const Instruction *I) {
// Just before the terminator instruction, insert instructions to
// feed PHI nodes in successor blocks.
if (isa<TerminatorInst>(I)) {
- // If we need to materialize any vreg from worklist, we bail out of
- // FastISel.
- if (shouldCopySwiftErrorsToFinalVRegs(TLI, FuncInfo))
- return false;
if (!handlePHINodesInSuccessorBlocks(I->getParent())) {
// PHI node handling may have generated local value instructions,
// even though it failed to handle all PHI nodes.
@@ -1444,7 +1423,7 @@ void FastISel::fastEmitBranch(MachineBasicBlock *MSucc,
// fall-through case, which needs no instructions.
} else {
// The unconditional branch case.
- TII.InsertBranch(*FuncInfo.MBB, MSucc, nullptr,
+ TII.insertBranch(*FuncInfo.MBB, MSucc, nullptr,
SmallVector<MachineOperand, 0>(), DbgLoc);
}
if (FuncInfo.BPI) {
@@ -1679,7 +1658,7 @@ FastISel::FastISel(FunctionLoweringInfo &FuncInfo,
const TargetLibraryInfo *LibInfo,
bool SkipTargetIndependentISel)
: FuncInfo(FuncInfo), MF(FuncInfo.MF), MRI(FuncInfo.MF->getRegInfo()),
- MFI(*FuncInfo.MF->getFrameInfo()), MCP(*FuncInfo.MF->getConstantPool()),
+ MFI(FuncInfo.MF->getFrameInfo()), MCP(*FuncInfo.MF->getConstantPool()),
TM(FuncInfo.MF->getTarget()), DL(MF->getDataLayout()),
TII(*MF->getSubtarget().getInstrInfo()),
TLI(*MF->getSubtarget().getTargetLowering()),
@@ -1723,18 +1702,6 @@ unsigned FastISel::fastEmit_ri(MVT, MVT, unsigned, unsigned /*Op0*/,
return 0;
}
-unsigned FastISel::fastEmit_rf(MVT, MVT, unsigned, unsigned /*Op0*/,
- bool /*Op0IsKill*/,
- const ConstantFP * /*FPImm*/) {
- return 0;
-}
-
-unsigned FastISel::fastEmit_rri(MVT, MVT, unsigned, unsigned /*Op0*/,
- bool /*Op0IsKill*/, unsigned /*Op1*/,
- bool /*Op1IsKill*/, uint64_t /*Imm*/) {
- return 0;
-}
-
/// This method is a wrapper of fastEmit_ri. It first tries to emit an
/// instruction with an immediate operand using fastEmit_ri.
/// If that fails, it materializes the immediate into a register and try
@@ -2181,6 +2148,8 @@ FastISel::createMachineMemOperandFor(const Instruction *I) const {
bool IsNonTemporal = I->getMetadata(LLVMContext::MD_nontemporal) != nullptr;
bool IsInvariant = I->getMetadata(LLVMContext::MD_invariant_load) != nullptr;
+ bool IsDereferenceable =
+ I->getMetadata(LLVMContext::MD_dereferenceable) != nullptr;
const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range);
AAMDNodes AAInfo;
@@ -2195,6 +2164,8 @@ FastISel::createMachineMemOperandFor(const Instruction *I) const {
Flags |= MachineMemOperand::MOVolatile;
if (IsNonTemporal)
Flags |= MachineMemOperand::MONonTemporal;
+ if (IsDereferenceable)
+ Flags |= MachineMemOperand::MODereferenceable;
if (IsInvariant)
Flags |= MachineMemOperand::MOInvariant;
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index e669ffc3d02a..377a5237f15a 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -13,7 +13,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/FunctionLoweringInfo.h"
-#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -98,7 +97,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
Fn->isVarArg(), Outs, Fn->getContext());
// If this personality uses funclets, we need to do a bit more work.
- DenseMap<const AllocaInst *, int *> CatchObjects;
+ DenseMap<const AllocaInst *, TinyPtrVector<int *>> CatchObjects;
EHPersonality Personality = classifyEHPersonality(
Fn->hasPersonalityFn() ? Fn->getPersonalityFn() : nullptr);
if (isFuncletEHPersonality(Personality)) {
@@ -115,7 +114,8 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) {
for (WinEHHandlerType &H : TBME.HandlerArray) {
if (const AllocaInst *AI = H.CatchObj.Alloca)
- CatchObjects.insert({AI, &H.CatchObj.FrameIndex});
+ CatchObjects.insert({AI, {}}).first->second.push_back(
+ &H.CatchObj.FrameIndex);
else
H.CatchObj.FrameIndex = INT_MAX;
}
@@ -125,11 +125,9 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
// Initialize the mapping of values to registers. This is only set up for
// instruction values that are used outside of the block that defines
// them.
- Function::const_iterator BB = Fn->begin(), EB = Fn->end();
- for (; BB != EB; ++BB)
- for (BasicBlock::const_iterator I = BB->begin(), E = BB->end();
- I != E; ++I) {
- if (const AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
+ for (const BasicBlock &BB : *Fn) {
+ for (const Instruction &I : BB) {
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
Type *Ty = AI->getAllocatedType();
unsigned Align =
std::max((unsigned)MF->getDataLayout().getPrefTypeAlignment(Ty),
@@ -138,7 +136,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
// Static allocas can be folded into the initial stack frame
// adjustment. For targets that don't realign the stack, don't
// do this if there is an extra alignment requirement.
- if (AI->isStaticAlloca() &&
+ if (AI->isStaticAlloca() &&
(TFI->isStackRealignable() || (Align <= StackAlign))) {
const ConstantInt *CUI = cast<ConstantInt>(AI->getArraySize());
uint64_t TySize = MF->getDataLayout().getTypeAllocSize(Ty);
@@ -148,18 +146,20 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
int FrameIndex = INT_MAX;
auto Iter = CatchObjects.find(AI);
if (Iter != CatchObjects.end() && TLI->needsFixedCatchObjects()) {
- FrameIndex = MF->getFrameInfo()->CreateFixedObject(
+ FrameIndex = MF->getFrameInfo().CreateFixedObject(
TySize, 0, /*Immutable=*/false, /*isAliased=*/true);
- MF->getFrameInfo()->setObjectAlignment(FrameIndex, Align);
+ MF->getFrameInfo().setObjectAlignment(FrameIndex, Align);
} else {
FrameIndex =
- MF->getFrameInfo()->CreateStackObject(TySize, Align, false, AI);
+ MF->getFrameInfo().CreateStackObject(TySize, Align, false, AI);
}
StaticAllocaMap[AI] = FrameIndex;
// Update the catch handler information.
- if (Iter != CatchObjects.end())
- *Iter->second = FrameIndex;
+ if (Iter != CatchObjects.end()) {
+ for (int *CatchObjPtr : Iter->second)
+ *CatchObjPtr = FrameIndex;
+ }
} else {
// FIXME: Overaligned static allocas should be grouped into
// a single dynamic allocation instead of using a separate
@@ -167,20 +167,19 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
if (Align <= StackAlign)
Align = 0;
// Inform the Frame Information that we have variable-sized objects.
- MF->getFrameInfo()->CreateVariableSizedObject(Align ? Align : 1, AI);
+ MF->getFrameInfo().CreateVariableSizedObject(Align ? Align : 1, AI);
}
}
// Look for inline asm that clobbers the SP register.
if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
- ImmutableCallSite CS(&*I);
+ ImmutableCallSite CS(&I);
if (isa<InlineAsm>(CS.getCalledValue())) {
unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
std::vector<TargetLowering::AsmOperandInfo> Ops =
TLI->ParseConstraints(Fn->getParent()->getDataLayout(), TRI, CS);
- for (size_t I = 0, E = Ops.size(); I != E; ++I) {
- TargetLowering::AsmOperandInfo &Op = Ops[I];
+ for (TargetLowering::AsmOperandInfo &Op : Ops) {
if (Op.Type == InlineAsm::isClobber) {
// Clobbers don't have SDValue operands, hence SDValue().
TLI->ComputeConstraintToUse(Op, SDValue(), DAG);
@@ -188,7 +187,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
TLI->getRegForInlineAsmConstraint(TRI, Op.ConstraintCode,
Op.ConstraintVT);
if (PhysReg.first == SP)
- MF->getFrameInfo()->setHasOpaqueSPAdjustment(true);
+ MF->getFrameInfo().setHasOpaqueSPAdjustment(true);
}
}
}
@@ -197,28 +196,28 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
// Look for calls to the @llvm.va_start intrinsic. We can omit some
// prologue boilerplate for variadic functions that don't examine their
// arguments.
- if (const auto *II = dyn_cast<IntrinsicInst>(I)) {
+ if (const auto *II = dyn_cast<IntrinsicInst>(&I)) {
if (II->getIntrinsicID() == Intrinsic::vastart)
- MF->getFrameInfo()->setHasVAStart(true);
+ MF->getFrameInfo().setHasVAStart(true);
}
// If we have a musttail call in a variadic function, we need to ensure we
// forward implicit register parameters.
- if (const auto *CI = dyn_cast<CallInst>(I)) {
+ if (const auto *CI = dyn_cast<CallInst>(&I)) {
if (CI->isMustTailCall() && Fn->isVarArg())
- MF->getFrameInfo()->setHasMustTailInVarArgFunc(true);
+ MF->getFrameInfo().setHasMustTailInVarArgFunc(true);
}
// Mark values used outside their block as exported, by allocating
// a virtual register for them.
- if (isUsedOutsideOfDefiningBlock(&*I))
- if (!isa<AllocaInst>(I) || !StaticAllocaMap.count(cast<AllocaInst>(I)))
- InitializeRegForValue(&*I);
+ if (isUsedOutsideOfDefiningBlock(&I))
+ if (!isa<AllocaInst>(I) || !StaticAllocaMap.count(cast<AllocaInst>(&I)))
+ InitializeRegForValue(&I);
// Collect llvm.dbg.declare information. This is done now instead of
// during the initial isel pass through the IR so that it is done
// in a predictable order.
- if (const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(I)) {
+ if (const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(&I)) {
assert(DI->getVariable() && "Missing variable");
assert(DI->getDebugLoc() && "Missing location");
if (MMI.hasDebugInfo()) {
@@ -234,7 +233,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
StaticAllocaMap.find(AI);
if (SI != StaticAllocaMap.end()) { // Check for VLAs.
int FI = SI->second;
- MMI.setVariableDbgInfo(DI->getVariable(), DI->getExpression(),
+ MF->setVariableDbgInfo(DI->getVariable(), DI->getExpression(),
FI, DI->getDebugLoc());
}
}
@@ -243,47 +242,52 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
}
// Decide the preferred extend type for a value.
- PreferredExtendType[&*I] = getPreferredExtendForValue(&*I);
+ PreferredExtendType[&I] = getPreferredExtendForValue(&I);
}
+ }
// Create an initial MachineBasicBlock for each LLVM BasicBlock in F. This
// also creates the initial PHI MachineInstrs, though none of the input
// operands are populated.
- for (BB = Fn->begin(); BB != EB; ++BB) {
+ for (const BasicBlock &BB : *Fn) {
// Don't create MachineBasicBlocks for imaginary EH pad blocks. These blocks
// are really data, and no instructions can live here.
- if (BB->isEHPad()) {
- const Instruction *I = BB->getFirstNonPHI();
+ if (BB.isEHPad()) {
+ const Instruction *PadInst = BB.getFirstNonPHI();
// If this is a non-landingpad EH pad, mark this function as using
// funclets.
// FIXME: SEH catchpads do not create funclets, so we could avoid setting
// this in such cases in order to improve frame layout.
- if (!isa<LandingPadInst>(I)) {
- MMI.setHasEHFunclets(true);
- MF->getFrameInfo()->setHasOpaqueSPAdjustment(true);
+ if (!isa<LandingPadInst>(PadInst)) {
+ MF->setHasEHFunclets(true);
+ MF->getFrameInfo().setHasOpaqueSPAdjustment(true);
}
- if (isa<CatchSwitchInst>(I)) {
- assert(&*BB->begin() == I &&
+ if (isa<CatchSwitchInst>(PadInst)) {
+ assert(&*BB.begin() == PadInst &&
"WinEHPrepare failed to remove PHIs from imaginary BBs");
continue;
}
- if (isa<FuncletPadInst>(I))
- assert(&*BB->begin() == I && "WinEHPrepare failed to demote PHIs");
+ if (isa<FuncletPadInst>(PadInst))
+ assert(&*BB.begin() == PadInst && "WinEHPrepare failed to demote PHIs");
}
- MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(&*BB);
- MBBMap[&*BB] = MBB;
+ MachineBasicBlock *MBB = mf.CreateMachineBasicBlock(&BB);
+ MBBMap[&BB] = MBB;
MF->push_back(MBB);
// Transfer the address-taken flag. This is necessary because there could
// be multiple MachineBasicBlocks corresponding to one BasicBlock, and only
// the first one should be marked.
- if (BB->hasAddressTaken())
+ if (BB.hasAddressTaken())
MBB->setHasAddressTaken();
+ // Mark landing pad blocks.
+ if (BB.isEHPad())
+ MBB->setIsEHPad();
+
// Create Machine PHI nodes for LLVM PHI nodes, lowering them as
// appropriate.
- for (BasicBlock::const_iterator I = BB->begin();
+ for (BasicBlock::const_iterator I = BB.begin();
const PHINode *PN = dyn_cast<PHINode>(I); ++I) {
if (PN->use_empty()) continue;
@@ -297,8 +301,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
SmallVector<EVT, 4> ValueVTs;
ComputeValueVTs(*TLI, MF->getDataLayout(), PN->getType(), ValueVTs);
- for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
- EVT VT = ValueVTs[vti];
+ for (EVT VT : ValueVTs) {
unsigned NumRegisters = TLI->getNumRegisters(Fn->getContext(), VT);
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
for (unsigned i = 0; i != NumRegisters; ++i)
@@ -308,16 +311,6 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
}
}
- // Mark landing pad blocks.
- SmallVector<const LandingPadInst *, 4> LPads;
- for (BB = Fn->begin(); BB != EB; ++BB) {
- const Instruction *FNP = BB->getFirstNonPHI();
- if (BB->isEHPad() && MBBMap.count(&*BB))
- MBBMap[&*BB]->setIsEHPad();
- if (const auto *LPI = dyn_cast<LandingPadInst>(FNP))
- LPads.push_back(LPI);
- }
-
if (!isFuncletEHPersonality(Personality))
return;
@@ -541,75 +534,26 @@ unsigned FunctionLoweringInfo::getCatchPadExceptionPointerVReg(
return VReg;
}
-/// ComputeUsesVAFloatArgument - Determine if any floating-point values are
-/// being passed to this variadic function, and set the MachineModuleInfo's
-/// usesVAFloatArgument flag if so. This flag is used to emit an undefined
-/// reference to _fltused on Windows, which will link in MSVCRT's
-/// floating-point support.
-void llvm::ComputeUsesVAFloatArgument(const CallInst &I,
- MachineModuleInfo *MMI)
-{
- FunctionType *FT = cast<FunctionType>(
- I.getCalledValue()->getType()->getContainedType(0));
- if (FT->isVarArg() && !MMI->usesVAFloatArgument()) {
- for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
- Type* T = I.getArgOperand(i)->getType();
- for (auto i : post_order(T)) {
- if (i->isFloatingPointTy()) {
- MMI->setUsesVAFloatArgument(true);
- return;
- }
- }
- }
- }
-}
-
-/// AddLandingPadInfo - Extract the exception handling information from the
-/// landingpad instruction and add them to the specified machine module info.
-void llvm::AddLandingPadInfo(const LandingPadInst &I, MachineModuleInfo &MMI,
- MachineBasicBlock *MBB) {
- if (const auto *PF = dyn_cast<Function>(
- I.getParent()->getParent()->getPersonalityFn()->stripPointerCasts()))
- MMI.addPersonality(PF);
-
- if (I.isCleanup())
- MMI.addCleanup(MBB);
-
- // FIXME: New EH - Add the clauses in reverse order. This isn't 100% correct,
- // but we need to do it this way because of how the DWARF EH emitter
- // processes the clauses.
- for (unsigned i = I.getNumClauses(); i != 0; --i) {
- Value *Val = I.getClause(i - 1);
- if (I.isCatch(i - 1)) {
- MMI.addCatchTypeInfo(MBB,
- dyn_cast<GlobalValue>(Val->stripPointerCasts()));
- } else {
- // Add filters in a list.
- Constant *CVal = cast<Constant>(Val);
- SmallVector<const GlobalValue*, 4> FilterList;
- for (User::op_iterator
- II = CVal->op_begin(), IE = CVal->op_end(); II != IE; ++II)
- FilterList.push_back(cast<GlobalValue>((*II)->stripPointerCasts()));
-
- MMI.addFilterTypeInfo(MBB, FilterList);
- }
- }
-}
-
-unsigned FunctionLoweringInfo::findSwiftErrorVReg(const MachineBasicBlock *MBB,
- const Value* Val) const {
- // Find the index in SwiftErrorVals.
- SwiftErrorValues::const_iterator I =
- std::find(SwiftErrorVals.begin(), SwiftErrorVals.end(), Val);
- assert(I != SwiftErrorVals.end() && "Can't find value in SwiftErrorVals");
- return SwiftErrorMap.lookup(MBB)[I - SwiftErrorVals.begin()];
+unsigned
+FunctionLoweringInfo::getOrCreateSwiftErrorVReg(const MachineBasicBlock *MBB,
+ const Value *Val) {
+ auto Key = std::make_pair(MBB, Val);
+ auto It = SwiftErrorVRegDefMap.find(Key);
+ // If this is the first use of this swifterror value in this basic block,
+ // create a new virtual register.
+  // Once we have processed all basic blocks, we will satisfy this "upwards
+  // exposed use" by inserting a copy or phi at the beginning of this block.
+ if (It == SwiftErrorVRegDefMap.end()) {
+ auto &DL = MF->getDataLayout();
+ const TargetRegisterClass *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
+ auto VReg = MF->getRegInfo().createVirtualRegister(RC);
+ SwiftErrorVRegDefMap[Key] = VReg;
+ SwiftErrorVRegUpwardsUse[Key] = VReg;
+ return VReg;
+ } else return It->second;
}
-void FunctionLoweringInfo::setSwiftErrorVReg(const MachineBasicBlock *MBB,
- const Value* Val, unsigned VReg) {
- // Find the index in SwiftErrorVals.
- SwiftErrorValues::iterator I =
- std::find(SwiftErrorVals.begin(), SwiftErrorVals.end(), Val);
- assert(I != SwiftErrorVals.end() && "Can't find value in SwiftErrorVals");
- SwiftErrorMap[MBB][I - SwiftErrorVals.begin()] = VReg;
+void FunctionLoweringInfo::setCurrentSwiftErrorVReg(
+ const MachineBasicBlock *MBB, const Value *Val, unsigned VReg) {
+ SwiftErrorVRegDefMap[std::make_pair(MBB, Val)] = VReg;
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index c8af73a3b441..3b91e58879b4 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -330,16 +330,24 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB,
// shrink VReg's register class within reason. For example, if VReg == GR32
// and II requires a GR32_NOSP, just constrain VReg to GR32_NOSP.
if (II) {
- const TargetRegisterClass *DstRC = nullptr;
+ const TargetRegisterClass *OpRC = nullptr;
if (IIOpNum < II->getNumOperands())
- DstRC = TRI->getAllocatableClass(TII->getRegClass(*II,IIOpNum,TRI,*MF));
- assert((!DstRC || TargetRegisterInfo::isVirtualRegister(VReg)) &&
- "Expected VReg");
- if (DstRC && !MRI->constrainRegClass(VReg, DstRC, MinRCSize)) {
- unsigned NewVReg = MRI->createVirtualRegister(DstRC);
- BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(),
- TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg);
- VReg = NewVReg;
+ OpRC = TII->getRegClass(*II, IIOpNum, TRI, *MF);
+
+ if (OpRC) {
+ const TargetRegisterClass *ConstrainedRC
+ = MRI->constrainRegClass(VReg, OpRC, MinRCSize);
+ if (!ConstrainedRC) {
+ OpRC = TRI->getAllocatableClass(OpRC);
+ assert(OpRC && "Constraints cannot be fulfilled for allocation");
+ unsigned NewVReg = MRI->createVirtualRegister(OpRC);
+ BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg);
+ VReg = NewVReg;
+ } else {
+ assert(ConstrainedRC->isAllocatable() &&
+ "Constraining an allocatable VReg produced an unallocatable class?");
+ }
}
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 18ad9103d459..3485e35e6f5d 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -259,19 +259,25 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) {
(VT == MVT::f64) ? MVT::i64 : MVT::i32);
}
+ APFloat APF = CFP->getValueAPF();
EVT OrigVT = VT;
EVT SVT = VT;
- while (SVT != MVT::f32 && SVT != MVT::f16) {
- SVT = (MVT::SimpleValueType)(SVT.getSimpleVT().SimpleTy - 1);
- if (ConstantFPSDNode::isValueValidForType(SVT, CFP->getValueAPF()) &&
- // Only do this if the target has a native EXTLOAD instruction from
- // smaller type.
- TLI.isLoadExtLegal(ISD::EXTLOAD, OrigVT, SVT) &&
- TLI.ShouldShrinkFPConstant(OrigVT)) {
- Type *SType = SVT.getTypeForEVT(*DAG.getContext());
- LLVMC = cast<ConstantFP>(ConstantExpr::getFPTrunc(LLVMC, SType));
- VT = SVT;
- Extend = true;
+
+ // We don't want to shrink SNaNs. Converting the SNaN back to its real type
+ // can cause it to be changed into a QNaN on some platforms (e.g. on SystemZ).
+ if (!APF.isSignaling()) {
+ while (SVT != MVT::f32 && SVT != MVT::f16) {
+ SVT = (MVT::SimpleValueType)(SVT.getSimpleVT().SimpleTy - 1);
+ if (ConstantFPSDNode::isValueValidForType(SVT, APF) &&
+ // Only do this if the target has a native EXTLOAD instruction from
+ // smaller type.
+ TLI.isLoadExtLegal(ISD::EXTLOAD, OrigVT, SVT) &&
+ TLI.ShouldShrinkFPConstant(OrigVT)) {
+ Type *SType = SVT.getTypeForEVT(*DAG.getContext());
+ LLVMC = cast<ConstantFP>(ConstantExpr::getFPTrunc(LLVMC, SType));
+ VT = SVT;
+ Extend = true;
+ }
}
}
@@ -795,7 +801,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Custom:
isCustom = true;
- // FALLTHROUGH
+ LLVM_FALLTHROUGH;
case TargetLowering::Legal: {
Value = SDValue(Node, 0);
Chain = SDValue(Node, 1);
@@ -1013,6 +1019,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::ADJUST_TRAMPOLINE:
case ISD::FRAMEADDR:
case ISD::RETURNADDR:
+ case ISD::ADDROFRETURNADDR:
// These operations lie about being legal: when they claim to be legal,
// they should actually be custom-lowered.
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
@@ -1061,35 +1068,41 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::SRL:
case ISD::SRA:
case ISD::ROTL:
- case ISD::ROTR:
+ case ISD::ROTR: {
// Legalizing shifts/rotates requires adjusting the shift amount
// to the appropriate width.
- if (!Node->getOperand(1).getValueType().isVector()) {
- SDValue SAO =
- DAG.getShiftAmountOperand(Node->getOperand(0).getValueType(),
- Node->getOperand(1));
- HandleSDNode Handle(SAO);
- LegalizeOp(SAO.getNode());
- NewNode = DAG.UpdateNodeOperands(Node, Node->getOperand(0),
- Handle.getValue());
+ SDValue Op0 = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ if (!Op1.getValueType().isVector()) {
+ SDValue SAO = DAG.getShiftAmountOperand(Op0.getValueType(), Op1);
+ // The getShiftAmountOperand() may create a new operand node or
+        // return the existing one. If a new operand is created, we need
+ // to update the parent node.
+ // Do not try to legalize SAO here! It will be automatically legalized
+ // in the next round.
+ if (SAO != Op1)
+ NewNode = DAG.UpdateNodeOperands(Node, Op0, SAO);
}
- break;
+ }
+ break;
case ISD::SRL_PARTS:
case ISD::SRA_PARTS:
- case ISD::SHL_PARTS:
+ case ISD::SHL_PARTS: {
// Legalizing shifts/rotates requires adjusting the shift amount
// to the appropriate width.
- if (!Node->getOperand(2).getValueType().isVector()) {
- SDValue SAO =
- DAG.getShiftAmountOperand(Node->getOperand(0).getValueType(),
- Node->getOperand(2));
- HandleSDNode Handle(SAO);
- LegalizeOp(SAO.getNode());
- NewNode = DAG.UpdateNodeOperands(Node, Node->getOperand(0),
- Node->getOperand(1),
- Handle.getValue());
+ SDValue Op0 = Node->getOperand(0);
+ SDValue Op1 = Node->getOperand(1);
+ SDValue Op2 = Node->getOperand(2);
+ if (!Op2.getValueType().isVector()) {
+ SDValue SAO = DAG.getShiftAmountOperand(Op0.getValueType(), Op2);
+ // The getShiftAmountOperand() may create a new operand node or
+        // return the existing one. If a new operand is created, we need
+ // to update the parent node.
+ if (SAO != Op2)
+ NewNode = DAG.UpdateNodeOperands(Node, Op0, Op1, SAO);
}
- break;
+ }
+ break;
}
if (NewNode != Node) {
@@ -1118,12 +1131,12 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
ReplaceNode(Node, ResultVals.data());
return;
}
+ LLVM_FALLTHROUGH;
}
- // FALL THROUGH
case TargetLowering::Expand:
if (ExpandNode(Node))
return;
- // FALL THROUGH
+ LLVM_FALLTHROUGH;
case TargetLowering::LibCall:
ConvertNodeToLibcall(Node);
return;
@@ -1204,8 +1217,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
}
// Add the offset to the index.
- unsigned EltSize =
- Vec.getValueType().getVectorElementType().getSizeInBits()/8;
+ unsigned EltSize = Vec.getScalarValueSizeInBits() / 8;
Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx,
DAG.getConstant(EltSize, SDLoc(Vec), Idx.getValueType()));
@@ -1256,8 +1268,7 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
// Then store the inserted part.
// Add the offset to the index.
- unsigned EltSize =
- Vec.getValueType().getVectorElementType().getSizeInBits()/8;
+ unsigned EltSize = Vec.getScalarValueSizeInBits() / 8;
Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx,
DAG.getConstant(EltSize, SDLoc(Vec), Idx.getValueType()));
@@ -1593,6 +1604,7 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS,
break;
}
// Fallthrough if we are unsigned integer.
+ LLVM_FALLTHROUGH;
case ISD::SETLE:
case ISD::SETGT:
case ISD::SETGE:
@@ -1650,7 +1662,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT,
MachinePointerInfo PtrInfo =
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
- unsigned SrcSize = SrcOp.getValueType().getSizeInBits();
+ unsigned SrcSize = SrcOp.getValueSizeInBits();
unsigned SlotSize = SlotVT.getSizeInBits();
unsigned DestSize = DestVT.getSizeInBits();
Type *DestType = DestVT.getTypeForEVT(*DAG.getContext());
@@ -2521,13 +2533,56 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT,
return DAG.getNode(ISD::TRUNCATE, dl, DestVT, Operation);
}
-/// Open code the operations for BITREVERSE.
+/// Legalize a BITREVERSE scalar/vector operation as a series of mask + shifts.
SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, const SDLoc &dl) {
EVT VT = Op.getValueType();
EVT SHVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
unsigned Sz = VT.getScalarSizeInBits();
- SDValue Tmp, Tmp2;
+ SDValue Tmp, Tmp2, Tmp3;
+
+  // If we can, perform BSWAP first, then mask+swap the i4 pairs, then the i2
+  // pairs, and finally the i1 pairs.
+ // TODO: We can easily support i4/i2 legal types if any target ever does.
+ if (Sz >= 8 && isPowerOf2_32(Sz)) {
+ // Create the masks - repeating the pattern every byte.
+ APInt MaskHi4(Sz, 0), MaskHi2(Sz, 0), MaskHi1(Sz, 0);
+ APInt MaskLo4(Sz, 0), MaskLo2(Sz, 0), MaskLo1(Sz, 0);
+ for (unsigned J = 0; J != Sz; J += 8) {
+ MaskHi4 = MaskHi4.Or(APInt(Sz, 0xF0ull << J));
+ MaskLo4 = MaskLo4.Or(APInt(Sz, 0x0Full << J));
+ MaskHi2 = MaskHi2.Or(APInt(Sz, 0xCCull << J));
+ MaskLo2 = MaskLo2.Or(APInt(Sz, 0x33ull << J));
+ MaskHi1 = MaskHi1.Or(APInt(Sz, 0xAAull << J));
+ MaskLo1 = MaskLo1.Or(APInt(Sz, 0x55ull << J));
+ }
+
+ // BSWAP if the type is wider than a single byte.
+ Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);
+
+ // swap i4: ((V & 0xF0) >> 4) | ((V & 0x0F) << 4)
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi4, dl, VT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo4, dl, VT));
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(4, dl, VT));
+ Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(4, dl, VT));
+ Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
+
+ // swap i2: ((V & 0xCC) >> 2) | ((V & 0x33) << 2)
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi2, dl, VT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo2, dl, VT));
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(2, dl, VT));
+ Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(2, dl, VT));
+ Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
+
+ // swap i1: ((V & 0xAA) >> 1) | ((V & 0x55) << 1)
+ Tmp2 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskHi1, dl, VT));
+ Tmp3 = DAG.getNode(ISD::AND, dl, VT, Tmp, DAG.getConstant(MaskLo1, dl, VT));
+ Tmp2 = DAG.getNode(ISD::SRL, dl, VT, Tmp2, DAG.getConstant(1, dl, VT));
+ Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Tmp3, DAG.getConstant(1, dl, VT));
+ Tmp = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
+ return Tmp;
+ }
+
Tmp = DAG.getConstant(0, dl, VT);
for (unsigned I = 0, J = Sz-1; I < Sz; ++I, --J) {
if (I < J)
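
Editor's illustration (plain C++ sketch, not LLVM code) of the mask+shift expansion added above: byte-swap first, then swap nibbles, bit pairs and single bits with the repeating 0xF0/0x0F, 0xCC/0x33 and 0xAA/0x55 masks. Uses the GCC/Clang __builtin_bswap32 for the BSWAP step.

#include <cassert>
#include <cstdint>

static uint32_t bitreverse32(uint32_t V) {
  V = __builtin_bswap32(V);                                  // BSWAP
  V = ((V & 0xF0F0F0F0u) >> 4) | ((V & 0x0F0F0F0Fu) << 4);   // swap i4
  V = ((V & 0xCCCCCCCCu) >> 2) | ((V & 0x33333333u) << 2);   // swap i2
  V = ((V & 0xAAAAAAAAu) >> 1) | ((V & 0x55555555u) << 1);   // swap i1
  return V;
}

int main() {
  assert(bitreverse32(0x00000001u) == 0x80000000u);
  assert(bitreverse32(0x0000F000u) == 0x000F0000u);
  return 0;
}
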
@@ -2551,7 +2606,7 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, const SDLoc &dl) {
EVT VT = Op.getValueType();
EVT SHVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
- switch (VT.getSimpleVT().SimpleTy) {
+ switch (VT.getSimpleVT().getScalarType().SimpleTy) {
default: llvm_unreachable("Unhandled Expand type in BSWAP!");
case MVT::i16:
Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
@@ -2780,10 +2835,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
SDValue Swap = DAG.getAtomicCmpSwap(
ISD::ATOMIC_CMP_SWAP, dl, cast<AtomicSDNode>(Node)->getMemoryVT(), VTs,
Node->getOperand(0), Node->getOperand(1), Zero, Zero,
- cast<AtomicSDNode>(Node)->getMemOperand(),
- cast<AtomicSDNode>(Node)->getOrdering(),
- cast<AtomicSDNode>(Node)->getOrdering(),
- cast<AtomicSDNode>(Node)->getSynchScope());
+ cast<AtomicSDNode>(Node)->getMemOperand());
Results.push_back(Swap.getValue(0));
Results.push_back(Swap.getValue(1));
break;
@@ -2794,9 +2846,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
cast<AtomicSDNode>(Node)->getMemoryVT(),
Node->getOperand(0),
Node->getOperand(1), Node->getOperand(2),
- cast<AtomicSDNode>(Node)->getMemOperand(),
- cast<AtomicSDNode>(Node)->getOrdering(),
- cast<AtomicSDNode>(Node)->getSynchScope());
+ cast<AtomicSDNode>(Node)->getMemOperand());
Results.push_back(Swap.getValue(1));
break;
}
@@ -2808,10 +2858,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
SDValue Res = DAG.getAtomicCmpSwap(
ISD::ATOMIC_CMP_SWAP, dl, cast<AtomicSDNode>(Node)->getMemoryVT(), VTs,
Node->getOperand(0), Node->getOperand(1), Node->getOperand(2),
- Node->getOperand(3), cast<MemSDNode>(Node)->getMemOperand(),
- cast<AtomicSDNode>(Node)->getSuccessOrdering(),
- cast<AtomicSDNode>(Node)->getFailureOrdering(),
- cast<AtomicSDNode>(Node)->getSynchScope());
+ Node->getOperand(3), cast<MemSDNode>(Node)->getMemOperand());
SDValue ExtRes = Res;
SDValue LHS = Res;
@@ -2879,15 +2926,32 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Tmp1);
break;
case ISD::SIGN_EXTEND_INREG: {
- // NOTE: we could fall back on load/store here too for targets without
- // SAR. However, it is doubtful that any exist.
EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
EVT VT = Node->getValueType(0);
+
+ // An in-register sign-extend of a boolean is a negation:
+ // 'true' (1) sign-extended is -1.
+ // 'false' (0) sign-extended is 0.
+ // However, we must mask the high bits of the source operand because the
+ // SIGN_EXTEND_INREG does not guarantee that the high bits are already zero.
+
+ // TODO: Do this for vectors too?
+ if (ExtraVT.getSizeInBits() == 1) {
+ SDValue One = DAG.getConstant(1, dl, VT);
+ SDValue And = DAG.getNode(ISD::AND, dl, VT, Node->getOperand(0), One);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue Neg = DAG.getNode(ISD::SUB, dl, VT, Zero, And);
+ Results.push_back(Neg);
+ break;
+ }
+
+ // NOTE: we could fall back on load/store here too for targets without
+ // SRA. However, it is doubtful that any exist.
EVT ShiftAmountTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
if (VT.isVector())
ShiftAmountTy = VT;
- unsigned BitsDiff = VT.getScalarType().getSizeInBits() -
- ExtraVT.getScalarType().getSizeInBits();
+ unsigned BitsDiff = VT.getScalarSizeInBits() -
+ ExtraVT.getScalarSizeInBits();
SDValue ShiftCst = DAG.getConstant(BitsDiff, dl, ShiftAmountTy);
Tmp1 = DAG.getNode(ISD::SHL, dl, Node->getValueType(0),
Node->getOperand(0), ShiftCst);
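
Editor's illustration of the boolean special case above: sign-extending an i1 held in a wider register is "mask to one bit, then negate". Standalone C++ sketch, not LLVM code.

#include <cassert>
#include <cstdint>

static int32_t SExtInRegI1(int32_t X) {
  int32_t Bit = X & 1;   // clear the (possibly garbage) high bits first
  return 0 - Bit;        // 1 -> -1, 0 -> 0
}

int main() {
  assert(SExtInRegI1(1) == -1);
  assert(SExtInRegI1(0) == 0);
  assert(SExtInRegI1(0xFE) == 0);   // high bits must not leak through
  return 0;
}
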
@@ -3248,17 +3312,49 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
case ISD::MULHU:
case ISD::MULHS: {
- unsigned ExpandOpcode = Node->getOpcode() == ISD::MULHU ? ISD::UMUL_LOHI :
- ISD::SMUL_LOHI;
+ unsigned ExpandOpcode =
+ Node->getOpcode() == ISD::MULHU ? ISD::UMUL_LOHI : ISD::SMUL_LOHI;
EVT VT = Node->getValueType(0);
SDVTList VTs = DAG.getVTList(VT, VT);
- assert(TLI.isOperationLegalOrCustom(ExpandOpcode, VT) &&
- "If this wasn't legal, it shouldn't have been created!");
+
Tmp1 = DAG.getNode(ExpandOpcode, dl, VTs, Node->getOperand(0),
Node->getOperand(1));
Results.push_back(Tmp1.getValue(1));
break;
}
+ case ISD::UMUL_LOHI:
+ case ISD::SMUL_LOHI: {
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ MVT VT = LHS.getSimpleValueType();
+ unsigned MULHOpcode =
+ Node->getOpcode() == ISD::UMUL_LOHI ? ISD::MULHU : ISD::MULHS;
+
+ if (TLI.isOperationLegalOrCustom(MULHOpcode, VT)) {
+ Results.push_back(DAG.getNode(ISD::MUL, dl, VT, LHS, RHS));
+ Results.push_back(DAG.getNode(MULHOpcode, dl, VT, LHS, RHS));
+ break;
+ }
+
+ SmallVector<SDValue, 4> Halves;
+ EVT HalfType = EVT(VT).getHalfSizedIntegerVT(*DAG.getContext());
+ assert(TLI.isTypeLegal(HalfType));
+ if (TLI.expandMUL_LOHI(Node->getOpcode(), VT, Node, LHS, RHS, Halves,
+ HalfType, DAG,
+ TargetLowering::MulExpansionKind::Always)) {
+ for (unsigned i = 0; i < 2; ++i) {
+ SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Halves[2 * i]);
+ SDValue Hi = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Halves[2 * i + 1]);
+ SDValue Shift = DAG.getConstant(
+ HalfType.getScalarSizeInBits(), dl,
+ TLI.getShiftAmountTy(HalfType, DAG.getDataLayout()));
+ Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
+ Results.push_back(DAG.getNode(ISD::OR, dl, VT, Lo, Hi));
+ }
+ break;
+ }
+ break;
+ }
case ISD::MUL: {
EVT VT = Node->getValueType(0);
SDVTList VTs = DAG.getVTList(VT, VT);
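
Editor's sketch of what the new UMUL_LOHI/SMUL_LOHI expansion above computes, in plain C++ with no LLVM APIs: a 32x32->64 multiply built from 16-bit pieces (Knuth / Hacker's Delight style), which is the scalar analogue of expandMUL_LOHI followed by the zero-extend/any-extend + shift + OR glue.

#include <cassert>
#include <cstdint>

static void UMulLoHi32(uint32_t A, uint32_t B, uint32_t &Lo, uint32_t &Hi) {
  uint32_t A0 = A & 0xFFFF, A1 = A >> 16;
  uint32_t B0 = B & 0xFFFF, B1 = B >> 16;
  uint32_t P00 = A0 * B0, P01 = A0 * B1, P10 = A1 * B0, P11 = A1 * B1;
  uint32_t Mid1 = P01 + (P00 >> 16);       // cannot overflow 32 bits
  uint32_t Mid2 = P10 + (Mid1 & 0xFFFF);   // cannot overflow 32 bits
  Lo = (P00 & 0xFFFF) | (Mid2 << 16);
  Hi = P11 + (Mid1 >> 16) + (Mid2 >> 16);
}

int main() {
  uint32_t Lo, Hi;
  UMulLoHi32(0xFFFFFFFFu, 0xFFFFFFFFu, Lo, Hi);
  assert(Lo == 0x00000001u && Hi == 0xFFFFFFFEu); // 0xFFFFFFFF^2 = 0xFFFFFFFE00000001
  UMulLoHi32(0x12345678u, 0x00010000u, Lo, Hi);
  assert(Lo == 0x56780000u && Hi == 0x00001234u);
  return 0;
}
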
@@ -3293,7 +3389,8 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
TLI.isOperationLegalOrCustom(ISD::ANY_EXTEND, VT) &&
TLI.isOperationLegalOrCustom(ISD::SHL, VT) &&
TLI.isOperationLegalOrCustom(ISD::OR, VT) &&
- TLI.expandMUL(Node, Lo, Hi, HalfType, DAG)) {
+ TLI.expandMUL(Node, Lo, Hi, HalfType, DAG,
+ TargetLowering::MulExpansionKind::OnlyLegalOrCustom)) {
Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
Hi = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Hi);
SDValue Shift =
@@ -3416,8 +3513,18 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// pre-lowered to the correct types. This all depends upon WideVT not
// being a legal type for the architecture and thus has to be split to
// two arguments.
- SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
- SDValue Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl);
+ SDValue Ret;
+    if (DAG.getDataLayout().isLittleEndian()) {
+ // Halves of WideVT are packed into registers in different order
+ // depending on platform endianness. This is usually handled by
+ // the C calling convention, but we can't defer to it in
+ // the legalizer.
+ SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
+ Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl);
+ } else {
+ SDValue Args[] = { HiLHS, LHS, HiRHS, RHS };
+ Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl);
+ }
BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret,
DAG.getIntPtrConstant(0, dl));
TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret,
@@ -3441,6 +3548,15 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
TopHalf = DAG.getSetCC(dl, getSetCCResultType(VT), TopHalf,
DAG.getConstant(0, dl, VT), ISD::SETNE);
}
+
+ // Truncate the result if SetCC returns a larger type than needed.
+ EVT RType = Node->getValueType(1);
+ if (RType.getSizeInBits() < TopHalf.getValueSizeInBits())
+ TopHalf = DAG.getNode(ISD::TRUNCATE, dl, RType, TopHalf);
+
+ assert(RType.getSizeInBits() == TopHalf.getValueSizeInBits() &&
+ "Unexpected result type for S/UMULO legalization");
+
Results.push_back(BottomHalf);
Results.push_back(TopHalf);
break;
@@ -3476,9 +3592,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
SDValue Table = Node->getOperand(1);
SDValue Index = Node->getOperand(2);
- EVT PTy = TLI.getPointerTy(DAG.getDataLayout());
-
const DataLayout &TD = DAG.getDataLayout();
+ EVT PTy = TLI.getPointerTy(TD);
+
unsigned EntrySize =
DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD);
@@ -3492,7 +3608,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
ISD::SEXTLOAD, dl, PTy, Chain, Addr,
MachinePointerInfo::getJumpTable(DAG.getMachineFunction()), MemVT);
Addr = LD;
- if (TM.isPositionIndependent()) {
+ if (TLI.isJumpTableRelative()) {
// For PIC, the sequence is:
// BRIND(load(Jumptable + index) + RelocBase)
// RelocBase can be JumpTable, GOT or some sort of global base.
@@ -4019,10 +4135,11 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
}
Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1));
break;
+ case ISD::BITREVERSE:
case ISD::BSWAP: {
unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits();
Tmp1 = DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, Node->getOperand(0));
- Tmp1 = DAG.getNode(ISD::BSWAP, dl, NVT, Tmp1);
+ Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
Tmp1 = DAG.getNode(
ISD::SRL, dl, NVT, Tmp1,
DAG.getConstant(DiffBits, dl,
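
Editor's illustration of why the SRL by DiffBits is needed when BSWAP/BITREVERSE is promoted: byte-swapping an i16 held zero-extended in an i32 register leaves the interesting bytes in the top half of the wider register. Standalone C++ sketch using the GCC/Clang __builtin_bswap32.

#include <cassert>
#include <cstdint>

static uint16_t BSwap16ViaI32(uint16_t X) {
  uint32_t Wide = X;                            // ZERO_EXTEND
  uint32_t Swapped = __builtin_bswap32(Wide);   // BSWAP on the promoted type
  return (uint16_t)(Swapped >> 16);             // SRL by DiffBits, then TRUNCATE
}

int main() {
  assert(BSwap16ViaI32(0x1234) == 0x3412);
  return 0;
}
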
@@ -4073,6 +4190,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
ReplacedNode(Node);
break;
}
+ case ISD::SDIV:
+ case ISD::SREM:
+ case ISD::UDIV:
+ case ISD::UREM:
case ISD::AND:
case ISD::OR:
case ISD::XOR: {
@@ -4082,7 +4203,20 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
TruncOp = ISD::BITCAST;
} else {
assert(OVT.isInteger() && "Cannot promote logic operation");
- ExtOp = ISD::ANY_EXTEND;
+
+ switch (Node->getOpcode()) {
+ default:
+ ExtOp = ISD::ANY_EXTEND;
+ break;
+ case ISD::SDIV:
+ case ISD::SREM:
+ ExtOp = ISD::SIGN_EXTEND;
+ break;
+ case ISD::UDIV:
+ case ISD::UREM:
+ ExtOp = ISD::ZERO_EXTEND;
+ break;
+ }
TruncOp = ISD::TRUNCATE;
}
// Promote each of the values to the new type.
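
Editor's illustration of why the promoted SDIV/SREM must use SIGN_EXTEND (and UDIV/UREM ZERO_EXTEND), unlike the bitwise ops which can use ANY_EXTEND: dividing wrongly-extended bits gives a wrong answer for negative inputs. Plain C++ sketch, not LLVM code.

#include <cassert>
#include <cstdint>

int main() {
  int8_t A = -7, B = 2;
  int32_t SExtDiv = (int32_t)A / (int32_t)B;            // sign-extended: -3
  int32_t ZExtDiv = (int32_t)(uint8_t)A / (int32_t)B;   // zero-extended: 124
  assert(SExtDiv == -3 && ZExtDiv == 124);
  assert((int8_t)SExtDiv == -3);   // truncating the correctly-extended result is safe
  return 0;
}
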
@@ -4093,6 +4227,24 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Results.push_back(DAG.getNode(TruncOp, dl, OVT, Tmp1));
break;
}
+ case ISD::UMUL_LOHI:
+ case ISD::SMUL_LOHI: {
+ // Promote to a multiply in a wider integer type.
+ unsigned ExtOp = Node->getOpcode() == ISD::UMUL_LOHI ? ISD::ZERO_EXTEND
+ : ISD::SIGN_EXTEND;
+ Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1));
+ Tmp1 = DAG.getNode(ISD::MUL, dl, NVT, Tmp1, Tmp2);
+
+ auto &DL = DAG.getDataLayout();
+ unsigned OriginalSize = OVT.getScalarSizeInBits();
+ Tmp2 = DAG.getNode(
+ ISD::SRL, dl, NVT, Tmp1,
+ DAG.getConstant(OriginalSize, dl, TLI.getScalarShiftAmountTy(DL, NVT)));
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp1));
+ Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp2));
+ break;
+ }
case ISD::SELECT: {
unsigned ExtOp, TruncOp;
if (Node->getValueType(0).isVector() ||
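
Editor's sketch of the UMUL_LOHI/SMUL_LOHI promotion added above, on scalar values: do the multiply in the wider type; the low half is the truncated product and the high half is the product shifted right by the original bit width. Plain C++, not the DAG code itself.

#include <cassert>
#include <cstdint>

int main() {
  uint16_t A = 0xFFFF, B = 0xFFFF;
  uint32_t Prod = (uint32_t)A * (uint32_t)B;   // ZERO_EXTEND + MUL
  uint16_t Lo = (uint16_t)Prod;                // TRUNCATE
  uint16_t Hi = (uint16_t)(Prod >> 16);        // SRL by OriginalSize, then TRUNCATE
  assert(Lo == 0x0001 && Hi == 0xFFFE);        // 0xFFFF * 0xFFFF = 0xFFFE0001
  return 0;
}
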
@@ -4351,7 +4503,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
MVT EltVT = OVT.getVectorElementType();
MVT NewEltVT = NVT.getVectorElementType();
- // Handle bitcasts to different vector type with the smae total bit size.
+ // Handle bitcasts to different vector type with the same total bit size.
//
// e.g. v2i64 = scalar_to_vector x:i64
// =>
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 31ebf7bbec13..72b56d84d945 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -632,7 +632,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo) {
SDLoc dl(N);
auto MMOFlags =
- L->getMemOperand()->getFlags() & ~MachineMemOperand::MOInvariant;
+ L->getMemOperand()->getFlags() &
+ ~(MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable);
SDValue NewL;
if (L->getExtensionType() == ISD::NON_EXTLOAD) {
NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), NVT, dl,
@@ -1465,7 +1466,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
// TODO: Are there fast-math-flags to propagate to this FADD?
Lo = DAG.getNode(ISD::FADD, dl, VT, Hi,
- DAG.getConstantFP(APFloat(APFloat::PPCDoubleDouble,
+ DAG.getConstantFP(APFloat(APFloat::PPCDoubleDouble(),
APInt(128, Parts)),
dl, MVT::ppcf128));
Lo = DAG.getSelectCC(dl, Src, DAG.getConstant(0, dl, SrcVT),
@@ -1630,7 +1631,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) {
assert(N->getOperand(0).getValueType() == MVT::ppcf128 &&
"Logic only correct for ppcf128!");
const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
- APFloat APF = APFloat(APFloat::PPCDoubleDouble, APInt(128, TwoE31));
+ APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
SDValue Tmp = DAG.getConstantFP(APF, dl, MVT::ppcf128);
// X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
// FIXME: generated code sucks.
@@ -2085,7 +2086,8 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_LOAD(SDNode *N) {
// Load the value as an integer value with the same number of bits.
EVT IVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
auto MMOFlags =
- L->getMemOperand()->getFlags() & ~MachineMemOperand::MOInvariant;
+ L->getMemOperand()->getFlags() &
+ ~(MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable);
SDValue newL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), IVT,
SDLoc(N), L->getChain(), L->getBasePtr(),
L->getOffset(), L->getPointerInfo(), IVT,
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 9a18943291c8..1a92d4a8b595 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -102,6 +102,11 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::CONCAT_VECTORS:
Res = PromoteIntRes_CONCAT_VECTORS(N); break;
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ Res = PromoteIntRes_EXTEND_VECTOR_INREG(N); break;
+
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND: Res = PromoteIntRes_INT_EXTEND(N); break;
@@ -183,8 +188,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic0(AtomicSDNode *N) {
SDValue Res = DAG.getAtomic(N->getOpcode(), SDLoc(N),
N->getMemoryVT(), ResVT,
N->getChain(), N->getBasePtr(),
- N->getMemOperand(), N->getOrdering(),
- N->getSynchScope());
+ N->getMemOperand());
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
@@ -196,8 +200,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) {
SDValue Res = DAG.getAtomic(N->getOpcode(), SDLoc(N),
N->getMemoryVT(),
N->getChain(), N->getBasePtr(),
- Op2, N->getMemOperand(), N->getOrdering(),
- N->getSynchScope());
+ Op2, N->getMemOperand());
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
@@ -220,8 +223,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N,
SDValue Res = DAG.getAtomicCmpSwap(
ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, SDLoc(N), N->getMemoryVT(), VTs,
N->getChain(), N->getBasePtr(), N->getOperand(2), N->getOperand(3),
- N->getMemOperand(), N->getSuccessOrdering(), N->getFailureOrdering(),
- N->getSynchScope());
+ N->getMemOperand());
ReplaceValueWith(SDValue(N, 0), Res.getValue(0));
ReplaceValueWith(SDValue(N, 2), Res.getValue(2));
return Res.getValue(1);
@@ -233,8 +235,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N,
DAG.getVTList(Op2.getValueType(), N->getValueType(1), MVT::Other);
SDValue Res = DAG.getAtomicCmpSwap(
N->getOpcode(), SDLoc(N), N->getMemoryVT(), VTs, N->getChain(),
- N->getBasePtr(), Op2, Op3, N->getMemOperand(), N->getSuccessOrdering(),
- N->getFailureOrdering(), N->getSynchScope());
+ N->getBasePtr(), Op2, Op3, N->getMemOperand());
// Update the use to N with the newly created Res.
for (unsigned i = 1, NumResults = N->getNumValues(); i < NumResults; ++i)
ReplaceValueWith(SDValue(N, i), Res.getValue(i));
@@ -427,7 +428,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
// Assert that the converted value fits in the original type. If it doesn't
// (eg: because the value being converted is too big), then the result of the
// original operation was undefined anyway, so the assert is still correct.
- return DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ?
+ return DAG.getNode(NewOpc == ISD::FP_TO_UINT ?
ISD::AssertZext : ISD::AssertSext, dl, NVT, Res,
DAG.getValueType(N->getValueType(0).getScalarType()));
}
@@ -946,14 +947,16 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS,
SDValue OpL = GetPromotedInteger(NewLHS);
SDValue OpR = GetPromotedInteger(NewRHS);
- // We would prefer to promote the comparison operand with sign extension,
- // if we find the operand is actually to truncate an AssertSext. With this
- // optimization, we can avoid inserting real truncate instruction, which
- // is redudant eventually.
- if (OpL->getOpcode() == ISD::AssertSext &&
- cast<VTSDNode>(OpL->getOperand(1))->getVT() == NewLHS.getValueType() &&
- OpR->getOpcode() == ISD::AssertSext &&
- cast<VTSDNode>(OpR->getOperand(1))->getVT() == NewRHS.getValueType()) {
+ // We would prefer to promote the comparison operand with sign extension.
+ // If the width of OpL/OpR excluding the duplicated sign bits is no greater
+  // than the width of NewLHS/NewRHS, we can avoid inserting a real truncate
+  // instruction, which would eventually be redundant.
+ unsigned OpLEffectiveBits =
+ OpL.getValueSizeInBits() - DAG.ComputeNumSignBits(OpL) + 1;
+ unsigned OpREffectiveBits =
+ OpR.getValueSizeInBits() - DAG.ComputeNumSignBits(OpR) + 1;
+ if (OpLEffectiveBits <= NewLHS.getValueSizeInBits() &&
+ OpREffectiveBits <= NewRHS.getValueSizeInBits()) {
NewLHS = OpL;
NewRHS = OpR;
} else {
@@ -990,8 +993,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ANY_EXTEND(SDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntOp_ATOMIC_STORE(AtomicSDNode *N) {
SDValue Op2 = GetPromotedInteger(N->getOperand(2));
return DAG.getAtomic(N->getOpcode(), SDLoc(N), N->getMemoryVT(),
- N->getChain(), N->getBasePtr(), Op2, N->getMemOperand(),
- N->getOrdering(), N->getSynchScope());
+ N->getChain(), N->getBasePtr(), Op2, N->getMemOperand());
}
SDValue DAGTypeLegalizer::PromoteIntOp_BITCAST(SDNode *N) {
@@ -1051,8 +1053,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) {
// Promote the inserted value. The type does not need to match the
// vector element type. Check that any extra bits introduced will be
// truncated away.
- assert(N->getOperand(0).getValueType().getSizeInBits() >=
- N->getValueType(0).getVectorElementType().getSizeInBits() &&
+ assert(N->getOperand(0).getValueSizeInBits() >=
+ N->getValueType(0).getScalarSizeInBits() &&
"Type of inserted value narrower than vector element type!");
SmallVector<SDValue, 16> NewOps;
@@ -1081,8 +1083,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N,
// have to match the vector element type.
// Check that any extra bits introduced will be truncated away.
- assert(N->getOperand(1).getValueType().getSizeInBits() >=
- N->getValueType(0).getVectorElementType().getSizeInBits() &&
+ assert(N->getOperand(1).getValueSizeInBits() >=
+ N->getValueType(0).getScalarSizeInBits() &&
"Type of inserted value narrower than vector element type!");
return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0),
GetPromotedInteger(N->getOperand(1)),
@@ -1210,7 +1212,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSTORE(MaskedStoreSDNode *N,
return DAG.getMaskedStore(N->getChain(), dl, DataOp, N->getBasePtr(), Mask,
N->getMemoryVT(), N->getMemOperand(),
- TruncateStore);
+ TruncateStore, N->isCompressingStore());
}
SDValue DAGTypeLegalizer::PromoteIntOp_MLOAD(MaskedLoadSDNode *N,
@@ -1233,7 +1235,15 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MGATHER(MaskedGatherSDNode *N,
NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT);
} else
NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo));
- return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+
+ SDValue Res = SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+  // If the node was updated in place, just return it.
+ if (Res.getNode() == N)
+ return Res;
+
+ ReplaceValueWith(SDValue(N, 0), Res.getValue(0));
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return SDValue();
}
SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N,
@@ -1314,6 +1324,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::CTPOP: ExpandIntRes_CTPOP(N, Lo, Hi); break;
case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTTZ: ExpandIntRes_CTTZ(N, Lo, Hi); break;
+ case ISD::FLT_ROUNDS_: ExpandIntRes_FLT_ROUNDS(N, Lo, Hi); break;
case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break;
case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break;
case ISD::LOAD: ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break;
@@ -1352,8 +1363,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
SDValue Tmp = DAG.getAtomicCmpSwap(
ISD::ATOMIC_CMP_SWAP, SDLoc(N), AN->getMemoryVT(), VTs,
N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3),
- AN->getMemOperand(), AN->getSuccessOrdering(), AN->getFailureOrdering(),
- AN->getSynchScope());
+ AN->getMemOperand());
// Expanding to the strong ATOMIC_CMP_SWAP node means we can determine
// success simply by comparing the loaded value against the ingoing
@@ -1508,8 +1518,8 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDValue Amt = N->getOperand(1);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
EVT ShTy = Amt.getValueType();
- unsigned ShBits = ShTy.getScalarType().getSizeInBits();
- unsigned NVTBits = NVT.getScalarType().getSizeInBits();
+ unsigned ShBits = ShTy.getScalarSizeInBits();
+ unsigned NVTBits = NVT.getScalarSizeInBits();
assert(isPowerOf2_32(NVTBits) &&
"Expanded integer type size not a power of two!");
SDLoc dl(N);
@@ -1774,7 +1784,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
switch (BoolType) {
case TargetLoweringBase::UndefinedBooleanContent:
OVF = DAG.getNode(ISD::AND, dl, NVT, DAG.getConstant(1, dl, NVT), OVF);
- // Fallthrough
+ LLVM_FALLTHROUGH;
case TargetLoweringBase::ZeroOrOneBooleanContent:
Hi = DAG.getNode(N->getOpcode(), dl, NVT, Hi, OVF);
break;
@@ -2002,6 +2012,19 @@ void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N,
Hi = DAG.getConstant(0, dl, NVT);
}
+void DAGTypeLegalizer::ExpandIntRes_FLT_ROUNDS(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDLoc dl(N);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned NBitWidth = NVT.getSizeInBits();
+
+ EVT ShiftAmtTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
+ Lo = DAG.getNode(ISD::FLT_ROUNDS_, dl, NVT);
+ // The high part is the sign of Lo, as -1 is a valid value for FLT_ROUNDS
+ Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
+ DAG.getConstant(NBitWidth - 1, dl, ShiftAmtTy));
+}
+
void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDLoc dl(N);
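
Editor's illustration of the ExpandIntRes_FLT_ROUNDS logic above: the high half is just the sign of the low half, so an arithmetic shift by (width - 1) replicates the sign bit and the legal FLT_ROUNDS_ values (-1, 0, 1, 2, 3) widen correctly. Plain C++ sketch, assuming the usual arithmetic right shift for signed values.

#include <cassert>
#include <cstdint>

static int32_t HighHalf(int32_t Lo) { return Lo >> 31; }  // SRA by NBitWidth - 1

int main() {
  assert(HighHalf(1) == 0);     // e.g. round-to-nearest
  assert(HighHalf(3) == 0);
  assert(HighHalf(-1) == -1);   // "unknown" stays -1 after widening
  return 0;
}
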
@@ -2065,7 +2088,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
if (ExtType == ISD::SEXTLOAD) {
// The high part is obtained by SRA'ing all but one of the bits of the
// lo part.
- unsigned LoSize = Lo.getValueType().getSizeInBits();
+ unsigned LoSize = Lo.getValueSizeInBits();
Hi = DAG.getNode(ISD::SRA, dl, NVT, Lo,
DAG.getConstant(LoSize - 1, dl,
TLI.getPointerTy(DAG.getDataLayout())));
@@ -2166,7 +2189,9 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
GetExpandedInteger(N->getOperand(0), LL, LH);
GetExpandedInteger(N->getOperand(1), RL, RH);
- if (TLI.expandMUL(N, Lo, Hi, NVT, DAG, LL, LH, RL, RH))
+ if (TLI.expandMUL(N, Lo, Hi, NVT, DAG,
+ TargetLowering::MulExpansionKind::OnlyLegalOrCustom,
+ LL, LH, RL, RH))
return;
// If nothing else, we can make a libcall.
@@ -2180,7 +2205,7 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
else if (VT == MVT::i128)
LC = RTLIB::MUL_I128;
- if (LC == RTLIB::UNKNOWN_LIBCALL) {
+ if (LC == RTLIB::UNKNOWN_LIBCALL || !TLI.getLibcallName(LC)) {
// We'll expand the multiplication by brute force because we have no other
// options. This is a trivially-generalized version of the code from
// Hacker's Delight (itself derived from Knuth's Algorithm M from section
@@ -2354,8 +2379,8 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
// the new SHL_PARTS operation would need further legalization.
SDValue ShiftOp = N->getOperand(1);
EVT ShiftTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
- assert(ShiftTy.getScalarType().getSizeInBits() >=
- Log2_32_Ceil(VT.getScalarType().getSizeInBits()) &&
+ assert(ShiftTy.getScalarSizeInBits() >=
+ Log2_32_Ceil(VT.getScalarSizeInBits()) &&
"ShiftAmountTy is too small to cover the range of this type!");
if (ShiftOp.getValueType() != ShiftTy)
ShiftOp = DAG.getZExtOrTrunc(ShiftOp, dl, ShiftTy);
@@ -2436,8 +2461,7 @@ void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N,
"Operand over promoted?");
// Split the promoted operand. This will simplify when it is expanded.
SplitInteger(Res, Lo, Hi);
- unsigned ExcessBits =
- Op.getValueType().getSizeInBits() - NVT.getSizeInBits();
+ unsigned ExcessBits = Op.getValueSizeInBits() - NVT.getSizeInBits();
Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi,
DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
ExcessBits)));
@@ -2458,13 +2482,12 @@ ExpandIntRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi) {
// The high part gets the sign extension from the lo-part. This handles
// things like sextinreg V:i64 from i8.
Hi = DAG.getNode(ISD::SRA, dl, Hi.getValueType(), Lo,
- DAG.getConstant(Hi.getValueType().getSizeInBits() - 1, dl,
+ DAG.getConstant(Hi.getValueSizeInBits() - 1, dl,
TLI.getPointerTy(DAG.getDataLayout())));
} else {
// For example, extension of an i48 to an i64. Leave the low part alone,
// sext_inreg the high part.
- unsigned ExcessBits =
- EVT.getSizeInBits() - Lo.getValueType().getSizeInBits();
+ unsigned ExcessBits = EVT.getSizeInBits() - Lo.getValueSizeInBits();
Hi = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Hi.getValueType(), Hi,
DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(),
ExcessBits)));
@@ -2690,8 +2713,7 @@ void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N,
"Operand over promoted?");
// Split the promoted operand. This will simplify when it is expanded.
SplitInteger(Res, Lo, Hi);
- unsigned ExcessBits =
- Op.getValueType().getSizeInBits() - NVT.getSizeInBits();
+ unsigned ExcessBits = Op.getValueSizeInBits() - NVT.getSizeInBits();
Hi = DAG.getZeroExtendInReg(Hi, dl,
EVT::getIntegerVT(*DAG.getContext(),
ExcessBits));
@@ -2707,10 +2729,7 @@ void DAGTypeLegalizer::ExpandIntRes_ATOMIC_LOAD(SDNode *N,
SDValue Swap = DAG.getAtomicCmpSwap(
ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl,
cast<AtomicSDNode>(N)->getMemoryVT(), VTs, N->getOperand(0),
- N->getOperand(1), Zero, Zero, cast<AtomicSDNode>(N)->getMemOperand(),
- cast<AtomicSDNode>(N)->getOrdering(),
- cast<AtomicSDNode>(N)->getOrdering(),
- cast<AtomicSDNode>(N)->getSynchScope());
+ N->getOperand(1), Zero, Zero, cast<AtomicSDNode>(N)->getMemOperand());
ReplaceValueWith(SDValue(N, 0), Swap.getValue(0));
ReplaceValueWith(SDValue(N, 1), Swap.getValue(2));
@@ -2833,51 +2852,51 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
case ISD::SETUGE: LowCC = ISD::SETUGE; break;
}
- // Tmp1 = lo(op1) < lo(op2) // Always unsigned comparison
- // Tmp2 = hi(op1) < hi(op2) // Signedness depends on operands
- // dest = hi(op1) == hi(op2) ? Tmp1 : Tmp2;
+ // LoCmp = lo(op1) < lo(op2) // Always unsigned comparison
+ // HiCmp = hi(op1) < hi(op2) // Signedness depends on operands
+ // dest = hi(op1) == hi(op2) ? LoCmp : HiCmp;
// NOTE: on targets without efficient SELECT of bools, we can always use
// this identity: (B1 ? B2 : B3) --> (B1 & B2)|(!B1&B3)
TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, AfterLegalizeTypes, true,
nullptr);
- SDValue Tmp1, Tmp2;
+ SDValue LoCmp, HiCmp;
if (TLI.isTypeLegal(LHSLo.getValueType()) &&
TLI.isTypeLegal(RHSLo.getValueType()))
- Tmp1 = TLI.SimplifySetCC(getSetCCResultType(LHSLo.getValueType()),
- LHSLo, RHSLo, LowCC, false, DagCombineInfo, dl);
- if (!Tmp1.getNode())
- Tmp1 = DAG.getSetCC(dl, getSetCCResultType(LHSLo.getValueType()),
- LHSLo, RHSLo, LowCC);
+ LoCmp = TLI.SimplifySetCC(getSetCCResultType(LHSLo.getValueType()), LHSLo,
+ RHSLo, LowCC, false, DagCombineInfo, dl);
+ if (!LoCmp.getNode())
+ LoCmp = DAG.getSetCC(dl, getSetCCResultType(LHSLo.getValueType()), LHSLo,
+ RHSLo, LowCC);
if (TLI.isTypeLegal(LHSHi.getValueType()) &&
TLI.isTypeLegal(RHSHi.getValueType()))
- Tmp2 = TLI.SimplifySetCC(getSetCCResultType(LHSHi.getValueType()),
- LHSHi, RHSHi, CCCode, false, DagCombineInfo, dl);
- if (!Tmp2.getNode())
- Tmp2 = DAG.getNode(ISD::SETCC, dl,
- getSetCCResultType(LHSHi.getValueType()),
- LHSHi, RHSHi, DAG.getCondCode(CCCode));
-
- ConstantSDNode *Tmp1C = dyn_cast<ConstantSDNode>(Tmp1.getNode());
- ConstantSDNode *Tmp2C = dyn_cast<ConstantSDNode>(Tmp2.getNode());
- if ((Tmp1C && Tmp1C->isNullValue()) ||
- (Tmp2C && Tmp2C->isNullValue() &&
- (CCCode == ISD::SETLE || CCCode == ISD::SETGE ||
- CCCode == ISD::SETUGE || CCCode == ISD::SETULE)) ||
- (Tmp2C && Tmp2C->getAPIntValue() == 1 &&
- (CCCode == ISD::SETLT || CCCode == ISD::SETGT ||
- CCCode == ISD::SETUGT || CCCode == ISD::SETULT))) {
- // low part is known false, returns high part.
+ HiCmp = TLI.SimplifySetCC(getSetCCResultType(LHSHi.getValueType()), LHSHi,
+ RHSHi, CCCode, false, DagCombineInfo, dl);
+ if (!HiCmp.getNode())
+ HiCmp =
+ DAG.getNode(ISD::SETCC, dl, getSetCCResultType(LHSHi.getValueType()),
+ LHSHi, RHSHi, DAG.getCondCode(CCCode));
+
+ ConstantSDNode *LoCmpC = dyn_cast<ConstantSDNode>(LoCmp.getNode());
+ ConstantSDNode *HiCmpC = dyn_cast<ConstantSDNode>(HiCmp.getNode());
+
+ bool EqAllowed = (CCCode == ISD::SETLE || CCCode == ISD::SETGE ||
+ CCCode == ISD::SETUGE || CCCode == ISD::SETULE);
+
+ if ((EqAllowed && (HiCmpC && HiCmpC->isNullValue())) ||
+ (!EqAllowed && ((HiCmpC && (HiCmpC->getAPIntValue() == 1)) ||
+ (LoCmpC && LoCmpC->isNullValue())))) {
// For LE / GE, if high part is known false, ignore the low part.
- // For LT / GT, if high part is known true, ignore the low part.
- NewLHS = Tmp2;
+ // For LT / GT: if low part is known false, return the high part.
+ // if high part is known true, ignore the low part.
+ NewLHS = HiCmp;
NewRHS = SDValue();
return;
}
if (LHSHi == RHSHi) {
// Comparing the low bits is enough.
- NewLHS = Tmp1;
+ NewLHS = LoCmp;
NewRHS = SDValue();
return;
}
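
Editor's scalar analogue of the expanded compare described by the LoCmp/HiCmp comments above: decide a 64-bit unsigned less-than from two 32-bit halves with "hi equal ? use the lo compare : use the hi compare". Plain C++ sketch, not LLVM code.

#include <cassert>
#include <cstdint>

static bool ULT64(uint32_t ALo, uint32_t AHi, uint32_t BLo, uint32_t BHi) {
  bool LoCmp = ALo < BLo;              // always an unsigned compare
  bool HiCmp = AHi < BHi;              // signedness follows the original condition
  return (AHi == BHi) ? LoCmp : HiCmp;
}

int main() {
  assert( ULT64(5, 0, 7, 0));          // equal hi, decided by the low halves
  assert( ULT64(9, 1, 2, 3));          // different hi, decided by the high halves
  assert(!ULT64(1, 4, 9, 2));
  return 0;
}
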
@@ -2922,8 +2941,8 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS,
if (!NewLHS.getNode())
NewLHS = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()),
LHSHi, RHSHi, ISD::SETEQ);
- NewLHS = DAG.getSelect(dl, Tmp1.getValueType(),
- NewLHS, Tmp1, Tmp2);
+ NewLHS = DAG.getSelect(dl, LoCmp.getValueType(),
+ NewLHS, LoCmp, HiCmp);
NewRHS = SDValue();
}
@@ -3198,9 +3217,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) {
cast<AtomicSDNode>(N)->getMemoryVT(),
N->getOperand(0),
N->getOperand(1), N->getOperand(2),
- cast<AtomicSDNode>(N)->getMemOperand(),
- cast<AtomicSDNode>(N)->getOrdering(),
- cast<AtomicSDNode>(N)->getSynchScope());
+ cast<AtomicSDNode>(N)->getMemOperand());
return Swap.getValue(1);
}
@@ -3327,6 +3344,41 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) {
return DAG.getNode(ISD::BUILD_VECTOR, dl, NOutVT, Ops);
}
+SDValue DAGTypeLegalizer::PromoteIntRes_EXTEND_VECTOR_INREG(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ assert(NVT.isVector() && "This type must be promoted to a vector type");
+
+ SDLoc dl(N);
+
+ // For operands whose TypeAction is to promote, extend the promoted node
+ // appropriately (ZERO_EXTEND or SIGN_EXTEND) from the original pre-promotion
+ // type, and then construct a new *_EXTEND_VECTOR_INREG node to the promote-to
+  // type.
+ if (getTypeAction(N->getOperand(0).getValueType())
+ == TargetLowering::TypePromoteInteger) {
+ SDValue Promoted;
+
+ switch(N->getOpcode()) {
+ case ISD::SIGN_EXTEND_VECTOR_INREG:
+ Promoted = SExtPromotedInteger(N->getOperand(0));
+ break;
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
+ Promoted = ZExtPromotedInteger(N->getOperand(0));
+ break;
+ case ISD::ANY_EXTEND_VECTOR_INREG:
+ Promoted = GetPromotedInteger(N->getOperand(0));
+ break;
+ default:
+ llvm_unreachable("Node has unexpected Opcode");
+ }
+ return DAG.getNode(N->getOpcode(), dl, NVT, Promoted);
+ }
+
+ // Directly extend to the appropriate transform-to type.
+ return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) {
EVT OutVT = N->getValueType(0);
EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 144bed241ee7..693f5e2120a7 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -117,6 +117,8 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
Mapped |= 64;
if (WidenedVectors.find(Res) != WidenedVectors.end())
Mapped |= 128;
+ if (PromotedFloats.find(Res) != PromotedFloats.end())
+ Mapped |= 256;
if (Node.getNodeId() != Processed) {
// Since we allow ReplacedValues to map deleted nodes, it may map nodes
@@ -159,6 +161,8 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
dbgs() << " SplitVectors";
if (Mapped & 128)
dbgs() << " WidenedVectors";
+ if (Mapped & 256)
+ dbgs() << " PromotedFloats";
dbgs() << "\n";
llvm_unreachable(nullptr);
}
@@ -484,7 +488,7 @@ SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) {
// updated after all operands have been analyzed. Since this is rare,
// the code tries to minimize overhead in the non-morphing case.
- SmallVector<SDValue, 8> NewOps;
+ std::vector<SDValue> NewOps;
unsigned NumProcessed = 0;
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
SDValue OrigOp = N->getOperand(i);
@@ -500,7 +504,7 @@ SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) {
NewOps.push_back(Op);
} else if (Op != OrigOp) {
// This is the first operand to change - add all operands so far.
- NewOps.append(N->op_begin(), N->op_begin() + i);
+ NewOps.insert(NewOps.end(), N->op_begin(), N->op_begin() + i);
NewOps.push_back(Op);
}
}
@@ -794,8 +798,7 @@ void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) {
// Note that in some cases vector operation operands may be greater than
// the vector element type. For example BUILD_VECTOR of type <1 x i1> with
// a constant i8 operand.
- assert(Result.getValueType().getSizeInBits() >=
- Op.getValueType().getVectorElementType().getSizeInBits() &&
+ assert(Result.getValueSizeInBits() >= Op.getScalarValueSizeInBits() &&
"Invalid type for scalarized vector");
AnalyzeNewValue(Result);
@@ -905,7 +908,7 @@ void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) {
/// Convert to an integer of the same size.
SDValue DAGTypeLegalizer::BitConvertToInteger(SDValue Op) {
- unsigned BitWidth = Op.getValueType().getSizeInBits();
+ unsigned BitWidth = Op.getValueSizeInBits();
return DAG.getNode(ISD::BITCAST, SDLoc(Op),
EVT::getIntegerVT(*DAG.getContext(), BitWidth), Op);
}
@@ -913,7 +916,7 @@ SDValue DAGTypeLegalizer::BitConvertToInteger(SDValue Op) {
/// Convert to a vector of integers of the same size.
SDValue DAGTypeLegalizer::BitConvertVectorToIntegerVector(SDValue Op) {
assert(Op.getValueType().isVector() && "Only applies to vectors!");
- unsigned EltWidth = Op.getValueType().getVectorElementType().getSizeInBits();
+ unsigned EltWidth = Op.getScalarValueSizeInBits();
EVT EltNVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth);
unsigned NumElts = Op.getValueType().getVectorNumElements();
return DAG.getNode(ISD::BITCAST, SDLoc(Op),
@@ -1145,7 +1148,7 @@ void DAGTypeLegalizer::SplitInteger(SDValue Op,
SDValue &Lo, SDValue &Hi) {
SDLoc dl(Op);
assert(LoVT.getSizeInBits() + HiVT.getSizeInBits() ==
- Op.getValueType().getSizeInBits() && "Invalid integer splitting!");
+ Op.getValueSizeInBits() && "Invalid integer splitting!");
Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Op);
Hi = DAG.getNode(ISD::SRL, dl, Op.getValueType(), Op,
DAG.getConstant(LoVT.getSizeInBits(), dl,
@@ -1157,8 +1160,8 @@ void DAGTypeLegalizer::SplitInteger(SDValue Op,
/// size of Op's.
void DAGTypeLegalizer::SplitInteger(SDValue Op,
SDValue &Lo, SDValue &Hi) {
- EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(),
- Op.getValueType().getSizeInBits()/2);
+ EVT HalfVT =
+ EVT::getIntegerVT(*DAG.getContext(), Op.getValueSizeInBits() / 2);
SplitInteger(Op, HalfVT, HalfVT, Lo, Hi);
}
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 84ad8f83d906..d1022af69477 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -242,6 +242,7 @@ private:
SDValue PromoteIntRes_VECTOR_SHUFFLE(SDNode *N);
SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N);
SDValue PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N);
+ SDValue PromoteIntRes_EXTEND_VECTOR_INREG(SDNode *N);
SDValue PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N);
SDValue PromoteIntRes_CONCAT_VECTORS(SDNode *N);
SDValue PromoteIntRes_BITCAST(SDNode *N);
@@ -339,6 +340,7 @@ private:
void ExpandIntRes_SIGN_EXTEND_INREG (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_TRUNCATE (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ZERO_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_FLT_ROUNDS (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_FP_TO_SINT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_FP_TO_UINT (SDNode *N, SDValue &Lo, SDValue &Hi);
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 665180e119b7..3682c32460c6 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -141,11 +141,10 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
if (DAG.getDataLayout().isBigEndian())
std::swap(LHS, RHS);
- Vals.push_back(DAG.getNode(ISD::BUILD_PAIR, dl,
- EVT::getIntegerVT(
- *DAG.getContext(),
- LHS.getValueType().getSizeInBits() << 1),
- LHS, RHS));
+ Vals.push_back(DAG.getNode(
+ ISD::BUILD_PAIR, dl,
+ EVT::getIntegerVT(*DAG.getContext(), LHS.getValueSizeInBits() << 1),
+ LHS, RHS));
}
Lo = Vals[Slot++];
Hi = Vals[Slot++];
@@ -337,7 +336,8 @@ void DAGTypeLegalizer::IntegerToVector(SDValue Op, unsigned NumElements,
SDValue DAGTypeLegalizer::ExpandOp_BITCAST(SDNode *N) {
SDLoc dl(N);
- if (N->getValueType(0).isVector()) {
+ if (N->getValueType(0).isVector() &&
+ N->getOperand(0).getValueType().isInteger()) {
// An illegal expanding type is being converted to a legal vector type.
// Make a two element vector out of the expanded parts and convert that
// instead, but only if the new vector type is legal (otherwise there
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 3c9cb17b58b2..d4fa20f35274 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -106,7 +106,8 @@ class VectorLegalizer {
SDValue ExpandStore(SDValue Op);
SDValue ExpandFNEG(SDValue Op);
SDValue ExpandBITREVERSE(SDValue Op);
- SDValue ExpandCTLZ_CTTZ_ZERO_UNDEF(SDValue Op);
+ SDValue ExpandCTLZ(SDValue Op);
+ SDValue ExpandCTTZ_ZERO_UNDEF(SDValue Op);
/// \brief Implements vector promotion.
///
@@ -332,6 +333,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::SMAX:
case ISD::UMIN:
case ISD::UMAX:
+ case ISD::SMUL_LOHI:
+ case ISD::UMUL_LOHI:
QueryType = Node->getValueType(0);
break;
case ISD::FP_ROUND_INREG:
@@ -362,7 +365,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
Result = Tmp1;
break;
}
- // FALL THROUGH
+ LLVM_FALLTHROUGH;
}
case TargetLowering::Expand:
Result = Expand(Op);
@@ -693,9 +696,11 @@ SDValue VectorLegalizer::Expand(SDValue Op) {
return UnrollVSETCC(Op);
case ISD::BITREVERSE:
return ExpandBITREVERSE(Op);
+ case ISD::CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
+ return ExpandCTLZ(Op);
case ISD::CTTZ_ZERO_UNDEF:
- return ExpandCTLZ_CTTZ_ZERO_UNDEF(Op);
+ return ExpandCTTZ_ZERO_UNDEF(Op);
default:
return DAG.UnrollVectorOp(Op.getNode());
}
@@ -770,8 +775,8 @@ SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) {
SDLoc DL(Op);
EVT OrigTy = cast<VTSDNode>(Op->getOperand(1))->getVT();
- unsigned BW = VT.getScalarType().getSizeInBits();
- unsigned OrigBW = OrigTy.getScalarType().getSizeInBits();
+ unsigned BW = VT.getScalarSizeInBits();
+ unsigned OrigBW = OrigTy.getScalarSizeInBits();
SDValue ShiftSz = DAG.getConstant(BW - OrigBW, DL, VT);
Op = Op.getOperand(0);
@@ -817,8 +822,8 @@ SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op) {
// Now we need sign extend. Do this by shifting the elements. Even if these
// aren't legal operations, they have a better chance of being legalized
// without full scalarization than the sign extension does.
- unsigned EltWidth = VT.getVectorElementType().getSizeInBits();
- unsigned SrcEltWidth = SrcVT.getVectorElementType().getSizeInBits();
+ unsigned EltWidth = VT.getScalarSizeInBits();
+ unsigned SrcEltWidth = SrcVT.getScalarSizeInBits();
SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, DL, VT);
return DAG.getNode(ISD::SRA, DL, VT,
DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount),
@@ -951,7 +956,7 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
// If the mask and the type are different sizes, unroll the vector op. This
// can occur when getSetCCResultType returns something that is different in
// size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8.
- if (VT.getSizeInBits() != Op1.getValueType().getSizeInBits())
+ if (VT.getSizeInBits() != Op1.getValueSizeInBits())
return DAG.UnrollVectorOp(Op.getNode());
// Bitcast the operands to be the same type as the mask.
@@ -961,7 +966,7 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) {
Op2 = DAG.getNode(ISD::BITCAST, DL, VT, Op2);
SDValue AllOnes = DAG.getConstant(
- APInt::getAllOnesValue(VT.getScalarType().getSizeInBits()), DL, VT);
+ APInt::getAllOnesValue(VT.getScalarSizeInBits()), DL, VT);
SDValue NotMask = DAG.getNode(ISD::XOR, DL, VT, Mask, AllOnes);
Op1 = DAG.getNode(ISD::AND, DL, VT, Op1, Mask);
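
Editor's illustration of the AND/OR select used by ExpandVSELECT, shown on one lane with an all-ones or all-zeros mask: (Op1 & Mask) | (Op2 & ~Mask) picks one operand bit-exactly. Plain C++ sketch.

#include <cassert>
#include <cstdint>

static uint32_t SelectByMask(uint32_t Mask, uint32_t Op1, uint32_t Op2) {
  return (Op1 & Mask) | (Op2 & ~Mask);
}

int main() {
  assert(SelectByMask(0xFFFFFFFFu, 7, 9) == 7);
  assert(SelectByMask(0x00000000u, 7, 9) == 9);
  return 0;
}
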
@@ -979,21 +984,20 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand)
return DAG.UnrollVectorOp(Op.getNode());
- EVT SVT = VT.getScalarType();
- assert((SVT.getSizeInBits() == 64 || SVT.getSizeInBits() == 32) &&
- "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide");
+ unsigned BW = VT.getScalarSizeInBits();
+ assert((BW == 64 || BW == 32) &&
+ "Elements in vector-UINT_TO_FP must be 32 or 64 bits wide");
- unsigned BW = SVT.getSizeInBits();
- SDValue HalfWord = DAG.getConstant(BW/2, DL, VT);
+ SDValue HalfWord = DAG.getConstant(BW / 2, DL, VT);
// Constants to clear the upper part of the word.
// Notice that we can also use SHL+SHR, but using a constant is slightly
// faster on x86.
- uint64_t HWMask = (SVT.getSizeInBits()==64)?0x00000000FFFFFFFF:0x0000FFFF;
+ uint64_t HWMask = (BW == 64) ? 0x00000000FFFFFFFF : 0x0000FFFF;
SDValue HalfWordMask = DAG.getConstant(HWMask, DL, VT);
// Two to the power of half-word-size.
- SDValue TWOHW = DAG.getConstantFP(1 << (BW/2), DL, Op.getValueType());
+ SDValue TWOHW = DAG.getConstantFP(1 << (BW / 2), DL, Op.getValueType());
// Clear upper part of LO, lower HI
SDValue HI = DAG.getNode(ISD::SRL, DL, VT, Op.getOperand(0), HalfWord);
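
Editor's scalar analogue of the half-word UINT_TO_FP trick above: convert the two halves with a signed conversion (each half is small enough to convert exactly) and recombine with a multiply by 2^(BW/2). Plain C++ sketch, not the DAG code itself.

#include <cassert>
#include <cstdint>

static double U32ToFP(uint32_t X) {
  int32_t Hi = (int32_t)(X >> 16);        // SRL by HalfWord
  int32_t Lo = (int32_t)(X & 0x0000FFFF); // AND with HalfWordMask
  return (double)Hi * 65536.0 + (double)Lo;   // fHI * TWOHW + fLO
}

int main() {
  assert(U32ToFP(0xFFFFFFFFu) == 4294967295.0);
  assert(U32ToFP(0x80000001u) == 2147483649.0); // a naive signed conversion would go negative
  return 0;
}
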
@@ -1010,7 +1014,6 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) {
return DAG.getNode(ISD::FADD, DL, Op.getValueType(), fHI, fLO);
}
-
SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) {
SDLoc DL(Op);
@@ -1022,12 +1025,53 @@ SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
return DAG.UnrollVectorOp(Op.getNode());
}
-SDValue VectorLegalizer::ExpandCTLZ_CTTZ_ZERO_UNDEF(SDValue Op) {
+SDValue VectorLegalizer::ExpandCTLZ(SDValue Op) {
+ EVT VT = Op.getValueType();
+ unsigned NumBitsPerElt = VT.getScalarSizeInBits();
+
+ // If the non-ZERO_UNDEF version is supported we can use that instead.
+ if (Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF &&
+ TLI.isOperationLegalOrCustom(ISD::CTLZ, VT)) {
+ SDLoc DL(Op);
+ return DAG.getNode(ISD::CTLZ, DL, Op.getValueType(), Op.getOperand(0));
+ }
+
+ // If CTPOP is available we can lower with a CTPOP based method:
+ // u16 ctlz(u16 x) {
+ // x |= (x >> 1);
+ // x |= (x >> 2);
+ // x |= (x >> 4);
+ // x |= (x >> 8);
+ // return ctpop(~x);
+ // }
+ // Ref: "Hacker's Delight" by Henry Warren
+ if (isPowerOf2_32(NumBitsPerElt) &&
+ TLI.isOperationLegalOrCustom(ISD::CTPOP, VT) &&
+ TLI.isOperationLegalOrCustom(ISD::SRL, VT) &&
+ TLI.isOperationLegalOrCustomOrPromote(ISD::OR, VT) &&
+ TLI.isOperationLegalOrCustomOrPromote(ISD::XOR, VT)) {
+ SDLoc DL(Op);
+ SDValue Res = Op.getOperand(0);
+ EVT ShiftTy = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
+
+ for (unsigned i = 1; i != NumBitsPerElt; i *= 2)
+ Res = DAG.getNode(
+ ISD::OR, DL, VT, Res,
+ DAG.getNode(ISD::SRL, DL, VT, Res, DAG.getConstant(i, DL, ShiftTy)));
+
+ Res = DAG.getNOT(DL, Res, VT);
+ return DAG.getNode(ISD::CTPOP, DL, VT, Res);
+ }
+
+ // Otherwise go ahead and unroll.
+ return DAG.UnrollVectorOp(Op.getNode());
+}
+
+SDValue VectorLegalizer::ExpandCTTZ_ZERO_UNDEF(SDValue Op) {
// If the non-ZERO_UNDEF version is supported we can use that instead.
- unsigned Opc = Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF ? ISD::CTLZ : ISD::CTTZ;
- if (TLI.isOperationLegalOrCustom(Opc, Op.getValueType())) {
+ if (TLI.isOperationLegalOrCustom(ISD::CTTZ, Op.getValueType())) {
SDLoc DL(Op);
- return DAG.getNode(Opc, DL, Op.getValueType(), Op.getOperand(0));
+ return DAG.getNode(ISD::CTTZ, DL, Op.getValueType(), Op.getOperand(0));
}
// Otherwise go ahead and unroll.
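
Editor's scalar version of the CTPOP-based CTLZ expansion sketched in the comment above, using the GCC/Clang __builtin_popcount for the CTPOP step. Plain C++ sketch, not LLVM code.

#include <cassert>
#include <cstdint>

static unsigned CTLZ32(uint32_t X) {
  X |= (X >> 1);
  X |= (X >> 2);
  X |= (X >> 4);
  X |= (X >> 8);
  X |= (X >> 16);
  // After the OR-smearing, ~X has ones exactly where the leading zeros were.
  return (unsigned)__builtin_popcount(~X);
}

int main() {
  assert(CTLZ32(1) == 31);
  assert(CTLZ32(0x80000000u) == 0);
  assert(CTLZ32(0) == 32);   // well-defined here, unlike the ZERO_UNDEF form
  return 0;
}
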
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index f3adca49ccfe..57c179ac15b8 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -882,7 +882,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo,
DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo());
// Increment the pointer to the other part.
- unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8;
+ unsigned IncrementSize = Lo.getValueSizeInBits() / 8;
StackPtr =
DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
DAG.getConstant(IncrementSize, dl, StackPtr.getValueType()));
@@ -1014,7 +1014,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo());
// Increment the pointer to the other part.
- unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8;
+ unsigned IncrementSize = Lo.getValueSizeInBits() / 8;
StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
DAG.getConstant(IncrementSize, dl,
StackPtr.getValueType()));
@@ -1114,11 +1114,10 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
Alignment, MLD->getAAInfo(), MLD->getRanges());
Lo = DAG.getMaskedLoad(LoVT, dl, Ch, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
- ExtType);
+ ExtType, MLD->isExpandingLoad());
- unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, dl, Ptr.getValueType()));
+ Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, dl, LoMemVT, DAG,
+ MLD->isExpandingLoad());
MMO = DAG.getMachineFunction().
getMachineMemOperand(MLD->getPointerInfo(),
@@ -1126,7 +1125,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
- ExtType);
+ ExtType, MLD->isExpandingLoad());
// Build a factor node to remember that this load is independent of the
@@ -1282,7 +1281,7 @@ void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo,
LLVMContext &Ctx = *DAG.getContext();
EVT NewSrcVT = EVT::getVectorVT(
Ctx, EVT::getIntegerVT(
- Ctx, SrcVT.getVectorElementType().getSizeInBits() * 2),
+ Ctx, SrcVT.getScalarSizeInBits() * 2),
NumElements);
EVT SplitSrcVT =
EVT::getVectorVT(Ctx, SrcVT.getVectorElementType(), NumElements / 2);
@@ -1769,19 +1768,18 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
Alignment, N->getAAInfo(), N->getRanges());
Lo = DAG.getMaskedStore(Ch, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
- N->isTruncatingStore());
-
- unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
- Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
- DAG.getConstant(IncrementSize, DL, Ptr.getValueType()));
+ N->isTruncatingStore(),
+ N->isCompressingStore());
+ Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
+ N->isCompressingStore());
MMO = DAG.getMachineFunction().
getMachineMemOperand(N->getPointerInfo(),
MachineMemOperand::MOStore, HiMemVT.getStoreSize(),
SecondHalfAlignment, N->getAAInfo(), N->getRanges());
Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
- N->isTruncatingStore());
+ N->isTruncatingStore(), N->isCompressingStore());
// Build a factor node to remember that this store is independent of the
// other one.
@@ -1940,8 +1938,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_TruncateHelper(SDNode *N) {
// if we're trying to split it at all. assert() that's true, just in case.
assert(!(NumElements & 1) && "Splitting vector, but not in half!");
- unsigned InElementSize = InVT.getVectorElementType().getSizeInBits();
- unsigned OutElementSize = OutVT.getVectorElementType().getSizeInBits();
+ unsigned InElementSize = InVT.getScalarSizeInBits();
+ unsigned OutElementSize = OutVT.getScalarSizeInBits();
// If the input elements are only 1/2 the width of the result elements,
// just use the normal splitting. Our trick only work if there's room
@@ -2881,7 +2879,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_MLOAD(MaskedLoadSDNode *N) {
SDValue Res = DAG.getMaskedLoad(WidenVT, dl, N->getChain(), N->getBasePtr(),
Mask, Src0, N->getMemoryVT(),
- N->getMemOperand(), ExtType);
+ N->getMemOperand(), ExtType,
+ N->isExpandingLoad());
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
@@ -3317,7 +3316,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSTORE(SDNode *N, unsigned OpNo) {
"Mask and data vectors should have the same number of elements");
return DAG.getMaskedStore(MST->getChain(), dl, WideVal, MST->getBasePtr(),
Mask, MST->getMemoryVT(), MST->getMemOperand(),
- false);
+ false, MST->isCompressingStore());
}
SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) {
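The masked load/store splitting hunks above stop hard-coding the low half's size as the pointer bump and instead ask the target for the increment, because an expanding load or compressing store only advances the pointer by the lanes the low-half mask actually enables. A minimal standalone sketch of that distinction, with illustrative names rather than the real TargetLowering::IncrementMemoryAddress signature:

#include <bitset>
#include <cstdint>

// loPartBytes: size of the low half in memory for a dense access.
// elementBytes: size of one element; loMask: which low-half lanes are active.
uint64_t secondHalfByteOffset(uint64_t loPartBytes, uint64_t elementBytes,
                              const std::bitset<64> &loMask,
                              bool isCompressingOrExpanding) {
  if (!isCompressingOrExpanding)
    return loPartBytes;                  // dense access: fixed-size bump
  return loMask.count() * elementBytes;  // expanding/compressing: bump by active lanes
}
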
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
index 1e5c4a73693f..ded8e68fcbce 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -631,7 +631,7 @@ SUnit *ResourcePriorityQueue::pop() {
void ResourcePriorityQueue::remove(SUnit *SU) {
assert(!Queue.empty() && "Queue is empty!");
- std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(), SU);
+ std::vector<SUnit *>::iterator I = find(Queue, SU);
if (I != std::prev(Queue.end()))
std::swap(*I, Queue.back());
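This hunk and the next file's hunks replace explicit iterator-pair std::find calls with LLVM's range helpers from ADT/STLExtras.h. A rough standalone equivalent of what those helpers do, with simplified signatures rather than the actual LLVM declarations:

#include <algorithm>
#include <cassert>
#include <vector>

template <typename Range, typename T>
auto find(Range &&range, const T &value) {
  // Range-based wrapper over the iterator-pair algorithm.
  return std::find(std::begin(range), std::end(range), value);
}

template <typename Range, typename T>
bool is_contained(Range &&range, const T &value) {
  return find(range, value) != std::end(range);
}

int main() {
  std::vector<int> queue{1, 2, 3};
  assert(is_contained(queue, 2));
  assert(*find(queue, 3) == 3);
  return 0;
}
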
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 802c459a0223..3549ccd9e345 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -1339,7 +1339,7 @@ void ScheduleDAGRRList::releaseInterferences(unsigned Reg) {
LRegsMapT::iterator LRegsPos = LRegsMap.find(SU);
if (Reg) {
SmallVectorImpl<unsigned> &LRegs = LRegsPos->second;
- if (std::find(LRegs.begin(), LRegs.end(), Reg) == LRegs.end())
+ if (!is_contained(LRegs, Reg))
continue;
}
SU->isPending = false;
@@ -1704,8 +1704,7 @@ public:
void remove(SUnit *SU) override {
assert(!Queue.empty() && "Queue is empty!");
assert(SU->NodeQueueId != 0 && "Not in queue!");
- std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
- SU);
+ std::vector<SUnit *>::iterator I = find(Queue, SU);
if (I != std::prev(Queue.end()))
std::swap(*I, Queue.back());
Queue.pop_back();
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 29d11c79ac25..b970dc0e5f5f 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -134,7 +134,7 @@ bool ISD::isBuildVectorAllOnes(const SDNode *N) {
// we care if the resultant vector is all ones, not whether the individual
// constants are.
SDValue NotZero = N->getOperand(i);
- unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits();
+ unsigned EltSize = N->getValueType(0).getScalarSizeInBits();
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(NotZero)) {
if (CN->getAPIntValue().countTrailingOnes() < EltSize)
return false;
@@ -173,7 +173,7 @@ bool ISD::isBuildVectorAllZeros(const SDNode *N) {
// We only want to check enough bits to cover the vector elements, because
// we care if the resultant vector is all zeros, not whether the individual
// constants are.
- unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits();
+ unsigned EltSize = N->getValueType(0).getScalarSizeInBits();
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Op)) {
if (CN->getAPIntValue().countTrailingZeros() < EltSize)
return false;
@@ -403,7 +403,6 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
ID.AddPointer(GA->getGlobal());
ID.AddInteger(GA->getOffset());
ID.AddInteger(GA->getTargetFlags());
- ID.AddInteger(GA->getAddressSpace());
break;
}
case ISD::BasicBlock:
@@ -521,24 +520,6 @@ static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) {
AddNodeIDCustom(ID, N);
}
-/// encodeMemSDNodeFlags - Generic routine for computing a value for use in
-/// the CSE map that carries volatility, temporalness, indexing mode, and
-/// extension/truncation information.
-///
-static inline unsigned
-encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile,
- bool isNonTemporal, bool isInvariant) {
- assert((ConvType & 3) == ConvType &&
- "ConvType may not require more than 2 bits!");
- assert((AM & 7) == AM &&
- "AM may not require more than 3 bits!");
- return ConvType |
- (AM << 2) |
- (isVolatile << 5) |
- (isNonTemporal << 6) |
- (isInvariant << 7);
-}
-
//===----------------------------------------------------------------------===//
// SelectionDAG Class
//===----------------------------------------------------------------------===//
@@ -1030,7 +1011,7 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) {
"getZeroExtendInReg should use the vector element type instead of "
"the vector type!");
if (Op.getValueType() == VT) return Op;
- unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
+ unsigned BitWidth = Op.getScalarValueSizeInBits();
APInt Imm = APInt::getLowBitsSet(BitWidth,
VT.getSizeInBits());
return getNode(ISD::AND, DL, Op.getValueType(), Op,
@@ -1040,7 +1021,7 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) {
SDValue SelectionDAG::getAnyExtendVectorInReg(SDValue Op, const SDLoc &DL,
EVT VT) {
assert(VT.isVector() && "This DAG node is restricted to vector types.");
- assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() &&
+ assert(VT.getSizeInBits() == Op.getValueSizeInBits() &&
"The sizes of the input and result must match in order to perform the "
"extend in-register.");
assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() &&
@@ -1051,7 +1032,7 @@ SDValue SelectionDAG::getAnyExtendVectorInReg(SDValue Op, const SDLoc &DL,
SDValue SelectionDAG::getSignExtendVectorInReg(SDValue Op, const SDLoc &DL,
EVT VT) {
assert(VT.isVector() && "This DAG node is restricted to vector types.");
- assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() &&
+ assert(VT.getSizeInBits() == Op.getValueSizeInBits() &&
"The sizes of the input and result must match in order to perform the "
"extend in-register.");
assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() &&
@@ -1062,7 +1043,7 @@ SDValue SelectionDAG::getSignExtendVectorInReg(SDValue Op, const SDLoc &DL,
SDValue SelectionDAG::getZeroExtendVectorInReg(SDValue Op, const SDLoc &DL,
EVT VT) {
assert(VT.isVector() && "This DAG node is restricted to vector types.");
- assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() &&
+ assert(VT.getSizeInBits() == Op.getValueSizeInBits() &&
"The sizes of the input and result must match in order to perform the "
"extend in-register.");
assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() &&
@@ -1166,12 +1147,9 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, const SDLoc &DL,
// This situation occurs in MIPS MSA.
SmallVector<SDValue, 8> Ops;
- for (unsigned i = 0; i < VT.getVectorNumElements(); ++i)
+ for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
Ops.insert(Ops.end(), EltParts.begin(), EltParts.end());
-
- SDValue Result = getNode(ISD::BITCAST, DL, VT,
- getNode(ISD::BUILD_VECTOR, DL, ViaVecVT, Ops));
- return Result;
+ return getNode(ISD::BITCAST, DL, VT, getBuildVector(ViaVecVT, DL, Ops));
}
assert(Elt->getBitWidth() == EltVT.getSizeInBits() &&
@@ -1280,7 +1258,6 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, const SDLoc &DL,
ID.AddPointer(GV);
ID.AddInteger(Offset);
ID.AddInteger(TargetFlags);
- ID.AddInteger(GV->getType()->getAddressSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP))
return SDValue(E, 0);
@@ -1333,7 +1310,9 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
assert((TargetFlags == 0 || isTarget) &&
"Cannot set target flags on target-independent globals");
if (Alignment == 0)
- Alignment = getDataLayout().getPrefTypeAlignment(C->getType());
+ Alignment = MF->getFunction()->optForSize()
+ ? getDataLayout().getABITypeAlignment(C->getType())
+ : getDataLayout().getPrefTypeAlignment(C->getType());
unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, getVTList(VT), None);
@@ -1863,13 +1842,13 @@ SDValue SelectionDAG::expandVACopy(SDNode *Node) {
}
SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) {
- MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo();
+ MachineFrameInfo &MFI = getMachineFunction().getFrameInfo();
unsigned ByteSize = VT.getStoreSize();
Type *Ty = VT.getTypeForEVT(*getContext());
unsigned StackAlign =
std::max((unsigned)getDataLayout().getPrefTypeAlignment(Ty), minAlign);
- int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false);
+ int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false);
return getFrameIndex(FrameIdx, TLI->getPointerTy(getDataLayout()));
}
@@ -1881,8 +1860,8 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) {
unsigned Align =
std::max(DL.getPrefTypeAlignment(Ty1), DL.getPrefTypeAlignment(Ty2));
- MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo();
- int FrameIdx = FrameInfo->CreateStackObject(Bytes, Align, false);
+ MachineFrameInfo &MFI = getMachineFunction().getFrameInfo();
+ int FrameIdx = MFI.CreateStackObject(Bytes, Align, false);
return getFrameIndex(FrameIdx, TLI->getPointerTy(getDataLayout()));
}
@@ -1943,29 +1922,29 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
default: break;
case ISD::SETEQ: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
- // fall through
+ LLVM_FALLTHROUGH;
case ISD::SETOEQ: return getConstant(R==APFloat::cmpEqual, dl, VT);
case ISD::SETNE: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
- // fall through
+ LLVM_FALLTHROUGH;
case ISD::SETONE: return getConstant(R==APFloat::cmpGreaterThan ||
R==APFloat::cmpLessThan, dl, VT);
case ISD::SETLT: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
- // fall through
+ LLVM_FALLTHROUGH;
case ISD::SETOLT: return getConstant(R==APFloat::cmpLessThan, dl, VT);
case ISD::SETGT: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
- // fall through
+ LLVM_FALLTHROUGH;
case ISD::SETOGT: return getConstant(R==APFloat::cmpGreaterThan, dl, VT);
case ISD::SETLE: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
- // fall through
+ LLVM_FALLTHROUGH;
case ISD::SETOLE: return getConstant(R==APFloat::cmpLessThan ||
R==APFloat::cmpEqual, dl, VT);
case ISD::SETGE: if (R==APFloat::cmpUnordered)
return getUNDEF(VT);
- // fall through
+ LLVM_FALLTHROUGH;
case ISD::SETOGE: return getConstant(R==APFloat::cmpGreaterThan ||
R==APFloat::cmpEqual, dl, VT);
case ISD::SETO: return getConstant(R!=APFloat::cmpUnordered, dl, VT);
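The hunk above swaps "// fall through" comments for the LLVM_FALLTHROUGH macro, which tells the compiler (and -Wimplicit-fallthrough) that the missing break is deliberate. A small sketch of the same pattern using the C++17 attribute the macro expands to on newer toolchains; the condition values are illustrative, not the ISD enumerators:

int classifyOrderedCompare(int cond, bool unordered) {
  switch (cond) {
  case 0:                 // unordered-tolerant case, e.g. SETEQ
    if (unordered)
      return -1;          // result is undefined for unordered inputs
    [[fallthrough]];      // intentional: reuse the ordered handling below
  case 1:                 // ordered case, e.g. SETOEQ
    return 1;
  default:
    return 0;
  }
}
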
@@ -1998,11 +1977,7 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
/// SignBitIsZero - Return true if the sign bit of Op is known to be zero. We
/// use this predicate to simplify operations downstream.
bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const {
- // This predicate is not safe for vector operations.
- if (Op.getValueType().isVector())
- return false;
-
- unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
+ unsigned BitWidth = Op.getScalarValueSizeInBits();
return MaskedValueIsZero(Op, APInt::getSignBit(BitWidth), Depth);
}
@@ -2016,28 +1991,244 @@ bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask,
return (KnownZero & Mask) == Mask;
}
+/// If a SHL/SRA/SRL node has a constant or splat constant shift amount that
+/// is less than the element bit-width of the shift node, return it.
+static const APInt *getValidShiftAmountConstant(SDValue V) {
+ if (ConstantSDNode *SA = isConstOrConstSplat(V.getOperand(1))) {
+ // Shifting more than the bitwidth is not valid.
+ const APInt &ShAmt = SA->getAPIntValue();
+ if (ShAmt.ult(V.getScalarValueSizeInBits()))
+ return &ShAmt;
+ }
+ return nullptr;
+}
+
/// Determine which bits of Op are known to be either zero or one and return
-/// them in the KnownZero/KnownOne bitsets.
+/// them in the KnownZero/KnownOne bitsets. For vectors, the known bits are
+/// those that are shared by every vector element.
void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
APInt &KnownOne, unsigned Depth) const {
- unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
+ EVT VT = Op.getValueType();
+ APInt DemandedElts = VT.isVector()
+ ? APInt::getAllOnesValue(VT.getVectorNumElements())
+ : APInt(1, 1);
+ computeKnownBits(Op, KnownZero, KnownOne, DemandedElts, Depth);
+}
+
+/// Determine which bits of Op are known to be either zero or one and return
+/// them in the KnownZero/KnownOne bitsets. The DemandedElts argument allows
+/// us to only collect the known bits that are shared by the requested vector
+/// elements.
+/// TODO: We only support DemandedElts on a few opcodes so far, the remainder
+/// should be added when they become necessary.
+void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
+ APInt &KnownOne, const APInt &DemandedElts,
+ unsigned Depth) const {
+ unsigned BitWidth = Op.getScalarValueSizeInBits();
KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything.
if (Depth == 6)
return; // Limit search depth.
APInt KnownZero2, KnownOne2;
+ unsigned NumElts = DemandedElts.getBitWidth();
- switch (Op.getOpcode()) {
+ if (!DemandedElts)
+ return; // No demanded elts, better to assume we don't know anything.
+
+ unsigned Opcode = Op.getOpcode();
+ switch (Opcode) {
case ISD::Constant:
// We know all of the bits for a constant!
KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue();
KnownZero = ~KnownOne;
break;
+ case ISD::BUILD_VECTOR:
+ // Collect the known bits that are shared by every demanded vector element.
+ assert(NumElts == Op.getValueType().getVectorNumElements() &&
+ "Unexpected vector size");
+ KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth);
+ for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
+ if (!DemandedElts[i])
+ continue;
+
+ SDValue SrcOp = Op.getOperand(i);
+ computeKnownBits(SrcOp, KnownZero2, KnownOne2, Depth + 1);
+
+ // BUILD_VECTOR can implicitly truncate sources, we must handle this.
+ if (SrcOp.getValueSizeInBits() != BitWidth) {
+ assert(SrcOp.getValueSizeInBits() > BitWidth &&
+ "Expected BUILD_VECTOR implicit truncation");
+ KnownOne2 = KnownOne2.trunc(BitWidth);
+ KnownZero2 = KnownZero2.trunc(BitWidth);
+ }
+
+ // Known bits are the values that are shared by every demanded element.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+
+ // If we don't know any bits, early out.
+ if (!KnownOne && !KnownZero)
+ break;
+ }
+ break;
+ case ISD::VECTOR_SHUFFLE: {
+ // Collect the known bits that are shared by every vector element referenced
+ // by the shuffle.
+ APInt DemandedLHS(NumElts, 0), DemandedRHS(NumElts, 0);
+ KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth);
+ const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
+ assert(NumElts == SVN->getMask().size() && "Unexpected vector size");
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (!DemandedElts[i])
+ continue;
+
+ int M = SVN->getMaskElt(i);
+ if (M < 0) {
+ // For UNDEF elements, we don't know anything about the common state of
+ // the shuffle result.
+ KnownOne.clearAllBits();
+ KnownZero.clearAllBits();
+ DemandedLHS.clearAllBits();
+ DemandedRHS.clearAllBits();
+ break;
+ }
+
+ if ((unsigned)M < NumElts)
+ DemandedLHS.setBit((unsigned)M % NumElts);
+ else
+ DemandedRHS.setBit((unsigned)M % NumElts);
+ }
+ // Known bits are the values that are shared by every demanded element.
+ if (!!DemandedLHS) {
+ SDValue LHS = Op.getOperand(0);
+ computeKnownBits(LHS, KnownZero2, KnownOne2, DemandedLHS, Depth + 1);
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ }
+ // If we don't know any bits, early out.
+ if (!KnownOne && !KnownZero)
+ break;
+ if (!!DemandedRHS) {
+ SDValue RHS = Op.getOperand(1);
+ computeKnownBits(RHS, KnownZero2, KnownOne2, DemandedRHS, Depth + 1);
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ }
+ break;
+ }
+ case ISD::CONCAT_VECTORS: {
+ // Split DemandedElts and test each of the demanded subvectors.
+ KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth);
+ EVT SubVectorVT = Op.getOperand(0).getValueType();
+ unsigned NumSubVectorElts = SubVectorVT.getVectorNumElements();
+ unsigned NumSubVectors = Op.getNumOperands();
+ for (unsigned i = 0; i != NumSubVectors; ++i) {
+ APInt DemandedSub = DemandedElts.lshr(i * NumSubVectorElts);
+ DemandedSub = DemandedSub.trunc(NumSubVectorElts);
+ if (!!DemandedSub) {
+ SDValue Sub = Op.getOperand(i);
+ computeKnownBits(Sub, KnownZero2, KnownOne2, DemandedSub, Depth + 1);
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ }
+ // If we don't know any bits, early out.
+ if (!KnownOne && !KnownZero)
+ break;
+ }
+ break;
+ }
+ case ISD::EXTRACT_SUBVECTOR: {
+ // If we know the element index, just demand that subvector elements,
+ // otherwise demand them all.
+ SDValue Src = Op.getOperand(0);
+ ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+ if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
+ // Offset the demanded elts by the subvector index.
+ uint64_t Idx = SubIdx->getZExtValue();
+ APInt DemandedSrc = DemandedElts.zext(NumSrcElts).shl(Idx);
+ computeKnownBits(Src, KnownZero, KnownOne, DemandedSrc, Depth + 1);
+ } else {
+ computeKnownBits(Src, KnownZero, KnownOne, Depth + 1);
+ }
+ break;
+ }
+ case ISD::BITCAST: {
+ SDValue N0 = Op.getOperand(0);
+ unsigned SubBitWidth = N0.getScalarValueSizeInBits();
+
+ // Ignore bitcasts from floating point.
+ if (!N0.getValueType().isInteger())
+ break;
+
+ // Fast handling of 'identity' bitcasts.
+ if (BitWidth == SubBitWidth) {
+ computeKnownBits(N0, KnownZero, KnownOne, DemandedElts, Depth + 1);
+ break;
+ }
+
+ // Support big-endian targets when it becomes useful.
+ bool IsLE = getDataLayout().isLittleEndian();
+ if (!IsLE)
+ break;
+
+ // Bitcast 'small element' vector to 'large element' scalar/vector.
+ if ((BitWidth % SubBitWidth) == 0) {
+ assert(N0.getValueType().isVector() && "Expected bitcast from vector");
+
+ // Collect known bits for the (larger) output by collecting the known
+ // bits from each set of sub elements and shift these into place.
+ // We need to separately call computeKnownBits for each set of
+ // sub elements as the knownbits for each is likely to be different.
+ unsigned SubScale = BitWidth / SubBitWidth;
+ APInt SubDemandedElts(NumElts * SubScale, 0);
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (DemandedElts[i])
+ SubDemandedElts.setBit(i * SubScale);
+
+ for (unsigned i = 0; i != SubScale; ++i) {
+ computeKnownBits(N0, KnownZero2, KnownOne2, SubDemandedElts.shl(i),
+ Depth + 1);
+ KnownOne |= KnownOne2.zext(BitWidth).shl(SubBitWidth * i);
+ KnownZero |= KnownZero2.zext(BitWidth).shl(SubBitWidth * i);
+ }
+ }
+
+ // Bitcast 'large element' scalar/vector to 'small element' vector.
+ if ((SubBitWidth % BitWidth) == 0) {
+ assert(Op.getValueType().isVector() && "Expected bitcast to vector");
+
+ // Collect known bits for the (smaller) output by collecting the known
+ // bits from the overlapping larger input elements and extracting the
+ // sub sections we actually care about.
+ unsigned SubScale = SubBitWidth / BitWidth;
+ APInt SubDemandedElts(NumElts / SubScale, 0);
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (DemandedElts[i])
+ SubDemandedElts.setBit(i / SubScale);
+
+ computeKnownBits(N0, KnownZero2, KnownOne2, SubDemandedElts, Depth + 1);
+
+ KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth);
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (DemandedElts[i]) {
+ unsigned Offset = (i % SubScale) * BitWidth;
+ KnownOne &= KnownOne2.lshr(Offset).trunc(BitWidth);
+ KnownZero &= KnownZero2.lshr(Offset).trunc(BitWidth);
+ // If we don't know any bits, early out.
+ if (!KnownOne && !KnownZero)
+ break;
+ }
+ }
+ break;
+ }
case ISD::AND:
// If either the LHS or the RHS are Zero, the result is zero.
- computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
- computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
+ computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, DemandedElts,
+ Depth + 1);
+ computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
+ Depth + 1);
// Output known-1 bits are only known if set in both the LHS & RHS.
KnownOne &= KnownOne2;
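The DemandedElts plumbing added above follows one pattern throughout: compute known bits per source lane, then keep only the facts common to every lane the caller demanded, skipping lanes nobody asked about and bailing out once nothing remains known. A small scalar sketch of that merge, using plain 64-bit masks rather than the APInt-based SelectionDAG interface:

#include <cstdint>
#include <vector>

struct Known {
  uint64_t Zero = ~0ull;  // start from "everything known"...
  uint64_t One  = ~0ull;  // ...and intersect away per demanded lane
};

// zeroPerLane[i] / onePerLane[i]: bits known zero/one in lane i.
// demanded: bitmask of the lanes the caller actually needs.
Known mergeDemandedLanes(const std::vector<uint64_t> &zeroPerLane,
                         const std::vector<uint64_t> &onePerLane,
                         uint64_t demanded) {
  Known K;
  for (size_t i = 0; i < onePerLane.size(); ++i) {
    if (!(demanded & (uint64_t(1) << i)))
      continue;                   // undemanded lanes cannot veto anything
    K.Zero &= zeroPerLane[i];     // a bit stays known only if it is known
    K.One  &= onePerLane[i];      // the same way in every demanded lane
    if (!K.Zero && !K.One)
      break;                      // nothing known any more, early out
  }
  return K;
}
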
@@ -2045,8 +2236,10 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
KnownZero |= KnownZero2;
break;
case ISD::OR:
- computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
- computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
+ computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, DemandedElts,
+ Depth + 1);
+ computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
+ Depth + 1);
// Output known-0 bits are only known if clear in both the LHS & RHS.
KnownZero &= KnownZero2;
@@ -2054,8 +2247,10 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
KnownOne |= KnownOne2;
break;
case ISD::XOR: {
- computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
- computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
+ computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, DemandedElts,
+ Depth + 1);
+ computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
+ Depth + 1);
// Output known-0 bits are known if clear or set in both the LHS & RHS.
APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
@@ -2065,11 +2260,13 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
break;
}
case ISD::MUL: {
- computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
- computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
+ computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, DemandedElts,
+ Depth + 1);
+ computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
+ Depth + 1);
// If low bits are zero in either operand, output low known-0 bits.
- // Also compute a conserative estimate for high known-0 bits.
+ // Also compute a conservative estimate for high known-0 bits.
// More trickiness is possible, but this is sufficient for the
// interesting case of alignment computation.
KnownOne.clearAllBits();
@@ -2089,12 +2286,12 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
// For the purposes of computing leading zeros we can conservatively
// treat a udiv as a logical right shift by the power of 2 known to
// be less than the denominator.
- computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
+ computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
+ Depth + 1);
unsigned LeadZ = KnownZero2.countLeadingOnes();
- KnownOne2.clearAllBits();
- KnownZero2.clearAllBits();
- computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
+ computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts,
+ Depth + 1);
unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros();
if (RHSUnknownLeadingOnes != BitWidth)
LeadZ = std::min(BitWidth,
@@ -2105,6 +2302,9 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
}
case ISD::SELECT:
computeKnownBits(Op.getOperand(2), KnownZero, KnownOne, Depth+1);
+ // If we don't know any bits, early out.
+ if (!KnownOne && !KnownZero)
+ break;
computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
// Only known if known in both the LHS and RHS.
@@ -2113,6 +2313,9 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
break;
case ISD::SELECT_CC:
computeKnownBits(Op.getOperand(3), KnownZero, KnownOne, Depth+1);
+ // If we don't know any bits, early out.
+ if (!KnownOne && !KnownZero)
+ break;
computeKnownBits(Op.getOperand(2), KnownZero2, KnownOne2, Depth+1);
// Only known if known in both the LHS and RHS.
@@ -2144,58 +2347,37 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
break;
case ISD::SHL:
- // (shl X, C1) & C2 == 0 iff (X & C2 >>u C1) == 0
- if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
- unsigned ShAmt = SA->getZExtValue();
-
- // If the shift count is an invalid immediate, don't do anything.
- if (ShAmt >= BitWidth)
- break;
-
- computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
- KnownZero <<= ShAmt;
- KnownOne <<= ShAmt;
- // low bits known zero.
- KnownZero |= APInt::getLowBitsSet(BitWidth, ShAmt);
+ if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) {
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
+ Depth + 1);
+ KnownZero = KnownZero << *ShAmt;
+ KnownOne = KnownOne << *ShAmt;
+ // Low bits are known zero.
+ KnownZero |= APInt::getLowBitsSet(BitWidth, ShAmt->getZExtValue());
}
break;
case ISD::SRL:
- // (ushr X, C1) & C2 == 0 iff (-1 >> C1) & C2 == 0
- if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
- unsigned ShAmt = SA->getZExtValue();
-
- // If the shift count is an invalid immediate, don't do anything.
- if (ShAmt >= BitWidth)
- break;
-
- computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
- KnownZero = KnownZero.lshr(ShAmt);
- KnownOne = KnownOne.lshr(ShAmt);
-
- APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
- KnownZero |= HighBits; // High bits known zero.
+ if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) {
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
+ Depth + 1);
+ KnownZero = KnownZero.lshr(*ShAmt);
+ KnownOne = KnownOne.lshr(*ShAmt);
+ // High bits are known zero.
+ APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt->getZExtValue());
+ KnownZero |= HighBits;
}
break;
case ISD::SRA:
- if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
- unsigned ShAmt = SA->getZExtValue();
-
- // If the shift count is an invalid immediate, don't do anything.
- if (ShAmt >= BitWidth)
- break;
-
- // If any of the demanded bits are produced by the sign extension, we also
- // demand the input sign bit.
- APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
-
- computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
- KnownZero = KnownZero.lshr(ShAmt);
- KnownOne = KnownOne.lshr(ShAmt);
-
- // Handle the sign bits.
+ if (const APInt *ShAmt = getValidShiftAmountConstant(Op)) {
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
+ Depth + 1);
+ KnownZero = KnownZero.lshr(*ShAmt);
+ KnownOne = KnownOne.lshr(*ShAmt);
+ // If we know the value of the sign bit, then we know it is copied across
+ // the high bits by the shift amount.
+ APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt->getZExtValue());
APInt SignBit = APInt::getSignBit(BitWidth);
- SignBit = SignBit.lshr(ShAmt); // Adjust to where it is now in the mask.
-
+ SignBit = SignBit.lshr(*ShAmt); // Adjust to where it is now in the mask.
if (KnownZero.intersects(SignBit)) {
KnownZero |= HighBits; // New bits are known zero.
} else if (KnownOne.intersects(SignBit)) {
@@ -2205,7 +2387,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
break;
case ISD::SIGN_EXTEND_INREG: {
EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
- unsigned EBits = EVT.getScalarType().getSizeInBits();
+ unsigned EBits = EVT.getScalarSizeInBits();
// Sign extension. Compute the demanded bits in the result that are not
// present in the input.
@@ -2220,7 +2402,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
if (NewBits.getBoolValue())
InputDemandedBits |= InSignBit;
- computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
+ Depth + 1);
KnownOne &= InputDemandedBits;
KnownZero &= InputDemandedBits;
@@ -2253,7 +2436,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
// If this is a ZEXTLoad and we are looking at the loaded value.
if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
EVT VT = LD->getMemoryVT();
- unsigned MemBits = VT.getScalarType().getSizeInBits();
+ unsigned MemBits = VT.getScalarSizeInBits();
KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
} else if (const MDNode *Ranges = LD->getRanges()) {
if (LD->getExtensionType() == ISD::NON_EXTLOAD)
@@ -2263,11 +2446,12 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
}
case ISD::ZERO_EXTEND: {
EVT InVT = Op.getOperand(0).getValueType();
- unsigned InBits = InVT.getScalarType().getSizeInBits();
+ unsigned InBits = InVT.getScalarSizeInBits();
APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits);
KnownZero = KnownZero.trunc(InBits);
KnownOne = KnownOne.trunc(InBits);
- computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
+ Depth + 1);
KnownZero = KnownZero.zext(BitWidth);
KnownOne = KnownOne.zext(BitWidth);
KnownZero |= NewBits;
@@ -2275,30 +2459,22 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
}
case ISD::SIGN_EXTEND: {
EVT InVT = Op.getOperand(0).getValueType();
- unsigned InBits = InVT.getScalarType().getSizeInBits();
- APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits);
+ unsigned InBits = InVT.getScalarSizeInBits();
KnownZero = KnownZero.trunc(InBits);
KnownOne = KnownOne.trunc(InBits);
- computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
-
- // Note if the sign bit is known to be zero or one.
- bool SignBitKnownZero = KnownZero.isNegative();
- bool SignBitKnownOne = KnownOne.isNegative();
-
- KnownZero = KnownZero.zext(BitWidth);
- KnownOne = KnownOne.zext(BitWidth);
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
+ Depth + 1);
- // If the sign bit is known zero or one, the top bits match.
- if (SignBitKnownZero)
- KnownZero |= NewBits;
- else if (SignBitKnownOne)
- KnownOne |= NewBits;
+ // If the sign bit is known to be zero or one, then sext will extend
+ // it to the top bits, else it will just zext.
+ KnownZero = KnownZero.sext(BitWidth);
+ KnownOne = KnownOne.sext(BitWidth);
break;
}
case ISD::ANY_EXTEND: {
EVT InVT = Op.getOperand(0).getValueType();
- unsigned InBits = InVT.getScalarType().getSizeInBits();
+ unsigned InBits = InVT.getScalarSizeInBits();
KnownZero = KnownZero.trunc(InBits);
KnownOne = KnownOne.trunc(InBits);
computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
@@ -2308,10 +2484,11 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
}
case ISD::TRUNCATE: {
EVT InVT = Op.getOperand(0).getValueType();
- unsigned InBits = InVT.getScalarType().getSizeInBits();
+ unsigned InBits = InVT.getScalarSizeInBits();
KnownZero = KnownZero.zext(InBits);
KnownOne = KnownOne.zext(InBits);
- computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
+ Depth + 1);
KnownZero = KnownZero.trunc(BitWidth);
KnownOne = KnownOne.trunc(BitWidth);
break;
@@ -2330,7 +2507,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
break;
case ISD::SUB: {
- if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0))) {
+ if (ConstantSDNode *CLHS = isConstOrConstSplat(Op.getOperand(0))) {
// We know that the top bits of C-X are clear if X contains less bits
// than C (i.e. no wrap-around can happen). For example, 20-X is
// positive if we can prove that X is >= 0 and < 16.
@@ -2338,7 +2515,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros();
// NLZ can't be BitWidth with no sign bit
APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
- computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
+ computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts,
+ Depth + 1);
// If all of the MaskV bits are known to be zero, then we know the
// output top bits are zero, because we now know that the output is
@@ -2350,8 +2528,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
}
}
}
+ LLVM_FALLTHROUGH;
}
- // fall through
case ISD::ADD:
case ISD::ADDE: {
// Output known-0 bits are known if clear or set in both the low clear bits
@@ -2361,17 +2539,19 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
// known to be clear. For example, if one input has the top 10 bits clear
// and the other has the top 8 bits clear, we know the top 7 bits of the
// output must be clear.
- computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
+ computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
+ Depth + 1);
unsigned KnownZeroHigh = KnownZero2.countLeadingOnes();
unsigned KnownZeroLow = KnownZero2.countTrailingOnes();
- computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
+ computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts,
+ Depth + 1);
KnownZeroHigh = std::min(KnownZeroHigh,
KnownZero2.countLeadingOnes());
KnownZeroLow = std::min(KnownZeroLow,
KnownZero2.countTrailingOnes());
- if (Op.getOpcode() == ISD::ADD) {
+ if (Opcode == ISD::ADD) {
KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroLow);
if (KnownZeroHigh > 1)
KnownZero |= APInt::getHighBitsSet(BitWidth, KnownZeroHigh - 1);
@@ -2387,11 +2567,12 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
break;
}
case ISD::SREM:
- if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ if (ConstantSDNode *Rem = isConstOrConstSplat(Op.getOperand(1))) {
const APInt &RA = Rem->getAPIntValue().abs();
if (RA.isPowerOf2()) {
APInt LowBits = RA - 1;
- computeKnownBits(Op.getOperand(0), KnownZero2,KnownOne2,Depth+1);
+ computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
+ Depth + 1);
// The low bits of the first operand are unchanged by the srem.
KnownZero = KnownZero2 & LowBits;
@@ -2411,11 +2592,12 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
}
break;
case ISD::UREM: {
- if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ if (ConstantSDNode *Rem = isConstOrConstSplat(Op.getOperand(1))) {
const APInt &RA = Rem->getAPIntValue();
if (RA.isPowerOf2()) {
APInt LowBits = (RA - 1);
- computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth + 1);
+ computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
+ Depth + 1);
// The upper bits are all zero, the lower ones are unchanged.
KnownZero = KnownZero2 | ~LowBits;
@@ -2426,8 +2608,10 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
// Since the result is less than or equal to either operand, any leading
// zero bits in either operand must also exist in the result.
- computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
- computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, Depth+1);
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
+ Depth + 1);
+ computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts,
+ Depth + 1);
uint32_t Leaders = std::max(KnownZero.countLeadingOnes(),
KnownZero2.countLeadingOnes());
@@ -2437,9 +2621,8 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
}
case ISD::EXTRACT_ELEMENT: {
computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1);
- const unsigned Index =
- cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
- const unsigned BitWidth = Op.getValueType().getSizeInBits();
+ const unsigned Index = Op.getConstantOperandVal(1);
+ const unsigned BitWidth = Op.getValueSizeInBits();
// Remove low part of known bits mask
KnownZero = KnownZero.getHiBits(KnownZero.getBitWidth() - Index * BitWidth);
@@ -2450,8 +2633,74 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
KnownOne = KnownOne.trunc(BitWidth);
break;
}
+ case ISD::EXTRACT_VECTOR_ELT: {
+ SDValue InVec = Op.getOperand(0);
+ SDValue EltNo = Op.getOperand(1);
+ EVT VecVT = InVec.getValueType();
+ const unsigned BitWidth = Op.getValueSizeInBits();
+ const unsigned EltBitWidth = VecVT.getScalarSizeInBits();
+ const unsigned NumSrcElts = VecVT.getVectorNumElements();
+ // If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
+ // anything about the extended bits.
+ if (BitWidth > EltBitWidth) {
+ KnownZero = KnownZero.trunc(EltBitWidth);
+ KnownOne = KnownOne.trunc(EltBitWidth);
+ }
+ ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
+ if (ConstEltNo && ConstEltNo->getAPIntValue().ult(NumSrcElts)) {
+ // If we know the element index, just demand that vector element.
+ unsigned Idx = ConstEltNo->getZExtValue();
+ APInt DemandedElt = APInt::getOneBitSet(NumSrcElts, Idx);
+ computeKnownBits(InVec, KnownZero, KnownOne, DemandedElt, Depth + 1);
+ } else {
+ // Unknown element index, so ignore DemandedElts and demand them all.
+ computeKnownBits(InVec, KnownZero, KnownOne, Depth + 1);
+ }
+ if (BitWidth > EltBitWidth) {
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
+ }
+ break;
+ }
+ case ISD::INSERT_VECTOR_ELT: {
+ SDValue InVec = Op.getOperand(0);
+ SDValue InVal = Op.getOperand(1);
+ SDValue EltNo = Op.getOperand(2);
+
+ ConstantSDNode *CEltNo = dyn_cast<ConstantSDNode>(EltNo);
+ if (CEltNo && CEltNo->getAPIntValue().ult(NumElts)) {
+ // If we know the element index, split the demand between the
+ // source vector and the inserted element.
+ KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth);
+ unsigned EltIdx = CEltNo->getZExtValue();
+
+ // If we demand the inserted element then add its common known bits.
+ if (DemandedElts[EltIdx]) {
+ computeKnownBits(InVal, KnownZero2, KnownOne2, Depth + 1);
+ KnownOne &= KnownOne2.zextOrTrunc(KnownOne.getBitWidth());
+ KnownZero &= KnownZero2.zextOrTrunc(KnownZero.getBitWidth());
+ }
+
+ // If we demand the source vector then add its common known bits, ensuring
+ // that we don't demand the inserted element.
+ APInt VectorElts = DemandedElts & ~(APInt::getOneBitSet(NumElts, EltIdx));
+ if (!!VectorElts) {
+ computeKnownBits(InVec, KnownZero2, KnownOne2, VectorElts, Depth + 1);
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ }
+ } else {
+ // Unknown element index, so ignore DemandedElts and demand them all.
+ computeKnownBits(InVec, KnownZero, KnownOne, Depth + 1);
+ computeKnownBits(InVal, KnownZero2, KnownOne2, Depth + 1);
+ KnownOne &= KnownOne2.zextOrTrunc(KnownOne.getBitWidth());
+ KnownZero &= KnownZero2.zextOrTrunc(KnownZero.getBitWidth());
+ }
+ break;
+ }
case ISD::BSWAP: {
- computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth+1);
+ computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, DemandedElts,
+ Depth + 1);
KnownZero = KnownZero2.byteSwap();
KnownOne = KnownOne2.byteSwap();
break;
@@ -2460,13 +2709,15 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
case ISD::SMAX:
case ISD::UMIN:
case ISD::UMAX: {
- APInt Op0Zero, Op0One;
- APInt Op1Zero, Op1One;
- computeKnownBits(Op.getOperand(0), Op0Zero, Op0One, Depth);
- computeKnownBits(Op.getOperand(1), Op1Zero, Op1One, Depth);
-
- KnownZero = Op0Zero & Op1Zero;
- KnownOne = Op0One & Op1One;
+ computeKnownBits(Op.getOperand(0), KnownZero, KnownOne, DemandedElts,
+ Depth + 1);
+ // If we don't know any bits, early out.
+ if (!KnownOne && !KnownZero)
+ break;
+ computeKnownBits(Op.getOperand(1), KnownZero2, KnownOne2, DemandedElts,
+ Depth + 1);
+ KnownZero &= KnownZero2;
+ KnownOne &= KnownOne2;
break;
}
case ISD::FrameIndex:
@@ -2479,9 +2730,9 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
break;
default:
- if (Op.getOpcode() < ISD::BUILTIN_OP_END)
+ if (Opcode < ISD::BUILTIN_OP_END)
break;
- // Fallthrough
+ LLVM_FALLTHROUGH;
case ISD::INTRINSIC_WO_CHAIN:
case ISD::INTRINSIC_W_CHAIN:
case ISD::INTRINSIC_VOID:
@@ -2494,6 +2745,13 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero,
}
bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const {
+ EVT OpVT = Val.getValueType();
+ unsigned BitWidth = OpVT.getScalarSizeInBits();
+
+ // Is the constant a known power of 2?
+ if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(Val))
+ return Const->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2();
+
// A left-shift of a constant one will have exactly one bit set because
// shifting the bit off the end is undefined.
if (Val.getOpcode() == ISD::SHL) {
@@ -2510,12 +2768,19 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const {
return true;
}
+ // Are all operands of a build vector constant powers of two?
+ if (Val.getOpcode() == ISD::BUILD_VECTOR)
+ if (llvm::all_of(Val->ops(), [this, BitWidth](SDValue E) {
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(E))
+ return C->getAPIntValue().zextOrTrunc(BitWidth).isPowerOf2();
+ return false;
+ }))
+ return true;
+
// More could be done here, though the above checks are enough
// to handle some common cases.
// Fall back to computeKnownBits to catch other known cases.
- EVT OpVT = Val.getValueType();
- unsigned BitWidth = OpVT.getScalarType().getSizeInBits();
APInt KnownZero, KnownOne;
computeKnownBits(Val, KnownZero, KnownOne);
return (KnownZero.countPopulation() == BitWidth - 1) &&
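The two new checks above handle the easy cases before falling back to computeKnownBits: a plain constant is a power of two when exactly one bit is set, and a BUILD_VECTOR qualifies when every constant lane is. Equivalent scalar logic as a sketch, with illustrative names:

#include <algorithm>
#include <cstdint>
#include <vector>

// A value is a power of two iff it is nonzero and has exactly one bit set.
bool isPow2(uint64_t v) { return v != 0 && (v & (v - 1)) == 0; }

// Lane-wise generalization: every constant lane must itself be a power of two.
bool allLanesPow2(const std::vector<uint64_t> &lanes) {
  return !lanes.empty() &&
         std::all_of(lanes.begin(), lanes.end(),
                     [](uint64_t v) { return isPow2(v); });
}
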
@@ -2525,7 +2790,7 @@ bool SelectionDAG::isKnownToBeAPowerOfTwo(SDValue Val) const {
unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
EVT VT = Op.getValueType();
assert(VT.isInteger() && "Invalid VT!");
- unsigned VTBits = VT.getScalarType().getSizeInBits();
+ unsigned VTBits = VT.getScalarSizeInBits();
unsigned Tmp, Tmp2;
unsigned FirstAnswer = 1;
@@ -2547,14 +2812,12 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
}
case ISD::SIGN_EXTEND:
- Tmp =
- VTBits-Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
+ Tmp = VTBits - Op.getOperand(0).getScalarValueSizeInBits();
return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp;
case ISD::SIGN_EXTEND_INREG:
// Max of the input and what this extends.
- Tmp =
- cast<VTSDNode>(Op.getOperand(1))->getVT().getScalarType().getSizeInBits();
+ Tmp = cast<VTSDNode>(Op.getOperand(1))->getVT().getScalarSizeInBits();
Tmp = VTBits-Tmp+1;
Tmp2 = ComputeNumSignBits(Op.getOperand(0), Depth+1);
@@ -2563,17 +2826,18 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
case ISD::SRA:
Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
// SRA X, C -> adds C sign bits.
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
- Tmp += C->getZExtValue();
- if (Tmp > VTBits) Tmp = VTBits;
+ if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1))) {
+ APInt ShiftVal = C->getAPIntValue();
+ ShiftVal += Tmp;
+ Tmp = ShiftVal.uge(VTBits) ? VTBits : ShiftVal.getZExtValue();
}
return Tmp;
case ISD::SHL:
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(1))) {
// shl destroys sign bits.
Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
- if (C->getZExtValue() >= VTBits || // Bad shift.
- C->getZExtValue() >= Tmp) break; // Shifted all sign bits out.
+ if (C->getAPIntValue().uge(VTBits) || // Bad shift.
+ C->getAPIntValue().uge(Tmp)) break; // Shifted all sign bits out.
return Tmp - C->getZExtValue();
}
break;
@@ -2679,7 +2943,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
if (Tmp2 == 1) return 1;
// Handle NEG.
- if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0)))
+ if (ConstantSDNode *CLHS = isConstOrConstSplat(Op.getOperand(0)))
if (CLHS->isNullValue()) {
APInt KnownZero, KnownOne;
computeKnownBits(Op.getOperand(1), KnownZero, KnownOne, Depth+1);
@@ -2701,25 +2965,50 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
if (Tmp == 1) return 1; // Early out.
return std::min(Tmp, Tmp2)-1;
- case ISD::TRUNCATE:
- // FIXME: it's tricky to do anything useful for this, but it is an important
- // case for targets like X86.
+ case ISD::TRUNCATE: {
+ // Check if the sign bits of source go down as far as the truncated value.
+ unsigned NumSrcBits = Op.getOperand(0).getScalarValueSizeInBits();
+ unsigned NumSrcSignBits = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
+ if (NumSrcSignBits > (NumSrcBits - VTBits))
+ return NumSrcSignBits - (NumSrcBits - VTBits);
break;
+ }
case ISD::EXTRACT_ELEMENT: {
const int KnownSign = ComputeNumSignBits(Op.getOperand(0), Depth+1);
- const int BitWidth = Op.getValueType().getSizeInBits();
- const int Items =
- Op.getOperand(0).getValueType().getSizeInBits() / BitWidth;
+ const int BitWidth = Op.getValueSizeInBits();
+ const int Items = Op.getOperand(0).getValueSizeInBits() / BitWidth;
// Get reverse index (starting from 1), Op1 value indexes elements from
// little end. Sign starts at big end.
- const int rIndex = Items - 1 -
- cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ const int rIndex = Items - 1 - Op.getConstantOperandVal(1);
// If the sign portion ends in our element the subtraction gives correct
// result. Otherwise it gives either negative or > bitwidth result
return std::max(std::min(KnownSign - rIndex * BitWidth, BitWidth), 0);
}
+ case ISD::EXTRACT_VECTOR_ELT: {
+ // At the moment we keep this simple and skip tracking the specific
+ // element. This way we get the lowest common denominator for all elements
+ // of the vector.
+ // TODO: get information for given vector element
+ const unsigned BitWidth = Op.getValueSizeInBits();
+ const unsigned EltBitWidth = Op.getOperand(0).getScalarValueSizeInBits();
+ // If BitWidth > EltBitWidth the value is anyext:ed, and we do not know
+ // anything about sign bits. But if the sizes match we can derive knowledge
+ // about sign bits from the vector operand.
+ if (BitWidth == EltBitWidth)
+ return ComputeNumSignBits(Op.getOperand(0), Depth+1);
+ break;
+ }
+ case ISD::EXTRACT_SUBVECTOR:
+ return ComputeNumSignBits(Op.getOperand(0), Depth + 1);
+ case ISD::CONCAT_VECTORS:
+ // Determine the minimum number of sign bits across all input vectors.
+ // Early out if the result is already 1.
+ Tmp = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
+ for (unsigned i = 1, e = Op.getNumOperands(); (i < e) && (Tmp > 1); ++i)
+ Tmp = std::min(Tmp, ComputeNumSignBits(Op.getOperand(i), Depth + 1));
+ return Tmp;
}
// If we are looking at the loaded value of the SDNode.
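The new TRUNCATE case above replaces the old give-up behaviour: if the source's run of redundant sign bits extends past the high bits being dropped, whatever is left of that run survives in the truncated value; otherwise only the minimum of one sign bit is claimed. As a standalone sketch of that rule:

// srcSignBits: redundant sign bits in the wide source value.
// srcBits/dstBits: bit widths before and after truncation.
unsigned signBitsAfterTruncate(unsigned srcSignBits, unsigned srcBits,
                               unsigned dstBits) {
  unsigned dropped = srcBits - dstBits;
  return srcSignBits > dropped ? srcSignBits - dropped : 1;
}
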
@@ -2730,10 +3019,10 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const {
switch (ExtType) {
default: break;
case ISD::SEXTLOAD: // '17' bits known
- Tmp = LD->getMemoryVT().getScalarType().getSizeInBits();
+ Tmp = LD->getMemoryVT().getScalarSizeInBits();
return VTBits-Tmp+1;
case ISD::ZEXTLOAD: // '16' bits known
- Tmp = LD->getMemoryVT().getScalarType().getSizeInBits();
+ Tmp = LD->getMemoryVT().getScalarSizeInBits();
return VTBits-Tmp;
}
}
@@ -2842,6 +3131,16 @@ bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const {
static SDValue FoldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
ArrayRef<SDValue> Ops,
llvm::SelectionDAG &DAG) {
+ assert(!Ops.empty() && "Can't concatenate an empty list of vectors!");
+ assert(llvm::all_of(Ops,
+ [Ops](SDValue Op) {
+ return Ops[0].getValueType() == Op.getValueType();
+ }) &&
+ "Concatenation of vectors with inconsistent value types!");
+ assert((Ops.size() * Ops[0].getValueType().getVectorNumElements()) ==
+ VT.getVectorNumElements() &&
+ "Incorrect element count in vector concatenation!");
+
if (Ops.size() == 1)
return Ops[0];
@@ -2875,7 +3174,7 @@ static SDValue FoldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
? DAG.getZExtOrTrunc(Op, DL, SVT)
: DAG.getSExtOrTrunc(Op, DL, SVT);
- return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Elts);
+ return DAG.getBuildVector(VT, DL, Elts);
}
/// Gets or creates the specified node.
@@ -2924,13 +3223,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
}
case ISD::BITCAST:
if (VT == MVT::f16 && C->getValueType(0) == MVT::i16)
- return getConstantFP(APFloat(APFloat::IEEEhalf, Val), DL, VT);
+ return getConstantFP(APFloat(APFloat::IEEEhalf(), Val), DL, VT);
if (VT == MVT::f32 && C->getValueType(0) == MVT::i32)
- return getConstantFP(APFloat(APFloat::IEEEsingle, Val), DL, VT);
+ return getConstantFP(APFloat(APFloat::IEEEsingle(), Val), DL, VT);
if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)
- return getConstantFP(APFloat(APFloat::IEEEdouble, Val), DL, VT);
+ return getConstantFP(APFloat(APFloat::IEEEdouble(), Val), DL, VT);
if (VT == MVT::f128 && C->getValueType(0) == MVT::i128)
- return getConstantFP(APFloat(APFloat::IEEEquad, Val), DL, VT);
+ return getConstantFP(APFloat(APFloat::IEEEquad(), Val), DL, VT);
break;
case ISD::BSWAP:
return getConstant(Val.byteSwap(), DL, VT, C->isTargetOpcode(),
@@ -3162,8 +3461,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
break;
case ISD::BITCAST:
// Basic sanity checking.
- assert(VT.getSizeInBits() == Operand.getValueType().getSizeInBits()
- && "Cannot BITCAST between types of different sizes!");
+ assert(VT.getSizeInBits() == Operand.getValueSizeInBits() &&
+ "Cannot BITCAST between types of different sizes!");
if (VT == Operand.getValueType()) return Operand; // noop conversion.
if (OpOpcode == ISD::BITCAST) // bitconv(bitconv(x)) -> bitconv(x)
return getNode(ISD::BITCAST, DL, VT, Operand.getOperand(0));
@@ -3333,25 +3632,22 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
EVT SVT = VT.getScalarType();
SmallVector<SDValue, 4> Outputs;
for (unsigned I = 0, E = BV1->getNumOperands(); I != E; ++I) {
- ConstantSDNode *V1 = dyn_cast<ConstantSDNode>(BV1->getOperand(I));
- ConstantSDNode *V2 = dyn_cast<ConstantSDNode>(BV2->getOperand(I));
- if (!V1 || !V2) // Not a constant, bail.
- return SDValue();
-
- if (V1->isOpaque() || V2->isOpaque())
- return SDValue();
+ SDValue V1 = BV1->getOperand(I);
+ SDValue V2 = BV2->getOperand(I);
// Avoid BUILD_VECTOR nodes that perform implicit truncation.
- // FIXME: This is valid and could be handled by truncating the APInts.
+ // FIXME: This is valid and could be handled by truncation.
if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT)
return SDValue();
// Fold one vector element.
- std::pair<APInt, bool> Folded = FoldValue(Opcode, V1->getAPIntValue(),
- V2->getAPIntValue());
- if (!Folded.second)
+ SDValue ScalarResult = getNode(Opcode, DL, SVT, V1, V2);
+
+ // Scalar folding only succeeded if the result is a constant or UNDEF.
+ if (!ScalarResult.isUndef() && ScalarResult.getOpcode() != ISD::Constant &&
+ ScalarResult.getOpcode() != ISD::ConstantFP)
return SDValue();
- Outputs.push_back(getConstant(Folded.first, DL, SVT));
+ Outputs.push_back(ScalarResult);
}
assert(VT.getVectorNumElements() == Outputs.size() &&
@@ -3394,8 +3690,8 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
// All operands must be vector types with the same number of elements as
// the result type and must be either UNDEF or a build vector of constant
// or UNDEF scalars.
- if (!std::all_of(Ops.begin(), Ops.end(), IsConstantBuildVectorOrUndef) ||
- !std::all_of(Ops.begin(), Ops.end(), IsScalarOrSameVectorSize))
+ if (!all_of(Ops, IsConstantBuildVectorOrUndef) ||
+ !all_of(Ops, IsScalarOrSameVectorSize))
return SDValue();
// If we are comparing vectors, then the result needs to be a i1 boolean
@@ -3577,8 +3873,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
// amounts. This catches things like trying to shift an i1024 value by an
// i8, which is easy to fall into in generic code that uses
// TLI.getShiftAmount().
- assert(N2.getValueType().getSizeInBits() >=
- Log2_32_Ceil(N1.getValueType().getSizeInBits()) &&
+ assert(N2.getValueSizeInBits() >= Log2_32_Ceil(N1.getValueSizeInBits()) &&
"Invalid use of small shift amount with oversized value!");
// Always fold shifts of i1 values so the code generator doesn't need to
@@ -3609,7 +3904,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
assert(VT.isFloatingPoint() &&
N1.getValueType().isFloatingPoint() &&
VT.bitsLE(N1.getValueType()) &&
- N2C && "Invalid FP_ROUND!");
+ N2C && (N2C->getZExtValue() == 0 || N2C->getZExtValue() == 1) &&
+ "Invalid FP_ROUND!");
if (N1.getValueType() == VT) return N1; // noop conversion.
break;
case ISD::AssertSext:
@@ -3640,7 +3936,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if (EVT == VT) return N1; // Not actually extending
auto SignExtendInReg = [&](APInt Val) {
- unsigned FromBits = EVT.getScalarType().getSizeInBits();
+ unsigned FromBits = EVT.getScalarSizeInBits();
Val <<= Val.getBitWidth() - FromBits;
Val = Val.ashr(Val.getBitWidth() - FromBits);
return getConstant(Val, DL, VT.getScalarType());
@@ -3768,6 +4064,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
// Trivial extraction.
if (VT.getSimpleVT() == N1.getSimpleValueType())
return N1;
+
+ // EXTRACT_SUBVECTOR of INSERT_SUBVECTOR is often created
+ // during shuffle legalization.
+ if (N1.getOpcode() == ISD::INSERT_SUBVECTOR && N2 == N1.getOperand(2) &&
+ VT == N1.getOperand(1).getValueType())
+ return N1.getOperand(1);
}
break;
}
@@ -3868,7 +4170,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
// Handle undef ^ undef -> 0 special case. This is a common
// idiom (misuse).
return getConstant(0, DL, VT);
- // fallthrough
+ LLVM_FALLTHROUGH;
case ISD::ADD:
case ISD::ADDC:
case ISD::ADDE:
@@ -3977,6 +4279,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
break;
case ISD::VECTOR_SHUFFLE:
llvm_unreachable("should use getVectorShuffle constructor!");
+ case ISD::INSERT_VECTOR_ELT: {
+ ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3);
+ // INSERT_VECTOR_ELT into out-of-bounds element is an UNDEF
+ if (N3C && N3C->getZExtValue() >= N1.getValueType().getVectorNumElements())
+ return getUNDEF(VT);
+ break;
+ }
case ISD::INSERT_SUBVECTOR: {
SDValue Index = N3;
if (VT.isSimple() && N1.getValueType().isSimple()
@@ -4072,7 +4381,7 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
const SDLoc &dl) {
assert(!Value.isUndef());
- unsigned NumBits = VT.getScalarType().getSizeInBits();
+ unsigned NumBits = VT.getScalarSizeInBits();
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
assert(C->getAPIntValue().getBitWidth() == 8);
APInt Val = APInt::getSplat(NumBits, C->getAPIntValue());
@@ -4306,10 +4615,10 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
std::vector<EVT> MemOps;
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
bool OptSize = shouldLowerMemFuncForSize(MF);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
- if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
+ if (FI && !MFI.isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
unsigned SrcAlign = DAG.InferPtrAlignment(Src);
if (Align > SrcAlign)
@@ -4342,8 +4651,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
if (NewAlign > Align) {
// Give the stack frame object a larger alignment if needed.
- if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
- MFI->setObjectAlignment(FI->getIndex(), NewAlign);
+ if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign)
+ MFI.setObjectAlignment(FI->getIndex(), NewAlign);
Align = NewAlign;
}
}
@@ -4422,10 +4731,10 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
std::vector<EVT> MemOps;
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
bool OptSize = shouldLowerMemFuncForSize(MF);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
- if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
+ if (FI && !MFI.isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
unsigned SrcAlign = DAG.InferPtrAlignment(Src);
if (Align > SrcAlign)
@@ -4445,8 +4754,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty);
if (NewAlign > Align) {
// Give the stack frame object a larger alignment if needed.
- if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
- MFI->setObjectAlignment(FI->getIndex(), NewAlign);
+ if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign)
+ MFI.setObjectAlignment(FI->getIndex(), NewAlign);
Align = NewAlign;
}
}
@@ -4519,10 +4828,10 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
std::vector<EVT> MemOps;
bool DstAlignCanChange = false;
MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
bool OptSize = shouldLowerMemFuncForSize(MF);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
- if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
+ if (FI && !MFI.isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
bool IsZeroVal =
isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();
@@ -4538,8 +4847,8 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
unsigned NewAlign = (unsigned)DAG.getDataLayout().getABITypeAlignment(Ty);
if (NewAlign > Align) {
// Give the stack frame object a larger alignment if needed.
- if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign)
- MFI->setObjectAlignment(FI->getIndex(), NewAlign);
+ if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign)
+ MFI.setObjectAlignment(FI->getIndex(), NewAlign);
Align = NewAlign;
}
}
@@ -4796,10 +5105,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
SDVTList VTList, ArrayRef<SDValue> Ops,
- MachineMemOperand *MMO,
- AtomicOrdering SuccessOrdering,
- AtomicOrdering FailureOrdering,
- SynchronizationScope SynchScope) {
+ MachineMemOperand *MMO) {
FoldingSetNodeID ID;
ID.AddInteger(MemVT.getRawBits());
AddNodeIDNode(ID, Opcode, VTList, Ops);
@@ -4811,8 +5117,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
}
auto *N = newSDNode<AtomicSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(),
- VTList, MemVT, MMO, SuccessOrdering,
- FailureOrdering, SynchScope);
+ VTList, MemVT, MMO);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
@@ -4820,14 +5125,6 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
return SDValue(N, 0);
}
-SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
- SDVTList VTList, ArrayRef<SDValue> Ops,
- MachineMemOperand *MMO, AtomicOrdering Ordering,
- SynchronizationScope SynchScope) {
- return getAtomic(Opcode, dl, MemVT, VTList, Ops, MMO, Ordering,
- Ordering, SynchScope);
-}
-
SDValue SelectionDAG::getAtomicCmpSwap(
unsigned Opcode, const SDLoc &dl, EVT MemVT, SDVTList VTs, SDValue Chain,
SDValue Ptr, SDValue Cmp, SDValue Swp, MachinePointerInfo PtrInfo,
@@ -4847,26 +5144,23 @@ SDValue SelectionDAG::getAtomicCmpSwap(
auto Flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad |
MachineMemOperand::MOStore;
MachineMemOperand *MMO =
- MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment);
+ MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment,
+ AAMDNodes(), nullptr, SynchScope, SuccessOrdering,
+ FailureOrdering);
- return getAtomicCmpSwap(Opcode, dl, MemVT, VTs, Chain, Ptr, Cmp, Swp, MMO,
- SuccessOrdering, FailureOrdering, SynchScope);
+ return getAtomicCmpSwap(Opcode, dl, MemVT, VTs, Chain, Ptr, Cmp, Swp, MMO);
}
SDValue SelectionDAG::getAtomicCmpSwap(unsigned Opcode, const SDLoc &dl,
EVT MemVT, SDVTList VTs, SDValue Chain,
SDValue Ptr, SDValue Cmp, SDValue Swp,
- MachineMemOperand *MMO,
- AtomicOrdering SuccessOrdering,
- AtomicOrdering FailureOrdering,
- SynchronizationScope SynchScope) {
+ MachineMemOperand *MMO) {
assert(Opcode == ISD::ATOMIC_CMP_SWAP ||
Opcode == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS);
assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types");
SDValue Ops[] = {Chain, Ptr, Cmp, Swp};
- return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO,
- SuccessOrdering, FailureOrdering, SynchScope);
+ return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO);
}
SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
@@ -4892,16 +5186,15 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
MachineMemOperand *MMO =
MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags,
- MemVT.getStoreSize(), Alignment);
+ MemVT.getStoreSize(), Alignment, AAMDNodes(),
+ nullptr, SynchScope, Ordering);
- return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Val, MMO,
- Ordering, SynchScope);
+ return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Val, MMO);
}
SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
SDValue Chain, SDValue Ptr, SDValue Val,
- MachineMemOperand *MMO, AtomicOrdering Ordering,
- SynchronizationScope SynchScope) {
+ MachineMemOperand *MMO) {
assert((Opcode == ISD::ATOMIC_LOAD_ADD ||
Opcode == ISD::ATOMIC_LOAD_SUB ||
Opcode == ISD::ATOMIC_LOAD_AND ||
@@ -4921,18 +5214,17 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
SDVTList VTs = Opcode == ISD::ATOMIC_STORE ? getVTList(MVT::Other) :
getVTList(VT, MVT::Other);
SDValue Ops[] = {Chain, Ptr, Val};
- return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO, Ordering, SynchScope);
+ return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO);
}
SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
EVT VT, SDValue Chain, SDValue Ptr,
- MachineMemOperand *MMO, AtomicOrdering Ordering,
- SynchronizationScope SynchScope) {
+ MachineMemOperand *MMO) {
assert(Opcode == ISD::ATOMIC_LOAD && "Invalid Atomic Op");
SDVTList VTs = getVTList(VT, MVT::Other);
SDValue Ops[] = {Chain, Ptr};
- return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO, Ordering, SynchScope);
+ return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO);
}
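A minimal sketch of the calling convention implied by the hunks above, not part of the patch itself: the atomic ordering and synchronization scope now travel on the MachineMemOperand, so the getAtomic/getAtomicCmpSwap overloads only take the MMO. Names such as MF, PtrInfo, MemVT, Alignment, Chain, Ptr, Cmp, Swp and the ordering/scope values are assumed from a typical lowering context.

    MachineMemOperand *MMO = MF.getMachineMemOperand(
        PtrInfo,
        MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad |
            MachineMemOperand::MOStore,
        MemVT.getStoreSize(), Alignment, AAMDNodes(), nullptr, SynchScope,
        SuccessOrdering, FailureOrdering);
    SDValue Swap = DAG.getAtomicCmpSwap(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl,
                                        MemVT, VTs, Chain, Ptr, Cmp, Swp, MMO);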
/// getMergeValues - Create a MERGE_VALUES node from the given operands.
@@ -5056,7 +5348,7 @@ SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
assert(Chain.getValueType() == MVT::Other &&
"Invalid chain type");
if (Alignment == 0) // Ensure that codegen never sees alignment 0
- Alignment = getEVTAlignment(VT);
+ Alignment = getEVTAlignment(MemVT);
MMOFlags |= MachineMemOperand::MOLoad;
assert((MMOFlags & MachineMemOperand::MOStore) == 0);
@@ -5101,9 +5393,8 @@ SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::LOAD, VTs, Ops);
ID.AddInteger(MemVT.getRawBits());
- ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile(),
- MMO->isNonTemporal(),
- MMO->isInvariant()));
+ ID.AddInteger(getSyntheticNodeSubclassData<LoadSDNode>(
+ dl.getIROrder(), VTs, AM, ExtType, MemVT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
@@ -5160,12 +5451,14 @@ SDValue SelectionDAG::getIndexedLoad(SDValue OrigLoad, const SDLoc &dl,
ISD::MemIndexedMode AM) {
LoadSDNode *LD = cast<LoadSDNode>(OrigLoad);
assert(LD->getOffset().isUndef() && "Load is already a indexed load!");
- // Don't propagate the invariant flag.
+ // Don't propagate the invariant or dereferenceable flags.
auto MMOFlags =
- LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOInvariant;
+ LD->getMemOperand()->getFlags() &
+ ~(MachineMemOperand::MOInvariant | MachineMemOperand::MODereferenceable);
return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl,
LD->getChain(), Base, Offset, LD->getPointerInfo(),
- LD->getMemoryVT(), LD->getAlignment(), MMOFlags);
+ LD->getMemoryVT(), LD->getAlignment(), MMOFlags,
+ LD->getAAInfo());
}
SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val,
@@ -5200,8 +5493,8 @@ SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val,
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::STORE, VTs, Ops);
ID.AddInteger(VT.getRawBits());
- ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(),
- MMO->isNonTemporal(), MMO->isInvariant()));
+ ID.AddInteger(getSyntheticNodeSubclassData<StoreSDNode>(
+ dl.getIROrder(), VTs, ISD::UNINDEXED, false, VT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
@@ -5265,8 +5558,8 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val,
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::STORE, VTs, Ops);
ID.AddInteger(SVT.getRawBits());
- ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, MMO->isVolatile(),
- MMO->isNonTemporal(), MMO->isInvariant()));
+ ID.AddInteger(getSyntheticNodeSubclassData<StoreSDNode>(
+ dl.getIROrder(), VTs, ISD::UNINDEXED, true, SVT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
@@ -5311,17 +5604,15 @@ SDValue SelectionDAG::getIndexedStore(SDValue OrigStore, const SDLoc &dl,
SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain,
SDValue Ptr, SDValue Mask, SDValue Src0,
EVT MemVT, MachineMemOperand *MMO,
- ISD::LoadExtType ExtTy) {
+ ISD::LoadExtType ExtTy, bool isExpanding) {
SDVTList VTs = getVTList(VT, MVT::Other);
SDValue Ops[] = { Chain, Ptr, Mask, Src0 };
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MLOAD, VTs, Ops);
ID.AddInteger(VT.getRawBits());
- ID.AddInteger(encodeMemSDNodeFlags(ExtTy, ISD::UNINDEXED,
- MMO->isVolatile(),
- MMO->isNonTemporal(),
- MMO->isInvariant()));
+ ID.AddInteger(getSyntheticNodeSubclassData<MaskedLoadSDNode>(
+ dl.getIROrder(), VTs, ExtTy, isExpanding, MemVT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
@@ -5329,7 +5620,7 @@ SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain,
return SDValue(E, 0);
}
auto *N = newSDNode<MaskedLoadSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
- ExtTy, MemVT, MMO);
+ ExtTy, isExpanding, MemVT, MMO);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
@@ -5340,7 +5631,7 @@ SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain,
SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl,
SDValue Val, SDValue Ptr, SDValue Mask,
EVT MemVT, MachineMemOperand *MMO,
- bool isTrunc) {
+ bool IsTruncating, bool IsCompressing) {
assert(Chain.getValueType() == MVT::Other &&
"Invalid chain type");
EVT VT = Val.getValueType();
@@ -5349,8 +5640,8 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl,
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MSTORE, VTs, Ops);
ID.AddInteger(VT.getRawBits());
- ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(),
- MMO->isNonTemporal(), MMO->isInvariant()));
+ ID.AddInteger(getSyntheticNodeSubclassData<MaskedStoreSDNode>(
+ dl.getIROrder(), VTs, IsTruncating, IsCompressing, MemVT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
@@ -5358,7 +5649,7 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl,
return SDValue(E, 0);
}
auto *N = newSDNode<MaskedStoreSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
- isTrunc, MemVT, MMO);
+ IsTruncating, IsCompressing, MemVT, MMO);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
@@ -5374,10 +5665,8 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MGATHER, VTs, Ops);
ID.AddInteger(VT.getRawBits());
- ID.AddInteger(encodeMemSDNodeFlags(ISD::NON_EXTLOAD, ISD::UNINDEXED,
- MMO->isVolatile(),
- MMO->isNonTemporal(),
- MMO->isInvariant()));
+ ID.AddInteger(getSyntheticNodeSubclassData<MaskedGatherSDNode>(
+ dl.getIROrder(), VTs, VT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
@@ -5411,9 +5700,8 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MSCATTER, VTs, Ops);
ID.AddInteger(VT.getRawBits());
- ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(),
- MMO->isNonTemporal(),
- MMO->isInvariant()));
+ ID.AddInteger(getSyntheticNodeSubclassData<MaskedScatterSDNode>(
+ dl.getIROrder(), VTs, VT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
@@ -5545,7 +5833,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, SDVTList VTList,
if (ConstantSDNode *AndRHS = dyn_cast<ConstantSDNode>(N3.getOperand(1))) {
// If the and is only masking out bits that cannot effect the shift,
// eliminate the and.
- unsigned NumBits = VT.getScalarType().getSizeInBits()*2;
+ unsigned NumBits = VT.getScalarSizeInBits()*2;
if ((AndRHS->getValue() & (NumBits-1)) == NumBits-1)
return getNode(Opcode, DL, VT, N1, N2, N3.getOperand(0));
}
@@ -5870,21 +6158,6 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
}
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
- EVT VT1, EVT VT2, EVT VT3, EVT VT4,
- ArrayRef<SDValue> Ops) {
- SDVTList VTs = getVTList(VT1, VT2, VT3, VT4);
- return SelectNodeTo(N, MachineOpc, VTs, Ops);
-}
-
-SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
- EVT VT1, EVT VT2,
- SDValue Op1) {
- SDVTList VTs = getVTList(VT1, VT2);
- SDValue Ops[] = { Op1 };
- return SelectNodeTo(N, MachineOpc, VTs, Ops);
-}
-
-SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
EVT VT1, EVT VT2,
SDValue Op1, SDValue Op2) {
SDVTList VTs = getVTList(VT1, VT2);
@@ -5893,24 +6166,6 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
}
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
- EVT VT1, EVT VT2,
- SDValue Op1, SDValue Op2,
- SDValue Op3) {
- SDVTList VTs = getVTList(VT1, VT2);
- SDValue Ops[] = { Op1, Op2, Op3 };
- return SelectNodeTo(N, MachineOpc, VTs, Ops);
-}
-
-SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
- EVT VT1, EVT VT2, EVT VT3,
- SDValue Op1, SDValue Op2,
- SDValue Op3) {
- SDVTList VTs = getVTList(VT1, VT2, VT3);
- SDValue Ops[] = { Op1, Op2, Op3 };
- return SelectNodeTo(N, MachineOpc, VTs, Ops);
-}
-
-SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
SDVTList VTs,ArrayRef<SDValue> Ops) {
SDNode *New = MorphNodeTo(N, ~MachineOpc, VTs, Ops);
// Reset the NodeID to -1.
@@ -5922,14 +6177,14 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
return New;
}
-/// UpdadeSDLocOnMergedSDNode - If the opt level is -O0 then it throws away
+/// UpdateSDLocOnMergeSDNode - If the opt level is -O0 then it throws away
/// the line number information on the merged node since it is not possible to
/// preserve the information that the operation is associated with multiple lines.
/// This will make the debugger work better at -O0, where there is a higher
/// probability of having other instructions associated with that line.
///
/// For IROrder, we keep the smaller of the two
-SDNode *SelectionDAG::UpdadeSDLocOnMergedSDNode(SDNode *N, const SDLoc &OLoc) {
+SDNode *SelectionDAG::UpdateSDLocOnMergeSDNode(SDNode *N, const SDLoc &OLoc) {
DebugLoc NLoc = N->getDebugLoc();
if (NLoc && OptLevel == CodeGenOpt::None && OLoc.getDebugLoc() != NLoc) {
N->setDebugLoc(DebugLoc());
@@ -5963,7 +6218,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, VTs, Ops);
if (SDNode *ON = FindNodeOrInsertPos(ID, SDLoc(N), IP))
- return UpdadeSDLocOnMergedSDNode(ON, SDLoc(N));
+ return UpdateSDLocOnMergeSDNode(ON, SDLoc(N));
}
if (!RemoveNodeFromCSEMaps(N))
@@ -6050,19 +6305,6 @@ MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
}
MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
- EVT VT1, EVT VT2) {
- SDVTList VTs = getVTList(VT1, VT2);
- return getMachineNode(Opcode, dl, VTs, None);
-}
-
-MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
- EVT VT1, EVT VT2, SDValue Op1) {
- SDVTList VTs = getVTList(VT1, VT2);
- SDValue Ops[] = { Op1 };
- return getMachineNode(Opcode, dl, VTs, Ops);
-}
-
-MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
EVT VT1, EVT VT2, SDValue Op1,
SDValue Op2) {
SDVTList VTs = getVTList(VT1, VT2);
@@ -6110,13 +6352,6 @@ MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
}
MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
- EVT VT1, EVT VT2, EVT VT3, EVT VT4,
- ArrayRef<SDValue> Ops) {
- SDVTList VTs = getVTList(VT1, VT2, VT3, VT4);
- return getMachineNode(Opcode, dl, VTs, Ops);
-}
-
-MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &dl,
ArrayRef<EVT> ResultTys,
ArrayRef<SDValue> Ops) {
SDVTList VTs = getVTList(ResultTys);
@@ -6135,7 +6370,7 @@ MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &DL,
AddNodeIDNode(ID, ~Opcode, VTs, Ops);
IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, DL, IP)) {
- return cast<MachineSDNode>(UpdadeSDLocOnMergedSDNode(E, DL));
+ return cast<MachineSDNode>(UpdateSDLocOnMergeSDNode(E, DL));
}
}
@@ -6255,6 +6490,9 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) {
"Cannot replace with this method!");
assert(From != To.getNode() && "Cannot replace uses of with self");
+ // Preserve Debug Values
+ TransferDbgValues(FromN, To);
+
// Iterate over all the existing uses of From. New uses will be added
// to the beginning of the use list, which we avoid visiting.
// This specifically avoids visiting uses of From that arise while the
@@ -6285,8 +6523,6 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) {
AddModifiedNodeToCSEMaps(User);
}
- // Preserve Debug Values
- TransferDbgValues(FromN, To);
// If we just RAUW'd the root, take note.
if (FromN == getRoot())
@@ -6689,6 +6925,40 @@ bool llvm::isBitwiseNot(SDValue V) {
return V.getOpcode() == ISD::XOR && isAllOnesConstant(V.getOperand(1));
}
+ConstantSDNode *llvm::isConstOrConstSplat(SDValue N) {
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
+ return CN;
+
+ if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
+ BitVector UndefElements;
+ ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements);
+
+ // BuildVectors can truncate their operands. Ignore that case here.
+ // FIXME: We blindly ignore splats which include undef which is overly
+ // pessimistic.
+ if (CN && UndefElements.none() &&
+ CN->getValueType(0) == N.getValueType().getScalarType())
+ return CN;
+ }
+
+ return nullptr;
+}
+
+ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N) {
+ if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
+ return CN;
+
+ if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
+ BitVector UndefElements;
+ ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements);
+
+ if (CN && UndefElements.none())
+ return CN;
+ }
+
+ return nullptr;
+}
+
HandleSDNode::~HandleSDNode() {
DropOperands();
}
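A brief usage sketch for the isConstOrConstSplat helpers added above, not taken from the patch: in a DAG combine (N and its operands assumed from that context), the helper lets the same fold handle a scalar constant and a splatted BUILD_VECTOR constant.

    // Fold (or x, 0) -> x for scalars and splat vectors alike.
    if (ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)))
      if (C->isNullValue())
        return N->getOperand(0);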
@@ -6710,11 +6980,11 @@ AddrSpaceCastSDNode::AddrSpaceCastSDNode(unsigned Order, const DebugLoc &dl,
MemSDNode::MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl,
SDVTList VTs, EVT memvt, MachineMemOperand *mmo)
: SDNode(Opc, Order, dl, VTs), MemoryVT(memvt), MMO(mmo) {
- SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(),
- MMO->isNonTemporal(), MMO->isInvariant());
- assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!");
- assert(isNonTemporal() == MMO->isNonTemporal() &&
- "Non-temporal encoding error!");
+ MemSDNodeBits.IsVolatile = MMO->isVolatile();
+ MemSDNodeBits.IsNonTemporal = MMO->isNonTemporal();
+ MemSDNodeBits.IsDereferenceable = MMO->isDereferenceable();
+ MemSDNodeBits.IsInvariant = MMO->isInvariant();
+
// We check here that the size of the memory operand fits within the size of
// the MMO. This is because the MMO might indicate only a possible address
// range instead of specifying the affected memory addresses precisely.
@@ -6939,8 +7209,8 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
for (; i < ResNE; ++i)
Scalars.push_back(getUNDEF(EltVT));
- return getNode(ISD::BUILD_VECTOR, dl,
- EVT::getVectorVT(*getContext(), EltVT, ResNE), Scalars);
+ EVT VecVT = EVT::getVectorVT(*getContext(), EltVT, ResNE);
+ return getBuildVector(VecVT, dl, Scalars);
}
bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
@@ -6962,13 +7232,13 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
if (Loc.getOpcode() == ISD::FrameIndex) {
if (BaseLoc.getOpcode() != ISD::FrameIndex)
return false;
- const MachineFrameInfo *MFI = getMachineFunction().getFrameInfo();
+ const MachineFrameInfo &MFI = getMachineFunction().getFrameInfo();
int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
- int FS = MFI->getObjectSize(FI);
- int BFS = MFI->getObjectSize(BFI);
+ int FS = MFI.getObjectSize(FI);
+ int BFS = MFI.getObjectSize(BFI);
if (FS != BFS || FS != (int)Bytes) return false;
- return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
+ return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
}
// Handle X + C.
@@ -7033,7 +7303,7 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
}
if (FrameIdx != (1 << 31)) {
- const MachineFrameInfo &MFI = *getMachineFunction().getFrameInfo();
+ const MachineFrameInfo &MFI = getMachineFunction().getFrameInfo();
unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx),
FrameOffset);
return FIInfoAlign;
@@ -7124,7 +7394,7 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue,
// false.
unsigned int nOps = getNumOperands();
assert(nOps > 0 && "isConstantSplat has 0-size build vector");
- unsigned EltBitSize = VT.getVectorElementType().getSizeInBits();
+ unsigned EltBitSize = VT.getScalarSizeInBits();
for (unsigned j = 0; j < nOps; ++j) {
unsigned i = isBigEndian ? nOps-1-j : j;
@@ -7265,6 +7535,16 @@ SDNode *SelectionDAG::isConstantIntBuildVectorOrConstantInt(SDValue N) {
return nullptr;
}
+SDNode *SelectionDAG::isConstantFPBuildVectorOrConstantFP(SDValue N) {
+ if (isa<ConstantFPSDNode>(N))
+ return N.getNode();
+
+ if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
+ return N.getNode();
+
+ return nullptr;
+}
+
#ifndef NDEBUG
static void checkForCyclesHelper(const SDNode *N,
SmallPtrSetImpl<const SDNode*> &Visited,
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index e1fc37d2dd8c..cb803585282f 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -183,7 +183,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,
Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi);
Hi =
DAG.getNode(ISD::SHL, DL, TotalVT, Hi,
- DAG.getConstant(Lo.getValueType().getSizeInBits(), DL,
+ DAG.getConstant(Lo.getValueSizeInBits(), DL,
TLI.getPointerTy(DAG.getDataLayout())));
Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo);
Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi);
@@ -833,8 +833,7 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
if (TheReg == SP && Code == InlineAsm::Kind_Clobber) {
// If we clobbered the stack pointer, MFI should know about it.
- assert(DAG.getMachineFunction().getFrameInfo()->
- hasOpaqueSPAdjustment());
+ assert(DAG.getMachineFunction().getFrameInfo().hasOpaqueSPAdjustment());
}
}
}
@@ -931,46 +930,9 @@ SDValue SelectionDAGBuilder::getControlRoot() {
return Root;
}
-/// Copy swift error to the final virtual register at end of a basic block, as
-/// specified by SwiftErrorWorklist, if necessary.
-static void copySwiftErrorsToFinalVRegs(SelectionDAGBuilder &SDB) {
- const TargetLowering &TLI = SDB.DAG.getTargetLoweringInfo();
- if (!TLI.supportSwiftError())
- return;
-
- if (!SDB.FuncInfo.SwiftErrorWorklist.count(SDB.FuncInfo.MBB))
- return;
-
- // Go through entries in SwiftErrorWorklist, and create copy as necessary.
- FunctionLoweringInfo::SwiftErrorVRegs &WorklistEntry =
- SDB.FuncInfo.SwiftErrorWorklist[SDB.FuncInfo.MBB];
- FunctionLoweringInfo::SwiftErrorVRegs &MapEntry =
- SDB.FuncInfo.SwiftErrorMap[SDB.FuncInfo.MBB];
- for (unsigned I = 0, E = WorklistEntry.size(); I < E; I++) {
- unsigned WorkReg = WorklistEntry[I];
-
- // Find the swifterror virtual register for the value in SwiftErrorMap.
- unsigned MapReg = MapEntry[I];
- assert(TargetRegisterInfo::isVirtualRegister(MapReg) &&
- "Entries in SwiftErrorMap should be virtual registers");
-
- if (WorkReg == MapReg)
- continue;
-
- // Create copy from SwiftErrorMap to SwiftWorklist.
- auto &DL = SDB.DAG.getDataLayout();
- SDValue CopyNode = SDB.DAG.getCopyToReg(
- SDB.getRoot(), SDB.getCurSDLoc(), WorkReg,
- SDB.DAG.getRegister(MapReg, EVT(TLI.getPointerTy(DL))));
- MapEntry[I] = WorkReg;
- SDB.DAG.setRoot(CopyNode);
- }
-}
-
void SelectionDAGBuilder::visit(const Instruction &I) {
// Set up outgoing PHI node register values before emitting the terminator.
if (isa<TerminatorInst>(&I)) {
- copySwiftErrorsToFinalVRegs(*this);
HandlePHINodesInSuccessorBlocks(I.getParent());
}
@@ -1021,8 +983,7 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
if (Val.getNode()) {
if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, Offset, false,
Val)) {
- SDV = DAG.getDbgValue(Variable, Expr, Val.getNode(), Val.getResNo(),
- false, Offset, dl, DbgSDNodeOrder);
+ SDV = getDbgValue(Val, Variable, Expr, Offset, dl, DbgSDNodeOrder);
DAG.AddDbgValue(SDV, Val.getNode(), false);
}
} else
@@ -1491,6 +1452,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
const Function *F = I.getParent()->getParent();
if (TLI.supportSwiftError() &&
F->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) {
+ assert(FuncInfo.SwiftErrorArg && "Need a swift error argument");
ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
Flags.setSwiftError();
Outs.push_back(ISD::OutputArg(Flags, EVT(TLI.getPointerTy(DL)) /*vt*/,
@@ -1498,7 +1460,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
true /*isfixed*/, 1 /*origidx*/,
0 /*partOffs*/));
// Create SDNode for the swifterror virtual register.
- OutVals.push_back(DAG.getRegister(FuncInfo.SwiftErrorMap[FuncInfo.MBB][0],
+ OutVals.push_back(DAG.getRegister(FuncInfo.getOrCreateSwiftErrorVReg(
+ FuncInfo.MBB, FuncInfo.SwiftErrorArg),
EVT(TLI.getPointerTy(DL))));
}
@@ -2012,7 +1975,8 @@ static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL,
if (Global) {
MachinePointerInfo MPInfo(Global);
MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(1);
- auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant;
+ auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
+ MachineMemOperand::MODereferenceable;
*MemRefs = MF.getMachineMemOperand(MPInfo, Flags, PtrTy.getSizeInBits() / 8,
DAG.getEVTAlignment(PtrTy));
Node->setMemRefs(MemRefs, MemRefs + 1);
@@ -2033,8 +1997,8 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
- MachineFrameInfo *MFI = ParentBB->getParent()->getFrameInfo();
- int FI = MFI->getStackProtectorIndex();
+ MachineFrameInfo &MFI = ParentBB->getParent()->getFrameInfo();
+ int FI = MFI.getStackProtectorIndex();
SDValue Guard;
SDLoc dl = getCurSDLoc();
@@ -2329,8 +2293,7 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
"Call to landingpad not in landing pad!");
MachineBasicBlock *MBB = FuncInfo.MBB;
- MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
- AddLandingPadInfo(LP, MMI, MBB);
+ addLandingPadInfo(LP, *MBB);
// If there aren't registers to copy the values into (e.g., during SjLj
// exceptions), then don't bother to create these DAG nodes.
@@ -2484,7 +2447,7 @@ static bool isVectorReductionOp(const User *I) {
if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst))
if (FPOp->getFastMathFlags().unsafeAlgebra())
break;
- // Fall through.
+ LLVM_FALLTHROUGH;
default:
return false;
}
@@ -2639,7 +2602,7 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
// Coerce the shift amount to the right type if we can.
if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) {
unsigned ShiftSize = ShiftTy.getSizeInBits();
- unsigned Op2Size = Op2.getValueType().getSizeInBits();
+ unsigned Op2Size = Op2.getValueSizeInBits();
SDLoc DL = getCurSDLoc();
// If the operand is smaller than the shift count type, promote it.
@@ -2650,7 +2613,7 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
// count type has enough bits to represent any shift value, truncate
// it now. This is a common case and it exposes the truncate to
// optimization early.
- else if (ShiftSize >= Log2_32_Ceil(Op2.getValueType().getSizeInBits()))
+ else if (ShiftSize >= Log2_32_Ceil(Op2.getValueSizeInBits()))
Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2);
// Otherwise we'll need to temporarily settle for some other convenient
// type. Type legalization will make adjustments once the shiftee is split.
@@ -2731,7 +2694,7 @@ void SelectionDAGBuilder::visitFCmp(const User &I) {
// Check if the condition of the select has one use or two users that are both
// selects with the same condition.
static bool hasOnlySelectUsers(const Value *Cond) {
- return std::all_of(Cond->user_begin(), Cond->user_end(), [](const Value *V) {
+ return all_of(Cond->users(), [](const Value *V) {
return isa<SelectInst>(V);
});
}
@@ -2998,6 +2961,7 @@ void SelectionDAGBuilder::visitExtractElement(const User &I) {
void SelectionDAGBuilder::visitShuffleVector(const User &I) {
SDValue Src1 = getValue(I.getOperand(0));
SDValue Src2 = getValue(I.getOperand(1));
+ SDLoc DL = getCurSDLoc();
SmallVector<int, 8> Mask;
ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask);
@@ -3009,54 +2973,60 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
unsigned SrcNumElts = SrcVT.getVectorNumElements();
if (SrcNumElts == MaskNumElts) {
- setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2, Mask));
+ setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, Mask));
return;
}
// Normalize the shuffle vector since mask and vector length don't match.
- if (SrcNumElts < MaskNumElts && MaskNumElts % SrcNumElts == 0) {
- // Mask is longer than the source vectors and is a multiple of the source
- // vectors. We can use concatenate vector to make the mask and vectors
- // lengths match.
-
- unsigned NumConcat = MaskNumElts / SrcNumElts;
-
- // Check if the shuffle is some kind of concatenation of the input vectors.
- bool IsConcat = true;
- SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
- for (unsigned i = 0; i != MaskNumElts; ++i) {
- int Idx = Mask[i];
- if (Idx < 0)
- continue;
- // Ensure the indices in each SrcVT sized piece are sequential and that
- // the same source is used for the whole piece.
- if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
- (ConcatSrcs[i / SrcNumElts] >= 0 &&
- ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts))) {
- IsConcat = false;
- break;
+ if (SrcNumElts < MaskNumElts) {
+ // Mask is longer than the source vectors. We can concatenate the source
+ // vectors to make the mask and vector lengths match.
+
+ if (MaskNumElts % SrcNumElts == 0) {
+ // Mask length is a multiple of the source vector length.
+ // Check if the shuffle is some kind of concatenation of the input
+ // vectors.
+ unsigned NumConcat = MaskNumElts / SrcNumElts;
+ bool IsConcat = true;
+ SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
+ for (unsigned i = 0; i != MaskNumElts; ++i) {
+ int Idx = Mask[i];
+ if (Idx < 0)
+ continue;
+ // Ensure the indices in each SrcVT sized piece are sequential and that
+ // the same source is used for the whole piece.
+ if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
+ (ConcatSrcs[i / SrcNumElts] >= 0 &&
+ ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts))) {
+ IsConcat = false;
+ break;
+ }
+ // Remember which source this index came from.
+ ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
}
- // Remember which source this index came from.
- ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
- }
- // The shuffle is concatenating multiple vectors together. Just emit
- // a CONCAT_VECTORS operation.
- if (IsConcat) {
- SmallVector<SDValue, 8> ConcatOps;
- for (auto Src : ConcatSrcs) {
- if (Src < 0)
- ConcatOps.push_back(DAG.getUNDEF(SrcVT));
- else if (Src == 0)
- ConcatOps.push_back(Src1);
- else
- ConcatOps.push_back(Src2);
+ // The shuffle is concatenating multiple vectors together. Just emit
+ // a CONCAT_VECTORS operation.
+ if (IsConcat) {
+ SmallVector<SDValue, 8> ConcatOps;
+ for (auto Src : ConcatSrcs) {
+ if (Src < 0)
+ ConcatOps.push_back(DAG.getUNDEF(SrcVT));
+ else if (Src == 0)
+ ConcatOps.push_back(Src1);
+ else
+ ConcatOps.push_back(Src2);
+ }
+ setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps));
+ return;
}
- setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurSDLoc(),
- VT, ConcatOps));
- return;
}
+ unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
+ unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
+ EVT PaddedVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
+ PaddedMaskNumElts);
+
// Pad both vectors with undefs to make them the same length as the mask.
SDValue UndefVal = DAG.getUNDEF(SrcVT);
@@ -3065,24 +3035,32 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
MOps1[0] = Src1;
MOps2[0] = Src2;
- Src1 = Src1.isUndef() ? DAG.getUNDEF(VT)
- : DAG.getNode(ISD::CONCAT_VECTORS,
- getCurSDLoc(), VT, MOps1);
- Src2 = Src2.isUndef() ? DAG.getUNDEF(VT)
- : DAG.getNode(ISD::CONCAT_VECTORS,
- getCurSDLoc(), VT, MOps2);
+ Src1 = Src1.isUndef()
+ ? DAG.getUNDEF(PaddedVT)
+ : DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps1);
+ Src2 = Src2.isUndef()
+ ? DAG.getUNDEF(PaddedVT)
+ : DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps2);
// Readjust mask for new input vector length.
- SmallVector<int, 8> MappedOps;
+ SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
for (unsigned i = 0; i != MaskNumElts; ++i) {
int Idx = Mask[i];
if (Idx >= (int)SrcNumElts)
- Idx -= SrcNumElts - MaskNumElts;
- MappedOps.push_back(Idx);
+ Idx -= SrcNumElts - PaddedMaskNumElts;
+ MappedOps[i] = Idx;
}
- setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2,
- MappedOps));
+ SDValue Result = DAG.getVectorShuffle(PaddedVT, DL, Src1, Src2, MappedOps);
+
+ // If the concatenated vector was padded, extract a subvector with the
+ // correct number of elements.
+ if (MaskNumElts != PaddedMaskNumElts)
+ Result = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, DL, VT, Result,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+
+ setValue(&I, Result);
return;
}
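A self-contained sketch of the index remapping performed above when the mask is longer than the source vectors, with plain integers standing in for SDValues (the helper name is hypothetical): each source is conceptually widened with undef padding to PaddedMaskNumElts elements, so lanes taken from Src2 must be shifted by the amount of padding inserted after Src1.

    #include <vector>

    // Remap shuffle indices after padding both sources to PaddedMaskNumElts.
    static std::vector<int> remapShuffleMask(const std::vector<int> &Mask,
                                             unsigned SrcNumElts,
                                             unsigned PaddedMaskNumElts) {
      std::vector<int> Mapped(PaddedMaskNumElts, -1); // -1 marks an undef lane
      for (unsigned i = 0; i != Mask.size(); ++i) {
        int Idx = Mask[i];
        if (Idx >= (int)SrcNumElts)              // lane comes from Src2
          Idx += PaddedMaskNumElts - SrcNumElts; // Src2 now starts at PaddedMaskNumElts
        Mapped[i] = Idx;
      }
      return Mapped;
    }

For example, with SrcNumElts = 4 and MaskNumElts = 6, PaddedMaskNumElts is 8, so an original index of 5 (the second lane of Src2) becomes 9; the final EXTRACT_SUBVECTOR then keeps only the first 6 lanes of the padded shuffle result.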
@@ -3141,10 +3119,9 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
if (RangeUse[Input] == 0)
Src = DAG.getUNDEF(VT);
else {
- SDLoc dl = getCurSDLoc();
Src = DAG.getNode(
- ISD::EXTRACT_SUBVECTOR, dl, VT, Src,
- DAG.getConstant(StartIdx[Input], dl,
+ ISD::EXTRACT_SUBVECTOR, DL, VT, Src,
+ DAG.getConstant(StartIdx[Input], DL,
TLI.getVectorIdxTy(DAG.getDataLayout())));
}
}
@@ -3162,8 +3139,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
MappedOps.push_back(Idx);
}
- setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2,
- MappedOps));
+ setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, MappedOps));
return;
}
}
@@ -3173,7 +3149,6 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
// to insert and build vector.
EVT EltVT = VT.getVectorElementType();
EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
- SDLoc dl = getCurSDLoc();
SmallVector<SDValue,8> Ops;
for (unsigned i = 0; i != MaskNumElts; ++i) {
int Idx = Mask[i];
@@ -3185,14 +3160,14 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2;
if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts;
- Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- EltVT, Src, DAG.getConstant(Idx, dl, IdxVT));
+ Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ EltVT, Src, DAG.getConstant(Idx, DL, IdxVT));
}
Ops.push_back(Res);
}
- setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops));
+ setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops));
}
void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
@@ -3293,13 +3268,13 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
if (VectorWidth && !N.getValueType().isVector()) {
LLVMContext &Context = *DAG.getContext();
EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorWidth);
- SmallVector<SDValue, 16> Ops(VectorWidth, N);
- N = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
+ N = DAG.getSplatBuildVector(VT, dl, N);
}
+
for (gep_type_iterator GTI = gep_type_begin(&I), E = gep_type_end(&I);
GTI != E; ++GTI) {
const Value *Idx = GTI.getOperand();
- if (StructType *StTy = dyn_cast<StructType>(*GTI)) {
+ if (StructType *StTy = GTI.getStructTypeOrNull()) {
unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue();
if (Field) {
// N = N + Offset
@@ -3331,8 +3306,9 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
if (CI->isZero())
continue;
APInt Offs = ElementSize * CI->getValue().sextOrTrunc(PtrSize);
+ LLVMContext &Context = *DAG.getContext();
SDValue OffsVal = VectorWidth ?
- DAG.getConstant(Offs, dl, MVT::getVectorVT(PtrTy, VectorWidth)) :
+ DAG.getConstant(Offs, dl, EVT::getVectorVT(Context, PtrTy, VectorWidth)) :
DAG.getConstant(Offs, dl, PtrTy);
// In an inbounds GEP with an offset that is nonnegative even when
@@ -3350,9 +3326,9 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
if (!IdxN.getValueType().isVector() && VectorWidth) {
MVT VT = MVT::getVectorVT(IdxN.getValueType().getSimpleVT(), VectorWidth);
- SmallVector<SDValue, 16> Ops(VectorWidth, IdxN);
- IdxN = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
+ IdxN = DAG.getSplatBuildVector(VT, dl, IdxN);
}
+
// If the index is smaller or larger than intptr_t, truncate or extend
// it.
IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType());
@@ -3433,7 +3409,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
setValue(&I, DSA);
DAG.setRoot(DSA.getValue(1));
- assert(FuncInfo.MF->getFrameInfo()->hasVarSizedObjects());
+ assert(FuncInfo.MF->getFrameInfo().hasVarSizedObjects());
}
void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
@@ -3462,17 +3438,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
bool isVolatile = I.isVolatile();
bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr;
-
- // The IR notion of invariant_load only guarantees that all *non-faulting*
- // invariant loads result in the same value. The MI notion of invariant load
- // guarantees that the load can be legally moved to any location within its
- // containing function. The MI notion of invariant_load is stronger than the
- // IR notion of invariant_load -- an MI invariant_load is an IR invariant_load
- // with a guarantee that the location being loaded from is dereferenceable
- // throughout the function's lifetime.
-
- bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr &&
- isDereferenceablePointer(SV, DAG.getDataLayout());
+ bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr;
+ bool isDereferenceable = isDereferenceablePointer(SV, DAG.getDataLayout());
unsigned Alignment = I.getAlignment();
AAMDNodes AAInfo;
@@ -3540,6 +3507,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
MMOFlags |= MachineMemOperand::MONonTemporal;
if (isInvariant)
MMOFlags |= MachineMemOperand::MOInvariant;
+ if (isDereferenceable)
+ MMOFlags |= MachineMemOperand::MODereferenceable;
SDValue L = DAG.getLoad(ValueVTs[i], dl, Root, A,
MachinePointerInfo(SV, Offsets[i]), Alignment,
@@ -3585,7 +3554,7 @@ void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) {
SDValue CopyNode = DAG.getCopyToReg(getRoot(), getCurSDLoc(), VReg,
SDValue(Src.getNode(), Src.getResNo()));
DAG.setRoot(CopyNode);
- FuncInfo.setSwiftErrorVReg(FuncInfo.MBB, I.getOperand(1), VReg);
+ FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, I.getOperand(1), VReg);
}
void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
@@ -3613,9 +3582,9 @@ void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
"expect a single EVT for swifterror");
// Chain, DL, Reg, VT, Glue or Chain, DL, Reg, VT
- SDValue L = DAG.getCopyFromReg(getRoot(), getCurSDLoc(),
- FuncInfo.findSwiftErrorVReg(FuncInfo.MBB, SV),
- ValueVTs[0]);
+ SDValue L = DAG.getCopyFromReg(
+ getRoot(), getCurSDLoc(),
+ FuncInfo.getOrCreateSwiftErrorVReg(FuncInfo.MBB, SV), ValueVTs[0]);
setValue(&I, L);
}
@@ -3697,16 +3666,39 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
DAG.setRoot(StoreNode);
}
-void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) {
+void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
+ bool IsCompressing) {
SDLoc sdl = getCurSDLoc();
- // llvm.masked.store.*(Src0, Ptr, alignment, Mask)
- Value *PtrOperand = I.getArgOperand(1);
+ auto getMaskedStoreOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
+ unsigned& Alignment) {
+ // llvm.masked.store.*(Src0, Ptr, alignment, Mask)
+ Src0 = I.getArgOperand(0);
+ Ptr = I.getArgOperand(1);
+ Alignment = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
+ Mask = I.getArgOperand(3);
+ };
+ auto getCompressingStoreOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
+ unsigned& Alignment) {
+ // llvm.masked.compressstore.*(Src0, Ptr, Mask)
+ Src0 = I.getArgOperand(0);
+ Ptr = I.getArgOperand(1);
+ Mask = I.getArgOperand(2);
+ Alignment = 0;
+ };
+
+ Value *PtrOperand, *MaskOperand, *Src0Operand;
+ unsigned Alignment;
+ if (IsCompressing)
+ getCompressingStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
+ else
+ getMaskedStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
+
SDValue Ptr = getValue(PtrOperand);
- SDValue Src0 = getValue(I.getArgOperand(0));
- SDValue Mask = getValue(I.getArgOperand(3));
+ SDValue Src0 = getValue(Src0Operand);
+ SDValue Mask = getValue(MaskOperand);
+
EVT VT = Src0.getValueType();
- unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(2)))->getZExtValue();
if (!Alignment)
Alignment = DAG.getEVTAlignment(VT);
@@ -3719,7 +3711,8 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) {
MachineMemOperand::MOStore, VT.getStoreSize(),
Alignment, AAInfo);
SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, VT,
- MMO, false);
+ MMO, false /* Truncating */,
+ IsCompressing);
DAG.setRoot(StoreNode);
setValue(&I, StoreNode);
}
@@ -3740,7 +3733,7 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I) {
// extract the splat value and use it as a uniform base.
// In all other cases the function returns 'false'.
//
-static bool getUniformBase(const Value *& Ptr, SDValue& Base, SDValue& Index,
+static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index,
SelectionDAGBuilder* SDB) {
SelectionDAG& DAG = SDB->DAG;
@@ -3777,8 +3770,7 @@ static bool getUniformBase(const Value *& Ptr, SDValue& Base, SDValue& Index,
if (!Index.getValueType().isVector()) {
unsigned GEPWidth = GEP->getType()->getVectorNumElements();
EVT VT = EVT::getVectorVT(Context, Index.getValueType(), GEPWidth);
- SmallVector<SDValue, 16> Ops(GEPWidth, Index);
- Index = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(Index), VT, Ops);
+ Index = DAG.getSplatBuildVector(VT, SDLoc(Index), Index);
}
return true;
}
@@ -3820,18 +3812,38 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
setValue(&I, Scatter);
}
-void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) {
+void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
SDLoc sdl = getCurSDLoc();
- // @llvm.masked.load.*(Ptr, alignment, Mask, Src0)
- Value *PtrOperand = I.getArgOperand(0);
+ auto getMaskedLoadOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
+ unsigned& Alignment) {
+ // @llvm.masked.load.*(Ptr, alignment, Mask, Src0)
+ Ptr = I.getArgOperand(0);
+ Alignment = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
+ Mask = I.getArgOperand(2);
+ Src0 = I.getArgOperand(3);
+ };
+ auto getExpandingLoadOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
+ unsigned& Alignment) {
+ // @llvm.masked.expandload.*(Ptr, Mask, Src0)
+ Ptr = I.getArgOperand(0);
+ Alignment = 0;
+ Mask = I.getArgOperand(1);
+ Src0 = I.getArgOperand(2);
+ };
+
+ Value *PtrOperand, *MaskOperand, *Src0Operand;
+ unsigned Alignment;
+ if (IsExpanding)
+ getExpandingLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
+ else
+ getMaskedLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
+
SDValue Ptr = getValue(PtrOperand);
- SDValue Src0 = getValue(I.getArgOperand(3));
- SDValue Mask = getValue(I.getArgOperand(2));
+ SDValue Src0 = getValue(Src0Operand);
+ SDValue Mask = getValue(MaskOperand);
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
- unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(1)))->getZExtValue();
+ EVT VT = Src0.getValueType();
if (!Alignment)
Alignment = DAG.getEVTAlignment(VT);
@@ -3851,7 +3863,7 @@ void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I) {
Alignment, AAInfo, Ranges);
SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO,
- ISD::NON_EXTLOAD);
+ ISD::NON_EXTLOAD, IsExpanding);
if (AddToChain) {
SDValue OutChain = Load.getValue(1);
DAG.setRoot(OutChain);
@@ -4003,13 +4015,13 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
MachineMemOperand::MOLoad,
VT.getStoreSize(),
I.getAlignment() ? I.getAlignment() :
- DAG.getEVTAlignment(VT));
+ DAG.getEVTAlignment(VT),
+ AAMDNodes(), nullptr, Scope, Order);
InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG);
SDValue L =
DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain,
- getValue(I.getPointerOperand()), MMO,
- Order, Scope);
+ getValue(I.getPointerOperand()), MMO);
SDValue OutChain = L.getValue(1);
@@ -4047,8 +4059,12 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
/// node.
void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
unsigned Intrinsic) {
- bool HasChain = !I.doesNotAccessMemory();
- bool OnlyLoad = HasChain && I.onlyReadsMemory();
+ // Ignore the callsite's attributes. A specific call site may be marked with
+ // readnone, but the lowering code will expect the chain based on the
+ // definition.
+ const Function *F = I.getCalledFunction();
+ bool HasChain = !F->doesNotAccessMemory();
+ bool OnlyLoad = HasChain && F->onlyReadsMemory();
// Build the operand list.
SmallVector<SDValue, 8> Ops;
@@ -4156,7 +4172,7 @@ static SDValue GetExponent(SelectionDAG &DAG, SDValue Op,
/// getF32Constant - Get 32-bit floating point constant.
static SDValue getF32Constant(SelectionDAG &DAG, unsigned Flt,
const SDLoc &dl) {
- return DAG.getConstantFP(APFloat(APFloat::IEEEsingle, APInt(32, Flt)), dl,
+ return DAG.getConstantFP(APFloat(APFloat::IEEEsingle(), APInt(32, Flt)), dl,
MVT::f32);
}
@@ -4743,6 +4759,32 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
return true;
}
+/// Return the appropriate SDDbgValue based on N.
+SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N,
+ DILocalVariable *Variable,
+ DIExpression *Expr, int64_t Offset,
+ DebugLoc dl,
+ unsigned DbgSDNodeOrder) {
+ SDDbgValue *SDV;
+ auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode());
+ if (FISDN && Expr->startsWithDeref()) {
+ // Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can describe
+ // stack slot locations as such instead of as indirectly addressed
+ // locations.
+ ArrayRef<uint64_t> TrailingElements(Expr->elements_begin() + 1,
+ Expr->elements_end());
+ DIExpression *DerefedDIExpr =
+ DIExpression::get(*DAG.getContext(), TrailingElements);
+ int FI = FISDN->getIndex();
+ SDV = DAG.getFrameIndexDbgValue(Variable, DerefedDIExpr, FI, 0, dl,
+ DbgSDNodeOrder);
+ } else {
+ SDV = DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(), false,
+ Offset, dl, DbgSDNodeOrder);
+ }
+ return SDV;
+}
+
// VisualStudio defines setjmp as _setjmp
#if defined(_MSC_VER) && defined(setjmp) && \
!defined(setjmp_undefined_for_msvc)
@@ -4774,6 +4816,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
TLI.getPointerTy(DAG.getDataLayout()),
getValue(I.getArgOperand(0))));
return nullptr;
+ case Intrinsic::addressofreturnaddress:
+ setValue(&I, DAG.getNode(ISD::ADDROFRETURNADDR, sdl,
+ TLI.getPointerTy(DAG.getDataLayout())));
+ return nullptr;
case Intrinsic::frameaddress:
setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl,
TLI.getPointerTy(DAG.getDataLayout()),
@@ -4850,6 +4896,51 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
updateDAGForMaybeTailCall(MM);
return nullptr;
}
+ case Intrinsic::memcpy_element_atomic: {
+ SDValue Dst = getValue(I.getArgOperand(0));
+ SDValue Src = getValue(I.getArgOperand(1));
+ SDValue NumElements = getValue(I.getArgOperand(2));
+ SDValue ElementSize = getValue(I.getArgOperand(3));
+
+ // Emit a library call.
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ Entry.Ty = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
+ Entry.Node = Dst;
+ Args.push_back(Entry);
+
+ Entry.Node = Src;
+ Args.push_back(Entry);
+
+ Entry.Ty = I.getArgOperand(2)->getType();
+ Entry.Node = NumElements;
+ Args.push_back(Entry);
+
+ Entry.Ty = Type::getInt32Ty(*DAG.getContext());
+ Entry.Node = ElementSize;
+ Args.push_back(Entry);
+
+ uint64_t ElementSizeConstant =
+ cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
+ RTLIB::Libcall LibraryCall =
+ RTLIB::getMEMCPY_ELEMENT_ATOMIC(ElementSizeConstant);
+ if (LibraryCall == RTLIB::UNKNOWN_LIBCALL)
+ report_fatal_error("Unsupported element size");
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(sdl)
+ .setChain(getRoot())
+ .setCallee(TLI.getLibcallCallingConv(LibraryCall),
+ Type::getVoidTy(*DAG.getContext()),
+ DAG.getExternalSymbol(
+ TLI.getLibcallName(LibraryCall),
+ TLI.getPointerTy(DAG.getDataLayout())),
+ std::move(Args));
+
+ std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);
+ DAG.setRoot(CallResult.second);
+ return nullptr;
+ }
case Intrinsic::dbg_declare: {
const DbgDeclareInst &DI = cast<DbgDeclareInst>(I);
DILocalVariable *Variable = DI.getVariable();
@@ -4944,8 +5035,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
if (N.getNode()) {
if (!EmitFuncArgumentDbgValue(V, Variable, Expression, dl, Offset,
false, N)) {
- SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(),
- false, Offset, dl, SDNodeOrder);
+ SDV = getDbgValue(N, Variable, Expression, Offset, dl, SDNodeOrder);
DAG.AddDbgValue(SDV, N.getNode(), false);
}
} else if (!V->use_empty() ) {
@@ -4980,7 +5070,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::eh_typeid_for: {
// Find the type id for the given typeinfo.
GlobalValue *GV = ExtractTypeInfo(I.getArgOperand(0));
- unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(GV);
+ unsigned TypeID = DAG.getMachineFunction().getTypeIDFor(GV);
Res = DAG.getConstant(TypeID, sdl, MVT::i32);
setValue(&I, Res);
return nullptr;
@@ -4988,7 +5078,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::eh_return_i32:
case Intrinsic::eh_return_i64:
- DAG.getMachineFunction().getMMI().setCallsEHReturn(true);
+ DAG.getMachineFunction().setCallsEHReturn(true);
DAG.setRoot(DAG.getNode(ISD::EH_RETURN, sdl,
MVT::Other,
getControlRoot(),
@@ -4996,7 +5086,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getValue(I.getArgOperand(1))));
return nullptr;
case Intrinsic::eh_unwind_init:
- DAG.getMachineFunction().getMMI().setCallsUnwindInit(true);
+ DAG.getMachineFunction().setCallsUnwindInit(true);
return nullptr;
case Intrinsic::eh_dwarf_cfa: {
setValue(&I, DAG.getNode(ISD::EH_DWARF_CFA, sdl,
@@ -5015,11 +5105,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::eh_sjlj_functioncontext: {
// Get and store the index of the function context.
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
AllocaInst *FnCtx =
cast<AllocaInst>(I.getArgOperand(0)->stripPointerCasts());
int FI = FuncInfo.StaticAllocaMap[FnCtx];
- MFI->setFunctionContextIndex(FI);
+ MFI.setFunctionContextIndex(FI);
return nullptr;
}
case Intrinsic::eh_sjlj_setjmp: {
@@ -5055,6 +5145,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::masked_store:
visitMaskedStore(I);
return nullptr;
+ case Intrinsic::masked_expandload:
+ visitMaskedLoad(I, true /* IsExpanding */);
+ return nullptr;
+ case Intrinsic::masked_compressstore:
+ visitMaskedStore(I, true /* IsCompressing */);
+ return nullptr;
case Intrinsic::x86_mmx_pslli_w:
case Intrinsic::x86_mmx_pslli_d:
case Intrinsic::x86_mmx_pslli_q:
@@ -5368,7 +5464,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::stackprotector: {
// Emit code into the DAG to store the stack guard onto the stack.
MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
SDValue Src, Chain = getRoot();
@@ -5380,7 +5476,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
AllocaInst *Slot = cast<AllocaInst>(I.getArgOperand(1));
int FI = FuncInfo.StaticAllocaMap[Slot];
- MFI->setStackProtectorIndex(FI);
+ MFI.setStackProtectorIndex(FI);
SDValue FIN = DAG.getFrameIndex(FI, PtrTy);
@@ -5411,6 +5507,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::annotation:
case Intrinsic::ptr_annotation:
+ case Intrinsic::invariant_group_barrier:
// Drop the intrinsic, but forward the value
setValue(&I, getValue(I.getOperand(0)));
return nullptr;
@@ -5687,7 +5784,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
std::pair<SDValue, SDValue>
SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
const BasicBlock *EHPadBB) {
- MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineModuleInfo &MMI = MF.getMMI();
MCSymbol *BeginLabel = nullptr;
if (EHPadBB) {
@@ -5699,7 +5797,7 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
// so as to maintain the ordering of pads in the LSDA.
unsigned CallSiteIndex = MMI.getCurrentCallSite();
if (CallSiteIndex) {
- MMI.setCallSiteBeginLabel(BeginLabel, CallSiteIndex);
+ MF.setCallSiteBeginLabel(BeginLabel, CallSiteIndex);
LPadToCallSiteMap[FuncInfo.MBBMap[EHPadBB]].push_back(CallSiteIndex);
// Now that the call site is handled, stop tracking it.
@@ -5740,13 +5838,13 @@ SelectionDAGBuilder::lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getRoot(), EndLabel));
// Inform MachineModuleInfo of range.
- if (MMI.hasEHFunclets()) {
+ if (MF.hasEHFunclets()) {
assert(CLI.CS);
WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo();
EHInfo->addIPToStateRange(cast<InvokeInst>(CLI.CS->getInstruction()),
BeginLabel, EndLabel);
} else {
- MMI.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel);
+ MF.addInvoke(FuncInfo.MBBMap[EHPadBB], BeginLabel, EndLabel);
}
}
@@ -5785,9 +5883,9 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
SwiftErrorVal = V;
// We find the virtual register for the actual swifterror argument.
// Instead of using the Value, we use the virtual register instead.
- Entry.Node = DAG.getRegister(
- FuncInfo.findSwiftErrorVReg(FuncInfo.MBB, V),
- EVT(TLI.getPointerTy(DL)));
+ Entry.Node =
+ DAG.getRegister(FuncInfo.getOrCreateSwiftErrorVReg(FuncInfo.MBB, V),
+ EVT(TLI.getPointerTy(DL)));
}
Args.push_back(Entry);
@@ -5803,6 +5901,11 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
if (isTailCall && !isInTailCallPosition(CS, DAG.getTarget()))
isTailCall = false;
+ // Disable tail calls if there is a swifterror argument. Targets have not
+ // been updated to support tail calls.
+ if (TLI.supportSwiftError() && SwiftErrorVal)
+ isTailCall = false;
+
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(getCurSDLoc())
.setChain(getRoot())
@@ -5827,7 +5930,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
unsigned VReg = FuncInfo.MF->getRegInfo().createVirtualRegister(RC);
SDValue CopyNode = CLI.DAG.getCopyToReg(Result.second, CLI.DL, VReg, Src);
// We update the virtual register for the actual swifterror argument.
- FuncInfo.setSwiftErrorVReg(FuncInfo.MBB, SwiftErrorVal, VReg);
+ FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, SwiftErrorVal, VReg);
DAG.setRoot(CopyNode);
}
}
@@ -6035,6 +6138,49 @@ bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) {
return false;
}
+///
+/// visitMemPCpyCall -- lower a mempcpy call as a memcpy followed by code
+/// to adjust the dst pointer by the size of the copied memory.
+bool SelectionDAGBuilder::visitMemPCpyCall(const CallInst &I) {
+
+ // Verify argument count: void *mempcpy(void *, const void *, size_t)
+ if (I.getNumArgOperands() != 3)
+ return false;
+
+ SDValue Dst = getValue(I.getArgOperand(0));
+ SDValue Src = getValue(I.getArgOperand(1));
+ SDValue Size = getValue(I.getArgOperand(2));
+
+ unsigned DstAlign = DAG.InferPtrAlignment(Dst);
+ unsigned SrcAlign = DAG.InferPtrAlignment(Src);
+ unsigned Align = std::min(DstAlign, SrcAlign);
+ if (Align == 0) // Alignment of one or both could not be inferred.
+ Align = 1; // 0 and 1 both specify no alignment, but 0 is reserved.
+
+ bool isVol = false;
+ SDLoc sdl = getCurSDLoc();
+
+ // In the mempcpy context we need to pass in a false value for isTailCall
+ // because the return pointer needs to be adjusted by the size of
+ // the copied memory.
+ SDValue MC = DAG.getMemcpy(getRoot(), sdl, Dst, Src, Size, Align, isVol,
+ false, /*isTailCall=*/false,
+ MachinePointerInfo(I.getArgOperand(0)),
+ MachinePointerInfo(I.getArgOperand(1)));
+ assert(MC.getNode() != nullptr &&
+ "** memcpy should not be lowered as TailCall in mempcpy context **");
+ DAG.setRoot(MC);
+
+ // Check if Size needs to be truncated or extended.
+ Size = DAG.getSExtOrTrunc(Size, sdl, Dst.getValueType());
+
+ // Adjust return pointer to point just past the last dst byte.
+ SDValue DstPlusSize = DAG.getNode(ISD::ADD, sdl, Dst.getValueType(),
+ Dst, Size);
+ setValue(&I, DstPlusSize);
+ return true;
+}
+
/// visitStrCpyCall -- See if we can lower a strcpy or stpcpy call into an
/// optimized form. If so, return true and lower it, otherwise return false
/// and it will be lowered like a normal call.
@@ -6191,7 +6337,7 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
}
MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
- ComputeUsesVAFloatArgument(I, &MMI);
+ computeUsesVAFloatArgument(I, MMI);
const char *RenameFn = nullptr;
if (Function *F = I.getCalledFunction()) {
@@ -6325,6 +6471,10 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
if (visitMemCmpCall(I))
return;
break;
+ case LibFunc::mempcpy:
+ if (visitMemPCpyCall(I))
+ return;
+ break;
case LibFunc::memchr:
if (visitMemChrCall(I))
return;
@@ -6395,6 +6545,19 @@ public:
: TargetLowering::AsmOperandInfo(info), CallOperand(nullptr,0) {
}
+ /// Whether or not this operand accesses memory
+ bool hasMemory(const TargetLowering &TLI) const {
+ // Indirect operand accesses access memory.
+ if (isIndirect)
+ return true;
+
+ for (const auto &Code : Codes)
+ if (TLI.getConstraintType(Code) == TargetLowering::C_Memory)
+ return true;
+
+ return false;
+ }
+
/// getCallOperandValEVT - Return the EVT of the Value* that this operand
/// corresponds to. If there is no Value* for this operand, it returns
/// MVT::Other.
@@ -6447,6 +6610,75 @@ typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector;
} // end anonymous namespace
+/// Make sure that the output operand \p OpInfo and its corresponding input
+/// operand \p MatchingOpInfo have compatible constraint types (otherwise error
+/// out).
+static void patchMatchingInput(const SDISelAsmOperandInfo &OpInfo,
+ SDISelAsmOperandInfo &MatchingOpInfo,
+ SelectionDAG &DAG) {
+ if (OpInfo.ConstraintVT == MatchingOpInfo.ConstraintVT)
+ return;
+
+ const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
+ const auto &TLI = DAG.getTargetLoweringInfo();
+
+ std::pair<unsigned, const TargetRegisterClass *> MatchRC =
+ TLI.getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
+ OpInfo.ConstraintVT);
+ std::pair<unsigned, const TargetRegisterClass *> InputRC =
+ TLI.getRegForInlineAsmConstraint(TRI, MatchingOpInfo.ConstraintCode,
+ MatchingOpInfo.ConstraintVT);
+ if ((OpInfo.ConstraintVT.isInteger() !=
+ MatchingOpInfo.ConstraintVT.isInteger()) ||
+ (MatchRC.second != InputRC.second)) {
+ // FIXME: error out in a more elegant fashion
+ report_fatal_error("Unsupported asm: input constraint"
+ " with a matching output constraint of"
+ " incompatible type!");
+ }
+ MatchingOpInfo.ConstraintVT = OpInfo.ConstraintVT;
+}
+
+/// Get a direct memory input to behave well as an indirect operand.
+/// This may introduce stores, hence the need for a \p Chain.
+/// \return The (possibly updated) chain.
+static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location,
+ SDISelAsmOperandInfo &OpInfo,
+ SelectionDAG &DAG) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // If we don't have an indirect input, put it in the constpool if we can,
+ // otherwise spill it to a stack slot.
+ // TODO: This isn't quite right. We need to handle these according to
+ // the addressing mode that the constraint wants. Also, this may take
+ // an additional register for the computation and we don't want that
+ // either.
+
+ // If the operand is a float, integer, or vector constant, spill to a
+ // constant pool entry to get its address.
+ const Value *OpVal = OpInfo.CallOperandVal;
+ if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) ||
+ isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) {
+ OpInfo.CallOperand = DAG.getConstantPool(
+ cast<Constant>(OpVal), TLI.getPointerTy(DAG.getDataLayout()));
+ return Chain;
+ }
+
+ // Otherwise, create a stack slot and emit a store to it before the asm.
+ Type *Ty = OpVal->getType();
+ auto &DL = DAG.getDataLayout();
+ uint64_t TySize = DL.getTypeAllocSize(Ty);
+ unsigned Align = DL.getPrefTypeAlignment(Ty);
+ MachineFunction &MF = DAG.getMachineFunction();
+ int SSFI = MF.getFrameInfo().CreateStackObject(TySize, Align, false);
+ SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy(DL));
+ Chain = DAG.getStore(Chain, Location, OpInfo.CallOperand, StackSlot,
+ MachinePointerInfo::getFixedStack(MF, SSFI));
+ OpInfo.CallOperand = StackSlot;
+
+ return Chain;
+}
+
/// GetRegistersForValue - Assign registers (virtual or physical) for the
/// specified operand. We prefer to assign virtual registers, to allow the
/// register allocator to handle the assignment process. However, if the asm
@@ -6555,6 +6787,73 @@ static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI,
// Otherwise, we couldn't allocate enough registers for this.
}
+static unsigned
+findMatchingInlineAsmOperand(unsigned OperandNo,
+ const std::vector<SDValue> &AsmNodeOperands) {
+ // Scan until we find the definition we already emitted of this operand.
+ unsigned CurOp = InlineAsm::Op_FirstOperand;
+ for (; OperandNo; --OperandNo) {
+ // Advance to the next operand.
+ unsigned OpFlag =
+ cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
+ assert((InlineAsm::isRegDefKind(OpFlag) ||
+ InlineAsm::isRegDefEarlyClobberKind(OpFlag) ||
+ InlineAsm::isMemKind(OpFlag)) &&
+ "Skipped past definitions?");
+ CurOp += InlineAsm::getNumOperandRegisters(OpFlag) + 1;
+ }
+ return CurOp;
+}
+
+/// Fill \p Regs with \p NumRegs new virtual registers of type \p RegVT
+/// \return true if it has succeeded, false otherwise
+static bool createVirtualRegs(SmallVector<unsigned, 4> &Regs, unsigned NumRegs,
+ MVT RegVT, SelectionDAG &DAG) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo();
+ for (unsigned i = 0, e = NumRegs; i != e; ++i) {
+ if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT))
+ Regs.push_back(RegInfo.createVirtualRegister(RC));
+ else
+ return false;
+ }
+ return true;
+}
+
+class ExtraFlags {
+ unsigned Flags = 0;
+
+public:
+ explicit ExtraFlags(ImmutableCallSite CS) {
+ const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
+ if (IA->hasSideEffects())
+ Flags |= InlineAsm::Extra_HasSideEffects;
+ if (IA->isAlignStack())
+ Flags |= InlineAsm::Extra_IsAlignStack;
+ if (CS.isConvergent())
+ Flags |= InlineAsm::Extra_IsConvergent;
+ Flags |= IA->getDialect() * InlineAsm::Extra_AsmDialect;
+ }
+
+ void update(const llvm::TargetLowering::AsmOperandInfo &OpInfo) {
+ // Ideally, we would only check against memory constraints. However, the
+ // meaning of an Other constraint can be target-specific and we can't easily
+ // reason about it. Therefore, be conservative and set MayLoad/MayStore
+ // for Other constraints as well.
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
+ OpInfo.ConstraintType == TargetLowering::C_Other) {
+ if (OpInfo.Type == InlineAsm::isInput)
+ Flags |= InlineAsm::Extra_MayLoad;
+ else if (OpInfo.Type == InlineAsm::isOutput)
+ Flags |= InlineAsm::Extra_MayStore;
+ else if (OpInfo.Type == InlineAsm::isClobber)
+ Flags |= (InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore);
+ }
+ }
+
+ unsigned get() const { return Flags; }
+};
+
/// visitInlineAsm - Handle a call to an InlineAsm object.
///
void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
@@ -6569,6 +6868,9 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
bool hasMemory = false;
+ // Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore
+ ExtraFlags ExtraInfo(CS);
+
unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
unsigned ResNo = 0; // ResNo - The result number of the next output.
for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
@@ -6578,14 +6880,25 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
MVT OpVT = MVT::Other;
// Compute the value type for each operand.
- switch (OpInfo.Type) {
- case InlineAsm::isOutput:
- // Indirect outputs just consume an argument.
- if (OpInfo.isIndirect) {
- OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
- break;
+ if (OpInfo.Type == InlineAsm::isInput ||
+ (OpInfo.Type == InlineAsm::isOutput && OpInfo.isIndirect)) {
+ OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
+
+ // Process the call argument. BasicBlocks are labels, currently appearing
+ // only in asm's.
+ if (const BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) {
+ OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]);
+ } else {
+ OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
}
+ OpVT =
+ OpInfo
+ .getCallOperandValEVT(*DAG.getContext(), TLI, DAG.getDataLayout())
+ .getSimpleVT();
+ }
+
+ if (OpInfo.Type == InlineAsm::isOutput && !OpInfo.isIndirect) {
// The return value of the call is this value. As such, there is no
// corresponding argument.
assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
@@ -6597,43 +6910,21 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
OpVT = TLI.getSimpleValueType(DAG.getDataLayout(), CS.getType());
}
++ResNo;
- break;
- case InlineAsm::isInput:
- OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
- break;
- case InlineAsm::isClobber:
- // Nothing to do.
- break;
}
- // If this is an input or an indirect output, process the call argument.
- // BasicBlocks are labels, currently appearing only in asm's.
- if (OpInfo.CallOperandVal) {
- if (const BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) {
- OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]);
- } else {
- OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
- }
+ OpInfo.ConstraintVT = OpVT;
- OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI,
- DAG.getDataLayout()).getSimpleVT();
- }
+ if (!hasMemory)
+ hasMemory = OpInfo.hasMemory(TLI);
- OpInfo.ConstraintVT = OpVT;
+ // Determine if this InlineAsm MayLoad or MayStore based on the constraints.
+ // FIXME: Could we compute this on OpInfo rather than TargetConstraints[i]?
+ auto TargetConstraint = TargetConstraints[i];
- // Indirect operand accesses access memory.
- if (OpInfo.isIndirect)
- hasMemory = true;
- else {
- for (unsigned j = 0, ee = OpInfo.Codes.size(); j != ee; ++j) {
- TargetLowering::ConstraintType
- CType = TLI.getConstraintType(OpInfo.Codes[j]);
- if (CType == TargetLowering::C_Memory) {
- hasMemory = true;
- break;
- }
- }
- }
+ // Compute the constraint code and ConstraintType to use.
+ TLI.ComputeConstraintToUse(TargetConstraint, SDValue());
+
+ ExtraInfo.update(TargetConstraint);
}
SDValue Chain, Flag;
@@ -6656,24 +6947,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// error.
if (OpInfo.hasMatchingInput()) {
SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
-
- if (OpInfo.ConstraintVT != Input.ConstraintVT) {
- const TargetRegisterInfo *TRI = DAG.getSubtarget().getRegisterInfo();
- std::pair<unsigned, const TargetRegisterClass *> MatchRC =
- TLI.getRegForInlineAsmConstraint(TRI, OpInfo.ConstraintCode,
- OpInfo.ConstraintVT);
- std::pair<unsigned, const TargetRegisterClass *> InputRC =
- TLI.getRegForInlineAsmConstraint(TRI, Input.ConstraintCode,
- Input.ConstraintVT);
- if ((OpInfo.ConstraintVT.isInteger() !=
- Input.ConstraintVT.isInteger()) ||
- (MatchRC.second != InputRC.second)) {
- report_fatal_error("Unsupported asm: input constraint"
- " with a matching output constraint of"
- " incompatible type!");
- }
- Input.ConstraintVT = OpInfo.ConstraintVT;
- }
+ patchMatchingInput(OpInfo, Input, DAG);
}
// Compute the constraint code and ConstraintType to use.
@@ -6691,37 +6965,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
(OpInfo.Type == InlineAsm::isInput)) &&
"Can only indirectify direct input operands!");
- // Memory operands really want the address of the value. If we don't have
- // an indirect input, put it in the constpool if we can, otherwise spill
- // it to a stack slot.
- // TODO: This isn't quite right. We need to handle these according to
- // the addressing mode that the constraint wants. Also, this may take
- // an additional register for the computation and we don't want that
- // either.
-
- // If the operand is a float, integer, or vector constant, spill to a
- // constant pool entry to get its address.
- const Value *OpVal = OpInfo.CallOperandVal;
- if (isa<ConstantFP>(OpVal) || isa<ConstantInt>(OpVal) ||
- isa<ConstantVector>(OpVal) || isa<ConstantDataVector>(OpVal)) {
- OpInfo.CallOperand = DAG.getConstantPool(
- cast<Constant>(OpVal), TLI.getPointerTy(DAG.getDataLayout()));
- } else {
- // Otherwise, create a stack slot and emit a store to it before the
- // asm.
- Type *Ty = OpVal->getType();
- auto &DL = DAG.getDataLayout();
- uint64_t TySize = DL.getTypeAllocSize(Ty);
- unsigned Align = DL.getPrefTypeAlignment(Ty);
- MachineFunction &MF = DAG.getMachineFunction();
- int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
- SDValue StackSlot =
- DAG.getFrameIndex(SSFI, TLI.getPointerTy(DAG.getDataLayout()));
- Chain = DAG.getStore(
- Chain, getCurSDLoc(), OpInfo.CallOperand, StackSlot,
- MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI));
- OpInfo.CallOperand = StackSlot;
- }
+ // Memory operands really want the address of the value.
+ Chain = getAddressForMemoryInput(Chain, getCurSDLoc(), OpInfo, DAG);
// There is no longer a Value* corresponding to this operand.
OpInfo.CallOperandVal = nullptr;
@@ -6736,7 +6981,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
GetRegistersForValue(DAG, TLI, getCurSDLoc(), OpInfo);
}
- // Second pass - Loop over all of the operands, assigning virtual or physregs
+ // Third pass - Loop over all of the operands, assigning virtual or physregs
// to register class operands.
for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
@@ -6761,40 +7006,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore
// bits as operand 3.
- unsigned ExtraInfo = 0;
- if (IA->hasSideEffects())
- ExtraInfo |= InlineAsm::Extra_HasSideEffects;
- if (IA->isAlignStack())
- ExtraInfo |= InlineAsm::Extra_IsAlignStack;
- if (CS.isConvergent())
- ExtraInfo |= InlineAsm::Extra_IsConvergent;
- // Set the asm dialect.
- ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect;
-
- // Determine if this InlineAsm MayLoad or MayStore based on the constraints.
- for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
- TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i];
-
- // Compute the constraint code and ConstraintType to use.
- TLI.ComputeConstraintToUse(OpInfo, SDValue());
-
- // Ideally, we would only check against memory constraints. However, the
- // meaning of an other constraint can be target-specific and we can't easily
- // reason about it. Therefore, be conservative and set MayLoad/MayStore
- // for other constriants as well.
- if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
- OpInfo.ConstraintType == TargetLowering::C_Other) {
- if (OpInfo.Type == InlineAsm::isInput)
- ExtraInfo |= InlineAsm::Extra_MayLoad;
- else if (OpInfo.Type == InlineAsm::isOutput)
- ExtraInfo |= InlineAsm::Extra_MayStore;
- else if (OpInfo.Type == InlineAsm::isClobber)
- ExtraInfo |= (InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore);
- }
- }
-
AsmNodeOperands.push_back(DAG.getTargetConstant(
- ExtraInfo, getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
+ ExtraInfo.get(), getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
// Loop over all of the inputs, copying the operand values into the
// appropriate registers and processing the output regs.
@@ -6862,24 +7075,11 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
case InlineAsm::isInput: {
SDValue InOperandVal = OpInfo.CallOperand;
- if (OpInfo.isMatchingInputConstraint()) { // Matching constraint?
+ if (OpInfo.isMatchingInputConstraint()) {
// If this is required to match an output register we have already set,
// just use its register.
- unsigned OperandNo = OpInfo.getMatchedOperand();
-
- // Scan until we find the definition we already emitted of this operand.
- // When we find it, create a RegsForValue operand.
- unsigned CurOp = InlineAsm::Op_FirstOperand;
- for (; OperandNo; --OperandNo) {
- // Advance to the next operand.
- unsigned OpFlag =
- cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
- assert((InlineAsm::isRegDefKind(OpFlag) ||
- InlineAsm::isRegDefEarlyClobberKind(OpFlag) ||
- InlineAsm::isMemKind(OpFlag)) && "Skipped past definitions?");
- CurOp += InlineAsm::getNumOperandRegisters(OpFlag)+1;
- }
-
+ auto CurOp = findMatchingInlineAsmOperand(OpInfo.getMatchedOperand(),
+ AsmNodeOperands);
unsigned OpFlag =
cast<ConstantSDNode>(AsmNodeOperands[CurOp])->getZExtValue();
if (InlineAsm::isRegDefKind(OpFlag) ||
@@ -6893,22 +7093,19 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
return;
}
- RegsForValue MatchedRegs;
- MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType());
MVT RegVT = AsmNodeOperands[CurOp+1].getSimpleValueType();
- MatchedRegs.RegVTs.push_back(RegVT);
- MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo();
- for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag);
- i != e; ++i) {
- if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT))
- MatchedRegs.Regs.push_back(RegInfo.createVirtualRegister(RC));
- else {
- emitInlineAsmError(
- CS, "inline asm error: This value"
- " type register class is not natively supported!");
- return;
- }
+ SmallVector<unsigned, 4> Regs;
+
+ if (!createVirtualRegs(Regs,
+ InlineAsm::getNumOperandRegisters(OpFlag),
+ RegVT, DAG)) {
+ emitInlineAsmError(CS, "inline asm error: This value type register "
+ "class is not natively supported!");
+ return;
}
+
+ RegsForValue MatchedRegs(Regs, RegVT, InOperandVal.getValueType());
+
SDLoc dl = getCurSDLoc();
// Use the produced MatchedRegs object to
MatchedRegs.getCopyToRegs(InOperandVal, DAG, dl,
@@ -7299,7 +7496,7 @@ void SelectionDAGBuilder::visitStackmap(const CallInst &CI) {
DAG.setRoot(Chain);
// Inform the Frame Information that we have a stackmap in this function.
- FuncInfo.MF->getFrameInfo()->setHasStackMap();
+ FuncInfo.MF->getFrameInfo().setHasStackMap();
}
/// \brief Lower llvm.experimental.patchpoint directly to its target opcode.
@@ -7450,7 +7647,7 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
DAG.DeleteNode(Call);
// Inform the Frame Information that we have a patchpoint in this function.
- FuncInfo.MF->getFrameInfo()->setHasPatchPoint();
+ FuncInfo.MF->getFrameInfo().setHasPatchPoint();
}
/// Returns an AttributeSet representing the attributes applied to the return
@@ -7498,7 +7695,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
uint64_t TySize = DL.getTypeAllocSize(CLI.RetTy);
unsigned Align = DL.getPrefTypeAlignment(CLI.RetTy);
MachineFunction &MF = CLI.DAG.getMachineFunction();
- DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
+ DemoteStackIdx = MF.getFrameInfo().CreateStackObject(TySize, Align, false);
Type *StackSlotPtrType = PointerType::getUnqual(CLI.RetTy);
DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getPointerTy(DL));
@@ -7580,8 +7777,19 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
Flags.setZExt();
if (Args[i].isSExt)
Flags.setSExt();
- if (Args[i].isInReg)
+ if (Args[i].isInReg) {
+        // If we are using the vectorcall calling convention, a structure that is
+        // passed InReg is surely an HVA.
+ if (CLI.CallConv == CallingConv::X86_VectorCall &&
+ isa<StructType>(FinalType)) {
+ // The first value of a structure is marked
+          // The first value of a structure is marked as HvaStart.
+ Flags.setHvaStart();
+ Flags.setHva();
+ }
+ // Set InReg Flag
Flags.setInReg();
+ }
if (Args[i].isSRet)
Flags.setSRet();
if (Args[i].isSwiftSelf)
@@ -7867,8 +8075,19 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
Flags.setZExt();
if (F.getAttributes().hasAttribute(Idx, Attribute::SExt))
Flags.setSExt();
- if (F.getAttributes().hasAttribute(Idx, Attribute::InReg))
+ if (F.getAttributes().hasAttribute(Idx, Attribute::InReg)) {
+        // If we are using the vectorcall calling convention, a structure that is
+        // passed InReg is surely an HVA.
+ if (F.getCallingConv() == CallingConv::X86_VectorCall &&
+ isa<StructType>(I->getType())) {
+          // The first value of a structure is marked as HvaStart.
+ if (0 == Value)
+ Flags.setHvaStart();
+ Flags.setHva();
+ }
+ // Set InReg Flag
Flags.setInReg();
+ }
if (F.getAttributes().hasAttribute(Idx, Attribute::StructRet))
Flags.setSRet();
if (F.getAttributes().hasAttribute(Idx, Attribute::SwiftSelf))
@@ -7990,7 +8209,10 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// If this argument is unused then remember its value. It is used to generate
// debugging information.
- if (I->use_empty() && NumValues) {
+ bool isSwiftErrorArg =
+ TLI->supportSwiftError() &&
+ F.getAttributes().hasAttribute(Idx, Attribute::SwiftError);
+ if (I->use_empty() && NumValues && !isSwiftErrorArg) {
SDB->setUnusedArgValue(&*I, InVals[i]);
// Also remember any frame index for use in FastISel.
@@ -8004,7 +8226,10 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
MVT PartVT = TLI->getRegisterType(*CurDAG->getContext(), VT);
unsigned NumParts = TLI->getNumRegisters(*CurDAG->getContext(), VT);
- if (!I->use_empty()) {
+    // Even an apparent 'unused' swifterror argument needs to be returned. So
+ // we do generate a copy for it that can be used on return from the
+ // function.
+ if (!I->use_empty() || isSwiftErrorArg) {
Optional<ISD::NodeType> AssertOp;
if (F.getAttributes().hasAttribute(Idx, Attribute::SExt))
AssertOp = ISD::AssertSext;
@@ -8040,12 +8265,12 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
FuncInfo->setArgumentFrameIndex(&*I, FI->getIndex());
}
- // Update SwiftErrorMap.
- if (Res.getOpcode() == ISD::CopyFromReg && TLI->supportSwiftError() &&
- F.getAttributes().hasAttribute(Idx, Attribute::SwiftError)) {
+ // Update the SwiftErrorVRegDefMap.
+ if (Res.getOpcode() == ISD::CopyFromReg && isSwiftErrorArg) {
unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
if (TargetRegisterInfo::isVirtualRegister(Reg))
- FuncInfo->SwiftErrorMap[FuncInfo->MBB][0] = Reg;
+ FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB,
+ FuncInfo->SwiftErrorArg, Reg);
}
// If this argument is live outside of the entry block, insert a copy from
@@ -8197,14 +8422,14 @@ void SelectionDAGBuilder::updateDAGForMaybeTailCall(SDValue MaybeTC) {
}
bool SelectionDAGBuilder::isDense(const CaseClusterVector &Clusters,
- unsigned *TotalCases, unsigned First,
- unsigned Last,
- unsigned Density) {
+ const SmallVectorImpl<unsigned> &TotalCases,
+ unsigned First, unsigned Last,
+ unsigned Density) const {
assert(Last >= First);
assert(TotalCases[Last] >= TotalCases[First]);
- APInt LowCase = Clusters[First].Low->getValue();
- APInt HighCase = Clusters[Last].High->getValue();
+ const APInt &LowCase = Clusters[First].Low->getValue();
+ const APInt &HighCase = Clusters[Last].High->getValue();
assert(LowCase.getBitWidth() == HighCase.getBitWidth());
// FIXME: A range of consecutive cases has 100% density, but only requires one
@@ -8233,7 +8458,7 @@ static inline bool areJTsAllowed(const TargetLowering &TLI,
TLI.isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
}
-bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters,
+bool SelectionDAGBuilder::buildJumpTable(const CaseClusterVector &Clusters,
unsigned First, unsigned Last,
const SwitchInst *SI,
MachineBasicBlock *DefaultMBB,
@@ -8252,12 +8477,12 @@ bool SelectionDAGBuilder::buildJumpTable(CaseClusterVector &Clusters,
for (unsigned I = First; I <= Last; ++I) {
assert(Clusters[I].Kind == CC_Range);
Prob += Clusters[I].Prob;
- APInt Low = Clusters[I].Low->getValue();
- APInt High = Clusters[I].High->getValue();
+ const APInt &Low = Clusters[I].Low->getValue();
+ const APInt &High = Clusters[I].High->getValue();
NumCmps += (Low == High) ? 1 : 2;
if (I != First) {
// Fill the gap between this and the previous cluster.
- APInt PreviousHigh = Clusters[I - 1].High->getValue();
+ const APInt &PreviousHigh = Clusters[I - 1].High->getValue();
assert(PreviousHigh.slt(Low));
uint64_t Gap = (Low - PreviousHigh).getLimitedValue() - 1;
for (uint64_t J = 0; J < Gap; J++)
@@ -8325,26 +8550,37 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters,
if (!areJTsAllowed(TLI, SI))
return;
+ const bool OptForSize = DefaultMBB->getParent()->getFunction()->optForSize();
+
const int64_t N = Clusters.size();
- const unsigned MinJumpTableSize = TLI.getMinimumJumpTableEntries();
+ const unsigned MinJumpTableEntries = TLI.getMinimumJumpTableEntries();
+ const unsigned SmallNumberOfEntries = MinJumpTableEntries / 2;
+ const unsigned MaxJumpTableSize =
+ OptForSize || TLI.getMaximumJumpTableSize() == 0
+ ? UINT_MAX : TLI.getMaximumJumpTableSize();
+
+ if (N < 2 || N < MinJumpTableEntries)
+ return;
// TotalCases[i]: Total nbr of cases in Clusters[0..i].
SmallVector<unsigned, 8> TotalCases(N);
-
for (unsigned i = 0; i < N; ++i) {
- APInt Hi = Clusters[i].High->getValue();
- APInt Lo = Clusters[i].Low->getValue();
+ const APInt &Hi = Clusters[i].High->getValue();
+ const APInt &Lo = Clusters[i].Low->getValue();
TotalCases[i] = (Hi - Lo).getLimitedValue() + 1;
if (i != 0)
TotalCases[i] += TotalCases[i - 1];
}
- unsigned MinDensity = JumpTableDensity;
- if (DefaultMBB->getParent()->getFunction()->optForSize())
- MinDensity = OptsizeJumpTableDensity;
- if (N >= MinJumpTableSize
- && isDense(Clusters, &TotalCases[0], 0, N - 1, MinDensity)) {
- // Cheap case: the whole range might be suitable for jump table.
+ const unsigned MinDensity =
+ OptForSize ? OptsizeJumpTableDensity : JumpTableDensity;
+
+  // Cheap case: the whole range may be suitable for a jump table.
+ unsigned JumpTableSize = (Clusters[N - 1].High->getValue() -
+ Clusters[0].Low->getValue())
+ .getLimitedValue(UINT_MAX - 1) + 1;
+ if (JumpTableSize <= MaxJumpTableSize &&
+ isDense(Clusters, TotalCases, 0, N - 1, MinDensity)) {
CaseCluster JTCluster;
if (buildJumpTable(Clusters, 0, N - 1, SI, DefaultMBB, JTCluster)) {
Clusters[0] = JTCluster;
@@ -8368,14 +8604,23 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters,
SmallVector<unsigned, 8> MinPartitions(N);
// LastElement[i] is the last element of the partition starting at i.
SmallVector<unsigned, 8> LastElement(N);
- // NumTables[i]: nbr of >= MinJumpTableSize partitions from Clusters[i..N-1].
- SmallVector<unsigned, 8> NumTables(N);
+ // PartitionsScore[i] is used to break ties when choosing between two
+ // partitionings resulting in the same number of partitions.
+ SmallVector<unsigned, 8> PartitionsScore(N);
+ // For PartitionsScore, a small number of comparisons is considered as good as
+ // a jump table and a single comparison is considered better than a jump
+ // table.
+ enum PartitionScores : unsigned {
+ NoTable = 0,
+ Table = 1,
+ FewCases = 1,
+ SingleCase = 2
+ };
// Base case: There is only one way to partition Clusters[N-1].
MinPartitions[N - 1] = 1;
LastElement[N - 1] = N - 1;
- assert(MinJumpTableSize > 1);
- NumTables[N - 1] = 0;
+ PartitionsScore[N - 1] = PartitionScores::SingleCase;
// Note: loop indexes are signed to avoid underflow.
for (int64_t i = N - 2; i >= 0; i--) {
@@ -8383,23 +8628,34 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters,
// Baseline: Put Clusters[i] into a partition on its own.
MinPartitions[i] = MinPartitions[i + 1] + 1;
LastElement[i] = i;
- NumTables[i] = NumTables[i + 1];
+ PartitionsScore[i] = PartitionsScore[i + 1] + PartitionScores::SingleCase;
// Search for a solution that results in fewer partitions.
for (int64_t j = N - 1; j > i; j--) {
// Try building a partition from Clusters[i..j].
- if (isDense(Clusters, &TotalCases[0], i, j, MinDensity)) {
+ JumpTableSize = (Clusters[j].High->getValue() -
+ Clusters[i].Low->getValue())
+ .getLimitedValue(UINT_MAX - 1) + 1;
+ if (JumpTableSize <= MaxJumpTableSize &&
+ isDense(Clusters, TotalCases, i, j, MinDensity)) {
unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]);
- bool IsTable = j - i + 1 >= MinJumpTableSize;
- unsigned Tables = IsTable + (j == N - 1 ? 0 : NumTables[j + 1]);
-
- // If this j leads to fewer partitions, or same number of partitions
- // with more lookup tables, it is a better partitioning.
+ unsigned Score = j == N - 1 ? 0 : PartitionsScore[j + 1];
+ int64_t NumEntries = j - i + 1;
+
+ if (NumEntries == 1)
+ Score += PartitionScores::SingleCase;
+ else if (NumEntries <= SmallNumberOfEntries)
+ Score += PartitionScores::FewCases;
+ else if (NumEntries >= MinJumpTableEntries)
+ Score += PartitionScores::Table;
+
+ // If this leads to fewer partitions, or to the same number of
+      // partitions with a better score, it is a better partitioning.
if (NumPartitions < MinPartitions[i] ||
- (NumPartitions == MinPartitions[i] && Tables > NumTables[i])) {
+ (NumPartitions == MinPartitions[i] && Score > PartitionsScore[i])) {
MinPartitions[i] = NumPartitions;
LastElement[i] = j;
- NumTables[i] = Tables;
+ PartitionsScore[i] = Score;
}
}
}
@@ -8414,7 +8670,7 @@ void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters,
unsigned NumClusters = Last - First + 1;
CaseCluster JTCluster;
- if (NumClusters >= MinJumpTableSize &&
+ if (NumClusters >= MinJumpTableEntries &&
buildJumpTable(Clusters, First, Last, SI, DefaultMBB, JTCluster)) {
Clusters[DstIndex++] = JTCluster;
} else {
@@ -9107,7 +9363,8 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
WorkList.pop_back();
unsigned NumClusters = W.LastCluster - W.FirstCluster + 1;
- if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None) {
+ if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None &&
+ !DefaultMBB->getParent()->getFunction()->optForMinSize()) {
// For optimized builds, lower large range as a balanced binary tree.
splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB);
continue;
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index b9888ae87639..abde8a89befc 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -305,12 +305,13 @@ private:
};
/// Check whether a range of clusters is dense enough for a jump table.
- bool isDense(const CaseClusterVector &Clusters, unsigned *TotalCases,
- unsigned First, unsigned Last, unsigned MinDensity);
+ bool isDense(const CaseClusterVector &Clusters,
+ const SmallVectorImpl<unsigned> &TotalCases,
+ unsigned First, unsigned Last, unsigned MinDensity) const;
/// Build a jump table cluster from Clusters[First..Last]. Returns false if it
/// decides it's not a good idea.
- bool buildJumpTable(CaseClusterVector &Clusters, unsigned First,
+ bool buildJumpTable(const CaseClusterVector &Clusters, unsigned First,
unsigned Last, const SwitchInst *SI,
MachineBasicBlock *DefaultMBB, CaseCluster &JTCluster);
@@ -652,8 +653,6 @@ public:
return CurInst ? CurInst->getDebugLoc() : DebugLoc();
}
- unsigned getSDNodeOrder() const { return SDNodeOrder; }
-
void CopyValueToVirtualRegister(const Value *V, unsigned Reg);
void visit(const Instruction &I);
@@ -875,8 +874,8 @@ private:
void visitAlloca(const AllocaInst &I);
void visitLoad(const LoadInst &I);
void visitStore(const StoreInst &I);
- void visitMaskedLoad(const CallInst &I);
- void visitMaskedStore(const CallInst &I);
+ void visitMaskedLoad(const CallInst &I, bool IsExpanding = false);
+ void visitMaskedStore(const CallInst &I, bool IsCompressing = false);
void visitMaskedGather(const CallInst &I);
void visitMaskedScatter(const CallInst &I);
void visitAtomicCmpXchg(const AtomicCmpXchgInst &I);
@@ -885,6 +884,7 @@ private:
void visitPHI(const PHINode &I);
void visitCall(const CallInst &I);
bool visitMemCmpCall(const CallInst &I);
+ bool visitMemPCpyCall(const CallInst &I);
bool visitMemChrCall(const CallInst &I);
bool visitStrCpyCall(const CallInst &I, bool isStpcpy);
bool visitStrCmpCall(const CallInst &I);
@@ -941,6 +941,11 @@ private:
/// Update the DAG and DAG builder with the relevant information after
/// a new root node has been created which could be a tail call.
void updateDAGForMaybeTailCall(SDValue MaybeTC);
+
+ /// Return the appropriate SDDbgValue based on N.
+ SDDbgValue *getDbgValue(SDValue N, DILocalVariable *Variable,
+ DIExpression *Expr, int64_t Offset, DebugLoc dl,
+ unsigned DbgSDNodeOrder);
};
/// RegsForValue - This struct represents the registers (physical or virtual)
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 93ac6d6f30f7..340088a5fc96 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -100,6 +100,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::JumpTable: return "JumpTable";
case ISD::GLOBAL_OFFSET_TABLE: return "GLOBAL_OFFSET_TABLE";
case ISD::RETURNADDR: return "RETURNADDR";
+ case ISD::ADDROFRETURNADDR: return "ADDROFRETURNADDR";
case ISD::FRAMEADDR: return "FRAMEADDR";
case ISD::LOCAL_RECOVER: return "LOCAL_RECOVER";
case ISD::READ_REGISTER: return "READ_REGISTER";
@@ -120,7 +121,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
unsigned OpNo = getOpcode() == ISD::INTRINSIC_WO_CHAIN ? 0 : 1;
unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue();
if (IID < Intrinsic::num_intrinsics)
- return Intrinsic::getName((Intrinsic::ID)IID);
+ return Intrinsic::getName((Intrinsic::ID)IID, None);
else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo())
return TII->getName(IID);
llvm_unreachable("Invalid intrinsic ID");
@@ -424,9 +425,9 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
} else if (const ConstantSDNode *CSDN = dyn_cast<ConstantSDNode>(this)) {
OS << '<' << CSDN->getAPIntValue() << '>';
} else if (const ConstantFPSDNode *CSDN = dyn_cast<ConstantFPSDNode>(this)) {
- if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle)
+ if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEsingle())
OS << '<' << CSDN->getValueAPF().convertToFloat() << '>';
- else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble)
+ else if (&CSDN->getValueAPF().getSemantics()==&APFloat::IEEEdouble())
OS << '<' << CSDN->getValueAPF().convertToDouble() << '>';
else {
OS << "<APFloat(";
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 1d61657194c5..6d717b44eb72 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -426,6 +426,10 @@ static void SplitCriticalSideEffectEdges(Function &Fn) {
}
bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
+ // If we already selected that function, we do not need to run SDISel.
+ if (mf.getProperties().hasProperty(
+ MachineFunctionProperties::Property::Selected))
+ return false;
// Do some sanity-checking on the command-line options.
assert((!EnableFastISelVerbose || TM.Options.EnableFastISel) &&
"-fast-isel-verbose requires -fast-isel");
@@ -594,16 +598,16 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
}
// Determine if there are any calls in this machine function.
- MachineFrameInfo *MFI = MF->getFrameInfo();
+ MachineFrameInfo &MFI = MF->getFrameInfo();
for (const auto &MBB : *MF) {
- if (MFI->hasCalls() && MF->hasInlineAsm())
+ if (MFI.hasCalls() && MF->hasInlineAsm())
break;
for (const auto &MI : MBB) {
const MCInstrDesc &MCID = TII->get(MI.getOpcode());
if ((MCID.isCall() && !MCID.isReturn()) ||
MI.isStackAligningInlineAsm()) {
- MFI->setHasCalls(true);
+ MFI.setHasCalls(true);
}
if (MI.isInlineAsm()) {
MF->setHasInlineAsm(true);
@@ -645,7 +649,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
}
if (TLI->hasCopyImplyingStackAdjustment(MF))
- MFI->setHasCopyImplyingStackAdjustment(true);
+ MFI.setHasCopyImplyingStackAdjustment(true);
// Freeze the set of reserved registers now that MachineFrameInfo has been
// set up. All the information required by getReservedRegs() should be
@@ -721,9 +725,8 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() {
}
void SelectionDAGISel::CodeGenAndEmitDAG() {
- std::string GroupName;
- if (TimePassesIsEnabled)
- GroupName = "Instruction Selection and Scheduling";
+ StringRef GroupName = "sdag";
+ StringRef GroupDescription = "Instruction Selection and Scheduling";
std::string BlockName;
int BlockNumber = -1;
(void)BlockNumber;
@@ -751,7 +754,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
// Run the DAG combiner in pre-legalize mode.
{
- NamedRegionTimer T("DAG Combining 1", GroupName, TimePassesIsEnabled);
+ NamedRegionTimer T("combine1", "DAG Combining 1", GroupName,
+ GroupDescription, TimePassesIsEnabled);
CurDAG->Combine(BeforeLegalizeTypes, *AA, OptLevel);
}
@@ -765,7 +769,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
bool Changed;
{
- NamedRegionTimer T("Type Legalization", GroupName, TimePassesIsEnabled);
+ NamedRegionTimer T("legalize_types", "Type Legalization", GroupName,
+ GroupDescription, TimePassesIsEnabled);
Changed = CurDAG->LegalizeTypes();
}
@@ -780,8 +785,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
// Run the DAG combiner in post-type-legalize mode.
{
- NamedRegionTimer T("DAG Combining after legalize types", GroupName,
- TimePassesIsEnabled);
+ NamedRegionTimer T("combine_lt", "DAG Combining after legalize types",
+ GroupName, GroupDescription, TimePassesIsEnabled);
CurDAG->Combine(AfterLegalizeTypes, *AA, OptLevel);
}
@@ -791,13 +796,15 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
}
{
- NamedRegionTimer T("Vector Legalization", GroupName, TimePassesIsEnabled);
+ NamedRegionTimer T("legalize_vec", "Vector Legalization", GroupName,
+ GroupDescription, TimePassesIsEnabled);
Changed = CurDAG->LegalizeVectors();
}
if (Changed) {
{
- NamedRegionTimer T("Type Legalization 2", GroupName, TimePassesIsEnabled);
+ NamedRegionTimer T("legalize_types2", "Type Legalization 2", GroupName,
+ GroupDescription, TimePassesIsEnabled);
CurDAG->LegalizeTypes();
}
@@ -806,8 +813,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
// Run the DAG combiner in post-type-legalize mode.
{
- NamedRegionTimer T("DAG Combining after legalize vectors", GroupName,
- TimePassesIsEnabled);
+ NamedRegionTimer T("combine_lv", "DAG Combining after legalize vectors",
+ GroupName, GroupDescription, TimePassesIsEnabled);
CurDAG->Combine(AfterLegalizeVectorOps, *AA, OptLevel);
}
@@ -819,7 +826,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
CurDAG->viewGraph("legalize input for " + BlockName);
{
- NamedRegionTimer T("DAG Legalization", GroupName, TimePassesIsEnabled);
+ NamedRegionTimer T("legalize", "DAG Legalization", GroupName,
+ GroupDescription, TimePassesIsEnabled);
CurDAG->Legalize();
}
@@ -831,7 +839,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
// Run the DAG combiner in post-legalize mode.
{
- NamedRegionTimer T("DAG Combining 2", GroupName, TimePassesIsEnabled);
+ NamedRegionTimer T("combine2", "DAG Combining 2", GroupName,
+ GroupDescription, TimePassesIsEnabled);
CurDAG->Combine(AfterLegalizeDAG, *AA, OptLevel);
}
@@ -847,7 +856,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
// Third, instruction select all of the operations to machine code, adding the
// code to the MachineBasicBlock.
{
- NamedRegionTimer T("Instruction Selection", GroupName, TimePassesIsEnabled);
+ NamedRegionTimer T("isel", "Instruction Selection", GroupName,
+ GroupDescription, TimePassesIsEnabled);
DoInstructionSelection();
}
@@ -860,8 +870,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
// Schedule machine code.
ScheduleDAGSDNodes *Scheduler = CreateScheduler();
{
- NamedRegionTimer T("Instruction Scheduling", GroupName,
- TimePassesIsEnabled);
+ NamedRegionTimer T("sched", "Instruction Scheduling", GroupName,
+ GroupDescription, TimePassesIsEnabled);
Scheduler->Run(CurDAG, FuncInfo->MBB);
}
@@ -872,7 +882,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
// inserted into.
MachineBasicBlock *FirstMBB = FuncInfo->MBB, *LastMBB;
{
- NamedRegionTimer T("Instruction Creation", GroupName, TimePassesIsEnabled);
+ NamedRegionTimer T("emit", "Instruction Creation", GroupName,
+ GroupDescription, TimePassesIsEnabled);
// FuncInfo->InsertPt is passed by reference and set to the end of the
// scheduled instructions.
@@ -886,8 +897,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
// Free the scheduler state.
{
- NamedRegionTimer T("Instruction Scheduling Cleanup", GroupName,
- TimePassesIsEnabled);
+ NamedRegionTimer T("cleanup", "Instruction Scheduling Cleanup", GroupName,
+ GroupDescription, TimePassesIsEnabled);
delete Scheduler;
}
@@ -1003,10 +1014,10 @@ bool SelectionDAGISel::PrepareEHLandingPad() {
// Add a label to mark the beginning of the landing pad. Deletion of the
// landing pad can thus be detected via the MachineModuleInfo.
- MCSymbol *Label = MF->getMMI().addLandingPad(MBB);
+ MCSymbol *Label = MF->addLandingPad(MBB);
// Assign the call site to the landing pad's begin label.
- MF->getMMI().setCallSiteLandingPad(Label, SDB->LPadToCallSiteMap[MBB]);
+ MF->setCallSiteLandingPad(Label, SDB->LPadToCallSiteMap[MBB]);
const MCInstrDesc &II = TII->get(TargetOpcode::EH_LABEL);
BuildMI(*MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II)
@@ -1153,14 +1164,22 @@ static void setupSwiftErrorVals(const Function &Fn, const TargetLowering *TLI,
return;
FuncInfo->SwiftErrorVals.clear();
- FuncInfo->SwiftErrorMap.clear();
- FuncInfo->SwiftErrorWorklist.clear();
+ FuncInfo->SwiftErrorVRegDefMap.clear();
+ FuncInfo->SwiftErrorVRegUpwardsUse.clear();
+ FuncInfo->SwiftErrorArg = nullptr;
// Check if function has a swifterror argument.
+ bool HaveSeenSwiftErrorArg = false;
for (Function::const_arg_iterator AI = Fn.arg_begin(), AE = Fn.arg_end();
AI != AE; ++AI)
- if (AI->hasSwiftErrorAttr())
+ if (AI->hasSwiftErrorAttr()) {
+ assert(!HaveSeenSwiftErrorArg &&
+ "Must have only one swifterror parameter");
+ (void)HaveSeenSwiftErrorArg; // silence warning.
+ HaveSeenSwiftErrorArg = true;
+ FuncInfo->SwiftErrorArg = &*AI;
FuncInfo->SwiftErrorVals.push_back(&*AI);
+ }
for (const auto &LLVMBB : Fn)
for (const auto &Inst : LLVMBB) {
@@ -1170,95 +1189,152 @@ static void setupSwiftErrorVals(const Function &Fn, const TargetLowering *TLI,
}
}
-/// For each basic block, merge incoming swifterror values or simply propagate
-/// them. The merged results will be saved in SwiftErrorMap. For predecessors
-/// that are not yet visited, we create virtual registers to hold the swifterror
-/// values and save them in SwiftErrorWorklist.
-static void mergeIncomingSwiftErrors(FunctionLoweringInfo *FuncInfo,
- const TargetLowering *TLI,
- const TargetInstrInfo *TII,
- const BasicBlock *LLVMBB,
- SelectionDAGBuilder *SDB) {
+static void createSwiftErrorEntriesInEntryBlock(FunctionLoweringInfo *FuncInfo,
+ const TargetLowering *TLI,
+ const TargetInstrInfo *TII,
+ const BasicBlock *LLVMBB,
+ SelectionDAGBuilder *SDB) {
if (!TLI->supportSwiftError())
return;
- // We should only do this when we have swifterror parameter or swifterror
+  // We only need to do this when we have a swifterror parameter or swifterror
// alloc.
if (FuncInfo->SwiftErrorVals.empty())
return;
- // At beginning of a basic block, insert PHI nodes or get the virtual
- // register from the only predecessor, and update SwiftErrorMap; if one
- // of the predecessors is not visited, update SwiftErrorWorklist.
- // At end of a basic block, if a block is in SwiftErrorWorklist, insert copy
- // to sync up the virtual register assignment.
-
- // Always create a virtual register for each swifterror value in entry block.
- auto &DL = SDB->DAG.getDataLayout();
- const TargetRegisterClass *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
if (pred_begin(LLVMBB) == pred_end(LLVMBB)) {
- for (unsigned I = 0, E = FuncInfo->SwiftErrorVals.size(); I < E; I++) {
+ auto &DL = FuncInfo->MF->getDataLayout();
+ auto const *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
+ for (const auto *SwiftErrorVal : FuncInfo->SwiftErrorVals) {
+ // We will always generate a copy from the argument. It is always used at
+ // least by the 'return' of the swifterror.
+ if (FuncInfo->SwiftErrorArg && FuncInfo->SwiftErrorArg == SwiftErrorVal)
+ continue;
unsigned VReg = FuncInfo->MF->getRegInfo().createVirtualRegister(RC);
// Assign Undef to Vreg. We construct MI directly to make sure it works
// with FastISel.
- BuildMI(*FuncInfo->MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(),
- TII->get(TargetOpcode::IMPLICIT_DEF), VReg);
- FuncInfo->SwiftErrorMap[FuncInfo->MBB].push_back(VReg);
+ BuildMI(*FuncInfo->MBB, FuncInfo->MBB->getFirstNonPHI(),
+ SDB->getCurDebugLoc(), TII->get(TargetOpcode::IMPLICIT_DEF),
+ VReg);
+ FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB, SwiftErrorVal, VReg);
}
- return;
}
+}
- if (auto *UniquePred = LLVMBB->getUniquePredecessor()) {
- auto *UniquePredMBB = FuncInfo->MBBMap[UniquePred];
- if (!FuncInfo->SwiftErrorMap.count(UniquePredMBB)) {
- // Update SwiftErrorWorklist with a new virtual register.
- for (unsigned I = 0, E = FuncInfo->SwiftErrorVals.size(); I < E; I++) {
- unsigned VReg = FuncInfo->MF->getRegInfo().createVirtualRegister(RC);
- FuncInfo->SwiftErrorWorklist[UniquePredMBB].push_back(VReg);
- // Propagate the information from the single predecessor.
- FuncInfo->SwiftErrorMap[FuncInfo->MBB].push_back(VReg);
- }
- return;
- }
- // Propagate the information from the single predecessor.
- FuncInfo->SwiftErrorMap[FuncInfo->MBB] =
- FuncInfo->SwiftErrorMap[UniquePredMBB];
+/// Propagate swifterror values through the machine function CFG.
+static void propagateSwiftErrorVRegs(FunctionLoweringInfo *FuncInfo) {
+ auto *TLI = FuncInfo->TLI;
+ if (!TLI->supportSwiftError())
return;
- }
- // For the case of multiple predecessors, update SwiftErrorWorklist.
- // Handle the case where we have two or more predecessors being the same.
- for (const_pred_iterator PI = pred_begin(LLVMBB), PE = pred_end(LLVMBB);
- PI != PE; ++PI) {
- auto *PredMBB = FuncInfo->MBBMap[*PI];
- if (!FuncInfo->SwiftErrorMap.count(PredMBB) &&
- !FuncInfo->SwiftErrorWorklist.count(PredMBB)) {
- for (unsigned I = 0, E = FuncInfo->SwiftErrorVals.size(); I < E; I++) {
- unsigned VReg = FuncInfo->MF->getRegInfo().createVirtualRegister(RC);
- // When we actually visit the basic block PredMBB, we will materialize
- // the virtual register assignment in copySwiftErrorsToFinalVRegs.
- FuncInfo->SwiftErrorWorklist[PredMBB].push_back(VReg);
+  // We only need to do this when we have a swifterror parameter or swifterror
+ // alloc.
+ if (FuncInfo->SwiftErrorVals.empty())
+ return;
+
+ // For each machine basic block in reverse post order.
+ ReversePostOrderTraversal<MachineFunction *> RPOT(FuncInfo->MF);
+ for (ReversePostOrderTraversal<MachineFunction *>::rpo_iterator
+ It = RPOT.begin(),
+ E = RPOT.end();
+ It != E; ++It) {
+ MachineBasicBlock *MBB = *It;
+
+ // For each swifterror value in the function.
+ for(const auto *SwiftErrorVal : FuncInfo->SwiftErrorVals) {
+ auto Key = std::make_pair(MBB, SwiftErrorVal);
+ auto UUseIt = FuncInfo->SwiftErrorVRegUpwardsUse.find(Key);
+ auto VRegDefIt = FuncInfo->SwiftErrorVRegDefMap.find(Key);
+ bool UpwardsUse = UUseIt != FuncInfo->SwiftErrorVRegUpwardsUse.end();
+ unsigned UUseVReg = UpwardsUse ? UUseIt->second : 0;
+ bool DownwardDef = VRegDefIt != FuncInfo->SwiftErrorVRegDefMap.end();
+ assert(!(UpwardsUse && !DownwardDef) &&
+ "We can't have an upwards use but no downwards def");
+
+ // If there is no upwards exposed use and an entry for the swifterror in
+      // the def map for this value, we don't need to do anything: we already
+ // have a downward def for this basic block.
+ if (!UpwardsUse && DownwardDef)
+ continue;
+
+ // Otherwise we either have an upwards exposed use vreg that we need to
+ // materialize or need to forward the downward def from predecessors.
+
+ // Check whether we have a single vreg def from all predecessors.
+ // Otherwise we need a phi.
+ SmallVector<std::pair<MachineBasicBlock *, unsigned>, 4> VRegs;
+ SmallSet<const MachineBasicBlock*, 8> Visited;
+ for (auto *Pred : MBB->predecessors()) {
+ if (!Visited.insert(Pred).second)
+ continue;
+ VRegs.push_back(std::make_pair(
+ Pred, FuncInfo->getOrCreateSwiftErrorVReg(Pred, SwiftErrorVal)));
+ if (Pred != MBB)
+ continue;
+ // We have a self-edge.
+ // If there was no upwards use in this basic block there is now one: the
+        // phi needs to use itself.
+ if (!UpwardsUse) {
+ UpwardsUse = true;
+ UUseIt = FuncInfo->SwiftErrorVRegUpwardsUse.find(Key);
+ assert(UUseIt != FuncInfo->SwiftErrorVRegUpwardsUse.end());
+ UUseVReg = UUseIt->second;
+ }
+ }
+
+ // We need a phi node if we have more than one predecessor with different
+ // downward defs.
+ bool needPHI =
+ VRegs.size() >= 1 &&
+ std::find_if(
+ VRegs.begin(), VRegs.end(),
+ [&](const std::pair<const MachineBasicBlock *, unsigned> &V)
+ -> bool { return V.second != VRegs[0].second; }) !=
+ VRegs.end();
+
+      // If there is no upwards exposed use and we don't need a phi, just
+ // forward the swifterror vreg from the predecessor(s).
+ if (!UpwardsUse && !needPHI) {
+ assert(!VRegs.empty() &&
+ "No predecessors? The entry block should bail out earlier");
+ // Just forward the swifterror vreg from the predecessor(s).
+ FuncInfo->setCurrentSwiftErrorVReg(MBB, SwiftErrorVal, VRegs[0].second);
+ continue;
+ }
+
+ auto DLoc = isa<Instruction>(SwiftErrorVal)
+ ? dyn_cast<Instruction>(SwiftErrorVal)->getDebugLoc()
+ : DebugLoc();
+ const auto *TII = FuncInfo->MF->getSubtarget().getInstrInfo();
+
+      // If we don't need a phi, create a copy to the upward exposed vreg.
+ if (!needPHI) {
+ assert(UpwardsUse);
+ unsigned DestReg = UUseVReg;
+ BuildMI(*MBB, MBB->getFirstNonPHI(), DLoc, TII->get(TargetOpcode::COPY),
+ DestReg)
+ .addReg(VRegs[0].second);
+ continue;
+ }
+
+ // We need a phi: if there is an upwards exposed use we already have a
+      // destination virtual register number; otherwise, we generate a new one.
+ auto &DL = FuncInfo->MF->getDataLayout();
+ auto const *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
+ unsigned PHIVReg =
+ UpwardsUse ? UUseVReg
+ : FuncInfo->MF->getRegInfo().createVirtualRegister(RC);
+ MachineInstrBuilder SwiftErrorPHI =
+ BuildMI(*MBB, MBB->getFirstNonPHI(), DLoc,
+ TII->get(TargetOpcode::PHI), PHIVReg);
+ for (auto BBRegPair : VRegs) {
+ SwiftErrorPHI.addReg(BBRegPair.second).addMBB(BBRegPair.first);
}
- }
- }
- // For the case of multiple predecessors, create a virtual register for
- // each swifterror value and generate Phi node.
- for (unsigned I = 0, E = FuncInfo->SwiftErrorVals.size(); I < E; I++) {
- unsigned VReg = FuncInfo->MF->getRegInfo().createVirtualRegister(RC);
- FuncInfo->SwiftErrorMap[FuncInfo->MBB].push_back(VReg);
-
- MachineInstrBuilder SwiftErrorPHI = BuildMI(*FuncInfo->MBB,
- FuncInfo->MBB->begin(), SDB->getCurDebugLoc(),
- TII->get(TargetOpcode::PHI), VReg);
- for (const_pred_iterator PI = pred_begin(LLVMBB), PE = pred_end(LLVMBB);
- PI != PE; ++PI) {
- auto *PredMBB = FuncInfo->MBBMap[*PI];
- unsigned SwiftErrorReg = FuncInfo->SwiftErrorMap.count(PredMBB) ?
- FuncInfo->SwiftErrorMap[PredMBB][I] :
- FuncInfo->SwiftErrorWorklist[PredMBB][I];
- SwiftErrorPHI.addReg(SwiftErrorReg)
- .addMBB(PredMBB);
+ // We did not have a definition in this block before: store the phi's vreg
+      // as this block's downward exposed def.
+ if (!UpwardsUse)
+ FuncInfo->setCurrentSwiftErrorVReg(MBB, SwiftErrorVal, PHIVReg);
}
}
}
@@ -1309,7 +1385,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
if (!FuncInfo->MBB)
continue; // Some blocks like catchpads have no code or MBB.
FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI();
- mergeIncomingSwiftErrors(FuncInfo, TLI, TII, LLVMBB, SDB);
+ createSwiftErrorEntriesInEntryBlock(FuncInfo, TLI, TII, LLVMBB, SDB);
// Setup an EH landing-pad block.
FuncInfo->ExceptionPointerVirtReg = 0;
@@ -1486,6 +1562,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
FuncInfo->PHINodesToUpdate.clear();
}
+ propagateSwiftErrorVRegs(FuncInfo);
+
delete FastIS;
SDB->clearDanglingDebugInfo();
SDB->SPDescriptor.resetPerFunctionState();
@@ -3388,7 +3466,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
SelectionDAG::DAGNodeDeletedListener NDL(*CurDAG, [&](SDNode *N,
SDNode *E) {
auto &Chain = ChainNodesMatched;
- assert((!E || llvm::find(Chain, N) == Chain.end()) &&
+ assert((!E || !is_contained(Chain, N)) &&
"Chain node replaced during MorphNode");
Chain.erase(std::remove(Chain.begin(), Chain.end(), N), Chain.end());
});
@@ -3487,7 +3565,7 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
NodeToMatch->getValueType(i) == MVT::iPTR ||
Res.getValueType() == MVT::iPTR ||
NodeToMatch->getValueType(i).getSizeInBits() ==
- Res.getValueType().getSizeInBits()) &&
+ Res.getValueSizeInBits()) &&
"invalid replacement");
CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, i), Res);
}
@@ -3579,7 +3657,7 @@ void SelectionDAGISel::CannotYetSelect(SDNode *N) {
unsigned iid =
cast<ConstantSDNode>(N->getOperand(HasInputChain))->getZExtValue();
if (iid < Intrinsic::num_intrinsics)
- Msg << "intrinsic %" << Intrinsic::getName((Intrinsic::ID)iid);
+ Msg << "intrinsic %" << Intrinsic::getName((Intrinsic::ID)iid, None);
else if (const TargetIntrinsicInfo *TII = TM.getIntrinsicInfo())
Msg << "target intrinsic %" << TII->getName(iid);
else
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 90aaba247265..d27e2455978d 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -55,7 +55,8 @@ void StatepointLoweringState::startNewStatepoint(SelectionDAGBuilder &Builder) {
NextSlotToAllocate = 0;
// Need to resize this on each safepoint - we need the two to stay in sync and
// the clear patterns of a SelectionDAGBuilder have no relation to
- // FunctionLoweringInfo. SmallBitVector::reset initializes all bits to false.
+ // FunctionLoweringInfo. Also need to ensure used bits get cleared.
+ AllocatedStackSlots.clear();
AllocatedStackSlots.resize(Builder.FuncInfo.StatepointStackSlots.size());
}
@@ -70,7 +71,7 @@ SDValue
StatepointLoweringState::allocateStackSlot(EVT ValueType,
SelectionDAGBuilder &Builder) {
NumSlotsAllocatedForStatepoints++;
- auto *MFI = Builder.DAG.getMachineFunction().getFrameInfo();
+ MachineFrameInfo &MFI = Builder.DAG.getMachineFunction().getFrameInfo();
unsigned SpillSize = ValueType.getSizeInBits() / 8;
assert((SpillSize * 8) == ValueType.getSizeInBits() && "Size not in bytes?");
@@ -82,16 +83,16 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType,
const size_t NumSlots = AllocatedStackSlots.size();
assert(NextSlotToAllocate <= NumSlots && "Broken invariant");
- // The stack slots in StatepointStackSlots beyond the first NumSlots were
- // added in this instance of StatepointLoweringState, and cannot be re-used.
- assert(NumSlots <= Builder.FuncInfo.StatepointStackSlots.size() &&
+ assert(AllocatedStackSlots.size() ==
+ Builder.FuncInfo.StatepointStackSlots.size() &&
"Broken invariant");
for (; NextSlotToAllocate < NumSlots; NextSlotToAllocate++) {
if (!AllocatedStackSlots.test(NextSlotToAllocate)) {
const int FI = Builder.FuncInfo.StatepointStackSlots[NextSlotToAllocate];
- if (MFI->getObjectSize(FI) == SpillSize) {
+ if (MFI.getObjectSize(FI) == SpillSize) {
AllocatedStackSlots.set(NextSlotToAllocate);
+ // TODO: Is ValueType the right thing to use here?
return Builder.DAG.getFrameIndex(FI, ValueType);
}
}
@@ -101,9 +102,13 @@ StatepointLoweringState::allocateStackSlot(EVT ValueType,
SDValue SpillSlot = Builder.DAG.CreateStackTemporary(ValueType);
const unsigned FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
- MFI->markAsStatepointSpillSlotObjectIndex(FI);
+ MFI.markAsStatepointSpillSlotObjectIndex(FI);
Builder.FuncInfo.StatepointStackSlots.push_back(FI);
+ AllocatedStackSlots.resize(AllocatedStackSlots.size()+1, true);
+ assert(AllocatedStackSlots.size() ==
+ Builder.FuncInfo.StatepointStackSlots.size() &&
+ "Broken invariant");
StatepointMaxSlotsRequired = std::max<unsigned long>(
StatepointMaxSlotsRequired, Builder.FuncInfo.StatepointStackSlots.size());
@@ -350,9 +355,8 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain,
// vary since we spill vectors of pointers too). At some point we
// can consider allowing spills of smaller values to larger slots
// (i.e. change the '==' in the assert below to a '>=').
- auto *MFI = Builder.DAG.getMachineFunction().getFrameInfo();
- assert((MFI->getObjectSize(Index) * 8) ==
- Incoming.getValueType().getSizeInBits() &&
+ MachineFrameInfo &MFI = Builder.DAG.getMachineFunction().getFrameInfo();
+ assert((MFI.getObjectSize(Index) * 8) == Incoming.getValueSizeInBits() &&
"Bad spill: stack slot does not match!");
#endif
@@ -370,7 +374,7 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain,
/// Lower a single value incoming to a statepoint node. This value can be
/// either a deopt value or a gc value, the handling is the same. We special
/// case constants and allocas, then fall back to spilling if required.
-static void lowerIncomingStatepointValue(SDValue Incoming,
+static void lowerIncomingStatepointValue(SDValue Incoming, bool LiveInOnly,
SmallVectorImpl<SDValue> &Ops,
SelectionDAGBuilder &Builder) {
SDValue Chain = Builder.getRoot();
@@ -389,6 +393,14 @@ static void lowerIncomingStatepointValue(SDValue Incoming,
// relocate the address of the alloca itself?)
Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(),
Incoming.getValueType()));
+ } else if (LiveInOnly) {
+ // If this value is live in (not live-on-return, or live-through), we can
+ // treat it the same way patchpoint treats its "live in" values. We'll
+ // end up folding some of these into stack references, but they'll be
+ // handled by the register allocator. Note that we do not have the notion
+ // of a late use, so these values might be placed in registers which are
+ // clobbered by the call. This is fine for live-in.
+ Ops.push_back(Incoming);
} else {
// Otherwise, locate a spill slot and explicitly spill it so it
// can be found by the runtime later. We currently do not support
@@ -439,19 +451,38 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
"non gc managed derived pointer found in statepoint");
}
}
+ assert(SI.Bases.size() == SI.Ptrs.size() && "Pointer without base!");
} else {
assert(SI.Bases.empty() && "No gc specified, so cannot relocate pointers!");
assert(SI.Ptrs.empty() && "No gc specified, so cannot relocate pointers!");
}
#endif
+ // Figure out what lowering strategy we're going to use for each part.
+ // Note: It is conservatively correct to lower both "live-in" and "live-out"
+ // as "live-through". A "live-through" variable is one which is "live-in",
+ // "live-out", and live throughout the lifetime of the call (i.e. we can find
+ // it from any PC within the transitive callee of the statepoint). In
+ // particular, if the callee spills callee preserved registers we may not
+ // be able to find a value placed in that register during the call. This is
+ // fine for live-out, but not for live-through. If we were willing to make
+ // assumptions about the code generator producing the callee, we could
+ // potentially allow live-through values in callee saved registers.
+ const bool LiveInDeopt =
+ SI.StatepointFlags & (uint64_t)StatepointFlags::DeoptLiveIn;
+
+ auto isGCValue = [&](const Value *V) {
+ return is_contained(SI.Ptrs, V) || is_contained(SI.Bases, V);
+ };
+
// Before we actually start lowering (and allocating spill slots for values),
// reserve any stack slots which we judge to be profitable to reuse for a
// particular value. This is purely an optimization over the code below and
// doesn't change semantics at all. It is important for performance that we
// reserve slots for both deopt and gc values before lowering either.
for (const Value *V : SI.DeoptState) {
- reservePreviousStackSlotForValue(V, Builder);
+ if (!LiveInDeopt || isGCValue(V))
+ reservePreviousStackSlotForValue(V, Builder);
}
for (unsigned i = 0; i < SI.Bases.size(); ++i) {
reservePreviousStackSlotForValue(SI.Bases[i], Builder);
@@ -468,7 +499,8 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
// what type of values are contained within.
for (const Value *V : SI.DeoptState) {
SDValue Incoming = Builder.getValue(V);
- lowerIncomingStatepointValue(Incoming, Ops, Builder);
+ const bool LiveInValue = LiveInDeopt && !isGCValue(V);
+ lowerIncomingStatepointValue(Incoming, LiveInValue, Ops, Builder);
}
// Finally, go ahead and lower all the gc arguments. There's no prefixed
@@ -478,10 +510,12 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
// (base[0], ptr[0], base[1], ptr[1], ...)
for (unsigned i = 0; i < SI.Bases.size(); ++i) {
const Value *Base = SI.Bases[i];
- lowerIncomingStatepointValue(Builder.getValue(Base), Ops, Builder);
+ lowerIncomingStatepointValue(Builder.getValue(Base), /*LiveInOnly*/ false,
+ Ops, Builder);
const Value *Ptr = SI.Ptrs[i];
- lowerIncomingStatepointValue(Builder.getValue(Ptr), Ops, Builder);
+ lowerIncomingStatepointValue(Builder.getValue(Ptr), /*LiveInOnly*/ false,
+ Ops, Builder);
}
// If there are any explicit spill slots passed to the statepoint, record
@@ -889,7 +923,7 @@ void SelectionDAGBuilder::visitGCResult(const GCResultInst &CI) {
void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
#ifndef NDEBUG
// Consistency check
- // We skip this check for relocates not in the same basic block as thier
+ // We skip this check for relocates not in the same basic block as their
// statepoint. It would be too expensive to preserve validation info through
// different basic blocks.
if (Relocate.getStatepoint()->getParent() == Relocate.getParent())
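As a standalone illustration of the "live-in vs. spill" decision described in the comments above: when a statepoint carries the DeoptLiveIn flag, deopt values that are not GC pointers may be passed as plain live-in operands, while GC pointers must still be spilled so the runtime can relocate them. The sketch below is a simplified model only (plain strings instead of SDValues; names are illustrative, not the real API):

#include <iostream>
#include <set>
#include <string>
#include <vector>

enum class Lowering { LiveIn, Spill };

// Mirrors the LiveInValue = LiveInDeopt && !isGCValue(V) choice.
Lowering chooseDeoptLowering(bool LiveInDeopt, bool IsGCValue) {
  if (LiveInDeopt && !IsGCValue)
    return Lowering::LiveIn;
  return Lowering::Spill;
}

int main() {
  std::set<std::string> GCPtrs = {"%base", "%derived"};
  std::vector<std::string> DeoptState = {"%base", "%len", "%flag"};
  bool LiveInDeopt = true; // stands in for StatepointFlags::DeoptLiveIn

  for (const auto &V : DeoptState) {
    Lowering L = chooseDeoptLowering(LiveInDeopt, GCPtrs.count(V) != 0);
    std::cout << V << ": " << (L == Lowering::LiveIn ? "live-in" : "spill")
              << "\n";
  }
  return 0;
}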
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 806646fbc676..591a37d600cc 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -216,7 +216,7 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
case ISD::SETUEQ:
LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 :
(VT == MVT::f64) ? RTLIB::UO_F64 :
- (VT == MVT::f128) ? RTLIB::UO_F64 : RTLIB::UO_PPCF128;
+ (VT == MVT::f128) ? RTLIB::UO_F128 : RTLIB::UO_PPCF128;
LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 :
(VT == MVT::f64) ? RTLIB::OEQ_F64 :
(VT == MVT::f128) ? RTLIB::OEQ_F128 : RTLIB::OEQ_PPCF128;
@@ -418,6 +418,58 @@ bool TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op,
return false;
}
+bool
+TargetLowering::TargetLoweringOpt::SimplifyDemandedBits(SDNode *User,
+ unsigned OpIdx,
+ const APInt &Demanded,
+ DAGCombinerInfo &DCI) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Op = User->getOperand(OpIdx);
+ APInt KnownZero, KnownOne;
+
+ if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne,
+ *this, 0, true))
+ return false;
+
+
+ // Old will not always be the same as Op. For example:
+ //
+ // Demanded = 0xffffff
+ // Op = i64 truncate (i32 and x, 0xffffff)
+ // In this case SimplifyDemandedBits will want to replace the 'and' node
+ // with the value 'x', which will give us:
+ // Old = i32 and x, 0xffffff
+ // New = x
+ if (Old.hasOneUse()) {
+ // For the one use case, we just commit the change.
+ DCI.CommitTargetLoweringOpt(*this);
+ return true;
+ }
+
+ // If Old has more than one use then it must be Op, because the
+ // AssumeSingleUse flag is not propagated to recursive calls of
+ // SimplifyDemandedBits, so the only node with multiple uses that
+ // it will attempt to combine will be Op.
+ assert(Old == Op);
+
+ SmallVector<SDValue, 4> NewOps;
+ for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
+ if (i == OpIdx) {
+ NewOps.push_back(New);
+ continue;
+ }
+ NewOps.push_back(User->getOperand(i));
+ }
+ DAG.UpdateNodeOperands(User, NewOps);
+ // Op has fewer users now, so we may be able to perform additional combines
+ // with it.
+ DCI.AddToWorklist(Op.getNode());
+ // User's operands have been updated, so we may be able to do new combines
+ // with it.
+ DCI.AddToWorklist(User);
+ return true;
+}
+
/// Look at Op. At this point, we know that only the DemandedMask bits of the
/// result of Op are ever used downstream. If we can use this information to
/// simplify Op, create a new simplified DAG node and return true, returning the
@@ -430,9 +482,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
APInt &KnownZero,
APInt &KnownOne,
TargetLoweringOpt &TLO,
- unsigned Depth) const {
+ unsigned Depth,
+ bool AssumeSingleUse) const {
unsigned BitWidth = DemandedMask.getBitWidth();
- assert(Op.getValueType().getScalarType().getSizeInBits() == BitWidth &&
+ assert(Op.getScalarValueSizeInBits() == BitWidth &&
"Mask size mismatches value type size!");
APInt NewMask = DemandedMask;
SDLoc dl(Op);
@@ -442,7 +495,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
KnownZero = KnownOne = APInt(BitWidth, 0);
// Other users may use these bits.
- if (!Op.getNode()->hasOneUse()) {
+ if (!Op.getNode()->hasOneUse() && !AssumeSingleUse) {
if (Depth != 0) {
// If not at the root, Just compute the KnownZero/KnownOne bits to
// simplify things downstream.
@@ -468,22 +521,63 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
KnownOne = cast<ConstantSDNode>(Op)->getAPIntValue();
KnownZero = ~KnownOne;
return false; // Don't fall through, will infinitely loop.
+ case ISD::BUILD_VECTOR:
+ // Collect the known bits that are shared by every constant vector element.
+ KnownZero = KnownOne = APInt::getAllOnesValue(BitWidth);
+ for (SDValue SrcOp : Op->ops()) {
+ if (!isa<ConstantSDNode>(SrcOp)) {
+ // We can only handle all constant values - bail out with no known bits.
+ KnownZero = KnownOne = APInt(BitWidth, 0);
+ return false;
+ }
+ KnownOne2 = cast<ConstantSDNode>(SrcOp)->getAPIntValue();
+ KnownZero2 = ~KnownOne2;
+
+ // BUILD_VECTOR can implicitly truncate sources, we must handle this.
+ if (KnownOne2.getBitWidth() != BitWidth) {
+ assert(KnownOne2.getBitWidth() > BitWidth &&
+ KnownZero2.getBitWidth() > BitWidth &&
+ "Expected BUILD_VECTOR implicit truncation");
+ KnownOne2 = KnownOne2.trunc(BitWidth);
+ KnownZero2 = KnownZero2.trunc(BitWidth);
+ }
+
+ // Known bits are the values that are shared by every element.
+ // TODO: support per-element known bits.
+ KnownOne &= KnownOne2;
+ KnownZero &= KnownZero2;
+ }
+ return false; // Don't fall through, will infinitely loop.
case ISD::AND:
// If the RHS is a constant, check to see if the LHS would be zero without
// using the bits from the RHS. Below, we use knowledge about the RHS to
// simplify the LHS, here we're using information from the LHS to simplify
// the RHS.
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ SDValue Op0 = Op.getOperand(0);
APInt LHSZero, LHSOne;
// Do not increment Depth here; that can cause an infinite loop.
- TLO.DAG.computeKnownBits(Op.getOperand(0), LHSZero, LHSOne, Depth);
+ TLO.DAG.computeKnownBits(Op0, LHSZero, LHSOne, Depth);
// If the LHS already has zeros where RHSC does, this and is dead.
if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask))
- return TLO.CombineTo(Op, Op.getOperand(0));
+ return TLO.CombineTo(Op, Op0);
+
// If any of the set bits in the RHS are known zero on the LHS, shrink
// the constant.
if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & NewMask))
return true;
+
+ // Bitwise-not (xor X, -1) is a special case: we don't usually shrink its
+ // constant, but if this 'and' is only clearing bits that were just set by
+ // the xor, then this 'and' can be eliminated by shrinking the mask of
+ // the xor. For example, for a 32-bit X:
+ // and (xor (srl X, 31), -1), 1 --> xor (srl X, 31), 1
+ if (isBitwiseNot(Op0) && Op0.hasOneUse() &&
+ LHSOne == ~RHSC->getAPIntValue()) {
+ SDValue Xor = TLO.DAG.getNode(ISD::XOR, dl, Op.getValueType(),
+ Op0.getOperand(0), Op.getOperand(1));
+ return TLO.CombineTo(Op, Xor);
+ }
}
if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
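A quick standalone check of the algebra behind the new bitwise-not special case above, for the 32-bit example given in the comment (where the srl leaves only bit 0 possibly set). This demonstrates only the identity, not the DAG combine itself; the value ranges are illustrative:

#include <cassert>
#include <cstdint>

int main() {
  // and (xor (srl X, 31), -1), 1  -->  xor (srl X, 31), 1
  // Holds because the 'and' only clears bits that the xor just set.
  for (uint64_t i = 0; i <= 0xFFFFFFFFull; i += 0x10001ull) {
    uint32_t x = static_cast<uint32_t>(i);
    uint32_t lhs = (~(x >> 31)) & 1u; // original form
    uint32_t rhs = (x >> 31) ^ 1u;    // shrunk-mask form
    assert(lhs == rhs);
  }
  return 0;
}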
@@ -599,10 +693,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If the RHS is a constant, see if we can simplify it.
// for XOR, we prefer to force bits to 1 if they will make a -1.
- // if we can't force bits, try to shrink constant
+ // If we can't force bits, try to shrink the constant.
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
APInt Expanded = C->getAPIntValue() | (~NewMask);
- // if we can expand it to have all bits set, do it
+ // If we can expand it to have all bits set, do it.
if (Expanded.isAllOnesValue()) {
if (Expanded != C->getAPIntValue()) {
EVT VT = Op.getValueType();
@@ -610,7 +704,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
TLO.DAG.getConstant(Expanded, dl, VT));
return TLO.CombineTo(Op, New);
}
- // if it already has all the bits set, nothing to change
+ // If it already has all the bits set, nothing to change
// but don't shrink either!
} else if (TLO.ShrinkDemandedConstant(Op, NewMask)) {
return true;
@@ -823,7 +917,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// demand the input sign bit.
APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
if (HighBits.intersects(NewMask))
- InDemandedMask |= APInt::getSignBit(VT.getScalarType().getSizeInBits());
+ InDemandedMask |= APInt::getSignBit(VT.getScalarSizeInBits());
if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask,
KnownZero, KnownOne, TLO, Depth+1))
@@ -866,9 +960,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
APInt MsbMask = APInt::getHighBitsSet(BitWidth, 1);
// If we only care about the highest bit, don't bother shifting right.
if (MsbMask == NewMask) {
- unsigned ShAmt = ExVT.getScalarType().getSizeInBits();
+ unsigned ShAmt = ExVT.getScalarSizeInBits();
SDValue InOp = Op.getOperand(0);
- unsigned VTBits = Op->getValueType(0).getScalarType().getSizeInBits();
+ unsigned VTBits = Op->getValueType(0).getScalarSizeInBits();
bool AlreadySignExtended =
TLO.DAG.ComputeNumSignBits(InOp) >= VTBits-ShAmt+1;
// However if the input is already sign extended we expect the sign
@@ -892,17 +986,17 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// present in the input.
APInt NewBits =
APInt::getHighBitsSet(BitWidth,
- BitWidth - ExVT.getScalarType().getSizeInBits());
+ BitWidth - ExVT.getScalarSizeInBits());
// If none of the extended bits are demanded, eliminate the sextinreg.
if ((NewBits & NewMask) == 0)
return TLO.CombineTo(Op, Op.getOperand(0));
APInt InSignBit =
- APInt::getSignBit(ExVT.getScalarType().getSizeInBits()).zext(BitWidth);
+ APInt::getSignBit(ExVT.getScalarSizeInBits()).zext(BitWidth);
APInt InputDemandedBits =
APInt::getLowBitsSet(BitWidth,
- ExVT.getScalarType().getSizeInBits()) &
+ ExVT.getScalarSizeInBits()) &
NewMask;
// Since the sign extended bits are demanded, we know that the sign
@@ -919,8 +1013,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If the input sign bit is known zero, convert this into a zero extension.
if (KnownZero.intersects(InSignBit))
- return TLO.CombineTo(Op,
- TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,ExVT));
+ return TLO.CombineTo(Op, TLO.DAG.getZeroExtendInReg(
+ Op.getOperand(0), dl, ExVT.getScalarType()));
if (KnownOne.intersects(InSignBit)) { // Input sign bit known set
KnownOne |= NewBits;
@@ -957,8 +1051,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
break;
}
case ISD::ZERO_EXTEND: {
- unsigned OperandBitWidth =
- Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
+ unsigned OperandBitWidth = Op.getOperand(0).getScalarValueSizeInBits();
APInt InMask = NewMask.trunc(OperandBitWidth);
// If none of the top bits are demanded, convert this into an any_extend.
@@ -980,7 +1073,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
}
case ISD::SIGN_EXTEND: {
EVT InVT = Op.getOperand(0).getValueType();
- unsigned InBits = InVT.getScalarType().getSizeInBits();
+ unsigned InBits = InVT.getScalarSizeInBits();
APInt InMask = APInt::getLowBitsSet(BitWidth, InBits);
APInt InSignBit = APInt::getBitsSet(BitWidth, InBits - 1, InBits);
APInt NewBits = ~InMask & NewMask;
@@ -1020,8 +1113,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
break;
}
case ISD::ANY_EXTEND: {
- unsigned OperandBitWidth =
- Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
+ unsigned OperandBitWidth = Op.getOperand(0).getScalarValueSizeInBits();
APInt InMask = NewMask.trunc(OperandBitWidth);
if (SimplifyDemandedBits(Op.getOperand(0), InMask,
KnownZero, KnownOne, TLO, Depth+1))
@@ -1034,8 +1126,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
case ISD::TRUNCATE: {
// Simplify the input, using demanded bit information, and compute the known
// zero/one bits live out.
- unsigned OperandBitWidth =
- Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
+ unsigned OperandBitWidth = Op.getOperand(0).getScalarValueSizeInBits();
APInt TruncMask = NewMask.zext(OperandBitWidth);
if (SimplifyDemandedBits(Op.getOperand(0), TruncMask,
KnownZero, KnownOne, TLO, Depth+1))
@@ -1109,7 +1200,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (!TLO.LegalOperations() &&
!Op.getValueType().isVector() &&
!Op.getOperand(0).getValueType().isVector() &&
- NewMask == APInt::getSignBit(Op.getValueType().getSizeInBits()) &&
+ NewMask == APInt::getSignBit(Op.getValueSizeInBits()) &&
Op.getOperand(0).getValueType().isFloatingPoint()) {
bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType());
bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32);
@@ -1120,10 +1211,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// Make a FGETSIGN + SHL to move the sign bit into the appropriate
// place. We expect the SHL to be eliminated by other optimizations.
SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, dl, Ty, Op.getOperand(0));
- unsigned OpVTSizeInBits = Op.getValueType().getSizeInBits();
+ unsigned OpVTSizeInBits = Op.getValueSizeInBits();
if (!OpVTLegal && OpVTSizeInBits > 32)
Sign = TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), Sign);
- unsigned ShVal = Op.getValueType().getSizeInBits()-1;
+ unsigned ShVal = Op.getValueSizeInBits() - 1;
SDValue ShAmt = TLO.DAG.getConstant(ShVal, dl, Op.getValueType());
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl,
Op.getValueType(),
@@ -1139,16 +1230,27 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
APInt LoMask = APInt::getLowBitsSet(BitWidth,
BitWidth - NewMask.countLeadingZeros());
if (SimplifyDemandedBits(Op.getOperand(0), LoMask, KnownZero2,
- KnownOne2, TLO, Depth+1))
- return true;
- if (SimplifyDemandedBits(Op.getOperand(1), LoMask, KnownZero2,
- KnownOne2, TLO, Depth+1))
- return true;
- // See if the operation should be performed at a smaller bit width.
- if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl))
+ KnownOne2, TLO, Depth+1) ||
+ SimplifyDemandedBits(Op.getOperand(1), LoMask, KnownZero2,
+ KnownOne2, TLO, Depth+1) ||
+ // See if the operation should be performed at a smaller bit width.
+ TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) {
+ const SDNodeFlags *Flags = Op.getNode()->getFlags();
+ if (Flags->hasNoSignedWrap() || Flags->hasNoUnsignedWrap()) {
+ // Disable the nsw and nuw flags. We can no longer guarantee that we
+ // won't wrap after simplification.
+ SDNodeFlags NewFlags = *Flags;
+ NewFlags.setNoSignedWrap(false);
+ NewFlags.setNoUnsignedWrap(false);
+ SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, Op.getValueType(),
+ Op.getOperand(0), Op.getOperand(1),
+ &NewFlags);
+ return TLO.CombineTo(Op, NewOp);
+ }
return true;
+ }
+ LLVM_FALLTHROUGH;
}
- // FALL THROUGH
default:
// Just use computeKnownBits to compute output bits.
TLO.DAG.computeKnownBits(Op, KnownZero, KnownOne, Depth);
@@ -1214,11 +1316,11 @@ bool TargetLowering::isConstTrueVal(const SDNode *N) const {
if (!BV)
return false;
- BitVector UndefElements;
- CN = BV->getConstantSplatNode(&UndefElements);
- // Only interested in constant splats, and we don't try to handle undef
- // elements in identifying boolean constants.
- if (!CN || UndefElements.none())
+ // Only interested in constant splats; we don't care about undef
+ // elements in identifying boolean constants, and getConstantSplatNode
+ // returns NULL if all ops are undef.
+ CN = BV->getConstantSplatNode();
+ if (!CN)
return false;
}
@@ -1254,11 +1356,11 @@ bool TargetLowering::isConstFalseVal(const SDNode *N) const {
if (!BV)
return false;
- BitVector UndefElements;
- CN = BV->getConstantSplatNode(&UndefElements);
- // Only interested in constant splats, and we don't try to handle undef
- // elements in identifying boolean constants.
- if (!CN || UndefElements.none())
+ // Only interested in constant splats; we don't care about undef
+ // elements in identifying boolean constants, and getConstantSplatNode
+ // returns NULL if all ops are undef.
+ CN = BV->getConstantSplatNode();
+ if (!CN)
return false;
}
@@ -1390,7 +1492,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
const APInt &ShAmt
= cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
- ShAmt == Log2_32(N0.getValueType().getSizeInBits())) {
+ ShAmt == Log2_32(N0.getValueSizeInBits())) {
if ((C1 == 0) == (Cond == ISD::SETEQ)) {
// (srl (ctlz x), 5) == 0 -> X != 0
// (srl (ctlz x), 5) != 1 -> X != 0
@@ -1412,8 +1514,8 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
CTPOP = N0.getOperand(0);
if (CTPOP.hasOneUse() && CTPOP.getOpcode() == ISD::CTPOP &&
- (N0 == CTPOP || N0.getValueType().getSizeInBits() >
- Log2_32_Ceil(CTPOP.getValueType().getSizeInBits()))) {
+ (N0 == CTPOP ||
+ N0.getValueSizeInBits() > Log2_32_Ceil(CTPOP.getValueSizeInBits()))) {
EVT CTVT = CTPOP.getValueType();
SDValue CTOp = CTPOP.getOperand(0);
@@ -1478,6 +1580,10 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
// Will get folded away.
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, MinVT, PreExt);
+ if (MinBits == 1 && C1 == 1)
+ // Invert the condition.
+ return DAG.getSetCC(dl, VT, Trunc, DAG.getConstant(0, dl, MVT::i1),
+ Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
SDValue C = DAG.getConstant(C1.trunc(MinBits), dl, MinVT);
return DAG.getSetCC(dl, VT, Trunc, C, Cond);
}
@@ -1530,7 +1636,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
APInt bestMask;
unsigned bestWidth = 0, bestOffset = 0;
if (!Lod->isVolatile() && Lod->isUnindexed()) {
- unsigned origWidth = N0.getValueType().getSizeInBits();
+ unsigned origWidth = N0.getValueSizeInBits();
unsigned maskWidth = origWidth;
// We can narrow (e.g.) 16-bit extending loads on 32-bit target to
// 8 bits, but have to be careful...
@@ -1577,7 +1683,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// If the LHS is a ZERO_EXTEND, perform the comparison on the input.
if (N0.getOpcode() == ISD::ZERO_EXTEND) {
- unsigned InSize = N0.getOperand(0).getValueType().getSizeInBits();
+ unsigned InSize = N0.getOperand(0).getValueSizeInBits();
// If the comparison constant has bits in the upper part, the
// zero-extended value could never match.
@@ -2297,7 +2403,7 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
Ops.push_back(Op);
return;
}
- // fall through
+ LLVM_FALLTHROUGH;
case 'i': // Simple Integer or Relocatable Constant
case 'n': // Simple Integer
case 's': { // Relocatable Constant
@@ -2946,7 +3052,7 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, const APInt &Divisor,
Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), Q,
DAG.getConstant(magics.m, dl, VT)).getNode(), 1);
else
- return SDValue(); // No mulhu or equvialent
+ return SDValue(); // No mulhu or equivalent
Created->push_back(Q.getNode());
@@ -2987,108 +3093,190 @@ verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
// Legalization Utilities
//===----------------------------------------------------------------------===//
-bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
- SelectionDAG &DAG, SDValue LL, SDValue LH,
- SDValue RL, SDValue RH) const {
- EVT VT = N->getValueType(0);
- SDLoc dl(N);
-
- bool HasMULHS = isOperationLegalOrCustom(ISD::MULHS, HiLoVT);
- bool HasMULHU = isOperationLegalOrCustom(ISD::MULHU, HiLoVT);
- bool HasSMUL_LOHI = isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT);
- bool HasUMUL_LOHI = isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT);
- if (HasMULHU || HasMULHS || HasUMUL_LOHI || HasSMUL_LOHI) {
- unsigned OuterBitSize = VT.getSizeInBits();
- unsigned InnerBitSize = HiLoVT.getSizeInBits();
- unsigned LHSSB = DAG.ComputeNumSignBits(N->getOperand(0));
- unsigned RHSSB = DAG.ComputeNumSignBits(N->getOperand(1));
-
- // LL, LH, RL, and RH must be either all NULL or all set to a value.
- assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
- (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
-
- if (!LL.getNode() && !RL.getNode() &&
- isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
- LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, N->getOperand(0));
- RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, N->getOperand(1));
- }
-
- if (!LL.getNode())
- return false;
+bool TargetLowering::expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl,
+ SDValue LHS, SDValue RHS,
+ SmallVectorImpl<SDValue> &Result,
+ EVT HiLoVT, SelectionDAG &DAG,
+ MulExpansionKind Kind, SDValue LL,
+ SDValue LH, SDValue RL, SDValue RH) const {
+ assert(Opcode == ISD::MUL || Opcode == ISD::UMUL_LOHI ||
+ Opcode == ISD::SMUL_LOHI);
+
+ bool HasMULHS = (Kind == MulExpansionKind::Always) ||
+ isOperationLegalOrCustom(ISD::MULHS, HiLoVT);
+ bool HasMULHU = (Kind == MulExpansionKind::Always) ||
+ isOperationLegalOrCustom(ISD::MULHU, HiLoVT);
+ bool HasSMUL_LOHI = (Kind == MulExpansionKind::Always) ||
+ isOperationLegalOrCustom(ISD::SMUL_LOHI, HiLoVT);
+ bool HasUMUL_LOHI = (Kind == MulExpansionKind::Always) ||
+ isOperationLegalOrCustom(ISD::UMUL_LOHI, HiLoVT);
+
+ if (!HasMULHU && !HasMULHS && !HasUMUL_LOHI && !HasSMUL_LOHI)
+ return false;
- APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
- if (DAG.MaskedValueIsZero(N->getOperand(0), HighMask) &&
- DAG.MaskedValueIsZero(N->getOperand(1), HighMask)) {
- // The inputs are both zero-extended.
- if (HasUMUL_LOHI) {
- // We can emit a umul_lohi.
- Lo = DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(HiLoVT, HiLoVT), LL,
- RL);
- Hi = SDValue(Lo.getNode(), 1);
- return true;
- }
- if (HasMULHU) {
- // We can emit a mulhu+mul.
- Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RL);
- Hi = DAG.getNode(ISD::MULHU, dl, HiLoVT, LL, RL);
- return true;
- }
+ unsigned OuterBitSize = VT.getScalarSizeInBits();
+ unsigned InnerBitSize = HiLoVT.getScalarSizeInBits();
+ unsigned LHSSB = DAG.ComputeNumSignBits(LHS);
+ unsigned RHSSB = DAG.ComputeNumSignBits(RHS);
+
+ // LL, LH, RL, and RH must be either all NULL or all set to a value.
+ assert((LL.getNode() && LH.getNode() && RL.getNode() && RH.getNode()) ||
+ (!LL.getNode() && !LH.getNode() && !RL.getNode() && !RH.getNode()));
+
+ SDVTList VTs = DAG.getVTList(HiLoVT, HiLoVT);
+ auto MakeMUL_LOHI = [&](SDValue L, SDValue R, SDValue &Lo, SDValue &Hi,
+ bool Signed) -> bool {
+ if ((Signed && HasSMUL_LOHI) || (!Signed && HasUMUL_LOHI)) {
+ Lo = DAG.getNode(Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI, dl, VTs, L, R);
+ Hi = SDValue(Lo.getNode(), 1);
+ return true;
}
- if (LHSSB > InnerBitSize && RHSSB > InnerBitSize) {
- // The input values are both sign-extended.
- if (HasSMUL_LOHI) {
- // We can emit a smul_lohi.
- Lo = DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(HiLoVT, HiLoVT), LL,
- RL);
- Hi = SDValue(Lo.getNode(), 1);
- return true;
- }
- if (HasMULHS) {
- // We can emit a mulhs+mul.
- Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RL);
- Hi = DAG.getNode(ISD::MULHS, dl, HiLoVT, LL, RL);
- return true;
- }
+ if ((Signed && HasMULHS) || (!Signed && HasMULHU)) {
+ Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, L, R);
+ Hi = DAG.getNode(Signed ? ISD::MULHS : ISD::MULHU, dl, HiLoVT, L, R);
+ return true;
}
+ return false;
+ };
- if (!LH.getNode() && !RH.getNode() &&
- isOperationLegalOrCustom(ISD::SRL, VT) &&
- isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
- auto &DL = DAG.getDataLayout();
- unsigned ShiftAmt = VT.getSizeInBits() - HiLoVT.getSizeInBits();
- SDValue Shift = DAG.getConstant(ShiftAmt, dl, getShiftAmountTy(VT, DL));
- LH = DAG.getNode(ISD::SRL, dl, VT, N->getOperand(0), Shift);
- LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
- RH = DAG.getNode(ISD::SRL, dl, VT, N->getOperand(1), Shift);
- RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
- }
+ SDValue Lo, Hi;
- if (!LH.getNode())
- return false;
+ if (!LL.getNode() && !RL.getNode() &&
+ isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
+ LL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LHS);
+ RL = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RHS);
+ }
- if (HasUMUL_LOHI) {
- // Lo,Hi = umul LHS, RHS.
- SDValue UMulLOHI = DAG.getNode(ISD::UMUL_LOHI, dl,
- DAG.getVTList(HiLoVT, HiLoVT), LL, RL);
- Lo = UMulLOHI;
- Hi = UMulLOHI.getValue(1);
- RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
- LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
- Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
- Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
+ if (!LL.getNode())
+ return false;
+
+ APInt HighMask = APInt::getHighBitsSet(OuterBitSize, InnerBitSize);
+ if (DAG.MaskedValueIsZero(LHS, HighMask) &&
+ DAG.MaskedValueIsZero(RHS, HighMask)) {
+ // The inputs are both zero-extended.
+ if (MakeMUL_LOHI(LL, RL, Lo, Hi, false)) {
+ Result.push_back(Lo);
+ Result.push_back(Hi);
+ if (Opcode != ISD::MUL) {
+ SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
+ Result.push_back(Zero);
+ Result.push_back(Zero);
+ }
return true;
}
- if (HasMULHU) {
- Lo = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RL);
- Hi = DAG.getNode(ISD::MULHU, dl, HiLoVT, LL, RL);
- RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
- LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
- Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
- Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
+ }
+
+ if (!VT.isVector() && Opcode == ISD::MUL && LHSSB > InnerBitSize &&
+ RHSSB > InnerBitSize) {
+ // The input values are both sign-extended.
+ // TODO non-MUL case?
+ if (MakeMUL_LOHI(LL, RL, Lo, Hi, true)) {
+ Result.push_back(Lo);
+ Result.push_back(Hi);
return true;
}
}
- return false;
+
+ unsigned ShiftAmount = OuterBitSize - InnerBitSize;
+ EVT ShiftAmountTy = getShiftAmountTy(VT, DAG.getDataLayout());
+ if (APInt::getMaxValue(ShiftAmountTy.getSizeInBits()).ult(ShiftAmount)) {
+ // FIXME getShiftAmountTy does not always return a sensible result when VT
+ // is an illegal type, and so the type may be too small to fit the shift
+ // amount. Override it with i32. The shift will have to be legalized.
+ ShiftAmountTy = MVT::i32;
+ }
+ SDValue Shift = DAG.getConstant(ShiftAmount, dl, ShiftAmountTy);
+
+ if (!LH.getNode() && !RH.getNode() &&
+ isOperationLegalOrCustom(ISD::SRL, VT) &&
+ isOperationLegalOrCustom(ISD::TRUNCATE, HiLoVT)) {
+ LH = DAG.getNode(ISD::SRL, dl, VT, LHS, Shift);
+ LH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, LH);
+ RH = DAG.getNode(ISD::SRL, dl, VT, RHS, Shift);
+ RH = DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, RH);
+ }
+
+ if (!LH.getNode())
+ return false;
+
+ if (!MakeMUL_LOHI(LL, RL, Lo, Hi, false))
+ return false;
+
+ Result.push_back(Lo);
+
+ if (Opcode == ISD::MUL) {
+ RH = DAG.getNode(ISD::MUL, dl, HiLoVT, LL, RH);
+ LH = DAG.getNode(ISD::MUL, dl, HiLoVT, LH, RL);
+ Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, RH);
+ Hi = DAG.getNode(ISD::ADD, dl, HiLoVT, Hi, LH);
+ Result.push_back(Hi);
+ return true;
+ }
+
+ // Compute the full width result.
+ auto Merge = [&](SDValue Lo, SDValue Hi) -> SDValue {
+ Lo = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Lo);
+ Hi = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
+ Hi = DAG.getNode(ISD::SHL, dl, VT, Hi, Shift);
+ return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
+ };
+
+ SDValue Next = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Hi);
+ if (!MakeMUL_LOHI(LL, RH, Lo, Hi, false))
+ return false;
+
+ // This is effectively the add part of a multiply-add of half-sized operands,
+ // so it cannot overflow.
+ Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
+
+ if (!MakeMUL_LOHI(LH, RL, Lo, Hi, false))
+ return false;
+
+ Next = DAG.getNode(ISD::ADDC, dl, DAG.getVTList(VT, MVT::Glue), Next,
+ Merge(Lo, Hi));
+
+ SDValue Carry = Next.getValue(1);
+ Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
+ Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
+
+ if (!MakeMUL_LOHI(LH, RH, Lo, Hi, Opcode == ISD::SMUL_LOHI))
+ return false;
+
+ SDValue Zero = DAG.getConstant(0, dl, HiLoVT);
+ Hi = DAG.getNode(ISD::ADDE, dl, DAG.getVTList(HiLoVT, MVT::Glue), Hi, Zero,
+ Carry);
+ Next = DAG.getNode(ISD::ADD, dl, VT, Next, Merge(Lo, Hi));
+
+ if (Opcode == ISD::SMUL_LOHI) {
+ SDValue NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
+ DAG.getNode(ISD::ZERO_EXTEND, dl, VT, RL));
+ Next = DAG.getSelectCC(dl, LH, Zero, NextSub, Next, ISD::SETLT);
+
+ NextSub = DAG.getNode(ISD::SUB, dl, VT, Next,
+ DAG.getNode(ISD::ZERO_EXTEND, dl, VT, LL));
+ Next = DAG.getSelectCC(dl, RH, Zero, NextSub, Next, ISD::SETLT);
+ }
+
+ Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
+ Next = DAG.getNode(ISD::SRL, dl, VT, Next, Shift);
+ Result.push_back(DAG.getNode(ISD::TRUNCATE, dl, HiLoVT, Next));
+ return true;
+}
+
+bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
+ SelectionDAG &DAG, MulExpansionKind Kind,
+ SDValue LL, SDValue LH, SDValue RL,
+ SDValue RH) const {
+ SmallVector<SDValue, 2> Result;
+ bool Ok = expandMUL_LOHI(N->getOpcode(), N->getValueType(0), N,
+ N->getOperand(0), N->getOperand(1), Result, HiLoVT,
+ DAG, Kind, LL, LH, RL, RH);
+ if (Ok) {
+ assert(Result.size() == 2);
+ Lo = Result[0];
+ Hi = Result[1];
+ }
+ return Ok;
}
bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
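For reference, a self-contained sketch of the schoolbook expansion that expandMUL_LOHI above builds out of MUL/MULH/UMUL_LOHI nodes, here computing a 64x64->128 unsigned multiply from 32-bit halves in plain C++ rather than SelectionDAG nodes (variable names are illustrative only):

#include <cassert>
#include <cstdint>

struct U128 { uint64_t Lo, Hi; };

// Returns the full 128-bit product of two 64-bit values using only 64-bit
// arithmetic on 32-bit halves; the explicit carry handling corresponds to
// the ADDC/ADDE chain in the DAG expansion.
U128 umul128(uint64_t LHS, uint64_t RHS) {
  uint64_t LL = LHS & 0xFFFFFFFFu, LH = LHS >> 32;
  uint64_t RL = RHS & 0xFFFFFFFFu, RH = RHS >> 32;

  uint64_t LoLo = LL * RL;   // low  x low
  uint64_t Cross1 = LL * RH; // low  x high
  uint64_t Cross2 = LH * RL; // high x low
  uint64_t HiHi = LH * RH;   // high x high

  // Accumulate the middle partial products, tracking the carry explicitly.
  uint64_t Mid = (LoLo >> 32) + (Cross1 & 0xFFFFFFFFu) + (Cross2 & 0xFFFFFFFFu);
  uint64_t Lo = (LoLo & 0xFFFFFFFFu) | (Mid << 32);
  uint64_t Hi = HiHi + (Cross1 >> 32) + (Cross2 >> 32) + (Mid >> 32);
  return {Lo, Hi};
}

int main() {
  U128 P = umul128(0xDEADBEEFCAFEBABEull, 0x123456789ABCDEF0ull);
  // Cross-check the low half against native (wrapping) 64-bit multiplication.
  assert(P.Lo == 0xDEADBEEFCAFEBABEull * 0x123456789ABCDEF0ull);
  (void)P;
  return 0;
}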
@@ -3190,7 +3378,7 @@ SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
}
SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
- SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, SL, LD->getValueType(0), Vals);
+ SDValue Value = DAG.getBuildVector(LD->getValueType(0), SL, Vals);
return DAG.getMergeValues({ Value, NewChain }, SL);
}
@@ -3518,6 +3706,38 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
return Result;
}
+SDValue
+TargetLowering::IncrementMemoryAddress(SDValue Addr, SDValue Mask,
+ const SDLoc &DL, EVT DataVT,
+ SelectionDAG &DAG,
+ bool IsCompressedMemory) const {
+ SDValue Increment;
+ EVT AddrVT = Addr.getValueType();
+ EVT MaskVT = Mask.getValueType();
+ assert(DataVT.getVectorNumElements() == MaskVT.getVectorNumElements() &&
+ "Incompatible types of Data and Mask");
+ if (IsCompressedMemory) {
+ // Increment the pointer according to the number of '1's in the mask.
+ EVT MaskIntVT = EVT::getIntegerVT(*DAG.getContext(), MaskVT.getSizeInBits());
+ SDValue MaskInIntReg = DAG.getBitcast(MaskIntVT, Mask);
+ if (MaskIntVT.getSizeInBits() < 32) {
+ MaskInIntReg = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, MaskInIntReg);
+ MaskIntVT = MVT::i32;
+ }
+
+ // Count '1's with POPCNT.
+ Increment = DAG.getNode(ISD::CTPOP, DL, MaskIntVT, MaskInIntReg);
+ Increment = DAG.getZExtOrTrunc(Increment, DL, AddrVT);
+ // Scale is an element size in bytes.
+ SDValue Scale = DAG.getConstant(DataVT.getScalarSizeInBits() / 8, DL,
+ AddrVT);
+ Increment = DAG.getNode(ISD::MUL, DL, AddrVT, Increment, Scale);
+ } else
+ Increment = DAG.getConstant(DataVT.getSizeInBits() / 8, DL, AddrVT);
+
+ return DAG.getNode(ISD::ADD, DL, AddrVT, Addr, Increment);
+}
+
//===----------------------------------------------------------------------===//
// Implementation of Emulated TLS Model
//===----------------------------------------------------------------------===//
@@ -3550,11 +3770,36 @@ SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
// TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
// At last for X86 targets, maybe good for other targets too?
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
- MFI->setAdjustsStack(true); // Is this only for X86 target?
- MFI->setHasCalls(true);
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI.setAdjustsStack(true); // Is this only for X86 target?
+ MFI.setHasCalls(true);
assert((GA->getOffset() == 0) &&
"Emulated TLS must have zero offset in GlobalAddressSDNode");
return CallResult.first;
}
+
+SDValue TargetLowering::lowerCmpEqZeroToCtlzSrl(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert((Op->getOpcode() == ISD::SETCC) && "Input has to be a SETCC node.");
+ if (!isCtlzFast())
+ return SDValue();
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ SDLoc dl(Op);
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
+ if (C->isNullValue() && CC == ISD::SETEQ) {
+ EVT VT = Op.getOperand(0).getValueType();
+ SDValue Zext = Op.getOperand(0);
+ if (VT.bitsLT(MVT::i32)) {
+ VT = MVT::i32;
+ Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
+ }
+ unsigned Log2b = Log2_32(VT.getSizeInBits());
+ SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
+ SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
+ DAG.getConstant(Log2b, dl, MVT::i32));
+ return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
+ }
+ }
+ return SDValue();
+}
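As a standalone illustration of the pattern lowerCmpEqZeroToCtlzSrl emits: on targets with a fast ctlz, (x == 0) can be computed branchlessly as ctlz(x) >> log2(bitwidth). The check below uses C++20's std::countl_zero, which (unlike the raw instruction on some targets) is well defined for an input of 0; it only demonstrates the identity, not the DAG lowering:

#include <bit>
#include <cassert>
#include <cstdint>

// setcc (x, 0, eq)  -->  srl (ctlz x), Log2_32(32)
bool isZeroViaCtlz(uint32_t X) {
  unsigned Log2b = 5; // log2 of the 32-bit width
  return (static_cast<unsigned>(std::countl_zero(X)) >> Log2b) != 0;
}

int main() {
  for (uint32_t X : {0u, 1u, 2u, 31u, 0x80000000u, 0xFFFFFFFFu})
    assert(isZeroViaCtlz(X) == (X == 0));
  return 0;
}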
diff --git a/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp b/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
index 1efc440cd701..ff7d205c1f4c 100644
--- a/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/ShadowStackGCLowering.cpp
@@ -23,6 +23,7 @@
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/EscapeEnumerator.h"
using namespace llvm;
@@ -81,121 +82,6 @@ ShadowStackGCLowering::ShadowStackGCLowering()
initializeShadowStackGCLoweringPass(*PassRegistry::getPassRegistry());
}
-namespace {
-/// EscapeEnumerator - This is a little algorithm to find all escape points
-/// from a function so that "finally"-style code can be inserted. In addition
-/// to finding the existing return and unwind instructions, it also (if
-/// necessary) transforms any call instructions into invokes and sends them to
-/// a landing pad.
-///
-/// It's wrapped up in a state machine using the same transform C# uses for
-/// 'yield return' enumerators, This transform allows it to be non-allocating.
-class EscapeEnumerator {
- Function &F;
- const char *CleanupBBName;
-
- // State.
- int State;
- Function::iterator StateBB, StateE;
- IRBuilder<> Builder;
-
-public:
- EscapeEnumerator(Function &F, const char *N = "cleanup")
- : F(F), CleanupBBName(N), State(0), Builder(F.getContext()) {}
-
- IRBuilder<> *Next() {
- switch (State) {
- default:
- return nullptr;
-
- case 0:
- StateBB = F.begin();
- StateE = F.end();
- State = 1;
-
- case 1:
- // Find all 'return', 'resume', and 'unwind' instructions.
- while (StateBB != StateE) {
- BasicBlock *CurBB = &*StateBB++;
-
- // Branches and invokes do not escape, only unwind, resume, and return
- // do.
- TerminatorInst *TI = CurBB->getTerminator();
- if (!isa<ReturnInst>(TI) && !isa<ResumeInst>(TI))
- continue;
-
- Builder.SetInsertPoint(TI);
- return &Builder;
- }
-
- State = 2;
-
- // Find all 'call' instructions.
- SmallVector<Instruction *, 16> Calls;
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
- for (BasicBlock::iterator II = BB->begin(), EE = BB->end(); II != EE;
- ++II)
- if (CallInst *CI = dyn_cast<CallInst>(II))
- if (!CI->getCalledFunction() ||
- !CI->getCalledFunction()->getIntrinsicID())
- Calls.push_back(CI);
-
- if (Calls.empty())
- return nullptr;
-
- // Create a cleanup block.
- LLVMContext &C = F.getContext();
- BasicBlock *CleanupBB = BasicBlock::Create(C, CleanupBBName, &F);
- Type *ExnTy =
- StructType::get(Type::getInt8PtrTy(C), Type::getInt32Ty(C), nullptr);
- if (!F.hasPersonalityFn()) {
- Constant *PersFn = F.getParent()->getOrInsertFunction(
- "__gcc_personality_v0",
- FunctionType::get(Type::getInt32Ty(C), true));
- F.setPersonalityFn(PersFn);
- }
- LandingPadInst *LPad =
- LandingPadInst::Create(ExnTy, 1, "cleanup.lpad", CleanupBB);
- LPad->setCleanup(true);
- ResumeInst *RI = ResumeInst::Create(LPad, CleanupBB);
-
- // Transform the 'call' instructions into 'invoke's branching to the
- // cleanup block. Go in reverse order to make prettier BB names.
- SmallVector<Value *, 16> Args;
- for (unsigned I = Calls.size(); I != 0;) {
- CallInst *CI = cast<CallInst>(Calls[--I]);
-
- // Split the basic block containing the function call.
- BasicBlock *CallBB = CI->getParent();
- BasicBlock *NewBB = CallBB->splitBasicBlock(
- CI->getIterator(), CallBB->getName() + ".cont");
-
- // Remove the unconditional branch inserted at the end of CallBB.
- CallBB->getInstList().pop_back();
- NewBB->getInstList().remove(CI);
-
- // Create a new invoke instruction.
- Args.clear();
- CallSite CS(CI);
- Args.append(CS.arg_begin(), CS.arg_end());
-
- InvokeInst *II =
- InvokeInst::Create(CI->getCalledValue(), NewBB, CleanupBB, Args,
- CI->getName(), CallBB);
- II->setCallingConv(CI->getCallingConv());
- II->setAttributes(CI->getAttributes());
- CI->replaceAllUsesWith(II);
- delete CI;
- }
-
- Builder.SetInsertPoint(RI);
- return &Builder;
- }
- }
-};
-}
-
-
Constant *ShadowStackGCLowering::GetFrameMap(Function &F) {
// doInitialization creates the abstract type of this value.
Type *VoidPtr = Type::getInt8PtrTy(F.getContext());
diff --git a/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp b/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp
index d361a6c4ad06..4837495777da 100644
--- a/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp
+++ b/contrib/llvm/lib/CodeGen/ShrinkWrap.cpp
@@ -199,9 +199,7 @@ public:
MachineFunctionPass::getAnalysisUsage(AU);
}
- const char *getPassName() const override {
- return "Shrink Wrapping analysis";
- }
+ StringRef getPassName() const override { return "Shrink Wrapping analysis"; }
/// \brief Perform the shrink-wrapping analysis and update
/// the MachineFrameInfo attached to \p MF with the results.
@@ -256,8 +254,8 @@ bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI,
/// \brief Helper function to find the immediate (post) dominator.
template <typename ListOfBBs, typename DominanceAnalysis>
-MachineBasicBlock *FindIDom(MachineBasicBlock &Block, ListOfBBs BBs,
- DominanceAnalysis &Dom) {
+static MachineBasicBlock *FindIDom(MachineBasicBlock &Block, ListOfBBs BBs,
+ DominanceAnalysis &Dom) {
MachineBasicBlock *IDom = &Block;
for (MachineBasicBlock *BB : BBs) {
IDom = Dom.findNearestCommonDominator(IDom, BB);
@@ -521,9 +519,9 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
<< ' ' << Save->getName() << "\nRestore: "
<< Restore->getNumber() << ' ' << Restore->getName() << '\n');
- MachineFrameInfo *MFI = MF.getFrameInfo();
- MFI->setSavePoint(Save);
- MFI->setRestorePoint(Restore);
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ MFI.setSavePoint(Save);
+ MFI.setRestorePoint(Restore);
++NumCandidates;
return false;
}
diff --git a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp
index ce01c5f23e57..209bbe54ea23 100644
--- a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp
@@ -58,7 +58,7 @@ public:
bool runOnFunction(Function &F) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {}
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "SJLJ Exception Handling preparation";
}
diff --git a/contrib/llvm/lib/CodeGen/SplitKit.cpp b/contrib/llvm/lib/CodeGen/SplitKit.cpp
index 07be24b18dd5..1c6a84e53944 100644
--- a/contrib/llvm/lib/CodeGen/SplitKit.cpp
+++ b/contrib/llvm/lib/CodeGen/SplitKit.cpp
@@ -381,9 +381,59 @@ LLVM_DUMP_METHOD void SplitEditor::dump() const {
}
#endif
+LiveInterval::SubRange &SplitEditor::getSubRangeForMask(LaneBitmask LM,
+ LiveInterval &LI) {
+ for (LiveInterval::SubRange &S : LI.subranges())
+ if (S.LaneMask == LM)
+ return S;
+ llvm_unreachable("SubRange for this mask not found");
+}
+
+void SplitEditor::addDeadDef(LiveInterval &LI, VNInfo *VNI, bool Original) {
+ if (!LI.hasSubRanges()) {
+ LI.createDeadDef(VNI);
+ return;
+ }
+
+ SlotIndex Def = VNI->def;
+ if (Original) {
+ // If we are transferring a def from the original interval, make sure
+ // to only update the subranges for which the original subranges had
+ // a def at this location.
+ for (LiveInterval::SubRange &S : LI.subranges()) {
+ auto &PS = getSubRangeForMask(S.LaneMask, Edit->getParent());
+ VNInfo *PV = PS.getVNInfoAt(Def);
+ if (PV != nullptr && PV->def == Def)
+ S.createDeadDef(Def, LIS.getVNInfoAllocator());
+ }
+ } else {
+ // This is a new def: either from rematerialization, or from an inserted
+ // copy. Since rematerialization can regenerate a definition of a sub-
+ // register, we need to check which subranges need to be updated.
+ const MachineInstr *DefMI = LIS.getInstructionFromIndex(Def);
+ assert(DefMI != nullptr);
+ LaneBitmask LM;
+ for (const MachineOperand &DefOp : DefMI->defs()) {
+ unsigned R = DefOp.getReg();
+ if (R != LI.reg)
+ continue;
+ if (unsigned SR = DefOp.getSubReg())
+ LM |= TRI.getSubRegIndexLaneMask(SR);
+ else {
+ LM = MRI.getMaxLaneMaskForVReg(R);
+ break;
+ }
+ }
+ for (LiveInterval::SubRange &S : LI.subranges())
+ if ((S.LaneMask & LM).any())
+ S.createDeadDef(Def, LIS.getVNInfoAllocator());
+ }
+}
+
VNInfo *SplitEditor::defValue(unsigned RegIdx,
const VNInfo *ParentVNI,
- SlotIndex Idx) {
+ SlotIndex Idx,
+ bool Original) {
assert(ParentVNI && "Mapping NULL value");
assert(Idx.isValid() && "Invalid SlotIndex");
assert(Edit->getParent().getVNInfoAt(Idx) == ParentVNI && "Bad Parent VNI");
@@ -392,28 +442,28 @@ VNInfo *SplitEditor::defValue(unsigned RegIdx,
// Create a new value.
VNInfo *VNI = LI->getNextValue(Idx, LIS.getVNInfoAllocator());
+ bool Force = LI->hasSubRanges();
+ ValueForcePair FP(Force ? nullptr : VNI, Force);
// Use insert for lookup, so we can add missing values with a second lookup.
std::pair<ValueMap::iterator, bool> InsP =
- Values.insert(std::make_pair(std::make_pair(RegIdx, ParentVNI->id),
- ValueForcePair(VNI, false)));
+ Values.insert(std::make_pair(std::make_pair(RegIdx, ParentVNI->id), FP));
- // This was the first time (RegIdx, ParentVNI) was mapped.
- // Keep it as a simple def without any liveness.
- if (InsP.second)
+ // This was the first time (RegIdx, ParentVNI) was mapped, and it is not
+ // forced. Keep it as a simple def without any liveness.
+ if (!Force && InsP.second)
return VNI;
// If the previous value was a simple mapping, add liveness for it now.
if (VNInfo *OldVNI = InsP.first->second.getPointer()) {
- SlotIndex Def = OldVNI->def;
- LI->addSegment(LiveInterval::Segment(Def, Def.getDeadSlot(), OldVNI));
- // No longer a simple mapping. Switch to a complex, non-forced mapping.
- InsP.first->second = ValueForcePair();
+ addDeadDef(*LI, OldVNI, Original);
+
+ // No longer a simple mapping. Switch to a complex mapping. If the
+ // interval has subranges, make it a forced mapping.
+ InsP.first->second = ValueForcePair(nullptr, Force);
}
// This is a complex mapping, add liveness for VNI
- SlotIndex Def = VNI->def;
- LI->addSegment(LiveInterval::Segment(Def, Def.getDeadSlot(), VNI));
-
+ addDeadDef(*LI, VNI, Original);
return VNI;
}
@@ -431,9 +481,8 @@ void SplitEditor::forceRecompute(unsigned RegIdx, const VNInfo *ParentVNI) {
// This was previously a single mapping. Make sure the old def is represented
// by a trivial live range.
- SlotIndex Def = VNI->def;
- LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx));
- LI->addSegment(LiveInterval::Segment(Def, Def.getDeadSlot(), VNI));
+ addDeadDef(LIS.getInterval(Edit->get(RegIdx)), VNI, false);
+
// Mark as complex mapped, forced.
VFP = ValueForcePair(nullptr, true);
}
@@ -455,13 +504,18 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx,
unsigned Original = VRM.getOriginal(Edit->get(RegIdx));
LiveInterval &OrigLI = LIS.getInterval(Original);
VNInfo *OrigVNI = OrigLI.getVNInfoAt(UseIdx);
- LiveRangeEdit::Remat RM(ParentVNI);
- RM.OrigMI = LIS.getInstructionFromIndex(OrigVNI->def);
- if (Edit->canRematerializeAt(RM, OrigVNI, UseIdx, true)) {
- Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, TRI, Late);
- ++NumRemats;
- } else {
+ bool DidRemat = false;
+ if (OrigVNI) {
+ LiveRangeEdit::Remat RM(ParentVNI);
+ RM.OrigMI = LIS.getInstructionFromIndex(OrigVNI->def);
+ if (Edit->canRematerializeAt(RM, OrigVNI, UseIdx, true)) {
+ Def = Edit->rematerializeAt(MBB, I, LI->reg, RM, TRI, Late);
+ ++NumRemats;
+ DidRemat = true;
+ }
+ }
+ if (!DidRemat) {
// Can't remat, just insert a copy from parent.
CopyMI = BuildMI(MBB, I, DebugLoc(), TII.get(TargetOpcode::COPY), LI->reg)
.addReg(Edit->getReg());
@@ -472,7 +526,7 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx,
}
// Define the value in Reg.
- return defValue(RegIdx, ParentVNI, Def);
+ return defValue(RegIdx, ParentVNI, Def, false);
}
/// Create a new virtual register and live interval.
@@ -621,7 +675,7 @@ SlotIndex SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) {
}
VNInfo *VNI = defFromParent(0, ParentVNI, Start, MBB,
- MBB.SkipPHIsAndLabels(MBB.begin()));
+ MBB.SkipPHIsLabelsAndDebug(MBB.begin()));
RegAssign.insert(Start, VNI->def, OpenIdx);
DEBUG(dump());
return VNI->def;
@@ -944,14 +998,15 @@ bool SplitEditor::transferValues() {
}
// The interval [Start;End) is continuously mapped to RegIdx, ParentVNI.
- DEBUG(dbgs() << " [" << Start << ';' << End << ")=" << RegIdx);
- LiveRange &LR = LIS.getInterval(Edit->get(RegIdx));
+ DEBUG(dbgs() << " [" << Start << ';' << End << ")=" << RegIdx
+ << '(' << PrintReg(Edit->get(RegIdx)) << ')');
+ LiveInterval &LI = LIS.getInterval(Edit->get(RegIdx));
// Check for a simply defined value that can be blitted directly.
ValueForcePair VFP = Values.lookup(std::make_pair(RegIdx, ParentVNI->id));
if (VNInfo *VNI = VFP.getPointer()) {
DEBUG(dbgs() << ':' << VNI->id);
- LR.addSegment(LiveInterval::Segment(Start, End, VNI));
+ LI.addSegment(LiveInterval::Segment(Start, End, VNI));
Start = End;
continue;
}
@@ -975,7 +1030,7 @@ bool SplitEditor::transferValues() {
// The first block may be live-in, or it may have its own def.
if (Start != BlockStart) {
- VNInfo *VNI = LR.extendInBlock(BlockStart, std::min(BlockEnd, End));
+ VNInfo *VNI = LI.extendInBlock(BlockStart, std::min(BlockEnd, End));
assert(VNI && "Missing def for complex mapped value");
DEBUG(dbgs() << ':' << VNI->id << "*BB#" << MBB->getNumber());
// MBB has its own def. Is it also live-out?
@@ -995,7 +1050,7 @@ bool SplitEditor::transferValues() {
if (BlockStart == ParentVNI->def) {
// This block has the def of a parent PHI, so it isn't live-in.
assert(ParentVNI->isPHIDef() && "Non-phi defined at block start?");
- VNInfo *VNI = LR.extendInBlock(BlockStart, std::min(BlockEnd, End));
+ VNInfo *VNI = LI.extendInBlock(BlockStart, std::min(BlockEnd, End));
assert(VNI && "Missing def for complex mapped parent PHI");
if (End >= BlockEnd)
LRC.setLiveOutValue(&*MBB, VNI); // Live-out as well.
@@ -1003,10 +1058,10 @@ bool SplitEditor::transferValues() {
// This block needs a live-in value. The last block covered may not
// be live-out.
if (End < BlockEnd)
- LRC.addLiveInBlock(LR, MDT[&*MBB], End);
+ LRC.addLiveInBlock(LI, MDT[&*MBB], End);
else {
// Live-through, and we don't know the value.
- LRC.addLiveInBlock(LR, MDT[&*MBB]);
+ LRC.addLiveInBlock(LI, MDT[&*MBB]);
LRC.setLiveOutValue(&*MBB, nullptr);
}
}
@@ -1025,42 +1080,90 @@ bool SplitEditor::transferValues() {
return Skipped;
}
+static bool removeDeadSegment(SlotIndex Def, LiveRange &LR) {
+ const LiveRange::Segment *Seg = LR.getSegmentContaining(Def);
+ if (Seg == nullptr)
+ return true;
+ if (Seg->end != Def.getDeadSlot())
+ return false;
+ // This is a dead PHI. Remove it.
+ LR.removeSegment(*Seg, true);
+ return true;
+}
+
+void SplitEditor::extendPHIRange(MachineBasicBlock &B, LiveRangeCalc &LRC,
+ LiveRange &LR, LaneBitmask LM,
+ ArrayRef<SlotIndex> Undefs) {
+ for (MachineBasicBlock *P : B.predecessors()) {
+ SlotIndex End = LIS.getMBBEndIdx(P);
+ SlotIndex LastUse = End.getPrevSlot();
+ // The predecessor may not have a live-out value. That is OK, like an
+ // undef PHI operand.
+ LiveInterval &PLI = Edit->getParent();
+ // Need the cast because the inputs to ?: would otherwise be deemed
+ // "incompatible": SubRange vs LiveInterval.
+ LiveRange &PSR = !LM.all() ? getSubRangeForMask(LM, PLI)
+ : static_cast<LiveRange&>(PLI);
+ if (PSR.liveAt(LastUse))
+ LRC.extend(LR, End, /*PhysReg=*/0, Undefs);
+ }
+}
+
void SplitEditor::extendPHIKillRanges() {
// Extend live ranges to be live-out for successor PHI values.
- for (const VNInfo *PHIVNI : Edit->getParent().valnos) {
- if (PHIVNI->isUnused() || !PHIVNI->isPHIDef())
- continue;
- unsigned RegIdx = RegAssign.lookup(PHIVNI->def);
- LiveRange &LR = LIS.getInterval(Edit->get(RegIdx));
-
- // Check whether PHI is dead.
- const LiveRange::Segment *Segment = LR.getSegmentContaining(PHIVNI->def);
- assert(Segment != nullptr && "Missing segment for VNI");
- if (Segment->end == PHIVNI->def.getDeadSlot()) {
- // This is a dead PHI. Remove it.
- LR.removeSegment(*Segment, true);
+
+ // Visit each PHI def slot in the parent live interval. If the def is dead,
+ // remove it. Otherwise, extend the live interval to reach the end indexes
+ // of all predecessor blocks.
+
+ LiveInterval &ParentLI = Edit->getParent();
+ for (const VNInfo *V : ParentLI.valnos) {
+ if (V->isUnused() || !V->isPHIDef())
continue;
- }
+ unsigned RegIdx = RegAssign.lookup(V->def);
+ LiveInterval &LI = LIS.getInterval(Edit->get(RegIdx));
LiveRangeCalc &LRC = getLRCalc(RegIdx);
- MachineBasicBlock *MBB = LIS.getMBBFromIndex(PHIVNI->def);
- for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
- PE = MBB->pred_end(); PI != PE; ++PI) {
- SlotIndex End = LIS.getMBBEndIdx(*PI);
- SlotIndex LastUse = End.getPrevSlot();
- // The predecessor may not have a live-out value. That is OK, like an
- // undef PHI operand.
- if (Edit->getParent().liveAt(LastUse)) {
- assert(RegAssign.lookup(LastUse) == RegIdx &&
- "Different register assignment in phi predecessor");
- LRC.extend(LR, End);
- }
+ MachineBasicBlock &B = *LIS.getMBBFromIndex(V->def);
+ if (!removeDeadSegment(V->def, LI))
+ extendPHIRange(B, LRC, LI, LaneBitmask::getAll(), /*Undefs=*/{});
+ }
+
+ SmallVector<SlotIndex, 4> Undefs;
+ LiveRangeCalc SubLRC;
+
+ for (LiveInterval::SubRange &PS : ParentLI.subranges()) {
+ for (const VNInfo *V : PS.valnos) {
+ if (V->isUnused() || !V->isPHIDef())
+ continue;
+ unsigned RegIdx = RegAssign.lookup(V->def);
+ LiveInterval &LI = LIS.getInterval(Edit->get(RegIdx));
+ LiveInterval::SubRange &S = getSubRangeForMask(PS.LaneMask, LI);
+ if (removeDeadSegment(V->def, S))
+ continue;
+
+ MachineBasicBlock &B = *LIS.getMBBFromIndex(V->def);
+ SubLRC.reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT,
+ &LIS.getVNInfoAllocator());
+ Undefs.clear();
+ LI.computeSubRangeUndefs(Undefs, PS.LaneMask, MRI, *LIS.getSlotIndexes());
+ extendPHIRange(B, SubLRC, S, PS.LaneMask, Undefs);
}
}
}
/// rewriteAssigned - Rewrite all uses of Edit->getReg().
void SplitEditor::rewriteAssigned(bool ExtendRanges) {
+ struct ExtPoint {
+ ExtPoint(const MachineOperand &O, unsigned R, SlotIndex N)
+ : MO(O), RegIdx(R), Next(N) {}
+ MachineOperand MO;
+ unsigned RegIdx;
+ SlotIndex Next;
+ };
+
+ SmallVector<ExtPoint,4> ExtPoints;
+
for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Edit->getReg()),
RE = MRI.reg_end(); RI != RE;) {
MachineOperand &MO = *RI;
@@ -1082,8 +1185,8 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) {
// Rewrite to the mapped register at Idx.
unsigned RegIdx = RegAssign.lookup(Idx);
- LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx));
- MO.setReg(LI->reg);
+ LiveInterval &LI = LIS.getInterval(Edit->get(RegIdx));
+ MO.setReg(LI.reg);
DEBUG(dbgs() << " rewr BB#" << MI->getParent()->getNumber() << '\t'
<< Idx << ':' << RegIdx << '\t' << *MI);
@@ -1095,7 +1198,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) {
if (MO.isDef()) {
if (!MO.getSubReg() && !MO.isEarlyClobber())
continue;
- // We may wan't to extend a live range for a partial redef, or for a use
+ // We may want to extend a live range for a partial redef, or for a use
// tied to an early clobber.
Idx = Idx.getPrevSlot();
if (!Edit->getParent().liveAt(Idx))
@@ -1103,7 +1206,53 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) {
} else
Idx = Idx.getRegSlot(true);
- getLRCalc(RegIdx).extend(*LI, Idx.getNextSlot());
+ SlotIndex Next = Idx.getNextSlot();
+ if (LI.hasSubRanges()) {
+ // We have to delay extending subranges until we have seen all operands
+ // defining the register. This is because a <def,read-undef> operand
+ // will create an "undef" point, and we cannot extend any subranges
+ // until all of them have been accounted for.
+ if (MO.isUse())
+ ExtPoints.push_back(ExtPoint(MO, RegIdx, Next));
+ } else {
+ LiveRangeCalc &LRC = getLRCalc(RegIdx);
+ LRC.extend(LI, Next, 0, ArrayRef<SlotIndex>());
+ }
+ }
+
+ for (ExtPoint &EP : ExtPoints) {
+ LiveInterval &LI = LIS.getInterval(Edit->get(EP.RegIdx));
+ assert(LI.hasSubRanges());
+
+ LiveRangeCalc SubLRC;
+ unsigned Reg = EP.MO.getReg(), Sub = EP.MO.getSubReg();
+ LaneBitmask LM = Sub != 0 ? TRI.getSubRegIndexLaneMask(Sub)
+ : MRI.getMaxLaneMaskForVReg(Reg);
+ for (LiveInterval::SubRange &S : LI.subranges()) {
+ if ((S.LaneMask & LM).none())
+ continue;
+ // The problem here can be that the new register may have been created
+ // for a partially defined original register. For example:
+ // %vreg827:subreg_hireg<def,read-undef> = ...
+ // ...
+ // %vreg828<def> = COPY %vreg827
+ if (S.empty())
+ continue;
+ SubLRC.reset(&VRM.getMachineFunction(), LIS.getSlotIndexes(), &MDT,
+ &LIS.getVNInfoAllocator());
+ SmallVector<SlotIndex, 4> Undefs;
+ LI.computeSubRangeUndefs(Undefs, S.LaneMask, MRI, *LIS.getSlotIndexes());
+ SubLRC.extend(S, EP.Next, 0, Undefs);
+ }
+ }
+
+ for (unsigned R : *Edit) {
+ LiveInterval &LI = LIS.getInterval(R);
+ if (!LI.hasSubRanges())
+ continue;
+ LI.clear();
+ LI.removeEmptySubRanges();
+ LIS.constructMainRangeFromSubranges(LI);
}
}
@@ -1146,7 +1295,7 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
if (ParentVNI->isUnused())
continue;
unsigned RegIdx = RegAssign.lookup(ParentVNI->def);
- defValue(RegIdx, ParentVNI, ParentVNI->def);
+ defValue(RegIdx, ParentVNI, ParentVNI->def, true);
// Force rematted values to be recomputed everywhere.
// The new live ranges may be truncated.
@@ -1182,8 +1331,9 @@ void SplitEditor::finish(SmallVectorImpl<unsigned> *LRMap) {
deleteRematVictims();
// Get rid of unused values and set phi-kill flags.
- for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I) {
- LiveInterval &LI = LIS.getInterval(*I);
+ for (unsigned Reg : *Edit) {
+ LiveInterval &LI = LIS.getInterval(Reg);
+ LI.removeEmptySubRanges();
LI.RenumberValues();
}
diff --git a/contrib/llvm/lib/CodeGen/SplitKit.h b/contrib/llvm/lib/CodeGen/SplitKit.h
index a9684942885e..a75738aaf446 100644
--- a/contrib/llvm/lib/CodeGen/SplitKit.h
+++ b/contrib/llvm/lib/CodeGen/SplitKit.h
@@ -325,12 +325,30 @@ private:
return LRCalc[SpillMode != SM_Partition && RegIdx != 0];
}
+ /// Find a subrange corresponding to the lane mask @p LM in the live
+ /// interval @p LI. The interval @p LI is assumed to contain such a subrange.
+ /// This function is used to find corresponding subranges between the
+ /// original interval and the new intervals.
+ LiveInterval::SubRange &getSubRangeForMask(LaneBitmask LM, LiveInterval &LI);
+
+ /// Add a segment to the interval LI for the value number VNI. If LI has
+ /// subranges, corresponding segments will be added to them as well, but
+ /// with newly created value numbers. If Original is true, a dead def will
+ /// only be added to a subrange of LI if the corresponding subrange of the
+ /// original interval has a def at this index. Otherwise, all subranges
+ /// of LI will be updated.
+ void addDeadDef(LiveInterval &LI, VNInfo *VNI, bool Original);
+
/// defValue - define a value in RegIdx from ParentVNI at Idx.
/// Idx does not have to be ParentVNI->def, but it must be contained within
/// ParentVNI's live range in ParentLI. The new value is added to the value
- /// map.
+ /// map. The value being defined may come either from rematerialization
+ /// (or an inserted copy) or from the original interval. The parameter
+ /// Original should be true in the latter case and false otherwise.
/// Return the new LI value.
- VNInfo *defValue(unsigned RegIdx, const VNInfo *ParentVNI, SlotIndex Idx);
+ VNInfo *defValue(unsigned RegIdx, const VNInfo *ParentVNI, SlotIndex Idx,
+ bool Original);
/// forceRecompute - Force the live range of ParentVNI in RegIdx to be
/// recomputed by LiveRangeCalc::extend regardless of the number of defs.
@@ -368,6 +386,15 @@ private:
/// Return true if any ranges were skipped.
bool transferValues();
+ /// Live range @p LR corresponding to the lane mask @p LM has a live
+ /// PHI def at the beginning of block @p B. Extend the range @p LR of
+ /// all predecessor values that reach this def. If @p LR is a subrange,
+ /// the array @p Undefs is the set of all locations where it is undefined
+ /// via <def,read-undef> in other subranges for the same register.
+ void extendPHIRange(MachineBasicBlock &B, LiveRangeCalc &LRC,
+ LiveRange &LR, LaneBitmask LM,
+ ArrayRef<SlotIndex> Undefs);
+
/// extendPHIKillRanges - Extend the ranges of all values killed by original
/// parent PHIDefs.
void extendPHIKillRanges();
diff --git a/contrib/llvm/lib/CodeGen/StackColoring.cpp b/contrib/llvm/lib/CodeGen/StackColoring.cpp
index 87cd470d5690..89c4b574f17f 100644
--- a/contrib/llvm/lib/CodeGen/StackColoring.cpp
+++ b/contrib/llvm/lib/CodeGen/StackColoring.cpp
@@ -778,10 +778,9 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
unsigned FixedInstr = 0;
unsigned FixedMemOp = 0;
unsigned FixedDbg = 0;
- MachineModuleInfo *MMI = &MF->getMMI();
// Remap debug information that refers to stack slots.
- for (auto &VI : MMI->getVariableDbgInfo()) {
+ for (auto &VI : MF->getVariableDbgInfo()) {
if (!VI.Var)
continue;
if (SlotRemap.count(VI.Slot)) {
@@ -980,7 +979,7 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
<< "********** Function: "
<< ((const Value*)Func.getFunction())->getName() << '\n');
MF = &Func;
- MFI = MF->getFrameInfo();
+ MFI = &MF->getFrameInfo();
Indexes = &getAnalysis<SlotIndexes>();
SP = &getAnalysis<StackProtector>();
BlockLiveness.clear();
diff --git a/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp b/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp
index 87e4eb66c9c9..a5ef7c8229f5 100644
--- a/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp
+++ b/contrib/llvm/lib/CodeGen/StackMapLivenessAnalysis.cpp
@@ -17,7 +17,6 @@
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/CommandLine.h"
@@ -64,7 +63,7 @@ public:
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
/// \brief Calculate the liveness information for the given machine function.
@@ -113,7 +112,7 @@ bool StackMapLiveness::runOnMachineFunction(MachineFunction &MF) {
++NumStackMapFuncVisited;
// Skip this function if there are no patchpoints to process.
- if (!MF.getFrameInfo()->hasPatchPoint()) {
+ if (!MF.getFrameInfo().hasPatchPoint()) {
++NumStackMapFuncSkipped;
return false;
}
@@ -126,7 +125,7 @@ bool StackMapLiveness::calculateLiveness(MachineFunction &MF) {
// For all basic blocks in the function.
for (auto &MBB : MF) {
DEBUG(dbgs() << "****** BB " << MBB.getName() << " ******\n");
- LiveRegs.init(TRI);
+ LiveRegs.init(*TRI);
// FIXME: This should probably be addLiveOuts().
LiveRegs.addLiveOutsNoPristines(MBB);
bool HasStackMap = false;
diff --git a/contrib/llvm/lib/CodeGen/StackMaps.cpp b/contrib/llvm/lib/CodeGen/StackMaps.cpp
index d91bb8066aed..9b7dd400fc92 100644
--- a/contrib/llvm/lib/CodeGen/StackMaps.cpp
+++ b/contrib/llvm/lib/CodeGen/StackMaps.cpp
@@ -30,16 +30,20 @@ using namespace llvm;
#define DEBUG_TYPE "stackmaps"
static cl::opt<int> StackMapVersion(
- "stackmap-version", cl::init(1),
- cl::desc("Specify the stackmap encoding version (default = 1)"));
+ "stackmap-version", cl::init(2),
+ cl::desc("Specify the stackmap encoding version (default = 2)"));
const char *StackMaps::WSMP = "Stack Maps: ";
+StackMapOpers::StackMapOpers(const MachineInstr *MI)
+ : MI(MI) {
+ assert(getVarIdx() <= MI->getNumOperands() &&
+ "invalid stackmap definition");
+}
+
PatchPointOpers::PatchPointOpers(const MachineInstr *MI)
: MI(MI), HasDef(MI->getOperand(0).isReg() && MI->getOperand(0).isDef() &&
- !MI->getOperand(0).isImplicit()),
- IsAnyReg(MI->getOperand(getMetaIdx(CCPos)).getImm() ==
- CallingConv::AnyReg) {
+ !MI->getOperand(0).isImplicit()) {
#ifndef NDEBUG
unsigned CheckStartIdx = 0, e = MI->getNumOperands();
while (CheckStartIdx < e && MI->getOperand(CheckStartIdx).isReg() &&
@@ -70,7 +74,7 @@ unsigned PatchPointOpers::getNextScratchIdx(unsigned StartIdx) const {
}
StackMaps::StackMaps(AsmPrinter &AP) : AP(AP) {
- if (StackMapVersion != 1)
+ if (StackMapVersion != 2)
llvm_unreachable("Unsupported stackmap version!");
}
@@ -272,8 +276,7 @@ StackMaps::parseRegisterLiveOutMask(const uint32_t *Mask) const {
}
LiveOuts.erase(
- std::remove_if(LiveOuts.begin(), LiveOuts.end(),
- [](const LiveOutReg &LO) { return LO.Reg == 0; }),
+ remove_if(LiveOuts, [](const LiveOutReg &LO) { return LO.Reg == 0; }),
LiveOuts.end());
return LiveOuts;
@@ -332,20 +335,26 @@ void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint64_t ID,
CSInfos.emplace_back(CSOffsetExpr, ID, std::move(Locations),
std::move(LiveOuts));
- // Record the stack size of the current function.
- const MachineFrameInfo *MFI = AP.MF->getFrameInfo();
+ // Record the stack size of the current function and update callsite count.
+ const MachineFrameInfo &MFI = AP.MF->getFrameInfo();
const TargetRegisterInfo *RegInfo = AP.MF->getSubtarget().getRegisterInfo();
bool HasDynamicFrameSize =
- MFI->hasVarSizedObjects() || RegInfo->needsStackRealignment(*(AP.MF));
- FnStackSize[AP.CurrentFnSym] =
- HasDynamicFrameSize ? UINT64_MAX : MFI->getStackSize();
+ MFI.hasVarSizedObjects() || RegInfo->needsStackRealignment(*(AP.MF));
+ uint64_t FrameSize = HasDynamicFrameSize ? UINT64_MAX : MFI.getStackSize();
+
+ auto CurrentIt = FnInfos.find(AP.CurrentFnSym);
+ if (CurrentIt != FnInfos.end())
+ CurrentIt->second.RecordCount++;
+ else
+ FnInfos.insert(std::make_pair(AP.CurrentFnSym, FunctionInfo(FrameSize)));
}
void StackMaps::recordStackMap(const MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::STACKMAP && "expected stackmap");
- int64_t ID = MI.getOperand(0).getImm();
- recordStackMapOpers(MI, ID, std::next(MI.operands_begin(), 2),
+ StackMapOpers opers(&MI);
+ const int64_t ID = MI.getOperand(PatchPointOpers::IDPos).getImm();
+ recordStackMapOpers(MI, ID, std::next(MI.operands_begin(), opers.getVarIdx()),
MI.operands_end());
}
@@ -353,8 +362,7 @@ void StackMaps::recordPatchPoint(const MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::PATCHPOINT && "expected patchpoint");
PatchPointOpers opers(&MI);
- int64_t ID = opers.getMetaOper(PatchPointOpers::IDPos).getImm();
-
+ const int64_t ID = opers.getID();
auto MOI = std::next(MI.operands_begin(), opers.getStackMapStartIdx());
recordStackMapOpers(MI, ID, MOI, MI.operands_end(),
opers.isAnyReg() && opers.hasDef());
@@ -363,7 +371,7 @@ void StackMaps::recordPatchPoint(const MachineInstr &MI) {
// verify anyregcc
auto &Locations = CSInfos.back().Locations;
if (opers.isAnyReg()) {
- unsigned NArgs = opers.getMetaOper(PatchPointOpers::NArgPos).getImm();
+ unsigned NArgs = opers.getNumCallArgs();
for (unsigned i = 0, e = (opers.hasDef() ? NArgs + 1 : NArgs); i != e; ++i)
assert(Locations[i].Type == Location::Register &&
"anyreg arg must be in reg.");
@@ -384,7 +392,7 @@ void StackMaps::recordStatepoint(const MachineInstr &MI) {
/// Emit the stackmap header.
///
/// Header {
-/// uint8 : Stack Map Version (currently 1)
+/// uint8 : Stack Map Version (currently 2)
/// uint8 : Reserved (expected to be 0)
/// uint16 : Reserved (expected to be 0)
/// }
@@ -398,8 +406,8 @@ void StackMaps::emitStackmapHeader(MCStreamer &OS) {
OS.EmitIntValue(0, 2); // Reserved.
// Num functions.
- DEBUG(dbgs() << WSMP << "#functions = " << FnStackSize.size() << '\n');
- OS.EmitIntValue(FnStackSize.size(), 4);
+ DEBUG(dbgs() << WSMP << "#functions = " << FnInfos.size() << '\n');
+ OS.EmitIntValue(FnInfos.size(), 4);
// Num constants.
DEBUG(dbgs() << WSMP << "#constants = " << ConstPool.size() << '\n');
OS.EmitIntValue(ConstPool.size(), 4);
@@ -413,15 +421,18 @@ void StackMaps::emitStackmapHeader(MCStreamer &OS) {
/// StkSizeRecord[NumFunctions] {
/// uint64 : Function Address
/// uint64 : Stack Size
+/// uint64 : Record Count
/// }
void StackMaps::emitFunctionFrameRecords(MCStreamer &OS) {
// Function Frame records.
DEBUG(dbgs() << WSMP << "functions:\n");
- for (auto const &FR : FnStackSize) {
+ for (auto const &FR : FnInfos) {
DEBUG(dbgs() << WSMP << "function addr: " << FR.first
- << " frame size: " << FR.second);
+ << " frame size: " << FR.second.StackSize
+ << " callsite count: " << FR.second.RecordCount << '\n');
OS.EmitSymbolValue(FR.first, 8);
- OS.EmitIntValue(FR.second, 8);
+ OS.EmitIntValue(FR.second.StackSize, 8);
+ OS.EmitIntValue(FR.second.RecordCount, 8);
}
}
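
For reference, with stackmap version 2 each function record emitted above is
three 8-byte fields. A minimal reader-side sketch (illustrative only, not part
of this change; the struct name and the assumption of naturally aligned,
little-endian packing are the reader's):

    #include <cstdint>

    struct StkSizeRecordV2 {
      uint64_t FunctionAddress; // written via EmitSymbolValue(FR.first, 8)
      uint64_t StackSize;       // UINT64_MAX when the frame size is dynamic
      uint64_t RecordCount;     // call sites recorded for this function
    };
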
@@ -522,7 +533,7 @@ void StackMaps::serializeToStackMapSection() {
// Bail out if there's no stack map data.
assert((!CSInfos.empty() || ConstPool.empty()) &&
"Expected empty constant pool too!");
- assert((!CSInfos.empty() || FnStackSize.empty()) &&
+ assert((!CSInfos.empty() || FnInfos.empty()) &&
"Expected empty function record too!");
if (CSInfos.empty())
return;
diff --git a/contrib/llvm/lib/CodeGen/StackProtector.cpp b/contrib/llvm/lib/CodeGen/StackProtector.cpp
index 89868e43aba4..c2c010a29d44 100644
--- a/contrib/llvm/lib/CodeGen/StackProtector.cpp
+++ b/contrib/llvm/lib/CodeGen/StackProtector.cpp
@@ -50,7 +50,7 @@ static cl::opt<bool> EnableSelectionDAGSP("enable-selectiondag-sp",
cl::init(true), cl::Hidden);
char StackProtector::ID = 0;
-INITIALIZE_PASS(StackProtector, "stack-protector", "Insert stack protectors",
+INITIALIZE_TM_PASS(StackProtector, "stack-protector", "Insert stack protectors",
false, true)
FunctionPass *llvm::createStackProtectorPass(const TargetMachine *TM) {
@@ -236,11 +236,6 @@ bool StackProtector::RequiresStackProtector() {
for (const Instruction &I : BB) {
if (const AllocaInst *AI = dyn_cast<AllocaInst>(&I)) {
if (AI->isArrayAllocation()) {
- // SSP-Strong: Enable protectors for any call to alloca, regardless
- // of size.
- if (Strong)
- return true;
-
if (const auto *CI = dyn_cast<ConstantInt>(AI->getArraySize())) {
if (CI->getLimitedValue(SSPBufferSize) >= SSPBufferSize) {
// A call to alloca with size >= SSPBufferSize requires
diff --git a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
index d996714a414a..bae828a2263c 100644
--- a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
+++ b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp
@@ -428,7 +428,7 @@ bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) {
<< "********** Function: " << MF.getName() << '\n';
});
- MFI = MF.getFrameInfo();
+ MFI = &MF.getFrameInfo();
TII = MF.getSubtarget().getInstrInfo();
LS = &getAnalysis<LiveStacks>();
MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
diff --git a/contrib/llvm/lib/CodeGen/TailDuplication.cpp b/contrib/llvm/lib/CodeGen/TailDuplication.cpp
index 2b1fb127497a..e2377d89497d 100644
--- a/contrib/llvm/lib/CodeGen/TailDuplication.cpp
+++ b/contrib/llvm/lib/CodeGen/TailDuplication.cpp
@@ -47,13 +47,12 @@ bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(*MF.getFunction()))
return false;
- auto MMI = getAnalysisIfAvailable<MachineModuleInfo>();
auto MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
- Duplicator.initMF(MF, MMI, MBPI);
+ Duplicator.initMF(MF, MBPI, /* LayoutMode */ false);
bool MadeChange = false;
- while (Duplicator.tailDuplicateBlocks(MF))
+ while (Duplicator.tailDuplicateBlocks())
MadeChange = true;
return MadeChange;
diff --git a/contrib/llvm/lib/CodeGen/TailDuplicator.cpp b/contrib/llvm/lib/CodeGen/TailDuplicator.cpp
index 847a09349a59..7709236bbaa8 100644
--- a/contrib/llvm/lib/CodeGen/TailDuplicator.cpp
+++ b/contrib/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -20,6 +20,7 @@
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
@@ -40,12 +41,20 @@ STATISTIC(NumTailDupRemoved,
STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
STATISTIC(NumAddedPHIs, "Number of phis added");
+namespace llvm {
+
// Heuristic for tail duplication.
static cl::opt<unsigned> TailDuplicateSize(
"tail-dup-size",
cl::desc("Maximum instructions to consider tail duplicating"), cl::init(2),
cl::Hidden);
+cl::opt<unsigned> TailDupIndirectBranchSize(
+ "tail-dup-indirect-size",
+ cl::desc("Maximum instructions to consider tail duplicating blocks that "
+ "end with indirect branches."), cl::init(20),
+ cl::Hidden);
+
static cl::opt<bool>
TailDupVerify("tail-dup-verify",
cl::desc("Verify sanity of PHI instructions during taildup"),
@@ -54,18 +63,20 @@ static cl::opt<bool>
static cl::opt<unsigned> TailDupLimit("tail-dup-limit", cl::init(~0U),
cl::Hidden);
-namespace llvm {
-
-void TailDuplicator::initMF(MachineFunction &MF, const MachineModuleInfo *MMIin,
- const MachineBranchProbabilityInfo *MBPIin) {
- TII = MF.getSubtarget().getInstrInfo();
- TRI = MF.getSubtarget().getRegisterInfo();
- MRI = &MF.getRegInfo();
- MMI = MMIin;
+void TailDuplicator::initMF(MachineFunction &MFin,
+ const MachineBranchProbabilityInfo *MBPIin,
+ bool LayoutModeIn, unsigned TailDupSizeIn) {
+ MF = &MFin;
+ TII = MF->getSubtarget().getInstrInfo();
+ TRI = MF->getSubtarget().getRegisterInfo();
+ MRI = &MF->getRegInfo();
+ MMI = &MF->getMMI();
MBPI = MBPIin;
+ TailDupSize = TailDupSizeIn;
assert(MBPI != nullptr && "Machine Branch Probability Info required");
+ LayoutMode = LayoutModeIn;
PreRegAlloc = MRI->isSSA();
}
@@ -78,10 +89,7 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) {
while (MI != MBB->end()) {
if (!MI->isPHI())
break;
- for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
- PE = Preds.end();
- PI != PE; ++PI) {
- MachineBasicBlock *PredBB = *PI;
+ for (MachineBasicBlock *PredBB : Preds) {
bool Found = false;
for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
MachineBasicBlock *PHIBB = MI->getOperand(i + 1).getMBB();
@@ -119,21 +127,31 @@ static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) {
}
/// Tail duplicate the block and cleanup.
-bool TailDuplicator::tailDuplicateAndUpdate(MachineFunction &MF, bool IsSimple,
- MachineBasicBlock *MBB) {
+/// \p IsSimple - return value of isSimpleBB
+/// \p MBB - block to be duplicated
+/// \p ForcedLayoutPred - If non-null, treat this block as the layout
+/// predecessor, instead of using the ordering in MF
+/// \p DuplicatedPreds - if non-null, \p DuplicatedPreds will contain a list of
+/// all Preds that received a copy of \p MBB.
+/// \p RemovalCallback - if non-null, called just before MBB is deleted.
+bool TailDuplicator::tailDuplicateAndUpdate(
+ bool IsSimple, MachineBasicBlock *MBB,
+ MachineBasicBlock *ForcedLayoutPred,
+ SmallVectorImpl<MachineBasicBlock*> *DuplicatedPreds,
+ llvm::function_ref<void(MachineBasicBlock *)> *RemovalCallback) {
// Save the successors list.
SmallSetVector<MachineBasicBlock *, 8> Succs(MBB->succ_begin(),
MBB->succ_end());
SmallVector<MachineBasicBlock *, 8> TDBBs;
SmallVector<MachineInstr *, 16> Copies;
- if (!tailDuplicate(MF, IsSimple, MBB, TDBBs, Copies))
+ if (!tailDuplicate(IsSimple, MBB, ForcedLayoutPred, TDBBs, Copies))
return false;
++NumTails;
SmallVector<MachineInstr *, 8> NewPHIs;
- MachineSSAUpdater SSAUpdate(MF, &NewPHIs);
+ MachineSSAUpdater SSAUpdate(*MF, &NewPHIs);
// TailBB's immediate successors are now successors of those predecessors
// which duplicated TailBB. Add the predecessors as sources to the PHI
@@ -145,7 +163,7 @@ bool TailDuplicator::tailDuplicateAndUpdate(MachineFunction &MF, bool IsSimple,
// If it is dead, remove it.
if (isDead) {
NumTailDupRemoved += MBB->size();
- removeDeadBlock(MBB);
+ removeDeadBlock(MBB, RemovalCallback);
++NumDeadBlocks;
}
@@ -216,21 +234,24 @@ bool TailDuplicator::tailDuplicateAndUpdate(MachineFunction &MF, bool IsSimple,
if (NewPHIs.size())
NumAddedPHIs += NewPHIs.size();
+ if (DuplicatedPreds)
+ *DuplicatedPreds = std::move(TDBBs);
+
return true;
}
/// Look for small blocks that are unconditionally branched to and do not fall
/// through. Tail-duplicate their instructions into their predecessors to
/// eliminate (dynamic) branches.
-bool TailDuplicator::tailDuplicateBlocks(MachineFunction &MF) {
+bool TailDuplicator::tailDuplicateBlocks() {
bool MadeChange = false;
if (PreRegAlloc && TailDupVerify) {
DEBUG(dbgs() << "\n*** Before tail-duplicating\n");
- VerifyPHIs(MF, true);
+ VerifyPHIs(*MF, true);
}
- for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E;) {
+ for (MachineFunction::iterator I = ++MF->begin(), E = MF->end(); I != E;) {
MachineBasicBlock *MBB = &*I++;
if (NumTails == TailDupLimit)
@@ -238,14 +259,14 @@ bool TailDuplicator::tailDuplicateBlocks(MachineFunction &MF) {
bool IsSimple = isSimpleBB(MBB);
- if (!shouldTailDuplicate(MF, IsSimple, *MBB))
+ if (!shouldTailDuplicate(IsSimple, *MBB))
continue;
- MadeChange |= tailDuplicateAndUpdate(MF, IsSimple, MBB);
+ MadeChange |= tailDuplicateAndUpdate(IsSimple, MBB, nullptr);
}
if (PreRegAlloc && TailDupVerify)
- VerifyPHIs(MF, false);
+ VerifyPHIs(*MF, false);
return MadeChange;
}
@@ -334,10 +355,9 @@ void TailDuplicator::processPHI(
/// the source operands due to earlier PHI translation.
void TailDuplicator::duplicateInstruction(
MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB,
- MachineFunction &MF,
DenseMap<unsigned, RegSubRegPair> &LocalVRMap,
const DenseSet<unsigned> &UsedByPhi) {
- MachineInstr *NewMI = TII->duplicate(*MI, MF);
+ MachineInstr *NewMI = TII->duplicate(*MI, *MF);
if (PreRegAlloc) {
for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = NewMI->getOperand(i);
@@ -421,18 +441,14 @@ void TailDuplicator::updateSuccessorsPHIs(
MachineBasicBlock *FromBB, bool isDead,
SmallVectorImpl<MachineBasicBlock *> &TDBBs,
SmallSetVector<MachineBasicBlock *, 8> &Succs) {
- for (SmallSetVector<MachineBasicBlock *, 8>::iterator SI = Succs.begin(),
- SE = Succs.end();
- SI != SE; ++SI) {
- MachineBasicBlock *SuccBB = *SI;
- for (MachineBasicBlock::iterator II = SuccBB->begin(), EE = SuccBB->end();
- II != EE; ++II) {
- if (!II->isPHI())
+ for (MachineBasicBlock *SuccBB : Succs) {
+ for (MachineInstr &MI : *SuccBB) {
+ if (!MI.isPHI())
break;
- MachineInstrBuilder MIB(*FromBB->getParent(), II);
+ MachineInstrBuilder MIB(*FromBB->getParent(), MI);
unsigned Idx = 0;
- for (unsigned i = 1, e = II->getNumOperands(); i != e; i += 2) {
- MachineOperand &MO = II->getOperand(i + 1);
+ for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
+ MachineOperand &MO = MI.getOperand(i + 1);
if (MO.getMBB() == FromBB) {
Idx = i;
break;
@@ -440,17 +456,17 @@ void TailDuplicator::updateSuccessorsPHIs(
}
assert(Idx != 0);
- MachineOperand &MO0 = II->getOperand(Idx);
+ MachineOperand &MO0 = MI.getOperand(Idx);
unsigned Reg = MO0.getReg();
if (isDead) {
// Folded into the previous BB.
// There could be duplicate phi source entries. FIXME: Should sdisel
// or earlier pass fixed this?
- for (unsigned i = II->getNumOperands() - 2; i != Idx; i -= 2) {
- MachineOperand &MO = II->getOperand(i + 1);
+ for (unsigned i = MI.getNumOperands() - 2; i != Idx; i -= 2) {
+ MachineOperand &MO = MI.getOperand(i + 1);
if (MO.getMBB() == FromBB) {
- II->RemoveOperand(i + 1);
- II->RemoveOperand(i);
+ MI.RemoveOperand(i + 1);
+ MI.RemoveOperand(i);
}
}
} else
@@ -474,8 +490,8 @@ void TailDuplicator::updateSuccessorsPHIs(
unsigned SrcReg = LI->second[j].second;
if (Idx != 0) {
- II->getOperand(Idx).setReg(SrcReg);
- II->getOperand(Idx + 1).setMBB(SrcBB);
+ MI.getOperand(Idx).setReg(SrcReg);
+ MI.getOperand(Idx + 1).setMBB(SrcBB);
Idx = 0;
} else {
MIB.addReg(SrcReg).addMBB(SrcBB);
@@ -486,8 +502,8 @@ void TailDuplicator::updateSuccessorsPHIs(
for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) {
MachineBasicBlock *SrcBB = TDBBs[j];
if (Idx != 0) {
- II->getOperand(Idx).setReg(Reg);
- II->getOperand(Idx + 1).setMBB(SrcBB);
+ MI.getOperand(Idx).setReg(Reg);
+ MI.getOperand(Idx + 1).setMBB(SrcBB);
Idx = 0;
} else {
MIB.addReg(Reg).addMBB(SrcBB);
@@ -495,19 +511,20 @@ void TailDuplicator::updateSuccessorsPHIs(
}
}
if (Idx != 0) {
- II->RemoveOperand(Idx + 1);
- II->RemoveOperand(Idx);
+ MI.RemoveOperand(Idx + 1);
+ MI.RemoveOperand(Idx);
}
}
}
}
/// Determine if it is profitable to duplicate this block.
-bool TailDuplicator::shouldTailDuplicate(const MachineFunction &MF,
- bool IsSimple,
+bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
MachineBasicBlock &TailBB) {
- // Only duplicate blocks that end with unconditional branches.
- if (TailBB.canFallThrough())
+ // When doing tail-duplication during layout, the block ordering is in flux,
+ // so canFallThrough returns a result based on incorrect information and
+ // should just be ignored.
+ if (!LayoutMode && TailBB.canFallThrough())
return false;
// Don't try to tail-duplicate single-block loops.
@@ -518,12 +535,24 @@ bool TailDuplicator::shouldTailDuplicate(const MachineFunction &MF,
// duplicate only one, because one branch instruction can be eliminated to
// compensate for the duplication.
unsigned MaxDuplicateCount;
- if (TailDuplicateSize.getNumOccurrences() == 0 &&
- // FIXME: Use Function::optForSize().
- MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize))
+ if (TailDupSize == 0 &&
+ TailDuplicateSize.getNumOccurrences() == 0 &&
+ MF->getFunction()->optForSize())
MaxDuplicateCount = 1;
- else
+ else if (TailDupSize == 0)
MaxDuplicateCount = TailDuplicateSize;
+ else
+ MaxDuplicateCount = TailDupSize;
+
+ // If the block to be duplicated ends in an unanalyzable fallthrough, don't
+ // duplicate it.
+ // A similar check is necessary in MachineBlockPlacement to make sure pairs of
+ // blocks with unanalyzable fallthrough get laid out contiguously.
+ MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
+ SmallVector<MachineOperand, 4> PredCond;
+ if (TII->analyzeBranch(TailBB, PredTBB, PredFBB, PredCond) &&
+ TailBB.canFallThrough())
+ return false;
// If the target has hardware branch prediction that can handle indirect
// branches, duplicating them can often make them predictable when there
@@ -536,7 +565,7 @@ bool TailDuplicator::shouldTailDuplicate(const MachineFunction &MF,
HasIndirectbr = TailBB.back().isIndirectBranch();
if (HasIndirectbr && PreRegAlloc)
- MaxDuplicateCount = 20;
+ MaxDuplicateCount = TailDupIndirectBranchSize;
// Check the instructions in the block to determine whether tail-duplication
// is invalid or unlikely to be profitable.
@@ -631,7 +660,7 @@ bool TailDuplicator::canCompletelyDuplicateBB(MachineBasicBlock &BB) {
MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
SmallVector<MachineOperand, 4> PredCond;
- if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
+ if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond))
return false;
if (!PredCond.empty())
@@ -649,11 +678,7 @@ bool TailDuplicator::duplicateSimpleBB(
SmallVector<MachineBasicBlock *, 8> Preds(TailBB->pred_begin(),
TailBB->pred_end());
bool Changed = false;
- for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
- PE = Preds.end();
- PI != PE; ++PI) {
- MachineBasicBlock *PredBB = *PI;
-
+ for (MachineBasicBlock *PredBB : Preds) {
if (PredBB->hasEHPadSuccessor())
continue;
@@ -662,7 +687,7 @@ bool TailDuplicator::duplicateSimpleBB(
MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
SmallVector<MachineOperand, 4> PredCond;
- if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
+ if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond))
continue;
Changed = true;
@@ -670,7 +695,7 @@ bool TailDuplicator::duplicateSimpleBB(
<< "From simple Succ: " << *TailBB);
MachineBasicBlock *NewTarget = *TailBB->succ_begin();
- MachineBasicBlock *NextBB = &*std::next(PredBB->getIterator());
+ MachineBasicBlock *NextBB = PredBB->getNextNode();
// Make PredFBB explicit.
if (PredCond.empty())
@@ -700,7 +725,7 @@ bool TailDuplicator::duplicateSimpleBB(
if (PredTBB == NextBB && PredFBB == nullptr)
PredTBB = nullptr;
- TII->RemoveBranch(*PredBB);
+ TII->removeBranch(*PredBB);
if (!PredBB->isSuccessor(NewTarget))
PredBB->replaceSuccessor(TailBB, NewTarget);
@@ -710,17 +735,40 @@ bool TailDuplicator::duplicateSimpleBB(
}
if (PredTBB)
- TII->InsertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc());
+ TII->insertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc());
TDBBs.push_back(PredBB);
}
return Changed;
}
+bool TailDuplicator::canTailDuplicate(MachineBasicBlock *TailBB,
+ MachineBasicBlock *PredBB) {
+ // EH edges are ignored by analyzeBranch.
+ if (PredBB->succ_size() > 1)
+ return false;
+
+ MachineBasicBlock *PredTBB, *PredFBB;
+ SmallVector<MachineOperand, 4> PredCond;
+ if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond))
+ return false;
+ if (!PredCond.empty())
+ return false;
+ return true;
+}
+
/// If it is profitable, duplicate TailBB's contents in each
/// of its predecessors.
-bool TailDuplicator::tailDuplicate(MachineFunction &MF, bool IsSimple,
- MachineBasicBlock *TailBB,
+/// \p IsSimple result of isSimpleBB
+/// \p TailBB Block to be duplicated.
+/// \p ForcedLayoutPred When non-null, use this block as the layout predecessor
+/// instead of the previous block in MF's order.
+/// \p TDBBs A vector to keep track of all blocks tail-duplicated
+/// into.
+/// \p Copies A vector of copy instructions inserted. Used later to
+/// walk all the inserted copies and remove redundant ones.
+bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
+ MachineBasicBlock *ForcedLayoutPred,
SmallVectorImpl<MachineBasicBlock *> &TDBBs,
SmallVectorImpl<MachineInstr *> &Copies) {
DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n');
@@ -737,25 +785,20 @@ bool TailDuplicator::tailDuplicate(MachineFunction &MF, bool IsSimple,
bool Changed = false;
SmallSetVector<MachineBasicBlock *, 8> Preds(TailBB->pred_begin(),
TailBB->pred_end());
- for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
- PE = Preds.end();
- PI != PE; ++PI) {
- MachineBasicBlock *PredBB = *PI;
-
+ for (MachineBasicBlock *PredBB : Preds) {
assert(TailBB != PredBB &&
"Single-block loop should have been rejected earlier!");
- // EH edges are ignored by AnalyzeBranch.
- if (PredBB->succ_size() > 1)
- continue;
- MachineBasicBlock *PredTBB, *PredFBB;
- SmallVector<MachineOperand, 4> PredCond;
- if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
- continue;
- if (!PredCond.empty())
+ if (!canTailDuplicate(TailBB, PredBB))
continue;
+
// Don't duplicate into a fall-through predecessor (at least for now).
- if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough())
+ bool IsLayoutSuccessor = false;
+ if (ForcedLayoutPred)
+ IsLayoutSuccessor = (ForcedLayoutPred == PredBB);
+ else if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough())
+ IsLayoutSuccessor = true;
+ if (IsLayoutSuccessor)
continue;
DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
@@ -764,7 +807,7 @@ bool TailDuplicator::tailDuplicate(MachineFunction &MF, bool IsSimple,
TDBBs.push_back(PredBB);
// Remove PredBB's unconditional branch.
- TII->RemoveBranch(*PredBB);
+ TII->removeBranch(*PredBB);
// Clone the contents of TailBB into PredBB.
DenseMap<unsigned, RegSubRegPair> LocalVRMap;
@@ -782,13 +825,15 @@ bool TailDuplicator::tailDuplicate(MachineFunction &MF, bool IsSimple,
} else {
// Replace def of virtual registers with new registers, and update
// uses with PHI source register or the new registers.
- duplicateInstruction(MI, TailBB, PredBB, MF, LocalVRMap, UsedByPhi);
+ duplicateInstruction(MI, TailBB, PredBB, LocalVRMap, UsedByPhi);
}
}
appendCopies(PredBB, CopyInfos, Copies);
// Simplify
- TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true);
+ MachineBasicBlock *PredTBB, *PredFBB;
+ SmallVector<MachineOperand, 4> PredCond;
+ TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond);
NumTailDupAdded += TailBB->size() - 1; // subtract one for removed branch
@@ -796,10 +841,8 @@ bool TailDuplicator::tailDuplicate(MachineFunction &MF, bool IsSimple,
PredBB->removeSuccessor(PredBB->succ_begin());
assert(PredBB->succ_empty() &&
"TailDuplicate called on block with multiple successors!");
- for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(),
- E = TailBB->succ_end();
- I != E; ++I)
- PredBB->addSuccessor(*I, MBPI->getEdgeProbability(TailBB, I));
+ for (MachineBasicBlock *Succ : TailBB->successors())
+ PredBB->addSuccessor(Succ, MBPI->getEdgeProbability(TailBB, Succ));
Changed = true;
++NumTailDups;
@@ -808,17 +851,27 @@ bool TailDuplicator::tailDuplicate(MachineFunction &MF, bool IsSimple,
// If TailBB was duplicated into all its predecessors except for the prior
// block, which falls through unconditionally, move the contents of this
// block into the prior block.
- MachineBasicBlock *PrevBB = &*std::prev(TailBB->getIterator());
+ MachineBasicBlock *PrevBB = ForcedLayoutPred;
+ if (!PrevBB)
+ PrevBB = &*std::prev(TailBB->getIterator());
MachineBasicBlock *PriorTBB = nullptr, *PriorFBB = nullptr;
SmallVector<MachineOperand, 4> PriorCond;
// This has to check PrevBB->succ_size() because EH edges are ignored by
- // AnalyzeBranch.
+ // analyzeBranch.
if (PrevBB->succ_size() == 1 &&
- !TII->analyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true) &&
- PriorCond.empty() && !PriorTBB && TailBB->pred_size() == 1 &&
+ // Layout preds are not always CFG preds. Check.
+ *PrevBB->succ_begin() == TailBB &&
+ !TII->analyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond) &&
+ PriorCond.empty() &&
+ (!PriorTBB || PriorTBB == TailBB) &&
+ TailBB->pred_size() == 1 &&
!TailBB->hasAddressTaken()) {
DEBUG(dbgs() << "\nMerging into block: " << *PrevBB
<< "From MBB: " << *TailBB);
+ // There may be a branch to the layout successor. This is unlikely but it
+ // happens. The correct thing to do is to remove the branch before
+ // duplicating the instructions in all cases.
+ TII->removeBranch(*PrevBB);
if (PreRegAlloc) {
DenseMap<unsigned, RegSubRegPair> LocalVRMap;
SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos;
@@ -837,11 +890,12 @@ bool TailDuplicator::tailDuplicate(MachineFunction &MF, bool IsSimple,
// uses with PHI source register or the new registers.
MachineInstr *MI = &*I++;
assert(!MI->isBundle() && "Not expecting bundles before regalloc!");
- duplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap, UsedByPhi);
+ duplicateInstruction(MI, TailBB, PrevBB, LocalVRMap, UsedByPhi);
MI->eraseFromParent();
}
appendCopies(PrevBB, CopyInfos, Copies);
} else {
+ TII->removeBranch(*PrevBB);
// No PHIs to worry about, just splice the instructions over.
PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end());
}
@@ -874,11 +928,8 @@ bool TailDuplicator::tailDuplicate(MachineFunction &MF, bool IsSimple,
// What we do here is introduce a copy in 3 of the register defined by the
// phi, just like when we are duplicating 2 into 3, but we don't copy any
// real instructions or remove the 3 -> 2 edge from the phi in 2.
- for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
- PE = Preds.end();
- PI != PE; ++PI) {
- MachineBasicBlock *PredBB = *PI;
- if (std::find(TDBBs.begin(), TDBBs.end(), PredBB) != TDBBs.end())
+ for (MachineBasicBlock *PredBB : Preds) {
+ if (is_contained(TDBBs, PredBB))
continue;
// EH edges
@@ -917,10 +968,15 @@ void TailDuplicator::appendCopies(MachineBasicBlock *MBB,
/// Remove the specified dead machine basic block from the function, updating
/// the CFG.
-void TailDuplicator::removeDeadBlock(MachineBasicBlock *MBB) {
+void TailDuplicator::removeDeadBlock(
+ MachineBasicBlock *MBB,
+ llvm::function_ref<void(MachineBasicBlock *)> *RemovalCallback) {
assert(MBB->pred_empty() && "MBB must be dead!");
DEBUG(dbgs() << "\nRemoving MBB: " << *MBB);
+ if (RemovalCallback)
+ (*RemovalCallback)(MBB);
+
// Remove all successors.
while (!MBB->succ_empty())
MBB->removeSuccessor(MBB->succ_end() - 1);
diff --git a/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp b/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
index cac7e63af328..f082add8c7dd 100644
--- a/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -40,7 +40,7 @@ bool TargetFrameLowering::noFramePointerElim(const MachineFunction &MF) const {
/// is overridden for some targets.
int TargetFrameLowering::getFrameIndexReference(const MachineFunction &MF,
int FI, unsigned &FrameReg) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
// By default, assume all frame indices are referenced via whatever
@@ -48,13 +48,13 @@ int TargetFrameLowering::getFrameIndexReference(const MachineFunction &MF,
// something different.
FrameReg = RI->getFrameRegister(MF);
- return MFI->getObjectOffset(FI) + MFI->getStackSize() -
- getOffsetOfLocalArea() + MFI->getOffsetAdjustment();
+ return MFI.getObjectOffset(FI) + MFI.getStackSize() -
+ getOffsetOfLocalArea() + MFI.getOffsetAdjustment();
}
bool TargetFrameLowering::needsFrameIndexResolution(
const MachineFunction &MF) const {
- return MF.getFrameInfo()->hasStackObjects();
+ return MF.getFrameInfo().hasStackObjects();
}
void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF,
@@ -84,7 +84,7 @@ void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF,
return;
// Functions which call __builtin_unwind_init get all their registers saved.
- bool CallsUnwindInit = MF.getMMI().callsUnwindInit();
+ bool CallsUnwindInit = MF.callsUnwindInit();
const MachineRegisterInfo &MRI = MF.getRegInfo();
for (unsigned i = 0; CSRegs[i]; ++i) {
unsigned Reg = CSRegs[i];
diff --git a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp
index e7330c60ed23..01f91b96b58a 100644
--- a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -84,8 +84,8 @@ unsigned TargetInstrInfo::getInlineAsmLength(const char *Str,
if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(),
strlen(MAI.getSeparatorString())) == 0) {
atInsnStart = true;
- } else if (strncmp(Str, MAI.getCommentString(),
- strlen(MAI.getCommentString())) == 0) {
+ } else if (strncmp(Str, MAI.getCommentString().data(),
+ MAI.getCommentString().size()) == 0) {
// Stop counting as an instruction after a comment until the next
// separator.
atInsnStart = false;
@@ -119,7 +119,7 @@ TargetInstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
// If MBB isn't immediately before MBB, insert a branch to it.
if (++MachineFunction::iterator(MBB) != MachineFunction::iterator(NewDest))
- InsertBranch(*MBB, NewDest, nullptr, SmallVector<MachineOperand, 0>(), DL);
+ insertBranch(*MBB, NewDest, nullptr, SmallVector<MachineOperand, 0>(), DL);
MBB->addSuccessor(NewDest);
}
@@ -437,13 +437,20 @@ static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI,
const TargetInstrInfo &TII) {
unsigned StartIdx = 0;
switch (MI.getOpcode()) {
- case TargetOpcode::STACKMAP:
- StartIdx = 2; // Skip ID, nShadowBytes.
+ case TargetOpcode::STACKMAP: {
+ // StackMapLiveValues are foldable
+ StartIdx = StackMapOpers(&MI).getVarIdx();
break;
+ }
case TargetOpcode::PATCHPOINT: {
- // For PatchPoint, the call args are not foldable.
- PatchPointOpers opers(&MI);
- StartIdx = opers.getVarIdx();
+ // For PatchPoint, the call args are not foldable (even if reported in the
+ // stackmap e.g. via anyregcc).
+ StartIdx = PatchPointOpers(&MI).getVarIdx();
+ break;
+ }
+ case TargetOpcode::STATEPOINT: {
+ // For statepoints, fold deopt and gc arguments, but not call arguments.
+ StartIdx = StatepointOpers(&MI).getVarIdx();
break;
}
default:
@@ -467,7 +474,7 @@ static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI,
for (unsigned i = StartIdx; i < MI.getNumOperands(); ++i) {
MachineOperand &MO = MI.getOperand(i);
- if (std::find(Ops.begin(), Ops.end(), i) != Ops.end()) {
+ if (is_contained(Ops, i)) {
unsigned SpillSize;
unsigned SpillOffset;
// Compute the spill slot size and offset.
@@ -508,10 +515,36 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
assert(MBB && "foldMemoryOperand needs an inserted instruction");
MachineFunction &MF = *MBB->getParent();
+ // If we're not folding a load into a subreg, the size of the load is the
+ // size of the spill slot. But if we are, we need to figure out what the
+ // actual load size is.
+ int64_t MemSize = 0;
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+
+ if (Flags & MachineMemOperand::MOStore) {
+ MemSize = MFI.getObjectSize(FI);
+ } else {
+ for (unsigned Idx : Ops) {
+ int64_t OpSize = MFI.getObjectSize(FI);
+
+ if (auto SubReg = MI.getOperand(Idx).getSubReg()) {
+ unsigned SubRegSize = TRI->getSubRegIdxSize(SubReg);
+ if (SubRegSize > 0 && !(SubRegSize % 8))
+ OpSize = SubRegSize / 8;
+ }
+
+ MemSize = std::max(MemSize, OpSize);
+ }
+ }
+
+ assert(MemSize && "Did not expect a zero-sized stack slot");
+
MachineInstr *NewMI = nullptr;
if (MI.getOpcode() == TargetOpcode::STACKMAP ||
- MI.getOpcode() == TargetOpcode::PATCHPOINT) {
+ MI.getOpcode() == TargetOpcode::PATCHPOINT ||
+ MI.getOpcode() == TargetOpcode::STATEPOINT) {
// Fold stackmap/patchpoint.
NewMI = foldPatchpoint(MF, MI, Ops, FI, *this);
if (NewMI)
@@ -530,10 +563,9 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
assert((!(Flags & MachineMemOperand::MOLoad) ||
NewMI->mayLoad()) &&
"Folded a use to a non-load!");
- const MachineFrameInfo &MFI = *MF.getFrameInfo();
assert(MFI.getObjectOffset(FI) != -1);
MachineMemOperand *MMO = MF.getMachineMemOperand(
- MachinePointerInfo::getFixedStack(MF, FI), Flags, MFI.getObjectSize(FI),
+ MachinePointerInfo::getFixedStack(MF, FI), Flags, MemSize,
MFI.getObjectAlignment(FI));
NewMI->addMemOperand(MF, MMO);
@@ -550,7 +582,6 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
const MachineOperand &MO = MI.getOperand(1 - Ops[0]);
MachineBasicBlock::iterator Pos = MI;
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
if (Flags == MachineMemOperand::MOStore)
storeRegToStackSlot(*MBB, Pos, MO.getReg(), MO.isKill(), FI, RC, TRI);
@@ -792,7 +823,8 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
int FrameIndex = 0;
if ((MI.getOpcode() == TargetOpcode::STACKMAP ||
- MI.getOpcode() == TargetOpcode::PATCHPOINT) &&
+ MI.getOpcode() == TargetOpcode::PATCHPOINT ||
+ MI.getOpcode() == TargetOpcode::STATEPOINT) &&
isLoadFromStackSlot(LoadMI, FrameIndex)) {
// Fold stackmap/patchpoint.
NewMI = foldPatchpoint(MF, MI, Ops, FrameIndex, *this);
@@ -844,7 +876,7 @@ bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(
// simple, and a common case.
int FrameIdx = 0;
if (isLoadFromStackSlot(MI, FrameIdx) &&
- MF.getFrameInfo()->isImmutableObjectIndex(FrameIdx))
+ MF.getFrameInfo().isImmutableObjectIndex(FrameIdx))
return true;
// Avoid instructions obviously unsafe for remat.
@@ -857,7 +889,7 @@ bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(
return false;
// Avoid instructions which load from potentially varying memory.
- if (MI.mayLoad() && !MI.isInvariantLoad(AA))
+ if (MI.mayLoad() && !MI.isDereferenceableInvariantLoad(AA))
return false;
// If any of the registers accessed are non-constant, conservatively assume
@@ -875,7 +907,7 @@ bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(
// If the physreg has no defs anywhere, it's just an ambient register
// and we can freely move its uses. Alternatively, if it's allocatable,
// it could get allocated to something with a def during allocation.
- if (!MRI.isConstantPhysReg(Reg, MF))
+ if (!MRI.isConstantPhysReg(Reg))
return false;
} else {
// A physreg def. We can't remat it.
@@ -1091,35 +1123,6 @@ int TargetInstrInfo::computeDefOperandLatency(
return -1;
}
-unsigned TargetInstrInfo::computeOperandLatency(
- const InstrItineraryData *ItinData, const MachineInstr &DefMI,
- unsigned DefIdx, const MachineInstr *UseMI, unsigned UseIdx) const {
-
- int DefLatency = computeDefOperandLatency(ItinData, DefMI);
- if (DefLatency >= 0)
- return DefLatency;
-
- assert(ItinData && !ItinData->isEmpty() && "computeDefOperandLatency fail");
-
- int OperLatency = 0;
- if (UseMI)
- OperLatency = getOperandLatency(ItinData, DefMI, DefIdx, *UseMI, UseIdx);
- else {
- unsigned DefClass = DefMI.getDesc().getSchedClass();
- OperLatency = ItinData->getOperandCycle(DefClass, DefIdx);
- }
- if (OperLatency >= 0)
- return OperLatency;
-
- // No operand latency was found.
- unsigned InstrLatency = getInstrLatency(ItinData, DefMI);
-
- // Expected latency is the max of the stage latency and itinerary props.
- InstrLatency = std::max(InstrLatency,
- defaultDefLatency(ItinData->SchedModel, DefMI));
- return InstrLatency;
-}
-
bool TargetInstrInfo::getRegSequenceInputs(
const MachineInstr &MI, unsigned DefIdx,
SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const {
diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 6d3fe8ca6473..003311b157fc 100644
--- a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -14,6 +14,7 @@
#include "llvm/Target/TargetLowering.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -44,6 +45,14 @@ static cl::opt<bool> JumpIsExpensiveOverride(
cl::desc("Do not create extra branches to split comparison logic."),
cl::Hidden);
+static cl::opt<unsigned> MinimumJumpTableEntries
+ ("min-jump-table-entries", cl::init(4), cl::Hidden,
+ cl::desc("Set minimum number of entries to use a jump table."));
+
+static cl::opt<unsigned> MaximumJumpTableSize
+ ("max-jump-table-size", cl::init(0), cl::Hidden,
+ cl::desc("Set maximum size of jump tables; zero for no limit."));
+
// Although this default value is arbitrary, it is not random. It is assumed
// that a condition that evaluates the same way by a higher percentage than this
// is best represented as control flow. Therefore, the default value N should be
@@ -352,6 +361,11 @@ static void InitLibcallNames(const char **Names, const Triple &TT) {
Names[RTLIB::MEMCPY] = "memcpy";
Names[RTLIB::MEMMOVE] = "memmove";
Names[RTLIB::MEMSET] = "memset";
+ Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_1] = "__llvm_memcpy_element_atomic_1";
+ Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_2] = "__llvm_memcpy_element_atomic_2";
+ Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_4] = "__llvm_memcpy_element_atomic_4";
+ Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_8] = "__llvm_memcpy_element_atomic_8";
+ Names[RTLIB::MEMCPY_ELEMENT_ATOMIC_16] = "__llvm_memcpy_element_atomic_16";
Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume";
Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = "__sync_val_compare_and_swap_1";
Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2";
@@ -488,12 +502,10 @@ static void InitLibcallNames(const char **Names, const Triple &TT) {
Names[RTLIB::DEOPTIMIZE] = "__llvm_deoptimize";
}
-/// InitLibcallCallingConvs - Set default libcall CallingConvs.
-///
+/// Set default libcall CallingConvs.
static void InitLibcallCallingConvs(CallingConv::ID *CCs) {
- for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) {
- CCs[i] = CallingConv::C;
- }
+ for (int LC = 0; LC < RTLIB::UNKNOWN_LIBCALL; ++LC)
+ CCs[LC] = CallingConv::C;
}
/// getFPEXT - Return the FPEXT_*_* value for the given types, or
@@ -756,6 +768,24 @@ RTLIB::Libcall RTLIB::getSYNC(unsigned Opc, MVT VT) {
return UNKNOWN_LIBCALL;
}
+RTLIB::Libcall RTLIB::getMEMCPY_ELEMENT_ATOMIC(uint64_t ElementSize) {
+ switch (ElementSize) {
+ case 1:
+ return MEMCPY_ELEMENT_ATOMIC_1;
+ case 2:
+ return MEMCPY_ELEMENT_ATOMIC_2;
+ case 4:
+ return MEMCPY_ELEMENT_ATOMIC_4;
+ case 8:
+ return MEMCPY_ELEMENT_ATOMIC_8;
+ case 16:
+ return MEMCPY_ELEMENT_ATOMIC_16;
+ default:
+ return UNKNOWN_LIBCALL;
+ }
+}
+
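
A short sketch of how a caller might map an element size to the runtime entry
point (illustrative; ElementSize and TLI are assumed to be in scope, and the
error handling is not part of this patch):

    RTLIB::Libcall LC = RTLIB::getMEMCPY_ELEMENT_ATOMIC(ElementSize);
    if (LC == RTLIB::UNKNOWN_LIBCALL)
      report_fatal_error("Unsupported element size for atomic memcpy");
    // For a supported size the libcall name resolves to one of the entries
    // registered above, e.g. "__llvm_memcpy_element_atomic_4".
    const char *Name = TLI.getLibcallName(LC);
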
/// InitCmpLibcallCCs - Set default comparison libcall CC.
///
static void InitCmpLibcallCCs(ISD::CondCode *CCs) {
@@ -804,10 +834,8 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {
= MaxStoresPerMemmoveOptSize = 4;
UseUnderscoreSetJmp = false;
UseUnderscoreLongJmp = false;
- SelectIsExpensive = false;
HasMultipleConditionRegisters = false;
HasExtractBitsInsn = false;
- FsqrtIsCheap = false;
JumpIsExpensive = JumpIsExpensiveOverride;
PredictableSelectIsExpensive = false;
MaskAndBranchFoldingIsLegal = false;
@@ -825,7 +853,6 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {
PrefLoopAlignment = 0;
GatherAllAliasesMaxDepth = 6;
MinStackArgumentAlignment = 1;
- MinimumJumpTableEntries = 4;
// TODO: the default will be switched to 0 in the next commit, along
// with the Target-specific changes necessary.
MaxAtomicSizeInBitsSupported = 1024;
@@ -956,15 +983,11 @@ EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy,
return getScalarShiftAmountTy(DL, LHSTy);
}
-/// canOpTrap - Returns true if the operation can trap for the value type.
-/// VT must be a legal type.
bool TargetLoweringBase::canOpTrap(unsigned Op, EVT VT) const {
assert(isTypeLegal(VT));
switch (Op) {
default:
return false;
- case ISD::FDIV:
- case ISD::FREM:
case ISD::SDIV:
case ISD::UDIV:
case ISD::SREM:
@@ -1177,7 +1200,7 @@ TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI,
MachineBasicBlock *MBB) const {
MachineInstr *MI = &InitialMI;
MachineFunction &MF = *MI->getParent()->getParent();
- MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
// We're handling multiple types of operands here:
// PATCHPOINT MetaArgs - live-in, read only, direct
@@ -1402,7 +1425,7 @@ void TargetLoweringBase::computeRegisterProperties(
MVT SVT = (MVT::SimpleValueType) nVT;
// Promote vectors of integers to vectors with the same number
// of elements, with a wider element type.
- if (SVT.getVectorElementType().getSizeInBits() > EltVT.getSizeInBits() &&
+ if (SVT.getScalarSizeInBits() > EltVT.getSizeInBits() &&
SVT.getVectorNumElements() == NElts && isTypeLegal(SVT)) {
TransformToType[i] = SVT;
RegisterTypeForVT[i] = SVT;
@@ -1754,9 +1777,41 @@ TargetLoweringBase::getTypeLegalizationCost(const DataLayout &DL,
}
}
+Value *TargetLoweringBase::getDefaultSafeStackPointerLocation(IRBuilder<> &IRB,
+ bool UseTLS) const {
+ // compiler-rt provides a variable with a magic name. Targets that do not
+ // link with compiler-rt may also provide such a variable.
+ Module *M = IRB.GetInsertBlock()->getParent()->getParent();
+ const char *UnsafeStackPtrVar = "__safestack_unsafe_stack_ptr";
+ auto UnsafeStackPtr =
+ dyn_cast_or_null<GlobalVariable>(M->getNamedValue(UnsafeStackPtrVar));
+
+ Type *StackPtrTy = Type::getInt8PtrTy(M->getContext());
+
+ if (!UnsafeStackPtr) {
+ auto TLSModel = UseTLS ?
+ GlobalValue::InitialExecTLSModel :
+ GlobalValue::NotThreadLocal;
+ // The global variable is not defined yet, define it ourselves.
+ // We use the initial-exec TLS model because we do not support the
+ // variable living anywhere other than in the main executable.
+ UnsafeStackPtr = new GlobalVariable(
+ *M, StackPtrTy, false, GlobalValue::ExternalLinkage, nullptr,
+ UnsafeStackPtrVar, nullptr, TLSModel);
+ } else {
+ // The variable exists, check its type and attributes.
+ if (UnsafeStackPtr->getValueType() != StackPtrTy)
+ report_fatal_error(Twine(UnsafeStackPtrVar) + " must have void* type");
+ if (UseTLS != UnsafeStackPtr->isThreadLocal())
+ report_fatal_error(Twine(UnsafeStackPtrVar) + " must " +
+ (UseTLS ? "" : "not ") + "be thread-local");
+ }
+ return UnsafeStackPtr;
+}
+
Value *TargetLoweringBase::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
if (!TM.getTargetTriple().isAndroid())
- return nullptr;
+ return getDefaultSafeStackPointerLocation(IRB, true);
// Android provides a libc function to retrieve the address of the current
// thread's unsafe stack pointer.
@@ -1818,9 +1873,7 @@ Value *TargetLoweringBase::getIRStackGuard(IRBuilder<> &IRB) const {
if (getTargetMachine().getTargetTriple().isOSOpenBSD()) {
Module &M = *IRB.GetInsertBlock()->getParent()->getParent();
PointerType *PtrTy = Type::getInt8PtrTy(M.getContext());
- auto Guard = cast<GlobalValue>(M.getOrInsertGlobal("__guard_local", PtrTy));
- Guard->setVisibility(GlobalValue::HiddenVisibility);
- return Guard;
+ return M.getOrInsertGlobal("__guard_local", PtrTy);
}
return nullptr;
}
@@ -1840,3 +1893,207 @@ Value *TargetLoweringBase::getSDagStackGuard(const Module &M) const {
Value *TargetLoweringBase::getSSPStackGuardCheck(const Module &M) const {
return nullptr;
}
+
+unsigned TargetLoweringBase::getMinimumJumpTableEntries() const {
+ return MinimumJumpTableEntries;
+}
+
+void TargetLoweringBase::setMinimumJumpTableEntries(unsigned Val) {
+ MinimumJumpTableEntries = Val;
+}
+
+unsigned TargetLoweringBase::getMaximumJumpTableSize() const {
+ return MaximumJumpTableSize;
+}
+
+void TargetLoweringBase::setMaximumJumpTableSize(unsigned Val) {
+ MaximumJumpTableSize = Val;
+}
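
A hedged sketch of how a target could override the command-line defaults above
from its TargetLowering constructor (FooTargetLowering is a made-up name, not a
target touched by this patch):

    FooTargetLowering::FooTargetLowering(const TargetMachine &TM)
        : TargetLowering(TM) {
      setMinimumJumpTableEntries(8); // need at least 8 cases for a jump table
      setMaximumJumpTableSize(128);  // cap table size; 0 means no limit
    }
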
+
+//===----------------------------------------------------------------------===//
+// Reciprocal Estimates
+//===----------------------------------------------------------------------===//
+
+/// Get the reciprocal estimate attribute string for a function that will
+/// override the target defaults.
+static StringRef getRecipEstimateForFunc(MachineFunction &MF) {
+ const Function *F = MF.getFunction();
+ StringRef RecipAttrName = "reciprocal-estimates";
+ if (!F->hasFnAttribute(RecipAttrName))
+ return StringRef();
+
+ return F->getFnAttribute(RecipAttrName).getValueAsString();
+}
+
+/// Construct a string for the given reciprocal operation of the given type.
+/// This string should match the corresponding option to the front-end's
+/// "-mrecip" flag assuming those strings have been passed through in an
+/// attribute string. For example, "vec-divf" for a division of a vXf32.
+static std::string getReciprocalOpName(bool IsSqrt, EVT VT) {
+ std::string Name = VT.isVector() ? "vec-" : "";
+
+ Name += IsSqrt ? "sqrt" : "div";
+
+ // TODO: Handle "half" or other float types?
+ if (VT.getScalarType() == MVT::f64) {
+ Name += "d";
+ } else {
+ assert(VT.getScalarType() == MVT::f32 &&
+ "Unexpected FP type for reciprocal estimate");
+ Name += "f";
+ }
+
+ return Name;
+}
+
+/// Return the character position and value (a single numeric character) of a
+/// customized refinement operation in the input string if it exists. Return
+/// false if there is no customized refinement step count.
+static bool parseRefinementStep(StringRef In, size_t &Position,
+ uint8_t &Value) {
+ const char RefStepToken = ':';
+ Position = In.find(RefStepToken);
+ if (Position == StringRef::npos)
+ return false;
+
+ StringRef RefStepString = In.substr(Position + 1);
+ // Allow exactly one numeric character for the additional refinement
+ // step parameter.
+ if (RefStepString.size() == 1) {
+ char RefStepChar = RefStepString[0];
+ if (RefStepChar >= '0' && RefStepChar <= '9') {
+ Value = RefStepChar - '0';
+ return true;
+ }
+ }
+ report_fatal_error("Invalid refinement step for -recip.");
+}
+
+/// For the input attribute string, return one of the ReciprocalEstimate enum
+/// status values (enabled, disabled, or not specified) for this operation on
+/// the specified data type.
+static int getOpEnabled(bool IsSqrt, EVT VT, StringRef Override) {
+ if (Override.empty())
+ return TargetLoweringBase::ReciprocalEstimate::Unspecified;
+
+ SmallVector<StringRef, 4> OverrideVector;
+ SplitString(Override, OverrideVector, ",");
+ unsigned NumArgs = OverrideVector.size();
+
+ // Check if "all", "none", or "default" was specified.
+ if (NumArgs == 1) {
+ // Look for an optional setting of the number of refinement steps needed
+ // for this type of reciprocal operation.
+ size_t RefPos;
+ uint8_t RefSteps;
+ if (parseRefinementStep(Override, RefPos, RefSteps)) {
+ // Split the string for further processing.
+ Override = Override.substr(0, RefPos);
+ }
+
+ // All reciprocal types are enabled.
+ if (Override == "all")
+ return TargetLoweringBase::ReciprocalEstimate::Enabled;
+
+ // All reciprocal types are disabled.
+ if (Override == "none")
+ return TargetLoweringBase::ReciprocalEstimate::Disabled;
+
+ // Target defaults for enablement are used.
+ if (Override == "default")
+ return TargetLoweringBase::ReciprocalEstimate::Unspecified;
+ }
+
+ // The attribute string may omit the size suffix ('f'/'d').
+ std::string VTName = getReciprocalOpName(IsSqrt, VT);
+ std::string VTNameNoSize = VTName;
+ VTNameNoSize.pop_back();
+ static const char DisabledPrefix = '!';
+
+ for (StringRef RecipType : OverrideVector) {
+ size_t RefPos;
+ uint8_t RefSteps;
+ if (parseRefinementStep(RecipType, RefPos, RefSteps))
+ RecipType = RecipType.substr(0, RefPos);
+
+ // Ignore the disablement token for string matching.
+ bool IsDisabled = RecipType[0] == DisabledPrefix;
+ if (IsDisabled)
+ RecipType = RecipType.substr(1);
+
+ if (RecipType.equals(VTName) || RecipType.equals(VTNameNoSize))
+ return IsDisabled ? TargetLoweringBase::ReciprocalEstimate::Disabled
+ : TargetLoweringBase::ReciprocalEstimate::Enabled;
+ }
+
+ return TargetLoweringBase::ReciprocalEstimate::Unspecified;
+}
+
+/// For the input attribute string, return the customized refinement step count
+/// for this operation on the specified data type. If the step count does not
+/// exist, return the ReciprocalEstimate enum value for unspecified.
+static int getOpRefinementSteps(bool IsSqrt, EVT VT, StringRef Override) {
+ if (Override.empty())
+ return TargetLoweringBase::ReciprocalEstimate::Unspecified;
+
+ SmallVector<StringRef, 4> OverrideVector;
+ SplitString(Override, OverrideVector, ",");
+ unsigned NumArgs = OverrideVector.size();
+
+ // Check if "all", "default", or "none" was specified.
+ if (NumArgs == 1) {
+ // Look for an optional setting of the number of refinement steps needed
+ // for this type of reciprocal operation.
+ size_t RefPos;
+ uint8_t RefSteps;
+ if (!parseRefinementStep(Override, RefPos, RefSteps))
+ return TargetLoweringBase::ReciprocalEstimate::Unspecified;
+
+ // Split the string for further processing.
+ Override = Override.substr(0, RefPos);
+ assert(Override != "none" &&
+ "Disabled reciprocals, but specified refinement steps?");
+
+ // If this is a general override, return the specified number of steps.
+ if (Override == "all" || Override == "default")
+ return RefSteps;
+ }
+
+ // The attribute string may omit the size suffix ('f'/'d').
+ std::string VTName = getReciprocalOpName(IsSqrt, VT);
+ std::string VTNameNoSize = VTName;
+ VTNameNoSize.pop_back();
+
+ for (StringRef RecipType : OverrideVector) {
+ size_t RefPos;
+ uint8_t RefSteps;
+ if (!parseRefinementStep(RecipType, RefPos, RefSteps))
+ continue;
+
+ RecipType = RecipType.substr(0, RefPos);
+ if (RecipType.equals(VTName) || RecipType.equals(VTNameNoSize))
+ return RefSteps;
+ }
+
+ return TargetLoweringBase::ReciprocalEstimate::Unspecified;
+}
+
+int TargetLoweringBase::getRecipEstimateSqrtEnabled(EVT VT,
+ MachineFunction &MF) const {
+ return getOpEnabled(true, VT, getRecipEstimateForFunc(MF));
+}
+
+int TargetLoweringBase::getRecipEstimateDivEnabled(EVT VT,
+ MachineFunction &MF) const {
+ return getOpEnabled(false, VT, getRecipEstimateForFunc(MF));
+}
+
+int TargetLoweringBase::getSqrtRefinementSteps(EVT VT,
+ MachineFunction &MF) const {
+ return getOpRefinementSteps(true, VT, getRecipEstimateForFunc(MF));
+}
+
+int TargetLoweringBase::getDivRefinementSteps(EVT VT,
+ MachineFunction &MF) const {
+ return getOpRefinementSteps(false, VT, getRecipEstimateForFunc(MF));
+}
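
For illustration only (not part of the patch): a front end or pass could attach the attribute string these helpers consume via Function::addFnAttr. With the value below, the new queries would report division on v4f32 as enabled with two refinement steps, sqrt on f64 as disabled, and everything else as unspecified (target default):

#include "llvm/IR/Function.h"

// Hypothetical helper; the attribute value mirrors the "-mrecip" syntax
// parsed above.
void tagRecipEstimates(llvm::Function &F) {
  F.addFnAttr("reciprocal-estimates", "vec-divf:2,!sqrtd");
}

// With that attribute on the function, the queries added in this hunk yield:
//   getRecipEstimateDivEnabled(MVT::v4f32, MF)   -> Enabled
//   getDivRefinementSteps(MVT::v4f32, MF)        -> 2
//   getRecipEstimateSqrtEnabled(MVT::f64, MF)    -> Disabled
//   getRecipEstimateSqrtEnabled(MVT::f32, MF)    -> Unspecified (target default)
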
diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 5f814c957e92..eb2a28f574a5 100644
--- a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -50,14 +50,14 @@ using namespace dwarf;
//===----------------------------------------------------------------------===//
MCSymbol *TargetLoweringObjectFileELF::getCFIPersonalitySymbol(
- const GlobalValue *GV, Mangler &Mang, const TargetMachine &TM,
+ const GlobalValue *GV, const TargetMachine &TM,
MachineModuleInfo *MMI) const {
unsigned Encoding = getPersonalityEncoding();
if ((Encoding & 0x80) == dwarf::DW_EH_PE_indirect)
return getContext().getOrCreateSymbol(StringRef("DW.ref.") +
- TM.getSymbol(GV, Mang)->getName());
+ TM.getSymbol(GV)->getName());
if ((Encoding & 0x70) == dwarf::DW_EH_PE_absptr)
- return TM.getSymbol(GV, Mang);
+ return TM.getSymbol(GV);
report_fatal_error("We do not support this DWARF encoding yet!");
}
@@ -84,20 +84,19 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(
}
const MCExpr *TargetLoweringObjectFileELF::getTTypeGlobalReference(
- const GlobalValue *GV, unsigned Encoding, Mangler &Mang,
- const TargetMachine &TM, MachineModuleInfo *MMI,
- MCStreamer &Streamer) const {
+ const GlobalValue *GV, unsigned Encoding, const TargetMachine &TM,
+ MachineModuleInfo *MMI, MCStreamer &Streamer) const {
if (Encoding & dwarf::DW_EH_PE_indirect) {
MachineModuleInfoELF &ELFMMI = MMI->getObjFileInfo<MachineModuleInfoELF>();
- MCSymbol *SSym = getSymbolWithGlobalValueBase(GV, ".DW.stub", Mang, TM);
+ MCSymbol *SSym = getSymbolWithGlobalValueBase(GV, ".DW.stub", TM);
// Add information about the stub reference to ELFMMI so that the stub
// gets emitted by the asmprinter.
MachineModuleInfoImpl::StubValueTy &StubSym = ELFMMI.getGVStubEntry(SSym);
if (!StubSym.getPointer()) {
- MCSymbol *Sym = TM.getSymbol(GV, Mang);
+ MCSymbol *Sym = TM.getSymbol(GV);
StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
}
@@ -106,8 +105,8 @@ const MCExpr *TargetLoweringObjectFileELF::getTTypeGlobalReference(
Encoding & ~dwarf::DW_EH_PE_indirect, Streamer);
}
- return TargetLoweringObjectFile::
- getTTypeGlobalReference(GV, Encoding, Mang, TM, MMI, Streamer);
+ return TargetLoweringObjectFile::getTTypeGlobalReference(GV, Encoding, TM,
+ MMI, Streamer);
}
static SectionKind
@@ -152,6 +151,11 @@ getELFKindForNamedSection(StringRef Name, SectionKind K) {
static unsigned getELFSectionType(StringRef Name, SectionKind K) {
+ // Use SHT_NOTE for sections whose names start with ".note", so that ELF
+ // notes can be emitted from a C variable declaration.
+ // See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=77609
+ if (Name.startswith(".note"))
+ return ELF::SHT_NOTE;
if (Name == ".init_array")
return ELF::SHT_INIT_ARRAY;
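
The point of the SHT_NOTE change is that a plain global placed in a ".note.*" section gets the correct section type without assembler tricks. A hedged example (the section name and field values are made up; the layout follows the usual 4-byte-aligned Elf_Note record, and the section attribute is a GCC/Clang extension):

// readelf -n will list this as a note once the object is built.
struct ElfNote {
  unsigned namesz;   // length of `name`, including the terminating NUL
  unsigned descsz;   // length of `desc`
  unsigned type;     // vendor-defined note type
  char     name[4];  // padded to a 4-byte boundary
  char     desc[4];
};

__attribute__((used, section(".note.myvendor"), aligned(4)))
static const ElfNote MyVendorNote = {4, 4, 1, "ABC", {0, 0, 0, 1}};
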
@@ -177,6 +181,9 @@ static unsigned getELFSectionFlags(SectionKind K) {
if (K.isText())
Flags |= ELF::SHF_EXECINSTR;
+ if (K.isExecuteOnly())
+ Flags |= ELF::SHF_ARM_PURECODE;
+
if (K.isWriteable())
Flags |= ELF::SHF_WRITE;
@@ -205,16 +212,15 @@ static const Comdat *getELFComdat(const GlobalValue *GV) {
}
MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal(
- const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
- const TargetMachine &TM) const {
- StringRef SectionName = GV->getSection();
+ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
+ StringRef SectionName = GO->getSection();
// Infer section flags from the section name if we can.
Kind = getELFKindForNamedSection(SectionName, Kind);
StringRef Group = "";
unsigned Flags = getELFSectionFlags(Kind);
- if (const Comdat *C = getELFComdat(GV)) {
+ if (const Comdat *C = getELFComdat(GO)) {
Group = C->getName();
Flags |= ELF::SHF_GROUP;
}
@@ -243,7 +249,7 @@ static StringRef getSectionPrefixForGlobal(SectionKind Kind) {
}
static MCSectionELF *
-selectELFSectionForGlobal(MCContext &Ctx, const GlobalValue *GV,
+selectELFSectionForGlobal(MCContext &Ctx, const GlobalObject *GO,
SectionKind Kind, Mangler &Mang,
const TargetMachine &TM, bool EmitUniqueSection,
unsigned Flags, unsigned *NextUniqueID) {
@@ -271,7 +277,7 @@ selectELFSectionForGlobal(MCContext &Ctx, const GlobalValue *GV,
}
StringRef Group = "";
- if (const Comdat *C = getELFComdat(GV)) {
+ if (const Comdat *C = getELFComdat(GO)) {
Flags |= ELF::SHF_GROUP;
Group = C->getName();
}
@@ -282,8 +288,8 @@ selectELFSectionForGlobal(MCContext &Ctx, const GlobalValue *GV,
// We also need alignment here.
// FIXME: this is getting the alignment of the character, not the
// alignment of the global!
- unsigned Align = GV->getParent()->getDataLayout().getPreferredAlignment(
- cast<GlobalVariable>(GV));
+ unsigned Align = GO->getParent()->getDataLayout().getPreferredAlignment(
+ cast<GlobalVariable>(GO));
std::string SizeSpec = ".rodata.str" + utostr(EntrySize) + ".";
Name = SizeSpec + utostr(Align);
@@ -293,25 +299,31 @@ selectELFSectionForGlobal(MCContext &Ctx, const GlobalValue *GV,
} else {
Name = getSectionPrefixForGlobal(Kind);
}
- // FIXME: Extend the section prefix to include hotness catagories such as .hot
- // or .unlikely for functions.
+
+ if (const auto *F = dyn_cast<Function>(GO)) {
+ const auto &OptionalPrefix = F->getSectionPrefix();
+ if (OptionalPrefix)
+ Name += *OptionalPrefix;
+ }
if (EmitUniqueSection && UniqueSectionNames) {
Name.push_back('.');
- TM.getNameWithPrefix(Name, GV, Mang, true);
+ TM.getNameWithPrefix(Name, GO, Mang, true);
}
unsigned UniqueID = MCContext::GenericSectionID;
if (EmitUniqueSection && !UniqueSectionNames) {
UniqueID = *NextUniqueID;
(*NextUniqueID)++;
}
+ // Use 0 as the unique ID for execute-only text
+ if (Kind.isExecuteOnly())
+ UniqueID = 0;
return Ctx.getELFSection(Name, getELFSectionType(Name, Kind), Flags,
EntrySize, Group, UniqueID);
}
MCSection *TargetLoweringObjectFileELF::SelectSectionForGlobal(
- const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
- const TargetMachine &TM) const {
+ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
unsigned Flags = getELFSectionFlags(Kind);
// If we have -ffunction-section or -fdata-section then we should emit the
@@ -323,14 +335,14 @@ MCSection *TargetLoweringObjectFileELF::SelectSectionForGlobal(
else
EmitUniqueSection = TM.getDataSections();
}
- EmitUniqueSection |= GV->hasComdat();
+ EmitUniqueSection |= GO->hasComdat();
- return selectELFSectionForGlobal(getContext(), GV, Kind, Mang, TM,
+ return selectELFSectionForGlobal(getContext(), GO, Kind, getMangler(), TM,
EmitUniqueSection, Flags, &NextUniqueID);
}
MCSection *TargetLoweringObjectFileELF::getSectionForJumpTable(
- const Function &F, Mangler &Mang, const TargetMachine &TM) const {
+ const Function &F, const TargetMachine &TM) const {
// If the function can be removed, produce a unique section so that
// the table doesn't prevent the removal.
const Comdat *C = F.getComdat();
@@ -339,7 +351,7 @@ MCSection *TargetLoweringObjectFileELF::getSectionForJumpTable(
return ReadOnlySection;
return selectELFSectionForGlobal(getContext(), &F, SectionKind::getReadOnly(),
- Mang, TM, EmitUniqueSection, ELF::SHF_ALLOC,
+ getMangler(), TM, EmitUniqueSection, ELF::SHF_ALLOC,
&NextUniqueID);
}
@@ -423,7 +435,7 @@ MCSection *TargetLoweringObjectFileELF::getStaticDtorSection(
}
const MCExpr *TargetLoweringObjectFileELF::lowerRelativeReference(
- const GlobalValue *LHS, const GlobalValue *RHS, Mangler &Mang,
+ const GlobalValue *LHS, const GlobalValue *RHS,
const TargetMachine &TM) const {
// We may only use a PLT-relative relocation to refer to unnamed_addr
// functions.
@@ -437,22 +449,28 @@ const MCExpr *TargetLoweringObjectFileELF::lowerRelativeReference(
return nullptr;
return MCBinaryExpr::createSub(
- MCSymbolRefExpr::create(TM.getSymbol(LHS, Mang), PLTRelativeVariantKind,
+ MCSymbolRefExpr::create(TM.getSymbol(LHS), PLTRelativeVariantKind,
getContext()),
- MCSymbolRefExpr::create(TM.getSymbol(RHS, Mang), getContext()),
- getContext());
+ MCSymbolRefExpr::create(TM.getSymbol(RHS), getContext()), getContext());
}
void
TargetLoweringObjectFileELF::InitializeELF(bool UseInitArray_) {
UseInitArray = UseInitArray_;
- if (!UseInitArray)
+ MCContext &Ctx = getContext();
+ if (!UseInitArray) {
+ StaticCtorSection = Ctx.getELFSection(".ctors", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE);
+
+ StaticDtorSection = Ctx.getELFSection(".dtors", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_WRITE);
return;
+ }
- StaticCtorSection = getContext().getELFSection(
- ".init_array", ELF::SHT_INIT_ARRAY, ELF::SHF_WRITE | ELF::SHF_ALLOC);
- StaticDtorSection = getContext().getELFSection(
- ".fini_array", ELF::SHT_FINI_ARRAY, ELF::SHF_WRITE | ELF::SHF_ALLOC);
+ StaticCtorSection = Ctx.getELFSection(".init_array", ELF::SHT_INIT_ARRAY,
+ ELF::SHF_WRITE | ELF::SHF_ALLOC);
+ StaticDtorSection = Ctx.getELFSection(".fini_array", ELF::SHT_FINI_ARRAY,
+ ELF::SHF_WRITE | ELF::SHF_ALLOC);
}
//===----------------------------------------------------------------------===//
@@ -464,11 +482,28 @@ TargetLoweringObjectFileMachO::TargetLoweringObjectFileMachO()
SupportIndirectSymViaGOTPCRel = true;
}
+void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
+ const TargetMachine &TM) {
+ TargetLoweringObjectFile::Initialize(Ctx, TM);
+ if (TM.getRelocationModel() == Reloc::Static) {
+ StaticCtorSection = Ctx.getMachOSection("__TEXT", "__constructor", 0,
+ SectionKind::getData());
+ StaticDtorSection = Ctx.getMachOSection("__TEXT", "__destructor", 0,
+ SectionKind::getData());
+ } else {
+ StaticCtorSection = Ctx.getMachOSection("__DATA", "__mod_init_func",
+ MachO::S_MOD_INIT_FUNC_POINTERS,
+ SectionKind::getData());
+ StaticDtorSection = Ctx.getMachOSection("__DATA", "__mod_term_func",
+ MachO::S_MOD_TERM_FUNC_POINTERS,
+ SectionKind::getData());
+ }
+}
+
/// emitModuleFlags - Perform code emission for module flags.
-void TargetLoweringObjectFileMachO::
-emitModuleFlags(MCStreamer &Streamer,
- ArrayRef<Module::ModuleFlagEntry> ModuleFlags,
- Mangler &Mang, const TargetMachine &TM) const {
+void TargetLoweringObjectFileMachO::emitModuleFlags(
+ MCStreamer &Streamer, ArrayRef<Module::ModuleFlagEntry> ModuleFlags,
+ const TargetMachine &TM) const {
unsigned VersionVal = 0;
unsigned ImageInfoFlags = 0;
MDNode *LinkerOptions = nullptr;
@@ -542,23 +577,22 @@ static void checkMachOComdat(const GlobalValue *GV) {
}
MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal(
- const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
- const TargetMachine &TM) const {
+ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
// Parse the section specifier and create it if valid.
StringRef Segment, Section;
unsigned TAA = 0, StubSize = 0;
bool TAAParsed;
- checkMachOComdat(GV);
+ checkMachOComdat(GO);
std::string ErrorCode =
- MCSectionMachO::ParseSectionSpecifier(GV->getSection(), Segment, Section,
+ MCSectionMachO::ParseSectionSpecifier(GO->getSection(), Segment, Section,
TAA, TAAParsed, StubSize);
if (!ErrorCode.empty()) {
// If invalid, report the error with report_fatal_error.
- report_fatal_error("Global variable '" + GV->getName() +
+ report_fatal_error("Global variable '" + GO->getName() +
"' has an invalid section specifier '" +
- GV->getSection() + "': " + ErrorCode + ".");
+ GO->getSection() + "': " + ErrorCode + ".");
}
// Get the section.
@@ -575,7 +609,7 @@ MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal(
// to reject it here.
if (S->getTypeAndAttributes() != TAA || S->getStubSize() != StubSize) {
// If invalid, report the error with report_fatal_error.
- report_fatal_error("Global variable '" + GV->getName() +
+ report_fatal_error("Global variable '" + GO->getName() +
"' section type or attributes does not match previous"
" section specifier");
}
@@ -584,20 +618,19 @@ MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal(
}
MCSection *TargetLoweringObjectFileMachO::SelectSectionForGlobal(
- const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
- const TargetMachine &TM) const {
- checkMachOComdat(GV);
+ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
+ checkMachOComdat(GO);
// Handle thread local data.
if (Kind.isThreadBSS()) return TLSBSSSection;
if (Kind.isThreadData()) return TLSDataSection;
if (Kind.isText())
- return GV->isWeakForLinker() ? TextCoalSection : TextSection;
+ return GO->isWeakForLinker() ? TextCoalSection : TextSection;
// If this is weak/linkonce, put this in a coalescable section, either in text
// or data depending on if it is writable.
- if (GV->isWeakForLinker()) {
+ if (GO->isWeakForLinker()) {
if (Kind.isReadOnly())
return ConstTextCoalSection;
return DataCoalSection;
@@ -605,21 +638,21 @@ MCSection *TargetLoweringObjectFileMachO::SelectSectionForGlobal(
// FIXME: Alignment check should be handled by section classifier.
if (Kind.isMergeable1ByteCString() &&
- GV->getParent()->getDataLayout().getPreferredAlignment(
- cast<GlobalVariable>(GV)) < 32)
+ GO->getParent()->getDataLayout().getPreferredAlignment(
+ cast<GlobalVariable>(GO)) < 32)
return CStringSection;
// Do not put 16-bit arrays in the UString section if they have an
// externally visible label, this runs into issues with certain linker
// versions.
- if (Kind.isMergeable2ByteCString() && !GV->hasExternalLinkage() &&
- GV->getParent()->getDataLayout().getPreferredAlignment(
- cast<GlobalVariable>(GV)) < 32)
+ if (Kind.isMergeable2ByteCString() && !GO->hasExternalLinkage() &&
+ GO->getParent()->getDataLayout().getPreferredAlignment(
+ cast<GlobalVariable>(GO)) < 32)
return UStringSection;
// With MachO only variables whose corresponding symbol starts with 'l' or
// 'L' can be merged, so we only try merging GVs with private linkage.
- if (GV->hasPrivateLinkage() && Kind.isMergeableConst()) {
+ if (GO->hasPrivateLinkage() && Kind.isMergeableConst()) {
if (Kind.isMergeableConst4())
return FourByteConstantSection;
if (Kind.isMergeableConst8())
@@ -670,23 +703,21 @@ MCSection *TargetLoweringObjectFileMachO::getSectionForConstant(
}
const MCExpr *TargetLoweringObjectFileMachO::getTTypeGlobalReference(
- const GlobalValue *GV, unsigned Encoding, Mangler &Mang,
- const TargetMachine &TM, MachineModuleInfo *MMI,
- MCStreamer &Streamer) const {
+ const GlobalValue *GV, unsigned Encoding, const TargetMachine &TM,
+ MachineModuleInfo *MMI, MCStreamer &Streamer) const {
// The mach-o version of this method defaults to returning a stub reference.
if (Encoding & DW_EH_PE_indirect) {
MachineModuleInfoMachO &MachOMMI =
MMI->getObjFileInfo<MachineModuleInfoMachO>();
- MCSymbol *SSym =
- getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr", Mang, TM);
+ MCSymbol *SSym = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr", TM);
// Add information about the stub reference to MachOMMI so that the stub
// gets emitted by the asmprinter.
MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym);
if (!StubSym.getPointer()) {
- MCSymbol *Sym = TM.getSymbol(GV, Mang);
+ MCSymbol *Sym = TM.getSymbol(GV);
StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
}
@@ -695,24 +726,24 @@ const MCExpr *TargetLoweringObjectFileMachO::getTTypeGlobalReference(
Encoding & ~dwarf::DW_EH_PE_indirect, Streamer);
}
- return TargetLoweringObjectFile::getTTypeGlobalReference(GV, Encoding, Mang,
- TM, MMI, Streamer);
+ return TargetLoweringObjectFile::getTTypeGlobalReference(GV, Encoding, TM,
+ MMI, Streamer);
}
MCSymbol *TargetLoweringObjectFileMachO::getCFIPersonalitySymbol(
- const GlobalValue *GV, Mangler &Mang, const TargetMachine &TM,
+ const GlobalValue *GV, const TargetMachine &TM,
MachineModuleInfo *MMI) const {
// The mach-o version of this method defaults to returning a stub reference.
MachineModuleInfoMachO &MachOMMI =
MMI->getObjFileInfo<MachineModuleInfoMachO>();
- MCSymbol *SSym = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr", Mang, TM);
+ MCSymbol *SSym = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr", TM);
// Add information about the stub reference to MachOMMI so that the stub
// gets emitted by the asmprinter.
MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym);
if (!StubSym.getPointer()) {
- MCSymbol *Sym = TM.getSymbol(GV, Mang);
+ MCSymbol *Sym = TM.getSymbol(GV);
StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
}
@@ -793,13 +824,16 @@ static bool canUsePrivateLabel(const MCAsmInfo &AsmInfo,
}
void TargetLoweringObjectFileMachO::getNameWithPrefix(
- SmallVectorImpl<char> &OutName, const GlobalValue *GV, Mangler &Mang,
+ SmallVectorImpl<char> &OutName, const GlobalValue *GV,
const TargetMachine &TM) const {
- SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM);
- const MCSection *TheSection = SectionForGlobal(GV, GVKind, Mang, TM);
- bool CannotUsePrivateLabel =
- !canUsePrivateLabel(*TM.getMCAsmInfo(), *TheSection);
- Mang.getNameWithPrefix(OutName, GV, CannotUsePrivateLabel);
+ bool CannotUsePrivateLabel = true;
+ if (auto *GO = GV->getBaseObject()) {
+ SectionKind GOKind = TargetLoweringObjectFile::getKindForGlobal(GO, TM);
+ const MCSection *TheSection = SectionForGlobal(GO, GOKind, TM);
+ CannotUsePrivateLabel =
+ !canUsePrivateLabel(*TM.getMCAsmInfo(), *TheSection);
+ }
+ getMangler().getNameWithPrefix(OutName, GV, CannotUsePrivateLabel);
}
//===----------------------------------------------------------------------===//
@@ -886,22 +920,21 @@ static int getSelectionForCOFF(const GlobalValue *GV) {
}
MCSection *TargetLoweringObjectFileCOFF::getExplicitSectionGlobal(
- const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
- const TargetMachine &TM) const {
+ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
int Selection = 0;
unsigned Characteristics = getCOFFSectionFlags(Kind, TM);
- StringRef Name = GV->getSection();
+ StringRef Name = GO->getSection();
StringRef COMDATSymName = "";
- if (GV->hasComdat()) {
- Selection = getSelectionForCOFF(GV);
+ if (GO->hasComdat()) {
+ Selection = getSelectionForCOFF(GO);
const GlobalValue *ComdatGV;
if (Selection == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE)
- ComdatGV = getComdatGVForCOFF(GV);
+ ComdatGV = getComdatGVForCOFF(GO);
else
- ComdatGV = GV;
+ ComdatGV = GO;
if (!ComdatGV->hasPrivateLinkage()) {
- MCSymbol *Sym = TM.getSymbol(ComdatGV, Mang);
+ MCSymbol *Sym = TM.getSymbol(ComdatGV);
COMDATSymName = Sym->getName();
Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
} else {
@@ -926,8 +959,7 @@ static const char *getCOFFSectionNameForUniqueGlobal(SectionKind Kind) {
}
MCSection *TargetLoweringObjectFileCOFF::SelectSectionForGlobal(
- const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
- const TargetMachine &TM) const {
+ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
// If we have -ffunction-sections then we should emit the global value to a
// uniqued section specifically for it.
bool EmitUniquedSection;
@@ -936,32 +968,32 @@ MCSection *TargetLoweringObjectFileCOFF::SelectSectionForGlobal(
else
EmitUniquedSection = TM.getDataSections();
- if ((EmitUniquedSection && !Kind.isCommon()) || GV->hasComdat()) {
+ if ((EmitUniquedSection && !Kind.isCommon()) || GO->hasComdat()) {
const char *Name = getCOFFSectionNameForUniqueGlobal(Kind);
unsigned Characteristics = getCOFFSectionFlags(Kind, TM);
Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
- int Selection = getSelectionForCOFF(GV);
+ int Selection = getSelectionForCOFF(GO);
if (!Selection)
Selection = COFF::IMAGE_COMDAT_SELECT_NODUPLICATES;
const GlobalValue *ComdatGV;
- if (GV->hasComdat())
- ComdatGV = getComdatGVForCOFF(GV);
+ if (GO->hasComdat())
+ ComdatGV = getComdatGVForCOFF(GO);
else
- ComdatGV = GV;
+ ComdatGV = GO;
unsigned UniqueID = MCContext::GenericSectionID;
if (EmitUniquedSection)
UniqueID = NextUniqueID++;
if (!ComdatGV->hasPrivateLinkage()) {
- MCSymbol *Sym = TM.getSymbol(ComdatGV, Mang);
+ MCSymbol *Sym = TM.getSymbol(ComdatGV);
StringRef COMDATSymName = Sym->getName();
return getContext().getCOFFSection(Name, Characteristics, Kind,
COMDATSymName, Selection, UniqueID);
} else {
SmallString<256> TmpData;
- Mang.getNameWithPrefix(TmpData, GV, /*CannotUsePrivateLabel=*/true);
+ getMangler().getNameWithPrefix(TmpData, GO, /*CannotUsePrivateLabel=*/true);
return getContext().getCOFFSection(Name, Characteristics, Kind, TmpData,
Selection, UniqueID);
}
@@ -986,7 +1018,7 @@ MCSection *TargetLoweringObjectFileCOFF::SelectSectionForGlobal(
}
void TargetLoweringObjectFileCOFF::getNameWithPrefix(
- SmallVectorImpl<char> &OutName, const GlobalValue *GV, Mangler &Mang,
+ SmallVectorImpl<char> &OutName, const GlobalValue *GV,
const TargetMachine &TM) const {
bool CannotUsePrivateLabel = false;
if (GV->hasPrivateLinkage() &&
@@ -994,11 +1026,11 @@ void TargetLoweringObjectFileCOFF::getNameWithPrefix(
(isa<GlobalVariable>(GV) && TM.getDataSections())))
CannotUsePrivateLabel = true;
- Mang.getNameWithPrefix(OutName, GV, CannotUsePrivateLabel);
+ getMangler().getNameWithPrefix(OutName, GV, CannotUsePrivateLabel);
}
MCSection *TargetLoweringObjectFileCOFF::getSectionForJumpTable(
- const Function &F, Mangler &Mang, const TargetMachine &TM) const {
+ const Function &F, const TargetMachine &TM) const {
// If the function can be removed, produce a unique section so that
// the table doesn't prevent the removal.
const Comdat *C = F.getComdat();
@@ -1010,7 +1042,7 @@ MCSection *TargetLoweringObjectFileCOFF::getSectionForJumpTable(
if (F.hasPrivateLinkage())
return ReadOnlySection;
- MCSymbol *Sym = TM.getSymbol(&F, Mang);
+ MCSymbol *Sym = TM.getSymbol(&F);
StringRef COMDATSymName = Sym->getName();
SectionKind Kind = SectionKind::getReadOnly();
@@ -1023,10 +1055,9 @@ MCSection *TargetLoweringObjectFileCOFF::getSectionForJumpTable(
COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE, UniqueID);
}
-void TargetLoweringObjectFileCOFF::
-emitModuleFlags(MCStreamer &Streamer,
- ArrayRef<Module::ModuleFlagEntry> ModuleFlags,
- Mangler &Mang, const TargetMachine &TM) const {
+void TargetLoweringObjectFileCOFF::emitModuleFlags(
+ MCStreamer &Streamer, ArrayRef<Module::ModuleFlagEntry> ModuleFlags,
+ const TargetMachine &TM) const {
MDNode *LinkerOptions = nullptr;
for (const auto &MFE : ModuleFlags) {
@@ -1052,6 +1083,31 @@ emitModuleFlags(MCStreamer &Streamer,
}
}
+void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx,
+ const TargetMachine &TM) {
+ TargetLoweringObjectFile::Initialize(Ctx, TM);
+ const Triple &T = TM.getTargetTriple();
+ if (T.isKnownWindowsMSVCEnvironment() || T.isWindowsItaniumEnvironment()) {
+ StaticCtorSection =
+ Ctx.getCOFFSection(".CRT$XCU", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getReadOnly());
+ StaticDtorSection =
+ Ctx.getCOFFSection(".CRT$XTX", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getReadOnly());
+ } else {
+ StaticCtorSection = Ctx.getCOFFSection(
+ ".ctors", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getData());
+ StaticDtorSection = Ctx.getCOFFSection(
+ ".dtors", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getData());
+ }
+}
+
MCSection *TargetLoweringObjectFileCOFF::getStaticCtorSection(
unsigned Priority, const MCSymbol *KeySym) const {
return getContext().getAssociativeCOFFSection(
@@ -1065,7 +1121,7 @@ MCSection *TargetLoweringObjectFileCOFF::getStaticDtorSection(
}
void TargetLoweringObjectFileCOFF::emitLinkerFlagsForGlobal(
- raw_ostream &OS, const GlobalValue *GV, const Mangler &Mang) const {
+ raw_ostream &OS, const GlobalValue *GV) const {
if (!GV->hasDLLExportStorageClass() || GV->isDeclaration())
return;
@@ -1079,14 +1135,14 @@ void TargetLoweringObjectFileCOFF::emitLinkerFlagsForGlobal(
if (TT.isWindowsGNUEnvironment() || TT.isWindowsCygwinEnvironment()) {
std::string Flag;
raw_string_ostream FlagOS(Flag);
- Mang.getNameWithPrefix(FlagOS, GV, false);
+ getMangler().getNameWithPrefix(FlagOS, GV, false);
FlagOS.flush();
if (Flag[0] == GV->getParent()->getDataLayout().getGlobalPrefix())
OS << Flag.substr(1);
else
OS << Flag;
} else {
- Mang.getNameWithPrefix(OS, GV, false);
+ getMangler().getNameWithPrefix(OS, GV, false);
}
if (!GV->getValueType()->isFunctionTy()) {
diff --git a/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp b/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp
index 8d2048fa047f..b6da8e0aa60d 100644
--- a/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp
@@ -29,7 +29,7 @@ bool TargetOptions::DisableFramePointerElim(const MachineFunction &MF) const {
// Check to see if we should eliminate non-leaf frame pointers.
if (MF.getFunction()->hasFnAttribute("no-frame-pointer-elim-non-leaf"))
- return MF.getFrameInfo()->hasCalls();
+ return MF.getFrameInfo().hasCalls();
return false;
}
diff --git a/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp b/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp
index b8c820942cb5..e7ea2b4563f9 100644
--- a/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -38,8 +38,8 @@
using namespace llvm;
-static cl::opt<bool> DisablePostRA("disable-post-ra", cl::Hidden,
- cl::desc("Disable Post Regalloc"));
+static cl::opt<bool> DisablePostRASched("disable-post-ra", cl::Hidden,
+ cl::desc("Disable Post Regalloc Scheduler"));
static cl::opt<bool> DisableBranchFold("disable-branch-fold", cl::Hidden,
cl::desc("Disable branch folding"));
static cl::opt<bool> DisableTailDuplicate("disable-tail-duplicate", cl::Hidden,
@@ -98,6 +98,14 @@ PrintMachineInstrs("print-machineinstrs", cl::ValueOptional,
cl::desc("Print machine instrs"),
cl::value_desc("pass-name"), cl::init("option-unspecified"));
+static cl::opt<int> EnableGlobalISelAbort(
+ "global-isel-abort", cl::Hidden,
+ cl::desc("Enable abort calls when \"global\" instruction selection "
+ "fails to lower/select an instruction: 0 disable the abort, "
+ "1 enable the abort, and "
+ "2 disable the abort but emit a diagnostic on failure"),
+ cl::init(1));
+
// Temporary option to allow experimenting with MachineScheduler as a post-RA
// scheduler. Targets can "properly" enable this with
// substitutePass(&PostRASchedulerID, &PostMachineSchedulerID).
@@ -121,8 +129,7 @@ static cl::opt<CFLAAType> UseCFLAA(
clEnumValN(CFLAAType::Andersen, "anders",
"Enable inclusion-based CFL-AA"),
clEnumValN(CFLAAType::Both, "both",
- "Enable both variants of CFL-AA"),
- clEnumValEnd));
+ "Enable both variants of CFL-AA")));
/// Allow standard passes to be disabled by command line options. This supports
/// simple binary flags that either suppress the pass or do nothing.
@@ -150,7 +157,7 @@ static IdentifyingPassPtr applyDisable(IdentifyingPassPtr PassID,
static IdentifyingPassPtr overridePass(AnalysisID StandardID,
IdentifyingPassPtr TargetID) {
if (StandardID == &PostRASchedulerID)
- return applyDisable(TargetID, DisablePostRA);
+ return applyDisable(TargetID, DisablePostRASched);
if (StandardID == &BranchFolderPassID)
return applyDisable(TargetID, DisableBranchFold);
@@ -252,8 +259,7 @@ TargetPassConfig::~TargetPassConfig() {
// Out of line constructor provides default values for pass options and
// registers all common codegen passes.
TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm)
- : ImmutablePass(ID), PM(&pm), StartBefore(nullptr), StartAfter(nullptr),
- StopAfter(nullptr), Started(true), Stopped(false),
+ : ImmutablePass(ID), PM(&pm), Started(true), Stopped(false),
AddingMachinePasses(false), TM(tm), Impl(nullptr), Initialized(false),
DisableVerify(false), EnableTailMerge(true) {
@@ -347,6 +353,8 @@ void TargetPassConfig::addPass(Pass *P, bool verifyAfter, bool printAfter) {
if (StartBefore == PassID)
Started = true;
+ if (StopBefore == PassID)
+ Stopped = true;
if (Started && !Stopped) {
std::string Banner;
// Construct banner message before PM->add() as that may delete the pass.
@@ -469,12 +477,17 @@ void TargetPassConfig::addIRPasses() {
if (getOptLevel() != CodeGenOpt::None && !DisablePartialLibcallInlining)
addPass(createPartiallyInlineLibCallsPass());
+
+ // Insert calls to mcount-like functions.
+ addPass(createCountingFunctionInserterPass());
}
/// Turn exception handling constructs into something the code generators can
/// handle.
void TargetPassConfig::addPassesToHandleExceptions() {
- switch (TM->getMCAsmInfo()->getExceptionHandlingType()) {
+ const MCAsmInfo *MCAI = TM->getMCAsmInfo();
+ assert(MCAI && "No MCAsmInfo");
+ switch (MCAI->getExceptionHandlingType()) {
case ExceptionHandling::SjLj:
// SjLj piggy-backs on dwarf for this bit. The cleanups done apply to both
// Dwarf EH prepare needs to be run after SjLj prepare. Otherwise,
@@ -483,7 +496,7 @@ void TargetPassConfig::addPassesToHandleExceptions() {
// pad is shared by multiple invokes and is also a target of a normal
// edge from elsewhere.
addPass(createSjLjEHPreparePass());
- // FALLTHROUGH
+ LLVM_FALLTHROUGH;
case ExceptionHandling::DwarfCFI:
case ExceptionHandling::ARM:
addPass(createDwarfEHPass(TM));
@@ -557,9 +570,6 @@ void TargetPassConfig::addISelPrepare() {
void TargetPassConfig::addMachinePasses() {
AddingMachinePasses = true;
- if (TM->Options.EnableIPRA)
- addPass(createRegUsageInfoPropPass());
-
// Insert a machine instr printer pass after the specified pass.
if (!StringRef(PrintMachineInstrs.getValue()).equals("") &&
!StringRef(PrintMachineInstrs.getValue()).equals("option-unspecified")) {
@@ -575,6 +585,9 @@ void TargetPassConfig::addMachinePasses() {
// Print the instruction selected machine code...
printAndVerify("After Instruction Selection");
+ if (TM->Options.EnableIPRA)
+ addPass(createRegUsageInfoPropPass());
+
// Expand pseudo-instructions emitted by ISel.
addPass(&ExpandISelPseudosID);
@@ -886,3 +899,14 @@ void TargetPassConfig::addBlockPlacement() {
addPass(&MachineBlockPlacementStatsID);
}
}
+
+//===---------------------------------------------------------------------===//
+/// GlobalISel Configuration
+//===---------------------------------------------------------------------===//
+bool TargetPassConfig::isGlobalISelAbortEnabled() const {
+ return EnableGlobalISelAbort == 1;
+}
+
+bool TargetPassConfig::reportDiagnosticWhenGlobalISelFallback() const {
+ return EnableGlobalISelAbort == 2;
+}
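
For reference, the three values of the new -global-isel-abort option map onto the two predicates above as follows (derived directly from the comparisons in this hunk; the fallback path it refers to is GlobalISel's usual retreat to SelectionDAG):

//   -global-isel-abort=0 : isGlobalISelAbortEnabled() = false,
//                          reportDiagnosticWhenGlobalISelFallback() = false
//                          (fall back silently)
//   -global-isel-abort=1 : true / false  (the default: abort on selection failure)
//   -global-isel-abort=2 : false / true  (fall back, but emit a diagnostic)
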
diff --git a/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp
index e1d90cb913e5..cd50c5b6571d 100644
--- a/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp
@@ -30,8 +30,8 @@ using namespace llvm;
TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID,
regclass_iterator RCB, regclass_iterator RCE,
const char *const *SRINames,
- const unsigned *SRILaneMasks,
- unsigned SRICoveringLanes)
+ const LaneBitmask *SRILaneMasks,
+ LaneBitmask SRICoveringLanes)
: InfoDesc(ID), SubRegIndexNames(SRINames),
SubRegIndexLaneMasks(SRILaneMasks),
RegClassBegin(RCB), RegClassEnd(RCE),
@@ -40,6 +40,36 @@ TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID,
TargetRegisterInfo::~TargetRegisterInfo() {}
+void TargetRegisterInfo::markSuperRegs(BitVector &RegisterSet, unsigned Reg)
+ const {
+ for (MCSuperRegIterator AI(Reg, this, true); AI.isValid(); ++AI)
+ RegisterSet.set(*AI);
+}
+
+bool TargetRegisterInfo::checkAllSuperRegsMarked(const BitVector &RegisterSet,
+ ArrayRef<MCPhysReg> Exceptions) const {
+ // Check that all super registers of reserved regs are reserved as well.
+ BitVector Checked(getNumRegs());
+ for (int Reg = RegisterSet.find_first(); Reg>=0;
+ Reg = RegisterSet.find_next(Reg)) {
+ if (Checked[Reg])
+ continue;
+ for (MCSuperRegIterator SR(Reg, this); SR.isValid(); ++SR) {
+ if (!RegisterSet[*SR] && !is_contained(Exceptions, Reg)) {
+ dbgs() << "Error: Super register " << PrintReg(*SR, this)
+ << " of reserved register " << PrintReg(Reg, this)
+ << " is not reserved.\n";
+ return false;
+ }
+
+ // We transitively check superregs. So we can remember this for later
+ // to avoid compiletime explosion in deep register hierarchies.
+ Checked.set(*SR);
+ }
+ }
+ return true;
+}
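
These two helpers are intended for a target's getReservedRegs implementation; an illustrative usage pattern (target and register names are made up) would be:

// Hypothetical target code, not part of this patch.
BitVector MyTargetRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  BitVector Reserved(getNumRegs());
  // Reserving the stack pointer must also reserve every wider register
  // that contains it as a sub-register.
  markSuperRegs(Reserved, MyTarget::SP);
  assert(checkAllSuperRegsMarked(Reserved) && "overlapping register not reserved");
  return Reserved;
}
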
+
namespace llvm {
Printable PrintReg(unsigned Reg, const TargetRegisterInfo *TRI,
@@ -97,12 +127,6 @@ Printable PrintVRegOrUnit(unsigned Unit, const TargetRegisterInfo *TRI) {
});
}
-Printable PrintLaneMask(LaneBitmask LaneMask) {
- return Printable([LaneMask](raw_ostream &OS) {
- OS << format("%08X", LaneMask);
- });
-}
-
} // End of llvm namespace
/// getAllocatableClass - Return the maximal subclass of the given register
@@ -354,7 +378,7 @@ TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg,
// Check that Phys is in the allocation order. We shouldn't heed hints
// from VirtReg's register class if they aren't in the allocation order. The
// target probably has a reason for removing the register.
- if (std::find(Order.begin(), Order.end(), Phys) == Order.end())
+ if (!is_contained(Order, Phys))
return;
// All clear, tell the register allocator to prefer this register.
@@ -367,11 +391,11 @@ bool TargetRegisterInfo::canRealignStack(const MachineFunction &MF) const {
bool TargetRegisterInfo::needsStackRealignment(
const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
const Function *F = MF.getFunction();
unsigned StackAlign = TFI->getStackAlignment();
- bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
+ bool requiresRealignment = ((MFI.getMaxAlignment() > StackAlign) ||
F->hasFnAttribute(Attribute::StackAlignment));
if (MF.getFunction()->hasFnAttribute("stackrealign") || requiresRealignment) {
if (canRealignStack(MF))
diff --git a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp
index 022e912aa84f..83e52d335354 100644
--- a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp
@@ -144,7 +144,7 @@ static unsigned findUseIdx(const MachineInstr *MI, unsigned UseOperIdx) {
unsigned UseIdx = 0;
for (unsigned i = 0; i != UseOperIdx; ++i) {
const MachineOperand &MO = MI->getOperand(i);
- if (MO.isReg() && MO.readsReg())
+ if (MO.isReg() && MO.readsReg() && !MO.isDef())
++UseIdx;
}
return UseIdx;
diff --git a/contrib/llvm/lib/Target/TargetSubtargetInfo.cpp b/contrib/llvm/lib/CodeGen/TargetSubtargetInfo.cpp
index c3f94a99b4ca..c74707d95b9e 100644
--- a/contrib/llvm/lib/Target/TargetSubtargetInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetSubtargetInfo.cpp
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// This file describes the general parts of a Subtarget.
+/// \file This file describes the general parts of a Subtarget.
//
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index 8feb18b4d030..0f1b2ed994b7 100644
--- a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -109,7 +109,7 @@ class TwoAddressInstructionPass : public MachineFunctionPass {
bool isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
MachineInstr *MI, unsigned Dist);
- bool commuteInstruction(MachineInstr *MI,
+ bool commuteInstruction(MachineInstr *MI, unsigned DstIdx,
unsigned RegBIdx, unsigned RegCIdx, unsigned Dist);
bool isProfitableToConv3Addr(unsigned RegA, unsigned RegB);
@@ -651,6 +651,7 @@ isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC,
/// Commute a two-address instruction and update the basic block, distance map,
/// and live variables if needed. Return true if it is successful.
bool TwoAddressInstructionPass::commuteInstruction(MachineInstr *MI,
+ unsigned DstIdx,
unsigned RegBIdx,
unsigned RegCIdx,
unsigned Dist) {
@@ -671,7 +672,7 @@ bool TwoAddressInstructionPass::commuteInstruction(MachineInstr *MI,
// Update source register map.
unsigned FromRegC = getMappedReg(RegC, SrcRegMap);
if (FromRegC) {
- unsigned RegA = MI->getOperand(0).getReg();
+ unsigned RegA = MI->getOperand(DstIdx).getReg();
SrcRegMap[RegA] = FromRegC;
}
@@ -1171,6 +1172,9 @@ bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI,
unsigned BaseOpIdx,
bool BaseOpKilled,
unsigned Dist) {
+ if (!MI->isCommutable())
+ return false;
+
unsigned DstOpReg = MI->getOperand(DstOpIdx).getReg();
unsigned BaseOpReg = MI->getOperand(BaseOpIdx).getReg();
unsigned OpsNum = MI->getDesc().getNumOperands();
@@ -1180,7 +1184,7 @@ bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI,
// and OtherOpIdx are commutable, it does not really search for
// other commutable operands and does not change the values of passed
// variables.
- if (OtherOpIdx == BaseOpIdx ||
+ if (OtherOpIdx == BaseOpIdx || !MI->getOperand(OtherOpIdx).isReg() ||
!TII->findCommutedOpIndices(*MI, BaseOpIdx, OtherOpIdx))
continue;
@@ -1199,7 +1203,8 @@ bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI,
}
// If it's profitable to commute, try to do so.
- if (DoCommute && commuteInstruction(MI, BaseOpIdx, OtherOpIdx, Dist)) {
+ if (DoCommute && commuteInstruction(MI, DstOpIdx, BaseOpIdx, OtherOpIdx,
+ Dist)) {
++NumCommuted;
if (AggressiveCommute)
++NumAggrCommuted;
@@ -1567,14 +1572,14 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
if (!IsEarlyClobber) {
// Replace other (un-tied) uses of regB with LastCopiedReg.
for (MachineOperand &MO : MI->operands()) {
- if (MO.isReg() && MO.getReg() == RegB && MO.getSubReg() == SubRegB &&
+ if (MO.isReg() && MO.getReg() == RegB &&
MO.isUse()) {
if (MO.isKill()) {
MO.setIsKill(false);
RemovedKillFlag = true;
}
MO.setReg(LastCopiedReg);
- MO.setSubReg(0);
+ MO.setSubReg(MO.getSubReg());
}
}
}
diff --git a/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp
index 501e01c45a8b..c2db56a7657c 100644
--- a/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp
+++ b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp
@@ -40,7 +40,7 @@
using namespace llvm;
static bool eliminateUnreachableBlock(Function &F) {
- SmallPtrSet<BasicBlock*, 8> Reachable;
+ df_iterator_default_set<BasicBlock*> Reachable;
// Mark all reachable blocks.
for (BasicBlock *BB : depth_first_ext(&F, Reachable))
@@ -130,7 +130,7 @@ void UnreachableMachineBlockElim::getAnalysisUsage(AnalysisUsage &AU) const {
}
bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
- SmallPtrSet<MachineBasicBlock*, 8> Reachable;
+ df_iterator_default_set<MachineBasicBlock*> Reachable;
bool ModifiedPHI = false;
MMI = getAnalysisIfAvailable<MachineModuleInfo>();
diff --git a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
index 8a3a0328870d..0d506d646659 100644
--- a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
+++ b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -73,8 +73,8 @@ void VirtRegMap::grow() {
}
unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) {
- int SS = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(),
- RC->getAlignment());
+ int SS = MF->getFrameInfo().CreateSpillStackObject(RC->getSize(),
+ RC->getAlignment());
++NumSpillSlots;
return SS;
}
@@ -110,7 +110,7 @@ void VirtRegMap::assignVirt2StackSlot(unsigned virtReg, int SS) {
assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT &&
"attempt to assign stack slot to already spilled register");
assert((SS >= 0 ||
- (SS >= MF->getFrameInfo()->getObjectIndexBegin())) &&
+ (SS >= MF->getFrameInfo().getObjectIndexBegin())) &&
"illegal fixed frame index");
Virt2StackSlotMap[virtReg] = SS;
}
@@ -177,7 +177,7 @@ public:
bool runOnMachineFunction(MachineFunction&) override;
MachineFunctionProperties getSetProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
};
} // end anonymous namespace
@@ -266,7 +266,7 @@ void VirtRegRewriter::addLiveInsForSubRanges(const LiveInterval &LI,
SlotIndex MBBBegin = MBBI->first;
// Advance all subrange iterators so that their end position is just
// behind MBBBegin (or the iterator is at the end).
- LaneBitmask LaneMask = 0;
+ LaneBitmask LaneMask;
for (auto &RangeIterPair : SubRanges) {
const LiveInterval::SubRange *SR = RangeIterPair.first;
LiveInterval::const_iterator &SRI = RangeIterPair.second;
@@ -277,7 +277,7 @@ void VirtRegRewriter::addLiveInsForSubRanges(const LiveInterval &LI,
if (SRI->start <= MBBBegin)
LaneMask |= SR->LaneMask;
}
- if (LaneMask == 0)
+ if (LaneMask.none())
continue;
MachineBasicBlock *MBB = MBBI->second;
MBB->addLiveIn(PhysReg, LaneMask);
@@ -338,10 +338,11 @@ bool VirtRegRewriter::readsUndefSubreg(const MachineOperand &MO) const {
assert(LI.liveAt(BaseIndex) &&
"Reads of completely dead register should be marked undef already");
unsigned SubRegIdx = MO.getSubReg();
+ assert(SubRegIdx != 0 && LI.hasSubRanges());
LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(SubRegIdx);
// See if any of the relevant subregister liveranges is defined at this point.
for (const LiveInterval::SubRange &SR : LI.subranges()) {
- if ((SR.LaneMask & UseMask) != 0 && SR.liveAt(BaseIndex))
+ if ((SR.LaneMask & UseMask).any() && SR.liveAt(BaseIndex))
return false;
}
return true;
diff --git a/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp
index 041fb7b912bf..568720c66e55 100644
--- a/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp
+++ b/contrib/llvm/lib/CodeGen/WinEHPrepare.cpp
@@ -62,7 +62,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override;
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "Windows exception handling preparation";
}
@@ -521,7 +521,7 @@ void llvm::calculateClrEHStateNumbers(const Function *Fn,
if (const auto *Cleanup = dyn_cast<CleanupPadInst>(Pad)) {
// Create the entry for this cleanup with the appropriate handler
- // properties. Finaly and fault handlers are distinguished by arity.
+ // properties. Finally and fault handlers are distinguished by arity.
ClrHandlerType HandlerType =
(Cleanup->getNumArgOperands() ? ClrHandlerType::Fault
: ClrHandlerType::Finally);
@@ -708,7 +708,7 @@ void WinEHPrepare::demotePHIsOnFunclets(Function &F) {
void WinEHPrepare::cloneCommonBlocks(Function &F) {
// We need to clone all blocks which belong to multiple funclets. Values are
- // remapped throughout the funclet to propogate both the new instructions
+ // remapped throughout the funclet to propagate both the new instructions
// *and* the new basic blocks themselves.
for (auto &Funclets : FuncletBlocks) {
BasicBlock *FuncletPadBB = Funclets.first;
@@ -1202,8 +1202,12 @@ void WinEHPrepare::replaceUseWithLoad(Value *V, Use &U, AllocaInst *&SpillSlot,
Goto->setSuccessor(0, PHIBlock);
CatchRet->setSuccessor(NewBlock);
// Update the color mapping for the newly split edge.
+ // Grab a reference to the ColorVector to be inserted before getting the
+ // reference to the vector we are copying because inserting the new
+ // element in BlockColors might cause the map to be reallocated.
+ ColorVector &ColorsForNewBlock = BlockColors[NewBlock];
ColorVector &ColorsForPHIBlock = BlockColors[PHIBlock];
- BlockColors[NewBlock] = ColorsForPHIBlock;
+ ColorsForNewBlock = ColorsForPHIBlock;
for (BasicBlock *FuncletPad : ColorsForPHIBlock)
FuncletBlocks[FuncletPad].push_back(NewBlock);
// Treat the new block as incoming for load insertion.
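
The hazard this hunk guards against is generic to llvm::DenseMap: operator[] may grow the table and move every element, so a reference obtained earlier can dangle. A minimal sketch of the buggy and fixed orderings (types simplified, not code from the patch; it assumes OldKey is already present, as PHIBlock is here):

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"

using ColorList = llvm::SmallVector<int, 4>;

void copyColors(llvm::DenseMap<int, ColorList> &Map, int OldKey, int NewKey) {
  // Buggy: the reference into Map[OldKey] can be invalidated when the
  // later Map[NewKey] insertion rehashes the table.
  //   ColorList &Old = Map[OldKey];
  //   Map[NewKey] = Old;            // 'Old' may dangle by now
  //
  // Fixed (the ordering the patch adopts): create the new slot first, then
  // take the reference that will be read.
  ColorList &NewSlot = Map[NewKey];
  ColorList &OldSlot = Map[OldKey];  // no insertion: OldKey already exists
  NewSlot = OldSlot;
}
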
diff --git a/contrib/llvm/lib/CodeGen/XRayInstrumentation.cpp b/contrib/llvm/lib/CodeGen/XRayInstrumentation.cpp
index 1f9570895f9d..63bd762eeb2b 100644
--- a/contrib/llvm/lib/CodeGen/XRayInstrumentation.cpp
+++ b/contrib/llvm/lib/CodeGen/XRayInstrumentation.cpp
@@ -34,7 +34,82 @@ struct XRayInstrumentation : public MachineFunctionPass {
}
bool runOnMachineFunction(MachineFunction &MF) override;
+
+private:
+ // Replace the original RET instruction with the exit sled code ("patchable
+ // ret" pseudo-instruction), so that at runtime XRay can replace the sled
+ // with a code jumping to XRay trampoline, which calls the tracing handler
+ // and, in the end, issues the RET instruction.
+ // This is the approach to go on CPUs which have a single RET instruction,
+ // like x86/x86_64.
+ void replaceRetWithPatchableRet(MachineFunction &MF,
+ const TargetInstrInfo *TII);
+
+ // Prepend the original return instruction with the exit sled code ("patchable
+ // function exit" pseudo-instruction), preserving the original return
+ // instruction just after the exit sled code.
+ // This is the approach to go on CPUs which have multiple options for the
+ // return instruction, like ARM. For such CPUs we can't just jump into the
+ // XRay trampoline and issue a single return instruction there. We rather
+ // have to call the trampoline and return from it to the original return
+ // instruction of the function being instrumented.
+ void prependRetWithPatchableExit(MachineFunction &MF,
+ const TargetInstrInfo *TII);
};
+} // anonymous namespace
+
+void XRayInstrumentation::replaceRetWithPatchableRet(MachineFunction &MF,
+ const TargetInstrInfo *TII)
+{
+ // We look for *all* terminators and returns, then replace those with
+ // PATCHABLE_RET instructions.
+ SmallVector<MachineInstr *, 4> Terminators;
+ for (auto &MBB : MF) {
+ for (auto &T : MBB.terminators()) {
+ unsigned Opc = 0;
+ if (T.isReturn() && T.getOpcode() == TII->getReturnOpcode()) {
+ // Replace return instructions with:
+ // PATCHABLE_RET <Opcode>, <Operand>...
+ Opc = TargetOpcode::PATCHABLE_RET;
+ }
+ if (TII->isTailCall(T)) {
+ // Treat the tail call as a return instruction, which has a
+ // different-looking sled than the normal return case.
+ Opc = TargetOpcode::PATCHABLE_TAIL_CALL;
+ }
+ if (Opc != 0) {
+ auto MIB = BuildMI(MBB, T, T.getDebugLoc(), TII->get(Opc))
+ .addImm(T.getOpcode());
+ for (auto &MO : T.operands())
+ MIB.addOperand(MO);
+ Terminators.push_back(&T);
+ }
+ }
+ }
+
+ for (auto &I : Terminators)
+ I->eraseFromParent();
+}
+
+void XRayInstrumentation::prependRetWithPatchableExit(MachineFunction &MF,
+ const TargetInstrInfo *TII)
+{
+ for (auto &MBB : MF) {
+ for (auto &T : MBB.terminators()) {
+ unsigned Opc = 0;
+ if (T.isReturn()) {
+ Opc = TargetOpcode::PATCHABLE_FUNCTION_EXIT;
+ }
+ if (TII->isTailCall(T)) {
+ Opc = TargetOpcode::PATCHABLE_TAIL_CALL;
+ }
+ if (Opc != 0) {
+ // Prepend the return instruction with PATCHABLE_FUNCTION_EXIT or
+ // PATCHABLE_TAIL_CALL .
+ BuildMI(MBB, T, T.getDebugLoc(),TII->get(Opc));
+ }
+ }
+ }
}
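
The two strategies leave different instruction sequences behind; roughly (a MIR-level sketch derived from the two helpers above, opcode names abbreviated):

//   replaceRetWithPatchableRet (e.g. x86-64, single return opcode):
//     RETQ <ops>              ->  PATCHABLE_RET <RETQ opcode>, <ops>
//     (the original return is erased; the sled re-issues it after tracing)
//
//   prependRetWithPatchableExit (e.g. ARM/AArch64, many return forms):
//     BX_RET / RET / POP pc   ->  PATCHABLE_FUNCTION_EXIT
//                                 BX_RET / RET / POP pc   (kept in place)
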
bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) {
@@ -54,39 +129,43 @@ bool XRayInstrumentation::runOnMachineFunction(MachineFunction &MF) {
return false; // Function is too small.
}
+ // We look for the first non-empty MachineBasicBlock, so that we can insert
+ // the function instrumentation in the appropriate place.
+ auto MBI =
+ find_if(MF, [&](const MachineBasicBlock &MBB) { return !MBB.empty(); });
+ if (MBI == MF.end())
+ return false; // The function is empty.
+
+ auto *TII = MF.getSubtarget().getInstrInfo();
+ auto &FirstMBB = *MBI;
+ auto &FirstMI = *FirstMBB.begin();
+
+ if (!MF.getSubtarget().isXRaySupported()) {
+ FirstMI.emitError("An attempt to perform XRay instrumentation for an"
+ " unsupported target.");
+ return false;
+ }
+
// FIXME: Do the loop triviality analysis here or in an earlier pass.
// First, insert an PATCHABLE_FUNCTION_ENTER as the first instruction of the
// MachineFunction.
- auto &FirstMBB = *MF.begin();
- auto &FirstMI = *FirstMBB.begin();
- auto *TII = MF.getSubtarget().getInstrInfo();
BuildMI(FirstMBB, FirstMI, FirstMI.getDebugLoc(),
TII->get(TargetOpcode::PATCHABLE_FUNCTION_ENTER));
- // Then we look for *all* terminators and returns, then replace those with
- // PATCHABLE_RET instructions.
- SmallVector<MachineInstr *, 4> Terminators;
- for (auto &MBB : MF) {
- for (auto &T : MBB.terminators()) {
- // FIXME: Handle tail calls here too?
- if (T.isReturn() && T.getOpcode() == TII->getReturnOpcode()) {
- // Replace return instructions with:
- // PATCHABLE_RET <Opcode>, <Operand>...
- auto MIB = BuildMI(MBB, T, T.getDebugLoc(),
- TII->get(TargetOpcode::PATCHABLE_RET))
- .addImm(T.getOpcode());
- for (auto &MO : T.operands())
- MIB.addOperand(MO);
- Terminators.push_back(&T);
- break;
- }
- }
+ switch (MF.getTarget().getTargetTriple().getArch()) {
+ case Triple::ArchType::arm:
+ case Triple::ArchType::thumb:
+ case Triple::ArchType::aarch64:
+ // For the architectures which don't have a single return instruction
+ prependRetWithPatchableExit(MF, TII);
+ break;
+ default:
+ // For the architectures that have a single return instruction (such as
+ // RETQ on x86_64).
+ replaceRetWithPatchableRet(MF, TII);
+ break;
}
-
- for (auto &I : Terminators)
- I->eraseFromParent();
-
return true;
}
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/ByteStream.cpp b/contrib/llvm/lib/DebugInfo/CodeView/ByteStream.cpp
deleted file mode 100644
index 2c43bc6958d2..000000000000
--- a/contrib/llvm/lib/DebugInfo/CodeView/ByteStream.cpp
+++ /dev/null
@@ -1,79 +0,0 @@
-//===- ByteStream.cpp - Reads stream data from a byte sequence ------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/DebugInfo/CodeView/ByteStream.h"
-#include "llvm/DebugInfo/CodeView/CodeViewError.h"
-#include "llvm/DebugInfo/CodeView/StreamReader.h"
-#include <cstring>
-
-using namespace llvm;
-using namespace llvm::codeview;
-
-static Error writeBytes(uint32_t Offset, ArrayRef<uint8_t> Src,
- ArrayRef<uint8_t> Dest) {
- return make_error<CodeViewError>(cv_error_code::operation_unsupported,
- "ByteStream is immutable.");
-}
-
-static Error writeBytes(uint32_t Offset, ArrayRef<uint8_t> Src,
- MutableArrayRef<uint8_t> Dest) {
- if (Dest.size() < Src.size())
- return make_error<CodeViewError>(cv_error_code::insufficient_buffer);
- if (Offset > Src.size() - Dest.size())
- return make_error<CodeViewError>(cv_error_code::insufficient_buffer);
-
- ::memcpy(Dest.data() + Offset, Src.data(), Src.size());
- return Error::success();
-}
-
-template <bool Writable>
-Error ByteStream<Writable>::readBytes(uint32_t Offset, uint32_t Size,
- ArrayRef<uint8_t> &Buffer) const {
- if (Offset > Data.size())
- return make_error<CodeViewError>(cv_error_code::insufficient_buffer);
- if (Data.size() < Size + Offset)
- return make_error<CodeViewError>(cv_error_code::insufficient_buffer);
- Buffer = Data.slice(Offset, Size);
- return Error::success();
-}
-
-template <bool Writable>
-Error ByteStream<Writable>::readLongestContiguousChunk(
- uint32_t Offset, ArrayRef<uint8_t> &Buffer) const {
- if (Offset >= Data.size())
- return make_error<CodeViewError>(cv_error_code::insufficient_buffer);
- Buffer = Data.slice(Offset);
- return Error::success();
-}
-
-template <bool Writable>
-Error ByteStream<Writable>::writeBytes(uint32_t Offset,
- ArrayRef<uint8_t> Buffer) const {
- return ::writeBytes(Offset, Buffer, Data);
-}
-
-template <bool Writable> uint32_t ByteStream<Writable>::getLength() const {
- return Data.size();
-}
-
-template <bool Writable> Error ByteStream<Writable>::commit() const {
- return Error::success();
-}
-
-template <bool Writable> StringRef ByteStream<Writable>::str() const {
- const char *CharData = reinterpret_cast<const char *>(Data.data());
- return StringRef(CharData, Data.size());
-}
-
-namespace llvm {
-namespace codeview {
-template class ByteStream<true>;
-template class ByteStream<false>;
-}
-}
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp b/contrib/llvm/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp
new file mode 100644
index 000000000000..75cfd0dd184e
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/CodeView/CVSymbolVisitor.cpp
@@ -0,0 +1,73 @@
+//===- CVSymbolVisitor.cpp --------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/CVSymbolVisitor.h"
+
+#include "llvm/DebugInfo/CodeView/CodeViewError.h"
+#include "llvm/DebugInfo/CodeView/SymbolVisitorCallbacks.h"
+#include "llvm/DebugInfo/MSF/ByteStream.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+
+template <typename T>
+static Error takeObject(ArrayRef<uint8_t> &Data, const T *&Res) {
+ if (Data.size() < sizeof(*Res))
+ return llvm::make_error<CodeViewError>(cv_error_code::insufficient_buffer);
+ Res = reinterpret_cast<const T *>(Data.data());
+ Data = Data.drop_front(sizeof(*Res));
+ return Error::success();
+}
+
+CVSymbolVisitor::CVSymbolVisitor(SymbolVisitorCallbacks &Callbacks)
+ : Callbacks(Callbacks) {}
+
+template <typename T>
+static Error visitKnownRecord(CVSymbol &Record,
+ SymbolVisitorCallbacks &Callbacks) {
+ SymbolRecordKind RK = static_cast<SymbolRecordKind>(Record.Type);
+ T KnownRecord(RK);
+ if (auto EC = Callbacks.visitKnownRecord(Record, KnownRecord))
+ return EC;
+ return Error::success();
+}
+
+Error CVSymbolVisitor::visitSymbolRecord(CVSymbol &Record) {
+ if (auto EC = Callbacks.visitSymbolBegin(Record))
+ return EC;
+
+ switch (Record.Type) {
+ default:
+ if (auto EC = Callbacks.visitUnknownSymbol(Record))
+ return EC;
+ break;
+#define SYMBOL_RECORD(EnumName, EnumVal, Name) \
+ case EnumName: { \
+ if (auto EC = visitKnownRecord<Name>(Record, Callbacks)) \
+ return EC; \
+ break; \
+ }
+#define SYMBOL_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) \
+ SYMBOL_RECORD(EnumVal, EnumVal, AliasName)
+#include "llvm/DebugInfo/CodeView/CVSymbolTypes.def"
+ }
+
+ if (auto EC = Callbacks.visitSymbolEnd(Record))
+ return EC;
+
+ return Error::success();
+}
+
+Error CVSymbolVisitor::visitSymbolStream(const CVSymbolArray &Symbols) {
+ for (auto I : Symbols) {
+ if (auto EC = visitSymbolRecord(I))
+ return EC;
+ }
+ return Error::success();
+}
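
The visitSymbolRecord switch above is generated by X-macro expansion: CVSymbolTypes.def is included with different SYMBOL_RECORD definitions so the same list of record kinds produces both the enum and one case per known kind, with anything else falling through to visitUnknownSymbol. Below is a minimal, self-contained sketch of that dispatch pattern; the KIND_LIST macro, the example kinds, and the handler names are invented for illustration and are not the real CVSymbolTypes.def.

// Hypothetical stand-in for a .def file: each entry is (EnumName, Value).
#define KIND_LIST(X)                                                           \
  X(S_LOCAL, 0x113e)                                                           \
  X(S_BLOCK32, 0x1103)

#include <cstdint>
#include <cstdio>

enum SymbolKind : uint16_t {
#define DECLARE_KIND(Name, Val) Name = Val,
  KIND_LIST(DECLARE_KIND)
#undef DECLARE_KIND
};

// One handler per known kind; a real visitor would deserialize the payload.
template <SymbolKind K> void visitKnown() {
  std::printf("known kind %#x\n", unsigned(K));
}

void visitUnknown(uint16_t Kind) {
  std::printf("unknown kind %#x\n", unsigned(Kind));
}

void visitSymbolRecord(uint16_t Kind) {
  switch (Kind) {
  default:
    visitUnknown(Kind);
    break;
  // Re-expand the same list to get one case per known kind.
#define DISPATCH_KIND(Name, Val)                                               \
  case Name:                                                                   \
    visitKnown<Name>();                                                        \
    break;
  KIND_LIST(DISPATCH_KIND)
#undef DISPATCH_KIND
  }
}

int main() {
  visitSymbolRecord(S_LOCAL); // dispatches to the known-kind handler
  visitSymbolRecord(0xffff);  // falls through to visitUnknown
}

Because one list drives both the enum and the switch, adding a kind to the list updates both in lockstep.
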
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/CVTypeVisitor.cpp b/contrib/llvm/lib/DebugInfo/CodeView/CVTypeVisitor.cpp
index 09f72214c52b..5171e24f3aac 100644
--- a/contrib/llvm/lib/DebugInfo/CodeView/CVTypeVisitor.cpp
+++ b/contrib/llvm/lib/DebugInfo/CodeView/CVTypeVisitor.cpp
@@ -9,115 +9,128 @@
#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
+#include "llvm/DebugInfo/CodeView/CodeViewError.h"
+#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
+#include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h"
+#include "llvm/DebugInfo/MSF/ByteStream.h"
+
using namespace llvm;
using namespace llvm::codeview;
+CVTypeVisitor::CVTypeVisitor(TypeVisitorCallbacks &Callbacks)
+ : Callbacks(Callbacks) {}
+
template <typename T>
-static Error takeObject(ArrayRef<uint8_t> &Data, const T *&Res) {
- if (Data.size() < sizeof(*Res))
- return llvm::make_error<CodeViewError>(cv_error_code::insufficient_buffer);
- Res = reinterpret_cast<const T *>(Data.data());
- Data = Data.drop_front(sizeof(*Res));
+static Error visitKnownRecord(CVType &Record, TypeVisitorCallbacks &Callbacks) {
+ TypeRecordKind RK = static_cast<TypeRecordKind>(Record.Type);
+ T KnownRecord(RK);
+ if (auto EC = Callbacks.visitKnownRecord(Record, KnownRecord))
+ return EC;
return Error::success();
}
-CVTypeVisitor::CVTypeVisitor(TypeVisitorCallbacks &Callbacks)
- : Callbacks(Callbacks) {}
+template <typename T>
+static Error visitKnownMember(CVMemberRecord &Record,
+ TypeVisitorCallbacks &Callbacks) {
+ TypeRecordKind RK = static_cast<TypeRecordKind>(Record.Kind);
+ T KnownRecord(RK);
+ if (auto EC = Callbacks.visitKnownMember(Record, KnownRecord))
+ return EC;
+ return Error::success();
+}
-Error CVTypeVisitor::visitTypeRecord(const CVRecord<TypeLeafKind> &Record) {
- ArrayRef<uint8_t> LeafData = Record.Data;
+Error CVTypeVisitor::visitTypeRecord(CVType &Record) {
if (auto EC = Callbacks.visitTypeBegin(Record))
return EC;
+
switch (Record.Type) {
default:
if (auto EC = Callbacks.visitUnknownType(Record))
return EC;
break;
- case LF_FIELDLIST:
- if (auto EC = Callbacks.visitFieldListBegin(Record))
- return EC;
- if (auto EC = visitFieldList(Record))
- return EC;
- if (auto EC = Callbacks.visitFieldListEnd(Record))
- return EC;
- break;
#define TYPE_RECORD(EnumName, EnumVal, Name) \
case EnumName: { \
- TypeRecordKind RK = static_cast<TypeRecordKind>(EnumName); \
- auto Result = Name##Record::deserialize(RK, LeafData); \
- if (Result.getError()) \
- return llvm::make_error<CodeViewError>(cv_error_code::corrupt_record); \
- if (auto EC = Callbacks.visit##Name(*Result)) \
+ if (auto EC = visitKnownRecord<Name##Record>(Record, Callbacks)) \
return EC; \
break; \
}
#define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) \
TYPE_RECORD(EnumVal, EnumVal, AliasName)
#define MEMBER_RECORD(EnumName, EnumVal, Name)
+#define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
#include "llvm/DebugInfo/CodeView/TypeRecords.def"
}
+
if (auto EC = Callbacks.visitTypeEnd(Record))
return EC;
- return Error::success();
-}
-/// Visits the type records in Data. Sets the error flag on parse failures.
-Error CVTypeVisitor::visitTypeStream(const CVTypeArray &Types) {
- for (const auto &I : Types) {
- if (auto EC = visitTypeRecord(I))
- return EC;
- }
return Error::success();
}
-Error CVTypeVisitor::skipPadding(ArrayRef<uint8_t> &Data) {
- if (Data.empty())
- return Error::success();
- uint8_t Leaf = Data.front();
- if (Leaf < LF_PAD0)
- return Error::success();
- // Leaf is greater than 0xf0. We should advance by the number of bytes in
- // the low 4 bits.
- unsigned BytesToAdvance = Leaf & 0x0F;
- if (Data.size() < BytesToAdvance) {
- return llvm::make_error<CodeViewError>(cv_error_code::corrupt_record,
- "Invalid padding bytes!");
- }
- Data = Data.drop_front(BytesToAdvance);
- return Error::success();
-}
+static Error visitMemberRecord(CVMemberRecord &Record,
+ TypeVisitorCallbacks &Callbacks) {
+ if (auto EC = Callbacks.visitMemberBegin(Record))
+ return EC;
-/// Visits individual member records of a field list record. Member records do
-/// not describe their own length, and need special handling.
-Error CVTypeVisitor::visitFieldList(const CVRecord<TypeLeafKind> &Record) {
- ArrayRef<uint8_t> RecordData = Record.Data;
- while (!RecordData.empty()) {
- const ulittle16_t *LeafPtr;
- if (auto EC = takeObject(RecordData, LeafPtr))
+ switch (Record.Kind) {
+ default:
+ if (auto EC = Callbacks.visitUnknownMember(Record))
return EC;
- TypeLeafKind Leaf = TypeLeafKind(unsigned(*LeafPtr));
- switch (Leaf) {
- default:
- // Field list records do not describe their own length, so we cannot
- // continue parsing past an unknown member type.
- if (auto EC = Callbacks.visitUnknownMember(Record))
- return llvm::make_error<CodeViewError>(cv_error_code::corrupt_record);
+ break;
#define MEMBER_RECORD(EnumName, EnumVal, Name) \
case EnumName: { \
- TypeRecordKind RK = static_cast<TypeRecordKind>(EnumName); \
- auto Result = Name##Record::deserialize(RK, RecordData); \
- if (Result.getError()) \
- return llvm::make_error<CodeViewError>(cv_error_code::corrupt_record); \
- if (auto EC = Callbacks.visit##Name(*Result)) \
+ if (auto EC = visitKnownMember<Name##Record>(Record, Callbacks)) \
return EC; \
break; \
}
#define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName) \
MEMBER_RECORD(EnumVal, EnumVal, AliasName)
+#define TYPE_RECORD(EnumName, EnumVal, Name)
+#define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
#include "llvm/DebugInfo/CodeView/TypeRecords.def"
- }
- if (auto EC = skipPadding(RecordData))
+ }
+
+ if (auto EC = Callbacks.visitMemberEnd(Record))
+ return EC;
+
+ return Error::success();
+}
+
+Error CVTypeVisitor::visitMemberRecord(CVMemberRecord &Record) {
+ return ::visitMemberRecord(Record, Callbacks);
+}
+
+/// Visits the type records in Data. Sets the error flag on parse failures.
+Error CVTypeVisitor::visitTypeStream(const CVTypeArray &Types) {
+ for (auto I : Types) {
+ if (auto EC = visitTypeRecord(I))
return EC;
}
return Error::success();
}
+
+Error CVTypeVisitor::visitFieldListMemberStream(msf::StreamReader Reader) {
+ FieldListDeserializer Deserializer(Reader);
+ TypeVisitorCallbackPipeline Pipeline;
+ Pipeline.addCallbackToPipeline(Deserializer);
+ Pipeline.addCallbackToPipeline(Callbacks);
+
+ TypeLeafKind Leaf;
+ while (!Reader.empty()) {
+ if (auto EC = Reader.readEnum(Leaf))
+ return EC;
+
+ CVMemberRecord Record;
+ Record.Kind = Leaf;
+ if (auto EC = ::visitMemberRecord(Record, Pipeline))
+ return EC;
+ }
+
+ return Error::success();
+}
+
+Error CVTypeVisitor::visitFieldListMemberStream(ArrayRef<uint8_t> Data) {
+ msf::ByteStream S(Data);
+ msf::StreamReader SR(S);
+ return visitFieldListMemberStream(SR);
+}
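
visitFieldListMemberStream above combines two ideas: field-list members carry no length of their own, so a leaf kind is read before each member, and the work is routed through a TypeVisitorCallbackPipeline so the deserializer runs before the user's callbacks. A rough sketch of the pipeline forwarding follows, using simplified stand-ins (Callbacks, CallbackPipeline, MemberRecord, bool in place of llvm::Error) that are not the real LLVM types.

#include <cstdio>
#include <vector>

struct MemberRecord { unsigned Kind; };

struct Callbacks {
  virtual ~Callbacks() = default;
  virtual bool visitMemberBegin(MemberRecord &R) { return true; }
  virtual bool visitMemberEnd(MemberRecord &R) { return true; }
};

// Forwards each event to every registered callback, in registration order,
// so an earlier callback (e.g. a deserializer) can populate the record
// before later callbacks inspect it.
struct CallbackPipeline : Callbacks {
  std::vector<Callbacks *> Pipeline;
  void addCallbackToPipeline(Callbacks &C) { Pipeline.push_back(&C); }
  bool visitMemberBegin(MemberRecord &R) override {
    for (Callbacks *C : Pipeline)
      if (!C->visitMemberBegin(R))
        return false;
    return true;
  }
  bool visitMemberEnd(MemberRecord &R) override {
    for (Callbacks *C : Pipeline)
      if (!C->visitMemberEnd(R))
        return false;
    return true;
  }
};

struct Printer : Callbacks {
  bool visitMemberBegin(MemberRecord &R) override {
    std::printf("member kind %#x\n", R.Kind);
    return true;
  }
};

int main() {
  CallbackPipeline P;
  Printer Pr;
  P.addCallbackToPipeline(Pr);
  MemberRecord R{0x1502};
  P.visitMemberBegin(R);
  P.visitMemberEnd(R);
}
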
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/CodeViewError.cpp b/contrib/llvm/lib/DebugInfo/CodeView/CodeViewError.cpp
index aad1d8b25cd0..55c10c076eef 100644
--- a/contrib/llvm/lib/DebugInfo/CodeView/CodeViewError.cpp
+++ b/contrib/llvm/lib/DebugInfo/CodeView/CodeViewError.cpp
@@ -20,7 +20,7 @@ namespace {
// deal with the Error value directly, rather than converting to error_code.
class CodeViewErrorCategory : public std::error_category {
public:
- const char *name() const LLVM_NOEXCEPT override { return "llvm.codeview"; }
+ const char *name() const noexcept override { return "llvm.codeview"; }
std::string message(int Condition) const override {
switch (static_cast<cv_error_code>(Condition)) {
@@ -33,6 +33,8 @@ public:
return "The CodeView record is corrupted.";
case cv_error_code::operation_unsupported:
return "The requested operation is not supported.";
+ case cv_error_code::unknown_member_record:
+ return "The member record is of an unknown type.";
}
llvm_unreachable("Unrecognized cv_error_code");
}
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp b/contrib/llvm/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp
new file mode 100644
index 000000000000..9bd85cf9dc68
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp
@@ -0,0 +1,242 @@
+//===- CodeViewRecordIO.cpp -------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/CodeViewRecordIO.h"
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/RecordSerialization.h"
+#include "llvm/DebugInfo/MSF/StreamReader.h"
+#include "llvm/DebugInfo/MSF/StreamWriter.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+
+Error CodeViewRecordIO::beginRecord(Optional<uint32_t> MaxLength) {
+ RecordLimit Limit;
+ Limit.MaxLength = MaxLength;
+ Limit.BeginOffset = getCurrentOffset();
+ Limits.push_back(Limit);
+ return Error::success();
+}
+
+Error CodeViewRecordIO::endRecord() {
+ assert(!Limits.empty() && "Not in a record!");
+ Limits.pop_back();
+ return Error::success();
+}
+
+uint32_t CodeViewRecordIO::maxFieldLength() const {
+ assert(!Limits.empty() && "Not in a record!");
+
+ // The max length of the next field is the minimum of all lengths that would
+ // be allowed by any of the sub-records we're in. In practice, we can only
+ // ever be at most 1 sub-record deep (in a FieldList), but this works for
+ // the general case.
+ uint32_t Offset = getCurrentOffset();
+ Optional<uint32_t> Min = Limits.front().bytesRemaining(Offset);
+ for (auto X : makeArrayRef(Limits).drop_front()) {
+ Optional<uint32_t> ThisMin = X.bytesRemaining(Offset);
+ if (ThisMin.hasValue())
+ Min = (Min.hasValue()) ? std::min(*Min, *ThisMin) : *ThisMin;
+ }
+ assert(Min.hasValue() && "Every field must have a maximum length!");
+
+ return *Min;
+}
+
+Error CodeViewRecordIO::skipPadding() {
+ assert(!isWriting() && "Cannot skip padding while writing!");
+
+ if (Reader->bytesRemaining() == 0)
+ return Error::success();
+
+ uint8_t Leaf = Reader->peek();
+ if (Leaf < LF_PAD0)
+ return Error::success();
+ // Leaf is greater than 0xf0. We should advance by the number of bytes in
+ // the low 4 bits.
+ unsigned BytesToAdvance = Leaf & 0x0F;
+ return Reader->skip(BytesToAdvance);
+}
+
+Error CodeViewRecordIO::mapByteVectorTail(ArrayRef<uint8_t> &Bytes) {
+ if (isWriting()) {
+ if (auto EC = Writer->writeBytes(Bytes))
+ return EC;
+ } else {
+ if (auto EC = Reader->readBytes(Bytes, Reader->bytesRemaining()))
+ return EC;
+ }
+ return Error::success();
+}
+
+Error CodeViewRecordIO::mapByteVectorTail(std::vector<uint8_t> &Bytes) {
+ ArrayRef<uint8_t> BytesRef(Bytes);
+ if (auto EC = mapByteVectorTail(BytesRef))
+ return EC;
+ if (!isWriting())
+ Bytes.assign(BytesRef.begin(), BytesRef.end());
+
+ return Error::success();
+}
+
+Error CodeViewRecordIO::mapInteger(TypeIndex &TypeInd) {
+ if (isWriting()) {
+ if (auto EC = Writer->writeInteger(TypeInd.getIndex()))
+ return EC;
+ return Error::success();
+ }
+
+ uint32_t I;
+ if (auto EC = Reader->readInteger(I))
+ return EC;
+ TypeInd.setIndex(I);
+ return Error::success();
+}
+
+Error CodeViewRecordIO::mapEncodedInteger(int64_t &Value) {
+ if (isWriting()) {
+ if (Value >= 0) {
+ if (auto EC = writeEncodedUnsignedInteger(static_cast<uint64_t>(Value)))
+ return EC;
+ } else {
+ if (auto EC = writeEncodedSignedInteger(Value))
+ return EC;
+ }
+ } else {
+ APSInt N;
+ if (auto EC = consume(*Reader, N))
+ return EC;
+ Value = N.getExtValue();
+ }
+
+ return Error::success();
+}
+
+Error CodeViewRecordIO::mapEncodedInteger(uint64_t &Value) {
+ if (isWriting()) {
+ if (auto EC = writeEncodedUnsignedInteger(Value))
+ return EC;
+ } else {
+ APSInt N;
+ if (auto EC = consume(*Reader, N))
+ return EC;
+ Value = N.getZExtValue();
+ }
+ return Error::success();
+}
+
+Error CodeViewRecordIO::mapEncodedInteger(APSInt &Value) {
+ if (isWriting()) {
+ if (Value.isSigned())
+ return writeEncodedSignedInteger(Value.getSExtValue());
+ return writeEncodedUnsignedInteger(Value.getZExtValue());
+ }
+
+ return consume(*Reader, Value);
+}
+
+Error CodeViewRecordIO::mapStringZ(StringRef &Value) {
+ if (isWriting()) {
+ // Truncate if we attempt to write too much.
+ StringRef S = Value.take_front(maxFieldLength() - 1);
+ if (auto EC = Writer->writeZeroString(S))
+ return EC;
+ } else {
+ if (auto EC = Reader->readZeroString(Value))
+ return EC;
+ }
+ return Error::success();
+}
+
+Error CodeViewRecordIO::mapGuid(StringRef &Guid) {
+ constexpr uint32_t GuidSize = 16;
+ if (maxFieldLength() < GuidSize)
+ return make_error<CodeViewError>(cv_error_code::insufficient_buffer);
+
+ if (isWriting()) {
+ assert(Guid.size() == 16 && "Invalid Guid Size!");
+ if (auto EC = Writer->writeFixedString(Guid))
+ return EC;
+ } else {
+ if (auto EC = Reader->readFixedString(Guid, 16))
+ return EC;
+ }
+ return Error::success();
+}
+
+Error CodeViewRecordIO::mapStringZVectorZ(std::vector<StringRef> &Value) {
+ if (isWriting()) {
+ for (auto V : Value) {
+ if (auto EC = mapStringZ(V))
+ return EC;
+ }
+ if (auto EC = Writer->writeInteger(uint8_t(0)))
+ return EC;
+ } else {
+ StringRef S;
+ if (auto EC = mapStringZ(S))
+ return EC;
+ while (!S.empty()) {
+ Value.push_back(S);
+ if (auto EC = mapStringZ(S))
+ return EC;
+ };
+ }
+ return Error::success();
+}
+
+Error CodeViewRecordIO::writeEncodedSignedInteger(const int64_t &Value) {
+ assert(Value < 0 && "Encoded integer is not signed!");
+ if (Value >= std::numeric_limits<int8_t>::min()) {
+ if (auto EC = Writer->writeInteger(static_cast<uint16_t>(LF_CHAR)))
+ return EC;
+ if (auto EC = Writer->writeInteger(static_cast<int8_t>(Value)))
+ return EC;
+ } else if (Value >= std::numeric_limits<int16_t>::min()) {
+ if (auto EC = Writer->writeInteger(static_cast<uint16_t>(LF_SHORT)))
+ return EC;
+ if (auto EC = Writer->writeInteger(static_cast<int16_t>(Value)))
+ return EC;
+ } else if (Value >= std::numeric_limits<int32_t>::min()) {
+ if (auto EC = Writer->writeInteger(static_cast<uint16_t>(LF_LONG)))
+ return EC;
+ if (auto EC = Writer->writeInteger(static_cast<int32_t>(Value)))
+ return EC;
+ } else {
+ if (auto EC = Writer->writeInteger(static_cast<uint16_t>(LF_QUADWORD)))
+ return EC;
+ if (auto EC = Writer->writeInteger(Value))
+ return EC;
+ }
+ return Error::success();
+}
+
+Error CodeViewRecordIO::writeEncodedUnsignedInteger(const uint64_t &Value) {
+ if (Value < LF_NUMERIC) {
+ if (auto EC = Writer->writeInteger(static_cast<uint16_t>(Value)))
+ return EC;
+ } else if (Value <= std::numeric_limits<uint16_t>::max()) {
+ if (auto EC = Writer->writeInteger(static_cast<uint16_t>(LF_USHORT)))
+ return EC;
+ if (auto EC = Writer->writeInteger(static_cast<uint16_t>(Value)))
+ return EC;
+ } else if (Value <= std::numeric_limits<uint32_t>::max()) {
+ if (auto EC = Writer->writeInteger(static_cast<uint16_t>(LF_ULONG)))
+ return EC;
+ if (auto EC = Writer->writeInteger(static_cast<uint32_t>(Value)))
+ return EC;
+ } else {
+ if (auto EC = Writer->writeInteger(static_cast<uint16_t>(LF_UQUADWORD)))
+ return EC;
+ if (auto EC = Writer->writeInteger(Value))
+ return EC;
+ }
+
+ return Error::success();
+}
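
writeEncodedUnsignedInteger above picks the smallest CodeView numeric-leaf encoding that fits: values below LF_NUMERIC are stored directly in the 16-bit leaf field, and larger values get an LF_USHORT/LF_ULONG/LF_UQUADWORD prefix followed by the value. Here is a standalone sketch of that width selection using a plain byte vector; appendLE and the buffer handling are illustrative, not the msf StreamWriter API.

#include <cstdint>
#include <cstdio>
#include <limits>
#include <vector>

// CodeView numeric-leaf prefixes (unsigned cases only, for brevity).
enum : uint16_t {
  LF_NUMERIC   = 0x8000,
  LF_USHORT    = 0x8002,
  LF_ULONG     = 0x8004,
  LF_UQUADWORD = 0x800a,
};

// Append Value little-endian using Bytes bytes.
static void appendLE(std::vector<uint8_t> &Out, uint64_t Value, unsigned Bytes) {
  for (unsigned I = 0; I < Bytes; ++I)
    Out.push_back(uint8_t(Value >> (8 * I)));
}

// Pick the smallest encoding, mirroring writeEncodedUnsignedInteger above.
static void writeEncodedUnsigned(std::vector<uint8_t> &Out, uint64_t Value) {
  if (Value < LF_NUMERIC) {
    appendLE(Out, Value, 2); // small value doubles as the leaf itself
  } else if (Value <= std::numeric_limits<uint16_t>::max()) {
    appendLE(Out, LF_USHORT, 2);
    appendLE(Out, Value, 2);
  } else if (Value <= std::numeric_limits<uint32_t>::max()) {
    appendLE(Out, LF_ULONG, 2);
    appendLE(Out, Value, 4);
  } else {
    appendLE(Out, LF_UQUADWORD, 2);
    appendLE(Out, Value, 8);
  }
}

int main() {
  std::vector<uint8_t> Out;
  writeEncodedUnsigned(Out, 42);    // 2 bytes
  writeEncodedUnsigned(Out, 70000); // LF_ULONG prefix + 4 bytes
  std::printf("%zu bytes emitted\n", Out.size());
}

writeEncodedSignedInteger follows the same shape with LF_CHAR/LF_SHORT/LF_LONG/LF_QUADWORD and signed range checks.
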
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/EnumTables.cpp b/contrib/llvm/lib/DebugInfo/CodeView/EnumTables.cpp
index d59271b2367e..0e20bcb27ec9 100644
--- a/contrib/llvm/lib/DebugInfo/CodeView/EnumTables.cpp
+++ b/contrib/llvm/lib/DebugInfo/CodeView/EnumTables.cpp
@@ -24,6 +24,12 @@ static const EnumEntry<SymbolKind> SymbolTypeNames[] = {
#undef CV_SYMBOL
};
+static const EnumEntry<TypeLeafKind> TypeLeafNames[] = {
+#define CV_TYPE(name, val) {#name, name},
+#include "llvm/DebugInfo/CodeView/TypeRecords.def"
+#undef CV_TYPE
+};
+
static const EnumEntry<uint16_t> RegisterNames[] = {
CV_ENUM_CLASS_ENT(RegisterId, Unknown),
CV_ENUM_CLASS_ENT(RegisterId, VFrame),
@@ -324,6 +330,10 @@ ArrayRef<EnumEntry<SymbolKind>> getSymbolTypeNames() {
return makeArrayRef(SymbolTypeNames);
}
+ArrayRef<EnumEntry<TypeLeafKind>> getTypeLeafNames() {
+ return makeArrayRef(TypeLeafNames);
+}
+
ArrayRef<EnumEntry<uint16_t>> getRegisterNames() {
return makeArrayRef(RegisterNames);
}
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/FieldListRecordBuilder.cpp b/contrib/llvm/lib/DebugInfo/CodeView/FieldListRecordBuilder.cpp
deleted file mode 100644
index 5f229e3d9f94..000000000000
--- a/contrib/llvm/lib/DebugInfo/CodeView/FieldListRecordBuilder.cpp
+++ /dev/null
@@ -1,132 +0,0 @@
-//===-- FieldListRecordBuilder.cpp ----------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/DebugInfo/CodeView/FieldListRecordBuilder.h"
-
-using namespace llvm;
-using namespace codeview;
-
-FieldListRecordBuilder::FieldListRecordBuilder()
- : ListRecordBuilder(TypeRecordKind::FieldList) {}
-
-void FieldListRecordBuilder::writeBaseClass(const BaseClassRecord &Record) {
- TypeRecordBuilder &Builder = getBuilder();
-
- Builder.writeTypeRecordKind(TypeRecordKind::BaseClass);
- Builder.writeUInt16(static_cast<uint16_t>(Record.getAccess()));
- Builder.writeTypeIndex(Record.getBaseType());
- Builder.writeEncodedUnsignedInteger(Record.getBaseOffset());
-
- finishSubRecord();
-}
-
-void FieldListRecordBuilder::writeEnumerator(const EnumeratorRecord &Record) {
- TypeRecordBuilder &Builder = getBuilder();
-
- Builder.writeTypeRecordKind(TypeRecordKind::Enumerator);
- Builder.writeUInt16(static_cast<uint16_t>(Record.getAccess()));
- // FIXME: Handle full APInt such as __int128.
- Builder.writeEncodedUnsignedInteger(Record.getValue().getZExtValue());
- Builder.writeNullTerminatedString(Record.getName());
-
- finishSubRecord();
-}
-
-void FieldListRecordBuilder::writeDataMember(const DataMemberRecord &Record) {
- TypeRecordBuilder &Builder = getBuilder();
-
- Builder.writeTypeRecordKind(Record.getKind());
- Builder.writeUInt16(static_cast<uint16_t>(Record.getAccess()));
- Builder.writeTypeIndex(Record.getType());
- Builder.writeEncodedUnsignedInteger(Record.getFieldOffset());
- Builder.writeNullTerminatedString(Record.getName());
-
- finishSubRecord();
-}
-
-void FieldListRecordBuilder::writeOverloadedMethod(
- const OverloadedMethodRecord &Record) {
- TypeRecordBuilder &Builder = getBuilder();
-
- Builder.writeTypeRecordKind(TypeRecordKind::OverloadedMethod);
- Builder.writeUInt16(Record.getNumOverloads());
- Builder.writeTypeIndex(Record.getMethodList());
- Builder.writeNullTerminatedString(Record.getName());
-
- finishSubRecord();
-}
-
-void FieldListRecordBuilder::writeOneMethod(const OneMethodRecord &Record) {
- TypeRecordBuilder &Builder = getBuilder();
-
- uint16_t Flags = static_cast<uint16_t>(Record.getAccess());
- Flags |= static_cast<uint16_t>(Record.getKind()) << MethodKindShift;
- Flags |= static_cast<uint16_t>(Record.getOptions());
-
- Builder.writeTypeRecordKind(TypeRecordKind::OneMethod);
- Builder.writeUInt16(Flags);
- Builder.writeTypeIndex(Record.getType());
- if (Record.isIntroducingVirtual()) {
- assert(Record.getVFTableOffset() >= 0);
- Builder.writeInt32(Record.getVFTableOffset());
- } else {
- assert(Record.getVFTableOffset() == -1);
- }
-
- Builder.writeNullTerminatedString(Record.getName());
-
- finishSubRecord();
-}
-
-void FieldListRecordBuilder::writeNestedType(const NestedTypeRecord &Record) {
- TypeRecordBuilder &Builder = getBuilder();
-
- Builder.writeTypeRecordKind(Record.getKind());
- Builder.writeUInt16(0);
- Builder.writeTypeIndex(Record.getNestedType());
- Builder.writeNullTerminatedString(Record.getName());
-
- finishSubRecord();
-}
-
-void FieldListRecordBuilder::writeStaticDataMember(
- const StaticDataMemberRecord &Record) {
- TypeRecordBuilder &Builder = getBuilder();
-
- Builder.writeTypeRecordKind(Record.getKind());
- Builder.writeUInt16(static_cast<uint16_t>(Record.getAccess()));
- Builder.writeTypeIndex(Record.getType());
- Builder.writeNullTerminatedString(Record.getName());
-
- finishSubRecord();
-}
-
-void FieldListRecordBuilder::writeVirtualBaseClass(
- const VirtualBaseClassRecord &Record) {
- TypeRecordBuilder &Builder = getBuilder();
-
- Builder.writeTypeRecordKind(Record.getKind());
- Builder.writeUInt16(static_cast<uint16_t>(Record.getAccess()));
- Builder.writeTypeIndex(Record.getBaseType());
- Builder.writeTypeIndex(Record.getVBPtrType());
- Builder.writeEncodedInteger(Record.getVBPtrOffset());
- Builder.writeEncodedUnsignedInteger(Record.getVTableIndex());
-
- finishSubRecord();
-}
-
-void FieldListRecordBuilder::writeVFPtr(const VFPtrRecord &Record) {
- TypeRecordBuilder &Builder = getBuilder();
-
- Builder.writeTypeRecordKind(TypeRecordKind::VFPtr);
- Builder.writeUInt16(0);
- Builder.writeTypeIndex(Record.getType());
-
- finishSubRecord();
-}
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/ListRecordBuilder.cpp b/contrib/llvm/lib/DebugInfo/CodeView/ListRecordBuilder.cpp
deleted file mode 100644
index eb79e8ac9a3f..000000000000
--- a/contrib/llvm/lib/DebugInfo/CodeView/ListRecordBuilder.cpp
+++ /dev/null
@@ -1,102 +0,0 @@
-//===-- ListRecordBuilder.cpp ---------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/SmallString.h"
-#include "llvm/DebugInfo/CodeView/ListRecordBuilder.h"
-#include "llvm/DebugInfo/CodeView/TypeTableBuilder.h"
-
-using namespace llvm;
-using namespace codeview;
-
-ListRecordBuilder::ListRecordBuilder(TypeRecordKind Kind)
- : Kind(Kind), Builder(Kind) {}
-
-void ListRecordBuilder::writeListContinuation(const ListContinuationRecord &R) {
- TypeRecordBuilder &Builder = getBuilder();
-
- assert(getLastContinuationSize() < 65535 - 8 && "continuation won't fit");
-
- Builder.writeTypeRecordKind(TypeRecordKind::ListContinuation);
- Builder.writeUInt16(0);
- Builder.writeTypeIndex(R.getContinuationIndex());
-
- // End the current segment manually so that nothing comes after the
- // continuation.
- ContinuationOffsets.push_back(Builder.size());
- SubrecordStart = Builder.size();
-}
-
-void ListRecordBuilder::finishSubRecord() {
- // The type table inserts a 16 bit size field before each list, so factor that
- // into our alignment padding.
- uint32_t Remainder =
- (Builder.size() + 2 * (ContinuationOffsets.size() + 1)) % 4;
- if (Remainder != 0) {
- for (int32_t PaddingBytesLeft = 4 - Remainder; PaddingBytesLeft > 0;
- --PaddingBytesLeft) {
- Builder.writeUInt8(LF_PAD0 + PaddingBytesLeft);
- }
- }
-
- // Check if this subrecord makes the current segment not fit in 64K minus the
- // space for a continuation record (8 bytes). If the segment does not fit,
- // back up and insert a continuation record, sliding the current subrecord
- // down.
- if (getLastContinuationSize() > 65535 - 8) {
- assert(SubrecordStart != 0 && "can't slide from the start!");
- SmallString<128> SubrecordCopy(
- Builder.str().slice(SubrecordStart, Builder.size()));
- assert(SubrecordCopy.size() < 65530 && "subrecord is too large to slide!");
- Builder.truncate(SubrecordStart);
-
- // Write a placeholder continuation record.
- Builder.writeTypeRecordKind(TypeRecordKind::ListContinuation);
- Builder.writeUInt16(0);
- Builder.writeUInt32(0);
- ContinuationOffsets.push_back(Builder.size());
- assert(Builder.size() == SubrecordStart + 8 && "wrong continuation size");
- assert(getLastContinuationSize() < 65535 && "segment too big");
-
- // Start a new list record of the appropriate kind, and copy the previous
- // subrecord into place.
- Builder.writeTypeRecordKind(Kind);
- Builder.writeBytes(SubrecordCopy);
- }
-
- SubrecordStart = Builder.size();
-}
-
-TypeIndex ListRecordBuilder::writeListRecord(TypeTableBuilder &Table) {
- // Get the continuation segments as a reversed vector of StringRefs for
- // convenience.
- SmallVector<StringRef, 1> Segments;
- StringRef Data = str();
- size_t LastEnd = 0;
- for (size_t SegEnd : ContinuationOffsets) {
- Segments.push_back(Data.slice(LastEnd, SegEnd));
- LastEnd = SegEnd;
- }
- Segments.push_back(Data.slice(LastEnd, Builder.size()));
-
- // Pop the last record off and emit it directly.
- StringRef LastRec = Segments.pop_back_val();
- TypeIndex ContinuationIndex = Table.writeRecord(LastRec);
-
- // Emit each record with a continuation in reverse order, so that each one
- // references the previous record.
- for (StringRef Rec : reverse(Segments)) {
- assert(*reinterpret_cast<const ulittle16_t *>(Rec.data()) ==
- unsigned(Kind));
- ulittle32_t *ContinuationPtr =
- reinterpret_cast<ulittle32_t *>(const_cast<char *>(Rec.end())) - 1;
- *ContinuationPtr = ContinuationIndex.getIndex();
- ContinuationIndex = Table.writeRecord(Rec);
- }
- return ContinuationIndex;
-}
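
The deleted writeListRecord documents the continuation scheme it implemented: an oversized list is split into segments that each fit under the 64K record limit, the last segment is emitted first, and every earlier segment is emitted with the type index of the segment after it patched into its continuation field, so readers can chain through them. A toy sketch of that reverse-order emission follows; writeRecord, the index values, and the way the continuation is appended are stand-ins for illustration, not the real TypeTableBuilder.

#include <cstdio>
#include <string>
#include <vector>

// Toy stand-in for TypeTableBuilder::writeRecord: returns a growing index.
static unsigned writeRecord(const std::string &Rec) {
  static unsigned NextIndex = 0x1000;
  std::printf("emit %zu-byte record as index %#x\n", Rec.size(), NextIndex);
  return NextIndex++;
}

// Emit segments in reverse so each earlier segment can embed the type index
// of the segment that logically follows it.
static unsigned emitWithContinuations(std::vector<std::string> Segments) {
  unsigned Continuation = writeRecord(Segments.back());
  Segments.pop_back();
  for (auto I = Segments.rbegin(), E = Segments.rend(); I != E; ++I) {
    // A real builder would patch Continuation into the segment's trailing
    // continuation field; here it is just appended for illustration.
    Continuation = writeRecord(*I + std::to_string(Continuation));
  }
  return Continuation; // index of the first segment, which callers reference
}

int main() {
  unsigned TI = emitWithContinuations({"seg0", "seg1", "seg2"});
  std::printf("list starts at index %#x\n", TI);
}
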
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/MemoryTypeTableBuilder.cpp b/contrib/llvm/lib/DebugInfo/CodeView/MemoryTypeTableBuilder.cpp
deleted file mode 100644
index 8b9e73b94ff5..000000000000
--- a/contrib/llvm/lib/DebugInfo/CodeView/MemoryTypeTableBuilder.cpp
+++ /dev/null
@@ -1,46 +0,0 @@
-//===-- MemoryTypeTableBuilder.cpp ----------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/DebugInfo/CodeView/MemoryTypeTableBuilder.h"
-#include "llvm/DebugInfo/CodeView/TypeIndex.h"
-
-using namespace llvm;
-using namespace codeview;
-
-TypeIndex MemoryTypeTableBuilder::writeRecord(StringRef Data) {
- assert(Data.size() <= UINT16_MAX);
- auto I = HashedRecords.find(Data);
- if (I != HashedRecords.end()) {
- return I->second;
- }
-
- // The record provided by the user lacks the 2 byte size field prefix and is
- // not padded to 4 bytes. Ultimately, that is what gets emitted in the object
- // file, so pad it out now.
- const int SizeOfRecLen = 2;
- const int Align = 4;
- int TotalSize = alignTo(Data.size() + SizeOfRecLen, Align);
- assert(TotalSize - SizeOfRecLen <= UINT16_MAX);
- char *Mem =
- reinterpret_cast<char *>(RecordStorage.Allocate(TotalSize, Align));
- *reinterpret_cast<ulittle16_t *>(Mem) = uint16_t(TotalSize - SizeOfRecLen);
- memcpy(Mem + SizeOfRecLen, Data.data(), Data.size());
- for (int I = Data.size() + SizeOfRecLen; I < TotalSize; ++I)
- Mem[I] = LF_PAD0 + (TotalSize - I);
-
- TypeIndex TI(static_cast<uint32_t>(Records.size()) +
- TypeIndex::FirstNonSimpleIndex);
-
- // Use only the data supplied by the user as a key to the hash table, so that
- // future lookups will succeed.
- HashedRecords.insert(std::make_pair(StringRef(Mem + SizeOfRecLen, Data.size()), TI));
- Records.push_back(StringRef(Mem, TotalSize));
-
- return TI;
-}
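
The deleted MemoryTypeTableBuilder::writeRecord describes the on-disk shape of a type record: a 2-byte length prefix that excludes itself, the payload, and padding up to 4-byte alignment where each pad byte is LF_PAD0 plus the number of bytes remaining in the record. A small sketch of that layout computation, assuming LF_PAD0 = 0xf0 and using a plain byte vector instead of the bump allocator:

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>

constexpr uint8_t LF_PAD0 = 0xf0;

// Lay out one type record: 2-byte little-endian length prefix (excluding the
// prefix itself), the payload, then pad to a 4-byte boundary with pad bytes
// whose low nibble says how many bytes remain until the next record.
static std::vector<uint8_t> layoutRecord(const std::string &Data) {
  const size_t SizeOfRecLen = 2, Align = 4;
  size_t TotalSize = (Data.size() + SizeOfRecLen + Align - 1) / Align * Align;
  std::vector<uint8_t> Rec(TotalSize);
  uint16_t Len = uint16_t(TotalSize - SizeOfRecLen);
  Rec[0] = uint8_t(Len);
  Rec[1] = uint8_t(Len >> 8);
  std::copy(Data.begin(), Data.end(), Rec.begin() + SizeOfRecLen);
  for (size_t I = Data.size() + SizeOfRecLen; I < TotalSize; ++I)
    Rec[I] = uint8_t(LF_PAD0 + (TotalSize - I));
  return Rec;
}

int main() {
  auto Rec = layoutRecord("\x02\x15payload"); // 9-byte payload -> 12 total
  std::printf("record occupies %zu bytes\n", Rec.size());
}
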
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/MethodListRecordBuilder.cpp b/contrib/llvm/lib/DebugInfo/CodeView/MethodListRecordBuilder.cpp
deleted file mode 100644
index ae089a352081..000000000000
--- a/contrib/llvm/lib/DebugInfo/CodeView/MethodListRecordBuilder.cpp
+++ /dev/null
@@ -1,49 +0,0 @@
-//===-- MethodListRecordBuilder.cpp ---------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/DebugInfo/CodeView/MethodListRecordBuilder.h"
-#include "llvm/DebugInfo/CodeView/FieldListRecordBuilder.h"
-
-using namespace llvm;
-using namespace codeview;
-
-MethodListRecordBuilder::MethodListRecordBuilder()
- : ListRecordBuilder(TypeRecordKind::MethodOverloadList) {}
-
-void MethodListRecordBuilder::writeMethod(MemberAccess Access, MethodKind Kind,
- MethodOptions Options, TypeIndex Type,
- int32_t VTableSlotOffset) {
- TypeRecordBuilder &Builder = getBuilder();
-
- uint16_t Flags = static_cast<uint16_t>(Access);
- Flags |= static_cast<uint16_t>(Kind) << MethodKindShift;
- Flags |= static_cast<uint16_t>(Options);
-
- Builder.writeUInt16(Flags);
- Builder.writeUInt16(0);
- Builder.writeTypeIndex(Type);
- switch (Kind) {
- case MethodKind::IntroducingVirtual:
- case MethodKind::PureIntroducingVirtual:
- assert(VTableSlotOffset >= 0);
- Builder.writeInt32(VTableSlotOffset);
- break;
-
- default:
- assert(VTableSlotOffset == -1);
- break;
- }
-
- // TODO: Fail if too big?
-}
-
-void MethodListRecordBuilder::writeMethod(const MethodInfo &Method) {
- writeMethod(Method.getAccess(), Method.getKind(), Method.getOptions(),
- Method.getType(), Method.getVTableSlotOffset());
-}
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/ModuleSubstream.cpp b/contrib/llvm/lib/DebugInfo/CodeView/ModuleSubstream.cpp
index 2e31ed6b5b7f..768ebaa1c980 100644
--- a/contrib/llvm/lib/DebugInfo/CodeView/ModuleSubstream.cpp
+++ b/contrib/llvm/lib/DebugInfo/CodeView/ModuleSubstream.cpp
@@ -9,17 +9,20 @@
#include "llvm/DebugInfo/CodeView/ModuleSubstream.h"
-#include "llvm/DebugInfo/CodeView/StreamReader.h"
+#include "llvm/DebugInfo/MSF/StreamReader.h"
using namespace llvm;
using namespace llvm::codeview;
+using namespace llvm::msf;
ModuleSubstream::ModuleSubstream() : Kind(ModuleSubstreamKind::None) {}
-ModuleSubstream::ModuleSubstream(ModuleSubstreamKind Kind, StreamRef Data)
+ModuleSubstream::ModuleSubstream(ModuleSubstreamKind Kind,
+ ReadableStreamRef Data)
: Kind(Kind), Data(Data) {}
-Error ModuleSubstream::initialize(StreamRef Stream, ModuleSubstream &Info) {
+Error ModuleSubstream::initialize(ReadableStreamRef Stream,
+ ModuleSubstream &Info) {
const ModuleSubsectionHeader *Header;
StreamReader Reader(Stream);
if (auto EC = Reader.readObject(Header))
@@ -39,4 +42,4 @@ uint32_t ModuleSubstream::getRecordLength() const {
ModuleSubstreamKind ModuleSubstream::getSubstreamKind() const { return Kind; }
-StreamRef ModuleSubstream::getRecordData() const { return Data; }
+ReadableStreamRef ModuleSubstream::getRecordData() const { return Data; }
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/ModuleSubstreamVisitor.cpp b/contrib/llvm/lib/DebugInfo/CodeView/ModuleSubstreamVisitor.cpp
index 6f237ee67fe4..524793277980 100644
--- a/contrib/llvm/lib/DebugInfo/CodeView/ModuleSubstreamVisitor.cpp
+++ b/contrib/llvm/lib/DebugInfo/CodeView/ModuleSubstreamVisitor.cpp
@@ -8,50 +8,54 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/CodeView/ModuleSubstreamVisitor.h"
+#include "llvm/DebugInfo/MSF/StreamReader.h"
+#include "llvm/DebugInfo/MSF/StreamRef.h"
using namespace llvm;
using namespace llvm::codeview;
+using namespace llvm::msf;
-Error IModuleSubstreamVisitor::visitSymbols(StreamRef Data) {
+Error IModuleSubstreamVisitor::visitSymbols(ReadableStreamRef Data) {
return visitUnknown(ModuleSubstreamKind::Symbols, Data);
}
-Error IModuleSubstreamVisitor::visitLines(StreamRef Data,
+Error IModuleSubstreamVisitor::visitLines(ReadableStreamRef Data,
const LineSubstreamHeader *Header,
const LineInfoArray &Lines) {
return visitUnknown(ModuleSubstreamKind::Lines, Data);
}
-Error IModuleSubstreamVisitor::visitStringTable(StreamRef Data) {
+Error IModuleSubstreamVisitor::visitStringTable(ReadableStreamRef Data) {
return visitUnknown(ModuleSubstreamKind::StringTable, Data);
}
Error IModuleSubstreamVisitor::visitFileChecksums(
- StreamRef Data, const FileChecksumArray &Checksums) {
+ ReadableStreamRef Data, const FileChecksumArray &Checksums) {
return visitUnknown(ModuleSubstreamKind::FileChecksums, Data);
}
-Error IModuleSubstreamVisitor::visitFrameData(StreamRef Data) {
+Error IModuleSubstreamVisitor::visitFrameData(ReadableStreamRef Data) {
return visitUnknown(ModuleSubstreamKind::FrameData, Data);
}
-Error IModuleSubstreamVisitor::visitInlineeLines(StreamRef Data) {
+Error IModuleSubstreamVisitor::visitInlineeLines(ReadableStreamRef Data) {
return visitUnknown(ModuleSubstreamKind::InlineeLines, Data);
}
-Error IModuleSubstreamVisitor::visitCrossScopeImports(StreamRef Data) {
+Error IModuleSubstreamVisitor::visitCrossScopeImports(ReadableStreamRef Data) {
return visitUnknown(ModuleSubstreamKind::CrossScopeExports, Data);
}
-Error IModuleSubstreamVisitor::visitCrossScopeExports(StreamRef Data) {
+Error IModuleSubstreamVisitor::visitCrossScopeExports(ReadableStreamRef Data) {
return visitUnknown(ModuleSubstreamKind::CrossScopeImports, Data);
}
-Error IModuleSubstreamVisitor::visitILLines(StreamRef Data) {
+Error IModuleSubstreamVisitor::visitILLines(ReadableStreamRef Data) {
return visitUnknown(ModuleSubstreamKind::ILLines, Data);
}
-Error IModuleSubstreamVisitor::visitFuncMDTokenMap(StreamRef Data) {
+Error IModuleSubstreamVisitor::visitFuncMDTokenMap(ReadableStreamRef Data) {
return visitUnknown(ModuleSubstreamKind::FuncMDTokenMap, Data);
}
-Error IModuleSubstreamVisitor::visitTypeMDTokenMap(StreamRef Data) {
+Error IModuleSubstreamVisitor::visitTypeMDTokenMap(ReadableStreamRef Data) {
return visitUnknown(ModuleSubstreamKind::TypeMDTokenMap, Data);
}
-Error IModuleSubstreamVisitor::visitMergedAssemblyInput(StreamRef Data) {
+Error IModuleSubstreamVisitor::visitMergedAssemblyInput(
+ ReadableStreamRef Data) {
return visitUnknown(ModuleSubstreamKind::MergedAssemblyInput, Data);
}
-Error IModuleSubstreamVisitor::visitCoffSymbolRVA(StreamRef Data) {
+Error IModuleSubstreamVisitor::visitCoffSymbolRVA(ReadableStreamRef Data) {
return visitUnknown(ModuleSubstreamKind::CoffSymbolRVA, Data);
}
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/RecordSerialization.cpp b/contrib/llvm/lib/DebugInfo/CodeView/RecordSerialization.cpp
index ab9206a33ec0..6f29caa9bbfc 100644
--- a/contrib/llvm/lib/DebugInfo/CodeView/RecordSerialization.cpp
+++ b/contrib/llvm/lib/DebugInfo/CodeView/RecordSerialization.cpp
@@ -14,7 +14,9 @@
#include "llvm/DebugInfo/CodeView/RecordSerialization.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
+#include "llvm/DebugInfo/CodeView/CodeViewError.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/MSF/ByteStream.h"
using namespace llvm;
using namespace llvm::codeview;
@@ -31,141 +33,117 @@ StringRef llvm::codeview::getBytesAsCString(ArrayRef<uint8_t> LeafData) {
return getBytesAsCharacters(LeafData).split('\0').first;
}
-std::error_code llvm::codeview::consume(ArrayRef<uint8_t> &Data, APSInt &Num) {
+Error llvm::codeview::consume(msf::StreamReader &Reader, APSInt &Num) {
// Used to avoid overload ambiguity on APInt constructor.
bool FalseVal = false;
- if (Data.size() < 2)
- return std::make_error_code(std::errc::illegal_byte_sequence);
- uint16_t Short = *reinterpret_cast<const ulittle16_t *>(Data.data());
- Data = Data.drop_front(2);
+ uint16_t Short;
+ if (auto EC = Reader.readInteger(Short))
+ return EC;
+
if (Short < LF_NUMERIC) {
Num = APSInt(APInt(/*numBits=*/16, Short, /*isSigned=*/false),
/*isUnsigned=*/true);
- return std::error_code();
+ return Error::success();
}
+
switch (Short) {
- case LF_CHAR:
- if (Data.size() < 1)
- return std::make_error_code(std::errc::illegal_byte_sequence);
- Num = APSInt(APInt(/*numBits=*/8,
- *reinterpret_cast<const int8_t *>(Data.data()),
- /*isSigned=*/true),
- /*isUnsigned=*/false);
- Data = Data.drop_front(1);
- return std::error_code();
- case LF_SHORT:
- if (Data.size() < 2)
- return std::make_error_code(std::errc::illegal_byte_sequence);
- Num = APSInt(APInt(/*numBits=*/16,
- *reinterpret_cast<const little16_t *>(Data.data()),
- /*isSigned=*/true),
- /*isUnsigned=*/false);
- Data = Data.drop_front(2);
- return std::error_code();
- case LF_USHORT:
- if (Data.size() < 2)
- return std::make_error_code(std::errc::illegal_byte_sequence);
- Num = APSInt(APInt(/*numBits=*/16,
- *reinterpret_cast<const ulittle16_t *>(Data.data()),
- /*isSigned=*/false),
- /*isUnsigned=*/true);
- Data = Data.drop_front(2);
- return std::error_code();
- case LF_LONG:
- if (Data.size() < 4)
- return std::make_error_code(std::errc::illegal_byte_sequence);
- Num = APSInt(APInt(/*numBits=*/32,
- *reinterpret_cast<const little32_t *>(Data.data()),
- /*isSigned=*/true),
- /*isUnsigned=*/false);
- Data = Data.drop_front(4);
- return std::error_code();
- case LF_ULONG:
- if (Data.size() < 4)
- return std::make_error_code(std::errc::illegal_byte_sequence);
- Num = APSInt(APInt(/*numBits=*/32,
- *reinterpret_cast<const ulittle32_t *>(Data.data()),
- /*isSigned=*/FalseVal),
- /*isUnsigned=*/true);
- Data = Data.drop_front(4);
- return std::error_code();
- case LF_QUADWORD:
- if (Data.size() < 8)
- return std::make_error_code(std::errc::illegal_byte_sequence);
- Num = APSInt(APInt(/*numBits=*/64,
- *reinterpret_cast<const little64_t *>(Data.data()),
- /*isSigned=*/true),
- /*isUnsigned=*/false);
- Data = Data.drop_front(8);
- return std::error_code();
- case LF_UQUADWORD:
- if (Data.size() < 8)
- return std::make_error_code(std::errc::illegal_byte_sequence);
- Num = APSInt(APInt(/*numBits=*/64,
- *reinterpret_cast<const ulittle64_t *>(Data.data()),
- /*isSigned=*/false),
- /*isUnsigned=*/true);
- Data = Data.drop_front(8);
- return std::error_code();
+ case LF_CHAR: {
+ int8_t N;
+ if (auto EC = Reader.readInteger(N))
+ return EC;
+ Num = APSInt(APInt(8, N, true), false);
+ return Error::success();
+ }
+ case LF_SHORT: {
+ int16_t N;
+ if (auto EC = Reader.readInteger(N))
+ return EC;
+ Num = APSInt(APInt(16, N, true), false);
+ return Error::success();
+ }
+ case LF_USHORT: {
+ uint16_t N;
+ if (auto EC = Reader.readInteger(N))
+ return EC;
+ Num = APSInt(APInt(16, N, false), true);
+ return Error::success();
}
- return std::make_error_code(std::errc::illegal_byte_sequence);
+ case LF_LONG: {
+ int32_t N;
+ if (auto EC = Reader.readInteger(N))
+ return EC;
+ Num = APSInt(APInt(32, N, true), false);
+ return Error::success();
+ }
+ case LF_ULONG: {
+ uint32_t N;
+ if (auto EC = Reader.readInteger(N))
+ return EC;
+ Num = APSInt(APInt(32, N, FalseVal), true);
+ return Error::success();
+ }
+ case LF_QUADWORD: {
+ int64_t N;
+ if (auto EC = Reader.readInteger(N))
+ return EC;
+ Num = APSInt(APInt(64, N, true), false);
+ return Error::success();
+ }
+ case LF_UQUADWORD: {
+ uint64_t N;
+ if (auto EC = Reader.readInteger(N))
+ return EC;
+ Num = APSInt(APInt(64, N, false), true);
+ return Error::success();
+ }
+ }
+ return make_error<CodeViewError>(cv_error_code::corrupt_record,
+ "Buffer contains invalid APSInt type");
}
-std::error_code llvm::codeview::consume(StringRef &Data, APSInt &Num) {
+Error llvm::codeview::consume(StringRef &Data, APSInt &Num) {
ArrayRef<uint8_t> Bytes(Data.bytes_begin(), Data.bytes_end());
- auto EC = consume(Bytes, Num);
- Data = StringRef(reinterpret_cast<const char *>(Bytes.data()), Bytes.size());
+ msf::ByteStream S(Bytes);
+ msf::StreamReader SR(S);
+ auto EC = consume(SR, Num);
+ Data = Data.take_back(SR.bytesRemaining());
return EC;
}
/// Decode a numeric leaf value that is known to be a uint64_t.
-std::error_code llvm::codeview::consume_numeric(ArrayRef<uint8_t> &Data,
- uint64_t &Num) {
+Error llvm::codeview::consume_numeric(msf::StreamReader &Reader,
+ uint64_t &Num) {
APSInt N;
- if (auto EC = consume(Data, N))
+ if (auto EC = consume(Reader, N))
return EC;
if (N.isSigned() || !N.isIntN(64))
- return std::make_error_code(std::errc::illegal_byte_sequence);
+ return make_error<CodeViewError>(cv_error_code::corrupt_record,
+ "Data is not a numeric value!");
Num = N.getLimitedValue();
- return std::error_code();
+ return Error::success();
}
-std::error_code llvm::codeview::consume(ArrayRef<uint8_t> &Data,
- uint32_t &Item) {
- const support::ulittle32_t *IntPtr;
- if (auto EC = consumeObject(Data, IntPtr))
- return EC;
- Item = *IntPtr;
- return std::error_code();
+Error llvm::codeview::consume(msf::StreamReader &Reader, uint32_t &Item) {
+ return Reader.readInteger(Item);
}
-std::error_code llvm::codeview::consume(StringRef &Data, uint32_t &Item) {
+Error llvm::codeview::consume(StringRef &Data, uint32_t &Item) {
ArrayRef<uint8_t> Bytes(Data.bytes_begin(), Data.bytes_end());
- auto EC = consume(Bytes, Item);
- Data = StringRef(reinterpret_cast<const char *>(Bytes.data()), Bytes.size());
+ msf::ByteStream S(Bytes);
+ msf::StreamReader SR(S);
+ auto EC = consume(SR, Item);
+ Data = Data.take_back(SR.bytesRemaining());
return EC;
}
-std::error_code llvm::codeview::consume(ArrayRef<uint8_t> &Data,
- int32_t &Item) {
- const support::little32_t *IntPtr;
- if (auto EC = consumeObject(Data, IntPtr))
- return EC;
- Item = *IntPtr;
- return std::error_code();
+Error llvm::codeview::consume(msf::StreamReader &Reader, int32_t &Item) {
+ return Reader.readInteger(Item);
}
-std::error_code llvm::codeview::consume(ArrayRef<uint8_t> &Data,
- StringRef &Item) {
- if (Data.empty())
- return std::make_error_code(std::errc::illegal_byte_sequence);
-
- StringRef Rest;
- std::tie(Item, Rest) = getBytesAsCharacters(Data).split('\0');
- // We expect this to be null terminated. If it was not, it is an error.
- if (Data.size() == Item.size())
- return std::make_error_code(std::errc::illegal_byte_sequence);
+Error llvm::codeview::consume(msf::StreamReader &Reader, StringRef &Item) {
+ if (Reader.empty())
+ return make_error<CodeViewError>(cv_error_code::corrupt_record,
+ "Null terminated string buffer is empty!");
- Data = ArrayRef<uint8_t>(Rest.bytes_begin(), Rest.bytes_end());
- return std::error_code();
+ return Reader.readZeroString(Item);
}
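
The StringRef overloads of consume() above all use the same adapter: wrap the string's bytes in a ByteStream, hand a StreamReader to the stream-based consume(), then shrink the StringRef to the unread tail with take_back(bytesRemaining()). A minimal sketch of that pattern with a hand-rolled cursor; ByteReader and consumeU16 are illustrative stand-ins, not the msf classes.

#include <cstdint>
#include <cstdio>
#include <cstring>
#include <string>

// Minimal cursor over a byte buffer, standing in for msf::StreamReader.
struct ByteReader {
  const uint8_t *Data;
  size_t Size;
  size_t Offset = 0;
  size_t bytesRemaining() const { return Size - Offset; }
  bool readU16(uint16_t &V) {
    if (bytesRemaining() < 2)
      return false;
    std::memcpy(&V, Data + Offset, 2); // assumes a little-endian host
    Offset += 2;
    return true;
  }
};

// Adapter in the spirit of the StringRef overloads: parse from the front of
// Str, then shrink Str so it refers only to the unread tail.
static bool consumeU16(std::string &Str, uint16_t &V) {
  ByteReader R{reinterpret_cast<const uint8_t *>(Str.data()), Str.size()};
  if (!R.readU16(V))
    return false;
  Str = Str.substr(Str.size() - R.bytesRemaining()); // keep only the tail
  return true;
}

int main() {
  std::string Buf("\x2a\x00rest", 6);
  uint16_t V = 0;
  if (consumeU16(Buf, V))
    std::printf("read %u, %zu bytes left: %s\n", unsigned(V), Buf.size(),
                Buf.c_str());
}
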
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp b/contrib/llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp
index 6763c3d562d7..326e1f5add65 100644
--- a/contrib/llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp
+++ b/contrib/llvm/lib/DebugInfo/CodeView/SymbolDumper.cpp
@@ -12,10 +12,14 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/DebugInfo/CodeView/CVSymbolVisitor.h"
#include "llvm/DebugInfo/CodeView/EnumTables.h"
+#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
#include "llvm/DebugInfo/CodeView/SymbolDumpDelegate.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
+#include "llvm/DebugInfo/CodeView/SymbolVisitorCallbackPipeline.h"
+#include "llvm/DebugInfo/CodeView/SymbolVisitorCallbacks.h"
#include "llvm/DebugInfo/CodeView/TypeDumper.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
+#include "llvm/Support/Error.h"
#include "llvm/Support/ScopedPrinter.h"
#include <system_error>
@@ -26,22 +30,22 @@ using namespace llvm::codeview;
namespace {
/// Use this private dumper implementation to keep implementation details about
/// the visitor out of SymbolDumper.h.
-class CVSymbolDumperImpl : public CVSymbolVisitor<CVSymbolDumperImpl> {
+class CVSymbolDumperImpl : public SymbolVisitorCallbacks {
public:
CVSymbolDumperImpl(CVTypeDumper &CVTD, SymbolDumpDelegate *ObjDelegate,
ScopedPrinter &W, bool PrintRecordBytes)
- : CVSymbolVisitor(ObjDelegate), CVTD(CVTD), ObjDelegate(ObjDelegate),
- W(W), PrintRecordBytes(PrintRecordBytes), InFunctionScope(false) {}
+ : CVTD(CVTD), ObjDelegate(ObjDelegate), W(W),
+ PrintRecordBytes(PrintRecordBytes), InFunctionScope(false) {}
/// CVSymbolVisitor overrides.
#define SYMBOL_RECORD(EnumName, EnumVal, Name) \
- void visit##Name(SymbolKind Kind, Name &Record);
+ Error visitKnownRecord(CVSymbol &CVR, Name &Record) override;
#define SYMBOL_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
#include "llvm/DebugInfo/CodeView/CVSymbolTypes.def"
- void visitSymbolBegin(SymbolKind Kind, ArrayRef<uint8_t> Data);
- void visitSymbolEnd(SymbolKind Kind, ArrayRef<uint8_t> OriginalSymData);
- void visitUnknownSymbol(SymbolKind Kind, ArrayRef<uint8_t> Data);
+ Error visitSymbolBegin(CVSymbol &Record) override;
+ Error visitSymbolEnd(CVSymbol &Record) override;
+ Error visitUnknownSymbol(CVSymbol &Record) override;
private:
void printLocalVariableAddrRange(const LocalVariableAddrRange &Range,
@@ -76,376 +80,391 @@ void CVSymbolDumperImpl::printLocalVariableAddrGap(
}
}
-void CVSymbolDumperImpl::visitSymbolBegin(SymbolKind Kind,
- ArrayRef<uint8_t> Data) {}
+Error CVSymbolDumperImpl::visitSymbolBegin(CVSymbol &CVR) {
+ return Error::success();
+}
-void CVSymbolDumperImpl::visitSymbolEnd(SymbolKind Kind,
- ArrayRef<uint8_t> OriginalSymData) {
+Error CVSymbolDumperImpl::visitSymbolEnd(CVSymbol &CVR) {
if (PrintRecordBytes && ObjDelegate)
- ObjDelegate->printBinaryBlockWithRelocs("SymData", OriginalSymData);
+ ObjDelegate->printBinaryBlockWithRelocs("SymData", CVR.content());
+ return Error::success();
}
-void CVSymbolDumperImpl::visitBlockSym(SymbolKind Kind, BlockSym &Block) {
+Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, BlockSym &Block) {
DictScope S(W, "BlockStart");
StringRef LinkageName;
- W.printHex("PtrParent", Block.Header.PtrParent);
- W.printHex("PtrEnd", Block.Header.PtrEnd);
- W.printHex("CodeSize", Block.Header.CodeSize);
+ W.printHex("PtrParent", Block.Parent);
+ W.printHex("PtrEnd", Block.End);
+ W.printHex("CodeSize", Block.CodeSize);
if (ObjDelegate) {
ObjDelegate->printRelocatedField("CodeOffset", Block.getRelocationOffset(),
- Block.Header.CodeOffset, &LinkageName);
+ Block.CodeOffset, &LinkageName);
}
- W.printHex("Segment", Block.Header.Segment);
+ W.printHex("Segment", Block.Segment);
W.printString("BlockName", Block.Name);
W.printString("LinkageName", LinkageName);
+ return Error::success();
}
-void CVSymbolDumperImpl::visitThunk32Sym(SymbolKind Kind, Thunk32Sym &Thunk) {
+Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, Thunk32Sym &Thunk) {
DictScope S(W, "Thunk32");
- W.printNumber("Parent", Thunk.Header.Parent);
- W.printNumber("End", Thunk.Header.End);
- W.printNumber("Next", Thunk.Header.Next);
- W.printNumber("Off", Thunk.Header.Off);
- W.printNumber("Seg", Thunk.Header.Seg);
- W.printNumber("Len", Thunk.Header.Len);
- W.printEnum("Ordinal", Thunk.Header.Ord, getThunkOrdinalNames());
-}
-
-void CVSymbolDumperImpl::visitTrampolineSym(SymbolKind Kind,
- TrampolineSym &Tramp) {
+ W.printNumber("Parent", Thunk.Parent);
+ W.printNumber("End", Thunk.End);
+ W.printNumber("Next", Thunk.Next);
+ W.printNumber("Off", Thunk.Offset);
+ W.printNumber("Seg", Thunk.Segment);
+ W.printNumber("Len", Thunk.Length);
+ W.printEnum("Ordinal", uint8_t(Thunk.Thunk), getThunkOrdinalNames());
+ return Error::success();
+}
+
+Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
+ TrampolineSym &Tramp) {
DictScope S(W, "Trampoline");
- W.printEnum("Type", Tramp.Header.Type, getTrampolineNames());
- W.printNumber("Size", Tramp.Header.Size);
- W.printNumber("ThunkOff", Tramp.Header.ThunkOff);
- W.printNumber("TargetOff", Tramp.Header.TargetOff);
- W.printNumber("ThunkSection", Tramp.Header.ThunkSection);
- W.printNumber("TargetSection", Tramp.Header.TargetSection);
+ W.printEnum("Type", uint16_t(Tramp.Type), getTrampolineNames());
+ W.printNumber("Size", Tramp.Size);
+ W.printNumber("ThunkOff", Tramp.ThunkOffset);
+ W.printNumber("TargetOff", Tramp.TargetOffset);
+ W.printNumber("ThunkSection", Tramp.ThunkSection);
+ W.printNumber("TargetSection", Tramp.TargetSection);
+ return Error::success();
}
-void CVSymbolDumperImpl::visitSectionSym(SymbolKind Kind, SectionSym &Section) {
+Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, SectionSym &Section) {
DictScope S(W, "Section");
- W.printNumber("SectionNumber", Section.Header.SectionNumber);
- W.printNumber("Alignment", Section.Header.Alignment);
- W.printNumber("Reserved", Section.Header.Reserved);
- W.printNumber("Rva", Section.Header.Rva);
- W.printNumber("Length", Section.Header.Length);
- W.printFlags("Characteristics", Section.Header.Characteristics,
+ W.printNumber("SectionNumber", Section.SectionNumber);
+ W.printNumber("Alignment", Section.Alignment);
+ W.printNumber("Rva", Section.Rva);
+ W.printNumber("Length", Section.Length);
+ W.printFlags("Characteristics", Section.Characteristics,
getImageSectionCharacteristicNames(),
COFF::SectionCharacteristics(0x00F00000));
W.printString("Name", Section.Name);
+ return Error::success();
}
-void CVSymbolDumperImpl::visitCoffGroupSym(SymbolKind Kind,
+Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
CoffGroupSym &CoffGroup) {
DictScope S(W, "COFF Group");
- W.printNumber("Size", CoffGroup.Header.Size);
- W.printFlags("Characteristics", CoffGroup.Header.Characteristics,
+ W.printNumber("Size", CoffGroup.Size);
+ W.printFlags("Characteristics", CoffGroup.Characteristics,
getImageSectionCharacteristicNames(),
COFF::SectionCharacteristics(0x00F00000));
- W.printNumber("Offset", CoffGroup.Header.Offset);
- W.printNumber("Segment", CoffGroup.Header.Segment);
+ W.printNumber("Offset", CoffGroup.Offset);
+ W.printNumber("Segment", CoffGroup.Segment);
W.printString("Name", CoffGroup.Name);
+ return Error::success();
}
-void CVSymbolDumperImpl::visitBPRelativeSym(SymbolKind Kind,
- BPRelativeSym &BPRel) {
+Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
+ BPRelativeSym &BPRel) {
DictScope S(W, "BPRelativeSym");
- W.printNumber("Offset", BPRel.Header.Offset);
- CVTD.printTypeIndex("Type", BPRel.Header.Type);
+ W.printNumber("Offset", BPRel.Offset);
+ CVTD.printTypeIndex("Type", BPRel.Type);
W.printString("VarName", BPRel.Name);
+ return Error::success();
}
-void CVSymbolDumperImpl::visitBuildInfoSym(SymbolKind Kind,
+Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
BuildInfoSym &BuildInfo) {
DictScope S(W, "BuildInfo");
- W.printNumber("BuildId", BuildInfo.Header.BuildId);
+ W.printNumber("BuildId", BuildInfo.BuildId);
+ return Error::success();
}
-void CVSymbolDumperImpl::visitCallSiteInfoSym(SymbolKind Kind,
- CallSiteInfoSym &CallSiteInfo) {
+Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
+ CallSiteInfoSym &CallSiteInfo) {
DictScope S(W, "CallSiteInfo");
StringRef LinkageName;
if (ObjDelegate) {
- ObjDelegate->printRelocatedField(
- "CodeOffset", CallSiteInfo.getRelocationOffset(),
- CallSiteInfo.Header.CodeOffset, &LinkageName);
+ ObjDelegate->printRelocatedField("CodeOffset",
+ CallSiteInfo.getRelocationOffset(),
+ CallSiteInfo.CodeOffset, &LinkageName);
}
- W.printHex("Segment", CallSiteInfo.Header.Segment);
- W.printHex("Reserved", CallSiteInfo.Header.Reserved);
- CVTD.printTypeIndex("Type", CallSiteInfo.Header.Type);
+ W.printHex("Segment", CallSiteInfo.Segment);
+ CVTD.printTypeIndex("Type", CallSiteInfo.Type);
if (!LinkageName.empty())
W.printString("LinkageName", LinkageName);
+ return Error::success();
}
-void CVSymbolDumperImpl::visitEnvBlockSym(SymbolKind Kind,
- EnvBlockSym &EnvBlock) {
+Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
+ EnvBlockSym &EnvBlock) {
DictScope S(W, "EnvBlock");
- W.printNumber("Reserved", EnvBlock.Header.Reserved);
ListScope L(W, "Entries");
for (auto Entry : EnvBlock.Fields) {
W.printString(Entry);
}
+ return Error::success();
}
-void CVSymbolDumperImpl::visitFileStaticSym(SymbolKind Kind,
- FileStaticSym &FileStatic) {
+Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
+ FileStaticSym &FileStatic) {
DictScope S(W, "FileStatic");
- W.printNumber("Index", FileStatic.Header.Index);
- W.printNumber("ModFilenameOffset", FileStatic.Header.ModFilenameOffset);
- W.printFlags("Flags", uint16_t(FileStatic.Header.Flags), getLocalFlagNames());
+ W.printNumber("Index", FileStatic.Index);
+ W.printNumber("ModFilenameOffset", FileStatic.ModFilenameOffset);
+ W.printFlags("Flags", uint16_t(FileStatic.Flags), getLocalFlagNames());
W.printString("Name", FileStatic.Name);
+ return Error::success();
}
-void CVSymbolDumperImpl::visitExportSym(SymbolKind Kind, ExportSym &Export) {
+Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, ExportSym &Export) {
DictScope S(W, "Export");
- W.printNumber("Ordinal", Export.Header.Ordinal);
- W.printFlags("Flags", Export.Header.Flags, getExportSymFlagNames());
+ W.printNumber("Ordinal", Export.Ordinal);
+ W.printFlags("Flags", uint16_t(Export.Flags), getExportSymFlagNames());
W.printString("Name", Export.Name);
+ return Error::success();
}
-void CVSymbolDumperImpl::visitCompile2Sym(SymbolKind Kind,
- Compile2Sym &Compile2) {
+Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
+ Compile2Sym &Compile2) {
DictScope S(W, "CompilerFlags2");
- W.printEnum("Language", Compile2.Header.getLanguage(),
- getSourceLanguageNames());
- W.printFlags("Flags", Compile2.Header.flags & ~0xff,
- getCompileSym2FlagNames());
- W.printEnum("Machine", unsigned(Compile2.Header.Machine), getCPUTypeNames());
+ W.printEnum("Language", Compile2.getLanguage(), getSourceLanguageNames());
+ W.printFlags("Flags", Compile2.getFlags(), getCompileSym2FlagNames());
+ W.printEnum("Machine", unsigned(Compile2.Machine), getCPUTypeNames());
std::string FrontendVersion;
{
raw_string_ostream Out(FrontendVersion);
- Out << Compile2.Header.VersionFrontendMajor << '.'
- << Compile2.Header.VersionFrontendMinor << '.'
- << Compile2.Header.VersionFrontendBuild;
+ Out << Compile2.VersionFrontendMajor << '.' << Compile2.VersionFrontendMinor
+ << '.' << Compile2.VersionFrontendBuild;
}
std::string BackendVersion;
{
raw_string_ostream Out(BackendVersion);
- Out << Compile2.Header.VersionBackendMajor << '.'
- << Compile2.Header.VersionBackendMinor << '.'
- << Compile2.Header.VersionBackendBuild;
+ Out << Compile2.VersionBackendMajor << '.' << Compile2.VersionBackendMinor
+ << '.' << Compile2.VersionBackendBuild;
}
W.printString("FrontendVersion", FrontendVersion);
W.printString("BackendVersion", BackendVersion);
W.printString("VersionName", Compile2.Version);
+ return Error::success();
}
-void CVSymbolDumperImpl::visitCompile3Sym(SymbolKind Kind,
- Compile3Sym &Compile3) {
+Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
+ Compile3Sym &Compile3) {
DictScope S(W, "CompilerFlags3");
- W.printEnum("Language", Compile3.Header.getLanguage(),
- getSourceLanguageNames());
- W.printFlags("Flags", Compile3.Header.flags & ~0xff,
- getCompileSym3FlagNames());
- W.printEnum("Machine", unsigned(Compile3.Header.Machine), getCPUTypeNames());
+ W.printEnum("Language", Compile3.getLanguage(), getSourceLanguageNames());
+ W.printFlags("Flags", Compile3.getFlags(), getCompileSym3FlagNames());
+ W.printEnum("Machine", unsigned(Compile3.Machine), getCPUTypeNames());
std::string FrontendVersion;
{
raw_string_ostream Out(FrontendVersion);
- Out << Compile3.Header.VersionFrontendMajor << '.'
- << Compile3.Header.VersionFrontendMinor << '.'
- << Compile3.Header.VersionFrontendBuild << '.'
- << Compile3.Header.VersionFrontendQFE;
+ Out << Compile3.VersionFrontendMajor << '.' << Compile3.VersionFrontendMinor
+ << '.' << Compile3.VersionFrontendBuild << '.'
+ << Compile3.VersionFrontendQFE;
}
std::string BackendVersion;
{
raw_string_ostream Out(BackendVersion);
- Out << Compile3.Header.VersionBackendMajor << '.'
- << Compile3.Header.VersionBackendMinor << '.'
- << Compile3.Header.VersionBackendBuild << '.'
- << Compile3.Header.VersionBackendQFE;
+ Out << Compile3.VersionBackendMajor << '.' << Compile3.VersionBackendMinor
+ << '.' << Compile3.VersionBackendBuild << '.'
+ << Compile3.VersionBackendQFE;
}
W.printString("FrontendVersion", FrontendVersion);
W.printString("BackendVersion", BackendVersion);
W.printString("VersionName", Compile3.Version);
+ return Error::success();
}
-void CVSymbolDumperImpl::visitConstantSym(SymbolKind Kind,
- ConstantSym &Constant) {
+Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
+ ConstantSym &Constant) {
DictScope S(W, "Constant");
- CVTD.printTypeIndex("Type", Constant.Header.Type);
+ CVTD.printTypeIndex("Type", Constant.Type);
W.printNumber("Value", Constant.Value);
W.printString("Name", Constant.Name);
+ return Error::success();
}
-void CVSymbolDumperImpl::visitDataSym(SymbolKind Kind, DataSym &Data) {
+Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, DataSym &Data) {
DictScope S(W, "DataSym");
- W.printEnum("Kind", uint16_t(Kind), getSymbolTypeNames());
+ W.printEnum("Kind", uint16_t(CVR.kind()), getSymbolTypeNames());
StringRef LinkageName;
if (ObjDelegate) {
ObjDelegate->printRelocatedField("DataOffset", Data.getRelocationOffset(),
- Data.Header.DataOffset, &LinkageName);
+ Data.DataOffset, &LinkageName);
}
- CVTD.printTypeIndex("Type", Data.Header.Type);
+ CVTD.printTypeIndex("Type", Data.Type);
W.printString("DisplayName", Data.Name);
if (!LinkageName.empty())
W.printString("LinkageName", LinkageName);
+ return Error::success();
}
-void CVSymbolDumperImpl::visitDefRangeFramePointerRelFullScopeSym(
- SymbolKind Kind,
+Error CVSymbolDumperImpl::visitKnownRecord(
+ CVSymbol &CVR,
DefRangeFramePointerRelFullScopeSym &DefRangeFramePointerRelFullScope) {
DictScope S(W, "DefRangeFramePointerRelFullScope");
- W.printNumber("Offset", DefRangeFramePointerRelFullScope.Header.Offset);
+ W.printNumber("Offset", DefRangeFramePointerRelFullScope.Offset);
+ return Error::success();
}
-void CVSymbolDumperImpl::visitDefRangeFramePointerRelSym(
- SymbolKind Kind, DefRangeFramePointerRelSym &DefRangeFramePointerRel) {
+Error CVSymbolDumperImpl::visitKnownRecord(
+ CVSymbol &CVR, DefRangeFramePointerRelSym &DefRangeFramePointerRel) {
DictScope S(W, "DefRangeFramePointerRel");
- W.printNumber("Offset", DefRangeFramePointerRel.Header.Offset);
- printLocalVariableAddrRange(DefRangeFramePointerRel.Header.Range,
+ W.printNumber("Offset", DefRangeFramePointerRel.Offset);
+ printLocalVariableAddrRange(DefRangeFramePointerRel.Range,
DefRangeFramePointerRel.getRelocationOffset());
printLocalVariableAddrGap(DefRangeFramePointerRel.Gaps);
+ return Error::success();
}
-void CVSymbolDumperImpl::visitDefRangeRegisterRelSym(
- SymbolKind Kind, DefRangeRegisterRelSym &DefRangeRegisterRel) {
+Error CVSymbolDumperImpl::visitKnownRecord(
+ CVSymbol &CVR, DefRangeRegisterRelSym &DefRangeRegisterRel) {
DictScope S(W, "DefRangeRegisterRel");
- W.printNumber("BaseRegister", DefRangeRegisterRel.Header.BaseRegister);
+ W.printNumber("BaseRegister", DefRangeRegisterRel.Hdr.Register);
W.printBoolean("HasSpilledUDTMember",
DefRangeRegisterRel.hasSpilledUDTMember());
W.printNumber("OffsetInParent", DefRangeRegisterRel.offsetInParent());
- W.printNumber("BasePointerOffset",
- DefRangeRegisterRel.Header.BasePointerOffset);
- printLocalVariableAddrRange(DefRangeRegisterRel.Header.Range,
+ W.printNumber("BasePointerOffset", DefRangeRegisterRel.Hdr.BasePointerOffset);
+ printLocalVariableAddrRange(DefRangeRegisterRel.Range,
DefRangeRegisterRel.getRelocationOffset());
printLocalVariableAddrGap(DefRangeRegisterRel.Gaps);
+ return Error::success();
}
-void CVSymbolDumperImpl::visitDefRangeRegisterSym(
- SymbolKind Kind, DefRangeRegisterSym &DefRangeRegister) {
+Error CVSymbolDumperImpl::visitKnownRecord(
+ CVSymbol &CVR, DefRangeRegisterSym &DefRangeRegister) {
DictScope S(W, "DefRangeRegister");
- W.printNumber("Register", DefRangeRegister.Header.Register);
- W.printNumber("MayHaveNoName", DefRangeRegister.Header.MayHaveNoName);
- printLocalVariableAddrRange(DefRangeRegister.Header.Range,
+ W.printNumber("Register", DefRangeRegister.Hdr.Register);
+ W.printNumber("MayHaveNoName", DefRangeRegister.Hdr.MayHaveNoName);
+ printLocalVariableAddrRange(DefRangeRegister.Range,
DefRangeRegister.getRelocationOffset());
printLocalVariableAddrGap(DefRangeRegister.Gaps);
+ return Error::success();
}
-void CVSymbolDumperImpl::visitDefRangeSubfieldRegisterSym(
- SymbolKind Kind, DefRangeSubfieldRegisterSym &DefRangeSubfieldRegister) {
+Error CVSymbolDumperImpl::visitKnownRecord(
+ CVSymbol &CVR, DefRangeSubfieldRegisterSym &DefRangeSubfieldRegister) {
DictScope S(W, "DefRangeSubfieldRegister");
- W.printNumber("Register", DefRangeSubfieldRegister.Header.Register);
- W.printNumber("MayHaveNoName", DefRangeSubfieldRegister.Header.MayHaveNoName);
- W.printNumber("OffsetInParent",
- DefRangeSubfieldRegister.Header.OffsetInParent);
- printLocalVariableAddrRange(DefRangeSubfieldRegister.Header.Range,
+ W.printNumber("Register", DefRangeSubfieldRegister.Hdr.Register);
+ W.printNumber("MayHaveNoName", DefRangeSubfieldRegister.Hdr.MayHaveNoName);
+ W.printNumber("OffsetInParent", DefRangeSubfieldRegister.Hdr.OffsetInParent);
+ printLocalVariableAddrRange(DefRangeSubfieldRegister.Range,
DefRangeSubfieldRegister.getRelocationOffset());
printLocalVariableAddrGap(DefRangeSubfieldRegister.Gaps);
+ return Error::success();
}
-void CVSymbolDumperImpl::visitDefRangeSubfieldSym(
- SymbolKind Kind, DefRangeSubfieldSym &DefRangeSubfield) {
+Error CVSymbolDumperImpl::visitKnownRecord(
+ CVSymbol &CVR, DefRangeSubfieldSym &DefRangeSubfield) {
DictScope S(W, "DefRangeSubfield");
if (ObjDelegate) {
StringRef StringTable = ObjDelegate->getStringTable();
- auto ProgramStringTableOffset = DefRangeSubfield.Header.Program;
+ auto ProgramStringTableOffset = DefRangeSubfield.Program;
if (ProgramStringTableOffset >= StringTable.size())
- return parseError();
+ return llvm::make_error<CodeViewError>(
+ "String table offset outside of bounds of String Table!");
StringRef Program =
StringTable.drop_front(ProgramStringTableOffset).split('\0').first;
W.printString("Program", Program);
}
- W.printNumber("OffsetInParent", DefRangeSubfield.Header.OffsetInParent);
- printLocalVariableAddrRange(DefRangeSubfield.Header.Range,
+ W.printNumber("OffsetInParent", DefRangeSubfield.OffsetInParent);
+ printLocalVariableAddrRange(DefRangeSubfield.Range,
DefRangeSubfield.getRelocationOffset());
printLocalVariableAddrGap(DefRangeSubfield.Gaps);
+ return Error::success();
}
-void CVSymbolDumperImpl::visitDefRangeSym(SymbolKind Kind,
- DefRangeSym &DefRange) {
+Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
+ DefRangeSym &DefRange) {
DictScope S(W, "DefRange");
if (ObjDelegate) {
StringRef StringTable = ObjDelegate->getStringTable();
- auto ProgramStringTableOffset = DefRange.Header.Program;
+ auto ProgramStringTableOffset = DefRange.Program;
if (ProgramStringTableOffset >= StringTable.size())
- return parseError();
+ return llvm::make_error<CodeViewError>(
+ "String table offset outside of bounds of String Table!");
StringRef Program =
StringTable.drop_front(ProgramStringTableOffset).split('\0').first;
W.printString("Program", Program);
}
- printLocalVariableAddrRange(DefRange.Header.Range,
- DefRange.getRelocationOffset());
+ printLocalVariableAddrRange(DefRange.Range, DefRange.getRelocationOffset());
printLocalVariableAddrGap(DefRange.Gaps);
+ return Error::success();
}
-void CVSymbolDumperImpl::visitFrameCookieSym(SymbolKind Kind,
- FrameCookieSym &FrameCookie) {
+Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
+ FrameCookieSym &FrameCookie) {
DictScope S(W, "FrameCookie");
StringRef LinkageName;
if (ObjDelegate) {
- ObjDelegate->printRelocatedField(
- "CodeOffset", FrameCookie.getRelocationOffset(),
- FrameCookie.Header.CodeOffset, &LinkageName);
+ ObjDelegate->printRelocatedField("CodeOffset",
+ FrameCookie.getRelocationOffset(),
+ FrameCookie.CodeOffset, &LinkageName);
}
- W.printHex("Register", FrameCookie.Header.Register);
- W.printEnum("CookieKind", uint16_t(FrameCookie.Header.CookieKind),
+ W.printHex("Register", FrameCookie.Register);
+ W.printEnum("CookieKind", uint16_t(FrameCookie.CookieKind),
getFrameCookieKindNames());
- W.printHex("Flags", FrameCookie.Header.Flags);
+ W.printHex("Flags", FrameCookie.Flags);
+ return Error::success();
}
-void CVSymbolDumperImpl::visitFrameProcSym(SymbolKind Kind,
+Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
FrameProcSym &FrameProc) {
DictScope S(W, "FrameProc");
- W.printHex("TotalFrameBytes", FrameProc.Header.TotalFrameBytes);
- W.printHex("PaddingFrameBytes", FrameProc.Header.PaddingFrameBytes);
- W.printHex("OffsetToPadding", FrameProc.Header.OffsetToPadding);
+ W.printHex("TotalFrameBytes", FrameProc.TotalFrameBytes);
+ W.printHex("PaddingFrameBytes", FrameProc.PaddingFrameBytes);
+ W.printHex("OffsetToPadding", FrameProc.OffsetToPadding);
W.printHex("BytesOfCalleeSavedRegisters",
- FrameProc.Header.BytesOfCalleeSavedRegisters);
- W.printHex("OffsetOfExceptionHandler",
- FrameProc.Header.OffsetOfExceptionHandler);
+ FrameProc.BytesOfCalleeSavedRegisters);
+ W.printHex("OffsetOfExceptionHandler", FrameProc.OffsetOfExceptionHandler);
W.printHex("SectionIdOfExceptionHandler",
- FrameProc.Header.SectionIdOfExceptionHandler);
- W.printFlags("Flags", FrameProc.Header.Flags, getFrameProcSymFlagNames());
+ FrameProc.SectionIdOfExceptionHandler);
+ W.printFlags("Flags", static_cast<uint32_t>(FrameProc.Flags),
+ getFrameProcSymFlagNames());
+ return Error::success();
}
-void CVSymbolDumperImpl::visitHeapAllocationSiteSym(
- SymbolKind Kind, HeapAllocationSiteSym &HeapAllocSite) {
+Error CVSymbolDumperImpl::visitKnownRecord(
+ CVSymbol &CVR, HeapAllocationSiteSym &HeapAllocSite) {
DictScope S(W, "HeapAllocationSite");
StringRef LinkageName;
if (ObjDelegate) {
- ObjDelegate->printRelocatedField(
- "CodeOffset", HeapAllocSite.getRelocationOffset(),
- HeapAllocSite.Header.CodeOffset, &LinkageName);
+ ObjDelegate->printRelocatedField("CodeOffset",
+ HeapAllocSite.getRelocationOffset(),
+ HeapAllocSite.CodeOffset, &LinkageName);
}
- W.printHex("Segment", HeapAllocSite.Header.Segment);
- W.printHex("CallInstructionSize", HeapAllocSite.Header.CallInstructionSize);
- CVTD.printTypeIndex("Type", HeapAllocSite.Header.Type);
+ W.printHex("Segment", HeapAllocSite.Segment);
+ W.printHex("CallInstructionSize", HeapAllocSite.CallInstructionSize);
+ CVTD.printTypeIndex("Type", HeapAllocSite.Type);
if (!LinkageName.empty())
W.printString("LinkageName", LinkageName);
+ return Error::success();
}
-void CVSymbolDumperImpl::visitInlineSiteSym(SymbolKind Kind,
- InlineSiteSym &InlineSite) {
+Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
+ InlineSiteSym &InlineSite) {
DictScope S(W, "InlineSite");
- W.printHex("PtrParent", InlineSite.Header.PtrParent);
- W.printHex("PtrEnd", InlineSite.Header.PtrEnd);
- CVTD.printTypeIndex("Inlinee", InlineSite.Header.Inlinee);
+ W.printHex("PtrParent", InlineSite.Parent);
+ W.printHex("PtrEnd", InlineSite.End);
+ CVTD.printTypeIndex("Inlinee", InlineSite.Inlinee);
ListScope BinaryAnnotations(W, "BinaryAnnotations");
for (auto &Annotation : InlineSite.annotations()) {
switch (Annotation.OpCode) {
case BinaryAnnotationsOpCode::Invalid:
- return parseError();
+ return llvm::make_error<CodeViewError>(
+ "Invalid binary annotation opcode!");
case BinaryAnnotationsOpCode::CodeOffset:
case BinaryAnnotationsOpCode::ChangeCodeOffset:
case BinaryAnnotationsOpCode::ChangeCodeLength:
@@ -486,157 +505,180 @@ void CVSymbolDumperImpl::visitInlineSiteSym(SymbolKind Kind,
}
}
}
+ return Error::success();
}
-void CVSymbolDumperImpl::visitRegisterSym(SymbolKind Kind,
- RegisterSym &Register) {
+Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
+ RegisterSym &Register) {
DictScope S(W, "RegisterSym");
- W.printNumber("Type", Register.Header.Index);
- W.printEnum("Seg", uint16_t(Register.Header.Register), getRegisterNames());
+ W.printNumber("Type", Register.Index);
+ W.printEnum("Seg", uint16_t(Register.Register), getRegisterNames());
W.printString("Name", Register.Name);
+ return Error::success();
}
-void CVSymbolDumperImpl::visitPublicSym32(SymbolKind Kind,
- PublicSym32 &Public) {
+Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, PublicSym32 &Public) {
DictScope S(W, "PublicSym");
- W.printNumber("Type", Public.Header.Index);
- W.printNumber("Seg", Public.Header.Seg);
- W.printNumber("Off", Public.Header.Off);
+ W.printNumber("Type", Public.Index);
+ W.printNumber("Seg", Public.Segment);
+ W.printNumber("Off", Public.Offset);
W.printString("Name", Public.Name);
+ return Error::success();
}
-void CVSymbolDumperImpl::visitProcRefSym(SymbolKind Kind, ProcRefSym &ProcRef) {
+Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, ProcRefSym &ProcRef) {
DictScope S(W, "ProcRef");
- W.printNumber("SumName", ProcRef.Header.SumName);
- W.printNumber("SymOffset", ProcRef.Header.SymOffset);
- W.printNumber("Mod", ProcRef.Header.Mod);
+ W.printNumber("SumName", ProcRef.SumName);
+ W.printNumber("SymOffset", ProcRef.SymOffset);
+ W.printNumber("Mod", ProcRef.Module);
W.printString("Name", ProcRef.Name);
+ return Error::success();
}
-void CVSymbolDumperImpl::visitLabelSym(SymbolKind Kind, LabelSym &Label) {
+Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, LabelSym &Label) {
DictScope S(W, "Label");
StringRef LinkageName;
if (ObjDelegate) {
ObjDelegate->printRelocatedField("CodeOffset", Label.getRelocationOffset(),
- Label.Header.CodeOffset, &LinkageName);
+ Label.CodeOffset, &LinkageName);
}
- W.printHex("Segment", Label.Header.Segment);
- W.printHex("Flags", Label.Header.Flags);
- W.printFlags("Flags", Label.Header.Flags, getProcSymFlagNames());
+ W.printHex("Segment", Label.Segment);
+ W.printHex("Flags", uint8_t(Label.Flags));
+ W.printFlags("Flags", uint8_t(Label.Flags), getProcSymFlagNames());
W.printString("DisplayName", Label.Name);
if (!LinkageName.empty())
W.printString("LinkageName", LinkageName);
+ return Error::success();
}
-void CVSymbolDumperImpl::visitLocalSym(SymbolKind Kind, LocalSym &Local) {
+Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, LocalSym &Local) {
DictScope S(W, "Local");
- CVTD.printTypeIndex("Type", Local.Header.Type);
- W.printFlags("Flags", uint16_t(Local.Header.Flags), getLocalFlagNames());
+ CVTD.printTypeIndex("Type", Local.Type);
+ W.printFlags("Flags", uint16_t(Local.Flags), getLocalFlagNames());
W.printString("VarName", Local.Name);
+ return Error::success();
}
-void CVSymbolDumperImpl::visitObjNameSym(SymbolKind Kind, ObjNameSym &ObjName) {
+Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, ObjNameSym &ObjName) {
DictScope S(W, "ObjectName");
- W.printHex("Signature", ObjName.Header.Signature);
+ W.printHex("Signature", ObjName.Signature);
W.printString("ObjectName", ObjName.Name);
+ return Error::success();
}
-void CVSymbolDumperImpl::visitProcSym(SymbolKind Kind, ProcSym &Proc) {
+Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, ProcSym &Proc) {
DictScope S(W, "ProcStart");
if (InFunctionScope)
- return parseError();
+ return llvm::make_error<CodeViewError>(
+ "Visiting a ProcSym while inside function scope!");
InFunctionScope = true;
StringRef LinkageName;
- W.printEnum("Kind", uint16_t(Kind), getSymbolTypeNames());
- W.printHex("PtrParent", Proc.Header.PtrParent);
- W.printHex("PtrEnd", Proc.Header.PtrEnd);
- W.printHex("PtrNext", Proc.Header.PtrNext);
- W.printHex("CodeSize", Proc.Header.CodeSize);
- W.printHex("DbgStart", Proc.Header.DbgStart);
- W.printHex("DbgEnd", Proc.Header.DbgEnd);
- CVTD.printTypeIndex("FunctionType", Proc.Header.FunctionType);
+ W.printEnum("Kind", uint16_t(CVR.kind()), getSymbolTypeNames());
+ W.printHex("PtrParent", Proc.Parent);
+ W.printHex("PtrEnd", Proc.End);
+ W.printHex("PtrNext", Proc.Next);
+ W.printHex("CodeSize", Proc.CodeSize);
+ W.printHex("DbgStart", Proc.DbgStart);
+ W.printHex("DbgEnd", Proc.DbgEnd);
+ CVTD.printTypeIndex("FunctionType", Proc.FunctionType);
if (ObjDelegate) {
ObjDelegate->printRelocatedField("CodeOffset", Proc.getRelocationOffset(),
- Proc.Header.CodeOffset, &LinkageName);
+ Proc.CodeOffset, &LinkageName);
}
- W.printHex("Segment", Proc.Header.Segment);
- W.printFlags("Flags", static_cast<uint8_t>(Proc.Header.Flags),
+ W.printHex("Segment", Proc.Segment);
+ W.printFlags("Flags", static_cast<uint8_t>(Proc.Flags),
getProcSymFlagNames());
W.printString("DisplayName", Proc.Name);
if (!LinkageName.empty())
W.printString("LinkageName", LinkageName);
+ return Error::success();
}
-void CVSymbolDumperImpl::visitScopeEndSym(SymbolKind Kind,
- ScopeEndSym &ScopeEnd) {
- if (Kind == SymbolKind::S_END)
+Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
+ ScopeEndSym &ScopeEnd) {
+ if (CVR.kind() == SymbolKind::S_END)
DictScope S(W, "BlockEnd");
- else if (Kind == SymbolKind::S_PROC_ID_END)
+ else if (CVR.kind() == SymbolKind::S_PROC_ID_END)
DictScope S(W, "ProcEnd");
- else if (Kind == SymbolKind::S_INLINESITE_END)
+ else if (CVR.kind() == SymbolKind::S_INLINESITE_END)
DictScope S(W, "InlineSiteEnd");
InFunctionScope = false;
+ return Error::success();
}
-void CVSymbolDumperImpl::visitCallerSym(SymbolKind Kind, CallerSym &Caller) {
- ListScope S(W, Kind == S_CALLEES ? "Callees" : "Callers");
+Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, CallerSym &Caller) {
+ ListScope S(W, CVR.kind() == S_CALLEES ? "Callees" : "Callers");
for (auto FuncID : Caller.Indices)
CVTD.printTypeIndex("FuncID", FuncID);
+ return Error::success();
}
-void CVSymbolDumperImpl::visitRegRelativeSym(SymbolKind Kind,
- RegRelativeSym &RegRel) {
+Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
+ RegRelativeSym &RegRel) {
DictScope S(W, "RegRelativeSym");
- W.printHex("Offset", RegRel.Header.Offset);
- CVTD.printTypeIndex("Type", RegRel.Header.Type);
- W.printHex("Register", RegRel.Header.Register);
+ W.printHex("Offset", RegRel.Offset);
+ CVTD.printTypeIndex("Type", RegRel.Type);
+ W.printHex("Register", RegRel.Register);
W.printString("VarName", RegRel.Name);
+ return Error::success();
}
-void CVSymbolDumperImpl::visitThreadLocalDataSym(SymbolKind Kind,
- ThreadLocalDataSym &Data) {
+Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR,
+ ThreadLocalDataSym &Data) {
DictScope S(W, "ThreadLocalDataSym");
StringRef LinkageName;
if (ObjDelegate) {
ObjDelegate->printRelocatedField("DataOffset", Data.getRelocationOffset(),
- Data.Header.DataOffset, &LinkageName);
+ Data.DataOffset, &LinkageName);
}
- CVTD.printTypeIndex("Type", Data.Header.Type);
+ CVTD.printTypeIndex("Type", Data.Type);
W.printString("DisplayName", Data.Name);
if (!LinkageName.empty())
W.printString("LinkageName", LinkageName);
+ return Error::success();
}
-void CVSymbolDumperImpl::visitUDTSym(SymbolKind Kind, UDTSym &UDT) {
+Error CVSymbolDumperImpl::visitKnownRecord(CVSymbol &CVR, UDTSym &UDT) {
DictScope S(W, "UDT");
- CVTD.printTypeIndex("Type", UDT.Header.Type);
+ CVTD.printTypeIndex("Type", UDT.Type);
W.printString("UDTName", UDT.Name);
+ return Error::success();
}
-void CVSymbolDumperImpl::visitUnknownSymbol(SymbolKind Kind,
- ArrayRef<uint8_t> Data) {
+Error CVSymbolDumperImpl::visitUnknownSymbol(CVSymbol &CVR) {
DictScope S(W, "UnknownSym");
- W.printEnum("Kind", uint16_t(Kind), getSymbolTypeNames());
- W.printNumber("Length", uint32_t(Data.size()));
+ W.printEnum("Kind", uint16_t(CVR.kind()), getSymbolTypeNames());
+ W.printNumber("Length", CVR.length());
+ return Error::success();
}
-bool CVSymbolDumper::dump(const CVRecord<SymbolKind> &Record) {
+Error CVSymbolDumper::dump(CVRecord<SymbolKind> &Record) {
+ SymbolVisitorCallbackPipeline Pipeline;
+ SymbolDeserializer Deserializer(ObjDelegate.get());
CVSymbolDumperImpl Dumper(CVTD, ObjDelegate.get(), W, PrintRecordBytes);
- Dumper.visitSymbolRecord(Record);
- return !Dumper.hadError();
+
+ Pipeline.addCallbackToPipeline(Deserializer);
+ Pipeline.addCallbackToPipeline(Dumper);
+ CVSymbolVisitor Visitor(Pipeline);
+ return Visitor.visitSymbolRecord(Record);
}
-bool CVSymbolDumper::dump(const CVSymbolArray &Symbols) {
+Error CVSymbolDumper::dump(const CVSymbolArray &Symbols) {
+ SymbolVisitorCallbackPipeline Pipeline;
+ SymbolDeserializer Deserializer(ObjDelegate.get());
CVSymbolDumperImpl Dumper(CVTD, ObjDelegate.get(), W, PrintRecordBytes);
- Dumper.visitSymbolStream(Symbols);
- return !Dumper.hadError();
+
+ Pipeline.addCallbackToPipeline(Deserializer);
+ Pipeline.addCallbackToPipeline(Dumper);
+ CVSymbolVisitor Visitor(Pipeline);
+ return Visitor.visitSymbolStream(Symbols);
}
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp b/contrib/llvm/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp
new file mode 100644
index 000000000000..bb1731465495
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp
@@ -0,0 +1,464 @@
+//===- SymbolRecordMapping.cpp ----------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/SymbolRecordMapping.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+
+#define error(X) \
+ if (auto EC = X) \
+ return EC;
+
+namespace {
+struct MapGap {
+ Error operator()(CodeViewRecordIO &IO, LocalVariableAddrGap &Gap) const {
+ error(IO.mapInteger(Gap.GapStartOffset));
+ error(IO.mapInteger(Gap.Range));
+ return Error::success();
+ }
+};
+}
+
+static Error mapLocalVariableAddrRange(CodeViewRecordIO &IO,
+ LocalVariableAddrRange &Range) {
+ error(IO.mapInteger(Range.OffsetStart));
+ error(IO.mapInteger(Range.ISectStart));
+ error(IO.mapInteger(Range.Range));
+ return Error::success();
+}
+
+Error SymbolRecordMapping::visitSymbolBegin(CVSymbol &Record) {
+ error(IO.beginRecord(MaxRecordLength - sizeof(RecordPrefix)));
+ return Error::success();
+}
+
+Error SymbolRecordMapping::visitSymbolEnd(CVSymbol &Record) {
+ error(IO.endRecord());
+ return Error::success();
+}
+
+Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR, BlockSym &Block) {
+
+ error(IO.mapInteger(Block.Parent));
+ error(IO.mapInteger(Block.End));
+ error(IO.mapInteger(Block.CodeSize));
+ error(IO.mapInteger(Block.CodeOffset));
+ error(IO.mapInteger(Block.Segment));
+ error(IO.mapStringZ(Block.Name));
+
+ return Error::success();
+}
+
+Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR, Thunk32Sym &Thunk) {
+
+ error(IO.mapInteger(Thunk.Parent));
+ error(IO.mapInteger(Thunk.End));
+ error(IO.mapInteger(Thunk.Next));
+ error(IO.mapInteger(Thunk.Offset));
+ error(IO.mapInteger(Thunk.Segment));
+ error(IO.mapInteger(Thunk.Length));
+ error(IO.mapEnum(Thunk.Thunk));
+ error(IO.mapStringZ(Thunk.Name));
+ error(IO.mapByteVectorTail(Thunk.VariantData));
+
+ return Error::success();
+}
+
+Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR,
+ TrampolineSym &Tramp) {
+
+ error(IO.mapEnum(Tramp.Type));
+ error(IO.mapInteger(Tramp.Size));
+ error(IO.mapInteger(Tramp.ThunkOffset));
+ error(IO.mapInteger(Tramp.TargetOffset));
+ error(IO.mapInteger(Tramp.ThunkSection));
+ error(IO.mapInteger(Tramp.TargetSection));
+
+ return Error::success();
+}
+
+Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR,
+ SectionSym &Section) {
+ uint8_t Padding = 0;
+
+ error(IO.mapInteger(Section.SectionNumber));
+ error(IO.mapInteger(Section.Alignment));
+ error(IO.mapInteger(Padding));
+ error(IO.mapInteger(Section.Rva));
+ error(IO.mapInteger(Section.Length));
+ error(IO.mapInteger(Section.Characteristics));
+ error(IO.mapStringZ(Section.Name));
+
+ return Error::success();
+}
+
+Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR,
+ CoffGroupSym &CoffGroup) {
+
+ error(IO.mapInteger(CoffGroup.Size));
+ error(IO.mapInteger(CoffGroup.Characteristics));
+ error(IO.mapInteger(CoffGroup.Offset));
+ error(IO.mapInteger(CoffGroup.Segment));
+ error(IO.mapStringZ(CoffGroup.Name));
+
+ return Error::success();
+}
+
+Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR,
+ BPRelativeSym &BPRel) {
+
+ error(IO.mapInteger(BPRel.Offset));
+ error(IO.mapInteger(BPRel.Type));
+ error(IO.mapStringZ(BPRel.Name));
+
+ return Error::success();
+}
+
+Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR,
+ BuildInfoSym &BuildInfo) {
+
+ error(IO.mapInteger(BuildInfo.BuildId));
+
+ return Error::success();
+}
+
+Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR,
+ CallSiteInfoSym &CallSiteInfo) {
+ uint16_t Padding = 0;
+
+ error(IO.mapInteger(CallSiteInfo.CodeOffset));
+ error(IO.mapInteger(CallSiteInfo.Segment));
+ error(IO.mapInteger(Padding));
+ error(IO.mapInteger(CallSiteInfo.Type));
+
+ return Error::success();
+}
+
+Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR,
+ EnvBlockSym &EnvBlock) {
+
+ uint8_t Reserved = 0;
+ error(IO.mapInteger(Reserved));
+ error(IO.mapStringZVectorZ(EnvBlock.Fields));
+
+ return Error::success();
+}
+
+Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR,
+ FileStaticSym &FileStatic) {
+
+ error(IO.mapInteger(FileStatic.Index));
+ error(IO.mapInteger(FileStatic.ModFilenameOffset));
+ error(IO.mapEnum(FileStatic.Flags));
+ error(IO.mapStringZ(FileStatic.Name));
+
+ return Error::success();
+}
+
+Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR, ExportSym &Export) {
+
+ error(IO.mapInteger(Export.Ordinal));
+ error(IO.mapEnum(Export.Flags));
+ error(IO.mapStringZ(Export.Name));
+
+ return Error::success();
+}
+
+Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR,
+ Compile2Sym &Compile2) {
+
+ error(IO.mapEnum(Compile2.Flags));
+ error(IO.mapEnum(Compile2.Machine));
+ error(IO.mapInteger(Compile2.VersionFrontendMajor));
+ error(IO.mapInteger(Compile2.VersionFrontendMinor));
+ error(IO.mapInteger(Compile2.VersionFrontendBuild));
+ error(IO.mapInteger(Compile2.VersionBackendMajor));
+ error(IO.mapInteger(Compile2.VersionBackendMinor));
+ error(IO.mapInteger(Compile2.VersionBackendBuild));
+ error(IO.mapStringZ(Compile2.Version));
+ error(IO.mapStringZVectorZ(Compile2.ExtraStrings));
+
+ return Error::success();
+}
+
+Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR,
+ Compile3Sym &Compile3) {
+
+ error(IO.mapEnum(Compile3.Flags));
+ error(IO.mapEnum(Compile3.Machine));
+ error(IO.mapInteger(Compile3.VersionFrontendMajor));
+ error(IO.mapInteger(Compile3.VersionFrontendMinor));
+ error(IO.mapInteger(Compile3.VersionFrontendBuild));
+ error(IO.mapInteger(Compile3.VersionFrontendQFE));
+ error(IO.mapInteger(Compile3.VersionBackendMajor));
+ error(IO.mapInteger(Compile3.VersionBackendMinor));
+ error(IO.mapInteger(Compile3.VersionBackendBuild));
+ error(IO.mapInteger(Compile3.VersionBackendQFE));
+ error(IO.mapStringZ(Compile3.Version));
+
+ return Error::success();
+}
+
+Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR,
+ ConstantSym &Constant) {
+
+ error(IO.mapInteger(Constant.Type));
+ error(IO.mapEncodedInteger(Constant.Value));
+ error(IO.mapStringZ(Constant.Name));
+
+ return Error::success();
+}
+
+Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR, DataSym &Data) {
+
+ error(IO.mapInteger(Data.Type));
+ error(IO.mapInteger(Data.DataOffset));
+ error(IO.mapInteger(Data.Segment));
+ error(IO.mapStringZ(Data.Name));
+
+ return Error::success();
+}
+
+Error SymbolRecordMapping::visitKnownRecord(
+ CVSymbol &CVR, DefRangeFramePointerRelSym &DefRangeFramePointerRel) {
+
+ error(IO.mapInteger(DefRangeFramePointerRel.Offset));
+ error(mapLocalVariableAddrRange(IO, DefRangeFramePointerRel.Range));
+ error(IO.mapVectorTail(DefRangeFramePointerRel.Gaps, MapGap()));
+
+ return Error::success();
+}
+
+Error SymbolRecordMapping::visitKnownRecord(
+ CVSymbol &CVR,
+ DefRangeFramePointerRelFullScopeSym &DefRangeFramePointerRelFullScope) {
+
+ error(IO.mapInteger(DefRangeFramePointerRelFullScope.Offset));
+
+ return Error::success();
+}
+
+Error SymbolRecordMapping::visitKnownRecord(
+ CVSymbol &CVR, DefRangeRegisterRelSym &DefRangeRegisterRel) {
+
+ error(IO.mapObject(DefRangeRegisterRel.Hdr.Register));
+ error(IO.mapObject(DefRangeRegisterRel.Hdr.Flags));
+ error(IO.mapObject(DefRangeRegisterRel.Hdr.BasePointerOffset));
+ error(mapLocalVariableAddrRange(IO, DefRangeRegisterRel.Range));
+ error(IO.mapVectorTail(DefRangeRegisterRel.Gaps, MapGap()));
+
+ return Error::success();
+}
+
+Error SymbolRecordMapping::visitKnownRecord(
+ CVSymbol &CVR, DefRangeRegisterSym &DefRangeRegister) {
+
+ error(IO.mapObject(DefRangeRegister.Hdr.Register));
+ error(IO.mapObject(DefRangeRegister.Hdr.MayHaveNoName));
+ error(mapLocalVariableAddrRange(IO, DefRangeRegister.Range));
+ error(IO.mapVectorTail(DefRangeRegister.Gaps, MapGap()));
+
+ return Error::success();
+}
+
+Error SymbolRecordMapping::visitKnownRecord(
+ CVSymbol &CVR, DefRangeSubfieldRegisterSym &DefRangeSubfieldRegister) {
+
+ error(IO.mapObject(DefRangeSubfieldRegister.Hdr.Register));
+ error(IO.mapObject(DefRangeSubfieldRegister.Hdr.MayHaveNoName));
+ error(IO.mapObject(DefRangeSubfieldRegister.Hdr.OffsetInParent));
+ error(mapLocalVariableAddrRange(IO, DefRangeSubfieldRegister.Range));
+ error(IO.mapVectorTail(DefRangeSubfieldRegister.Gaps, MapGap()));
+
+ return Error::success();
+}
+
+Error SymbolRecordMapping::visitKnownRecord(
+ CVSymbol &CVR, DefRangeSubfieldSym &DefRangeSubfield) {
+
+ error(IO.mapInteger(DefRangeSubfield.Program));
+ error(IO.mapInteger(DefRangeSubfield.OffsetInParent));
+ error(mapLocalVariableAddrRange(IO, DefRangeSubfield.Range));
+ error(IO.mapVectorTail(DefRangeSubfield.Gaps, MapGap()));
+
+ return Error::success();
+}
+
+Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR,
+ DefRangeSym &DefRange) {
+
+ error(IO.mapInteger(DefRange.Program));
+ error(mapLocalVariableAddrRange(IO, DefRange.Range));
+ error(IO.mapVectorTail(DefRange.Gaps, MapGap()));
+
+ return Error::success();
+}
+
+Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR,
+ FrameCookieSym &FrameCookie) {
+
+ error(IO.mapInteger(FrameCookie.CodeOffset));
+ error(IO.mapInteger(FrameCookie.Register));
+ error(IO.mapInteger(FrameCookie.CookieKind));
+ error(IO.mapInteger(FrameCookie.Flags));
+
+ return Error::success();
+}
+
+Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR,
+ FrameProcSym &FrameProc) {
+ error(IO.mapInteger(FrameProc.TotalFrameBytes));
+ error(IO.mapInteger(FrameProc.PaddingFrameBytes));
+ error(IO.mapInteger(FrameProc.OffsetToPadding));
+ error(IO.mapInteger(FrameProc.BytesOfCalleeSavedRegisters));
+ error(IO.mapInteger(FrameProc.OffsetOfExceptionHandler));
+ error(IO.mapInteger(FrameProc.SectionIdOfExceptionHandler));
+ error(IO.mapEnum(FrameProc.Flags));
+
+ return Error::success();
+}
+
+Error SymbolRecordMapping::visitKnownRecord(
+ CVSymbol &CVR, HeapAllocationSiteSym &HeapAllocSite) {
+
+ error(IO.mapInteger(HeapAllocSite.CodeOffset));
+ error(IO.mapInteger(HeapAllocSite.Segment));
+ error(IO.mapInteger(HeapAllocSite.CallInstructionSize));
+ error(IO.mapInteger(HeapAllocSite.Type));
+
+ return Error::success();
+}
+
+Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR,
+ InlineSiteSym &InlineSite) {
+
+ error(IO.mapInteger(InlineSite.Parent));
+ error(IO.mapInteger(InlineSite.End));
+ error(IO.mapInteger(InlineSite.Inlinee));
+ error(IO.mapByteVectorTail(InlineSite.AnnotationData));
+
+ return Error::success();
+}
+
+Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR,
+ RegisterSym &Register) {
+
+ error(IO.mapInteger(Register.Index));
+ error(IO.mapEnum(Register.Register));
+ error(IO.mapStringZ(Register.Name));
+
+ return Error::success();
+}
+
+Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR,
+ PublicSym32 &Public) {
+
+ error(IO.mapInteger(Public.Index));
+ error(IO.mapInteger(Public.Offset));
+ error(IO.mapInteger(Public.Segment));
+ error(IO.mapStringZ(Public.Name));
+
+ return Error::success();
+}
+
+Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR,
+ ProcRefSym &ProcRef) {
+
+ error(IO.mapInteger(ProcRef.SumName));
+ error(IO.mapInteger(ProcRef.SymOffset));
+ error(IO.mapInteger(ProcRef.Module));
+ error(IO.mapStringZ(ProcRef.Name));
+
+ return Error::success();
+}
+
+Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR, LabelSym &Label) {
+
+ error(IO.mapInteger(Label.CodeOffset));
+ error(IO.mapInteger(Label.Segment));
+ error(IO.mapEnum(Label.Flags));
+ error(IO.mapStringZ(Label.Name));
+
+ return Error::success();
+}
+
+Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR, LocalSym &Local) {
+ error(IO.mapInteger(Local.Type));
+ error(IO.mapEnum(Local.Flags));
+ error(IO.mapStringZ(Local.Name));
+
+ return Error::success();
+}
+
+Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR,
+ ObjNameSym &ObjName) {
+
+ error(IO.mapInteger(ObjName.Signature));
+ error(IO.mapStringZ(ObjName.Name));
+
+ return Error::success();
+}
+
+Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR, ProcSym &Proc) {
+ error(IO.mapInteger(Proc.Parent));
+ error(IO.mapInteger(Proc.End));
+ error(IO.mapInteger(Proc.Next));
+ error(IO.mapInteger(Proc.CodeSize));
+ error(IO.mapInteger(Proc.DbgStart));
+ error(IO.mapInteger(Proc.DbgEnd));
+ error(IO.mapInteger(Proc.FunctionType));
+ error(IO.mapInteger(Proc.CodeOffset));
+ error(IO.mapInteger(Proc.Segment));
+ error(IO.mapEnum(Proc.Flags));
+ error(IO.mapStringZ(Proc.Name));
+ return Error::success();
+}
+
+Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR,
+ ScopeEndSym &ScopeEnd) {
+ return Error::success();
+}
+
+Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR, CallerSym &Caller) {
+ error(IO.mapVectorN<uint32_t>(
+ Caller.Indices,
+ [](CodeViewRecordIO &IO, TypeIndex &N) { return IO.mapInteger(N); }));
+ return Error::success();
+}
+
+Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR,
+ RegRelativeSym &RegRel) {
+
+ error(IO.mapInteger(RegRel.Offset));
+ error(IO.mapInteger(RegRel.Type));
+ error(IO.mapInteger(RegRel.Register));
+ error(IO.mapStringZ(RegRel.Name));
+
+ return Error::success();
+}
+
+Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR,
+ ThreadLocalDataSym &Data) {
+
+ error(IO.mapInteger(Data.Type));
+ error(IO.mapInteger(Data.DataOffset));
+ error(IO.mapInteger(Data.Segment));
+ error(IO.mapStringZ(Data.Name));
+
+ return Error::success();
+}
+
+Error SymbolRecordMapping::visitKnownRecord(CVSymbol &CVR, UDTSym &UDT) {
+
+ error(IO.mapInteger(UDT.Type));
+ error(IO.mapStringZ(UDT.Name));
+
+ return Error::success();
+}
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/TypeDumper.cpp b/contrib/llvm/lib/DebugInfo/CodeView/TypeDumper.cpp
index 345e2a49888c..4274d834076a 100644
--- a/contrib/llvm/lib/DebugInfo/CodeView/TypeDumper.cpp
+++ b/contrib/llvm/lib/DebugInfo/CodeView/TypeDumper.cpp
@@ -10,9 +10,11 @@
#include "llvm/DebugInfo/CodeView/TypeDumper.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
+#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
-#include "llvm/DebugInfo/CodeView/ByteStream.h"
+#include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h"
+#include "llvm/DebugInfo/MSF/ByteStream.h"
#include "llvm/Support/ScopedPrinter.h"
using namespace llvm;
@@ -195,35 +197,68 @@ static StringRef getLeafTypeName(TypeLeafKind LT) {
case ename: \
return #name;
#include "llvm/DebugInfo/CodeView/TypeRecords.def"
- case LF_FIELDLIST:
- return "FieldList";
default:
break;
}
return "UnknownLeaf";
}
-Error CVTypeDumper::visitTypeBegin(const CVRecord<TypeLeafKind> &Record) {
+Error CVTypeDumper::visitTypeBegin(CVRecord<TypeLeafKind> &Record) {
+ assert(!IsInFieldList);
// Reset Name to the empty string. If the visitor sets it, we know it.
Name = "";
- W->startLine() << getLeafTypeName(Record.Type) << " ("
- << HexNumber(getNextTypeIndex()) << ") {\n";
+ W->startLine() << getLeafTypeName(Record.Type);
+ W->getOStream() << " (" << HexNumber(getNextTypeIndex()) << ")";
+ W->getOStream() << " {\n";
W->indent();
W->printEnum("TypeLeafKind", unsigned(Record.Type),
makeArrayRef(LeafTypeNames));
+ if (Record.Type == LF_FIELDLIST) {
+ // Record that we're in a field list so that members do not get assigned
+ // type indices.
+ IsInFieldList = true;
+ }
return Error::success();
}
-Error CVTypeDumper::visitTypeEnd(const CVRecord<TypeLeafKind> &Record) {
- if (Record.Type == LF_FIELDLIST)
- Name = "<field list>";
+Error CVTypeDumper::visitTypeEnd(CVRecord<TypeLeafKind> &Record) {
+ if (Record.Type == LF_FIELDLIST) {
+ assert(IsInFieldList);
+ IsInFieldList = false;
+ }
+ assert(!IsInFieldList);
- // Always record some name for every type, even if Name is empty. CVUDTNames
- // is indexed by type index, and must have one entry for every type.
+ // Record every type that is not a field list member, even if Name is empty.
+ // CVUDTNames is indexed by type index, and must have one entry for every
+ // type. Field list members are not recorded, and are only referenced by
+ // their containing field list record.
recordType(Name);
if (PrintRecordBytes)
+ W->printBinaryBlock("LeafData", getBytesAsCharacters(Record.content()));
+
+ W->unindent();
+ W->startLine() << "}\n";
+ return Error::success();
+}
+
+Error CVTypeDumper::visitMemberBegin(CVMemberRecord &Record) {
+ assert(IsInFieldList);
+ // Reset Name to the empty string. If the visitor sets it, we know it.
+ Name = "";
+
+ W->startLine() << getLeafTypeName(Record.Kind);
+ W->getOStream() << " {\n";
+ W->indent();
+ W->printEnum("TypeLeafKind", unsigned(Record.Kind),
+ makeArrayRef(LeafTypeNames));
+ return Error::success();
+}
+
+Error CVTypeDumper::visitMemberEnd(CVMemberRecord &Record) {
+ assert(IsInFieldList);
+ if (PrintRecordBytes)
W->printBinaryBlock("LeafData", getBytesAsCharacters(Record.Data));
W->unindent();
@@ -231,7 +266,18 @@ Error CVTypeDumper::visitTypeEnd(const CVRecord<TypeLeafKind> &Record) {
return Error::success();
}
-Error CVTypeDumper::visitStringId(StringIdRecord &String) {
+Error CVTypeDumper::visitKnownRecord(CVRecord<TypeLeafKind> &CVR,
+ FieldListRecord &FieldList) {
+ CVTypeVisitor Visitor(*this);
+ if (auto EC = Visitor.visitFieldListMemberStream(FieldList.Data))
+ return EC;
+
+ Name = "<field list>";
+ return Error::success();
+}
+
+Error CVTypeDumper::visitKnownRecord(CVRecord<TypeLeafKind> &CVR,
+ StringIdRecord &String) {
printTypeIndex("Id", String.getId());
W->printString("StringData", String.getString());
// Put this in CVUDTNames so it gets printed with LF_UDT_SRC_LINE.
@@ -239,7 +285,8 @@ Error CVTypeDumper::visitStringId(StringIdRecord &String) {
return Error::success();
}
-Error CVTypeDumper::visitArgList(ArgListRecord &Args) {
+Error CVTypeDumper::visitKnownRecord(CVRecord<TypeLeafKind> &CVR,
+ ArgListRecord &Args) {
auto Indices = Args.getIndices();
uint32_t Size = Indices.size();
W->printNumber("NumArgs", Size);
@@ -257,7 +304,8 @@ Error CVTypeDumper::visitArgList(ArgListRecord &Args) {
return Error::success();
}
-Error CVTypeDumper::visitClass(ClassRecord &Class) {
+Error CVTypeDumper::visitKnownRecord(CVRecord<TypeLeafKind> &CVR,
+ ClassRecord &Class) {
uint16_t Props = static_cast<uint16_t>(Class.getOptions());
W->printNumber("MemberCount", Class.getMemberCount());
W->printFlags("Properties", Props, makeArrayRef(ClassOptionNames));
@@ -272,7 +320,8 @@ Error CVTypeDumper::visitClass(ClassRecord &Class) {
return Error::success();
}
-Error CVTypeDumper::visitUnion(UnionRecord &Union) {
+Error CVTypeDumper::visitKnownRecord(CVRecord<TypeLeafKind> &CVR,
+ UnionRecord &Union) {
uint16_t Props = static_cast<uint16_t>(Union.getOptions());
W->printNumber("MemberCount", Union.getMemberCount());
W->printFlags("Properties", Props, makeArrayRef(ClassOptionNames));
@@ -285,7 +334,8 @@ Error CVTypeDumper::visitUnion(UnionRecord &Union) {
return Error::success();
}
-Error CVTypeDumper::visitEnum(EnumRecord &Enum) {
+Error CVTypeDumper::visitKnownRecord(CVRecord<TypeLeafKind> &CVR,
+ EnumRecord &Enum) {
uint16_t Props = static_cast<uint16_t>(Enum.getOptions());
W->printNumber("NumEnumerators", Enum.getMemberCount());
W->printFlags("Properties", uint16_t(Enum.getOptions()),
@@ -299,7 +349,8 @@ Error CVTypeDumper::visitEnum(EnumRecord &Enum) {
return Error::success();
}
-Error CVTypeDumper::visitArray(ArrayRecord &AT) {
+Error CVTypeDumper::visitKnownRecord(CVRecord<TypeLeafKind> &CVR,
+ ArrayRecord &AT) {
printTypeIndex("ElementType", AT.getElementType());
printTypeIndex("IndexType", AT.getIndexType());
W->printNumber("SizeOf", AT.getSize());
@@ -308,7 +359,8 @@ Error CVTypeDumper::visitArray(ArrayRecord &AT) {
return Error::success();
}
-Error CVTypeDumper::visitVFTable(VFTableRecord &VFT) {
+Error CVTypeDumper::visitKnownRecord(CVRecord<TypeLeafKind> &CVR,
+ VFTableRecord &VFT) {
printTypeIndex("CompleteClass", VFT.getCompleteClass());
printTypeIndex("OverriddenVFTable", VFT.getOverriddenVTable());
W->printHex("VFPtrOffset", VFT.getVFPtrOffset());
@@ -319,7 +371,8 @@ Error CVTypeDumper::visitVFTable(VFTableRecord &VFT) {
return Error::success();
}
-Error CVTypeDumper::visitMemberFuncId(MemberFuncIdRecord &Id) {
+Error CVTypeDumper::visitKnownRecord(CVRecord<TypeLeafKind> &CVR,
+ MemberFuncIdRecord &Id) {
printTypeIndex("ClassType", Id.getClassType());
printTypeIndex("FunctionType", Id.getFunctionType());
W->printString("Name", Id.getName());
@@ -327,7 +380,8 @@ Error CVTypeDumper::visitMemberFuncId(MemberFuncIdRecord &Id) {
return Error::success();
}
-Error CVTypeDumper::visitProcedure(ProcedureRecord &Proc) {
+Error CVTypeDumper::visitKnownRecord(CVRecord<TypeLeafKind> &CVR,
+ ProcedureRecord &Proc) {
printTypeIndex("ReturnType", Proc.getReturnType());
W->printEnum("CallingConvention", uint8_t(Proc.getCallConv()),
makeArrayRef(CallingConventions));
@@ -345,7 +399,8 @@ Error CVTypeDumper::visitProcedure(ProcedureRecord &Proc) {
return Error::success();
}
-Error CVTypeDumper::visitMemberFunction(MemberFunctionRecord &MF) {
+Error CVTypeDumper::visitKnownRecord(CVRecord<TypeLeafKind> &CVR,
+ MemberFunctionRecord &MF) {
printTypeIndex("ReturnType", MF.getReturnType());
printTypeIndex("ClassType", MF.getClassType());
printTypeIndex("ThisType", MF.getThisType());
@@ -369,11 +424,11 @@ Error CVTypeDumper::visitMemberFunction(MemberFunctionRecord &MF) {
return Error::success();
}
-Error CVTypeDumper::visitMethodOverloadList(
- MethodOverloadListRecord &MethodList) {
+Error CVTypeDumper::visitKnownRecord(CVRecord<TypeLeafKind> &CVR,
+ MethodOverloadListRecord &MethodList) {
for (auto &M : MethodList.getMethods()) {
ListScope S(*W, "Method");
- printMemberAttributes(M.getAccess(), M.getKind(), M.getOptions());
+ printMemberAttributes(M.getAccess(), M.getMethodKind(), M.getOptions());
printTypeIndex("Type", M.getType());
if (M.isIntroducingVirtual())
W->printHex("VFTableOffset", M.getVFTableOffset());
@@ -381,7 +436,8 @@ Error CVTypeDumper::visitMethodOverloadList(
return Error::success();
}
-Error CVTypeDumper::visitFuncId(FuncIdRecord &Func) {
+Error CVTypeDumper::visitKnownRecord(CVRecord<TypeLeafKind> &CVR,
+ FuncIdRecord &Func) {
printTypeIndex("ParentScope", Func.getParentScope());
printTypeIndex("FunctionType", Func.getFunctionType());
W->printString("Name", Func.getName());
@@ -389,7 +445,8 @@ Error CVTypeDumper::visitFuncId(FuncIdRecord &Func) {
return Error::success();
}
-Error CVTypeDumper::visitTypeServer2(TypeServer2Record &TS) {
+Error CVTypeDumper::visitKnownRecord(CVRecord<TypeLeafKind> &CVR,
+ TypeServer2Record &TS) {
W->printBinary("Signature", TS.getGuid());
W->printNumber("Age", TS.getAge());
W->printString("Name", TS.getName());
@@ -397,7 +454,8 @@ Error CVTypeDumper::visitTypeServer2(TypeServer2Record &TS) {
return Error::success();
}
-Error CVTypeDumper::visitPointer(PointerRecord &Ptr) {
+Error CVTypeDumper::visitKnownRecord(CVRecord<TypeLeafKind> &CVR,
+ PointerRecord &Ptr) {
printTypeIndex("PointeeType", Ptr.getReferentType());
W->printHex("PointerAttributes", uint32_t(Ptr.getOptions()));
W->printEnum("PtrType", unsigned(Ptr.getPointerKind()),
@@ -448,7 +506,8 @@ Error CVTypeDumper::visitPointer(PointerRecord &Ptr) {
return Error::success();
}
-Error CVTypeDumper::visitModifier(ModifierRecord &Mod) {
+Error CVTypeDumper::visitKnownRecord(CVRecord<TypeLeafKind> &CVR,
+ ModifierRecord &Mod) {
uint16_t Mods = static_cast<uint16_t>(Mod.getModifiers());
printTypeIndex("ModifiedType", Mod.getModifiedType());
W->printFlags("Modifiers", Mods, makeArrayRef(TypeModifierNames));
@@ -466,26 +525,31 @@ Error CVTypeDumper::visitModifier(ModifierRecord &Mod) {
return Error::success();
}
-Error CVTypeDumper::visitBitField(BitFieldRecord &BitField) {
+Error CVTypeDumper::visitKnownRecord(CVRecord<TypeLeafKind> &CVR,
+ BitFieldRecord &BitField) {
printTypeIndex("Type", BitField.getType());
W->printNumber("BitSize", BitField.getBitSize());
W->printNumber("BitOffset", BitField.getBitOffset());
return Error::success();
}
-Error CVTypeDumper::visitVFTableShape(VFTableShapeRecord &Shape) {
+Error CVTypeDumper::visitKnownRecord(CVRecord<TypeLeafKind> &CVR,
+ VFTableShapeRecord &Shape) {
W->printNumber("VFEntryCount", Shape.getEntryCount());
+ Name = saveName("<vftable " + utostr(Shape.getEntryCount()) + " methods>");
return Error::success();
}
-Error CVTypeDumper::visitUdtSourceLine(UdtSourceLineRecord &Line) {
+Error CVTypeDumper::visitKnownRecord(CVRecord<TypeLeafKind> &CVR,
+ UdtSourceLineRecord &Line) {
printTypeIndex("UDT", Line.getUDT());
printTypeIndex("SourceFile", Line.getSourceFile());
W->printNumber("LineNumber", Line.getLineNumber());
return Error::success();
}
-Error CVTypeDumper::visitUdtModSourceLine(UdtModSourceLineRecord &Line) {
+Error CVTypeDumper::visitKnownRecord(CVRecord<TypeLeafKind> &CVR,
+ UdtModSourceLineRecord &Line) {
printTypeIndex("UDT", Line.getUDT());
printTypeIndex("SourceFile", Line.getSourceFile());
W->printNumber("LineNumber", Line.getLineNumber());
@@ -493,7 +557,8 @@ Error CVTypeDumper::visitUdtModSourceLine(UdtModSourceLineRecord &Line) {
return Error::success();
}
-Error CVTypeDumper::visitBuildInfo(BuildInfoRecord &Args) {
+Error CVTypeDumper::visitKnownRecord(CVRecord<TypeLeafKind> &CVR,
+ BuildInfoRecord &Args) {
W->printNumber("NumArgs", static_cast<uint32_t>(Args.getArgs().size()));
ListScope Arguments(*W, "Arguments");
@@ -521,29 +586,28 @@ void CVTypeDumper::printMemberAttributes(MemberAccess Access, MethodKind Kind,
}
}
-Error CVTypeDumper::visitUnknownMember(const CVRecord<TypeLeafKind> &Record) {
- W->printHex("UnknownMember", unsigned(Record.Type));
+Error CVTypeDumper::visitUnknownMember(CVMemberRecord &Record) {
+ W->printHex("UnknownMember", unsigned(Record.Kind));
return Error::success();
}
-Error CVTypeDumper::visitUnknownType(const CVRecord<TypeLeafKind> &Record) {
- DictScope S(*W, "UnknownType");
- W->printEnum("Kind", uint16_t(Record.Type), makeArrayRef(LeafTypeNames));
- W->printNumber("Length", uint32_t(Record.Data.size()));
+Error CVTypeDumper::visitUnknownType(CVRecord<TypeLeafKind> &Record) {
+ W->printEnum("Kind", uint16_t(Record.kind()), makeArrayRef(LeafTypeNames));
+ W->printNumber("Length", uint32_t(Record.content().size()));
return Error::success();
}
-Error CVTypeDumper::visitNestedType(NestedTypeRecord &Nested) {
- DictScope S(*W, "NestedType");
+Error CVTypeDumper::visitKnownMember(CVMemberRecord &CVR,
+ NestedTypeRecord &Nested) {
printTypeIndex("Type", Nested.getNestedType());
W->printString("Name", Nested.getName());
Name = Nested.getName();
return Error::success();
}
-Error CVTypeDumper::visitOneMethod(OneMethodRecord &Method) {
- DictScope S(*W, "OneMethod");
- MethodKind K = Method.getKind();
+Error CVTypeDumper::visitKnownMember(CVMemberRecord &CVR,
+ OneMethodRecord &Method) {
+ MethodKind K = Method.getMethodKind();
printMemberAttributes(Method.getAccess(), K, Method.getOptions());
printTypeIndex("Type", Method.getType());
// If virtual, then read the vftable offset.
@@ -554,8 +618,8 @@ Error CVTypeDumper::visitOneMethod(OneMethodRecord &Method) {
return Error::success();
}
-Error CVTypeDumper::visitOverloadedMethod(OverloadedMethodRecord &Method) {
- DictScope S(*W, "OverloadedMethod");
+Error CVTypeDumper::visitKnownMember(CVMemberRecord &CVR,
+ OverloadedMethodRecord &Method) {
W->printHex("MethodCount", Method.getNumOverloads());
printTypeIndex("MethodListIndex", Method.getMethodList());
W->printString("Name", Method.getName());
@@ -563,8 +627,8 @@ Error CVTypeDumper::visitOverloadedMethod(OverloadedMethodRecord &Method) {
return Error::success();
}
-Error CVTypeDumper::visitDataMember(DataMemberRecord &Field) {
- DictScope S(*W, "DataMember");
+Error CVTypeDumper::visitKnownMember(CVMemberRecord &CVR,
+ DataMemberRecord &Field) {
printMemberAttributes(Field.getAccess(), MethodKind::Vanilla,
MethodOptions::None);
printTypeIndex("Type", Field.getType());
@@ -574,8 +638,8 @@ Error CVTypeDumper::visitDataMember(DataMemberRecord &Field) {
return Error::success();
}
-Error CVTypeDumper::visitStaticDataMember(StaticDataMemberRecord &Field) {
- DictScope S(*W, "StaticDataMember");
+Error CVTypeDumper::visitKnownMember(CVMemberRecord &CVR,
+ StaticDataMemberRecord &Field) {
printMemberAttributes(Field.getAccess(), MethodKind::Vanilla,
MethodOptions::None);
printTypeIndex("Type", Field.getType());
@@ -584,14 +648,14 @@ Error CVTypeDumper::visitStaticDataMember(StaticDataMemberRecord &Field) {
return Error::success();
}
-Error CVTypeDumper::visitVFPtr(VFPtrRecord &VFTable) {
- DictScope S(*W, "VFPtr");
+Error CVTypeDumper::visitKnownMember(CVMemberRecord &CVR,
+ VFPtrRecord &VFTable) {
printTypeIndex("Type", VFTable.getType());
return Error::success();
}
-Error CVTypeDumper::visitEnumerator(EnumeratorRecord &Enum) {
- DictScope S(*W, "Enumerator");
+Error CVTypeDumper::visitKnownMember(CVMemberRecord &CVR,
+ EnumeratorRecord &Enum) {
printMemberAttributes(Enum.getAccess(), MethodKind::Vanilla,
MethodOptions::None);
W->printNumber("EnumValue", Enum.getValue());
@@ -600,8 +664,8 @@ Error CVTypeDumper::visitEnumerator(EnumeratorRecord &Enum) {
return Error::success();
}
-Error CVTypeDumper::visitBaseClass(BaseClassRecord &Base) {
- DictScope S(*W, "BaseClass");
+Error CVTypeDumper::visitKnownMember(CVMemberRecord &CVR,
+ BaseClassRecord &Base) {
printMemberAttributes(Base.getAccess(), MethodKind::Vanilla,
MethodOptions::None);
printTypeIndex("BaseType", Base.getBaseType());
@@ -609,8 +673,8 @@ Error CVTypeDumper::visitBaseClass(BaseClassRecord &Base) {
return Error::success();
}
-Error CVTypeDumper::visitVirtualBaseClass(VirtualBaseClassRecord &Base) {
- DictScope S(*W, "VirtualBaseClass");
+Error CVTypeDumper::visitKnownMember(CVMemberRecord &CVR,
+ VirtualBaseClassRecord &Base) {
printMemberAttributes(Base.getAccess(), MethodKind::Vanilla,
MethodOptions::None);
printTypeIndex("BaseType", Base.getBaseType());
@@ -620,8 +684,8 @@ Error CVTypeDumper::visitVirtualBaseClass(VirtualBaseClassRecord &Base) {
return Error::success();
}
-Error CVTypeDumper::visitListContinuation(ListContinuationRecord &Cont) {
- DictScope S(*W, "ListContinuation");
+Error CVTypeDumper::visitKnownMember(CVMemberRecord &CVR,
+ ListContinuationRecord &Cont) {
printTypeIndex("ContinuationIndex", Cont.getContinuationIndex());
return Error::success();
}
@@ -665,25 +729,37 @@ void CVTypeDumper::printTypeIndex(StringRef FieldName, TypeIndex TI) {
Error CVTypeDumper::dump(const CVRecord<TypeLeafKind> &Record) {
assert(W && "printer should not be null");
- CVTypeVisitor Visitor(*this);
+ TypeDeserializer Deserializer;
+ TypeVisitorCallbackPipeline Pipeline;
+ Pipeline.addCallbackToPipeline(Deserializer);
+ Pipeline.addCallbackToPipeline(*this);
+
+ CVTypeVisitor Visitor(Pipeline);
- if (auto EC = Visitor.visitTypeRecord(Record))
+ CVRecord<TypeLeafKind> RecordCopy = Record;
+ if (auto EC = Visitor.visitTypeRecord(RecordCopy))
return EC;
return Error::success();
}
Error CVTypeDumper::dump(const CVTypeArray &Types) {
assert(W && "printer should not be null");
- CVTypeVisitor Visitor(*this);
+ TypeDeserializer Deserializer;
+ TypeVisitorCallbackPipeline Pipeline;
+ Pipeline.addCallbackToPipeline(Deserializer);
+ Pipeline.addCallbackToPipeline(*this);
+
+ CVTypeVisitor Visitor(Pipeline);
+
if (auto EC = Visitor.visitTypeStream(Types))
return EC;
return Error::success();
}
Error CVTypeDumper::dump(ArrayRef<uint8_t> Data) {
- ByteStream<> Stream(Data);
+ msf::ByteStream Stream(Data);
CVTypeArray Types;
- StreamReader Reader(Stream);
+ msf::StreamReader Reader(Stream);
if (auto EC = Reader.readArray(Types, Reader.getLength()))
return EC;
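CVTypeDumper::dump() now follows the same pattern on the type side, and the ArrayRef overload above also shows the ByteStream/StreamReader pieces that moved under DebugInfo/MSF. As a rough illustration, a client that wants to run its own callbacks over a raw type stream could wire things up the same way. This is a sketch, not code from this commit: visitRawTypeStream is an invented name, and the TypeVisitorCallbacks.h include is assumed to declare the callback base class accepted by the pipeline.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
#include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h"
#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h"
#include "llvm/DebugInfo/MSF/ByteStream.h"
#include "llvm/DebugInfo/MSF/StreamReader.h"
#include "llvm/Support/Error.h"

using namespace llvm;
using namespace llvm::codeview;

// Sketch: drive an arbitrary callback implementation over a raw CodeView type
// stream, mirroring the pipeline CVTypeDumper::dump() builds above.
static Error visitRawTypeStream(ArrayRef<uint8_t> Data,
                                TypeVisitorCallbacks &Callbacks) {
  msf::ByteStream Stream(Data);   // ByteStream now lives in DebugInfo/MSF
  CVTypeArray Types;
  msf::StreamReader Reader(Stream);
  if (auto EC = Reader.readArray(Types, Reader.getLength()))
    return EC;

  TypeDeserializer Deserializer;  // turns record bytes into typed records
  TypeVisitorCallbackPipeline Pipeline;
  Pipeline.addCallbackToPipeline(Deserializer);
  Pipeline.addCallbackToPipeline(Callbacks);

  CVTypeVisitor Visitor(Pipeline);
  return Visitor.visitTypeStream(Types);
}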
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/TypeRecord.cpp b/contrib/llvm/lib/DebugInfo/CodeView/TypeRecord.cpp
index f63371e8c14f..b951c068ca86 100644
--- a/contrib/llvm/lib/DebugInfo/CodeView/TypeRecord.cpp
+++ b/contrib/llvm/lib/DebugInfo/CodeView/TypeRecord.cpp
@@ -8,374 +8,15 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
-#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/DebugInfo/CodeView/RecordSerialization.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
+#include "llvm/DebugInfo/MSF/ByteStream.h"
+#include "llvm/DebugInfo/MSF/StreamReader.h"
using namespace llvm;
using namespace llvm::codeview;
//===----------------------------------------------------------------------===//
-// Type record deserialization
-//===----------------------------------------------------------------------===//
-
-ErrorOr<MemberPointerInfo>
-MemberPointerInfo::deserialize(ArrayRef<uint8_t> &Data) {
- const Layout *L = nullptr;
- if (auto EC = consumeObject(Data, L))
- return EC;
-
- TypeIndex T = L->ClassType;
- uint16_t R = L->Representation;
- PointerToMemberRepresentation PMR =
- static_cast<PointerToMemberRepresentation>(R);
- return MemberPointerInfo(T, PMR);
-}
-
-ErrorOr<ModifierRecord> ModifierRecord::deserialize(TypeRecordKind Kind,
- ArrayRef<uint8_t> &Data) {
- const Layout *L = nullptr;
- if (auto EC = consumeObject(Data, L))
- return EC;
-
- TypeIndex M = L->ModifiedType;
- uint16_t O = L->Modifiers;
- ModifierOptions MO = static_cast<ModifierOptions>(O);
- return ModifierRecord(M, MO);
-}
-
-ErrorOr<ProcedureRecord> ProcedureRecord::deserialize(TypeRecordKind Kind,
- ArrayRef<uint8_t> &Data) {
- const Layout *L = nullptr;
- if (auto EC = consumeObject(Data, L))
- return EC;
- return ProcedureRecord(L->ReturnType, L->CallConv, L->Options,
- L->NumParameters, L->ArgListType);
-}
-
-ErrorOr<MemberFunctionRecord>
-MemberFunctionRecord::deserialize(TypeRecordKind Kind,
- ArrayRef<uint8_t> &Data) {
- const Layout *L = nullptr;
- CV_DESERIALIZE(Data, L);
- return MemberFunctionRecord(L->ReturnType, L->ClassType, L->ThisType,
- L->CallConv, L->Options, L->NumParameters,
- L->ArgListType, L->ThisAdjustment);
-}
-
-ErrorOr<MemberFuncIdRecord>
-MemberFuncIdRecord::deserialize(TypeRecordKind Kind, ArrayRef<uint8_t> &Data) {
- const Layout *L = nullptr;
- StringRef Name;
- CV_DESERIALIZE(Data, L, Name);
- return MemberFuncIdRecord(L->ClassType, L->FunctionType, Name);
-}
-
-ErrorOr<ArgListRecord> ArgListRecord::deserialize(TypeRecordKind Kind,
- ArrayRef<uint8_t> &Data) {
- if (Kind != TypeRecordKind::StringList && Kind != TypeRecordKind::ArgList)
- return std::make_error_code(std::errc::illegal_byte_sequence);
-
- const Layout *L = nullptr;
- ArrayRef<TypeIndex> Indices;
- CV_DESERIALIZE(Data, L, CV_ARRAY_FIELD_N(Indices, L->NumArgs));
- return ArgListRecord(Kind, Indices);
-}
-
-ErrorOr<PointerRecord> PointerRecord::deserialize(TypeRecordKind Kind,
- ArrayRef<uint8_t> &Data) {
- const Layout *L = nullptr;
- if (auto EC = consumeObject(Data, L))
- return EC;
-
- PointerKind PtrKind = L->getPtrKind();
- PointerMode Mode = L->getPtrMode();
- uint32_t Opts = L->Attrs;
- PointerOptions Options = static_cast<PointerOptions>(Opts);
- uint8_t Size = L->getPtrSize();
-
- if (L->isPointerToMember()) {
- auto E = MemberPointerInfo::deserialize(Data);
- if (E.getError())
- return std::make_error_code(std::errc::illegal_byte_sequence);
- return PointerRecord(L->PointeeType, PtrKind, Mode, Options, Size, *E);
- }
-
- return PointerRecord(L->PointeeType, PtrKind, Mode, Options, Size);
-}
-
-ErrorOr<NestedTypeRecord>
-NestedTypeRecord::deserialize(TypeRecordKind Kind, ArrayRef<uint8_t> &Data) {
- const Layout *L = nullptr;
- StringRef Name;
- CV_DESERIALIZE(Data, L, Name);
- return NestedTypeRecord(L->Type, Name);
-}
-
-ErrorOr<ArrayRecord> ArrayRecord::deserialize(TypeRecordKind Kind,
- ArrayRef<uint8_t> &Data) {
- const Layout *L = nullptr;
- uint64_t Size;
- StringRef Name;
- CV_DESERIALIZE(Data, L, CV_NUMERIC_FIELD(Size), Name);
- return ArrayRecord(L->ElementType, L->IndexType, Size, Name);
-}
-
-ErrorOr<ClassRecord> ClassRecord::deserialize(TypeRecordKind Kind,
- ArrayRef<uint8_t> &Data) {
- uint64_t Size = 0;
- StringRef Name;
- StringRef UniqueName;
- uint16_t Props;
- const Layout *L = nullptr;
-
- CV_DESERIALIZE(Data, L, CV_NUMERIC_FIELD(Size), Name,
- CV_CONDITIONAL_FIELD(UniqueName, L->hasUniqueName()));
-
- Props = L->Properties;
- uint16_t WrtValue = (Props & WinRTKindMask) >> WinRTKindShift;
- WindowsRTClassKind WRT = static_cast<WindowsRTClassKind>(WrtValue);
- uint16_t HfaMask = (Props & HfaKindMask) >> HfaKindShift;
- HfaKind Hfa = static_cast<HfaKind>(HfaMask);
-
- ClassOptions Options = static_cast<ClassOptions>(Props);
- return ClassRecord(Kind, L->MemberCount, Options, Hfa, WRT, L->FieldList,
- L->DerivedFrom, L->VShape, Size, Name, UniqueName);
-}
-
-ErrorOr<UnionRecord> UnionRecord::deserialize(TypeRecordKind Kind,
- ArrayRef<uint8_t> &Data) {
- uint64_t Size = 0;
- StringRef Name;
- StringRef UniqueName;
- uint16_t Props;
-
- const Layout *L = nullptr;
- CV_DESERIALIZE(Data, L, CV_NUMERIC_FIELD(Size), Name,
- CV_CONDITIONAL_FIELD(UniqueName, L->hasUniqueName()));
-
- Props = L->Properties;
-
- uint16_t HfaMask = (Props & HfaKindMask) >> HfaKindShift;
- HfaKind Hfa = static_cast<HfaKind>(HfaMask);
- ClassOptions Options = static_cast<ClassOptions>(Props);
- return UnionRecord(L->MemberCount, Options, Hfa, L->FieldList, Size, Name,
- UniqueName);
-}
-
-ErrorOr<EnumRecord> EnumRecord::deserialize(TypeRecordKind Kind,
- ArrayRef<uint8_t> &Data) {
- const Layout *L = nullptr;
- StringRef Name;
- StringRef UniqueName;
- CV_DESERIALIZE(Data, L, Name,
- CV_CONDITIONAL_FIELD(UniqueName, L->hasUniqueName()));
-
- uint16_t P = L->Properties;
- ClassOptions Options = static_cast<ClassOptions>(P);
- return EnumRecord(L->NumEnumerators, Options, L->FieldListType, Name,
- UniqueName, L->UnderlyingType);
-}
-
-ErrorOr<BitFieldRecord> BitFieldRecord::deserialize(TypeRecordKind Kind,
- ArrayRef<uint8_t> &Data) {
- const Layout *L = nullptr;
- CV_DESERIALIZE(Data, L);
- return BitFieldRecord(L->Type, L->BitSize, L->BitOffset);
-}
-
-ErrorOr<VFTableShapeRecord>
-VFTableShapeRecord::deserialize(TypeRecordKind Kind, ArrayRef<uint8_t> &Data) {
- const Layout *L = nullptr;
- if (auto EC = consumeObject(Data, L))
- return EC;
-
- std::vector<VFTableSlotKind> Slots;
- uint16_t Count = L->VFEntryCount;
- while (Count > 0) {
- if (Data.empty())
- return std::make_error_code(std::errc::illegal_byte_sequence);
-
- // Process up to 2 nibbles at a time (if there are at least 2 remaining)
- uint8_t Value = Data[0] & 0x0F;
- Slots.push_back(static_cast<VFTableSlotKind>(Value));
- if (--Count > 0) {
- Value = (Data[0] & 0xF0) >> 4;
- Slots.push_back(static_cast<VFTableSlotKind>(Value));
- --Count;
- }
- Data = Data.slice(1);
- }
-
- return VFTableShapeRecord(Slots);
-}
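For reference, a minimal standalone sketch of the two-slots-per-byte nibble scheme decoded above, using plain uint8_t values in place of the VFTableSlotKind enum (the helper name and the sample bytes are illustrative only):

#include <cstdint>
#include <vector>

// Decode 4-bit slot descriptors packed two per byte, mirroring the loop
// above: the low nibble is consumed first, then the high nibble, until
// Count slots have been produced.
std::vector<uint8_t> unpackSlots(const std::vector<uint8_t> &Bytes,
                                 unsigned Count) {
  std::vector<uint8_t> Slots;
  for (uint8_t Byte : Bytes) {
    if (Slots.size() >= Count)
      break;
    Slots.push_back(Byte & 0x0F);          // first slot: low nibble
    if (Slots.size() < Count)
      Slots.push_back((Byte & 0xF0) >> 4); // second slot: high nibble
  }
  return Slots;
}

int main() {
  // Three slots encoded in two bytes: 0x21 -> {1, 2}, 0x03 -> {3}.
  std::vector<uint8_t> Encoded = {0x21, 0x03};
  std::vector<uint8_t> Slots = unpackSlots(Encoded, 3);
  return Slots.size() == 3 ? 0 : 1;
}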
-
-ErrorOr<TypeServer2Record>
-TypeServer2Record::deserialize(TypeRecordKind Kind, ArrayRef<uint8_t> &Data) {
- const Layout *L = nullptr;
- StringRef Name;
- CV_DESERIALIZE(Data, L, Name);
- return TypeServer2Record(StringRef(L->Guid, 16), L->Age, Name);
-}
-
-ErrorOr<StringIdRecord> StringIdRecord::deserialize(TypeRecordKind Kind,
- ArrayRef<uint8_t> &Data) {
- const Layout *L = nullptr;
- StringRef Name;
- CV_DESERIALIZE(Data, L, Name);
- return StringIdRecord(L->id, Name);
-}
-
-ErrorOr<FuncIdRecord> FuncIdRecord::deserialize(TypeRecordKind Kind,
- ArrayRef<uint8_t> &Data) {
- const Layout *L = nullptr;
- StringRef Name;
- CV_DESERIALIZE(Data, L, Name);
- return FuncIdRecord(L->ParentScope, L->FunctionType, Name);
-}
-
-ErrorOr<UdtSourceLineRecord>
-UdtSourceLineRecord::deserialize(TypeRecordKind Kind, ArrayRef<uint8_t> &Data) {
- const Layout *L = nullptr;
- CV_DESERIALIZE(Data, L);
- return UdtSourceLineRecord(L->UDT, L->SourceFile, L->LineNumber);
-}
-
-ErrorOr<BuildInfoRecord> BuildInfoRecord::deserialize(TypeRecordKind Kind,
- ArrayRef<uint8_t> &Data) {
- const Layout *L = nullptr;
- ArrayRef<TypeIndex> Indices;
- CV_DESERIALIZE(Data, L, CV_ARRAY_FIELD_N(Indices, L->NumArgs));
- return BuildInfoRecord(Indices);
-}
-
-ErrorOr<VFTableRecord> VFTableRecord::deserialize(TypeRecordKind Kind,
- ArrayRef<uint8_t> &Data) {
- const Layout *L = nullptr;
- StringRef Name;
- std::vector<StringRef> Names;
- CV_DESERIALIZE(Data, L, Name, CV_ARRAY_FIELD_TAIL(Names));
- return VFTableRecord(L->CompleteClass, L->OverriddenVFTable, L->VFPtrOffset,
- Name, Names);
-}
-
-ErrorOr<OneMethodRecord> OneMethodRecord::deserialize(TypeRecordKind Kind,
- ArrayRef<uint8_t> &Data) {
- const Layout *L = nullptr;
- StringRef Name;
- int32_t VFTableOffset = -1;
-
- CV_DESERIALIZE(Data, L, CV_CONDITIONAL_FIELD(VFTableOffset,
- L->Attrs.isIntroducedVirtual()),
- Name);
-
- MethodOptions Options = L->Attrs.getFlags();
- MethodKind MethKind = L->Attrs.getMethodKind();
- MemberAccess Access = L->Attrs.getAccess();
- OneMethodRecord Method(L->Type, MethKind, Options, Access, VFTableOffset,
- Name);
- // Validate the vftable offset.
- if (Method.isIntroducingVirtual() && Method.getVFTableOffset() < 0)
- return std::make_error_code(std::errc::illegal_byte_sequence);
- return Method;
-}
-
-ErrorOr<MethodOverloadListRecord>
-MethodOverloadListRecord::deserialize(TypeRecordKind Kind,
- ArrayRef<uint8_t> &Data) {
- std::vector<OneMethodRecord> Methods;
- while (!Data.empty()) {
- const Layout *L = nullptr;
- int32_t VFTableOffset = -1;
- CV_DESERIALIZE(Data, L, CV_CONDITIONAL_FIELD(
- VFTableOffset, L->Attrs.isIntroducedVirtual()));
-
- MethodOptions Options = L->Attrs.getFlags();
- MethodKind MethKind = L->Attrs.getMethodKind();
- MemberAccess Access = L->Attrs.getAccess();
-
- Methods.emplace_back(L->Type, MethKind, Options, Access, VFTableOffset,
- StringRef());
-
- // Validate the vftable offset.
- auto &Method = Methods.back();
- if (Method.isIntroducingVirtual() && Method.getVFTableOffset() < 0)
- return std::make_error_code(std::errc::illegal_byte_sequence);
- }
- return MethodOverloadListRecord(Methods);
-}
-
-ErrorOr<OverloadedMethodRecord>
-OverloadedMethodRecord::deserialize(TypeRecordKind Kind,
- ArrayRef<uint8_t> &Data) {
- const Layout *L = nullptr;
- StringRef Name;
- CV_DESERIALIZE(Data, L, Name);
- return OverloadedMethodRecord(L->MethodCount, L->MethList, Name);
-}
-
-ErrorOr<DataMemberRecord>
-DataMemberRecord::deserialize(TypeRecordKind Kind, ArrayRef<uint8_t> &Data) {
- const Layout *L = nullptr;
- uint64_t Offset;
- StringRef Name;
- CV_DESERIALIZE(Data, L, CV_NUMERIC_FIELD(Offset), Name);
- return DataMemberRecord(L->Attrs.getAccess(), L->Type, Offset, Name);
-}
-
-ErrorOr<StaticDataMemberRecord>
-StaticDataMemberRecord::deserialize(TypeRecordKind Kind,
- ArrayRef<uint8_t> &Data) {
- const Layout *L = nullptr;
- StringRef Name;
- CV_DESERIALIZE(Data, L, Name);
- return StaticDataMemberRecord(L->Attrs.getAccess(), L->Type, Name);
-}
-
-ErrorOr<EnumeratorRecord>
-EnumeratorRecord::deserialize(TypeRecordKind Kind, ArrayRef<uint8_t> &Data) {
- const Layout *L = nullptr;
- APSInt Value;
- StringRef Name;
- CV_DESERIALIZE(Data, L, Value, Name);
- return EnumeratorRecord(L->Attrs.getAccess(), Value, Name);
-}
-
-ErrorOr<VFPtrRecord> VFPtrRecord::deserialize(TypeRecordKind Kind,
- ArrayRef<uint8_t> &Data) {
- const Layout *L = nullptr;
- if (auto EC = consumeObject(Data, L))
- return EC;
- return VFPtrRecord(L->Type);
-}
-
-ErrorOr<BaseClassRecord> BaseClassRecord::deserialize(TypeRecordKind Kind,
- ArrayRef<uint8_t> &Data) {
- const Layout *L = nullptr;
- uint64_t Offset;
- CV_DESERIALIZE(Data, L, CV_NUMERIC_FIELD(Offset));
- return BaseClassRecord(L->Attrs.getAccess(), L->BaseType, Offset);
-}
-
-ErrorOr<VirtualBaseClassRecord>
-VirtualBaseClassRecord::deserialize(TypeRecordKind Kind,
- ArrayRef<uint8_t> &Data) {
- const Layout *L = nullptr;
- uint64_t Offset;
- uint64_t Index;
- CV_DESERIALIZE(Data, L, CV_NUMERIC_FIELD(Offset), CV_NUMERIC_FIELD(Index));
- return VirtualBaseClassRecord(L->Attrs.getAccess(), L->BaseType, L->VBPtrType,
- Offset, Index);
-}
-
-ErrorOr<ListContinuationRecord>
-ListContinuationRecord::deserialize(TypeRecordKind Kind,
- ArrayRef<uint8_t> &Data) {
- const Layout *L = nullptr;
- CV_DESERIALIZE(Data, L);
- return ListContinuationRecord(L->ContinuationIndex);
-}
-
-//===----------------------------------------------------------------------===//
// Type index remapping
//===----------------------------------------------------------------------===//
@@ -437,7 +78,7 @@ bool PointerRecord::remapTypeIndices(ArrayRef<TypeIndex> IndexMap) {
bool Success = true;
Success &= remapIndex(IndexMap, ReferentType);
if (isPointerToMember())
- Success &= MemberInfo.remapTypeIndices(IndexMap);
+ Success &= MemberInfo->remapTypeIndices(IndexMap);
return Success;
}
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/TypeRecordBuilder.cpp b/contrib/llvm/lib/DebugInfo/CodeView/TypeRecordBuilder.cpp
deleted file mode 100644
index 112612cc85ea..000000000000
--- a/contrib/llvm/lib/DebugInfo/CodeView/TypeRecordBuilder.cpp
+++ /dev/null
@@ -1,113 +0,0 @@
-//===-- TypeRecordBuilder.cpp ---------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/DebugInfo/CodeView/TypeRecordBuilder.h"
-
-using namespace llvm;
-using namespace codeview;
-
-TypeRecordBuilder::TypeRecordBuilder(TypeRecordKind Kind)
- : Stream(Buffer), Writer(Stream) {
- writeTypeRecordKind(Kind);
-}
-
-StringRef TypeRecordBuilder::str() {
- return StringRef(Buffer.data(), Buffer.size());
-}
-
-void TypeRecordBuilder::writeUInt8(uint8_t Value) {
- Writer.write(Value);
-}
-
-void TypeRecordBuilder::writeInt16(int16_t Value) {
- Writer.write(Value);
-}
-
-void TypeRecordBuilder::writeUInt16(uint16_t Value) {
- Writer.write(Value);
-}
-
-void TypeRecordBuilder::writeInt32(int32_t Value) {
- Writer.write(Value);
-}
-
-void TypeRecordBuilder::writeUInt32(uint32_t Value) {
- Writer.write(Value);
-}
-
-void TypeRecordBuilder::writeInt64(int64_t Value) {
- Writer.write(Value);
-}
-
-void TypeRecordBuilder::writeUInt64(uint64_t Value) {
- Writer.write(Value);
-}
-
-void TypeRecordBuilder::writeEncodedInteger(int64_t Value) {
- if (Value >= 0) {
- writeEncodedUnsignedInteger(static_cast<uint64_t>(Value));
- } else {
- writeEncodedSignedInteger(Value);
- }
-}
-
-void TypeRecordBuilder::writeEncodedSignedInteger(int64_t Value) {
- if (Value >= std::numeric_limits<int8_t>::min() &&
- Value <= std::numeric_limits<int8_t>::max()) {
- writeUInt16(LF_CHAR);
- writeInt16(static_cast<int8_t>(Value));
- } else if (Value >= std::numeric_limits<int16_t>::min() &&
- Value <= std::numeric_limits<int16_t>::max()) {
- writeUInt16(LF_SHORT);
- writeInt16(static_cast<int16_t>(Value));
- } else if (Value >= std::numeric_limits<int32_t>::min() &&
- Value <= std::numeric_limits<int32_t>::max()) {
- writeUInt16(LF_LONG);
- writeInt32(static_cast<int32_t>(Value));
- } else {
- writeUInt16(LF_QUADWORD);
- writeInt64(Value);
- }
-}
-
-void TypeRecordBuilder::writeEncodedUnsignedInteger(uint64_t Value) {
- if (Value < LF_CHAR) {
- writeUInt16(static_cast<uint16_t>(Value));
- } else if (Value <= std::numeric_limits<uint16_t>::max()) {
- writeUInt16(LF_USHORT);
- writeUInt16(static_cast<uint16_t>(Value));
- } else if (Value <= std::numeric_limits<uint32_t>::max()) {
- writeUInt16(LF_ULONG);
- writeUInt32(static_cast<uint32_t>(Value));
- } else {
- writeUInt16(LF_UQUADWORD);
- writeUInt64(Value);
- }
-}
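As a rough illustration of the numeric-leaf encoding implemented above: a value small enough to stay below LF_NUMERIC is written directly as a 16-bit integer, and anything larger is prefixed with a leaf kind selecting the narrowest payload that holds it. A standalone sketch of the unsigned case (the LF_* constants follow the usual CodeView values and are assumptions here, not taken from this diff):

#include <cstdint>
#include <cstdio>
#include <limits>
#include <vector>

// Assumed CodeView numeric-leaf constants.
constexpr uint16_t LF_NUMERIC   = 0x8000;
constexpr uint16_t LF_USHORT    = 0x8002;
constexpr uint16_t LF_ULONG     = 0x8004;
constexpr uint16_t LF_UQUADWORD = 0x800a;

// Append Value little-endian in N bytes.
static void appendLE(std::vector<uint8_t> &Out, uint64_t Value, unsigned N) {
  for (unsigned I = 0; I < N; ++I)
    Out.push_back(uint8_t(Value >> (8 * I)));
}

// Mirrors writeEncodedUnsignedInteger: direct if below LF_NUMERIC, otherwise
// a leaf kind followed by the narrowest unsigned payload that fits.
std::vector<uint8_t> encodeUnsigned(uint64_t Value) {
  std::vector<uint8_t> Out;
  if (Value < LF_NUMERIC) {
    appendLE(Out, Value, 2);
  } else if (Value <= std::numeric_limits<uint16_t>::max()) {
    appendLE(Out, LF_USHORT, 2);
    appendLE(Out, Value, 2);
  } else if (Value <= std::numeric_limits<uint32_t>::max()) {
    appendLE(Out, LF_ULONG, 2);
    appendLE(Out, Value, 4);
  } else {
    appendLE(Out, LF_UQUADWORD, 2);
    appendLE(Out, Value, 8);
  }
  return Out;
}

int main() {
  printf("100     -> %zu bytes\n", encodeUnsigned(100).size());        // 2
  printf("0x9000  -> %zu bytes\n", encodeUnsigned(0x9000).size());     // 4
  printf("1<<40   -> %zu bytes\n", encodeUnsigned(1ULL << 40).size()); // 10
  return 0;
}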
-
-void TypeRecordBuilder::writeNullTerminatedString(StringRef Value) {
- // Microsoft's linker seems to have trouble with symbol names longer than
- // 0xffd8 bytes.
- Value = Value.substr(0, 0xffd8);
- Stream.write(Value.data(), Value.size());
- writeUInt8(0);
-}
-
-void TypeRecordBuilder::writeGuid(StringRef Guid) {
- assert(Guid.size() == 16);
- Stream.write(Guid.data(), 16);
-}
-
-void TypeRecordBuilder::writeTypeIndex(TypeIndex TypeInd) {
- writeUInt32(TypeInd.getIndex());
-}
-
-void TypeRecordBuilder::writeTypeRecordKind(TypeRecordKind Kind) {
- writeUInt16(static_cast<uint16_t>(Kind));
-}
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/TypeRecordMapping.cpp b/contrib/llvm/lib/DebugInfo/CodeView/TypeRecordMapping.cpp
new file mode 100644
index 000000000000..f46e08d55429
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/CodeView/TypeRecordMapping.cpp
@@ -0,0 +1,467 @@
+//===- TypeRecordMapping.cpp ------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/TypeRecordMapping.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+
+#define error(X) \
+ if (auto EC = X) \
+ return EC;
+
+namespace {
+struct MapOneMethodRecord {
+ explicit MapOneMethodRecord(bool IsFromOverloadList)
+ : IsFromOverloadList(IsFromOverloadList) {}
+
+ Error operator()(CodeViewRecordIO &IO, OneMethodRecord &Method) const {
+ error(IO.mapInteger(Method.Attrs.Attrs));
+ if (IsFromOverloadList) {
+ uint16_t Padding = 0;
+ error(IO.mapInteger(Padding));
+ }
+ error(IO.mapInteger(Method.Type));
+ if (Method.isIntroducingVirtual()) {
+ error(IO.mapInteger(Method.VFTableOffset));
+ } else if (!IO.isWriting())
+ Method.VFTableOffset = -1;
+
+ if (!IsFromOverloadList)
+ error(IO.mapStringZ(Method.Name));
+
+ return Error::success();
+ }
+
+private:
+ bool IsFromOverloadList;
+};
+}
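The two on-disk shapes handled by this functor differ only in framing: an entry inside an LF_METHODLIST record carries a two-byte pad after the attributes and no name, while an LF_ONEMETHOD subrecord in a field list has no pad but ends with a null-terminated name. A schematic sketch of just the fixed fields mapped by the functor (the member leaf kind is mapped elsewhere, and the optional VFTableOffset and trailing name are omitted; field names here are descriptive, not from the headers):

#include <cstdint>

#pragma pack(push, 1)
struct MethodListEntry {      // entry inside an LF_METHODLIST record
  uint16_t Attrs;             // member attributes
  uint16_t Padding;           // always written as 0
  uint32_t Type;              // TypeIndex of the method's type
  // int32_t VFTableOffset;   // present only for introducing-virtual methods
};

struct OneMethodSubrecord {   // LF_ONEMETHOD member inside a field list
  uint16_t Attrs;             // member attributes (no padding here)
  uint32_t Type;              // TypeIndex of the method's type
  // int32_t VFTableOffset;   // present only for introducing-virtual methods
  // char Name[];             // null-terminated method name follows
};
#pragma pack(pop)

static_assert(sizeof(MethodListEntry) == 8, "fixed part is 8 bytes");
static_assert(sizeof(OneMethodSubrecord) == 6, "fixed part is 6 bytes");

int main() { return 0; }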
+
+static Error mapNameAndUniqueName(CodeViewRecordIO &IO, StringRef &Name,
+ StringRef &UniqueName, bool HasUniqueName) {
+ if (IO.isWriting()) {
+ // Try to be smart about what we write here. We can't write anything too
+ // large, so if we're going to go over the limit, truncate both the name
+ // and unique name by the same amount.
+ size_t BytesLeft = IO.maxFieldLength();
+ if (HasUniqueName) {
+ size_t BytesNeeded = Name.size() + UniqueName.size() + 2;
+ StringRef N = Name;
+ StringRef U = UniqueName;
+ if (BytesNeeded > BytesLeft) {
+ size_t BytesToDrop = (BytesNeeded - BytesLeft);
+ size_t DropN = std::min(N.size(), BytesToDrop / 2);
+ size_t DropU = std::min(U.size(), BytesToDrop - DropN);
+
+ N = N.drop_back(DropN);
+ U = U.drop_back(DropU);
+ }
+
+ error(IO.mapStringZ(N));
+ error(IO.mapStringZ(U));
+ } else {
+ size_t BytesNeeded = Name.size() + 1;
+ StringRef N = Name;
+ if (BytesNeeded > BytesLeft) {
+        size_t BytesToDrop = std::min(N.size(), BytesNeeded - BytesLeft);
+ N = N.drop_back(BytesToDrop);
+ }
+ error(IO.mapStringZ(N));
+ }
+ } else {
+ error(IO.mapStringZ(Name));
+ if (HasUniqueName)
+ error(IO.mapStringZ(UniqueName));
+ }
+
+ return Error::success();
+}
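A small worked sketch of the truncation budget above: when the name plus unique name (plus two terminating nulls) would exceed the remaining field space, the overflow is split between the two strings, taking roughly half from the record name first. The byte budget below is an arbitrary example, not the real maxFieldLength():

#include <algorithm>
#include <cstdio>
#include <string>

// Mirrors the drop computation above for a hypothetical byte budget.
void truncatePair(std::string &N, std::string &U, size_t BytesLeft) {
  size_t BytesNeeded = N.size() + U.size() + 2; // two null terminators
  if (BytesNeeded <= BytesLeft)
    return;
  size_t BytesToDrop = BytesNeeded - BytesLeft;
  size_t DropN = std::min(N.size(), BytesToDrop / 2);
  size_t DropU = std::min(U.size(), BytesToDrop - DropN);
  N.resize(N.size() - DropN);
  U.resize(U.size() - DropU);
}

int main() {
  std::string Name(60, 'n'), Unique(60, 'u');
  truncatePair(Name, Unique, 100);                 // needs 122, budget 100
  printf("%zu %zu\n", Name.size(), Unique.size()); // 49 and 49
  return 0;
}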
+
+Error TypeRecordMapping::visitTypeBegin(CVType &CVR) {
+ assert(!TypeKind.hasValue() && "Already in a type mapping!");
+ assert(!MemberKind.hasValue() && "Already in a member mapping!");
+
+ // FieldList and MethodList records can be any length because they can be
+ // split with continuation records. All other record types cannot be
+ // longer than the maximum record length.
+ Optional<uint32_t> MaxLen;
+ if (CVR.Type != TypeLeafKind::LF_FIELDLIST &&
+ CVR.Type != TypeLeafKind::LF_METHODLIST)
+ MaxLen = MaxRecordLength - sizeof(RecordPrefix);
+ error(IO.beginRecord(MaxLen));
+ TypeKind = CVR.Type;
+ return Error::success();
+}
+
+Error TypeRecordMapping::visitTypeEnd(CVType &Record) {
+ assert(TypeKind.hasValue() && "Not in a type mapping!");
+ assert(!MemberKind.hasValue() && "Still in a member mapping!");
+
+ error(IO.endRecord());
+
+ TypeKind.reset();
+ return Error::success();
+}
+
+Error TypeRecordMapping::visitMemberBegin(CVMemberRecord &Record) {
+ assert(TypeKind.hasValue() && "Not in a type mapping!");
+ assert(!MemberKind.hasValue() && "Already in a member mapping!");
+
+ // The largest possible subrecord is one in which there is a record prefix,
+ // followed by the subrecord, followed by a continuation, and that entire
+  // sequence spans `MaxRecordLength` bytes.  So the record's length is
+ // calculated as follows.
+ constexpr uint32_t ContinuationLength = 8;
+ error(IO.beginRecord(MaxRecordLength - sizeof(RecordPrefix) -
+ ContinuationLength));
+
+ MemberKind = Record.Kind;
+ return Error::success();
+}
+
+Error TypeRecordMapping::visitMemberEnd(CVMemberRecord &Record) {
+ assert(TypeKind.hasValue() && "Not in a type mapping!");
+ assert(MemberKind.hasValue() && "Not in a member mapping!");
+
+ if (!IO.isWriting()) {
+ if (auto EC = IO.skipPadding())
+ return EC;
+ }
+
+ MemberKind.reset();
+ error(IO.endRecord());
+ return Error::success();
+}
+
+Error TypeRecordMapping::visitKnownRecord(CVType &CVR, ModifierRecord &Record) {
+ error(IO.mapInteger(Record.ModifiedType));
+ error(IO.mapEnum(Record.Modifiers));
+
+ return Error::success();
+}
+
+Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
+ ProcedureRecord &Record) {
+ error(IO.mapInteger(Record.ReturnType));
+ error(IO.mapEnum(Record.CallConv));
+ error(IO.mapEnum(Record.Options));
+ error(IO.mapInteger(Record.ParameterCount));
+ error(IO.mapInteger(Record.ArgumentList));
+
+ return Error::success();
+}
+
+Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
+ MemberFunctionRecord &Record) {
+ error(IO.mapInteger(Record.ReturnType));
+ error(IO.mapInteger(Record.ClassType));
+ error(IO.mapInteger(Record.ThisType));
+ error(IO.mapEnum(Record.CallConv));
+ error(IO.mapEnum(Record.Options));
+ error(IO.mapInteger(Record.ParameterCount));
+ error(IO.mapInteger(Record.ArgumentList));
+ error(IO.mapInteger(Record.ThisPointerAdjustment));
+
+ return Error::success();
+}
+
+Error TypeRecordMapping::visitKnownRecord(CVType &CVR, ArgListRecord &Record) {
+ error(IO.mapVectorN<uint32_t>(
+ Record.StringIndices,
+ [](CodeViewRecordIO &IO, TypeIndex &N) { return IO.mapInteger(N); }));
+
+ return Error::success();
+}
+
+Error TypeRecordMapping::visitKnownRecord(CVType &CVR, PointerRecord &Record) {
+ error(IO.mapInteger(Record.ReferentType));
+ error(IO.mapInteger(Record.Attrs));
+
+ if (Record.isPointerToMember()) {
+ if (!IO.isWriting())
+ Record.MemberInfo.emplace();
+
+ MemberPointerInfo &M = *Record.MemberInfo;
+ error(IO.mapInteger(M.ContainingType));
+ error(IO.mapEnum(M.Representation));
+ }
+
+ return Error::success();
+}
+
+Error TypeRecordMapping::visitKnownRecord(CVType &CVR, ArrayRecord &Record) {
+ error(IO.mapInteger(Record.ElementType));
+ error(IO.mapInteger(Record.IndexType));
+ error(IO.mapEncodedInteger(Record.Size));
+ error(IO.mapStringZ(Record.Name));
+
+ return Error::success();
+}
+
+Error TypeRecordMapping::visitKnownRecord(CVType &CVR, ClassRecord &Record) {
+ assert((CVR.Type == TypeLeafKind::LF_STRUCTURE) ||
+ (CVR.Type == TypeLeafKind::LF_CLASS) ||
+ (CVR.Type == TypeLeafKind::LF_INTERFACE));
+
+ error(IO.mapInteger(Record.MemberCount));
+ error(IO.mapEnum(Record.Options));
+ error(IO.mapInteger(Record.FieldList));
+ error(IO.mapInteger(Record.DerivationList));
+ error(IO.mapInteger(Record.VTableShape));
+ error(IO.mapEncodedInteger(Record.Size));
+ error(mapNameAndUniqueName(IO, Record.Name, Record.UniqueName,
+ Record.hasUniqueName()));
+
+ return Error::success();
+}
+
+Error TypeRecordMapping::visitKnownRecord(CVType &CVR, UnionRecord &Record) {
+ error(IO.mapInteger(Record.MemberCount));
+ error(IO.mapEnum(Record.Options));
+ error(IO.mapInteger(Record.FieldList));
+ error(IO.mapEncodedInteger(Record.Size));
+ error(mapNameAndUniqueName(IO, Record.Name, Record.UniqueName,
+ Record.hasUniqueName()));
+
+ return Error::success();
+}
+
+Error TypeRecordMapping::visitKnownRecord(CVType &CVR, EnumRecord &Record) {
+ error(IO.mapInteger(Record.MemberCount));
+ error(IO.mapEnum(Record.Options));
+ error(IO.mapInteger(Record.UnderlyingType));
+ error(IO.mapInteger(Record.FieldList));
+ error(mapNameAndUniqueName(IO, Record.Name, Record.UniqueName,
+ Record.hasUniqueName()));
+
+ return Error::success();
+}
+
+Error TypeRecordMapping::visitKnownRecord(CVType &CVR, BitFieldRecord &Record) {
+ error(IO.mapInteger(Record.Type));
+ error(IO.mapInteger(Record.BitSize));
+ error(IO.mapInteger(Record.BitOffset));
+
+ return Error::success();
+}
+
+Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
+ VFTableShapeRecord &Record) {
+ uint16_t Size;
+ if (IO.isWriting()) {
+ ArrayRef<VFTableSlotKind> Slots = Record.getSlots();
+ Size = Slots.size();
+ error(IO.mapInteger(Size));
+
+ for (size_t SlotIndex = 0; SlotIndex < Slots.size(); SlotIndex += 2) {
+ uint8_t Byte = static_cast<uint8_t>(Slots[SlotIndex]) << 4;
+ if ((SlotIndex + 1) < Slots.size()) {
+ Byte |= static_cast<uint8_t>(Slots[SlotIndex + 1]);
+ }
+ error(IO.mapInteger(Byte));
+ }
+ } else {
+ error(IO.mapInteger(Size));
+ for (uint16_t I = 0; I < Size; I += 2) {
+ uint8_t Byte;
+ error(IO.mapInteger(Byte));
+ Record.Slots.push_back(static_cast<VFTableSlotKind>(Byte & 0xF));
+ if ((I + 1) < Size)
+ Record.Slots.push_back(static_cast<VFTableSlotKind>(Byte >> 4));
+ }
+ }
+
+ return Error::success();
+}
+
+Error TypeRecordMapping::visitKnownRecord(CVType &CVR, VFTableRecord &Record) {
+ error(IO.mapInteger(Record.CompleteClass));
+ error(IO.mapInteger(Record.OverriddenVFTable));
+ error(IO.mapInteger(Record.VFPtrOffset));
+ uint32_t NamesLen = 0;
+ if (IO.isWriting()) {
+ for (auto Name : Record.MethodNames)
+ NamesLen += Name.size() + 1;
+ }
+ error(IO.mapInteger(NamesLen));
+ error(IO.mapVectorTail(
+ Record.MethodNames,
+ [](CodeViewRecordIO &IO, StringRef &S) { return IO.mapStringZ(S); }));
+
+ return Error::success();
+}
+
+Error TypeRecordMapping::visitKnownRecord(CVType &CVR, StringIdRecord &Record) {
+ error(IO.mapInteger(Record.Id));
+ error(IO.mapStringZ(Record.String));
+
+ return Error::success();
+}
+
+Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
+ UdtSourceLineRecord &Record) {
+ error(IO.mapInteger(Record.UDT));
+ error(IO.mapInteger(Record.SourceFile));
+ error(IO.mapInteger(Record.LineNumber));
+
+ return Error::success();
+}
+
+Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
+ UdtModSourceLineRecord &Record) {
+ error(IO.mapInteger(Record.UDT));
+ error(IO.mapInteger(Record.SourceFile));
+ error(IO.mapInteger(Record.LineNumber));
+ error(IO.mapInteger(Record.Module));
+
+ return Error::success();
+}
+
+Error TypeRecordMapping::visitKnownRecord(CVType &CVR, FuncIdRecord &Record) {
+ error(IO.mapInteger(Record.ParentScope));
+ error(IO.mapInteger(Record.FunctionType));
+ error(IO.mapStringZ(Record.Name));
+
+ return Error::success();
+}
+
+Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
+ MemberFuncIdRecord &Record) {
+ error(IO.mapInteger(Record.ClassType));
+ error(IO.mapInteger(Record.FunctionType));
+ error(IO.mapStringZ(Record.Name));
+
+ return Error::success();
+}
+
+Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
+ BuildInfoRecord &Record) {
+ error(IO.mapVectorN<uint16_t>(
+ Record.ArgIndices,
+ [](CodeViewRecordIO &IO, TypeIndex &N) { return IO.mapInteger(N); }));
+
+ return Error::success();
+}
+
+Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
+ MethodOverloadListRecord &Record) {
+ // TODO: Split the list into multiple records if it's longer than 64KB, using
+ // a subrecord of TypeRecordKind::Index to chain the records together.
+ error(IO.mapVectorTail(Record.Methods, MapOneMethodRecord(true)));
+
+ return Error::success();
+}
+
+Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
+ FieldListRecord &Record) {
+ error(IO.mapByteVectorTail(Record.Data));
+
+ return Error::success();
+}
+
+Error TypeRecordMapping::visitKnownRecord(CVType &CVR,
+ TypeServer2Record &Record) {
+ return Error::success();
+}
+
+Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
+ BaseClassRecord &Record) {
+ error(IO.mapInteger(Record.Attrs.Attrs));
+ error(IO.mapInteger(Record.Type));
+ error(IO.mapEncodedInteger(Record.Offset));
+
+ return Error::success();
+}
+
+Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
+ EnumeratorRecord &Record) {
+ error(IO.mapInteger(Record.Attrs.Attrs));
+
+ // FIXME: Handle full APInt such as __int128.
+ error(IO.mapEncodedInteger(Record.Value));
+ error(IO.mapStringZ(Record.Name));
+
+ return Error::success();
+}
+
+Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
+ DataMemberRecord &Record) {
+ error(IO.mapInteger(Record.Attrs.Attrs));
+ error(IO.mapInteger(Record.Type));
+ error(IO.mapEncodedInteger(Record.FieldOffset));
+ error(IO.mapStringZ(Record.Name));
+
+ return Error::success();
+}
+
+Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
+ OverloadedMethodRecord &Record) {
+ error(IO.mapInteger(Record.NumOverloads));
+ error(IO.mapInteger(Record.MethodList));
+ error(IO.mapStringZ(Record.Name));
+
+ return Error::success();
+}
+
+Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
+ OneMethodRecord &Record) {
+ MapOneMethodRecord Mapper(false);
+ return Mapper(IO, Record);
+}
+
+Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
+ NestedTypeRecord &Record) {
+ uint16_t Padding = 0;
+ error(IO.mapInteger(Padding));
+ error(IO.mapInteger(Record.Type));
+ error(IO.mapStringZ(Record.Name));
+
+ return Error::success();
+}
+
+Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
+ StaticDataMemberRecord &Record) {
+
+ error(IO.mapInteger(Record.Attrs.Attrs));
+ error(IO.mapInteger(Record.Type));
+ error(IO.mapStringZ(Record.Name));
+
+ return Error::success();
+}
+
+Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
+ VirtualBaseClassRecord &Record) {
+
+ error(IO.mapInteger(Record.Attrs.Attrs));
+ error(IO.mapInteger(Record.BaseType));
+ error(IO.mapInteger(Record.VBPtrType));
+ error(IO.mapEncodedInteger(Record.VBPtrOffset));
+ error(IO.mapEncodedInteger(Record.VTableIndex));
+
+ return Error::success();
+}
+
+Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
+ VFPtrRecord &Record) {
+ uint16_t Padding = 0;
+ error(IO.mapInteger(Padding));
+ error(IO.mapInteger(Record.Type));
+
+ return Error::success();
+}
+
+Error TypeRecordMapping::visitKnownMember(CVMemberRecord &CVR,
+ ListContinuationRecord &Record) {
+ uint16_t Padding = 0;
+ error(IO.mapInteger(Padding));
+ error(IO.mapInteger(Record.ContinuationIndex));
+
+ return Error::success();
+}
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/TypeSerializer.cpp b/contrib/llvm/lib/DebugInfo/CodeView/TypeSerializer.cpp
new file mode 100644
index 000000000000..f24fcff86274
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/CodeView/TypeSerializer.cpp
@@ -0,0 +1,243 @@
+//===- TypeSerializer.cpp ---------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/CodeView/TypeSerializer.h"
+
+#include "llvm/DebugInfo/MSF/StreamWriter.h"
+
+#include <string.h>
+
+using namespace llvm;
+using namespace llvm::codeview;
+
+bool TypeSerializer::isInFieldList() const {
+ return TypeKind.hasValue() && *TypeKind == TypeLeafKind::LF_FIELDLIST;
+}
+
+TypeIndex TypeSerializer::calcNextTypeIndex() const {
+ if (LastTypeIndex.isNoneType())
+ return TypeIndex(TypeIndex::FirstNonSimpleIndex);
+ else
+ return TypeIndex(LastTypeIndex.getIndex() + 1);
+}
+
+TypeIndex TypeSerializer::incrementTypeIndex() {
+ TypeIndex Previous = LastTypeIndex;
+ LastTypeIndex = calcNextTypeIndex();
+ return Previous;
+}
+
+MutableArrayRef<uint8_t> TypeSerializer::getCurrentSubRecordData() {
+ assert(isInFieldList());
+ return getCurrentRecordData().drop_front(CurrentSegment.length());
+}
+
+MutableArrayRef<uint8_t> TypeSerializer::getCurrentRecordData() {
+ return MutableArrayRef<uint8_t>(RecordBuffer).take_front(Writer.getOffset());
+}
+
+Error TypeSerializer::writeRecordPrefix(TypeLeafKind Kind) {
+ RecordPrefix Prefix;
+ Prefix.RecordKind = Kind;
+ Prefix.RecordLen = 0;
+ if (auto EC = Writer.writeObject(Prefix))
+ return EC;
+ return Error::success();
+}
+
+TypeIndex
+TypeSerializer::insertRecordBytesPrivate(MutableArrayRef<uint8_t> Record) {
+ assert(Record.size() % 4 == 0 && "Record is not aligned to 4 bytes!");
+
+ StringRef S(reinterpret_cast<const char *>(Record.data()), Record.size());
+
+ TypeIndex NextTypeIndex = calcNextTypeIndex();
+ auto Result = HashedRecords.try_emplace(S, NextTypeIndex);
+ if (Result.second) {
+ LastTypeIndex = NextTypeIndex;
+ SeenRecords.push_back(Record);
+ }
+ return Result.first->getValue();
+}
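The dedup above hinges on keying HashedRecords by the serialized record bytes viewed as a StringRef, so inserting bytes that were already emitted hands back the existing TypeIndex instead of allocating a new one. A simplified standalone sketch of that behaviour, assuming 0x1000 as the first non-simple type index:

#include <cstdint>
#include <string>
#include <unordered_map>
#include <vector>

// Assign indices starting at 0x1000 and reuse the existing index when
// identical record bytes are inserted a second time.
class RecordDeduper {
  std::unordered_map<std::string, uint32_t> Hashed;
  uint32_t NextIndex = 0x1000;

public:
  uint32_t insert(const std::vector<uint8_t> &Record) {
    std::string Key(Record.begin(), Record.end());
    auto Result = Hashed.emplace(std::move(Key), NextIndex);
    if (Result.second)  // first time we have seen these bytes
      ++NextIndex;
    return Result.first->second;
  }
};

int main() {
  RecordDeduper D;
  std::vector<uint8_t> A = {1, 2, 3, 0};
  std::vector<uint8_t> B = {4, 5, 6, 0};
  uint32_t IA = D.insert(A); // 0x1000
  uint32_t IB = D.insert(B); // 0x1001
  uint32_t IC = D.insert(A); // 0x1000 again, no new index allocated
  return (IA == IC && IB == 0x1001) ? 0 : 1;
}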
+
+Expected<MutableArrayRef<uint8_t>>
+TypeSerializer::addPadding(MutableArrayRef<uint8_t> Record) {
+ uint32_t Align = Record.size() % 4;
+ if (Align == 0)
+ return Record;
+
+ int PaddingBytes = 4 - Align;
+ int N = PaddingBytes;
+ while (PaddingBytes > 0) {
+ uint8_t Pad = static_cast<uint8_t>(LF_PAD0 + PaddingBytes);
+ if (auto EC = Writer.writeInteger(Pad))
+ return std::move(EC);
+ --PaddingBytes;
+ }
+ return MutableArrayRef<uint8_t>(Record.data(), Record.size() + N);
+}
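The alignment step above pads every record out to a 4-byte boundary with LF_PAD bytes whose values count down toward the boundary, so a reader can distinguish padding from payload. A standalone sketch of what gets appended (LF_PAD0 = 0xF0 follows the usual CodeView convention and is an assumption here):

#include <cstdint>
#include <vector>

constexpr uint8_t LF_PAD0 = 0xF0; // assumed CodeView padding base value

// Produce the pad bytes the serializer would append for a record of Size bytes.
std::vector<uint8_t> padBytesFor(size_t Size) {
  std::vector<uint8_t> Pad;
  size_t Align = Size % 4;
  if (Align == 0)
    return Pad;
  for (int Remaining = static_cast<int>(4 - Align); Remaining > 0; --Remaining)
    Pad.push_back(static_cast<uint8_t>(LF_PAD0 + Remaining));
  return Pad;
}

int main() {
  // A 5-byte record gets LF_PAD3, LF_PAD2, LF_PAD1 -> 0xF3 0xF2 0xF1.
  std::vector<uint8_t> P = padBytesFor(5);
  return (P.size() == 3 && P[0] == 0xF3 && P[2] == 0xF1) ? 0 : 1;
}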
+
+TypeSerializer::TypeSerializer(BumpPtrAllocator &Storage)
+ : RecordStorage(Storage), LastTypeIndex(),
+ RecordBuffer(MaxRecordLength * 2), Stream(RecordBuffer), Writer(Stream),
+ Mapping(Writer) {
+ // RecordBuffer needs to be able to hold enough data so that if we are 1
+  // byte short of MaxRecordLength, and then we try to write MaxRecordLength
+  // bytes, we won't overflow.
+}
+
+ArrayRef<MutableArrayRef<uint8_t>> TypeSerializer::records() const {
+ return SeenRecords;
+}
+
+TypeIndex TypeSerializer::getLastTypeIndex() const { return LastTypeIndex; }
+
+TypeIndex TypeSerializer::insertRecordBytes(MutableArrayRef<uint8_t> Record) {
+ assert(!TypeKind.hasValue() && "Already in a type mapping!");
+ assert(Writer.getOffset() == 0 && "Stream has data already!");
+
+ return insertRecordBytesPrivate(Record);
+}
+
+Error TypeSerializer::visitTypeBegin(CVType &Record) {
+ assert(!TypeKind.hasValue() && "Already in a type mapping!");
+ assert(Writer.getOffset() == 0 && "Stream has data already!");
+
+ if (auto EC = writeRecordPrefix(Record.kind()))
+ return EC;
+
+ TypeKind = Record.kind();
+ if (auto EC = Mapping.visitTypeBegin(Record))
+ return EC;
+
+ return Error::success();
+}
+
+Expected<TypeIndex> TypeSerializer::visitTypeEndGetIndex(CVType &Record) {
+ assert(TypeKind.hasValue() && "Not in a type mapping!");
+ if (auto EC = Mapping.visitTypeEnd(Record))
+ return std::move(EC);
+
+ // Update the record's length and fill out the CVType members to point to
+ // the stable memory holding the record's data.
+ auto ThisRecordData = getCurrentRecordData();
+ auto ExpectedData = addPadding(ThisRecordData);
+ if (!ExpectedData)
+ return ExpectedData.takeError();
+ ThisRecordData = *ExpectedData;
+
+ RecordPrefix *Prefix =
+ reinterpret_cast<RecordPrefix *>(ThisRecordData.data());
+ Prefix->RecordLen = ThisRecordData.size() - sizeof(uint16_t);
+
+ uint8_t *Copy = RecordStorage.Allocate<uint8_t>(ThisRecordData.size());
+ ::memcpy(Copy, ThisRecordData.data(), ThisRecordData.size());
+ ThisRecordData = MutableArrayRef<uint8_t>(Copy, ThisRecordData.size());
+ Record = CVType(*TypeKind, ThisRecordData);
+ TypeIndex InsertedTypeIndex = insertRecordBytesPrivate(ThisRecordData);
+
+ // Write out each additional segment in reverse order, and update each
+ // record's continuation index to point to the previous one.
+ for (auto X : reverse(FieldListSegments)) {
+ auto CIBytes = X.take_back(sizeof(uint32_t));
+ support::ulittle32_t *CI =
+ reinterpret_cast<support::ulittle32_t *>(CIBytes.data());
+ assert(*CI == 0xB0C0B0C0 && "Invalid TypeIndex placeholder");
+ *CI = InsertedTypeIndex.getIndex();
+ InsertedTypeIndex = insertRecordBytesPrivate(X);
+ }
+
+ TypeKind.reset();
+ Writer.setOffset(0);
+ FieldListSegments.clear();
+ CurrentSegment.SubRecords.clear();
+
+ return InsertedTypeIndex;
+}
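One convention worth spelling out: RecordLen in the prefix does not count the two-byte length field itself, which is why the code above stores the padded record size minus sizeof(uint16_t). A minimal sketch (the LF_STRUCTURE value is an example, not taken from this diff):

#include <cstdint>

#pragma pack(push, 1)
struct RecordPrefix {
  uint16_t RecordLen;  // number of bytes that follow this field
  uint16_t RecordKind; // the type leaf kind
};
#pragma pack(pop)

int main() {
  const uint32_t TotalBytes = 52;  // full size of the record on disk
  RecordPrefix Prefix;
  Prefix.RecordKind = 0x1505;      // e.g. LF_STRUCTURE
  Prefix.RecordLen = static_cast<uint16_t>(TotalBytes - sizeof(uint16_t));
  return Prefix.RecordLen == 50 ? 0 : 1;
}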
+
+Error TypeSerializer::visitTypeEnd(CVType &Record) {
+ auto ExpectedIndex = visitTypeEndGetIndex(Record);
+ if (!ExpectedIndex)
+ return ExpectedIndex.takeError();
+ return Error::success();
+}
+
+Error TypeSerializer::visitMemberBegin(CVMemberRecord &Record) {
+ assert(isInFieldList() && "Not in a field list!");
+ assert(!MemberKind.hasValue() && "Already in a member record!");
+ MemberKind = Record.Kind;
+
+ if (auto EC = Mapping.visitMemberBegin(Record))
+ return EC;
+
+ return Error::success();
+}
+
+Error TypeSerializer::visitMemberEnd(CVMemberRecord &Record) {
+ if (auto EC = Mapping.visitMemberEnd(Record))
+ return EC;
+
+ // Check if this subrecord makes the current segment not fit in 64K minus
+ // the space for a continuation record (8 bytes). If the segment does not
+ // fit, insert a continuation record.
+ if (Writer.getOffset() > MaxRecordLength - ContinuationLength) {
+ MutableArrayRef<uint8_t> Data = getCurrentRecordData();
+ SubRecord LastSubRecord = CurrentSegment.SubRecords.back();
+ uint32_t CopySize = CurrentSegment.length() - LastSubRecord.Size;
+ auto CopyData = Data.take_front(CopySize);
+ auto LeftOverData = Data.drop_front(CopySize);
+ assert(LastSubRecord.Size == LeftOverData.size());
+
+ // Allocate stable storage for the record and copy the old record plus
+ // continuation over.
+ uint16_t LengthWithSize = CopySize + ContinuationLength;
+ assert(LengthWithSize <= MaxRecordLength);
+ RecordPrefix *Prefix = reinterpret_cast<RecordPrefix *>(CopyData.data());
+ Prefix->RecordLen = LengthWithSize - sizeof(uint16_t);
+
+ uint8_t *SegmentBytes = RecordStorage.Allocate<uint8_t>(LengthWithSize);
+ auto SavedSegment = MutableArrayRef<uint8_t>(SegmentBytes, LengthWithSize);
+ msf::MutableByteStream CS(SavedSegment);
+ msf::StreamWriter CW(CS);
+ if (auto EC = CW.writeBytes(CopyData))
+ return EC;
+ if (auto EC = CW.writeEnum(TypeLeafKind::LF_INDEX))
+ return EC;
+ if (auto EC = CW.writeInteger(uint16_t(0)))
+ return EC;
+ if (auto EC = CW.writeInteger(uint32_t(0xB0C0B0C0)))
+ return EC;
+ FieldListSegments.push_back(SavedSegment);
+
+ // Write a new placeholder record prefix to mark the start of this new
+ // top-level record.
+ Writer.setOffset(0);
+ if (auto EC = writeRecordPrefix(TypeLeafKind::LF_FIELDLIST))
+ return EC;
+
+ // Then move over the subrecord that overflowed the old segment to the
+ // beginning of this segment. Note that we have to use memmove here
+ // instead of Writer.writeBytes(), because the new and old locations
+ // could overlap.
+ ::memmove(Stream.data().data() + sizeof(RecordPrefix), LeftOverData.data(),
+ LeftOverData.size());
+ // And point the segment writer at the end of that subrecord.
+ Writer.setOffset(LeftOverData.size() + sizeof(RecordPrefix));
+
+ CurrentSegment.SubRecords.clear();
+ CurrentSegment.SubRecords.push_back(LastSubRecord);
+ }
+
+ // Update the CVMemberRecord since we may have shifted around or gotten
+ // padded.
+ Record.Data = getCurrentSubRecordData();
+
+ MemberKind.reset();
+ return Error::success();
+}
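The eight bytes reserved by ContinuationLength hold an LF_INDEX subrecord: a two-byte member kind, two bytes of padding, and a four-byte continuation TypeIndex that is written as the 0xB0C0B0C0 placeholder here and back-patched in visitTypeEndGetIndex once the index of the following segment is known. A schematic sketch (the concrete leaf value is elided; only the layout is shown):

#include <cstdint>

#pragma pack(push, 1)
// Trailer appended to a full field-list segment (8 bytes, matching the
// ContinuationLength used above).
struct ContinuationTrailer {
  uint16_t Kind;              // TypeLeafKind::LF_INDEX in the real stream
  uint16_t Padding;           // written as 0
  uint32_t ContinuationIndex; // 0xB0C0B0C0 placeholder, patched later
};
#pragma pack(pop)

static_assert(sizeof(ContinuationTrailer) == 8,
              "continuation record is 8 bytes");

int main() {
  ContinuationTrailer T{/*Kind=*/0, /*Padding=*/0,
                        /*ContinuationIndex=*/0xB0C0B0C0};
  // Back-patching replaces the placeholder with the TypeIndex of the
  // segment that logically follows this one.
  T.ContinuationIndex = 0x1001;
  return T.ContinuationIndex == 0x1001 ? 0 : 1;
}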
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp b/contrib/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
index ebfda2462be1..ed6cf5743a12 100644
--- a/contrib/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
+++ b/contrib/llvm/lib/DebugInfo/CodeView/TypeStreamMerger.cpp
@@ -11,10 +11,11 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
-#include "llvm/DebugInfo/CodeView/FieldListRecordBuilder.h"
-#include "llvm/DebugInfo/CodeView/StreamRef.h"
+#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/CodeView/TypeTableBuilder.h"
+#include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h"
#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ScopedPrinter.h"
@@ -53,37 +54,61 @@ namespace {
/// existing destination type index.
class TypeStreamMerger : public TypeVisitorCallbacks {
public:
- TypeStreamMerger(TypeTableBuilder &DestStream) : DestStream(DestStream) {
+ TypeStreamMerger(TypeTableBuilder &DestStream)
+ : DestStream(DestStream), FieldListBuilder(DestStream) {
assert(!hadError());
}
/// TypeVisitorCallbacks overrides.
#define TYPE_RECORD(EnumName, EnumVal, Name) \
- Error visit##Name(Name##Record &Record) override;
+ Error visitKnownRecord(CVType &CVR, Name##Record &Record) override;
#define MEMBER_RECORD(EnumName, EnumVal, Name) \
- TYPE_RECORD(EnumName, EnumVal, Name)
+ Error visitKnownMember(CVMemberRecord &CVR, Name##Record &Record) override;
#define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
#define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
#include "llvm/DebugInfo/CodeView/TypeRecords.def"
- Error visitUnknownType(const CVRecord<TypeLeafKind> &Record) override;
+ Error visitUnknownType(CVType &Record) override;
- Error visitTypeBegin(const CVRecord<TypeLeafKind> &Record) override;
- Error visitTypeEnd(const CVRecord<TypeLeafKind> &Record) override;
-
- Error visitFieldListEnd(const CVRecord<TypeLeafKind> &Record) override;
+ Error visitTypeBegin(CVType &Record) override;
+ Error visitTypeEnd(CVType &Record) override;
+ Error visitMemberEnd(CVMemberRecord &Record) override;
bool mergeStream(const CVTypeArray &Types);
private:
+ template <typename RecordType>
+ Error visitKnownRecordImpl(RecordType &Record) {
+ FoundBadTypeIndex |= !Record.remapTypeIndices(IndexMap);
+ IndexMap.push_back(DestStream.writeKnownType(Record));
+ return Error::success();
+ }
+
+ Error visitKnownRecordImpl(FieldListRecord &Record) {
+ CVTypeVisitor Visitor(*this);
+
+ if (auto EC = Visitor.visitFieldListMemberStream(Record.Data))
+ return EC;
+ return Error::success();
+ }
+
+ template <typename RecordType>
+ Error visitKnownMemberRecordImpl(RecordType &Record) {
+ FoundBadTypeIndex |= !Record.remapTypeIndices(IndexMap);
+ FieldListBuilder.writeMemberType(Record);
+ return Error::success();
+ }
+
bool hadError() { return FoundBadTypeIndex; }
bool FoundBadTypeIndex = false;
- FieldListRecordBuilder FieldBuilder;
+ BumpPtrAllocator Allocator;
TypeTableBuilder &DestStream;
+ FieldListRecordBuilder FieldListBuilder;
+ bool IsInFieldList{false};
size_t BeginIndexMapSize = 0;
/// Map from source type index to destination type index. Indexed by source
@@ -93,39 +118,45 @@ private:
} // end anonymous namespace
-Error TypeStreamMerger::visitTypeBegin(const CVRecord<TypeLeafKind> &Rec) {
- BeginIndexMapSize = IndexMap.size();
+Error TypeStreamMerger::visitTypeBegin(CVRecord<TypeLeafKind> &Rec) {
+ if (Rec.Type == TypeLeafKind::LF_FIELDLIST) {
+ assert(!IsInFieldList);
+ IsInFieldList = true;
+ FieldListBuilder.begin();
+ } else
+ BeginIndexMapSize = IndexMap.size();
return Error::success();
}
-Error TypeStreamMerger::visitTypeEnd(const CVRecord<TypeLeafKind> &Rec) {
- assert(IndexMap.size() == BeginIndexMapSize + 1);
+Error TypeStreamMerger::visitTypeEnd(CVRecord<TypeLeafKind> &Rec) {
+ if (Rec.Type == TypeLeafKind::LF_FIELDLIST) {
+ TypeIndex Index = FieldListBuilder.end();
+ IndexMap.push_back(Index);
+ IsInFieldList = false;
+ }
return Error::success();
}
-Error TypeStreamMerger::visitFieldListEnd(const CVRecord<TypeLeafKind> &Rec) {
- IndexMap.push_back(DestStream.writeFieldList(FieldBuilder));
- FieldBuilder.reset();
+Error TypeStreamMerger::visitMemberEnd(CVMemberRecord &Rec) {
+ assert(IndexMap.size() == BeginIndexMapSize + 1);
return Error::success();
}
#define TYPE_RECORD(EnumName, EnumVal, Name) \
- Error TypeStreamMerger::visit##Name(Name##Record &Record) { \
- FoundBadTypeIndex |= !Record.remapTypeIndices(IndexMap); \
- IndexMap.push_back(DestStream.write##Name(Record)); \
- return Error::success(); \
+ Error TypeStreamMerger::visitKnownRecord(CVType &CVR, \
+ Name##Record &Record) { \
+ return visitKnownRecordImpl(Record); \
}
#define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
#define MEMBER_RECORD(EnumName, EnumVal, Name) \
- Error TypeStreamMerger::visit##Name(Name##Record &Record) { \
- FoundBadTypeIndex |= !Record.remapTypeIndices(IndexMap); \
- FieldBuilder.write##Name(Record); \
- return Error::success(); \
+ Error TypeStreamMerger::visitKnownMember(CVMemberRecord &CVR, \
+ Name##Record &Record) { \
+ return visitKnownMemberRecordImpl(Record); \
}
#define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
#include "llvm/DebugInfo/CodeView/TypeRecords.def"
-Error TypeStreamMerger::visitUnknownType(const CVRecord<TypeLeafKind> &Rec) {
+Error TypeStreamMerger::visitUnknownType(CVType &Rec) {
// We failed to translate a type. Translate this index as "not translated".
IndexMap.push_back(
TypeIndex(SimpleTypeKind::NotTranslated, SimpleTypeMode::Direct));
@@ -134,7 +165,14 @@ Error TypeStreamMerger::visitUnknownType(const CVRecord<TypeLeafKind> &Rec) {
bool TypeStreamMerger::mergeStream(const CVTypeArray &Types) {
assert(IndexMap.empty());
- CVTypeVisitor Visitor(*this);
+ TypeVisitorCallbackPipeline Pipeline;
+
+ TypeDeserializer Deserializer;
+ Pipeline.addCallbackToPipeline(Deserializer);
+ Pipeline.addCallbackToPipeline(*this);
+
+ CVTypeVisitor Visitor(Pipeline);
+
if (auto EC = Visitor.visitTypeStream(Types)) {
consumeError(std::move(EC));
return false;
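A rough sketch of what the IndexMap lookup amounts to: simple (builtin) type indices pass through unchanged, while non-simple indices are looked up by their position in the source stream and rewritten to whatever index the corresponding record received in the destination stream. This simplifies remapIndex, whose body is not part of this hunk, and assumes 0x1000 as the first non-simple index:

#include <cstdint>
#include <vector>

constexpr uint32_t FirstNonSimpleIndex = 0x1000;

// Remap a source-stream type index into the destination stream, mirroring
// the IndexMap usage above. Returns false for an index we have not seen.
bool remapIndex(const std::vector<uint32_t> &IndexMap, uint32_t &Index) {
  if (Index < FirstNonSimpleIndex) // simple types are stream-independent
    return true;
  uint32_t MapPos = Index - FirstNonSimpleIndex;
  if (MapPos >= IndexMap.size())
    return false;                  // forward reference: a bad type index
  Index = IndexMap[MapPos];
  return true;
}

int main() {
  // Source records 0x1000 and 0x1001 landed at 0x1005 and 0x1002 in the
  // destination stream.
  std::vector<uint32_t> IndexMap = {0x1005, 0x1002};
  uint32_t A = 0x0074;             // a simple index, e.g. "int": unchanged
  uint32_t B = 0x1001;             // remapped to 0x1002
  bool OkA = remapIndex(IndexMap, A);
  bool OkB = remapIndex(IndexMap, B);
  return (OkA && OkB && A == 0x0074 && B == 0x1002) ? 0 : 1;
}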
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/TypeTableBuilder.cpp b/contrib/llvm/lib/DebugInfo/CodeView/TypeTableBuilder.cpp
deleted file mode 100644
index 647538ee8ceb..000000000000
--- a/contrib/llvm/lib/DebugInfo/CodeView/TypeTableBuilder.cpp
+++ /dev/null
@@ -1,303 +0,0 @@
-//===-- TypeTableBuilder.cpp ----------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/DebugInfo/CodeView/TypeTableBuilder.h"
-#include "llvm/DebugInfo/CodeView/FieldListRecordBuilder.h"
-#include "llvm/DebugInfo/CodeView/MethodListRecordBuilder.h"
-#include "llvm/DebugInfo/CodeView/TypeIndex.h"
-#include "llvm/DebugInfo/CodeView/TypeRecordBuilder.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-using namespace codeview;
-
-TypeTableBuilder::TypeTableBuilder() {}
-
-TypeTableBuilder::~TypeTableBuilder() {}
-
-TypeIndex TypeTableBuilder::writeModifier(const ModifierRecord &Record) {
- TypeRecordBuilder Builder(Record.getKind());
-
- Builder.writeTypeIndex(Record.getModifiedType());
- Builder.writeUInt16(static_cast<uint16_t>(Record.getModifiers()));
-
- return writeRecord(Builder);
-}
-
-TypeIndex TypeTableBuilder::writeProcedure(const ProcedureRecord &Record) {
- TypeRecordBuilder Builder(Record.getKind());
-
- Builder.writeTypeIndex(Record.getReturnType());
- Builder.writeUInt8(static_cast<uint8_t>(Record.getCallConv()));
- Builder.writeUInt8(static_cast<uint8_t>(Record.getOptions()));
- Builder.writeUInt16(Record.getParameterCount());
- Builder.writeTypeIndex(Record.getArgumentList());
-
- return writeRecord(Builder);
-}
-
-TypeIndex
-TypeTableBuilder::writeMemberFunction(const MemberFunctionRecord &Record) {
- TypeRecordBuilder Builder(Record.getKind());
-
- Builder.writeTypeIndex(Record.getReturnType());
- Builder.writeTypeIndex(Record.getClassType());
- Builder.writeTypeIndex(Record.getThisType());
- Builder.writeUInt8(static_cast<uint8_t>(Record.getCallConv()));
- Builder.writeUInt8(static_cast<uint8_t>(Record.getOptions()));
- Builder.writeUInt16(Record.getParameterCount());
- Builder.writeTypeIndex(Record.getArgumentList());
- Builder.writeInt32(Record.getThisPointerAdjustment());
-
- return writeRecord(Builder);
-}
-
-TypeIndex TypeTableBuilder::writeArgList(const ArgListRecord &Record) {
- TypeRecordBuilder Builder(Record.getKind());
-
- Builder.writeUInt32(Record.getIndices().size());
- for (TypeIndex TI : Record.getIndices()) {
- Builder.writeTypeIndex(TI);
- }
-
- return writeRecord(Builder);
-}
-
-TypeIndex TypeTableBuilder::writePointer(const PointerRecord &Record) {
- TypeRecordBuilder Builder(Record.getKind());
-
- Builder.writeTypeIndex(Record.getReferentType());
- uint32_t flags = static_cast<uint32_t>(Record.getOptions()) |
- (Record.getSize() << PointerRecord::PointerSizeShift) |
- (static_cast<uint32_t>(Record.getMode())
- << PointerRecord::PointerModeShift) |
- (static_cast<uint32_t>(Record.getPointerKind())
- << PointerRecord::PointerKindShift);
- Builder.writeUInt32(flags);
-
- if (Record.isPointerToMember()) {
- const MemberPointerInfo &M = Record.getMemberInfo();
- Builder.writeTypeIndex(M.getContainingType());
- Builder.writeUInt16(static_cast<uint16_t>(M.getRepresentation()));
- }
-
- return writeRecord(Builder);
-}
-
-TypeIndex TypeTableBuilder::writeArray(const ArrayRecord &Record) {
- TypeRecordBuilder Builder(Record.getKind());
-
- Builder.writeTypeIndex(Record.getElementType());
- Builder.writeTypeIndex(Record.getIndexType());
- Builder.writeEncodedUnsignedInteger(Record.getSize());
- Builder.writeNullTerminatedString(Record.getName());
-
- return writeRecord(Builder);
-}
-
-TypeIndex TypeTableBuilder::writeClass(const ClassRecord &Record) {
- assert((Record.getKind() == TypeRecordKind::Struct) ||
- (Record.getKind() == TypeRecordKind::Class) ||
- (Record.getKind() == TypeRecordKind::Interface));
-
- TypeRecordBuilder Builder(Record.getKind());
-
- Builder.writeUInt16(Record.getMemberCount());
- uint16_t Flags =
- static_cast<uint16_t>(Record.getOptions()) |
- (static_cast<uint16_t>(Record.getHfa()) << ClassRecord::HfaKindShift) |
- (static_cast<uint16_t>(Record.getWinRTKind())
- << ClassRecord::WinRTKindShift);
- Builder.writeUInt16(Flags);
- Builder.writeTypeIndex(Record.getFieldList());
- Builder.writeTypeIndex(Record.getDerivationList());
- Builder.writeTypeIndex(Record.getVTableShape());
- Builder.writeEncodedUnsignedInteger(Record.getSize());
- Builder.writeNullTerminatedString(Record.getName());
- if ((Record.getOptions() & ClassOptions::HasUniqueName) !=
- ClassOptions::None) {
- Builder.writeNullTerminatedString(Record.getUniqueName());
- }
-
- return writeRecord(Builder);
-}
-
-TypeIndex TypeTableBuilder::writeUnion(const UnionRecord &Record) {
- TypeRecordBuilder Builder(TypeRecordKind::Union);
- Builder.writeUInt16(Record.getMemberCount());
- uint16_t Flags =
- static_cast<uint16_t>(Record.getOptions()) |
- (static_cast<uint16_t>(Record.getHfa()) << ClassRecord::HfaKindShift);
- Builder.writeUInt16(Flags);
- Builder.writeTypeIndex(Record.getFieldList());
- Builder.writeEncodedUnsignedInteger(Record.getSize());
- Builder.writeNullTerminatedString(Record.getName());
- if ((Record.getOptions() & ClassOptions::HasUniqueName) !=
- ClassOptions::None) {
- Builder.writeNullTerminatedString(Record.getUniqueName());
- }
- return writeRecord(Builder);
-}
-
-TypeIndex TypeTableBuilder::writeEnum(const EnumRecord &Record) {
- TypeRecordBuilder Builder(Record.getKind());
-
- Builder.writeUInt16(Record.getMemberCount());
- Builder.writeUInt16(static_cast<uint16_t>(Record.getOptions()));
- Builder.writeTypeIndex(Record.getUnderlyingType());
- Builder.writeTypeIndex(Record.getFieldList());
- Builder.writeNullTerminatedString(Record.getName());
- if ((Record.getOptions() & ClassOptions::HasUniqueName) !=
- ClassOptions::None) {
- Builder.writeNullTerminatedString(Record.getUniqueName());
- }
-
- return writeRecord(Builder);
-}
-
-TypeIndex TypeTableBuilder::writeBitField(const BitFieldRecord &Record) {
- TypeRecordBuilder Builder(Record.getKind());
-
- Builder.writeTypeIndex(Record.getType());
- Builder.writeUInt8(Record.getBitSize());
- Builder.writeUInt8(Record.getBitOffset());
-
- return writeRecord(Builder);
-}
-
-TypeIndex
-TypeTableBuilder::writeVFTableShape(const VFTableShapeRecord &Record) {
- TypeRecordBuilder Builder(Record.getKind());
-
- ArrayRef<VFTableSlotKind> Slots = Record.getSlots();
-
- Builder.writeUInt16(Slots.size());
- for (size_t SlotIndex = 0; SlotIndex < Slots.size(); SlotIndex += 2) {
- uint8_t Byte = static_cast<uint8_t>(Slots[SlotIndex]) << 4;
- if ((SlotIndex + 1) < Slots.size()) {
- Byte |= static_cast<uint8_t>(Slots[SlotIndex + 1]);
- }
- Builder.writeUInt8(Byte);
- }
-
- return writeRecord(Builder);
-}
-
-TypeIndex
-TypeTableBuilder::writeVFTable(const VFTableRecord &Record) {
- TypeRecordBuilder Builder(Record.getKind());
- Builder.writeTypeIndex(Record.getCompleteClass());
- Builder.writeTypeIndex(Record.getOverriddenVTable());
- Builder.writeUInt32(Record.getVFPtrOffset());
-
- // Sum up the lengths of the null-terminated names.
- size_t NamesLen = Record.getName().size() + 1;
- for (StringRef MethodName : Record.getMethodNames())
- NamesLen += MethodName.size() + 1;
-
- Builder.writeUInt32(NamesLen);
- Builder.writeNullTerminatedString(Record.getName());
- for (StringRef MethodName : Record.getMethodNames())
- Builder.writeNullTerminatedString(MethodName);
-
- return writeRecord(Builder);
-}
-
-TypeIndex TypeTableBuilder::writeStringId(const StringIdRecord &Record) {
- TypeRecordBuilder Builder(TypeRecordKind::StringId);
- Builder.writeTypeIndex(Record.getId());
- Builder.writeNullTerminatedString(Record.getString());
- return writeRecord(Builder);
-}
-
-TypeIndex
-TypeTableBuilder::writeUdtSourceLine(const UdtSourceLineRecord &Record) {
- TypeRecordBuilder Builder(Record.getKind());
- Builder.writeTypeIndex(Record.getUDT());
- Builder.writeTypeIndex(Record.getSourceFile());
- Builder.writeUInt32(Record.getLineNumber());
- return writeRecord(Builder);
-}
-
-TypeIndex
-TypeTableBuilder::writeUdtModSourceLine(const UdtModSourceLineRecord &Record) {
- TypeRecordBuilder Builder(Record.getKind());
- Builder.writeTypeIndex(Record.getUDT());
- Builder.writeTypeIndex(Record.getSourceFile());
- Builder.writeUInt32(Record.getLineNumber());
- Builder.writeUInt16(Record.getModule());
- return writeRecord(Builder);
-}
-
-TypeIndex TypeTableBuilder::writeFuncId(const FuncIdRecord &Record) {
- TypeRecordBuilder Builder(Record.getKind());
- Builder.writeTypeIndex(Record.getParentScope());
- Builder.writeTypeIndex(Record.getFunctionType());
- Builder.writeNullTerminatedString(Record.getName());
- return writeRecord(Builder);
-}
-
-TypeIndex
-TypeTableBuilder::writeMemberFuncId(const MemberFuncIdRecord &Record) {
- TypeRecordBuilder Builder(Record.getKind());
- Builder.writeTypeIndex(Record.getClassType());
- Builder.writeTypeIndex(Record.getFunctionType());
- Builder.writeNullTerminatedString(Record.getName());
- return writeRecord(Builder);
-}
-
-TypeIndex
-TypeTableBuilder::writeBuildInfo(const BuildInfoRecord &Record) {
- TypeRecordBuilder Builder(Record.getKind());
- assert(Record.getArgs().size() <= UINT16_MAX);
- Builder.writeUInt16(Record.getArgs().size());
- for (TypeIndex Arg : Record.getArgs())
- Builder.writeTypeIndex(Arg);
- return writeRecord(Builder);
-}
-
-TypeIndex TypeTableBuilder::writeRecord(TypeRecordBuilder &Builder) {
- return writeRecord(Builder.str());
-}
-
-TypeIndex TypeTableBuilder::writeFieldList(FieldListRecordBuilder &FieldList) {
- return FieldList.writeListRecord(*this);
-}
-
-TypeIndex TypeTableBuilder::writeMethodOverloadList(
- const MethodOverloadListRecord &Record) {
- TypeRecordBuilder Builder(Record.getKind());
- for (const OneMethodRecord &Method : Record.getMethods()) {
- uint16_t Flags = static_cast<uint16_t>(Method.getAccess());
- Flags |= static_cast<uint16_t>(Method.getKind())
- << MemberAttributes::MethodKindShift;
- Flags |= static_cast<uint16_t>(Method.getOptions());
- Builder.writeUInt16(Flags);
- Builder.writeUInt16(0); // padding
- Builder.writeTypeIndex(Method.getType());
- if (Method.isIntroducingVirtual()) {
- assert(Method.getVFTableOffset() >= 0);
- Builder.writeInt32(Method.getVFTableOffset());
- } else {
- assert(Method.getVFTableOffset() == -1);
- }
- }
-
- // TODO: Split the list into multiple records if it's longer than 64KB, using
- // a subrecord of TypeRecordKind::Index to chain the records together.
- return writeRecord(Builder);
-}
-
-TypeIndex TypeTableBuilder::writeTypeServer2(const TypeServer2Record &Record) {
- TypeRecordBuilder Builder(Record.getKind());
- Builder.writeGuid(Record.getGuid());
- Builder.writeUInt32(Record.getAge());
- Builder.writeNullTerminatedString(Record.getName());
- return writeRecord(Builder);
-}
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
index 9314c9eabc48..6126470aa099 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
@@ -8,6 +8,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h"
+#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
+#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
@@ -16,9 +18,11 @@ using namespace dwarf;
void DWARFAbbreviationDeclaration::clear() {
Code = 0;
- Tag = 0;
+ Tag = DW_TAG_null;
+ CodeByteSize = 0;
HasChildren = false;
AttributeSpecs.clear();
+ FixedAttributeSize.reset();
}
DWARFAbbreviationDeclaration::DWARFAbbreviationDeclaration() {
@@ -26,59 +30,102 @@ DWARFAbbreviationDeclaration::DWARFAbbreviationDeclaration() {
}
bool
-DWARFAbbreviationDeclaration::extract(DataExtractor Data, uint32_t* OffsetPtr) {
+DWARFAbbreviationDeclaration::extract(DataExtractor Data,
+ uint32_t* OffsetPtr) {
clear();
+ const uint32_t Offset = *OffsetPtr;
Code = Data.getULEB128(OffsetPtr);
if (Code == 0) {
return false;
}
- Tag = Data.getULEB128(OffsetPtr);
+ CodeByteSize = *OffsetPtr - Offset;
+ Tag = static_cast<llvm::dwarf::Tag>(Data.getULEB128(OffsetPtr));
+ if (Tag == DW_TAG_null) {
+ clear();
+ return false;
+ }
uint8_t ChildrenByte = Data.getU8(OffsetPtr);
HasChildren = (ChildrenByte == DW_CHILDREN_yes);
+ // Assign a value to our optional FixedAttributeSize member variable. If
+ // this member variable still has a value after the while loop below, then
+ // all attribute data in this abbreviation declaration has a fixed byte size.
+ FixedAttributeSize = FixedSizeInfo();
+ // Read all of the abbreviation attributes and forms.
while (true) {
- uint32_t CurOffset = *OffsetPtr;
- uint16_t Attr = Data.getULEB128(OffsetPtr);
- if (CurOffset == *OffsetPtr) {
- clear();
- return false;
- }
- CurOffset = *OffsetPtr;
- uint16_t Form = Data.getULEB128(OffsetPtr);
- if (CurOffset == *OffsetPtr) {
+ auto A = static_cast<Attribute>(Data.getULEB128(OffsetPtr));
+ auto F = static_cast<Form>(Data.getULEB128(OffsetPtr));
+ if (A && F) {
+ auto FixedFormByteSize = DWARFFormValue::getFixedByteSize(F);
+ AttributeSpecs.push_back(AttributeSpec(A, F, FixedFormByteSize));
+      // If this abbreviation still has a fixed byte size, then update the
+ // FixedAttributeSize as needed.
+ if (FixedAttributeSize) {
+ if (FixedFormByteSize)
+ FixedAttributeSize->NumBytes += *FixedFormByteSize;
+ else {
+ switch (F) {
+ case DW_FORM_addr:
+ ++FixedAttributeSize->NumAddrs;
+ break;
+
+ case DW_FORM_ref_addr:
+ ++FixedAttributeSize->NumRefAddrs;
+ break;
+
+ case DW_FORM_strp:
+ case DW_FORM_GNU_ref_alt:
+ case DW_FORM_GNU_strp_alt:
+ case DW_FORM_line_strp:
+ case DW_FORM_sec_offset:
+ case DW_FORM_strp_sup:
+ case DW_FORM_ref_sup:
+ ++FixedAttributeSize->NumDwarfOffsets;
+ break;
+
+ default:
+ // Indicate we no longer have a fixed byte size for this
+          // abbreviation by clearing the FixedAttributeSize optional value.
+ FixedAttributeSize.reset();
+ break;
+ }
+ }
+ }
+ } else if (A == 0 && F == 0) {
+ // We successfully reached the end of this abbreviation declaration
+ // since both attribute and form are zero.
+ break;
+ } else {
+ // Attribute and form pairs must either both be non-zero, in which case
+ // they are added to the abbreviation declaration, or both be zero to
+      // terminate the abbreviation declaration. In this case only one was
+      // zero, which is an error.
clear();
return false;
}
- if (Attr == 0 && Form == 0)
- break;
- AttributeSpecs.push_back(AttributeSpec(Attr, Form));
- }
-
- if (Tag == 0) {
- clear();
- return false;
}
return true;
}
void DWARFAbbreviationDeclaration::dump(raw_ostream &OS) const {
- const char *tagString = TagString(getTag());
+ auto tagString = TagString(getTag());
OS << '[' << getCode() << "] ";
- if (tagString)
+ if (!tagString.empty())
OS << tagString;
else
OS << format("DW_TAG_Unknown_%x", getTag());
OS << "\tDW_CHILDREN_" << (hasChildren() ? "yes" : "no") << '\n';
for (const AttributeSpec &Spec : AttributeSpecs) {
OS << '\t';
- const char *attrString = AttributeString(Spec.Attr);
- if (attrString)
+ auto attrString = AttributeString(Spec.Attr);
+ if (!attrString.empty())
OS << attrString;
else
OS << format("DW_AT_Unknown_%x", Spec.Attr);
OS << '\t';
- const char *formString = FormEncodingString(Spec.Form);
- if (formString)
+ auto formString = FormEncodingString(Spec.Form);
+ if (!formString.empty())
OS << formString;
else
OS << format("DW_FORM_Unknown_%x", Spec.Form);
@@ -87,11 +134,65 @@ void DWARFAbbreviationDeclaration::dump(raw_ostream &OS) const {
OS << '\n';
}
-uint32_t
-DWARFAbbreviationDeclaration::findAttributeIndex(uint16_t attr) const {
+Optional<uint32_t>
+DWARFAbbreviationDeclaration::findAttributeIndex(dwarf::Attribute Attr) const {
for (uint32_t i = 0, e = AttributeSpecs.size(); i != e; ++i) {
- if (AttributeSpecs[i].Attr == attr)
+ if (AttributeSpecs[i].Attr == Attr)
return i;
}
- return -1U;
+ return None;
+}
+
+Optional<DWARFFormValue> DWARFAbbreviationDeclaration::getAttributeValue(
+ const uint32_t DIEOffset, const dwarf::Attribute Attr,
+ const DWARFUnit &U) const {
+ Optional<uint32_t> MatchAttrIndex = findAttributeIndex(Attr);
+ if (!MatchAttrIndex)
+ return None;
+
+ auto DebugInfoData = U.getDebugInfoExtractor();
+
+  // Add the byte size of the ULEB128-encoded abbrev Code so we can start
+ // skipping the attribute data.
+ uint32_t Offset = DIEOffset + CodeByteSize;
+ uint32_t AttrIndex = 0;
+ for (const auto &Spec : AttributeSpecs) {
+ if (*MatchAttrIndex == AttrIndex) {
+      // We have arrived at the attribute to extract; extract it from Offset.
+ DWARFFormValue FormValue(Spec.Form);
+ if (FormValue.extractValue(DebugInfoData, &Offset, &U))
+ return FormValue;
+ }
+ // March Offset along until we get to the attribute we want.
+ if (Optional<uint8_t> FixedSize = Spec.getByteSize(U))
+ Offset += *FixedSize;
+ else
+ DWARFFormValue::skipValue(Spec.Form, DebugInfoData, &Offset, &U);
+ ++AttrIndex;
+ }
+ return None;
+}
+
+size_t DWARFAbbreviationDeclaration::FixedSizeInfo::getByteSize(
+ const DWARFUnit &U) const {
+ size_t ByteSize = NumBytes;
+ if (NumAddrs)
+ ByteSize += NumAddrs * U.getAddressByteSize();
+ if (NumRefAddrs)
+ ByteSize += NumRefAddrs * U.getRefAddrByteSize();
+ if (NumDwarfOffsets)
+ ByteSize += NumDwarfOffsets * U.getDwarfOffsetByteSize();
+ return ByteSize;
+}
+
+Optional<uint8_t> DWARFAbbreviationDeclaration::AttributeSpec::getByteSize(
+ const DWARFUnit &U) const {
+ return ByteSize ? ByteSize : DWARFFormValue::getFixedByteSize(Form, &U);
+}
+
+Optional<size_t> DWARFAbbreviationDeclaration::getFixedAttributesByteSize(
+ const DWARFUnit &U) const {
+ if (FixedAttributeSize)
+ return FixedAttributeSize->getByteSize(U);
+ return None;
}
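The FixedSizeInfo bookkeeping introduced above lets the parser price a whole abbreviation with one multiply-add per size category instead of walking every form. A minimal standalone sketch of that arithmetic (not the LLVM class itself; the attribute mix and unit sizes below are made up for illustration):

#include <cstddef>
#include <cstdio>

// Mirrors the NumBytes/NumAddrs/NumRefAddrs/NumDwarfOffsets split used by
// DWARFAbbreviationDeclaration::FixedSizeInfo::getByteSize().
struct FixedSizeSketch {
  size_t NumBytes;        // forms whose size never varies (data1/2/4/8, flag, ...)
  size_t NumAddrs;        // DW_FORM_addr occurrences
  size_t NumRefAddrs;     // DW_FORM_ref_addr occurrences
  size_t NumDwarfOffsets; // DW_FORM_strp, DW_FORM_sec_offset, ... occurrences

  size_t getByteSize(size_t AddrSize, size_t RefAddrSize,
                     size_t OffsetSize) const {
    return NumBytes + NumAddrs * AddrSize + NumRefAddrs * RefAddrSize +
           NumDwarfOffsets * OffsetSize;
  }
};

int main() {
  // Hypothetical abbreviation: DW_AT_name (DW_FORM_strp), DW_AT_low_pc
  // (DW_FORM_addr), DW_AT_decl_line (DW_FORM_data2).
  FixedSizeSketch Info{/*NumBytes=*/2, /*NumAddrs=*/1, /*NumRefAddrs=*/0,
                       /*NumDwarfOffsets=*/1};
  // DWARF v4, DWARF32, 8-byte addresses: ref_addr and section offsets are
  // 4 bytes each, so the attribute payload is 2 + 8 + 4 = 14 bytes.
  std::printf("fixed attribute size: %zu bytes\n",
              Info.getByteSize(/*AddrSize=*/8, /*RefAddrSize=*/4,
                               /*OffsetSize=*/4));
  return 0;
}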
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
index 8ae05432869a..7111ad3f9fc7 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFAcceleratorTable.cpp
@@ -39,7 +39,7 @@ bool DWARFAcceleratorTable::extract() {
for (unsigned i = 0; i < NumAtoms; ++i) {
uint16_t AtomType = AccelSection.getU16(&Offset);
- uint16_t AtomForm = AccelSection.getU16(&Offset);
+ auto AtomForm = static_cast<dwarf::Form>(AccelSection.getU16(&Offset));
HdrData.Atoms.push_back(std::make_pair(AtomType, AtomForm));
}
@@ -61,12 +61,14 @@ void DWARFAcceleratorTable::dump(raw_ostream &OS) const {
SmallVector<DWARFFormValue, 3> AtomForms;
for (const auto &Atom: HdrData.Atoms) {
OS << format("Atom[%d] Type: ", i++);
- if (const char *TypeString = dwarf::AtomTypeString(Atom.first))
+ auto TypeString = dwarf::AtomTypeString(Atom.first);
+ if (!TypeString.empty())
OS << TypeString;
else
OS << format("DW_ATOM_Unknown_0x%x", Atom.first);
OS << " Form: ";
- if (const char *FormString = dwarf::FormEncodingString(Atom.second))
+ auto FormString = dwarf::FormEncodingString(Atom.second);
+ if (!FormString.empty())
OS << FormString;
else
OS << format("DW_FORM_Unknown_0x%x", Atom.second);
@@ -118,7 +120,7 @@ void DWARFAcceleratorTable::dump(raw_ostream &OS) const {
for (auto &Atom : AtomForms) {
OS << format("{Atom[%d]: ", i++);
if (Atom.extractValue(AccelSection, &DataOffset, nullptr))
- Atom.dump(OS, nullptr);
+ Atom.dump(OS);
else
OS << "Error extracting the value";
OS << "} ";
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp
index 39a7c772dc76..948972f8f136 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFCompileUnit.cpp
@@ -8,6 +8,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h"
+#include "llvm/DebugInfo/DWARF/DWARFDie.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
@@ -22,12 +24,11 @@ void DWARFCompileUnit::dump(raw_ostream &OS) {
<< " (next unit at " << format("0x%08x", getNextUnitOffset())
<< ")\n";
- if (const DWARFDebugInfoEntryMinimal *CU = getUnitDIE(false))
- CU->dump(OS, this, -1U);
+ if (DWARFDie CUDie = getUnitDIE(false))
+ CUDie.dump(OS, -1U);
else
OS << "<compile unit can't be parsed!>\n\n";
}
// VTable anchor.
-DWARFCompileUnit::~DWARFCompileUnit() {
-}
+DWARFCompileUnit::~DWARFCompileUnit() = default;
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
index e8ea71b325ae..7df66c76e8b5 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFContext.cpp
@@ -12,6 +12,7 @@
#include "llvm/ADT/StringSwitch.h"
#include "llvm/DebugInfo/DWARF/DWARFAcceleratorTable.h"
#include "llvm/DebugInfo/DWARF/DWARFDebugArangeSet.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugPubTable.h"
#include "llvm/DebugInfo/DWARF/DWARFUnitIndex.h"
#include "llvm/Object/MachO.h"
#include "llvm/Object/RelocVisitor.h"
@@ -32,37 +33,6 @@ typedef DWARFDebugLine::LineTable DWARFLineTable;
typedef DILineInfoSpecifier::FileLineInfoKind FileLineInfoKind;
typedef DILineInfoSpecifier::FunctionNameKind FunctionNameKind;
-static void dumpPubSection(raw_ostream &OS, StringRef Name, StringRef Data,
- bool LittleEndian, bool GnuStyle) {
- OS << "\n." << Name << " contents:\n";
- DataExtractor pubNames(Data, LittleEndian, 0);
- uint32_t offset = 0;
- while (pubNames.isValidOffset(offset)) {
- OS << "length = " << format("0x%08x", pubNames.getU32(&offset));
- OS << " version = " << format("0x%04x", pubNames.getU16(&offset));
- OS << " unit_offset = " << format("0x%08x", pubNames.getU32(&offset));
- OS << " unit_size = " << format("0x%08x", pubNames.getU32(&offset)) << '\n';
- if (GnuStyle)
- OS << "Offset Linkage Kind Name\n";
- else
- OS << "Offset Name\n";
-
- while (offset < Data.size()) {
- uint32_t dieRef = pubNames.getU32(&offset);
- if (dieRef == 0)
- break;
- OS << format("0x%8.8x ", dieRef);
- if (GnuStyle) {
- PubIndexEntryDescriptor desc(pubNames.getU8(&offset));
- OS << format("%-8s", dwarf::GDBIndexEntryLinkageString(desc.Linkage))
- << ' ' << format("%-8s", dwarf::GDBIndexEntryKindString(desc.Kind))
- << ' ';
- }
- OS << '\"' << pubNames.getCStr(&offset) << "\"\n";
- }
- }
-}
-
static void dumpAccelSection(raw_ostream &OS, StringRef Name,
const DWARFSection& Section, StringRef StringSection,
bool LittleEndian) {
@@ -75,7 +45,8 @@ static void dumpAccelSection(raw_ostream &OS, StringRef Name,
Accel.dump(OS);
}
-void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType, bool DumpEH) {
+void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType, bool DumpEH,
+ bool SummarizeTypes) {
if (DumpType == DIDT_All || DumpType == DIDT_Abbrev) {
OS << ".debug_abbrev contents:\n";
getDebugAbbrev()->dump(OS);
@@ -104,7 +75,7 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType, bool DumpEH) {
OS << "\n.debug_types contents:\n";
for (const auto &TUS : type_unit_sections())
for (const auto &TU : TUS)
- TU->dump(OS);
+ TU->dump(OS, SummarizeTypes);
}
if ((DumpType == DIDT_All || DumpType == DIDT_TypesDwo) &&
@@ -112,7 +83,7 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType, bool DumpEH) {
OS << "\n.debug_types.dwo contents:\n";
for (const auto &DWOTUS : dwo_type_unit_sections())
for (const auto &DWOTU : DWOTUS)
- DWOTU->dump(OS);
+ DWOTU->dump(OS, SummarizeTypes);
}
if (DumpType == DIDT_All || DumpType == DIDT_Loc) {
@@ -153,16 +124,16 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType, bool DumpEH) {
OS << "\n.debug_line contents:\n";
for (const auto &CU : compile_units()) {
savedAddressByteSize = CU->getAddressByteSize();
- const auto *CUDIE = CU->getUnitDIE();
- if (CUDIE == nullptr)
+ auto CUDIE = CU->getUnitDIE();
+ if (!CUDIE)
continue;
- unsigned stmtOffset = CUDIE->getAttributeValueAsSectionOffset(
- CU.get(), DW_AT_stmt_list, -1U);
- if (stmtOffset != -1U) {
+ if (auto StmtOffset =
+ CUDIE.getAttributeValueAsSectionOffset(DW_AT_stmt_list)) {
DataExtractor lineData(getLineSection().Data, isLittleEndian(),
savedAddressByteSize);
DWARFDebugLine::LineTable LineTable;
- LineTable.parse(lineData, &getLineSection().Relocs, &stmtOffset);
+ uint32_t Offset = *StmtOffset;
+ LineTable.parse(lineData, &getLineSection().Relocs, &Offset);
LineTable.dump(OS);
}
}
@@ -228,20 +199,22 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType, bool DumpEH) {
}
if (DumpType == DIDT_All || DumpType == DIDT_Pubnames)
- dumpPubSection(OS, "debug_pubnames", getPubNamesSection(),
- isLittleEndian(), false);
+ DWARFDebugPubTable(getPubNamesSection(), isLittleEndian(), false)
+ .dump("debug_pubnames", OS);
if (DumpType == DIDT_All || DumpType == DIDT_Pubtypes)
- dumpPubSection(OS, "debug_pubtypes", getPubTypesSection(),
- isLittleEndian(), false);
+ DWARFDebugPubTable(getPubTypesSection(), isLittleEndian(), false)
+ .dump("debug_pubtypes", OS);
if (DumpType == DIDT_All || DumpType == DIDT_GnuPubnames)
- dumpPubSection(OS, "debug_gnu_pubnames", getGnuPubNamesSection(),
- isLittleEndian(), true /* GnuStyle */);
+ DWARFDebugPubTable(getGnuPubNamesSection(), isLittleEndian(),
+ true /* GnuStyle */)
+ .dump("debug_gnu_pubnames", OS);
if (DumpType == DIDT_All || DumpType == DIDT_GnuPubtypes)
- dumpPubSection(OS, "debug_gnu_pubtypes", getGnuPubTypesSection(),
- isLittleEndian(), true /* GnuStyle */);
+ DWARFDebugPubTable(getGnuPubTypesSection(), isLittleEndian(),
+ true /* GnuStyle */)
+ .dump("debug_gnu_pubtypes", OS);
if ((DumpType == DIDT_All || DumpType == DIDT_StrOffsetsDwo) &&
!getStringOffsetDWOSection().empty()) {
@@ -256,6 +229,12 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType, bool DumpEH) {
}
}
+ if ((DumpType == DIDT_All || DumpType == DIDT_GdbIndex) &&
+ !getGdbIndexSection().empty()) {
+ OS << "\n.gnu_index contents:\n";
+ getGdbIndex().dump(OS);
+ }
+
if (DumpType == DIDT_All || DumpType == DIDT_AppleNames)
dumpAccelSection(OS, "apple_names", getAppleNamesSection(),
getStringSection(), isLittleEndian());
@@ -295,6 +274,16 @@ const DWARFUnitIndex &DWARFContext::getTUIndex() {
return *TUIndex;
}
+DWARFGdbIndex &DWARFContext::getGdbIndex() {
+ if (GdbIndex)
+ return *GdbIndex;
+
+ DataExtractor GdbIndexData(getGdbIndexSection(), true /*LE*/, 0);
+ GdbIndex = llvm::make_unique<DWARFGdbIndex>();
+ GdbIndex->parse(GdbIndexData);
+ return *GdbIndex;
+}
+
const DWARFDebugAbbrev *DWARFContext::getDebugAbbrev() {
if (Abbrev)
return Abbrev.get();
@@ -393,16 +382,15 @@ DWARFContext::getLineTableForUnit(DWARFUnit *U) {
if (!Line)
Line.reset(new DWARFDebugLine(&getLineSection().Relocs));
- const auto *UnitDIE = U->getUnitDIE();
- if (UnitDIE == nullptr)
+ auto UnitDIE = U->getUnitDIE();
+ if (!UnitDIE)
return nullptr;
- unsigned stmtOffset =
- UnitDIE->getAttributeValueAsSectionOffset(U, DW_AT_stmt_list, -1U);
- if (stmtOffset == -1U)
+ auto Offset = UnitDIE.getAttributeValueAsSectionOffset(DW_AT_stmt_list);
+ if (!Offset)
return nullptr; // No line table for this compile unit.
- stmtOffset += U->getLineTableOffset();
+ uint32_t stmtOffset = *Offset + U->getLineTableOffset();
// See if the line table is cached.
if (const DWARFLineTable *lt = Line->getLineTable(stmtOffset))
return lt;
@@ -458,14 +446,12 @@ static bool getFunctionNameForAddress(DWARFCompileUnit *CU, uint64_t Address,
return false;
// The address may correspond to instruction in some inlined function,
// so we have to build the chain of inlined functions and take the
- // name of the topmost function in it.
- const DWARFDebugInfoEntryInlinedChain &InlinedChain =
- CU->getInlinedChainForAddress(Address);
- if (InlinedChain.DIEs.size() == 0)
+ // name of the topmost function in it.
+ SmallVector<DWARFDie, 4> InlinedChain;
+ CU->getInlinedChainForAddress(Address, InlinedChain);
+ if (InlinedChain.size() == 0)
return false;
- const DWARFDebugInfoEntryMinimal &TopFunctionDIE = InlinedChain.DIEs[0];
- if (const char *Name =
- TopFunctionDIE.getSubroutineName(InlinedChain.U, Kind)) {
+ if (const char *Name = InlinedChain[0].getSubroutineName(Kind)) {
FunctionName = Name;
return true;
}
@@ -540,9 +526,9 @@ DWARFContext::getInliningInfoForAddress(uint64_t Address,
return InliningInfo;
const DWARFLineTable *LineTable = nullptr;
- const DWARFDebugInfoEntryInlinedChain &InlinedChain =
- CU->getInlinedChainForAddress(Address);
- if (InlinedChain.DIEs.size() == 0) {
+ SmallVector<DWARFDie, 4> InlinedChain;
+ CU->getInlinedChainForAddress(Address, InlinedChain);
+ if (InlinedChain.size() == 0) {
// If there is no DIE for address (e.g. it is in unavailable .dwo file),
// try to at least get file/line info from symbol table.
if (Spec.FLIKind != FileLineInfoKind::None) {
@@ -557,12 +543,11 @@ DWARFContext::getInliningInfoForAddress(uint64_t Address,
}
uint32_t CallFile = 0, CallLine = 0, CallColumn = 0;
- for (uint32_t i = 0, n = InlinedChain.DIEs.size(); i != n; i++) {
- const DWARFDebugInfoEntryMinimal &FunctionDIE = InlinedChain.DIEs[i];
+ for (uint32_t i = 0, n = InlinedChain.size(); i != n; i++) {
+ DWARFDie &FunctionDIE = InlinedChain[i];
DILineInfo Frame;
// Get function name if necessary.
- if (const char *Name =
- FunctionDIE.getSubroutineName(InlinedChain.U, Spec.FNKind))
+ if (const char *Name = FunctionDIE.getSubroutineName(Spec.FNKind))
Frame.FunctionName = Name;
if (Spec.FLIKind != FileLineInfoKind::None) {
if (i == 0) {
@@ -584,8 +569,7 @@ DWARFContext::getInliningInfoForAddress(uint64_t Address,
}
// Get call file/line/column of a current DIE.
if (i + 1 < n) {
- FunctionDIE.getCallerFrame(InlinedChain.U, CallFile, CallLine,
- CallColumn);
+ FunctionDIE.getCallerFrame(CallFile, CallLine, CallColumn);
}
}
InliningInfo.addFrame(Frame);
@@ -718,6 +702,7 @@ DWARFContextInMemory::DWARFContextInMemory(const object::ObjectFile &Obj,
.Case("apple_objc", &AppleObjCSection.Data)
.Case("debug_cu_index", &CUIndexSection)
.Case("debug_tu_index", &TUIndexSection)
+ .Case("gdb_index", &GdbIndexSection)
// Any more debug info sections go here.
.Default(nullptr);
if (SectionData) {
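The hunks above replace sentinel-based DIE accessors with Optional-returning ones, so callers test for a value instead of comparing against -1U. A hedged sketch of how an external consumer could follow the same pattern (dumpStmtListOffsets is a made-up helper; the usual llvm and dwarf namespaces and headers are assumed to be in scope):

// List the DW_AT_stmt_list offset of every parsable compile unit.
void dumpStmtListOffsets(DWARFContext &DICtx, raw_ostream &OS) {
  for (const auto &CU : DICtx.compile_units()) {
    DWARFDie CUDie = CU->getUnitDIE(false);
    if (!CUDie)
      continue; // unit could not be parsed
    if (auto StmtOffset =
            CUDie.getAttributeValueAsSectionOffset(DW_AT_stmt_list))
      OS << format("0x%08x\n", uint32_t(*StmtOffset));
  }
}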
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
index 9b6a9a788230..32b8320e26c5 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp
@@ -12,31 +12,36 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Casting.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
+#include <cinttypes>
+#include <cstdint>
#include <string>
-#include <utility>
#include <vector>
using namespace llvm;
using namespace dwarf;
-
/// \brief Abstract frame entry defining the common interface concrete
/// entries implement.
class llvm::FrameEntry {
public:
enum FrameKind {FK_CIE, FK_FDE};
+
FrameEntry(FrameKind K, uint64_t Offset, uint64_t Length)
: Kind(K), Offset(Offset), Length(Length) {}
- virtual ~FrameEntry() {
- }
+ virtual ~FrameEntry() = default;
FrameKind getKind() const { return Kind; }
virtual uint64_t getOffset() const { return Offset; }
@@ -95,7 +100,6 @@ protected:
}
};
-
// See DWARF standard v3, section 7.23
const uint8_t DWARF_CFI_PRIMARY_OPCODE_MASK = 0xc0;
const uint8_t DWARF_CFI_PRIMARY_OPERAND_MASK = 0x3f;
@@ -194,6 +198,7 @@ void FrameEntry::parseInstructions(DataExtractor Data, uint32_t *Offset,
}
namespace {
+
/// \brief DWARF Common Information Entry (CIE)
class CIE : public FrameEntry {
public:
@@ -215,14 +220,16 @@ public:
FDEPointerEncoding(FDEPointerEncoding),
LSDAPointerEncoding(LSDAPointerEncoding) {}
- ~CIE() override {}
+ ~CIE() override = default;
StringRef getAugmentationString() const { return Augmentation; }
uint64_t getCodeAlignmentFactor() const { return CodeAlignmentFactor; }
int64_t getDataAlignmentFactor() const { return DataAlignmentFactor; }
+
uint32_t getFDEPointerEncoding() const {
return FDEPointerEncoding;
}
+
uint32_t getLSDAPointerEncoding() const {
return LSDAPointerEncoding;
}
@@ -274,7 +281,6 @@ private:
uint32_t LSDAPointerEncoding;
};
-
/// \brief DWARF Frame Description Entry (FDE)
class FDE : public FrameEntry {
public:
@@ -288,7 +294,7 @@ public:
InitialLocation(InitialLocation), AddressRange(AddressRange),
LinkedCIE(Cie) {}
- ~FDE() override {}
+ ~FDE() override = default;
CIE *getLinkedCIE() const { return LinkedCIE; }
@@ -336,7 +342,7 @@ static ArrayRef<OperandType[2]> getOperandTypes() {
do { \
OpTypes[OP][0] = OPTYPE0; \
OpTypes[OP][1] = OPTYPE1; \
- } while (0)
+ } while (false)
#define DECLARE_OP1(OP, OPTYPE0) DECLARE_OP2(OP, OPTYPE0, OT_None)
#define DECLARE_OP0(OP) DECLARE_OP1(OP, OT_None)
@@ -373,6 +379,7 @@ static ArrayRef<OperandType[2]> getOperandTypes() {
#undef DECLARE_OP0
#undef DECLARE_OP1
#undef DECLARE_OP2
+
return ArrayRef<OperandType[2]>(&OpTypes[0], DW_CFA_restore+1);
}
@@ -387,13 +394,15 @@ static void printOperand(raw_ostream &OS, uint8_t Opcode, unsigned OperandIdx,
OperandType Type = OpTypes[Opcode][OperandIdx];
switch (Type) {
- case OT_Unset:
+ case OT_Unset: {
OS << " Unsupported " << (OperandIdx ? "second" : "first") << " operand to";
- if (const char *OpcodeName = CallFrameString(Opcode))
+ auto OpcodeName = CallFrameString(Opcode);
+ if (!OpcodeName.empty())
OS << " " << OpcodeName;
else
OS << format(" Opcode %x", Opcode);
break;
+ }
case OT_None:
break;
case OT_Address:
@@ -459,8 +468,7 @@ void FrameEntry::dumpInstructions(raw_ostream &OS) const {
DWARFDebugFrame::DWARFDebugFrame(bool IsEH) : IsEH(IsEH) {
}
-DWARFDebugFrame::~DWARFDebugFrame() {
-}
+DWARFDebugFrame::~DWARFDebugFrame() = default;
static void LLVM_ATTRIBUTE_UNUSED dumpDataAux(DataExtractor Data,
uint32_t Offset, int Length) {
@@ -611,12 +619,14 @@ void DWARFDebugFrame::parse(DataExtractor Data) {
}
}
- auto Cie = make_unique<CIE>(StartOffset, Length, Version,
- AugmentationString, AddressSize,
- SegmentDescriptorSize, CodeAlignmentFactor,
- DataAlignmentFactor, ReturnAddressRegister,
- AugmentationData, FDEPointerEncoding,
- LSDAPointerEncoding);
+ auto Cie = llvm::make_unique<CIE>(StartOffset, Length, Version,
+ AugmentationString, AddressSize,
+ SegmentDescriptorSize,
+ CodeAlignmentFactor,
+ DataAlignmentFactor,
+ ReturnAddressRegister,
+ AugmentationData, FDEPointerEncoding,
+ LSDAPointerEncoding);
CIEs[StartOffset] = Cie.get();
Entries.emplace_back(std::move(Cie));
} else {
@@ -668,7 +678,6 @@ void DWARFDebugFrame::parse(DataExtractor Data) {
}
}
-
void DWARFDebugFrame::dump(raw_ostream &OS) const {
OS << "\n";
for (const auto &Entry : Entries) {
@@ -677,4 +686,3 @@ void DWARFDebugFrame::dump(raw_ostream &OS) const {
OS << "\n";
}
}
-
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
index 62d5e666aef9..9f623e4954c8 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugInfoEntry.cpp
@@ -22,170 +22,17 @@ using namespace llvm;
using namespace dwarf;
using namespace syntax;
-// Small helper to extract a DIE pointed by a reference
-// attribute. It looks up the Unit containing the DIE and calls
-// DIE.extractFast with the right unit. Returns new unit on success,
-// nullptr otherwise.
-static const DWARFUnit *findUnitAndExtractFast(DWARFDebugInfoEntryMinimal &DIE,
- const DWARFUnit *Unit,
- uint32_t *Offset) {
- Unit = Unit->getUnitSection().getUnitForOffset(*Offset);
- return (Unit && DIE.extractFast(Unit, Offset)) ? Unit : nullptr;
-}
-
-void DWARFDebugInfoEntryMinimal::dump(raw_ostream &OS, DWARFUnit *u,
- unsigned recurseDepth,
- unsigned indent) const {
- DataExtractor debug_info_data = u->getDebugInfoExtractor();
- uint32_t offset = Offset;
-
- if (debug_info_data.isValidOffset(offset)) {
- uint32_t abbrCode = debug_info_data.getULEB128(&offset);
- WithColor(OS, syntax::Address).get() << format("\n0x%8.8x: ", Offset);
-
- if (abbrCode) {
- if (AbbrevDecl) {
- const char *tagString = TagString(getTag());
- if (tagString)
- WithColor(OS, syntax::Tag).get().indent(indent) << tagString;
- else
- WithColor(OS, syntax::Tag).get().indent(indent) <<
- format("DW_TAG_Unknown_%x", getTag());
-
- OS << format(" [%u] %c\n", abbrCode,
- AbbrevDecl->hasChildren() ? '*' : ' ');
-
- // Dump all data in the DIE for the attributes.
- for (const auto &AttrSpec : AbbrevDecl->attributes()) {
- dumpAttribute(OS, u, &offset, AttrSpec.Attr, AttrSpec.Form, indent);
- }
-
- const DWARFDebugInfoEntryMinimal *child = getFirstChild();
- if (recurseDepth > 0 && child) {
- while (child) {
- child->dump(OS, u, recurseDepth-1, indent+2);
- child = child->getSibling();
- }
- }
- } else {
- OS << "Abbreviation code not found in 'debug_abbrev' class for code: "
- << abbrCode << '\n';
- }
- } else {
- OS.indent(indent) << "NULL\n";
- }
- }
-}
-
-static void dumpApplePropertyAttribute(raw_ostream &OS, uint64_t Val) {
- OS << " (";
- do {
- uint64_t Shift = countTrailingZeros(Val);
- assert(Shift < 64 && "undefined behavior");
- uint64_t Bit = 1ULL << Shift;
- if (const char *PropName = ApplePropertyString(Bit))
- OS << PropName;
- else
- OS << format("DW_APPLE_PROPERTY_0x%" PRIx64, Bit);
- if (!(Val ^= Bit))
- break;
- OS << ", ";
- } while (true);
- OS << ")";
-}
-
-static void dumpRanges(raw_ostream &OS, const DWARFAddressRangesVector& Ranges,
- unsigned AddressSize, unsigned Indent) {
- if (Ranges.empty())
- return;
-
- for (const auto &Range: Ranges) {
- OS << '\n';
- OS.indent(Indent);
- OS << format("[0x%0*" PRIx64 " - 0x%0*" PRIx64 ")",
- AddressSize*2, Range.first,
- AddressSize*2, Range.second);
- }
-}
-
-void DWARFDebugInfoEntryMinimal::dumpAttribute(raw_ostream &OS,
- DWARFUnit *u,
- uint32_t *offset_ptr,
- uint16_t attr, uint16_t form,
- unsigned indent) const {
- const char BaseIndent[] = " ";
- OS << BaseIndent;
- OS.indent(indent+2);
- const char *attrString = AttributeString(attr);
- if (attrString)
- WithColor(OS, syntax::Attribute) << attrString;
- else
- WithColor(OS, syntax::Attribute).get() << format("DW_AT_Unknown_%x", attr);
-
- const char *formString = FormEncodingString(form);
- if (formString)
- OS << " [" << formString << ']';
- else
- OS << format(" [DW_FORM_Unknown_%x]", form);
-
- DWARFFormValue formValue(form);
-
- if (!formValue.extractValue(u->getDebugInfoExtractor(), offset_ptr, u))
- return;
-
- OS << "\t(";
-
- const char *Name = nullptr;
- std::string File;
- auto Color = syntax::Enumerator;
- if (attr == DW_AT_decl_file || attr == DW_AT_call_file) {
- Color = syntax::String;
- if (const auto *LT = u->getContext().getLineTableForUnit(u))
- if (LT->getFileNameByIndex(
- formValue.getAsUnsignedConstant().getValue(),
- u->getCompilationDir(),
- DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, File)) {
- File = '"' + File + '"';
- Name = File.c_str();
- }
- } else if (Optional<uint64_t> Val = formValue.getAsUnsignedConstant())
- Name = AttributeValueString(attr, *Val);
-
- if (Name)
- WithColor(OS, Color) << Name;
- else if (attr == DW_AT_decl_line || attr == DW_AT_call_line)
- OS << *formValue.getAsUnsignedConstant();
- else
- formValue.dump(OS, u);
-
- // We have dumped the attribute raw value. For some attributes
- // having both the raw value and the pretty-printed value is
- // interesting. These attributes are handled below.
- if (attr == DW_AT_specification || attr == DW_AT_abstract_origin) {
- Optional<uint64_t> Ref = formValue.getAsReference(u);
- if (Ref.hasValue()) {
- uint32_t RefOffset = Ref.getValue();
- DWARFDebugInfoEntryMinimal DIE;
- if (const DWARFUnit *RefU = findUnitAndExtractFast(DIE, u, &RefOffset))
- if (const char *Name = DIE.getName(RefU, DINameKind::LinkageName))
- OS << " \"" << Name << '\"';
- }
- } else if (attr == DW_AT_APPLE_property_attribute) {
- if (Optional<uint64_t> OptVal = formValue.getAsUnsignedConstant())
- dumpApplePropertyAttribute(OS, *OptVal);
- } else if (attr == DW_AT_ranges) {
- dumpRanges(OS, getAddressRanges(u), u->getAddressByteSize(),
- sizeof(BaseIndent)+indent+4);
- }
-
- OS << ")\n";
-}
-
-bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFUnit *U,
+bool DWARFDebugInfoEntry::extractFast(const DWARFUnit &U,
uint32_t *OffsetPtr) {
+ DataExtractor DebugInfoData = U.getDebugInfoExtractor();
+ const uint32_t UEndOffset = U.getNextUnitOffset();
+ return extractFast(U, OffsetPtr, DebugInfoData, UEndOffset, 0);
+}
+bool DWARFDebugInfoEntry::extractFast(const DWARFUnit &U, uint32_t *OffsetPtr,
+ const DataExtractor &DebugInfoData,
+ uint32_t UEndOffset, uint32_t D) {
Offset = *OffsetPtr;
- DataExtractor DebugInfoData = U->getDebugInfoExtractor();
- uint32_t UEndOffset = U->getNextUnitOffset();
+ Depth = D;
if (Offset >= UEndOffset || !DebugInfoData.isValidOffset(Offset))
return false;
uint64_t AbbrCode = DebugInfoData.getULEB128(OffsetPtr);
@@ -194,267 +41,32 @@ bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFUnit *U,
AbbrevDecl = nullptr;
return true;
}
- AbbrevDecl = U->getAbbreviations()->getAbbreviationDeclaration(AbbrCode);
+ AbbrevDecl = U.getAbbreviations()->getAbbreviationDeclaration(AbbrCode);
if (nullptr == AbbrevDecl) {
// Restore the original offset.
*OffsetPtr = Offset;
return false;
}
- ArrayRef<uint8_t> FixedFormSizes = DWARFFormValue::getFixedFormSizes(
- U->getAddressByteSize(), U->getVersion());
- assert(FixedFormSizes.size() > 0);
+ // See if all attributes in this DIE have fixed byte sizes. If so, we can
+ // just add this size to the offset to skip to the next DIE.
+ if (Optional<size_t> FixedSize = AbbrevDecl->getFixedAttributesByteSize(U)) {
+ *OffsetPtr += *FixedSize;
+ return true;
+ }
// Skip all data in the .debug_info for the attributes
for (const auto &AttrSpec : AbbrevDecl->attributes()) {
- uint16_t Form = AttrSpec.Form;
-
- uint8_t FixedFormSize =
- (Form < FixedFormSizes.size()) ? FixedFormSizes[Form] : 0;
- if (FixedFormSize)
- *OffsetPtr += FixedFormSize;
- else if (!DWARFFormValue::skipValue(Form, DebugInfoData, OffsetPtr, U)) {
- // Restore the original offset.
+ // Check if this attribute has a fixed byte size.
+ if (Optional<uint8_t> FixedSize = AttrSpec.getByteSize(U)) {
+ // Attribute byte size is fixed, just add the size to the offset.
+ *OffsetPtr += *FixedSize;
+ } else if (!DWARFFormValue::skipValue(AttrSpec.Form, DebugInfoData,
+ OffsetPtr, &U)) {
+ // We failed to skip this attribute's value, restore the original offset
+ // and return the failure status.
*OffsetPtr = Offset;
return false;
}
}
return true;
}
-
-bool DWARFDebugInfoEntryMinimal::isSubprogramDIE() const {
- return getTag() == DW_TAG_subprogram;
-}
-
-bool DWARFDebugInfoEntryMinimal::isSubroutineDIE() const {
- uint32_t Tag = getTag();
- return Tag == DW_TAG_subprogram ||
- Tag == DW_TAG_inlined_subroutine;
-}
-
-bool DWARFDebugInfoEntryMinimal::getAttributeValue(
- const DWARFUnit *U, const uint16_t Attr, DWARFFormValue &FormValue) const {
- if (!AbbrevDecl)
- return false;
-
- uint32_t AttrIdx = AbbrevDecl->findAttributeIndex(Attr);
- if (AttrIdx == -1U)
- return false;
-
- DataExtractor DebugInfoData = U->getDebugInfoExtractor();
- uint32_t DebugInfoOffset = getOffset();
-
- // Skip the abbreviation code so we are at the data for the attributes
- DebugInfoData.getULEB128(&DebugInfoOffset);
-
- // Skip preceding attribute values.
- for (uint32_t i = 0; i < AttrIdx; ++i) {
- DWARFFormValue::skipValue(AbbrevDecl->getFormByIndex(i),
- DebugInfoData, &DebugInfoOffset, U);
- }
-
- FormValue = DWARFFormValue(AbbrevDecl->getFormByIndex(AttrIdx));
- return FormValue.extractValue(DebugInfoData, &DebugInfoOffset, U);
-}
-
-const char *DWARFDebugInfoEntryMinimal::getAttributeValueAsString(
- const DWARFUnit *U, const uint16_t Attr, const char *FailValue) const {
- DWARFFormValue FormValue;
- if (!getAttributeValue(U, Attr, FormValue))
- return FailValue;
- Optional<const char *> Result = FormValue.getAsCString(U);
- return Result.hasValue() ? Result.getValue() : FailValue;
-}
-
-uint64_t DWARFDebugInfoEntryMinimal::getAttributeValueAsAddress(
- const DWARFUnit *U, const uint16_t Attr, uint64_t FailValue) const {
- DWARFFormValue FormValue;
- if (!getAttributeValue(U, Attr, FormValue))
- return FailValue;
- Optional<uint64_t> Result = FormValue.getAsAddress(U);
- return Result.hasValue() ? Result.getValue() : FailValue;
-}
-
-uint64_t DWARFDebugInfoEntryMinimal::getAttributeValueAsUnsignedConstant(
- const DWARFUnit *U, const uint16_t Attr, uint64_t FailValue) const {
- DWARFFormValue FormValue;
- if (!getAttributeValue(U, Attr, FormValue))
- return FailValue;
- Optional<uint64_t> Result = FormValue.getAsUnsignedConstant();
- return Result.hasValue() ? Result.getValue() : FailValue;
-}
-
-uint64_t DWARFDebugInfoEntryMinimal::getAttributeValueAsReference(
- const DWARFUnit *U, const uint16_t Attr, uint64_t FailValue) const {
- DWARFFormValue FormValue;
- if (!getAttributeValue(U, Attr, FormValue))
- return FailValue;
- Optional<uint64_t> Result = FormValue.getAsReference(U);
- return Result.hasValue() ? Result.getValue() : FailValue;
-}
-
-uint64_t DWARFDebugInfoEntryMinimal::getAttributeValueAsSectionOffset(
- const DWARFUnit *U, const uint16_t Attr, uint64_t FailValue) const {
- DWARFFormValue FormValue;
- if (!getAttributeValue(U, Attr, FormValue))
- return FailValue;
- Optional<uint64_t> Result = FormValue.getAsSectionOffset();
- return Result.hasValue() ? Result.getValue() : FailValue;
-}
-
-uint64_t
-DWARFDebugInfoEntryMinimal::getRangesBaseAttribute(const DWARFUnit *U,
- uint64_t FailValue) const {
- uint64_t Result =
- getAttributeValueAsSectionOffset(U, DW_AT_ranges_base, -1ULL);
- if (Result != -1ULL)
- return Result;
- return getAttributeValueAsSectionOffset(U, DW_AT_GNU_ranges_base, FailValue);
-}
-
-bool DWARFDebugInfoEntryMinimal::getLowAndHighPC(const DWARFUnit *U,
- uint64_t &LowPC,
- uint64_t &HighPC) const {
- LowPC = getAttributeValueAsAddress(U, DW_AT_low_pc, -1ULL);
- if (LowPC == -1ULL)
- return false;
- HighPC = getAttributeValueAsAddress(U, DW_AT_high_pc, -1ULL);
- if (HighPC == -1ULL) {
- // Since DWARF4, DW_AT_high_pc may also be of class constant, in which case
- // it represents function size.
- HighPC = getAttributeValueAsUnsignedConstant(U, DW_AT_high_pc, -1ULL);
- if (HighPC != -1ULL)
- HighPC += LowPC;
- }
- return (HighPC != -1ULL);
-}
-
-DWARFAddressRangesVector
-DWARFDebugInfoEntryMinimal::getAddressRanges(const DWARFUnit *U) const {
- if (isNULL())
- return DWARFAddressRangesVector();
- // Single range specified by low/high PC.
- uint64_t LowPC, HighPC;
- if (getLowAndHighPC(U, LowPC, HighPC)) {
- return DWARFAddressRangesVector(1, std::make_pair(LowPC, HighPC));
- }
- // Multiple ranges from .debug_ranges section.
- uint32_t RangesOffset =
- getAttributeValueAsSectionOffset(U, DW_AT_ranges, -1U);
- if (RangesOffset != -1U) {
- DWARFDebugRangeList RangeList;
- if (U->extractRangeList(RangesOffset, RangeList))
- return RangeList.getAbsoluteRanges(U->getBaseAddress());
- }
- return DWARFAddressRangesVector();
-}
-
-void DWARFDebugInfoEntryMinimal::collectChildrenAddressRanges(
- const DWARFUnit *U, DWARFAddressRangesVector& Ranges) const {
- if (isNULL())
- return;
- if (isSubprogramDIE()) {
- const auto &DIERanges = getAddressRanges(U);
- Ranges.insert(Ranges.end(), DIERanges.begin(), DIERanges.end());
- }
-
- const DWARFDebugInfoEntryMinimal *Child = getFirstChild();
- while (Child) {
- Child->collectChildrenAddressRanges(U, Ranges);
- Child = Child->getSibling();
- }
-}
-
-bool DWARFDebugInfoEntryMinimal::addressRangeContainsAddress(
- const DWARFUnit *U, const uint64_t Address) const {
- for (const auto& R : getAddressRanges(U)) {
- if (R.first <= Address && Address < R.second)
- return true;
- }
- return false;
-}
-
-const char *
-DWARFDebugInfoEntryMinimal::getSubroutineName(const DWARFUnit *U,
- DINameKind Kind) const {
- if (!isSubroutineDIE())
- return nullptr;
- return getName(U, Kind);
-}
-
-const char *
-DWARFDebugInfoEntryMinimal::getName(const DWARFUnit *U,
- DINameKind Kind) const {
- if (Kind == DINameKind::None)
- return nullptr;
- // Try to get mangled name only if it was asked for.
- if (Kind == DINameKind::LinkageName) {
- if (const char *name =
- getAttributeValueAsString(U, DW_AT_MIPS_linkage_name, nullptr))
- return name;
- if (const char *name =
- getAttributeValueAsString(U, DW_AT_linkage_name, nullptr))
- return name;
- }
- if (const char *name = getAttributeValueAsString(U, DW_AT_name, nullptr))
- return name;
- // Try to get name from specification DIE.
- uint32_t spec_ref =
- getAttributeValueAsReference(U, DW_AT_specification, -1U);
- if (spec_ref != -1U) {
- DWARFDebugInfoEntryMinimal spec_die;
- if (const DWARFUnit *RefU = findUnitAndExtractFast(spec_die, U, &spec_ref)) {
- if (const char *name = spec_die.getName(RefU, Kind))
- return name;
- }
- }
- // Try to get name from abstract origin DIE.
- uint32_t abs_origin_ref =
- getAttributeValueAsReference(U, DW_AT_abstract_origin, -1U);
- if (abs_origin_ref != -1U) {
- DWARFDebugInfoEntryMinimal abs_origin_die;
- if (const DWARFUnit *RefU = findUnitAndExtractFast(abs_origin_die, U,
- &abs_origin_ref)) {
- if (const char *name = abs_origin_die.getName(RefU, Kind))
- return name;
- }
- }
- return nullptr;
-}
-
-void DWARFDebugInfoEntryMinimal::getCallerFrame(const DWARFUnit *U,
- uint32_t &CallFile,
- uint32_t &CallLine,
- uint32_t &CallColumn) const {
- CallFile = getAttributeValueAsUnsignedConstant(U, DW_AT_call_file, 0);
- CallLine = getAttributeValueAsUnsignedConstant(U, DW_AT_call_line, 0);
- CallColumn = getAttributeValueAsUnsignedConstant(U, DW_AT_call_column, 0);
-}
-
-DWARFDebugInfoEntryInlinedChain
-DWARFDebugInfoEntryMinimal::getInlinedChainForAddress(
- const DWARFUnit *U, const uint64_t Address) const {
- DWARFDebugInfoEntryInlinedChain InlinedChain;
- InlinedChain.U = U;
- if (isNULL())
- return InlinedChain;
- for (const DWARFDebugInfoEntryMinimal *DIE = this; DIE; ) {
- // Append current DIE to inlined chain only if it has correct tag
- // (e.g. it is not a lexical block).
- if (DIE->isSubroutineDIE()) {
- InlinedChain.DIEs.push_back(*DIE);
- }
- // Try to get child which also contains provided address.
- const DWARFDebugInfoEntryMinimal *Child = DIE->getFirstChild();
- while (Child) {
- if (Child->addressRangeContainsAddress(U, Address)) {
- // Assume there is only one such child.
- break;
- }
- Child = Child->getSibling();
- }
- DIE = Child;
- }
- // Reverse the obtained chain to make the root of inlined chain last.
- std::reverse(InlinedChain.DIEs.begin(), InlinedChain.DIEs.end());
- return InlinedChain;
-}
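extractFast() above now takes the fixed-byte-size shortcut from the abbreviation when it can, falling back to per-form skipValue() only for variable-length forms. A hedged sketch of a caller that relies on the lighter two-argument overload (countRawDIEs is a made-up helper; the first DIE offset would come from the unit header and is treated as a given):

// Count raw DIEs (including NULL terminators) without decoding attributes.
unsigned countRawDIEs(const DWARFUnit &U, uint32_t FirstDIEOffset) {
  uint32_t Offset = FirstDIEOffset;
  unsigned Count = 0;
  DWARFDebugInfoEntry DIE;
  while (Offset < U.getNextUnitOffset() && DIE.extractFast(U, &Offset))
    ++Count;
  return Count;
}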
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
index 30cb83398dd4..494059461fd7 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugLine.cpp
@@ -42,8 +42,8 @@ void DWARFDebugLine::Prologue::dump(raw_ostream &OS) const {
<< format(" opcode_base: %u\n", OpcodeBase);
for (uint32_t i = 0; i < StandardOpcodeLengths.size(); ++i)
- OS << format("standard_opcode_lengths[%s] = %u\n", LNStandardString(i + 1),
- StandardOpcodeLengths[i]);
+ OS << format("standard_opcode_lengths[%s] = %u\n",
+ LNStandardString(i + 1).data(), StandardOpcodeLengths[i]);
if (!IncludeDirectories.empty())
for (uint32_t i = 0; i < IncludeDirectories.size(); ++i)
@@ -624,12 +624,17 @@ bool DWARFDebugLine::LineTable::lookupAddressRange(
return true;
}
-bool DWARFDebugLine::LineTable::getFileNameByIndex(uint64_t FileIndex,
- const char *CompDir,
- FileLineInfoKind Kind,
- std::string &Result) const {
- if (FileIndex == 0 || FileIndex > Prologue.FileNames.size() ||
- Kind == FileLineInfoKind::None)
+bool
+DWARFDebugLine::LineTable::hasFileAtIndex(uint64_t FileIndex) const {
+ return FileIndex != 0 && FileIndex <= Prologue.FileNames.size();
+}
+
+bool
+DWARFDebugLine::LineTable::getFileNameByIndex(uint64_t FileIndex,
+ const char *CompDir,
+ FileLineInfoKind Kind,
+ std::string &Result) const {
+ if (Kind == FileLineInfoKind::None || !hasFileAtIndex(FileIndex))
return false;
const FileNameEntry &Entry = Prologue.FileNames[FileIndex - 1];
const char *FileName = Entry.Name;
@@ -673,5 +678,6 @@ bool DWARFDebugLine::LineTable::getFileLineInfoForAddress(
return false;
Result.Line = Row.Line;
Result.Column = Row.Column;
+ Result.Discriminator = Row.Discriminator;
return true;
}
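With hasFileAtIndex() split out above, callers can validate a file index before (or independently of) building a path. A hedged sketch of such a lookup (fileNameOrEmpty is a made-up helper; the table, index, and compilation directory are assumed inputs):

std::string fileNameOrEmpty(const DWARFDebugLine::LineTable &LT,
                            uint64_t FileIndex, const char *CompDir) {
  std::string Result;
  if (LT.hasFileAtIndex(FileIndex))
    LT.getFileNameByIndex(
        FileIndex, CompDir,
        DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, Result);
  return Result; // empty when the index is out of range
}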
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
index a7b46b842fee..ae5b9d70a2eb 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugLoc.cpp
@@ -82,9 +82,9 @@ void DWARFDebugLocDWO::parse(DataExtractor data) {
Loc.Offset = Offset;
dwarf::LocationListEntry Kind;
while ((Kind = static_cast<dwarf::LocationListEntry>(
- data.getU8(&Offset))) != dwarf::DW_LLE_end_of_list_entry) {
+ data.getU8(&Offset))) != dwarf::DW_LLE_end_of_list) {
- if (Kind != dwarf::DW_LLE_start_length_entry) {
+ if (Kind != dwarf::DW_LLE_startx_length) {
llvm::errs() << "error: dumping support for LLE of kind " << (int)Kind
<< " not implemented\n";
return;
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp
new file mode 100644
index 000000000000..3c1fe93090c6
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDebugPubTable.cpp
@@ -0,0 +1,65 @@
+//===-- DWARFDebugPubTable.cpp ---------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/DWARF/DWARFDebugPubTable.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace llvm::dwarf;
+
+DWARFDebugPubTable::DWARFDebugPubTable(StringRef Data, bool LittleEndian,
+ bool GnuStyle)
+ : GnuStyle(GnuStyle) {
+ DataExtractor PubNames(Data, LittleEndian, 0);
+ uint32_t Offset = 0;
+ while (PubNames.isValidOffset(Offset)) {
+ Sets.push_back({});
+ Set &SetData = Sets.back();
+
+ SetData.Length = PubNames.getU32(&Offset);
+ SetData.Version = PubNames.getU16(&Offset);
+ SetData.Offset = PubNames.getU32(&Offset);
+ SetData.Size = PubNames.getU32(&Offset);
+
+ while (Offset < Data.size()) {
+ uint32_t DieRef = PubNames.getU32(&Offset);
+ if (DieRef == 0)
+ break;
+ uint8_t IndexEntryValue = GnuStyle ? PubNames.getU8(&Offset) : 0;
+ const char *Name = PubNames.getCStr(&Offset);
+ SetData.Entries.push_back(
+ {DieRef, PubIndexEntryDescriptor(IndexEntryValue), Name});
+ }
+ }
+}
+
+void DWARFDebugPubTable::dump(StringRef Name, raw_ostream &OS) const {
+ OS << "\n." << Name << " contents: a\n";
+ for (const Set &S : Sets) {
+ OS << "length = " << format("0x%08x", S.Length);
+ OS << " version = " << format("0x%04x", S.Version);
+ OS << " unit_offset = " << format("0x%08x", S.Offset);
+ OS << " unit_size = " << format("0x%08x", S.Size) << '\n';
+ OS << (GnuStyle ? "Offset Linkage Kind Name\n"
+ : "Offset Name\n");
+
+ for (const Entry &E : S.Entries) {
+ OS << format("0x%8.8x ", E.SecOffset);
+ if (GnuStyle) {
+ StringRef EntryLinkage =
+ dwarf::GDBIndexEntryLinkageString(E.Descriptor.Linkage);
+ StringRef EntryKind = dwarf::GDBIndexEntryKindString(E.Descriptor.Kind);
+ OS << format("%-8s", EntryLinkage.data()) << ' '
+ << format("%-8s", EntryKind.data()) << ' ';
+ }
+ OS << '\"' << E.Name << "\"\n";
+ }
+ }
+}
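The new DWARFDebugPubTable parses a pub section eagerly in its constructor and dumps from the parsed Sets, replacing the streaming dumpPubSection() helper removed from DWARFContext.cpp earlier in this patch. A hedged usage sketch (dumpPubNames is a made-up wrapper; the section contents and endianness are assumed inputs):

void dumpPubNames(StringRef PubNamesData, bool LittleEndian, raw_ostream &OS) {
  DWARFDebugPubTable Table(PubNamesData, LittleEndian, /*GnuStyle=*/false);
  Table.dump("debug_pubnames", OS);
}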
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
new file mode 100644
index 000000000000..deec16330224
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFDie.cpp
@@ -0,0 +1,441 @@
+//===-- DWARFDie.cpp ------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/DWARF/DWARFDie.h"
+#include "SyntaxHighlighting.h"
+#include "llvm/DebugInfo/DWARF/DWARFCompileUnit.h"
+#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugInfoEntry.h"
+#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace dwarf;
+using namespace syntax;
+
+namespace {
+ static void dumpApplePropertyAttribute(raw_ostream &OS, uint64_t Val) {
+ OS << " (";
+ do {
+ uint64_t Shift = countTrailingZeros(Val);
+ assert(Shift < 64 && "undefined behavior");
+ uint64_t Bit = 1ULL << Shift;
+ auto PropName = ApplePropertyString(Bit);
+ if (!PropName.empty())
+ OS << PropName;
+ else
+ OS << format("DW_APPLE_PROPERTY_0x%" PRIx64, Bit);
+ if (!(Val ^= Bit))
+ break;
+ OS << ", ";
+ } while (true);
+ OS << ")";
+}
+
+static void dumpRanges(raw_ostream &OS, const DWARFAddressRangesVector& Ranges,
+ unsigned AddressSize, unsigned Indent) {
+ if (Ranges.empty())
+ return;
+
+ for (const auto &Range: Ranges) {
+ OS << '\n';
+ OS.indent(Indent);
+ OS << format("[0x%0*" PRIx64 " - 0x%0*" PRIx64 ")",
+ AddressSize*2, Range.first,
+ AddressSize*2, Range.second);
+ }
+}
+
+static void dumpAttribute(raw_ostream &OS, const DWARFDie &Die,
+ uint32_t *OffsetPtr, dwarf::Attribute Attr,
+ dwarf::Form Form, unsigned Indent) {
+ if (!Die.isValid())
+ return;
+ const char BaseIndent[] = " ";
+ OS << BaseIndent;
+ OS.indent(Indent+2);
+ auto attrString = AttributeString(Attr);
+ if (!attrString.empty())
+ WithColor(OS, syntax::Attribute) << attrString;
+ else
+ WithColor(OS, syntax::Attribute).get() << format("DW_AT_Unknown_%x", Attr);
+
+ auto formString = FormEncodingString(Form);
+ if (!formString.empty())
+ OS << " [" << formString << ']';
+ else
+ OS << format(" [DW_FORM_Unknown_%x]", Form);
+
+ DWARFUnit *U = Die.getDwarfUnit();
+ DWARFFormValue formValue(Form);
+
+ if (!formValue.extractValue(U->getDebugInfoExtractor(), OffsetPtr, U))
+ return;
+
+ OS << "\t(";
+
+ StringRef Name;
+ std::string File;
+ auto Color = syntax::Enumerator;
+ if (Attr == DW_AT_decl_file || Attr == DW_AT_call_file) {
+ Color = syntax::String;
+ if (const auto *LT = U->getContext().getLineTableForUnit(U))
+ if (LT->getFileNameByIndex(formValue.getAsUnsignedConstant().getValue(), U->getCompilationDir(), DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, File)) {
+ File = '"' + File + '"';
+ Name = File;
+ }
+ } else if (Optional<uint64_t> Val = formValue.getAsUnsignedConstant())
+ Name = AttributeValueString(Attr, *Val);
+
+ if (!Name.empty())
+ WithColor(OS, Color) << Name;
+ else if (Attr == DW_AT_decl_line || Attr == DW_AT_call_line)
+ OS << *formValue.getAsUnsignedConstant();
+ else
+ formValue.dump(OS);
+
+ // We have dumped the attribute raw value. For some attributes
+ // having both the raw value and the pretty-printed value is
+ // interesting. These attributes are handled below.
+ if (Attr == DW_AT_specification || Attr == DW_AT_abstract_origin) {
+ if (const char *Name = Die.getAttributeValueAsReferencedDie(Attr).getName(DINameKind::LinkageName))
+ OS << " \"" << Name << '\"';
+ } else if (Attr == DW_AT_APPLE_property_attribute) {
+ if (Optional<uint64_t> OptVal = formValue.getAsUnsignedConstant())
+ dumpApplePropertyAttribute(OS, *OptVal);
+ } else if (Attr == DW_AT_ranges) {
+ dumpRanges(OS, Die.getAddressRanges(), U->getAddressByteSize(),
+ sizeof(BaseIndent)+Indent+4);
+ }
+
+ OS << ")\n";
+}
+
+} // end anonymous namespace
+
+bool DWARFDie::isSubprogramDIE() const {
+ return getTag() == DW_TAG_subprogram;
+}
+
+bool DWARFDie::isSubroutineDIE() const {
+ auto Tag = getTag();
+ return Tag == DW_TAG_subprogram || Tag == DW_TAG_inlined_subroutine;
+}
+
+Optional<DWARFFormValue>
+DWARFDie::getAttributeValue(dwarf::Attribute Attr) const {
+ if (!isValid())
+ return None;
+ auto AbbrevDecl = getAbbreviationDeclarationPtr();
+ if (AbbrevDecl)
+ return AbbrevDecl->getAttributeValue(getOffset(), Attr, *U);
+ return None;
+}
+
+const char *DWARFDie::getAttributeValueAsString(dwarf::Attribute Attr,
+ const char *FailValue) const {
+ auto FormValue = getAttributeValue(Attr);
+ if (!FormValue)
+ return FailValue;
+ Optional<const char *> Result = FormValue->getAsCString();
+ return Result.hasValue() ? Result.getValue() : FailValue;
+}
+
+uint64_t DWARFDie::getAttributeValueAsAddress(dwarf::Attribute Attr,
+ uint64_t FailValue) const {
+ if (auto Value = getAttributeValueAsAddress(Attr))
+ return *Value;
+ return FailValue;
+}
+
+Optional<uint64_t>
+DWARFDie::getAttributeValueAsAddress(dwarf::Attribute Attr) const {
+ if (auto FormValue = getAttributeValue(Attr))
+ return FormValue->getAsAddress();
+ return None;
+}
+
+int64_t DWARFDie::getAttributeValueAsSignedConstant(dwarf::Attribute Attr,
+ int64_t FailValue) const {
+ if (auto Value = getAttributeValueAsSignedConstant(Attr))
+ return *Value;
+ return FailValue;
+}
+
+Optional<int64_t>
+DWARFDie::getAttributeValueAsSignedConstant(dwarf::Attribute Attr) const {
+ if (auto FormValue = getAttributeValue(Attr))
+ return FormValue->getAsSignedConstant();
+ return None;
+}
+
+uint64_t
+DWARFDie::getAttributeValueAsUnsignedConstant(dwarf::Attribute Attr,
+ uint64_t FailValue) const {
+ if (auto Value = getAttributeValueAsUnsignedConstant(Attr))
+ return *Value;
+ return FailValue;
+}
+
+
+Optional<uint64_t>
+DWARFDie::getAttributeValueAsUnsignedConstant(dwarf::Attribute Attr) const {
+ if (auto FormValue = getAttributeValue(Attr))
+ return FormValue->getAsUnsignedConstant();
+ return None;
+}
+
+uint64_t DWARFDie::getAttributeValueAsReference(dwarf::Attribute Attr,
+ uint64_t FailValue) const {
+ if (auto Value = getAttributeValueAsReference(Attr))
+ return *Value;
+ return FailValue;
+}
+
+
+Optional<uint64_t>
+DWARFDie::getAttributeValueAsReference(dwarf::Attribute Attr) const {
+ if (auto FormValue = getAttributeValue(Attr))
+ return FormValue->getAsReference();
+ return None;
+}
+
+uint64_t DWARFDie::getAttributeValueAsSectionOffset(dwarf::Attribute Attr,
+ uint64_t FailValue) const {
+ if (auto Value = getAttributeValueAsSectionOffset(Attr))
+ return *Value;
+ return FailValue;
+}
+
+Optional<uint64_t>
+DWARFDie::getAttributeValueAsSectionOffset(dwarf::Attribute Attr) const {
+ if (auto FormValue = getAttributeValue(Attr))
+ return FormValue->getAsSectionOffset();
+ return None;
+}
+
+
+DWARFDie
+DWARFDie::getAttributeValueAsReferencedDie(dwarf::Attribute Attr) const {
+ auto SpecRef = getAttributeValueAsReference(Attr);
+ if (SpecRef) {
+ auto SpecUnit = U->getUnitSection().getUnitForOffset(*SpecRef);
+ if (SpecUnit)
+ return SpecUnit->getDIEForOffset(*SpecRef);
+ }
+ return DWARFDie();
+}
+
+Optional<uint64_t>
+DWARFDie::getRangesBaseAttribute() const {
+ auto Result = getAttributeValueAsSectionOffset(DW_AT_rnglists_base);
+ if (Result)
+ return Result;
+ return getAttributeValueAsSectionOffset(DW_AT_GNU_ranges_base);
+}
+
+Optional<uint64_t> DWARFDie::getHighPC(uint64_t LowPC) const {
+ if (auto FormValue = getAttributeValue(DW_AT_high_pc)) {
+ if (auto Address = FormValue->getAsAddress()) {
+ // High PC is an address.
+ return Address;
+ }
+ if (auto Offset = FormValue->getAsUnsignedConstant()) {
+ // High PC is an offset from LowPC.
+ return LowPC + *Offset;
+ }
+ }
+ return None;
+}
+
+bool DWARFDie::getLowAndHighPC(uint64_t &LowPC, uint64_t &HighPC) const {
+ auto LowPcAddr = getAttributeValueAsAddress(DW_AT_low_pc);
+ if (!LowPcAddr)
+ return false;
+ if (auto HighPcAddr = getHighPC(*LowPcAddr)) {
+ LowPC = *LowPcAddr;
+ HighPC = *HighPcAddr;
+ return true;
+ }
+ return false;
+}
+
+DWARFAddressRangesVector
+DWARFDie::getAddressRanges() const {
+ if (isNULL())
+ return DWARFAddressRangesVector();
+ // Single range specified by low/high PC.
+ uint64_t LowPC, HighPC;
+ if (getLowAndHighPC(LowPC, HighPC)) {
+ return DWARFAddressRangesVector(1, std::make_pair(LowPC, HighPC));
+ }
+ // Multiple ranges from .debug_ranges section.
+ auto RangesOffset = getAttributeValueAsSectionOffset(DW_AT_ranges);
+ if (RangesOffset) {
+ DWARFDebugRangeList RangeList;
+ if (U->extractRangeList(*RangesOffset, RangeList))
+ return RangeList.getAbsoluteRanges(U->getBaseAddress());
+ }
+ return DWARFAddressRangesVector();
+}
+
+void
+DWARFDie::collectChildrenAddressRanges(DWARFAddressRangesVector& Ranges) const {
+ if (isNULL())
+ return;
+ if (isSubprogramDIE()) {
+ const auto &DIERanges = getAddressRanges();
+ Ranges.insert(Ranges.end(), DIERanges.begin(), DIERanges.end());
+ }
+
+ DWARFDie Child = getFirstChild();
+ while (Child) {
+ Child.collectChildrenAddressRanges(Ranges);
+ Child = Child.getSibling();
+ }
+}
+
+bool DWARFDie::addressRangeContainsAddress(const uint64_t Address) const {
+ for (const auto& R : getAddressRanges()) {
+ if (R.first <= Address && Address < R.second)
+ return true;
+ }
+ return false;
+}
+
+const char *
+DWARFDie::getSubroutineName(DINameKind Kind) const {
+ if (!isSubroutineDIE())
+ return nullptr;
+ return getName(Kind);
+}
+
+const char *
+DWARFDie::getName(DINameKind Kind) const {
+ if (!isValid() || Kind == DINameKind::None)
+ return nullptr;
+ const char *name = nullptr;
+ // Try to get mangled name only if it was asked for.
+ if (Kind == DINameKind::LinkageName) {
+ if ((name = getAttributeValueAsString(DW_AT_MIPS_linkage_name, nullptr)))
+ return name;
+ if ((name = getAttributeValueAsString(DW_AT_linkage_name, nullptr)))
+ return name;
+ }
+ if ((name = getAttributeValueAsString(DW_AT_name, nullptr)))
+ return name;
+ // Try to get name from specification DIE.
+ DWARFDie SpecDie = getAttributeValueAsReferencedDie(DW_AT_specification);
+ if (SpecDie && (name = SpecDie.getName(Kind)))
+ return name;
+ // Try to get name from abstract origin DIE.
+ DWARFDie AbsDie = getAttributeValueAsReferencedDie(DW_AT_abstract_origin);
+ if (AbsDie && (name = AbsDie.getName(Kind)))
+ return name;
+ return nullptr;
+}
+
+void DWARFDie::getCallerFrame(uint32_t &CallFile, uint32_t &CallLine,
+ uint32_t &CallColumn) const {
+ CallFile = getAttributeValueAsUnsignedConstant(DW_AT_call_file, 0);
+ CallLine = getAttributeValueAsUnsignedConstant(DW_AT_call_line, 0);
+ CallColumn = getAttributeValueAsUnsignedConstant(DW_AT_call_column, 0);
+}
+
+void DWARFDie::dump(raw_ostream &OS, unsigned RecurseDepth,
+ unsigned Indent) const {
+ if (!isValid())
+ return;
+ DataExtractor debug_info_data = U->getDebugInfoExtractor();
+ const uint32_t Offset = getOffset();
+ uint32_t offset = Offset;
+
+ if (debug_info_data.isValidOffset(offset)) {
+ uint32_t abbrCode = debug_info_data.getULEB128(&offset);
+ WithColor(OS, syntax::Address).get() << format("\n0x%8.8x: ", Offset);
+
+ if (abbrCode) {
+ auto AbbrevDecl = getAbbreviationDeclarationPtr();
+ if (AbbrevDecl) {
+ auto tagString = TagString(getTag());
+ if (!tagString.empty())
+ WithColor(OS, syntax::Tag).get().indent(Indent) << tagString;
+ else
+ WithColor(OS, syntax::Tag).get().indent(Indent)
+ << format("DW_TAG_Unknown_%x", getTag());
+
+ OS << format(" [%u] %c\n", abbrCode,
+ AbbrevDecl->hasChildren() ? '*' : ' ');
+
+ // Dump all data in the DIE for the attributes.
+ for (const auto &AttrSpec : AbbrevDecl->attributes()) {
+ dumpAttribute(OS, *this, &offset, AttrSpec.Attr, AttrSpec.Form,
+ Indent);
+ }
+
+ DWARFDie child = getFirstChild();
+ if (RecurseDepth > 0 && child) {
+ while (child) {
+ child.dump(OS, RecurseDepth-1, Indent+2);
+ child = child.getSibling();
+ }
+ }
+ } else {
+ OS << "Abbreviation code not found in 'debug_abbrev' class for code: "
+ << abbrCode << '\n';
+ }
+ } else {
+ OS.indent(Indent) << "NULL\n";
+ }
+ }
+}
+
+
+void DWARFDie::getInlinedChainForAddress(
+ const uint64_t Address, SmallVectorImpl<DWARFDie> &InlinedChain) const {
+ if (isNULL())
+ return;
+ DWARFDie DIE(*this);
+ while (DIE) {
+ // Append current DIE to inlined chain only if it has correct tag
+ // (e.g. it is not a lexical block).
+ if (DIE.isSubroutineDIE())
+ InlinedChain.push_back(DIE);
+
+ // Try to get child which also contains provided address.
+ DWARFDie Child = DIE.getFirstChild();
+ while (Child) {
+ if (Child.addressRangeContainsAddress(Address)) {
+ // Assume there is only one such child.
+ break;
+ }
+ Child = Child.getSibling();
+ }
+ DIE = Child;
+ }
+ // Reverse the obtained chain to make the root of inlined chain last.
+ std::reverse(InlinedChain.begin(), InlinedChain.end());
+}
+
+DWARFDie DWARFDie::getParent() const {
+ if (isValid())
+ return U->getParent(Die);
+ return DWARFDie();
+}
+
+DWARFDie DWARFDie::getSibling() const {
+ if (isValid())
+ return U->getSibling(Die);
+ return DWARFDie();
+}
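DWARFDie bundles the unit pointer with the underlying DWARFDebugInfoEntry, which is why the name/range/caller-frame queries above no longer thread a DWARFUnit* through every call. A hedged sketch of traversing the tree with the new handle (printSubprograms is a made-up helper; DINameKind::ShortName and the starting DIE are assumptions of this sketch):

// Recursively print the short names of all subprogram DIEs below Die.
void printSubprograms(DWARFDie Die, raw_ostream &OS, unsigned Indent = 0) {
  if (!Die.isValid())
    return;
  if (Die.isSubprogramDIE())
    if (const char *Name = Die.getName(DINameKind::ShortName))
      OS.indent(Indent) << Name << '\n';
  for (DWARFDie Child = Die.getFirstChild(); Child; Child = Child.getSibling())
    printSubprograms(Child, OS, Indent + 2);
}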
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp
index 3dc58423df68..e48a6f0981b7 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFFormValue.cpp
@@ -23,60 +23,6 @@ using namespace llvm;
using namespace dwarf;
using namespace syntax;
-namespace {
-uint8_t getRefAddrSize(uint8_t AddrSize, uint16_t Version) {
- // FIXME: Support DWARF64.
- return (Version == 2) ? AddrSize : 4;
-}
-
-template <uint8_t AddrSize, uint8_t RefAddrSize>
-ArrayRef<uint8_t> makeFixedFormSizesArrayRef() {
- static const uint8_t sizes[] = {
- 0, // 0x00 unused
- AddrSize, // 0x01 DW_FORM_addr
- 0, // 0x02 unused
- 0, // 0x03 DW_FORM_block2
- 0, // 0x04 DW_FORM_block4
- 2, // 0x05 DW_FORM_data2
- 4, // 0x06 DW_FORM_data4
- 8, // 0x07 DW_FORM_data8
- 0, // 0x08 DW_FORM_string
- 0, // 0x09 DW_FORM_block
- 0, // 0x0a DW_FORM_block1
- 1, // 0x0b DW_FORM_data1
- 1, // 0x0c DW_FORM_flag
- 0, // 0x0d DW_FORM_sdata
- 4, // 0x0e DW_FORM_strp
- 0, // 0x0f DW_FORM_udata
- RefAddrSize, // 0x10 DW_FORM_ref_addr
- 1, // 0x11 DW_FORM_ref1
- 2, // 0x12 DW_FORM_ref2
- 4, // 0x13 DW_FORM_ref4
- 8, // 0x14 DW_FORM_ref8
- 0, // 0x15 DW_FORM_ref_udata
- 0, // 0x16 DW_FORM_indirect
- 4, // 0x17 DW_FORM_sec_offset
- 0, // 0x18 DW_FORM_exprloc
- 0, // 0x19 DW_FORM_flag_present
- };
- return makeArrayRef(sizes);
-}
-}
-
-ArrayRef<uint8_t> DWARFFormValue::getFixedFormSizes(uint8_t AddrSize,
- uint16_t Version) {
- uint8_t RefAddrSize = getRefAddrSize(AddrSize, Version);
- if (AddrSize == 4 && RefAddrSize == 4)
- return makeFixedFormSizesArrayRef<4, 4>();
- if (AddrSize == 4 && RefAddrSize == 8)
- return makeFixedFormSizesArrayRef<4, 8>();
- if (AddrSize == 8 && RefAddrSize == 4)
- return makeFixedFormSizesArrayRef<8, 4>();
- if (AddrSize == 8 && RefAddrSize == 8)
- return makeFixedFormSizesArrayRef<8, 8>();
- return None;
-}
-
static const DWARFFormValue::FormClass DWARF4FormClasses[] = {
DWARFFormValue::FC_Unknown, // 0x0
DWARFFormValue::FC_Address, // 0x01 DW_FORM_addr
@@ -108,6 +54,217 @@ static const DWARFFormValue::FormClass DWARF4FormClasses[] = {
DWARFFormValue::FC_Flag, // 0x19 DW_FORM_flag_present
};
+namespace {
+
+/// A helper class that can be used in DWARFFormValue.cpp functions that need
+/// to know the byte size of DW_FORM values that vary in size depending on the
+/// DWARF version, address byte size, or DWARF32/DWARF64 format.
+class FormSizeHelper {
+ uint16_t Version;
+ uint8_t AddrSize;
+ llvm::dwarf::DwarfFormat Format;
+
+public:
+ FormSizeHelper(uint16_t V, uint8_t A, llvm::dwarf::DwarfFormat F)
+ : Version(V), AddrSize(A), Format(F) {}
+ uint8_t getAddressByteSize() const { return AddrSize; }
+ uint8_t getRefAddrByteSize() const {
+ if (Version == 2)
+ return AddrSize;
+ return getDwarfOffsetByteSize();
+ }
+ uint8_t getDwarfOffsetByteSize() const {
+ switch (Format) {
+ case dwarf::DwarfFormat::DWARF32:
+ return 4;
+ case dwarf::DwarfFormat::DWARF64:
+ return 8;
+ }
+ llvm_unreachable("Invalid Format value");
+ }
+};
+
+} // end anonymous namespace
+
+template <class T>
+static Optional<uint8_t> getFixedByteSize(dwarf::Form Form, const T *U) {
+ switch (Form) {
+ case DW_FORM_addr:
+ if (U)
+ return U->getAddressByteSize();
+ return None;
+
+ case DW_FORM_block: // ULEB128 length L followed by L bytes.
+ case DW_FORM_block1: // 1 byte length L followed by L bytes.
+ case DW_FORM_block2: // 2 byte length L followed by L bytes.
+ case DW_FORM_block4: // 4 byte length L followed by L bytes.
+ case DW_FORM_string: // C-string with null terminator.
+ case DW_FORM_sdata: // SLEB128.
+ case DW_FORM_udata: // ULEB128.
+ case DW_FORM_ref_udata: // ULEB128.
+ case DW_FORM_indirect: // ULEB128.
+ case DW_FORM_exprloc: // ULEB128 length L followed by L bytes.
+ case DW_FORM_strx: // ULEB128.
+ case DW_FORM_addrx: // ULEB128.
+ case DW_FORM_loclistx: // ULEB128.
+ case DW_FORM_rnglistx: // ULEB128.
+ case DW_FORM_GNU_addr_index: // ULEB128.
+ case DW_FORM_GNU_str_index: // ULEB128.
+ return None;
+
+ case DW_FORM_ref_addr:
+ if (U)
+ return U->getRefAddrByteSize();
+ return None;
+
+ case DW_FORM_flag:
+ case DW_FORM_data1:
+ case DW_FORM_ref1:
+ return 1;
+
+ case DW_FORM_data2:
+ case DW_FORM_ref2:
+ return 2;
+
+ case DW_FORM_data4:
+ case DW_FORM_ref4:
+ return 4;
+
+ case DW_FORM_strp:
+ case DW_FORM_GNU_ref_alt:
+ case DW_FORM_GNU_strp_alt:
+ case DW_FORM_line_strp:
+ case DW_FORM_sec_offset:
+ case DW_FORM_strp_sup:
+ case DW_FORM_ref_sup:
+ if (U)
+ return U->getDwarfOffsetByteSize();
+ return None;
+
+ case DW_FORM_data8:
+ case DW_FORM_ref8:
+ case DW_FORM_ref_sig8:
+ return 8;
+
+ case DW_FORM_flag_present:
+ return 0;
+
+ case DW_FORM_data16:
+ return 16;
+
+ case DW_FORM_implicit_const:
+ // The implicit value is stored in the abbreviation as a ULEB128, and
+ // there is no data in debug info.
+ return 0;
+
+ default:
+ llvm_unreachable("Handle this form in this switch statement");
+ }
+ return None;
+}
+
+template <class T>
+static bool skipFormValue(dwarf::Form Form, const DataExtractor &DebugInfoData,
+ uint32_t *OffsetPtr, const T *U) {
+ bool Indirect = false;
+ do {
+ switch (Form) {
+ // Blocks of inlined data that have a length field and the data bytes
+ // inlined in the .debug_info.
+ case DW_FORM_exprloc:
+ case DW_FORM_block: {
+ uint64_t size = DebugInfoData.getULEB128(OffsetPtr);
+ *OffsetPtr += size;
+ return true;
+ }
+ case DW_FORM_block1: {
+ uint8_t size = DebugInfoData.getU8(OffsetPtr);
+ *OffsetPtr += size;
+ return true;
+ }
+ case DW_FORM_block2: {
+ uint16_t size = DebugInfoData.getU16(OffsetPtr);
+ *OffsetPtr += size;
+ return true;
+ }
+ case DW_FORM_block4: {
+ uint32_t size = DebugInfoData.getU32(OffsetPtr);
+ *OffsetPtr += size;
+ return true;
+ }
+
+ // Inlined NULL terminated C-strings.
+ case DW_FORM_string:
+ DebugInfoData.getCStr(OffsetPtr);
+ return true;
+
+ case DW_FORM_addr:
+ case DW_FORM_ref_addr:
+ case DW_FORM_flag_present:
+ case DW_FORM_data1:
+ case DW_FORM_data2:
+ case DW_FORM_data4:
+ case DW_FORM_data8:
+ case DW_FORM_flag:
+ case DW_FORM_ref1:
+ case DW_FORM_ref2:
+ case DW_FORM_ref4:
+ case DW_FORM_ref8:
+ case DW_FORM_ref_sig8:
+ case DW_FORM_ref_sup:
+ case DW_FORM_sec_offset:
+ case DW_FORM_strp:
+ case DW_FORM_strp_sup:
+ case DW_FORM_line_strp:
+ case DW_FORM_GNU_ref_alt:
+ case DW_FORM_GNU_strp_alt:
+ if (Optional<uint8_t> FixedSize = ::getFixedByteSize(Form, U)) {
+ *OffsetPtr += *FixedSize;
+ return true;
+ }
+ return false;
+
+ // signed or unsigned LEB 128 values.
+ case DW_FORM_sdata:
+ DebugInfoData.getSLEB128(OffsetPtr);
+ return true;
+
+ case DW_FORM_udata:
+ case DW_FORM_ref_udata:
+ case DW_FORM_strx:
+ case DW_FORM_addrx:
+ case DW_FORM_loclistx:
+ case DW_FORM_rnglistx:
+ case DW_FORM_GNU_addr_index:
+ case DW_FORM_GNU_str_index:
+ DebugInfoData.getULEB128(OffsetPtr);
+ return true;
+
+ case DW_FORM_indirect:
+ Indirect = true;
+ Form = static_cast<dwarf::Form>(DebugInfoData.getULEB128(OffsetPtr));
+ break;
+
+ default:
+ return false;
+ }
+ } while (Indirect);
+ return true;
+}
+
+Optional<uint8_t> DWARFFormValue::getFixedByteSize(dwarf::Form Form,
+ const DWARFUnit *U) {
+ return ::getFixedByteSize(Form, U);
+}
+
+Optional<uint8_t>
+DWARFFormValue::getFixedByteSize(dwarf::Form Form, uint16_t Version,
+ uint8_t AddrSize,
+ llvm::dwarf::DwarfFormat Format) {
+ FormSizeHelper FSH(Version, AddrSize, Format);
+ return ::getFixedByteSize(Form, &FSH);
+}
+
bool DWARFFormValue::isFormClass(DWARFFormValue::FormClass FC) const {
// First, check DWARF4 form classes.
if (Form < makeArrayRef(DWARF4FormClasses).size() &&
@@ -123,6 +280,8 @@ bool DWARFFormValue::isFormClass(DWARFFormValue::FormClass FC) const {
case DW_FORM_GNU_str_index:
case DW_FORM_GNU_strp_alt:
return (FC == FC_String);
+ default:
+ break;
}
// In DWARF3 DW_FORM_data4 and DW_FORM_data8 served also as a section offset.
// Don't check for DWARF version here, as some producers may still do this
@@ -131,8 +290,10 @@ bool DWARFFormValue::isFormClass(DWARFFormValue::FormClass FC) const {
FC == FC_SectionOffset;
}
-bool DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr,
+bool DWARFFormValue::extractValue(const DataExtractor &data,
+ uint32_t *offset_ptr,
const DWARFUnit *cu) {
+ U = cu;
bool indirect = false;
bool is_block = false;
Value.data = nullptr;
@@ -143,16 +304,15 @@ bool DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr,
switch (Form) {
case DW_FORM_addr:
case DW_FORM_ref_addr: {
- if (!cu)
+ if (!U)
return false;
uint16_t AddrSize =
(Form == DW_FORM_addr)
- ? cu->getAddressByteSize()
- : getRefAddrSize(cu->getAddressByteSize(), cu->getVersion());
- RelocAddrMap::const_iterator AI = cu->getRelocMap()->find(*offset_ptr);
- if (AI != cu->getRelocMap()->end()) {
- const std::pair<uint8_t, int64_t> &R = AI->second;
- Value.uval = data.getUnsigned(offset_ptr, AddrSize) + R.second;
+ ? U->getAddressByteSize()
+ : U->getRefAddrByteSize();
+ RelocAddrMap::const_iterator AI = U->getRelocMap()->find(*offset_ptr);
+ if (AI != U->getRelocMap()->end()) {
+ Value.uval = data.getUnsigned(offset_ptr, AddrSize) + AI->second.second;
} else
Value.uval = data.getUnsigned(offset_ptr, AddrSize);
break;
@@ -186,10 +346,10 @@ bool DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr,
case DW_FORM_data4:
case DW_FORM_ref4: {
Value.uval = data.getU32(offset_ptr);
- if (!cu)
+ if (!U)
break;
- RelocAddrMap::const_iterator AI = cu->getRelocMap()->find(*offset_ptr-4);
- if (AI != cu->getRelocMap()->end())
+ RelocAddrMap::const_iterator AI = U->getRelocMap()->find(*offset_ptr-4);
+ if (AI != U->getRelocMap()->end())
Value.uval += AI->second.second;
break;
}
@@ -208,20 +368,22 @@ bool DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr,
Value.cstr = data.getCStr(offset_ptr);
break;
case DW_FORM_indirect:
- Form = data.getULEB128(offset_ptr);
+ Form = static_cast<dwarf::Form>(data.getULEB128(offset_ptr));
indirect = true;
break;
- case DW_FORM_sec_offset:
case DW_FORM_strp:
+ case DW_FORM_sec_offset:
case DW_FORM_GNU_ref_alt:
- case DW_FORM_GNU_strp_alt: {
- // FIXME: This is 64-bit for DWARF64.
- Value.uval = data.getU32(offset_ptr);
- if (!cu)
- break;
- RelocAddrMap::const_iterator AI =
- cu->getRelocMap()->find(*offset_ptr - 4);
- if (AI != cu->getRelocMap()->end())
+ case DW_FORM_GNU_strp_alt:
+ case DW_FORM_line_strp:
+ case DW_FORM_strp_sup:
+ case DW_FORM_ref_sup: {
+ if (!U)
+ return false;
+ RelocAddrMap::const_iterator AI = U->getRelocMap()->find(*offset_ptr);
+ uint8_t Size = U->getDwarfOffsetByteSize();
+ Value.uval = data.getUnsigned(offset_ptr, Size);
+ if (AI != U->getRelocMap()->end())
Value.uval += AI->second.second;
break;
}
@@ -252,123 +414,26 @@ bool DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr,
return true;
}
-bool
-DWARFFormValue::skipValue(DataExtractor debug_info_data, uint32_t* offset_ptr,
- const DWARFUnit *cu) const {
- return DWARFFormValue::skipValue(Form, debug_info_data, offset_ptr, cu);
+bool DWARFFormValue::skipValue(DataExtractor DebugInfoData,
+ uint32_t *offset_ptr, const DWARFUnit *U) const {
+ return DWARFFormValue::skipValue(Form, DebugInfoData, offset_ptr, U);
}
-bool
-DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data,
- uint32_t *offset_ptr, const DWARFUnit *cu) {
- return skipValue(form, debug_info_data, offset_ptr, cu->getVersion(),
- cu->getAddressByteSize());
+bool DWARFFormValue::skipValue(dwarf::Form form, DataExtractor DebugInfoData,
+ uint32_t *offset_ptr, const DWARFUnit *U) {
+ return skipFormValue(form, DebugInfoData, offset_ptr, U);
}
-bool DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data,
- uint32_t *offset_ptr, uint16_t Version,
- uint8_t AddrSize) {
- bool indirect = false;
- do {
- switch (form) {
- // Blocks if inlined data that have a length field and the data bytes
- // inlined in the .debug_info
- case DW_FORM_exprloc:
- case DW_FORM_block: {
- uint64_t size = debug_info_data.getULEB128(offset_ptr);
- *offset_ptr += size;
- return true;
- }
- case DW_FORM_block1: {
- uint8_t size = debug_info_data.getU8(offset_ptr);
- *offset_ptr += size;
- return true;
- }
- case DW_FORM_block2: {
- uint16_t size = debug_info_data.getU16(offset_ptr);
- *offset_ptr += size;
- return true;
- }
- case DW_FORM_block4: {
- uint32_t size = debug_info_data.getU32(offset_ptr);
- *offset_ptr += size;
- return true;
- }
-
- // Inlined NULL terminated C-strings
- case DW_FORM_string:
- debug_info_data.getCStr(offset_ptr);
- return true;
-
- // Compile unit address sized values
- case DW_FORM_addr:
- *offset_ptr += AddrSize;
- return true;
- case DW_FORM_ref_addr:
- *offset_ptr += getRefAddrSize(AddrSize, Version);
- return true;
-
- // 0 byte values - implied from the form.
- case DW_FORM_flag_present:
- return true;
-
- // 1 byte values
- case DW_FORM_data1:
- case DW_FORM_flag:
- case DW_FORM_ref1:
- *offset_ptr += 1;
- return true;
-
- // 2 byte values
- case DW_FORM_data2:
- case DW_FORM_ref2:
- *offset_ptr += 2;
- return true;
-
- // 4 byte values
- case DW_FORM_data4:
- case DW_FORM_ref4:
- *offset_ptr += 4;
- return true;
- // 8 byte values
- case DW_FORM_data8:
- case DW_FORM_ref8:
- case DW_FORM_ref_sig8:
- *offset_ptr += 8;
- return true;
-
- // signed or unsigned LEB 128 values
- // case DW_FORM_APPLE_db_str:
- case DW_FORM_sdata:
- case DW_FORM_udata:
- case DW_FORM_ref_udata:
- case DW_FORM_GNU_str_index:
- case DW_FORM_GNU_addr_index:
- debug_info_data.getULEB128(offset_ptr);
- return true;
-
- case DW_FORM_indirect:
- indirect = true;
- form = debug_info_data.getULEB128(offset_ptr);
- break;
-
- // FIXME: 4 for DWARF32, 8 for DWARF64.
- case DW_FORM_sec_offset:
- case DW_FORM_strp:
- case DW_FORM_GNU_ref_alt:
- case DW_FORM_GNU_strp_alt:
- *offset_ptr += 4;
- return true;
-
- default:
- return false;
- }
- } while (indirect);
- return true;
+bool DWARFFormValue::skipValue(dwarf::Form form, DataExtractor DebugInfoData,
+ uint32_t *offset_ptr, uint16_t Version,
+ uint8_t AddrSize,
+ llvm::dwarf::DwarfFormat Format) {
+ FormSizeHelper FSH(Version, AddrSize, Format);
+ return skipFormValue(form, DebugInfoData, offset_ptr, &FSH);
}
void
-DWARFFormValue::dump(raw_ostream &OS, const DWARFUnit *cu) const {
+DWARFFormValue::dump(raw_ostream &OS) const {
uint64_t uvalue = Value.uval;
bool cu_relative_offset = false;
@@ -377,7 +442,9 @@ DWARFFormValue::dump(raw_ostream &OS, const DWARFUnit *cu) const {
case DW_FORM_GNU_addr_index: {
OS << format(" indexed (%8.8x) address = ", (uint32_t)uvalue);
uint64_t Address;
- if (cu->getAddrOffsetSectionItem(uvalue, Address))
+ if (U == nullptr)
+ OS << "<invalid dwarf unit>";
+ else if (U->getAddrOffsetSectionItem(uvalue, Address))
OS << format("0x%016" PRIx64, Address);
else
OS << "<no .debug_addr section>";
@@ -428,17 +495,17 @@ DWARFFormValue::dump(raw_ostream &OS, const DWARFUnit *cu) const {
case DW_FORM_udata: OS << Value.uval; break;
case DW_FORM_strp: {
OS << format(" .debug_str[0x%8.8x] = ", (uint32_t)uvalue);
- dumpString(OS, cu);
+ dumpString(OS);
break;
}
case DW_FORM_GNU_str_index: {
OS << format(" indexed (%8.8x) string = ", (uint32_t)uvalue);
- dumpString(OS, cu);
+ dumpString(OS);
break;
}
case DW_FORM_GNU_strp_alt: {
OS << format("alt indirect string, offset: 0x%" PRIx64 "", uvalue);
- dumpString(OS, cu);
+ dumpString(OS);
break;
}
case DW_FORM_ref_addr:
@@ -487,13 +554,13 @@ DWARFFormValue::dump(raw_ostream &OS, const DWARFUnit *cu) const {
if (cu_relative_offset) {
OS << " => {";
WithColor(OS, syntax::Address).get()
- << format("0x%8.8" PRIx64, uvalue + (cu ? cu->getOffset() : 0));
+ << format("0x%8.8" PRIx64, uvalue + (U ? U->getOffset() : 0));
OS << "}";
}
}
-void DWARFFormValue::dumpString(raw_ostream &OS, const DWARFUnit *U) const {
- Optional<const char *> DbgStr = getAsCString(U);
+void DWARFFormValue::dumpString(raw_ostream &OS) const {
+ Optional<const char *> DbgStr = getAsCString();
if (DbgStr.hasValue()) {
raw_ostream &COS = WithColor(OS, syntax::String);
COS << '"';
@@ -502,7 +569,7 @@ void DWARFFormValue::dumpString(raw_ostream &OS, const DWARFUnit *U) const {
}
}
-Optional<const char *> DWARFFormValue::getAsCString(const DWARFUnit *U) const {
+Optional<const char *> DWARFFormValue::getAsCString() const {
if (!isFormClass(FC_String))
return None;
if (Form == DW_FORM_string)
@@ -523,7 +590,7 @@ Optional<const char *> DWARFFormValue::getAsCString(const DWARFUnit *U) const {
return None;
}
-Optional<uint64_t> DWARFFormValue::getAsAddress(const DWARFUnit *U) const {
+Optional<uint64_t> DWARFFormValue::getAsAddress() const {
if (!isFormClass(FC_Address))
return None;
if (Form == DW_FORM_GNU_addr_index) {
@@ -536,7 +603,7 @@ Optional<uint64_t> DWARFFormValue::getAsAddress(const DWARFUnit *U) const {
return Value.uval;
}
-Optional<uint64_t> DWARFFormValue::getAsReference(const DWARFUnit *U) const {
+Optional<uint64_t> DWARFFormValue::getAsReference() const {
if (!isFormClass(FC_Reference))
return None;
switch (Form) {
@@ -549,8 +616,9 @@ Optional<uint64_t> DWARFFormValue::getAsReference(const DWARFUnit *U) const {
return None;
return Value.uval + U->getOffset();
case DW_FORM_ref_addr:
+ case DW_FORM_ref_sig8:
+ case DW_FORM_GNU_ref_alt:
return Value.uval;
- // FIXME: Add proper support for DW_FORM_ref_sig8 and DW_FORM_GNU_ref_alt.
default:
return None;
}
@@ -593,3 +661,15 @@ Optional<ArrayRef<uint8_t>> DWARFFormValue::getAsBlock() const {
return makeArrayRef(Value.data, Value.uval);
}
+Optional<uint64_t> DWARFFormValue::getAsCStringOffset() const {
+ if (!isFormClass(FC_String) && Form == DW_FORM_string)
+ return None;
+ return Value.uval;
+}
+
+Optional<uint64_t> DWARFFormValue::getAsReferenceUVal() const {
+ if (!isFormClass(FC_Reference))
+ return None;
+ return Value.uval;
+}
+
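As a rough illustration of the new fixed-size query (a sketch, not part of the change itself), the Version/AddrSize/DwarfFormat overload added above could be exercised like this; the numeric results follow directly from FormSizeHelper::getRefAddrByteSize and getDwarfOffsetByteSize:

  // Hedged sketch: assumes only the overloads introduced in this diff.
  #include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
  #include "llvm/Support/Dwarf.h"
  using namespace llvm;

  void printRefAddrSizes() {
    // DWARF v2 with 8-byte addresses: DW_FORM_ref_addr is address sized.
    Optional<uint8_t> V2 = DWARFFormValue::getFixedByteSize(
        dwarf::DW_FORM_ref_addr, /*Version=*/2, /*AddrSize=*/8,
        dwarf::DwarfFormat::DWARF32); // yields 8
    // DWARF v4 with 8-byte addresses: it is offset sized (4 in DWARF32).
    Optional<uint8_t> V4 = DWARFFormValue::getFixedByteSize(
        dwarf::DW_FORM_ref_addr, /*Version=*/4, /*AddrSize=*/8,
        dwarf::DwarfFormat::DWARF32); // yields 4
    (void)V2; (void)V4;
  }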
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp
new file mode 100644
index 000000000000..ebb996162f1b
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFGdbIndex.cpp
@@ -0,0 +1,176 @@
+//===-- DWARFGdbIndex.cpp -------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/DWARF/DWARFGdbIndex.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+
+using namespace llvm;
+
+// .gdb_index section format reference:
+// https://sourceware.org/gdb/onlinedocs/gdb/Index-Section-Format.html
+
+void DWARFGdbIndex::dumpCUList(raw_ostream &OS) const {
+ OS << format("\n CU list offset = 0x%x, has %" PRId64 " entries:",
+ CuListOffset, (uint64_t)CuList.size())
+ << '\n';
+ uint32_t I = 0;
+ for (const CompUnitEntry &CU : CuList)
+ OS << format(" %d: Offset = 0x%llx, Length = 0x%llx\n", I++, CU.Offset,
+ CU.Length);
+}
+
+void DWARFGdbIndex::dumpAddressArea(raw_ostream &OS) const {
+ OS << format("\n Address area offset = 0x%x, has %" PRId64 " entries:",
+ AddressAreaOffset, (uint64_t)AddressArea.size())
+ << '\n';
+ for (const AddressEntry &Addr : AddressArea)
+ OS << format(
+ " Low address = 0x%llx, High address = 0x%llx, CU index = %d\n",
+ Addr.LowAddress, Addr.HighAddress, Addr.CuIndex);
+}
+
+void DWARFGdbIndex::dumpSymbolTable(raw_ostream &OS) const {
+ OS << format("\n Symbol table offset = 0x%x, size = %" PRId64
+ ", filled slots:",
+ SymbolTableOffset, (uint64_t)SymbolTable.size())
+ << '\n';
+ uint32_t I = -1;
+ for (const SymTableEntry &E : SymbolTable) {
+ ++I;
+ if (!E.NameOffset && !E.VecOffset)
+ continue;
+
+ OS << format(" %d: Name offset = 0x%x, CU vector offset = 0x%x\n", I,
+ E.NameOffset, E.VecOffset);
+
+ StringRef Name = ConstantPoolStrings.substr(
+ ConstantPoolOffset - StringPoolOffset + E.NameOffset);
+
+ auto CuVector = std::find_if(
+ ConstantPoolVectors.begin(), ConstantPoolVectors.end(),
+ [&](const std::pair<uint32_t, SmallVector<uint32_t, 0>> &V) {
+ return V.first == E.VecOffset;
+ });
+ assert(CuVector != ConstantPoolVectors.end() && "Invalid symbol table");
+ uint32_t CuVectorId = CuVector - ConstantPoolVectors.begin();
+ OS << format(" String name: %s, CU vector index: %d\n", Name.data(),
+ CuVectorId);
+ }
+}
+
+void DWARFGdbIndex::dumpConstantPool(raw_ostream &OS) const {
+ OS << format("\n Constant pool offset = 0x%x, has %" PRId64 " CU vectors:",
+ ConstantPoolOffset, (uint64_t)ConstantPoolVectors.size());
+ uint32_t I = 0;
+ for (const auto &V : ConstantPoolVectors) {
+ OS << format("\n %d(0x%x): ", I++, V.first);
+ for (uint32_t Val : V.second)
+ OS << format("0x%x ", Val);
+ }
+ OS << '\n';
+}
+
+void DWARFGdbIndex::dump(raw_ostream &OS) {
+ if (HasError) {
+ OS << "\n<error parsing>\n";
+ return;
+ }
+
+ if (HasContent) {
+ OS << " Version = " << Version << '\n';
+ dumpCUList(OS);
+ dumpAddressArea(OS);
+ dumpSymbolTable(OS);
+ dumpConstantPool(OS);
+ }
+}
+
+bool DWARFGdbIndex::parseImpl(DataExtractor Data) {
+ uint32_t Offset = 0;
+
+ // Only version 7 is supported at this moment.
+ Version = Data.getU32(&Offset);
+ if (Version != 7)
+ return false;
+
+ CuListOffset = Data.getU32(&Offset);
+ uint32_t CuTypesOffset = Data.getU32(&Offset);
+ AddressAreaOffset = Data.getU32(&Offset);
+ SymbolTableOffset = Data.getU32(&Offset);
+ ConstantPoolOffset = Data.getU32(&Offset);
+
+ if (Offset != CuListOffset)
+ return false;
+
+ uint32_t CuListSize = (CuTypesOffset - CuListOffset) / 16;
+ CuList.reserve(CuListSize);
+ for (uint32_t i = 0; i < CuListSize; ++i) {
+ uint64_t CuOffset = Data.getU64(&Offset);
+ uint64_t CuLength = Data.getU64(&Offset);
+ CuList.push_back({CuOffset, CuLength});
+ }
+
+ // CU Types are no longer needed as DWARF skeleton type units never made it
+ // into the standard.
+ uint32_t CuTypesListSize = (AddressAreaOffset - CuTypesOffset) / 24;
+ if (CuTypesListSize != 0)
+ return false;
+
+ uint32_t AddressAreaSize = (SymbolTableOffset - AddressAreaOffset) / 20;
+ AddressArea.reserve(AddressAreaSize);
+ for (uint32_t i = 0; i < AddressAreaSize; ++i) {
+ uint64_t LowAddress = Data.getU64(&Offset);
+ uint64_t HighAddress = Data.getU64(&Offset);
+ uint32_t CuIndex = Data.getU32(&Offset);
+ AddressArea.push_back({LowAddress, HighAddress, CuIndex});
+ }
+
+ // The symbol table. This is an open addressed hash table. The size of the
+ // hash table is always a power of 2.
+ // Each slot in the hash table consists of a pair of offset_type values. The
+ // first value is the offset of the symbol's name in the constant pool. The
+ // second value is the offset of the CU vector in the constant pool.
+ // If both values are 0, then this slot in the hash table is empty. This is ok
+ // because while 0 is a valid constant pool index, it cannot be a valid index
+ // for both a string and a CU vector.
+ uint32_t SymTableSize = (ConstantPoolOffset - SymbolTableOffset) / 8;
+ SymbolTable.reserve(SymTableSize);
+ uint32_t CuVectorsTotal = 0;
+ for (uint32_t i = 0; i < SymTableSize; ++i) {
+ uint32_t NameOffset = Data.getU32(&Offset);
+ uint32_t CuVecOffset = Data.getU32(&Offset);
+ SymbolTable.push_back({NameOffset, CuVecOffset});
+ if (NameOffset || CuVecOffset)
+ ++CuVectorsTotal;
+ }
+
+ // The constant pool. CU vectors are stored first, followed by strings.
+ // The first value is the number of CU indices in the vector. Each subsequent
+ // value is the index and symbol attributes of a CU in the CU list.
+ for (uint32_t i = 0; i < CuVectorsTotal; ++i) {
+ ConstantPoolVectors.emplace_back(0, SmallVector<uint32_t, 0>());
+ auto &Vec = ConstantPoolVectors.back();
+ Vec.first = Offset - ConstantPoolOffset;
+
+ uint32_t Num = Data.getU32(&Offset);
+ for (uint32_t j = 0; j < Num; ++j)
+ Vec.second.push_back(Data.getU32(&Offset));
+ }
+
+ ConstantPoolStrings = Data.getData().drop_front(Offset);
+ StringPoolOffset = Offset;
+ return true;
+}
+
+void DWARFGdbIndex::parse(DataExtractor Data) {
+ HasContent = !Data.getData().empty();
+ HasError = HasContent && !parseImpl(Data);
+}
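The new .gdb_index reader above (CU list, address area, the open-addressed symbol table, and the constant pool) is driven through parse() and dump(). As a hedged usage sketch, with the section lookup and the helper name assumed rather than taken from this change, it might be wired up as follows:

  #include "llvm/DebugInfo/DWARF/DWARFGdbIndex.h"
  #include "llvm/Support/DataExtractor.h"
  #include "llvm/Support/raw_ostream.h"
  using namespace llvm;

  // Hypothetical helper: GdbIndexContents is the raw .gdb_index section data.
  void dumpGdbIndex(StringRef GdbIndexContents, bool IsLittleEndian) {
    DWARFGdbIndex GdbIndex;
    // The index stores 4-byte words; the address size argument is unused
    // here, so 0 is passed.
    GdbIndex.parse(DataExtractor(GdbIndexContents, IsLittleEndian, 0));
    GdbIndex.dump(outs());
  }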
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp
index 766e8ac16f0c..88fb20381f95 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFTypeUnit.cpp
@@ -8,6 +8,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/DWARF/DWARFTypeUnit.h"
+#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
+#include "llvm/Support/Dwarf.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
@@ -22,19 +24,29 @@ bool DWARFTypeUnit::extractImpl(DataExtractor debug_info,
return TypeOffset < getLength();
}
-void DWARFTypeUnit::dump(raw_ostream &OS) {
+void DWARFTypeUnit::dump(raw_ostream &OS, bool SummarizeTypes) {
+ DWARFDie TD = getDIEForOffset(TypeOffset + getOffset());
+ const char *Name = TD.getAttributeValueAsString(llvm::dwarf::DW_AT_name, "");
+
+ if (SummarizeTypes) {
+ OS << "name = '" << Name << "'"
+ << " type_signature = " << format("0x%16" PRIx64, TypeHash)
+ << " length = " << format("0x%08x", getLength()) << '\n';
+ return;
+ }
+
OS << format("0x%08x", getOffset()) << ": Type Unit:"
<< " length = " << format("0x%08x", getLength())
<< " version = " << format("0x%04x", getVersion())
<< " abbr_offset = " << format("0x%04x", getAbbreviations()->getOffset())
<< " addr_size = " << format("0x%02x", getAddressByteSize())
+ << " name = '" << Name << "'"
<< " type_signature = " << format("0x%16" PRIx64, TypeHash)
<< " type_offset = " << format("0x%04x", TypeOffset)
- << " (next unit at " << format("0x%08x", getNextUnitOffset())
- << ")\n";
+ << " (next unit at " << format("0x%08x", getNextUnitOffset()) << ")\n";
- if (const DWARFDebugInfoEntryMinimal *TU = getUnitDIE(false))
- TU->dump(OS, this, -1U);
+ if (DWARFDie TU = getUnitDIE(false))
+ TU.dump(OS, -1U);
else
OS << "<type unit can't be parsed!>\n\n";
}
diff --git a/contrib/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp b/contrib/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
index 13c2b508bfa3..63fb0d3bc368 100644
--- a/contrib/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARF/DWARFUnit.cpp
@@ -7,14 +7,25 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/DebugInfo/DWARF/DWARFAbbreviationDeclaration.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
-#include "llvm/DebugInfo/DWARF/DWARFFormValue.h"
-#include "llvm/Support/Dwarf.h"
+#include "llvm/DebugInfo/DWARF/DWARFDebugAbbrev.h"
+#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/DataExtractor.h"
#include "llvm/Support/Path.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
#include <cstdio>
+#include <vector>
namespace llvm {
+
using namespace dwarf;
void DWARFUnitSectionBase::parse(DWARFContext &C, const DWARFSection &Section) {
@@ -49,8 +60,7 @@ DWARFUnit::DWARFUnit(DWARFContext &DC, const DWARFSection &Section,
clear();
}
-DWARFUnit::~DWARFUnit() {
-}
+DWARFUnit::~DWARFUnit() = default;
bool DWARFUnit::getAddrOffsetSectionItem(uint32_t Index,
uint64_t &Result) const {
@@ -121,7 +131,7 @@ bool DWARFUnit::extract(DataExtractor debug_info, uint32_t *offset_ptr) {
bool DWARFUnit::extractRangeList(uint32_t RangeListOffset,
DWARFDebugRangeList &RangeList) const {
// Require that compile unit is extracted.
- assert(DieArray.size() > 0);
+ assert(!DieArray.empty());
DataExtractor RangesData(RangeSection, isLittleEndian, AddrSize);
uint32_t ActualRangeListOffset = RangeSectionBase + RangeListOffset;
return RangeList.extract(RangesData, &ActualRangeListOffset);
@@ -141,53 +151,16 @@ void DWARFUnit::clear() {
}
const char *DWARFUnit::getCompilationDir() {
- extractDIEsIfNeeded(true);
- if (DieArray.empty())
- return nullptr;
- return DieArray[0].getAttributeValueAsString(this, DW_AT_comp_dir, nullptr);
+ return getUnitDIE().getAttributeValueAsString(DW_AT_comp_dir, nullptr);
}
-uint64_t DWARFUnit::getDWOId() {
- extractDIEsIfNeeded(true);
- const uint64_t FailValue = -1ULL;
- if (DieArray.empty())
- return FailValue;
- return DieArray[0]
- .getAttributeValueAsUnsignedConstant(this, DW_AT_GNU_dwo_id, FailValue);
-}
-
-void DWARFUnit::setDIERelations() {
- if (DieArray.size() <= 1)
- return;
-
- std::vector<DWARFDebugInfoEntryMinimal *> ParentChain;
- DWARFDebugInfoEntryMinimal *SiblingChain = nullptr;
- for (auto &DIE : DieArray) {
- if (SiblingChain) {
- SiblingChain->setSibling(&DIE);
- }
- if (const DWARFAbbreviationDeclaration *AbbrDecl =
- DIE.getAbbreviationDeclarationPtr()) {
- // Normal DIE.
- if (AbbrDecl->hasChildren()) {
- ParentChain.push_back(&DIE);
- SiblingChain = nullptr;
- } else {
- SiblingChain = &DIE;
- }
- } else {
- // NULL entry terminates the sibling chain.
- SiblingChain = ParentChain.back();
- ParentChain.pop_back();
- }
- }
- assert(SiblingChain == nullptr || SiblingChain == &DieArray[0]);
- assert(ParentChain.empty());
+Optional<uint64_t> DWARFUnit::getDWOId() {
+ return getUnitDIE().getAttributeValueAsUnsignedConstant(DW_AT_GNU_dwo_id);
}
void DWARFUnit::extractDIEsToVector(
bool AppendCUDie, bool AppendNonCUDies,
- std::vector<DWARFDebugInfoEntryMinimal> &Dies) const {
+ std::vector<DWARFDebugInfoEntry> &Dies) const {
if (!AppendCUDie && !AppendNonCUDies)
return;
@@ -195,11 +168,13 @@ void DWARFUnit::extractDIEsToVector(
// next compilation unit header.
uint32_t DIEOffset = Offset + getHeaderSize();
uint32_t NextCUOffset = getNextUnitOffset();
- DWARFDebugInfoEntryMinimal DIE;
+ DWARFDebugInfoEntry DIE;
+ DataExtractor DebugInfoData = getDebugInfoExtractor();
uint32_t Depth = 0;
bool IsCUDie = true;
- while (DIEOffset < NextCUOffset && DIE.extractFast(this, &DIEOffset)) {
+ while (DIE.extractFast(*this, &DIEOffset, DebugInfoData, NextCUOffset,
+ Depth)) {
if (IsCUDie) {
if (AppendCUDie)
Dies.push_back(DIE);
@@ -237,11 +212,11 @@ void DWARFUnit::extractDIEsToVector(
}
size_t DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) {
- if ((CUDieOnly && DieArray.size() > 0) ||
+ if ((CUDieOnly && !DieArray.empty()) ||
DieArray.size() > 1)
return 0; // Already parsed.
- bool HasCUDie = DieArray.size() > 0;
+ bool HasCUDie = !DieArray.empty();
extractDIEsToVector(!HasCUDie, !CUDieOnly, DieArray);
if (DieArray.empty())
@@ -249,25 +224,25 @@ size_t DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) {
// If CU DIE was just parsed, copy several attribute values from it.
if (!HasCUDie) {
- uint64_t BaseAddr =
- DieArray[0].getAttributeValueAsAddress(this, DW_AT_low_pc, -1ULL);
- if (BaseAddr == -1ULL)
- BaseAddr = DieArray[0].getAttributeValueAsAddress(this, DW_AT_entry_pc, 0);
- setBaseAddress(BaseAddr);
- AddrOffsetSectionBase = DieArray[0].getAttributeValueAsSectionOffset(
- this, DW_AT_GNU_addr_base, 0);
- RangeSectionBase = DieArray[0].getAttributeValueAsSectionOffset(
- this, DW_AT_ranges_base, 0);
+ DWARFDie UnitDie = getUnitDIE();
+ auto BaseAddr = UnitDie.getAttributeValueAsAddress(DW_AT_low_pc);
+ if (!BaseAddr)
+ BaseAddr = UnitDie.getAttributeValueAsAddress(DW_AT_entry_pc);
+ if (BaseAddr)
+ setBaseAddress(*BaseAddr);
+ AddrOffsetSectionBase = UnitDie.getAttributeValueAsSectionOffset(
+ DW_AT_GNU_addr_base, 0);
+ RangeSectionBase = UnitDie.getAttributeValueAsSectionOffset(
+ DW_AT_rnglists_base, 0);
// Don't fall back to DW_AT_GNU_ranges_base: it should be ignored for
// skeleton CU DIE, so that DWARF users not aware of it are not broken.
}
- setDIERelations();
return DieArray.size();
}
DWARFUnit::DWOHolder::DWOHolder(StringRef DWOPath)
- : DWOFile(), DWOContext(), DWOU(nullptr) {
+ : DWOU(nullptr) {
auto Obj = object::ObjectFile::createObjectFile(DWOPath);
if (!Obj) {
// TODO: Actually report errors helpfully.
@@ -286,15 +261,15 @@ bool DWARFUnit::parseDWO() {
return false;
if (DWO.get())
return false;
- extractDIEsIfNeeded(true);
- if (DieArray.empty())
+ DWARFDie UnitDie = getUnitDIE();
+ if (!UnitDie)
return false;
const char *DWOFileName =
- DieArray[0].getAttributeValueAsString(this, DW_AT_GNU_dwo_name, nullptr);
+ UnitDie.getAttributeValueAsString(DW_AT_GNU_dwo_name, nullptr);
if (!DWOFileName)
return false;
const char *CompilationDir =
- DieArray[0].getAttributeValueAsString(this, DW_AT_comp_dir, nullptr);
+ UnitDie.getAttributeValueAsString(DW_AT_comp_dir, nullptr);
SmallString<16> AbsolutePath;
if (sys::path::is_relative(DWOFileName) && CompilationDir != nullptr) {
sys::path::append(AbsolutePath, CompilationDir);
@@ -309,8 +284,8 @@ bool DWARFUnit::parseDWO() {
}
// Share .debug_addr and .debug_ranges section with compile unit in .dwo
DWOCU->setAddrOffsetSection(AddrOffsetSection, AddrOffsetSectionBase);
- uint32_t DWORangesBase = DieArray[0].getRangesBaseAttribute(this, 0);
- DWOCU->setRangesSection(RangeSection, DWORangesBase);
+ auto DWORangesBase = UnitDie.getRangesBaseAttribute();
+ DWOCU->setRangesSection(RangeSection, DWORangesBase ? *DWORangesBase : 0);
return true;
}
@@ -323,7 +298,7 @@ void DWARFUnit::clearDIEs(bool KeepCUDie) {
// contents which will cause just the internal pointers to be swapped
// so that when temporary vector goes out of scope, it will destroy the
// contents.
- std::vector<DWARFDebugInfoEntryMinimal> TmpArray;
+ std::vector<DWARFDebugInfoEntry> TmpArray;
DieArray.swap(TmpArray);
// Save at least the compile unit DIE
if (KeepCUDie)
@@ -332,11 +307,11 @@ void DWARFUnit::clearDIEs(bool KeepCUDie) {
}
void DWARFUnit::collectAddressRanges(DWARFAddressRangesVector &CURanges) {
- const auto *U = getUnitDIE();
- if (U == nullptr)
+ DWARFDie UnitDie = getUnitDIE();
+ if (!UnitDie)
return;
// First, check if unit DIE describes address ranges for the whole unit.
- const auto &CUDIERanges = U->getAddressRanges(this);
+ const auto &CUDIERanges = UnitDie.getAddressRanges();
if (!CUDIERanges.empty()) {
CURanges.insert(CURanges.end(), CUDIERanges.begin(), CUDIERanges.end());
return;
@@ -349,7 +324,7 @@ void DWARFUnit::collectAddressRanges(DWARFAddressRangesVector &CURanges) {
// up parsing the DWARF and then throwing them all away to keep memory usage
// down.
const bool ClearDIEs = extractDIEsIfNeeded(false) > 1;
- DieArray[0].collectChildrenAddressRanges(this, CURanges);
+ getUnitDIE().collectChildrenAddressRanges(CURanges);
// Collect address ranges from DIEs in .dwo if necessary.
bool DWOCreated = parseDWO();
@@ -364,36 +339,37 @@ void DWARFUnit::collectAddressRanges(DWARFAddressRangesVector &CURanges) {
clearDIEs(true);
}
-const DWARFDebugInfoEntryMinimal *
+DWARFDie
DWARFUnit::getSubprogramForAddress(uint64_t Address) {
extractDIEsIfNeeded(false);
- for (const DWARFDebugInfoEntryMinimal &DIE : DieArray) {
+ for (const DWARFDebugInfoEntry &D : DieArray) {
+ DWARFDie DIE(this, &D);
if (DIE.isSubprogramDIE() &&
- DIE.addressRangeContainsAddress(this, Address)) {
- return &DIE;
+ DIE.addressRangeContainsAddress(Address)) {
+ return DIE;
}
}
- return nullptr;
+ return DWARFDie();
}
-DWARFDebugInfoEntryInlinedChain
-DWARFUnit::getInlinedChainForAddress(uint64_t Address) {
+void
+DWARFUnit::getInlinedChainForAddress(uint64_t Address,
+ SmallVectorImpl<DWARFDie> &InlinedChain) {
// First, find a subprogram that contains the given address (the root
// of inlined chain).
- const DWARFUnit *ChainCU = nullptr;
- const DWARFDebugInfoEntryMinimal *SubprogramDIE;
+ DWARFDie SubprogramDIE;
// Try to look for subprogram DIEs in the DWO file.
parseDWO();
- if (DWO) {
- if ((SubprogramDIE = DWO->getUnit()->getSubprogramForAddress(Address)))
- ChainCU = DWO->getUnit();
- } else if ((SubprogramDIE = getSubprogramForAddress(Address)))
- ChainCU = this;
+ if (DWO)
+ SubprogramDIE = DWO->getUnit()->getSubprogramForAddress(Address);
+ else
+ SubprogramDIE = getSubprogramForAddress(Address);
// Get inlined chain rooted at this subprogram DIE.
- if (!SubprogramDIE)
- return DWARFDebugInfoEntryInlinedChain();
- return SubprogramDIE->getInlinedChainForAddress(ChainCU, Address);
+ if (SubprogramDIE)
+ SubprogramDIE.getInlinedChainForAddress(Address, InlinedChain);
+ else
+ InlinedChain.clear();
}
const DWARFUnitIndex &getDWARFUnitIndex(DWARFContext &Context,
@@ -403,4 +379,43 @@ const DWARFUnitIndex &getDWARFUnitIndex(DWARFContext &Context,
assert(Kind == DW_SECT_TYPES);
return Context.getTUIndex();
}
+
+DWARFDie DWARFUnit::getParent(const DWARFDebugInfoEntry *Die) {
+ if (!Die)
+ return DWARFDie();
+ const uint32_t Depth = Die->getDepth();
+ // Unit DIEs always have a depth of zero and never have parents.
+ if (Depth == 0)
+ return DWARFDie();
+ // Depth of 1 always means parent is the compile/type unit.
+ if (Depth == 1)
+ return getUnitDIE();
+ // Look for previous DIE with a depth that is one less than the Die's depth.
+ const uint32_t ParentDepth = Depth - 1;
+ for (uint32_t I = getDIEIndex(Die) - 1; I > 0; --I) {
+ if (DieArray[I].getDepth() == ParentDepth)
+ return DWARFDie(this, &DieArray[I]);
+ }
+ return DWARFDie();
}
+
+DWARFDie DWARFUnit::getSibling(const DWARFDebugInfoEntry *Die) {
+ if (!Die)
+ return DWARFDie();
+ uint32_t Depth = Die->getDepth();
+ // Unit DIEs always have a depth of zero and never have siblings.
+ if (Depth == 0)
+ return DWARFDie();
+ // NULL DIEs don't have siblings.
+ if (Die->getAbbreviationDeclarationPtr() == nullptr)
+ return DWARFDie();
+
+ // Find the next DIE whose depth is the same as the Die's depth.
+ for (size_t I=getDIEIndex(Die)+1, EndIdx = DieArray.size(); I<EndIdx; ++I) {
+ if (DieArray[I].getDepth() == Depth)
+ return DWARFDie(this, &DieArray[I]);
+ }
+ return DWARFDie();
+}
+
+} // end namespace llvm
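A small worked example (not from the diff) of the depth-based navigation that getParent and getSibling implement over the flattened DieArray: given entries with depths

  index: 0  1  2  3  4
  depth: 0  1  2  2  1

getParent() on the DIE at index 3 (depth 2) scans backwards for the first entry at depth 1 and returns index 1, while getSibling() on the DIE at index 1 scans forwards for the next entry at depth 1 and returns index 4; the unit DIE at index 0 has neither parent nor sibling.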
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Raw/MsfBuilder.cpp b/contrib/llvm/lib/DebugInfo/MSF/MSFBuilder.cpp
index 16b086bdaa98..5b1b5d8dc4d5 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Raw/MsfBuilder.cpp
+++ b/contrib/llvm/lib/DebugInfo/MSF/MSFBuilder.cpp
@@ -1,4 +1,3 @@
-//===- MSFBuilder.cpp - MSF Directory & Metadata Builder --------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,12 +6,11 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/DebugInfo/PDB/Raw/MsfBuilder.h"
-#include "llvm/DebugInfo/PDB/Raw/RawError.h"
+#include "llvm/DebugInfo/MSF/MSFBuilder.h"
+#include "llvm/DebugInfo/MSF/MSFError.h"
using namespace llvm;
-using namespace llvm::pdb;
-using namespace llvm::pdb::msf;
+using namespace llvm::msf;
using namespace llvm::support;
namespace {
@@ -21,12 +19,14 @@ const uint32_t kFreePageMap0Block = 1;
const uint32_t kFreePageMap1Block = 2;
const uint32_t kNumReservedPages = 3;
+const uint32_t kDefaultFreePageMap = kFreePageMap0Block;
const uint32_t kDefaultBlockMapAddr = kNumReservedPages;
}
-MsfBuilder::MsfBuilder(uint32_t BlockSize, uint32_t MinBlockCount, bool CanGrow,
+MSFBuilder::MSFBuilder(uint32_t BlockSize, uint32_t MinBlockCount, bool CanGrow,
BumpPtrAllocator &Allocator)
- : Allocator(Allocator), IsGrowable(CanGrow), BlockSize(BlockSize),
+ : Allocator(Allocator), IsGrowable(CanGrow),
+ FreePageMap(kDefaultFreePageMap), BlockSize(BlockSize),
MininumBlocks(MinBlockCount), BlockMapAddr(kDefaultBlockMapAddr),
FreeBlocks(MinBlockCount, true) {
FreeBlocks[kSuperBlockBlock] = false;
@@ -35,48 +35,49 @@ MsfBuilder::MsfBuilder(uint32_t BlockSize, uint32_t MinBlockCount, bool CanGrow,
FreeBlocks[BlockMapAddr] = false;
}
-Expected<MsfBuilder> MsfBuilder::create(BumpPtrAllocator &Allocator,
+Expected<MSFBuilder> MSFBuilder::create(BumpPtrAllocator &Allocator,
uint32_t BlockSize,
uint32_t MinBlockCount, bool CanGrow) {
- if (!msf::isValidBlockSize(BlockSize))
- return make_error<RawError>(raw_error_code::unspecified,
+ if (!isValidBlockSize(BlockSize))
+ return make_error<MSFError>(msf_error_code::invalid_format,
"The requested block size is unsupported");
- return MsfBuilder(BlockSize,
+ return MSFBuilder(BlockSize,
std::max(MinBlockCount, msf::getMinimumBlockCount()),
CanGrow, Allocator);
}
-Error MsfBuilder::setBlockMapAddr(uint32_t Addr) {
+Error MSFBuilder::setBlockMapAddr(uint32_t Addr) {
if (Addr == BlockMapAddr)
return Error::success();
if (Addr >= FreeBlocks.size()) {
if (!IsGrowable)
- return make_error<RawError>(raw_error_code::unspecified,
+ return make_error<MSFError>(msf_error_code::insufficient_buffer,
"Cannot grow the number of blocks");
- FreeBlocks.resize(Addr + 1);
+ FreeBlocks.resize(Addr + 1, true);
}
if (!isBlockFree(Addr))
- return make_error<RawError>(raw_error_code::unspecified,
- "Attempt to reuse an allocated block");
+ return make_error<MSFError>(
+ msf_error_code::block_in_use,
+ "Requested block map address is already in use");
FreeBlocks[BlockMapAddr] = true;
FreeBlocks[Addr] = false;
BlockMapAddr = Addr;
return Error::success();
}
-void MsfBuilder::setFreePageMap(uint32_t Fpm) { FreePageMap = Fpm; }
+void MSFBuilder::setFreePageMap(uint32_t Fpm) { FreePageMap = Fpm; }
-void MsfBuilder::setUnknown1(uint32_t Unk1) { Unknown1 = Unk1; }
+void MSFBuilder::setUnknown1(uint32_t Unk1) { Unknown1 = Unk1; }
-Error MsfBuilder::setDirectoryBlocksHint(ArrayRef<uint32_t> DirBlocks) {
+Error MSFBuilder::setDirectoryBlocksHint(ArrayRef<uint32_t> DirBlocks) {
for (auto B : DirectoryBlocks)
FreeBlocks[B] = true;
for (auto B : DirBlocks) {
if (!isBlockFree(B)) {
- return make_error<RawError>(raw_error_code::unspecified,
+ return make_error<MSFError>(msf_error_code::unspecified,
"Attempt to reuse an allocated block");
}
FreeBlocks[B] = false;
@@ -86,7 +87,7 @@ Error MsfBuilder::setDirectoryBlocksHint(ArrayRef<uint32_t> DirBlocks) {
return Error::success();
}
-Error MsfBuilder::allocateBlocks(uint32_t NumBlocks,
+Error MSFBuilder::allocateBlocks(uint32_t NumBlocks,
MutableArrayRef<uint32_t> Blocks) {
if (NumBlocks == 0)
return Error::success();
@@ -94,7 +95,7 @@ Error MsfBuilder::allocateBlocks(uint32_t NumBlocks,
uint32_t NumFreeBlocks = FreeBlocks.count();
if (NumFreeBlocks < NumBlocks) {
if (!IsGrowable)
- return make_error<RawError>(raw_error_code::unspecified,
+ return make_error<MSFError>(msf_error_code::insufficient_buffer,
"There are no free Blocks in the file");
uint32_t AllocBlocks = NumBlocks - NumFreeBlocks;
FreeBlocks.resize(AllocBlocks + FreeBlocks.size(), true);
@@ -113,32 +114,33 @@ Error MsfBuilder::allocateBlocks(uint32_t NumBlocks,
return Error::success();
}
-uint32_t MsfBuilder::getNumUsedBlocks() const {
+uint32_t MSFBuilder::getNumUsedBlocks() const {
return getTotalBlockCount() - getNumFreeBlocks();
}
-uint32_t MsfBuilder::getNumFreeBlocks() const { return FreeBlocks.count(); }
+uint32_t MSFBuilder::getNumFreeBlocks() const { return FreeBlocks.count(); }
-uint32_t MsfBuilder::getTotalBlockCount() const { return FreeBlocks.size(); }
+uint32_t MSFBuilder::getTotalBlockCount() const { return FreeBlocks.size(); }
-bool MsfBuilder::isBlockFree(uint32_t Idx) const { return FreeBlocks[Idx]; }
+bool MSFBuilder::isBlockFree(uint32_t Idx) const { return FreeBlocks[Idx]; }
-Error MsfBuilder::addStream(uint32_t Size, ArrayRef<uint32_t> Blocks) {
+Expected<uint32_t> MSFBuilder::addStream(uint32_t Size,
+ ArrayRef<uint32_t> Blocks) {
// Add a new stream mapped to the specified blocks. Verify that the specified
// blocks are both necessary and sufficient for holding the requested number
// of bytes, and verify that all requested blocks are free.
uint32_t ReqBlocks = bytesToBlocks(Size, BlockSize);
if (ReqBlocks != Blocks.size())
- return make_error<RawError>(
- raw_error_code::unspecified,
+ return make_error<MSFError>(
+ msf_error_code::invalid_format,
"Incorrect number of blocks for requested stream size");
for (auto Block : Blocks) {
if (Block >= FreeBlocks.size())
FreeBlocks.resize(Block + 1, true);
if (!FreeBlocks.test(Block))
- return make_error<RawError>(
- raw_error_code::unspecified,
+ return make_error<MSFError>(
+ msf_error_code::unspecified,
"Attempt to re-use an already allocated block");
}
// Mark all the blocks occupied by the new stream as not free.
@@ -146,20 +148,20 @@ Error MsfBuilder::addStream(uint32_t Size, ArrayRef<uint32_t> Blocks) {
FreeBlocks.reset(Block);
}
StreamData.push_back(std::make_pair(Size, Blocks));
- return Error::success();
+ return StreamData.size() - 1;
}
-Error MsfBuilder::addStream(uint32_t Size) {
+Expected<uint32_t> MSFBuilder::addStream(uint32_t Size) {
uint32_t ReqBlocks = bytesToBlocks(Size, BlockSize);
std::vector<uint32_t> NewBlocks;
NewBlocks.resize(ReqBlocks);
if (auto EC = allocateBlocks(ReqBlocks, NewBlocks))
- return EC;
+ return std::move(EC);
StreamData.push_back(std::make_pair(Size, NewBlocks));
- return Error::success();
+ return StreamData.size() - 1;
}
-Error MsfBuilder::setStreamSize(uint32_t Idx, uint32_t Size) {
+Error MSFBuilder::setStreamSize(uint32_t Idx, uint32_t Size) {
uint32_t OldSize = getStreamSize(Idx);
if (OldSize == Size)
return Error::success();
@@ -192,17 +194,17 @@ Error MsfBuilder::setStreamSize(uint32_t Idx, uint32_t Size) {
return Error::success();
}
-uint32_t MsfBuilder::getNumStreams() const { return StreamData.size(); }
+uint32_t MSFBuilder::getNumStreams() const { return StreamData.size(); }
-uint32_t MsfBuilder::getStreamSize(uint32_t StreamIdx) const {
+uint32_t MSFBuilder::getStreamSize(uint32_t StreamIdx) const {
return StreamData[StreamIdx].first;
}
-ArrayRef<uint32_t> MsfBuilder::getStreamBlocks(uint32_t StreamIdx) const {
+ArrayRef<uint32_t> MSFBuilder::getStreamBlocks(uint32_t StreamIdx) const {
return StreamData[StreamIdx].second;
}
-uint32_t MsfBuilder::computeDirectoryByteSize() const {
+uint32_t MSFBuilder::computeDirectoryByteSize() const {
// The directory has the following layout, where each item is a ulittle32_t:
// NumStreams
// StreamSizes[NumStreams]
@@ -218,18 +220,19 @@ uint32_t MsfBuilder::computeDirectoryByteSize() const {
return Size;
}
-Expected<Layout> MsfBuilder::build() {
- Layout L;
- L.SB = Allocator.Allocate<SuperBlock>();
- std::memcpy(L.SB->MagicBytes, Magic, sizeof(Magic));
- L.SB->BlockMapAddr = BlockMapAddr;
- L.SB->BlockSize = BlockSize;
- L.SB->NumDirectoryBytes = computeDirectoryByteSize();
- L.SB->FreeBlockMapBlock = FreePageMap;
- L.SB->Unknown1 = Unknown1;
-
- uint32_t NumDirectoryBlocks =
- bytesToBlocks(L.SB->NumDirectoryBytes, BlockSize);
+Expected<MSFLayout> MSFBuilder::build() {
+ SuperBlock *SB = Allocator.Allocate<SuperBlock>();
+ MSFLayout L;
+ L.SB = SB;
+
+ std::memcpy(SB->MagicBytes, Magic, sizeof(Magic));
+ SB->BlockMapAddr = BlockMapAddr;
+ SB->BlockSize = BlockSize;
+ SB->NumDirectoryBytes = computeDirectoryByteSize();
+ SB->FreeBlockMapBlock = FreePageMap;
+ SB->Unknown1 = Unknown1;
+
+ uint32_t NumDirectoryBlocks = bytesToBlocks(SB->NumDirectoryBytes, BlockSize);
if (NumDirectoryBlocks > DirectoryBlocks.size()) {
// Our hint wasn't enough to satisfy the entire directory. Allocate
// remaining pages.
@@ -249,9 +252,9 @@ Expected<Layout> MsfBuilder::build() {
}
// Don't set the number of blocks in the file until after allocating Blocks
- // for
- // the directory, since the allocation might cause the file to need to grow.
- L.SB->NumBlocks = FreeBlocks.size();
+ // for the directory, since the allocation might cause the file to need to
+ // grow.
+ SB->NumBlocks = FreeBlocks.size();
ulittle32_t *DirBlocks = Allocator.Allocate<ulittle32_t>(NumDirectoryBlocks);
std::uninitialized_copy_n(DirectoryBlocks.begin(), NumDirectoryBlocks,
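Since addStream now returns Expected<uint32_t> (the index of the stream just created) rather than a bare Error, callers pick the index up from the return value. A minimal sketch, assuming a 4 KiB block size and a growable file; the helper name is illustrative only:

  #include "llvm/DebugInfo/MSF/MSFBuilder.h"
  #include "llvm/Support/Allocator.h"
  #include "llvm/Support/Error.h"
  using namespace llvm;
  using namespace llvm::msf;

  Error addTwoStreams(BumpPtrAllocator &Alloc) {
    auto Builder = MSFBuilder::create(Alloc, /*BlockSize=*/4096,
                                      /*MinBlockCount=*/0, /*CanGrow=*/true);
    if (!Builder)
      return Builder.takeError();
    Expected<uint32_t> SN0 = Builder->addStream(16384); // occupies 4 blocks
    if (!SN0)
      return SN0.takeError();
    Expected<uint32_t> SN1 = Builder->addStream(100);   // occupies 1 block
    if (!SN1)
      return SN1.takeError();
    // Indices are assigned in creation order: *SN0 == 0, *SN1 == 1.
    return Error::success();
  }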
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Raw/MsfCommon.cpp b/contrib/llvm/lib/DebugInfo/MSF/MSFCommon.cpp
index 5d97f33e1103..fdab7884646e 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Raw/MsfCommon.cpp
+++ b/contrib/llvm/lib/DebugInfo/MSF/MSFCommon.cpp
@@ -1,4 +1,4 @@
-//===- MsfCommon.cpp - Common types and functions for MSF files -*- C++ -*-===//
+//===- MSFCommon.cpp - Common types and functions for MSF files -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,25 +7,25 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/DebugInfo/PDB/Raw/MsfCommon.h"
-#include "llvm/DebugInfo/PDB/Raw/RawError.h"
+#include "llvm/DebugInfo/MSF/MSFCommon.h"
+#include "llvm/DebugInfo/MSF/MSFError.h"
using namespace llvm;
-using namespace llvm::pdb::msf;
+using namespace llvm::msf;
-Error llvm::pdb::msf::validateSuperBlock(const SuperBlock &SB) {
+Error llvm::msf::validateSuperBlock(const SuperBlock &SB) {
// Check the magic bytes.
if (std::memcmp(SB.MagicBytes, Magic, sizeof(Magic)) != 0)
- return make_error<RawError>(raw_error_code::corrupt_file,
+ return make_error<MSFError>(msf_error_code::invalid_format,
"MSF magic header doesn't match");
if (!isValidBlockSize(SB.BlockSize))
- return make_error<RawError>(raw_error_code::corrupt_file,
+ return make_error<MSFError>(msf_error_code::invalid_format,
"Unsupported block size.");
// We don't support directories whose sizes aren't a multiple of four bytes.
if (SB.NumDirectoryBytes % sizeof(support::ulittle32_t) != 0)
- return make_error<RawError>(raw_error_code::corrupt_file,
+ return make_error<MSFError>(msf_error_code::invalid_format,
"Directory size is not multiple of 4.");
// The number of blocks which comprise the directory is a simple function of
@@ -37,12 +37,21 @@ Error llvm::pdb::msf::validateSuperBlock(const SuperBlock &SB) {
// block numbers. It is unclear what would happen if the number of blocks
// couldn't fit on a single block.
if (NumDirectoryBlocks > SB.BlockSize / sizeof(support::ulittle32_t))
- return make_error<RawError>(raw_error_code::corrupt_file,
+ return make_error<MSFError>(msf_error_code::invalid_format,
"Too many directory blocks.");
if (SB.BlockMapAddr == 0)
- return make_error<RawError>(raw_error_code::corrupt_file,
+ return make_error<MSFError>(msf_error_code::invalid_format,
"Block 0 is reserved");
+ if (SB.BlockMapAddr >= SB.NumBlocks)
+ return make_error<MSFError>(msf_error_code::invalid_format,
+ "Block map address is invalid.");
+
+ if (SB.FreeBlockMapBlock != 1 && SB.FreeBlockMapBlock != 2)
+ return make_error<MSFError>(
+ msf_error_code::invalid_format,
+ "The free block map isn't at block 1 or block 2.");
+
return Error::success();
}
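To make the directory-size limit above concrete (an illustrative calculation, not text from the change): the directory occupies roughly NumDirectoryBytes / BlockSize blocks, rounded up, and the list of those block numbers must itself fit in a single block of 4-byte entries. With a 4096-byte block size that is 4096 / sizeof(ulittle32_t) = 1024 entries, so a stream directory larger than 1024 blocks (about 4 MiB) is rejected as invalid_format.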
diff --git a/contrib/llvm/lib/DebugInfo/MSF/MSFError.cpp b/contrib/llvm/lib/DebugInfo/MSF/MSFError.cpp
new file mode 100644
index 000000000000..1b8294e47e75
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/MSF/MSFError.cpp
@@ -0,0 +1,70 @@
+//===- MSFError.cpp - Error extensions for MSF files ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/MSF/MSFError.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ManagedStatic.h"
+
+using namespace llvm;
+using namespace llvm::msf;
+
+namespace {
+// FIXME: This class is only here to support the transition to llvm::Error. It
+// will be removed once this transition is complete. Clients should prefer to
+// deal with the Error value directly, rather than converting to error_code.
+class MSFErrorCategory : public std::error_category {
+public:
+ const char *name() const noexcept override { return "llvm.msf"; }
+
+ std::string message(int Condition) const override {
+ switch (static_cast<msf_error_code>(Condition)) {
+ case msf_error_code::unspecified:
+ return "An unknown error has occurred.";
+ case msf_error_code::insufficient_buffer:
+ return "The buffer is not large enough to read the requested number of "
+ "bytes.";
+ case msf_error_code::not_writable:
+ return "The specified stream is not writable.";
+ case msf_error_code::no_stream:
+ return "The specified stream does not exist.";
+ case msf_error_code::invalid_format:
+ return "The data is in an unexpected format.";
+ case msf_error_code::block_in_use:
+ return "The block is already in use.";
+ }
+ llvm_unreachable("Unrecognized msf_error_code");
+ }
+};
+} // end anonymous namespace
+
+static ManagedStatic<MSFErrorCategory> Category;
+
+char MSFError::ID = 0;
+
+MSFError::MSFError(msf_error_code C) : MSFError(C, "") {}
+
+MSFError::MSFError(const std::string &Context)
+ : MSFError(msf_error_code::unspecified, Context) {}
+
+MSFError::MSFError(msf_error_code C, const std::string &Context) : Code(C) {
+ ErrMsg = "MSF Error: ";
+ std::error_code EC = convertToErrorCode();
+ if (Code != msf_error_code::unspecified)
+ ErrMsg += EC.message() + " ";
+ if (!Context.empty())
+ ErrMsg += Context;
+}
+
+void MSFError::log(raw_ostream &OS) const { OS << ErrMsg << "\n"; }
+
+const std::string &MSFError::getErrorMessage() const { return ErrMsg; }
+
+std::error_code MSFError::convertToErrorCode() const {
+ return std::error_code(static_cast<int>(Code), *Category);
+}
diff --git a/contrib/llvm/lib/DebugInfo/MSF/MappedBlockStream.cpp b/contrib/llvm/lib/DebugInfo/MSF/MappedBlockStream.cpp
new file mode 100644
index 000000000000..e52c88a5bfb8
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/MSF/MappedBlockStream.cpp
@@ -0,0 +1,415 @@
+//===- MappedBlockStream.cpp - Reads stream data from an MSF file ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
+
+#include "llvm/DebugInfo/MSF/IMSFFile.h"
+#include "llvm/DebugInfo/MSF/MSFCommon.h"
+#include "llvm/DebugInfo/MSF/MSFError.h"
+#include "llvm/DebugInfo/MSF/MSFStreamLayout.h"
+
+using namespace llvm;
+using namespace llvm::msf;
+
+namespace {
+template <typename Base> class MappedBlockStreamImpl : public Base {
+public:
+ template <typename... Args>
+ MappedBlockStreamImpl(Args &&... Params)
+ : Base(std::forward<Args>(Params)...) {}
+};
+}
+
+static void initializeFpmStreamLayout(const MSFLayout &Layout,
+ MSFStreamLayout &FpmLayout) {
+ uint32_t NumFpmIntervals = msf::getNumFpmIntervals(Layout);
+ support::ulittle32_t FpmBlock = Layout.SB->FreeBlockMapBlock;
+ assert(FpmBlock == 1 || FpmBlock == 2);
+ while (NumFpmIntervals > 0) {
+ FpmLayout.Blocks.push_back(FpmBlock);
+ FpmBlock += msf::getFpmIntervalLength(Layout);
+ --NumFpmIntervals;
+ }
+ FpmLayout.Length = msf::getFullFpmByteSize(Layout);
+}
+
+typedef std::pair<uint32_t, uint32_t> Interval;
+static Interval intersect(const Interval &I1, const Interval &I2) {
+ return std::make_pair(std::max(I1.first, I2.first),
+ std::min(I1.second, I2.second));
+}
+
+MappedBlockStream::MappedBlockStream(uint32_t BlockSize, uint32_t NumBlocks,
+ const MSFStreamLayout &Layout,
+ const ReadableStream &MsfData)
+ : BlockSize(BlockSize), NumBlocks(NumBlocks), StreamLayout(Layout),
+ MsfData(MsfData) {}
+
+std::unique_ptr<MappedBlockStream>
+MappedBlockStream::createStream(uint32_t BlockSize, uint32_t NumBlocks,
+ const MSFStreamLayout &Layout,
+ const ReadableStream &MsfData) {
+ return llvm::make_unique<MappedBlockStreamImpl<MappedBlockStream>>(
+ BlockSize, NumBlocks, Layout, MsfData);
+}
+
+std::unique_ptr<MappedBlockStream>
+MappedBlockStream::createIndexedStream(const MSFLayout &Layout,
+ const ReadableStream &MsfData,
+ uint32_t StreamIndex) {
+ assert(StreamIndex < Layout.StreamMap.size() && "Invalid stream index");
+ MSFStreamLayout SL;
+ SL.Blocks = Layout.StreamMap[StreamIndex];
+ SL.Length = Layout.StreamSizes[StreamIndex];
+ return llvm::make_unique<MappedBlockStreamImpl<MappedBlockStream>>(
+ Layout.SB->BlockSize, Layout.SB->NumBlocks, SL, MsfData);
+}
+
+std::unique_ptr<MappedBlockStream>
+MappedBlockStream::createDirectoryStream(const MSFLayout &Layout,
+ const ReadableStream &MsfData) {
+ MSFStreamLayout SL;
+ SL.Blocks = Layout.DirectoryBlocks;
+ SL.Length = Layout.SB->NumDirectoryBytes;
+ return createStream(Layout.SB->BlockSize, Layout.SB->NumBlocks, SL, MsfData);
+}
+
+std::unique_ptr<MappedBlockStream>
+MappedBlockStream::createFpmStream(const MSFLayout &Layout,
+ const ReadableStream &MsfData) {
+ MSFStreamLayout SL;
+ initializeFpmStreamLayout(Layout, SL);
+ return createStream(Layout.SB->BlockSize, Layout.SB->NumBlocks, SL, MsfData);
+}
+
+Error MappedBlockStream::readBytes(uint32_t Offset, uint32_t Size,
+ ArrayRef<uint8_t> &Buffer) const {
+ // Make sure we aren't trying to read beyond the end of the stream.
+ if (Size > StreamLayout.Length)
+ return make_error<MSFError>(msf_error_code::insufficient_buffer);
+ if (Offset > StreamLayout.Length - Size)
+ return make_error<MSFError>(msf_error_code::insufficient_buffer);
+
+ if (tryReadContiguously(Offset, Size, Buffer))
+ return Error::success();
+
+ auto CacheIter = CacheMap.find(Offset);
+ if (CacheIter != CacheMap.end()) {
+ // Try to find an alloc that was large enough for this request.
+ for (auto &Entry : CacheIter->second) {
+ if (Entry.size() >= Size) {
+ Buffer = Entry.slice(0, Size);
+ return Error::success();
+ }
+ }
+ }
+
+ // We couldn't find a buffer that started at the correct offset (the most
+ // common scenario). Try to see if there is a buffer that starts at some
+ // other offset but overlaps the desired range.
+ for (auto &CacheItem : CacheMap) {
+ Interval RequestExtent = std::make_pair(Offset, Offset + Size);
+
+ // We already checked this one on the fast path above.
+ if (CacheItem.first == Offset)
+ continue;
+ // If the initial extent of the cached item is beyond the ending extent
+ // of the request, there is no overlap.
+ if (CacheItem.first >= Offset + Size)
+ continue;
+
+ // We really only have to check the last item in the list, since we append
+ // in order of increasing length.
+ if (CacheItem.second.empty())
+ continue;
+
+ auto CachedAlloc = CacheItem.second.back();
+ // If the initial extent of the request is beyond the ending extent of
+ // the cached item, there is no overlap.
+ Interval CachedExtent =
+ std::make_pair(CacheItem.first, CacheItem.first + CachedAlloc.size());
+ if (RequestExtent.first >= CachedExtent.first + CachedExtent.second)
+ continue;
+
+ Interval Intersection = intersect(CachedExtent, RequestExtent);
+ // Only use this if the entire request extent is contained in the cached
+ // extent.
+ if (Intersection != RequestExtent)
+ continue;
+
+ uint32_t CacheRangeOffset =
+ AbsoluteDifference(CachedExtent.first, Intersection.first);
+ Buffer = CachedAlloc.slice(CacheRangeOffset, Size);
+ return Error::success();
+ }
+
+ // Otherwise allocate a large enough buffer in the pool, memcpy the data
+ // into it, and return an ArrayRef to that. Do not touch existing pool
+ // allocations, as existing clients may be holding a pointer which must
+ // not be invalidated.
+ uint8_t *WriteBuffer = static_cast<uint8_t *>(Pool.Allocate(Size, 8));
+ if (auto EC = readBytes(Offset, MutableArrayRef<uint8_t>(WriteBuffer, Size)))
+ return EC;
+
+ if (CacheIter != CacheMap.end()) {
+ CacheIter->second.emplace_back(WriteBuffer, Size);
+ } else {
+ std::vector<CacheEntry> List;
+ List.emplace_back(WriteBuffer, Size);
+ CacheMap.insert(std::make_pair(Offset, List));
+ }
+ Buffer = ArrayRef<uint8_t>(WriteBuffer, Size);
+ return Error::success();
+}
+
+Error MappedBlockStream::readLongestContiguousChunk(
+ uint32_t Offset, ArrayRef<uint8_t> &Buffer) const {
+ // Make sure we aren't trying to read beyond the end of the stream.
+ if (Offset >= StreamLayout.Length)
+ return make_error<MSFError>(msf_error_code::insufficient_buffer);
+ uint32_t First = Offset / BlockSize;
+ uint32_t Last = First;
+
+ while (Last < NumBlocks - 1) {
+ if (StreamLayout.Blocks[Last] != StreamLayout.Blocks[Last + 1] - 1)
+ break;
+ ++Last;
+ }
+
+ uint32_t OffsetInFirstBlock = Offset % BlockSize;
+ uint32_t BytesFromFirstBlock = BlockSize - OffsetInFirstBlock;
+ uint32_t BlockSpan = Last - First + 1;
+ uint32_t ByteSpan = BytesFromFirstBlock + (BlockSpan - 1) * BlockSize;
+
+ ArrayRef<uint8_t> BlockData;
+ uint32_t MsfOffset = blockToOffset(StreamLayout.Blocks[First], BlockSize);
+ if (auto EC = MsfData.readBytes(MsfOffset, BlockSize, BlockData))
+ return EC;
+
+ BlockData = BlockData.drop_front(OffsetInFirstBlock);
+ Buffer = ArrayRef<uint8_t>(BlockData.data(), ByteSpan);
+ return Error::success();
+}
+
+uint32_t MappedBlockStream::getLength() const { return StreamLayout.Length; }
+
+bool MappedBlockStream::tryReadContiguously(uint32_t Offset, uint32_t Size,
+ ArrayRef<uint8_t> &Buffer) const {
+ if (Size == 0) {
+ Buffer = ArrayRef<uint8_t>();
+ return true;
+ }
+ // Attempt to fulfill the request with a reference directly into the stream.
+ // This can work even if the request crosses a block boundary, provided that
+ // all subsequent blocks are contiguous. For example, a 10k read with a 4k
+ // block size can be filled with a reference if, from the starting offset,
+ // 3 blocks in a row are contiguous.
+ uint32_t BlockNum = Offset / BlockSize;
+ uint32_t OffsetInBlock = Offset % BlockSize;
+ uint32_t BytesFromFirstBlock = std::min(Size, BlockSize - OffsetInBlock);
+ uint32_t NumAdditionalBlocks =
+ llvm::alignTo(Size - BytesFromFirstBlock, BlockSize) / BlockSize;
+
+ uint32_t RequiredContiguousBlocks = NumAdditionalBlocks + 1;
+ uint32_t E = StreamLayout.Blocks[BlockNum];
+ for (uint32_t I = 0; I < RequiredContiguousBlocks; ++I, ++E) {
+ if (StreamLayout.Blocks[I + BlockNum] != E)
+ return false;
+ }
+
+ // Read out the entire block where the requested offset starts. Then drop
+ // bytes from the beginning so that the actual starting byte lines up with
+ // the requested starting byte. Then, since we know this is a contiguous
+ // cross-block span, explicitly resize the ArrayRef to cover the entire
+ // request length.
+ ArrayRef<uint8_t> BlockData;
+ uint32_t FirstBlockAddr = StreamLayout.Blocks[BlockNum];
+ uint32_t MsfOffset = blockToOffset(FirstBlockAddr, BlockSize);
+ if (auto EC = MsfData.readBytes(MsfOffset, BlockSize, BlockData)) {
+ consumeError(std::move(EC));
+ return false;
+ }
+ BlockData = BlockData.drop_front(OffsetInBlock);
+ Buffer = ArrayRef<uint8_t>(BlockData.data(), Size);
+ return true;
+}
+
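A rough sketch of the same arithmetic, checking the "10k read with a 4k block size needs 3 contiguous blocks" claim from the comment above. The starting offset of 1024 is an assumed example, and llvm::alignTo is replaced by an equivalent round-up division:

#include <algorithm>
#include <cstdint>
#include <cstdio>

int main() {
  uint32_t BlockSize = 4096, Offset = 1024, Size = 10240; // 10k read, 4k blocks
  uint32_t OffsetInBlock = Offset % BlockSize;                              // 1024
  uint32_t BytesFromFirstBlock = std::min(Size, BlockSize - OffsetInBlock); // 3072
  uint32_t Rest = Size - BytesFromFirstBlock;                               // 7168
  uint32_t NumAdditionalBlocks = (Rest + BlockSize - 1) / BlockSize;        // 2
  std::printf("contiguous blocks required: %u\n", NumAdditionalBlocks + 1); // 3
  return 0;
}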
+Error MappedBlockStream::readBytes(uint32_t Offset,
+ MutableArrayRef<uint8_t> Buffer) const {
+ uint32_t BlockNum = Offset / BlockSize;
+ uint32_t OffsetInBlock = Offset % BlockSize;
+
+ // Make sure we aren't trying to read beyond the end of the stream.
+ if (Buffer.size() > StreamLayout.Length)
+ return make_error<MSFError>(msf_error_code::insufficient_buffer);
+ if (Offset > StreamLayout.Length - Buffer.size())
+ return make_error<MSFError>(msf_error_code::insufficient_buffer);
+
+ uint32_t BytesLeft = Buffer.size();
+ uint32_t BytesWritten = 0;
+ uint8_t *WriteBuffer = Buffer.data();
+ while (BytesLeft > 0) {
+ uint32_t StreamBlockAddr = StreamLayout.Blocks[BlockNum];
+
+ ArrayRef<uint8_t> BlockData;
+ uint32_t Offset = blockToOffset(StreamBlockAddr, BlockSize);
+ if (auto EC = MsfData.readBytes(Offset, BlockSize, BlockData))
+ return EC;
+
+ const uint8_t *ChunkStart = BlockData.data() + OffsetInBlock;
+ uint32_t BytesInChunk = std::min(BytesLeft, BlockSize - OffsetInBlock);
+ ::memcpy(WriteBuffer + BytesWritten, ChunkStart, BytesInChunk);
+
+ BytesWritten += BytesInChunk;
+ BytesLeft -= BytesInChunk;
+ ++BlockNum;
+ OffsetInBlock = 0;
+ }
+
+ return Error::success();
+}
+
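For the copying path above, a minimal sketch of how a cross-block read splits into per-block chunks. The 4 KiB block size and the 6000-byte read at offset 5000 are assumed values; the real loop also reads each block via MsfData.readBytes before the memcpy:

#include <algorithm>
#include <cstdint>
#include <cstdio>

int main() {
  uint32_t BlockSize = 4096, Offset = 5000, BytesLeft = 6000;
  uint32_t OffsetInBlock = Offset % BlockSize; // 904
  while (BytesLeft > 0) {
    uint32_t Chunk = std::min(BytesLeft, BlockSize - OffsetInBlock);
    std::printf("copy %u bytes from this block\n", Chunk); // 3192, then 2808
    BytesLeft -= Chunk;
    OffsetInBlock = 0; // every later block is consumed from its start
  }
  return 0;
}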
+uint32_t MappedBlockStream::getNumBytesCopied() const {
+ return static_cast<uint32_t>(Pool.getBytesAllocated());
+}
+
+void MappedBlockStream::invalidateCache() { CacheMap.shrink_and_clear(); }
+
+void MappedBlockStream::fixCacheAfterWrite(uint32_t Offset,
+ ArrayRef<uint8_t> Data) const {
+ // If this write overlapped a read which previously came from the pool,
+ // someone may still be holding a pointer to that alloc which is now invalid.
+ // Compute the overlapping range and update the cache entry, so any
+ // outstanding buffers are automatically updated.
+ for (const auto &MapEntry : CacheMap) {
+ // If the end of the written extent precedes the beginning of the cached
+ // extent, ignore this map entry.
+ if (Offset + Data.size() < MapEntry.first)
+ continue;
+ for (const auto &Alloc : MapEntry.second) {
+ // If the end of the cached extent precedes the beginning of the written
+ // extent, ignore this alloc.
+ if (MapEntry.first + Alloc.size() < Offset)
+ continue;
+
+ // If we get here, they are guaranteed to overlap.
+ Interval WriteInterval = std::make_pair(Offset, Offset + Data.size());
+ Interval CachedInterval =
+ std::make_pair(MapEntry.first, MapEntry.first + Alloc.size());
+ // If they overlap, we need to write the new data into the overlapping
+ // range.
+ auto Intersection = intersect(WriteInterval, CachedInterval);
+ assert(Intersection.first <= Intersection.second);
+
+ uint32_t Length = Intersection.second - Intersection.first;
+ uint32_t SrcOffset =
+ AbsoluteDifference(WriteInterval.first, Intersection.first);
+ uint32_t DestOffset =
+ AbsoluteDifference(CachedInterval.first, Intersection.first);
+ ::memcpy(Alloc.data() + DestOffset, Data.data() + SrcOffset, Length);
+ }
+ }
+}
+
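A small illustration of the overlap arithmetic in fixCacheAfterWrite, using assumed extents (a 50-byte write at offset 100 against an 80-byte cached allocation at offset 120). intersect and AbsoluteDifference are modeled here with std::max/std::min and plain subtraction, which is valid because the intervals are already ordered:

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <utility>

int main() {
  std::pair<uint32_t, uint32_t> Write = {100, 150};  // [Offset, Offset + Data.size())
  std::pair<uint32_t, uint32_t> Cached = {120, 200}; // [MapEntry.first, MapEntry.first + Alloc.size())
  uint32_t Lo = std::max(Write.first, Cached.first);   // 120
  uint32_t Hi = std::min(Write.second, Cached.second); // 150
  uint32_t Length = Hi - Lo;                           // 30 bytes to patch
  uint32_t SrcOffset = Lo - Write.first;               // 20: skip into the written data
  uint32_t DestOffset = Lo - Cached.first;             // 0: start of the cached allocation
  std::printf("memcpy(Alloc + %u, Data + %u, %u)\n", DestOffset, SrcOffset, Length);
  return 0;
}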
+WritableMappedBlockStream::WritableMappedBlockStream(
+ uint32_t BlockSize, uint32_t NumBlocks, const MSFStreamLayout &Layout,
+ const WritableStream &MsfData)
+ : ReadInterface(BlockSize, NumBlocks, Layout, MsfData),
+ WriteInterface(MsfData) {}
+
+std::unique_ptr<WritableMappedBlockStream>
+WritableMappedBlockStream::createStream(uint32_t BlockSize, uint32_t NumBlocks,
+ const MSFStreamLayout &Layout,
+ const WritableStream &MsfData) {
+ return llvm::make_unique<MappedBlockStreamImpl<WritableMappedBlockStream>>(
+ BlockSize, NumBlocks, Layout, MsfData);
+}
+
+std::unique_ptr<WritableMappedBlockStream>
+WritableMappedBlockStream::createIndexedStream(const MSFLayout &Layout,
+ const WritableStream &MsfData,
+ uint32_t StreamIndex) {
+ assert(StreamIndex < Layout.StreamMap.size() && "Invalid stream index");
+ MSFStreamLayout SL;
+ SL.Blocks = Layout.StreamMap[StreamIndex];
+ SL.Length = Layout.StreamSizes[StreamIndex];
+ return createStream(Layout.SB->BlockSize, Layout.SB->NumBlocks, SL, MsfData);
+}
+
+std::unique_ptr<WritableMappedBlockStream>
+WritableMappedBlockStream::createDirectoryStream(
+ const MSFLayout &Layout, const WritableStream &MsfData) {
+ MSFStreamLayout SL;
+ SL.Blocks = Layout.DirectoryBlocks;
+ SL.Length = Layout.SB->NumDirectoryBytes;
+ return createStream(Layout.SB->BlockSize, Layout.SB->NumBlocks, SL, MsfData);
+}
+
+std::unique_ptr<WritableMappedBlockStream>
+WritableMappedBlockStream::createFpmStream(const MSFLayout &Layout,
+ const WritableStream &MsfData) {
+ MSFStreamLayout SL;
+ initializeFpmStreamLayout(Layout, SL);
+ return createStream(Layout.SB->BlockSize, Layout.SB->NumBlocks, SL, MsfData);
+}
+
+Error WritableMappedBlockStream::readBytes(uint32_t Offset, uint32_t Size,
+ ArrayRef<uint8_t> &Buffer) const {
+ return ReadInterface.readBytes(Offset, Size, Buffer);
+}
+
+Error WritableMappedBlockStream::readLongestContiguousChunk(
+ uint32_t Offset, ArrayRef<uint8_t> &Buffer) const {
+ return ReadInterface.readLongestContiguousChunk(Offset, Buffer);
+}
+
+uint32_t WritableMappedBlockStream::getLength() const {
+ return ReadInterface.getLength();
+}
+
+Error WritableMappedBlockStream::writeBytes(uint32_t Offset,
+ ArrayRef<uint8_t> Buffer) const {
+ // Make sure we aren't trying to write beyond the end of the stream.
+ if (Buffer.size() > getStreamLength())
+ return make_error<MSFError>(msf_error_code::insufficient_buffer);
+
+ if (Offset > getStreamLayout().Length - Buffer.size())
+ return make_error<MSFError>(msf_error_code::insufficient_buffer);
+
+ uint32_t BlockNum = Offset / getBlockSize();
+ uint32_t OffsetInBlock = Offset % getBlockSize();
+
+ uint32_t BytesLeft = Buffer.size();
+ uint32_t BytesWritten = 0;
+ while (BytesLeft > 0) {
+ uint32_t StreamBlockAddr = getStreamLayout().Blocks[BlockNum];
+ uint32_t BytesToWriteInChunk =
+ std::min(BytesLeft, getBlockSize() - OffsetInBlock);
+
+ const uint8_t *Chunk = Buffer.data() + BytesWritten;
+ ArrayRef<uint8_t> ChunkData(Chunk, BytesToWriteInChunk);
+ uint32_t MsfOffset = blockToOffset(StreamBlockAddr, getBlockSize());
+ MsfOffset += OffsetInBlock;
+ if (auto EC = WriteInterface.writeBytes(MsfOffset, ChunkData))
+ return EC;
+
+ BytesLeft -= BytesToWriteInChunk;
+ BytesWritten += BytesToWriteInChunk;
+ ++BlockNum;
+ OffsetInBlock = 0;
+ }
+
+ ReadInterface.fixCacheAfterWrite(Offset, Buffer);
+
+ return Error::success();
+}
+
+Error WritableMappedBlockStream::commit() const {
+ return WriteInterface.commit();
+}
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/StreamReader.cpp b/contrib/llvm/lib/DebugInfo/MSF/StreamReader.cpp
index 64e45487322e..b85fd14a3b7f 100644
--- a/contrib/llvm/lib/DebugInfo/CodeView/StreamReader.cpp
+++ b/contrib/llvm/lib/DebugInfo/MSF/StreamReader.cpp
@@ -7,15 +7,15 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/DebugInfo/CodeView/StreamReader.h"
+#include "llvm/DebugInfo/MSF/StreamReader.h"
-#include "llvm/DebugInfo/CodeView/CodeViewError.h"
-#include "llvm/DebugInfo/CodeView/StreamRef.h"
+#include "llvm/DebugInfo/MSF/MSFError.h"
+#include "llvm/DebugInfo/MSF/StreamRef.h"
using namespace llvm;
-using namespace llvm::codeview;
+using namespace llvm::msf;
-StreamReader::StreamReader(StreamRef S) : Stream(S), Offset(0) {}
+StreamReader::StreamReader(ReadableStreamRef S) : Stream(S), Offset(0) {}
Error StreamReader::readLongestContiguousChunk(ArrayRef<uint8_t> &Buffer) {
if (auto EC = Stream.readLongestContiguousChunk(Offset, Buffer))
@@ -31,6 +31,14 @@ Error StreamReader::readBytes(ArrayRef<uint8_t> &Buffer, uint32_t Size) {
return Error::success();
}
+Error StreamReader::readInteger(uint8_t &Dest) {
+ const uint8_t *P;
+ if (auto EC = readObject(P))
+ return EC;
+ Dest = *P;
+ return Error::success();
+}
+
Error StreamReader::readInteger(uint16_t &Dest) {
const support::ulittle16_t *P;
if (auto EC = readObject(P))
@@ -47,6 +55,46 @@ Error StreamReader::readInteger(uint32_t &Dest) {
return Error::success();
}
+Error StreamReader::readInteger(uint64_t &Dest) {
+ const support::ulittle64_t *P;
+ if (auto EC = readObject(P))
+ return EC;
+ Dest = *P;
+ return Error::success();
+}
+
+Error StreamReader::readInteger(int8_t &Dest) {
+ const int8_t *P;
+ if (auto EC = readObject(P))
+ return EC;
+ Dest = *P;
+ return Error::success();
+}
+
+Error StreamReader::readInteger(int16_t &Dest) {
+ const support::little16_t *P;
+ if (auto EC = readObject(P))
+ return EC;
+ Dest = *P;
+ return Error::success();
+}
+
+Error StreamReader::readInteger(int32_t &Dest) {
+ const support::little32_t *P;
+ if (auto EC = readObject(P))
+ return EC;
+ Dest = *P;
+ return Error::success();
+}
+
+Error StreamReader::readInteger(int64_t &Dest) {
+ const support::little64_t *P;
+ if (auto EC = readObject(P))
+ return EC;
+ Dest = *P;
+ return Error::success();
+}
+
Error StreamReader::readZeroString(StringRef &Dest) {
uint32_t Length = 0;
// First compute the length of the string by reading 1 byte at a time.
@@ -80,14 +128,29 @@ Error StreamReader::readFixedString(StringRef &Dest, uint32_t Length) {
return Error::success();
}
-Error StreamReader::readStreamRef(StreamRef &Ref) {
+Error StreamReader::readStreamRef(ReadableStreamRef &Ref) {
return readStreamRef(Ref, bytesRemaining());
}
-Error StreamReader::readStreamRef(StreamRef &Ref, uint32_t Length) {
+Error StreamReader::readStreamRef(ReadableStreamRef &Ref, uint32_t Length) {
if (bytesRemaining() < Length)
- return make_error<CodeViewError>(cv_error_code::insufficient_buffer);
+ return make_error<MSFError>(msf_error_code::insufficient_buffer);
Ref = Stream.slice(Offset, Length);
Offset += Length;
return Error::success();
}
+
+Error StreamReader::skip(uint32_t Amount) {
+ if (Amount > bytesRemaining())
+ return make_error<MSFError>(msf_error_code::insufficient_buffer);
+ Offset += Amount;
+ return Error::success();
+}
+
+uint8_t StreamReader::peek() const {
+ ArrayRef<uint8_t> Buffer;
+ auto EC = Stream.readBytes(Offset, 1, Buffer);
+ assert(!EC && "Cannot peek an empty buffer!");
+ llvm::consumeError(std::move(EC));
+ return Buffer[0];
+}
diff --git a/contrib/llvm/lib/DebugInfo/CodeView/StreamWriter.cpp b/contrib/llvm/lib/DebugInfo/MSF/StreamWriter.cpp
index f61c6b522f57..cdae7c5acc04 100644
--- a/contrib/llvm/lib/DebugInfo/CodeView/StreamWriter.cpp
+++ b/contrib/llvm/lib/DebugInfo/MSF/StreamWriter.cpp
@@ -7,16 +7,16 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/DebugInfo/CodeView/StreamWriter.h"
+#include "llvm/DebugInfo/MSF/StreamWriter.h"
-#include "llvm/DebugInfo/CodeView/CodeViewError.h"
-#include "llvm/DebugInfo/CodeView/StreamReader.h"
-#include "llvm/DebugInfo/CodeView/StreamRef.h"
+#include "llvm/DebugInfo/MSF/MSFError.h"
+#include "llvm/DebugInfo/MSF/StreamReader.h"
+#include "llvm/DebugInfo/MSF/StreamRef.h"
using namespace llvm;
-using namespace llvm::codeview;
+using namespace llvm::msf;
-StreamWriter::StreamWriter(StreamRef S) : Stream(S), Offset(0) {}
+StreamWriter::StreamWriter(WritableStreamRef S) : Stream(S), Offset(0) {}
Error StreamWriter::writeBytes(ArrayRef<uint8_t> Buffer) {
if (auto EC = Stream.writeBytes(Offset, Buffer))
@@ -25,6 +25,8 @@ Error StreamWriter::writeBytes(ArrayRef<uint8_t> Buffer) {
return Error::success();
}
+Error StreamWriter::writeInteger(uint8_t Int) { return writeObject(Int); }
+
Error StreamWriter::writeInteger(uint16_t Int) {
return writeObject(support::ulittle16_t(Int));
}
@@ -33,6 +35,24 @@ Error StreamWriter::writeInteger(uint32_t Int) {
return writeObject(support::ulittle32_t(Int));
}
+Error StreamWriter::writeInteger(uint64_t Int) {
+ return writeObject(support::ulittle64_t(Int));
+}
+
+Error StreamWriter::writeInteger(int8_t Int) { return writeObject(Int); }
+
+Error StreamWriter::writeInteger(int16_t Int) {
+ return writeObject(support::little16_t(Int));
+}
+
+Error StreamWriter::writeInteger(int32_t Int) {
+ return writeObject(support::little32_t(Int));
+}
+
+Error StreamWriter::writeInteger(int64_t Int) {
+ return writeObject(support::little64_t(Int));
+}
+
Error StreamWriter::writeZeroString(StringRef Str) {
if (auto EC = writeFixedString(Str))
return EC;
@@ -51,14 +71,15 @@ Error StreamWriter::writeFixedString(StringRef Str) {
return Error::success();
}
-Error StreamWriter::writeStreamRef(StreamRef Ref) {
+Error StreamWriter::writeStreamRef(ReadableStreamRef Ref) {
if (auto EC = writeStreamRef(Ref, Ref.getLength()))
return EC;
- Offset += Ref.getLength();
+  // Don't increment Offset here; it is done by the overloaded call to
+  // writeStreamRef.
return Error::success();
}
-Error StreamWriter::writeStreamRef(StreamRef Ref, uint32_t Length) {
+Error StreamWriter::writeStreamRef(ReadableStreamRef Ref, uint32_t Length) {
Ref = Ref.slice(0, Length);
StreamReader SrcReader(Ref);
diff --git a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAError.cpp b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAError.cpp
index 1d72a92b5145..0da877b0fbad 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAError.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIAError.cpp
@@ -10,7 +10,7 @@ using namespace llvm::pdb;
// deal with the Error value directly, rather than converting to error_code.
class DIAErrorCategory : public std::error_category {
public:
- const char *name() const LLVM_NOEXCEPT override { return "llvm.pdb.dia"; }
+ const char *name() const noexcept override { return "llvm.pdb.dia"; }
std::string message(int Condition) const override {
switch (static_cast<dia_error_code>(Condition)) {
@@ -38,21 +38,20 @@ char DIAError::ID = 0;
DIAError::DIAError(dia_error_code C) : DIAError(C, "") {}
-DIAError::DIAError(const std::string &Context)
+DIAError::DIAError(StringRef Context)
: DIAError(dia_error_code::unspecified, Context) {}
-DIAError::DIAError(dia_error_code C, const std::string &Context) : Code(C) {
+DIAError::DIAError(dia_error_code C, StringRef Context) : Code(C) {
ErrMsg = "DIA Error: ";
std::error_code EC = convertToErrorCode();
- if (Code != dia_error_code::unspecified)
- ErrMsg += EC.message() + " ";
+ ErrMsg += EC.message() + " ";
if (!Context.empty())
ErrMsg += Context;
}
void DIAError::log(raw_ostream &OS) const { OS << ErrMsg << "\n"; }
-const std::string &DIAError::getErrorMessage() const { return ErrMsg; }
+StringRef DIAError::getErrorMessage() const { return ErrMsg; }
std::error_code DIAError::convertToErrorCode() const {
return std::error_code(static_cast<int>(Code), *Category);
diff --git a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIASession.cpp b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIASession.cpp
index fa224af8cb87..6ecf335812b5 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/DIA/DIASession.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/DIA/DIASession.cpp
@@ -20,31 +20,36 @@
#include "llvm/DebugInfo/PDB/PDBSymbolCompiland.h"
#include "llvm/DebugInfo/PDB/PDBSymbolExe.h"
#include "llvm/Support/ConvertUTF.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace llvm::pdb;
-namespace {
-
-Error ErrorFromHResult(HRESULT Result) {
+static Error ErrorFromHResult(HRESULT Result, StringRef Context) {
switch (Result) {
case E_PDB_NOT_FOUND:
- return make_error<GenericError>(generic_error_code::invalid_path);
+ return make_error<GenericError>(generic_error_code::invalid_path, Context);
case E_PDB_FORMAT:
- return make_error<DIAError>(dia_error_code::invalid_file_format);
+ return make_error<DIAError>(dia_error_code::invalid_file_format, Context);
case E_INVALIDARG:
- return make_error<DIAError>(dia_error_code::invalid_parameter);
+ return make_error<DIAError>(dia_error_code::invalid_parameter, Context);
case E_UNEXPECTED:
- return make_error<DIAError>(dia_error_code::already_loaded);
+ return make_error<DIAError>(dia_error_code::already_loaded, Context);
case E_PDB_INVALID_SIG:
case E_PDB_INVALID_AGE:
- return make_error<DIAError>(dia_error_code::debug_info_mismatch);
- default:
- return make_error<DIAError>(dia_error_code::unspecified);
+ return make_error<DIAError>(dia_error_code::debug_info_mismatch, Context);
+ default: {
+ std::string S;
+ raw_string_ostream OS(S);
+ OS << "HRESULT: " << format_hex(static_cast<DWORD>(Result), 10, true)
+ << ": " << Context;
+ return make_error<DIAError>(dia_error_code::unspecified, OS.str());
+ }
}
}
-Error LoadDIA(CComPtr<IDiaDataSource> &DiaDataSource) {
+static Error LoadDIA(CComPtr<IDiaDataSource> &DiaDataSource) {
if (SUCCEEDED(CoCreateInstance(CLSID_DiaSource, nullptr, CLSCTX_INPROC_SERVER,
IID_IDiaDataSource,
reinterpret_cast<LPVOID *>(&DiaDataSource))))
@@ -55,12 +60,11 @@ Error LoadDIA(CComPtr<IDiaDataSource> &DiaDataSource) {
#if !defined(_MSC_VER)
return llvm::make_error<GenericError>(
"DIA is only supported when using MSVC.");
-#endif
-
+#else
const wchar_t *msdia_dll = nullptr;
-#if _MSC_VER == 1900
+#if _MSC_VER >= 1900 && _MSC_VER < 2000
msdia_dll = L"msdia140.dll"; // VS2015
-#elif _MSC_VER == 1800
+#elif _MSC_VER >= 1800
msdia_dll = L"msdia120.dll"; // VS2013
#else
#error "Unknown Visual Studio version."
@@ -69,10 +73,9 @@ Error LoadDIA(CComPtr<IDiaDataSource> &DiaDataSource) {
HRESULT HR;
if (FAILED(HR = NoRegCoCreate(msdia_dll, CLSID_DiaSource, IID_IDiaDataSource,
reinterpret_cast<LPVOID *>(&DiaDataSource))))
- return ErrorFromHResult(HR);
+ return ErrorFromHResult(HR, "Calling NoRegCoCreate");
return Error::success();
-}
-
+#endif
}
DIASession::DIASession(CComPtr<IDiaSession> DiaSession) : Session(DiaSession) {}
@@ -93,10 +96,10 @@ Error DIASession::createFromPdb(StringRef Path,
const wchar_t *Path16Str = reinterpret_cast<const wchar_t*>(Path16.data());
HRESULT HR;
if (FAILED(HR = DiaDataSource->loadDataFromPdb(Path16Str)))
- return ErrorFromHResult(HR);
+ return ErrorFromHResult(HR, "Calling loadDataFromPdb");
if (FAILED(HR = DiaDataSource->openSession(&DiaSession)))
- return ErrorFromHResult(HR);
+ return ErrorFromHResult(HR, "Calling openSession");
Session.reset(new DIASession(DiaSession));
return Error::success();
@@ -118,10 +121,10 @@ Error DIASession::createFromExe(StringRef Path,
const wchar_t *Path16Str = reinterpret_cast<const wchar_t *>(Path16.data());
HRESULT HR;
if (FAILED(HR = DiaDataSource->loadDataForExe(Path16Str, nullptr, nullptr)))
- return ErrorFromHResult(HR);
+ return ErrorFromHResult(HR, "Calling loadDataForExe");
if (FAILED(HR = DiaDataSource->openSession(&DiaSession)))
- return ErrorFromHResult(HR);
+ return ErrorFromHResult(HR, "Calling openSession");
Session.reset(new DIASession(DiaSession));
return Error::success();
diff --git a/contrib/llvm/lib/DebugInfo/PDB/GenericError.cpp b/contrib/llvm/lib/DebugInfo/PDB/GenericError.cpp
index 34e179998021..789f3b813170 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/GenericError.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/GenericError.cpp
@@ -20,7 +20,7 @@ namespace {
// deal with the Error value directly, rather than converting to error_code.
class GenericErrorCategory : public std::error_category {
public:
- const char *name() const LLVM_NOEXCEPT override { return "llvm.pdb"; }
+ const char *name() const noexcept override { return "llvm.pdb"; }
std::string message(int Condition) const override {
switch (static_cast<generic_error_code>(Condition)) {
@@ -45,11 +45,10 @@ char GenericError::ID = 0;
GenericError::GenericError(generic_error_code C) : GenericError(C, "") {}
-GenericError::GenericError(const std::string &Context)
+GenericError::GenericError(StringRef Context)
: GenericError(generic_error_code::unspecified, Context) {}
-GenericError::GenericError(generic_error_code C, const std::string &Context)
- : Code(C) {
+GenericError::GenericError(generic_error_code C, StringRef Context) : Code(C) {
ErrMsg = "PDB Error: ";
std::error_code EC = convertToErrorCode();
if (Code != generic_error_code::unspecified)
@@ -60,7 +59,7 @@ GenericError::GenericError(generic_error_code C, const std::string &Context)
void GenericError::log(raw_ostream &OS) const { OS << ErrMsg << "\n"; }
-const std::string &GenericError::getErrorMessage() const { return ErrMsg; }
+StringRef GenericError::getErrorMessage() const { return ErrMsg; }
std::error_code GenericError::convertToErrorCode() const {
return std::error_code(static_cast<int>(Code), *Category);
diff --git a/contrib/llvm/lib/DebugInfo/PDB/IPDBSourceFile.cpp b/contrib/llvm/lib/DebugInfo/PDB/IPDBSourceFile.cpp
index 46b422f5a76a..8cb1fbef51f4 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/IPDBSourceFile.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/IPDBSourceFile.cpp
@@ -1,4 +1,4 @@
-//===- IPDBSourceFile.cpp - base interface for a PDB source file *- C++ -*-===//
+//===- IPDBSourceFile.cpp - base interface for a PDB source file ----------===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,15 +8,17 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/PDB/IPDBSourceFile.h"
-
#include "llvm/DebugInfo/PDB/PDBExtras.h"
+#include "llvm/DebugInfo/PDB/PDBTypes.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
+#include <cstdint>
+#include <string>
using namespace llvm;
using namespace llvm::pdb;
-IPDBSourceFile::~IPDBSourceFile() {}
+IPDBSourceFile::~IPDBSourceFile() = default;
void IPDBSourceFile::dump(raw_ostream &OS, int Indent) const {
OS.indent(Indent);
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDB.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDB.cpp
index 69a908eb341c..0d720591b81d 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDB.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDB.cpp
@@ -14,7 +14,7 @@
#include "llvm/DebugInfo/PDB/GenericError.h"
#include "llvm/DebugInfo/PDB/IPDBSession.h"
#include "llvm/DebugInfo/PDB/PDB.h"
-#if HAVE_DIA_SDK
+#if LLVM_ENABLE_DIA_SDK
#include "llvm/DebugInfo/PDB/DIA/DIASession.h"
#endif
#include "llvm/DebugInfo/PDB/Raw/RawSession.h"
@@ -30,7 +30,7 @@ Error llvm::pdb::loadDataForPDB(PDB_ReaderType Type, StringRef Path,
if (Type == PDB_ReaderType::Raw)
return RawSession::createFromPdb(Path, Session);
-#if HAVE_DIA_SDK
+#if LLVM_ENABLE_DIA_SDK
return DIASession::createFromPdb(Path, Session);
#else
return llvm::make_error<GenericError>("DIA is not installed on the system");
@@ -43,7 +43,7 @@ Error llvm::pdb::loadDataForEXE(PDB_ReaderType Type, StringRef Path,
if (Type == PDB_ReaderType::Raw)
return RawSession::createFromExe(Path, Session);
-#if HAVE_DIA_SDK
+#if LLVM_ENABLE_DIA_SDK
return DIASession::createFromExe(Path, Session);
#else
return llvm::make_error<GenericError>("DIA is not installed on the system");
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBContext.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBContext.cpp
index 773230263da8..94b81ecf561e 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBContext.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBContext.cpp
@@ -29,8 +29,8 @@ PDBContext::PDBContext(const COFFObjectFile &Object,
Session->setLoadAddress(ImageBase.get());
}
-void PDBContext::dump(raw_ostream &OS, DIDumpType DumpType,
- bool DumpEH) {}
+void PDBContext::dump(raw_ostream &OS, DIDumpType DumpType, bool DumpEH,
+ bool SummarizeTypes) {}
DILineInfo PDBContext::getLineInfoForAddress(uint64_t Address,
DILineInfoSpecifier Specifier) {
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBInterfaceAnchors.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBInterfaceAnchors.cpp
index a347c67ba8e1..541fcda45177 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBInterfaceAnchors.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBInterfaceAnchors.cpp
@@ -15,15 +15,14 @@
#include "llvm/DebugInfo/PDB/IPDBLineNumber.h"
#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
#include "llvm/DebugInfo/PDB/IPDBSession.h"
-#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
using namespace llvm;
using namespace llvm::pdb;
-IPDBSession::~IPDBSession() {}
+IPDBSession::~IPDBSession() = default;
-IPDBDataStream::~IPDBDataStream() {}
+IPDBDataStream::~IPDBDataStream() = default;
-IPDBRawSymbol::~IPDBRawSymbol() {}
+IPDBRawSymbol::~IPDBRawSymbol() = default;
-IPDBLineNumber::~IPDBLineNumber() {}
+IPDBLineNumber::~IPDBLineNumber() = default;
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymDumper.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymDumper.cpp
index 9450a988dd6f..2f819312e54e 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymDumper.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymDumper.cpp
@@ -20,7 +20,7 @@ using namespace llvm::pdb;
PDBSymDumper::PDBSymDumper(bool ShouldRequireImpl)
: RequireImpl(ShouldRequireImpl) {}
-PDBSymDumper::~PDBSymDumper() {}
+PDBSymDumper::~PDBSymDumper() = default;
void PDBSymDumper::dump(const PDBSymbolAnnotation &Symbol) {
PDB_SYMDUMP_UNREACHABLE(PDBSymbolAnnotation)
diff --git a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbol.cpp b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbol.cpp
index 78b3afc6079a..633e11aacf12 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/PDBSymbol.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/PDBSymbol.cpp
@@ -8,9 +8,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/PDB/PDBSymbol.h"
-
#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
+#include "llvm/DebugInfo/PDB/PDBExtras.h"
#include "llvm/DebugInfo/PDB/PDBSymbolAnnotation.h"
#include "llvm/DebugInfo/PDB/PDBSymbolBlock.h"
#include "llvm/DebugInfo/PDB/PDBSymbolCompiland.h"
@@ -42,12 +42,9 @@
#include "llvm/DebugInfo/PDB/PDBSymbolTypeVTableShape.h"
#include "llvm/DebugInfo/PDB/PDBSymbolUnknown.h"
#include "llvm/DebugInfo/PDB/PDBSymbolUsingNamespace.h"
-#include "llvm/DebugInfo/PDB/PDBSymDumper.h"
-#include <memory>
-#include <utility>
-
+#include "llvm/DebugInfo/PDB/PDBTypes.h"
+#include <algorithm>
#include <memory>
-#include <utility>
using namespace llvm;
using namespace llvm::pdb;
@@ -56,7 +53,7 @@ PDBSymbol::PDBSymbol(const IPDBSession &PDBSession,
std::unique_ptr<IPDBRawSymbol> Symbol)
: Session(PDBSession), RawSymbol(std::move(Symbol)) {}
-PDBSymbol::~PDBSymbol() {}
+PDBSymbol::~PDBSymbol() = default;
#define FACTORY_SYMTAG_CASE(Tag, Type) \
case PDB_SymType::Tag: \
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Raw/DbiStream.cpp b/contrib/llvm/lib/DebugInfo/PDB/Raw/DbiStream.cpp
index 3c0586c728f9..4f4a0cf65785 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Raw/DbiStream.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Raw/DbiStream.cpp
@@ -7,13 +7,13 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/StringRef.h"
+#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
+#include "llvm/DebugInfo/MSF/StreamArray.h"
+#include "llvm/DebugInfo/MSF/StreamReader.h"
+#include "llvm/DebugInfo/PDB/PDBTypes.h"
#include "llvm/DebugInfo/PDB/Raw/DbiStream.h"
-
-#include "llvm/DebugInfo/CodeView/StreamArray.h"
-#include "llvm/DebugInfo/CodeView/StreamReader.h"
-#include "llvm/DebugInfo/CodeView/StreamWriter.h"
#include "llvm/DebugInfo/PDB/Raw/ISectionContribVisitor.h"
-#include "llvm/DebugInfo/PDB/Raw/IndexedStreamData.h"
#include "llvm/DebugInfo/PDB/Raw/InfoStream.h"
#include "llvm/DebugInfo/PDB/Raw/ModInfo.h"
#include "llvm/DebugInfo/PDB/Raw/NameHashTable.h"
@@ -22,49 +22,17 @@
#include "llvm/DebugInfo/PDB/Raw/RawError.h"
#include "llvm/DebugInfo/PDB/Raw/RawTypes.h"
#include "llvm/Object/COFF.h"
+#include "llvm/Support/Error.h"
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
using namespace llvm;
using namespace llvm::codeview;
+using namespace llvm::msf;
using namespace llvm::pdb;
using namespace llvm::support;
-namespace {
-// Some of the values are stored in bitfields. Since this needs to be portable
-// across compilers and architectures (big / little endian in particular) we
-// can't use the actual structures below, but must instead do the shifting
-// and masking ourselves. The struct definitions are provided for reference.
-
-// struct DbiFlags {
-// uint16_t IncrementalLinking : 1; // True if linked incrementally
-// uint16_t IsStripped : 1; // True if private symbols were stripped.
-// uint16_t HasCTypes : 1; // True if linked with /debug:ctypes.
-// uint16_t Reserved : 13;
-//};
-const uint16_t FlagIncrementalMask = 0x0001;
-const uint16_t FlagStrippedMask = 0x0002;
-const uint16_t FlagHasCTypesMask = 0x0004;
-
-// struct DbiBuildNo {
-// uint16_t MinorVersion : 8;
-// uint16_t MajorVersion : 7;
-// uint16_t NewVersionFormat : 1;
-//};
-const uint16_t BuildMinorMask = 0x00FF;
-const uint16_t BuildMinorShift = 0;
-
-const uint16_t BuildMajorMask = 0x7F00;
-const uint16_t BuildMajorShift = 8;
-
-struct FileInfoSubstreamHeader {
- ulittle16_t NumModules; // Total # of modules, should match number of
- // records in the ModuleInfo substream.
- ulittle16_t NumSourceFiles; // Total # of source files. This value is not
- // accurate because PDB actually supports more
- // than 64k source files, so we ignore it and
- // compute the value from other stream fields.
-};
-}
-
template <typename ContribType>
static Error loadSectionContribs(FixedStreamArray<ContribType> &Output,
StreamReader &Reader) {
@@ -81,15 +49,14 @@ static Error loadSectionContribs(FixedStreamArray<ContribType> &Output,
DbiStream::DbiStream(PDBFile &File, std::unique_ptr<MappedBlockStream> Stream)
: Pdb(File), Stream(std::move(Stream)), Header(nullptr) {
- static_assert(sizeof(HeaderInfo) == 64, "Invalid HeaderInfo size!");
}
-DbiStream::~DbiStream() {}
+DbiStream::~DbiStream() = default;
Error DbiStream::reload() {
StreamReader Reader(*Stream);
- if (Stream->getLength() < sizeof(HeaderInfo))
+ if (Stream->getLength() < sizeof(DbiStreamHeader))
return make_error<RawError>(raw_error_code::corrupt_file,
"DBI Stream does not contain a header.");
if (auto EC = Reader.readObject(Header))
@@ -116,7 +83,7 @@ Error DbiStream::reload() {
"DBI Age does not match PDB Age.");
if (Stream->getLength() !=
- sizeof(HeaderInfo) + Header->ModiSubstreamSize +
+ sizeof(DbiStreamHeader) + Header->ModiSubstreamSize +
Header->SecContrSubstreamSize + Header->SectionMapSize +
Header->FileInfoSize + Header->TypeServerSize +
Header->OptionalDbgHdrSize + Header->ECSubstreamSize)
@@ -142,14 +109,11 @@ Error DbiStream::reload() {
return make_error<RawError>(raw_error_code::corrupt_file,
"DBI type server substream not aligned.");
- // Since each ModInfo in the stream is a variable length, we have to iterate
- // them to know how many there actually are.
- VarStreamArray<ModInfo> ModInfoArray;
- if (auto EC = Reader.readArray(ModInfoArray, Header->ModiSubstreamSize))
+ if (auto EC =
+ Reader.readStreamRef(ModInfoSubstream, Header->ModiSubstreamSize))
+ return EC;
+ if (auto EC = initializeModInfoArray())
return EC;
- for (auto &Info : ModInfoArray) {
- ModuleInfos.emplace_back(Info);
- }
if (auto EC = Reader.readStreamRef(SecContrSubstream,
Header->SecContrSubstreamSize))
@@ -209,25 +173,27 @@ uint16_t DbiStream::getGlobalSymbolStreamIndex() const {
uint16_t DbiStream::getFlags() const { return Header->Flags; }
bool DbiStream::isIncrementallyLinked() const {
- return (Header->Flags & FlagIncrementalMask) != 0;
+ return (Header->Flags & DbiFlags::FlagIncrementalMask) != 0;
}
bool DbiStream::hasCTypes() const {
- return (Header->Flags & FlagHasCTypesMask) != 0;
+ return (Header->Flags & DbiFlags::FlagHasCTypesMask) != 0;
}
bool DbiStream::isStripped() const {
- return (Header->Flags & FlagStrippedMask) != 0;
+ return (Header->Flags & DbiFlags::FlagStrippedMask) != 0;
}
uint16_t DbiStream::getBuildNumber() const { return Header->BuildNumber; }
uint16_t DbiStream::getBuildMajorVersion() const {
- return (Header->BuildNumber & BuildMajorMask) >> BuildMajorShift;
+ return (Header->BuildNumber & DbiBuildNo::BuildMajorMask) >>
+ DbiBuildNo::BuildMajorShift;
}
uint16_t DbiStream::getBuildMinorVersion() const {
- return (Header->BuildNumber & BuildMinorMask) >> BuildMinorShift;
+ return (Header->BuildNumber & DbiBuildNo::BuildMinorMask) >>
+ DbiBuildNo::BuildMinorShift;
}
uint16_t DbiStream::getPdbDllRbld() const { return Header->PdbDllRbld; }
@@ -243,21 +209,20 @@ PDB_Machine DbiStream::getMachineType() const {
return static_cast<PDB_Machine>(Machine);
}
-codeview::FixedStreamArray<object::coff_section>
-DbiStream::getSectionHeaders() {
+msf::FixedStreamArray<object::coff_section> DbiStream::getSectionHeaders() {
return SectionHeaders;
}
-codeview::FixedStreamArray<object::FpoData> DbiStream::getFpoRecords() {
+msf::FixedStreamArray<object::FpoData> DbiStream::getFpoRecords() {
return FpoRecords;
}
ArrayRef<ModuleInfoEx> DbiStream::modules() const { return ModuleInfos; }
-codeview::FixedStreamArray<SecMapEntry> DbiStream::getSectionMap() const {
+msf::FixedStreamArray<SecMapEntry> DbiStream::getSectionMap() const {
return SectionMap;
}
-void llvm::pdb::DbiStream::visitSectionContributions(
+void DbiStream::visitSectionContributions(
ISectionContribVisitor &Visitor) const {
if (SectionContribVersion == DbiSecContribVer60) {
for (auto &SC : SectionContribs)
@@ -285,6 +250,24 @@ Error DbiStream::initializeSectionContributionData() {
"Unsupported DBI Section Contribution version");
}
+Error DbiStream::initializeModInfoArray() {
+ if (ModInfoSubstream.getLength() == 0)
+ return Error::success();
+
+  // Since each ModInfo in the stream has a variable length, we have to
+  // iterate over them to know how many there actually are.
+ StreamReader Reader(ModInfoSubstream);
+
+ VarStreamArray<ModInfo> ModInfoArray;
+ if (auto EC = Reader.readArray(ModInfoArray, ModInfoSubstream.getLength()))
+ return EC;
+ for (auto &Info : ModInfoArray) {
+ ModuleInfos.emplace_back(Info);
+ }
+
+ return Error::success();
+}
+
// Initializes this->SectionHeaders.
Error DbiStream::initializeSectionHeadersData() {
if (DbgStreams.size() == 0)
@@ -294,22 +277,21 @@ Error DbiStream::initializeSectionHeadersData() {
if (StreamNum >= Pdb.getNumStreams())
return make_error<RawError>(raw_error_code::no_stream);
- auto SHS = MappedBlockStream::createIndexedStream(StreamNum, Pdb);
- if (!SHS)
- return SHS.takeError();
+ auto SHS = MappedBlockStream::createIndexedStream(
+ Pdb.getMsfLayout(), Pdb.getMsfBuffer(), StreamNum);
- size_t StreamLen = (*SHS)->getLength();
+ size_t StreamLen = SHS->getLength();
if (StreamLen % sizeof(object::coff_section))
return make_error<RawError>(raw_error_code::corrupt_file,
"Corrupted section header stream.");
size_t NumSections = StreamLen / sizeof(object::coff_section);
- codeview::StreamReader Reader(**SHS);
+ msf::StreamReader Reader(*SHS);
if (auto EC = Reader.readArray(SectionHeaders, NumSections))
return make_error<RawError>(raw_error_code::corrupt_file,
"Could not read a bitmap.");
- SectionHeaderStream = std::move(*SHS);
+ SectionHeaderStream = std::move(SHS);
return Error::success();
}
@@ -321,27 +303,26 @@ Error DbiStream::initializeFpoRecords() {
uint32_t StreamNum = getDebugStreamIndex(DbgHeaderType::NewFPO);
// This means there is no FPO data.
- if (StreamNum == InvalidStreamIndex)
+ if (StreamNum == kInvalidStreamIndex)
return Error::success();
if (StreamNum >= Pdb.getNumStreams())
return make_error<RawError>(raw_error_code::no_stream);
- auto FS = MappedBlockStream::createIndexedStream(StreamNum, Pdb);
- if (!FS)
- return FS.takeError();
+ auto FS = MappedBlockStream::createIndexedStream(
+ Pdb.getMsfLayout(), Pdb.getMsfBuffer(), StreamNum);
- size_t StreamLen = (*FS)->getLength();
+ size_t StreamLen = FS->getLength();
if (StreamLen % sizeof(object::FpoData))
return make_error<RawError>(raw_error_code::corrupt_file,
"Corrupted New FPO stream.");
size_t NumRecords = StreamLen / sizeof(object::FpoData);
- codeview::StreamReader Reader(**FS);
+ msf::StreamReader Reader(*FS);
if (auto EC = Reader.readArray(FpoRecords, NumRecords))
return make_error<RawError>(raw_error_code::corrupt_file,
"Corrupted New FPO stream.");
- FpoStream = std::move(*FS);
+ FpoStream = std::move(FS);
return Error::success();
}
@@ -359,18 +340,6 @@ Error DbiStream::initializeSectionMapData() {
}
Error DbiStream::initializeFileInfo() {
- // The layout of the FileInfoSubstream is like this:
- // struct {
- // ulittle16_t NumModules;
- // ulittle16_t NumSourceFiles;
- // ulittle16_t ModIndices[NumModules];
- // ulittle16_t ModFileCounts[NumModules];
- // ulittle32_t FileNameOffsets[NumSourceFiles];
- // char Names[][NumSourceFiles];
- // };
- // with the caveat that `NumSourceFiles` cannot be trusted, so
- // it is computed by summing `ModFileCounts`.
- //
if (FileInfoSubstream.getLength() == 0)
return Error::success();
@@ -437,7 +406,10 @@ Error DbiStream::initializeFileInfo() {
}
uint32_t DbiStream::getDebugStreamIndex(DbgHeaderType Type) const {
- return DbgStreams[static_cast<uint16_t>(Type)];
+ uint16_t T = static_cast<uint16_t>(Type);
+ if (T >= DbgStreams.size())
+ return kInvalidStreamIndex;
+ return DbgStreams[T];
}
Expected<StringRef> DbiStream::getFileNameForIndex(uint32_t Index) const {
@@ -452,11 +424,3 @@ Expected<StringRef> DbiStream::getFileNameForIndex(uint32_t Index) const {
return std::move(EC);
return Name;
}
-
-Error DbiStream::commit() {
- StreamWriter Writer(*Stream);
- if (auto EC = Writer.writeObject(*Header))
- return EC;
-
- return Error::success();
-}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Raw/DbiStreamBuilder.cpp b/contrib/llvm/lib/DebugInfo/PDB/Raw/DbiStreamBuilder.cpp
index 34ff8ae3a907..1d5b8d693b1e 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Raw/DbiStreamBuilder.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Raw/DbiStreamBuilder.cpp
@@ -9,18 +9,28 @@
#include "llvm/DebugInfo/PDB/Raw/DbiStreamBuilder.h"
-#include "llvm/DebugInfo/CodeView/StreamWriter.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/DebugInfo/MSF/MSFBuilder.h"
+#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
+#include "llvm/DebugInfo/MSF/StreamWriter.h"
#include "llvm/DebugInfo/PDB/Raw/DbiStream.h"
-#include "llvm/DebugInfo/PDB/Raw/MappedBlockStream.h"
#include "llvm/DebugInfo/PDB/Raw/RawError.h"
+#include "llvm/Object/COFF.h"
+#include "llvm/Support/COFF.h"
using namespace llvm;
using namespace llvm::codeview;
+using namespace llvm::msf;
using namespace llvm::pdb;
-DbiStreamBuilder::DbiStreamBuilder()
- : Age(1), BuildNumber(0), PdbDllVersion(0), PdbDllRbld(0), Flags(0),
- MachineType(PDB_Machine::x86) {}
+namespace {
+class ModiSubstreamBuilder {};
+}
+
+DbiStreamBuilder::DbiStreamBuilder(msf::MSFBuilder &Msf)
+ : Msf(Msf), Allocator(Msf.getAllocator()), Age(1), BuildNumber(0),
+ PdbDllVersion(0), PdbDllRbld(0), Flags(0), MachineType(PDB_Machine::x86),
+ Header(nullptr), DbgStreams((int)DbgHeaderType::Max) {}
void DbiStreamBuilder::setVersionHeader(PdbRaw_DbiVer V) { VerHeader = V; }
@@ -36,24 +46,207 @@ void DbiStreamBuilder::setFlags(uint16_t F) { Flags = F; }
void DbiStreamBuilder::setMachineType(PDB_Machine M) { MachineType = M; }
+void DbiStreamBuilder::setSectionContribs(ArrayRef<SectionContrib> Arr) {
+ SectionContribs = Arr;
+}
+
+void DbiStreamBuilder::setSectionMap(ArrayRef<SecMapEntry> SecMap) {
+ SectionMap = SecMap;
+}
+
+Error DbiStreamBuilder::addDbgStream(pdb::DbgHeaderType Type,
+ ArrayRef<uint8_t> Data) {
+ if (DbgStreams[(int)Type].StreamNumber)
+ return make_error<RawError>(raw_error_code::duplicate_entry,
+ "The specified stream type already exists");
+ auto ExpectedIndex = Msf.addStream(Data.size());
+ if (!ExpectedIndex)
+ return ExpectedIndex.takeError();
+ uint32_t Index = std::move(*ExpectedIndex);
+ DbgStreams[(int)Type].Data = Data;
+ DbgStreams[(int)Type].StreamNumber = Index;
+ return Error::success();
+}
+
uint32_t DbiStreamBuilder::calculateSerializedLength() const {
// For now we only support serializing the header.
- return sizeof(DbiStream::HeaderInfo);
-}
-
-Expected<std::unique_ptr<DbiStream>> DbiStreamBuilder::build(PDBFile &File) {
- if (!VerHeader.hasValue())
- return make_error<RawError>(raw_error_code::unspecified,
- "Missing DBI Stream Version");
-
- auto DbiS = MappedBlockStream::createIndexedStream(StreamDBI, File);
- if (!DbiS)
- return DbiS.takeError();
- auto DS = std::move(*DbiS);
- DbiStream::HeaderInfo *H =
- static_cast<DbiStream::HeaderInfo *>(DS->getAllocator().Allocate(
- sizeof(DbiStream::HeaderInfo),
- llvm::AlignOf<DbiStream::HeaderInfo>::Alignment));
+ return sizeof(DbiStreamHeader) + calculateFileInfoSubstreamSize() +
+ calculateModiSubstreamSize() + calculateSectionContribsStreamSize() +
+ calculateSectionMapStreamSize() + calculateDbgStreamsSize();
+}
+
+Error DbiStreamBuilder::addModuleInfo(StringRef ObjFile, StringRef Module) {
+ auto Entry = llvm::make_unique<ModuleInfo>();
+ ModuleInfo *M = Entry.get();
+ Entry->Mod = Module;
+ Entry->Obj = ObjFile;
+ auto Result = ModuleInfos.insert(std::make_pair(Module, std::move(Entry)));
+ if (!Result.second)
+ return make_error<RawError>(raw_error_code::duplicate_entry,
+ "The specified module already exists");
+ ModuleInfoList.push_back(M);
+ return Error::success();
+}
+
+Error DbiStreamBuilder::addModuleSourceFile(StringRef Module, StringRef File) {
+ auto ModIter = ModuleInfos.find(Module);
+ if (ModIter == ModuleInfos.end())
+ return make_error<RawError>(raw_error_code::no_entry,
+ "The specified module was not found");
+ uint32_t Index = SourceFileNames.size();
+ SourceFileNames.insert(std::make_pair(File, Index));
+ auto &ModEntry = *ModIter;
+ ModEntry.second->SourceFiles.push_back(File);
+ return Error::success();
+}
+
+uint32_t DbiStreamBuilder::calculateModiSubstreamSize() const {
+ uint32_t Size = 0;
+ for (const auto &M : ModuleInfoList) {
+ Size += sizeof(ModuleInfoHeader);
+ Size += M->Mod.size() + 1;
+ Size += M->Obj.size() + 1;
+ }
+ return alignTo(Size, sizeof(uint32_t));
+}
+
+uint32_t DbiStreamBuilder::calculateSectionContribsStreamSize() const {
+ if (SectionContribs.empty())
+ return 0;
+ return sizeof(enum PdbRaw_DbiSecContribVer) +
+ sizeof(SectionContribs[0]) * SectionContribs.size();
+}
+
+uint32_t DbiStreamBuilder::calculateSectionMapStreamSize() const {
+ if (SectionMap.empty())
+ return 0;
+ return sizeof(SecMapHeader) + sizeof(SecMapEntry) * SectionMap.size();
+}
+
+uint32_t DbiStreamBuilder::calculateFileInfoSubstreamSize() const {
+ uint32_t Size = 0;
+ Size += sizeof(ulittle16_t); // NumModules
+ Size += sizeof(ulittle16_t); // NumSourceFiles
+ Size += ModuleInfoList.size() * sizeof(ulittle16_t); // ModIndices
+ Size += ModuleInfoList.size() * sizeof(ulittle16_t); // ModFileCounts
+ uint32_t NumFileInfos = 0;
+ for (const auto &M : ModuleInfoList)
+ NumFileInfos += M->SourceFiles.size();
+ Size += NumFileInfos * sizeof(ulittle32_t); // FileNameOffsets
+ Size += calculateNamesBufferSize();
+ return alignTo(Size, sizeof(uint32_t));
+}
+
+uint32_t DbiStreamBuilder::calculateNamesBufferSize() const {
+ uint32_t Size = 0;
+ for (const auto &F : SourceFileNames) {
+ Size += F.getKeyLength() + 1; // Names[I];
+ }
+ return Size;
+}
+
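Working the two size calculations above through a hypothetical example: two modules, one source file each, with names "a.cc" and "b.c". alignTo(Size, sizeof(uint32_t)) is modeled by a local round-up helper:

#include <cstdint>
#include <cstdio>
#include <cstring>

static uint32_t alignTo4(uint32_t N) { return (N + 3) & ~3u; }

int main() {
  uint32_t NumModules = 2;
  uint32_t NumFileInfos = 2; // one source file per module
  uint32_t Names = std::strlen("a.cc") + 1 + std::strlen("b.c") + 1; // 5 + 4 = 9
  uint32_t Size = 2                  // NumModules field
                  + 2                // NumSourceFiles field
                  + NumModules * 2   // ModIndices
                  + NumModules * 2   // ModFileCounts
                  + NumFileInfos * 4 // FileNameOffsets
                  + Names;           // names buffer
  std::printf("%u bytes, aligned to %u\n", Size, alignTo4(Size)); // 29 -> 32
  return 0;
}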
+uint32_t DbiStreamBuilder::calculateDbgStreamsSize() const {
+ return DbgStreams.size() * sizeof(uint16_t);
+}
+
+Error DbiStreamBuilder::generateModiSubstream() {
+ uint32_t Size = calculateModiSubstreamSize();
+ auto Data = Allocator.Allocate<uint8_t>(Size);
+
+ ModInfoBuffer = MutableByteStream(MutableArrayRef<uint8_t>(Data, Size));
+
+ StreamWriter ModiWriter(ModInfoBuffer);
+ for (const auto &M : ModuleInfoList) {
+ ModuleInfoHeader Layout = {};
+ Layout.ModDiStream = kInvalidStreamIndex;
+ Layout.NumFiles = M->SourceFiles.size();
+ if (auto EC = ModiWriter.writeObject(Layout))
+ return EC;
+ if (auto EC = ModiWriter.writeZeroString(M->Mod))
+ return EC;
+ if (auto EC = ModiWriter.writeZeroString(M->Obj))
+ return EC;
+ }
+ if (ModiWriter.bytesRemaining() > sizeof(uint32_t))
+ return make_error<RawError>(raw_error_code::invalid_format,
+ "Unexpected bytes in Modi Stream Data");
+ return Error::success();
+}
+
+Error DbiStreamBuilder::generateFileInfoSubstream() {
+ uint32_t Size = calculateFileInfoSubstreamSize();
+ uint32_t NameSize = calculateNamesBufferSize();
+ auto Data = Allocator.Allocate<uint8_t>(Size);
+ uint32_t NamesOffset = Size - NameSize;
+
+ FileInfoBuffer = MutableByteStream(MutableArrayRef<uint8_t>(Data, Size));
+
+ WritableStreamRef MetadataBuffer =
+ WritableStreamRef(FileInfoBuffer).keep_front(NamesOffset);
+ StreamWriter MetadataWriter(MetadataBuffer);
+
+ uint16_t ModiCount = std::min<uint32_t>(UINT16_MAX, ModuleInfos.size());
+ uint16_t FileCount = std::min<uint32_t>(UINT16_MAX, SourceFileNames.size());
+ if (auto EC = MetadataWriter.writeInteger(ModiCount)) // NumModules
+ return EC;
+ if (auto EC = MetadataWriter.writeInteger(FileCount)) // NumSourceFiles
+ return EC;
+ for (uint16_t I = 0; I < ModiCount; ++I) {
+ if (auto EC = MetadataWriter.writeInteger(I)) // Mod Indices
+ return EC;
+ }
+ for (const auto MI : ModuleInfoList) {
+ FileCount = static_cast<uint16_t>(MI->SourceFiles.size());
+ if (auto EC = MetadataWriter.writeInteger(FileCount)) // Mod File Counts
+ return EC;
+ }
+
+ // Before writing the FileNameOffsets array, write the NamesBuffer array.
+ // A side effect of this is that this will actually compute the various
+ // file name offsets, so we can then go back and write the FileNameOffsets
+ // array to the other substream.
+ NamesBuffer = WritableStreamRef(FileInfoBuffer).drop_front(NamesOffset);
+ StreamWriter NameBufferWriter(NamesBuffer);
+ for (auto &Name : SourceFileNames) {
+ Name.second = NameBufferWriter.getOffset();
+ if (auto EC = NameBufferWriter.writeZeroString(Name.getKey()))
+ return EC;
+ }
+
+ for (const auto MI : ModuleInfoList) {
+ for (StringRef Name : MI->SourceFiles) {
+ auto Result = SourceFileNames.find(Name);
+ if (Result == SourceFileNames.end())
+ return make_error<RawError>(raw_error_code::no_entry,
+ "The source file was not found.");
+ if (auto EC = MetadataWriter.writeInteger(Result->second))
+ return EC;
+ }
+ }
+
+ if (NameBufferWriter.bytesRemaining() > 0)
+ return make_error<RawError>(raw_error_code::invalid_format,
+ "The names buffer contained unexpected data.");
+
+ if (MetadataWriter.bytesRemaining() > sizeof(uint32_t))
+ return make_error<RawError>(
+ raw_error_code::invalid_format,
+ "The metadata buffer contained unexpected data.");
+
+ return Error::success();
+}
+
+Error DbiStreamBuilder::finalize() {
+ if (Header)
+ return Error::success();
+
+ DbiStreamHeader *H = Allocator.Allocate<DbiStreamHeader>();
+
+ if (auto EC = generateModiSubstream())
+ return EC;
+ if (auto EC = generateFileInfoSubstream())
+ return EC;
+
H->VersionHeader = *VerHeader;
H->VersionSignature = -1;
H->Age = Age;
@@ -64,18 +257,156 @@ Expected<std::unique_ptr<DbiStream>> DbiStreamBuilder::build(PDBFile &File) {
H->MachineType = static_cast<uint16_t>(MachineType);
H->ECSubstreamSize = 0;
- H->FileInfoSize = 0;
- H->ModiSubstreamSize = 0;
- H->OptionalDbgHdrSize = 0;
- H->SecContrSubstreamSize = 0;
- H->SectionMapSize = 0;
+ H->FileInfoSize = FileInfoBuffer.getLength();
+ H->ModiSubstreamSize = ModInfoBuffer.getLength();
+ H->OptionalDbgHdrSize = DbgStreams.size() * sizeof(uint16_t);
+ H->SecContrSubstreamSize = calculateSectionContribsStreamSize();
+ H->SectionMapSize = calculateSectionMapStreamSize();
H->TypeServerSize = 0;
- H->SymRecordStreamIndex = DbiStream::InvalidStreamIndex;
- H->PublicSymbolStreamIndex = DbiStream::InvalidStreamIndex;
- H->MFCTypeServerIndex = DbiStream::InvalidStreamIndex;
- H->GlobalSymbolStreamIndex = DbiStream::InvalidStreamIndex;
-
- auto Dbi = llvm::make_unique<DbiStream>(File, std::move(DS));
- Dbi->Header = H;
- return std::move(Dbi);
+ H->SymRecordStreamIndex = kInvalidStreamIndex;
+ H->PublicSymbolStreamIndex = kInvalidStreamIndex;
+ H->MFCTypeServerIndex = kInvalidStreamIndex;
+ H->GlobalSymbolStreamIndex = kInvalidStreamIndex;
+
+ Header = H;
+ return Error::success();
+}
+
+Error DbiStreamBuilder::finalizeMsfLayout() {
+ uint32_t Length = calculateSerializedLength();
+ if (auto EC = Msf.setStreamSize(StreamDBI, Length))
+ return EC;
+ return Error::success();
+}
+
+static uint16_t toSecMapFlags(uint32_t Flags) {
+ uint16_t Ret = 0;
+ if (Flags & COFF::IMAGE_SCN_MEM_READ)
+ Ret |= static_cast<uint16_t>(OMFSegDescFlags::Read);
+ if (Flags & COFF::IMAGE_SCN_MEM_WRITE)
+ Ret |= static_cast<uint16_t>(OMFSegDescFlags::Write);
+ if (Flags & COFF::IMAGE_SCN_MEM_EXECUTE)
+ Ret |= static_cast<uint16_t>(OMFSegDescFlags::Execute);
+ if (!(Flags & COFF::IMAGE_SCN_MEM_16BIT))
+ Ret |= static_cast<uint16_t>(OMFSegDescFlags::AddressIs32Bit);
+
+  // This always seems to be 1.
+ Ret |= static_cast<uint16_t>(OMFSegDescFlags::IsSelector);
+
+ return Ret;
+}
+
+// A utility function to create Section Contributions
+// for the given input sections.
+std::vector<SectionContrib> DbiStreamBuilder::createSectionContribs(
+ ArrayRef<object::coff_section> SecHdrs) {
+ std::vector<SectionContrib> Ret;
+
+ // Create a SectionContrib for each input section.
+ for (auto &Sec : SecHdrs) {
+ Ret.emplace_back();
+ auto &Entry = Ret.back();
+ memset(&Entry, 0, sizeof(Entry));
+
+ Entry.Off = Sec.PointerToRawData;
+ Entry.Size = Sec.SizeOfRawData;
+ Entry.Characteristics = Sec.Characteristics;
+ }
+ return Ret;
+}
+
+// A utility function to create a Section Map for a given list of COFF sections.
+//
+// A Section Map seems to be a copy of the COFF section list in another format.
+// I don't know why a PDB file contains both a COFF section header and
+// a Section Map, but it seems it must be present in a PDB.
+std::vector<SecMapEntry> DbiStreamBuilder::createSectionMap(
+ ArrayRef<llvm::object::coff_section> SecHdrs) {
+ std::vector<SecMapEntry> Ret;
+ int Idx = 0;
+
+ auto Add = [&]() -> SecMapEntry & {
+ Ret.emplace_back();
+ auto &Entry = Ret.back();
+ memset(&Entry, 0, sizeof(Entry));
+
+ Entry.Frame = Idx + 1;
+
+ // We don't know the meaning of these fields yet.
+ Entry.SecName = UINT16_MAX;
+ Entry.ClassName = UINT16_MAX;
+
+ return Entry;
+ };
+
+ for (auto &Hdr : SecHdrs) {
+ auto &Entry = Add();
+ Entry.Flags = toSecMapFlags(Hdr.Characteristics);
+ Entry.SecByteLength = Hdr.VirtualSize;
+ ++Idx;
+ }
+
+ // The last entry is for absolute symbols.
+ auto &Entry = Add();
+ Entry.Flags = static_cast<uint16_t>(OMFSegDescFlags::AddressIs32Bit) |
+ static_cast<uint16_t>(OMFSegDescFlags::IsAbsoluteAddress);
+ Entry.SecByteLength = UINT32_MAX;
+
+ return Ret;
+}
+
+Error DbiStreamBuilder::commit(const msf::MSFLayout &Layout,
+ const msf::WritableStream &Buffer) {
+ if (auto EC = finalize())
+ return EC;
+
+ auto InfoS =
+ WritableMappedBlockStream::createIndexedStream(Layout, Buffer, StreamDBI);
+
+ StreamWriter Writer(*InfoS);
+ if (auto EC = Writer.writeObject(*Header))
+ return EC;
+
+ if (auto EC = Writer.writeStreamRef(ModInfoBuffer))
+ return EC;
+
+ if (!SectionContribs.empty()) {
+ if (auto EC = Writer.writeEnum(DbiSecContribVer60))
+ return EC;
+ if (auto EC = Writer.writeArray(SectionContribs))
+ return EC;
+ }
+
+ if (!SectionMap.empty()) {
+ ulittle16_t Size = static_cast<ulittle16_t>(SectionMap.size());
+ SecMapHeader SMHeader = {Size, Size};
+ if (auto EC = Writer.writeObject(SMHeader))
+ return EC;
+ if (auto EC = Writer.writeArray(SectionMap))
+ return EC;
+ }
+
+ if (auto EC = Writer.writeStreamRef(FileInfoBuffer))
+ return EC;
+
+ for (auto &Stream : DbgStreams)
+ if (auto EC = Writer.writeInteger(Stream.StreamNumber))
+ return EC;
+
+ for (auto &Stream : DbgStreams) {
+ if (Stream.StreamNumber == kInvalidStreamIndex)
+ continue;
+ auto WritableStream = WritableMappedBlockStream::createIndexedStream(
+ Layout, Buffer, Stream.StreamNumber);
+ StreamWriter DbgStreamWriter(*WritableStream);
+ if (auto EC = DbgStreamWriter.writeArray(Stream.Data))
+ return EC;
+ }
+
+ if (Writer.bytesRemaining() > 0)
+ return make_error<RawError>(raw_error_code::invalid_format,
+ "Unexpected bytes found in DBI Stream");
+ return Error::success();
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Raw/GSI.cpp b/contrib/llvm/lib/DebugInfo/PDB/Raw/GSI.cpp
new file mode 100644
index 000000000000..6ecbb5c8cfad
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/Raw/GSI.cpp
@@ -0,0 +1,93 @@
+//===- GSI.cpp - Common Functions for GlobalsStream and PublicsStream ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "GSI.h"
+
+#include "llvm/DebugInfo/MSF/StreamArray.h"
+#include "llvm/DebugInfo/MSF/StreamReader.h"
+#include "llvm/DebugInfo/PDB/Raw/RawError.h"
+#include "llvm/DebugInfo/PDB/Raw/RawTypes.h"
+
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+namespace pdb {
+
+static Error checkHashHdrVersion(const GSIHashHeader *HashHdr) {
+ if (HashHdr->VerHdr != GSIHashHeader::HdrVersion)
+ return make_error<RawError>(
+ raw_error_code::feature_unsupported,
+ "Encountered unsupported globals stream version.");
+
+ return Error::success();
+}
+
+Error readGSIHashBuckets(
+ msf::FixedStreamArray<support::ulittle32_t> &HashBuckets,
+ const GSIHashHeader *HashHdr, msf::StreamReader &Reader) {
+ if (auto EC = checkHashHdrVersion(HashHdr))
+ return EC;
+
+ // Before the actual hash buckets, there is a bitmap of length determined by
+ // IPHR_HASH.
+ ArrayRef<uint8_t> Bitmap;
+ size_t BitmapSizeInBits = alignTo(IPHR_HASH + 1, 32);
+ uint32_t NumBitmapEntries = BitmapSizeInBits / 8;
+ if (auto EC = Reader.readBytes(Bitmap, NumBitmapEntries))
+ return joinErrors(std::move(EC),
+ make_error<RawError>(raw_error_code::corrupt_file,
+ "Could not read a bitmap."));
+ uint32_t NumBuckets = 0;
+ for (uint8_t B : Bitmap)
+ NumBuckets += countPopulation(B);
+
+ // Hash buckets follow.
+ if (auto EC = Reader.readArray(HashBuckets, NumBuckets))
+ return joinErrors(std::move(EC),
+ make_error<RawError>(raw_error_code::corrupt_file,
+ "Hash buckets corrupted."));
+
+ return Error::success();
+}
+
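A quick sketch of the bitmap-size arithmetic used above: IPHR_HASH = 4096 comes from GSI.h, and alignTo(IPHR_HASH + 1, 32) is written out as an explicit round-up:

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t IPHR_HASH = 4096;
  uint32_t BitmapSizeInBits = (IPHR_HASH + 1 + 31) / 32 * 32; // alignTo(4097, 32) = 4128
  uint32_t NumBitmapEntries = BitmapSizeInBits / 8;           // 516 bytes of bitmap
  std::printf("bitmap occupies %u bytes before the buckets\n", NumBitmapEntries);
  return 0;
}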
+Error readGSIHashHeader(const GSIHashHeader *&HashHdr,
+ msf::StreamReader &Reader) {
+ if (Reader.readObject(HashHdr))
+ return make_error<RawError>(raw_error_code::corrupt_file,
+ "Stream does not contain a GSIHashHeader.");
+
+ if (HashHdr->VerSignature != GSIHashHeader::HdrSignature)
+ return make_error<RawError>(
+ raw_error_code::feature_unsupported,
+ "GSIHashHeader signature (0xffffffff) not found.");
+
+ return Error::success();
+}
+
+Error readGSIHashRecords(msf::FixedStreamArray<PSHashRecord> &HashRecords,
+ const GSIHashHeader *HashHdr,
+ msf::StreamReader &Reader) {
+ if (auto EC = checkHashHdrVersion(HashHdr))
+ return EC;
+
+ // HashHdr->HrSize specifies the number of bytes of PSHashRecords we have.
+ // Verify that we can read them all.
+ if (HashHdr->HrSize % sizeof(PSHashRecord))
+ return make_error<RawError>(raw_error_code::corrupt_file,
+ "Invalid HR array size.");
+ uint32_t NumHashRecords = HashHdr->HrSize / sizeof(PSHashRecord);
+ if (auto EC = Reader.readArray(HashRecords, NumHashRecords))
+ return joinErrors(std::move(EC),
+ make_error<RawError>(raw_error_code::corrupt_file,
+ "Error reading hash records."));
+
+ return Error::success();
+}
+}
+}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Raw/GSI.h b/contrib/llvm/lib/DebugInfo/PDB/Raw/GSI.h
new file mode 100644
index 000000000000..82cebd946538
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/Raw/GSI.h
@@ -0,0 +1,70 @@
+//===- GSI.h - Common Declarations for GlobalsStream and PublicsStream ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The data structures defined in this file are based on the reference
+// implementation which is available at
+// https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/dbi/gsi.h
+//
+// When reading the reference source code, you'll find the
+// information below useful.
+//
+// - ppdb1->m_fMinimalDbgInfo seems to always be true.
+// - The SMALLBUCKETS macro is defined.
+//
+// The reference doesn't compile, so I learned it just by reading the code.
+// It's not guaranteed to be correct.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_DEBUGINFO_PDB_RAW_GSI_H
+#define LLVM_LIB_DEBUGINFO_PDB_RAW_GSI_H
+
+#include "llvm/DebugInfo/MSF/StreamArray.h"
+#include "llvm/DebugInfo/PDB/Raw/RawTypes.h"
+
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+
+namespace msf {
+class StreamReader;
+}
+
+namespace pdb {
+
+/// From https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/dbi/gsi.cpp
+static const unsigned IPHR_HASH = 4096;
+
+/// Header of the hash tables found in the globals and publics sections.
+/// Based on GSIHashHeader in
+/// https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/dbi/gsi.h
+struct GSIHashHeader {
+ enum : unsigned {
+ HdrSignature = ~0U,
+ HdrVersion = 0xeffe0000 + 19990810,
+ };
+ support::ulittle32_t VerSignature;
+ support::ulittle32_t VerHdr;
+ support::ulittle32_t HrSize;
+ support::ulittle32_t NumBuckets;
+};
+
+Error readGSIHashBuckets(
+ msf::FixedStreamArray<support::ulittle32_t> &HashBuckets,
+ const GSIHashHeader *HashHdr, msf::StreamReader &Reader);
+Error readGSIHashHeader(const GSIHashHeader *&HashHdr,
+ msf::StreamReader &Reader);
+Error readGSIHashRecords(msf::FixedStreamArray<PSHashRecord> &HashRecords,
+ const GSIHashHeader *HashHdr,
+ msf::StreamReader &Reader);
+}
+}
+
+#endif
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Raw/GlobalsStream.cpp b/contrib/llvm/lib/DebugInfo/PDB/Raw/GlobalsStream.cpp
new file mode 100644
index 000000000000..31afc9200b1b
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/Raw/GlobalsStream.cpp
@@ -0,0 +1,42 @@
+//===- GlobalsStream.cpp - PDB Index of Symbols by Name --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "GSI.h"
+#include "llvm/DebugInfo/MSF/StreamReader.h"
+#include "llvm/DebugInfo/PDB/Raw/GlobalsStream.h"
+#include "llvm/Support/Error.h"
+#include <algorithm>
+
+using namespace llvm;
+using namespace llvm::msf;
+using namespace llvm::pdb;
+
+GlobalsStream::GlobalsStream(std::unique_ptr<MappedBlockStream> Stream)
+ : Stream(std::move(Stream)) {}
+
+GlobalsStream::~GlobalsStream() = default;
+
+Error GlobalsStream::reload() {
+ StreamReader Reader(*Stream);
+
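+ // A globals stream is a GSI hash table: a fixed header, followed by the
+ // hash records, followed by the bucket bitmap and bucket entries.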
+ const GSIHashHeader *HashHdr;
+ if (auto EC = readGSIHashHeader(HashHdr, Reader))
+ return EC;
+
+ if (auto EC = readGSIHashRecords(HashRecords, HashHdr, Reader))
+ return EC;
+
+ if (auto EC = readGSIHashBuckets(HashBuckets, HashHdr, Reader))
+ return EC;
+ NumBuckets = HashBuckets.size();
+
+ return Error::success();
+}
+
+Error GlobalsStream::commit() { return Error::success(); }
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Raw/Hash.cpp b/contrib/llvm/lib/DebugInfo/PDB/Raw/Hash.cpp
index 23cb55786d78..b9f685ec69d4 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Raw/Hash.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Raw/Hash.cpp
@@ -11,6 +11,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/Endian.h"
+#include "llvm/Support/JamCRC.h"
using namespace llvm;
using namespace llvm::support;
@@ -73,59 +74,13 @@ uint32_t pdb::hashStringV2(StringRef Str) {
Hash ^= (Hash >> 6);
}
- return Hash * 1664525L + 1013904223L;
+ return Hash * 1664525U + 1013904223U;
}
-static const uint32_t V8HashTable[] = {
- 0x00000000, 0x77073096, 0xEE0E612C, 0x990951BA, 0x076DC419, 0x706AF48F,
- 0xE963A535, 0x9E6495A3, 0x0EDB8832, 0x79DCB8A4, 0xE0D5E91E, 0x97D2D988,
- 0x09B64C2B, 0x7EB17CBD, 0xE7B82D07, 0x90BF1D91, 0x1DB71064, 0x6AB020F2,
- 0xF3B97148, 0x84BE41DE, 0x1ADAD47D, 0x6DDDE4EB, 0xF4D4B551, 0x83D385C7,
- 0x136C9856, 0x646BA8C0, 0xFD62F97A, 0x8A65C9EC, 0x14015C4F, 0x63066CD9,
- 0xFA0F3D63, 0x8D080DF5, 0x3B6E20C8, 0x4C69105E, 0xD56041E4, 0xA2677172,
- 0x3C03E4D1, 0x4B04D447, 0xD20D85FD, 0xA50AB56B, 0x35B5A8FA, 0x42B2986C,
- 0xDBBBC9D6, 0xACBCF940, 0x32D86CE3, 0x45DF5C75, 0xDCD60DCF, 0xABD13D59,
- 0x26D930AC, 0x51DE003A, 0xC8D75180, 0xBFD06116, 0x21B4F4B5, 0x56B3C423,
- 0xCFBA9599, 0xB8BDA50F, 0x2802B89E, 0x5F058808, 0xC60CD9B2, 0xB10BE924,
- 0x2F6F7C87, 0x58684C11, 0xC1611DAB, 0xB6662D3D, 0x76DC4190, 0x01DB7106,
- 0x98D220BC, 0xEFD5102A, 0x71B18589, 0x06B6B51F, 0x9FBFE4A5, 0xE8B8D433,
- 0x7807C9A2, 0x0F00F934, 0x9609A88E, 0xE10E9818, 0x7F6A0DBB, 0x086D3D2D,
- 0x91646C97, 0xE6635C01, 0x6B6B51F4, 0x1C6C6162, 0x856530D8, 0xF262004E,
- 0x6C0695ED, 0x1B01A57B, 0x8208F4C1, 0xF50FC457, 0x65B0D9C6, 0x12B7E950,
- 0x8BBEB8EA, 0xFCB9887C, 0x62DD1DDF, 0x15DA2D49, 0x8CD37CF3, 0xFBD44C65,
- 0x4DB26158, 0x3AB551CE, 0xA3BC0074, 0xD4BB30E2, 0x4ADFA541, 0x3DD895D7,
- 0xA4D1C46D, 0xD3D6F4FB, 0x4369E96A, 0x346ED9FC, 0xAD678846, 0xDA60B8D0,
- 0x44042D73, 0x33031DE5, 0xAA0A4C5F, 0xDD0D7CC9, 0x5005713C, 0x270241AA,
- 0xBE0B1010, 0xC90C2086, 0x5768B525, 0x206F85B3, 0xB966D409, 0xCE61E49F,
- 0x5EDEF90E, 0x29D9C998, 0xB0D09822, 0xC7D7A8B4, 0x59B33D17, 0x2EB40D81,
- 0xB7BD5C3B, 0xC0BA6CAD, 0xEDB88320, 0x9ABFB3B6, 0x03B6E20C, 0x74B1D29A,
- 0xEAD54739, 0x9DD277AF, 0x04DB2615, 0x73DC1683, 0xE3630B12, 0x94643B84,
- 0x0D6D6A3E, 0x7A6A5AA8, 0xE40ECF0B, 0x9309FF9D, 0x0A00AE27, 0x7D079EB1,
- 0xF00F9344, 0x8708A3D2, 0x1E01F268, 0x6906C2FE, 0xF762575D, 0x806567CB,
- 0x196C3671, 0x6E6B06E7, 0xFED41B76, 0x89D32BE0, 0x10DA7A5A, 0x67DD4ACC,
- 0xF9B9DF6F, 0x8EBEEFF9, 0x17B7BE43, 0x60B08ED5, 0xD6D6A3E8, 0xA1D1937E,
- 0x38D8C2C4, 0x4FDFF252, 0xD1BB67F1, 0xA6BC5767, 0x3FB506DD, 0x48B2364B,
- 0xD80D2BDA, 0xAF0A1B4C, 0x36034AF6, 0x41047A60, 0xDF60EFC3, 0xA867DF55,
- 0x316E8EEF, 0x4669BE79, 0xCB61B38C, 0xBC66831A, 0x256FD2A0, 0x5268E236,
- 0xCC0C7795, 0xBB0B4703, 0x220216B9, 0x5505262F, 0xC5BA3BBE, 0xB2BD0B28,
- 0x2BB45A92, 0x5CB36A04, 0xC2D7FFA7, 0xB5D0CF31, 0x2CD99E8B, 0x5BDEAE1D,
- 0x9B64C2B0, 0xEC63F226, 0x756AA39C, 0x026D930A, 0x9C0906A9, 0xEB0E363F,
- 0x72076785, 0x05005713, 0x95BF4A82, 0xE2B87A14, 0x7BB12BAE, 0x0CB61B38,
- 0x92D28E9B, 0xE5D5BE0D, 0x7CDCEFB7, 0x0BDBDF21, 0x86D3D2D4, 0xF1D4E242,
- 0x68DDB3F8, 0x1FDA836E, 0x81BE16CD, 0xF6B9265B, 0x6FB077E1, 0x18B74777,
- 0x88085AE6, 0xFF0F6A70, 0x66063BCA, 0x11010B5C, 0x8F659EFF, 0xF862AE69,
- 0x616BFFD3, 0x166CCF45, 0xA00AE278, 0xD70DD2EE, 0x4E048354, 0x3903B3C2,
- 0xA7672661, 0xD06016F7, 0x4969474D, 0x3E6E77DB, 0xAED16A4A, 0xD9D65ADC,
- 0x40DF0B66, 0x37D83BF0, 0xA9BCAE53, 0xDEBB9EC5, 0x47B2CF7F, 0x30B5FFE9,
- 0xBDBDF21C, 0xCABAC28A, 0x53B39330, 0x24B4A3A6, 0xBAD03605, 0xCDD70693,
- 0x54DE5729, 0x23D967BF, 0xB3667A2E, 0xC4614AB8, 0x5D681B02, 0x2A6F2B94,
- 0xB40BBE37, 0xC30C8EA1, 0x5A05DF1B, 0x2D02EF8D,
-};
-
// Corresponds to `SigForPbCb` in langapi/shared/crc32.h.
uint32_t pdb::hashBufferV8(ArrayRef<uint8_t> Buf) {
- uint32_t Hash = 0;
- for (uint8_t Byte : Buf)
- Hash = (Hash >> 8) ^ V8HashTable[(Hash & 0xff) ^ Byte];
- return Hash;
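+ // A zero-seeded JamCRC matches the table-driven CRC this replaces:
+ // reflected CRC-32, zero initial value, and no final inversion.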
+ JamCRC JC(/*Init=*/0U);
+ JC.update(makeArrayRef<char>(reinterpret_cast<const char *>(Buf.data()),
+ Buf.size()));
+ return JC.getCRC();
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Raw/IndexedStreamData.cpp b/contrib/llvm/lib/DebugInfo/PDB/Raw/IndexedStreamData.cpp
deleted file mode 100644
index 9bd16ea76efc..000000000000
--- a/contrib/llvm/lib/DebugInfo/PDB/Raw/IndexedStreamData.cpp
+++ /dev/null
@@ -1,25 +0,0 @@
-//===- IndexedStreamData.cpp - Standard PDB Stream Data ---------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/DebugInfo/PDB/Raw/IndexedStreamData.h"
-#include "llvm/DebugInfo/PDB/Raw/IPDBFile.h"
-
-using namespace llvm;
-using namespace llvm::pdb;
-
-IndexedStreamData::IndexedStreamData(uint32_t StreamIdx, const IPDBFile &File)
- : StreamIdx(StreamIdx), File(File) {}
-
-uint32_t IndexedStreamData::getLength() {
- return File.getStreamByteSize(StreamIdx);
-}
-
-ArrayRef<support::ulittle32_t> IndexedStreamData::getStreamBlocks() {
- return File.getStreamBlockList(StreamIdx);
-}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Raw/InfoStream.cpp b/contrib/llvm/lib/DebugInfo/PDB/Raw/InfoStream.cpp
index c33a764587c7..f19535d11806 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Raw/InfoStream.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Raw/InfoStream.cpp
@@ -10,24 +10,25 @@
#include "llvm/DebugInfo/PDB/Raw/InfoStream.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/DebugInfo/CodeView/StreamReader.h"
-#include "llvm/DebugInfo/CodeView/StreamWriter.h"
-#include "llvm/DebugInfo/PDB/Raw/IndexedStreamData.h"
+#include "llvm/DebugInfo/MSF/StreamReader.h"
+#include "llvm/DebugInfo/MSF/StreamWriter.h"
#include "llvm/DebugInfo/PDB/Raw/PDBFile.h"
#include "llvm/DebugInfo/PDB/Raw/RawConstants.h"
#include "llvm/DebugInfo/PDB/Raw/RawError.h"
+#include "llvm/DebugInfo/PDB/Raw/RawTypes.h"
using namespace llvm;
using namespace llvm::codeview;
+using namespace llvm::msf;
using namespace llvm::pdb;
InfoStream::InfoStream(std::unique_ptr<MappedBlockStream> Stream)
: Stream(std::move(Stream)) {}
Error InfoStream::reload() {
- codeview::StreamReader Reader(*Stream);
+ StreamReader Reader(*Stream);
- const HeaderInfo *H;
+ const InfoStreamHeader *H;
if (auto EC = Reader.readObject(H))
return joinErrors(
std::move(EC),
@@ -74,17 +75,3 @@ uint32_t InfoStream::getSignature() const { return Signature; }
uint32_t InfoStream::getAge() const { return Age; }
PDB_UniqueId InfoStream::getGuid() const { return Guid; }
-
-Error InfoStream::commit() {
- StreamWriter Writer(*Stream);
-
- HeaderInfo H;
- H.Age = Age;
- H.Signature = Signature;
- H.Version = Version;
- H.Guid = Guid;
- if (auto EC = Writer.writeObject(H))
- return EC;
-
- return NamedStreams.commit(Writer);
-}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Raw/InfoStreamBuilder.cpp b/contrib/llvm/lib/DebugInfo/PDB/Raw/InfoStreamBuilder.cpp
index 7be9cc32db96..73fbf853b4f7 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Raw/InfoStreamBuilder.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Raw/InfoStreamBuilder.cpp
@@ -9,16 +9,20 @@
#include "llvm/DebugInfo/PDB/Raw/InfoStreamBuilder.h"
-#include "llvm/DebugInfo/CodeView/StreamWriter.h"
+#include "llvm/DebugInfo/MSF/MSFBuilder.h"
+#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
+#include "llvm/DebugInfo/MSF/StreamWriter.h"
#include "llvm/DebugInfo/PDB/Raw/InfoStream.h"
-#include "llvm/DebugInfo/PDB/Raw/MappedBlockStream.h"
#include "llvm/DebugInfo/PDB/Raw/RawError.h"
+#include "llvm/DebugInfo/PDB/Raw/RawTypes.h"
using namespace llvm;
using namespace llvm::codeview;
+using namespace llvm::msf;
using namespace llvm::pdb;
-InfoStreamBuilder::InfoStreamBuilder() {}
+InfoStreamBuilder::InfoStreamBuilder(msf::MSFBuilder &Msf)
+ : Msf(Msf), Ver(PdbRaw_ImplVer::PdbImplVC70), Sig(-1), Age(0) {}
void InfoStreamBuilder::setVersion(PdbRaw_ImplVer V) { Ver = V; }
@@ -33,35 +37,29 @@ NameMapBuilder &InfoStreamBuilder::getNamedStreamsBuilder() {
}
uint32_t InfoStreamBuilder::calculateSerializedLength() const {
- return sizeof(InfoStream::HeaderInfo) +
- NamedStreams.calculateSerializedLength();
+ return sizeof(InfoStreamHeader) + NamedStreams.calculateSerializedLength();
}
-Expected<std::unique_ptr<InfoStream>> InfoStreamBuilder::build(PDBFile &File) {
- if (!Ver.hasValue())
- return make_error<RawError>(raw_error_code::unspecified,
- "Missing PDB Stream Version");
- if (!Sig.hasValue())
- return make_error<RawError>(raw_error_code::unspecified,
- "Missing PDB Stream Signature");
- if (!Age.hasValue())
- return make_error<RawError>(raw_error_code::unspecified,
- "Missing PDB Stream Age");
- if (!Guid.hasValue())
- return make_error<RawError>(raw_error_code::unspecified,
- "Missing PDB Stream Guid");
+Error InfoStreamBuilder::finalizeMsfLayout() {
+ uint32_t Length = calculateSerializedLength();
+ if (auto EC = Msf.setStreamSize(StreamPDB, Length))
+ return EC;
+ return Error::success();
+}
+
+Error InfoStreamBuilder::commit(const msf::MSFLayout &Layout,
+ const msf::WritableStream &Buffer) const {
+ auto InfoS =
+ WritableMappedBlockStream::createIndexedStream(Layout, Buffer, StreamPDB);
+ StreamWriter Writer(*InfoS);
+
+ InfoStreamHeader H;
+ H.Age = Age;
+ H.Signature = Sig;
+ H.Version = Ver;
+ H.Guid = Guid;
+ if (auto EC = Writer.writeObject(H))
+ return EC;
- auto InfoS = MappedBlockStream::createIndexedStream(StreamPDB, File);
- if (!InfoS)
- return InfoS.takeError();
- auto Info = llvm::make_unique<InfoStream>(std::move(*InfoS));
- Info->Version = *Ver;
- Info->Signature = *Sig;
- Info->Age = *Age;
- Info->Guid = *Guid;
- auto NS = NamedStreams.build();
- if (!NS)
- return NS.takeError();
- Info->NamedStreams = **NS;
- return std::move(Info);
+ return NamedStreams.commit(Writer);
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Raw/MappedBlockStream.cpp b/contrib/llvm/lib/DebugInfo/PDB/Raw/MappedBlockStream.cpp
deleted file mode 100644
index 92b2048c3c22..000000000000
--- a/contrib/llvm/lib/DebugInfo/PDB/Raw/MappedBlockStream.cpp
+++ /dev/null
@@ -1,310 +0,0 @@
-//===- MappedBlockStream.cpp - Reads stream data from a PDBFile -----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/DebugInfo/PDB/Raw/MappedBlockStream.h"
-#include "llvm/DebugInfo/PDB/Raw/DirectoryStreamData.h"
-#include "llvm/DebugInfo/PDB/Raw/IPDBStreamData.h"
-#include "llvm/DebugInfo/PDB/Raw/IndexedStreamData.h"
-#include "llvm/DebugInfo/PDB/Raw/PDBFile.h"
-#include "llvm/DebugInfo/PDB/Raw/RawError.h"
-
-using namespace llvm;
-using namespace llvm::pdb;
-
-namespace {
-// This exists so that we can use make_unique while still keeping the
-// constructor of MappedBlockStream private, forcing users to go through
-// the `create` interface.
-class MappedBlockStreamImpl : public MappedBlockStream {
-public:
- MappedBlockStreamImpl(std::unique_ptr<IPDBStreamData> Data,
- const IPDBFile &File)
- : MappedBlockStream(std::move(Data), File) {}
-};
-}
-
-typedef std::pair<uint32_t, uint32_t> Interval;
-static Interval intersect(const Interval &I1, const Interval &I2) {
- return std::make_pair(std::max(I1.first, I2.first),
- std::min(I1.second, I2.second));
-}
-
-MappedBlockStream::MappedBlockStream(std::unique_ptr<IPDBStreamData> Data,
- const IPDBFile &Pdb)
- : Pdb(Pdb), Data(std::move(Data)) {}
-
-Error MappedBlockStream::readBytes(uint32_t Offset, uint32_t Size,
- ArrayRef<uint8_t> &Buffer) const {
- // Make sure we aren't trying to read beyond the end of the stream.
- if (Size > Data->getLength())
- return make_error<RawError>(raw_error_code::insufficient_buffer);
- if (Offset > Data->getLength() - Size)
- return make_error<RawError>(raw_error_code::insufficient_buffer);
-
- if (tryReadContiguously(Offset, Size, Buffer))
- return Error::success();
-
- auto CacheIter = CacheMap.find(Offset);
- if (CacheIter != CacheMap.end()) {
- // Try to find an alloc that was large enough for this request.
- for (auto &Entry : CacheIter->second) {
- if (Entry.size() >= Size) {
- Buffer = Entry.slice(0, Size);
- return Error::success();
- }
- }
- }
-
- // We couldn't find a buffer that started at the correct offset (the most
- // common scenario). Try to see if there is a buffer that starts at some
- // other offset but overlaps the desired range.
- for (auto &CacheItem : CacheMap) {
- Interval RequestExtent = std::make_pair(Offset, Offset + Size);
-
- // We already checked this one on the fast path above.
- if (CacheItem.first == Offset)
- continue;
- // If the initial extent of the cached item is beyond the ending extent
- // of the request, there is no overlap.
- if (CacheItem.first >= Offset + Size)
- continue;
-
- // We really only have to check the last item in the list, since we append
- // in order of increasing length.
- if (CacheItem.second.empty())
- continue;
-
- auto CachedAlloc = CacheItem.second.back();
- // If the initial extent of the request is beyond the ending extent of
- // the cached item, there is no overlap.
- Interval CachedExtent =
- std::make_pair(CacheItem.first, CacheItem.first + CachedAlloc.size());
- if (RequestExtent.first >= CachedExtent.first + CachedExtent.second)
- continue;
-
- Interval Intersection = intersect(CachedExtent, RequestExtent);
- // Only use this if the entire request extent is contained in the cached
- // extent.
- if (Intersection != RequestExtent)
- continue;
-
- uint32_t CacheRangeOffset =
- AbsoluteDifference(CachedExtent.first, Intersection.first);
- Buffer = CachedAlloc.slice(CacheRangeOffset, Size);
- return Error::success();
- }
-
- // Otherwise allocate a large enough buffer in the pool, memcpy the data
- // into it, and return an ArrayRef to that. Do not touch existing pool
- // allocations, as existing clients may be holding a pointer which must
- // not be invalidated.
- uint8_t *WriteBuffer = static_cast<uint8_t *>(Pool.Allocate(Size, 8));
- if (auto EC = readBytes(Offset, MutableArrayRef<uint8_t>(WriteBuffer, Size)))
- return EC;
-
- if (CacheIter != CacheMap.end()) {
- CacheIter->second.emplace_back(WriteBuffer, Size);
- } else {
- std::vector<CacheEntry> List;
- List.emplace_back(WriteBuffer, Size);
- CacheMap.insert(std::make_pair(Offset, List));
- }
- Buffer = ArrayRef<uint8_t>(WriteBuffer, Size);
- return Error::success();
-}
-
-Error MappedBlockStream::readLongestContiguousChunk(
- uint32_t Offset, ArrayRef<uint8_t> &Buffer) const {
- // Make sure we aren't trying to read beyond the end of the stream.
- if (Offset >= Data->getLength())
- return make_error<RawError>(raw_error_code::insufficient_buffer);
- uint32_t First = Offset / Pdb.getBlockSize();
- uint32_t Last = First;
-
- auto BlockList = Data->getStreamBlocks();
- while (Last < Pdb.getBlockCount() - 1) {
- if (BlockList[Last] != BlockList[Last + 1] - 1)
- break;
- ++Last;
- }
-
- uint32_t OffsetInFirstBlock = Offset % Pdb.getBlockSize();
- uint32_t BytesFromFirstBlock = Pdb.getBlockSize() - OffsetInFirstBlock;
- uint32_t BlockSpan = Last - First + 1;
- uint32_t ByteSpan =
- BytesFromFirstBlock + (BlockSpan - 1) * Pdb.getBlockSize();
- auto Result = Pdb.getBlockData(BlockList[First], Pdb.getBlockSize());
- if (!Result)
- return Result.takeError();
- Buffer = Result->drop_front(OffsetInFirstBlock);
- Buffer = ArrayRef<uint8_t>(Buffer.data(), ByteSpan);
- return Error::success();
-}
-
-uint32_t MappedBlockStream::getLength() const { return Data->getLength(); }
-
-Error MappedBlockStream::commit() const { return Error::success(); }
-
-bool MappedBlockStream::tryReadContiguously(uint32_t Offset, uint32_t Size,
- ArrayRef<uint8_t> &Buffer) const {
- // Attempt to fulfill the request with a reference directly into the stream.
- // This can work even if the request crosses a block boundary, provided that
- // all subsequent blocks are contiguous. For example, a 10k read with a 4k
- // block size can be filled with a reference if, from the starting offset,
- // 3 blocks in a row are contiguous.
- uint32_t BlockNum = Offset / Pdb.getBlockSize();
- uint32_t OffsetInBlock = Offset % Pdb.getBlockSize();
- uint32_t BytesFromFirstBlock =
- std::min(Size, Pdb.getBlockSize() - OffsetInBlock);
- uint32_t NumAdditionalBlocks =
- llvm::alignTo(Size - BytesFromFirstBlock, Pdb.getBlockSize()) /
- Pdb.getBlockSize();
-
- auto BlockList = Data->getStreamBlocks();
- uint32_t RequiredContiguousBlocks = NumAdditionalBlocks + 1;
- uint32_t E = BlockList[BlockNum];
- for (uint32_t I = 0; I < RequiredContiguousBlocks; ++I, ++E) {
- if (BlockList[I + BlockNum] != E)
- return false;
- }
-
- uint32_t FirstBlockAddr = BlockList[BlockNum];
- auto Result = Pdb.getBlockData(FirstBlockAddr, Pdb.getBlockSize());
- if (!Result) {
- consumeError(Result.takeError());
- return false;
- }
- auto Data = Result->drop_front(OffsetInBlock);
- Buffer = ArrayRef<uint8_t>(Data.data(), Size);
- return true;
-}
-
-Error MappedBlockStream::readBytes(uint32_t Offset,
- MutableArrayRef<uint8_t> Buffer) const {
- uint32_t BlockNum = Offset / Pdb.getBlockSize();
- uint32_t OffsetInBlock = Offset % Pdb.getBlockSize();
-
- // Make sure we aren't trying to read beyond the end of the stream.
- if (Buffer.size() > Data->getLength())
- return make_error<RawError>(raw_error_code::insufficient_buffer);
- if (Offset > Data->getLength() - Buffer.size())
- return make_error<RawError>(raw_error_code::insufficient_buffer);
-
- uint32_t BytesLeft = Buffer.size();
- uint32_t BytesWritten = 0;
- uint8_t *WriteBuffer = Buffer.data();
- auto BlockList = Data->getStreamBlocks();
- while (BytesLeft > 0) {
- uint32_t StreamBlockAddr = BlockList[BlockNum];
-
- auto Result = Pdb.getBlockData(StreamBlockAddr, Pdb.getBlockSize());
- if (!Result)
- return Result.takeError();
-
- auto Data = *Result;
- const uint8_t *ChunkStart = Data.data() + OffsetInBlock;
- uint32_t BytesInChunk =
- std::min(BytesLeft, Pdb.getBlockSize() - OffsetInBlock);
- ::memcpy(WriteBuffer + BytesWritten, ChunkStart, BytesInChunk);
-
- BytesWritten += BytesInChunk;
- BytesLeft -= BytesInChunk;
- ++BlockNum;
- OffsetInBlock = 0;
- }
-
- return Error::success();
-}
-
-Error MappedBlockStream::writeBytes(uint32_t Offset,
- ArrayRef<uint8_t> Buffer) const {
- // Make sure we aren't trying to write beyond the end of the stream.
- if (Buffer.size() > Data->getLength())
- return make_error<RawError>(raw_error_code::insufficient_buffer);
-
- if (Offset > Data->getLength() - Buffer.size())
- return make_error<RawError>(raw_error_code::insufficient_buffer);
-
- uint32_t BlockNum = Offset / Pdb.getBlockSize();
- uint32_t OffsetInBlock = Offset % Pdb.getBlockSize();
-
- uint32_t BytesLeft = Buffer.size();
- auto BlockList = Data->getStreamBlocks();
- uint32_t BytesWritten = 0;
- while (BytesLeft > 0) {
- uint32_t StreamBlockAddr = BlockList[BlockNum];
- uint32_t BytesToWriteInChunk =
- std::min(BytesLeft, Pdb.getBlockSize() - OffsetInBlock);
-
- const uint8_t *Chunk = Buffer.data() + BytesWritten;
- ArrayRef<uint8_t> ChunkData(Chunk, BytesToWriteInChunk);
- if (auto EC = Pdb.setBlockData(StreamBlockAddr, OffsetInBlock, ChunkData))
- return EC;
-
- BytesLeft -= BytesToWriteInChunk;
- BytesWritten += BytesToWriteInChunk;
- ++BlockNum;
- OffsetInBlock = 0;
- }
-
- // If this write overlapped a read which previously came from the pool,
- // someone may still be holding a pointer to that alloc which is now invalid.
- // Compute the overlapping range and update the cache entry, so any
- // outstanding buffers are automatically updated.
- for (const auto &MapEntry : CacheMap) {
- // If the end of the written extent precedes the beginning of the cached
- // extent, ignore this map entry.
- if (Offset + BytesWritten < MapEntry.first)
- continue;
- for (const auto &Alloc : MapEntry.second) {
- // If the end of the cached extent precedes the beginning of the written
- // extent, ignore this alloc.
- if (MapEntry.first + Alloc.size() < Offset)
- continue;
-
- // If we get here, they are guaranteed to overlap.
- Interval WriteInterval = std::make_pair(Offset, Offset + BytesWritten);
- Interval CachedInterval =
- std::make_pair(MapEntry.first, MapEntry.first + Alloc.size());
- // If they overlap, we need to write the new data into the overlapping
- // range.
- auto Intersection = intersect(WriteInterval, CachedInterval);
- assert(Intersection.first <= Intersection.second);
-
- uint32_t Length = Intersection.second - Intersection.first;
- uint32_t SrcOffset =
- AbsoluteDifference(WriteInterval.first, Intersection.first);
- uint32_t DestOffset =
- AbsoluteDifference(CachedInterval.first, Intersection.first);
- ::memcpy(Alloc.data() + DestOffset, Buffer.data() + SrcOffset, Length);
- }
- }
-
- return Error::success();
-}
-
-uint32_t MappedBlockStream::getNumBytesCopied() const {
- return static_cast<uint32_t>(Pool.getBytesAllocated());
-}
-
-Expected<std::unique_ptr<MappedBlockStream>>
-MappedBlockStream::createIndexedStream(uint32_t StreamIdx,
- const IPDBFile &File) {
- if (StreamIdx >= File.getNumStreams())
- return make_error<RawError>(raw_error_code::no_stream);
-
- auto Data = llvm::make_unique<IndexedStreamData>(StreamIdx, File);
- return llvm::make_unique<MappedBlockStreamImpl>(std::move(Data), File);
-}
-
-Expected<std::unique_ptr<MappedBlockStream>>
-MappedBlockStream::createDirectoryStream(const PDBFile &File) {
- auto Data = llvm::make_unique<DirectoryStreamData>(File);
- return llvm::make_unique<MappedBlockStreamImpl>(std::move(Data), File);
-}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Raw/ModInfo.cpp b/contrib/llvm/lib/DebugInfo/PDB/Raw/ModInfo.cpp
index bae135f77bc0..b34d7700d036 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Raw/ModInfo.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Raw/ModInfo.cpp
@@ -7,76 +7,27 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/DebugInfo/MSF/StreamReader.h"
#include "llvm/DebugInfo/PDB/Raw/ModInfo.h"
-
-#include "llvm/DebugInfo/CodeView/StreamReader.h"
-#include "llvm/DebugInfo/PDB/Raw/PDBFile.h"
+#include "llvm/DebugInfo/PDB/Raw/RawTypes.h"
#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/MathExtras.h"
+#include <cstdint>
using namespace llvm;
+using namespace llvm::msf;
using namespace llvm::pdb;
using namespace llvm::support;
-namespace {
-
-struct SCBytes {
- ulittle16_t Section;
- char Padding1[2];
- little32_t Offset;
- little32_t Size;
- ulittle32_t Characteristics;
- ulittle16_t ModuleIndex;
- char Padding2[2];
- ulittle32_t DataCrc;
- ulittle32_t RelocCrc;
-};
-
-// struct Flags {
-// uint16_t fWritten : 1; // True if ModInfo is dirty
-// uint16_t fECEnabled : 1; // Is EC symbolic info present? (What is EC?)
-// uint16_t unused : 6; // Reserved
-// uint16_t iTSM : 8; // Type Server Index for this module
-//};
-const uint16_t HasECFlagMask = 0x2;
-
-const uint16_t TypeServerIndexMask = 0xFF00;
-const uint16_t TypeServerIndexShift = 8;
-}
+ModInfo::ModInfo() = default;
+
+ModInfo::ModInfo(const ModInfo &Info) = default;
-struct ModInfo::FileLayout {
- ulittle32_t Mod; // Currently opened module. This field is a
- // pointer in the reference implementation, but
- // that won't work on 64-bit systems, and anyway
- // it doesn't make sense to read a pointer from a
- // file. For now it is unused, so just ignore it.
- SCBytes SC; // First section contribution of this module.
- ulittle16_t Flags; // See Flags definition.
- ulittle16_t ModDiStream; // Stream Number of module debug info
- ulittle32_t SymBytes; // Size of local symbol debug info in above stream
- ulittle32_t LineBytes; // Size of line number debug info in above stream
- ulittle32_t C13Bytes; // Size of C13 line number info in above stream
- ulittle16_t NumFiles; // Number of files contributing to this module
- char Padding1[2]; // Padding so the next field is 4-byte aligned.
- ulittle32_t FileNameOffs; // array of [0..NumFiles) DBI name buffer offsets.
- // This field is a pointer in the reference
- // implementation, but as with `Mod`, we ignore it
- // for now since it is unused.
- ulittle32_t SrcFileNameNI; // Name Index for src file name
- ulittle32_t PdbFilePathNI; // Name Index for path to compiler PDB
- // Null terminated Module name
- // Null terminated Obj File Name
-};
-
-ModInfo::ModInfo() : Layout(nullptr) {}
-
-ModInfo::ModInfo(const ModInfo &Info)
- : ModuleName(Info.ModuleName), ObjFileName(Info.ObjFileName),
- Layout(Info.Layout) {}
-
-ModInfo::~ModInfo() {}
-
-Error ModInfo::initialize(codeview::StreamRef Stream, ModInfo &Info) {
- codeview::StreamReader Reader(Stream);
+ModInfo::~ModInfo() = default;
+
+Error ModInfo::initialize(ReadableStreamRef Stream, ModInfo &Info) {
+ StreamReader Reader(Stream);
if (auto EC = Reader.readObject(Info.Layout))
return EC;
@@ -88,10 +39,13 @@ Error ModInfo::initialize(codeview::StreamRef Stream, ModInfo &Info) {
return Error::success();
}
-bool ModInfo::hasECInfo() const { return (Layout->Flags & HasECFlagMask) != 0; }
+bool ModInfo::hasECInfo() const {
+ return (Layout->Flags & ModInfoFlags::HasECFlagMask) != 0;
+}
uint16_t ModInfo::getTypeServerIndex() const {
- return (Layout->Flags & TypeServerIndexMask) >> TypeServerIndexShift;
+ return (Layout->Flags & ModInfoFlags::TypeServerIndexMask) >>
+ ModInfoFlags::TypeServerIndexShift;
}
uint16_t ModInfo::getModuleStreamIndex() const { return Layout->ModDiStream; }
@@ -121,7 +75,7 @@ StringRef ModInfo::getObjFileName() const { return ObjFileName; }
uint32_t ModInfo::getRecordLength() const {
uint32_t M = ModuleName.str().size() + 1;
uint32_t O = ObjFileName.str().size() + 1;
- uint32_t Size = sizeof(FileLayout) + M + O;
- Size = llvm::alignTo(Size, 4);
+ uint32_t Size = sizeof(ModuleInfoHeader) + M + O;
+ Size = alignTo(Size, 4);
return Size;
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Raw/ModStream.cpp b/contrib/llvm/lib/DebugInfo/PDB/Raw/ModStream.cpp
index 3415fcd47790..0ffc5b7d44aa 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Raw/ModStream.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Raw/ModStream.cpp
@@ -7,39 +7,43 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/DebugInfo/PDB/Raw/ModStream.h"
-
-#include "llvm/DebugInfo/CodeView/StreamReader.h"
-#include "llvm/DebugInfo/PDB/Raw/IndexedStreamData.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
+#include "llvm/DebugInfo/MSF/StreamReader.h"
+#include "llvm/DebugInfo/MSF/StreamRef.h"
#include "llvm/DebugInfo/PDB/Raw/ModInfo.h"
+#include "llvm/DebugInfo/PDB/Raw/ModStream.h"
#include "llvm/DebugInfo/PDB/Raw/PDBFile.h"
#include "llvm/DebugInfo/PDB/Raw/RawError.h"
#include "llvm/DebugInfo/PDB/Raw/RawTypes.h"
+#include "llvm/Support/Error.h"
+#include <algorithm>
+#include <cstdint>
using namespace llvm;
+using namespace llvm::msf;
using namespace llvm::pdb;
ModStream::ModStream(const ModInfo &Module,
std::unique_ptr<MappedBlockStream> Stream)
: Mod(Module), Stream(std::move(Stream)) {}
-ModStream::~ModStream() {}
+ModStream::~ModStream() = default;
Error ModStream::reload() {
- codeview::StreamReader Reader(*Stream);
+ StreamReader Reader(*Stream);
uint32_t SymbolSize = Mod.getSymbolDebugInfoByteSize();
uint32_t C11Size = Mod.getLineInfoByteSize();
uint32_t C13Size = Mod.getC13LineInfoByteSize();
if (C11Size > 0 && C13Size > 0)
- return llvm::make_error<RawError>(raw_error_code::corrupt_file,
- "Module has both C11 and C13 line info");
+ return make_error<RawError>(raw_error_code::corrupt_file,
+ "Module has both C11 and C13 line info");
- codeview::StreamRef S;
+ ReadableStreamRef S;
- uint32_t SymbolSubstreamSig = 0;
- if (auto EC = Reader.readInteger(SymbolSubstreamSig))
+ if (auto EC = Reader.readInteger(Signature))
return EC;
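+ // The 4-byte signature just read is counted as part of SymbolSize, so the
+ // symbol records occupy the remaining SymbolSize - 4 bytes.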
if (auto EC = Reader.readArray(SymbolsSubstream, SymbolSize - 4))
return EC;
@@ -49,7 +53,7 @@ Error ModStream::reload() {
if (auto EC = Reader.readStreamRef(C13LinesSubstream, C13Size))
return EC;
- codeview::StreamReader LineReader(C13LinesSubstream);
+ StreamReader LineReader(C13LinesSubstream);
if (auto EC = LineReader.readArray(LineInfo, LineReader.bytesRemaining()))
return EC;
@@ -59,8 +63,8 @@ Error ModStream::reload() {
if (auto EC = Reader.readStreamRef(GlobalRefsSubstream, GlobalRefsSize))
return EC;
if (Reader.bytesRemaining() > 0)
- return llvm::make_error<RawError>(raw_error_code::corrupt_file,
- "Unexpected bytes in module stream.");
+ return make_error<RawError>(raw_error_code::corrupt_file,
+ "Unexpected bytes in module stream.");
return Error::success();
}
@@ -69,14 +73,13 @@ iterator_range<codeview::CVSymbolArray::Iterator>
ModStream::symbols(bool *HadError) const {
// It's OK if the stream is empty.
if (SymbolsSubstream.getUnderlyingStream().getLength() == 0)
- return llvm::make_range(SymbolsSubstream.end(), SymbolsSubstream.end());
- return llvm::make_range(SymbolsSubstream.begin(HadError),
- SymbolsSubstream.end());
+ return make_range(SymbolsSubstream.end(), SymbolsSubstream.end());
+ return make_range(SymbolsSubstream.begin(HadError), SymbolsSubstream.end());
}
iterator_range<codeview::ModuleSubstreamArray::Iterator>
ModStream::lines(bool *HadError) const {
- return llvm::make_range(LineInfo.begin(HadError), LineInfo.end());
+ return make_range(LineInfo.begin(HadError), LineInfo.end());
}
Error ModStream::commit() { return Error::success(); }
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Raw/NameHashTable.cpp b/contrib/llvm/lib/DebugInfo/PDB/Raw/NameHashTable.cpp
index ae4ebf27721e..84cccb354bd8 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Raw/NameHashTable.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Raw/NameHashTable.cpp
@@ -10,18 +10,19 @@
#include "llvm/DebugInfo/PDB/Raw/NameHashTable.h"
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/DebugInfo/CodeView/StreamReader.h"
+#include "llvm/DebugInfo/MSF/StreamReader.h"
#include "llvm/DebugInfo/PDB/Raw/Hash.h"
#include "llvm/DebugInfo/PDB/Raw/RawError.h"
#include "llvm/Support/Endian.h"
using namespace llvm;
+using namespace llvm::msf;
using namespace llvm::support;
using namespace llvm::pdb;
NameHashTable::NameHashTable() : Signature(0), HashVersion(0), NameCount(0) {}
-Error NameHashTable::load(codeview::StreamReader &Stream) {
+Error NameHashTable::load(StreamReader &Stream) {
struct Header {
support::ulittle32_t Signature;
support::ulittle32_t HashVersion;
@@ -72,7 +73,7 @@ StringRef NameHashTable::getStringForID(uint32_t ID) const {
// the starting offset of the string we're looking for. So just seek into
// the desired offset and read a null-terminated stream from that offset.
StringRef Result;
- codeview::StreamReader NameReader(NamesBuffer);
+ StreamReader NameReader(NamesBuffer);
NameReader.setOffset(ID);
if (auto EC = NameReader.readZeroString(Result))
consumeError(std::move(EC));
@@ -98,7 +99,6 @@ uint32_t NameHashTable::getIDForString(StringRef Str) const {
return IDs[0];
}
-codeview::FixedStreamArray<support::ulittle32_t>
-NameHashTable::name_ids() const {
+FixedStreamArray<support::ulittle32_t> NameHashTable::name_ids() const {
return IDs;
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Raw/NameMap.cpp b/contrib/llvm/lib/DebugInfo/PDB/Raw/NameMap.cpp
index b8a4eb79a48c..0f55f58da381 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Raw/NameMap.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Raw/NameMap.cpp
@@ -7,20 +7,24 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/DebugInfo/PDB/Raw/NameMap.h"
#include "llvm/ADT/SparseBitVector.h"
-#include "llvm/DebugInfo/CodeView/StreamReader.h"
-#include "llvm/DebugInfo/CodeView/StreamWriter.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/DebugInfo/MSF/StreamReader.h"
+#include "llvm/DebugInfo/PDB/Raw/NameMap.h"
#include "llvm/DebugInfo/PDB/Raw/RawError.h"
+#include "llvm/Support/Error.h"
+#include <algorithm>
+#include <cstdint>
using namespace llvm;
-using namespace llvm::codeview;
+using namespace llvm::msf;
using namespace llvm::pdb;
-NameMap::NameMap() {}
-
-Error NameMap::load(codeview::StreamReader &Stream) {
+NameMap::NameMap() = default;
+Error NameMap::load(StreamReader &Stream) {
// This is some sort of weird string-set/hash table encoded in the stream.
// It starts with the number of bytes in the table.
uint32_t NumberOfBytes;
@@ -145,63 +149,9 @@ Error NameMap::load(codeview::StreamReader &Stream) {
return Error::success();
}
-Error NameMap::commit(codeview::StreamWriter &Writer) {
- // The first field is the number of bytes of string data. So add
- // up the length of all strings plus a null terminator for each
- // one.
- uint32_t NumBytes = 0;
- for (auto B = Mapping.begin(), E = Mapping.end(); B != E; ++B) {
- NumBytes += B->getKeyLength() + 1;
- }
-
- if (auto EC = Writer.writeInteger(NumBytes)) // Number of bytes of string data
- return EC;
- // Now all of the string data itself.
- for (auto B = Mapping.begin(), E = Mapping.end(); B != E; ++B) {
- if (auto EC = Writer.writeZeroString(B->getKey()))
- return EC;
- }
-
- if (auto EC = Writer.writeInteger(Mapping.size())) // Hash Size
- return EC;
-
- if (auto EC = Writer.writeInteger(Mapping.size())) // Max Number of Strings
- return EC;
-
- if (auto EC = Writer.writeInteger(Mapping.size())) // Num Present Words
- return EC;
-
- // For each entry in the mapping, write a bit mask which represents a bucket
- // to store it in. We don't use this, so the value we write isn't important
- // to us, it just has to be there.
- for (auto B = Mapping.begin(), E = Mapping.end(); B != E; ++B) {
- if (auto EC = Writer.writeInteger(1U))
- return EC;
- }
-
- if (auto EC = Writer.writeInteger(0U)) // Num Deleted Words
- return EC;
-
- // Mappings of each word.
- uint32_t OffsetSoFar = 0;
- for (auto B = Mapping.begin(), E = Mapping.end(); B != E; ++B) {
- // This is a list of key value pairs where the key is the offset into the
- // strings buffer, and the value is a stream number. Write each pair.
- if (auto EC = Writer.writeInteger(OffsetSoFar))
- return EC;
-
- if (auto EC = Writer.writeInteger(B->second))
- return EC;
-
- OffsetSoFar += B->getKeyLength() + 1;
- }
-
- return Error::success();
-}
-
iterator_range<StringMapConstIterator<uint32_t>> NameMap::entries() const {
- return llvm::make_range<StringMapConstIterator<uint32_t>>(Mapping.begin(),
- Mapping.end());
+ return make_range<StringMapConstIterator<uint32_t>>(Mapping.begin(),
+ Mapping.end());
}
bool NameMap::tryGetValue(StringRef Name, uint32_t &Value) const {
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Raw/NameMapBuilder.cpp b/contrib/llvm/lib/DebugInfo/PDB/Raw/NameMapBuilder.cpp
index 41c6c2cd810a..f570d5931b0f 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Raw/NameMapBuilder.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Raw/NameMapBuilder.cpp
@@ -7,15 +7,19 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/DebugInfo/PDB/Raw/NameMapBuilder.h"
-
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/DebugInfo/MSF/StreamWriter.h"
#include "llvm/DebugInfo/PDB/Raw/NameMap.h"
+#include "llvm/DebugInfo/PDB/Raw/NameMapBuilder.h"
#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+#include <algorithm>
+#include <cstdint>
using namespace llvm;
using namespace llvm::pdb;
-NameMapBuilder::NameMapBuilder() {}
+NameMapBuilder::NameMapBuilder() = default;
void NameMapBuilder::addMapping(StringRef Name, uint32_t Mapping) {
StringDataBytes += Name.size() + 1;
@@ -48,3 +52,57 @@ uint32_t NameMapBuilder::calculateSerializedLength() const {
return TotalLength;
}
+
+Error NameMapBuilder::commit(msf::StreamWriter &Writer) const {
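+ // Serialized layout: string data size, the strings themselves, hash size,
+ // maximum string count, present/deleted word bookkeeping, and finally one
+ // (offset, stream number) pair per entry.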
+ // The first field is the number of bytes of string data. So add
+ // up the length of all strings plus a null terminator for each
+ // one.
+ uint32_t NumBytes = 0;
+ for (auto B = Map.begin(), E = Map.end(); B != E; ++B) {
+ NumBytes += B->getKeyLength() + 1;
+ }
+
+ if (auto EC = Writer.writeInteger(NumBytes)) // Number of bytes of string data
+ return EC;
+ // Now all of the string data itself.
+ for (auto B = Map.begin(), E = Map.end(); B != E; ++B) {
+ if (auto EC = Writer.writeZeroString(B->getKey()))
+ return EC;
+ }
+
+ if (auto EC = Writer.writeInteger(Map.size())) // Hash Size
+ return EC;
+
+ if (auto EC = Writer.writeInteger(Map.size())) // Max Number of Strings
+ return EC;
+
+ if (auto EC = Writer.writeInteger(Map.size())) // Num Present Words
+ return EC;
+
+ // For each entry in the mapping, write a bit mask which represents a bucket
+ // to store it in. We don't use this value, so what we write isn't
+ // important; it just has to be present.
+ for (auto B = Map.begin(), E = Map.end(); B != E; ++B) {
+ if (auto EC = Writer.writeInteger(1U))
+ return EC;
+ }
+
+ if (auto EC = Writer.writeInteger(0U)) // Num Deleted Words
+ return EC;
+
+ // Mappings of each word.
+ uint32_t OffsetSoFar = 0;
+ for (auto B = Map.begin(), E = Map.end(); B != E; ++B) {
+ // This is a list of key-value pairs where the key is the offset into the
+ // strings buffer, and the value is a stream number. Write each pair.
+ if (auto EC = Writer.writeInteger(OffsetSoFar))
+ return EC;
+
+ if (auto EC = Writer.writeInteger(B->second))
+ return EC;
+
+ OffsetSoFar += B->getKeyLength() + 1;
+ }
+
+ return Error::success();
+}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Raw/PDBFile.cpp b/contrib/llvm/lib/DebugInfo/PDB/Raw/PDBFile.cpp
index 95016753dc14..53491518b8c7 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Raw/PDBFile.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Raw/PDBFile.cpp
@@ -7,68 +7,84 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/DebugInfo/PDB/Raw/PDBFile.h"
-
#include "llvm/ADT/ArrayRef.h"
-#include "llvm/DebugInfo/CodeView/StreamArray.h"
-#include "llvm/DebugInfo/CodeView/StreamInterface.h"
-#include "llvm/DebugInfo/CodeView/StreamReader.h"
-#include "llvm/DebugInfo/CodeView/StreamWriter.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
+#include "llvm/DebugInfo/MSF/MSFCommon.h"
+#include "llvm/DebugInfo/MSF/StreamArray.h"
+#include "llvm/DebugInfo/MSF/StreamInterface.h"
+#include "llvm/DebugInfo/MSF/StreamReader.h"
#include "llvm/DebugInfo/PDB/Raw/DbiStream.h"
-#include "llvm/DebugInfo/PDB/Raw/DirectoryStreamData.h"
-#include "llvm/DebugInfo/PDB/Raw/IndexedStreamData.h"
+#include "llvm/DebugInfo/PDB/Raw/GlobalsStream.h"
#include "llvm/DebugInfo/PDB/Raw/InfoStream.h"
#include "llvm/DebugInfo/PDB/Raw/NameHashTable.h"
+#include "llvm/DebugInfo/PDB/Raw/PDBFile.h"
#include "llvm/DebugInfo/PDB/Raw/PublicsStream.h"
#include "llvm/DebugInfo/PDB/Raw/RawError.h"
#include "llvm/DebugInfo/PDB/Raw/SymbolStream.h"
#include "llvm/DebugInfo/PDB/Raw/TpiStream.h"
#include "llvm/Support/Endian.h"
-#include "llvm/Support/FileOutputBuffer.h"
-#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Error.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
using namespace llvm;
using namespace llvm::codeview;
+using namespace llvm::msf;
using namespace llvm::pdb;
namespace {
typedef FixedStreamArray<support::ulittle32_t> ulittle_array;
-}
+} // end anonymous namespace
-PDBFile::PDBFile(std::unique_ptr<StreamInterface> PdbFileBuffer)
- : Buffer(std::move(PdbFileBuffer)), SB(nullptr) {}
+PDBFile::PDBFile(std::unique_ptr<ReadableStream> PdbFileBuffer,
+ BumpPtrAllocator &Allocator)
+ : Allocator(Allocator), Buffer(std::move(PdbFileBuffer)) {}
-PDBFile::~PDBFile() {}
+PDBFile::~PDBFile() = default;
-uint32_t PDBFile::getBlockSize() const { return SB->BlockSize; }
+uint32_t PDBFile::getBlockSize() const { return ContainerLayout.SB->BlockSize; }
-uint32_t PDBFile::getFreeBlockMapBlock() const { return SB->FreeBlockMapBlock; }
+uint32_t PDBFile::getFreeBlockMapBlock() const {
+ return ContainerLayout.SB->FreeBlockMapBlock;
+}
-uint32_t PDBFile::getBlockCount() const { return SB->NumBlocks; }
+uint32_t PDBFile::getBlockCount() const {
+ return ContainerLayout.SB->NumBlocks;
+}
-uint32_t PDBFile::getNumDirectoryBytes() const { return SB->NumDirectoryBytes; }
+uint32_t PDBFile::getNumDirectoryBytes() const {
+ return ContainerLayout.SB->NumDirectoryBytes;
+}
-uint32_t PDBFile::getBlockMapIndex() const { return SB->BlockMapAddr; }
+uint32_t PDBFile::getBlockMapIndex() const {
+ return ContainerLayout.SB->BlockMapAddr;
+}
-uint32_t PDBFile::getUnknown1() const { return SB->Unknown1; }
+uint32_t PDBFile::getUnknown1() const { return ContainerLayout.SB->Unknown1; }
uint32_t PDBFile::getNumDirectoryBlocks() const {
- return msf::bytesToBlocks(SB->NumDirectoryBytes, SB->BlockSize);
+ return msf::bytesToBlocks(ContainerLayout.SB->NumDirectoryBytes,
+ ContainerLayout.SB->BlockSize);
}
uint64_t PDBFile::getBlockMapOffset() const {
- return (uint64_t)SB->BlockMapAddr * SB->BlockSize;
+ return (uint64_t)ContainerLayout.SB->BlockMapAddr *
+ ContainerLayout.SB->BlockSize;
}
-uint32_t PDBFile::getNumStreams() const { return StreamSizes.size(); }
+uint32_t PDBFile::getNumStreams() const {
+ return ContainerLayout.StreamSizes.size();
+}
uint32_t PDBFile::getStreamByteSize(uint32_t StreamIndex) const {
- return StreamSizes[StreamIndex];
+ return ContainerLayout.StreamSizes[StreamIndex];
}
ArrayRef<support::ulittle32_t>
PDBFile::getStreamBlockList(uint32_t StreamIndex) const {
- return StreamMap[StreamIndex];
+ return ContainerLayout.StreamMap[StreamIndex];
}
uint32_t PDBFile::getFileSize() const { return Buffer->getLength(); }
@@ -85,41 +101,72 @@ Expected<ArrayRef<uint8_t>> PDBFile::getBlockData(uint32_t BlockIndex,
Error PDBFile::setBlockData(uint32_t BlockIndex, uint32_t Offset,
ArrayRef<uint8_t> Data) const {
- if (Offset >= getBlockSize())
- return make_error<RawError>(
- raw_error_code::invalid_block_address,
- "setBlockData attempted to write out of block bounds.");
- if (Data.size() > getBlockSize() - Offset)
- return make_error<RawError>(
- raw_error_code::invalid_block_address,
- "setBlockData attempted to write out of block bounds.");
-
- uint64_t StreamBlockOffset = msf::blockToOffset(BlockIndex, getBlockSize());
- StreamBlockOffset += Offset;
- return Buffer->writeBytes(StreamBlockOffset, Data);
+ return make_error<RawError>(raw_error_code::not_writable,
+ "PDBFile is immutable");
}
Error PDBFile::parseFileHeaders() {
StreamReader Reader(*Buffer);
+ // Initialize SB.
+ const msf::SuperBlock *SB = nullptr;
if (auto EC = Reader.readObject(SB)) {
consumeError(std::move(EC));
return make_error<RawError>(raw_error_code::corrupt_file,
"Does not contain superblock");
}
- if (auto EC = setSuperBlock(SB))
+ if (auto EC = msf::validateSuperBlock(*SB))
+ return EC;
+
+ if (Buffer->getLength() % SB->BlockSize != 0)
+ return make_error<RawError>(raw_error_code::corrupt_file,
+ "File size is not a multiple of block size");
+ ContainerLayout.SB = SB;
+
+ // Initialize Free Page Map.
+ ContainerLayout.FreePageMap.resize(SB->NumBlocks);
+ // The Fpm exists either at block 1 or block 2 of the MSF. However, this
+ // allows for a maximum of getBlockSize() * 8 bits in the Fpm, and thus an
+ // equal number of total blocks in the file. For a block size of 4KiB (very
+ // common), this would yield only 32 Ki blocks in the file, for a maximum
+ // file size of 32 Ki * 4KiB = 128MiB. Obviously this won't do, so
+ // the Fpm is split across the file at `getBlockSize()` intervals. As a
+ // result, every block whose index is of the form |{1,2} + getBlockSize() * k|
+ // for any non-negative integer k is an Fpm block. In theory, we only really
+ // need to reserve blocks of the form |{1,2} + getBlockSize() * 8 * k|, but
+ // current versions of the MSF format already expect the Fpm to be arranged
+ // at getBlockSize() intervals, so we have to be compatible.
+ // See the function fpmPn() for more information:
+ // https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/msf/msf.cpp#L489
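+ // For example, with a block size of 4096, blocks 1, 2, 4097, 4098, 8193,
+ // 8194, and so on are reserved for the Fpm.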
+ auto FpmStream = MappedBlockStream::createFpmStream(ContainerLayout, *Buffer);
+ StreamReader FpmReader(*FpmStream);
+ ArrayRef<uint8_t> FpmBytes;
+ if (auto EC = FpmReader.readBytes(FpmBytes,
+ msf::getFullFpmByteSize(ContainerLayout)))
return EC;
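+ // Decode the Fpm into the free page map: each byte covers eight blocks,
+ // least-significant bit first, and a set bit marks the block as free.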
+ uint32_t BlocksRemaining = getBlockCount();
+ uint32_t BI = 0;
+ for (auto Byte : FpmBytes) {
+ uint32_t BlocksThisByte = std::min(BlocksRemaining, 8U);
+ for (uint32_t I = 0; I < BlocksThisByte; ++I) {
+ if (Byte & (1 << I))
+ ContainerLayout.FreePageMap[BI] = true;
+ --BlocksRemaining;
+ ++BI;
+ }
+ }
Reader.setOffset(getBlockMapOffset());
- if (auto EC = Reader.readArray(DirectoryBlocks, getNumDirectoryBlocks()))
+ if (auto EC = Reader.readArray(ContainerLayout.DirectoryBlocks,
+ getNumDirectoryBlocks()))
return EC;
return Error::success();
}
Error PDBFile::parseStreamData() {
- assert(SB);
+ assert(ContainerLayout.SB);
if (DirectoryStream)
return Error::success();
@@ -130,21 +177,20 @@ Error PDBFile::parseStreamData() {
// is exactly what we are attempting to parse. By specifying a custom
// subclass of IPDBStreamData which only accesses the fields that have already
// been parsed, we can avoid this and reuse MappedBlockStream.
- auto DS = MappedBlockStream::createDirectoryStream(*this);
- if (!DS)
- return DS.takeError();
- StreamReader Reader(**DS);
+ auto DS = MappedBlockStream::createDirectoryStream(ContainerLayout, *Buffer);
+ StreamReader Reader(*DS);
if (auto EC = Reader.readInteger(NumStreams))
return EC;
- if (auto EC = Reader.readArray(StreamSizes, NumStreams))
+ if (auto EC = Reader.readArray(ContainerLayout.StreamSizes, NumStreams))
return EC;
for (uint32_t I = 0; I < NumStreams; ++I) {
uint32_t StreamSize = getStreamByteSize(I);
// FIXME: What does StreamSize ~0U mean?
uint64_t NumExpectedStreamBlocks =
- StreamSize == UINT32_MAX ? 0 : msf::bytesToBlocks(StreamSize,
- SB->BlockSize);
+ StreamSize == UINT32_MAX
+ ? 0
+ : msf::bytesToBlocks(StreamSize, ContainerLayout.SB->BlockSize);
// For convenience, we store the block array contiguously. This is because
// if someone calls setStreamMap(), it is more convenient to be able to call
@@ -156,29 +202,46 @@ Error PDBFile::parseStreamData() {
if (auto EC = Reader.readArray(Blocks, NumExpectedStreamBlocks))
return EC;
for (uint32_t Block : Blocks) {
- uint64_t BlockEndOffset = (uint64_t)(Block + 1) * SB->BlockSize;
+ uint64_t BlockEndOffset =
+ (uint64_t)(Block + 1) * ContainerLayout.SB->BlockSize;
if (BlockEndOffset > getFileSize())
return make_error<RawError>(raw_error_code::corrupt_file,
"Stream block map is corrupt.");
}
- StreamMap.push_back(Blocks);
+ ContainerLayout.StreamMap.push_back(Blocks);
}
// We should have read exactly SB->NumDirectoryBytes bytes.
assert(Reader.bytesRemaining() == 0);
- DirectoryStream = std::move(*DS);
+ DirectoryStream = std::move(DS);
return Error::success();
}
-llvm::ArrayRef<support::ulittle32_t> PDBFile::getDirectoryBlockArray() const {
- return DirectoryBlocks;
+ArrayRef<support::ulittle32_t> PDBFile::getDirectoryBlockArray() const {
+ return ContainerLayout.DirectoryBlocks;
+}
+
+Expected<GlobalsStream &> PDBFile::getPDBGlobalsStream() {
+ if (!Globals) {
+ auto DbiS = getPDBDbiStream();
+ if (!DbiS)
+ return DbiS.takeError();
+
+ auto GlobalS = safelyCreateIndexedStream(
+ ContainerLayout, *Buffer, DbiS->getGlobalSymbolStreamIndex());
+ if (!GlobalS) return GlobalS.takeError();
+ auto TempGlobals = llvm::make_unique<GlobalsStream>(std::move(*GlobalS));
+ if (auto EC = TempGlobals->reload())
+ return std::move(EC);
+ Globals = std::move(TempGlobals);
+ }
+ return *Globals;
}
Expected<InfoStream &> PDBFile::getPDBInfoStream() {
if (!Info) {
- auto InfoS = MappedBlockStream::createIndexedStream(StreamPDB, *this);
- if (!InfoS)
- return InfoS.takeError();
+ auto InfoS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamPDB);
+ if (!InfoS) return InfoS.takeError();
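+ // The info stream is a fixed-size InfoStreamHeader followed by the
+ // serialized named stream map.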
auto TempInfo = llvm::make_unique<InfoStream>(std::move(*InfoS));
if (auto EC = TempInfo->reload())
return std::move(EC);
@@ -189,9 +252,8 @@ Expected<InfoStream &> PDBFile::getPDBInfoStream() {
Expected<DbiStream &> PDBFile::getPDBDbiStream() {
if (!Dbi) {
- auto DbiS = MappedBlockStream::createIndexedStream(StreamDBI, *this);
- if (!DbiS)
- return DbiS.takeError();
+ auto DbiS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamDBI);
+ if (!DbiS) return DbiS.takeError();
auto TempDbi = llvm::make_unique<DbiStream>(*this, std::move(*DbiS));
if (auto EC = TempDbi->reload())
return std::move(EC);
@@ -202,9 +264,8 @@ Expected<DbiStream &> PDBFile::getPDBDbiStream() {
Expected<TpiStream &> PDBFile::getPDBTpiStream() {
if (!Tpi) {
- auto TpiS = MappedBlockStream::createIndexedStream(StreamTPI, *this);
- if (!TpiS)
- return TpiS.takeError();
+ auto TpiS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamTPI);
+ if (!TpiS) return TpiS.takeError();
auto TempTpi = llvm::make_unique<TpiStream>(*this, std::move(*TpiS));
if (auto EC = TempTpi->reload())
return std::move(EC);
@@ -215,9 +276,8 @@ Expected<TpiStream &> PDBFile::getPDBTpiStream() {
Expected<TpiStream &> PDBFile::getPDBIpiStream() {
if (!Ipi) {
- auto IpiS = MappedBlockStream::createIndexedStream(StreamIPI, *this);
- if (!IpiS)
- return IpiS.takeError();
+ auto IpiS = safelyCreateIndexedStream(ContainerLayout, *Buffer, StreamIPI);
+ if (!IpiS) return IpiS.takeError();
auto TempIpi = llvm::make_unique<TpiStream>(*this, std::move(*IpiS));
if (auto EC = TempIpi->reload())
return std::move(EC);
@@ -232,12 +292,9 @@ Expected<PublicsStream &> PDBFile::getPDBPublicsStream() {
if (!DbiS)
return DbiS.takeError();
- uint32_t PublicsStreamNum = DbiS->getPublicSymbolStreamIndex();
-
- auto PublicS =
- MappedBlockStream::createIndexedStream(PublicsStreamNum, *this);
- if (!PublicS)
- return PublicS.takeError();
+ auto PublicS = safelyCreateIndexedStream(
+ ContainerLayout, *Buffer, DbiS->getPublicSymbolStreamIndex());
+ if (!PublicS) return PublicS.takeError();
auto TempPublics =
llvm::make_unique<PublicsStream>(*this, std::move(*PublicS));
if (auto EC = TempPublics->reload())
@@ -254,11 +311,10 @@ Expected<SymbolStream &> PDBFile::getPDBSymbolStream() {
return DbiS.takeError();
uint32_t SymbolStreamNum = DbiS->getSymRecordStreamIndex();
-
auto SymbolS =
- MappedBlockStream::createIndexedStream(SymbolStreamNum, *this);
- if (!SymbolS)
- return SymbolS.takeError();
+ safelyCreateIndexedStream(ContainerLayout, *Buffer, SymbolStreamNum);
+ if (!SymbolS) return SymbolS.takeError();
+
auto TempSymbols = llvm::make_unique<SymbolStream>(std::move(*SymbolS));
if (auto EC = TempSymbols->reload())
return std::move(EC);
@@ -275,14 +331,9 @@ Expected<NameHashTable &> PDBFile::getStringTable() {
uint32_t NameStreamIndex = IS->getNamedStreamIndex("/names");
- if (NameStreamIndex == 0)
- return make_error<RawError>(raw_error_code::no_stream);
- if (NameStreamIndex >= getNumStreams())
- return make_error<RawError>(raw_error_code::no_stream);
-
- auto NS = MappedBlockStream::createIndexedStream(NameStreamIndex, *this);
- if (!NS)
- return NS.takeError();
+ auto NS =
+ safelyCreateIndexedStream(ContainerLayout, *Buffer, NameStreamIndex);
+ if (!NS) return NS.takeError();
StreamReader Reader(**NS);
auto N = llvm::make_unique<NameHashTable>();
@@ -294,72 +345,47 @@ Expected<NameHashTable &> PDBFile::getStringTable() {
return *StringTable;
}
-Error PDBFile::setSuperBlock(const msf::SuperBlock *Block) {
- if (auto EC = msf::validateSuperBlock(*Block))
- return EC;
-
- if (Buffer->getLength() % SB->BlockSize != 0)
- return make_error<RawError>(raw_error_code::corrupt_file,
- "File size is not a multiple of block size");
+bool PDBFile::hasPDBDbiStream() const { return StreamDBI < getNumStreams(); }
- SB = Block;
- return Error::success();
+bool PDBFile::hasPDBGlobalsStream() {
+ auto DbiS = getPDBDbiStream();
+ if (!DbiS) return false;
+ return DbiS->getGlobalSymbolStreamIndex() < getNumStreams();
}
-Error PDBFile::commit() {
- StreamWriter Writer(*Buffer);
-
- if (auto EC = Writer.writeObject(*SB))
- return EC;
- Writer.setOffset(getBlockMapOffset());
- if (auto EC = Writer.writeArray(DirectoryBlocks))
- return EC;
-
- auto DS = MappedBlockStream::createDirectoryStream(*this);
- if (!DS)
- return DS.takeError();
- auto DirStream = std::move(*DS);
- StreamWriter DW(*DirStream);
- if (auto EC = DW.writeInteger(this->getNumStreams()))
- return EC;
-
- if (auto EC = DW.writeArray(StreamSizes))
- return EC;
-
- for (const auto &Blocks : StreamMap) {
- if (auto EC = DW.writeArray(Blocks))
- return EC;
- }
-
- if (Info) {
- if (auto EC = Info->commit())
- return EC;
- }
+bool PDBFile::hasPDBInfoStream() { return StreamPDB < getNumStreams(); }
- if (Dbi) {
- if (auto EC = Dbi->commit())
- return EC;
- }
+bool PDBFile::hasPDBIpiStream() const { return StreamIPI < getNumStreams(); }
- if (Symbols) {
- if (auto EC = Symbols->commit())
- return EC;
- }
+bool PDBFile::hasPDBPublicsStream() {
+ auto DbiS = getPDBDbiStream();
+ if (!DbiS) return false;
+ return DbiS->getPublicSymbolStreamIndex() < getNumStreams();
+}
- if (Publics) {
- if (auto EC = Publics->commit())
- return EC;
- }
+bool PDBFile::hasPDBSymbolStream() {
+ auto DbiS = getPDBDbiStream();
+ if (!DbiS) return false;
+ return DbiS->getSymRecordStreamIndex() < getNumStreams();
+}
- if (Tpi) {
- if (auto EC = Tpi->commit())
- return EC;
- }
+bool PDBFile::hasPDBTpiStream() const { return StreamTPI < getNumStreams(); }
- if (Ipi) {
- if (auto EC = Ipi->commit())
- return EC;
- }
+bool PDBFile::hasStringTable() {
+ auto IS = getPDBInfoStream();
+ if (!IS) return false;
+ return IS->getNamedStreamIndex("/names") < getNumStreams();
+}
- return Buffer->commit();
+/// Wrapper around MappedBlockStream::createIndexedStream()
+/// that checks if a stream with that index actually exists.
+/// If it does not, the return value will hold a RawError with code
+/// raw_error_code::no_stream. Otherwise, the return value will contain the
+/// stream returned by createIndexedStream().
+Expected<std::unique_ptr<MappedBlockStream>> PDBFile::safelyCreateIndexedStream(
+ const MSFLayout &Layout, const ReadableStream &MsfData,
+ uint32_t StreamIndex) const {
+ if (StreamIndex >= getNumStreams())
+ return make_error<RawError>(raw_error_code::no_stream);
+ return MappedBlockStream::createIndexedStream(Layout, MsfData, StreamIndex);
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Raw/PDBFileBuilder.cpp b/contrib/llvm/lib/DebugInfo/PDB/Raw/PDBFileBuilder.cpp
index 9063fd62d295..6fec0e32a8ae 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Raw/PDBFileBuilder.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Raw/PDBFileBuilder.cpp
@@ -11,92 +11,138 @@
#include "llvm/ADT/BitVector.h"
-#include "llvm/DebugInfo/CodeView/StreamInterface.h"
-#include "llvm/DebugInfo/CodeView/StreamWriter.h"
+#include "llvm/DebugInfo/MSF/MSFBuilder.h"
+#include "llvm/DebugInfo/MSF/StreamInterface.h"
+#include "llvm/DebugInfo/MSF/StreamWriter.h"
+#include "llvm/DebugInfo/PDB/GenericError.h"
#include "llvm/DebugInfo/PDB/Raw/DbiStream.h"
#include "llvm/DebugInfo/PDB/Raw/DbiStreamBuilder.h"
#include "llvm/DebugInfo/PDB/Raw/InfoStream.h"
#include "llvm/DebugInfo/PDB/Raw/InfoStreamBuilder.h"
#include "llvm/DebugInfo/PDB/Raw/RawError.h"
+#include "llvm/DebugInfo/PDB/Raw/TpiStream.h"
+#include "llvm/DebugInfo/PDB/Raw/TpiStreamBuilder.h"
using namespace llvm;
using namespace llvm::codeview;
+using namespace llvm::msf;
using namespace llvm::pdb;
using namespace llvm::support;
-PDBFileBuilder::PDBFileBuilder(
- std::unique_ptr<codeview::StreamInterface> FileBuffer)
- : File(llvm::make_unique<PDBFile>(std::move(FileBuffer))) {}
+PDBFileBuilder::PDBFileBuilder(BumpPtrAllocator &Allocator)
+ : Allocator(Allocator) {}
-Error PDBFileBuilder::initialize(const msf::SuperBlock &Super) {
- auto ExpectedMsf =
- MsfBuilder::create(File->Allocator, Super.BlockSize, Super.NumBlocks);
+Error PDBFileBuilder::initialize(uint32_t BlockSize) {
+ auto ExpectedMsf = MSFBuilder::create(Allocator, BlockSize);
if (!ExpectedMsf)
return ExpectedMsf.takeError();
-
- auto &MsfResult = *ExpectedMsf;
- if (auto EC = MsfResult.setBlockMapAddr(Super.BlockMapAddr))
- return EC;
- Msf = llvm::make_unique<MsfBuilder>(std::move(MsfResult));
- Msf->setFreePageMap(Super.FreeBlockMapBlock);
- Msf->setUnknown1(Super.Unknown1);
+ Msf = llvm::make_unique<MSFBuilder>(std::move(*ExpectedMsf));
return Error::success();
}
-MsfBuilder &PDBFileBuilder::getMsfBuilder() { return *Msf; }
+MSFBuilder &PDBFileBuilder::getMsfBuilder() { return *Msf; }
InfoStreamBuilder &PDBFileBuilder::getInfoBuilder() {
if (!Info)
- Info = llvm::make_unique<InfoStreamBuilder>();
+ Info = llvm::make_unique<InfoStreamBuilder>(*Msf);
return *Info;
}
DbiStreamBuilder &PDBFileBuilder::getDbiBuilder() {
if (!Dbi)
- Dbi = llvm::make_unique<DbiStreamBuilder>();
+ Dbi = llvm::make_unique<DbiStreamBuilder>(*Msf);
return *Dbi;
}
-Expected<std::unique_ptr<PDBFile>> PDBFileBuilder::build() {
+TpiStreamBuilder &PDBFileBuilder::getTpiBuilder() {
+ if (!Tpi)
+ Tpi = llvm::make_unique<TpiStreamBuilder>(*Msf, StreamTPI);
+ return *Tpi;
+}
+
+TpiStreamBuilder &PDBFileBuilder::getIpiBuilder() {
+ if (!Ipi)
+ Ipi = llvm::make_unique<TpiStreamBuilder>(*Msf, StreamIPI);
+ return *Ipi;
+}
+
+Expected<msf::MSFLayout> PDBFileBuilder::finalizeMsfLayout() const {
if (Info) {
- uint32_t Length = Info->calculateSerializedLength();
- if (auto EC = Msf->setStreamSize(StreamPDB, Length))
+ if (auto EC = Info->finalizeMsfLayout())
return std::move(EC);
}
if (Dbi) {
- uint32_t Length = Dbi->calculateSerializedLength();
- if (auto EC = Msf->setStreamSize(StreamDBI, Length))
+ if (auto EC = Dbi->finalizeMsfLayout())
return std::move(EC);
}
+ if (Tpi) {
+ if (auto EC = Tpi->finalizeMsfLayout())
+ return std::move(EC);
+ }
+ if (Ipi) {
+ if (auto EC = Ipi->finalizeMsfLayout())
+ return std::move(EC);
+ }
+
+ return Msf->build();
+}
- auto ExpectedLayout = Msf->build();
+Error PDBFileBuilder::commit(StringRef Filename) {
+ auto ExpectedLayout = finalizeMsfLayout();
if (!ExpectedLayout)
return ExpectedLayout.takeError();
+ auto &Layout = *ExpectedLayout;
- const msf::Layout &L = *ExpectedLayout;
- File->StreamMap = L.StreamMap;
- File->StreamSizes = L.StreamSizes;
- File->DirectoryBlocks = L.DirectoryBlocks;
- File->SB = L.SB;
+ uint64_t Filesize = Layout.SB->BlockSize * Layout.SB->NumBlocks;
+ auto OutFileOrError = FileOutputBuffer::create(Filename, Filesize);
+ if (OutFileOrError.getError())
+ return llvm::make_error<pdb::GenericError>(generic_error_code::invalid_path,
+ Filename);
+ FileBufferByteStream Buffer(std::move(*OutFileOrError));
+ StreamWriter Writer(Buffer);
+
+ if (auto EC = Writer.writeObject(*Layout.SB))
+ return EC;
+ uint32_t BlockMapOffset =
+ msf::blockToOffset(Layout.SB->BlockMapAddr, Layout.SB->BlockSize);
+ Writer.setOffset(BlockMapOffset);
+ if (auto EC = Writer.writeArray(Layout.DirectoryBlocks))
+ return EC;
+
+ auto DirStream =
+ WritableMappedBlockStream::createDirectoryStream(Layout, Buffer);
+ StreamWriter DW(*DirStream);
+ if (auto EC =
+ DW.writeInteger(static_cast<uint32_t>(Layout.StreamSizes.size())))
+ return EC;
+
+ if (auto EC = DW.writeArray(Layout.StreamSizes))
+ return EC;
+
+ for (const auto &Blocks : Layout.StreamMap) {
+ if (auto EC = DW.writeArray(Blocks))
+ return EC;
+ }
if (Info) {
- auto ExpectedInfo = Info->build(*File);
- if (!ExpectedInfo)
- return ExpectedInfo.takeError();
- File->Info = std::move(*ExpectedInfo);
+ if (auto EC = Info->commit(Layout, Buffer))
+ return EC;
}
if (Dbi) {
- auto ExpectedDbi = Dbi->build(*File);
- if (!ExpectedDbi)
- return ExpectedDbi.takeError();
- File->Dbi = std::move(*ExpectedDbi);
+ if (auto EC = Dbi->commit(Layout, Buffer))
+ return EC;
}
- if (File->Info && File->Dbi && File->Info->getAge() != File->Dbi->getAge())
- return llvm::make_error<RawError>(
- raw_error_code::corrupt_file,
- "PDB Stream Age doesn't match Dbi Stream Age!");
+ if (Tpi) {
+ if (auto EC = Tpi->commit(Layout, Buffer))
+ return EC;
+ }
+
+ if (Ipi) {
+ if (auto EC = Ipi->commit(Layout, Buffer))
+ return EC;
+ }
- return std::move(File);
+ return Buffer.commit();
}
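
Taken together, the rewritten builder now writes the MSF skeleton (superblock, block map,
stream directory) and then lets each stream builder serialize itself into its own indexed
stream. A rough usage sketch, assuming the individual stream builders are configured through
setters not shown in this hunk (error handling elided):

    BumpPtrAllocator Alloc;
    PDBFileBuilder Builder(Alloc);
    if (auto EC = Builder.initialize(4096))               // MSF block size
      return EC;
    InfoStreamBuilder &InfoB = Builder.getInfoBuilder();  // configure version, GUID, age, ...
    DbiStreamBuilder  &DbiB  = Builder.getDbiBuilder();
    TpiStreamBuilder  &TpiB  = Builder.getTpiBuilder();   // addTypeRecord(...), etc.
    if (auto EC = Builder.commit("out.pdb"))
      return EC;
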
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Raw/PublicsStream.cpp b/contrib/llvm/lib/DebugInfo/PDB/Raw/PublicsStream.cpp
index af3d2d026b48..b31f605a078c 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Raw/PublicsStream.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Raw/PublicsStream.cpp
@@ -22,30 +22,25 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/DebugInfo/PDB/Raw/PublicsStream.h"
-
-#include "llvm/DebugInfo/CodeView/CodeView.h"
-#include "llvm/DebugInfo/CodeView/StreamReader.h"
-#include "llvm/DebugInfo/CodeView/TypeRecord.h"
-#include "llvm/DebugInfo/PDB/Raw/IndexedStreamData.h"
-#include "llvm/DebugInfo/PDB/Raw/MappedBlockStream.h"
+#include "GSI.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
+#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
+#include "llvm/DebugInfo/MSF/StreamReader.h"
#include "llvm/DebugInfo/PDB/Raw/PDBFile.h"
-#include "llvm/DebugInfo/PDB/Raw/RawConstants.h"
+#include "llvm/DebugInfo/PDB/Raw/PublicsStream.h"
#include "llvm/DebugInfo/PDB/Raw/RawError.h"
#include "llvm/DebugInfo/PDB/Raw/SymbolStream.h"
-
-#include "llvm/ADT/BitVector.h"
#include "llvm/Support/Endian.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Error.h"
+#include <algorithm>
+#include <cstdint>
using namespace llvm;
+using namespace llvm::msf;
using namespace llvm::support;
using namespace llvm::pdb;
-
-static const unsigned IPHR_HASH = 4096;
-
// This is PSGSIHDR struct defined in
// https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/dbi/gsi.h
struct PublicsStream::HeaderInfo {
@@ -59,23 +54,11 @@ struct PublicsStream::HeaderInfo {
ulittle32_t NumSections;
};
-// This is GSIHashHdr.
-struct PublicsStream::GSIHashHeader {
- enum : unsigned {
- HdrSignature = ~0U,
- HdrVersion = 0xeffe0000 + 19990810,
- };
- ulittle32_t VerSignature;
- ulittle32_t VerHdr;
- ulittle32_t HrSize;
- ulittle32_t NumBuckets;
-};
-
PublicsStream::PublicsStream(PDBFile &File,
std::unique_ptr<MappedBlockStream> Stream)
: Pdb(File), Stream(std::move(Stream)) {}
-PublicsStream::~PublicsStream() {}
+PublicsStream::~PublicsStream() = default;
uint32_t PublicsStream::getSymHash() const { return Header->SymHash; }
uint32_t PublicsStream::getAddrMap() const { return Header->AddrMap; }
@@ -86,7 +69,7 @@ uint32_t PublicsStream::getAddrMap() const { return Header->AddrMap; }
// we skip over the hash table which we believe contains information about
// public symbols.
Error PublicsStream::reload() {
- codeview::StreamReader Reader(*Stream);
+ StreamReader Reader(*Stream);
// Check stream size.
if (Reader.bytesRemaining() < sizeof(HeaderInfo) + sizeof(GSIHashHeader))
@@ -98,40 +81,15 @@ Error PublicsStream::reload() {
return make_error<RawError>(raw_error_code::corrupt_file,
"Publics Stream does not contain a header.");
- if (Reader.readObject(HashHdr))
- return make_error<RawError>(raw_error_code::corrupt_file,
- "Publics Stream does not contain a header.");
+ if (auto EC = readGSIHashHeader(HashHdr, Reader))
+ return EC;
- // An array of HashRecord follows. Read them.
- if (HashHdr->HrSize % sizeof(PSHashRecord))
- return make_error<RawError>(raw_error_code::corrupt_file,
- "Invalid HR array size.");
- uint32_t NumHashRecords = HashHdr->HrSize / sizeof(PSHashRecord);
- if (auto EC = Reader.readArray(HashRecords, NumHashRecords))
- return joinErrors(std::move(EC),
- make_error<RawError>(raw_error_code::corrupt_file,
- "Could not read an HR array"));
+ if (auto EC = readGSIHashRecords(HashRecords, HashHdr, Reader))
+ return EC;
- // A bitmap of a fixed length follows.
- size_t BitmapSizeInBits = alignTo(IPHR_HASH + 1, 32);
- uint32_t NumBitmapEntries = BitmapSizeInBits / 8;
- if (auto EC = Reader.readBytes(Bitmap, NumBitmapEntries))
- return joinErrors(std::move(EC),
- make_error<RawError>(raw_error_code::corrupt_file,
- "Could not read a bitmap."));
- for (uint8_t B : Bitmap)
- NumBuckets += countPopulation(B);
-
- // We don't yet understand the following data structures completely,
- // but we at least know the types and sizes. Here we are trying
- // to read the stream till end so that we at least can detect
- // corrupted streams.
-
- // Hash buckets follow.
- if (auto EC = Reader.readArray(HashBuckets, NumBuckets))
- return joinErrors(std::move(EC),
- make_error<RawError>(raw_error_code::corrupt_file,
- "Hash buckets corrupted."));
+ if (auto EC = readGSIHashBuckets(HashBuckets, HashHdr, Reader))
+ return EC;
+ NumBuckets = HashBuckets.size();
// Something called "address map" follows.
uint32_t NumAddressMapEntries = Header->AddrMap / sizeof(uint32_t);
@@ -163,7 +121,7 @@ PublicsStream::getSymbols(bool *HadError) const {
auto SymbolS = Pdb.getPDBSymbolStream();
if (SymbolS.takeError()) {
codeview::CVSymbolArray::Iterator Iter;
- return llvm::make_range(Iter, Iter);
+ return make_range(Iter, Iter);
}
SymbolStream &SS = SymbolS.get();
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Raw/RawError.cpp b/contrib/llvm/lib/DebugInfo/PDB/Raw/RawError.cpp
index eb169f70e11c..f4a5057509eb 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Raw/RawError.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Raw/RawError.cpp
@@ -11,7 +11,7 @@ namespace {
// deal with the Error value directly, rather than converting to error_code.
class RawErrorCategory : public std::error_category {
public:
- const char *name() const LLVM_NOEXCEPT override { return "llvm.pdb.raw"; }
+ const char *name() const noexcept override { return "llvm.pdb.raw"; }
std::string message(int Condition) const override {
switch (static_cast<raw_error_code>(Condition)) {
@@ -19,6 +19,8 @@ public:
return "An unknown error has occurred.";
case raw_error_code::feature_unsupported:
return "The feature is unsupported by the implementation.";
+ case raw_error_code::invalid_format:
+ return "The record is in an unexpected format.";
case raw_error_code::corrupt_file:
return "The PDB file is corrupt.";
case raw_error_code::insufficient_buffer:
@@ -30,6 +32,10 @@ public:
return "The specified item does not exist in the array.";
case raw_error_code::invalid_block_address:
return "The specified block address is not valid.";
+ case raw_error_code::duplicate_entry:
+ return "The entry already exists.";
+ case raw_error_code::no_entry:
+ return "The entry does not exist.";
case raw_error_code::not_writable:
return "The PDB does not support writing.";
case raw_error_code::invalid_tpi_hash:
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Raw/RawSession.cpp b/contrib/llvm/lib/DebugInfo/PDB/Raw/RawSession.cpp
index 455d33140dd4..cd3a2064c717 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Raw/RawSession.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Raw/RawSession.cpp
@@ -7,10 +7,8 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/DebugInfo/PDB/Raw/RawSession.h"
-
-#include "llvm/DebugInfo/CodeView/ByteStream.h"
-#include "llvm/DebugInfo/CodeView/StreamInterface.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/DebugInfo/MSF/ByteStream.h"
#include "llvm/DebugInfo/PDB/GenericError.h"
#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
#include "llvm/DebugInfo/PDB/IPDBSourceFile.h"
@@ -18,59 +16,51 @@
#include "llvm/DebugInfo/PDB/PDBSymbolExe.h"
#include "llvm/DebugInfo/PDB/Raw/PDBFile.h"
#include "llvm/DebugInfo/PDB/Raw/RawError.h"
-
+#include "llvm/DebugInfo/PDB/Raw/RawSession.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/MemoryBuffer.h"
+#include <algorithm>
+#include <memory>
using namespace llvm;
+using namespace llvm::msf;
using namespace llvm::pdb;
-namespace {
-// We need a class which behaves like an immutable ByteStream, but whose data
-// is backed by an llvm::MemoryBuffer. It also needs to own the underlying
-// MemoryBuffer, so this simple adapter is a good way to achieve that.
-class InputByteStream : public codeview::ByteStream<false> {
-public:
- explicit InputByteStream(std::unique_ptr<MemoryBuffer> Buffer)
- : ByteStream(ArrayRef<uint8_t>(Buffer->getBuffer().bytes_begin(),
- Buffer->getBuffer().bytes_end())),
- MemBuffer(std::move(Buffer)) {}
-
- std::unique_ptr<MemoryBuffer> MemBuffer;
-};
-}
-
-RawSession::RawSession(std::unique_ptr<PDBFile> PdbFile)
- : Pdb(std::move(PdbFile)) {}
+RawSession::RawSession(std::unique_ptr<PDBFile> PdbFile,
+ std::unique_ptr<BumpPtrAllocator> Allocator)
+ : Pdb(std::move(PdbFile)), Allocator(std::move(Allocator)) {}
-RawSession::~RawSession() {}
+RawSession::~RawSession() = default;
Error RawSession::createFromPdb(StringRef Path,
std::unique_ptr<IPDBSession> &Session) {
-
ErrorOr<std::unique_ptr<MemoryBuffer>> ErrorOrBuffer =
MemoryBuffer::getFileOrSTDIN(Path, /*FileSize=*/-1,
/*RequiresNullTerminator=*/false);
if (!ErrorOrBuffer)
- return llvm::make_error<GenericError>(generic_error_code::invalid_path);
+ return make_error<GenericError>(generic_error_code::invalid_path);
std::unique_ptr<MemoryBuffer> Buffer = std::move(*ErrorOrBuffer);
- auto Stream = llvm::make_unique<InputByteStream>(std::move(Buffer));
+ auto Stream = llvm::make_unique<MemoryBufferByteStream>(std::move(Buffer));
- std::unique_ptr<PDBFile> File(new PDBFile(std::move(Stream)));
+ auto Allocator = llvm::make_unique<BumpPtrAllocator>();
+ auto File = llvm::make_unique<PDBFile>(std::move(Stream), *Allocator);
if (auto EC = File->parseFileHeaders())
return EC;
if (auto EC = File->parseStreamData())
return EC;
- Session.reset(new RawSession(std::move(File)));
+ Session =
+ llvm::make_unique<RawSession>(std::move(File), std::move(Allocator));
return Error::success();
}
Error RawSession::createFromExe(StringRef Path,
std::unique_ptr<IPDBSession> &Session) {
- return llvm::make_error<RawError>(raw_error_code::feature_unsupported);
+ return make_error<RawError>(raw_error_code::feature_unsupported);
}
uint64_t RawSession::getLoadAddress() const { return 0; }
@@ -103,26 +93,26 @@ RawSession::findLineNumbersByAddress(uint64_t Address, uint32_t Length) const {
std::unique_ptr<IPDBEnumSourceFiles>
RawSession::findSourceFiles(const PDBSymbolCompiland *Compiland,
- llvm::StringRef Pattern,
+ StringRef Pattern,
PDB_NameSearchFlags Flags) const {
return nullptr;
}
std::unique_ptr<IPDBSourceFile>
RawSession::findOneSourceFile(const PDBSymbolCompiland *Compiland,
- llvm::StringRef Pattern,
+ StringRef Pattern,
PDB_NameSearchFlags Flags) const {
return nullptr;
}
std::unique_ptr<IPDBEnumChildren<PDBSymbolCompiland>>
-RawSession::findCompilandsForSourceFile(llvm::StringRef Pattern,
+RawSession::findCompilandsForSourceFile(StringRef Pattern,
PDB_NameSearchFlags Flags) const {
return nullptr;
}
std::unique_ptr<PDBSymbolCompiland>
-RawSession::findOneCompilandForSourceFile(llvm::StringRef Pattern,
+RawSession::findOneCompilandForSourceFile(StringRef Pattern,
PDB_NameSearchFlags Flags) const {
return nullptr;
}
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Raw/SymbolStream.cpp b/contrib/llvm/lib/DebugInfo/PDB/Raw/SymbolStream.cpp
index 41b2a64bfb14..2f3ac3497f39 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Raw/SymbolStream.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Raw/SymbolStream.cpp
@@ -10,10 +10,9 @@
#include "llvm/DebugInfo/PDB/Raw/SymbolStream.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
-#include "llvm/DebugInfo/CodeView/StreamReader.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
-#include "llvm/DebugInfo/PDB/Raw/IndexedStreamData.h"
-#include "llvm/DebugInfo/PDB/Raw/MappedBlockStream.h"
+#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
+#include "llvm/DebugInfo/MSF/StreamReader.h"
#include "llvm/DebugInfo/PDB/Raw/PDBFile.h"
#include "llvm/DebugInfo/PDB/Raw/RawConstants.h"
#include "llvm/DebugInfo/PDB/Raw/RawError.h"
@@ -21,6 +20,7 @@
#include "llvm/Support/Endian.h"
using namespace llvm;
+using namespace llvm::msf;
using namespace llvm::support;
using namespace llvm::pdb;
@@ -30,7 +30,7 @@ SymbolStream::SymbolStream(std::unique_ptr<MappedBlockStream> Stream)
SymbolStream::~SymbolStream() {}
Error SymbolStream::reload() {
- codeview::StreamReader Reader(*Stream);
+ StreamReader Reader(*Stream);
if (auto EC = Reader.readArray(SymbolRecords, Stream->getLength()))
return EC;
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Raw/TpiHashing.cpp b/contrib/llvm/lib/DebugInfo/PDB/Raw/TpiHashing.cpp
new file mode 100644
index 000000000000..6c3ddb3d57af
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/Raw/TpiHashing.cpp
@@ -0,0 +1,110 @@
+//===- TpiHashing.cpp -----------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/PDB/Raw/TpiHashing.h"
+
+#include "llvm/DebugInfo/PDB/Raw/Hash.h"
+#include "llvm/DebugInfo/PDB/Raw/RawError.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+using namespace llvm::pdb;
+
+// Corresponds to `fUDTAnon`.
+template <typename T> static bool isAnonymous(T &Rec) {
+ StringRef Name = Rec.getName();
+ return Name == "<unnamed-tag>" || Name == "__unnamed" ||
+ Name.endswith("::<unnamed-tag>") || Name.endswith("::__unnamed");
+}
+
+// Computes a hash for a given TPI record.
+template <typename T>
+static uint32_t getTpiHash(T &Rec, ArrayRef<uint8_t> FullRecord) {
+ auto Opts = static_cast<uint16_t>(Rec.getOptions());
+
+ bool ForwardRef =
+ Opts & static_cast<uint16_t>(ClassOptions::ForwardReference);
+ bool Scoped = Opts & static_cast<uint16_t>(ClassOptions::Scoped);
+ bool UniqueName = Opts & static_cast<uint16_t>(ClassOptions::HasUniqueName);
+ bool IsAnon = UniqueName && isAnonymous(Rec);
+
+ if (!ForwardRef && !Scoped && !IsAnon)
+ return hashStringV1(Rec.getName());
+ if (!ForwardRef && UniqueName && !IsAnon)
+ return hashStringV1(Rec.getUniqueName());
+ return hashBufferV8(FullRecord);
+}
+
+template <typename T> static uint32_t getSourceLineHash(T &Rec) {
+ char Buf[4];
+ support::endian::write32le(Buf, Rec.getUDT().getIndex());
+ return hashStringV1(StringRef(Buf, 4));
+}
+
+void TpiHashUpdater::visitKnownRecordImpl(CVType &CVR,
+ UdtSourceLineRecord &Rec) {
+ CVR.Hash = getSourceLineHash(Rec);
+}
+
+void TpiHashUpdater::visitKnownRecordImpl(CVType &CVR,
+ UdtModSourceLineRecord &Rec) {
+ CVR.Hash = getSourceLineHash(Rec);
+}
+
+void TpiHashUpdater::visitKnownRecordImpl(CVType &CVR, ClassRecord &Rec) {
+ CVR.Hash = getTpiHash(Rec, CVR.data());
+}
+
+void TpiHashUpdater::visitKnownRecordImpl(CVType &CVR, EnumRecord &Rec) {
+ CVR.Hash = getTpiHash(Rec, CVR.data());
+}
+
+void TpiHashUpdater::visitKnownRecordImpl(CVType &CVR, UnionRecord &Rec) {
+ CVR.Hash = getTpiHash(Rec, CVR.data());
+}
+
+Error TpiHashVerifier::visitKnownRecord(CVType &CVR, UdtSourceLineRecord &Rec) {
+ return verifySourceLine(Rec.getUDT());
+}
+
+Error TpiHashVerifier::visitKnownRecord(CVType &CVR,
+ UdtModSourceLineRecord &Rec) {
+ return verifySourceLine(Rec.getUDT());
+}
+
+Error TpiHashVerifier::visitKnownRecord(CVType &CVR, ClassRecord &Rec) {
+ if (getTpiHash(Rec, CVR.data()) % NumHashBuckets != HashValues[Index])
+ return errorInvalidHash();
+ return Error::success();
+}
+Error TpiHashVerifier::visitKnownRecord(CVType &CVR, EnumRecord &Rec) {
+ if (getTpiHash(Rec, CVR.data()) % NumHashBuckets != HashValues[Index])
+ return errorInvalidHash();
+ return Error::success();
+}
+Error TpiHashVerifier::visitKnownRecord(CVType &CVR, UnionRecord &Rec) {
+ if (getTpiHash(Rec, CVR.data()) % NumHashBuckets != HashValues[Index])
+ return errorInvalidHash();
+ return Error::success();
+}
+
+Error TpiHashVerifier::verifySourceLine(codeview::TypeIndex TI) {
+ char Buf[4];
+ support::endian::write32le(Buf, TI.getIndex());
+ uint32_t Hash = hashStringV1(StringRef(Buf, 4));
+ if (Hash % NumHashBuckets != HashValues[Index])
+ return errorInvalidHash();
+ return Error::success();
+}
+
+Error TpiHashVerifier::visitTypeBegin(CVType &Rec) {
+ ++Index;
+ RawRecord = Rec;
+ return Error::success();
+}
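
In short, getTpiHash() picks one of three hashes: a UDT that is not a forward reference, not
scoped and not anonymous hashes by its plain name; one that is not a forward reference and not
anonymous but carries a unique name hashes by that unique name; everything else (forward
references, anonymous UDTs, scoped records without a unique name) falls back to hashBufferV8
over the raw record bytes. The *SourceLine records always hash the little-endian bytes of their
UDT type index instead. For example (hypothetical records):

    // class Foo { ... };               -> hashStringV1("Foo")
    // class Foo;   (forward reference) -> hashBufferV8(<full record bytes>)
    // namespace N { class Bar {...}; } -> hashStringV1(Rec.getUniqueName())  // scoped
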
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Raw/TpiStream.cpp b/contrib/llvm/lib/DebugInfo/PDB/Raw/TpiStream.cpp
index 5617e57ccf67..a1167cd98454 100644
--- a/contrib/llvm/lib/DebugInfo/PDB/Raw/TpiStream.cpp
+++ b/contrib/llvm/lib/DebugInfo/PDB/Raw/TpiStream.cpp
@@ -7,155 +7,55 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/DebugInfo/PDB/Raw/TpiStream.h"
-
+#include "llvm/ADT/iterator_range.h"
#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
-#include "llvm/DebugInfo/CodeView/CodeView.h"
-#include "llvm/DebugInfo/CodeView/StreamReader.h"
-#include "llvm/DebugInfo/CodeView/TypeIndex.h"
+#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
-#include "llvm/DebugInfo/PDB/Raw/Hash.h"
-#include "llvm/DebugInfo/PDB/Raw/IndexedStreamData.h"
-#include "llvm/DebugInfo/PDB/Raw/MappedBlockStream.h"
+#include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h"
+#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
+#include "llvm/DebugInfo/MSF/StreamReader.h"
#include "llvm/DebugInfo/PDB/Raw/PDBFile.h"
#include "llvm/DebugInfo/PDB/Raw/RawConstants.h"
#include "llvm/DebugInfo/PDB/Raw/RawError.h"
#include "llvm/DebugInfo/PDB/Raw/RawTypes.h"
-
+#include "llvm/DebugInfo/PDB/Raw/TpiHashing.h"
+#include "llvm/DebugInfo/PDB/Raw/TpiStream.h"
#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+#include <algorithm>
+#include <cstdint>
+#include <vector>
using namespace llvm;
using namespace llvm::codeview;
using namespace llvm::support;
+using namespace llvm::msf;
using namespace llvm::pdb;
-namespace {
-const uint32_t MinHashBuckets = 0x1000;
-const uint32_t MaxHashBuckets = 0x40000;
-}
-
-// This corresponds to `HDR` in PDB/dbi/tpi.h.
-struct TpiStream::HeaderInfo {
- struct EmbeddedBuf {
- little32_t Off;
- ulittle32_t Length;
- };
-
- ulittle32_t Version;
- ulittle32_t HeaderSize;
- ulittle32_t TypeIndexBegin;
- ulittle32_t TypeIndexEnd;
- ulittle32_t TypeRecordBytes;
-
- // The following members correspond to `TpiHash` in PDB/dbi/tpi.h.
- ulittle16_t HashStreamIndex;
- ulittle16_t HashAuxStreamIndex;
- ulittle32_t HashKeySize;
- ulittle32_t NumHashBuckets;
-
- EmbeddedBuf HashValueBuffer;
- EmbeddedBuf IndexOffsetBuffer;
- EmbeddedBuf HashAdjBuffer;
-};
-
TpiStream::TpiStream(const PDBFile &File,
std::unique_ptr<MappedBlockStream> Stream)
: Pdb(File), Stream(std::move(Stream)) {}
-TpiStream::~TpiStream() {}
-
-// Corresponds to `fUDTAnon`.
-template <typename T> static bool isAnonymous(T &Rec) {
- StringRef Name = Rec.getName();
- return Name == "<unnamed-tag>" || Name == "__unnamed" ||
- Name.endswith("::<unnamed-tag>") || Name.endswith("::__unnamed");
-}
-
-// Computes a hash for a given TPI record.
-template <typename T>
-static uint32_t getTpiHash(T &Rec, const CVRecord<TypeLeafKind> &RawRec) {
- auto Opts = static_cast<uint16_t>(Rec.getOptions());
-
- bool ForwardRef =
- Opts & static_cast<uint16_t>(ClassOptions::ForwardReference);
- bool Scoped = Opts & static_cast<uint16_t>(ClassOptions::Scoped);
- bool UniqueName = Opts & static_cast<uint16_t>(ClassOptions::HasUniqueName);
- bool IsAnon = UniqueName && isAnonymous(Rec);
-
- if (!ForwardRef && !Scoped && !IsAnon)
- return hashStringV1(Rec.getName());
- if (!ForwardRef && UniqueName && !IsAnon)
- return hashStringV1(Rec.getUniqueName());
- return hashBufferV8(RawRec.RawData);
-}
-
-namespace {
-class TpiHashVerifier : public TypeVisitorCallbacks {
-public:
- TpiHashVerifier(FixedStreamArray<support::ulittle32_t> &HashValues,
- uint32_t NumHashBuckets)
- : HashValues(HashValues), NumHashBuckets(NumHashBuckets) {}
-
- Error visitUdtSourceLine(UdtSourceLineRecord &Rec) override {
- return verifySourceLine(Rec);
- }
-
- Error visitUdtModSourceLine(UdtModSourceLineRecord &Rec) override {
- return verifySourceLine(Rec);
- }
-
- Error visitClass(ClassRecord &Rec) override { return verify(Rec); }
- Error visitEnum(EnumRecord &Rec) override { return verify(Rec); }
- Error visitUnion(UnionRecord &Rec) override { return verify(Rec); }
-
- Error visitTypeBegin(const CVRecord<TypeLeafKind> &Rec) override {
- ++Index;
- RawRecord = &Rec;
- return Error::success();
- }
-
-private:
- template <typename T> Error verify(T &Rec) {
- uint32_t Hash = getTpiHash(Rec, *RawRecord);
- if (Hash % NumHashBuckets != HashValues[Index])
- return errorInvalidHash();
- return Error::success();
- }
-
- template <typename T> Error verifySourceLine(T &Rec) {
- char Buf[4];
- support::endian::write32le(Buf, Rec.getUDT().getIndex());
- uint32_t Hash = hashStringV1(StringRef(Buf, 4));
- if (Hash % NumHashBuckets != HashValues[Index])
- return errorInvalidHash();
- return Error::success();
- }
-
- Error errorInvalidHash() {
- return make_error<RawError>(
- raw_error_code::invalid_tpi_hash,
- "Type index is 0x" + utohexstr(TypeIndex::FirstNonSimpleIndex + Index));
- }
-
- FixedStreamArray<support::ulittle32_t> HashValues;
- const CVRecord<TypeLeafKind> *RawRecord;
- uint32_t NumHashBuckets;
- uint32_t Index = -1;
-};
-}
+TpiStream::~TpiStream() = default;
// Verifies that a given type record matches with a given hash value.
// Currently we only verify SRC_LINE records.
Error TpiStream::verifyHashValues() {
TpiHashVerifier Verifier(HashValues, Header->NumHashBuckets);
- CVTypeVisitor Visitor(Verifier);
+ TypeDeserializer Deserializer;
+
+ TypeVisitorCallbackPipeline Pipeline;
+ Pipeline.addCallbackToPipeline(Deserializer);
+ Pipeline.addCallbackToPipeline(Verifier);
+
+ CVTypeVisitor Visitor(Pipeline);
return Visitor.visitTypeStream(TypeRecords);
}
Error TpiStream::reload() {
StreamReader Reader(*Stream);
- if (Reader.bytesRemaining() < sizeof(HeaderInfo))
+ if (Reader.bytesRemaining() < sizeof(TpiStreamHeader))
return make_error<RawError>(raw_error_code::corrupt_file,
"TPI Stream does not contain a header.");
@@ -167,7 +67,7 @@ Error TpiStream::reload() {
return make_error<RawError>(raw_error_code::corrupt_file,
"Unsupported TPI Version.");
- if (Header->HeaderSize != sizeof(HeaderInfo))
+ if (Header->HeaderSize != sizeof(TpiStreamHeader))
return make_error<RawError>(raw_error_code::corrupt_file,
"Corrupt TPI Header size.");
@@ -175,8 +75,8 @@ Error TpiStream::reload() {
return make_error<RawError>(raw_error_code::corrupt_file,
"TPI Stream expected 4 byte hash key size.");
- if (Header->NumHashBuckets < MinHashBuckets ||
- Header->NumHashBuckets > MaxHashBuckets)
+ if (Header->NumHashBuckets < MinTpiHashBuckets ||
+ Header->NumHashBuckets > MaxTpiHashBuckets)
return make_error<RawError>(raw_error_code::corrupt_file,
"TPI Stream Invalid number of hash buckets.");
@@ -185,43 +85,47 @@ Error TpiStream::reload() {
return EC;
// Hash indices, hash values, etc come from the hash stream.
- if (Header->HashStreamIndex >= Pdb.getNumStreams())
- return make_error<RawError>(raw_error_code::corrupt_file,
- "Invalid TPI hash stream index.");
-
- auto HS =
- MappedBlockStream::createIndexedStream(Header->HashStreamIndex, Pdb);
- if (!HS)
- return HS.takeError();
- StreamReader HSR(**HS);
-
- uint32_t NumHashValues = Header->HashValueBuffer.Length / sizeof(ulittle32_t);
- if (NumHashValues != NumTypeRecords())
- return make_error<RawError>(
- raw_error_code::corrupt_file,
- "TPI hash count does not match with the number of type records.");
- HSR.setOffset(Header->HashValueBuffer.Off);
- if (auto EC = HSR.readArray(HashValues, NumHashValues))
- return EC;
-
- HSR.setOffset(Header->IndexOffsetBuffer.Off);
- uint32_t NumTypeIndexOffsets =
- Header->IndexOffsetBuffer.Length / sizeof(TypeIndexOffset);
- if (auto EC = HSR.readArray(TypeIndexOffsets, NumTypeIndexOffsets))
- return EC;
-
- HSR.setOffset(Header->HashAdjBuffer.Off);
- uint32_t NumHashAdjustments =
- Header->HashAdjBuffer.Length / sizeof(TypeIndexOffset);
- if (auto EC = HSR.readArray(HashAdjustments, NumHashAdjustments))
- return EC;
-
- HashStream = std::move(*HS);
-
- // TPI hash table is a parallel array for the type records.
- // Verify that the hash values match with type records.
- if (auto EC = verifyHashValues())
- return EC;
+ if (Header->HashStreamIndex != kInvalidStreamIndex) {
+ if (Header->HashStreamIndex >= Pdb.getNumStreams())
+ return make_error<RawError>(raw_error_code::corrupt_file,
+ "Invalid TPI hash stream index.");
+
+ auto HS = MappedBlockStream::createIndexedStream(
+ Pdb.getMsfLayout(), Pdb.getMsfBuffer(), Header->HashStreamIndex);
+ StreamReader HSR(*HS);
+
+ uint32_t NumHashValues =
+ Header->HashValueBuffer.Length / sizeof(ulittle32_t);
+ if (NumHashValues != NumTypeRecords())
+ return make_error<RawError>(
+ raw_error_code::corrupt_file,
+ "TPI hash count does not match with the number of type records.");
+ HSR.setOffset(Header->HashValueBuffer.Off);
+ if (auto EC = HSR.readArray(HashValues, NumHashValues))
+ return EC;
+ std::vector<ulittle32_t> HashValueList;
+ for (auto I : HashValues)
+ HashValueList.push_back(I);
+
+ HSR.setOffset(Header->IndexOffsetBuffer.Off);
+ uint32_t NumTypeIndexOffsets =
+ Header->IndexOffsetBuffer.Length / sizeof(TypeIndexOffset);
+ if (auto EC = HSR.readArray(TypeIndexOffsets, NumTypeIndexOffsets))
+ return EC;
+
+ HSR.setOffset(Header->HashAdjBuffer.Off);
+ uint32_t NumHashAdjustments =
+ Header->HashAdjBuffer.Length / sizeof(TypeIndexOffset);
+ if (auto EC = HSR.readArray(HashAdjustments, NumHashAdjustments))
+ return EC;
+
+ HashStream = std::move(HS);
+
+ // TPI hash table is a parallel array for the type records.
+ // Verify that the hash values match with type records.
+ if (auto EC = verifyHashValues())
+ return EC;
+ }
return Error::success();
}
@@ -267,7 +171,7 @@ TpiStream::getHashAdjustments() const {
iterator_range<CVTypeArray::Iterator>
TpiStream::types(bool *HadError) const {
- return llvm::make_range(TypeRecords.begin(HadError), TypeRecords.end());
+ return make_range(TypeRecords.begin(HadError), TypeRecords.end());
}
Error TpiStream::commit() { return Error::success(); }
diff --git a/contrib/llvm/lib/DebugInfo/PDB/Raw/TpiStreamBuilder.cpp b/contrib/llvm/lib/DebugInfo/PDB/Raw/TpiStreamBuilder.cpp
new file mode 100644
index 000000000000..c769321f18c1
--- /dev/null
+++ b/contrib/llvm/lib/DebugInfo/PDB/Raw/TpiStreamBuilder.cpp
@@ -0,0 +1,145 @@
+//===- TpiStreamBuilder.cpp - -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/DebugInfo/CodeView/TypeIndex.h"
+#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/MSF/ByteStream.h"
+#include "llvm/DebugInfo/MSF/MSFBuilder.h"
+#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
+#include "llvm/DebugInfo/MSF/StreamArray.h"
+#include "llvm/DebugInfo/MSF/StreamReader.h"
+#include "llvm/DebugInfo/MSF/StreamWriter.h"
+#include "llvm/DebugInfo/PDB/Raw/PDBFile.h"
+#include "llvm/DebugInfo/PDB/Raw/RawError.h"
+#include "llvm/DebugInfo/PDB/Raw/RawTypes.h"
+#include "llvm/DebugInfo/PDB/Raw/TpiStream.h"
+#include "llvm/DebugInfo/PDB/Raw/TpiStreamBuilder.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/Error.h"
+#include <algorithm>
+#include <cstdint>
+
+using namespace llvm;
+using namespace llvm::msf;
+using namespace llvm::pdb;
+using namespace llvm::support;
+
+TpiStreamBuilder::TpiStreamBuilder(MSFBuilder &Msf, uint32_t StreamIdx)
+ : Msf(Msf), Allocator(Msf.getAllocator()), Header(nullptr), Idx(StreamIdx) {
+}
+
+TpiStreamBuilder::~TpiStreamBuilder() = default;
+
+void TpiStreamBuilder::setVersionHeader(PdbRaw_TpiVer Version) {
+ VerHeader = Version;
+}
+
+void TpiStreamBuilder::addTypeRecord(const codeview::CVType &Record) {
+ TypeRecords.push_back(Record);
+ TypeRecordStream.setItems(TypeRecords);
+}
+
+Error TpiStreamBuilder::finalize() {
+ if (Header)
+ return Error::success();
+
+ TpiStreamHeader *H = Allocator.Allocate<TpiStreamHeader>();
+
+ uint32_t Count = TypeRecords.size();
+ uint32_t HashBufferSize = calculateHashBufferSize();
+
+ H->Version = *VerHeader;
+ H->HeaderSize = sizeof(TpiStreamHeader);
+ H->TypeIndexBegin = codeview::TypeIndex::FirstNonSimpleIndex;
+ H->TypeIndexEnd = H->TypeIndexBegin + Count;
+ H->TypeRecordBytes = TypeRecordStream.getLength();
+
+ H->HashStreamIndex = HashStreamIndex;
+ H->HashAuxStreamIndex = kInvalidStreamIndex;
+ H->HashKeySize = sizeof(ulittle32_t);
+ H->NumHashBuckets = MinTpiHashBuckets;
+
+ // Recall that hash values go into a completely different stream identified by
+ // the `HashStreamIndex` field of the `TpiStreamHeader`. Therefore, the data
+ // begins at offset 0 of this independent stream.
+ H->HashValueBuffer.Off = 0;
+ H->HashValueBuffer.Length = HashBufferSize;
+ H->HashAdjBuffer.Off = H->HashValueBuffer.Off + H->HashValueBuffer.Length;
+ H->HashAdjBuffer.Length = 0;
+ H->IndexOffsetBuffer.Off = H->HashAdjBuffer.Off + H->HashAdjBuffer.Length;
+ H->IndexOffsetBuffer.Length = 0;
+
+ Header = H;
+ return Error::success();
+}
+
+uint32_t TpiStreamBuilder::calculateSerializedLength() const {
+ return sizeof(TpiStreamHeader) + TypeRecordStream.getLength();
+}
+
+uint32_t TpiStreamBuilder::calculateHashBufferSize() const {
+ if (TypeRecords.empty() || !TypeRecords[0].Hash.hasValue())
+ return 0;
+ return TypeRecords.size() * sizeof(ulittle32_t);
+}
+
+Error TpiStreamBuilder::finalizeMsfLayout() {
+ uint32_t Length = calculateSerializedLength();
+ if (auto EC = Msf.setStreamSize(Idx, Length))
+ return EC;
+
+ uint32_t HashBufferSize = calculateHashBufferSize();
+
+ if (HashBufferSize == 0)
+ return Error::success();
+
+ auto ExpectedIndex = Msf.addStream(HashBufferSize);
+ if (!ExpectedIndex)
+ return ExpectedIndex.takeError();
+ HashStreamIndex = *ExpectedIndex;
+ ulittle32_t *H = Allocator.Allocate<ulittle32_t>(TypeRecords.size());
+ MutableArrayRef<ulittle32_t> HashBuffer(H, TypeRecords.size());
+ for (uint32_t I = 0; I < TypeRecords.size(); ++I) {
+ HashBuffer[I] = *TypeRecords[I].Hash % MinTpiHashBuckets;
+ }
+ ArrayRef<uint8_t> Bytes(reinterpret_cast<const uint8_t *>(HashBuffer.data()),
+ HashBufferSize);
+ HashValueStream = llvm::make_unique<ByteStream>(Bytes);
+ return Error::success();
+}
+
+Error TpiStreamBuilder::commit(const msf::MSFLayout &Layout,
+ const msf::WritableStream &Buffer) {
+ if (auto EC = finalize())
+ return EC;
+
+ auto InfoS =
+ WritableMappedBlockStream::createIndexedStream(Layout, Buffer, Idx);
+
+ StreamWriter Writer(*InfoS);
+ if (auto EC = Writer.writeObject(*Header))
+ return EC;
+
+ auto RecordArray = VarStreamArray<codeview::CVType>(TypeRecordStream);
+ if (auto EC = Writer.writeArray(RecordArray))
+ return EC;
+
+ if (HashStreamIndex != kInvalidStreamIndex) {
+ auto HVS = WritableMappedBlockStream::createIndexedStream(Layout, Buffer,
+ HashStreamIndex);
+ StreamWriter HW(*HVS);
+ if (auto EC = HW.writeStreamRef(*HashValueStream))
+ return EC;
+ }
+
+ return Error::success();
+}
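
As a concrete example of the layout finalize() produces: with three hashed type records the
auxiliary hash stream is 12 bytes of bucketed hash values (one ulittle32_t per record, each
computed as *TypeRecords[I].Hash % MinTpiHashBuckets), and the header's embedded buffers
describe it as

    HashValueBuffer   = { Off = 0,  Length = 12 }
    HashAdjBuffer     = { Off = 12, Length = 0 }
    IndexOffsetBuffer = { Off = 12, Length = 0 }

with the whole thing living in the separate stream recorded in HashStreamIndex, not in the TPI
stream itself.
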
diff --git a/contrib/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp b/contrib/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
index adbe0cb69edb..1abb368127af 100644
--- a/contrib/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
+++ b/contrib/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
@@ -391,10 +391,10 @@ LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName) {
// If this is a COFF object containing PDB info, use a PDBContext to
// symbolize. Otherwise, use DWARF.
if (auto CoffObject = dyn_cast<COFFObjectFile>(Objects.first)) {
- const debug_pdb_info *PDBInfo;
+ const codeview::DebugInfo *DebugInfo;
StringRef PDBFileName;
- auto EC = CoffObject->getDebugPDBInfo(PDBInfo, PDBFileName);
- if (!EC && PDBInfo != nullptr) {
+ auto EC = CoffObject->getDebugPDBInfo(DebugInfo, PDBFileName);
+ if (!EC && DebugInfo != nullptr && !PDBFileName.empty()) {
using namespace pdb;
std::unique_ptr<IPDBSession> Session;
if (auto Err = loadDataForEXE(PDB_ReaderType::DIA,
diff --git a/contrib/llvm/lib/Demangle/ItaniumDemangle.cpp b/contrib/llvm/lib/Demangle/ItaniumDemangle.cpp
new file mode 100644
index 000000000000..097b6ca2e083
--- /dev/null
+++ b/contrib/llvm/lib/Demangle/ItaniumDemangle.cpp
@@ -0,0 +1,4276 @@
+//===- ItaniumDemangle.cpp ------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Demangle/Demangle.h"
+
+// This file exports a single function: llvm::itanium_demangle.
+// It also has no dependencies on the rest of llvm. It is implemented this way
+// so that it can be easily reused in libcxxabi.
+
+#include <algorithm>
+#include <cctype>
+#include <cstdlib>
+#include <cstring>
+#include <numeric>
+#include <string>
+#include <vector>
+
+#ifdef _MSC_VER
+// snprintf is implemented in VS 2015
+#if _MSC_VER < 1900
+#define snprintf _snprintf_s
+#endif
+#endif
+
+enum {
+ unknown_error = -4,
+ invalid_args = -3,
+ invalid_mangled_name,
+ memory_alloc_failure,
+ success
+};
+
+template <class C>
+static const char *parse_type(const char *first, const char *last, C &db);
+template <class C>
+static const char *parse_encoding(const char *first, const char *last, C &db);
+template <class C>
+static const char *parse_name(const char *first, const char *last, C &db,
+ bool *ends_with_template_args = 0);
+template <class C>
+static const char *parse_expression(const char *first, const char *last, C &db);
+template <class C>
+static const char *parse_template_args(const char *first, const char *last,
+ C &db);
+template <class C>
+static const char *parse_operator_name(const char *first, const char *last,
+ C &db);
+template <class C>
+static const char *parse_unqualified_name(const char *first, const char *last,
+ C &db);
+template <class C>
+static const char *parse_decltype(const char *first, const char *last, C &db);
+
+// <number> ::= [n] <non-negative decimal integer>
+
+static const char *parse_number(const char *first, const char *last) {
+ if (first != last) {
+ const char *t = first;
+ if (*t == 'n')
+ ++t;
+ if (t != last) {
+ if (*t == '0') {
+ first = t + 1;
+ } else if ('1' <= *t && *t <= '9') {
+ first = t + 1;
+ while (first != last && std::isdigit(*first))
+ ++first;
+ }
+ }
+ }
+ return first;
+}
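
A <number> is a run of decimal digits with an optional leading 'n' marking a negative value;
for example, in _Z1fILin1EEvv (roughly void f<-1>()) the "n1" after "Li" is such a number.
parse_number() only delimits the characters, leaving sign and value interpretation to callers.
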
+
+namespace {
+template <class Float> struct float_data;
+
+template <> struct float_data<float> {
+ static const size_t mangled_size = 8;
+ static const size_t max_demangled_size = 24;
+ static const char *spec;
+};
+const char *float_data<float>::spec = "%af";
+
+template <> struct float_data<double> {
+ static const size_t mangled_size = 16;
+ static const size_t max_demangled_size = 32;
+ static const char *spec;
+};
+
+const char *float_data<double>::spec = "%a";
+
+template <> struct float_data<long double> {
+#if defined(__mips__) && defined(__mips_n64) || defined(__aarch64__) || \
+ defined(__wasm__)
+ static const size_t mangled_size = 32;
+#elif defined(__arm__) || defined(__mips__) || defined(__hexagon__)
+ static const size_t mangled_size = 16;
+#else
+ static const size_t mangled_size =
+ 20; // May need to be adjusted to 16 or 24 on other platforms
+#endif
+ static const size_t max_demangled_size = 40;
+ static const char *spec;
+};
+
+const char *float_data<long double>::spec = "%LaL";
+}
+
+template <class Float, class C>
+static const char *parse_floating_number(const char *first, const char *last,
+ C &db) {
+ const size_t N = float_data<Float>::mangled_size;
+ if (static_cast<std::size_t>(last - first) > N) {
+ last = first + N;
+ union {
+ Float value;
+ char buf[sizeof(Float)];
+ };
+ const char *t = first;
+ char *e = buf;
+ for (; t != last; ++t, ++e) {
+ if (!isxdigit(*t))
+ return first;
+ unsigned d1 = isdigit(*t) ? static_cast<unsigned>(*t - '0')
+ : static_cast<unsigned>(*t - 'a' + 10);
+ ++t;
+ unsigned d0 = isdigit(*t) ? static_cast<unsigned>(*t - '0')
+ : static_cast<unsigned>(*t - 'a' + 10);
+ *e = static_cast<char>((d1 << 4) + d0);
+ }
+ if (*t == 'E') {
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ std::reverse(buf, e);
+#endif
+ char num[float_data<Float>::max_demangled_size] = {0};
+ int n = snprintf(num, sizeof(num), float_data<Float>::spec, value);
+ if (static_cast<std::size_t>(n) >= sizeof(num))
+ return first;
+ db.names.push_back(std::string(num, static_cast<std::size_t>(n)));
+ first = t + 1;
+ }
+ }
+ return first;
+}
+
+// <source-name> ::= <positive length number> <identifier>
+
+template <class C>
+static const char *parse_source_name(const char *first, const char *last,
+ C &db) {
+ if (first != last) {
+ char c = *first;
+ if (isdigit(c) && first + 1 != last) {
+ const char *t = first + 1;
+ size_t n = static_cast<size_t>(c - '0');
+ for (c = *t; isdigit(c); c = *t) {
+ n = n * 10 + static_cast<size_t>(c - '0');
+ if (++t == last)
+ return first;
+ }
+ if (static_cast<size_t>(last - t) >= n) {
+ std::string r(t, n);
+ if (r.substr(0, 10) == "_GLOBAL__N")
+ db.names.push_back("(anonymous namespace)");
+ else
+ db.names.push_back(std::move(r));
+ first = t + n;
+ }
+ }
+ }
+ return first;
+}
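
A <source-name> is a length-prefixed identifier: "3foo" is foo, "12basic_string" is
basic_string, and identifiers beginning with _GLOBAL__N are printed as "(anonymous namespace)".
A quick way to sanity-check such examples is the standard __cxa_demangle entry point, used here
instead of this file's own API so the snippet stays self-contained:

    #include <cxxabi.h>
    #include <cstdio>
    #include <cstdlib>

    int main() {
      int status = 0;
      // _Z 3foo v  ==>  "foo()"   (source-name "3foo", no parameters)
      char *s = abi::__cxa_demangle("_Z3foov", nullptr, nullptr, &status);
      if (status == 0)
        std::printf("%s\n", s);
      std::free(s);
      return 0;
    }
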
+
+// <substitution> ::= S <seq-id> _
+// ::= S_
+// <substitution> ::= Sa # ::std::allocator
+// <substitution> ::= Sb # ::std::basic_string
+// <substitution> ::= Ss # ::std::basic_string < char,
+// ::std::char_traits<char>,
+// ::std::allocator<char> >
+// <substitution> ::= Si # ::std::basic_istream<char, std::char_traits<char> >
+// <substitution> ::= So # ::std::basic_ostream<char, std::char_traits<char> >
+// <substitution> ::= Sd # ::std::basic_iostream<char, std::char_traits<char> >
+
+template <class C>
+static const char *parse_substitution(const char *first, const char *last,
+ C &db) {
+ if (last - first >= 2) {
+ if (*first == 'S') {
+ switch (first[1]) {
+ case 'a':
+ db.names.push_back("std::allocator");
+ first += 2;
+ break;
+ case 'b':
+ db.names.push_back("std::basic_string");
+ first += 2;
+ break;
+ case 's':
+ db.names.push_back("std::string");
+ first += 2;
+ break;
+ case 'i':
+ db.names.push_back("std::istream");
+ first += 2;
+ break;
+ case 'o':
+ db.names.push_back("std::ostream");
+ first += 2;
+ break;
+ case 'd':
+ db.names.push_back("std::iostream");
+ first += 2;
+ break;
+ case '_':
+ if (!db.subs.empty()) {
+ for (const auto &n : db.subs.front())
+ db.names.push_back(n);
+ first += 2;
+ }
+ break;
+ default:
+ if (std::isdigit(first[1]) || std::isupper(first[1])) {
+ size_t sub = 0;
+ const char *t = first + 1;
+ if (std::isdigit(*t))
+ sub = static_cast<size_t>(*t - '0');
+ else
+ sub = static_cast<size_t>(*t - 'A') + 10;
+ for (++t; t != last && (std::isdigit(*t) || std::isupper(*t)); ++t) {
+ sub *= 36;
+ if (std::isdigit(*t))
+ sub += static_cast<size_t>(*t - '0');
+ else
+ sub += static_cast<size_t>(*t - 'A') + 10;
+ }
+ if (t == last || *t != '_')
+ return first;
+ ++sub;
+ if (sub < db.subs.size()) {
+ for (const auto &n : db.subs[sub])
+ db.names.push_back(n);
+ first = t + 1;
+ }
+ }
+ break;
+ }
+ }
+ }
+ return first;
+}
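
The <seq-id> after 'S' is written in base 36 (digits, then uppercase letters) and is off by one
with respect to the substitution table, which is what the ++sub above accounts for:

    S_    -> substitution #0 (the first entry recorded)
    S0_   -> substitution #1
    SZ_   -> substitution #36
    S10_  -> substitution #37
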
+
+// <builtin-type> ::= v # void
+// ::= w # wchar_t
+// ::= b # bool
+// ::= c # char
+// ::= a # signed char
+// ::= h # unsigned char
+// ::= s # short
+// ::= t # unsigned short
+// ::= i # int
+// ::= j # unsigned int
+// ::= l # long
+// ::= m # unsigned long
+// ::= x # long long, __int64
+// ::= y # unsigned long long, __int64
+// ::= n # __int128
+// ::= o # unsigned __int128
+// ::= f # float
+// ::= d # double
+// ::= e # long double, __float80
+// ::= g # __float128
+// ::= z # ellipsis
+// ::= Dd # IEEE 754r decimal floating point (64 bits)
+// ::= De # IEEE 754r decimal floating point (128 bits)
+// ::= Df # IEEE 754r decimal floating point (32 bits)
+// ::= Dh # IEEE 754r half-precision floating point (16 bits)
+// ::= Di # char32_t
+// ::= Ds # char16_t
+// ::= Da # auto (in dependent new-expressions)
+// ::= Dc # decltype(auto)
+// ::= Dn # std::nullptr_t (i.e., decltype(nullptr))
+// ::= u <source-name> # vendor extended type
+
+template <class C>
+static const char *parse_builtin_type(const char *first, const char *last,
+ C &db) {
+ if (first != last) {
+ switch (*first) {
+ case 'v':
+ db.names.push_back("void");
+ ++first;
+ break;
+ case 'w':
+ db.names.push_back("wchar_t");
+ ++first;
+ break;
+ case 'b':
+ db.names.push_back("bool");
+ ++first;
+ break;
+ case 'c':
+ db.names.push_back("char");
+ ++first;
+ break;
+ case 'a':
+ db.names.push_back("signed char");
+ ++first;
+ break;
+ case 'h':
+ db.names.push_back("unsigned char");
+ ++first;
+ break;
+ case 's':
+ db.names.push_back("short");
+ ++first;
+ break;
+ case 't':
+ db.names.push_back("unsigned short");
+ ++first;
+ break;
+ case 'i':
+ db.names.push_back("int");
+ ++first;
+ break;
+ case 'j':
+ db.names.push_back("unsigned int");
+ ++first;
+ break;
+ case 'l':
+ db.names.push_back("long");
+ ++first;
+ break;
+ case 'm':
+ db.names.push_back("unsigned long");
+ ++first;
+ break;
+ case 'x':
+ db.names.push_back("long long");
+ ++first;
+ break;
+ case 'y':
+ db.names.push_back("unsigned long long");
+ ++first;
+ break;
+ case 'n':
+ db.names.push_back("__int128");
+ ++first;
+ break;
+ case 'o':
+ db.names.push_back("unsigned __int128");
+ ++first;
+ break;
+ case 'f':
+ db.names.push_back("float");
+ ++first;
+ break;
+ case 'd':
+ db.names.push_back("double");
+ ++first;
+ break;
+ case 'e':
+ db.names.push_back("long double");
+ ++first;
+ break;
+ case 'g':
+ db.names.push_back("__float128");
+ ++first;
+ break;
+ case 'z':
+ db.names.push_back("...");
+ ++first;
+ break;
+ case 'u': {
+ const char *t = parse_source_name(first + 1, last, db);
+ if (t != first + 1)
+ first = t;
+ } break;
+ case 'D':
+ if (first + 1 != last) {
+ switch (first[1]) {
+ case 'd':
+ db.names.push_back("decimal64");
+ first += 2;
+ break;
+ case 'e':
+ db.names.push_back("decimal128");
+ first += 2;
+ break;
+ case 'f':
+ db.names.push_back("decimal32");
+ first += 2;
+ break;
+ case 'h':
+ db.names.push_back("decimal16");
+ first += 2;
+ break;
+ case 'i':
+ db.names.push_back("char32_t");
+ first += 2;
+ break;
+ case 's':
+ db.names.push_back("char16_t");
+ first += 2;
+ break;
+ case 'a':
+ db.names.push_back("auto");
+ first += 2;
+ break;
+ case 'c':
+ db.names.push_back("decltype(auto)");
+ first += 2;
+ break;
+ case 'n':
+ db.names.push_back("std::nullptr_t");
+ first += 2;
+ break;
+ }
+ }
+ break;
+ }
+ }
+ return first;
+}
+
+// <CV-qualifiers> ::= [r] [V] [K]
+
+static const char *parse_cv_qualifiers(const char *first, const char *last,
+ unsigned &cv) {
+ cv = 0;
+ if (first != last) {
+ if (*first == 'r') {
+ cv |= 4;
+ ++first;
+ }
+ if (*first == 'V') {
+ cv |= 2;
+ ++first;
+ }
+ if (*first == 'K') {
+ cv |= 1;
+ ++first;
+ }
+ }
+ return first;
+}
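
The resulting bitmask uses bit 0 for const (K), bit 1 for volatile (V) and bit 2 for restrict
(r), and the qualifiers are only recognized in exactly that r, V, K order; e.g. "rVK" yields
cv == 7, a bare "K" yields cv == 1, and "Kr" consumes only the 'K'.
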
+
+// <template-param> ::= T_ # first template parameter
+// ::= T <parameter-2 non-negative number> _
+
+template <class C>
+static const char *parse_template_param(const char *first, const char *last,
+ C &db) {
+ if (last - first >= 2) {
+ if (*first == 'T') {
+ if (first[1] == '_') {
+ if (db.template_param.empty())
+ return first;
+ if (!db.template_param.back().empty()) {
+ for (auto &t : db.template_param.back().front())
+ db.names.push_back(t);
+ first += 2;
+ } else {
+ db.names.push_back("T_");
+ first += 2;
+ db.fix_forward_references = true;
+ }
+ } else if (isdigit(first[1])) {
+ const char *t = first + 1;
+ size_t sub = static_cast<size_t>(*t - '0');
+ for (++t; t != last && isdigit(*t); ++t) {
+ sub *= 10;
+ sub += static_cast<size_t>(*t - '0');
+ }
+ if (t == last || *t != '_' || db.template_param.empty())
+ return first;
+ ++sub;
+ if (sub < db.template_param.back().size()) {
+ for (auto &temp : db.template_param.back()[sub])
+ db.names.push_back(temp);
+ first = t + 1;
+ } else {
+ db.names.push_back(std::string(first, t + 1));
+ first = t + 1;
+ db.fix_forward_references = true;
+ }
+ }
+ }
+ }
+ return first;
+}
+
+// cc <type> <expression> # const_cast<type>
+// (expression)
+
+template <class C>
+static const char *parse_const_cast_expr(const char *first, const char *last,
+ C &db) {
+ if (last - first >= 3 && first[0] == 'c' && first[1] == 'c') {
+ const char *t = parse_type(first + 2, last, db);
+ if (t != first + 2) {
+ const char *t1 = parse_expression(t, last, db);
+ if (t1 != t) {
+ if (db.names.size() < 2)
+ return first;
+ auto expr = db.names.back().move_full();
+ db.names.pop_back();
+ if (db.names.empty())
+ return first;
+ db.names.back() =
+ "const_cast<" + db.names.back().move_full() + ">(" + expr + ")";
+ first = t1;
+ }
+ }
+ }
+ return first;
+}
+
+// dc <type> <expression> # dynamic_cast<type>
+// (expression)
+
+template <class C>
+static const char *parse_dynamic_cast_expr(const char *first, const char *last,
+ C &db) {
+ if (last - first >= 3 && first[0] == 'd' && first[1] == 'c') {
+ const char *t = parse_type(first + 2, last, db);
+ if (t != first + 2) {
+ const char *t1 = parse_expression(t, last, db);
+ if (t1 != t) {
+ if (db.names.size() < 2)
+ return first;
+ auto expr = db.names.back().move_full();
+ db.names.pop_back();
+ if (db.names.empty())
+ return first;
+ db.names.back() =
+ "dynamic_cast<" + db.names.back().move_full() + ">(" + expr + ")";
+ first = t1;
+ }
+ }
+ }
+ return first;
+}
+
+// rc <type> <expression> # reinterpret_cast<type>
+// (expression)
+
+template <class C>
+static const char *parse_reinterpret_cast_expr(const char *first,
+ const char *last, C &db) {
+ if (last - first >= 3 && first[0] == 'r' && first[1] == 'c') {
+ const char *t = parse_type(first + 2, last, db);
+ if (t != first + 2) {
+ const char *t1 = parse_expression(t, last, db);
+ if (t1 != t) {
+ if (db.names.size() < 2)
+ return first;
+ auto expr = db.names.back().move_full();
+ db.names.pop_back();
+ if (db.names.empty())
+ return first;
+ db.names.back() = "reinterpret_cast<" + db.names.back().move_full() +
+ ">(" + expr + ")";
+ first = t1;
+ }
+ }
+ }
+ return first;
+}
+
+// sc <type> <expression> # static_cast<type>
+// (expression)
+
+template <class C>
+static const char *parse_static_cast_expr(const char *first, const char *last,
+ C &db) {
+ if (last - first >= 3 && first[0] == 's' && first[1] == 'c') {
+ const char *t = parse_type(first + 2, last, db);
+ if (t != first + 2) {
+ const char *t1 = parse_expression(t, last, db);
+ if (t1 != t) {
+ if (db.names.size() < 2)
+ return first;
+ auto expr = db.names.back().move_full();
+ db.names.pop_back();
+ db.names.back() =
+ "static_cast<" + db.names.back().move_full() + ">(" + expr + ")";
+ first = t1;
+ }
+ }
+ }
+ return first;
+}
+
+// sp <expression> # pack expansion
+
+template <class C>
+static const char *parse_pack_expansion(const char *first, const char *last,
+ C &db) {
+ if (last - first >= 3 && first[0] == 's' && first[1] == 'p') {
+ const char *t = parse_expression(first + 2, last, db);
+ if (t != first + 2)
+ first = t;
+ }
+ return first;
+}
+
+// st <type> # sizeof (a type)
+
+template <class C>
+static const char *parse_sizeof_type_expr(const char *first, const char *last,
+ C &db) {
+ if (last - first >= 3 && first[0] == 's' && first[1] == 't') {
+ const char *t = parse_type(first + 2, last, db);
+ if (t != first + 2) {
+ if (db.names.empty())
+ return first;
+ db.names.back() = "sizeof (" + db.names.back().move_full() + ")";
+ first = t;
+ }
+ }
+ return first;
+}
+
+// sz <expr> # sizeof (an expression)
+
+template <class C>
+static const char *parse_sizeof_expr_expr(const char *first, const char *last,
+ C &db) {
+ if (last - first >= 3 && first[0] == 's' && first[1] == 'z') {
+ const char *t = parse_expression(first + 2, last, db);
+ if (t != first + 2) {
+ if (db.names.empty())
+ return first;
+ db.names.back() = "sizeof (" + db.names.back().move_full() + ")";
+ first = t;
+ }
+ }
+ return first;
+}
+
+// sZ <template-param> # size of a parameter
+// pack
+
+template <class C>
+static const char *parse_sizeof_param_pack_expr(const char *first,
+ const char *last, C &db) {
+ if (last - first >= 3 && first[0] == 's' && first[1] == 'Z' &&
+ first[2] == 'T') {
+ size_t k0 = db.names.size();
+ const char *t = parse_template_param(first + 2, last, db);
+ size_t k1 = db.names.size();
+ if (t != first + 2) {
+ std::string tmp("sizeof...(");
+ size_t k = k0;
+ if (k != k1) {
+ tmp += db.names[k].move_full();
+ for (++k; k != k1; ++k)
+ tmp += ", " + db.names[k].move_full();
+ }
+ tmp += ")";
+ for (; k1 != k0; --k1)
+ db.names.pop_back();
+ db.names.push_back(std::move(tmp));
+ first = t;
+ }
+ }
+ return first;
+}
+
+// <function-param> ::= fp <top-level CV-qualifiers> _ # L == 0, first parameter
+// ::= fp <top-level CV-qualifiers> <parameter-2 non-negative
+// number> _ # L == 0, second and later parameters
+// ::= fL <L-1 non-negative number> p <top-level CV-qualifiers>
+// _ # L > 0, first parameter
+// ::= fL <L-1 non-negative number> p <top-level CV-qualifiers>
+// <parameter-2 non-negative number> _ # L > 0, second and
+// later parameters
+
+template <class C>
+static const char *parse_function_param(const char *first, const char *last,
+ C &db) {
+ if (last - first >= 3 && *first == 'f') {
+ if (first[1] == 'p') {
+ unsigned cv;
+ const char *t = parse_cv_qualifiers(first + 2, last, cv);
+ const char *t1 = parse_number(t, last);
+ if (t1 != last && *t1 == '_') {
+ db.names.push_back("fp" + std::string(t, t1));
+ first = t1 + 1;
+ }
+ } else if (first[1] == 'L') {
+ unsigned cv;
+ const char *t0 = parse_number(first + 2, last);
+ if (t0 != last && *t0 == 'p') {
+ ++t0;
+ const char *t = parse_cv_qualifiers(t0, last, cv);
+ const char *t1 = parse_number(t, last);
+ if (t1 != last && *t1 == '_') {
+ db.names.push_back("fp" + std::string(t, t1));
+ first = t1 + 1;
+ }
+ }
+ }
+ }
+ return first;
+}
+
+// sZ <function-param> # size of a function
+// parameter pack
+
+template <class C>
+static const char *parse_sizeof_function_param_pack_expr(const char *first,
+ const char *last,
+ C &db) {
+ if (last - first >= 3 && first[0] == 's' && first[1] == 'Z' &&
+ first[2] == 'f') {
+ const char *t = parse_function_param(first + 2, last, db);
+ if (t != first + 2) {
+ if (db.names.empty())
+ return first;
+ db.names.back() = "sizeof...(" + db.names.back().move_full() + ")";
+ first = t;
+ }
+ }
+ return first;
+}
+
+// te <expression> # typeid (expression)
+// ti <type> # typeid (type)
+
+template <class C>
+static const char *parse_typeid_expr(const char *first, const char *last,
+ C &db) {
+ if (last - first >= 3 && first[0] == 't' &&
+ (first[1] == 'e' || first[1] == 'i')) {
+ const char *t;
+ if (first[1] == 'e')
+ t = parse_expression(first + 2, last, db);
+ else
+ t = parse_type(first + 2, last, db);
+ if (t != first + 2) {
+ if (db.names.empty())
+ return first;
+ db.names.back() = "typeid(" + db.names.back().move_full() + ")";
+ first = t;
+ }
+ }
+ return first;
+}
+
+// tw <expression> # throw expression
+
+template <class C>
+static const char *parse_throw_expr(const char *first, const char *last,
+ C &db) {
+ if (last - first >= 3 && first[0] == 't' && first[1] == 'w') {
+ const char *t = parse_expression(first + 2, last, db);
+ if (t != first + 2) {
+ if (db.names.empty())
+ return first;
+ db.names.back() = "throw " + db.names.back().move_full();
+ first = t;
+ }
+ }
+ return first;
+}
+
+// ds <expression> <expression> # expr.*expr
+
+template <class C>
+static const char *parse_dot_star_expr(const char *first, const char *last,
+ C &db) {
+ if (last - first >= 3 && first[0] == 'd' && first[1] == 's') {
+ const char *t = parse_expression(first + 2, last, db);
+ if (t != first + 2) {
+ const char *t1 = parse_expression(t, last, db);
+ if (t1 != t) {
+ if (db.names.size() < 2)
+ return first;
+ auto expr = db.names.back().move_full();
+ db.names.pop_back();
+ db.names.back().first += ".*" + expr;
+ first = t1;
+ }
+ }
+ }
+ return first;
+}
+
+// <simple-id> ::= <source-name> [ <template-args> ]
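+// For illustration: "3foo" is the simple-id "foo", and "3fooIiE" adds
+// template arguments, giving "foo<int>".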
+
+template <class C>
+static const char *parse_simple_id(const char *first, const char *last, C &db) {
+ if (first != last) {
+ const char *t = parse_source_name(first, last, db);
+ if (t != first) {
+ const char *t1 = parse_template_args(t, last, db);
+ if (t1 != t) {
+ if (db.names.size() < 2)
+ return first;
+ auto args = db.names.back().move_full();
+ db.names.pop_back();
+ db.names.back().first += std::move(args);
+ }
+ first = t1;
+ } else
+ first = t;
+ }
+ return first;
+}
+
+// <unresolved-type> ::= <template-param>
+// ::= <decltype>
+// ::= <substitution>
+
+template <class C>
+static const char *parse_unresolved_type(const char *first, const char *last,
+ C &db) {
+ if (first != last) {
+ const char *t = first;
+ switch (*first) {
+ case 'T': {
+ size_t k0 = db.names.size();
+ t = parse_template_param(first, last, db);
+ size_t k1 = db.names.size();
+ if (t != first && k1 == k0 + 1) {
+ db.subs.push_back(typename C::sub_type(1, db.names.back()));
+ first = t;
+ } else {
+ for (; k1 != k0; --k1)
+ db.names.pop_back();
+ }
+ break;
+ }
+ case 'D':
+ t = parse_decltype(first, last, db);
+ if (t != first) {
+ if (db.names.empty())
+ return first;
+ db.subs.push_back(typename C::sub_type(1, db.names.back()));
+ first = t;
+ }
+ break;
+ case 'S':
+ t = parse_substitution(first, last, db);
+ if (t != first)
+ first = t;
+ else {
+ if (last - first > 2 && first[1] == 't') {
+ t = parse_unqualified_name(first + 2, last, db);
+ if (t != first + 2) {
+ if (db.names.empty())
+ return first;
+ db.names.back().first.insert(0, "std::");
+ db.subs.push_back(typename C::sub_type(1, db.names.back()));
+ first = t;
+ }
+ }
+ }
+ break;
+ }
+ }
+ return first;
+}
+
+// <destructor-name> ::= <unresolved-type> # e.g.,
+// ~T or ~decltype(f())
+// ::= <simple-id> # e.g.,
+// ~A<2*N>
+
+template <class C>
+static const char *parse_destructor_name(const char *first, const char *last,
+ C &db) {
+ if (first != last) {
+ const char *t = parse_unresolved_type(first, last, db);
+ if (t == first)
+ t = parse_simple_id(first, last, db);
+ if (t != first) {
+ if (db.names.empty())
+ return first;
+ db.names.back().first.insert(0, "~");
+ first = t;
+ }
+ }
+ return first;
+}
+
+// <base-unresolved-name> ::= <simple-id> # unresolved name
+// extension ::= <operator-name> # unresolved operator-function-id
+// extension ::= <operator-name> <template-args> # unresolved operator template-id
+// ::= on <operator-name> # unresolved operator-function-id
+// ::= on <operator-name> <template-args> # unresolved operator template-id
+// ::= dn <destructor-name> # destructor or pseudo-destructor; e.g. ~X or ~X<N-1>
+
+template <class C>
+static const char *parse_base_unresolved_name(const char *first,
+ const char *last, C &db) {
+ if (last - first >= 2) {
+ if ((first[0] == 'o' || first[0] == 'd') && first[1] == 'n') {
+ if (first[0] == 'o') {
+ const char *t = parse_operator_name(first + 2, last, db);
+ if (t != first + 2) {
+ first = parse_template_args(t, last, db);
+ if (first != t) {
+ if (db.names.size() < 2)
+ return first;
+ auto args = db.names.back().move_full();
+ db.names.pop_back();
+ db.names.back().first += std::move(args);
+ }
+ }
+ } else {
+ const char *t = parse_destructor_name(first + 2, last, db);
+ if (t != first + 2)
+ first = t;
+ }
+ } else {
+ const char *t = parse_simple_id(first, last, db);
+ if (t == first) {
+ t = parse_operator_name(first, last, db);
+ if (t != first) {
+ first = parse_template_args(t, last, db);
+ if (first != t) {
+ if (db.names.size() < 2)
+ return first;
+ auto args = db.names.back().move_full();
+ db.names.pop_back();
+ db.names.back().first += std::move(args);
+ }
+ }
+ } else
+ first = t;
+ }
+ }
+ return first;
+}
+
+// <unresolved-qualifier-level> ::= <simple-id>
+
+template <class C>
+static const char *parse_unresolved_qualifier_level(const char *first,
+ const char *last, C &db) {
+ return parse_simple_id(first, last, db);
+}
+
+// <unresolved-name>
+// extension ::= srN <unresolved-type> [<template-args>] <unresolved-qualifier-level>* E <base-unresolved-name>
+// ::= [gs] <base-unresolved-name> # x or (with "gs") ::x
+// ::= [gs] sr <unresolved-qualifier-level>+ E <base-unresolved-name>
+// # A::x, N::y, A<T>::z; "gs" means leading "::"
+// ::= sr <unresolved-type> <base-unresolved-name> # T::x / decltype(p)::x
+// extension ::= sr <unresolved-type> <template-args> <base-unresolved-name> # T::N::x / decltype(p)::N::x
+// (ignored) ::= srN <unresolved-type> <unresolved-qualifier-level>+ E <base-unresolved-name>
+
+template <class C>
+static const char *parse_unresolved_name(const char *first, const char *last,
+ C &db) {
+ if (last - first > 2) {
+ const char *t = first;
+ bool global = false;
+ if (t[0] == 'g' && t[1] == 's') {
+ global = true;
+ t += 2;
+ }
+ const char *t2 = parse_base_unresolved_name(t, last, db);
+ if (t2 != t) {
+ if (global) {
+ if (db.names.empty())
+ return first;
+ db.names.back().first.insert(0, "::");
+ }
+ first = t2;
+ } else if (last - t > 2 && t[0] == 's' && t[1] == 'r') {
+ if (t[2] == 'N') {
+ t += 3;
+ const char *t1 = parse_unresolved_type(t, last, db);
+ if (t1 == t || t1 == last)
+ return first;
+ t = t1;
+ t1 = parse_template_args(t, last, db);
+ if (t1 != t) {
+ if (db.names.size() < 2)
+ return first;
+ auto args = db.names.back().move_full();
+ db.names.pop_back();
+ db.names.back().first += std::move(args);
+ t = t1;
+ if (t == last) {
+ db.names.pop_back();
+ return first;
+ }
+ }
+ while (*t != 'E') {
+ t1 = parse_unresolved_qualifier_level(t, last, db);
+ if (t1 == t || t1 == last || db.names.size() < 2)
+ return first;
+ auto s = db.names.back().move_full();
+ db.names.pop_back();
+ db.names.back().first += "::" + std::move(s);
+ t = t1;
+ }
+ ++t;
+ t1 = parse_base_unresolved_name(t, last, db);
+ if (t1 == t) {
+ if (!db.names.empty())
+ db.names.pop_back();
+ return first;
+ }
+ if (db.names.size() < 2)
+ return first;
+ auto s = db.names.back().move_full();
+ db.names.pop_back();
+ db.names.back().first += "::" + std::move(s);
+ first = t1;
+ } else {
+ t += 2;
+ const char *t1 = parse_unresolved_type(t, last, db);
+ if (t1 != t) {
+ t = t1;
+ t1 = parse_template_args(t, last, db);
+ if (t1 != t) {
+ if (db.names.size() < 2)
+ return first;
+ auto args = db.names.back().move_full();
+ db.names.pop_back();
+ db.names.back().first += std::move(args);
+ t = t1;
+ }
+ t1 = parse_base_unresolved_name(t, last, db);
+ if (t1 == t) {
+ if (!db.names.empty())
+ db.names.pop_back();
+ return first;
+ }
+ if (db.names.size() < 2)
+ return first;
+ auto s = db.names.back().move_full();
+ db.names.pop_back();
+ db.names.back().first += "::" + std::move(s);
+ first = t1;
+ } else {
+ t1 = parse_unresolved_qualifier_level(t, last, db);
+ if (t1 == t || t1 == last)
+ return first;
+ t = t1;
+ if (global) {
+ if (db.names.empty())
+ return first;
+ db.names.back().first.insert(0, "::");
+ }
+ while (*t != 'E') {
+ t1 = parse_unresolved_qualifier_level(t, last, db);
+ if (t1 == t || t1 == last || db.names.size() < 2)
+ return first;
+ auto s = db.names.back().move_full();
+ db.names.pop_back();
+ db.names.back().first += "::" + std::move(s);
+ t = t1;
+ }
+ ++t;
+ t1 = parse_base_unresolved_name(t, last, db);
+ if (t1 == t) {
+ if (!db.names.empty())
+ db.names.pop_back();
+ return first;
+ }
+ if (db.names.size() < 2)
+ return first;
+ auto s = db.names.back().move_full();
+ db.names.pop_back();
+ db.names.back().first += "::" + std::move(s);
+ first = t1;
+ }
+ }
+ }
+ }
+ return first;
+}
+
+// dt <expression> <unresolved-name> # expr.name
+
+template <class C>
+static const char *parse_dot_expr(const char *first, const char *last, C &db) {
+ if (last - first >= 3 && first[0] == 'd' && first[1] == 't') {
+ const char *t = parse_expression(first + 2, last, db);
+ if (t != first + 2) {
+ const char *t1 = parse_unresolved_name(t, last, db);
+ if (t1 != t) {
+ if (db.names.size() < 2)
+ return first;
+ auto name = db.names.back().move_full();
+ db.names.pop_back();
+ if (db.names.empty())
+ return first;
+ db.names.back().first += "." + name;
+ first = t1;
+ }
+ }
+ }
+ return first;
+}
+
+// cl <expression>+ E # call
+
+template <class C>
+static const char *parse_call_expr(const char *first, const char *last, C &db) {
+ if (last - first >= 4 && first[0] == 'c' && first[1] == 'l') {
+ const char *t = parse_expression(first + 2, last, db);
+ if (t != first + 2) {
+ if (t == last)
+ return first;
+ if (db.names.empty())
+ return first;
+ db.names.back().first += db.names.back().second;
+ db.names.back().second = std::string();
+ db.names.back().first.append("(");
+ bool first_expr = true;
+ while (*t != 'E') {
+ const char *t1 = parse_expression(t, last, db);
+ if (t1 == t || t1 == last)
+ return first;
+ if (db.names.empty())
+ return first;
+ auto tmp = db.names.back().move_full();
+ db.names.pop_back();
+ if (!tmp.empty()) {
+ if (db.names.empty())
+ return first;
+ // Separate every argument after the first with ", ".
+ if (!first_expr)
+ db.names.back().first.append(", ");
+ first_expr = false;
+ db.names.back().first.append(tmp);
+ }
+ t = t1;
+ }
+ ++t;
+ if (db.names.empty())
+ return first;
+ db.names.back().first.append(")");
+ first = t;
+ }
+ }
+ return first;
+}
+
+// [gs] nw <expression>* _ <type> E # new (expr-list) type
+// [gs] nw <expression>* _ <type> <initializer> # new (expr-list) type
+// (init)
+// [gs] na <expression>* _ <type> E # new[] (expr-list) type
+// [gs] na <expression>* _ <type> <initializer> # new[] (expr-list) type
+// (init)
+// <initializer> ::= pi <expression>* E # parenthesized
+// initialization
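+// For illustration: "nw_ipiLi1EE" encodes "new int (1)", and "gsnw_iE"
+// encodes "::new int".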
+
+template <class C>
+static const char *parse_new_expr(const char *first, const char *last, C &db) {
+ if (last - first >= 4) {
+ const char *t = first;
+ bool parsed_gs = false;
+ if (t[0] == 'g' && t[1] == 's') {
+ t += 2;
+ parsed_gs = true;
+ }
+ if (t[0] == 'n' && (t[1] == 'w' || t[1] == 'a')) {
+ bool is_array = t[1] == 'a';
+ t += 2;
+ if (t == last)
+ return first;
+ bool has_expr_list = false;
+ bool first_expr = true;
+ while (*t != '_') {
+ const char *t1 = parse_expression(t, last, db);
+ if (t1 == t || t1 == last)
+ return first;
+ has_expr_list = true;
+ if (!first_expr) {
+ if (db.names.empty())
+ return first;
+ auto tmp = db.names.back().move_full();
+ db.names.pop_back();
+ if (!tmp.empty()) {
+ if (db.names.empty())
+ return first;
+ db.names.back().first.append(", ");
+ db.names.back().first.append(tmp);
+ }
+ }
+ // Clear the flag outside the merge so later expressions are appended.
+ first_expr = false;
+ t = t1;
+ }
+ ++t;
+ const char *t1 = parse_type(t, last, db);
+ if (t1 == t || t1 == last)
+ return first;
+ t = t1;
+ bool has_init = false;
+ if (last - t >= 3 && t[0] == 'p' && t[1] == 'i') {
+ t += 2;
+ has_init = true;
+ first_expr = true;
+ while (*t != 'E') {
+ t1 = parse_expression(t, last, db);
+ if (t1 == t || t1 == last)
+ return first;
+ if (!first_expr) {
+ if (db.names.empty())
+ return first;
+ auto tmp = db.names.back().move_full();
+ db.names.pop_back();
+ if (!tmp.empty()) {
+ if (db.names.empty())
+ return first;
+ db.names.back().first.append(", ");
+ db.names.back().first.append(tmp);
+ }
+ }
+ // Clear the flag outside the merge so later expressions are appended.
+ first_expr = false;
+ t = t1;
+ }
+ }
+ if (*t != 'E')
+ return first;
+ std::string init_list;
+ if (has_init) {
+ if (db.names.empty())
+ return first;
+ init_list = db.names.back().move_full();
+ db.names.pop_back();
+ }
+ if (db.names.empty())
+ return first;
+ auto type = db.names.back().move_full();
+ db.names.pop_back();
+ std::string expr_list;
+ if (has_expr_list) {
+ if (db.names.empty())
+ return first;
+ expr_list = db.names.back().move_full();
+ db.names.pop_back();
+ }
+ std::string r;
+ if (parsed_gs)
+ r = "::";
+ if (is_array)
+ r += "new[] ";
+ else
+ r += "new ";
+ if (has_expr_list)
+ r += "(" + expr_list + ") ";
+ r += type;
+ if (has_init)
+ r += " (" + init_list + ")";
+ db.names.push_back(std::move(r));
+ first = t + 1;
+ }
+ }
+ return first;
+}
+
+// cv <type> <expression> # conversion with one
+// argument
+// cv <type> _ <expression>* E # conversion with a
+// different number of arguments
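+// For illustration: "cviLi1E" (one argument) demangles here as "(int)(1)".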
+
+template <class C>
+static const char *parse_conversion_expr(const char *first, const char *last,
+ C &db) {
+ if (last - first >= 3 && first[0] == 'c' && first[1] == 'v') {
+ bool try_to_parse_template_args = db.try_to_parse_template_args;
+ db.try_to_parse_template_args = false;
+ const char *t = parse_type(first + 2, last, db);
+ db.try_to_parse_template_args = try_to_parse_template_args;
+ if (t != first + 2 && t != last) {
+ if (*t != '_') {
+ const char *t1 = parse_expression(t, last, db);
+ if (t1 == t)
+ return first;
+ t = t1;
+ } else {
+ ++t;
+ if (t == last)
+ return first;
+ if (*t == 'E')
+ db.names.emplace_back();
+ else {
+ bool first_expr = true;
+ while (*t != 'E') {
+ const char *t1 = parse_expression(t, last, db);
+ if (t1 == t || t1 == last)
+ return first;
+ if (!first_expr) {
+ if (db.names.empty())
+ return first;
+ auto tmp = db.names.back().move_full();
+ db.names.pop_back();
+ if (!tmp.empty()) {
+ if (db.names.empty())
+ return first;
+ db.names.back().first.append(", ");
+ db.names.back().first.append(tmp);
+ }
+ }
+ // Clear the flag outside the merge so later expressions are appended.
+ first_expr = false;
+ t = t1;
+ }
+ }
+ ++t;
+ }
+ if (db.names.size() < 2)
+ return first;
+ auto tmp = db.names.back().move_full();
+ db.names.pop_back();
+ db.names.back() = "(" + db.names.back().move_full() + ")(" + tmp + ")";
+ first = t;
+ }
+ }
+ return first;
+}
+
+// pt <expression> <expression> # expr->name
+
+template <class C>
+static const char *parse_arrow_expr(const char *first, const char *last,
+ C &db) {
+ if (last - first >= 3 && first[0] == 'p' && first[1] == 't') {
+ const char *t = parse_expression(first + 2, last, db);
+ if (t != first + 2) {
+ const char *t1 = parse_expression(t, last, db);
+ if (t1 != t) {
+ if (db.names.size() < 2)
+ return first;
+ auto tmp = db.names.back().move_full();
+ db.names.pop_back();
+ db.names.back().first += "->";
+ db.names.back().first += tmp;
+ first = t1;
+ }
+ }
+ }
+ return first;
+}
+
+// <ref-qualifier> ::= R # & ref-qualifier
+// <ref-qualifier> ::= O # && ref-qualifier
+
+// <function-type> ::= F [Y] <bare-function-type> [<ref-qualifier>] E
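+// For illustration: "FiiE" demangles here as "int (int)"; behind a pointer,
+// "PFivE" becomes "int (*)()".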
+
+template <class C>
+static const char *parse_function_type(const char *first, const char *last,
+ C &db) {
+ if (first != last && *first == 'F') {
+ const char *t = first + 1;
+ if (t != last) {
+ if (*t == 'Y') {
+ /* extern "C" */
+ if (++t == last)
+ return first;
+ }
+ const char *t1 = parse_type(t, last, db);
+ if (t1 != t) {
+ t = t1;
+ std::string sig("(");
+ int ref_qual = 0;
+ while (true) {
+ if (t == last) {
+ db.names.pop_back();
+ return first;
+ }
+ if (*t == 'E') {
+ ++t;
+ break;
+ }
+ if (*t == 'v') {
+ ++t;
+ continue;
+ }
+ if (*t == 'R' && t + 1 != last && t[1] == 'E') {
+ ref_qual = 1;
+ ++t;
+ continue;
+ }
+ if (*t == 'O' && t + 1 != last && t[1] == 'E') {
+ ref_qual = 2;
+ ++t;
+ continue;
+ }
+ size_t k0 = db.names.size();
+ t1 = parse_type(t, last, db);
+ size_t k1 = db.names.size();
+ if (t1 == t || t1 == last)
+ return first;
+ for (size_t k = k0; k < k1; ++k) {
+ if (sig.size() > 1)
+ sig += ", ";
+ sig += db.names[k].move_full();
+ }
+ for (size_t k = k0; k < k1; ++k)
+ db.names.pop_back();
+ t = t1;
+ }
+ sig += ")";
+ switch (ref_qual) {
+ case 1:
+ sig += " &";
+ break;
+ case 2:
+ sig += " &&";
+ break;
+ }
+ if (db.names.empty())
+ return first;
+ db.names.back().first += " ";
+ db.names.back().second.insert(0, sig);
+ first = t;
+ }
+ }
+ }
+ return first;
+}
+
+// <pointer-to-member-type> ::= M <class type> <member type>
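+// For illustration: "M1Ci" demangles here as "int C::*", and "M1CFvvE"
+// as "void (C::*)()".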
+
+template <class C>
+static const char *parse_pointer_to_member_type(const char *first,
+ const char *last, C &db) {
+ if (first != last && *first == 'M') {
+ const char *t = parse_type(first + 1, last, db);
+ if (t != first + 1) {
+ const char *t2 = parse_type(t, last, db);
+ if (t2 != t) {
+ if (db.names.size() < 2)
+ return first;
+ auto func = std::move(db.names.back());
+ db.names.pop_back();
+ auto class_type = std::move(db.names.back());
+ if (!func.second.empty() && func.second.front() == '(') {
+ db.names.back().first =
+ std::move(func.first) + "(" + class_type.move_full() + "::*";
+ db.names.back().second = ")" + std::move(func.second);
+ } else {
+ db.names.back().first =
+ std::move(func.first) + " " + class_type.move_full() + "::*";
+ db.names.back().second = std::move(func.second);
+ }
+ first = t2;
+ }
+ }
+ }
+ return first;
+}
+
+// <array-type> ::= A <positive dimension number> _ <element type>
+// ::= A [<dimension expression>] _ <element type>
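+// For illustration: "A10_i" demangles here as "int [10]" and "A_i" as "int []".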
+
+template <class C>
+static const char *parse_array_type(const char *first, const char *last,
+ C &db) {
+ if (first != last && *first == 'A' && first + 1 != last) {
+ if (first[1] == '_') {
+ const char *t = parse_type(first + 2, last, db);
+ if (t != first + 2) {
+ if (db.names.empty())
+ return first;
+ if (db.names.back().second.substr(0, 2) == " [")
+ db.names.back().second.erase(0, 1);
+ db.names.back().second.insert(0, " []");
+ first = t;
+ }
+ } else if ('1' <= first[1] && first[1] <= '9') {
+ const char *t = parse_number(first + 1, last);
+ if (t != last && *t == '_') {
+ const char *t2 = parse_type(t + 1, last, db);
+ if (t2 != t + 1) {
+ if (db.names.empty())
+ return first;
+ if (db.names.back().second.substr(0, 2) == " [")
+ db.names.back().second.erase(0, 1);
+ db.names.back().second.insert(0,
+ " [" + std::string(first + 1, t) + "]");
+ first = t2;
+ }
+ }
+ } else {
+ const char *t = parse_expression(first + 1, last, db);
+ if (t != first + 1 && t != last && *t == '_') {
+ const char *t2 = parse_type(++t, last, db);
+ if (t2 != t) {
+ if (db.names.size() < 2)
+ return first;
+ auto type = std::move(db.names.back());
+ db.names.pop_back();
+ auto expr = std::move(db.names.back());
+ db.names.back().first = std::move(type.first);
+ if (type.second.substr(0, 2) == " [")
+ type.second.erase(0, 1);
+ db.names.back().second =
+ " [" + expr.move_full() + "]" + std::move(type.second);
+ first = t2;
+ }
+ }
+ }
+ }
+ return first;
+}
+
+// <decltype> ::= Dt <expression> E # decltype of an id-expression or class
+// member access (C++0x)
+// ::= DT <expression> E # decltype of an expression (C++0x)
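+// For illustration: "Dtfp_E" demangles here as "decltype(fp)".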
+
+template <class C>
+static const char *parse_decltype(const char *first, const char *last, C &db) {
+ if (last - first >= 4 && first[0] == 'D') {
+ switch (first[1]) {
+ case 't':
+ case 'T': {
+ const char *t = parse_expression(first + 2, last, db);
+ if (t != first + 2 && t != last && *t == 'E') {
+ if (db.names.empty())
+ return first;
+ db.names.back() = "decltype(" + db.names.back().move_full() + ")";
+ first = t + 1;
+ }
+ } break;
+ }
+ }
+ return first;
+}
+
+// extension:
+// <vector-type> ::= Dv <positive dimension number> _
+// <extended element type>
+// ::= Dv [<dimension expression>] _ <element type>
+// <extended element type> ::= <element type>
+// ::= p # AltiVec vector pixel
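+// For illustration: "Dv4_f" demangles here as "float vector[4]".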
+
+template <class C>
+static const char *parse_vector_type(const char *first, const char *last,
+ C &db) {
+ if (last - first > 3 && first[0] == 'D' && first[1] == 'v') {
+ if ('1' <= first[2] && first[2] <= '9') {
+ const char *t = parse_number(first + 2, last);
+ if (t == last || *t != '_')
+ return first;
+ const char *num = first + 2;
+ size_t sz = static_cast<size_t>(t - num);
+ if (++t != last) {
+ if (*t != 'p') {
+ const char *t1 = parse_type(t, last, db);
+ if (t1 != t) {
+ if (db.names.empty())
+ return first;
+ db.names.back().first += " vector[" + std::string(num, sz) + "]";
+ first = t1;
+ }
+ } else {
+ ++t;
+ db.names.push_back("pixel vector[" + std::string(num, sz) + "]");
+ first = t;
+ }
+ }
+ } else {
+ std::string num;
+ const char *t1 = first + 2;
+ if (*t1 != '_') {
+ const char *t = parse_expression(t1, last, db);
+ if (t != t1) {
+ if (db.names.empty())
+ return first;
+ num = db.names.back().move_full();
+ db.names.pop_back();
+ t1 = t;
+ }
+ }
+ if (t1 != last && *t1 == '_' && ++t1 != last) {
+ const char *t = parse_type(t1, last, db);
+ if (t != t1) {
+ if (db.names.empty())
+ return first;
+ db.names.back().first += " vector[" + num + "]";
+ first = t;
+ }
+ }
+ }
+ }
+ return first;
+}
+
+// <type> ::= <builtin-type>
+// ::= <function-type>
+// ::= <class-enum-type>
+// ::= <array-type>
+// ::= <pointer-to-member-type>
+// ::= <template-param>
+// ::= <template-template-param> <template-args>
+// ::= <decltype>
+// ::= <substitution>
+// ::= <CV-qualifiers> <type>
+// ::= P <type> # pointer-to
+// ::= R <type> # reference-to
+// ::= O <type> # rvalue reference-to (C++0x)
+// ::= C <type> # complex pair (C 2000)
+// ::= G <type> # imaginary (C 2000)
+// ::= Dp <type> # pack expansion (C++0x)
+// ::= U <source-name> <type> # vendor extended type qualifier
+// extension := U <objc-name> <objc-type> # objc-type<identifier>
+// extension := <vector-type> # <vector-type> starts with Dv
+
+// <objc-name> ::= <k0 number> objcproto <k1 number> <identifier> # k0 = 9 +
+// <number of digits in k1> + k1
+// <objc-type> := <source-name> # PU<11+>objcproto 11objc_object<source-name>
+// 11objc_object -> id<source-name>
+
+template <class C>
+static const char *parse_type(const char *first, const char *last, C &db) {
+ if (first != last) {
+ switch (*first) {
+ case 'r':
+ case 'V':
+ case 'K': {
+ unsigned cv = 0;
+ const char *t = parse_cv_qualifiers(first, last, cv);
+ if (t != first) {
+ bool is_function = *t == 'F';
+ size_t k0 = db.names.size();
+ const char *t1 = parse_type(t, last, db);
+ size_t k1 = db.names.size();
+ if (t1 != t) {
+ if (is_function)
+ db.subs.pop_back();
+ db.subs.emplace_back();
+ for (size_t k = k0; k < k1; ++k) {
+ if (is_function) {
+ size_t p = db.names[k].second.size();
+ if (db.names[k].second[p - 2] == '&')
+ p -= 3;
+ else if (db.names[k].second.back() == '&')
+ p -= 2;
+ if (cv & 1) {
+ db.names[k].second.insert(p, " const");
+ p += 6;
+ }
+ if (cv & 2) {
+ db.names[k].second.insert(p, " volatile");
+ p += 9;
+ }
+ if (cv & 4)
+ db.names[k].second.insert(p, " restrict");
+ } else {
+ if (cv & 1)
+ db.names[k].first.append(" const");
+ if (cv & 2)
+ db.names[k].first.append(" volatile");
+ if (cv & 4)
+ db.names[k].first.append(" restrict");
+ }
+ db.subs.back().push_back(db.names[k]);
+ }
+ first = t1;
+ }
+ }
+ } break;
+ default: {
+ const char *t = parse_builtin_type(first, last, db);
+ if (t != first) {
+ first = t;
+ } else {
+ switch (*first) {
+ case 'A':
+ t = parse_array_type(first, last, db);
+ if (t != first) {
+ if (db.names.empty())
+ return first;
+ first = t;
+ db.subs.push_back(typename C::sub_type(1, db.names.back()));
+ }
+ break;
+ case 'C':
+ t = parse_type(first + 1, last, db);
+ if (t != first + 1) {
+ if (db.names.empty())
+ return first;
+ db.names.back().first.append(" complex");
+ first = t;
+ db.subs.push_back(typename C::sub_type(1, db.names.back()));
+ }
+ break;
+ case 'F':
+ t = parse_function_type(first, last, db);
+ if (t != first) {
+ if (db.names.empty())
+ return first;
+ first = t;
+ db.subs.push_back(typename C::sub_type(1, db.names.back()));
+ }
+ break;
+ case 'G':
+ t = parse_type(first + 1, last, db);
+ if (t != first + 1) {
+ if (db.names.empty())
+ return first;
+ db.names.back().first.append(" imaginary");
+ first = t;
+ db.subs.push_back(typename C::sub_type(1, db.names.back()));
+ }
+ break;
+ case 'M':
+ t = parse_pointer_to_member_type(first, last, db);
+ if (t != first) {
+ if (db.names.empty())
+ return first;
+ first = t;
+ db.subs.push_back(typename C::sub_type(1, db.names.back()));
+ }
+ break;
+ case 'O': {
+ size_t k0 = db.names.size();
+ t = parse_type(first + 1, last, db);
+ size_t k1 = db.names.size();
+ if (t != first + 1) {
+ db.subs.emplace_back();
+ for (size_t k = k0; k < k1; ++k) {
+ if (db.names[k].second.substr(0, 2) == " [") {
+ db.names[k].first += " (";
+ db.names[k].second.insert(0, ")");
+ } else if (!db.names[k].second.empty() &&
+ db.names[k].second.front() == '(') {
+ db.names[k].first += "(";
+ db.names[k].second.insert(0, ")");
+ }
+ db.names[k].first.append("&&");
+ db.subs.back().push_back(db.names[k]);
+ }
+ first = t;
+ }
+ break;
+ }
+ case 'P': {
+ size_t k0 = db.names.size();
+ t = parse_type(first + 1, last, db);
+ size_t k1 = db.names.size();
+ if (t != first + 1) {
+ db.subs.emplace_back();
+ for (size_t k = k0; k < k1; ++k) {
+ if (db.names[k].second.substr(0, 2) == " [") {
+ db.names[k].first += " (";
+ db.names[k].second.insert(0, ")");
+ } else if (!db.names[k].second.empty() &&
+ db.names[k].second.front() == '(') {
+ db.names[k].first += "(";
+ db.names[k].second.insert(0, ")");
+ }
+ if (first[1] != 'U' ||
+ db.names[k].first.substr(0, 12) != "objc_object<") {
+ db.names[k].first.append("*");
+ } else {
+ db.names[k].first.replace(0, 11, "id");
+ }
+ db.subs.back().push_back(db.names[k]);
+ }
+ first = t;
+ }
+ break;
+ }
+ case 'R': {
+ size_t k0 = db.names.size();
+ t = parse_type(first + 1, last, db);
+ size_t k1 = db.names.size();
+ if (t != first + 1) {
+ db.subs.emplace_back();
+ for (size_t k = k0; k < k1; ++k) {
+ if (db.names[k].second.substr(0, 2) == " [") {
+ db.names[k].first += " (";
+ db.names[k].second.insert(0, ")");
+ } else if (!db.names[k].second.empty() &&
+ db.names[k].second.front() == '(') {
+ db.names[k].first += "(";
+ db.names[k].second.insert(0, ")");
+ }
+ db.names[k].first.append("&");
+ db.subs.back().push_back(db.names[k]);
+ }
+ first = t;
+ }
+ break;
+ }
+ case 'T': {
+ size_t k0 = db.names.size();
+ t = parse_template_param(first, last, db);
+ size_t k1 = db.names.size();
+ if (t != first) {
+ db.subs.emplace_back();
+ for (size_t k = k0; k < k1; ++k)
+ db.subs.back().push_back(db.names[k]);
+ if (db.try_to_parse_template_args && k1 == k0 + 1) {
+ const char *t1 = parse_template_args(t, last, db);
+ if (t1 != t) {
+ auto args = db.names.back().move_full();
+ db.names.pop_back();
+ db.names.back().first += std::move(args);
+ db.subs.push_back(typename C::sub_type(1, db.names.back()));
+ t = t1;
+ }
+ }
+ first = t;
+ }
+ break;
+ }
+ case 'U':
+ if (first + 1 != last) {
+ t = parse_source_name(first + 1, last, db);
+ if (t != first + 1) {
+ const char *t2 = parse_type(t, last, db);
+ if (t2 != t) {
+ if (db.names.size() < 2)
+ return first;
+ auto type = db.names.back().move_full();
+ db.names.pop_back();
+ if (db.names.back().first.substr(0, 9) != "objcproto") {
+ db.names.back() = type + " " + db.names.back().move_full();
+ } else {
+ auto proto = db.names.back().move_full();
+ db.names.pop_back();
+ t = parse_source_name(proto.data() + 9,
+ proto.data() + proto.size(), db);
+ if (t != proto.data() + 9) {
+ db.names.back() =
+ type + "<" + db.names.back().move_full() + ">";
+ } else {
+ db.names.push_back(type + " " + proto);
+ }
+ }
+ db.subs.push_back(typename C::sub_type(1, db.names.back()));
+ first = t2;
+ }
+ }
+ }
+ break;
+ case 'S':
+ if (first + 1 != last && first[1] == 't') {
+ t = parse_name(first, last, db);
+ if (t != first) {
+ if (db.names.empty())
+ return first;
+ db.subs.push_back(typename C::sub_type(1, db.names.back()));
+ first = t;
+ }
+ } else {
+ t = parse_substitution(first, last, db);
+ if (t != first) {
+ first = t;
+ // Parsed a substitution. If the substitution is a
+ // <template-param> it might be followed by <template-args>.
+ t = parse_template_args(first, last, db);
+ if (t != first) {
+ if (db.names.size() < 2)
+ return first;
+ auto template_args = db.names.back().move_full();
+ db.names.pop_back();
+ db.names.back().first += template_args;
+ // Need to create substitution for <template-template-param>
+ // <template-args>
+ db.subs.push_back(typename C::sub_type(1, db.names.back()));
+ first = t;
+ }
+ }
+ }
+ break;
+ case 'D':
+ if (first + 1 != last) {
+ switch (first[1]) {
+ case 'p': {
+ size_t k0 = db.names.size();
+ t = parse_type(first + 2, last, db);
+ size_t k1 = db.names.size();
+ if (t != first + 2) {
+ db.subs.emplace_back();
+ for (size_t k = k0; k < k1; ++k)
+ db.subs.back().push_back(db.names[k]);
+ first = t;
+ return first;
+ }
+ break;
+ }
+ case 't':
+ case 'T':
+ t = parse_decltype(first, last, db);
+ if (t != first) {
+ if (db.names.empty())
+ return first;
+ db.subs.push_back(typename C::sub_type(1, db.names.back()));
+ first = t;
+ return first;
+ }
+ break;
+ case 'v':
+ t = parse_vector_type(first, last, db);
+ if (t != first) {
+ if (db.names.empty())
+ return first;
+ db.subs.push_back(typename C::sub_type(1, db.names.back()));
+ first = t;
+ return first;
+ }
+ break;
+ }
+ }
+ // drop through
+ default:
+ // must check for builtin-types before class-enum-types to avoid
+ // ambiguities with operator-names
+ t = parse_builtin_type(first, last, db);
+ if (t != first) {
+ first = t;
+ } else {
+ t = parse_name(first, last, db);
+ if (t != first) {
+ if (db.names.empty())
+ return first;
+ db.subs.push_back(typename C::sub_type(1, db.names.back()));
+ first = t;
+ }
+ }
+ break;
+ }
+ }
+ break;
+ }
+ }
+ }
+ return first;
+}
+
+// <operator-name>
+// ::= aa # &&
+// ::= ad # & (unary)
+// ::= an # &
+// ::= aN # &=
+// ::= aS # =
+// ::= cl # ()
+// ::= cm # ,
+// ::= co # ~
+// ::= cv <type> # (cast)
+// ::= da # delete[]
+// ::= de # * (unary)
+// ::= dl # delete
+// ::= dv # /
+// ::= dV # /=
+// ::= eo # ^
+// ::= eO # ^=
+// ::= eq # ==
+// ::= ge # >=
+// ::= gt # >
+// ::= ix # []
+// ::= le # <=
+// ::= li <source-name> # operator ""
+// ::= ls # <<
+// ::= lS # <<=
+// ::= lt # <
+// ::= mi # -
+// ::= mI # -=
+// ::= ml # *
+// ::= mL # *=
+// ::= mm # -- (postfix in <expression> context)
+// ::= na # new[]
+// ::= ne # !=
+// ::= ng # - (unary)
+// ::= nt # !
+// ::= nw # new
+// ::= oo # ||
+// ::= or # |
+// ::= oR # |=
+// ::= pm # ->*
+// ::= pl # +
+// ::= pL # +=
+// ::= pp # ++ (postfix in <expression> context)
+// ::= ps # + (unary)
+// ::= pt # ->
+// ::= qu # ?
+// ::= rm # %
+// ::= rM # %=
+// ::= rs # >>
+// ::= rS # >>=
+// ::= v <digit> <source-name> # vendor extended
+// operator
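+// For illustration: "ix" alone yields "operator[]", and the full symbol
+// "_ZN1XplEi" demangles as "X::operator+(int)".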
+
+template <class C>
+static const char *parse_operator_name(const char *first, const char *last,
+ C &db) {
+ if (last - first >= 2) {
+ switch (first[0]) {
+ case 'a':
+ switch (first[1]) {
+ case 'a':
+ db.names.push_back("operator&&");
+ first += 2;
+ break;
+ case 'd':
+ case 'n':
+ db.names.push_back("operator&");
+ first += 2;
+ break;
+ case 'N':
+ db.names.push_back("operator&=");
+ first += 2;
+ break;
+ case 'S':
+ db.names.push_back("operator=");
+ first += 2;
+ break;
+ }
+ break;
+ case 'c':
+ switch (first[1]) {
+ case 'l':
+ db.names.push_back("operator()");
+ first += 2;
+ break;
+ case 'm':
+ db.names.push_back("operator,");
+ first += 2;
+ break;
+ case 'o':
+ db.names.push_back("operator~");
+ first += 2;
+ break;
+ case 'v': {
+ bool try_to_parse_template_args = db.try_to_parse_template_args;
+ db.try_to_parse_template_args = false;
+ const char *t = parse_type(first + 2, last, db);
+ db.try_to_parse_template_args = try_to_parse_template_args;
+ if (t != first + 2) {
+ if (db.names.empty())
+ return first;
+ db.names.back().first.insert(0, "operator ");
+ db.parsed_ctor_dtor_cv = true;
+ first = t;
+ }
+ } break;
+ }
+ break;
+ case 'd':
+ switch (first[1]) {
+ case 'a':
+ db.names.push_back("operator delete[]");
+ first += 2;
+ break;
+ case 'e':
+ db.names.push_back("operator*");
+ first += 2;
+ break;
+ case 'l':
+ db.names.push_back("operator delete");
+ first += 2;
+ break;
+ case 'v':
+ db.names.push_back("operator/");
+ first += 2;
+ break;
+ case 'V':
+ db.names.push_back("operator/=");
+ first += 2;
+ break;
+ }
+ break;
+ case 'e':
+ switch (first[1]) {
+ case 'o':
+ db.names.push_back("operator^");
+ first += 2;
+ break;
+ case 'O':
+ db.names.push_back("operator^=");
+ first += 2;
+ break;
+ case 'q':
+ db.names.push_back("operator==");
+ first += 2;
+ break;
+ }
+ break;
+ case 'g':
+ switch (first[1]) {
+ case 'e':
+ db.names.push_back("operator>=");
+ first += 2;
+ break;
+ case 't':
+ db.names.push_back("operator>");
+ first += 2;
+ break;
+ }
+ break;
+ case 'i':
+ if (first[1] == 'x') {
+ db.names.push_back("operator[]");
+ first += 2;
+ }
+ break;
+ case 'l':
+ switch (first[1]) {
+ case 'e':
+ db.names.push_back("operator<=");
+ first += 2;
+ break;
+ case 'i': {
+ const char *t = parse_source_name(first + 2, last, db);
+ if (t != first + 2) {
+ if (db.names.empty())
+ return first;
+ db.names.back().first.insert(0, "operator\"\" ");
+ first = t;
+ }
+ } break;
+ case 's':
+ db.names.push_back("operator<<");
+ first += 2;
+ break;
+ case 'S':
+ db.names.push_back("operator<<=");
+ first += 2;
+ break;
+ case 't':
+ db.names.push_back("operator<");
+ first += 2;
+ break;
+ }
+ break;
+ case 'm':
+ switch (first[1]) {
+ case 'i':
+ db.names.push_back("operator-");
+ first += 2;
+ break;
+ case 'I':
+ db.names.push_back("operator-=");
+ first += 2;
+ break;
+ case 'l':
+ db.names.push_back("operator*");
+ first += 2;
+ break;
+ case 'L':
+ db.names.push_back("operator*=");
+ first += 2;
+ break;
+ case 'm':
+ db.names.push_back("operator--");
+ first += 2;
+ break;
+ }
+ break;
+ case 'n':
+ switch (first[1]) {
+ case 'a':
+ db.names.push_back("operator new[]");
+ first += 2;
+ break;
+ case 'e':
+ db.names.push_back("operator!=");
+ first += 2;
+ break;
+ case 'g':
+ db.names.push_back("operator-");
+ first += 2;
+ break;
+ case 't':
+ db.names.push_back("operator!");
+ first += 2;
+ break;
+ case 'w':
+ db.names.push_back("operator new");
+ first += 2;
+ break;
+ }
+ break;
+ case 'o':
+ switch (first[1]) {
+ case 'o':
+ db.names.push_back("operator||");
+ first += 2;
+ break;
+ case 'r':
+ db.names.push_back("operator|");
+ first += 2;
+ break;
+ case 'R':
+ db.names.push_back("operator|=");
+ first += 2;
+ break;
+ }
+ break;
+ case 'p':
+ switch (first[1]) {
+ case 'm':
+ db.names.push_back("operator->*");
+ first += 2;
+ break;
+ case 'l':
+ db.names.push_back("operator+");
+ first += 2;
+ break;
+ case 'L':
+ db.names.push_back("operator+=");
+ first += 2;
+ break;
+ case 'p':
+ db.names.push_back("operator++");
+ first += 2;
+ break;
+ case 's':
+ db.names.push_back("operator+");
+ first += 2;
+ break;
+ case 't':
+ db.names.push_back("operator->");
+ first += 2;
+ break;
+ }
+ break;
+ case 'q':
+ if (first[1] == 'u') {
+ db.names.push_back("operator?");
+ first += 2;
+ }
+ break;
+ case 'r':
+ switch (first[1]) {
+ case 'm':
+ db.names.push_back("operator%");
+ first += 2;
+ break;
+ case 'M':
+ db.names.push_back("operator%=");
+ first += 2;
+ break;
+ case 's':
+ db.names.push_back("operator>>");
+ first += 2;
+ break;
+ case 'S':
+ db.names.push_back("operator>>=");
+ first += 2;
+ break;
+ }
+ break;
+ case 'v':
+ if (std::isdigit(first[1])) {
+ const char *t = parse_source_name(first + 2, last, db);
+ if (t != first + 2) {
+ if (db.names.empty())
+ return first;
+ db.names.back().first.insert(0, "operator ");
+ first = t;
+ }
+ }
+ break;
+ }
+ }
+ return first;
+}
+
+template <class C>
+static const char *parse_integer_literal(const char *first, const char *last,
+ const std::string &lit, C &db) {
+ const char *t = parse_number(first, last);
+ if (t != first && t != last && *t == 'E') {
+ if (lit.size() > 3)
+ db.names.push_back("(" + lit + ")");
+ else
+ db.names.emplace_back();
+ if (*first == 'n') {
+ db.names.back().first += '-';
+ ++first;
+ }
+ db.names.back().first.append(first, t);
+ if (lit.size() <= 3)
+ db.names.back().first += lit;
+ first = t + 1;
+ }
+ return first;
+}
+
+// <expr-primary> ::= L <type> <value number> E #
+// integer literal
+// ::= L <type> <value float> E #
+// floating literal
+// ::= L <string type> E #
+// string literal
+// ::= L <nullptr type> E #
+// nullptr literal (i.e., "LDnE")
+// ::= L <type> <real-part float> _ <imag-part float> E #
+// complex floating point literal (C 2000)
+// ::= L <mangled-name> E #
+// external name
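+// For illustration: "Li5E" demangles here as "5" and "Lb1E" as "true".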
+
+template <class C>
+static const char *parse_expr_primary(const char *first, const char *last,
+ C &db) {
+ if (last - first >= 4 && *first == 'L') {
+ switch (first[1]) {
+ case 'w': {
+ const char *t = parse_integer_literal(first + 2, last, "wchar_t", db);
+ if (t != first + 2)
+ first = t;
+ } break;
+ case 'b':
+ if (first[3] == 'E') {
+ switch (first[2]) {
+ case '0':
+ db.names.push_back("false");
+ first += 4;
+ break;
+ case '1':
+ db.names.push_back("true");
+ first += 4;
+ break;
+ }
+ }
+ break;
+ case 'c': {
+ const char *t = parse_integer_literal(first + 2, last, "char", db);
+ if (t != first + 2)
+ first = t;
+ } break;
+ case 'a': {
+ const char *t = parse_integer_literal(first + 2, last, "signed char", db);
+ if (t != first + 2)
+ first = t;
+ } break;
+ case 'h': {
+ const char *t =
+ parse_integer_literal(first + 2, last, "unsigned char", db);
+ if (t != first + 2)
+ first = t;
+ } break;
+ case 's': {
+ const char *t = parse_integer_literal(first + 2, last, "short", db);
+ if (t != first + 2)
+ first = t;
+ } break;
+ case 't': {
+ const char *t =
+ parse_integer_literal(first + 2, last, "unsigned short", db);
+ if (t != first + 2)
+ first = t;
+ } break;
+ case 'i': {
+ const char *t = parse_integer_literal(first + 2, last, "", db);
+ if (t != first + 2)
+ first = t;
+ } break;
+ case 'j': {
+ const char *t = parse_integer_literal(first + 2, last, "u", db);
+ if (t != first + 2)
+ first = t;
+ } break;
+ case 'l': {
+ const char *t = parse_integer_literal(first + 2, last, "l", db);
+ if (t != first + 2)
+ first = t;
+ } break;
+ case 'm': {
+ const char *t = parse_integer_literal(first + 2, last, "ul", db);
+ if (t != first + 2)
+ first = t;
+ } break;
+ case 'x': {
+ const char *t = parse_integer_literal(first + 2, last, "ll", db);
+ if (t != first + 2)
+ first = t;
+ } break;
+ case 'y': {
+ const char *t = parse_integer_literal(first + 2, last, "ull", db);
+ if (t != first + 2)
+ first = t;
+ } break;
+ case 'n': {
+ const char *t = parse_integer_literal(first + 2, last, "__int128", db);
+ if (t != first + 2)
+ first = t;
+ } break;
+ case 'o': {
+ const char *t =
+ parse_integer_literal(first + 2, last, "unsigned __int128", db);
+ if (t != first + 2)
+ first = t;
+ } break;
+ case 'f': {
+ const char *t = parse_floating_number<float>(first + 2, last, db);
+ if (t != first + 2)
+ first = t;
+ } break;
+ case 'd': {
+ const char *t = parse_floating_number<double>(first + 2, last, db);
+ if (t != first + 2)
+ first = t;
+ } break;
+ case 'e': {
+ const char *t = parse_floating_number<long double>(first + 2, last, db);
+ if (t != first + 2)
+ first = t;
+ } break;
+ case '_':
+ if (first[2] == 'Z') {
+ const char *t = parse_encoding(first + 3, last, db);
+ if (t != first + 3 && t != last && *t == 'E')
+ first = t + 1;
+ }
+ break;
+ case 'T':
+ // Invalid mangled name per
+ // http://sourcerytools.com/pipermail/cxx-abi-dev/2011-August/002422.html
+ break;
+ default: {
+ // might be named type
+ const char *t = parse_type(first + 1, last, db);
+ if (t != first + 1 && t != last) {
+ if (*t != 'E') {
+ const char *n = t;
+ for (; n != last && isdigit(*n); ++n)
+ ;
+ if (n != t && n != last && *n == 'E') {
+ if (db.names.empty())
+ return first;
+ db.names.back() =
+ "(" + db.names.back().move_full() + ")" + std::string(t, n);
+ first = n + 1;
+ break;
+ }
+ } else {
+ first = t + 1;
+ break;
+ }
+ }
+ }
+ }
+ }
+ return first;
+}
+
+static std::string base_name(std::string &s) {
+ if (s.empty())
+ return s;
+ if (s == "std::string") {
+ s = "std::basic_string<char, std::char_traits<char>, std::allocator<char> "
+ ">";
+ return "basic_string";
+ }
+ if (s == "std::istream") {
+ s = "std::basic_istream<char, std::char_traits<char> >";
+ return "basic_istream";
+ }
+ if (s == "std::ostream") {
+ s = "std::basic_ostream<char, std::char_traits<char> >";
+ return "basic_ostream";
+ }
+ if (s == "std::iostream") {
+ s = "std::basic_iostream<char, std::char_traits<char> >";
+ return "basic_iostream";
+ }
+ const char *const pf = s.data();
+ const char *pe = pf + s.size();
+ if (pe[-1] == '>') {
+ unsigned c = 1;
+ while (true) {
+ if (--pe == pf)
+ return std::string();
+ if (pe[-1] == '<') {
+ if (--c == 0) {
+ --pe;
+ break;
+ }
+ } else if (pe[-1] == '>')
+ ++c;
+ }
+ }
+ if (pe - pf <= 1)
+ return std::string();
+ const char *p0 = pe - 1;
+ for (; p0 != pf; --p0) {
+ if (*p0 == ':') {
+ ++p0;
+ break;
+ }
+ }
+ return std::string(p0, pe);
+}
+
+// <ctor-dtor-name> ::= C1 # complete object constructor
+// ::= C2 # base object constructor
+// ::= C3 # complete object allocating constructor
+// extension ::= C5 # ?
+// ::= D0 # deleting destructor
+// ::= D1 # complete object destructor
+// ::= D2 # base object destructor
+// extension ::= D5 # ?
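+// For illustration: "_ZN3FooC1Ev" demangles as "Foo::Foo()" and
+// "_ZN3FooD1Ev" as "Foo::~Foo()".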
+
+template <class C>
+static const char *parse_ctor_dtor_name(const char *first, const char *last,
+ C &db) {
+ if (last - first >= 2 && !db.names.empty()) {
+ switch (first[0]) {
+ case 'C':
+ switch (first[1]) {
+ case '1':
+ case '2':
+ case '3':
+ case '5':
+ if (db.names.empty())
+ return first;
+ db.names.push_back(base_name(db.names.back().first));
+ first += 2;
+ db.parsed_ctor_dtor_cv = true;
+ break;
+ }
+ break;
+ case 'D':
+ switch (first[1]) {
+ case '0':
+ case '1':
+ case '2':
+ case '5':
+ if (db.names.empty())
+ return first;
+ db.names.push_back("~" + base_name(db.names.back().first));
+ first += 2;
+ db.parsed_ctor_dtor_cv = true;
+ break;
+ }
+ break;
+ }
+ }
+ return first;
+}
+
+// <unnamed-type-name> ::= Ut [ <nonnegative number> ] _
+// ::= <closure-type-name>
+//
+// <closure-type-name> ::= Ul <lambda-sig> E [ <nonnegative number> ] _
+//
+// <lambda-sig> ::= <parameter type>+ # Parameter types or "v" if the lambda
+// has no parameters
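+// For illustration: "Ut_" demangles here as "'unnamed'", "UlvE_" as
+// "'lambda'()", and "UliE_" as "'lambda'(int)".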
+
+template <class C>
+static const char *parse_unnamed_type_name(const char *first, const char *last,
+ C &db) {
+ if (last - first > 2 && first[0] == 'U') {
+ char type = first[1];
+ switch (type) {
+ case 't': {
+ db.names.push_back(std::string("'unnamed"));
+ const char *t0 = first + 2;
+ if (t0 == last) {
+ db.names.pop_back();
+ return first;
+ }
+ if (std::isdigit(*t0)) {
+ const char *t1 = t0 + 1;
+ while (t1 != last && std::isdigit(*t1))
+ ++t1;
+ db.names.back().first.append(t0, t1);
+ t0 = t1;
+ }
+ db.names.back().first.push_back('\'');
+ if (t0 == last || *t0 != '_') {
+ db.names.pop_back();
+ return first;
+ }
+ first = t0 + 1;
+ } break;
+ case 'l': {
+ db.names.push_back(std::string("'lambda'("));
+ const char *t0 = first + 2;
+ if (first[2] == 'v') {
+ db.names.back().first += ')';
+ ++t0;
+ } else {
+ const char *t1 = parse_type(t0, last, db);
+ if (t1 == t0) {
+ if (!db.names.empty())
+ db.names.pop_back();
+ return first;
+ }
+ if (db.names.size() < 2)
+ return first;
+ auto tmp = db.names.back().move_full();
+ db.names.pop_back();
+ db.names.back().first.append(tmp);
+ t0 = t1;
+ while (true) {
+ t1 = parse_type(t0, last, db);
+ if (t1 == t0)
+ break;
+ if (db.names.size() < 2)
+ return first;
+ tmp = db.names.back().move_full();
+ db.names.pop_back();
+ if (!tmp.empty()) {
+ db.names.back().first.append(", ");
+ db.names.back().first.append(tmp);
+ }
+ t0 = t1;
+ }
+ if (db.names.empty())
+ return first;
+ db.names.back().first.append(")");
+ }
+ if (t0 == last || *t0 != 'E') {
+ if (!db.names.empty())
+ db.names.pop_back();
+ return first;
+ }
+ ++t0;
+ if (t0 == last) {
+ if (!db.names.empty())
+ db.names.pop_back();
+ return first;
+ }
+ if (std::isdigit(*t0)) {
+ const char *t1 = t0 + 1;
+ while (t1 != last && std::isdigit(*t1))
+ ++t1;
+ db.names.back().first.insert(db.names.back().first.begin() + 7, t0, t1);
+ t0 = t1;
+ }
+ if (t0 == last || *t0 != '_') {
+ if (!db.names.empty())
+ db.names.pop_back();
+ return first;
+ }
+ first = t0 + 1;
+ } break;
+ }
+ }
+ return first;
+}
+
+// <unqualified-name> ::= <operator-name>
+// ::= <ctor-dtor-name>
+// ::= <source-name>
+// ::= <unnamed-type-name>
+
+template <class C>
+static const char *parse_unqualified_name(const char *first, const char *last,
+ C &db) {
+ if (first != last) {
+ const char *t;
+ switch (*first) {
+ case 'C':
+ case 'D':
+ t = parse_ctor_dtor_name(first, last, db);
+ if (t != first)
+ first = t;
+ break;
+ case 'U':
+ t = parse_unnamed_type_name(first, last, db);
+ if (t != first)
+ first = t;
+ break;
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ t = parse_source_name(first, last, db);
+ if (t != first)
+ first = t;
+ break;
+ default:
+ t = parse_operator_name(first, last, db);
+ if (t != first)
+ first = t;
+ break;
+ };
+ }
+ return first;
+}
+
+// <unscoped-name> ::= <unqualified-name>
+// ::= St <unqualified-name> # ::std::
+// extension ::= StL<unqualified-name>
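+// For illustration: "St9type_info" demangles here as "std::type_info".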
+
+template <class C>
+static const char *parse_unscoped_name(const char *first, const char *last,
+ C &db) {
+ if (last - first >= 2) {
+ const char *t0 = first;
+ bool St = false;
+ if (first[0] == 'S' && first[1] == 't') {
+ t0 += 2;
+ St = true;
+ if (t0 != last && *t0 == 'L')
+ ++t0;
+ }
+ const char *t1 = parse_unqualified_name(t0, last, db);
+ if (t1 != t0) {
+ if (St) {
+ if (db.names.empty())
+ return first;
+ db.names.back().first.insert(0, "std::");
+ }
+ first = t1;
+ }
+ }
+ return first;
+}
+
+// at <type> # alignof (a type)
+
+template <class C>
+static const char *parse_alignof_type(const char *first, const char *last,
+ C &db) {
+ if (last - first >= 3 && first[0] == 'a' && first[1] == 't') {
+ const char *t = parse_type(first + 2, last, db);
+ if (t != first + 2) {
+ if (db.names.empty())
+ return first;
+ db.names.back().first = "alignof (" + db.names.back().move_full() + ")";
+ first = t;
+ }
+ }
+ return first;
+}
+
+// az <expression> # alignof (an expression)
+
+template <class C>
+static const char *parse_alignof_expr(const char *first, const char *last,
+ C &db) {
+ if (last - first >= 3 && first[0] == 'a' && first[1] == 'z') {
+ const char *t = parse_expression(first + 2, last, db);
+ if (t != first + 2) {
+ if (db.names.empty())
+ return first;
+ db.names.back().first = "alignof (" + db.names.back().move_full() + ")";
+ first = t;
+ }
+ }
+ return first;
+}
+
+template <class C>
+static const char *parse_noexcept_expression(const char *first,
+ const char *last, C &db) {
+ const char *t1 = parse_expression(first, last, db);
+ if (t1 != first) {
+ if (db.names.empty())
+ return first;
+ db.names.back().first = "noexcept (" + db.names.back().move_full() + ")";
+ first = t1;
+ }
+ return first;
+}
+
+template <class C>
+static const char *parse_prefix_expression(const char *first, const char *last,
+ const std::string &op,
+ C &db) {
+ const char *t1 = parse_expression(first, last, db);
+ if (t1 != first) {
+ if (db.names.empty())
+ return first;
+ db.names.back().first = op + "(" + db.names.back().move_full() + ")";
+ first = t1;
+ }
+ return first;
+}
+
+template <class C>
+static const char *parse_binary_expression(const char *first, const char *last,
+ const std::string &op,
+ C &db) {
+ const char *t1 = parse_expression(first, last, db);
+ if (t1 != first) {
+ const char *t2 = parse_expression(t1, last, db);
+ if (t2 != t1) {
+ if (db.names.size() < 2)
+ return first;
+ auto op2 = db.names.back().move_full();
+ db.names.pop_back();
+ auto op1 = db.names.back().move_full();
+ auto &nm = db.names.back().first;
+ nm.clear();
+ if (op == ">")
+ nm += '(';
+ nm += "(" + op1 + ") " + op + " (" + op2 + ")";
+ if (op == ">")
+ nm += ')';
+ first = t2;
+ } else if (!db.names.empty())
+ db.names.pop_back();
+ }
+ return first;
+}
+
+// <expression> ::= <unary operator-name> <expression>
+// ::= <binary operator-name> <expression> <expression>
+// ::= <ternary operator-name> <expression> <expression> <expression>
+// ::= cl <expression>+ E # call
+// ::= cv <type> <expression> # conversion with one argument
+// ::= cv <type> _ <expression>* E # conversion with a different number of arguments
+// ::= [gs] nw <expression>* _ <type> E # new (expr-list) type
+// ::= [gs] nw <expression>* _ <type> <initializer> # new (expr-list) type (init)
+// ::= [gs] na <expression>* _ <type> E # new[] (expr-list) type
+// ::= [gs] na <expression>* _ <type> <initializer> # new[] (expr-list) type (init)
+// ::= [gs] dl <expression> # delete expression
+// ::= [gs] da <expression> # delete[] expression
+// ::= pp_ <expression> # prefix ++
+// ::= mm_ <expression> # prefix --
+// ::= ti <type> # typeid (type)
+// ::= te <expression> # typeid (expression)
+// ::= dc <type> <expression> # dynamic_cast<type> (expression)
+// ::= sc <type> <expression> # static_cast<type> (expression)
+// ::= cc <type> <expression> # const_cast<type> (expression)
+// ::= rc <type> <expression> # reinterpret_cast<type> (expression)
+// ::= st <type> # sizeof (a type)
+// ::= sz <expression> # sizeof (an expression)
+// ::= at <type> # alignof (a type)
+// ::= az <expression> # alignof (an expression)
+// ::= nx <expression> # noexcept (expression)
+// ::= <template-param>
+// ::= <function-param>
+// ::= dt <expression> <unresolved-name> # expr.name
+// ::= pt <expression> <unresolved-name> # expr->name
+// ::= ds <expression> <expression> # expr.*expr
+// ::= sZ <template-param> # size of a parameter pack
+// ::= sZ <function-param> # size of a function parameter pack
+// ::= sp <expression> # pack expansion
+// ::= tw <expression> # throw expression
+// ::= tr # throw with no operand (rethrow)
+// ::= <unresolved-name> # f(p), N::f(p), ::f(p),
+// # freestanding dependent name (e.g., T::x),
+// # objectless nonstatic member reference
+// ::= <expr-primary>
+
+template <class C>
+static const char *parse_expression(const char *first, const char *last,
+ C &db) {
+ if (last - first >= 2) {
+ const char *t = first;
+ bool parsed_gs = false;
+ if (last - first >= 4 && t[0] == 'g' && t[1] == 's') {
+ t += 2;
+ parsed_gs = true;
+ }
+ switch (*t) {
+ case 'L':
+ first = parse_expr_primary(first, last, db);
+ break;
+ case 'T':
+ first = parse_template_param(first, last, db);
+ break;
+ case 'f':
+ first = parse_function_param(first, last, db);
+ break;
+ case 'a':
+ switch (t[1]) {
+ case 'a':
+ t = parse_binary_expression(first + 2, last, "&&", db);
+ if (t != first + 2)
+ first = t;
+ break;
+ case 'd':
+ t = parse_prefix_expression(first + 2, last, "&", db);
+ if (t != first + 2)
+ first = t;
+ break;
+ case 'n':
+ t = parse_binary_expression(first + 2, last, "&", db);
+ if (t != first + 2)
+ first = t;
+ break;
+ case 'N':
+ t = parse_binary_expression(first + 2, last, "&=", db);
+ if (t != first + 2)
+ first = t;
+ break;
+ case 'S':
+ t = parse_binary_expression(first + 2, last, "=", db);
+ if (t != first + 2)
+ first = t;
+ break;
+ case 't':
+ first = parse_alignof_type(first, last, db);
+ break;
+ case 'z':
+ first = parse_alignof_expr(first, last, db);
+ break;
+ }
+ break;
+ case 'c':
+ switch (t[1]) {
+ case 'c':
+ first = parse_const_cast_expr(first, last, db);
+ break;
+ case 'l':
+ first = parse_call_expr(first, last, db);
+ break;
+ case 'm':
+ t = parse_binary_expression(first + 2, last, ",", db);
+ if (t != first + 2)
+ first = t;
+ break;
+ case 'o':
+ t = parse_prefix_expression(first + 2, last, "~", db);
+ if (t != first + 2)
+ first = t;
+ break;
+ case 'v':
+ first = parse_conversion_expr(first, last, db);
+ break;
+ }
+ break;
+ case 'd':
+ switch (t[1]) {
+ case 'a': {
+ const char *t1 = parse_expression(t + 2, last, db);
+ if (t1 != t + 2) {
+ if (db.names.empty())
+ return first;
+ db.names.back().first =
+ (parsed_gs ? std::string("::") : std::string()) + "delete[] " +
+ db.names.back().move_full();
+ first = t1;
+ }
+ } break;
+ case 'c':
+ first = parse_dynamic_cast_expr(first, last, db);
+ break;
+ case 'e':
+ t = parse_prefix_expression(first + 2, last, "*", db);
+ if (t != first + 2)
+ first = t;
+ break;
+ case 'l': {
+ const char *t1 = parse_expression(t + 2, last, db);
+ if (t1 != t + 2) {
+ if (db.names.empty())
+ return first;
+ db.names.back().first =
+ (parsed_gs ? std::string("::") : std::string()) + "delete " +
+ db.names.back().move_full();
+ first = t1;
+ }
+ } break;
+ case 'n':
+ return parse_unresolved_name(first, last, db);
+ case 's':
+ first = parse_dot_star_expr(first, last, db);
+ break;
+ case 't':
+ first = parse_dot_expr(first, last, db);
+ break;
+ case 'v':
+ t = parse_binary_expression(first + 2, last, "/", db);
+ if (t != first + 2)
+ first = t;
+ break;
+ case 'V':
+ t = parse_binary_expression(first + 2, last, "/=", db);
+ if (t != first + 2)
+ first = t;
+ break;
+ }
+ break;
+ case 'e':
+ switch (t[1]) {
+ case 'o':
+ t = parse_binary_expression(first + 2, last, "^", db);
+ if (t != first + 2)
+ first = t;
+ break;
+ case 'O':
+ t = parse_binary_expression(first + 2, last, "^=", db);
+ if (t != first + 2)
+ first = t;
+ break;
+ case 'q':
+ t = parse_binary_expression(first + 2, last, "==", db);
+ if (t != first + 2)
+ first = t;
+ break;
+ }
+ break;
+ case 'g':
+ switch (t[1]) {
+ case 'e':
+ t = parse_binary_expression(first + 2, last, ">=", db);
+ if (t != first + 2)
+ first = t;
+ break;
+ case 't':
+ t = parse_binary_expression(first + 2, last, ">", db);
+ if (t != first + 2)
+ first = t;
+ break;
+ }
+ break;
+ case 'i':
+ if (t[1] == 'x') {
+ const char *t1 = parse_expression(first + 2, last, db);
+ if (t1 != first + 2) {
+ const char *t2 = parse_expression(t1, last, db);
+ if (t2 != t1) {
+ if (db.names.size() < 2)
+ return first;
+ auto op2 = db.names.back().move_full();
+ db.names.pop_back();
+ auto op1 = db.names.back().move_full();
+ db.names.back() = "(" + op1 + ")[" + op2 + "]";
+ first = t2;
+ } else if (!db.names.empty())
+ db.names.pop_back();
+ }
+ }
+ break;
+ case 'l':
+ switch (t[1]) {
+ case 'e':
+ t = parse_binary_expression(first + 2, last, "<=", db);
+ if (t != first + 2)
+ first = t;
+ break;
+ case 's':
+ t = parse_binary_expression(first + 2, last, "<<", db);
+ if (t != first + 2)
+ first = t;
+ break;
+ case 'S':
+ t = parse_binary_expression(first + 2, last, "<<=", db);
+ if (t != first + 2)
+ first = t;
+ break;
+ case 't':
+ t = parse_binary_expression(first + 2, last, "<", db);
+ if (t != first + 2)
+ first = t;
+ break;
+ }
+ break;
+ case 'm':
+ switch (t[1]) {
+ case 'i':
+ t = parse_binary_expression(first + 2, last, "-", db);
+ if (t != first + 2)
+ first = t;
+ break;
+ case 'I':
+ t = parse_binary_expression(first + 2, last, "-=", db);
+ if (t != first + 2)
+ first = t;
+ break;
+ case 'l':
+ t = parse_binary_expression(first + 2, last, "*", db);
+ if (t != first + 2)
+ first = t;
+ break;
+ case 'L':
+ t = parse_binary_expression(first + 2, last, "*=", db);
+ if (t != first + 2)
+ first = t;
+ break;
+ case 'm':
+ if (first + 2 != last && first[2] == '_') {
+ t = parse_prefix_expression(first + 3, last, "--", db);
+ if (t != first + 3)
+ first = t;
+ } else {
+ const char *t1 = parse_expression(first + 2, last, db);
+ if (t1 != first + 2) {
+ if (db.names.empty())
+ return first;
+ db.names.back() = "(" + db.names.back().move_full() + ")--";
+ first = t1;
+ }
+ }
+ break;
+ }
+ break;
+ case 'n':
+ switch (t[1]) {
+ case 'a':
+ case 'w':
+ first = parse_new_expr(first, last, db);
+ break;
+ case 'e':
+ t = parse_binary_expression(first + 2, last, "!=", db);
+ if (t != first + 2)
+ first = t;
+ break;
+ case 'g':
+ t = parse_prefix_expression(first + 2, last, "-", db);
+ if (t != first + 2)
+ first = t;
+ break;
+ case 't':
+ t = parse_prefix_expression(first + 2, last, "!", db);
+ if (t != first + 2)
+ first = t;
+ break;
+ case 'x':
+ t = parse_noexcept_expression(first + 2, last, db);
+ if (t != first + 2)
+ first = t;
+ break;
+ }
+ break;
+ case 'o':
+ switch (t[1]) {
+ case 'n':
+ return parse_unresolved_name(first, last, db);
+ case 'o':
+ t = parse_binary_expression(first + 2, last, "||", db);
+ if (t != first + 2)
+ first = t;
+ break;
+ case 'r':
+ t = parse_binary_expression(first + 2, last, "|", db);
+ if (t != first + 2)
+ first = t;
+ break;
+ case 'R':
+ t = parse_binary_expression(first + 2, last, "|=", db);
+ if (t != first + 2)
+ first = t;
+ break;
+ }
+ break;
+ case 'p':
+ switch (t[1]) {
+ case 'm':
+ t = parse_binary_expression(first + 2, last, "->*", db);
+ if (t != first + 2)
+ first = t;
+ break;
+ case 'l':
+ t = parse_binary_expression(first + 2, last, "+", db);
+ if (t != first + 2)
+ first = t;
+ break;
+ case 'L':
+ t = parse_binary_expression(first + 2, last, "+=", db);
+ if (t != first + 2)
+ first = t;
+ break;
+ case 'p':
+ if (first + 2 != last && first[2] == '_') {
+ t = parse_prefix_expression(first + 3, last, "++", db);
+ if (t != first + 3)
+ first = t;
+ } else {
+ const char *t1 = parse_expression(first + 2, last, db);
+ if (t1 != first + 2) {
+ if (db.names.empty())
+ return first;
+ db.names.back() = "(" + db.names.back().move_full() + ")++";
+ first = t1;
+ }
+ }
+ break;
+ case 's':
+ t = parse_prefix_expression(first + 2, last, "+", db);
+ if (t != first + 2)
+ first = t;
+ break;
+ case 't':
+ first = parse_arrow_expr(first, last, db);
+ break;
+ }
+ break;
+ case 'q':
+ if (t[1] == 'u') {
+ const char *t1 = parse_expression(first + 2, last, db);
+ if (t1 != first + 2) {
+ const char *t2 = parse_expression(t1, last, db);
+ if (t2 != t1) {
+ const char *t3 = parse_expression(t2, last, db);
+ if (t3 != t2) {
+ if (db.names.size() < 3)
+ return first;
+ auto op3 = db.names.back().move_full();
+ db.names.pop_back();
+ auto op2 = db.names.back().move_full();
+ db.names.pop_back();
+ auto op1 = db.names.back().move_full();
+ db.names.back() = "(" + op1 + ") ? (" + op2 + ") : (" + op3 + ")";
+ first = t3;
+ } else {
+ if (db.names.size() < 2)
+ return first;
+ db.names.pop_back();
+ db.names.pop_back();
+ }
+ } else if (!db.names.empty())
+ db.names.pop_back();
+ }
+ }
+ break;
+ case 'r':
+ switch (t[1]) {
+ case 'c':
+ first = parse_reinterpret_cast_expr(first, last, db);
+ break;
+ case 'm':
+ t = parse_binary_expression(first + 2, last, "%", db);
+ if (t != first + 2)
+ first = t;
+ break;
+ case 'M':
+ t = parse_binary_expression(first + 2, last, "%=", db);
+ if (t != first + 2)
+ first = t;
+ break;
+ case 's':
+ t = parse_binary_expression(first + 2, last, ">>", db);
+ if (t != first + 2)
+ first = t;
+ break;
+ case 'S':
+ t = parse_binary_expression(first + 2, last, ">>=", db);
+ if (t != first + 2)
+ first = t;
+ break;
+ }
+ break;
+ case 's':
+ switch (t[1]) {
+ case 'c':
+ first = parse_static_cast_expr(first, last, db);
+ break;
+ case 'p':
+ first = parse_pack_expansion(first, last, db);
+ break;
+ case 'r':
+ return parse_unresolved_name(first, last, db);
+ case 't':
+ first = parse_sizeof_type_expr(first, last, db);
+ break;
+ case 'z':
+ first = parse_sizeof_expr_expr(first, last, db);
+ break;
+ case 'Z':
+ if (last - t >= 3) {
+ switch (t[2]) {
+ case 'T':
+ first = parse_sizeof_param_pack_expr(first, last, db);
+ break;
+ case 'f':
+ first = parse_sizeof_function_param_pack_expr(first, last, db);
+ break;
+ }
+ }
+ break;
+ }
+ break;
+ case 't':
+ switch (t[1]) {
+ case 'e':
+ case 'i':
+ first = parse_typeid_expr(first, last, db);
+ break;
+ case 'r':
+ db.names.push_back("throw");
+ first += 2;
+ break;
+ case 'w':
+ first = parse_throw_expr(first, last, db);
+ break;
+ }
+ break;
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ return parse_unresolved_name(first, last, db);
+ }
+ }
+ return first;
+}
+
+// <template-arg> ::= <type>                # type or template
+//                ::= X <expression> E      # expression
+//                ::= <expr-primary>        # simple expressions
+//                ::= J <template-arg>* E   # argument pack
+//                ::= LZ <encoding> E       # extension
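+// Example (illustrative): in _Z3fooILi5EEvv the template-arg "Li5E" is an
+// <expr-primary> (the integer literal 5), so the template name demangles to
+// foo<5>.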
+
+template <class C>
+static const char *parse_template_arg(const char *first, const char *last,
+ C &db) {
+ if (first != last) {
+ const char *t;
+ switch (*first) {
+ case 'X':
+ t = parse_expression(first + 1, last, db);
+ if (t != first + 1) {
+ if (t != last && *t == 'E')
+ first = t + 1;
+ }
+ break;
+ case 'J':
+ t = first + 1;
+ if (t == last)
+ return first;
+ while (*t != 'E') {
+ const char *t1 = parse_template_arg(t, last, db);
+ if (t1 == t)
+ return first;
+ t = t1;
+ }
+ first = t + 1;
+ break;
+ case 'L':
+ // <expr-primary> or LZ <encoding> E
+ if (first + 1 != last && first[1] == 'Z') {
+ t = parse_encoding(first + 2, last, db);
+ if (t != first + 2 && t != last && *t == 'E')
+ first = t + 1;
+ } else
+ first = parse_expr_primary(first, last, db);
+ break;
+ default:
+ // <type>
+ first = parse_type(first, last, db);
+ break;
+ }
+ }
+ return first;
+}
+
+// <template-args> ::= I <template-arg>* E
+// extension, the ABI says <template-arg>+
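+// Example (illustrative): in _Z3fooIiEvv the template-args "IiE" demangle to
+// "<int>", giving foo<int>; when the last argument already ends in '>' a
+// space is inserted (e.g. "foo<bar<int> >") to avoid emitting ">>".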
+
+template <class C>
+static const char *parse_template_args(const char *first, const char *last,
+ C &db) {
+ if (last - first >= 2 && *first == 'I') {
+ if (db.tag_templates)
+ db.template_param.back().clear();
+ const char *t = first + 1;
+ std::string args("<");
+ while (*t != 'E') {
+ if (db.tag_templates)
+ db.template_param.emplace_back();
+ size_t k0 = db.names.size();
+ const char *t1 = parse_template_arg(t, last, db);
+ size_t k1 = db.names.size();
+ if (db.tag_templates)
+ db.template_param.pop_back();
+ if (t1 == t || t1 == last)
+ return first;
+ if (db.tag_templates) {
+ db.template_param.back().emplace_back();
+ for (size_t k = k0; k < k1; ++k)
+ db.template_param.back().back().push_back(db.names[k]);
+ }
+ for (size_t k = k0; k < k1; ++k) {
+ if (args.size() > 1)
+ args += ", ";
+ args += db.names[k].move_full();
+ }
+ for (; k1 > k0; --k1)
+ if (!db.names.empty())
+ db.names.pop_back();
+ t = t1;
+ }
+ first = t + 1;
+ if (args.back() != '>')
+ args += ">";
+ else
+ args += " >";
+ db.names.push_back(std::move(args));
+ }
+ return first;
+}
+
+// <nested-name> ::= N [<CV-qualifiers>] [<ref-qualifier>] <prefix>
+// <unqualified-name> E
+// ::= N [<CV-qualifiers>] [<ref-qualifier>] <template-prefix>
+// <template-args> E
+//
+// <prefix> ::= <prefix> <unqualified-name>
+// ::= <template-prefix> <template-args>
+// ::= <template-param>
+// ::= <decltype>
+// ::= # empty
+// ::= <substitution>
+// ::= <prefix> <data-member-prefix>
+// extension ::= L
+//
+// <template-prefix> ::= <prefix> <template unqualified-name>
+// ::= <template-param>
+// ::= <substitution>
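+// Example (illustrative): "N1A1B3fooE" is a nested name whose prefix is A::B,
+// so _ZN1A1B3fooEv demangles to A::B::foo(); the "St" shorthand maps to the
+// std namespace, e.g. _ZNSt6thread4joinEv -> std::thread::join().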
+
+template <class C>
+static const char *parse_nested_name(const char *first, const char *last, C &db,
+ bool *ends_with_template_args) {
+ if (first != last && *first == 'N') {
+ unsigned cv;
+ const char *t0 = parse_cv_qualifiers(first + 1, last, cv);
+ if (t0 == last)
+ return first;
+ db.ref = 0;
+ if (*t0 == 'R') {
+ db.ref = 1;
+ ++t0;
+ } else if (*t0 == 'O') {
+ db.ref = 2;
+ ++t0;
+ }
+ db.names.emplace_back();
+ if (last - t0 >= 2 && t0[0] == 'S' && t0[1] == 't') {
+ t0 += 2;
+ db.names.back().first = "std";
+ }
+ if (t0 == last) {
+ db.names.pop_back();
+ return first;
+ }
+ bool pop_subs = false;
+ bool component_ends_with_template_args = false;
+ while (*t0 != 'E') {
+ component_ends_with_template_args = false;
+ const char *t1;
+ switch (*t0) {
+ case 'S':
+ if (t0 + 1 != last && t0[1] == 't')
+ goto do_parse_unqualified_name;
+ t1 = parse_substitution(t0, last, db);
+ if (t1 != t0 && t1 != last) {
+ auto name = db.names.back().move_full();
+ db.names.pop_back();
+ if (db.names.empty())
+ return first;
+ if (!db.names.back().first.empty()) {
+ db.names.back().first += "::" + name;
+ db.subs.push_back(typename C::sub_type(1, db.names.back()));
+ } else
+ db.names.back().first = name;
+ pop_subs = true;
+ t0 = t1;
+ } else
+ return first;
+ break;
+ case 'T':
+ t1 = parse_template_param(t0, last, db);
+ if (t1 != t0 && t1 != last) {
+ auto name = db.names.back().move_full();
+ db.names.pop_back();
+ if (db.names.empty())
+ return first;
+ if (!db.names.back().first.empty())
+ db.names.back().first += "::" + name;
+ else
+ db.names.back().first = name;
+ db.subs.push_back(typename C::sub_type(1, db.names.back()));
+ pop_subs = true;
+ t0 = t1;
+ } else
+ return first;
+ break;
+ case 'D':
+ if (t0 + 1 != last && t0[1] != 't' && t0[1] != 'T')
+ goto do_parse_unqualified_name;
+ t1 = parse_decltype(t0, last, db);
+ if (t1 != t0 && t1 != last) {
+ auto name = db.names.back().move_full();
+ db.names.pop_back();
+ if (db.names.empty())
+ return first;
+ if (!db.names.back().first.empty())
+ db.names.back().first += "::" + name;
+ else
+ db.names.back().first = name;
+ db.subs.push_back(typename C::sub_type(1, db.names.back()));
+ pop_subs = true;
+ t0 = t1;
+ } else
+ return first;
+ break;
+ case 'I':
+ t1 = parse_template_args(t0, last, db);
+ if (t1 != t0 && t1 != last) {
+ auto name = db.names.back().move_full();
+ db.names.pop_back();
+ if (db.names.empty())
+ return first;
+ db.names.back().first += name;
+ db.subs.push_back(typename C::sub_type(1, db.names.back()));
+ t0 = t1;
+ component_ends_with_template_args = true;
+ } else
+ return first;
+ break;
+ case 'L':
+ if (++t0 == last)
+ return first;
+ break;
+ default:
+ do_parse_unqualified_name:
+ t1 = parse_unqualified_name(t0, last, db);
+ if (t1 != t0 && t1 != last) {
+ auto name = db.names.back().move_full();
+ db.names.pop_back();
+ if (db.names.empty())
+ return first;
+ if (!db.names.back().first.empty())
+ db.names.back().first += "::" + name;
+ else
+ db.names.back().first = name;
+ db.subs.push_back(typename C::sub_type(1, db.names.back()));
+ pop_subs = true;
+ t0 = t1;
+ } else
+ return first;
+ }
+ }
+ first = t0 + 1;
+ db.cv = cv;
+ if (pop_subs && !db.subs.empty())
+ db.subs.pop_back();
+ if (ends_with_template_args)
+ *ends_with_template_args = component_ends_with_template_args;
+ }
+ return first;
+}
+
+// <discriminator> := _ <non-negative number> # when number < 10
+// := __ <non-negative number> _ # when number >= 10
+// extension := decimal-digit+ # at the end of string
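+// Example (illustrative): in _ZZ3foovE1x_0 the trailing "_0" is a
+// discriminator distinguishing multiple local entities named x; it is parsed
+// here but dropped from the output, which is simply foo()::x.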
+
+static const char *parse_discriminator(const char *first, const char *last) {
+ // parse but ignore discriminator
+ if (first != last) {
+ if (*first == '_') {
+ const char *t1 = first + 1;
+ if (t1 != last) {
+ if (std::isdigit(*t1))
+ first = t1 + 1;
+ else if (*t1 == '_') {
+ for (++t1; t1 != last && std::isdigit(*t1); ++t1)
+ ;
+ if (t1 != last && *t1 == '_')
+ first = t1 + 1;
+ }
+ }
+ } else if (std::isdigit(*first)) {
+ const char *t1 = first + 1;
+ for (; t1 != last && std::isdigit(*t1); ++t1)
+ ;
+ if (t1 == last)
+ first = last;
+ }
+ }
+ return first;
+}
+
+// <local-name> := Z <function encoding> E <entity name> [<discriminator>]
+//              := Z <function encoding> E s [<discriminator>]
+//              := Z <function encoding> Ed [ <parameter number> ] _ <entity name>
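+// Example (illustrative): _ZZ3foovE1x demangles to foo()::x, and the "s" form
+// _ZZ3foovEs demangles to "foo()::string literal".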
+
+template <class C>
+static const char *parse_local_name(const char *first, const char *last, C &db,
+ bool *ends_with_template_args) {
+ if (first != last && *first == 'Z') {
+ const char *t = parse_encoding(first + 1, last, db);
+ if (t != first + 1 && t != last && *t == 'E' && ++t != last) {
+ switch (*t) {
+ case 's':
+ first = parse_discriminator(t + 1, last);
+ if (db.names.empty())
+ return first;
+ db.names.back().first.append("::string literal");
+ break;
+ case 'd':
+ if (++t != last) {
+ const char *t1 = parse_number(t, last);
+ if (t1 != last && *t1 == '_') {
+ t = t1 + 1;
+ t1 = parse_name(t, last, db, ends_with_template_args);
+ if (t1 != t) {
+ if (db.names.size() < 2)
+ return first;
+ auto name = db.names.back().move_full();
+ db.names.pop_back();
+ if (db.names.empty())
+ return first;
+ db.names.back().first.append("::");
+ db.names.back().first.append(name);
+ first = t1;
+ } else if (!db.names.empty())
+ db.names.pop_back();
+ }
+ }
+ break;
+ default: {
+ const char *t1 = parse_name(t, last, db, ends_with_template_args);
+ if (t1 != t) {
+ // parse but ignore discriminator
+ first = parse_discriminator(t1, last);
+ if (db.names.size() < 2)
+ return first;
+ auto name = db.names.back().move_full();
+ db.names.pop_back();
+ if (db.names.empty())
+ return first;
+ db.names.back().first.append("::");
+ db.names.back().first.append(name);
+ } else if (!db.names.empty())
+ db.names.pop_back();
+ } break;
+ }
+ }
+ }
+ return first;
+}
+
+// <name> ::= <nested-name> // N
+// ::= <local-name> # See Scope Encoding below // Z
+// ::= <unscoped-template-name> <template-args>
+// ::= <unscoped-name>
+
+// <unscoped-template-name> ::= <unscoped-name>
+// ::= <substitution>
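+// Example (illustrative): "3fooIiE" is an <unscoped-template-name> followed by
+// <template-args> and demangles to foo<int>.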
+
+template <class C>
+static const char *parse_name(const char *first, const char *last, C &db,
+ bool *ends_with_template_args) {
+ if (last - first >= 2) {
+ const char *t0 = first;
+ // extension: ignore L here
+ if (*t0 == 'L')
+ ++t0;
+ switch (*t0) {
+ case 'N': {
+ const char *t1 = parse_nested_name(t0, last, db, ends_with_template_args);
+ if (t1 != t0)
+ first = t1;
+ break;
+ }
+ case 'Z': {
+ const char *t1 = parse_local_name(t0, last, db, ends_with_template_args);
+ if (t1 != t0)
+ first = t1;
+ break;
+ }
+ default: {
+ const char *t1 = parse_unscoped_name(t0, last, db);
+ if (t1 != t0) {
+ if (t1 != last &&
+ *t1 == 'I') // <unscoped-template-name> <template-args>
+ {
+ if (db.names.empty())
+ return first;
+ db.subs.push_back(typename C::sub_type(1, db.names.back()));
+ t0 = t1;
+ t1 = parse_template_args(t0, last, db);
+ if (t1 != t0) {
+ if (db.names.size() < 2)
+ return first;
+ auto tmp = db.names.back().move_full();
+ db.names.pop_back();
+ if (db.names.empty())
+ return first;
+ db.names.back().first += tmp;
+ first = t1;
+ if (ends_with_template_args)
+ *ends_with_template_args = true;
+ }
+ } else // <unscoped-name>
+ first = t1;
+ } else { // try <substitution> <template-args>
+ t1 = parse_substitution(t0, last, db);
+ if (t1 != t0 && t1 != last && *t1 == 'I') {
+ t0 = t1;
+ t1 = parse_template_args(t0, last, db);
+ if (t1 != t0) {
+ if (db.names.size() < 2)
+ return first;
+ auto tmp = db.names.back().move_full();
+ db.names.pop_back();
+ if (db.names.empty())
+ return first;
+ db.names.back().first += tmp;
+ first = t1;
+ if (ends_with_template_args)
+ *ends_with_template_args = true;
+ }
+ }
+ }
+ break;
+ }
+ }
+ }
+ return first;
+}
+
+// <call-offset> ::= h <nv-offset> _
+// ::= v <v-offset> _
+//
+// <nv-offset> ::= <offset number>
+// # non-virtual base override
+//
+// <v-offset> ::= <offset number> _ <virtual offset number>
+// # virtual base override, with vcall offset
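+// Example (illustrative): in the thunk _ZThn8_N1C1fEv the call offset "hn8_"
+// is an <nv-offset> of -8; the whole name demangles to
+// "non-virtual thunk to C::f()".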
+
+static const char *parse_call_offset(const char *first, const char *last) {
+ if (first != last) {
+ switch (*first) {
+ case 'h': {
+ const char *t = parse_number(first + 1, last);
+ if (t != first + 1 && t != last && *t == '_')
+ first = t + 1;
+ } break;
+ case 'v': {
+ const char *t = parse_number(first + 1, last);
+ if (t != first + 1 && t != last && *t == '_') {
+ const char *t2 = parse_number(++t, last);
+ if (t2 != t && t2 != last && *t2 == '_')
+ first = t2 + 1;
+ }
+ } break;
+ }
+ }
+ return first;
+}
+
+// <special-name> ::= TV <type>    # virtual table
+//                ::= TT <type>    # VTT structure (construction vtable index)
+//                ::= TI <type>    # typeinfo structure
+//                ::= TS <type>    # typeinfo name (null-terminated byte string)
+//                ::= Tc <call-offset> <call-offset> <base encoding>
+//                    # base is the nominal target function of thunk
+//                    # first call-offset is 'this' adjustment
+//                    # second call-offset is result adjustment
+//                ::= T <call-offset> <base encoding>
+//                    # base is the nominal target function of thunk
+//                ::= GV <object name>    # Guard variable for one-time initialization
+//                                        # No <type>
+// extension      ::= TC <first type> <number> _ <second type>
+//                    # construction vtable for second-in-first
+// extension      ::= GR <object name>    # reference temporary for object
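+// Examples (illustrative): _ZTV1C -> "vtable for C", _ZTS1C ->
+// "typeinfo name for C", _ZGVZ3foovE1x -> "guard variable for foo()::x".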
+
+template <class C>
+static const char *parse_special_name(const char *first, const char *last,
+ C &db) {
+ if (last - first > 2) {
+ const char *t;
+ switch (*first) {
+ case 'T':
+ switch (first[1]) {
+ case 'V':
+ // TV <type> # virtual table
+ t = parse_type(first + 2, last, db);
+ if (t != first + 2) {
+ if (db.names.empty())
+ return first;
+ db.names.back().first.insert(0, "vtable for ");
+ first = t;
+ }
+ break;
+ case 'T':
+ // TT <type> # VTT structure (construction vtable index)
+ t = parse_type(first + 2, last, db);
+ if (t != first + 2) {
+ if (db.names.empty())
+ return first;
+ db.names.back().first.insert(0, "VTT for ");
+ first = t;
+ }
+ break;
+ case 'I':
+ // TI <type> # typeinfo structure
+ t = parse_type(first + 2, last, db);
+ if (t != first + 2) {
+ if (db.names.empty())
+ return first;
+ db.names.back().first.insert(0, "typeinfo for ");
+ first = t;
+ }
+ break;
+ case 'S':
+ // TS <type> # typeinfo name (null-terminated byte string)
+ t = parse_type(first + 2, last, db);
+ if (t != first + 2) {
+ if (db.names.empty())
+ return first;
+ db.names.back().first.insert(0, "typeinfo name for ");
+ first = t;
+ }
+ break;
+ case 'c':
+ // Tc <call-offset> <call-offset> <base encoding>
+ {
+ const char *t0 = parse_call_offset(first + 2, last);
+ if (t0 == first + 2)
+ break;
+ const char *t1 = parse_call_offset(t0, last);
+ if (t1 == t0)
+ break;
+ t = parse_encoding(t1, last, db);
+ if (t != t1) {
+ if (db.names.empty())
+ return first;
+ db.names.back().first.insert(0, "covariant return thunk to ");
+ first = t;
+ }
+ }
+ break;
+ case 'C':
+ // extension ::= TC <first type> <number> _ <second type> # construction
+ // vtable for second-in-first
+ t = parse_type(first + 2, last, db);
+ if (t != first + 2) {
+ const char *t0 = parse_number(t, last);
+ if (t0 != t && t0 != last && *t0 == '_') {
+ const char *t1 = parse_type(++t0, last, db);
+ if (t1 != t0) {
+ if (db.names.size() < 2)
+ return first;
+ auto left = db.names.back().move_full();
+ db.names.pop_back();
+ if (db.names.empty())
+ return first;
+ db.names.back().first = "construction vtable for " +
+ std::move(left) + "-in-" +
+ db.names.back().move_full();
+ first = t1;
+ }
+ }
+ }
+ break;
+ default:
+ // T <call-offset> <base encoding>
+ {
+ const char *t0 = parse_call_offset(first + 1, last);
+ if (t0 == first + 1)
+ break;
+ t = parse_encoding(t0, last, db);
+ if (t != t0) {
+ if (db.names.empty())
+ return first;
+ if (first[1] == 'v') {
+ db.names.back().first.insert(0, "virtual thunk to ");
+ first = t;
+ } else {
+ db.names.back().first.insert(0, "non-virtual thunk to ");
+ first = t;
+ }
+ }
+ }
+ break;
+ }
+ break;
+ case 'G':
+ switch (first[1]) {
+ case 'V':
+ // GV <object name> # Guard variable for one-time initialization
+ t = parse_name(first + 2, last, db);
+ if (t != first + 2) {
+ if (db.names.empty())
+ return first;
+ db.names.back().first.insert(0, "guard variable for ");
+ first = t;
+ }
+ break;
+ case 'R':
+ // extension ::= GR <object name> # reference temporary for object
+ t = parse_name(first + 2, last, db);
+ if (t != first + 2) {
+ if (db.names.empty())
+ return first;
+ db.names.back().first.insert(0, "reference temporary for ");
+ first = t;
+ }
+ break;
+ }
+ break;
+ }
+ }
+ return first;
+}
+
+namespace {
+template <class T> class save_value {
+ T &restore_;
+ T original_value_;
+
+public:
+ save_value(T &restore) : restore_(restore), original_value_(restore) {}
+
+ ~save_value() { restore_ = std::move(original_value_); }
+
+ save_value(const save_value &) = delete;
+ save_value &operator=(const save_value &) = delete;
+};
+}
+
+// <encoding> ::= <function name> <bare-function-type>
+// ::= <data name>
+// ::= <special-name>
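+// Example (illustrative): the encoding "3foov" is the function name foo plus
+// the bare function type v (no parameters), so _Z3foov demangles to foo();
+// a data name such as "1x" in _Z1x demangles to just x.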
+
+template <class C>
+static const char *parse_encoding(const char *first, const char *last, C &db) {
+ if (first != last) {
+ save_value<decltype(db.encoding_depth)> su(db.encoding_depth);
+ ++db.encoding_depth;
+ save_value<decltype(db.tag_templates)> sb(db.tag_templates);
+ if (db.encoding_depth > 1)
+ db.tag_templates = true;
+ switch (*first) {
+ case 'G':
+ case 'T':
+ first = parse_special_name(first, last, db);
+ break;
+ default: {
+ bool ends_with_template_args = false;
+ const char *t = parse_name(first, last, db, &ends_with_template_args);
+ unsigned cv = db.cv;
+ unsigned ref = db.ref;
+ if (t != first) {
+ if (t != last && *t != 'E' && *t != '.') {
+ save_value<bool> sb2(db.tag_templates);
+ db.tag_templates = false;
+ const char *t2;
+ std::string ret2;
+ if (db.names.empty())
+ return first;
+ const std::string &nm = db.names.back().first;
+ if (nm.empty())
+ return first;
+ if (!db.parsed_ctor_dtor_cv && ends_with_template_args) {
+ t2 = parse_type(t, last, db);
+ if (t2 == t)
+ return first;
+ if (db.names.size() < 2)
+ return first;
+ auto ret1 = std::move(db.names.back().first);
+ ret2 = std::move(db.names.back().second);
+ if (ret2.empty())
+ ret1 += ' ';
+ db.names.pop_back();
+ if (db.names.empty())
+ return first;
+
+ db.names.back().first.insert(0, ret1);
+ t = t2;
+ }
+ db.names.back().first += '(';
+ if (t != last && *t == 'v') {
+ ++t;
+ } else {
+ bool first_arg = true;
+ while (true) {
+ size_t k0 = db.names.size();
+ t2 = parse_type(t, last, db);
+ size_t k1 = db.names.size();
+ if (t2 == t)
+ break;
+ if (k1 > k0) {
+ std::string tmp;
+ for (size_t k = k0; k < k1; ++k) {
+ if (!tmp.empty())
+ tmp += ", ";
+ tmp += db.names[k].move_full();
+ }
+ for (size_t k = k0; k < k1; ++k) {
+ if (db.names.empty())
+ return first;
+ db.names.pop_back();
+ }
+ if (!tmp.empty()) {
+ if (db.names.empty())
+ return first;
+ if (!first_arg)
+ db.names.back().first += ", ";
+ else
+ first_arg = false;
+ db.names.back().first += tmp;
+ }
+ }
+ t = t2;
+ }
+ }
+ if (db.names.empty())
+ return first;
+ db.names.back().first += ')';
+ if (cv & 1)
+ db.names.back().first.append(" const");
+ if (cv & 2)
+ db.names.back().first.append(" volatile");
+ if (cv & 4)
+ db.names.back().first.append(" restrict");
+ if (ref == 1)
+ db.names.back().first.append(" &");
+ else if (ref == 2)
+ db.names.back().first.append(" &&");
+ db.names.back().first += ret2;
+ first = t;
+ } else
+ first = t;
+ }
+ break;
+ }
+ }
+ }
+ return first;
+}
+
+// _block_invoke
+// _block_invoke<decimal-digit>+
+// _block_invoke_<decimal-digit>+
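+// Example (illustrative): ___Z3foov_block_invoke (and numbered variants such
+// as ___Z3foov_block_invoke_2) demangle to
+// "invocation function for block in foo()".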
+
+template <class C>
+static const char *parse_block_invoke(const char *first, const char *last,
+ C &db) {
+ if (last - first >= 13) {
+ const char test[] = "_block_invoke";
+ const char *t = first;
+ for (int i = 0; i < 13; ++i, ++t) {
+ if (*t != test[i])
+ return first;
+ }
+ if (t != last) {
+ if (*t == '_') {
+ // must have at least 1 decimal digit
+ if (++t == last || !std::isdigit(*t))
+ return first;
+ ++t;
+ }
+ // parse zero or more digits
+ while (t != last && isdigit(*t))
+ ++t;
+ }
+ if (db.names.empty())
+ return first;
+ db.names.back().first.insert(0, "invocation function for block in ");
+ first = t;
+ }
+ return first;
+}
+
+// extension
+// <dot-suffix> := .<anything and everything>
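+// Example (illustrative): _Z3foov.constprop.0 demangles to
+// "foo() (.constprop.0)"; the suffix is kept verbatim in parentheses.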
+
+template <class C>
+static const char *parse_dot_suffix(const char *first, const char *last,
+ C &db) {
+ if (first != last && *first == '.') {
+ if (db.names.empty())
+ return first;
+ db.names.back().first += " (" + std::string(first, last) + ")";
+ first = last;
+ }
+ return first;
+}
+
+// <block-invocation-function> ___Z<encoding>_block_invoke
+// <block-invocation-function> ___Z<encoding>_block_invoke<decimal-digit>+
+// <block-invocation-function> ___Z<encoding>_block_invoke_<decimal-digit>+
+// <mangled-name> ::= _Z<encoding>
+// ::= <type>
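+// Example (illustrative): _Z3foov demangles to foo(); the bare <type> form is
+// an extension reachable only through this internal entry point, since
+// itaniumDemangle itself only accepts names beginning with _Z or ___Z.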
+
+template <class C>
+static void demangle(const char *first, const char *last, C &db, int &status) {
+ if (first >= last) {
+ status = invalid_mangled_name;
+ return;
+ }
+ if (*first == '_') {
+ if (last - first >= 4) {
+ if (first[1] == 'Z') {
+ const char *t = parse_encoding(first + 2, last, db);
+ if (t != first + 2 && t != last && *t == '.')
+ t = parse_dot_suffix(t, last, db);
+ if (t != last)
+ status = invalid_mangled_name;
+ } else if (first[1] == '_' && first[2] == '_' && first[3] == 'Z') {
+ const char *t = parse_encoding(first + 4, last, db);
+ if (t != first + 4 && t != last) {
+ const char *t1 = parse_block_invoke(t, last, db);
+ if (t1 != last)
+ status = invalid_mangled_name;
+ } else
+ status = invalid_mangled_name;
+ } else
+ status = invalid_mangled_name;
+ } else
+ status = invalid_mangled_name;
+ } else {
+ const char *t = parse_type(first, last, db);
+ if (t != last)
+ status = invalid_mangled_name;
+ }
+ if (status == success && db.names.empty())
+ status = invalid_mangled_name;
+}
+
+namespace {
+template <class StrT> struct string_pair {
+ StrT first;
+ StrT second;
+
+ string_pair() = default;
+ string_pair(StrT f) : first(std::move(f)) {}
+ string_pair(StrT f, StrT s) : first(std::move(f)), second(std::move(s)) {}
+ template <size_t N> string_pair(const char (&s)[N]) : first(s, N - 1) {}
+
+ size_t size() const { return first.size() + second.size(); }
+ StrT full() const { return first + second; }
+ StrT move_full() { return std::move(first) + std::move(second); }
+};
+
+struct Db {
+ typedef std::vector<string_pair<std::string>> sub_type;
+ typedef std::vector<sub_type> template_param_type;
+ sub_type names;
+ template_param_type subs;
+ std::vector<template_param_type> template_param;
+ unsigned cv = 0;
+ unsigned ref = 0;
+ unsigned encoding_depth = 0;
+ bool parsed_ctor_dtor_cv = false;
+ bool tag_templates = true;
+ bool fix_forward_references = false;
+ bool try_to_parse_template_args = true;
+
+ Db() : subs(0, names), template_param(0, subs) {}
+};
+}
+
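+// Usage sketch (illustrative, mirrors the __cxa_demangle conventions): the
+// returned buffer is allocated with realloc and must be released with free(),
+// and *status is 0 on success.
+//
+//   int status = 0;
+//   char *out = llvm::itaniumDemangle("_Z3foov", nullptr, nullptr, &status);
+//   if (out && status == 0) {
+//     // out now holds "foo()"
+//     std::free(out);
+//   }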
+char *llvm::itaniumDemangle(const char *mangled_name, char *buf, size_t *n,
+ int *status) {
+ if (mangled_name == nullptr || (buf != nullptr && n == nullptr)) {
+ if (status)
+ *status = invalid_args;
+ return nullptr;
+ }
+
+ size_t len = std::strlen(mangled_name);
+ if (len < 2 || strncmp(mangled_name, "_Z", 2)) {
+ if (len < 4 || strncmp(mangled_name, "___Z", 4)) {
+ if (status)
+ *status = invalid_mangled_name;
+ return nullptr;
+ }
+ }
+
+ size_t internal_size = buf != nullptr ? *n : 0;
+ Db db;
+ db.template_param.emplace_back();
+ int internal_status = success;
+ demangle(mangled_name, mangled_name + len, db, internal_status);
+ if (internal_status == success && db.fix_forward_references &&
+ !db.template_param.empty() && !db.template_param.front().empty()) {
+ db.fix_forward_references = false;
+ db.tag_templates = false;
+ db.names.clear();
+ db.subs.clear();
+ demangle(mangled_name, mangled_name + len, db, internal_status);
+ if (db.fix_forward_references)
+ internal_status = invalid_mangled_name;
+ }
+ if (internal_status == success) {
+ size_t sz = db.names.back().size() + 1;
+ if (sz > internal_size) {
+ char *newbuf = static_cast<char *>(std::realloc(buf, sz));
+ if (newbuf == nullptr) {
+ internal_status = memory_alloc_failure;
+ buf = nullptr;
+ } else {
+ buf = newbuf;
+ if (n != nullptr)
+ *n = sz;
+ }
+ }
+ if (buf != nullptr) {
+ db.names.back().first += db.names.back().second;
+ std::memcpy(buf, db.names.back().first.data(), sz - 1);
+ buf[sz - 1] = char(0);
+ }
+ } else
+ buf = nullptr;
+ if (status)
+ *status = internal_status;
+ return buf;
+}
diff --git a/contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp b/contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp
index a8e68bf49abc..b4bed325f491 100644
--- a/contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -18,6 +18,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ExecutionEngine/GenericValue.h"
#include "llvm/ExecutionEngine/JITEventListener.h"
+#include "llvm/ExecutionEngine/ObjectCache.h"
#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -48,12 +49,13 @@ STATISTIC(NumGlobals , "Number of global vars initialized");
ExecutionEngine *(*ExecutionEngine::MCJITCtor)(
std::unique_ptr<Module> M, std::string *ErrorStr,
std::shared_ptr<MCJITMemoryManager> MemMgr,
- std::shared_ptr<RuntimeDyld::SymbolResolver> Resolver,
+
+ std::shared_ptr<JITSymbolResolver> Resolver,
std::unique_ptr<TargetMachine> TM) = nullptr;
ExecutionEngine *(*ExecutionEngine::OrcMCJITReplacementCtor)(
std::string *ErrorStr, std::shared_ptr<MCJITMemoryManager> MemMgr,
- std::shared_ptr<RuntimeDyld::SymbolResolver> Resolver,
+ std::shared_ptr<JITSymbolResolver> Resolver,
std::unique_ptr<TargetMachine> TM) = nullptr;
ExecutionEngine *(*ExecutionEngine::InterpCtor)(std::unique_ptr<Module> M,
@@ -61,6 +63,8 @@ ExecutionEngine *(*ExecutionEngine::InterpCtor)(std::unique_ptr<Module> M,
void JITEventListener::anchor() {}
+void ObjectCache::anchor() {}
+
void ExecutionEngine::Init(std::unique_ptr<Module> M) {
CompilingLazily = false;
GVCompilationDisabled = false;
@@ -151,7 +155,7 @@ bool ExecutionEngine::removeModule(Module *M) {
return false;
}
-Function *ExecutionEngine::FindFunctionNamed(const char *FnName) {
+Function *ExecutionEngine::FindFunctionNamed(StringRef FnName) {
for (unsigned i = 0, e = Modules.size(); i != e; ++i) {
Function *F = Modules[i]->getFunction(FnName);
if (F && !F->isDeclaration())
@@ -160,7 +164,7 @@ Function *ExecutionEngine::FindFunctionNamed(const char *FnName) {
return nullptr;
}
-GlobalVariable *ExecutionEngine::FindGlobalVariableNamed(const char *Name, bool AllowInternal) {
+GlobalVariable *ExecutionEngine::FindGlobalVariableNamed(StringRef Name, bool AllowInternal) {
for (unsigned i = 0, e = Modules.size(); i != e; ++i) {
GlobalVariable *GV = Modules[i]->getGlobalVariable(Name,AllowInternal);
if (GV && !GV->isDeclaration())
@@ -366,7 +370,7 @@ void *ArgvArray::reset(LLVMContext &C, ExecutionEngine *EE,
void ExecutionEngine::runStaticConstructorsDestructors(Module &module,
bool isDtors) {
- const char *Name = isDtors ? "llvm.global_dtors" : "llvm.global_ctors";
+ StringRef Name(isDtors ? "llvm.global_dtors" : "llvm.global_ctors");
GlobalVariable *GV = module.getNamedGlobal(Name);
// If this global has internal linkage, or if it has a use, then it must be
@@ -499,8 +503,8 @@ EngineBuilder::setMemoryManager(std::unique_ptr<MCJITMemoryManager> MM) {
}
EngineBuilder&
-EngineBuilder::setSymbolResolver(std::unique_ptr<RuntimeDyld::SymbolResolver> SR) {
- Resolver = std::shared_ptr<RuntimeDyld::SymbolResolver>(std::move(SR));
+EngineBuilder::setSymbolResolver(std::unique_ptr<JITSymbolResolver> SR) {
+ Resolver = std::shared_ptr<JITSymbolResolver>(std::move(SR));
return *this;
}
@@ -688,7 +692,7 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
else if (CE->getType()->isDoubleTy())
GV.DoubleVal = GV.IntVal.roundToDouble();
else if (CE->getType()->isX86_FP80Ty()) {
- APFloat apf = APFloat::getZero(APFloat::x87DoubleExtended);
+ APFloat apf = APFloat::getZero(APFloat::x87DoubleExtended());
(void)apf.convertFromAPInt(GV.IntVal,
false,
APFloat::rmNearestTiesToEven);
@@ -703,7 +707,7 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
else if (CE->getType()->isDoubleTy())
GV.DoubleVal = GV.IntVal.signedRoundToDouble();
else if (CE->getType()->isX86_FP80Ty()) {
- APFloat apf = APFloat::getZero(APFloat::x87DoubleExtended);
+ APFloat apf = APFloat::getZero(APFloat::x87DoubleExtended());
(void)apf.convertFromAPInt(GV.IntVal,
true,
APFloat::rmNearestTiesToEven);
@@ -720,7 +724,7 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
else if (Op0->getType()->isDoubleTy())
GV.IntVal = APIntOps::RoundDoubleToAPInt(GV.DoubleVal, BitWidth);
else if (Op0->getType()->isX86_FP80Ty()) {
- APFloat apf = APFloat(APFloat::x87DoubleExtended, GV.IntVal);
+ APFloat apf = APFloat(APFloat::x87DoubleExtended(), GV.IntVal);
uint64_t v;
bool ignored;
(void)apf.convertToInteger(&v, BitWidth,
diff --git a/contrib/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/contrib/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp
index d6b209a91d76..1d7c6e714ed0 100644
--- a/contrib/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp
@@ -187,7 +187,7 @@ LLVMBool LLVMCreateMCJITCompilerForModule(
// NoFramePointerElim.
for (auto &F : *Mod) {
auto Attrs = F.getAttributes();
- auto Value = options.NoFramePointerElim ? "true" : "false";
+ StringRef Value(options.NoFramePointerElim ? "true" : "false");
Attrs = Attrs.addAttribute(F.getContext(), AttributeSet::FunctionIndex,
"no-frame-pointer-elim", Value);
F.setAttributes(Attrs);
diff --git a/contrib/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp b/contrib/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp
index 1ab6203dd6da..dad099d73c96 100644
--- a/contrib/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/GDBRegistrationListener.cpp
@@ -69,16 +69,6 @@ struct RegisteredObjectInfo {
OwningBinary<ObjectFile> Obj)
: Size(Size), Entry(Entry), Obj(std::move(Obj)) {}
- RegisteredObjectInfo(RegisteredObjectInfo &&Other)
- : Size(Other.Size), Entry(Other.Entry), Obj(std::move(Other.Obj)) {}
-
- RegisteredObjectInfo& operator=(RegisteredObjectInfo &&Other) {
- Size = Other.Size;
- Entry = Other.Entry;
- Obj = std::move(Other.Obj);
- return *this;
- }
-
std::size_t Size;
jit_code_entry *Entry;
OwningBinary<ObjectFile> Obj;
diff --git a/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp b/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
index 1eb4f7d19342..923f6e7147db 100644
--- a/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -999,7 +999,7 @@ GenericValue Interpreter::executeGEPOperation(Value *Ptr, gep_type_iterator I,
uint64_t Total = 0;
for (; I != E; ++I) {
- if (StructType *STy = dyn_cast<StructType>(*I)) {
+ if (StructType *STy = I.getStructTypeOrNull()) {
const StructLayout *SLO = getDataLayout().getStructLayout(STy);
const ConstantInt *CPU = cast<ConstantInt>(I.getOperand());
@@ -1007,7 +1007,6 @@ GenericValue Interpreter::executeGEPOperation(Value *Ptr, gep_type_iterator I,
Total += SLO->getElementOffset(Index);
} else {
- SequentialType *ST = cast<SequentialType>(*I);
// Get the index number for the array... which must be long type...
GenericValue IdxGV = getOperandValue(I.getOperand(), SF);
@@ -1020,7 +1019,7 @@ GenericValue Interpreter::executeGEPOperation(Value *Ptr, gep_type_iterator I,
assert(BitWidth == 64 && "Invalid index type for getelementptr");
Idx = (int64_t)IdxGV.IntVal.getZExtValue();
}
- Total += getDataLayout().getTypeAllocSize(ST->getElementType()) * Idx;
+ Total += getDataLayout().getTypeAllocSize(I.getIndexedType()) * Idx;
}
}
diff --git a/contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
index 441f0eb85721..ee75bee9c533 100644
--- a/contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
@@ -20,20 +20,31 @@
//===----------------------------------------------------------------------===//
#include "Interpreter.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/Config/config.h" // Detect libffi
+#include "llvm/ExecutionEngine/GenericValue.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Module.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/DynamicLibrary.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Mutex.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/UniqueLock.h"
+#include <cassert>
#include <cmath>
#include <csignal>
+#include <cstdint>
#include <cstdio>
#include <cstring>
#include <map>
+#include <string>
+#include <utility>
+#include <vector>
#ifdef HAVE_FFI_CALL
#ifdef HAVE_FFI_H
@@ -290,7 +301,6 @@ GenericValue Interpreter::callExternalFunction(Function *F,
return GenericValue();
}
-
//===----------------------------------------------------------------------===//
// Functions "exported" to the running application...
//
@@ -331,7 +341,7 @@ static GenericValue lle_X_sprintf(FunctionType *FT,
// close enough for now.
GenericValue GV;
GV.IntVal = APInt(32, strlen(FmtStr));
- while (1) {
+ while (true) {
switch (*FmtStr) {
case 0: return GV; // Null terminator...
default: // Normal nonspecial character
diff --git a/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.cpp b/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.cpp
index bc7da2e4f6af..9818adfff82e 100644
--- a/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.cpp
@@ -35,9 +35,13 @@ extern "C" void LLVMLinkInInterpreter() { }
ExecutionEngine *Interpreter::create(std::unique_ptr<Module> M,
std::string *ErrStr) {
// Tell this Module to materialize everything and release the GVMaterializer.
- if (std::error_code EC = M->materializeAll()) {
+ if (Error Err = M->materializeAll()) {
+ std::string Msg;
+ handleAllErrors(std::move(Err), [&](ErrorInfoBase &EIB) {
+ Msg = EIB.message();
+ });
if (ErrStr)
- *ErrStr = EC.message();
+ *ErrStr = Msg;
// We got an error, just return 0
return nullptr;
}
diff --git a/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h b/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h
index 2e5a867a200f..5c16448404bb 100644
--- a/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h
+++ b/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h
@@ -41,12 +41,9 @@ class AllocaHolder {
public:
AllocaHolder() {}
- // Make this type move-only. Define explicit move special members for MSVC.
- AllocaHolder(AllocaHolder &&RHS) : Allocations(std::move(RHS.Allocations)) {}
- AllocaHolder &operator=(AllocaHolder &&RHS) {
- Allocations = std::move(RHS.Allocations);
- return *this;
- }
+ // Make this type move-only.
+ AllocaHolder(AllocaHolder &&) = default;
+ AllocaHolder &operator=(AllocaHolder &&RHS) = default;
~AllocaHolder() {
for (void *Allocation : Allocations)
@@ -72,22 +69,6 @@ struct ExecutionContext {
AllocaHolder Allocas; // Track memory allocated by alloca
ExecutionContext() : CurFunction(nullptr), CurBB(nullptr), CurInst(nullptr) {}
-
- ExecutionContext(ExecutionContext &&O)
- : CurFunction(O.CurFunction), CurBB(O.CurBB), CurInst(O.CurInst),
- Caller(O.Caller), Values(std::move(O.Values)),
- VarArgs(std::move(O.VarArgs)), Allocas(std::move(O.Allocas)) {}
-
- ExecutionContext &operator=(ExecutionContext &&O) {
- CurFunction = O.CurFunction;
- CurBB = O.CurBB;
- CurInst = O.CurInst;
- Caller = O.Caller;
- Values = std::move(O.Values);
- VarArgs = std::move(O.VarArgs);
- Allocas = std::move(O.Allocas);
- return *this;
- }
};
// Interpreter - This class represents the entirety of the interpreter.
diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp
index 7fb328babfe8..ff8749fbfed4 100644
--- a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp
@@ -29,8 +29,6 @@
using namespace llvm;
-void ObjectCache::anchor() {}
-
namespace {
static struct RegisterJIT {
@@ -46,7 +44,7 @@ ExecutionEngine*
MCJIT::createJIT(std::unique_ptr<Module> M,
std::string *ErrorStr,
std::shared_ptr<MCJITMemoryManager> MemMgr,
- std::shared_ptr<RuntimeDyld::SymbolResolver> Resolver,
+ std::shared_ptr<JITSymbolResolver> Resolver,
std::unique_ptr<TargetMachine> TM) {
// Try to register the program as a source of symbols to resolve against.
//
@@ -67,7 +65,7 @@ MCJIT::createJIT(std::unique_ptr<Module> M,
MCJIT::MCJIT(std::unique_ptr<Module> M, std::unique_ptr<TargetMachine> TM,
std::shared_ptr<MCJITMemoryManager> MemMgr,
- std::shared_ptr<RuntimeDyld::SymbolResolver> Resolver)
+ std::shared_ptr<JITSymbolResolver> Resolver)
: ExecutionEngine(TM->createDataLayout(), std::move(M)), TM(std::move(TM)),
Ctx(nullptr), MemMgr(std::move(MemMgr)),
Resolver(*this, std::move(Resolver)), Dyld(*this->MemMgr, this->Resolver),
@@ -276,20 +274,21 @@ void MCJIT::finalizeModule(Module *M) {
finalizeLoadedModules();
}
-RuntimeDyld::SymbolInfo MCJIT::findExistingSymbol(const std::string &Name) {
- SmallString<128> FullName;
- Mangler::getNameWithPrefix(FullName, Name, getDataLayout());
-
- if (void *Addr = getPointerToGlobalIfAvailable(FullName))
- return RuntimeDyld::SymbolInfo(static_cast<uint64_t>(
- reinterpret_cast<uintptr_t>(Addr)),
- JITSymbolFlags::Exported);
+JITSymbol MCJIT::findExistingSymbol(const std::string &Name) {
+ if (void *Addr = getPointerToGlobalIfAvailable(Name))
+ return JITSymbol(static_cast<uint64_t>(
+ reinterpret_cast<uintptr_t>(Addr)),
+ JITSymbolFlags::Exported);
- return Dyld.getSymbol(FullName);
+ return Dyld.getSymbol(Name);
}
Module *MCJIT::findModuleForSymbol(const std::string &Name,
bool CheckFunctionsOnly) {
+ StringRef DemangledName = Name;
+ if (DemangledName[0] == getDataLayout().getGlobalPrefix())
+ DemangledName = DemangledName.substr(1);
+
MutexGuard locked(lock);
// If it hasn't already been generated, see if it's in one of our modules.
@@ -297,11 +296,11 @@ Module *MCJIT::findModuleForSymbol(const std::string &Name,
E = OwnedModules.end_added();
I != E; ++I) {
Module *M = *I;
- Function *F = M->getFunction(Name);
+ Function *F = M->getFunction(DemangledName);
if (F && !F->isDeclaration())
return M;
if (!CheckFunctionsOnly) {
- GlobalVariable *G = M->getGlobalVariable(Name);
+ GlobalVariable *G = M->getGlobalVariable(DemangledName);
if (G && !G->isDeclaration())
return M;
// FIXME: Do we need to worry about global aliases?
@@ -313,11 +312,16 @@ Module *MCJIT::findModuleForSymbol(const std::string &Name,
uint64_t MCJIT::getSymbolAddress(const std::string &Name,
bool CheckFunctionsOnly) {
- return findSymbol(Name, CheckFunctionsOnly).getAddress();
+ std::string MangledName;
+ {
+ raw_string_ostream MangledNameStream(MangledName);
+ Mangler::getNameWithPrefix(MangledNameStream, Name, getDataLayout());
+ }
+ return findSymbol(MangledName, CheckFunctionsOnly).getAddress();
}
-RuntimeDyld::SymbolInfo MCJIT::findSymbol(const std::string &Name,
- bool CheckFunctionsOnly) {
+JITSymbol MCJIT::findSymbol(const std::string &Name,
+ bool CheckFunctionsOnly) {
MutexGuard locked(lock);
// First, check to see if we already have this symbol.
@@ -367,7 +371,7 @@ RuntimeDyld::SymbolInfo MCJIT::findSymbol(const std::string &Name,
if (LazyFunctionCreator) {
auto Addr = static_cast<uint64_t>(
reinterpret_cast<uintptr_t>(LazyFunctionCreator(Name)));
- return RuntimeDyld::SymbolInfo(Addr, JITSymbolFlags::Exported);
+ return JITSymbol(Addr, JITSymbolFlags::Exported);
}
return nullptr;
@@ -442,7 +446,7 @@ void MCJIT::runStaticConstructorsDestructors(bool isDtors) {
isDtors, OwnedModules.begin_finalized(), OwnedModules.end_finalized());
}
-Function *MCJIT::FindFunctionNamedInModulePtrSet(const char *FnName,
+Function *MCJIT::FindFunctionNamedInModulePtrSet(StringRef FnName,
ModulePtrSet::iterator I,
ModulePtrSet::iterator E) {
for (; I != E; ++I) {
@@ -453,7 +457,7 @@ Function *MCJIT::FindFunctionNamedInModulePtrSet(const char *FnName,
return nullptr;
}
-GlobalVariable *MCJIT::FindGlobalVariableNamedInModulePtrSet(const char *Name,
+GlobalVariable *MCJIT::FindGlobalVariableNamedInModulePtrSet(StringRef Name,
bool AllowInternal,
ModulePtrSet::iterator I,
ModulePtrSet::iterator E) {
@@ -466,7 +470,7 @@ GlobalVariable *MCJIT::FindGlobalVariableNamedInModulePtrSet(const char *Name,
}
-Function *MCJIT::FindFunctionNamed(const char *FnName) {
+Function *MCJIT::FindFunctionNamed(StringRef FnName) {
Function *F = FindFunctionNamedInModulePtrSet(
FnName, OwnedModules.begin_added(), OwnedModules.end_added());
if (!F)
@@ -478,7 +482,7 @@ Function *MCJIT::FindFunctionNamed(const char *FnName) {
return F;
}
-GlobalVariable *MCJIT::FindGlobalVariableNamed(const char *Name, bool AllowInternal) {
+GlobalVariable *MCJIT::FindGlobalVariableNamed(StringRef Name, bool AllowInternal) {
GlobalVariable *GV = FindGlobalVariableNamedInModulePtrSet(
Name, AllowInternal, OwnedModules.begin_added(), OwnedModules.end_added());
if (!GV)
@@ -587,7 +591,10 @@ GenericValue MCJIT::runFunction(Function *F, ArrayRef<GenericValue> ArgValues) {
}
}
- llvm_unreachable("Full-featured argument passing not supported yet!");
+ report_fatal_error("MCJIT::runFunction does not support full-featured "
+ "argument passing. Please use "
+ "ExecutionEngine::getFunctionAddress and cast the result "
+ "to the desired function pointer type.");
}
void *MCJIT::getPointerToNamedFunction(StringRef Name, bool AbortOnFailure) {
@@ -622,7 +629,7 @@ void MCJIT::UnregisterJITEventListener(JITEventListener *L) {
if (!L)
return;
MutexGuard locked(lock);
- auto I = std::find(EventListeners.rbegin(), EventListeners.rend(), L);
+ auto I = find(reverse(EventListeners), L);
if (I != EventListeners.rend()) {
std::swap(*I, EventListeners.back());
EventListeners.pop_back();
@@ -644,13 +651,9 @@ void MCJIT::NotifyFreeingObject(const object::ObjectFile& Obj) {
L->NotifyFreeingObject(Obj);
}
-RuntimeDyld::SymbolInfo
+JITSymbol
LinkingSymbolResolver::findSymbol(const std::string &Name) {
auto Result = ParentEngine.findSymbol(Name, false);
- // If the symbols wasn't found and it begins with an underscore, try again
- // without the underscore.
- if (!Result && Name[0] == '_')
- Result = ParentEngine.findSymbol(Name.substr(1), false);
if (Result)
return Result;
if (ParentEngine.isSymbolSearchingDisabled())
diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h
index e25f76cd57f3..daf578f5daae 100644
--- a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h
+++ b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h
@@ -26,23 +26,22 @@ class MCJIT;
// functions across modules that it owns. It aggregates the memory manager
// that is passed in to the MCJIT constructor and defers most functionality
// to that object.
-class LinkingSymbolResolver : public RuntimeDyld::SymbolResolver {
+class LinkingSymbolResolver : public JITSymbolResolver {
public:
LinkingSymbolResolver(MCJIT &Parent,
- std::shared_ptr<RuntimeDyld::SymbolResolver> Resolver)
+ std::shared_ptr<JITSymbolResolver> Resolver)
: ParentEngine(Parent), ClientResolver(std::move(Resolver)) {}
- RuntimeDyld::SymbolInfo findSymbol(const std::string &Name) override;
+ JITSymbol findSymbol(const std::string &Name) override;
// MCJIT doesn't support logical dylibs.
- RuntimeDyld::SymbolInfo
- findSymbolInLogicalDylib(const std::string &Name) override {
+ JITSymbol findSymbolInLogicalDylib(const std::string &Name) override {
return nullptr;
}
private:
MCJIT &ParentEngine;
- std::shared_ptr<RuntimeDyld::SymbolResolver> ClientResolver;
+ std::shared_ptr<JITSymbolResolver> ClientResolver;
};
// About Module states: added->loaded->finalized.
@@ -68,7 +67,7 @@ private:
class MCJIT : public ExecutionEngine {
MCJIT(std::unique_ptr<Module> M, std::unique_ptr<TargetMachine> tm,
std::shared_ptr<MCJITMemoryManager> MemMgr,
- std::shared_ptr<RuntimeDyld::SymbolResolver> Resolver);
+ std::shared_ptr<JITSymbolResolver> Resolver);
typedef llvm::SmallPtrSet<Module *, 4> ModulePtrSet;
@@ -195,11 +194,11 @@ class MCJIT : public ExecutionEngine {
// perform lookup of pre-compiled code to avoid re-compilation.
ObjectCache *ObjCache;
- Function *FindFunctionNamedInModulePtrSet(const char *FnName,
+ Function *FindFunctionNamedInModulePtrSet(StringRef FnName,
ModulePtrSet::iterator I,
ModulePtrSet::iterator E);
- GlobalVariable *FindGlobalVariableNamedInModulePtrSet(const char *Name,
+ GlobalVariable *FindGlobalVariableNamedInModulePtrSet(StringRef Name,
bool AllowInternal,
ModulePtrSet::iterator I,
ModulePtrSet::iterator E);
@@ -222,12 +221,12 @@ public:
/// FindFunctionNamed - Search all of the active modules to find the function that
/// defines FnName. This is very slow operation and shouldn't be used for
/// general code.
- Function *FindFunctionNamed(const char *FnName) override;
+ Function *FindFunctionNamed(StringRef FnName) override;
/// FindGlobalVariableNamed - Search all of the active modules to find the
/// global variable that defines Name. This is very slow operation and
/// shouldn't be used for general code.
- GlobalVariable *FindGlobalVariableNamed(const char *Name,
+ GlobalVariable *FindGlobalVariableNamed(StringRef Name,
bool AllowInternal = false) override;
/// Sets the object manager that MCJIT should use to avoid compilation.
@@ -305,16 +304,22 @@ public:
createJIT(std::unique_ptr<Module> M,
std::string *ErrorStr,
std::shared_ptr<MCJITMemoryManager> MemMgr,
- std::shared_ptr<RuntimeDyld::SymbolResolver> Resolver,
+ std::shared_ptr<JITSymbolResolver> Resolver,
std::unique_ptr<TargetMachine> TM);
// @}
- RuntimeDyld::SymbolInfo findSymbol(const std::string &Name,
- bool CheckFunctionsOnly);
+ // Takes a mangled name and returns the corresponding JITSymbol (if a
+ // definition of that mangled name has been added to the JIT).
+ JITSymbol findSymbol(const std::string &Name, bool CheckFunctionsOnly);
+
// DEPRECATED - Please use findSymbol instead.
+ //
// This is not directly exposed via the ExecutionEngine API, but it is
// used by the LinkingMemoryManager.
+ //
+ // getSymbolAddress takes an unmangled name and returns the corresponding
+ // JITSymbol if a definition of the name has been added to the JIT.
uint64_t getSymbolAddress(const std::string &Name,
bool CheckFunctionsOnly);
@@ -330,9 +335,8 @@ protected:
const RuntimeDyld::LoadedObjectInfo &L);
void NotifyFreeingObject(const object::ObjectFile& Obj);
- RuntimeDyld::SymbolInfo findExistingSymbol(const std::string &Name);
- Module *findModuleForSymbol(const std::string &Name,
- bool CheckFunctionsOnly);
+ JITSymbol findExistingSymbol(const std::string &Name);
+ Module *findModuleForSymbol(const std::string &Name, bool CheckFunctionsOnly);
};
} // end llvm namespace
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
index 6f7c29feef0d..711b887da6ef 100644
--- a/contrib/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/IndirectionUtils.cpp
@@ -24,7 +24,7 @@ void IndirectStubsManager::anchor() {}
std::unique_ptr<JITCompileCallbackManager>
createLocalCompileCallbackManager(const Triple &T,
- TargetAddress ErrorHandlerAddress) {
+ JITTargetAddress ErrorHandlerAddress) {
switch (T.getArch()) {
default: return nullptr;
@@ -71,7 +71,7 @@ createLocalIndirectStubsManagerBuilder(const Triple &T) {
}
}
-Constant* createIRTypedAddress(FunctionType &FT, TargetAddress Addr) {
+Constant* createIRTypedAddress(FunctionType &FT, JITTargetAddress Addr) {
Constant *AddrIntVal =
ConstantInt::get(Type::getInt64Ty(FT.getContext()), Addr);
Constant *AddrPtrVal =
@@ -241,5 +241,14 @@ GlobalAlias* cloneGlobalAliasDecl(Module &Dst, const GlobalAlias &OrigA,
return NewA;
}
+void cloneModuleFlagsMetadata(Module &Dst, const Module &Src,
+ ValueToValueMapTy &VMap) {
+ auto *MFs = Src.getModuleFlagsMetadata();
+ if (!MFs)
+ return;
+ for (auto *MF : MFs->operands())
+ Dst.addModuleFlag(MapMetadata(MF, VMap));
+}
+
} // End namespace orc.
} // End namespace llvm.
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/NullResolver.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/NullResolver.cpp
index 57666a9dada0..8f2d6fd6c32b 100644
--- a/contrib/llvm/lib/ExecutionEngine/Orc/NullResolver.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/NullResolver.cpp
@@ -14,12 +14,11 @@
namespace llvm {
namespace orc {
-RuntimeDyld::SymbolInfo NullResolver::findSymbol(const std::string &Name) {
+JITSymbol NullResolver::findSymbol(const std::string &Name) {
llvm_unreachable("Unexpected cross-object symbol reference");
}
-RuntimeDyld::SymbolInfo
-NullResolver::findSymbolInLogicalDylib(const std::string &Name) {
+JITSymbol NullResolver::findSymbolInLogicalDylib(const std::string &Name) {
llvm_unreachable("Unexpected cross-object symbol reference");
}
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.h b/contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.h
index 9ae9b20feb0a..a74fae775ac4 100644
--- a/contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.h
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/OrcCBindingsStack.h
@@ -44,8 +44,8 @@ private:
class GenericHandle {
public:
virtual ~GenericHandle() {}
- virtual orc::JITSymbol findSymbolIn(const std::string &Name,
- bool ExportedSymbolsOnly) = 0;
+ virtual JITSymbol findSymbolIn(const std::string &Name,
+ bool ExportedSymbolsOnly) = 0;
virtual void removeModule() = 0;
};
@@ -54,8 +54,8 @@ private:
GenericHandleImpl(LayerT &Layer, typename LayerT::ModuleSetHandleT Handle)
: Layer(Layer), Handle(std::move(Handle)) {}
- orc::JITSymbol findSymbolIn(const std::string &Name,
- bool ExportedSymbolsOnly) override {
+ JITSymbol findSymbolIn(const std::string &Name,
+ bool ExportedSymbolsOnly) override {
return Layer.findSymbolIn(Handle, Name, ExportedSymbolsOnly);
}
@@ -109,55 +109,56 @@ public:
}
template <typename PtrTy>
- static PtrTy fromTargetAddress(orc::TargetAddress Addr) {
+ static PtrTy fromTargetAddress(JITTargetAddress Addr) {
return reinterpret_cast<PtrTy>(static_cast<uintptr_t>(Addr));
}
- orc::TargetAddress
+ JITTargetAddress
createLazyCompileCallback(LLVMOrcLazyCompileCallbackFn Callback,
void *CallbackCtx) {
auto CCInfo = CCMgr->getCompileCallback();
- CCInfo.setCompileAction([=]() -> orc::TargetAddress {
+ CCInfo.setCompileAction([=]() -> JITTargetAddress {
return Callback(wrap(this), CallbackCtx);
});
return CCInfo.getAddress();
}
LLVMOrcErrorCode createIndirectStub(StringRef StubName,
- orc::TargetAddress Addr) {
+ JITTargetAddress Addr) {
return mapError(
IndirectStubsMgr->createStub(StubName, Addr, JITSymbolFlags::Exported));
}
LLVMOrcErrorCode setIndirectStubPointer(StringRef Name,
- orc::TargetAddress Addr) {
+ JITTargetAddress Addr) {
return mapError(IndirectStubsMgr->updatePointer(Name, Addr));
}
- std::unique_ptr<RuntimeDyld::SymbolResolver>
+ std::unique_ptr<JITSymbolResolver>
createResolver(LLVMOrcSymbolResolverFn ExternalResolver,
void *ExternalResolverCtx) {
return orc::createLambdaResolver(
- [this, ExternalResolver, ExternalResolverCtx](const std::string &Name) {
+ [this, ExternalResolver, ExternalResolverCtx](const std::string &Name)
+ -> JITSymbol {
// Search order:
// 1. JIT'd symbols.
// 2. Runtime overrides.
// 3. External resolver (if present).
if (auto Sym = CODLayer.findSymbol(Name, true))
- return Sym.toRuntimeDyldSymbol();
+ return Sym;
if (auto Sym = CXXRuntimeOverrides.searchOverrides(Name))
return Sym;
if (ExternalResolver)
- return RuntimeDyld::SymbolInfo(
+ return JITSymbol(
ExternalResolver(Name.c_str(), ExternalResolverCtx),
llvm::JITSymbolFlags::Exported);
- return RuntimeDyld::SymbolInfo(nullptr);
+ return JITSymbol(nullptr);
},
[](const std::string &Name) {
- return RuntimeDyld::SymbolInfo(nullptr);
+ return JITSymbol(nullptr);
});
}
@@ -222,14 +223,14 @@ public:
FreeHandleIndexes.push_back(H);
}
- orc::JITSymbol findSymbol(const std::string &Name, bool ExportedSymbolsOnly) {
+ JITSymbol findSymbol(const std::string &Name, bool ExportedSymbolsOnly) {
if (auto Sym = IndirectStubsMgr->findStub(Name, ExportedSymbolsOnly))
return Sym;
return CODLayer.findSymbol(mangle(Name), ExportedSymbolsOnly);
}
- orc::JITSymbol findSymbolIn(ModuleHandleT H, const std::string &Name,
- bool ExportedSymbolsOnly) {
+ JITSymbol findSymbolIn(ModuleHandleT H, const std::string &Name,
+ bool ExportedSymbolsOnly) {
return GenericHandles[H]->findSymbolIn(Name, ExportedSymbolsOnly);
}
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/OrcError.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/OrcError.cpp
index 22f1303f4330..c531fe369920 100644
--- a/contrib/llvm/lib/ExecutionEngine/Orc/OrcError.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/OrcError.cpp
@@ -25,7 +25,7 @@ namespace {
// deal with the Error value directly, rather than converting to error_code.
class OrcErrorCategory : public std::error_category {
public:
- const char *name() const LLVM_NOEXCEPT override { return "orc"; }
+ const char *name() const noexcept override { return "orc"; }
std::string message(int condition) const override {
switch (static_cast<OrcErrorCode>(condition)) {
@@ -39,10 +39,14 @@ public:
return "Remote indirect stubs owner does not exist";
case OrcErrorCode::RemoteIndirectStubsOwnerIdAlreadyInUse:
return "Remote indirect stubs owner Id already in use";
+ case OrcErrorCode::RPCResponseAbandoned:
+ return "RPC response abandoned";
case OrcErrorCode::UnexpectedRPCCall:
return "Unexpected RPC call";
case OrcErrorCode::UnexpectedRPCResponse:
return "Unexpected RPC response";
+ case OrcErrorCode::UnknownRPCFunction:
+ return "Unknown RPC function";
}
llvm_unreachable("Unhandled error code");
}
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h b/contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
index d1083072c98b..af70960a1f92 100644
--- a/contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
+++ b/contrib/llvm/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
@@ -14,18 +14,40 @@
#ifndef LLVM_LIB_EXECUTIONENGINE_ORC_ORCMCJITREPLACEMENT_H
#define LLVM_LIB_EXECUTIONENGINE_ORC_ORCMCJITREPLACEMENT_H
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
+#include "llvm/ExecutionEngine/GenericValue.h"
+#include "llvm/ExecutionEngine/JITSymbol.h"
+#include "llvm/ExecutionEngine/RuntimeDyld.h"
#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
#include "llvm/ExecutionEngine/Orc/IRCompileLayer.h"
#include "llvm/ExecutionEngine/Orc/LazyEmittingLayer.h"
#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Mangler.h"
#include "llvm/Object/Archive.h"
+#include "llvm/Object/Binary.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <algorithm>
+#include <map>
+#include <memory>
+#include <set>
+#include <string>
+#include <vector>
namespace llvm {
namespace orc {
class OrcMCJITReplacement : public ExecutionEngine {
-
// OrcMCJITReplacement needs to do a little extra book-keeping to ensure that
// Orc's automatic finalization doesn't kick in earlier than MCJIT clients are
// expecting - see finalizeMemory.
@@ -111,17 +133,18 @@ class OrcMCJITReplacement : public ExecutionEngine {
std::shared_ptr<MCJITMemoryManager> ClientMM;
};
- class LinkingResolver : public RuntimeDyld::SymbolResolver {
+ class LinkingResolver : public JITSymbolResolver {
public:
LinkingResolver(OrcMCJITReplacement &M) : M(M) {}
- RuntimeDyld::SymbolInfo findSymbol(const std::string &Name) override {
- return M.findMangledSymbol(Name);
+ JITSymbol findSymbol(const std::string &Name) override {
+ return M.ClientResolver->findSymbol(Name);
}
- RuntimeDyld::SymbolInfo
- findSymbolInLogicalDylib(const std::string &Name) override {
- return M.ClientResolver->findSymbol(Name);
+ JITSymbol findSymbolInLogicalDylib(const std::string &Name) override {
+ if (auto Sym = M.findMangledSymbol(Name))
+ return Sym;
+ return M.ClientResolver->findSymbolInLogicalDylib(Name);
}
private:
@@ -133,7 +156,7 @@ private:
static ExecutionEngine *
createOrcMCJITReplacement(std::string *ErrorMsg,
std::shared_ptr<MCJITMemoryManager> MemMgr,
- std::shared_ptr<RuntimeDyld::SymbolResolver> Resolver,
+ std::shared_ptr<JITSymbolResolver> Resolver,
std::unique_ptr<TargetMachine> TM) {
return new OrcMCJITReplacement(std::move(MemMgr), std::move(Resolver),
std::move(TM));
@@ -146,7 +169,7 @@ public:
OrcMCJITReplacement(
std::shared_ptr<MCJITMemoryManager> MemMgr,
- std::shared_ptr<RuntimeDyld::SymbolResolver> ClientResolver,
+ std::shared_ptr<JITSymbolResolver> ClientResolver,
std::unique_ptr<TargetMachine> TM)
: ExecutionEngine(TM->createDataLayout()), TM(std::move(TM)),
MemMgr(*this, std::move(MemMgr)), Resolver(*this),
@@ -193,7 +216,7 @@ public:
return findSymbol(Name).getAddress();
}
- RuntimeDyld::SymbolInfo findSymbol(StringRef Name) {
+ JITSymbol findSymbol(StringRef Name) {
return findMangledSymbol(Mangle(Name));
}
@@ -242,14 +265,13 @@ public:
}
private:
-
- RuntimeDyld::SymbolInfo findMangledSymbol(StringRef Name) {
+ JITSymbol findMangledSymbol(StringRef Name) {
if (auto Sym = LazyEmitLayer.findSymbol(Name, false))
- return Sym.toRuntimeDyldSymbol();
+ return Sym;
if (auto Sym = ClientResolver->findSymbol(Name))
return Sym;
if (auto Sym = scanArchives(Name))
- return Sym.toRuntimeDyldSymbol();
+ return Sym;
return nullptr;
}
@@ -305,7 +327,6 @@ private:
}
private:
-
static const object::ObjectFile& getObject(const object::ObjectFile &Obj) {
return Obj;
}
@@ -322,6 +343,7 @@ private:
class NotifyFinalizedT {
public:
NotifyFinalizedT(OrcMCJITReplacement &M) : M(M) {}
+
void operator()(ObjectLinkingLayerBase::ObjSetHandleT H) {
M.UnfinalizedSections.erase(H);
}
@@ -346,7 +368,7 @@ private:
std::unique_ptr<TargetMachine> TM;
MCJITReplacementMemMgr MemMgr;
LinkingResolver Resolver;
- std::shared_ptr<RuntimeDyld::SymbolResolver> ClientResolver;
+ std::shared_ptr<JITSymbolResolver> ClientResolver;
Mangler Mang;
NotifyObjectLoadedT NotifyObjectLoaded;
@@ -373,7 +395,7 @@ private:
std::vector<object::OwningBinary<object::Archive>> Archives;
};
-} // End namespace orc.
-} // End namespace llvm.
+} // end namespace orc
+} // end namespace llvm
#endif // LLVM_LIB_EXECUTIONENGINE_ORC_ORCMCJITREPLACEMENT_H
diff --git a/contrib/llvm/lib/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.cpp b/contrib/llvm/lib/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.cpp
deleted file mode 100644
index d1a021aee3ab..000000000000
--- a/contrib/llvm/lib/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.cpp
+++ /dev/null
@@ -1,53 +0,0 @@
-//===------- OrcRemoteTargetRPCAPI.cpp - ORC Remote API utilities ---------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h"
-
-namespace llvm {
-namespace orc {
-namespace remote {
-
-#define FUNCNAME(X) \
- case X ## Id: \
- return #X
-
-const char *OrcRemoteTargetRPCAPI::getJITFuncIdName(JITFuncId Id) {
- switch (Id) {
- case InvalidId:
- return "*** Invalid JITFuncId ***";
- FUNCNAME(CallIntVoid);
- FUNCNAME(CallMain);
- FUNCNAME(CallVoidVoid);
- FUNCNAME(CreateRemoteAllocator);
- FUNCNAME(CreateIndirectStubsOwner);
- FUNCNAME(DeregisterEHFrames);
- FUNCNAME(DestroyRemoteAllocator);
- FUNCNAME(DestroyIndirectStubsOwner);
- FUNCNAME(EmitIndirectStubs);
- FUNCNAME(EmitResolverBlock);
- FUNCNAME(EmitTrampolineBlock);
- FUNCNAME(GetSymbolAddress);
- FUNCNAME(GetRemoteInfo);
- FUNCNAME(ReadMem);
- FUNCNAME(RegisterEHFrames);
- FUNCNAME(ReserveMem);
- FUNCNAME(RequestCompile);
- FUNCNAME(SetProtections);
- FUNCNAME(TerminateSession);
- FUNCNAME(WriteMem);
- FUNCNAME(WritePtr);
- };
- return nullptr;
-}
-
-#undef FUNCNAME
-
-} // end namespace remote
-} // end namespace orc
-} // end namespace llvm
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/JITSymbol.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/JITSymbol.cpp
new file mode 100644
index 000000000000..8769dcf73747
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/JITSymbol.cpp
@@ -0,0 +1,41 @@
+//===----------- JITSymbol.cpp - JITSymbol class implementation -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// JITSymbol class implementation plus helper functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/JITSymbol.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/Object/SymbolicFile.h"
+
+using namespace llvm;
+
+JITSymbolFlags llvm::JITSymbolFlags::fromGlobalValue(const GlobalValue &GV) {
+ JITSymbolFlags Flags = JITSymbolFlags::None;
+ if (GV.hasWeakLinkage() || GV.hasLinkOnceLinkage())
+ Flags |= JITSymbolFlags::Weak;
+ if (GV.hasCommonLinkage())
+ Flags |= JITSymbolFlags::Common;
+ if (!GV.hasLocalLinkage() && !GV.hasHiddenVisibility())
+ Flags |= JITSymbolFlags::Exported;
+ return Flags;
+}
+
+JITSymbolFlags
+llvm::JITSymbolFlags::fromObjectSymbol(const object::BasicSymbolRef &Symbol) {
+ JITSymbolFlags Flags = JITSymbolFlags::None;
+ if (Symbol.getFlags() & object::BasicSymbolRef::SF_Weak)
+ Flags |= JITSymbolFlags::Weak;
+ if (Symbol.getFlags() & object::BasicSymbolRef::SF_Common)
+ Flags |= JITSymbolFlags::Common;
+ if (Symbol.getFlags() & object::BasicSymbolRef::SF_Exported)
+ Flags |= JITSymbolFlags::Exported;
+ return Flags;
+}
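The new JITSymbol.cpp above centralizes the mapping from IR linkage and object-symbol flags to JITSymbolFlags. A minimal sketch of how a caller might consume the helper is shown below; dumpOverridableSymbols and M are illustrative names, not part of the patch.

#include "llvm/ExecutionEngine/JITSymbol.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

// Hypothetical helper: list the symbols in a module whose definitions may be
// overridden by a strong definition elsewhere in the logical dylib.
static void dumpOverridableSymbols(const llvm::Module &M) {
  for (const llvm::Function &F : M) {
    if (F.isDeclaration())
      continue;
    llvm::JITSymbolFlags Flags = llvm::JITSymbolFlags::fromGlobalValue(F);
    if (Flags.isWeak() || Flags.isCommon())
      llvm::errs() << F.getName() << " is overridable\n";
  }
}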
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp
index e39acc7ee144..de73fbde8eb7 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RTDyldMemoryManager.cpp
@@ -97,7 +97,8 @@ static const char *processFDE(const char *Entry, bool isDeregister) {
void RTDyldMemoryManager::registerEHFramesInProcess(uint8_t *Addr,
size_t Size) {
  // On OS X, __register_frame takes a single FDE as an argument.
- // See http://lists.llvm.org/pipermail/llvm-dev/2013-April/061768.html
+ // See http://lists.llvm.org/pipermail/llvm-dev/2013-April/061737.html
+ // and projects/libunwind/src/UnwindLevel1-gcc-ext.c.
const char *P = (const char *)Addr;
const char *End = P + Size;
do {
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index 1dfbe31f2717..63b56f725209 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -39,7 +39,7 @@ enum RuntimeDyldErrorCode {
// deal with the Error value directly, rather than converting to error_code.
class RuntimeDyldErrorCategory : public std::error_category {
public:
- const char *name() const LLVM_NOEXCEPT override { return "runtimedyld"; }
+ const char *name() const noexcept override { return "runtimedyld"; }
std::string message(int Condition) const override {
switch (static_cast<RuntimeDyldErrorCode>(Condition)) {
@@ -205,6 +205,10 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) {
++I) {
uint32_t Flags = I->getFlags();
+ // Skip undefined symbols.
+ if (Flags & SymbolRef::SF_Undefined)
+ continue;
+
if (Flags & SymbolRef::SF_Common)
CommonSymbols.push_back(*I);
else {
@@ -224,11 +228,25 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) {
return NameOrErr.takeError();
// Compute JIT symbol flags.
- JITSymbolFlags RTDyldSymFlags = JITSymbolFlags::None;
- if (Flags & SymbolRef::SF_Weak)
- RTDyldSymFlags |= JITSymbolFlags::Weak;
- if (Flags & SymbolRef::SF_Exported)
- RTDyldSymFlags |= JITSymbolFlags::Exported;
+ JITSymbolFlags JITSymFlags = JITSymbolFlags::fromObjectSymbol(*I);
+
+ // If this is a weak definition, check to see if there's a strong one.
+ // If there is, skip this symbol (we won't be providing it: the strong
+ // definition will). If there's no strong definition, make this definition
+ // strong.
+ if (JITSymFlags.isWeak()) {
+ // First check whether there's already a definition in this instance.
+ // FIXME: Override existing weak definitions with strong ones.
+ if (GlobalSymbolTable.count(Name))
+ continue;
+ // Then check the symbol resolver to see if there's a definition
+ // elsewhere in this logical dylib.
+ if (auto Sym = Resolver.findSymbolInLogicalDylib(Name))
+ if (Sym.getFlags().isStrongDefinition())
+ continue;
+ // else
+ JITSymFlags &= ~JITSymbolFlags::Weak;
+ }
if (Flags & SymbolRef::SF_Absolute &&
SymType != object::SymbolRef::ST_File) {
@@ -245,7 +263,7 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) {
<< format("%p", (uintptr_t)Addr)
<< " flags: " << Flags << "\n");
GlobalSymbolTable[Name] =
- SymbolTableEntry(SectionID, Addr, RTDyldSymFlags);
+ SymbolTableEntry(SectionID, Addr, JITSymFlags);
} else if (SymType == object::SymbolRef::ST_Function ||
SymType == object::SymbolRef::ST_Data ||
SymType == object::SymbolRef::ST_Unknown ||
@@ -278,7 +296,7 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) {
<< format("%p", (uintptr_t)SectOffset)
<< " flags: " << Flags << "\n");
GlobalSymbolTable[Name] =
- SymbolTableEntry(SectionID, SectOffset, RTDyldSymFlags);
+ SymbolTableEntry(SectionID, SectOffset, JITSymFlags);
}
}
}
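The weak-definition handling added above implements a simple precedence rule. A hedged restatement as a standalone predicate, with FindInDylib standing in for Resolver.findSymbolInLogicalDylib, might look like this (names are illustrative, not LLVM API):

#include "llvm/ADT/StringRef.h"

enum class WeakSymAction { Skip, EmitAsStrong };

// SymTab must provide count(Name); Lookup must return a symbol exposing
// getFlags().isStrongDefinition(), e.g. a JITSymbol.
template <typename SymTab, typename Lookup>
WeakSymAction decideWeakSymbol(const SymTab &GlobalSymbolTable,
                               llvm::StringRef Name, Lookup FindInDylib) {
  // A definition already recorded by this RuntimeDyld instance wins.
  if (GlobalSymbolTable.count(Name))
    return WeakSymAction::Skip;
  // A strong definition elsewhere in the logical dylib also wins.
  if (auto Sym = FindInDylib(Name))
    if (Sym.getFlags().isStrongDefinition())
      return WeakSymAction::Skip;
  // Otherwise this object provides the definition, promoted to strong.
  return WeakSymAction::EmitAsStrong;
}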
@@ -584,13 +602,19 @@ Error RuntimeDyldImpl::emitCommonSymbols(const ObjectFile &Obj,
return NameOrErr.takeError();
// Skip common symbols already elsewhere.
- if (GlobalSymbolTable.count(Name) ||
- Resolver.findSymbolInLogicalDylib(Name)) {
+ if (GlobalSymbolTable.count(Name)) {
DEBUG(dbgs() << "\tSkipping already emitted common symbol '" << Name
<< "'\n");
continue;
}
+ if (auto Sym = Resolver.findSymbolInLogicalDylib(Name)) {
+ if (!Sym.getFlags().isCommon()) {
+ DEBUG(dbgs() << "\tSkipping common symbol '" << Name
+ << "' in favor of stronger definition.\n");
+ continue;
+ }
+ }
uint32_t Align = Sym.getAlignment();
uint64_t Size = Sym.getCommonSize();
@@ -628,16 +652,11 @@ Error RuntimeDyldImpl::emitCommonSymbols(const ObjectFile &Obj,
Addr += AlignOffset;
Offset += AlignOffset;
}
- uint32_t Flags = Sym.getFlags();
- JITSymbolFlags RTDyldSymFlags = JITSymbolFlags::None;
- if (Flags & SymbolRef::SF_Weak)
- RTDyldSymFlags |= JITSymbolFlags::Weak;
- if (Flags & SymbolRef::SF_Exported)
- RTDyldSymFlags |= JITSymbolFlags::Exported;
+ JITSymbolFlags JITSymFlags = JITSymbolFlags::fromObjectSymbol(Sym);
DEBUG(dbgs() << "Allocating common symbol " << Name << " address "
<< format("%p", Addr) << "\n");
GlobalSymbolTable[Name] =
- SymbolTableEntry(SectionID, Offset, RTDyldSymFlags);
+ SymbolTableEntry(SectionID, Offset, JITSymFlags);
Offset += Size;
Addr += Size;
}
@@ -974,10 +993,10 @@ uint64_t RuntimeDyld::LoadedObjectInfo::getSectionLoadAddress(
}
void RuntimeDyld::MemoryManager::anchor() {}
-void RuntimeDyld::SymbolResolver::anchor() {}
+void JITSymbolResolver::anchor() {}
RuntimeDyld::RuntimeDyld(RuntimeDyld::MemoryManager &MemMgr,
- RuntimeDyld::SymbolResolver &Resolver)
+ JITSymbolResolver &Resolver)
: MemMgr(MemMgr), Resolver(Resolver) {
// FIXME: There's a potential issue lurking here if a single instance of
// RuntimeDyld is used to load multiple objects. The current implementation
@@ -994,8 +1013,8 @@ RuntimeDyld::~RuntimeDyld() {}
static std::unique_ptr<RuntimeDyldCOFF>
createRuntimeDyldCOFF(Triple::ArchType Arch, RuntimeDyld::MemoryManager &MM,
- RuntimeDyld::SymbolResolver &Resolver,
- bool ProcessAllSections, RuntimeDyldCheckerImpl *Checker) {
+ JITSymbolResolver &Resolver, bool ProcessAllSections,
+ RuntimeDyldCheckerImpl *Checker) {
std::unique_ptr<RuntimeDyldCOFF> Dyld =
RuntimeDyldCOFF::create(Arch, MM, Resolver);
Dyld->setProcessAllSections(ProcessAllSections);
@@ -1004,10 +1023,11 @@ createRuntimeDyldCOFF(Triple::ArchType Arch, RuntimeDyld::MemoryManager &MM,
}
static std::unique_ptr<RuntimeDyldELF>
-createRuntimeDyldELF(RuntimeDyld::MemoryManager &MM,
- RuntimeDyld::SymbolResolver &Resolver,
- bool ProcessAllSections, RuntimeDyldCheckerImpl *Checker) {
- std::unique_ptr<RuntimeDyldELF> Dyld(new RuntimeDyldELF(MM, Resolver));
+createRuntimeDyldELF(Triple::ArchType Arch, RuntimeDyld::MemoryManager &MM,
+ JITSymbolResolver &Resolver, bool ProcessAllSections,
+ RuntimeDyldCheckerImpl *Checker) {
+ std::unique_ptr<RuntimeDyldELF> Dyld =
+ RuntimeDyldELF::create(Arch, MM, Resolver);
Dyld->setProcessAllSections(ProcessAllSections);
Dyld->setRuntimeDyldChecker(Checker);
return Dyld;
@@ -1015,7 +1035,7 @@ createRuntimeDyldELF(RuntimeDyld::MemoryManager &MM,
static std::unique_ptr<RuntimeDyldMachO>
createRuntimeDyldMachO(Triple::ArchType Arch, RuntimeDyld::MemoryManager &MM,
- RuntimeDyld::SymbolResolver &Resolver,
+ JITSymbolResolver &Resolver,
bool ProcessAllSections,
RuntimeDyldCheckerImpl *Checker) {
std::unique_ptr<RuntimeDyldMachO> Dyld =
@@ -1029,7 +1049,9 @@ std::unique_ptr<RuntimeDyld::LoadedObjectInfo>
RuntimeDyld::loadObject(const ObjectFile &Obj) {
if (!Dyld) {
if (Obj.isELF())
- Dyld = createRuntimeDyldELF(MemMgr, Resolver, ProcessAllSections, Checker);
+ Dyld =
+ createRuntimeDyldELF(static_cast<Triple::ArchType>(Obj.getArch()),
+ MemMgr, Resolver, ProcessAllSections, Checker);
else if (Obj.isMachO())
Dyld = createRuntimeDyldMachO(
static_cast<Triple::ArchType>(Obj.getArch()), MemMgr, Resolver,
@@ -1056,7 +1078,7 @@ void *RuntimeDyld::getSymbolLocalAddress(StringRef Name) const {
return Dyld->getSymbolLocalAddress(Name);
}
-RuntimeDyld::SymbolInfo RuntimeDyld::getSymbol(StringRef Name) const {
+JITEvaluatedSymbol RuntimeDyld::getSymbol(StringRef Name) const {
if (!Dyld)
return nullptr;
return Dyld->getSymbol(Name);
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp
index 24bd9a002c20..1bd28ef37ed1 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.cpp
@@ -44,7 +44,7 @@ namespace llvm {
std::unique_ptr<RuntimeDyldCOFF>
llvm::RuntimeDyldCOFF::create(Triple::ArchType Arch,
RuntimeDyld::MemoryManager &MemMgr,
- RuntimeDyld::SymbolResolver &Resolver) {
+ JITSymbolResolver &Resolver) {
switch (Arch) {
default: llvm_unreachable("Unsupported target for RuntimeDyldCOFF.");
case Triple::x86:
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.h
index 03a91f6cf690..729a358fa0ea 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.h
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldCOFF.h
@@ -33,11 +33,11 @@ public:
static std::unique_ptr<RuntimeDyldCOFF>
create(Triple::ArchType Arch, RuntimeDyld::MemoryManager &MemMgr,
- RuntimeDyld::SymbolResolver &Resolver);
+ JITSymbolResolver &Resolver);
protected:
RuntimeDyldCOFF(RuntimeDyld::MemoryManager &MemMgr,
- RuntimeDyld::SymbolResolver &Resolver)
+ JITSymbolResolver &Resolver)
: RuntimeDyldImpl(MemMgr, Resolver) {}
uint64_t getSymbolOffset(const SymbolRef &Sym);
};
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
index 090b9a3857e3..7bfa79445584 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp
@@ -217,7 +217,7 @@ private:
// given symbol and get the value of the requested operand.
// Returns an error if the instruction cannot be decoded, or the requested
// operand is not an immediate.
- // On success, retuns a pair containing the value of the operand, plus
+ // On success, returns a pair containing the value of the operand, plus
// the expression remaining to be evaluated.
std::pair<EvalResult, StringRef> evalDecodeOperand(StringRef Expr) const {
if (!Expr.startswith("("))
@@ -463,7 +463,7 @@ private:
Expr.substr(FirstNonDigit));
}
- // Evaluate a constant numeric expression (hexidecimal or decimal) and
+ // Evaluate a constant numeric expression (hexadecimal or decimal) and
// return a pair containing the result, and the expression remaining to be
// evaluated.
std::pair<EvalResult, StringRef> evalNumberExpr(StringRef Expr) const {
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
index 9cbdb13a3572..a5a30fab5b69 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp
@@ -13,6 +13,7 @@
#include "RuntimeDyldELF.h"
#include "RuntimeDyldCheckerImpl.h"
+#include "Targets/RuntimeDyldELFMips.h"
#include "llvm/ADT/IntervalMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
@@ -27,9 +28,34 @@
using namespace llvm;
using namespace llvm::object;
+using namespace llvm::support::endian;
#define DEBUG_TYPE "dyld"
+static void or32le(void *P, int32_t V) { write32le(P, read32le(P) | V); }
+
+static void or32AArch64Imm(void *L, uint64_t Imm) {
+ or32le(L, (Imm & 0xFFF) << 10);
+}
+
+template <class T> static void write(bool isBE, void *P, T V) {
+ isBE ? write<T, support::big>(P, V) : write<T, support::little>(P, V);
+}
+
+static void write32AArch64Addr(void *L, uint64_t Imm) {
+ uint32_t ImmLo = (Imm & 0x3) << 29;
+ uint32_t ImmHi = (Imm & 0x1FFFFC) << 3;
+ uint64_t Mask = (0x3 << 29) | (0x1FFFFC << 3);
+ write32le(L, (read32le(L) & ~Mask) | ImmLo | ImmHi);
+}
+
+// Return bits [Start, End] of Val, shifted right by Start bits.
+// For instance, getBits(0xF0, 4, 8) returns 0xF.
+static uint64_t getBits(uint64_t Val, int Start, int End) {
+ uint64_t Mask = ((uint64_t)1 << (End + 1 - Start)) - 1;
+ return (Val >> Start) & Mask;
+}
+
namespace {
template <class ELFT> class DyldELFObject : public ELFObjectFile<ELFT> {
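The helpers introduced above (or32le, or32AArch64Imm, write32AArch64Addr, getBits) replace the open-coded mask-and-shift sequences deleted further down. A small self-contained check of the bit arithmetic, using a local copy of getBits and a scratch LDR encoding (illustrative values only, not part of the patch):

#include <cassert>
#include <cstdint>

// Local copy of the getBits helper above, for illustration.
static uint64_t getBits(uint64_t Val, int Start, int End) {
  uint64_t Mask = ((uint64_t)1 << (End + 1 - Start)) - 1;
  return (Val >> Start) & Mask;
}

int main() {
  // getBits extracts an inclusive bit range, shifted down to bit 0.
  assert(getBits(0xF0, 4, 8) == 0xF);

  // R_AARCH64_LDST64_ABS_LO12_NC: bits 11:3 of the target address go into
  // the imm12 field (bits 21:10) of the load/store instruction, which is
  // what or32AArch64Imm(TargetPtr, getBits(Value + Addend, 3, 11)) does above.
  uint64_t Target = 0x1238;
  uint32_t Insn = 0xF9400000; // scratch "ldr x0, [x0]" encoding
  Insn |= (uint32_t)((getBits(Target, 3, 11) & 0xFFF) << 10);
  assert(((Insn >> 10) & 0xFFF) == 0x47);

  // R_AARCH64_MOVW_UABS_G3: bits 63:48 of the value land in bits 20:5 of a
  // MOVZ/MOVK, hence the single right shift by 48 - 5 = 43 in the case above.
  uint64_t Value = 0x0123000000000000ULL;
  assert(((Value & 0xFFFF000000000000ULL) >> 43) == (0x0123ULL << 5));
  return 0;
}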
@@ -184,7 +210,7 @@ LoadedELFObjectInfo::getObjectForDebug(const ObjectFile &Obj) const {
namespace llvm {
RuntimeDyldELF::RuntimeDyldELF(RuntimeDyld::MemoryManager &MemMgr,
- RuntimeDyld::SymbolResolver &Resolver)
+ JITSymbolResolver &Resolver)
: RuntimeDyldImpl(MemMgr, Resolver), GOTSectionID(0), CurrentGOTIndex(0) {}
RuntimeDyldELF::~RuntimeDyldELF() {}
@@ -211,6 +237,21 @@ void RuntimeDyldELF::deregisterEHFrames() {
RegisteredEHFrameSections.clear();
}
+std::unique_ptr<RuntimeDyldELF>
+llvm::RuntimeDyldELF::create(Triple::ArchType Arch,
+ RuntimeDyld::MemoryManager &MemMgr,
+ JITSymbolResolver &Resolver) {
+ switch (Arch) {
+ default:
+ return make_unique<RuntimeDyldELF>(MemMgr, Resolver);
+ case Triple::mips:
+ case Triple::mipsel:
+ case Triple::mips64:
+ case Triple::mips64el:
+ return make_unique<RuntimeDyldELFMips>(MemMgr, Resolver);
+ }
+}
+
std::unique_ptr<RuntimeDyld::LoadedObjectInfo>
RuntimeDyldELF::loadObject(const object::ObjectFile &O) {
if (auto ObjSectionToIDOrErr = loadObjectImpl(O))
@@ -309,6 +350,8 @@ void RuntimeDyldELF::resolveAArch64Relocation(const SectionEntry &Section,
uint32_t *TargetPtr =
reinterpret_cast<uint32_t *>(Section.getAddressWithOffset(Offset));
uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
+ // Data should use target endian. Code should always use little endian.
+ bool isBE = Arch == Triple::aarch64_be;
DEBUG(dbgs() << "resolveAArch64Relocation, LocalAddress: 0x"
<< format("%llx", Section.getAddressWithOffset(Offset))
@@ -321,17 +364,14 @@ void RuntimeDyldELF::resolveAArch64Relocation(const SectionEntry &Section,
default:
llvm_unreachable("Relocation type not implemented yet!");
break;
- case ELF::R_AARCH64_ABS64: {
- uint64_t *TargetPtr =
- reinterpret_cast<uint64_t *>(Section.getAddressWithOffset(Offset));
- *TargetPtr = Value + Addend;
+ case ELF::R_AARCH64_ABS64:
+ write(isBE, TargetPtr, Value + Addend);
break;
- }
case ELF::R_AARCH64_PREL32: {
uint64_t Result = Value + Addend - FinalAddress;
assert(static_cast<int64_t>(Result) >= INT32_MIN &&
static_cast<int64_t>(Result) <= UINT32_MAX);
- *TargetPtr = static_cast<uint32_t>(Result & 0xffffffffU);
+ write(isBE, TargetPtr, static_cast<uint32_t>(Result & 0xffffffffU));
break;
}
case ELF::R_AARCH64_CALL26: // fallthrough
@@ -342,62 +382,21 @@ void RuntimeDyldELF::resolveAArch64Relocation(const SectionEntry &Section,
// "Check that -2^27 <= result < 2^27".
assert(isInt<28>(BranchImm));
-
- // AArch64 code is emitted with .rela relocations. The data already in any
- // bits affected by the relocation on entry is garbage.
- *TargetPtr &= 0xfc000000U;
- // Immediate goes in bits 25:0 of B and BL.
- *TargetPtr |= static_cast<uint32_t>(BranchImm & 0xffffffcU) >> 2;
+ or32le(TargetPtr, (BranchImm & 0x0FFFFFFC) >> 2);
break;
}
- case ELF::R_AARCH64_MOVW_UABS_G3: {
- uint64_t Result = Value + Addend;
-
- // AArch64 code is emitted with .rela relocations. The data already in any
- // bits affected by the relocation on entry is garbage.
- *TargetPtr &= 0xffe0001fU;
- // Immediate goes in bits 20:5 of MOVZ/MOVK instruction
- *TargetPtr |= Result >> (48 - 5);
- // Shift must be "lsl #48", in bits 22:21
- assert((*TargetPtr >> 21 & 0x3) == 3 && "invalid shift for relocation");
+ case ELF::R_AARCH64_MOVW_UABS_G3:
+ or32le(TargetPtr, ((Value + Addend) & 0xFFFF000000000000) >> 43);
break;
- }
- case ELF::R_AARCH64_MOVW_UABS_G2_NC: {
- uint64_t Result = Value + Addend;
-
- // AArch64 code is emitted with .rela relocations. The data already in any
- // bits affected by the relocation on entry is garbage.
- *TargetPtr &= 0xffe0001fU;
- // Immediate goes in bits 20:5 of MOVZ/MOVK instruction
- *TargetPtr |= ((Result & 0xffff00000000ULL) >> (32 - 5));
- // Shift must be "lsl #32", in bits 22:21
- assert((*TargetPtr >> 21 & 0x3) == 2 && "invalid shift for relocation");
+ case ELF::R_AARCH64_MOVW_UABS_G2_NC:
+ or32le(TargetPtr, ((Value + Addend) & 0xFFFF00000000) >> 27);
break;
- }
- case ELF::R_AARCH64_MOVW_UABS_G1_NC: {
- uint64_t Result = Value + Addend;
-
- // AArch64 code is emitted with .rela relocations. The data already in any
- // bits affected by the relocation on entry is garbage.
- *TargetPtr &= 0xffe0001fU;
- // Immediate goes in bits 20:5 of MOVZ/MOVK instruction
- *TargetPtr |= ((Result & 0xffff0000U) >> (16 - 5));
- // Shift must be "lsl #16", in bits 22:2
- assert((*TargetPtr >> 21 & 0x3) == 1 && "invalid shift for relocation");
+ case ELF::R_AARCH64_MOVW_UABS_G1_NC:
+ or32le(TargetPtr, ((Value + Addend) & 0xFFFF0000) >> 11);
break;
- }
- case ELF::R_AARCH64_MOVW_UABS_G0_NC: {
- uint64_t Result = Value + Addend;
-
- // AArch64 code is emitted with .rela relocations. The data already in any
- // bits affected by the relocation on entry is garbage.
- *TargetPtr &= 0xffe0001fU;
- // Immediate goes in bits 20:5 of MOVZ/MOVK instruction
- *TargetPtr |= ((Result & 0xffffU) << 5);
- // Shift must be "lsl #0", in bits 22:21.
- assert((*TargetPtr >> 21 & 0x3) == 0 && "invalid shift for relocation");
+ case ELF::R_AARCH64_MOVW_UABS_G0_NC:
+ or32le(TargetPtr, ((Value + Addend) & 0xFFFF) << 5);
break;
- }
case ELF::R_AARCH64_ADR_PREL_PG_HI21: {
// Operation: Page(S+A) - Page(P)
uint64_t Result =
@@ -406,40 +405,30 @@ void RuntimeDyldELF::resolveAArch64Relocation(const SectionEntry &Section,
// Check that -2^32 <= X < 2^32
assert(isInt<33>(Result) && "overflow check failed for relocation");
- // AArch64 code is emitted with .rela relocations. The data already in any
- // bits affected by the relocation on entry is garbage.
- *TargetPtr &= 0x9f00001fU;
// Immediate goes in bits 30:29 + 5:23 of ADRP instruction, taken
// from bits 32:12 of X.
- *TargetPtr |= ((Result & 0x3000U) << (29 - 12));
- *TargetPtr |= ((Result & 0x1ffffc000ULL) >> (14 - 5));
+ write32AArch64Addr(TargetPtr, Result >> 12);
break;
}
- case ELF::R_AARCH64_LDST32_ABS_LO12_NC: {
+ case ELF::R_AARCH64_ADD_ABS_LO12_NC:
+ // Operation: S + A
+ // Immediate goes in bits 21:10 of the ADD instruction, taken
+ // from bits 11:0 of X
+ or32AArch64Imm(TargetPtr, Value + Addend);
+ break;
+ case ELF::R_AARCH64_LDST32_ABS_LO12_NC:
// Operation: S + A
- uint64_t Result = Value + Addend;
-
- // AArch64 code is emitted with .rela relocations. The data already in any
- // bits affected by the relocation on entry is garbage.
- *TargetPtr &= 0xffc003ffU;
// Immediate goes in bits 21:10 of LD/ST instruction, taken
// from bits 11:2 of X
- *TargetPtr |= ((Result & 0xffc) << (10 - 2));
+ or32AArch64Imm(TargetPtr, getBits(Value + Addend, 2, 11));
break;
- }
- case ELF::R_AARCH64_LDST64_ABS_LO12_NC: {
+ case ELF::R_AARCH64_LDST64_ABS_LO12_NC:
// Operation: S + A
- uint64_t Result = Value + Addend;
-
- // AArch64 code is emitted with .rela relocations. The data already in any
- // bits affected by the relocation on entry is garbage.
- *TargetPtr &= 0xffc003ffU;
// Immediate goes in bits 21:10 of LD/ST instruction, taken
// from bits 11:3 of X
- *TargetPtr |= ((Result & 0xff8) << (10 - 3));
+ or32AArch64Imm(TargetPtr, getBits(Value + Addend, 3, 11));
break;
}
- }
}
void RuntimeDyldELF::resolveARMRelocation(const SectionEntry &Section,
@@ -463,10 +452,15 @@ void RuntimeDyldELF::resolveARMRelocation(const SectionEntry &Section,
case ELF::R_ARM_NONE:
break;
+ // Write a 31-bit signed offset.
case ELF::R_ARM_PREL31:
+ support::ulittle32_t::ref{TargetPtr} =
+ (support::ulittle32_t::ref{TargetPtr} & 0x80000000) |
+ ((Value - FinalAddress) & ~0x80000000);
+ break;
case ELF::R_ARM_TARGET1:
case ELF::R_ARM_ABS32:
- *TargetPtr = Value;
+ support::ulittle32_t::ref{TargetPtr} = Value;
break;
// Write the first 16 bits of a 32-bit value to the MOV instruction;
// the last 4 bits should be shifted.
@@ -476,9 +470,9 @@ void RuntimeDyldELF::resolveARMRelocation(const SectionEntry &Section,
Value = Value & 0xFFFF;
else if (Type == ELF::R_ARM_MOVT_ABS)
Value = (Value >> 16) & 0xFFFF;
- *TargetPtr &= ~0x000F0FFF;
- *TargetPtr |= Value & 0xFFF;
- *TargetPtr |= ((Value >> 12) & 0xF) << 16;
+ support::ulittle32_t::ref{TargetPtr} =
+ (support::ulittle32_t::ref{TargetPtr} & ~0x000F0FFF) | (Value & 0xFFF) |
+ (((Value >> 12) & 0xF) << 16);
break;
// Write a 24-bit relative value to the branch instruction.
case ELF::R_ARM_PC24: // Fall through.
@@ -486,298 +480,26 @@ void RuntimeDyldELF::resolveARMRelocation(const SectionEntry &Section,
case ELF::R_ARM_JUMP24:
int32_t RelValue = static_cast<int32_t>(Value - FinalAddress - 8);
RelValue = (RelValue & 0x03FFFFFC) >> 2;
- assert((*TargetPtr & 0xFFFFFF) == 0xFFFFFE);
- *TargetPtr &= 0xFF000000;
- *TargetPtr |= RelValue;
+ assert((support::ulittle32_t::ref{TargetPtr} & 0xFFFFFF) == 0xFFFFFE);
+ support::ulittle32_t::ref{TargetPtr} =
+ (support::ulittle32_t::ref{TargetPtr} & 0xFF000000) | RelValue;
break;
}
}
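The ARM relocation writes above now go through support::ulittle32_t::ref instead of dereferencing TargetPtr directly, so each read-modify-write is explicitly little-endian (ARM code is emitted little-endian here) and safe on a big-endian host. A minimal sketch of the same pattern for R_ARM_ABS32, with Buf standing in for the section memory at the fixup offset:

#include <cstdint>
#include "llvm/Support/Endian.h"

static void writeARMAbs32(uint8_t *Buf, uint32_t Value) {
  // Explicit little-endian store; matches the patch's
  // support::ulittle32_t::ref{TargetPtr} = Value idiom.
  llvm::support::ulittle32_t::ref{Buf} = Value;
}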
-void RuntimeDyldELF::resolveMIPSRelocation(const SectionEntry &Section,
- uint64_t Offset, uint32_t Value,
- uint32_t Type, int32_t Addend) {
- uint8_t *TargetPtr = Section.getAddressWithOffset(Offset);
- Value += Addend;
-
- DEBUG(dbgs() << "resolveMIPSRelocation, LocalAddress: "
- << Section.getAddressWithOffset(Offset) << " FinalAddress: "
- << format("%p", Section.getLoadAddressWithOffset(Offset))
- << " Value: " << format("%x", Value)
- << " Type: " << format("%x", Type)
- << " Addend: " << format("%x", Addend) << "\n");
-
- uint32_t Insn = readBytesUnaligned(TargetPtr, 4);
-
- switch (Type) {
- default:
- llvm_unreachable("Not implemented relocation type!");
- break;
- case ELF::R_MIPS_32:
- writeBytesUnaligned(Value, TargetPtr, 4);
- break;
- case ELF::R_MIPS_26:
- Insn &= 0xfc000000;
- Insn |= (Value & 0x0fffffff) >> 2;
- writeBytesUnaligned(Insn, TargetPtr, 4);
- break;
- case ELF::R_MIPS_HI16:
- // Get the higher 16-bits. Also add 1 if bit 15 is 1.
- Insn &= 0xffff0000;
- Insn |= ((Value + 0x8000) >> 16) & 0xffff;
- writeBytesUnaligned(Insn, TargetPtr, 4);
- break;
- case ELF::R_MIPS_LO16:
- Insn &= 0xffff0000;
- Insn |= Value & 0xffff;
- writeBytesUnaligned(Insn, TargetPtr, 4);
- break;
- case ELF::R_MIPS_PC32: {
- uint32_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
- writeBytesUnaligned(Value - FinalAddress, (uint8_t *)TargetPtr, 4);
- break;
- }
- case ELF::R_MIPS_PC16: {
- uint32_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
- Insn &= 0xffff0000;
- Insn |= ((Value - FinalAddress) >> 2) & 0xffff;
- writeBytesUnaligned(Insn, TargetPtr, 4);
- break;
- }
- case ELF::R_MIPS_PC19_S2: {
- uint32_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
- Insn &= 0xfff80000;
- Insn |= ((Value - (FinalAddress & ~0x3)) >> 2) & 0x7ffff;
- writeBytesUnaligned(Insn, TargetPtr, 4);
- break;
- }
- case ELF::R_MIPS_PC21_S2: {
- uint32_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
- Insn &= 0xffe00000;
- Insn |= ((Value - FinalAddress) >> 2) & 0x1fffff;
- writeBytesUnaligned(Insn, TargetPtr, 4);
- break;
- }
- case ELF::R_MIPS_PC26_S2: {
- uint32_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
- Insn &= 0xfc000000;
- Insn |= ((Value - FinalAddress) >> 2) & 0x3ffffff;
- writeBytesUnaligned(Insn, TargetPtr, 4);
- break;
- }
- case ELF::R_MIPS_PCHI16: {
- uint32_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
- Insn &= 0xffff0000;
- Insn |= ((Value - FinalAddress + 0x8000) >> 16) & 0xffff;
- writeBytesUnaligned(Insn, TargetPtr, 4);
- break;
- }
- case ELF::R_MIPS_PCLO16: {
- uint32_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
- Insn &= 0xffff0000;
- Insn |= (Value - FinalAddress) & 0xffff;
- writeBytesUnaligned(Insn, TargetPtr, 4);
- break;
- }
- }
-}
-
void RuntimeDyldELF::setMipsABI(const ObjectFile &Obj) {
if (Arch == Triple::UnknownArch ||
!StringRef(Triple::getArchTypePrefix(Arch)).equals("mips")) {
IsMipsO32ABI = false;
+ IsMipsN32ABI = false;
IsMipsN64ABI = false;
return;
}
unsigned AbiVariant;
Obj.getPlatformFlags(AbiVariant);
IsMipsO32ABI = AbiVariant & ELF::EF_MIPS_ABI_O32;
+ IsMipsN32ABI = AbiVariant & ELF::EF_MIPS_ABI2;
IsMipsN64ABI = Obj.getFileFormatName().equals("ELF64-mips");
- if (AbiVariant & ELF::EF_MIPS_ABI2)
- llvm_unreachable("Mips N32 ABI is not supported yet");
-}
-
-void RuntimeDyldELF::resolveMIPS64Relocation(const SectionEntry &Section,
- uint64_t Offset, uint64_t Value,
- uint32_t Type, int64_t Addend,
- uint64_t SymOffset,
- SID SectionID) {
- uint32_t r_type = Type & 0xff;
- uint32_t r_type2 = (Type >> 8) & 0xff;
- uint32_t r_type3 = (Type >> 16) & 0xff;
-
- // RelType is used to keep information for which relocation type we are
- // applying relocation.
- uint32_t RelType = r_type;
- int64_t CalculatedValue = evaluateMIPS64Relocation(Section, Offset, Value,
- RelType, Addend,
- SymOffset, SectionID);
- if (r_type2 != ELF::R_MIPS_NONE) {
- RelType = r_type2;
- CalculatedValue = evaluateMIPS64Relocation(Section, Offset, 0, RelType,
- CalculatedValue, SymOffset,
- SectionID);
- }
- if (r_type3 != ELF::R_MIPS_NONE) {
- RelType = r_type3;
- CalculatedValue = evaluateMIPS64Relocation(Section, Offset, 0, RelType,
- CalculatedValue, SymOffset,
- SectionID);
- }
- applyMIPS64Relocation(Section.getAddressWithOffset(Offset), CalculatedValue,
- RelType);
-}
-
-int64_t
-RuntimeDyldELF::evaluateMIPS64Relocation(const SectionEntry &Section,
- uint64_t Offset, uint64_t Value,
- uint32_t Type, int64_t Addend,
- uint64_t SymOffset, SID SectionID) {
-
- DEBUG(dbgs() << "evaluateMIPS64Relocation, LocalAddress: 0x"
- << format("%llx", Section.getAddressWithOffset(Offset))
- << " FinalAddress: 0x"
- << format("%llx", Section.getLoadAddressWithOffset(Offset))
- << " Value: 0x" << format("%llx", Value) << " Type: 0x"
- << format("%x", Type) << " Addend: 0x" << format("%llx", Addend)
- << " SymOffset: " << format("%x", SymOffset) << "\n");
-
- switch (Type) {
- default:
- llvm_unreachable("Not implemented relocation type!");
- break;
- case ELF::R_MIPS_JALR:
- case ELF::R_MIPS_NONE:
- break;
- case ELF::R_MIPS_32:
- case ELF::R_MIPS_64:
- return Value + Addend;
- case ELF::R_MIPS_26:
- return ((Value + Addend) >> 2) & 0x3ffffff;
- case ELF::R_MIPS_GPREL16: {
- uint64_t GOTAddr = getSectionLoadAddress(SectionToGOTMap[SectionID]);
- return Value + Addend - (GOTAddr + 0x7ff0);
- }
- case ELF::R_MIPS_SUB:
- return Value - Addend;
- case ELF::R_MIPS_HI16:
- // Get the higher 16-bits. Also add 1 if bit 15 is 1.
- return ((Value + Addend + 0x8000) >> 16) & 0xffff;
- case ELF::R_MIPS_LO16:
- return (Value + Addend) & 0xffff;
- case ELF::R_MIPS_CALL16:
- case ELF::R_MIPS_GOT_DISP:
- case ELF::R_MIPS_GOT_PAGE: {
- uint8_t *LocalGOTAddr =
- getSectionAddress(SectionToGOTMap[SectionID]) + SymOffset;
- uint64_t GOTEntry = readBytesUnaligned(LocalGOTAddr, 8);
-
- Value += Addend;
- if (Type == ELF::R_MIPS_GOT_PAGE)
- Value = (Value + 0x8000) & ~0xffff;
-
- if (GOTEntry)
- assert(GOTEntry == Value &&
- "GOT entry has two different addresses.");
- else
- writeBytesUnaligned(Value, LocalGOTAddr, 8);
-
- return (SymOffset - 0x7ff0) & 0xffff;
- }
- case ELF::R_MIPS_GOT_OFST: {
- int64_t page = (Value + Addend + 0x8000) & ~0xffff;
- return (Value + Addend - page) & 0xffff;
- }
- case ELF::R_MIPS_GPREL32: {
- uint64_t GOTAddr = getSectionLoadAddress(SectionToGOTMap[SectionID]);
- return Value + Addend - (GOTAddr + 0x7ff0);
- }
- case ELF::R_MIPS_PC16: {
- uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
- return ((Value + Addend - FinalAddress) >> 2) & 0xffff;
- }
- case ELF::R_MIPS_PC32: {
- uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
- return Value + Addend - FinalAddress;
- }
- case ELF::R_MIPS_PC18_S3: {
- uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
- return ((Value + Addend - (FinalAddress & ~0x7)) >> 3) & 0x3ffff;
- }
- case ELF::R_MIPS_PC19_S2: {
- uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
- return ((Value + Addend - (FinalAddress & ~0x3)) >> 2) & 0x7ffff;
- }
- case ELF::R_MIPS_PC21_S2: {
- uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
- return ((Value + Addend - FinalAddress) >> 2) & 0x1fffff;
- }
- case ELF::R_MIPS_PC26_S2: {
- uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
- return ((Value + Addend - FinalAddress) >> 2) & 0x3ffffff;
- }
- case ELF::R_MIPS_PCHI16: {
- uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
- return ((Value + Addend - FinalAddress + 0x8000) >> 16) & 0xffff;
- }
- case ELF::R_MIPS_PCLO16: {
- uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
- return (Value + Addend - FinalAddress) & 0xffff;
- }
- }
- return 0;
-}
-
-void RuntimeDyldELF::applyMIPS64Relocation(uint8_t *TargetPtr,
- int64_t CalculatedValue,
- uint32_t Type) {
- uint32_t Insn = readBytesUnaligned(TargetPtr, 4);
-
- switch (Type) {
- default:
- break;
- case ELF::R_MIPS_32:
- case ELF::R_MIPS_GPREL32:
- case ELF::R_MIPS_PC32:
- writeBytesUnaligned(CalculatedValue & 0xffffffff, TargetPtr, 4);
- break;
- case ELF::R_MIPS_64:
- case ELF::R_MIPS_SUB:
- writeBytesUnaligned(CalculatedValue, TargetPtr, 8);
- break;
- case ELF::R_MIPS_26:
- case ELF::R_MIPS_PC26_S2:
- Insn = (Insn & 0xfc000000) | CalculatedValue;
- writeBytesUnaligned(Insn, TargetPtr, 4);
- break;
- case ELF::R_MIPS_GPREL16:
- Insn = (Insn & 0xffff0000) | (CalculatedValue & 0xffff);
- writeBytesUnaligned(Insn, TargetPtr, 4);
- break;
- case ELF::R_MIPS_HI16:
- case ELF::R_MIPS_LO16:
- case ELF::R_MIPS_PCHI16:
- case ELF::R_MIPS_PCLO16:
- case ELF::R_MIPS_PC16:
- case ELF::R_MIPS_CALL16:
- case ELF::R_MIPS_GOT_DISP:
- case ELF::R_MIPS_GOT_PAGE:
- case ELF::R_MIPS_GOT_OFST:
- Insn = (Insn & 0xffff0000) | CalculatedValue;
- writeBytesUnaligned(Insn, TargetPtr, 4);
- break;
- case ELF::R_MIPS_PC18_S3:
- Insn = (Insn & 0xfffc0000) | CalculatedValue;
- writeBytesUnaligned(Insn, TargetPtr, 4);
- break;
- case ELF::R_MIPS_PC19_S2:
- Insn = (Insn & 0xfff80000) | CalculatedValue;
- writeBytesUnaligned(Insn, TargetPtr, 4);
- break;
- case ELF::R_MIPS_PC21_S2:
- Insn = (Insn & 0xffe00000) | CalculatedValue;
- writeBytesUnaligned(Insn, TargetPtr, 4);
- break;
- }
}
// Return the .TOC. section and offset.
@@ -1124,19 +846,6 @@ void RuntimeDyldELF::resolveRelocation(const SectionEntry &Section,
resolveARMRelocation(Section, Offset, (uint32_t)(Value & 0xffffffffL), Type,
(uint32_t)(Addend & 0xffffffffL));
break;
- case Triple::mips: // Fall through.
- case Triple::mipsel:
- case Triple::mips64:
- case Triple::mips64el:
- if (IsMipsO32ABI)
- resolveMIPSRelocation(Section, Offset, (uint32_t)(Value & 0xffffffffL),
- Type, (uint32_t)(Addend & 0xffffffffL));
- else if (IsMipsN64ABI)
- resolveMIPS64Relocation(Section, Offset, Value, Type, Addend, SymOffset,
- SectionID);
- else
- llvm_unreachable("Mips ABI not handled");
- break;
case Triple::ppc:
resolvePPC32Relocation(Section, Offset, Value, Type, Addend);
break;
@@ -1258,6 +967,7 @@ RuntimeDyldELF::processRelocationRef(
break;
}
case SymbolRef::ST_Data:
+ case SymbolRef::ST_Function:
case SymbolRef::ST_Unknown: {
Value.SymbolName = TargetName.data();
Value.Addend = Addend;
@@ -1468,7 +1178,7 @@ RuntimeDyldELF::processRelocationRef(
Value.Addend += SignExtend32<28>((Opcode & 0x03ffffff) << 2);
processSimpleRelocation(SectionID, Offset, RelType, Value);
}
- } else if (IsMipsN64ABI) {
+ } else if (IsMipsN32ABI || IsMipsN64ABI) {
uint32_t r_type = RelType & 0xff;
RelocationEntry RE(SectionID, Offset, RelType, Value.Addend);
if (r_type == ELF::R_MIPS_CALL16 || r_type == ELF::R_MIPS_GOT_PAGE
@@ -1805,7 +1515,7 @@ size_t RuntimeDyldELF::getGOTEntrySize() {
case Triple::mipsel:
case Triple::mips64:
case Triple::mips64el:
- if (IsMipsO32ABI)
+ if (IsMipsO32ABI || IsMipsN32ABI)
Result = sizeof(uint32_t);
else if (IsMipsN64ABI)
Result = sizeof(uint64_t);
@@ -1870,7 +1580,7 @@ Error RuntimeDyldELF::finalizeLoad(const ObjectFile &Obj,
// For now, initialize all GOT entries to zero. We'll fill them in as
// needed when GOT-based relocations are applied.
memset(Addr, 0, TotalSize);
- if (IsMipsN64ABI) {
+ if (IsMipsN32ABI || IsMipsN64ABI) {
// To correctly resolve Mips GOT relocations, we need a mapping from
// object's sections to GOTs.
for (section_iterator SI = Obj.section_begin(), SE = Obj.section_end();
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
index 82931b9f45a6..796127ab92bd 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h
@@ -43,9 +43,6 @@ class RuntimeDyldELF : public RuntimeDyldImpl {
void resolveARMRelocation(const SectionEntry &Section, uint64_t Offset,
uint32_t Value, uint32_t Type, int32_t Addend);
- void resolveMIPSRelocation(const SectionEntry &Section, uint64_t Offset,
- uint32_t Value, uint32_t Type, int32_t Addend);
-
void resolvePPC32Relocation(const SectionEntry &Section, uint64_t Offset,
uint64_t Value, uint32_t Type, int64_t Addend);
@@ -55,18 +52,6 @@ class RuntimeDyldELF : public RuntimeDyldImpl {
void resolveSystemZRelocation(const SectionEntry &Section, uint64_t Offset,
uint64_t Value, uint32_t Type, int64_t Addend);
- void resolveMIPS64Relocation(const SectionEntry &Section, uint64_t Offset,
- uint64_t Value, uint32_t Type, int64_t Addend,
- uint64_t SymOffset, SID SectionID);
-
- int64_t evaluateMIPS64Relocation(const SectionEntry &Section,
- uint64_t Offset, uint64_t Value,
- uint32_t Type, int64_t Addend,
- uint64_t SymOffset, SID SectionID);
-
- void applyMIPS64Relocation(uint8_t *TargetPtr, int64_t CalculatedValue,
- uint32_t Type);
-
unsigned getMaxStubSize() override {
if (Arch == Triple::aarch64 || Arch == Triple::aarch64_be)
return 20; // movz; movk; movk; movk; br
@@ -99,9 +84,10 @@ class RuntimeDyldELF : public RuntimeDyldImpl {
Error findOPDEntrySection(const ELFObjectFileBase &Obj,
ObjSectionToIDMap &LocalSections,
RelocationValueRef &Rel);
-
+protected:
size_t getGOTEntrySize();
+private:
SectionEntry &getSection(unsigned SectionID) { return Sections[SectionID]; }
// Allocate no GOT entries for use in the given section.
@@ -138,10 +124,12 @@ class RuntimeDyldELF : public RuntimeDyldImpl {
// that consume more than one slot)
unsigned CurrentGOTIndex;
+protected:
// A map from section to a GOT section that has entries for section's GOT
// relocations. (Mips64 specific)
DenseMap<SID, SID> SectionToGOTMap;
+private:
// A map to avoid duplicate got entries (Mips64 specific)
StringMap<uint64_t> GOTSymbolOffsets;
@@ -159,9 +147,13 @@ class RuntimeDyldELF : public RuntimeDyldImpl {
public:
RuntimeDyldELF(RuntimeDyld::MemoryManager &MemMgr,
- RuntimeDyld::SymbolResolver &Resolver);
+ JITSymbolResolver &Resolver);
~RuntimeDyldELF() override;
+ static std::unique_ptr<RuntimeDyldELF>
+ create(Triple::ArchType Arch, RuntimeDyld::MemoryManager &MemMgr,
+ JITSymbolResolver &Resolver);
+
std::unique_ptr<RuntimeDyld::LoadedObjectInfo>
loadObject(const object::ObjectFile &O) override;
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
index 76bd3fc295b8..279d0de2da76 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
@@ -149,26 +149,41 @@ public:
/// The size of this relocation (MachO specific).
unsigned Size;
+ // COFF specific.
+ bool IsTargetThumbFunc;
+
RelocationEntry(unsigned id, uint64_t offset, uint32_t type, int64_t addend)
: SectionID(id), Offset(offset), RelType(type), Addend(addend),
- SymOffset(0), IsPCRel(false), Size(0) {}
+ SymOffset(0), IsPCRel(false), Size(0), IsTargetThumbFunc(false) {}
RelocationEntry(unsigned id, uint64_t offset, uint32_t type, int64_t addend,
uint64_t symoffset)
: SectionID(id), Offset(offset), RelType(type), Addend(addend),
- SymOffset(symoffset), IsPCRel(false), Size(0) {}
+ SymOffset(symoffset), IsPCRel(false), Size(0),
+ IsTargetThumbFunc(false) {}
RelocationEntry(unsigned id, uint64_t offset, uint32_t type, int64_t addend,
bool IsPCRel, unsigned Size)
: SectionID(id), Offset(offset), RelType(type), Addend(addend),
- SymOffset(0), IsPCRel(IsPCRel), Size(Size) {}
+ SymOffset(0), IsPCRel(IsPCRel), Size(Size), IsTargetThumbFunc(false) {}
RelocationEntry(unsigned id, uint64_t offset, uint32_t type, int64_t addend,
unsigned SectionA, uint64_t SectionAOffset, unsigned SectionB,
uint64_t SectionBOffset, bool IsPCRel, unsigned Size)
: SectionID(id), Offset(offset), RelType(type),
Addend(SectionAOffset - SectionBOffset + addend), IsPCRel(IsPCRel),
- Size(Size) {
+ Size(Size), IsTargetThumbFunc(false) {
+ Sections.SectionA = SectionA;
+ Sections.SectionB = SectionB;
+ }
+
+ RelocationEntry(unsigned id, uint64_t offset, uint32_t type, int64_t addend,
+ unsigned SectionA, uint64_t SectionAOffset, unsigned SectionB,
+ uint64_t SectionBOffset, bool IsPCRel, unsigned Size,
+ bool IsTargetThumbFunc)
+ : SectionID(id), Offset(offset), RelType(type),
+ Addend(SectionAOffset - SectionBOffset + addend), IsPCRel(IsPCRel),
+ Size(Size), IsTargetThumbFunc(IsTargetThumbFunc) {
Sections.SectionA = SectionA;
Sections.SectionB = SectionB;
}
@@ -199,20 +214,23 @@ public:
};
/// @brief Symbol info for RuntimeDyld.
-class SymbolTableEntry : public JITSymbolBase {
+class SymbolTableEntry {
public:
SymbolTableEntry()
- : JITSymbolBase(JITSymbolFlags::None), Offset(0), SectionID(0) {}
+ : Offset(0), SectionID(0) {}
SymbolTableEntry(unsigned SectionID, uint64_t Offset, JITSymbolFlags Flags)
- : JITSymbolBase(Flags), Offset(Offset), SectionID(SectionID) {}
+ : Offset(Offset), SectionID(SectionID), Flags(Flags) {}
unsigned getSectionID() const { return SectionID; }
uint64_t getOffset() const { return Offset; }
+ JITSymbolFlags getFlags() const { return Flags; }
+
private:
uint64_t Offset;
unsigned SectionID;
+ JITSymbolFlags Flags;
};
typedef StringMap<SymbolTableEntry> RTDyldSymbolTable;
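After this change SymbolTableEntry carries its JITSymbolFlags as a plain member instead of inheriting them from the removed JITSymbolBase. A tiny usage sketch, assuming the RuntimeDyldImpl.h declarations above are in scope (values are illustrative):

#include <cassert>

static void symbolTableEntryExample() {
  // Section 1, offset 0x40, exported definition.
  SymbolTableEntry Entry(1, 0x40, llvm::JITSymbolFlags::Exported);
  assert(Entry.getSectionID() == 1);
  assert(Entry.getOffset() == 0x40);
  assert(Entry.getFlags().isExported());
}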
@@ -227,7 +245,7 @@ protected:
RuntimeDyld::MemoryManager &MemMgr;
// The symbol resolver to use for external symbols.
- RuntimeDyld::SymbolResolver &Resolver;
+ JITSymbolResolver &Resolver;
// Attached RuntimeDyldChecker instance. Null if no instance attached.
RuntimeDyldCheckerImpl *Checker;
@@ -272,6 +290,7 @@ protected:
Triple::ArchType Arch;
bool IsTargetLittleEndian;
bool IsMipsO32ABI;
+ bool IsMipsN32ABI;
bool IsMipsN64ABI;
// True if all sections should be passed to the memory manager, false if only
@@ -335,6 +354,7 @@ protected:
virtual void setMipsABI(const ObjectFile &Obj) {
IsMipsO32ABI = false;
+ IsMipsN32ABI = false;
IsMipsN64ABI = false;
}
@@ -420,7 +440,7 @@ protected:
public:
RuntimeDyldImpl(RuntimeDyld::MemoryManager &MemMgr,
- RuntimeDyld::SymbolResolver &Resolver)
+ JITSymbolResolver &Resolver)
: MemMgr(MemMgr), Resolver(Resolver), Checker(nullptr),
ProcessAllSections(false), HasError(false) {
}
@@ -451,7 +471,7 @@ public:
return getSectionAddress(SymInfo.getSectionID()) + SymInfo.getOffset();
}
- RuntimeDyld::SymbolInfo getSymbol(StringRef Name) const {
+ JITEvaluatedSymbol getSymbol(StringRef Name) const {
// FIXME: Just look up as a function for now. Overly simple of course.
// Work in progress.
RTDyldSymbolTable::const_iterator pos = GlobalSymbolTable.find(Name);
@@ -462,7 +482,7 @@ public:
if (SymEntry.getSectionID() != AbsoluteSymbolSection)
SectionAddr = getSectionLoadAddress(SymEntry.getSectionID());
uint64_t TargetAddr = SectionAddr + SymEntry.getOffset();
- return RuntimeDyld::SymbolInfo(TargetAddr, SymEntry.getFlags());
+ return JITEvaluatedSymbol(TargetAddr, SymEntry.getFlags());
}
void resolveRelocations();
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
index fd109aea91d4..00541e8c06fe 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp
@@ -343,7 +343,7 @@ void RuntimeDyldMachOCRTPBase<Impl>::registerEHFrames() {
std::unique_ptr<RuntimeDyldMachO>
RuntimeDyldMachO::create(Triple::ArchType Arch,
RuntimeDyld::MemoryManager &MemMgr,
- RuntimeDyld::SymbolResolver &Resolver) {
+ JITSymbolResolver &Resolver) {
switch (Arch) {
default:
llvm_unreachable("Unsupported target for RuntimeDyldMachO.");
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
index 30f3bb3bf07d..67a5020fc4f9 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
@@ -50,7 +50,7 @@ protected:
SmallVector<EHFrameRelatedSections, 2> UnregisteredEHFrameSections;
RuntimeDyldMachO(RuntimeDyld::MemoryManager &MemMgr,
- RuntimeDyld::SymbolResolver &Resolver)
+ JITSymbolResolver &Resolver)
: RuntimeDyldImpl(MemMgr, Resolver) {}
/// This convenience method uses memcpy to extract a contiguous addend (the
@@ -124,7 +124,7 @@ public:
static std::unique_ptr<RuntimeDyldMachO>
create(Triple::ArchType Arch,
RuntimeDyld::MemoryManager &MemMgr,
- RuntimeDyld::SymbolResolver &Resolver);
+ JITSymbolResolver &Resolver);
std::unique_ptr<RuntimeDyld::LoadedObjectInfo>
loadObject(const object::ObjectFile &O) override;
@@ -152,7 +152,7 @@ private:
public:
RuntimeDyldMachOCRTPBase(RuntimeDyld::MemoryManager &MemMgr,
- RuntimeDyld::SymbolResolver &Resolver)
+ JITSymbolResolver &Resolver)
: RuntimeDyldMachO(MemMgr, Resolver) {}
Error finalizeLoad(const ObjectFile &Obj,
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h
index 44fda87e0f94..0398413e1532 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFI386.h
@@ -25,7 +25,7 @@ namespace llvm {
class RuntimeDyldCOFFI386 : public RuntimeDyldCOFF {
public:
RuntimeDyldCOFFI386(RuntimeDyld::MemoryManager &MM,
- RuntimeDyld::SymbolResolver &Resolver)
+ JITSymbolResolver &Resolver)
: RuntimeDyldCOFF(MM, Resolver) {}
unsigned getMaxStubSize() override {
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h
index ff7d1d439252..8c6af0bd9c6d 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFThumb.h
@@ -22,10 +22,30 @@
namespace llvm {
+static bool isThumbFunc(symbol_iterator Symbol, const ObjectFile &Obj,
+ section_iterator Section) {
+ Expected<SymbolRef::Type> SymTypeOrErr = Symbol->getType();
+ if (!SymTypeOrErr) {
+ std::string Buf;
+ raw_string_ostream OS(Buf);
+ logAllUnhandledErrors(SymTypeOrErr.takeError(), OS, "");
+ OS.flush();
+ report_fatal_error(Buf);
+ }
+
+ if (*SymTypeOrErr != SymbolRef::ST_Function)
+ return false;
+
+ // We check the IMAGE_SCN_MEM_16BIT flag in the symbol's section to tell
+ // whether it is Thumb code or not.
+ return cast<COFFObjectFile>(Obj).getCOFFSection(*Section)->Characteristics &
+ COFF::IMAGE_SCN_MEM_16BIT;
+}
+
class RuntimeDyldCOFFThumb : public RuntimeDyldCOFF {
public:
RuntimeDyldCOFFThumb(RuntimeDyld::MemoryManager &MM,
- RuntimeDyld::SymbolResolver &Resolver)
+ JITSymbolResolver &Resolver)
: RuntimeDyldCOFF(MM, Resolver) {}
unsigned getMaxStubSize() override {
@@ -92,12 +112,22 @@ public:
else
return TargetSectionIDOrErr.takeError();
+ // We need to find out if the relocation is relative to a Thumb function
+ // so that we include the ISA selection bit when resolving the relocation.
+ bool IsTargetThumbFunc = isThumbFunc(Symbol, Obj, Section);
+
switch (RelType) {
default: llvm_unreachable("unsupported relocation type");
case COFF::IMAGE_REL_ARM_ABSOLUTE:
// This relocation is ignored.
break;
- case COFF::IMAGE_REL_ARM_ADDR32:
+ case COFF::IMAGE_REL_ARM_ADDR32: {
+ RelocationEntry RE = RelocationEntry(
+ SectionID, Offset, RelType, Addend, TargetSectionID,
+ getSymbolOffset(*Symbol), 0, 0, false, 0, IsTargetThumbFunc);
+ addRelocationForSection(RE, TargetSectionID);
+ break;
+ }
case COFF::IMAGE_REL_ARM_ADDR32NB: {
RelocationEntry RE =
RelocationEntry(SectionID, Offset, RelType, Addend, TargetSectionID,
@@ -118,9 +148,9 @@ public:
break;
}
case COFF::IMAGE_REL_ARM_MOV32T: {
- RelocationEntry RE =
- RelocationEntry(SectionID, Offset, RelType, Addend, TargetSectionID,
- getSymbolOffset(*Symbol), 0, 0, false, 0);
+ RelocationEntry RE = RelocationEntry(
+ SectionID, Offset, RelType, Addend, TargetSectionID,
+ getSymbolOffset(*Symbol), 0, 0, false, 0, IsTargetThumbFunc);
addRelocationForSection(RE, TargetSectionID);
break;
}
@@ -142,6 +172,7 @@ public:
void resolveRelocation(const RelocationEntry &RE, uint64_t Value) override {
const auto Section = Sections[RE.SectionID];
uint8_t *Target = Section.getAddressWithOffset(RE.Offset);
+ int ISASelectionBit = RE.IsTargetThumbFunc ? 1 : 0;
switch (RE.RelType) {
default: llvm_unreachable("unsupported relocation type");
@@ -154,6 +185,7 @@ public:
RE.Sections.SectionA == static_cast<uint32_t>(-1)
? Value
: Sections[RE.Sections.SectionA].getLoadAddressWithOffset(RE.Addend);
+ Result |= ISASelectionBit;
assert(static_cast<int32_t>(Result) <= INT32_MAX &&
"relocation overflow");
assert(static_cast<int32_t>(Result) >= INT32_MIN &&
@@ -178,6 +210,7 @@ public:
<< " RelType: IMAGE_REL_ARM_ADDR32NB"
<< " TargetSection: " << RE.Sections.SectionA
<< " Value: " << format("0x%08" PRIx32, Result) << '\n');
+ Result |= ISASelectionBit;
writeBytesUnaligned(Result, Target, 4);
break;
}
@@ -225,10 +258,11 @@ public:
Bytes[0] |= ((Immediate & 0xf000) >> 12);
Bytes[1] |= ((Immediate & 0x0800) >> 11);
Bytes[2] |= ((Immediate & 0x00ff) >> 0);
- Bytes[3] |= ((Immediate & 0x0700) >> 8);
+ Bytes[3] |= (((Immediate & 0x0700) >> 8) << 4);
};
- EncodeImmediate(&Target[0], static_cast<uint32_t>(Result) >> 00);
+ EncodeImmediate(&Target[0],
+ (static_cast<uint32_t>(Result) >> 00) | ISASelectionBit);
EncodeImmediate(&Target[4], static_cast<uint32_t>(Result) >> 16);
break;
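The IsTargetThumbFunc plumbing above exists so that addresses of Thumb functions keep bit 0 set after relocation: on ARM, a BX/BLX to an address with the low bit set stays in (or switches to) Thumb state. A minimal sketch of the arithmetic, independent of the RuntimeDyld classes (the helper name is illustrative):

#include <cassert>
#include <cstdint>

// When an IMAGE_REL_ARM_ADDR32 or IMAGE_REL_ARM_MOV32T target is a Thumb
// function, OR the ISA selection bit into the resolved address.
static uint32_t applyIsaSelectionBit(uint32_t ResolvedAddr, bool IsThumbFunc) {
  return ResolvedAddr | (IsThumbFunc ? 1u : 0u);
}

int main() {
  assert(applyIsaSelectionBit(0x00401000, /*IsThumbFunc=*/true) == 0x00401001);
  assert(applyIsaSelectionBit(0x00401000, /*IsThumbFunc=*/false) == 0x00401000);
  return 0;
}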
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h
index df8681da24d1..109beb36f1ee 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldCOFFX86_64.h
@@ -33,7 +33,7 @@ private:
public:
RuntimeDyldCOFFX86_64(RuntimeDyld::MemoryManager &MM,
- RuntimeDyld::SymbolResolver &Resolver)
+ JITSymbolResolver &Resolver)
: RuntimeDyldCOFF(MM, Resolver) {}
unsigned getMaxStubSize() override {
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.cpp
new file mode 100644
index 000000000000..cae4d69789a2
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.cpp
@@ -0,0 +1,312 @@
+//===-- RuntimeDyldELFMips.cpp ---- ELF/Mips specific code. -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RuntimeDyldELFMips.h"
+#include "llvm/Support/ELF.h"
+
+#define DEBUG_TYPE "dyld"
+
+void RuntimeDyldELFMips::resolveRelocation(const RelocationEntry &RE,
+ uint64_t Value) {
+ const SectionEntry &Section = Sections[RE.SectionID];
+ if (IsMipsO32ABI)
+ resolveMIPSO32Relocation(Section, RE.Offset, Value, RE.RelType, RE.Addend);
+ else if (IsMipsN32ABI) {
+ resolveMIPSN32Relocation(Section, RE.Offset, Value, RE.RelType, RE.Addend,
+ RE.SymOffset, RE.SectionID);
+ } else if (IsMipsN64ABI)
+ resolveMIPSN64Relocation(Section, RE.Offset, Value, RE.RelType, RE.Addend,
+ RE.SymOffset, RE.SectionID);
+ else
+ llvm_unreachable("Mips ABI not handled");
+}
+
+uint64_t RuntimeDyldELFMips::evaluateRelocation(const RelocationEntry &RE,
+ uint64_t Value,
+ uint64_t Addend) {
+ if (IsMipsN32ABI) {
+ const SectionEntry &Section = Sections[RE.SectionID];
+ Value = evaluateMIPS64Relocation(Section, RE.Offset, Value, RE.RelType,
+ Addend, RE.SymOffset, RE.SectionID);
+ return Value;
+ }
+ llvm_unreachable("Not reachable");
+}
+
+void RuntimeDyldELFMips::applyRelocation(const RelocationEntry &RE,
+ uint64_t Value) {
+ if (IsMipsN32ABI) {
+ const SectionEntry &Section = Sections[RE.SectionID];
+ applyMIPSRelocation(Section.getAddressWithOffset(RE.Offset), Value,
+ RE.RelType);
+ return;
+ }
+ llvm_unreachable("Not reachable");
+}
+
+int64_t
+RuntimeDyldELFMips::evaluateMIPS32Relocation(const SectionEntry &Section,
+ uint64_t Offset, uint64_t Value,
+ uint32_t Type) {
+
+ DEBUG(dbgs() << "evaluateMIPS32Relocation, LocalAddress: 0x"
+ << format("%llx", Section.getAddressWithOffset(Offset))
+ << " FinalAddress: 0x"
+ << format("%llx", Section.getLoadAddressWithOffset(Offset))
+ << " Value: 0x" << format("%llx", Value) << " Type: 0x"
+ << format("%x", Type) << "\n");
+
+ switch (Type) {
+ default:
+ llvm_unreachable("Unknown relocation type!");
+ return Value;
+ case ELF::R_MIPS_32:
+ return Value;
+ case ELF::R_MIPS_26:
+ return Value >> 2;
+ case ELF::R_MIPS_HI16:
+ // Get the higher 16-bits. Also add 1 if bit 15 is 1.
+ return (Value + 0x8000) >> 16;
+ case ELF::R_MIPS_LO16:
+ return Value;
+ case ELF::R_MIPS_PC32: {
+ uint32_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
+ return Value - FinalAddress;
+ }
+ case ELF::R_MIPS_PC16: {
+ uint32_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
+ return (Value - FinalAddress) >> 2;
+ }
+ case ELF::R_MIPS_PC19_S2: {
+ uint32_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
+ return (Value - (FinalAddress & ~0x3)) >> 2;
+ }
+ case ELF::R_MIPS_PC21_S2: {
+ uint32_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
+ return (Value - FinalAddress) >> 2;
+ }
+ case ELF::R_MIPS_PC26_S2: {
+ uint32_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
+ return (Value - FinalAddress) >> 2;
+ }
+ case ELF::R_MIPS_PCHI16: {
+ uint32_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
+ return (Value - FinalAddress + 0x8000) >> 16;
+ }
+ case ELF::R_MIPS_PCLO16: {
+ uint32_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
+ return Value - FinalAddress;
+ }
+ }
+}
+
+int64_t RuntimeDyldELFMips::evaluateMIPS64Relocation(
+ const SectionEntry &Section, uint64_t Offset, uint64_t Value, uint32_t Type,
+ int64_t Addend, uint64_t SymOffset, SID SectionID) {
+
+ DEBUG(dbgs() << "evaluateMIPS64Relocation, LocalAddress: 0x"
+ << format("%llx", Section.getAddressWithOffset(Offset))
+ << " FinalAddress: 0x"
+ << format("%llx", Section.getLoadAddressWithOffset(Offset))
+ << " Value: 0x" << format("%llx", Value) << " Type: 0x"
+ << format("%x", Type) << " Addend: 0x" << format("%llx", Addend)
+ << " SymOffset: " << format("%x", SymOffset) << "\n");
+
+ switch (Type) {
+ default:
+ llvm_unreachable("Not implemented relocation type!");
+ break;
+ case ELF::R_MIPS_JALR:
+ case ELF::R_MIPS_NONE:
+ break;
+ case ELF::R_MIPS_32:
+ case ELF::R_MIPS_64:
+ return Value + Addend;
+ case ELF::R_MIPS_26:
+ return ((Value + Addend) >> 2) & 0x3ffffff;
+ case ELF::R_MIPS_GPREL16: {
+ uint64_t GOTAddr = getSectionLoadAddress(SectionToGOTMap[SectionID]);
+ return Value + Addend - (GOTAddr + 0x7ff0);
+ }
+ case ELF::R_MIPS_SUB:
+ return Value - Addend;
+ case ELF::R_MIPS_HI16:
+ // Get the upper 16 bits. Also add 1 if bit 15 is 1.
+ return ((Value + Addend + 0x8000) >> 16) & 0xffff;
+ case ELF::R_MIPS_LO16:
+ return (Value + Addend) & 0xffff;
+ case ELF::R_MIPS_CALL16:
+ case ELF::R_MIPS_GOT_DISP:
+ case ELF::R_MIPS_GOT_PAGE: {
+ uint8_t *LocalGOTAddr =
+ getSectionAddress(SectionToGOTMap[SectionID]) + SymOffset;
+ uint64_t GOTEntry = readBytesUnaligned(LocalGOTAddr, getGOTEntrySize());
+
+ Value += Addend;
+ if (Type == ELF::R_MIPS_GOT_PAGE)
+ Value = (Value + 0x8000) & ~0xffff;
+
+ if (GOTEntry)
+ assert(GOTEntry == Value &&
+ "GOT entry has two different addresses.");
+ else
+ writeBytesUnaligned(Value, LocalGOTAddr, getGOTEntrySize());
+
+ return (SymOffset - 0x7ff0) & 0xffff;
+ }
+ case ELF::R_MIPS_GOT_OFST: {
+ int64_t page = (Value + Addend + 0x8000) & ~0xffff;
+ return (Value + Addend - page) & 0xffff;
+ }
+ case ELF::R_MIPS_GPREL32: {
+ uint64_t GOTAddr = getSectionLoadAddress(SectionToGOTMap[SectionID]);
+ return Value + Addend - (GOTAddr + 0x7ff0);
+ }
+ case ELF::R_MIPS_PC16: {
+ uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
+ return ((Value + Addend - FinalAddress) >> 2) & 0xffff;
+ }
+ case ELF::R_MIPS_PC32: {
+ uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
+ return Value + Addend - FinalAddress;
+ }
+ case ELF::R_MIPS_PC18_S3: {
+ uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
+ return ((Value + Addend - (FinalAddress & ~0x7)) >> 3) & 0x3ffff;
+ }
+ case ELF::R_MIPS_PC19_S2: {
+ uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
+ return ((Value + Addend - (FinalAddress & ~0x3)) >> 2) & 0x7ffff;
+ }
+ case ELF::R_MIPS_PC21_S2: {
+ uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
+ return ((Value + Addend - FinalAddress) >> 2) & 0x1fffff;
+ }
+ case ELF::R_MIPS_PC26_S2: {
+ uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
+ return ((Value + Addend - FinalAddress) >> 2) & 0x3ffffff;
+ }
+ case ELF::R_MIPS_PCHI16: {
+ uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
+ return ((Value + Addend - FinalAddress + 0x8000) >> 16) & 0xffff;
+ }
+ case ELF::R_MIPS_PCLO16: {
+ uint64_t FinalAddress = Section.getLoadAddressWithOffset(Offset);
+ return (Value + Addend - FinalAddress) & 0xffff;
+ }
+ }
+ return 0;
+}
+
+void RuntimeDyldELFMips::applyMIPSRelocation(uint8_t *TargetPtr, int64_t Value,
+ uint32_t Type) {
+ uint32_t Insn = readBytesUnaligned(TargetPtr, 4);
+
+ switch (Type) {
+ default:
+ llvm_unreachable("Unknown relocation type!");
+ break;
+ case ELF::R_MIPS_GPREL16:
+ case ELF::R_MIPS_HI16:
+ case ELF::R_MIPS_LO16:
+ case ELF::R_MIPS_PC16:
+ case ELF::R_MIPS_PCHI16:
+ case ELF::R_MIPS_PCLO16:
+ case ELF::R_MIPS_CALL16:
+ case ELF::R_MIPS_GOT_DISP:
+ case ELF::R_MIPS_GOT_PAGE:
+ case ELF::R_MIPS_GOT_OFST:
+ Insn = (Insn & 0xffff0000) | (Value & 0x0000ffff);
+ writeBytesUnaligned(Insn, TargetPtr, 4);
+ break;
+ case ELF::R_MIPS_PC18_S3:
+ Insn = (Insn & 0xfffc0000) | (Value & 0x0003ffff);
+ writeBytesUnaligned(Insn, TargetPtr, 4);
+ break;
+ case ELF::R_MIPS_PC19_S2:
+ Insn = (Insn & 0xfff80000) | (Value & 0x0007ffff);
+ writeBytesUnaligned(Insn, TargetPtr, 4);
+ break;
+ case ELF::R_MIPS_PC21_S2:
+ Insn = (Insn & 0xffe00000) | (Value & 0x001fffff);
+ writeBytesUnaligned(Insn, TargetPtr, 4);
+ break;
+ case ELF::R_MIPS_26:
+ case ELF::R_MIPS_PC26_S2:
+ Insn = (Insn & 0xfc000000) | (Value & 0x03ffffff);
+ writeBytesUnaligned(Insn, TargetPtr, 4);
+ break;
+ case ELF::R_MIPS_32:
+ case ELF::R_MIPS_GPREL32:
+ case ELF::R_MIPS_PC32:
+ writeBytesUnaligned(Value & 0xffffffff, TargetPtr, 4);
+ break;
+ case ELF::R_MIPS_64:
+ case ELF::R_MIPS_SUB:
+ writeBytesUnaligned(Value, TargetPtr, 8);
+ break;
+ }
+}
+
+void RuntimeDyldELFMips::resolveMIPSN32Relocation(
+ const SectionEntry &Section, uint64_t Offset, uint64_t Value, uint32_t Type,
+ int64_t Addend, uint64_t SymOffset, SID SectionID) {
+ int64_t CalculatedValue = evaluateMIPS64Relocation(
+ Section, Offset, Value, Type, Addend, SymOffset, SectionID);
+ applyMIPSRelocation(Section.getAddressWithOffset(Offset), CalculatedValue,
+ Type);
+}
+
+void RuntimeDyldELFMips::resolveMIPSN64Relocation(
+ const SectionEntry &Section, uint64_t Offset, uint64_t Value, uint32_t Type,
+ int64_t Addend, uint64_t SymOffset, SID SectionID) {
+ uint32_t r_type = Type & 0xff;
+ uint32_t r_type2 = (Type >> 8) & 0xff;
+ uint32_t r_type3 = (Type >> 16) & 0xff;
+
+ // RelType keeps track of which relocation type is currently being applied.
+ uint32_t RelType = r_type;
+ int64_t CalculatedValue = evaluateMIPS64Relocation(Section, Offset, Value,
+ RelType, Addend,
+ SymOffset, SectionID);
+ if (r_type2 != ELF::R_MIPS_NONE) {
+ RelType = r_type2;
+ CalculatedValue = evaluateMIPS64Relocation(Section, Offset, 0, RelType,
+ CalculatedValue, SymOffset,
+ SectionID);
+ }
+ if (r_type3 != ELF::R_MIPS_NONE) {
+ RelType = r_type3;
+ CalculatedValue = evaluateMIPS64Relocation(Section, Offset, 0, RelType,
+ CalculatedValue, SymOffset,
+ SectionID);
+ }
+ applyMIPSRelocation(Section.getAddressWithOffset(Offset), CalculatedValue,
+ RelType);
+}
+
+void RuntimeDyldELFMips::resolveMIPSO32Relocation(const SectionEntry &Section,
+ uint64_t Offset,
+ uint32_t Value, uint32_t Type,
+ int32_t Addend) {
+ uint8_t *TargetPtr = Section.getAddressWithOffset(Offset);
+ Value += Addend;
+
+ DEBUG(dbgs() << "resolveMIPSO32Relocation, LocalAddress: "
+ << Section.getAddressWithOffset(Offset) << " FinalAddress: "
+ << format("%p", Section.getLoadAddressWithOffset(Offset))
+ << " Value: " << format("%x", Value)
+ << " Type: " << format("%x", Type)
+ << " Addend: " << format("%x", Addend) << "\n");
+
+ Value = evaluateMIPS32Relocation(Section, Offset, Value, Type);
+
+ applyMIPSRelocation(TargetPtr, Value, Type);
+}
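Aside (not part of the patch above): the R_MIPS_HI16 handling adds 0x8000 before shifting because the HI16/LO16 pair is later recombined as (hi << 16) plus a sign-extended LO16, so HI16 must be rounded up whenever bit 15 of the target address is set. A minimal standalone C++ sketch of that arithmetic, with illustrative helper names:

// Why evaluateMIPS32Relocation computes (Value + 0x8000) >> 16 for R_MIPS_HI16.
#include <cassert>
#include <cstdint>

static uint16_t hi16(uint32_t Value) { return (Value + 0x8000) >> 16; }
static uint16_t lo16(uint32_t Value) { return Value & 0xffff; }

int main() {
  uint32_t Target = 0x12349abc;                     // bit 15 set, so LO16 is negative
  int32_t Lo = static_cast<int16_t>(lo16(Target));  // sign-extended by addiu/lw
  uint32_t Rebuilt = (static_cast<uint32_t>(hi16(Target)) << 16) + Lo;
  assert(Rebuilt == Target);  // without the +0x8000 bias this would be off by 0x10000
  return 0;
}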
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.h
new file mode 100644
index 000000000000..ce54a2717673
--- /dev/null
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldELFMips.h
@@ -0,0 +1,68 @@
+//===-- RuntimeDyldELFMips.h ---- ELF/Mips specific code. -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDELFMIPS_H
+#define LLVM_LIB_EXECUTIONENGINE_RUNTIMEDYLD_TARGETS_RUNTIMEDYLDELFMIPS_H
+
+#include "../RuntimeDyldELF.h"
+#include <string>
+
+#define DEBUG_TYPE "dyld"
+
+namespace llvm {
+
+class RuntimeDyldELFMips : public RuntimeDyldELF {
+public:
+
+ typedef uint64_t TargetPtrT;
+
+ RuntimeDyldELFMips(RuntimeDyld::MemoryManager &MM,
+ JITSymbolResolver &Resolver)
+ : RuntimeDyldELF(MM, Resolver) {}
+
+ void resolveRelocation(const RelocationEntry &RE, uint64_t Value) override;
+
+protected:
+ void resolveMIPSO32Relocation(const SectionEntry &Section, uint64_t Offset,
+ uint32_t Value, uint32_t Type, int32_t Addend);
+ void resolveMIPSN32Relocation(const SectionEntry &Section, uint64_t Offset,
+ uint64_t Value, uint32_t Type, int64_t Addend,
+ uint64_t SymOffset, SID SectionID);
+ void resolveMIPSN64Relocation(const SectionEntry &Section, uint64_t Offset,
+ uint64_t Value, uint32_t Type, int64_t Addend,
+ uint64_t SymOffset, SID SectionID);
+
+private:
+ /// \brief An object file specific relocation resolver
+ /// \param RE The relocation to be resolved
+ /// \param Value Target symbol address used to apply the relocation
+ uint64_t evaluateRelocation(const RelocationEntry &RE, uint64_t Value,
+ uint64_t Addend);
+
+ /// \brief An object file specific relocation resolver
+ /// \param RE The relocation to be resolved
+ /// \param Value Target symbol address used to apply the relocation
+ void applyRelocation(const RelocationEntry &RE, uint64_t Value);
+
+ int64_t evaluateMIPS32Relocation(const SectionEntry &Section, uint64_t Offset,
+ uint64_t Value, uint32_t Type);
+ int64_t evaluateMIPS64Relocation(const SectionEntry &Section,
+ uint64_t Offset, uint64_t Value,
+ uint32_t Type, int64_t Addend,
+ uint64_t SymOffset, SID SectionID);
+
+ void applyMIPSRelocation(uint8_t *TargetPtr, int64_t CalculatedValue,
+ uint32_t Type);
+
+};
+}
+
+#undef DEBUG_TYPE
+
+#endif
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h
index 63598f197070..97cbc153b227 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOAArch64.h
@@ -24,7 +24,7 @@ public:
typedef uint64_t TargetPtrT;
RuntimeDyldMachOAArch64(RuntimeDyld::MemoryManager &MM,
- RuntimeDyld::SymbolResolver &Resolver)
+ JITSymbolResolver &Resolver)
: RuntimeDyldMachOCRTPBase(MM, Resolver) {}
unsigned getMaxStubSize() override { return 8; }
@@ -97,7 +97,8 @@ public:
(void)p;
assert((*p & 0x3B000000) == 0x39000000 &&
"Only expected load / store instructions.");
- } // fall-through
+ LLVM_FALLTHROUGH;
+ }
case MachO::ARM64_RELOC_PAGEOFF12: {
// Verify that the relocation points to one of the expected load / store
// or add / sub instructions.
@@ -196,7 +197,8 @@ public:
assert((*p & 0x3B000000) == 0x39000000 &&
"Only expected load / store instructions.");
(void)p;
- } // fall-through
+ LLVM_FALLTHROUGH;
+ }
case MachO::ARM64_RELOC_PAGEOFF12: {
// Verify that the relocation points to one of the expected load / store
// or add / sub instructions.
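Aside (not part of the patch): the two hunks above replace `// fall-through` comments with the LLVM_FALLTHROUGH macro from llvm/Support/Compiler.h, which expands to a [[fallthrough]]-style attribute where the compiler supports one, keeping -Wimplicit-fallthrough quiet about the intentional fall-through. A minimal sketch of the pattern, with an invented function and case values:

#include "llvm/Support/Compiler.h"

static int classify(int Kind) {
  int Result = 0;
  switch (Kind) {
  case 0:
    Result += 1;        // deliberately continues into the next case
    LLVM_FALLTHROUGH;
  case 1:
    Result += 2;
    break;
  default:
    Result = -1;
    break;
  }
  return Result;
}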
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
index 0abf9daba505..adca0eeb08b4 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOARM.h
@@ -27,7 +27,7 @@ public:
typedef uint32_t TargetPtrT;
RuntimeDyldMachOARM(RuntimeDyld::MemoryManager &MM,
- RuntimeDyld::SymbolResolver &Resolver)
+ JITSymbolResolver &Resolver)
: RuntimeDyldMachOCRTPBase(MM, Resolver) {}
unsigned getMaxStubSize() override { return 8; }
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h
index 2c79b3f7c819..c42f1751a181 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOI386.h
@@ -24,7 +24,7 @@ public:
typedef uint32_t TargetPtrT;
RuntimeDyldMachOI386(RuntimeDyld::MemoryManager &MM,
- RuntimeDyld::SymbolResolver &Resolver)
+ JITSymbolResolver &Resolver)
: RuntimeDyldMachOCRTPBase(MM, Resolver) {}
unsigned getMaxStubSize() override { return 0; }
diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h
index bc4822983244..32fd3efddd0d 100644
--- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h
+++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/Targets/RuntimeDyldMachOX86_64.h
@@ -24,7 +24,7 @@ public:
typedef uint64_t TargetPtrT;
RuntimeDyldMachOX86_64(RuntimeDyld::MemoryManager &MM,
- RuntimeDyld::SymbolResolver &Resolver)
+ JITSymbolResolver &Resolver)
: RuntimeDyldMachOCRTPBase(MM, Resolver) {}
unsigned getMaxStubSize() override { return 8; }
diff --git a/contrib/llvm/lib/ExecutionEngine/SectionMemoryManager.cpp b/contrib/llvm/lib/ExecutionEngine/SectionMemoryManager.cpp
index 1ad5f1740115..50478eac6827 100644
--- a/contrib/llvm/lib/ExecutionEngine/SectionMemoryManager.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/SectionMemoryManager.cpp
@@ -196,8 +196,8 @@ SectionMemoryManager::applyMemoryGroupPermissions(MemoryGroup &MemGroup,
// Remove all blocks which are now empty
MemGroup.FreeMem.erase(
- std::remove_if(MemGroup.FreeMem.begin(), MemGroup.FreeMem.end(),
- [](FreeMemBlock &FreeMB) { return FreeMB.Free.size() == 0; }),
+ remove_if(MemGroup.FreeMem,
+ [](FreeMemBlock &FreeMB) { return FreeMB.Free.size() == 0; }),
MemGroup.FreeMem.end());
return std::error_code();
diff --git a/contrib/llvm/lib/ExecutionEngine/TargetSelect.cpp b/contrib/llvm/lib/ExecutionEngine/TargetSelect.cpp
index b45f0c89de8b..6e2973c04e5d 100644
--- a/contrib/llvm/lib/ExecutionEngine/TargetSelect.cpp
+++ b/contrib/llvm/lib/ExecutionEngine/TargetSelect.cpp
@@ -48,9 +48,8 @@ TargetMachine *EngineBuilder::selectTarget(const Triple &TargetTriple,
// Adjust the triple to match what the user requested.
const Target *TheTarget = nullptr;
if (!MArch.empty()) {
- auto I = std::find_if(
- TargetRegistry::targets().begin(), TargetRegistry::targets().end(),
- [&](const Target &T) { return MArch == T.getName(); });
+ auto I = find_if(TargetRegistry::targets(),
+ [&](const Target &T) { return MArch == T.getName(); });
if (I == TargetRegistry::targets().end()) {
if (ErrorStr)
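Aside (not part of the patch): the SectionMemoryManager and TargetSelect hunks above swap iterator-pair algorithms for the range overloads in llvm/ADT/STLExtras.h, which take the whole container instead of begin()/end(). A small sketch of both helpers under those assumptions (the container and predicates are made up for illustration):

#include "llvm/ADT/STLExtras.h"
#include <string>
#include <vector>

static void dropEmpty(std::vector<std::string> &Names) {
  // Range-based remove_if feeding the usual erase/remove idiom.
  Names.erase(llvm::remove_if(Names,
                              [](const std::string &S) { return S.empty(); }),
              Names.end());
  // Range-based find_if: first long name, or Names.end() if none.
  auto I = llvm::find_if(Names,
                         [](const std::string &S) { return S.size() > 8; });
  (void)I;
}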
diff --git a/contrib/llvm/lib/IR/AsmWriter.cpp b/contrib/llvm/lib/IR/AsmWriter.cpp
index 9b2399dd880c..eecef9423f2e 100644
--- a/contrib/llvm/lib/IR/AsmWriter.cpp
+++ b/contrib/llvm/lib/IR/AsmWriter.cpp
@@ -1,3 +1,4 @@
+
//===-- AsmWriter.cpp - Printing LLVM as an assembly file -----------------===//
//
// The LLVM Compiler Infrastructure
@@ -310,6 +311,7 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) {
case CallingConv::X86_StdCall: Out << "x86_stdcallcc"; break;
case CallingConv::X86_FastCall: Out << "x86_fastcallcc"; break;
case CallingConv::X86_ThisCall: Out << "x86_thiscallcc"; break;
+ case CallingConv::X86_RegCall: Out << "x86_regcallcc"; break;
case CallingConv::X86_VectorCall:Out << "x86_vectorcallcc"; break;
case CallingConv::Intel_OCL_BI: Out << "intel_ocl_bicc"; break;
case CallingConv::ARM_APCS: Out << "arm_apcscc"; break;
@@ -336,9 +338,7 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) {
}
}
-// PrintEscapedString - Print each character of the specified string, escaping
-// it if it is not printable or if it is an escape char.
-static void PrintEscapedString(StringRef Name, raw_ostream &Out) {
+void llvm::PrintEscapedString(StringRef Name, raw_ostream &Out) {
for (unsigned i = 0, e = Name.size(); i != e; ++i) {
unsigned char C = Name[i];
if (isprint(C) && C != '\\' && C != '"')
@@ -1041,39 +1041,6 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Metadata *MD,
SlotTracker *Machine, const Module *Context,
bool FromValue = false);
-static const char *getPredicateText(unsigned predicate) {
- const char * pred = "unknown";
- switch (predicate) {
- case FCmpInst::FCMP_FALSE: pred = "false"; break;
- case FCmpInst::FCMP_OEQ: pred = "oeq"; break;
- case FCmpInst::FCMP_OGT: pred = "ogt"; break;
- case FCmpInst::FCMP_OGE: pred = "oge"; break;
- case FCmpInst::FCMP_OLT: pred = "olt"; break;
- case FCmpInst::FCMP_OLE: pred = "ole"; break;
- case FCmpInst::FCMP_ONE: pred = "one"; break;
- case FCmpInst::FCMP_ORD: pred = "ord"; break;
- case FCmpInst::FCMP_UNO: pred = "uno"; break;
- case FCmpInst::FCMP_UEQ: pred = "ueq"; break;
- case FCmpInst::FCMP_UGT: pred = "ugt"; break;
- case FCmpInst::FCMP_UGE: pred = "uge"; break;
- case FCmpInst::FCMP_ULT: pred = "ult"; break;
- case FCmpInst::FCMP_ULE: pred = "ule"; break;
- case FCmpInst::FCMP_UNE: pred = "une"; break;
- case FCmpInst::FCMP_TRUE: pred = "true"; break;
- case ICmpInst::ICMP_EQ: pred = "eq"; break;
- case ICmpInst::ICMP_NE: pred = "ne"; break;
- case ICmpInst::ICMP_SGT: pred = "sgt"; break;
- case ICmpInst::ICMP_SGE: pred = "sge"; break;
- case ICmpInst::ICMP_SLT: pred = "slt"; break;
- case ICmpInst::ICMP_SLE: pred = "sle"; break;
- case ICmpInst::ICMP_UGT: pred = "ugt"; break;
- case ICmpInst::ICMP_UGE: pred = "uge"; break;
- case ICmpInst::ICMP_ULT: pred = "ult"; break;
- case ICmpInst::ICMP_ULE: pred = "ule"; break;
- }
- return pred;
-}
-
static void writeAtomicRMWOperation(raw_ostream &Out,
AtomicRMWInst::BinOp Op) {
switch (Op) {
@@ -1139,15 +1106,15 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
}
if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CV)) {
- if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEsingle ||
- &CFP->getValueAPF().getSemantics() == &APFloat::IEEEdouble) {
+ if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEsingle() ||
+ &CFP->getValueAPF().getSemantics() == &APFloat::IEEEdouble()) {
// We would like to output the FP constant value in exponential notation,
// but we cannot do this if doing so will lose precision. Check here to
// make sure that we only output it in exponential format if we can parse
// the value back and get the same value.
//
bool ignored;
- bool isDouble = &CFP->getValueAPF().getSemantics()==&APFloat::IEEEdouble;
+ bool isDouble = &CFP->getValueAPF().getSemantics()==&APFloat::IEEEdouble();
bool isInf = CFP->getValueAPF().isInfinity();
bool isNaN = CFP->getValueAPF().isNaN();
if (!isInf && !isNaN) {
@@ -1164,7 +1131,7 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
((StrVal[0] == '-' || StrVal[0] == '+') &&
(StrVal[1] >= '0' && StrVal[1] <= '9'))) {
// Reparse stringized version!
- if (APFloat(APFloat::IEEEdouble, StrVal).convertToDouble() == Val) {
+ if (APFloat(APFloat::IEEEdouble(), StrVal).convertToDouble() == Val) {
Out << StrVal;
return;
}
@@ -1179,7 +1146,7 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
APFloat apf = CFP->getValueAPF();
// Floats are represented in ASCII IR as double, convert.
if (!isDouble)
- apf.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
+ apf.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
&ignored);
Out << format_hex(apf.bitcastToAPInt().getZExtValue(), 0, /*Upper=*/true);
return;
@@ -1190,26 +1157,26 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
// fixed number of hex digits.
Out << "0x";
APInt API = CFP->getValueAPF().bitcastToAPInt();
- if (&CFP->getValueAPF().getSemantics() == &APFloat::x87DoubleExtended) {
+ if (&CFP->getValueAPF().getSemantics() == &APFloat::x87DoubleExtended()) {
Out << 'K';
Out << format_hex_no_prefix(API.getHiBits(16).getZExtValue(), 4,
/*Upper=*/true);
Out << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16,
/*Upper=*/true);
return;
- } else if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEquad) {
+ } else if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEquad()) {
Out << 'L';
Out << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16,
/*Upper=*/true);
Out << format_hex_no_prefix(API.getHiBits(64).getZExtValue(), 16,
/*Upper=*/true);
- } else if (&CFP->getValueAPF().getSemantics() == &APFloat::PPCDoubleDouble) {
+ } else if (&CFP->getValueAPF().getSemantics() == &APFloat::PPCDoubleDouble()) {
Out << 'M';
Out << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16,
/*Upper=*/true);
Out << format_hex_no_prefix(API.getHiBits(64).getZExtValue(), 16,
/*Upper=*/true);
- } else if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEhalf) {
+ } else if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEhalf()) {
Out << 'H';
Out << format_hex_no_prefix(API.getZExtValue(), 4,
/*Upper=*/true);
@@ -1349,15 +1316,22 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV,
Out << CE->getOpcodeName();
WriteOptimizationInfo(Out, CE);
if (CE->isCompare())
- Out << ' ' << getPredicateText(CE->getPredicate());
+ Out << ' ' << CmpInst::getPredicateName(
+ static_cast<CmpInst::Predicate>(CE->getPredicate()));
Out << " (";
+ Optional<unsigned> InRangeOp;
if (const GEPOperator *GEP = dyn_cast<GEPOperator>(CE)) {
TypePrinter.print(GEP->getSourceElementType(), Out);
Out << ", ";
+ InRangeOp = GEP->getInRangeIndex();
+ if (InRangeOp)
+ ++*InRangeOp;
}
for (User::const_op_iterator OI=CE->op_begin(); OI != CE->op_end(); ++OI) {
+ if (InRangeOp && unsigned(OI - CE->op_begin()) == *InRangeOp)
+ Out << "inrange ";
TypePrinter.print((*OI)->getType(), Out);
Out << ' ';
WriteAsOperandInternal(Out, *OI, &TypePrinter, Machine, Context);
@@ -1434,14 +1408,15 @@ struct MDFieldPrinter {
}
void printTag(const DINode *N);
void printMacinfoType(const DIMacroNode *N);
+ void printChecksumKind(const DIFile *N);
void printString(StringRef Name, StringRef Value,
bool ShouldSkipEmpty = true);
void printMetadata(StringRef Name, const Metadata *MD,
bool ShouldSkipNull = true);
template <class IntTy>
void printInt(StringRef Name, IntTy Int, bool ShouldSkipZero = true);
- void printBool(StringRef Name, bool Value);
- void printDIFlags(StringRef Name, unsigned Flags);
+ void printBool(StringRef Name, bool Value, Optional<bool> Default = None);
+ void printDIFlags(StringRef Name, DINode::DIFlags Flags);
template <class IntTy, class Stringifier>
void printDwarfEnum(StringRef Name, IntTy Value, Stringifier toString,
bool ShouldSkipZero = true);
@@ -1451,7 +1426,8 @@ struct MDFieldPrinter {
void MDFieldPrinter::printTag(const DINode *N) {
Out << FS << "tag: ";
- if (const char *Tag = dwarf::TagString(N->getTag()))
+ auto Tag = dwarf::TagString(N->getTag());
+ if (!Tag.empty())
Out << Tag;
else
Out << N->getTag();
@@ -1459,12 +1435,20 @@ void MDFieldPrinter::printTag(const DINode *N) {
void MDFieldPrinter::printMacinfoType(const DIMacroNode *N) {
Out << FS << "type: ";
- if (const char *Type = dwarf::MacinfoString(N->getMacinfoType()))
+ auto Type = dwarf::MacinfoString(N->getMacinfoType());
+ if (!Type.empty())
Out << Type;
else
Out << N->getMacinfoType();
}
+void MDFieldPrinter::printChecksumKind(const DIFile *N) {
+ if (N->getChecksumKind() == DIFile::CSK_None)
+ // Skip CSK_None checksum kind.
+ return;
+ Out << FS << "checksumkind: " << N->getChecksumKindAsString();
+}
+
void MDFieldPrinter::printString(StringRef Name, StringRef Value,
bool ShouldSkipEmpty) {
if (ShouldSkipEmpty && Value.empty())
@@ -1503,23 +1487,26 @@ void MDFieldPrinter::printInt(StringRef Name, IntTy Int, bool ShouldSkipZero) {
Out << FS << Name << ": " << Int;
}
-void MDFieldPrinter::printBool(StringRef Name, bool Value) {
+void MDFieldPrinter::printBool(StringRef Name, bool Value,
+ Optional<bool> Default) {
+ if (Default && Value == *Default)
+ return;
Out << FS << Name << ": " << (Value ? "true" : "false");
}
-void MDFieldPrinter::printDIFlags(StringRef Name, unsigned Flags) {
+void MDFieldPrinter::printDIFlags(StringRef Name, DINode::DIFlags Flags) {
if (!Flags)
return;
Out << FS << Name << ": ";
- SmallVector<unsigned, 8> SplitFlags;
- unsigned Extra = DINode::splitFlags(Flags, SplitFlags);
+ SmallVector<DINode::DIFlags, 8> SplitFlags;
+ auto Extra = DINode::splitFlags(Flags, SplitFlags);
FieldSeparator FlagsFS(" | ");
- for (unsigned F : SplitFlags) {
- const char *StringF = DINode::getFlagString(F);
- assert(StringF && "Expected valid flag");
+ for (auto F : SplitFlags) {
+ auto StringF = DINode::getFlagString(F);
+ assert(!StringF.empty() && "Expected valid flag");
Out << FlagsFS << StringF;
}
if (Extra || SplitFlags.empty())
@@ -1539,7 +1526,8 @@ void MDFieldPrinter::printDwarfEnum(StringRef Name, IntTy Value,
return;
Out << FS << Name << ": ";
- if (const char *S = toString(Value))
+ auto S = toString(Value);
+ if (!S.empty())
Out << S;
else
Out << Value;
@@ -1673,6 +1661,8 @@ static void writeDIFile(raw_ostream &Out, const DIFile *N, TypePrinting *,
/* ShouldSkipEmpty */ false);
Printer.printString("directory", N->getDirectory(),
/* ShouldSkipEmpty */ false);
+ Printer.printChecksumKind(N);
+ Printer.printString("checksum", N->getChecksum(), /* ShouldSkipEmpty */ true);
Out << ")";
}
@@ -1697,6 +1687,7 @@ static void writeDICompileUnit(raw_ostream &Out, const DICompileUnit *N,
Printer.printMetadata("imports", N->getRawImportedEntities());
Printer.printMetadata("macros", N->getRawMacros());
Printer.printInt("dwoId", N->getDWOId());
+ Printer.printBool("splitDebugInlining", N->getSplitDebugInlining(), true);
Out << ")";
}
@@ -1765,6 +1756,7 @@ static void writeDINamespace(raw_ostream &Out, const DINamespace *N,
Printer.printMetadata("scope", N->getRawScope(), /* ShouldSkipNull */ false);
Printer.printMetadata("file", N->getRawFile());
Printer.printInt("line", N->getLine());
+ Printer.printBool("exportSymbols", N->getExportSymbols(), false);
Out << ")";
}
@@ -1845,8 +1837,8 @@ static void writeDIGlobalVariable(raw_ostream &Out, const DIGlobalVariable *N,
Printer.printMetadata("type", N->getRawType());
Printer.printBool("isLocal", N->isLocalToUnit());
Printer.printBool("isDefinition", N->isDefinition());
- Printer.printMetadata("variable", N->getRawVariable());
Printer.printMetadata("declaration", N->getRawStaticDataMemberDeclaration());
+ Printer.printInt("align", N->getAlignInBits());
Out << ")";
}
@@ -1862,6 +1854,7 @@ static void writeDILocalVariable(raw_ostream &Out, const DILocalVariable *N,
Printer.printInt("line", N->getLine());
Printer.printMetadata("type", N->getRawType());
Printer.printDIFlags("flags", N->getFlags());
+ Printer.printInt("align", N->getAlignInBits());
Out << ")";
}
@@ -1872,8 +1865,8 @@ static void writeDIExpression(raw_ostream &Out, const DIExpression *N,
FieldSeparator FS;
if (N->isValid()) {
for (auto I = N->expr_op_begin(), E = N->expr_op_end(); I != E; ++I) {
- const char *OpStr = dwarf::OperationEncodingString(I->getOp());
- assert(OpStr && "Expected valid opcode");
+ auto OpStr = dwarf::OperationEncodingString(I->getOp());
+ assert(!OpStr.empty() && "Expected valid opcode");
Out << FS << OpStr;
for (unsigned A = 0, AE = I->getNumArgs(); A != AE; ++A)
@@ -1886,6 +1879,18 @@ static void writeDIExpression(raw_ostream &Out, const DIExpression *N,
Out << ")";
}
+static void writeDIGlobalVariableExpression(raw_ostream &Out,
+ const DIGlobalVariableExpression *N,
+ TypePrinting *TypePrinter,
+ SlotTracker *Machine,
+ const Module *Context) {
+ Out << "!DIGlobalVariableExpression(";
+ MDFieldPrinter Printer(Out, TypePrinter, Machine, Context);
+ Printer.printMetadata("var", N->getVariable());
+ Printer.printMetadata("expr", N->getExpression());
+ Out << ")";
+}
+
static void writeDIObjCProperty(raw_ostream &Out, const DIObjCProperty *N,
TypePrinting *TypePrinter, SlotTracker *Machine,
const Module *Context) {
@@ -2869,7 +2874,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
// Print out the compare instruction predicates
if (const CmpInst *CI = dyn_cast<CmpInst>(&I))
- Out << ' ' << getPredicateText(CI->getPredicate());
+ Out << ' ' << CmpInst::getPredicateName(CI->getPredicate());
// Print out the atomicrmw operation
if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(&I))
@@ -3008,7 +3013,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
}
Operand = CI->getCalledValue();
- FunctionType *FTy = cast<FunctionType>(CI->getFunctionType());
+ FunctionType *FTy = CI->getFunctionType();
Type *RetTy = FTy->getReturnType();
const AttributeSet &PAL = CI->getAttributes();
@@ -3045,7 +3050,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
} else if (const InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
Operand = II->getCalledValue();
- FunctionType *FTy = cast<FunctionType>(II->getFunctionType());
+ FunctionType *FTy = II->getFunctionType();
Type *RetTy = FTy->getReturnType();
const AttributeSet &PAL = II->getAttributes();
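Aside (not part of the patch): several hunks in the AsmWriter diff above reflect the APFloat API change where the floating-point semantics objects became accessor functions (IEEEsingle(), IEEEdouble(), ...) returning const fltSemantics&. A short sketch of the new spelling, with an arbitrary value:

#include "llvm/ADT/APFloat.h"
using namespace llvm;

int main() {
  // Construct from the semantics accessor plus a decimal string.
  APFloat V(APFloat::IEEEdouble(), "1.5");
  // Semantics are still compared by address, as the writer code above does.
  return (&V.getSemantics() == &APFloat::IEEEdouble()) ? 0 : 1;
}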
diff --git a/contrib/llvm/lib/IR/AttributeImpl.h b/contrib/llvm/lib/IR/AttributeImpl.h
index d58bff56576d..d0d27101aa86 100644
--- a/contrib/llvm/lib/IR/AttributeImpl.h
+++ b/contrib/llvm/lib/IR/AttributeImpl.h
@@ -16,17 +16,22 @@
#ifndef LLVM_LIB_IR_ATTRIBUTEIMPL_H
#define LLVM_LIB_IR_ATTRIBUTEIMPL_H
+#include "AttributeSetNode.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/FoldingSet.h"
-#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Attributes.h"
-#include "AttributeSetNode.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/TrailingObjects.h"
+#include <algorithm>
+#include <cassert>
#include <climits>
+#include <cstddef>
+#include <cstdint>
#include <string>
+#include <utility>
namespace llvm {
-class Constant;
class LLVMContext;
//===----------------------------------------------------------------------===//
@@ -36,10 +41,6 @@ class LLVMContext;
class AttributeImpl : public FoldingSetNode {
unsigned char KindID; ///< Holds the AttrEntryKind of the attribute
- // AttributesImpl is uniqued, these should not be publicly available.
- void operator=(const AttributeImpl &) = delete;
- AttributeImpl(const AttributeImpl &) = delete;
-
protected:
enum AttrEntryKind {
EnumAttrEntry,
@@ -50,6 +51,10 @@ protected:
AttributeImpl(AttrEntryKind KindID) : KindID(KindID) {}
public:
+ // AttributeImpl is uniqued; these should not be available.
+ AttributeImpl(const AttributeImpl &) = delete;
+ AttributeImpl &operator=(const AttributeImpl &) = delete;
+
virtual ~AttributeImpl();
bool isEnumAttribute() const { return KindID == EnumAttrEntry; }
@@ -85,9 +90,6 @@ public:
ID.AddString(Kind);
if (!Values.empty()) ID.AddString(Values);
}
-
- // FIXME: Remove this!
- static uint64_t getAttrMask(Attribute::AttrKind Val);
};
//===----------------------------------------------------------------------===//
@@ -168,12 +170,9 @@ private:
return getTrailingObjects<IndexAttrPair>() + Slot;
}
- // AttributesSet is uniqued, these should not be publicly available.
- void operator=(const AttributeSetImpl &) = delete;
- AttributeSetImpl(const AttributeSetImpl &) = delete;
public:
AttributeSetImpl(LLVMContext &C,
- ArrayRef<std::pair<unsigned, AttributeSetNode *> > Slots)
+ ArrayRef<std::pair<unsigned, AttributeSetNode *>> Slots)
: Context(C), NumSlots(Slots.size()), AvailableFunctionAttrs(0) {
static_assert(Attribute::EndAttrKinds <=
sizeof(AvailableFunctionAttrs) * CHAR_BIT,
@@ -206,6 +205,10 @@ public:
}
}
+ // AttributeSetImpl is uniqued; these should not be available.
+ AttributeSetImpl(const AttributeSetImpl &) = delete;
+ AttributeSetImpl &operator=(const AttributeSetImpl &) = delete;
+
void operator delete(void *p) { ::operator delete(p); }
/// \brief Get the context that created this AttributeSetImpl.
@@ -251,19 +254,16 @@ public:
Profile(ID, makeArrayRef(getNode(0), getNumSlots()));
}
static void Profile(FoldingSetNodeID &ID,
- ArrayRef<std::pair<unsigned, AttributeSetNode*> > Nodes) {
- for (unsigned i = 0, e = Nodes.size(); i != e; ++i) {
- ID.AddInteger(Nodes[i].first);
- ID.AddPointer(Nodes[i].second);
+ ArrayRef<std::pair<unsigned, AttributeSetNode*>> Nodes) {
+ for (const auto &Node : Nodes) {
+ ID.AddInteger(Node.first);
+ ID.AddPointer(Node.second);
}
}
- // FIXME: This atrocity is temporary.
- uint64_t Raw(unsigned Index) const;
-
void dump() const;
};
-} // end llvm namespace
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_IR_ATTRIBUTEIMPL_H
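Aside (not part of the patch): the AttributeImpl.h hunks above move the deleted copy operations from private declarations into the public section, the usual modern-C++ way to mark a uniqued type non-copyable so misuse is diagnosed as a call to a deleted function rather than an access error. A generic sketch of the idiom, with an invented class name:

class UniquedNode {
public:
  UniquedNode() = default;
  // Uniqued objects must never be copied; deleting the operations publicly
  // yields the clearer "deleted function" diagnostic.
  UniquedNode(const UniquedNode &) = delete;
  UniquedNode &operator=(const UniquedNode &) = delete;
};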
diff --git a/contrib/llvm/lib/IR/AttributeSetNode.h b/contrib/llvm/lib/IR/AttributeSetNode.h
index fab1ed51e4d6..23ce3713c20b 100644
--- a/contrib/llvm/lib/IR/AttributeSetNode.h
+++ b/contrib/llvm/lib/IR/AttributeSetNode.h
@@ -15,10 +15,17 @@
#ifndef LLVM_IR_ATTRIBUTESETNODE_H
#define LLVM_IR_ATTRIBUTESETNODE_H
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Attributes.h"
#include "llvm/Support/TrailingObjects.h"
+#include <algorithm>
#include <climits>
+#include <cstdint>
+#include <string>
+#include <utility>
namespace llvm {
@@ -49,10 +56,11 @@ class AttributeSetNode final
}
}
- // AttributesSetNode is uniqued, these should not be publicly available.
- void operator=(const AttributeSetNode &) = delete;
- AttributeSetNode(const AttributeSetNode &) = delete;
public:
+ // AttributeSetNode is uniqued; these should not be available.
+ AttributeSetNode(const AttributeSetNode &) = delete;
+ AttributeSetNode &operator=(const AttributeSetNode &) = delete;
+
void operator delete(void *p) { ::operator delete(p); }
static AttributeSetNode *get(LLVMContext &C, ArrayRef<Attribute> Attrs);
@@ -88,11 +96,11 @@ public:
Profile(ID, makeArrayRef(begin(), end()));
}
static void Profile(FoldingSetNodeID &ID, ArrayRef<Attribute> AttrList) {
- for (unsigned I = 0, E = AttrList.size(); I != E; ++I)
- AttrList[I].Profile(ID);
+ for (const auto &Attr : AttrList)
+ Attr.Profile(ID);
}
};
-} // end llvm namespace
+} // end namespace llvm
-#endif
+#endif // LLVM_IR_ATTRIBUTESETNODE_H
diff --git a/contrib/llvm/lib/IR/Attributes.cpp b/contrib/llvm/lib/IR/Attributes.cpp
index d774c1ae9dfb..1ec53cf1e1d6 100644
--- a/contrib/llvm/lib/IR/Attributes.cpp
+++ b/contrib/llvm/lib/IR/Attributes.cpp
@@ -38,7 +38,7 @@ using namespace llvm;
//
// In order to do this, we need to reserve one value of the second (optional)
// allocsize argument to signify "not present."
-LLVM_CONSTEXPR static unsigned AllocSizeNumElemsNotPresent = -1;
+static const unsigned AllocSizeNumElemsNotPresent = -1;
static uint64_t packAllocSizeArgs(unsigned ElemSizeArg,
const Optional<unsigned> &NumElemsArg) {
@@ -381,10 +381,18 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
std::string Result;
Result += (Twine('"') + getKindAsString() + Twine('"')).str();
- StringRef Val = pImpl->getValueAsString();
- if (Val.empty()) return Result;
-
- Result += ("=\"" + Val + Twine('"')).str();
+ std::string AttrVal = pImpl->getValueAsString();
+ if (AttrVal.empty()) return Result;
+
+ // Some attribute strings contain special characters that are not printable,
+ // so they have to be escaped for the attribute value to be printed as-is,
+ // e.g. "\01__gnu_mcount_nc".
+ {
+ raw_string_ostream OS(Result);
+ OS << "=\"";
+ PrintEscapedString(AttrVal, OS);
+ OS << "\"";
+ }
return Result;
}
@@ -464,78 +472,6 @@ bool AttributeImpl::operator<(const AttributeImpl &AI) const {
return getKindAsString() < AI.getKindAsString();
}
-uint64_t AttributeImpl::getAttrMask(Attribute::AttrKind Val) {
- // FIXME: Remove this.
- switch (Val) {
- case Attribute::EndAttrKinds:
- llvm_unreachable("Synthetic enumerators which should never get here");
-
- case Attribute::None: return 0;
- case Attribute::ZExt: return 1 << 0;
- case Attribute::SExt: return 1 << 1;
- case Attribute::NoReturn: return 1 << 2;
- case Attribute::InReg: return 1 << 3;
- case Attribute::StructRet: return 1 << 4;
- case Attribute::NoUnwind: return 1 << 5;
- case Attribute::NoAlias: return 1 << 6;
- case Attribute::ByVal: return 1 << 7;
- case Attribute::Nest: return 1 << 8;
- case Attribute::ReadNone: return 1 << 9;
- case Attribute::ReadOnly: return 1 << 10;
- case Attribute::NoInline: return 1 << 11;
- case Attribute::AlwaysInline: return 1 << 12;
- case Attribute::OptimizeForSize: return 1 << 13;
- case Attribute::StackProtect: return 1 << 14;
- case Attribute::StackProtectReq: return 1 << 15;
- case Attribute::Alignment: return 31 << 16;
- case Attribute::NoCapture: return 1 << 21;
- case Attribute::NoRedZone: return 1 << 22;
- case Attribute::NoImplicitFloat: return 1 << 23;
- case Attribute::Naked: return 1 << 24;
- case Attribute::InlineHint: return 1 << 25;
- case Attribute::StackAlignment: return 7 << 26;
- case Attribute::ReturnsTwice: return 1 << 29;
- case Attribute::UWTable: return 1 << 30;
- case Attribute::NonLazyBind: return 1U << 31;
- case Attribute::SanitizeAddress: return 1ULL << 32;
- case Attribute::MinSize: return 1ULL << 33;
- case Attribute::NoDuplicate: return 1ULL << 34;
- case Attribute::StackProtectStrong: return 1ULL << 35;
- case Attribute::SanitizeThread: return 1ULL << 36;
- case Attribute::SanitizeMemory: return 1ULL << 37;
- case Attribute::NoBuiltin: return 1ULL << 38;
- case Attribute::Returned: return 1ULL << 39;
- case Attribute::Cold: return 1ULL << 40;
- case Attribute::Builtin: return 1ULL << 41;
- case Attribute::OptimizeNone: return 1ULL << 42;
- case Attribute::InAlloca: return 1ULL << 43;
- case Attribute::NonNull: return 1ULL << 44;
- case Attribute::JumpTable: return 1ULL << 45;
- case Attribute::Convergent: return 1ULL << 46;
- case Attribute::SafeStack: return 1ULL << 47;
- case Attribute::NoRecurse: return 1ULL << 48;
- case Attribute::InaccessibleMemOnly: return 1ULL << 49;
- case Attribute::InaccessibleMemOrArgMemOnly: return 1ULL << 50;
- case Attribute::SwiftSelf: return 1ULL << 51;
- case Attribute::SwiftError: return 1ULL << 52;
- case Attribute::WriteOnly: return 1ULL << 53;
- case Attribute::Dereferenceable:
- llvm_unreachable("dereferenceable attribute not supported in raw format");
- break;
- case Attribute::DereferenceableOrNull:
- llvm_unreachable("dereferenceable_or_null attribute not supported in raw "
- "format");
- break;
- case Attribute::ArgMemOnly:
- llvm_unreachable("argmemonly attribute not supported in raw format");
- break;
- case Attribute::AllocSize:
- llvm_unreachable("allocsize not supported in raw format");
- break;
- }
- llvm_unreachable("Unsupported attribute type");
-}
-
//===----------------------------------------------------------------------===//
// AttributeSetNode Definition
//===----------------------------------------------------------------------===//
@@ -645,39 +581,6 @@ std::string AttributeSetNode::getAsString(bool InAttrGrp) const {
// AttributeSetImpl Definition
//===----------------------------------------------------------------------===//
-uint64_t AttributeSetImpl::Raw(unsigned Index) const {
- for (unsigned I = 0, E = getNumSlots(); I != E; ++I) {
- if (getSlotIndex(I) != Index) continue;
- const AttributeSetNode *ASN = getSlotNode(I);
- uint64_t Mask = 0;
-
- for (AttributeSetNode::iterator II = ASN->begin(),
- IE = ASN->end(); II != IE; ++II) {
- Attribute Attr = *II;
-
- // This cannot handle string attributes.
- if (Attr.isStringAttribute()) continue;
-
- Attribute::AttrKind Kind = Attr.getKindAsEnum();
-
- if (Kind == Attribute::Alignment)
- Mask |= (Log2_32(ASN->getAlignment()) + 1) << 16;
- else if (Kind == Attribute::StackAlignment)
- Mask |= (Log2_32(ASN->getStackAlignment()) + 1) << 26;
- else if (Kind == Attribute::Dereferenceable)
- llvm_unreachable("dereferenceable not supported in bit mask");
- else if (Kind == Attribute::AllocSize)
- llvm_unreachable("allocsize not supported in bit mask");
- else
- Mask |= AttributeImpl::getAttrMask(Kind);
- }
-
- return Mask;
- }
-
- return 0;
-}
-
LLVM_DUMP_METHOD void AttributeSetImpl::dump() const {
AttributeSet(const_cast<AttributeSetImpl *>(this)).dump();
}
@@ -721,10 +624,11 @@ AttributeSet AttributeSet::get(LLVMContext &C,
const std::pair<unsigned, Attribute> &RHS) {
return LHS.first < RHS.first;
}) && "Misordered Attributes list!");
- assert(std::none_of(Attrs.begin(), Attrs.end(),
- [](const std::pair<unsigned, Attribute> &Pair) {
- return Pair.second.hasAttribute(Attribute::None);
- }) && "Pointless attribute!");
+ assert(none_of(Attrs,
+ [](const std::pair<unsigned, Attribute> &Pair) {
+ return Pair.second.hasAttribute(Attribute::None);
+ }) &&
+ "Pointless attribute!");
// Create a vector if (unsigned, AttributeSetNode*) pairs from the attributes
// list.
@@ -738,8 +642,7 @@ AttributeSet AttributeSet::get(LLVMContext &C,
++I;
}
- AttrPairVec.push_back(std::make_pair(Index,
- AttributeSetNode::get(C, AttrVec)));
+ AttrPairVec.emplace_back(Index, AttributeSetNode::get(C, AttrVec));
}
return getImpl(C, AttrPairVec);
@@ -791,13 +694,12 @@ AttributeSet AttributeSet::get(LLVMContext &C, unsigned Index,
default:
Attr = Attribute::get(C, Kind);
}
- Attrs.push_back(std::make_pair(Index, Attr));
+ Attrs.emplace_back(Index, Attr);
}
// Add target-dependent (string) attributes.
for (const auto &TDA : B.td_attrs())
- Attrs.push_back(
- std::make_pair(Index, Attribute::get(C, TDA.first, TDA.second)));
+ Attrs.emplace_back(Index, Attribute::get(C, TDA.first, TDA.second));
return get(C, Attrs);
}
@@ -806,7 +708,7 @@ AttributeSet AttributeSet::get(LLVMContext &C, unsigned Index,
ArrayRef<Attribute::AttrKind> Kinds) {
SmallVector<std::pair<unsigned, Attribute>, 8> Attrs;
for (Attribute::AttrKind K : Kinds)
- Attrs.push_back(std::make_pair(Index, Attribute::get(C, K)));
+ Attrs.emplace_back(Index, Attribute::get(C, K));
return get(C, Attrs);
}
@@ -814,7 +716,7 @@ AttributeSet AttributeSet::get(LLVMContext &C, unsigned Index,
ArrayRef<StringRef> Kinds) {
SmallVector<std::pair<unsigned, Attribute>, 8> Attrs;
for (StringRef K : Kinds)
- Attrs.push_back(std::make_pair(Index, Attribute::get(C, K)));
+ Attrs.emplace_back(Index, Attribute::get(C, K));
return get(C, Attrs);
}
@@ -1108,6 +1010,10 @@ bool AttributeSet::hasFnAttribute(Attribute::AttrKind Kind) const {
return pImpl && pImpl->hasFnAttribute(Kind);
}
+bool AttributeSet::hasFnAttribute(StringRef Kind) const {
+ return hasAttribute(AttributeSet::FunctionIndex, Kind);
+}
+
bool AttributeSet::hasAttrSomewhere(Attribute::AttrKind Attr,
unsigned *Index) const {
if (!pImpl) return false;
@@ -1158,7 +1064,7 @@ uint64_t AttributeSet::getDereferenceableOrNullBytes(unsigned Index) const {
std::pair<unsigned, Optional<unsigned>>
AttributeSet::getAllocSizeArgs(unsigned Index) const {
AttributeSetNode *ASN = getAttributes(Index);
- return ASN ? ASN->getAllocSizeArgs() : std::make_pair(0, 0);
+ return ASN ? ASN->getAllocSizeArgs() : std::make_pair(0u, Optional<unsigned>(0u));
}
std::string AttributeSet::getAsString(unsigned Index, bool InAttrGrp) const {
@@ -1209,11 +1115,6 @@ AttributeSet AttributeSet::getSlotAttributes(unsigned Slot) const {
return pImpl->getSlotAttributes(Slot);
}
-uint64_t AttributeSet::Raw(unsigned Index) const {
- // FIXME: Remove this.
- return pImpl ? pImpl->Raw(Index) : 0;
-}
-
LLVM_DUMP_METHOD void AttributeSet::dump() const {
dbgs() << "PAL[\n";
@@ -1514,30 +1415,6 @@ bool AttrBuilder::operator==(const AttrBuilder &B) {
DerefBytes == B.DerefBytes;
}
-AttrBuilder &AttrBuilder::addRawValue(uint64_t Val) {
- // FIXME: Remove this in 4.0.
- if (!Val) return *this;
-
- for (Attribute::AttrKind I = Attribute::None; I != Attribute::EndAttrKinds;
- I = Attribute::AttrKind(I + 1)) {
- if (I == Attribute::Dereferenceable ||
- I == Attribute::DereferenceableOrNull ||
- I == Attribute::ArgMemOnly ||
- I == Attribute::AllocSize)
- continue;
- if (uint64_t A = (Val & AttributeImpl::getAttrMask(I))) {
- Attrs[I] = true;
-
- if (I == Attribute::Alignment)
- Alignment = 1ULL << ((A >> 16) - 1);
- else if (I == Attribute::StackAlignment)
- StackAlignment = 1ULL << ((A >> 26)-1);
- }
- }
-
- return *this;
-}
-
//===----------------------------------------------------------------------===//
// AttributeFuncs Function Defintions
//===----------------------------------------------------------------------===//
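Aside (not part of the patch): the getAsString change above routes string attribute values through PrintEscapedString so non-printable bytes survive the round trip through textual IR. A hedged sketch of what that looks like from the C++ API (the attribute kind and value are invented; the exact escaped form follows the assembly writer's escaping rules):

#include "llvm/IR/Attributes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

int main() {
  LLVMContext Ctx;
  // The leading \x01 byte is not printable, so getAsString now escapes it.
  Attribute A = Attribute::get(Ctx, "thunk-target", "\x01__gnu_mcount_nc");
  outs() << A.getAsString() << "\n";  // e.g. "thunk-target"="\01__gnu_mcount_nc"
  return 0;
}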
diff --git a/contrib/llvm/lib/IR/AutoUpgrade.cpp b/contrib/llvm/lib/IR/AutoUpgrade.cpp
index 2e4a2f89e2c7..2d9d0f95efa5 100644
--- a/contrib/llvm/lib/IR/AutoUpgrade.cpp
+++ b/contrib/llvm/lib/IR/AutoUpgrade.cpp
@@ -31,6 +31,8 @@
#include <cstring>
using namespace llvm;
+static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
+
// Upgrade the declarations of the SSE4.1 functions whose arguments have
// changed their type from v4f32 to v2i64.
static bool UpgradeSSE41Function(Function* F, Intrinsic::ID IID,
@@ -42,7 +44,7 @@ static bool UpgradeSSE41Function(Function* F, Intrinsic::ID IID,
return false;
// Yes, it's old, replace it with new version.
- F->setName(F->getName() + ".old");
+ rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
return true;
}
@@ -58,7 +60,7 @@ static bool UpgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
return false;
// Move this function aside and map down.
- F->setName(F->getName() + ".old");
+ rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), IID);
return true;
}
@@ -135,25 +137,49 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
case 'c': {
if (Name.startswith("ctlz.") && F->arg_size() == 1) {
- F->setName(Name + ".old");
+ rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::ctlz,
F->arg_begin()->getType());
return true;
}
if (Name.startswith("cttz.") && F->arg_size() == 1) {
- F->setName(Name + ".old");
+ rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(), Intrinsic::cttz,
F->arg_begin()->getType());
return true;
}
break;
}
-
+ case 'i': {
+ if (Name.startswith("invariant.start")) {
+ auto Args = F->getFunctionType()->params();
+ Type* ObjectPtr[1] = {Args[1]};
+ if (F->getName() !=
+ Intrinsic::getName(Intrinsic::invariant_start, ObjectPtr)) {
+ rename(F);
+ NewFn = Intrinsic::getDeclaration(
+ F->getParent(), Intrinsic::invariant_start, ObjectPtr);
+ return true;
+ }
+ }
+ if (Name.startswith("invariant.end")) {
+ auto Args = F->getFunctionType()->params();
+ Type* ObjectPtr[1] = {Args[2]};
+ if (F->getName() !=
+ Intrinsic::getName(Intrinsic::invariant_end, ObjectPtr)) {
+ rename(F);
+ NewFn = Intrinsic::getDeclaration(F->getParent(),
+ Intrinsic::invariant_end, ObjectPtr);
+ return true;
+ }
+ }
+ break;
+ }
case 'm': {
if (Name.startswith("masked.load.")) {
Type *Tys[] = { F->getReturnType(), F->arg_begin()->getType() };
if (F->getName() != Intrinsic::getName(Intrinsic::masked_load, Tys)) {
- F->setName(Name + ".old");
+ rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::masked_load,
Tys);
@@ -164,7 +190,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
auto Args = F->getFunctionType()->params();
Type *Tys[] = { Args[0], Args[1] };
if (F->getName() != Intrinsic::getName(Intrinsic::masked_store, Tys)) {
- F->setName(Name + ".old");
+ rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::masked_store,
Tys);
@@ -180,7 +206,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
if (F->arg_size() == 2 && Name.startswith("objectsize.")) {
Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
if (F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) {
- F->setName(Name + ".old");
+ rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::objectsize, Tys);
return true;
@@ -193,117 +219,172 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
NewFn = nullptr;
return true;
}
+ break;
case 'x': {
bool IsX86 = Name.startswith("x86.");
if (IsX86)
Name = Name.substr(4);
+ // All of the intrinsic matches below should be marked with the LLVM version
+ // that started autoupgrading them. At some point in the future we would like
+ // to use this information to remove upgrade code for some older intrinsics.
+ // It is currently undecided how we will determine that future point.
if (IsX86 &&
- (Name.startswith("sse2.pcmpeq.") ||
- Name.startswith("sse2.pcmpgt.") ||
- Name.startswith("avx2.pcmpeq.") ||
- Name.startswith("avx2.pcmpgt.") ||
- Name.startswith("avx512.mask.pcmpeq.") ||
- Name.startswith("avx512.mask.pcmpgt.") ||
- Name == "sse41.pmaxsb" ||
- Name == "sse2.pmaxs.w" ||
- Name == "sse41.pmaxsd" ||
- Name == "sse2.pmaxu.b" ||
- Name == "sse41.pmaxuw" ||
- Name == "sse41.pmaxud" ||
- Name == "sse41.pminsb" ||
- Name == "sse2.pmins.w" ||
- Name == "sse41.pminsd" ||
- Name == "sse2.pminu.b" ||
- Name == "sse41.pminuw" ||
- Name == "sse41.pminud" ||
- Name.startswith("avx2.pmax") ||
- Name.startswith("avx2.pmin") ||
- Name.startswith("avx2.vbroadcast") ||
- Name.startswith("avx2.pbroadcast") ||
- Name.startswith("avx.vpermil.") ||
- Name.startswith("sse2.pshuf") ||
- Name.startswith("avx512.pbroadcast") ||
- Name.startswith("avx512.mask.broadcast.s") ||
- Name.startswith("avx512.mask.movddup") ||
- Name.startswith("avx512.mask.movshdup") ||
- Name.startswith("avx512.mask.movsldup") ||
- Name.startswith("avx512.mask.pshuf.d.") ||
- Name.startswith("avx512.mask.pshufl.w.") ||
- Name.startswith("avx512.mask.pshufh.w.") ||
- Name.startswith("avx512.mask.vpermil.p") ||
- Name.startswith("avx512.mask.perm.df.") ||
- Name.startswith("avx512.mask.perm.di.") ||
- Name.startswith("avx512.mask.punpckl") ||
- Name.startswith("avx512.mask.punpckh") ||
- Name.startswith("avx512.mask.unpckl.") ||
- Name.startswith("avx512.mask.unpckh.") ||
- Name.startswith("avx512.mask.pand.") ||
- Name.startswith("avx512.mask.pandn.") ||
- Name.startswith("avx512.mask.por.") ||
- Name.startswith("avx512.mask.pxor.") ||
- Name.startswith("sse41.pmovsx") ||
- Name.startswith("sse41.pmovzx") ||
- Name.startswith("avx2.pmovsx") ||
- Name.startswith("avx2.pmovzx") ||
- Name == "sse2.cvtdq2pd" ||
- Name == "sse2.cvtps2pd" ||
- Name == "avx.cvtdq2.pd.256" ||
- Name == "avx.cvt.ps2.pd.256" ||
- Name.startswith("avx.vinsertf128.") ||
- Name == "avx2.vinserti128" ||
- Name.startswith("avx.vextractf128.") ||
- Name == "avx2.vextracti128" ||
- Name.startswith("sse4a.movnt.") ||
- Name.startswith("avx.movnt.") ||
- Name.startswith("avx512.storent.") ||
- Name == "sse2.storel.dq" ||
- Name.startswith("sse.storeu.") ||
- Name.startswith("sse2.storeu.") ||
- Name.startswith("avx.storeu.") ||
- Name.startswith("avx512.mask.storeu.p") ||
- Name.startswith("avx512.mask.storeu.b.") ||
- Name.startswith("avx512.mask.storeu.w.") ||
- Name.startswith("avx512.mask.storeu.d.") ||
- Name.startswith("avx512.mask.storeu.q.") ||
- Name.startswith("avx512.mask.store.p") ||
- Name.startswith("avx512.mask.store.b.") ||
- Name.startswith("avx512.mask.store.w.") ||
- Name.startswith("avx512.mask.store.d.") ||
- Name.startswith("avx512.mask.store.q.") ||
- Name.startswith("avx512.mask.loadu.p") ||
- Name.startswith("avx512.mask.loadu.b.") ||
- Name.startswith("avx512.mask.loadu.w.") ||
- Name.startswith("avx512.mask.loadu.d.") ||
- Name.startswith("avx512.mask.loadu.q.") ||
- Name.startswith("avx512.mask.load.p") ||
- Name.startswith("avx512.mask.load.b.") ||
- Name.startswith("avx512.mask.load.w.") ||
- Name.startswith("avx512.mask.load.d.") ||
- Name.startswith("avx512.mask.load.q.") ||
- Name == "sse42.crc32.64.8" ||
- Name.startswith("avx.vbroadcast.s") ||
- Name.startswith("avx512.mask.palignr.") ||
- Name.startswith("sse2.psll.dq") ||
- Name.startswith("sse2.psrl.dq") ||
- Name.startswith("avx2.psll.dq") ||
- Name.startswith("avx2.psrl.dq") ||
- Name.startswith("avx512.psll.dq") ||
- Name.startswith("avx512.psrl.dq") ||
- Name == "sse41.pblendw" ||
- Name.startswith("sse41.blendp") ||
- Name.startswith("avx.blend.p") ||
- Name == "avx2.pblendw" ||
- Name.startswith("avx2.pblendd.") ||
- Name == "avx2.vbroadcasti128" ||
- Name == "xop.vpcmov" ||
- (Name.startswith("xop.vpcom") && F->arg_size() == 2))) {
+ (Name.startswith("sse2.pcmpeq.") || // Added in 3.1
+ Name.startswith("sse2.pcmpgt.") || // Added in 3.1
+ Name.startswith("avx2.pcmpeq.") || // Added in 3.1
+ Name.startswith("avx2.pcmpgt.") || // Added in 3.1
+ Name.startswith("avx512.mask.pcmpeq.") || // Added in 3.9
+ Name.startswith("avx512.mask.pcmpgt.") || // Added in 3.9
+ Name == "sse.add.ss" || // Added in 4.0
+ Name == "sse2.add.sd" || // Added in 4.0
+ Name == "sse.sub.ss" || // Added in 4.0
+ Name == "sse2.sub.sd" || // Added in 4.0
+ Name == "sse.mul.ss" || // Added in 4.0
+ Name == "sse2.mul.sd" || // Added in 4.0
+ Name == "sse.div.ss" || // Added in 4.0
+ Name == "sse2.div.sd" || // Added in 4.0
+ Name == "sse41.pmaxsb" || // Added in 3.9
+ Name == "sse2.pmaxs.w" || // Added in 3.9
+ Name == "sse41.pmaxsd" || // Added in 3.9
+ Name == "sse2.pmaxu.b" || // Added in 3.9
+ Name == "sse41.pmaxuw" || // Added in 3.9
+ Name == "sse41.pmaxud" || // Added in 3.9
+ Name == "sse41.pminsb" || // Added in 3.9
+ Name == "sse2.pmins.w" || // Added in 3.9
+ Name == "sse41.pminsd" || // Added in 3.9
+ Name == "sse2.pminu.b" || // Added in 3.9
+ Name == "sse41.pminuw" || // Added in 3.9
+ Name == "sse41.pminud" || // Added in 3.9
+ Name.startswith("avx512.mask.pshuf.b.") || // Added in 4.0
+ Name.startswith("avx2.pmax") || // Added in 3.9
+ Name.startswith("avx2.pmin") || // Added in 3.9
+ Name.startswith("avx512.mask.pmax") || // Added in 4.0
+ Name.startswith("avx512.mask.pmin") || // Added in 4.0
+ Name.startswith("avx2.vbroadcast") || // Added in 3.8
+ Name.startswith("avx2.pbroadcast") || // Added in 3.8
+ Name.startswith("avx.vpermil.") || // Added in 3.1
+ Name.startswith("sse2.pshuf") || // Added in 3.9
+ Name.startswith("avx512.pbroadcast") || // Added in 3.9
+ Name.startswith("avx512.mask.broadcast.s") || // Added in 3.9
+ Name.startswith("avx512.mask.movddup") || // Added in 3.9
+ Name.startswith("avx512.mask.movshdup") || // Added in 3.9
+ Name.startswith("avx512.mask.movsldup") || // Added in 3.9
+ Name.startswith("avx512.mask.pshuf.d.") || // Added in 3.9
+ Name.startswith("avx512.mask.pshufl.w.") || // Added in 3.9
+ Name.startswith("avx512.mask.pshufh.w.") || // Added in 3.9
+ Name.startswith("avx512.mask.shuf.p") || // Added in 4.0
+ Name.startswith("avx512.mask.vpermil.p") || // Added in 3.9
+ Name.startswith("avx512.mask.perm.df.") || // Added in 3.9
+ Name.startswith("avx512.mask.perm.di.") || // Added in 3.9
+ Name.startswith("avx512.mask.punpckl") || // Added in 3.9
+ Name.startswith("avx512.mask.punpckh") || // Added in 3.9
+ Name.startswith("avx512.mask.unpckl.") || // Added in 3.9
+ Name.startswith("avx512.mask.unpckh.") || // Added in 3.9
+ Name.startswith("avx512.mask.pand.") || // Added in 3.9
+ Name.startswith("avx512.mask.pandn.") || // Added in 3.9
+ Name.startswith("avx512.mask.por.") || // Added in 3.9
+ Name.startswith("avx512.mask.pxor.") || // Added in 3.9
+ Name.startswith("avx512.mask.and.") || // Added in 3.9
+ Name.startswith("avx512.mask.andn.") || // Added in 3.9
+ Name.startswith("avx512.mask.or.") || // Added in 3.9
+ Name.startswith("avx512.mask.xor.") || // Added in 3.9
+ Name.startswith("avx512.mask.padd.") || // Added in 4.0
+ Name.startswith("avx512.mask.psub.") || // Added in 4.0
+ Name.startswith("avx512.mask.pmull.") || // Added in 4.0
+ Name.startswith("avx512.mask.cvtdq2pd.") || // Added in 4.0
+ Name.startswith("avx512.mask.cvtudq2pd.") || // Added in 4.0
+ Name.startswith("avx512.mask.pmul.dq.") || // Added in 4.0
+ Name.startswith("avx512.mask.pmulu.dq.") || // Added in 4.0
+ Name == "avx512.mask.add.pd.128" || // Added in 4.0
+ Name == "avx512.mask.add.pd.256" || // Added in 4.0
+ Name == "avx512.mask.add.ps.128" || // Added in 4.0
+ Name == "avx512.mask.add.ps.256" || // Added in 4.0
+ Name == "avx512.mask.div.pd.128" || // Added in 4.0
+ Name == "avx512.mask.div.pd.256" || // Added in 4.0
+ Name == "avx512.mask.div.ps.128" || // Added in 4.0
+ Name == "avx512.mask.div.ps.256" || // Added in 4.0
+ Name == "avx512.mask.mul.pd.128" || // Added in 4.0
+ Name == "avx512.mask.mul.pd.256" || // Added in 4.0
+ Name == "avx512.mask.mul.ps.128" || // Added in 4.0
+ Name == "avx512.mask.mul.ps.256" || // Added in 4.0
+ Name == "avx512.mask.sub.pd.128" || // Added in 4.0
+ Name == "avx512.mask.sub.pd.256" || // Added in 4.0
+ Name == "avx512.mask.sub.ps.128" || // Added in 4.0
+ Name == "avx512.mask.sub.ps.256" || // Added in 4.0
+ Name.startswith("avx512.mask.vpermilvar.") || // Added in 4.0
+ Name.startswith("avx512.mask.psll.d") || // Added in 4.0
+ Name.startswith("avx512.mask.psll.q") || // Added in 4.0
+ Name.startswith("avx512.mask.psll.w") || // Added in 4.0
+ Name.startswith("avx512.mask.psra.d") || // Added in 4.0
+ Name.startswith("avx512.mask.psra.q") || // Added in 4.0
+ Name.startswith("avx512.mask.psra.w") || // Added in 4.0
+ Name.startswith("avx512.mask.psrl.d") || // Added in 4.0
+ Name.startswith("avx512.mask.psrl.q") || // Added in 4.0
+ Name.startswith("avx512.mask.psrl.w") || // Added in 4.0
+ Name.startswith("avx512.mask.pslli") || // Added in 4.0
+ Name.startswith("avx512.mask.psrai") || // Added in 4.0
+ Name.startswith("avx512.mask.psrli") || // Added in 4.0
+ Name.startswith("avx512.mask.psllv") || // Added in 4.0
+ Name.startswith("avx512.mask.psrav") || // Added in 4.0
+ Name.startswith("avx512.mask.psrlv") || // Added in 4.0
+ Name.startswith("sse41.pmovsx") || // Added in 3.8
+ Name.startswith("sse41.pmovzx") || // Added in 3.9
+ Name.startswith("avx2.pmovsx") || // Added in 3.9
+ Name.startswith("avx2.pmovzx") || // Added in 3.9
+ Name.startswith("avx512.mask.pmovsx") || // Added in 4.0
+ Name.startswith("avx512.mask.pmovzx") || // Added in 4.0
+ Name == "sse2.cvtdq2pd" || // Added in 3.9
+ Name == "sse2.cvtps2pd" || // Added in 3.9
+ Name == "avx.cvtdq2.pd.256" || // Added in 3.9
+ Name == "avx.cvt.ps2.pd.256" || // Added in 3.9
+ Name.startswith("avx.vinsertf128.") || // Added in 3.7
+ Name == "avx2.vinserti128" || // Added in 3.7
+ Name.startswith("avx.vextractf128.") || // Added in 3.7
+ Name == "avx2.vextracti128" || // Added in 3.7
+ Name.startswith("sse4a.movnt.") || // Added in 3.9
+ Name.startswith("avx.movnt.") || // Added in 3.2
+ Name.startswith("avx512.storent.") || // Added in 3.9
+ Name == "sse2.storel.dq" || // Added in 3.9
+ Name.startswith("sse.storeu.") || // Added in 3.9
+ Name.startswith("sse2.storeu.") || // Added in 3.9
+ Name.startswith("avx.storeu.") || // Added in 3.9
+ Name.startswith("avx512.mask.storeu.") || // Added in 3.9
+ Name.startswith("avx512.mask.store.p") || // Added in 3.9
+ Name.startswith("avx512.mask.store.b.") || // Added in 3.9
+ Name.startswith("avx512.mask.store.w.") || // Added in 3.9
+ Name.startswith("avx512.mask.store.d.") || // Added in 3.9
+ Name.startswith("avx512.mask.store.q.") || // Added in 3.9
+ Name.startswith("avx512.mask.loadu.") || // Added in 3.9
+ Name.startswith("avx512.mask.load.") || // Added in 3.9
+ Name == "sse42.crc32.64.8" || // Added in 3.4
+ Name.startswith("avx.vbroadcast.s") || // Added in 3.5
+ Name.startswith("avx512.mask.palignr.") || // Added in 3.9
+ Name.startswith("avx512.mask.valign.") || // Added in 4.0
+ Name.startswith("sse2.psll.dq") || // Added in 3.7
+ Name.startswith("sse2.psrl.dq") || // Added in 3.7
+ Name.startswith("avx2.psll.dq") || // Added in 3.7
+ Name.startswith("avx2.psrl.dq") || // Added in 3.7
+ Name.startswith("avx512.psll.dq") || // Added in 3.9
+ Name.startswith("avx512.psrl.dq") || // Added in 3.9
+ Name == "sse41.pblendw" || // Added in 3.7
+ Name.startswith("sse41.blendp") || // Added in 3.7
+ Name.startswith("avx.blend.p") || // Added in 3.7
+ Name == "avx2.pblendw" || // Added in 3.7
+ Name.startswith("avx2.pblendd.") || // Added in 3.7
+ Name.startswith("avx.vbroadcastf128") || // Added in 4.0
+ Name == "avx2.vbroadcasti128" || // Added in 3.7
+ Name == "xop.vpcmov" || // Added in 3.8
+ Name.startswith("avx512.mask.move.s") || // Added in 4.0
+ (Name.startswith("xop.vpcom") && // Added in 3.2
+ F->arg_size() == 2))) {
NewFn = nullptr;
return true;
}
// SSE4.1 ptest functions may have an old signature.
- if (IsX86 && Name.startswith("sse41.ptest")) {
+ if (IsX86 && Name.startswith("sse41.ptest")) { // Added in 3.2
if (Name.substr(11) == "c")
return UpgradeSSE41Function(F, Intrinsic::x86_sse41_ptestc, NewFn);
if (Name.substr(11) == "z")
@@ -313,67 +394,44 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
}
// Several blend and other instructions with masks used the wrong number of
// bits.
- if (IsX86 && Name == "sse41.insertps")
+ if (IsX86 && Name == "sse41.insertps") // Added in 3.6
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_insertps,
NewFn);
- if (IsX86 && Name == "sse41.dppd")
+ if (IsX86 && Name == "sse41.dppd") // Added in 3.6
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dppd,
NewFn);
- if (IsX86 && Name == "sse41.dpps")
+ if (IsX86 && Name == "sse41.dpps") // Added in 3.6
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_dpps,
NewFn);
- if (IsX86 && Name == "sse41.mpsadbw")
+ if (IsX86 && Name == "sse41.mpsadbw") // Added in 3.6
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_sse41_mpsadbw,
NewFn);
- if (IsX86 && Name == "avx.dp.ps.256")
+ if (IsX86 && Name == "avx.dp.ps.256") // Added in 3.6
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx_dp_ps_256,
NewFn);
- if (IsX86 && Name == "avx2.mpsadbw")
+ if (IsX86 && Name == "avx2.mpsadbw") // Added in 3.6
return UpgradeX86IntrinsicsWith8BitMask(F, Intrinsic::x86_avx2_mpsadbw,
NewFn);
- // frcz.ss/sd may need to have an argument dropped
+ // frcz.ss/sd may need to have an argument dropped. Added in 3.2
if (IsX86 && Name.startswith("xop.vfrcz.ss") && F->arg_size() == 2) {
- F->setName(Name + ".old");
+ rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::x86_xop_vfrcz_ss);
return true;
}
if (IsX86 && Name.startswith("xop.vfrcz.sd") && F->arg_size() == 2) {
- F->setName(Name + ".old");
+ rename(F);
NewFn = Intrinsic::getDeclaration(F->getParent(),
Intrinsic::x86_xop_vfrcz_sd);
return true;
}
- if (IsX86 && (Name.startswith("avx512.mask.pslli.") ||
- Name.startswith("avx512.mask.psrai.") ||
- Name.startswith("avx512.mask.psrli."))) {
- Intrinsic::ID ShiftID;
- if (Name.slice(12, 16) == "psll")
- ShiftID = Name[18] == 'd' ? Intrinsic::x86_avx512_mask_psll_di_512
- : Intrinsic::x86_avx512_mask_psll_qi_512;
- else if (Name.slice(12, 16) == "psra")
- ShiftID = Name[18] == 'd' ? Intrinsic::x86_avx512_mask_psra_di_512
- : Intrinsic::x86_avx512_mask_psra_qi_512;
- else
- ShiftID = Name[18] == 'd' ? Intrinsic::x86_avx512_mask_psrl_di_512
- : Intrinsic::x86_avx512_mask_psrl_qi_512;
- F->setName("llvm.x86." + Name + ".old");
- NewFn = Intrinsic::getDeclaration(F->getParent(), ShiftID);
- return true;
- }
- // Fix the FMA4 intrinsics to remove the 4
- if (IsX86 && Name.startswith("fma4.")) {
- F->setName("llvm.x86.fma" + Name.substr(5));
- NewFn = F;
- return true;
- }
// Upgrade any XOP PERMIL2 index operand still using a float/double vector.
- if (IsX86 && Name.startswith("xop.vpermil2")) {
+ if (IsX86 && Name.startswith("xop.vpermil2")) { // Added in 3.9
auto Params = F->getFunctionType()->params();
auto Idx = Params[2];
if (Idx->getScalarType()->isFloatingPointTy()) {
- F->setName("llvm.x86." + Name + ".old");
+ rename(F);
unsigned IdxSize = Idx->getPrimitiveSizeInBits();
unsigned EltSize = Idx->getScalarSizeInBits();
Intrinsic::ID Permil2ID;
@@ -517,13 +575,23 @@ static Value *EmitX86Select(IRBuilder<> &Builder, Value *Mask,
return Builder.CreateSelect(Mask, Op0, Op1);
}
-static Value *UpgradeX86PALIGNRIntrinsics(IRBuilder<> &Builder,
- Value *Op0, Value *Op1, Value *Shift,
- Value *Passthru, Value *Mask) {
+// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
+// PALIGNR handles large immediates by shifting, while VALIGN masks the immediate,
+// so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
+static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
+ Value *Op1, Value *Shift,
+ Value *Passthru, Value *Mask,
+ bool IsVALIGN) {
unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
unsigned NumElts = Op0->getType()->getVectorNumElements();
- assert(NumElts % 16 == 0);
+ assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
+ assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
+ assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
+
+ // Mask the immediate for VALIGN.
+ if (IsVALIGN)
+ ShiftVal &= (NumElts - 1);
// If palignr is shifting the pair of vectors more than the size of two
// lanes, emit zero.
@@ -540,10 +608,10 @@ static Value *UpgradeX86PALIGNRIntrinsics(IRBuilder<> &Builder,
uint32_t Indices[64];
// 256-bit palignr operates on 128-bit lanes so we need to handle that
- for (unsigned l = 0; l != NumElts; l += 16) {
+ for (unsigned l = 0; l < NumElts; l += 16) {
for (unsigned i = 0; i != 16; ++i) {
unsigned Idx = ShiftVal + i;
- if (Idx >= 16)
+ if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
Idx += NumElts - 16; // End of lane, switch operand.
Indices[l + i] = Idx + l;
}
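As an aside (not part of the patch): the shuffle-index arithmetic in the hunk above can be checked in isolation. The sketch below reproduces it on plain integers, with NumElts, ShiftVal and IsVALIGN standing in for the values the upgrade reads from the call; element i of the result selects Indices[i] from the concatenation of the two source vectors. All names here are illustrative only.

#include <cstdio>

int main() {
  const unsigned NumElts = 32;  // e.g. a 256-bit byte PALIGNR
  unsigned ShiftVal = 5;        // element shift taken from the immediate
  const bool IsVALIGN = false;

  // VALIGN masks the immediate instead of shifting (see the hunk above).
  if (IsVALIGN)
    ShiftVal &= (NumElts - 1);

  unsigned Indices[64];
  // PALIGNR works per 128-bit (16-element) lane; VALIGN has a single lane.
  for (unsigned l = 0; l < NumElts; l += 16) {
    for (unsigned i = 0; i != 16; ++i) {
      unsigned Idx = ShiftVal + i;
      if (!IsVALIGN && Idx >= 16)
        Idx += NumElts - 16;  // end of lane, switch to the other shuffle operand
      Indices[l + i] = Idx + l;
    }
  }

  for (unsigned i = 0; i != NumElts; ++i)
    printf("%u ", Indices[i]);
  printf("\n");
  return 0;
}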
@@ -601,7 +669,12 @@ static Value *upgradeIntMinMax(IRBuilder<> &Builder, CallInst &CI,
Value *Op0 = CI.getArgOperand(0);
Value *Op1 = CI.getArgOperand(1);
Value *Cmp = Builder.CreateICmp(Pred, Op0, Op1);
- return Builder.CreateSelect(Cmp, Op0, Op1);
+ Value *Res = Builder.CreateSelect(Cmp, Op0, Op1);
+
+ if (CI.getNumArgOperands() == 4)
+ Res = EmitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
+
+ return Res;
}
static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
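For reference (not from the patch): upgradeIntMinMax above lowers the pmaxs/pmaxu/pmins/pminu families to a compare plus a select, and when the call carries a passthru and mask operand it blends the result element-wise. A minimal sketch of that semantics on plain arrays, with all values invented for illustration:

#include <cstdint>
#include <cstdio>

int main() {
  const int N = 4;
  int32_t Op0[N] = {1, -5, 7, 0};
  int32_t Op1[N] = {2, -9, 3, 0};
  int32_t Passthru[N] = {100, 100, 100, 100};
  uint8_t Mask = 0b1011;  // bit i keeps the computed element, else passthru

  int32_t Res[N];
  for (int i = 0; i < N; ++i) {
    bool Cmp = Op0[i] > Op1[i];           // ICMP_SGT
    int32_t Max = Cmp ? Op0[i] : Op1[i];  // select(Cmp, Op0, Op1)
    Res[i] = ((Mask >> i) & 1) ? Max : Passthru[i];
  }

  for (int i = 0; i < N; ++i)
    printf("%d ", (int)Res[i]);
  printf("\n");
  return 0;
}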
@@ -629,6 +702,30 @@ static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
std::max(NumElts, 8U)));
}
+// Replace a masked intrinsic with an older unmasked intrinsic.
+static Value *UpgradeX86MaskedShift(IRBuilder<> &Builder, CallInst &CI,
+ Intrinsic::ID IID) {
+ Function *F = CI.getCalledFunction();
+ Function *Intrin = Intrinsic::getDeclaration(F->getParent(), IID);
+ Value *Rep = Builder.CreateCall(Intrin,
+ { CI.getArgOperand(0), CI.getArgOperand(1) });
+ return EmitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
+}
+
+static Value* upgradeMaskedMove(IRBuilder<> &Builder, CallInst &CI) {
+ Value* A = CI.getArgOperand(0);
+ Value* B = CI.getArgOperand(1);
+ Value* Src = CI.getArgOperand(2);
+ Value* Mask = CI.getArgOperand(3);
+
+ Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
+ Value* Cmp = Builder.CreateIsNotNull(AndNode);
+ Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
+ Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
+ Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
+ return Builder.CreateInsertElement(A, Select, (uint64_t)0);
+}
+
/// Upgrade a call to an old intrinsic. All argument and return casting must be
/// provided to seamlessly integrate with existing context.
void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
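Not part of the patch, but useful context: both new helpers above follow the same shape. UpgradeX86MaskedShift computes the unmasked operation and then selects between it and the passthru per mask bit, while upgradeMaskedMove only consults bit 0 of the mask for the low element. A rough model of the masked-move semantics on plain arrays (names and values are illustrative):

#include <cstdint>
#include <cstdio>

int main() {
  const int N = 4;
  float A[N]   = {1, 2, 3, 4};
  float B[N]   = {9, 8, 7, 6};
  float Src[N] = {-1, -2, -3, -4};
  uint8_t Mask = 0;                // try 1 to take B[0] instead of Src[0]

  float Res[N];
  for (int i = 0; i < N; ++i)
    Res[i] = A[i];                 // upper elements always come from A
  Res[0] = (Mask & 1) ? B[0] : Src[0];  // only bit 0 of the mask matters

  for (int i = 0; i < N; ++i)
    printf("%g ", Res[i]);
  printf("\n");
  return 0;
}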
@@ -650,67 +747,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
if (IsX86)
Name = Name.substr(4);
- Value *Rep;
- // Upgrade packed integer vector compare intrinsics to compare instructions.
- if (IsX86 && (Name.startswith("sse2.pcmpeq.") ||
- Name.startswith("avx2.pcmpeq."))) {
- Rep = Builder.CreateICmpEQ(CI->getArgOperand(0), CI->getArgOperand(1),
- "pcmpeq");
- Rep = Builder.CreateSExt(Rep, CI->getType(), "");
- } else if (IsX86 && (Name.startswith("sse2.pcmpgt.") ||
- Name.startswith("avx2.pcmpgt."))) {
- Rep = Builder.CreateICmpSGT(CI->getArgOperand(0), CI->getArgOperand(1),
- "pcmpgt");
- Rep = Builder.CreateSExt(Rep, CI->getType(), "");
- } else if (IsX86 && Name.startswith("avx512.mask.pcmpeq.")) {
- Rep = upgradeMaskedCompare(Builder, *CI, ICmpInst::ICMP_EQ);
- } else if (IsX86 && Name.startswith("avx512.mask.pcmpgt.")) {
- Rep = upgradeMaskedCompare(Builder, *CI, ICmpInst::ICMP_SGT);
- } else if (IsX86 && (Name == "sse41.pmaxsb" ||
- Name == "sse2.pmaxs.w" ||
- Name == "sse41.pmaxsd" ||
- Name.startswith("avx2.pmaxs"))) {
- Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT);
- } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
- Name == "sse41.pmaxuw" ||
- Name == "sse41.pmaxud" ||
- Name.startswith("avx2.pmaxu"))) {
- Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT);
- } else if (IsX86 && (Name == "sse41.pminsb" ||
- Name == "sse2.pmins.w" ||
- Name == "sse41.pminsd" ||
- Name.startswith("avx2.pmins"))) {
- Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT);
- } else if (IsX86 && (Name == "sse2.pminu.b" ||
- Name == "sse41.pminuw" ||
- Name == "sse41.pminud" ||
- Name.startswith("avx2.pminu"))) {
- Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT);
- } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
- Name == "sse2.cvtps2pd" ||
- Name == "avx.cvtdq2.pd.256" ||
- Name == "avx.cvt.ps2.pd.256")) {
- // Lossless i32/float to double conversion.
- // Extract the bottom elements if necessary and convert to double vector.
- Value *Src = CI->getArgOperand(0);
- VectorType *SrcTy = cast<VectorType>(Src->getType());
- VectorType *DstTy = cast<VectorType>(CI->getType());
- Rep = CI->getArgOperand(0);
-
- unsigned NumDstElts = DstTy->getNumElements();
- if (NumDstElts < SrcTy->getNumElements()) {
- assert(NumDstElts == 2 && "Unexpected vector size");
- uint32_t ShuffleMask[2] = { 0, 1 };
- Rep = Builder.CreateShuffleVector(Rep, UndefValue::get(SrcTy),
- ShuffleMask);
- }
-
- bool Int2Double = (StringRef::npos != Name.find("cvtdq2"));
- if (Int2Double)
- Rep = Builder.CreateSIToFP(Rep, DstTy, "cvtdq2pd");
- else
- Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
- } else if (IsX86 && Name.startswith("sse4a.movnt.")) {
+ if (IsX86 && Name.startswith("sse4a.movnt.")) {
Module *M = F->getParent();
SmallVector<Metadata *, 1> Elts;
Elts.push_back(
@@ -734,8 +771,10 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
// Remove intrinsic.
CI->eraseFromParent();
return;
- } else if (IsX86 && (Name.startswith("avx.movnt.") ||
- Name.startswith("avx512.storent."))) {
+ }
+
+ if (IsX86 && (Name.startswith("avx.movnt.") ||
+ Name.startswith("avx512.storent."))) {
Module *M = F->getParent();
SmallVector<Metadata *, 1> Elts;
Elts.push_back(
@@ -757,7 +796,9 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
// Remove intrinsic.
CI->eraseFromParent();
return;
- } else if (IsX86 && Name == "sse2.storel.dq") {
+ }
+
+ if (IsX86 && Name == "sse2.storel.dq") {
Value *Arg0 = CI->getArgOperand(0);
Value *Arg1 = CI->getArgOperand(1);
@@ -772,9 +813,11 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
// Remove intrinsic.
CI->eraseFromParent();
return;
- } else if (IsX86 && (Name.startswith("sse.storeu.") ||
- Name.startswith("sse2.storeu.") ||
- Name.startswith("avx.storeu."))) {
+ }
+
+ if (IsX86 && (Name.startswith("sse.storeu.") ||
+ Name.startswith("sse2.storeu.") ||
+ Name.startswith("avx.storeu."))) {
Value *Arg0 = CI->getArgOperand(0);
Value *Arg1 = CI->getArgOperand(1);
@@ -786,41 +829,140 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
// Remove intrinsic.
CI->eraseFromParent();
return;
- } else if (IsX86 && (Name.startswith("avx512.mask.storeu.p") ||
- Name.startswith("avx512.mask.storeu.b.") ||
- Name.startswith("avx512.mask.storeu.w.") ||
- Name.startswith("avx512.mask.storeu.d.") ||
- Name.startswith("avx512.mask.storeu.q."))) {
+ }
+
+ if (IsX86 && (Name.startswith("avx512.mask.storeu."))) {
UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(2), /*Aligned*/false);
// Remove intrinsic.
CI->eraseFromParent();
return;
- } else if (IsX86 && (Name.startswith("avx512.mask.store.p") ||
- Name.startswith("avx512.mask.store.b.") ||
- Name.startswith("avx512.mask.store.w.") ||
- Name.startswith("avx512.mask.store.d.") ||
- Name.startswith("avx512.mask.store.q."))) {
+ }
+
+ if (IsX86 && (Name.startswith("avx512.mask.store."))) {
UpgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
CI->getArgOperand(2), /*Aligned*/true);
// Remove intrinsic.
CI->eraseFromParent();
return;
- } else if (IsX86 && (Name.startswith("avx512.mask.loadu.p") ||
- Name.startswith("avx512.mask.loadu.b.") ||
- Name.startswith("avx512.mask.loadu.w.") ||
- Name.startswith("avx512.mask.loadu.d.") ||
- Name.startswith("avx512.mask.loadu.q."))) {
+ }
+
+ Value *Rep;
+ // Upgrade packed integer vector compare intrinsics to compare instructions.
+ if (IsX86 && (Name.startswith("sse2.pcmpeq.") ||
+ Name.startswith("avx2.pcmpeq."))) {
+ Rep = Builder.CreateICmpEQ(CI->getArgOperand(0), CI->getArgOperand(1),
+ "pcmpeq");
+ Rep = Builder.CreateSExt(Rep, CI->getType(), "");
+ } else if (IsX86 && (Name.startswith("sse2.pcmpgt.") ||
+ Name.startswith("avx2.pcmpgt."))) {
+ Rep = Builder.CreateICmpSGT(CI->getArgOperand(0), CI->getArgOperand(1),
+ "pcmpgt");
+ Rep = Builder.CreateSExt(Rep, CI->getType(), "");
+ } else if (IsX86 && (Name == "sse.add.ss" || Name == "sse2.add.sd")) {
+ Type *I32Ty = Type::getInt32Ty(C);
+ Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
+ ConstantInt::get(I32Ty, 0));
+ Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
+ ConstantInt::get(I32Ty, 0));
+ Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
+ Builder.CreateFAdd(Elt0, Elt1),
+ ConstantInt::get(I32Ty, 0));
+ } else if (IsX86 && (Name == "sse.sub.ss" || Name == "sse2.sub.sd")) {
+ Type *I32Ty = Type::getInt32Ty(C);
+ Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
+ ConstantInt::get(I32Ty, 0));
+ Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
+ ConstantInt::get(I32Ty, 0));
+ Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
+ Builder.CreateFSub(Elt0, Elt1),
+ ConstantInt::get(I32Ty, 0));
+ } else if (IsX86 && (Name == "sse.mul.ss" || Name == "sse2.mul.sd")) {
+ Type *I32Ty = Type::getInt32Ty(C);
+ Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
+ ConstantInt::get(I32Ty, 0));
+ Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
+ ConstantInt::get(I32Ty, 0));
+ Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
+ Builder.CreateFMul(Elt0, Elt1),
+ ConstantInt::get(I32Ty, 0));
+ } else if (IsX86 && (Name == "sse.div.ss" || Name == "sse2.div.sd")) {
+ Type *I32Ty = Type::getInt32Ty(C);
+ Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
+ ConstantInt::get(I32Ty, 0));
+ Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
+ ConstantInt::get(I32Ty, 0));
+ Rep = Builder.CreateInsertElement(CI->getArgOperand(0),
+ Builder.CreateFDiv(Elt0, Elt1),
+ ConstantInt::get(I32Ty, 0));
+ } else if (IsX86 && Name.startswith("avx512.mask.pcmpeq.")) {
+ Rep = upgradeMaskedCompare(Builder, *CI, ICmpInst::ICMP_EQ);
+ } else if (IsX86 && Name.startswith("avx512.mask.pcmpgt.")) {
+ Rep = upgradeMaskedCompare(Builder, *CI, ICmpInst::ICMP_SGT);
+ } else if (IsX86 && (Name == "sse41.pmaxsb" ||
+ Name == "sse2.pmaxs.w" ||
+ Name == "sse41.pmaxsd" ||
+ Name.startswith("avx2.pmaxs") ||
+ Name.startswith("avx512.mask.pmaxs"))) {
+ Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SGT);
+ } else if (IsX86 && (Name == "sse2.pmaxu.b" ||
+ Name == "sse41.pmaxuw" ||
+ Name == "sse41.pmaxud" ||
+ Name.startswith("avx2.pmaxu") ||
+ Name.startswith("avx512.mask.pmaxu"))) {
+ Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_UGT);
+ } else if (IsX86 && (Name == "sse41.pminsb" ||
+ Name == "sse2.pmins.w" ||
+ Name == "sse41.pminsd" ||
+ Name.startswith("avx2.pmins") ||
+ Name.startswith("avx512.mask.pmins"))) {
+ Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_SLT);
+ } else if (IsX86 && (Name == "sse2.pminu.b" ||
+ Name == "sse41.pminuw" ||
+ Name == "sse41.pminud" ||
+ Name.startswith("avx2.pminu") ||
+ Name.startswith("avx512.mask.pminu"))) {
+ Rep = upgradeIntMinMax(Builder, *CI, ICmpInst::ICMP_ULT);
+ } else if (IsX86 && (Name == "sse2.cvtdq2pd" ||
+ Name == "sse2.cvtps2pd" ||
+ Name == "avx.cvtdq2.pd.256" ||
+ Name == "avx.cvt.ps2.pd.256" ||
+ Name.startswith("avx512.mask.cvtdq2pd.") ||
+ Name.startswith("avx512.mask.cvtudq2pd."))) {
+ // Lossless i32/float to double conversion.
+ // Extract the bottom elements if necessary and convert to double vector.
+ Value *Src = CI->getArgOperand(0);
+ VectorType *SrcTy = cast<VectorType>(Src->getType());
+ VectorType *DstTy = cast<VectorType>(CI->getType());
+ Rep = CI->getArgOperand(0);
+
+ unsigned NumDstElts = DstTy->getNumElements();
+ if (NumDstElts < SrcTy->getNumElements()) {
+ assert(NumDstElts == 2 && "Unexpected vector size");
+ uint32_t ShuffleMask[2] = { 0, 1 };
+ Rep = Builder.CreateShuffleVector(Rep, UndefValue::get(SrcTy),
+ ShuffleMask);
+ }
+
+ bool SInt2Double = (StringRef::npos != Name.find("cvtdq2"));
+ bool UInt2Double = (StringRef::npos != Name.find("cvtudq2"));
+ if (SInt2Double)
+ Rep = Builder.CreateSIToFP(Rep, DstTy, "cvtdq2pd");
+ else if (UInt2Double)
+ Rep = Builder.CreateUIToFP(Rep, DstTy, "cvtudq2pd");
+ else
+ Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
+
+ if (CI->getNumArgOperands() == 3)
+ Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
+ CI->getArgOperand(1));
+ } else if (IsX86 && (Name.startswith("avx512.mask.loadu."))) {
Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
CI->getArgOperand(1), CI->getArgOperand(2),
/*Aligned*/false);
- } else if (IsX86 && (Name.startswith("avx512.mask.load.p") ||
- Name.startswith("avx512.mask.load.b.") ||
- Name.startswith("avx512.mask.load.w.") ||
- Name.startswith("avx512.mask.load.d.") ||
- Name.startswith("avx512.mask.load.q."))) {
+ } else if (IsX86 && (Name.startswith("avx512.mask.load."))) {
Rep = UpgradeMaskedLoad(Builder, CI->getArgOperand(0),
CI->getArgOperand(1),CI->getArgOperand(2),
/*Aligned*/true);
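An aside, not from the patch: the new sse.add.ss / sse2.add.sd style expansions above only touch element 0 and pass the remaining elements through from the first operand, which is what the extract/op/insert sequence encodes. A plain-array sketch of the add case:

#include <cstdio>

int main() {
  const int N = 4;
  float A[N] = {1.5f, 2.0f, 3.0f, 4.0f};
  float B[N] = {0.25f, 9.0f, 9.0f, 9.0f};

  float Res[N];
  for (int i = 0; i < N; ++i)
    Res[i] = A[i];       // upper elements come from operand 0 unchanged
  Res[0] = A[0] + B[0];  // the only lane the scalar instruction touches

  for (int i = 0; i < N; ++i)
    printf("%g ", Res[i]);
  printf("\n");
  return 0;
}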
@@ -886,7 +1028,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
Rep = Builder.CreateCall(CRC32, {Trunc0, CI->getArgOperand(1)});
Rep = Builder.CreateZExt(Rep, CI->getType(), "");
- } else if (IsX86 && Name.startswith("avx.vbroadcast")) {
+ } else if (IsX86 && Name.startswith("avx.vbroadcast.s")) {
// Replace broadcasts with a series of insertelements.
Type *VecTy = CI->getType();
Type *EltTy = VecTy->getVectorElementType();
@@ -902,7 +1044,9 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
} else if (IsX86 && (Name.startswith("sse41.pmovsx") ||
Name.startswith("sse41.pmovzx") ||
Name.startswith("avx2.pmovsx") ||
- Name.startswith("avx2.pmovzx"))) {
+ Name.startswith("avx2.pmovzx") ||
+ Name.startswith("avx512.mask.pmovsx") ||
+ Name.startswith("avx512.mask.pmovzx"))) {
VectorType *SrcTy = cast<VectorType>(CI->getArgOperand(0)->getType());
VectorType *DstTy = cast<VectorType>(CI->getType());
unsigned NumDstElts = DstTy->getNumElements();
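Illustration only (the upgrade body continues in the next hunk): the pmovsx/pmovzx handling takes the low NumDstElts elements of the source vector and sign- or zero-extends each one to the wider destination element type. A pmovzxbd-style example on plain arrays, with made-up values:

#include <cstdint>
#include <cstdio>

int main() {
  uint8_t Src[16] = {250, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
  const int NumDstElts = 4;

  uint32_t Dst[NumDstElts];
  for (int i = 0; i < NumDstElts; ++i)
    Dst[i] = Src[i];  // zero-extend; a sign-extend would go through int8_t

  for (int i = 0; i < NumDstElts; ++i)
    printf("%u ", (unsigned)Dst[i]);
  printf("\n");
  return 0;
}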
@@ -918,15 +1062,25 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
bool DoSext = (StringRef::npos != Name.find("pmovsx"));
Rep = DoSext ? Builder.CreateSExt(SV, DstTy)
: Builder.CreateZExt(SV, DstTy);
- } else if (IsX86 && Name == "avx2.vbroadcasti128") {
- // Replace vbroadcasts with a vector shuffle.
- Type *VT = VectorType::get(Type::getInt64Ty(C), 2);
+ // If there are 3 arguments, it's a masked intrinsic so we need a select.
+ if (CI->getNumArgOperands() == 3)
+ Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
+ CI->getArgOperand(1));
+ } else if (IsX86 && (Name.startswith("avx.vbroadcastf128") ||
+ Name == "avx2.vbroadcasti128")) {
+ // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
+ Type *EltTy = CI->getType()->getVectorElementType();
+ unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
+ Type *VT = VectorType::get(EltTy, NumSrcElts);
Value *Op = Builder.CreatePointerCast(CI->getArgOperand(0),
PointerType::getUnqual(VT));
- Value *Load = Builder.CreateLoad(VT, Op);
- uint32_t Idxs[4] = { 0, 1, 0, 1 };
- Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
- Idxs);
+ Value *Load = Builder.CreateAlignedLoad(Op, 1);
+ if (NumSrcElts == 2)
+ Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
+ { 0, 1, 0, 1 });
+ else
+ Rep = Builder.CreateShuffleVector(Load, UndefValue::get(Load->getType()),
+ { 0, 1, 2, 3, 0, 1, 2, 3 });
} else if (IsX86 && (Name.startswith("avx2.pbroadcast") ||
Name.startswith("avx2.vbroadcast") ||
Name.startswith("avx512.pbroadcast") ||
@@ -942,11 +1096,19 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
CI->getArgOperand(1));
} else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
- Rep = UpgradeX86PALIGNRIntrinsics(Builder, CI->getArgOperand(0),
- CI->getArgOperand(1),
- CI->getArgOperand(2),
- CI->getArgOperand(3),
- CI->getArgOperand(4));
+ Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
+ CI->getArgOperand(1),
+ CI->getArgOperand(2),
+ CI->getArgOperand(3),
+ CI->getArgOperand(4),
+ false);
+ } else if (IsX86 && Name.startswith("avx512.mask.valign.")) {
+ Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
+ CI->getArgOperand(1),
+ CI->getArgOperand(2),
+ CI->getArgOperand(3),
+ CI->getArgOperand(4),
+ true);
} else if (IsX86 && (Name == "sse2.psll.dq" ||
Name == "avx2.psll.dq")) {
// 128/256-bit shift left specified in bits.
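As a standalone illustration (not part of the patch): the vbroadcastf128/vbroadcasti128 rewrite above loads one 128-bit block and repeats it across the 256-bit result, which is exactly what the { 0, 1, 0, 1 } and { 0, 1, 2, 3, 0, 1, 2, 3 } shuffle masks express. Modelled on plain float arrays:

#include <cstdio>

int main() {
  float Mem[4] = {1.0f, 2.0f, 3.0f, 4.0f};  // the 128-bit source in memory

  float Res[8];
  for (int i = 0; i < 8; ++i)
    Res[i] = Mem[i % 4];                    // duplicate the loaded block

  for (int i = 0; i < 8; ++i)
    printf("%g ", Res[i]);
  printf("\n");
  return 0;
}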
@@ -1123,6 +1285,31 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
if (CI->getNumArgOperands() == 4)
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
+ } else if (IsX86 && Name.startswith("avx512.mask.shuf.p")) {
+ Value *Op0 = CI->getArgOperand(0);
+ Value *Op1 = CI->getArgOperand(1);
+ unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
+ unsigned NumElts = CI->getType()->getVectorNumElements();
+
+ unsigned NumLaneElts = 128/CI->getType()->getScalarSizeInBits();
+ unsigned HalfLaneElts = NumLaneElts / 2;
+
+ SmallVector<uint32_t, 16> Idxs(NumElts);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ // Base index is the starting element of the lane.
+ Idxs[i] = i - (i % NumLaneElts);
+ // If we are halfway through the lane, switch to the other source.
+ if ((i % NumLaneElts) >= HalfLaneElts)
+ Idxs[i] += NumElts;
+ // Now select the specific element by adding HalfLaneElts bits from the
+ // immediate, wrapping around the immediate every 8 bits.
+ Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
+ }
+
+ Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
+
+ Rep = EmitX86Select(Builder, CI->getArgOperand(4), Rep,
+ CI->getArgOperand(3));
} else if (IsX86 && (Name.startswith("avx512.mask.movddup") ||
Name.startswith("avx512.mask.movshdup") ||
Name.startswith("avx512.mask.movsldup"))) {
@@ -1194,6 +1381,333 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
Rep = Builder.CreateXor(CI->getArgOperand(0), CI->getArgOperand(1));
Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
CI->getArgOperand(2));
+ } else if (IsX86 && Name.startswith("avx512.mask.and.")) {
+ VectorType *FTy = cast<VectorType>(CI->getType());
+ VectorType *ITy = VectorType::getInteger(FTy);
+ Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
+ Builder.CreateBitCast(CI->getArgOperand(1), ITy));
+ Rep = Builder.CreateBitCast(Rep, FTy);
+ Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
+ CI->getArgOperand(2));
+ } else if (IsX86 && Name.startswith("avx512.mask.andn.")) {
+ VectorType *FTy = cast<VectorType>(CI->getType());
+ VectorType *ITy = VectorType::getInteger(FTy);
+ Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
+ Rep = Builder.CreateAnd(Rep,
+ Builder.CreateBitCast(CI->getArgOperand(1), ITy));
+ Rep = Builder.CreateBitCast(Rep, FTy);
+ Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
+ CI->getArgOperand(2));
+ } else if (IsX86 && Name.startswith("avx512.mask.or.")) {
+ VectorType *FTy = cast<VectorType>(CI->getType());
+ VectorType *ITy = VectorType::getInteger(FTy);
+ Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
+ Builder.CreateBitCast(CI->getArgOperand(1), ITy));
+ Rep = Builder.CreateBitCast(Rep, FTy);
+ Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
+ CI->getArgOperand(2));
+ } else if (IsX86 && Name.startswith("avx512.mask.xor.")) {
+ VectorType *FTy = cast<VectorType>(CI->getType());
+ VectorType *ITy = VectorType::getInteger(FTy);
+ Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
+ Builder.CreateBitCast(CI->getArgOperand(1), ITy));
+ Rep = Builder.CreateBitCast(Rep, FTy);
+ Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
+ CI->getArgOperand(2));
+ } else if (IsX86 && Name.startswith("avx512.mask.padd.")) {
+ Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
+ Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
+ CI->getArgOperand(2));
+ } else if (IsX86 && Name.startswith("avx512.mask.psub.")) {
+ Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
+ Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
+ CI->getArgOperand(2));
+ } else if (IsX86 && Name.startswith("avx512.mask.pmull.")) {
+ Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
+ Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
+ CI->getArgOperand(2));
+ } else if (IsX86 && (Name.startswith("avx512.mask.add.p"))) {
+ Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
+ Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
+ CI->getArgOperand(2));
+ } else if (IsX86 && Name.startswith("avx512.mask.div.p")) {
+ Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
+ Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
+ CI->getArgOperand(2));
+ } else if (IsX86 && Name.startswith("avx512.mask.mul.p")) {
+ Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
+ Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
+ CI->getArgOperand(2));
+ } else if (IsX86 && Name.startswith("avx512.mask.sub.p")) {
+ Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
+ Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
+ CI->getArgOperand(2));
+ } else if (IsX86 && Name.startswith("avx512.mask.pshuf.b.")) {
+ VectorType *VecTy = cast<VectorType>(CI->getType());
+ Intrinsic::ID IID;
+ if (VecTy->getPrimitiveSizeInBits() == 128)
+ IID = Intrinsic::x86_ssse3_pshuf_b_128;
+ else if (VecTy->getPrimitiveSizeInBits() == 256)
+ IID = Intrinsic::x86_avx2_pshuf_b;
+ else if (VecTy->getPrimitiveSizeInBits() == 512)
+ IID = Intrinsic::x86_avx512_pshuf_b_512;
+ else
+ llvm_unreachable("Unexpected intrinsic");
+
+ Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
+ { CI->getArgOperand(0), CI->getArgOperand(1) });
+ Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
+ CI->getArgOperand(2));
+ } else if (IsX86 && (Name.startswith("avx512.mask.pmul.dq.") ||
+ Name.startswith("avx512.mask.pmulu.dq."))) {
+ bool IsUnsigned = Name[16] == 'u';
+ VectorType *VecTy = cast<VectorType>(CI->getType());
+ Intrinsic::ID IID;
+ if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 128)
+ IID = Intrinsic::x86_sse41_pmuldq;
+ else if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 256)
+ IID = Intrinsic::x86_avx2_pmul_dq;
+ else if (!IsUnsigned && VecTy->getPrimitiveSizeInBits() == 512)
+ IID = Intrinsic::x86_avx512_pmul_dq_512;
+ else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 128)
+ IID = Intrinsic::x86_sse2_pmulu_dq;
+ else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 256)
+ IID = Intrinsic::x86_avx2_pmulu_dq;
+ else if (IsUnsigned && VecTy->getPrimitiveSizeInBits() == 512)
+ IID = Intrinsic::x86_avx512_pmulu_dq_512;
+ else
+ llvm_unreachable("Unexpected intrinsic");
+
+ Rep = Builder.CreateCall(Intrinsic::getDeclaration(F->getParent(), IID),
+ { CI->getArgOperand(0), CI->getArgOperand(1) });
+ Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
+ CI->getArgOperand(2));
+ } else if (IsX86 && Name.startswith("avx512.mask.psll")) {
+ bool IsImmediate = Name[16] == 'i' ||
+ (Name.size() > 18 && Name[18] == 'i');
+ bool IsVariable = Name[16] == 'v';
+ char Size = Name[16] == '.' ? Name[17] :
+ Name[17] == '.' ? Name[18] :
+ Name[18] == '.' ? Name[19] :
+ Name[20];
+
+ Intrinsic::ID IID;
+ if (IsVariable && Name[17] != '.') {
+ if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
+ IID = Intrinsic::x86_avx2_psllv_q;
+ else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
+ IID = Intrinsic::x86_avx2_psllv_q_256;
+ else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
+ IID = Intrinsic::x86_avx2_psllv_d;
+ else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
+ IID = Intrinsic::x86_avx2_psllv_d_256;
+ else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
+ IID = Intrinsic::x86_avx512_psllv_w_128;
+ else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
+ IID = Intrinsic::x86_avx512_psllv_w_256;
+ else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
+ IID = Intrinsic::x86_avx512_psllv_w_512;
+ else
+ llvm_unreachable("Unexpected size");
+ } else if (Name.endswith(".128")) {
+ if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
+ IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
+ : Intrinsic::x86_sse2_psll_d;
+ else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
+ IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
+ : Intrinsic::x86_sse2_psll_q;
+ else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
+ IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
+ : Intrinsic::x86_sse2_psll_w;
+ else
+ llvm_unreachable("Unexpected size");
+ } else if (Name.endswith(".256")) {
+ if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
+ IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
+ : Intrinsic::x86_avx2_psll_d;
+ else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
+ IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
+ : Intrinsic::x86_avx2_psll_q;
+ else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
+ IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
+ : Intrinsic::x86_avx2_psll_w;
+ else
+ llvm_unreachable("Unexpected size");
+ } else {
+ if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
+ IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512 :
+ IsVariable ? Intrinsic::x86_avx512_psllv_d_512 :
+ Intrinsic::x86_avx512_psll_d_512;
+ else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
+ IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512 :
+ IsVariable ? Intrinsic::x86_avx512_psllv_q_512 :
+ Intrinsic::x86_avx512_psll_q_512;
+ else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
+ IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
+ : Intrinsic::x86_avx512_psll_w_512;
+ else
+ llvm_unreachable("Unexpected size");
+ }
+
+ Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
+ } else if (IsX86 && Name.startswith("avx512.mask.psrl")) {
+ bool IsImmediate = Name[16] == 'i' ||
+ (Name.size() > 18 && Name[18] == 'i');
+ bool IsVariable = Name[16] == 'v';
+ char Size = Name[16] == '.' ? Name[17] :
+ Name[17] == '.' ? Name[18] :
+ Name[18] == '.' ? Name[19] :
+ Name[20];
+
+ Intrinsic::ID IID;
+ if (IsVariable && Name[17] != '.') {
+ if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
+ IID = Intrinsic::x86_avx2_psrlv_q;
+ else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
+ IID = Intrinsic::x86_avx2_psrlv_q_256;
+ else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
+ IID = Intrinsic::x86_avx2_psrlv_d;
+ else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
+ IID = Intrinsic::x86_avx2_psrlv_d_256;
+ else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
+ IID = Intrinsic::x86_avx512_psrlv_w_128;
+ else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
+ IID = Intrinsic::x86_avx512_psrlv_w_256;
+ else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
+ IID = Intrinsic::x86_avx512_psrlv_w_512;
+ else
+ llvm_unreachable("Unexpected size");
+ } else if (Name.endswith(".128")) {
+ if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
+ IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
+ : Intrinsic::x86_sse2_psrl_d;
+ else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
+ IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
+ : Intrinsic::x86_sse2_psrl_q;
+ else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
+ IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
+ : Intrinsic::x86_sse2_psrl_w;
+ else
+ llvm_unreachable("Unexpected size");
+ } else if (Name.endswith(".256")) {
+ if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
+ IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
+ : Intrinsic::x86_avx2_psrl_d;
+ else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
+ IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
+ : Intrinsic::x86_avx2_psrl_q;
+ else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
+ IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
+ : Intrinsic::x86_avx2_psrl_w;
+ else
+ llvm_unreachable("Unexpected size");
+ } else {
+ if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
+ IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512 :
+ IsVariable ? Intrinsic::x86_avx512_psrlv_d_512 :
+ Intrinsic::x86_avx512_psrl_d_512;
+ else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
+ IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512 :
+ IsVariable ? Intrinsic::x86_avx512_psrlv_q_512 :
+ Intrinsic::x86_avx512_psrl_q_512;
+ else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
+ IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
+ : Intrinsic::x86_avx512_psrl_w_512;
+ else
+ llvm_unreachable("Unexpected size");
+ }
+
+ Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
+ } else if (IsX86 && Name.startswith("avx512.mask.psra")) {
+ bool IsImmediate = Name[16] == 'i' ||
+ (Name.size() > 18 && Name[18] == 'i');
+ bool IsVariable = Name[16] == 'v';
+ char Size = Name[16] == '.' ? Name[17] :
+ Name[17] == '.' ? Name[18] :
+ Name[18] == '.' ? Name[19] :
+ Name[20];
+
+ Intrinsic::ID IID;
+ if (IsVariable && Name[17] != '.') {
+ if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
+ IID = Intrinsic::x86_avx2_psrav_d;
+ else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
+ IID = Intrinsic::x86_avx2_psrav_d_256;
+ else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
+ IID = Intrinsic::x86_avx512_psrav_w_128;
+ else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
+ IID = Intrinsic::x86_avx512_psrav_w_256;
+ else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
+ IID = Intrinsic::x86_avx512_psrav_w_512;
+ else
+ llvm_unreachable("Unexpected size");
+ } else if (Name.endswith(".128")) {
+ if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
+ IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
+ : Intrinsic::x86_sse2_psra_d;
+ else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
+ IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128 :
+ IsVariable ? Intrinsic::x86_avx512_psrav_q_128 :
+ Intrinsic::x86_avx512_psra_q_128;
+ else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
+ IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
+ : Intrinsic::x86_sse2_psra_w;
+ else
+ llvm_unreachable("Unexpected size");
+ } else if (Name.endswith(".256")) {
+ if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
+ IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
+ : Intrinsic::x86_avx2_psra_d;
+ else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
+ IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256 :
+ IsVariable ? Intrinsic::x86_avx512_psrav_q_256 :
+ Intrinsic::x86_avx512_psra_q_256;
+ else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
+ IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
+ : Intrinsic::x86_avx2_psra_w;
+ else
+ llvm_unreachable("Unexpected size");
+ } else {
+ if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
+ IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512 :
+ IsVariable ? Intrinsic::x86_avx512_psrav_d_512 :
+ Intrinsic::x86_avx512_psra_d_512;
+ else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
+ IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512 :
+ IsVariable ? Intrinsic::x86_avx512_psrav_q_512 :
+ Intrinsic::x86_avx512_psra_q_512;
+ else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
+ IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
+ : Intrinsic::x86_avx512_psra_w_512;
+ else
+ llvm_unreachable("Unexpected size");
+ }
+
+ Rep = UpgradeX86MaskedShift(Builder, *CI, IID);
+ } else if (IsX86 && Name.startswith("avx512.mask.move.s")) {
+ Rep = upgradeMaskedMove(Builder, *CI);
+ } else if (IsX86 && Name.startswith("avx512.mask.vpermilvar.")) {
+ Intrinsic::ID IID;
+ if (Name.endswith("ps.128"))
+ IID = Intrinsic::x86_avx_vpermilvar_ps;
+ else if (Name.endswith("pd.128"))
+ IID = Intrinsic::x86_avx_vpermilvar_pd;
+ else if (Name.endswith("ps.256"))
+ IID = Intrinsic::x86_avx_vpermilvar_ps_256;
+ else if (Name.endswith("pd.256"))
+ IID = Intrinsic::x86_avx_vpermilvar_pd_256;
+ else if (Name.endswith("ps.512"))
+ IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
+ else if (Name.endswith("pd.512"))
+ IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
+ else
+ llvm_unreachable("Unexpected vpermilvar intrinsic");
+
+ Function *Intrin = Intrinsic::getDeclaration(F->getParent(), IID);
+ Rep = Builder.CreateCall(Intrin,
+ { CI->getArgOperand(0), CI->getArgOperand(1) });
+ Rep = EmitX86Select(Builder, CI->getArgOperand(3), Rep,
+ CI->getArgOperand(2));
} else {
llvm_unreachable("Unknown function for CallInst upgrade.");
}
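One detail in the hunk above worth spelling out (sketch only, not part of the patch): the avx512.mask.and/andn/or/xor.p* intrinsics have no floating-point bitwise instruction to map to, so the upgrade bitcasts the FP vectors to integer vectors, performs the integer operation, bitcasts back, and then applies the usual mask select. The same bitcast round trip on a single float:

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  float A = -1.5f;
  uint32_t AbsMask = 0x7fffffff;  // clearing the sign bit is fabsf in disguise

  uint32_t Bits;
  std::memcpy(&Bits, &A, sizeof(Bits));   // "bitcast" float -> i32
  Bits &= AbsMask;                        // the integer AND
  float Res;
  std::memcpy(&Res, &Bits, sizeof(Res));  // "bitcast" i32 -> float

  printf("%g\n", Res);                    // prints 1.5
  return 0;
}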
@@ -1212,12 +1726,6 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
default:
llvm_unreachable("Unknown function for CallInst upgrade.");
- case Intrinsic::x86_avx512_mask_psll_di_512:
- case Intrinsic::x86_avx512_mask_psra_di_512:
- case Intrinsic::x86_avx512_mask_psrl_di_512:
- case Intrinsic::x86_avx512_mask_psll_qi_512:
- case Intrinsic::x86_avx512_mask_psra_qi_512:
- case Intrinsic::x86_avx512_mask_psrl_qi_512:
case Intrinsic::arm_neon_vld1:
case Intrinsic::arm_neon_vld2:
case Intrinsic::arm_neon_vld3:
@@ -1332,6 +1840,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
return;
}
+ case Intrinsic::invariant_start:
+ case Intrinsic::invariant_end:
case Intrinsic::masked_load:
case Intrinsic::masked_store: {
SmallVector<Value *, 4> Args(CI->arg_operands().begin(),
@@ -1361,28 +1871,26 @@ void llvm::UpgradeCallsToIntrinsic(Function *F) {
}
}
-void llvm::UpgradeInstWithTBAATag(Instruction *I) {
- MDNode *MD = I->getMetadata(LLVMContext::MD_tbaa);
- assert(MD && "UpgradeInstWithTBAATag should have a TBAA tag");
+MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
// Check if the tag uses struct-path aware TBAA format.
- if (isa<MDNode>(MD->getOperand(0)) && MD->getNumOperands() >= 3)
- return;
+ if (isa<MDNode>(MD.getOperand(0)) && MD.getNumOperands() >= 3)
+ return &MD;
- if (MD->getNumOperands() == 3) {
- Metadata *Elts[] = {MD->getOperand(0), MD->getOperand(1)};
- MDNode *ScalarType = MDNode::get(I->getContext(), Elts);
+ auto &Context = MD.getContext();
+ if (MD.getNumOperands() == 3) {
+ Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
+ MDNode *ScalarType = MDNode::get(Context, Elts);
// Create a MDNode <ScalarType, ScalarType, offset 0, const>
Metadata *Elts2[] = {ScalarType, ScalarType,
- ConstantAsMetadata::get(Constant::getNullValue(
- Type::getInt64Ty(I->getContext()))),
- MD->getOperand(2)};
- I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts2));
- } else {
- // Create a MDNode <MD, MD, offset 0>
- Metadata *Elts[] = {MD, MD, ConstantAsMetadata::get(Constant::getNullValue(
- Type::getInt64Ty(I->getContext())))};
- I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts));
+ ConstantAsMetadata::get(
+ Constant::getNullValue(Type::getInt64Ty(Context))),
+ MD.getOperand(2)};
+ return MDNode::get(Context, Elts2);
}
+ // Create a MDNode <MD, MD, offset 0>
+ Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
+ Type::getInt64Ty(Context)))};
+ return MDNode::get(Context, Elts);
}
Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
@@ -1462,11 +1970,11 @@ bool llvm::UpgradeModuleFlags(Module &M) {
}
// "Objective-C Class Properties" is recently added for Objective-C. We
// upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
- // flag of value 0, so we can correclty report error when trying to link
- // an ObjC bitcode without this module flag with an ObjC bitcode with this
- // module flag.
+ // flag of value 0, so we can correctly downgrade this flag when trying to
+ // link an ObjC bitcode without this module flag with an ObjC bitcode with
+ // this module flag.
if (HasObjCFlag && !HasClassProperties) {
- M.addModuleFlag(llvm::Module::Error, "Objective-C Class Properties",
+ M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
(uint32_t)0);
return true;
}
@@ -1524,7 +2032,7 @@ MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
if (!T)
return &N;
- if (!llvm::any_of(T->operands(), isOldLoopArgument))
+ if (none_of(T->operands(), isOldLoopArgument))
return &N;
SmallVector<Metadata *, 8> Ops;
diff --git a/contrib/llvm/lib/IR/BasicBlock.cpp b/contrib/llvm/lib/IR/BasicBlock.cpp
index 4640b4f9d413..19e784923658 100644
--- a/contrib/llvm/lib/IR/BasicBlock.cpp
+++ b/contrib/llvm/lib/IR/BasicBlock.cpp
@@ -26,7 +26,7 @@ using namespace llvm;
ValueSymbolTable *BasicBlock::getValueSymbolTable() {
if (Function *F = getParent())
- return &F->getValueSymbolTable();
+ return F->getValueSymbolTable();
return nullptr;
}
diff --git a/contrib/llvm/lib/IR/ConstantFold.cpp b/contrib/llvm/lib/IR/ConstantFold.cpp
index c06a99c05363..098ff90a0a95 100644
--- a/contrib/llvm/lib/IR/ConstantFold.cpp
+++ b/contrib/llvm/lib/IR/ConstantFold.cpp
@@ -120,7 +120,6 @@ static Constant *FoldBitCast(Constant *V, Type *DestTy) {
IdxList.push_back(Zero);
} else if (SequentialType *STy =
dyn_cast<SequentialType>(ElTy)) {
- if (ElTy->isPointerTy()) break; // Can't index into pointers!
ElTy = STy->getElementType();
IdxList.push_back(Zero);
} else {
@@ -545,7 +544,10 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
} else if (CE->getOpcode() == Instruction::GetElementPtr &&
// Do not fold addrspacecast (gep 0, .., 0). It might make the
// addrspacecast uncanonicalized.
- opc != Instruction::AddrSpaceCast) {
+ opc != Instruction::AddrSpaceCast &&
+ // Do not fold bitcast (gep) with inrange index, as this loses
+ // information.
+ !cast<GEPOperator>(CE)->getInRangeIndex().hasValue()) {
// If all of the indexes in the GEP are null values, there is no pointer
// adjustment going on. We might as well cast the source pointer.
bool isAllNull = true;
@@ -588,13 +590,13 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
if (ConstantFP *FPC = dyn_cast<ConstantFP>(V)) {
bool ignored;
APFloat Val = FPC->getValueAPF();
- Val.convert(DestTy->isHalfTy() ? APFloat::IEEEhalf :
- DestTy->isFloatTy() ? APFloat::IEEEsingle :
- DestTy->isDoubleTy() ? APFloat::IEEEdouble :
- DestTy->isX86_FP80Ty() ? APFloat::x87DoubleExtended :
- DestTy->isFP128Ty() ? APFloat::IEEEquad :
- DestTy->isPPC_FP128Ty() ? APFloat::PPCDoubleDouble :
- APFloat::Bogus,
+ Val.convert(DestTy->isHalfTy() ? APFloat::IEEEhalf() :
+ DestTy->isFloatTy() ? APFloat::IEEEsingle() :
+ DestTy->isDoubleTy() ? APFloat::IEEEdouble() :
+ DestTy->isX86_FP80Ty() ? APFloat::x87DoubleExtended() :
+ DestTy->isFP128Ty() ? APFloat::IEEEquad() :
+ DestTy->isPPC_FP128Ty() ? APFloat::PPCDoubleDouble() :
+ APFloat::Bogus(),
APFloat::rmNearestTiesToEven, &ignored);
return ConstantFP::get(V->getContext(), Val);
}
@@ -889,10 +891,8 @@ Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg,
unsigned NumElts;
if (StructType *ST = dyn_cast<StructType>(Agg->getType()))
NumElts = ST->getNumElements();
- else if (ArrayType *AT = dyn_cast<ArrayType>(Agg->getType()))
- NumElts = AT->getNumElements();
else
- NumElts = Agg->getType()->getVectorNumElements();
+ NumElts = cast<SequentialType>(Agg->getType())->getNumElements();
SmallVector<Constant*, 32> Result;
for (unsigned i = 0; i != NumElts; ++i) {
@@ -925,7 +925,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
// Handle undef ^ undef -> 0 special case. This is a common
// idiom (misuse).
return Constant::getNullValue(C1->getType());
- // Fallthrough
+ LLVM_FALLTHROUGH;
case Instruction::Add:
case Instruction::Sub:
return UndefValue::get(C1->getType());
@@ -2016,22 +2016,8 @@ static bool isInBoundsIndices(ArrayRef<IndexTy> Idxs) {
}
/// Test whether a given ConstantInt is in-range for a SequentialType.
-static bool isIndexInRangeOfSequentialType(SequentialType *STy,
- const ConstantInt *CI) {
- // And indices are valid when indexing along a pointer
- if (isa<PointerType>(STy))
- return true;
-
- uint64_t NumElements = 0;
- // Determine the number of elements in our sequential type.
- if (auto *ATy = dyn_cast<ArrayType>(STy))
- NumElements = ATy->getNumElements();
- else if (auto *VTy = dyn_cast<VectorType>(STy))
- NumElements = VTy->getNumElements();
-
- assert((isa<ArrayType>(STy) || NumElements > 0) &&
- "didn't expect non-array type to have zero elements!");
-
+static bool isIndexInRangeOfArrayType(uint64_t NumElements,
+ const ConstantInt *CI) {
// We cannot bounds check the index if it doesn't fit in an int64_t.
if (CI->getValue().getActiveBits() > 64)
return false;
@@ -2046,22 +2032,18 @@ static bool isIndexInRangeOfSequentialType(SequentialType *STy,
return true;
}
-template<typename IndexTy>
-static Constant *ConstantFoldGetElementPtrImpl(Type *PointeeTy, Constant *C,
- bool inBounds,
- ArrayRef<IndexTy> Idxs) {
+Constant *llvm::ConstantFoldGetElementPtr(Type *PointeeTy, Constant *C,
+ bool InBounds,
+ Optional<unsigned> InRangeIndex,
+ ArrayRef<Value *> Idxs) {
if (Idxs.empty()) return C;
Constant *Idx0 = cast<Constant>(Idxs[0]);
if ((Idxs.size() == 1 && Idx0->isNullValue()))
return C;
if (isa<UndefValue>(C)) {
- PointerType *PtrTy = cast<PointerType>(C->getType()->getScalarType());
- Type *Ty = GetElementPtrInst::getIndexedType(PointeeTy, Idxs);
- assert(Ty && "Invalid indices for GEP!");
- Type *GEPTy = PointerType::get(Ty, PtrTy->getAddressSpace());
- if (VectorType *VT = dyn_cast<VectorType>(C->getType()))
- GEPTy = VectorType::get(GEPTy, VT->getNumElements());
+ Type *GEPTy = GetElementPtrInst::getGEPReturnType(
+ C, makeArrayRef((Value * const *)Idxs.data(), Idxs.size()));
return UndefValue::get(GEPTy);
}
@@ -2090,10 +2072,10 @@ static Constant *ConstantFoldGetElementPtrImpl(Type *PointeeTy, Constant *C,
// getelementptr instructions into a single instruction.
//
if (CE->getOpcode() == Instruction::GetElementPtr) {
- Type *LastTy = nullptr;
+ gep_type_iterator LastI = gep_type_end(CE);
for (gep_type_iterator I = gep_type_begin(CE), E = gep_type_end(CE);
I != E; ++I)
- LastTy = *I;
+ LastI = I;
// We cannot combine indices if doing so would take us outside of an
// array or vector. Doing otherwise could trick us if we evaluated such a
@@ -2116,9 +2098,11 @@ static Constant *ConstantFoldGetElementPtrImpl(Type *PointeeTy, Constant *C,
bool PerformFold = false;
if (Idx0->isNullValue())
PerformFold = true;
- else if (SequentialType *STy = dyn_cast_or_null<SequentialType>(LastTy))
+ else if (LastI.isSequential())
if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx0))
- PerformFold = isIndexInRangeOfSequentialType(STy, CI);
+ PerformFold =
+ !LastI.isBoundedSequential() ||
+ isIndexInRangeOfArrayType(LastI.getSequentialNumElements(), CI);
if (PerformFold) {
SmallVector<Value*, 16> NewIndices;
@@ -2150,9 +2134,18 @@ static Constant *ConstantFoldGetElementPtrImpl(Type *PointeeTy, Constant *C,
NewIndices.push_back(Combined);
NewIndices.append(Idxs.begin() + 1, Idxs.end());
+
+ // The combined GEP normally inherits its index inrange attribute from
+ // the inner GEP, but if the inner GEP's last index was adjusted by the
+ // outer GEP, any inrange attribute on that index is invalidated.
+ Optional<unsigned> IRIndex = cast<GEPOperator>(CE)->getInRangeIndex();
+ if (IRIndex && *IRIndex == CE->getNumOperands() - 2 && !Idx0->isNullValue())
+ IRIndex = None;
+
return ConstantExpr::getGetElementPtr(
cast<GEPOperator>(CE)->getSourceElementType(), CE->getOperand(0),
- NewIndices, inBounds && cast<GEPOperator>(CE)->isInBounds());
+ NewIndices, InBounds && cast<GEPOperator>(CE)->isInBounds(),
+ IRIndex);
}
}
@@ -2177,8 +2170,9 @@ static Constant *ConstantFoldGetElementPtrImpl(Type *PointeeTy, Constant *C,
if (SrcArrayTy && DstArrayTy
&& SrcArrayTy->getElementType() == DstArrayTy->getElementType()
&& SrcPtrTy->getAddressSpace() == DstPtrTy->getAddressSpace())
- return ConstantExpr::getGetElementPtr(
- SrcArrayTy, (Constant *)CE->getOperand(0), Idxs, inBounds);
+ return ConstantExpr::getGetElementPtr(SrcArrayTy,
+ (Constant *)CE->getOperand(0),
+ Idxs, InBounds, InRangeIndex);
}
}
}
@@ -2198,25 +2192,26 @@ static Constant *ConstantFoldGetElementPtrImpl(Type *PointeeTy, Constant *C,
Unknown = true;
continue;
}
+ if (InRangeIndex && i == *InRangeIndex + 1) {
+ // If an index is marked inrange, we cannot apply this canonicalization to
+ // the following index, as that will cause the inrange index to point to
+ // the wrong element.
+ continue;
+ }
if (isa<StructType>(Ty)) {
// The verifier makes sure that GEPs into a struct are in range.
continue;
}
auto *STy = cast<SequentialType>(Ty);
- if (isa<PointerType>(STy)) {
- // We don't know if it's in range or not.
- Unknown = true;
- continue;
- }
if (isa<VectorType>(STy)) {
// There can be awkward padding after a non-power-of-two vector.
Unknown = true;
continue;
}
- if (isIndexInRangeOfSequentialType(STy, CI))
+ if (isIndexInRangeOfArrayType(STy->getNumElements(), CI))
// It's in range, skip to the next index.
continue;
- if (!isa<SequentialType>(Prev)) {
+ if (isa<StructType>(Prev)) {
// It's out of range, but the prior dimension is a struct
// so we can't do anything about it.
Unknown = true;
@@ -2260,27 +2255,17 @@ static Constant *ConstantFoldGetElementPtrImpl(Type *PointeeTy, Constant *C,
if (!NewIdxs.empty()) {
for (unsigned i = 0, e = Idxs.size(); i != e; ++i)
if (!NewIdxs[i]) NewIdxs[i] = cast<Constant>(Idxs[i]);
- return ConstantExpr::getGetElementPtr(PointeeTy, C, NewIdxs, inBounds);
+ return ConstantExpr::getGetElementPtr(PointeeTy, C, NewIdxs, InBounds,
+ InRangeIndex);
}
// If all indices are known integers and normalized, we can do a simple
// check for the "inbounds" property.
- if (!Unknown && !inBounds)
+ if (!Unknown && !InBounds)
if (auto *GV = dyn_cast<GlobalVariable>(C))
if (!GV->hasExternalWeakLinkage() && isInBoundsIndices(Idxs))
- return ConstantExpr::getInBoundsGetElementPtr(PointeeTy, C, Idxs);
+ return ConstantExpr::getGetElementPtr(PointeeTy, C, Idxs,
+ /*InBounds=*/true, InRangeIndex);
return nullptr;
}
-
-Constant *llvm::ConstantFoldGetElementPtr(Type *Ty, Constant *C,
- bool inBounds,
- ArrayRef<Constant *> Idxs) {
- return ConstantFoldGetElementPtrImpl(Ty, C, inBounds, Idxs);
-}
-
-Constant *llvm::ConstantFoldGetElementPtr(Type *Ty, Constant *C,
- bool inBounds,
- ArrayRef<Value *> Idxs) {
- return ConstantFoldGetElementPtrImpl(Ty, C, inBounds, Idxs);
-}
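For context (not part of the patch): the refactored isIndexInRangeOfArrayType boils down to a simple bounds test once the index is known to fit in 64 bits, and the GEP folds above bail out when it fails. A reduced sketch on plain integers (the real code also rejects indices wider than an int64_t, as its comment notes):

#include <cstdint>
#include <cstdio>

static bool isIndexInRange(uint64_t NumElements, int64_t Idx) {
  // A negative index or an index past the end of the array is out of range.
  return Idx >= 0 && static_cast<uint64_t>(Idx) < NumElements;
}

int main() {
  printf("%d\n", isIndexInRange(4, 3));   // 1: [0,3] is valid for [4 x T]
  printf("%d\n", isIndexInRange(4, 4));   // 0: one past the end
  printf("%d\n", isIndexInRange(4, -1));  // 0: negative index
  return 0;
}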
diff --git a/contrib/llvm/lib/IR/ConstantFold.h b/contrib/llvm/lib/IR/ConstantFold.h
index 9b0a937c84df..2d8de1132b96 100644
--- a/contrib/llvm/lib/IR/ConstantFold.h
+++ b/contrib/llvm/lib/IR/ConstantFold.h
@@ -19,6 +19,8 @@
#ifndef LLVM_LIB_IR_CONSTANTFOLD_H
#define LLVM_LIB_IR_CONSTANTFOLD_H
+#include "llvm/ADT/Optional.h"
+
namespace llvm {
template <typename T> class ArrayRef;
class Value;
@@ -46,9 +48,8 @@ template <typename T> class ArrayRef;
Constant *V2);
Constant *ConstantFoldCompareInstruction(unsigned short predicate,
Constant *C1, Constant *C2);
- Constant *ConstantFoldGetElementPtr(Type *Ty, Constant *C, bool inBounds,
- ArrayRef<Constant *> Idxs);
- Constant *ConstantFoldGetElementPtr(Type *Ty, Constant *C, bool inBounds,
+ Constant *ConstantFoldGetElementPtr(Type *Ty, Constant *C, bool InBounds,
+ Optional<unsigned> InRangeIndex,
ArrayRef<Value *> Idxs);
} // End llvm namespace
diff --git a/contrib/llvm/lib/IR/ConstantRange.cpp b/contrib/llvm/lib/IR/ConstantRange.cpp
index 0f5c7128f3d0..a85ad465317c 100644
--- a/contrib/llvm/lib/IR/ConstantRange.cpp
+++ b/contrib/llvm/lib/IR/ConstantRange.cpp
@@ -147,6 +147,14 @@ bool ConstantRange::getEquivalentICmp(CmpInst::Predicate &Pred,
Pred = isEmptySet() ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE;
RHS = APInt(getBitWidth(), 0);
Success = true;
+ } else if (auto *OnlyElt = getSingleElement()) {
+ Pred = CmpInst::ICMP_EQ;
+ RHS = *OnlyElt;
+ Success = true;
+ } else if (auto *OnlyMissingElt = getSingleMissingElement()) {
+ Pred = CmpInst::ICMP_NE;
+ RHS = *OnlyMissingElt;
+ Success = true;
} else if (getLower().isMinSignedValue() || getLower().isMinValue()) {
Pred =
getLower().isMinSignedValue() ? CmpInst::ICMP_SLT : CmpInst::ICMP_ULT;
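Sketch only, not from the patch: the two new getEquivalentICmp cases above cover ranges that contain exactly one value (equivalent to icmp eq) and ranges that exclude exactly one value (equivalent to icmp ne). Modelled with an 8-bit half-open wrapping range [Lower, Upper):

#include <cstdint>
#include <cstdio>

int main() {
  uint8_t Lower = 6, Upper = 5;  // wraps around: contains every i8 except 5

  if ((uint8_t)(Upper - Lower) == 1)
    printf("icmp eq X, %u\n", (unsigned)Lower);  // single element: Lower
  else if ((uint8_t)(Lower - Upper) == 1)
    printf("icmp ne X, %u\n", (unsigned)Upper);  // single missing element: Upper
  else
    printf("no single-compare equivalent for this range\n");
  return 0;
}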
@@ -526,6 +534,49 @@ ConstantRange ConstantRange::unionWith(const ConstantRange &CR) const {
return ConstantRange(L, U);
}
+ConstantRange ConstantRange::castOp(Instruction::CastOps CastOp,
+ uint32_t ResultBitWidth) const {
+ switch (CastOp) {
+ default:
+ llvm_unreachable("unsupported cast type");
+ case Instruction::Trunc:
+ return truncate(ResultBitWidth);
+ case Instruction::SExt:
+ return signExtend(ResultBitWidth);
+ case Instruction::ZExt:
+ return zeroExtend(ResultBitWidth);
+ case Instruction::BitCast:
+ return *this;
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ if (getBitWidth() == ResultBitWidth)
+ return *this;
+ else
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+ case Instruction::UIToFP: {
+ // TODO: use input range if available
+ auto BW = getBitWidth();
+ APInt Min = APInt::getMinValue(BW).zextOrSelf(ResultBitWidth);
+ APInt Max = APInt::getMaxValue(BW).zextOrSelf(ResultBitWidth);
+ return ConstantRange(Min, Max);
+ }
+ case Instruction::SIToFP: {
+ // TODO: use input range if available
+ auto BW = getBitWidth();
+ APInt SMin = APInt::getSignedMinValue(BW).sextOrSelf(ResultBitWidth);
+ APInt SMax = APInt::getSignedMaxValue(BW).sextOrSelf(ResultBitWidth);
+ return ConstantRange(SMin, SMax);
+ }
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::IntToPtr:
+ case Instruction::PtrToInt:
+ case Instruction::AddrSpaceCast:
+ // Conservatively return full set.
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+ };
+}
+
/// zeroExtend - Return a new range in the specified integer type, which must
/// be strictly larger than the current type. The returned range will
/// correspond to the possible range of values as if the source range had been
@@ -645,6 +696,42 @@ ConstantRange ConstantRange::sextOrTrunc(uint32_t DstTySize) const {
return *this;
}
+ConstantRange ConstantRange::binaryOp(Instruction::BinaryOps BinOp,
+ const ConstantRange &Other) const {
+ assert(BinOp >= Instruction::BinaryOpsBegin &&
+ BinOp < Instruction::BinaryOpsEnd && "Binary operators only!");
+
+ switch (BinOp) {
+ case Instruction::Add:
+ return add(Other);
+ case Instruction::Sub:
+ return sub(Other);
+ case Instruction::Mul:
+ return multiply(Other);
+ case Instruction::UDiv:
+ return udiv(Other);
+ case Instruction::Shl:
+ return shl(Other);
+ case Instruction::LShr:
+ return lshr(Other);
+ case Instruction::And:
+ return binaryAnd(Other);
+ case Instruction::Or:
+ return binaryOr(Other);
+ // Note: floating point operations applied to abstract ranges are just
+ // ideal integer operations with a lossy representation
+ case Instruction::FAdd:
+ return add(Other);
+ case Instruction::FSub:
+ return sub(Other);
+ case Instruction::FMul:
+ return multiply(Other);
+ default:
+ // Conservatively return full set.
+ return ConstantRange(getBitWidth(), /*isFullSet=*/true);
+ }
+}
+
ConstantRange
ConstantRange::add(const ConstantRange &Other) const {
if (isEmptySet() || Other.isEmptySet())
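
The castOp/binaryOp helpers added above dispatch on an opcode, so a caller can transfer a range through an arbitrary instruction without writing its own switch; unsupported opcodes conservatively fall back to the full set. A rough usage sketch with illustrative widths and values:

#include "llvm/ADT/APInt.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Instruction.h"
using namespace llvm;

void exampleOpcodeDispatch() {
  ConstantRange Small(APInt(8, 10), APInt(8, 20));   // i8 [10, 20)
  // Widen through a zext, then add a single-element range, picking the
  // transfer function from the opcode in both cases.
  ConstantRange Wide = Small.castOp(Instruction::ZExt, /*ResultBitWidth=*/32);
  ConstantRange Sum  = Wide.binaryOp(Instruction::Add,
                                     ConstantRange(APInt(32, 1)));
  (void)Sum;
}
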
@@ -666,6 +753,19 @@ ConstantRange::add(const ConstantRange &Other) const {
return X;
}
+ConstantRange ConstantRange::addWithNoSignedWrap(const APInt &Other) const {
+ // Calculate the subset of this range such that "X + Other" is
+ // guaranteed not to wrap (overflow) for all X in this subset.
+ // makeGuaranteedNoWrapRegion will produce an exact NSW range since we are
+ // passing a single element range.
+ auto NSWRange = ConstantRange::makeGuaranteedNoWrapRegion(BinaryOperator::Add,
+ ConstantRange(Other),
+ OverflowingBinaryOperator::NoSignedWrap);
+ auto NSWConstrainedRange = intersectWith(NSWRange);
+
+ return NSWConstrainedRange.add(ConstantRange(Other));
+}
+
ConstantRange
ConstantRange::sub(const ConstantRange &Other) const {
if (isEmptySet() || Other.isEmptySet())
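
addWithNoSignedWrap (added above) first shrinks the range to the values for which the addition cannot signed-overflow, then performs the add. A sketch with made-up i8 numbers:

#include "llvm/ADT/APInt.h"
#include "llvm/IR/ConstantRange.h"
using namespace llvm;

void exampleNoSignedWrapAdd() {
  ConstantRange CR(APInt(8, 100), APInt(8, 127));   // i8 [100, 127)
  // Only elements X with X + 10 <= 127 survive the intersection with the
  // no-wrap region, so the result covers 110..127 rather than wrapping.
  ConstantRange R = CR.addWithNoSignedWrap(APInt(8, 10));
  (void)R;
}
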
@@ -901,3 +1001,25 @@ void ConstantRange::print(raw_ostream &OS) const {
LLVM_DUMP_METHOD void ConstantRange::dump() const {
print(dbgs());
}
+
+ConstantRange llvm::getConstantRangeFromMetadata(const MDNode &Ranges) {
+ const unsigned NumRanges = Ranges.getNumOperands() / 2;
+ assert(NumRanges >= 1 && "Must have at least one range!");
+ assert(Ranges.getNumOperands() % 2 == 0 && "Must be a sequence of pairs");
+
+ auto *FirstLow = mdconst::extract<ConstantInt>(Ranges.getOperand(0));
+ auto *FirstHigh = mdconst::extract<ConstantInt>(Ranges.getOperand(1));
+
+ ConstantRange CR(FirstLow->getValue(), FirstHigh->getValue());
+
+ for (unsigned i = 1; i < NumRanges; ++i) {
+ auto *Low = mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 0));
+ auto *High = mdconst::extract<ConstantInt>(Ranges.getOperand(2 * i + 1));
+
+ // Note: unionWith will potentially create a range that contains values not
+ // contained in any of the original N ranges.
+ CR = CR.unionWith(ConstantRange(Low->getValue(), High->getValue()));
+ }
+
+ return CR;
+}
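
The new helper collapses !range metadata (a list of [Lo, Hi) constant pairs) into one conservative ConstantRange. A hedged sketch of the intended use, assuming an integer-typed load; the function name is illustrative:

#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
using namespace llvm;

ConstantRange rangeOfLoad(const LoadInst &LI) {
  if (MDNode *Ranges = LI.getMetadata(LLVMContext::MD_range))
    return getConstantRangeFromMetadata(*Ranges);
  // No annotation: every value of the loaded width is possible.
  return ConstantRange(LI.getType()->getIntegerBitWidth(), /*isFullSet=*/true);
}
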
diff --git a/contrib/llvm/lib/IR/Constants.cpp b/contrib/llvm/lib/IR/Constants.cpp
index d8d55b472f32..533b9245277f 100644
--- a/contrib/llvm/lib/IR/Constants.cpp
+++ b/contrib/llvm/lib/IR/Constants.cpp
@@ -198,22 +198,22 @@ Constant *Constant::getNullValue(Type *Ty) {
return ConstantInt::get(Ty, 0);
case Type::HalfTyID:
return ConstantFP::get(Ty->getContext(),
- APFloat::getZero(APFloat::IEEEhalf));
+ APFloat::getZero(APFloat::IEEEhalf()));
case Type::FloatTyID:
return ConstantFP::get(Ty->getContext(),
- APFloat::getZero(APFloat::IEEEsingle));
+ APFloat::getZero(APFloat::IEEEsingle()));
case Type::DoubleTyID:
return ConstantFP::get(Ty->getContext(),
- APFloat::getZero(APFloat::IEEEdouble));
+ APFloat::getZero(APFloat::IEEEdouble()));
case Type::X86_FP80TyID:
return ConstantFP::get(Ty->getContext(),
- APFloat::getZero(APFloat::x87DoubleExtended));
+ APFloat::getZero(APFloat::x87DoubleExtended()));
case Type::FP128TyID:
return ConstantFP::get(Ty->getContext(),
- APFloat::getZero(APFloat::IEEEquad));
+ APFloat::getZero(APFloat::IEEEquad()));
case Type::PPC_FP128TyID:
return ConstantFP::get(Ty->getContext(),
- APFloat(APFloat::PPCDoubleDouble,
+ APFloat(APFloat::PPCDoubleDouble(),
APInt::getNullValue(128)));
case Type::PointerTyID:
return ConstantPointerNull::get(cast<PointerType>(Ty));
@@ -347,10 +347,8 @@ static bool canTrapImpl(const Constant *C,
return false;
case Instruction::UDiv:
case Instruction::SDiv:
- case Instruction::FDiv:
case Instruction::URem:
case Instruction::SRem:
- case Instruction::FRem:
// Div and rem can trap if the RHS is not known to be non-zero.
if (!isa<ConstantInt>(CE->getOperand(1)) ||CE->getOperand(1)->isNullValue())
return true;
@@ -547,14 +545,14 @@ Constant *ConstantInt::getFalse(Type *Ty) {
ConstantInt *ConstantInt::get(LLVMContext &Context, const APInt &V) {
// get an existing value or the insertion position
LLVMContextImpl *pImpl = Context.pImpl;
- ConstantInt *&Slot = pImpl->IntConstants[V];
+ std::unique_ptr<ConstantInt> &Slot = pImpl->IntConstants[V];
if (!Slot) {
// Get the corresponding integer type for the bit width of the value.
IntegerType *ITy = IntegerType::get(Context, V.getBitWidth());
- Slot = new ConstantInt(ITy, V);
+ Slot.reset(new ConstantInt(ITy, V));
}
assert(Slot->getType() == IntegerType::get(Context, V.getBitWidth()));
- return Slot;
+ return Slot.get();
}
Constant *ConstantInt::get(Type *Ty, uint64_t V, bool isSigned) {
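
Holding the uniqued constants through std::unique_ptr only changes who owns the nodes; callers still get the same interned object for equal values. A small sketch (function name illustrative):

#include <cassert>
#include "llvm/ADT/APInt.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

void exampleUniquing(LLVMContext &Ctx) {
  ConstantInt *A = ConstantInt::get(Ctx, APInt(32, 7));
  ConstantInt *B = ConstantInt::get(Ctx, APInt(32, 7));
  assert(A == B && "equal APInts still map to one uniqued ConstantInt");
  (void)A; (void)B;
}
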
@@ -606,18 +604,18 @@ void ConstantInt::destroyConstantImpl() {
static const fltSemantics *TypeToFloatSemantics(Type *Ty) {
if (Ty->isHalfTy())
- return &APFloat::IEEEhalf;
+ return &APFloat::IEEEhalf();
if (Ty->isFloatTy())
- return &APFloat::IEEEsingle;
+ return &APFloat::IEEEsingle();
if (Ty->isDoubleTy())
- return &APFloat::IEEEdouble;
+ return &APFloat::IEEEdouble();
if (Ty->isX86_FP80Ty())
- return &APFloat::x87DoubleExtended;
+ return &APFloat::x87DoubleExtended();
else if (Ty->isFP128Ty())
- return &APFloat::IEEEquad;
+ return &APFloat::IEEEquad();
assert(Ty->isPPC_FP128Ty() && "Unknown FP format");
- return &APFloat::PPCDoubleDouble;
+ return &APFloat::PPCDoubleDouble();
}
void ConstantFP::anchor() { }
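
The fltSemantics objects are now reached through accessor functions, so call sites gain a pair of parentheses but are otherwise unchanged. A small sketch:

#include "llvm/ADT/APFloat.h"
using namespace llvm;

void exampleSemanticsAccessors() {
  APFloat V(APFloat::IEEEdouble(), "0.5");        // was APFloat::IEEEdouble
  bool LosesInfo = false;
  V.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &LosesInfo);
  // 0.5 is exactly representable as a float, so LosesInfo stays false.
}
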
@@ -687,29 +685,29 @@ Constant *ConstantFP::getZeroValueForNegation(Type *Ty) {
ConstantFP* ConstantFP::get(LLVMContext &Context, const APFloat& V) {
LLVMContextImpl* pImpl = Context.pImpl;
- ConstantFP *&Slot = pImpl->FPConstants[V];
+ std::unique_ptr<ConstantFP> &Slot = pImpl->FPConstants[V];
if (!Slot) {
Type *Ty;
- if (&V.getSemantics() == &APFloat::IEEEhalf)
+ if (&V.getSemantics() == &APFloat::IEEEhalf())
Ty = Type::getHalfTy(Context);
- else if (&V.getSemantics() == &APFloat::IEEEsingle)
+ else if (&V.getSemantics() == &APFloat::IEEEsingle())
Ty = Type::getFloatTy(Context);
- else if (&V.getSemantics() == &APFloat::IEEEdouble)
+ else if (&V.getSemantics() == &APFloat::IEEEdouble())
Ty = Type::getDoubleTy(Context);
- else if (&V.getSemantics() == &APFloat::x87DoubleExtended)
+ else if (&V.getSemantics() == &APFloat::x87DoubleExtended())
Ty = Type::getX86_FP80Ty(Context);
- else if (&V.getSemantics() == &APFloat::IEEEquad)
+ else if (&V.getSemantics() == &APFloat::IEEEquad())
Ty = Type::getFP128Ty(Context);
else {
- assert(&V.getSemantics() == &APFloat::PPCDoubleDouble &&
+ assert(&V.getSemantics() == &APFloat::PPCDoubleDouble() &&
"Unknown FP format");
Ty = Type::getPPC_FP128Ty(Context);
}
- Slot = new ConstantFP(Ty, V);
+ Slot.reset(new ConstantFP(Ty, V));
}
- return Slot;
+ return Slot.get();
}
Constant *ConstantFP::getInfinity(Type *Ty, bool Negative) {
@@ -796,10 +794,8 @@ UndefValue *UndefValue::getElementValue(unsigned Idx) const {
unsigned UndefValue::getNumElements() const {
Type *Ty = getType();
- if (auto *AT = dyn_cast<ArrayType>(Ty))
- return AT->getNumElements();
- if (auto *VT = dyn_cast<VectorType>(Ty))
- return VT->getNumElements();
+ if (auto *ST = dyn_cast<SequentialType>(Ty))
+ return ST->getNumElements();
return Ty->getStructNumElements();
}
@@ -1075,19 +1071,16 @@ bool ConstantExpr::isGEPWithNoNotionalOverIndexing() const {
gep_type_iterator GEPI = gep_type_begin(this), E = gep_type_end(this);
User::const_op_iterator OI = std::next(this->op_begin());
- // Skip the first index, as it has no static limit.
- ++GEPI;
- ++OI;
-
- // The remaining indices must be compile-time known integers within the
- // bounds of the corresponding notional static array types.
+ // The remaining indices may be compile-time known integers within the bounds
+ // of the corresponding notional static array types.
for (; GEPI != E; ++GEPI, ++OI) {
- ConstantInt *CI = dyn_cast<ConstantInt>(*OI);
- if (!CI) return false;
- if (ArrayType *ATy = dyn_cast<ArrayType>(*GEPI))
- if (CI->getValue().getActiveBits() > 64 ||
- CI->getZExtValue() >= ATy->getNumElements())
- return false;
+ if (isa<UndefValue>(*OI))
+ continue;
+ auto *CI = dyn_cast<ConstantInt>(*OI);
+ if (!CI || (GEPI.isBoundedSequential() &&
+ (CI->getValue().getActiveBits() > 64 ||
+ CI->getZExtValue() >= GEPI.getSequentialNumElements())))
+ return false;
}
// All the indices checked out.
@@ -1169,7 +1162,7 @@ Constant *ConstantExpr::getWithOperands(ArrayRef<Constant *> Ops, Type *Ty,
assert(SrcTy || (Ops[0]->getType() == getOperand(0)->getType()));
return ConstantExpr::getGetElementPtr(
SrcTy ? SrcTy : GEPO->getSourceElementType(), Ops[0], Ops.slice(1),
- GEPO->isInBounds(), OnlyIfReducedTy);
+ GEPO->isInBounds(), GEPO->getInRangeIndex(), OnlyIfReducedTy);
}
case Instruction::ICmp:
case Instruction::FCmp:
@@ -1217,40 +1210,40 @@ bool ConstantFP::isValueValidForType(Type *Ty, const APFloat& Val) {
// FIXME rounding mode needs to be more flexible
case Type::HalfTyID: {
- if (&Val2.getSemantics() == &APFloat::IEEEhalf)
+ if (&Val2.getSemantics() == &APFloat::IEEEhalf())
return true;
- Val2.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven, &losesInfo);
+ Val2.convert(APFloat::IEEEhalf(), APFloat::rmNearestTiesToEven, &losesInfo);
return !losesInfo;
}
case Type::FloatTyID: {
- if (&Val2.getSemantics() == &APFloat::IEEEsingle)
+ if (&Val2.getSemantics() == &APFloat::IEEEsingle())
return true;
- Val2.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &losesInfo);
+ Val2.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &losesInfo);
return !losesInfo;
}
case Type::DoubleTyID: {
- if (&Val2.getSemantics() == &APFloat::IEEEhalf ||
- &Val2.getSemantics() == &APFloat::IEEEsingle ||
- &Val2.getSemantics() == &APFloat::IEEEdouble)
+ if (&Val2.getSemantics() == &APFloat::IEEEhalf() ||
+ &Val2.getSemantics() == &APFloat::IEEEsingle() ||
+ &Val2.getSemantics() == &APFloat::IEEEdouble())
return true;
- Val2.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &losesInfo);
+ Val2.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &losesInfo);
return !losesInfo;
}
case Type::X86_FP80TyID:
- return &Val2.getSemantics() == &APFloat::IEEEhalf ||
- &Val2.getSemantics() == &APFloat::IEEEsingle ||
- &Val2.getSemantics() == &APFloat::IEEEdouble ||
- &Val2.getSemantics() == &APFloat::x87DoubleExtended;
+ return &Val2.getSemantics() == &APFloat::IEEEhalf() ||
+ &Val2.getSemantics() == &APFloat::IEEEsingle() ||
+ &Val2.getSemantics() == &APFloat::IEEEdouble() ||
+ &Val2.getSemantics() == &APFloat::x87DoubleExtended();
case Type::FP128TyID:
- return &Val2.getSemantics() == &APFloat::IEEEhalf ||
- &Val2.getSemantics() == &APFloat::IEEEsingle ||
- &Val2.getSemantics() == &APFloat::IEEEdouble ||
- &Val2.getSemantics() == &APFloat::IEEEquad;
+ return &Val2.getSemantics() == &APFloat::IEEEhalf() ||
+ &Val2.getSemantics() == &APFloat::IEEEsingle() ||
+ &Val2.getSemantics() == &APFloat::IEEEdouble() ||
+ &Val2.getSemantics() == &APFloat::IEEEquad();
case Type::PPC_FP128TyID:
- return &Val2.getSemantics() == &APFloat::IEEEhalf ||
- &Val2.getSemantics() == &APFloat::IEEEsingle ||
- &Val2.getSemantics() == &APFloat::IEEEdouble ||
- &Val2.getSemantics() == &APFloat::PPCDoubleDouble;
+ return &Val2.getSemantics() == &APFloat::IEEEhalf() ||
+ &Val2.getSemantics() == &APFloat::IEEEsingle() ||
+ &Val2.getSemantics() == &APFloat::IEEEdouble() ||
+ &Val2.getSemantics() == &APFloat::PPCDoubleDouble();
}
}
@@ -1261,12 +1254,13 @@ bool ConstantFP::isValueValidForType(Type *Ty, const APFloat& Val) {
ConstantAggregateZero *ConstantAggregateZero::get(Type *Ty) {
assert((Ty->isStructTy() || Ty->isArrayTy() || Ty->isVectorTy()) &&
"Cannot create an aggregate zero of non-aggregate type!");
-
- ConstantAggregateZero *&Entry = Ty->getContext().pImpl->CAZConstants[Ty];
+
+ std::unique_ptr<ConstantAggregateZero> &Entry =
+ Ty->getContext().pImpl->CAZConstants[Ty];
if (!Entry)
- Entry = new ConstantAggregateZero(Ty);
+ Entry.reset(new ConstantAggregateZero(Ty));
- return Entry;
+ return Entry.get();
}
/// Remove the constant from the constant table.
@@ -1327,11 +1321,12 @@ const APInt &Constant::getUniqueInteger() const {
//
ConstantPointerNull *ConstantPointerNull::get(PointerType *Ty) {
- ConstantPointerNull *&Entry = Ty->getContext().pImpl->CPNConstants[Ty];
+ std::unique_ptr<ConstantPointerNull> &Entry =
+ Ty->getContext().pImpl->CPNConstants[Ty];
if (!Entry)
- Entry = new ConstantPointerNull(Ty);
+ Entry.reset(new ConstantPointerNull(Ty));
- return Entry;
+ return Entry.get();
}
/// Remove the constant from the constant table.
@@ -1340,11 +1335,11 @@ void ConstantPointerNull::destroyConstantImpl() {
}
UndefValue *UndefValue::get(Type *Ty) {
- UndefValue *&Entry = Ty->getContext().pImpl->UVConstants[Ty];
+ std::unique_ptr<UndefValue> &Entry = Ty->getContext().pImpl->UVConstants[Ty];
if (!Entry)
- Entry = new UndefValue(Ty);
+ Entry.reset(new UndefValue(Ty));
- return Entry;
+ return Entry.get();
}
/// Remove the constant from the constant table.
@@ -1893,6 +1888,7 @@ Constant *ConstantExpr::getSelect(Constant *C, Constant *V1, Constant *V2,
Constant *ConstantExpr::getGetElementPtr(Type *Ty, Constant *C,
ArrayRef<Value *> Idxs, bool InBounds,
+ Optional<unsigned> InRangeIndex,
Type *OnlyIfReducedTy) {
if (!Ty)
Ty = cast<PointerType>(C->getType()->getScalarType())->getElementType();
@@ -1901,7 +1897,8 @@ Constant *ConstantExpr::getGetElementPtr(Type *Ty, Constant *C,
Ty ==
cast<PointerType>(C->getType()->getScalarType())->getContainedType(0u));
- if (Constant *FC = ConstantFoldGetElementPtr(Ty, C, InBounds, Idxs))
+ if (Constant *FC =
+ ConstantFoldGetElementPtr(Ty, C, InBounds, InRangeIndex, Idxs))
return FC; // Fold a few common cases.
// Get the result type of the getelementptr!
@@ -1937,9 +1934,12 @@ Constant *ConstantExpr::getGetElementPtr(Type *Ty, Constant *C,
Idx = ConstantVector::getSplat(NumVecElts, Idx);
ArgVec.push_back(Idx);
}
+
+ unsigned SubClassOptionalData = InBounds ? GEPOperator::IsInBounds : 0;
+ if (InRangeIndex && *InRangeIndex < 63)
+ SubClassOptionalData |= (*InRangeIndex + 1) << 1;
const ConstantExprKeyType Key(Instruction::GetElementPtr, ArgVec, 0,
- InBounds ? GEPOperator::IsInBounds : 0, None,
- Ty);
+ SubClassOptionalData, None, Ty);
LLVMContextImpl *pImpl = C->getContext().pImpl;
return pImpl->ExprConstants.getOrCreate(ReqTy, Key);
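
The optional InRangeIndex is threaded through to constant folding and stashed in SubclassOptionalData above the inbounds bit, offset by one so that zero still means "no inrange index" (indices of 63 or more are simply dropped). From the caller's side it marks one index whose value is known to stay inside its operand. A hedged sketch with placeholder operands and a made-up helper name:

#include "llvm/ADT/Optional.h"
#include "llvm/IR/Constants.h"
using namespace llvm;

// EltTy/GlobalPtr/Zero/FieldIdx stand in for real IR objects.
Constant *gepWithInRange(Type *EltTy, Constant *GlobalPtr, Constant *Zero,
                         Constant *FieldIdx) {
  Constant *Idxs[] = {Zero, FieldIdx};
  return ConstantExpr::getGetElementPtr(EltTy, GlobalPtr, Idxs,
                                        /*InBounds=*/true,
                                        /*InRangeIndex=*/1u);
}
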
@@ -2610,15 +2610,15 @@ APFloat ConstantDataSequential::getElementAsAPFloat(unsigned Elt) const {
llvm_unreachable("Accessor can only be used when element is float/double!");
case Type::HalfTyID: {
auto EltVal = *reinterpret_cast<const uint16_t *>(EltPtr);
- return APFloat(APFloat::IEEEhalf, APInt(16, EltVal));
+ return APFloat(APFloat::IEEEhalf(), APInt(16, EltVal));
}
case Type::FloatTyID: {
auto EltVal = *reinterpret_cast<const uint32_t *>(EltPtr);
- return APFloat(APFloat::IEEEsingle, APInt(32, EltVal));
+ return APFloat(APFloat::IEEEsingle(), APInt(32, EltVal));
}
case Type::DoubleTyID: {
auto EltVal = *reinterpret_cast<const uint64_t *>(EltPtr);
- return APFloat(APFloat::IEEEdouble, APInt(64, EltVal));
+ return APFloat(APFloat::IEEEdouble(), APInt(64, EltVal));
}
}
}
diff --git a/contrib/llvm/lib/IR/ConstantsContext.h b/contrib/llvm/lib/IR/ConstantsContext.h
index 7db87edff010..eda751d8af4a 100644
--- a/contrib/llvm/lib/IR/ConstantsContext.h
+++ b/contrib/llvm/lib/IR/ConstantsContext.h
@@ -1,4 +1,4 @@
-//===-- ConstantsContext.h - Constants-related Context Interals -----------===//
+//===-- ConstantsContext.h - Constants-related Context Interals -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,14 +15,26 @@
#ifndef LLVM_LIB_IR_CONSTANTSCONTEXT_H
#define LLVM_LIB_IR_CONSTANTSCONTEXT_H
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/InlineAsm.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Operator.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/OperandTraits.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <utility>
#define DEBUG_TYPE "ir"
@@ -32,16 +44,20 @@ namespace llvm {
/// behind the scenes to implement unary constant exprs.
class UnaryConstantExpr : public ConstantExpr {
void anchor() override;
- void *operator new(size_t, unsigned) = delete;
+
public:
- // allocate space for exactly one operand
- void *operator new(size_t s) {
- return User::operator new(s, 1);
- }
UnaryConstantExpr(unsigned Opcode, Constant *C, Type *Ty)
: ConstantExpr(Ty, Opcode, &Op<0>(), 1) {
Op<0>() = C;
}
+
+ // allocate space for exactly one operand
+ void *operator new(size_t s) {
+ return User::operator new(s, 1);
+ }
+
+ void *operator new(size_t, unsigned) = delete;
+
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
};
@@ -49,12 +65,8 @@ public:
/// behind the scenes to implement binary constant exprs.
class BinaryConstantExpr : public ConstantExpr {
void anchor() override;
- void *operator new(size_t, unsigned) = delete;
+
public:
- // allocate space for exactly two operands
- void *operator new(size_t s) {
- return User::operator new(s, 2);
- }
BinaryConstantExpr(unsigned Opcode, Constant *C1, Constant *C2,
unsigned Flags)
: ConstantExpr(C1->getType(), Opcode, &Op<0>(), 2) {
@@ -62,6 +74,14 @@ public:
Op<1>() = C2;
SubclassOptionalData = Flags;
}
+
+ // allocate space for exactly two operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 2);
+ }
+
+ void *operator new(size_t, unsigned) = delete;
+
/// Transparently provide more efficient getOperand methods.
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
};
@@ -70,18 +90,22 @@ public:
/// behind the scenes to implement select constant exprs.
class SelectConstantExpr : public ConstantExpr {
void anchor() override;
- void *operator new(size_t, unsigned) = delete;
+
public:
- // allocate space for exactly three operands
- void *operator new(size_t s) {
- return User::operator new(s, 3);
- }
SelectConstantExpr(Constant *C1, Constant *C2, Constant *C3)
: ConstantExpr(C2->getType(), Instruction::Select, &Op<0>(), 3) {
Op<0>() = C1;
Op<1>() = C2;
Op<2>() = C3;
}
+
+ // allocate space for exactly three operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 3);
+ }
+
+ void *operator new(size_t, unsigned) = delete;
+
/// Transparently provide more efficient getOperand methods.
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
};
@@ -91,18 +115,22 @@ public:
/// extractelement constant exprs.
class ExtractElementConstantExpr : public ConstantExpr {
void anchor() override;
- void *operator new(size_t, unsigned) = delete;
+
public:
- // allocate space for exactly two operands
- void *operator new(size_t s) {
- return User::operator new(s, 2);
- }
ExtractElementConstantExpr(Constant *C1, Constant *C2)
: ConstantExpr(cast<VectorType>(C1->getType())->getElementType(),
Instruction::ExtractElement, &Op<0>(), 2) {
Op<0>() = C1;
Op<1>() = C2;
}
+
+ // allocate space for exactly two operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 2);
+ }
+
+ void *operator new(size_t, unsigned) = delete;
+
/// Transparently provide more efficient getOperand methods.
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
};
@@ -112,12 +140,8 @@ public:
/// insertelement constant exprs.
class InsertElementConstantExpr : public ConstantExpr {
void anchor() override;
- void *operator new(size_t, unsigned) = delete;
+
public:
- // allocate space for exactly three operands
- void *operator new(size_t s) {
- return User::operator new(s, 3);
- }
InsertElementConstantExpr(Constant *C1, Constant *C2, Constant *C3)
: ConstantExpr(C1->getType(), Instruction::InsertElement,
&Op<0>(), 3) {
@@ -125,6 +149,14 @@ public:
Op<1>() = C2;
Op<2>() = C3;
}
+
+ // allocate space for exactly three operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 3);
+ }
+
+ void *operator new(size_t, unsigned) = delete;
+
/// Transparently provide more efficient getOperand methods.
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
};
@@ -134,12 +166,8 @@ public:
/// shufflevector constant exprs.
class ShuffleVectorConstantExpr : public ConstantExpr {
void anchor() override;
- void *operator new(size_t, unsigned) = delete;
+
public:
- // allocate space for exactly three operands
- void *operator new(size_t s) {
- return User::operator new(s, 3);
- }
ShuffleVectorConstantExpr(Constant *C1, Constant *C2, Constant *C3)
: ConstantExpr(VectorType::get(
cast<VectorType>(C1->getType())->getElementType(),
@@ -150,6 +178,14 @@ public:
Op<1>() = C2;
Op<2>() = C3;
}
+
+ // allocate space for exactly three operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 3);
+ }
+
+ void *operator new(size_t, unsigned) = delete;
+
/// Transparently provide more efficient getOperand methods.
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
};
@@ -159,12 +195,8 @@ public:
/// extractvalue constant exprs.
class ExtractValueConstantExpr : public ConstantExpr {
void anchor() override;
- void *operator new(size_t, unsigned) = delete;
+
public:
- // allocate space for exactly one operand
- void *operator new(size_t s) {
- return User::operator new(s, 1);
- }
ExtractValueConstantExpr(Constant *Agg, ArrayRef<unsigned> IdxList,
Type *DestTy)
: ConstantExpr(DestTy, Instruction::ExtractValue, &Op<0>(), 1),
@@ -172,6 +204,13 @@ public:
Op<0>() = Agg;
}
+ // allocate space for exactly one operand
+ void *operator new(size_t s) {
+ return User::operator new(s, 1);
+ }
+
+ void *operator new(size_t, unsigned) = delete;
+
/// Indices - These identify which value to extract.
const SmallVector<unsigned, 4> Indices;
@@ -191,12 +230,8 @@ public:
/// insertvalue constant exprs.
class InsertValueConstantExpr : public ConstantExpr {
void anchor() override;
- void *operator new(size_t, unsigned) = delete;
+
public:
- // allocate space for exactly one operand
- void *operator new(size_t s) {
- return User::operator new(s, 2);
- }
InsertValueConstantExpr(Constant *Agg, Constant *Val,
ArrayRef<unsigned> IdxList, Type *DestTy)
: ConstantExpr(DestTy, Instruction::InsertValue, &Op<0>(), 2),
@@ -205,6 +240,13 @@ public:
Op<1>() = Val;
}
+ // allocate space for exactly one operand
+ void *operator new(size_t s) {
+ return User::operator new(s, 2);
+ }
+
+ void *operator new(size_t, unsigned) = delete;
+
/// Indices - These identify the position for the insertion.
const SmallVector<unsigned, 4> Indices;
@@ -224,10 +266,12 @@ public:
class GetElementPtrConstantExpr : public ConstantExpr {
Type *SrcElementTy;
Type *ResElementTy;
- void anchor() override;
+
GetElementPtrConstantExpr(Type *SrcElementTy, Constant *C,
ArrayRef<Constant *> IdxList, Type *DestTy);
+ void anchor() override;
+
public:
static GetElementPtrConstantExpr *Create(Type *SrcElementTy, Constant *C,
ArrayRef<Constant *> IdxList,
@@ -237,8 +281,10 @@ public:
Result->SubclassOptionalData = Flags;
return Result;
}
+
Type *getSourceElementType() const;
Type *getResultElementType() const;
+
/// Transparently provide more efficient getOperand methods.
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
@@ -255,12 +301,8 @@ public:
// needed in order to store the predicate value for these instructions.
class CompareConstantExpr : public ConstantExpr {
void anchor() override;
- void *operator new(size_t, unsigned) = delete;
+
public:
- // allocate space for exactly two operands
- void *operator new(size_t s) {
- return User::operator new(s, 2);
- }
unsigned short predicate;
CompareConstantExpr(Type *ty, Instruction::OtherOps opc,
unsigned short pred, Constant* LHS, Constant* RHS)
@@ -268,6 +310,14 @@ public:
Op<0>() = LHS;
Op<1>() = RHS;
}
+
+ // allocate space for exactly two operands
+ void *operator new(size_t s) {
+ return User::operator new(s, 2);
+ }
+
+ void *operator new(size_t, unsigned) = delete;
+
/// Transparently provide more efficient getOperand methods.
DECLARE_TRANSPARENT_OPERAND_ACCESSORS(Value);
@@ -373,6 +423,7 @@ template <class ConstantClass> struct ConstantAggrKeyType {
bool operator==(const ConstantAggrKeyType &X) const {
return Operands == X.Operands;
}
+
bool operator==(const ConstantClass *C) const {
if (Operands.size() != C->getNumOperands())
return false;
@@ -381,6 +432,7 @@ template <class ConstantClass> struct ConstantAggrKeyType {
return false;
return true;
}
+
unsigned getHash() const {
return hash_combine_range(Operands.begin(), Operands.end());
}
@@ -416,6 +468,7 @@ struct InlineAsmKeyType {
AsmString == X.AsmString && Constraints == X.Constraints &&
FTy == X.FTy;
}
+
bool operator==(const InlineAsm *Asm) const {
return HasSideEffects == Asm->hasSideEffects() &&
IsAlignStack == Asm->isAlignStack() &&
@@ -424,6 +477,7 @@ struct InlineAsmKeyType {
Constraints == Asm->getConstraintString() &&
FTy == Asm->getFunctionType();
}
+
unsigned getHash() const {
return hash_combine(AsmString, Constraints, HasSideEffects, IsAlignStack,
AsmDialect, FTy);
@@ -553,22 +607,28 @@ private:
static inline ConstantClass *getEmptyKey() {
return ConstantClassInfo::getEmptyKey();
}
+
static inline ConstantClass *getTombstoneKey() {
return ConstantClassInfo::getTombstoneKey();
}
+
static unsigned getHashValue(const ConstantClass *CP) {
SmallVector<Constant *, 32> Storage;
return getHashValue(LookupKey(CP->getType(), ValType(CP, Storage)));
}
+
static bool isEqual(const ConstantClass *LHS, const ConstantClass *RHS) {
return LHS == RHS;
}
+
static unsigned getHashValue(const LookupKey &Val) {
return hash_combine(Val.first, Val.second.getHash());
}
+
static unsigned getHashValue(const LookupKeyHashed &Val) {
return Val.first;
}
+
static bool isEqual(const LookupKey &LHS, const ConstantClass *RHS) {
if (RHS == getEmptyKey() || RHS == getTombstoneKey())
return false;
@@ -576,6 +636,7 @@ private:
return false;
return LHS.second == RHS;
}
+
static bool isEqual(const LookupKeyHashed &LHS, const ConstantClass *RHS) {
return isEqual(LHS.second, RHS);
}
@@ -595,6 +656,7 @@ public:
for (auto &I : Map)
delete I; // Asserts that use_empty().
}
+
private:
ConstantClass *create(TypeClass *Ty, ValType V, LookupKeyHashed &HashKey) {
ConstantClass *Result = V.create(Ty);
@@ -665,4 +727,4 @@ public:
} // end namespace llvm
-#endif
+#endif // LLVM_LIB_IR_CONSTANTSCONTEXT_H
diff --git a/contrib/llvm/lib/IR/Core.cpp b/contrib/llvm/lib/IR/Core.cpp
index 3c4b0cf2f8ff..00bb476c0b3c 100644
--- a/contrib/llvm/lib/IR/Core.cpp
+++ b/contrib/llvm/lib/IR/Core.cpp
@@ -14,7 +14,7 @@
#include "llvm-c/Core.h"
#include "llvm/ADT/StringSwitch.h"
-#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Attributes.h"
#include "AttributeSetNode.h"
#include "llvm/IR/CallSite.h"
@@ -578,8 +578,11 @@ LLVMTypeRef LLVMVectorType(LLVMTypeRef ElementType, unsigned ElementCount) {
return wrap(VectorType::get(unwrap(ElementType), ElementCount));
}
-LLVMTypeRef LLVMGetElementType(LLVMTypeRef Ty) {
- return wrap(unwrap<SequentialType>(Ty)->getElementType());
+LLVMTypeRef LLVMGetElementType(LLVMTypeRef WrappedTy) {
+ auto *Ty = unwrap<Type>(WrappedTy);
+ if (auto *PTy = dyn_cast<PointerType>(Ty))
+ return wrap(PTy->getElementType());
+ return wrap(cast<SequentialType>(Ty)->getElementType());
}
unsigned LLVMGetArrayLength(LLVMTypeRef ArrayTy) {
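
Pointer types get their own branch here because PointerType no longer sits under SequentialType in this import; as seen from the C API the behaviour is meant to stay the same. A sketch (names illustrative):

#include "llvm-c/Core.h"

void exampleGetElementType(void) {
  LLVMTypeRef I32 = LLVMInt32Type();
  LLVMTypeRef Ptr = LLVMPointerType(I32, 0);
  LLVMTypeRef Arr = LLVMArrayType(I32, 4);
  // Both still answer i32, whether the wrapped type is a pointer or an array.
  LLVMTypeRef A = LLVMGetElementType(Ptr);
  LLVMTypeRef B = LLVMGetElementType(Arr);
  (void)A; (void)B;
}
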
@@ -980,7 +983,7 @@ double LLVMConstRealGetDouble(LLVMValueRef ConstantVal, LLVMBool *LosesInfo) {
bool APFLosesInfo;
APFloat APF = cFP->getValueAPF();
- APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &APFLosesInfo);
+ APF.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &APFLosesInfo);
*LosesInfo = APFLosesInfo;
return APF.convertToDouble();
}
@@ -1178,6 +1181,12 @@ LLVMValueRef LLVMConstUDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
unwrap<Constant>(RHSConstant)));
}
+LLVMValueRef LLVMConstExactUDiv(LLVMValueRef LHSConstant,
+ LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getExactUDiv(unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
LLVMValueRef LLVMConstSDiv(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
return wrap(ConstantExpr::getSDiv(unwrap<Constant>(LHSConstant),
unwrap<Constant>(RHSConstant)));
@@ -1829,17 +1838,6 @@ void LLVMSetGC(LLVMValueRef Fn, const char *GC) {
F->clearGC();
}
-void LLVMAddFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA) {
- Function *Func = unwrap<Function>(Fn);
- const AttributeSet PAL = Func->getAttributes();
- AttrBuilder B(PA);
- const AttributeSet PALnew =
- PAL.addAttributes(Func->getContext(), AttributeSet::FunctionIndex,
- AttributeSet::get(Func->getContext(),
- AttributeSet::FunctionIndex, B));
- Func->setAttributes(PALnew);
-}
-
void LLVMAddAttributeAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx,
LLVMAttributeRef A) {
unwrap<Function>(F)->addAttribute(Idx, unwrap(A));
@@ -1847,12 +1845,16 @@ void LLVMAddAttributeAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx,
unsigned LLVMGetAttributeCountAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx) {
auto *ASN = AttributeSetNode::get(unwrap<Function>(F)->getAttributes(), Idx);
+ if (!ASN)
+ return 0;
return ASN->getNumAttributes();
}
void LLVMGetAttributesAtIndex(LLVMValueRef F, LLVMAttributeIndex Idx,
LLVMAttributeRef *Attrs) {
auto *ASN = AttributeSetNode::get(unwrap<Function>(F)->getAttributes(), Idx);
+ if (!ASN)
+ return;
for (auto A: make_range(ASN->begin(), ASN->end()))
*Attrs++ = wrap(A);
}
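
With the old LLVMAttribute bitmask entry points removed, C API clients go through LLVMAttributeRef handles, and the count/get pair above is now safe to call on an index that carries no attributes. A migration sketch, assuming the LLVMCreateEnumAttribute/LLVMGetEnumAttributeKindForName helpers from llvm-c/Core.h; "nounwind" is an arbitrary example:

#include "llvm-c/Core.h"

void exampleMarkNoUnwind(LLVMContextRef Ctx, LLVMValueRef Fn) {
  unsigned Kind = LLVMGetEnumAttributeKindForName("nounwind", /*SLen=*/8);
  LLVMAttributeRef NoUnwind = LLVMCreateEnumAttribute(Ctx, Kind, /*Val=*/0);
  LLVMAddAttributeAtIndex(Fn, LLVMAttributeFunctionIndex, NoUnwind);

  // Querying an index with no attributes now just reports zero.
  unsigned N = LLVMGetAttributeCountAtIndex(Fn, LLVMAttributeReturnIndex);
  (void)N;
}
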
@@ -1892,23 +1894,6 @@ void LLVMAddTargetDependentFunctionAttr(LLVMValueRef Fn, const char *A,
Func->addAttributes(Idx, Set);
}
-void LLVMRemoveFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA) {
- Function *Func = unwrap<Function>(Fn);
- const AttributeSet PAL = Func->getAttributes();
- AttrBuilder B(PA);
- const AttributeSet PALnew =
- PAL.removeAttributes(Func->getContext(), AttributeSet::FunctionIndex,
- AttributeSet::get(Func->getContext(),
- AttributeSet::FunctionIndex, B));
- Func->setAttributes(PALnew);
-}
-
-LLVMAttribute LLVMGetFunctionAttr(LLVMValueRef Fn) {
- Function *Func = unwrap<Function>(Fn);
- const AttributeSet PAL = Func->getAttributes();
- return (LLVMAttribute)PAL.Raw(AttributeSet::FunctionIndex);
-}
-
/*--.. Operations on parameters ............................................--*/
unsigned LLVMCountParams(LLVMValueRef FnRef) {
@@ -1967,24 +1952,6 @@ LLVMValueRef LLVMGetPreviousParam(LLVMValueRef Arg) {
return wrap(&*--I);
}
-void LLVMAddAttribute(LLVMValueRef Arg, LLVMAttribute PA) {
- Argument *A = unwrap<Argument>(Arg);
- AttrBuilder B(PA);
- A->addAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, B));
-}
-
-void LLVMRemoveAttribute(LLVMValueRef Arg, LLVMAttribute PA) {
- Argument *A = unwrap<Argument>(Arg);
- AttrBuilder B(PA);
- A->removeAttr(AttributeSet::get(A->getContext(), A->getArgNo() + 1, B));
-}
-
-LLVMAttribute LLVMGetAttribute(LLVMValueRef Arg) {
- Argument *A = unwrap<Argument>(Arg);
- return (LLVMAttribute)A->getParent()->getAttributes().
- Raw(A->getArgNo()+1);
-}
-
void LLVMSetParamAlignment(LLVMValueRef Arg, unsigned align) {
Argument *A = unwrap<Argument>(Arg);
AttrBuilder B;
@@ -2193,26 +2160,6 @@ void LLVMSetInstructionCallConv(LLVMValueRef Instr, unsigned CC) {
.setCallingConv(static_cast<CallingConv::ID>(CC));
}
-void LLVMAddInstrAttribute(LLVMValueRef Instr, unsigned index,
- LLVMAttribute PA) {
- CallSite Call = CallSite(unwrap<Instruction>(Instr));
- AttrBuilder B(PA);
- Call.setAttributes(
- Call.getAttributes().addAttributes(Call->getContext(), index,
- AttributeSet::get(Call->getContext(),
- index, B)));
-}
-
-void LLVMRemoveInstrAttribute(LLVMValueRef Instr, unsigned index,
- LLVMAttribute PA) {
- CallSite Call = CallSite(unwrap<Instruction>(Instr));
- AttrBuilder B(PA);
- Call.setAttributes(Call.getAttributes()
- .removeAttributes(Call->getContext(), index,
- AttributeSet::get(Call->getContext(),
- index, B)));
-}
-
void LLVMSetInstrParamAlignment(LLVMValueRef Instr, unsigned index,
unsigned align) {
CallSite Call = CallSite(unwrap<Instruction>(Instr));
@@ -2233,6 +2180,8 @@ unsigned LLVMGetCallSiteAttributeCount(LLVMValueRef C,
LLVMAttributeIndex Idx) {
auto CS = CallSite(unwrap<Instruction>(C));
auto *ASN = AttributeSetNode::get(CS.getAttributes(), Idx);
+ if (!ASN)
+ return 0;
return ASN->getNumAttributes();
}
@@ -2240,6 +2189,8 @@ void LLVMGetCallSiteAttributes(LLVMValueRef C, LLVMAttributeIndex Idx,
LLVMAttributeRef *Attrs) {
auto CS = CallSite(unwrap<Instruction>(C));
auto *ASN = AttributeSetNode::get(CS.getAttributes(), Idx);
+ if (!ASN)
+ return;
for (auto A: make_range(ASN->begin(), ASN->end()))
*Attrs++ = wrap(A);
}
@@ -2410,8 +2361,8 @@ LLVMBuilderRef LLVMCreateBuilder(void) {
void LLVMPositionBuilder(LLVMBuilderRef Builder, LLVMBasicBlockRef Block,
LLVMValueRef Instr) {
BasicBlock *BB = unwrap(Block);
- Instruction *I = Instr? unwrap<Instruction>(Instr) : (Instruction*) BB->end();
- unwrap(Builder)->SetInsertPoint(BB, I->getIterator());
+ auto I = Instr ? unwrap<Instruction>(Instr)->getIterator() : BB->end();
+ unwrap(Builder)->SetInsertPoint(BB, I);
}
void LLVMPositionBuilderBefore(LLVMBuilderRef Builder, LLVMValueRef Instr) {
@@ -2624,6 +2575,11 @@ LLVMValueRef LLVMBuildUDiv(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
return wrap(unwrap(B)->CreateUDiv(unwrap(LHS), unwrap(RHS), Name));
}
+LLVMValueRef LLVMBuildExactUDiv(LLVMBuilderRef B, LLVMValueRef LHS,
+ LLVMValueRef RHS, const char *Name) {
+ return wrap(unwrap(B)->CreateExactUDiv(unwrap(LHS), unwrap(RHS), Name));
+}
+
LLVMValueRef LLVMBuildSDiv(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
const char *Name) {
return wrap(unwrap(B)->CreateSDiv(unwrap(LHS), unwrap(RHS), Name));
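
The LLVMConstExactUDiv and LLVMBuildExactUDiv additions expose exact unsigned division through the C API, one for constant expressions and one for the builder; "exact" asserts the division leaves no remainder. A small sketch with illustrative constants:

#include "llvm-c/Core.h"

LLVMValueRef exampleExactUDiv(LLVMBuilderRef B, LLVMValueRef Num,
                              LLVMValueRef Den) {
  // Constant form: 16 /u 4 with the exact flag set.
  LLVMTypeRef I32 = LLVMInt32Type();
  LLVMValueRef C = LLVMConstExactUDiv(LLVMConstInt(I32, 16, 0),
                                      LLVMConstInt(I32, 4, 0));
  (void)C;
  // Instruction form, emitted through an existing builder.
  return LLVMBuildExactUDiv(B, Num, Den, "quot");
}
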
diff --git a/contrib/llvm/lib/IR/DIBuilder.cpp b/contrib/llvm/lib/IR/DIBuilder.cpp
index 01b47f386e1c..2ea572490b6d 100644
--- a/contrib/llvm/lib/IR/DIBuilder.cpp
+++ b/contrib/llvm/lib/IR/DIBuilder.cpp
@@ -109,21 +109,20 @@ static DIScope *getNonCompileUnitScope(DIScope *N) {
}
DICompileUnit *DIBuilder::createCompileUnit(
- unsigned Lang, StringRef Filename, StringRef Directory, StringRef Producer,
- bool isOptimized, StringRef Flags, unsigned RunTimeVer, StringRef SplitName,
- DICompileUnit::DebugEmissionKind Kind, uint64_t DWOId) {
+ unsigned Lang, DIFile *File, StringRef Producer, bool isOptimized,
+ StringRef Flags, unsigned RunTimeVer, StringRef SplitName,
+ DICompileUnit::DebugEmissionKind Kind, uint64_t DWOId,
+ bool SplitDebugInlining) {
assert(((Lang <= dwarf::DW_LANG_Fortran08 && Lang >= dwarf::DW_LANG_C89) ||
(Lang <= dwarf::DW_LANG_hi_user && Lang >= dwarf::DW_LANG_lo_user)) &&
"Invalid Language tag");
- assert(!Filename.empty() &&
- "Unable to create compile unit without filename");
assert(!CUNode && "Can only make one compile unit per DIBuilder instance");
CUNode = DICompileUnit::getDistinct(
- VMContext, Lang, DIFile::get(VMContext, Filename, Directory), Producer,
- isOptimized, Flags, RunTimeVer, SplitName, Kind, nullptr, nullptr,
- nullptr, nullptr, nullptr, DWOId);
+ VMContext, Lang, File, Producer, isOptimized, Flags, RunTimeVer,
+ SplitName, Kind, nullptr, nullptr, nullptr, nullptr, nullptr, DWOId,
+ SplitDebugInlining);
// Create a named metadata so that it is easier to find cu in a module.
NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.cu");
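
Compile units are now created from a DIFile node (which can optionally carry a checksum) instead of raw filename/directory strings. A hedged sketch of the updated call sequence; the producer string and paths are placeholders, and the trailing parameters are left at their header defaults:

#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Dwarf.h"
using namespace llvm;

void exampleEmitCompileUnit(Module &M) {
  DIBuilder DIB(M);
  DIFile *File = DIB.createFile("t.c", "/tmp");
  DIB.createCompileUnit(dwarf::DW_LANG_C99, File, "my-frontend",
                        /*isOptimized=*/false, /*Flags=*/"", /*RunTimeVer=*/0);
  DIB.finalize();
}
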
@@ -175,8 +174,9 @@ DIImportedEntity *DIBuilder::createImportedDeclaration(DIScope *Context,
Context, Decl, Line, Name, AllImportedModules);
}
-DIFile *DIBuilder::createFile(StringRef Filename, StringRef Directory) {
- return DIFile::get(VMContext, Filename, Directory);
+DIFile *DIBuilder::createFile(StringRef Filename, StringRef Directory,
+ DIFile::ChecksumKind CSKind, StringRef Checksum) {
+ return DIFile::get(VMContext, Filename, Directory, CSKind, Checksum);
}
DIEnumerator *DIBuilder::createEnumerator(StringRef Name, int64_t Val) {
@@ -194,33 +194,32 @@ DIBasicType *DIBuilder::createNullPtrType() {
}
DIBasicType *DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits,
- uint64_t AlignInBits,
unsigned Encoding) {
assert(!Name.empty() && "Unable to create type without name");
return DIBasicType::get(VMContext, dwarf::DW_TAG_base_type, Name, SizeInBits,
- AlignInBits, Encoding);
+ 0, Encoding);
}
DIDerivedType *DIBuilder::createQualifiedType(unsigned Tag, DIType *FromTy) {
return DIDerivedType::get(VMContext, Tag, "", nullptr, 0, nullptr, FromTy, 0,
- 0, 0, 0);
+ 0, 0, DINode::FlagZero);
}
DIDerivedType *DIBuilder::createPointerType(DIType *PointeeTy,
uint64_t SizeInBits,
- uint64_t AlignInBits,
+ uint32_t AlignInBits,
StringRef Name) {
// FIXME: Why is there a name here?
return DIDerivedType::get(VMContext, dwarf::DW_TAG_pointer_type, Name,
nullptr, 0, nullptr, PointeeTy, SizeInBits,
- AlignInBits, 0, 0);
+ AlignInBits, 0, DINode::FlagZero);
}
DIDerivedType *DIBuilder::createMemberPointerType(DIType *PointeeTy,
DIType *Base,
uint64_t SizeInBits,
- uint64_t AlignInBits,
- unsigned Flags) {
+ uint32_t AlignInBits,
+ DINode::DIFlags Flags) {
return DIDerivedType::get(VMContext, dwarf::DW_TAG_ptr_to_member_type, "",
nullptr, 0, nullptr, PointeeTy, SizeInBits,
AlignInBits, 0, Flags, Base);
@@ -228,10 +227,10 @@ DIDerivedType *DIBuilder::createMemberPointerType(DIType *PointeeTy,
DIDerivedType *DIBuilder::createReferenceType(unsigned Tag, DIType *RTy,
uint64_t SizeInBits,
- uint64_t AlignInBits) {
+ uint32_t AlignInBits) {
assert(RTy && "Unable to create reference type");
return DIDerivedType::get(VMContext, Tag, "", nullptr, 0, nullptr, RTy,
- SizeInBits, AlignInBits, 0, 0);
+ SizeInBits, AlignInBits, 0, DINode::FlagZero);
}
DIDerivedType *DIBuilder::createTypedef(DIType *Ty, StringRef Name,
@@ -239,19 +238,19 @@ DIDerivedType *DIBuilder::createTypedef(DIType *Ty, StringRef Name,
DIScope *Context) {
return DIDerivedType::get(VMContext, dwarf::DW_TAG_typedef, Name, File,
LineNo, getNonCompileUnitScope(Context), Ty, 0, 0,
- 0, 0);
+ 0, DINode::FlagZero);
}
DIDerivedType *DIBuilder::createFriend(DIType *Ty, DIType *FriendTy) {
assert(Ty && "Invalid type!");
assert(FriendTy && "Invalid friend type!");
return DIDerivedType::get(VMContext, dwarf::DW_TAG_friend, "", nullptr, 0, Ty,
- FriendTy, 0, 0, 0, 0);
+ FriendTy, 0, 0, 0, DINode::FlagZero);
}
DIDerivedType *DIBuilder::createInheritance(DIType *Ty, DIType *BaseTy,
uint64_t BaseOffset,
- unsigned Flags) {
+ DINode::DIFlags Flags) {
assert(Ty && "Unable to create inheritance");
return DIDerivedType::get(VMContext, dwarf::DW_TAG_inheritance, "", nullptr,
0, Ty, BaseTy, 0, 0, BaseOffset, Flags);
@@ -260,9 +259,9 @@ DIDerivedType *DIBuilder::createInheritance(DIType *Ty, DIType *BaseTy,
DIDerivedType *DIBuilder::createMemberType(DIScope *Scope, StringRef Name,
DIFile *File, unsigned LineNumber,
uint64_t SizeInBits,
- uint64_t AlignInBits,
+ uint32_t AlignInBits,
uint64_t OffsetInBits,
- unsigned Flags, DIType *Ty) {
+ DINode::DIFlags Flags, DIType *Ty) {
return DIDerivedType::get(VMContext, dwarf::DW_TAG_member, Name, File,
LineNumber, getNonCompileUnitScope(Scope), Ty,
SizeInBits, AlignInBits, OffsetInBits, Flags);
@@ -276,33 +275,33 @@ static ConstantAsMetadata *getConstantOrNull(Constant *C) {
DIDerivedType *DIBuilder::createBitFieldMemberType(
DIScope *Scope, StringRef Name, DIFile *File, unsigned LineNumber,
- uint64_t SizeInBits, uint64_t AlignInBits, uint64_t OffsetInBits,
- uint64_t StorageOffsetInBits, unsigned Flags, DIType *Ty) {
+ uint64_t SizeInBits, uint64_t OffsetInBits, uint64_t StorageOffsetInBits,
+ DINode::DIFlags Flags, DIType *Ty) {
Flags |= DINode::FlagBitField;
return DIDerivedType::get(
VMContext, dwarf::DW_TAG_member, Name, File, LineNumber,
- getNonCompileUnitScope(Scope), Ty, SizeInBits, AlignInBits, OffsetInBits,
- Flags, ConstantAsMetadata::get(ConstantInt::get(
- IntegerType::get(VMContext, 64), StorageOffsetInBits)));
+ getNonCompileUnitScope(Scope), Ty, SizeInBits, /* AlignInBits */ 0,
+ OffsetInBits, Flags,
+ ConstantAsMetadata::get(ConstantInt::get(IntegerType::get(VMContext, 64),
+ StorageOffsetInBits)));
}
-DIDerivedType *DIBuilder::createStaticMemberType(DIScope *Scope, StringRef Name,
- DIFile *File,
- unsigned LineNumber,
- DIType *Ty, unsigned Flags,
- llvm::Constant *Val) {
+DIDerivedType *
+DIBuilder::createStaticMemberType(DIScope *Scope, StringRef Name, DIFile *File,
+ unsigned LineNumber, DIType *Ty,
+ DINode::DIFlags Flags, llvm::Constant *Val,
+ uint32_t AlignInBits) {
Flags |= DINode::FlagStaticMember;
return DIDerivedType::get(VMContext, dwarf::DW_TAG_member, Name, File,
- LineNumber, getNonCompileUnitScope(Scope), Ty, 0, 0,
- 0, Flags, getConstantOrNull(Val));
+ LineNumber, getNonCompileUnitScope(Scope), Ty, 0,
+ AlignInBits, 0, Flags, getConstantOrNull(Val));
}
-DIDerivedType *DIBuilder::createObjCIVar(StringRef Name, DIFile *File,
- unsigned LineNumber,
- uint64_t SizeInBits,
- uint64_t AlignInBits,
- uint64_t OffsetInBits, unsigned Flags,
- DIType *Ty, MDNode *PropertyNode) {
+DIDerivedType *
+DIBuilder::createObjCIVar(StringRef Name, DIFile *File, unsigned LineNumber,
+ uint64_t SizeInBits, uint32_t AlignInBits,
+ uint64_t OffsetInBits, DINode::DIFlags Flags,
+ DIType *Ty, MDNode *PropertyNode) {
return DIDerivedType::get(VMContext, dwarf::DW_TAG_member, Name, File,
LineNumber, getNonCompileUnitScope(File), Ty,
SizeInBits, AlignInBits, OffsetInBits, Flags,
@@ -358,8 +357,8 @@ DIBuilder::createTemplateParameterPack(DIScope *Context, StringRef Name,
DICompositeType *DIBuilder::createClassType(
DIScope *Context, StringRef Name, DIFile *File, unsigned LineNumber,
- uint64_t SizeInBits, uint64_t AlignInBits, uint64_t OffsetInBits,
- unsigned Flags, DIType *DerivedFrom, DINodeArray Elements,
+ uint64_t SizeInBits, uint32_t AlignInBits, uint64_t OffsetInBits,
+ DINode::DIFlags Flags, DIType *DerivedFrom, DINodeArray Elements,
DIType *VTableHolder, MDNode *TemplateParams, StringRef UniqueIdentifier) {
assert((!Context || isa<DIScope>(Context)) &&
"createClassType should be called with a valid Context");
@@ -375,7 +374,7 @@ DICompositeType *DIBuilder::createClassType(
DICompositeType *DIBuilder::createStructType(
DIScope *Context, StringRef Name, DIFile *File, unsigned LineNumber,
- uint64_t SizeInBits, uint64_t AlignInBits, unsigned Flags,
+ uint64_t SizeInBits, uint32_t AlignInBits, DINode::DIFlags Flags,
DIType *DerivedFrom, DINodeArray Elements, unsigned RunTimeLang,
DIType *VTableHolder, StringRef UniqueIdentifier) {
auto *R = DICompositeType::get(
@@ -388,7 +387,7 @@ DICompositeType *DIBuilder::createStructType(
DICompositeType *DIBuilder::createUnionType(
DIScope *Scope, StringRef Name, DIFile *File, unsigned LineNumber,
- uint64_t SizeInBits, uint64_t AlignInBits, unsigned Flags,
+ uint64_t SizeInBits, uint32_t AlignInBits, DINode::DIFlags Flags,
DINodeArray Elements, unsigned RunTimeLang, StringRef UniqueIdentifier) {
auto *R = DICompositeType::get(
VMContext, dwarf::DW_TAG_union_type, Name, File, LineNumber,
@@ -399,7 +398,8 @@ DICompositeType *DIBuilder::createUnionType(
}
DISubroutineType *DIBuilder::createSubroutineType(DITypeRefArray ParameterTypes,
- unsigned Flags, unsigned CC) {
+ DINode::DIFlags Flags,
+ unsigned CC) {
return DISubroutineType::get(VMContext, Flags, CC, ParameterTypes);
}
@@ -413,29 +413,29 @@ DICompositeType *DIBuilder::createExternalTypeRef(unsigned Tag, DIFile *File,
DICompositeType *DIBuilder::createEnumerationType(
DIScope *Scope, StringRef Name, DIFile *File, unsigned LineNumber,
- uint64_t SizeInBits, uint64_t AlignInBits, DINodeArray Elements,
+ uint64_t SizeInBits, uint32_t AlignInBits, DINodeArray Elements,
DIType *UnderlyingType, StringRef UniqueIdentifier) {
auto *CTy = DICompositeType::get(
VMContext, dwarf::DW_TAG_enumeration_type, Name, File, LineNumber,
getNonCompileUnitScope(Scope), UnderlyingType, SizeInBits, AlignInBits, 0,
- 0, Elements, 0, nullptr, nullptr, UniqueIdentifier);
+ DINode::FlagZero, Elements, 0, nullptr, nullptr, UniqueIdentifier);
AllEnumTypes.push_back(CTy);
trackIfUnresolved(CTy);
return CTy;
}
-DICompositeType *DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits,
- DIType *Ty,
+DICompositeType *DIBuilder::createArrayType(uint64_t Size,
+ uint32_t AlignInBits, DIType *Ty,
DINodeArray Subscripts) {
auto *R = DICompositeType::get(VMContext, dwarf::DW_TAG_array_type, "",
nullptr, 0, nullptr, Ty, Size, AlignInBits, 0,
- 0, Subscripts, 0, nullptr);
+ DINode::FlagZero, Subscripts, 0, nullptr);
trackIfUnresolved(R);
return R;
}
DICompositeType *DIBuilder::createVectorType(uint64_t Size,
- uint64_t AlignInBits, DIType *Ty,
+ uint32_t AlignInBits, DIType *Ty,
DINodeArray Subscripts) {
auto *R = DICompositeType::get(VMContext, dwarf::DW_TAG_array_type, "",
nullptr, 0, nullptr, Ty, Size, AlignInBits, 0,
@@ -445,7 +445,7 @@ DICompositeType *DIBuilder::createVectorType(uint64_t Size,
}
static DIType *createTypeWithFlags(LLVMContext &Context, DIType *Ty,
- unsigned FlagsToSet) {
+ DINode::DIFlags FlagsToSet) {
auto NewTy = Ty->clone();
NewTy->setFlags(NewTy->getFlags() | FlagsToSet);
return MDNode::replaceWithUniqued(std::move(NewTy));
@@ -462,7 +462,7 @@ DIType *DIBuilder::createObjectPointerType(DIType *Ty) {
// FIXME: Restrict this to the nodes where it's valid.
if (Ty->isObjectPointer())
return Ty;
- unsigned Flags = DINode::FlagObjectPointer | DINode::FlagArtificial;
+ DINode::DIFlags Flags = DINode::FlagObjectPointer | DINode::FlagArtificial;
return createTypeWithFlags(VMContext, Ty, Flags);
}
@@ -479,7 +479,7 @@ DIBasicType *DIBuilder::createUnspecifiedParameter() { return nullptr; }
DICompositeType *
DIBuilder::createForwardDecl(unsigned Tag, StringRef Name, DIScope *Scope,
DIFile *F, unsigned Line, unsigned RuntimeLang,
- uint64_t SizeInBits, uint64_t AlignInBits,
+ uint64_t SizeInBits, uint32_t AlignInBits,
StringRef UniqueIdentifier) {
// FIXME: Define in terms of createReplaceableForwardDecl() by calling
// replaceWithUniqued().
@@ -493,8 +493,8 @@ DIBuilder::createForwardDecl(unsigned Tag, StringRef Name, DIScope *Scope,
DICompositeType *DIBuilder::createReplaceableCompositeType(
unsigned Tag, StringRef Name, DIScope *Scope, DIFile *F, unsigned Line,
- unsigned RuntimeLang, uint64_t SizeInBits, uint64_t AlignInBits,
- unsigned Flags, StringRef UniqueIdentifier) {
+ unsigned RuntimeLang, uint64_t SizeInBits, uint32_t AlignInBits,
+ DINode::DIFlags Flags, StringRef UniqueIdentifier) {
auto *RetTy =
DICompositeType::getTemporary(
VMContext, Tag, Name, F, Line, getNonCompileUnitScope(Scope), nullptr,
@@ -533,30 +533,31 @@ static void checkGlobalVariableScope(DIScope *Context) {
#endif
}
-DIGlobalVariable *DIBuilder::createGlobalVariable(
+DIGlobalVariableExpression *DIBuilder::createGlobalVariableExpression(
DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *F,
- unsigned LineNumber, DIType *Ty, bool isLocalToUnit, Constant *Val,
- MDNode *Decl) {
+ unsigned LineNumber, DIType *Ty, bool isLocalToUnit, DIExpression *Expr,
+ MDNode *Decl, uint32_t AlignInBits) {
checkGlobalVariableScope(Context);
- auto *N = DIGlobalVariable::getDistinct(
+ auto *GV = DIGlobalVariable::getDistinct(
VMContext, cast_or_null<DIScope>(Context), Name, LinkageName, F,
- LineNumber, Ty, isLocalToUnit, true, Val,
- cast_or_null<DIDerivedType>(Decl));
+ LineNumber, Ty, isLocalToUnit, true, cast_or_null<DIDerivedType>(Decl),
+ AlignInBits);
+ auto *N = DIGlobalVariableExpression::get(VMContext, GV, Expr);
AllGVs.push_back(N);
return N;
}
DIGlobalVariable *DIBuilder::createTempGlobalVariableFwdDecl(
DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *F,
- unsigned LineNumber, DIType *Ty, bool isLocalToUnit, Constant *Val,
- MDNode *Decl) {
+ unsigned LineNumber, DIType *Ty, bool isLocalToUnit, MDNode *Decl,
+ uint32_t AlignInBits) {
checkGlobalVariableScope(Context);
return DIGlobalVariable::getTemporary(
VMContext, cast_or_null<DIScope>(Context), Name, LinkageName, F,
- LineNumber, Ty, isLocalToUnit, false, Val,
- cast_or_null<DIDerivedType>(Decl))
+ LineNumber, Ty, isLocalToUnit, false,
+ cast_or_null<DIDerivedType>(Decl), AlignInBits)
.release();
}
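
Global variable debug info now comes back as a DIGlobalVariableExpression (variable plus expression) rather than a DIGlobalVariable that holds the initializer constant. A hedged sketch of attaching it to an IR global, assuming this import's GlobalVariable::addDebugInfo attachment point and pre-existing scope/file/type nodes:

#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/GlobalVariable.h"
using namespace llvm;

void exampleDescribeGlobal(DIBuilder &DIB, GlobalVariable &GV, DIScope *Scope,
                           DIFile *File, DIType *Ty) {
  DIGlobalVariableExpression *GVE = DIB.createGlobalVariableExpression(
      Scope, GV.getName(), /*LinkageName=*/"", File, /*LineNo=*/1, Ty,
      /*isLocalToUnit=*/false);
  // The pair is attached to the IR global as a !dbg attachment.
  GV.addDebugInfo(GVE);
}
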
@@ -564,7 +565,8 @@ static DILocalVariable *createLocalVariable(
LLVMContext &VMContext,
DenseMap<MDNode *, SmallVector<TrackingMDNodeRef, 1>> &PreservedVariables,
DIScope *Scope, StringRef Name, unsigned ArgNo, DIFile *File,
- unsigned LineNo, DIType *Ty, bool AlwaysPreserve, unsigned Flags) {
+ unsigned LineNo, DIType *Ty, bool AlwaysPreserve, DINode::DIFlags Flags,
+ uint32_t AlignInBits) {
// FIXME: Why getNonCompileUnitScope()?
// FIXME: Why is "!Context" okay here?
// FIXME: Why doesn't this check for a subprogram or lexical block (AFAICT
@@ -573,7 +575,7 @@ static DILocalVariable *createLocalVariable(
auto *Node =
DILocalVariable::get(VMContext, cast_or_null<DILocalScope>(Context), Name,
- File, LineNo, Ty, ArgNo, Flags);
+ File, LineNo, Ty, ArgNo, Flags, AlignInBits);
if (AlwaysPreserve) {
// The optimizer may remove local variables. If there is an interest
// to preserve variable info in such situation then stash it in a
@@ -588,18 +590,20 @@ static DILocalVariable *createLocalVariable(
DILocalVariable *DIBuilder::createAutoVariable(DIScope *Scope, StringRef Name,
DIFile *File, unsigned LineNo,
DIType *Ty, bool AlwaysPreserve,
- unsigned Flags) {
+ DINode::DIFlags Flags,
+ uint32_t AlignInBits) {
return createLocalVariable(VMContext, PreservedVariables, Scope, Name,
/* ArgNo */ 0, File, LineNo, Ty, AlwaysPreserve,
- Flags);
+ Flags, AlignInBits);
}
DILocalVariable *DIBuilder::createParameterVariable(
DIScope *Scope, StringRef Name, unsigned ArgNo, DIFile *File,
- unsigned LineNo, DIType *Ty, bool AlwaysPreserve, unsigned Flags) {
+ unsigned LineNo, DIType *Ty, bool AlwaysPreserve, DINode::DIFlags Flags) {
assert(ArgNo && "Expected non-zero argument number for parameter");
return createLocalVariable(VMContext, PreservedVariables, Scope, Name, ArgNo,
- File, LineNo, Ty, AlwaysPreserve, Flags);
+ File, LineNo, Ty, AlwaysPreserve, Flags,
+ /* AlignInBits */0);
}
DIExpression *DIBuilder::createExpression(ArrayRef<uint64_t> Addr) {
@@ -612,9 +616,9 @@ DIExpression *DIBuilder::createExpression(ArrayRef<int64_t> Signed) {
return createExpression(Addr);
}
-DIExpression *DIBuilder::createBitPieceExpression(unsigned OffsetInBytes,
+DIExpression *DIBuilder::createFragmentExpression(unsigned OffsetInBytes,
unsigned SizeInBytes) {
- uint64_t Addr[] = {dwarf::DW_OP_bit_piece, OffsetInBytes, SizeInBytes};
+ uint64_t Addr[] = {dwarf::DW_OP_LLVM_fragment, OffsetInBytes, SizeInBytes};
return DIExpression::get(VMContext, Addr);
}
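
The rename tracks the switch from DW_OP_bit_piece to the internal DW_OP_LLVM_fragment operator; the interface is unchanged apart from the name. For example, describing the low half of a 16-byte variable (sketch, byte counts illustrative):

#include "llvm/IR/DIBuilder.h"
using namespace llvm;

DIExpression *exampleLowHalf(DIBuilder &DIB) {
  // Bytes [0, 8) of the variable live at whatever location this expression
  // is attached to; a second fragment would cover bytes [8, 16).
  return DIB.createFragmentExpression(/*OffsetInBytes=*/0, /*SizeInBytes=*/8);
}
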
@@ -628,8 +632,8 @@ static DISubprogram *getSubprogram(bool IsDistinct, Ts &&... Args) {
DISubprogram *DIBuilder::createFunction(
DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *File,
unsigned LineNo, DISubroutineType *Ty, bool isLocalToUnit,
- bool isDefinition, unsigned ScopeLine, unsigned Flags, bool isOptimized,
- DITemplateParameterArray TParams, DISubprogram *Decl) {
+ bool isDefinition, unsigned ScopeLine, DINode::DIFlags Flags,
+ bool isOptimized, DITemplateParameterArray TParams, DISubprogram *Decl) {
auto *Node = getSubprogram(
/* IsDistinct = */ isDefinition, VMContext,
getNonCompileUnitScope(Context), Name, LinkageName, File, LineNo, Ty,
@@ -646,8 +650,8 @@ DISubprogram *DIBuilder::createFunction(
DISubprogram *DIBuilder::createTempFunctionFwdDecl(
DIScope *Context, StringRef Name, StringRef LinkageName, DIFile *File,
unsigned LineNo, DISubroutineType *Ty, bool isLocalToUnit,
- bool isDefinition, unsigned ScopeLine, unsigned Flags, bool isOptimized,
- DITemplateParameterArray TParams, DISubprogram *Decl) {
+ bool isDefinition, unsigned ScopeLine, DINode::DIFlags Flags,
+ bool isOptimized, DITemplateParameterArray TParams, DISubprogram *Decl) {
return DISubprogram::getTemporary(
VMContext, getNonCompileUnitScope(Context), Name, LinkageName,
File, LineNo, Ty, isLocalToUnit, isDefinition, ScopeLine, nullptr,
@@ -656,13 +660,14 @@ DISubprogram *DIBuilder::createTempFunctionFwdDecl(
.release();
}
-DISubprogram *
-DIBuilder::createMethod(DIScope *Context, StringRef Name, StringRef LinkageName,
- DIFile *F, unsigned LineNo, DISubroutineType *Ty,
- bool isLocalToUnit, bool isDefinition, unsigned VK,
- unsigned VIndex, int ThisAdjustment,
- DIType *VTableHolder, unsigned Flags, bool isOptimized,
- DITemplateParameterArray TParams) {
+DISubprogram *DIBuilder::createMethod(DIScope *Context, StringRef Name,
+ StringRef LinkageName, DIFile *F,
+ unsigned LineNo, DISubroutineType *Ty,
+ bool isLocalToUnit, bool isDefinition,
+ unsigned VK, unsigned VIndex,
+ int ThisAdjustment, DIType *VTableHolder,
+ DINode::DIFlags Flags, bool isOptimized,
+ DITemplateParameterArray TParams) {
assert(getNonCompileUnitScope(Context) &&
"Methods should have both a Context and a context that isn't "
"the compile unit.");
@@ -680,9 +685,10 @@ DIBuilder::createMethod(DIScope *Context, StringRef Name, StringRef LinkageName,
}
DINamespace *DIBuilder::createNameSpace(DIScope *Scope, StringRef Name,
- DIFile *File, unsigned LineNo) {
+ DIFile *File, unsigned LineNo,
+ bool ExportSymbols) {
return DINamespace::get(VMContext, getNonCompileUnitScope(Scope), File, Name,
- LineNo);
+ LineNo, ExportSymbols);
}
DIModule *DIBuilder::createModule(DIScope *Scope, StringRef Name,
diff --git a/contrib/llvm/lib/IR/DataLayout.cpp b/contrib/llvm/lib/IR/DataLayout.cpp
index 20a15fb8831f..d15a34c0b936 100644
--- a/contrib/llvm/lib/IR/DataLayout.cpp
+++ b/contrib/llvm/lib/IR/DataLayout.cpp
@@ -182,6 +182,7 @@ void DataLayout::reset(StringRef Desc) {
BigEndian = false;
StackNaturalAlign = 0;
ManglingMode = MM_None;
+ NonIntegralAddressSpaces.clear();
// Default alignments
for (const LayoutAlignElem &E : DefaultAlignments) {
@@ -234,6 +235,19 @@ void DataLayout::parseSpecifier(StringRef Desc) {
StringRef &Tok = Split.first; // Current token.
StringRef &Rest = Split.second; // The rest of the string.
+ if (Tok == "ni") {
+ do {
+ Split = split(Rest, ':');
+ Rest = Split.second;
+ unsigned AS = getInt(Split.first);
+ if (AS == 0)
+ report_fatal_error("Address space 0 can never be non-integral");
+ NonIntegralAddressSpaces.push_back(AS);
+ } while (!Rest.empty());
+
+ continue;
+ }
+
char Specifier = Tok.front();
Tok = Tok.substr(1);
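
The new "ni" component lists address spaces whose pointers have no stable integral representation, and address space 0 is rejected outright. A sketch of a layout string using it, with made-up address spaces; isNonIntegralAddressSpace is assumed to be the matching query helper on this import's DataLayout:

#include "llvm/IR/DataLayout.h"
using namespace llvm;

void exampleNonIntegralPointers() {
  // Little endian, 64-bit pointers; address spaces 2 and 3 are non-integral.
  DataLayout DL("e-p:64:64-ni:2:3");
  bool NI = DL.isNonIntegralAddressSpace(2);   // expected to be true
  (void)NI;
}
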
@@ -492,10 +506,7 @@ unsigned DataLayout::getAlignmentInfo(AlignTypeEnum AlignType,
// with what clang and llvm-gcc do.
unsigned Align = getTypeAllocSize(cast<VectorType>(Ty)->getElementType());
Align *= cast<VectorType>(Ty)->getNumElements();
- // If the alignment is not a power of 2, round up to the next power of 2.
- // This happens for non-power-of-2 length vectors.
- if (Align & (Align-1))
- Align = NextPowerOf2(Align);
+ Align = PowerOf2Ceil(Align);
return Align;
}
}
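
PowerOf2Ceil replaces the open-coded round-up-to-a-power-of-two pattern here and in the hunk below, with the same result for these sizes. Illustration (function name is made up):

#include <cstdint>
#include "llvm/Support/MathExtras.h"

// PowerOf2Ceil(12) == 16 and PowerOf2Ceil(16) == 16, matching the old
// "if (Align & (Align - 1)) Align = NextPowerOf2(Align);" sequence.
uint64_t exampleRoundedAlign(uint64_t Align) {
  return llvm::PowerOf2Ceil(Align);
}
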
@@ -508,8 +519,7 @@ unsigned DataLayout::getAlignmentInfo(AlignTypeEnum AlignType,
// layout.
if (BestMatchIdx == -1) {
unsigned Align = getTypeStoreSize(Ty);
- if (Align & (Align-1))
- Align = NextPowerOf2(Align);
+ Align = PowerOf2Ceil(Align);
return Align;
}
@@ -727,15 +737,12 @@ int64_t DataLayout::getIndexedOffsetInType(Type *ElemTy,
ArrayRef<Value *> Indices) const {
int64_t Result = 0;
- // We can use 0 as the address space as we don't need
- // to get pointer types back from gep_type_iterator.
- unsigned AS = 0;
generic_gep_type_iterator<Value* const*>
- GTI = gep_type_begin(ElemTy, AS, Indices),
- GTE = gep_type_end(ElemTy, AS, Indices);
+ GTI = gep_type_begin(ElemTy, Indices),
+ GTE = gep_type_end(ElemTy, Indices);
for (; GTI != GTE; ++GTI) {
Value *Idx = GTI.getOperand();
- if (auto *STy = dyn_cast<StructType>(*GTI)) {
+ if (StructType *STy = GTI.getStructTypeOrNull()) {
assert(Idx->getType()->isIntegerTy(32) && "Illegal struct idx");
unsigned FieldNo = cast<ConstantInt>(Idx)->getZExtValue();
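
getIndexedOffsetInType now uses the reworked gep_type_iterator, which no longer needs an address space and exposes struct steps through getStructTypeOrNull() instead of dereferencing to the indexed type. A sketch of the iteration pattern as a caller would write it (names illustrative):

#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

void exampleWalkGEP(GetElementPtrInst &GEP) {
  for (auto GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP); GTI != GTE;
       ++GTI) {
    if (StructType *STy = GTI.getStructTypeOrNull()) {
      // Struct step: the operand is a constant field number.
      (void)STy;
    } else {
      // Array/vector/pointer step: the operand scales by the element size.
    }
  }
}
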
diff --git a/contrib/llvm/lib/IR/DebugInfo.cpp b/contrib/llvm/lib/IR/DebugInfo.cpp
index 1d3c8299255b..6b9bc689a446 100644
--- a/contrib/llvm/lib/IR/DebugInfo.cpp
+++ b/contrib/llvm/lib/IR/DebugInfo.cpp
@@ -53,11 +53,12 @@ void DebugInfoFinder::reset() {
void DebugInfoFinder::processModule(const Module &M) {
for (auto *CU : M.debug_compile_units()) {
addCompileUnit(CU);
- for (auto *DIG : CU->getGlobalVariables()) {
- if (addGlobalVariable(DIG)) {
- processScope(DIG->getScope());
- processType(DIG->getType().resolve());
- }
+ for (auto DIG : CU->getGlobalVariables()) {
+ if (!addGlobalVariable(DIG))
+ continue;
+ auto *GV = DIG->getVariable();
+ processScope(GV->getScope());
+ processType(GV->getType().resolve());
}
for (auto *ET : CU->getEnumTypes())
processType(ET);
@@ -206,10 +207,7 @@ bool DebugInfoFinder::addCompileUnit(DICompileUnit *CU) {
return true;
}
-bool DebugInfoFinder::addGlobalVariable(DIGlobalVariable *DIG) {
- if (!DIG)
- return false;
-
+bool DebugInfoFinder::addGlobalVariable(DIGlobalVariableExpression *DIG) {
if (!NodesSeen.insert(DIG).second)
return false;
@@ -272,7 +270,11 @@ bool llvm::StripDebugInfo(Module &M) {
NME = M.named_metadata_end(); NMI != NME;) {
NamedMDNode *NMD = &*NMI;
++NMI;
- if (NMD->getName().startswith("llvm.dbg.")) {
+
+  // We're stripping debug info, and without it, coverage information
+  // doesn't quite make sense.
+ if (NMD->getName().startswith("llvm.dbg.") ||
+ NMD->getName() == "llvm.gcov") {
NMD->eraseFromParent();
Changed = true;
}
@@ -281,12 +283,314 @@ bool llvm::StripDebugInfo(Module &M) {
for (Function &F : M)
Changed |= stripDebugInfo(F);
+ for (auto &GV : M.globals()) {
+ SmallVector<MDNode *, 1> MDs;
+ GV.getMetadata(LLVMContext::MD_dbg, MDs);
+ if (!MDs.empty()) {
+ GV.eraseMetadata(LLVMContext::MD_dbg);
+ Changed = true;
+ }
+ }
+
if (GVMaterializer *Materializer = M.getMaterializer())
Materializer->setStripDebugInfo();
return Changed;
}
+namespace {
+
+/// Helper class to downgrade -g metadata to -gline-tables-only metadata.
+class DebugTypeInfoRemoval {
+ DenseMap<Metadata *, Metadata *> Replacements;
+
+public:
+ /// The (void)() type.
+ MDNode *EmptySubroutineType;
+
+private:
+ /// Remember what linkage name we originally had before stripping. If we end
+  /// up making two subprograms identical that originally had different linkage
+ /// names, then we need to make one of them distinct, to avoid them getting
+ /// uniqued. Maps the new node to the old linkage name.
+ DenseMap<DISubprogram *, StringRef> NewToLinkageName;
+
+ // TODO: Remember the distinct subprogram we created for a given linkage name,
+ // so that we can continue to unique whenever possible. Map <newly created
+ // node, old linkage name> to the first (possibly distinct) mdsubprogram
+ // created for that combination. This is not strictly needed for correctness,
+ // but can cut down on the number of MDNodes and let us diff cleanly with the
+ // output of -gline-tables-only.
+
+public:
+ DebugTypeInfoRemoval(LLVMContext &C)
+ : EmptySubroutineType(DISubroutineType::get(C, DINode::FlagZero, 0,
+ MDNode::get(C, {}))) {}
+
+ Metadata *map(Metadata *M) {
+ if (!M)
+ return nullptr;
+ auto Replacement = Replacements.find(M);
+ if (Replacement != Replacements.end())
+ return Replacement->second;
+
+ return M;
+ }
+ MDNode *mapNode(Metadata *N) { return dyn_cast_or_null<MDNode>(map(N)); }
+
+ /// Recursively remap N and all its referenced children. Does a DF post-order
+  /// traversal, so as to remap bottom-up.
+ void traverseAndRemap(MDNode *N) { traverse(N); }
+
+private:
+ // Create a new DISubprogram, to replace the one given.
+ DISubprogram *getReplacementSubprogram(DISubprogram *MDS) {
+ auto *FileAndScope = cast_or_null<DIFile>(map(MDS->getFile()));
+ StringRef LinkageName = MDS->getName().empty() ? MDS->getLinkageName() : "";
+ DISubprogram *Declaration = nullptr;
+ auto *Type = cast_or_null<DISubroutineType>(map(MDS->getType()));
+ DITypeRef ContainingType(map(MDS->getContainingType()));
+ auto *Unit = cast_or_null<DICompileUnit>(map(MDS->getUnit()));
+ auto Variables = nullptr;
+ auto TemplateParams = nullptr;
+
+  // Make a distinct DISubprogram, for situations that warrant it.
+ auto distinctMDSubprogram = [&]() {
+ return DISubprogram::getDistinct(
+ MDS->getContext(), FileAndScope, MDS->getName(), LinkageName,
+ FileAndScope, MDS->getLine(), Type, MDS->isLocalToUnit(),
+ MDS->isDefinition(), MDS->getScopeLine(), ContainingType,
+ MDS->getVirtuality(), MDS->getVirtualIndex(),
+ MDS->getThisAdjustment(), MDS->getFlags(), MDS->isOptimized(), Unit,
+ TemplateParams, Declaration, Variables);
+ };
+
+ if (MDS->isDistinct())
+ return distinctMDSubprogram();
+
+ auto *NewMDS = DISubprogram::get(
+ MDS->getContext(), FileAndScope, MDS->getName(), LinkageName,
+ FileAndScope, MDS->getLine(), Type, MDS->isLocalToUnit(),
+ MDS->isDefinition(), MDS->getScopeLine(), ContainingType,
+ MDS->getVirtuality(), MDS->getVirtualIndex(), MDS->getThisAdjustment(),
+ MDS->getFlags(), MDS->isOptimized(), Unit, TemplateParams, Declaration,
+ Variables);
+
+ StringRef OldLinkageName = MDS->getLinkageName();
+
+ // See if we need to make a distinct one.
+ auto OrigLinkage = NewToLinkageName.find(NewMDS);
+ if (OrigLinkage != NewToLinkageName.end()) {
+ if (OrigLinkage->second == OldLinkageName)
+ // We're good.
+ return NewMDS;
+
+ // Otherwise, need to make a distinct one.
+ // TODO: Query the map to see if we already have one.
+ return distinctMDSubprogram();
+ }
+
+ NewToLinkageName.insert({NewMDS, MDS->getLinkageName()});
+ return NewMDS;
+ }
+
+ /// Create a new compile unit, to replace the one given
+ DICompileUnit *getReplacementCU(DICompileUnit *CU) {
+ // Drop skeleton CUs.
+ if (CU->getDWOId())
+ return nullptr;
+
+ auto *File = cast_or_null<DIFile>(map(CU->getFile()));
+ MDTuple *EnumTypes = nullptr;
+ MDTuple *RetainedTypes = nullptr;
+ MDTuple *GlobalVariables = nullptr;
+ MDTuple *ImportedEntities = nullptr;
+ return DICompileUnit::getDistinct(
+ CU->getContext(), CU->getSourceLanguage(), File, CU->getProducer(),
+ CU->isOptimized(), CU->getFlags(), CU->getRuntimeVersion(),
+ CU->getSplitDebugFilename(), DICompileUnit::LineTablesOnly, EnumTypes,
+ RetainedTypes, GlobalVariables, ImportedEntities, CU->getMacros(),
+ CU->getDWOId(), CU->getSplitDebugInlining());
+ }
+
+ DILocation *getReplacementMDLocation(DILocation *MLD) {
+ auto *Scope = map(MLD->getScope());
+ auto *InlinedAt = map(MLD->getInlinedAt());
+ if (MLD->isDistinct())
+ return DILocation::getDistinct(MLD->getContext(), MLD->getLine(),
+ MLD->getColumn(), Scope, InlinedAt);
+ return DILocation::get(MLD->getContext(), MLD->getLine(), MLD->getColumn(),
+ Scope, InlinedAt);
+ }
+
+ /// Create a new generic MDNode, to replace the one given
+ MDNode *getReplacementMDNode(MDNode *N) {
+ SmallVector<Metadata *, 8> Ops;
+ Ops.reserve(N->getNumOperands());
+ for (auto &I : N->operands())
+ if (I)
+ Ops.push_back(map(I));
+ auto *Ret = MDNode::get(N->getContext(), Ops);
+ return Ret;
+ }
+
+ /// Attempt to re-map N to a newly created node.
+ void remap(MDNode *N) {
+ if (Replacements.count(N))
+ return;
+
+ auto doRemap = [&](MDNode *N) -> MDNode * {
+ if (!N)
+ return nullptr;
+ if (auto *MDSub = dyn_cast<DISubprogram>(N)) {
+ remap(MDSub->getUnit());
+ return getReplacementSubprogram(MDSub);
+ }
+ if (isa<DISubroutineType>(N))
+ return EmptySubroutineType;
+ if (auto *CU = dyn_cast<DICompileUnit>(N))
+ return getReplacementCU(CU);
+ if (isa<DIFile>(N))
+ return N;
+ if (auto *MDLB = dyn_cast<DILexicalBlockBase>(N))
+ // Remap to our referenced scope (recursively).
+ return mapNode(MDLB->getScope());
+ if (auto *MLD = dyn_cast<DILocation>(N))
+ return getReplacementMDLocation(MLD);
+
+ // Otherwise, if we see these, just drop them now. Not strictly necessary,
+ // but this speeds things up a little.
+ if (isa<DINode>(N))
+ return nullptr;
+
+ return getReplacementMDNode(N);
+ };
+ Replacements[N] = doRemap(N);
+ }
+
+ /// Do the remapping traversal.
+ void traverse(MDNode *);
+};
+
+} // Anonymous namespace.
+
+void DebugTypeInfoRemoval::traverse(MDNode *N) {
+ if (!N || Replacements.count(N))
+ return;
+
+  // To avoid cycles, as well as for efficiency's sake, we will sometimes prune
+ // parts of the graph.
+ auto prune = [](MDNode *Parent, MDNode *Child) {
+ if (auto *MDS = dyn_cast<DISubprogram>(Parent))
+ return Child == MDS->getVariables().get();
+ return false;
+ };
+
+ SmallVector<MDNode *, 16> ToVisit;
+ DenseSet<MDNode *> Opened;
+
+ // Visit each node starting at N in post order, and map them.
+ ToVisit.push_back(N);
+ while (!ToVisit.empty()) {
+ auto *N = ToVisit.back();
+ if (!Opened.insert(N).second) {
+ // Close it.
+ remap(N);
+ ToVisit.pop_back();
+ continue;
+ }
+ for (auto &I : N->operands())
+ if (auto *MDN = dyn_cast_or_null<MDNode>(I))
+ if (!Opened.count(MDN) && !Replacements.count(MDN) && !prune(N, MDN) &&
+ !isa<DICompileUnit>(MDN))
+ ToVisit.push_back(MDN);
+ }
+}
+
+bool llvm::stripNonLineTableDebugInfo(Module &M) {
+ bool Changed = false;
+
+ // First off, delete the debug intrinsics.
+ auto RemoveUses = [&](StringRef Name) {
+ if (auto *DbgVal = M.getFunction(Name)) {
+ while (!DbgVal->use_empty())
+ cast<Instruction>(DbgVal->user_back())->eraseFromParent();
+ DbgVal->eraseFromParent();
+ Changed = true;
+ }
+ };
+ RemoveUses("llvm.dbg.declare");
+ RemoveUses("llvm.dbg.value");
+
+ // Delete non-CU debug info named metadata nodes.
+ for (auto NMI = M.named_metadata_begin(), NME = M.named_metadata_end();
+ NMI != NME;) {
+ NamedMDNode *NMD = &*NMI;
+ ++NMI;
+ // Specifically keep dbg.cu around.
+ if (NMD->getName() == "llvm.dbg.cu")
+ continue;
+ }
+
+ // Drop all dbg attachments from global variables.
+ for (auto &GV : M.globals())
+ GV.eraseMetadata(LLVMContext::MD_dbg);
+
+ DebugTypeInfoRemoval Mapper(M.getContext());
+ auto remap = [&](llvm::MDNode *Node) -> llvm::MDNode * {
+ if (!Node)
+ return nullptr;
+ Mapper.traverseAndRemap(Node);
+ auto *NewNode = Mapper.mapNode(Node);
+ Changed |= Node != NewNode;
+ Node = NewNode;
+ return NewNode;
+ };
+
+ // Rewrite the DebugLocs to be equivalent to what
+ // -gline-tables-only would have created.
+ for (auto &F : M) {
+ if (auto *SP = F.getSubprogram()) {
+ Mapper.traverseAndRemap(SP);
+ auto *NewSP = cast<DISubprogram>(Mapper.mapNode(SP));
+ Changed |= SP != NewSP;
+ F.setSubprogram(NewSP);
+ }
+ for (auto &BB : F) {
+ for (auto &I : BB) {
+ if (I.getDebugLoc() == DebugLoc())
+ continue;
+
+ // Make a replacement.
+ auto &DL = I.getDebugLoc();
+ auto *Scope = DL.getScope();
+ MDNode *InlinedAt = DL.getInlinedAt();
+ Scope = remap(Scope);
+ InlinedAt = remap(InlinedAt);
+ I.setDebugLoc(
+ DebugLoc::get(DL.getLine(), DL.getCol(), Scope, InlinedAt));
+ }
+ }
+ }
+
+ // Create a new llvm.dbg.cu, which is equivalent to the one
+ // -gline-tables-only would have created.
+ for (auto &NMD : M.getNamedMDList()) {
+ SmallVector<MDNode *, 8> Ops;
+ for (MDNode *Op : NMD.operands())
+ Ops.push_back(remap(Op));
+
+ if (!Changed)
+ continue;
+
+ NMD.clearOperands();
+ for (auto *Op : Ops)
+ if (Op)
+ NMD.addOperand(Op);
+ }
+ return Changed;
+}
+
unsigned llvm::getDebugMetadataVersionFromModule(const Module &M) {
if (auto *Val = mdconst::dyn_extract_or_null<ConstantInt>(
M.getModuleFlag("Debug Info Version")))
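
The helper class and llvm::stripNonLineTableDebugInfo above downgrade full -g metadata to what -gline-tables-only would have emitted. A hedged usage sketch, assuming both entry points are declared in llvm/IR/DebugInfo.h next to the existing StripDebugInfo:

#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Module.h"

// Hedged sketch: downgrade a module's -g metadata to the -gline-tables-only
// equivalent using the new entry point, keeping DILocations usable.
static bool downgradeDebugInfo(llvm::Module &M) {
  bool Changed = llvm::stripNonLineTableDebugInfo(M);
  // llvm::StripDebugInfo(M) remains available when no debug info is wanted.
  return Changed;
}
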
diff --git a/contrib/llvm/lib/IR/DebugInfoMetadata.cpp b/contrib/llvm/lib/IR/DebugInfoMetadata.cpp
index c58e3685b3c9..8e21a907e15e 100644
--- a/contrib/llvm/lib/IR/DebugInfoMetadata.cpp
+++ b/contrib/llvm/lib/IR/DebugInfoMetadata.cpp
@@ -65,29 +65,29 @@ DILocation *DILocation::getImpl(LLVMContext &Context, unsigned Line,
Storage, Context.pImpl->DILocations);
}
-unsigned DINode::getFlag(StringRef Flag) {
- return StringSwitch<unsigned>(Flag)
+DINode::DIFlags DINode::getFlag(StringRef Flag) {
+ return StringSwitch<DIFlags>(Flag)
#define HANDLE_DI_FLAG(ID, NAME) .Case("DIFlag" #NAME, Flag##NAME)
#include "llvm/IR/DebugInfoFlags.def"
- .Default(0);
+ .Default(DINode::FlagZero);
}
-const char *DINode::getFlagString(unsigned Flag) {
+StringRef DINode::getFlagString(DIFlags Flag) {
switch (Flag) {
- default:
- return "";
#define HANDLE_DI_FLAG(ID, NAME) \
case Flag##NAME: \
return "DIFlag" #NAME;
#include "llvm/IR/DebugInfoFlags.def"
}
+ return "";
}
-unsigned DINode::splitFlags(unsigned Flags,
- SmallVectorImpl<unsigned> &SplitFlags) {
- // Accessibility and member pointer flags need to be specially handled, since
- // they're packed together.
- if (unsigned A = Flags & FlagAccessibility) {
+DINode::DIFlags DINode::splitFlags(DIFlags Flags,
+ SmallVectorImpl<DIFlags> &SplitFlags) {
+ // Flags that are packed together need to be specially handled, so
+ // that, for example, we emit "DIFlagPublic" and not
+ // "DIFlagPrivate | DIFlagProtected".
+ if (DIFlags A = Flags & FlagAccessibility) {
if (A == FlagPrivate)
SplitFlags.push_back(FlagPrivate);
else if (A == FlagProtected)
@@ -96,7 +96,7 @@ unsigned DINode::splitFlags(unsigned Flags,
SplitFlags.push_back(FlagPublic);
Flags &= ~A;
}
- if (unsigned R = Flags & FlagPtrToMemberRep) {
+ if (DIFlags R = Flags & FlagPtrToMemberRep) {
if (R == FlagSingleInheritance)
SplitFlags.push_back(FlagSingleInheritance);
else if (R == FlagMultipleInheritance)
@@ -105,14 +105,17 @@ unsigned DINode::splitFlags(unsigned Flags,
SplitFlags.push_back(FlagVirtualInheritance);
Flags &= ~R;
}
+ if ((Flags & FlagIndirectVirtualBase) == FlagIndirectVirtualBase) {
+ Flags &= ~FlagIndirectVirtualBase;
+ SplitFlags.push_back(FlagIndirectVirtualBase);
+ }
#define HANDLE_DI_FLAG(ID, NAME) \
- if (unsigned Bit = Flags & ID) { \
+ if (DIFlags Bit = Flags & Flag##NAME) { \
SplitFlags.push_back(Bit); \
Flags &= ~Bit; \
}
#include "llvm/IR/DebugInfoFlags.def"
-
return Flags;
}
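
With the flag accessors now typed as DINode::DIFlags, splitFlags still has to treat the packed accessibility field as a unit so that, for example, only DIFlagPublic is reported. A standalone sketch of that idea (enum values are illustrative, not LLVM's real encoding):

#include <cstdint>
#include <vector>

// Standalone sketch of the "split packed flags" idea above: accessibility is
// a multi-bit field, so exactly one of Private/Protected/Public is reported
// rather than its individual bits.
enum DIFlags : uint32_t {
  FlagZero = 0,
  FlagPrivate = 1,
  FlagProtected = 2,
  FlagPublic = 3,
  FlagAccessibility = FlagPrivate | FlagProtected | FlagPublic,
};

static std::vector<DIFlags> splitAccessibility(uint32_t Flags) {
  std::vector<DIFlags> Out;
  if (uint32_t A = Flags & FlagAccessibility)
    Out.push_back(static_cast<DIFlags>(A)); // one combined value, e.g. FlagPublic
  return Out;
}
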
@@ -229,7 +232,7 @@ DIEnumerator *DIEnumerator::getImpl(LLVMContext &Context, int64_t Value,
DIBasicType *DIBasicType::getImpl(LLVMContext &Context, unsigned Tag,
MDString *Name, uint64_t SizeInBits,
- uint64_t AlignInBits, unsigned Encoding,
+ uint32_t AlignInBits, unsigned Encoding,
StorageType Storage, bool ShouldCreate) {
assert(isCanonical(Name) && "Expected canonical MDString");
DEFINE_GETIMPL_LOOKUP(DIBasicType,
@@ -242,7 +245,7 @@ DIBasicType *DIBasicType::getImpl(LLVMContext &Context, unsigned Tag,
DIDerivedType *DIDerivedType::getImpl(
LLVMContext &Context, unsigned Tag, MDString *Name, Metadata *File,
unsigned Line, Metadata *Scope, Metadata *BaseType, uint64_t SizeInBits,
- uint64_t AlignInBits, uint64_t OffsetInBits, unsigned Flags,
+ uint32_t AlignInBits, uint64_t OffsetInBits, DIFlags Flags,
Metadata *ExtraData, StorageType Storage, bool ShouldCreate) {
assert(isCanonical(Name) && "Expected canonical MDString");
DEFINE_GETIMPL_LOOKUP(DIDerivedType,
@@ -257,7 +260,7 @@ DIDerivedType *DIDerivedType::getImpl(
DICompositeType *DICompositeType::getImpl(
LLVMContext &Context, unsigned Tag, MDString *Name, Metadata *File,
unsigned Line, Metadata *Scope, Metadata *BaseType, uint64_t SizeInBits,
- uint64_t AlignInBits, uint64_t OffsetInBits, unsigned Flags,
+ uint32_t AlignInBits, uint64_t OffsetInBits, DIFlags Flags,
Metadata *Elements, unsigned RuntimeLang, Metadata *VTableHolder,
Metadata *TemplateParams, MDString *Identifier, StorageType Storage,
bool ShouldCreate) {
@@ -278,8 +281,8 @@ DICompositeType *DICompositeType::getImpl(
DICompositeType *DICompositeType::buildODRType(
LLVMContext &Context, MDString &Identifier, unsigned Tag, MDString *Name,
Metadata *File, unsigned Line, Metadata *Scope, Metadata *BaseType,
- uint64_t SizeInBits, uint64_t AlignInBits, uint64_t OffsetInBits,
- unsigned Flags, Metadata *Elements, unsigned RuntimeLang,
+ uint64_t SizeInBits, uint32_t AlignInBits, uint64_t OffsetInBits,
+ DIFlags Flags, Metadata *Elements, unsigned RuntimeLang,
Metadata *VTableHolder, Metadata *TemplateParams) {
assert(!Identifier.getString().empty() && "Expected valid identifier");
if (!Context.isODRUniquingDebugTypes())
@@ -312,8 +315,8 @@ DICompositeType *DICompositeType::buildODRType(
DICompositeType *DICompositeType::getODRType(
LLVMContext &Context, MDString &Identifier, unsigned Tag, MDString *Name,
Metadata *File, unsigned Line, Metadata *Scope, Metadata *BaseType,
- uint64_t SizeInBits, uint64_t AlignInBits, uint64_t OffsetInBits,
- unsigned Flags, Metadata *Elements, unsigned RuntimeLang,
+ uint64_t SizeInBits, uint32_t AlignInBits, uint64_t OffsetInBits,
+ DIFlags Flags, Metadata *Elements, unsigned RuntimeLang,
Metadata *VTableHolder, Metadata *TemplateParams) {
assert(!Identifier.getString().empty() && "Expected valid identifier");
if (!Context.isODRUniquingDebugTypes())
@@ -335,9 +338,8 @@ DICompositeType *DICompositeType::getODRTypeIfExists(LLVMContext &Context,
return Context.pImpl->DITypeMap->lookup(&Identifier);
}
-DISubroutineType *DISubroutineType::getImpl(LLVMContext &Context,
- unsigned Flags, uint8_t CC,
- Metadata *TypeArray,
+DISubroutineType *DISubroutineType::getImpl(LLVMContext &Context, DIFlags Flags,
+ uint8_t CC, Metadata *TypeArray,
StorageType Storage,
bool ShouldCreate) {
DEFINE_GETIMPL_LOOKUP(DISubroutineType, (Flags, CC, TypeArray));
@@ -345,14 +347,34 @@ DISubroutineType *DISubroutineType::getImpl(LLVMContext &Context,
DEFINE_GETIMPL_STORE(DISubroutineType, (Flags, CC), Ops);
}
+static const char *ChecksumKindName[DIFile::CSK_Last + 1] = {
+ "CSK_None",
+ "CSK_MD5",
+ "CSK_SHA1"
+};
+
+DIFile::ChecksumKind DIFile::getChecksumKind(StringRef CSKindStr) {
+ return StringSwitch<DIFile::ChecksumKind>(CSKindStr)
+ .Case("CSK_MD5", DIFile::CSK_MD5)
+ .Case("CSK_SHA1", DIFile::CSK_SHA1)
+ .Default(DIFile::CSK_None);
+}
+
+StringRef DIFile::getChecksumKindAsString() const {
+ assert(CSKind <= DIFile::CSK_Last && "Invalid checksum kind");
+ return ChecksumKindName[CSKind];
+}
+
DIFile *DIFile::getImpl(LLVMContext &Context, MDString *Filename,
- MDString *Directory, StorageType Storage,
+ MDString *Directory, DIFile::ChecksumKind CSKind,
+ MDString *Checksum, StorageType Storage,
bool ShouldCreate) {
assert(isCanonical(Filename) && "Expected canonical MDString");
assert(isCanonical(Directory) && "Expected canonical MDString");
- DEFINE_GETIMPL_LOOKUP(DIFile, (Filename, Directory));
- Metadata *Ops[] = {Filename, Directory};
- DEFINE_GETIMPL_STORE_NO_CONSTRUCTOR_ARGS(DIFile, Ops);
+ assert(isCanonical(Checksum) && "Expected canonical MDString");
+ DEFINE_GETIMPL_LOOKUP(DIFile, (Filename, Directory, CSKind, Checksum));
+ Metadata *Ops[] = {Filename, Directory, Checksum};
+ DEFINE_GETIMPL_STORE(DIFile, (CSKind), Ops);
}
DICompileUnit *DICompileUnit::getImpl(
@@ -361,7 +383,8 @@ DICompileUnit *DICompileUnit::getImpl(
unsigned RuntimeVersion, MDString *SplitDebugFilename,
unsigned EmissionKind, Metadata *EnumTypes, Metadata *RetainedTypes,
Metadata *GlobalVariables, Metadata *ImportedEntities, Metadata *Macros,
- uint64_t DWOId, StorageType Storage, bool ShouldCreate) {
+ uint64_t DWOId, bool SplitDebugInlining, StorageType Storage,
+ bool ShouldCreate) {
assert(Storage != Uniqued && "Cannot unique DICompileUnit");
assert(isCanonical(Producer) && "Expected canonical MDString");
assert(isCanonical(Flags) && "Expected canonical MDString");
@@ -371,9 +394,10 @@ DICompileUnit *DICompileUnit::getImpl(
File, Producer, Flags, SplitDebugFilename,
EnumTypes, RetainedTypes, GlobalVariables, ImportedEntities,
Macros};
- return storeImpl(new (array_lengthof(Ops)) DICompileUnit(
- Context, Storage, SourceLanguage, IsOptimized,
- RuntimeVersion, EmissionKind, DWOId, Ops),
+ return storeImpl(new (array_lengthof(Ops))
+ DICompileUnit(Context, Storage, SourceLanguage,
+ IsOptimized, RuntimeVersion, EmissionKind,
+ DWOId, SplitDebugInlining, Ops),
Storage);
}
@@ -412,7 +436,7 @@ DISubprogram *DISubprogram::getImpl(
MDString *LinkageName, Metadata *File, unsigned Line, Metadata *Type,
bool IsLocalToUnit, bool IsDefinition, unsigned ScopeLine,
Metadata *ContainingType, unsigned Virtuality, unsigned VirtualIndex,
- int ThisAdjustment, unsigned Flags, bool IsOptimized, Metadata *Unit,
+ int ThisAdjustment, DIFlags Flags, bool IsOptimized, Metadata *Unit,
Metadata *TemplateParams, Metadata *Declaration, Metadata *Variables,
StorageType Storage, bool ShouldCreate) {
assert(isCanonical(Name) && "Expected canonical MDString");
@@ -467,11 +491,12 @@ DILexicalBlockFile *DILexicalBlockFile::getImpl(LLVMContext &Context,
DINamespace *DINamespace::getImpl(LLVMContext &Context, Metadata *Scope,
Metadata *File, MDString *Name, unsigned Line,
- StorageType Storage, bool ShouldCreate) {
+ bool ExportSymbols, StorageType Storage,
+ bool ShouldCreate) {
assert(isCanonical(Name) && "Expected canonical MDString");
- DEFINE_GETIMPL_LOOKUP(DINamespace, (Scope, File, Name, Line));
+ DEFINE_GETIMPL_LOOKUP(DINamespace, (Scope, File, Name, Line, ExportSymbols));
Metadata *Ops[] = {File, Scope, Name};
- DEFINE_GETIMPL_STORE(DINamespace, (Line), Ops);
+ DEFINE_GETIMPL_STORE(DINamespace, (Line, ExportSymbols), Ops);
}
DIModule *DIModule::getImpl(LLVMContext &Context, Metadata *Scope,
@@ -509,25 +534,27 @@ DIGlobalVariable *
DIGlobalVariable::getImpl(LLVMContext &Context, Metadata *Scope, MDString *Name,
MDString *LinkageName, Metadata *File, unsigned Line,
Metadata *Type, bool IsLocalToUnit, bool IsDefinition,
- Metadata *Variable,
Metadata *StaticDataMemberDeclaration,
- StorageType Storage, bool ShouldCreate) {
+ uint32_t AlignInBits, StorageType Storage,
+ bool ShouldCreate) {
assert(isCanonical(Name) && "Expected canonical MDString");
assert(isCanonical(LinkageName) && "Expected canonical MDString");
DEFINE_GETIMPL_LOOKUP(DIGlobalVariable,
(Scope, Name, LinkageName, File, Line, Type,
- IsLocalToUnit, IsDefinition, Variable,
- StaticDataMemberDeclaration));
- Metadata *Ops[] = {Scope, Name, File, Type,
- Name, LinkageName, Variable, StaticDataMemberDeclaration};
- DEFINE_GETIMPL_STORE(DIGlobalVariable, (Line, IsLocalToUnit, IsDefinition),
+ IsLocalToUnit, IsDefinition,
+ StaticDataMemberDeclaration, AlignInBits));
+ Metadata *Ops[] = {
+ Scope, Name, File, Type, Name, LinkageName, StaticDataMemberDeclaration};
+ DEFINE_GETIMPL_STORE(DIGlobalVariable,
+ (Line, IsLocalToUnit, IsDefinition, AlignInBits),
Ops);
}
DILocalVariable *DILocalVariable::getImpl(LLVMContext &Context, Metadata *Scope,
MDString *Name, Metadata *File,
unsigned Line, Metadata *Type,
- unsigned Arg, unsigned Flags,
+ unsigned Arg, DIFlags Flags,
+ uint32_t AlignInBits,
StorageType Storage,
bool ShouldCreate) {
// 64K ought to be enough for any frontend.
@@ -536,9 +563,10 @@ DILocalVariable *DILocalVariable::getImpl(LLVMContext &Context, Metadata *Scope,
assert(Scope && "Expected scope");
assert(isCanonical(Name) && "Expected canonical MDString");
DEFINE_GETIMPL_LOOKUP(DILocalVariable,
- (Scope, Name, File, Line, Type, Arg, Flags));
+ (Scope, Name, File, Line, Type, Arg, Flags,
+ AlignInBits));
Metadata *Ops[] = {Scope, Name, File, Type};
- DEFINE_GETIMPL_STORE(DILocalVariable, (Line, Arg, Flags), Ops);
+ DEFINE_GETIMPL_STORE(DILocalVariable, (Line, Arg, Flags, AlignInBits), Ops);
}
DIExpression *DIExpression::getImpl(LLVMContext &Context,
@@ -550,8 +578,9 @@ DIExpression *DIExpression::getImpl(LLVMContext &Context,
unsigned DIExpression::ExprOperand::getSize() const {
switch (getOp()) {
- case dwarf::DW_OP_bit_piece:
+ case dwarf::DW_OP_LLVM_fragment:
return 3;
+ case dwarf::DW_OP_constu:
case dwarf::DW_OP_plus:
case dwarf::DW_OP_minus:
return 2;
@@ -570,9 +599,19 @@ bool DIExpression::isValid() const {
switch (I->getOp()) {
default:
return false;
- case dwarf::DW_OP_bit_piece:
- // Piece expressions must be at the end.
+ case dwarf::DW_OP_LLVM_fragment:
+ // A fragment operator must appear at the end.
return I->get() + I->getSize() == E->get();
+ case dwarf::DW_OP_stack_value: {
+ // Must be the last one or followed by a DW_OP_LLVM_fragment.
+ if (I->get() + I->getSize() == E->get())
+ break;
+ auto J = I;
+ if ((++J)->getOp() != dwarf::DW_OP_LLVM_fragment)
+ return false;
+ break;
+ }
+ case dwarf::DW_OP_constu:
case dwarf::DW_OP_plus:
case dwarf::DW_OP_minus:
case dwarf::DW_OP_deref:
@@ -582,22 +621,35 @@ bool DIExpression::isValid() const {
return true;
}
-bool DIExpression::isBitPiece() const {
- assert(isValid() && "Expected valid expression");
- if (unsigned N = getNumElements())
- if (N >= 3)
- return getElement(N - 3) == dwarf::DW_OP_bit_piece;
- return false;
-}
-
-uint64_t DIExpression::getBitPieceOffset() const {
- assert(isBitPiece() && "Expected bit piece");
- return getElement(getNumElements() - 2);
+Optional<DIExpression::FragmentInfo>
+DIExpression::getFragmentInfo(expr_op_iterator Start, expr_op_iterator End) {
+ for (auto I = Start; I != End; ++I)
+ if (I->getOp() == dwarf::DW_OP_LLVM_fragment) {
+ DIExpression::FragmentInfo Info = {I->getArg(1), I->getArg(0)};
+ return Info;
+ }
+ return None;
+}
+
+bool DIExpression::isConstant() const {
+ // Recognize DW_OP_constu C DW_OP_stack_value (DW_OP_LLVM_fragment Len Ofs)?.
+ if (getNumElements() != 3 && getNumElements() != 6)
+ return false;
+ if (getElement(0) != dwarf::DW_OP_constu ||
+ getElement(2) != dwarf::DW_OP_stack_value)
+ return false;
+ if (getNumElements() == 6 && getElement(3) != dwarf::DW_OP_LLVM_fragment)
+ return false;
+ return true;
}
-uint64_t DIExpression::getBitPieceSize() const {
- assert(isBitPiece() && "Expected bit piece");
- return getElement(getNumElements() - 1);
+DIGlobalVariableExpression *
+DIGlobalVariableExpression::getImpl(LLVMContext &Context, Metadata *Variable,
+ Metadata *Expression, StorageType Storage,
+ bool ShouldCreate) {
+ DEFINE_GETIMPL_LOOKUP(DIGlobalVariableExpression, (Variable, Expression));
+ Metadata *Ops[] = {Variable, Expression};
+ DEFINE_GETIMPL_STORE_NO_CONSTRUCTOR_ARGS(DIGlobalVariableExpression, Ops);
}
DIObjCProperty *DIObjCProperty::getImpl(
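
DW_OP_bit_piece is replaced by DW_OP_LLVM_fragment above, and isConstant() recognizes a constant expression of the form DW_OP_constu C, DW_OP_stack_value, optionally followed by a fragment. A standalone sketch of that acceptance check (opcode values are placeholders, not the real DWARF encodings):

#include <cstdint>
#include <vector>

// Standalone sketch of the pattern DIExpression::isConstant() accepts:
//   DW_OP_constu <C> DW_OP_stack_value [DW_OP_LLVM_fragment <Ofs> <Len>]
enum : uint64_t { DW_OP_constu = 1, DW_OP_stack_value = 2, DW_OP_LLVM_fragment = 3 };

static bool isConstantExpr(const std::vector<uint64_t> &E) {
  if (E.size() != 3 && E.size() != 6)
    return false;
  if (E[0] != DW_OP_constu || E[2] != DW_OP_stack_value)
    return false;
  if (E.size() == 6 && E[3] != DW_OP_LLVM_fragment)
    return false;
  return true;
}
// isConstantExpr({DW_OP_constu, 42, DW_OP_stack_value}) == true
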
diff --git a/contrib/llvm/lib/IR/DiagnosticInfo.cpp b/contrib/llvm/lib/IR/DiagnosticInfo.cpp
index ce67be328ab1..ea71fde26e0e 100644
--- a/contrib/llvm/lib/IR/DiagnosticInfo.cpp
+++ b/contrib/llvm/lib/IR/DiagnosticInfo.cpp
@@ -12,11 +12,12 @@
// Diagnostics reporting is still done as part of the LLVMContext.
//===----------------------------------------------------------------------===//
+#include "llvm/IR/DiagnosticInfo.h"
#include "LLVMContextImpl.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
-#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instruction.h"
@@ -91,7 +92,7 @@ int llvm::getNextAvailablePluginDiagnosticKind() {
return ++PluginKindID;
}
-const char *DiagnosticInfoOptimizationRemarkAnalysis::AlwaysPrint = "";
+const char *OptimizationRemarkAnalysis::AlwaysPrint = "";
DiagnosticInfoInlineAsm::DiagnosticInfoInlineAsm(const Instruction &I,
const Twine &MsgStr,
@@ -170,23 +171,110 @@ const std::string DiagnosticInfoWithDebugLocBase::getLocationStr() const {
return (Filename + ":" + Twine(Line) + ":" + Twine(Column)).str();
}
+DiagnosticInfoOptimizationBase::Argument::Argument(StringRef Key, Value *V)
+ : Key(Key) {
+ if (auto *F = dyn_cast<Function>(V)) {
+ if (DISubprogram *SP = F->getSubprogram())
+ DLoc = DebugLoc::get(SP->getScopeLine(), 0, SP);
+ }
+ else if (auto *I = dyn_cast<Instruction>(V))
+ DLoc = I->getDebugLoc();
+
+ // Only include names that correspond to user variables. FIXME: we should use
+ // debug info if available to get the name of the user variable.
+ if (isa<llvm::Argument>(V) || isa<GlobalValue>(V))
+ Val = GlobalValue::getRealLinkageName(V->getName());
+ else if (isa<Constant>(V)) {
+ raw_string_ostream OS(Val);
+ V->printAsOperand(OS, /*PrintType=*/false);
+ } else if (auto *I = dyn_cast<Instruction>(V))
+ Val = I->getOpcodeName();
+}
+
+DiagnosticInfoOptimizationBase::Argument::Argument(StringRef Key, Type *T)
+ : Key(Key) {
+ raw_string_ostream OS(Val);
+ OS << *T;
+}
+
+DiagnosticInfoOptimizationBase::Argument::Argument(StringRef Key, int N)
+ : Key(Key), Val(itostr(N)) {}
+
+DiagnosticInfoOptimizationBase::Argument::Argument(StringRef Key, unsigned N)
+ : Key(Key), Val(utostr(N)) {}
+
void DiagnosticInfoOptimizationBase::print(DiagnosticPrinter &DP) const {
DP << getLocationStr() << ": " << getMsg();
if (Hotness)
DP << " (hotness: " << *Hotness << ")";
}
-bool DiagnosticInfoOptimizationRemark::isEnabled() const {
+OptimizationRemark::OptimizationRemark(const char *PassName,
+ StringRef RemarkName,
+ const DebugLoc &DLoc, Value *CodeRegion)
+ : DiagnosticInfoOptimizationBase(
+ DK_OptimizationRemark, DS_Remark, PassName, RemarkName,
+ *cast<BasicBlock>(CodeRegion)->getParent(), DLoc, CodeRegion) {}
+
+OptimizationRemark::OptimizationRemark(const char *PassName,
+ StringRef RemarkName, Instruction *Inst)
+ : DiagnosticInfoOptimizationBase(DK_OptimizationRemark, DS_Remark, PassName,
+ RemarkName,
+ *Inst->getParent()->getParent(),
+ Inst->getDebugLoc(), Inst->getParent()) {}
+
+bool OptimizationRemark::isEnabled() const {
return PassRemarksOptLoc.Pattern &&
PassRemarksOptLoc.Pattern->match(getPassName());
}
-bool DiagnosticInfoOptimizationRemarkMissed::isEnabled() const {
+OptimizationRemarkMissed::OptimizationRemarkMissed(const char *PassName,
+ StringRef RemarkName,
+ const DebugLoc &DLoc,
+ Value *CodeRegion)
+ : DiagnosticInfoOptimizationBase(
+ DK_OptimizationRemarkMissed, DS_Remark, PassName, RemarkName,
+ *cast<BasicBlock>(CodeRegion)->getParent(), DLoc, CodeRegion) {}
+
+OptimizationRemarkMissed::OptimizationRemarkMissed(const char *PassName,
+ StringRef RemarkName,
+ Instruction *Inst)
+ : DiagnosticInfoOptimizationBase(DK_OptimizationRemarkMissed, DS_Remark,
+ PassName, RemarkName,
+ *Inst->getParent()->getParent(),
+ Inst->getDebugLoc(), Inst->getParent()) {}
+
+bool OptimizationRemarkMissed::isEnabled() const {
return PassRemarksMissedOptLoc.Pattern &&
PassRemarksMissedOptLoc.Pattern->match(getPassName());
}
-bool DiagnosticInfoOptimizationRemarkAnalysis::isEnabled() const {
+OptimizationRemarkAnalysis::OptimizationRemarkAnalysis(const char *PassName,
+ StringRef RemarkName,
+ const DebugLoc &DLoc,
+ Value *CodeRegion)
+ : DiagnosticInfoOptimizationBase(
+ DK_OptimizationRemarkAnalysis, DS_Remark, PassName, RemarkName,
+ *cast<BasicBlock>(CodeRegion)->getParent(), DLoc, CodeRegion) {}
+
+OptimizationRemarkAnalysis::OptimizationRemarkAnalysis(const char *PassName,
+ StringRef RemarkName,
+ Instruction *Inst)
+ : DiagnosticInfoOptimizationBase(DK_OptimizationRemarkAnalysis, DS_Remark,
+ PassName, RemarkName,
+ *Inst->getParent()->getParent(),
+ Inst->getDebugLoc(), Inst->getParent()) {}
+
+OptimizationRemarkAnalysis::OptimizationRemarkAnalysis(enum DiagnosticKind Kind,
+ const char *PassName,
+ StringRef RemarkName,
+ const DebugLoc &DLoc,
+ Value *CodeRegion)
+ : DiagnosticInfoOptimizationBase(Kind, DS_Remark, PassName, RemarkName,
+ *cast<BasicBlock>(CodeRegion)->getParent(),
+ DLoc, CodeRegion) {}
+
+bool OptimizationRemarkAnalysis::isEnabled() const {
return shouldAlwaysPrint() ||
(PassRemarksAnalysisOptLoc.Pattern &&
PassRemarksAnalysisOptLoc.Pattern->match(getPassName()));
@@ -199,14 +287,14 @@ void DiagnosticInfoMIRParser::print(DiagnosticPrinter &DP) const {
void llvm::emitOptimizationRemark(LLVMContext &Ctx, const char *PassName,
const Function &Fn, const DebugLoc &DLoc,
const Twine &Msg) {
- Ctx.diagnose(DiagnosticInfoOptimizationRemark(PassName, Fn, DLoc, Msg));
+ Ctx.diagnose(OptimizationRemark(PassName, Fn, DLoc, Msg));
}
void llvm::emitOptimizationRemarkMissed(LLVMContext &Ctx, const char *PassName,
const Function &Fn,
const DebugLoc &DLoc,
const Twine &Msg) {
- Ctx.diagnose(DiagnosticInfoOptimizationRemarkMissed(PassName, Fn, DLoc, Msg));
+ Ctx.diagnose(OptimizationRemarkMissed(PassName, Fn, DLoc, Msg));
}
void llvm::emitOptimizationRemarkAnalysis(LLVMContext &Ctx,
@@ -214,8 +302,7 @@ void llvm::emitOptimizationRemarkAnalysis(LLVMContext &Ctx,
const Function &Fn,
const DebugLoc &DLoc,
const Twine &Msg) {
- Ctx.diagnose(
- DiagnosticInfoOptimizationRemarkAnalysis(PassName, Fn, DLoc, Msg));
+ Ctx.diagnose(OptimizationRemarkAnalysis(PassName, Fn, DLoc, Msg));
}
void llvm::emitOptimizationRemarkAnalysisFPCommute(LLVMContext &Ctx,
@@ -223,8 +310,7 @@ void llvm::emitOptimizationRemarkAnalysisFPCommute(LLVMContext &Ctx,
const Function &Fn,
const DebugLoc &DLoc,
const Twine &Msg) {
- Ctx.diagnose(DiagnosticInfoOptimizationRemarkAnalysisFPCommute(PassName, Fn,
- DLoc, Msg));
+ Ctx.diagnose(OptimizationRemarkAnalysisFPCommute(PassName, Fn, DLoc, Msg));
}
void llvm::emitOptimizationRemarkAnalysisAliasing(LLVMContext &Ctx,
@@ -232,8 +318,7 @@ void llvm::emitOptimizationRemarkAnalysisAliasing(LLVMContext &Ctx,
const Function &Fn,
const DebugLoc &DLoc,
const Twine &Msg) {
- Ctx.diagnose(DiagnosticInfoOptimizationRemarkAnalysisAliasing(PassName, Fn,
- DLoc, Msg));
+ Ctx.diagnose(OptimizationRemarkAnalysisAliasing(PassName, Fn, DLoc, Msg));
}
bool DiagnosticInfoOptimizationFailure::isEnabled() const {
@@ -262,3 +347,42 @@ void llvm::emitLoopInterleaveWarning(LLVMContext &Ctx, const Function &Fn,
Ctx.diagnose(DiagnosticInfoOptimizationFailure(
Fn, DLoc, Twine("loop not interleaved: " + Msg)));
}
+
+void DiagnosticInfoISelFallback::print(DiagnosticPrinter &DP) const {
+ DP << "Instruction selection used fallback path for " << getFunction();
+}
+
+DiagnosticInfoOptimizationBase &DiagnosticInfoOptimizationBase::
+operator<<(StringRef S) {
+ Args.emplace_back(S);
+ return *this;
+}
+
+DiagnosticInfoOptimizationBase &DiagnosticInfoOptimizationBase::
+operator<<(Argument A) {
+ Args.push_back(std::move(A));
+ return *this;
+}
+
+DiagnosticInfoOptimizationBase &DiagnosticInfoOptimizationBase::
+operator<<(setIsVerbose V) {
+ IsVerbose = true;
+ return *this;
+}
+
+DiagnosticInfoOptimizationBase &DiagnosticInfoOptimizationBase::
+operator<<(setExtraArgs EA) {
+ FirstExtraArgIndex = Args.size();
+ return *this;
+}
+
+std::string DiagnosticInfoOptimizationBase::getMsg() const {
+ std::string Str;
+ raw_string_ostream OS(Str);
+ for (const DiagnosticInfoOptimizationBase::Argument &Arg :
+ make_range(Args.begin(), FirstExtraArgIndex == -1
+ ? Args.end()
+ : Args.begin() + FirstExtraArgIndex))
+ OS << Arg.Val;
+ return OS.str();
+}
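
The renamed remark classes gain constructors keyed on an instruction or code region plus a streaming interface. A hedged usage sketch built only from the constructors and operator<< overloads added above; the pass name, remark name, and message text are made up for illustration, and Inst must already be inserted in a function:

#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"

// Hedged sketch: build a remark with the streaming interface and hand it to
// the context for the registered diagnostic handler to print.
static void reportUnrolled(llvm::LLVMContext &Ctx, llvm::Instruction *Inst,
                           unsigned Count) {
  llvm::OptimizationRemark R("loop-unroll", "Unrolled", Inst);
  R << "unrolled loop "
    << llvm::DiagnosticInfoOptimizationBase::Argument("UnrollCount", Count)
    << " times";
  Ctx.diagnose(R);
}
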
diff --git a/contrib/llvm/lib/IR/Dominators.cpp b/contrib/llvm/lib/IR/Dominators.cpp
index 57e3df76d023..1880807da7eb 100644
--- a/contrib/llvm/lib/IR/Dominators.cpp
+++ b/contrib/llvm/lib/IR/Dominators.cpp
@@ -64,9 +64,13 @@ template class llvm::DomTreeNodeBase<BasicBlock>;
template class llvm::DominatorTreeBase<BasicBlock>;
template void llvm::Calculate<Function, BasicBlock *>(
- DominatorTreeBase<GraphTraits<BasicBlock *>::NodeType> &DT, Function &F);
+ DominatorTreeBase<
+ typename std::remove_pointer<GraphTraits<BasicBlock *>::NodeRef>::type>
+ &DT,
+ Function &F);
template void llvm::Calculate<Function, Inverse<BasicBlock *>>(
- DominatorTreeBase<GraphTraits<Inverse<BasicBlock *>>::NodeType> &DT,
+ DominatorTreeBase<typename std::remove_pointer<
+ GraphTraits<Inverse<BasicBlock *>>::NodeRef>::type> &DT,
Function &F);
// dominates - Return true if Def dominates a use in User. This performs
@@ -301,13 +305,13 @@ void DominatorTree::verifyDomTree() const {
//===----------------------------------------------------------------------===//
DominatorTree DominatorTreeAnalysis::run(Function &F,
- AnalysisManager<Function> &) {
+ FunctionAnalysisManager &) {
DominatorTree DT;
DT.recalculate(F);
return DT;
}
-char DominatorTreeAnalysis::PassID;
+AnalysisKey DominatorTreeAnalysis::Key;
DominatorTreePrinterPass::DominatorTreePrinterPass(raw_ostream &OS) : OS(OS) {}
diff --git a/contrib/llvm/lib/IR/Function.cpp b/contrib/llvm/lib/IR/Function.cpp
index e1223d0d0337..05419aa3d2bb 100644
--- a/contrib/llvm/lib/IR/Function.cpp
+++ b/contrib/llvm/lib/IR/Function.cpp
@@ -26,10 +26,6 @@
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
-#include "llvm/Support/ManagedStatic.h"
-#include "llvm/Support/RWMutex.h"
-#include "llvm/Support/StringPool.h"
-#include "llvm/Support/Threading.h"
using namespace llvm;
// Explicit instantiations of SymbolTableListTraits since some of the methods
@@ -262,7 +258,10 @@ Function::Function(FunctionType *Ty, LinkageTypes Linkage, const Twine &name,
assert(FunctionType::isValidReturnType(getReturnType()) &&
"invalid return type");
setGlobalObjectSubClassData(0);
- SymTab = new ValueSymbolTable();
+
+ // We only need a symbol table for a function if the context keeps value names
+ if (!getContext().shouldDiscardValueNames())
+ SymTab = make_unique<ValueSymbolTable>();
// If the function has arguments, mark them as lazily built.
if (Ty->getNumParams())
@@ -271,6 +270,7 @@ Function::Function(FunctionType *Ty, LinkageTypes Linkage, const Twine &name,
if (ParentModule)
ParentModule->getFunctionList().push_back(this);
+ HasLLVMReservedName = getName().startswith("llvm.");
// Ensure intrinsics have the right parameter attributes.
// Note, the IntID field will have been set in Value::setName if this function
// name is a valid intrinsic ID.
@@ -283,7 +283,6 @@ Function::~Function() {
// Delete all of the method arguments and unlink from symbol table...
ArgumentList.clear();
- delete SymTab;
// Remove the function from the on-the-side GC table.
clearGC();
@@ -332,10 +331,6 @@ bool Function::arg_empty() const {
return getFunctionType()->getNumParams() == 0;
}
-void Function::setParent(Module *parent) {
- Parent = parent;
-}
-
// dropAllReferences() - This function causes all the subinstructions to "let
// go" of all references that they are maintaining. This allows one to
// 'delete' a whole class at a time, even though there may be circular
@@ -488,9 +483,7 @@ static ArrayRef<const char *> findTargetSubtable(StringRef Name) {
/// \brief This does the actual lookup of an intrinsic ID which
/// matches the given function name.
-static Intrinsic::ID lookupIntrinsicID(const ValueName *ValName) {
- StringRef Name = ValName->getKey();
-
+Intrinsic::ID Function::lookupIntrinsicID(StringRef Name) {
ArrayRef<const char *> NameTable = findTargetSubtable(Name);
int Idx = Intrinsic::lookupLLVMIntrinsicByName(NameTable, Name);
if (Idx == -1)
@@ -508,12 +501,14 @@ static Intrinsic::ID lookupIntrinsicID(const ValueName *ValName) {
}
void Function::recalculateIntrinsicID() {
- const ValueName *ValName = this->getValueName();
- if (!ValName || !isIntrinsic()) {
+ StringRef Name = getName();
+ if (!Name.startswith("llvm.")) {
+ HasLLVMReservedName = false;
IntID = Intrinsic::not_intrinsic;
return;
}
- IntID = lookupIntrinsicID(ValName);
+ HasLLVMReservedName = true;
+ IntID = lookupIntrinsicID(Name);
}
/// Returns a stable mangling for the type specified for use in the name
@@ -557,6 +552,13 @@ static std::string getMangledTypeStr(Type* Ty) {
return Result;
}
+StringRef Intrinsic::getName(ID id) {
+ assert(id < num_intrinsics && "Invalid intrinsic ID!");
+ assert(!isOverloaded(id) &&
+ "This version of getName does not support overloading");
+ return IntrinsicNameTable[id];
+}
+
std::string Intrinsic::getName(ID id, ArrayRef<Type*> Tys) {
assert(id < num_intrinsics && "Invalid intrinsic ID!");
std::string Result(IntrinsicNameTable[id]);
@@ -608,10 +610,11 @@ enum IIT_Info {
IIT_HALF_VEC_ARG = 30,
IIT_SAME_VEC_WIDTH_ARG = 31,
IIT_PTR_TO_ARG = 32,
- IIT_VEC_OF_PTRS_TO_ELT = 33,
- IIT_I128 = 34,
- IIT_V512 = 35,
- IIT_V1024 = 36
+ IIT_PTR_TO_ELT = 33,
+ IIT_VEC_OF_PTRS_TO_ELT = 34,
+ IIT_I128 = 35,
+ IIT_V512 = 36,
+ IIT_V1024 = 37
};
@@ -745,6 +748,11 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
ArgInfo));
return;
}
+ case IIT_PTR_TO_ELT: {
+ unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
+ OutputTable.push_back(IITDescriptor::get(IITDescriptor::PtrToElt, ArgInfo));
+ return;
+ }
case IIT_VEC_OF_PTRS_TO_ELT: {
unsigned ArgInfo = (NextElt == Infos.size() ? 0 : Infos[NextElt++]);
OutputTable.push_back(IITDescriptor::get(IITDescriptor::VecOfPtrsToElt,
@@ -754,9 +762,9 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
case IIT_EMPTYSTRUCT:
OutputTable.push_back(IITDescriptor::get(IITDescriptor::Struct, 0));
return;
- case IIT_STRUCT5: ++StructElts; // FALL THROUGH.
- case IIT_STRUCT4: ++StructElts; // FALL THROUGH.
- case IIT_STRUCT3: ++StructElts; // FALL THROUGH.
+ case IIT_STRUCT5: ++StructElts; LLVM_FALLTHROUGH;
+ case IIT_STRUCT4: ++StructElts; LLVM_FALLTHROUGH;
+ case IIT_STRUCT3: ++StructElts; LLVM_FALLTHROUGH;
case IIT_STRUCT2: {
OutputTable.push_back(IITDescriptor::get(IITDescriptor::Struct,StructElts));
@@ -871,6 +879,14 @@ static Type *DecodeFixedType(ArrayRef<Intrinsic::IITDescriptor> &Infos,
Type *Ty = Tys[D.getArgumentNumber()];
return PointerType::getUnqual(Ty);
}
+ case IITDescriptor::PtrToElt: {
+ Type *Ty = Tys[D.getArgumentNumber()];
+ VectorType *VTy = dyn_cast<VectorType>(Ty);
+ if (!VTy)
+ llvm_unreachable("Expected an argument of Vector Type");
+ Type *EltTy = VTy->getVectorElementType();
+ return PointerType::getUnqual(EltTy);
+ }
case IITDescriptor::VecOfPtrsToElt: {
Type *Ty = Tys[D.getArgumentNumber()];
VectorType *VTy = dyn_cast<VectorType>(Ty);
@@ -1049,7 +1065,7 @@ bool Intrinsic::matchIntrinsicType(Type *Ty, ArrayRef<Intrinsic::IITDescriptor>
if (D.getArgumentNumber() >= ArgTys.size())
return true;
VectorType * ReferenceType =
- dyn_cast<VectorType>(ArgTys[D.getArgumentNumber()]);
+ dyn_cast<VectorType>(ArgTys[D.getArgumentNumber()]);
VectorType *ThisArgType = dyn_cast<VectorType>(Ty);
if (!ThisArgType || !ReferenceType ||
(ReferenceType->getVectorNumElements() !=
@@ -1065,6 +1081,16 @@ bool Intrinsic::matchIntrinsicType(Type *Ty, ArrayRef<Intrinsic::IITDescriptor>
PointerType *ThisArgType = dyn_cast<PointerType>(Ty);
return (!ThisArgType || ThisArgType->getElementType() != ReferenceType);
}
+ case IITDescriptor::PtrToElt: {
+ if (D.getArgumentNumber() >= ArgTys.size())
+ return true;
+ VectorType * ReferenceType =
+ dyn_cast<VectorType> (ArgTys[D.getArgumentNumber()]);
+ PointerType *ThisArgType = dyn_cast<PointerType>(Ty);
+
+ return (!ThisArgType || !ReferenceType ||
+ ThisArgType->getElementType() != ReferenceType->getElementType());
+ }
case IITDescriptor::VecOfPtrsToElt: {
if (D.getArgumentNumber() >= ArgTys.size())
return true;
@@ -1264,7 +1290,27 @@ Optional<uint64_t> Function::getEntryCount() const {
if (MDString *MDS = dyn_cast<MDString>(MD->getOperand(0)))
if (MDS->getString().equals("function_entry_count")) {
ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(1));
- return CI->getValue().getZExtValue();
+ uint64_t Count = CI->getValue().getZExtValue();
+ if (Count == 0)
+ return None;
+ return Count;
}
return None;
}
+
+void Function::setSectionPrefix(StringRef Prefix) {
+ MDBuilder MDB(getContext());
+ setMetadata(LLVMContext::MD_section_prefix,
+ MDB.createFunctionSectionPrefix(Prefix));
+}
+
+Optional<StringRef> Function::getSectionPrefix() const {
+ if (MDNode *MD = getMetadata(LLVMContext::MD_section_prefix)) {
+ assert(dyn_cast<MDString>(MD->getOperand(0))
+ ->getString()
+ .equals("function_section_prefix") &&
+           "Metadata does not match");
+ return dyn_cast<MDString>(MD->getOperand(1))->getString();
+ }
+ return None;
+}
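
setSectionPrefix and getSectionPrefix store the prefix as MD_section_prefix function metadata. A hedged usage sketch; the ".hot" string is illustrative, not an API requirement:

#include "llvm/IR/Function.h"

// Hedged sketch: tag a function with a section prefix through the new
// accessors added above and read it back.
static void markHot(llvm::Function &F) {
  F.setSectionPrefix(".hot");
  if (auto Prefix = F.getSectionPrefix())
    (void)*Prefix; // e.g. ".hot", to be consumed by section naming logic
}
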
diff --git a/contrib/llvm/lib/IR/GCOV.cpp b/contrib/llvm/lib/IR/GCOV.cpp
index a9f7f45ee305..3bbcf781e5dd 100644
--- a/contrib/llvm/lib/IR/GCOV.cpp
+++ b/contrib/llvm/lib/IR/GCOV.cpp
@@ -17,7 +17,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Format.h"
-#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
diff --git a/contrib/llvm/lib/IR/Globals.cpp b/contrib/llvm/lib/IR/Globals.cpp
index 6715484518a9..31f89514151c 100644
--- a/contrib/llvm/lib/IR/Globals.cpp
+++ b/contrib/llvm/lib/IR/Globals.cpp
@@ -15,12 +15,14 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
+#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
@@ -28,12 +30,19 @@ using namespace llvm;
// GlobalValue Class
//===----------------------------------------------------------------------===//
+// GlobalValue should be a Constant, plus a type, a module, some flags, and an
+// intrinsic ID. Add an assert to prevent people from accidentally growing
+// GlobalValue while adding flags.
+static_assert(sizeof(GlobalValue) ==
+ sizeof(Constant) + 2 * sizeof(void *) + 2 * sizeof(unsigned),
+ "unexpected GlobalValue size growth");
+
bool GlobalValue::isMaterializable() const {
if (const Function *F = dyn_cast<Function>(this))
return F->isMaterializable();
return false;
}
-std::error_code GlobalValue::materialize() {
+Error GlobalValue::materialize() {
return getParent()->materialize(this);
}
@@ -213,6 +222,34 @@ bool GlobalValue::canIncreaseAlignment() const {
return true;
}
+GlobalObject *GlobalValue::getBaseObject() {
+ if (auto *GO = dyn_cast<GlobalObject>(this))
+ return GO;
+ if (auto *GA = dyn_cast<GlobalAlias>(this))
+ return GA->getBaseObject();
+ return nullptr;
+}
+
+bool GlobalValue::isAbsoluteSymbolRef() const {
+ auto *GO = dyn_cast<GlobalObject>(this);
+ if (!GO)
+ return false;
+
+ return GO->getMetadata(LLVMContext::MD_absolute_symbol);
+}
+
+Optional<ConstantRange> GlobalValue::getAbsoluteSymbolRange() const {
+ auto *GO = dyn_cast<GlobalObject>(this);
+ if (!GO)
+ return None;
+
+ MDNode *MD = GO->getMetadata(LLVMContext::MD_absolute_symbol);
+ if (!MD)
+ return None;
+
+ return getConstantRangeFromMetadata(*MD);
+}
+
//===----------------------------------------------------------------------===//
// GlobalVariable Implementation
//===----------------------------------------------------------------------===//
@@ -257,10 +294,6 @@ GlobalVariable::GlobalVariable(Module &M, Type *Ty, bool constant,
M.getGlobalList().push_back(this);
}
-void GlobalVariable::setParent(Module *parent) {
- Parent = parent;
-}
-
void GlobalVariable::removeFromParent() {
getParent()->getGlobalList().remove(getIterator());
}
@@ -359,10 +392,6 @@ GlobalAlias *GlobalAlias::create(const Twine &Name, GlobalValue *Aliasee) {
return create(Aliasee->getLinkage(), Name, Aliasee);
}
-void GlobalAlias::setParent(Module *parent) {
- Parent = parent;
-}
-
void GlobalAlias::removeFromParent() {
getParent()->getAliasList().remove(getIterator());
}
@@ -396,10 +425,6 @@ GlobalIFunc *GlobalIFunc::create(Type *Ty, unsigned AddressSpace,
return new GlobalIFunc(Ty, AddressSpace, Link, Name, Resolver, ParentModule);
}
-void GlobalIFunc::setParent(Module *parent) {
- Parent = parent;
-}
-
void GlobalIFunc::removeFromParent() {
getParent()->getIFuncList().remove(getIterator());
}
diff --git a/contrib/llvm/lib/IR/IRBuilder.cpp b/contrib/llvm/lib/IR/IRBuilder.cpp
index 298331d51c88..d3e410d6d033 100644
--- a/contrib/llvm/lib/IR/IRBuilder.cpp
+++ b/contrib/llvm/lib/IR/IRBuilder.cpp
@@ -191,6 +191,26 @@ CallInst *IRBuilderBase::CreateLifetimeEnd(Value *Ptr, ConstantInt *Size) {
return createCallHelper(TheFn, Ops, this);
}
+CallInst *IRBuilderBase::CreateInvariantStart(Value *Ptr, ConstantInt *Size) {
+
+ assert(isa<PointerType>(Ptr->getType()) &&
+ "invariant.start only applies to pointers.");
+ Ptr = getCastedInt8PtrValue(Ptr);
+ if (!Size)
+ Size = getInt64(-1);
+ else
+ assert(Size->getType() == getInt64Ty() &&
+ "invariant.start requires the size to be an i64");
+
+ Value *Ops[] = {Size, Ptr};
+ // Fill in the single overloaded type: memory object type.
+ Type *ObjectPtr[1] = {Ptr->getType()};
+ Module *M = BB->getParent()->getParent();
+ Value *TheFn =
+ Intrinsic::getDeclaration(M, Intrinsic::invariant_start, ObjectPtr);
+ return createCallHelper(TheFn, Ops, this);
+}
+
CallInst *IRBuilderBase::CreateAssumption(Value *Cond) {
assert(Cond->getType() == getInt1Ty() &&
"an assumption condition must be of type i1");
diff --git a/contrib/llvm/lib/IR/IRPrintingPasses.cpp b/contrib/llvm/lib/IR/IRPrintingPasses.cpp
index 4d2f9b98911b..05e206cfd6cb 100644
--- a/contrib/llvm/lib/IR/IRPrintingPasses.cpp
+++ b/contrib/llvm/lib/IR/IRPrintingPasses.cpp
@@ -26,7 +26,7 @@ PrintModulePass::PrintModulePass(raw_ostream &OS, const std::string &Banner,
: OS(OS), Banner(Banner),
ShouldPreserveUseListOrder(ShouldPreserveUseListOrder) {}
-PreservedAnalyses PrintModulePass::run(Module &M, AnalysisManager<Module> &) {
+PreservedAnalyses PrintModulePass::run(Module &M, ModuleAnalysisManager &) {
OS << Banner;
if (llvm::isFunctionInPrintList("*"))
M.print(OS, nullptr, ShouldPreserveUseListOrder);
@@ -43,7 +43,7 @@ PrintFunctionPass::PrintFunctionPass(raw_ostream &OS, const std::string &Banner)
: OS(OS), Banner(Banner) {}
PreservedAnalyses PrintFunctionPass::run(Function &F,
- AnalysisManager<Function> &) {
+ FunctionAnalysisManager &) {
if (isFunctionInPrintList(F.getName()))
OS << Banner << static_cast<Value &>(F);
return PreservedAnalyses::all();
diff --git a/contrib/llvm/lib/IR/InlineAsm.cpp b/contrib/llvm/lib/IR/InlineAsm.cpp
index d6cf8c543dbd..5a9118571040 100644
--- a/contrib/llvm/lib/IR/InlineAsm.cpp
+++ b/contrib/llvm/lib/IR/InlineAsm.cpp
@@ -265,7 +265,7 @@ bool InlineAsm::Verify(FunctionType *Ty, StringRef ConstStr) {
break;
}
++NumIndirect;
- // FALLTHROUGH for Indirect Outputs.
+ LLVM_FALLTHROUGH; // We fall through for Indirect Outputs.
case InlineAsm::isInput:
if (NumClobbers) return false; // inputs before clobbers.
++NumInputs;
diff --git a/contrib/llvm/lib/IR/Instruction.cpp b/contrib/llvm/lib/IR/Instruction.cpp
index ed08f85c60b6..2fa03489081d 100644
--- a/contrib/llvm/lib/IR/Instruction.cpp
+++ b/contrib/llvm/lib/IR/Instruction.cpp
@@ -11,6 +11,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/DenseSet.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
@@ -92,8 +93,13 @@ void Instruction::insertAfter(Instruction *InsertPos) {
/// Unlink this instruction from its current basic block and insert it into the
/// basic block that MovePos lives in, right before MovePos.
void Instruction::moveBefore(Instruction *MovePos) {
- MovePos->getParent()->getInstList().splice(
- MovePos->getIterator(), getParent()->getInstList(), getIterator());
+ moveBefore(*MovePos->getParent(), MovePos->getIterator());
+}
+
+void Instruction::moveBefore(BasicBlock &BB,
+ SymbolTableList<Instruction>::iterator I) {
+ assert(I == BB.end() || I->getParent() == &BB);
+ BB.getInstList().splice(I, getParent()->getInstList(), getIterator());
}
void Instruction::setHasNoUnsignedWrap(bool b) {
@@ -120,47 +126,31 @@ bool Instruction::isExact() const {
return cast<PossiblyExactOperator>(this)->isExact();
}
-/// Set or clear the unsafe-algebra flag on this instruction, which must be an
-/// operator which supports this flag. See LangRef.html for the meaning of this
-/// flag.
void Instruction::setHasUnsafeAlgebra(bool B) {
assert(isa<FPMathOperator>(this) && "setting fast-math flag on invalid op");
cast<FPMathOperator>(this)->setHasUnsafeAlgebra(B);
}
-/// Set or clear the NoNaNs flag on this instruction, which must be an operator
-/// which supports this flag. See LangRef.html for the meaning of this flag.
void Instruction::setHasNoNaNs(bool B) {
assert(isa<FPMathOperator>(this) && "setting fast-math flag on invalid op");
cast<FPMathOperator>(this)->setHasNoNaNs(B);
}
-/// Set or clear the no-infs flag on this instruction, which must be an operator
-/// which supports this flag. See LangRef.html for the meaning of this flag.
void Instruction::setHasNoInfs(bool B) {
assert(isa<FPMathOperator>(this) && "setting fast-math flag on invalid op");
cast<FPMathOperator>(this)->setHasNoInfs(B);
}
-/// Set or clear the no-signed-zeros flag on this instruction, which must be an
-/// operator which supports this flag. See LangRef.html for the meaning of this
-/// flag.
void Instruction::setHasNoSignedZeros(bool B) {
assert(isa<FPMathOperator>(this) && "setting fast-math flag on invalid op");
cast<FPMathOperator>(this)->setHasNoSignedZeros(B);
}
-/// Set or clear the allow-reciprocal flag on this instruction, which must be an
-/// operator which supports this flag. See LangRef.html for the meaning of this
-/// flag.
void Instruction::setHasAllowReciprocal(bool B) {
assert(isa<FPMathOperator>(this) && "setting fast-math flag on invalid op");
cast<FPMathOperator>(this)->setHasAllowReciprocal(B);
}
-/// Convenience function for setting all the fast-math flags on this
-/// instruction, which must be an operator which supports these flags. See
-/// LangRef.html for the meaning of these flats.
void Instruction::setFastMathFlags(FastMathFlags FMF) {
assert(isa<FPMathOperator>(this) && "setting fast-math flag on invalid op");
cast<FPMathOperator>(this)->setFastMathFlags(FMF);
@@ -171,45 +161,36 @@ void Instruction::copyFastMathFlags(FastMathFlags FMF) {
cast<FPMathOperator>(this)->copyFastMathFlags(FMF);
}
-/// Determine whether the unsafe-algebra flag is set.
bool Instruction::hasUnsafeAlgebra() const {
assert(isa<FPMathOperator>(this) && "getting fast-math flag on invalid op");
return cast<FPMathOperator>(this)->hasUnsafeAlgebra();
}
-/// Determine whether the no-NaNs flag is set.
bool Instruction::hasNoNaNs() const {
assert(isa<FPMathOperator>(this) && "getting fast-math flag on invalid op");
return cast<FPMathOperator>(this)->hasNoNaNs();
}
-/// Determine whether the no-infs flag is set.
bool Instruction::hasNoInfs() const {
assert(isa<FPMathOperator>(this) && "getting fast-math flag on invalid op");
return cast<FPMathOperator>(this)->hasNoInfs();
}
-/// Determine whether the no-signed-zeros flag is set.
bool Instruction::hasNoSignedZeros() const {
assert(isa<FPMathOperator>(this) && "getting fast-math flag on invalid op");
return cast<FPMathOperator>(this)->hasNoSignedZeros();
}
-/// Determine whether the allow-reciprocal flag is set.
bool Instruction::hasAllowReciprocal() const {
assert(isa<FPMathOperator>(this) && "getting fast-math flag on invalid op");
return cast<FPMathOperator>(this)->hasAllowReciprocal();
}
-/// Convenience function for getting all the fast-math flags, which must be an
-/// operator which supports these flags. See LangRef.html for the meaning of
-/// these flags.
FastMathFlags Instruction::getFastMathFlags() const {
assert(isa<FPMathOperator>(this) && "getting fast-math flag on invalid op");
return cast<FPMathOperator>(this)->getFastMathFlags();
}
-/// Copy I's fast-math flags
void Instruction::copyFastMathFlags(const Instruction *I) {
copyFastMathFlags(I->getFastMathFlags());
}
@@ -343,7 +324,7 @@ const char *Instruction::getOpcodeName(unsigned OpCode) {
}
}
-/// Return true if both instructions have the same special state This must be
+/// Return true if both instructions have the same special state. This must be
/// kept in sync with FunctionComparator::cmpOperations in
/// lib/Transforms/IPO/MergeFunctions.cpp.
static bool haveSameSpecialState(const Instruction *I1, const Instruction *I2,
@@ -402,17 +383,11 @@ static bool haveSameSpecialState(const Instruction *I1, const Instruction *I2,
return true;
}
-/// isIdenticalTo - Return true if the specified instruction is exactly
-/// identical to the current one. This means that all operands match and any
-/// extra information (e.g. load is volatile) agree.
bool Instruction::isIdenticalTo(const Instruction *I) const {
return isIdenticalToWhenDefined(I) &&
SubclassOptionalData == I->SubclassOptionalData;
}
-/// isIdenticalToWhenDefined - This is like isIdenticalTo, except that it
-/// ignores the SubclassOptionalData flags, which specify conditions
-/// under which the instruction's result is undefined.
bool Instruction::isIdenticalToWhenDefined(const Instruction *I) const {
if (getOpcode() != I->getOpcode() ||
getNumOperands() != I->getNumOperands() ||
@@ -463,9 +438,6 @@ bool Instruction::isSameOperationAs(const Instruction *I,
return haveSameSpecialState(this, I, IgnoreAlignment);
}
-/// isUsedOutsideOfBlock - Return true if there are any uses of I outside of the
-/// specified block. Note that PHI nodes are considered to evaluate their
-/// operands in the corresponding predecessor block.
bool Instruction::isUsedOutsideOfBlock(const BasicBlock *BB) const {
for (const Use &U : uses()) {
// PHI nodes uses values in the corresponding predecessor block. For other
@@ -484,8 +456,6 @@ bool Instruction::isUsedOutsideOfBlock(const BasicBlock *BB) const {
return false;
}
-/// mayReadFromMemory - Return true if this instruction may read memory.
-///
bool Instruction::mayReadFromMemory() const {
switch (getOpcode()) {
default: return false;
@@ -506,8 +476,6 @@ bool Instruction::mayReadFromMemory() const {
}
}
-/// mayWriteToMemory - Return true if this instruction may modify memory.
-///
bool Instruction::mayWriteToMemory() const {
switch (getOpcode()) {
default: return false;
@@ -553,7 +521,7 @@ bool Instruction::mayThrow() const {
return isa<ResumeInst>(this);
}
-/// isAssociative - Return true if the instruction is associative:
+/// Return true if the instruction is associative:
///
/// Associative operators satisfy: x op (y op z) === (x op y) op z
///
@@ -578,7 +546,7 @@ bool Instruction::isAssociative() const {
}
}
-/// isCommutative - Return true if the instruction is commutative:
+/// Return true if the instruction is commutative:
///
/// Commutative operators satisfy: (x op y) === (y op x)
///
@@ -600,7 +568,7 @@ bool Instruction::isCommutative(unsigned op) {
}
}
-/// isIdempotent - Return true if the instruction is idempotent:
+/// Return true if the instruction is idempotent:
///
/// Idempotent operators satisfy: x op x === x
///
@@ -610,7 +578,7 @@ bool Instruction::isIdempotent(unsigned Opcode) {
return Opcode == And || Opcode == Or;
}
-/// isNilpotent - Return true if the instruction is nilpotent:
+/// Return true if the instruction is nilpotent:
///
/// Nilpotent operators satisfy: x op x === Id,
///
@@ -627,6 +595,45 @@ Instruction *Instruction::cloneImpl() const {
llvm_unreachable("Subclass of Instruction failed to implement cloneImpl");
}
+void Instruction::swapProfMetadata() {
+ MDNode *ProfileData = getMetadata(LLVMContext::MD_prof);
+ if (!ProfileData || ProfileData->getNumOperands() != 3 ||
+ !isa<MDString>(ProfileData->getOperand(0)))
+ return;
+
+ MDString *MDName = cast<MDString>(ProfileData->getOperand(0));
+ if (MDName->getString() != "branch_weights")
+ return;
+
+ // The first operand is the name; the second and third are the branch
+ // weights. Fetch the weights in reverse order and build a new node.
+ Metadata *Ops[] = {ProfileData->getOperand(0), ProfileData->getOperand(2),
+ ProfileData->getOperand(1)};
+ setMetadata(LLVMContext::MD_prof,
+ MDNode::get(ProfileData->getContext(), Ops));
+}
+
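For context, a !prof branch_weights attachment has three operands: the !"branch_weights" tag followed by the taken and not-taken weights, and swapProfMetadata() above keeps the tag while swapping the two weights. A minimal stand-alone sketch of that reordering, using STL containers as stand-ins for MDNode (the types and names below are illustrative, not LLVM API):

// Minimal standalone sketch of the branch_weights swap performed above,
// using plain STL types instead of MDNode (illustrative only).
#include <cassert>
#include <string>
#include <utility>
#include <vector>

// Hypothetical stand-in for a !prof node: operand 0 is the tag, operands
// 1 and 2 are the taken/not-taken weights.
using ProfNode = std::vector<std::string>;

void swapProfOperands(ProfNode &Prof) {
  if (Prof.size() != 3 || Prof[0] != "branch_weights")
    return;                       // Leave unknown layouts untouched.
  std::swap(Prof[1], Prof[2]);    // Keep the tag, swap the two weights.
}

int main() {
  ProfNode P{"branch_weights", "90", "10"};
  swapProfOperands(P);
  assert(P[1] == "10" && P[2] == "90");
}

BranchInst::swapSuccessors() later in this patch delegates to this helper, so the profile data stays consistent with the reordered successors.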
+void Instruction::copyMetadata(const Instruction &SrcInst,
+ ArrayRef<unsigned> WL) {
+ if (!SrcInst.hasMetadata())
+ return;
+
+ DenseSet<unsigned> WLS;
+ for (unsigned M : WL)
+ WLS.insert(M);
+
+ // Enumerate and copy over metadata from the source instruction to this
+ // one, honoring the whitelist if one was provided.
+ SmallVector<std::pair<unsigned, MDNode *>, 4> TheMDs;
+ SrcInst.getAllMetadataOtherThanDebugLoc(TheMDs);
+ for (const auto &MD : TheMDs) {
+ if (WL.empty() || WLS.count(MD.first))
+ setMetadata(MD.first, MD.second);
+ }
+ if (WL.empty() || WLS.count(LLVMContext::MD_dbg))
+ setDebugLoc(SrcInst.getDebugLoc());
+ return;
+}
+
Instruction *Instruction::clone() const {
Instruction *New = nullptr;
switch (getOpcode()) {
@@ -641,16 +648,6 @@ Instruction *Instruction::clone() const {
}
New->SubclassOptionalData = SubclassOptionalData;
- if (!hasMetadata())
- return New;
-
- // Otherwise, enumerate and copy over metadata from the old instruction to the
- // new one.
- SmallVector<std::pair<unsigned, MDNode *>, 4> TheMDs;
- getAllMetadataOtherThanDebugLoc(TheMDs);
- for (const auto &MD : TheMDs)
- New->setMetadata(MD.first, MD.second);
-
- New->setDebugLoc(getDebugLoc());
+ New->copyMetadata(*this);
return New;
}
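clone() above now funnels its metadata handling through copyMetadata(), whose whitelist convention is: an empty list copies every attachment plus the debug location, while a non-empty list copies only the listed kind IDs. A small stand-alone sketch of that filter, with plain STL containers standing in for the metadata tables (illustrative names only):

// Standalone sketch of the whitelist-filtered copy that copyMetadata()
// implements: an empty whitelist means "copy everything", otherwise only
// the listed kind IDs are copied. Illustrative only; not the LLVM API.
#include <map>
#include <set>
#include <string>
#include <vector>

using KindID = unsigned;
using MDMap = std::map<KindID, std::string>;

void copyFiltered(const MDMap &Src, MDMap &Dst,
                  const std::vector<KindID> &Whitelist) {
  std::set<KindID> WLS(Whitelist.begin(), Whitelist.end());
  for (const auto &KV : Src)
    if (WLS.empty() || WLS.count(KV.first))
      Dst[KV.first] = KV.second;   // Copy only whitelisted attachments.
}

int main() {
  MDMap Src{{1, "tbaa"}, {2, "prof"}}, DstAll, DstOnly2;
  copyFiltered(Src, DstAll, {});      // Empty whitelist: copy everything.
  copyFiltered(Src, DstOnly2, {2});   // Only kind 2 survives the filter.
  return (DstAll.size() == 2 && DstOnly2.size() == 1) ? 0 : 1;
}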
diff --git a/contrib/llvm/lib/IR/Instructions.cpp b/contrib/llvm/lib/IR/Instructions.cpp
index b9c693ff19ad..b67926943429 100644
--- a/contrib/llvm/lib/IR/Instructions.cpp
+++ b/contrib/llvm/lib/IR/Instructions.cpp
@@ -350,12 +350,6 @@ void CallInst::addAttribute(unsigned i, Attribute::AttrKind Kind) {
setAttributes(PAL);
}
-void CallInst::addAttribute(unsigned i, StringRef Kind, StringRef Value) {
- AttributeSet PAL = getAttributes();
- PAL = PAL.addAttribute(getContext(), i, Kind, Value);
- setAttributes(PAL);
-}
-
void CallInst::addAttribute(unsigned i, Attribute Attr) {
AttributeSet PAL = getAttributes();
PAL = PAL.addAttribute(getContext(), i, Attr);
@@ -374,15 +368,6 @@ void CallInst::removeAttribute(unsigned i, StringRef Kind) {
setAttributes(PAL);
}
-void CallInst::removeAttribute(unsigned i, Attribute Attr) {
- AttributeSet PAL = getAttributes();
- AttrBuilder B(Attr);
- LLVMContext &Context = getContext();
- PAL = PAL.removeAttributes(Context, i,
- AttributeSet::get(Context, i, B));
- setAttributes(PAL);
-}
-
void CallInst::addDereferenceableAttr(unsigned i, uint64_t Bytes) {
AttributeSet PAL = getAttributes();
PAL = PAL.addDereferenceableAttr(getContext(), i, Bytes);
@@ -405,14 +390,6 @@ bool CallInst::paramHasAttr(unsigned i, Attribute::AttrKind Kind) const {
return false;
}
-Attribute CallInst::getAttribute(unsigned i, Attribute::AttrKind Kind) const {
- return getAttributes().getAttribute(i, Kind);
-}
-
-Attribute CallInst::getAttribute(unsigned i, StringRef Kind) const {
- return getAttributes().getAttribute(i, Kind);
-}
-
bool CallInst::dataOperandHasImpliedAttr(unsigned i,
Attribute::AttrKind Kind) const {
// There are getNumOperands() - 1 data operands. The last operand is the
@@ -766,23 +743,6 @@ void InvokeInst::removeAttribute(unsigned i, StringRef Kind) {
setAttributes(PAL);
}
-void InvokeInst::removeAttribute(unsigned i, Attribute Attr) {
- AttributeSet PAL = getAttributes();
- AttrBuilder B(Attr);
- PAL = PAL.removeAttributes(getContext(), i,
- AttributeSet::get(getContext(), i, B));
- setAttributes(PAL);
-}
-
-Attribute InvokeInst::getAttribute(unsigned i,
- Attribute::AttrKind Kind) const {
- return getAttributes().getAttribute(i, Kind);
-}
-
-Attribute InvokeInst::getAttribute(unsigned i, StringRef Kind) const {
- return getAttributes().getAttribute(i, Kind);
-}
-
void InvokeInst::addDereferenceableAttr(unsigned i, uint64_t Bytes) {
AttributeSet PAL = getAttributes();
PAL = PAL.addDereferenceableAttr(getContext(), i, Bytes);
@@ -1209,15 +1169,7 @@ void BranchInst::swapSuccessors() {
// Update profile metadata if present and it matches our structural
// expectations.
- MDNode *ProfileData = getMetadata(LLVMContext::MD_prof);
- if (!ProfileData || ProfileData->getNumOperands() != 3)
- return;
-
- // The first operand is the name. Fetch them backwards and build a new one.
- Metadata *Ops[] = {ProfileData->getOperand(0), ProfileData->getOperand(2),
- ProfileData->getOperand(1)};
- setMetadata(LLVMContext::MD_prof,
- MDNode::get(ProfileData->getContext(), Ops));
+ swapProfMetadata();
}
BasicBlock *BranchInst::getSuccessorV(unsigned idx) const {
@@ -1916,9 +1868,6 @@ bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2,
return false;
}
-/// getMaskValue - Return the index from the shuffle mask for the specified
-/// output result. This is either -1 if the element is undef or a number less
-/// than 2*numelements.
int ShuffleVectorInst::getMaskValue(Constant *Mask, unsigned i) {
assert(i < Mask->getType()->getVectorNumElements() && "Index out of range");
if (ConstantDataSequential *CDS = dyn_cast<ConstantDataSequential>(Mask))
@@ -1929,8 +1878,6 @@ int ShuffleVectorInst::getMaskValue(Constant *Mask, unsigned i) {
return cast<ConstantInt>(C)->getZExtValue();
}
-/// getShuffleMask - Return the full mask for this instruction, where each
-/// element is the element number and undef's are returned as -1.
void ShuffleVectorInst::getShuffleMask(Constant *Mask,
SmallVectorImpl<int> &Result) {
unsigned NumElts = Mask->getType()->getVectorNumElements();
@@ -2289,11 +2236,10 @@ const Value *BinaryOperator::getNotArgument(const Value *BinOp) {
}
-// swapOperands - Exchange the two operands to this instruction. This
-// instruction is safe to use on any binary instruction and does not
-// modify the semantics of the instruction. If the instruction is
-// order dependent (SetLT f.e.) the opcode is changed.
-//
+// Exchange the two operands of this instruction. This is safe to use on any
+// binary instruction and does not modify the semantics of the instruction.
+// If the instruction is order-dependent (e.g. SetLT), the opcode is changed.
bool BinaryOperator::swapOperands() {
if (!isCommutative())
return true; // Can't commute operands
@@ -2306,9 +2252,6 @@ bool BinaryOperator::swapOperands() {
// FPMathOperator Class
//===----------------------------------------------------------------------===//
-/// getFPAccuracy - Get the maximum error permitted by this operation in ULPs.
-/// An accuracy of 0.0 means that the operation should be performed with the
-/// default precision.
float FPMathOperator::getFPAccuracy() const {
const MDNode *MD =
cast<Instruction>(this)->getMetadata(LLVMContext::MD_fpmath);
@@ -2603,7 +2546,8 @@ unsigned CastInst::isEliminableCastPair(
case 14:
// bitcast, addrspacecast -> addrspacecast if the element type of
// bitcast's source is the same as that of addrspacecast's destination.
- if (SrcTy->getPointerElementType() == DstTy->getPointerElementType())
+ if (SrcTy->getScalarType()->getPointerElementType() ==
+ DstTy->getScalarType()->getPointerElementType())
return Instruction::AddrSpaceCast;
return 0;
@@ -3465,6 +3409,38 @@ CmpInst::Predicate CmpInst::getInversePredicate(Predicate pred) {
}
}
+StringRef CmpInst::getPredicateName(Predicate Pred) {
+ switch (Pred) {
+ default: return "unknown";
+ case FCmpInst::FCMP_FALSE: return "false";
+ case FCmpInst::FCMP_OEQ: return "oeq";
+ case FCmpInst::FCMP_OGT: return "ogt";
+ case FCmpInst::FCMP_OGE: return "oge";
+ case FCmpInst::FCMP_OLT: return "olt";
+ case FCmpInst::FCMP_OLE: return "ole";
+ case FCmpInst::FCMP_ONE: return "one";
+ case FCmpInst::FCMP_ORD: return "ord";
+ case FCmpInst::FCMP_UNO: return "uno";
+ case FCmpInst::FCMP_UEQ: return "ueq";
+ case FCmpInst::FCMP_UGT: return "ugt";
+ case FCmpInst::FCMP_UGE: return "uge";
+ case FCmpInst::FCMP_ULT: return "ult";
+ case FCmpInst::FCMP_ULE: return "ule";
+ case FCmpInst::FCMP_UNE: return "une";
+ case FCmpInst::FCMP_TRUE: return "true";
+ case ICmpInst::ICMP_EQ: return "eq";
+ case ICmpInst::ICMP_NE: return "ne";
+ case ICmpInst::ICMP_SGT: return "sgt";
+ case ICmpInst::ICMP_SGE: return "sge";
+ case ICmpInst::ICMP_SLT: return "slt";
+ case ICmpInst::ICMP_SLE: return "sle";
+ case ICmpInst::ICMP_UGT: return "ugt";
+ case ICmpInst::ICMP_UGE: return "uge";
+ case ICmpInst::ICMP_ULT: return "ult";
+ case ICmpInst::ICMP_ULE: return "ule";
+ }
+}
+
void ICmpInst::anchor() {}
ICmpInst::Predicate ICmpInst::getSignedPredicate(Predicate pred) {
@@ -3493,69 +3469,6 @@ ICmpInst::Predicate ICmpInst::getUnsignedPredicate(Predicate pred) {
}
}
-/// Initialize a set of values that all satisfy the condition with C.
-///
-ConstantRange
-ICmpInst::makeConstantRange(Predicate pred, const APInt &C) {
- APInt Lower(C);
- APInt Upper(C);
- uint32_t BitWidth = C.getBitWidth();
- switch (pred) {
- default: llvm_unreachable("Invalid ICmp opcode to ConstantRange ctor!");
- case ICmpInst::ICMP_EQ: ++Upper; break;
- case ICmpInst::ICMP_NE: ++Lower; break;
- case ICmpInst::ICMP_ULT:
- Lower = APInt::getMinValue(BitWidth);
- // Check for an empty-set condition.
- if (Lower == Upper)
- return ConstantRange(BitWidth, /*isFullSet=*/false);
- break;
- case ICmpInst::ICMP_SLT:
- Lower = APInt::getSignedMinValue(BitWidth);
- // Check for an empty-set condition.
- if (Lower == Upper)
- return ConstantRange(BitWidth, /*isFullSet=*/false);
- break;
- case ICmpInst::ICMP_UGT:
- ++Lower; Upper = APInt::getMinValue(BitWidth); // Min = Next(Max)
- // Check for an empty-set condition.
- if (Lower == Upper)
- return ConstantRange(BitWidth, /*isFullSet=*/false);
- break;
- case ICmpInst::ICMP_SGT:
- ++Lower; Upper = APInt::getSignedMinValue(BitWidth); // Min = Next(Max)
- // Check for an empty-set condition.
- if (Lower == Upper)
- return ConstantRange(BitWidth, /*isFullSet=*/false);
- break;
- case ICmpInst::ICMP_ULE:
- Lower = APInt::getMinValue(BitWidth); ++Upper;
- // Check for a full-set condition.
- if (Lower == Upper)
- return ConstantRange(BitWidth, /*isFullSet=*/true);
- break;
- case ICmpInst::ICMP_SLE:
- Lower = APInt::getSignedMinValue(BitWidth); ++Upper;
- // Check for a full-set condition.
- if (Lower == Upper)
- return ConstantRange(BitWidth, /*isFullSet=*/true);
- break;
- case ICmpInst::ICMP_UGE:
- Upper = APInt::getMinValue(BitWidth); // Min = Next(Max)
- // Check for a full-set condition.
- if (Lower == Upper)
- return ConstantRange(BitWidth, /*isFullSet=*/true);
- break;
- case ICmpInst::ICMP_SGE:
- Upper = APInt::getSignedMinValue(BitWidth); // Min = Next(Max)
- // Check for a full-set condition.
- if (Lower == Upper)
- return ConstantRange(BitWidth, /*isFullSet=*/true);
- break;
- }
- return ConstantRange(Lower, Upper);
-}
-
CmpInst::Predicate CmpInst::getSwappedPredicate(Predicate pred) {
switch (pred) {
default: llvm_unreachable("Unknown cmp predicate!");
diff --git a/contrib/llvm/lib/IR/IntrinsicInst.cpp b/contrib/llvm/lib/IR/IntrinsicInst.cpp
index 3f747117b728..240250662aec 100644
--- a/contrib/llvm/lib/IR/IntrinsicInst.cpp
+++ b/contrib/llvm/lib/IR/IntrinsicInst.cpp
@@ -25,6 +25,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -83,3 +84,12 @@ int llvm::Intrinsic::lookupLLVMIntrinsicByName(ArrayRef<const char *> NameTable,
return LastLow - NameTable.begin();
return -1;
}
+
+Value *InstrProfIncrementInst::getStep() const {
+ if (InstrProfIncrementInstStep::classof(this)) {
+ return const_cast<Value *>(getArgOperand(4));
+ }
+ const Module *M = getModule();
+ LLVMContext &Context = M->getContext();
+ return ConstantInt::get(Type::getInt64Ty(Context), 1);
+}
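getStep() above encodes a common pattern: a subclass carries an explicit operand, and the base query falls back to an implicit default (a constant 1) when that operand is absent. A hedged, self-contained sketch of the same shape in plain C++ (the class names are invented for illustration, not part of the IntrinsicInst hierarchy):

// Sketch of the "explicit step operand or an implicit default of 1" pattern
// used by getStep(), with plain C++ classes standing in for the intrinsic
// classes (names are illustrative only).
#include <cassert>

struct Increment {
  virtual ~Increment() = default;
  virtual bool hasExplicitStep() const { return false; }
  virtual long explicitStep() const { return 0; }
  // Mirrors getStep(): prefer the explicit operand, else default to 1.
  long step() const { return hasExplicitStep() ? explicitStep() : 1; }
};

struct IncrementWithStep : Increment {
  long Step;
  explicit IncrementWithStep(long S) : Step(S) {}
  bool hasExplicitStep() const override { return true; }
  long explicitStep() const override { return Step; }
};

int main() {
  Increment Plain;
  IncrementWithStep ByFour(4);
  assert(Plain.step() == 1 && ByFour.step() == 4);
}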
diff --git a/contrib/llvm/lib/IR/LLVMContext.cpp b/contrib/llvm/lib/IR/LLVMContext.cpp
index d27fcfb1b7a8..dd66f144f04f 100644
--- a/contrib/llvm/lib/IR/LLVMContext.cpp
+++ b/contrib/llvm/lib/IR/LLVMContext.cpp
@@ -35,108 +35,36 @@ using namespace llvm;
LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl(*this)) {
// Create the fixed metadata kinds. This is done in the same order as the
// MD_* enum values so that they correspond.
-
- // Create the 'dbg' metadata kind.
- unsigned DbgID = getMDKindID("dbg");
- assert(DbgID == MD_dbg && "dbg kind id drifted"); (void)DbgID;
-
- // Create the 'tbaa' metadata kind.
- unsigned TBAAID = getMDKindID("tbaa");
- assert(TBAAID == MD_tbaa && "tbaa kind id drifted"); (void)TBAAID;
-
- // Create the 'prof' metadata kind.
- unsigned ProfID = getMDKindID("prof");
- assert(ProfID == MD_prof && "prof kind id drifted"); (void)ProfID;
-
- // Create the 'fpmath' metadata kind.
- unsigned FPAccuracyID = getMDKindID("fpmath");
- assert(FPAccuracyID == MD_fpmath && "fpmath kind id drifted");
- (void)FPAccuracyID;
-
- // Create the 'range' metadata kind.
- unsigned RangeID = getMDKindID("range");
- assert(RangeID == MD_range && "range kind id drifted");
- (void)RangeID;
-
- // Create the 'tbaa.struct' metadata kind.
- unsigned TBAAStructID = getMDKindID("tbaa.struct");
- assert(TBAAStructID == MD_tbaa_struct && "tbaa.struct kind id drifted");
- (void)TBAAStructID;
-
- // Create the 'invariant.load' metadata kind.
- unsigned InvariantLdId = getMDKindID("invariant.load");
- assert(InvariantLdId == MD_invariant_load && "invariant.load kind id drifted");
- (void)InvariantLdId;
-
- // Create the 'alias.scope' metadata kind.
- unsigned AliasScopeID = getMDKindID("alias.scope");
- assert(AliasScopeID == MD_alias_scope && "alias.scope kind id drifted");
- (void)AliasScopeID;
-
- // Create the 'noalias' metadata kind.
- unsigned NoAliasID = getMDKindID("noalias");
- assert(NoAliasID == MD_noalias && "noalias kind id drifted");
- (void)NoAliasID;
-
- // Create the 'nontemporal' metadata kind.
- unsigned NonTemporalID = getMDKindID("nontemporal");
- assert(NonTemporalID == MD_nontemporal && "nontemporal kind id drifted");
- (void)NonTemporalID;
-
- // Create the 'llvm.mem.parallel_loop_access' metadata kind.
- unsigned MemParallelLoopAccessID = getMDKindID("llvm.mem.parallel_loop_access");
- assert(MemParallelLoopAccessID == MD_mem_parallel_loop_access &&
- "mem_parallel_loop_access kind id drifted");
- (void)MemParallelLoopAccessID;
-
- // Create the 'nonnull' metadata kind.
- unsigned NonNullID = getMDKindID("nonnull");
- assert(NonNullID == MD_nonnull && "nonnull kind id drifted");
- (void)NonNullID;
-
- // Create the 'dereferenceable' metadata kind.
- unsigned DereferenceableID = getMDKindID("dereferenceable");
- assert(DereferenceableID == MD_dereferenceable &&
- "dereferenceable kind id drifted");
- (void)DereferenceableID;
-
- // Create the 'dereferenceable_or_null' metadata kind.
- unsigned DereferenceableOrNullID = getMDKindID("dereferenceable_or_null");
- assert(DereferenceableOrNullID == MD_dereferenceable_or_null &&
- "dereferenceable_or_null kind id drifted");
- (void)DereferenceableOrNullID;
-
- // Create the 'make.implicit' metadata kind.
- unsigned MakeImplicitID = getMDKindID("make.implicit");
- assert(MakeImplicitID == MD_make_implicit &&
- "make.implicit kind id drifted");
- (void)MakeImplicitID;
-
- // Create the 'unpredictable' metadata kind.
- unsigned UnpredictableID = getMDKindID("unpredictable");
- assert(UnpredictableID == MD_unpredictable &&
- "unpredictable kind id drifted");
- (void)UnpredictableID;
-
- // Create the 'invariant.group' metadata kind.
- unsigned InvariantGroupId = getMDKindID("invariant.group");
- assert(InvariantGroupId == MD_invariant_group &&
- "invariant.group kind id drifted");
- (void)InvariantGroupId;
-
- // Create the 'align' metadata kind.
- unsigned AlignID = getMDKindID("align");
- assert(AlignID == MD_align && "align kind id drifted");
- (void)AlignID;
-
- // Create the 'llvm.loop' metadata kind.
- unsigned LoopID = getMDKindID("llvm.loop");
- assert(LoopID == MD_loop && "llvm.loop kind id drifted");
- (void)LoopID;
-
- unsigned TypeID = getMDKindID("type");
- assert(TypeID == MD_type && "type kind id drifted");
- (void)TypeID;
+ std::pair<unsigned, StringRef> MDKinds[] = {
+ {MD_dbg, "dbg"},
+ {MD_tbaa, "tbaa"},
+ {MD_prof, "prof"},
+ {MD_fpmath, "fpmath"},
+ {MD_range, "range"},
+ {MD_tbaa_struct, "tbaa.struct"},
+ {MD_invariant_load, "invariant.load"},
+ {MD_alias_scope, "alias.scope"},
+ {MD_noalias, "noalias"},
+ {MD_nontemporal, "nontemporal"},
+ {MD_mem_parallel_loop_access, "llvm.mem.parallel_loop_access"},
+ {MD_nonnull, "nonnull"},
+ {MD_dereferenceable, "dereferenceable"},
+ {MD_dereferenceable_or_null, "dereferenceable_or_null"},
+ {MD_make_implicit, "make.implicit"},
+ {MD_unpredictable, "unpredictable"},
+ {MD_invariant_group, "invariant.group"},
+ {MD_align, "align"},
+ {MD_loop, "llvm.loop"},
+ {MD_type, "type"},
+ {MD_section_prefix, "section_prefix"},
+ {MD_absolute_symbol, "absolute_symbol"},
+ };
+
+ for (auto &MDKind : MDKinds) {
+ unsigned ID = getMDKindID(MDKind.second);
+ assert(ID == MDKind.first && "metadata kind id drifted");
+ (void)ID;
+ }
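The table above works because getMDKindID() hands out IDs in registration order, so keeping the initializer list in the same order as the MD_* enum is exactly what the assert checks. A stand-alone illustration of that invariant, using an ordinary vector as a stand-in for the kind registry (names are illustrative):

// Standalone sketch of the table-driven check used above: registering names
// in enum order and asserting that the assigned IDs have not drifted.
#include <cassert>
#include <string>
#include <utility>
#include <vector>

enum FixedKind { K_dbg, K_tbaa, K_prof };

int main() {
  std::vector<std::string> Registry;      // Stand-in for the MD kind table.
  auto registerKind = [&](const std::string &Name) {
    Registry.push_back(Name);
    return unsigned(Registry.size() - 1); // IDs are handed out in order.
  };

  const std::pair<unsigned, const char *> Kinds[] = {
      {K_dbg, "dbg"}, {K_tbaa, "tbaa"}, {K_prof, "prof"}};
  for (const auto &K : Kinds) {
    unsigned ID = registerKind(K.second);
    assert(ID == K.first && "kind id drifted");
    (void)ID;
  }
}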
auto *DeoptEntry = pImpl->getOrInsertBundleTag("deopt");
assert(DeoptEntry->second == LLVMContext::OB_deopt &&
@@ -203,6 +131,14 @@ bool LLVMContext::getDiagnosticHotnessRequested() const {
return pImpl->DiagnosticHotnessRequested;
}
+yaml::Output *LLVMContext::getDiagnosticsOutputFile() {
+ return pImpl->DiagnosticsOutputFile.get();
+}
+
+void LLVMContext::setDiagnosticsOutputFile(std::unique_ptr<yaml::Output> F) {
+ pImpl->DiagnosticsOutputFile = std::move(F);
+}
+
LLVMContext::DiagnosticHandlerTy LLVMContext::getDiagnosticHandler() const {
return pImpl->DiagnosticHandler;
}
diff --git a/contrib/llvm/lib/IR/LLVMContextImpl.cpp b/contrib/llvm/lib/IR/LLVMContextImpl.cpp
index b0b2c61bdf1f..c43356c53826 100644
--- a/contrib/llvm/lib/IR/LLVMContextImpl.cpp
+++ b/contrib/llvm/lib/IR/LLVMContextImpl.cpp
@@ -94,12 +94,13 @@ LLVMContextImpl::~LLVMContextImpl() {
ArrayConstants.freeConstants();
StructConstants.freeConstants();
VectorConstants.freeConstants();
- DeleteContainerSeconds(CAZConstants);
- DeleteContainerSeconds(CPNConstants);
- DeleteContainerSeconds(UVConstants);
InlineAsms.freeConstants();
- DeleteContainerSeconds(IntConstants);
- DeleteContainerSeconds(FPConstants);
+
+ CAZConstants.clear();
+ CPNConstants.clear();
+ UVConstants.clear();
+ IntConstants.clear();
+ FPConstants.clear();
for (auto &CDSConstant : CDSConstants)
delete CDSConstant.second;
diff --git a/contrib/llvm/lib/IR/LLVMContextImpl.h b/contrib/llvm/lib/IR/LLVMContextImpl.h
index 7820e2ab958d..e9e30ef0656f 100644
--- a/contrib/llvm/lib/IR/LLVMContextImpl.h
+++ b/contrib/llvm/lib/IR/LLVMContextImpl.h
@@ -33,6 +33,7 @@
#include "llvm/IR/Metadata.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/YAMLTraits.h"
#include <vector>
namespace llvm {
@@ -67,8 +68,8 @@ struct DenseMapAPIntKeyInfo {
};
struct DenseMapAPFloatKeyInfo {
- static inline APFloat getEmptyKey() { return APFloat(APFloat::Bogus, 1); }
- static inline APFloat getTombstoneKey() { return APFloat(APFloat::Bogus, 2); }
+ static inline APFloat getEmptyKey() { return APFloat(APFloat::Bogus(), 1); }
+ static inline APFloat getTombstoneKey() { return APFloat(APFloat::Bogus(), 2); }
static unsigned getHashValue(const APFloat &Key) {
return static_cast<unsigned>(hash_value(Key));
}
@@ -318,11 +319,11 @@ template <> struct MDNodeKeyImpl<DIBasicType> {
unsigned Tag;
MDString *Name;
uint64_t SizeInBits;
- uint64_t AlignInBits;
+ uint32_t AlignInBits;
unsigned Encoding;
MDNodeKeyImpl(unsigned Tag, MDString *Name, uint64_t SizeInBits,
- uint64_t AlignInBits, unsigned Encoding)
+ uint32_t AlignInBits, unsigned Encoding)
: Tag(Tag), Name(Name), SizeInBits(SizeInBits), AlignInBits(AlignInBits),
Encoding(Encoding) {}
MDNodeKeyImpl(const DIBasicType *N)
@@ -348,23 +349,23 @@ template <> struct MDNodeKeyImpl<DIDerivedType> {
Metadata *Scope;
Metadata *BaseType;
uint64_t SizeInBits;
- uint64_t AlignInBits;
uint64_t OffsetInBits;
+ uint32_t AlignInBits;
unsigned Flags;
Metadata *ExtraData;
MDNodeKeyImpl(unsigned Tag, MDString *Name, Metadata *File, unsigned Line,
Metadata *Scope, Metadata *BaseType, uint64_t SizeInBits,
- uint64_t AlignInBits, uint64_t OffsetInBits, unsigned Flags,
+ uint32_t AlignInBits, uint64_t OffsetInBits, unsigned Flags,
Metadata *ExtraData)
: Tag(Tag), Name(Name), File(File), Line(Line), Scope(Scope),
- BaseType(BaseType), SizeInBits(SizeInBits), AlignInBits(AlignInBits),
- OffsetInBits(OffsetInBits), Flags(Flags), ExtraData(ExtraData) {}
+ BaseType(BaseType), SizeInBits(SizeInBits), OffsetInBits(OffsetInBits),
+ AlignInBits(AlignInBits), Flags(Flags), ExtraData(ExtraData) {}
MDNodeKeyImpl(const DIDerivedType *N)
: Tag(N->getTag()), Name(N->getRawName()), File(N->getRawFile()),
Line(N->getLine()), Scope(N->getRawScope()),
BaseType(N->getRawBaseType()), SizeInBits(N->getSizeInBits()),
- AlignInBits(N->getAlignInBits()), OffsetInBits(N->getOffsetInBits()),
+ OffsetInBits(N->getOffsetInBits()), AlignInBits(N->getAlignInBits()),
Flags(N->getFlags()), ExtraData(N->getRawExtraData()) {}
bool isKeyOf(const DIDerivedType *RHS) const {
@@ -429,8 +430,8 @@ template <> struct MDNodeKeyImpl<DICompositeType> {
Metadata *Scope;
Metadata *BaseType;
uint64_t SizeInBits;
- uint64_t AlignInBits;
uint64_t OffsetInBits;
+ uint32_t AlignInBits;
unsigned Flags;
Metadata *Elements;
unsigned RuntimeLang;
@@ -440,20 +441,20 @@ template <> struct MDNodeKeyImpl<DICompositeType> {
MDNodeKeyImpl(unsigned Tag, MDString *Name, Metadata *File, unsigned Line,
Metadata *Scope, Metadata *BaseType, uint64_t SizeInBits,
- uint64_t AlignInBits, uint64_t OffsetInBits, unsigned Flags,
+ uint32_t AlignInBits, uint64_t OffsetInBits, unsigned Flags,
Metadata *Elements, unsigned RuntimeLang,
Metadata *VTableHolder, Metadata *TemplateParams,
MDString *Identifier)
: Tag(Tag), Name(Name), File(File), Line(Line), Scope(Scope),
- BaseType(BaseType), SizeInBits(SizeInBits), AlignInBits(AlignInBits),
- OffsetInBits(OffsetInBits), Flags(Flags), Elements(Elements),
+ BaseType(BaseType), SizeInBits(SizeInBits), OffsetInBits(OffsetInBits),
+ AlignInBits(AlignInBits), Flags(Flags), Elements(Elements),
RuntimeLang(RuntimeLang), VTableHolder(VTableHolder),
TemplateParams(TemplateParams), Identifier(Identifier) {}
MDNodeKeyImpl(const DICompositeType *N)
: Tag(N->getTag()), Name(N->getRawName()), File(N->getRawFile()),
Line(N->getLine()), Scope(N->getRawScope()),
BaseType(N->getRawBaseType()), SizeInBits(N->getSizeInBits()),
- AlignInBits(N->getAlignInBits()), OffsetInBits(N->getOffsetInBits()),
+ OffsetInBits(N->getOffsetInBits()), AlignInBits(N->getAlignInBits()),
Flags(N->getFlags()), Elements(N->getRawElements()),
RuntimeLang(N->getRuntimeLang()), VTableHolder(N->getRawVTableHolder()),
TemplateParams(N->getRawTemplateParams()),
@@ -502,17 +503,26 @@ template <> struct MDNodeKeyImpl<DISubroutineType> {
template <> struct MDNodeKeyImpl<DIFile> {
MDString *Filename;
MDString *Directory;
+ DIFile::ChecksumKind CSKind;
+ MDString *Checksum;
- MDNodeKeyImpl(MDString *Filename, MDString *Directory)
- : Filename(Filename), Directory(Directory) {}
+ MDNodeKeyImpl(MDString *Filename, MDString *Directory,
+ DIFile::ChecksumKind CSKind, MDString *Checksum)
+ : Filename(Filename), Directory(Directory), CSKind(CSKind),
+ Checksum(Checksum) {}
MDNodeKeyImpl(const DIFile *N)
- : Filename(N->getRawFilename()), Directory(N->getRawDirectory()) {}
+ : Filename(N->getRawFilename()), Directory(N->getRawDirectory()),
+ CSKind(N->getChecksumKind()), Checksum(N->getRawChecksum()) {}
bool isKeyOf(const DIFile *RHS) const {
return Filename == RHS->getRawFilename() &&
- Directory == RHS->getRawDirectory();
+ Directory == RHS->getRawDirectory() &&
+ CSKind == RHS->getChecksumKind() &&
+ Checksum == RHS->getRawChecksum();
+ }
+ unsigned getHashValue() const {
+ return hash_combine(Filename, Directory, CSKind, Checksum);
}
- unsigned getHashValue() const { return hash_combine(Filename, Directory); }
};
template <> struct MDNodeKeyImpl<DISubprogram> {
@@ -673,16 +683,20 @@ template <> struct MDNodeKeyImpl<DINamespace> {
Metadata *File;
MDString *Name;
unsigned Line;
+ bool ExportSymbols;
- MDNodeKeyImpl(Metadata *Scope, Metadata *File, MDString *Name, unsigned Line)
- : Scope(Scope), File(File), Name(Name), Line(Line) {}
+ MDNodeKeyImpl(Metadata *Scope, Metadata *File, MDString *Name, unsigned Line,
+ bool ExportSymbols)
+ : Scope(Scope), File(File), Name(Name), Line(Line),
+ ExportSymbols(ExportSymbols) {}
MDNodeKeyImpl(const DINamespace *N)
: Scope(N->getRawScope()), File(N->getRawFile()), Name(N->getRawName()),
- Line(N->getLine()) {}
+ Line(N->getLine()), ExportSymbols(N->getExportSymbols()) {}
bool isKeyOf(const DINamespace *RHS) const {
return Scope == RHS->getRawScope() && File == RHS->getRawFile() &&
- Name == RHS->getRawName() && Line == RHS->getLine();
+ Name == RHS->getRawName() && Line == RHS->getLine() &&
+ ExportSymbols == RHS->getExportSymbols();
}
unsigned getHashValue() const {
return hash_combine(Scope, File, Name, Line);
@@ -758,24 +772,25 @@ template <> struct MDNodeKeyImpl<DIGlobalVariable> {
Metadata *Type;
bool IsLocalToUnit;
bool IsDefinition;
- Metadata *Variable;
Metadata *StaticDataMemberDeclaration;
+ uint32_t AlignInBits;
MDNodeKeyImpl(Metadata *Scope, MDString *Name, MDString *LinkageName,
Metadata *File, unsigned Line, Metadata *Type,
- bool IsLocalToUnit, bool IsDefinition, Metadata *Variable,
- Metadata *StaticDataMemberDeclaration)
+ bool IsLocalToUnit, bool IsDefinition,
+ Metadata *StaticDataMemberDeclaration, uint32_t AlignInBits)
: Scope(Scope), Name(Name), LinkageName(LinkageName), File(File),
Line(Line), Type(Type), IsLocalToUnit(IsLocalToUnit),
- IsDefinition(IsDefinition), Variable(Variable),
- StaticDataMemberDeclaration(StaticDataMemberDeclaration) {}
+ IsDefinition(IsDefinition),
+ StaticDataMemberDeclaration(StaticDataMemberDeclaration),
+ AlignInBits(AlignInBits) {}
MDNodeKeyImpl(const DIGlobalVariable *N)
: Scope(N->getRawScope()), Name(N->getRawName()),
LinkageName(N->getRawLinkageName()), File(N->getRawFile()),
Line(N->getLine()), Type(N->getRawType()),
IsLocalToUnit(N->isLocalToUnit()), IsDefinition(N->isDefinition()),
- Variable(N->getRawVariable()),
- StaticDataMemberDeclaration(N->getRawStaticDataMemberDeclaration()) {}
+ StaticDataMemberDeclaration(N->getRawStaticDataMemberDeclaration()),
+ AlignInBits(N->getAlignInBits()) {}
bool isKeyOf(const DIGlobalVariable *RHS) const {
return Scope == RHS->getRawScope() && Name == RHS->getRawName() &&
@@ -783,13 +798,20 @@ template <> struct MDNodeKeyImpl<DIGlobalVariable> {
File == RHS->getRawFile() && Line == RHS->getLine() &&
Type == RHS->getRawType() && IsLocalToUnit == RHS->isLocalToUnit() &&
IsDefinition == RHS->isDefinition() &&
- Variable == RHS->getRawVariable() &&
StaticDataMemberDeclaration ==
- RHS->getRawStaticDataMemberDeclaration();
+ RHS->getRawStaticDataMemberDeclaration() &&
+ AlignInBits == RHS->getAlignInBits();
}
unsigned getHashValue() const {
+ // We do not use AlignInBits in the hash function here on purpose:
+ // in most cases this parameter is zero for a local variable (for a function
+ // parameter it is always zero), which leads to lots of hash collisions and
+ // errors in cases with lots of similar variables.
+ // clang/test/CodeGen/debug-info-257-args.c is an example of this problem;
+ // the generated IR is random for each run and the test fails with Align
+ // included.
+ // TODO: make hashing work well in such situations.
return hash_combine(Scope, Name, LinkageName, File, Line, Type,
- IsLocalToUnit, IsDefinition, Variable,
+ IsLocalToUnit, IsDefinition, /* AlignInBits, */
StaticDataMemberDeclaration);
}
};
@@ -802,23 +824,32 @@ template <> struct MDNodeKeyImpl<DILocalVariable> {
Metadata *Type;
unsigned Arg;
unsigned Flags;
+ uint32_t AlignInBits;
MDNodeKeyImpl(Metadata *Scope, MDString *Name, Metadata *File, unsigned Line,
- Metadata *Type, unsigned Arg, unsigned Flags)
+ Metadata *Type, unsigned Arg, unsigned Flags,
+ uint32_t AlignInBits)
: Scope(Scope), Name(Name), File(File), Line(Line), Type(Type), Arg(Arg),
- Flags(Flags) {}
+ Flags(Flags), AlignInBits(AlignInBits) {}
MDNodeKeyImpl(const DILocalVariable *N)
: Scope(N->getRawScope()), Name(N->getRawName()), File(N->getRawFile()),
Line(N->getLine()), Type(N->getRawType()), Arg(N->getArg()),
- Flags(N->getFlags()) {}
+ Flags(N->getFlags()), AlignInBits(N->getAlignInBits()) {}
bool isKeyOf(const DILocalVariable *RHS) const {
return Scope == RHS->getRawScope() && Name == RHS->getRawName() &&
File == RHS->getRawFile() && Line == RHS->getLine() &&
Type == RHS->getRawType() && Arg == RHS->getArg() &&
- Flags == RHS->getFlags();
+ Flags == RHS->getFlags() && AlignInBits == RHS->getAlignInBits();
}
unsigned getHashValue() const {
+ // We do not use AlignInBits in the hash function here on purpose:
+ // in most cases this parameter is zero for a local variable (for a function
+ // parameter it is always zero), which leads to lots of hash collisions and
+ // errors in cases with lots of similar variables.
+ // clang/test/CodeGen/debug-info-257-args.c is an example of this problem;
+ // the generated IR is random for each run and the test fails with Align
+ // included.
+ // TODO: make hashing work well in such situations.
return hash_combine(Scope, Name, File, Line, Type, Arg, Flags);
}
};
@@ -837,6 +868,22 @@ template <> struct MDNodeKeyImpl<DIExpression> {
}
};
+template <> struct MDNodeKeyImpl<DIGlobalVariableExpression> {
+ Metadata *Variable;
+ Metadata *Expression;
+
+ MDNodeKeyImpl(Metadata *Variable, Metadata *Expression)
+ : Variable(Variable), Expression(Expression) {}
+ MDNodeKeyImpl(const DIGlobalVariableExpression *N)
+ : Variable(N->getRawVariable()), Expression(N->getRawExpression()) {}
+
+ bool isKeyOf(const DIGlobalVariableExpression *RHS) const {
+ return Variable == RHS->getRawVariable() &&
+ Expression == RHS->getRawExpression();
+ }
+ unsigned getHashValue() const { return hash_combine(Variable, Expression); }
+};
+
template <> struct MDNodeKeyImpl<DIObjCProperty> {
MDString *Name;
Metadata *File;
@@ -928,7 +975,7 @@ template <> struct MDNodeKeyImpl<DIMacroFile> {
bool isKeyOf(const DIMacroFile *RHS) const {
return MIType == RHS->getMacinfoType() && Line == RHS->getLine() &&
- File == RHS->getRawFile() && File == RHS->getRawElements();
+ File == RHS->getRawFile() && Elements == RHS->getRawElements();
}
unsigned getHashValue() const {
return hash_combine(MIType, Line, File, Elements);
@@ -998,9 +1045,8 @@ public:
///
/// Erases all attachments matching the \c shouldRemove predicate.
template <class PredTy> void remove_if(PredTy shouldRemove) {
- Attachments.erase(
- std::remove_if(Attachments.begin(), Attachments.end(), shouldRemove),
- Attachments.end());
+ Attachments.erase(llvm::remove_if(Attachments, shouldRemove),
+ Attachments.end());
}
};
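llvm::remove_if above is a range-based convenience wrapper around std::remove_if, so the call is still the usual erase-remove idiom. A self-contained equivalent with the standard library:

// The llvm::remove_if call above is a range-based convenience over
// std::remove_if; the erase(remove_if(...), end()) idiom is the same.
#include <algorithm>
#include <cassert>
#include <vector>

int main() {
  std::vector<int> V{1, 2, 3, 4, 5};
  V.erase(std::remove_if(V.begin(), V.end(),
                         [](int X) { return X % 2 == 0; }),
          V.end());                     // Drop the even elements in place.
  assert((V == std::vector<int>{1, 3, 5}));
}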
@@ -1044,14 +1090,17 @@ public:
void *DiagnosticContext;
bool RespectDiagnosticFilters;
bool DiagnosticHotnessRequested;
+ std::unique_ptr<yaml::Output> DiagnosticsOutputFile;
LLVMContext::YieldCallbackTy YieldCallback;
void *YieldOpaqueHandle;
- typedef DenseMap<APInt, ConstantInt *, DenseMapAPIntKeyInfo> IntMapTy;
+ typedef DenseMap<APInt, std::unique_ptr<ConstantInt>, DenseMapAPIntKeyInfo>
+ IntMapTy;
IntMapTy IntConstants;
- typedef DenseMap<APFloat, ConstantFP *, DenseMapAPFloatKeyInfo> FPMapTy;
+ typedef DenseMap<APFloat, std::unique_ptr<ConstantFP>, DenseMapAPFloatKeyInfo>
+ FPMapTy;
FPMapTy FPConstants;
FoldingSet<AttributeImpl> AttrsSet;
@@ -1077,7 +1126,7 @@ public:
// them on context teardown.
std::vector<MDNode *> DistinctMDNodes;
- DenseMap<Type*, ConstantAggregateZero*> CAZConstants;
+ DenseMap<Type *, std::unique_ptr<ConstantAggregateZero>> CAZConstants;
typedef ConstantUniqueMap<ConstantArray> ArrayConstantsTy;
ArrayConstantsTy ArrayConstants;
@@ -1087,11 +1136,11 @@ public:
typedef ConstantUniqueMap<ConstantVector> VectorConstantsTy;
VectorConstantsTy VectorConstants;
-
- DenseMap<PointerType*, ConstantPointerNull*> CPNConstants;
- DenseMap<Type*, UndefValue*> UVConstants;
-
+ DenseMap<PointerType *, std::unique_ptr<ConstantPointerNull>> CPNConstants;
+
+ DenseMap<Type *, std::unique_ptr<UndefValue>> UVConstants;
+
StringMap<ConstantDataSequential*> CDSConstants;
DenseMap<std::pair<const Function *, const BasicBlock *>, BlockAddress *>
diff --git a/contrib/llvm/lib/IR/LegacyPassManager.cpp b/contrib/llvm/lib/IR/LegacyPassManager.cpp
index 8f71d822d271..628a67bd639c 100644
--- a/contrib/llvm/lib/IR/LegacyPassManager.cpp
+++ b/contrib/llvm/lib/IR/LegacyPassManager.cpp
@@ -11,19 +11,19 @@
//
//===----------------------------------------------------------------------===//
-
-#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/IRPrintingPasses.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/LegacyPassManagers.h"
#include "llvm/IR/LegacyPassNameParser.h"
#include "llvm/IR/Module.h"
+#include "llvm/Support/Chrono.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Mutex.h"
-#include "llvm/Support/TimeValue.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -56,8 +56,7 @@ PassDebugging("debug-pass", cl::Hidden,
clEnumVal(Arguments , "print pass arguments to pass to 'opt'"),
clEnumVal(Structure , "print pass structure before run()"),
clEnumVal(Executions, "print pass name before it is executed"),
- clEnumVal(Details , "print pass details when it is executed"),
- clEnumValEnd));
+ clEnumVal(Details , "print pass details when it is executed")));
namespace {
typedef llvm::cl::list<const llvm::PassInfo *, bool, PassNameParser>
@@ -193,9 +192,7 @@ public:
PMDataManager *getAsPMDataManager() override { return this; }
Pass *getAsPass() override { return this; }
- const char *getPassName() const override {
- return "BasicBlock Pass Manager";
- }
+ StringRef getPassName() const override { return "BasicBlock Pass Manager"; }
// Print passes managed by this manager
void dumpPassStructure(unsigned Offset) override {
@@ -340,9 +337,7 @@ public:
/// its runOnFunction() for function F.
Pass* getOnTheFlyPass(Pass *MP, AnalysisID PI, Function &F) override;
- const char *getPassName() const override {
- return "Module Pass Manager";
- }
+ StringRef getPassName() const override { return "Module Pass Manager"; }
PMDataManager *getAsPMDataManager() override { return this; }
Pass *getAsPass() override { return this; }
@@ -454,7 +449,7 @@ class TimingInfo {
TimerGroup TG;
public:
// Use 'create' member to get this.
- TimingInfo() : TG("... Pass execution timing report ...") {}
+ TimingInfo() : TG("pass", "... Pass execution timing report ...") {}
// TimingDtor - Print out information about timing information
~TimingInfo() {
@@ -477,8 +472,10 @@ public:
sys::SmartScopedLock<true> Lock(*TimingInfoMutex);
Timer *&T = TimingData[P];
- if (!T)
- T = new Timer(P->getPassName(), TG);
+ if (!T) {
+ StringRef PassName = P->getPassName();
+ T = new Timer(PassName, PassName, TG);
+ }
return T;
}
};
@@ -538,12 +535,11 @@ PMTopLevelManager::setLastUser(ArrayRef<Pass*> AnalysisPasses, Pass *P) {
// If AP is the last user of other passes then make P last user of
// such passes.
- for (DenseMap<Pass *, Pass *>::iterator LUI = LastUser.begin(),
- LUE = LastUser.end(); LUI != LUE; ++LUI) {
- if (LUI->second == AP)
+ for (auto LU : LastUser) {
+ if (LU.second == AP)
// DenseMap iterator is not invalidated here because
// this is just updating existing entries.
- LastUser[LUI->first] = P;
+ LastUser[LU.first] = P;
}
}
}
@@ -684,7 +680,7 @@ void PMTopLevelManager::schedulePass(Pass *P) {
if (PI && !PI->isAnalysis() && ShouldPrintBeforePass(PI)) {
Pass *PP = P->createPrinterPass(
- dbgs(), std::string("*** IR Dump Before ") + P->getPassName() + " ***");
+ dbgs(), ("*** IR Dump Before " + P->getPassName() + " ***").str());
PP->assignPassManager(activeStack, getTopLevelPassManagerType());
}
@@ -693,7 +689,7 @@ void PMTopLevelManager::schedulePass(Pass *P) {
if (PI && !PI->isAnalysis() && ShouldPrintAfterPass(PI)) {
Pass *PP = P->createPrinterPass(
- dbgs(), std::string("*** IR Dump After ") + P->getPassName() + " ***");
+ dbgs(), ("*** IR Dump After " + P->getPassName() + " ***").str());
PP->assignPassManager(activeStack, getTopLevelPassManagerType());
}
}
@@ -793,10 +789,9 @@ void PMTopLevelManager::initializeAllAnalysisInfo() {
for (PMDataManager *IPM : IndirectPassManagers)
IPM->initializeAnalysisInfo();
- for (DenseMap<Pass *, Pass *>::iterator DMI = LastUser.begin(),
- DME = LastUser.end(); DMI != DME; ++DMI) {
- SmallPtrSet<Pass *, 8> &L = InversedLastUser[DMI->second];
- L.insert(DMI->first);
+ for (auto LU : LastUser) {
+ SmallPtrSet<Pass *, 8> &L = InversedLastUser[LU.second];
+ L.insert(LU.first);
}
}
@@ -837,13 +832,9 @@ bool PMDataManager::preserveHigherLevelAnalysis(Pass *P) {
return true;
const AnalysisUsage::VectorType &PreservedSet = AnUsage->getPreservedSet();
- for (SmallVectorImpl<Pass *>::iterator I = HigherLevelAnalysis.begin(),
- E = HigherLevelAnalysis.end(); I != E; ++I) {
- Pass *P1 = *I;
+ for (Pass *P1 : HigherLevelAnalysis) {
if (P1->getAsImmutablePass() == nullptr &&
- std::find(PreservedSet.begin(), PreservedSet.end(),
- P1->getPassID()) ==
- PreservedSet.end())
+ !is_contained(PreservedSet, P1->getPassID()))
return false;
}
@@ -860,9 +851,7 @@ void PMDataManager::verifyPreservedAnalysis(Pass *P) {
const AnalysisUsage::VectorType &PreservedSet = AnUsage->getPreservedSet();
// Verify preserved analysis
- for (AnalysisUsage::VectorType::const_iterator I = PreservedSet.begin(),
- E = PreservedSet.end(); I != E; ++I) {
- AnalysisID AID = *I;
+ for (AnalysisID AID : PreservedSet) {
if (Pass *AP = findAnalysisPass(AID, true)) {
TimeRegion PassTimer(getPassTimer(AP));
AP->verifyAnalysis();
@@ -881,8 +870,7 @@ void PMDataManager::removeNotPreservedAnalysis(Pass *P) {
E = AvailableAnalysis.end(); I != E; ) {
DenseMap<AnalysisID, Pass*>::iterator Info = I++;
if (Info->second->getAsImmutablePass() == nullptr &&
- std::find(PreservedSet.begin(), PreservedSet.end(), Info->first) ==
- PreservedSet.end()) {
+ !is_contained(PreservedSet, Info->first)) {
// Remove this analysis
if (PassDebugging >= Details) {
Pass *S = Info->second;
@@ -905,8 +893,7 @@ void PMDataManager::removeNotPreservedAnalysis(Pass *P) {
E = InheritedAnalysis[Index]->end(); I != E; ) {
DenseMap<AnalysisID, Pass *>::iterator Info = I++;
if (Info->second->getAsImmutablePass() == nullptr &&
- std::find(PreservedSet.begin(), PreservedSet.end(), Info->first) ==
- PreservedSet.end()) {
+ !is_contained(PreservedSet, Info->first)) {
// Remove this analysis
if (PassDebugging >= Details) {
Pass *S = Info->second;
@@ -937,9 +924,8 @@ void PMDataManager::removeDeadPasses(Pass *P, StringRef Msg,
dbgs() << " Free these instances\n";
}
- for (SmallVectorImpl<Pass *>::iterator I = DeadPasses.begin(),
- E = DeadPasses.end(); I != E; ++I)
- freePass(*I, Msg, DBG_STR);
+ for (Pass *P : DeadPasses)
+ freePass(P, Msg, DBG_STR);
}
void PMDataManager::freePass(Pass *P, StringRef Msg,
@@ -1145,7 +1131,7 @@ void PMDataManager::dumpPassInfo(Pass *P, enum PassDebuggingString S1,
StringRef Msg) {
if (PassDebugging < Executions)
return;
- dbgs() << "[" << sys::TimeValue::now().str() << "] " << (void *)this
+ dbgs() << "[" << std::chrono::system_clock::now() << "] " << (void *)this
<< std::string(getDepth() * 2 + 1, ' ');
switch (S1) {
case EXECUTION_MSG:
@@ -1394,8 +1380,9 @@ void FunctionPassManager::add(Pass *P) {
/// so, return true.
///
bool FunctionPassManager::run(Function &F) {
- if (std::error_code EC = F.materialize())
- report_fatal_error("Error reading bitcode file: " + EC.message());
+ handleAllErrors(F.materialize(), [&](ErrorInfoBase &EIB) {
+ report_fatal_error("Error reading bitcode file: " + EIB.message());
+ });
return FPM->run(F);
}
diff --git a/contrib/llvm/lib/IR/MDBuilder.cpp b/contrib/llvm/lib/IR/MDBuilder.cpp
index a5a4cd06db09..f4bfd5992151 100644
--- a/contrib/llvm/lib/IR/MDBuilder.cpp
+++ b/contrib/llvm/lib/IR/MDBuilder.cpp
@@ -63,6 +63,12 @@ MDNode *MDBuilder::createFunctionEntryCount(uint64_t Count) {
createConstant(ConstantInt::get(Int64Ty, Count))});
}
+MDNode *MDBuilder::createFunctionSectionPrefix(StringRef Prefix) {
+ return MDNode::get(Context,
+ {createString("function_section_prefix"),
+ createString(Prefix)});
+}
+
MDNode *MDBuilder::createRange(const APInt &Lo, const APInt &Hi) {
assert(Lo.getBitWidth() == Hi.getBitWidth() && "Mismatched bitwidths!");
diff --git a/contrib/llvm/lib/IR/Mangler.cpp b/contrib/llvm/lib/IR/Mangler.cpp
index ddf024df8ccc..41e11b3945e4 100644
--- a/contrib/llvm/lib/IR/Mangler.cpp
+++ b/contrib/llvm/lib/IR/Mangler.cpp
@@ -121,7 +121,7 @@ void Mangler::getNameWithPrefix(raw_ostream &OS, const GlobalValue *GV,
// already.
unsigned &ID = AnonGlobalIDs[GV];
if (ID == 0)
- ID = NextAnonGlobalID++;
+ ID = AnonGlobalIDs.size();
// Must mangle the global into a unique ID.
getNameWithPrefixImpl(OS, "__unnamed_" + Twine(ID), DL, PrefixTy);
diff --git a/contrib/llvm/lib/IR/Metadata.cpp b/contrib/llvm/lib/IR/Metadata.cpp
index f35c64b27b5b..1d1930459239 100644
--- a/contrib/llvm/lib/IR/Metadata.cpp
+++ b/contrib/llvm/lib/IR/Metadata.cpp
@@ -16,6 +16,7 @@
#include "MetadataImpl.h"
#include "SymbolTableListTraitsImpl.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/IR/ConstantRange.h"
@@ -313,8 +314,7 @@ ValueAsMetadata *ValueAsMetadata::get(Value *V) {
if (!Entry) {
assert((isa<Constant>(V) || isa<Argument>(V) || isa<Instruction>(V)) &&
"Expected constant or function-local value");
- assert(!V->IsUsedByMD &&
- "Expected this to be the only metadata use");
+ assert(!V->IsUsedByMD && "Expected this to be the only metadata use");
V->IsUsedByMD = true;
if (auto *C = dyn_cast<Constant>(V))
Entry = new ConstantAsMetadata(C);
@@ -359,14 +359,12 @@ void ValueAsMetadata::handleRAUW(Value *From, Value *To) {
auto &Store = Context.pImpl->ValuesAsMetadata;
auto I = Store.find(From);
if (I == Store.end()) {
- assert(!From->IsUsedByMD &&
- "Expected From not to be used by metadata");
+ assert(!From->IsUsedByMD && "Expected From not to be used by metadata");
return;
}
// Remove old entry from the map.
- assert(From->IsUsedByMD &&
- "Expected From to be used by metadata");
+ assert(From->IsUsedByMD && "Expected From to be used by metadata");
From->IsUsedByMD = false;
ValueAsMetadata *MD = I->second;
assert(MD && "Expected valid metadata");
@@ -403,8 +401,7 @@ void ValueAsMetadata::handleRAUW(Value *From, Value *To) {
}
// Update MD in place (and update the map entry).
- assert(!To->IsUsedByMD &&
- "Expected this to be the only metadata use");
+ assert(!To->IsUsedByMD && "Expected this to be the only metadata use");
To->IsUsedByMD = true;
MD->V = To;
Entry = MD;
@@ -416,7 +413,7 @@ void ValueAsMetadata::handleRAUW(Value *From, Value *To) {
MDString *MDString::get(LLVMContext &Context, StringRef Str) {
auto &Store = Context.pImpl->MDStringCache;
- auto I = Store.emplace_second(Str);
+ auto I = Store.try_emplace(Str);
auto &MapEntry = I.first->getValue();
if (!I.second)
return &MapEntry;
@@ -437,7 +434,7 @@ StringRef MDString::getString() const {
// prepended to them.
#define HANDLE_MDNODE_LEAF(CLASS) \
static_assert( \
- llvm::AlignOf<uint64_t>::Alignment >= llvm::AlignOf<CLASS>::Alignment, \
+ alignof(uint64_t) >= alignof(CLASS), \
"Alignment is insufficient after objects prepended to " #CLASS);
#include "llvm/IR/Metadata.def"
@@ -445,7 +442,7 @@ void *MDNode::operator new(size_t Size, unsigned NumOps) {
size_t OpSize = NumOps * sizeof(MDOperand);
// uint64_t is the most aligned type we need support (ensured by static_assert
// above)
- OpSize = alignTo(OpSize, llvm::alignOf<uint64_t>());
+ OpSize = alignTo(OpSize, alignof(uint64_t));
void *Ptr = reinterpret_cast<char *>(::operator new(OpSize + Size)) + OpSize;
MDOperand *O = static_cast<MDOperand *>(Ptr);
for (MDOperand *E = O - NumOps; O != E; --O)
@@ -456,7 +453,7 @@ void *MDNode::operator new(size_t Size, unsigned NumOps) {
void MDNode::operator delete(void *Mem) {
MDNode *N = static_cast<MDNode *>(Mem);
size_t OpSize = N->NumOperands * sizeof(MDOperand);
- OpSize = alignTo(OpSize, llvm::alignOf<uint64_t>());
+ OpSize = alignTo(OpSize, alignof(uint64_t));
MDOperand *O = static_cast<MDOperand *>(Mem);
for (MDOperand *E = O - N->NumOperands; O != E; --O)
@@ -862,42 +859,32 @@ MDNode *MDNode::concatenate(MDNode *A, MDNode *B) {
if (!B)
return A;
- SmallVector<Metadata *, 4> MDs;
- MDs.reserve(A->getNumOperands() + B->getNumOperands());
- MDs.append(A->op_begin(), A->op_end());
- MDs.append(B->op_begin(), B->op_end());
+ SmallSetVector<Metadata *, 4> MDs(A->op_begin(), A->op_end());
+ MDs.insert(B->op_begin(), B->op_end());
// FIXME: This preserves long-standing behaviour, but is it really the right
// behaviour? Or was that an unintended side-effect of node uniquing?
- return getOrSelfReference(A->getContext(), MDs);
+ return getOrSelfReference(A->getContext(), MDs.getArrayRef());
}
MDNode *MDNode::intersect(MDNode *A, MDNode *B) {
if (!A || !B)
return nullptr;
- SmallVector<Metadata *, 4> MDs;
- for (Metadata *MD : A->operands())
- if (std::find(B->op_begin(), B->op_end(), MD) != B->op_end())
- MDs.push_back(MD);
+ SmallSetVector<Metadata *, 4> MDs(A->op_begin(), A->op_end());
+ SmallPtrSet<Metadata *, 4> BSet(B->op_begin(), B->op_end());
+ MDs.remove_if([&](Metadata *MD) { return !is_contained(BSet, MD); });
// FIXME: This preserves long-standing behaviour, but is it really the right
// behaviour? Or was that an unintended side-effect of node uniquing?
- return getOrSelfReference(A->getContext(), MDs);
+ return getOrSelfReference(A->getContext(), MDs.getArrayRef());
}
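The rewritten intersect() keeps A's operand order (via the set-vector) while membership in B is tested through a separate hash set. A stand-alone sketch of that order-preserving intersection with STL stand-ins (illustrative only):

// Standalone sketch of the intersection above: keep A's order, but drop
// every element that is not also present in B.
#include <cassert>
#include <unordered_set>
#include <vector>

std::vector<int> intersectOrdered(const std::vector<int> &A,
                                  const std::vector<int> &B) {
  std::unordered_set<int> BSet(B.begin(), B.end());
  std::vector<int> Result;
  for (int X : A)
    if (BSet.count(X))
      Result.push_back(X);   // Preserve A's operand order, as SetVector does.
  return Result;
}

int main() {
  assert((intersectOrdered({1, 2, 3, 4}, {4, 2}) == std::vector<int>{2, 4}));
}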
MDNode *MDNode::getMostGenericAliasScope(MDNode *A, MDNode *B) {
if (!A || !B)
return nullptr;
- SmallVector<Metadata *, 4> MDs(B->op_begin(), B->op_end());
- for (Metadata *MD : A->operands())
- if (std::find(B->op_begin(), B->op_end(), MD) == B->op_end())
- MDs.push_back(MD);
-
- // FIXME: This preserves long-standing behaviour, but is it really the right
- // behaviour? Or was that an unintended side-effect of node uniquing?
- return getOrSelfReference(A->getContext(), MDs);
+ return concatenate(A, B);
}
MDNode *MDNode::getMostGenericFPMath(MDNode *A, MDNode *B) {
@@ -1065,17 +1052,11 @@ void NamedMDNode::setOperand(unsigned I, MDNode *New) {
getNMDOps(Operands)[I].reset(New);
}
-void NamedMDNode::eraseFromParent() {
- getParent()->eraseNamedMetadata(this);
-}
+void NamedMDNode::eraseFromParent() { getParent()->eraseNamedMetadata(this); }
-void NamedMDNode::dropAllReferences() {
- getNMDOps(Operands).clear();
-}
+void NamedMDNode::clearOperands() { getNMDOps(Operands).clear(); }
-StringRef NamedMDNode::getName() const {
- return StringRef(Name);
-}
+StringRef NamedMDNode::getName() const { return StringRef(Name); }
//===----------------------------------------------------------------------===//
// Instruction Metadata method implementations.
@@ -1173,14 +1154,13 @@ MDNode *Instruction::getMetadataImpl(StringRef Kind) const {
}
void Instruction::dropUnknownNonDebugMetadata(ArrayRef<unsigned> KnownIDs) {
- SmallSet<unsigned, 5> KnownSet;
- KnownSet.insert(KnownIDs.begin(), KnownIDs.end());
-
if (!hasMetadataHashEntry())
return; // Nothing to remove!
auto &InstructionMetadata = getContext().pImpl->InstructionMetadata;
+ SmallSet<unsigned, 4> KnownSet;
+ KnownSet.insert(KnownIDs.begin(), KnownIDs.end());
if (KnownSet.empty()) {
// Just drop our entry at the store.
InstructionMetadata.erase(this);
@@ -1209,7 +1189,7 @@ void Instruction::setMetadata(unsigned KindID, MDNode *Node) {
DbgLoc = DebugLoc(Node);
return;
}
-
+
// Handle the case when we're adding/updating metadata on an instruction.
if (Node) {
auto &Info = getContext().pImpl->InstructionMetadata[this];
@@ -1226,7 +1206,7 @@ void Instruction::setMetadata(unsigned KindID, MDNode *Node) {
(getContext().pImpl->InstructionMetadata.count(this) > 0)) &&
"HasMetadata bit out of date!");
if (!hasMetadataHashEntry())
- return; // Nothing to remove!
+ return; // Nothing to remove!
auto &Info = getContext().pImpl->InstructionMetadata[this];
// Handle removal of an existing value.
@@ -1261,12 +1241,13 @@ MDNode *Instruction::getMetadataImpl(unsigned KindID) const {
void Instruction::getAllMetadataImpl(
SmallVectorImpl<std::pair<unsigned, MDNode *>> &Result) const {
Result.clear();
-
+
// Handle 'dbg' as a special case since it is not stored in the hash table.
if (DbgLoc) {
Result.push_back(
std::make_pair((unsigned)LLVMContext::MD_dbg, DbgLoc.getAsMDNode()));
- if (!hasMetadataHashEntry()) return;
+ if (!hasMetadataHashEntry())
+ return;
}
assert(hasMetadataHashEntry() &&
@@ -1288,10 +1269,11 @@ void Instruction::getAllMetadataOtherThanDebugLocImpl(
Info.getAll(Result);
}
-bool Instruction::extractProfMetadata(uint64_t &TrueVal, uint64_t &FalseVal) {
- assert((getOpcode() == Instruction::Br ||
- getOpcode() == Instruction::Select) &&
- "Looking for branch weights on something besides branch or select");
+bool Instruction::extractProfMetadata(uint64_t &TrueVal,
+ uint64_t &FalseVal) const {
+ assert(
+ (getOpcode() == Instruction::Br || getOpcode() == Instruction::Select) &&
+ "Looking for branch weights on something besides branch or select");
auto *ProfileData = getMetadata(LLVMContext::MD_prof);
if (!ProfileData || ProfileData->getNumOperands() != 3)
@@ -1312,11 +1294,12 @@ bool Instruction::extractProfMetadata(uint64_t &TrueVal, uint64_t &FalseVal) {
return true;
}
-bool Instruction::extractProfTotalWeight(uint64_t &TotalVal) {
+bool Instruction::extractProfTotalWeight(uint64_t &TotalVal) const {
assert((getOpcode() == Instruction::Br ||
getOpcode() == Instruction::Select ||
getOpcode() == Instruction::Call ||
- getOpcode() == Instruction::Invoke) &&
+ getOpcode() == Instruction::Invoke ||
+ getOpcode() == Instruction::Switch) &&
"Looking for branch weights on something besides branch");
TotalVal = 0;
@@ -1433,7 +1416,29 @@ void GlobalObject::copyMetadata(const GlobalObject *Other, unsigned Offset) {
*MDNode::get(getContext(), {NewOffsetMD, TypeId}));
continue;
}
- addMetadata(MD.first, *MD.second);
+ // If an offset adjustment was specified we need to modify the DIExpression
+ // to prepend the adjustment:
+ // !DIExpression(DW_OP_plus, Offset, [original expr])
+ auto *Attachment = MD.second;
+ if (Offset != 0 && MD.first == LLVMContext::MD_dbg) {
+ DIGlobalVariable *GV = dyn_cast<DIGlobalVariable>(Attachment);
+ DIExpression *E = nullptr;
+ if (!GV) {
+ auto *GVE = cast<DIGlobalVariableExpression>(Attachment);
+ GV = GVE->getVariable();
+ E = GVE->getExpression();
+ }
+ ArrayRef<uint64_t> OrigElements;
+ if (E)
+ OrigElements = E->getElements();
+ std::vector<uint64_t> Elements(OrigElements.size() + 2);
+ Elements[0] = dwarf::DW_OP_plus;
+ Elements[1] = Offset;
+ std::copy(OrigElements.begin(), OrigElements.end(), Elements.begin() + 2);
+ E = DIExpression::get(getContext(), Elements);
+ Attachment = DIGlobalVariableExpression::get(getContext(), GV, E);
+ }
+ addMetadata(MD.first, *Attachment);
}
}
@@ -1453,3 +1458,15 @@ void Function::setSubprogram(DISubprogram *SP) {
DISubprogram *Function::getSubprogram() const {
return cast_or_null<DISubprogram>(getMetadata(LLVMContext::MD_dbg));
}
+
+void GlobalVariable::addDebugInfo(DIGlobalVariableExpression *GV) {
+ addMetadata(LLVMContext::MD_dbg, *GV);
+}
+
+void GlobalVariable::getDebugInfo(
+ SmallVectorImpl<DIGlobalVariableExpression *> &GVs) const {
+ SmallVector<MDNode *, 1> MDs;
+ getMetadata(LLVMContext::MD_dbg, MDs);
+ for (MDNode *MD : MDs)
+ GVs.push_back(cast<DIGlobalVariableExpression>(MD));
+}
diff --git a/contrib/llvm/lib/IR/Module.cpp b/contrib/llvm/lib/IR/Module.cpp
index ae81b25b9517..1911f84340c6 100644
--- a/contrib/llvm/lib/IR/Module.cpp
+++ b/contrib/llvm/lib/IR/Module.cpp
@@ -25,6 +25,8 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/TypeFinder.h"
#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/RandomNumberGenerator.h"
#include <algorithm>
@@ -404,23 +406,23 @@ void Module::setMaterializer(GVMaterializer *GVM) {
Materializer.reset(GVM);
}
-std::error_code Module::materialize(GlobalValue *GV) {
+Error Module::materialize(GlobalValue *GV) {
if (!Materializer)
- return std::error_code();
+ return Error::success();
return Materializer->materialize(GV);
}
-std::error_code Module::materializeAll() {
+Error Module::materializeAll() {
if (!Materializer)
- return std::error_code();
+ return Error::success();
std::unique_ptr<GVMaterializer> M = std::move(Materializer);
return M->materializeModule();
}
-std::error_code Module::materializeMetadata() {
+Error Module::materializeMetadata() {
if (!Materializer)
- return std::error_code();
+ return Error::success();
return Materializer->materializeMetadata();
}
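With materialize()/materializeAll()/materializeMetadata() now returning llvm::Error instead of std::error_code, callers must either propagate the error or consume it explicitly; an unchecked Error aborts in assertion-enabled builds. A hedged sketch of both caller styles follows (the helper function names are invented for illustration; the consuming style mirrors the FunctionPassManager::run() change earlier in this patch):

// Hedged sketch of the new caller-side contract introduced by returning
// llvm::Error: propagate it, or consume it with a handler.
#include "llvm/IR/Module.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"

using namespace llvm;

// Propagate: hand the Error straight back to the caller.
Error materializeAllOrForward(Module &M) {
  if (Error E = M.materializeAll())
    return E;
  return Error::success();
}

// Consume: turn any failure into a fatal diagnostic.
void materializeAllOrDie(Module &M) {
  handleAllErrors(M.materializeAll(), [](ErrorInfoBase &EIB) {
    report_fatal_error("cannot materialize module: " + EIB.message());
  });
}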
@@ -519,6 +521,10 @@ Metadata *Module::getProfileSummary() {
return getModuleFlag("ProfileSummary");
}
+void Module::setOwnedMemoryBuffer(std::unique_ptr<MemoryBuffer> MB) {
+ OwnedMemoryBuffer = std::move(MB);
+}
+
GlobalVariable *llvm::collectUsedGlobalVariables(
const Module &M, SmallPtrSetImpl<GlobalValue *> &Set, bool CompilerUsed) {
const char *Name = CompilerUsed ? "llvm.compiler.used" : "llvm.used";
diff --git a/contrib/llvm/lib/IR/ModuleSummaryIndex.cpp b/contrib/llvm/lib/IR/ModuleSummaryIndex.cpp
index 6107cf40a08d..9072f4bc7b12 100644
--- a/contrib/llvm/lib/IR/ModuleSummaryIndex.cpp
+++ b/contrib/llvm/lib/IR/ModuleSummaryIndex.cpp
@@ -20,8 +20,17 @@ using namespace llvm;
// per-module instances.
void ModuleSummaryIndex::mergeFrom(std::unique_ptr<ModuleSummaryIndex> Other,
uint64_t NextModuleId) {
+ if (Other->modulePaths().empty())
+ return;
+
+ assert(Other->modulePaths().size() == 1 &&
+ "Can only merge from an single-module index at that time");
+
+ StringRef OtherModPath = Other->modulePaths().begin()->first();
+ StringRef ModPath = addModulePath(OtherModPath, NextModuleId,
+ Other->getModuleHash(OtherModPath))
+ ->first();
- StringRef ModPath;
for (auto &OtherGlobalValSummaryLists : *Other) {
GlobalValue::GUID ValueGUID = OtherGlobalValSummaryLists.first;
GlobalValueSummaryList &List = OtherGlobalValSummaryLists.second;
@@ -31,16 +40,6 @@ void ModuleSummaryIndex::mergeFrom(std::unique_ptr<ModuleSummaryIndex> Other,
assert(List.size() == 1);
std::unique_ptr<GlobalValueSummary> Summary = std::move(List.front());
- // Add the module path string ref for this module if we haven't already
- // saved a reference to it.
- if (ModPath.empty()) {
- auto Path = Summary->modulePath();
- ModPath = addModulePath(Path, NextModuleId, Other->getModuleHash(Path))
- ->first();
- } else
- assert(ModPath == Summary->modulePath() &&
- "Each module in the combined map should have a unique ID");
-
// Note the module path string ref was copied above and is still owned by
// the original per-module index. Reset it to the new module path
// string reference owned by the combined index.
diff --git a/contrib/llvm/lib/IR/Operator.cpp b/contrib/llvm/lib/IR/Operator.cpp
index 8a94053a72c5..2fba24d99b30 100644
--- a/contrib/llvm/lib/IR/Operator.cpp
+++ b/contrib/llvm/lib/IR/Operator.cpp
@@ -33,7 +33,7 @@ bool GEPOperator::accumulateConstantOffset(const DataLayout &DL,
continue;
// Handle a struct index, which adds its field offset to the pointer.
- if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ if (StructType *STy = GTI.getStructTypeOrNull()) {
unsigned ElementIdx = OpC->getZExtValue();
const StructLayout *SL = DL.getStructLayout(STy);
Offset += APInt(Offset.getBitWidth(), SL->getElementOffset(ElementIdx));
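The gep-type iterator no longer hands back a struct type through operator*, hence getStructTypeOrNull above. A sketch of the updated iteration pattern over GEP indices (GEP is a hypothetical GetElementPtrInst or GEPOperator pointer; the loop bodies are placeholders):

#include "llvm/IR/GetElementPtrTypeIterator.h"
for (llvm::gep_type_iterator GTI = llvm::gep_type_begin(GEP),
                             E = llvm::gep_type_end(GEP);
     GTI != E; ++GTI) {
  if (llvm::StructType *STy = GTI.getStructTypeOrNull()) {
    // Struct step: the index selects a field; use StructLayout for its offset.
  } else {
    // Sequential step (array/vector/pointer): scale the index by element size.
  }
}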
diff --git a/contrib/llvm/lib/IR/Pass.cpp b/contrib/llvm/lib/IR/Pass.cpp
index 69299fea7634..a42945ef3fff 100644
--- a/contrib/llvm/lib/IR/Pass.cpp
+++ b/contrib/llvm/lib/IR/Pass.cpp
@@ -64,7 +64,7 @@ void Pass::dumpPassStructure(unsigned Offset) {
/// implemented in terms of the name that is registered by one of the
/// Registration templates, but can be overloaded directly.
///
-const char *Pass::getPassName() const {
+StringRef Pass::getPassName() const {
AnalysisID AID = getPassID();
const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(AID);
if (PI)
@@ -218,7 +218,7 @@ Pass *Pass::createPass(AnalysisID ID) {
// RegisterAGBase implementation
//
-RegisterAGBase::RegisterAGBase(const char *Name, const void *InterfaceID,
+RegisterAGBase::RegisterAGBase(StringRef Name, const void *InterfaceID,
const void *PassID, bool isDefault)
: PassInfo(Name, InterfaceID) {
PassRegistry::getPassRegistry()->registerAnalysisGroup(InterfaceID, PassID,
diff --git a/contrib/llvm/lib/IR/PassManager.cpp b/contrib/llvm/lib/IR/PassManager.cpp
index 8563a4019a2b..8f68bb1daecf 100644
--- a/contrib/llvm/lib/IR/PassManager.cpp
+++ b/contrib/llvm/lib/IR/PassManager.cpp
@@ -13,12 +13,82 @@
using namespace llvm;
-// Explicit template instantiations for core template typedefs.
+// Explicit template instantiations and specialization definitions for core
+// template typedefs.
namespace llvm {
+template class AllAnalysesOn<Module>;
+template class AllAnalysesOn<Function>;
template class PassManager<Module>;
template class PassManager<Function>;
template class AnalysisManager<Module>;
template class AnalysisManager<Function>;
template class InnerAnalysisManagerProxy<FunctionAnalysisManager, Module>;
template class OuterAnalysisManagerProxy<ModuleAnalysisManager, Function>;
+
+template <>
+bool FunctionAnalysisManagerModuleProxy::Result::invalidate(
+ Module &M, const PreservedAnalyses &PA,
+ ModuleAnalysisManager::Invalidator &Inv) {
+ // If literally everything is preserved, we're done.
+ if (PA.areAllPreserved())
+ return false; // This is still a valid proxy.
+
+ // If this proxy isn't marked as preserved, then even if the result remains
+ // valid, the key itself may no longer be valid, so we clear everything.
+ //
+ // Note that in order to preserve this proxy, a module pass must ensure that
+ // the FAM has been completely updated to handle the deletion of functions.
+ // Specifically, any FAM-cached results for those functions need to have been
+ // forcibly cleared. When preserved, this proxy will only invalidate results
+ // cached on functions *still in the module* at the end of the module pass.
+ auto PAC = PA.getChecker<FunctionAnalysisManagerModuleProxy>();
+ if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Module>>()) {
+ InnerAM->clear();
+ return true;
+ }
+
+ // Directly check if the relevant set is preserved.
+ bool AreFunctionAnalysesPreserved =
+ PA.allAnalysesInSetPreserved<AllAnalysesOn<Function>>();
+
+ // Now walk all the functions to see if any inner analysis invalidation is
+ // necessary.
+ for (Function &F : M) {
+ Optional<PreservedAnalyses> FunctionPA;
+
+ // Check to see whether the preserved set needs to be pruned based on
+ // module-level analysis invalidation that triggers deferred invalidation
+ // registered with the outer analysis manager proxy for this function.
+ if (auto *OuterProxy =
+ InnerAM->getCachedResult<ModuleAnalysisManagerFunctionProxy>(F))
+ for (const auto &OuterInvalidationPair :
+ OuterProxy->getOuterInvalidations()) {
+ AnalysisKey *OuterAnalysisID = OuterInvalidationPair.first;
+ const auto &InnerAnalysisIDs = OuterInvalidationPair.second;
+ if (Inv.invalidate(OuterAnalysisID, M, PA)) {
+ if (!FunctionPA)
+ FunctionPA = PA;
+ for (AnalysisKey *InnerAnalysisID : InnerAnalysisIDs)
+ FunctionPA->abandon(InnerAnalysisID);
+ }
+ }
+
+ // Check if we needed a custom PA set, and if so we'll need to run the
+ // inner invalidation.
+ if (FunctionPA) {
+ InnerAM->invalidate(F, *FunctionPA);
+ continue;
+ }
+
+ // Otherwise we only need to do invalidation if the original PA set didn't
+ // preserve all function analyses.
+ if (!AreFunctionAnalysesPreserved)
+ InnerAM->invalidate(F, PA);
+ }
+
+ // Return false to indicate that this result is still a valid proxy.
+ return false;
}
+}
+
+AnalysisSetKey PreservedAnalyses::AllAnalysesKey;
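The comment in the new invalidate() spells out the preservation contract; a module pass that neither deletes functions nor changes their bodies might report it as in the following sketch (the pass itself is hypothetical, only the PreservedAnalyses calls come from this API):

// Sketch of a new-PM module pass that keeps cached function analyses alive.
struct HypotheticalModulePass : llvm::PassInfoMixin<HypotheticalModulePass> {
  llvm::PreservedAnalyses run(llvm::Module &M, llvm::ModuleAnalysisManager &) {
    // ... module-level work that leaves all function bodies intact ...
    llvm::PreservedAnalyses PA;
    PA.preserve<llvm::FunctionAnalysisManagerModuleProxy>();
    PA.preserveSet<llvm::AllAnalysesOn<llvm::Function>>();
    return PA;
  }
};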
diff --git a/contrib/llvm/lib/IR/PassRegistry.cpp b/contrib/llvm/lib/IR/PassRegistry.cpp
index 09b17ba308dd..584dee2869c1 100644
--- a/contrib/llvm/lib/IR/PassRegistry.cpp
+++ b/contrib/llvm/lib/IR/PassRegistry.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/PassRegistry.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/PassSupport.h"
#include "llvm/Support/ManagedStatic.h"
@@ -121,6 +122,6 @@ void PassRegistry::addRegistrationListener(PassRegistrationListener *L) {
void PassRegistry::removeRegistrationListener(PassRegistrationListener *L) {
sys::SmartScopedWriter<true> Guard(Lock);
- auto I = std::find(Listeners.begin(), Listeners.end(), L);
+ auto I = find(Listeners, L);
Listeners.erase(I);
}
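The find used above comes from llvm/ADT/STLExtras.h (hence the added include); the range helpers are thin wrappers over the std algorithms, roughly as in this small sketch:

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"

llvm::SmallVector<int, 4> Xs = {1, 2, 3};
auto It = llvm::find(Xs, 2);           // same as std::find(Xs.begin(), Xs.end(), 2)
bool Has = llvm::is_contained(Xs, 3);  // same as find(Xs, 3) != Xs.end()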
diff --git a/contrib/llvm/lib/IR/SymbolTableListTraitsImpl.h b/contrib/llvm/lib/IR/SymbolTableListTraitsImpl.h
index 50573d8d688a..6ddab6b4c69d 100644
--- a/contrib/llvm/lib/IR/SymbolTableListTraitsImpl.h
+++ b/contrib/llvm/lib/IR/SymbolTableListTraitsImpl.h
@@ -81,11 +81,10 @@ void SymbolTableListTraits<ValueSubClass>::removeNodeFromList(
template <typename ValueSubClass>
void SymbolTableListTraits<ValueSubClass>::transferNodesFromList(
- SymbolTableListTraits &L2, ilist_iterator<ValueSubClass> first,
- ilist_iterator<ValueSubClass> last) {
+ SymbolTableListTraits &L2, iterator first, iterator last) {
// We only have to do work here if transferring instructions between BBs
ItemParentClass *NewIP = getListOwner(), *OldIP = L2.getListOwner();
- if (NewIP == OldIP) return; // No work to do at all...
+ assert(NewIP != OldIP && "Expected different list owners");
// We only have to update symbol table entries if we are transferring the
// instructions to a different symtab object...
diff --git a/contrib/llvm/lib/IR/Type.cpp b/contrib/llvm/lib/IR/Type.cpp
index 5c97a4ea0705..ca866738f882 100644
--- a/contrib/llvm/lib/IR/Type.cpp
+++ b/contrib/llvm/lib/IR/Type.cpp
@@ -296,9 +296,9 @@ FunctionType *FunctionType::get(Type *ReturnType,
FunctionType *FT;
if (I == pImpl->FunctionTypes.end()) {
- FT = (FunctionType*) pImpl->TypeAllocator.
- Allocate(sizeof(FunctionType) + sizeof(Type*) * (Params.size() + 1),
- AlignOf<FunctionType>::Alignment);
+ FT = (FunctionType *)pImpl->TypeAllocator.Allocate(
+ sizeof(FunctionType) + sizeof(Type *) * (Params.size() + 1),
+ alignof(FunctionType));
new (FT) FunctionType(ReturnType, Params, isVarArg);
pImpl->FunctionTypes.insert(FT);
} else {
@@ -601,9 +601,7 @@ bool CompositeType::indexValid(unsigned Idx) const {
//===----------------------------------------------------------------------===//
ArrayType::ArrayType(Type *ElType, uint64_t NumEl)
- : SequentialType(ArrayTyID, ElType) {
- NumElements = NumEl;
-}
+ : SequentialType(ArrayTyID, ElType, NumEl) {}
ArrayType *ArrayType::get(Type *ElementType, uint64_t NumElements) {
assert(isValidElementType(ElementType) && "Invalid type for array element!");
@@ -628,9 +626,7 @@ bool ArrayType::isValidElementType(Type *ElemTy) {
//===----------------------------------------------------------------------===//
VectorType::VectorType(Type *ElType, unsigned NumEl)
- : SequentialType(VectorTyID, ElType) {
- NumElements = NumEl;
-}
+ : SequentialType(VectorTyID, ElType, NumEl) {}
VectorType *VectorType::get(Type *ElementType, unsigned NumElements) {
assert(NumElements > 0 && "#Elements of a VectorType must be greater than 0");
@@ -673,13 +669,10 @@ PointerType *PointerType::get(Type *EltTy, unsigned AddressSpace) {
PointerType::PointerType(Type *E, unsigned AddrSpace)
- : SequentialType(PointerTyID, E) {
-#ifndef NDEBUG
- const unsigned oldNCT = NumContainedTys;
-#endif
+ : Type(E->getContext(), PointerTyID), PointeeTy(E) {
+ ContainedTys = &PointeeTy;
+ NumContainedTys = 1;
setSubclassData(AddrSpace);
- // Check for miscompile. PR11652.
- assert(oldNCT == NumContainedTys && "bitfield written out of bounds?");
}
PointerType *Type::getPointerTo(unsigned addrs) const {
diff --git a/contrib/llvm/lib/IR/User.cpp b/contrib/llvm/lib/IR/User.cpp
index a75abe6938c9..497b4aa17643 100644
--- a/contrib/llvm/lib/IR/User.cpp
+++ b/contrib/llvm/lib/IR/User.cpp
@@ -43,10 +43,9 @@ void User::replaceUsesOfWith(Value *From, Value *To) {
void User::allocHungoffUses(unsigned N, bool IsPhi) {
assert(HasHungOffUses && "alloc must have hung off uses");
- static_assert(AlignOf<Use>::Alignment >= AlignOf<Use::UserRef>::Alignment,
+ static_assert(alignof(Use) >= alignof(Use::UserRef),
"Alignment is insufficient for 'hung-off-uses' pieces");
- static_assert(AlignOf<Use::UserRef>::Alignment >=
- AlignOf<BasicBlock *>::Alignment,
+ static_assert(alignof(Use::UserRef) >= alignof(BasicBlock *),
"Alignment is insufficient for 'hung-off-uses' pieces");
// Allocate the array of Uses, followed by a pointer (with bottom bit set) to
diff --git a/contrib/llvm/lib/IR/Value.cpp b/contrib/llvm/lib/IR/Value.cpp
index 6ae37fa6f15a..91a999b58004 100644
--- a/contrib/llvm/lib/IR/Value.cpp
+++ b/contrib/llvm/lib/IR/Value.cpp
@@ -124,10 +124,10 @@ bool Value::isUsedInBasicBlock(const BasicBlock *BB) const {
const_user_iterator UI = user_begin(), UE = user_end();
for (; BI != BE && UI != UE; ++BI, ++UI) {
// Scan basic block: Check if this Value is used by the instruction at BI.
- if (std::find(BI->op_begin(), BI->op_end(), this) != BI->op_end())
+ if (is_contained(BI->operands(), this))
return true;
// Scan use list: Check if the use at UI is in BB.
- const Instruction *User = dyn_cast<Instruction>(*UI);
+ const auto *User = dyn_cast<Instruction>(*UI);
if (User && User->getParent() == BB)
return true;
}
@@ -143,16 +143,16 @@ static bool getSymTab(Value *V, ValueSymbolTable *&ST) {
if (Instruction *I = dyn_cast<Instruction>(V)) {
if (BasicBlock *P = I->getParent())
if (Function *PP = P->getParent())
- ST = &PP->getValueSymbolTable();
+ ST = PP->getValueSymbolTable();
} else if (BasicBlock *BB = dyn_cast<BasicBlock>(V)) {
if (Function *P = BB->getParent())
- ST = &P->getValueSymbolTable();
+ ST = P->getValueSymbolTable();
} else if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
if (Module *P = GV->getParent())
ST = &P->getValueSymbolTable();
} else if (Argument *A = dyn_cast<Argument>(V)) {
if (Function *P = A->getParent())
- ST = &P->getValueSymbolTable();
+ ST = P->getValueSymbolTable();
} else {
assert(isa<Constant>(V) && "Unknown value type!");
return true; // no name is setable for this.
@@ -367,7 +367,7 @@ static bool contains(Value *Expr, Value *V) {
}
#endif // NDEBUG
-void Value::replaceAllUsesWith(Value *New) {
+void Value::doRAUW(Value *New, bool NoMetadata) {
assert(New && "Value::replaceAllUsesWith(<null>) is invalid!");
assert(!contains(New, this) &&
"this->replaceAllUsesWith(expr(this)) is NOT valid!");
@@ -377,7 +377,7 @@ void Value::replaceAllUsesWith(Value *New) {
// Notify all ValueHandles (if present) that this value is going away.
if (HasValueHandle)
ValueHandleBase::ValueIsRAUWd(this, New);
- if (isUsedByMetadata())
+ if (!NoMetadata && isUsedByMetadata())
ValueAsMetadata::handleRAUW(this, New);
while (!use_empty()) {
@@ -398,6 +398,14 @@ void Value::replaceAllUsesWith(Value *New) {
BB->replaceSuccessorsPhiUsesWith(cast<BasicBlock>(New));
}
+void Value::replaceAllUsesWith(Value *New) {
+ doRAUW(New, false /* NoMetadata */);
+}
+
+void Value::replaceNonMetadataUsesWith(Value *New) {
+ doRAUW(New, true /* NoMetadata */);
+}
+
// Like replaceAllUsesWith except it does not handle constants or basic blocks.
// This routine leaves uses within BB.
void Value::replaceUsesOutsideBlock(Value *New, BasicBlock *BB) {
@@ -449,7 +457,7 @@ static Value *stripPointerCastsAndOffsets(Value *V) {
case PSK_InBoundsConstantIndices:
if (!GEP->hasAllConstantIndices())
return V;
- // fallthrough
+ LLVM_FALLTHROUGH;
case PSK_InBounds:
if (!GEP->isInBounds())
return V;
@@ -664,6 +672,16 @@ void Value::reverseUseList() {
Head->setPrev(&UseList);
}
+bool Value::isSwiftError() const {
+ auto *Arg = dyn_cast<Argument>(this);
+ if (Arg)
+ return Arg->hasSwiftErrorAttr();
+ auto *Alloca = dyn_cast<AllocaInst>(this);
+ if (!Alloca)
+ return false;
+ return Alloca->isSwiftError();
+}
+
//===----------------------------------------------------------------------===//
// ValueHandleBase Class
//===----------------------------------------------------------------------===//
@@ -848,7 +866,7 @@ void ValueHandleBase::ValueIsRAUWd(Value *Old, Value *New) {
// virtual (or inline) interface to handle this though, so instead we make
// the TrackingVH accessors guarantee that a client never sees this value.
- // FALLTHROUGH
+ LLVM_FALLTHROUGH;
case Weak:
// Weak goes to the new value, which will unlink it from Old's list.
Entry->operator=(New);
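Splitting replaceAllUsesWith into doRAUW gives a second entry point that leaves metadata uses untouched. The difference for a caller, with Old and New standing in for hypothetical Value pointers of matching type:

Old->replaceAllUsesWith(New);          // rewrites IR uses and metadata uses
                                       // (e.g. values referenced from !dbg).
Old->replaceNonMetadataUsesWith(New);  // rewrites IR uses only; metadata keeps
                                       // pointing at Old.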
diff --git a/contrib/llvm/lib/IR/ValueSymbolTable.cpp b/contrib/llvm/lib/IR/ValueSymbolTable.cpp
index f6f1dd984e9f..8a6a320fc2d1 100644
--- a/contrib/llvm/lib/IR/ValueSymbolTable.cpp
+++ b/contrib/llvm/lib/IR/ValueSymbolTable.cpp
@@ -15,8 +15,13 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Type.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <utility>
+
using namespace llvm;
#define DEBUG_TYPE "valuesymtab"
@@ -35,7 +40,7 @@ ValueSymbolTable::~ValueSymbolTable() {
ValueName *ValueSymbolTable::makeUniqueName(Value *V,
SmallString<256> &UniqueName) {
unsigned BaseSize = UniqueName.size();
- while (1) {
+ while (true) {
// Trim any suffix off and append the next number.
UniqueName.resize(BaseSize);
raw_svector_ostream S(UniqueName);
@@ -94,7 +99,6 @@ ValueName *ValueSymbolTable::createValueName(StringRef Name, Value *V) {
return makeUniqueName(V, UniqueName);
}
-
// dump - print out the symbol table
//
LLVM_DUMP_METHOD void ValueSymbolTable::dump() const {
diff --git a/contrib/llvm/lib/IR/ValueTypes.cpp b/contrib/llvm/lib/IR/ValueTypes.cpp
index ff1e431c2e9a..2132e1659225 100644
--- a/contrib/llvm/lib/IR/ValueTypes.cpp
+++ b/contrib/llvm/lib/IR/ValueTypes.cpp
@@ -26,7 +26,7 @@ EVT EVT::changeExtendedTypeToInteger() const {
EVT EVT::changeExtendedVectorElementTypeToInteger() const {
LLVMContext &Context = LLVMTy->getContext();
- EVT IntTy = getIntegerVT(Context, getVectorElementType().getSizeInBits());
+ EVT IntTy = getIntegerVT(Context, getScalarSizeInBits());
return getVectorVT(Context, IntTy, getVectorNumElements());
}
diff --git a/contrib/llvm/lib/IR/Verifier.cpp b/contrib/llvm/lib/IR/Verifier.cpp
index 682e934d4b0f..5855059a189c 100644
--- a/contrib/llvm/lib/IR/Verifier.cpp
+++ b/contrib/llvm/lib/IR/Verifier.cpp
@@ -45,48 +45,86 @@
//===----------------------------------------------------------------------===//
#include "llvm/IR/Verifier.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/ilist.h"
#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/IR/Argument.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/Comdat.h"
+#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InlineAsm.h"
-#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSlotTracker.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Statepoint.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
-#include <cstdarg>
+#include <cassert>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <utility>
+
using namespace llvm;
static cl::opt<bool> VerifyDebugInfo("verify-debug-info", cl::init(true));
-namespace {
+namespace llvm {
+
struct VerifierSupport {
raw_ostream *OS;
- const Module *M = nullptr;
- Optional<ModuleSlotTracker> MST;
+ const Module &M;
+ ModuleSlotTracker MST;
+ const DataLayout &DL;
+ LLVMContext &Context;
/// Track the brokenness of the module while recursively visiting.
bool Broken = false;
@@ -95,16 +133,11 @@ struct VerifierSupport {
/// Whether to treat broken debug info as an error.
bool TreatBrokenDebugInfoAsError = true;
- explicit VerifierSupport(raw_ostream *OS) : OS(OS) {}
+ explicit VerifierSupport(raw_ostream *OS, const Module &M)
+ : OS(OS), M(M), MST(&M), DL(M.getDataLayout()), Context(M.getContext()) {}
private:
- template <class NodeTy> void Write(const ilist_iterator<NodeTy> &I) {
- Write(&*I);
- }
-
void Write(const Module *M) {
- if (!M)
- return;
*OS << "; ModuleID = '" << M->getModuleIdentifier() << "'\n";
}
@@ -112,13 +145,14 @@ private:
if (!V)
return;
if (isa<Instruction>(V)) {
- V->print(*OS, *MST);
+ V->print(*OS, MST);
*OS << '\n';
} else {
- V->printAsOperand(*OS, true, *MST);
+ V->printAsOperand(*OS, true, MST);
*OS << '\n';
}
}
+
void Write(ImmutableCallSite CS) {
Write(CS.getInstruction());
}
@@ -126,7 +160,7 @@ private:
void Write(const Metadata *MD) {
if (!MD)
return;
- MD->print(*OS, *MST, M);
+ MD->print(*OS, MST, &M);
*OS << '\n';
}
@@ -137,7 +171,7 @@ private:
void Write(const NamedMDNode *NMD) {
if (!NMD)
return;
- NMD->print(*OS, *MST);
+ NMD->print(*OS, MST);
*OS << '\n';
}
@@ -153,6 +187,14 @@ private:
*OS << *C;
}
+ void Write(const APInt *AI) {
+ if (!AI)
+ return;
+ *OS << *AI << '\n';
+ }
+
+ void Write(const unsigned i) { *OS << i << '\n'; }
+
template <typename T> void Write(ArrayRef<T> Vs) {
for (const T &V : Vs)
Write(V);
@@ -206,10 +248,13 @@ public:
}
};
+} // namespace llvm
+
+namespace {
+
class Verifier : public InstVisitor<Verifier>, VerifierSupport {
friend class InstVisitor<Verifier>;
- LLVMContext *Context;
DominatorTree DT;
/// \brief When verifying a basic block, keep track of all of the
@@ -252,28 +297,23 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport {
// constant expressions, we can arrive at a particular user many times.
SmallPtrSet<const Value *, 32> GlobalValueVisited;
- void checkAtomicMemAccessSize(const Module *M, Type *Ty,
- const Instruction *I);
+ TBAAVerifier TBAAVerifyHelper;
- void updateModule(const Module *NewM) {
- if (M == NewM)
- return;
- MST.emplace(NewM);
- M = NewM;
- }
+ void checkAtomicMemAccessSize(Type *Ty, const Instruction *I);
public:
- explicit Verifier(raw_ostream *OS, bool ShouldTreatBrokenDebugInfoAsError)
- : VerifierSupport(OS), Context(nullptr), LandingPadResultTy(nullptr),
- SawFrameEscape(false) {
+ explicit Verifier(raw_ostream *OS, bool ShouldTreatBrokenDebugInfoAsError,
+ const Module &M)
+ : VerifierSupport(OS, M), LandingPadResultTy(nullptr),
+ SawFrameEscape(false), TBAAVerifyHelper(this) {
TreatBrokenDebugInfoAsError = ShouldTreatBrokenDebugInfoAsError;
}
bool hasBrokenDebugInfo() const { return BrokenDebugInfo; }
bool verify(const Function &F) {
- updateModule(F.getParent());
- Context = &M->getContext();
+ assert(F.getParent() == &M &&
+ "An instance of this class only works with a specific module!");
// First ensure the function is well-enough formed to compute dominance
// information, and directly compute a dominance tree. We don't rely on the
@@ -291,7 +331,7 @@ public:
if (OS) {
*OS << "Basic Block in function '" << F.getName()
<< "' does not have terminator!\n";
- BB.printAsOperand(*OS, true, *MST);
+ BB.printAsOperand(*OS, true, MST);
*OS << "\n";
}
return false;
@@ -309,9 +349,8 @@ public:
return !Broken;
}
- bool verify(const Module &M) {
- updateModule(&M);
- Context = &M.getContext();
+ /// Verify the module that this instance of \c Verifier was initialized with.
+ bool verify() {
Broken = false;
// Collect all declarations of the llvm.experimental.deoptimize intrinsic.
@@ -364,8 +403,8 @@ private:
SmallVectorImpl<const MDNode *> &Requirements);
void visitFunction(const Function &F);
void visitBasicBlock(BasicBlock &BB);
- void visitRangeMetadata(Instruction& I, MDNode* Range, Type* Ty);
- void visitDereferenceableMetadata(Instruction& I, MDNode* MD);
+ void visitRangeMetadata(Instruction &I, MDNode *Range, Type *Ty);
+ void visitDereferenceableMetadata(Instruction &I, MDNode *MD);
template <class Ty> bool isValidMetadataArray(const MDTuple &N);
#define HANDLE_SPECIALIZED_MDNODE_LEAF(CLASS) void visit##CLASS(const CLASS &N);
@@ -428,6 +467,7 @@ private:
void visitInsertValueInst(InsertValueInst &IVI);
void visitEHPadPredecessors(Instruction &I);
void visitLandingPadInst(LandingPadInst &LPI);
+ void visitResumeInst(ResumeInst &RI);
void visitCatchPadInst(CatchPadInst &CPI);
void visitCatchReturnInst(CatchReturnInst &CatchReturn);
void visitCleanupPadInst(CleanupPadInst &CPI);
@@ -456,7 +496,7 @@ private:
void verifyFrameRecoverIndices();
void verifySiblingFuncletUnwinds();
- void verifyBitPieceExpression(const DbgInfoIntrinsic &I);
+ void verifyFragmentExpression(const DbgInfoIntrinsic &I);
/// Module-level debug info verification...
void verifyCompileUnits();
@@ -465,17 +505,17 @@ private:
/// declarations share the same calling convention.
void verifyDeoptimizeCallingConvs();
};
-} // End anonymous namespace
+
+} // end anonymous namespace
/// We know that cond should be true, if not print an error message.
#define Assert(C, ...) \
- do { if (!(C)) { CheckFailed(__VA_ARGS__); return; } } while (0)
+ do { if (!(C)) { CheckFailed(__VA_ARGS__); return; } } while (false)
/// We know that a debug info condition should be true, if not print
/// an error message.
#define AssertDI(C, ...) \
- do { if (!(C)) { DebugInfoCheckFailed(__VA_ARGS__); return; } } while (0)
-
+ do { if (!(C)) { DebugInfoCheckFailed(__VA_ARGS__); return; } } while (false)
void Verifier::visit(Instruction &I) {
for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i)
@@ -517,17 +557,17 @@ void Verifier::visitGlobalValue(const GlobalValue &GV) {
forEachUser(&GV, GlobalValueVisited, [&](const Value *V) -> bool {
if (const Instruction *I = dyn_cast<Instruction>(V)) {
if (!I->getParent() || !I->getParent()->getParent())
- CheckFailed("Global is referenced by parentless instruction!", &GV,
- M, I);
- else if (I->getParent()->getParent()->getParent() != M)
- CheckFailed("Global is referenced in a different module!", &GV,
- M, I, I->getParent()->getParent(),
+ CheckFailed("Global is referenced by parentless instruction!", &GV, &M,
+ I);
+ else if (I->getParent()->getParent()->getParent() != &M)
+ CheckFailed("Global is referenced in a different module!", &GV, &M, I,
+ I->getParent()->getParent(),
I->getParent()->getParent()->getParent());
return false;
} else if (const Function *F = dyn_cast<Function>(V)) {
- if (F->getParent() != M)
- CheckFailed("Global is used by function in a different module", &GV,
- M, F, F->getParent());
+ if (F->getParent() != &M)
+ CheckFailed("Global is used by function in a different module", &GV, &M,
+ F, F->getParent());
return false;
}
return true;
@@ -540,7 +580,6 @@ void Verifier::visitGlobalVariable(const GlobalVariable &GV) {
"Global variable initializer type does not match global "
"variable type!",
&GV);
-
// If the global has common linkage, it must have a zero initializer and
// cannot be constant.
if (GV.hasCommonLinkage()) {
@@ -561,7 +600,7 @@ void Verifier::visitGlobalVariable(const GlobalVariable &GV) {
if (ArrayType *ATy = dyn_cast<ArrayType>(GV.getValueType())) {
StructType *STy = dyn_cast<StructType>(ATy->getElementType());
PointerType *FuncPtrTy =
- FunctionType::get(Type::getVoidTy(*Context), false)->getPointerTo();
+ FunctionType::get(Type::getVoidTy(Context), false)->getPointerTo();
// FIXME: Reject the 2-field form in LLVM 4.0.
Assert(STy &&
(STy->getNumElements() == 2 || STy->getNumElements() == 3) &&
@@ -606,6 +645,16 @@ void Verifier::visitGlobalVariable(const GlobalVariable &GV) {
GV.hasAvailableExternallyLinkage(),
"Global is marked as dllimport, but not external", &GV);
+ // Visit any debug info attachments.
+ SmallVector<MDNode *, 1> MDs;
+ GV.getMetadata(LLVMContext::MD_dbg, MDs);
+ for (auto *MD : MDs) {
+ if (auto *GVE = dyn_cast<DIGlobalVariableExpression>(MD))
+ visitDIGlobalVariableExpression(*GVE);
+ else
+ AssertDI(false, "!dbg attachment of global variable must be a DIGlobalVariableExpression");
+ }
+
if (!GV.hasInitializer()) {
visitGlobalValue(GV);
return;
@@ -672,10 +721,15 @@ void Verifier::visitGlobalAlias(const GlobalAlias &GA) {
}
void Verifier::visitNamedMDNode(const NamedMDNode &NMD) {
+ // There used to be various other llvm.dbg.* nodes, but we don't support
+ // upgrading them and we want to reserve the namespace for future uses.
+ if (NMD.getName().startswith("llvm.dbg."))
+ AssertDI(NMD.getName() == "llvm.dbg.cu",
+ "unrecognized named metadata node in the llvm.dbg namespace",
+ &NMD);
for (const MDNode *MD : NMD.operands()) {
- if (NMD.getName() == "llvm.dbg.cu") {
+ if (NMD.getName() == "llvm.dbg.cu")
AssertDI(MD && isa<DICompileUnit>(MD), "invalid compile unit", &NMD, MD);
- }
if (!MD)
continue;
@@ -769,7 +823,7 @@ static bool isScope(const Metadata *MD) { return !MD || isa<DIScope>(MD); }
static bool isDINode(const Metadata *MD) { return !MD || isa<DINode>(MD); }
template <class Ty>
-bool isValidMetadataArrayImpl(const MDTuple &N, bool AllowNull) {
+static bool isValidMetadataArrayImpl(const MDTuple &N, bool AllowNull) {
for (Metadata *MD : N.operands()) {
if (MD) {
if (!isa<Ty>(MD))
@@ -782,13 +836,11 @@ bool isValidMetadataArrayImpl(const MDTuple &N, bool AllowNull) {
return true;
}
-template <class Ty>
-bool isValidMetadataArray(const MDTuple &N) {
+template <class Ty> static bool isValidMetadataArray(const MDTuple &N) {
return isValidMetadataArrayImpl<Ty>(N, /* AllowNull */ false);
}
-template <class Ty>
-bool isValidMetadataNullArray(const MDTuple &N) {
+template <class Ty> static bool isValidMetadataNullArray(const MDTuple &N) {
return isValidMetadataArrayImpl<Ty>(N, /* AllowNull */ true);
}
@@ -835,6 +887,7 @@ void Verifier::visitDIDerivedType(const DIDerivedType &N) {
N.getTag() == dwarf::DW_TAG_const_type ||
N.getTag() == dwarf::DW_TAG_volatile_type ||
N.getTag() == dwarf::DW_TAG_restrict_type ||
+ N.getTag() == dwarf::DW_TAG_atomic_type ||
N.getTag() == dwarf::DW_TAG_member ||
N.getTag() == dwarf::DW_TAG_inheritance ||
N.getTag() == dwarf::DW_TAG_friend,
@@ -908,6 +961,8 @@ void Verifier::visitDISubroutineType(const DISubroutineType &N) {
void Verifier::visitDIFile(const DIFile &N) {
AssertDI(N.getTag() == dwarf::DW_TAG_file_type, "invalid tag", &N);
+ AssertDI((N.getChecksumKind() != DIFile::CSK_None ||
+ N.getChecksum().empty()), "invalid checksum kind", &N);
}
void Verifier::visitDICompileUnit(const DICompileUnit &N) {
@@ -937,15 +992,15 @@ void Verifier::visitDICompileUnit(const DICompileUnit &N) {
for (Metadata *Op : N.getRetainedTypes()->operands()) {
AssertDI(Op && (isa<DIType>(Op) ||
(isa<DISubprogram>(Op) &&
- cast<DISubprogram>(Op)->isDefinition() == false)),
+ !cast<DISubprogram>(Op)->isDefinition())),
"invalid retained type", &N, Op);
}
}
if (auto *Array = N.getRawGlobalVariables()) {
AssertDI(isa<MDTuple>(Array), "invalid global variable list", &N, Array);
for (Metadata *Op : N.getGlobalVariables()->operands()) {
- AssertDI(Op && isa<DIGlobalVariable>(Op), "invalid global variable ref",
- &N, Op);
+ AssertDI(Op && (isa<DIGlobalVariableExpression>(Op)),
+ "invalid global variable ref", &N, Op);
}
}
if (auto *Array = N.getRawImportedEntities()) {
@@ -1088,12 +1143,6 @@ void Verifier::visitDIGlobalVariable(const DIGlobalVariable &N) {
AssertDI(N.getTag() == dwarf::DW_TAG_variable, "invalid tag", &N);
AssertDI(!N.getName().empty(), "missing global variable name", &N);
- if (auto *V = N.getRawVariable()) {
- AssertDI(isa<ConstantAsMetadata>(V) &&
- !isa<Function>(cast<ConstantAsMetadata>(V)->getValue()),
- "invalid global varaible ref", &N, V);
- visitConstantExprsRecursively(cast<ConstantAsMetadata>(V)->getValue());
- }
if (auto *Member = N.getRawStaticDataMemberDeclaration()) {
AssertDI(isa<DIDerivedType>(Member),
"invalid static data member declaration", &N, Member);
@@ -1113,6 +1162,15 @@ void Verifier::visitDIExpression(const DIExpression &N) {
AssertDI(N.isValid(), "invalid expression", &N);
}
+void Verifier::visitDIGlobalVariableExpression(
+ const DIGlobalVariableExpression &GVE) {
+ AssertDI(GVE.getVariable(), "missing variable");
+ if (auto *Var = GVE.getVariable())
+ visitDIGlobalVariable(*Var);
+ if (auto *Expr = GVE.getExpression())
+ visitDIExpression(*Expr);
+}
+
void Verifier::visitDIObjCProperty(const DIObjCProperty &N) {
AssertDI(N.getTag() == dwarf::DW_TAG_APPLE_property, "invalid tag", &N);
if (auto *T = N.getRawType())
@@ -1134,7 +1192,7 @@ void Verifier::visitDIImportedEntity(const DIImportedEntity &N) {
void Verifier::visitComdat(const Comdat &C) {
// The Module is invalid if the GlobalValue has private linkage. Entities
// with private linkage don't have entries in the symbol table.
- if (const GlobalValue *GV = M->getNamedValue(C.getName()))
+ if (const GlobalValue *GV = M.getNamedValue(C.getName()))
Assert(!GV->hasPrivateLinkage(), "comdat global value has private linkage",
GV);
}
@@ -1401,12 +1459,12 @@ void Verifier::verifyParameterAttrs(AttributeSet Attrs, unsigned Idx, Type *Ty,
"'noinline and alwaysinline' are incompatible!",
V);
- Assert(!AttrBuilder(Attrs, Idx)
- .overlaps(AttributeFuncs::typeIncompatible(Ty)),
- "Wrong types for attribute: " +
- AttributeSet::get(*Context, Idx,
- AttributeFuncs::typeIncompatible(Ty)).getAsString(Idx),
- V);
+ Assert(
+ !AttrBuilder(Attrs, Idx).overlaps(AttributeFuncs::typeIncompatible(Ty)),
+ "Wrong types for attribute: " +
+ AttributeSet::get(Context, Idx, AttributeFuncs::typeIncompatible(Ty))
+ .getAsString(Idx),
+ V);
if (PointerType *PTy = dyn_cast<PointerType>(Ty)) {
SmallPtrSet<Type*, 4> Visited;
@@ -1630,8 +1688,8 @@ void Verifier::visitConstantExprsRecursively(const Constant *EntryC) {
if (const auto *GV = dyn_cast<GlobalValue>(C)) {
// Global Values get visited separately, but we do need to make sure
// that the global value is in the correct module
- Assert(GV->getParent() == M, "Referencing global in another module!",
- EntryC, M, GV, GV->getParent());
+ Assert(GV->getParent() == &M, "Referencing global in another module!",
+ EntryC, &M, GV, GV->getParent());
continue;
}
@@ -1648,12 +1706,23 @@ void Verifier::visitConstantExprsRecursively(const Constant *EntryC) {
}
void Verifier::visitConstantExpr(const ConstantExpr *CE) {
- if (CE->getOpcode() != Instruction::BitCast)
- return;
-
- Assert(CastInst::castIsValid(Instruction::BitCast, CE->getOperand(0),
- CE->getType()),
- "Invalid bitcast", CE);
+ if (CE->getOpcode() == Instruction::BitCast)
+ Assert(CastInst::castIsValid(Instruction::BitCast, CE->getOperand(0),
+ CE->getType()),
+ "Invalid bitcast", CE);
+
+ if (CE->getOpcode() == Instruction::IntToPtr ||
+ CE->getOpcode() == Instruction::PtrToInt) {
+ auto *PtrTy = CE->getOpcode() == Instruction::IntToPtr
+ ? CE->getType()
+ : CE->getOperand(0)->getType();
+ StringRef Msg = CE->getOpcode() == Instruction::IntToPtr
+ ? "inttoptr not supported for non-integral pointers"
+ : "ptrtoint not supported for non-integral pointers";
+ Assert(
+ !DL.isNonIntegralPointerType(cast<PointerType>(PtrTy->getScalarType())),
+ Msg);
+ }
}
bool Verifier::verifyAttributeCount(AttributeSet Attrs, unsigned Params) {
@@ -1783,7 +1852,7 @@ void Verifier::verifyStatepoint(ImmutableCallSite CS) {
Assert(Call, "illegal use of statepoint token", &CI, U);
if (!Call) continue;
Assert(isa<GCRelocateInst>(Call) || isa<GCResultInst>(Call),
- "gc.result or gc.relocate are the only value uses"
+ "gc.result or gc.relocate are the only value uses "
"of a gc.statepoint",
&CI, U);
if (isa<GCResultInst>(Call)) {
@@ -1880,7 +1949,7 @@ void Verifier::visitFunction(const Function &F) {
FunctionType *FT = F.getFunctionType();
unsigned NumArgs = F.arg_size();
- Assert(Context == &F.getContext(),
+ Assert(&Context == &F.getContext(),
"Function context does not match Module context!", &F);
Assert(!F.hasCommonLinkage(), "Functions may not have common linkage", &F);
@@ -2035,7 +2104,7 @@ void Verifier::visitFunction(const Function &F) {
if (F.getIntrinsicID() && F.getParent()->isMaterialized()) {
const User *U;
if (F.hasAddressTaken(&U))
- Assert(0, "Invalid user of intrinsic instruction!", U);
+ Assert(false, "Invalid user of intrinsic instruction!", U);
}
Assert(!F.hasDLLImportStorageClass() ||
@@ -2078,9 +2147,9 @@ void Verifier::visitFunction(const Function &F) {
continue;
// FIXME: Once N is canonical, check "SP == &N".
- Assert(SP->describes(&F),
- "!dbg attachment points at wrong subprogram for function", N, &F,
- &I, DL, Scope, SP);
+ AssertDI(SP->describes(&F),
+ "!dbg attachment points at wrong subprogram for function", N, &F,
+ &I, DL, Scope, SP);
}
}
@@ -2218,7 +2287,7 @@ void Verifier::visitSelectInst(SelectInst &SI) {
/// a pass, if any exist, it's an error.
///
void Verifier::visitUserOp1(Instruction &I) {
- Assert(0, "User-defined operators should not live outside of a pass!", &I);
+ Assert(false, "User-defined operators should not live outside of a pass!", &I);
}
void Verifier::visitTruncInst(TruncInst &I) {
@@ -2409,6 +2478,11 @@ void Verifier::visitPtrToIntInst(PtrToIntInst &I) {
Assert(SrcTy->getScalarType()->isPointerTy(),
"PtrToInt source must be pointer", &I);
+
+ if (auto *PTy = dyn_cast<PointerType>(SrcTy->getScalarType()))
+ Assert(!DL.isNonIntegralPointerType(PTy),
+ "ptrtoint not supported for non-integral pointers");
+
Assert(DestTy->getScalarType()->isIntegerTy(),
"PtrToInt result must be integral", &I);
Assert(SrcTy->isVectorTy() == DestTy->isVectorTy(), "PtrToInt type mismatch",
@@ -2433,6 +2507,11 @@ void Verifier::visitIntToPtrInst(IntToPtrInst &I) {
"IntToPtr source must be an integral", &I);
Assert(DestTy->getScalarType()->isPointerTy(),
"IntToPtr result must be a pointer", &I);
+
+ if (auto *PTy = dyn_cast<PointerType>(DestTy->getScalarType()))
+ Assert(!DL.isNonIntegralPointerType(PTy),
+ "inttoptr not supported for non-integral pointers");
+
Assert(SrcTy->isVectorTy() == DestTy->isVectorTy(), "IntToPtr type mismatch",
&I);
if (SrcTy->isVectorTy()) {
@@ -2541,15 +2620,20 @@ void Verifier::verifyCallSite(CallSite CS) {
}
// For each argument of the callsite, if it has the swifterror argument,
- // make sure the underlying alloca has swifterror as well.
+ // make sure the underlying alloca or parameter it comes from is marked
+ // swifterror as well.
for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i)
if (CS.paramHasAttr(i+1, Attribute::SwiftError)) {
Value *SwiftErrorArg = CS.getArgument(i);
- auto AI = dyn_cast<AllocaInst>(SwiftErrorArg->stripInBoundsOffsets());
- Assert(AI, "swifterror argument should come from alloca", AI, I);
- if (AI)
+ if (auto AI = dyn_cast<AllocaInst>(SwiftErrorArg->stripInBoundsOffsets())) {
Assert(AI->isSwiftError(),
"swifterror argument for call has mismatched alloca", AI, I);
+ continue;
+ }
+ auto ArgI = dyn_cast<Argument>(SwiftErrorArg);
+ Assert(ArgI, "swifterror argument should come from an alloca or parameter", SwiftErrorArg, I);
+ Assert(ArgI->hasSwiftErrorAttr(),
+ "swifterror argument for call has mismatched parameter", ArgI, I);
}
if (FTy->isVarArg()) {
@@ -2915,10 +2999,8 @@ static bool isContiguous(const ConstantRange &A, const ConstantRange &B) {
return A.getUpper() == B.getLower() || A.getLower() == B.getUpper();
}
-void Verifier::visitRangeMetadata(Instruction& I,
- MDNode* Range, Type* Ty) {
- assert(Range &&
- Range == I.getMetadata(LLVMContext::MD_range) &&
+void Verifier::visitRangeMetadata(Instruction &I, MDNode *Range, Type *Ty) {
+ assert(Range && Range == I.getMetadata(LLVMContext::MD_range) &&
"precondition violation");
unsigned NumOperands = Range->getNumOperands();
@@ -2965,9 +3047,8 @@ void Verifier::visitRangeMetadata(Instruction& I,
}
}
-void Verifier::checkAtomicMemAccessSize(const Module *M, Type *Ty,
- const Instruction *I) {
- unsigned Size = M->getDataLayout().getTypeSizeInBits(Ty);
+void Verifier::checkAtomicMemAccessSize(Type *Ty, const Instruction *I) {
+ unsigned Size = DL.getTypeSizeInBits(Ty);
Assert(Size >= 8, "atomic memory access' size must be byte-sized", Ty, I);
Assert(!(Size & (Size - 1)),
"atomic memory access' operand must have a power-of-two size", Ty, I);
@@ -2991,7 +3072,7 @@ void Verifier::visitLoadInst(LoadInst &LI) {
"atomic load operand must have integer, pointer, or floating point "
"type!",
ElTy, &LI);
- checkAtomicMemAccessSize(M, ElTy, &LI);
+ checkAtomicMemAccessSize(ElTy, &LI);
} else {
Assert(LI.getSynchScope() == CrossThread,
"Non-atomic load cannot have SynchronizationScope specified", &LI);
@@ -3020,7 +3101,7 @@ void Verifier::visitStoreInst(StoreInst &SI) {
"atomic store operand must have integer, pointer, or floating point "
"type!",
ElTy, &SI);
- checkAtomicMemAccessSize(M, ElTy, &SI);
+ checkAtomicMemAccessSize(ElTy, &SI);
} else {
Assert(SI.getSynchScope() == CrossThread,
"Non-atomic store cannot have SynchronizationScope specified", &SI);
@@ -3109,7 +3190,7 @@ void Verifier::visitAtomicCmpXchgInst(AtomicCmpXchgInst &CXI) {
Assert(ElTy->isIntegerTy() || ElTy->isPointerTy(),
"cmpxchg operand must have integer or pointer type",
ElTy, &CXI);
- checkAtomicMemAccessSize(M, ElTy, &CXI);
+ checkAtomicMemAccessSize(ElTy, &CXI);
Assert(ElTy == CXI.getOperand(1)->getType(),
"Expected value type does not match pointer operand type!", &CXI,
ElTy);
@@ -3128,7 +3209,7 @@ void Verifier::visitAtomicRMWInst(AtomicRMWInst &RMWI) {
Type *ElTy = PTy->getElementType();
Assert(ElTy->isIntegerTy(), "atomicrmw operand must have integer type!",
&RMWI, ElTy);
- checkAtomicMemAccessSize(M, ElTy, &RMWI);
+ checkAtomicMemAccessSize(ElTy, &RMWI);
Assert(ElTy == RMWI.getOperand(1)->getType(),
"Argument value type does not match pointer operand type!", &RMWI,
ElTy);
@@ -3288,6 +3369,21 @@ void Verifier::visitLandingPadInst(LandingPadInst &LPI) {
visitInstruction(LPI);
}
+void Verifier::visitResumeInst(ResumeInst &RI) {
+ Assert(RI.getFunction()->hasPersonalityFn(),
+ "ResumeInst needs to be in a function with a personality.", &RI);
+
+ if (!LandingPadResultTy)
+ LandingPadResultTy = RI.getValue()->getType();
+ else
+ Assert(LandingPadResultTy == RI.getValue()->getType(),
+ "The resume instruction should have a consistent result type "
+ "inside a function.",
+ &RI);
+
+ visitTerminatorInst(RI);
+}
+
void Verifier::visitCatchPadInst(CatchPadInst &CPI) {
BasicBlock *BB = CPI.getParent();
@@ -3640,7 +3736,7 @@ void Verifier::visitInstruction(Instruction &I) {
// Check to make sure that only first-class-values are operands to
// instructions.
if (!I.getOperand(i)->getType()->isFirstClassType()) {
- Assert(0, "Instruction operands must be first-class values!", &I);
+ Assert(false, "Instruction operands must be first-class values!", &I);
}
if (Function *F = dyn_cast<Function>(I.getOperand(i))) {
@@ -3653,14 +3749,16 @@ void Verifier::visitInstruction(Instruction &I) {
Assert(
!F->isIntrinsic() || isa<CallInst>(I) ||
F->getIntrinsicID() == Intrinsic::donothing ||
+ F->getIntrinsicID() == Intrinsic::coro_resume ||
+ F->getIntrinsicID() == Intrinsic::coro_destroy ||
F->getIntrinsicID() == Intrinsic::experimental_patchpoint_void ||
F->getIntrinsicID() == Intrinsic::experimental_patchpoint_i64 ||
F->getIntrinsicID() == Intrinsic::experimental_gc_statepoint,
- "Cannot invoke an intrinsic other than donothing, patchpoint or "
- "statepoint",
+ "Cannot invoke an intrinsic other than donothing, patchpoint, "
+ "statepoint, coro_resume or coro_destroy",
&I);
- Assert(F->getParent() == M, "Referencing function in another module!",
- &I, M, F, F->getParent());
+ Assert(F->getParent() == &M, "Referencing function in another module!",
+ &I, &M, F, F->getParent());
} else if (BasicBlock *OpBB = dyn_cast<BasicBlock>(I.getOperand(i))) {
Assert(OpBB->getParent() == BB->getParent(),
"Referring to a basic block in another function!", &I);
@@ -3668,7 +3766,8 @@ void Verifier::visitInstruction(Instruction &I) {
Assert(OpArg->getParent() == BB->getParent(),
"Referring to an argument in another function!", &I);
} else if (GlobalValue *GV = dyn_cast<GlobalValue>(I.getOperand(i))) {
- Assert(GV->getParent() == M, "Referencing global in another module!", &I, M, GV, GV->getParent());
+ Assert(GV->getParent() == &M, "Referencing global in another module!", &I,
+ &M, GV, GV->getParent());
} else if (isa<Instruction>(I.getOperand(i))) {
verifyDominatesUse(I, i);
} else if (isa<InlineAsm>(I.getOperand(i))) {
@@ -3676,9 +3775,12 @@ void Verifier::visitInstruction(Instruction &I) {
(i + 3 == e && isa<InvokeInst>(I)),
"Cannot take the address of an inline asm!", &I);
} else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(I.getOperand(i))) {
- if (CE->getType()->isPtrOrPtrVectorTy()) {
+ if (CE->getType()->isPtrOrPtrVectorTy() ||
+ !DL.getNonIntegralAddressSpaces().empty()) {
// If we have a ConstantExpr pointer, we need to see if it came from an
- // illegal bitcast (inttoptr <constant int> )
+ // illegal bitcast. If the datalayout string specifies non-integral
+ // address spaces then we also need to check for illegal ptrtoint and
+ // inttoptr expressions.
visitConstantExprsRecursively(CE);
}
}
@@ -3691,7 +3793,7 @@ void Verifier::visitInstruction(Instruction &I) {
if (ConstantFP *CFP0 =
mdconst::dyn_extract_or_null<ConstantFP>(MD->getOperand(0))) {
const APFloat &Accuracy = CFP0->getValueAPF();
- Assert(&Accuracy.getSemantics() == &APFloat::IEEEsingle,
+ Assert(&Accuracy.getSemantics() == &APFloat::IEEEsingle(),
"fpmath accuracy must have float type", &I);
Assert(Accuracy.isFiniteNonZero() && !Accuracy.isNegative(),
"fpmath accuracy not a positive number!", &I);
@@ -3721,6 +3823,9 @@ void Verifier::visitInstruction(Instruction &I) {
if (MDNode *MD = I.getMetadata(LLVMContext::MD_dereferenceable_or_null))
visitDereferenceableMetadata(I, MD);
+ if (MDNode *TBAA = I.getMetadata(LLVMContext::MD_tbaa))
+ TBAAVerifyHelper.visitTBAAMetadata(I, TBAA);
+
if (MDNode *AlignMD = I.getMetadata(LLVMContext::MD_align)) {
Assert(I.getType()->isPointerTy(), "align applies only to pointer types",
&I);
@@ -3743,7 +3848,7 @@ void Verifier::visitInstruction(Instruction &I) {
}
if (auto *DII = dyn_cast<DbgInfoIntrinsic>(&I))
- verifyBitPieceExpression(*DII);
+ verifyFragmentExpression(*DII);
InstsInThisBlock.insert(&I);
}
@@ -3803,6 +3908,20 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
switch (ID) {
default:
break;
+ case Intrinsic::coro_id: {
+ auto *InfoArg = CS.getArgOperand(3)->stripPointerCasts();
+ if (isa<ConstantPointerNull>(InfoArg))
+ break;
+ auto *GV = dyn_cast<GlobalVariable>(InfoArg);
+ Assert(GV && GV->isConstant() && GV->hasDefinitiveInitializer(),
+ "info argument of llvm.coro.begin must refer to an initialized "
+ "constant");
+ Constant *Init = GV->getInitializer();
+ Assert(isa<ConstantStruct>(Init) || isa<ConstantArray>(Init),
+ "info argument of llvm.coro.begin must refer to either a struct or "
+ "an array");
+ break;
+ }
case Intrinsic::ctlz: // llvm.ctlz
case Intrinsic::cttz: // llvm.cttz
Assert(isa<ConstantInt>(CS.getArgOperand(1)),
@@ -3833,6 +3952,32 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
CS);
break;
}
+ case Intrinsic::memcpy_element_atomic: {
+ ConstantInt *ElementSizeCI = dyn_cast<ConstantInt>(CS.getArgOperand(3));
+ Assert(ElementSizeCI, "element size of the element-wise atomic memory "
+ "intrinsic must be a constant int",
+ CS);
+ const APInt &ElementSizeVal = ElementSizeCI->getValue();
+ Assert(ElementSizeVal.isPowerOf2(),
+ "element size of the element-wise atomic memory intrinsic "
+ "must be a power of 2",
+ CS);
+
+ auto IsValidAlignment = [&](uint64_t Alignment) {
+ return isPowerOf2_64(Alignment) && ElementSizeVal.ule(Alignment);
+ };
+
+ uint64_t DstAlignment = CS.getParamAlignment(1),
+ SrcAlignment = CS.getParamAlignment(2);
+
+ Assert(IsValidAlignment(DstAlignment),
+ "incorrect alignment of the destination argument",
+ CS);
+ Assert(IsValidAlignment(SrcAlignment),
+ "incorrect alignment of the source argument",
+ CS);
+ break;
+ }
case Intrinsic::gcroot:
case Intrinsic::gcwrite:
case Intrinsic::gcread:
@@ -4181,10 +4326,10 @@ void Verifier::visitDbgIntrinsic(StringRef Kind, DbgIntrinsicTy &DII) {
if (!VarSP || !LocSP)
return; // Broken scope chains are checked elsewhere.
- Assert(VarSP == LocSP, "mismatched subprogram between llvm.dbg." + Kind +
- " variable and !dbg attachment",
- &DII, BB, F, Var, Var->getScope()->getSubprogram(), Loc,
- Loc->getScope()->getSubprogram());
+ AssertDI(VarSP == LocSP, "mismatched subprogram between llvm.dbg." + Kind +
+ " variable and !dbg attachment",
+ &DII, BB, F, Var, Var->getScope()->getSubprogram(), Loc,
+ Loc->getScope()->getSubprogram());
}
static uint64_t getVariableSize(const DILocalVariable &V) {
@@ -4210,7 +4355,7 @@ static uint64_t getVariableSize(const DILocalVariable &V) {
return 0;
}
-void Verifier::verifyBitPieceExpression(const DbgInfoIntrinsic &I) {
+void Verifier::verifyFragmentExpression(const DbgInfoIntrinsic &I) {
DILocalVariable *V;
DIExpression *E;
if (auto *DVI = dyn_cast<DbgValueInst>(&I)) {
@@ -4227,7 +4372,8 @@ void Verifier::verifyBitPieceExpression(const DbgInfoIntrinsic &I) {
return;
// Nothing to do if this isn't a bit piece expression.
- if (!E->isBitPiece())
+ auto Fragment = E->getFragmentInfo();
+ if (!Fragment)
return;
// The frontend helps out GDB by emitting the members of local anonymous
@@ -4245,21 +4391,21 @@ void Verifier::verifyBitPieceExpression(const DbgInfoIntrinsic &I) {
if (!VarSize)
return;
- unsigned PieceSize = E->getBitPieceSize();
- unsigned PieceOffset = E->getBitPieceOffset();
- Assert(PieceSize + PieceOffset <= VarSize,
- "piece is larger than or outside of variable", &I, V, E);
- Assert(PieceSize != VarSize, "piece covers entire variable", &I, V, E);
+ unsigned FragSize = Fragment->SizeInBits;
+ unsigned FragOffset = Fragment->OffsetInBits;
+ AssertDI(FragSize + FragOffset <= VarSize,
+ "fragment is larger than or outside of variable", &I, V, E);
+ AssertDI(FragSize != VarSize, "fragment covers entire variable", &I, V, E);
}
void Verifier::verifyCompileUnits() {
- auto *CUs = M->getNamedMetadata("llvm.dbg.cu");
+ auto *CUs = M.getNamedMetadata("llvm.dbg.cu");
SmallPtrSet<const Metadata *, 2> Listed;
if (CUs)
Listed.insert(CUs->op_begin(), CUs->op_end());
- Assert(
- std::all_of(CUVisited.begin(), CUVisited.end(),
- [&Listed](const Metadata *CU) { return Listed.count(CU); }),
+ AssertDI(
+ all_of(CUVisited,
+ [&Listed](const Metadata *CU) { return Listed.count(CU); }),
"All DICompileUnits must be listed in llvm.dbg.cu");
CUVisited.clear();
}
@@ -4285,7 +4431,7 @@ bool llvm::verifyFunction(const Function &f, raw_ostream *OS) {
Function &F = const_cast<Function &>(f);
// Don't use a raw_null_ostream. Printing IR is expensive.
- Verifier V(OS, /*ShouldTreatBrokenDebugInfoAsError=*/true);
+ Verifier V(OS, /*ShouldTreatBrokenDebugInfoAsError=*/true, *f.getParent());
// Note that this function's return value is inverted from what you would
// expect of a function called "verify".
@@ -4295,13 +4441,13 @@ bool llvm::verifyFunction(const Function &f, raw_ostream *OS) {
bool llvm::verifyModule(const Module &M, raw_ostream *OS,
bool *BrokenDebugInfo) {
// Don't use a raw_null_ostream. Printing IR is expensive.
- Verifier V(OS, /*ShouldTreatBrokenDebugInfoAsError=*/!BrokenDebugInfo);
+ Verifier V(OS, /*ShouldTreatBrokenDebugInfoAsError=*/!BrokenDebugInfo, M);
bool Broken = false;
for (const Function &F : M)
Broken |= !V.verify(F);
- Broken |= !V.verify(M);
+ Broken |= !V.verify();
if (BrokenDebugInfo)
*BrokenDebugInfo = V.hasBrokenDebugInfo();
// Note that this function's return value is inverted from what you would
@@ -4310,26 +4456,30 @@ bool llvm::verifyModule(const Module &M, raw_ostream *OS,
}
namespace {
+
struct VerifierLegacyPass : public FunctionPass {
static char ID;
- Verifier V;
+ std::unique_ptr<Verifier> V;
bool FatalErrors = true;
- VerifierLegacyPass()
- : FunctionPass(ID),
- V(&dbgs(), /*ShouldTreatBrokenDebugInfoAsError=*/false) {
+ VerifierLegacyPass() : FunctionPass(ID) {
initializeVerifierLegacyPassPass(*PassRegistry::getPassRegistry());
}
explicit VerifierLegacyPass(bool FatalErrors)
: FunctionPass(ID),
- V(&dbgs(), /*ShouldTreatBrokenDebugInfoAsError=*/false),
FatalErrors(FatalErrors) {
initializeVerifierLegacyPassPass(*PassRegistry::getPassRegistry());
}
+ bool doInitialization(Module &M) override {
+ V = llvm::make_unique<Verifier>(
+ &dbgs(), /*ShouldTreatBrokenDebugInfoAsError=*/false, M);
+ return false;
+ }
+
bool runOnFunction(Function &F) override {
- if (!V.verify(F) && FatalErrors)
+ if (!V->verify(F) && FatalErrors)
report_fatal_error("Broken function found, compilation aborted!");
return false;
@@ -4339,17 +4489,17 @@ struct VerifierLegacyPass : public FunctionPass {
bool HasErrors = false;
for (Function &F : M)
if (F.isDeclaration())
- HasErrors |= !V.verify(F);
+ HasErrors |= !V->verify(F);
- HasErrors |= !V.verify(M);
+ HasErrors |= !V->verify();
if (FatalErrors) {
if (HasErrors)
report_fatal_error("Broken module found, compilation aborted!");
- assert(!V.hasBrokenDebugInfo() && "Module contains invalid debug info");
+ assert(!V->hasBrokenDebugInfo() && "Module contains invalid debug info");
}
// Strip broken debug info.
- if (V.hasBrokenDebugInfo()) {
+ if (V->hasBrokenDebugInfo()) {
DiagnosticInfoIgnoringInvalidDebugMetadata DiagInvalid(M);
M.getContext().diagnose(DiagInvalid);
if (!StripDebugInfo(M))
@@ -4362,6 +4512,277 @@ struct VerifierLegacyPass : public FunctionPass {
AU.setPreservesAll();
}
};
+
+} // end anonymous namespace
+
+/// Helper to issue failure from the TBAA verification
+template <typename... Tys> void TBAAVerifier::CheckFailed(Tys &&... Args) {
+ if (Diagnostic)
+ return Diagnostic->CheckFailed(Args...);
+}
+
+#define AssertTBAA(C, ...) \
+ do { \
+ if (!(C)) { \
+ CheckFailed(__VA_ARGS__); \
+ return false; \
+ } \
+ } while (false)
+
+/// Verify that \p BaseNode can be used as the "base type" in the struct-path
+/// TBAA scheme. This means \p BaseNode is either a scalar node, or a
+/// struct-type node describing an aggregate data structure (like a struct).
+TBAAVerifier::TBAABaseNodeSummary
+TBAAVerifier::verifyTBAABaseNode(Instruction &I, const MDNode *BaseNode) {
+ if (BaseNode->getNumOperands() < 2) {
+ CheckFailed("Base nodes must have at least two operands", &I, BaseNode);
+ return {true, ~0u};
+ }
+
+ auto Itr = TBAABaseNodes.find(BaseNode);
+ if (Itr != TBAABaseNodes.end())
+ return Itr->second;
+
+ auto Result = verifyTBAABaseNodeImpl(I, BaseNode);
+ auto InsertResult = TBAABaseNodes.insert({BaseNode, Result});
+ (void)InsertResult;
+ assert(InsertResult.second && "We just checked!");
+ return Result;
+}
+
+TBAAVerifier::TBAABaseNodeSummary
+TBAAVerifier::verifyTBAABaseNodeImpl(Instruction &I, const MDNode *BaseNode) {
+ const TBAAVerifier::TBAABaseNodeSummary InvalidNode = {true, ~0u};
+
+ if (BaseNode->getNumOperands() == 2) {
+ // Scalar nodes can only be accessed at offset 0.
+ return isValidScalarTBAANode(BaseNode)
+ ? TBAAVerifier::TBAABaseNodeSummary({false, 0})
+ : InvalidNode;
+ }
+
+ if (BaseNode->getNumOperands() % 2 != 1) {
+ CheckFailed("Struct tag nodes must have an odd number of operands!",
+ BaseNode);
+ return InvalidNode;
+ }
+
+ if (!isa<MDString>(BaseNode->getOperand(0))) {
+ CheckFailed("Struct tag nodes have a string as their first operand",
+ BaseNode);
+ return InvalidNode;
+ }
+
+ bool Failed = false;
+
+ Optional<APInt> PrevOffset;
+ unsigned BitWidth = ~0u;
+
+ // We've already checked that BaseNode is not a degenerate root node with one
+ // operand in \c verifyTBAABaseNode, so this loop should run at least once.
+ for (unsigned Idx = 1; Idx < BaseNode->getNumOperands(); Idx += 2) {
+ const MDOperand &FieldTy = BaseNode->getOperand(Idx);
+ const MDOperand &FieldOffset = BaseNode->getOperand(Idx + 1);
+ if (!isa<MDNode>(FieldTy)) {
+ CheckFailed("Incorrect field entry in struct type node!", &I, BaseNode);
+ Failed = true;
+ continue;
+ }
+
+ auto *OffsetEntryCI =
+ mdconst::dyn_extract_or_null<ConstantInt>(FieldOffset);
+ if (!OffsetEntryCI) {
+ CheckFailed("Offset entries must be constants!", &I, BaseNode);
+ Failed = true;
+ continue;
+ }
+
+ if (BitWidth == ~0u)
+ BitWidth = OffsetEntryCI->getBitWidth();
+
+ if (OffsetEntryCI->getBitWidth() != BitWidth) {
+ CheckFailed(
+ "Bitwidth between the offsets and struct type entries must match", &I,
+ BaseNode);
+ Failed = true;
+ continue;
+ }
+
+ // NB! As far as I can tell, we generate a non-strictly increasing offset
+ // sequence only from structs that have zero size bit fields. When
+ // recursing into a contained struct in \c getFieldNodeFromTBAABaseNode we
+ // pick the field lexically the latest in struct type metadata node. This
+ // mirrors the actual behavior of the alias analysis implementation.
+ bool IsAscending =
+ !PrevOffset || PrevOffset->ule(OffsetEntryCI->getValue());
+
+ if (!IsAscending) {
+ CheckFailed("Offsets must be increasing!", &I, BaseNode);
+ Failed = true;
+ }
+
+ PrevOffset = OffsetEntryCI->getValue();
+ }
+
+ return Failed ? InvalidNode
+ : TBAAVerifier::TBAABaseNodeSummary(false, BitWidth);
+}
+
+static bool IsRootTBAANode(const MDNode *MD) {
+ return MD->getNumOperands() < 2;
+}
+
+static bool IsScalarTBAANodeImpl(const MDNode *MD,
+ SmallPtrSetImpl<const MDNode *> &Visited) {
+ if (MD->getNumOperands() != 2 && MD->getNumOperands() != 3)
+ return false;
+
+ if (!isa<MDString>(MD->getOperand(0)))
+ return false;
+
+ if (MD->getNumOperands() == 3) {
+ auto *Offset = mdconst::dyn_extract<ConstantInt>(MD->getOperand(2));
+ if (!(Offset && Offset->isZero() && isa<MDString>(MD->getOperand(0))))
+ return false;
+ }
+
+ auto *Parent = dyn_cast_or_null<MDNode>(MD->getOperand(1));
+ return Parent && Visited.insert(Parent).second &&
+ (IsRootTBAANode(Parent) || IsScalarTBAANodeImpl(Parent, Visited));
+}
+
+bool TBAAVerifier::isValidScalarTBAANode(const MDNode *MD) {
+ auto ResultIt = TBAAScalarNodes.find(MD);
+ if (ResultIt != TBAAScalarNodes.end())
+ return ResultIt->second;
+
+ SmallPtrSet<const MDNode *, 4> Visited;
+ bool Result = IsScalarTBAANodeImpl(MD, Visited);
+ auto InsertResult = TBAAScalarNodes.insert({MD, Result});
+ (void)InsertResult;
+ assert(InsertResult.second && "Just checked!");
+
+ return Result;
+}
+
+/// Returns the field node at the offset \p Offset in \p BaseNode. Update \p
+/// Offset in place to be the offset within the field node returned.
+///
+/// We assume we've okayed \p BaseNode via \c verifyTBAABaseNode.
+MDNode *TBAAVerifier::getFieldNodeFromTBAABaseNode(Instruction &I,
+ const MDNode *BaseNode,
+ APInt &Offset) {
+ assert(BaseNode->getNumOperands() >= 2 && "Invalid base node!");
+
+ // Scalar nodes have only one possible "field" -- their parent in the access
+ // hierarchy. Offset must be zero at this point, but our caller is supposed
+ // to Assert that.
+ if (BaseNode->getNumOperands() == 2)
+ return cast<MDNode>(BaseNode->getOperand(1));
+
+ for (unsigned Idx = 1; Idx < BaseNode->getNumOperands(); Idx += 2) {
+ auto *OffsetEntryCI =
+ mdconst::extract<ConstantInt>(BaseNode->getOperand(Idx + 1));
+ if (OffsetEntryCI->getValue().ugt(Offset)) {
+ if (Idx == 1) {
+ CheckFailed("Could not find TBAA parent in struct type node", &I,
+ BaseNode, &Offset);
+ return nullptr;
+ }
+
+ auto *PrevOffsetEntryCI =
+ mdconst::extract<ConstantInt>(BaseNode->getOperand(Idx - 1));
+ Offset -= PrevOffsetEntryCI->getValue();
+ return cast<MDNode>(BaseNode->getOperand(Idx - 2));
+ }
+ }
+
+ auto *LastOffsetEntryCI = mdconst::extract<ConstantInt>(
+ BaseNode->getOperand(BaseNode->getNumOperands() - 1));
+
+ Offset -= LastOffsetEntryCI->getValue();
+ return cast<MDNode>(BaseNode->getOperand(BaseNode->getNumOperands() - 2));
+}
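+
+// Illustration (hypothetical metadata, not part of this change): for a struct
+// type node such as
+//
+//   !3 = !{!"S", !2, i64 0, !4, i64 4}    ; fields at operands 1,3; offsets at 2,4
+//
+// a query with Offset == 4 scans the (field, offset) pairs until it finds an
+// offset greater than 4 or reaches the end, then takes the immediately
+// preceding pair, subtracts that pair's offset, and returns !4 with Offset
+// updated to 0.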
+
+bool TBAAVerifier::visitTBAAMetadata(Instruction &I, const MDNode *MD) {
+ AssertTBAA(isa<LoadInst>(I) || isa<StoreInst>(I) || isa<CallInst>(I) ||
+ isa<VAArgInst>(I) || isa<AtomicRMWInst>(I) ||
+ isa<AtomicCmpXchgInst>(I),
+ "TBAA is only for loads, stores and calls!", &I);
+
+ bool IsStructPathTBAA =
+ isa<MDNode>(MD->getOperand(0)) && MD->getNumOperands() >= 3;
+
+ AssertTBAA(
+ IsStructPathTBAA,
+ "Old-style TBAA is no longer allowed, use struct-path TBAA instead", &I);
+
+ AssertTBAA(MD->getNumOperands() < 5,
+ "Struct tag metadata must have either 3 or 4 operands", &I, MD);
+
+ MDNode *BaseNode = dyn_cast_or_null<MDNode>(MD->getOperand(0));
+ MDNode *AccessType = dyn_cast_or_null<MDNode>(MD->getOperand(1));
+
+ if (MD->getNumOperands() == 4) {
+ auto *IsImmutableCI =
+ mdconst::dyn_extract_or_null<ConstantInt>(MD->getOperand(3));
+ AssertTBAA(IsImmutableCI,
+ "Immutability tag on struct tag metadata must be a constant", &I,
+ MD);
+ AssertTBAA(
+ IsImmutableCI->isZero() || IsImmutableCI->isOne(),
+ "Immutability part of the struct tag metadata must be either 0 or 1",
+ &I, MD);
+ }
+
+ AssertTBAA(BaseNode && AccessType,
+ "Malformed struct tag metadata: base and access-type "
+ "should be non-null and point to Metadata nodes",
+ &I, MD, BaseNode, AccessType);
+
+ AssertTBAA(isValidScalarTBAANode(AccessType),
+ "Access type node must be a valid scalar type", &I, MD,
+ AccessType);
+
+ auto *OffsetCI = mdconst::dyn_extract_or_null<ConstantInt>(MD->getOperand(2));
+ AssertTBAA(OffsetCI, "Offset must be constant integer", &I, MD);
+
+ APInt Offset = OffsetCI->getValue();
+ bool SeenAccessTypeInPath = false;
+
+ SmallPtrSet<MDNode *, 4> StructPath;
+
+ for (/* empty */; BaseNode && !IsRootTBAANode(BaseNode);
+ BaseNode = getFieldNodeFromTBAABaseNode(I, BaseNode, Offset)) {
+ if (!StructPath.insert(BaseNode).second) {
+ CheckFailed("Cycle detected in struct path", &I, MD);
+ return false;
+ }
+
+ bool Invalid;
+ unsigned BaseNodeBitWidth;
+ std::tie(Invalid, BaseNodeBitWidth) = verifyTBAABaseNode(I, BaseNode);
+
+ // If the base node is invalid in itself, then we've already printed all the
+ // errors we wanted to print.
+ if (Invalid)
+ return false;
+
+ SeenAccessTypeInPath |= BaseNode == AccessType;
+
+ if (isValidScalarTBAANode(BaseNode) || BaseNode == AccessType)
+ AssertTBAA(Offset == 0, "Offset not zero at the point of scalar access",
+ &I, MD, &Offset);
+
+ AssertTBAA(BaseNodeBitWidth == Offset.getBitWidth() ||
+ (BaseNodeBitWidth == 0 && Offset == 0),
+ "Access bit-width not the same as description bit-width", &I, MD,
+ BaseNodeBitWidth, Offset.getBitWidth());
+ }
+
+ AssertTBAA(SeenAccessTypeInPath, "Did not see access type in access path!",
+ &I, MD);
+ return true;
}
char VerifierLegacyPass::ID = 0;
@@ -4371,7 +4792,7 @@ FunctionPass *llvm::createVerifierPass(bool FatalErrors) {
return new VerifierLegacyPass(FatalErrors);
}
-char VerifierAnalysis::PassID;
+AnalysisKey VerifierAnalysis::Key;
VerifierAnalysis::Result VerifierAnalysis::run(Module &M,
ModuleAnalysisManager &) {
Result Res;
diff --git a/contrib/llvm/lib/IRReader/IRReader.cpp b/contrib/llvm/lib/IRReader/IRReader.cpp
index 9b243fc571d0..ba587ced7182 100644
--- a/contrib/llvm/lib/IRReader/IRReader.cpp
+++ b/contrib/llvm/lib/IRReader/IRReader.cpp
@@ -11,7 +11,7 @@
#include "llvm-c/Core.h"
#include "llvm-c/IRReader.h"
#include "llvm/AsmParser/Parser.h"
-#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/MemoryBuffer.h"
@@ -26,19 +26,23 @@ namespace llvm {
extern bool TimePassesIsEnabled;
}
-static const char *const TimeIRParsingGroupName = "LLVM IR Parsing";
-static const char *const TimeIRParsingName = "Parse IR";
+static const char *const TimeIRParsingGroupName = "irparse";
+static const char *const TimeIRParsingGroupDescription = "LLVM IR Parsing";
+static const char *const TimeIRParsingName = "parse";
+static const char *const TimeIRParsingDescription = "Parse IR";
static std::unique_ptr<Module>
getLazyIRModule(std::unique_ptr<MemoryBuffer> Buffer, SMDiagnostic &Err,
LLVMContext &Context, bool ShouldLazyLoadMetadata) {
if (isBitcode((const unsigned char *)Buffer->getBufferStart(),
(const unsigned char *)Buffer->getBufferEnd())) {
- ErrorOr<std::unique_ptr<Module>> ModuleOrErr = getLazyBitcodeModule(
+ Expected<std::unique_ptr<Module>> ModuleOrErr = getOwningLazyBitcodeModule(
std::move(Buffer), Context, ShouldLazyLoadMetadata);
- if (std::error_code EC = ModuleOrErr.getError()) {
- Err = SMDiagnostic(Buffer->getBufferIdentifier(), SourceMgr::DK_Error,
- EC.message());
+ if (Error E = ModuleOrErr.takeError()) {
+ handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) {
+ Err = SMDiagnostic(Buffer->getBufferIdentifier(), SourceMgr::DK_Error,
+ EIB.message());
+ });
return nullptr;
}
return std::move(ModuleOrErr.get());
@@ -65,15 +69,18 @@ std::unique_ptr<Module> llvm::getLazyIRFileModule(StringRef Filename,
std::unique_ptr<Module> llvm::parseIR(MemoryBufferRef Buffer, SMDiagnostic &Err,
LLVMContext &Context) {
- NamedRegionTimer T(TimeIRParsingName, TimeIRParsingGroupName,
+ NamedRegionTimer T(TimeIRParsingName, TimeIRParsingDescription,
+ TimeIRParsingGroupName, TimeIRParsingGroupDescription,
TimePassesIsEnabled);
if (isBitcode((const unsigned char *)Buffer.getBufferStart(),
(const unsigned char *)Buffer.getBufferEnd())) {
- ErrorOr<std::unique_ptr<Module>> ModuleOrErr =
+ Expected<std::unique_ptr<Module>> ModuleOrErr =
parseBitcodeFile(Buffer, Context);
- if (std::error_code EC = ModuleOrErr.getError()) {
- Err = SMDiagnostic(Buffer.getBufferIdentifier(), SourceMgr::DK_Error,
- EC.message());
+ if (Error E = ModuleOrErr.takeError()) {
+ handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) {
+ Err = SMDiagnostic(Buffer.getBufferIdentifier(), SourceMgr::DK_Error,
+ EIB.message());
+ });
return nullptr;
}
return std::move(ModuleOrErr.get());
diff --git a/contrib/llvm/lib/LTO/Caching.cpp b/contrib/llvm/lib/LTO/Caching.cpp
new file mode 100644
index 000000000000..fd5bdb0bc01a
--- /dev/null
+++ b/contrib/llvm/lib/LTO/Caching.cpp
@@ -0,0 +1,99 @@
+//===-Caching.cpp - LLVM Link Time Optimizer Cache Handling ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements caching for ThinLTO.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/LTO/Caching.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace llvm::lto;
+
+static void commitEntry(StringRef TempFilename, StringRef EntryPath) {
+ // Rename to the final destination (hopefully any race condition won't matter here)
+ auto EC = sys::fs::rename(TempFilename, EntryPath);
+ if (EC) {
+ // Renaming failed, probably not the same filesystem, copy and delete.
+ // FIXME: Avoid needing to do this by creating the temporary file in the
+ // cache directory.
+ {
+ auto ReloadedBufferOrErr = MemoryBuffer::getFile(TempFilename);
+ if (auto EC = ReloadedBufferOrErr.getError())
+ report_fatal_error(Twine("Failed to open temp file '") + TempFilename +
+ "': " + EC.message() + "\n");
+
+ raw_fd_ostream OS(EntryPath, EC, sys::fs::F_None);
+ if (EC)
+ report_fatal_error(Twine("Failed to open ") + EntryPath +
+ " to save cached entry\n");
+ // I'm not sure what the guarantees are if two processes are doing this
+ // at the same time.
+ OS << (*ReloadedBufferOrErr)->getBuffer();
+ }
+ sys::fs::remove(TempFilename);
+ }
+}
+
+NativeObjectCache lto::localCache(std::string CacheDirectoryPath,
+ AddFileFn AddFile) {
+ return [=](unsigned Task, StringRef Key) -> AddStreamFn {
+ // First, see if we have a cache hit.
+ SmallString<64> EntryPath;
+ sys::path::append(EntryPath, CacheDirectoryPath, Key);
+ if (sys::fs::exists(EntryPath)) {
+ AddFile(Task, EntryPath);
+ return AddStreamFn();
+ }
+
+ // This native object stream is responsible for committing the resulting
+ // file to the cache and calling AddFile to add it to the link.
+ struct CacheStream : NativeObjectStream {
+ AddFileFn AddFile;
+ std::string TempFilename;
+ std::string EntryPath;
+ unsigned Task;
+
+ CacheStream(std::unique_ptr<raw_pwrite_stream> OS, AddFileFn AddFile,
+ std::string TempFilename, std::string EntryPath,
+ unsigned Task)
+ : NativeObjectStream(std::move(OS)), AddFile(AddFile),
+ TempFilename(TempFilename), EntryPath(EntryPath), Task(Task) {}
+
+ ~CacheStream() {
+ // Make sure the file is closed before committing it.
+ OS.reset();
+ commitEntry(TempFilename, EntryPath);
+ AddFile(Task, EntryPath);
+ }
+ };
+
+ return [=](size_t Task) -> std::unique_ptr<NativeObjectStream> {
+ // Write to a temporary file to avoid a race condition
+ int TempFD;
+ SmallString<64> TempFilename;
+ std::error_code EC =
+ sys::fs::createTemporaryFile("Thin", "tmp.o", TempFD, TempFilename);
+ if (EC) {
+ errs() << "Error: " << EC.message() << "\n";
+ report_fatal_error("ThinLTO: Can't get a temporary file");
+ }
+
+ // This CacheStream will move the temporary file into the cache when done.
+ return llvm::make_unique<CacheStream>(
+ llvm::make_unique<raw_fd_ostream>(TempFD, /* ShouldClose */ true),
+ AddFile, TempFilename.str(), EntryPath.str(), Task);
+ };
+ };
+}
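+
+// Illustration (hypothetical usage, not part of this change); assuming
+// AddFileFn takes (unsigned Task, StringRef Path), as the calls above suggest:
+//
+//   NativeObjectCache Cache = lto::localCache(
+//       "/path/to/cache", [](unsigned Task, StringRef Path) {
+//         // hand the cached or newly produced object file to the linker
+//       });
+//
+// On a cache hit the returned callable invokes AddFile immediately and hands
+// back an empty AddStreamFn; on a miss it hands back a stream factory whose
+// CacheStream commits the temporary file into the cache and only then calls
+// AddFile.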
diff --git a/contrib/llvm/lib/LTO/LTO.cpp b/contrib/llvm/lib/LTO/LTO.cpp
index 10226c4a3ff6..7364f0e0cd31 100644
--- a/contrib/llvm/lib/LTO/LTO.cpp
+++ b/contrib/llvm/lib/LTO/LTO.cpp
@@ -12,32 +12,130 @@
//===----------------------------------------------------------------------===//
#include "llvm/LTO/LTO.h"
-#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Bitcode/BitcodeReader.h"
+#include "llvm/Bitcode/BitcodeWriter.h"
+#include "llvm/CodeGen/Analysis.h"
+#include "llvm/IR/AutoUpgrade.h"
+#include "llvm/IR/DiagnosticPrinter.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/LTO/LTOBackend.h"
+#include "llvm/Linker/IRMover.h"
+#include "llvm/Object/ModuleSummaryIndexObjectFile.h"
+#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/SHA1.h"
#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/ThreadPool.h"
+#include "llvm/Support/Threading.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/Utils/SplitModule.h"
-namespace llvm {
+#include <set>
-// Simple helper to load a module from bitcode
-std::unique_ptr<Module> loadModuleFromBuffer(const MemoryBufferRef &Buffer,
- LLVMContext &Context, bool Lazy) {
- SMDiagnostic Err;
- ErrorOr<std::unique_ptr<Module>> ModuleOrErr(nullptr);
- if (Lazy) {
- ModuleOrErr =
- getLazyBitcodeModule(MemoryBuffer::getMemBuffer(Buffer, false), Context,
- /* ShouldLazyLoadMetadata */ Lazy);
- } else {
- ModuleOrErr = parseBitcodeFile(Buffer, Context);
+using namespace llvm;
+using namespace lto;
+using namespace object;
+
+#define DEBUG_TYPE "lto"
+
+// Returns a unique hash for the Module, considering the current export/import
+// lists and other global analysis results.
+// The hash is produced in \p Key.
+static void computeCacheKey(
+ SmallString<40> &Key, const Config &Conf, const ModuleSummaryIndex &Index,
+ StringRef ModuleID, const FunctionImporter::ImportMapTy &ImportList,
+ const FunctionImporter::ExportSetTy &ExportList,
+ const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
+ const GVSummaryMapTy &DefinedGlobals) {
+ // Compute the unique hash for this entry.
+ // This is based on the current compiler version, the module itself, the
+ // export list, the hash for every single module in the import list, the
+ // list of ResolvedODR for the module, and the list of preserved symbols.
+ SHA1 Hasher;
+
+ // Start with the compiler revision
+ Hasher.update(LLVM_VERSION_STRING);
+#ifdef HAVE_LLVM_REVISION
+ Hasher.update(LLVM_REVISION);
+#endif
+
+ // Include the parts of the LTO configuration that affect code generation.
+ auto AddString = [&](StringRef Str) {
+ Hasher.update(Str);
+ Hasher.update(ArrayRef<uint8_t>{0});
+ };
+ auto AddUnsigned = [&](unsigned I) {
+ uint8_t Data[4];
+ Data[0] = I;
+ Data[1] = I >> 8;
+ Data[2] = I >> 16;
+ Data[3] = I >> 24;
+ Hasher.update(ArrayRef<uint8_t>{Data, 4});
+ };
+ AddString(Conf.CPU);
+ // FIXME: Hash more of Options. For now all clients initialize Options from
+ // command-line flags (which is unsupported in production), but may set
+ // RelaxELFRelocations. The clang driver can also pass FunctionSections,
+ // DataSections and DebuggerTuning via command line flags.
+ AddUnsigned(Conf.Options.RelaxELFRelocations);
+ AddUnsigned(Conf.Options.FunctionSections);
+ AddUnsigned(Conf.Options.DataSections);
+ AddUnsigned((unsigned)Conf.Options.DebuggerTuning);
+ for (auto &A : Conf.MAttrs)
+ AddString(A);
+ AddUnsigned(Conf.RelocModel);
+ AddUnsigned(Conf.CodeModel);
+ AddUnsigned(Conf.CGOptLevel);
+ AddUnsigned(Conf.OptLevel);
+ AddString(Conf.OptPipeline);
+ AddString(Conf.AAPipeline);
+ AddString(Conf.OverrideTriple);
+ AddString(Conf.DefaultTriple);
+
+ // Include the hash for the current module
+ auto ModHash = Index.getModuleHash(ModuleID);
+ Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
+ for (auto F : ExportList)
+ // The export list can impact internalization, so be conservative here
+ Hasher.update(ArrayRef<uint8_t>((uint8_t *)&F, sizeof(F)));
+
+ // Include the hash for every module we import functions from
+ for (auto &Entry : ImportList) {
+ auto ModHash = Index.getModuleHash(Entry.first());
+ Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
+ }
+
+ // Include the hash for the resolved ODR.
+ for (auto &Entry : ResolvedODR) {
+ Hasher.update(ArrayRef<uint8_t>((const uint8_t *)&Entry.first,
+ sizeof(GlobalValue::GUID)));
+ Hasher.update(ArrayRef<uint8_t>((const uint8_t *)&Entry.second,
+ sizeof(GlobalValue::LinkageTypes)));
+ }
+
+ // Include the hash for the linkage type to reflect internalization and weak
+ // resolution.
+ for (auto &GS : DefinedGlobals) {
+ GlobalValue::LinkageTypes Linkage = GS.second->linkage();
+ Hasher.update(
+ ArrayRef<uint8_t>((const uint8_t *)&Linkage, sizeof(Linkage)));
}
- if (std::error_code EC = ModuleOrErr.getError()) {
- Err = SMDiagnostic(Buffer.getBufferIdentifier(), SourceMgr::DK_Error,
- EC.message());
- Err.print("ThinLTO", errs());
- report_fatal_error("Can't load module, abort.");
+
+ if (!Conf.SampleProfile.empty()) {
+ auto FileOrErr = MemoryBuffer::getFile(Conf.SampleProfile);
+ if (FileOrErr)
+ Hasher.update(FileOrErr.get()->getBuffer());
}
- return std::move(ModuleOrErr.get());
+
+ Key = toHex(Hasher.result());
}
static void thinLTOResolveWeakForLinkerGUID(
@@ -48,20 +146,25 @@ static void thinLTOResolveWeakForLinkerGUID(
function_ref<void(StringRef, GlobalValue::GUID, GlobalValue::LinkageTypes)>
recordNewLinkage) {
for (auto &S : GVSummaryList) {
- if (GlobalInvolvedWithAlias.count(S.get()))
- continue;
GlobalValue::LinkageTypes OriginalLinkage = S->linkage();
if (!GlobalValue::isWeakForLinker(OriginalLinkage))
continue;
// We need to emit only one of these. The prevailing module will keep it,
// but turned into a weak, while the others will drop it when possible.
+ // This is both a compile-time optimization and a correctness
+ // transformation. This is necessary for correctness when we have exported
+ // a reference - we need to convert the linkonce to weak to
+ // ensure a copy is kept to satisfy the exported reference.
+ // FIXME: We may want to split the compile time and correctness
+ // aspects into separate routines.
if (isPrevailing(GUID, S.get())) {
if (GlobalValue::isLinkOnceLinkage(OriginalLinkage))
S->setLinkage(GlobalValue::getWeakLinkage(
GlobalValue::isLinkOnceODRLinkage(OriginalLinkage)));
}
- // Alias can't be turned into available_externally.
+ // Alias and aliasee can't be turned into available_externally.
else if (!isa<AliasSummary>(S.get()) &&
+ !GlobalInvolvedWithAlias.count(S.get()) &&
(GlobalValue::isLinkOnceODRLinkage(OriginalLinkage) ||
GlobalValue::isWeakODRLinkage(OriginalLinkage)))
S->setLinkage(GlobalValue::AvailableExternallyLinkage);
@@ -76,7 +179,7 @@ static void thinLTOResolveWeakForLinkerGUID(
// current module. However there is a chance that another module is still
// referencing them because of the import. We make sure we always emit at least
// one copy.
-void thinLTOResolveWeakForLinkerInIndex(
+void llvm::thinLTOResolveWeakForLinkerInIndex(
ModuleSummaryIndex &Index,
function_ref<bool(GlobalValue::GUID, const GlobalValueSummary *)>
isPrevailing,
@@ -110,10 +213,702 @@ static void thinLTOInternalizeAndPromoteGUID(
// Update the linkages in the given \p Index to mark exported values
// as external and non-exported values as internal.
-void thinLTOInternalizeAndPromoteInIndex(
+void llvm::thinLTOInternalizeAndPromoteInIndex(
ModuleSummaryIndex &Index,
function_ref<bool(StringRef, GlobalValue::GUID)> isExported) {
for (auto &I : Index)
thinLTOInternalizeAndPromoteGUID(I.second, I.first, isExported);
}
+
+struct InputFile::InputModule {
+ BitcodeModule BM;
+ std::unique_ptr<Module> Mod;
+
+ // The range of ModuleSymbolTable entries for this input module.
+ size_t SymBegin, SymEnd;
+};
+
+// Requires a destructor for std::vector<InputModule>.
+InputFile::~InputFile() = default;
+
+Expected<std::unique_ptr<InputFile>> InputFile::create(MemoryBufferRef Object) {
+ std::unique_ptr<InputFile> File(new InputFile);
+
+ ErrorOr<MemoryBufferRef> BCOrErr =
+ IRObjectFile::findBitcodeInMemBuffer(Object);
+ if (!BCOrErr)
+ return errorCodeToError(BCOrErr.getError());
+
+ Expected<std::vector<BitcodeModule>> BMsOrErr =
+ getBitcodeModuleList(*BCOrErr);
+ if (!BMsOrErr)
+ return BMsOrErr.takeError();
+
+ if (BMsOrErr->empty())
+ return make_error<StringError>("Bitcode file does not contain any modules",
+ inconvertibleErrorCode());
+
+ // Create an InputModule for each module in the InputFile, and add it to the
+ // ModuleSymbolTable.
+ for (auto BM : *BMsOrErr) {
+ Expected<std::unique_ptr<Module>> MOrErr =
+ BM.getLazyModule(File->Ctx, /*ShouldLazyLoadMetadata*/ true,
+ /*IsImporting*/ false);
+ if (!MOrErr)
+ return MOrErr.takeError();
+
+ size_t SymBegin = File->SymTab.symbols().size();
+ File->SymTab.addModule(MOrErr->get());
+ size_t SymEnd = File->SymTab.symbols().size();
+
+ for (const auto &C : (*MOrErr)->getComdatSymbolTable()) {
+ auto P = File->ComdatMap.insert(
+ std::make_pair(&C.second, File->Comdats.size()));
+ assert(P.second);
+ (void)P;
+ File->Comdats.push_back(C.first());
+ }
+
+ File->Mods.push_back({BM, std::move(*MOrErr), SymBegin, SymEnd});
+ }
+
+ return std::move(File);
+}
+
+Expected<int> InputFile::Symbol::getComdatIndex() const {
+ if (!isGV())
+ return -1;
+ const GlobalObject *GO = getGV()->getBaseObject();
+ if (!GO)
+ return make_error<StringError>("Unable to determine comdat of alias!",
+ inconvertibleErrorCode());
+ if (const Comdat *C = GO->getComdat()) {
+ auto I = File->ComdatMap.find(C);
+ assert(I != File->ComdatMap.end());
+ return I->second;
+ }
+ return -1;
+}
+
+StringRef InputFile::getName() const {
+ return Mods[0].BM.getModuleIdentifier();
+}
+
+StringRef InputFile::getSourceFileName() const {
+ return Mods[0].Mod->getSourceFileName();
+}
+
+iterator_range<InputFile::symbol_iterator>
+InputFile::module_symbols(InputModule &IM) {
+ return llvm::make_range(
+ symbol_iterator(SymTab.symbols().data() + IM.SymBegin, SymTab, this),
+ symbol_iterator(SymTab.symbols().data() + IM.SymEnd, SymTab, this));
+}
+
+LTO::RegularLTOState::RegularLTOState(unsigned ParallelCodeGenParallelismLevel,
+ Config &Conf)
+ : ParallelCodeGenParallelismLevel(ParallelCodeGenParallelismLevel),
+ Ctx(Conf) {}
+
+LTO::ThinLTOState::ThinLTOState(ThinBackend Backend) : Backend(Backend) {
+ if (!Backend)
+ this->Backend =
+ createInProcessThinBackend(llvm::heavyweight_hardware_concurrency());
+}
+
+LTO::LTO(Config Conf, ThinBackend Backend,
+ unsigned ParallelCodeGenParallelismLevel)
+ : Conf(std::move(Conf)),
+ RegularLTO(ParallelCodeGenParallelismLevel, this->Conf),
+ ThinLTO(std::move(Backend)) {}
+
+// Requires a destructor for MapVector<BitcodeModule>.
+LTO::~LTO() = default;
+
+// Add the given symbol to the GlobalResolutions map, and resolve its partition.
+void LTO::addSymbolToGlobalRes(SmallPtrSet<GlobalValue *, 8> &Used,
+ const InputFile::Symbol &Sym,
+ SymbolResolution Res, unsigned Partition) {
+ GlobalValue *GV = Sym.isGV() ? Sym.getGV() : nullptr;
+
+ auto &GlobalRes = GlobalResolutions[Sym.getName()];
+ if (GV) {
+ GlobalRes.UnnamedAddr &= GV->hasGlobalUnnamedAddr();
+ if (Res.Prevailing)
+ GlobalRes.IRName = GV->getName();
+ }
+ if (Res.VisibleToRegularObj || (GV && Used.count(GV)) ||
+ (GlobalRes.Partition != GlobalResolution::Unknown &&
+ GlobalRes.Partition != Partition))
+ GlobalRes.Partition = GlobalResolution::External;
+ else
+ GlobalRes.Partition = Partition;
+}
+
+static void writeToResolutionFile(raw_ostream &OS, InputFile *Input,
+ ArrayRef<SymbolResolution> Res) {
+ StringRef Path = Input->getName();
+ OS << Path << '\n';
+ auto ResI = Res.begin();
+ for (const InputFile::Symbol &Sym : Input->symbols()) {
+ assert(ResI != Res.end());
+ SymbolResolution Res = *ResI++;
+
+ OS << "-r=" << Path << ',' << Sym.getName() << ',';
+ if (Res.Prevailing)
+ OS << 'p';
+ if (Res.FinalDefinitionInLinkageUnit)
+ OS << 'l';
+ if (Res.VisibleToRegularObj)
+ OS << 'x';
+ OS << '\n';
+ }
+ assert(ResI == Res.end());
+}
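+
+// Illustration (hypothetical output, not part of this change): for an input
+// "foo.o" whose symbol "main" is prevailing and visible to regular objects,
+// and whose symbol "helper" is a final definition in the linkage unit, the
+// resolution file would contain:
+//
+//   foo.o
+//   -r=foo.o,main,px
+//   -r=foo.o,helper,l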
+
+Error LTO::add(std::unique_ptr<InputFile> Input,
+ ArrayRef<SymbolResolution> Res) {
+ assert(!CalledGetMaxTasks);
+
+ if (Conf.ResolutionFile)
+ writeToResolutionFile(*Conf.ResolutionFile, Input.get(), Res);
+
+ const SymbolResolution *ResI = Res.begin();
+ for (InputFile::InputModule &IM : Input->Mods)
+ if (Error Err = addModule(*Input, IM, ResI, Res.end()))
+ return Err;
+
+ assert(ResI == Res.end());
+ return Error::success();
+}
+
+Error LTO::addModule(InputFile &Input, InputFile::InputModule &IM,
+ const SymbolResolution *&ResI,
+ const SymbolResolution *ResE) {
+ // FIXME: move to backend
+ Module &M = *IM.Mod;
+
+ if (M.getDataLayoutStr().empty())
+ return make_error<StringError>("input module has no datalayout",
+ inconvertibleErrorCode());
+
+ if (!Conf.OverrideTriple.empty())
+ M.setTargetTriple(Conf.OverrideTriple);
+ else if (M.getTargetTriple().empty())
+ M.setTargetTriple(Conf.DefaultTriple);
+
+ Expected<bool> HasThinLTOSummary = IM.BM.hasSummary();
+ if (!HasThinLTOSummary)
+ return HasThinLTOSummary.takeError();
+
+ if (*HasThinLTOSummary)
+ return addThinLTO(IM.BM, M, Input.module_symbols(IM), ResI, ResE);
+ else
+ return addRegularLTO(IM.BM, ResI, ResE);
+}
+
+// Add a regular LTO object to the link.
+Error LTO::addRegularLTO(BitcodeModule BM, const SymbolResolution *&ResI,
+ const SymbolResolution *ResE) {
+ if (!RegularLTO.CombinedModule) {
+ RegularLTO.CombinedModule =
+ llvm::make_unique<Module>("ld-temp.o", RegularLTO.Ctx);
+ RegularLTO.Mover = llvm::make_unique<IRMover>(*RegularLTO.CombinedModule);
+ }
+ Expected<std::unique_ptr<Module>> MOrErr =
+ BM.getLazyModule(RegularLTO.Ctx, /*ShouldLazyLoadMetadata*/ true,
+ /*IsImporting*/ false);
+ if (!MOrErr)
+ return MOrErr.takeError();
+
+ Module &M = **MOrErr;
+ if (Error Err = M.materializeMetadata())
+ return Err;
+ UpgradeDebugInfo(M);
+
+ ModuleSymbolTable SymTab;
+ SymTab.addModule(&M);
+
+ SmallPtrSet<GlobalValue *, 8> Used;
+ collectUsedGlobalVariables(M, Used, /*CompilerUsed*/ false);
+
+ std::vector<GlobalValue *> Keep;
+
+ for (GlobalVariable &GV : M.globals())
+ if (GV.hasAppendingLinkage())
+ Keep.push_back(&GV);
+
+ for (const InputFile::Symbol &Sym :
+ make_range(InputFile::symbol_iterator(SymTab.symbols().begin(), SymTab,
+ nullptr),
+ InputFile::symbol_iterator(SymTab.symbols().end(), SymTab,
+ nullptr))) {
+ assert(ResI != ResE);
+ SymbolResolution Res = *ResI++;
+ addSymbolToGlobalRes(Used, Sym, Res, 0);
+
+ if (Sym.getFlags() & object::BasicSymbolRef::SF_Undefined)
+ continue;
+ if (Res.Prevailing && Sym.isGV()) {
+ GlobalValue *GV = Sym.getGV();
+ Keep.push_back(GV);
+ switch (GV->getLinkage()) {
+ default:
+ break;
+ case GlobalValue::LinkOnceAnyLinkage:
+ GV->setLinkage(GlobalValue::WeakAnyLinkage);
+ break;
+ case GlobalValue::LinkOnceODRLinkage:
+ GV->setLinkage(GlobalValue::WeakODRLinkage);
+ break;
+ }
+ }
+ // Common resolution: collect the maximum size/alignment over all commons.
+ // We also record if we see an instance of a common as prevailing, so that
+ // if none is prevailing we can ignore it later.
+ if (Sym.getFlags() & object::BasicSymbolRef::SF_Common) {
+ // FIXME: We should figure out what to do about commons defined by asm.
+ // For now they aren't reported correctly by ModuleSymbolTable.
+ auto &CommonRes = RegularLTO.Commons[Sym.getGV()->getName()];
+ CommonRes.Size = std::max(CommonRes.Size, Sym.getCommonSize());
+ CommonRes.Align = std::max(CommonRes.Align, Sym.getCommonAlignment());
+ CommonRes.Prevailing |= Res.Prevailing;
+ }
+
+ // FIXME: use proposed local attribute for FinalDefinitionInLinkageUnit.
+ }
+
+ return RegularLTO.Mover->move(std::move(*MOrErr), Keep,
+ [](GlobalValue &, IRMover::ValueAdder) {},
+ /* LinkModuleInlineAsm */ true,
+ /* IsPerformingImport */ false);
+}
+
+// Add a ThinLTO object to the link.
+// FIXME: This function should not need to take as many parameters once we have
+// a bitcode symbol table.
+Error LTO::addThinLTO(BitcodeModule BM, Module &M,
+ iterator_range<InputFile::symbol_iterator> Syms,
+ const SymbolResolution *&ResI,
+ const SymbolResolution *ResE) {
+ SmallPtrSet<GlobalValue *, 8> Used;
+ collectUsedGlobalVariables(M, Used, /*CompilerUsed*/ false);
+
+ Expected<std::unique_ptr<ModuleSummaryIndex>> SummaryOrErr = BM.getSummary();
+ if (!SummaryOrErr)
+ return SummaryOrErr.takeError();
+ ThinLTO.CombinedIndex.mergeFrom(std::move(*SummaryOrErr),
+ ThinLTO.ModuleMap.size());
+
+ for (const InputFile::Symbol &Sym : Syms) {
+ assert(ResI != ResE);
+ SymbolResolution Res = *ResI++;
+ addSymbolToGlobalRes(Used, Sym, Res, ThinLTO.ModuleMap.size() + 1);
+
+ if (Res.Prevailing && Sym.isGV())
+ ThinLTO.PrevailingModuleForGUID[Sym.getGV()->getGUID()] =
+ BM.getModuleIdentifier();
+ }
+
+ if (!ThinLTO.ModuleMap.insert({BM.getModuleIdentifier(), BM}).second)
+ return make_error<StringError>(
+ "Expected at most one ThinLTO module per bitcode file",
+ inconvertibleErrorCode());
+
+ return Error::success();
+}
+
+unsigned LTO::getMaxTasks() const {
+ CalledGetMaxTasks = true;
+ return RegularLTO.ParallelCodeGenParallelismLevel + ThinLTO.ModuleMap.size();
+}
+
+Error LTO::run(AddStreamFn AddStream, NativeObjectCache Cache) {
+ // Save the status of having a regularLTO combined module, as
+ // this is needed for generating the ThinLTO Task ID, and
+ // the CombinedModule will be moved at the end of runRegularLTO.
+ bool HasRegularLTO = RegularLTO.CombinedModule != nullptr;
+ // Invoke regular LTO if there was a regular LTO module to start with.
+ if (HasRegularLTO)
+ if (auto E = runRegularLTO(AddStream))
+ return E;
+ return runThinLTO(AddStream, Cache, HasRegularLTO);
+}
+
+Error LTO::runRegularLTO(AddStreamFn AddStream) {
+ // Make sure commons have the right size/alignment: we kept the largest from
+ // all the prevailing instances when adding the inputs, and we apply it here.
+ const DataLayout &DL = RegularLTO.CombinedModule->getDataLayout();
+ for (auto &I : RegularLTO.Commons) {
+ if (!I.second.Prevailing)
+ // Don't do anything if no instance of this common was prevailing.
+ continue;
+ GlobalVariable *OldGV = RegularLTO.CombinedModule->getNamedGlobal(I.first);
+ if (OldGV && DL.getTypeAllocSize(OldGV->getValueType()) == I.second.Size) {
+ // Don't create a new global if the type is already correct, just make
+ // sure the alignment is correct.
+ OldGV->setAlignment(I.second.Align);
+ continue;
+ }
+ ArrayType *Ty =
+ ArrayType::get(Type::getInt8Ty(RegularLTO.Ctx), I.second.Size);
+ auto *GV = new GlobalVariable(*RegularLTO.CombinedModule, Ty, false,
+ GlobalValue::CommonLinkage,
+ ConstantAggregateZero::get(Ty), "");
+ GV->setAlignment(I.second.Align);
+ if (OldGV) {
+ OldGV->replaceAllUsesWith(ConstantExpr::getBitCast(GV, OldGV->getType()));
+ GV->takeName(OldGV);
+ OldGV->eraseFromParent();
+ } else {
+ GV->setName(I.first);
+ }
+ }
+
+ if (Conf.PreOptModuleHook &&
+ !Conf.PreOptModuleHook(0, *RegularLTO.CombinedModule))
+ return Error::success();
+
+ if (!Conf.CodeGenOnly) {
+ for (const auto &R : GlobalResolutions) {
+ if (R.second.IRName.empty())
+ continue;
+ if (R.second.Partition != 0 &&
+ R.second.Partition != GlobalResolution::External)
+ continue;
+
+ GlobalValue *GV =
+ RegularLTO.CombinedModule->getNamedValue(R.second.IRName);
+ // Ignore symbols defined in other partitions.
+ if (!GV || GV->hasLocalLinkage())
+ continue;
+ GV->setUnnamedAddr(R.second.UnnamedAddr ? GlobalValue::UnnamedAddr::Global
+ : GlobalValue::UnnamedAddr::None);
+ if (R.second.Partition == 0)
+ GV->setLinkage(GlobalValue::InternalLinkage);
+ }
+
+ if (Conf.PostInternalizeModuleHook &&
+ !Conf.PostInternalizeModuleHook(0, *RegularLTO.CombinedModule))
+ return Error::success();
+ }
+ return backend(Conf, AddStream, RegularLTO.ParallelCodeGenParallelismLevel,
+ std::move(RegularLTO.CombinedModule));
+}
+
+/// This class defines the interface to the ThinLTO backend.
+class lto::ThinBackendProc {
+protected:
+ Config &Conf;
+ ModuleSummaryIndex &CombinedIndex;
+ const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries;
+
+public:
+ ThinBackendProc(Config &Conf, ModuleSummaryIndex &CombinedIndex,
+ const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries)
+ : Conf(Conf), CombinedIndex(CombinedIndex),
+ ModuleToDefinedGVSummaries(ModuleToDefinedGVSummaries) {}
+
+ virtual ~ThinBackendProc() {}
+ virtual Error start(
+ unsigned Task, BitcodeModule BM,
+ const FunctionImporter::ImportMapTy &ImportList,
+ const FunctionImporter::ExportSetTy &ExportList,
+ const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
+ MapVector<StringRef, BitcodeModule> &ModuleMap) = 0;
+ virtual Error wait() = 0;
+};
+
+namespace {
+class InProcessThinBackend : public ThinBackendProc {
+ ThreadPool BackendThreadPool;
+ AddStreamFn AddStream;
+ NativeObjectCache Cache;
+
+ Optional<Error> Err;
+ std::mutex ErrMu;
+
+public:
+ InProcessThinBackend(
+ Config &Conf, ModuleSummaryIndex &CombinedIndex,
+ unsigned ThinLTOParallelismLevel,
+ const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
+ AddStreamFn AddStream, NativeObjectCache Cache)
+ : ThinBackendProc(Conf, CombinedIndex, ModuleToDefinedGVSummaries),
+ BackendThreadPool(ThinLTOParallelismLevel),
+ AddStream(std::move(AddStream)), Cache(std::move(Cache)) {}
+
+ Error runThinLTOBackendThread(
+ AddStreamFn AddStream, NativeObjectCache Cache, unsigned Task,
+ BitcodeModule BM, ModuleSummaryIndex &CombinedIndex,
+ const FunctionImporter::ImportMapTy &ImportList,
+ const FunctionImporter::ExportSetTy &ExportList,
+ const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
+ const GVSummaryMapTy &DefinedGlobals,
+ MapVector<StringRef, BitcodeModule> &ModuleMap) {
+ auto RunThinBackend = [&](AddStreamFn AddStream) {
+ LTOLLVMContext BackendContext(Conf);
+ Expected<std::unique_ptr<Module>> MOrErr = BM.parseModule(BackendContext);
+ if (!MOrErr)
+ return MOrErr.takeError();
+
+ return thinBackend(Conf, Task, AddStream, **MOrErr, CombinedIndex,
+ ImportList, DefinedGlobals, ModuleMap);
+ };
+
+ auto ModuleID = BM.getModuleIdentifier();
+
+ if (!Cache || !CombinedIndex.modulePaths().count(ModuleID) ||
+ all_of(CombinedIndex.getModuleHash(ModuleID),
+ [](uint32_t V) { return V == 0; }))
+ // Cache disabled or no entry for this module in the combined index or
+ // no module hash.
+ return RunThinBackend(AddStream);
+
+ SmallString<40> Key;
+ // The module may be cached; compute the key that identifies it in the cache.
+ computeCacheKey(Key, Conf, CombinedIndex, ModuleID, ImportList, ExportList,
+ ResolvedODR, DefinedGlobals);
+ if (AddStreamFn CacheAddStream = Cache(Task, Key))
+ return RunThinBackend(CacheAddStream);
+
+ return Error::success();
+ }
+
+ Error start(
+ unsigned Task, BitcodeModule BM,
+ const FunctionImporter::ImportMapTy &ImportList,
+ const FunctionImporter::ExportSetTy &ExportList,
+ const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
+ MapVector<StringRef, BitcodeModule> &ModuleMap) override {
+ StringRef ModulePath = BM.getModuleIdentifier();
+ assert(ModuleToDefinedGVSummaries.count(ModulePath));
+ const GVSummaryMapTy &DefinedGlobals =
+ ModuleToDefinedGVSummaries.find(ModulePath)->second;
+ BackendThreadPool.async(
+ [=](BitcodeModule BM, ModuleSummaryIndex &CombinedIndex,
+ const FunctionImporter::ImportMapTy &ImportList,
+ const FunctionImporter::ExportSetTy &ExportList,
+ const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>
+ &ResolvedODR,
+ const GVSummaryMapTy &DefinedGlobals,
+ MapVector<StringRef, BitcodeModule> &ModuleMap) {
+ Error E = runThinLTOBackendThread(
+ AddStream, Cache, Task, BM, CombinedIndex, ImportList,
+ ExportList, ResolvedODR, DefinedGlobals, ModuleMap);
+ if (E) {
+ std::unique_lock<std::mutex> L(ErrMu);
+ if (Err)
+ Err = joinErrors(std::move(*Err), std::move(E));
+ else
+ Err = std::move(E);
+ }
+ },
+ BM, std::ref(CombinedIndex), std::ref(ImportList),
+ std::ref(ExportList), std::ref(ResolvedODR), std::ref(DefinedGlobals),
+ std::ref(ModuleMap));
+ return Error::success();
+ }
+
+ Error wait() override {
+ BackendThreadPool.wait();
+ if (Err)
+ return std::move(*Err);
+ else
+ return Error::success();
+ }
+};
+} // end anonymous namespace
+
+ThinBackend lto::createInProcessThinBackend(unsigned ParallelismLevel) {
+ return [=](Config &Conf, ModuleSummaryIndex &CombinedIndex,
+ const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
+ AddStreamFn AddStream, NativeObjectCache Cache) {
+ return llvm::make_unique<InProcessThinBackend>(
+ Conf, CombinedIndex, ParallelismLevel, ModuleToDefinedGVSummaries,
+ AddStream, Cache);
+ };
+}
+
+// Given the original \p Path to an output file, replace any path
+// prefix matching \p OldPrefix with \p NewPrefix. Also, create the
+// resulting directory if it does not yet exist.
+std::string lto::getThinLTOOutputFile(const std::string &Path,
+ const std::string &OldPrefix,
+ const std::string &NewPrefix) {
+ if (OldPrefix.empty() && NewPrefix.empty())
+ return Path;
+ SmallString<128> NewPath(Path);
+ llvm::sys::path::replace_path_prefix(NewPath, OldPrefix, NewPrefix);
+ StringRef ParentPath = llvm::sys::path::parent_path(NewPath.str());
+ if (!ParentPath.empty()) {
+ // Make sure the new directory exists, creating it if necessary.
+ if (std::error_code EC = llvm::sys::fs::create_directories(ParentPath))
+ llvm::errs() << "warning: could not create directory '" << ParentPath
+ << "': " << EC.message() << '\n';
+ }
+ return NewPath.str();
+}
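+
+// Illustration (hypothetical arguments, not part of this change): with
+// Path = "obj/bar.o", OldPrefix = "obj" and NewPrefix = "thinlto-out", the
+// function returns "thinlto-out/bar.o" and creates the "thinlto-out"
+// directory if needed; if both prefixes are empty the path is returned
+// unchanged.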
+
+namespace {
+class WriteIndexesThinBackend : public ThinBackendProc {
+ std::string OldPrefix, NewPrefix;
+ bool ShouldEmitImportsFiles;
+
+ std::string LinkedObjectsFileName;
+ std::unique_ptr<llvm::raw_fd_ostream> LinkedObjectsFile;
+
+public:
+ WriteIndexesThinBackend(
+ Config &Conf, ModuleSummaryIndex &CombinedIndex,
+ const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
+ std::string OldPrefix, std::string NewPrefix, bool ShouldEmitImportsFiles,
+ std::string LinkedObjectsFileName)
+ : ThinBackendProc(Conf, CombinedIndex, ModuleToDefinedGVSummaries),
+ OldPrefix(OldPrefix), NewPrefix(NewPrefix),
+ ShouldEmitImportsFiles(ShouldEmitImportsFiles),
+ LinkedObjectsFileName(LinkedObjectsFileName) {}
+
+ Error start(
+ unsigned Task, BitcodeModule BM,
+ const FunctionImporter::ImportMapTy &ImportList,
+ const FunctionImporter::ExportSetTy &ExportList,
+ const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
+ MapVector<StringRef, BitcodeModule> &ModuleMap) override {
+ StringRef ModulePath = BM.getModuleIdentifier();
+ std::string NewModulePath =
+ getThinLTOOutputFile(ModulePath, OldPrefix, NewPrefix);
+
+ std::error_code EC;
+ if (!LinkedObjectsFileName.empty()) {
+ if (!LinkedObjectsFile) {
+ LinkedObjectsFile = llvm::make_unique<raw_fd_ostream>(
+ LinkedObjectsFileName, EC, sys::fs::OpenFlags::F_None);
+ if (EC)
+ return errorCodeToError(EC);
+ }
+ *LinkedObjectsFile << NewModulePath << '\n';
+ }
+
+ std::map<std::string, GVSummaryMapTy> ModuleToSummariesForIndex;
+ gatherImportedSummariesForModule(ModulePath, ModuleToDefinedGVSummaries,
+ ImportList, ModuleToSummariesForIndex);
+
+ raw_fd_ostream OS(NewModulePath + ".thinlto.bc", EC,
+ sys::fs::OpenFlags::F_None);
+ if (EC)
+ return errorCodeToError(EC);
+ WriteIndexToFile(CombinedIndex, OS, &ModuleToSummariesForIndex);
+
+ if (ShouldEmitImportsFiles)
+ return errorCodeToError(
+ EmitImportsFiles(ModulePath, NewModulePath + ".imports", ImportList));
+ return Error::success();
+ }
+
+ Error wait() override { return Error::success(); }
+};
+} // end anonymous namespace
+
+ThinBackend lto::createWriteIndexesThinBackend(std::string OldPrefix,
+ std::string NewPrefix,
+ bool ShouldEmitImportsFiles,
+ std::string LinkedObjectsFile) {
+ return [=](Config &Conf, ModuleSummaryIndex &CombinedIndex,
+ const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
+ AddStreamFn AddStream, NativeObjectCache Cache) {
+ return llvm::make_unique<WriteIndexesThinBackend>(
+ Conf, CombinedIndex, ModuleToDefinedGVSummaries, OldPrefix, NewPrefix,
+ ShouldEmitImportsFiles, LinkedObjectsFile);
+ };
+}
+
+Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache,
+ bool HasRegularLTO) {
+ if (ThinLTO.ModuleMap.empty())
+ return Error::success();
+
+ if (Conf.CombinedIndexHook && !Conf.CombinedIndexHook(ThinLTO.CombinedIndex))
+ return Error::success();
+
+ // Collect for each module the list of functions it defines (GUID ->
+ // Summary).
+ StringMap<std::map<GlobalValue::GUID, GlobalValueSummary *>>
+ ModuleToDefinedGVSummaries(ThinLTO.ModuleMap.size());
+ ThinLTO.CombinedIndex.collectDefinedGVSummariesPerModule(
+ ModuleToDefinedGVSummaries);
+ // Create entries for any modules that didn't have any GV summaries
+ // (either they didn't have any GVs to start with, or we suppressed
+ // generation of the summaries because they e.g. had inline assembly
+ // uses that couldn't be promoted/renamed on export). This is so
+ // InProcessThinBackend::start can still launch a backend thread, which
+ // is passed the map of summaries for the module, without any special
+ // handling for this case.
+ for (auto &Mod : ThinLTO.ModuleMap)
+ if (!ModuleToDefinedGVSummaries.count(Mod.first))
+ ModuleToDefinedGVSummaries.try_emplace(Mod.first);
+
+ StringMap<FunctionImporter::ImportMapTy> ImportLists(
+ ThinLTO.ModuleMap.size());
+ StringMap<FunctionImporter::ExportSetTy> ExportLists(
+ ThinLTO.ModuleMap.size());
+ StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR;
+
+ if (Conf.OptLevel > 0) {
+ ComputeCrossModuleImport(ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,
+ ImportLists, ExportLists);
+
+ std::set<GlobalValue::GUID> ExportedGUIDs;
+ for (auto &Res : GlobalResolutions) {
+ if (!Res.second.IRName.empty() &&
+ Res.second.Partition == GlobalResolution::External)
+ ExportedGUIDs.insert(GlobalValue::getGUID(Res.second.IRName));
+ }
+
+ auto isPrevailing = [&](GlobalValue::GUID GUID,
+ const GlobalValueSummary *S) {
+ return ThinLTO.PrevailingModuleForGUID[GUID] == S->modulePath();
+ };
+ auto isExported = [&](StringRef ModuleIdentifier, GlobalValue::GUID GUID) {
+ const auto &ExportList = ExportLists.find(ModuleIdentifier);
+ return (ExportList != ExportLists.end() &&
+ ExportList->second.count(GUID)) ||
+ ExportedGUIDs.count(GUID);
+ };
+ thinLTOInternalizeAndPromoteInIndex(ThinLTO.CombinedIndex, isExported);
+
+ auto recordNewLinkage = [&](StringRef ModuleIdentifier,
+ GlobalValue::GUID GUID,
+ GlobalValue::LinkageTypes NewLinkage) {
+ ResolvedODR[ModuleIdentifier][GUID] = NewLinkage;
+ };
+
+ thinLTOResolveWeakForLinkerInIndex(ThinLTO.CombinedIndex, isPrevailing,
+ recordNewLinkage);
+ }
+
+ std::unique_ptr<ThinBackendProc> BackendProc =
+ ThinLTO.Backend(Conf, ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,
+ AddStream, Cache);
+
+ // Partition numbers for ThinLTO jobs start at 1 (see comments for
+ // GlobalResolution in LTO.h). Task numbers, however, start at
+ // ParallelCodeGenParallelismLevel if an LTO module is present, as tasks 0
+ // through ParallelCodeGenParallelismLevel-1 are reserved for parallel code
+ // generation partitions.
+ unsigned Task =
+ HasRegularLTO ? RegularLTO.ParallelCodeGenParallelismLevel : 0;
+ unsigned Partition = 1;
+
+ for (auto &Mod : ThinLTO.ModuleMap) {
+ if (Error E = BackendProc->start(Task, Mod.second, ImportLists[Mod.first],
+ ExportLists[Mod.first],
+ ResolvedODR[Mod.first], ThinLTO.ModuleMap))
+ return E;
+
+ ++Task;
+ ++Partition;
+ }
+
+ return BackendProc->wait();
}
diff --git a/contrib/llvm/lib/LTO/LTOBackend.cpp b/contrib/llvm/lib/LTO/LTOBackend.cpp
new file mode 100644
index 000000000000..6342cbe4fd90
--- /dev/null
+++ b/contrib/llvm/lib/LTO/LTOBackend.cpp
@@ -0,0 +1,375 @@
+//===-LTOBackend.cpp - LLVM Link Time Optimizer Backend -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the "backend" phase of LTO, i.e. it performs
+// optimization and code generation on a loaded module. It is generally used
+// internally by the LTO class but can also be used independently, for example
+// to implement a standalone ThinLTO backend.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/LTO/LTOBackend.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CGSCCPassManager.h"
+#include "llvm/Analysis/LoopPassManager.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Bitcode/BitcodeReader.h"
+#include "llvm/Bitcode/BitcodeWriter.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/LTO/LTO.h"
+#include "llvm/LTO/legacy/UpdateCompilerUsed.h"
+#include "llvm/MC/SubtargetFeature.h"
+#include "llvm/Passes/PassBuilder.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/ThreadPool.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/Utils/FunctionImportUtils.h"
+#include "llvm/Transforms/Utils/SplitModule.h"
+
+using namespace llvm;
+using namespace lto;
+
+LLVM_ATTRIBUTE_NORETURN static void reportOpenError(StringRef Path, Twine Msg) {
+ errs() << "failed to open " << Path << ": " << Msg << '\n';
+ errs().flush();
+ exit(1);
+}
+
+Error Config::addSaveTemps(std::string OutputFileName,
+ bool UseInputModulePath) {
+ ShouldDiscardValueNames = false;
+
+ std::error_code EC;
+ ResolutionFile = llvm::make_unique<raw_fd_ostream>(
+ OutputFileName + "resolution.txt", EC, sys::fs::OpenFlags::F_Text);
+ if (EC)
+ return errorCodeToError(EC);
+
+ auto setHook = [&](std::string PathSuffix, ModuleHookFn &Hook) {
+ // Keep track of the hook provided by the linker, which also needs to run.
+ ModuleHookFn LinkerHook = Hook;
+ Hook = [=](unsigned Task, const Module &M) {
+ // If the linker's hook returned false, we need to pass that result
+ // through.
+ if (LinkerHook && !LinkerHook(Task, M))
+ return false;
+
+ std::string PathPrefix;
+ // If this is the combined module (not a ThinLTO backend compile) or the
+ // user hasn't requested using the input module's path, emit to a file
+ // named from the provided OutputFileName with the Task ID appended.
+ if (M.getModuleIdentifier() == "ld-temp.o" || !UseInputModulePath) {
+ PathPrefix = OutputFileName + utostr(Task);
+ } else
+ PathPrefix = M.getModuleIdentifier();
+ std::string Path = PathPrefix + "." + PathSuffix + ".bc";
+ std::error_code EC;
+ raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::F_None);
+ // Because -save-temps is a debugging feature, we report the error
+ // directly and exit.
+ if (EC)
+ reportOpenError(Path, EC.message());
+ WriteBitcodeToFile(&M, OS, /*ShouldPreserveUseListOrder=*/false);
+ return true;
+ };
+ };
+
+ setHook("0.preopt", PreOptModuleHook);
+ setHook("1.promote", PostPromoteModuleHook);
+ setHook("2.internalize", PostInternalizeModuleHook);
+ setHook("3.import", PostImportModuleHook);
+ setHook("4.opt", PostOptModuleHook);
+ setHook("5.precodegen", PreCodeGenModuleHook);
+
+ CombinedIndexHook = [=](const ModuleSummaryIndex &Index) {
+ std::string Path = OutputFileName + "index.bc";
+ std::error_code EC;
+ raw_fd_ostream OS(Path, EC, sys::fs::OpenFlags::F_None);
+ // Because -save-temps is a debugging feature, we report the error
+ // directly and exit.
+ if (EC)
+ reportOpenError(Path, EC.message());
+ WriteIndexToFile(Index, OS);
+ return true;
+ };
+
+ return Error::success();
+}
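+
+// Illustration (hypothetical file names, not part of this change): a call
+// such as addSaveTemps("/tmp/lto.", false) makes the hooks above emit, for
+// the combined regular LTO module (Task 0), files along the lines of
+//
+//   /tmp/lto.resolution.txt                               (symbol resolutions)
+//   /tmp/lto.0.0.preopt.bc ... /tmp/lto.0.5.precodegen.bc (per-stage modules)
+//   /tmp/lto.index.bc                                     (combined summary index)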
+
+namespace {
+
+std::unique_ptr<TargetMachine>
+createTargetMachine(Config &Conf, StringRef TheTriple,
+ const Target *TheTarget) {
+ SubtargetFeatures Features;
+ Features.getDefaultSubtargetFeatures(Triple(TheTriple));
+ for (const std::string &A : Conf.MAttrs)
+ Features.AddFeature(A);
+
+ return std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine(
+ TheTriple, Conf.CPU, Features.getString(), Conf.Options, Conf.RelocModel,
+ Conf.CodeModel, Conf.CGOptLevel));
+}
+
+static void runNewPMCustomPasses(Module &Mod, TargetMachine *TM,
+ std::string PipelineDesc,
+ std::string AAPipelineDesc,
+ bool DisableVerify) {
+ PassBuilder PB(TM);
+ AAManager AA;
+
+ // Parse a custom AA pipeline if asked to.
+ if (!AAPipelineDesc.empty())
+ if (!PB.parseAAPipeline(AA, AAPipelineDesc))
+ report_fatal_error("unable to parse AA pipeline description: " +
+ AAPipelineDesc);
+
+ LoopAnalysisManager LAM;
+ FunctionAnalysisManager FAM;
+ CGSCCAnalysisManager CGAM;
+ ModuleAnalysisManager MAM;
+
+ // Register the AA manager first so that our version is the one used.
+ FAM.registerPass([&] { return std::move(AA); });
+
+ // Register all the basic analyses with the managers.
+ PB.registerModuleAnalyses(MAM);
+ PB.registerCGSCCAnalyses(CGAM);
+ PB.registerFunctionAnalyses(FAM);
+ PB.registerLoopAnalyses(LAM);
+ PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
+
+ ModulePassManager MPM;
+
+ // Always verify the input.
+ MPM.addPass(VerifierPass());
+
+ // Now, add all the passes we've been requested to.
+ if (!PB.parsePassPipeline(MPM, PipelineDesc))
+ report_fatal_error("unable to parse pass pipeline description: " +
+ PipelineDesc);
+
+ if (!DisableVerify)
+ MPM.addPass(VerifierPass());
+ MPM.run(Mod, MAM);
+}
+
+static void runOldPMPasses(Config &Conf, Module &Mod, TargetMachine *TM,
+ bool IsThinLTO) {
+ legacy::PassManager passes;
+ passes.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));
+
+ PassManagerBuilder PMB;
+ PMB.LibraryInfo = new TargetLibraryInfoImpl(Triple(TM->getTargetTriple()));
+ PMB.Inliner = createFunctionInliningPass();
+ // Unconditionally verify input since it is not verified before this
+ // point and has unknown origin.
+ PMB.VerifyInput = true;
+ PMB.VerifyOutput = !Conf.DisableVerify;
+ PMB.LoopVectorize = true;
+ PMB.SLPVectorize = true;
+ PMB.OptLevel = Conf.OptLevel;
+ PMB.PGOSampleUse = Conf.SampleProfile;
+ if (IsThinLTO)
+ PMB.populateThinLTOPassManager(passes);
+ else
+ PMB.populateLTOPassManager(passes);
+ passes.run(Mod);
+}
+
+bool opt(Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod,
+ bool IsThinLTO) {
+ if (Conf.OptPipeline.empty())
+ runOldPMPasses(Conf, Mod, TM, IsThinLTO);
+ else
+ runNewPMCustomPasses(Mod, TM, Conf.OptPipeline, Conf.AAPipeline,
+ Conf.DisableVerify);
+ return !Conf.PostOptModuleHook || Conf.PostOptModuleHook(Task, Mod);
+}
+
+void codegen(Config &Conf, TargetMachine *TM, AddStreamFn AddStream,
+ unsigned Task, Module &Mod) {
+ if (Conf.PreCodeGenModuleHook && !Conf.PreCodeGenModuleHook(Task, Mod))
+ return;
+
+ auto Stream = AddStream(Task);
+ legacy::PassManager CodeGenPasses;
+ if (TM->addPassesToEmitFile(CodeGenPasses, *Stream->OS,
+ TargetMachine::CGFT_ObjectFile))
+ report_fatal_error("Failed to setup codegen");
+ CodeGenPasses.run(Mod);
+}
+
+void splitCodeGen(Config &C, TargetMachine *TM, AddStreamFn AddStream,
+ unsigned ParallelCodeGenParallelismLevel,
+ std::unique_ptr<Module> Mod) {
+ ThreadPool CodegenThreadPool(ParallelCodeGenParallelismLevel);
+ unsigned ThreadCount = 0;
+ const Target *T = &TM->getTarget();
+
+ SplitModule(
+ std::move(Mod), ParallelCodeGenParallelismLevel,
+ [&](std::unique_ptr<Module> MPart) {
+ // We want to clone the module in a new context to multi-thread the
+ // codegen. We do it by serializing partition modules to bitcode
+ // (while still on the main thread, in order to avoid data races) and
+ // spinning up new threads which deserialize the partitions into
+ // separate contexts.
+ // FIXME: Provide a more direct way to do this in LLVM.
+ SmallString<0> BC;
+ raw_svector_ostream BCOS(BC);
+ WriteBitcodeToFile(MPart.get(), BCOS);
+
+ // Enqueue the task
+ CodegenThreadPool.async(
+ [&](const SmallString<0> &BC, unsigned ThreadId) {
+ LTOLLVMContext Ctx(C);
+ Expected<std::unique_ptr<Module>> MOrErr = parseBitcodeFile(
+ MemoryBufferRef(StringRef(BC.data(), BC.size()), "ld-temp.o"),
+ Ctx);
+ if (!MOrErr)
+ report_fatal_error("Failed to read bitcode");
+ std::unique_ptr<Module> MPartInCtx = std::move(MOrErr.get());
+
+ std::unique_ptr<TargetMachine> TM =
+ createTargetMachine(C, MPartInCtx->getTargetTriple(), T);
+
+ codegen(C, TM.get(), AddStream, ThreadId, *MPartInCtx);
+ },
+ // Pass BC using std::move to ensure that it gets moved rather than
+ // copied into the thread's context.
+ std::move(BC), ThreadCount++);
+ },
+ false);
+
+ // Because the inner lambda (which runs in a worker thread) captures our local
+ // variables, we need to wait for the worker threads to terminate before we
+ // can leave the function scope.
+ CodegenThreadPool.wait();
+}
+
+Expected<const Target *> initAndLookupTarget(Config &C, Module &Mod) {
+ if (!C.OverrideTriple.empty())
+ Mod.setTargetTriple(C.OverrideTriple);
+ else if (Mod.getTargetTriple().empty())
+ Mod.setTargetTriple(C.DefaultTriple);
+
+ std::string Msg;
+ const Target *T = TargetRegistry::lookupTarget(Mod.getTargetTriple(), Msg);
+ if (!T)
+ return make_error<StringError>(Msg, inconvertibleErrorCode());
+ return T;
+}
+
+}
+
+static void handleAsmUndefinedRefs(Module &Mod, TargetMachine &TM) {
+ // Collect the list of undefined symbols used in asm and update
+ // llvm.compiler.used to prevent optimization to drop these from the output.
+ StringSet<> AsmUndefinedRefs;
+ ModuleSymbolTable::CollectAsmSymbols(
+ Triple(Mod.getTargetTriple()), Mod.getModuleInlineAsm(),
+ [&AsmUndefinedRefs](StringRef Name, object::BasicSymbolRef::Flags Flags) {
+ if (Flags & object::BasicSymbolRef::SF_Undefined)
+ AsmUndefinedRefs.insert(Name);
+ });
+ updateCompilerUsed(Mod, TM, AsmUndefinedRefs);
+}
+
+Error lto::backend(Config &C, AddStreamFn AddStream,
+ unsigned ParallelCodeGenParallelismLevel,
+ std::unique_ptr<Module> Mod) {
+ Expected<const Target *> TOrErr = initAndLookupTarget(C, *Mod);
+ if (!TOrErr)
+ return TOrErr.takeError();
+
+ std::unique_ptr<TargetMachine> TM =
+ createTargetMachine(C, Mod->getTargetTriple(), *TOrErr);
+
+ handleAsmUndefinedRefs(*Mod, *TM);
+
+ if (!C.CodeGenOnly)
+ if (!opt(C, TM.get(), 0, *Mod, /*IsThinLTO=*/false))
+ return Error::success();
+
+ if (ParallelCodeGenParallelismLevel == 1) {
+ codegen(C, TM.get(), AddStream, 0, *Mod);
+ } else {
+ splitCodeGen(C, TM.get(), AddStream, ParallelCodeGenParallelismLevel,
+ std::move(Mod));
+ }
+ return Error::success();
+}
+
+Error lto::thinBackend(Config &Conf, unsigned Task, AddStreamFn AddStream,
+ Module &Mod, ModuleSummaryIndex &CombinedIndex,
+ const FunctionImporter::ImportMapTy &ImportList,
+ const GVSummaryMapTy &DefinedGlobals,
+ MapVector<StringRef, BitcodeModule> &ModuleMap) {
+ Expected<const Target *> TOrErr = initAndLookupTarget(Conf, Mod);
+ if (!TOrErr)
+ return TOrErr.takeError();
+
+ std::unique_ptr<TargetMachine> TM =
+ createTargetMachine(Conf, Mod.getTargetTriple(), *TOrErr);
+
+ handleAsmUndefinedRefs(Mod, *TM);
+
+ if (Conf.CodeGenOnly) {
+ codegen(Conf, TM.get(), AddStream, Task, Mod);
+ return Error::success();
+ }
+
+ if (Conf.PreOptModuleHook && !Conf.PreOptModuleHook(Task, Mod))
+ return Error::success();
+
+ renameModuleForThinLTO(Mod, CombinedIndex);
+
+ thinLTOResolveWeakForLinkerModule(Mod, DefinedGlobals);
+
+ if (Conf.PostPromoteModuleHook && !Conf.PostPromoteModuleHook(Task, Mod))
+ return Error::success();
+
+ if (!DefinedGlobals.empty())
+ thinLTOInternalizeModule(Mod, DefinedGlobals);
+
+ if (Conf.PostInternalizeModuleHook &&
+ !Conf.PostInternalizeModuleHook(Task, Mod))
+ return Error::success();
+
+ auto ModuleLoader = [&](StringRef Identifier) {
+ assert(Mod.getContext().isODRUniquingDebugTypes() &&
+ "ODR Type uniquing should be enabled on the context");
+ auto I = ModuleMap.find(Identifier);
+ assert(I != ModuleMap.end());
+ return I->second.getLazyModule(Mod.getContext(),
+ /*ShouldLazyLoadMetadata=*/true,
+ /*IsImporting*/ true);
+ };
+
+ FunctionImporter Importer(CombinedIndex, ModuleLoader);
+ if (Error Err = Importer.importFunctions(Mod, ImportList).takeError())
+ return Err;
+
+ if (Conf.PostImportModuleHook && !Conf.PostImportModuleHook(Task, Mod))
+ return Error::success();
+
+ if (!opt(Conf, TM.get(), Task, Mod, /*IsThinLTO=*/true))
+ return Error::success();
+
+ codegen(Conf, TM.get(), AddStream, Task, Mod);
+ return Error::success();
+}
diff --git a/contrib/llvm/lib/LTO/LTOCodeGenerator.cpp b/contrib/llvm/lib/LTO/LTOCodeGenerator.cpp
index 1da2d18b7d1c..6af31e61f946 100644
--- a/contrib/llvm/lib/LTO/LTOCodeGenerator.cpp
+++ b/contrib/llvm/lib/LTO/LTOCodeGenerator.cpp
@@ -19,7 +19,7 @@
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/CodeGen/ParallelCG.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/Config/config.h"
@@ -49,6 +49,7 @@
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/YAMLTraits.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
@@ -58,6 +59,7 @@
#include "llvm/Transforms/IPO/Internalize.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/ObjCARC.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <system_error>
using namespace llvm;
@@ -89,6 +91,16 @@ cl::opt<bool> LTOStripInvalidDebugInfo(
cl::init(false),
#endif
cl::Hidden);
+
+cl::opt<std::string>
+ LTORemarksFilename("lto-pass-remarks-output",
+ cl::desc("Output filename for pass remarks"),
+ cl::value_desc("filename"));
+
+cl::opt<bool> LTOPassRemarksWithHotness(
+ "lto-pass-remarks-with-hotness",
+ cl::desc("With PGO, include profile count in optimization remarks"),
+ cl::Hidden);
}
LTOCodeGenerator::LTOCodeGenerator(LLVMContext &Context)
@@ -130,15 +142,18 @@ void LTOCodeGenerator::initializeLTOPasses() {
initializeCFGSimplifyPassPass(R);
}
+void LTOCodeGenerator::setAsmUndefinedRefs(LTOModule *Mod) {
+ const std::vector<StringRef> &undefs = Mod->getAsmUndefinedRefs();
+ for (int i = 0, e = undefs.size(); i != e; ++i)
+ AsmUndefinedRefs[undefs[i]] = 1;
+}
+
bool LTOCodeGenerator::addModule(LTOModule *Mod) {
assert(&Mod->getModule().getContext() == &Context &&
"Expected module in same context");
bool ret = TheLinker->linkInModule(Mod->takeModule());
-
- const std::vector<const char *> &undefs = Mod->getAsmUndefinedRefs();
- for (int i = 0, e = undefs.size(); i != e; ++i)
- AsmUndefinedRefs[undefs[i]] = 1;
+ setAsmUndefinedRefs(Mod);
// We've just changed the input, so let's make sure we verify it.
HasVerifiedInput = false;
@@ -154,10 +169,7 @@ void LTOCodeGenerator::setModule(std::unique_ptr<LTOModule> Mod) {
MergedModule = Mod->takeModule();
TheLinker = make_unique<Linker>(*MergedModule);
-
- const std::vector<const char*> &Undefs = Mod->getAsmUndefinedRefs();
- for (int I = 0, E = Undefs.size(); I != E; ++I)
- AsmUndefinedRefs[Undefs[I]] = 1;
+ setAsmUndefinedRefs(&*Mod);
// We've just changed the input, so let's make sure we verify it.
HasVerifiedInput = false;
@@ -185,20 +197,21 @@ void LTOCodeGenerator::setOptLevel(unsigned Level) {
switch (OptLevel) {
case 0:
CGOptLevel = CodeGenOpt::None;
- break;
+ return;
case 1:
CGOptLevel = CodeGenOpt::Less;
- break;
+ return;
case 2:
CGOptLevel = CodeGenOpt::Default;
- break;
+ return;
case 3:
CGOptLevel = CodeGenOpt::Aggressive;
- break;
+ return;
}
+ llvm_unreachable("Unknown optimization level!");
}
-bool LTOCodeGenerator::writeMergedModules(const char *Path) {
+bool LTOCodeGenerator::writeMergedModules(StringRef Path) {
if (!determineTarget())
return false;
@@ -239,7 +252,7 @@ bool LTOCodeGenerator::compileOptimizedToFile(const char **Name) {
SmallString<128> Filename;
int FD;
- const char *Extension =
+ StringRef Extension
(FileType == TargetMachine::CGFT_AssemblyFile ? "s" : "o");
std::error_code EC =
@@ -250,11 +263,12 @@ bool LTOCodeGenerator::compileOptimizedToFile(const char **Name) {
}
// generate object file
- tool_output_file objFile(Filename.c_str(), FD);
+ tool_output_file objFile(Filename, FD);
bool genResult = compileOptimized(&objFile.os());
objFile.os().close();
if (objFile.os().has_error()) {
+ emitError((Twine("could not write object file: ") + Filename).str());
objFile.os().clear_error();
sys::fs::remove(Twine(Filename));
return false;
@@ -363,32 +377,19 @@ std::unique_ptr<TargetMachine> LTOCodeGenerator::createTargetMachine() {
void LTOCodeGenerator::preserveDiscardableGVs(
Module &TheModule,
llvm::function_ref<bool(const GlobalValue &)> mustPreserveGV) {
- SetVector<Constant *> UsedValuesSet;
- if (GlobalVariable *LLVMUsed =
- TheModule.getGlobalVariable("llvm.compiler.used")) {
- ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer());
- for (auto &V : Inits->operands())
- UsedValuesSet.insert(cast<Constant>(&V));
- LLVMUsed->eraseFromParent();
- }
- llvm::Type *i8PTy = llvm::Type::getInt8PtrTy(TheModule.getContext());
+ std::vector<GlobalValue *> Used;
auto mayPreserveGlobal = [&](GlobalValue &GV) {
- if (!GV.isDiscardableIfUnused() || GV.isDeclaration())
+ if (!GV.isDiscardableIfUnused() || GV.isDeclaration() ||
+ !mustPreserveGV(GV))
return;
- if (!mustPreserveGV(GV))
- return;
- if (GV.hasAvailableExternallyLinkage()) {
- emitWarning(
+ if (GV.hasAvailableExternallyLinkage())
+ return emitWarning(
(Twine("Linker asked to preserve available_externally global: '") +
GV.getName() + "'").str());
- return;
- }
- if (GV.hasInternalLinkage()) {
- emitWarning((Twine("Linker asked to preserve internal global: '") +
+ if (GV.hasInternalLinkage())
+ return emitWarning((Twine("Linker asked to preserve internal global: '") +
GV.getName() + "'").str());
- return;
- }
- UsedValuesSet.insert(ConstantExpr::getBitCast(&GV, i8PTy));
+ Used.push_back(&GV);
};
for (auto &GV : TheModule)
mayPreserveGlobal(GV);
@@ -397,15 +398,10 @@ void LTOCodeGenerator::preserveDiscardableGVs(
for (auto &GV : TheModule.aliases())
mayPreserveGlobal(GV);
- if (UsedValuesSet.empty())
+ if (Used.empty())
return;
- llvm::ArrayType *ATy = llvm::ArrayType::get(i8PTy, UsedValuesSet.size());
- auto *LLVMUsed = new llvm::GlobalVariable(
- TheModule, ATy, false, llvm::GlobalValue::AppendingLinkage,
- llvm::ConstantArray::get(ATy, UsedValuesSet.getArrayRef()),
- "llvm.compiler.used");
- LLVMUsed->setSection("llvm.metadata");
+ appendToCompilerUsed(TheModule, Used);
}
void LTOCodeGenerator::applyScopeRestrictions() {
@@ -414,6 +410,7 @@ void LTOCodeGenerator::applyScopeRestrictions() {
// Declare a callback for the internalize pass that will ask for every
// candidate GlobalValue if it can be internalized or not.
+ Mangler Mang;
SmallString<64> MangledName;
auto mustPreserveGV = [&](const GlobalValue &GV) -> bool {
// Unnamed globals can't be mangled, but they can't be preserved either.
@@ -425,8 +422,7 @@ void LTOCodeGenerator::applyScopeRestrictions() {
// underscore.
MangledName.clear();
MangledName.reserve(GV.getName().size() + 1);
- Mangler::getNameWithPrefix(MangledName, GV.getName(),
- MergedModule->getDataLayout());
+ Mang.getNameWithPrefix(MangledName, &GV, /*CannotUsePrivateLabel=*/false);
return MustPreserveSymbols.count(MangledName);
};
@@ -510,6 +506,33 @@ void LTOCodeGenerator::verifyMergedModuleOnce() {
report_fatal_error("Broken module found, compilation aborted!");
}
+bool LTOCodeGenerator::setupOptimizationRemarks() {
+ if (LTORemarksFilename != "") {
+ std::error_code EC;
+ DiagnosticOutputFile = llvm::make_unique<tool_output_file>(
+ LTORemarksFilename, EC, sys::fs::F_None);
+ if (EC) {
+ emitError(EC.message());
+ return false;
+ }
+ Context.setDiagnosticsOutputFile(
+ llvm::make_unique<yaml::Output>(DiagnosticOutputFile->os()));
+ }
+
+ if (LTOPassRemarksWithHotness)
+ Context.setDiagnosticHotnessRequested(true);
+
+ return true;
+}
+
+void LTOCodeGenerator::finishOptimizationRemarks() {
+ if (DiagnosticOutputFile) {
+ DiagnosticOutputFile->keep();
+ // FIXME: LTOCodeGenerator dtor is not invoked on Darwin
+ DiagnosticOutputFile->os().flush();
+ }
+}
+
/// Optimize merged modules using various IPO passes
bool LTOCodeGenerator::optimize(bool DisableVerify, bool DisableInline,
bool DisableGVNLoadPRE,
@@ -517,6 +540,9 @@ bool LTOCodeGenerator::optimize(bool DisableVerify, bool DisableInline,
if (!this->determineTarget())
return false;
+ if (!setupOptimizationRemarks())
+ return false;
+
// We always run the verifier once on the merged module, the `DisableVerify`
// parameter only applies to subsequent verify.
verifyMergedModuleOnce();
@@ -585,12 +611,14 @@ bool LTOCodeGenerator::compileOptimized(ArrayRef<raw_pwrite_stream *> Out) {
if (llvm::AreStatisticsEnabled())
llvm::PrintStatistics();
+ finishOptimizationRemarks();
+
return true;
}
/// setCodeGenDebugOptions - Set codegen debugging options to aid in debugging
/// LTO problems.
-void LTOCodeGenerator::setCodeGenDebugOptions(const char *Options) {
+void LTOCodeGenerator::setCodeGenDebugOptions(StringRef Options) {
for (std::pair<StringRef, StringRef> o = getToken(Options); !o.first.empty();
o = getToken(o.second))
CodegenOptions.push_back(o.first);
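
The new setupOptimizationRemarks/finishOptimizationRemarks pair opens a YAML remarks file only when -lto-pass-remarks-output is given, wires it into the context, and flushes it explicitly once code generation finishes. A simplified, standard-library-only sketch of that optional-diagnostics-file lifecycle; the file name handling and the remark record below are made up for illustration:

#include <fstream>
#include <iostream>
#include <memory>
#include <string>

struct RemarksState {
  std::unique_ptr<std::ofstream> Out; // null when remarks are disabled
};

// Returns false when the requested file cannot be opened, much like the
// patched setupOptimizationRemarks reports the error and bails out.
bool setupRemarks(RemarksState &State, const std::string &Filename) {
  if (Filename.empty())
    return true; // remarks not requested, nothing to do
  auto Out = std::make_unique<std::ofstream>(Filename);
  if (!*Out) {
    std::cerr << "error: cannot open remarks file " << Filename << '\n';
    return false;
  }
  State.Out = std::move(Out);
  return true;
}

void finishRemarks(RemarksState &State) {
  if (State.Out)
    State.Out->flush(); // explicit flush, since a dtor may never run
}

int main() {
  RemarksState State;
  if (!setupRemarks(State, "remarks.yaml"))
    return 1;
  if (State.Out)
    *State.Out << "--- !Passed\nPass: inline\n...\n"; // illustrative record only
  finishRemarks(State);
}
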
diff --git a/contrib/llvm/lib/LTO/LTOModule.cpp b/contrib/llvm/lib/LTO/LTOModule.cpp
index a1d6f93f0875..89aeb8000038 100644
--- a/contrib/llvm/lib/LTO/LTOModule.cpp
+++ b/contrib/llvm/lib/LTO/LTOModule.cpp
@@ -14,12 +14,11 @@
#include "llvm/LTO/legacy/LTOModule.h"
#include "llvm/ADT/Triple.h"
-#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Mangler.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCExpr.h"
@@ -49,9 +48,11 @@
using namespace llvm;
using namespace llvm::object;
-LTOModule::LTOModule(std::unique_ptr<object::IRObjectFile> Obj,
+LTOModule::LTOModule(std::unique_ptr<Module> M, MemoryBufferRef MBRef,
llvm::TargetMachine *TM)
- : IRFile(std::move(Obj)), _target(TM) {}
+ : Mod(std::move(M)), MBRef(MBRef), _target(TM) {
+ SymTab.addModule(Mod.get());
+}
LTOModule::~LTOModule() {}
@@ -63,7 +64,7 @@ bool LTOModule::isBitcodeFile(const void *Mem, size_t Length) {
return bool(BCData);
}
-bool LTOModule::isBitcodeFile(const char *Path) {
+bool LTOModule::isBitcodeFile(StringRef Path) {
ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
MemoryBuffer::getFile(Path);
if (!BufferOrErr)
@@ -77,13 +78,12 @@ bool LTOModule::isBitcodeFile(const char *Path) {
bool LTOModule::isThinLTO() {
// Right now the detection is only based on the summary presence. We may want
// to add a dedicated flag at some point.
- return hasGlobalValueSummary(IRFile->getMemoryBufferRef(),
- [](const DiagnosticInfo &DI) {
- DiagnosticPrinterRawOStream DP(errs());
- DI.print(DP);
- errs() << '\n';
- return;
- });
+ Expected<bool> Result = hasGlobalValueSummary(MBRef);
+ if (!Result) {
+ logAllUnhandledErrors(Result.takeError(), errs(), "");
+ return false;
+ }
+ return *Result;
}
bool LTOModule::isBitcodeForTarget(MemoryBuffer *Buffer,
@@ -93,8 +93,11 @@ bool LTOModule::isBitcodeForTarget(MemoryBuffer *Buffer,
if (!BCOrErr)
return false;
LLVMContext Context;
- std::string Triple = getBitcodeTargetTriple(*BCOrErr, Context);
- return StringRef(Triple).startswith(TriplePrefix);
+ ErrorOr<std::string> TripleOrErr =
+ expectedToErrorOrAndEmitErrors(Context, getBitcodeTargetTriple(*BCOrErr));
+ if (!TripleOrErr)
+ return false;
+ return StringRef(*TripleOrErr).startswith(TriplePrefix);
}
std::string LTOModule::getProducerString(MemoryBuffer *Buffer) {
@@ -103,11 +106,15 @@ std::string LTOModule::getProducerString(MemoryBuffer *Buffer) {
if (!BCOrErr)
return "";
LLVMContext Context;
- return getBitcodeProducerString(*BCOrErr, Context);
+ ErrorOr<std::string> ProducerOrErr = expectedToErrorOrAndEmitErrors(
+ Context, getBitcodeProducerString(*BCOrErr));
+ if (!ProducerOrErr)
+ return "";
+ return *ProducerOrErr;
}
ErrorOr<std::unique_ptr<LTOModule>>
-LTOModule::createFromFile(LLVMContext &Context, const char *path,
+LTOModule::createFromFile(LLVMContext &Context, StringRef path,
const TargetOptions &options) {
ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
MemoryBuffer::getFile(path);
@@ -121,15 +128,15 @@ LTOModule::createFromFile(LLVMContext &Context, const char *path,
}
ErrorOr<std::unique_ptr<LTOModule>>
-LTOModule::createFromOpenFile(LLVMContext &Context, int fd, const char *path,
+LTOModule::createFromOpenFile(LLVMContext &Context, int fd, StringRef path,
size_t size, const TargetOptions &options) {
return createFromOpenFileSlice(Context, fd, path, size, 0, options);
}
ErrorOr<std::unique_ptr<LTOModule>>
-LTOModule::createFromOpenFileSlice(LLVMContext &Context, int fd,
- const char *path, size_t map_size,
- off_t offset, const TargetOptions &options) {
+LTOModule::createFromOpenFileSlice(LLVMContext &Context, int fd, StringRef path,
+ size_t map_size, off_t offset,
+ const TargetOptions &options) {
ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
MemoryBuffer::getOpenFileSlice(fd, path, map_size, offset);
if (std::error_code EC = BufferOrErr.getError()) {
@@ -179,20 +186,14 @@ parseBitcodeFileImpl(MemoryBufferRef Buffer, LLVMContext &Context,
if (!ShouldBeLazy) {
// Parse the full file.
- ErrorOr<std::unique_ptr<Module>> M = parseBitcodeFile(*MBOrErr, Context);
- if (std::error_code EC = M.getError())
- return EC;
- return std::move(*M);
+ return expectedToErrorOrAndEmitErrors(Context,
+ parseBitcodeFile(*MBOrErr, Context));
}
// Parse lazily.
- std::unique_ptr<MemoryBuffer> LightweightBuf =
- MemoryBuffer::getMemBuffer(*MBOrErr, false);
- ErrorOr<std::unique_ptr<Module>> M = getLazyBitcodeModule(
- std::move(LightweightBuf), Context, true /*ShouldLazyLoadMetadata*/);
- if (std::error_code EC = M.getError())
- return EC;
- return std::move(*M);
+ return expectedToErrorOrAndEmitErrors(
+ Context,
+ getLazyBitcodeModule(*MBOrErr, Context, true /*ShouldLazyLoadMetadata*/));
}
ErrorOr<std::unique_ptr<LTOModule>>
@@ -232,12 +233,8 @@ LTOModule::makeLTOModule(MemoryBufferRef Buffer, const TargetOptions &options,
TargetMachine *target =
march->createTargetMachine(TripleStr, CPU, FeatureStr, options, None);
- M->setDataLayout(target->createDataLayout());
-
- std::unique_ptr<object::IRObjectFile> IRObj(
- new object::IRObjectFile(Buffer, std::move(M)));
- std::unique_ptr<LTOModule> Ret(new LTOModule(std::move(IRObj), target));
+ std::unique_ptr<LTOModule> Ret(new LTOModule(std::move(M), Buffer, target));
Ret->parseSymbols();
Ret->parseMetadata();
@@ -281,7 +278,7 @@ void LTOModule::addObjCClass(const GlobalVariable *clgv) {
_undefines.insert(std::make_pair(superclassName, NameAndAttributes()));
if (IterBool.second) {
NameAndAttributes &info = IterBool.first->second;
- info.name = IterBool.first->first().data();
+ info.name = IterBool.first->first();
info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
info.isFunction = false;
info.symbol = clgv;
@@ -294,7 +291,7 @@ void LTOModule::addObjCClass(const GlobalVariable *clgv) {
auto Iter = _defines.insert(className).first;
NameAndAttributes info;
- info.name = Iter->first().data();
+ info.name = Iter->first();
info.attributes = LTO_SYMBOL_PERMISSIONS_DATA |
LTO_SYMBOL_DEFINITION_REGULAR | LTO_SYMBOL_SCOPE_DEFAULT;
info.isFunction = false;
@@ -320,7 +317,7 @@ void LTOModule::addObjCCategory(const GlobalVariable *clgv) {
return;
NameAndAttributes &info = IterBool.first->second;
- info.name = IterBool.first->first().data();
+ info.name = IterBool.first->first();
info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
info.isFunction = false;
info.symbol = clgv;
@@ -339,24 +336,25 @@ void LTOModule::addObjCClassRef(const GlobalVariable *clgv) {
return;
NameAndAttributes &info = IterBool.first->second;
- info.name = IterBool.first->first().data();
+ info.name = IterBool.first->first();
info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED;
info.isFunction = false;
info.symbol = clgv;
}
-void LTOModule::addDefinedDataSymbol(const object::BasicSymbolRef &Sym) {
+void LTOModule::addDefinedDataSymbol(ModuleSymbolTable::Symbol Sym) {
SmallString<64> Buffer;
{
raw_svector_ostream OS(Buffer);
- Sym.printName(OS);
+ SymTab.printSymbolName(OS, Sym);
+ Buffer.c_str();
}
- const GlobalValue *V = IRFile->getSymbolGV(Sym.getRawDataRefImpl());
- addDefinedDataSymbol(Buffer.c_str(), V);
+ const GlobalValue *V = Sym.get<GlobalValue *>();
+ addDefinedDataSymbol(Buffer, V);
}
-void LTOModule::addDefinedDataSymbol(const char *Name, const GlobalValue *v) {
+void LTOModule::addDefinedDataSymbol(StringRef Name, const GlobalValue *v) {
// Add to list of defined symbols.
addDefinedSymbol(Name, v, false);
@@ -406,24 +404,24 @@ void LTOModule::addDefinedDataSymbol(const char *Name, const GlobalValue *v) {
}
}
-void LTOModule::addDefinedFunctionSymbol(const object::BasicSymbolRef &Sym) {
+void LTOModule::addDefinedFunctionSymbol(ModuleSymbolTable::Symbol Sym) {
SmallString<64> Buffer;
{
raw_svector_ostream OS(Buffer);
- Sym.printName(OS);
+ SymTab.printSymbolName(OS, Sym);
+ Buffer.c_str();
}
- const Function *F =
- cast<Function>(IRFile->getSymbolGV(Sym.getRawDataRefImpl()));
- addDefinedFunctionSymbol(Buffer.c_str(), F);
+ const Function *F = cast<Function>(Sym.get<GlobalValue *>());
+ addDefinedFunctionSymbol(Buffer, F);
}
-void LTOModule::addDefinedFunctionSymbol(const char *Name, const Function *F) {
+void LTOModule::addDefinedFunctionSymbol(StringRef Name, const Function *F) {
// add to list of defined symbols
addDefinedSymbol(Name, F, true);
}
-void LTOModule::addDefinedSymbol(const char *Name, const GlobalValue *def,
+void LTOModule::addDefinedSymbol(StringRef Name, const GlobalValue *def,
bool isFunction) {
// set alignment part log2() can have rounding errors
uint32_t align = def->getAlignment();
@@ -472,8 +470,8 @@ void LTOModule::addDefinedSymbol(const char *Name, const GlobalValue *def,
// fill information structure
NameAndAttributes info;
StringRef NameRef = Iter->first();
- info.name = NameRef.data();
- assert(info.name[NameRef.size()] == '\0');
+ info.name = NameRef;
+ assert(NameRef.data()[NameRef.size()] == '\0');
info.attributes = attr;
info.isFunction = isFunction;
info.symbol = def;
@@ -484,7 +482,7 @@ void LTOModule::addDefinedSymbol(const char *Name, const GlobalValue *def,
/// addAsmGlobalSymbol - Add a global symbol from module-level ASM to the
/// defined list.
-void LTOModule::addAsmGlobalSymbol(const char *name,
+void LTOModule::addAsmGlobalSymbol(StringRef name,
lto_symbol_attributes scope) {
auto IterBool = _defines.insert(name);
@@ -492,7 +490,7 @@ void LTOModule::addAsmGlobalSymbol(const char *name,
if (!IterBool.second)
return;
- NameAndAttributes &info = _undefines[IterBool.first->first().data()];
+ NameAndAttributes &info = _undefines[IterBool.first->first()];
if (info.symbol == nullptr) {
// FIXME: This is trying to take care of module ASM like this:
@@ -504,7 +502,7 @@ void LTOModule::addAsmGlobalSymbol(const char *name,
// much.
// fill information structure
- info.name = IterBool.first->first().data();
+ info.name = IterBool.first->first();
info.attributes =
LTO_SYMBOL_PERMISSIONS_DATA | LTO_SYMBOL_DEFINITION_REGULAR | scope;
info.isFunction = false;
@@ -526,10 +524,10 @@ void LTOModule::addAsmGlobalSymbol(const char *name,
/// addAsmGlobalSymbolUndef - Add a global symbol from module-level ASM to the
/// undefined list.
-void LTOModule::addAsmGlobalSymbolUndef(const char *name) {
+void LTOModule::addAsmGlobalSymbolUndef(StringRef name) {
auto IterBool = _undefines.insert(std::make_pair(name, NameAndAttributes()));
- _asm_undefines.push_back(IterBool.first->first().data());
+ _asm_undefines.push_back(IterBool.first->first());
// we already have the symbol
if (!IterBool.second)
@@ -538,19 +536,20 @@ void LTOModule::addAsmGlobalSymbolUndef(const char *name) {
uint32_t attr = LTO_SYMBOL_DEFINITION_UNDEFINED;
attr |= LTO_SYMBOL_SCOPE_DEFAULT;
NameAndAttributes &info = IterBool.first->second;
- info.name = IterBool.first->first().data();
+ info.name = IterBool.first->first();
info.attributes = attr;
info.isFunction = false;
info.symbol = nullptr;
}
/// Add a symbol which isn't defined just yet to a list to be resolved later.
-void LTOModule::addPotentialUndefinedSymbol(const object::BasicSymbolRef &Sym,
+void LTOModule::addPotentialUndefinedSymbol(ModuleSymbolTable::Symbol Sym,
bool isFunc) {
SmallString<64> name;
{
raw_svector_ostream OS(name);
- Sym.printName(OS);
+ SymTab.printSymbolName(OS, Sym);
+ name.c_str();
}
auto IterBool = _undefines.insert(std::make_pair(name, NameAndAttributes()));
@@ -561,9 +560,9 @@ void LTOModule::addPotentialUndefinedSymbol(const object::BasicSymbolRef &Sym,
NameAndAttributes &info = IterBool.first->second;
- info.name = IterBool.first->first().data();
+ info.name = IterBool.first->first();
- const GlobalValue *decl = IRFile->getSymbolGV(Sym.getRawDataRefImpl());
+ const GlobalValue *decl = Sym.dyn_cast<GlobalValue *>();
if (decl->hasExternalWeakLinkage())
info.attributes = LTO_SYMBOL_DEFINITION_WEAKUNDEF;
@@ -575,9 +574,9 @@ void LTOModule::addPotentialUndefinedSymbol(const object::BasicSymbolRef &Sym,
}
void LTOModule::parseSymbols() {
- for (auto &Sym : IRFile->symbols()) {
- const GlobalValue *GV = IRFile->getSymbolGV(Sym.getRawDataRefImpl());
- uint32_t Flags = Sym.getFlags();
+ for (auto Sym : SymTab.symbols()) {
+ auto *GV = Sym.dyn_cast<GlobalValue *>();
+ uint32_t Flags = SymTab.getSymbolFlags(Sym);
if (Flags & object::BasicSymbolRef::SF_FormatSpecific)
continue;
@@ -587,9 +586,10 @@ void LTOModule::parseSymbols() {
SmallString<64> Buffer;
{
raw_svector_ostream OS(Buffer);
- Sym.printName(OS);
+ SymTab.printSymbolName(OS, Sym);
+ Buffer.c_str();
}
- const char *Name = Buffer.c_str();
+ StringRef Name(Buffer);
if (IsUndefined)
addAsmGlobalSymbolUndef(Name);
@@ -648,12 +648,10 @@ void LTOModule::parseMetadata() {
}
// Globals
- Mangler Mang;
for (const NameAndAttributes &Sym : _symbols) {
if (!Sym.symbol)
continue;
- _target->getObjFileLowering()->emitLinkerFlagsForGlobal(OS, Sym.symbol,
- Mang);
+ _target->getObjFileLowering()->emitLinkerFlagsForGlobal(OS, Sym.symbol);
}
// Add other interesting metadata here.
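
Most of the LTOModule changes above replace APIs that printed diagnostics internally with Expected<T>-returning ones, and the callers now log the error and fall back to a neutral value (isThinLTO returns false, getProducerString returns an empty string). A rough stand-alone illustration of that report-then-fall-back shape, using a plain result struct instead of llvm::Expected; the producer string and the bitcode check are invented:

#include <iostream>
#include <optional>
#include <string>

// A tiny stand-in for the Expected<T>-style flow: the callee reports *why* it
// failed, and the caller decides on a fallback instead of crashing.
struct ParseResult {
  std::optional<std::string> Producer; // set on success
  std::string Error;                   // set on failure
};

ParseResult readProducerString(const std::string &Buffer) {
  if (Buffer.rfind("BC", 0) != 0)      // hypothetical "is this bitcode" check
    return {std::nullopt, "not a bitcode buffer"};
  return {std::string("clang 4.0"), ""};
}

std::string getProducerOrEmpty(const std::string &Buffer) {
  ParseResult R = readProducerString(Buffer);
  if (!R.Producer) {
    std::cerr << "error: " << R.Error << '\n'; // log, like logAllUnhandledErrors
    return "";                                 // graceful fallback
  }
  return *R.Producer;
}

int main() {
  std::cout << getProducerOrEmpty("BC...") << '\n';
  std::cout << getProducerOrEmpty("not bitcode") << '\n';
}
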
diff --git a/contrib/llvm/lib/LTO/ThinLTOCodeGenerator.cpp b/contrib/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
index bfb0980bc165..880dc3dfae98 100644
--- a/contrib/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
+++ b/contrib/llvm/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -21,10 +21,12 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Bitcode/BitcodeReader.h"
+#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/Bitcode/BitcodeWriterPass.h"
-#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/ExecutionEngine/ObjectMemoryBuffer.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/LLVMContext.h"
@@ -38,10 +40,13 @@
#include "llvm/Object/ModuleSummaryIndexObjectFile.h"
#include "llvm/Support/CachePruning.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Error.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/SHA1.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/ThreadPool.h"
+#include "llvm/Support/Threading.h"
+#include "llvm/Support/ToolOutputFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/FunctionImport.h"
@@ -59,17 +64,34 @@ using namespace llvm;
namespace llvm {
// Flags -discard-value-names, defined in LTOCodeGenerator.cpp
extern cl::opt<bool> LTODiscardValueNames;
+extern cl::opt<std::string> LTORemarksFilename;
+extern cl::opt<bool> LTOPassRemarksWithHotness;
}
namespace {
-static cl::opt<int> ThreadCount("threads",
- cl::init(std::thread::hardware_concurrency()));
+static cl::opt<int>
+ ThreadCount("threads", cl::init(llvm::heavyweight_hardware_concurrency()));
-static void diagnosticHandler(const DiagnosticInfo &DI) {
- DiagnosticPrinterRawOStream DP(errs());
- DI.print(DP);
- errs() << '\n';
+Expected<std::unique_ptr<tool_output_file>>
+setupOptimizationRemarks(LLVMContext &Ctx, int Count) {
+ if (LTOPassRemarksWithHotness)
+ Ctx.setDiagnosticHotnessRequested(true);
+
+ if (LTORemarksFilename.empty())
+ return nullptr;
+
+ std::string FileName =
+ LTORemarksFilename + ".thin." + llvm::utostr(Count) + ".yaml";
+ std::error_code EC;
+ auto DiagnosticOutputFile =
+ llvm::make_unique<tool_output_file>(FileName, EC, sys::fs::F_None);
+ if (EC)
+ return errorCodeToError(EC);
+ Ctx.setDiagnosticsOutputFile(
+ llvm::make_unique<yaml::Output>(DiagnosticOutputFile->os()));
+ DiagnosticOutputFile->keep();
+ return std::move(DiagnosticOutputFile);
}
// Simple helper to save temporary files for debug.
@@ -78,9 +100,9 @@ static void saveTempBitcode(const Module &TheModule, StringRef TempDir,
if (TempDir.empty())
return;
// User asked to save temps, let's dump the bitcode file after import.
- auto SaveTempPath = TempDir + llvm::utostr(count) + Suffix;
+ std::string SaveTempPath = (TempDir + llvm::utostr(count) + Suffix).str();
std::error_code EC;
- raw_fd_ostream OS(SaveTempPath.str(), EC, sys::fs::F_None);
+ raw_fd_ostream OS(SaveTempPath, EC, sys::fs::F_None);
if (EC)
report_fatal_error(Twine("Failed to open ") + SaveTempPath +
" to save optimized bitcode\n");
@@ -144,22 +166,48 @@ static void promoteModule(Module &TheModule, const ModuleSummaryIndex &Index) {
report_fatal_error("renameModuleForThinLTO failed");
}
+static std::unique_ptr<Module>
+loadModuleFromBuffer(const MemoryBufferRef &Buffer, LLVMContext &Context,
+ bool Lazy, bool IsImporting) {
+ SMDiagnostic Err;
+ Expected<std::unique_ptr<Module>> ModuleOrErr =
+ Lazy
+ ? getLazyBitcodeModule(Buffer, Context,
+ /* ShouldLazyLoadMetadata */ true, IsImporting)
+ : parseBitcodeFile(Buffer, Context);
+ if (!ModuleOrErr) {
+ handleAllErrors(ModuleOrErr.takeError(), [&](ErrorInfoBase &EIB) {
+ SMDiagnostic Err = SMDiagnostic(Buffer.getBufferIdentifier(),
+ SourceMgr::DK_Error, EIB.message());
+ Err.print("ThinLTO", errs());
+ });
+ report_fatal_error("Can't load module, abort.");
+ }
+ return std::move(ModuleOrErr.get());
+}
+
static void
crossImportIntoModule(Module &TheModule, const ModuleSummaryIndex &Index,
StringMap<MemoryBufferRef> &ModuleMap,
const FunctionImporter::ImportMapTy &ImportList) {
- ModuleLoader Loader(TheModule.getContext(), ModuleMap);
+ auto Loader = [&](StringRef Identifier) {
+ return loadModuleFromBuffer(ModuleMap[Identifier], TheModule.getContext(),
+ /*Lazy=*/true, /*IsImporting*/ true);
+ };
+
FunctionImporter Importer(Index, Loader);
- Importer.importFunctions(TheModule, ImportList);
+ if (!Importer.importFunctions(TheModule, ImportList))
+ report_fatal_error("importFunctions failed");
}
-static void optimizeModule(Module &TheModule, TargetMachine &TM) {
+static void optimizeModule(Module &TheModule, TargetMachine &TM,
+ unsigned OptLevel) {
// Populate the PassManager
PassManagerBuilder PMB;
PMB.LibraryInfo = new TargetLibraryInfoImpl(TM.getTargetTriple());
PMB.Inliner = createFunctionInliningPass();
// FIXME: should get it from the bitcode?
- PMB.OptLevel = 3;
+ PMB.OptLevel = OptLevel;
PMB.LoopVectorize = true;
PMB.SLPVectorize = true;
PMB.VerifyInput = true;
@@ -233,11 +281,22 @@ public:
if (CachePath.empty())
return;
+ if (!Index.modulePaths().count(ModuleID))
+ // The module does not have an entry, it can't have a hash at all
+ return;
+
// Compute the unique hash for this entry
// This is based on the current compiler version, the module itself, the
// export list, the hash for every single module in the import list, the
// list of ResolvedODR for the module, and the list of preserved symbols.
+ // Include the hash for the current module
+ auto ModHash = Index.getModuleHash(ModuleID);
+
+ if (all_of(ModHash, [](uint32_t V) { return V == 0; }))
+ // No hash entry, no caching!
+ return;
+
SHA1 Hasher;
// Start with the compiler revision
@@ -246,8 +305,6 @@ public:
Hasher.update(LLVM_REVISION);
#endif
- // Include the hash for the current module
- auto ModHash = Index.getModuleHash(ModuleID);
Hasher.update(ArrayRef<uint8_t>((uint8_t *)&ModHash[0], sizeof(ModHash)));
for (auto F : ExportList)
// The export list can impact the internalization, be conservative here
@@ -288,10 +345,9 @@ public:
}
// Cache the Produced object file
- std::unique_ptr<MemoryBuffer>
- write(std::unique_ptr<MemoryBuffer> OutputBuffer) {
+ void write(const MemoryBuffer &OutputBuffer) {
if (EntryPath.empty())
- return OutputBuffer;
+ return;
// Write to a temporary to avoid race condition
SmallString<128> TempFilename;
@@ -304,7 +360,7 @@ public:
}
{
raw_fd_ostream OS(TempFD, /* ShouldClose */ true);
- OS << OutputBuffer->getBuffer();
+ OS << OutputBuffer.getBuffer();
}
// Rename to final destination (hopefully race condition won't matter here)
EC = sys::fs::rename(TempFilename, EntryPath);
@@ -314,16 +370,8 @@ public:
if (EC)
report_fatal_error(Twine("Failed to open ") + EntryPath +
" to save cached entry\n");
- OS << OutputBuffer->getBuffer();
- }
- auto ReloadedBufferOrErr = MemoryBuffer::getFile(EntryPath);
- if (auto EC = ReloadedBufferOrErr.getError()) {
- // FIXME diagnose
- errs() << "error: can't reload cached file '" << EntryPath
- << "': " << EC.message() << "\n";
- return OutputBuffer;
+ OS << OutputBuffer.getBuffer();
}
- return std::move(*ReloadedBufferOrErr);
}
};
@@ -336,7 +384,7 @@ ProcessThinLTOModule(Module &TheModule, ModuleSummaryIndex &Index,
const GVSummaryMapTy &DefinedGlobals,
const ThinLTOCodeGenerator::CachingOptions &CacheOptions,
bool DisableCodeGen, StringRef SaveTempsDir,
- unsigned count) {
+ unsigned OptLevel, unsigned count) {
// "Benchmark"-like optimization: single-source case
bool SingleModule = (ModuleMap.size() == 1);
@@ -368,7 +416,7 @@ ProcessThinLTOModule(Module &TheModule, ModuleSummaryIndex &Index,
saveTempBitcode(TheModule, SaveTempsDir, count, ".3.imported.bc");
}
- optimizeModule(TheModule, TM);
+ optimizeModule(TheModule, TM, OptLevel);
saveTempBitcode(TheModule, SaveTempsDir, count, ".4.opt.bc");
@@ -377,8 +425,9 @@ ProcessThinLTOModule(Module &TheModule, ModuleSummaryIndex &Index,
SmallVector<char, 128> OutputBuffer;
{
raw_svector_ostream OS(OutputBuffer);
- ModuleSummaryIndexBuilder IndexBuilder(&TheModule);
- WriteBitcodeToFile(&TheModule, OS, true, &IndexBuilder.getIndex());
+ ProfileSummaryInfo PSI(TheModule);
+ auto Index = buildModuleSummaryIndex(TheModule, nullptr, nullptr);
+ WriteBitcodeToFile(&TheModule, OS, true, &Index);
}
return make_unique<ObjectMemoryBuffer>(std::move(OutputBuffer));
}
@@ -439,14 +488,23 @@ void ThinLTOCodeGenerator::addModule(StringRef Identifier, StringRef Data) {
if (Modules.empty()) {
// First module added, so initialize the triple and some options
LLVMContext Context;
- Triple TheTriple(getBitcodeTargetTriple(Buffer, Context));
+ StringRef TripleStr;
+ ErrorOr<std::string> TripleOrErr =
+ expectedToErrorOrAndEmitErrors(Context, getBitcodeTargetTriple(Buffer));
+ if (TripleOrErr)
+ TripleStr = *TripleOrErr;
+ Triple TheTriple(TripleStr);
initTMBuilder(TMBuilder, Triple(TheTriple));
}
#ifndef NDEBUG
else {
LLVMContext Context;
- assert(TMBuilder.TheTriple.str() ==
- getBitcodeTargetTriple(Buffer, Context) &&
+ StringRef TripleStr;
+ ErrorOr<std::string> TripleOrErr =
+ expectedToErrorOrAndEmitErrors(Context, getBitcodeTargetTriple(Buffer));
+ if (TripleOrErr)
+ TripleStr = *TripleOrErr;
+ assert(TMBuilder.TheTriple.str() == TripleStr &&
"ThinLTO modules with different triple not supported");
}
#endif
@@ -477,6 +535,7 @@ std::unique_ptr<TargetMachine> TargetMachineBuilder::create() const {
SubtargetFeatures Features(MAttr);
Features.getDefaultSubtargetFeatures(TheTriple);
std::string FeatureStr = Features.getString();
+
return std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine(
TheTriple.str(), MCpu, FeatureStr, Options, RelocModel,
CodeModel::Default, CGOptLevel));
@@ -490,13 +549,13 @@ std::unique_ptr<ModuleSummaryIndex> ThinLTOCodeGenerator::linkCombinedIndex() {
std::unique_ptr<ModuleSummaryIndex> CombinedIndex;
uint64_t NextModuleId = 0;
for (auto &ModuleBuffer : Modules) {
- ErrorOr<std::unique_ptr<object::ModuleSummaryIndexObjectFile>> ObjOrErr =
- object::ModuleSummaryIndexObjectFile::create(ModuleBuffer,
- diagnosticHandler);
- if (std::error_code EC = ObjOrErr.getError()) {
+ Expected<std::unique_ptr<object::ModuleSummaryIndexObjectFile>> ObjOrErr =
+ object::ModuleSummaryIndexObjectFile::create(ModuleBuffer);
+ if (!ObjOrErr) {
// FIXME diagnose
- errs() << "error: can't create ModuleSummaryIndexObjectFile for buffer: "
- << EC.message() << "\n";
+ logAllUnhandledErrors(
+ ObjOrErr.takeError(), errs(),
+ "error: can't create ModuleSummaryIndexObjectFile for buffer: ");
return nullptr;
}
auto Index = (*ObjOrErr)->takeIndex();
@@ -517,6 +576,7 @@ void ThinLTOCodeGenerator::promote(Module &TheModule,
ModuleSummaryIndex &Index) {
auto ModuleCount = Index.modulePaths().size();
auto ModuleIdentifier = TheModule.getModuleIdentifier();
+
// Collect for each module the list of function it defines (GUID -> Summary).
StringMap<GVSummaryMapTy> ModuleToDefinedGVSummaries;
Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
@@ -534,6 +594,20 @@ void ThinLTOCodeGenerator::promote(Module &TheModule,
thinLTOResolveWeakForLinkerModule(
TheModule, ModuleToDefinedGVSummaries[ModuleIdentifier]);
+ // Convert the preserved symbols set from string to GUID
+ auto GUIDPreservedSymbols = computeGUIDPreservedSymbols(
+ PreservedSymbols, Triple(TheModule.getTargetTriple()));
+
+ // Promote the exported values in the index, so that they are promoted
+ // in the module.
+ auto isExported = [&](StringRef ModuleIdentifier, GlobalValue::GUID GUID) {
+ const auto &ExportList = ExportLists.find(ModuleIdentifier);
+ return (ExportList != ExportLists.end() &&
+ ExportList->second.count(GUID)) ||
+ GUIDPreservedSymbols.count(GUID);
+ };
+ thinLTOInternalizeAndPromoteInIndex(Index, isExported);
+
promoteModule(TheModule, Index);
}
@@ -578,7 +652,7 @@ void ThinLTOCodeGenerator::gatherImportedSummariesForModule(
ExportLists);
llvm::gatherImportedSummariesForModule(ModulePath, ModuleToDefinedGVSummaries,
- ImportLists,
+ ImportLists[ModulePath],
ModuleToSummariesForIndex);
}
@@ -601,7 +675,7 @@ void ThinLTOCodeGenerator::emitImports(StringRef ModulePath,
ExportLists);
std::error_code EC;
- if ((EC = EmitImportsFiles(ModulePath, OutputName, ImportLists)))
+ if ((EC = EmitImportsFiles(ModulePath, OutputName, ImportLists[ModulePath])))
report_fatal_error(Twine("Failed to open ") + OutputName +
" to save imports lists\n");
}
@@ -654,7 +728,7 @@ void ThinLTOCodeGenerator::optimize(Module &TheModule) {
initTMBuilder(TMBuilder, Triple(TheModule.getTargetTriple()));
// Optimize now
- optimizeModule(TheModule, *TMBuilder.create());
+ optimizeModule(TheModule, *TMBuilder.create(), OptLevel);
}
/**
@@ -665,6 +739,43 @@ std::unique_ptr<MemoryBuffer> ThinLTOCodeGenerator::codegen(Module &TheModule) {
return codegenModule(TheModule, *TMBuilder.create());
}
+/// Write out the generated object file, either from CacheEntryPath or from
+/// OutputBuffer, preferring hard-link when possible.
+/// Returns the path to the generated file in SavedObjectsDirectoryPath.
+static std::string writeGeneratedObject(int count, StringRef CacheEntryPath,
+ StringRef SavedObjectsDirectoryPath,
+ const MemoryBuffer &OutputBuffer) {
+ SmallString<128> OutputPath(SavedObjectsDirectoryPath);
+ llvm::sys::path::append(OutputPath, Twine(count) + ".thinlto.o");
+ OutputPath.c_str(); // Ensure the string is null terminated.
+ if (sys::fs::exists(OutputPath))
+ sys::fs::remove(OutputPath);
+
+ // We don't return a memory buffer to the linker, just a list of files.
+ if (!CacheEntryPath.empty()) {
+ // Cache is enabled, hard-link the entry (or copy if hard-link fails).
+ auto Err = sys::fs::create_hard_link(CacheEntryPath, OutputPath);
+ if (!Err)
+ return OutputPath.str();
+ // Hard linking failed, try to copy.
+ Err = sys::fs::copy_file(CacheEntryPath, OutputPath);
+ if (!Err)
+ return OutputPath.str();
+ // Copy failed (could be because the CacheEntry was removed from the cache
+ // in the meantime by another process), fall back and try to write down the
+ // buffer to the output.
+ errs() << "error: can't link or copy from cached entry '" << CacheEntryPath
+ << "' to '" << OutputPath << "'\n";
+ }
+ // No cache entry, just write out the buffer.
+ std::error_code Err;
+ raw_fd_ostream OS(OutputPath, Err, sys::fs::F_None);
+ if (Err)
+ report_fatal_error("Can't open output '" + OutputPath + "'\n");
+ OS << OutputBuffer.getBuffer();
+ return OutputPath.str();
+}
+
// Main entry point for the ThinLTO processing
void ThinLTOCodeGenerator::run() {
if (CodeGenOnly) {
@@ -679,7 +790,8 @@ void ThinLTOCodeGenerator::run() {
Context.setDiscardValueNames(LTODiscardValueNames);
// Parse module now
- auto TheModule = loadModuleFromBuffer(ModuleBuffer, Context, false);
+ auto TheModule = loadModuleFromBuffer(ModuleBuffer, Context, false,
+ /*IsImporting*/ false);
// CodeGen
ProducedBinaries[count] = codegen(*TheModule);
@@ -705,7 +817,16 @@ void ThinLTOCodeGenerator::run() {
// Prepare the resulting object vector
assert(ProducedBinaries.empty() && "The generator should not be reused");
- ProducedBinaries.resize(Modules.size());
+ if (SavedObjectsDirectoryPath.empty())
+ ProducedBinaries.resize(Modules.size());
+ else {
+ sys::fs::create_directories(SavedObjectsDirectoryPath);
+ bool IsDir;
+ sys::fs::is_directory(SavedObjectsDirectoryPath, IsDir);
+ if (!IsDir)
+ report_fatal_error("Unexistent dir: '" + SavedObjectsDirectoryPath + "'");
+ ProducedBinaryFiles.resize(Modules.size());
+ }
// Prepare the module map.
auto ModuleMap = generateModuleMap(Modules);
@@ -785,16 +906,22 @@ void ThinLTOCodeGenerator::run() {
ImportLists[ModuleIdentifier], ExportList,
ResolvedODR[ModuleIdentifier],
DefinedFunctions, GUIDPreservedSymbols);
+ auto CacheEntryPath = CacheEntry.getEntryPath();
{
auto ErrOrBuffer = CacheEntry.tryLoadingBuffer();
DEBUG(dbgs() << "Cache " << (ErrOrBuffer ? "hit" : "miss") << " '"
- << CacheEntry.getEntryPath() << "' for buffer " << count
- << " " << ModuleIdentifier << "\n");
+ << CacheEntryPath << "' for buffer " << count << " "
+ << ModuleIdentifier << "\n");
if (ErrOrBuffer) {
// Cache Hit!
- ProducedBinaries[count] = std::move(ErrOrBuffer.get());
+ if (SavedObjectsDirectoryPath.empty())
+ ProducedBinaries[count] = std::move(ErrOrBuffer.get());
+ else
+ ProducedBinaryFiles[count] = writeGeneratedObject(
+ count, CacheEntryPath, SavedObjectsDirectoryPath,
+ *ErrOrBuffer.get());
return;
}
}
@@ -802,9 +929,16 @@ void ThinLTOCodeGenerator::run() {
LLVMContext Context;
Context.setDiscardValueNames(LTODiscardValueNames);
Context.enableDebugTypeODRUniquing();
+ auto DiagFileOrErr = setupOptimizationRemarks(Context, count);
+ if (!DiagFileOrErr) {
+ errs() << "Error: " << toString(DiagFileOrErr.takeError()) << "\n";
+ report_fatal_error("ThinLTO: Can't get an output file for the "
+ "remarks");
+ }
// Parse module now
- auto TheModule = loadModuleFromBuffer(ModuleBuffer, Context, false);
+ auto TheModule = loadModuleFromBuffer(ModuleBuffer, Context, false,
+ /*IsImporting*/ false);
// Save temps: original file.
saveTempBitcode(*TheModule, SaveTempsDir, count, ".0.original.bc");
@@ -815,17 +949,41 @@ void ThinLTOCodeGenerator::run() {
*TheModule, *Index, ModuleMap, *TMBuilder.create(), ImportList,
ExportList, GUIDPreservedSymbols,
ModuleToDefinedGVSummaries[ModuleIdentifier], CacheOptions,
- DisableCodeGen, SaveTempsDir, count);
-
- OutputBuffer = CacheEntry.write(std::move(OutputBuffer));
- ProducedBinaries[count] = std::move(OutputBuffer);
+ DisableCodeGen, SaveTempsDir, OptLevel, count);
+
+ // Commit to the cache (if enabled)
+ CacheEntry.write(*OutputBuffer);
+
+ if (SavedObjectsDirectoryPath.empty()) {
+ // We need to generate a memory buffer for the linker.
+ if (!CacheEntryPath.empty()) {
+ // Cache is enabled, reload from the cache
+ // We do this to lower memory pressure: the buffer is on the heap
+ // and releasing it frees memory that can be used for the next input
+ // file. The final binary link will read from the VFS cache
+ // (hopefully!) or from disk if the memory pressure wasn't too high.
+ auto ReloadedBufferOrErr = CacheEntry.tryLoadingBuffer();
+ if (auto EC = ReloadedBufferOrErr.getError()) {
+ // On error, keeping the preexisting buffer and printing a
+ // diagnostic is more friendly than just crashing.
+ errs() << "error: can't reload cached file '" << CacheEntryPath
+ << "': " << EC.message() << "\n";
+ } else {
+ OutputBuffer = std::move(*ReloadedBufferOrErr);
+ }
+ }
+ ProducedBinaries[count] = std::move(OutputBuffer);
+ return;
+ }
+ ProducedBinaryFiles[count] = writeGeneratedObject(
+ count, CacheEntryPath, SavedObjectsDirectoryPath, *OutputBuffer);
}, IndexCount);
}
}
CachePruning(CacheOptions.Path)
- .setPruningInterval(CacheOptions.PruningInterval)
- .setEntryExpiration(CacheOptions.Expiration)
+ .setPruningInterval(std::chrono::seconds(CacheOptions.PruningInterval))
+ .setEntryExpiration(std::chrono::seconds(CacheOptions.Expiration))
.setMaxSize(CacheOptions.MaxPercentageOfAvailableSpace)
.prune();
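
writeGeneratedObject above prefers hard-linking the cached object into the save directory, falls back to a copy, and only writes the in-memory buffer when neither works (for instance when another process pruned the cache entry in the meantime). A rough stand-alone sketch of that fallback chain using std::filesystem; the paths and buffer contents are placeholders:

#include <filesystem>
#include <fstream>
#include <iostream>
#include <string>

namespace fs = std::filesystem;

std::string writeObject(int Count, const fs::path &CacheEntry,
                        const fs::path &OutDir, const std::string &Buffer) {
  fs::path OutputPath = OutDir / (std::to_string(Count) + ".thinlto.o");
  std::error_code EC;
  fs::remove(OutputPath, EC); // ignore "did not exist"

  if (!CacheEntry.empty()) {
    fs::create_hard_link(CacheEntry, OutputPath, EC); // cheapest option first
    if (!EC)
      return OutputPath.string();
    fs::copy_file(CacheEntry, OutputPath, EC);        // then a plain copy
    if (!EC)
      return OutputPath.string();
    std::cerr << "error: can't link or copy from " << CacheEntry << '\n';
  }
  // Last resort: write out the buffer we still hold in memory.
  std::ofstream OS(OutputPath, std::ios::binary);
  OS << Buffer;
  return OutputPath.string();
}

int main() {
  fs::create_directories("saved-objects");
  std::cout << writeObject(0, /*CacheEntry=*/"", "saved-objects", "object-bytes")
            << '\n';
}
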
diff --git a/contrib/llvm/lib/LTO/UpdateCompilerUsed.cpp b/contrib/llvm/lib/LTO/UpdateCompilerUsed.cpp
index a574db6fb5aa..b67d9ea5989d 100644
--- a/contrib/llvm/lib/LTO/UpdateCompilerUsed.cpp
+++ b/contrib/llvm/lib/LTO/UpdateCompilerUsed.cpp
@@ -18,6 +18,7 @@
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/IPO/Internalize.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
using namespace llvm;
@@ -28,16 +29,16 @@ class PreserveLibCallsAndAsmUsed {
public:
PreserveLibCallsAndAsmUsed(const StringSet<> &AsmUndefinedRefs,
const TargetMachine &TM,
- SmallPtrSetImpl<const GlobalValue *> &LLVMUsed)
+ std::vector<GlobalValue *> &LLVMUsed)
: AsmUndefinedRefs(AsmUndefinedRefs), TM(TM), LLVMUsed(LLVMUsed) {}
- void findInModule(const Module &TheModule) {
+ void findInModule(Module &TheModule) {
initializeLibCalls(TheModule);
- for (const Function &F : TheModule)
+ for (Function &F : TheModule)
findLibCallsAndAsm(F);
- for (const GlobalVariable &GV : TheModule.globals())
+ for (GlobalVariable &GV : TheModule.globals())
findLibCallsAndAsm(GV);
- for (const GlobalAlias &GA : TheModule.aliases())
+ for (GlobalAlias &GA : TheModule.aliases())
findLibCallsAndAsm(GA);
}
@@ -51,7 +52,7 @@ private:
StringSet<> Libcalls;
// Output
- SmallPtrSetImpl<const GlobalValue *> &LLVMUsed;
+ std::vector<GlobalValue *> &LLVMUsed;
// Collect names of runtime library functions. User-defined functions with the
// same names are added to llvm.compiler.used to prevent them from being
@@ -86,7 +87,7 @@ private:
}
}
- void findLibCallsAndAsm(const GlobalValue &GV) {
+ void findLibCallsAndAsm(GlobalValue &GV) {
// There are no restrictions to apply to declarations.
if (GV.isDeclaration())
return;
@@ -100,13 +101,15 @@ private:
// optimizations like -globalopt, causing problems when later optimizations
// add new library calls (e.g., llvm.memset => memset and printf => puts).
// Leave it to the linker to remove any dead code (e.g. with -dead_strip).
- if (isa<Function>(GV) && Libcalls.count(GV.getName()))
- LLVMUsed.insert(&GV);
+ if (isa<Function>(GV) && Libcalls.count(GV.getName())) {
+ LLVMUsed.push_back(&GV);
+ return;
+ }
SmallString<64> Buffer;
TM.getNameWithPrefix(Buffer, &GV, Mangler);
if (AsmUndefinedRefs.count(Buffer))
- LLVMUsed.insert(&GV);
+ LLVMUsed.push_back(&GV);
}
};
@@ -114,33 +117,12 @@ private:
void llvm::updateCompilerUsed(Module &TheModule, const TargetMachine &TM,
const StringSet<> &AsmUndefinedRefs) {
- SmallPtrSet<const GlobalValue *, 8> UsedValues;
+ std::vector<GlobalValue *> UsedValues;
PreserveLibCallsAndAsmUsed(AsmUndefinedRefs, TM, UsedValues)
.findInModule(TheModule);
if (UsedValues.empty())
return;
- llvm::Type *i8PTy = llvm::Type::getInt8PtrTy(TheModule.getContext());
- std::vector<Constant *> UsedValuesList;
- for (const auto *GV : UsedValues) {
- Constant *c =
- ConstantExpr::getBitCast(const_cast<GlobalValue *>(GV), i8PTy);
- UsedValuesList.push_back(c);
- }
-
- GlobalVariable *LLVMUsed = TheModule.getGlobalVariable("llvm.compiler.used");
- if (LLVMUsed) {
- ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer());
- for (auto &V : Inits->operands())
- UsedValuesList.push_back(cast<Constant>(&V));
- LLVMUsed->eraseFromParent();
- }
-
- llvm::ArrayType *ATy = llvm::ArrayType::get(i8PTy, UsedValuesList.size());
- LLVMUsed = new llvm::GlobalVariable(
- TheModule, ATy, false, llvm::GlobalValue::AppendingLinkage,
- llvm::ConstantArray::get(ATy, UsedValuesList), "llvm.compiler.used");
-
- LLVMUsed->setSection("llvm.metadata");
+ appendToCompilerUsed(TheModule, UsedValues);
}
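
updateCompilerUsed now gathers the values in a std::vector and hands them to appendToCompilerUsed instead of rebuilding llvm.compiler.used by hand; moving from SmallPtrSet to a vector also gives a deterministic emission order. A small sketch of the usual vector-plus-seen-set idiom for keeping first-insertion order while rejecting duplicates (plain C++, illustrative names only):

#include <iostream>
#include <string>
#include <unordered_set>
#include <vector>

// Collects symbol names in first-seen order, skipping duplicates; a pointer
// set does not guarantee this kind of stable ordering.
class OrderedUsedList {
  std::vector<std::string> Order;
  std::unordered_set<std::string> Seen;

public:
  void add(const std::string &Name) {
    if (Seen.insert(Name).second)
      Order.push_back(Name);
  }
  const std::vector<std::string> &values() const { return Order; }
};

int main() {
  OrderedUsedList Used;
  for (const char *Name : {"memset", "printf", "memset", "__asm_ref"})
    Used.add(Name);
  for (const std::string &Name : Used.values())
    std::cout << Name << '\n'; // memset, printf, __asm_ref (stable order)
}
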
diff --git a/contrib/llvm/lib/LibDriver/LibDriver.cpp b/contrib/llvm/lib/LibDriver/LibDriver.cpp
index ea6d921d0f8e..bcdec4f7a933 100644
--- a/contrib/llvm/lib/LibDriver/LibDriver.cpp
+++ b/contrib/llvm/lib/LibDriver/LibDriver.cpp
@@ -143,6 +143,15 @@ int llvm::libDriverMain(llvm::ArrayRef<const char*> ArgsArr) {
});
return 1;
}
+ sys::fs::file_magic Magic =
+ sys::fs::identify_magic(MOrErr->Buf->getBuffer());
+ if (Magic != sys::fs::file_magic::coff_object &&
+ Magic != sys::fs::file_magic::bitcode &&
+ Magic != sys::fs::file_magic::windows_resource) {
+ llvm::errs() << Arg->getValue()
+ << ": not a COFF object, bitcode or resource file\n";
+ return 1;
+ }
Members.emplace_back(std::move(*MOrErr));
}
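
The added check in libDriverMain rejects archive members that are not COFF objects, bitcode, or Windows resources by looking at the buffer's magic bytes. A minimal illustration of that kind of signature sniffing; only the bitcode magic ('B', 'C', 0xC0, 0xDE) is a real signature here, and the rest of the classification is deliberately simplified:

#include <cstdint>
#include <iostream>
#include <vector>

enum class FileKind { Bitcode, Unknown };

// Sniff the first bytes of a buffer.  LLVM bitcode files start with the bytes
// 'B', 'C', 0xC0, 0xDE; everything else is treated as unknown in this sketch.
FileKind identify(const std::vector<uint8_t> &Buf) {
  if (Buf.size() >= 4 && Buf[0] == 'B' && Buf[1] == 'C' && Buf[2] == 0xC0 &&
      Buf[3] == 0xDE)
    return FileKind::Bitcode;
  return FileKind::Unknown;
}

int main() {
  std::vector<uint8_t> Bitcode = {'B', 'C', 0xC0, 0xDE, 0x35, 0x14};
  std::vector<uint8_t> Text = {'h', 'e', 'l', 'l', 'o'};
  for (const auto &Buf : {Bitcode, Text}) {
    if (identify(Buf) != FileKind::Bitcode)
      std::cerr << "rejected: not a recognized input kind\n"; // like the driver
    else
      std::cout << "accepted bitcode member\n";
  }
}
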
diff --git a/contrib/llvm/lib/Linker/IRMover.cpp b/contrib/llvm/lib/Linker/IRMover.cpp
index 09c67bc47863..9f3cfc0eace4 100644
--- a/contrib/llvm/lib/Linker/IRMover.cpp
+++ b/contrib/llvm/lib/Linker/IRMover.cpp
@@ -169,11 +169,9 @@ bool TypeMapTy::areTypesIsomorphic(Type *DstTy, Type *SrcTy) {
if (DSTy->isLiteral() != SSTy->isLiteral() ||
DSTy->isPacked() != SSTy->isPacked())
return false;
- } else if (ArrayType *DATy = dyn_cast<ArrayType>(DstTy)) {
- if (DATy->getNumElements() != cast<ArrayType>(SrcTy)->getNumElements())
- return false;
- } else if (VectorType *DVTy = dyn_cast<VectorType>(DstTy)) {
- if (DVTy->getNumElements() != cast<VectorType>(SrcTy)->getNumElements())
+ } else if (auto *DSeqTy = dyn_cast<SequentialType>(DstTy)) {
+ if (DSeqTy->getNumElements() !=
+ cast<SequentialType>(SrcTy)->getNumElements())
return false;
}
@@ -281,7 +279,7 @@ Type *TypeMapTy::get(Type *Ty, SmallPtrSet<StructType *, 8> &Visited) {
}
// If all of the element types mapped directly over and the type is not
- // a nomed struct, then the type is usable as-is.
+ // a named struct, then the type is usable as-is.
if (!AnyChange && IsUniqued)
return *Entry = Ty;
@@ -397,6 +395,12 @@ class IRLinker {
Worklist.push_back(GV);
}
+ /// Flag whether the ModuleInlineAsm string in Src should be linked with
+ /// (concatenated into) the ModuleInlineAsm string for the destination
+ /// module. It should be true for full LTO, but not when importing for
+ /// ThinLTO, otherwise we can have duplicate symbols.
+ bool LinkModuleInlineAsm;
+
/// Set to true when all global value body linking is complete (including
/// lazy linking). Used to prevent metadata linking from creating new
/// references.
@@ -465,7 +469,7 @@ class IRLinker {
Error linkModuleFlagsMetadata();
- void linkGlobalInit(GlobalVariable &Dst, GlobalVariable &Src);
+ void linkGlobalVariable(GlobalVariable &Dst, GlobalVariable &Src);
Error linkFunctionBody(Function &Dst, Function &Src);
void linkAliasBody(GlobalAlias &Dst, GlobalAlias &Src);
Error linkGlobalValueBody(GlobalValue &Dst, GlobalValue &Src);
@@ -476,16 +480,21 @@ class IRLinker {
Function *copyFunctionProto(const Function *SF);
GlobalValue *copyGlobalAliasProto(const GlobalAlias *SGA);
+ /// When importing for ThinLTO, prevent importing of types listed on
+ /// the DICompileUnit that we don't need a copy of in the importing
+ /// module.
+ void prepareCompileUnitsForImport();
void linkNamedMDNodes();
public:
IRLinker(Module &DstM, MDMapT &SharedMDs,
IRMover::IdentifiedStructTypeSet &Set, std::unique_ptr<Module> SrcM,
ArrayRef<GlobalValue *> ValuesToLink,
- std::function<void(GlobalValue &, IRMover::ValueAdder)> AddLazyFor)
+ std::function<void(GlobalValue &, IRMover::ValueAdder)> AddLazyFor,
+ bool LinkModuleInlineAsm, bool IsPerformingImport)
: DstM(DstM), SrcM(std::move(SrcM)), AddLazyFor(std::move(AddLazyFor)),
TypeMap(Set), GValMaterializer(*this), LValMaterializer(*this),
- SharedMDs(SharedMDs),
+ SharedMDs(SharedMDs), LinkModuleInlineAsm(LinkModuleInlineAsm),
Mapper(ValueMap, RF_MoveDistinctMDs | RF_IgnoreMissingLocals, &TypeMap,
&GValMaterializer),
AliasMCID(Mapper.registerAlternateMappingContext(AliasValueMap,
@@ -493,6 +502,8 @@ public:
ValueMap.getMDMap() = std::move(SharedMDs);
for (GlobalValue *GV : ValuesToLink)
maybeAdd(GV);
+ if (IsPerformingImport)
+ prepareCompileUnitsForImport();
}
~IRLinker() { SharedMDs = std::move(*ValueMap.getMDMap()); }
@@ -561,7 +572,7 @@ Value *IRLinker::materialize(Value *V, bool ForAlias) {
}
// When linking a global for an alias, it will always be linked. However we
- // need to check if it was not already scheduled to satify a reference from a
+ // need to check if it was not already scheduled to satisfy a reference from a
// regular global value initializer. We know if it has been schedule if the
// "New" GlobalValue that is mapped here for the alias is the same as the one
// already mapped. If there is an entry in the ValueMap but the value is
@@ -813,18 +824,17 @@ IRLinker::linkAppendingVarProto(GlobalVariable *DstGV,
SmallVector<Constant *, 16> SrcElements;
getArrayElements(SrcGV->getInitializer(), SrcElements);
- if (IsNewStructor)
- SrcElements.erase(
- std::remove_if(SrcElements.begin(), SrcElements.end(),
- [this](Constant *E) {
- auto *Key = dyn_cast<GlobalValue>(
- E->getAggregateElement(2)->stripPointerCasts());
- if (!Key)
- return false;
- GlobalValue *DGV = getLinkedToGlobal(Key);
- return !shouldLink(DGV, *Key);
- }),
- SrcElements.end());
+ if (IsNewStructor) {
+ auto It = remove_if(SrcElements, [this](Constant *E) {
+ auto *Key =
+ dyn_cast<GlobalValue>(E->getAggregateElement(2)->stripPointerCasts());
+ if (!Key)
+ return false;
+ GlobalValue *DGV = getLinkedToGlobal(Key);
+ return !shouldLink(DGV, *Key);
+ });
+ SrcElements.erase(It, SrcElements.end());
+ }
uint64_t NewSize = DstNumElements + SrcElements.size();
ArrayType *NewType = ArrayType::get(EltTy, NewSize);
@@ -951,7 +961,7 @@ Expected<Constant *> IRLinker::linkGlobalValueProto(GlobalValue *SGV,
/// Update the initializers in the Dest module now that all globals that may be
/// referenced are in Dest.
-void IRLinker::linkGlobalInit(GlobalVariable &Dst, GlobalVariable &Src) {
+void IRLinker::linkGlobalVariable(GlobalVariable &Dst, GlobalVariable &Src) {
// Figure out what the initializer looks like in the dest module.
Mapper.scheduleMapGlobalInitializer(Dst, *Src.getInitializer());
}
@@ -963,8 +973,8 @@ Error IRLinker::linkFunctionBody(Function &Dst, Function &Src) {
assert(Dst.isDeclaration() && !Src.isDeclaration());
// Materialize if needed.
- if (std::error_code EC = Src.materialize())
- return errorCodeToError(EC);
+ if (Error Err = Src.materialize())
+ return Err;
// Link in the operands without remapping.
if (Src.hasPrefixData())
@@ -994,13 +1004,77 @@ Error IRLinker::linkGlobalValueBody(GlobalValue &Dst, GlobalValue &Src) {
if (auto *F = dyn_cast<Function>(&Src))
return linkFunctionBody(cast<Function>(Dst), *F);
if (auto *GVar = dyn_cast<GlobalVariable>(&Src)) {
- linkGlobalInit(cast<GlobalVariable>(Dst), *GVar);
+ linkGlobalVariable(cast<GlobalVariable>(Dst), *GVar);
return Error::success();
}
linkAliasBody(cast<GlobalAlias>(Dst), cast<GlobalAlias>(Src));
return Error::success();
}
+void IRLinker::prepareCompileUnitsForImport() {
+ NamedMDNode *SrcCompileUnits = SrcM->getNamedMetadata("llvm.dbg.cu");
+ if (!SrcCompileUnits)
+ return;
+ // When importing for ThinLTO, prevent importing of types listed on
+ // the DICompileUnit that we don't need a copy of in the importing
+ // module. They will be emitted by the originating module.
+ for (unsigned I = 0, E = SrcCompileUnits->getNumOperands(); I != E; ++I) {
+ auto *CU = cast<DICompileUnit>(SrcCompileUnits->getOperand(I));
+ assert(CU && "Expected valid compile unit");
+ // Enums, macros, and retained types don't need to be listed on the
+ // imported DICompileUnit. This means they will only be imported
+ // if reached from the mapped IR. Do this by setting their value map
+ // entries to nullptr, which will automatically prevent their importing
+ // when reached from the DICompileUnit during metadata mapping.
+ ValueMap.MD()[CU->getRawEnumTypes()].reset(nullptr);
+ ValueMap.MD()[CU->getRawMacros()].reset(nullptr);
+ ValueMap.MD()[CU->getRawRetainedTypes()].reset(nullptr);
+ // If we ever start importing global variable defs, we'll need to
+ // add their DIGlobalVariable to the globals list on the imported
+ // DICompileUnit. Confirm none are imported, and then we can
+ // map the list of global variables to nullptr.
+ assert(none_of(
+ ValuesToLink,
+ [](const GlobalValue *GV) { return isa<GlobalVariable>(GV); }) &&
+ "Unexpected importing of a GlobalVariable definition");
+ ValueMap.MD()[CU->getRawGlobalVariables()].reset(nullptr);
+
+ // Imported entities only need to be mapped in if they have local
+ // scope, as those might correspond to an imported entity inside a
+ // function being imported (any locally scoped imported entities that
+ // don't end up referenced by an imported function will not be emitted
+ // into the object). Imported entities not in a local scope
+ // (e.g. on the namespace) only need to be emitted by the originating
+ // module. Create a list of the locally scoped imported entities, and
+ // replace the source CUs imported entity list with the new list, so
+ // only those are mapped in.
+ // FIXME: Locally-scoped imported entities could be moved to the
+ // functions they are local to instead of listing them on the CU, and
+ // we would naturally only link in those needed by function importing.
+ SmallVector<TrackingMDNodeRef, 4> AllImportedModules;
+ bool ReplaceImportedEntities = false;
+ for (auto *IE : CU->getImportedEntities()) {
+ DIScope *Scope = IE->getScope();
+ assert(Scope && "Invalid Scope encoding!");
+ if (isa<DILocalScope>(Scope))
+ AllImportedModules.emplace_back(IE);
+ else
+ ReplaceImportedEntities = true;
+ }
+ if (ReplaceImportedEntities) {
+ if (!AllImportedModules.empty())
+ CU->replaceImportedEntities(MDTuple::get(
+ CU->getContext(),
+ SmallVector<Metadata *, 16>(AllImportedModules.begin(),
+ AllImportedModules.end())));
+ else
+ // If there were no local scope imported entities, we can map
+ // the whole list to nullptr.
+ ValueMap.MD()[CU->getRawImportedEntities()].reset(nullptr);
+ }
+ }
+}
+
/// Insert all of the named MDNodes in Src into the Dest module.
void IRLinker::linkNamedMDNodes() {
const NamedMDNode *SrcModFlags = SrcM->getModuleFlagsMetadata();
@@ -1191,8 +1265,8 @@ static std::string mergeTriples(const Triple &SrcTriple,
Error IRLinker::run() {
// Ensure metadata materialized before value mapping.
if (SrcM->getMaterializer())
- if (std::error_code EC = SrcM->getMaterializer()->materializeMetadata())
- return errorCodeToError(EC);
+ if (Error Err = SrcM->getMaterializer()->materializeMetadata())
+ return Err;
// Inherit the target data from the source module if the destination module
// doesn't have one already.
@@ -1223,7 +1297,7 @@ Error IRLinker::run() {
DstM.setTargetTriple(mergeTriples(SrcTriple, DstTriple));
// Append the module inline asm string.
- if (!SrcM->getModuleInlineAsm().empty()) {
+ if (LinkModuleInlineAsm && !SrcM->getModuleInlineAsm().empty()) {
if (DstM.getModuleInlineAsm().empty())
DstM.setModuleInlineAsm(SrcM->getModuleInlineAsm());
else
@@ -1361,9 +1435,11 @@ IRMover::IRMover(Module &M) : Composite(M) {
Error IRMover::move(
std::unique_ptr<Module> Src, ArrayRef<GlobalValue *> ValuesToLink,
- std::function<void(GlobalValue &, ValueAdder Add)> AddLazyFor) {
+ std::function<void(GlobalValue &, ValueAdder Add)> AddLazyFor,
+ bool LinkModuleInlineAsm, bool IsPerformingImport) {
IRLinker TheIRLinker(Composite, SharedMDs, IdentifiedStructTypes,
- std::move(Src), ValuesToLink, std::move(AddLazyFor));
+ std::move(Src), ValuesToLink, std::move(AddLazyFor),
+ LinkModuleInlineAsm, IsPerformingImport);
Error E = TheIRLinker.run();
Composite.dropTriviallyDeadConstantArrays();
return E;
diff --git a/contrib/llvm/lib/Linker/LinkModules.cpp b/contrib/llvm/lib/Linker/LinkModules.cpp
index fae9c95ebe8f..cf2c4ccf523e 100644
--- a/contrib/llvm/lib/Linker/LinkModules.cpp
+++ b/contrib/llvm/lib/Linker/LinkModules.cpp
@@ -582,7 +582,9 @@ bool ModuleLinker::run() {
if (Error E = Mover.move(std::move(SrcM), ValuesToLink.getArrayRef(),
[this](GlobalValue &GV, IRMover::ValueAdder Add) {
addLazyFor(GV, Add);
- })) {
+ },
+ /* LinkModuleInlineAsm */ !isPerformingImport(),
+ /* IsPerformingImport */ isPerformingImport())) {
handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) {
DstM.getContext().diagnose(LinkDiagnosticInfo(DS_Error, EIB.message()));
HasErrors = true;
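
ModuleLinker::run now tells IRMover to skip concatenating the source module's inline-asm string when it is performing a ThinLTO import, so importing from the same module several times cannot redefine asm-declared symbols. A toy version of that conditional concatenation, with plain strings standing in for the module-level asm:

#include <iostream>
#include <string>

// Append Src's module-level asm onto Dst only for a full LTO link; imports
// skip it so repeated imports from one module don't duplicate asm symbols.
void linkModuleInlineAsm(std::string &DstAsm, const std::string &SrcAsm,
                         bool IsPerformingImport) {
  if (IsPerformingImport || SrcAsm.empty())
    return;
  if (DstAsm.empty())
    DstAsm = SrcAsm;
  else
    DstAsm += "\n" + SrcAsm;
}

int main() {
  std::string Dst = ".globl helper_a";
  linkModuleInlineAsm(Dst, ".globl helper_b", /*IsPerformingImport=*/false);
  linkModuleInlineAsm(Dst, ".globl helper_b", /*IsPerformingImport=*/true);
  std::cout << Dst << '\n'; // helper_b appended once, not twice
}
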
diff --git a/contrib/llvm/lib/MC/ConstantPools.cpp b/contrib/llvm/lib/MC/ConstantPools.cpp
index 17a23d063b7d..9608c2c656b7 100644
--- a/contrib/llvm/lib/MC/ConstantPools.cpp
+++ b/contrib/llvm/lib/MC/ConstantPools.cpp
@@ -36,10 +36,20 @@ void ConstantPool::emitEntries(MCStreamer &Streamer) {
const MCExpr *ConstantPool::addEntry(const MCExpr *Value, MCContext &Context,
unsigned Size, SMLoc Loc) {
+ const MCConstantExpr *C = dyn_cast<MCConstantExpr>(Value);
+
+ // Check if there is an existing entry for the same constant. If so, reuse it.
+ auto Itr = C ? CachedEntries.find(C->getValue()) : CachedEntries.end();
+ if (Itr != CachedEntries.end())
+ return Itr->second;
+
MCSymbol *CPEntryLabel = Context.createTempSymbol();
Entries.push_back(ConstantPoolEntry(CPEntryLabel, Value, Size, Loc));
- return MCSymbolRefExpr::create(CPEntryLabel, Context);
+ const auto SymRef = MCSymbolRefExpr::create(CPEntryLabel, Context);
+ if (C)
+ CachedEntries[C->getValue()] = SymRef;
+ return SymRef;
}
bool ConstantPool::empty() { return Entries.empty(); }
@@ -79,7 +89,7 @@ void AssemblerConstantPools::emitAll(MCStreamer &Streamer) {
}
void AssemblerConstantPools::emitForCurrentSection(MCStreamer &Streamer) {
- MCSection *Section = Streamer.getCurrentSection().first;
+ MCSection *Section = Streamer.getCurrentSectionOnly();
if (ConstantPool *CP = getConstantPool(Section)) {
emitConstantPool(Streamer, Section, *CP);
}
@@ -88,7 +98,7 @@ void AssemblerConstantPools::emitForCurrentSection(MCStreamer &Streamer) {
const MCExpr *AssemblerConstantPools::addEntry(MCStreamer &Streamer,
const MCExpr *Expr,
unsigned Size, SMLoc Loc) {
- MCSection *Section = Streamer.getCurrentSection().first;
+ MCSection *Section = Streamer.getCurrentSectionOnly();
return getOrCreateConstantPool(Section).addEntry(Expr, Streamer.getContext(),
Size, Loc);
}
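
ConstantPool::addEntry now remembers, per constant value, the symbol reference of an already-emitted pool entry, so repeated uses of the same literal share one slot. A stripped-down model of that caching behaviour in which plain strings stand in for MCSymbolRefExprs:

#include <cstdint>
#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>

class ConstantPoolModel {
  std::map<int64_t, std::string> CachedEntries; // value -> label of pool slot
  std::vector<std::pair<std::string, int64_t>> Entries;

public:
  // Returns the label to load from, reusing an existing slot when the same
  // constant was pooled before.
  std::string addEntry(int64_t Value) {
    auto It = CachedEntries.find(Value);
    if (It != CachedEntries.end())
      return It->second;
    std::string Label = ".LCPI" + std::to_string(Entries.size());
    Entries.push_back({Label, Value});
    CachedEntries[Value] = Label;
    return Label;
  }
  size_t size() const { return Entries.size(); }
};

int main() {
  ConstantPoolModel CP;
  std::cout << CP.addEntry(0x12345678) << '\n';
  std::cout << CP.addEntry(0xABCD) << '\n';
  std::cout << CP.addEntry(0x12345678) << '\n'; // reuses the first slot
  std::cout << "pool entries: " << CP.size() << '\n'; // 2, not 3
}
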
diff --git a/contrib/llvm/lib/MC/ELFObjectWriter.cpp b/contrib/llvm/lib/MC/ELFObjectWriter.cpp
index dc21b48ca6f6..a8c88dda6936 100644
--- a/contrib/llvm/lib/MC/ELFObjectWriter.cpp
+++ b/contrib/llvm/lib/MC/ELFObjectWriter.cpp
@@ -568,25 +568,27 @@ bool ELFObjectWriter::shouldRelocateWithSymbol(const MCAssembler &Asm,
// If we change such a relocation to use the section, the linker would think
// that it pointed to another string and subtracting 42 at runtime will
// produce the wrong value.
- auto &Sec = cast<MCSectionELF>(Sym->getSection());
- unsigned Flags = Sec.getFlags();
- if (Flags & ELF::SHF_MERGE) {
- if (C != 0)
- return true;
+ if (Sym->isInSection()) {
+ auto &Sec = cast<MCSectionELF>(Sym->getSection());
+ unsigned Flags = Sec.getFlags();
+ if (Flags & ELF::SHF_MERGE) {
+ if (C != 0)
+ return true;
- // It looks like gold has a bug (http://sourceware.org/PR16794) and can
- // only handle section relocations to mergeable sections if using RELA.
- if (!hasRelocationAddend())
+ // It looks like gold has a bug (http://sourceware.org/PR16794) and can
+ // only handle section relocations to mergeable sections if using RELA.
+ if (!hasRelocationAddend())
+ return true;
+ }
+
+ // Most TLS relocations use a got, so they need the symbol. Even those that
+ // are just an offset (@tpoff), require a symbol in gold versions before
+ // 5efeedf61e4fe720fd3e9a08e6c91c10abb66d42 (2014-09-26) which fixed
+ // http://sourceware.org/PR16773.
+ if (Flags & ELF::SHF_TLS)
return true;
}
- // Most TLS relocations use a got, so they need the symbol. Even those that
- // are just an offset (@tpoff), require a symbol in gold versions before
- // 5efeedf61e4fe720fd3e9a08e6c91c10abb66d42 (2014-09-26) which fixed
- // http://sourceware.org/PR16773.
- if (Flags & ELF::SHF_TLS)
- return true;
-
// If the symbol is a thumb function the final relocation must set the lowest
// bit. With a symbol that is done by just having the symbol have that bit
// set, so we would lose the bit if we relocated with the section.
@@ -1127,7 +1129,7 @@ void ELFObjectWriter::writeRelocations(const MCAssembler &Asm,
const MCSectionELF *ELFObjectWriter::createStringTable(MCContext &Ctx) {
const MCSectionELF *StrtabSection = SectionTable[StringTableIndex - 1];
- getStream() << StrTabBuilder.data();
+ StrTabBuilder.write(getStream());
return StrtabSection;
}
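
The restructured checks above only look at section flags when the symbol is actually placed in a section, so absolute or undefined symbols no longer reach the SHF_MERGE/SHF_TLS logic. A condensed standalone sketch of that decision order (plain C++; the flag values and the mustKeepSymbol name are illustrative, not the LLVM API):

#include <cstdint>

struct Sym {
  bool InSection;
  uint32_t SectionFlags;     // illustrative stand-ins for SHF_MERGE / SHF_TLS
};

constexpr uint32_t MERGE = 1u << 0;
constexpr uint32_t TLS   = 1u << 1;

// Decide whether a relocation must keep the symbol instead of being
// rewritten against the section, mirroring the restructured checks.
bool mustKeepSymbol(const Sym &S, int64_t Addend, bool HaveRela) {
  if (S.InSection) {
    if (S.SectionFlags & MERGE) {
      if (Addend != 0)
        return true;         // section+offset into a mergeable section is unsafe
      if (!HaveRela)
        return true;         // gold REL bug with mergeable sections
    }
    if (S.SectionFlags & TLS)
      return true;           // TLS relocations generally need the symbol
  }
  return false;              // other cases fall through to later checks
}

int main() {
  Sym S{true, MERGE};
  return mustKeepSymbol(S, /*Addend=*/8, /*HaveRela=*/true) ? 0 : 1;
}
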
diff --git a/contrib/llvm/lib/MC/MCAsmBackend.cpp b/contrib/llvm/lib/MC/MCAsmBackend.cpp
index b868b9d48896..570f764f6642 100644
--- a/contrib/llvm/lib/MC/MCAsmBackend.cpp
+++ b/contrib/llvm/lib/MC/MCAsmBackend.cpp
@@ -34,6 +34,10 @@ const MCFixupKindInfo &MCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
{"FK_GPRel_2", 0, 16, 0},
{"FK_GPRel_4", 0, 32, 0},
{"FK_GPRel_8", 0, 64, 0},
+ {"FK_DTPRel_4", 0, 32, 0},
+ {"FK_DTPRel_8", 0, 64, 0},
+ {"FK_TPRel_4", 0, 32, 0},
+ {"FK_TPRel_8", 0, 64, 0},
{"FK_SecRel_1", 0, 8, 0},
{"FK_SecRel_2", 0, 16, 0},
{"FK_SecRel_4", 0, 32, 0},
diff --git a/contrib/llvm/lib/MC/MCAsmInfo.cpp b/contrib/llvm/lib/MC/MCAsmInfo.cpp
index 4a05175fdec3..3eb8f50de5a8 100644
--- a/contrib/llvm/lib/MC/MCAsmInfo.cpp
+++ b/contrib/llvm/lib/MC/MCAsmInfo.cpp
@@ -31,7 +31,6 @@ MCAsmInfo::MCAsmInfo() {
HasSubsectionsViaSymbols = false;
HasMachoZeroFillDirective = false;
HasMachoTBSSDirective = false;
- HasStaticCtorDtorReferenceInStaticMode = false;
MaxInstLength = 4;
MinInstAlignment = 1;
DollarIsPC = false;
diff --git a/contrib/llvm/lib/MC/MCAsmInfoDarwin.cpp b/contrib/llvm/lib/MC/MCAsmInfoDarwin.cpp
index fc60313dd6b2..e95cf488cd30 100644
--- a/contrib/llvm/lib/MC/MCAsmInfoDarwin.cpp
+++ b/contrib/llvm/lib/MC/MCAsmInfoDarwin.cpp
@@ -76,7 +76,6 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() {
ZeroDirective = "\t.space\t"; // ".space N" emits N zeros.
HasMachoZeroFillDirective = true; // Uses .zerofill
HasMachoTBSSDirective = true; // Uses .tbss
- HasStaticCtorDtorReferenceInStaticMode = true;
// FIXME: Change this once MC is the system assembler.
HasAggressiveSymbolFolding = false;
diff --git a/contrib/llvm/lib/MC/MCAsmStreamer.cpp b/contrib/llvm/lib/MC/MCAsmStreamer.cpp
index ef2f7810deaa..817009a65363 100644
--- a/contrib/llvm/lib/MC/MCAsmStreamer.cpp
+++ b/contrib/llvm/lib/MC/MCAsmStreamer.cpp
@@ -14,6 +14,7 @@
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCCodeView.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixupKindInfo.h"
@@ -99,7 +100,7 @@ public:
/// file if applicable as a QoI issue to make the output of the compiler
/// more readable. This only affects the MCAsmStreamer, and only when
/// verbose assembly output is enabled.
- void AddComment(const Twine &T) override;
+ void AddComment(const Twine &T, bool EOL = true) override;
/// AddEncodingComment - Add a comment showing the encoding of an instruction.
void AddEncodingComment(const MCInst &Inst, const MCSubtargetInfo &);
@@ -149,8 +150,8 @@ public:
void EndCOFFSymbolDef() override;
void EmitCOFFSafeSEH(MCSymbol const *Symbol) override;
void EmitCOFFSectionIndex(MCSymbol const *Symbol) override;
- void EmitCOFFSecRel32(MCSymbol const *Symbol) override;
- void emitELFSize(MCSymbolELF *Symbol, const MCExpr *Value) override;
+ void EmitCOFFSecRel32(MCSymbol const *Symbol, uint64_t Offset) override;
+ void emitELFSize(MCSymbol *Symbol, const MCExpr *Value) override;
void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) override;
@@ -180,6 +181,11 @@ public:
void EmitSLEB128Value(const MCExpr *Value) override;
+ void EmitDTPRel32Value(const MCExpr *Value) override;
+ void EmitDTPRel64Value(const MCExpr *Value) override;
+ void EmitTPRel32Value(const MCExpr *Value) override;
+ void EmitTPRel64Value(const MCExpr *Value) override;
+
void EmitGPRel64Value(const MCExpr *Value) override;
void EmitGPRel32Value(const MCExpr *Value) override;
@@ -203,7 +209,8 @@ public:
unsigned MaxBytesToEmit = 0) override;
void emitValueToOffset(const MCExpr *Offset,
- unsigned char Value = 0) override;
+ unsigned char Value,
+ SMLoc Loc) override;
void EmitFileDirective(StringRef Filename) override;
unsigned EmitDwarfFileDirective(unsigned FileNo, StringRef Directory,
@@ -215,16 +222,21 @@ public:
StringRef FileName) override;
MCSymbol *getDwarfLineTableSymbol(unsigned CUID) override;
- unsigned EmitCVFileDirective(unsigned FileNo, StringRef Filename) override;
+ bool EmitCVFileDirective(unsigned FileNo, StringRef Filename) override;
+ bool EmitCVFuncIdDirective(unsigned FuncId) override;
+ bool EmitCVInlineSiteIdDirective(unsigned FunctionId, unsigned IAFunc,
+ unsigned IAFile, unsigned IALine,
+ unsigned IACol, SMLoc Loc) override;
void EmitCVLocDirective(unsigned FunctionId, unsigned FileNo, unsigned Line,
unsigned Column, bool PrologueEnd, bool IsStmt,
- StringRef FileName) override;
+ StringRef FileName, SMLoc Loc) override;
void EmitCVLinetableDirective(unsigned FunctionId, const MCSymbol *FnStart,
const MCSymbol *FnEnd) override;
- void EmitCVInlineLinetableDirective(
- unsigned PrimaryFunctionId, unsigned SourceFileId, unsigned SourceLineNum,
- const MCSymbol *FnStartSym, const MCSymbol *FnEndSym,
- ArrayRef<unsigned> SecondaryFunctionIds) override;
+ void EmitCVInlineLinetableDirective(unsigned PrimaryFunctionId,
+ unsigned SourceFileId,
+ unsigned SourceLineNum,
+ const MCSymbol *FnStartSym,
+ const MCSymbol *FnEndSym) override;
void EmitCVDefRangeDirective(
ArrayRef<std::pair<const MCSymbol *, const MCSymbol *>> Ranges,
StringRef FixedSizePortion) override;
@@ -289,12 +301,14 @@ public:
/// file if applicable as a QoI issue to make the output of the compiler
/// more readable. This only affects the MCAsmStreamer, and only when
/// verbose assembly output is enabled.
-void MCAsmStreamer::AddComment(const Twine &T) {
+/// By default, EOL is set to true so that each comment goes on its own line.
+void MCAsmStreamer::AddComment(const Twine &T, bool EOL) {
if (!IsVerboseAsm) return;
T.toVector(CommentToEmit);
- // Each comment goes on its own line.
- CommentToEmit.push_back('\n');
+
+ if (EOL)
+ CommentToEmit.push_back('\n'); // Place the comment on a new line.
}
void MCAsmStreamer::EmitCommentsAndEOL() {
@@ -357,9 +371,10 @@ void MCAsmStreamer::addExplicitComment(const Twine &T) {
ExplicitCommentToEmit.append("\t");
ExplicitCommentToEmit.append(c.str());
} else if (c.front() == '#') {
- // # are comments for ## commentString. Output extra #.
- ExplicitCommentToEmit.append("\t#");
- ExplicitCommentToEmit.append(c.str());
+
+ ExplicitCommentToEmit.append("\t");
+ ExplicitCommentToEmit.append(MAI->getCommentString());
+ ExplicitCommentToEmit.append(c.slice(1, c.size()).str());
} else
assert(false && "Unexpected Assembly Comment");
// full line comments immediately output
@@ -602,13 +617,15 @@ void MCAsmStreamer::EmitCOFFSectionIndex(MCSymbol const *Symbol) {
EmitEOL();
}
-void MCAsmStreamer::EmitCOFFSecRel32(MCSymbol const *Symbol) {
+void MCAsmStreamer::EmitCOFFSecRel32(MCSymbol const *Symbol, uint64_t Offset) {
OS << "\t.secrel32\t";
Symbol->print(OS, MAI);
+ if (Offset != 0)
+ OS << '+' << Offset;
EmitEOL();
}
-void MCAsmStreamer::emitELFSize(MCSymbolELF *Symbol, const MCExpr *Value) {
+void MCAsmStreamer::emitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
assert(MAI->hasDotTypeDotSizeDirective());
OS << "\t.size\t";
Symbol->print(OS, MAI);
@@ -737,7 +754,7 @@ static void PrintQuotedString(StringRef Data, raw_ostream &OS) {
}
void MCAsmStreamer::EmitBytes(StringRef Data) {
- assert(getCurrentSection().first &&
+ assert(getCurrentSectionOnly() &&
"Cannot emit contents before setting section!");
if (Data.empty()) return;
@@ -782,7 +799,7 @@ void MCAsmStreamer::EmitIntValue(uint64_t Value, unsigned Size) {
void MCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
SMLoc Loc) {
assert(Size <= 8 && "Invalid size");
- assert(getCurrentSection().first &&
+ assert(getCurrentSectionOnly() &&
"Cannot emit contents before setting section!");
const char *Directive = nullptr;
switch (Size) {
@@ -855,6 +872,34 @@ void MCAsmStreamer::EmitSLEB128Value(const MCExpr *Value) {
EmitEOL();
}
+void MCAsmStreamer::EmitDTPRel64Value(const MCExpr *Value) {
+ assert(MAI->getDTPRel64Directive() != nullptr);
+ OS << MAI->getDTPRel64Directive();
+ Value->print(OS, MAI);
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitDTPRel32Value(const MCExpr *Value) {
+ assert(MAI->getDTPRel32Directive() != nullptr);
+ OS << MAI->getDTPRel32Directive();
+ Value->print(OS, MAI);
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitTPRel64Value(const MCExpr *Value) {
+ assert(MAI->getTPRel64Directive() != nullptr);
+ OS << MAI->getTPRel64Directive();
+ Value->print(OS, MAI);
+ EmitEOL();
+}
+
+void MCAsmStreamer::EmitTPRel32Value(const MCExpr *Value) {
+ assert(MAI->getTPRel32Directive() != nullptr);
+ OS << MAI->getTPRel32Directive();
+ Value->print(OS, MAI);
+ EmitEOL();
+}
+
void MCAsmStreamer::EmitGPRel64Value(const MCExpr *Value) {
assert(MAI->getGPRel64Directive() != nullptr);
OS << MAI->getGPRel64Directive();
@@ -971,7 +1016,8 @@ void MCAsmStreamer::EmitCodeAlignment(unsigned ByteAlignment,
}
void MCAsmStreamer::emitValueToOffset(const MCExpr *Offset,
- unsigned char Value) {
+ unsigned char Value,
+ SMLoc Loc) {
// FIXME: Verify that Offset is associated with the current section.
OS << ".org ";
Offset->print(OS, MAI);
@@ -1068,29 +1114,43 @@ MCSymbol *MCAsmStreamer::getDwarfLineTableSymbol(unsigned CUID) {
return MCStreamer::getDwarfLineTableSymbol(0);
}
-unsigned MCAsmStreamer::EmitCVFileDirective(unsigned FileNo,
- StringRef Filename) {
- if (!getContext().getCVFile(Filename, FileNo))
- return 0;
+bool MCAsmStreamer::EmitCVFileDirective(unsigned FileNo, StringRef Filename) {
+ if (!getContext().getCVContext().addFile(FileNo, Filename))
+ return false;
OS << "\t.cv_file\t" << FileNo << ' ';
PrintQuotedString(Filename, OS);
EmitEOL();
+ return true;
+}
- return FileNo;
+bool MCAsmStreamer::EmitCVFuncIdDirective(unsigned FuncId) {
+ OS << "\t.cv_func_id " << FuncId << '\n';
+ return MCStreamer::EmitCVFuncIdDirective(FuncId);
+}
+
+bool MCAsmStreamer::EmitCVInlineSiteIdDirective(unsigned FunctionId,
+ unsigned IAFunc,
+ unsigned IAFile,
+ unsigned IALine, unsigned IACol,
+ SMLoc Loc) {
+ OS << "\t.cv_inline_site_id " << FunctionId << " within " << IAFunc
+ << " inlined_at " << IAFile << ' ' << IALine << ' ' << IACol << '\n';
+ return MCStreamer::EmitCVInlineSiteIdDirective(FunctionId, IAFunc, IAFile,
+ IALine, IACol, Loc);
}
void MCAsmStreamer::EmitCVLocDirective(unsigned FunctionId, unsigned FileNo,
unsigned Line, unsigned Column,
bool PrologueEnd, bool IsStmt,
- StringRef FileName) {
+ StringRef FileName, SMLoc Loc) {
OS << "\t.cv_loc\t" << FunctionId << " " << FileNo << " " << Line << " "
<< Column;
if (PrologueEnd)
OS << " prologue_end";
- unsigned OldIsStmt = getContext().getCurrentCVLoc().isStmt();
+ unsigned OldIsStmt = getContext().getCVContext().getCurrentCVLoc().isStmt();
if (IsStmt != OldIsStmt) {
OS << " is_stmt ";
@@ -1102,12 +1162,12 @@ void MCAsmStreamer::EmitCVLocDirective(unsigned FunctionId, unsigned FileNo,
if (IsVerboseAsm) {
OS.PadToColumn(MAI->getCommentColumn());
- OS << MAI->getCommentString() << ' ' << FileName << ':'
- << Line << ':' << Column;
+ OS << MAI->getCommentString() << ' ' << FileName << ':' << Line << ':'
+ << Column;
}
EmitEOL();
this->MCStreamer::EmitCVLocDirective(FunctionId, FileNo, Line, Column,
- PrologueEnd, IsStmt, FileName);
+ PrologueEnd, IsStmt, FileName, Loc);
}
void MCAsmStreamer::EmitCVLinetableDirective(unsigned FunctionId,
@@ -1121,24 +1181,19 @@ void MCAsmStreamer::EmitCVLinetableDirective(unsigned FunctionId,
this->MCStreamer::EmitCVLinetableDirective(FunctionId, FnStart, FnEnd);
}
-void MCAsmStreamer::EmitCVInlineLinetableDirective(
- unsigned PrimaryFunctionId, unsigned SourceFileId, unsigned SourceLineNum,
- const MCSymbol *FnStartSym, const MCSymbol *FnEndSym,
- ArrayRef<unsigned> SecondaryFunctionIds) {
+void MCAsmStreamer::EmitCVInlineLinetableDirective(unsigned PrimaryFunctionId,
+ unsigned SourceFileId,
+ unsigned SourceLineNum,
+ const MCSymbol *FnStartSym,
+ const MCSymbol *FnEndSym) {
OS << "\t.cv_inline_linetable\t" << PrimaryFunctionId << ' ' << SourceFileId
<< ' ' << SourceLineNum << ' ';
FnStartSym->print(OS, MAI);
OS << ' ';
FnEndSym->print(OS, MAI);
- if (!SecondaryFunctionIds.empty()) {
- OS << " contains";
- for (unsigned SecondaryFunctionId : SecondaryFunctionIds)
- OS << ' ' << SecondaryFunctionId;
- }
EmitEOL();
this->MCStreamer::EmitCVInlineLinetableDirective(
- PrimaryFunctionId, SourceFileId, SourceLineNum, FnStartSym, FnEndSym,
- SecondaryFunctionIds);
+ PrimaryFunctionId, SourceFileId, SourceLineNum, FnStartSym, FnEndSym);
}
void MCAsmStreamer::EmitCVDefRangeDirective(
@@ -1531,8 +1586,9 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst,
}
}
-void MCAsmStreamer::EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) {
- assert(getCurrentSection().first &&
+void MCAsmStreamer::EmitInstruction(const MCInst &Inst,
+ const MCSubtargetInfo &STI) {
+ assert(getCurrentSectionOnly() &&
"Cannot emit contents before setting section!");
// Show the encoding in a comment if we have a code emitter.
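
With the added EOL parameter, AddComment only terminates the pending comment line on request, so a caller can stitch several fragments onto one comment line. A standalone sketch of that buffering behaviour with plain std::string (CommentBuffer is an illustrative stand-in, not the MCStreamer interface):

#include <iostream>
#include <string>

class CommentBuffer {
  std::string Pending;

public:
  // Append a comment; only terminate the line when EOL is true
  // (mirrors the new default-true parameter on AddComment).
  void addComment(const std::string &T, bool EOL = true) {
    Pending += T;
    if (EOL)
      Pending += '\n';
  }

  // Emit each buffered line behind the comment marker, as the streamer
  // does when it finally flushes comments at the end of a statement.
  void flush(std::ostream &OS, const std::string &Marker) {
    std::string::size_type Start = 0;
    while (Start < Pending.size()) {
      auto End = Pending.find('\n', Start);
      if (End == std::string::npos)
        End = Pending.size();
      OS << Marker << ' ' << Pending.substr(Start, End - Start) << '\n';
      Start = End + 1;
    }
    Pending.clear();
  }
};

int main() {
  CommentBuffer B;
  B.addComment("fixup A - offset: 0, value: sym", /*EOL=*/false);
  B.addComment(", kind: FK_Data_4");   // appended to the same line
  B.flush(std::cout, "#");             // one "# ..." line, not two
}
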
diff --git a/contrib/llvm/lib/MC/MCAssembler.cpp b/contrib/llvm/lib/MC/MCAssembler.cpp
index 7a42108ceaf3..83fcec92e2b5 100644
--- a/contrib/llvm/lib/MC/MCAssembler.cpp
+++ b/contrib/llvm/lib/MC/MCAssembler.cpp
@@ -278,22 +278,29 @@ uint64_t MCAssembler::computeFragmentSize(const MCAsmLayout &Layout,
case MCFragment::FT_Org: {
const MCOrgFragment &OF = cast<MCOrgFragment>(F);
MCValue Value;
- if (!OF.getOffset().evaluateAsValue(Value, Layout))
- report_fatal_error("expected assembly-time absolute expression");
+ if (!OF.getOffset().evaluateAsValue(Value, Layout)) {
+ getContext().reportError(OF.getLoc(),
+ "expected assembly-time absolute expression");
+ return 0;
+ }
- // FIXME: We need a way to communicate this error.
uint64_t FragmentOffset = Layout.getFragmentOffset(&OF);
int64_t TargetLocation = Value.getConstant();
if (const MCSymbolRefExpr *A = Value.getSymA()) {
uint64_t Val;
- if (!Layout.getSymbolOffset(A->getSymbol(), Val))
- report_fatal_error("expected absolute expression");
+ if (!Layout.getSymbolOffset(A->getSymbol(), Val)) {
+ getContext().reportError(OF.getLoc(), "expected absolute expression");
+ return 0;
+ }
TargetLocation += Val;
}
int64_t Size = TargetLocation - FragmentOffset;
- if (Size < 0 || Size >= 0x40000000)
- report_fatal_error("invalid .org offset '" + Twine(TargetLocation) +
- "' (at offset '" + Twine(FragmentOffset) + "')");
+ if (Size < 0 || Size >= 0x40000000) {
+ getContext().reportError(
+ OF.getLoc(), "invalid .org offset '" + Twine(TargetLocation) +
+ "' (at offset '" + Twine(FragmentOffset) + "')");
+ return 0;
+ }
return Size;
}
@@ -575,8 +582,8 @@ void MCAssembler::writeSectionData(const MCSection *Sec,
// into a virtual section. This is to support clients which use standard
// directives to fill the contents of virtual sections.
const MCDataFragment &DF = cast<MCDataFragment>(F);
- assert(DF.fixup_begin() == DF.fixup_end() &&
- "Cannot have fixups in virtual section!");
+ if (DF.fixup_begin() != DF.fixup_end())
+ report_fatal_error("cannot have fixups in virtual section!");
for (unsigned i = 0, e = DF.getContents().size(); i != e; ++i)
if (DF.getContents()[i]) {
if (auto *ELFSec = dyn_cast<const MCSectionELF>(Sec))
@@ -660,7 +667,8 @@ void MCAssembler::layout(MCAsmLayout &Layout) {
// Layout until everything fits.
while (layoutOnce(Layout))
- continue;
+ if (getContext().hadError())
+ return;
DEBUG_WITH_TYPE("mc-dump", {
llvm::errs() << "assembler backend - post-relaxation\n--\n";
@@ -912,7 +920,9 @@ bool MCAssembler::layoutOnce(MCAsmLayout &Layout) {
void MCAssembler::finishLayout(MCAsmLayout &Layout) {
// The layout is done. Mark every fragment as valid.
for (unsigned int i = 0, n = Layout.getSectionOrder().size(); i != n; ++i) {
- Layout.getFragmentOffset(&*Layout.getSectionOrder()[i]->rbegin());
+ MCSection &Section = *Layout.getSectionOrder()[i];
+ Layout.getFragmentOffset(&*Section.rbegin());
+ computeFragmentSize(Layout, *Section.rbegin());
}
getBackend().finishLayout(*this, Layout);
}
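
The FT_Org case now reports a diagnostic and returns a zero size instead of calling report_fatal_error, while the size itself is still the distance from the fragment's layout offset to the .org target, rejected when negative or absurdly large. A standalone sketch of that size computation (plain C++; the error path is reduced to an empty optional here):

#include <cstdint>
#include <optional>

// Compute the number of bytes an ".org <target>" fragment must cover,
// given where the fragment starts in the section layout. Returns
// std::nullopt where the patch would call reportError and use size 0.
std::optional<int64_t> computeOrgSize(int64_t FragmentOffset,
                                      int64_t TargetLocation) {
  int64_t Size = TargetLocation - FragmentOffset;
  // Reject backwards .org and huge gaps (same 0x40000000 cap as the patch).
  if (Size < 0 || Size >= 0x40000000)
    return std::nullopt;
  return Size;
}

int main() {
  auto Ok  = computeOrgSize(/*FragmentOffset=*/16, /*TargetLocation=*/64); // 48 bytes
  auto Bad = computeOrgSize(/*FragmentOffset=*/64, /*TargetLocation=*/16); // backwards -> error
  return (Ok && *Ok == 48 && !Bad) ? 0 : 1;
}
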
diff --git a/contrib/llvm/lib/MC/MCCodeView.cpp b/contrib/llvm/lib/MC/MCCodeView.cpp
index 65cff41abebe..3773542cf8a2 100644
--- a/contrib/llvm/lib/MC/MCCodeView.cpp
+++ b/contrib/llvm/lib/MC/MCCodeView.cpp
@@ -65,6 +65,50 @@ bool CodeViewContext::addFile(unsigned FileNumber, StringRef Filename) {
return true;
}
+bool CodeViewContext::recordFunctionId(unsigned FuncId) {
+ if (FuncId >= Functions.size())
+ Functions.resize(FuncId + 1);
+
+ // Return false if this function info was already allocated.
+ if (!Functions[FuncId].isUnallocatedFunctionInfo())
+ return false;
+
+ // Mark this as an allocated normal function, and leave the rest alone.
+ Functions[FuncId].ParentFuncIdPlusOne = MCCVFunctionInfo::FunctionSentinel;
+ return true;
+}
+
+bool CodeViewContext::recordInlinedCallSiteId(unsigned FuncId, unsigned IAFunc,
+ unsigned IAFile, unsigned IALine,
+ unsigned IACol) {
+ if (FuncId >= Functions.size())
+ Functions.resize(FuncId + 1);
+
+ // Return false if this function info was already allocated.
+ if (!Functions[FuncId].isUnallocatedFunctionInfo())
+ return false;
+
+ MCCVFunctionInfo::LineInfo InlinedAt;
+ InlinedAt.File = IAFile;
+ InlinedAt.Line = IALine;
+ InlinedAt.Col = IACol;
+
+ // Mark this as an inlined call site and record call site line info.
+ MCCVFunctionInfo *Info = &Functions[FuncId];
+ Info->ParentFuncIdPlusOne = IAFunc + 1;
+ Info->InlinedAt = InlinedAt;
+
+ // Walk up the call chain adding this function id to the InlinedAtMap of all
+ // transitive callers until we hit a real function.
+ while (Info->isInlinedCallSite()) {
+ InlinedAt = Info->InlinedAt;
+ Info = getCVFunctionInfo(Info->getParentFuncId());
+ Info->InlinedAtMap[FuncId] = InlinedAt;
+ }
+
+ return true;
+}
+
MCDataFragment *CodeViewContext::getStringTableFragment() {
if (!StrTabFragment) {
StrTabFragment = new MCDataFragment();
@@ -156,7 +200,7 @@ void CodeViewContext::emitLineTableForFunction(MCObjectStreamer &OS,
OS.EmitIntValue(unsigned(ModuleSubstreamKind::Lines), 4);
OS.emitAbsoluteSymbolDiff(LineEnd, LineBegin, 4);
OS.EmitLabel(LineBegin);
- OS.EmitCOFFSecRel32(FuncBegin);
+ OS.EmitCOFFSecRel32(FuncBegin, /*Offset=*/0);
OS.EmitCOFFSectionIndex(FuncBegin);
// Actual line info.
@@ -237,15 +281,17 @@ static uint32_t encodeSignedNumber(uint32_t Data) {
return Data << 1;
}
-void CodeViewContext::emitInlineLineTableForFunction(
- MCObjectStreamer &OS, unsigned PrimaryFunctionId, unsigned SourceFileId,
- unsigned SourceLineNum, const MCSymbol *FnStartSym,
- const MCSymbol *FnEndSym, ArrayRef<unsigned> SecondaryFunctionIds) {
+void CodeViewContext::emitInlineLineTableForFunction(MCObjectStreamer &OS,
+ unsigned PrimaryFunctionId,
+ unsigned SourceFileId,
+ unsigned SourceLineNum,
+ const MCSymbol *FnStartSym,
+ const MCSymbol *FnEndSym) {
// Create and insert a fragment into the current section that will be encoded
// later.
- new MCCVInlineLineTableFragment(
- PrimaryFunctionId, SourceFileId, SourceLineNum, FnStartSym, FnEndSym,
- SecondaryFunctionIds, OS.getCurrentSectionOnly());
+ new MCCVInlineLineTableFragment(PrimaryFunctionId, SourceFileId,
+ SourceLineNum, FnStartSym, FnEndSym,
+ OS.getCurrentSectionOnly());
}
void CodeViewContext::emitDefRange(
@@ -280,69 +326,92 @@ void CodeViewContext::encodeInlineLineTable(MCAsmLayout &Layout,
size_t LocBegin;
size_t LocEnd;
std::tie(LocBegin, LocEnd) = getLineExtent(Frag.SiteFuncId);
- for (unsigned SecondaryId : Frag.SecondaryFuncs) {
- auto Extent = getLineExtent(SecondaryId);
+
+ // Include all child inline call sites in our .cv_loc extent.
+ MCCVFunctionInfo *SiteInfo = getCVFunctionInfo(Frag.SiteFuncId);
+ for (auto &KV : SiteInfo->InlinedAtMap) {
+ unsigned ChildId = KV.first;
+ auto Extent = getLineExtent(ChildId);
LocBegin = std::min(LocBegin, Extent.first);
LocEnd = std::max(LocEnd, Extent.second);
}
+
if (LocBegin >= LocEnd)
return;
ArrayRef<MCCVLineEntry> Locs = getLinesForExtent(LocBegin, LocEnd);
if (Locs.empty())
return;
- SmallSet<unsigned, 8> InlinedFuncIds;
- InlinedFuncIds.insert(Frag.SiteFuncId);
- InlinedFuncIds.insert(Frag.SecondaryFuncs.begin(), Frag.SecondaryFuncs.end());
-
// Make an artificial start location using the function start and the inlinee
// lines start location information. All deltas start relative to this
// location.
MCCVLineEntry StartLoc(Frag.getFnStartSym(), MCCVLoc(Locs.front()));
StartLoc.setFileNum(Frag.StartFileId);
StartLoc.setLine(Frag.StartLineNum);
- const MCCVLineEntry *LastLoc = &StartLoc;
bool HaveOpenRange = false;
+ const MCSymbol *LastLabel = Frag.getFnStartSym();
+ MCCVFunctionInfo::LineInfo LastSourceLoc, CurSourceLoc;
+ LastSourceLoc.File = Frag.StartFileId;
+ LastSourceLoc.Line = Frag.StartLineNum;
+
SmallVectorImpl<char> &Buffer = Frag.getContents();
Buffer.clear(); // Clear old contents if we went through relaxation.
for (const MCCVLineEntry &Loc : Locs) {
- if (!InlinedFuncIds.count(Loc.getFunctionId())) {
- // We've hit a cv_loc not attributed to this inline call site. Use this
- // label to end the PC range.
- if (HaveOpenRange) {
- unsigned Length =
- computeLabelDiff(Layout, LastLoc->getLabel(), Loc.getLabel());
- compressAnnotation(BinaryAnnotationsOpCode::ChangeCodeLength, Buffer);
- compressAnnotation(Length, Buffer);
+ // Exit early if our line table would produce an oversized InlineSiteSym
+ // record. Account for the ChangeCodeLength annotation emitted after the
+ // loop ends.
+ constexpr uint32_t InlineSiteSize = 12;
+ constexpr uint32_t AnnotationSize = 8;
+ size_t MaxBufferSize = MaxRecordLength - InlineSiteSize - AnnotationSize;
+ if (Buffer.size() >= MaxBufferSize)
+ break;
+
+ if (Loc.getFunctionId() == Frag.SiteFuncId) {
+ CurSourceLoc.File = Loc.getFileNum();
+ CurSourceLoc.Line = Loc.getLine();
+ } else {
+ auto I = SiteInfo->InlinedAtMap.find(Loc.getFunctionId());
+ if (I != SiteInfo->InlinedAtMap.end()) {
+ // This .cv_loc is from a child inline call site. Use the source
+ // location of the inlined call site instead of the .cv_loc directive
+ // source location.
+ CurSourceLoc = I->second;
+ } else {
+ // We've hit a cv_loc not attributed to this inline call site. Use this
+ // label to end the PC range.
+ if (HaveOpenRange) {
+ unsigned Length = computeLabelDiff(Layout, LastLabel, Loc.getLabel());
+ compressAnnotation(BinaryAnnotationsOpCode::ChangeCodeLength, Buffer);
+ compressAnnotation(Length, Buffer);
+ LastLabel = Loc.getLabel();
+ }
+ HaveOpenRange = false;
+ continue;
}
- HaveOpenRange = false;
- continue;
}
- // If we've already opened the function and we're at an indirectly inlined
- // location, continue until the next directly inlined location.
- bool DirectlyInlined = Loc.getFunctionId() == Frag.SiteFuncId;
- if (!DirectlyInlined && HaveOpenRange)
+ // Skip this .cv_loc if we have an open range and this isn't a meaningful
+ // source location update. The current table format does not support column
+ // info, so we can skip updates for those.
+ if (HaveOpenRange && CurSourceLoc.File == LastSourceLoc.File &&
+ CurSourceLoc.Line == LastSourceLoc.Line)
continue;
+
HaveOpenRange = true;
- if (Loc.getFileNum() != LastLoc->getFileNum()) {
+ if (CurSourceLoc.File != LastSourceLoc.File) {
// File ids are 1 based, and each file checksum table entry is 8 bytes
// long. See emitFileChecksums above.
- unsigned FileOffset = 8 * (Loc.getFileNum() - 1);
+ unsigned FileOffset = 8 * (CurSourceLoc.File - 1);
compressAnnotation(BinaryAnnotationsOpCode::ChangeFile, Buffer);
compressAnnotation(FileOffset, Buffer);
}
- int LineDelta = Loc.getLine() - LastLoc->getLine();
- if (LineDelta == 0)
- continue;
-
+ int LineDelta = CurSourceLoc.Line - LastSourceLoc.Line;
unsigned EncodedLineDelta = encodeSignedNumber(LineDelta);
- unsigned CodeDelta =
- computeLabelDiff(Layout, LastLoc->getLabel(), Loc.getLabel());
- if (CodeDelta == 0) {
+ unsigned CodeDelta = computeLabelDiff(Layout, LastLabel, Loc.getLabel());
+ if (CodeDelta == 0 && LineDelta != 0) {
compressAnnotation(BinaryAnnotationsOpCode::ChangeLineOffset, Buffer);
compressAnnotation(EncodedLineDelta, Buffer);
} else if (EncodedLineDelta < 0x8 && CodeDelta <= 0xf) {
@@ -355,29 +424,29 @@ void CodeViewContext::encodeInlineLineTable(MCAsmLayout &Layout,
compressAnnotation(Operand, Buffer);
} else {
// Otherwise use the separate line and code deltas.
- compressAnnotation(BinaryAnnotationsOpCode::ChangeLineOffset, Buffer);
- compressAnnotation(EncodedLineDelta, Buffer);
+ if (LineDelta != 0) {
+ compressAnnotation(BinaryAnnotationsOpCode::ChangeLineOffset, Buffer);
+ compressAnnotation(EncodedLineDelta, Buffer);
+ }
compressAnnotation(BinaryAnnotationsOpCode::ChangeCodeOffset, Buffer);
compressAnnotation(CodeDelta, Buffer);
}
- LastLoc = &Loc;
+ LastLabel = Loc.getLabel();
+ LastSourceLoc = CurSourceLoc;
}
assert(HaveOpenRange);
unsigned EndSymLength =
- computeLabelDiff(Layout, LastLoc->getLabel(), Frag.getFnEndSym());
+ computeLabelDiff(Layout, LastLabel, Frag.getFnEndSym());
unsigned LocAfterLength = ~0U;
ArrayRef<MCCVLineEntry> LocAfter = getLinesForExtent(LocEnd, LocEnd + 1);
if (!LocAfter.empty()) {
// Only try to compute this difference if we're in the same section.
const MCCVLineEntry &Loc = LocAfter[0];
- if (&Loc.getLabel()->getSection(false) ==
- &LastLoc->getLabel()->getSection(false)) {
- LocAfterLength =
- computeLabelDiff(Layout, LastLoc->getLabel(), Loc.getLabel());
- }
+ if (&Loc.getLabel()->getSection(false) == &LastLabel->getSection(false))
+ LocAfterLength = computeLabelDiff(Layout, LastLabel, Loc.getLabel());
}
compressAnnotation(BinaryAnnotationsOpCode::ChangeCodeLength, Buffer);
@@ -393,16 +462,41 @@ void CodeViewContext::encodeDefRange(MCAsmLayout &Layout,
Fixups.clear();
raw_svector_ostream OS(Contents);
- // Write down each range where the variable is defined.
+ // Compute all the sizes up front.
+ SmallVector<std::pair<unsigned, unsigned>, 4> GapAndRangeSizes;
+ const MCSymbol *LastLabel = nullptr;
for (std::pair<const MCSymbol *, const MCSymbol *> Range : Frag.getRanges()) {
+ unsigned GapSize =
+ LastLabel ? computeLabelDiff(Layout, LastLabel, Range.first) : 0;
unsigned RangeSize = computeLabelDiff(Layout, Range.first, Range.second);
+ GapAndRangeSizes.push_back({GapSize, RangeSize});
+ LastLabel = Range.second;
+ }
+
+ // Write down each range where the variable is defined.
+ for (size_t I = 0, E = Frag.getRanges().size(); I != E;) {
+ // If the combined size of multiple consecutive ranges stays under the max,
+ // merge those ranges into one record and emit the gaps between them.
+ const MCSymbol *RangeBegin = Frag.getRanges()[I].first;
+ unsigned RangeSize = GapAndRangeSizes[I].second;
+ size_t J = I + 1;
+ for (; J != E; ++J) {
+ unsigned GapAndRangeSize = GapAndRangeSizes[J].first + GapAndRangeSizes[J].second;
+ if (RangeSize + GapAndRangeSize > MaxDefRange)
+ break;
+ RangeSize += GapAndRangeSize;
+ }
+ unsigned NumGaps = J - I - 1;
+
+ support::endian::Writer<support::little> LEWriter(OS);
+
unsigned Bias = 0;
// We must split the range into chunks of MaxDefRange, this is a fundamental
// limitation of the file format.
do {
uint16_t Chunk = std::min((uint32_t)MaxDefRange, RangeSize);
- const MCSymbolRefExpr *SRE = MCSymbolRefExpr::create(Range.first, Ctx);
+ const MCSymbolRefExpr *SRE = MCSymbolRefExpr::create(RangeBegin, Ctx);
const MCBinaryExpr *BE =
MCBinaryExpr::createAdd(SRE, MCConstantExpr::create(Bias, Ctx), Ctx);
MCValue Res;
@@ -413,8 +507,8 @@ void CodeViewContext::encodeDefRange(MCAsmLayout &Layout,
StringRef FixedSizePortion = Frag.getFixedSizePortion();
// Our record is a fixed sized prefix and a LocalVariableAddrRange that we
// are artificially constructing.
- size_t RecordSize =
- FixedSizePortion.size() + sizeof(LocalVariableAddrRange);
+ size_t RecordSize = FixedSizePortion.size() +
+ sizeof(LocalVariableAddrRange) + 4 * NumGaps;
// Write out the record size.
support::endian::Writer<support::little>(OS).write<uint16_t>(RecordSize);
// Write out the fixed size prefix.
@@ -427,12 +521,25 @@ void CodeViewContext::encodeDefRange(MCAsmLayout &Layout,
Fixups.push_back(MCFixup::create(Contents.size(), BE, FK_SecRel_2));
Contents.resize(Contents.size() + 2); // Fixup for section index.
// Write down the range's extent.
- support::endian::Writer<support::little>(OS).write<uint16_t>(Chunk);
+ LEWriter.write<uint16_t>(Chunk);
// Move on to the next range.
Bias += Chunk;
RangeSize -= Chunk;
} while (RangeSize > 0);
+
+ // Emit the gaps afterwards.
+ assert((NumGaps == 0 || Bias < MaxDefRange) &&
+ "large ranges should not have gaps");
+ unsigned GapStartOffset = GapAndRangeSizes[I].second;
+ for (++I; I != J; ++I) {
+ unsigned GapSize, RangeSize;
+ assert(I < GapAndRangeSizes.size());
+ std::tie(GapSize, RangeSize) = GapAndRangeSizes[I];
+ LEWriter.write<uint16_t>(GapStartOffset);
+ LEWriter.write<uint16_t>(RangeSize);
+ GapStartOffset += GapSize + RangeSize;
+ }
}
}
@@ -442,7 +549,8 @@ void CodeViewContext::encodeDefRange(MCAsmLayout &Layout,
// a line entry made for it is made.
//
void MCCVLineEntry::Make(MCObjectStreamer *MCOS) {
- if (!MCOS->getContext().getCVLocSeen())
+ CodeViewContext &CVC = MCOS->getContext().getCVContext();
+ if (!CVC.getCVLocSeen())
return;
// Create a symbol at in the current section for use in the line entry.
@@ -451,14 +559,14 @@ void MCCVLineEntry::Make(MCObjectStreamer *MCOS) {
MCOS->EmitLabel(LineSym);
// Get the current .loc info saved in the context.
- const MCCVLoc &CVLoc = MCOS->getContext().getCurrentCVLoc();
+ const MCCVLoc &CVLoc = CVC.getCurrentCVLoc();
// Create a (local) line entry with the symbol and the current .loc info.
MCCVLineEntry LineEntry(LineSym, CVLoc);
// clear CVLocSeen saying the current .loc info is now used.
- MCOS->getContext().clearCVLocSeen();
+ CVC.clearCVLocSeen();
// Add the line entry to this section's entries.
- MCOS->getContext().getCVContext().addLineEntry(LineEntry);
+ CVC.addLineEntry(LineEntry);
}
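
The def-range emission above first tabulates each range's size and its gap to the previous range, then greedily folds consecutive ranges into one record while the combined extent stays under MaxDefRange, emitting the interior gaps afterwards. A standalone sketch of just that grouping step (plain C++; the 50-byte cap in main is an arbitrary example, not the real MaxDefRange):

#include <iostream>
#include <utility>
#include <vector>

// Each element is {gap to previous range, size of this range}, as in the patch.
using GapAndRange = std::pair<unsigned, unsigned>;

// Group consecutive ranges so each group's total extent (ranges plus interior
// gaps) stays under MaxDefRange; returns one {firstIndex, count} per group.
std::vector<std::pair<size_t, size_t>>
groupRanges(const std::vector<GapAndRange> &GR, unsigned MaxDefRange) {
  std::vector<std::pair<size_t, size_t>> Groups;
  for (size_t I = 0, E = GR.size(); I != E;) {
    unsigned Extent = GR[I].second;
    size_t J = I + 1;
    for (; J != E; ++J) {
      unsigned Next = GR[J].first + GR[J].second;
      if (Extent + Next > MaxDefRange)
        break;
      Extent += Next;
    }
    Groups.push_back({I, J - I});   // J - I - 1 interior gaps get emitted
    I = J;
  }
  return Groups;
}

int main() {
  // Three 20-byte ranges separated by 4-byte gaps, with a 50-byte cap:
  // the first two merge (20 + 4 + 20 = 44), the third starts a new group.
  auto G = groupRanges({{0, 20}, {4, 20}, {4, 20}}, 50);
  for (auto &P : G)
    std::cout << "group at " << P.first << " of " << P.second << " ranges\n";
}
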
diff --git a/contrib/llvm/lib/MC/MCContext.cpp b/contrib/llvm/lib/MC/MCContext.cpp
index 47ed1ca3add5..4798991ceed6 100644
--- a/contrib/llvm/lib/MC/MCContext.cpp
+++ b/contrib/llvm/lib/MC/MCContext.cpp
@@ -125,15 +125,15 @@ MCSymbol *MCContext::getOrCreateSymbol(const Twine &Name) {
}
MCSymbolELF *MCContext::getOrCreateSectionSymbol(const MCSectionELF &Section) {
- MCSymbolELF *&Sym = SectionSymbols[&Section];
+ MCSymbol *&Sym = SectionSymbols[&Section];
if (Sym)
- return Sym;
+ return cast<MCSymbolELF>(Sym);
StringRef Name = Section.getSectionName();
auto NameIter = UsedNames.insert(std::make_pair(Name, false)).first;
Sym = new (&*NameIter, *this) MCSymbolELF(&*NameIter, /*isTemporary*/ false);
- return Sym;
+ return cast<MCSymbolELF>(Sym);
}
MCSymbol *MCContext::getOrCreateFrameAllocSymbol(StringRef FuncName,
@@ -173,7 +173,7 @@ MCSymbol *MCContext::createSymbol(StringRef Name, bool AlwaysAddSuffix,
if (CanBeUnnamed && !UseNamesOnTempLabels)
return createSymbolImpl(nullptr, true);
- // Determine whether this is an user writter assembler temporary or normal
+ // Determine whether this is a user written assembler temporary or normal
// label, if used.
bool IsTemporary = CanBeUnnamed;
if (AllowTemporaryLabels && !IsTemporary)
@@ -260,6 +260,13 @@ MCSymbol *MCContext::lookupSymbol(const Twine &Name) const {
return Symbols.lookup(NameRef);
}
+void MCContext::setSymbolValue(MCStreamer &Streamer,
+ StringRef Sym,
+ uint64_t Val) {
+ auto Symbol = getOrCreateSymbol(Sym);
+ Streamer.EmitAssignment(Symbol, MCConstantExpr::create(Val, *this));
+}
+
//===----------------------------------------------------------------------===//
// Section Management
//===----------------------------------------------------------------------===//
@@ -361,7 +368,9 @@ MCSectionELF *MCContext::getELFSection(const Twine &Section, unsigned Type,
StringRef CachedName = Entry.first.SectionName;
SectionKind Kind;
- if (Flags & ELF::SHF_EXECINSTR)
+ if (Flags & ELF::SHF_ARM_PURECODE)
+ Kind = SectionKind::getExecuteOnly();
+ else if (Flags & ELF::SHF_EXECINSTR)
Kind = SectionKind::getText();
else
Kind = SectionKind::getReadOnly();
@@ -494,14 +503,6 @@ CodeViewContext &MCContext::getCVContext() {
return *CVContext.get();
}
-unsigned MCContext::getCVFile(StringRef FileName, unsigned FileNumber) {
- return getCVContext().addFile(FileNumber, FileName) ? FileNumber : 0;
-}
-
-bool MCContext::isValidCVFileNumber(unsigned FileNumber) {
- return getCVContext().isValidFileNumber(FileNumber);
-}
-
//===----------------------------------------------------------------------===//
// Error Reporting
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/MC/MCDisassembler/Disassembler.cpp b/contrib/llvm/lib/MC/MCDisassembler/Disassembler.cpp
index 21e8748b797a..aa5072743bdf 100644
--- a/contrib/llvm/lib/MC/MCDisassembler/Disassembler.cpp
+++ b/contrib/llvm/lib/MC/MCDisassembler/Disassembler.cpp
@@ -9,6 +9,9 @@
#include "Disassembler.h"
#include "llvm-c/Disassembler.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
@@ -16,12 +19,19 @@
#include "llvm/MC/MCDisassembler/MCSymbolizer.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSchedule.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/TargetRegistry.h"
+#include <cassert>
+#include <cstddef>
+#include <cstring>
using namespace llvm;
@@ -116,7 +126,7 @@ LLVMDisasmContextRef LLVMCreateDisasm(const char *TT, void *DisInfo,
// LLVMDisasmDispose() disposes of the disassembler specified by the context.
//
void LLVMDisasmDispose(LLVMDisasmContextRef DCR){
- LLVMDisasmContext *DC = (LLVMDisasmContext *)DCR;
+ LLVMDisasmContext *DC = static_cast<LLVMDisasmContext *>(DCR);
delete DC;
}
@@ -128,7 +138,7 @@ static void emitComments(LLVMDisasmContext *DC,
StringRef Comments = DC->CommentsToEmit.str();
// Get the default information for printing a comment.
const MCAsmInfo *MAI = DC->getAsmInfo();
- const char *CommentBegin = MAI->getCommentString();
+ StringRef CommentBegin = MAI->getCommentString();
unsigned CommentColumn = MAI->getCommentColumn();
bool IsFirst = true;
while (!Comments.empty()) {
@@ -211,7 +221,6 @@ static int getLatency(LLVMDisasmContext *DC, const MCInst &Inst) {
return Latency;
}
-
/// \brief Emits latency information in DC->CommentStream for \p Inst, based
/// on the information available in \p DC.
static void emitLatency(LLVMDisasmContext *DC, const MCInst &Inst) {
@@ -239,7 +248,7 @@ static void emitLatency(LLVMDisasmContext *DC, const MCInst &Inst) {
size_t LLVMDisasmInstruction(LLVMDisasmContextRef DCR, uint8_t *Bytes,
uint64_t BytesSize, uint64_t PC, char *OutString,
size_t OutStringSize){
- LLVMDisasmContext *DC = (LLVMDisasmContext *)DCR;
+ LLVMDisasmContext *DC = static_cast<LLVMDisasmContext *>(DCR);
// Wrap the pointer to the Bytes, BytesSize and PC in a MemoryObject.
ArrayRef<uint8_t> Data(Bytes, BytesSize);
@@ -288,21 +297,21 @@ size_t LLVMDisasmInstruction(LLVMDisasmContextRef DCR, uint8_t *Bytes,
//
int LLVMSetDisasmOptions(LLVMDisasmContextRef DCR, uint64_t Options){
if (Options & LLVMDisassembler_Option_UseMarkup){
- LLVMDisasmContext *DC = (LLVMDisasmContext *)DCR;
+ LLVMDisasmContext *DC = static_cast<LLVMDisasmContext *>(DCR);
MCInstPrinter *IP = DC->getIP();
- IP->setUseMarkup(1);
+ IP->setUseMarkup(true);
DC->addOptions(LLVMDisassembler_Option_UseMarkup);
Options &= ~LLVMDisassembler_Option_UseMarkup;
}
if (Options & LLVMDisassembler_Option_PrintImmHex){
- LLVMDisasmContext *DC = (LLVMDisasmContext *)DCR;
+ LLVMDisasmContext *DC = static_cast<LLVMDisasmContext *>(DCR);
MCInstPrinter *IP = DC->getIP();
- IP->setPrintImmHex(1);
+ IP->setPrintImmHex(true);
DC->addOptions(LLVMDisassembler_Option_PrintImmHex);
Options &= ~LLVMDisassembler_Option_PrintImmHex;
}
if (Options & LLVMDisassembler_Option_AsmPrinterVariant){
- LLVMDisasmContext *DC = (LLVMDisasmContext *)DCR;
+ LLVMDisasmContext *DC = static_cast<LLVMDisasmContext *>(DCR);
// Try to set up the new instruction printer.
const MCAsmInfo *MAI = DC->getAsmInfo();
const MCInstrInfo *MII = DC->getInstrInfo();
@@ -318,14 +327,14 @@ int LLVMSetDisasmOptions(LLVMDisasmContextRef DCR, uint64_t Options){
}
}
if (Options & LLVMDisassembler_Option_SetInstrComments) {
- LLVMDisasmContext *DC = (LLVMDisasmContext *)DCR;
+ LLVMDisasmContext *DC = static_cast<LLVMDisasmContext *>(DCR);
MCInstPrinter *IP = DC->getIP();
IP->setCommentStream(DC->CommentStream);
DC->addOptions(LLVMDisassembler_Option_SetInstrComments);
Options &= ~LLVMDisassembler_Option_SetInstrComments;
}
if (Options & LLVMDisassembler_Option_PrintLatency) {
- LLVMDisasmContext *DC = (LLVMDisasmContext *)DCR;
+ LLVMDisasmContext *DC = static_cast<LLVMDisasmContext *>(DCR);
DC->addOptions(LLVMDisassembler_Option_PrintLatency);
Options &= ~LLVMDisassembler_Option_PrintLatency;
}
diff --git a/contrib/llvm/lib/MC/MCDwarf.cpp b/contrib/llvm/lib/MC/MCDwarf.cpp
index 54b2c918c849..a7551a3283a3 100644
--- a/contrib/llvm/lib/MC/MCDwarf.cpp
+++ b/contrib/llvm/lib/MC/MCDwarf.cpp
@@ -122,7 +122,8 @@ EmitDwarfLineTable(MCObjectStreamer *MCOS, MCSection *Section,
MCOS->EmitIntValue(dwarf::DW_LNS_set_column, 1);
MCOS->EmitULEB128IntValue(Column);
}
- if (Discriminator != LineEntry.getDiscriminator()) {
+ if (Discriminator != LineEntry.getDiscriminator() &&
+ MCOS->getContext().getDwarfVersion() >= 4) {
Discriminator = LineEntry.getDiscriminator();
unsigned Size = getULEB128Size(Discriminator);
MCOS->EmitIntValue(dwarf::DW_LNS_extended_op, 1);
@@ -916,7 +917,7 @@ void MCGenDwarfLabelEntry::Make(MCSymbol *Symbol, MCStreamer *MCOS,
MCContext &context = MCOS->getContext();
// We won't create dwarf labels for symbols in sections that we are not
// generating debug info for.
- if (!context.getGenDwarfSectionSyms().count(MCOS->getCurrentSection().first))
+ if (!context.getGenDwarfSectionSyms().count(MCOS->getCurrentSectionOnly()))
return;
// The dwarf label's name does not have the symbol name's leading
diff --git a/contrib/llvm/lib/MC/MCELFStreamer.cpp b/contrib/llvm/lib/MC/MCELFStreamer.cpp
index 7d858c306d2e..0ef1b2a8bdca 100644
--- a/contrib/llvm/lib/MC/MCELFStreamer.cpp
+++ b/contrib/llvm/lib/MC/MCELFStreamer.cpp
@@ -329,8 +329,8 @@ void MCELFStreamer::EmitCommonSymbol(MCSymbol *S, uint64_t Size,
->setSize(MCConstantExpr::create(Size, getContext()));
}
-void MCELFStreamer::emitELFSize(MCSymbolELF *Symbol, const MCExpr *Value) {
- Symbol->setSize(Value);
+void MCELFStreamer::emitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
+ cast<MCSymbolELF>(Symbol)->setSize(Value);
}
void MCELFStreamer::EmitLocalCommonSymbol(MCSymbol *S, uint64_t Size,
diff --git a/contrib/llvm/lib/MC/MCExpr.cpp b/contrib/llvm/lib/MC/MCExpr.cpp
index 6f90ff843bd0..bcc43a54d620 100644
--- a/contrib/llvm/lib/MC/MCExpr.cpp
+++ b/contrib/llvm/lib/MC/MCExpr.cpp
@@ -129,12 +129,10 @@ void MCExpr::print(raw_ostream &OS, const MCAsmInfo *MAI, bool InParens) const {
llvm_unreachable("Invalid expression kind!");
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void MCExpr::dump() const {
dbgs() << *this;
dbgs() << '\n';
}
-#endif
/* *** */
@@ -277,6 +275,10 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
case VK_Hexagon_IE: return "IE";
case VK_Hexagon_IE_GOT: return "IEGOT";
case VK_WebAssembly_FUNCTION: return "FUNCTION";
+ case VK_AMDGPU_GOTPCREL32_LO: return "gotpcrel32@lo";
+ case VK_AMDGPU_GOTPCREL32_HI: return "gotpcrel32@hi";
+ case VK_AMDGPU_REL32_LO: return "rel32@lo";
+ case VK_AMDGPU_REL32_HI: return "rel32@hi";
}
llvm_unreachable("Invalid variant kind");
}
@@ -374,6 +376,10 @@ MCSymbolRefExpr::getVariantKindForName(StringRef Name) {
.Case("prel31", VK_ARM_PREL31)
.Case("sbrel", VK_ARM_SBREL)
.Case("tlsldo", VK_ARM_TLSLDO)
+ .Case("gotpcrel32@lo", VK_AMDGPU_GOTPCREL32_LO)
+ .Case("gotpcrel32@hi", VK_AMDGPU_GOTPCREL32_HI)
+ .Case("rel32@lo", VK_AMDGPU_REL32_LO)
+ .Case("rel32@hi", VK_AMDGPU_REL32_HI)
.Default(VK_Invalid);
}
diff --git a/contrib/llvm/lib/MC/MCFragment.cpp b/contrib/llvm/lib/MC/MCFragment.cpp
index 1eb1d2996cb1..8ff8f8aba1c1 100644
--- a/contrib/llvm/lib/MC/MCFragment.cpp
+++ b/contrib/llvm/lib/MC/MCFragment.cpp
@@ -10,6 +10,7 @@
#include "llvm/MC/MCFragment.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAsmLayout.h"
@@ -231,13 +232,7 @@ uint64_t llvm::computeBundlePadding(const MCAssembler &Assembler,
/* *** */
-void ilist_node_traits<MCFragment>::deleteNode(MCFragment *V) {
- V->destroy();
-}
-
-MCFragment::MCFragment() : Kind(FragmentType(~0)), HasInstructions(false),
- AlignToBundleEnd(false), BundlePadding(0) {
-}
+void ilist_alloc_traits<MCFragment>::deleteNode(MCFragment *V) { V->destroy(); }
MCFragment::~MCFragment() { }
@@ -315,7 +310,6 @@ raw_ostream &operator<<(raw_ostream &OS, const MCFixup &AF) {
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void MCFragment::dump() {
raw_ostream &OS = llvm::errs();
@@ -475,4 +469,3 @@ LLVM_DUMP_METHOD void MCAssembler::dump() {
}
OS << "]>\n";
}
-#endif
diff --git a/contrib/llvm/lib/MC/MCInst.cpp b/contrib/llvm/lib/MC/MCInst.cpp
index 16bc597cf3a2..2da8ecc4ff6a 100644
--- a/contrib/llvm/lib/MC/MCInst.cpp
+++ b/contrib/llvm/lib/MC/MCInst.cpp
@@ -34,12 +34,10 @@ void MCOperand::print(raw_ostream &OS) const {
OS << ">";
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void MCOperand::dump() const {
print(dbgs());
dbgs() << "\n";
}
-#endif
void MCInst::print(raw_ostream &OS) const {
OS << "<MCInst " << getOpcode();
@@ -65,9 +63,7 @@ void MCInst::dump_pretty(raw_ostream &OS, const MCInstPrinter *Printer,
OS << ">";
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void MCInst::dump() const {
print(dbgs());
dbgs() << "\n";
}
-#endif
diff --git a/contrib/llvm/lib/MC/MCLabel.cpp b/contrib/llvm/lib/MC/MCLabel.cpp
index d973fc93b98c..b443cbbbf43e 100644
--- a/contrib/llvm/lib/MC/MCLabel.cpp
+++ b/contrib/llvm/lib/MC/MCLabel.cpp
@@ -16,8 +16,6 @@ void MCLabel::print(raw_ostream &OS) const {
OS << '"' << getInstance() << '"';
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void MCLabel::dump() const {
print(dbgs());
}
-#endif
diff --git a/contrib/llvm/lib/MC/MCObjectFileInfo.cpp b/contrib/llvm/lib/MC/MCObjectFileInfo.cpp
index d05bcea14c98..8fd71f62e4e5 100644
--- a/contrib/llvm/lib/MC/MCObjectFileInfo.cpp
+++ b/contrib/llvm/lib/MC/MCObjectFileInfo.cpp
@@ -177,20 +177,6 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(const Triple &T) {
MachO::S_THREAD_LOCAL_VARIABLE_POINTERS,
SectionKind::getMetadata());
- if (!PositionIndependent) {
- StaticCtorSection = Ctx->getMachOSection("__TEXT", "__constructor", 0,
- SectionKind::getData());
- StaticDtorSection = Ctx->getMachOSection("__TEXT", "__destructor", 0,
- SectionKind::getData());
- } else {
- StaticCtorSection = Ctx->getMachOSection("__DATA", "__mod_init_func",
- MachO::S_MOD_INIT_FUNC_POINTERS,
- SectionKind::getData());
- StaticDtorSection = Ctx->getMachOSection("__DATA", "__mod_term_func",
- MachO::S_MOD_TERM_FUNC_POINTERS,
- SectionKind::getData());
- }
-
// Exception Handling.
LSDASection = Ctx->getMachOSection("__TEXT", "__gcc_except_tab", 0,
SectionKind::getReadOnlyWithRel());
@@ -311,6 +297,7 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T) {
if (Ctx->getAsmInfo()->getExceptionHandlingType() == ExceptionHandling::ARM)
break;
// Fallthrough if not using EHABI
+ LLVM_FALLTHROUGH;
case Triple::ppc:
case Triple::x86:
PersonalityEncoding = PositionIndependent
@@ -395,6 +382,14 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T) {
dwarf::DW_EH_PE_sdata4;
// We don't support PC-relative LSDA references in GAS so we use the default
// DW_EH_PE_absptr for those.
+
+ // FreeBSD must be explicit about the data size and use pcrel, since its
+ // assembler/linker won't do the automatic conversion that the Linux tools
+ // do.
+ if (T.isOSFreeBSD()) {
+ PersonalityEncoding |= dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+ LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+ }
break;
case Triple::ppc64:
case Triple::ppc64le:
@@ -498,12 +493,6 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(const Triple &T) {
Ctx->getELFSection(".rodata.cst32", ELF::SHT_PROGBITS,
ELF::SHF_ALLOC | ELF::SHF_MERGE, 32, "");
- StaticCtorSection = Ctx->getELFSection(".ctors", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC | ELF::SHF_WRITE);
-
- StaticDtorSection = Ctx->getELFSection(".dtors", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC | ELF::SHF_WRITE);
-
// Exception Handling Sections.
// FIXME: We're emitting LSDA info into a readonly section on ELF, even though
@@ -621,26 +610,6 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(const Triple &T) {
".rdata", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ,
SectionKind::getReadOnly());
- if (T.isKnownWindowsMSVCEnvironment() || T.isWindowsItaniumEnvironment()) {
- StaticCtorSection =
- Ctx->getCOFFSection(".CRT$XCU", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getReadOnly());
- StaticDtorSection =
- Ctx->getCOFFSection(".CRT$XTX", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ,
- SectionKind::getReadOnly());
- } else {
- StaticCtorSection = Ctx->getCOFFSection(
- ".ctors", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE,
- SectionKind::getData());
- StaticDtorSection = Ctx->getCOFFSection(
- ".dtors", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE,
- SectionKind::getData());
- }
-
// FIXME: We're emitting LSDA info into a readonly section on COFF, even
// though it contains relocatable pointers. In PIC mode, this is probably a
// big runtime hit for C++ apps. Either the contents of the LSDA need to be
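
For x86-64 FreeBSD, the ELF hunk above forces PC-relative sdata4 on both the personality and LSDA encodings, since the BSD assembler and linker do not perform the absolute-to-pcrel conversion the GNU tools do. A standalone sketch of how those encoding bits combine (the DW_EH_PE_* values follow the DWARF exception-header encoding; the baseline values in main are illustrative):

#include <cstdint>
#include <cstdio>

// DWARF exception-header encoding constants.
constexpr uint8_t DW_EH_PE_absptr   = 0x00;
constexpr uint8_t DW_EH_PE_sdata4   = 0x0b;
constexpr uint8_t DW_EH_PE_pcrel    = 0x10;
constexpr uint8_t DW_EH_PE_indirect = 0x80;

int main() {
  // Illustrative baseline encodings, then the FreeBSD adjustment from the
  // patch: force pcrel|sdata4 on both the personality and LSDA encodings.
  uint8_t Personality = DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4;
  uint8_t LSDA        = DW_EH_PE_absptr;

  bool IsFreeBSD = true;             // stand-in for T.isOSFreeBSD()
  if (IsFreeBSD) {
    Personality |= DW_EH_PE_pcrel | DW_EH_PE_sdata4;
    LSDA = DW_EH_PE_pcrel | DW_EH_PE_sdata4;
  }
  std::printf("personality=0x%02x lsda=0x%02x\n",
              static_cast<unsigned>(Personality), static_cast<unsigned>(LSDA));
}
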
diff --git a/contrib/llvm/lib/MC/MCObjectStreamer.cpp b/contrib/llvm/lib/MC/MCObjectStreamer.cpp
index d2ac0f50261d..cae5c1f8d156 100644
--- a/contrib/llvm/lib/MC/MCObjectStreamer.cpp
+++ b/contrib/llvm/lib/MC/MCObjectStreamer.cpp
@@ -13,6 +13,7 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCCodeView.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCExpr.h"
@@ -127,7 +128,7 @@ void MCObjectStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
flushPendingLabels(DF, DF->getContents().size());
MCCVLineEntry::Make(this);
- MCDwarfLineEntry::Make(this, getCurrentSection().first);
+ MCDwarfLineEntry::Make(this, getCurrentSectionOnly());
// Avoid fixups when possible.
int64_t AbsValue;
@@ -235,7 +236,7 @@ void MCObjectStreamer::EmitInstruction(const MCInst &Inst,
// Now that a machine instruction has been assembled into this section, make
// a line entry for any .loc directive that has been seen.
MCCVLineEntry::Make(this);
- MCDwarfLineEntry::Make(this, getCurrentSection().first);
+ MCDwarfLineEntry::Make(this, getCurrentSectionOnly());
// If this instruction doesn't need relaxation, just emit it as data.
MCAssembler &Assembler = getAssembler();
@@ -304,7 +305,7 @@ void MCObjectStreamer::EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
StringRef FileName) {
// In case we see two .loc directives in a row, make sure the
// first one gets a line entry.
- MCDwarfLineEntry::Make(this, getCurrentSection().first);
+ MCDwarfLineEntry::Make(this, getCurrentSectionOnly());
this->MCStreamer::EmitDwarfLocDirective(FileNo, Line, Column, Flags,
Isa, Discriminator, FileName);
@@ -368,13 +369,13 @@ void MCObjectStreamer::EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel,
void MCObjectStreamer::EmitCVLocDirective(unsigned FunctionId, unsigned FileNo,
unsigned Line, unsigned Column,
bool PrologueEnd, bool IsStmt,
- StringRef FileName) {
+ StringRef FileName, SMLoc Loc) {
// In case we see two .cv_loc directives in a row, make sure the
// first one gets a line entry.
MCCVLineEntry::Make(this);
this->MCStreamer::EmitCVLocDirective(FunctionId, FileNo, Line, Column,
- PrologueEnd, IsStmt, FileName);
+ PrologueEnd, IsStmt, FileName, Loc);
}
void MCObjectStreamer::EmitCVLinetableDirective(unsigned FunctionId,
@@ -387,14 +388,12 @@ void MCObjectStreamer::EmitCVLinetableDirective(unsigned FunctionId,
void MCObjectStreamer::EmitCVInlineLinetableDirective(
unsigned PrimaryFunctionId, unsigned SourceFileId, unsigned SourceLineNum,
- const MCSymbol *FnStartSym, const MCSymbol *FnEndSym,
- ArrayRef<unsigned> SecondaryFunctionIds) {
+ const MCSymbol *FnStartSym, const MCSymbol *FnEndSym) {
getContext().getCVContext().emitInlineLineTableForFunction(
*this, PrimaryFunctionId, SourceFileId, SourceLineNum, FnStartSym,
- FnEndSym, SecondaryFunctionIds);
+ FnEndSym);
this->MCStreamer::EmitCVInlineLinetableDirective(
- PrimaryFunctionId, SourceFileId, SourceLineNum, FnStartSym, FnEndSym,
- SecondaryFunctionIds);
+ PrimaryFunctionId, SourceFileId, SourceLineNum, FnStartSym, FnEndSym);
}
void MCObjectStreamer::EmitCVDefRangeDirective(
@@ -414,7 +413,7 @@ void MCObjectStreamer::EmitCVFileChecksumsDirective() {
void MCObjectStreamer::EmitBytes(StringRef Data) {
MCCVLineEntry::Make(this);
- MCDwarfLineEntry::Make(this, getCurrentSection().first);
+ MCDwarfLineEntry::Make(this, getCurrentSectionOnly());
MCDataFragment *DF = getOrCreateDataFragment();
flushPendingLabels(DF, DF->getContents().size());
DF->getContents().append(Data.begin(), Data.end());
@@ -429,7 +428,7 @@ void MCObjectStreamer::EmitValueToAlignment(unsigned ByteAlignment,
insert(new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit));
// Update the maximum alignment on the current section if necessary.
- MCSection *CurSec = getCurrentSection().first;
+ MCSection *CurSec = getCurrentSectionOnly();
if (ByteAlignment > CurSec->getAlignment())
CurSec->setAlignment(ByteAlignment);
}
@@ -441,8 +440,49 @@ void MCObjectStreamer::EmitCodeAlignment(unsigned ByteAlignment,
}
void MCObjectStreamer::emitValueToOffset(const MCExpr *Offset,
- unsigned char Value) {
- insert(new MCOrgFragment(*Offset, Value));
+ unsigned char Value,
+ SMLoc Loc) {
+ insert(new MCOrgFragment(*Offset, Value, Loc));
+}
+
+// Associate DTPRel32 fixup with data and resize data area
+void MCObjectStreamer::EmitDTPRel32Value(const MCExpr *Value) {
+ MCDataFragment *DF = getOrCreateDataFragment();
+ flushPendingLabels(DF, DF->getContents().size());
+
+ DF->getFixups().push_back(MCFixup::create(DF->getContents().size(),
+ Value, FK_DTPRel_4));
+ DF->getContents().resize(DF->getContents().size() + 4, 0);
+}
+
+// Associate DTPRel64 fixup with data and resize data area
+void MCObjectStreamer::EmitDTPRel64Value(const MCExpr *Value) {
+ MCDataFragment *DF = getOrCreateDataFragment();
+ flushPendingLabels(DF, DF->getContents().size());
+
+ DF->getFixups().push_back(MCFixup::create(DF->getContents().size(),
+ Value, FK_DTPRel_8));
+ DF->getContents().resize(DF->getContents().size() + 8, 0);
+}
+
+// Associate TPRel32 fixup with data and resize data area
+void MCObjectStreamer::EmitTPRel32Value(const MCExpr *Value) {
+ MCDataFragment *DF = getOrCreateDataFragment();
+ flushPendingLabels(DF, DF->getContents().size());
+
+ DF->getFixups().push_back(MCFixup::create(DF->getContents().size(),
+ Value, FK_TPRel_4));
+ DF->getContents().resize(DF->getContents().size() + 4, 0);
+}
+
+// Associate TPRel64 fixup with data and resize data area
+void MCObjectStreamer::EmitTPRel64Value(const MCExpr *Value) {
+ MCDataFragment *DF = getOrCreateDataFragment();
+ flushPendingLabels(DF, DF->getContents().size());
+
+ DF->getFixups().push_back(MCFixup::create(DF->getContents().size(),
+ Value, FK_TPRel_8));
+ DF->getContents().resize(DF->getContents().size() + 8, 0);
}
// Associate GPRel32 fixup with data and resize data area
@@ -455,7 +495,7 @@ void MCObjectStreamer::EmitGPRel32Value(const MCExpr *Value) {
DF->getContents().resize(DF->getContents().size() + 4, 0);
}
-// Associate GPRel32 fixup with data and resize data area
+// Associate GPRel64 fixup with data and resize data area
void MCObjectStreamer::EmitGPRel64Value(const MCExpr *Value) {
MCDataFragment *DF = getOrCreateDataFragment();
flushPendingLabels(DF, DF->getContents().size());
@@ -491,9 +531,7 @@ bool MCObjectStreamer::EmitRelocDirective(const MCExpr &Offset, StringRef Name,
}
void MCObjectStreamer::emitFill(uint64_t NumBytes, uint8_t FillValue) {
- const MCSection *Sec = getCurrentSection().first;
- (void)Sec;
- assert(Sec && "need a section");
+ assert(getCurrentSectionOnly() && "need a section");
insert(new MCFillFragment(FillValue, NumBytes));
}
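
Each new Emit{D,}TPRel{32,64}Value above follows the same shape: append a fixup that points at the current end of the data fragment, then grow the fragment by the value's width so the bytes can be patched later. A standalone sketch of that record-fixup-and-reserve pattern (plain C++; FixupKind and DataFragment are illustrative stand-ins, not the MC types):

#include <cstddef>
#include <cstdint>
#include <string>
#include <vector>

enum class FixupKind { DTPRel4, DTPRel8, TPRel4, TPRel8 };

struct Fixup {
  size_t Offset;      // where in the fragment the value will be patched
  std::string Expr;   // expression to resolve later
  FixupKind Kind;
};

struct DataFragment {
  std::vector<uint8_t> Contents;
  std::vector<Fixup> Fixups;

  // Record a fixup at the current end and reserve Size zero bytes for it,
  // mirroring the push_back + resize pairs in the patch.
  void emitFixupValue(const std::string &Expr, FixupKind Kind, unsigned Size) {
    Fixups.push_back({Contents.size(), Expr, Kind});
    Contents.resize(Contents.size() + Size, 0);
  }
};

int main() {
  DataFragment DF;
  DF.emitFixupValue("tls_var@DTPREL", FixupKind::DTPRel4, 4);
  DF.emitFixupValue("tls_var@TPREL", FixupKind::TPRel8, 8);
  return DF.Contents.size() == 12 ? 0 : 1;
}
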
diff --git a/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp b/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp
index d56071aea4df..87ecf9e0227f 100644
--- a/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp
+++ b/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp
@@ -12,19 +12,28 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCParser/AsmLexer.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/Support/SMLoc.h"
+#include "llvm/Support/SaveAndRestore.h"
+#include <cassert>
#include <cctype>
-#include <cerrno>
#include <cstdio>
-#include <cstdlib>
+#include <cstring>
+#include <string>
+#include <tuple>
+#include <utility>
+
using namespace llvm;
-AsmLexer::AsmLexer(const MCAsmInfo &MAI) : MAI(MAI) {
- CurPtr = nullptr;
- IsAtStartOfLine = true;
- IsAtStartOfStatement = true;
+AsmLexer::AsmLexer(const MCAsmInfo &MAI)
+ : MAI(MAI), CurPtr(nullptr), IsAtStartOfLine(true),
+ IsAtStartOfStatement(true), IsParsingMSInlineAsm(false),
+ IsPeeking(false) {
AllowAtInIdentifier = !StringRef(MAI.getCommentString()).startswith("@");
}
@@ -133,6 +142,7 @@ static bool IsIdentifierChar(char c, bool AllowAt) {
return isalnum(c) || c == '_' || c == '$' || c == '.' ||
(c == '@' && AllowAt) || c == '?';
}
+
AsmToken AsmLexer::LexIdentifier() {
// Check for floating point literals.
if (CurPtr[-1] == '.' && isdigit(*CurPtr)) {
@@ -171,12 +181,19 @@ AsmToken AsmLexer::LexSlash() {
// C Style comment.
++CurPtr; // skip the star.
+ const char *CommentTextStart = CurPtr;
while (CurPtr != CurBuf.end()) {
switch (*CurPtr++) {
case '*':
// End of the comment?
if (*CurPtr != '/')
break;
+ // If we have a CommentConsumer, notify it about the comment.
+ if (CommentConsumer) {
+ CommentConsumer->HandleComment(
+ SMLoc::getFromPointer(CommentTextStart),
+ StringRef(CommentTextStart, CurPtr - 1 - CommentTextStart));
+ }
++CurPtr; // End the */.
return AsmToken(AsmToken::Comment,
StringRef(TokStart, CurPtr - TokStart));
@@ -192,12 +209,20 @@ AsmToken AsmLexer::LexLineComment() {
// comment. While it would be nicer to leave this two tokens,
// backwards compatibility with TargetParsers makes keeping this in this form
// better.
+ const char *CommentTextStart = CurPtr;
int CurChar = getNextChar();
while (CurChar != '\n' && CurChar != '\r' && CurChar != EOF)
CurChar = getNextChar();
+ // If we have a CommentConsumer, notify it about the comment.
+ if (CommentConsumer) {
+ CommentConsumer->HandleComment(
+ SMLoc::getFromPointer(CommentTextStart),
+ StringRef(CommentTextStart, CurPtr - 1 - CommentTextStart));
+ }
+
IsAtStartOfLine = true;
- // Whis is a whole line comment. leave newline
+ // This is a whole line comment. leave newline
if (IsAtStartOfStatement)
return AsmToken(AsmToken::EndOfStatement,
StringRef(TokStart, CurPtr - TokStart));
@@ -222,7 +247,7 @@ static void SkipIgnoredIntegerSuffix(const char *&CurPtr) {
static unsigned doLookAhead(const char *&CurPtr, unsigned DefaultRadix) {
const char *FirstHex = nullptr;
const char *LookAhead = CurPtr;
- while (1) {
+ while (true) {
if (isdigit(*LookAhead)) {
++LookAhead;
} else if (isxdigit(*LookAhead)) {
@@ -255,6 +280,45 @@ static AsmToken intToken(StringRef Ref, APInt &Value)
/// Hex integer: 0x[0-9a-fA-F]+ or [0x]?[0-9][0-9a-fA-F]*[hH]
/// Decimal integer: [1-9][0-9]*
AsmToken AsmLexer::LexDigit() {
+ // MASM-flavor binary integer: [01]+[bB]
+ // MASM-flavor hexadecimal integer: [0-9][0-9a-fA-F]*[hH]
+ if (IsParsingMSInlineAsm && isdigit(CurPtr[-1])) {
+ const char *FirstNonBinary = (CurPtr[-1] != '0' && CurPtr[-1] != '1') ?
+ CurPtr - 1 : nullptr;
+ const char *OldCurPtr = CurPtr;
+ while (isxdigit(*CurPtr)) {
+ if (*CurPtr != '0' && *CurPtr != '1' && !FirstNonBinary)
+ FirstNonBinary = CurPtr;
+ ++CurPtr;
+ }
+
+ unsigned Radix = 0;
+ if (*CurPtr == 'h' || *CurPtr == 'H') {
+ // hexadecimal number
+ ++CurPtr;
+ Radix = 16;
+ } else if (FirstNonBinary && FirstNonBinary + 1 == CurPtr &&
+ (*FirstNonBinary == 'b' || *FirstNonBinary == 'B'))
+ Radix = 2;
+
+ if (Radix == 2 || Radix == 16) {
+ StringRef Result(TokStart, CurPtr - TokStart);
+ APInt Value(128, 0, true);
+
+ if (Result.drop_back().getAsInteger(Radix, Value))
+ return ReturnError(TokStart, Radix == 2 ? "invalid binary number" :
+ "invalid hexdecimal number");
+
+ // MSVC accepts and ignores type suffixes on integer literals.
+ SkipIgnoredIntegerSuffix(CurPtr);
+
+ return intToken(Result, Value);
+ }
+
+ // octal/decimal integers, or floating point numbers, fall through
+ CurPtr = OldCurPtr;
+ }
+
// Decimal integer: [1-9][0-9]*
if (CurPtr[-1] != '0' || CurPtr[0] == '.') {
unsigned Radix = doLookAhead(CurPtr, 10);
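
The MS inline-asm branch above accepts MASM-style radix suffixes: a trailing h/H selects radix 16, and a trailing b/B selects radix 2 when every preceding digit is 0 or 1; anything else falls back to the normal octal/decimal/float path. Below is a standalone sketch of that suffix classification, using std::stoull instead of APInt; the helper name is illustrative.

    #include <cctype>
    #include <cstdint>
    #include <iostream>
    #include <string>

    // Returns true and sets Value if Text is a MASM-style binary or hex literal.
    bool lexMasmRadixSuffix(const std::string &Text, uint64_t &Value) {
      if (Text.empty() || !std::isdigit(static_cast<unsigned char>(Text[0])))
        return false;                     // MASM literals must start with a decimal digit.
      char Suffix = Text.back();
      std::string Digits = Text.substr(0, Text.size() - 1);
      int Radix = 0;
      if (Suffix == 'h' || Suffix == 'H')
        Radix = 16;
      else if ((Suffix == 'b' || Suffix == 'B') &&
               Digits.find_first_not_of("01") == std::string::npos)
        Radix = 2;
      if (Radix == 0)
        return false;                     // octal/decimal/float: use the normal path.
      Value = std::stoull(Digits, nullptr, Radix);
      return true;
    }

    int main() {
      uint64_t V;
      if (lexMasmRadixSuffix("0FFh", V))  std::cout << V << "\n";  // 255
      if (lexMasmRadixSuffix("1010b", V)) std::cout << V << "\n";  // 10
    }
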
@@ -283,7 +347,7 @@ AsmToken AsmLexer::LexDigit() {
return intToken(Result, Value);
}
- if ((*CurPtr == 'b') || (*CurPtr == 'B')) {
+ if (!IsParsingMSInlineAsm && ((*CurPtr == 'b') || (*CurPtr == 'B'))) {
++CurPtr;
// See if we actually have "0b" as part of something like "jmp 0b\n"
if (!isdigit(CurPtr[0])) {
@@ -332,7 +396,7 @@ AsmToken AsmLexer::LexDigit() {
return ReturnError(TokStart, "invalid hexadecimal number");
// Consume the optional [hH].
- if (*CurPtr == 'h' || *CurPtr == 'H')
+ if (!IsParsingMSInlineAsm && (*CurPtr == 'h' || *CurPtr == 'H'))
++CurPtr;
// The darwin/x86 (and x86-64) assembler accepts and ignores ULL and LL
@@ -397,7 +461,6 @@ AsmToken AsmLexer::LexSingleQuote() {
return AsmToken(AsmToken::Integer, Res, Value);
}
-
/// LexQuote: String: "..."
AsmToken AsmLexer::LexQuote() {
int CurChar = getNextChar();
@@ -439,17 +502,15 @@ StringRef AsmLexer::LexUntilEndOfLine() {
size_t AsmLexer::peekTokens(MutableArrayRef<AsmToken> Buf,
bool ShouldSkipSpace) {
- const char *SavedTokStart = TokStart;
- const char *SavedCurPtr = CurPtr;
- bool SavedAtStartOfLine = IsAtStartOfLine;
- bool SavedAtStartOfStatement = IsAtStartOfStatement;
- bool SavedSkipSpace = SkipSpace;
-
+ SaveAndRestore<const char *> SavedTokenStart(TokStart);
+ SaveAndRestore<const char *> SavedCurPtr(CurPtr);
+ SaveAndRestore<bool> SavedAtStartOfLine(IsAtStartOfLine);
+ SaveAndRestore<bool> SavedAtStartOfStatement(IsAtStartOfStatement);
+ SaveAndRestore<bool> SavedSkipSpace(SkipSpace, ShouldSkipSpace);
+ SaveAndRestore<bool> SavedIsPeeking(IsPeeking, true);
std::string SavedErr = getErr();
SMLoc SavedErrLoc = getErrLoc();
- SkipSpace = ShouldSkipSpace;
-
size_t ReadCount;
for (ReadCount = 0; ReadCount < Buf.size(); ++ReadCount) {
AsmToken Token = LexToken();
@@ -461,27 +522,20 @@ size_t AsmLexer::peekTokens(MutableArrayRef<AsmToken> Buf,
}
SetError(SavedErrLoc, SavedErr);
-
- SkipSpace = SavedSkipSpace;
- IsAtStartOfLine = SavedAtStartOfLine;
- IsAtStartOfStatement = SavedAtStartOfStatement;
- CurPtr = SavedCurPtr;
- TokStart = SavedTokStart;
-
return ReadCount;
}
bool AsmLexer::isAtStartOfComment(const char *Ptr) {
- const char *CommentString = MAI.getCommentString();
+ StringRef CommentString = MAI.getCommentString();
- if (CommentString[1] == '\0')
+ if (CommentString.size() == 1)
return CommentString[0] == Ptr[0];
- // FIXME: special case for the bogus "##" comment string in X86MCAsmInfoDarwin
+ // Allow # preprocessor comments to also be counted as comments for "##" cases
if (CommentString[1] == '#')
return CommentString[0] == Ptr[0];
- return strncmp(Ptr, CommentString, strlen(CommentString)) == 0;
+ return strncmp(Ptr, CommentString.data(), CommentString.size()) == 0;
}
bool AsmLexer::isAtStatementSeparator(const char *Ptr) {
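
peekTokens() in the hunk above now relies on SaveAndRestore to snapshot lexer state, so every early return restores it automatically instead of requiring matching assignments before returning. A minimal standalone sketch of the idiom follows; it mirrors the shape of llvm/Support/SaveAndRestore.h but is not that header.

    #include <iostream>
    #include <utility>

    template <typename T> class SaveAndRestore {
      T &Ref;
      T Saved;
    public:
      explicit SaveAndRestore(T &X) : Ref(X), Saved(X) {}
      SaveAndRestore(T &X, T NewValue) : Ref(X), Saved(X) { X = std::move(NewValue); }
      ~SaveAndRestore() { Ref = std::move(Saved); }   // restored on every exit path
    };

    int main() {
      bool SkipSpace = true;
      {
        SaveAndRestore<bool> S(SkipSpace, false);     // temporarily override
        std::cout << SkipSpace << "\n";               // prints 0
      }                                               // restored here
      std::cout << SkipSpace << "\n";                 // prints 1
    }
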
@@ -494,7 +548,7 @@ AsmToken AsmLexer::LexToken() {
// This always consumes at least one character.
int CurChar = getNextChar();
- if (CurChar == '#' && IsAtStartOfStatement) {
+ if (!IsPeeking && CurChar == '#' && IsAtStartOfStatement) {
// If this starts with a '#', this may be a cpp
// hash directive and otherwise a line comment.
AsmToken TokenBuf[2];
@@ -600,7 +654,46 @@ AsmToken AsmLexer::LexToken() {
return AsmToken(AsmToken::ExclaimEqual, StringRef(TokStart, 2));
}
return AsmToken(AsmToken::Exclaim, StringRef(TokStart, 1));
- case '%': return AsmToken(AsmToken::Percent, StringRef(TokStart, 1));
+ case '%':
+ if (MAI.hasMipsExpressions()) {
+ AsmToken::TokenKind Operator;
+ unsigned OperatorLength;
+
+ std::tie(Operator, OperatorLength) =
+ StringSwitch<std::pair<AsmToken::TokenKind, unsigned>>(
+ StringRef(CurPtr))
+ .StartsWith("call16", {AsmToken::PercentCall16, 7})
+ .StartsWith("call_hi", {AsmToken::PercentCall_Hi, 8})
+ .StartsWith("call_lo", {AsmToken::PercentCall_Lo, 8})
+ .StartsWith("dtprel_hi", {AsmToken::PercentDtprel_Hi, 10})
+ .StartsWith("dtprel_lo", {AsmToken::PercentDtprel_Lo, 10})
+ .StartsWith("got_disp", {AsmToken::PercentGot_Disp, 9})
+ .StartsWith("got_hi", {AsmToken::PercentGot_Hi, 7})
+ .StartsWith("got_lo", {AsmToken::PercentGot_Lo, 7})
+ .StartsWith("got_ofst", {AsmToken::PercentGot_Ofst, 9})
+ .StartsWith("got_page", {AsmToken::PercentGot_Page, 9})
+ .StartsWith("gottprel", {AsmToken::PercentGottprel, 9})
+ .StartsWith("got", {AsmToken::PercentGot, 4})
+ .StartsWith("gp_rel", {AsmToken::PercentGp_Rel, 7})
+ .StartsWith("higher", {AsmToken::PercentHigher, 7})
+ .StartsWith("highest", {AsmToken::PercentHighest, 8})
+ .StartsWith("hi", {AsmToken::PercentHi, 3})
+ .StartsWith("lo", {AsmToken::PercentLo, 3})
+ .StartsWith("neg", {AsmToken::PercentNeg, 4})
+ .StartsWith("pcrel_hi", {AsmToken::PercentPcrel_Hi, 9})
+ .StartsWith("pcrel_lo", {AsmToken::PercentPcrel_Lo, 9})
+ .StartsWith("tlsgd", {AsmToken::PercentTlsgd, 6})
+ .StartsWith("tlsldm", {AsmToken::PercentTlsldm, 7})
+ .StartsWith("tprel_hi", {AsmToken::PercentTprel_Hi, 9})
+ .StartsWith("tprel_lo", {AsmToken::PercentTprel_Lo, 9})
+ .Default({AsmToken::Percent, 1});
+
+ if (Operator != AsmToken::Percent) {
+ CurPtr += OperatorLength - 1;
+ return AsmToken(Operator, StringRef(TokStart, OperatorLength));
+ }
+ }
+ return AsmToken(AsmToken::Percent, StringRef(TokStart, 1));
case '/':
IsAtStartOfStatement = OldIsAtStartOfStatement;
return LexSlash();
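
The '%' case above resolves MIPS relocation operators with a first-match StringSwitch, which is why the longer spellings ("got_hi", "got_disp") are listed before their prefix ("got") and why each recorded length includes the '%' itself. A standalone sketch of the same ordered prefix lookup, with an illustrative table and token enum rather than the real AsmToken kinds:

    #include <cstring>
    #include <iostream>
    #include <utility>

    enum TokenKind { PercentGot, PercentGot_Hi, PercentHi, Percent };

    std::pair<TokenKind, unsigned> lexPercentOperator(const char *Cur) {
      static const struct { const char *Prefix; TokenKind Kind; } Table[] = {
          {"got_hi", PercentGot_Hi},   // longer entries first: first match wins
          {"got",    PercentGot},
          {"hi",     PercentHi},
      };
      for (const auto &E : Table)
        if (std::strncmp(Cur, E.Prefix, std::strlen(E.Prefix)) == 0)
          return {E.Kind, unsigned(std::strlen(E.Prefix)) + 1};  // +1 for the '%'
      return {Percent, 1};
    }

    int main() {
      std::cout << lexPercentOperator("got_hi(sym)").second << "\n";  // 7
      std::cout << lexPercentOperator("hi(sym)").second << "\n";      // 3
    }
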
diff --git a/contrib/llvm/lib/MC/MCParser/AsmParser.cpp b/contrib/llvm/lib/MC/MCParser/AsmParser.cpp
index 1548aee84227..da54155b3b9d 100644
--- a/contrib/llvm/lib/MC/MCParser/AsmParser.cpp
+++ b/contrib/llvm/lib/MC/MCParser/AsmParser.cpp
@@ -12,42 +12,69 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/APFloat.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/None.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCCodeView.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCParser/AsmCond.h"
#include "llvm/MC/MCParser/AsmLexer.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCAsmParserUtils.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Dwarf.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SMLoc.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cassert>
#include <cctype>
+#include <cstddef>
+#include <cstdint>
#include <deque>
+#include <memory>
+#include <sstream>
#include <string>
+#include <tuple>
+#include <utility>
#include <vector>
+
using namespace llvm;
MCAsmParserSemaCallback::~MCAsmParserSemaCallback() {}
+static cl::opt<unsigned> AsmMacroMaxNestingDepth(
+ "asm-macro-max-nesting-depth", cl::init(20), cl::Hidden,
+ cl::desc("The maximum nesting depth allowed for assembly macros."));
+
namespace {
+
/// \brief Helper types for tracking macro definitions.
typedef std::vector<AsmToken> MCAsmMacroArgument;
typedef std::vector<MCAsmMacroArgument> MCAsmMacroArguments;
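
The hidden cl::opt added above makes the macro nesting limit tunable from the command line instead of hard-coded at 20. A short sketch of declaring and reading such an option, assuming the program links against LLVM's Support library and forwards argv to cl::ParseCommandLineOptions; the option name here is illustrative, not the one registered above.

    #include "llvm/Support/CommandLine.h"
    #include <iostream>

    static llvm::cl::opt<unsigned> MaxDepth(
        "max-depth", llvm::cl::init(20), llvm::cl::Hidden,
        llvm::cl::desc("Illustrative depth limit"));

    int main(int argc, char **argv) {
      llvm::cl::ParseCommandLineOptions(argc, argv);
      std::cout << "limit = " << MaxDepth << "\n";   // e.g. run with -max-depth=40
    }
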
@@ -113,6 +140,7 @@ struct ParseStatementInfo {
class AsmParser : public MCAsmParser {
AsmParser(const AsmParser &) = delete;
void operator=(const AsmParser &) = delete;
+
private:
AsmLexer Lexer;
MCContext &Ctx;
@@ -150,9 +178,6 @@ private:
/// \brief Keeps track of how many .macro's have been instantiated.
unsigned NumOfMacroInstantiations;
- /// Flag tracking whether any errors have been encountered.
- unsigned HadError : 1;
-
/// The values from the last parsed cpp hash file line comment if any.
struct CppHashInfoTy {
StringRef Filename;
@@ -206,6 +231,9 @@ public:
MCAsmLexer &getLexer() override { return Lexer; }
MCContext &getContext() override { return Ctx; }
MCStreamer &getStreamer() override { return Out; }
+
+ CodeViewContext &getCVContext() { return Ctx.getCVContext(); }
+
unsigned getAssemblerDialect() override {
if (AssemblerDialect == ~0U)
return MAI.getAssemblerDialect();
@@ -216,16 +244,16 @@ public:
AssemblerDialect = i;
}
- void Note(SMLoc L, const Twine &Msg,
- ArrayRef<SMRange> Ranges = None) override;
- bool Warning(SMLoc L, const Twine &Msg,
- ArrayRef<SMRange> Ranges = None) override;
- bool Error(SMLoc L, const Twine &Msg,
- ArrayRef<SMRange> Ranges = None) override;
+ void Note(SMLoc L, const Twine &Msg, SMRange Range = None) override;
+ bool Warning(SMLoc L, const Twine &Msg, SMRange Range = None) override;
+ bool printError(SMLoc L, const Twine &Msg, SMRange Range = None) override;
const AsmToken &Lex() override;
- void setParsingInlineAsm(bool V) override { ParsingInlineAsm = V; }
+ void setParsingInlineAsm(bool V) override {
+ ParsingInlineAsm = V;
+ Lexer.setParsingMSInlineAsm(V);
+ }
bool isParsingInlineAsm() override { return ParsingInlineAsm; }
bool parseMSInlineAsm(void *AsmLoc, std::string &AsmString,
@@ -244,39 +272,20 @@ public:
SMLoc &EndLoc) override;
bool parseAbsoluteExpression(int64_t &Res) override;
+ /// \brief Parse a floating point expression using the float \p Semantics
+ /// and set \p Res to the value.
+ bool parseRealValue(const fltSemantics &Semantics, APInt &Res);
+
/// \brief Parse an identifier or string (as a quoted identifier)
/// and set \p Res to the identifier contents.
bool parseIdentifier(StringRef &Res) override;
void eatToEndOfStatement() override;
- void checkForValidSection() override;
-
- bool getTokenLoc(SMLoc &Loc) {
- Loc = getTok().getLoc();
- return false;
- }
-
- /// parseToken - If current token has the specified kind, eat it and
- /// return success. Otherwise, emit the specified error and return failure.
- bool parseToken(AsmToken::TokenKind T, const Twine &ErrMsg) {
- if (getTok().getKind() != T)
- return TokError(ErrMsg);
- Lex();
- return false;
- }
-
- bool parseIntToken(int64_t &V, const Twine &ErrMsg) {
- if (getTok().getKind() != AsmToken::Integer)
- return TokError(ErrMsg);
- V = getTok().getIntVal();
- Lex();
- return false;
- }
+ bool checkForValidSection() override;
/// }
private:
-
bool parseStatement(ParseStatementInfo &Info,
MCAsmParserSemaCallback *SI);
bool parseCurlyBlockScope(SmallVectorImpl<AsmRewrite>& AsmStrRewrites);
@@ -326,29 +335,19 @@ private:
void printMacroInstantiations();
void printMessage(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Msg,
- ArrayRef<SMRange> Ranges = None) const {
+ SMRange Range = None) const {
+ ArrayRef<SMRange> Ranges(Range);
SrcMgr.PrintMessage(Loc, Kind, Msg, Ranges);
}
static void DiagHandler(const SMDiagnostic &Diag, void *Context);
- bool check(bool P, SMLoc Loc, const Twine &Msg) {
- if (P)
- return Error(Loc, Msg);
- return false;
- }
-
- bool check(bool P, const Twine &Msg) {
- if (P)
- return TokError(Msg);
- return false;
- }
-
/// \brief Enter the specified file. This returns true on failure.
bool enterIncludeFile(const std::string &Filename);
/// \brief Process the specified file for the .incbin directive.
/// This returns true on failure.
- bool processIncbinFile(const std::string &Filename);
+ bool processIncbinFile(const std::string &Filename, int64_t Skip = 0,
+ const MCExpr *Count = nullptr, SMLoc Loc = SMLoc());
/// \brief Reset the current lexer position to that given by \p Loc. The
/// current token is not set; clients should ensure Lex() is called
@@ -379,12 +378,18 @@ private:
bool parseRegisterOrRegisterNumber(int64_t &Register, SMLoc DirectiveLoc);
+ bool parseCVFunctionId(int64_t &FunctionId, StringRef DirectiveName);
+ bool parseCVFileId(int64_t &FileId, StringRef DirectiveName);
+
// Generic (target and platform independent) directive parsing.
enum DirectiveKind {
DK_NO_DIRECTIVE, // Placeholder
DK_SET, DK_EQU, DK_EQUIV, DK_ASCII, DK_ASCIZ, DK_STRING, DK_BYTE, DK_SHORT,
DK_RELOC,
DK_VALUE, DK_2BYTE, DK_LONG, DK_INT, DK_4BYTE, DK_QUAD, DK_8BYTE, DK_OCTA,
+ DK_DC, DK_DC_A, DK_DC_B, DK_DC_D, DK_DC_L, DK_DC_S, DK_DC_W, DK_DC_X,
+ DK_DCB, DK_DCB_B, DK_DCB_D, DK_DCB_L, DK_DCB_S, DK_DCB_W, DK_DCB_X,
+ DK_DS, DK_DS_B, DK_DS_D, DK_DS_L, DK_DS_P, DK_DS_S, DK_DS_W, DK_DS_X,
DK_SINGLE, DK_FLOAT, DK_DOUBLE, DK_ALIGN, DK_ALIGN32, DK_BALIGN, DK_BALIGNW,
DK_BALIGNL, DK_P2ALIGN, DK_P2ALIGNW, DK_P2ALIGNL, DK_ORG, DK_FILL, DK_ENDR,
DK_BUNDLE_ALIGN_MODE, DK_BUNDLE_LOCK, DK_BUNDLE_UNLOCK,
@@ -397,8 +402,9 @@ private:
DK_IFNB, DK_IFC, DK_IFEQS, DK_IFNC, DK_IFNES, DK_IFDEF, DK_IFNDEF,
DK_IFNOTDEF, DK_ELSEIF, DK_ELSE, DK_ENDIF,
DK_SPACE, DK_SKIP, DK_FILE, DK_LINE, DK_LOC, DK_STABS,
- DK_CV_FILE, DK_CV_LOC, DK_CV_LINETABLE, DK_CV_INLINE_LINETABLE,
- DK_CV_DEF_RANGE, DK_CV_STRINGTABLE, DK_CV_FILECHECKSUMS,
+ DK_CV_FILE, DK_CV_FUNC_ID, DK_CV_INLINE_SITE_ID, DK_CV_LOC, DK_CV_LINETABLE,
+ DK_CV_INLINE_LINETABLE, DK_CV_DEF_RANGE, DK_CV_STRINGTABLE,
+ DK_CV_FILECHECKSUMS,
DK_CFI_SECTIONS, DK_CFI_STARTPROC, DK_CFI_ENDPROC, DK_CFI_DEF_CFA,
DK_CFI_DEF_CFA_OFFSET, DK_CFI_ADJUST_CFA_OFFSET, DK_CFI_DEF_CFA_REGISTER,
DK_CFI_OFFSET, DK_CFI_REL_OFFSET, DK_CFI_PERSONALITY, DK_CFI_LSDA,
@@ -419,9 +425,11 @@ private:
// ".ascii", ".asciz", ".string"
bool parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated);
bool parseDirectiveReloc(SMLoc DirectiveLoc); // ".reloc"
- bool parseDirectiveValue(unsigned Size); // ".byte", ".long", ...
- bool parseDirectiveOctaValue(); // ".octa"
- bool parseDirectiveRealValue(const fltSemantics &); // ".single", ...
+ bool parseDirectiveValue(StringRef IDVal,
+ unsigned Size); // ".byte", ".long", ...
+ bool parseDirectiveOctaValue(StringRef IDVal); // ".octa", ...
+ bool parseDirectiveRealValue(StringRef IDVal,
+ const fltSemantics &); // ".single", ...
bool parseDirectiveFill(); // ".fill"
bool parseDirectiveZero(); // ".zero"
// ".set", ".equ", ".equiv"
@@ -436,9 +444,11 @@ private:
bool parseDirectiveLoc();
bool parseDirectiveStabs();
- // ".cv_file", ".cv_loc", ".cv_linetable", "cv_inline_linetable",
- // ".cv_def_range"
+ // ".cv_file", ".cv_func_id", ".cv_inline_site_id", ".cv_loc", ".cv_linetable",
+ // ".cv_inline_linetable", ".cv_def_range"
bool parseDirectiveCVFile();
+ bool parseDirectiveCVFuncId();
+ bool parseDirectiveCVInlineSiteId();
bool parseDirectiveCVLoc();
bool parseDirectiveCVLinetable();
bool parseDirectiveCVInlineLinetable();
@@ -484,6 +494,12 @@ private:
// ".space", ".skip"
bool parseDirectiveSpace(StringRef IDVal);
+ // ".dcb"
+ bool parseDirectiveDCB(StringRef IDVal, unsigned Size);
+ bool parseDirectiveRealDCB(StringRef IDVal, const fltSemantics &);
+ // ".ds"
+ bool parseDirectiveDS(StringRef IDVal, unsigned Size);
+
// .sleb128 (Signed=true) and .uleb128 (Signed=false)
bool parseDirectiveLEB128(bool Signed);
@@ -542,7 +558,8 @@ private:
void initializeDirectiveKindMap();
};
-}
+
+} // end anonymous namespace
namespace llvm {
@@ -550,7 +567,7 @@ extern MCAsmParserExtension *createDarwinAsmParser();
extern MCAsmParserExtension *createELFAsmParser();
extern MCAsmParserExtension *createCOFFAsmParser();
-}
+} // end namespace llvm
enum { DEFAULT_ADDRSPACE = 0 };
@@ -558,8 +575,9 @@ AsmParser::AsmParser(SourceMgr &SM, MCContext &Ctx, MCStreamer &Out,
const MCAsmInfo &MAI)
: Lexer(MAI), Ctx(Ctx), Out(Out), MAI(MAI), SrcMgr(SM),
PlatformParser(nullptr), CurBuffer(SM.getMainFileID()),
- MacrosEnabledFlag(true), HadError(false), CppHashInfo(),
- AssemblerDialect(~0U), IsDarwin(false), ParsingInlineAsm(false) {
+ MacrosEnabledFlag(true), CppHashInfo(), AssemblerDialect(~0U),
+ IsDarwin(false), ParsingInlineAsm(false) {
+ HadError = false;
// Save the old handler.
SavedDiagHandler = SrcMgr.getDiagHandler();
SavedDiagContext = SrcMgr.getDiagContext();
@@ -602,24 +620,25 @@ void AsmParser::printMacroInstantiations() {
"while in macro instantiation");
}
-void AsmParser::Note(SMLoc L, const Twine &Msg, ArrayRef<SMRange> Ranges) {
- printMessage(L, SourceMgr::DK_Note, Msg, Ranges);
+void AsmParser::Note(SMLoc L, const Twine &Msg, SMRange Range) {
+ printPendingErrors();
+ printMessage(L, SourceMgr::DK_Note, Msg, Range);
printMacroInstantiations();
}
-bool AsmParser::Warning(SMLoc L, const Twine &Msg, ArrayRef<SMRange> Ranges) {
+bool AsmParser::Warning(SMLoc L, const Twine &Msg, SMRange Range) {
if(getTargetParser().getTargetOptions().MCNoWarn)
return false;
if (getTargetParser().getTargetOptions().MCFatalWarnings)
- return Error(L, Msg, Ranges);
- printMessage(L, SourceMgr::DK_Warning, Msg, Ranges);
+ return Error(L, Msg, Range);
+ printMessage(L, SourceMgr::DK_Warning, Msg, Range);
printMacroInstantiations();
return false;
}
-bool AsmParser::Error(SMLoc L, const Twine &Msg, ArrayRef<SMRange> Ranges) {
+bool AsmParser::printError(SMLoc L, const Twine &Msg, SMRange Range) {
HadError = true;
- printMessage(L, SourceMgr::DK_Error, Msg, Ranges);
+ printMessage(L, SourceMgr::DK_Error, Msg, Range);
printMacroInstantiations();
return true;
}
@@ -639,7 +658,8 @@ bool AsmParser::enterIncludeFile(const std::string &Filename) {
/// Process the specified .incbin file by searching for it in the include paths
/// then just emitting the byte contents of the file to the streamer. This
/// returns true on failure.
-bool AsmParser::processIncbinFile(const std::string &Filename) {
+bool AsmParser::processIncbinFile(const std::string &Filename, int64_t Skip,
+ const MCExpr *Count, SMLoc Loc) {
std::string IncludedFile;
unsigned NewBuf =
SrcMgr.AddIncludeFile(Filename, Lexer.getLoc(), IncludedFile);
@@ -647,7 +667,17 @@ bool AsmParser::processIncbinFile(const std::string &Filename) {
return true;
// Pick up the bytes from the file and emit them.
- getStreamer().EmitBytes(SrcMgr.getMemoryBuffer(NewBuf)->getBuffer());
+ StringRef Bytes = SrcMgr.getMemoryBuffer(NewBuf)->getBuffer();
+ Bytes = Bytes.drop_front(Skip);
+ if (Count) {
+ int64_t Res;
+ if (!Count->evaluateAsAbsolute(Res))
+ return Error(Loc, "expected absolute expression");
+ if (Res < 0)
+ return Warning(Loc, "negative count has no effect");
+ Bytes = Bytes.take_front(Res);
+ }
+ getStreamer().EmitBytes(Bytes);
return false;
}
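
processIncbinFile() now honors the optional skip and count operands of .incbin, applying them with StringRef::drop_front/take_front before emitting the bytes. A standalone sketch of that slicing with std::string_view standing in for StringRef; unlike the real directive, which warns on a negative count, this sketch simply treats a negative count as "no limit".

    #include <algorithm>
    #include <cstdint>
    #include <iostream>
    #include <string_view>

    // Drop Skip leading bytes, then keep at most Count bytes (if Count >= 0).
    std::string_view incbinSlice(std::string_view Bytes, int64_t Skip, int64_t Count) {
      Bytes.remove_prefix(std::min<std::size_t>(Skip < 0 ? 0 : Skip, Bytes.size()));
      if (Count >= 0)
        Bytes = Bytes.substr(0, std::min<std::size_t>(Count, Bytes.size()));
      return Bytes;
    }

    int main() {
      std::cout << incbinSlice("ABCDEFGH", 2, 3) << "\n";   // prints "CDE"
    }
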
@@ -688,7 +718,6 @@ const AsmToken &AsmParser::Lex() {
}
}
-
return *tok;
}
@@ -706,7 +735,7 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
// If we are generating dwarf for assembly source files save the initial text
// section and generate a .file directive.
if (getContext().getGenDwarfForAssembly()) {
- MCSection *Sec = getStreamer().getCurrentSection().first;
+ MCSection *Sec = getStreamer().getCurrentSectionOnly();
if (!Sec->getBeginSymbol()) {
MCSymbol *SectionStartSym = getContext().createTempSymbol();
getStreamer().EmitLabel(SectionStartSym);
@@ -725,30 +754,38 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
if (!parseStatement(Info, nullptr))
continue;
- // If we've failed, but on a Error Token, but did not consume it in
- // favor of a better message, emit it now.
- if (Lexer.getTok().is(AsmToken::Error)) {
+ // If we have a Lexer Error, we are on an Error Token. Load in Lexer Error
+ // for printing ErrMsg via Lex() only if no (presumably better) parser error
+ // exists.
+ if (!hasPendingError() && Lexer.getTok().is(AsmToken::Error)) {
Lex();
}
- // We had an error, validate that one was emitted and recover by skipping to
- // the next line.
- assert(HadError && "Parse statement returned an error, but none emitted!");
- eatToEndOfStatement();
+ // parseStatement returned true so may need to emit an error.
+ printPendingErrors();
+
+ // Skip to the next line if needed.
+ if (!getLexer().isAtStartOfStatement())
+ eatToEndOfStatement();
}
+ // All errors should have been emitted.
+ assert(!hasPendingError() && "unexpected error from parseStatement");
+
+ getTargetParser().flushPendingInstructions(getStreamer());
+
if (TheCondState.TheCond != StartingCondState.TheCond ||
TheCondState.Ignore != StartingCondState.Ignore)
- return TokError("unmatched .ifs or .elses");
-
+ printError(getTok().getLoc(), "unmatched .ifs or .elses");
// Check to see there are no empty DwarfFile slots.
const auto &LineTables = getContext().getMCDwarfLineTables();
if (!LineTables.empty()) {
unsigned Index = 0;
for (const auto &File : LineTables.begin()->second.getMCDwarfFiles()) {
if (File.Name.empty() && Index != 0)
- TokError("unassigned file number: " + Twine(Index) +
- " for .file directives");
+ printError(getTok().getLoc(), "unassigned file number: " +
+ Twine(Index) +
+ " for .file directives");
++Index;
}
}
@@ -768,9 +805,8 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
// FIXME: We would really like to refer back to where the symbol was
// first referenced for a source location. We need to add something
// to track that. Currently, we just point to the end of the file.
- HadError |=
- Error(getTok().getLoc(), "assembler local symbol '" +
- Sym->getName() + "' not defined");
+ printError(getTok().getLoc(), "assembler local symbol '" +
+ Sym->getName() + "' not defined");
}
}
@@ -781,7 +817,7 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
// Reset the state of any "# line file" directives we've seen to the
// context as it was at the diagnostic site.
CppHashInfo = std::get<1>(LocSym);
- HadError |= Error(std::get<0>(LocSym), "directional label undefined");
+ printError(std::get<0>(LocSym), "directional label undefined");
}
}
}
@@ -794,11 +830,13 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) {
return HadError || getContext().hadError();
}
-void AsmParser::checkForValidSection() {
- if (!ParsingInlineAsm && !getStreamer().getCurrentSection().first) {
- TokError("expected section directive before assembly directive");
+bool AsmParser::checkForValidSection() {
+ if (!ParsingInlineAsm && !getStreamer().getCurrentSectionOnly()) {
Out.InitSections(false);
+ return Error(getTok().getLoc(),
+ "expected section directive before assembly directive");
}
+ return false;
}
/// \brief Throw away the rest of the line for testing purposes.
@@ -888,8 +926,10 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
case AsmToken::Identifier: {
StringRef Identifier;
if (parseIdentifier(Identifier)) {
- if (FirstTokenKind == AsmToken::Dollar) {
+ // We may have failed but $ may be a valid token.
+ if (getTok().is(AsmToken::Dollar)) {
if (Lexer.getMAI().getDollarIsPC()) {
+ Lex();
// This is a '$' reference, which references the current PC. Emit a
// temporary label to the streamer and refer to it.
MCSymbol *Sym = Ctx.createTempSymbol();
@@ -933,6 +973,9 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
// This is a symbol reference.
StringRef SymbolName = Identifier;
+ if (SymbolName.empty())
+ return true;
+
MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
// Lookup the symbol variant if used.
@@ -999,7 +1042,7 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
return false;
}
case AsmToken::Real: {
- APFloat RealVal(APFloat::IEEEdouble, getTok().getString());
+ APFloat RealVal(APFloat::IEEEdouble(), getTok().getString());
uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
Res = MCConstantExpr::create(IntVal, getContext());
EndLoc = Lexer.getTok().getEndLoc();
@@ -1042,6 +1085,43 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
return true;
Res = MCUnaryExpr::createNot(Res, getContext());
return false;
+ // MIPS unary expression operators. The lexer won't generate these tokens if
+ // MCAsmInfo::HasMipsExpressions is false for the target.
+ case AsmToken::PercentCall16:
+ case AsmToken::PercentCall_Hi:
+ case AsmToken::PercentCall_Lo:
+ case AsmToken::PercentDtprel_Hi:
+ case AsmToken::PercentDtprel_Lo:
+ case AsmToken::PercentGot:
+ case AsmToken::PercentGot_Disp:
+ case AsmToken::PercentGot_Hi:
+ case AsmToken::PercentGot_Lo:
+ case AsmToken::PercentGot_Ofst:
+ case AsmToken::PercentGot_Page:
+ case AsmToken::PercentGottprel:
+ case AsmToken::PercentGp_Rel:
+ case AsmToken::PercentHi:
+ case AsmToken::PercentHigher:
+ case AsmToken::PercentHighest:
+ case AsmToken::PercentLo:
+ case AsmToken::PercentNeg:
+ case AsmToken::PercentPcrel_Hi:
+ case AsmToken::PercentPcrel_Lo:
+ case AsmToken::PercentTlsgd:
+ case AsmToken::PercentTlsldm:
+ case AsmToken::PercentTprel_Hi:
+ case AsmToken::PercentTprel_Lo:
+ Lex(); // Eat the operator.
+ if (Lexer.isNot(AsmToken::LParen))
+ return TokError("expected '(' after operator");
+ Lex(); // Eat the '('.
+ if (parseExpression(Res, EndLoc))
+ return true;
+ if (Lexer.isNot(AsmToken::RParen))
+ return TokError("expected ')'");
+ Lex(); // Eat the ')'.
+ Res = getTargetParser().createTargetUnaryExpr(Res, FirstTokenKind, Ctx);
+ return !Res;
}
}
@@ -1356,7 +1436,7 @@ unsigned AsmParser::getBinOpPrecedence(AsmToken::TokenKind K,
/// Res contains the LHS of the expression on input.
bool AsmParser::parseBinOpRHS(unsigned Precedence, const MCExpr *&Res,
SMLoc &EndLoc) {
- while (1) {
+ while (true) {
MCBinaryExpr::Opcode Kind = MCBinaryExpr::Add;
unsigned TokPrec = getBinOpPrecedence(Lexer.getKind(), Kind);
@@ -1390,6 +1470,7 @@ bool AsmParser::parseBinOpRHS(unsigned Precedence, const MCExpr *&Res,
/// ::= Label* Identifier OperandList* EndOfStatement
bool AsmParser::parseStatement(ParseStatementInfo &Info,
MCAsmParserSemaCallback *SI) {
+ assert(!hasPendingError() && "parseStatement started with pending error");
// Eat initial spaces and comments
while (Lexer.is(AsmToken::Space))
Lex();
@@ -1401,6 +1482,16 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
Lex();
return false;
}
+ if (Lexer.is(AsmToken::Hash)) {
+ // Seeing a hash here means that it was an end-of-line comment in
+ // an asm syntax where hashes are not comments and the previous
+ // statement parser did not check the end of statement. Relex as
+ // EndOfStatement.
+ StringRef CommentStr = parseStringToEndOfStatement();
+ Lexer.Lex();
+ Lexer.UnLex(AsmToken(AsmToken::EndOfStatement, CommentStr));
+ return false;
+ }
// Statements always start with an identifier.
AsmToken ID = getTok();
SMLoc IDLoc = ID.getLoc();
@@ -1412,15 +1503,19 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
if (Lexer.is(AsmToken::Integer)) {
LocalLabelVal = getTok().getIntVal();
if (LocalLabelVal < 0) {
- if (!TheCondState.Ignore)
- return TokError("unexpected token at start of statement");
+ if (!TheCondState.Ignore) {
+ Lex(); // always eat a token
+ return Error(IDLoc, "unexpected token at start of statement");
+ }
IDVal = "";
} else {
IDVal = getTok().getString();
Lex(); // Consume the integer token to be used as an identifier token.
if (Lexer.getKind() != AsmToken::Colon) {
- if (!TheCondState.Ignore)
- return TokError("unexpected token at start of statement");
+ if (!TheCondState.Ignore) {
+ Lex(); // always eat a token
+ return Error(IDLoc, "unexpected token at start of statement");
+ }
}
}
} else if (Lexer.is(AsmToken::Dot)) {
@@ -1437,8 +1532,10 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
Lex();
IDVal = "}";
} else if (parseIdentifier(IDVal)) {
- if (!TheCondState.Ignore)
- return TokError("unexpected token at start of statement");
+ if (!TheCondState.Ignore) {
+ Lex(); // always eat a token
+ return Error(IDLoc, "unexpected token at start of statement");
+ }
IDVal = "";
}
@@ -1500,7 +1597,8 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
case AsmToken::Colon: {
if (!getTargetParser().isLabel(ID))
break;
- checkForValidSection();
+ if (checkForValidSection())
+ return true;
// identifier ':' -> Label.
Lex();
@@ -1534,6 +1632,16 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
if (!Sym->isUndefined() || Sym->isVariable())
return Error(IDLoc, "invalid symbol redefinition");
+ // End of Labels should be treated as end of line for lexing
+ // purposes but that information is not available to the Lexer, which
+ // does not understand Labels. This may cause us to see a Hash
+ // here instead of a preprocessor line comment.
+ if (getTok().is(AsmToken::Hash)) {
+ StringRef CommentStr = parseStringToEndOfStatement();
+ Lexer.Lex();
+ Lexer.UnLex(AsmToken(AsmToken::EndOfStatement, CommentStr));
+ }
+
// Consume any end of statement token, if present, to avoid spurious
// AddBlankLine calls().
if (getTok().is(AsmToken::EndOfStatement)) {
@@ -1552,8 +1660,6 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
getTargetParser().onLabelParsed(Sym);
-
-
return false;
}
@@ -1590,9 +1696,22 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
// manner, or at least have a default behavior that's shared between
// all targets and platforms.
- // First query the target-specific parser. It will return 'true' if it
- // isn't interested in this directive.
- if (!getTargetParser().ParseDirective(ID))
+ getTargetParser().flushPendingInstructions(getStreamer());
+
+ SMLoc StartTokLoc = getTok().getLoc();
+ bool TPDirectiveReturn = getTargetParser().ParseDirective(ID);
+
+ if (hasPendingError())
+ return true;
+ // Currently the return value should be true if we are
+ // uninterested but as this is at odds with the standard parsing
+ // convention (return true = error) we have instances of a parsed
+ // directive that fails returning true as an error. Catch these
+ // cases here and report them as errors as best we can.
+ if (TPDirectiveReturn && StartTokLoc != getTok().getLoc())
+ return true;
+ // Return if we did some parsing or believe we succeeded.
+ if (!TPDirectiveReturn || StartTokLoc != getTok().getLoc())
return false;
// Next, check the extension directive map to see if any extension has
@@ -1618,25 +1737,34 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
case DK_STRING:
return parseDirectiveAscii(IDVal, true);
case DK_BYTE:
- return parseDirectiveValue(1);
+ case DK_DC_B:
+ return parseDirectiveValue(IDVal, 1);
+ case DK_DC:
+ case DK_DC_W:
case DK_SHORT:
case DK_VALUE:
case DK_2BYTE:
- return parseDirectiveValue(2);
+ return parseDirectiveValue(IDVal, 2);
case DK_LONG:
case DK_INT:
case DK_4BYTE:
- return parseDirectiveValue(4);
+ case DK_DC_L:
+ return parseDirectiveValue(IDVal, 4);
case DK_QUAD:
case DK_8BYTE:
- return parseDirectiveValue(8);
+ return parseDirectiveValue(IDVal, 8);
+ case DK_DC_A:
+ return parseDirectiveValue(IDVal,
+ getContext().getAsmInfo()->getPointerSize());
case DK_OCTA:
- return parseDirectiveOctaValue();
+ return parseDirectiveOctaValue(IDVal);
case DK_SINGLE:
case DK_FLOAT:
- return parseDirectiveRealValue(APFloat::IEEEsingle);
+ case DK_DC_S:
+ return parseDirectiveRealValue(IDVal, APFloat::IEEEsingle());
case DK_DOUBLE:
- return parseDirectiveRealValue(APFloat::IEEEdouble);
+ case DK_DC_D:
+ return parseDirectiveRealValue(IDVal, APFloat::IEEEdouble());
case DK_ALIGN: {
bool IsPow2 = !getContext().getAsmInfo()->getAlignmentIsInBytes();
return parseDirectiveAlign(IsPow2, /*ExprSize=*/1);
@@ -1731,6 +1859,10 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
return parseDirectiveStabs();
case DK_CV_FILE:
return parseDirectiveCVFile();
+ case DK_CV_FUNC_ID:
+ return parseDirectiveCVFuncId();
+ case DK_CV_INLINE_SITE_ID:
+ return parseDirectiveCVInlineSiteId();
case DK_CV_LOC:
return parseDirectiveCVLoc();
case DK_CV_LINETABLE:
@@ -1805,6 +1937,34 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
return parseDirectiveWarning(IDLoc);
case DK_RELOC:
return parseDirectiveReloc(IDLoc);
+ case DK_DCB:
+ case DK_DCB_W:
+ return parseDirectiveDCB(IDVal, 2);
+ case DK_DCB_B:
+ return parseDirectiveDCB(IDVal, 1);
+ case DK_DCB_D:
+ return parseDirectiveRealDCB(IDVal, APFloat::IEEEdouble());
+ case DK_DCB_L:
+ return parseDirectiveDCB(IDVal, 4);
+ case DK_DCB_S:
+ return parseDirectiveRealDCB(IDVal, APFloat::IEEEsingle());
+ case DK_DC_X:
+ case DK_DCB_X:
+ return TokError(Twine(IDVal) +
+ " not currently supported for this target");
+ case DK_DS:
+ case DK_DS_W:
+ return parseDirectiveDS(IDVal, 2);
+ case DK_DS_B:
+ return parseDirectiveDS(IDVal, 1);
+ case DK_DS_D:
+ return parseDirectiveDS(IDVal, 8);
+ case DK_DS_L:
+ case DK_DS_S:
+ return parseDirectiveDS(IDVal, 4);
+ case DK_DS_P:
+ case DK_DS_X:
+ return parseDirectiveDS(IDVal, 12);
}
return Error(IDLoc, "unknown directive");
@@ -1821,14 +1981,15 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
if (ParsingInlineAsm && (IDVal == "even"))
Info.AsmRewrites->emplace_back(AOK_EVEN, IDLoc, 4);
- checkForValidSection();
+ if (checkForValidSection())
+ return true;
// Canonicalize the opcode to lower case.
std::string OpcodeStr = IDVal.lower();
ParseInstructionInfo IInfo(Info.AsmRewrites);
- bool HadError = getTargetParser().ParseInstruction(IInfo, OpcodeStr, ID,
- Info.ParsedOperands);
- Info.ParseError = HadError;
+ bool ParseHadError = getTargetParser().ParseInstruction(IInfo, OpcodeStr, ID,
+ Info.ParsedOperands);
+ Info.ParseError = ParseHadError;
// Dump the parsed representation, if requested.
if (getShowParsedOperands()) {
@@ -1845,11 +2006,15 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
printMessage(IDLoc, SourceMgr::DK_Note, OS.str());
}
+ // Fail even if ParseInstruction erroneously returns false.
+ if (hasPendingError() || ParseHadError)
+ return true;
+
// If we are generating dwarf for the current section then generate a .loc
// directive for the instruction.
- if (!HadError && getContext().getGenDwarfForAssembly() &&
+ if (!ParseHadError && getContext().getGenDwarfForAssembly() &&
getContext().getGenDwarfSectionSyms().count(
- getStreamer().getCurrentSection().first)) {
+ getStreamer().getCurrentSectionOnly())) {
unsigned Line;
if (ActiveMacros.empty())
Line = SrcMgr.FindLineNumber(IDLoc, CurBuffer);
@@ -1889,15 +2054,13 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info,
}
// If parsing succeeded, match the instruction.
- if (!HadError) {
+ if (!ParseHadError) {
uint64_t ErrorInfo;
- getTargetParser().MatchAndEmitInstruction(IDLoc, Info.Opcode,
- Info.ParsedOperands, Out,
- ErrorInfo, ParsingInlineAsm);
+ if (getTargetParser().MatchAndEmitInstruction(IDLoc, Info.Opcode,
+ Info.ParsedOperands, Out,
+ ErrorInfo, ParsingInlineAsm))
+ return true;
}
-
- // Don't skip the rest of the line, the instruction parser is responsible for
- // that.
return false;
}
@@ -1933,6 +2096,7 @@ bool AsmParser::parseCppHashLineFilenameComment(SMLoc L) {
"Lexing Cpp line comment: Expected String");
StringRef Filename = getTok().getString();
Lex();
+
// Get rid of the enclosing quotes.
Filename = Filename.substr(1, Filename.size() - 2);
@@ -2158,6 +2322,7 @@ static bool isOperator(AsmToken::TokenKind kind) {
}
namespace {
+
class AsmLexerSkipSpaceRAII {
public:
AsmLexerSkipSpaceRAII(AsmLexer &Lexer, bool SkipSpace) : Lexer(Lexer) {
@@ -2171,7 +2336,8 @@ public:
private:
AsmLexer &Lexer;
};
-}
+
+} // end anonymous namespace
bool AsmParser::parseMacroArgument(MCAsmMacroArgument &MA, bool Vararg) {
@@ -2190,7 +2356,7 @@ bool AsmParser::parseMacroArgument(MCAsmMacroArgument &MA, bool Vararg) {
bool SpaceEaten;
- for (;;) {
+ while (true) {
SpaceEaten = false;
if (Lexer.is(AsmToken::Eof) || Lexer.is(AsmToken::Equal))
return TokError("unexpected token in macro instantiation");
@@ -2265,27 +2431,19 @@ bool AsmParser::parseMacroArguments(const MCAsmMacro *M,
MCAsmMacroParameter FA;
if (Lexer.is(AsmToken::Identifier) && Lexer.peekTok().is(AsmToken::Equal)) {
- if (parseIdentifier(FA.Name)) {
- Error(IDLoc, "invalid argument identifier for formal argument");
- eatToEndOfStatement();
- return true;
- }
+ if (parseIdentifier(FA.Name))
+ return Error(IDLoc, "invalid argument identifier for formal argument");
+
+ if (Lexer.isNot(AsmToken::Equal))
+ return TokError("expected '=' after formal parameter identifier");
- if (Lexer.isNot(AsmToken::Equal)) {
- TokError("expected '=' after formal parameter identifier");
- eatToEndOfStatement();
- return true;
- }
Lex();
NamedParametersFound = true;
}
- if (NamedParametersFound && FA.Name.empty()) {
- Error(IDLoc, "cannot mix positional and keyword arguments");
- eatToEndOfStatement();
- return true;
- }
+ if (NamedParametersFound && FA.Name.empty())
+ return Error(IDLoc, "cannot mix positional and keyword arguments");
bool Vararg = HasVararg && Parameter == (NParameters - 1);
if (parseMacroArgument(FA.Value, Vararg))
@@ -2300,10 +2458,8 @@ bool AsmParser::parseMacroArguments(const MCAsmMacro *M,
if (FAI >= NParameters) {
assert(M && "expected macro to be defined");
- Error(IDLoc,
- "parameter named '" + FA.Name + "' does not exist for macro '" +
- M->Name + "'");
- return true;
+ return Error(IDLoc, "parameter named '" + FA.Name +
+ "' does not exist for macro '" + M->Name + "'");
}
PI = FAI;
}
@@ -2359,10 +2515,17 @@ void AsmParser::defineMacro(StringRef Name, MCAsmMacro Macro) {
void AsmParser::undefineMacro(StringRef Name) { MacroMap.erase(Name); }
bool AsmParser::handleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc) {
- // Arbitrarily limit macro nesting depth, to match 'as'. We can eliminate
- // this, although we should protect against infinite loops.
- if (ActiveMacros.size() == 20)
- return TokError("macros cannot be nested more than 20 levels deep");
+ // Arbitrarily limit macro nesting depth (default matches 'as'). We can
+ // eliminate this, although we should protect against infinite loops.
+ unsigned MaxNestingDepth = AsmMacroMaxNestingDepth;
+ if (ActiveMacros.size() == MaxNestingDepth) {
+ std::ostringstream MaxNestingDepthError;
+ MaxNestingDepthError << "macros cannot be nested more than "
+ << MaxNestingDepth << " levels deep."
+ << " Use -asm-macro-max-nesting-depth to increase "
+ "this limit.";
+ return TokError(MaxNestingDepthError.str());
+ }
MCAsmMacroArguments A;
if (parseMacroArguments(M, A))
@@ -2446,14 +2609,19 @@ bool AsmParser::parseIdentifier(StringRef &Res) {
SMLoc PrefixLoc = getLexer().getLoc();
// Consume the prefix character, and check for a following identifier.
- Lexer.Lex(); // Lexer's Lex guarantees consecutive token.
- if (Lexer.isNot(AsmToken::Identifier))
+
+ AsmToken Buf[1];
+ Lexer.peekTokens(Buf, false);
+
+ if (Buf[0].isNot(AsmToken::Identifier))
return true;
// We have a '$' or '@' followed by an identifier, make sure they are adjacent.
- if (PrefixLoc.getPointer() + 1 != getTok().getLoc().getPointer())
+ if (PrefixLoc.getPointer() + 1 != Buf[0].getLoc().getPointer())
return true;
+ // eat $ or @
+ Lexer.Lex(); // Lexer's Lex guarantees consecutive token.
// Construct the joined identifier and consume the token.
Res =
StringRef(PrefixLoc.getPointer(), getTok().getIdentifier().size() + 1);
@@ -2477,17 +2645,15 @@ bool AsmParser::parseIdentifier(StringRef &Res) {
/// ::= .set identifier ',' expression
bool AsmParser::parseDirectiveSet(StringRef IDVal, bool allow_redef) {
StringRef Name;
-
- if (check(parseIdentifier(Name),
- "expected identifier after '" + Twine(IDVal) + "'") ||
- parseToken(AsmToken::Comma, "unexpected token in '" + Twine(IDVal) + "'"))
- return true;
-
- return parseAssignment(Name, allow_redef, true);
+ if (check(parseIdentifier(Name), "expected identifier") ||
+ parseToken(AsmToken::Comma) || parseAssignment(Name, allow_redef, true))
+ return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
+ return false;
}
bool AsmParser::parseEscapedString(std::string &Data) {
- assert(getLexer().is(AsmToken::String) && "Unexpected current token!");
+ if (check(getTok().isNot(AsmToken::String), "expected string"))
+ return true;
Data = "";
StringRef Str = getTok().getStringContents();
@@ -2548,30 +2714,18 @@ bool AsmParser::parseEscapedString(std::string &Data) {
/// parseDirectiveAscii:
/// ::= ( .ascii | .asciz | .string ) [ "string" ( , "string" )* ]
bool AsmParser::parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated) {
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- checkForValidSection();
-
- for (;;) {
- std::string Data;
- if (check(getTok().isNot(AsmToken::String),
- "expected string in '" + Twine(IDVal) + "' directive") ||
- parseEscapedString(Data))
- return true;
-
- getStreamer().EmitBytes(Data);
- if (ZeroTerminated)
- getStreamer().EmitBytes(StringRef("\0", 1));
-
- if (getLexer().is(AsmToken::EndOfStatement))
- break;
-
- if (parseToken(AsmToken::Comma,
- "unexpected token in '" + Twine(IDVal) + "' directive"))
- return true;
- }
- }
+ auto parseOp = [&]() -> bool {
+ std::string Data;
+ if (checkForValidSection() || parseEscapedString(Data))
+ return true;
+ getStreamer().EmitBytes(Data);
+ if (ZeroTerminated)
+ getStreamer().EmitBytes(StringRef("\0", 1));
+ return false;
+ };
- Lex();
+ if (parseMany(parseOp))
+ return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
return false;
}
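
parseDirectiveAscii() above is the first of several directives rewritten around a per-operand lambda plus a generic driver that walks the "operand (, operand)*" list and lets addErrorSuffix append the directive name to any error. A standalone sketch of that driver/lambda split, modeling tokens as a plain string vector; the names and error conditions are illustrative, not the parser's real token handling.

    #include <functional>
    #include <iostream>
    #include <string>
    #include <vector>

    // Driver: call parseOne for each comma-separated operand; true means error.
    bool parseMany(std::vector<std::string> &Toks,
                   const std::function<bool()> &parseOne) {
      if (Toks.empty())
        return false;              // an empty operand list is allowed
      while (true) {
        if (parseOne())
          return true;
        if (Toks.empty())
          return false;            // end of statement
        if (Toks.front() != ",")
          return true;             // "expected comma"
        Toks.erase(Toks.begin());
      }
    }

    int main() {
      std::vector<std::string> Toks = {"\"a\"", ",", "\"b\""};
      auto parseOp = [&]() -> bool {
        if (Toks.empty() || Toks.front() == ",")
          return true;             // "expected string"
        std::cout << "emit " << Toks.front() << "\n";
        Toks.erase(Toks.begin());
        return false;
      };
      return parseMany(Toks, parseOp) ? 1 : 0;
    }
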
@@ -2582,11 +2736,12 @@ bool AsmParser::parseDirectiveReloc(SMLoc DirectiveLoc) {
const MCExpr *Expr = nullptr;
SMLoc OffsetLoc = Lexer.getTok().getLoc();
+ int64_t OffsetValue;
+ // We can only deal with constant expressions at the moment.
+
if (parseExpression(Offset))
return true;
- // We can only deal with constant expressions at the moment.
- int64_t OffsetValue;
if (check(!Offset->evaluateAsAbsolute(OffsetValue), OffsetLoc,
"expression is not a constant value") ||
check(OffsetValue < 0, OffsetLoc, "expression is negative") ||
@@ -2610,162 +2765,141 @@ bool AsmParser::parseDirectiveReloc(SMLoc DirectiveLoc) {
}
if (parseToken(AsmToken::EndOfStatement,
- "unexpected token in .reloc directive") ||
- check(getStreamer().EmitRelocDirective(*Offset, Name, Expr, DirectiveLoc),
- NameLoc, "unknown relocation name"))
- return true;
+ "unexpected token in .reloc directive"))
+ return true;
+
+ if (getStreamer().EmitRelocDirective(*Offset, Name, Expr, DirectiveLoc))
+ return Error(NameLoc, "unknown relocation name");
+
return false;
}
/// parseDirectiveValue
/// ::= (.byte | .short | ... ) [ expression (, expression)* ]
-bool AsmParser::parseDirectiveValue(unsigned Size) {
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- checkForValidSection();
-
- for (;;) {
- const MCExpr *Value;
- SMLoc ExprLoc = getLexer().getLoc();
- if (parseExpression(Value))
- return true;
-
- // Special case constant expressions to match code generator.
- if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
- assert(Size <= 8 && "Invalid size");
- uint64_t IntValue = MCE->getValue();
- if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue))
- return Error(ExprLoc, "literal value out of range for directive");
- getStreamer().EmitIntValue(IntValue, Size);
- } else
- getStreamer().EmitValue(Value, Size, ExprLoc);
-
- if (getLexer().is(AsmToken::EndOfStatement))
- break;
-
- // FIXME: Improve diagnostic.
- if (parseToken(AsmToken::Comma, "unexpected token in directive"))
- return true;
- }
- }
+bool AsmParser::parseDirectiveValue(StringRef IDVal, unsigned Size) {
+ auto parseOp = [&]() -> bool {
+ const MCExpr *Value;
+ SMLoc ExprLoc = getLexer().getLoc();
+ if (checkForValidSection() || parseExpression(Value))
+ return true;
+ // Special case constant expressions to match code generator.
+ if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
+ assert(Size <= 8 && "Invalid size");
+ uint64_t IntValue = MCE->getValue();
+ if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue))
+ return Error(ExprLoc, "out of range literal value");
+ getStreamer().EmitIntValue(IntValue, Size);
+ } else
+ getStreamer().EmitValue(Value, Size, ExprLoc);
+ return false;
+ };
- Lex();
+ if (parseMany(parseOp))
+ return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
return false;
}
/// ParseDirectiveOctaValue
/// ::= .octa [ hexconstant (, hexconstant)* ]
-bool AsmParser::parseDirectiveOctaValue() {
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- checkForValidSection();
-
- for (;;) {
- if (getTok().is(AsmToken::Error))
- return true;
- if (getTok().isNot(AsmToken::Integer) && getTok().isNot(AsmToken::BigNum))
- return TokError("unknown token in expression");
- SMLoc ExprLoc = getLexer().getLoc();
- APInt IntValue = getTok().getAPIntVal();
- Lex();
+bool AsmParser::parseDirectiveOctaValue(StringRef IDVal) {
+ auto parseOp = [&]() -> bool {
+ if (checkForValidSection())
+ return true;
+ if (getTok().isNot(AsmToken::Integer) && getTok().isNot(AsmToken::BigNum))
+ return TokError("unknown token in expression");
+ SMLoc ExprLoc = getTok().getLoc();
+ APInt IntValue = getTok().getAPIntVal();
+ uint64_t hi, lo;
+ Lex();
+ if (!IntValue.isIntN(128))
+ return Error(ExprLoc, "out of range literal value");
+ if (!IntValue.isIntN(64)) {
+ hi = IntValue.getHiBits(IntValue.getBitWidth() - 64).getZExtValue();
+ lo = IntValue.getLoBits(64).getZExtValue();
+ } else {
+ hi = 0;
+ lo = IntValue.getZExtValue();
+ }
+ if (MAI.isLittleEndian()) {
+ getStreamer().EmitIntValue(lo, 8);
+ getStreamer().EmitIntValue(hi, 8);
+ } else {
+ getStreamer().EmitIntValue(hi, 8);
+ getStreamer().EmitIntValue(lo, 8);
+ }
+ return false;
+ };
- uint64_t hi, lo;
- if (IntValue.isIntN(64)) {
- hi = 0;
- lo = IntValue.getZExtValue();
- } else if (IntValue.isIntN(128)) {
- // It might actually have more than 128 bits, but the top ones are zero.
- hi = IntValue.getHiBits(IntValue.getBitWidth() - 64).getZExtValue();
- lo = IntValue.getLoBits(64).getZExtValue();
- } else
- return Error(ExprLoc, "literal value out of range for directive");
-
- if (MAI.isLittleEndian()) {
- getStreamer().EmitIntValue(lo, 8);
- getStreamer().EmitIntValue(hi, 8);
- } else {
- getStreamer().EmitIntValue(hi, 8);
- getStreamer().EmitIntValue(lo, 8);
- }
+ if (parseMany(parseOp))
+ return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
+ return false;
+}
- if (getLexer().is(AsmToken::EndOfStatement))
- break;
+bool AsmParser::parseRealValue(const fltSemantics &Semantics, APInt &Res) {
+ // We don't truly support arithmetic on floating point expressions, so we
+ // have to manually parse unary prefixes.
+ bool IsNeg = false;
+ if (getLexer().is(AsmToken::Minus)) {
+ Lexer.Lex();
+ IsNeg = true;
+ } else if (getLexer().is(AsmToken::Plus))
+ Lexer.Lex();
- // FIXME: Improve diagnostic.
- if (parseToken(AsmToken::Comma, "unexpected token in directive"))
- return true;
- }
- }
+ if (Lexer.is(AsmToken::Error))
+ return TokError(Lexer.getErr());
+ if (Lexer.isNot(AsmToken::Integer) && Lexer.isNot(AsmToken::Real) &&
+ Lexer.isNot(AsmToken::Identifier))
+ return TokError("unexpected token in directive");
+ // Convert to an APFloat.
+ APFloat Value(Semantics);
+ StringRef IDVal = getTok().getString();
+ if (getLexer().is(AsmToken::Identifier)) {
+ if (!IDVal.compare_lower("infinity") || !IDVal.compare_lower("inf"))
+ Value = APFloat::getInf(Semantics);
+ else if (!IDVal.compare_lower("nan"))
+ Value = APFloat::getNaN(Semantics, false, ~0);
+ else
+ return TokError("invalid floating point literal");
+ } else if (Value.convertFromString(IDVal, APFloat::rmNearestTiesToEven) ==
+ APFloat::opInvalidOp)
+ return TokError("invalid floating point literal");
+ if (IsNeg)
+ Value.changeSign();
+
+ // Consume the numeric token.
Lex();
+
+ Res = Value.bitcastToAPInt();
+
return false;
}
/// parseDirectiveRealValue
/// ::= (.single | .double) [ expression (, expression)* ]
-bool AsmParser::parseDirectiveRealValue(const fltSemantics &Semantics) {
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- checkForValidSection();
-
- for (;;) {
- // We don't truly support arithmetic on floating point expressions, so we
- // have to manually parse unary prefixes.
- bool IsNeg = false;
- if (getLexer().is(AsmToken::Minus)) {
- Lexer.Lex();
- IsNeg = true;
- } else if (getLexer().is(AsmToken::Plus))
- Lexer.Lex();
-
- if (Lexer.is(AsmToken::Error))
- return TokError(Lexer.getErr());
- if (Lexer.isNot(AsmToken::Integer) && Lexer.isNot(AsmToken::Real) &&
- Lexer.isNot(AsmToken::Identifier))
- return TokError("unexpected token in directive");
-
- // Convert to an APFloat.
- APFloat Value(Semantics);
- StringRef IDVal = getTok().getString();
- if (getLexer().is(AsmToken::Identifier)) {
- if (!IDVal.compare_lower("infinity") || !IDVal.compare_lower("inf"))
- Value = APFloat::getInf(Semantics);
- else if (!IDVal.compare_lower("nan"))
- Value = APFloat::getNaN(Semantics, false, ~0);
- else
- return TokError("invalid floating point literal");
- } else if (Value.convertFromString(IDVal, APFloat::rmNearestTiesToEven) ==
- APFloat::opInvalidOp)
- return TokError("invalid floating point literal");
- if (IsNeg)
- Value.changeSign();
-
- // Consume the numeric token.
- Lex();
-
- // Emit the value as an integer.
- APInt AsInt = Value.bitcastToAPInt();
- getStreamer().EmitIntValue(AsInt.getLimitedValue(),
- AsInt.getBitWidth() / 8);
-
- if (Lexer.is(AsmToken::EndOfStatement))
- break;
-
- if (parseToken(AsmToken::Comma, "unexpected token in directive"))
- return true;
- }
- }
+bool AsmParser::parseDirectiveRealValue(StringRef IDVal,
+ const fltSemantics &Semantics) {
+ auto parseOp = [&]() -> bool {
+ APInt AsInt;
+ if (checkForValidSection() || parseRealValue(Semantics, AsInt))
+ return true;
+ getStreamer().EmitIntValue(AsInt.getLimitedValue(),
+ AsInt.getBitWidth() / 8);
+ return false;
+ };
- Lex();
+ if (parseMany(parseOp))
+ return addErrorSuffix(" in '" + Twine(IDVal) + "' directive");
return false;
}
/// parseDirectiveZero
/// ::= .zero expression
bool AsmParser::parseDirectiveZero() {
- checkForValidSection();
-
SMLoc NumBytesLoc = Lexer.getLoc();
const MCExpr *NumBytes;
- if (parseExpression(NumBytes))
+ if (checkForValidSection() || parseExpression(NumBytes))
return true;
int64_t Val = 0;
@@ -2786,32 +2920,29 @@ bool AsmParser::parseDirectiveZero() {
/// parseDirectiveFill
/// ::= .fill expression [ , expression [ , expression ] ]
bool AsmParser::parseDirectiveFill() {
- checkForValidSection();
-
SMLoc NumValuesLoc = Lexer.getLoc();
const MCExpr *NumValues;
- if (parseExpression(NumValues))
+ if (checkForValidSection() || parseExpression(NumValues))
return true;
int64_t FillSize = 1;
int64_t FillExpr = 0;
SMLoc SizeLoc, ExprLoc;
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- if (parseToken(AsmToken::Comma, "unexpected token in '.fill' directive") ||
- getTokenLoc(SizeLoc) || parseAbsoluteExpression(FillSize))
+ if (parseOptionalToken(AsmToken::Comma)) {
+ SizeLoc = getTok().getLoc();
+ if (parseAbsoluteExpression(FillSize))
return true;
-
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- if (parseToken(AsmToken::Comma,
- "unexpected token in '.fill' directive") ||
- getTokenLoc(ExprLoc) || parseAbsoluteExpression(FillExpr) ||
- parseToken(AsmToken::EndOfStatement,
- "unexpected token in '.fill' directive"))
+ if (parseOptionalToken(AsmToken::Comma)) {
+ ExprLoc = getTok().getLoc();
+ if (parseAbsoluteExpression(FillExpr))
return true;
}
}
+ if (parseToken(AsmToken::EndOfStatement,
+ "unexpected token in '.fill' directive"))
+ return true;
if (FillSize < 0) {
Warning(SizeLoc, "'.fill' directive with negative size has no effect");
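
The .fill rewrite above replaces nested end-of-statement checks with parseOptionalToken: an optional operand is parsed only when its separating comma is actually present, and nothing is consumed otherwise. A standalone sketch of that shape for ".fill 16, 1, 0x90", with tokens modeled as a queue and illustrative helper names.

    #include <deque>
    #include <iostream>
    #include <string>

    static std::deque<std::string> Toks = {"16", ",", "1", ",", "0x90"};

    // Consume T only if it is the next token; never an error when absent.
    bool parseOptionalToken(const std::string &T) {
      if (Toks.empty() || Toks.front() != T)
        return false;
      Toks.pop_front();
      return true;
    }

    long parseAbsoluteExpression() {
      long V = std::stol(Toks.front(), nullptr, 0);
      Toks.pop_front();
      return V;
    }

    int main() {
      long NumValues = parseAbsoluteExpression();     // 16
      long FillSize = 1, FillExpr = 0;
      if (parseOptionalToken(",")) {
        FillSize = parseAbsoluteExpression();         // 1
        if (parseOptionalToken(","))
          FillExpr = parseAbsoluteExpression();       // 0x90
      }
      std::cout << NumValues << " " << FillSize << " " << FillExpr << "\n";
    }
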
@@ -2833,73 +2964,64 @@ bool AsmParser::parseDirectiveFill() {
/// parseDirectiveOrg
/// ::= .org expression [ , expression ]
bool AsmParser::parseDirectiveOrg() {
- checkForValidSection();
-
const MCExpr *Offset;
- if (parseExpression(Offset))
+ SMLoc OffsetLoc = Lexer.getLoc();
+ if (checkForValidSection() || parseExpression(Offset))
return true;
// Parse optional fill expression.
int64_t FillExpr = 0;
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- if (parseToken(AsmToken::Comma, "unexpected token in '.org' directive") ||
- parseAbsoluteExpression(FillExpr))
- return true;
- }
+ if (parseOptionalToken(AsmToken::Comma))
+ if (parseAbsoluteExpression(FillExpr))
+ return addErrorSuffix(" in '.org' directive");
+ if (parseToken(AsmToken::EndOfStatement))
+ return addErrorSuffix(" in '.org' directive");
- if (parseToken(AsmToken::EndOfStatement,
- "unexpected token in '.org' directive"))
- return true;
-
- getStreamer().emitValueToOffset(Offset, FillExpr);
+ getStreamer().emitValueToOffset(Offset, FillExpr, OffsetLoc);
return false;
}
/// parseDirectiveAlign
/// ::= {.align, ...} expression [ , expression [ , expression ]]
bool AsmParser::parseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
- checkForValidSection();
-
SMLoc AlignmentLoc = getLexer().getLoc();
int64_t Alignment;
- if (parseAbsoluteExpression(Alignment))
- return true;
-
SMLoc MaxBytesLoc;
bool HasFillExpr = false;
int64_t FillExpr = 0;
int64_t MaxBytesToFill = 0;
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- if (parseToken(AsmToken::Comma, "unexpected token in directive"))
- return true;
-
- // The fill expression can be omitted while specifying a maximum number of
- // alignment bytes, e.g:
- // .align 3,,4
- if (getTok().isNot(AsmToken::Comma)) {
- HasFillExpr = true;
- if (parseAbsoluteExpression(FillExpr))
- return true;
- }
- if (getTok().isNot(AsmToken::EndOfStatement)) {
- if (parseToken(AsmToken::Comma, "unexpected token in directive") ||
- getTokenLoc(MaxBytesLoc) || parseAbsoluteExpression(MaxBytesToFill))
- return true;
+ auto parseAlign = [&]() -> bool {
+ if (checkForValidSection() || parseAbsoluteExpression(Alignment))
+ return true;
+ if (parseOptionalToken(AsmToken::Comma)) {
+ // The fill expression can be omitted while specifying a maximum number of
+ // alignment bytes, e.g:
+ // .align 3,,4
+ if (getTok().isNot(AsmToken::Comma)) {
+ HasFillExpr = true;
+ if (parseAbsoluteExpression(FillExpr))
+ return true;
+ }
+ if (parseOptionalToken(AsmToken::Comma))
+ if (parseTokenLoc(MaxBytesLoc) ||
+ parseAbsoluteExpression(MaxBytesToFill))
+ return true;
}
- }
+ return parseToken(AsmToken::EndOfStatement);
+ };
- if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive"))
- return true;
+ if (parseAlign())
+ return addErrorSuffix(" in directive");
- if (!HasFillExpr)
- FillExpr = 0;
+ // Always emit an alignment here even if we have thrown an error.
+ bool ReturnVal = false;
// Compute alignment in bytes.
if (IsPow2) {
// FIXME: Diagnose overflow.
if (Alignment >= 32) {
- Error(AlignmentLoc, "invalid alignment value");
+ ReturnVal |= Error(AlignmentLoc, "invalid alignment value");
Alignment = 31;
}
@@ -2911,13 +3033,14 @@ bool AsmParser::parseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
if (Alignment == 0)
Alignment = 1;
if (!isPowerOf2_64(Alignment))
- Error(AlignmentLoc, "alignment must be a power of 2");
+ ReturnVal |= Error(AlignmentLoc, "alignment must be a power of 2");
}
// Diagnose non-sensical max bytes to align.
if (MaxBytesLoc.isValid()) {
if (MaxBytesToFill < 1) {
- Error(MaxBytesLoc, "alignment directive can never be satisfied in this "
+ ReturnVal |= Error(MaxBytesLoc,
+ "alignment directive can never be satisfied in this "
"many bytes, ignoring maximum bytes expression");
MaxBytesToFill = 0;
}
@@ -2931,7 +3054,7 @@ bool AsmParser::parseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
// Check whether we should use optimal code alignment for this .align
// directive.
- const MCSection *Section = getStreamer().getCurrentSection().first;
+ const MCSection *Section = getStreamer().getCurrentSectionOnly();
assert(Section && "must have section to emit alignment");
bool UseCodeAlign = Section->UseCodeAlign();
if ((!HasFillExpr || Lexer.getMAI().getTextAlignFillValue() == FillExpr) &&
@@ -2943,7 +3066,7 @@ bool AsmParser::parseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
MaxBytesToFill);
}
- return false;
+ return ReturnVal;
}
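Illustrative forms accepted by this handler (the ".align 3,,4" case is the one cited in the comment above; the other values are placeholders):
    .align 3          # alignment expression only
    .align 3, 0x90    # alignment plus fill value
    .align 3,,4       # fill omitted, at most 4 bytes of padding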
/// parseDirectiveFile
@@ -2997,7 +3120,7 @@ bool AsmParser::parseDirectiveFile(SMLoc DirectiveLoc) {
getContext().setGenDwarfForAssembly(false);
else if (getStreamer().EmitDwarfFileDirective(FileNumber, Directory, Filename) ==
0)
- Error(FileNumberLoc, "file number already allocated");
+ return Error(FileNumberLoc, "file number already allocated");
}
return false;
@@ -3007,7 +3130,7 @@ bool AsmParser::parseDirectiveFile(SMLoc DirectiveLoc) {
/// ::= .line [number]
bool AsmParser::parseDirectiveLine() {
int64_t LineNumber;
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ if (getLexer().is(AsmToken::Integer)) {
if (parseIntToken(LineNumber, "unexpected token in '.line' directive"))
return true;
(void)LineNumber;
@@ -3056,65 +3179,61 @@ bool AsmParser::parseDirectiveLoc() {
unsigned Flags = DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0;
unsigned Isa = 0;
int64_t Discriminator = 0;
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- for (;;) {
- if (getLexer().is(AsmToken::EndOfStatement))
- break;
- StringRef Name;
- SMLoc Loc = getTok().getLoc();
- if (parseIdentifier(Name))
- return TokError("unexpected token in '.loc' directive");
-
- if (Name == "basic_block")
- Flags |= DWARF2_FLAG_BASIC_BLOCK;
- else if (Name == "prologue_end")
- Flags |= DWARF2_FLAG_PROLOGUE_END;
- else if (Name == "epilogue_begin")
- Flags |= DWARF2_FLAG_EPILOGUE_BEGIN;
- else if (Name == "is_stmt") {
- Loc = getTok().getLoc();
- const MCExpr *Value;
- if (parseExpression(Value))
- return true;
- // The expression must be the constant 0 or 1.
- if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
- int Value = MCE->getValue();
- if (Value == 0)
- Flags &= ~DWARF2_FLAG_IS_STMT;
- else if (Value == 1)
- Flags |= DWARF2_FLAG_IS_STMT;
- else
- return Error(Loc, "is_stmt value not 0 or 1");
- } else {
- return Error(Loc, "is_stmt value not the constant value of 0 or 1");
- }
- } else if (Name == "isa") {
- Loc = getTok().getLoc();
- const MCExpr *Value;
- if (parseExpression(Value))
- return true;
- // The expression must be a constant greater or equal to 0.
- if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
- int Value = MCE->getValue();
- if (Value < 0)
- return Error(Loc, "isa number less than zero");
- Isa = Value;
- } else {
- return Error(Loc, "isa number not a constant value");
- }
- } else if (Name == "discriminator") {
- if (parseAbsoluteExpression(Discriminator))
- return true;
+ auto parseLocOp = [&]() -> bool {
+ StringRef Name;
+ SMLoc Loc = getTok().getLoc();
+ if (parseIdentifier(Name))
+ return TokError("unexpected token in '.loc' directive");
+
+ if (Name == "basic_block")
+ Flags |= DWARF2_FLAG_BASIC_BLOCK;
+ else if (Name == "prologue_end")
+ Flags |= DWARF2_FLAG_PROLOGUE_END;
+ else if (Name == "epilogue_begin")
+ Flags |= DWARF2_FLAG_EPILOGUE_BEGIN;
+ else if (Name == "is_stmt") {
+ Loc = getTok().getLoc();
+ const MCExpr *Value;
+ if (parseExpression(Value))
+ return true;
+ // The expression must be the constant 0 or 1.
+ if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
+ int Value = MCE->getValue();
+ if (Value == 0)
+ Flags &= ~DWARF2_FLAG_IS_STMT;
+ else if (Value == 1)
+ Flags |= DWARF2_FLAG_IS_STMT;
+ else
+ return Error(Loc, "is_stmt value not 0 or 1");
} else {
- return Error(Loc, "unknown sub-directive in '.loc' directive");
+ return Error(Loc, "is_stmt value not the constant value of 0 or 1");
}
-
- if (getLexer().is(AsmToken::EndOfStatement))
- break;
+ } else if (Name == "isa") {
+ Loc = getTok().getLoc();
+ const MCExpr *Value;
+ if (parseExpression(Value))
+ return true;
+ // The expression must be a constant greater or equal to 0.
+ if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
+ int Value = MCE->getValue();
+ if (Value < 0)
+ return Error(Loc, "isa number less than zero");
+ Isa = Value;
+ } else {
+ return Error(Loc, "isa number not a constant value");
+ }
+ } else if (Name == "discriminator") {
+ if (parseAbsoluteExpression(Discriminator))
+ return true;
+ } else {
+ return Error(Loc, "unknown sub-directive in '.loc' directive");
}
- }
- Lex();
+ return false;
+ };
+
+ if (parseMany(parseLocOp, false /*hasComma*/))
+ return true;
getStreamer().EmitDwarfLocDirective(FileNumber, LineNumber, ColumnPos, Flags,
Isa, Discriminator, StringRef());
@@ -3144,11 +3263,113 @@ bool AsmParser::parseDirectiveCVFile() {
// directory. Allow the strings to have escaped octal character sequence.
parseEscapedString(Filename) ||
parseToken(AsmToken::EndOfStatement,
- "unexpected token in '.cv_file' directive") ||
- check(getStreamer().EmitCVFileDirective(FileNumber, Filename) == 0,
- FileNumberLoc, "file number already allocated"))
+ "unexpected token in '.cv_file' directive"))
return true;
+ if (!getStreamer().EmitCVFileDirective(FileNumber, Filename))
+ return Error(FileNumberLoc, "file number already allocated");
+
+ return false;
+}
+
+bool AsmParser::parseCVFunctionId(int64_t &FunctionId,
+ StringRef DirectiveName) {
+ SMLoc Loc;
+ return parseTokenLoc(Loc) ||
+ parseIntToken(FunctionId, "expected function id in '" + DirectiveName +
+ "' directive") ||
+ check(FunctionId < 0 || FunctionId >= UINT_MAX, Loc,
+ "expected function id within range [0, UINT_MAX)");
+}
+
+bool AsmParser::parseCVFileId(int64_t &FileNumber, StringRef DirectiveName) {
+ SMLoc Loc;
+ return parseTokenLoc(Loc) ||
+ parseIntToken(FileNumber, "expected integer in '" + DirectiveName +
+ "' directive") ||
+ check(FileNumber < 1, Loc, "file number less than one in '" +
+ DirectiveName + "' directive") ||
+ check(!getCVContext().isValidFileNumber(FileNumber), Loc,
+ "unassigned file number in '" + DirectiveName + "' directive");
+}
+
+/// parseDirectiveCVFuncId
+/// ::= .cv_func_id FunctionId
+///
+/// Introduces a function ID that can be used with .cv_loc.
+bool AsmParser::parseDirectiveCVFuncId() {
+ SMLoc FunctionIdLoc = getTok().getLoc();
+ int64_t FunctionId;
+
+ if (parseCVFunctionId(FunctionId, ".cv_func_id") ||
+ parseToken(AsmToken::EndOfStatement,
+ "unexpected token in '.cv_func_id' directive"))
+ return true;
+
+ if (!getStreamer().EmitCVFuncIdDirective(FunctionId))
+ return Error(FunctionIdLoc, "function id already allocated");
+
+ return false;
+}
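An illustrative use of the new directive (the id value is a placeholder):
    .cv_func_id 1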
+
+/// parseDirectiveCVInlineSiteId
+/// ::= .cv_inline_site_id FunctionId
+/// "within" IAFunc
+/// "inlined_at" IAFile IALine [IACol]
+///
+/// Introduces a function ID that can be used with .cv_loc. Includes "inlined
+/// at" source location information for use in the line table of the caller,
+/// whether the caller is a real function or another inlined call site.
+bool AsmParser::parseDirectiveCVInlineSiteId() {
+ SMLoc FunctionIdLoc = getTok().getLoc();
+ int64_t FunctionId;
+ int64_t IAFunc;
+ int64_t IAFile;
+ int64_t IALine;
+ int64_t IACol = 0;
+
+ // FunctionId
+ if (parseCVFunctionId(FunctionId, ".cv_inline_site_id"))
+ return true;
+
+ // "within"
+ if (check((getLexer().isNot(AsmToken::Identifier) ||
+ getTok().getIdentifier() != "within"),
+ "expected 'within' identifier in '.cv_inline_site_id' directive"))
+ return true;
+ Lex();
+
+ // IAFunc
+ if (parseCVFunctionId(IAFunc, ".cv_inline_site_id"))
+ return true;
+
+ // "inlined_at"
+ if (check((getLexer().isNot(AsmToken::Identifier) ||
+ getTok().getIdentifier() != "inlined_at"),
+ "expected 'inlined_at' identifier in '.cv_inline_site_id' "
+ "directive") )
+ return true;
+ Lex();
+
+ // IAFile IALine
+ if (parseCVFileId(IAFile, ".cv_inline_site_id") ||
+ parseIntToken(IALine, "expected line number after 'inlined_at'"))
+ return true;
+
+ // [IACol]
+ if (getLexer().is(AsmToken::Integer)) {
+ IACol = getTok().getIntVal();
+ Lex();
+ }
+
+ if (parseToken(AsmToken::EndOfStatement,
+ "unexpected token in '.cv_inline_site_id' directive"))
+ return true;
+
+ if (!getStreamer().EmitCVInlineSiteIdDirective(FunctionId, IAFunc, IAFile,
+ IALine, IACol, FunctionIdLoc))
+ return Error(FunctionIdLoc, "function id already allocated");
+
return false;
}
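An illustrative use matching the grammar above, assuming id 1 was introduced earlier by .cv_func_id (all numbers are placeholders):
    .cv_inline_site_id 2 within 1 inlined_at 1 40 10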
@@ -3160,18 +3381,11 @@ bool AsmParser::parseDirectiveCVFile() {
/// third number is a column position (zero if not specified). The remaining
/// optional items are .loc sub-directives.
bool AsmParser::parseDirectiveCVLoc() {
+ SMLoc DirectiveLoc = getTok().getLoc();
SMLoc Loc;
int64_t FunctionId, FileNumber;
- if (getTokenLoc(Loc) ||
- parseIntToken(FunctionId, "unexpected token in '.cv_loc' directive") ||
- check(FunctionId < 0, Loc,
- "function id less than zero in '.cv_loc' directive") ||
- getTokenLoc(Loc) ||
- parseIntToken(FileNumber, "expected integer in '.cv_loc' directive") ||
- check(FileNumber < 1, Loc,
- "file number less than one in '.cv_loc' directive") ||
- check(!getContext().isValidCVFileNumber(FileNumber), Loc,
- "unassigned file number in '.cv_loc' directive"))
+ if (parseCVFunctionId(FunctionId, ".cv_loc") ||
+ parseCVFileId(FileNumber, ".cv_loc"))
return true;
int64_t LineNumber = 0;
@@ -3192,12 +3406,12 @@ bool AsmParser::parseDirectiveCVLoc() {
bool PrologueEnd = false;
uint64_t IsStmt = 0;
- while (getLexer().isNot(AsmToken::EndOfStatement)) {
+
+ auto parseOp = [&]() -> bool {
StringRef Name;
SMLoc Loc = getTok().getLoc();
if (parseIdentifier(Name))
return TokError("unexpected token in '.cv_loc' directive");
-
if (Name == "prologue_end")
PrologueEnd = true;
else if (Name == "is_stmt") {
@@ -3215,11 +3429,15 @@ bool AsmParser::parseDirectiveCVLoc() {
} else {
return Error(Loc, "unknown sub-directive in '.cv_loc' directive");
}
- }
- Lex();
+ return false;
+ };
+
+ if (parseMany(parseOp, false /*hasComma*/))
+ return true;
getStreamer().EmitCVLocDirective(FunctionId, FileNumber, LineNumber,
- ColumnPos, PrologueEnd, IsStmt, StringRef());
+ ColumnPos, PrologueEnd, IsStmt, StringRef(),
+ DirectiveLoc);
return false;
}
@@ -3229,18 +3447,15 @@ bool AsmParser::parseDirectiveCVLinetable() {
int64_t FunctionId;
StringRef FnStartName, FnEndName;
SMLoc Loc = getTok().getLoc();
- if (parseIntToken(FunctionId,
- "expected Integer in '.cv_linetable' directive") ||
- check(FunctionId < 0, Loc,
- "function id less than zero in '.cv_linetable' directive") ||
+ if (parseCVFunctionId(FunctionId, ".cv_linetable") ||
parseToken(AsmToken::Comma,
"unexpected token in '.cv_linetable' directive") ||
- getTokenLoc(Loc) || check(parseIdentifier(FnStartName), Loc,
- "expected identifier in directive") ||
+ parseTokenLoc(Loc) || check(parseIdentifier(FnStartName), Loc,
+ "expected identifier in directive") ||
parseToken(AsmToken::Comma,
"unexpected token in '.cv_linetable' directive") ||
- getTokenLoc(Loc) || check(parseIdentifier(FnEndName), Loc,
- "expected identifier in directive"))
+ parseTokenLoc(Loc) || check(parseIdentifier(FnEndName), Loc,
+ "expected identifier in directive"))
return true;
MCSymbol *FnStartSym = getContext().getOrCreateSymbol(FnStartName);
@@ -3252,52 +3467,29 @@ bool AsmParser::parseDirectiveCVLinetable() {
/// parseDirectiveCVInlineLinetable
/// ::= .cv_inline_linetable PrimaryFunctionId FileId LineNum FnStart FnEnd
-/// ("contains" SecondaryFunctionId+)?
bool AsmParser::parseDirectiveCVInlineLinetable() {
int64_t PrimaryFunctionId, SourceFileId, SourceLineNum;
StringRef FnStartName, FnEndName;
SMLoc Loc = getTok().getLoc();
- if (parseIntToken(
- PrimaryFunctionId,
- "expected PrimaryFunctionId in '.cv_inline_linetable' directive") ||
- check(PrimaryFunctionId < 0, Loc,
- "function id less than zero in '.cv_inline_linetable' directive") ||
- getTokenLoc(Loc) ||
+ if (parseCVFunctionId(PrimaryFunctionId, ".cv_inline_linetable") ||
+ parseTokenLoc(Loc) ||
parseIntToken(
SourceFileId,
"expected SourceField in '.cv_inline_linetable' directive") ||
check(SourceFileId <= 0, Loc,
"File id less than zero in '.cv_inline_linetable' directive") ||
- getTokenLoc(Loc) ||
+ parseTokenLoc(Loc) ||
parseIntToken(
SourceLineNum,
"expected SourceLineNum in '.cv_inline_linetable' directive") ||
check(SourceLineNum < 0, Loc,
"Line number less than zero in '.cv_inline_linetable' directive") ||
- getTokenLoc(Loc) || check(parseIdentifier(FnStartName), Loc,
- "expected identifier in directive") ||
- getTokenLoc(Loc) || check(parseIdentifier(FnEndName), Loc,
- "expected identifier in directive"))
+ parseTokenLoc(Loc) || check(parseIdentifier(FnStartName), Loc,
+ "expected identifier in directive") ||
+ parseTokenLoc(Loc) || check(parseIdentifier(FnEndName), Loc,
+ "expected identifier in directive"))
return true;
- SmallVector<unsigned, 8> SecondaryFunctionIds;
- if (getLexer().is(AsmToken::Identifier)) {
- if (getTok().getIdentifier() != "contains")
- return TokError(
- "unexpected identifier in '.cv_inline_linetable' directive");
- Lex();
-
- while (getLexer().isNot(AsmToken::EndOfStatement)) {
- int64_t SecondaryFunctionId = getTok().getIntVal();
- if (SecondaryFunctionId < 0)
- return TokError(
- "function id less than zero in '.cv_inline_linetable' directive");
- Lex();
-
- SecondaryFunctionIds.push_back(SecondaryFunctionId);
- }
- }
-
if (parseToken(AsmToken::EndOfStatement, "Expected End of Statement"))
return true;
@@ -3305,7 +3497,7 @@ bool AsmParser::parseDirectiveCVInlineLinetable() {
MCSymbol *FnEndSym = getContext().getOrCreateSymbol(FnEndName);
getStreamer().EmitCVInlineLinetableDirective(PrimaryFunctionId, SourceFileId,
SourceLineNum, FnStartSym,
- FnEndSym, SecondaryFunctionIds);
+ FnEndSym);
return false;
}
@@ -3388,12 +3580,12 @@ bool AsmParser::parseDirectiveCFISections() {
/// ::= .cfi_startproc [simple]
bool AsmParser::parseDirectiveCFIStartProc() {
StringRef Simple;
- if (getLexer().isNot(AsmToken::EndOfStatement))
- if (parseIdentifier(Simple) || Simple != "simple")
- return TokError("unexpected token in .cfi_startproc directive");
-
- if (parseToken(AsmToken::EndOfStatement, "Expected end of statement"))
- return true;
+ if (!parseOptionalToken(AsmToken::EndOfStatement)) {
+ if (check(parseIdentifier(Simple) || Simple != "simple",
+ "unexpected token") ||
+ parseToken(AsmToken::EndOfStatement))
+ return addErrorSuffix(" in '.cfi_startproc' directive");
+ }
getStreamer().EmitCFIStartProc(!Simple.empty());
return false;
@@ -3728,7 +3920,7 @@ bool AsmParser::parseDirectiveMacro(SMLoc DirectiveLoc) {
AsmToken EndToken, StartToken = getTok();
unsigned MacroDepth = 0;
// Lex the macro definition.
- for (;;) {
+ while (true) {
// Ignore Lexing errors in macros.
while (Lexer.is(AsmToken::Error)) {
Lexer.Lex();
@@ -3924,14 +4116,16 @@ bool AsmParser::parseDirectiveEndMacro(StringRef Directive) {
bool AsmParser::parseDirectivePurgeMacro(SMLoc DirectiveLoc) {
StringRef Name;
SMLoc Loc;
- if (getTokenLoc(Loc) || check(parseIdentifier(Name), Loc,
- "expected identifier in '.purgem' directive") ||
+ if (parseTokenLoc(Loc) ||
+ check(parseIdentifier(Name), Loc,
+ "expected identifier in '.purgem' directive") ||
parseToken(AsmToken::EndOfStatement,
- "unexpected token in '.purgem' directive") ||
- check(!lookupMacro(Name), DirectiveLoc,
- "macro '" + Name + "' is not defined"))
+ "unexpected token in '.purgem' directive"))
return true;
+ if (!lookupMacro(Name))
+ return Error(DirectiveLoc, "macro '" + Name + "' is not defined");
+
undefineMacro(Name);
return false;
}
@@ -3939,13 +4133,11 @@ bool AsmParser::parseDirectivePurgeMacro(SMLoc DirectiveLoc) {
/// parseDirectiveBundleAlignMode
/// ::= {.bundle_align_mode} expression
bool AsmParser::parseDirectiveBundleAlignMode() {
- checkForValidSection();
-
// Expect a single argument: an expression that evaluates to a constant
// in the inclusive range 0-30.
SMLoc ExprLoc = getLexer().getLoc();
int64_t AlignSizePow2;
- if (parseAbsoluteExpression(AlignSizePow2) ||
+ if (checkForValidSection() || parseAbsoluteExpression(AlignSizePow2) ||
parseToken(AsmToken::EndOfStatement, "unexpected token after expression "
"in '.bundle_align_mode' "
"directive") ||
@@ -3962,25 +4154,24 @@ bool AsmParser::parseDirectiveBundleAlignMode() {
/// parseDirectiveBundleLock
/// ::= {.bundle_lock} [align_to_end]
bool AsmParser::parseDirectiveBundleLock() {
- checkForValidSection();
+ if (checkForValidSection())
+ return true;
bool AlignToEnd = false;
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- StringRef Option;
- SMLoc Loc = getTok().getLoc();
- const char *kInvalidOptionError =
- "invalid option for '.bundle_lock' directive";
+ StringRef Option;
+ SMLoc Loc = getTok().getLoc();
+ const char *kInvalidOptionError =
+ "invalid option for '.bundle_lock' directive";
+ if (!parseOptionalToken(AsmToken::EndOfStatement)) {
if (check(parseIdentifier(Option), Loc, kInvalidOptionError) ||
check(Option != "align_to_end", Loc, kInvalidOptionError) ||
- check(getTok().isNot(AsmToken::EndOfStatement), Loc,
- "unexpected token after '.bundle_lock' directive option"))
+ parseToken(AsmToken::EndOfStatement,
+ "unexpected token after '.bundle_lock' directive option"))
return true;
AlignToEnd = true;
}
- Lex();
-
getStreamer().EmitBundleLock(AlignToEnd);
return false;
}
@@ -3988,9 +4179,8 @@ bool AsmParser::parseDirectiveBundleLock() {
/// parseDirectiveBundleUnlock
/// ::= {.bundle_unlock}
bool AsmParser::parseDirectiveBundleUnlock() {
- checkForValidSection();
-
- if (parseToken(AsmToken::EndOfStatement,
+ if (checkForValidSection() ||
+ parseToken(AsmToken::EndOfStatement,
"unexpected token in '.bundle_unlock' directive"))
return true;
@@ -4001,28 +4191,119 @@ bool AsmParser::parseDirectiveBundleUnlock() {
/// parseDirectiveSpace
/// ::= (.skip | .space) expression [ , expression ]
bool AsmParser::parseDirectiveSpace(StringRef IDVal) {
- checkForValidSection();
SMLoc NumBytesLoc = Lexer.getLoc();
const MCExpr *NumBytes;
- if (parseExpression(NumBytes))
+ if (checkForValidSection() || parseExpression(NumBytes))
return true;
int64_t FillExpr = 0;
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
+ if (parseOptionalToken(AsmToken::Comma))
+ if (parseAbsoluteExpression(FillExpr))
+ return addErrorSuffix("in '" + Twine(IDVal) + "' directive");
+ if (parseToken(AsmToken::EndOfStatement))
+ return addErrorSuffix("in '" + Twine(IDVal) + "' directive");
- if (parseToken(AsmToken::Comma,
- "unexpected token in '" + Twine(IDVal) + "' directive") ||
- parseAbsoluteExpression(FillExpr))
- return true;
+ // FIXME: Sometimes the fill expr is 'nop' if it isn't supplied, instead of 0.
+ getStreamer().emitFill(*NumBytes, FillExpr, NumBytesLoc);
+
+ return false;
+}
+
+/// parseDirectiveDCB
+/// ::= .dcb.{b, l, w} expression, expression
+bool AsmParser::parseDirectiveDCB(StringRef IDVal, unsigned Size) {
+ SMLoc NumValuesLoc = Lexer.getLoc();
+ int64_t NumValues;
+ if (checkForValidSection() || parseAbsoluteExpression(NumValues))
+ return true;
+
+ if (NumValues < 0) {
+ Warning(NumValuesLoc, "'" + Twine(IDVal) + "' directive with negative repeat count has no effect");
+ return false;
+ }
+
+ if (parseToken(AsmToken::Comma,
+ "unexpected token in '" + Twine(IDVal) + "' directive"))
+ return true;
+
+ const MCExpr *Value;
+ SMLoc ExprLoc = getLexer().getLoc();
+ if (parseExpression(Value))
+ return true;
+
+ // Special case constant expressions to match code generator.
+ if (const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value)) {
+ assert(Size <= 8 && "Invalid size");
+ uint64_t IntValue = MCE->getValue();
+ if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue))
+ return Error(ExprLoc, "literal value out of range for directive");
+ for (uint64_t i = 0, e = NumValues; i != e; ++i)
+ getStreamer().EmitIntValue(IntValue, Size);
+ } else {
+ for (uint64_t i = 0, e = NumValues; i != e; ++i)
+ getStreamer().EmitValue(Value, Size, ExprLoc);
}
if (parseToken(AsmToken::EndOfStatement,
"unexpected token in '" + Twine(IDVal) + "' directive"))
return true;
- // FIXME: Sometimes the fill expr is 'nop' if it isn't supplied, instead of 0.
- getStreamer().emitFill(*NumBytes, FillExpr, NumBytesLoc);
+ return false;
+}
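Illustrative uses of the new directive; the first operand is the repeat count and the second is the value (numbers are placeholders):
    .dcb.b 4, 0x2a       # the byte value 0x2a emitted four times
    .dcb.w 2, 0x1234     # the value 0x1234 emitted twice at the .w element size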
+
+/// parseDirectiveRealDCB
+/// ::= .dcb.{d, s} expression, expression
+bool AsmParser::parseDirectiveRealDCB(StringRef IDVal, const fltSemantics &Semantics) {
+ SMLoc NumValuesLoc = Lexer.getLoc();
+ int64_t NumValues;
+ if (checkForValidSection() || parseAbsoluteExpression(NumValues))
+ return true;
+
+ if (NumValues < 0) {
+ Warning(NumValuesLoc, "'" + Twine(IDVal) + "' directive with negative repeat count has no effect");
+ return false;
+ }
+
+ if (parseToken(AsmToken::Comma,
+ "unexpected token in '" + Twine(IDVal) + "' directive"))
+ return true;
+
+ APInt AsInt;
+ if (parseRealValue(Semantics, AsInt))
+ return true;
+
+ if (parseToken(AsmToken::EndOfStatement,
+ "unexpected token in '" + Twine(IDVal) + "' directive"))
+ return true;
+
+ for (uint64_t i = 0, e = NumValues; i != e; ++i)
+ getStreamer().EmitIntValue(AsInt.getLimitedValue(),
+ AsInt.getBitWidth() / 8);
+
+ return false;
+}
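An illustrative floating-point form, assuming the usual decimal syntax accepted by parseRealValue (numbers are placeholders):
    .dcb.s 3, 1.5        # the encoding of 1.5 emitted three times at the .s width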
+
+/// parseDirectiveDS
+/// ::= .ds.{b, d, l, p, s, w, x} expression
+bool AsmParser::parseDirectiveDS(StringRef IDVal, unsigned Size) {
+
+ SMLoc NumValuesLoc = Lexer.getLoc();
+ int64_t NumValues;
+ if (checkForValidSection() || parseAbsoluteExpression(NumValues))
+ return true;
+
+ if (NumValues < 0) {
+ Warning(NumValuesLoc, "'" + Twine(IDVal) + "' directive with negative repeat count has no effect");
+ return false;
+ }
+
+ if (parseToken(AsmToken::EndOfStatement,
+ "unexpected token in '" + Twine(IDVal) + "' directive"))
+ return true;
+
+ for (uint64_t i = 0, e = NumValues; i != e; ++i)
+ getStreamer().emitFill(Size, 0);
return false;
}
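An illustrative use of the new directive (the count is a placeholder):
    .ds.w 8              # reserve 8 zero-filled elements of the .w size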
@@ -4030,25 +4311,22 @@ bool AsmParser::parseDirectiveSpace(StringRef IDVal) {
/// parseDirectiveLEB128
/// ::= (.sleb128 | .uleb128) [ expression (, expression)* ]
bool AsmParser::parseDirectiveLEB128(bool Signed) {
- checkForValidSection();
- const MCExpr *Value;
+ if (checkForValidSection())
+ return true;
- for (;;) {
+ auto parseOp = [&]() -> bool {
+ const MCExpr *Value;
if (parseExpression(Value))
return true;
-
if (Signed)
getStreamer().EmitSLEB128Value(Value);
else
getStreamer().EmitULEB128Value(Value);
+ return false;
+ };
- if (getLexer().is(AsmToken::EndOfStatement))
- break;
-
- if (parseToken(AsmToken::Comma, "unexpected token in directive"))
- return true;
- }
- Lex();
+ if (parseMany(parseOp))
+ return addErrorSuffix(" in directive");
return false;
}
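With parseMany the directive now accepts a comma-separated list; an illustrative input (the symbols are placeholders):
    .uleb128 624485, end - start, 0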
@@ -4056,39 +4334,32 @@ bool AsmParser::parseDirectiveLEB128(bool Signed) {
/// parseDirectiveSymbolAttribute
/// ::= { ".globl", ".weak", ... } [ identifier ( , identifier )* ]
bool AsmParser::parseDirectiveSymbolAttribute(MCSymbolAttr Attr) {
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- for (;;) {
- StringRef Name;
- SMLoc Loc = getTok().getLoc();
-
- if (parseIdentifier(Name))
- return Error(Loc, "expected identifier in directive");
-
- MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
-
- // Assembler local symbols don't make any sense here. Complain loudly.
- if (Sym->isTemporary())
- return Error(Loc, "non-local symbol required in directive");
-
- if (!getStreamer().EmitSymbolAttribute(Sym, Attr))
- return Error(Loc, "unable to emit symbol attribute");
+ auto parseOp = [&]() -> bool {
+ StringRef Name;
+ SMLoc Loc = getTok().getLoc();
+ if (parseIdentifier(Name))
+ return Error(Loc, "expected identifier");
+ MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
- if (getLexer().is(AsmToken::EndOfStatement))
- break;
+ // Assembler local symbols don't make any sense here. Complain loudly.
+ if (Sym->isTemporary())
+ return Error(Loc, "non-local symbol required");
- if (parseToken(AsmToken::Comma, "unexpected token in directive"))
- return true;
- }
- }
+ if (!getStreamer().EmitSymbolAttribute(Sym, Attr))
+ return Error(Loc, "unable to emit symbol attribute");
+ return false;
+ };
- Lex();
+ if (parseMany(parseOp))
+ return addErrorSuffix(" in directive");
return false;
}
/// parseDirectiveComm
/// ::= ( .comm | .lcomm ) identifier , size_expression [ , align_expression ]
bool AsmParser::parseDirectiveComm(bool IsLocal) {
- checkForValidSection();
+ if (checkForValidSection())
+ return true;
SMLoc IDLoc = getLexer().getLoc();
StringRef Name;
@@ -4128,10 +4399,9 @@ bool AsmParser::parseDirectiveComm(bool IsLocal) {
}
}
- if (getLexer().isNot(AsmToken::EndOfStatement))
- return TokError("unexpected token in '.comm' or '.lcomm' directive");
-
- Lex();
+ if (parseToken(AsmToken::EndOfStatement,
+ "unexpected token in '.comm' or '.lcomm' directive"))
+ return true;
// NOTE: a size of zero for a .comm should create a undefined symbol
// but a size of .lcomm creates a bss symbol of size zero.
@@ -4171,9 +4441,9 @@ bool AsmParser::parseDirectiveAbort() {
return true;
if (Str.empty())
- Error(Loc, ".abort detected. Assembly stopping.");
+ return Error(Loc, ".abort detected. Assembly stopping.");
else
- Error(Loc, ".abort '" + Str + "' detected. Assembly stopping.");
+ return Error(Loc, ".abort '" + Str + "' detected. Assembly stopping.");
// FIXME: Actually abort assembly here.
return false;
@@ -4201,20 +4471,43 @@ bool AsmParser::parseDirectiveInclude() {
}
/// parseDirectiveIncbin
-/// ::= .incbin "filename"
+/// ::= .incbin "filename" [ , skip [ , count ] ]
bool AsmParser::parseDirectiveIncbin() {
// Allow the strings to have escaped octal character sequence.
std::string Filename;
SMLoc IncbinLoc = getTok().getLoc();
if (check(getTok().isNot(AsmToken::String),
"expected string in '.incbin' directive") ||
- parseEscapedString(Filename) ||
- parseToken(AsmToken::EndOfStatement,
- "unexpected token in '.incbin' directive") ||
- // Attempt to process the included file.
- check(processIncbinFile(Filename), IncbinLoc,
- "Could not find incbin file '" + Filename + "'"))
+ parseEscapedString(Filename))
+ return true;
+
+ int64_t Skip = 0;
+ const MCExpr *Count = nullptr;
+ SMLoc SkipLoc, CountLoc;
+ if (parseOptionalToken(AsmToken::Comma)) {
+ // The skip expression can be omitted while specifying the count, e.g:
+ // .incbin "filename",,4
+ if (getTok().isNot(AsmToken::Comma)) {
+ if (parseTokenLoc(SkipLoc) || parseAbsoluteExpression(Skip))
+ return true;
+ }
+ if (parseOptionalToken(AsmToken::Comma)) {
+ CountLoc = getTok().getLoc();
+ if (parseExpression(Count))
+ return true;
+ }
+ }
+
+ if (parseToken(AsmToken::EndOfStatement,
+ "unexpected token in '.incbin' directive"))
+ return true;
+
+ if (check(Skip < 0, SkipLoc, "skip is negative"))
return true;
+
+ // Attempt to process the included file.
+ if (processIncbinFile(Filename, Skip, Count, CountLoc))
+ return Error(IncbinLoc, "Could not find incbin file '" + Filename + "'");
return false;
}
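Illustrative forms for the extended grammar (the filename and numbers are placeholders):
    .incbin "data.bin"              # include the whole file
    .incbin "data.bin", 16, 256     # skip 16 bytes, then include 256 bytes
    .incbin "data.bin",, 64         # no skip, include 64 bytes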
@@ -4317,11 +4610,8 @@ bool AsmParser::parseDirectiveIfc(SMLoc DirectiveLoc, bool ExpectEqual) {
bool AsmParser::parseDirectiveIfeqs(SMLoc DirectiveLoc, bool ExpectEqual) {
if (Lexer.isNot(AsmToken::String)) {
if (ExpectEqual)
- TokError("expected string parameter for '.ifeqs' directive");
- else
- TokError("expected string parameter for '.ifnes' directive");
- eatToEndOfStatement();
- return true;
+ return TokError("expected string parameter for '.ifeqs' directive");
+ return TokError("expected string parameter for '.ifnes' directive");
}
StringRef String1 = getTok().getStringContents();
@@ -4329,22 +4619,17 @@ bool AsmParser::parseDirectiveIfeqs(SMLoc DirectiveLoc, bool ExpectEqual) {
if (Lexer.isNot(AsmToken::Comma)) {
if (ExpectEqual)
- TokError("expected comma after first string for '.ifeqs' directive");
- else
- TokError("expected comma after first string for '.ifnes' directive");
- eatToEndOfStatement();
- return true;
+ return TokError(
+ "expected comma after first string for '.ifeqs' directive");
+ return TokError("expected comma after first string for '.ifnes' directive");
}
Lex();
if (Lexer.isNot(AsmToken::String)) {
if (ExpectEqual)
- TokError("expected string parameter for '.ifeqs' directive");
- else
- TokError("expected string parameter for '.ifnes' directive");
- eatToEndOfStatement();
- return true;
+ return TokError("expected string parameter for '.ifeqs' directive");
+ return TokError("expected string parameter for '.ifnes' directive");
}
StringRef String2 = getTok().getStringContents();
@@ -4389,8 +4674,8 @@ bool AsmParser::parseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined) {
bool AsmParser::parseDirectiveElseIf(SMLoc DirectiveLoc) {
if (TheCondState.TheCond != AsmCond::IfCond &&
TheCondState.TheCond != AsmCond::ElseIfCond)
- Error(DirectiveLoc, "Encountered a .elseif that doesn't follow a .if or "
- " an .elseif");
+ return Error(DirectiveLoc, "Encountered a .elseif that doesn't follow an"
+ " .if or an .elseif");
TheCondState.TheCond = AsmCond::ElseIfCond;
bool LastIgnoreState = false;
@@ -4404,10 +4689,10 @@ bool AsmParser::parseDirectiveElseIf(SMLoc DirectiveLoc) {
if (parseAbsoluteExpression(ExprValue))
return true;
- if (getLexer().isNot(AsmToken::EndOfStatement))
- return TokError("unexpected token in '.elseif' directive");
+ if (parseToken(AsmToken::EndOfStatement,
+ "unexpected token in '.elseif' directive"))
+ return true;
- Lex();
TheCondState.CondMet = ExprValue;
TheCondState.Ignore = !TheCondState.CondMet;
}
@@ -4424,8 +4709,8 @@ bool AsmParser::parseDirectiveElse(SMLoc DirectiveLoc) {
if (TheCondState.TheCond != AsmCond::IfCond &&
TheCondState.TheCond != AsmCond::ElseIfCond)
- Error(DirectiveLoc, "Encountered a .else that doesn't follow a .if or an "
- ".elseif");
+ return Error(DirectiveLoc, "Encountered a .else that doesn't follow "
+ " an .if or an .elseif");
TheCondState.TheCond = AsmCond::ElseCond;
bool LastIgnoreState = false;
if (!TheCondStack.empty())
@@ -4446,7 +4731,7 @@ bool AsmParser::parseDirectiveEnd(SMLoc DirectiveLoc) {
return true;
while (Lexer.isNot(AsmToken::Eof))
- Lex();
+ Lexer.Lex();
return false;
}
@@ -4467,18 +4752,14 @@ bool AsmParser::parseDirectiveError(SMLoc L, bool WithMessage) {
StringRef Message = ".error directive invoked in source file";
if (Lexer.isNot(AsmToken::EndOfStatement)) {
- if (Lexer.isNot(AsmToken::String)) {
- TokError(".error argument must be a string");
- eatToEndOfStatement();
- return true;
- }
+ if (Lexer.isNot(AsmToken::String))
+ return TokError(".error argument must be a string");
Message = getTok().getStringContents();
Lex();
}
- Error(L, Message);
- return true;
+ return Error(L, Message);
}
/// parseDirectiveWarning
@@ -4492,19 +4773,19 @@ bool AsmParser::parseDirectiveWarning(SMLoc L) {
}
StringRef Message = ".warning directive invoked in source file";
- if (Lexer.isNot(AsmToken::EndOfStatement)) {
- if (Lexer.isNot(AsmToken::String)) {
- TokError(".warning argument must be a string");
- eatToEndOfStatement();
- return true;
- }
+
+ if (!parseOptionalToken(AsmToken::EndOfStatement)) {
+ if (Lexer.isNot(AsmToken::String))
+ return TokError(".warning argument must be a string");
Message = getTok().getStringContents();
Lex();
+ if (parseToken(AsmToken::EndOfStatement,
+ "expected end of statement in '.warning' directive"))
+ return true;
}
- Warning(L, Message);
- return false;
+ return Warning(L, Message);
}
/// parseDirectiveEndIf
@@ -4515,8 +4796,8 @@ bool AsmParser::parseDirectiveEndIf(SMLoc DirectiveLoc) {
return true;
if ((TheCondState.TheCond == AsmCond::NoCond) || TheCondStack.empty())
- Error(DirectiveLoc, "Encountered a .endif that doesn't follow a .if or "
- ".else");
+ return Error(DirectiveLoc, "Encountered a .endif that doesn't follow "
+ "an .if or .else");
if (!TheCondStack.empty()) {
TheCondState = TheCondStack.back();
TheCondStack.pop_back();
@@ -4610,9 +4891,11 @@ void AsmParser::initializeDirectiveKindMap() {
DirectiveKindMap[".loc"] = DK_LOC;
DirectiveKindMap[".stabs"] = DK_STABS;
DirectiveKindMap[".cv_file"] = DK_CV_FILE;
+ DirectiveKindMap[".cv_func_id"] = DK_CV_FUNC_ID;
DirectiveKindMap[".cv_loc"] = DK_CV_LOC;
DirectiveKindMap[".cv_linetable"] = DK_CV_LINETABLE;
DirectiveKindMap[".cv_inline_linetable"] = DK_CV_INLINE_LINETABLE;
+ DirectiveKindMap[".cv_inline_site_id"] = DK_CV_INLINE_SITE_ID;
DirectiveKindMap[".cv_def_range"] = DK_CV_DEF_RANGE;
DirectiveKindMap[".cv_stringtable"] = DK_CV_STRINGTABLE;
DirectiveKindMap[".cv_filechecksums"] = DK_CV_FILECHECKSUMS;
@@ -4649,16 +4932,39 @@ void AsmParser::initializeDirectiveKindMap() {
DirectiveKindMap[".error"] = DK_ERROR;
DirectiveKindMap[".warning"] = DK_WARNING;
DirectiveKindMap[".reloc"] = DK_RELOC;
+ DirectiveKindMap[".dc"] = DK_DC;
+ DirectiveKindMap[".dc.a"] = DK_DC_A;
+ DirectiveKindMap[".dc.b"] = DK_DC_B;
+ DirectiveKindMap[".dc.d"] = DK_DC_D;
+ DirectiveKindMap[".dc.l"] = DK_DC_L;
+ DirectiveKindMap[".dc.s"] = DK_DC_S;
+ DirectiveKindMap[".dc.w"] = DK_DC_W;
+ DirectiveKindMap[".dc.x"] = DK_DC_X;
+ DirectiveKindMap[".dcb"] = DK_DCB;
+ DirectiveKindMap[".dcb.b"] = DK_DCB_B;
+ DirectiveKindMap[".dcb.d"] = DK_DCB_D;
+ DirectiveKindMap[".dcb.l"] = DK_DCB_L;
+ DirectiveKindMap[".dcb.s"] = DK_DCB_S;
+ DirectiveKindMap[".dcb.w"] = DK_DCB_W;
+ DirectiveKindMap[".dcb.x"] = DK_DCB_X;
+ DirectiveKindMap[".ds"] = DK_DS;
+ DirectiveKindMap[".ds.b"] = DK_DS_B;
+ DirectiveKindMap[".ds.d"] = DK_DS_D;
+ DirectiveKindMap[".ds.l"] = DK_DS_L;
+ DirectiveKindMap[".ds.p"] = DK_DS_P;
+ DirectiveKindMap[".ds.s"] = DK_DS_S;
+ DirectiveKindMap[".ds.w"] = DK_DS_W;
+ DirectiveKindMap[".ds.x"] = DK_DS_X;
}
MCAsmMacro *AsmParser::parseMacroLikeBody(SMLoc DirectiveLoc) {
AsmToken EndToken, StartToken = getTok();
unsigned NestLevel = 0;
- for (;;) {
+ while (true) {
// Check whether we have reached the end of the file.
if (getLexer().is(AsmToken::Eof)) {
- Error(DirectiveLoc, "no matching '.endr' in definition");
+ printError(DirectiveLoc, "no matching '.endr' in definition");
return nullptr;
}
@@ -4675,7 +4981,8 @@ MCAsmMacro *AsmParser::parseMacroLikeBody(SMLoc DirectiveLoc) {
EndToken = getTok();
Lex();
if (Lexer.isNot(AsmToken::EndOfStatement)) {
- TokError("unexpected token in '.endr' directive");
+ printError(getTok().getLoc(),
+ "unexpected token in '.endr' directive");
return nullptr;
}
break;
@@ -4725,7 +5032,6 @@ bool AsmParser::parseDirectiveRept(SMLoc DirectiveLoc, StringRef Dir) {
int64_t Count;
if (!CountExpr->evaluateAsAbsolute(Count)) {
- eatToEndOfStatement();
return Error(CountLoc, "unexpected token in '" + Dir + "' directive");
}
@@ -4928,11 +5234,16 @@ bool AsmParser::parseMSInlineAsm(
continue;
ParseStatementInfo Info(&AsmStrRewrites);
- if (parseStatement(Info, &SI))
- return true;
+ bool StatementErr = parseStatement(Info, &SI);
- if (Info.ParseError)
+ if (StatementErr || Info.ParseError) {
+ // Emit pending errors if any exist.
+ printPendingErrors();
return true;
+ }
+
+ // No pending error should exist here.
+ assert(!hasPendingError() && "unexpected error from parseStatement");
if (Info.Opcode == ~0U)
continue;
@@ -5158,20 +5469,15 @@ bool parseAssignmentExpression(StringRef Name, bool allow_redef,
SMLoc EqualLoc = Parser.getTok().getLoc();
if (Parser.parseExpression(Value)) {
- Parser.TokError("missing expression");
- Parser.eatToEndOfStatement();
- return true;
+ return Parser.TokError("missing expression");
}
// Note: we don't count b as used in "a = b". This is to allow
// a = b
// b = c
- if (Parser.getTok().isNot(AsmToken::EndOfStatement))
- return Parser.TokError("unexpected token in assignment");
-
- // Eat the end of statement marker.
- Parser.Lex();
+ if (Parser.parseToken(AsmToken::EndOfStatement))
+ return true;
// Validate that the LHS is allowed to be a variable (either it has not been
// used as a symbol, or it is an absolute symbol).
@@ -5197,7 +5503,7 @@ bool parseAssignmentExpression(StringRef Name, bool allow_redef,
"invalid reassignment of non-absolute variable '" +
Name + "'");
} else if (Name == ".") {
- Parser.getStreamer().emitValueToOffset(Value, 0);
+ Parser.getStreamer().emitValueToOffset(Value, 0, EqualLoc);
return false;
} else
Sym = Parser.getContext().getOrCreateSymbol(Name);
@@ -5207,8 +5513,8 @@ bool parseAssignmentExpression(StringRef Name, bool allow_redef,
return false;
}
-} // namespace MCParserUtils
-} // namespace llvm
+} // end namespace MCParserUtils
+} // end namespace llvm
/// \brief Create an MCAsmParser instance.
MCAsmParser *llvm::createMCAsmParser(SourceMgr &SM, MCContext &C,
diff --git a/contrib/llvm/lib/MC/MCParser/COFFAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/COFFAsmParser.cpp
index 653627ad8dca..f4114795a92d 100644
--- a/contrib/llvm/lib/MC/MCParser/COFFAsmParser.cpp
+++ b/contrib/llvm/lib/MC/MCParser/COFFAsmParser.cpp
@@ -41,7 +41,8 @@ class COFFAsmParser : public MCAsmParserExtension {
COFF::COMDATType Type);
bool ParseSectionName(StringRef &SectionName);
- bool ParseSectionFlags(StringRef FlagsString, unsigned* Flags);
+ bool ParseSectionFlags(StringRef SectionName, StringRef FlagsString,
+ unsigned *Flags);
void Initialize(MCAsmParser &Parser) override {
// Call the base implementation.
@@ -155,17 +156,19 @@ static SectionKind computeSectionKind(unsigned Flags) {
return SectionKind::getData();
}
-bool COFFAsmParser::ParseSectionFlags(StringRef FlagsString, unsigned* Flags) {
+bool COFFAsmParser::ParseSectionFlags(StringRef SectionName,
+ StringRef FlagsString, unsigned *Flags) {
enum {
- None = 0,
- Alloc = 1 << 0,
- Code = 1 << 1,
- Load = 1 << 2,
- InitData = 1 << 3,
- Shared = 1 << 4,
- NoLoad = 1 << 5,
- NoRead = 1 << 6,
- NoWrite = 1 << 7
+ None = 0,
+ Alloc = 1 << 0,
+ Code = 1 << 1,
+ Load = 1 << 2,
+ InitData = 1 << 3,
+ Shared = 1 << 4,
+ NoLoad = 1 << 5,
+ NoRead = 1 << 6,
+ NoWrite = 1 << 7,
+ Discardable = 1 << 8,
};
bool ReadOnlyRemoved = false;
@@ -198,6 +201,10 @@ bool COFFAsmParser::ParseSectionFlags(StringRef FlagsString, unsigned* Flags) {
SecFlags &= ~Load;
break;
+ case 'D': // discardable
+ SecFlags |= Discardable;
+ break;
+
case 'r': // read-only
ReadOnlyRemoved = false;
SecFlags |= NoWrite;
@@ -249,6 +256,9 @@ bool COFFAsmParser::ParseSectionFlags(StringRef FlagsString, unsigned* Flags) {
*Flags |= COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA;
if (SecFlags & NoLoad)
*Flags |= COFF::IMAGE_SCN_LNK_REMOVE;
+ if ((SecFlags & Discardable) ||
+ MCSectionCOFF::isImplicitlyDiscardable(SectionName))
+ *Flags |= COFF::IMAGE_SCN_MEM_DISCARDABLE;
if ((SecFlags & NoRead) == 0)
*Flags |= COFF::IMAGE_SCN_MEM_READ;
if ((SecFlags & NoWrite) == 0)
@@ -326,7 +336,8 @@ bool COFFAsmParser::ParseSectionName(StringRef &SectionName) {
// a: Ignored.
// b: BSS section (uninitialized data)
// d: data section (initialized data)
-// n: Discardable section
+// n: "noload" section (removed by linker)
+// D: Discardable section
// r: Readable section
// s: Shared section
// w: Writable section
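An illustrative section declaration using the new flag letter (the section name is a placeholder):
    .section .mysec, "dD"           # initialized data, explicitly discardable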
@@ -353,7 +364,7 @@ bool COFFAsmParser::ParseDirectiveSection(StringRef, SMLoc) {
StringRef FlagsStr = getTok().getStringContents();
Lex();
- if (ParseSectionFlags(FlagsStr, &Flags))
+ if (ParseSectionFlags(SectionName, FlagsStr, &Flags))
return true;
}
@@ -444,13 +455,26 @@ bool COFFAsmParser::ParseDirectiveSecRel32(StringRef, SMLoc) {
if (getParser().parseIdentifier(SymbolID))
return TokError("expected identifier in directive");
+ int64_t Offset = 0;
+ SMLoc OffsetLoc;
+ if (getLexer().is(AsmToken::Plus)) {
+ OffsetLoc = getLexer().getLoc();
+ if (getParser().parseAbsoluteExpression(Offset))
+ return true;
+ }
+
if (getLexer().isNot(AsmToken::EndOfStatement))
return TokError("unexpected token in directive");
+ if (Offset < 0 || Offset > UINT32_MAX)
+ return Error(OffsetLoc,
+ "invalid '.secrel32' directive offset, can't be less "
+ "than zero or greater than UINT32_MAX");
+
MCSymbol *Symbol = getContext().getOrCreateSymbol(SymbolID);
Lex();
- getStreamer().EmitCOFFSecRel32(Symbol);
+ getStreamer().EmitCOFFSecRel32(Symbol, Offset);
return false;
}
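An illustrative use of the new offset form (the symbol and offset are placeholders):
    .secrel32 my_symbol + 16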
@@ -514,8 +538,8 @@ bool COFFAsmParser::ParseDirectiveLinkOnce(StringRef, SMLoc Loc) {
if (parseCOMDATType(Type))
return true;
- const MCSectionCOFF *Current = static_cast<const MCSectionCOFF*>(
- getStreamer().getCurrentSection().first);
+ const MCSectionCOFF *Current =
+ static_cast<const MCSectionCOFF *>(getStreamer().getCurrentSectionOnly());
if (Type == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE)
return Error(Loc, "cannot make section associative with .linkonce");
diff --git a/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
index 37515d9c074d..94aa70ef0326 100644
--- a/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
+++ b/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp
@@ -37,7 +37,7 @@ class DarwinAsmParser : public MCAsmParserExtension {
getParser().addDirectiveHandler(Directive, Handler);
}
- bool parseSectionSwitch(const char *Segment, const char *Section,
+ bool parseSectionSwitch(StringRef Segment, StringRef Section,
unsigned TAA = 0, unsigned ImplicitAlign = 0,
unsigned StubSize = 0);
@@ -389,8 +389,7 @@ public:
} // end anonymous namespace
-bool DarwinAsmParser::parseSectionSwitch(const char *Segment,
- const char *Section,
+bool DarwinAsmParser::parseSectionSwitch(StringRef Segment, StringRef Section,
unsigned TAA, unsigned Align,
unsigned StubSize) {
if (getLexer().isNot(AsmToken::EndOfStatement))
@@ -469,8 +468,8 @@ bool DarwinAsmParser::parseDirectiveDesc(StringRef, SMLoc) {
/// parseDirectiveIndirectSymbol
/// ::= .indirect_symbol identifier
bool DarwinAsmParser::parseDirectiveIndirectSymbol(StringRef, SMLoc Loc) {
- const MCSectionMachO *Current = static_cast<const MCSectionMachO*>(
- getStreamer().getCurrentSection().first);
+ const MCSectionMachO *Current = static_cast<const MCSectionMachO *>(
+ getStreamer().getCurrentSectionOnly());
MachO::SectionType SectionType = Current->getType();
if (SectionType != MachO::S_NON_LAZY_SYMBOL_POINTERS &&
SectionType != MachO::S_LAZY_SYMBOL_POINTERS &&
@@ -615,7 +614,7 @@ bool DarwinAsmParser::parseDirectiveSection(StringRef, SMLoc) {
TAA, TAAParsed, StubSize);
if (!ErrorStr.empty())
- return Error(Loc, ErrorStr.c_str());
+ return Error(Loc, ErrorStr);
// Issue a warning if the target is not powerpc and Section is a *coal* section.
Triple TT = getParser().getContext().getObjectFileInfo()->getTargetTriple();
@@ -700,7 +699,7 @@ bool DarwinAsmParser::parseDirectiveSecureLogUnique(StringRef, SMLoc IDLoc) {
if (!OS) {
std::error_code EC;
auto NewOS = llvm::make_unique<raw_fd_ostream>(
- SecureLogFile, EC, sys::fs::F_Append | sys::fs::F_Text);
+ StringRef(SecureLogFile), EC, sys::fs::F_Append | sys::fs::F_Text);
if (EC)
return Error(IDLoc, Twine("can't open secure log file: ") +
SecureLogFile + " (" + EC.message() + ")");
diff --git a/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp
index 47d19a824d19..8d7ba0d03362 100644
--- a/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp
+++ b/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp
@@ -264,6 +264,10 @@ bool ELFAsmParser::ParseSectionName(StringRef &SectionName) {
static unsigned parseSectionFlags(StringRef flagsStr, bool *UseLastGroup) {
unsigned flags = 0;
+ // If a valid numerical value is set for the section flag, use it verbatim
+ if (!flagsStr.getAsInteger(0, flags))
+ return flags;
+
for (char i : flagsStr) {
switch (i) {
case 'a':
@@ -293,6 +297,9 @@ static unsigned parseSectionFlags(StringRef flagsStr, bool *UseLastGroup) {
case 'd':
flags |= ELF::XCORE_SHF_DP_SECTION;
break;
+ case 'y':
+ flags |= ELF::SHF_ARM_PURECODE;
+ break;
case 'G':
flags |= ELF::SHF_GROUP;
break;
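Illustrative section directives enabled by these two changes (names and values are placeholders; flag letters other than 'y' keep their usual ELF meanings):
    .section .special, "0x6"        # numeric flag value used verbatim
    .section .text.pure, "axy"      # 'y' selects SHF_ARM_PURECODE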
diff --git a/contrib/llvm/lib/MC/MCParser/MCAsmLexer.cpp b/contrib/llvm/lib/MC/MCParser/MCAsmLexer.cpp
index d95cd12accbc..63c0daba09a0 100644
--- a/contrib/llvm/lib/MC/MCParser/MCAsmLexer.cpp
+++ b/contrib/llvm/lib/MC/MCParser/MCAsmLexer.cpp
@@ -12,7 +12,9 @@
using namespace llvm;
-MCAsmLexer::MCAsmLexer() : TokStart(nullptr), SkipSpace(true) {
+MCAsmLexer::MCAsmLexer()
+ : TokStart(nullptr), SkipSpace(true), IsAtStartOfStatement(true),
+ CommentConsumer(nullptr) {
CurTok.emplace_back(AsmToken::Space, StringRef());
}
diff --git a/contrib/llvm/lib/MC/MCParser/MCAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/MCAsmParser.cpp
index dc7a3f00840f..98f4daf972d6 100644
--- a/contrib/llvm/lib/MC/MCParser/MCAsmParser.cpp
+++ b/contrib/llvm/lib/MC/MCParser/MCAsmParser.cpp
@@ -17,8 +17,9 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-MCAsmParser::MCAsmParser() : TargetParser(nullptr), ShowParsedOperands(0) {
-}
+MCAsmParser::MCAsmParser()
+ : TargetParser(nullptr), ShowParsedOperands(0), HadError(false),
+ PendingErrors() {}
MCAsmParser::~MCAsmParser() {
}
@@ -33,18 +34,109 @@ const AsmToken &MCAsmParser::getTok() const {
return getLexer().getTok();
}
-bool MCAsmParser::TokError(const Twine &Msg, ArrayRef<SMRange> Ranges) {
- Error(getLexer().getLoc(), Msg, Ranges);
+bool MCAsmParser::parseTokenLoc(SMLoc &Loc) {
+ Loc = getTok().getLoc();
+ return false;
+}
+
+bool MCAsmParser::parseEOL(const Twine &Msg) {
+ if (getTok().getKind() == AsmToken::Hash) {
+ StringRef CommentStr = parseStringToEndOfStatement();
+ getLexer().Lex();
+ getLexer().UnLex(AsmToken(AsmToken::EndOfStatement, CommentStr));
+ }
+ if (getTok().getKind() != AsmToken::EndOfStatement)
+ return Error(getTok().getLoc(), Msg);
+ Lex();
+ return false;
+}
+
+bool MCAsmParser::parseToken(AsmToken::TokenKind T, const Twine &Msg) {
+ if (T == AsmToken::EndOfStatement)
+ return parseEOL(Msg);
+ if (getTok().getKind() != T)
+ return Error(getTok().getLoc(), Msg);
+ Lex();
+ return false;
+}
+
+bool MCAsmParser::parseIntToken(int64_t &V, const Twine &Msg) {
+ if (getTok().getKind() != AsmToken::Integer)
+ return TokError(Msg);
+ V = getTok().getIntVal();
+ Lex();
+ return false;
+}
+
+bool MCAsmParser::parseOptionalToken(AsmToken::TokenKind T) {
+ bool Present = (getTok().getKind() == T);
+ // If we are looking for an EndOfStatement and the current token is '#', the
+ // rest of the line is an EOL comment, so treat the statement as ended.
+ if (getTok().getKind() == AsmToken::Hash && T == AsmToken::EndOfStatement)
+ Present = true;
+ if (Present)
+ parseToken(T);
+ return Present;
+}
+
+bool MCAsmParser::check(bool P, const Twine &Msg) {
+ return check(P, getTok().getLoc(), Msg);
+}
+
+bool MCAsmParser::check(bool P, SMLoc Loc, const Twine &Msg) {
+ if (P)
+ return Error(Loc, Msg);
+ return false;
+}
+
+bool MCAsmParser::TokError(const Twine &Msg, SMRange Range) {
+ return Error(getLexer().getLoc(), Msg, Range);
+}
+
+bool MCAsmParser::Error(SMLoc L, const Twine &Msg, SMRange Range) {
+ HadError = true;
+
+ MCPendingError PErr;
+ PErr.Loc = L;
+ Msg.toVector(PErr.Msg);
+ PErr.Range = Range;
+ PendingErrors.push_back(PErr);
+
+ // If we threw this parsing error after a lexing error, this should
+ // supersede the lexing error, so we remove it from the Lexer
+ // before it can propagate.
+ if (getTok().is(AsmToken::Error))
+ getLexer().Lex();
+ return true;
+}
+
+bool MCAsmParser::addErrorSuffix(const Twine &Suffix) {
+ // Make sure lexing errors have propagated to the parser.
+ if (getTok().is(AsmToken::Error))
+ Lex();
+ for (auto &PErr : PendingErrors)
+ Suffix.toVector(PErr.Msg);
return true;
}
+bool MCAsmParser::parseMany(std::function<bool()> parseOne, bool hasComma) {
+ if (parseOptionalToken(AsmToken::EndOfStatement))
+ return false;
+ while (1) {
+ if (parseOne())
+ return true;
+ if (parseOptionalToken(AsmToken::EndOfStatement))
+ return false;
+ if (hasComma && parseToken(AsmToken::Comma))
+ return true;
+ }
+ return false;
+}
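A minimal sketch of the caller pattern this helper enables, mirroring the .uleb128 handler earlier in this patch (the streamer call is the one that handler uses; adapt as needed):
    auto parseOp = [&]() -> bool {
      const MCExpr *Value;
      if (parseExpression(Value))
        return true;
      getStreamer().EmitULEB128Value(Value);
      return false;
    };
    if (parseMany(parseOp))                 // comma-separated operands by default
      return addErrorSuffix(" in directive");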
+
bool MCAsmParser::parseExpression(const MCExpr *&Res) {
SMLoc L;
return parseExpression(Res, L);
}
LLVM_DUMP_METHOD void MCParsedAsmOperand::dump() const {
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
dbgs() << " " << *this;
-#endif
}
diff --git a/contrib/llvm/lib/MC/MCRegisterInfo.cpp b/contrib/llvm/lib/MC/MCRegisterInfo.cpp
index c76bb646c126..ea117f3caa85 100644
--- a/contrib/llvm/lib/MC/MCRegisterInfo.cpp
+++ b/contrib/llvm/lib/MC/MCRegisterInfo.cpp
@@ -12,6 +12,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -62,6 +64,8 @@ int MCRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
const DwarfLLVMRegPair *M = isEH ? EHL2DwarfRegs : L2DwarfRegs;
unsigned Size = isEH ? EHL2DwarfRegsSize : L2DwarfRegsSize;
+ if (!M)
+ return -1;
DwarfLLVMRegPair Key = { RegNum, 0 };
const DwarfLLVMRegPair *I = std::lower_bound(M, M+Size, Key);
if (I == M+Size || I->FromReg != RegNum)
@@ -73,6 +77,8 @@ int MCRegisterInfo::getLLVMRegNum(unsigned RegNum, bool isEH) const {
const DwarfLLVMRegPair *M = isEH ? EHDwarf2LRegs : Dwarf2LRegs;
unsigned Size = isEH ? EHDwarf2LRegsSize : Dwarf2LRegsSize;
+ if (!M)
+ return -1;
DwarfLLVMRegPair Key = { RegNum, 0 };
const DwarfLLVMRegPair *I = std::lower_bound(M, M+Size, Key);
assert(I != M+Size && I->FromReg == RegNum && "Invalid RegNum");
diff --git a/contrib/llvm/lib/MC/MCSection.cpp b/contrib/llvm/lib/MC/MCSection.cpp
index 32e4cce4f68c..9064cdf2f319 100644
--- a/contrib/llvm/lib/MC/MCSection.cpp
+++ b/contrib/llvm/lib/MC/MCSection.cpp
@@ -85,7 +85,6 @@ MCSection::getSubsectionInsertionPoint(unsigned Subsection) {
return IP;
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void MCSection::dump() {
raw_ostream &OS = llvm::errs();
@@ -98,12 +97,3 @@ LLVM_DUMP_METHOD void MCSection::dump() {
}
OS << "]>";
}
-#endif
-
-MCSection::iterator MCSection::begin() { return Fragments.begin(); }
-
-MCSection::iterator MCSection::end() { return Fragments.end(); }
-
-MCSection::reverse_iterator MCSection::rbegin() { return Fragments.rbegin(); }
-
-MCSection::reverse_iterator MCSection::rend() { return Fragments.rend(); }
diff --git a/contrib/llvm/lib/MC/MCSectionCOFF.cpp b/contrib/llvm/lib/MC/MCSectionCOFF.cpp
index b8373f40b8be..f2dd47d81b7e 100644
--- a/contrib/llvm/lib/MC/MCSectionCOFF.cpp
+++ b/contrib/llvm/lib/MC/MCSectionCOFF.cpp
@@ -64,6 +64,9 @@ void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI,
OS << 'n';
if (getCharacteristics() & COFF::IMAGE_SCN_MEM_SHARED)
OS << 's';
+ if ((getCharacteristics() & COFF::IMAGE_SCN_MEM_DISCARDABLE) &&
+ !isImplicitlyDiscardable(SectionName))
+ OS << 'D';
OS << '"';
if (getCharacteristics() & COFF::IMAGE_SCN_LNK_COMDAT) {
diff --git a/contrib/llvm/lib/MC/MCSectionELF.cpp b/contrib/llvm/lib/MC/MCSectionELF.cpp
index 5a0bb7fe986f..587b28f71b7d 100644
--- a/contrib/llvm/lib/MC/MCSectionELF.cpp
+++ b/contrib/llvm/lib/MC/MCSectionELF.cpp
@@ -110,6 +110,8 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI,
OS << 'c';
if (Flags & ELF::XCORE_SHF_DP_SECTION)
OS << 'd';
+ if (Flags & ELF::SHF_ARM_PURECODE)
+ OS << 'y';
OS << '"';
diff --git a/contrib/llvm/lib/MC/MCSectionMachO.cpp b/contrib/llvm/lib/MC/MCSectionMachO.cpp
index 879c6e5ff932..c2a772fdbdac 100644
--- a/contrib/llvm/lib/MC/MCSectionMachO.cpp
+++ b/contrib/llvm/lib/MC/MCSectionMachO.cpp
@@ -17,10 +17,10 @@ using namespace llvm;
/// types. This *must* be kept in order with and stay synchronized with the
/// section type list.
static const struct {
- const char *AssemblerName, *EnumName;
+ StringRef AssemblerName, EnumName;
} SectionTypeDescriptors[MachO::LAST_KNOWN_SECTION_TYPE+1] = {
{ "regular", "S_REGULAR" }, // 0x00
- { nullptr, "S_ZEROFILL" }, // 0x01
+ { StringRef(), "S_ZEROFILL" }, // 0x01
{ "cstring_literals", "S_CSTRING_LITERALS" }, // 0x02
{ "4byte_literals", "S_4BYTE_LITERALS" }, // 0x03
{ "8byte_literals", "S_8BYTE_LITERALS" }, // 0x04
@@ -31,11 +31,11 @@ static const struct {
{ "mod_init_funcs", "S_MOD_INIT_FUNC_POINTERS" }, // 0x09
{ "mod_term_funcs", "S_MOD_TERM_FUNC_POINTERS" }, // 0x0A
{ "coalesced", "S_COALESCED" }, // 0x0B
- { nullptr, /*FIXME??*/ "S_GB_ZEROFILL" }, // 0x0C
+ { StringRef(), /*FIXME??*/ "S_GB_ZEROFILL" }, // 0x0C
{ "interposing", "S_INTERPOSING" }, // 0x0D
{ "16byte_literals", "S_16BYTE_LITERALS" }, // 0x0E
- { nullptr, /*FIXME??*/ "S_DTRACE_DOF" }, // 0x0F
- { nullptr, /*FIXME??*/ "S_LAZY_DYLIB_SYMBOL_POINTERS" }, // 0x10
+ { StringRef(), /*FIXME??*/ "S_DTRACE_DOF" }, // 0x0F
+ { StringRef(), /*FIXME??*/ "S_LAZY_DYLIB_SYMBOL_POINTERS" }, // 0x10
{ "thread_local_regular", "S_THREAD_LOCAL_REGULAR" }, // 0x11
{ "thread_local_zerofill", "S_THREAD_LOCAL_ZEROFILL" }, // 0x12
{ "thread_local_variables", "S_THREAD_LOCAL_VARIABLES" }, // 0x13
@@ -51,7 +51,7 @@ static const struct {
/// by attribute, instead it is searched.
static const struct {
unsigned AttrFlag;
- const char *AssemblerName, *EnumName;
+ StringRef AssemblerName, EnumName;
} SectionAttrDescriptors[] = {
#define ENTRY(ASMNAME, ENUM) \
{ MachO::ENUM, ASMNAME, #ENUM },
@@ -62,11 +62,11 @@ ENTRY("no_dead_strip", S_ATTR_NO_DEAD_STRIP)
ENTRY("live_support", S_ATTR_LIVE_SUPPORT)
ENTRY("self_modifying_code", S_ATTR_SELF_MODIFYING_CODE)
ENTRY("debug", S_ATTR_DEBUG)
-ENTRY(nullptr /*FIXME*/, S_ATTR_SOME_INSTRUCTIONS)
-ENTRY(nullptr /*FIXME*/, S_ATTR_EXT_RELOC)
-ENTRY(nullptr /*FIXME*/, S_ATTR_LOC_RELOC)
+ENTRY(StringRef() /*FIXME*/, S_ATTR_SOME_INSTRUCTIONS)
+ENTRY(StringRef() /*FIXME*/, S_ATTR_EXT_RELOC)
+ENTRY(StringRef() /*FIXME*/, S_ATTR_LOC_RELOC)
#undef ENTRY
- { 0, "none", nullptr }, // used if section has no attributes but has a stub size
+ { 0, "none", StringRef() }, // used if section has no attributes but has a stub size
};
MCSectionMachO::MCSectionMachO(StringRef Segment, StringRef Section,
@@ -105,7 +105,7 @@ void MCSectionMachO::PrintSwitchToSection(const MCAsmInfo &MAI,
assert(SectionType <= MachO::LAST_KNOWN_SECTION_TYPE &&
"Invalid SectionType specified!");
- if (SectionTypeDescriptors[SectionType].AssemblerName) {
+ if (!SectionTypeDescriptors[SectionType].AssemblerName.empty()) {
OS << ',';
OS << SectionTypeDescriptors[SectionType].AssemblerName;
} else {
@@ -138,7 +138,7 @@ void MCSectionMachO::PrintSwitchToSection(const MCAsmInfo &MAI,
SectionAttrs &= ~SectionAttrDescriptors[i].AttrFlag;
OS << Separator;
- if (SectionAttrDescriptors[i].AssemblerName)
+ if (!SectionAttrDescriptors[i].AssemblerName.empty())
OS << SectionAttrDescriptors[i].AssemblerName;
else
OS << "<<" << SectionAttrDescriptors[i].EnumName << ">>";
@@ -212,8 +212,7 @@ std::string MCSectionMachO::ParseSectionSpecifier(StringRef Spec, // In.
auto TypeDescriptor = std::find_if(
std::begin(SectionTypeDescriptors), std::end(SectionTypeDescriptors),
[&](decltype(*SectionTypeDescriptors) &Descriptor) {
- return Descriptor.AssemblerName &&
- SectionType == Descriptor.AssemblerName;
+ return SectionType == Descriptor.AssemblerName;
});
// If we didn't find the section type, reject it.
@@ -241,8 +240,7 @@ std::string MCSectionMachO::ParseSectionSpecifier(StringRef Spec, // In.
auto AttrDescriptorI = std::find_if(
std::begin(SectionAttrDescriptors), std::end(SectionAttrDescriptors),
[&](decltype(*SectionAttrDescriptors) &Descriptor) {
- return Descriptor.AssemblerName &&
- SectionAttr.trim() == Descriptor.AssemblerName;
+ return SectionAttr.trim() == Descriptor.AssemblerName;
});
if (AttrDescriptorI == std::end(SectionAttrDescriptors))
return "mach-o section specifier has invalid attribute";
diff --git a/contrib/llvm/lib/MC/MCStreamer.cpp b/contrib/llvm/lib/MC/MCStreamer.cpp
index 6c8828f71ba2..fb28f856f671 100644
--- a/contrib/llvm/lib/MC/MCStreamer.cpp
+++ b/contrib/llvm/lib/MC/MCStreamer.cpp
@@ -12,6 +12,7 @@
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCCodeView.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
@@ -124,7 +125,23 @@ void MCStreamer::EmitSymbolValue(const MCSymbol *Sym, unsigned Size,
if (!IsSectionRelative)
EmitValueImpl(MCSymbolRefExpr::create(Sym, getContext()), Size);
else
- EmitCOFFSecRel32(Sym);
+ EmitCOFFSecRel32(Sym, /*Offset=*/0);
+}
+
+void MCStreamer::EmitDTPRel64Value(const MCExpr *Value) {
+ report_fatal_error("unsupported directive in streamer");
+}
+
+void MCStreamer::EmitDTPRel32Value(const MCExpr *Value) {
+ report_fatal_error("unsupported directive in streamer");
+}
+
+void MCStreamer::EmitTPRel64Value(const MCExpr *Value) {
+ report_fatal_error("unsupported directive in streamer");
+}
+
+void MCStreamer::EmitTPRel32Value(const MCExpr *Value) {
+ report_fatal_error("unsupported directive in streamer");
}
void MCStreamer::EmitGPRel64Value(const MCExpr *Value) {
@@ -199,26 +216,58 @@ void MCStreamer::EnsureValidDwarfFrame() {
report_fatal_error("No open frame");
}
-unsigned MCStreamer::EmitCVFileDirective(unsigned FileNo, StringRef Filename) {
- return getContext().getCVFile(Filename, FileNo);
+bool MCStreamer::EmitCVFileDirective(unsigned FileNo, StringRef Filename) {
+ return getContext().getCVContext().addFile(FileNo, Filename);
+}
+
+bool MCStreamer::EmitCVFuncIdDirective(unsigned FunctionId) {
+ return getContext().getCVContext().recordFunctionId(FunctionId);
+}
+
+bool MCStreamer::EmitCVInlineSiteIdDirective(unsigned FunctionId,
+ unsigned IAFunc, unsigned IAFile,
+ unsigned IALine, unsigned IACol,
+ SMLoc Loc) {
+ if (getContext().getCVContext().getCVFunctionInfo(IAFunc) == nullptr) {
+ getContext().reportError(Loc, "parent function id not introduced by "
+ ".cv_func_id or .cv_inline_site_id");
+ return true;
+ }
+
+ return getContext().getCVContext().recordInlinedCallSiteId(
+ FunctionId, IAFunc, IAFile, IALine, IACol);
}
void MCStreamer::EmitCVLocDirective(unsigned FunctionId, unsigned FileNo,
unsigned Line, unsigned Column,
bool PrologueEnd, bool IsStmt,
- StringRef FileName) {
- getContext().setCurrentCVLoc(FunctionId, FileNo, Line, Column, PrologueEnd,
- IsStmt);
+ StringRef FileName, SMLoc Loc) {
+ CodeViewContext &CVC = getContext().getCVContext();
+ MCCVFunctionInfo *FI = CVC.getCVFunctionInfo(FunctionId);
+ if (!FI)
+ return getContext().reportError(
+ Loc, "function id not introduced by .cv_func_id or .cv_inline_site_id");
+
+  // Track the section this function's .cv_loc directives are emitted into.
+ if (FI->Section == nullptr)
+ FI->Section = getCurrentSectionOnly();
+ else if (FI->Section != getCurrentSectionOnly())
+ return getContext().reportError(
+ Loc,
+ "all .cv_loc directives for a function must be in the same section");
+
+ CVC.setCurrentCVLoc(FunctionId, FileNo, Line, Column, PrologueEnd, IsStmt);
}
void MCStreamer::EmitCVLinetableDirective(unsigned FunctionId,
const MCSymbol *Begin,
const MCSymbol *End) {}
-void MCStreamer::EmitCVInlineLinetableDirective(
- unsigned PrimaryFunctionId, unsigned SourceFileId, unsigned SourceLineNum,
- const MCSymbol *FnStartSym, const MCSymbol *FnEndSym,
- ArrayRef<unsigned> SecondaryFunctionIds) {}
+void MCStreamer::EmitCVInlineLinetableDirective(unsigned PrimaryFunctionId,
+ unsigned SourceFileId,
+ unsigned SourceLineNum,
+ const MCSymbol *FnStartSym,
+ const MCSymbol *FnEndSym) {}
void MCStreamer::EmitCVDefRangeDirective(
ArrayRef<std::pair<const MCSymbol *, const MCSymbol *>> Ranges,
@@ -243,7 +292,7 @@ void MCStreamer::AssignFragment(MCSymbol *Symbol, MCFragment *Fragment) {
void MCStreamer::EmitLabel(MCSymbol *Symbol) {
assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
- assert(getCurrentSection().first && "Cannot emit before setting section!");
+ assert(getCurrentSectionOnly() && "Cannot emit before setting section!");
assert(!Symbol->getFragment() && "Unexpected fragment on symbol data!");
Symbol->setFragment(&getCurrentSectionOnly()->getDummyFragment());
@@ -292,13 +341,17 @@ void MCStreamer::EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) {
Frame.End = (MCSymbol *) 1;
}
-MCSymbol *MCStreamer::EmitCFICommon() {
- EnsureValidDwarfFrame();
- MCSymbol *Label = getContext().createTempSymbol();
+MCSymbol *MCStreamer::EmitCFILabel() {
+ MCSymbol *Label = getContext().createTempSymbol("cfi", true);
EmitLabel(Label);
return Label;
}
+MCSymbol *MCStreamer::EmitCFICommon() {
+ EnsureValidDwarfFrame();
+ return EmitCFILabel();
+}
+
void MCStreamer::EmitCFIDefCfa(int64_t Register, int64_t Offset) {
MCSymbol *Label = EmitCFICommon();
MCCFIInstruction Instruction =
@@ -455,8 +508,7 @@ void MCStreamer::EmitWinCFIStartProc(const MCSymbol *Symbol) {
if (CurrentWinFrameInfo && !CurrentWinFrameInfo->End)
report_fatal_error("Starting a function before ending the previous one!");
- MCSymbol *StartProc = getContext().createTempSymbol();
- EmitLabel(StartProc);
+ MCSymbol *StartProc = EmitCFILabel();
WinFrameInfos.push_back(new WinEH::FrameInfo(Symbol, StartProc));
CurrentWinFrameInfo = WinFrameInfos.back();
@@ -468,16 +520,14 @@ void MCStreamer::EmitWinCFIEndProc() {
if (CurrentWinFrameInfo->ChainedParent)
report_fatal_error("Not all chained regions terminated!");
- MCSymbol *Label = getContext().createTempSymbol();
- EmitLabel(Label);
+ MCSymbol *Label = EmitCFILabel();
CurrentWinFrameInfo->End = Label;
}
void MCStreamer::EmitWinCFIStartChained() {
EnsureValidWinFrameInfo();
- MCSymbol *StartProc = getContext().createTempSymbol();
- EmitLabel(StartProc);
+ MCSymbol *StartProc = EmitCFILabel();
WinFrameInfos.push_back(new WinEH::FrameInfo(CurrentWinFrameInfo->Function,
StartProc, CurrentWinFrameInfo));
@@ -490,8 +540,7 @@ void MCStreamer::EmitWinCFIEndChained() {
if (!CurrentWinFrameInfo->ChainedParent)
report_fatal_error("End of a chained region outside a chained region!");
- MCSymbol *Label = getContext().createTempSymbol();
- EmitLabel(Label);
+ MCSymbol *Label = EmitCFILabel();
CurrentWinFrameInfo->End = Label;
CurrentWinFrameInfo =
@@ -555,8 +604,7 @@ void MCStreamer::EmitSyntaxDirective() {}
void MCStreamer::EmitWinCFIPushReg(unsigned Register) {
EnsureValidWinFrameInfo();
- MCSymbol *Label = getContext().createTempSymbol();
- EmitLabel(Label);
+ MCSymbol *Label = EmitCFILabel();
WinEH::Instruction Inst = Win64EH::Instruction::PushNonVol(Label, Register);
CurrentWinFrameInfo->Instructions.push_back(Inst);
@@ -571,8 +619,7 @@ void MCStreamer::EmitWinCFISetFrame(unsigned Register, unsigned Offset) {
if (Offset > 240)
report_fatal_error("Frame offset must be less than or equal to 240!");
- MCSymbol *Label = getContext().createTempSymbol();
- EmitLabel(Label);
+ MCSymbol *Label = EmitCFILabel();
WinEH::Instruction Inst =
Win64EH::Instruction::SetFPReg(Label, Register, Offset);
@@ -587,8 +634,7 @@ void MCStreamer::EmitWinCFIAllocStack(unsigned Size) {
if (Size & 7)
report_fatal_error("Misaligned stack allocation!");
- MCSymbol *Label = getContext().createTempSymbol();
- EmitLabel(Label);
+ MCSymbol *Label = EmitCFILabel();
WinEH::Instruction Inst = Win64EH::Instruction::Alloc(Label, Size);
CurrentWinFrameInfo->Instructions.push_back(Inst);
@@ -599,8 +645,7 @@ void MCStreamer::EmitWinCFISaveReg(unsigned Register, unsigned Offset) {
if (Offset & 7)
report_fatal_error("Misaligned saved register offset!");
- MCSymbol *Label = getContext().createTempSymbol();
- EmitLabel(Label);
+ MCSymbol *Label = EmitCFILabel();
WinEH::Instruction Inst =
Win64EH::Instruction::SaveNonVol(Label, Register, Offset);
@@ -612,8 +657,7 @@ void MCStreamer::EmitWinCFISaveXMM(unsigned Register, unsigned Offset) {
if (Offset & 0x0F)
report_fatal_error("Misaligned saved vector register offset!");
- MCSymbol *Label = getContext().createTempSymbol();
- EmitLabel(Label);
+ MCSymbol *Label = EmitCFILabel();
WinEH::Instruction Inst =
Win64EH::Instruction::SaveXMM(Label, Register, Offset);
@@ -625,8 +669,7 @@ void MCStreamer::EmitWinCFIPushFrame(bool Code) {
if (CurrentWinFrameInfo->Instructions.size() > 0)
report_fatal_error("If present, PushMachFrame must be the first UOP");
- MCSymbol *Label = getContext().createTempSymbol();
- EmitLabel(Label);
+ MCSymbol *Label = EmitCFILabel();
WinEH::Instruction Inst = Win64EH::Instruction::PushMachFrame(Label, Code);
CurrentWinFrameInfo->Instructions.push_back(Inst);
@@ -635,8 +678,7 @@ void MCStreamer::EmitWinCFIPushFrame(bool Code) {
void MCStreamer::EmitWinCFIEndProlog() {
EnsureValidWinFrameInfo();
- MCSymbol *Label = getContext().createTempSymbol();
- EmitLabel(Label);
+ MCSymbol *Label = EmitCFILabel();
CurrentWinFrameInfo->PrologEnd = Label;
}
@@ -647,8 +689,7 @@ void MCStreamer::EmitCOFFSafeSEH(MCSymbol const *Symbol) {
void MCStreamer::EmitCOFFSectionIndex(MCSymbol const *Symbol) {
}
-void MCStreamer::EmitCOFFSecRel32(MCSymbol const *Symbol) {
-}
+void MCStreamer::EmitCOFFSecRel32(MCSymbol const *Symbol, uint64_t Offset) {}
/// EmitRawText - If this file is backed by an assembly streamer, this dumps
/// the specified string in the output .s file. This capability is
@@ -756,7 +797,7 @@ void MCStreamer::EndCOFFSymbolDef() {}
void MCStreamer::EmitFileDirective(StringRef Filename) {}
void MCStreamer::EmitCOFFSymbolStorageClass(int StorageClass) {}
void MCStreamer::EmitCOFFSymbolType(int Type) {}
-void MCStreamer::emitELFSize(MCSymbolELF *Symbol, const MCExpr *Value) {}
+void MCStreamer::emitELFSize(MCSymbol *Symbol, const MCExpr *Value) {}
void MCStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) {}
void MCStreamer::EmitTBSSSymbol(MCSection *Section, MCSymbol *Symbol,
@@ -778,7 +819,8 @@ void MCStreamer::EmitValueToAlignment(unsigned ByteAlignment, int64_t Value,
unsigned MaxBytesToEmit) {}
void MCStreamer::EmitCodeAlignment(unsigned ByteAlignment,
unsigned MaxBytesToEmit) {}
-void MCStreamer::emitValueToOffset(const MCExpr *Offset, unsigned char Value) {}
+void MCStreamer::emitValueToOffset(const MCExpr *Offset, unsigned char Value,
+ SMLoc Loc) {}
void MCStreamer::EmitBundleAlignMode(unsigned AlignPow2) {}
void MCStreamer::EmitBundleLock(bool AlignToEnd) {}
void MCStreamer::FinishImpl() {}
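
The CodeView hooks above now validate function ids before a location is recorded. A minimal sketch of the call order the new checks expect, assuming an already-constructed MCStreamer; the id, file and line values are placeholders, not taken from the patch:

    // Sketch only: the .cv_file / .cv_func_id / .cv_loc ordering the new checks
    // enforce. OS is assumed to be a valid MCStreamer; ids and lines are made up.
    #include "llvm/MC/MCStreamer.h"
    #include "llvm/Support/SMLoc.h"

    using namespace llvm;

    static void emitCVLineInfo(MCStreamer &OS) {
      // Equivalent of ".cv_file 1 \"t.cpp\""; the bool result reports whether
      // the CodeView file table accepted the entry.
      if (!OS.EmitCVFileDirective(/*FileNo=*/1, "t.cpp"))
        return;
      // ".cv_func_id 0" must appear before any .cv_loc that references id 0;
      // otherwise EmitCVLocDirective reports an error at the given SMLoc.
      if (!OS.EmitCVFuncIdDirective(/*FunctionId=*/0))
        return;
      // ".cv_loc 0 1 5 3" -- accepted now that id 0 is known.
      OS.EmitCVLocDirective(/*FunctionId=*/0, /*FileNo=*/1, /*Line=*/5,
                            /*Column=*/3, /*PrologueEnd=*/false, /*IsStmt=*/true,
                            "t.cpp", SMLoc());
    }
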
diff --git a/contrib/llvm/lib/MC/MCSymbol.cpp b/contrib/llvm/lib/MC/MCSymbol.cpp
index 2ddece6bddce..20d985df7ea0 100644
--- a/contrib/llvm/lib/MC/MCSymbol.cpp
+++ b/contrib/llvm/lib/MC/MCSymbol.cpp
@@ -31,10 +31,9 @@ void *MCSymbol::operator new(size_t s, const StringMapEntry<bool> *Name,
// For safety, ensure that the alignment of a pointer is enough for an
// MCSymbol. This also ensures we don't need padding between the name and
// symbol.
- static_assert((unsigned)AlignOf<MCSymbol>::Alignment <=
- AlignOf<NameEntryStorageTy>::Alignment,
+ static_assert((unsigned)alignof(MCSymbol) <= alignof(NameEntryStorageTy),
"Bad alignment of MCSymbol");
- void *Storage = Ctx.allocate(Size, alignOf<NameEntryStorageTy>());
+ void *Storage = Ctx.allocate(Size, alignof(NameEntryStorageTy));
NameEntryStorageTy *Start = static_cast<NameEntryStorageTy*>(Storage);
NameEntryStorageTy *End = Start + (Name ? 1 : 0);
return End;
@@ -76,6 +75,4 @@ void MCSymbol::print(raw_ostream &OS, const MCAsmInfo *MAI) const {
OS << '"';
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void MCSymbol::dump() const { dbgs() << *this; }
-#endif
diff --git a/contrib/llvm/lib/MC/MCTargetOptions.cpp b/contrib/llvm/lib/MC/MCTargetOptions.cpp
index 465622715526..419210537eea 100644
--- a/contrib/llvm/lib/MC/MCTargetOptions.cpp
+++ b/contrib/llvm/lib/MC/MCTargetOptions.cpp
@@ -14,10 +14,12 @@ namespace llvm {
MCTargetOptions::MCTargetOptions()
: SanitizeAddress(false), MCRelaxAll(false), MCNoExecStack(false),
- MCFatalWarnings(false), MCNoWarn(false), MCSaveTempLabels(false),
+ MCFatalWarnings(false), MCNoWarn(false), MCNoDeprecatedWarn(false),
+ MCSaveTempLabels(false),
MCUseDwarfDirectory(false), MCIncrementalLinkerCompatible(false),
- ShowMCEncoding(false), ShowMCInst(false), AsmVerbose(false),
- DwarfVersion(0), ABIName() {}
+ MCPIECopyRelocations(false), ShowMCEncoding(false),
+ ShowMCInst(false), AsmVerbose(false),
+ PreserveAsmComments(true), DwarfVersion(0), ABIName() {}
StringRef MCTargetOptions::getABIName() const {
return ABIName;
diff --git a/contrib/llvm/lib/MC/MCValue.cpp b/contrib/llvm/lib/MC/MCValue.cpp
index 32a6adbf224e..c1336d6d1b49 100644
--- a/contrib/llvm/lib/MC/MCValue.cpp
+++ b/contrib/llvm/lib/MC/MCValue.cpp
@@ -37,11 +37,9 @@ void MCValue::print(raw_ostream &OS) const {
OS << " + " << getConstant();
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void MCValue::dump() const {
print(dbgs());
}
-#endif
MCSymbolRefExpr::VariantKind MCValue::getAccessVariant() const {
const MCSymbolRefExpr *B = getSymB();
diff --git a/contrib/llvm/lib/MC/MachObjectWriter.cpp b/contrib/llvm/lib/MC/MachObjectWriter.cpp
index e39271949d94..c4b35f5db9b4 100644
--- a/contrib/llvm/lib/MC/MachObjectWriter.cpp
+++ b/contrib/llvm/lib/MC/MachObjectWriter.cpp
@@ -422,7 +422,7 @@ void MachObjectWriter::writeLinkerOptionsLoadCommand(
uint64_t BytesWritten = sizeof(MachO::linker_option_command);
for (const std::string &Option : Options) {
// Write each string, including the null byte.
- writeBytes(Option.c_str(), Option.size() + 1);
+ writeBytes(Option, Option.size() + 1);
BytesWritten += Option.size() + 1;
}
@@ -882,7 +882,7 @@ void MachObjectWriter::writeObject(MCAssembler &Asm,
sizeof(MachO::nlist_64) :
sizeof(MachO::nlist));
writeSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols,
- StringTableOffset, StringTable.data().size());
+ StringTableOffset, StringTable.getSize());
writeDysymtabLoadCommand(FirstLocalSymbol, NumLocalSymbols,
FirstExternalSymbol, NumExternalSymbols,
@@ -977,7 +977,7 @@ void MachObjectWriter::writeObject(MCAssembler &Asm,
writeNlist(Entry, Layout);
// Write the string table.
- getStream() << StringTable.data();
+ StringTable.write(getStream());
}
}
diff --git a/contrib/llvm/lib/MC/StringTableBuilder.cpp b/contrib/llvm/lib/MC/StringTableBuilder.cpp
index 9d95952a6d30..1a501bcafc12 100644
--- a/contrib/llvm/lib/MC/StringTableBuilder.cpp
+++ b/contrib/llvm/lib/MC/StringTableBuilder.cpp
@@ -9,15 +9,18 @@
#include "llvm/MC/StringTableBuilder.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/Support/COFF.h"
#include "llvm/Support/Endian.h"
+#include "llvm/Support/raw_ostream.h"
#include <vector>
using namespace llvm;
-StringTableBuilder::StringTableBuilder(Kind K, unsigned Alignment)
- : K(K), Alignment(Alignment) {
+StringTableBuilder::~StringTableBuilder() {}
+
+void StringTableBuilder::initSize() {
// Account for leading bytes in table so that offsets returned from add are
// correct.
switch (K) {
@@ -26,19 +29,46 @@ StringTableBuilder::StringTableBuilder(Kind K, unsigned Alignment)
break;
case MachO:
case ELF:
+ // Start the table with a NUL byte.
Size = 1;
break;
case WinCOFF:
+ // Make room to write the table size later.
Size = 4;
break;
}
}
-typedef std::pair<CachedHash<StringRef>, size_t> StringPair;
+StringTableBuilder::StringTableBuilder(Kind K, unsigned Alignment)
+ : K(K), Alignment(Alignment) {
+ initSize();
+}
+
+void StringTableBuilder::write(raw_ostream &OS) const {
+ assert(isFinalized());
+ SmallString<0> Data;
+ Data.resize(getSize());
+ write((uint8_t *)&Data[0]);
+ OS << Data;
+}
+
+typedef std::pair<CachedHashStringRef, size_t> StringPair;
+
+void StringTableBuilder::write(uint8_t *Buf) const {
+ assert(isFinalized());
+ for (const StringPair &P : StringIndexMap) {
+ StringRef Data = P.first.val();
+ if (!Data.empty())
+ memcpy(Buf + P.second, Data.data(), Data.size());
+ }
+ if (K != WinCOFF)
+ return;
+ support::endian::write32le(Buf, Size);
+}
// Returns the character at Pos from end of a string.
static int charTailAt(StringPair *P, size_t Pos) {
- StringRef S = P->first.Val;
+ StringRef S = P->first.val();
if (Pos >= S.size())
return -1;
return (unsigned char)S[S.size() - Pos - 1];
@@ -86,106 +116,69 @@ void StringTableBuilder::finalizeInOrder() {
}
void StringTableBuilder::finalizeStringTable(bool Optimize) {
- typedef std::pair<CachedHash<StringRef>, size_t> StringOffsetPair;
- std::vector<StringOffsetPair *> Strings;
- Strings.reserve(StringIndexMap.size());
- for (StringOffsetPair &P : StringIndexMap)
- Strings.push_back(&P);
-
- if (!Strings.empty()) {
- // If we're optimizing, sort by name. If not, sort by previously assigned
- // offset.
- if (Optimize) {
- multikey_qsort(&Strings[0], &Strings[0] + Strings.size(), 0);
- } else {
- std::sort(Strings.begin(), Strings.end(),
- [](const StringOffsetPair *LHS, const StringOffsetPair *RHS) {
- return LHS->second < RHS->second;
- });
- }
- }
+ Finalized = true;
- switch (K) {
- case RAW:
- break;
- case ELF:
- case MachO:
- // Start the table with a NUL byte.
- StringTable += '\x00';
- break;
- case WinCOFF:
- // Make room to write the table size later.
- StringTable.append(4, '\x00');
- break;
- }
+ if (Optimize) {
+ std::vector<StringPair *> Strings;
+ Strings.reserve(StringIndexMap.size());
+ for (StringPair &P : StringIndexMap)
+ Strings.push_back(&P);
- StringRef Previous;
- for (StringOffsetPair *P : Strings) {
- StringRef S = P->first.Val;
- if (K == WinCOFF)
- assert(S.size() > COFF::NameSize && "Short string in COFF string table!");
-
- if (Optimize && Previous.endswith(S)) {
- size_t Pos = StringTable.size() - S.size() - (K != RAW);
- if (!(Pos & (Alignment - 1))) {
- P->second = Pos;
- continue;
- }
+ if (!Strings.empty()) {
+ // If we're optimizing, sort by name. If not, sort by previously assigned
+ // offset.
+ multikey_qsort(&Strings[0], &Strings[0] + Strings.size(), 0);
}
- if (Optimize) {
- size_t Start = alignTo(StringTable.size(), Alignment);
- P->second = Start;
- StringTable.append(Start - StringTable.size(), '\0');
- } else {
- assert(P->second == StringTable.size() &&
- "different strtab offset after finalization");
- }
+ initSize();
+
+ StringRef Previous;
+ for (StringPair *P : Strings) {
+ StringRef S = P->first.val();
+ if (Previous.endswith(S)) {
+ size_t Pos = Size - S.size() - (K != RAW);
+ if (!(Pos & (Alignment - 1))) {
+ P->second = Pos;
+ continue;
+ }
+ }
- StringTable += S;
- if (K != RAW)
- StringTable += '\x00';
- Previous = S;
- }
+ Size = alignTo(Size, Alignment);
+ P->second = Size;
- switch (K) {
- case RAW:
- case ELF:
- break;
- case MachO:
- // Pad to multiple of 4.
- while (StringTable.size() % 4)
- StringTable += '\x00';
- break;
- case WinCOFF:
- // Write the table size in the first word.
- assert(StringTable.size() <= std::numeric_limits<uint32_t>::max());
- uint32_t Size = static_cast<uint32_t>(StringTable.size());
- support::endian::write<uint32_t, support::little, support::unaligned>(
- StringTable.data(), Size);
- break;
+ Size += S.size();
+ if (K != RAW)
+ ++Size;
+ Previous = S;
+ }
}
- Size = StringTable.size();
+ if (K == MachO)
+ Size = alignTo(Size, 4); // Pad to multiple of 4.
}
void StringTableBuilder::clear() {
- StringTable.clear();
+ Finalized = false;
StringIndexMap.clear();
}
-size_t StringTableBuilder::getOffset(StringRef S) const {
+size_t StringTableBuilder::getOffset(CachedHashStringRef S) const {
assert(isFinalized());
auto I = StringIndexMap.find(S);
assert(I != StringIndexMap.end() && "String is not in table!");
return I->second;
}
-size_t StringTableBuilder::add(StringRef S) {
+size_t StringTableBuilder::add(CachedHashStringRef S) {
+ if (K == WinCOFF)
+ assert(S.size() > COFF::NameSize && "Short string in COFF string table!");
+
assert(!isFinalized());
- size_t Start = alignTo(Size, Alignment);
- auto P = StringIndexMap.insert(std::make_pair(S, Start));
- if (P.second)
+ auto P = StringIndexMap.insert(std::make_pair(S, 0));
+ if (P.second) {
+ size_t Start = alignTo(Size, Alignment);
+ P.first->second = Start;
Size = Start + S.size() + (K != RAW);
+ }
return P.first->second;
}
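
The builder above no longer keeps a materialized string table; offsets are assigned at finalize time and the bytes are produced on demand by write(). A hedged usage sketch under that assumption (kind, alignment, and the sample strings are arbitrary):

    // Hedged sketch of the post-patch StringTableBuilder API; not taken from
    // the patch itself. Strings are keyed by CachedHashStringRef.
    #include "llvm/ADT/CachedHashString.h"
    #include "llvm/ADT/SmallString.h"
    #include "llvm/MC/StringTableBuilder.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    int main() {
      StringTableBuilder Builder(StringTableBuilder::ELF, /*Alignment=*/1);
      CachedHashStringRef Bar("bar"), FooBar("foobar");
      Builder.add(Bar);
      Builder.add(FooBar);

      Builder.finalize(); // sorts and may tail-merge "bar" into "foobar"

      // Offsets are only valid after finalization.
      outs() << "bar at offset " << Builder.getOffset(Bar) << "\n";
      outs() << "foobar at offset " << Builder.getOffset(FooBar) << "\n";

      // write(raw_ostream &) materializes the table into a scratch buffer and
      // streams it out; write(uint8_t *) fills a preallocated buffer instead.
      SmallString<32> Table;
      raw_svector_ostream OS(Table);
      Builder.write(OS);
      return 0;
    }
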
diff --git a/contrib/llvm/lib/MC/SubtargetFeature.cpp b/contrib/llvm/lib/MC/SubtargetFeature.cpp
index a97cd1db6932..32f06f8a7d6a 100644
--- a/contrib/llvm/lib/MC/SubtargetFeature.cpp
+++ b/contrib/llvm/lib/MC/SubtargetFeature.cpp
@@ -282,13 +282,11 @@ void SubtargetFeatures::print(raw_ostream &OS) const {
OS << "\n";
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// dump - Dump feature info.
///
LLVM_DUMP_METHOD void SubtargetFeatures::dump() const {
print(dbgs());
}
-#endif
/// Adds the default features for the specified target triple.
///
diff --git a/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp b/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp
index f316a5af387d..afc5c6a14d11 100644
--- a/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp
+++ b/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp
@@ -34,7 +34,6 @@
#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/JamCRC.h"
-#include "llvm/Support/TimeValue.h"
#include <cstdio>
#include <ctime>
@@ -792,7 +791,7 @@ void WinCOFFObjectWriter::recordRelocation(
}
}
- // The fixed value never makes sense for section indicies, ignore it.
+ // The fixed value never makes sense for section indices, ignore it.
if (Fixup.getKind() == FK_SecRel_2)
FixedValue = 0;
@@ -1082,7 +1081,7 @@ void WinCOFFObjectWriter::writeObject(MCAssembler &Asm,
if (Symbol->getIndex() != -1)
WriteSymbol(*Symbol);
- getStream().write(Strings.data().data(), Strings.data().size());
+ Strings.write(getStream());
}
MCWinCOFFObjectTargetWriter::MCWinCOFFObjectTargetWriter(unsigned Machine_)
diff --git a/contrib/llvm/lib/MC/WinCOFFStreamer.cpp b/contrib/llvm/lib/MC/WinCOFFStreamer.cpp
index 5c6407ef1e5c..6383d8794030 100644
--- a/contrib/llvm/lib/MC/WinCOFFStreamer.cpp
+++ b/contrib/llvm/lib/MC/WinCOFFStreamer.cpp
@@ -195,11 +195,20 @@ void MCWinCOFFStreamer::EmitCOFFSectionIndex(MCSymbol const *Symbol) {
DF->getContents().resize(DF->getContents().size() + 2, 0);
}
-void MCWinCOFFStreamer::EmitCOFFSecRel32(MCSymbol const *Symbol) {
+void MCWinCOFFStreamer::EmitCOFFSecRel32(MCSymbol const *Symbol,
+ uint64_t Offset) {
MCDataFragment *DF = getOrCreateDataFragment();
- const MCSymbolRefExpr *SRE = MCSymbolRefExpr::create(Symbol, getContext());
- MCFixup Fixup = MCFixup::create(DF->getContents().size(), SRE, FK_SecRel_4);
+ // Create Symbol A for the relocation relative reference.
+ const MCExpr *MCE = MCSymbolRefExpr::create(Symbol, getContext());
+ // Add the constant offset, if given.
+ if (Offset)
+ MCE = MCBinaryExpr::createAdd(
+ MCE, MCConstantExpr::create(Offset, getContext()), getContext());
+ // Build the secrel32 relocation.
+ MCFixup Fixup = MCFixup::create(DF->getContents().size(), MCE, FK_SecRel_4);
+ // Record the relocation.
DF->getFixups().push_back(Fixup);
+ // Emit 4 bytes (zeros) to the object file.
DF->getContents().resize(DF->getContents().size() + 4, 0);
}
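
For illustration, a small sketch of how a caller can use the widened hook; the symbol, streamer, and offset value are assumptions, not taken from the patch:

    // Sketch: emitting "Sym + constant" as one secrel32 fixup via the new
    // EmitCOFFSecRel32(Symbol, Offset) overload. OS and Sym are assumed to exist.
    #include "llvm/MC/MCStreamer.h"
    #include "llvm/MC/MCSymbol.h"

    using namespace llvm;

    static void emitSecRelRefs(MCStreamer &OS, const MCSymbol *Sym) {
      // The streamer now folds the constant into the fixup expression itself.
      OS.EmitCOFFSecRel32(Sym, /*Offset=*/8);

      // Plain section-relative emission still routes through the same hook,
      // with an offset of zero (see MCStreamer::EmitSymbolValue above).
      OS.EmitSymbolValue(Sym, /*Size=*/4, /*IsSectionRelative=*/true);
    }
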
diff --git a/contrib/llvm/lib/Object/Archive.cpp b/contrib/llvm/lib/Object/Archive.cpp
index daf301e2e7e4..f2021f796d12 100644
--- a/contrib/llvm/lib/Object/Archive.cpp
+++ b/contrib/llvm/lib/Object/Archive.cpp
@@ -27,125 +27,380 @@ static const char *const ThinMagic = "!<thin>\n";
void Archive::anchor() { }
-StringRef ArchiveMemberHeader::getName() const {
+static Error
+malformedError(Twine Msg) {
+ std::string StringMsg = "truncated or malformed archive (" + Msg.str() + ")";
+ return make_error<GenericBinaryError>(std::move(StringMsg),
+ object_error::parse_failed);
+}
+
+ArchiveMemberHeader::ArchiveMemberHeader(const Archive *Parent,
+ const char *RawHeaderPtr,
+ uint64_t Size, Error *Err)
+ : Parent(Parent),
+ ArMemHdr(reinterpret_cast<const ArMemHdrType *>(RawHeaderPtr)) {
+ if (RawHeaderPtr == nullptr)
+ return;
+ ErrorAsOutParameter ErrAsOutParam(Err);
+
+ if (Size < sizeof(ArMemHdrType)) {
+ if (Err) {
+ std::string Msg("remaining size of archive too small for next archive "
+ "member header ");
+ Expected<StringRef> NameOrErr = getName(Size);
+ if (!NameOrErr) {
+ consumeError(NameOrErr.takeError());
+ uint64_t Offset = RawHeaderPtr - Parent->getData().data();
+ *Err = malformedError(Msg + "at offset " + Twine(Offset));
+ } else
+ *Err = malformedError(Msg + "for " + NameOrErr.get());
+ }
+ return;
+ }
+ if (ArMemHdr->Terminator[0] != '`' || ArMemHdr->Terminator[1] != '\n') {
+ if (Err) {
+ std::string Buf;
+ raw_string_ostream OS(Buf);
+ OS.write_escaped(llvm::StringRef(ArMemHdr->Terminator,
+ sizeof(ArMemHdr->Terminator)));
+ OS.flush();
+ std::string Msg("terminator characters in archive member \"" + Buf +
+ "\" not the correct \"`\\n\" values for the archive "
+ "member header ");
+ Expected<StringRef> NameOrErr = getName(Size);
+ if (!NameOrErr) {
+ consumeError(NameOrErr.takeError());
+ uint64_t Offset = RawHeaderPtr - Parent->getData().data();
+ *Err = malformedError(Msg + "at offset " + Twine(Offset));
+ } else
+ *Err = malformedError(Msg + "for " + NameOrErr.get());
+ }
+ return;
+ }
+}
+
+// This gets the raw name from the ArMemHdr->Name field and checks that it is
+// valid for the kind of archive. If it is not valid, it returns an Error.
+Expected<StringRef> ArchiveMemberHeader::getRawName() const {
char EndCond;
- if (Name[0] == '/' || Name[0] == '#')
+ auto Kind = Parent->kind();
+ if (Kind == Archive::K_BSD || Kind == Archive::K_DARWIN64) {
+ if (ArMemHdr->Name[0] == ' ') {
+ uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) -
+ Parent->getData().data();
+ return malformedError("name contains a leading space for archive member "
+ "header at offset " + Twine(Offset));
+ }
+ EndCond = ' ';
+ }
+ else if (ArMemHdr->Name[0] == '/' || ArMemHdr->Name[0] == '#')
EndCond = ' ';
else
EndCond = '/';
llvm::StringRef::size_type end =
- llvm::StringRef(Name, sizeof(Name)).find(EndCond);
+ llvm::StringRef(ArMemHdr->Name, sizeof(ArMemHdr->Name)).find(EndCond);
if (end == llvm::StringRef::npos)
- end = sizeof(Name);
- assert(end <= sizeof(Name) && end > 0);
+ end = sizeof(ArMemHdr->Name);
+ assert(end <= sizeof(ArMemHdr->Name) && end > 0);
// Don't include the EndCond if there is one.
- return llvm::StringRef(Name, end);
+ return llvm::StringRef(ArMemHdr->Name, end);
}
-ErrorOr<uint32_t> ArchiveMemberHeader::getSize() const {
+// This gets the name, looking up long names. Size is the size of the archive
+// member including the header, so the size of any name following the header
+// is checked to make sure it does not overflow.
+Expected<StringRef> ArchiveMemberHeader::getName(uint64_t Size) const {
+
+ // This can be called from the ArchiveMemberHeader constructor when the
+ // archive header is truncated to produce an error message with the name.
+ // Make sure the name field is not truncated.
+ if (Size < offsetof(ArMemHdrType, Name) + sizeof(ArMemHdr->Name)) {
+ uint64_t ArchiveOffset = reinterpret_cast<const char *>(ArMemHdr) -
+ Parent->getData().data();
+ return malformedError("archive header truncated before the name field "
+ "for archive member header at offset " +
+ Twine(ArchiveOffset));
+ }
+
+ // The raw name itself can be invalid.
+ Expected<StringRef> NameOrErr = getRawName();
+ if (!NameOrErr)
+ return NameOrErr.takeError();
+ StringRef Name = NameOrErr.get();
+
+ // Check if it's a special name.
+ if (Name[0] == '/') {
+ if (Name.size() == 1) // Linker member.
+ return Name;
+ if (Name.size() == 2 && Name[1] == '/') // String table.
+ return Name;
+ // It's a long name.
+ // Get the string table offset.
+ std::size_t StringOffset;
+ if (Name.substr(1).rtrim(' ').getAsInteger(10, StringOffset)) {
+ std::string Buf;
+ raw_string_ostream OS(Buf);
+ OS.write_escaped(Name.substr(1).rtrim(' '));
+ OS.flush();
+ uint64_t ArchiveOffset = reinterpret_cast<const char *>(ArMemHdr) -
+ Parent->getData().data();
+ return malformedError("long name offset characters after the '/' are "
+ "not all decimal numbers: '" + Buf + "' for "
+ "archive member header at offset " +
+ Twine(ArchiveOffset));
+ }
+
+ // Verify it.
+ if (StringOffset >= Parent->getStringTable().size()) {
+ uint64_t ArchiveOffset = reinterpret_cast<const char *>(ArMemHdr) -
+ Parent->getData().data();
+ return malformedError("long name offset " + Twine(StringOffset) + " past "
+ "the end of the string table for archive member "
+ "header at offset " + Twine(ArchiveOffset));
+ }
+ const char *addr = Parent->getStringTable().begin() + StringOffset;
+
+ // GNU long file names end with a "/\n".
+ if (Parent->kind() == Archive::K_GNU ||
+ Parent->kind() == Archive::K_MIPS64) {
+ StringRef::size_type End = StringRef(addr).find('\n');
+ return StringRef(addr, End - 1);
+ }
+ return addr;
+ }
+
+ if (Name.startswith("#1/")) {
+ uint64_t NameLength;
+ if (Name.substr(3).rtrim(' ').getAsInteger(10, NameLength)) {
+ std::string Buf;
+ raw_string_ostream OS(Buf);
+ OS.write_escaped(Name.substr(3).rtrim(' '));
+ OS.flush();
+ uint64_t ArchiveOffset = reinterpret_cast<const char *>(ArMemHdr) -
+ Parent->getData().data();
+ return malformedError("long name length characters after the #1/ are "
+ "not all decimal numbers: '" + Buf + "' for "
+ "archive member header at offset " +
+ Twine(ArchiveOffset));
+ }
+ if (getSizeOf() + NameLength > Size) {
+ uint64_t ArchiveOffset = reinterpret_cast<const char *>(ArMemHdr) -
+ Parent->getData().data();
+ return malformedError("long name length: " + Twine(NameLength) +
+ " extends past the end of the member or archive "
+ "for archive member header at offset " +
+ Twine(ArchiveOffset));
+ }
+ return StringRef(reinterpret_cast<const char *>(ArMemHdr) + getSizeOf(),
+ NameLength).rtrim('\0');
+ }
+
+ // It is not a long name so trim the blanks at the end of the name.
+ if (Name[Name.size() - 1] != '/')
+ return Name.rtrim(' ');
+
+ // It's a simple name.
+ return Name.drop_back(1);
+}
+
+Expected<uint32_t> ArchiveMemberHeader::getSize() const {
uint32_t Ret;
- if (llvm::StringRef(Size, sizeof(Size)).rtrim(" ").getAsInteger(10, Ret))
- return object_error::parse_failed; // Size is not a decimal number.
+ if (llvm::StringRef(ArMemHdr->Size,
+ sizeof(ArMemHdr->Size)).rtrim(" ").getAsInteger(10, Ret)) {
+ std::string Buf;
+ raw_string_ostream OS(Buf);
+ OS.write_escaped(llvm::StringRef(ArMemHdr->Size,
+ sizeof(ArMemHdr->Size)).rtrim(" "));
+ OS.flush();
+ uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) -
+ Parent->getData().data();
+ return malformedError("characters in size field in archive header are not "
+ "all decimal numbers: '" + Buf + "' for archive "
+ "member header at offset " + Twine(Offset));
+ }
return Ret;
}
-sys::fs::perms ArchiveMemberHeader::getAccessMode() const {
+Expected<sys::fs::perms> ArchiveMemberHeader::getAccessMode() const {
unsigned Ret;
- if (StringRef(AccessMode, sizeof(AccessMode)).rtrim(' ').getAsInteger(8, Ret))
- llvm_unreachable("Access mode is not an octal number.");
+ if (StringRef(ArMemHdr->AccessMode,
+ sizeof(ArMemHdr->AccessMode)).rtrim(' ').getAsInteger(8, Ret)) {
+ std::string Buf;
+ raw_string_ostream OS(Buf);
+ OS.write_escaped(llvm::StringRef(ArMemHdr->AccessMode,
+ sizeof(ArMemHdr->AccessMode)).rtrim(" "));
+ OS.flush();
+ uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) -
+ Parent->getData().data();
+ return malformedError("characters in AccessMode field in archive header "
+ "are not all decimal numbers: '" + Buf + "' for the "
+ "archive member header at offset " + Twine(Offset));
+ }
return static_cast<sys::fs::perms>(Ret);
}
-sys::TimeValue ArchiveMemberHeader::getLastModified() const {
+Expected<sys::TimePoint<std::chrono::seconds>>
+ArchiveMemberHeader::getLastModified() const {
unsigned Seconds;
- if (StringRef(LastModified, sizeof(LastModified)).rtrim(' ')
- .getAsInteger(10, Seconds))
- llvm_unreachable("Last modified time not a decimal number.");
+ if (StringRef(ArMemHdr->LastModified,
+ sizeof(ArMemHdr->LastModified)).rtrim(' ')
+ .getAsInteger(10, Seconds)) {
+ std::string Buf;
+ raw_string_ostream OS(Buf);
+ OS.write_escaped(llvm::StringRef(ArMemHdr->LastModified,
+ sizeof(ArMemHdr->LastModified)).rtrim(" "));
+ OS.flush();
+ uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) -
+ Parent->getData().data();
+ return malformedError("characters in LastModified field in archive header "
+ "are not all decimal numbers: '" + Buf + "' for the "
+ "archive member header at offset " + Twine(Offset));
+ }
- sys::TimeValue Ret;
- Ret.fromEpochTime(Seconds);
- return Ret;
+ return sys::toTimePoint(Seconds);
}
-unsigned ArchiveMemberHeader::getUID() const {
+Expected<unsigned> ArchiveMemberHeader::getUID() const {
unsigned Ret;
- StringRef User = StringRef(UID, sizeof(UID)).rtrim(' ');
+ StringRef User = StringRef(ArMemHdr->UID, sizeof(ArMemHdr->UID)).rtrim(' ');
if (User.empty())
return 0;
- if (User.getAsInteger(10, Ret))
- llvm_unreachable("UID time not a decimal number.");
+ if (User.getAsInteger(10, Ret)) {
+ std::string Buf;
+ raw_string_ostream OS(Buf);
+ OS.write_escaped(User);
+ OS.flush();
+ uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) -
+ Parent->getData().data();
+ return malformedError("characters in UID field in archive header "
+ "are not all decimal numbers: '" + Buf + "' for the "
+ "archive member header at offset " + Twine(Offset));
+ }
return Ret;
}
-unsigned ArchiveMemberHeader::getGID() const {
+Expected<unsigned> ArchiveMemberHeader::getGID() const {
unsigned Ret;
- StringRef Group = StringRef(GID, sizeof(GID)).rtrim(' ');
+ StringRef Group = StringRef(ArMemHdr->GID, sizeof(ArMemHdr->GID)).rtrim(' ');
if (Group.empty())
return 0;
- if (Group.getAsInteger(10, Ret))
- llvm_unreachable("GID time not a decimal number.");
+ if (Group.getAsInteger(10, Ret)) {
+ std::string Buf;
+ raw_string_ostream OS(Buf);
+ OS.write_escaped(Group);
+ OS.flush();
+ uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) -
+ Parent->getData().data();
+ return malformedError("characters in GID field in archive header "
+ "are not all decimal numbers: '" + Buf + "' for the "
+ "archive member header at offset " + Twine(Offset));
+ }
return Ret;
}
Archive::Child::Child(const Archive *Parent, StringRef Data,
uint16_t StartOfFile)
- : Parent(Parent), Data(Data), StartOfFile(StartOfFile) {}
+ : Parent(Parent), Header(Parent, Data.data(), Data.size(), nullptr),
+ Data(Data), StartOfFile(StartOfFile) {
+}
-Archive::Child::Child(const Archive *Parent, const char *Start,
- std::error_code *EC)
- : Parent(Parent) {
+Archive::Child::Child(const Archive *Parent, const char *Start, Error *Err)
+ : Parent(Parent),
+ Header(Parent, Start,
+ Parent
+ ? Parent->getData().size() - (Start - Parent->getData().data())
+ : 0, Err) {
if (!Start)
return;
- uint64_t Size = sizeof(ArchiveMemberHeader);
+  // If we are pointed at real data (Start is not a nullptr), then there must
+  // be a non-null Err pointer available to report malformed data on.  Err is
+  // only permitted to be a nullptr while the sentinel value is being
+  // constructed.
+ assert(Err && "Err can't be nullptr if Start is not a nullptr");
+
+ ErrorAsOutParameter ErrAsOutParam(Err);
+
+ // If there was an error in the construction of the Header
+ // then just return with the error now set.
+ if (*Err)
+ return;
+
+ uint64_t Size = Header.getSizeOf();
Data = StringRef(Start, Size);
- if (!isThinMember()) {
- ErrorOr<uint64_t> MemberSize = getRawSize();
- if ((*EC = MemberSize.getError()))
+ Expected<bool> isThinOrErr = isThinMember();
+ if (!isThinOrErr) {
+ *Err = isThinOrErr.takeError();
+ return;
+ }
+ bool isThin = isThinOrErr.get();
+ if (!isThin) {
+ Expected<uint64_t> MemberSize = getRawSize();
+ if (!MemberSize) {
+ *Err = MemberSize.takeError();
return;
+ }
Size += MemberSize.get();
Data = StringRef(Start, Size);
}
// Setup StartOfFile and PaddingBytes.
- StartOfFile = sizeof(ArchiveMemberHeader);
+ StartOfFile = Header.getSizeOf();
// Don't include attached name.
- StringRef Name = getRawName();
+ Expected<StringRef> NameOrErr = getRawName();
+  if (!NameOrErr) {
+ *Err = NameOrErr.takeError();
+ return;
+ }
+ StringRef Name = NameOrErr.get();
if (Name.startswith("#1/")) {
uint64_t NameSize;
- if (Name.substr(3).rtrim(' ').getAsInteger(10, NameSize))
- llvm_unreachable("Long name length is not an integer");
+ if (Name.substr(3).rtrim(' ').getAsInteger(10, NameSize)) {
+ std::string Buf;
+ raw_string_ostream OS(Buf);
+ OS.write_escaped(Name.substr(3).rtrim(' '));
+ OS.flush();
+ uint64_t Offset = Start - Parent->getData().data();
+ *Err = malformedError("long name length characters after the #1/ are "
+ "not all decimal numbers: '" + Buf + "' for "
+ "archive member header at offset " +
+ Twine(Offset));
+ return;
+ }
StartOfFile += NameSize;
}
}
-ErrorOr<uint64_t> Archive::Child::getSize() const {
+Expected<uint64_t> Archive::Child::getSize() const {
if (Parent->IsThin) {
- ErrorOr<uint32_t> Size = getHeader()->getSize();
- if (std::error_code EC = Size.getError())
- return EC;
+ Expected<uint32_t> Size = Header.getSize();
+ if (!Size)
+ return Size.takeError();
return Size.get();
}
return Data.size() - StartOfFile;
}
-ErrorOr<uint64_t> Archive::Child::getRawSize() const {
- ErrorOr<uint32_t> Size = getHeader()->getSize();
- if (std::error_code EC = Size.getError())
- return EC;
- return Size.get();
+Expected<uint64_t> Archive::Child::getRawSize() const {
+ return Header.getSize();
}
-bool Archive::Child::isThinMember() const {
- StringRef Name = getHeader()->getName();
+Expected<bool> Archive::Child::isThinMember() const {
+ Expected<StringRef> NameOrErr = Header.getRawName();
+ if (!NameOrErr)
+ return NameOrErr.takeError();
+ StringRef Name = NameOrErr.get();
return Parent->IsThin && Name != "/" && Name != "//";
}
-ErrorOr<std::string> Archive::Child::getFullName() const {
- assert(isThinMember());
- ErrorOr<StringRef> NameOrErr = getName();
- if (std::error_code EC = NameOrErr.getError())
- return EC;
+Expected<std::string> Archive::Child::getFullName() const {
+ Expected<bool> isThin = isThinMember();
+ if (!isThin)
+ return isThin.takeError();
+ assert(isThin.get());
+ Expected<StringRef> NameOrErr = getName();
+ if (!NameOrErr)
+ return NameOrErr.takeError();
StringRef Name = *NameOrErr;
if (sys::path::is_absolute(Name))
return Name;
@@ -156,25 +411,29 @@ ErrorOr<std::string> Archive::Child::getFullName() const {
return StringRef(FullName);
}
-ErrorOr<StringRef> Archive::Child::getBuffer() const {
- if (!isThinMember()) {
- ErrorOr<uint32_t> Size = getSize();
- if (std::error_code EC = Size.getError())
- return EC;
+Expected<StringRef> Archive::Child::getBuffer() const {
+ Expected<bool> isThinOrErr = isThinMember();
+ if (!isThinOrErr)
+ return isThinOrErr.takeError();
+ bool isThin = isThinOrErr.get();
+ if (!isThin) {
+ Expected<uint32_t> Size = getSize();
+ if (!Size)
+ return Size.takeError();
return StringRef(Data.data() + StartOfFile, Size.get());
}
- ErrorOr<std::string> FullNameOrEr = getFullName();
- if (std::error_code EC = FullNameOrEr.getError())
- return EC;
- const std::string &FullName = *FullNameOrEr;
+ Expected<std::string> FullNameOrErr = getFullName();
+ if (!FullNameOrErr)
+ return FullNameOrErr.takeError();
+ const std::string &FullName = *FullNameOrErr;
ErrorOr<std::unique_ptr<MemoryBuffer>> Buf = MemoryBuffer::getFile(FullName);
if (std::error_code EC = Buf.getError())
- return EC;
+ return errorCodeToError(EC);
Parent->ThinBuffers.push_back(std::move(*Buf));
return Parent->ThinBuffers.back()->getBuffer();
}
-ErrorOr<Archive::Child> Archive::Child::getNext() const {
+Expected<Archive::Child> Archive::Child::getNext() const {
size_t SpaceToSkip = Data.size();
// If it's odd, add 1 to make it even.
if (SpaceToSkip & 1)
@@ -184,16 +443,25 @@ ErrorOr<Archive::Child> Archive::Child::getNext() const {
// Check to see if this is at the end of the archive.
if (NextLoc == Parent->Data.getBufferEnd())
- return Child(Parent, nullptr, nullptr);
+ return Child(nullptr, nullptr, nullptr);
// Check to see if this is past the end of the archive.
- if (NextLoc > Parent->Data.getBufferEnd())
- return object_error::parse_failed;
+ if (NextLoc > Parent->Data.getBufferEnd()) {
+ std::string Msg("offset to next archive member past the end of the archive "
+ "after member ");
+ Expected<StringRef> NameOrErr = getName();
+ if (!NameOrErr) {
+ consumeError(NameOrErr.takeError());
+ uint64_t Offset = Data.data() - Parent->getData().data();
+ return malformedError(Msg + "at offset " + Twine(Offset));
+ } else
+ return malformedError(Msg + NameOrErr.get());
+ }
- std::error_code EC;
- Child Ret(Parent, NextLoc, &EC);
- if (EC)
- return EC;
+ Error Err = Error::success();
+ Child Ret(Parent, NextLoc, &Err);
+ if (Err)
+ return std::move(Err);
return Ret;
}
@@ -204,64 +472,34 @@ uint64_t Archive::Child::getChildOffset() const {
return offset;
}
-ErrorOr<StringRef> Archive::Child::getName() const {
- StringRef name = getRawName();
- // Check if it's a special name.
- if (name[0] == '/') {
- if (name.size() == 1) // Linker member.
- return name;
- if (name.size() == 2 && name[1] == '/') // String table.
- return name;
- // It's a long name.
- // Get the offset.
- std::size_t offset;
- if (name.substr(1).rtrim(' ').getAsInteger(10, offset))
- llvm_unreachable("Long name offset is not an integer");
-
- // Verify it.
- if (offset >= Parent->StringTable.size())
- return object_error::parse_failed;
- const char *addr = Parent->StringTable.begin() + offset;
-
- // GNU long file names end with a "/\n".
- if (Parent->kind() == K_GNU || Parent->kind() == K_MIPS64) {
- StringRef::size_type End = StringRef(addr).find('\n');
- return StringRef(addr, End - 1);
- }
- return StringRef(addr);
- } else if (name.startswith("#1/")) {
- uint64_t name_size;
- if (name.substr(3).rtrim(' ').getAsInteger(10, name_size))
- llvm_unreachable("Long name length is not an ingeter");
- return Data.substr(sizeof(ArchiveMemberHeader), name_size).rtrim('\0');
- } else {
- // It is not a long name so trim the blanks at the end of the name.
- if (name[name.size() - 1] != '/') {
- return name.rtrim(' ');
- }
- }
- // It's a simple name.
- if (name[name.size() - 1] == '/')
- return name.substr(0, name.size() - 1);
- return name;
+Expected<StringRef> Archive::Child::getName() const {
+ Expected<uint64_t> RawSizeOrErr = getRawSize();
+ if (!RawSizeOrErr)
+ return RawSizeOrErr.takeError();
+ uint64_t RawSize = RawSizeOrErr.get();
+ Expected<StringRef> NameOrErr = Header.getName(Header.getSizeOf() + RawSize);
+ if (!NameOrErr)
+ return NameOrErr.takeError();
+ StringRef Name = NameOrErr.get();
+ return Name;
}
-ErrorOr<MemoryBufferRef> Archive::Child::getMemoryBufferRef() const {
- ErrorOr<StringRef> NameOrErr = getName();
- if (std::error_code EC = NameOrErr.getError())
- return EC;
+Expected<MemoryBufferRef> Archive::Child::getMemoryBufferRef() const {
+ Expected<StringRef> NameOrErr = getName();
+ if (!NameOrErr)
+ return NameOrErr.takeError();
StringRef Name = NameOrErr.get();
- ErrorOr<StringRef> Buf = getBuffer();
- if (std::error_code EC = Buf.getError())
- return EC;
+ Expected<StringRef> Buf = getBuffer();
+ if (!Buf)
+ return Buf.takeError();
return MemoryBufferRef(*Buf, Name);
}
Expected<std::unique_ptr<Binary>>
Archive::Child::getAsBinary(LLVMContext *Context) const {
- ErrorOr<MemoryBufferRef> BuffOrErr = getMemoryBufferRef();
- if (std::error_code EC = BuffOrErr.getError())
- return errorCodeToError(EC);
+ Expected<MemoryBufferRef> BuffOrErr = getMemoryBufferRef();
+ if (!BuffOrErr)
+ return BuffOrErr.takeError();
auto BinaryOrErr = createBinary(BuffOrErr.get(), Context);
if (BinaryOrErr)
@@ -270,7 +508,7 @@ Archive::Child::getAsBinary(LLVMContext *Context) const {
}
Expected<std::unique_ptr<Archive>> Archive::create(MemoryBufferRef Source) {
- Error Err;
+ Error Err = Error::success();
std::unique_ptr<Archive> Ret(new Archive(Source, Err));
if (Err)
return std::move(Err);
@@ -284,7 +522,7 @@ void Archive::setFirstRegular(const Child &C) {
Archive::Archive(MemoryBufferRef Source, Error &Err)
: Binary(Binary::ID_Archive, Source) {
- ErrorAsOutParameter ErrAsOutParam(Err);
+ ErrorAsOutParameter ErrAsOutParam(&Err);
StringRef Buffer = Data.getBuffer();
// Check for sufficient magic.
if (Buffer.startswith(ThinMagic)) {
@@ -297,17 +535,20 @@ Archive::Archive(MemoryBufferRef Source, Error &Err)
return;
}
+  // Make sure Format is initialized before any call to
+  // ArchiveMemberHeader::getName() is made.  This could be a valid empty
+  // archive which is the same in all formats, so claiming it to be GNU is
+  // fine, if not totally correct, before we look for a string table or
+  // table of contents.
+ Format = K_GNU;
+
// Get the special members.
child_iterator I = child_begin(Err, false);
if (Err)
return;
child_iterator E = child_end();
- // This is at least a valid empty archive. Since an empty archive is the
- // same in all formats, just claim it to be gnu to make sure Format is
- // initialized.
- Format = K_GNU;
-
+ // See if this is a valid empty archive and if so return.
if (I == E) {
Err = Error::success();
return;
@@ -322,7 +563,12 @@ Archive::Archive(MemoryBufferRef Source, Error &Err)
return false;
};
- StringRef Name = C->getRawName();
+ Expected<StringRef> NameOrErr = C->getRawName();
+ if (!NameOrErr) {
+ Err = NameOrErr.takeError();
+ return;
+ }
+ StringRef Name = NameOrErr.get();
// Below is the pattern that is used to figure out the archive format
// GNU archive format
@@ -348,9 +594,14 @@ Archive::Archive(MemoryBufferRef Source, Error &Err)
Format = K_BSD;
else // Name == "__.SYMDEF_64"
Format = K_DARWIN64;
- // We know that the symbol table is not an external file, so we just assert
- // there is no error.
- SymbolTable = *C->getBuffer();
+ // We know that the symbol table is not an external file, but we still must
+ // check any Expected<> return value.
+ Expected<StringRef> BufOrErr = C->getBuffer();
+ if (!BufOrErr) {
+ Err = BufOrErr.takeError();
+ return;
+ }
+ SymbolTable = BufOrErr.get();
if (Increment())
return;
setFirstRegular(*C);
@@ -362,24 +613,34 @@ Archive::Archive(MemoryBufferRef Source, Error &Err)
if (Name.startswith("#1/")) {
Format = K_BSD;
// We know this is BSD, so getName will work since there is no string table.
- ErrorOr<StringRef> NameOrErr = C->getName();
- if (auto ec = NameOrErr.getError()) {
- Err = errorCodeToError(ec);
+ Expected<StringRef> NameOrErr = C->getName();
+ if (!NameOrErr) {
+ Err = NameOrErr.takeError();
return;
}
Name = NameOrErr.get();
if (Name == "__.SYMDEF SORTED" || Name == "__.SYMDEF") {
- // We know that the symbol table is not an external file, so we just
- // assert there is no error.
- SymbolTable = *C->getBuffer();
+ // We know that the symbol table is not an external file, but we still
+ // must check any Expected<> return value.
+ Expected<StringRef> BufOrErr = C->getBuffer();
+ if (!BufOrErr) {
+ Err = BufOrErr.takeError();
+ return;
+ }
+ SymbolTable = BufOrErr.get();
if (Increment())
return;
}
else if (Name == "__.SYMDEF_64 SORTED" || Name == "__.SYMDEF_64") {
Format = K_DARWIN64;
- // We know that the symbol table is not an external file, so we just
- // assert there is no error.
- SymbolTable = *C->getBuffer();
+ // We know that the symbol table is not an external file, but we still
+ // must check any Expected<> return value.
+ Expected<StringRef> BufOrErr = C->getBuffer();
+ if (!BufOrErr) {
+ Err = BufOrErr.takeError();
+ return;
+ }
+ SymbolTable = BufOrErr.get();
if (Increment())
return;
}
@@ -394,9 +655,14 @@ Archive::Archive(MemoryBufferRef Source, Error &Err)
bool has64SymTable = false;
if (Name == "/" || Name == "/SYM64/") {
- // We know that the symbol table is not an external file, so we just assert
- // there is no error.
- SymbolTable = *C->getBuffer();
+ // We know that the symbol table is not an external file, but we still
+ // must check any Expected<> return value.
+ Expected<StringRef> BufOrErr = C->getBuffer();
+ if (!BufOrErr) {
+ Err = BufOrErr.takeError();
+ return;
+ }
+ SymbolTable = BufOrErr.get();
if (Name == "/SYM64/")
has64SymTable = true;
@@ -406,14 +672,24 @@ Archive::Archive(MemoryBufferRef Source, Error &Err)
Err = Error::success();
return;
}
- Name = C->getRawName();
+ Expected<StringRef> NameOrErr = C->getRawName();
+ if (!NameOrErr) {
+ Err = NameOrErr.takeError();
+ return;
+ }
+ Name = NameOrErr.get();
}
if (Name == "//") {
Format = has64SymTable ? K_MIPS64 : K_GNU;
- // The string table is never an external member, so we just assert on the
- // ErrorOr.
- StringTable = *C->getBuffer();
+ // The string table is never an external member, but we still
+ // must check any Expected<> return value.
+ Expected<StringRef> BufOrErr = C->getBuffer();
+ if (!BufOrErr) {
+ Err = BufOrErr.takeError();
+ return;
+ }
+ StringTable = BufOrErr.get();
if (Increment())
return;
setFirstRegular(*C);
@@ -434,9 +710,14 @@ Archive::Archive(MemoryBufferRef Source, Error &Err)
}
Format = K_COFF;
- // We know that the symbol table is not an external file, so we just assert
- // there is no error.
- SymbolTable = *C->getBuffer();
+ // We know that the symbol table is not an external file, but we still
+ // must check any Expected<> return value.
+ Expected<StringRef> BufOrErr = C->getBuffer();
+ if (!BufOrErr) {
+ Err = BufOrErr.takeError();
+ return;
+ }
+ SymbolTable = BufOrErr.get();
if (Increment())
return;
@@ -447,12 +728,22 @@ Archive::Archive(MemoryBufferRef Source, Error &Err)
return;
}
- Name = C->getRawName();
+ NameOrErr = C->getRawName();
+ if (!NameOrErr) {
+ Err = NameOrErr.takeError();
+ return;
+ }
+ Name = NameOrErr.get();
if (Name == "//") {
- // The string table is never an external member, so we just assert on the
- // ErrorOr.
- StringTable = *C->getBuffer();
+ // The string table is never an external member, but we still
+ // must check any Expected<> return value.
+ Expected<StringRef> BufOrErr = C->getBuffer();
+ if (!BufOrErr) {
+ Err = BufOrErr.takeError();
+ return;
+ }
+ StringTable = BufOrErr.get();
if (Increment())
return;
}
@@ -463,7 +754,7 @@ Archive::Archive(MemoryBufferRef Source, Error &Err)
Archive::child_iterator Archive::child_begin(Error &Err,
bool SkipInternal) const {
- if (Data.getBufferSize() == 8) // empty archive.
+ if (isEmpty())
return child_end();
if (SkipInternal)
@@ -472,25 +763,21 @@ Archive::child_iterator Archive::child_begin(Error &Err,
&Err);
const char *Loc = Data.getBufferStart() + strlen(Magic);
- std::error_code EC;
- Child C(this, Loc, &EC);
- if (EC) {
- ErrorAsOutParameter ErrAsOutParam(Err);
- Err = errorCodeToError(EC);
+ Child C(this, Loc, &Err);
+ if (Err)
return child_end();
- }
return child_iterator(C, &Err);
}
Archive::child_iterator Archive::child_end() const {
- return child_iterator(Child(this, nullptr, nullptr), nullptr);
+ return child_iterator(Child(nullptr, nullptr, nullptr), nullptr);
}
StringRef Archive::Symbol::getName() const {
return Parent->getSymbolTable().begin() + StringIndex;
}
-ErrorOr<Archive::Child> Archive::Symbol::getMember() const {
+Expected<Archive::Child> Archive::Symbol::getMember() const {
const char *Buf = Parent->getSymbolTable().begin();
const char *Offsets = Buf;
if (Parent->kind() == K_MIPS64 || Parent->kind() == K_DARWIN64)
@@ -525,7 +812,7 @@ ErrorOr<Archive::Child> Archive::Symbol::getMember() const {
uint32_t SymbolCount = read32le(Buf);
if (SymbolIndex >= SymbolCount)
- return object_error::parse_failed;
+ return errorCodeToError(object_error::parse_failed);
// Skip SymbolCount to get to the indices table.
const char *Indices = Buf + 4;
@@ -537,16 +824,16 @@ ErrorOr<Archive::Child> Archive::Symbol::getMember() const {
--OffsetIndex;
if (OffsetIndex >= MemberCount)
- return object_error::parse_failed;
+ return errorCodeToError(object_error::parse_failed);
Offset = read32le(Offsets + OffsetIndex * 4);
}
const char *Loc = Parent->getData().begin() + Offset;
- std::error_code EC;
- Child C(Parent, Loc, &EC);
- if (EC)
- return EC;
+ Error Err = Error::success();
+ Child C(Parent, Loc, &Err);
+ if (Err)
+ return std::move(Err);
return C;
}
@@ -677,10 +964,13 @@ Expected<Optional<Archive::Child>> Archive::findSym(StringRef name) const {
if (auto MemberOrErr = bs->getMember())
return Child(*MemberOrErr);
else
- return errorCodeToError(MemberOrErr.getError());
+ return MemberOrErr.takeError();
}
}
return Optional<Child>();
}
+// Returns true if the archive file contains no member files.
+bool Archive::isEmpty() const { return Data.getBufferSize() == 8; }
+
bool Archive::hasSymbolTable() const { return !SymbolTable.empty(); }
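
A minimal sketch of consuming the Expected<>/Error-based archive API introduced above; the archive path is hypothetical and the error handling only logs:

    // Hedged sketch, not part of the patch: walking an archive's members with
    // the Expected<>-returning accessors above. "libfoo.a" is a placeholder.
    #include "llvm/Object/Archive.h"
    #include "llvm/Support/Error.h"
    #include "llvm/Support/MemoryBuffer.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;
    using namespace llvm::object;

    int main() {
      ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
          MemoryBuffer::getFile("libfoo.a");
      if (!BufOrErr)
        return 1;

      Expected<std::unique_ptr<Archive>> ArOrErr =
          Archive::create((*BufOrErr)->getMemBufferRef());
      if (!ArOrErr) {
        logAllUnhandledErrors(ArOrErr.takeError(), errs(), "libfoo.a: ");
        return 1;
      }

      Error Err = Error::success();
      for (const Archive::Child &C : (*ArOrErr)->children(Err)) {
        Expected<StringRef> Name = C.getName(); // Expected<>, not ErrorOr<>
        if (!Name) {
          consumeError(Name.takeError());
          continue;
        }
        Expected<uint64_t> Size = C.getRawSize();
        if (!Size) {
          consumeError(Size.takeError());
          continue;
        }
        outs() << *Name << ": " << *Size << " bytes\n";
      }
      // Malformed member headers now surface here instead of asserting earlier.
      if (Err) {
        logAllUnhandledErrors(std::move(Err), errs(), "libfoo.a: ");
        return 1;
      }
      return 0;
    }
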
diff --git a/contrib/llvm/lib/Object/ArchiveWriter.cpp b/contrib/llvm/lib/Object/ArchiveWriter.cpp
index 53573262104c..f8e3c5a0a03f 100644
--- a/contrib/llvm/lib/Object/ArchiveWriter.cpp
+++ b/contrib/llvm/lib/Object/ArchiveWriter.cpp
@@ -40,17 +40,30 @@ NewArchiveMember::NewArchiveMember(MemoryBufferRef BufRef)
Expected<NewArchiveMember>
NewArchiveMember::getOldMember(const object::Archive::Child &OldMember,
bool Deterministic) {
- ErrorOr<llvm::MemoryBufferRef> BufOrErr = OldMember.getMemoryBufferRef();
+ Expected<llvm::MemoryBufferRef> BufOrErr = OldMember.getMemoryBufferRef();
if (!BufOrErr)
- return errorCodeToError(BufOrErr.getError());
+ return BufOrErr.takeError();
NewArchiveMember M;
+ assert(M.IsNew == false);
M.Buf = MemoryBuffer::getMemBuffer(*BufOrErr, false);
if (!Deterministic) {
- M.ModTime = OldMember.getLastModified();
- M.UID = OldMember.getUID();
- M.GID = OldMember.getGID();
- M.Perms = OldMember.getAccessMode();
+ auto ModTimeOrErr = OldMember.getLastModified();
+ if (!ModTimeOrErr)
+ return ModTimeOrErr.takeError();
+ M.ModTime = ModTimeOrErr.get();
+ Expected<unsigned> UIDOrErr = OldMember.getUID();
+ if (!UIDOrErr)
+ return UIDOrErr.takeError();
+ M.UID = UIDOrErr.get();
+ Expected<unsigned> GIDOrErr = OldMember.getGID();
+ if (!GIDOrErr)
+ return GIDOrErr.takeError();
+ M.GID = GIDOrErr.get();
+ Expected<sys::fs::perms> AccessModeOrErr = OldMember.getAccessMode();
+ if (!AccessModeOrErr)
+ return AccessModeOrErr.takeError();
+ M.Perms = AccessModeOrErr.get();
}
return std::move(M);
}
@@ -81,9 +94,11 @@ Expected<NewArchiveMember> NewArchiveMember::getFile(StringRef FileName,
return errorCodeToError(std::error_code(errno, std::generic_category()));
NewArchiveMember M;
+ M.IsNew = true;
M.Buf = std::move(*MemberBufferOrErr);
if (!Deterministic) {
- M.ModTime = Status.getLastModificationTime();
+ M.ModTime = std::chrono::time_point_cast<std::chrono::seconds>(
+ Status.getLastModificationTime());
M.UID = Status.getUser();
M.GID = Status.getGroup();
M.Perms = Status.permissions();
@@ -115,11 +130,10 @@ static void print32(raw_ostream &Out, object::Archive::Kind Kind,
support::endian::Writer<support::little>(Out).write(Val);
}
-static void printRestOfMemberHeader(raw_fd_ostream &Out,
- const sys::TimeValue &ModTime, unsigned UID,
- unsigned GID, unsigned Perms,
- unsigned Size) {
- printWithSpacePadding(Out, ModTime.toEpochTime(), 12);
+static void printRestOfMemberHeader(
+ raw_fd_ostream &Out, const sys::TimePoint<std::chrono::seconds> &ModTime,
+ unsigned UID, unsigned GID, unsigned Perms, unsigned Size) {
+ printWithSpacePadding(Out, sys::toTimeT(ModTime), 12);
printWithSpacePadding(Out, UID, 6, true);
printWithSpacePadding(Out, GID, 6, true);
printWithSpacePadding(Out, format("%o", Perms), 8);
@@ -127,17 +141,20 @@ static void printRestOfMemberHeader(raw_fd_ostream &Out,
Out << "`\n";
}
-static void printGNUSmallMemberHeader(raw_fd_ostream &Out, StringRef Name,
- const sys::TimeValue &ModTime,
- unsigned UID, unsigned GID,
- unsigned Perms, unsigned Size) {
+static void
+printGNUSmallMemberHeader(raw_fd_ostream &Out, StringRef Name,
+ const sys::TimePoint<std::chrono::seconds> &ModTime,
+ unsigned UID, unsigned GID, unsigned Perms,
+ unsigned Size) {
printWithSpacePadding(Out, Twine(Name) + "/", 16);
printRestOfMemberHeader(Out, ModTime, UID, GID, Perms, Size);
}
-static void printBSDMemberHeader(raw_fd_ostream &Out, StringRef Name,
- const sys::TimeValue &ModTime, unsigned UID,
- unsigned GID, unsigned Perms, unsigned Size) {
+static void
+printBSDMemberHeader(raw_fd_ostream &Out, StringRef Name,
+ const sys::TimePoint<std::chrono::seconds> &ModTime,
+ unsigned UID, unsigned GID, unsigned Perms,
+ unsigned Size) {
uint64_t PosAfterHeader = Out.tell() + 60 + Name.size();
// Pad so that even 64 bit object files are aligned.
unsigned Pad = OffsetToAlignment(PosAfterHeader, 8);
@@ -159,8 +176,8 @@ static void
printMemberHeader(raw_fd_ostream &Out, object::Archive::Kind Kind, bool Thin,
StringRef Name,
std::vector<unsigned>::iterator &StringMapIndexIter,
- const sys::TimeValue &ModTime, unsigned UID, unsigned GID,
- unsigned Perms, unsigned Size) {
+ const sys::TimePoint<std::chrono::seconds> &ModTime,
+ unsigned UID, unsigned GID, unsigned Perms, unsigned Size) {
if (Kind == object::Archive::K_BSD)
return printBSDMemberHeader(Out, Name, ModTime, UID, GID, Perms, Size);
if (!useStringTable(Thin, Name))
@@ -190,6 +207,12 @@ static std::string computeRelativePath(StringRef From, StringRef To) {
for (auto ToE = sys::path::end(To); ToI != ToE; ++ToI)
sys::path::append(Relative, *ToI);
+#ifdef LLVM_ON_WIN32
+ // Replace backslashes with slashes so that the path is portable between *nix
+ // and Windows.
+ std::replace(Relative.begin(), Relative.end(), '\\', '/');
+#endif
+
return Relative.str();
}
@@ -210,9 +233,12 @@ static void writeStringTable(raw_fd_ostream &Out, StringRef ArcName,
}
StringMapIndexes.push_back(Out.tell() - StartOffset);
- if (Thin)
- Out << computeRelativePath(ArcName, Path);
- else
+ if (Thin) {
+ if (M.IsNew)
+ Out << computeRelativePath(ArcName, Path);
+ else
+ Out << M.Buf->getBufferIdentifier();
+ } else
Out << Name;
Out << "/\n";
@@ -227,12 +253,12 @@ static void writeStringTable(raw_fd_ostream &Out, StringRef ArcName,
Out.seek(Pos);
}
-static sys::TimeValue now(bool Deterministic) {
+static sys::TimePoint<std::chrono::seconds> now(bool Deterministic) {
+ using namespace std::chrono;
+
if (!Deterministic)
- return sys::TimeValue::now();
- sys::TimeValue TV;
- TV.fromEpochTime(0);
- return TV;
+ return time_point_cast<seconds>(system_clock::now());
+ return sys::TimePoint<seconds>();
}
// Returns the offset of the first reference to a member offset.
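
The TimeValue-to-TimePoint migration above reduces to two conversions; a small sketch, assuming llvm/Support/Chrono.h from this import (the timestamp value is arbitrary):

    // Sketch of the sys::TimePoint round trip used above; the timestamp is
    // made up, and the includes assume this import's Support headers.
    #include "llvm/Support/Chrono.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    int main() {
      // ar member headers store the mtime as decimal seconds since the epoch.
      unsigned Seconds = 1262304000;

      // Parsing side (ArchiveMemberHeader::getLastModified above).
      sys::TimePoint<std::chrono::seconds> ModTime = sys::toTimePoint(Seconds);

      // Writing side (printRestOfMemberHeader above) converts back to a time_t.
      outs() << sys::toTimeT(ModTime) << "\n"; // prints 1262304000
      return 0;
    }
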
diff --git a/contrib/llvm/lib/Object/Binary.cpp b/contrib/llvm/lib/Object/Binary.cpp
index ec051fec375b..8467d349cd95 100644
--- a/contrib/llvm/lib/Object/Binary.cpp
+++ b/contrib/llvm/lib/Object/Binary.cpp
@@ -63,10 +63,12 @@ Expected<std::unique_ptr<Binary>> object::createBinary(MemoryBufferRef Buffer,
case sys::fs::file_magic::coff_import_library:
case sys::fs::file_magic::pecoff_executable:
case sys::fs::file_magic::bitcode:
+ case sys::fs::file_magic::wasm_object:
return ObjectFile::createSymbolicFile(Buffer, Type, Context);
case sys::fs::file_magic::macho_universal_binary:
return MachOUniversalBinary::create(Buffer);
case sys::fs::file_magic::unknown:
+ case sys::fs::file_magic::coff_cl_gl_object:
case sys::fs::file_magic::windows_resource:
// Unrecognized object file format.
return errorCodeToError(object_error::invalid_file_type);
diff --git a/contrib/llvm/lib/Object/COFFObjectFile.cpp b/contrib/llvm/lib/Object/COFFObjectFile.cpp
index 0f790086cfc5..a2d8f12449e6 100644
--- a/contrib/llvm/lib/Object/COFFObjectFile.cpp
+++ b/contrib/llvm/lib/Object/COFFObjectFile.cpp
@@ -157,6 +157,13 @@ uint64_t COFFObjectFile::getSymbolValueImpl(DataRefImpl Ref) const {
return getCOFFSymbol(Ref).getValue();
}
+uint32_t COFFObjectFile::getSymbolAlignment(DataRefImpl Ref) const {
+ // MSVC/link.exe seems to align symbols to the next-power-of-2
+ // up to 32 bytes.
+ COFFSymbolRef Symb = getCOFFSymbol(Ref);
+ return std::min(uint64_t(32), PowerOf2Ceil(Symb.getValue()));
+}
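A sketch of the alignment heuristic above; the next-power-of-two helper stands in for LLVM's PowerOf2Ceil and is an assumption, not its implementation.

#include <algorithm>
#include <cassert>
#include <cstdint>

static uint64_t powerOf2Ceil(uint64_t V) {
  if (V == 0)
    return 0;
  uint64_t P = 1;
  while (P < V)
    P <<= 1;
  return P;
}

static uint32_t symbolAlignment(uint64_t Value) {
  // Round the symbol value up to a power of two, but never report more than
  // 32-byte alignment.
  return std::min<uint64_t>(uint64_t(32), powerOf2Ceil(Value));
}

int main() {
  assert(symbolAlignment(6) == 8);
  assert(symbolAlignment(20) == 32);
  assert(symbolAlignment(100) == 32);   // capped at 32
}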
+
Expected<uint64_t> COFFObjectFile::getSymbolAddress(DataRefImpl Ref) const {
uint64_t Result = getSymbolValue(Ref);
COFFSymbolRef Symb = getCOFFSymbol(Ref);
@@ -487,17 +494,18 @@ std::error_code COFFObjectFile::getHintName(uint32_t Rva, uint16_t &Hint,
return std::error_code();
}
-std::error_code COFFObjectFile::getDebugPDBInfo(const debug_directory *DebugDir,
- const debug_pdb_info *&PDBInfo,
- StringRef &PDBFileName) const {
+std::error_code
+COFFObjectFile::getDebugPDBInfo(const debug_directory *DebugDir,
+ const codeview::DebugInfo *&PDBInfo,
+ StringRef &PDBFileName) const {
ArrayRef<uint8_t> InfoBytes;
if (std::error_code EC = getRvaAndSizeAsBytes(
DebugDir->AddressOfRawData, DebugDir->SizeOfData, InfoBytes))
return EC;
- if (InfoBytes.size() < sizeof(debug_pdb_info) + 1)
+ if (InfoBytes.size() < sizeof(*PDBInfo) + 1)
return object_error::parse_failed;
- PDBInfo = reinterpret_cast<const debug_pdb_info *>(InfoBytes.data());
- InfoBytes = InfoBytes.drop_front(sizeof(debug_pdb_info));
+ PDBInfo = reinterpret_cast<const codeview::DebugInfo *>(InfoBytes.data());
+ InfoBytes = InfoBytes.drop_front(sizeof(*PDBInfo));
PDBFileName = StringRef(reinterpret_cast<const char *>(InfoBytes.data()),
InfoBytes.size());
// Truncate the name at the first null byte. Ignore any padding.
@@ -505,8 +513,9 @@ std::error_code COFFObjectFile::getDebugPDBInfo(const debug_directory *DebugDir,
return std::error_code();
}
-std::error_code COFFObjectFile::getDebugPDBInfo(const debug_pdb_info *&PDBInfo,
- StringRef &PDBFileName) const {
+std::error_code
+COFFObjectFile::getDebugPDBInfo(const codeview::DebugInfo *&PDBInfo,
+ StringRef &PDBFileName) const {
for (const debug_directory &D : debug_directories())
if (D.Type == COFF::IMAGE_DEBUG_TYPE_CODEVIEW)
return getDebugPDBInfo(&D, PDBInfo, PDBFileName);
@@ -538,7 +547,7 @@ std::error_code COFFObjectFile::initImportTablePtr() {
if (std::error_code EC = checkOffset(Data, IntPtr, DataEntry->Size))
return EC;
ImportDirectory = reinterpret_cast<
- const import_directory_table_entry *>(IntPtr);
+ const coff_import_directory_table_entry *>(IntPtr);
return std::error_code();
}
@@ -716,17 +725,23 @@ COFFObjectFile::COFFObjectFile(MemoryBufferRef Object, std::error_code &EC)
}
if ((EC = getObject(DataDirectory, Data, DataDirAddr, DataDirSize)))
return;
- CurPtr += COFFHeader->SizeOfOptionalHeader;
}
+ if (COFFHeader)
+ CurPtr += COFFHeader->SizeOfOptionalHeader;
+
if ((EC = getObject(SectionTable, Data, base() + CurPtr,
(uint64_t)getNumberOfSections() * sizeof(coff_section))))
return;
// Initialize the pointer to the symbol table.
if (getPointerToSymbolTable() != 0) {
- if ((EC = initSymbolTablePtr()))
- return;
+ if ((EC = initSymbolTablePtr())) {
+ SymbolTable16 = nullptr;
+ SymbolTable32 = nullptr;
+ StringTable = nullptr;
+ StringTableSize = 0;
+ }
} else {
// We had better not have any symbols if we don't have a symbol table.
if (getNumberOfSymbols() != 0) {
@@ -756,13 +771,13 @@ COFFObjectFile::COFFObjectFile(MemoryBufferRef Object, std::error_code &EC)
EC = std::error_code();
}
-basic_symbol_iterator COFFObjectFile::symbol_begin_impl() const {
+basic_symbol_iterator COFFObjectFile::symbol_begin() const {
DataRefImpl Ret;
Ret.p = getSymbolTable();
return basic_symbol_iterator(SymbolRef(Ret, this));
}
-basic_symbol_iterator COFFObjectFile::symbol_end_impl() const {
+basic_symbol_iterator COFFObjectFile::symbol_end() const {
// The symbol table ends where the string table begins.
DataRefImpl Ret;
Ret.p = reinterpret_cast<uintptr_t>(StringTable);
@@ -772,7 +787,7 @@ basic_symbol_iterator COFFObjectFile::symbol_end_impl() const {
import_directory_iterator COFFObjectFile::import_directory_begin() const {
if (!ImportDirectory)
return import_directory_end();
- if (ImportDirectory[0].ImportLookupTableRVA == 0)
+ if (ImportDirectory->isNull())
return import_directory_end();
return import_directory_iterator(
ImportDirectoryEntryRef(ImportDirectory, 0, this));
@@ -1201,14 +1216,14 @@ operator==(const ImportDirectoryEntryRef &Other) const {
void ImportDirectoryEntryRef::moveNext() {
++Index;
- if (ImportTable[Index].ImportLookupTableRVA == 0) {
+ if (ImportTable[Index].isNull()) {
Index = -1;
ImportTable = nullptr;
}
}
std::error_code ImportDirectoryEntryRef::getImportTableEntry(
- const import_directory_table_entry *&Result) const {
+ const coff_import_directory_table_entry *&Result) const {
return getObject(Result, OwningObject->Data, ImportTable + Index);
}
@@ -1250,13 +1265,13 @@ importedSymbolEnd(uint32_t RVA, const COFFObjectFile *Object) {
imported_symbol_iterator
ImportDirectoryEntryRef::imported_symbol_begin() const {
- return importedSymbolBegin(ImportTable[Index].ImportLookupTableRVA,
+ return importedSymbolBegin(ImportTable[Index].ImportAddressTableRVA,
OwningObject);
}
imported_symbol_iterator
ImportDirectoryEntryRef::imported_symbol_end() const {
- return importedSymbolEnd(ImportTable[Index].ImportLookupTableRVA,
+ return importedSymbolEnd(ImportTable[Index].ImportAddressTableRVA,
OwningObject);
}
@@ -1265,6 +1280,21 @@ ImportDirectoryEntryRef::imported_symbols() const {
return make_range(imported_symbol_begin(), imported_symbol_end());
}
+imported_symbol_iterator ImportDirectoryEntryRef::lookup_table_begin() const {
+ return importedSymbolBegin(ImportTable[Index].ImportLookupTableRVA,
+ OwningObject);
+}
+
+imported_symbol_iterator ImportDirectoryEntryRef::lookup_table_end() const {
+ return importedSymbolEnd(ImportTable[Index].ImportLookupTableRVA,
+ OwningObject);
+}
+
+iterator_range<imported_symbol_iterator>
+ImportDirectoryEntryRef::lookup_table_symbols() const {
+ return make_range(lookup_table_begin(), lookup_table_end());
+}
+
std::error_code ImportDirectoryEntryRef::getName(StringRef &Result) const {
uintptr_t IntPtr = 0;
if (std::error_code EC =
diff --git a/contrib/llvm/lib/Object/ELF.cpp b/contrib/llvm/lib/Object/ELF.cpp
index 2dde18a24280..23682e1fabfd 100644
--- a/contrib/llvm/lib/Object/ELF.cpp
+++ b/contrib/llvm/lib/Object/ELF.cpp
@@ -54,6 +54,13 @@ StringRef getELFRelocationTypeName(uint32_t Machine, uint32_t Type) {
break;
}
break;
+ case ELF::EM_AVR:
+ switch (Type) {
+#include "llvm/Support/ELFRelocs/AVR.def"
+ default:
+ break;
+ }
+ break;
case ELF::EM_HEXAGON:
switch (Type) {
#include "llvm/Support/ELFRelocs/Hexagon.def"
@@ -82,6 +89,13 @@ StringRef getELFRelocationTypeName(uint32_t Machine, uint32_t Type) {
break;
}
break;
+ case ELF::EM_RISCV:
+ switch (Type) {
+#include "llvm/Support/ELFRelocs/RISCV.def"
+ default:
+ break;
+ }
+ break;
case ELF::EM_S390:
switch (Type) {
#include "llvm/Support/ELFRelocs/SystemZ.def"
diff --git a/contrib/llvm/lib/Object/Error.cpp b/contrib/llvm/lib/Object/Error.cpp
index 2357526b7894..7d43a84f3e0e 100644
--- a/contrib/llvm/lib/Object/Error.cpp
+++ b/contrib/llvm/lib/Object/Error.cpp
@@ -24,12 +24,12 @@ namespace {
// deal with the Error value directly, rather than converting to error_code.
class _object_error_category : public std::error_category {
public:
- const char* name() const LLVM_NOEXCEPT override;
+ const char* name() const noexcept override;
std::string message(int ev) const override;
};
}
-const char *_object_error_category::name() const LLVM_NOEXCEPT {
+const char *_object_error_category::name() const noexcept {
return "llvm.object";
}
@@ -50,6 +50,8 @@ std::string _object_error_category::message(int EV) const {
return "Invalid section index";
case object_error::bitcode_section_not_found:
return "Bitcode section not found in object file";
+ case object_error::invalid_symbol_index:
+ return "Invalid symbol index";
}
llvm_unreachable("An enumerator of object_error does not have a message "
"defined.");
@@ -77,18 +79,17 @@ const std::error_category &object::object_category() {
llvm::Error llvm::object::isNotObjectErrorInvalidFileType(llvm::Error Err) {
if (auto Err2 =
- handleErrors(std::move(Err),
- [](std::unique_ptr<ECError> M) {
- // Try to handle 'M'. If successful, return a success value from
- // the handler.
- if (M->convertToErrorCode() == object_error::invalid_file_type)
- return Error::success();
+ handleErrors(std::move(Err), [](std::unique_ptr<ECError> M) -> Error {
+ // Try to handle 'M'. If successful, return a success value from
+ // the handler.
+ if (M->convertToErrorCode() == object_error::invalid_file_type)
+ return Error::success();
- // We failed to handle 'M' - return it from the handler.
- // This value will be passed back from catchErrors and
- // wind up in Err2, where it will be returned from this function.
- return Error(std::move(M));
- }))
+ // We failed to handle 'M' - return it from the handler.
+ // This value will be passed back from catchErrors and
+ // wind up in Err2, where it will be returned from this function.
+ return Error(std::move(M));
+ }))
return Err2;
return Err;
}
diff --git a/contrib/llvm/lib/Object/IRObjectFile.cpp b/contrib/llvm/lib/Object/IRObjectFile.cpp
index 42c8ecd62da8..adbf0de6d1bc 100644
--- a/contrib/llvm/lib/Object/IRObjectFile.cpp
+++ b/contrib/llvm/lib/Object/IRObjectFile.cpp
@@ -14,7 +14,7 @@
#include "llvm/Object/IRObjectFile.h"
#include "RecordStreamer.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/GVMaterializer.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Mangler.h"
@@ -35,246 +35,52 @@
using namespace llvm;
using namespace object;
-IRObjectFile::IRObjectFile(MemoryBufferRef Object, std::unique_ptr<Module> Mod)
- : SymbolicFile(Binary::ID_IR, Object), M(std::move(Mod)) {
- Mang.reset(new Mangler());
- CollectAsmUndefinedRefs(Triple(M->getTargetTriple()), M->getModuleInlineAsm(),
- [this](StringRef Name, BasicSymbolRef::Flags Flags) {
- AsmSymbols.emplace_back(Name, std::move(Flags));
- });
+IRObjectFile::IRObjectFile(MemoryBufferRef Object,
+ std::vector<std::unique_ptr<Module>> Mods)
+ : SymbolicFile(Binary::ID_IR, Object), Mods(std::move(Mods)) {
+ for (auto &M : this->Mods)
+ SymTab.addModule(M.get());
}
-// Parse inline ASM and collect the list of symbols that are not defined in
-// the current module. This is inspired from IRObjectFile.
-void IRObjectFile::CollectAsmUndefinedRefs(
- const Triple &TT, StringRef InlineAsm,
- function_ref<void(StringRef, BasicSymbolRef::Flags)> AsmUndefinedRefs) {
- if (InlineAsm.empty())
- return;
+IRObjectFile::~IRObjectFile() {}
- std::string Err;
- const Target *T = TargetRegistry::lookupTarget(TT.str(), Err);
- if (!T)
- return;
-
- std::unique_ptr<MCRegisterInfo> MRI(T->createMCRegInfo(TT.str()));
- if (!MRI)
- return;
-
- std::unique_ptr<MCAsmInfo> MAI(T->createMCAsmInfo(*MRI, TT.str()));
- if (!MAI)
- return;
-
- std::unique_ptr<MCSubtargetInfo> STI(
- T->createMCSubtargetInfo(TT.str(), "", ""));
- if (!STI)
- return;
-
- std::unique_ptr<MCInstrInfo> MCII(T->createMCInstrInfo());
- if (!MCII)
- return;
-
- MCObjectFileInfo MOFI;
- MCContext MCCtx(MAI.get(), MRI.get(), &MOFI);
- MOFI.InitMCObjectFileInfo(TT, /*PIC*/ false, CodeModel::Default, MCCtx);
- std::unique_ptr<RecordStreamer> Streamer(new RecordStreamer(MCCtx));
- T->createNullTargetStreamer(*Streamer);
-
- std::unique_ptr<MemoryBuffer> Buffer(MemoryBuffer::getMemBuffer(InlineAsm));
- SourceMgr SrcMgr;
- SrcMgr.AddNewSourceBuffer(std::move(Buffer), SMLoc());
- std::unique_ptr<MCAsmParser> Parser(
- createMCAsmParser(SrcMgr, MCCtx, *Streamer, *MAI));
-
- MCTargetOptions MCOptions;
- std::unique_ptr<MCTargetAsmParser> TAP(
- T->createMCAsmParser(*STI, *Parser, *MCII, MCOptions));
- if (!TAP)
- return;
-
- Parser->setTargetParser(*TAP);
- if (Parser->Run(false))
- return;
-
- for (auto &KV : *Streamer) {
- StringRef Key = KV.first();
- RecordStreamer::State Value = KV.second;
- uint32_t Res = BasicSymbolRef::SF_None;
- switch (Value) {
- case RecordStreamer::NeverSeen:
- llvm_unreachable("foo");
- case RecordStreamer::DefinedGlobal:
- Res |= BasicSymbolRef::SF_Global;
- break;
- case RecordStreamer::Defined:
- break;
- case RecordStreamer::Global:
- case RecordStreamer::Used:
- Res |= BasicSymbolRef::SF_Undefined;
- Res |= BasicSymbolRef::SF_Global;
- break;
- case RecordStreamer::GlobalWeak:
- Res |= BasicSymbolRef::SF_Weak;
- Res |= BasicSymbolRef::SF_Global;
- break;
- }
- AsmUndefinedRefs(Key, BasicSymbolRef::Flags(Res));
- }
-}
-
-IRObjectFile::~IRObjectFile() {
- }
-
-static GlobalValue *getGV(DataRefImpl &Symb) {
- if ((Symb.p & 3) == 3)
- return nullptr;
-
- return reinterpret_cast<GlobalValue*>(Symb.p & ~uintptr_t(3));
-}
-
-static uintptr_t skipEmpty(Module::const_alias_iterator I, const Module &M) {
- if (I == M.alias_end())
- return 3;
- const GlobalValue *GV = &*I;
- return reinterpret_cast<uintptr_t>(GV) | 2;
-}
-
-static uintptr_t skipEmpty(Module::const_global_iterator I, const Module &M) {
- if (I == M.global_end())
- return skipEmpty(M.alias_begin(), M);
- const GlobalValue *GV = &*I;
- return reinterpret_cast<uintptr_t>(GV) | 1;
-}
-
-static uintptr_t skipEmpty(Module::const_iterator I, const Module &M) {
- if (I == M.end())
- return skipEmpty(M.global_begin(), M);
- const GlobalValue *GV = &*I;
- return reinterpret_cast<uintptr_t>(GV) | 0;
-}
-
-static unsigned getAsmSymIndex(DataRefImpl Symb) {
- assert((Symb.p & uintptr_t(3)) == 3);
- uintptr_t Index = Symb.p & ~uintptr_t(3);
- Index >>= 2;
- return Index;
+static ModuleSymbolTable::Symbol getSym(DataRefImpl &Symb) {
+ return *reinterpret_cast<ModuleSymbolTable::Symbol *>(Symb.p);
}
void IRObjectFile::moveSymbolNext(DataRefImpl &Symb) const {
- const GlobalValue *GV = getGV(Symb);
- uintptr_t Res;
-
- switch (Symb.p & 3) {
- case 0: {
- Module::const_iterator Iter(static_cast<const Function*>(GV));
- ++Iter;
- Res = skipEmpty(Iter, *M);
- break;
- }
- case 1: {
- Module::const_global_iterator Iter(static_cast<const GlobalVariable*>(GV));
- ++Iter;
- Res = skipEmpty(Iter, *M);
- break;
- }
- case 2: {
- Module::const_alias_iterator Iter(static_cast<const GlobalAlias*>(GV));
- ++Iter;
- Res = skipEmpty(Iter, *M);
- break;
- }
- case 3: {
- unsigned Index = getAsmSymIndex(Symb);
- assert(Index < AsmSymbols.size());
- ++Index;
- Res = (Index << 2) | 3;
- break;
- }
- default:
- llvm_unreachable("unreachable case");
- }
-
- Symb.p = Res;
+ Symb.p += sizeof(ModuleSymbolTable::Symbol);
}
std::error_code IRObjectFile::printSymbolName(raw_ostream &OS,
DataRefImpl Symb) const {
- const GlobalValue *GV = getGV(Symb);
- if (!GV) {
- unsigned Index = getAsmSymIndex(Symb);
- assert(Index <= AsmSymbols.size());
- OS << AsmSymbols[Index].first;
- return std::error_code();
- }
-
- if (GV->hasDLLImportStorageClass())
- OS << "__imp_";
-
- if (Mang)
- Mang->getNameWithPrefix(OS, GV, false);
- else
- OS << GV->getName();
-
+ SymTab.printSymbolName(OS, getSym(Symb));
return std::error_code();
}
uint32_t IRObjectFile::getSymbolFlags(DataRefImpl Symb) const {
- const GlobalValue *GV = getGV(Symb);
-
- if (!GV) {
- unsigned Index = getAsmSymIndex(Symb);
- assert(Index <= AsmSymbols.size());
- return AsmSymbols[Index].second;
- }
-
- uint32_t Res = BasicSymbolRef::SF_None;
- if (GV->isDeclarationForLinker())
- Res |= BasicSymbolRef::SF_Undefined;
- else if (GV->hasHiddenVisibility() && !GV->hasLocalLinkage())
- Res |= BasicSymbolRef::SF_Hidden;
- if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) {
- if (GVar->isConstant())
- Res |= BasicSymbolRef::SF_Const;
- }
- if (GV->hasPrivateLinkage())
- Res |= BasicSymbolRef::SF_FormatSpecific;
- if (!GV->hasLocalLinkage())
- Res |= BasicSymbolRef::SF_Global;
- if (GV->hasCommonLinkage())
- Res |= BasicSymbolRef::SF_Common;
- if (GV->hasLinkOnceLinkage() || GV->hasWeakLinkage() ||
- GV->hasExternalWeakLinkage())
- Res |= BasicSymbolRef::SF_Weak;
-
- if (GV->getName().startswith("llvm."))
- Res |= BasicSymbolRef::SF_FormatSpecific;
- else if (auto *Var = dyn_cast<GlobalVariable>(GV)) {
- if (Var->getSection() == "llvm.metadata")
- Res |= BasicSymbolRef::SF_FormatSpecific;
- }
-
- return Res;
+ return SymTab.getSymbolFlags(getSym(Symb));
}
-GlobalValue *IRObjectFile::getSymbolGV(DataRefImpl Symb) { return getGV(Symb); }
-
-std::unique_ptr<Module> IRObjectFile::takeModule() { return std::move(M); }
-
-basic_symbol_iterator IRObjectFile::symbol_begin_impl() const {
- Module::const_iterator I = M->begin();
+basic_symbol_iterator IRObjectFile::symbol_begin() const {
DataRefImpl Ret;
- Ret.p = skipEmpty(I, *M);
+ Ret.p = reinterpret_cast<uintptr_t>(SymTab.symbols().data());
return basic_symbol_iterator(BasicSymbolRef(Ret, this));
}
-basic_symbol_iterator IRObjectFile::symbol_end_impl() const {
+basic_symbol_iterator IRObjectFile::symbol_end() const {
DataRefImpl Ret;
- uint64_t NumAsm = AsmSymbols.size();
- NumAsm <<= 2;
- Ret.p = 3 | NumAsm;
+ Ret.p = reinterpret_cast<uintptr_t>(SymTab.symbols().data() +
+ SymTab.symbols().size());
return basic_symbol_iterator(BasicSymbolRef(Ret, this));
}
+StringRef IRObjectFile::getTargetTriple() const {
+ // Each module must have the same target triple, so we arbitrarily access the
+ // first one.
+ return Mods[0]->getTargetTriple();
+}
+
ErrorOr<MemoryBufferRef> IRObjectFile::findBitcodeInObject(const ObjectFile &Obj) {
for (const SectionRef &Sec : Obj.sections()) {
if (Sec.isBitcode()) {
@@ -307,22 +113,28 @@ ErrorOr<MemoryBufferRef> IRObjectFile::findBitcodeInMemBuffer(MemoryBufferRef Ob
}
}
-ErrorOr<std::unique_ptr<IRObjectFile>>
-llvm::object::IRObjectFile::create(MemoryBufferRef Object,
- LLVMContext &Context) {
+Expected<std::unique_ptr<IRObjectFile>>
+IRObjectFile::create(MemoryBufferRef Object, LLVMContext &Context) {
ErrorOr<MemoryBufferRef> BCOrErr = findBitcodeInMemBuffer(Object);
if (!BCOrErr)
- return BCOrErr.getError();
-
- std::unique_ptr<MemoryBuffer> Buff =
- MemoryBuffer::getMemBuffer(BCOrErr.get(), false);
-
- ErrorOr<std::unique_ptr<Module>> MOrErr =
- getLazyBitcodeModule(std::move(Buff), Context,
- /*ShouldLazyLoadMetadata*/ true);
- if (std::error_code EC = MOrErr.getError())
- return EC;
+ return errorCodeToError(BCOrErr.getError());
+
+ Expected<std::vector<BitcodeModule>> BMsOrErr =
+ getBitcodeModuleList(*BCOrErr);
+ if (!BMsOrErr)
+ return BMsOrErr.takeError();
+
+ std::vector<std::unique_ptr<Module>> Mods;
+ for (auto BM : *BMsOrErr) {
+ Expected<std::unique_ptr<Module>> MOrErr =
+ BM.getLazyModule(Context, /*ShouldLazyLoadMetadata*/ true,
+ /*IsImporting*/ false);
+ if (!MOrErr)
+ return MOrErr.takeError();
+
+ Mods.push_back(std::move(*MOrErr));
+ }
- std::unique_ptr<Module> &M = MOrErr.get();
- return llvm::make_unique<IRObjectFile>(Object, std::move(M));
+ return std::unique_ptr<IRObjectFile>(
+ new IRObjectFile(*BCOrErr, std::move(Mods)));
}
diff --git a/contrib/llvm/lib/Object/MachOObjectFile.cpp b/contrib/llvm/lib/Object/MachOObjectFile.cpp
index 563236f95a5b..40105000c56c 100644
--- a/contrib/llvm/lib/Object/MachOObjectFile.cpp
+++ b/contrib/llvm/lib/Object/MachOObjectFile.cpp
@@ -27,6 +27,7 @@
#include <cctype>
#include <cstring>
#include <limits>
+#include <list>
using namespace llvm;
using namespace object;
@@ -47,37 +48,37 @@ malformedError(Twine Msg) {
// FIXME: Replace all uses of this function with getStructOrErr.
template <typename T>
-static T getStruct(const MachOObjectFile *O, const char *P) {
+static T getStruct(const MachOObjectFile &O, const char *P) {
// Don't read before the beginning or past the end of the file
- if (P < O->getData().begin() || P + sizeof(T) > O->getData().end())
+ if (P < O.getData().begin() || P + sizeof(T) > O.getData().end())
report_fatal_error("Malformed MachO file.");
T Cmd;
memcpy(&Cmd, P, sizeof(T));
- if (O->isLittleEndian() != sys::IsLittleEndianHost)
+ if (O.isLittleEndian() != sys::IsLittleEndianHost)
MachO::swapStruct(Cmd);
return Cmd;
}
template <typename T>
-static Expected<T> getStructOrErr(const MachOObjectFile *O, const char *P) {
+static Expected<T> getStructOrErr(const MachOObjectFile &O, const char *P) {
// Don't read before the beginning or past the end of the file
- if (P < O->getData().begin() || P + sizeof(T) > O->getData().end())
+ if (P < O.getData().begin() || P + sizeof(T) > O.getData().end())
return malformedError("Structure read out-of-range");
T Cmd;
memcpy(&Cmd, P, sizeof(T));
- if (O->isLittleEndian() != sys::IsLittleEndianHost)
+ if (O.isLittleEndian() != sys::IsLittleEndianHost)
MachO::swapStruct(Cmd);
return Cmd;
}
static const char *
-getSectionPtr(const MachOObjectFile *O, MachOObjectFile::LoadCommandInfo L,
+getSectionPtr(const MachOObjectFile &O, MachOObjectFile::LoadCommandInfo L,
unsigned Sec) {
uintptr_t CommandAddr = reinterpret_cast<uintptr_t>(L.Ptr);
- bool Is64 = O->is64Bit();
+ bool Is64 = O.is64Bit();
unsigned SegmentLoadSize = Is64 ? sizeof(MachO::segment_command_64) :
sizeof(MachO::segment_command);
unsigned SectionSize = Is64 ? sizeof(MachO::section_64) :
@@ -87,12 +88,12 @@ getSectionPtr(const MachOObjectFile *O, MachOObjectFile::LoadCommandInfo L,
return reinterpret_cast<const char*>(SectionAddr);
}
-static const char *getPtr(const MachOObjectFile *O, size_t Offset) {
- return O->getData().substr(Offset, 1).data();
+static const char *getPtr(const MachOObjectFile &O, size_t Offset) {
+ return O.getData().substr(Offset, 1).data();
}
static MachO::nlist_base
-getSymbolTableEntryBase(const MachOObjectFile *O, DataRefImpl DRI) {
+getSymbolTableEntryBase(const MachOObjectFile &O, DataRefImpl DRI) {
const char *P = reinterpret_cast<const char *>(DRI.p);
return getStruct<MachO::nlist_base>(O, P);
}
@@ -112,8 +113,8 @@ static void advance(T &it, size_t Val) {
++it;
}
-static unsigned getCPUType(const MachOObjectFile *O) {
- return O->getHeader().cputype;
+static unsigned getCPUType(const MachOObjectFile &O) {
+ return O.getHeader().cputype;
}
static uint32_t
@@ -126,22 +127,21 @@ getScatteredRelocationAddress(const MachO::any_relocation_info &RE) {
return RE.r_word0 & 0xffffff;
}
-static bool getPlainRelocationPCRel(const MachOObjectFile *O,
+static bool getPlainRelocationPCRel(const MachOObjectFile &O,
const MachO::any_relocation_info &RE) {
- if (O->isLittleEndian())
+ if (O.isLittleEndian())
return (RE.r_word1 >> 24) & 1;
return (RE.r_word1 >> 7) & 1;
}
static bool
-getScatteredRelocationPCRel(const MachOObjectFile *O,
- const MachO::any_relocation_info &RE) {
+getScatteredRelocationPCRel(const MachO::any_relocation_info &RE) {
return (RE.r_word0 >> 30) & 1;
}
-static unsigned getPlainRelocationLength(const MachOObjectFile *O,
+static unsigned getPlainRelocationLength(const MachOObjectFile &O,
const MachO::any_relocation_info &RE) {
- if (O->isLittleEndian())
+ if (O.isLittleEndian())
return (RE.r_word1 >> 25) & 3;
return (RE.r_word1 >> 5) & 3;
}
@@ -151,25 +151,25 @@ getScatteredRelocationLength(const MachO::any_relocation_info &RE) {
return (RE.r_word0 >> 28) & 3;
}
-static unsigned getPlainRelocationType(const MachOObjectFile *O,
+static unsigned getPlainRelocationType(const MachOObjectFile &O,
const MachO::any_relocation_info &RE) {
- if (O->isLittleEndian())
+ if (O.isLittleEndian())
return RE.r_word1 >> 28;
return RE.r_word1 & 0xf;
}
-static uint32_t getSectionFlags(const MachOObjectFile *O,
+static uint32_t getSectionFlags(const MachOObjectFile &O,
DataRefImpl Sec) {
- if (O->is64Bit()) {
- MachO::section_64 Sect = O->getSection64(Sec);
+ if (O.is64Bit()) {
+ MachO::section_64 Sect = O.getSection64(Sec);
return Sect.flags;
}
- MachO::section Sect = O->getSection(Sec);
+ MachO::section Sect = O.getSection(Sec);
return Sect.flags;
}
static Expected<MachOObjectFile::LoadCommandInfo>
-getLoadCommandInfo(const MachOObjectFile *Obj, const char *Ptr,
+getLoadCommandInfo(const MachOObjectFile &Obj, const char *Ptr,
uint32_t LoadCommandIndex) {
if (auto CmdOrErr = getStructOrErr<MachO::load_command>(Obj, Ptr)) {
if (CmdOrErr->cmdsize < 8)
@@ -181,31 +181,31 @@ getLoadCommandInfo(const MachOObjectFile *Obj, const char *Ptr,
}
static Expected<MachOObjectFile::LoadCommandInfo>
-getFirstLoadCommandInfo(const MachOObjectFile *Obj) {
- unsigned HeaderSize = Obj->is64Bit() ? sizeof(MachO::mach_header_64)
- : sizeof(MachO::mach_header);
- if (sizeof(MachOObjectFile::LoadCommandInfo) > Obj->getHeader().sizeofcmds)
+getFirstLoadCommandInfo(const MachOObjectFile &Obj) {
+ unsigned HeaderSize = Obj.is64Bit() ? sizeof(MachO::mach_header_64)
+ : sizeof(MachO::mach_header);
+ if (sizeof(MachO::load_command) > Obj.getHeader().sizeofcmds)
return malformedError("load command 0 extends past the end all load "
"commands in the file");
return getLoadCommandInfo(Obj, getPtr(Obj, HeaderSize), 0);
}
static Expected<MachOObjectFile::LoadCommandInfo>
-getNextLoadCommandInfo(const MachOObjectFile *Obj, uint32_t LoadCommandIndex,
+getNextLoadCommandInfo(const MachOObjectFile &Obj, uint32_t LoadCommandIndex,
const MachOObjectFile::LoadCommandInfo &L) {
- unsigned HeaderSize = Obj->is64Bit() ? sizeof(MachO::mach_header_64)
- : sizeof(MachO::mach_header);
- if (L.Ptr + L.C.cmdsize + sizeof(MachOObjectFile::LoadCommandInfo) >
- Obj->getData().data() + HeaderSize + Obj->getHeader().sizeofcmds)
+ unsigned HeaderSize = Obj.is64Bit() ? sizeof(MachO::mach_header_64)
+ : sizeof(MachO::mach_header);
+ if (L.Ptr + L.C.cmdsize + sizeof(MachO::load_command) >
+ Obj.getData().data() + HeaderSize + Obj.getHeader().sizeofcmds)
return malformedError("load command " + Twine(LoadCommandIndex + 1) +
" extends past the end all load commands in the file");
return getLoadCommandInfo(Obj, L.Ptr + L.C.cmdsize, LoadCommandIndex + 1);
}
template <typename T>
-static void parseHeader(const MachOObjectFile *Obj, T &Header,
+static void parseHeader(const MachOObjectFile &Obj, T &Header,
Error &Err) {
- if (sizeof(T) > Obj->getData().size()) {
+ if (sizeof(T) > Obj.getData().size()) {
Err = malformedError("the mach header extends past the end of the "
"file");
return;
@@ -216,31 +216,160 @@ static void parseHeader(const MachOObjectFile *Obj, T &Header,
Err = HeaderOrErr.takeError();
}
+// This is used to check for overlapping of Mach-O elements.
+struct MachOElement {
+ uint64_t Offset;
+ uint64_t Size;
+ const char *Name;
+};
+
+static Error checkOverlappingElement(std::list<MachOElement> &Elements,
+ uint64_t Offset, uint64_t Size,
+ const char *Name) {
+ if (Size == 0)
+ return Error::success();
+
+ for (auto it=Elements.begin() ; it != Elements.end(); ++it) {
+ auto E = *it;
+ if ((Offset >= E.Offset && Offset < E.Offset + E.Size) ||
+ (Offset + Size > E.Offset && Offset + Size < E.Offset + E.Size) ||
+ (Offset <= E.Offset && Offset + Size >= E.Offset + E.Size))
+ return malformedError(Twine(Name) + " at offset " + Twine(Offset) +
+ " with a size of " + Twine(Size) + ", overlaps " +
+ E.Name + " at offset " + Twine(E.Offset) + " with "
+ "a size of " + Twine(E.Size));
+ auto nt = it;
+ nt++;
+ if (nt != Elements.end()) {
+ auto N = *nt;
+ if (Offset + Size <= N.Offset) {
+ Elements.insert(nt, {Offset, Size, Name});
+ return Error::success();
+ }
+ }
+ }
+ Elements.push_back({Offset, Size, Name});
+ return Error::success();
+}
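The interval test at the heart of checkOverlappingElement, restated compactly for half-open ranges; this is a sketch of the same idea and omits the sorted-list bookkeeping.

#include <cassert>
#include <cstdint>

// Two file ranges [AOff, AOff+ASize) and [BOff, BOff+BSize) collide when each
// one starts before the other ends; zero-sized ranges never overlap anything.
static bool rangesOverlap(uint64_t AOff, uint64_t ASize,
                          uint64_t BOff, uint64_t BSize) {
  if (ASize == 0 || BSize == 0)
    return false;
  return AOff < BOff + BSize && BOff < AOff + ASize;
}

int main() {
  assert(rangesOverlap(0, 100, 50, 10));    // nested range
  assert(rangesOverlap(90, 20, 100, 50));   // straddles a boundary
  assert(!rangesOverlap(0, 100, 100, 4));   // back-to-back ranges are fine
}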
+
// Parses LC_SEGMENT or LC_SEGMENT_64 load command, adds addresses of all
// sections to \param Sections, and optionally sets
// \param IsPageZeroSegment to true.
-template <typename SegmentCmd>
+template <typename Segment, typename Section>
static Error parseSegmentLoadCommand(
- const MachOObjectFile *Obj, const MachOObjectFile::LoadCommandInfo &Load,
+ const MachOObjectFile &Obj, const MachOObjectFile::LoadCommandInfo &Load,
SmallVectorImpl<const char *> &Sections, bool &IsPageZeroSegment,
- uint32_t LoadCommandIndex, const char *CmdName) {
- const unsigned SegmentLoadSize = sizeof(SegmentCmd);
+ uint32_t LoadCommandIndex, const char *CmdName, uint64_t SizeOfHeaders,
+ std::list<MachOElement> &Elements) {
+ const unsigned SegmentLoadSize = sizeof(Segment);
if (Load.C.cmdsize < SegmentLoadSize)
return malformedError("load command " + Twine(LoadCommandIndex) +
" " + CmdName + " cmdsize too small");
- if (auto SegOrErr = getStructOrErr<SegmentCmd>(Obj, Load.Ptr)) {
- SegmentCmd S = SegOrErr.get();
- const unsigned SectionSize =
- Obj->is64Bit() ? sizeof(MachO::section_64) : sizeof(MachO::section);
+ if (auto SegOrErr = getStructOrErr<Segment>(Obj, Load.Ptr)) {
+ Segment S = SegOrErr.get();
+ const unsigned SectionSize = sizeof(Section);
+ uint64_t FileSize = Obj.getData().size();
if (S.nsects > std::numeric_limits<uint32_t>::max() / SectionSize ||
S.nsects * SectionSize > Load.C.cmdsize - SegmentLoadSize)
return malformedError("load command " + Twine(LoadCommandIndex) +
- " inconsistent cmdsize in " + CmdName +
+ " inconsistent cmdsize in " + CmdName +
" for the number of sections");
for (unsigned J = 0; J < S.nsects; ++J) {
const char *Sec = getSectionPtr(Obj, Load, J);
Sections.push_back(Sec);
+ Section s = getStruct<Section>(Obj, Sec);
+ if (Obj.getHeader().filetype != MachO::MH_DYLIB_STUB &&
+ Obj.getHeader().filetype != MachO::MH_DSYM &&
+ s.flags != MachO::S_ZEROFILL &&
+ s.flags != MachO::S_THREAD_LOCAL_ZEROFILL &&
+ s.offset > FileSize)
+ return malformedError("offset field of section " + Twine(J) + " in " +
+ CmdName + " command " + Twine(LoadCommandIndex) +
+ " extends past the end of the file");
+ if (Obj.getHeader().filetype != MachO::MH_DYLIB_STUB &&
+ Obj.getHeader().filetype != MachO::MH_DSYM &&
+ s.flags != MachO::S_ZEROFILL &&
+ s.flags != MachO::S_THREAD_LOCAL_ZEROFILL && S.fileoff == 0 &&
+ s.offset < SizeOfHeaders && s.size != 0)
+ return malformedError("offset field of section " + Twine(J) + " in " +
+ CmdName + " command " + Twine(LoadCommandIndex) +
+ " not past the headers of the file");
+ uint64_t BigSize = s.offset;
+ BigSize += s.size;
+ if (Obj.getHeader().filetype != MachO::MH_DYLIB_STUB &&
+ Obj.getHeader().filetype != MachO::MH_DSYM &&
+ s.flags != MachO::S_ZEROFILL &&
+ s.flags != MachO::S_THREAD_LOCAL_ZEROFILL &&
+ BigSize > FileSize)
+ return malformedError("offset field plus size field of section " +
+ Twine(J) + " in " + CmdName + " command " +
+ Twine(LoadCommandIndex) +
+ " extends past the end of the file");
+ if (Obj.getHeader().filetype != MachO::MH_DYLIB_STUB &&
+ Obj.getHeader().filetype != MachO::MH_DSYM &&
+ s.flags != MachO::S_ZEROFILL &&
+ s.flags != MachO::S_THREAD_LOCAL_ZEROFILL &&
+ s.size > S.filesize)
+ return malformedError("size field of section " +
+ Twine(J) + " in " + CmdName + " command " +
+ Twine(LoadCommandIndex) +
+ " greater than the segment");
+ if (Obj.getHeader().filetype != MachO::MH_DYLIB_STUB &&
+ Obj.getHeader().filetype != MachO::MH_DSYM && s.size != 0 &&
+ s.addr < S.vmaddr)
+ return malformedError("addr field of section " + Twine(J) + " in " +
+ CmdName + " command " + Twine(LoadCommandIndex) +
+ " less than the segment's vmaddr");
+ BigSize = s.addr;
+ BigSize += s.size;
+ uint64_t BigEnd = S.vmaddr;
+ BigEnd += S.vmsize;
+ if (S.vmsize != 0 && s.size != 0 && BigSize > BigEnd)
+ return malformedError("addr field plus size of section " + Twine(J) +
+ " in " + CmdName + " command " +
+ Twine(LoadCommandIndex) +
+                              " greater than "
+ "the segment's vmaddr plus vmsize");
+ if (Obj.getHeader().filetype != MachO::MH_DYLIB_STUB &&
+ Obj.getHeader().filetype != MachO::MH_DSYM &&
+ s.flags != MachO::S_ZEROFILL &&
+ s.flags != MachO::S_THREAD_LOCAL_ZEROFILL)
+ if (Error Err = checkOverlappingElement(Elements, s.offset, s.size,
+ "section contents"))
+ return Err;
+ if (s.reloff > FileSize)
+ return malformedError("reloff field of section " + Twine(J) + " in " +
+ CmdName + " command " + Twine(LoadCommandIndex) +
+ " extends past the end of the file");
+ BigSize = s.nreloc;
+ BigSize *= sizeof(struct MachO::relocation_info);
+ BigSize += s.reloff;
+ if (BigSize > FileSize)
+ return malformedError("reloff field plus nreloc field times sizeof("
+ "struct relocation_info) of section " +
+ Twine(J) + " in " + CmdName + " command " +
+ Twine(LoadCommandIndex) +
+ " extends past the end of the file");
+ if (Error Err = checkOverlappingElement(Elements, s.reloff, s.nreloc *
+ sizeof(struct
+ MachO::relocation_info),
+ "section relocation entries"))
+ return Err;
}
+ if (S.fileoff > FileSize)
+ return malformedError("load command " + Twine(LoadCommandIndex) +
+ " fileoff field in " + CmdName +
+ " extends past the end of the file");
+ uint64_t BigSize = S.fileoff;
+ BigSize += S.filesize;
+ if (BigSize > FileSize)
+ return malformedError("load command " + Twine(LoadCommandIndex) +
+ " fileoff field plus filesize field in " +
+ CmdName + " extends past the end of the file");
+ if (S.vmsize != 0 && S.filesize > S.vmsize)
+ return malformedError("load command " + Twine(LoadCommandIndex) +
+ " fileoff field in " + CmdName +
+ " greater than vmsize field");
IsPageZeroSegment |= StringRef("__PAGEZERO").equals(S.segname);
} else
return SegOrErr.takeError();
@@ -248,54 +377,771 @@ static Error parseSegmentLoadCommand(
return Error::success();
}
+static Error checkSymtabCommand(const MachOObjectFile &Obj,
+ const MachOObjectFile::LoadCommandInfo &Load,
+ uint32_t LoadCommandIndex,
+ const char **SymtabLoadCmd,
+ std::list<MachOElement> &Elements) {
+ if (Load.C.cmdsize < sizeof(MachO::symtab_command))
+ return malformedError("load command " + Twine(LoadCommandIndex) +
+ " LC_SYMTAB cmdsize too small");
+ if (*SymtabLoadCmd != nullptr)
+ return malformedError("more than one LC_SYMTAB command");
+ MachO::symtab_command Symtab =
+ getStruct<MachO::symtab_command>(Obj, Load.Ptr);
+ if (Symtab.cmdsize != sizeof(MachO::symtab_command))
+ return malformedError("LC_SYMTAB command " + Twine(LoadCommandIndex) +
+ " has incorrect cmdsize");
+ uint64_t FileSize = Obj.getData().size();
+ if (Symtab.symoff > FileSize)
+ return malformedError("symoff field of LC_SYMTAB command " +
+ Twine(LoadCommandIndex) + " extends past the end "
+ "of the file");
+ uint64_t SymtabSize = Symtab.nsyms;
+ const char *struct_nlist_name;
+ if (Obj.is64Bit()) {
+ SymtabSize *= sizeof(MachO::nlist_64);
+ struct_nlist_name = "struct nlist_64";
+ } else {
+ SymtabSize *= sizeof(MachO::nlist);
+ struct_nlist_name = "struct nlist";
+ }
+ uint64_t BigSize = SymtabSize;
+ BigSize += Symtab.symoff;
+ if (BigSize > FileSize)
+ return malformedError("symoff field plus nsyms field times sizeof(" +
+ Twine(struct_nlist_name) + ") of LC_SYMTAB command " +
+ Twine(LoadCommandIndex) + " extends past the end "
+ "of the file");
+ if (Error Err = checkOverlappingElement(Elements, Symtab.symoff, SymtabSize,
+ "symbol table"))
+ return Err;
+ if (Symtab.stroff > FileSize)
+ return malformedError("stroff field of LC_SYMTAB command " +
+ Twine(LoadCommandIndex) + " extends past the end "
+ "of the file");
+ BigSize = Symtab.stroff;
+ BigSize += Symtab.strsize;
+ if (BigSize > FileSize)
+ return malformedError("stroff field plus strsize field of LC_SYMTAB "
+ "command " + Twine(LoadCommandIndex) + " extends "
+ "past the end of the file");
+ if (Error Err = checkOverlappingElement(Elements, Symtab.stroff,
+ Symtab.strsize, "string table"))
+ return Err;
+ *SymtabLoadCmd = Load.Ptr;
+ return Error::success();
+}
+
+static Error checkDysymtabCommand(const MachOObjectFile &Obj,
+ const MachOObjectFile::LoadCommandInfo &Load,
+ uint32_t LoadCommandIndex,
+ const char **DysymtabLoadCmd,
+ std::list<MachOElement> &Elements) {
+ if (Load.C.cmdsize < sizeof(MachO::dysymtab_command))
+ return malformedError("load command " + Twine(LoadCommandIndex) +
+ " LC_DYSYMTAB cmdsize too small");
+ if (*DysymtabLoadCmd != nullptr)
+ return malformedError("more than one LC_DYSYMTAB command");
+ MachO::dysymtab_command Dysymtab =
+ getStruct<MachO::dysymtab_command>(Obj, Load.Ptr);
+ if (Dysymtab.cmdsize != sizeof(MachO::dysymtab_command))
+ return malformedError("LC_DYSYMTAB command " + Twine(LoadCommandIndex) +
+ " has incorrect cmdsize");
+ uint64_t FileSize = Obj.getData().size();
+ if (Dysymtab.tocoff > FileSize)
+ return malformedError("tocoff field of LC_DYSYMTAB command " +
+ Twine(LoadCommandIndex) + " extends past the end of "
+ "the file");
+ uint64_t BigSize = Dysymtab.ntoc;
+ BigSize *= sizeof(MachO::dylib_table_of_contents);
+ BigSize += Dysymtab.tocoff;
+ if (BigSize > FileSize)
+ return malformedError("tocoff field plus ntoc field times sizeof(struct "
+ "dylib_table_of_contents) of LC_DYSYMTAB command " +
+ Twine(LoadCommandIndex) + " extends past the end of "
+ "the file");
+ if (Error Err = checkOverlappingElement(Elements, Dysymtab.tocoff,
+ Dysymtab.ntoc * sizeof(struct
+ MachO::dylib_table_of_contents),
+ "table of contents"))
+ return Err;
+ if (Dysymtab.modtaboff > FileSize)
+ return malformedError("modtaboff field of LC_DYSYMTAB command " +
+ Twine(LoadCommandIndex) + " extends past the end of "
+ "the file");
+ BigSize = Dysymtab.nmodtab;
+ const char *struct_dylib_module_name;
+ uint64_t sizeof_modtab;
+ if (Obj.is64Bit()) {
+ sizeof_modtab = sizeof(MachO::dylib_module_64);
+ struct_dylib_module_name = "struct dylib_module_64";
+ } else {
+ sizeof_modtab = sizeof(MachO::dylib_module);
+ struct_dylib_module_name = "struct dylib_module";
+ }
+ BigSize *= sizeof_modtab;
+ BigSize += Dysymtab.modtaboff;
+ if (BigSize > FileSize)
+ return malformedError("modtaboff field plus nmodtab field times sizeof(" +
+ Twine(struct_dylib_module_name) + ") of LC_DYSYMTAB "
+ "command " + Twine(LoadCommandIndex) + " extends "
+ "past the end of the file");
+ if (Error Err = checkOverlappingElement(Elements, Dysymtab.modtaboff,
+ Dysymtab.nmodtab * sizeof_modtab,
+ "module table"))
+ return Err;
+ if (Dysymtab.extrefsymoff > FileSize)
+ return malformedError("extrefsymoff field of LC_DYSYMTAB command " +
+ Twine(LoadCommandIndex) + " extends past the end of "
+ "the file");
+ BigSize = Dysymtab.nextrefsyms;
+ BigSize *= sizeof(MachO::dylib_reference);
+ BigSize += Dysymtab.extrefsymoff;
+ if (BigSize > FileSize)
+ return malformedError("extrefsymoff field plus nextrefsyms field times "
+ "sizeof(struct dylib_reference) of LC_DYSYMTAB "
+ "command " + Twine(LoadCommandIndex) + " extends "
+ "past the end of the file");
+ if (Error Err = checkOverlappingElement(Elements, Dysymtab.extrefsymoff,
+ Dysymtab.nextrefsyms *
+ sizeof(MachO::dylib_reference),
+ "reference table"))
+ return Err;
+ if (Dysymtab.indirectsymoff > FileSize)
+ return malformedError("indirectsymoff field of LC_DYSYMTAB command " +
+ Twine(LoadCommandIndex) + " extends past the end of "
+ "the file");
+ BigSize = Dysymtab.nindirectsyms;
+ BigSize *= sizeof(uint32_t);
+ BigSize += Dysymtab.indirectsymoff;
+ if (BigSize > FileSize)
+ return malformedError("indirectsymoff field plus nindirectsyms field times "
+ "sizeof(uint32_t) of LC_DYSYMTAB command " +
+ Twine(LoadCommandIndex) + " extends past the end of "
+ "the file");
+ if (Error Err = checkOverlappingElement(Elements, Dysymtab.indirectsymoff,
+ Dysymtab.nindirectsyms *
+ sizeof(uint32_t),
+ "indirect table"))
+ return Err;
+ if (Dysymtab.extreloff > FileSize)
+ return malformedError("extreloff field of LC_DYSYMTAB command " +
+ Twine(LoadCommandIndex) + " extends past the end of "
+ "the file");
+ BigSize = Dysymtab.nextrel;
+ BigSize *= sizeof(MachO::relocation_info);
+ BigSize += Dysymtab.extreloff;
+ if (BigSize > FileSize)
+ return malformedError("extreloff field plus nextrel field times sizeof"
+ "(struct relocation_info) of LC_DYSYMTAB command " +
+ Twine(LoadCommandIndex) + " extends past the end of "
+ "the file");
+ if (Error Err = checkOverlappingElement(Elements, Dysymtab.extreloff,
+ Dysymtab.nextrel *
+ sizeof(MachO::relocation_info),
+ "external relocation table"))
+ return Err;
+ if (Dysymtab.locreloff > FileSize)
+ return malformedError("locreloff field of LC_DYSYMTAB command " +
+ Twine(LoadCommandIndex) + " extends past the end of "
+ "the file");
+ BigSize = Dysymtab.nlocrel;
+ BigSize *= sizeof(MachO::relocation_info);
+ BigSize += Dysymtab.locreloff;
+ if (BigSize > FileSize)
+ return malformedError("locreloff field plus nlocrel field times sizeof"
+ "(struct relocation_info) of LC_DYSYMTAB command " +
+ Twine(LoadCommandIndex) + " extends past the end of "
+ "the file");
+ if (Error Err = checkOverlappingElement(Elements, Dysymtab.locreloff,
+ Dysymtab.nlocrel *
+ sizeof(MachO::relocation_info),
+ "local relocation table"))
+ return Err;
+ *DysymtabLoadCmd = Load.Ptr;
+ return Error::success();
+}
+
+static Error checkLinkeditDataCommand(const MachOObjectFile &Obj,
+ const MachOObjectFile::LoadCommandInfo &Load,
+ uint32_t LoadCommandIndex,
+ const char **LoadCmd, const char *CmdName,
+ std::list<MachOElement> &Elements,
+ const char *ElementName) {
+ if (Load.C.cmdsize < sizeof(MachO::linkedit_data_command))
+ return malformedError("load command " + Twine(LoadCommandIndex) + " " +
+ CmdName + " cmdsize too small");
+ if (*LoadCmd != nullptr)
+ return malformedError("more than one " + Twine(CmdName) + " command");
+ MachO::linkedit_data_command LinkData =
+ getStruct<MachO::linkedit_data_command>(Obj, Load.Ptr);
+ if (LinkData.cmdsize != sizeof(MachO::linkedit_data_command))
+ return malformedError(Twine(CmdName) + " command " +
+ Twine(LoadCommandIndex) + " has incorrect cmdsize");
+ uint64_t FileSize = Obj.getData().size();
+ if (LinkData.dataoff > FileSize)
+ return malformedError("dataoff field of " + Twine(CmdName) + " command " +
+ Twine(LoadCommandIndex) + " extends past the end of "
+ "the file");
+ uint64_t BigSize = LinkData.dataoff;
+ BigSize += LinkData.datasize;
+ if (BigSize > FileSize)
+ return malformedError("dataoff field plus datasize field of " +
+ Twine(CmdName) + " command " +
+ Twine(LoadCommandIndex) + " extends past the end of "
+ "the file");
+ if (Error Err = checkOverlappingElement(Elements, LinkData.dataoff,
+ LinkData.datasize, ElementName))
+ return Err;
+ *LoadCmd = Load.Ptr;
+ return Error::success();
+}
+
+static Error checkDyldInfoCommand(const MachOObjectFile &Obj,
+ const MachOObjectFile::LoadCommandInfo &Load,
+ uint32_t LoadCommandIndex,
+ const char **LoadCmd, const char *CmdName,
+ std::list<MachOElement> &Elements) {
+ if (Load.C.cmdsize < sizeof(MachO::dyld_info_command))
+ return malformedError("load command " + Twine(LoadCommandIndex) + " " +
+ CmdName + " cmdsize too small");
+ if (*LoadCmd != nullptr)
+ return malformedError("more than one LC_DYLD_INFO and or LC_DYLD_INFO_ONLY "
+ "command");
+ MachO::dyld_info_command DyldInfo =
+ getStruct<MachO::dyld_info_command>(Obj, Load.Ptr);
+ if (DyldInfo.cmdsize != sizeof(MachO::dyld_info_command))
+ return malformedError(Twine(CmdName) + " command " +
+ Twine(LoadCommandIndex) + " has incorrect cmdsize");
+ uint64_t FileSize = Obj.getData().size();
+ if (DyldInfo.rebase_off > FileSize)
+ return malformedError("rebase_off field of " + Twine(CmdName) +
+ " command " + Twine(LoadCommandIndex) + " extends "
+ "past the end of the file");
+ uint64_t BigSize = DyldInfo.rebase_off;
+ BigSize += DyldInfo.rebase_size;
+ if (BigSize > FileSize)
+ return malformedError("rebase_off field plus rebase_size field of " +
+ Twine(CmdName) + " command " +
+ Twine(LoadCommandIndex) + " extends past the end of "
+ "the file");
+ if (Error Err = checkOverlappingElement(Elements, DyldInfo.rebase_off,
+ DyldInfo.rebase_size,
+ "dyld rebase info"))
+ return Err;
+ if (DyldInfo.bind_off > FileSize)
+ return malformedError("bind_off field of " + Twine(CmdName) +
+ " command " + Twine(LoadCommandIndex) + " extends "
+ "past the end of the file");
+ BigSize = DyldInfo.bind_off;
+ BigSize += DyldInfo.bind_size;
+ if (BigSize > FileSize)
+ return malformedError("bind_off field plus bind_size field of " +
+ Twine(CmdName) + " command " +
+ Twine(LoadCommandIndex) + " extends past the end of "
+ "the file");
+ if (Error Err = checkOverlappingElement(Elements, DyldInfo.bind_off,
+ DyldInfo.bind_size,
+ "dyld bind info"))
+ return Err;
+ if (DyldInfo.weak_bind_off > FileSize)
+ return malformedError("weak_bind_off field of " + Twine(CmdName) +
+ " command " + Twine(LoadCommandIndex) + " extends "
+ "past the end of the file");
+ BigSize = DyldInfo.weak_bind_off;
+ BigSize += DyldInfo.weak_bind_size;
+ if (BigSize > FileSize)
+ return malformedError("weak_bind_off field plus weak_bind_size field of " +
+ Twine(CmdName) + " command " +
+ Twine(LoadCommandIndex) + " extends past the end of "
+ "the file");
+ if (Error Err = checkOverlappingElement(Elements, DyldInfo.weak_bind_off,
+ DyldInfo.weak_bind_size,
+ "dyld weak bind info"))
+ return Err;
+ if (DyldInfo.lazy_bind_off > FileSize)
+ return malformedError("lazy_bind_off field of " + Twine(CmdName) +
+ " command " + Twine(LoadCommandIndex) + " extends "
+ "past the end of the file");
+ BigSize = DyldInfo.lazy_bind_off;
+ BigSize += DyldInfo.lazy_bind_size;
+ if (BigSize > FileSize)
+ return malformedError("lazy_bind_off field plus lazy_bind_size field of " +
+ Twine(CmdName) + " command " +
+ Twine(LoadCommandIndex) + " extends past the end of "
+ "the file");
+ if (Error Err = checkOverlappingElement(Elements, DyldInfo.lazy_bind_off,
+ DyldInfo.lazy_bind_size,
+ "dyld lazy bind info"))
+ return Err;
+ if (DyldInfo.export_off > FileSize)
+ return malformedError("export_off field of " + Twine(CmdName) +
+ " command " + Twine(LoadCommandIndex) + " extends "
+ "past the end of the file");
+ BigSize = DyldInfo.export_off;
+ BigSize += DyldInfo.export_size;
+ if (BigSize > FileSize)
+ return malformedError("export_off field plus export_size field of " +
+ Twine(CmdName) + " command " +
+ Twine(LoadCommandIndex) + " extends past the end of "
+ "the file");
+ if (Error Err = checkOverlappingElement(Elements, DyldInfo.export_off,
+ DyldInfo.export_size,
+ "dyld export info"))
+ return Err;
+ *LoadCmd = Load.Ptr;
+ return Error::success();
+}
+
+static Error checkDylibCommand(const MachOObjectFile &Obj,
+ const MachOObjectFile::LoadCommandInfo &Load,
+ uint32_t LoadCommandIndex, const char *CmdName) {
+ if (Load.C.cmdsize < sizeof(MachO::dylib_command))
+ return malformedError("load command " + Twine(LoadCommandIndex) + " " +
+ CmdName + " cmdsize too small");
+ MachO::dylib_command D = getStruct<MachO::dylib_command>(Obj, Load.Ptr);
+ if (D.dylib.name < sizeof(MachO::dylib_command))
+ return malformedError("load command " + Twine(LoadCommandIndex) + " " +
+ CmdName + " name.offset field too small, not past "
+ "the end of the dylib_command struct");
+ if (D.dylib.name >= D.cmdsize)
+ return malformedError("load command " + Twine(LoadCommandIndex) + " " +
+ CmdName + " name.offset field extends past the end "
+ "of the load command");
+ // Make sure there is a null between the starting offset of the name and
+ // the end of the load command.
+ uint32_t i;
+ const char *P = (const char *)Load.Ptr;
+ for (i = D.dylib.name; i < D.cmdsize; i++)
+ if (P[i] == '\0')
+ break;
+ if (i >= D.cmdsize)
+ return malformedError("load command " + Twine(LoadCommandIndex) + " " +
+ CmdName + " library name extends past the end of the "
+ "load command");
+ return Error::success();
+}
+
+static Error checkDylibIdCommand(const MachOObjectFile &Obj,
+ const MachOObjectFile::LoadCommandInfo &Load,
+ uint32_t LoadCommandIndex,
+ const char **LoadCmd) {
+ if (Error Err = checkDylibCommand(Obj, Load, LoadCommandIndex,
+ "LC_ID_DYLIB"))
+ return Err;
+ if (*LoadCmd != nullptr)
+ return malformedError("more than one LC_ID_DYLIB command");
+ if (Obj.getHeader().filetype != MachO::MH_DYLIB &&
+ Obj.getHeader().filetype != MachO::MH_DYLIB_STUB)
+ return malformedError("LC_ID_DYLIB load command in non-dynamic library "
+ "file type");
+ *LoadCmd = Load.Ptr;
+ return Error::success();
+}
+
+static Error checkDyldCommand(const MachOObjectFile &Obj,
+ const MachOObjectFile::LoadCommandInfo &Load,
+ uint32_t LoadCommandIndex, const char *CmdName) {
+ if (Load.C.cmdsize < sizeof(MachO::dylinker_command))
+ return malformedError("load command " + Twine(LoadCommandIndex) + " " +
+ CmdName + " cmdsize too small");
+ MachO::dylinker_command D = getStruct<MachO::dylinker_command>(Obj, Load.Ptr);
+ if (D.name < sizeof(MachO::dylinker_command))
+ return malformedError("load command " + Twine(LoadCommandIndex) + " " +
+ CmdName + " name.offset field too small, not past "
+ "the end of the dylinker_command struct");
+ if (D.name >= D.cmdsize)
+ return malformedError("load command " + Twine(LoadCommandIndex) + " " +
+ CmdName + " name.offset field extends past the end "
+ "of the load command");
+ // Make sure there is a null between the starting offset of the name and
+ // the end of the load command.
+ uint32_t i;
+ const char *P = (const char *)Load.Ptr;
+ for (i = D.name; i < D.cmdsize; i++)
+ if (P[i] == '\0')
+ break;
+ if (i >= D.cmdsize)
+ return malformedError("load command " + Twine(LoadCommandIndex) + " " +
+ CmdName + " dyld name extends past the end of the "
+ "load command");
+ return Error::success();
+}
+
+static Error checkVersCommand(const MachOObjectFile &Obj,
+ const MachOObjectFile::LoadCommandInfo &Load,
+ uint32_t LoadCommandIndex,
+ const char **LoadCmd, const char *CmdName) {
+ if (Load.C.cmdsize != sizeof(MachO::version_min_command))
+ return malformedError("load command " + Twine(LoadCommandIndex) + " " +
+ CmdName + " has incorrect cmdsize");
+ if (*LoadCmd != nullptr)
+ return malformedError("more than one LC_VERSION_MIN_MACOSX, "
+ "LC_VERSION_MIN_IPHONEOS, LC_VERSION_MIN_TVOS or "
+ "LC_VERSION_MIN_WATCHOS command");
+ *LoadCmd = Load.Ptr;
+ return Error::success();
+}
+
+static Error checkRpathCommand(const MachOObjectFile &Obj,
+ const MachOObjectFile::LoadCommandInfo &Load,
+ uint32_t LoadCommandIndex) {
+ if (Load.C.cmdsize < sizeof(MachO::rpath_command))
+ return malformedError("load command " + Twine(LoadCommandIndex) +
+ " LC_RPATH cmdsize too small");
+ MachO::rpath_command R = getStruct<MachO::rpath_command>(Obj, Load.Ptr);
+ if (R.path < sizeof(MachO::rpath_command))
+ return malformedError("load command " + Twine(LoadCommandIndex) +
+ " LC_RPATH path.offset field too small, not past "
+ "the end of the rpath_command struct");
+ if (R.path >= R.cmdsize)
+ return malformedError("load command " + Twine(LoadCommandIndex) +
+ " LC_RPATH path.offset field extends past the end "
+ "of the load command");
+ // Make sure there is a null between the starting offset of the path and
+ // the end of the load command.
+ uint32_t i;
+ const char *P = (const char *)Load.Ptr;
+ for (i = R.path; i < R.cmdsize; i++)
+ if (P[i] == '\0')
+ break;
+ if (i >= R.cmdsize)
+ return malformedError("load command " + Twine(LoadCommandIndex) +
+ " LC_RPATH library name extends past the end of the "
+ "load command");
+ return Error::success();
+}
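The dylib, dyld and rpath validators above (and the LC_SUB_* path check further down) share one pattern: the embedded name must start past the fixed struct, start inside the command, and reach a NUL before the command ends. A stand-alone sketch of that pattern, with a hypothetical helper name:

#include <cassert>
#include <cstdint>
#include <cstring>

static bool embeddedNameIsValid(const char *Cmd, uint32_t CmdSize,
                                uint32_t NameOffset, uint32_t StructSize) {
  if (NameOffset < StructSize)   // offset points back into the fixed struct
    return false;
  if (NameOffset >= CmdSize)     // offset is past the end of the load command
    return false;
  // There must be a terminating NUL before the load command ends.
  return std::memchr(Cmd + NameOffset, '\0', CmdSize - NameOffset) != nullptr;
}

int main() {
  char Cmd[32] = {};
  std::strcpy(Cmd + 24, "libc");   // hypothetical name at offset 24
  assert(embeddedNameIsValid(Cmd, sizeof(Cmd), 24, 24));
  assert(!embeddedNameIsValid(Cmd, sizeof(Cmd), 8, 24));
}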
+
+static Error checkEncryptCommand(const MachOObjectFile &Obj,
+ const MachOObjectFile::LoadCommandInfo &Load,
+ uint32_t LoadCommandIndex,
+ uint64_t cryptoff, uint64_t cryptsize,
+ const char **LoadCmd, const char *CmdName) {
+ if (*LoadCmd != nullptr)
+ return malformedError("more than one LC_ENCRYPTION_INFO and or "
+ "LC_ENCRYPTION_INFO_64 command");
+ uint64_t FileSize = Obj.getData().size();
+ if (cryptoff > FileSize)
+ return malformedError("cryptoff field of " + Twine(CmdName) +
+ " command " + Twine(LoadCommandIndex) + " extends "
+ "past the end of the file");
+ uint64_t BigSize = cryptoff;
+ BigSize += cryptsize;
+ if (BigSize > FileSize)
+ return malformedError("cryptoff field plus cryptsize field of " +
+ Twine(CmdName) + " command " +
+ Twine(LoadCommandIndex) + " extends past the end of "
+ "the file");
+ *LoadCmd = Load.Ptr;
+ return Error::success();
+}
+
+static Error checkLinkerOptCommand(const MachOObjectFile &Obj,
+ const MachOObjectFile::LoadCommandInfo &Load,
+ uint32_t LoadCommandIndex) {
+ if (Load.C.cmdsize < sizeof(MachO::linker_option_command))
+ return malformedError("load command " + Twine(LoadCommandIndex) +
+ " LC_LINKER_OPTION cmdsize too small");
+ MachO::linker_option_command L =
+ getStruct<MachO::linker_option_command>(Obj, Load.Ptr);
+ // Make sure the count of strings is correct.
+ const char *string = (const char *)Load.Ptr +
+ sizeof(struct MachO::linker_option_command);
+ uint32_t left = L.cmdsize - sizeof(struct MachO::linker_option_command);
+ uint32_t i = 0;
+ while (left > 0) {
+ while (*string == '\0' && left > 0) {
+ string++;
+ left--;
+ }
+ if (left > 0) {
+ i++;
+ uint32_t NullPos = StringRef(string, left).find('\0');
+ uint32_t len = std::min(NullPos, left) + 1;
+ string += len;
+ left -= len;
+ }
+ }
+ if (L.count != i)
+ return malformedError("load command " + Twine(LoadCommandIndex) +
+ " LC_LINKER_OPTION string count " + Twine(L.count) +
+ " does not match number of strings");
+ return Error::success();
+}
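A sketch of the counting loop above as a stand-alone helper (hypothetical name): it counts the non-empty NUL-separated strings that follow the fixed linker_option_command header.

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>

static uint32_t countOptionStrings(const char *P, uint32_t Left) {
  uint32_t N = 0;
  while (Left > 0) {
    while (Left > 0 && *P == '\0') {   // skip padding NULs between strings
      ++P;
      --Left;
    }
    if (Left == 0)
      break;
    const void *Nul = std::memchr(P, '\0', Left);
    uint32_t Len =
        Nul ? static_cast<uint32_t>(static_cast<const char *>(Nul) - P) : Left;
    ++N;
    uint32_t Step = std::min(Len + 1, Left);   // advance past the terminator
    P += Step;
    Left -= Step;
  }
  return N;
}

int main() {
  const char Blob[] = "-lfoo\0-framework\0Cocoa\0";   // hypothetical payload
  assert(countOptionStrings(Blob, sizeof(Blob) - 1) == 3);
}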
+
+static Error checkSubCommand(const MachOObjectFile &Obj,
+ const MachOObjectFile::LoadCommandInfo &Load,
+ uint32_t LoadCommandIndex, const char *CmdName,
+ size_t SizeOfCmd, const char *CmdStructName,
+ uint32_t PathOffset, const char *PathFieldName) {
+ if (PathOffset < SizeOfCmd)
+ return malformedError("load command " + Twine(LoadCommandIndex) + " " +
+ CmdName + " " + PathFieldName + ".offset field too "
+ "small, not past the end of the " + CmdStructName);
+ if (PathOffset >= Load.C.cmdsize)
+ return malformedError("load command " + Twine(LoadCommandIndex) + " " +
+ CmdName + " " + PathFieldName + ".offset field "
+ "extends past the end of the load command");
+ // Make sure there is a null between the starting offset of the path and
+ // the end of the load command.
+ uint32_t i;
+ const char *P = (const char *)Load.Ptr;
+ for (i = PathOffset; i < Load.C.cmdsize; i++)
+ if (P[i] == '\0')
+ break;
+ if (i >= Load.C.cmdsize)
+ return malformedError("load command " + Twine(LoadCommandIndex) + " " +
+ CmdName + " " + PathFieldName + " name extends past "
+ "the end of the load command");
+ return Error::success();
+}
+
+static Error checkThreadCommand(const MachOObjectFile &Obj,
+ const MachOObjectFile::LoadCommandInfo &Load,
+ uint32_t LoadCommandIndex,
+ const char *CmdName) {
+ if (Load.C.cmdsize < sizeof(MachO::thread_command))
+ return malformedError("load command " + Twine(LoadCommandIndex) +
+ CmdName + " cmdsize too small");
+ MachO::thread_command T =
+ getStruct<MachO::thread_command>(Obj, Load.Ptr);
+ const char *state = Load.Ptr + sizeof(MachO::thread_command);
+ const char *end = Load.Ptr + T.cmdsize;
+ uint32_t nflavor = 0;
+ uint32_t cputype = getCPUType(Obj);
+ while (state < end) {
+ if(state + sizeof(uint32_t) > end)
+ return malformedError("load command " + Twine(LoadCommandIndex) +
+ "flavor in " + CmdName + " extends past end of "
+ "command");
+ uint32_t flavor;
+ memcpy(&flavor, state, sizeof(uint32_t));
+ if (Obj.isLittleEndian() != sys::IsLittleEndianHost)
+ sys::swapByteOrder(flavor);
+ state += sizeof(uint32_t);
+
+ if(state + sizeof(uint32_t) > end)
+ return malformedError("load command " + Twine(LoadCommandIndex) +
+ " count in " + CmdName + " extends past end of "
+ "command");
+ uint32_t count;
+ memcpy(&count, state, sizeof(uint32_t));
+ if (Obj.isLittleEndian() != sys::IsLittleEndianHost)
+ sys::swapByteOrder(count);
+ state += sizeof(uint32_t);
+
+ if (cputype == MachO::CPU_TYPE_X86_64) {
+ if (flavor == MachO::x86_THREAD_STATE64) {
+ if (count != MachO::x86_THREAD_STATE64_COUNT)
+ return malformedError("load command " + Twine(LoadCommandIndex) +
+ " count not x86_THREAD_STATE64_COUNT for "
+ "flavor number " + Twine(nflavor) + " which is "
+ "a x86_THREAD_STATE64 flavor in " + CmdName +
+ " command");
+ if (state + sizeof(MachO::x86_thread_state64_t) > end)
+ return malformedError("load command " + Twine(LoadCommandIndex) +
+ " x86_THREAD_STATE64 extends past end of "
+ "command in " + CmdName + " command");
+ state += sizeof(MachO::x86_thread_state64_t);
+ } else {
+ return malformedError("load command " + Twine(LoadCommandIndex) +
+ " unknown flavor (" + Twine(flavor) + ") for "
+ "flavor number " + Twine(nflavor) + " in " +
+ CmdName + " command");
+ }
+ } else if (cputype == MachO::CPU_TYPE_ARM) {
+ if (flavor == MachO::ARM_THREAD_STATE) {
+ if (count != MachO::ARM_THREAD_STATE_COUNT)
+ return malformedError("load command " + Twine(LoadCommandIndex) +
+ " count not ARM_THREAD_STATE_COUNT for "
+ "flavor number " + Twine(nflavor) + " which is "
+ "a ARM_THREAD_STATE flavor in " + CmdName +
+ " command");
+ if (state + sizeof(MachO::arm_thread_state32_t) > end)
+ return malformedError("load command " + Twine(LoadCommandIndex) +
+ " ARM_THREAD_STATE extends past end of "
+ "command in " + CmdName + " command");
+ state += sizeof(MachO::arm_thread_state32_t);
+ } else {
+ return malformedError("load command " + Twine(LoadCommandIndex) +
+ " unknown flavor (" + Twine(flavor) + ") for "
+ "flavor number " + Twine(nflavor) + " in " +
+ CmdName + " command");
+ }
+ } else if (cputype == MachO::CPU_TYPE_ARM64) {
+ if (flavor == MachO::ARM_THREAD_STATE64) {
+ if (count != MachO::ARM_THREAD_STATE64_COUNT)
+ return malformedError("load command " + Twine(LoadCommandIndex) +
+ " count not ARM_THREAD_STATE64_COUNT for "
+ "flavor number " + Twine(nflavor) + " which is "
+ "a ARM_THREAD_STATE64 flavor in " + CmdName +
+ " command");
+ if (state + sizeof(MachO::arm_thread_state64_t) > end)
+ return malformedError("load command " + Twine(LoadCommandIndex) +
+ " ARM_THREAD_STATE64 extends past end of "
+ "command in " + CmdName + " command");
+ state += sizeof(MachO::arm_thread_state64_t);
+ } else {
+ return malformedError("load command " + Twine(LoadCommandIndex) +
+ " unknown flavor (" + Twine(flavor) + ") for "
+ "flavor number " + Twine(nflavor) + " in " +
+ CmdName + " command");
+ }
+ } else if (cputype == MachO::CPU_TYPE_POWERPC) {
+ if (flavor == MachO::PPC_THREAD_STATE) {
+ if (count != MachO::PPC_THREAD_STATE_COUNT)
+ return malformedError("load command " + Twine(LoadCommandIndex) +
+ " count not PPC_THREAD_STATE_COUNT for "
+ "flavor number " + Twine(nflavor) + " which is "
+ "a PPC_THREAD_STATE flavor in " + CmdName +
+ " command");
+ if (state + sizeof(MachO::ppc_thread_state32_t) > end)
+ return malformedError("load command " + Twine(LoadCommandIndex) +
+ " PPC_THREAD_STATE extends past end of "
+ "command in " + CmdName + " command");
+ state += sizeof(MachO::ppc_thread_state32_t);
+ } else {
+ return malformedError("load command " + Twine(LoadCommandIndex) +
+ " unknown flavor (" + Twine(flavor) + ") for "
+ "flavor number " + Twine(nflavor) + " in " +
+ CmdName + " command");
+ }
+ } else {
+ return malformedError("unknown cputype (" + Twine(cputype) + ") load "
+ "command " + Twine(LoadCommandIndex) + " for " +
+ CmdName + " command can't be checked");
+ }
+ nflavor++;
+ }
+ return Error::success();
+}
+
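+// Checks an LC_TWOLEVEL_HINTS command: the cmdsize must be exact, only one
+// such command may appear, and the hint table (offset plus nhints entries)
+// must lie within the file and must not overlap any other element.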
+static Error checkTwoLevelHintsCommand(const MachOObjectFile &Obj,
+ const MachOObjectFile::LoadCommandInfo
+ &Load,
+ uint32_t LoadCommandIndex,
+ const char **LoadCmd,
+ std::list<MachOElement> &Elements) {
+ if (Load.C.cmdsize != sizeof(MachO::twolevel_hints_command))
+ return malformedError("load command " + Twine(LoadCommandIndex) +
+ " LC_TWOLEVEL_HINTS has incorrect cmdsize");
+ if (*LoadCmd != nullptr)
+ return malformedError("more than one LC_TWOLEVEL_HINTS command");
+ MachO::twolevel_hints_command Hints =
+ getStruct<MachO::twolevel_hints_command>(Obj, Load.Ptr);
+ uint64_t FileSize = Obj.getData().size();
+ if (Hints.offset > FileSize)
+ return malformedError("offset field of LC_TWOLEVEL_HINTS command " +
+ Twine(LoadCommandIndex) + " extends past the end of "
+ "the file");
+ uint64_t BigSize = Hints.nhints;
+ BigSize *= sizeof(MachO::twolevel_hint);
+ BigSize += Hints.offset;
+ if (BigSize > FileSize)
+ return malformedError("offset field plus nhints times sizeof(struct "
+ "twolevel_hint) field of LC_TWOLEVEL_HINTS command " +
+ Twine(LoadCommandIndex) + " extends past the end of "
+ "the file");
+ if (Error Err = checkOverlappingElement(Elements, Hints.offset, Hints.nhints *
+ sizeof(MachO::twolevel_hint),
+ "two level hints"))
+ return Err;
+ *LoadCmd = Load.Ptr;
+ return Error::success();
+}
+
+// Returns true if the libObject code does not support the load command and its
+// contents. The cmd value is treated as an unknown load command, but with an
+// error message that says the cmd value is obsolete.
+static bool isLoadCommandObsolete(uint32_t cmd) {
+ if (cmd == MachO::LC_SYMSEG ||
+ cmd == MachO::LC_LOADFVMLIB ||
+ cmd == MachO::LC_IDFVMLIB ||
+ cmd == MachO::LC_IDENT ||
+ cmd == MachO::LC_FVMFILE ||
+ cmd == MachO::LC_PREPAGE ||
+ cmd == MachO::LC_PREBOUND_DYLIB ||
+ cmd == MachO::LC_TWOLEVEL_HINTS ||
+ cmd == MachO::LC_PREBIND_CKSUM)
+ return true;
+ return false;
+}
+
Expected<std::unique_ptr<MachOObjectFile>>
MachOObjectFile::create(MemoryBufferRef Object, bool IsLittleEndian,
- bool Is64Bits) {
- Error Err;
+ bool Is64Bits, uint32_t UniversalCputype,
+ uint32_t UniversalIndex) {
+ Error Err = Error::success();
std::unique_ptr<MachOObjectFile> Obj(
new MachOObjectFile(std::move(Object), IsLittleEndian,
- Is64Bits, Err));
+ Is64Bits, Err, UniversalCputype,
+ UniversalIndex));
if (Err)
return std::move(Err);
return std::move(Obj);
}
MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian,
- bool Is64bits, Error &Err)
+ bool Is64bits, Error &Err,
+ uint32_t UniversalCputype,
+ uint32_t UniversalIndex)
: ObjectFile(getMachOType(IsLittleEndian, Is64bits), Object),
SymtabLoadCmd(nullptr), DysymtabLoadCmd(nullptr),
DataInCodeLoadCmd(nullptr), LinkOptHintsLoadCmd(nullptr),
DyldInfoLoadCmd(nullptr), UuidLoadCmd(nullptr),
HasPageZeroSegment(false) {
- ErrorAsOutParameter ErrAsOutParam(Err);
- uint64_t BigSize;
+ ErrorAsOutParameter ErrAsOutParam(&Err);
+ uint64_t SizeOfHeaders;
+ uint32_t cputype;
if (is64Bit()) {
- parseHeader(this, Header64, Err);
- BigSize = sizeof(MachO::mach_header_64);
+ parseHeader(*this, Header64, Err);
+ SizeOfHeaders = sizeof(MachO::mach_header_64);
+ cputype = Header64.cputype;
} else {
- parseHeader(this, Header, Err);
- BigSize = sizeof(MachO::mach_header);
+ parseHeader(*this, Header, Err);
+ SizeOfHeaders = sizeof(MachO::mach_header);
+ cputype = Header.cputype;
}
if (Err)
return;
- BigSize += getHeader().sizeofcmds;
- if (getData().data() + BigSize > getData().end()) {
+ SizeOfHeaders += getHeader().sizeofcmds;
+ if (getData().data() + SizeOfHeaders > getData().end()) {
Err = malformedError("load commands extend past the end of the file");
return;
}
-
- uint32_t LoadCommandCount = getHeader().ncmds;
- if (LoadCommandCount == 0)
+ if (UniversalCputype != 0 && cputype != UniversalCputype) {
+ Err = malformedError("universal header architecture: " +
+ Twine(UniversalIndex) + "'s cputype does not match "
+ "object file's mach header");
return;
+ }
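+ // Track the byte ranges claimed by the headers and by each load command's
+ // payload so the checkers below can reject overlapping elements.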
+ std::list<MachOElement> Elements;
+ Elements.push_back({0, SizeOfHeaders, "Mach-O headers"});
+ uint32_t LoadCommandCount = getHeader().ncmds;
LoadCommandInfo Load;
- if (auto LoadOrErr = getFirstLoadCommandInfo(this))
- Load = *LoadOrErr;
- else {
- Err = LoadOrErr.takeError();
- return;
+ if (LoadCommandCount != 0) {
+ if (auto LoadOrErr = getFirstLoadCommandInfo(*this))
+ Load = *LoadOrErr;
+ else {
+ Err = LoadOrErr.takeError();
+ return;
+ }
}
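+ // Each of these load commands may appear at most once; the first occurrence
+ // is remembered so the checkers below can diagnose duplicates.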
+ const char *DyldIdLoadCmd = nullptr;
+ const char *FuncStartsLoadCmd = nullptr;
+ const char *SplitInfoLoadCmd = nullptr;
+ const char *CodeSignDrsLoadCmd = nullptr;
+ const char *CodeSignLoadCmd = nullptr;
+ const char *VersLoadCmd = nullptr;
+ const char *SourceLoadCmd = nullptr;
+ const char *EntryPointLoadCmd = nullptr;
+ const char *EncryptLoadCmd = nullptr;
+ const char *RoutinesLoadCmd = nullptr;
+ const char *UnixThreadLoadCmd = nullptr;
+ const char *TwoLevelHintsLoadCmd = nullptr;
for (unsigned I = 0; I < LoadCommandCount; ++I) {
if (is64Bit()) {
if (Load.C.cmdsize % 8 != 0) {
@@ -318,66 +1164,274 @@ MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian,
}
LoadCommands.push_back(Load);
if (Load.C.cmd == MachO::LC_SYMTAB) {
- // Multiple symbol tables
- if (SymtabLoadCmd) {
- Err = malformedError("Multiple symbol tables");
+ if ((Err = checkSymtabCommand(*this, Load, I, &SymtabLoadCmd, Elements)))
return;
- }
- SymtabLoadCmd = Load.Ptr;
} else if (Load.C.cmd == MachO::LC_DYSYMTAB) {
- // Multiple dynamic symbol tables
- if (DysymtabLoadCmd) {
- Err = malformedError("Multiple dynamic symbol tables");
+ if ((Err = checkDysymtabCommand(*this, Load, I, &DysymtabLoadCmd,
+ Elements)))
return;
- }
- DysymtabLoadCmd = Load.Ptr;
} else if (Load.C.cmd == MachO::LC_DATA_IN_CODE) {
- // Multiple data in code tables
- if (DataInCodeLoadCmd) {
- Err = malformedError("Multiple data-in-code tables");
+ if ((Err = checkLinkeditDataCommand(*this, Load, I, &DataInCodeLoadCmd,
+ "LC_DATA_IN_CODE", Elements,
+ "data in code info")))
return;
- }
- DataInCodeLoadCmd = Load.Ptr;
} else if (Load.C.cmd == MachO::LC_LINKER_OPTIMIZATION_HINT) {
- // Multiple linker optimization hint tables
- if (LinkOptHintsLoadCmd) {
- Err = malformedError("Multiple linker optimization hint tables");
+ if ((Err = checkLinkeditDataCommand(*this, Load, I, &LinkOptHintsLoadCmd,
+ "LC_LINKER_OPTIMIZATION_HINT",
+ Elements, "linker optimization "
+ "hints")))
return;
- }
- LinkOptHintsLoadCmd = Load.Ptr;
- } else if (Load.C.cmd == MachO::LC_DYLD_INFO ||
- Load.C.cmd == MachO::LC_DYLD_INFO_ONLY) {
- // Multiple dyldinfo load commands
- if (DyldInfoLoadCmd) {
- Err = malformedError("Multiple dyldinfo load commands");
+ } else if (Load.C.cmd == MachO::LC_FUNCTION_STARTS) {
+ if ((Err = checkLinkeditDataCommand(*this, Load, I, &FuncStartsLoadCmd,
+ "LC_FUNCTION_STARTS", Elements,
+ "function starts data")))
+ return;
+ } else if (Load.C.cmd == MachO::LC_SEGMENT_SPLIT_INFO) {
+ if ((Err = checkLinkeditDataCommand(*this, Load, I, &SplitInfoLoadCmd,
+ "LC_SEGMENT_SPLIT_INFO", Elements,
+ "split info data")))
+ return;
+ } else if (Load.C.cmd == MachO::LC_DYLIB_CODE_SIGN_DRS) {
+ if ((Err = checkLinkeditDataCommand(*this, Load, I, &CodeSignDrsLoadCmd,
+ "LC_DYLIB_CODE_SIGN_DRS", Elements,
+ "code signing RDs data")))
+ return;
+ } else if (Load.C.cmd == MachO::LC_CODE_SIGNATURE) {
+ if ((Err = checkLinkeditDataCommand(*this, Load, I, &CodeSignLoadCmd,
+ "LC_CODE_SIGNATURE", Elements,
+ "code signature data")))
+ return;
+ } else if (Load.C.cmd == MachO::LC_DYLD_INFO) {
+ if ((Err = checkDyldInfoCommand(*this, Load, I, &DyldInfoLoadCmd,
+ "LC_DYLD_INFO", Elements)))
+ return;
+ } else if (Load.C.cmd == MachO::LC_DYLD_INFO_ONLY) {
+ if ((Err = checkDyldInfoCommand(*this, Load, I, &DyldInfoLoadCmd,
+ "LC_DYLD_INFO_ONLY", Elements)))
return;
- }
- DyldInfoLoadCmd = Load.Ptr;
} else if (Load.C.cmd == MachO::LC_UUID) {
- // Multiple UUID load commands
+ if (Load.C.cmdsize != sizeof(MachO::uuid_command)) {
+ Err = malformedError("LC_UUID command " + Twine(I) + " has incorrect "
+ "cmdsize");
+ return;
+ }
if (UuidLoadCmd) {
- Err = malformedError("Multiple UUID load commands");
+ Err = malformedError("more than one LC_UUID command");
return;
}
UuidLoadCmd = Load.Ptr;
} else if (Load.C.cmd == MachO::LC_SEGMENT_64) {
- if ((Err = parseSegmentLoadCommand<MachO::segment_command_64>(
- this, Load, Sections, HasPageZeroSegment, I,
- "LC_SEGMENT_64")))
+ if ((Err = parseSegmentLoadCommand<MachO::segment_command_64,
+ MachO::section_64>(
+ *this, Load, Sections, HasPageZeroSegment, I,
+ "LC_SEGMENT_64", SizeOfHeaders, Elements)))
return;
} else if (Load.C.cmd == MachO::LC_SEGMENT) {
- if ((Err = parseSegmentLoadCommand<MachO::segment_command>(
- this, Load, Sections, HasPageZeroSegment, I, "LC_SEGMENT")))
+ if ((Err = parseSegmentLoadCommand<MachO::segment_command,
+ MachO::section>(
+ *this, Load, Sections, HasPageZeroSegment, I,
+ "LC_SEGMENT", SizeOfHeaders, Elements)))
+ return;
+ } else if (Load.C.cmd == MachO::LC_ID_DYLIB) {
+ if ((Err = checkDylibIdCommand(*this, Load, I, &DyldIdLoadCmd)))
+ return;
+ } else if (Load.C.cmd == MachO::LC_LOAD_DYLIB) {
+ if ((Err = checkDylibCommand(*this, Load, I, "LC_LOAD_DYLIB")))
+ return;
+ Libraries.push_back(Load.Ptr);
+ } else if (Load.C.cmd == MachO::LC_LOAD_WEAK_DYLIB) {
+ if ((Err = checkDylibCommand(*this, Load, I, "LC_LOAD_WEAK_DYLIB")))
+ return;
+ Libraries.push_back(Load.Ptr);
+ } else if (Load.C.cmd == MachO::LC_LAZY_LOAD_DYLIB) {
+ if ((Err = checkDylibCommand(*this, Load, I, "LC_LAZY_LOAD_DYLIB")))
+ return;
+ Libraries.push_back(Load.Ptr);
+ } else if (Load.C.cmd == MachO::LC_REEXPORT_DYLIB) {
+ if ((Err = checkDylibCommand(*this, Load, I, "LC_REEXPORT_DYLIB")))
return;
- } else if (Load.C.cmd == MachO::LC_LOAD_DYLIB ||
- Load.C.cmd == MachO::LC_LOAD_WEAK_DYLIB ||
- Load.C.cmd == MachO::LC_LAZY_LOAD_DYLIB ||
- Load.C.cmd == MachO::LC_REEXPORT_DYLIB ||
- Load.C.cmd == MachO::LC_LOAD_UPWARD_DYLIB) {
Libraries.push_back(Load.Ptr);
+ } else if (Load.C.cmd == MachO::LC_LOAD_UPWARD_DYLIB) {
+ if ((Err = checkDylibCommand(*this, Load, I, "LC_LOAD_UPWARD_DYLIB")))
+ return;
+ Libraries.push_back(Load.Ptr);
+ } else if (Load.C.cmd == MachO::LC_ID_DYLINKER) {
+ if ((Err = checkDyldCommand(*this, Load, I, "LC_ID_DYLINKER")))
+ return;
+ } else if (Load.C.cmd == MachO::LC_LOAD_DYLINKER) {
+ if ((Err = checkDyldCommand(*this, Load, I, "LC_LOAD_DYLINKER")))
+ return;
+ } else if (Load.C.cmd == MachO::LC_DYLD_ENVIRONMENT) {
+ if ((Err = checkDyldCommand(*this, Load, I, "LC_DYLD_ENVIRONMENT")))
+ return;
+ } else if (Load.C.cmd == MachO::LC_VERSION_MIN_MACOSX) {
+ if ((Err = checkVersCommand(*this, Load, I, &VersLoadCmd,
+ "LC_VERSION_MIN_MACOSX")))
+ return;
+ } else if (Load.C.cmd == MachO::LC_VERSION_MIN_IPHONEOS) {
+ if ((Err = checkVersCommand(*this, Load, I, &VersLoadCmd,
+ "LC_VERSION_MIN_IPHONEOS")))
+ return;
+ } else if (Load.C.cmd == MachO::LC_VERSION_MIN_TVOS) {
+ if ((Err = checkVersCommand(*this, Load, I, &VersLoadCmd,
+ "LC_VERSION_MIN_TVOS")))
+ return;
+ } else if (Load.C.cmd == MachO::LC_VERSION_MIN_WATCHOS) {
+ if ((Err = checkVersCommand(*this, Load, I, &VersLoadCmd,
+ "LC_VERSION_MIN_WATCHOS")))
+ return;
+ } else if (Load.C.cmd == MachO::LC_RPATH) {
+ if ((Err = checkRpathCommand(*this, Load, I)))
+ return;
+ } else if (Load.C.cmd == MachO::LC_SOURCE_VERSION) {
+ if (Load.C.cmdsize != sizeof(MachO::source_version_command)) {
+ Err = malformedError("LC_SOURCE_VERSION command " + Twine(I) +
+ " has incorrect cmdsize");
+ return;
+ }
+ if (SourceLoadCmd) {
+ Err = malformedError("more than one LC_SOURCE_VERSION command");
+ return;
+ }
+ SourceLoadCmd = Load.Ptr;
+ } else if (Load.C.cmd == MachO::LC_MAIN) {
+ if (Load.C.cmdsize != sizeof(MachO::entry_point_command)) {
+ Err = malformedError("LC_MAIN command " + Twine(I) +
+ " has incorrect cmdsize");
+ return;
+ }
+ if (EntryPointLoadCmd) {
+ Err = malformedError("more than one LC_MAIN command");
+ return;
+ }
+ EntryPointLoadCmd = Load.Ptr;
+ } else if (Load.C.cmd == MachO::LC_ENCRYPTION_INFO) {
+ if (Load.C.cmdsize != sizeof(MachO::encryption_info_command)) {
+ Err = malformedError("LC_ENCRYPTION_INFO command " + Twine(I) +
+ " has incorrect cmdsize");
+ return;
+ }
+ MachO::encryption_info_command E =
+ getStruct<MachO::encryption_info_command>(*this, Load.Ptr);
+ if ((Err = checkEncryptCommand(*this, Load, I, E.cryptoff, E.cryptsize,
+ &EncryptLoadCmd, "LC_ENCRYPTION_INFO")))
+ return;
+ } else if (Load.C.cmd == MachO::LC_ENCRYPTION_INFO_64) {
+ if (Load.C.cmdsize != sizeof(MachO::encryption_info_command_64)) {
+ Err = malformedError("LC_ENCRYPTION_INFO_64 command " + Twine(I) +
+ " has incorrect cmdsize");
+ return;
+ }
+ MachO::encryption_info_command_64 E =
+ getStruct<MachO::encryption_info_command_64>(*this, Load.Ptr);
+ if ((Err = checkEncryptCommand(*this, Load, I, E.cryptoff, E.cryptsize,
+ &EncryptLoadCmd, "LC_ENCRYPTION_INFO_64")))
+ return;
+ } else if (Load.C.cmd == MachO::LC_LINKER_OPTION) {
+ if ((Err = checkLinkerOptCommand(*this, Load, I)))
+ return;
+ } else if (Load.C.cmd == MachO::LC_SUB_FRAMEWORK) {
+ if (Load.C.cmdsize < sizeof(MachO::sub_framework_command)) {
+ Err = malformedError("load command " + Twine(I) +
+ " LC_SUB_FRAMEWORK cmdsize too small");
+ return;
+ }
+ MachO::sub_framework_command S =
+ getStruct<MachO::sub_framework_command>(*this, Load.Ptr);
+ if ((Err = checkSubCommand(*this, Load, I, "LC_SUB_FRAMEWORK",
+ sizeof(MachO::sub_framework_command),
+ "sub_framework_command", S.umbrella,
+ "umbrella")))
+ return;
+ } else if (Load.C.cmd == MachO::LC_SUB_UMBRELLA) {
+ if (Load.C.cmdsize < sizeof(MachO::sub_umbrella_command)) {
+ Err = malformedError("load command " + Twine(I) +
+ " LC_SUB_UMBRELLA cmdsize too small");
+ return;
+ }
+ MachO::sub_umbrella_command S =
+ getStruct<MachO::sub_umbrella_command>(*this, Load.Ptr);
+ if ((Err = checkSubCommand(*this, Load, I, "LC_SUB_UMBRELLA",
+ sizeof(MachO::sub_umbrella_command),
+ "sub_umbrella_command", S.sub_umbrella,
+ "sub_umbrella")))
+ return;
+ } else if (Load.C.cmd == MachO::LC_SUB_LIBRARY) {
+ if (Load.C.cmdsize < sizeof(MachO::sub_library_command)) {
+ Err = malformedError("load command " + Twine(I) +
+ " LC_SUB_LIBRARY cmdsize too small");
+ return;
+ }
+ MachO::sub_library_command S =
+ getStruct<MachO::sub_library_command>(*this, Load.Ptr);
+ if ((Err = checkSubCommand(*this, Load, I, "LC_SUB_LIBRARY",
+ sizeof(MachO::sub_library_command),
+ "sub_library_command", S.sub_library,
+ "sub_library")))
+ return;
+ } else if (Load.C.cmd == MachO::LC_SUB_CLIENT) {
+ if (Load.C.cmdsize < sizeof(MachO::sub_client_command)) {
+ Err = malformedError("load command " + Twine(I) +
+ " LC_SUB_CLIENT cmdsize too small");
+ return;
+ }
+ MachO::sub_client_command S =
+ getStruct<MachO::sub_client_command>(*this, Load.Ptr);
+ if ((Err = checkSubCommand(*this, Load, I, "LC_SUB_CLIENT",
+ sizeof(MachO::sub_client_command),
+ "sub_client_command", S.client, "client")))
+ return;
+ } else if (Load.C.cmd == MachO::LC_ROUTINES) {
+ if (Load.C.cmdsize != sizeof(MachO::routines_command)) {
+ Err = malformedError("LC_ROUTINES command " + Twine(I) +
+ " has incorrect cmdsize");
+ return;
+ }
+ if (RoutinesLoadCmd) {
+ Err = malformedError("more than one LC_ROUTINES and or LC_ROUTINES_64 "
+ "command");
+ return;
+ }
+ RoutinesLoadCmd = Load.Ptr;
+ } else if (Load.C.cmd == MachO::LC_ROUTINES_64) {
+ if (Load.C.cmdsize != sizeof(MachO::routines_command_64)) {
+ Err = malformedError("LC_ROUTINES_64 command " + Twine(I) +
+ " has incorrect cmdsize");
+ return;
+ }
+ if (RoutinesLoadCmd) {
+ Err = malformedError("more than one LC_ROUTINES_64 and or LC_ROUTINES "
+ "command");
+ return;
+ }
+ RoutinesLoadCmd = Load.Ptr;
+ } else if (Load.C.cmd == MachO::LC_UNIXTHREAD) {
+ if ((Err = checkThreadCommand(*this, Load, I, "LC_UNIXTHREAD")))
+ return;
+ if (UnixThreadLoadCmd) {
+ Err = malformedError("more than one LC_UNIXTHREAD command");
+ return;
+ }
+ UnixThreadLoadCmd = Load.Ptr;
+ } else if (Load.C.cmd == MachO::LC_THREAD) {
+ if ((Err = checkThreadCommand(*this, Load, I, "LC_THREAD")))
+ return;
+ // Note: LC_TWOLEVEL_HINTS is really obsolete and is not supported.
+ } else if (Load.C.cmd == MachO::LC_TWOLEVEL_HINTS) {
+ if ((Err = checkTwoLevelHintsCommand(*this, Load, I,
+ &TwoLevelHintsLoadCmd, Elements)))
+ return;
+ } else if (isLoadCommandObsolete(Load.C.cmd)) {
+ Err = malformedError("load command " + Twine(I) + " for cmd value of: " +
+ Twine(Load.C.cmd) + " is obsolete and not "
+ "supported");
+ return;
}
+ // TODO: generate an error for unknown load commands by default. But we still
+ // need to work out an approach to allow or disallow unknown values like this
+ // as an option for some uses like lldb.
if (I < LoadCommandCount - 1) {
- if (auto LoadOrErr = getNextLoadCommandInfo(this, I, Load))
+ if (auto LoadOrErr = getNextLoadCommandInfo(*this, I, Load))
Load = *LoadOrErr;
else {
Err = LoadOrErr.takeError();
@@ -393,9 +1447,9 @@ MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian,
}
} else if (DysymtabLoadCmd) {
MachO::symtab_command Symtab =
- getStruct<MachO::symtab_command>(this, SymtabLoadCmd);
+ getStruct<MachO::symtab_command>(*this, SymtabLoadCmd);
MachO::dysymtab_command Dysymtab =
- getStruct<MachO::dysymtab_command>(this, DysymtabLoadCmd);
+ getStruct<MachO::dysymtab_command>(*this, DysymtabLoadCmd);
if (Dysymtab.nlocalsym != 0 && Dysymtab.ilocalsym > Symtab.nsyms) {
Err = malformedError("ilocalsym in LC_DYSYMTAB load command "
"extends past the end of the symbol table");
@@ -434,11 +1488,86 @@ MachOObjectFile::MachOObjectFile(MemoryBufferRef Object, bool IsLittleEndian,
return;
}
}
+ if ((getHeader().filetype == MachO::MH_DYLIB ||
+ getHeader().filetype == MachO::MH_DYLIB_STUB) &&
+ DyldIdLoadCmd == nullptr) {
+ Err = malformedError("no LC_ID_DYLIB load command in dynamic library "
+ "filetype");
+ return;
+ }
assert(LoadCommands.size() == LoadCommandCount);
Err = Error::success();
}
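+// Walks every symbol table entry and verifies its section index, its string
+// table index, that the n_value of an N_INDR symbol stays within the string
+// table, and, for two-level namespace binaries, the library ordinal encoded
+// in n_desc.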
+Error MachOObjectFile::checkSymbolTable() const {
+ uint32_t Flags = 0;
+ if (is64Bit()) {
+ MachO::mach_header_64 H_64 = MachOObjectFile::getHeader64();
+ Flags = H_64.flags;
+ } else {
+ MachO::mach_header H = MachOObjectFile::getHeader();
+ Flags = H.flags;
+ }
+ uint8_t NType = 0;
+ uint8_t NSect = 0;
+ uint16_t NDesc = 0;
+ uint32_t NStrx = 0;
+ uint64_t NValue = 0;
+ uint32_t SymbolIndex = 0;
+ MachO::symtab_command S = getSymtabLoadCommand();
+ for (const SymbolRef &Symbol : symbols()) {
+ DataRefImpl SymDRI = Symbol.getRawDataRefImpl();
+ if (is64Bit()) {
+ MachO::nlist_64 STE_64 = getSymbol64TableEntry(SymDRI);
+ NType = STE_64.n_type;
+ NSect = STE_64.n_sect;
+ NDesc = STE_64.n_desc;
+ NStrx = STE_64.n_strx;
+ NValue = STE_64.n_value;
+ } else {
+ MachO::nlist STE = getSymbolTableEntry(SymDRI);
+ NType = STE.n_type;
+ NSect = STE.n_sect;
+ NDesc = STE.n_desc;
+ NStrx = STE.n_strx;
+ NValue = STE.n_value;
+ }
+ if ((NType & MachO::N_STAB) == 0 &&
+ (NType & MachO::N_TYPE) == MachO::N_SECT) {
+ if (NSect == 0 || NSect > Sections.size())
+ return malformedError("bad section index: " + Twine((int)NSect) +
+ " for symbol at index " + Twine(SymbolIndex));
+ }
+ if ((NType & MachO::N_STAB) == 0 &&
+ (NType & MachO::N_TYPE) == MachO::N_INDR) {
+ if (NValue >= S.strsize)
+ return malformedError("bad n_value: " + Twine((int)NValue) + " past "
+ "the end of string table, for N_INDR symbol at "
+ "index " + Twine(SymbolIndex));
+ }
+ if ((Flags & MachO::MH_TWOLEVEL) == MachO::MH_TWOLEVEL &&
+ (((NType & MachO::N_TYPE) == MachO::N_UNDF && NValue == 0) ||
+ (NType & MachO::N_TYPE) == MachO::N_PBUD)) {
+ uint32_t LibraryOrdinal = MachO::GET_LIBRARY_ORDINAL(NDesc);
+ if (LibraryOrdinal != 0 &&
+ LibraryOrdinal != MachO::EXECUTABLE_ORDINAL &&
+ LibraryOrdinal != MachO::DYNAMIC_LOOKUP_ORDINAL &&
+ LibraryOrdinal - 1 >= Libraries.size() ) {
+ return malformedError("bad library ordinal: " + Twine(LibraryOrdinal) +
+ " for symbol at index " + Twine(SymbolIndex));
+ }
+ }
+ if (NStrx >= S.strsize)
+ return malformedError("bad string table index: " + Twine((int)NStrx) +
+ " past the end of string table, for symbol at "
+ "index " + Twine(SymbolIndex));
+ SymbolIndex++;
+ }
+ return Error::success();
+}
+
void MachOObjectFile::moveSymbolNext(DataRefImpl &Symb) const {
unsigned SymbolTableEntrySize = is64Bit() ?
sizeof(MachO::nlist_64) :
@@ -448,7 +1577,7 @@ void MachOObjectFile::moveSymbolNext(DataRefImpl &Symb) const {
Expected<StringRef> MachOObjectFile::getSymbolName(DataRefImpl Symb) const {
StringRef StringTable = getStringTableData();
- MachO::nlist_base Entry = getSymbolTableEntryBase(this, Symb);
+ MachO::nlist_base Entry = getSymbolTableEntryBase(*this, Symb);
const char *Start = &StringTable.data()[Entry.n_strx];
if (Start < getData().begin() || Start >= getData().end()) {
return malformedError("bad string index: " + Twine(Entry.n_strx) +
@@ -459,7 +1588,7 @@ Expected<StringRef> MachOObjectFile::getSymbolName(DataRefImpl Symb) const {
unsigned MachOObjectFile::getSectionType(SectionRef Sec) const {
DataRefImpl DRI = Sec.getRawDataRefImpl();
- uint32_t Flags = getSectionFlags(this, DRI);
+ uint32_t Flags = getSectionFlags(*this, DRI);
return Flags & MachO::SECTION_TYPE;
}
@@ -477,7 +1606,7 @@ uint64_t MachOObjectFile::getNValue(DataRefImpl Sym) const {
std::error_code MachOObjectFile::getIndirectName(DataRefImpl Symb,
StringRef &Res) const {
StringRef StringTable = getStringTableData();
- MachO::nlist_base Entry = getSymbolTableEntryBase(this, Symb);
+ MachO::nlist_base Entry = getSymbolTableEntryBase(*this, Symb);
if ((Entry.n_type & MachO::N_TYPE) != MachO::N_INDR)
return object_error::parse_failed;
uint64_t NValue = getNValue(Symb);
@@ -499,7 +1628,7 @@ Expected<uint64_t> MachOObjectFile::getSymbolAddress(DataRefImpl Sym) const {
uint32_t MachOObjectFile::getSymbolAlignment(DataRefImpl DRI) const {
uint32_t flags = getSymbolFlags(DRI);
if (flags & SymbolRef::SF_Common) {
- MachO::nlist_base Entry = getSymbolTableEntryBase(this, DRI);
+ MachO::nlist_base Entry = getSymbolTableEntryBase(*this, DRI);
return 1 << MachO::GET_COMM_ALIGN(Entry.n_desc);
}
return 0;
@@ -511,7 +1640,7 @@ uint64_t MachOObjectFile::getCommonSymbolSizeImpl(DataRefImpl DRI) const {
Expected<SymbolRef::Type>
MachOObjectFile::getSymbolType(DataRefImpl Symb) const {
- MachO::nlist_base Entry = getSymbolTableEntryBase(this, Symb);
+ MachO::nlist_base Entry = getSymbolTableEntryBase(*this, Symb);
uint8_t n_type = Entry.n_type;
// If this is a STAB debugging symbol, we can do nothing more.
@@ -534,7 +1663,7 @@ MachOObjectFile::getSymbolType(DataRefImpl Symb) const {
}
uint32_t MachOObjectFile::getSymbolFlags(DataRefImpl DRI) const {
- MachO::nlist_base Entry = getSymbolTableEntryBase(this, DRI);
+ MachO::nlist_base Entry = getSymbolTableEntryBase(*this, DRI);
uint8_t MachOType = Entry.n_type;
uint16_t MachOFlags = Entry.n_desc;
@@ -574,7 +1703,7 @@ uint32_t MachOObjectFile::getSymbolFlags(DataRefImpl DRI) const {
Expected<section_iterator>
MachOObjectFile::getSymbolSection(DataRefImpl Symb) const {
- MachO::nlist_base Entry = getSymbolTableEntryBase(this, Symb);
+ MachO::nlist_base Entry = getSymbolTableEntryBase(*this, Symb);
uint8_t index = Entry.n_sect;
if (index == 0)
@@ -590,7 +1719,7 @@ MachOObjectFile::getSymbolSection(DataRefImpl Symb) const {
unsigned MachOObjectFile::getSymbolSectionID(SymbolRef Sym) const {
MachO::nlist_base Entry =
- getSymbolTableEntryBase(this, Sym.getRawDataRefImpl());
+ getSymbolTableEntryBase(*this, Sym.getRawDataRefImpl());
return Entry.n_sect - 1;
}
@@ -677,12 +1806,12 @@ bool MachOObjectFile::isSectionCompressed(DataRefImpl Sec) const {
}
bool MachOObjectFile::isSectionText(DataRefImpl Sec) const {
- uint32_t Flags = getSectionFlags(this, Sec);
+ uint32_t Flags = getSectionFlags(*this, Sec);
return Flags & MachO::S_ATTR_PURE_INSTRUCTIONS;
}
bool MachOObjectFile::isSectionData(DataRefImpl Sec) const {
- uint32_t Flags = getSectionFlags(this, Sec);
+ uint32_t Flags = getSectionFlags(*this, Sec);
unsigned SectionType = Flags & MachO::SECTION_TYPE;
return !(Flags & MachO::S_ATTR_PURE_INSTRUCTIONS) &&
!(SectionType == MachO::S_ZEROFILL ||
@@ -690,7 +1819,7 @@ bool MachOObjectFile::isSectionData(DataRefImpl Sec) const {
}
bool MachOObjectFile::isSectionBSS(DataRefImpl Sec) const {
- uint32_t Flags = getSectionFlags(this, Sec);
+ uint32_t Flags = getSectionFlags(*this, Sec);
unsigned SectionType = Flags & MachO::SECTION_TYPE;
return !(Flags & MachO::S_ATTR_PURE_INSTRUCTIONS) &&
(SectionType == MachO::S_ZEROFILL ||
@@ -766,7 +1895,7 @@ MachOObjectFile::getRelocationSymbol(DataRefImpl Rel) const {
sizeof(MachO::nlist);
uint64_t Offset = S.symoff + SymbolIdx * SymbolTableEntrySize;
DataRefImpl Sym;
- Sym.p = reinterpret_cast<uintptr_t>(getPtr(this, Offset));
+ Sym.p = reinterpret_cast<uintptr_t>(getPtr(*this, Offset));
return symbol_iterator(SymbolRef(Sym, this));
}
@@ -1051,7 +2180,7 @@ std::error_code MachOObjectFile::getLibraryShortNameByIndex(unsigned Index,
if (LibrariesShortNames.size() == 0) {
for (unsigned i = 0; i < Libraries.size(); i++) {
MachO::dylib_command D =
- getStruct<MachO::dylib_command>(this, Libraries[i]);
+ getStruct<MachO::dylib_command>(*this, Libraries[i]);
if (D.dylib.name >= D.cmdsize)
return object_error::parse_failed;
const char *P = (const char *)(Libraries[i]) + D.dylib.name;
@@ -1079,7 +2208,7 @@ MachOObjectFile::getRelocationRelocatedSection(relocation_iterator Rel) const {
return section_iterator(SectionRef(Sec, this));
}
-basic_symbol_iterator MachOObjectFile::symbol_begin_impl() const {
+basic_symbol_iterator MachOObjectFile::symbol_begin() const {
DataRefImpl DRI;
MachO::symtab_command Symtab = getSymtabLoadCommand();
if (!SymtabLoadCmd || Symtab.nsyms == 0)
@@ -1088,7 +2217,7 @@ basic_symbol_iterator MachOObjectFile::symbol_begin_impl() const {
return getSymbolByIndex(0);
}
-basic_symbol_iterator MachOObjectFile::symbol_end_impl() const {
+basic_symbol_iterator MachOObjectFile::symbol_end() const {
DataRefImpl DRI;
MachO::symtab_command Symtab = getSymtabLoadCommand();
if (!SymtabLoadCmd || Symtab.nsyms == 0)
@@ -1099,7 +2228,7 @@ basic_symbol_iterator MachOObjectFile::symbol_end_impl() const {
sizeof(MachO::nlist);
unsigned Offset = Symtab.symoff +
Symtab.nsyms * SymbolTableEntrySize;
- DRI.p = reinterpret_cast<uintptr_t>(getPtr(this, Offset));
+ DRI.p = reinterpret_cast<uintptr_t>(getPtr(*this, Offset));
return basic_symbol_iterator(SymbolRef(DRI, this));
}
@@ -1110,7 +2239,7 @@ basic_symbol_iterator MachOObjectFile::getSymbolByIndex(unsigned Index) const {
unsigned SymbolTableEntrySize =
is64Bit() ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist);
DataRefImpl DRI;
- DRI.p = reinterpret_cast<uintptr_t>(getPtr(this, Symtab.symoff));
+ DRI.p = reinterpret_cast<uintptr_t>(getPtr(*this, Symtab.symoff));
DRI.p += Index * SymbolTableEntrySize;
return basic_symbol_iterator(SymbolRef(DRI, this));
}
@@ -1122,7 +2251,7 @@ uint64_t MachOObjectFile::getSymbolIndex(DataRefImpl Symb) const {
unsigned SymbolTableEntrySize =
is64Bit() ? sizeof(MachO::nlist_64) : sizeof(MachO::nlist);
DataRefImpl DRIstart;
- DRIstart.p = reinterpret_cast<uintptr_t>(getPtr(this, Symtab.symoff));
+ DRIstart.p = reinterpret_cast<uintptr_t>(getPtr(*this, Symtab.symoff));
uint64_t Index = (Symb.p - DRIstart.p) / SymbolTableEntrySize;
return Index;
}
@@ -1143,7 +2272,7 @@ uint8_t MachOObjectFile::getBytesInAddress() const {
}
StringRef MachOObjectFile::getFileFormatName() const {
- unsigned CPUType = getCPUType(this);
+ unsigned CPUType = getCPUType(*this);
if (!is64Bit()) {
switch (CPUType) {
case llvm::MachO::CPU_TYPE_I386:
@@ -1189,14 +2318,19 @@ Triple::ArchType MachOObjectFile::getArch(uint32_t CPUType) {
}
Triple MachOObjectFile::getArchTriple(uint32_t CPUType, uint32_t CPUSubType,
- const char **McpuDefault) {
+ const char **McpuDefault,
+ const char **ArchFlag) {
if (McpuDefault)
*McpuDefault = nullptr;
+ if (ArchFlag)
+ *ArchFlag = nullptr;
switch (CPUType) {
case MachO::CPU_TYPE_I386:
switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) {
case MachO::CPU_SUBTYPE_I386_ALL:
+ if (ArchFlag)
+ *ArchFlag = "i386";
return Triple("i386-apple-darwin");
default:
return Triple();
@@ -1204,8 +2338,12 @@ Triple MachOObjectFile::getArchTriple(uint32_t CPUType, uint32_t CPUSubType,
case MachO::CPU_TYPE_X86_64:
switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) {
case MachO::CPU_SUBTYPE_X86_64_ALL:
+ if (ArchFlag)
+ *ArchFlag = "x86_64";
return Triple("x86_64-apple-darwin");
case MachO::CPU_SUBTYPE_X86_64_H:
+ if (ArchFlag)
+ *ArchFlag = "x86_64h";
return Triple("x86_64h-apple-darwin");
default:
return Triple();
@@ -1213,30 +2351,50 @@ Triple MachOObjectFile::getArchTriple(uint32_t CPUType, uint32_t CPUSubType,
case MachO::CPU_TYPE_ARM:
switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) {
case MachO::CPU_SUBTYPE_ARM_V4T:
+ if (ArchFlag)
+ *ArchFlag = "armv4t";
return Triple("armv4t-apple-darwin");
case MachO::CPU_SUBTYPE_ARM_V5TEJ:
+ if (ArchFlag)
+ *ArchFlag = "armv5e";
return Triple("armv5e-apple-darwin");
case MachO::CPU_SUBTYPE_ARM_XSCALE:
+ if (ArchFlag)
+ *ArchFlag = "xscale";
return Triple("xscale-apple-darwin");
case MachO::CPU_SUBTYPE_ARM_V6:
+ if (ArchFlag)
+ *ArchFlag = "armv6";
return Triple("armv6-apple-darwin");
case MachO::CPU_SUBTYPE_ARM_V6M:
if (McpuDefault)
*McpuDefault = "cortex-m0";
+ if (ArchFlag)
+ *ArchFlag = "armv6m";
return Triple("armv6m-apple-darwin");
case MachO::CPU_SUBTYPE_ARM_V7:
+ if (ArchFlag)
+ *ArchFlag = "armv7";
return Triple("armv7-apple-darwin");
case MachO::CPU_SUBTYPE_ARM_V7EM:
if (McpuDefault)
*McpuDefault = "cortex-m4";
+ if (ArchFlag)
+ *ArchFlag = "armv7em";
return Triple("thumbv7em-apple-darwin");
case MachO::CPU_SUBTYPE_ARM_V7K:
+ if (ArchFlag)
+ *ArchFlag = "armv7k";
return Triple("armv7k-apple-darwin");
case MachO::CPU_SUBTYPE_ARM_V7M:
if (McpuDefault)
*McpuDefault = "cortex-m3";
+ if (ArchFlag)
+ *ArchFlag = "armv7m";
return Triple("thumbv7m-apple-darwin");
case MachO::CPU_SUBTYPE_ARM_V7S:
+ if (ArchFlag)
+ *ArchFlag = "armv7s";
return Triple("armv7s-apple-darwin");
default:
return Triple();
@@ -1244,6 +2402,8 @@ Triple MachOObjectFile::getArchTriple(uint32_t CPUType, uint32_t CPUSubType,
case MachO::CPU_TYPE_ARM64:
switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) {
case MachO::CPU_SUBTYPE_ARM64_ALL:
+ if (ArchFlag)
+ *ArchFlag = "arm64";
return Triple("arm64-apple-darwin");
default:
return Triple();
@@ -1251,6 +2411,8 @@ Triple MachOObjectFile::getArchTriple(uint32_t CPUType, uint32_t CPUSubType,
case MachO::CPU_TYPE_POWERPC:
switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) {
case MachO::CPU_SUBTYPE_POWERPC_ALL:
+ if (ArchFlag)
+ *ArchFlag = "ppc";
return Triple("ppc-apple-darwin");
default:
return Triple();
@@ -1258,6 +2420,8 @@ Triple MachOObjectFile::getArchTriple(uint32_t CPUType, uint32_t CPUSubType,
case MachO::CPU_TYPE_POWERPC64:
switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) {
case MachO::CPU_SUBTYPE_POWERPC_ALL:
+ if (ArchFlag)
+ *ArchFlag = "ppc64";
return Triple("ppc64-apple-darwin");
default:
return Triple();
@@ -1293,7 +2457,7 @@ bool MachOObjectFile::isValidArch(StringRef ArchFlag) {
}
unsigned MachOObjectFile::getArch() const {
- return getArch(getCPUType(this));
+ return getArch(getCPUType(*this));
}
Triple MachOObjectFile::getArchTriple(const char **McpuDefault) const {
@@ -1318,7 +2482,7 @@ dice_iterator MachOObjectFile::begin_dices() const {
return dice_iterator(DiceRef(DRI, this));
MachO::linkedit_data_command DicLC = getDataInCodeLoadCommand();
- DRI.p = reinterpret_cast<uintptr_t>(getPtr(this, DicLC.dataoff));
+ DRI.p = reinterpret_cast<uintptr_t>(getPtr(*this, DicLC.dataoff));
return dice_iterator(DiceRef(DRI, this));
}
@@ -1329,7 +2493,7 @@ dice_iterator MachOObjectFile::end_dices() const {
MachO::linkedit_data_command DicLC = getDataInCodeLoadCommand();
unsigned Offset = DicLC.dataoff + DicLC.datasize;
- DRI.p = reinterpret_cast<uintptr_t>(getPtr(this, Offset));
+ DRI.p = reinterpret_cast<uintptr_t>(getPtr(*this, Offset));
return dice_iterator(DiceRef(DRI, this));
}
@@ -1982,7 +3146,7 @@ MachOObjectFile::getSectionRawFinalSegmentName(DataRefImpl Sec) const {
bool
MachOObjectFile::isRelocationScattered(const MachO::any_relocation_info &RE)
const {
- if (getCPUType(this) == MachO::CPU_TYPE_X86_64)
+ if (getCPUType(*this) == MachO::CPU_TYPE_X86_64)
return false;
return getPlainRelocationAddress(RE) & MachO::R_SCATTERED;
}
@@ -2026,15 +3190,15 @@ unsigned MachOObjectFile::getAnyRelocationAddress(
unsigned MachOObjectFile::getAnyRelocationPCRel(
const MachO::any_relocation_info &RE) const {
if (isRelocationScattered(RE))
- return getScatteredRelocationPCRel(this, RE);
- return getPlainRelocationPCRel(this, RE);
+ return getScatteredRelocationPCRel(RE);
+ return getPlainRelocationPCRel(*this, RE);
}
unsigned MachOObjectFile::getAnyRelocationLength(
const MachO::any_relocation_info &RE) const {
if (isRelocationScattered(RE))
return getScatteredRelocationLength(RE);
- return getPlainRelocationLength(this, RE);
+ return getPlainRelocationLength(*this, RE);
}
unsigned
@@ -2042,7 +3206,7 @@ MachOObjectFile::getAnyRelocationType(
const MachO::any_relocation_info &RE) const {
if (isRelocationScattered(RE))
return getScatteredRelocationType(RE);
- return getPlainRelocationType(this, RE);
+ return getPlainRelocationType(*this, RE);
}
SectionRef
@@ -2060,141 +3224,141 @@ MachOObjectFile::getAnyRelocationSection(
MachO::section MachOObjectFile::getSection(DataRefImpl DRI) const {
assert(DRI.d.a < Sections.size() && "Should have detected this earlier");
- return getStruct<MachO::section>(this, Sections[DRI.d.a]);
+ return getStruct<MachO::section>(*this, Sections[DRI.d.a]);
}
MachO::section_64 MachOObjectFile::getSection64(DataRefImpl DRI) const {
assert(DRI.d.a < Sections.size() && "Should have detected this earlier");
- return getStruct<MachO::section_64>(this, Sections[DRI.d.a]);
+ return getStruct<MachO::section_64>(*this, Sections[DRI.d.a]);
}
MachO::section MachOObjectFile::getSection(const LoadCommandInfo &L,
unsigned Index) const {
- const char *Sec = getSectionPtr(this, L, Index);
- return getStruct<MachO::section>(this, Sec);
+ const char *Sec = getSectionPtr(*this, L, Index);
+ return getStruct<MachO::section>(*this, Sec);
}
MachO::section_64 MachOObjectFile::getSection64(const LoadCommandInfo &L,
unsigned Index) const {
- const char *Sec = getSectionPtr(this, L, Index);
- return getStruct<MachO::section_64>(this, Sec);
+ const char *Sec = getSectionPtr(*this, L, Index);
+ return getStruct<MachO::section_64>(*this, Sec);
}
MachO::nlist
MachOObjectFile::getSymbolTableEntry(DataRefImpl DRI) const {
const char *P = reinterpret_cast<const char *>(DRI.p);
- return getStruct<MachO::nlist>(this, P);
+ return getStruct<MachO::nlist>(*this, P);
}
MachO::nlist_64
MachOObjectFile::getSymbol64TableEntry(DataRefImpl DRI) const {
const char *P = reinterpret_cast<const char *>(DRI.p);
- return getStruct<MachO::nlist_64>(this, P);
+ return getStruct<MachO::nlist_64>(*this, P);
}
MachO::linkedit_data_command
MachOObjectFile::getLinkeditDataLoadCommand(const LoadCommandInfo &L) const {
- return getStruct<MachO::linkedit_data_command>(this, L.Ptr);
+ return getStruct<MachO::linkedit_data_command>(*this, L.Ptr);
}
MachO::segment_command
MachOObjectFile::getSegmentLoadCommand(const LoadCommandInfo &L) const {
- return getStruct<MachO::segment_command>(this, L.Ptr);
+ return getStruct<MachO::segment_command>(*this, L.Ptr);
}
MachO::segment_command_64
MachOObjectFile::getSegment64LoadCommand(const LoadCommandInfo &L) const {
- return getStruct<MachO::segment_command_64>(this, L.Ptr);
+ return getStruct<MachO::segment_command_64>(*this, L.Ptr);
}
MachO::linker_option_command
MachOObjectFile::getLinkerOptionLoadCommand(const LoadCommandInfo &L) const {
- return getStruct<MachO::linker_option_command>(this, L.Ptr);
+ return getStruct<MachO::linker_option_command>(*this, L.Ptr);
}
MachO::version_min_command
MachOObjectFile::getVersionMinLoadCommand(const LoadCommandInfo &L) const {
- return getStruct<MachO::version_min_command>(this, L.Ptr);
+ return getStruct<MachO::version_min_command>(*this, L.Ptr);
}
MachO::dylib_command
MachOObjectFile::getDylibIDLoadCommand(const LoadCommandInfo &L) const {
- return getStruct<MachO::dylib_command>(this, L.Ptr);
+ return getStruct<MachO::dylib_command>(*this, L.Ptr);
}
MachO::dyld_info_command
MachOObjectFile::getDyldInfoLoadCommand(const LoadCommandInfo &L) const {
- return getStruct<MachO::dyld_info_command>(this, L.Ptr);
+ return getStruct<MachO::dyld_info_command>(*this, L.Ptr);
}
MachO::dylinker_command
MachOObjectFile::getDylinkerCommand(const LoadCommandInfo &L) const {
- return getStruct<MachO::dylinker_command>(this, L.Ptr);
+ return getStruct<MachO::dylinker_command>(*this, L.Ptr);
}
MachO::uuid_command
MachOObjectFile::getUuidCommand(const LoadCommandInfo &L) const {
- return getStruct<MachO::uuid_command>(this, L.Ptr);
+ return getStruct<MachO::uuid_command>(*this, L.Ptr);
}
MachO::rpath_command
MachOObjectFile::getRpathCommand(const LoadCommandInfo &L) const {
- return getStruct<MachO::rpath_command>(this, L.Ptr);
+ return getStruct<MachO::rpath_command>(*this, L.Ptr);
}
MachO::source_version_command
MachOObjectFile::getSourceVersionCommand(const LoadCommandInfo &L) const {
- return getStruct<MachO::source_version_command>(this, L.Ptr);
+ return getStruct<MachO::source_version_command>(*this, L.Ptr);
}
MachO::entry_point_command
MachOObjectFile::getEntryPointCommand(const LoadCommandInfo &L) const {
- return getStruct<MachO::entry_point_command>(this, L.Ptr);
+ return getStruct<MachO::entry_point_command>(*this, L.Ptr);
}
MachO::encryption_info_command
MachOObjectFile::getEncryptionInfoCommand(const LoadCommandInfo &L) const {
- return getStruct<MachO::encryption_info_command>(this, L.Ptr);
+ return getStruct<MachO::encryption_info_command>(*this, L.Ptr);
}
MachO::encryption_info_command_64
MachOObjectFile::getEncryptionInfoCommand64(const LoadCommandInfo &L) const {
- return getStruct<MachO::encryption_info_command_64>(this, L.Ptr);
+ return getStruct<MachO::encryption_info_command_64>(*this, L.Ptr);
}
MachO::sub_framework_command
MachOObjectFile::getSubFrameworkCommand(const LoadCommandInfo &L) const {
- return getStruct<MachO::sub_framework_command>(this, L.Ptr);
+ return getStruct<MachO::sub_framework_command>(*this, L.Ptr);
}
MachO::sub_umbrella_command
MachOObjectFile::getSubUmbrellaCommand(const LoadCommandInfo &L) const {
- return getStruct<MachO::sub_umbrella_command>(this, L.Ptr);
+ return getStruct<MachO::sub_umbrella_command>(*this, L.Ptr);
}
MachO::sub_library_command
MachOObjectFile::getSubLibraryCommand(const LoadCommandInfo &L) const {
- return getStruct<MachO::sub_library_command>(this, L.Ptr);
+ return getStruct<MachO::sub_library_command>(*this, L.Ptr);
}
MachO::sub_client_command
MachOObjectFile::getSubClientCommand(const LoadCommandInfo &L) const {
- return getStruct<MachO::sub_client_command>(this, L.Ptr);
+ return getStruct<MachO::sub_client_command>(*this, L.Ptr);
}
MachO::routines_command
MachOObjectFile::getRoutinesCommand(const LoadCommandInfo &L) const {
- return getStruct<MachO::routines_command>(this, L.Ptr);
+ return getStruct<MachO::routines_command>(*this, L.Ptr);
}
MachO::routines_command_64
MachOObjectFile::getRoutinesCommand64(const LoadCommandInfo &L) const {
- return getStruct<MachO::routines_command_64>(this, L.Ptr);
+ return getStruct<MachO::routines_command_64>(*this, L.Ptr);
}
MachO::thread_command
MachOObjectFile::getThreadCommand(const LoadCommandInfo &L) const {
- return getStruct<MachO::thread_command>(this, L.Ptr);
+ return getStruct<MachO::thread_command>(*this, L.Ptr);
}
MachO::any_relocation_info
@@ -2211,15 +3375,15 @@ MachOObjectFile::getRelocation(DataRefImpl Rel) const {
}
auto P = reinterpret_cast<const MachO::any_relocation_info *>(
- getPtr(this, Offset)) + Rel.d.b;
+ getPtr(*this, Offset)) + Rel.d.b;
return getStruct<MachO::any_relocation_info>(
- this, reinterpret_cast<const char *>(P));
+ *this, reinterpret_cast<const char *>(P));
}
MachO::data_in_code_entry
MachOObjectFile::getDice(DataRefImpl Rel) const {
const char *P = reinterpret_cast<const char *>(Rel.p);
- return getStruct<MachO::data_in_code_entry>(this, P);
+ return getStruct<MachO::data_in_code_entry>(*this, P);
}
const MachO::mach_header &MachOObjectFile::getHeader() const {
@@ -2235,19 +3399,19 @@ uint32_t MachOObjectFile::getIndirectSymbolTableEntry(
const MachO::dysymtab_command &DLC,
unsigned Index) const {
uint64_t Offset = DLC.indirectsymoff + Index * sizeof(uint32_t);
- return getStruct<uint32_t>(this, getPtr(this, Offset));
+ return getStruct<uint32_t>(*this, getPtr(*this, Offset));
}
MachO::data_in_code_entry
MachOObjectFile::getDataInCodeTableEntry(uint32_t DataOffset,
unsigned Index) const {
uint64_t Offset = DataOffset + Index * sizeof(MachO::data_in_code_entry);
- return getStruct<MachO::data_in_code_entry>(this, getPtr(this, Offset));
+ return getStruct<MachO::data_in_code_entry>(*this, getPtr(*this, Offset));
}
MachO::symtab_command MachOObjectFile::getSymtabLoadCommand() const {
if (SymtabLoadCmd)
- return getStruct<MachO::symtab_command>(this, SymtabLoadCmd);
+ return getStruct<MachO::symtab_command>(*this, SymtabLoadCmd);
// If there is no SymtabLoadCmd return a load command with zero'ed fields.
MachO::symtab_command Cmd;
@@ -2262,7 +3426,7 @@ MachO::symtab_command MachOObjectFile::getSymtabLoadCommand() const {
MachO::dysymtab_command MachOObjectFile::getDysymtabLoadCommand() const {
if (DysymtabLoadCmd)
- return getStruct<MachO::dysymtab_command>(this, DysymtabLoadCmd);
+ return getStruct<MachO::dysymtab_command>(*this, DysymtabLoadCmd);
// If there is no DysymtabLoadCmd return a load command with zero'ed fields.
MachO::dysymtab_command Cmd;
@@ -2292,7 +3456,7 @@ MachO::dysymtab_command MachOObjectFile::getDysymtabLoadCommand() const {
MachO::linkedit_data_command
MachOObjectFile::getDataInCodeLoadCommand() const {
if (DataInCodeLoadCmd)
- return getStruct<MachO::linkedit_data_command>(this, DataInCodeLoadCmd);
+ return getStruct<MachO::linkedit_data_command>(*this, DataInCodeLoadCmd);
// If there is no DataInCodeLoadCmd return a load command with zero'ed fields.
MachO::linkedit_data_command Cmd;
@@ -2306,7 +3470,7 @@ MachOObjectFile::getDataInCodeLoadCommand() const {
MachO::linkedit_data_command
MachOObjectFile::getLinkOptHintsLoadCommand() const {
if (LinkOptHintsLoadCmd)
- return getStruct<MachO::linkedit_data_command>(this, LinkOptHintsLoadCmd);
+ return getStruct<MachO::linkedit_data_command>(*this, LinkOptHintsLoadCmd);
// If there is no LinkOptHintsLoadCmd return a load command with zero'ed
// fields.
@@ -2323,9 +3487,9 @@ ArrayRef<uint8_t> MachOObjectFile::getDyldInfoRebaseOpcodes() const {
return None;
MachO::dyld_info_command DyldInfo =
- getStruct<MachO::dyld_info_command>(this, DyldInfoLoadCmd);
+ getStruct<MachO::dyld_info_command>(*this, DyldInfoLoadCmd);
const uint8_t *Ptr =
- reinterpret_cast<const uint8_t *>(getPtr(this, DyldInfo.rebase_off));
+ reinterpret_cast<const uint8_t *>(getPtr(*this, DyldInfo.rebase_off));
return makeArrayRef(Ptr, DyldInfo.rebase_size);
}
@@ -2334,9 +3498,9 @@ ArrayRef<uint8_t> MachOObjectFile::getDyldInfoBindOpcodes() const {
return None;
MachO::dyld_info_command DyldInfo =
- getStruct<MachO::dyld_info_command>(this, DyldInfoLoadCmd);
+ getStruct<MachO::dyld_info_command>(*this, DyldInfoLoadCmd);
const uint8_t *Ptr =
- reinterpret_cast<const uint8_t *>(getPtr(this, DyldInfo.bind_off));
+ reinterpret_cast<const uint8_t *>(getPtr(*this, DyldInfo.bind_off));
return makeArrayRef(Ptr, DyldInfo.bind_size);
}
@@ -2345,9 +3509,9 @@ ArrayRef<uint8_t> MachOObjectFile::getDyldInfoWeakBindOpcodes() const {
return None;
MachO::dyld_info_command DyldInfo =
- getStruct<MachO::dyld_info_command>(this, DyldInfoLoadCmd);
+ getStruct<MachO::dyld_info_command>(*this, DyldInfoLoadCmd);
const uint8_t *Ptr =
- reinterpret_cast<const uint8_t *>(getPtr(this, DyldInfo.weak_bind_off));
+ reinterpret_cast<const uint8_t *>(getPtr(*this, DyldInfo.weak_bind_off));
return makeArrayRef(Ptr, DyldInfo.weak_bind_size);
}
@@ -2356,9 +3520,9 @@ ArrayRef<uint8_t> MachOObjectFile::getDyldInfoLazyBindOpcodes() const {
return None;
MachO::dyld_info_command DyldInfo =
- getStruct<MachO::dyld_info_command>(this, DyldInfoLoadCmd);
+ getStruct<MachO::dyld_info_command>(*this, DyldInfoLoadCmd);
const uint8_t *Ptr =
- reinterpret_cast<const uint8_t *>(getPtr(this, DyldInfo.lazy_bind_off));
+ reinterpret_cast<const uint8_t *>(getPtr(*this, DyldInfo.lazy_bind_off));
return makeArrayRef(Ptr, DyldInfo.lazy_bind_size);
}
@@ -2367,9 +3531,9 @@ ArrayRef<uint8_t> MachOObjectFile::getDyldInfoExportsTrie() const {
return None;
MachO::dyld_info_command DyldInfo =
- getStruct<MachO::dyld_info_command>(this, DyldInfoLoadCmd);
+ getStruct<MachO::dyld_info_command>(*this, DyldInfoLoadCmd);
const uint8_t *Ptr =
- reinterpret_cast<const uint8_t *>(getPtr(this, DyldInfo.export_off));
+ reinterpret_cast<const uint8_t *>(getPtr(*this, DyldInfo.export_off));
return makeArrayRef(Ptr, DyldInfo.export_size);
}
@@ -2408,16 +3572,22 @@ bool MachOObjectFile::isRelocatableObject() const {
}
Expected<std::unique_ptr<MachOObjectFile>>
-ObjectFile::createMachOObjectFile(MemoryBufferRef Buffer) {
+ObjectFile::createMachOObjectFile(MemoryBufferRef Buffer,
+ uint32_t UniversalCputype,
+ uint32_t UniversalIndex) {
StringRef Magic = Buffer.getBuffer().slice(0, 4);
if (Magic == "\xFE\xED\xFA\xCE")
- return MachOObjectFile::create(Buffer, false, false);
+ return MachOObjectFile::create(Buffer, false, false,
+ UniversalCputype, UniversalIndex);
if (Magic == "\xCE\xFA\xED\xFE")
- return MachOObjectFile::create(Buffer, true, false);
+ return MachOObjectFile::create(Buffer, true, false,
+ UniversalCputype, UniversalIndex);
if (Magic == "\xFE\xED\xFA\xCF")
- return MachOObjectFile::create(Buffer, false, true);
+ return MachOObjectFile::create(Buffer, false, true,
+ UniversalCputype, UniversalIndex);
if (Magic == "\xCF\xFA\xED\xFE")
- return MachOObjectFile::create(Buffer, true, true);
+ return MachOObjectFile::create(Buffer, true, true,
+ UniversalCputype, UniversalIndex);
return make_error<GenericBinaryError>("Unrecognized MachO magic number",
object_error::invalid_file_type);
}
diff --git a/contrib/llvm/lib/Object/MachOUniversal.cpp b/contrib/llvm/lib/Object/MachOUniversal.cpp
index 66c9151eb69c..309708e9b37c 100644
--- a/contrib/llvm/lib/Object/MachOUniversal.cpp
+++ b/contrib/llvm/lib/Object/MachOUniversal.cpp
@@ -42,6 +42,7 @@ static T getUniversalBinaryStruct(const char *Ptr) {
MachOUniversalBinary::ObjectForArch::ObjectForArch(
const MachOUniversalBinary *Parent, uint32_t Index)
: Parent(Parent), Index(Index) {
+ // The iterators use Parent as a nullptr and an Index+1 == NumberOfObjects.
if (!Parent || Index >= Parent->getNumberOfObjects()) {
clear();
} else {
@@ -51,16 +52,10 @@ MachOUniversalBinary::ObjectForArch::ObjectForArch(
const char *HeaderPos = ParentData.begin() + sizeof(MachO::fat_header) +
Index * sizeof(MachO::fat_arch);
Header = getUniversalBinaryStruct<MachO::fat_arch>(HeaderPos);
- if (ParentData.size() < Header.offset + Header.size) {
- clear();
- }
} else { // Parent->getMagic() == MachO::FAT_MAGIC_64
const char *HeaderPos = ParentData.begin() + sizeof(MachO::fat_header) +
Index * sizeof(MachO::fat_arch_64);
Header64 = getUniversalBinaryStruct<MachO::fat_arch_64>(HeaderPos);
- if (ParentData.size() < Header64.offset + Header64.size) {
- clear();
- }
}
}
}
@@ -73,13 +68,17 @@ MachOUniversalBinary::ObjectForArch::getAsObjectFile() const {
StringRef ParentData = Parent->getData();
StringRef ObjectData;
- if (Parent->getMagic() == MachO::FAT_MAGIC)
+ uint32_t cputype;
+ if (Parent->getMagic() == MachO::FAT_MAGIC) {
ObjectData = ParentData.substr(Header.offset, Header.size);
- else // Parent->getMagic() == MachO::FAT_MAGIC_64
+ cputype = Header.cputype;
+ } else { // Parent->getMagic() == MachO::FAT_MAGIC_64
ObjectData = ParentData.substr(Header64.offset, Header64.size);
+ cputype = Header64.cputype;
+ }
StringRef ObjectName = Parent->getFileName();
MemoryBufferRef ObjBuffer(ObjectData, ObjectName);
- return ObjectFile::createMachOObjectFile(ObjBuffer);
+ return ObjectFile::createMachOObjectFile(ObjBuffer, cputype, Index);
}
Expected<std::unique_ptr<Archive>>
@@ -103,7 +102,7 @@ void MachOUniversalBinary::anchor() { }
Expected<std::unique_ptr<MachOUniversalBinary>>
MachOUniversalBinary::create(MemoryBufferRef Source) {
- Error Err;
+ Error Err = Error::success();
std::unique_ptr<MachOUniversalBinary> Ret(
new MachOUniversalBinary(Source, Err));
if (Err)
@@ -114,7 +113,7 @@ MachOUniversalBinary::create(MemoryBufferRef Source) {
MachOUniversalBinary::MachOUniversalBinary(MemoryBufferRef Source, Error &Err)
: Binary(Binary::ID_MachOUniversalBinary, Source), Magic(0),
NumberOfObjects(0) {
- ErrorAsOutParameter ErrAsOutParam(Err);
+ ErrorAsOutParameter ErrAsOutParam(&Err);
if (Data.getBufferSize() < sizeof(MachO::fat_header)) {
Err = make_error<GenericBinaryError>("File too small to be a Mach-O "
"universal file",
@@ -127,6 +126,10 @@ MachOUniversalBinary::MachOUniversalBinary(MemoryBufferRef Source, Error &Err)
getUniversalBinaryStruct<MachO::fat_header>(Buf.begin());
Magic = H.magic;
NumberOfObjects = H.nfat_arch;
+ if (NumberOfObjects == 0) {
+ Err = malformedError("contains zero architecture types");
+ return;
+ }
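+ // The fat header is followed by one fat_arch (or fat_arch_64) struct per
+ // architecture; the check below makes sure that table fits in the file.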
uint32_t MinSize = sizeof(MachO::fat_header);
if (Magic == MachO::FAT_MAGIC)
MinSize += sizeof(MachO::fat_arch) * NumberOfObjects;
@@ -142,6 +145,68 @@ MachOUniversalBinary::MachOUniversalBinary(MemoryBufferRef Source, Error &Err)
" structs would extend past the end of the file");
return;
}
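+ // Check each slice: its offset plus size must stay within the file, its
+ // alignment must not exceed MAXSECTALIGN, its offset must honor that
+ // alignment, and it must not overlap the universal headers.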
+ for (uint32_t i = 0; i < NumberOfObjects; i++) {
+ ObjectForArch A(this, i);
+ uint64_t bigSize = A.getOffset();
+ bigSize += A.getSize();
+ if (bigSize > Buf.size()) {
+ Err = malformedError("offset plus size of cputype (" +
+ Twine(A.getCPUType()) + ") cpusubtype (" +
+ Twine(A.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK) +
+ ") extends past the end of the file");
+ return;
+ }
+#define MAXSECTALIGN 15 /* 2**15 or 0x8000 */
+ if (A.getAlign() > MAXSECTALIGN) {
+ Err = malformedError("align (2^" + Twine(A.getAlign()) + ") too large "
+ "for cputype (" + Twine(A.getCPUType()) + ") cpusubtype (" +
+ Twine(A.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK) +
+ ") (maximum 2^" + Twine(MAXSECTALIGN) + ")");
+ return;
+ }
+ if (A.getOffset() % (1 << A.getAlign()) != 0) {
+ Err = malformedError("offset: " + Twine(A.getOffset()) +
+ " for cputype (" + Twine(A.getCPUType()) + ") cpusubtype (" +
+ Twine(A.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK) +
+ ") not aligned on it's alignment (2^" + Twine(A.getAlign()) + ")");
+ return;
+ }
+ if (A.getOffset() < MinSize) {
+ Err = malformedError("cputype (" + Twine(A.getCPUType()) + ") "
+ "cpusubtype (" + Twine(A.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK) +
+ ") offset " + Twine(A.getOffset()) + " overlaps universal headers");
+ return;
+ }
+ }
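+ // Check every pair of slices: no two slices may declare the same cputype and
+ // cpusubtype, and their byte ranges must not overlap.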
+ for (uint32_t i = 0; i < NumberOfObjects; i++) {
+ ObjectForArch A(this, i);
+ for (uint32_t j = i + 1; j < NumberOfObjects; j++) {
+ ObjectForArch B(this, j);
+ if (A.getCPUType() == B.getCPUType() &&
+ (A.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK) ==
+ (B.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK)) {
+ Err = malformedError("contains two of the same architecture (cputype "
+ "(" + Twine(A.getCPUType()) + ") cpusubtype (" +
+ Twine(A.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK) + "))");
+ return;
+ }
+ if ((A.getOffset() >= B.getOffset() &&
+ A.getOffset() < B.getOffset() + B.getSize()) ||
+ (A.getOffset() + A.getSize() > B.getOffset() &&
+ A.getOffset() + A.getSize() < B.getOffset() + B.getSize()) ||
+ (A.getOffset() <= B.getOffset() &&
+ A.getOffset() + A.getSize() >= B.getOffset() + B.getSize())) {
+ Err = malformedError("cputype (" + Twine(A.getCPUType()) + ") "
+ "cpusubtype (" + Twine(A.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK) +
+ ") at offset " + Twine(A.getOffset()) + " with a size of " +
+ Twine(A.getSize()) + ", overlaps cputype (" + Twine(B.getCPUType()) +
+ ") cpusubtype (" + Twine(B.getCPUSubType() & ~MachO::CPU_SUBTYPE_MASK)
+ + ") at offset " + Twine(B.getOffset()) + " with a size of "
+ + Twine(B.getSize()));
+ return;
+ }
+ }
+ }
Err = Error::success();
}
@@ -153,10 +218,9 @@ MachOUniversalBinary::getObjectForArch(StringRef ArchName) const {
ArchName,
object_error::arch_not_found);
- for (object_iterator I = begin_objects(), E = end_objects(); I != E; ++I) {
- if (I->getArchTypeName() == ArchName)
- return I->getAsObjectFile();
- }
+ for (auto &Obj : objects())
+ if (Obj.getArchFlagName() == ArchName)
+ return Obj.getAsObjectFile();
return make_error<GenericBinaryError>("fat file does not "
"contain " +
ArchName,
diff --git a/contrib/llvm/lib/Object/ModuleSummaryIndexObjectFile.cpp b/contrib/llvm/lib/Object/ModuleSummaryIndexObjectFile.cpp
index e6b1040d8f5d..202783e7d993 100644
--- a/contrib/llvm/lib/Object/ModuleSummaryIndexObjectFile.cpp
+++ b/contrib/llvm/lib/Object/ModuleSummaryIndexObjectFile.cpp
@@ -13,7 +13,7 @@
#include "llvm/Object/ModuleSummaryIndexObjectFile.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/Object/ObjectFile.h"
@@ -67,59 +67,40 @@ ModuleSummaryIndexObjectFile::findBitcodeInMemBuffer(MemoryBufferRef Object) {
}
}
-// Looks for module summary index in the given memory buffer.
-// returns true if found, else false.
-bool ModuleSummaryIndexObjectFile::hasGlobalValueSummaryInMemBuffer(
- MemoryBufferRef Object,
- const DiagnosticHandlerFunction &DiagnosticHandler) {
- ErrorOr<MemoryBufferRef> BCOrErr = findBitcodeInMemBuffer(Object);
- if (!BCOrErr)
- return false;
-
- return hasGlobalValueSummary(BCOrErr.get(), DiagnosticHandler);
-}
-
// Parse module summary index in the given memory buffer.
// Return new ModuleSummaryIndexObjectFile instance containing parsed
// module summary/index.
-ErrorOr<std::unique_ptr<ModuleSummaryIndexObjectFile>>
-ModuleSummaryIndexObjectFile::create(
- MemoryBufferRef Object,
- const DiagnosticHandlerFunction &DiagnosticHandler) {
- std::unique_ptr<ModuleSummaryIndex> Index;
-
+Expected<std::unique_ptr<ModuleSummaryIndexObjectFile>>
+ModuleSummaryIndexObjectFile::create(MemoryBufferRef Object) {
ErrorOr<MemoryBufferRef> BCOrErr = findBitcodeInMemBuffer(Object);
if (!BCOrErr)
- return BCOrErr.getError();
+ return errorCodeToError(BCOrErr.getError());
- ErrorOr<std::unique_ptr<ModuleSummaryIndex>> IOrErr =
- getModuleSummaryIndex(BCOrErr.get(), DiagnosticHandler);
+ Expected<std::unique_ptr<ModuleSummaryIndex>> IOrErr =
+ getModuleSummaryIndex(BCOrErr.get());
- if (std::error_code EC = IOrErr.getError())
- return EC;
-
- Index = std::move(IOrErr.get());
+ if (!IOrErr)
+ return IOrErr.takeError();
+ std::unique_ptr<ModuleSummaryIndex> Index = std::move(IOrErr.get());
return llvm::make_unique<ModuleSummaryIndexObjectFile>(Object,
std::move(Index));
}
// Parse the module summary index out of an IR file and return the summary
// index object if found, or nullptr if not.
-ErrorOr<std::unique_ptr<ModuleSummaryIndex>> llvm::getModuleSummaryIndexForFile(
- StringRef Path, const DiagnosticHandlerFunction &DiagnosticHandler) {
+Expected<std::unique_ptr<ModuleSummaryIndex>>
+llvm::getModuleSummaryIndexForFile(StringRef Path) {
ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
MemoryBuffer::getFileOrSTDIN(Path);
std::error_code EC = FileOrErr.getError();
if (EC)
- return EC;
+ return errorCodeToError(EC);
MemoryBufferRef BufferRef = (FileOrErr.get())->getMemBufferRef();
- ErrorOr<std::unique_ptr<object::ModuleSummaryIndexObjectFile>> ObjOrErr =
- object::ModuleSummaryIndexObjectFile::create(BufferRef,
- DiagnosticHandler);
- EC = ObjOrErr.getError();
- if (EC)
- return EC;
+ Expected<std::unique_ptr<object::ModuleSummaryIndexObjectFile>> ObjOrErr =
+ object::ModuleSummaryIndexObjectFile::create(BufferRef);
+ if (!ObjOrErr)
+ return ObjOrErr.takeError();
object::ModuleSummaryIndexObjectFile &Obj = **ObjOrErr;
return Obj.takeIndex();
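The ErrorOr/std::error_code plumbing above is replaced by the llvm::Expected convention: test the result, forward the failure with takeError(), and dereference only on success. A minimal sketch of that calling pattern (the functions are hypothetical and exist only to illustrate the idiom):

#include "llvm/Support/Error.h"
using namespace llvm;

// Hypothetical producer returning Expected<int>.
static Expected<int> parseValue(bool Ok) {
  if (!Ok)
    return make_error<StringError>("parse failed", inconvertibleErrorCode());
  return 42;
}

// Consumer: propagate the Error instead of dropping it.
static Expected<int> doubled(bool Ok) {
  Expected<int> V = parseValue(Ok);
  if (!V)
    return V.takeError();
  return *V * 2;
}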
diff --git a/contrib/llvm/lib/Object/ModuleSymbolTable.cpp b/contrib/llvm/lib/Object/ModuleSymbolTable.cpp
new file mode 100644
index 000000000000..90488007ff59
--- /dev/null
+++ b/contrib/llvm/lib/Object/ModuleSymbolTable.cpp
@@ -0,0 +1,189 @@
+//===- ModuleSymbolTable.cpp - symbol table for in-memory IR ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class represents a symbol table built from in-memory IR. It provides
+// access to GlobalValues and should only be used if such access is required
+// (e.g. in the LTO implementation).
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/IRObjectFile.h"
+#include "RecordStreamer.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Bitcode/BitcodeReader.h"
+#include "llvm/IR/GVMaterializer.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/IR/Module.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCTargetAsmParser.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+using namespace object;
+
+void ModuleSymbolTable::addModule(Module *M) {
+ if (FirstMod)
+ assert(FirstMod->getTargetTriple() == M->getTargetTriple());
+ else
+ FirstMod = M;
+
+ for (Function &F : *M)
+ SymTab.push_back(&F);
+ for (GlobalVariable &GV : M->globals())
+ SymTab.push_back(&GV);
+ for (GlobalAlias &GA : M->aliases())
+ SymTab.push_back(&GA);
+
+ CollectAsmSymbols(Triple(M->getTargetTriple()), M->getModuleInlineAsm(),
+ [this](StringRef Name, BasicSymbolRef::Flags Flags) {
+ SymTab.push_back(new (AsmSymbols.Allocate())
+ AsmSymbol(Name, Flags));
+ });
+}
+
+void ModuleSymbolTable::CollectAsmSymbols(
+ const Triple &TT, StringRef InlineAsm,
+ function_ref<void(StringRef, BasicSymbolRef::Flags)> AsmSymbol) {
+ if (InlineAsm.empty())
+ return;
+
+ std::string Err;
+ const Target *T = TargetRegistry::lookupTarget(TT.str(), Err);
+ assert(T && T->hasMCAsmParser());
+
+ std::unique_ptr<MCRegisterInfo> MRI(T->createMCRegInfo(TT.str()));
+ if (!MRI)
+ return;
+
+ std::unique_ptr<MCAsmInfo> MAI(T->createMCAsmInfo(*MRI, TT.str()));
+ if (!MAI)
+ return;
+
+ std::unique_ptr<MCSubtargetInfo> STI(
+ T->createMCSubtargetInfo(TT.str(), "", ""));
+ if (!STI)
+ return;
+
+ std::unique_ptr<MCInstrInfo> MCII(T->createMCInstrInfo());
+ if (!MCII)
+ return;
+
+ MCObjectFileInfo MOFI;
+ MCContext MCCtx(MAI.get(), MRI.get(), &MOFI);
+ MOFI.InitMCObjectFileInfo(TT, /*PIC*/ false, CodeModel::Default, MCCtx);
+ RecordStreamer Streamer(MCCtx);
+ T->createNullTargetStreamer(Streamer);
+
+ std::unique_ptr<MemoryBuffer> Buffer(MemoryBuffer::getMemBuffer(InlineAsm));
+ SourceMgr SrcMgr;
+ SrcMgr.AddNewSourceBuffer(std::move(Buffer), SMLoc());
+ std::unique_ptr<MCAsmParser> Parser(
+ createMCAsmParser(SrcMgr, MCCtx, Streamer, *MAI));
+
+ MCTargetOptions MCOptions;
+ std::unique_ptr<MCTargetAsmParser> TAP(
+ T->createMCAsmParser(*STI, *Parser, *MCII, MCOptions));
+ if (!TAP)
+ return;
+
+ Parser->setTargetParser(*TAP);
+ if (Parser->Run(false))
+ return;
+
+ for (auto &KV : Streamer) {
+ StringRef Key = KV.first();
+ RecordStreamer::State Value = KV.second;
+ // FIXME: For now we just assume that all asm symbols are executable.
+ uint32_t Res = BasicSymbolRef::SF_Executable;
+ switch (Value) {
+ case RecordStreamer::NeverSeen:
+ llvm_unreachable("NeverSeen should have been replaced earlier");
+ case RecordStreamer::DefinedGlobal:
+ Res |= BasicSymbolRef::SF_Global;
+ break;
+ case RecordStreamer::Defined:
+ break;
+ case RecordStreamer::Global:
+ case RecordStreamer::Used:
+ Res |= BasicSymbolRef::SF_Undefined;
+ Res |= BasicSymbolRef::SF_Global;
+ break;
+ case RecordStreamer::DefinedWeak:
+ Res |= BasicSymbolRef::SF_Weak;
+ Res |= BasicSymbolRef::SF_Global;
+ break;
+ case RecordStreamer::UndefinedWeak:
+ Res |= BasicSymbolRef::SF_Weak;
+ Res |= BasicSymbolRef::SF_Undefined;
+ }
+ AsmSymbol(Key, BasicSymbolRef::Flags(Res));
+ }
+}
+
+void ModuleSymbolTable::printSymbolName(raw_ostream &OS, Symbol S) const {
+ if (S.is<AsmSymbol *>()) {
+ OS << S.get<AsmSymbol *>()->first;
+ return;
+ }
+
+ auto *GV = S.get<GlobalValue *>();
+ if (GV->hasDLLImportStorageClass())
+ OS << "__imp_";
+
+ Mang.getNameWithPrefix(OS, GV, false);
+}
+
+uint32_t ModuleSymbolTable::getSymbolFlags(Symbol S) const {
+ if (S.is<AsmSymbol *>())
+ return S.get<AsmSymbol *>()->second;
+
+ auto *GV = S.get<GlobalValue *>();
+
+ uint32_t Res = BasicSymbolRef::SF_None;
+ if (GV->isDeclarationForLinker())
+ Res |= BasicSymbolRef::SF_Undefined;
+ else if (GV->hasHiddenVisibility() && !GV->hasLocalLinkage())
+ Res |= BasicSymbolRef::SF_Hidden;
+ if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) {
+ if (GVar->isConstant())
+ Res |= BasicSymbolRef::SF_Const;
+ }
+ if (dyn_cast_or_null<Function>(GV->getBaseObject()))
+ Res |= BasicSymbolRef::SF_Executable;
+ if (isa<GlobalAlias>(GV))
+ Res |= BasicSymbolRef::SF_Indirect;
+ if (GV->hasPrivateLinkage())
+ Res |= BasicSymbolRef::SF_FormatSpecific;
+ if (!GV->hasLocalLinkage())
+ Res |= BasicSymbolRef::SF_Global;
+ if (GV->hasCommonLinkage())
+ Res |= BasicSymbolRef::SF_Common;
+ if (GV->hasLinkOnceLinkage() || GV->hasWeakLinkage() ||
+ GV->hasExternalWeakLinkage())
+ Res |= BasicSymbolRef::SF_Weak;
+
+ if (GV->getName().startswith("llvm."))
+ Res |= BasicSymbolRef::SF_FormatSpecific;
+ else if (auto *Var = dyn_cast<GlobalVariable>(GV)) {
+ if (Var->getSection() == "llvm.metadata")
+ Res |= BasicSymbolRef::SF_FormatSpecific;
+ }
+
+ return Res;
+}
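Taken together, addModule, printSymbolName and getSymbolFlags give a complete symbol dump for in-memory IR. A hedged usage sketch (it assumes the accompanying header exposes the collected symbols through a symbols() accessor, as the in-tree ModuleSymbolTable.h does):

#include "llvm/IR/Module.h"
#include "llvm/Object/ModuleSymbolTable.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Illustrative only: print every symbol of a module with its flag bits.
static void dumpModuleSymbols(Module &M) {
  ModuleSymbolTable SymTab;
  SymTab.addModule(&M);
  for (ModuleSymbolTable::Symbol S : SymTab.symbols()) {
    SymTab.printSymbolName(outs(), S);
    outs() << " flags=0x";
    outs().write_hex(SymTab.getSymbolFlags(S));
    outs() << "\n";
  }
}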
diff --git a/contrib/llvm/lib/Object/ObjectFile.cpp b/contrib/llvm/lib/Object/ObjectFile.cpp
index 92f9c1f4f0a0..f36388b677f3 100644
--- a/contrib/llvm/lib/Object/ObjectFile.cpp
+++ b/contrib/llvm/lib/Object/ObjectFile.cpp
@@ -11,9 +11,10 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Object/ObjectFile.h"
#include "llvm/Object/COFF.h"
#include "llvm/Object/MachO.h"
-#include "llvm/Object/ObjectFile.h"
+#include "llvm/Object/Wasm.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
@@ -78,6 +79,7 @@ ObjectFile::createObjectFile(MemoryBufferRef Object, sys::fs::file_magic Type) {
switch (Type) {
case sys::fs::file_magic::unknown:
case sys::fs::file_magic::bitcode:
+ case sys::fs::file_magic::coff_cl_gl_object:
case sys::fs::file_magic::archive:
case sys::fs::file_magic::macho_universal_binary:
case sys::fs::file_magic::windows_resource:
@@ -104,6 +106,8 @@ ObjectFile::createObjectFile(MemoryBufferRef Object, sys::fs::file_magic Type) {
case sys::fs::file_magic::coff_import_library:
case sys::fs::file_magic::pecoff_executable:
return errorOrToExpected(createCOFFObjectFile(Object));
+ case sys::fs::file_magic::wasm_object:
+ return createWasmObjectFile(Object);
}
llvm_unreachable("Unexpected Object File Type");
}
@@ -118,8 +122,8 @@ ObjectFile::createObjectFile(StringRef ObjectPath) {
Expected<std::unique_ptr<ObjectFile>> ObjOrErr =
createObjectFile(Buffer->getMemBufferRef());
- if (!ObjOrErr)
- ObjOrErr.takeError();
+ if (Error Err = ObjOrErr.takeError())
+ return std::move(Err);
std::unique_ptr<ObjectFile> Obj = std::move(ObjOrErr.get());
return OwningBinary<ObjectFile>(std::move(Obj), std::move(Buffer));
diff --git a/contrib/llvm/lib/Object/RecordStreamer.cpp b/contrib/llvm/lib/Object/RecordStreamer.cpp
index f03bd5e5fb9f..572b960bc85f 100644
--- a/contrib/llvm/lib/Object/RecordStreamer.cpp
+++ b/contrib/llvm/lib/Object/RecordStreamer.cpp
@@ -23,8 +23,10 @@ void RecordStreamer::markDefined(const MCSymbol &Symbol) {
case Used:
S = Defined;
break;
- case GlobalWeak:
+ case DefinedWeak:
break;
+ case UndefinedWeak:
+ S = DefinedWeak;
}
}
@@ -34,15 +36,16 @@ void RecordStreamer::markGlobal(const MCSymbol &Symbol,
switch (S) {
case DefinedGlobal:
case Defined:
- S = (Attribute == MCSA_Weak) ? GlobalWeak : DefinedGlobal;
+ S = (Attribute == MCSA_Weak) ? DefinedWeak : DefinedGlobal;
break;
case NeverSeen:
case Global:
case Used:
- S = (Attribute == MCSA_Weak) ? GlobalWeak : Global;
+ S = (Attribute == MCSA_Weak) ? UndefinedWeak : Global;
break;
- case GlobalWeak:
+ case UndefinedWeak:
+ case DefinedWeak:
break;
}
}
@@ -53,7 +56,8 @@ void RecordStreamer::markUsed(const MCSymbol &Symbol) {
case DefinedGlobal:
case Defined:
case Global:
- case GlobalWeak:
+ case DefinedWeak:
+ case UndefinedWeak:
break;
case NeverSeen:
@@ -92,6 +96,8 @@ bool RecordStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
MCSymbolAttr Attribute) {
if (Attribute == MCSA_Global || Attribute == MCSA_Weak)
markGlobal(*Symbol, Attribute);
+ if (Attribute == MCSA_LazyReference)
+ markUsed(*Symbol);
return true;
}
diff --git a/contrib/llvm/lib/Object/RecordStreamer.h b/contrib/llvm/lib/Object/RecordStreamer.h
index 71337a60591b..617d8a43fbd2 100644
--- a/contrib/llvm/lib/Object/RecordStreamer.h
+++ b/contrib/llvm/lib/Object/RecordStreamer.h
@@ -15,7 +15,8 @@
namespace llvm {
class RecordStreamer : public MCStreamer {
public:
- enum State { NeverSeen, Global, GlobalWeak, Defined, DefinedGlobal, Used };
+ enum State { NeverSeen, Global, Defined, DefinedGlobal, DefinedWeak, Used,
+ UndefinedWeak};
private:
StringMap<State> Symbols;
diff --git a/contrib/llvm/lib/Object/SymbolSize.cpp b/contrib/llvm/lib/Object/SymbolSize.cpp
index 1d5cd78e6d9c..dd49d5f116b3 100644
--- a/contrib/llvm/lib/Object/SymbolSize.cpp
+++ b/contrib/llvm/lib/Object/SymbolSize.cpp
@@ -16,19 +16,13 @@
using namespace llvm;
using namespace object;
-namespace {
-struct SymEntry {
- symbol_iterator I;
- uint64_t Address;
- unsigned Number;
- unsigned SectionID;
-};
-}
-
-static int compareAddress(const SymEntry *A, const SymEntry *B) {
+// Orders increasingly by (SectionID, Address).
+int llvm::object::compareAddress(const SymEntry *A, const SymEntry *B) {
if (A->SectionID != B->SectionID)
- return A->SectionID - B->SectionID;
- return A->Address - B->Address;
+ return A->SectionID < B->SectionID ? -1 : 1;
+ if (A->Address != B->Address)
+ return A->Address < B->Address ? -1 : 1;
+ return 0;
}
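The rewrite matters because Address and SectionID are unsigned: returning the raw difference can wrap around and, once truncated to int, report the wrong sign for large values. A standalone sketch of the safe three-way comparison (illustrative, not the LLVM code):

#include <cstdint>

// Three-way compare that stays correct for any uint64_t inputs; returning
// A - B would wrap and truncate instead.
static int compareU64(uint64_t A, uint64_t B) {
  if (A < B)
    return -1;
  if (A > B)
    return 1;
  return 0;
}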
static unsigned getSectionID(const ObjectFile &O, SectionRef Sec) {
diff --git a/contrib/llvm/lib/Object/SymbolicFile.cpp b/contrib/llvm/lib/Object/SymbolicFile.cpp
index 1e8e31b6b22d..4b51a49cf342 100644
--- a/contrib/llvm/lib/Object/SymbolicFile.cpp
+++ b/contrib/llvm/lib/Object/SymbolicFile.cpp
@@ -35,10 +35,11 @@ Expected<std::unique_ptr<SymbolicFile>> SymbolicFile::createSymbolicFile(
switch (Type) {
case sys::fs::file_magic::bitcode:
if (Context)
- return errorOrToExpected(IRObjectFile::create(Object, *Context));
- // Fallthrough
+ return IRObjectFile::create(Object, *Context);
+ LLVM_FALLTHROUGH;
case sys::fs::file_magic::unknown:
case sys::fs::file_magic::archive:
+ case sys::fs::file_magic::coff_cl_gl_object:
case sys::fs::file_magic::macho_universal_binary:
case sys::fs::file_magic::windows_resource:
return errorCodeToError(object_error::invalid_file_type);
@@ -57,6 +58,7 @@ Expected<std::unique_ptr<SymbolicFile>> SymbolicFile::createSymbolicFile(
case sys::fs::file_magic::macho_dsym_companion:
case sys::fs::file_magic::macho_kext_bundle:
case sys::fs::file_magic::pecoff_executable:
+ case sys::fs::file_magic::wasm_object:
return ObjectFile::createObjectFile(Object, Type);
case sys::fs::file_magic::coff_import_library:
return std::unique_ptr<SymbolicFile>(new COFFImportFile(Object));
@@ -73,9 +75,9 @@ Expected<std::unique_ptr<SymbolicFile>> SymbolicFile::createSymbolicFile(
if (!BCData)
return std::move(Obj);
- return errorOrToExpected(IRObjectFile::create(
- MemoryBufferRef(BCData->getBuffer(),
- Object.getBufferIdentifier()), *Context));
+ return IRObjectFile::create(
+ MemoryBufferRef(BCData->getBuffer(), Object.getBufferIdentifier()),
+ *Context);
}
}
llvm_unreachable("Unexpected Binary File Type");
diff --git a/contrib/llvm/lib/Object/WasmObjectFile.cpp b/contrib/llvm/lib/Object/WasmObjectFile.cpp
new file mode 100644
index 000000000000..2b61a8a034f6
--- /dev/null
+++ b/contrib/llvm/lib/Object/WasmObjectFile.cpp
@@ -0,0 +1,313 @@
+//===- WasmObjectFile.cpp - Wasm object file implementation -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/Wasm.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/LEB128.h"
+
+namespace llvm {
+namespace object {
+
+Expected<std::unique_ptr<WasmObjectFile>>
+ObjectFile::createWasmObjectFile(MemoryBufferRef Buffer) {
+ Error Err = Error::success();
+ auto ObjectFile = llvm::make_unique<WasmObjectFile>(Buffer, Err);
+ if (Err)
+ return std::move(Err);
+
+ return std::move(ObjectFile);
+}
+
+namespace {
+
+uint32_t readUint32(const uint8_t *&Ptr) {
+ uint32_t Result = support::endian::read32le(Ptr);
+ Ptr += sizeof(Result);
+ return Result;
+}
+
+uint64_t readULEB128(const uint8_t *&Ptr) {
+ unsigned Count;
+ uint64_t Result = decodeULEB128(Ptr, &Count);
+ Ptr += Count;
+ return Result;
+}
+
+StringRef readString(const uint8_t *&Ptr) {
+ uint32_t StringLen = readULEB128(Ptr);
+ StringRef Return = StringRef(reinterpret_cast<const char *>(Ptr), StringLen);
+ Ptr += StringLen;
+ return Return;
+}
+
+Error readSection(wasm::WasmSection &Section, const uint8_t *&Ptr,
+ const uint8_t *Start) {
+ // TODO(sbc): Avoid reading past EOF in the case of malformed files.
+ Section.Offset = Ptr - Start;
+ Section.Type = readULEB128(Ptr);
+ uint32_t Size = readULEB128(Ptr);
+ if (Size == 0)
+ return make_error<StringError>("Zero length section",
+ object_error::parse_failed);
+ Section.Content = ArrayRef<uint8_t>(Ptr, Size);
+ Ptr += Size;
+ return Error::success();
+}
+}
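readULEB128 above defers to llvm::decodeULEB128; the encoding stores 7 payload bits per byte, least-significant group first, with the high bit marking continuation. A standalone sketch of that decoding (for illustration only, and without the bounds checking a production decoder needs):

#include <cstdint>

// Decode one unsigned LEB128 value and report how many bytes were consumed.
// Example: bytes E5 8E 26 decode to 624485.
static uint64_t decodeULEB128Sketch(const uint8_t *P, unsigned *Count) {
  uint64_t Value = 0;
  unsigned Shift = 0, N = 0;
  uint8_t Byte;
  do {
    Byte = P[N++];
    Value |= uint64_t(Byte & 0x7f) << Shift;
    Shift += 7;
  } while (Byte & 0x80);
  if (Count)
    *Count = N;
  return Value;
}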
+
+WasmObjectFile::WasmObjectFile(MemoryBufferRef Buffer, Error &Err)
+ : ObjectFile(Binary::ID_Wasm, Buffer) {
+ ErrorAsOutParameter ErrAsOutParam(&Err);
+ Header.Magic = getData().substr(0, 4);
+ if (Header.Magic != StringRef("\0asm", 4)) {
+ Err = make_error<StringError>("Bad magic number",
+ object_error::parse_failed);
+ return;
+ }
+ const uint8_t *Ptr = getPtr(4);
+ Header.Version = readUint32(Ptr);
+ if (Header.Version != wasm::WasmVersion) {
+ Err = make_error<StringError>("Bad version number",
+ object_error::parse_failed);
+ return;
+ }
+
+ const uint8_t *Eof = getPtr(getData().size());
+ wasm::WasmSection Sec;
+ while (Ptr < Eof) {
+ if ((Err = readSection(Sec, Ptr, getPtr(0))))
+ return;
+ if (Sec.Type == wasm::WASM_SEC_USER) {
+ if ((Err = parseUserSection(Sec, Sec.Content.data(), Sec.Content.size())))
+ return;
+ }
+ Sections.push_back(Sec);
+ }
+}
+
+Error WasmObjectFile::parseUserSection(wasm::WasmSection &Sec,
+ const uint8_t *Ptr, size_t Length) {
+ Sec.Name = readString(Ptr);
+ return Error::success();
+}
+
+const uint8_t *WasmObjectFile::getPtr(size_t Offset) const {
+ return reinterpret_cast<const uint8_t *>(getData().substr(Offset, 1).data());
+}
+
+const wasm::WasmObjectHeader &WasmObjectFile::getHeader() const {
+ return Header;
+}
+
+void WasmObjectFile::moveSymbolNext(DataRefImpl &Symb) const {
+ llvm_unreachable("not yet implemented");
+}
+
+std::error_code WasmObjectFile::printSymbolName(raw_ostream &OS,
+ DataRefImpl Symb) const {
+ llvm_unreachable("not yet implemented");
+ return object_error::invalid_symbol_index;
+}
+
+uint32_t WasmObjectFile::getSymbolFlags(DataRefImpl Symb) const {
+ llvm_unreachable("not yet implemented");
+ return 0;
+}
+
+basic_symbol_iterator WasmObjectFile::symbol_begin() const {
+ return BasicSymbolRef(DataRefImpl(), this);
+}
+
+basic_symbol_iterator WasmObjectFile::symbol_end() const {
+ return BasicSymbolRef(DataRefImpl(), this);
+}
+
+Expected<StringRef> WasmObjectFile::getSymbolName(DataRefImpl Symb) const {
+ llvm_unreachable("not yet implemented");
+ return errorCodeToError(object_error::invalid_symbol_index);
+}
+
+Expected<uint64_t> WasmObjectFile::getSymbolAddress(DataRefImpl Symb) const {
+ llvm_unreachable("not yet implemented");
+ return errorCodeToError(object_error::invalid_symbol_index);
+}
+
+uint64_t WasmObjectFile::getSymbolValueImpl(DataRefImpl Symb) const {
+ llvm_unreachable("not yet implemented");
+ return 0;
+}
+
+uint32_t WasmObjectFile::getSymbolAlignment(DataRefImpl Symb) const {
+ llvm_unreachable("not yet implemented");
+ return 0;
+}
+
+uint64_t WasmObjectFile::getCommonSymbolSizeImpl(DataRefImpl Symb) const {
+ llvm_unreachable("not yet implemented");
+ return 0;
+}
+
+Expected<SymbolRef::Type>
+WasmObjectFile::getSymbolType(DataRefImpl Symb) const {
+ llvm_unreachable("not yet implemented");
+ return errorCodeToError(object_error::invalid_symbol_index);
+}
+
+Expected<section_iterator>
+WasmObjectFile::getSymbolSection(DataRefImpl Symb) const {
+ llvm_unreachable("not yet implemented");
+ return errorCodeToError(object_error::invalid_symbol_index);
+}
+
+void WasmObjectFile::moveSectionNext(DataRefImpl &Sec) const { Sec.d.a++; }
+
+std::error_code WasmObjectFile::getSectionName(DataRefImpl Sec,
+ StringRef &Res) const {
+ const wasm::WasmSection &S = Sections[Sec.d.a];
+#define ECase(X) \
+ case wasm::WASM_SEC_##X: \
+ Res = #X; \
+ break
+ switch (S.Type) {
+ ECase(TYPE);
+ ECase(IMPORT);
+ ECase(FUNCTION);
+ ECase(TABLE);
+ ECase(MEMORY);
+ ECase(GLOBAL);
+ ECase(EXPORT);
+ ECase(START);
+ ECase(ELEM);
+ ECase(CODE);
+ ECase(DATA);
+ case wasm::WASM_SEC_USER:
+ Res = S.Name;
+ break;
+ default:
+ return object_error::invalid_section_index;
+ }
+#undef ECase
+ return std::error_code();
+}
+
+uint64_t WasmObjectFile::getSectionAddress(DataRefImpl Sec) const { return 0; }
+
+uint64_t WasmObjectFile::getSectionSize(DataRefImpl Sec) const {
+ const wasm::WasmSection &S = Sections[Sec.d.a];
+ return S.Content.size();
+}
+
+std::error_code WasmObjectFile::getSectionContents(DataRefImpl Sec,
+ StringRef &Res) const {
+ const wasm::WasmSection &S = Sections[Sec.d.a];
+ // This will never fail since wasm sections can never be empty (user-sections
+ // must have a name and non-user sections each have a defined structure).
+ Res = StringRef(reinterpret_cast<const char *>(S.Content.data()),
+ S.Content.size());
+ return std::error_code();
+}
+
+uint64_t WasmObjectFile::getSectionAlignment(DataRefImpl Sec) const {
+ return 1;
+}
+
+bool WasmObjectFile::isSectionCompressed(DataRefImpl Sec) const {
+ return false;
+}
+
+bool WasmObjectFile::isSectionText(DataRefImpl Sec) const {
+ const wasm::WasmSection &S = Sections[Sec.d.a];
+ return S.Type == wasm::WASM_SEC_CODE;
+}
+
+bool WasmObjectFile::isSectionData(DataRefImpl Sec) const {
+ const wasm::WasmSection &S = Sections[Sec.d.a];
+ return S.Type == wasm::WASM_SEC_DATA;
+}
+
+bool WasmObjectFile::isSectionBSS(DataRefImpl Sec) const { return false; }
+
+bool WasmObjectFile::isSectionVirtual(DataRefImpl Sec) const { return false; }
+
+bool WasmObjectFile::isSectionBitcode(DataRefImpl Sec) const { return false; }
+
+relocation_iterator WasmObjectFile::section_rel_begin(DataRefImpl Sec) const {
+ llvm_unreachable("not yet implemented");
+ RelocationRef Rel;
+ return relocation_iterator(Rel);
+}
+
+relocation_iterator WasmObjectFile::section_rel_end(DataRefImpl Sec) const {
+ llvm_unreachable("not yet implemented");
+ RelocationRef Rel;
+ return relocation_iterator(Rel);
+}
+
+section_iterator WasmObjectFile::getRelocatedSection(DataRefImpl Sec) const {
+ llvm_unreachable("not yet implemented");
+ SectionRef Ref;
+ return section_iterator(Ref);
+}
+
+void WasmObjectFile::moveRelocationNext(DataRefImpl &Rel) const {
+ llvm_unreachable("not yet implemented");
+}
+
+uint64_t WasmObjectFile::getRelocationOffset(DataRefImpl Rel) const {
+ llvm_unreachable("not yet implemented");
+ return 0;
+}
+
+symbol_iterator WasmObjectFile::getRelocationSymbol(DataRefImpl Rel) const {
+ llvm_unreachable("not yet implemented");
+ SymbolRef Ref;
+ return symbol_iterator(Ref);
+}
+
+uint64_t WasmObjectFile::getRelocationType(DataRefImpl Rel) const {
+ llvm_unreachable("not yet implemented");
+ return 0;
+}
+
+void WasmObjectFile::getRelocationTypeName(
+ DataRefImpl Rel, SmallVectorImpl<char> &Result) const {
+ llvm_unreachable("not yet implemented");
+}
+
+section_iterator WasmObjectFile::section_begin() const {
+ DataRefImpl Ref;
+ Ref.d.a = 0;
+ return section_iterator(SectionRef(Ref, this));
+}
+
+section_iterator WasmObjectFile::section_end() const {
+ DataRefImpl Ref;
+ Ref.d.a = Sections.size();
+ return section_iterator(SectionRef(Ref, this));
+}
+
+uint8_t WasmObjectFile::getBytesInAddress() const { return 4; }
+
+StringRef WasmObjectFile::getFileFormatName() const { return "WASM"; }
+
+unsigned WasmObjectFile::getArch() const { return Triple::wasm32; }
+
+SubtargetFeatures WasmObjectFile::getFeatures() const {
+ return SubtargetFeatures();
+}
+
+bool WasmObjectFile::isRelocatableObject() const { return false; }
+
+const wasm::WasmSection *
+WasmObjectFile::getWasmSection(const SectionRef &Section) const {
+ return &Sections[Section.getRawDataRefImpl().d.a];
+}
+
+} // end namespace object
+} // end namespace llvm
diff --git a/contrib/llvm/lib/ObjectYAML/DWARFYAML.cpp b/contrib/llvm/lib/ObjectYAML/DWARFYAML.cpp
new file mode 100644
index 000000000000..42a448a7bdfd
--- /dev/null
+++ b/contrib/llvm/lib/ObjectYAML/DWARFYAML.cpp
@@ -0,0 +1,120 @@
+//===- DWARFYAML.cpp - DWARF YAMLIO implementation ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines classes for handling the YAML representation of DWARF Debug
+// Info.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ObjectYAML/DWARFYAML.h"
+
+namespace llvm {
+
+bool DWARFYAML::Data::isEmpty() const {
+ return 0 == DebugStrings.size() + AbbrevDecls.size();
+}
+
+namespace yaml {
+
+void MappingTraits<DWARFYAML::Data>::mapping(IO &IO, DWARFYAML::Data &DWARF) {
+ auto oldContext = IO.getContext();
+ IO.setContext(&DWARF);
+ IO.mapOptional("debug_str", DWARF.DebugStrings);
+ IO.mapOptional("debug_abbrev", DWARF.AbbrevDecls);
+ if(!DWARF.ARanges.empty() || !IO.outputting())
+ IO.mapOptional("debug_aranges", DWARF.ARanges);
+ if(!DWARF.PubNames.Entries.empty() || !IO.outputting())
+ IO.mapOptional("debug_pubnames", DWARF.PubNames);
+ if(!DWARF.PubTypes.Entries.empty() || !IO.outputting())
+ IO.mapOptional("debug_pubtypes", DWARF.PubTypes);
+ if(!DWARF.GNUPubNames.Entries.empty() || !IO.outputting())
+ IO.mapOptional("debug_gnu_pubnames", DWARF.GNUPubNames);
+ if(!DWARF.GNUPubTypes.Entries.empty() || !IO.outputting())
+ IO.mapOptional("debug_gnu_pubtypes", DWARF.GNUPubTypes);
+ IO.mapOptional("debug_info", DWARF.CompileUnits);
+ IO.setContext(&oldContext);
+}
+
+void MappingTraits<DWARFYAML::Abbrev>::mapping(IO &IO,
+ DWARFYAML::Abbrev &Abbrev) {
+ IO.mapRequired("Code", Abbrev.Code);
+ IO.mapRequired("Tag", Abbrev.Tag);
+ IO.mapRequired("Children", Abbrev.Children);
+ IO.mapRequired("Attributes", Abbrev.Attributes);
+}
+
+void MappingTraits<DWARFYAML::AttributeAbbrev>::mapping(
+ IO &IO, DWARFYAML::AttributeAbbrev &AttAbbrev) {
+ IO.mapRequired("Attribute", AttAbbrev.Attribute);
+ IO.mapRequired("Form", AttAbbrev.Form);
+}
+
+void MappingTraits<DWARFYAML::ARangeDescriptor>::mapping(
+ IO &IO, DWARFYAML::ARangeDescriptor &Descriptor) {
+ IO.mapRequired("Address", Descriptor.Address);
+ IO.mapRequired("Length", Descriptor.Length);
+}
+
+void MappingTraits<DWARFYAML::ARange>::mapping(IO &IO,
+ DWARFYAML::ARange &Range) {
+ IO.mapRequired("Length", Range.Length);
+ IO.mapRequired("Version", Range.Version);
+ IO.mapRequired("CuOffset", Range.CuOffset);
+ IO.mapRequired("AddrSize", Range.AddrSize);
+ IO.mapRequired("SegSize", Range.SegSize);
+ IO.mapRequired("Descriptors", Range.Descriptors);
+}
+
+void MappingTraits<DWARFYAML::PubEntry>::mapping(IO &IO,
+ DWARFYAML::PubEntry &Entry) {
+ IO.mapRequired("DieOffset", Entry.DieOffset);
+ if (reinterpret_cast<DWARFYAML::PubSection *>(IO.getContext())->IsGNUStyle)
+ IO.mapRequired("Descriptor", Entry.Descriptor);
+ IO.mapRequired("Name", Entry.Name);
+}
+
+void MappingTraits<DWARFYAML::PubSection>::mapping(
+ IO &IO, DWARFYAML::PubSection &Section) {
+ auto OldContext = IO.getContext();
+ IO.setContext(&Section);
+
+ IO.mapRequired("Length", Section.Length);
+ IO.mapRequired("Version", Section.Version);
+ IO.mapRequired("UnitOffset", Section.UnitOffset);
+ IO.mapRequired("UnitSize", Section.UnitSize);
+ IO.mapRequired("Entries", Section.Entries);
+
+ IO.setContext(OldContext);
+}
+
+void MappingTraits<DWARFYAML::Unit>::mapping(IO &IO, DWARFYAML::Unit &Unit) {
+ IO.mapRequired("Length", Unit.Length);
+ IO.mapRequired("Version", Unit.Version);
+ IO.mapRequired("AbbrOffset", Unit.AbbrOffset);
+ IO.mapRequired("AddrSize", Unit.AddrSize);
+ IO.mapOptional("Entries", Unit.Entries);
+}
+
+void MappingTraits<DWARFYAML::Entry>::mapping(IO &IO, DWARFYAML::Entry &Entry) {
+ IO.mapRequired("AbbrCode", Entry.AbbrCode);
+ IO.mapRequired("Values", Entry.Values);
+}
+
+void MappingTraits<DWARFYAML::FormValue>::mapping(IO &IO,
+ DWARFYAML::FormValue &FormValue) {
+ IO.mapOptional("Value", FormValue.Value);
+ if(!FormValue.CStr.empty() || !IO.outputting())
+ IO.mapOptional("CStr", FormValue.CStr);
+ if(!FormValue.BlockData.empty() || !IO.outputting())
+ IO.mapOptional("BlockData", FormValue.BlockData);
+}
+
+} // namespace llvm::yaml
+
+} // namespace llvm
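Each mapping above binds one DWARF debug section to a YAML key (debug_str, debug_abbrev, debug_aranges, and so on), with the conditional keys emitted only when non-empty. A hedged round-trip sketch (it assumes DebugStrings is a vector of StringRef as in the in-tree DWARFYAML.h; the single entry is made up for illustration):

#include "llvm/ObjectYAML/DWARFYAML.h"
#include "llvm/Support/YAMLTraits.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Illustrative only: serialize a DWARFYAML::Data holding one debug_str
// entry through the mapping traits defined above.
static void emitMinimalDwarfYaml() {
  DWARFYAML::Data DI;
  DI.DebugStrings.push_back("main");
  yaml::Output YOut(outs());
  YOut << DI;
}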
diff --git a/contrib/llvm/lib/ObjectYAML/ELFYAML.cpp b/contrib/llvm/lib/ObjectYAML/ELFYAML.cpp
index 2137eee4752f..fe9af9f3ac76 100644
--- a/contrib/llvm/lib/ObjectYAML/ELFYAML.cpp
+++ b/contrib/llvm/lib/ObjectYAML/ELFYAML.cpp
@@ -194,6 +194,7 @@ ScalarEnumerationTraits<ELFYAML::ELF_EM>::enumeration(IO &IO,
ECase(EM_78KOR)
ECase(EM_56800EX)
ECase(EM_AMDGPU)
+ ECase(EM_RISCV)
ECase(EM_LANAI)
ECase(EM_BPF)
#undef ECase
@@ -422,6 +423,9 @@ void ScalarBitSetTraits<ELFYAML::ELF_SHF>::bitset(IO &IO,
BCase(SHF_GROUP)
BCase(SHF_TLS)
switch(Object->Header.Machine) {
+ case ELF::EM_ARM:
+ BCase(SHF_ARM_PURECODE)
+ break;
case ELF::EM_AMDGPU:
BCase(SHF_AMDGPU_HSA_GLOBAL)
BCase(SHF_AMDGPU_HSA_READONLY)
@@ -529,6 +533,9 @@ void ScalarEnumerationTraits<ELFYAML::ELF_REL>::enumeration(
case ELF::EM_ARM:
#include "llvm/Support/ELFRelocs/ARM.def"
break;
+ case ELF::EM_RISCV:
+#include "llvm/Support/ELFRelocs/RISCV.def"
+ break;
case ELF::EM_LANAI:
#include "llvm/Support/ELFRelocs/Lanai.def"
break;
diff --git a/contrib/llvm/lib/ObjectYAML/MachOYAML.cpp b/contrib/llvm/lib/ObjectYAML/MachOYAML.cpp
index d819e80836ce..a033a79189bd 100644
--- a/contrib/llvm/lib/ObjectYAML/MachOYAML.cpp
+++ b/contrib/llvm/lib/ObjectYAML/MachOYAML.cpp
@@ -14,6 +14,7 @@
#include "llvm/ObjectYAML/MachOYAML.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/Host.h"
#include "llvm/Support/MachO.h"
#include <string.h> // For memcpy, memset and strnlen.
@@ -22,6 +23,13 @@ namespace llvm {
MachOYAML::LoadCommand::~LoadCommand() {}
+bool MachOYAML::LinkEditData::isEmpty() const {
+ return 0 ==
+ RebaseOpcodes.size() + BindOpcodes.size() + WeakBindOpcodes.size() +
+ LazyBindOpcodes.size() + ExportTrie.Children.size() +
+ NameList.size() + StringTable.size();
+}
+
namespace yaml {
void ScalarTraits<char_16>::output(const char_16 &Val, void *,
@@ -93,9 +101,17 @@ void MappingTraits<MachOYAML::Object>::mapping(IO &IO,
IO.setContext(&Object);
}
IO.mapTag("!mach-o", true);
+ IO.mapOptional("IsLittleEndian", Object.IsLittleEndian,
+ sys::IsLittleEndianHost);
+ Object.DWARF.IsLittleEndian = Object.IsLittleEndian;
+
IO.mapRequired("FileHeader", Object.Header);
IO.mapOptional("LoadCommands", Object.LoadCommands);
- IO.mapOptional("LinkEditData", Object.LinkEdit);
+ if(!Object.LinkEdit.isEmpty() || !IO.outputting())
+ IO.mapOptional("LinkEditData", Object.LinkEdit);
+
+ if(!Object.DWARF.isEmpty() || !IO.outputting())
+ IO.mapOptional("DWARF", Object.DWARF);
if (IO.getContext() == &Object)
IO.setContext(nullptr);
@@ -138,7 +154,8 @@ void MappingTraits<MachOYAML::LinkEditData>::mapping(
IO.mapOptional("BindOpcodes", LinkEditData.BindOpcodes);
IO.mapOptional("WeakBindOpcodes", LinkEditData.WeakBindOpcodes);
IO.mapOptional("LazyBindOpcodes", LinkEditData.LazyBindOpcodes);
- IO.mapOptional("ExportTrie", LinkEditData.ExportTrie);
+ if(LinkEditData.ExportTrie.Children.size() > 0 || !IO.outputting())
+ IO.mapOptional("ExportTrie", LinkEditData.ExportTrie);
IO.mapOptional("NameList", LinkEditData.NameList);
IO.mapOptional("StringTable", LinkEditData.StringTable);
}
diff --git a/contrib/llvm/lib/ObjectYAML/ObjectYAML.cpp b/contrib/llvm/lib/ObjectYAML/ObjectYAML.cpp
index 97741b5ec8b7..cbbaac6062a7 100644
--- a/contrib/llvm/lib/ObjectYAML/ObjectYAML.cpp
+++ b/contrib/llvm/lib/ObjectYAML/ObjectYAML.cpp
@@ -51,7 +51,7 @@ void MappingTraits<YamlObjectFile>::mapping(IO &IO,
else
IO.setError(
llvm::Twine("YAML Object File unsupported document type tag '") +
- llvm::Twine(Tag.c_str()) + llvm::Twine("'!"));
+ llvm::Twine(Tag) + llvm::Twine("'!"));
}
}
}
diff --git a/contrib/llvm/lib/Option/ArgList.cpp b/contrib/llvm/lib/Option/ArgList.cpp
index 0826ef873195..f94de866ef34 100644
--- a/contrib/llvm/lib/Option/ArgList.cpp
+++ b/contrib/llvm/lib/Option/ArgList.cpp
@@ -39,9 +39,9 @@ void ArgList::append(Arg *A) {
}
void ArgList::eraseArg(OptSpecifier Id) {
- Args.erase(std::remove_if(begin(), end(),
- [=](Arg *A) { return A->getOption().matches(Id); }),
- end());
+ Args.erase(
+ remove_if(*this, [=](Arg *A) { return A->getOption().matches(Id); }),
+ end());
}
Arg *ArgList::getLastArgNoClaim(OptSpecifier Id) const {
@@ -259,19 +259,36 @@ void ArgList::AddLastArg(ArgStringList &Output, OptSpecifier Id0,
}
}
-void ArgList::AddAllArgs(ArgStringList &Output,
- ArrayRef<OptSpecifier> Ids) const {
+void ArgList::AddAllArgsExcept(ArgStringList &Output,
+ ArrayRef<OptSpecifier> Ids,
+ ArrayRef<OptSpecifier> ExcludeIds) const {
for (const Arg *Arg : Args) {
- for (OptSpecifier Id : Ids) {
+ bool Excluded = false;
+ for (OptSpecifier Id : ExcludeIds) {
if (Arg->getOption().matches(Id)) {
- Arg->claim();
- Arg->render(*this, Output);
+ Excluded = true;
break;
}
}
+ if (!Excluded) {
+ for (OptSpecifier Id : Ids) {
+ if (Arg->getOption().matches(Id)) {
+ Arg->claim();
+ Arg->render(*this, Output);
+ break;
+ }
+ }
+ }
}
}
+/// This is a nicer interface when you don't have a list of Ids to exclude.
+void ArgList::AddAllArgs(ArgStringList &Output,
+ ArrayRef<OptSpecifier> Ids) const {
+ ArrayRef<OptSpecifier> Exclude = None;
+ AddAllArgsExcept(Output, Ids, Exclude);
+}
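AddAllArgsExcept renders every argument that matches one of Ids unless it also matches something in ExcludeIds, and AddAllArgs is now simply the empty-exclusion case. A hedged usage sketch (the numeric option IDs are hypothetical stand-ins for values generated from an Options.td file):

#include "llvm/Option/ArgList.h"
using namespace llvm::opt;

// Illustrative only: forward args matching IDs 1-3, except those matching 2.
static void renderMostArgs(const ArgList &Args, ArgStringList &Out) {
  const OptSpecifier Wanted[] = {OptSpecifier(1), OptSpecifier(2),
                                 OptSpecifier(3)};
  const OptSpecifier Excluded[] = {OptSpecifier(2)};
  Args.AddAllArgsExcept(Out, Wanted, Excluded);
}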
+
/// This 3-opt variant of AddAllArgs could be eliminated in favor of one
/// that accepts a single specifier, given the above which accepts any number.
void ArgList::AddAllArgs(ArgStringList &Output, OptSpecifier Id0,
diff --git a/contrib/llvm/lib/Option/OptTable.cpp b/contrib/llvm/lib/Option/OptTable.cpp
index 13aa9667b5c2..7eafb00855d7 100644
--- a/contrib/llvm/lib/Option/OptTable.cpp
+++ b/contrib/llvm/lib/Option/OptTable.cpp
@@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Option/OptTable.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Option/Arg.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Option/Option.h"
@@ -142,8 +143,7 @@ OptTable::OptTable(ArrayRef<Info> OptionInfos, bool IgnoreCase)
StringRef Prefix = I->getKey();
for (StringRef::const_iterator C = Prefix.begin(), CE = Prefix.end();
C != CE; ++C)
- if (std::find(PrefixChars.begin(), PrefixChars.end(), *C)
- == PrefixChars.end())
+ if (!is_contained(PrefixChars, *C))
PrefixChars.push_back(*C);
}
}
@@ -317,7 +317,7 @@ static std::string getOptionHelpName(const OptTable &Opts, OptSpecifier Id) {
case Option::SeparateClass: case Option::JoinedOrSeparateClass:
case Option::RemainingArgsClass: case Option::RemainingArgsJoinedClass:
Name += ' ';
- // FALLTHROUGH
+ LLVM_FALLTHROUGH;
case Option::JoinedClass: case Option::CommaJoinedClass:
case Option::JoinedAndSeparateClass:
if (const char *MetaVarName = Opts.getOptionMetaVar(Id))
diff --git a/contrib/llvm/lib/Passes/PassBuilder.cpp b/contrib/llvm/lib/Passes/PassBuilder.cpp
index 0e64df80f91f..6e0aae5fd852 100644
--- a/contrib/llvm/lib/Passes/PassBuilder.cpp
+++ b/contrib/llvm/lib/Passes/PassBuilder.cpp
@@ -24,6 +24,7 @@
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/CFGPrinter.h"
#include "llvm/Analysis/CFLAndersAliasAnalysis.h"
#include "llvm/Analysis/CFLSteensAliasAnalysis.h"
#include "llvm/Analysis/CGSCCPassManager.h"
@@ -38,6 +39,7 @@
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/ModuleSummaryAnalysis.h"
#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
@@ -58,15 +60,19 @@
#include "llvm/Support/Regex.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/GCOVProfiler.h"
+#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/IPO/ConstantMerge.h"
#include "llvm/Transforms/IPO/CrossDSOCFI.h"
#include "llvm/Transforms/IPO/DeadArgumentElimination.h"
#include "llvm/Transforms/IPO/ElimAvailExtern.h"
#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
#include "llvm/Transforms/IPO/FunctionAttrs.h"
+#include "llvm/Transforms/IPO/FunctionImport.h"
#include "llvm/Transforms/IPO/GlobalDCE.h"
#include "llvm/Transforms/IPO/GlobalOpt.h"
+#include "llvm/Transforms/IPO/GlobalSplit.h"
#include "llvm/Transforms/IPO/InferFunctionAttrs.h"
+#include "llvm/Transforms/IPO/Inliner.h"
#include "llvm/Transforms/IPO/Internalize.h"
#include "llvm/Transforms/IPO/LowerTypeTests.h"
#include "llvm/Transforms/IPO/PartialInlining.h"
@@ -91,29 +97,41 @@
#include "llvm/Transforms/Scalar/IndVarSimplify.h"
#include "llvm/Transforms/Scalar/JumpThreading.h"
#include "llvm/Transforms/Scalar/LICM.h"
+#include "llvm/Transforms/Scalar/LoopDataPrefetch.h"
#include "llvm/Transforms/Scalar/LoopDeletion.h"
#include "llvm/Transforms/Scalar/LoopDistribute.h"
#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
#include "llvm/Transforms/Scalar/LoopInstSimplify.h"
#include "llvm/Transforms/Scalar/LoopRotation.h"
#include "llvm/Transforms/Scalar/LoopSimplifyCFG.h"
+#include "llvm/Transforms/Scalar/LoopStrengthReduce.h"
+#include "llvm/Transforms/Scalar/LoopUnrollPass.h"
#include "llvm/Transforms/Scalar/LowerAtomic.h"
#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
+#include "llvm/Transforms/Scalar/LowerGuardIntrinsic.h"
#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
#include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h"
+#include "llvm/Transforms/Scalar/NaryReassociate.h"
+#include "llvm/Transforms/Scalar/NewGVN.h"
#include "llvm/Transforms/Scalar/PartiallyInlineLibCalls.h"
#include "llvm/Transforms/Scalar/Reassociate.h"
#include "llvm/Transforms/Scalar/SCCP.h"
#include "llvm/Transforms/Scalar/SROA.h"
#include "llvm/Transforms/Scalar/SimplifyCFG.h"
#include "llvm/Transforms/Scalar/Sink.h"
+#include "llvm/Transforms/Scalar/SpeculativeExecution.h"
#include "llvm/Transforms/Scalar/TailRecursionElimination.h"
#include "llvm/Transforms/Utils/AddDiscriminators.h"
+#include "llvm/Transforms/Utils/BreakCriticalEdges.h"
#include "llvm/Transforms/Utils/LCSSA.h"
+#include "llvm/Transforms/Utils/LibCallsShrinkWrap.h"
#include "llvm/Transforms/Utils/LoopSimplify.h"
+#include "llvm/Transforms/Utils/LowerInvoke.h"
#include "llvm/Transforms/Utils/Mem2Reg.h"
#include "llvm/Transforms/Utils/MemorySSA.h"
+#include "llvm/Transforms/Utils/NameAnonGlobals.h"
#include "llvm/Transforms/Utils/SimplifyInstructions.h"
+#include "llvm/Transforms/Utils/SymbolRewriter.h"
#include "llvm/Transforms/Vectorize/LoopVectorize.h"
#include "llvm/Transforms/Vectorize/SLPVectorizer.h"
@@ -123,11 +141,26 @@ using namespace llvm;
static Regex DefaultAliasRegex("^(default|lto-pre-link|lto)<(O[0123sz])>$");
+static bool isOptimizingForSize(PassBuilder::OptimizationLevel Level) {
+ switch (Level) {
+ case PassBuilder::O0:
+ case PassBuilder::O1:
+ case PassBuilder::O2:
+ case PassBuilder::O3:
+ return false;
+
+ case PassBuilder::Os:
+ case PassBuilder::Oz:
+ return true;
+ }
+ llvm_unreachable("Invalid optimization level!");
+}
+
namespace {
/// \brief No-op module pass which does nothing.
struct NoOpModulePass {
- PreservedAnalyses run(Module &M, AnalysisManager<Module> &) {
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &) {
return PreservedAnalyses::all();
}
static StringRef name() { return "NoOpModulePass"; }
@@ -136,18 +169,18 @@ struct NoOpModulePass {
/// \brief No-op module analysis.
class NoOpModuleAnalysis : public AnalysisInfoMixin<NoOpModuleAnalysis> {
friend AnalysisInfoMixin<NoOpModuleAnalysis>;
- static char PassID;
+ static AnalysisKey Key;
public:
struct Result {};
- Result run(Module &, AnalysisManager<Module> &) { return Result(); }
+ Result run(Module &, ModuleAnalysisManager &) { return Result(); }
static StringRef name() { return "NoOpModuleAnalysis"; }
};
/// \brief No-op CGSCC pass which does nothing.
struct NoOpCGSCCPass {
- PreservedAnalyses run(LazyCallGraph::SCC &C,
- AnalysisManager<LazyCallGraph::SCC> &) {
+ PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &,
+ LazyCallGraph &, CGSCCUpdateResult &UR) {
return PreservedAnalyses::all();
}
static StringRef name() { return "NoOpCGSCCPass"; }
@@ -156,11 +189,11 @@ struct NoOpCGSCCPass {
/// \brief No-op CGSCC analysis.
class NoOpCGSCCAnalysis : public AnalysisInfoMixin<NoOpCGSCCAnalysis> {
friend AnalysisInfoMixin<NoOpCGSCCAnalysis>;
- static char PassID;
+ static AnalysisKey Key;
public:
struct Result {};
- Result run(LazyCallGraph::SCC &, AnalysisManager<LazyCallGraph::SCC> &) {
+ Result run(LazyCallGraph::SCC &, CGSCCAnalysisManager &, LazyCallGraph &G) {
return Result();
}
static StringRef name() { return "NoOpCGSCCAnalysis"; }
@@ -168,7 +201,7 @@ public:
/// \brief No-op function pass which does nothing.
struct NoOpFunctionPass {
- PreservedAnalyses run(Function &F, AnalysisManager<Function> &) {
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &) {
return PreservedAnalyses::all();
}
static StringRef name() { return "NoOpFunctionPass"; }
@@ -177,17 +210,17 @@ struct NoOpFunctionPass {
/// \brief No-op function analysis.
class NoOpFunctionAnalysis : public AnalysisInfoMixin<NoOpFunctionAnalysis> {
friend AnalysisInfoMixin<NoOpFunctionAnalysis>;
- static char PassID;
+ static AnalysisKey Key;
public:
struct Result {};
- Result run(Function &, AnalysisManager<Function> &) { return Result(); }
+ Result run(Function &, FunctionAnalysisManager &) { return Result(); }
static StringRef name() { return "NoOpFunctionAnalysis"; }
};
/// \brief No-op loop pass which does nothing.
struct NoOpLoopPass {
- PreservedAnalyses run(Loop &L, AnalysisManager<Loop> &) {
+ PreservedAnalyses run(Loop &L, LoopAnalysisManager &) {
return PreservedAnalyses::all();
}
static StringRef name() { return "NoOpLoopPass"; }
@@ -196,18 +229,18 @@ struct NoOpLoopPass {
/// \brief No-op loop analysis.
class NoOpLoopAnalysis : public AnalysisInfoMixin<NoOpLoopAnalysis> {
friend AnalysisInfoMixin<NoOpLoopAnalysis>;
- static char PassID;
+ static AnalysisKey Key;
public:
struct Result {};
- Result run(Loop &, AnalysisManager<Loop> &) { return Result(); }
+ Result run(Loop &, LoopAnalysisManager &) { return Result(); }
static StringRef name() { return "NoOpLoopAnalysis"; }
};
-char NoOpModuleAnalysis::PassID;
-char NoOpCGSCCAnalysis::PassID;
-char NoOpFunctionAnalysis::PassID;
-char NoOpLoopAnalysis::PassID;
+AnalysisKey NoOpModuleAnalysis::Key;
+AnalysisKey NoOpCGSCCAnalysis::Key;
+AnalysisKey NoOpFunctionAnalysis::Key;
+AnalysisKey NoOpLoopAnalysis::Key;
} // End anonymous namespace.
@@ -235,43 +268,356 @@ void PassBuilder::registerLoopAnalyses(LoopAnalysisManager &LAM) {
#include "PassRegistry.def"
}
-void PassBuilder::addPerModuleDefaultPipeline(ModulePassManager &MPM,
- OptimizationLevel Level,
- bool DebugLogging) {
- // FIXME: Finish fleshing this out to match the legacy pipelines.
+FunctionPassManager
+PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
+ bool DebugLogging) {
+ assert(Level != O0 && "Must request optimizations!");
+ FunctionPassManager FPM(DebugLogging);
+
+ // Form SSA out of local memory accesses after breaking apart aggregates into
+ // scalars.
+ FPM.addPass(SROA());
+
+ // Catch trivial redundancies
+ FPM.addPass(EarlyCSEPass());
+
+ // Speculative execution if the target has divergent branches; otherwise nop.
+ FPM.addPass(SpeculativeExecutionPass());
+
+ // Optimize based on known information about branches, and cleanup afterward.
+ FPM.addPass(JumpThreadingPass());
+ FPM.addPass(CorrelatedValuePropagationPass());
+ FPM.addPass(SimplifyCFGPass());
+ FPM.addPass(InstCombinePass());
+
+ if (!isOptimizingForSize(Level))
+ FPM.addPass(LibCallsShrinkWrapPass());
+
+ FPM.addPass(TailCallElimPass());
+ FPM.addPass(SimplifyCFGPass());
+
+ // Form canonically associated expression trees, and simplify the trees using
+ // basic mathematical properties. For example, this will form (nearly)
+ // minimal multiplication trees.
+ FPM.addPass(ReassociatePass());
+
+ // Add the primary loop simplification pipeline.
+ // FIXME: Currently this is split into two loop pass pipelines because we run
+ // some function passes in between them. These can and should be replaced by
+ // loop pass equivalents but those aren't ready yet. Specifically,
+ // `SimplifyCFGPass` and `InstCombinePass` are used. We have
+ // `LoopSimplifyCFGPass` which isn't yet powerful enough, and the closest to
+ // the other we have is `LoopInstSimplify`.
+ LoopPassManager LPM1(DebugLogging), LPM2(DebugLogging);
+
+ // FIXME: Enable these when the loop pass manager can support enforcing loop
+ // simplified and LCSSA form as well as updating the loop nest after
+ // transformations and we finish porting the loop passes.
+#if 0
+ // Rotate Loop - disable header duplication at -Oz
+ LPM1.addPass(LoopRotatePass(Level != Oz));
+ LPM1.addPass(LICMPass());
+ LPM1.addPass(LoopUnswitchPass(/* OptimizeForSize */ Level != O3));
+ LPM2.addPass(IndVarSimplifyPass());
+ LPM2.addPass(LoopIdiomPass());
+ LPM2.addPass(LoopDeletionPass());
+ LPM2.addPass(SimpleLoopUnrollPass());
+#endif
+ FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1)));
+ FPM.addPass(SimplifyCFGPass());
+ FPM.addPass(InstCombinePass());
+ FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2)));
+
+ // Eliminate redundancies.
+ if (Level != O1) {
+ // These passes add substantial compile time so skip them at O1.
+ FPM.addPass(MergedLoadStoreMotionPass());
+ FPM.addPass(GVN());
+ }
+
+ // Specially optimize memory movement as it doesn't look like dataflow in SSA.
+ FPM.addPass(MemCpyOptPass());
+
+ // Sparse conditional constant propagation.
+ // FIXME: It isn't clear why we do this *after* loop passes rather than
+ // before...
+ FPM.addPass(SCCPPass());
+
+ // Delete dead bit computations (instcombine runs after to fold away the dead
+ // computations, and then ADCE will run later to exploit any new DCE
+ // opportunities that creates).
+ FPM.addPass(BDCEPass());
+
+ // Run instcombine after redundancy and dead bit elimination to exploit
+ // opportunities opened up by them.
+ FPM.addPass(InstCombinePass());
+
+ // Re-consider control flow based optimizations after redundancy elimination,
+ // redo DCE, etc.
+ FPM.addPass(JumpThreadingPass());
+ FPM.addPass(CorrelatedValuePropagationPass());
+ FPM.addPass(DSEPass());
+ // FIXME: Enable this when the loop pass manager can support enforcing loop
+ // simplified and LCSSA form as well as updating the loop nest after
+ // transformations and we finish porting the loop passes.
+#if 0
+ FPM.addPass(createFunctionToLoopPassAdaptor(LICMPass()));
+#endif
+
+ // Finally, do an expensive DCE pass to catch all the dead code exposed by
+ // the simplifications and basic cleanup after all the simplifications.
+ FPM.addPass(ADCEPass());
+ FPM.addPass(SimplifyCFGPass());
+ FPM.addPass(InstCombinePass());
+
+ return FPM;
+}
+
+ModulePassManager
+PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
+ bool DebugLogging) {
+ assert(Level != O0 && "Must request optimizations for the default pipeline!");
+ ModulePassManager MPM(DebugLogging);
+
+ // Force any function attributes we want the rest of the pipeline to observe.
+ MPM.addPass(ForceFunctionAttrsPass());
+
+ // Do basic inference of function attributes from known properties of system
+ // libraries and other oracles.
+ MPM.addPass(InferFunctionAttrsPass());
+
+ // Create an early function pass manager to cleanup the output of the
+ // frontend.
FunctionPassManager EarlyFPM(DebugLogging);
EarlyFPM.addPass(SimplifyCFGPass());
EarlyFPM.addPass(SROA());
EarlyFPM.addPass(EarlyCSEPass());
EarlyFPM.addPass(LowerExpectIntrinsicPass());
-
+ EarlyFPM.addPass(GVNHoistPass());
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM)));
+
+ // Interprocedural constant propagation now that basic cleanup has occurred
+ // and prior to optimizing globals.
+ // FIXME: This position in the pipeline hasn't been carefully considered in
+ // years, it should be re-analyzed.
+ MPM.addPass(IPSCCPPass());
+
+ // Optimize globals to try and fold them into constants.
+ MPM.addPass(GlobalOptPass());
+
+ // Promote any localized globals to SSA registers.
+ // FIXME: Should this instead be a run of SROA?
+ // FIXME: We should probably run instcombine and simplify-cfg afterward to
+ // delete control flows that are dead once globals have been folded to
+ // constants.
+ MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass()));
+
+ // Remove any dead arguments exposed by cleanups and constant folding
+ // globals.
+ MPM.addPass(DeadArgumentEliminationPass());
+
+ // Create a small function pass pipeline to cleanup after all the global
+ // optimizations.
+ FunctionPassManager GlobalCleanupPM(DebugLogging);
+ GlobalCleanupPM.addPass(InstCombinePass());
+ GlobalCleanupPM.addPass(SimplifyCFGPass());
+ MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM)));
+
+ // FIXME: Enable this when cross-IR-unit analysis invalidation is working.
+#if 0
+ MPM.addPass(RequireAnalysisPass<GlobalsAA>());
+#endif
+
+ // Now begin the main postorder CGSCC pipeline.
+ // FIXME: The current CGSCC pipeline has its origins in the legacy pass
+ // manager and trying to emulate its precise behavior. Much of this doesn't
+ // make a lot of sense and we should revisit the core CGSCC structure.
+ CGSCCPassManager MainCGPipeline(DebugLogging);
+
+ // Note: historically, the PruneEH pass was run first to deduce nounwind and
+ // generally clean up exception handling overhead. It isn't clear this is
+ // valuable as the inliner doesn't currently care whether it is inlining an
+ // invoke or a call.
+
+ // Run the inliner first. The theory is that we are walking bottom-up and so
+ // the callees have already been fully optimized, and we want to inline them
+ // into the callers so that our optimizations can reflect that.
+ // FIXME: Customize the threshold based on optimization level.
+ MainCGPipeline.addPass(InlinerPass());
+
+ // Now deduce any function attributes based on the current code.
+ MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
+
+ // Lastly, add the core function simplification pipeline nested inside the
+ // CGSCC walk.
+ MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
+ buildFunctionSimplificationPipeline(Level, DebugLogging)));
+
+ MPM.addPass(
+ createModuleToPostOrderCGSCCPassAdaptor(std::move(MainCGPipeline)));
+
+ // This ends the canonicalization and simplification phase of the pipeline.
+ // At this point, we expect to have canonical and simple IR which we begin
+ // *optimizing* for efficient execution going forward.
+
+ // Eliminate externally available functions now that inlining is over -- we
+ // won't emit these anyways.
+ MPM.addPass(EliminateAvailableExternallyPass());
+
+ // Do RPO function attribute inference across the module to forward-propagate
+ // attributes where applicable.
+ // FIXME: Is this really an optimization rather than a canonicalization?
+ MPM.addPass(ReversePostOrderFunctionAttrsPass());
+
+ // Recompute GlobalsAA here prior to function passes. This is particularly
+ // useful as the above will have inlined, DCE'ed, and function-attr
+ // propagated everything. We should at this point have a reasonably minimal
+ // and richly annotated call graph. By computing aliasing and mod/ref
+ // information for all local globals here, the late loop passes and notably
+ // the vectorizer will be able to use them to help recognize vectorizable
+ // memory operations.
+ // FIXME: Enable this once analysis invalidation is fully supported.
+#if 0
+ MPM.addPass(Require<GlobalsAA>());
+#endif
+
+ FunctionPassManager OptimizePM(DebugLogging);
+ OptimizePM.addPass(Float2IntPass());
+ // FIXME: We need to run some loop optimizations to re-rotate loops after
+ // simplify-cfg and others undo their rotation.
+
+ // Optimize the loop execution. These passes operate on entire loop nests
+ // rather than on each loop in an inside-out manner, and so they are actually
+ // function passes.
+ OptimizePM.addPass(LoopDistributePass());
+#if 0
+ // FIXME: LoopVectorize relies on "requiring" LCSSA which isn't supported in
+ // the new PM.
+ OptimizePM.addPass(LoopVectorizePass());
+#endif
+ // FIXME: Need to port Loop Load Elimination and add it here.
+ OptimizePM.addPass(InstCombinePass());
+
+ // Optimize parallel scalar instruction chains into SIMD instructions.
+ OptimizePM.addPass(SLPVectorizerPass());
+
+ // Cleanup after vectorizers.
+ OptimizePM.addPass(SimplifyCFGPass());
+ OptimizePM.addPass(InstCombinePass());
+
+ // Unroll small loops to hide loop backedge latency and saturate any parallel
+ // execution resources of an out-of-order processor.
+ // FIXME: Need to add once loop pass pipeline is available.
+
+ // FIXME: Add the loop sink pass when ported.
+
+ // FIXME: Add cleanup from the loop pass manager when we're forming LCSSA
+ // here.
+
+ // Now that we've vectorized and unrolled loops, we may have more refined
+ // alignment information, try to re-derive it here.
+ OptimizePM.addPass(AlignmentFromAssumptionsPass());
+
+ // Add the core optimizing pipeline.
+ MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM)));
+
+ // Now we need to do some global optimization transforms.
+ // FIXME: It would seem like these should come first in the optimization
+ // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
+ // ordering here.
+ MPM.addPass(GlobalDCEPass());
+ MPM.addPass(ConstantMergePass());
+
+ return MPM;
}
-void PassBuilder::addLTOPreLinkDefaultPipeline(ModulePassManager &MPM,
- OptimizationLevel Level,
- bool DebugLogging) {
+ModulePassManager
+PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level,
+ bool DebugLogging) {
+ assert(Level != O0 && "Must request optimizations for the default pipeline!");
// FIXME: We should use a customized pre-link pipeline!
- addPerModuleDefaultPipeline(MPM, Level, DebugLogging);
+ return buildPerModuleDefaultPipeline(Level, DebugLogging);
}
-void PassBuilder::addLTODefaultPipeline(ModulePassManager &MPM,
- OptimizationLevel Level,
- bool DebugLogging) {
+ModulePassManager PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level,
+ bool DebugLogging) {
+ assert(Level != O0 && "Must request optimizations for the default pipeline!");
+ ModulePassManager MPM(DebugLogging);
+
// FIXME: Finish fleshing this out to match the legacy LTO pipelines.
FunctionPassManager LateFPM(DebugLogging);
LateFPM.addPass(InstCombinePass());
LateFPM.addPass(SimplifyCFGPass());
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(LateFPM)));
+
+ return MPM;
+}
+
+AAManager PassBuilder::buildDefaultAAPipeline() {
+ AAManager AA;
+
+ // The order in which these are registered determines their priority when
+ // being queried.
+
+ // First we register the basic alias analysis that provides the majority of
+ // per-function local AA logic. This is a stateless, on-demand local set of
+ // AA techniques.
+ AA.registerFunctionAnalysis<BasicAA>();
+
+ // Next we query fast, specialized alias analyses that wrap IR-embedded
+ // information about aliasing.
+ AA.registerFunctionAnalysis<ScopedNoAliasAA>();
+ AA.registerFunctionAnalysis<TypeBasedAA>();
+
+ // Add support for querying global aliasing information when available.
+ // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
+ // analysis, all that the `AAManager` can do is query for any *cached*
+ // results from `GlobalsAA` through a readonly proxy.
+#if 0
+ // FIXME: Enable once the invalidation logic supports this. Currently, the
+ // `AAManager` will hold stale references to the module analyses.
+ AA.registerModuleAnalysis<GlobalsAA>();
+#endif
+
+ return AA;
+}
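Because the build*Pipeline entry points now return the constructed manager, driver code can compose them directly. A hedged sketch of the usual setup, mirroring how the new-pass-manager driver in opt wires things together (the include set is abridged and the exact registration signatures are assumed to match the PassBuilder declarations in this snapshot):

#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Passes/PassBuilder.h"
using namespace llvm;

// Illustrative driver: register analyses, build the default -O2 module
// pipeline, and run it over a module.
static void runDefaultO2(Module &M) {
  PassBuilder PB;
  LoopAnalysisManager LAM;
  FunctionAnalysisManager FAM;
  CGSCCAnalysisManager CGAM;
  ModuleAnalysisManager MAM;

  // Use the default alias-analysis stack built above.
  FAM.registerPass([&] { return PB.buildDefaultAAPipeline(); });
  PB.registerModuleAnalyses(MAM);
  PB.registerCGSCCAnalyses(CGAM);
  PB.registerFunctionAnalyses(FAM);
  PB.registerLoopAnalyses(LAM);
  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

  ModulePassManager MPM =
      PB.buildPerModuleDefaultPipeline(PassBuilder::O2, /*DebugLogging*/ false);
  MPM.run(M, MAM);
}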
+
+static Optional<int> parseRepeatPassName(StringRef Name) {
+ if (!Name.consume_front("repeat<") || !Name.consume_back(">"))
+ return None;
+ int Count;
+ if (Name.getAsInteger(0, Count) || Count <= 0)
+ return None;
+ return Count;
+}
+
+static Optional<int> parseDevirtPassName(StringRef Name) {
+ if (!Name.consume_front("devirt<") || !Name.consume_back(">"))
+ return None;
+ int Count;
+ if (Name.getAsInteger(0, Count) || Count <= 0)
+ return None;
+ return Count;
}
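Both helpers peel the wrapper with StringRef::consume_front/consume_back and validate the count with getAsInteger, so "repeat<3>" yields 3 while "repeat<0>", "repeat<x>" and "repeat<3" are all rejected. The same parsing shape, shown standalone for illustration:

#include "llvm/ADT/Optional.h"
#include "llvm/ADT/StringRef.h"
using namespace llvm;

// Parse strings of the form Prefix + digits + ">" into a positive count.
// parseWrappedCount("repeat<3>", "repeat<") == 3; malformed input -> None.
static Optional<int> parseWrappedCount(StringRef Name, StringRef Prefix) {
  if (!Name.consume_front(Prefix) || !Name.consume_back(">"))
    return None;
  int Count;
  if (Name.getAsInteger(0, Count) || Count <= 0)
    return None;
  return Count;
}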
-#ifndef NDEBUG
static bool isModulePassName(StringRef Name) {
// Manually handle aliases for pre-configured pipeline fragments.
if (Name.startswith("default") || Name.startswith("lto"))
return DefaultAliasRegex.match(Name);
+ // Explicitly handle pass manager names.
+ if (Name == "module")
+ return true;
+ if (Name == "cgscc")
+ return true;
+ if (Name == "function")
+ return true;
+
+ // Explicitly handle custom-parsed pass names.
+ if (parseRepeatPassName(Name))
+ return true;
+
#define MODULE_PASS(NAME, CREATE_PASS) \
if (Name == NAME) \
return true;
@@ -282,9 +628,20 @@ static bool isModulePassName(StringRef Name) {
return false;
}
-#endif
static bool isCGSCCPassName(StringRef Name) {
+ // Explicitly handle pass manager names.
+ if (Name == "cgscc")
+ return true;
+ if (Name == "function")
+ return true;
+
+ // Explicitly handle custom-parsed pass names.
+ if (parseRepeatPassName(Name))
+ return true;
+ if (parseDevirtPassName(Name))
+ return true;
+
#define CGSCC_PASS(NAME, CREATE_PASS) \
if (Name == NAME) \
return true;
@@ -297,6 +654,16 @@ static bool isCGSCCPassName(StringRef Name) {
}
static bool isFunctionPassName(StringRef Name) {
+ // Explicitly handle pass manager names.
+ if (Name == "function")
+ return true;
+ if (Name == "loop")
+ return true;
+
+ // Explicitly handle custom-parsed pass names.
+ if (parseRepeatPassName(Name))
+ return true;
+
#define FUNCTION_PASS(NAME, CREATE_PASS) \
if (Name == NAME) \
return true;
@@ -309,6 +676,14 @@ static bool isFunctionPassName(StringRef Name) {
}
static bool isLoopPassName(StringRef Name) {
+ // Explicitly handle pass manager names.
+ if (Name == "loop")
+ return true;
+
+ // Explicitly handle custom-parsed pass names.
+ if (parseRepeatPassName(Name))
+ return true;
+
#define LOOP_PASS(NAME, CREATE_PASS) \
if (Name == NAME) \
return true;
@@ -320,8 +695,108 @@ static bool isLoopPassName(StringRef Name) {
return false;
}
-bool PassBuilder::parseModulePassName(ModulePassManager &MPM, StringRef Name,
- bool DebugLogging) {
+Optional<std::vector<PassBuilder::PipelineElement>>
+PassBuilder::parsePipelineText(StringRef Text) {
+ std::vector<PipelineElement> ResultPipeline;
+
+ SmallVector<std::vector<PipelineElement> *, 4> PipelineStack = {
+ &ResultPipeline};
+ for (;;) {
+ std::vector<PipelineElement> &Pipeline = *PipelineStack.back();
+ size_t Pos = Text.find_first_of(",()");
+ Pipeline.push_back({Text.substr(0, Pos), {}});
+
+ // If we have a single terminating name, we're done.
+ if (Pos == Text.npos)
+ break;
+
+ char Sep = Text[Pos];
+ Text = Text.substr(Pos + 1);
+ if (Sep == ',')
+ // Just a name ending in a comma, continue.
+ continue;
+
+ if (Sep == '(') {
+ // Push the inner pipeline onto the stack to continue processing.
+ PipelineStack.push_back(&Pipeline.back().InnerPipeline);
+ continue;
+ }
+
+ assert(Sep == ')' && "Bogus separator!");
+ // When handling the close parenthesis, we greedily consume them to avoid
+ // empty strings in the pipeline.
+ do {
+ // If we try to pop the outer pipeline we have unbalanced parentheses.
+ if (PipelineStack.size() == 1)
+ return None;
+
+ PipelineStack.pop_back();
+ } while (Text.consume_front(")"));
+
+ // Check if we've finished parsing.
+ if (Text.empty())
+ break;
+
+ // Otherwise, the end of an inner pipeline always has to be followed by
+ // a comma, and then we can continue.
+ if (!Text.consume_front(","))
+ return None;
+ }
+
+ if (PipelineStack.size() > 1)
+ // Unbalanced parentheses.
+ return None;
+
+ assert(PipelineStack.back() == &ResultPipeline &&
+ "Wrong pipeline at the bottom of the stack!");
+ return {std::move(ResultPipeline)};
+}
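
For illustration, parsePipelineText turns a string such as

    "module(function(instcombine,loop(unroll)),globaldce)"

into a PipelineElement tree: one top-level "module" element whose InnerPipeline holds a "function" element (containing "instcombine" and a "loop" element wrapping "unroll") followed by "globaldce".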
+
+bool PassBuilder::parseModulePass(ModulePassManager &MPM,
+ const PipelineElement &E, bool VerifyEachPass,
+ bool DebugLogging) {
+ auto &Name = E.Name;
+ auto &InnerPipeline = E.InnerPipeline;
+
+ // First handle complex passes like the pass managers which carry pipelines.
+ if (!InnerPipeline.empty()) {
+ if (Name == "module") {
+ ModulePassManager NestedMPM(DebugLogging);
+ if (!parseModulePassPipeline(NestedMPM, InnerPipeline, VerifyEachPass,
+ DebugLogging))
+ return false;
+ MPM.addPass(std::move(NestedMPM));
+ return true;
+ }
+ if (Name == "cgscc") {
+ CGSCCPassManager CGPM(DebugLogging);
+ if (!parseCGSCCPassPipeline(CGPM, InnerPipeline, VerifyEachPass,
+ DebugLogging))
+ return false;
+ MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM),
+ DebugLogging));
+ return true;
+ }
+ if (Name == "function") {
+ FunctionPassManager FPM(DebugLogging);
+ if (!parseFunctionPassPipeline(FPM, InnerPipeline, VerifyEachPass,
+ DebugLogging))
+ return false;
+ MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+ return true;
+ }
+ if (auto Count = parseRepeatPassName(Name)) {
+ ModulePassManager NestedMPM(DebugLogging);
+ if (!parseModulePassPipeline(NestedMPM, InnerPipeline, VerifyEachPass,
+ DebugLogging))
+ return false;
+ MPM.addPass(createRepeatedPass(*Count, std::move(NestedMPM)));
+ return true;
+ }
+ // Normal passes can't have pipelines.
+ return false;
+ }
+
// Manually handle aliases for pre-configured pipeline fragments.
if (Name.startswith("default") || Name.startswith("lto")) {
SmallVector<StringRef, 3> Matches;
@@ -329,25 +804,29 @@ bool PassBuilder::parseModulePassName(ModulePassManager &MPM, StringRef Name,
return false;
assert(Matches.size() == 3 && "Must capture two matched strings!");
- auto L = StringSwitch<OptimizationLevel>(Matches[2])
- .Case("O0", O0)
- .Case("O1", O1)
- .Case("O2", O2)
- .Case("O3", O3)
- .Case("Os", Os)
- .Case("Oz", Oz);
+ OptimizationLevel L = StringSwitch<OptimizationLevel>(Matches[2])
+ .Case("O0", O0)
+ .Case("O1", O1)
+ .Case("O2", O2)
+ .Case("O3", O3)
+ .Case("Os", Os)
+ .Case("Oz", Oz);
+ if (L == O0)
+ // At O0 we do nothing at all!
+ return true;
if (Matches[1] == "default") {
- addPerModuleDefaultPipeline(MPM, L, DebugLogging);
+ MPM.addPass(buildPerModuleDefaultPipeline(L, DebugLogging));
} else if (Matches[1] == "lto-pre-link") {
- addLTOPreLinkDefaultPipeline(MPM, L, DebugLogging);
+ MPM.addPass(buildLTOPreLinkDefaultPipeline(L, DebugLogging));
} else {
assert(Matches[1] == "lto" && "Not one of the matched options!");
- addLTODefaultPipeline(MPM, L, DebugLogging);
+ MPM.addPass(buildLTODefaultPipeline(L, DebugLogging));
}
return true;
}
+ // Finally expand the basic registered passes from the .inc file.
#define MODULE_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
MPM.addPass(CREATE_PASS); \
@@ -355,8 +834,9 @@ bool PassBuilder::parseModulePassName(ModulePassManager &MPM, StringRef Name,
}
#define MODULE_ANALYSIS(NAME, CREATE_PASS) \
if (Name == "require<" NAME ">") { \
- MPM.addPass(RequireAnalysisPass< \
- std::remove_reference<decltype(CREATE_PASS)>::type>()); \
+ MPM.addPass( \
+ RequireAnalysisPass< \
+ std::remove_reference<decltype(CREATE_PASS)>::type, Module>()); \
return true; \
} \
if (Name == "invalidate<" NAME ">") { \
@@ -369,7 +849,55 @@ bool PassBuilder::parseModulePassName(ModulePassManager &MPM, StringRef Name,
return false;
}
-bool PassBuilder::parseCGSCCPassName(CGSCCPassManager &CGPM, StringRef Name) {
+bool PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM,
+ const PipelineElement &E, bool VerifyEachPass,
+ bool DebugLogging) {
+ auto &Name = E.Name;
+ auto &InnerPipeline = E.InnerPipeline;
+
+ // First handle complex passes like the pass managers which carry pipelines.
+ if (!InnerPipeline.empty()) {
+ if (Name == "cgscc") {
+ CGSCCPassManager NestedCGPM(DebugLogging);
+ if (!parseCGSCCPassPipeline(NestedCGPM, InnerPipeline, VerifyEachPass,
+ DebugLogging))
+ return false;
+ // Add the nested pass manager with the appropriate adaptor.
+ CGPM.addPass(std::move(NestedCGPM));
+ return true;
+ }
+ if (Name == "function") {
+ FunctionPassManager FPM(DebugLogging);
+ if (!parseFunctionPassPipeline(FPM, InnerPipeline, VerifyEachPass,
+ DebugLogging))
+ return false;
+ // Add the nested pass manager with the appropriate adaptor.
+ CGPM.addPass(
+ createCGSCCToFunctionPassAdaptor(std::move(FPM), DebugLogging));
+ return true;
+ }
+ if (auto Count = parseRepeatPassName(Name)) {
+ CGSCCPassManager NestedCGPM(DebugLogging);
+ if (!parseCGSCCPassPipeline(NestedCGPM, InnerPipeline, VerifyEachPass,
+ DebugLogging))
+ return false;
+ CGPM.addPass(createRepeatedPass(*Count, std::move(NestedCGPM)));
+ return true;
+ }
+ if (auto MaxRepetitions = parseDevirtPassName(Name)) {
+ CGSCCPassManager NestedCGPM(DebugLogging);
+ if (!parseCGSCCPassPipeline(NestedCGPM, InnerPipeline, VerifyEachPass,
+ DebugLogging))
+ return false;
+ CGPM.addPass(createDevirtSCCRepeatedPass(std::move(NestedCGPM),
+ *MaxRepetitions, DebugLogging));
+ return true;
+ }
+ // Normal passes can't have pipelines.
+ return false;
+ }
+
+ // Now expand the basic registered passes from the .inc file.
#define CGSCC_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
CGPM.addPass(CREATE_PASS); \
@@ -378,7 +906,9 @@ bool PassBuilder::parseCGSCCPassName(CGSCCPassManager &CGPM, StringRef Name) {
#define CGSCC_ANALYSIS(NAME, CREATE_PASS) \
if (Name == "require<" NAME ">") { \
CGPM.addPass(RequireAnalysisPass< \
- std::remove_reference<decltype(CREATE_PASS)>::type>()); \
+ std::remove_reference<decltype(CREATE_PASS)>::type, \
+ LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &, \
+ CGSCCUpdateResult &>()); \
return true; \
} \
if (Name == "invalidate<" NAME ">") { \
@@ -391,8 +921,45 @@ bool PassBuilder::parseCGSCCPassName(CGSCCPassManager &CGPM, StringRef Name) {
return false;
}
-bool PassBuilder::parseFunctionPassName(FunctionPassManager &FPM,
- StringRef Name) {
+bool PassBuilder::parseFunctionPass(FunctionPassManager &FPM,
+ const PipelineElement &E,
+ bool VerifyEachPass, bool DebugLogging) {
+ auto &Name = E.Name;
+ auto &InnerPipeline = E.InnerPipeline;
+
+ // First handle complex passes like the pass managers which carry pipelines.
+ if (!InnerPipeline.empty()) {
+ if (Name == "function") {
+ FunctionPassManager NestedFPM(DebugLogging);
+ if (!parseFunctionPassPipeline(NestedFPM, InnerPipeline, VerifyEachPass,
+ DebugLogging))
+ return false;
+ // Add the nested pass manager with the appropriate adaptor.
+ FPM.addPass(std::move(NestedFPM));
+ return true;
+ }
+ if (Name == "loop") {
+ LoopPassManager LPM(DebugLogging);
+ if (!parseLoopPassPipeline(LPM, InnerPipeline, VerifyEachPass,
+ DebugLogging))
+ return false;
+ // Add the nested pass manager with the appropriate adaptor.
+ FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM)));
+ return true;
+ }
+ if (auto Count = parseRepeatPassName(Name)) {
+ FunctionPassManager NestedFPM(DebugLogging);
+ if (!parseFunctionPassPipeline(NestedFPM, InnerPipeline, VerifyEachPass,
+ DebugLogging))
+ return false;
+ FPM.addPass(createRepeatedPass(*Count, std::move(NestedFPM)));
+ return true;
+ }
+ // Normal passes can't have pipelines.
+ return false;
+ }
+
+ // Now expand the basic registered passes from the .inc file.
#define FUNCTION_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
FPM.addPass(CREATE_PASS); \
@@ -400,8 +967,9 @@ bool PassBuilder::parseFunctionPassName(FunctionPassManager &FPM,
}
#define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \
if (Name == "require<" NAME ">") { \
- FPM.addPass(RequireAnalysisPass< \
- std::remove_reference<decltype(CREATE_PASS)>::type>()); \
+ FPM.addPass( \
+ RequireAnalysisPass< \
+ std::remove_reference<decltype(CREATE_PASS)>::type, Function>()); \
return true; \
} \
if (Name == "invalidate<" NAME ">") { \
@@ -414,20 +982,48 @@ bool PassBuilder::parseFunctionPassName(FunctionPassManager &FPM,
return false;
}
-bool PassBuilder::parseLoopPassName(LoopPassManager &FPM, StringRef Name) {
+bool PassBuilder::parseLoopPass(LoopPassManager &LPM, const PipelineElement &E,
+ bool VerifyEachPass, bool DebugLogging) {
+ StringRef Name = E.Name;
+ auto &InnerPipeline = E.InnerPipeline;
+
+ // First handle complex passes like the pass managers which carry pipelines.
+ if (!InnerPipeline.empty()) {
+ if (Name == "loop") {
+ LoopPassManager NestedLPM(DebugLogging);
+ if (!parseLoopPassPipeline(NestedLPM, InnerPipeline, VerifyEachPass,
+ DebugLogging))
+ return false;
+ // Add the nested pass manager with the appropriate adaptor.
+ LPM.addPass(std::move(NestedLPM));
+ return true;
+ }
+ if (auto Count = parseRepeatPassName(Name)) {
+ LoopPassManager NestedLPM(DebugLogging);
+ if (!parseLoopPassPipeline(NestedLPM, InnerPipeline, VerifyEachPass,
+ DebugLogging))
+ return false;
+ LPM.addPass(createRepeatedPass(*Count, std::move(NestedLPM)));
+ return true;
+ }
+ // Normal passes can't have pipelines.
+ return false;
+ }
+
+ // Now expand the basic registered passes from the .inc file.
#define LOOP_PASS(NAME, CREATE_PASS) \
if (Name == NAME) { \
- FPM.addPass(CREATE_PASS); \
+ LPM.addPass(CREATE_PASS); \
return true; \
}
#define LOOP_ANALYSIS(NAME, CREATE_PASS) \
if (Name == "require<" NAME ">") { \
- FPM.addPass(RequireAnalysisPass< \
- std::remove_reference<decltype(CREATE_PASS)>::type>()); \
+ LPM.addPass(RequireAnalysisPass< \
+ std::remove_reference<decltype(CREATE_PASS)>::type, Loop>()); \
return true; \
} \
if (Name == "invalidate<" NAME ">") { \
- FPM.addPass(InvalidateAnalysisPass< \
+ LPM.addPass(InvalidateAnalysisPass< \
std::remove_reference<decltype(CREATE_PASS)>::type>()); \
return true; \
}
@@ -455,148 +1051,40 @@ bool PassBuilder::parseAAPassName(AAManager &AA, StringRef Name) {
}
bool PassBuilder::parseLoopPassPipeline(LoopPassManager &LPM,
- StringRef &PipelineText,
+ ArrayRef<PipelineElement> Pipeline,
bool VerifyEachPass,
bool DebugLogging) {
- for (;;) {
- // Parse nested pass managers by recursing.
- if (PipelineText.startswith("loop(")) {
- LoopPassManager NestedLPM(DebugLogging);
-
- // Parse the inner pipeline inte the nested manager.
- PipelineText = PipelineText.substr(strlen("loop("));
- if (!parseLoopPassPipeline(NestedLPM, PipelineText, VerifyEachPass,
- DebugLogging) ||
- PipelineText.empty())
- return false;
- assert(PipelineText[0] == ')');
- PipelineText = PipelineText.substr(1);
-
- // Add the nested pass manager with the appropriate adaptor.
- LPM.addPass(std::move(NestedLPM));
- } else {
- // Otherwise try to parse a pass name.
- size_t End = PipelineText.find_first_of(",)");
- if (!parseLoopPassName(LPM, PipelineText.substr(0, End)))
- return false;
- // TODO: Ideally, we would run a LoopVerifierPass() here in the
- // VerifyEachPass case, but we don't have such a verifier yet.
-
- PipelineText = PipelineText.substr(End);
- }
-
- if (PipelineText.empty() || PipelineText[0] == ')')
- return true;
-
- assert(PipelineText[0] == ',');
- PipelineText = PipelineText.substr(1);
+ for (const auto &Element : Pipeline) {
+ if (!parseLoopPass(LPM, Element, VerifyEachPass, DebugLogging))
+ return false;
+ // FIXME: No verifier support for Loop passes!
}
+ return true;
}
bool PassBuilder::parseFunctionPassPipeline(FunctionPassManager &FPM,
- StringRef &PipelineText,
+ ArrayRef<PipelineElement> Pipeline,
bool VerifyEachPass,
bool DebugLogging) {
- for (;;) {
- // Parse nested pass managers by recursing.
- if (PipelineText.startswith("function(")) {
- FunctionPassManager NestedFPM(DebugLogging);
-
- // Parse the inner pipeline inte the nested manager.
- PipelineText = PipelineText.substr(strlen("function("));
- if (!parseFunctionPassPipeline(NestedFPM, PipelineText, VerifyEachPass,
- DebugLogging) ||
- PipelineText.empty())
- return false;
- assert(PipelineText[0] == ')');
- PipelineText = PipelineText.substr(1);
-
- // Add the nested pass manager with the appropriate adaptor.
- FPM.addPass(std::move(NestedFPM));
- } else if (PipelineText.startswith("loop(")) {
- LoopPassManager NestedLPM(DebugLogging);
-
- // Parse the inner pipeline inte the nested manager.
- PipelineText = PipelineText.substr(strlen("loop("));
- if (!parseLoopPassPipeline(NestedLPM, PipelineText, VerifyEachPass,
- DebugLogging) ||
- PipelineText.empty())
- return false;
- assert(PipelineText[0] == ')');
- PipelineText = PipelineText.substr(1);
-
- // Add the nested pass manager with the appropriate adaptor.
- FPM.addPass(createFunctionToLoopPassAdaptor(std::move(NestedLPM)));
- } else {
- // Otherwise try to parse a pass name.
- size_t End = PipelineText.find_first_of(",)");
- if (!parseFunctionPassName(FPM, PipelineText.substr(0, End)))
- return false;
- if (VerifyEachPass)
- FPM.addPass(VerifierPass());
-
- PipelineText = PipelineText.substr(End);
- }
-
- if (PipelineText.empty() || PipelineText[0] == ')')
- return true;
-
- assert(PipelineText[0] == ',');
- PipelineText = PipelineText.substr(1);
+ for (const auto &Element : Pipeline) {
+ if (!parseFunctionPass(FPM, Element, VerifyEachPass, DebugLogging))
+ return false;
+ if (VerifyEachPass)
+ FPM.addPass(VerifierPass());
}
+ return true;
}
bool PassBuilder::parseCGSCCPassPipeline(CGSCCPassManager &CGPM,
- StringRef &PipelineText,
+ ArrayRef<PipelineElement> Pipeline,
bool VerifyEachPass,
bool DebugLogging) {
- for (;;) {
- // Parse nested pass managers by recursing.
- if (PipelineText.startswith("cgscc(")) {
- CGSCCPassManager NestedCGPM(DebugLogging);
-
- // Parse the inner pipeline into the nested manager.
- PipelineText = PipelineText.substr(strlen("cgscc("));
- if (!parseCGSCCPassPipeline(NestedCGPM, PipelineText, VerifyEachPass,
- DebugLogging) ||
- PipelineText.empty())
- return false;
- assert(PipelineText[0] == ')');
- PipelineText = PipelineText.substr(1);
-
- // Add the nested pass manager with the appropriate adaptor.
- CGPM.addPass(std::move(NestedCGPM));
- } else if (PipelineText.startswith("function(")) {
- FunctionPassManager NestedFPM(DebugLogging);
-
- // Parse the inner pipeline inte the nested manager.
- PipelineText = PipelineText.substr(strlen("function("));
- if (!parseFunctionPassPipeline(NestedFPM, PipelineText, VerifyEachPass,
- DebugLogging) ||
- PipelineText.empty())
- return false;
- assert(PipelineText[0] == ')');
- PipelineText = PipelineText.substr(1);
-
- // Add the nested pass manager with the appropriate adaptor.
- CGPM.addPass(
- createCGSCCToFunctionPassAdaptor(std::move(NestedFPM), DebugLogging));
- } else {
- // Otherwise try to parse a pass name.
- size_t End = PipelineText.find_first_of(",)");
- if (!parseCGSCCPassName(CGPM, PipelineText.substr(0, End)))
- return false;
- // FIXME: No verifier support for CGSCC passes!
-
- PipelineText = PipelineText.substr(End);
- }
-
- if (PipelineText.empty() || PipelineText[0] == ')')
- return true;
-
- assert(PipelineText[0] == ',');
- PipelineText = PipelineText.substr(1);
+ for (const auto &Element : Pipeline) {
+ if (!parseCGSCCPass(CGPM, Element, VerifyEachPass, DebugLogging))
+ return false;
+ // FIXME: No verifier support for CGSCC passes!
}
+ return true;
}
void PassBuilder::crossRegisterProxies(LoopAnalysisManager &LAM,
@@ -605,7 +1093,6 @@ void PassBuilder::crossRegisterProxies(LoopAnalysisManager &LAM,
ModuleAnalysisManager &MAM) {
MAM.registerPass([&] { return FunctionAnalysisManagerModuleProxy(FAM); });
MAM.registerPass([&] { return CGSCCAnalysisManagerModuleProxy(CGAM); });
- CGAM.registerPass([&] { return FunctionAnalysisManagerCGSCCProxy(FAM); });
CGAM.registerPass([&] { return ModuleAnalysisManagerCGSCCProxy(MAM); });
FAM.registerPass([&] { return CGSCCAnalysisManagerFunctionProxy(CGAM); });
FAM.registerPass([&] { return ModuleAnalysisManagerFunctionProxy(MAM); });
@@ -614,71 +1101,16 @@ void PassBuilder::crossRegisterProxies(LoopAnalysisManager &LAM,
}
bool PassBuilder::parseModulePassPipeline(ModulePassManager &MPM,
- StringRef &PipelineText,
+ ArrayRef<PipelineElement> Pipeline,
bool VerifyEachPass,
bool DebugLogging) {
- for (;;) {
- // Parse nested pass managers by recursing.
- if (PipelineText.startswith("module(")) {
- ModulePassManager NestedMPM(DebugLogging);
-
- // Parse the inner pipeline into the nested manager.
- PipelineText = PipelineText.substr(strlen("module("));
- if (!parseModulePassPipeline(NestedMPM, PipelineText, VerifyEachPass,
- DebugLogging) ||
- PipelineText.empty())
- return false;
- assert(PipelineText[0] == ')');
- PipelineText = PipelineText.substr(1);
-
- // Now add the nested manager as a module pass.
- MPM.addPass(std::move(NestedMPM));
- } else if (PipelineText.startswith("cgscc(")) {
- CGSCCPassManager NestedCGPM(DebugLogging);
-
- // Parse the inner pipeline inte the nested manager.
- PipelineText = PipelineText.substr(strlen("cgscc("));
- if (!parseCGSCCPassPipeline(NestedCGPM, PipelineText, VerifyEachPass,
- DebugLogging) ||
- PipelineText.empty())
- return false;
- assert(PipelineText[0] == ')');
- PipelineText = PipelineText.substr(1);
-
- // Add the nested pass manager with the appropriate adaptor.
- MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(NestedCGPM),
- DebugLogging));
- } else if (PipelineText.startswith("function(")) {
- FunctionPassManager NestedFPM(DebugLogging);
-
- // Parse the inner pipeline inte the nested manager.
- PipelineText = PipelineText.substr(strlen("function("));
- if (!parseFunctionPassPipeline(NestedFPM, PipelineText, VerifyEachPass,
- DebugLogging) ||
- PipelineText.empty())
- return false;
- assert(PipelineText[0] == ')');
- PipelineText = PipelineText.substr(1);
-
- // Add the nested pass manager with the appropriate adaptor.
- MPM.addPass(createModuleToFunctionPassAdaptor(std::move(NestedFPM)));
- } else {
- // Otherwise try to parse a pass name.
- size_t End = PipelineText.find_first_of(",)");
- if (!parseModulePassName(MPM, PipelineText.substr(0, End), DebugLogging))
- return false;
- if (VerifyEachPass)
- MPM.addPass(VerifierPass());
-
- PipelineText = PipelineText.substr(End);
- }
-
- if (PipelineText.empty() || PipelineText[0] == ')')
- return true;
-
- assert(PipelineText[0] == ',');
- PipelineText = PipelineText.substr(1);
+ for (const auto &Element : Pipeline) {
+ if (!parseModulePass(MPM, Element, VerifyEachPass, DebugLogging))
+ return false;
+ if (VerifyEachPass)
+ MPM.addPass(VerifierPass());
}
+ return true;
}
// Primary pass pipeline description parsing routine.
@@ -687,61 +1119,37 @@ bool PassBuilder::parseModulePassPipeline(ModulePassManager &MPM,
bool PassBuilder::parsePassPipeline(ModulePassManager &MPM,
StringRef PipelineText, bool VerifyEachPass,
bool DebugLogging) {
- // By default, try to parse the pipeline as-if it were within an implicit
- // 'module(...)' pass pipeline. If this will parse at all, it needs to
- // consume the entire string.
- if (parseModulePassPipeline(MPM, PipelineText, VerifyEachPass, DebugLogging))
- return PipelineText.empty();
-
- // This isn't parsable as a module pipeline, look for the end of a pass name
- // and directly drop down to that layer.
- StringRef FirstName =
- PipelineText.substr(0, PipelineText.find_first_of(",)"));
- assert(!isModulePassName(FirstName) &&
- "Already handled all module pipeline options.");
-
- // If this looks like a CGSCC pass, parse the whole thing as a CGSCC
- // pipeline.
- if (PipelineText.startswith("cgscc(") || isCGSCCPassName(FirstName)) {
- CGSCCPassManager CGPM(DebugLogging);
- if (!parseCGSCCPassPipeline(CGPM, PipelineText, VerifyEachPass,
- DebugLogging) ||
- !PipelineText.empty())
+ auto Pipeline = parsePipelineText(PipelineText);
+ if (!Pipeline || Pipeline->empty())
+ return false;
+
+ // If the first name isn't at the module layer, wrap the pipeline up
+ // automatically.
+ StringRef FirstName = Pipeline->front().Name;
+
+ if (!isModulePassName(FirstName)) {
+ if (isCGSCCPassName(FirstName))
+ Pipeline = {{"cgscc", std::move(*Pipeline)}};
+ else if (isFunctionPassName(FirstName))
+ Pipeline = {{"function", std::move(*Pipeline)}};
+ else if (isLoopPassName(FirstName))
+ Pipeline = {{"function", {{"loop", std::move(*Pipeline)}}}};
+ else
+ // Unknown pass name!
return false;
- MPM.addPass(
- createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM), DebugLogging));
- return true;
}
- // Similarly, if this looks like a Function pass, parse the whole thing as
- // a Function pipelien.
- if (PipelineText.startswith("function(") || isFunctionPassName(FirstName)) {
- FunctionPassManager FPM(DebugLogging);
- if (!parseFunctionPassPipeline(FPM, PipelineText, VerifyEachPass,
- DebugLogging) ||
- !PipelineText.empty())
- return false;
- MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
- return true;
- }
+ return parseModulePassPipeline(MPM, *Pipeline, VerifyEachPass, DebugLogging);
+}
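
Sketch of the new auto-wrapping behavior (illustrative): a bare function-level fragment is promoted to a full module pipeline, so these two calls should be equivalent:

    PB.parsePassPipeline(MPM, "instcombine,sroa", /*VerifyEachPass=*/true, /*DebugLogging=*/false);
    PB.parsePassPipeline(MPM, "function(instcombine,sroa)", true, false);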
- // If this looks like a Loop pass, parse the whole thing as a Loop pipeline.
- if (PipelineText.startswith("loop(") || isLoopPassName(FirstName)) {
- LoopPassManager LPM(DebugLogging);
- if (!parseLoopPassPipeline(LPM, PipelineText, VerifyEachPass,
- DebugLogging) ||
- !PipelineText.empty())
- return false;
- FunctionPassManager FPM(DebugLogging);
- FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM)));
- MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+bool PassBuilder::parseAAPipeline(AAManager &AA, StringRef PipelineText) {
+ // If the pipeline text is just the word 'default', replace the AA manager
+ // with our default one.
+ if (PipelineText == "default") {
+ AA = buildDefaultAAPipeline();
return true;
}
- return false;
-}
-
-bool PassBuilder::parseAAPipeline(AAManager &AA, StringRef PipelineText) {
while (!PipelineText.empty()) {
StringRef Name;
std::tie(Name, PipelineText) = PipelineText.split(',');
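
For illustration, the AA pipeline text is either the single word "default" (now expanded via buildDefaultAAPipeline) or a comma-separated list of alias-analysis names registered in PassRegistry.def:

    PB.parseAAPipeline(AA, "default");
    PB.parseAAPipeline(AA, "globals-aa");  // name taken from the MODULE_ALIAS_ANALYSIS entries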
diff --git a/contrib/llvm/lib/Passes/PassRegistry.def b/contrib/llvm/lib/Passes/PassRegistry.def
index b717057632b4..a9939fddb98c 100644
--- a/contrib/llvm/lib/Passes/PassRegistry.def
+++ b/contrib/llvm/lib/Passes/PassRegistry.def
@@ -21,6 +21,7 @@
#endif
MODULE_ANALYSIS("callgraph", CallGraphAnalysis())
MODULE_ANALYSIS("lcg", LazyCallGraphAnalysis())
+MODULE_ANALYSIS("module-summary", ModuleSummaryIndexAnalysis())
MODULE_ANALYSIS("no-op-module", NoOpModuleAnalysis())
MODULE_ANALYSIS("profile-summary", ProfileSummaryAnalysis())
MODULE_ANALYSIS("targetlibinfo", TargetLibraryAnalysis())
@@ -37,13 +38,16 @@ MODULE_ALIAS_ANALYSIS("globals-aa", GlobalsAA())
#ifndef MODULE_PASS
#define MODULE_PASS(NAME, CREATE_PASS)
#endif
+MODULE_PASS("always-inline", AlwaysInlinerPass())
MODULE_PASS("constmerge", ConstantMergePass())
MODULE_PASS("cross-dso-cfi", CrossDSOCFIPass())
MODULE_PASS("deadargelim", DeadArgumentEliminationPass())
MODULE_PASS("elim-avail-extern", EliminateAvailableExternallyPass())
MODULE_PASS("forceattrs", ForceFunctionAttrsPass())
+MODULE_PASS("function-import", FunctionImportPass())
MODULE_PASS("globaldce", GlobalDCEPass())
MODULE_PASS("globalopt", GlobalOptPass())
+MODULE_PASS("globalsplit", GlobalSplitPass())
MODULE_PASS("inferattrs", InferFunctionAttrsPass())
MODULE_PASS("insert-gcov-profiling", GCOVProfilerPass())
MODULE_PASS("instrprof", InstrProfiling())
@@ -51,6 +55,7 @@ MODULE_PASS("internalize", InternalizePass())
MODULE_PASS("invalidate<all>", InvalidateAllAnalysesPass())
MODULE_PASS("ipsccp", IPSCCPPass())
MODULE_PASS("lowertypetests", LowerTypeTestsPass())
+MODULE_PASS("name-anon-globals", NameAnonGlobalPass())
MODULE_PASS("no-op-module", NoOpModulePass())
MODULE_PASS("partial-inliner", PartialInlinerPass())
MODULE_PASS("pgo-icall-prom", PGOIndirectCallPromotion())
@@ -62,6 +67,7 @@ MODULE_PASS("print-callgraph", CallGraphPrinterPass(dbgs()))
MODULE_PASS("print", PrintModulePass(dbgs()))
MODULE_PASS("print-lcg", LazyCallGraphPrinterPass(dbgs()))
MODULE_PASS("print-lcg-dot", LazyCallGraphDOTPrinterPass(dbgs()))
+MODULE_PASS("rewrite-symbols", RewriteSymbolPass())
MODULE_PASS("rpo-functionattrs", ReversePostOrderFunctionAttrsPass())
MODULE_PASS("sample-profile", SampleProfileLoaderPass())
MODULE_PASS("strip-dead-prototypes", StripDeadPrototypesPass())
@@ -73,6 +79,7 @@ MODULE_PASS("verify", VerifierPass())
#define CGSCC_ANALYSIS(NAME, CREATE_PASS)
#endif
CGSCC_ANALYSIS("no-op-cgscc", NoOpCGSCCAnalysis())
+CGSCC_ANALYSIS("fam-proxy", FunctionAnalysisManagerCGSCCProxy())
#undef CGSCC_ANALYSIS
#ifndef CGSCC_PASS
@@ -80,6 +87,7 @@ CGSCC_ANALYSIS("no-op-cgscc", NoOpCGSCCAnalysis())
#endif
CGSCC_PASS("invalidate<all>", InvalidateAllAnalysesPass())
CGSCC_PASS("function-attrs", PostOrderFunctionAttrsPass())
+CGSCC_PASS("inline", InlinerPass())
CGSCC_PASS("no-op-cgscc", NoOpCGSCCPass())
#undef CGSCC_PASS
@@ -129,28 +137,38 @@ FUNCTION_PASS("adce", ADCEPass())
FUNCTION_PASS("add-discriminators", AddDiscriminatorsPass())
FUNCTION_PASS("alignment-from-assumptions", AlignmentFromAssumptionsPass())
FUNCTION_PASS("bdce", BDCEPass())
+FUNCTION_PASS("break-crit-edges", BreakCriticalEdgesPass())
FUNCTION_PASS("consthoist", ConstantHoistingPass())
FUNCTION_PASS("correlated-propagation", CorrelatedValuePropagationPass())
FUNCTION_PASS("dce", DCEPass())
FUNCTION_PASS("dse", DSEPass())
-FUNCTION_PASS("early-cse", EarlyCSEPass())
+FUNCTION_PASS("dot-cfg", CFGPrinterPass())
+FUNCTION_PASS("dot-cfg-only", CFGOnlyPrinterPass())
+FUNCTION_PASS("early-cse", EarlyCSEPass(/*UseMemorySSA=*/false))
+FUNCTION_PASS("early-cse-memssa", EarlyCSEPass(/*UseMemorySSA=*/true))
FUNCTION_PASS("gvn-hoist", GVNHoistPass())
FUNCTION_PASS("instcombine", InstCombinePass())
FUNCTION_PASS("instsimplify", InstSimplifierPass())
FUNCTION_PASS("invalidate<all>", InvalidateAllAnalysesPass())
FUNCTION_PASS("float2int", Float2IntPass())
FUNCTION_PASS("no-op-function", NoOpFunctionPass())
+FUNCTION_PASS("libcalls-shrinkwrap", LibCallsShrinkWrapPass())
FUNCTION_PASS("loweratomic", LowerAtomicPass())
FUNCTION_PASS("lower-expect", LowerExpectIntrinsicPass())
+FUNCTION_PASS("lower-guard-intrinsic", LowerGuardIntrinsicPass())
FUNCTION_PASS("guard-widening", GuardWideningPass())
FUNCTION_PASS("gvn", GVN())
FUNCTION_PASS("loop-simplify", LoopSimplifyPass())
+FUNCTION_PASS("lowerinvoke", LowerInvokePass())
FUNCTION_PASS("mem2reg", PromotePass())
FUNCTION_PASS("memcpyopt", MemCpyOptPass())
FUNCTION_PASS("mldst-motion", MergedLoadStoreMotionPass())
+FUNCTION_PASS("nary-reassociate", NaryReassociatePass())
+FUNCTION_PASS("newgvn", NewGVNPass())
FUNCTION_PASS("jump-threading", JumpThreadingPass())
FUNCTION_PASS("partially-inline-libcalls", PartiallyInlineLibCallsPass())
FUNCTION_PASS("lcssa", LCSSAPass())
+FUNCTION_PASS("loop-data-prefetch", LoopDataPrefetchPass())
FUNCTION_PASS("loop-distribute", LoopDistributePass())
FUNCTION_PASS("loop-vectorize", LoopVectorizePass())
FUNCTION_PASS("print", PrintFunctionPass(dbgs()))
@@ -170,13 +188,17 @@ FUNCTION_PASS("sccp", SCCPPass())
FUNCTION_PASS("simplify-cfg", SimplifyCFGPass())
FUNCTION_PASS("sink", SinkingPass())
FUNCTION_PASS("slp-vectorizer", SLPVectorizerPass())
+FUNCTION_PASS("speculative-execution", SpeculativeExecutionPass())
FUNCTION_PASS("sroa", SROA())
FUNCTION_PASS("tailcallelim", TailCallElimPass())
FUNCTION_PASS("unreachableblockelim", UnreachableBlockElimPass())
FUNCTION_PASS("verify", VerifierPass())
FUNCTION_PASS("verify<domtree>", DominatorTreeVerifierPass())
+FUNCTION_PASS("verify<loops>", LoopVerifierPass())
FUNCTION_PASS("verify<memoryssa>", MemorySSAVerifierPass())
FUNCTION_PASS("verify<regions>", RegionInfoVerifierPass())
+FUNCTION_PASS("view-cfg", CFGViewerPass())
+FUNCTION_PASS("view-cfg-only", CFGOnlyViewerPass())
#undef FUNCTION_PASS
#ifndef LOOP_ANALYSIS
@@ -199,7 +221,9 @@ LOOP_PASS("no-op-loop", NoOpLoopPass())
LOOP_PASS("print", PrintLoopPass(dbgs()))
LOOP_PASS("loop-deletion", LoopDeletionPass())
LOOP_PASS("simplify-cfg", LoopSimplifyCFGPass())
+LOOP_PASS("strength-reduce", LoopStrengthReducePass())
LOOP_PASS("indvars", IndVarSimplifyPass())
+LOOP_PASS("unroll", LoopUnrollPass())
LOOP_PASS("print-access-info", LoopAccessInfoPrinterPass(dbgs()))
LOOP_PASS("print<ivusers>", IVUsersPrinterPass(dbgs()))
#undef LOOP_PASS
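
Sketch of how these .def entries are consumed (mirroring the expansion in PassBuilder.cpp): a client defines the macro it needs and includes the file; the file #undefs each macro at the end of its section.

    #define MODULE_PASS(NAME, CREATE_PASS)                                       \
      if (Name == NAME) {                                                        \
        MPM.addPass(CREATE_PASS);                                                \
        return true;                                                             \
      }
    #include "PassRegistry.def"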
diff --git a/contrib/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp b/contrib/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
index fcd4e24bdfcb..6d907c7098e0 100644
--- a/contrib/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
+++ b/contrib/llvm/lib/ProfileData/Coverage/CoverageMapping.cpp
@@ -183,72 +183,101 @@ void FunctionRecordIterator::skipOtherFiles() {
*this = FunctionRecordIterator();
}
+Error CoverageMapping::loadFunctionRecord(
+ const CoverageMappingRecord &Record,
+ IndexedInstrProfReader &ProfileReader) {
+ StringRef OrigFuncName = Record.FunctionName;
+ if (Record.Filenames.empty())
+ OrigFuncName = getFuncNameWithoutPrefix(OrigFuncName);
+ else
+ OrigFuncName = getFuncNameWithoutPrefix(OrigFuncName, Record.Filenames[0]);
+
+ // Don't load records for functions we've already seen.
+ if (!FunctionNames.insert(OrigFuncName).second)
+ return Error::success();
+
+ CounterMappingContext Ctx(Record.Expressions);
+
+ std::vector<uint64_t> Counts;
+ if (Error E = ProfileReader.getFunctionCounts(Record.FunctionName,
+ Record.FunctionHash, Counts)) {
+ instrprof_error IPE = InstrProfError::take(std::move(E));
+ if (IPE == instrprof_error::hash_mismatch) {
+ MismatchedFunctionCount++;
+ return Error::success();
+ } else if (IPE != instrprof_error::unknown_function)
+ return make_error<InstrProfError>(IPE);
+ Counts.assign(Record.MappingRegions.size(), 0);
+ }
+ Ctx.setCounts(Counts);
+
+ assert(!Record.MappingRegions.empty() && "Function has no regions");
+
+ FunctionRecord Function(OrigFuncName, Record.Filenames);
+ for (const auto &Region : Record.MappingRegions) {
+ Expected<int64_t> ExecutionCount = Ctx.evaluate(Region.Count);
+ if (auto E = ExecutionCount.takeError()) {
+ llvm::consumeError(std::move(E));
+ return Error::success();
+ }
+ Function.pushRegion(Region, *ExecutionCount);
+ }
+ if (Function.CountedRegions.size() != Record.MappingRegions.size()) {
+ MismatchedFunctionCount++;
+ return Error::success();
+ }
+
+ Functions.push_back(std::move(Function));
+ return Error::success();
+}
+
Expected<std::unique_ptr<CoverageMapping>>
CoverageMapping::load(CoverageMappingReader &CoverageReader,
IndexedInstrProfReader &ProfileReader) {
auto Coverage = std::unique_ptr<CoverageMapping>(new CoverageMapping());
- std::vector<uint64_t> Counts;
- for (const auto &Record : CoverageReader) {
- CounterMappingContext Ctx(Record.Expressions);
-
- Counts.clear();
- if (Error E = ProfileReader.getFunctionCounts(
- Record.FunctionName, Record.FunctionHash, Counts)) {
- instrprof_error IPE = InstrProfError::take(std::move(E));
- if (IPE == instrprof_error::hash_mismatch) {
- Coverage->MismatchedFunctionCount++;
- continue;
- } else if (IPE != instrprof_error::unknown_function)
- return make_error<InstrProfError>(IPE);
- Counts.assign(Record.MappingRegions.size(), 0);
- }
- Ctx.setCounts(Counts);
+ for (const auto &Record : CoverageReader)
+ if (Error E = Coverage->loadFunctionRecord(Record, ProfileReader))
+ return std::move(E);
- assert(!Record.MappingRegions.empty() && "Function has no regions");
+ return std::move(Coverage);
+}
- StringRef OrigFuncName = Record.FunctionName;
- if (Record.Filenames.empty())
- OrigFuncName = getFuncNameWithoutPrefix(OrigFuncName);
- else
- OrigFuncName =
- getFuncNameWithoutPrefix(OrigFuncName, Record.Filenames[0]);
- FunctionRecord Function(OrigFuncName, Record.Filenames);
- for (const auto &Region : Record.MappingRegions) {
- Expected<int64_t> ExecutionCount = Ctx.evaluate(Region.Count);
- if (auto E = ExecutionCount.takeError()) {
- llvm::consumeError(std::move(E));
- break;
- }
- Function.pushRegion(Region, *ExecutionCount);
- }
- if (Function.CountedRegions.size() != Record.MappingRegions.size()) {
- Coverage->MismatchedFunctionCount++;
- continue;
- }
+Expected<std::unique_ptr<CoverageMapping>> CoverageMapping::load(
+ ArrayRef<std::unique_ptr<CoverageMappingReader>> CoverageReaders,
+ IndexedInstrProfReader &ProfileReader) {
+ auto Coverage = std::unique_ptr<CoverageMapping>(new CoverageMapping());
- Coverage->Functions.push_back(std::move(Function));
- }
+ for (const auto &CoverageReader : CoverageReaders)
+ for (const auto &Record : *CoverageReader)
+ if (Error E = Coverage->loadFunctionRecord(Record, ProfileReader))
+ return std::move(E);
return std::move(Coverage);
}
Expected<std::unique_ptr<CoverageMapping>>
-CoverageMapping::load(StringRef ObjectFilename, StringRef ProfileFilename,
- StringRef Arch) {
- auto CounterMappingBuff = MemoryBuffer::getFileOrSTDIN(ObjectFilename);
- if (std::error_code EC = CounterMappingBuff.getError())
- return errorCodeToError(EC);
- auto CoverageReaderOrErr =
- BinaryCoverageReader::create(CounterMappingBuff.get(), Arch);
- if (Error E = CoverageReaderOrErr.takeError())
- return std::move(E);
- auto CoverageReader = std::move(CoverageReaderOrErr.get());
+CoverageMapping::load(ArrayRef<StringRef> ObjectFilenames,
+ StringRef ProfileFilename, StringRef Arch) {
auto ProfileReaderOrErr = IndexedInstrProfReader::create(ProfileFilename);
if (Error E = ProfileReaderOrErr.takeError())
return std::move(E);
auto ProfileReader = std::move(ProfileReaderOrErr.get());
- return load(*CoverageReader, *ProfileReader);
+
+ SmallVector<std::unique_ptr<CoverageMappingReader>, 4> Readers;
+ SmallVector<std::unique_ptr<MemoryBuffer>, 4> Buffers;
+ for (StringRef ObjectFilename : ObjectFilenames) {
+ auto CovMappingBufOrErr = MemoryBuffer::getFileOrSTDIN(ObjectFilename);
+ if (std::error_code EC = CovMappingBufOrErr.getError())
+ return errorCodeToError(EC);
+ auto CoverageReaderOrErr =
+ BinaryCoverageReader::create(CovMappingBufOrErr.get(), Arch);
+ if (Error E = CoverageReaderOrErr.takeError())
+ return std::move(E);
+ Readers.push_back(std::move(CoverageReaderOrErr.get()));
+ Buffers.push_back(std::move(CovMappingBufOrErr.get()));
+ }
+ return load(Readers, *ProfileReader);
}
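
A usage sketch for the new multi-object overload (illustrative; the file names are hypothetical):

    SmallVector<StringRef, 2> Objects = {"a.out", "libfoo.so"};
    auto CoverageOrErr =
        coverage::CoverageMapping::load(Objects, "default.profdata", /*Arch=*/"");
    if (Error E = CoverageOrErr.takeError()) {
      consumeError(std::move(E));  // real callers would report this
      return;
    }
    std::unique_ptr<coverage::CoverageMapping> Coverage = std::move(*CoverageOrErr);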
namespace {
@@ -560,7 +589,7 @@ std::string getCoverageMapErrString(coveragemap_error Err) {
// will be removed once this transition is complete. Clients should prefer to
// deal with the Error value directly, rather than converting to error_code.
class CoverageMappingErrorCategoryType : public std::error_category {
- const char *name() const LLVM_NOEXCEPT override { return "llvm.coveragemap"; }
+ const char *name() const noexcept override { return "llvm.coveragemap"; }
std::string message(int IE) const override {
return getCoverageMapErrString(static_cast<coveragemap_error>(IE));
}
diff --git a/contrib/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp b/contrib/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp
index 1a4b4f590841..a6c7031ccd3d 100644
--- a/contrib/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp
+++ b/contrib/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp
@@ -648,7 +648,7 @@ BinaryCoverageReader::create(std::unique_ptr<MemoryBuffer> &ObjectBuffer,
StringRef Coverage;
uint8_t BytesInAddress;
support::endianness Endian;
- Error E;
+ Error E = Error::success();
consumeError(std::move(E));
if (ObjectBuffer->getBuffer().startswith(TestingFormatMagic))
// This is a special format used for testing.
diff --git a/contrib/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp b/contrib/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp
index 8ff90d62cfdc..82356333b937 100644
--- a/contrib/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp
+++ b/contrib/llvm/lib/ProfileData/Coverage/CoverageMappingWriter.cpp
@@ -108,8 +108,16 @@ static void writeCounter(ArrayRef<CounterExpression> Expressions, Counter C,
void CoverageMappingWriter::write(raw_ostream &OS) {
// Sort the regions in an ascending order by the file id and the starting
- // location.
- std::stable_sort(MappingRegions.begin(), MappingRegions.end());
+ // location. Sort by region kinds to ensure stable order for tests.
+ std::stable_sort(
+ MappingRegions.begin(), MappingRegions.end(),
+ [](const CounterMappingRegion &LHS, const CounterMappingRegion &RHS) {
+ if (LHS.FileID != RHS.FileID)
+ return LHS.FileID < RHS.FileID;
+ if (LHS.startLoc() != RHS.startLoc())
+ return LHS.startLoc() < RHS.startLoc();
+ return LHS.Kind < RHS.Kind;
+ });
// Write out the fileid -> filename mapping.
encodeULEB128(VirtualFileMapping.size(), OS);
diff --git a/contrib/llvm/lib/ProfileData/InstrProf.cpp b/contrib/llvm/lib/ProfileData/InstrProf.cpp
index 6962f82a5ef5..77c6ffc9c253 100644
--- a/contrib/llvm/lib/ProfileData/InstrProf.cpp
+++ b/contrib/llvm/lib/ProfileData/InstrProf.cpp
@@ -14,6 +14,7 @@
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
@@ -69,6 +70,8 @@ std::string getInstrProfErrString(instrprof_error Err) {
return "Failed to compress data (zlib)";
case instrprof_error::uncompress_failed:
return "Failed to uncompress data (zlib)";
+ case instrprof_error::empty_raw_profile:
+ return "Empty raw profile file";
}
llvm_unreachable("A value of instrprof_error has no message.");
}
@@ -77,7 +80,7 @@ std::string getInstrProfErrString(instrprof_error Err) {
// will be removed once this transition is complete. Clients should prefer to
// deal with the Error value directly, rather than converting to error_code.
class InstrProfErrorCategoryType : public std::error_category {
- const char *name() const LLVM_NOEXCEPT override { return "llvm.instrprof"; }
+ const char *name() const noexcept override { return "llvm.instrprof"; }
std::string message(int IE) const override {
return getInstrProfErrString(static_cast<instrprof_error>(IE));
}
@@ -135,6 +138,9 @@ std::string getPGOFuncName(StringRef RawFuncName,
// (when \c InLTO is true): LTO's internalization privatizes many global linkage
// symbols. This happens after value profile annotation, but those internal
// linkage functions should not have a source prefix.
+// Additionally, for ThinLTO mode, exported internal functions are promoted
+// and renamed. We need to ensure that the original internal PGO name is
+// used when computing the GUID that is compared against the profiled GUIDs.
// To differentiate compiler generated internal symbols from original ones,
// PGOFuncName meta data are created and attached to the original internal
// symbols in the value profile annotation step
@@ -780,4 +786,29 @@ void createPGOFuncNameMetadata(Function &F, StringRef PGOFuncName) {
F.setMetadata(getPGOFuncNameMetadataName(), N);
}
+bool needsComdatForCounter(const Function &F, const Module &M) {
+ if (F.hasComdat())
+ return true;
+
+ Triple TT(M.getTargetTriple());
+ if (!TT.isOSBinFormatELF())
+ return false;
+
+ // See createPGOFuncNameVar for more details. To avoid link errors, profile
+ // counters for functions with available_externally linkage need to be changed
+ // to linkonce linkage. On ELF-based systems, this leads to weak symbols being
+ // created. Without using a comdat, duplicate entries won't be removed by the
+ // linker, leading to increased data segment size and raw profile size. Even
+ // worse, since the referenced counter from profile per-function data object
+ // will be resolved to the common strong definition, the profile counts for
+ // available_externally functions will end up being duplicated in raw profile
+ // data. This can result in a distorted profile, as the counts of those dups
+ // will be accumulated by the profile merger.
+ GlobalValue::LinkageTypes Linkage = F.getLinkage();
+ if (Linkage != GlobalValue::ExternalWeakLinkage &&
+ Linkage != GlobalValue::AvailableExternallyLinkage)
+ return false;
+
+ return true;
+}
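
Intended use, as a sketch (CounterVar is a hypothetical GlobalVariable holding the counters, and M a non-const Module): lowering code can key the comdat decision for a counter variable off this helper.

    if (needsComdatForCounter(F, M))
      CounterVar->setComdat(M.getOrInsertComdat(CounterVar->getName()));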
} // end namespace llvm
diff --git a/contrib/llvm/lib/ProfileData/InstrProfReader.cpp b/contrib/llvm/lib/ProfileData/InstrProfReader.cpp
index 81c13b35ce30..ad407f07957f 100644
--- a/contrib/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/contrib/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -46,6 +46,9 @@ InstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer) {
if (Buffer->getBufferSize() > std::numeric_limits<unsigned>::max())
return make_error<InstrProfError>(instrprof_error::too_large);
+ if (Buffer->getBufferSize() == 0)
+ return make_error<InstrProfError>(instrprof_error::empty_raw_profile);
+
std::unique_ptr<InstrProfReader> Result;
// Create the reader.
if (IndexedInstrProfReader::hasFormat(*Buffer))
@@ -286,7 +289,7 @@ Error RawInstrProfReader<IntPtrT>::readNextHeader(const char *CurrentPos) {
if (CurrentPos + sizeof(RawInstrProf::Header) > End)
return make_error<InstrProfError>(instrprof_error::malformed);
// The writer ensures each profile is padded to start at an aligned address.
- if (reinterpret_cast<size_t>(CurrentPos) % alignOf<uint64_t>())
+ if (reinterpret_cast<size_t>(CurrentPos) % alignof(uint64_t))
return make_error<InstrProfError>(instrprof_error::malformed);
// The magic should have the same byte order as in the previous header.
uint64_t Magic = *reinterpret_cast<const uint64_t *>(CurrentPos);
diff --git a/contrib/llvm/lib/ProfileData/InstrProfWriter.cpp b/contrib/llvm/lib/ProfileData/InstrProfWriter.cpp
index e25299ef6706..029d75660a73 100644
--- a/contrib/llvm/lib/ProfileData/InstrProfWriter.cpp
+++ b/contrib/llvm/lib/ProfileData/InstrProfWriter.cpp
@@ -13,10 +13,18 @@
//===----------------------------------------------------------------------===//
#include "llvm/ProfileData/InstrProfWriter.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/ProfileSummary.h"
+#include "llvm/ProfileData/ProfileCommon.h"
#include "llvm/Support/EndianStream.h"
+#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/OnDiskHashTable.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <string>
#include <tuple>
+#include <utility>
+#include <vector>
using namespace llvm;
@@ -29,6 +37,7 @@ struct PatchItem {
};
namespace llvm {
+
// A wrapper class to abstract writer stream with support of bytes
// back patching.
class ProfOStream {
@@ -40,6 +49,7 @@ public:
uint64_t tell() { return OS.tell(); }
void write(uint64_t V) { LE.write<uint64_t>(V); }
+
// \c patch can only be called when all data is written and flushed.
// For raw_string_ostream, the patch is done on the target string
// directly and it won't be reflected in the stream's internal buffer.
@@ -65,6 +75,7 @@ public:
}
}
}
+
// If \c OS is an instance of \c raw_fd_ostream, this field will be
// true. Otherwise, \c OS will be an raw_string_ostream.
bool IsFDOStream;
@@ -139,7 +150,8 @@ public:
}
}
};
-}
+
+} // end namespace llvm
InstrProfWriter::InstrProfWriter(bool Sparse)
: Sparse(Sparse), FunctionData(), ProfileKind(PF_Unknown),
@@ -152,6 +164,7 @@ void InstrProfWriter::setValueProfDataEndianness(
support::endianness Endianness) {
InfoObj->ValueProfDataEndianness = Endianness;
}
+
void InstrProfWriter::setOutputSparse(bool Sparse) {
this->Sparse = Sparse;
}
@@ -182,13 +195,20 @@ Error InstrProfWriter::addRecord(InstrProfRecord &&I, uint64_t Weight) {
return Dest.takeError();
}
+Error InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW) {
+ for (auto &I : IPW.FunctionData)
+ for (auto &Func : I.getValue())
+ if (Error E = addRecord(std::move(Func.second), 1))
+ return E;
+ return Error::success();
+}
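
A usage sketch (illustrative): records accumulated in one writer can now be folded into another in memory, e.g. when combining shards before emitting a single output.

    InstrProfWriter Dst(false), Src(false);
    // ... Src.addRecord(...) calls elsewhere ...
    if (Error E = Dst.mergeRecordsFromWriter(std::move(Src)))
      consumeError(std::move(E));  // real callers would surface the error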
+
bool InstrProfWriter::shouldEncodeData(const ProfilingData &PD) {
if (!Sparse)
return true;
for (const auto &Func : PD) {
const InstrProfRecord &IPR = Func.second;
- if (std::any_of(IPR.Counts.begin(), IPR.Counts.end(),
- [](uint64_t Count) { return Count > 0; }))
+ if (any_of(IPR.Counts, [](uint64_t Count) { return Count > 0; }))
return true;
}
return false;
@@ -261,7 +281,7 @@ void InstrProfWriter::writeImpl(ProfOStream &OS) {
// structure to be serialized out (to disk or buffer).
std::unique_ptr<ProfileSummary> PS = ISB.getSummary();
setSummary(TheSummary.get(), *PS);
- InfoObj->SummaryBuilder = 0;
+ InfoObj->SummaryBuilder = nullptr;
// Now do the final patch:
PatchItem PatchItems[] = {
diff --git a/contrib/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp b/contrib/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
index f8c3717007b3..9fb2ec1b39d9 100644
--- a/contrib/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
+++ b/contrib/llvm/lib/ProfileData/ProfileSummaryBuilder.cpp
@@ -59,14 +59,14 @@ void SampleProfileSummaryBuilder::addRecord(
void ProfileSummaryBuilder::computeDetailedSummary() {
if (DetailedSummaryCutoffs.empty())
return;
- auto Iter = CountFrequencies.begin();
- auto End = CountFrequencies.end();
std::sort(DetailedSummaryCutoffs.begin(), DetailedSummaryCutoffs.end());
+ auto Iter = CountFrequencies.begin();
+ const auto End = CountFrequencies.end();
uint32_t CountsSeen = 0;
uint64_t CurrSum = 0, Count = 0;
- for (uint32_t Cutoff : DetailedSummaryCutoffs) {
+ for (const uint32_t Cutoff : DetailedSummaryCutoffs) {
assert(Cutoff <= 999999);
APInt Temp(128, TotalCount);
APInt N(128, Cutoff);
diff --git a/contrib/llvm/lib/ProfileData/SampleProf.cpp b/contrib/llvm/lib/ProfileData/SampleProf.cpp
index cb0246113d80..5bcfff0801d5 100644
--- a/contrib/llvm/lib/ProfileData/SampleProf.cpp
+++ b/contrib/llvm/lib/ProfileData/SampleProf.cpp
@@ -24,7 +24,7 @@ namespace {
// will be removed once this transition is complete. Clients should prefer to
// deal with the Error value directly, rather than converting to error_code.
class SampleProfErrorCategoryType : public std::error_category {
- const char *name() const LLVM_NOEXCEPT override { return "llvm.sampleprof"; }
+ const char *name() const noexcept override { return "llvm.sampleprof"; }
std::string message(int IE) const override {
sampleprof_error E = static_cast<sampleprof_error>(IE);
switch (E) {
diff --git a/contrib/llvm/lib/Support/APFloat.cpp b/contrib/llvm/lib/Support/APFloat.cpp
index f9370b85234f..30f0deab90a0 100644
--- a/contrib/llvm/lib/Support/APFloat.cpp
+++ b/contrib/llvm/lib/Support/APFloat.cpp
@@ -19,8 +19,10 @@
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
#include <cstring>
#include <limits.h>
@@ -39,16 +41,15 @@ using namespace llvm;
static_assert(integerPartWidth % 4 == 0, "Part width must be divisible by 4!");
namespace llvm {
-
/* Represents floating point arithmetic semantics. */
struct fltSemantics {
/* The largest E such that 2^E is representable; this matches the
definition of IEEE 754. */
- APFloat::ExponentType maxExponent;
+ APFloatBase::ExponentType maxExponent;
/* The smallest E such that 2^E is a normalized number; this
matches the definition of IEEE 754. */
- APFloat::ExponentType minExponent;
+ APFloatBase::ExponentType minExponent;
/* Number of bits in the significand. This includes the integer
bit. */
@@ -58,12 +59,12 @@ namespace llvm {
unsigned int sizeInBits;
};
- const fltSemantics APFloat::IEEEhalf = { 15, -14, 11, 16 };
- const fltSemantics APFloat::IEEEsingle = { 127, -126, 24, 32 };
- const fltSemantics APFloat::IEEEdouble = { 1023, -1022, 53, 64 };
- const fltSemantics APFloat::IEEEquad = { 16383, -16382, 113, 128 };
- const fltSemantics APFloat::x87DoubleExtended = { 16383, -16382, 64, 80 };
- const fltSemantics APFloat::Bogus = { 0, 0, 0, 0 };
+ static const fltSemantics semIEEEhalf = {15, -14, 11, 16};
+ static const fltSemantics semIEEEsingle = {127, -126, 24, 32};
+ static const fltSemantics semIEEEdouble = {1023, -1022, 53, 64};
+ static const fltSemantics semIEEEquad = {16383, -16382, 113, 128};
+ static const fltSemantics semX87DoubleExtended = {16383, -16382, 64, 80};
+ static const fltSemantics semBogus = {0, 0, 0, 0};
/* The PowerPC format consists of two doubles. It does not map cleanly
onto the usual format above. It is approximated using twice the
@@ -76,7 +77,40 @@ namespace llvm {
to represent all possible values held by a PPC double-double number,
for example: (long double) 1.0 + (long double) 0x1p-106
Should this be replaced by a full emulation of PPC double-double? */
- const fltSemantics APFloat::PPCDoubleDouble = { 1023, -1022 + 53, 53 + 53, 128 };
+ static const fltSemantics semPPCDoubleDouble = {0, 0, 0, 0};
+
+ /* These are temporary semantics for the real PPCDoubleDouble implementation.
+ Currently, APFloat of PPCDoubleDouble holds one PPCDoubleDoubleImpl as the
+ high part of double double, and one IEEEdouble as the low part, so that
+ the old operations operate on PPCDoubleDoubleImpl, while the newly added
+ operations also populate the IEEEdouble.
+
+ TODO: Once all functions support DoubleAPFloat mode, we'll change all
+ PPCDoubleDoubleImpl to IEEEdouble and remove PPCDoubleDoubleImpl. */
+ static const fltSemantics semPPCDoubleDoubleImpl = {1023, -1022 + 53, 53 + 53,
+ 128};
+
+ const fltSemantics &APFloatBase::IEEEhalf() {
+ return semIEEEhalf;
+ }
+ const fltSemantics &APFloatBase::IEEEsingle() {
+ return semIEEEsingle;
+ }
+ const fltSemantics &APFloatBase::IEEEdouble() {
+ return semIEEEdouble;
+ }
+ const fltSemantics &APFloatBase::IEEEquad() {
+ return semIEEEquad;
+ }
+ const fltSemantics &APFloatBase::x87DoubleExtended() {
+ return semX87DoubleExtended;
+ }
+ const fltSemantics &APFloatBase::Bogus() {
+ return semBogus;
+ }
+ const fltSemantics &APFloatBase::PPCDoubleDouble() {
+ return semPPCDoubleDouble;
+ }
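
Illustrative sketch of the caller-side change: the semantics objects are now reached through accessor functions rather than static data members.

    // old: APFloat F(APFloat::IEEEdouble, "2.5");
    APFloat F(APFloat::IEEEdouble(), "2.5");
    bool LosesInfo;
    F.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &LosesInfo);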
/* A tight upper bound on number of parts required to hold the value
pow(5, power) is
@@ -94,6 +128,24 @@ namespace llvm {
const unsigned int maxPowerOfFiveExponent = maxExponent + maxPrecision - 1;
const unsigned int maxPowerOfFiveParts = 2 + ((maxPowerOfFiveExponent * 815)
/ (351 * integerPartWidth));
+
+ unsigned int APFloatBase::semanticsPrecision(const fltSemantics &semantics) {
+ return semantics.precision;
+ }
+ APFloatBase::ExponentType
+ APFloatBase::semanticsMaxExponent(const fltSemantics &semantics) {
+ return semantics.maxExponent;
+ }
+ APFloatBase::ExponentType
+ APFloatBase::semanticsMinExponent(const fltSemantics &semantics) {
+ return semantics.minExponent;
+ }
+ unsigned int APFloatBase::semanticsSizeInBits(const fltSemantics &semantics) {
+ return semantics.sizeInBits;
+ }
+
+ unsigned APFloatBase::getSizeInBits(const fltSemantics &Sem) {
+ return Sem.sizeInBits;
}
/* A bunch of private, handy routines. */
@@ -576,10 +628,9 @@ writeSignedDecimal (char *dst, int value)
return dst;
}
+namespace detail {
/* Constructors. */
-void
-APFloat::initialize(const fltSemantics *ourSemantics)
-{
+void IEEEFloat::initialize(const fltSemantics *ourSemantics) {
unsigned int count;
semantics = ourSemantics;
@@ -588,16 +639,12 @@ APFloat::initialize(const fltSemantics *ourSemantics)
significand.parts = new integerPart[count];
}
-void
-APFloat::freeSignificand()
-{
+void IEEEFloat::freeSignificand() {
if (needsCleanup())
delete [] significand.parts;
}
-void
-APFloat::assign(const APFloat &rhs)
-{
+void IEEEFloat::assign(const IEEEFloat &rhs) {
assert(semantics == rhs.semantics);
sign = rhs.sign;
@@ -607,9 +654,7 @@ APFloat::assign(const APFloat &rhs)
copySignificand(rhs);
}
-void
-APFloat::copySignificand(const APFloat &rhs)
-{
+void IEEEFloat::copySignificand(const IEEEFloat &rhs) {
assert(isFiniteNonZero() || category == fcNaN);
assert(rhs.partCount() >= partCount());
@@ -620,8 +665,7 @@ APFloat::copySignificand(const APFloat &rhs)
/* Make this number a NaN, with an arbitrary but deterministic value
for the significand. If double or longer, this is a signalling NaN,
which may not be ideal. If float, this is QNaN(0). */
-void APFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill)
-{
+void IEEEFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill) {
category = fcNaN;
sign = Negative;
@@ -663,20 +707,11 @@ void APFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill)
// For x87 extended precision, we want to make a NaN, not a
// pseudo-NaN. Maybe we should expose the ability to make
// pseudo-NaNs?
- if (semantics == &APFloat::x87DoubleExtended)
+ if (semantics == &semX87DoubleExtended)
APInt::tcSetBit(significand, QNaNBit + 1);
}
-APFloat APFloat::makeNaN(const fltSemantics &Sem, bool SNaN, bool Negative,
- const APInt *fill) {
- APFloat value(Sem, uninitialized);
- value.makeNaN(SNaN, Negative, fill);
- return value;
-}
-
-APFloat &
-APFloat::operator=(const APFloat &rhs)
-{
+IEEEFloat &IEEEFloat::operator=(const IEEEFloat &rhs) {
if (this != &rhs) {
if (semantics != rhs.semantics) {
freeSignificand();
@@ -688,8 +723,7 @@ APFloat::operator=(const APFloat &rhs)
return *this;
}
-APFloat &
-APFloat::operator=(APFloat &&rhs) {
+IEEEFloat &IEEEFloat::operator=(IEEEFloat &&rhs) {
freeSignificand();
semantics = rhs.semantics;
@@ -698,19 +732,17 @@ APFloat::operator=(APFloat &&rhs) {
category = rhs.category;
sign = rhs.sign;
- rhs.semantics = &Bogus;
+ rhs.semantics = &semBogus;
return *this;
}
-bool
-APFloat::isDenormal() const {
+bool IEEEFloat::isDenormal() const {
return isFiniteNonZero() && (exponent == semantics->minExponent) &&
(APInt::tcExtractBit(significandParts(),
semantics->precision - 1) == 0);
}
-bool
-APFloat::isSmallest() const {
+bool IEEEFloat::isSmallest() const {
// The smallest number by magnitude in our format will be the smallest
// denormal, i.e. the floating point number with exponent being minimum
// exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0).
@@ -718,7 +750,7 @@ APFloat::isSmallest() const {
significandMSB() == 0;
}
-bool APFloat::isSignificandAllOnes() const {
+bool IEEEFloat::isSignificandAllOnes() const {
// Test if the significand excluding the integral bit is all ones. This allows
// us to test for binade boundaries.
const integerPart *Parts = significandParts();
@@ -740,7 +772,7 @@ bool APFloat::isSignificandAllOnes() const {
return true;
}
-bool APFloat::isSignificandAllZeros() const {
+bool IEEEFloat::isSignificandAllZeros() const {
// Test if the significand excluding the integral bit is all zeros. This
// allows us to test for binade boundaries.
const integerPart *Parts = significandParts();
@@ -762,25 +794,22 @@ bool APFloat::isSignificandAllZeros() const {
return true;
}
-bool
-APFloat::isLargest() const {
+bool IEEEFloat::isLargest() const {
// The largest number by magnitude in our format will be the floating point
// number with maximum exponent and with significand that is all ones.
return isFiniteNonZero() && exponent == semantics->maxExponent
&& isSignificandAllOnes();
}
-bool
-APFloat::isInteger() const {
+bool IEEEFloat::isInteger() const {
// This could be made more efficient; I'm going for obviously correct.
if (!isFinite()) return false;
- APFloat truncated = *this;
+ IEEEFloat truncated = *this;
truncated.roundToIntegral(rmTowardZero);
return compare(truncated) == cmpEqual;
}
-bool
-APFloat::bitwiseIsEqual(const APFloat &rhs) const {
+bool IEEEFloat::bitwiseIsEqual(const IEEEFloat &rhs) const {
if (this == &rhs)
return true;
if (semantics != rhs.semantics ||
@@ -797,7 +826,7 @@ APFloat::bitwiseIsEqual(const APFloat &rhs) const {
rhs.significandParts());
}
-APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) {
+IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, integerPart value) {
initialize(&ourSemantics);
sign = 0;
category = fcNormal;
@@ -807,93 +836,54 @@ APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) {
normalize(rmNearestTiesToEven, lfExactlyZero);
}
-APFloat::APFloat(const fltSemantics &ourSemantics) {
+IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics) {
initialize(&ourSemantics);
category = fcZero;
sign = false;
}
-APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag) {
- // Allocates storage if necessary but does not initialize it.
- initialize(&ourSemantics);
-}
-
-APFloat::APFloat(const fltSemantics &ourSemantics, StringRef text) {
- initialize(&ourSemantics);
- convertFromString(text, rmNearestTiesToEven);
-}
+// Delegate to the previous constructor, because the later copy constructor
+// may actually inspect category, which can't be garbage.
+IEEEFloat::IEEEFloat(const fltSemantics &ourSemantics, uninitializedTag tag)
+ : IEEEFloat(ourSemantics) {}
-APFloat::APFloat(const APFloat &rhs) {
+IEEEFloat::IEEEFloat(const IEEEFloat &rhs) {
initialize(rhs.semantics);
assign(rhs);
}
-APFloat::APFloat(APFloat &&rhs) : semantics(&Bogus) {
+IEEEFloat::IEEEFloat(IEEEFloat &&rhs) : semantics(&semBogus) {
*this = std::move(rhs);
}
-APFloat::~APFloat()
-{
- freeSignificand();
-}
+IEEEFloat::~IEEEFloat() { freeSignificand(); }
// Profile - This method 'profiles' an APFloat for use with FoldingSet.
-void APFloat::Profile(FoldingSetNodeID& ID) const {
+void IEEEFloat::Profile(FoldingSetNodeID &ID) const {
ID.Add(bitcastToAPInt());
}
-unsigned int
-APFloat::partCount() const
-{
+unsigned int IEEEFloat::partCount() const {
return partCountForBits(semantics->precision + 1);
}
-unsigned int
-APFloat::semanticsPrecision(const fltSemantics &semantics)
-{
- return semantics.precision;
-}
-APFloat::ExponentType
-APFloat::semanticsMaxExponent(const fltSemantics &semantics)
-{
- return semantics.maxExponent;
-}
-APFloat::ExponentType
-APFloat::semanticsMinExponent(const fltSemantics &semantics)
-{
- return semantics.minExponent;
-}
-unsigned int
-APFloat::semanticsSizeInBits(const fltSemantics &semantics)
-{
- return semantics.sizeInBits;
-}
-
-const integerPart *
-APFloat::significandParts() const
-{
- return const_cast<APFloat *>(this)->significandParts();
+const integerPart *IEEEFloat::significandParts() const {
+ return const_cast<IEEEFloat *>(this)->significandParts();
}
-integerPart *
-APFloat::significandParts()
-{
+integerPart *IEEEFloat::significandParts() {
if (partCount() > 1)
return significand.parts;
else
return &significand.part;
}
-void
-APFloat::zeroSignificand()
-{
+void IEEEFloat::zeroSignificand() {
APInt::tcSet(significandParts(), 0, partCount());
}
/* Increment an fcNormal floating point number's significand. */
-void
-APFloat::incrementSignificand()
-{
+void IEEEFloat::incrementSignificand() {
integerPart carry;
carry = APInt::tcIncrement(significandParts(), partCount());
@@ -904,9 +894,7 @@ APFloat::incrementSignificand()
}
/* Add the significand of the RHS. Returns the carry flag. */
-integerPart
-APFloat::addSignificand(const APFloat &rhs)
-{
+integerPart IEEEFloat::addSignificand(const IEEEFloat &rhs) {
integerPart *parts;
parts = significandParts();
@@ -919,9 +907,8 @@ APFloat::addSignificand(const APFloat &rhs)
/* Subtract the significand of the RHS with a borrow flag. Returns
the borrow flag. */
-integerPart
-APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
-{
+integerPart IEEEFloat::subtractSignificand(const IEEEFloat &rhs,
+ integerPart borrow) {
integerPart *parts;
parts = significandParts();
@@ -936,9 +923,8 @@ APFloat::subtractSignificand(const APFloat &rhs, integerPart borrow)
/* Multiply the significand of the RHS. If ADDEND is non-NULL, add it
on to the full-precision result of the multiplication. Returns the
lost fraction. */
-lostFraction
-APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
-{
+lostFraction IEEEFloat::multiplySignificand(const IEEEFloat &rhs,
+ const IEEEFloat *addend) {
unsigned int omsb; // One, not zero, based MSB.
unsigned int partsCount, newPartsCount, precision;
integerPart *lhsSignificand;
@@ -1011,7 +997,7 @@ APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
significand.parts = fullSignificand;
semantics = &extendedSemantics;
- APFloat extendedAddend(*addend);
+ IEEEFloat extendedAddend(*addend);
status = extendedAddend.convert(extendedSemantics, rmTowardZero, &ignored);
assert(status == opOK);
(void)status;
@@ -1045,7 +1031,8 @@ APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
// the radix point (i.e. "MSB . rest-significant-bits").
//
// Note that the result is not normalized when "omsb < precision". So, the
- // caller needs to call APFloat::normalize() if normalized value is expected.
+ // caller needs to call IEEEFloat::normalize() if normalized value is
+ // expected.
if (omsb > precision) {
unsigned int bits, significantParts;
lostFraction lf;
@@ -1066,9 +1053,7 @@ APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend)
}
/* Divide the significand of the LHS by that of the RHS; returns the lost fraction. */
-lostFraction
-APFloat::divideSignificand(const APFloat &rhs)
-{
+lostFraction IEEEFloat::divideSignificand(const IEEEFloat &rhs) {
unsigned int bit, i, partsCount;
const integerPart *rhsSignificand;
integerPart *lhsSignificand, *dividend, *divisor;
@@ -1150,22 +1135,16 @@ APFloat::divideSignificand(const APFloat &rhs)
return lost_fraction;
}
-unsigned int
-APFloat::significandMSB() const
-{
+unsigned int IEEEFloat::significandMSB() const {
return APInt::tcMSB(significandParts(), partCount());
}
-unsigned int
-APFloat::significandLSB() const
-{
+unsigned int IEEEFloat::significandLSB() const {
return APInt::tcLSB(significandParts(), partCount());
}
/* Note that a zero result is NOT normalized to fcZero. */
-lostFraction
-APFloat::shiftSignificandRight(unsigned int bits)
-{
+lostFraction IEEEFloat::shiftSignificandRight(unsigned int bits) {
/* Our exponent should not overflow. */
assert((ExponentType) (exponent + bits) >= exponent);
@@ -1175,9 +1154,7 @@ APFloat::shiftSignificandRight(unsigned int bits)
}
/* Shift the significand left BITS bits, subtract BITS from its exponent. */
-void
-APFloat::shiftSignificandLeft(unsigned int bits)
-{
+void IEEEFloat::shiftSignificandLeft(unsigned int bits) {
assert(bits < semantics->precision);
if (bits) {
@@ -1190,9 +1167,8 @@ APFloat::shiftSignificandLeft(unsigned int bits)
}
}
-APFloat::cmpResult
-APFloat::compareAbsoluteValue(const APFloat &rhs) const
-{
+IEEEFloat::cmpResult
+IEEEFloat::compareAbsoluteValue(const IEEEFloat &rhs) const {
int compare;
assert(semantics == rhs.semantics);
@@ -1217,9 +1193,7 @@ APFloat::compareAbsoluteValue(const APFloat &rhs) const
/* Handle overflow. Sign is preserved. We either become infinity or
the largest finite number. */
-APFloat::opStatus
-APFloat::handleOverflow(roundingMode rounding_mode)
-{
+IEEEFloat::opStatus IEEEFloat::handleOverflow(roundingMode rounding_mode) {
/* Infinity? */
if (rounding_mode == rmNearestTiesToEven ||
rounding_mode == rmNearestTiesToAway ||
@@ -1243,11 +1217,9 @@ APFloat::handleOverflow(roundingMode rounding_mode)
would need to be rounded away from zero (i.e., by increasing the
significand). This routine must work for fcZero of both signs, and
fcNormal numbers. */
-bool
-APFloat::roundAwayFromZero(roundingMode rounding_mode,
- lostFraction lost_fraction,
- unsigned int bit) const
-{
+bool IEEEFloat::roundAwayFromZero(roundingMode rounding_mode,
+ lostFraction lost_fraction,
+ unsigned int bit) const {
/* NaNs and infinities should not have lost fractions. */
assert(isFiniteNonZero() || category == fcZero);
@@ -1280,10 +1252,8 @@ APFloat::roundAwayFromZero(roundingMode rounding_mode,
llvm_unreachable("Invalid rounding mode found");
}
-APFloat::opStatus
-APFloat::normalize(roundingMode rounding_mode,
- lostFraction lost_fraction)
-{
+IEEEFloat::opStatus IEEEFloat::normalize(roundingMode rounding_mode,
+ lostFraction lost_fraction) {
unsigned int omsb; /* One, not zero, based MSB. */
int exponentChange;
@@ -1388,9 +1358,8 @@ APFloat::normalize(roundingMode rounding_mode,
return (opStatus) (opUnderflow | opInexact);
}
-APFloat::opStatus
-APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
-{
+IEEEFloat::opStatus IEEEFloat::addOrSubtractSpecials(const IEEEFloat &rhs,
+ bool subtract) {
switch (PackCategoriesIntoKey(category, rhs.category)) {
default:
llvm_unreachable(nullptr);
@@ -1445,9 +1414,8 @@ APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract)
}
/* Add or subtract two normal numbers. */
-lostFraction
-APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
-{
+lostFraction IEEEFloat::addOrSubtractSignificand(const IEEEFloat &rhs,
+ bool subtract) {
integerPart carry;
lostFraction lost_fraction;
int bits;
@@ -1461,7 +1429,7 @@ APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
/* Subtraction is more subtle than one might naively expect. */
if (subtract) {
- APFloat temp_rhs(rhs);
+ IEEEFloat temp_rhs(rhs);
bool reverse;
if (bits == 0) {
@@ -1500,7 +1468,7 @@ APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
(void)carry;
} else {
if (bits > 0) {
- APFloat temp_rhs(rhs);
+ IEEEFloat temp_rhs(rhs);
lost_fraction = temp_rhs.shiftSignificandRight(bits);
carry = addSignificand(temp_rhs);
@@ -1517,9 +1485,7 @@ APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract)
return lost_fraction;
}
-APFloat::opStatus
-APFloat::multiplySpecials(const APFloat &rhs)
-{
+IEEEFloat::opStatus IEEEFloat::multiplySpecials(const IEEEFloat &rhs) {
switch (PackCategoriesIntoKey(category, rhs.category)) {
default:
llvm_unreachable(nullptr);
@@ -1561,9 +1527,7 @@ APFloat::multiplySpecials(const APFloat &rhs)
}
}
-APFloat::opStatus
-APFloat::divideSpecials(const APFloat &rhs)
-{
+IEEEFloat::opStatus IEEEFloat::divideSpecials(const IEEEFloat &rhs) {
switch (PackCategoriesIntoKey(category, rhs.category)) {
default:
llvm_unreachable(nullptr);
@@ -1602,9 +1566,7 @@ APFloat::divideSpecials(const APFloat &rhs)
}
}
-APFloat::opStatus
-APFloat::modSpecials(const APFloat &rhs)
-{
+IEEEFloat::opStatus IEEEFloat::modSpecials(const IEEEFloat &rhs) {
switch (PackCategoriesIntoKey(category, rhs.category)) {
default:
llvm_unreachable(nullptr);
@@ -1640,32 +1602,25 @@ APFloat::modSpecials(const APFloat &rhs)
}
/* Change sign. */
-void
-APFloat::changeSign()
-{
+void IEEEFloat::changeSign() {
/* Look mummy, this one's easy. */
sign = !sign;
}
-void
-APFloat::clearSign()
-{
+void IEEEFloat::clearSign() {
/* So is this one. */
sign = 0;
}
-void
-APFloat::copySign(const APFloat &rhs)
-{
+void IEEEFloat::copySign(const IEEEFloat &rhs) {
/* And this one. */
sign = rhs.sign;
}
/* Normalized addition or subtraction. */
-APFloat::opStatus
-APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
- bool subtract)
-{
+IEEEFloat::opStatus IEEEFloat::addOrSubtract(const IEEEFloat &rhs,
+ roundingMode rounding_mode,
+ bool subtract) {
opStatus fs;
fs = addOrSubtractSpecials(rhs, subtract);
@@ -1693,23 +1648,20 @@ APFloat::addOrSubtract(const APFloat &rhs, roundingMode rounding_mode,
}
/* Normalized addition. */
-APFloat::opStatus
-APFloat::add(const APFloat &rhs, roundingMode rounding_mode)
-{
+IEEEFloat::opStatus IEEEFloat::add(const IEEEFloat &rhs,
+ roundingMode rounding_mode) {
return addOrSubtract(rhs, rounding_mode, false);
}
/* Normalized subtraction. */
-APFloat::opStatus
-APFloat::subtract(const APFloat &rhs, roundingMode rounding_mode)
-{
+IEEEFloat::opStatus IEEEFloat::subtract(const IEEEFloat &rhs,
+ roundingMode rounding_mode) {
return addOrSubtract(rhs, rounding_mode, true);
}
/* Normalized multiply. */
-APFloat::opStatus
-APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
-{
+IEEEFloat::opStatus IEEEFloat::multiply(const IEEEFloat &rhs,
+ roundingMode rounding_mode) {
opStatus fs;
sign ^= rhs.sign;
@@ -1726,9 +1678,8 @@ APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode)
}
/* Normalized divide. */
-APFloat::opStatus
-APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
-{
+IEEEFloat::opStatus IEEEFloat::divide(const IEEEFloat &rhs,
+ roundingMode rounding_mode) {
opStatus fs;
sign ^= rhs.sign;
@@ -1745,11 +1696,9 @@ APFloat::divide(const APFloat &rhs, roundingMode rounding_mode)
}
/* Normalized remainder. This is not currently correct in all cases. */
-APFloat::opStatus
-APFloat::remainder(const APFloat &rhs)
-{
+IEEEFloat::opStatus IEEEFloat::remainder(const IEEEFloat &rhs) {
opStatus fs;
- APFloat V = *this;
+ IEEEFloat V = *this;
unsigned int origSign = sign;
fs = V.divide(rhs, rmNearestTiesToEven);
@@ -1761,8 +1710,10 @@ APFloat::remainder(const APFloat &rhs)
bool ignored;
fs = V.convertToInteger(x, parts * integerPartWidth, true,
rmNearestTiesToEven, &ignored);
- if (fs==opInvalidOp)
+ if (fs==opInvalidOp) {
+ delete[] x;
return fs;
+ }
fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
rmNearestTiesToEven);
@@ -1782,14 +1733,12 @@ APFloat::remainder(const APFloat &rhs)
/* Normalized llvm frem (C fmod).
This is not currently correct in all cases. */
-APFloat::opStatus
-APFloat::mod(const APFloat &rhs)
-{
+IEEEFloat::opStatus IEEEFloat::mod(const IEEEFloat &rhs) {
opStatus fs;
fs = modSpecials(rhs);
if (isFiniteNonZero() && rhs.isFiniteNonZero()) {
- APFloat V = *this;
+ IEEEFloat V = *this;
unsigned int origSign = sign;
fs = V.divide(rhs, rmNearestTiesToEven);
@@ -1801,8 +1750,10 @@ APFloat::mod(const APFloat &rhs)
bool ignored;
fs = V.convertToInteger(x, parts * integerPartWidth, true,
rmTowardZero, &ignored);
- if (fs==opInvalidOp)
+ if (fs==opInvalidOp) {
+ delete[] x;
return fs;
+ }
fs = V.convertFromZeroExtendedInteger(x, parts * integerPartWidth, true,
rmNearestTiesToEven);
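
Both early-return fixes above plug the same leak: the scratch buffer x is allocated with new[] and the opInvalidOp path previously returned without freeing it. A scoped owner makes that pattern leak-free on every return path; the sketch below is illustrative plain C++, with integerPart and the conversion step as stand-ins rather than the real APFloat API.

#include <memory>

typedef unsigned long long integerPart;  // stand-in for the LLVM typedef

// With a scoped owner, the buffer is released automatically on every return
// path and no explicit delete[] is needed.
static bool convertViaScratch(unsigned parts, bool conversionInvalid) {
  std::unique_ptr<integerPart[]> x(new integerPart[parts]);
  if (conversionInvalid)
    return false;            // buffer freed automatically here
  // ... use x.get() for the follow-up conversion ...
  return true;               // and here
}

int main() { return convertViaScratch(2, false) ? 0 : 1; }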
@@ -1822,11 +1773,9 @@ APFloat::mod(const APFloat &rhs)
}
/* Normalized fused-multiply-add. */
-APFloat::opStatus
-APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
- const APFloat &addend,
- roundingMode rounding_mode)
-{
+IEEEFloat::opStatus IEEEFloat::fusedMultiplyAdd(const IEEEFloat &multiplicand,
+ const IEEEFloat &addend,
+ roundingMode rounding_mode) {
opStatus fs;
/* Post-multiplication sign, before addition. */
@@ -1867,7 +1816,7 @@ APFloat::fusedMultiplyAdd(const APFloat &multiplicand,
}
/* Rounding-mode correct round to integral value. */
-APFloat::opStatus APFloat::roundToIntegral(roundingMode rounding_mode) {
+IEEEFloat::opStatus IEEEFloat::roundToIntegral(roundingMode rounding_mode) {
opStatus fs;
// If the exponent is large enough, we know that this value is already
@@ -1884,7 +1833,7 @@ APFloat::opStatus APFloat::roundToIntegral(roundingMode rounding_mode) {
// addition instead.
APInt IntegerConstant(NextPowerOf2(semanticsPrecision(*semantics)), 1);
IntegerConstant <<= semanticsPrecision(*semantics)-1;
- APFloat MagicConstant(*semantics);
+ IEEEFloat MagicConstant(*semantics);
fs = MagicConstant.convertFromAPInt(IntegerConstant, false,
rmNearestTiesToEven);
MagicConstant.copySign(*this);
@@ -1910,9 +1859,7 @@ APFloat::opStatus APFloat::roundToIntegral(roundingMode rounding_mode) {
/* Comparison requires normalized numbers. */
-APFloat::cmpResult
-APFloat::compare(const APFloat &rhs) const
-{
+IEEEFloat::cmpResult IEEEFloat::compare(const IEEEFloat &rhs) const {
cmpResult result;
assert(semantics == rhs.semantics);
@@ -1982,17 +1929,16 @@ APFloat::compare(const APFloat &rhs) const
return result;
}
-/// APFloat::convert - convert a value of one floating point type to another.
+/// IEEEFloat::convert - convert a value of one floating point type to another.
/// The return value corresponds to the IEEE754 exceptions. *losesInfo
/// records whether the transformation lost information, i.e. whether
/// converting the result back to the original type will produce the
/// original value (this is almost the same as return value==fsOK, but there
/// are edge cases where this is not so).
-APFloat::opStatus
-APFloat::convert(const fltSemantics &toSemantics,
- roundingMode rounding_mode, bool *losesInfo)
-{
+IEEEFloat::opStatus IEEEFloat::convert(const fltSemantics &toSemantics,
+ roundingMode rounding_mode,
+ bool *losesInfo) {
lostFraction lostFraction;
unsigned int newPartCount, oldPartCount;
opStatus fs;
@@ -2005,8 +1951,8 @@ APFloat::convert(const fltSemantics &toSemantics,
shift = toSemantics.precision - fromSemantics.precision;
bool X86SpecialNan = false;
- if (&fromSemantics == &APFloat::x87DoubleExtended &&
- &toSemantics != &APFloat::x87DoubleExtended && category == fcNaN &&
+ if (&fromSemantics == &semX87DoubleExtended &&
+ &toSemantics != &semX87DoubleExtended && category == fcNaN &&
(!(*significandParts() & 0x8000000000000000ULL) ||
!(*significandParts() & 0x4000000000000000ULL))) {
// x86 has some unusual NaNs which cannot be represented in any other
@@ -2070,7 +2016,7 @@ APFloat::convert(const fltSemantics &toSemantics,
// For x87 extended precision, we want to make a NaN, not a special NaN if
// the input wasn't special either.
- if (!X86SpecialNan && semantics == &APFloat::x87DoubleExtended)
+ if (!X86SpecialNan && semantics == &semX87DoubleExtended)
APInt::tcSetBit(significandParts(), semantics->precision - 1);
// gcc forces the Quiet bit on, which means (float)(double)(float_sNan)
@@ -2096,12 +2042,9 @@ APFloat::convert(const fltSemantics &toSemantics,
Note that for conversions to integer type the C standard requires
round-to-zero to always be used. */
-APFloat::opStatus
-APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width,
- bool isSigned,
- roundingMode rounding_mode,
- bool *isExact) const
-{
+IEEEFloat::opStatus IEEEFloat::convertToSignExtendedInteger(
+ integerPart *parts, unsigned int width, bool isSigned,
+ roundingMode rounding_mode, bool *isExact) const {
lostFraction lost_fraction;
const integerPart *src;
unsigned int dstPartsCount, truncatedBits;
@@ -2208,11 +2151,11 @@ APFloat::convertToSignExtendedInteger(integerPart *parts, unsigned int width,
the original value. This is almost equivalent to result==opOK,
except for negative zeroes.
*/
-APFloat::opStatus
-APFloat::convertToInteger(integerPart *parts, unsigned int width,
- bool isSigned,
- roundingMode rounding_mode, bool *isExact) const
-{
+IEEEFloat::opStatus IEEEFloat::convertToInteger(integerPart *parts,
+ unsigned int width,
+ bool isSigned,
+ roundingMode rounding_mode,
+ bool *isExact) const {
opStatus fs;
fs = convertToSignExtendedInteger(parts, width, isSigned, rounding_mode,
@@ -2242,10 +2185,9 @@ APFloat::convertToInteger(integerPart *parts, unsigned int width,
an APSInt, whose initial bit-width and signed-ness are used to determine the
precision of the conversion.
*/
-APFloat::opStatus
-APFloat::convertToInteger(APSInt &result,
- roundingMode rounding_mode, bool *isExact) const
-{
+IEEEFloat::opStatus IEEEFloat::convertToInteger(APSInt &result,
+ roundingMode rounding_mode,
+ bool *isExact) const {
unsigned bitWidth = result.getBitWidth();
SmallVector<uint64_t, 4> parts(result.getNumWords());
opStatus status = convertToInteger(
@@ -2258,11 +2200,8 @@ APFloat::convertToInteger(APSInt &result,
/* Convert an unsigned integer SRC to a floating point number,
rounding according to ROUNDING_MODE. The sign of the floating
point number is not modified. */
-APFloat::opStatus
-APFloat::convertFromUnsignedParts(const integerPart *src,
- unsigned int srcCount,
- roundingMode rounding_mode)
-{
+IEEEFloat::opStatus IEEEFloat::convertFromUnsignedParts(
+ const integerPart *src, unsigned int srcCount, roundingMode rounding_mode) {
unsigned int omsb, precision, dstCount;
integerPart *dst;
lostFraction lost_fraction;
@@ -2289,11 +2228,8 @@ APFloat::convertFromUnsignedParts(const integerPart *src,
return normalize(rounding_mode, lost_fraction);
}
-APFloat::opStatus
-APFloat::convertFromAPInt(const APInt &Val,
- bool isSigned,
- roundingMode rounding_mode)
-{
+IEEEFloat::opStatus IEEEFloat::convertFromAPInt(const APInt &Val, bool isSigned,
+ roundingMode rounding_mode) {
unsigned int partCount = Val.getNumWords();
APInt api = Val;
@@ -2309,12 +2245,10 @@ APFloat::convertFromAPInt(const APInt &Val,
/* Convert a two's complement integer SRC to a floating point number,
rounding according to ROUNDING_MODE. ISSIGNED is true if the
integer is signed, in which case it must be sign-extended. */
-APFloat::opStatus
-APFloat::convertFromSignExtendedInteger(const integerPart *src,
- unsigned int srcCount,
- bool isSigned,
- roundingMode rounding_mode)
-{
+IEEEFloat::opStatus
+IEEEFloat::convertFromSignExtendedInteger(const integerPart *src,
+ unsigned int srcCount, bool isSigned,
+ roundingMode rounding_mode) {
opStatus status;
if (isSigned &&
@@ -2337,11 +2271,10 @@ APFloat::convertFromSignExtendedInteger(const integerPart *src,
}
/* FIXME: should this just take a const APInt reference? */
-APFloat::opStatus
-APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
- unsigned int width, bool isSigned,
- roundingMode rounding_mode)
-{
+IEEEFloat::opStatus
+IEEEFloat::convertFromZeroExtendedInteger(const integerPart *parts,
+ unsigned int width, bool isSigned,
+ roundingMode rounding_mode) {
unsigned int partCount = partCountForBits(width);
APInt api = APInt(width, makeArrayRef(parts, partCount));
@@ -2354,9 +2287,9 @@ APFloat::convertFromZeroExtendedInteger(const integerPart *parts,
return convertFromUnsignedParts(api.getRawData(), partCount, rounding_mode);
}
-APFloat::opStatus
-APFloat::convertFromHexadecimalString(StringRef s, roundingMode rounding_mode)
-{
+IEEEFloat::opStatus
+IEEEFloat::convertFromHexadecimalString(StringRef s,
+ roundingMode rounding_mode) {
lostFraction lost_fraction = lfExactlyZero;
category = fcNormal;
@@ -2434,11 +2367,10 @@ APFloat::convertFromHexadecimalString(StringRef s, roundingMode rounding_mode)
return normalize(rounding_mode, lost_fraction);
}
-APFloat::opStatus
-APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
- unsigned sigPartCount, int exp,
- roundingMode rounding_mode)
-{
+IEEEFloat::opStatus
+IEEEFloat::roundSignificandWithExponent(const integerPart *decSigParts,
+ unsigned sigPartCount, int exp,
+ roundingMode rounding_mode) {
unsigned int parts, pow5PartCount;
fltSemantics calcSemantics = { 32767, -32767, 0, 0 };
integerPart pow5Parts[maxPowerOfFiveParts];
@@ -2460,8 +2392,9 @@ APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
excessPrecision = calcSemantics.precision - semantics->precision;
truncatedBits = excessPrecision;
- APFloat decSig = APFloat::getZero(calcSemantics, sign);
- APFloat pow5(calcSemantics);
+ IEEEFloat decSig(calcSemantics, uninitialized);
+ decSig.makeZero(sign);
+ IEEEFloat pow5(calcSemantics);
sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount,
rmNearestTiesToEven);
@@ -2519,9 +2452,8 @@ APFloat::roundSignificandWithExponent(const integerPart *decSigParts,
}
}
-APFloat::opStatus
-APFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode)
-{
+IEEEFloat::opStatus
+IEEEFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) {
decimalInfo D;
opStatus fs;
@@ -2637,8 +2569,7 @@ APFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode)
return fs;
}
-bool
-APFloat::convertFromStringSpecials(StringRef str) {
+bool IEEEFloat::convertFromStringSpecials(StringRef str) {
if (str.equals("inf") || str.equals("INFINITY")) {
makeInf(false);
return true;
@@ -2662,9 +2593,8 @@ APFloat::convertFromStringSpecials(StringRef str) {
return false;
}
-APFloat::opStatus
-APFloat::convertFromString(StringRef str, roundingMode rounding_mode)
-{
+IEEEFloat::opStatus IEEEFloat::convertFromString(StringRef str,
+ roundingMode rounding_mode) {
assert(!str.empty() && "Invalid string length");
// Handle special cases.
@@ -2714,10 +2644,9 @@ APFloat::convertFromString(StringRef str, roundingMode rounding_mode)
1 (normal numbers) or 2 (normal numbers rounded-away-from-zero with
any other digits zero).
*/
-unsigned int
-APFloat::convertToHexString(char *dst, unsigned int hexDigits,
- bool upperCase, roundingMode rounding_mode) const
-{
+unsigned int IEEEFloat::convertToHexString(char *dst, unsigned int hexDigits,
+ bool upperCase,
+ roundingMode rounding_mode) const {
char *p;
p = dst;
@@ -2762,11 +2691,9 @@ APFloat::convertToHexString(char *dst, unsigned int hexDigits,
form of a normal floating point number with the specified number of
hexadecimal digits. If HEXDIGITS is zero the minimum number of
digits necessary to print the value precisely is output. */
-char *
-APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
- bool upperCase,
- roundingMode rounding_mode) const
-{
+char *IEEEFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
+ bool upperCase,
+ roundingMode rounding_mode) const {
unsigned int count, valueBits, shift, partsCount, outputDigits;
const char *hexDigitChars;
const integerPart *significand;
@@ -2866,7 +2793,7 @@ APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits,
return writeSignedDecimal (dst, exponent);
}
-hash_code llvm::hash_value(const APFloat &Arg) {
+hash_code hash_value(const IEEEFloat &Arg) {
if (!Arg.isFiniteNonZero())
return hash_combine((uint8_t)Arg.category,
// NaN has no sign, fix it at zero.
@@ -2890,10 +2817,8 @@ hash_code llvm::hash_value(const APFloat &Arg) {
// Denormals have exponent minExponent in APFloat, but minExponent-1 in
// the actual IEEE representations. We compensate for that here.
-APInt
-APFloat::convertF80LongDoubleAPFloatToAPInt() const
-{
- assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended);
+APInt IEEEFloat::convertF80LongDoubleAPFloatToAPInt() const {
+ assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended);
assert(partCount()==2);
uint64_t myexponent, mysignificand;
@@ -2922,10 +2847,8 @@ APFloat::convertF80LongDoubleAPFloatToAPInt() const
return APInt(80, words);
}
-APInt
-APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
-{
- assert(semantics == (const llvm::fltSemantics*)&PPCDoubleDouble);
+APInt IEEEFloat::convertPPCDoubleDoubleAPFloatToAPInt() const {
+ assert(semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleImpl);
assert(partCount()==2);
uint64_t words[2];
@@ -2939,14 +2862,14 @@ APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
// Declare fltSemantics before APFloat that uses it (and
// saves pointer to it) to ensure correct destruction order.
fltSemantics extendedSemantics = *semantics;
- extendedSemantics.minExponent = IEEEdouble.minExponent;
- APFloat extended(*this);
+ extendedSemantics.minExponent = semIEEEdouble.minExponent;
+ IEEEFloat extended(*this);
fs = extended.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo);
assert(fs == opOK && !losesInfo);
(void)fs;
- APFloat u(extended);
- fs = u.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo);
+ IEEEFloat u(extended);
+ fs = u.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
assert(fs == opOK || fs == opInexact);
(void)fs;
words[0] = *u.convertDoubleAPFloatToAPInt().getRawData();
@@ -2960,9 +2883,9 @@ APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
assert(fs == opOK && !losesInfo);
(void)fs;
- APFloat v(extended);
+ IEEEFloat v(extended);
v.subtract(u, rmNearestTiesToEven);
- fs = v.convert(IEEEdouble, rmNearestTiesToEven, &losesInfo);
+ fs = v.convert(semIEEEdouble, rmNearestTiesToEven, &losesInfo);
assert(fs == opOK && !losesInfo);
(void)fs;
words[1] = *v.convertDoubleAPFloatToAPInt().getRawData();
@@ -2973,10 +2896,8 @@ APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const
return APInt(128, words);
}
-APInt
-APFloat::convertQuadrupleAPFloatToAPInt() const
-{
- assert(semantics == (const llvm::fltSemantics*)&IEEEquad);
+APInt IEEEFloat::convertQuadrupleAPFloatToAPInt() const {
+ assert(semantics == (const llvm::fltSemantics*)&semIEEEquad);
assert(partCount()==2);
uint64_t myexponent, mysignificand, mysignificand2;
@@ -3009,10 +2930,8 @@ APFloat::convertQuadrupleAPFloatToAPInt() const
return APInt(128, words);
}
-APInt
-APFloat::convertDoubleAPFloatToAPInt() const
-{
- assert(semantics == (const llvm::fltSemantics*)&IEEEdouble);
+APInt IEEEFloat::convertDoubleAPFloatToAPInt() const {
+ assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble);
assert(partCount()==1);
uint64_t myexponent, mysignificand;
@@ -3039,10 +2958,8 @@ APFloat::convertDoubleAPFloatToAPInt() const
(mysignificand & 0xfffffffffffffLL))));
}
-APInt
-APFloat::convertFloatAPFloatToAPInt() const
-{
- assert(semantics == (const llvm::fltSemantics*)&IEEEsingle);
+APInt IEEEFloat::convertFloatAPFloatToAPInt() const {
+ assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle);
assert(partCount()==1);
uint32_t myexponent, mysignificand;
@@ -3068,10 +2985,8 @@ APFloat::convertFloatAPFloatToAPInt() const
(mysignificand & 0x7fffff)));
}
-APInt
-APFloat::convertHalfAPFloatToAPInt() const
-{
- assert(semantics == (const llvm::fltSemantics*)&IEEEhalf);
+APInt IEEEFloat::convertHalfAPFloatToAPInt() const {
+ assert(semantics == (const llvm::fltSemantics*)&semIEEEhalf);
assert(partCount()==1);
uint32_t myexponent, mysignificand;
@@ -3101,42 +3016,36 @@ APFloat::convertHalfAPFloatToAPInt() const
// point constant as it would appear in memory. It is not a conversion,
// and treating the result as a normal integer is unlikely to be useful.
-APInt
-APFloat::bitcastToAPInt() const
-{
- if (semantics == (const llvm::fltSemantics*)&IEEEhalf)
+APInt IEEEFloat::bitcastToAPInt() const {
+ if (semantics == (const llvm::fltSemantics*)&semIEEEhalf)
return convertHalfAPFloatToAPInt();
- if (semantics == (const llvm::fltSemantics*)&IEEEsingle)
+ if (semantics == (const llvm::fltSemantics*)&semIEEEsingle)
return convertFloatAPFloatToAPInt();
- if (semantics == (const llvm::fltSemantics*)&IEEEdouble)
+ if (semantics == (const llvm::fltSemantics*)&semIEEEdouble)
return convertDoubleAPFloatToAPInt();
- if (semantics == (const llvm::fltSemantics*)&IEEEquad)
+ if (semantics == (const llvm::fltSemantics*)&semIEEEquad)
return convertQuadrupleAPFloatToAPInt();
- if (semantics == (const llvm::fltSemantics*)&PPCDoubleDouble)
+ if (semantics == (const llvm::fltSemantics *)&semPPCDoubleDoubleImpl)
return convertPPCDoubleDoubleAPFloatToAPInt();
- assert(semantics == (const llvm::fltSemantics*)&x87DoubleExtended &&
+ assert(semantics == (const llvm::fltSemantics*)&semX87DoubleExtended &&
"unknown format!");
return convertF80LongDoubleAPFloatToAPInt();
}
-float
-APFloat::convertToFloat() const
-{
- assert(semantics == (const llvm::fltSemantics*)&IEEEsingle &&
+float IEEEFloat::convertToFloat() const {
+ assert(semantics == (const llvm::fltSemantics*)&semIEEEsingle &&
"Float semantics are not IEEEsingle");
APInt api = bitcastToAPInt();
return api.bitsToFloat();
}
-double
-APFloat::convertToDouble() const
-{
- assert(semantics == (const llvm::fltSemantics*)&IEEEdouble &&
+double IEEEFloat::convertToDouble() const {
+ assert(semantics == (const llvm::fltSemantics*)&semIEEEdouble &&
"Float semantics are not IEEEdouble");
APInt api = bitcastToAPInt();
return api.bitsToDouble();
@@ -3149,16 +3058,14 @@ APFloat::convertToDouble() const
/// exponent = 0, integer bit 1 ("pseudodenormal")
/// exponent!=0 nor all 1's, integer bit 0 ("unnormal")
/// At the moment, the first two are treated as NaNs, the second two as Normal.
-void
-APFloat::initFromF80LongDoubleAPInt(const APInt &api)
-{
+void IEEEFloat::initFromF80LongDoubleAPInt(const APInt &api) {
assert(api.getBitWidth()==80);
uint64_t i1 = api.getRawData()[0];
uint64_t i2 = api.getRawData()[1];
uint64_t myexponent = (i2 & 0x7fff);
uint64_t mysignificand = i1;
- initialize(&APFloat::x87DoubleExtended);
+ initialize(&semX87DoubleExtended);
assert(partCount()==2);
sign = static_cast<unsigned int>(i2>>15);
@@ -3183,9 +3090,7 @@ APFloat::initFromF80LongDoubleAPInt(const APInt &api)
}
}
-void
-APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
-{
+void IEEEFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) {
assert(api.getBitWidth()==128);
uint64_t i1 = api.getRawData()[0];
uint64_t i2 = api.getRawData()[1];
@@ -3194,14 +3099,14 @@ APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
// Get the first double and convert to our format.
initFromDoubleAPInt(APInt(64, i1));
- fs = convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo);
+ fs = convert(semPPCDoubleDoubleImpl, rmNearestTiesToEven, &losesInfo);
assert(fs == opOK && !losesInfo);
(void)fs;
// Unless we have a special case, add in second double.
if (isFiniteNonZero()) {
- APFloat v(IEEEdouble, APInt(64, i2));
- fs = v.convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo);
+ IEEEFloat v(semIEEEdouble, APInt(64, i2));
+ fs = v.convert(semPPCDoubleDoubleImpl, rmNearestTiesToEven, &losesInfo);
assert(fs == opOK && !losesInfo);
(void)fs;
@@ -3209,9 +3114,7 @@ APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api)
}
}
-void
-APFloat::initFromQuadrupleAPInt(const APInt &api)
-{
+void IEEEFloat::initFromQuadrupleAPInt(const APInt &api) {
assert(api.getBitWidth()==128);
uint64_t i1 = api.getRawData()[0];
uint64_t i2 = api.getRawData()[1];
@@ -3219,7 +3122,7 @@ APFloat::initFromQuadrupleAPInt(const APInt &api)
uint64_t mysignificand = i1;
uint64_t mysignificand2 = i2 & 0xffffffffffffLL;
- initialize(&APFloat::IEEEquad);
+ initialize(&semIEEEquad);
assert(partCount()==2);
sign = static_cast<unsigned int>(i2>>63);
@@ -3249,15 +3152,13 @@ APFloat::initFromQuadrupleAPInt(const APInt &api)
}
}
-void
-APFloat::initFromDoubleAPInt(const APInt &api)
-{
+void IEEEFloat::initFromDoubleAPInt(const APInt &api) {
assert(api.getBitWidth()==64);
uint64_t i = *api.getRawData();
uint64_t myexponent = (i >> 52) & 0x7ff;
uint64_t mysignificand = i & 0xfffffffffffffLL;
- initialize(&APFloat::IEEEdouble);
+ initialize(&semIEEEdouble);
assert(partCount()==1);
sign = static_cast<unsigned int>(i>>63);
@@ -3282,15 +3183,13 @@ APFloat::initFromDoubleAPInt(const APInt &api)
}
}
-void
-APFloat::initFromFloatAPInt(const APInt & api)
-{
+void IEEEFloat::initFromFloatAPInt(const APInt &api) {
assert(api.getBitWidth()==32);
uint32_t i = (uint32_t)*api.getRawData();
uint32_t myexponent = (i >> 23) & 0xff;
uint32_t mysignificand = i & 0x7fffff;
- initialize(&APFloat::IEEEsingle);
+ initialize(&semIEEEsingle);
assert(partCount()==1);
sign = i >> 31;
@@ -3315,15 +3214,13 @@ APFloat::initFromFloatAPInt(const APInt & api)
}
}
-void
-APFloat::initFromHalfAPInt(const APInt & api)
-{
+void IEEEFloat::initFromHalfAPInt(const APInt &api) {
assert(api.getBitWidth()==16);
uint32_t i = (uint32_t)*api.getRawData();
uint32_t myexponent = (i >> 10) & 0x1f;
uint32_t mysignificand = i & 0x3ff;
- initialize(&APFloat::IEEEhalf);
+ initialize(&semIEEEhalf);
assert(partCount()==1);
sign = i >> 15;
@@ -3352,53 +3249,26 @@ APFloat::initFromHalfAPInt(const APInt & api)
/// we infer the floating point type from the size of the APInt. The
/// isIEEE argument distinguishes between PPC128 and IEEE128 (not meaningful
/// when the size is anything else).
-void
-APFloat::initFromAPInt(const fltSemantics* Sem, const APInt& api)
-{
- if (Sem == &IEEEhalf)
+void IEEEFloat::initFromAPInt(const fltSemantics *Sem, const APInt &api) {
+ if (Sem == &semIEEEhalf)
return initFromHalfAPInt(api);
- if (Sem == &IEEEsingle)
+ if (Sem == &semIEEEsingle)
return initFromFloatAPInt(api);
- if (Sem == &IEEEdouble)
+ if (Sem == &semIEEEdouble)
return initFromDoubleAPInt(api);
- if (Sem == &x87DoubleExtended)
+ if (Sem == &semX87DoubleExtended)
return initFromF80LongDoubleAPInt(api);
- if (Sem == &IEEEquad)
+ if (Sem == &semIEEEquad)
return initFromQuadrupleAPInt(api);
- if (Sem == &PPCDoubleDouble)
+ if (Sem == &semPPCDoubleDoubleImpl)
return initFromPPCDoubleDoubleAPInt(api);
llvm_unreachable(nullptr);
}
-APFloat
-APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE)
-{
- switch (BitWidth) {
- case 16:
- return APFloat(IEEEhalf, APInt::getAllOnesValue(BitWidth));
- case 32:
- return APFloat(IEEEsingle, APInt::getAllOnesValue(BitWidth));
- case 64:
- return APFloat(IEEEdouble, APInt::getAllOnesValue(BitWidth));
- case 80:
- return APFloat(x87DoubleExtended, APInt::getAllOnesValue(BitWidth));
- case 128:
- if (isIEEE)
- return APFloat(IEEEquad, APInt::getAllOnesValue(BitWidth));
- return APFloat(PPCDoubleDouble, APInt::getAllOnesValue(BitWidth));
- default:
- llvm_unreachable("Unknown floating bit width");
- }
-}
-
-unsigned APFloat::getSizeInBits(const fltSemantics &Sem) {
- return Sem.sizeInBits;
-}
-
/// Make this number the largest magnitude normal number in the given
/// semantics.
-void APFloat::makeLargest(bool Negative) {
+void IEEEFloat::makeLargest(bool Negative) {
// We want (in interchange format):
// sign = {Negative}
// exponent = 1..10
@@ -3423,7 +3293,7 @@ void APFloat::makeLargest(bool Negative) {
/// Make this number the smallest magnitude denormal number in the given
/// semantics.
-void APFloat::makeSmallest(bool Negative) {
+void IEEEFloat::makeSmallest(bool Negative) {
// We want (in interchange format):
// sign = {Negative}
// exponent = 0..0
@@ -3434,55 +3304,30 @@ void APFloat::makeSmallest(bool Negative) {
APInt::tcSet(significandParts(), 1, partCount());
}
-
-APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) {
- // We want (in interchange format):
- // sign = {Negative}
- // exponent = 1..10
- // significand = 1..1
- APFloat Val(Sem, uninitialized);
- Val.makeLargest(Negative);
- return Val;
-}
-
-APFloat APFloat::getSmallest(const fltSemantics &Sem, bool Negative) {
- // We want (in interchange format):
- // sign = {Negative}
- // exponent = 0..0
- // significand = 0..01
- APFloat Val(Sem, uninitialized);
- Val.makeSmallest(Negative);
- return Val;
-}
-
-APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) {
- APFloat Val(Sem, uninitialized);
-
+void IEEEFloat::makeSmallestNormalized(bool Negative) {
// We want (in interchange format):
// sign = {Negative}
// exponent = 0..0
// significand = 10..0
- Val.category = fcNormal;
- Val.zeroSignificand();
- Val.sign = Negative;
- Val.exponent = Sem.minExponent;
- Val.significandParts()[partCountForBits(Sem.precision)-1] |=
- (((integerPart) 1) << ((Sem.precision - 1) % integerPartWidth));
-
- return Val;
+ category = fcNormal;
+ zeroSignificand();
+ sign = Negative;
+ exponent = semantics->minExponent;
+ significandParts()[partCountForBits(semantics->precision) - 1] |=
+ (((integerPart)1) << ((semantics->precision - 1) % integerPartWidth));
}
-APFloat::APFloat(const fltSemantics &Sem, const APInt &API) {
+IEEEFloat::IEEEFloat(const fltSemantics &Sem, const APInt &API) {
initFromAPInt(&Sem, API);
}
-APFloat::APFloat(float f) {
- initFromAPInt(&IEEEsingle, APInt::floatToBits(f));
+IEEEFloat::IEEEFloat(float f) {
+ initFromAPInt(&semIEEEsingle, APInt::floatToBits(f));
}
-APFloat::APFloat(double d) {
- initFromAPInt(&IEEEdouble, APInt::doubleToBits(d));
+IEEEFloat::IEEEFloat(double d) {
+ initFromAPInt(&semIEEEdouble, APInt::doubleToBits(d));
}
namespace {
@@ -3569,9 +3414,8 @@ namespace {
}
}
-void APFloat::toString(SmallVectorImpl<char> &Str,
- unsigned FormatPrecision,
- unsigned FormatMaxPadding) const {
+void IEEEFloat::toString(SmallVectorImpl<char> &Str, unsigned FormatPrecision,
+ unsigned FormatMaxPadding) const {
switch (category) {
case fcInfinity:
if (isNegative())
@@ -3772,7 +3616,7 @@ void APFloat::toString(SmallVectorImpl<char> &Str,
Str.push_back(buffer[NDigits-I-1]);
}
-bool APFloat::getExactInverse(APFloat *inv) const {
+bool IEEEFloat::getExactInverse(IEEEFloat *inv) const {
// Special floats and denormals have no exact inverse.
if (!isFiniteNonZero())
return false;
@@ -3783,7 +3627,7 @@ bool APFloat::getExactInverse(APFloat *inv) const {
return false;
// Get the inverse.
- APFloat reciprocal(*semantics, 1ULL);
+ IEEEFloat reciprocal(*semantics, 1ULL);
if (reciprocal.divide(*this, rmNearestTiesToEven) != opOK)
return false;
@@ -3801,7 +3645,7 @@ bool APFloat::getExactInverse(APFloat *inv) const {
return true;
}
-bool APFloat::isSignaling() const {
+bool IEEEFloat::isSignaling() const {
if (!isNaN())
return false;
@@ -3814,7 +3658,7 @@ bool APFloat::isSignaling() const {
///
/// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with
/// appropriate sign switching before/after the computation.
-APFloat::opStatus APFloat::next(bool nextDown) {
+IEEEFloat::opStatus IEEEFloat::next(bool nextDown) {
// If we are performing nextDown, swap sign so we have -x.
if (nextDown)
changeSign();
@@ -3930,46 +3774,44 @@ APFloat::opStatus APFloat::next(bool nextDown) {
return result;
}
-void
-APFloat::makeInf(bool Negative) {
+void IEEEFloat::makeInf(bool Negative) {
category = fcInfinity;
sign = Negative;
exponent = semantics->maxExponent + 1;
APInt::tcSet(significandParts(), 0, partCount());
}
-void
-APFloat::makeZero(bool Negative) {
+void IEEEFloat::makeZero(bool Negative) {
category = fcZero;
sign = Negative;
exponent = semantics->minExponent-1;
APInt::tcSet(significandParts(), 0, partCount());
}
-void APFloat::makeQuiet() {
+void IEEEFloat::makeQuiet() {
assert(isNaN());
APInt::tcSetBit(significandParts(), semantics->precision - 2);
}
-int llvm::ilogb(const APFloat &Arg) {
+int ilogb(const IEEEFloat &Arg) {
if (Arg.isNaN())
- return APFloat::IEK_NaN;
+ return IEEEFloat::IEK_NaN;
if (Arg.isZero())
- return APFloat::IEK_Zero;
+ return IEEEFloat::IEK_Zero;
if (Arg.isInfinity())
- return APFloat::IEK_Inf;
+ return IEEEFloat::IEK_Inf;
if (!Arg.isDenormal())
return Arg.exponent;
- APFloat Normalized(Arg);
+ IEEEFloat Normalized(Arg);
int SignificandBits = Arg.getSemantics().precision - 1;
Normalized.exponent += SignificandBits;
- Normalized.normalize(APFloat::rmNearestTiesToEven, lfExactlyZero);
+ Normalized.normalize(IEEEFloat::rmNearestTiesToEven, lfExactlyZero);
return Normalized.exponent - SignificandBits;
}
-APFloat llvm::scalbn(APFloat X, int Exp, APFloat::roundingMode RoundingMode) {
+IEEEFloat scalbn(IEEEFloat X, int Exp, IEEEFloat::roundingMode RoundingMode) {
auto MaxExp = X.getSemantics().maxExponent;
auto MinExp = X.getSemantics().minExponent;
@@ -3990,21 +3832,359 @@ APFloat llvm::scalbn(APFloat X, int Exp, APFloat::roundingMode RoundingMode) {
return X;
}
-APFloat llvm::frexp(const APFloat &Val, int &Exp, APFloat::roundingMode RM) {
+IEEEFloat frexp(const IEEEFloat &Val, int &Exp, IEEEFloat::roundingMode RM) {
Exp = ilogb(Val);
// Quiet signaling NaNs.
- if (Exp == APFloat::IEK_NaN) {
- APFloat Quiet(Val);
+ if (Exp == IEEEFloat::IEK_NaN) {
+ IEEEFloat Quiet(Val);
Quiet.makeQuiet();
return Quiet;
}
- if (Exp == APFloat::IEK_Inf)
+ if (Exp == IEEEFloat::IEK_Inf)
return Val;
// 1 is added because frexp is defined to return a normalized fraction in
// +/-[0.5, 1.0), rather than the usual +/-[1.0, 2.0).
- Exp = Exp == APFloat::IEK_Zero ? 0 : Exp + 1;
+ Exp = Exp == IEEEFloat::IEK_Zero ? 0 : Exp + 1;
return scalbn(Val, -Exp, RM);
}
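
The +/-[0.5, 1.0) convention noted above matches the C library's frexp, which makes for a quick sanity check; this is a standalone sketch using only the standard library, independent of the APFloat routines.

#include <cmath>
#include <cstdio>

int main() {
  // Same convention as the routine above: value == fraction * 2^exp with
  // |fraction| in [0.5, 1.0) for finite non-zero inputs.
  int exp = 0;
  double frac = std::frexp(6.0, &exp);           // frac = 0.75, exp = 3
  std::printf("6.0 = %g * 2^%d\n", frac, exp);
  std::printf("reassembled: %g\n", std::ldexp(frac, exp));
  return 0;
}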
+
+DoubleAPFloat::DoubleAPFloat(const fltSemantics &S)
+ : Semantics(&S), Floats(new APFloat[2]{APFloat(semPPCDoubleDoubleImpl),
+ APFloat(semIEEEdouble)}) {
+ assert(Semantics == &semPPCDoubleDouble);
+}
+
+DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, uninitializedTag)
+ : Semantics(&S),
+ Floats(new APFloat[2]{APFloat(semPPCDoubleDoubleImpl, uninitialized),
+ APFloat(semIEEEdouble, uninitialized)}) {
+ assert(Semantics == &semPPCDoubleDouble);
+}
+
+DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, integerPart I)
+ : Semantics(&S), Floats(new APFloat[2]{APFloat(semPPCDoubleDoubleImpl, I),
+ APFloat(semIEEEdouble)}) {
+ assert(Semantics == &semPPCDoubleDouble);
+}
+
+DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, const APInt &I)
+ : Semantics(&S), Floats(new APFloat[2]{
+ APFloat(semPPCDoubleDoubleImpl, I),
+ APFloat(semIEEEdouble, APInt(64, I.getRawData()[1]))}) {
+ assert(Semantics == &semPPCDoubleDouble);
+}
+
+DoubleAPFloat::DoubleAPFloat(const fltSemantics &S, APFloat &&First,
+ APFloat &&Second)
+ : Semantics(&S),
+ Floats(new APFloat[2]{std::move(First), std::move(Second)}) {
+ assert(Semantics == &semPPCDoubleDouble);
+ // TODO Check for First == &IEEEdouble once the transition is done.
+ assert(&Floats[0].getSemantics() == &semPPCDoubleDoubleImpl ||
+ &Floats[0].getSemantics() == &semIEEEdouble);
+ assert(&Floats[1].getSemantics() == &semIEEEdouble);
+}
+
+DoubleAPFloat::DoubleAPFloat(const DoubleAPFloat &RHS)
+ : Semantics(RHS.Semantics),
+ Floats(RHS.Floats ? new APFloat[2]{APFloat(RHS.Floats[0]),
+ APFloat(RHS.Floats[1])}
+ : nullptr) {
+ assert(Semantics == &semPPCDoubleDouble);
+}
+
+DoubleAPFloat::DoubleAPFloat(DoubleAPFloat &&RHS)
+ : Semantics(RHS.Semantics), Floats(std::move(RHS.Floats)) {
+ RHS.Semantics = &semBogus;
+ assert(Semantics == &semPPCDoubleDouble);
+}
+
+DoubleAPFloat &DoubleAPFloat::operator=(const DoubleAPFloat &RHS) {
+ if (Semantics == RHS.Semantics && RHS.Floats) {
+ Floats[0] = RHS.Floats[0];
+ Floats[1] = RHS.Floats[1];
+ } else if (this != &RHS) {
+ this->~DoubleAPFloat();
+ new (this) DoubleAPFloat(RHS);
+ }
+ return *this;
+}
+
+// "Software for Doubled-Precision Floating-Point Computations",
+// by Seppo Linnainmaa, ACM TOMS vol 7 no 3, September 1981, pages 272-283.
+APFloat::opStatus DoubleAPFloat::addImpl(const APFloat &a, const APFloat &aa,
+ const APFloat &c, const APFloat &cc,
+ roundingMode RM) {
+ int Status = opOK;
+ APFloat z = a;
+ Status |= z.add(c, RM);
+ if (!z.isFinite()) {
+ if (!z.isInfinity()) {
+ Floats[0] = std::move(z);
+ Floats[1].makeZero(false);
+ return (opStatus)Status;
+ }
+ Status = opOK;
+ auto AComparedToC = a.compareAbsoluteValue(c);
+ z = cc;
+ Status |= z.add(aa, RM);
+ if (AComparedToC == APFloat::cmpGreaterThan) {
+ // z = cc + aa + c + a;
+ Status |= z.add(c, RM);
+ Status |= z.add(a, RM);
+ } else {
+ // z = cc + aa + a + c;
+ Status |= z.add(a, RM);
+ Status |= z.add(c, RM);
+ }
+ if (!z.isFinite()) {
+ Floats[0] = std::move(z);
+ Floats[1].makeZero(false);
+ return (opStatus)Status;
+ }
+ Floats[0] = z;
+ APFloat zz = aa;
+ Status |= zz.add(cc, RM);
+ if (AComparedToC == APFloat::cmpGreaterThan) {
+ // Floats[1] = a - z + c + zz;
+ Floats[1] = a;
+ Status |= Floats[1].subtract(z, RM);
+ Status |= Floats[1].add(c, RM);
+ Status |= Floats[1].add(zz, RM);
+ } else {
+ // Floats[1] = c - z + a + zz;
+ Floats[1] = c;
+ Status |= Floats[1].subtract(z, RM);
+ Status |= Floats[1].add(a, RM);
+ Status |= Floats[1].add(zz, RM);
+ }
+ } else {
+ // q = a - z;
+ APFloat q = a;
+ Status |= q.subtract(z, RM);
+
+ // zz = q + c + (a - (q + z)) + aa + cc;
+ // Compute a - (q + z) as -((q + z) - a) to avoid temporary copies.
+ auto zz = q;
+ Status |= zz.add(c, RM);
+ Status |= q.add(z, RM);
+ Status |= q.subtract(a, RM);
+ q.changeSign();
+ Status |= zz.add(q, RM);
+ Status |= zz.add(aa, RM);
+ Status |= zz.add(cc, RM);
+ if (zz.isZero() && !zz.isNegative()) {
+ Floats[0] = std::move(z);
+ Floats[1].makeZero(false);
+ return opOK;
+ }
+ Floats[0] = z;
+ Status |= Floats[0].add(zz, RM);
+ if (!Floats[0].isFinite()) {
+ Floats[1].makeZero(false);
+ return (opStatus)Status;
+ }
+ Floats[1] = std::move(z);
+ Status |= Floats[1].subtract(Floats[0], RM);
+ Status |= Floats[1].add(zz, RM);
+ }
+ return (opStatus)Status;
+}
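
The Linnainmaa-style addition above is built from error-free transformations: a rounded sum plus a correction term that together represent the exact result. A minimal sketch of that building block (Knuth's TwoSum) on plain doubles, independent of the DoubleAPFloat types and assuming strict IEEE arithmetic (no -ffast-math):

#include <cstdio>

// Knuth's TwoSum: s + err is the exact sum of a and b, where err captures
// the rounding error of the floating-point sum s = a + b.
static void twoSum(double a, double b, double &s, double &err) {
  s = a + b;
  double bVirtual = s - a;
  double aVirtual = s - bVirtual;
  err = (a - aVirtual) + (b - bVirtual);
}

int main() {
  double s, err;
  twoSum(1.0, 1e-20, s, err);
  // err recovers the 1e-20 that the rounded sum s = 1.0 lost.
  std::printf("s = %.17g, err = %.17g\n", s, err);
  return 0;
}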
+
+APFloat::opStatus DoubleAPFloat::addWithSpecial(const DoubleAPFloat &LHS,
+ const DoubleAPFloat &RHS,
+ DoubleAPFloat &Out,
+ roundingMode RM) {
+ if (LHS.getCategory() == fcNaN) {
+ Out = LHS;
+ return opOK;
+ }
+ if (RHS.getCategory() == fcNaN) {
+ Out = RHS;
+ return opOK;
+ }
+ if (LHS.getCategory() == fcZero) {
+ Out = RHS;
+ return opOK;
+ }
+ if (RHS.getCategory() == fcZero) {
+ Out = LHS;
+ return opOK;
+ }
+ if (LHS.getCategory() == fcInfinity && RHS.getCategory() == fcInfinity &&
+ LHS.isNegative() != RHS.isNegative()) {
+ Out.makeNaN(false, Out.isNegative(), nullptr);
+ return opInvalidOp;
+ }
+ if (LHS.getCategory() == fcInfinity) {
+ Out = LHS;
+ return opOK;
+ }
+ if (RHS.getCategory() == fcInfinity) {
+ Out = RHS;
+ return opOK;
+ }
+ assert(LHS.getCategory() == fcNormal && RHS.getCategory() == fcNormal);
+
+ // These conversions will go away once PPCDoubleDoubleImpl goes away.
+ // (PPCDoubleDoubleImpl, IEEEDouble) -> (IEEEDouble, IEEEDouble)
+ APFloat A(semIEEEdouble,
+ APInt(64, LHS.Floats[0].bitcastToAPInt().getRawData()[0])),
+ AA(LHS.Floats[1]),
+ C(semIEEEdouble, APInt(64, RHS.Floats[0].bitcastToAPInt().getRawData()[0])),
+ CC(RHS.Floats[1]);
+ assert(&AA.getSemantics() == &semIEEEdouble);
+ assert(&CC.getSemantics() == &semIEEEdouble);
+ Out.Floats[0] = APFloat(semIEEEdouble);
+ assert(&Out.Floats[1].getSemantics() == &semIEEEdouble);
+
+ auto Ret = Out.addImpl(A, AA, C, CC, RM);
+
+ // (IEEEDouble, IEEEDouble) -> (PPCDoubleDoubleImpl, IEEEDouble)
+ uint64_t Buffer[] = {Out.Floats[0].bitcastToAPInt().getRawData()[0],
+ Out.Floats[1].bitcastToAPInt().getRawData()[0]};
+ Out.Floats[0] = APFloat(semPPCDoubleDoubleImpl, APInt(128, 2, Buffer));
+ return Ret;
+}
+
+APFloat::opStatus DoubleAPFloat::add(const DoubleAPFloat &RHS,
+ roundingMode RM) {
+ return addWithSpecial(*this, RHS, *this, RM);
+}
+
+APFloat::opStatus DoubleAPFloat::subtract(const DoubleAPFloat &RHS,
+ roundingMode RM) {
+ changeSign();
+ auto Ret = add(RHS, RM);
+ changeSign();
+ return Ret;
+}
+
+void DoubleAPFloat::changeSign() {
+ Floats[0].changeSign();
+ Floats[1].changeSign();
+}
+
+APFloat::cmpResult
+DoubleAPFloat::compareAbsoluteValue(const DoubleAPFloat &RHS) const {
+ auto Result = Floats[0].compareAbsoluteValue(RHS.Floats[0]);
+ if (Result != cmpEqual)
+ return Result;
+ Result = Floats[1].compareAbsoluteValue(RHS.Floats[1]);
+ if (Result == cmpLessThan || Result == cmpGreaterThan) {
+ auto Against = Floats[0].isNegative() ^ Floats[1].isNegative();
+ auto RHSAgainst = RHS.Floats[0].isNegative() ^ RHS.Floats[1].isNegative();
+ if (Against && !RHSAgainst)
+ return cmpLessThan;
+ if (!Against && RHSAgainst)
+ return cmpGreaterThan;
+ if (!Against && !RHSAgainst)
+ return Result;
+ if (Against && RHSAgainst)
+ return (cmpResult)(cmpLessThan + cmpGreaterThan - Result);
+ }
+ return Result;
+}
+
+APFloat::fltCategory DoubleAPFloat::getCategory() const {
+ return Floats[0].getCategory();
+}
+
+bool DoubleAPFloat::isNegative() const { return Floats[0].isNegative(); }
+
+void DoubleAPFloat::makeInf(bool Neg) {
+ Floats[0].makeInf(Neg);
+ Floats[1].makeZero(false);
+}
+
+void DoubleAPFloat::makeNaN(bool SNaN, bool Neg, const APInt *fill) {
+ Floats[0].makeNaN(SNaN, Neg, fill);
+ Floats[1].makeZero(false);
+}
+
+} // End detail namespace
+
+APFloat::Storage::Storage(IEEEFloat F, const fltSemantics &Semantics) {
+ if (usesLayout<IEEEFloat>(Semantics)) {
+ new (&IEEE) IEEEFloat(std::move(F));
+ return;
+ }
+ if (usesLayout<DoubleAPFloat>(Semantics)) {
+ new (&Double)
+ DoubleAPFloat(Semantics, APFloat(std::move(F), F.getSemantics()),
+ APFloat(semIEEEdouble));
+ return;
+ }
+ llvm_unreachable("Unexpected semantics");
+}
+
+APFloat::opStatus APFloat::convertFromString(StringRef Str, roundingMode RM) {
+ return getIEEE().convertFromString(Str, RM);
+}
+
+hash_code hash_value(const APFloat &Arg) { return hash_value(Arg.getIEEE()); }
+
+APFloat::APFloat(const fltSemantics &Semantics, StringRef S)
+ : APFloat(Semantics) {
+ convertFromString(S, rmNearestTiesToEven);
+}
+
+APFloat::opStatus APFloat::convert(const fltSemantics &ToSemantics,
+ roundingMode RM, bool *losesInfo) {
+ if (&getSemantics() == &ToSemantics)
+ return opOK;
+ if (usesLayout<IEEEFloat>(getSemantics()) &&
+ usesLayout<IEEEFloat>(ToSemantics))
+ return U.IEEE.convert(ToSemantics, RM, losesInfo);
+ if (usesLayout<IEEEFloat>(getSemantics()) &&
+ usesLayout<DoubleAPFloat>(ToSemantics)) {
+ assert(&ToSemantics == &semPPCDoubleDouble);
+ auto Ret = U.IEEE.convert(semPPCDoubleDoubleImpl, RM, losesInfo);
+ *this = APFloat(DoubleAPFloat(semPPCDoubleDouble, std::move(*this),
+ APFloat(semIEEEdouble)),
+ ToSemantics);
+ return Ret;
+ }
+ if (usesLayout<DoubleAPFloat>(getSemantics()) &&
+ usesLayout<IEEEFloat>(ToSemantics)) {
+ auto Ret = getIEEE().convert(ToSemantics, RM, losesInfo);
+ *this = APFloat(std::move(getIEEE()), ToSemantics);
+ return Ret;
+ }
+ llvm_unreachable("Unexpected semantics");
+}
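
The losesInfo flag threaded through convert() reports whether the destination semantics still represent the original value exactly, i.e. whether a round-trip recovers it. The same check expressed on plain C++ types, as an illustrative sketch only:

#include <cstdio>

int main() {
  // Narrowing analogous to convert() with *losesInfo: the round-trip
  // comparison tells us whether the conversion lost information.
  double d = 0.1;                        // not exactly representable in float
  float f = static_cast<float>(d);
  bool losesInfo = (static_cast<double>(f) != d);
  std::printf("losesInfo = %d\n", losesInfo);   // prints 1
  return 0;
}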
+
+APFloat APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE) {
+ if (isIEEE) {
+ switch (BitWidth) {
+ case 16:
+ return APFloat(semIEEEhalf, APInt::getAllOnesValue(BitWidth));
+ case 32:
+ return APFloat(semIEEEsingle, APInt::getAllOnesValue(BitWidth));
+ case 64:
+ return APFloat(semIEEEdouble, APInt::getAllOnesValue(BitWidth));
+ case 80:
+ return APFloat(semX87DoubleExtended, APInt::getAllOnesValue(BitWidth));
+ case 128:
+ return APFloat(semIEEEquad, APInt::getAllOnesValue(BitWidth));
+ default:
+ llvm_unreachable("Unknown floating bit width");
+ }
+ } else {
+ assert(BitWidth == 128);
+ return APFloat(semPPCDoubleDouble, APInt::getAllOnesValue(BitWidth));
+ }
+}
+
+void APFloat::print(raw_ostream &OS) const {
+ SmallVector<char, 16> Buffer;
+ toString(Buffer);
+ OS << Buffer << "\n";
+}
+
+void APFloat::dump() const { print(dbgs()); }
+
+} // End llvm namespace
diff --git a/contrib/llvm/lib/Support/APInt.cpp b/contrib/llvm/lib/Support/APInt.cpp
index 66eee99c6ec2..0c0b498f1375 100644
--- a/contrib/llvm/lib/Support/APInt.cpp
+++ b/contrib/llvm/lib/Support/APInt.cpp
@@ -22,10 +22,10 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include <climits>
#include <cmath>
#include <cstdlib>
#include <cstring>
-#include <limits>
using namespace llvm;
#define DEBUG_TYPE "apint"
@@ -260,6 +260,14 @@ APInt& APInt::operator+=(const APInt& RHS) {
return clearUnusedBits();
}
+APInt& APInt::operator+=(uint64_t RHS) {
+ if (isSingleWord())
+ VAL += RHS;
+ else
+ add_1(pVal, pVal, getNumWords(), RHS);
+ return clearUnusedBits();
+}
+
/// Subtracts the integer array y from the integer array x
/// @returns returns the borrow out.
/// @brief Generalized subtraction of 64-bit integer arrays.
@@ -286,6 +294,14 @@ APInt& APInt::operator-=(const APInt& RHS) {
return clearUnusedBits();
}
+APInt& APInt::operator-=(uint64_t RHS) {
+ if (isSingleWord())
+ VAL -= RHS;
+ else
+ sub_1(pVal, getNumWords(), RHS);
+ return clearUnusedBits();
+}
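
The new compound-assignment overloads update the existing words in place via add_1/sub_1 rather than building a temporary APInt. A toy version of the in-place word-wise add with carry propagation, in plain C++ and not the LLVM helpers:

#include <cstdint>
#include <cstdio>

// Add a 64-bit value into a little-endian multi-word integer in place,
// propagating the carry word by word.
static void add1(uint64_t *dst, unsigned words, uint64_t rhs) {
  for (unsigned i = 0; i < words && rhs; ++i) {
    uint64_t old = dst[i];
    dst[i] = old + rhs;
    rhs = dst[i] < old ? 1 : 0;   // carry into the next word
  }
}

int main() {
  uint64_t v[2] = {~0ULL, 0};     // 2^64 - 1
  add1(v, 2, 1);                  // -> 2^64, i.e. words {0, 1}
  std::printf("%llu %llu\n", (unsigned long long)v[0],
              (unsigned long long)v[1]);
  return 0;
}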
+
/// Multiplies an integer array, x, by a uint64_t integer and places the result
/// into dest.
/// @returns the carry out of the multiplication.
@@ -470,44 +486,6 @@ APInt APInt::operator*(const APInt& RHS) const {
return Result;
}
-APInt APInt::operator+(const APInt& RHS) const {
- assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
- if (isSingleWord())
- return APInt(BitWidth, VAL + RHS.VAL);
- APInt Result(BitWidth, 0);
- add(Result.pVal, this->pVal, RHS.pVal, getNumWords());
- Result.clearUnusedBits();
- return Result;
-}
-
-APInt APInt::operator+(uint64_t RHS) const {
- if (isSingleWord())
- return APInt(BitWidth, VAL + RHS);
- APInt Result(*this);
- add_1(Result.pVal, Result.pVal, getNumWords(), RHS);
- Result.clearUnusedBits();
- return Result;
-}
-
-APInt APInt::operator-(const APInt& RHS) const {
- assert(BitWidth == RHS.BitWidth && "Bit widths must be the same");
- if (isSingleWord())
- return APInt(BitWidth, VAL - RHS.VAL);
- APInt Result(BitWidth, 0);
- sub(Result.pVal, this->pVal, RHS.pVal, getNumWords());
- Result.clearUnusedBits();
- return Result;
-}
-
-APInt APInt::operator-(uint64_t RHS) const {
- if (isSingleWord())
- return APInt(BitWidth, VAL - RHS);
- APInt Result(*this);
- sub_1(Result.pVal, getNumWords(), RHS);
- Result.clearUnusedBits();
- return Result;
-}
-
bool APInt::EqualSlowCase(const APInt& RHS) const {
return std::equal(pVal, pVal + getNumWords(), RHS.pVal);
}
@@ -1064,11 +1042,7 @@ APInt APInt::ashr(unsigned shiftAmt) const {
if (isSingleWord()) {
if (shiftAmt == BitWidth)
return APInt(BitWidth, 0); // undefined
- else {
- unsigned SignBit = APINT_BITS_PER_WORD - BitWidth;
- return APInt(BitWidth,
- (((int64_t(VAL) << SignBit) >> SignBit) >> shiftAmt));
- }
+ return APInt(BitWidth, SignExtend64(VAL, BitWidth) >> shiftAmt);
}
// If all the bits were shifted out, the result is, technically, undefined.
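
The rewritten single-word path leans on SignExtend64, which sign-extends the low BitWidth bits before the arithmetic shift. A rough standalone equivalent, shown only to illustrate the behavior (the right shift of a negative value is implementation-defined in older C++ standards but arithmetic on the compilers LLVM targets):

#include <cstdint>
#include <cstdio>

// Rough equivalent of sign-extending the low B bits of x to a full int64_t.
static int64_t signExtend64(uint64_t x, unsigned B) {
  return int64_t(x << (64 - B)) >> (64 - B);
}

int main() {
  // A 6-bit value 0b110110 (= -10 in two's complement), shifted right by 2.
  uint64_t val = 0x36;
  int64_t shifted = signExtend64(val, 6) >> 2;   // arithmetic shift: -3
  std::printf("%lld\n", (long long)shifted);
  return 0;
}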
@@ -1521,7 +1495,7 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r,
assert(n>1 && "n must be > 1");
// b denotes the base of the number system. In our case b is 2^32.
- LLVM_CONSTEXPR uint64_t b = uint64_t(1) << 32;
+ const uint64_t b = uint64_t(1) << 32;
DEBUG(dbgs() << "KnuthDiv: m=" << m << " n=" << n << '\n');
DEBUG(dbgs() << "KnuthDiv: original:");
diff --git a/contrib/llvm/lib/Support/ARMBuildAttrs.cpp b/contrib/llvm/lib/Support/ARMBuildAttrs.cpp
index 6d34f76f0c21..134ef8b587b7 100644
--- a/contrib/llvm/lib/Support/ARMBuildAttrs.cpp
+++ b/contrib/llvm/lib/Support/ARMBuildAttrs.cpp
@@ -15,7 +15,7 @@ using namespace llvm;
namespace {
const struct {
ARMBuildAttrs::AttrType Attr;
- const char *TagName;
+ StringRef TagName;
} ARMAttributeTags[] = {
{ ARMBuildAttrs::File, "Tag_File" },
{ ARMBuildAttrs::Section, "Tag_Section" },
@@ -78,17 +78,23 @@ StringRef AttrTypeAsString(unsigned Attr, bool HasTagPrefix) {
StringRef AttrTypeAsString(AttrType Attr, bool HasTagPrefix) {
for (unsigned TI = 0, TE = sizeof(ARMAttributeTags) / sizeof(*ARMAttributeTags);
TI != TE; ++TI)
- if (ARMAttributeTags[TI].Attr == Attr)
- return ARMAttributeTags[TI].TagName + (HasTagPrefix ? 0 : 4);
+ if (ARMAttributeTags[TI].Attr == Attr) {
+ auto TagName = ARMAttributeTags[TI].TagName;
+ return HasTagPrefix ? TagName : TagName.drop_front(4);
+ }
return "";
}
int AttrTypeFromString(StringRef Tag) {
bool HasTagPrefix = Tag.startswith("Tag_");
- for (unsigned TI = 0, TE = sizeof(ARMAttributeTags) / sizeof(*ARMAttributeTags);
- TI != TE; ++TI)
- if (StringRef(ARMAttributeTags[TI].TagName + (HasTagPrefix ? 0 : 4)) == Tag)
+ for (unsigned TI = 0,
+ TE = sizeof(ARMAttributeTags) / sizeof(*ARMAttributeTags);
+ TI != TE; ++TI) {
+ auto TagName = ARMAttributeTags[TI].TagName;
+ if (TagName.drop_front(HasTagPrefix ? 0 : 4) == Tag) {
return ARMAttributeTags[TI].Attr;
+ }
+ }
return -1;
}
}
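The ARMBuildAttrs change swaps const char * pointer arithmetic for StringRef::drop_front when stripping the "Tag_" prefix. A small sketch of the same lookup pattern (matchesTag is an illustrative helper, not part of LLVM):

    #include "llvm/ADT/StringRef.h"

    // Accept a tag name with or without the "Tag_" prefix, following the
    // StringRef::drop_front pattern in AttrTypeFromString above. Canonical is
    // assumed to always carry the prefix, as the ARMAttributeTags table does.
    static bool matchesTag(llvm::StringRef Query, llvm::StringRef Canonical) {
      bool HasPrefix = Query.startswith("Tag_");
      // Strip the prefix from the canonical name when the query omitted it.
      return Canonical.drop_front(HasPrefix ? 0 : 4) == Query;
    }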
diff --git a/contrib/llvm/lib/Support/CachePruning.cpp b/contrib/llvm/lib/Support/CachePruning.cpp
index bd42befce386..3831625962ca 100644
--- a/contrib/llvm/lib/Support/CachePruning.cpp
+++ b/contrib/llvm/lib/Support/CachePruning.cpp
@@ -22,6 +22,7 @@
#define DEBUG_TYPE "cache-pruning"
#include <set>
+#include <system_error>
using namespace llvm;
@@ -34,6 +35,8 @@ static void writeTimestampFile(StringRef TimestampFile) {
/// Prune the cache of files that haven't been accessed in a long time.
bool CachePruning::prune() {
+ using namespace std::chrono;
+
if (Path.empty())
return false;
@@ -44,7 +47,7 @@ bool CachePruning::prune() {
if (!isPathDir)
return false;
- if (Expiration == 0 && PercentageOfAvailableSpace == 0) {
+ if (Expiration == seconds(0) && PercentageOfAvailableSpace == 0) {
DEBUG(dbgs() << "No pruning settings set, exit early\n");
// Nothing will be pruned, early exit
return false;
@@ -54,7 +57,7 @@ bool CachePruning::prune() {
SmallString<128> TimestampFile(Path);
sys::path::append(TimestampFile, "llvmcache.timestamp");
sys::fs::file_status FileStatus;
- sys::TimeValue CurrentTime = sys::TimeValue::now();
+ const auto CurrentTime = system_clock::now();
if (auto EC = sys::fs::status(TimestampFile, FileStatus)) {
if (EC == errc::no_such_file_or_directory) {
// If the timestamp file wasn't there, create one now.
@@ -64,14 +67,14 @@ bool CachePruning::prune() {
return false;
}
} else {
- if (Interval) {
+ if (Interval != seconds(0)) {
// Check whether the time stamp is older than our pruning interval.
// If not, do nothing.
- sys::TimeValue TimeStampModTime = FileStatus.getLastModificationTime();
- auto TimeInterval = sys::TimeValue(sys::TimeValue::SecondsType(Interval));
+ const auto TimeStampModTime = FileStatus.getLastModificationTime();
auto TimeStampAge = CurrentTime - TimeStampModTime;
- if (TimeStampAge <= TimeInterval) {
- DEBUG(dbgs() << "Timestamp file too recent (" << TimeStampAge.seconds()
+ if (TimeStampAge <= Interval) {
+ DEBUG(dbgs() << "Timestamp file too recent ("
+ << duration_cast<seconds>(TimeStampAge).count()
<< "s old), do not prune.\n");
return false;
}
@@ -102,7 +105,6 @@ bool CachePruning::prune() {
std::error_code EC;
SmallString<128> CachePathNative;
sys::path::native(Path, CachePathNative);
- auto TimeExpiration = sys::TimeValue(sys::TimeValue::SecondsType(Expiration));
// Walk all of the files within this directory.
for (sys::fs::directory_iterator File(CachePathNative, EC), FileEnd;
File != FileEnd && !EC; File.increment(EC)) {
@@ -118,11 +120,11 @@ bool CachePruning::prune() {
}
// If the file hasn't been used recently enough, delete it
- sys::TimeValue FileAccessTime = FileStatus.getLastAccessedTime();
+ const auto FileAccessTime = FileStatus.getLastAccessedTime();
auto FileAge = CurrentTime - FileAccessTime;
- if (FileAge > TimeExpiration) {
- DEBUG(dbgs() << "Remove " << File->path() << " (" << FileAge.seconds()
- << "s old)\n");
+ if (FileAge > Expiration) {
+ DEBUG(dbgs() << "Remove " << File->path() << " ("
+ << duration_cast<seconds>(FileAge).count() << "s old)\n");
sys::fs::remove(File->path());
continue;
}
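CachePruning now compares std::chrono durations directly instead of going through sys::TimeValue arithmetic. A minimal sketch of the same staleness check using only standard types (isStale is illustrative):

    #include <chrono>

    // Decide whether a cache entry is stale, the way the loop above compares
    // FileAge against Expiration after the std::chrono migration.
    static bool isStale(std::chrono::system_clock::time_point LastAccess,
                        std::chrono::seconds Expiration) {
      using namespace std::chrono;
      auto Age = system_clock::now() - LastAccess;
      // The comparison works on durations directly; duration_cast is only
      // needed when printing the age in whole seconds, as the DEBUG output does.
      return Age > Expiration;
    }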
diff --git a/contrib/llvm/lib/Support/Chrono.cpp b/contrib/llvm/lib/Support/Chrono.cpp
new file mode 100644
index 000000000000..cdadbd879979
--- /dev/null
+++ b/contrib/llvm/lib/Support/Chrono.cpp
@@ -0,0 +1,47 @@
+//===- Support/Chrono.cpp - Utilities for Timing Manipulation ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/Chrono.h"
+#include "llvm/Config/config.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+using namespace sys;
+
+static inline struct tm getStructTM(TimePoint<> TP) {
+ struct tm Storage;
+ std::time_t OurTime = toTimeT(TP);
+
+#if defined(LLVM_ON_UNIX)
+ struct tm *LT = ::localtime_r(&OurTime, &Storage);
+ assert(LT);
+ (void)LT;
+#endif
+#if defined(LLVM_ON_WIN32)
+ int Error = ::localtime_s(&Storage, &OurTime);
+ assert(!Error);
+ (void)Error;
+#endif
+
+ return Storage;
+}
+
+raw_ostream &operator<<(raw_ostream &OS, TimePoint<> TP) {
+ struct tm LT = getStructTM(TP);
+ char Buffer[sizeof("YYYY-MM-DD HH:MM:SS")];
+ strftime(Buffer, sizeof(Buffer), "%Y-%m-%d %H:%M:%S", &LT);
+ return OS << Buffer << '.'
+ << format("%.9lu",
+ long((TP.time_since_epoch() % std::chrono::seconds(1))
+ .count()));
+}
+
+} // namespace llvm
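A hedged usage sketch of the stream operator added in this new file, assuming the TimePoint<> alias that llvm/Support/Chrono.h introduces alongside it:

    #include "llvm/Support/Chrono.h"
    #include "llvm/Support/raw_ostream.h"
    #include <chrono>

    int main() {
      // TimePoint<> is system_clock at nanosecond resolution; the operator<<
      // defined above renders it as "YYYY-MM-DD HH:MM:SS.nnnnnnnnn".
      llvm::sys::TimePoint<> Now = std::chrono::system_clock::now();
      llvm::outs() << "current time: " << Now << '\n';
    }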
diff --git a/contrib/llvm/lib/Support/CommandLine.cpp b/contrib/llvm/lib/Support/CommandLine.cpp
index a5d2ba2d6a2d..0a989706b436 100644
--- a/contrib/llvm/lib/Support/CommandLine.cpp
+++ b/contrib/llvm/lib/Support/CommandLine.cpp
@@ -20,6 +20,7 @@
#include "llvm-c/Support.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
@@ -29,10 +30,12 @@
#include "llvm/Support/ConvertUTF.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Host.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/Process.h"
#include "llvm/Support/StringSaver.h"
#include "llvm/Support/raw_ostream.h"
#include <cstdlib>
@@ -42,6 +45,17 @@ using namespace cl;
#define DEBUG_TYPE "commandline"
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
+namespace llvm {
+// If LLVM_ENABLE_ABI_BREAKING_CHECKS is set, the flag -mllvm -reverse-iterate
+// can be used to toggle forward/reverse iteration of unordered containers.
+// This will help uncover differences in codegen caused by undefined
+// iteration order.
+static cl::opt<bool, true> ReverseIteration("reverse-iterate",
+ cl::location(ReverseIterate<bool>::value));
+}
+#endif
+
//===----------------------------------------------------------------------===//
// Template instantiations and anchors.
//
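The -reverse-iterate flag above uses the cl::opt<bool, true> plus cl::location pattern to bind a command-line flag to storage defined elsewhere. A minimal sketch of that pattern with an illustrative flag name:

    #include "llvm/Support/CommandLine.h"

    // External storage for the flag, e.g. a static member defined elsewhere.
    static bool EnableFoo = false; // illustrative, not an LLVM flag

    // cl::opt<bool, true> selects external storage; cl::location binds it.
    static llvm::cl::opt<bool, true>
        EnableFooFlag("enable-foo",
                      llvm::cl::desc("Toggle the illustrative foo behaviour"),
                      llvm::cl::location(EnableFoo));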
@@ -90,10 +104,10 @@ public:
// Globals for name and overview of program. Program name is not a string to
// avoid static ctor/dtor issues.
std::string ProgramName;
- const char *ProgramOverview;
+ StringRef ProgramOverview;
// This collects additional help to be printed.
- std::vector<const char *> MoreHelp;
+ std::vector<StringRef> MoreHelp;
// This collects the different option categories that have been registered.
SmallPtrSet<OptionCategory *, 16> RegisteredOptionCategories;
@@ -101,7 +115,7 @@ public:
// This collects the different subcommands that have been registered.
SmallPtrSet<SubCommand *, 4> RegisteredSubCommands;
- CommandLineParser() : ProgramOverview(nullptr), ActiveSubCommand(nullptr) {
+ CommandLineParser() : ActiveSubCommand(nullptr) {
registerSubCommand(&*TopLevelSubCommand);
registerSubCommand(&*AllSubCommands);
}
@@ -109,9 +123,9 @@ public:
void ResetAllOptionOccurrences();
bool ParseCommandLineOptions(int argc, const char *const *argv,
- const char *Overview, bool IgnoreErrors);
+ StringRef Overview, bool IgnoreErrors);
- void addLiteralOption(Option &Opt, SubCommand *SC, const char *Name) {
+ void addLiteralOption(Option &Opt, SubCommand *SC, StringRef Name) {
if (Opt.hasArgStr())
return;
if (!SC->OptionsMap.insert(std::make_pair(Name, &Opt)).second) {
@@ -131,7 +145,7 @@ public:
}
}
- void addLiteralOption(Option &Opt, const char *Name) {
+ void addLiteralOption(Option &Opt, StringRef Name) {
if (Opt.Subs.empty())
addLiteralOption(Opt, &*TopLevelSubCommand, Name);
else {
@@ -283,7 +297,7 @@ public:
void registerSubCommand(SubCommand *sub) {
assert(count_if(RegisteredSubCommands,
[sub](const SubCommand *Sub) {
- return (sub->getName() != nullptr) &&
+ return (!sub->getName().empty()) &&
(Sub->getName() == sub->getName());
}) == 0 &&
"Duplicate subcommands");
@@ -298,7 +312,7 @@ public:
O->hasArgStr())
addOption(O, sub);
else
- addLiteralOption(*O, sub, E.first().str().c_str());
+ addLiteralOption(*O, sub, E.first());
}
}
}
@@ -307,10 +321,16 @@ public:
RegisteredSubCommands.erase(sub);
}
+ iterator_range<typename SmallPtrSet<SubCommand *, 4>::iterator>
+ getRegisteredSubcommands() {
+ return make_range(RegisteredSubCommands.begin(),
+ RegisteredSubCommands.end());
+ }
+
void reset() {
ActiveSubCommand = nullptr;
ProgramName.clear();
- ProgramOverview = nullptr;
+ ProgramOverview = StringRef();
MoreHelp.clear();
RegisteredOptionCategories.clear();
@@ -328,18 +348,18 @@ private:
SubCommand *ActiveSubCommand;
Option *LookupOption(SubCommand &Sub, StringRef &Arg, StringRef &Value);
- SubCommand *LookupSubCommand(const char *Name);
+ SubCommand *LookupSubCommand(StringRef Name);
};
} // namespace
static ManagedStatic<CommandLineParser> GlobalParser;
-void cl::AddLiteralOption(Option &O, const char *Name) {
+void cl::AddLiteralOption(Option &O, StringRef Name) {
GlobalParser->addLiteralOption(O, Name);
}
-extrahelp::extrahelp(const char *Help) : morehelp(Help) {
+extrahelp::extrahelp(StringRef Help) : morehelp(Help) {
GlobalParser->MoreHelp.push_back(Help);
}
@@ -353,6 +373,7 @@ void Option::removeArgument() { GlobalParser->removeOption(this); }
void Option::setArgStr(StringRef S) {
if (FullyInitialized)
GlobalParser->updateArgStr(this, S);
+ assert(S[0] != '-' && "Option can't start with '-'");
ArgStr = S;
}
@@ -426,13 +447,13 @@ Option *CommandLineParser::LookupOption(SubCommand &Sub, StringRef &Arg,
return I->second;
}
-SubCommand *CommandLineParser::LookupSubCommand(const char *Name) {
- if (Name == nullptr)
+SubCommand *CommandLineParser::LookupSubCommand(StringRef Name) {
+ if (Name.empty())
return &*TopLevelSubCommand;
for (auto S : RegisteredSubCommands) {
if (S == &*AllSubCommands)
continue;
- if (S->getName() == nullptr)
+ if (S->getName().empty())
continue;
if (StringRef(S->getName()) == StringRef(Name))
@@ -505,7 +526,6 @@ static bool CommaSeparateAndAddOccurrence(Option *Handler, unsigned pos,
return true;
// Erase the portion before the comma, AND the comma.
Val = Val.substr(Pos + 1);
- Value.substr(Pos + 1); // Increment the original value pointer as well.
// Check for another comma.
Pos = Val.find(',');
}
@@ -533,7 +553,7 @@ static inline bool ProvideOption(Option *Handler, StringRef ArgName,
return Handler->error("requires a value!");
// Steal the next argument, like for '-o filename'
assert(argv && "null check");
- Value = argv[++i];
+ Value = StringRef(argv[++i]);
}
break;
case ValueDisallowed:
@@ -567,7 +587,7 @@ static inline bool ProvideOption(Option *Handler, StringRef ArgName,
if (i + 1 >= argc)
return Handler->error("not enough values!");
assert(argv && "null check");
- Value = argv[++i];
+ Value = StringRef(argv[++i]);
if (CommaSeparateAndAddOccurrence(Handler, i, ArgName, Value, MultiArg))
return true;
@@ -725,7 +745,7 @@ void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver,
// End the token if this is whitespace.
if (isWhitespace(Src[I])) {
if (!Token.empty())
- NewArgv.push_back(Saver.save(Token.c_str()));
+ NewArgv.push_back(Saver.save(StringRef(Token)).data());
Token.clear();
continue;
}
@@ -736,7 +756,7 @@ void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver,
// Append the last token after hitting EOF with no whitespace.
if (!Token.empty())
- NewArgv.push_back(Saver.save(Token.c_str()));
+ NewArgv.push_back(Saver.save(StringRef(Token)).data());
// Mark the end of response files
if (MarkEOLs)
NewArgv.push_back(nullptr);
@@ -817,7 +837,7 @@ void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver,
if (State == UNQUOTED) {
// Whitespace means the end of the token.
if (isWhitespace(Src[I])) {
- NewArgv.push_back(Saver.save(Token.c_str()));
+ NewArgv.push_back(Saver.save(StringRef(Token)).data());
Token.clear();
State = INIT;
// Mark the end of lines in response files
@@ -852,7 +872,7 @@ void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver,
}
// Append the last token after hitting EOF with no whitespace.
if (!Token.empty())
- NewArgv.push_back(Saver.save(Token.c_str()));
+ NewArgv.push_back(Saver.save(StringRef(Token)).data());
// Mark the end of response files
if (MarkEOLs)
NewArgv.push_back(nullptr);
@@ -864,10 +884,10 @@ static bool hasUTF8ByteOrderMark(ArrayRef<char> S) {
return (S.size() >= 3 && S[0] == '\xef' && S[1] == '\xbb' && S[2] == '\xbf');
}
-static bool ExpandResponseFile(const char *FName, StringSaver &Saver,
+static bool ExpandResponseFile(StringRef FName, StringSaver &Saver,
TokenizerCallback Tokenizer,
SmallVectorImpl<const char *> &NewArgv,
- bool MarkEOLs = false) {
+ bool MarkEOLs, bool RelativeNames) {
ErrorOr<std::unique_ptr<MemoryBuffer>> MemBufOrErr =
MemoryBuffer::getFile(FName);
if (!MemBufOrErr)
@@ -892,6 +912,30 @@ static bool ExpandResponseFile(const char *FName, StringSaver &Saver,
// Tokenize the contents into NewArgv.
Tokenizer(Str, Saver, NewArgv, MarkEOLs);
+ // If the names of nested response files should be resolved relative to the
+ // including file, replace each included response file name with its full
+ // path obtained by that resolution.
+ if (RelativeNames)
+ for (unsigned I = 0; I < NewArgv.size(); ++I)
+ if (NewArgv[I]) {
+ StringRef Arg = NewArgv[I];
+ if (Arg.front() == '@') {
+ StringRef FileName = Arg.drop_front();
+ if (llvm::sys::path::is_relative(FileName)) {
+ SmallString<128> ResponseFile;
+ ResponseFile.append(1, '@');
+ if (llvm::sys::path::is_relative(FName)) {
+ SmallString<128> curr_dir;
+ llvm::sys::fs::current_path(curr_dir);
+ ResponseFile.append(curr_dir.str());
+ }
+ llvm::sys::path::append(
+ ResponseFile, llvm::sys::path::parent_path(FName), FileName);
+ NewArgv[I] = Saver.save(ResponseFile.c_str()).data();
+ }
+ }
+ }
+
return true;
}
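The block added above rewrites nested @file arguments so they resolve relative to the including response file. A sketch of that path computation (resolveNestedResponseFile is illustrative; the real code also prepends the current directory when the including file is itself relative):

    #include "llvm/ADT/SmallString.h"
    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/Path.h"
    #include <string>

    // Rewrite a nested "@file" argument so the file is looked up relative to
    // the directory of the response file that mentioned it.
    static std::string resolveNestedResponseFile(llvm::StringRef IncludingFile,
                                                 llvm::StringRef NestedArg) {
      llvm::StringRef FileName = NestedArg.drop_front(); // strip leading '@'
      if (!llvm::sys::path::is_relative(FileName))
        return NestedArg.str(); // absolute paths are left untouched
      llvm::SmallString<128> Resolved;
      Resolved.push_back('@');
      llvm::sys::path::append(Resolved,
                              llvm::sys::path::parent_path(IncludingFile),
                              FileName);
      return std::string(Resolved.data(), Resolved.size());
    }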
@@ -899,7 +943,7 @@ static bool ExpandResponseFile(const char *FName, StringSaver &Saver,
/// StringSaver and tokenization strategy.
bool cl::ExpandResponseFiles(StringSaver &Saver, TokenizerCallback Tokenizer,
SmallVectorImpl<const char *> &Argv,
- bool MarkEOLs) {
+ bool MarkEOLs, bool RelativeNames) {
unsigned RspFiles = 0;
bool AllExpanded = true;
@@ -923,11 +967,9 @@ bool cl::ExpandResponseFiles(StringSaver &Saver, TokenizerCallback Tokenizer,
// Replace this response file argument with the tokenization of its
// contents. Nested response files are expanded in subsequent iterations.
- // FIXME: If a nested response file uses a relative path, is it relative to
- // the cwd of the process or the response file?
SmallVector<const char *, 0> ExpandedArgv;
if (!ExpandResponseFile(Arg + 1, Saver, Tokenizer, ExpandedArgv,
- MarkEOLs)) {
+ MarkEOLs, RelativeNames)) {
// We couldn't read this file, so we leave it in the argument stream and
// move on.
AllExpanded = false;
@@ -952,45 +994,26 @@ void cl::ParseEnvironmentOptions(const char *progName, const char *envVar,
assert(envVar && "Environment variable name missing");
// Get the environment variable they want us to parse options out of.
-#ifdef _WIN32
- std::wstring wenvVar;
- if (!llvm::ConvertUTF8toWide(envVar, wenvVar)) {
- assert(false &&
- "Unicode conversion of environment variable name failed");
- return;
- }
- const wchar_t *wenvValue = _wgetenv(wenvVar.c_str());
- if (!wenvValue)
- return;
- std::string envValueBuffer;
- if (!llvm::convertWideToUTF8(wenvValue, envValueBuffer)) {
- assert(false &&
- "Unicode conversion of environment variable value failed");
- return;
- }
- const char *envValue = envValueBuffer.c_str();
-#else
- const char *envValue = getenv(envVar);
+ llvm::Optional<std::string> envValue = sys::Process::GetEnv(StringRef(envVar));
if (!envValue)
return;
-#endif
// Get program's "name", which we wouldn't know without the caller
// telling us.
SmallVector<const char *, 20> newArgv;
BumpPtrAllocator A;
StringSaver Saver(A);
- newArgv.push_back(Saver.save(progName));
+ newArgv.push_back(Saver.save(progName).data());
// Parse the value of the environment variable into a "command line"
// and hand it off to ParseCommandLineOptions().
- TokenizeGNUCommandLine(envValue, Saver, newArgv);
+ TokenizeGNUCommandLine(*envValue, Saver, newArgv);
int newArgc = static_cast<int>(newArgv.size());
- ParseCommandLineOptions(newArgc, &newArgv[0], Overview);
+ ParseCommandLineOptions(newArgc, &newArgv[0], StringRef(Overview));
}
bool cl::ParseCommandLineOptions(int argc, const char *const *argv,
- const char *Overview, bool IgnoreErrors) {
+ StringRef Overview, bool IgnoreErrors) {
return GlobalParser->ParseCommandLineOptions(argc, argv, Overview,
IgnoreErrors);
}
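ParseEnvironmentOptions now relies on sys::Process::GetEnv, which returns an Optional<std::string> and hides the Windows wide-character conversion the removed block handled by hand. A short usage sketch (dumpEnvVar is illustrative):

    #include "llvm/ADT/Optional.h"
    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/Process.h"
    #include "llvm/Support/raw_ostream.h"

    // Read an environment variable portably; GetEnv returns llvm::None when
    // the variable is unset, so no getenv/_wgetenv split is needed.
    static void dumpEnvVar(llvm::StringRef Name) {
      if (llvm::Optional<std::string> Value = llvm::sys::Process::GetEnv(Name))
        llvm::outs() << Name << "=" << *Value << '\n';
      else
        llvm::outs() << Name << " is not set\n";
    }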
@@ -1006,7 +1029,7 @@ void CommandLineParser::ResetAllOptionOccurrences() {
bool CommandLineParser::ParseCommandLineOptions(int argc,
const char *const *argv,
- const char *Overview,
+ StringRef Overview,
bool IgnoreErrors) {
assert(hasOptions() && "No options specified!");
@@ -1019,7 +1042,7 @@ bool CommandLineParser::ParseCommandLineOptions(int argc,
argc = static_cast<int>(newArgv.size());
// Copy the program name into ProgName, making sure not to overflow it.
- ProgramName = sys::path::filename(argv[0]);
+ ProgramName = sys::path::filename(StringRef(argv[0]));
ProgramOverview = Overview;
bool ErrorParsing = false;
@@ -1035,7 +1058,7 @@ bool CommandLineParser::ParseCommandLineOptions(int argc,
if (argc >= 2 && argv[FirstArg][0] != '-') {
// If the first argument specifies a valid subcommand, start processing
// options from the second argument.
- ChosenSubCommand = LookupSubCommand(argv[FirstArg]);
+ ChosenSubCommand = LookupSubCommand(StringRef(argv[FirstArg]));
if (ChosenSubCommand != &*TopLevelSubCommand)
FirstArg = 2;
}
@@ -1116,19 +1139,19 @@ bool CommandLineParser::ParseCommandLineOptions(int argc,
if (argv[i][0] != '-' || argv[i][1] == 0 || DashDashFound) {
// Positional argument!
if (ActivePositionalArg) {
- ProvidePositionalOption(ActivePositionalArg, argv[i], i);
+ ProvidePositionalOption(ActivePositionalArg, StringRef(argv[i]), i);
continue; // We are done!
}
if (!PositionalOpts.empty()) {
- PositionalVals.push_back(std::make_pair(argv[i], i));
+ PositionalVals.push_back(std::make_pair(StringRef(argv[i]), i));
// All of the positional arguments have been fulfilled, give the rest to
// the consume after option... if it's specified...
//
if (PositionalVals.size() >= NumPositionalRequired && ConsumeAfterOpt) {
for (++i; i < argc; ++i)
- PositionalVals.push_back(std::make_pair(argv[i], i));
+ PositionalVals.push_back(std::make_pair(StringRef(argv[i]), i));
break; // Handle outside of the argument processing loop...
}
@@ -1144,19 +1167,19 @@ bool CommandLineParser::ParseCommandLineOptions(int argc,
// If there is a positional argument eating options, check to see if this
// option is another positional argument. If so, treat it as an argument,
// otherwise feed it to the eating positional.
- ArgName = argv[i] + 1;
+ ArgName = StringRef(argv[i] + 1);
// Eat leading dashes.
while (!ArgName.empty() && ArgName[0] == '-')
ArgName = ArgName.substr(1);
Handler = LookupOption(*ChosenSubCommand, ArgName, Value);
if (!Handler || Handler->getFormattingFlag() != cl::Positional) {
- ProvidePositionalOption(ActivePositionalArg, argv[i], i);
+ ProvidePositionalOption(ActivePositionalArg, StringRef(argv[i]), i);
continue; // We are done!
}
} else { // We start with a '-', must be an argument.
- ArgName = argv[i] + 1;
+ ArgName = StringRef(argv[i] + 1);
// Eat leading dashes.
while (!ArgName.empty() && ArgName[0] == '-')
ArgName = ArgName.substr(1);
@@ -1193,7 +1216,7 @@ bool CommandLineParser::ParseCommandLineOptions(int argc,
for (SmallVectorImpl<Option *>::iterator I = SinkOpts.begin(),
E = SinkOpts.end();
I != E; ++I)
- (*I)->addOccurrence(i, "", argv[i]);
+ (*I)->addOccurrence(i, "", StringRef(argv[i]));
}
continue;
}
@@ -1212,7 +1235,8 @@ bool CommandLineParser::ParseCommandLineOptions(int argc,
errs() << ProgramName
<< ": Not enough positional command line arguments specified!\n"
<< "Must specify at least " << NumPositionalRequired
- << " positional arguments: See: " << argv[0] << " -help\n";
+ << " positional argument" << (NumPositionalRequired > 1 ? "s" : "")
+ << ": See: " << argv[0] << " - help\n";
}
ErrorParsing = true;
@@ -1245,7 +1269,7 @@ bool CommandLineParser::ParseCommandLineOptions(int argc,
switch (PositionalOpts[i]->getNumOccurrencesFlag()) {
case cl::Optional:
Done = true; // Optional arguments want _at most_ one value
- // FALL THROUGH
+ LLVM_FALLTHROUGH;
case cl::ZeroOrMore: // Zero or more will take all they can get...
case cl::OneOrMore: // One or more will take all they can get...
ProvidePositionalOption(PositionalOpts[i],
@@ -1299,7 +1323,7 @@ bool CommandLineParser::ParseCommandLineOptions(int argc,
Opt.second->error("must be specified at least once!");
ErrorParsing = true;
}
- // Fall through
+ LLVM_FALLTHROUGH;
default:
break;
}
@@ -1354,7 +1378,7 @@ bool Option::addOccurrence(unsigned pos, StringRef ArgName, StringRef Value,
case Required:
if (NumOccurrences > 1)
return error("must occur exactly one time!", ArgName);
- // Fall through
+ LLVM_FALLTHROUGH;
case OneOrMore:
case ZeroOrMore:
case ConsumeAfter:
@@ -1406,7 +1430,8 @@ void alias::printOptionInfo(size_t GlobalWidth) const {
// Return the width of the option tag for printing...
size_t basic_parser_impl::getOptionWidth(const Option &O) const {
size_t Len = O.ArgStr.size();
- if (const char *ValName = getValueName())
+ auto ValName = getValueName();
+ if (!ValName.empty())
Len += getValueStr(O, ValName).size() + 3;
return Len + 6;
@@ -1419,7 +1444,8 @@ void basic_parser_impl::printOptionInfo(const Option &O,
size_t GlobalWidth) const {
outs() << " -" << O.ArgStr;
- if (const char *ValName = getValueName())
+ auto ValName = getValueName();
+ if (!ValName.empty())
outs() << "=<" << getValueStr(O, ValName) << '>';
printHelpStr(O.HelpStr, GlobalWidth, getOptionWidth(O));
@@ -1529,11 +1555,11 @@ bool parser<float>::parse(Option &O, StringRef ArgName, StringRef Arg,
// findOption - Return the option number corresponding to the specified
// argument string. If the option is not found, getNumOptions() is returned.
//
-unsigned generic_parser_base::findOption(const char *Name) {
+unsigned generic_parser_base::findOption(StringRef Name) {
unsigned e = getNumOptions();
for (unsigned i = 0; i != e; ++i) {
- if (strcmp(getOption(i), Name) == 0)
+ if (getOption(i) == Name)
return i;
}
return e;
@@ -1544,12 +1570,12 @@ size_t generic_parser_base::getOptionWidth(const Option &O) const {
if (O.hasArgStr()) {
size_t Size = O.ArgStr.size() + 6;
for (unsigned i = 0, e = getNumOptions(); i != e; ++i)
- Size = std::max(Size, std::strlen(getOption(i)) + 8);
+ Size = std::max(Size, getOption(i).size() + 8);
return Size;
} else {
size_t BaseSize = 0;
for (unsigned i = 0, e = getNumOptions(); i != e; ++i)
- BaseSize = std::max(BaseSize, std::strlen(getOption(i)) + 8);
+ BaseSize = std::max(BaseSize, getOption(i).size() + 8);
return BaseSize;
}
}
@@ -1564,7 +1590,7 @@ void generic_parser_base::printOptionInfo(const Option &O,
printHelpStr(O.HelpStr, GlobalWidth, O.ArgStr.size() + 6);
for (unsigned i = 0, e = getNumOptions(); i != e; ++i) {
- size_t NumSpaces = GlobalWidth - strlen(getOption(i)) - 8;
+ size_t NumSpaces = GlobalWidth - getOption(i).size() - 8;
outs() << " =" << getOption(i);
outs().indent(NumSpaces) << " - " << getDescription(i) << '\n';
}
@@ -1572,9 +1598,9 @@ void generic_parser_base::printOptionInfo(const Option &O,
if (!O.HelpStr.empty())
outs() << " " << O.HelpStr << '\n';
for (unsigned i = 0, e = getNumOptions(); i != e; ++i) {
- const char *Option = getOption(i);
+ auto Option = getOption(i);
outs() << " -" << Option;
- printHelpStr(getDescription(i), GlobalWidth, std::strlen(Option) + 8);
+ printHelpStr(getDescription(i), GlobalWidth, Option.size() + 8);
}
}
}
@@ -1596,7 +1622,7 @@ void generic_parser_base::printGenericOptionDiff(
continue;
outs() << "= " << getOption(i);
- size_t L = std::strlen(getOption(i));
+ size_t L = getOption(i).size();
size_t NumSpaces = MaxOptWidth > L ? MaxOptWidth - L : 0;
outs().indent(NumSpaces) << " (default: ";
for (unsigned j = 0; j != NumOpts; ++j) {
@@ -1709,9 +1735,9 @@ static void
sortSubCommands(const SmallPtrSetImpl<SubCommand *> &SubMap,
SmallVectorImpl<std::pair<const char *, SubCommand *>> &Subs) {
for (const auto &S : SubMap) {
- if (S->getName() == nullptr)
+ if (S->getName().empty())
continue;
- Subs.push_back(std::make_pair(S->getName(), S));
+ Subs.push_back(std::make_pair(S->getName().data(), S));
}
array_pod_sort(Subs.begin(), Subs.end(), SubNameCompare);
}
@@ -1734,7 +1760,7 @@ protected:
void printSubCommands(StrSubCommandPairVector &Subs, size_t MaxSubLen) {
for (const auto &S : Subs) {
outs() << " " << S.first;
- if (S.second->getDescription()) {
+ if (!S.second->getDescription().empty()) {
outs().indent(MaxSubLen - strlen(S.first));
outs() << " - " << S.second->getDescription();
}
@@ -1762,14 +1788,16 @@ public:
StrSubCommandPairVector Subs;
sortSubCommands(GlobalParser->RegisteredSubCommands, Subs);
- if (GlobalParser->ProgramOverview)
+ if (!GlobalParser->ProgramOverview.empty())
outs() << "OVERVIEW: " << GlobalParser->ProgramOverview << "\n";
- if (Sub == &*TopLevelSubCommand)
- outs() << "USAGE: " << GlobalParser->ProgramName
- << " [subcommand] [options]";
- else {
- if (Sub->getDescription() != nullptr) {
+ if (Sub == &*TopLevelSubCommand) {
+ outs() << "USAGE: " << GlobalParser->ProgramName;
+ if (Subs.size() > 2)
+ outs() << " [subcommand]";
+ outs() << " [options]";
+ } else {
+ if (!Sub->getDescription().empty()) {
outs() << "SUBCOMMAND '" << Sub->getName()
<< "': " << Sub->getDescription() << "\n\n";
}
@@ -1787,7 +1815,7 @@ public:
if (ConsumeAfterOpt)
outs() << " " << ConsumeAfterOpt->HelpStr;
- if (Sub == &*TopLevelSubCommand && Subs.size() > 2) {
+ if (Sub == &*TopLevelSubCommand && !Subs.empty()) {
// Compute the maximum subcommand length...
size_t MaxSubLen = 0;
for (size_t i = 0, e = Subs.size(); i != e; ++i)
@@ -1831,7 +1859,7 @@ public:
// ordered before B's name. It returns a value greater equal zero otherwise.
static int OptionCategoryCompare(OptionCategory *const *A,
OptionCategory *const *B) {
- return strcmp((*A)->getName(), (*B)->getName());
+ return (*A)->getName().compare((*B)->getName());
}
// Make sure we inherit our base class's operator=()
@@ -1888,7 +1916,7 @@ protected:
outs() << (*Category)->getName() << ":\n";
// Check if description is set.
- if ((*Category)->getDescription() != nullptr)
+ if (!(*Category)->getDescription().empty())
outs() << (*Category)->getDescription() << "\n\n";
else
outs() << "\n";
@@ -2118,10 +2146,15 @@ void cl::AddExtraVersionPrinter(void (*func)()) {
StringMap<Option *> &cl::getRegisteredOptions(SubCommand &Sub) {
auto &Subs = GlobalParser->RegisteredSubCommands;
(void)Subs;
- assert(std::find(Subs.begin(), Subs.end(), &Sub) != Subs.end());
+ assert(is_contained(Subs, &Sub));
return Sub.OptionsMap;
}
+iterator_range<typename SmallPtrSet<SubCommand *, 4>::iterator>
+cl::getRegisteredSubcommands() {
+ return GlobalParser->getRegisteredSubcommands();
+}
+
void cl::HideUnrelatedOptions(cl::OptionCategory &Category, SubCommand &Sub) {
for (auto &I : Sub.OptionsMap) {
if (I.second->Category != &Category &&
@@ -2149,5 +2182,5 @@ void cl::ResetAllOptionOccurrences() {
void LLVMParseCommandLineOptions(int argc, const char *const *argv,
const char *Overview) {
- llvm::cl::ParseCommandLineOptions(argc, argv, Overview, true);
+ llvm::cl::ParseCommandLineOptions(argc, argv, StringRef(Overview), true);
}
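With the StringRef overloads in place, a driver passes the overview text directly. A minimal, self-contained usage sketch (the option name and overview string are illustrative):

    #include "llvm/Support/CommandLine.h"
    #include "llvm/Support/raw_ostream.h"
    #include <string>

    static llvm::cl::opt<std::string>
        InputFilename(llvm::cl::Positional, llvm::cl::desc("<input file>"),
                      llvm::cl::init("-"));

    int main(int argc, char **argv) {
      // The overview is now taken as a StringRef rather than a const char *.
      llvm::cl::ParseCommandLineOptions(argc, argv, "illustrative example tool\n");
      llvm::outs() << "input: " << InputFilename << '\n';
    }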
diff --git a/contrib/llvm/lib/Support/Compression.cpp b/contrib/llvm/lib/Support/Compression.cpp
index b54613e92b8b..5d556462e89c 100644
--- a/contrib/llvm/lib/Support/Compression.cpp
+++ b/contrib/llvm/lib/Support/Compression.cpp
@@ -62,16 +62,23 @@ zlib::Status zlib::compress(StringRef InputBuffer,
return Res;
}
+zlib::Status zlib::uncompress(StringRef InputBuffer, char *UncompressedBuffer,
+ size_t &UncompressedSize) {
+ Status Res = encodeZlibReturnValue(
+ ::uncompress((Bytef *)UncompressedBuffer, (uLongf *)&UncompressedSize,
+ (const Bytef *)InputBuffer.data(), InputBuffer.size()));
+ // Tell MemorySanitizer that zlib output buffer is fully initialized.
+ // This avoids a false report when running LLVM with uninstrumented ZLib.
+ __msan_unpoison(UncompressedBuffer, UncompressedSize);
+ return Res;
+}
+
zlib::Status zlib::uncompress(StringRef InputBuffer,
SmallVectorImpl<char> &UncompressedBuffer,
size_t UncompressedSize) {
UncompressedBuffer.resize(UncompressedSize);
- Status Res = encodeZlibReturnValue(::uncompress(
- (Bytef *)UncompressedBuffer.data(), (uLongf *)&UncompressedSize,
- (const Bytef *)InputBuffer.data(), InputBuffer.size()));
- // Tell MemorySanitizer that zlib output buffer is fully initialized.
- // This avoids a false report when running LLVM with uninstrumented ZLib.
- __msan_unpoison(UncompressedBuffer.data(), UncompressedSize);
+ Status Res =
+ uncompress(InputBuffer, UncompressedBuffer.data(), UncompressedSize);
UncompressedBuffer.resize(UncompressedSize);
return Res;
}
@@ -87,6 +94,10 @@ zlib::Status zlib::compress(StringRef InputBuffer,
CompressionLevel Level) {
return zlib::StatusUnsupported;
}
+zlib::Status zlib::uncompress(StringRef InputBuffer, char *UncompressedBuffer,
+ size_t &UncompressedSize) {
+ return zlib::StatusUnsupported;
+}
zlib::Status zlib::uncompress(StringRef InputBuffer,
SmallVectorImpl<char> &UncompressedBuffer,
size_t UncompressedSize) {
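The refactor above adds a raw-buffer zlib::uncompress overload and makes the SmallVector overload forward to it. A hedged usage sketch of the SmallVector entry point (tryDecompress is illustrative):

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/Compression.h"

    // Decompress a zlib-compressed blob whose original size is known (for
    // example, recorded next to the compressed payload).
    static bool tryDecompress(llvm::StringRef Compressed, size_t OriginalSize,
                              llvm::SmallVectorImpl<char> &Out) {
      if (!llvm::zlib::isAvailable())
        return false; // LLVM was built without zlib support
      return llvm::zlib::uncompress(Compressed, Out, OriginalSize) ==
             llvm::zlib::StatusOK;
    }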
diff --git a/contrib/llvm/lib/Support/ConvertUTF.c b/contrib/llvm/lib/Support/ConvertUTF.cpp
index 128459a1d548..39fd218d3f07 100644
--- a/contrib/llvm/lib/Support/ConvertUTF.c
+++ b/contrib/llvm/lib/Support/ConvertUTF.cpp
@@ -53,6 +53,8 @@
#endif
#include <assert.h>
+namespace llvm {
+
static const int halfShift = 10; /* used for shifting by 10 bits */
static const UTF32 halfBase = 0x0010000UL;
@@ -62,8 +64,6 @@ static const UTF32 halfMask = 0x3FFUL;
#define UNI_SUR_HIGH_END (UTF32)0xDBFF
#define UNI_SUR_LOW_START (UTF32)0xDC00
#define UNI_SUR_LOW_END (UTF32)0xDFFF
-#define false 0
-#define true 1
/* --------------------------------------------------------------------- */
@@ -706,3 +706,5 @@ ConversionResult ConvertUTF8toUTF32(const UTF8 **sourceStart,
similarly unrolled loops.
--------------------------------------------------------------------- */
+
+} // namespace llvm
diff --git a/contrib/llvm/lib/Support/DataStream.cpp b/contrib/llvm/lib/Support/DataStream.cpp
deleted file mode 100644
index 3b10fc5eecaa..000000000000
--- a/contrib/llvm/lib/Support/DataStream.cpp
+++ /dev/null
@@ -1,86 +0,0 @@
-//===--- llvm/Support/DataStream.cpp - Lazy streamed data -----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements DataStreamer, which fetches bytes of Data from
-// a stream source. It provides support for streaming (lazy reading) of
-// bitcode. An example implementation of streaming from a file or stdin
-// is included.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Support/DataStream.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/FileSystem.h"
-#include "llvm/Support/Program.h"
-#include <string>
-#include <system_error>
-#if !defined(_MSC_VER) && !defined(__MINGW32__)
-#include <unistd.h>
-#else
-#include <io.h>
-#endif
-using namespace llvm;
-
-#define DEBUG_TYPE "Data-stream"
-
-// Interface goals:
-// * StreamingMemoryObject doesn't care about complexities like using
-// threads/async callbacks to actually overlap download+compile
-// * Don't want to duplicate Data in memory
-// * Don't need to know total Data len in advance
-// Non-goals:
-// StreamingMemoryObject already has random access so this interface only does
-// in-order streaming (no arbitrary seeking, else we'd have to buffer all the
-// Data here in addition to MemoryObject). This also means that if we want
-// to be able to to free Data, BitstreamBytes/BitcodeReader will implement it
-
-STATISTIC(NumStreamFetches, "Number of calls to Data stream fetch");
-
-namespace llvm {
-DataStreamer::~DataStreamer() {}
-}
-
-namespace {
-
-// Very simple stream backed by a file. Mostly useful for stdin and debugging;
-// actual file access is probably still best done with mmap.
-class DataFileStreamer : public DataStreamer {
- int Fd;
-public:
- DataFileStreamer() : Fd(0) {}
- ~DataFileStreamer() override { close(Fd); }
- size_t GetBytes(unsigned char *buf, size_t len) override {
- NumStreamFetches++;
- return read(Fd, buf, len);
- }
-
- std::error_code OpenFile(const std::string &Filename) {
- if (Filename == "-") {
- Fd = 0;
- sys::ChangeStdinToBinary();
- return std::error_code();
- }
-
- return sys::fs::openFileForRead(Filename, Fd);
- }
-};
-
-}
-
-std::unique_ptr<DataStreamer>
-llvm::getDataFileStreamer(const std::string &Filename, std::string *StrError) {
- std::unique_ptr<DataFileStreamer> s = make_unique<DataFileStreamer>();
- if (std::error_code e = s->OpenFile(Filename)) {
- *StrError = std::string("Could not open ") + Filename + ": " +
- e.message() + "\n";
- return nullptr;
- }
- return std::move(s);
-}
diff --git a/contrib/llvm/lib/Support/Debug.cpp b/contrib/llvm/lib/Support/Debug.cpp
index 323d53279b45..9132911479a1 100644
--- a/contrib/llvm/lib/Support/Debug.cpp
+++ b/contrib/llvm/lib/Support/Debug.cpp
@@ -32,6 +32,7 @@
#undef isCurrentDebugType
#undef setCurrentDebugType
+#undef setCurrentDebugTypes
using namespace llvm;
@@ -62,11 +63,17 @@ bool isCurrentDebugType(const char *DebugType) {
/// option were specified. Note that DebugFlag also needs to be set to true for
/// debug output to be produced.
///
+void setCurrentDebugTypes(const char **Types, unsigned Count);
+
void setCurrentDebugType(const char *Type) {
- CurrentDebugType->clear();
- CurrentDebugType->push_back(Type);
+ setCurrentDebugTypes(&Type, 1);
}
+void setCurrentDebugTypes(const char **Types, unsigned Count) {
+ CurrentDebugType->clear();
+ for (size_t T = 0; T < Count; ++T)
+ CurrentDebugType->push_back(Types[T]);
+}
} // namespace llvm
// All Debug.h functionality is a no-op in NDEBUG mode.
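The new setCurrentDebugTypes lets a caller enable several DEBUG_TYPE filters at once, the programmatic equivalent of -debug-only= with a list. A sketch under the assumption of an assertions-enabled build (the category names are illustrative):

    #include "llvm/Support/Debug.h"
    #include "llvm/Support/raw_ostream.h"

    #define DEBUG_TYPE "cache-pruning"

    // Enable two debug categories, then emit one message under DEBUG_TYPE.
    static void enableMyDebugOutput() {
    #ifndef NDEBUG
      const char *Types[] = {"cache-pruning", "commandline"};
      llvm::setCurrentDebugTypes(Types, 2);
      llvm::DebugFlag = true;
      DEBUG(llvm::dbgs() << "debug types enabled\n");
    #endif
    }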
diff --git a/contrib/llvm/lib/Support/DeltaAlgorithm.cpp b/contrib/llvm/lib/Support/DeltaAlgorithm.cpp
index 9e52874de832..50ea4e9ce0c6 100644
--- a/contrib/llvm/lib/Support/DeltaAlgorithm.cpp
+++ b/contrib/llvm/lib/Support/DeltaAlgorithm.cpp
@@ -9,6 +9,7 @@
#include "llvm/ADT/DeltaAlgorithm.h"
#include <algorithm>
#include <iterator>
+#include <set>
using namespace llvm;
DeltaAlgorithm::~DeltaAlgorithm() {
diff --git a/contrib/llvm/lib/Support/Dwarf.cpp b/contrib/llvm/lib/Support/Dwarf.cpp
index 7aea05d7701f..8950e8c919a4 100644
--- a/contrib/llvm/lib/Support/Dwarf.cpp
+++ b/contrib/llvm/lib/Support/Dwarf.cpp
@@ -18,9 +18,10 @@
using namespace llvm;
using namespace dwarf;
-const char *llvm::dwarf::TagString(unsigned Tag) {
+StringRef llvm::dwarf::TagString(unsigned Tag) {
switch (Tag) {
- default: return nullptr;
+ default:
+ return StringRef();
#define HANDLE_DW_TAG(ID, NAME) \
case DW_TAG_##NAME: \
return "DW_TAG_" #NAME;
@@ -35,240 +36,46 @@ unsigned llvm::dwarf::getTag(StringRef TagString) {
.Default(DW_TAG_invalid);
}
-const char *llvm::dwarf::ChildrenString(unsigned Children) {
+StringRef llvm::dwarf::ChildrenString(unsigned Children) {
switch (Children) {
case DW_CHILDREN_no: return "DW_CHILDREN_no";
case DW_CHILDREN_yes: return "DW_CHILDREN_yes";
}
- return nullptr;
+ return StringRef();
}
-const char *llvm::dwarf::AttributeString(unsigned Attribute) {
+StringRef llvm::dwarf::AttributeString(unsigned Attribute) {
switch (Attribute) {
- case DW_AT_sibling: return "DW_AT_sibling";
- case DW_AT_location: return "DW_AT_location";
- case DW_AT_name: return "DW_AT_name";
- case DW_AT_ordering: return "DW_AT_ordering";
- case DW_AT_byte_size: return "DW_AT_byte_size";
- case DW_AT_bit_offset: return "DW_AT_bit_offset";
- case DW_AT_bit_size: return "DW_AT_bit_size";
- case DW_AT_stmt_list: return "DW_AT_stmt_list";
- case DW_AT_low_pc: return "DW_AT_low_pc";
- case DW_AT_high_pc: return "DW_AT_high_pc";
- case DW_AT_language: return "DW_AT_language";
- case DW_AT_discr: return "DW_AT_discr";
- case DW_AT_discr_value: return "DW_AT_discr_value";
- case DW_AT_visibility: return "DW_AT_visibility";
- case DW_AT_import: return "DW_AT_import";
- case DW_AT_string_length: return "DW_AT_string_length";
- case DW_AT_common_reference: return "DW_AT_common_reference";
- case DW_AT_comp_dir: return "DW_AT_comp_dir";
- case DW_AT_const_value: return "DW_AT_const_value";
- case DW_AT_containing_type: return "DW_AT_containing_type";
- case DW_AT_default_value: return "DW_AT_default_value";
- case DW_AT_inline: return "DW_AT_inline";
- case DW_AT_is_optional: return "DW_AT_is_optional";
- case DW_AT_lower_bound: return "DW_AT_lower_bound";
- case DW_AT_producer: return "DW_AT_producer";
- case DW_AT_prototyped: return "DW_AT_prototyped";
- case DW_AT_return_addr: return "DW_AT_return_addr";
- case DW_AT_start_scope: return "DW_AT_start_scope";
- case DW_AT_bit_stride: return "DW_AT_bit_stride";
- case DW_AT_upper_bound: return "DW_AT_upper_bound";
- case DW_AT_abstract_origin: return "DW_AT_abstract_origin";
- case DW_AT_accessibility: return "DW_AT_accessibility";
- case DW_AT_address_class: return "DW_AT_address_class";
- case DW_AT_artificial: return "DW_AT_artificial";
- case DW_AT_base_types: return "DW_AT_base_types";
- case DW_AT_calling_convention: return "DW_AT_calling_convention";
- case DW_AT_count: return "DW_AT_count";
- case DW_AT_data_member_location: return "DW_AT_data_member_location";
- case DW_AT_decl_column: return "DW_AT_decl_column";
- case DW_AT_decl_file: return "DW_AT_decl_file";
- case DW_AT_decl_line: return "DW_AT_decl_line";
- case DW_AT_declaration: return "DW_AT_declaration";
- case DW_AT_discr_list: return "DW_AT_discr_list";
- case DW_AT_encoding: return "DW_AT_encoding";
- case DW_AT_external: return "DW_AT_external";
- case DW_AT_frame_base: return "DW_AT_frame_base";
- case DW_AT_friend: return "DW_AT_friend";
- case DW_AT_identifier_case: return "DW_AT_identifier_case";
- case DW_AT_macro_info: return "DW_AT_macro_info";
- case DW_AT_namelist_item: return "DW_AT_namelist_item";
- case DW_AT_priority: return "DW_AT_priority";
- case DW_AT_segment: return "DW_AT_segment";
- case DW_AT_specification: return "DW_AT_specification";
- case DW_AT_static_link: return "DW_AT_static_link";
- case DW_AT_type: return "DW_AT_type";
- case DW_AT_use_location: return "DW_AT_use_location";
- case DW_AT_variable_parameter: return "DW_AT_variable_parameter";
- case DW_AT_virtuality: return "DW_AT_virtuality";
- case DW_AT_vtable_elem_location: return "DW_AT_vtable_elem_location";
- case DW_AT_allocated: return "DW_AT_allocated";
- case DW_AT_associated: return "DW_AT_associated";
- case DW_AT_data_location: return "DW_AT_data_location";
- case DW_AT_byte_stride: return "DW_AT_byte_stride";
- case DW_AT_entry_pc: return "DW_AT_entry_pc";
- case DW_AT_use_UTF8: return "DW_AT_use_UTF8";
- case DW_AT_extension: return "DW_AT_extension";
- case DW_AT_ranges: return "DW_AT_ranges";
- case DW_AT_trampoline: return "DW_AT_trampoline";
- case DW_AT_call_column: return "DW_AT_call_column";
- case DW_AT_call_file: return "DW_AT_call_file";
- case DW_AT_call_line: return "DW_AT_call_line";
- case DW_AT_description: return "DW_AT_description";
- case DW_AT_binary_scale: return "DW_AT_binary_scale";
- case DW_AT_decimal_scale: return "DW_AT_decimal_scale";
- case DW_AT_small: return "DW_AT_small";
- case DW_AT_decimal_sign: return "DW_AT_decimal_sign";
- case DW_AT_digit_count: return "DW_AT_digit_count";
- case DW_AT_picture_string: return "DW_AT_picture_string";
- case DW_AT_mutable: return "DW_AT_mutable";
- case DW_AT_threads_scaled: return "DW_AT_threads_scaled";
- case DW_AT_explicit: return "DW_AT_explicit";
- case DW_AT_object_pointer: return "DW_AT_object_pointer";
- case DW_AT_endianity: return "DW_AT_endianity";
- case DW_AT_elemental: return "DW_AT_elemental";
- case DW_AT_pure: return "DW_AT_pure";
- case DW_AT_recursive: return "DW_AT_recursive";
- case DW_AT_signature: return "DW_AT_signature";
- case DW_AT_main_subprogram: return "DW_AT_main_subprogram";
- case DW_AT_data_bit_offset: return "DW_AT_data_bit_offset";
- case DW_AT_const_expr: return "DW_AT_const_expr";
- case DW_AT_enum_class: return "DW_AT_enum_class";
- case DW_AT_linkage_name: return "DW_AT_linkage_name";
- case DW_AT_string_length_bit_size: return "DW_AT_string_length_bit_size";
- case DW_AT_string_length_byte_size: return "DW_AT_string_length_byte_size";
- case DW_AT_rank: return "DW_AT_rank";
- case DW_AT_str_offsets_base: return "DW_AT_str_offsets_base";
- case DW_AT_addr_base: return "DW_AT_addr_base";
- case DW_AT_ranges_base: return "DW_AT_ranges_base";
- case DW_AT_dwo_id: return "DW_AT_dwo_id";
- case DW_AT_dwo_name: return "DW_AT_dwo_name";
- case DW_AT_reference: return "DW_AT_reference";
- case DW_AT_rvalue_reference: return "DW_AT_rvalue_reference";
- case DW_AT_MIPS_loop_begin: return "DW_AT_MIPS_loop_begin";
- case DW_AT_MIPS_tail_loop_begin: return "DW_AT_MIPS_tail_loop_begin";
- case DW_AT_MIPS_epilog_begin: return "DW_AT_MIPS_epilog_begin";
- case DW_AT_MIPS_loop_unroll_factor: return "DW_AT_MIPS_loop_unroll_factor";
- case DW_AT_MIPS_software_pipeline_depth:
- return "DW_AT_MIPS_software_pipeline_depth";
- case DW_AT_MIPS_linkage_name: return "DW_AT_MIPS_linkage_name";
- case DW_AT_MIPS_stride: return "DW_AT_MIPS_stride";
- case DW_AT_MIPS_abstract_name: return "DW_AT_MIPS_abstract_name";
- case DW_AT_MIPS_clone_origin: return "DW_AT_MIPS_clone_origin";
- case DW_AT_MIPS_has_inlines: return "DW_AT_MIPS_has_inlines";
- case DW_AT_MIPS_stride_byte: return "DW_AT_MIPS_stride_byte";
- case DW_AT_MIPS_stride_elem: return "DW_AT_MIPS_stride_elem";
- case DW_AT_MIPS_ptr_dopetype: return "DW_AT_MIPS_ptr_dopetype";
- case DW_AT_MIPS_allocatable_dopetype:
- return "DW_AT_MIPS_allocatable_dopetype";
- case DW_AT_MIPS_assumed_shape_dopetype:
- return "DW_AT_MIPS_assumed_shape_dopetype";
- case DW_AT_sf_names: return "DW_AT_sf_names";
- case DW_AT_src_info: return "DW_AT_src_info";
- case DW_AT_mac_info: return "DW_AT_mac_info";
- case DW_AT_src_coords: return "DW_AT_src_coords";
- case DW_AT_body_begin: return "DW_AT_body_begin";
- case DW_AT_body_end: return "DW_AT_body_end";
- case DW_AT_GNU_vector: return "DW_AT_GNU_vector";
- case DW_AT_GNU_template_name: return "DW_AT_GNU_template_name";
- case DW_AT_GNU_odr_signature: return "DW_AT_GNU_odr_signature";
- case DW_AT_MIPS_assumed_size: return "DW_AT_MIPS_assumed_size";
- case DW_AT_lo_user: return "DW_AT_lo_user";
- case DW_AT_hi_user: return "DW_AT_hi_user";
- case DW_AT_BORLAND_property_read: return "DW_AT_BORLAND_property_read";
- case DW_AT_BORLAND_property_write: return "DW_AT_BORLAND_property_write";
- case DW_AT_BORLAND_property_implements: return "DW_AT_BORLAND_property_implements";
- case DW_AT_BORLAND_property_index: return "DW_AT_BORLAND_property_index";
- case DW_AT_BORLAND_property_default: return "DW_AT_BORLAND_property_default";
- case DW_AT_BORLAND_Delphi_unit: return "DW_AT_BORLAND_Delphi_unit";
- case DW_AT_BORLAND_Delphi_class: return "DW_AT_BORLAND_Delphi_class";
- case DW_AT_BORLAND_Delphi_record: return "DW_AT_BORLAND_Delphi_record";
- case DW_AT_BORLAND_Delphi_metaclass: return "DW_AT_BORLAND_Delphi_metaclass";
- case DW_AT_BORLAND_Delphi_constructor: return "DW_AT_BORLAND_Delphi_constructor";
- case DW_AT_BORLAND_Delphi_destructor: return "DW_AT_BORLAND_Delphi_destructor";
- case DW_AT_BORLAND_Delphi_anonymous_method: return "DW_AT_BORLAND_Delphi_anonymous_method";
- case DW_AT_BORLAND_Delphi_interface: return "DW_AT_BORLAND_Delphi_interface";
- case DW_AT_BORLAND_Delphi_ABI: return "DW_AT_BORLAND_Delphi_ABI";
- case DW_AT_BORLAND_Delphi_return: return "DW_AT_BORLAND_Delphi_return";
- case DW_AT_BORLAND_Delphi_frameptr: return "DW_AT_BORLAND_Delphi_frameptr";
- case DW_AT_BORLAND_closure: return "DW_AT_BORLAND_closure";
- case DW_AT_APPLE_optimized: return "DW_AT_APPLE_optimized";
- case DW_AT_APPLE_flags: return "DW_AT_APPLE_flags";
- case DW_AT_APPLE_isa: return "DW_AT_APPLE_isa";
- case DW_AT_APPLE_block: return "DW_AT_APPLE_block";
- case DW_AT_APPLE_major_runtime_vers: return "DW_AT_APPLE_major_runtime_vers";
- case DW_AT_APPLE_runtime_class: return "DW_AT_APPLE_runtime_class";
- case DW_AT_APPLE_omit_frame_ptr: return "DW_AT_APPLE_omit_frame_ptr";
- case DW_AT_APPLE_property_name: return "DW_AT_APPLE_property_name";
- case DW_AT_APPLE_property_getter: return "DW_AT_APPLE_property_getter";
- case DW_AT_APPLE_property_setter: return "DW_AT_APPLE_property_setter";
- case DW_AT_APPLE_property_attribute: return "DW_AT_APPLE_property_attribute";
- case DW_AT_APPLE_property: return "DW_AT_APPLE_property";
- case DW_AT_APPLE_objc_complete_type: return "DW_AT_APPLE_objc_complete_type";
- case DW_AT_LLVM_include_path: return "DW_AT_LLVM_include_path";
- case DW_AT_LLVM_config_macros: return "DW_AT_LLVM_config_macros";
- case DW_AT_LLVM_isysroot: return "DW_AT_LLVM_isysroot";
-
- // DWARF5 Fission Extension Attribute
- case DW_AT_GNU_dwo_name: return "DW_AT_GNU_dwo_name";
- case DW_AT_GNU_dwo_id: return "DW_AT_GNU_dwo_id";
- case DW_AT_GNU_ranges_base: return "DW_AT_GNU_ranges_base";
- case DW_AT_GNU_addr_base: return "DW_AT_GNU_addr_base";
- case DW_AT_GNU_pubnames: return "DW_AT_GNU_pubnames";
- case DW_AT_GNU_pubtypes: return "DW_AT_GNU_pubtypes";
- case DW_AT_GNU_discriminator: return "DW_AT_GNU_discriminator";
+ default:
+ return StringRef();
+#define HANDLE_DW_AT(ID, NAME) \
+ case DW_AT_##NAME: \
+ return "DW_AT_" #NAME;
+#include "llvm/Support/Dwarf.def"
}
- return nullptr;
}
-const char *llvm::dwarf::FormEncodingString(unsigned Encoding) {
+StringRef llvm::dwarf::FormEncodingString(unsigned Encoding) {
switch (Encoding) {
- case DW_FORM_addr: return "DW_FORM_addr";
- case DW_FORM_block2: return "DW_FORM_block2";
- case DW_FORM_block4: return "DW_FORM_block4";
- case DW_FORM_data2: return "DW_FORM_data2";
- case DW_FORM_data4: return "DW_FORM_data4";
- case DW_FORM_data8: return "DW_FORM_data8";
- case DW_FORM_string: return "DW_FORM_string";
- case DW_FORM_block: return "DW_FORM_block";
- case DW_FORM_block1: return "DW_FORM_block1";
- case DW_FORM_data1: return "DW_FORM_data1";
- case DW_FORM_flag: return "DW_FORM_flag";
- case DW_FORM_sdata: return "DW_FORM_sdata";
- case DW_FORM_strp: return "DW_FORM_strp";
- case DW_FORM_udata: return "DW_FORM_udata";
- case DW_FORM_ref_addr: return "DW_FORM_ref_addr";
- case DW_FORM_ref1: return "DW_FORM_ref1";
- case DW_FORM_ref2: return "DW_FORM_ref2";
- case DW_FORM_ref4: return "DW_FORM_ref4";
- case DW_FORM_ref8: return "DW_FORM_ref8";
- case DW_FORM_ref_udata: return "DW_FORM_ref_udata";
- case DW_FORM_indirect: return "DW_FORM_indirect";
- case DW_FORM_sec_offset: return "DW_FORM_sec_offset";
- case DW_FORM_exprloc: return "DW_FORM_exprloc";
- case DW_FORM_flag_present: return "DW_FORM_flag_present";
- case DW_FORM_ref_sig8: return "DW_FORM_ref_sig8";
-
- // DWARF5 Fission Extension Forms
- case DW_FORM_GNU_addr_index: return "DW_FORM_GNU_addr_index";
- case DW_FORM_GNU_str_index: return "DW_FORM_GNU_str_index";
-
- // Alternate debug sections proposal (output of "dwz" tool).
- case DW_FORM_GNU_ref_alt: return "DW_FORM_GNU_ref_alt";
- case DW_FORM_GNU_strp_alt: return "DW_FORM_GNU_strp_alt";
+ default:
+ return StringRef();
+#define HANDLE_DW_FORM(ID, NAME) \
+ case DW_FORM_##NAME: \
+ return "DW_FORM_" #NAME;
+#include "llvm/Support/Dwarf.def"
}
- return nullptr;
}
-const char *llvm::dwarf::OperationEncodingString(unsigned Encoding) {
+StringRef llvm::dwarf::OperationEncodingString(unsigned Encoding) {
switch (Encoding) {
- default: return nullptr;
+ default:
+ return StringRef();
#define HANDLE_DW_OP(ID, NAME) \
case DW_OP_##NAME: \
return "DW_OP_" #NAME;
#include "llvm/Support/Dwarf.def"
+ case DW_OP_LLVM_fragment:
+ return "DW_OP_LLVM_fragment";
}
}
@@ -276,12 +83,14 @@ unsigned llvm::dwarf::getOperationEncoding(StringRef OperationEncodingString) {
return StringSwitch<unsigned>(OperationEncodingString)
#define HANDLE_DW_OP(ID, NAME) .Case("DW_OP_" #NAME, DW_OP_##NAME)
#include "llvm/Support/Dwarf.def"
+ .Case("DW_OP_LLVM_fragment", DW_OP_LLVM_fragment)
.Default(0);
}
-const char *llvm::dwarf::AttributeEncodingString(unsigned Encoding) {
+StringRef llvm::dwarf::AttributeEncodingString(unsigned Encoding) {
switch (Encoding) {
- default: return nullptr;
+ default:
+ return StringRef();
#define HANDLE_DW_ATE(ID, NAME) \
case DW_ATE_##NAME: \
return "DW_ATE_" #NAME;
@@ -296,7 +105,7 @@ unsigned llvm::dwarf::getAttributeEncoding(StringRef EncodingString) {
.Default(0);
}
-const char *llvm::dwarf::DecimalSignString(unsigned Sign) {
+StringRef llvm::dwarf::DecimalSignString(unsigned Sign) {
switch (Sign) {
case DW_DS_unsigned: return "DW_DS_unsigned";
case DW_DS_leading_overpunch: return "DW_DS_leading_overpunch";
@@ -304,10 +113,10 @@ const char *llvm::dwarf::DecimalSignString(unsigned Sign) {
case DW_DS_leading_separate: return "DW_DS_leading_separate";
case DW_DS_trailing_separate: return "DW_DS_trailing_separate";
}
- return nullptr;
+ return StringRef();
}
-const char *llvm::dwarf::EndianityString(unsigned Endian) {
+StringRef llvm::dwarf::EndianityString(unsigned Endian) {
switch (Endian) {
case DW_END_default: return "DW_END_default";
case DW_END_big: return "DW_END_big";
@@ -315,32 +124,32 @@ const char *llvm::dwarf::EndianityString(unsigned Endian) {
case DW_END_lo_user: return "DW_END_lo_user";
case DW_END_hi_user: return "DW_END_hi_user";
}
- return nullptr;
+ return StringRef();
}
-const char *llvm::dwarf::AccessibilityString(unsigned Access) {
+StringRef llvm::dwarf::AccessibilityString(unsigned Access) {
switch (Access) {
// Accessibility codes
case DW_ACCESS_public: return "DW_ACCESS_public";
case DW_ACCESS_protected: return "DW_ACCESS_protected";
case DW_ACCESS_private: return "DW_ACCESS_private";
}
- return nullptr;
+ return StringRef();
}
-const char *llvm::dwarf::VisibilityString(unsigned Visibility) {
+StringRef llvm::dwarf::VisibilityString(unsigned Visibility) {
switch (Visibility) {
case DW_VIS_local: return "DW_VIS_local";
case DW_VIS_exported: return "DW_VIS_exported";
case DW_VIS_qualified: return "DW_VIS_qualified";
}
- return nullptr;
+ return StringRef();
}
-const char *llvm::dwarf::VirtualityString(unsigned Virtuality) {
+StringRef llvm::dwarf::VirtualityString(unsigned Virtuality) {
switch (Virtuality) {
default:
- return nullptr;
+ return StringRef();
#define HANDLE_DW_VIRTUALITY(ID, NAME) \
case DW_VIRTUALITY_##NAME: \
return "DW_VIRTUALITY_" #NAME;
@@ -356,10 +165,10 @@ unsigned llvm::dwarf::getVirtuality(StringRef VirtualityString) {
.Default(DW_VIRTUALITY_invalid);
}
-const char *llvm::dwarf::LanguageString(unsigned Language) {
+StringRef llvm::dwarf::LanguageString(unsigned Language) {
switch (Language) {
default:
- return nullptr;
+ return StringRef();
#define HANDLE_DW_LANG(ID, NAME) \
case DW_LANG_##NAME: \
return "DW_LANG_" #NAME;
@@ -374,20 +183,20 @@ unsigned llvm::dwarf::getLanguage(StringRef LanguageString) {
.Default(0);
}
-const char *llvm::dwarf::CaseString(unsigned Case) {
+StringRef llvm::dwarf::CaseString(unsigned Case) {
switch (Case) {
case DW_ID_case_sensitive: return "DW_ID_case_sensitive";
case DW_ID_up_case: return "DW_ID_up_case";
case DW_ID_down_case: return "DW_ID_down_case";
case DW_ID_case_insensitive: return "DW_ID_case_insensitive";
}
- return nullptr;
+ return StringRef();
}
-const char *llvm::dwarf::ConventionString(unsigned CC) {
+StringRef llvm::dwarf::ConventionString(unsigned CC) {
switch (CC) {
default:
- return nullptr;
+ return StringRef();
#define HANDLE_DW_CC(ID, NAME) \
case DW_CC_##NAME: \
return "DW_CC_" #NAME;
@@ -402,64 +211,55 @@ unsigned llvm::dwarf::getCallingConvention(StringRef CCString) {
.Default(0);
}
-const char *llvm::dwarf::InlineCodeString(unsigned Code) {
+StringRef llvm::dwarf::InlineCodeString(unsigned Code) {
switch (Code) {
case DW_INL_not_inlined: return "DW_INL_not_inlined";
case DW_INL_inlined: return "DW_INL_inlined";
case DW_INL_declared_not_inlined: return "DW_INL_declared_not_inlined";
case DW_INL_declared_inlined: return "DW_INL_declared_inlined";
}
- return nullptr;
+ return StringRef();
}
-const char *llvm::dwarf::ArrayOrderString(unsigned Order) {
+StringRef llvm::dwarf::ArrayOrderString(unsigned Order) {
switch (Order) {
case DW_ORD_row_major: return "DW_ORD_row_major";
case DW_ORD_col_major: return "DW_ORD_col_major";
}
- return nullptr;
+ return StringRef();
}
-const char *llvm::dwarf::DiscriminantString(unsigned Discriminant) {
+StringRef llvm::dwarf::DiscriminantString(unsigned Discriminant) {
switch (Discriminant) {
case DW_DSC_label: return "DW_DSC_label";
case DW_DSC_range: return "DW_DSC_range";
}
- return nullptr;
+ return StringRef();
}
-const char *llvm::dwarf::LNStandardString(unsigned Standard) {
+StringRef llvm::dwarf::LNStandardString(unsigned Standard) {
switch (Standard) {
- case DW_LNS_copy: return "DW_LNS_copy";
- case DW_LNS_advance_pc: return "DW_LNS_advance_pc";
- case DW_LNS_advance_line: return "DW_LNS_advance_line";
- case DW_LNS_set_file: return "DW_LNS_set_file";
- case DW_LNS_set_column: return "DW_LNS_set_column";
- case DW_LNS_negate_stmt: return "DW_LNS_negate_stmt";
- case DW_LNS_set_basic_block: return "DW_LNS_set_basic_block";
- case DW_LNS_const_add_pc: return "DW_LNS_const_add_pc";
- case DW_LNS_fixed_advance_pc: return "DW_LNS_fixed_advance_pc";
- case DW_LNS_set_prologue_end: return "DW_LNS_set_prologue_end";
- case DW_LNS_set_epilogue_begin: return "DW_LNS_set_epilogue_begin";
- case DW_LNS_set_isa: return "DW_LNS_set_isa";
+ default:
+ return StringRef();
+#define HANDLE_DW_LNS(ID, NAME) \
+ case DW_LNS_##NAME: \
+ return "DW_LNS_" #NAME;
+#include "llvm/Support/Dwarf.def"
}
- return nullptr;
}
-const char *llvm::dwarf::LNExtendedString(unsigned Encoding) {
+StringRef llvm::dwarf::LNExtendedString(unsigned Encoding) {
switch (Encoding) {
- // Line Number Extended Opcode Encodings
- case DW_LNE_end_sequence: return "DW_LNE_end_sequence";
- case DW_LNE_set_address: return "DW_LNE_set_address";
- case DW_LNE_define_file: return "DW_LNE_define_file";
- case DW_LNE_set_discriminator: return "DW_LNE_set_discriminator";
- case DW_LNE_lo_user: return "DW_LNE_lo_user";
- case DW_LNE_hi_user: return "DW_LNE_hi_user";
+ default:
+ return StringRef();
+#define HANDLE_DW_LNE(ID, NAME) \
+ case DW_LNE_##NAME: \
+ return "DW_LNE_" #NAME;
+#include "llvm/Support/Dwarf.def"
}
- return nullptr;
}
-const char *llvm::dwarf::MacinfoString(unsigned Encoding) {
+StringRef llvm::dwarf::MacinfoString(unsigned Encoding) {
switch (Encoding) {
// Macinfo Type Encodings
case DW_MACINFO_define: return "DW_MACINFO_define";
@@ -469,7 +269,7 @@ const char *llvm::dwarf::MacinfoString(unsigned Encoding) {
case DW_MACINFO_vendor_ext: return "DW_MACINFO_vendor_ext";
case DW_MACINFO_invalid: return "DW_MACINFO_invalid";
}
- return nullptr;
+ return StringRef();
}
unsigned llvm::dwarf::getMacinfo(StringRef MacinfoString) {
@@ -482,80 +282,29 @@ unsigned llvm::dwarf::getMacinfo(StringRef MacinfoString) {
.Default(DW_MACINFO_invalid);
}
-const char *llvm::dwarf::CallFrameString(unsigned Encoding) {
+StringRef llvm::dwarf::CallFrameString(unsigned Encoding) {
switch (Encoding) {
- case DW_CFA_nop: return "DW_CFA_nop";
- case DW_CFA_advance_loc: return "DW_CFA_advance_loc";
- case DW_CFA_offset: return "DW_CFA_offset";
- case DW_CFA_restore: return "DW_CFA_restore";
- case DW_CFA_set_loc: return "DW_CFA_set_loc";
- case DW_CFA_advance_loc1: return "DW_CFA_advance_loc1";
- case DW_CFA_advance_loc2: return "DW_CFA_advance_loc2";
- case DW_CFA_advance_loc4: return "DW_CFA_advance_loc4";
- case DW_CFA_offset_extended: return "DW_CFA_offset_extended";
- case DW_CFA_restore_extended: return "DW_CFA_restore_extended";
- case DW_CFA_undefined: return "DW_CFA_undefined";
- case DW_CFA_same_value: return "DW_CFA_same_value";
- case DW_CFA_register: return "DW_CFA_register";
- case DW_CFA_remember_state: return "DW_CFA_remember_state";
- case DW_CFA_restore_state: return "DW_CFA_restore_state";
- case DW_CFA_def_cfa: return "DW_CFA_def_cfa";
- case DW_CFA_def_cfa_register: return "DW_CFA_def_cfa_register";
- case DW_CFA_def_cfa_offset: return "DW_CFA_def_cfa_offset";
- case DW_CFA_def_cfa_expression: return "DW_CFA_def_cfa_expression";
- case DW_CFA_expression: return "DW_CFA_expression";
- case DW_CFA_offset_extended_sf: return "DW_CFA_offset_extended_sf";
- case DW_CFA_def_cfa_sf: return "DW_CFA_def_cfa_sf";
- case DW_CFA_def_cfa_offset_sf: return "DW_CFA_def_cfa_offset_sf";
- case DW_CFA_val_offset: return "DW_CFA_val_offset";
- case DW_CFA_val_offset_sf: return "DW_CFA_val_offset_sf";
- case DW_CFA_val_expression: return "DW_CFA_val_expression";
- case DW_CFA_MIPS_advance_loc8: return "DW_CFA_MIPS_advance_loc8";
- case DW_CFA_GNU_window_save: return "DW_CFA_GNU_window_save";
- case DW_CFA_GNU_args_size: return "DW_CFA_GNU_args_size";
- case DW_CFA_lo_user: return "DW_CFA_lo_user";
- case DW_CFA_hi_user: return "DW_CFA_hi_user";
+ default:
+ return StringRef();
+#define HANDLE_DW_CFA(ID, NAME) \
+ case DW_CFA_##NAME: \
+ return "DW_CFA_" #NAME;
+#include "llvm/Support/Dwarf.def"
}
- return nullptr;
}
-const char *llvm::dwarf::ApplePropertyString(unsigned Prop) {
+StringRef llvm::dwarf::ApplePropertyString(unsigned Prop) {
switch (Prop) {
- case DW_APPLE_PROPERTY_readonly:
- return "DW_APPLE_PROPERTY_readonly";
- case DW_APPLE_PROPERTY_getter:
- return "DW_APPLE_PROPERTY_getter";
- case DW_APPLE_PROPERTY_assign:
- return "DW_APPLE_PROPERTY_assign";
- case DW_APPLE_PROPERTY_readwrite:
- return "DW_APPLE_PROPERTY_readwrite";
- case DW_APPLE_PROPERTY_retain:
- return "DW_APPLE_PROPERTY_retain";
- case DW_APPLE_PROPERTY_copy:
- return "DW_APPLE_PROPERTY_copy";
- case DW_APPLE_PROPERTY_nonatomic:
- return "DW_APPLE_PROPERTY_nonatomic";
- case DW_APPLE_PROPERTY_setter:
- return "DW_APPLE_PROPERTY_setter";
- case DW_APPLE_PROPERTY_atomic:
- return "DW_APPLE_PROPERTY_atomic";
- case DW_APPLE_PROPERTY_weak:
- return "DW_APPLE_PROPERTY_weak";
- case DW_APPLE_PROPERTY_strong:
- return "DW_APPLE_PROPERTY_strong";
- case DW_APPLE_PROPERTY_unsafe_unretained:
- return "DW_APPLE_PROPERTY_unsafe_unretained";
- case DW_APPLE_PROPERTY_nullability:
- return "DW_APPLE_PROPERTY_nullability";
- case DW_APPLE_PROPERTY_null_resettable:
- return "DW_APPLE_PROPERTY_null_resettable";
- case DW_APPLE_PROPERTY_class:
- return "DW_APPLE_PROPERTY_class";
+ default:
+ return StringRef();
+#define HANDLE_DW_APPLE_PROPERTY(ID, NAME) \
+ case DW_APPLE_PROPERTY_##NAME: \
+ return "DW_APPLE_PROPERTY_" #NAME;
+#include "llvm/Support/Dwarf.def"
}
- return nullptr;
}
-const char *llvm::dwarf::AtomTypeString(unsigned AT) {
+StringRef llvm::dwarf::AtomTypeString(unsigned AT) {
switch (AT) {
case dwarf::DW_ATOM_null:
return "DW_ATOM_null";
@@ -568,10 +317,10 @@ const char *llvm::dwarf::AtomTypeString(unsigned AT) {
case DW_ATOM_type_flags:
return "DW_ATOM_type_flags";
}
- return nullptr;
+ return StringRef();
}
-const char *llvm::dwarf::GDBIndexEntryKindString(GDBIndexEntryKind Kind) {
+StringRef llvm::dwarf::GDBIndexEntryKindString(GDBIndexEntryKind Kind) {
switch (Kind) {
case GIEK_NONE:
return "NONE";
@@ -593,7 +342,8 @@ const char *llvm::dwarf::GDBIndexEntryKindString(GDBIndexEntryKind Kind) {
llvm_unreachable("Unknown GDBIndexEntryKind value");
}
-const char *llvm::dwarf::GDBIndexEntryLinkageString(GDBIndexEntryLinkage Linkage) {
+StringRef
+llvm::dwarf::GDBIndexEntryLinkageString(GDBIndexEntryLinkage Linkage) {
switch (Linkage) {
case GIEL_EXTERNAL:
return "EXTERNAL";
@@ -603,7 +353,7 @@ const char *llvm::dwarf::GDBIndexEntryLinkageString(GDBIndexEntryLinkage Linkage
llvm_unreachable("Unknown GDBIndexEntryLinkage value");
}
-const char *llvm::dwarf::AttributeValueString(uint16_t Attr, unsigned Val) {
+StringRef llvm::dwarf::AttributeValueString(uint16_t Attr, unsigned Val) {
switch (Attr) {
case DW_AT_accessibility:
return AccessibilityString(Val);
@@ -631,5 +381,5 @@ const char *llvm::dwarf::AttributeValueString(uint16_t Attr, unsigned Val) {
return DiscriminantString(Val);
}
- return nullptr;
+ return StringRef();
}
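
The string functions above now expand their case labels from llvm/Support/Dwarf.def via the X-macro pattern: each HANDLE_DW_* entry in the .def file becomes one case, so a new opcode only has to be added in one place. A minimal, self-contained sketch of that pattern (the opcode list below is hypothetical and inlined here purely for illustration; the real entries live in Dwarf.def):

    #include <cstdio>

    // Stand-in for a .def file: one HANDLE(ID, NAME) entry per opcode.
    #define OPCODE_LIST(HANDLE) \
      HANDLE(0x01, copy)        \
      HANDLE(0x02, advance_pc)

    static const char *opcodeString(unsigned Op) {
      switch (Op) {
      default:
        return "";
    #define HANDLE_OPCODE(ID, NAME)                                            \
      case ID:                                                                 \
        return "DW_LNS_" #NAME;
      OPCODE_LIST(HANDLE_OPCODE)
    #undef HANDLE_OPCODE
      }
    }

    int main() { std::printf("%s\n", opcodeString(0x02)); } // DW_LNS_advance_pc
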
diff --git a/contrib/llvm/lib/Support/DynamicLibrary.cpp b/contrib/llvm/lib/Support/DynamicLibrary.cpp
index 9a7aeb50a216..ced21e46afe8 100644
--- a/contrib/llvm/lib/Support/DynamicLibrary.cpp
+++ b/contrib/llvm/lib/Support/DynamicLibrary.cpp
@@ -41,7 +41,7 @@ char llvm::sys::DynamicLibrary::Invalid = 0;
#else
-#if HAVE_DLFCN_H
+#if defined(HAVE_DLFCN_H) && defined(HAVE_DLOPEN)
#include <dlfcn.h>
using namespace llvm;
using namespace llvm::sys;
@@ -119,7 +119,7 @@ void* DynamicLibrary::SearchForAddressOfSymbol(const char *symbolName) {
return i->second;
}
-#if HAVE_DLFCN_H
+#if defined(HAVE_DLFCN_H) && defined(HAVE_DLOPEN)
// Now search the libraries.
if (OpenedHandles) {
for (DenseSet<void *>::iterator I = OpenedHandles->begin(),
diff --git a/contrib/llvm/lib/Support/Error.cpp b/contrib/llvm/lib/Support/Error.cpp
index 6b22691eb1ad..4730c0b26ba0 100644
--- a/contrib/llvm/lib/Support/Error.cpp
+++ b/contrib/llvm/lib/Support/Error.cpp
@@ -11,6 +11,7 @@
#include "llvm/ADT/Twine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
+#include <system_error>
using namespace llvm;
@@ -27,7 +28,7 @@ namespace {
// deal with the Error value directly, rather than converting to error_code.
class ErrorErrorCategory : public std::error_category {
public:
- const char *name() const LLVM_NOEXCEPT override { return "Error"; }
+ const char *name() const noexcept override { return "Error"; }
std::string message(int condition) const override {
switch (static_cast<ErrorErrorCode>(condition)) {
@@ -111,3 +112,18 @@ void report_fatal_error(Error Err, bool GenCrashDiag) {
}
}
+
+#ifndef _MSC_VER
+namespace llvm {
+
+// One of these two variables will be referenced by a symbol defined in
+// llvm-config.h. We provide a link-time (or load time for DSO) failure when
+// there is a mismatch in the build configuration of the API client and LLVM.
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
+int EnableABIBreakingChecks;
+#else
+int DisableABIBreakingChecks;
+#endif
+
+} // end namespace llvm
+#endif
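
The Enable/DisableABIBreakingChecks symbols give a link-time (or dlopen-time) guarantee that an API client and LLVM agree on LLVM_ENABLE_ABI_BREAKING_CHECKS: a header seen by the client emits a reference to whichever symbol matches the client's configuration, and only the matching definition exists in the library. A small sketch of the same trick, with hypothetical names standing in for the llvm-config.h machinery (GCC/Clang attribute syntax assumed):

    // library.cpp -- compiled into the library; exactly one symbol is defined.
    #if MYLIB_CHECKS_ENABLED
    int EnableChecks;
    #else
    int DisableChecks;
    #endif

    // library_config.h -- included by clients; references the symbol matching
    // *their* configuration. On a mismatch the reference stays unresolved, so
    // the link (or load) fails instead of silently breaking the ABI.
    #if MYLIB_CHECKS_ENABLED
    extern int EnableChecks;
    __attribute__((weak)) int *ChecksConfigCanary = &EnableChecks;
    #else
    extern int DisableChecks;
    __attribute__((weak)) int *ChecksConfigCanary = &DisableChecks;
    #endif
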
diff --git a/contrib/llvm/lib/Support/FileOutputBuffer.cpp b/contrib/llvm/lib/Support/FileOutputBuffer.cpp
index 651e679f2cb5..2c7bf0435d88 100644
--- a/contrib/llvm/lib/Support/FileOutputBuffer.cpp
+++ b/contrib/llvm/lib/Support/FileOutputBuffer.cpp
@@ -32,6 +32,9 @@ FileOutputBuffer::FileOutputBuffer(std::unique_ptr<mapped_file_region> R,
: Region(std::move(R)), FinalPath(Path), TempPath(TmpPath) {}
FileOutputBuffer::~FileOutputBuffer() {
+ // Close the mapping before deleting the temp file, so that the removal
+ // succeeds.
+ Region.reset();
sys::fs::remove(Twine(TempPath));
}
diff --git a/contrib/llvm/lib/Support/FileUtilities.cpp b/contrib/llvm/lib/Support/FileUtilities.cpp
index c6a58cc9d038..39dbefff5b70 100644
--- a/contrib/llvm/lib/Support/FileUtilities.cpp
+++ b/contrib/llvm/lib/Support/FileUtilities.cpp
@@ -14,13 +14,17 @@
#include "llvm/Support/FileUtilities.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
#include <cctype>
+#include <cmath>
+#include <cstdint>
#include <cstdlib>
#include <cstring>
+#include <memory>
#include <system_error>
+
using namespace llvm;
static bool isSignedChar(char C) {
@@ -215,7 +219,7 @@ int llvm::DiffFilesWithTolerance(StringRef NameA,
}
bool CompareFailed = false;
- while (1) {
+ while (true) {
// Scan for the end of file or next difference.
while (F1P < File1End && F2P < File2End && *F1P == *F2P) {
++F1P;
diff --git a/contrib/llvm/lib/Support/FoldingSet.cpp b/contrib/llvm/lib/Support/FoldingSet.cpp
index 52baf865d83e..c9bca7f4c1ab 100644
--- a/contrib/llvm/lib/Support/FoldingSet.cpp
+++ b/contrib/llvm/lib/Support/FoldingSet.cpp
@@ -54,8 +54,9 @@ void FoldingSetNodeID::AddPointer(const void *Ptr) {
// depend on the host. It doesn't matter, however, because hashing on
// pointer values is inherently unstable. Nothing should depend on the
// ordering of nodes in the folding set.
- Bits.append(reinterpret_cast<unsigned *>(&Ptr),
- reinterpret_cast<unsigned *>(&Ptr+1));
+ static_assert(sizeof(uintptr_t) <= sizeof(unsigned long long),
+ "unexpected pointer size");
+ AddInteger(reinterpret_cast<uintptr_t>(Ptr));
}
void FoldingSetNodeID::AddInteger(signed I) {
Bits.push_back(I);
@@ -80,8 +81,7 @@ void FoldingSetNodeID::AddInteger(long long I) {
}
void FoldingSetNodeID::AddInteger(unsigned long long I) {
AddInteger(unsigned(I));
- if ((uint64_t)(unsigned)I != I)
- Bits.push_back(unsigned(I >> 32));
+ AddInteger(unsigned(I >> 32));
}
void FoldingSetNodeID::AddString(StringRef String) {
@@ -127,8 +127,8 @@ void FoldingSetNodeID::AddString(StringRef String) {
// Pos will have overshot size by 4 - #bytes left over.
// No need to take endianness into account here - this is always executed.
switch (Pos - Size) {
- case 1: V = (V << 8) | (unsigned char)String[Size - 3]; // Fall thru.
- case 2: V = (V << 8) | (unsigned char)String[Size - 2]; // Fall thru.
+ case 1: V = (V << 8) | (unsigned char)String[Size - 3]; LLVM_FALLTHROUGH;
+ case 2: V = (V << 8) | (unsigned char)String[Size - 2]; LLVM_FALLTHROUGH;
case 3: V = (V << 8) | (unsigned char)String[Size - 1]; break;
default: return; // Nothing left.
}
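
With this change a pointer is hashed through the ordinary integer path: it is cast to uintptr_t, and AddInteger(unsigned long long) now unconditionally appends both 32-bit halves, so a pointer always contributes the same number of words to the profile regardless of its value. A minimal sketch of the resulting word layout, using a plain std::vector in place of FoldingSetNodeID:

    #include <cstdint>
    #include <vector>

    static void addInteger(std::vector<unsigned> &Bits, unsigned long long I) {
      Bits.push_back(static_cast<unsigned>(I));       // low 32 bits
      Bits.push_back(static_cast<unsigned>(I >> 32)); // high 32 bits, even if zero
    }

    static void addPointer(std::vector<unsigned> &Bits, const void *Ptr) {
      // Mirrors the new AddPointer: widen the pointer to an integer, then
      // hash it like any other 64-bit value.
      addInteger(Bits, reinterpret_cast<std::uintptr_t>(Ptr));
    }
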
diff --git a/contrib/llvm/lib/Support/FormatVariadic.cpp b/contrib/llvm/lib/Support/FormatVariadic.cpp
new file mode 100644
index 000000000000..de61dae814b5
--- /dev/null
+++ b/contrib/llvm/lib/Support/FormatVariadic.cpp
@@ -0,0 +1,156 @@
+//===- FormatVariadic.cpp - Format string parsing and analysis ----*-C++-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/FormatVariadic.h"
+
+using namespace llvm;
+
+static Optional<AlignStyle> translateLocChar(char C) {
+ switch (C) {
+ case '-':
+ return AlignStyle::Left;
+ case '=':
+ return AlignStyle::Center;
+ case '+':
+ return AlignStyle::Right;
+ default:
+ return None;
+ }
+ LLVM_BUILTIN_UNREACHABLE;
+}
+
+bool formatv_object_base::consumeFieldLayout(StringRef &Spec, AlignStyle &Where,
+ size_t &Align, char &Pad) {
+ Where = AlignStyle::Right;
+ Align = 0;
+ Pad = ' ';
+ if (Spec.empty())
+ return true;
+
+ if (Spec.size() > 1) {
+    // A maximum of 2 characters at the beginning can be used for something
+    // other than the width.
+ // If Spec[1] is a loc char, then Spec[0] is a pad char and Spec[2:...]
+ // contains the width.
+ // Otherwise, if Spec[0] is a loc char, then Spec[1:...] contains the width.
+ // Otherwise, Spec[0:...] contains the width.
+ if (auto Loc = translateLocChar(Spec[1])) {
+ Pad = Spec[0];
+ Where = *Loc;
+ Spec = Spec.drop_front(2);
+ } else if (auto Loc = translateLocChar(Spec[0])) {
+ Where = *Loc;
+ Spec = Spec.drop_front(1);
+ }
+ }
+
+ bool Failed = Spec.consumeInteger(0, Align);
+ return !Failed;
+}
+
+Optional<ReplacementItem>
+formatv_object_base::parseReplacementItem(StringRef Spec) {
+ StringRef RepString = Spec.trim("{}");
+
+ // If the replacement sequence does not start with a non-negative integer,
+ // this is an error.
+ char Pad = ' ';
+ std::size_t Align = 0;
+ AlignStyle Where = AlignStyle::Right;
+ StringRef Options;
+ size_t Index = 0;
+ RepString = RepString.trim();
+ if (RepString.consumeInteger(0, Index)) {
+ assert(false && "Invalid replacement sequence index!");
+ return ReplacementItem{};
+ }
+ RepString = RepString.trim();
+ if (!RepString.empty() && RepString.front() == ',') {
+ RepString = RepString.drop_front();
+ if (!consumeFieldLayout(RepString, Where, Align, Pad))
+ assert(false && "Invalid replacement field layout specification!");
+ }
+ RepString = RepString.trim();
+ if (!RepString.empty() && RepString.front() == ':') {
+ Options = RepString.drop_front().trim();
+ RepString = StringRef();
+ }
+ RepString = RepString.trim();
+ if (!RepString.empty()) {
+ assert(false && "Unexpected characters found in replacement string!");
+ }
+
+ return ReplacementItem{Spec, Index, Align, Where, Pad, Options};
+}
+
+std::pair<ReplacementItem, StringRef>
+formatv_object_base::splitLiteralAndReplacement(StringRef Fmt) {
+ StringRef Rep;
+ StringRef Remainder;
+ std::size_t From = 0;
+ while (From < Fmt.size() && From != StringRef::npos) {
+ std::size_t BO = Fmt.find_first_of('{', From);
+ // Everything up until the first brace is a literal.
+ if (BO != 0)
+ return std::make_pair(ReplacementItem{Fmt.substr(0, BO)}, Fmt.substr(BO));
+
+ StringRef Braces =
+ Fmt.drop_front(BO).take_while([](char C) { return C == '{'; });
+ // If there is more than one brace, then some of them are escaped. Treat
+ // these as replacements.
+ if (Braces.size() > 1) {
+ size_t NumEscapedBraces = Braces.size() / 2;
+ StringRef Middle = Fmt.substr(BO, NumEscapedBraces);
+ StringRef Right = Fmt.drop_front(BO + NumEscapedBraces * 2);
+ return std::make_pair(ReplacementItem{Middle}, Right);
+ }
+ // An unterminated open brace is undefined. We treat the rest of the string
+ // as a literal replacement, but we assert to indicate that this is
+ // undefined and that we consider it an error.
+ std::size_t BC = Fmt.find_first_of('}', BO);
+ if (BC == StringRef::npos) {
+ assert(
+ false &&
+ "Unterminated brace sequence. Escape with {{ for a literal brace.");
+ return std::make_pair(ReplacementItem{Fmt}, StringRef());
+ }
+
+ // Even if there is a closing brace, if there is another open brace before
+ // this closing brace, treat this portion as literal, and try again with the
+ // next one.
+ std::size_t BO2 = Fmt.find_first_of('{', BO + 1);
+ if (BO2 < BC)
+ return std::make_pair(ReplacementItem{Fmt.substr(0, BO2)},
+ Fmt.substr(BO2));
+
+ StringRef Spec = Fmt.slice(BO + 1, BC);
+ StringRef Right = Fmt.substr(BC + 1);
+
+ auto RI = parseReplacementItem(Spec);
+ if (RI.hasValue())
+ return std::make_pair(*RI, Right);
+
+ // If there was an error parsing the replacement item, treat it as an
+ // invalid replacement spec, and just continue.
+ From = BC + 1;
+ }
+ return std::make_pair(ReplacementItem{Fmt}, StringRef());
+}
+
+std::vector<ReplacementItem>
+formatv_object_base::parseFormatString(StringRef Fmt) {
+ std::vector<ReplacementItem> Replacements;
+ ReplacementItem I;
+ while (!Fmt.empty()) {
+ std::tie(I, Fmt) = splitLiteralAndReplacement(Fmt);
+ if (I.Type != ReplacementType::Empty)
+ Replacements.push_back(I);
+ }
+ return Replacements;
+}
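
The parser above implements the formatv replacement grammar "{index[,layout][:options]}", where the layout is an optional pad character, an alignment character ('-', '=', '+'), and a width. A hedged usage sketch, assuming the formatv() entry point declared in FormatVariadic.h and the stock integer/string format providers:

    #include "llvm/Support/FormatVariadic.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    int main() {
      // Indices may repeat and appear in any order.
      outs() << formatv("{0} + {0} = {1}\n", 2, 4);
      // ",-8" left-aligns in an 8-column field; "{{" escapes a literal brace.
      outs() << formatv("[{0,-8}] {{literal}\n", "name");
      // Text after ':' is forwarded to the argument's format provider.
      outs() << formatv("{0:X}\n", 255);
    }
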
diff --git a/contrib/llvm/lib/Support/GlobPattern.cpp b/contrib/llvm/lib/Support/GlobPattern.cpp
new file mode 100644
index 000000000000..8ee2febeeea1
--- /dev/null
+++ b/contrib/llvm/lib/Support/GlobPattern.cpp
@@ -0,0 +1,167 @@
+//===-- GlobPattern.cpp - Glob pattern matcher implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a glob pattern matcher.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/GlobPattern.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Errc.h"
+
+using namespace llvm;
+
+static bool hasWildcard(StringRef S) {
+ return S.find_first_of("?*[") != StringRef::npos;
+}
+
+// Expands character ranges and returns a bitmap.
+// For example, "a-cf-hz" is expanded to "abcfghz".
+static Expected<BitVector> expand(StringRef S, StringRef Original) {
+ BitVector BV(256, false);
+
+ // Expand X-Y.
+ for (;;) {
+ if (S.size() < 3)
+ break;
+
+ // If it doesn't start with something like X-Y,
+ // consume the first character and proceed.
+ if (S[1] != '-') {
+ BV[S[0]] = true;
+ S = S.substr(1);
+ continue;
+ }
+
+ // It must be in the form of X-Y.
+ // Validate it and then interpret the range.
+ if (S[0] > S[2])
+ return make_error<StringError>("invalid glob pattern: " + Original,
+ errc::invalid_argument);
+
+ for (int C = S[0]; C <= S[2]; ++C)
+ BV[C] = true;
+ S = S.substr(3);
+ }
+
+ for (char C : S)
+ BV[C] = true;
+ return BV;
+}
+
+// This is a scanner for the glob pattern.
+// A glob pattern token is one of "*", "?", "[<chars>]", "[^<chars>]"
+// (which is a negative form of "[<chars>]"), or a non-meta character.
+// This function returns the first token in S.
+static Expected<BitVector> scan(StringRef &S, StringRef Original) {
+ switch (S[0]) {
+ case '*':
+ S = S.substr(1);
+ // '*' is represented by an empty bitvector.
+ // All other bitvectors are 256-bit long.
+ return BitVector();
+ case '?':
+ S = S.substr(1);
+ return BitVector(256, true);
+ case '[': {
+ size_t End = S.find(']', 1);
+ if (End == StringRef::npos)
+ return make_error<StringError>("invalid glob pattern: " + Original,
+ errc::invalid_argument);
+
+ StringRef Chars = S.substr(1, End - 1);
+ S = S.substr(End + 1);
+ if (Chars.startswith("^")) {
+ Expected<BitVector> BV = expand(Chars.substr(1), Original);
+ if (!BV)
+ return BV.takeError();
+ return BV->flip();
+ }
+ return expand(Chars, Original);
+ }
+ default:
+ BitVector BV(256, false);
+ BV[S[0]] = true;
+ S = S.substr(1);
+ return BV;
+ }
+}
+
+Expected<GlobPattern> GlobPattern::create(StringRef S) {
+ GlobPattern Pat;
+
+ // S doesn't contain any metacharacter,
+ // so the regular string comparison should work.
+ if (!hasWildcard(S)) {
+ Pat.Exact = S;
+ return Pat;
+ }
+
+ // S is something like "foo*". We can use startswith().
+ if (S.endswith("*") && !hasWildcard(S.drop_back())) {
+ Pat.Prefix = S.drop_back();
+ return Pat;
+ }
+
+ // S is something like "*foo". We can use endswith().
+ if (S.startswith("*") && !hasWildcard(S.drop_front())) {
+ Pat.Suffix = S.drop_front();
+ return Pat;
+ }
+
+ // Otherwise, we need to do real glob pattern matching.
+ // Parse the pattern now.
+ StringRef Original = S;
+ while (!S.empty()) {
+ Expected<BitVector> BV = scan(S, Original);
+ if (!BV)
+ return BV.takeError();
+ Pat.Tokens.push_back(*BV);
+ }
+ return Pat;
+}
+
+bool GlobPattern::match(StringRef S) const {
+ if (Exact)
+ return S == *Exact;
+ if (Prefix)
+ return S.startswith(*Prefix);
+ if (Suffix)
+ return S.endswith(*Suffix);
+ return matchOne(Tokens, S);
+}
+
+// Runs glob pattern Pats against string S.
+bool GlobPattern::matchOne(ArrayRef<BitVector> Pats, StringRef S) const {
+ for (;;) {
+ if (Pats.empty())
+ return S.empty();
+
+ // If Pats[0] is '*', try to match Pats[1..] against all possible
+  // tail strings of S to see whether at least one pattern succeeds.
+ if (Pats[0].size() == 0) {
+ Pats = Pats.slice(1);
+ if (Pats.empty())
+ // Fast path. If a pattern is '*', it matches anything.
+ return true;
+ for (size_t I = 0, E = S.size(); I < E; ++I)
+ if (matchOne(Pats, S.substr(I)))
+ return true;
+ return false;
+ }
+
+ // If Pats[0] is not '*', it must consume one character.
+ if (S.empty() || !Pats[0][S[0]])
+ return false;
+ Pats = Pats.slice(1);
+ S = S.substr(1);
+ }
+}
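
GlobPattern compiles a pattern once, into an exact string, a prefix/suffix, or a sequence of 256-bit character classes, and then matches many strings against it. A hedged usage sketch, assuming the GlobPattern.h interface this file implements:

    #include "llvm/Support/Error.h"
    #include "llvm/Support/GlobPattern.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    int main() {
      // "[a-c]" expands to the bit set {a, b, c}; '*' matches any run of chars.
      Expected<GlobPattern> Pat = GlobPattern::create("lib[a-c]*.so");
      if (!Pat) {
        logAllUnhandledErrors(Pat.takeError(), errs(), "glob: ");
        return 1;
      }
      outs() << (Pat->match("libc-2.24.so") ? "match" : "no match") << "\n";
      outs() << (Pat->match("libm.so") ? "match" : "no match") << "\n";
    }
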
diff --git a/contrib/llvm/lib/Support/Host.cpp b/contrib/llvm/lib/Support/Host.cpp
index 12ac19d3be8c..dd19eee15f62 100644
--- a/contrib/llvm/lib/Support/Host.cpp
+++ b/contrib/llvm/lib/Support/Host.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/Host.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
@@ -19,7 +20,9 @@
#include "llvm/Config/config.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
+#include <assert.h>
#include <string.h>
// Include the platform-specific parts of this class.
@@ -69,9 +72,8 @@ static ssize_t LLVM_ATTRIBUTE_UNUSED readCpuInfo(void *Buf, size_t Size) {
}
#endif
-#if defined(i386) || defined(__i386__) || defined(__x86__) || \
- defined(_M_IX86) || defined(__x86_64__) || defined(_M_AMD64) || \
- defined(_M_X64)
+#if defined(__i386__) || defined(_M_IX86) || \
+ defined(__x86_64__) || defined(_M_X64)
enum VendorSignatures {
SIG_INTEL = 0x756e6547 /* Genu */,
@@ -169,30 +171,63 @@ enum ProcessorFeatures {
FEATURE_EM64T
};
+// The check below for i386 was copied from clang's cpuid.h (__get_cpuid_max).
+// Check motivated by bug reports for OpenSSL crashing on CPUs without CPUID
+// support. Consequently, for i386, the presence of CPUID is checked first
+// via the corresponding eflags bit.
+// Removal of the cpuid.h header was motivated by PR30384.
+// Header cpuid.h and method __get_cpuid_max are not used in llvm, clang, openmp
+// or the test-suite, but are used in external projects, e.g. libstdcxx.
+static bool isCpuIdSupported() {
+#if defined(__GNUC__) || defined(__clang__)
+#if defined(__i386__)
+ int __cpuid_supported;
+ __asm__(" pushfl\n"
+ " popl %%eax\n"
+ " movl %%eax,%%ecx\n"
+ " xorl $0x00200000,%%eax\n"
+ " pushl %%eax\n"
+ " popfl\n"
+ " pushfl\n"
+ " popl %%eax\n"
+ " movl $0,%0\n"
+ " cmpl %%eax,%%ecx\n"
+ " je 1f\n"
+ " movl $1,%0\n"
+ "1:"
+ : "=r"(__cpuid_supported)
+ :
+ : "eax", "ecx");
+ if (!__cpuid_supported)
+ return false;
+#endif
+ return true;
+#endif
+ return true;
+}
+
/// getX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in
/// the specified arguments. If we can't run cpuid on the host, return true.
static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
unsigned *rECX, unsigned *rEDX) {
+#if defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)
#if defined(__GNUC__) || defined(__clang__)
-#if defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64)
- // gcc doesn't know cpuid would clobber ebx/rbx. Preseve it manually.
- asm("movq\t%%rbx, %%rsi\n\t"
- "cpuid\n\t"
- "xchgq\t%%rbx, %%rsi\n\t"
- : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
- : "a"(value));
- return false;
-#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
- asm("movl\t%%ebx, %%esi\n\t"
- "cpuid\n\t"
- "xchgl\t%%ebx, %%esi\n\t"
- : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
- : "a"(value));
- return false;
-// pedantic #else returns to appease -Wunreachable-code (so we don't generate
-// postprocessed code that looks like "return true; return false;")
+#if defined(__x86_64__)
+ // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
+ // FIXME: should we save this for Clang?
+ __asm__("movq\t%%rbx, %%rsi\n\t"
+ "cpuid\n\t"
+ "xchgq\t%%rbx, %%rsi\n\t"
+ : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
+ : "a"(value));
+#elif defined(__i386__)
+ __asm__("movl\t%%ebx, %%esi\n\t"
+ "cpuid\n\t"
+ "xchgl\t%%ebx, %%esi\n\t"
+ : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
+ : "a"(value));
#else
- return true;
+ assert(0 && "This method is defined only for x86.");
#endif
#elif defined(_MSC_VER)
// The MSVC intrinsic is portable across x86 and x64.
@@ -202,6 +237,7 @@ static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
*rEBX = registers[1];
*rECX = registers[2];
*rEDX = registers[3];
+#endif
return false;
#else
return true;
@@ -214,15 +250,16 @@ static bool getX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
unsigned *rEAX, unsigned *rEBX, unsigned *rECX,
unsigned *rEDX) {
-#if defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64)
-#if defined(__GNUC__)
+#if defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)
+#if defined(__x86_64__) || defined(_M_X64)
+#if defined(__GNUC__) || defined(__clang__)
  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
- asm("movq\t%%rbx, %%rsi\n\t"
- "cpuid\n\t"
- "xchgq\t%%rbx, %%rsi\n\t"
- : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
- : "a"(value), "c"(subleaf));
- return false;
+ // FIXME: should we save this for Clang?
+ __asm__("movq\t%%rbx, %%rsi\n\t"
+ "cpuid\n\t"
+ "xchgq\t%%rbx, %%rsi\n\t"
+ : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
+ : "a"(value), "c"(subleaf));
#elif defined(_MSC_VER)
int registers[4];
__cpuidex(registers, value, subleaf);
@@ -230,18 +267,14 @@ static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
*rEBX = registers[1];
*rECX = registers[2];
*rEDX = registers[3];
- return false;
-#else
- return true;
#endif
-#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
-#if defined(__GNUC__)
- asm("movl\t%%ebx, %%esi\n\t"
- "cpuid\n\t"
- "xchgl\t%%ebx, %%esi\n\t"
- : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
- : "a"(value), "c"(subleaf));
- return false;
+#elif defined(__i386__) || defined(_M_IX86)
+#if defined(__GNUC__) || defined(__clang__)
+ __asm__("movl\t%%ebx, %%esi\n\t"
+ "cpuid\n\t"
+ "xchgl\t%%ebx, %%esi\n\t"
+ : "=a"(*rEAX), "=S"(*rEBX), "=c"(*rECX), "=d"(*rEDX)
+ : "a"(value), "c"(subleaf));
#elif defined(_MSC_VER)
__asm {
mov eax,value
@@ -256,17 +289,18 @@ static bool getX86CpuIDAndInfoEx(unsigned value, unsigned subleaf,
mov esi,rEDX
mov dword ptr [esi],edx
}
- return false;
+#endif
#else
- return true;
+ assert(0 && "This method is defined only for x86.");
#endif
+ return false;
#else
return true;
#endif
}
static bool getX86XCR0(unsigned *rEAX, unsigned *rEDX) {
-#if defined(__GNUC__)
+#if defined(__GNUC__) || defined(__clang__)
// Check xgetbv; this uses a .byte sequence instead of the instruction
// directly because older assemblers do not include support for xgetbv and
// there is no easy way to conditionally compile based on the assembler used.
@@ -743,6 +777,14 @@ StringRef sys::getHostCPUName() {
unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
unsigned MaxLeaf, Vendor;
+#if defined(__GNUC__) || defined(__clang__)
+  // FIXME: either include cpuid.h from clang, or copy __get_cpuid_max here
+  // and simplify it so that it does not invoke __cpuid (as
+  // compiler-rt/lib/builtins/cpu_model.c does). We opt for the second option.
+  if (!isCpuIdSupported())
+ return "generic";
+#endif
if (getX86CpuIDAndInfo(0, &MaxLeaf, &Vendor, &ECX, &EDX))
return "generic";
if (getX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX))
@@ -1148,9 +1190,81 @@ StringRef sys::getHostCPUName() {
StringRef sys::getHostCPUName() { return "generic"; }
#endif
-#if defined(i386) || defined(__i386__) || defined(__x86__) || \
- defined(_M_IX86) || defined(__x86_64__) || defined(_M_AMD64) || \
- defined(_M_X64)
+#if defined(__linux__) && defined(__x86_64__)
+// On Linux, the number of physical cores can be computed from /proc/cpuinfo,
+// using the number of unique physical/core id pairs. The following
+// implementation reads the /proc/cpuinfo format on an x86_64 system.
+static int computeHostNumPhysicalCores() {
+ // Read /proc/cpuinfo as a stream (until EOF reached). It cannot be
+ // mmapped because it appears to have 0 size.
+ llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
+ llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo");
+ if (std::error_code EC = Text.getError()) {
+ llvm::errs() << "Can't read "
+ << "/proc/cpuinfo: " << EC.message() << "\n";
+ }
+ SmallVector<StringRef, 8> strs;
+ (*Text)->getBuffer().split(strs, "\n", /*MaxSplit=*/-1,
+ /*KeepEmpty=*/false);
+ int CurPhysicalId = -1;
+ int CurCoreId = -1;
+ SmallSet<std::pair<int, int>, 32> UniqueItems;
+ for (auto &Line : strs) {
+ Line = Line.trim();
+ if (!Line.startswith("physical id") && !Line.startswith("core id"))
+ continue;
+ std::pair<StringRef, StringRef> Data = Line.split(':');
+ auto Name = Data.first.trim();
+ auto Val = Data.second.trim();
+ if (Name == "physical id") {
+ assert(CurPhysicalId == -1 &&
+ "Expected a core id before seeing another physical id");
+ Val.getAsInteger(10, CurPhysicalId);
+ }
+ if (Name == "core id") {
+ assert(CurCoreId == -1 &&
+ "Expected a physical id before seeing another core id");
+ Val.getAsInteger(10, CurCoreId);
+ }
+ if (CurPhysicalId != -1 && CurCoreId != -1) {
+ UniqueItems.insert(std::make_pair(CurPhysicalId, CurCoreId));
+ CurPhysicalId = -1;
+ CurCoreId = -1;
+ }
+ }
+ return UniqueItems.size();
+}
+#elif defined(__APPLE__) && defined(__x86_64__)
+#include <sys/param.h>
+#include <sys/sysctl.h>
+
+// Gets the number of *physical cores* on the machine.
+static int computeHostNumPhysicalCores() {
+ uint32_t count;
+ size_t len = sizeof(count);
+ sysctlbyname("hw.physicalcpu", &count, &len, NULL, 0);
+ if (count < 1) {
+ int nm[2];
+ nm[0] = CTL_HW;
+ nm[1] = HW_AVAILCPU;
+ sysctl(nm, 2, &count, &len, NULL, 0);
+ if (count < 1)
+ return -1;
+ }
+ return count;
+}
+#else
+// On other systems, return -1 to indicate unknown.
+static int computeHostNumPhysicalCores() { return -1; }
+#endif
+
+int sys::getHostNumPhysicalCores() {
+ static int NumCores = computeHostNumPhysicalCores();
+ return NumCores;
+}
+
+#if defined(__i386__) || defined(_M_IX86) || \
+ defined(__x86_64__) || defined(_M_X64)
bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
unsigned MaxLevel;
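
getHostNumPhysicalCores() caches the result of the platform-specific helper; on Linux/x86-64 that helper counts unique (physical id, core id) pairs from /proc/cpuinfo. A small self-contained sketch of the same counting idea, using standard C++ in place of the LLVM string utilities:

    #include <cstdlib>
    #include <iostream>
    #include <set>
    #include <sstream>
    #include <string>
    #include <utility>

    // Counts unique (physical id, core id) pairs in /proc/cpuinfo-style text.
    static int countPhysicalCores(const std::string &CpuInfo) {
      std::set<std::pair<int, int>> Cores;
      std::istringstream In(CpuInfo);
      std::string Line;
      int Phys = -1, Core = -1;
      while (std::getline(In, Line)) {
        std::size_t Colon = Line.find(':');
        if (Colon == std::string::npos)
          continue;
        int Val = std::atoi(Line.c_str() + Colon + 1);
        if (Line.compare(0, 11, "physical id") == 0)
          Phys = Val;
        else if (Line.compare(0, 7, "core id") == 0)
          Core = Val;
        if (Phys != -1 && Core != -1) {
          Cores.insert({Phys, Core});
          Phys = Core = -1;
        }
      }
      return static_cast<int>(Cores.size());
    }

    int main() {
      std::cout << countPhysicalCores("physical id : 0\ncore id : 0\n"
                                      "physical id : 0\ncore id : 1\n")
                << "\n"; // 2
    }
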
diff --git a/contrib/llvm/lib/Support/IntrusiveRefCntPtr.cpp b/contrib/llvm/lib/Support/IntrusiveRefCntPtr.cpp
deleted file mode 100644
index a8b45593ae70..000000000000
--- a/contrib/llvm/lib/Support/IntrusiveRefCntPtr.cpp
+++ /dev/null
@@ -1,14 +0,0 @@
-//== IntrusiveRefCntPtr.cpp - Smart Refcounting Pointer ----------*- C++ -*-==//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/ADT/IntrusiveRefCntPtr.h"
-
-using namespace llvm;
-
-void RefCountedBaseVPTR::anchor() { }
diff --git a/contrib/llvm/lib/Support/LockFileManager.cpp b/contrib/llvm/lib/Support/LockFileManager.cpp
index 611f94a9709b..444aaa37c8c8 100644
--- a/contrib/llvm/lib/Support/LockFileManager.cpp
+++ b/contrib/llvm/lib/Support/LockFileManager.cpp
@@ -6,13 +6,22 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
+
#include "llvm/Support/LockFileManager.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/Errc.h"
+#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Signals.h"
+#include <cerrno>
+#include <ctime>
+#include <memory>
+#include <tuple>
+#include <system_error>
#include <sys/stat.h>
#include <sys/types.h>
#if LLVM_ON_WIN32
@@ -31,6 +40,7 @@
#if USE_OSX_GETHOSTUUID
#include <uuid/uuid.h>
#endif
+
using namespace llvm;
/// \brief Attempt to read the lock file with the given name, if it exists.
@@ -112,6 +122,7 @@ bool LockFileManager::processStillExecuting(StringRef HostID, int PID) {
}
namespace {
+
/// An RAII helper object ensure that the unique lock file is removed.
///
/// Ensures that if there is an error or a signal before we finish acquiring the
@@ -127,6 +138,7 @@ public:
: Filename(Name), RemoveImmediately(true) {
sys::RemoveFileOnSignal(Filename, nullptr);
}
+
~RemoveUniqueLockFileOnSignal() {
if (!RemoveImmediately) {
// Leave the signal handler enabled. It will be removed when the lock is
@@ -136,8 +148,10 @@ public:
sys::fs::remove(Filename);
sys::DontRemoveFileOnSignal(Filename);
}
+
void lockAcquired() { RemoveImmediately = false; }
};
+
} // end anonymous namespace
LockFileManager::LockFileManager(StringRef FileName)
@@ -202,7 +216,7 @@ LockFileManager::LockFileManager(StringRef FileName)
// held since the .lock symlink will point to a nonexistent file.
RemoveUniqueLockFileOnSignal RemoveUniqueFile(UniqueLockFileName);
- while (1) {
+ while (true) {
// Create a link from the lock file name. If this succeeds, we're done.
std::error_code EC =
sys::fs::create_link(UniqueLockFileName, LockFileName);
diff --git a/contrib/llvm/lib/Support/MD5.cpp b/contrib/llvm/lib/Support/MD5.cpp
index ceab580984d4..942571eab0f3 100644
--- a/contrib/llvm/lib/Support/MD5.cpp
+++ b/contrib/llvm/lib/Support/MD5.cpp
@@ -248,33 +248,15 @@ void MD5::final(MD5Result &Result) {
memset(&buffer[used], 0, free - 8);
lo <<= 3;
- buffer[56] = lo;
- buffer[57] = lo >> 8;
- buffer[58] = lo >> 16;
- buffer[59] = lo >> 24;
- buffer[60] = hi;
- buffer[61] = hi >> 8;
- buffer[62] = hi >> 16;
- buffer[63] = hi >> 24;
+ support::endian::write32le(&buffer[56], lo);
+ support::endian::write32le(&buffer[60], hi);
body(makeArrayRef(buffer, 64));
- Result[0] = a;
- Result[1] = a >> 8;
- Result[2] = a >> 16;
- Result[3] = a >> 24;
- Result[4] = b;
- Result[5] = b >> 8;
- Result[6] = b >> 16;
- Result[7] = b >> 24;
- Result[8] = c;
- Result[9] = c >> 8;
- Result[10] = c >> 16;
- Result[11] = c >> 24;
- Result[12] = d;
- Result[13] = d >> 8;
- Result[14] = d >> 16;
- Result[15] = d >> 24;
+ support::endian::write32le(&Result[0], a);
+ support::endian::write32le(&Result[4], b);
+ support::endian::write32le(&Result[8], c);
+ support::endian::write32le(&Result[12], d);
}
void MD5::stringifyResult(MD5Result &Result, SmallString<32> &Str) {
@@ -283,4 +265,14 @@ void MD5::stringifyResult(MD5Result &Result, SmallString<32> &Str) {
Res << format("%.2x", Result[i]);
}
+std::array<uint8_t, 16> MD5::hash(ArrayRef<uint8_t> Data) {
+ MD5 Hash;
+ Hash.update(Data);
+ MD5::MD5Result Res;
+ Hash.final(Res);
+
+ std::array<uint8_t, 16> Arr;
+ memcpy(Arr.data(), Res, sizeof(Res));
+ return Arr;
+}
}
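
Replacing the per-byte stores with endian::write32le makes the little-endian serialization explicit, and the new static MD5::hash() wraps the update/final sequence for one-shot hashing. A hedged usage sketch, assuming the declarations in llvm/Support/MD5.h after this change:

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/Support/Format.h"
    #include "llvm/Support/MD5.h"
    #include "llvm/Support/raw_ostream.h"
    #include <array>
    #include <cstring>

    using namespace llvm;

    int main() {
      const char *Msg = "The quick brown fox";
      std::array<uint8_t, 16> Digest = MD5::hash(
          makeArrayRef(reinterpret_cast<const uint8_t *>(Msg), strlen(Msg)));
      for (uint8_t Byte : Digest)
        outs() << format("%02x", unsigned(Byte));
      outs() << "\n";
    }
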
diff --git a/contrib/llvm/lib/Support/MemoryBuffer.cpp b/contrib/llvm/lib/Support/MemoryBuffer.cpp
index b935cbf1ae7a..a3a18c9283ce 100644
--- a/contrib/llvm/lib/Support/MemoryBuffer.cpp
+++ b/contrib/llvm/lib/Support/MemoryBuffer.cpp
@@ -90,9 +90,9 @@ public:
/// tail-allocated data.
void operator delete(void *p) { ::operator delete(p); }
- const char *getBufferIdentifier() const override {
- // The name is stored after the class itself.
- return reinterpret_cast<const char*>(this + 1);
+ StringRef getBufferIdentifier() const override {
+ // The name is stored after the class itself.
+ return StringRef(reinterpret_cast<const char *>(this + 1));
}
BufferKind getBufferKind() const override {
@@ -221,9 +221,9 @@ public:
/// tail-allocated data.
void operator delete(void *p) { ::operator delete(p); }
- const char *getBufferIdentifier() const override {
+ StringRef getBufferIdentifier() const override {
// The name is stored after the class itself.
- return reinterpret_cast<const char *>(this + 1);
+ return StringRef(reinterpret_cast<const char *>(this + 1));
}
BufferKind getBufferKind() const override {
@@ -438,6 +438,18 @@ ErrorOr<std::unique_ptr<MemoryBuffer>> MemoryBuffer::getSTDIN() {
return getMemoryBufferForStream(0, "<stdin>");
}
+ErrorOr<std::unique_ptr<MemoryBuffer>>
+MemoryBuffer::getFileAsStream(const Twine &Filename) {
+ int FD;
+ std::error_code EC = sys::fs::openFileForRead(Filename, FD);
+ if (EC)
+ return EC;
+ ErrorOr<std::unique_ptr<MemoryBuffer>> Ret =
+ getMemoryBufferForStream(FD, Filename);
+ close(FD);
+ return Ret;
+}
+
MemoryBufferRef MemoryBuffer::getMemBufferRef() const {
StringRef Data = getBuffer();
StringRef Identifier = getBufferIdentifier();
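
getFileAsStream() reads a file through the streaming path into a growing buffer rather than mmapping it, which is what makes files that report a zero size (such as those under /proc) readable. A hedged usage sketch, assuming the declaration added to MemoryBuffer.h alongside this implementation:

    #include "llvm/Support/MemoryBuffer.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    int main() {
      ErrorOr<std::unique_ptr<MemoryBuffer>> Buf =
          MemoryBuffer::getFileAsStream("/proc/version");
      if (std::error_code EC = Buf.getError()) {
        errs() << "/proc/version: " << EC.message() << "\n";
        return 1;
      }
      outs() << (*Buf)->getBuffer();
      return 0;
    }
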
diff --git a/contrib/llvm/lib/Support/MemoryObject.cpp b/contrib/llvm/lib/Support/MemoryObject.cpp
deleted file mode 100644
index d796acfa90e7..000000000000
--- a/contrib/llvm/lib/Support/MemoryObject.cpp
+++ /dev/null
@@ -1,14 +0,0 @@
-//===- MemoryObject.cpp - Abstract memory interface -----------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Support/MemoryObject.h"
-using namespace llvm;
-
-MemoryObject::~MemoryObject() {
-}
diff --git a/contrib/llvm/lib/Support/NativeFormatting.cpp b/contrib/llvm/lib/Support/NativeFormatting.cpp
new file mode 100644
index 000000000000..bb8689141098
--- /dev/null
+++ b/contrib/llvm/lib/Support/NativeFormatting.cpp
@@ -0,0 +1,265 @@
+//===- NativeFormatting.cpp - Low level formatting helpers -------*- C++-*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/NativeFormatting.h"
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Format.h"
+
+using namespace llvm;
+
+template<typename T, std::size_t N>
+static int format_to_buffer(T Value, char (&Buffer)[N]) {
+ char *EndPtr = std::end(Buffer);
+ char *CurPtr = EndPtr;
+
+ do {
+ *--CurPtr = '0' + char(Value % 10);
+ Value /= 10;
+ } while (Value);
+ return EndPtr - CurPtr;
+}
+
+static void writeWithCommas(raw_ostream &S, ArrayRef<char> Buffer) {
+ assert(!Buffer.empty());
+
+ ArrayRef<char> ThisGroup;
+ int InitialDigits = ((Buffer.size() - 1) % 3) + 1;
+ ThisGroup = Buffer.take_front(InitialDigits);
+ S.write(ThisGroup.data(), ThisGroup.size());
+
+ Buffer = Buffer.drop_front(InitialDigits);
+ assert(Buffer.size() % 3 == 0);
+ while (!Buffer.empty()) {
+ S << ',';
+ ThisGroup = Buffer.take_front(3);
+ S.write(ThisGroup.data(), 3);
+ Buffer = Buffer.drop_front(3);
+ }
+}
+
+template <typename T>
+static void write_unsigned_impl(raw_ostream &S, T N, size_t MinDigits,
+ IntegerStyle Style, bool IsNegative) {
+ static_assert(std::is_unsigned<T>::value, "Value is not unsigned!");
+
+ char NumberBuffer[128];
+ std::memset(NumberBuffer, '0', sizeof(NumberBuffer));
+
+ size_t Len = 0;
+ Len = format_to_buffer(N, NumberBuffer);
+
+ if (IsNegative)
+ S << '-';
+
+ if (Len < MinDigits && Style != IntegerStyle::Number) {
+ for (size_t I = Len; I < MinDigits; ++I)
+ S << '0';
+ }
+
+ if (Style == IntegerStyle::Number) {
+ writeWithCommas(S, ArrayRef<char>(std::end(NumberBuffer) - Len, Len));
+ } else {
+ S.write(std::end(NumberBuffer) - Len, Len);
+ }
+}
+
+template <typename T>
+static void write_unsigned(raw_ostream &S, T N, size_t MinDigits,
+ IntegerStyle Style, bool IsNegative = false) {
+ // Output using 32-bit div/mod if possible.
+ if (N == static_cast<uint32_t>(N))
+ write_unsigned_impl(S, static_cast<uint32_t>(N), MinDigits, Style,
+ IsNegative);
+ else
+ write_unsigned_impl(S, N, MinDigits, Style, IsNegative);
+}
+
+template <typename T>
+static void write_signed(raw_ostream &S, T N, size_t MinDigits,
+ IntegerStyle Style) {
+ static_assert(std::is_signed<T>::value, "Value is not signed!");
+
+ using UnsignedT = typename std::make_unsigned<T>::type;
+
+ if (N >= 0) {
+ write_unsigned(S, static_cast<UnsignedT>(N), MinDigits, Style);
+ return;
+ }
+
+ UnsignedT UN = -(UnsignedT)N;
+ write_unsigned(S, UN, MinDigits, Style, true);
+}
+
+void llvm::write_integer(raw_ostream &S, unsigned int N, size_t MinDigits,
+ IntegerStyle Style) {
+ write_unsigned(S, N, MinDigits, Style);
+}
+
+void llvm::write_integer(raw_ostream &S, int N, size_t MinDigits,
+ IntegerStyle Style) {
+ write_signed(S, N, MinDigits, Style);
+}
+
+void llvm::write_integer(raw_ostream &S, unsigned long N, size_t MinDigits,
+ IntegerStyle Style) {
+ write_unsigned(S, N, MinDigits, Style);
+}
+
+void llvm::write_integer(raw_ostream &S, long N, size_t MinDigits,
+ IntegerStyle Style) {
+ write_signed(S, N, MinDigits, Style);
+}
+
+void llvm::write_integer(raw_ostream &S, unsigned long long N, size_t MinDigits,
+ IntegerStyle Style) {
+ write_unsigned(S, N, MinDigits, Style);
+}
+
+void llvm::write_integer(raw_ostream &S, long long N, size_t MinDigits,
+ IntegerStyle Style) {
+ write_signed(S, N, MinDigits, Style);
+}
+
+void llvm::write_hex(raw_ostream &S, uint64_t N, HexPrintStyle Style,
+ Optional<size_t> Width) {
+ const size_t kMaxWidth = 128u;
+
+ size_t W = std::min(kMaxWidth, Width.getValueOr(0u));
+
+ unsigned Nibbles = (64 - countLeadingZeros(N) + 3) / 4;
+ bool Prefix = (Style == HexPrintStyle::PrefixLower ||
+ Style == HexPrintStyle::PrefixUpper);
+ bool Upper =
+ (Style == HexPrintStyle::Upper || Style == HexPrintStyle::PrefixUpper);
+ unsigned PrefixChars = Prefix ? 2 : 0;
+ unsigned NumChars =
+ std::max(static_cast<unsigned>(W), std::max(1u, Nibbles) + PrefixChars);
+
+ char NumberBuffer[kMaxWidth];
+ ::memset(NumberBuffer, '0', llvm::array_lengthof(NumberBuffer));
+ if (Prefix)
+ NumberBuffer[1] = 'x';
+ char *EndPtr = NumberBuffer + NumChars;
+ char *CurPtr = EndPtr;
+ while (N) {
+ unsigned char x = static_cast<unsigned char>(N) % 16;
+ *--CurPtr = hexdigit(x, !Upper);
+ N /= 16;
+ }
+
+ S.write(NumberBuffer, NumChars);
+}
+
+void llvm::write_double(raw_ostream &S, double N, FloatStyle Style,
+ Optional<size_t> Precision) {
+ size_t Prec = Precision.getValueOr(getDefaultPrecision(Style));
+
+ if (std::isnan(N)) {
+ S << "nan";
+ return;
+ } else if (std::isinf(N)) {
+ S << "INF";
+ return;
+ }
+
+ char Letter;
+ if (Style == FloatStyle::Exponent)
+ Letter = 'e';
+ else if (Style == FloatStyle::ExponentUpper)
+ Letter = 'E';
+ else
+ Letter = 'f';
+
+ SmallString<8> Spec;
+ llvm::raw_svector_ostream Out(Spec);
+ Out << "%." << Prec << Letter;
+
+ if (Style == FloatStyle::Exponent || Style == FloatStyle::ExponentUpper) {
+#ifdef _WIN32
+// On MSVCRT and compatible, the output of %e is incompatible with POSIX
+// by default: the number of exponent digits should be at least 2 ("%+03d").
+// FIXME: Implement our own formatter here or in Support/Format.h!
+#if defined(__MINGW32__)
+ // FIXME: It should be generic to C++11.
+ if (N == 0.0 && std::signbit(N)) {
+ char NegativeZero[] = "-0.000000e+00";
+ if (Style == FloatStyle::ExponentUpper)
+ NegativeZero[strlen(NegativeZero) - 4] = 'E';
+ S << NegativeZero;
+ return;
+ }
+#else
+ int fpcl = _fpclass(N);
+
+ // negative zero
+ if (fpcl == _FPCLASS_NZ) {
+ char NegativeZero[] = "-0.000000e+00";
+ if (Style == FloatStyle::ExponentUpper)
+ NegativeZero[strlen(NegativeZero) - 4] = 'E';
+ S << NegativeZero;
+ return;
+ }
+#endif
+
+ char buf[32];
+ unsigned len;
+ len = format(Spec.c_str(), N).snprint(buf, sizeof(buf));
+ if (len <= sizeof(buf) - 2) {
+ if (len >= 5 && (buf[len - 5] == 'e' || buf[len - 5] == 'E') &&
+ buf[len - 3] == '0') {
+ int cs = buf[len - 4];
+ if (cs == '+' || cs == '-') {
+ int c1 = buf[len - 2];
+ int c0 = buf[len - 1];
+ if (isdigit(static_cast<unsigned char>(c1)) &&
+ isdigit(static_cast<unsigned char>(c0))) {
+ // Trim leading '0': "...e+012" -> "...e+12\0"
+ buf[len - 3] = c1;
+ buf[len - 2] = c0;
+ buf[--len] = 0;
+ }
+ }
+ }
+ S << buf;
+ return;
+ }
+#endif
+ }
+
+ if (Style == FloatStyle::Percent)
+ N *= 100.0;
+
+ char Buf[32];
+ unsigned Len;
+ Len = format(Spec.c_str(), N).snprint(Buf, sizeof(Buf));
+ if (Style == FloatStyle::Percent)
+ ++Len;
+ S << Buf;
+ if (Style == FloatStyle::Percent)
+ S << '%';
+}
+
+bool llvm::isPrefixedHexStyle(HexPrintStyle S) {
+ return (S == HexPrintStyle::PrefixLower || S == HexPrintStyle::PrefixUpper);
+}
+
+size_t llvm::getDefaultPrecision(FloatStyle Style) {
+ switch (Style) {
+ case FloatStyle::Exponent:
+ case FloatStyle::ExponentUpper:
+ return 6; // Number of decimal places.
+ case FloatStyle::Fixed:
+ case FloatStyle::Percent:
+ return 2; // Number of decimal places.
+ }
+ LLVM_BUILTIN_UNREACHABLE;
+}
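
These helpers are the low-level integer and float printers behind raw_ostream and formatv: IntegerStyle::Number inserts thousands separators via writeWithCommas(), and write_hex() sizes its buffer from the nibble count plus an optional "0x" prefix and minimum width. A hedged usage sketch, assuming the declarations in llvm/Support/NativeFormatting.h that this new file implements:

    #include "llvm/Support/NativeFormatting.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    int main() {
      write_integer(outs(), 1234567u, 0, IntegerStyle::Number); // 1,234,567
      outs() << "\n";
      write_hex(outs(), 0xbeef, HexPrintStyle::PrefixUpper, 8); // 0x00BEEF
      outs() << "\n";
      write_double(outs(), 0.1234, FloatStyle::Percent, None);  // 12.34%
      outs() << "\n";
    }
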
diff --git a/contrib/llvm/lib/Support/Path.cpp b/contrib/llvm/lib/Support/Path.cpp
index f6355d1df805..0616d05aff57 100644
--- a/contrib/llvm/lib/Support/Path.cpp
+++ b/contrib/llvm/lib/Support/Path.cpp
@@ -707,12 +707,14 @@ static SmallString<256> remove_dots(StringRef path, bool remove_dot_dot) {
for (StringRef C : llvm::make_range(path::begin(rel), path::end(rel))) {
if (C == ".")
continue;
- if (remove_dot_dot) {
- if (C == "..") {
- if (!components.empty())
- components.pop_back();
+ // Leading ".." will remain in the path unless it's at the root.
+ if (remove_dot_dot && C == "..") {
+ if (!components.empty() && components.back() != "..") {
+ components.pop_back();
continue;
}
+ if (path::is_absolute(path))
+ continue;
}
components.push_back(C);
}
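
The new behavior keeps a leading ".." in a relative path (there is nothing for it to cancel) while still dropping it at the root of an absolute path. A hedged sketch of the observable effect through the public sys::path::remove_dots() wrapper around this helper, assuming POSIX-style separators:

    #include "llvm/ADT/SmallString.h"
    #include "llvm/Support/Path.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    int main() {
      SmallString<64> Rel("../a/../b/./c");
      sys::path::remove_dots(Rel, /*remove_dot_dot=*/true);
      outs() << Rel << "\n"; // "../b/c": the leading ".." survives

      SmallString<64> Abs("/../x");
      sys::path::remove_dots(Abs, /*remove_dot_dot=*/true);
      outs() << Abs << "\n"; // "/x": ".." at the root is dropped
    }
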
@@ -978,62 +980,59 @@ void directory_entry::replace_filename(const Twine &filename, file_status st) {
Status = st;
}
+template <size_t N>
+static bool startswith(StringRef Magic, const char (&S)[N]) {
+ return Magic.startswith(StringRef(S, N - 1));
+}
+
/// @brief Identify the magic in magic.
file_magic identify_magic(StringRef Magic) {
if (Magic.size() < 4)
return file_magic::unknown;
switch ((unsigned char)Magic[0]) {
case 0x00: {
- // COFF bigobj or short import library file
- if (Magic[1] == (char)0x00 && Magic[2] == (char)0xff &&
- Magic[3] == (char)0xff) {
+ // COFF bigobj, CL.exe's LTO object file, or short import library file
+ if (startswith(Magic, "\0\0\xFF\xFF")) {
size_t MinSize = offsetof(COFF::BigObjHeader, UUID) + sizeof(COFF::BigObjMagic);
if (Magic.size() < MinSize)
return file_magic::coff_import_library;
- int BigObjVersion = read16le(
- Magic.data() + offsetof(COFF::BigObjHeader, Version));
- if (BigObjVersion < COFF::BigObjHeader::MinBigObjectVersion)
- return file_magic::coff_import_library;
-
const char *Start = Magic.data() + offsetof(COFF::BigObjHeader, UUID);
- if (memcmp(Start, COFF::BigObjMagic, sizeof(COFF::BigObjMagic)) != 0)
- return file_magic::coff_import_library;
- return file_magic::coff_object;
+ if (memcmp(Start, COFF::BigObjMagic, sizeof(COFF::BigObjMagic)) == 0)
+ return file_magic::coff_object;
+ if (memcmp(Start, COFF::ClGlObjMagic, sizeof(COFF::BigObjMagic)) == 0)
+ return file_magic::coff_cl_gl_object;
+ return file_magic::coff_import_library;
}
// Windows resource file
- const char Expected[] = { 0, 0, 0, 0, '\x20', 0, 0, 0, '\xff' };
- if (Magic.size() >= sizeof(Expected) &&
- memcmp(Magic.data(), Expected, sizeof(Expected)) == 0)
+ if (startswith(Magic, "\0\0\0\0\x20\0\0\0\xFF"))
return file_magic::windows_resource;
// 0x0000 = COFF unknown machine type
if (Magic[1] == 0)
return file_magic::coff_object;
+ if (startswith(Magic, "\0asm"))
+ return file_magic::wasm_object;
break;
}
  case 0xDE: // 0x0B17C0DE = BC wrapper
- if (Magic[1] == (char)0xC0 && Magic[2] == (char)0x17 &&
- Magic[3] == (char)0x0B)
+ if (startswith(Magic, "\xDE\xC0\x17\x0B"))
return file_magic::bitcode;
break;
case 'B':
- if (Magic[1] == 'C' && Magic[2] == (char)0xC0 && Magic[3] == (char)0xDE)
+ if (startswith(Magic, "BC\xC0\xDE"))
return file_magic::bitcode;
break;
case '!':
- if (Magic.size() >= 8)
- if (memcmp(Magic.data(), "!<arch>\n", 8) == 0 ||
- memcmp(Magic.data(), "!<thin>\n", 8) == 0)
- return file_magic::archive;
+ if (startswith(Magic, "!<arch>\n") || startswith(Magic, "!<thin>\n"))
+ return file_magic::archive;
break;
case '\177':
- if (Magic.size() >= 18 && Magic[1] == 'E' && Magic[2] == 'L' &&
- Magic[3] == 'F') {
+ if (startswith(Magic, "\177ELF") && Magic.size() >= 18) {
bool Data2MSB = Magic[5] == 2;
unsigned high = Data2MSB ? 16 : 17;
unsigned low = Data2MSB ? 17 : 16;
- if (Magic[high] == 0)
+ if (Magic[high] == 0) {
switch (Magic[low]) {
default: return file_magic::elf;
case 1: return file_magic::elf_relocatable;
@@ -1041,15 +1040,15 @@ file_magic identify_magic(StringRef Magic) {
case 3: return file_magic::elf_shared_object;
case 4: return file_magic::elf_core;
}
- else
- // It's still some type of ELF file.
- return file_magic::elf;
+ }
+ // It's still some type of ELF file.
+ return file_magic::elf;
}
break;
case 0xCA:
- if (Magic[1] == char(0xFE) && Magic[2] == char(0xBA) &&
- (Magic[3] == char(0xBE) || Magic[3] == char(0xBF))) {
+ if (startswith(Magic, "\xCA\xFE\xBA\xBE") ||
+ startswith(Magic, "\xCA\xFE\xBA\xBF")) {
// This is complicated by an overlap with Java class files.
// See the Mach-O section in /usr/share/file/magic for details.
if (Magic.size() >= 8 && Magic[7] < 43)
@@ -1064,9 +1063,8 @@ file_magic identify_magic(StringRef Magic) {
case 0xCE:
case 0xCF: {
uint16_t type = 0;
- if (Magic[0] == char(0xFE) && Magic[1] == char(0xED) &&
- Magic[2] == char(0xFA) &&
- (Magic[3] == char(0xCE) || Magic[3] == char(0xCF))) {
+ if (startswith(Magic, "\xFE\xED\xFA\xCE") ||
+ startswith(Magic, "\xFE\xED\xFA\xCF")) {
/* Native endian */
size_t MinSize;
if (Magic[3] == char(0xCE))
@@ -1075,9 +1073,8 @@ file_magic identify_magic(StringRef Magic) {
MinSize = sizeof(MachO::mach_header_64);
if (Magic.size() >= MinSize)
type = Magic[12] << 24 | Magic[13] << 12 | Magic[14] << 8 | Magic[15];
- } else if ((Magic[0] == char(0xCE) || Magic[0] == char(0xCF)) &&
- Magic[1] == char(0xFA) && Magic[2] == char(0xED) &&
- Magic[3] == char(0xFE)) {
+ } else if (startswith(Magic, "\xCE\xFA\xED\xFE") ||
+ startswith(Magic, "\xCF\xFA\xED\xFE")) {
/* Reverse endian */
size_t MinSize;
if (Magic[0] == char(0xCE))
@@ -1120,7 +1117,7 @@ file_magic identify_magic(StringRef Magic) {
break;
case 'M': // Possible MS-DOS stub on Windows PE file
- if (Magic[1] == 'Z') {
+ if (startswith(Magic, "MZ")) {
uint32_t off = read32le(Magic.data() + 0x3c);
// PE/COFF file, either EXE or DLL.
if (off < Magic.size() &&
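
identify_magic() now goes through a single startswith() helper instead of open-coded byte comparisons, and it learned two new signatures: CL.exe LTO objects ("\0\0\xFF\xFF" followed by ClGlObjMagic) and WebAssembly objects ("\0asm"). A hedged usage sketch, assuming the sys::fs::identify_magic()/file_magic interface from FileSystem.h as updated by this import:

    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/FileSystem.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    int main() {
      StringRef Wasm("\0asm\x01\0\0\0", 8); // "\0asm" plus a version word
      StringRef Bitcode("BC\xC0\xDE", 4);   // raw LLVM bitcode magic
      if (sys::fs::identify_magic(Wasm) == sys::fs::file_magic::wasm_object)
        outs() << "wasm object\n";
      if (sys::fs::identify_magic(Bitcode) == sys::fs::file_magic::bitcode)
        outs() << "bitcode\n";
    }
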
diff --git a/contrib/llvm/lib/Support/PrettyStackTrace.cpp b/contrib/llvm/lib/Support/PrettyStackTrace.cpp
index ebad67bb8dce..5b079ff211fe 100644
--- a/contrib/llvm/lib/Support/PrettyStackTrace.cpp
+++ b/contrib/llvm/lib/Support/PrettyStackTrace.cpp
@@ -21,6 +21,7 @@
#include "llvm/Support/Watchdog.h"
#include "llvm/Support/raw_ostream.h"
+#include <cstdarg>
#include <tuple>
#ifdef HAVE_CRASHREPORTERCLIENT_H
@@ -32,7 +33,7 @@ using namespace llvm;
// If backtrace support is not enabled, compile out support for pretty stack
// traces. This has the secondary effect of not requiring thread local storage
// when backtrace support is disabled.
-#if defined(HAVE_BACKTRACE) && defined(ENABLE_BACKTRACES)
+#if defined(HAVE_BACKTRACE) && ENABLE_BACKTRACES
// We need a thread local pointer to manage the stack of our stack trace
// objects, but we *really* cannot tolerate destructors running and do not want
@@ -79,7 +80,7 @@ static void PrintCurStackTrace(raw_ostream &OS) {
}
// Integrate with crash reporter libraries.
-#if defined (__APPLE__) && HAVE_CRASHREPORTERCLIENT_H
+#if defined (__APPLE__) && defined(HAVE_CRASHREPORTERCLIENT_H)
// If any clients of llvm try to link to libCrashReporterClient.a themselves,
// only one crash info struct will be used.
extern "C" {
@@ -88,12 +89,12 @@ struct crashreporter_annotations_t gCRAnnotations
__attribute__((section("__DATA," CRASHREPORTER_ANNOTATIONS_SECTION)))
= { CRASHREPORTER_ANNOTATIONS_VERSION, 0, 0, 0, 0, 0, 0 };
}
-#elif defined (__APPLE__) && HAVE_CRASHREPORTER_INFO
-static const char *__crashreporter_info__ = 0;
+#elif defined(__APPLE__) && HAVE_CRASHREPORTER_INFO
+extern "C" const char *__crashreporter_info__
+ __attribute__((visibility("hidden"))) = 0;
asm(".desc ___crashreporter_info__, 0x10");
#endif
-
/// CrashHandler - This callback is run if a fatal signal is delivered to the
/// process, it prints the pretty stack trace.
static void CrashHandler(void *) {
@@ -122,11 +123,11 @@ static void CrashHandler(void *) {
#endif
}
-// defined(HAVE_BACKTRACE) && defined(ENABLE_BACKTRACES)
+// defined(HAVE_BACKTRACE) && ENABLE_BACKTRACES
#endif
PrettyStackTraceEntry::PrettyStackTraceEntry() {
-#if defined(HAVE_BACKTRACE) && defined(ENABLE_BACKTRACES)
+#if defined(HAVE_BACKTRACE) && ENABLE_BACKTRACES
// Link ourselves.
NextEntry = PrettyStackTraceHead;
PrettyStackTraceHead = this;
@@ -134,17 +135,33 @@ PrettyStackTraceEntry::PrettyStackTraceEntry() {
}
PrettyStackTraceEntry::~PrettyStackTraceEntry() {
-#if defined(HAVE_BACKTRACE) && defined(ENABLE_BACKTRACES)
+#if defined(HAVE_BACKTRACE) && ENABLE_BACKTRACES
assert(PrettyStackTraceHead == this &&
"Pretty stack trace entry destruction is out of order");
PrettyStackTraceHead = NextEntry;
#endif
}
-void PrettyStackTraceString::print(raw_ostream &OS) const {
- OS << Str << "\n";
+void PrettyStackTraceString::print(raw_ostream &OS) const { OS << Str << "\n"; }
+
+PrettyStackTraceFormat::PrettyStackTraceFormat(const char *Format, ...) {
+ va_list AP;
+ va_start(AP, Format);
+ const int SizeOrError = vsnprintf(nullptr, 0, Format, AP);
+ va_end(AP);
+ if (SizeOrError < 0) {
+ return;
+ }
+
+ const int Size = SizeOrError + 1; // '\0'
+ Str.resize(Size);
+ va_start(AP, Format);
+ vsnprintf(Str.data(), Size, Format, AP);
+ va_end(AP);
}
+void PrettyStackTraceFormat::print(raw_ostream &OS) const { OS << Str << "\n"; }
+
void PrettyStackTraceProgram::print(raw_ostream &OS) const {
OS << "Program arguments: ";
// Print the argument list.
@@ -153,7 +170,7 @@ void PrettyStackTraceProgram::print(raw_ostream &OS) const {
OS << '\n';
}
-#if defined(HAVE_BACKTRACE) && defined(ENABLE_BACKTRACES)
+#if defined(HAVE_BACKTRACE) && ENABLE_BACKTRACES
static bool RegisterCrashPrinter() {
sys::AddSignalHandler(CrashHandler, nullptr);
return false;
@@ -161,7 +178,7 @@ static bool RegisterCrashPrinter() {
#endif
void llvm::EnablePrettyStackTrace() {
-#if defined(HAVE_BACKTRACE) && defined(ENABLE_BACKTRACES)
+#if defined(HAVE_BACKTRACE) && ENABLE_BACKTRACES
// The first time this is called, we register the crash printer.
static bool HandlerRegistered = RegisterCrashPrinter();
(void)HandlerRegistered;
@@ -169,7 +186,7 @@ void llvm::EnablePrettyStackTrace() {
}
const void *llvm::SavePrettyStackState() {
-#if defined(HAVE_BACKTRACE) && defined(ENABLE_BACKTRACES)
+#if defined(HAVE_BACKTRACE) && ENABLE_BACKTRACES
return PrettyStackTraceHead;
#else
return nullptr;
@@ -177,7 +194,7 @@ const void *llvm::SavePrettyStackState() {
}
void llvm::RestorePrettyStackState(const void *Top) {
-#if defined(HAVE_BACKTRACE) && defined(ENABLE_BACKTRACES)
+#if defined(HAVE_BACKTRACE) && ENABLE_BACKTRACES
PrettyStackTraceHead =
static_cast<PrettyStackTraceEntry *>(const_cast<void *>(Top));
#endif
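
PrettyStackTraceFormat sizes its buffer with a first vsnprintf(nullptr, 0, ...) pass and then formats into it, so arbitrary printf-style context can be pushed onto the pretty stack trace. A hedged usage sketch of the new entry point (the surrounding program and message are illustrative only):

    #include "llvm/Support/PrettyStackTrace.h"

    using namespace llvm;

    static void processEntry(const char *Name, int Index) {
      // If the program crashes while this object is live, the formatted text
      // is printed as one frame of the pretty stack trace.
      PrettyStackTraceFormat Frame("processing %s (entry #%d)", Name, Index);
      // ... real work would go here ...
    }

    int main() {
      EnablePrettyStackTrace();
      processEntry("example.bc", 0);
      return 0;
    }
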
diff --git a/contrib/llvm/lib/Support/RandomNumberGenerator.cpp b/contrib/llvm/lib/Support/RandomNumberGenerator.cpp
index 81d0411d60b4..8ea02d709df1 100644
--- a/contrib/llvm/lib/Support/RandomNumberGenerator.cpp
+++ b/contrib/llvm/lib/Support/RandomNumberGenerator.cpp
@@ -17,6 +17,11 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#ifdef LLVM_ON_WIN32
+#include "Windows/WindowsSupport.h"
+#else
+#include "Unix/Unix.h"
+#endif
using namespace llvm;
@@ -42,16 +47,45 @@ RandomNumberGenerator::RandomNumberGenerator(StringRef Salt) {
// are using a 64-bit RNG. This isn't a problem since the Mersenne
// twister constructor copies these correctly into its initial state.
std::vector<uint32_t> Data;
- Data.reserve(2 + Salt.size());
- Data.push_back(Seed);
- Data.push_back(Seed >> 32);
+ Data.resize(2 + Salt.size());
+ Data[0] = Seed;
+ Data[1] = Seed >> 32;
- std::copy(Salt.begin(), Salt.end(), Data.end());
+ std::copy(Salt.begin(), Salt.end(), Data.begin() + 2);
std::seed_seq SeedSeq(Data.begin(), Data.end());
Generator.seed(SeedSeq);
}
-uint_fast64_t RandomNumberGenerator::operator()() {
+RandomNumberGenerator::result_type RandomNumberGenerator::operator()() {
return Generator();
}
+
+// Fill Buffer with Size random bytes from the system entropy source.
+std::error_code llvm::getRandomBytes(void *Buffer, size_t Size) {
+#ifdef LLVM_ON_WIN32
+ HCRYPTPROV hProvider;
+ if (CryptAcquireContext(&hProvider, 0, 0, PROV_RSA_FULL,
+ CRYPT_VERIFYCONTEXT | CRYPT_SILENT)) {
+ ScopedCryptContext ScopedHandle(hProvider);
+ if (CryptGenRandom(hProvider, Size, static_cast<BYTE *>(Buffer)))
+ return std::error_code();
+ }
+ return std::error_code(GetLastError(), std::system_category());
+#else
+ int Fd = open("/dev/urandom", O_RDONLY);
+ if (Fd != -1) {
+ std::error_code Ret;
+ ssize_t BytesRead = read(Fd, Buffer, Size);
+ if (BytesRead == -1)
+ Ret = std::error_code(errno, std::system_category());
+ else if (BytesRead != static_cast<ssize_t>(Size))
+ Ret = std::error_code(EIO, std::system_category());
+ if (close(Fd) == -1)
+ Ret = std::error_code(errno, std::system_category());
+
+ return Ret;
+ }
+ return std::error_code(errno, std::system_category());
+#endif
+}
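
llvm::getRandomBytes() fills a caller-supplied buffer from the platform entropy source (CryptGenRandom on Windows, /dev/urandom elsewhere) and reports failure through std::error_code. A hedged usage sketch, assuming the declaration lives next to RandomNumberGenerator in its header:

    #include "llvm/Support/Format.h"
    #include "llvm/Support/RandomNumberGenerator.h"
    #include "llvm/Support/raw_ostream.h"
    #include <array>
    #include <cstdint>

    using namespace llvm;

    int main() {
      std::array<uint8_t, 16> Buf;
      if (std::error_code EC = getRandomBytes(Buf.data(), Buf.size())) {
        errs() << "getRandomBytes: " << EC.message() << "\n";
        return 1;
      }
      for (uint8_t B : Buf)
        outs() << format("%02x", unsigned(B));
      outs() << "\n";
    }
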
diff --git a/contrib/llvm/lib/Support/Regex.cpp b/contrib/llvm/lib/Support/Regex.cpp
index e8344ef74d9c..68ba79e11766 100644
--- a/contrib/llvm/lib/Support/Regex.cpp
+++ b/contrib/llvm/lib/Support/Regex.cpp
@@ -19,6 +19,8 @@
#include <string>
using namespace llvm;
+Regex::Regex() : preg(nullptr), error(REG_BADPAT) {}
+
Regex::Regex(StringRef regex, unsigned Flags) {
unsigned flags = 0;
preg = new llvm_regex();
@@ -32,6 +34,13 @@ Regex::Regex(StringRef regex, unsigned Flags) {
error = llvm_regcomp(preg, regex.data(), flags|REG_PEND);
}
+Regex::Regex(Regex &&regex) {
+ preg = regex.preg;
+ error = regex.error;
+ regex.preg = nullptr;
+ regex.error = REG_BADPAT;
+}
+
Regex::~Regex() {
if (preg) {
llvm_regfree(preg);
@@ -57,6 +66,9 @@ unsigned Regex::getNumMatches() const {
}
bool Regex::match(StringRef String, SmallVectorImpl<StringRef> *Matches){
+ if (error)
+ return false;
+
unsigned nmatch = Matches ? preg->re_nsub+1 : 0;
// pmatch needs to have at least one element.
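
A short sketch (not part of the diff) of what the new Regex members allow: default construction, move construction into containers, and a match() that simply returns false on an invalid or default-constructed object.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Regex.h"
#include <vector>

bool anyLineHasDigits(llvm::ArrayRef<llvm::StringRef> Lines) {
  std::vector<llvm::Regex> Pool;
  Pool.push_back(llvm::Regex("[0-9]+")); // relies on the new move constructor
  llvm::Regex Unset;                     // default-constructed, error == REG_BADPAT
  for (llvm::StringRef L : Lines)
    if (Unset.match(L) || Pool.back().match(L)) // Unset.match() now just returns false
      return true;
  return false;
}
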
diff --git a/contrib/llvm/lib/Support/SHA1.cpp b/contrib/llvm/lib/Support/SHA1.cpp
index a461d1ebc93d..0eefd998cd75 100644
--- a/contrib/llvm/lib/Support/SHA1.cpp
+++ b/contrib/llvm/lib/Support/SHA1.cpp
@@ -6,8 +6,10 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
+//
// This code is taken from public domain
-// (http://oauth.googlecode.com/svn/code/c/liboauth/src/sha1.c)
+// (http://oauth.googlecode.com/svn/code/c/liboauth/src/sha1.c and
+// http://cvsweb.netbsd.org/bsdweb.cgi/src/common/lib/libc/hash/sha1/sha1.c?rev=1.6)
// and modified by wrapping it in a C++ interface for LLVM,
// and removing unnecessary code.
//
@@ -25,6 +27,49 @@ using namespace llvm;
#define SHA_BIG_ENDIAN
#endif
+static uint32_t rol(uint32_t Number, int Bits) {
+ return (Number << Bits) | (Number >> (32 - Bits));
+}
+
+static uint32_t blk0(uint32_t *Buf, int I) { return Buf[I]; }
+
+static uint32_t blk(uint32_t *Buf, int I) {
+ Buf[I & 15] = rol(Buf[(I + 13) & 15] ^ Buf[(I + 8) & 15] ^ Buf[(I + 2) & 15] ^
+ Buf[I & 15],
+ 1);
+ return Buf[I & 15];
+}
+
+static void r0(uint32_t &A, uint32_t &B, uint32_t &C, uint32_t &D, uint32_t &E,
+ int I, uint32_t *Buf) {
+ E += ((B & (C ^ D)) ^ D) + blk0(Buf, I) + 0x5A827999 + rol(A, 5);
+ B = rol(B, 30);
+}
+
+static void r1(uint32_t &A, uint32_t &B, uint32_t &C, uint32_t &D, uint32_t &E,
+ int I, uint32_t *Buf) {
+ E += ((B & (C ^ D)) ^ D) + blk(Buf, I) + 0x5A827999 + rol(A, 5);
+ B = rol(B, 30);
+}
+
+static void r2(uint32_t &A, uint32_t &B, uint32_t &C, uint32_t &D, uint32_t &E,
+ int I, uint32_t *Buf) {
+ E += (B ^ C ^ D) + blk(Buf, I) + 0x6ED9EBA1 + rol(A, 5);
+ B = rol(B, 30);
+}
+
+static void r3(uint32_t &A, uint32_t &B, uint32_t &C, uint32_t &D, uint32_t &E,
+ int I, uint32_t *Buf) {
+ E += (((B | C) & D) | (B & C)) + blk(Buf, I) + 0x8F1BBCDC + rol(A, 5);
+ B = rol(B, 30);
+}
+
+static void r4(uint32_t &A, uint32_t &B, uint32_t &C, uint32_t &D, uint32_t &E,
+ int I, uint32_t *Buf) {
+ E += (B ^ C ^ D) + blk(Buf, I) + 0xCA62C1D6 + rol(A, 5);
+ B = rol(B, 30);
+}
+
/* code */
#define SHA1_K0 0x5a827999
#define SHA1_K20 0x6ed9eba1
@@ -47,56 +92,112 @@ void SHA1::init() {
InternalState.BufferOffset = 0;
}
-static uint32_t rol32(uint32_t number, uint8_t bits) {
- return ((number << bits) | (number >> (32 - bits)));
-}
-
void SHA1::hashBlock() {
- uint8_t i;
- uint32_t a, b, c, d, e, t;
-
- a = InternalState.State[0];
- b = InternalState.State[1];
- c = InternalState.State[2];
- d = InternalState.State[3];
- e = InternalState.State[4];
- for (i = 0; i < 80; i++) {
- if (i >= 16) {
- t = InternalState.Buffer[(i + 13) & 15] ^
- InternalState.Buffer[(i + 8) & 15] ^
- InternalState.Buffer[(i + 2) & 15] ^ InternalState.Buffer[i & 15];
- InternalState.Buffer[i & 15] = rol32(t, 1);
- }
- if (i < 20) {
- t = (d ^ (b & (c ^ d))) + SHA1_K0;
- } else if (i < 40) {
- t = (b ^ c ^ d) + SHA1_K20;
- } else if (i < 60) {
- t = ((b & c) | (d & (b | c))) + SHA1_K40;
- } else {
- t = (b ^ c ^ d) + SHA1_K60;
- }
- t += rol32(a, 5) + e + InternalState.Buffer[i & 15];
- e = d;
- d = c;
- c = rol32(b, 30);
- b = a;
- a = t;
- }
- InternalState.State[0] += a;
- InternalState.State[1] += b;
- InternalState.State[2] += c;
- InternalState.State[3] += d;
- InternalState.State[4] += e;
+ uint32_t A = InternalState.State[0];
+ uint32_t B = InternalState.State[1];
+ uint32_t C = InternalState.State[2];
+ uint32_t D = InternalState.State[3];
+ uint32_t E = InternalState.State[4];
+
+ // 4 rounds of 20 operations each. Loop unrolled.
+ r0(A, B, C, D, E, 0, InternalState.Buffer.L);
+ r0(E, A, B, C, D, 1, InternalState.Buffer.L);
+ r0(D, E, A, B, C, 2, InternalState.Buffer.L);
+ r0(C, D, E, A, B, 3, InternalState.Buffer.L);
+ r0(B, C, D, E, A, 4, InternalState.Buffer.L);
+ r0(A, B, C, D, E, 5, InternalState.Buffer.L);
+ r0(E, A, B, C, D, 6, InternalState.Buffer.L);
+ r0(D, E, A, B, C, 7, InternalState.Buffer.L);
+ r0(C, D, E, A, B, 8, InternalState.Buffer.L);
+ r0(B, C, D, E, A, 9, InternalState.Buffer.L);
+ r0(A, B, C, D, E, 10, InternalState.Buffer.L);
+ r0(E, A, B, C, D, 11, InternalState.Buffer.L);
+ r0(D, E, A, B, C, 12, InternalState.Buffer.L);
+ r0(C, D, E, A, B, 13, InternalState.Buffer.L);
+ r0(B, C, D, E, A, 14, InternalState.Buffer.L);
+ r0(A, B, C, D, E, 15, InternalState.Buffer.L);
+ r1(E, A, B, C, D, 16, InternalState.Buffer.L);
+ r1(D, E, A, B, C, 17, InternalState.Buffer.L);
+ r1(C, D, E, A, B, 18, InternalState.Buffer.L);
+ r1(B, C, D, E, A, 19, InternalState.Buffer.L);
+
+ r2(A, B, C, D, E, 20, InternalState.Buffer.L);
+ r2(E, A, B, C, D, 21, InternalState.Buffer.L);
+ r2(D, E, A, B, C, 22, InternalState.Buffer.L);
+ r2(C, D, E, A, B, 23, InternalState.Buffer.L);
+ r2(B, C, D, E, A, 24, InternalState.Buffer.L);
+ r2(A, B, C, D, E, 25, InternalState.Buffer.L);
+ r2(E, A, B, C, D, 26, InternalState.Buffer.L);
+ r2(D, E, A, B, C, 27, InternalState.Buffer.L);
+ r2(C, D, E, A, B, 28, InternalState.Buffer.L);
+ r2(B, C, D, E, A, 29, InternalState.Buffer.L);
+ r2(A, B, C, D, E, 30, InternalState.Buffer.L);
+ r2(E, A, B, C, D, 31, InternalState.Buffer.L);
+ r2(D, E, A, B, C, 32, InternalState.Buffer.L);
+ r2(C, D, E, A, B, 33, InternalState.Buffer.L);
+ r2(B, C, D, E, A, 34, InternalState.Buffer.L);
+ r2(A, B, C, D, E, 35, InternalState.Buffer.L);
+ r2(E, A, B, C, D, 36, InternalState.Buffer.L);
+ r2(D, E, A, B, C, 37, InternalState.Buffer.L);
+ r2(C, D, E, A, B, 38, InternalState.Buffer.L);
+ r2(B, C, D, E, A, 39, InternalState.Buffer.L);
+
+ r3(A, B, C, D, E, 40, InternalState.Buffer.L);
+ r3(E, A, B, C, D, 41, InternalState.Buffer.L);
+ r3(D, E, A, B, C, 42, InternalState.Buffer.L);
+ r3(C, D, E, A, B, 43, InternalState.Buffer.L);
+ r3(B, C, D, E, A, 44, InternalState.Buffer.L);
+ r3(A, B, C, D, E, 45, InternalState.Buffer.L);
+ r3(E, A, B, C, D, 46, InternalState.Buffer.L);
+ r3(D, E, A, B, C, 47, InternalState.Buffer.L);
+ r3(C, D, E, A, B, 48, InternalState.Buffer.L);
+ r3(B, C, D, E, A, 49, InternalState.Buffer.L);
+ r3(A, B, C, D, E, 50, InternalState.Buffer.L);
+ r3(E, A, B, C, D, 51, InternalState.Buffer.L);
+ r3(D, E, A, B, C, 52, InternalState.Buffer.L);
+ r3(C, D, E, A, B, 53, InternalState.Buffer.L);
+ r3(B, C, D, E, A, 54, InternalState.Buffer.L);
+ r3(A, B, C, D, E, 55, InternalState.Buffer.L);
+ r3(E, A, B, C, D, 56, InternalState.Buffer.L);
+ r3(D, E, A, B, C, 57, InternalState.Buffer.L);
+ r3(C, D, E, A, B, 58, InternalState.Buffer.L);
+ r3(B, C, D, E, A, 59, InternalState.Buffer.L);
+
+ r4(A, B, C, D, E, 60, InternalState.Buffer.L);
+ r4(E, A, B, C, D, 61, InternalState.Buffer.L);
+ r4(D, E, A, B, C, 62, InternalState.Buffer.L);
+ r4(C, D, E, A, B, 63, InternalState.Buffer.L);
+ r4(B, C, D, E, A, 64, InternalState.Buffer.L);
+ r4(A, B, C, D, E, 65, InternalState.Buffer.L);
+ r4(E, A, B, C, D, 66, InternalState.Buffer.L);
+ r4(D, E, A, B, C, 67, InternalState.Buffer.L);
+ r4(C, D, E, A, B, 68, InternalState.Buffer.L);
+ r4(B, C, D, E, A, 69, InternalState.Buffer.L);
+ r4(A, B, C, D, E, 70, InternalState.Buffer.L);
+ r4(E, A, B, C, D, 71, InternalState.Buffer.L);
+ r4(D, E, A, B, C, 72, InternalState.Buffer.L);
+ r4(C, D, E, A, B, 73, InternalState.Buffer.L);
+ r4(B, C, D, E, A, 74, InternalState.Buffer.L);
+ r4(A, B, C, D, E, 75, InternalState.Buffer.L);
+ r4(E, A, B, C, D, 76, InternalState.Buffer.L);
+ r4(D, E, A, B, C, 77, InternalState.Buffer.L);
+ r4(C, D, E, A, B, 78, InternalState.Buffer.L);
+ r4(B, C, D, E, A, 79, InternalState.Buffer.L);
+
+ InternalState.State[0] += A;
+ InternalState.State[1] += B;
+ InternalState.State[2] += C;
+ InternalState.State[3] += D;
+ InternalState.State[4] += E;
}
-void SHA1::addUncounted(uint8_t data) {
- uint8_t *const b = (uint8_t *)InternalState.Buffer;
+void SHA1::addUncounted(uint8_t Data) {
#ifdef SHA_BIG_ENDIAN
- b[InternalState.BufferOffset] = data;
+ InternalState.Buffer.C[InternalState.BufferOffset] = Data;
#else
- b[InternalState.BufferOffset ^ 3] = data;
+ InternalState.Buffer.C[InternalState.BufferOffset ^ 3] = Data;
#endif
+
InternalState.BufferOffset++;
if (InternalState.BufferOffset == BLOCK_LENGTH) {
hashBlock();
@@ -104,9 +205,9 @@ void SHA1::addUncounted(uint8_t data) {
}
}
-void SHA1::writebyte(uint8_t data) {
+void SHA1::writebyte(uint8_t Data) {
++InternalState.ByteCount;
- addUncounted(data);
+ addUncounted(Data);
}
void SHA1::update(ArrayRef<uint8_t> Data) {
@@ -168,3 +269,13 @@ StringRef SHA1::result() {
// Return pointer to hash (20 characters)
return Hash;
}
+
+std::array<uint8_t, 20> SHA1::hash(ArrayRef<uint8_t> Data) {
+ SHA1 Hash;
+ Hash.update(Data);
+ StringRef S = Hash.final();
+
+ std::array<uint8_t, 20> Arr;
+ memcpy(Arr.data(), S.data(), S.size());
+ return Arr;
+}
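
A minimal sketch (not part of the diff) of the new one-shot SHA1::hash entry point added above:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/SHA1.h"
#include <array>
#include <cstdint>

std::array<uint8_t, 20> sha1OfString(llvm::StringRef S) {
  // Equivalent to constructing a SHA1, calling update() and final(), but
  // returns the 20-byte digest by value.
  return llvm::SHA1::hash(llvm::ArrayRef<uint8_t>(
      reinterpret_cast<const uint8_t *>(S.data()), S.size()));
}
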
diff --git a/contrib/llvm/lib/Support/ScaledNumber.cpp b/contrib/llvm/lib/Support/ScaledNumber.cpp
index b9432d46f9bb..807c9fa521de 100644
--- a/contrib/llvm/lib/Support/ScaledNumber.cpp
+++ b/contrib/llvm/lib/Support/ScaledNumber.cpp
@@ -183,7 +183,7 @@ static std::string toStringAPFloat(uint64_t D, int E, unsigned Precision) {
// Build the float and print it.
uint64_t RawBits[2] = {D, AdjustedE};
- APFloat Float(APFloat::x87DoubleExtended, APInt(80, RawBits));
+ APFloat Float(APFloat::x87DoubleExtended(), APInt(80, RawBits));
SmallVector<char, 24> Chars;
Float.toString(Chars, Precision, 0);
return std::string(Chars.begin(), Chars.end());
diff --git a/contrib/llvm/lib/Support/ScopedPrinter.cpp b/contrib/llvm/lib/Support/ScopedPrinter.cpp
index 0225f01156a9..d8ee1efd8f3e 100644
--- a/contrib/llvm/lib/Support/ScopedPrinter.cpp
+++ b/contrib/llvm/lib/Support/ScopedPrinter.cpp
@@ -27,45 +27,18 @@ void ScopedPrinter::printBinaryImpl(StringRef Label, StringRef Str,
if (Block) {
startLine() << Label;
- if (Str.size() > 0)
+ if (!Str.empty())
OS << ": " << Str;
OS << " (\n";
- for (size_t addr = 0, end = Data.size(); addr < end; addr += 16) {
- startLine() << format(" %04" PRIX64 ": ", uint64_t(addr));
- // Dump line of hex.
- for (size_t i = 0; i < 16; ++i) {
- if (i != 0 && i % 4 == 0)
- OS << ' ';
- if (addr + i < end)
- OS << hexdigit((Data[addr + i] >> 4) & 0xF, false)
- << hexdigit(Data[addr + i] & 0xF, false);
- else
- OS << " ";
- }
- // Print ascii.
- OS << " |";
- for (std::size_t i = 0; i < 16 && addr + i < end; ++i) {
- if (std::isprint(Data[addr + i] & 0xFF))
- OS << Data[addr + i];
- else
- OS << ".";
- }
- OS << "|\n";
- }
-
+ if (!Data.empty())
+ OS << format_bytes_with_ascii(Data, 0, 16, 4, (IndentLevel + 1) * 2, true)
+ << "\n";
startLine() << ")\n";
} else {
startLine() << Label << ":";
- if (Str.size() > 0)
+ if (!Str.empty())
OS << " " << Str;
- OS << " (";
- for (size_t i = 0; i < Data.size(); ++i) {
- if (i > 0)
- OS << " ";
-
- OS << format("%02X", static_cast<int>(Data[i]));
- }
- OS << ")\n";
+ OS << " (" << format_bytes(Data, None, Data.size(), 1, 0, true) << ")\n";
}
}
diff --git a/contrib/llvm/lib/Support/SmallPtrSet.cpp b/contrib/llvm/lib/Support/SmallPtrSet.cpp
index 539b4eb34da1..aa12e85fa4c0 100644
--- a/contrib/llvm/lib/Support/SmallPtrSet.cpp
+++ b/contrib/llvm/lib/Support/SmallPtrSet.cpp
@@ -16,6 +16,7 @@
#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
+#include <cassert>
#include <cstdlib>
using namespace llvm;
@@ -60,38 +61,13 @@ SmallPtrSetImplBase::insert_imp_big(const void *Ptr) {
return std::make_pair(Bucket, true);
}
-bool SmallPtrSetImplBase::erase_imp(const void * Ptr) {
- if (isSmall()) {
- // Check to see if it is in the set.
- for (const void **APtr = CurArray, **E = CurArray + NumNonEmpty; APtr != E;
- ++APtr)
- if (*APtr == Ptr) {
- // If it is in the set, replace this element.
- *APtr = getTombstoneMarker();
- ++NumTombstones;
- return true;
- }
-
- return false;
- }
-
- // Okay, we know we have space. Find a hash bucket.
- void **Bucket = const_cast<void**>(FindBucketFor(Ptr));
- if (*Bucket != Ptr) return false; // Not in the set?
-
- // Set this as a tombstone.
- *Bucket = getTombstoneMarker();
- ++NumTombstones;
- return true;
-}
-
const void * const *SmallPtrSetImplBase::FindBucketFor(const void *Ptr) const {
unsigned Bucket = DenseMapInfo<void *>::getHashValue(Ptr) & (CurArraySize-1);
unsigned ArraySize = CurArraySize;
unsigned ProbeAmt = 1;
const void *const *Array = CurArray;
const void *const *Tombstone = nullptr;
- while (1) {
+ while (true) {
// If we found an empty bucket, the pointer doesn't exist in the set.
// Return a tombstone if we've seen one so far, or the empty bucket if
// not.
diff --git a/contrib/llvm/lib/Support/SourceMgr.cpp b/contrib/llvm/lib/Support/SourceMgr.cpp
index 6d44a4d51f60..4cb9b2ff2cda 100644
--- a/contrib/llvm/lib/Support/SourceMgr.cpp
+++ b/contrib/llvm/lib/Support/SourceMgr.cpp
@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/SourceMgr.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Locale.h"
#include "llvm/Support/MemoryBuffer.h"
@@ -25,8 +26,8 @@ static const size_t TabStop = 8;
namespace {
struct LineNoCacheTy {
- unsigned LastQueryBufferID;
const char *LastQuery;
+ unsigned LastQueryBufferID;
unsigned LineNoOfQuery;
};
}
@@ -141,7 +142,7 @@ SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind,
// location to pull out the source line.
SmallVector<std::pair<unsigned, unsigned>, 4> ColRanges;
std::pair<unsigned, unsigned> LineAndCol;
- const char *BufferID = "<unknown>";
+ StringRef BufferID = "<unknown>";
std::string LineStr;
if (Loc.isValid()) {
@@ -395,8 +396,7 @@ void SMDiagnostic::print(const char *ProgName, raw_ostream &S, bool ShowColors,
// map like Clang's TextDiagnostic. For now, we'll just handle tabs by
// expanding them later, and bail out rather than show incorrect ranges and
// misaligned fixits for any other odd characters.
- if (std::find_if(LineContents.begin(), LineContents.end(), isNonASCII) !=
- LineContents.end()) {
+ if (find_if(LineContents, isNonASCII) != LineContents.end()) {
printSourceLine(S, LineContents);
return;
}
diff --git a/contrib/llvm/lib/Support/SpecialCaseList.cpp b/contrib/llvm/lib/Support/SpecialCaseList.cpp
index 0ffe4444a179..df524b352351 100644
--- a/contrib/llvm/lib/Support/SpecialCaseList.cpp
+++ b/contrib/llvm/lib/Support/SpecialCaseList.cpp
@@ -15,6 +15,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/SpecialCaseList.h"
+#include "llvm/Support/TrigramIndex.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSet.h"
@@ -32,15 +33,16 @@ namespace llvm {
/// reason for doing so is efficiency; StringSet is much faster at matching
/// literal strings than Regex.
struct SpecialCaseList::Entry {
- Entry() {}
- Entry(Entry &&Other)
- : Strings(std::move(Other.Strings)), RegEx(std::move(Other.RegEx)) {}
-
StringSet<> Strings;
+ TrigramIndex Trigrams;
std::unique_ptr<Regex> RegEx;
bool match(StringRef Query) const {
- return Strings.count(Query) || (RegEx && RegEx->match(Query));
+ if (Strings.count(Query))
+ return true;
+ if (Trigrams.isDefinitelyOut(Query))
+ return false;
+ return RegEx && RegEx->match(Query);
}
};
@@ -108,13 +110,15 @@ bool SpecialCaseList::parse(const MemoryBuffer *MB, std::string &Error) {
StringRef Category = SplitRegexp.second;
// See if we can store Regexp in Strings.
+ auto &Entry = Entries[Prefix][Category];
if (Regex::isLiteralERE(Regexp)) {
- Entries[Prefix][Category].Strings.insert(Regexp);
+ Entry.Strings.insert(Regexp);
continue;
}
+ Entry.Trigrams.insert(Regexp);
// Replace * with .*
- for (size_t pos = 0; (pos = Regexp.find("*", pos)) != std::string::npos;
+ for (size_t pos = 0; (pos = Regexp.find('*', pos)) != std::string::npos;
pos += strlen(".*")) {
Regexp.replace(pos, strlen("*"), ".*");
}
diff --git a/contrib/llvm/lib/Support/Statistic.cpp b/contrib/llvm/lib/Support/Statistic.cpp
index cdd36794507e..0c50dfd27d61 100644
--- a/contrib/llvm/lib/Support/Statistic.cpp
+++ b/contrib/llvm/lib/Support/Statistic.cpp
@@ -29,7 +29,9 @@
#include "llvm/Support/Format.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Mutex.h"
+#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/YAMLTraits.h"
#include <algorithm>
#include <cstring>
using namespace llvm;
@@ -37,15 +39,16 @@ using namespace llvm;
/// -stats - Command line option to cause transformations to emit stats about
/// what they did.
///
-static cl::opt<bool>
-Enabled(
- "stats",
+static cl::opt<bool> Stats("stats",
cl::desc("Enable statistics output from program (available with Asserts)"));
static cl::opt<bool> StatsAsJSON("stats-json",
cl::desc("Display statistics as json data"));
+static bool Enabled;
+static bool PrintOnExit;
+
namespace {
/// StatisticInfo - This class is used in a ManagedStatic so that it is created
/// on demand (when the first statistic is bumped) and destroyed only when
@@ -59,6 +62,7 @@ class StatisticInfo {
/// Sort statistics by debugtype,name,description.
void sort();
public:
+ StatisticInfo();
~StatisticInfo();
void addStatistic(const Statistic *S) {
@@ -77,7 +81,7 @@ void Statistic::RegisterStatistic() {
// printed.
sys::SmartScopedLock<true> Writer(*StatLock);
if (!Initialized) {
- if (Enabled)
+ if (Stats || Enabled)
StatInfo->addStatistic(this);
TsanHappensBefore(this);
@@ -89,17 +93,24 @@ void Statistic::RegisterStatistic() {
}
}
+StatisticInfo::StatisticInfo() {
+ // Ensure timergroup lists are created first so they are destructed after us.
+ TimerGroup::ConstructTimerLists();
+}
+
// Print information when destroyed, iff command line option is specified.
StatisticInfo::~StatisticInfo() {
- llvm::PrintStatistics();
+ if (::Stats || PrintOnExit)
+ llvm::PrintStatistics();
}
-void llvm::EnableStatistics() {
- Enabled.setValue(true);
+void llvm::EnableStatistics(bool PrintOnExit) {
+ Enabled = true;
+ ::PrintOnExit = PrintOnExit;
}
bool llvm::AreStatisticsEnabled() {
- return Enabled;
+ return Enabled || Stats;
}
void StatisticInfo::sort() {
@@ -145,17 +156,6 @@ void llvm::PrintStatistics(raw_ostream &OS) {
OS.flush();
}
-static void write_json_string_escaped(raw_ostream &OS, const char *string) {
- // Out current usage should not need any escaping. Keep it simple and just
- // check that the input is pure ASCII without special characers.
-#ifndef NDEBUG
- for (const unsigned char *c = (const unsigned char*)string; *c != '\0'; ++c) {
- assert(*c != '\\' && *c != '\"' && *c >= 0x20 && *c < 0x80);
- }
-#endif
- OS << string;
-}
-
void llvm::PrintStatisticsJSON(raw_ostream &OS) {
StatisticInfo &Stats = *StatInfo;
@@ -166,13 +166,16 @@ void llvm::PrintStatisticsJSON(raw_ostream &OS) {
const char *delim = "";
for (const Statistic *Stat : Stats.Stats) {
OS << delim;
- OS << "\t\"";
- write_json_string_escaped(OS, Stat->getDebugType());
- OS << '.';
- write_json_string_escaped(OS, Stat->getName());
- OS << "\": " << Stat->getValue();
+ assert(!yaml::needsQuotes(Stat->getDebugType()) &&
+ "Statistic group/type name is simple.");
+ assert(!yaml::needsQuotes(Stat->getName()) && "Statistic name is simple");
+ OS << "\t\"" << Stat->getDebugType() << '.' << Stat->getName() << "\": "
+ << Stat->getValue();
delim = ",\n";
}
+ // Print timers.
+ TimerGroup::printAllJSONValues(OS, delim);
+
OS << "\n}\n";
OS.flush();
}
@@ -195,7 +198,7 @@ void llvm::PrintStatistics() {
// Check if the -stats option is set instead of checking
// !Stats.Stats.empty(). In release builds, Statistics operators
// do nothing, so stats are never Registered.
- if (Enabled) {
+ if (Stats) {
// Get the stream to write to.
std::unique_ptr<raw_ostream> OutStream = CreateInfoOutputFile();
(*OutStream) << "Statistics are disabled. "
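
A sketch (not part of the diff) of the reworked statistics interface: EnableStatistics() now takes a PrintOnExit flag, and the JSON output also carries timer values via TimerGroup::printAllJSONValues.

#include "llvm/ADT/Statistic.h"
#include "llvm/Support/raw_ostream.h"

void collectStatsForHostApplication() {
  // Gather statistics but leave printing to the host rather than exit time.
  llvm::EnableStatistics(/*PrintOnExit=*/false);
  // ... run the work being measured ...
  llvm::PrintStatisticsJSON(llvm::errs());
}
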
diff --git a/contrib/llvm/lib/Support/StreamingMemoryObject.cpp b/contrib/llvm/lib/Support/StreamingMemoryObject.cpp
deleted file mode 100644
index fb566179486a..000000000000
--- a/contrib/llvm/lib/Support/StreamingMemoryObject.cpp
+++ /dev/null
@@ -1,138 +0,0 @@
-//===- StreamingMemoryObject.cpp - Streamable data interface -------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Support/StreamingMemoryObject.h"
-#include <cassert>
-#include <cstddef>
-#include <cstring>
-using namespace llvm;
-
-namespace {
-
-class RawMemoryObject : public MemoryObject {
-public:
- RawMemoryObject(const unsigned char *Start, const unsigned char *End) :
- FirstChar(Start), LastChar(End) {
- assert(LastChar >= FirstChar && "Invalid start/end range");
- }
-
- uint64_t getExtent() const override {
- return LastChar - FirstChar;
- }
- uint64_t readBytes(uint8_t *Buf, uint64_t Size,
- uint64_t Address) const override;
- const uint8_t *getPointer(uint64_t address, uint64_t size) const override;
- bool isValidAddress(uint64_t address) const override {
- return validAddress(address);
- }
-
-private:
- const uint8_t* const FirstChar;
- const uint8_t* const LastChar;
-
- // These are implemented as inline functions here to avoid multiple virtual
- // calls per public function
- bool validAddress(uint64_t address) const {
- return static_cast<std::ptrdiff_t>(address) < LastChar - FirstChar;
- }
-
- RawMemoryObject(const RawMemoryObject&) = delete;
- void operator=(const RawMemoryObject&) = delete;
-};
-
-uint64_t RawMemoryObject::readBytes(uint8_t *Buf, uint64_t Size,
- uint64_t Address) const {
- uint64_t BufferSize = LastChar - FirstChar;
- if (Address >= BufferSize)
- return 0;
-
- uint64_t End = Address + Size;
- if (End > BufferSize)
- End = BufferSize;
-
- assert(static_cast<int64_t>(End - Address) >= 0);
- Size = End - Address;
- memcpy(Buf, Address + FirstChar, Size);
- return Size;
-}
-
-const uint8_t *RawMemoryObject::getPointer(uint64_t address,
- uint64_t size) const {
- return FirstChar + address;
-}
-} // anonymous namespace
-
-namespace llvm {
-// If the bitcode has a header, then its size is known, and we don't have to
-// block until we actually want to read it.
-bool StreamingMemoryObject::isValidAddress(uint64_t address) const {
- if (ObjectSize && address < ObjectSize) return true;
- return fetchToPos(address);
-}
-
-uint64_t StreamingMemoryObject::getExtent() const {
- if (ObjectSize) return ObjectSize;
- size_t pos = BytesRead + kChunkSize;
- // keep fetching until we run out of bytes
- while (fetchToPos(pos)) pos += kChunkSize;
- return ObjectSize;
-}
-
-uint64_t StreamingMemoryObject::readBytes(uint8_t *Buf, uint64_t Size,
- uint64_t Address) const {
- fetchToPos(Address + Size - 1);
- // Note: For wrapped bitcode files will set ObjectSize after the
- // first call to fetchToPos. In such cases, ObjectSize can be
- // smaller than BytesRead.
- size_t MaxAddress =
- (ObjectSize && ObjectSize < BytesRead) ? ObjectSize : BytesRead;
- if (Address >= MaxAddress)
- return 0;
-
- uint64_t End = Address + Size;
- if (End > MaxAddress)
- End = MaxAddress;
- assert(End >= Address);
- Size = End - Address;
- memcpy(Buf, &Bytes[Address + BytesSkipped], Size);
- return Size;
-}
-
-const uint8_t *StreamingMemoryObject::getPointer(uint64_t Address,
- uint64_t Size) const {
- fetchToPos(Address + Size - 1);
- return &Bytes[Address + BytesSkipped];
-}
-
-bool StreamingMemoryObject::dropLeadingBytes(size_t s) {
- if (BytesRead < s) return true;
- BytesSkipped = s;
- BytesRead -= s;
- return false;
-}
-
-void StreamingMemoryObject::setKnownObjectSize(size_t size) {
- ObjectSize = size;
- Bytes.reserve(size);
- if (ObjectSize <= BytesRead)
- EOFReached = true;
-}
-
-MemoryObject *getNonStreamedMemoryObject(const unsigned char *Start,
- const unsigned char *End) {
- return new RawMemoryObject(Start, End);
-}
-
-StreamingMemoryObject::StreamingMemoryObject(
- std::unique_ptr<DataStreamer> Streamer)
- : Bytes(kChunkSize), Streamer(std::move(Streamer)), BytesRead(0),
- BytesSkipped(0), ObjectSize(0), EOFReached(false) {
- BytesRead = this->Streamer->GetBytes(&Bytes[0], kChunkSize);
-}
-}
diff --git a/contrib/llvm/lib/Support/StringMap.cpp b/contrib/llvm/lib/Support/StringMap.cpp
index 7da9ccbd40c7..d2315966e32f 100644
--- a/contrib/llvm/lib/Support/StringMap.cpp
+++ b/contrib/llvm/lib/Support/StringMap.cpp
@@ -14,7 +14,9 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/MathExtras.h"
#include <cassert>
+
using namespace llvm;
/// Returns the number of buckets to allocate to ensure that the DenseMap can
@@ -63,7 +65,6 @@ void StringMapImpl::init(unsigned InitSize) {
TheTable[NumBuckets] = (StringMapEntryBase*)2;
}
-
/// LookupBucketFor - Look up the bucket that the specified string should end
/// up in. If it already exists as a key in the map, the Item pointer for the
/// specified bucket will be non-null. Otherwise, it will be null. In either
@@ -81,7 +82,7 @@ unsigned StringMapImpl::LookupBucketFor(StringRef Name) {
unsigned ProbeAmt = 1;
int FirstTombstone = -1;
- while (1) {
+ while (true) {
StringMapEntryBase *BucketItem = TheTable[BucketNo];
// If we found an empty bucket, this key isn't in the table yet, return it.
if (LLVM_LIKELY(!BucketItem)) {
@@ -123,7 +124,6 @@ unsigned StringMapImpl::LookupBucketFor(StringRef Name) {
}
}
-
/// FindKey - Look up the bucket that contains the specified key. If it exists
/// in the map, return the bucket number of the key. Otherwise return -1.
/// This does not modify the map.
@@ -135,7 +135,7 @@ int StringMapImpl::FindKey(StringRef Key) const {
unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1);
unsigned ProbeAmt = 1;
- while (1) {
+ while (true) {
StringMapEntryBase *BucketItem = TheTable[BucketNo];
// If we found an empty bucket, this key isn't in the table yet, return.
if (LLVM_LIKELY(!BucketItem))
@@ -191,8 +191,6 @@ StringMapEntryBase *StringMapImpl::RemoveKey(StringRef Key) {
return Result;
}
-
-
/// RehashTable - Grow the table, redistributing values into the buckets with
/// the appropriate mod-of-hashtable-size.
unsigned StringMapImpl::RehashTable(unsigned BucketNo) {
diff --git a/contrib/llvm/lib/Support/StringRef.cpp b/contrib/llvm/lib/Support/StringRef.cpp
index 8a9da5edca84..d81250e48dde 100644
--- a/contrib/llvm/lib/Support/StringRef.cpp
+++ b/contrib/llvm/lib/Support/StringRef.cpp
@@ -69,6 +69,11 @@ bool StringRef::endswith_lower(StringRef Suffix) const {
ascii_strncasecmp(end() - Suffix.Length, Suffix.Data, Suffix.Length) == 0;
}
+size_t StringRef::find_lower(char C, size_t From) const {
+ char L = ascii_tolower(C);
+ return find_if([L](char D) { return ascii_tolower(D) == L; }, From);
+}
+
/// compare_numeric - Compare strings, handle embedded numbers.
int StringRef::compare_numeric(StringRef RHS) const {
for (size_t I = 0, E = std::min(Length, RHS.Length); I != E; ++I) {
@@ -143,16 +148,20 @@ size_t StringRef::find(StringRef Str, size_t From) const {
if (From > Length)
return npos;
+ const char *Start = Data + From;
+ size_t Size = Length - From;
+
const char *Needle = Str.data();
size_t N = Str.size();
if (N == 0)
return From;
-
- size_t Size = Length - From;
if (Size < N)
return npos;
+ if (N == 1) {
+ const char *Ptr = (const char *)::memchr(Start, Needle[0], Size);
+ return Ptr == nullptr ? npos : Ptr - Data;
+ }
- const char *Start = Data + From;
const char *Stop = Start + (Size - N + 1);
// For short haystacks or unsupported needles fall back to the naive algorithm
@@ -172,16 +181,40 @@ size_t StringRef::find(StringRef Str, size_t From) const {
BadCharSkip[(uint8_t)Str[i]] = N-1-i;
do {
- if (std::memcmp(Start, Needle, N) == 0)
- return Start - Data;
+ uint8_t Last = Start[N - 1];
+ if (LLVM_UNLIKELY(Last == (uint8_t)Needle[N - 1]))
+ if (std::memcmp(Start, Needle, N - 1) == 0)
+ return Start - Data;
// Otherwise skip the appropriate number of bytes.
- Start += BadCharSkip[(uint8_t)Start[N-1]];
+ Start += BadCharSkip[Last];
} while (Start < Stop);
return npos;
}
+size_t StringRef::find_lower(StringRef Str, size_t From) const {
+ StringRef This = substr(From);
+ while (This.size() >= Str.size()) {
+ if (This.startswith_lower(Str))
+ return From;
+ This = This.drop_front();
+ ++From;
+ }
+ return npos;
+}
+
+size_t StringRef::rfind_lower(char C, size_t From) const {
+ From = std::min(From, Length);
+ size_t i = From;
+ while (i != 0) {
+ --i;
+ if (ascii_tolower(Data[i]) == ascii_tolower(C))
+ return i;
+ }
+ return npos;
+}
+
/// rfind - Search for the last string \arg Str in the string.
///
/// \return - The index of the last occurrence of \arg Str, or npos if not
@@ -198,6 +231,18 @@ size_t StringRef::rfind(StringRef Str) const {
return npos;
}
+size_t StringRef::rfind_lower(StringRef Str) const {
+ size_t N = Str.size();
+ if (N > Length)
+ return npos;
+ for (size_t i = Length - N + 1, e = 0; i != e;) {
+ --i;
+ if (substr(i, N).equals_lower(Str))
+ return i;
+ }
+ return npos;
+}
+
/// find_first_of - Find the first character in the string that is in \arg
/// Chars, or npos if not found.
///
@@ -351,6 +396,9 @@ size_t StringRef::count(StringRef Str) const {
}
static unsigned GetAutoSenseRadix(StringRef &Str) {
+ if (Str.empty())
+ return 10;
+
if (Str.startswith("0x") || Str.startswith("0X")) {
Str = Str.substr(2);
return 16;
@@ -366,17 +414,16 @@ static unsigned GetAutoSenseRadix(StringRef &Str) {
return 8;
}
- if (Str.startswith("0"))
+ if (Str[0] == '0' && Str.size() > 1 && ascii_isdigit(Str[1])) {
+ Str = Str.substr(1);
return 8;
-
+ }
+
return 10;
}
-
-/// GetAsUnsignedInteger - Workhorse method that converts a integer character
-/// sequence of radix up to 36 to an unsigned long long value.
-bool llvm::getAsUnsignedInteger(StringRef Str, unsigned Radix,
- unsigned long long &Result) {
+bool llvm::consumeUnsignedInteger(StringRef &Str, unsigned Radix,
+ unsigned long long &Result) {
// Autosense radix if not specified.
if (Radix == 0)
Radix = GetAutoSenseRadix(Str);
@@ -385,44 +432,51 @@ bool llvm::getAsUnsignedInteger(StringRef Str, unsigned Radix,
if (Str.empty()) return true;
// Parse all the bytes of the string given this radix. Watch for overflow.
+ StringRef Str2 = Str;
Result = 0;
- while (!Str.empty()) {
+ while (!Str2.empty()) {
unsigned CharVal;
- if (Str[0] >= '0' && Str[0] <= '9')
- CharVal = Str[0]-'0';
- else if (Str[0] >= 'a' && Str[0] <= 'z')
- CharVal = Str[0]-'a'+10;
- else if (Str[0] >= 'A' && Str[0] <= 'Z')
- CharVal = Str[0]-'A'+10;
+ if (Str2[0] >= '0' && Str2[0] <= '9')
+ CharVal = Str2[0] - '0';
+ else if (Str2[0] >= 'a' && Str2[0] <= 'z')
+ CharVal = Str2[0] - 'a' + 10;
+ else if (Str2[0] >= 'A' && Str2[0] <= 'Z')
+ CharVal = Str2[0] - 'A' + 10;
else
- return true;
+ break;
- // If the parsed value is larger than the integer radix, the string is
- // invalid.
+ // If the parsed value is larger than the integer radix, we cannot
+ // consume any more characters.
if (CharVal >= Radix)
- return true;
+ break;
// Add in this character.
unsigned long long PrevResult = Result;
- Result = Result*Radix+CharVal;
+ Result = Result * Radix + CharVal;
// Check for overflow by shifting back and seeing if bits were lost.
- if (Result/Radix < PrevResult)
+ if (Result / Radix < PrevResult)
return true;
- Str = Str.substr(1);
+ Str2 = Str2.substr(1);
}
+ // We consider the operation a failure if no characters were consumed
+ // successfully.
+ if (Str.size() == Str2.size())
+ return true;
+
+ Str = Str2;
return false;
}
-bool llvm::getAsSignedInteger(StringRef Str, unsigned Radix,
- long long &Result) {
+bool llvm::consumeSignedInteger(StringRef &Str, unsigned Radix,
+ long long &Result) {
unsigned long long ULLVal;
// Handle positive strings first.
if (Str.empty() || Str.front() != '-') {
- if (getAsUnsignedInteger(Str, Radix, ULLVal) ||
+ if (consumeUnsignedInteger(Str, Radix, ULLVal) ||
// Check for value so large it overflows a signed value.
(long long)ULLVal < 0)
return true;
@@ -431,17 +485,41 @@ bool llvm::getAsSignedInteger(StringRef Str, unsigned Radix,
}
// Get the positive part of the value.
- if (getAsUnsignedInteger(Str.substr(1), Radix, ULLVal) ||
+ StringRef Str2 = Str.drop_front(1);
+ if (consumeUnsignedInteger(Str2, Radix, ULLVal) ||
// Reject values so large they'd overflow as negative signed, but allow
// "-0". This negates the unsigned so that the negative isn't undefined
// on signed overflow.
(long long)-ULLVal > 0)
return true;
+ Str = Str2;
Result = -ULLVal;
return false;
}
+/// GetAsUnsignedInteger - Workhorse method that converts a integer character
+/// sequence of radix up to 36 to an unsigned long long value.
+bool llvm::getAsUnsignedInteger(StringRef Str, unsigned Radix,
+ unsigned long long &Result) {
+ if (consumeUnsignedInteger(Str, Radix, Result))
+ return true;
+
+ // For getAsUnsignedInteger, we require the whole string to be consumed or
+ // else we consider it a failure.
+ return !Str.empty();
+}
+
+bool llvm::getAsSignedInteger(StringRef Str, unsigned Radix,
+ long long &Result) {
+ if (consumeSignedInteger(Str, Radix, Result))
+ return true;
+
+ // For getAsSignedInteger, we require the whole string to be consumed or else
+ // we consider it a failure.
+ return !Str.empty();
+}
+
bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const {
StringRef Str = *this;
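
A sketch (not part of the diff) contrasting the new consume* helpers with the stricter getAs* wrappers, assuming the declarations are visible through StringRef.h:

#include "llvm/ADT/StringRef.h"

void integerParsingDemo() {
  llvm::StringRef S("123abc");
  unsigned long long V = 0;
  bool Err = llvm::consumeUnsignedInteger(S, 10, V);
  // Err == false, V == 123, S == "abc": parsing stops at the first non-digit
  // and the StringRef is advanced past the consumed characters.
  bool Err2 = llvm::getAsUnsignedInteger("123abc", 10, V);
  // Err2 == true: the wrapper fails unless the whole string is consumed.
  (void)Err; (void)Err2;
}
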
diff --git a/contrib/llvm/lib/Support/StringSaver.cpp b/contrib/llvm/lib/Support/StringSaver.cpp
index bbc1fd276266..335fce3a7bbd 100644
--- a/contrib/llvm/lib/Support/StringSaver.cpp
+++ b/contrib/llvm/lib/Support/StringSaver.cpp
@@ -11,9 +11,9 @@
using namespace llvm;
-const char *StringSaver::save(StringRef S) {
+StringRef StringSaver::save(StringRef S) {
char *P = Alloc.Allocate<char>(S.size() + 1);
memcpy(P, S.data(), S.size());
P[S.size()] = '\0';
- return P;
+ return StringRef(P, S.size());
}
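
A sketch (not part of the diff): save() now returns a StringRef with an explicit length rather than a bare pointer to the NUL-terminated copy.

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/StringSaver.h"

llvm::StringRef intern(llvm::StringSaver &Saver, llvm::StringRef S) {
  llvm::StringRef Copy = Saver.save(S);
  // Copy.size() == S.size(); the bytes live in the saver's allocator and are
  // still NUL-terminated for callers that need a C string.
  return Copy;
}
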
diff --git a/contrib/llvm/lib/Support/TargetParser.cpp b/contrib/llvm/lib/Support/TargetParser.cpp
index c3f86130b68c..42fab671a251 100644
--- a/contrib/llvm/lib/Support/TargetParser.cpp
+++ b/contrib/llvm/lib/Support/TargetParser.cpp
@@ -52,7 +52,7 @@ static const struct {
// of the triples and are not conforming with their official names.
// Check to see if the expectation should be changed.
// FIXME: TableGen this.
-static const struct {
+template <typename T> struct ArchNames {
const char *NameCStr;
size_t NameLength;
const char *CPUAttrCStr;
@@ -61,7 +61,7 @@ static const struct {
size_t SubArchLength;
unsigned DefaultFPU;
unsigned ArchBaseExtensions;
- ARM::ArchKind ID;
+ T ID;
ARMBuildAttrs::CPUArch ArchAttr; // Arch ID in build attributes.
StringRef getName() const { return StringRef(NameCStr, NameLength); }
@@ -71,18 +71,22 @@ static const struct {
// Sub-Arch name.
StringRef getSubArch() const { return StringRef(SubArchCStr, SubArchLength); }
-} ARCHNames[] = {
+};
+ArchNames<ARM::ArchKind> ARCHNames[] = {
#define ARM_ARCH(NAME, ID, CPU_ATTR, SUB_ARCH, ARCH_ATTR, ARCH_FPU, ARCH_BASE_EXT) \
{NAME, sizeof(NAME) - 1, CPU_ATTR, sizeof(CPU_ATTR) - 1, SUB_ARCH, \
sizeof(SUB_ARCH) - 1, ARCH_FPU, ARCH_BASE_EXT, ID, ARCH_ATTR},
#include "llvm/Support/ARMTargetParser.def"
-},AArch64ARCHNames[] = {
-#define AARCH64_ARCH(NAME, ID, CPU_ATTR, SUB_ARCH, ARCH_ATTR, ARCH_FPU, ARCH_BASE_EXT) \
- {NAME, sizeof(NAME) - 1, CPU_ATTR, sizeof(CPU_ATTR) - 1, SUB_ARCH, \
- sizeof(SUB_ARCH) - 1, ARCH_FPU, ARCH_BASE_EXT, ID, ARCH_ATTR},
-#include "llvm/Support/AArch64TargetParser.def"
};
+ArchNames<AArch64::ArchKind> AArch64ARCHNames[] = {
+ #define AARCH64_ARCH(NAME, ID, CPU_ATTR, SUB_ARCH, ARCH_ATTR, ARCH_FPU, ARCH_BASE_EXT) \
+ {NAME, sizeof(NAME) - 1, CPU_ATTR, sizeof(CPU_ATTR) - 1, SUB_ARCH, \
+ sizeof(SUB_ARCH) - 1, ARCH_FPU, ARCH_BASE_EXT, AArch64::ArchKind::ID, ARCH_ATTR},
+ #include "llvm/Support/AArch64TargetParser.def"
+ };
+
+
// List of Arch Extension names.
// FIXME: TableGen this.
static const struct {
@@ -122,24 +126,27 @@ static const struct {
// When finding the Arch for a CPU, first-found prevails. Sort them accordingly.
// When this becomes table-generated, we'd probably need two tables.
// FIXME: TableGen this.
-static const struct {
+template <typename T> struct CpuNames {
const char *NameCStr;
size_t NameLength;
- ARM::ArchKind ArchID;
+ T ArchID;
bool Default; // is $Name the default CPU for $ArchID ?
unsigned DefaultExtensions;
StringRef getName() const { return StringRef(NameCStr, NameLength); }
-} CPUNames[] = {
+};
+CpuNames<ARM::ArchKind> CPUNames[] = {
#define ARM_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) \
{ NAME, sizeof(NAME) - 1, ID, IS_DEFAULT, DEFAULT_EXT },
#include "llvm/Support/ARMTargetParser.def"
-},AArch64CPUNames[] = {
-#define AARCH64_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) \
- { NAME, sizeof(NAME) - 1, ID, IS_DEFAULT, DEFAULT_EXT },
-#include "llvm/Support/AArch64TargetParser.def"
};
+CpuNames<AArch64::ArchKind> AArch64CPUNames[] = {
+ #define AARCH64_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) \
+ { NAME, sizeof(NAME) - 1, AArch64::ArchKind::ID, IS_DEFAULT, DEFAULT_EXT },
+ #include "llvm/Support/AArch64TargetParser.def"
+ };
+
} // namespace
// ======================================================= //
@@ -193,7 +200,7 @@ unsigned llvm::ARM::getDefaultExtensions(StringRef CPU, unsigned ArchKind) {
}
bool llvm::ARM::getHWDivFeatures(unsigned HWDivKind,
- std::vector<const char *> &Features) {
+ std::vector<StringRef> &Features) {
if (HWDivKind == ARM::AEK_INVALID)
return false;
@@ -212,7 +219,7 @@ bool llvm::ARM::getHWDivFeatures(unsigned HWDivKind,
}
bool llvm::ARM::getExtensionFeatures(unsigned Extensions,
- std::vector<const char *> &Features) {
+ std::vector<StringRef> &Features) {
if (Extensions == ARM::AEK_INVALID)
return false;
@@ -231,7 +238,7 @@ bool llvm::ARM::getExtensionFeatures(unsigned Extensions,
}
bool llvm::ARM::getFPUFeatures(unsigned FPUKind,
- std::vector<const char *> &Features) {
+ std::vector<StringRef> &Features) {
if (FPUKind >= ARM::FK_LAST || FPUKind == ARM::FK_INVALID)
return false;
@@ -344,20 +351,20 @@ StringRef llvm::ARM::getArchExtName(unsigned ArchExtKind) {
return StringRef();
}
-const char *llvm::ARM::getArchExtFeature(StringRef ArchExt) {
+StringRef llvm::ARM::getArchExtFeature(StringRef ArchExt) {
if (ArchExt.startswith("no")) {
StringRef ArchExtBase(ArchExt.substr(2));
for (const auto AE : ARCHExtNames) {
if (AE.NegFeature && ArchExtBase == AE.getName())
- return AE.NegFeature;
+ return StringRef(AE.NegFeature);
}
}
for (const auto AE : ARCHExtNames) {
if (AE.Feature && ArchExt == AE.getName())
- return AE.Feature;
+ return StringRef(AE.Feature);
}
- return nullptr;
+ return StringRef();
}
StringRef llvm::ARM::getHWDivName(unsigned HWDivKind) {
@@ -416,13 +423,13 @@ unsigned llvm::AArch64::getDefaultExtensions(StringRef CPU, unsigned ArchKind) {
return StringSwitch<unsigned>(CPU)
#define AARCH64_CPU_NAME(NAME, ID, DEFAULT_FPU, IS_DEFAULT, DEFAULT_EXT) \
- .Case(NAME, AArch64ARCHNames[ID].ArchBaseExtensions | DEFAULT_EXT)
+ .Case(NAME, DEFAULT_EXT)
#include "llvm/Support/AArch64TargetParser.def"
.Default(AArch64::AEK_INVALID);
}
bool llvm::AArch64::getExtensionFeatures(unsigned Extensions,
- std::vector<const char *> &Features) {
+ std::vector<StringRef> &Features) {
if (Extensions == AArch64::AEK_INVALID)
return false;
@@ -446,81 +453,77 @@ bool llvm::AArch64::getExtensionFeatures(unsigned Extensions,
}
bool llvm::AArch64::getFPUFeatures(unsigned FPUKind,
- std::vector<const char *> &Features) {
+ std::vector<StringRef> &Features) {
return ARM::getFPUFeatures(FPUKind, Features);
}
bool llvm::AArch64::getArchFeatures(unsigned ArchKind,
- std::vector<const char *> &Features) {
- if (ArchKind == ARM::AK_INVALID || ArchKind >= ARM::AK_LAST)
- return false;
-
- if (ArchKind == ARM::AK_ARMV8_1A)
+ std::vector<StringRef> &Features) {
+ if (ArchKind == static_cast<unsigned>(AArch64::ArchKind::AK_ARMV8_1A))
Features.push_back("+v8.1a");
- if (ArchKind == ARM::AK_ARMV8_2A)
+ if (ArchKind == static_cast<unsigned>(AArch64::ArchKind::AK_ARMV8_2A))
Features.push_back("+v8.2a");
- return true;
+ return ArchKind > static_cast<unsigned>(AArch64::ArchKind::AK_INVALID) &&
+ ArchKind < static_cast<unsigned>(AArch64::ArchKind::AK_LAST);
}
StringRef llvm::AArch64::getArchName(unsigned ArchKind) {
- for (const auto &AI : AArch64ARCHNames)
- if (AI.ID == ArchKind)
- return AI.getName();
- return StringRef();
+ if (ArchKind >= static_cast<unsigned>(AArch64::ArchKind::AK_LAST))
+ return StringRef();
+ return AArch64ARCHNames[ArchKind].getName();
}
StringRef llvm::AArch64::getCPUAttr(unsigned ArchKind) {
- for (const auto &AI : AArch64ARCHNames)
- if (AI.ID == ArchKind)
- return AI.getCPUAttr();
- return StringRef();
+ if (ArchKind == static_cast<unsigned>(AArch64::ArchKind::AK_INVALID) ||
+ ArchKind >= static_cast<unsigned>(AArch64::ArchKind::AK_LAST))
+ return StringRef();
+ return AArch64ARCHNames[ArchKind].getCPUAttr();
}
StringRef llvm::AArch64::getSubArch(unsigned ArchKind) {
- for (const auto &AI : AArch64ARCHNames)
- if (AI.ID == ArchKind)
- return AI.getSubArch();
- return StringRef();
+ if (ArchKind == static_cast<unsigned>(AArch64::ArchKind::AK_INVALID) ||
+ ArchKind >= static_cast<unsigned>(AArch64::ArchKind::AK_LAST))
+ return StringRef();
+ return AArch64ARCHNames[ArchKind].getSubArch();
}
unsigned llvm::AArch64::getArchAttr(unsigned ArchKind) {
- for (const auto &AI : AArch64ARCHNames)
- if (AI.ID == ArchKind)
- return AI.ArchAttr;
- return ARMBuildAttrs::CPUArch::v8_A;
+ if (ArchKind >= static_cast<unsigned>(AArch64::ArchKind::AK_LAST))
+ return ARMBuildAttrs::CPUArch::v8_A;
+ return AArch64ARCHNames[ArchKind].ArchAttr;
}
-StringRef llvm::AArch64::getArchExtName(unsigned AArchExtKind) {
+StringRef llvm::AArch64::getArchExtName(unsigned ArchExtKind) {
for (const auto &AE : AArch64ARCHExtNames)
- if (AArchExtKind == AE.ID)
+ if (ArchExtKind == AE.ID)
return AE.getName();
return StringRef();
}
-const char *llvm::AArch64::getArchExtFeature(StringRef ArchExt) {
+StringRef llvm::AArch64::getArchExtFeature(StringRef ArchExt) {
if (ArchExt.startswith("no")) {
StringRef ArchExtBase(ArchExt.substr(2));
for (const auto &AE : AArch64ARCHExtNames) {
if (AE.NegFeature && ArchExtBase == AE.getName())
- return AE.NegFeature;
+ return StringRef(AE.NegFeature);
}
}
for (const auto &AE : AArch64ARCHExtNames)
if (AE.Feature && ArchExt == AE.getName())
- return AE.Feature;
- return nullptr;
+ return StringRef(AE.Feature);
+ return StringRef();
}
StringRef llvm::AArch64::getDefaultCPU(StringRef Arch) {
unsigned AK = parseArch(Arch);
- if (AK == ARM::AK_INVALID)
+ if (AK == static_cast<unsigned>(AArch64::ArchKind::AK_INVALID))
return StringRef();
// Look for multiple AKs to find the default for pair AK+Name.
for (const auto &CPU : AArch64CPUNames)
- if (CPU.ArchID == AK && CPU.Default)
+ if (static_cast<unsigned>(CPU.ArchID) == AK && CPU.Default)
return CPU.getName();
// If we can't find a default then target the architecture instead
@@ -575,6 +578,7 @@ static StringRef getArchSynonym(StringRef Arch) {
.Cases("v8", "v8a", "aarch64", "arm64", "v8-a")
.Case("v8.1a", "v8.1-a")
.Case("v8.2a", "v8.2-a")
+ .Case("v8r", "v8-r")
.Case("v8m.base", "v8-m.base")
.Case("v8m.main", "v8-m.main")
.Default(Arch);
@@ -685,7 +689,7 @@ unsigned llvm::ARM::parseArchISA(StringRef Arch) {
.StartsWith("arm64", ARM::IK_AARCH64)
.StartsWith("thumb", ARM::IK_THUMB)
.StartsWith("arm", ARM::IK_ARM)
- .Default(ARM::EK_INVALID);
+ .Default(ARM::IK_INVALID);
}
// Little/Big endian
@@ -718,6 +722,7 @@ unsigned llvm::ARM::parseArchProfile(StringRef Arch) {
case ARM::AK_ARMV8MBaseline:
return ARM::PK_M;
case ARM::AK_ARMV7R:
+ case ARM::AK_ARMV8R:
return ARM::PK_R;
case ARM::AK_ARMV7A:
case ARM::AK_ARMV7K:
@@ -765,6 +770,7 @@ unsigned llvm::ARM::parseArchVersion(StringRef Arch) {
case ARM::AK_ARMV8A:
case ARM::AK_ARMV8_1A:
case ARM::AK_ARMV8_2A:
+ case ARM::AK_ARMV8R:
case ARM::AK_ARMV8MBaseline:
case ARM::AK_ARMV8MMainline:
return 8;
@@ -784,14 +790,14 @@ unsigned llvm::AArch64::parseFPU(StringRef FPU) {
unsigned llvm::AArch64::parseArch(StringRef Arch) {
Arch = getCanonicalArchName(Arch);
if (checkArchVersion(Arch) < 8)
- return ARM::AK_INVALID;
+ return static_cast<unsigned>(AArch64::ArchKind::AK_INVALID);
StringRef Syn = getArchSynonym(Arch);
for (const auto A : AArch64ARCHNames) {
if (A.getName().endswith(Syn))
- return A.ID;
+ return static_cast<unsigned>(A.ID);
}
- return ARM::AK_INVALID;
+ return static_cast<unsigned>(AArch64::ArchKind::AK_INVALID);
}
unsigned llvm::AArch64::parseArchExt(StringRef ArchExt) {
@@ -805,9 +811,9 @@ unsigned llvm::AArch64::parseArchExt(StringRef ArchExt) {
unsigned llvm::AArch64::parseCPUArch(StringRef CPU) {
for (const auto C : AArch64CPUNames) {
if (CPU == C.getName())
- return C.ArchID;
+ return static_cast<unsigned>(C.ArchID);
}
- return ARM::AK_INVALID;
+ return static_cast<unsigned>(AArch64::ArchKind::AK_INVALID);
}
// ARM, Thumb, AArch64
diff --git a/contrib/llvm/lib/Support/TargetRegistry.cpp b/contrib/llvm/lib/Support/TargetRegistry.cpp
index 02a6d334b911..bed9ed64f802 100644
--- a/contrib/llvm/lib/Support/TargetRegistry.cpp
+++ b/contrib/llvm/lib/Support/TargetRegistry.cpp
@@ -30,8 +30,7 @@ const Target *TargetRegistry::lookupTarget(const std::string &ArchName,
// name, because it might be a backend that has no mapping to a target triple.
const Target *TheTarget = nullptr;
if (!ArchName.empty()) {
- auto I =
- std::find_if(targets().begin(), targets().end(),
+ auto I = find_if(targets(),
[&](const Target &T) { return ArchName == T.getName(); });
if (I == targets().end()) {
@@ -70,7 +69,7 @@ const Target *TargetRegistry::lookupTarget(const std::string &TT,
}
Triple::ArchType Arch = Triple(TT).getArch();
auto ArchMatch = [&](const Target &T) { return T.ArchMatchFn(Arch); };
- auto I = std::find_if(targets().begin(), targets().end(), ArchMatch);
+ auto I = find_if(targets(), ArchMatch);
if (I == targets().end()) {
Error = "No available targets are compatible with this triple.";
diff --git a/contrib/llvm/lib/Support/Threading.cpp b/contrib/llvm/lib/Support/Threading.cpp
index e8f5622d0e5c..760f9e2c388b 100644
--- a/contrib/llvm/lib/Support/Threading.cpp
+++ b/contrib/llvm/lib/Support/Threading.cpp
@@ -15,6 +15,7 @@
#include "llvm/Support/Threading.h"
#include "llvm/Config/config.h"
#include "llvm/Support/Atomic.h"
+#include "llvm/Support/Host.h"
#include "llvm/Support/Mutex.h"
#include "llvm/Support/thread.h"
#include <cassert>
@@ -116,3 +117,13 @@ void llvm::llvm_execute_on_thread(void (*Fn)(void*), void *UserData,
}
#endif
+
+unsigned llvm::heavyweight_hardware_concurrency() {
+#if !LLVM_ENABLE_THREADS
+ return 1;
+#endif
+ int NumPhysical = sys::getHostNumPhysicalCores();
+ if (NumPhysical == -1)
+ return thread::hardware_concurrency();
+ return NumPhysical;
+}
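
A sketch (not part of the diff) of the new heavyweight_hardware_concurrency() helper, which prefers the physical core count and falls back to hardware_concurrency(), or to 1 in builds without thread support:

#include "llvm/Support/Threading.h"
#include <thread>
#include <vector>

void runOnPhysicalCores(void (*Work)(unsigned)) {
  unsigned N = llvm::heavyweight_hardware_concurrency();
  std::vector<std::thread> Workers;
  for (unsigned I = 0; I != N; ++I)
    Workers.emplace_back(Work, I);
  for (std::thread &T : Workers)
    T.join();
}
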
diff --git a/contrib/llvm/lib/Support/TimeValue.cpp b/contrib/llvm/lib/Support/TimeValue.cpp
deleted file mode 100644
index 94a4c011693c..000000000000
--- a/contrib/llvm/lib/Support/TimeValue.cpp
+++ /dev/null
@@ -1,56 +0,0 @@
-//===-- TimeValue.cpp - Implement OS TimeValue Concept ----------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the operating system TimeValue concept.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Support/TimeValue.h"
-#include "llvm/Config/config.h"
-
-namespace llvm {
-
-using namespace sys;
-
-const TimeValue::SecondsType
- TimeValue::PosixZeroTimeSeconds = -946684800;
-const TimeValue::SecondsType
- TimeValue::Win32ZeroTimeSeconds = -12591158400ULL;
-
-void TimeValue::normalize() {
- if ( nanos_ >= NANOSECONDS_PER_SECOND ) {
- do {
- seconds_++;
- nanos_ -= NANOSECONDS_PER_SECOND;
- } while ( nanos_ >= NANOSECONDS_PER_SECOND );
- } else if (nanos_ <= -NANOSECONDS_PER_SECOND ) {
- do {
- seconds_--;
- nanos_ += NANOSECONDS_PER_SECOND;
- } while (nanos_ <= -NANOSECONDS_PER_SECOND);
- }
-
- if (seconds_ >= 1 && nanos_ < 0) {
- seconds_--;
- nanos_ += NANOSECONDS_PER_SECOND;
- } else if (seconds_ < 0 && nanos_ > 0) {
- seconds_++;
- nanos_ -= NANOSECONDS_PER_SECOND;
- }
-}
-
-} // namespace llvm
-
-/// Include the platform-specific portion of TimeValue class
-#ifdef LLVM_ON_UNIX
-#include "Unix/TimeValue.inc"
-#endif
-#ifdef LLVM_ON_WIN32
-#include "Windows/TimeValue.inc"
-#endif
diff --git a/contrib/llvm/lib/Support/Timer.cpp b/contrib/llvm/lib/Support/Timer.cpp
index 49bd39e14e28..fbd73d0b6b3b 100644
--- a/contrib/llvm/lib/Support/Timer.cpp
+++ b/contrib/llvm/lib/Support/Timer.cpp
@@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
-// Interval Timing implementation.
+/// \file Interval Timing implementation.
//
//===----------------------------------------------------------------------===//
@@ -21,16 +21,16 @@
#include "llvm/Support/Mutex.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/YAMLTraits.h"
using namespace llvm;
-// getLibSupportInfoOutputFilename - This ugly hack is brought to you courtesy
-// of constructor/destructor ordering being unspecified by C++. Basically the
-// problem is that a Statistic object gets destroyed, which ends up calling
-// 'GetLibSupportInfoOutputFile()' (below), which calls this function.
-// LibSupportInfoOutputFilename used to be a global variable, but sometimes it
-// would get destroyed before the Statistic, causing havoc to ensue. We "fix"
-// this by creating the string the first time it is needed and never destroying
-// it.
+// This ugly hack is brought to you courtesy of constructor/destructor ordering
+// being unspecified by C++. Basically the problem is that a Statistic object
+// gets destroyed, which ends up calling 'GetLibSupportInfoOutputFile()'
+// (below), which calls this function. LibSupportInfoOutputFilename used to be
+// a global variable, but sometimes it would get destroyed before the Statistic,
+// causing havoc to ensue. We "fix" this by creating the string the first time
+// it is needed and never destroying it.
static ManagedStatic<std::string> LibSupportInfoOutputFilename;
static std::string &getLibSupportInfoOutputFilename() {
return *LibSupportInfoOutputFilename;
@@ -50,7 +50,6 @@ namespace {
cl::Hidden, cl::location(getLibSupportInfoOutputFilename()));
}
-// Return a file stream to print our output on.
std::unique_ptr<raw_fd_ostream> llvm::CreateInfoOutputFile() {
const std::string &OutputFilename = getLibSupportInfoOutputFilename();
if (OutputFilename.empty())
@@ -83,7 +82,7 @@ static TimerGroup *getDefaultTimerGroup() {
sys::SmartScopedLock<true> Lock(*TimerLock);
tmp = DefaultTimerGroup;
if (!tmp) {
- tmp = new TimerGroup("Miscellaneous Ungrouped Timers");
+ tmp = new TimerGroup("misc", "Miscellaneous Ungrouped Timers");
sys::MemoryFence();
DefaultTimerGroup = tmp;
}
@@ -95,13 +94,14 @@ static TimerGroup *getDefaultTimerGroup() {
// Timer Implementation
//===----------------------------------------------------------------------===//
-void Timer::init(StringRef N) {
- init(N, *getDefaultTimerGroup());
+void Timer::init(StringRef Name, StringRef Description) {
+ init(Name, Description, *getDefaultTimerGroup());
}
-void Timer::init(StringRef N, TimerGroup &tg) {
+void Timer::init(StringRef Name, StringRef Description, TimerGroup &tg) {
assert(!TG && "Timer already initialized");
- Name.assign(N.begin(), N.end());
+ this->Name.assign(Name.begin(), Name.end());
+ this->Description.assign(Description.begin(), Description.end());
Running = Triggered = false;
TG = &tg;
TG->addTimer(*this);
@@ -118,8 +118,10 @@ static inline size_t getMemUsage() {
}
TimeRecord TimeRecord::getCurrentTime(bool Start) {
+ using Seconds = std::chrono::duration<double, std::ratio<1>>;
TimeRecord Result;
- sys::TimeValue now(0,0), user(0,0), sys(0,0);
+ sys::TimePoint<> now;
+ std::chrono::nanoseconds user, sys;
if (Start) {
Result.MemUsed = getMemUsage();
@@ -129,9 +131,9 @@ TimeRecord TimeRecord::getCurrentTime(bool Start) {
Result.MemUsed = getMemUsage();
}
- Result.WallTime = now.seconds() + now.microseconds() / 1000000.0;
- Result.UserTime = user.seconds() + user.microseconds() / 1000000.0;
- Result.SystemTime = sys.seconds() + sys.microseconds() / 1000000.0;
+ Result.WallTime = Seconds(now.time_since_epoch()).count();
+ Result.UserTime = Seconds(user).count();
+ Result.SystemTime = Seconds(sys).count();
return Result;
}
@@ -193,54 +195,44 @@ public:
delete I->second.first;
}
- Timer &get(StringRef Name, StringRef GroupName) {
+ Timer &get(StringRef Name, StringRef Description, StringRef GroupName,
+ StringRef GroupDescription) {
sys::SmartScopedLock<true> L(*TimerLock);
std::pair<TimerGroup*, Name2TimerMap> &GroupEntry = Map[GroupName];
if (!GroupEntry.first)
- GroupEntry.first = new TimerGroup(GroupName);
+ GroupEntry.first = new TimerGroup(GroupName, GroupDescription);
Timer &T = GroupEntry.second[Name];
if (!T.isInitialized())
- T.init(Name, *GroupEntry.first);
+ T.init(Name, Description, *GroupEntry.first);
return T;
}
};
}
-static ManagedStatic<Name2TimerMap> NamedTimers;
static ManagedStatic<Name2PairMap> NamedGroupedTimers;
-static Timer &getNamedRegionTimer(StringRef Name) {
- sys::SmartScopedLock<true> L(*TimerLock);
-
- Timer &T = (*NamedTimers)[Name];
- if (!T.isInitialized())
- T.init(Name);
- return T;
-}
-
-NamedRegionTimer::NamedRegionTimer(StringRef Name,
- bool Enabled)
- : TimeRegion(!Enabled ? nullptr : &getNamedRegionTimer(Name)) {}
-
-NamedRegionTimer::NamedRegionTimer(StringRef Name, StringRef GroupName,
- bool Enabled)
- : TimeRegion(!Enabled ? nullptr : &NamedGroupedTimers->get(Name, GroupName)){}
+NamedRegionTimer::NamedRegionTimer(StringRef Name, StringRef Description,
+ StringRef GroupName,
+ StringRef GroupDescription, bool Enabled)
+ : TimeRegion(!Enabled ? nullptr
+ : &NamedGroupedTimers->get(Name, Description, GroupName,
+ GroupDescription)) {}
//===----------------------------------------------------------------------===//
// TimerGroup Implementation
//===----------------------------------------------------------------------===//
-/// TimerGroupList - This is the global list of TimerGroups, maintained by the
-/// TimerGroup ctor/dtor and is protected by the TimerLock lock.
+/// This is the global list of TimerGroups, maintained by the TimerGroup
+/// ctor/dtor and is protected by the TimerLock lock.
static TimerGroup *TimerGroupList = nullptr;
-TimerGroup::TimerGroup(StringRef name)
- : Name(name.begin(), name.end()), FirstTimer(nullptr) {
-
+TimerGroup::TimerGroup(StringRef Name, StringRef Description)
+ : Name(Name.begin(), Name.end()),
+ Description(Description.begin(), Description.end()) {
// Add the group to TimerGroupList.
sys::SmartScopedLock<true> L(*TimerLock);
if (TimerGroupList)
@@ -269,7 +261,7 @@ void TimerGroup::removeTimer(Timer &T) {
// If the timer was started, move its data to TimersToPrint.
if (T.hasTriggered())
- TimersToPrint.emplace_back(T.Time, T.Name);
+ TimersToPrint.emplace_back(T.Time, T.Name, T.Description);
T.TG = nullptr;
@@ -303,15 +295,15 @@ void TimerGroup::PrintQueuedTimers(raw_ostream &OS) {
std::sort(TimersToPrint.begin(), TimersToPrint.end());
TimeRecord Total;
- for (auto &RecordNamePair : TimersToPrint)
- Total += RecordNamePair.first;
+ for (const PrintRecord &Record : TimersToPrint)
+ Total += Record.Time;
// Print out timing header.
OS << "===" << std::string(73, '-') << "===\n";
// Figure out how many spaces to indent TimerGroup name.
- unsigned Padding = (80-Name.length())/2;
+ unsigned Padding = (80-Description.length())/2;
if (Padding > 80) Padding = 0; // Don't allow "negative" numbers
- OS.indent(Padding) << Name << '\n';
+ OS.indent(Padding) << Description << '\n';
OS << "===" << std::string(73, '-') << "===\n";
// If this is not an collection of ungrouped times, print the total time.
@@ -334,10 +326,10 @@ void TimerGroup::PrintQueuedTimers(raw_ostream &OS) {
OS << " --- Name ---\n";
// Loop through all of the timing data, printing it out.
- for (unsigned i = 0, e = TimersToPrint.size(); i != e; ++i) {
- const std::pair<TimeRecord, std::string> &Entry = TimersToPrint[e-i-1];
- Entry.first.print(Total, OS);
- OS << Entry.second << '\n';
+ for (const PrintRecord &Record : make_range(TimersToPrint.rbegin(),
+ TimersToPrint.rend())) {
+ Record.Time.print(Total, OS);
+ OS << Record.Description << '\n';
}
Total.print(Total, OS);
@@ -347,29 +339,66 @@ void TimerGroup::PrintQueuedTimers(raw_ostream &OS) {
TimersToPrint.clear();
}
-/// print - Print any started timers in this group and zero them.
-void TimerGroup::print(raw_ostream &OS) {
- sys::SmartScopedLock<true> L(*TimerLock);
-
+void TimerGroup::prepareToPrintList() {
// See if any of our timers were started, if so add them to TimersToPrint and
// reset them.
for (Timer *T = FirstTimer; T; T = T->Next) {
if (!T->hasTriggered()) continue;
- TimersToPrint.emplace_back(T->Time, T->Name);
-
+ TimersToPrint.emplace_back(T->Time, T->Name, T->Description);
+
// Clear out the time.
T->clear();
}
+}
+
+void TimerGroup::print(raw_ostream &OS) {
+ sys::SmartScopedLock<true> L(*TimerLock);
+
+ prepareToPrintList();
// If any timers were started, print the group.
if (!TimersToPrint.empty())
PrintQueuedTimers(OS);
}
-/// printAll - This static method prints all timers and clears them all out.
void TimerGroup::printAll(raw_ostream &OS) {
sys::SmartScopedLock<true> L(*TimerLock);
for (TimerGroup *TG = TimerGroupList; TG; TG = TG->Next)
TG->print(OS);
}
+
+void TimerGroup::printJSONValue(raw_ostream &OS, const PrintRecord &R,
+ const char *suffix, double Value) {
+ assert(!yaml::needsQuotes(Name) && "TimerGroup name needs no quotes");
+ assert(!yaml::needsQuotes(R.Name) && "Timer name needs no quotes");
+ OS << "\t\"time." << Name << '.' << R.Name << suffix << "\": " << Value;
+}
+
+const char *TimerGroup::printJSONValues(raw_ostream &OS, const char *delim) {
+ prepareToPrintList();
+ for (const PrintRecord &R : TimersToPrint) {
+ OS << delim;
+ delim = ",\n";
+
+ const TimeRecord &T = R.Time;
+ printJSONValue(OS, R, ".wall", T.getWallTime());
+ OS << delim;
+ printJSONValue(OS, R, ".user", T.getUserTime());
+ OS << delim;
+ printJSONValue(OS, R, ".sys", T.getSystemTime());
+ }
+ TimersToPrint.clear();
+ return delim;
+}
+
+const char *TimerGroup::printAllJSONValues(raw_ostream &OS, const char *delim) {
+ sys::SmartScopedLock<true> L(*TimerLock);
+ for (TimerGroup *TG = TimerGroupList; TG; TG = TG->Next)
+ delim = TG->printJSONValues(OS, delim);
+ return delim;
+}
+
+void TimerGroup::ConstructTimerLists() {
+ (void)*NamedGroupedTimers;
+}
diff --git a/contrib/llvm/lib/Support/TrigramIndex.cpp b/contrib/llvm/lib/Support/TrigramIndex.cpp
new file mode 100644
index 000000000000..85ab5287566b
--- /dev/null
+++ b/contrib/llvm/lib/Support/TrigramIndex.cpp
@@ -0,0 +1,111 @@
+//===-- TrigramIndex.cpp - a heuristic for SpecialCaseList ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// TrigramIndex implements a heuristic for SpecialCaseList that makes it
+// possible to filter out ~99% of incoming queries when all regular
+// expressions in the SpecialCaseList are simple wildcards with '*' and '.'.
+// If the rules are more complicated, the check is defeated and all queries
+// are passed on to the full regex.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/TrigramIndex.h"
+#include "llvm/ADT/SmallVector.h"
+
+#include <unordered_map>
+#include <set>
+#include <string>
+
+using namespace llvm;
+
+static const char RegexAdvancedMetachars[] = "()^$|+?[]\\{}";
+
+static bool isAdvancedMetachar(unsigned Char) {
+ return strchr(RegexAdvancedMetachars, Char) != nullptr;
+}
+
+void TrigramIndex::insert(std::string Regex) {
+ if (Defeated) return;
+ std::set<unsigned> Was;
+ unsigned Cnt = 0;
+ unsigned Tri = 0;
+ unsigned Len = 0;
+ bool Escaped = false;
+ for (unsigned Char : Regex) {
+ if (!Escaped) {
+ // Regular expressions allow escaping symbols by preceding them with '\'.
+ if (Char == '\\') {
+ Escaped = true;
+ continue;
+ }
+ if (isAdvancedMetachar(Char)) {
+ // This is a more complicated regex than we can handle here.
+ Defeated = true;
+ return;
+ }
+ if (Char == '.' || Char == '*') {
+ Tri = 0;
+ Len = 0;
+ continue;
+ }
+ }
+ if (Escaped && Char >= '1' && Char <= '9') {
+ Defeated = true;
+ return;
+ }
+ // We have already handled escaping and can reset the flag.
+ Escaped = false;
+ Tri = ((Tri << 8) + Char) & 0xFFFFFF;
+ Len++;
+ if (Len < 3)
+ continue;
+ // We don't want the index to grow too much for the popular trigrams,
+ // as they are weak signals. It's ok to still require them for the
+ // rules we have already processed. It's just a small additional
+ // computational cost.
+ if (Index[Tri].size() >= 4)
+ continue;
+ Cnt++;
+ if (!Was.count(Tri)) {
+ // Adding the current rule to the index.
+ Index[Tri].push_back(Counts.size());
+ Was.insert(Tri);
+ }
+ }
+ if (!Cnt) {
+ // This rule does not have any distinctive trigrams to rely on.
+ // We have to always call the full regex chain.
+ Defeated = true;
+ return;
+ }
+ Counts.push_back(Cnt);
+}
+
+bool TrigramIndex::isDefinitelyOut(StringRef Query) const {
+ if (Defeated)
+ return false;
+ std::vector<unsigned> CurCounts(Counts.size());
+ unsigned Tri = 0;
+ for (size_t I = 0; I < Query.size(); I++) {
+ Tri = ((Tri << 8) + Query[I]) & 0xFFFFFF;
+ if (I < 2)
+ continue;
+ const auto &II = Index.find(Tri);
+ if (II == Index.end())
+ continue;
+ for (size_t J : II->second) {
+ CurCounts[J]++;
+ // If some rule has accumulated enough matching trigrams, we have to look
+ // at the query more closely by running a full regex.
+ if (CurCounts[J] >= Counts[J])
+ return false;
+ }
+ }
+ return true;
+}
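The index keys on a 24-bit rolling window over the last three bytes (Tri = ((Tri << 8) + Char) & 0xFFFFFF), so a pattern body such as "foo/" contributes the trigrams "foo" and "oo/". A rough usage sketch, with made-up wildcard rules, of how a SpecialCaseList-style caller could use the heuristic to skip the expensive regex pass:

    #include "llvm/Support/TrigramIndex.h"
    #include "llvm/ADT/StringRef.h"

    bool mightMatch(llvm::StringRef Query) {
      llvm::TrigramIndex Idx;
      Idx.insert("*/foo/bar/*"); // simple wildcards: handled by the heuristic
      Idx.insert("*baz*");
      // If the query shares too few trigrams with every rule, the full regex
      // check can be skipped outright; otherwise fall through to it.
      return !Idx.isDefinitelyOut(Query);
    }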
diff --git a/contrib/llvm/lib/Support/Triple.cpp b/contrib/llvm/lib/Support/Triple.cpp
index 2bac2a310670..6783b40a125d 100644
--- a/contrib/llvm/lib/Support/Triple.cpp
+++ b/contrib/llvm/lib/Support/Triple.cpp
@@ -17,7 +17,7 @@
#include <cstring>
using namespace llvm;
-const char *Triple::getArchTypeName(ArchType Kind) {
+StringRef Triple::getArchTypeName(ArchType Kind) {
switch (Kind) {
case UnknownArch: return "unknown";
@@ -39,11 +39,14 @@ const char *Triple::getArchTypeName(ArchType Kind) {
case ppc: return "powerpc";
case r600: return "r600";
case amdgcn: return "amdgcn";
+ case riscv32: return "riscv32";
+ case riscv64: return "riscv64";
case sparc: return "sparc";
case sparcv9: return "sparcv9";
case sparcel: return "sparcel";
case systemz: return "s390x";
case tce: return "tce";
+ case tcele: return "tcele";
case thumb: return "thumb";
case thumbeb: return "thumbeb";
case x86: return "i386";
@@ -71,10 +74,10 @@ const char *Triple::getArchTypeName(ArchType Kind) {
llvm_unreachable("Invalid ArchType!");
}
-const char *Triple::getArchTypePrefix(ArchType Kind) {
+StringRef Triple::getArchTypePrefix(ArchType Kind) {
switch (Kind) {
default:
- return nullptr;
+ return StringRef();
case aarch64:
case aarch64_be: return "aarch64";
@@ -134,10 +137,13 @@ const char *Triple::getArchTypePrefix(ArchType Kind) {
case shave: return "shave";
case wasm32:
case wasm64: return "wasm";
+
+ case riscv32:
+ case riscv64: return "riscv";
}
}
-const char *Triple::getVendorTypeName(VendorType Kind) {
+StringRef Triple::getVendorTypeName(VendorType Kind) {
switch (Kind) {
case UnknownVendor: return "unknown";
@@ -160,7 +166,7 @@ const char *Triple::getVendorTypeName(VendorType Kind) {
llvm_unreachable("Invalid VendorType!");
}
-const char *Triple::getOSTypeName(OSType Kind) {
+StringRef Triple::getOSTypeName(OSType Kind) {
switch (Kind) {
case UnknownOS: return "unknown";
@@ -168,6 +174,7 @@ const char *Triple::getOSTypeName(OSType Kind) {
case Darwin: return "darwin";
case DragonFly: return "dragonfly";
case FreeBSD: return "freebsd";
+ case Fuchsia: return "fuchsia";
case IOS: return "ios";
case KFreeBSD: return "kfreebsd";
case Linux: return "linux";
@@ -192,12 +199,13 @@ const char *Triple::getOSTypeName(OSType Kind) {
case TvOS: return "tvos";
case WatchOS: return "watchos";
case Mesa3D: return "mesa3d";
+ case Contiki: return "contiki";
}
llvm_unreachable("Invalid OSType");
}
-const char *Triple::getEnvironmentTypeName(EnvironmentType Kind) {
+StringRef Triple::getEnvironmentTypeName(EnvironmentType Kind) {
switch (Kind) {
case UnknownEnvironment: return "unknown";
case GNU: return "gnu";
@@ -217,6 +225,7 @@ const char *Triple::getEnvironmentTypeName(EnvironmentType Kind) {
case Cygnus: return "cygnus";
case AMDOpenCL: return "amdopencl";
case CoreCLR: return "coreclr";
+ case OpenCL: return "opencl";
}
llvm_unreachable("Invalid EnvironmentType!");
@@ -258,12 +267,15 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
.Case("ppc64le", ppc64le)
.Case("r600", r600)
.Case("amdgcn", amdgcn)
+ .Case("riscv32", riscv32)
+ .Case("riscv64", riscv64)
.Case("hexagon", hexagon)
.Case("sparc", sparc)
.Case("sparcel", sparcel)
.Case("sparcv9", sparcv9)
.Case("systemz", systemz)
.Case("tce", tce)
+ .Case("tcele", tcele)
.Case("thumb", thumb)
.Case("thumbeb", thumbeb)
.Case("x86", x86)
@@ -373,12 +385,15 @@ static Triple::ArchType parseArch(StringRef ArchName) {
.Case("mips64el", Triple::mips64el)
.Case("r600", Triple::r600)
.Case("amdgcn", Triple::amdgcn)
+ .Case("riscv32", Triple::riscv32)
+ .Case("riscv64", Triple::riscv64)
.Case("hexagon", Triple::hexagon)
.Cases("s390x", "systemz", Triple::systemz)
.Case("sparc", Triple::sparc)
.Case("sparcel", Triple::sparcel)
.Cases("sparcv9", "sparc64", Triple::sparcv9)
.Case("tce", Triple::tce)
+ .Case("tcele", Triple::tcele)
.Case("xcore", Triple::xcore)
.Case("nvptx", Triple::nvptx)
.Case("nvptx64", Triple::nvptx64)
@@ -437,6 +452,7 @@ static Triple::OSType parseOS(StringRef OSName) {
.StartsWith("darwin", Triple::Darwin)
.StartsWith("dragonfly", Triple::DragonFly)
.StartsWith("freebsd", Triple::FreeBSD)
+ .StartsWith("fuchsia", Triple::Fuchsia)
.StartsWith("ios", Triple::IOS)
.StartsWith("kfreebsd", Triple::KFreeBSD)
.StartsWith("linux", Triple::Linux)
@@ -462,6 +478,7 @@ static Triple::OSType parseOS(StringRef OSName) {
.StartsWith("tvos", Triple::TvOS)
.StartsWith("watchos", Triple::WatchOS)
.StartsWith("mesa3d", Triple::Mesa3D)
+ .StartsWith("contiki", Triple::Contiki)
.Default(Triple::UnknownOS);
}
@@ -484,6 +501,7 @@ static Triple::EnvironmentType parseEnvironment(StringRef EnvironmentName) {
.StartsWith("cygnus", Triple::Cygnus)
.StartsWith("amdopencl", Triple::AMDOpenCL)
.StartsWith("coreclr", Triple::CoreCLR)
+ .StartsWith("opencl", Triple::OpenCL)
.Default(Triple::UnknownEnvironment);
}
@@ -546,6 +564,8 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) {
return Triple::ARMSubArch_v8_1a;
case ARM::AK_ARMV8_2A:
return Triple::ARMSubArch_v8_2a;
+ case ARM::AK_ARMV8R:
+ return Triple::ARMSubArch_v8r;
case ARM::AK_ARMV8MBaseline:
return Triple::ARMSubArch_v8m_baseline;
case ARM::AK_ARMV8MMainline:
@@ -555,7 +575,7 @@ static Triple::SubArchType parseSubArch(StringRef SubArchName) {
}
}
-static const char *getObjectFormatTypeName(Triple::ObjectFormatType Kind) {
+static StringRef getObjectFormatTypeName(Triple::ObjectFormatType Kind) {
switch (Kind) {
case Triple::UnknownObjectFormat: return "";
case Triple::COFF: return "coff";
@@ -605,6 +625,8 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) {
case Triple::r600:
case Triple::renderscript32:
case Triple::renderscript64:
+ case Triple::riscv32:
+ case Triple::riscv64:
case Triple::shave:
case Triple::sparc:
case Triple::sparcel:
@@ -613,6 +635,7 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) {
case Triple::spir64:
case Triple::systemz:
case Triple::tce:
+ case Triple::tcele:
case Triple::thumbeb:
case Triple::wasm32:
case Triple::wasm64:
@@ -1132,9 +1155,11 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
case llvm::Triple::nvptx:
case llvm::Triple::ppc:
case llvm::Triple::r600:
+ case llvm::Triple::riscv32:
case llvm::Triple::sparc:
case llvm::Triple::sparcel:
case llvm::Triple::tce:
+ case llvm::Triple::tcele:
case llvm::Triple::thumb:
case llvm::Triple::thumbeb:
case llvm::Triple::x86:
@@ -1160,6 +1185,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
case llvm::Triple::nvptx64:
case llvm::Triple::ppc64:
case llvm::Triple::ppc64le:
+ case llvm::Triple::riscv64:
case llvm::Triple::sparcv9:
case llvm::Triple::systemz:
case llvm::Triple::x86_64:
@@ -1212,9 +1238,11 @@ Triple Triple::get32BitArchVariant() const {
case Triple::nvptx:
case Triple::ppc:
case Triple::r600:
+ case Triple::riscv32:
case Triple::sparc:
case Triple::sparcel:
case Triple::tce:
+ case Triple::tcele:
case Triple::thumb:
case Triple::thumbeb:
case Triple::x86:
@@ -1234,6 +1262,7 @@ Triple Triple::get32BitArchVariant() const {
case Triple::nvptx64: T.setArch(Triple::nvptx); break;
case Triple::ppc64: T.setArch(Triple::ppc); break;
case Triple::sparcv9: T.setArch(Triple::sparc); break;
+ case Triple::riscv64: T.setArch(Triple::riscv32); break;
case Triple::x86_64: T.setArch(Triple::x86); break;
case Triple::amdil64: T.setArch(Triple::amdil); break;
case Triple::hsail64: T.setArch(Triple::hsail); break;
@@ -1255,6 +1284,7 @@ Triple Triple::get64BitArchVariant() const {
case Triple::msp430:
case Triple::r600:
case Triple::tce:
+ case Triple::tcele:
case Triple::xcore:
case Triple::sparcel:
case Triple::shave:
@@ -1275,6 +1305,7 @@ Triple Triple::get64BitArchVariant() const {
case Triple::nvptx64:
case Triple::ppc64:
case Triple::ppc64le:
+ case Triple::riscv64:
case Triple::sparcv9:
case Triple::systemz:
case Triple::x86_64:
@@ -1291,6 +1322,7 @@ Triple Triple::get64BitArchVariant() const {
case Triple::nvptx: T.setArch(Triple::nvptx64); break;
case Triple::ppc: T.setArch(Triple::ppc64); break;
case Triple::sparc: T.setArch(Triple::sparcv9); break;
+ case Triple::riscv32: T.setArch(Triple::riscv64); break;
case Triple::x86: T.setArch(Triple::x86_64); break;
case Triple::amdil: T.setArch(Triple::amdil64); break;
case Triple::hsail: T.setArch(Triple::hsail64); break;
@@ -1324,6 +1356,8 @@ Triple Triple::getBigEndianArchVariant() const {
case Triple::nvptx64:
case Triple::nvptx:
case Triple::r600:
+ case Triple::riscv32:
+ case Triple::riscv64:
case Triple::shave:
case Triple::spir64:
case Triple::spir:
@@ -1342,6 +1376,7 @@ Triple Triple::getBigEndianArchVariant() const {
T.setArch(UnknownArch);
break;
+ case Triple::tcele: T.setArch(Triple::tce); break;
case Triple::aarch64: T.setArch(Triple::aarch64_be); break;
case Triple::bpfel: T.setArch(Triple::bpfeb); break;
case Triple::mips64el:T.setArch(Triple::mips64); break;
@@ -1365,7 +1400,6 @@ Triple Triple::getLittleEndianArchVariant() const {
case Triple::ppc:
case Triple::sparcv9:
case Triple::systemz:
- case Triple::tce:
 // ARM is intentionally unsupported here; changing the architecture would
// drop any arch suffixes.
@@ -1374,6 +1408,7 @@ Triple Triple::getLittleEndianArchVariant() const {
T.setArch(UnknownArch);
break;
+ case Triple::tce: T.setArch(Triple::tcele); break;
case Triple::aarch64_be: T.setArch(Triple::aarch64); break;
case Triple::bpfeb: T.setArch(Triple::bpfel); break;
case Triple::mips64: T.setArch(Triple::mips64el); break;
@@ -1408,6 +1443,8 @@ bool Triple::isLittleEndian() const {
case Triple::nvptx:
case Triple::ppc64le:
case Triple::r600:
+ case Triple::riscv32:
+ case Triple::riscv64:
case Triple::shave:
case Triple::sparcel:
case Triple::spir64:
@@ -1418,6 +1455,7 @@ bool Triple::isLittleEndian() const {
case Triple::x86:
case Triple::x86_64:
case Triple::xcore:
+ case Triple::tcele:
case Triple::renderscript32:
case Triple::renderscript64:
return true;
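A small sanity check of the new RISC-V plumbing; the triple string is only an example, and the assertions follow directly from the switch cases added above.

    #include "llvm/ADT/Triple.h"
    #include <cassert>

    int main() {
      llvm::Triple T("riscv64-unknown-linux-gnu");
      assert(T.getArch() == llvm::Triple::riscv64);
      assert(T.isArch64Bit()); // riscv64 was added to the 64-bit pointer case
      // get32BitArchVariant()/get64BitArchVariant() now map between the pair.
      assert(T.get32BitArchVariant().getArch() == llvm::Triple::riscv32);
    }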
diff --git a/contrib/llvm/lib/Support/Twine.cpp b/contrib/llvm/lib/Support/Twine.cpp
index 5e989fb28b4d..465c6e6b8c4c 100644
--- a/contrib/llvm/lib/Support/Twine.cpp
+++ b/contrib/llvm/lib/Support/Twine.cpp
@@ -10,6 +10,7 @@
#include "llvm/ADT/Twine.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -18,6 +19,11 @@ std::string Twine::str() const {
if (LHSKind == StdStringKind && RHSKind == EmptyKind)
return *LHS.stdString;
+ // If we're storing a formatv_object, we can avoid an extra copy by formatting
+ // it immediately and returning the result.
+ if (LHSKind == FormatvObjectKind && RHSKind == EmptyKind)
+ return LHS.formatvObject->str();
+
// Otherwise, flatten and copy the contents first.
SmallString<256> Vec;
return toStringRef(Vec).str();
@@ -68,6 +74,9 @@ void Twine::printOneChild(raw_ostream &OS, Child Ptr,
case Twine::SmallStringKind:
OS << *Ptr.smallString;
break;
+ case Twine::FormatvObjectKind:
+ OS << *Ptr.formatvObject;
+ break;
case Twine::CharKind:
OS << Ptr.character;
break;
@@ -121,6 +130,9 @@ void Twine::printOneChildRepr(raw_ostream &OS, Child Ptr,
case Twine::SmallStringKind:
OS << "smallstring:\"" << *Ptr.smallString << "\"";
break;
+ case Twine::FormatvObjectKind:
+ OS << "formatv:\"" << *Ptr.formatvObject << "\"";
+ break;
case Twine::CharKind:
OS << "char:\"" << Ptr.character << "\"";
break;
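A sketch of the new fast path: when a formatv object is the only child of a Twine, str() renders it directly instead of flattening through a temporary buffer. This assumes the matching Twine(const formatv_object_base &) constructor from the header side of this change, which is not shown in this hunk.

    #include "llvm/ADT/Twine.h"
    #include "llvm/Support/FormatVariadic.h"
    #include <string>

    std::string describe(int N) {
      auto Fmt = llvm::formatv("value = {0:x}", N); // kept alive as a local
      return llvm::Twine(Fmt).str();                // FormatvObjectKind fast path
    }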
diff --git a/contrib/llvm/lib/Support/Unix/Memory.inc b/contrib/llvm/lib/Support/Unix/Memory.inc
index f3463e581735..edbc7938f0cb 100644
--- a/contrib/llvm/lib/Support/Unix/Memory.inc
+++ b/contrib/llvm/lib/Support/Unix/Memory.inc
@@ -91,17 +91,9 @@ Memory::allocateMappedMemory(size_t NumBytes,
const size_t NumPages = (NumBytes+PageSize-1)/PageSize;
int fd = -1;
-#ifdef NEED_DEV_ZERO_FOR_MMAP
- static int zero_fd = open("/dev/zero", O_RDWR);
- if (zero_fd == -1) {
- EC = std::error_code(errno, std::generic_category());
- return MemoryBlock();
- }
- fd = zero_fd;
-#endif
int MMFlags = MAP_PRIVATE |
-#ifdef HAVE_MMAP_ANONYMOUS
+#ifdef MAP_ANONYMOUS
MAP_ANONYMOUS
#else
MAP_ANON
@@ -161,7 +153,10 @@ Memory::protectMappedMemory(const MemoryBlock &M, unsigned Flags) {
int Protect = getPosixProtectionFlags(Flags);
- int Result = ::mprotect((void*)((uintptr_t)M.Address & ~(PageSize-1)), PageSize*((M.Size+PageSize-1)/PageSize), Protect);
+ uintptr_t Start = alignAddr((uint8_t *)M.Address - PageSize + 1, PageSize);
+ uintptr_t End = alignAddr((uint8_t *)M.Address + M.Size, PageSize);
+ int Result = ::mprotect((void *)Start, End - Start, Protect);
+
if (Result != 0)
return std::error_code(errno, std::generic_category());
@@ -185,17 +180,9 @@ Memory::AllocateRWX(size_t NumBytes, const MemoryBlock* NearBlock,
size_t NumPages = (NumBytes+PageSize-1)/PageSize;
int fd = -1;
-#ifdef NEED_DEV_ZERO_FOR_MMAP
- static int zero_fd = open("/dev/zero", O_RDWR);
- if (zero_fd == -1) {
- MakeErrMsg(ErrMsg, "Can't open /dev/zero device");
- return MemoryBlock();
- }
- fd = zero_fd;
-#endif
int flags = MAP_PRIVATE |
-#ifdef HAVE_MMAP_ANONYMOUS
+#ifdef MAP_ANONYMOUS
MAP_ANONYMOUS
#else
MAP_ANON
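The rewritten protectMappedMemory rounds the start of the block down and the end up to page boundaries, so a block that straddles pages is protected with a single mprotect call. A worked example of that arithmetic, assuming 4096-byte pages and using a local align-up helper in place of llvm::alignAddr:

    #include <cstdint>
    #include <cassert>

    int main() {
      const uintptr_t PageSize = 4096;
      const uintptr_t Address = 0x12345; // block starts mid-page
      const uintptr_t Size = 0x100;

      auto alignUp = [&](uintptr_t A) { return (A + PageSize - 1) & ~(PageSize - 1); };
      // Rounding down is expressed as aligning (Address - PageSize + 1) up.
      uintptr_t Start = alignUp(Address - PageSize + 1); // 0x12000
      uintptr_t End   = alignUp(Address + Size);         // 0x13000
      assert(Start == 0x12000 && End == 0x13000);
      // mprotect then covers exactly the pages [Start, End).
    }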
diff --git a/contrib/llvm/lib/Support/Unix/Path.inc b/contrib/llvm/lib/Support/Unix/Path.inc
index 84aafcb70d73..e0b11aaff007 100644
--- a/contrib/llvm/lib/Support/Unix/Path.inc
+++ b/contrib/llvm/lib/Support/Unix/Path.inc
@@ -53,7 +53,7 @@
#include <sys/attr.h>
#endif
-// Both stdio.h and cstdio are included via different pathes and
+// Both stdio.h and cstdio are included via different paths and
// stdcxx's cstdio doesn't include stdio.h, so it doesn't #undef the macros
// either.
#undef ferror
@@ -90,7 +90,8 @@ namespace sys {
namespace fs {
#if defined(__FreeBSD__) || defined (__NetBSD__) || defined(__Bitrig__) || \
defined(__OpenBSD__) || defined(__minix) || defined(__FreeBSD_kernel__) || \
- defined(__linux__) || defined(__CYGWIN__) || defined(__DragonFly__)
+ defined(__linux__) || defined(__CYGWIN__) || defined(__DragonFly__) || \
+ defined(_AIX)
static int
test_dir(char ret[PATH_MAX], const char *dir, const char *bin)
{
@@ -161,7 +162,7 @@ std::string getMainExecutable(const char *argv0, void *MainAddr) {
}
#elif defined(__FreeBSD__) || defined (__NetBSD__) || defined(__Bitrig__) || \
defined(__OpenBSD__) || defined(__minix) || defined(__DragonFly__) || \
- defined(__FreeBSD_kernel__)
+ defined(__FreeBSD_kernel__) || defined(_AIX)
char exe_path[PATH_MAX];
if (getprogpath(exe_path, argv0) != NULL)
@@ -197,16 +198,12 @@ std::string getMainExecutable(const char *argv0, void *MainAddr) {
return "";
}
-TimeValue file_status::getLastAccessedTime() const {
- TimeValue Ret;
- Ret.fromEpochTime(fs_st_atime);
- return Ret;
+TimePoint<> file_status::getLastAccessedTime() const {
+ return toTimePoint(fs_st_atime);
}
-TimeValue file_status::getLastModificationTime() const {
- TimeValue Ret;
- Ret.fromEpochTime(fs_st_mtime);
- return Ret;
+TimePoint<> file_status::getLastModificationTime() const {
+ return toTimePoint(fs_st_mtime);
}
UniqueID file_status::getUniqueID() const {
@@ -288,6 +285,19 @@ std::error_code create_link(const Twine &to, const Twine &from) {
return std::error_code();
}
+std::error_code create_hard_link(const Twine &to, const Twine &from) {
+ // Get arguments.
+ SmallString<128> from_storage;
+ SmallString<128> to_storage;
+ StringRef f = from.toNullTerminatedStringRef(from_storage);
+ StringRef t = to.toNullTerminatedStringRef(to_storage);
+
+ if (::link(t.begin(), f.begin()) == -1)
+ return std::error_code(errno, std::generic_category());
+
+ return std::error_code();
+}
+
std::error_code remove(const Twine &path, bool IgnoreNonExisting) {
SmallString<128> path_storage;
StringRef p = path.toNullTerminatedStringRef(path_storage);
@@ -329,8 +339,17 @@ std::error_code rename(const Twine &from, const Twine &to) {
}
std::error_code resize_file(int FD, uint64_t Size) {
+#if defined(HAVE_POSIX_FALLOCATE)
+ // If we have posix_fallocate, use it. Unlike ftruncate, it always allocates
+ // space, so we get an error if the disk is full.
+ if (int Err = ::posix_fallocate(FD, 0, Size))
+ return std::error_code(Err, std::generic_category());
+#else
+ // Use ftruncate as a fallback. It may or may not allocate space. At least on
+ // OS X with HFS+ it does.
if (::ftruncate(FD, Size) == -1)
return std::error_code(errno, std::generic_category());
+#endif
return std::error_code();
}
@@ -436,20 +455,17 @@ std::error_code status(int FD, file_status &Result) {
return fillStatus(StatRet, Status, Result);
}
-std::error_code setLastModificationAndAccessTime(int FD, TimeValue Time) {
+std::error_code setLastModificationAndAccessTime(int FD, TimePoint<> Time) {
#if defined(HAVE_FUTIMENS)
timespec Times[2];
- Times[0].tv_sec = Time.toEpochTime();
- Times[0].tv_nsec = 0;
- Times[1] = Times[0];
+ Times[0] = Times[1] = sys::toTimeSpec(Time);
if (::futimens(FD, Times))
return std::error_code(errno, std::generic_category());
return std::error_code();
#elif defined(HAVE_FUTIMES)
timeval Times[2];
- Times[0].tv_sec = Time.toEpochTime();
- Times[0].tv_usec = 0;
- Times[1] = Times[0];
+ Times[0] = Times[1] = sys::toTimeVal(
+ std::chrono::time_point_cast<std::chrono::microseconds>(Time));
if (::futimes(FD, Times))
return std::error_code(errno, std::generic_category());
return std::error_code();
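Usage sketch for the new create_hard_link: as with create_link, the first argument is the existing file and the second is the new link name (the underlying call is link(to, from)). The paths and helper below are placeholders.

    #include "llvm/Support/FileSystem.h"
    #include "llvm/Support/raw_ostream.h"
    #include <system_error>

    bool addAlias() {
      std::error_code EC =
          llvm::sys::fs::create_hard_link("/tmp/existing.o", "/tmp/alias.o");
      if (EC) {
        llvm::errs() << "hard link failed: " << EC.message() << '\n';
        return false;
      }
      return true;
    }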
diff --git a/contrib/llvm/lib/Support/Unix/Process.inc b/contrib/llvm/lib/Support/Unix/Process.inc
index d81836b92dae..16f8f5a98e52 100644
--- a/contrib/llvm/lib/Support/Unix/Process.inc
+++ b/contrib/llvm/lib/Support/Unix/Process.inc
@@ -17,7 +17,6 @@
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Mutex.h"
#include "llvm/Support/MutexGuard.h"
-#include "llvm/Support/TimeValue.h"
#if HAVE_FCNTL_H
#include <fcntl.h>
#endif
@@ -60,22 +59,14 @@
using namespace llvm;
using namespace sys;
-static std::pair<TimeValue, TimeValue> getRUsageTimes() {
+static std::pair<std::chrono::microseconds, std::chrono::microseconds> getRUsageTimes() {
#if defined(HAVE_GETRUSAGE)
struct rusage RU;
::getrusage(RUSAGE_SELF, &RU);
- return std::make_pair(
- TimeValue(
- static_cast<TimeValue::SecondsType>(RU.ru_utime.tv_sec),
- static_cast<TimeValue::NanoSecondsType>(
- RU.ru_utime.tv_usec * TimeValue::NANOSECONDS_PER_MICROSECOND)),
- TimeValue(
- static_cast<TimeValue::SecondsType>(RU.ru_stime.tv_sec),
- static_cast<TimeValue::NanoSecondsType>(
- RU.ru_stime.tv_usec * TimeValue::NANOSECONDS_PER_MICROSECOND)));
+ return { toDuration(RU.ru_utime), toDuration(RU.ru_stime) };
#else
#warning Cannot get usage times on this platform
- return std::make_pair(TimeValue(), TimeValue());
+ return { std::chrono::microseconds::zero(), std::chrono::microseconds::zero() };
#endif
}
@@ -121,9 +112,9 @@ size_t Process::GetMallocUsage() {
#endif
}
-void Process::GetTimeUsage(TimeValue &elapsed, TimeValue &user_time,
- TimeValue &sys_time) {
- elapsed = TimeValue::now();
+void Process::GetTimeUsage(TimePoint<> &elapsed, std::chrono::nanoseconds &user_time,
+ std::chrono::nanoseconds &sys_time) {
+ elapsed = std::chrono::system_clock::now();
std::tie(user_time, sys_time) = getRUsageTimes();
}
@@ -429,7 +420,7 @@ const char *Process::ResetColor() {
return "\033[0m";
}
-#if !defined(HAVE_DECL_ARC4RANDOM) || !HAVE_DECL_ARC4RANDOM
+#if !HAVE_DECL_ARC4RANDOM
static unsigned GetRandomNumberSeed() {
// Attempt to get the initial seed from /dev/urandom, if possible.
int urandomFD = open("/dev/urandom", O_RDONLY);
@@ -449,13 +440,13 @@ static unsigned GetRandomNumberSeed() {
// Otherwise, swizzle the current time and the process ID to form a reasonable
// seed.
- TimeValue Now = TimeValue::now();
- return hash_combine(Now.seconds(), Now.nanoseconds(), ::getpid());
+ const auto Now = std::chrono::high_resolution_clock::now();
+ return hash_combine(Now.time_since_epoch().count(), ::getpid());
}
#endif
unsigned llvm::sys::Process::GetRandomNumber() {
-#if defined(HAVE_DECL_ARC4RANDOM) && HAVE_DECL_ARC4RANDOM
+#if HAVE_DECL_ARC4RANDOM
return arc4random();
#else
static int x = (static_cast<void>(::srand(GetRandomNumberSeed())), 0);
diff --git a/contrib/llvm/lib/Support/Unix/Signals.inc b/contrib/llvm/lib/Support/Unix/Signals.inc
index 55fd76d375a2..3750d7f4c09d 100644
--- a/contrib/llvm/lib/Support/Unix/Signals.inc
+++ b/contrib/llvm/lib/Support/Unix/Signals.inc
@@ -14,6 +14,7 @@
#include "Unix.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/Demangle/Demangle.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FileUtilities.h"
@@ -33,9 +34,6 @@
#if HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif
-#if HAVE_CXXABI_H
-#include <cxxabi.h>
-#endif
#if HAVE_DLFCN_H
#include <dlfcn.h>
#endif
@@ -45,15 +43,15 @@
#if HAVE_LINK_H
#include <link.h>
#endif
-#if HAVE_UNWIND_BACKTRACE
+#ifdef HAVE__UNWIND_BACKTRACE
// FIXME: We should be able to use <unwind.h> for any target that has an
// _Unwind_Backtrace function, but on FreeBSD the configure test passes
// despite the function not existing, and on Android, <unwind.h> conflicts
// with <link.h>.
-#ifdef __GLIBC__
+#if defined(__GLIBC__) || defined(__APPLE__)
#include <unwind.h>
#else
-#undef HAVE_UNWIND_BACKTRACE
+#undef HAVE__UNWIND_BACKTRACE
#endif
#endif
@@ -120,14 +118,15 @@ static void RegisterHandler(int Signal) {
}
#if defined(HAVE_SIGALTSTACK)
-// Hold onto the old alternate signal stack so that it's not reported as a leak.
-// We don't make any attempt to remove our alt signal stack if we remove our
-// signal handlers; that can't be done reliably if someone else is also trying
-// to do the same thing.
+// Hold onto both the old and the new alternate signal stacks so that they are
+// not reported as leaks. We don't make any attempt to remove our alt signal
+// stack if we remove our signal handlers; that can't be done reliably if
+// someone else is also trying to do the same thing.
static stack_t OldAltStack;
+static void* NewAltStackPointer;
static void CreateSigAltStack() {
- const size_t AltStackSize = MINSIGSTKSZ + 8192;
+ const size_t AltStackSize = MINSIGSTKSZ + 64 * 1024;
// If we're executing on the alternate stack, or we already have an alternate
// signal stack that we're happy with, there's nothing for us to do. Don't
@@ -140,6 +139,7 @@ static void CreateSigAltStack() {
stack_t AltStack = {};
AltStack.ss_sp = reinterpret_cast<char *>(malloc(AltStackSize));
+ NewAltStackPointer = AltStack.ss_sp; // Save to avoid reporting a leak.
AltStack.ss_size = AltStackSize;
if (sigaltstack(&AltStack, &OldAltStack) != 0)
free(AltStack.ss_sp);
@@ -284,7 +284,7 @@ bool llvm::sys::RemoveFileOnSignal(StringRef Filename,
void llvm::sys::DontRemoveFileOnSignal(StringRef Filename) {
sys::SmartScopedLock<true> Guard(*SignalsMutex);
std::vector<std::string>::reverse_iterator RI =
- std::find(FilesToRemove->rbegin(), FilesToRemove->rend(), Filename);
+ find(reverse(*FilesToRemove), Filename);
std::vector<std::string>::iterator I = FilesToRemove->end();
if (RI != FilesToRemove->rend())
I = FilesToRemove->erase(RI.base()-1);
@@ -298,7 +298,7 @@ void llvm::sys::AddSignalHandler(void (*FnPtr)(void *), void *Cookie) {
RegisterHandlers();
}
-#if defined(HAVE_BACKTRACE) && defined(ENABLE_BACKTRACES) && HAVE_LINK_H && \
+#if defined(HAVE_BACKTRACE) && ENABLE_BACKTRACES && HAVE_LINK_H && \
(defined(__linux__) || defined(__FreeBSD__) || \
defined(__FreeBSD_kernel__) || defined(__NetBSD__))
struct DlIteratePhdrData {
@@ -353,9 +353,9 @@ static bool findModulesAndOffsets(void **StackTrace, int Depth,
StringSaver &StrPool) {
return false;
}
-#endif // defined(HAVE_BACKTRACE) && defined(ENABLE_BACKTRACES) && ...
+#endif // defined(HAVE_BACKTRACE) && ENABLE_BACKTRACES && ...
-#if defined(ENABLE_BACKTRACES) && defined(HAVE_UNWIND_BACKTRACE)
+#if ENABLE_BACKTRACES && defined(HAVE__UNWIND_BACKTRACE)
static int unwindBacktrace(void **StackTrace, int MaxEntries) {
if (MaxEntries < 0)
return 0;
@@ -393,7 +393,7 @@ static int unwindBacktrace(void **StackTrace, int MaxEntries) {
// On glibc systems we have the 'backtrace' function, which works nicely, but
// doesn't demangle symbols.
void llvm::sys::PrintStackTrace(raw_ostream &OS) {
-#if defined(ENABLE_BACKTRACES)
+#if ENABLE_BACKTRACES
static void *StackTrace[256];
int depth = 0;
#if defined(HAVE_BACKTRACE)
@@ -401,7 +401,7 @@ void llvm::sys::PrintStackTrace(raw_ostream &OS) {
if (!depth)
depth = backtrace(StackTrace, static_cast<int>(array_lengthof(StackTrace)));
#endif
-#if defined(HAVE_UNWIND_BACKTRACE)
+#if defined(HAVE__UNWIND_BACKTRACE)
// Try _Unwind_Backtrace() if backtrace() failed.
if (!depth)
depth = unwindBacktrace(StackTrace,
@@ -441,12 +441,8 @@ void llvm::sys::PrintStackTrace(raw_ostream &OS) {
if (dlinfo.dli_sname != nullptr) {
OS << ' ';
-# if HAVE_CXXABI_H
int res;
- char* d = abi::__cxa_demangle(dlinfo.dli_sname, nullptr, nullptr, &res);
-# else
- char* d = NULL;
-# endif
+ char* d = itaniumDemangle(dlinfo.dli_sname, nullptr, nullptr, &res);
if (!d) OS << dlinfo.dli_sname;
else OS << d;
free(d);
@@ -479,7 +475,7 @@ void llvm::sys::PrintStackTraceOnErrorSignal(StringRef Argv0,
AddSignalHandler(PrintStackTraceSignalHandler, nullptr);
-#if defined(__APPLE__) && defined(ENABLE_CRASH_OVERRIDES)
+#if defined(__APPLE__) && ENABLE_CRASH_OVERRIDES
// Environment variable to disable any kind of crash dialog.
if (DisableCrashReporting || getenv("LLVM_DISABLE_CRASH_REPORT")) {
mach_port_t self = mach_task_self();
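The stack-trace printer now calls the in-tree demangler instead of depending on <cxxabi.h>. A minimal standalone sketch of that call; the mangled name is only an example, and the returned buffer must be freed by the caller.

    #include "llvm/Demangle/Demangle.h"
    #include <cstdio>
    #include <cstdlib>

    int main() {
      int Status = 0;
      char *D = llvm::itaniumDemangle("_ZN4llvm3sys15PrintStackTraceEv",
                                      nullptr, nullptr, &Status);
      std::printf("%s\n", (D && Status == 0) ? D : "(demangling failed)");
      std::free(D);
    }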
diff --git a/contrib/llvm/lib/Support/Unix/TimeValue.inc b/contrib/llvm/lib/Support/Unix/TimeValue.inc
deleted file mode 100644
index 042e0dacc346..000000000000
--- a/contrib/llvm/lib/Support/Unix/TimeValue.inc
+++ /dev/null
@@ -1,54 +0,0 @@
-//===- Unix/TimeValue.cpp - Unix TimeValue Implementation -------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements the Unix specific portion of the TimeValue class.
-//
-//===----------------------------------------------------------------------===//
-
-//===----------------------------------------------------------------------===//
-//=== WARNING: Implementation here must contain only generic UNIX code that
-//=== is guaranteed to work on *all* UNIX variants.
-//===----------------------------------------------------------------------===//
-
-#include "Unix.h"
-
-namespace llvm {
- using namespace sys;
-
-std::string TimeValue::str() const {
- time_t OurTime = time_t(this->toEpochTime());
- struct tm Storage;
- struct tm *LT = ::localtime_r(&OurTime, &Storage);
- assert(LT);
- char Buffer1[sizeof("YYYY-MM-DD HH:MM:SS")];
- strftime(Buffer1, sizeof(Buffer1), "%Y-%m-%d %H:%M:%S", LT);
- char Buffer2[sizeof("YYYY-MM-DD HH:MM:SS.MMMUUUNNN")];
- snprintf(Buffer2, sizeof(Buffer2), "%s.%.9u", Buffer1, this->nanoseconds());
- return std::string(Buffer2);
-}
-
-TimeValue TimeValue::now() {
- struct timeval the_time;
- timerclear(&the_time);
- if (0 != ::gettimeofday(&the_time,nullptr)) {
- // This is *really* unlikely to occur because the only gettimeofday
- // errors concern the timezone parameter which we're passing in as 0.
- // In the unlikely case it does happen, just return MinTime, no error
- // message needed.
- return MinTime();
- }
-
- return TimeValue(
- static_cast<TimeValue::SecondsType>( the_time.tv_sec +
- PosixZeroTimeSeconds ),
- static_cast<TimeValue::NanoSecondsType>( the_time.tv_usec *
- NANOSECONDS_PER_MICROSECOND ) );
-}
-
-}
diff --git a/contrib/llvm/lib/Support/Unix/Unix.h b/contrib/llvm/lib/Support/Unix/Unix.h
index 871e612f6c16..239a6d60aaef 100644
--- a/contrib/llvm/lib/Support/Unix/Unix.h
+++ b/contrib/llvm/lib/Support/Unix/Unix.h
@@ -19,7 +19,8 @@
//=== is guaranteed to work on all UNIX variants.
//===----------------------------------------------------------------------===//
-#include "llvm/Config/config.h" // Get autoconf configuration settings
+#include "llvm/Config/config.h" // Get autoconf configuration settings
+#include "llvm/Support/Chrono.h"
#include "llvm/Support/Errno.h"
#include <algorithm>
#include <assert.h>
@@ -48,6 +49,10 @@
# include <dlfcn.h>
#endif
+#ifdef HAVE_FCNTL_H
+# include <fcntl.h>
+#endif
+
/// This function builds an error message into \p ErrMsg using the \p prefix
/// string and the Unix error number given by \p errnum. If errnum is -1, the
/// default then the value of errno is used.
@@ -65,4 +70,37 @@ static inline bool MakeErrMsg(
return true;
}
+namespace llvm {
+namespace sys {
+
+/// Convert a struct timeval to a duration. Note that timeval can be used both
+/// as a time point and a duration. Be sure to check what the input represents.
+inline std::chrono::microseconds toDuration(const struct timeval &TV) {
+ return std::chrono::seconds(TV.tv_sec) +
+ std::chrono::microseconds(TV.tv_usec);
+}
+
+/// Convert a time point to struct timespec.
+inline struct timespec toTimeSpec(TimePoint<> TP) {
+ using namespace std::chrono;
+
+ struct timespec RetVal;
+ RetVal.tv_sec = toTimeT(TP);
+ RetVal.tv_nsec = (TP.time_since_epoch() % seconds(1)).count();
+ return RetVal;
+}
+
+/// Convert a time point to struct timeval.
+inline struct timeval toTimeVal(TimePoint<std::chrono::microseconds> TP) {
+ using namespace std::chrono;
+
+ struct timeval RetVal;
+ RetVal.tv_sec = toTimeT(TP);
+ RetVal.tv_usec = (TP.time_since_epoch() % seconds(1)).count();
+ return RetVal;
+}
+
+} // namespace sys
+} // namespace llvm
+
#endif
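The helpers above build a duration from a timeval, and split a TimePoint<> into whole seconds plus the sub-second remainder ("TP.time_since_epoch() % seconds(1)"). A self-contained illustration of the same arithmetic, with arbitrary values and without the LLVM headers:

    #include <cassert>
    #include <chrono>
    #include <sys/time.h>

    int main() {
      struct timeval TV;
      TV.tv_sec = 2;
      TV.tv_usec = 500000; // 2.5 s, treated as a duration here

      auto D = std::chrono::seconds(TV.tv_sec) +
               std::chrono::microseconds(TV.tv_usec); // what toDuration computes
      assert(D == std::chrono::milliseconds(2500));

      // Going the other way, tv_sec receives the whole seconds and
      // tv_nsec/tv_usec the remainder, exactly as toTimeSpec/toTimeVal do above.
    }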
diff --git a/contrib/llvm/lib/Support/Windows/Path.inc b/contrib/llvm/lib/Support/Windows/Path.inc
index fab6aeca54a9..27b250b428a5 100644
--- a/contrib/llvm/lib/Support/Windows/Path.inc
+++ b/contrib/llvm/lib/Support/Windows/Path.inc
@@ -164,24 +164,18 @@ ErrorOr<space_info> disk_space(const Twine &Path) {
return SpaceInfo;
}
-TimeValue file_status::getLastAccessedTime() const {
- ULARGE_INTEGER UI;
- UI.LowPart = LastAccessedTimeLow;
- UI.HighPart = LastAccessedTimeHigh;
-
- TimeValue Ret;
- Ret.fromWin32Time(UI.QuadPart);
- return Ret;
+TimePoint<> file_status::getLastAccessedTime() const {
+ FILETIME Time;
+ Time.dwLowDateTime = LastAccessedTimeLow;
+ Time.dwHighDateTime = LastAccessedTimeHigh;
+ return toTimePoint(Time);
}
-TimeValue file_status::getLastModificationTime() const {
- ULARGE_INTEGER UI;
- UI.LowPart = LastWriteTimeLow;
- UI.HighPart = LastWriteTimeHigh;
-
- TimeValue Ret;
- Ret.fromWin32Time(UI.QuadPart);
- return Ret;
+TimePoint<> file_status::getLastModificationTime() const {
+ FILETIME Time;
+ Time.dwLowDateTime = LastWriteTimeLow;
+ Time.dwHighDateTime = LastWriteTimeHigh;
+ return toTimePoint(Time);
}
std::error_code current_path(SmallVectorImpl<char> &result) {
@@ -238,6 +232,10 @@ std::error_code create_link(const Twine &to, const Twine &from) {
return std::error_code();
}
+std::error_code create_hard_link(const Twine &to, const Twine &from) {
+ return create_link(to, from);
+}
+
std::error_code remove(const Twine &path, bool IgnoreNonExisting) {
SmallVector<wchar_t, 128> path_utf16;
@@ -513,12 +511,8 @@ std::error_code status(int FD, file_status &Result) {
return getStatus(FileHandle, Result);
}
-std::error_code setLastModificationAndAccessTime(int FD, TimeValue Time) {
- ULARGE_INTEGER UI;
- UI.QuadPart = Time.toWin32Time();
- FILETIME FT;
- FT.dwLowDateTime = UI.LowPart;
- FT.dwHighDateTime = UI.HighPart;
+std::error_code setLastModificationAndAccessTime(int FD, TimePoint<> Time) {
+ FILETIME FT = toFILETIME(Time);
HANDLE FileHandle = reinterpret_cast<HANDLE>(_get_osfhandle(FD));
if (!SetFileTime(FileHandle, NULL, &FT, &FT))
return mapWindowsError(::GetLastError());
diff --git a/contrib/llvm/lib/Support/Windows/Process.inc b/contrib/llvm/lib/Support/Windows/Process.inc
index b012991c2a54..8d646b3217a0 100644
--- a/contrib/llvm/lib/Support/Windows/Process.inc
+++ b/contrib/llvm/lib/Support/Windows/Process.inc
@@ -49,18 +49,6 @@
using namespace llvm;
using namespace sys;
-static TimeValue getTimeValueFromFILETIME(FILETIME Time) {
- ULARGE_INTEGER TimeInteger;
- TimeInteger.LowPart = Time.dwLowDateTime;
- TimeInteger.HighPart = Time.dwHighDateTime;
-
- // FILETIME's are # of 100 nanosecond ticks (1/10th of a microsecond)
- return TimeValue(
- static_cast<TimeValue::SecondsType>(TimeInteger.QuadPart / 10000000),
- static_cast<TimeValue::NanoSecondsType>(
- (TimeInteger.QuadPart % 10000000) * 100));
-}
-
// This function retrieves the page size using GetNativeSystemInfo() and is
// present solely so it can be called once to initialize the self_process member
// below.
@@ -93,17 +81,17 @@ Process::GetMallocUsage()
return size;
}
-void Process::GetTimeUsage(TimeValue &elapsed, TimeValue &user_time,
- TimeValue &sys_time) {
- elapsed = TimeValue::now();
+void Process::GetTimeUsage(TimePoint<> &elapsed, std::chrono::nanoseconds &user_time,
+ std::chrono::nanoseconds &sys_time) {
+ elapsed = std::chrono::system_clock::now();
FILETIME ProcCreate, ProcExit, KernelTime, UserTime;
if (GetProcessTimes(GetCurrentProcess(), &ProcCreate, &ProcExit, &KernelTime,
&UserTime) == 0)
return;
- user_time = getTimeValueFromFILETIME(UserTime);
- sys_time = getTimeValueFromFILETIME(KernelTime);
+ user_time = toDuration(UserTime);
+ sys_time = toDuration(KernelTime);
}
// Some LLVM programs such as bugpoint produce core files as a normal part of
diff --git a/contrib/llvm/lib/Support/Windows/Signals.inc b/contrib/llvm/lib/Support/Windows/Signals.inc
index 1e2fa4210df6..f739421eece4 100644
--- a/contrib/llvm/lib/Support/Windows/Signals.inc
+++ b/contrib/llvm/lib/Support/Windows/Signals.inc
@@ -28,6 +28,7 @@
#ifdef __MINGW32__
#include <imagehlp.h>
#else
+ #include <crtdbg.h>
#include <dbghelp.h>
#endif
#include <psapi.h>
@@ -266,7 +267,7 @@ static BOOL CALLBACK findModuleCallback(PCSTR ModuleName,
continue;
intptr_t Addr = (intptr_t)Data->StackTrace[I];
if (Beg <= Addr && Addr < End) {
- Data->Modules[I] = Data->StrPool->save(ModuleName);
+ Data->Modules[I] = Data->StrPool->save(ModuleName).data();
Data->Offsets[I] = Addr - Beg;
}
}
@@ -470,7 +471,7 @@ void sys::DontRemoveFileOnSignal(StringRef Filename) {
RegisterHandler();
std::vector<std::string>::reverse_iterator I =
- std::find(FilesToRemove->rbegin(), FilesToRemove->rend(), Filename);
+ find(reverse(*FilesToRemove), Filename);
if (I != FilesToRemove->rend())
FilesToRemove->erase(I.base()-1);
diff --git a/contrib/llvm/lib/Support/Windows/TimeValue.inc b/contrib/llvm/lib/Support/Windows/TimeValue.inc
deleted file mode 100644
index b90b4f1da008..000000000000
--- a/contrib/llvm/lib/Support/Windows/TimeValue.inc
+++ /dev/null
@@ -1,61 +0,0 @@
-//===- Win32/TimeValue.cpp - Win32 TimeValue Implementation -----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file provides the Win32 implementation of the TimeValue class.
-//
-//===----------------------------------------------------------------------===//
-
-#include "WindowsSupport.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/raw_ostream.h"
-#include <cctype>
-#include <time.h>
-
-using namespace llvm;
-using namespace llvm::sys;
-
-//===----------------------------------------------------------------------===//
-//=== WARNING: Implementation here must contain only Win32 specific code.
-//===----------------------------------------------------------------------===//
-
-TimeValue TimeValue::now() {
- uint64_t ft;
- GetSystemTimeAsFileTime(reinterpret_cast<FILETIME *>(&ft));
-
- TimeValue t(0, 0);
- t.fromWin32Time(ft);
- return t;
-}
-
-std::string TimeValue::str() const {
- std::string S;
- struct tm *LT;
-#ifdef __MINGW32__
- // Old versions of mingw don't have _localtime64_s. Remove this once we drop support
- // for them.
- time_t OurTime = time_t(this->toEpochTime());
- LT = ::localtime(&OurTime);
- assert(LT);
-#else
- struct tm Storage;
- __time64_t OurTime = this->toEpochTime();
- int Error = ::_localtime64_s(&Storage, &OurTime);
- assert(!Error);
- (void)Error;
- LT = &Storage;
-#endif
-
- char Buffer[sizeof("YYYY-MM-DD HH:MM:SS")];
- strftime(Buffer, sizeof(Buffer), "%Y-%m-%d %H:%M:%S", LT);
- raw_string_ostream OS(S);
- OS << format("%s.%.9u", static_cast<const char *>(Buffer),
- this->nanoseconds());
- OS.flush();
- return S;
-}
diff --git a/contrib/llvm/lib/Support/Windows/WindowsSupport.h b/contrib/llvm/lib/Support/Windows/WindowsSupport.h
index 18ecdf4e73d9..c358b99ab96a 100644
--- a/contrib/llvm/lib/Support/Windows/WindowsSupport.h
+++ b/contrib/llvm/lib/Support/Windows/WindowsSupport.h
@@ -39,12 +39,13 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Config/config.h" // Get build system configuration settings
+#include "llvm/Support/Chrono.h"
#include "llvm/Support/Compiler.h"
-#include <system_error>
-#include <windows.h>
-#include <wincrypt.h>
#include <cassert>
#include <string>
+#include <system_error>
+#include <windows.h>
+#include <wincrypt.h> // Must be included after windows.h
/// Determines if the program is running on Windows 8 or newer. This
/// reimplements one of the helpers in the Windows 8.1 SDK, which are intended
@@ -211,6 +212,39 @@ c_str(SmallVectorImpl<T> &str) {
}
namespace sys {
+
+inline std::chrono::nanoseconds toDuration(FILETIME Time) {
+ ULARGE_INTEGER TimeInteger;
+ TimeInteger.LowPart = Time.dwLowDateTime;
+ TimeInteger.HighPart = Time.dwHighDateTime;
+
+ // FILETIMEs are # of 100-nanosecond ticks (1/10th of a microsecond)
+ return std::chrono::nanoseconds(100 * TimeInteger.QuadPart);
+}
+
+inline TimePoint<> toTimePoint(FILETIME Time) {
+ ULARGE_INTEGER TimeInteger;
+ TimeInteger.LowPart = Time.dwLowDateTime;
+ TimeInteger.HighPart = Time.dwHighDateTime;
+
+ // Adjust for different epoch
+ TimeInteger.QuadPart -= 11644473600ll * 10000000;
+
+ // FILETIMEs are # of 100-nanosecond ticks (1/10th of a microsecond)
+ return TimePoint<>(std::chrono::nanoseconds(100 * TimeInteger.QuadPart));
+}
+
+inline FILETIME toFILETIME(TimePoint<> TP) {
+ ULARGE_INTEGER TimeInteger;
+ TimeInteger.QuadPart = TP.time_since_epoch().count() / 100;
+ TimeInteger.QuadPart += 11644473600ll * 10000000;
+
+ FILETIME Time;
+ Time.dwLowDateTime = TimeInteger.LowPart;
+ Time.dwHighDateTime = TimeInteger.HighPart;
+ return Time;
+}
+
namespace path {
std::error_code widenPath(const Twine &Path8,
SmallVectorImpl<wchar_t> &Path16);
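toTimePoint/toFILETIME above convert between 100-nanosecond ticks since 1601-01-01 (FILETIME) and nanoseconds since 1970-01-01 (TimePoint<> on system_clock); 11644473600 seconds is the gap between the two epochs. A worked example of the arithmetic with an arbitrary timestamp:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint64_t EpochGapSeconds = 11644473600ull; // 1601-01-01 .. 1970-01-01
      const uint64_t TicksPerSecond  = 10000000ull;    // one tick = 100 ns

      uint64_t UnixNanos = 1500000000ull * 1000000000ull; // some Unix time, in ns
      uint64_t FileTimeTicks =
          UnixNanos / 100 + EpochGapSeconds * TicksPerSecond;        // toFILETIME
      uint64_t RoundTrip =
          (FileTimeTicks - EpochGapSeconds * TicksPerSecond) * 100;  // toTimePoint
      assert(RoundTrip == UnixNanos); // exact here: UnixNanos is a multiple of 100
    }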
diff --git a/contrib/llvm/lib/Support/YAMLParser.cpp b/contrib/llvm/lib/Support/YAMLParser.cpp
index 620841c2d152..c17a6f6e1ea6 100644
--- a/contrib/llvm/lib/Support/YAMLParser.cpp
+++ b/contrib/llvm/lib/Support/YAMLParser.cpp
@@ -12,12 +12,12 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/YAMLParser.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
-#include "llvm/ADT/ilist.h"
-#include "llvm/ADT/ilist_node.h"
+#include "llvm/ADT/AllocatorList.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
@@ -108,7 +108,7 @@ void SequenceNode::anchor() {}
void AliasNode::anchor() {}
/// Token - A single YAML token.
-struct Token : ilist_node<Token> {
+struct Token {
enum TokenKind {
TK_Error, // Uninitialized token.
TK_StreamStart,
@@ -147,40 +147,7 @@ struct Token : ilist_node<Token> {
}
}
-namespace llvm {
-template<>
-struct ilist_sentinel_traits<Token> {
- Token *createSentinel() const {
- return &Sentinel;
- }
- static void destroySentinel(Token*) {}
-
- Token *provideInitialHead() const { return createSentinel(); }
- Token *ensureHead(Token*) const { return createSentinel(); }
- static void noteHead(Token*, Token*) {}
-
-private:
- mutable Token Sentinel;
-};
-
-template<>
-struct ilist_node_traits<Token> {
- Token *createNode(const Token &V) {
- return new (Alloc.Allocate<Token>()) Token(V);
- }
- static void deleteNode(Token *V) { V->~Token(); }
-
- void addNodeToList(Token *) {}
- void removeNodeFromList(Token *) {}
- void transferNodesFromList(ilist_node_traits & /*SrcTraits*/,
- ilist_iterator<Token> /*first*/,
- ilist_iterator<Token> /*last*/) {}
-
- BumpPtrAllocator Alloc;
-};
-}
-
-typedef ilist<Token> TokenQueueT;
+typedef llvm::BumpPtrList<Token> TokenQueueT;
namespace {
/// @brief This struct is used to track simple keys.
@@ -265,8 +232,10 @@ namespace yaml {
/// @brief Scans YAML tokens from a MemoryBuffer.
class Scanner {
public:
- Scanner(StringRef Input, SourceMgr &SM, bool ShowColors = true);
- Scanner(MemoryBufferRef Buffer, SourceMgr &SM_, bool ShowColors = true);
+ Scanner(StringRef Input, SourceMgr &SM, bool ShowColors = true,
+ std::error_code *EC = nullptr);
+ Scanner(MemoryBufferRef Buffer, SourceMgr &SM_, bool ShowColors = true,
+ std::error_code *EC = nullptr);
/// @brief Parse the next token and return it without popping it.
Token &peekNext();
@@ -283,6 +252,10 @@ public:
if (Current >= End)
Current = End - 1;
+ // propagate the error if possible
+ if (EC)
+ *EC = make_error_code(std::errc::invalid_argument);
+
// Don't print out more errors after the first one we encounter. The rest
// are just the result of the first, and have no meaning.
if (!Failed)
@@ -393,10 +366,7 @@ private:
/// @brief Scan ns-uri-char[39]s starting at Cur.
///
/// This updates Cur and Column while scanning.
- ///
- /// @returns A StringRef starting at Cur which covers the longest contiguous
- /// sequence of ns-uri-char.
- StringRef scan_ns_uri_char();
+ void scan_ns_uri_char();
/// @brief Consume a minimal well-formed code unit subsequence starting at
/// \a Cur. Return false if it is not the same Unicode scalar value as
@@ -564,6 +534,8 @@ private:
/// @brief Potential simple keys.
SmallVector<SimpleKey, 4> SimpleKeys;
+
+ std::error_code *EC;
};
} // end namespace yaml
@@ -758,13 +730,15 @@ std::string yaml::escape(StringRef Input) {
return EscapedInput;
}
-Scanner::Scanner(StringRef Input, SourceMgr &sm, bool ShowColors)
- : SM(sm), ShowColors(ShowColors) {
+Scanner::Scanner(StringRef Input, SourceMgr &sm, bool ShowColors,
+ std::error_code *EC)
+ : SM(sm), ShowColors(ShowColors), EC(EC) {
init(MemoryBufferRef(Input, "YAML"));
}
-Scanner::Scanner(MemoryBufferRef Buffer, SourceMgr &SM_, bool ShowColors)
- : SM(SM_), ShowColors(ShowColors) {
+Scanner::Scanner(MemoryBufferRef Buffer, SourceMgr &SM_, bool ShowColors,
+ std::error_code *EC)
+ : SM(SM_), ShowColors(ShowColors), EC(EC) {
init(Buffer);
}
@@ -802,8 +776,7 @@ Token &Scanner::peekNext() {
removeStaleSimpleKeyCandidates();
SimpleKey SK;
SK.Tok = TokenQueue.begin();
- if (std::find(SimpleKeys.begin(), SimpleKeys.end(), SK)
- == SimpleKeys.end())
+ if (!is_contained(SimpleKeys, SK))
break;
else
NeedMore = true;
@@ -819,9 +792,8 @@ Token Scanner::getNext() {
// There cannot be any referenced Token's if the TokenQueue is empty. So do a
// quick deallocation of them all.
- if (TokenQueue.empty()) {
- TokenQueue.Alloc.Reset();
- }
+ if (TokenQueue.empty())
+ TokenQueue.resetAlloc();
return Ret;
}
@@ -918,8 +890,7 @@ static bool is_ns_word_char(const char C) {
|| (C >= 'A' && C <= 'Z');
}
-StringRef Scanner::scan_ns_uri_char() {
- StringRef::iterator Start = Current;
+void Scanner::scan_ns_uri_char() {
while (true) {
if (Current == End)
break;
@@ -935,7 +906,6 @@ StringRef Scanner::scan_ns_uri_char() {
} else
break;
}
- return StringRef(Start, Current - Start);
}
bool Scanner::consume(uint32_t Expected) {
@@ -1766,11 +1736,13 @@ bool Scanner::fetchMoreTokens() {
return false;
}
-Stream::Stream(StringRef Input, SourceMgr &SM, bool ShowColors)
- : scanner(new Scanner(Input, SM, ShowColors)), CurrentDoc() {}
+Stream::Stream(StringRef Input, SourceMgr &SM, bool ShowColors,
+ std::error_code *EC)
+ : scanner(new Scanner(Input, SM, ShowColors, EC)), CurrentDoc() {}
-Stream::Stream(MemoryBufferRef InputBuffer, SourceMgr &SM, bool ShowColors)
- : scanner(new Scanner(InputBuffer, SM, ShowColors)), CurrentDoc() {}
+Stream::Stream(MemoryBufferRef InputBuffer, SourceMgr &SM, bool ShowColors,
+ std::error_code *EC)
+ : scanner(new Scanner(InputBuffer, SM, ShowColors, EC)), CurrentDoc() {}
Stream::~Stream() {}
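With the new std::error_code plumbing, a caller that does not install a diagnostic handler can still detect scanner failures. A sketch using the constructor signature added above; the helper function itself is illustrative only:

    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/SourceMgr.h"
    #include "llvm/Support/YAMLParser.h"
    #include <system_error>

    bool parsesCleanly(llvm::StringRef Input) {
      llvm::SourceMgr SM;
      std::error_code EC;
      llvm::yaml::Stream S(Input, SM, /*ShowColors=*/false, &EC);
      // Walking the documents drives the scanner; any error it hits is
      // mirrored into EC via setError().
      for (llvm::yaml::document_iterator DI = S.begin(), DE = S.end(); DI != DE;
           ++DI)
        (void)DI->getRoot();
      return !EC;
    }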
diff --git a/contrib/llvm/lib/Support/YAMLTraits.cpp b/contrib/llvm/lib/Support/YAMLTraits.cpp
index 75fac20a8edb..99d2070cb6ed 100644
--- a/contrib/llvm/lib/Support/YAMLTraits.cpp
+++ b/contrib/llvm/lib/Support/YAMLTraits.cpp
@@ -44,13 +44,10 @@ void IO::setContext(void *Context) {
// Input
//===----------------------------------------------------------------------===//
-Input::Input(StringRef InputContent,
- void *Ctxt,
- SourceMgr::DiagHandlerTy DiagHandler,
- void *DiagHandlerCtxt)
- : IO(Ctxt),
- Strm(new Stream(InputContent, SrcMgr)),
- CurrentNode(nullptr) {
+Input::Input(StringRef InputContent, void *Ctxt,
+ SourceMgr::DiagHandlerTy DiagHandler, void *DiagHandlerCtxt)
+ : IO(Ctxt), Strm(new Stream(InputContent, SrcMgr, false, &EC)),
+ CurrentNode(nullptr) {
if (DiagHandler)
SrcMgr.setDiagHandler(DiagHandler, DiagHandlerCtxt);
DocIterator = Strm->begin();
diff --git a/contrib/llvm/lib/Support/raw_ostream.cpp b/contrib/llvm/lib/Support/raw_ostream.cpp
index 275fe1d49459..d073802db932 100644
--- a/contrib/llvm/lib/Support/raw_ostream.cpp
+++ b/contrib/llvm/lib/Support/raw_ostream.cpp
@@ -20,11 +20,16 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/NativeFormatting.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/Program.h"
+#include <algorithm>
#include <cctype>
#include <cerrno>
+#include <cstdio>
+#include <iterator>
#include <sys/stat.h>
#include <system_error>
@@ -110,73 +115,28 @@ void raw_ostream::SetBufferAndMode(char *BufferStart, size_t Size,
}
raw_ostream &raw_ostream::operator<<(unsigned long N) {
- // Zero is a special case.
- if (N == 0)
- return *this << '0';
-
- char NumberBuffer[20];
- char *EndPtr = NumberBuffer+sizeof(NumberBuffer);
- char *CurPtr = EndPtr;
-
- while (N) {
- *--CurPtr = '0' + char(N % 10);
- N /= 10;
- }
- return write(CurPtr, EndPtr-CurPtr);
+ write_integer(*this, static_cast<uint64_t>(N), 0, IntegerStyle::Integer);
+ return *this;
}
raw_ostream &raw_ostream::operator<<(long N) {
- if (N < 0) {
- *this << '-';
- // Avoid undefined behavior on LONG_MIN with a cast.
- N = -(unsigned long)N;
- }
-
- return this->operator<<(static_cast<unsigned long>(N));
+ write_integer(*this, static_cast<int64_t>(N), 0, IntegerStyle::Integer);
+ return *this;
}
raw_ostream &raw_ostream::operator<<(unsigned long long N) {
- // Output using 32-bit div/mod when possible.
- if (N == static_cast<unsigned long>(N))
- return this->operator<<(static_cast<unsigned long>(N));
-
- char NumberBuffer[20];
- char *EndPtr = std::end(NumberBuffer);
- char *CurPtr = EndPtr;
-
- while (N) {
- *--CurPtr = '0' + char(N % 10);
- N /= 10;
- }
- return write(CurPtr, EndPtr-CurPtr);
+ write_integer(*this, static_cast<uint64_t>(N), 0, IntegerStyle::Integer);
+ return *this;
}
raw_ostream &raw_ostream::operator<<(long long N) {
- if (N < 0) {
- *this << '-';
- // Avoid undefined behavior on INT64_MIN with a cast.
- N = -(unsigned long long)N;
- }
-
- return this->operator<<(static_cast<unsigned long long>(N));
+ write_integer(*this, static_cast<int64_t>(N), 0, IntegerStyle::Integer);
+ return *this;
}
raw_ostream &raw_ostream::write_hex(unsigned long long N) {
- // Zero is a special case.
- if (N == 0)
- return *this << '0';
-
- char NumberBuffer[16];
- char *EndPtr = std::end(NumberBuffer);
- char *CurPtr = EndPtr;
-
- while (N) {
- unsigned char x = static_cast<unsigned char>(N) % 16;
- *--CurPtr = hexdigit(x, /*LowerCase*/true);
- N /= 16;
- }
-
- return write(CurPtr, EndPtr-CurPtr);
+ llvm::write_hex(*this, N, HexPrintStyle::Lower);
+ return *this;
}
raw_ostream &raw_ostream::write_escaped(StringRef Str,
@@ -220,54 +180,15 @@ raw_ostream &raw_ostream::write_escaped(StringRef Str,
}
raw_ostream &raw_ostream::operator<<(const void *P) {
- *this << '0' << 'x';
-
- return write_hex((uintptr_t) P);
+ llvm::write_hex(*this, (uintptr_t)P, HexPrintStyle::PrefixLower);
+ return *this;
}
raw_ostream &raw_ostream::operator<<(double N) {
-#ifdef _WIN32
- // On MSVCRT and compatible, output of %e is incompatible to Posix
- // by default. Number of exponent digits should be at least 2. "%+03d"
- // FIXME: Implement our formatter to here or Support/Format.h!
-#if defined(__MINGW32__)
- // FIXME: It should be generic to C++11.
- if (N == 0.0 && std::signbit(N))
- return *this << "-0.000000e+00";
-#else
- int fpcl = _fpclass(N);
-
- // negative zero
- if (fpcl == _FPCLASS_NZ)
- return *this << "-0.000000e+00";
-#endif
-
- char buf[16];
- unsigned len;
- len = format("%e", N).snprint(buf, sizeof(buf));
- if (len <= sizeof(buf) - 2) {
- if (len >= 5 && buf[len - 5] == 'e' && buf[len - 3] == '0') {
- int cs = buf[len - 4];
- if (cs == '+' || cs == '-') {
- int c1 = buf[len - 2];
- int c0 = buf[len - 1];
- if (isdigit(static_cast<unsigned char>(c1)) &&
- isdigit(static_cast<unsigned char>(c0))) {
- // Trim leading '0': "...e+012" -> "...e+12\0"
- buf[len - 3] = c1;
- buf[len - 2] = c0;
- buf[--len] = 0;
- }
- }
- }
- return this->operator<<(buf);
- }
-#endif
- return this->operator<<(format("%e", N));
+ llvm::write_double(*this, N, FloatStyle::Exponent);
+ return *this;
}
-
-
void raw_ostream::flush_nonempty() {
assert(OutBufCur > OutBufStart && "Invalid call to flush_nonempty.");
size_t Length = OutBufCur - OutBufStart;
@@ -344,10 +265,10 @@ void raw_ostream::copy_to_buffer(const char *Ptr, size_t Size) {
// Handle short strings specially, memcpy isn't very good at very short
// strings.
switch (Size) {
- case 4: OutBufCur[3] = Ptr[3]; // FALL THROUGH
- case 3: OutBufCur[2] = Ptr[2]; // FALL THROUGH
- case 2: OutBufCur[1] = Ptr[1]; // FALL THROUGH
- case 1: OutBufCur[0] = Ptr[0]; // FALL THROUGH
+ case 4: OutBufCur[3] = Ptr[3]; LLVM_FALLTHROUGH;
+ case 3: OutBufCur[2] = Ptr[2]; LLVM_FALLTHROUGH;
+ case 2: OutBufCur[1] = Ptr[1]; LLVM_FALLTHROUGH;
+ case 1: OutBufCur[0] = Ptr[0]; LLVM_FALLTHROUGH;
case 0: break;
default:
memcpy(OutBufCur, Ptr, Size);
@@ -382,7 +303,7 @@ raw_ostream &raw_ostream::operator<<(const format_object_base &Fmt) {
// space. Iterate until we win.
SmallVector<char, 128> V;
- while (1) {
+ while (true) {
V.resize(NextBufferSize);
// Try formatting into the SmallVector.
@@ -398,6 +319,12 @@ raw_ostream &raw_ostream::operator<<(const format_object_base &Fmt) {
}
}
+raw_ostream &raw_ostream::operator<<(const formatv_object_base &Obj) {
+ SmallString<128> S;
+ Obj.format(*this);
+ return *this;
+}
+
raw_ostream &raw_ostream::operator<<(const FormattedString &FS) {
unsigned Len = FS.Str.size();
int PadAmount = FS.Width - Len;
@@ -411,51 +338,99 @@ raw_ostream &raw_ostream::operator<<(const FormattedString &FS) {
raw_ostream &raw_ostream::operator<<(const FormattedNumber &FN) {
if (FN.Hex) {
- unsigned Nibbles = (64 - countLeadingZeros(FN.HexValue)+3)/4;
- unsigned PrefixChars = FN.HexPrefix ? 2 : 0;
- unsigned Width = std::max(FN.Width, Nibbles + PrefixChars);
-
- char NumberBuffer[20] = "0x0000000000000000";
- if (!FN.HexPrefix)
- NumberBuffer[1] = '0';
- char *EndPtr = NumberBuffer+Width;
- char *CurPtr = EndPtr;
- unsigned long long N = FN.HexValue;
- while (N) {
- unsigned char x = static_cast<unsigned char>(N) % 16;
- *--CurPtr = hexdigit(x, !FN.Upper);
- N /= 16;
+ HexPrintStyle Style;
+ if (FN.Upper && FN.HexPrefix)
+ Style = HexPrintStyle::PrefixUpper;
+ else if (FN.Upper && !FN.HexPrefix)
+ Style = HexPrintStyle::Upper;
+ else if (!FN.Upper && FN.HexPrefix)
+ Style = HexPrintStyle::PrefixLower;
+ else
+ Style = HexPrintStyle::Lower;
+ llvm::write_hex(*this, FN.HexValue, Style, FN.Width);
+ } else {
+ llvm::SmallString<16> Buffer;
+ llvm::raw_svector_ostream Stream(Buffer);
+ llvm::write_integer(Stream, FN.DecValue, 0, IntegerStyle::Integer);
+ if (Buffer.size() < FN.Width)
+ indent(FN.Width - Buffer.size());
+ (*this) << Buffer;
+ }
+ return *this;
+}
+
+raw_ostream &raw_ostream::operator<<(const FormattedBytes &FB) {
+ if (FB.Bytes.empty())
+ return *this;
+
+ size_t LineIndex = 0;
+ auto Bytes = FB.Bytes;
+ const size_t Size = Bytes.size();
+ HexPrintStyle HPS = FB.Upper ? HexPrintStyle::Upper : HexPrintStyle::Lower;
+ uint64_t OffsetWidth = 0;
+ if (FB.FirstByteOffset.hasValue()) {
+ // Figure out how many nibbles are needed to print the largest offset
+ // represented by this data set, so that we can align the offset field
+ // to the right width.
+ size_t Lines = Size / FB.NumPerLine;
+ uint64_t MaxOffset = *FB.FirstByteOffset + Lines * FB.NumPerLine;
+ unsigned Power = 0;
+ if (MaxOffset > 0)
+ Power = llvm::Log2_64_Ceil(MaxOffset);
+ OffsetWidth = std::max<uint64_t>(4, llvm::alignTo(Power, 4) / 4);
+ }
+
+ // The width of a block of data including all spaces for group separators.
+ unsigned NumByteGroups =
+ alignTo(FB.NumPerLine, FB.ByteGroupSize) / FB.ByteGroupSize;
+ unsigned BlockCharWidth = FB.NumPerLine * 2 + NumByteGroups - 1;
+
+ while (!Bytes.empty()) {
+ indent(FB.IndentLevel);
+
+ if (FB.FirstByteOffset.hasValue()) {
+ uint64_t Offset = FB.FirstByteOffset.getValue();
+ llvm::write_hex(*this, Offset + LineIndex, HPS, OffsetWidth);
+ *this << ": ";
}
- return write(NumberBuffer, Width);
- } else {
- // Zero is a special case.
- if (FN.DecValue == 0) {
- this->indent(FN.Width-1);
- return *this << '0';
+ auto Line = Bytes.take_front(FB.NumPerLine);
+
+ size_t CharsPrinted = 0;
+ // Print the hex bytes for this line in groups
+ for (size_t I = 0; I < Line.size(); ++I, CharsPrinted += 2) {
+ if (I && (I % FB.ByteGroupSize) == 0) {
+ ++CharsPrinted;
+ *this << " ";
+ }
+ llvm::write_hex(*this, Line[I], HPS, 2);
}
- char NumberBuffer[32];
- char *EndPtr = NumberBuffer+sizeof(NumberBuffer);
- char *CurPtr = EndPtr;
- bool Neg = (FN.DecValue < 0);
- uint64_t N = Neg ? -static_cast<uint64_t>(FN.DecValue) : FN.DecValue;
- while (N) {
- *--CurPtr = '0' + char(N % 10);
- N /= 10;
+
+ if (FB.ASCII) {
+ // Print any spaces needed for any bytes that we didn't print on this
+ // line so that the ASCII bytes are correctly aligned.
+ assert(BlockCharWidth >= CharsPrinted);
+ indent(BlockCharWidth - CharsPrinted + 2);
+ *this << "|";
+
+ // Print the ASCII char values for each byte on this line
+ for (uint8_t Byte : Line) {
+ if (isprint(Byte))
+ *this << static_cast<char>(Byte);
+ else
+ *this << '.';
+ }
+ *this << '|';
}
- int Len = EndPtr - CurPtr;
- int Pad = FN.Width - Len;
- if (Neg)
- --Pad;
- if (Pad > 0)
- this->indent(Pad);
- if (Neg)
- *this << '-';
- return write(CurPtr, Len);
+
+ Bytes = Bytes.drop_front(Line.size());
+ LineIndex += Line.size();
+ if (LineIndex < Size)
+ *this << '\n';
}
+ return *this;
}
-
/// indent - Insert 'NumSpaces' spaces.
raw_ostream &raw_ostream::indent(unsigned NumSpaces) {
static const char Spaces[] = " "
@@ -475,7 +450,6 @@ raw_ostream &raw_ostream::indent(unsigned NumSpaces) {
return *this;
}
-
//===----------------------------------------------------------------------===//
// Formatted Output
//===----------------------------------------------------------------------===//
@@ -563,7 +537,6 @@ raw_fd_ostream::~raw_fd_ostream() {
report_fatal_error("IO failure on output stream.", /*GenCrashDiag=*/false);
}
-
void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) {
assert(FD >= 0 && "File already closed.");
pos += Size;
@@ -625,7 +598,13 @@ void raw_fd_ostream::close() {
uint64_t raw_fd_ostream::seek(uint64_t off) {
assert(SupportsSeeking && "Stream does not support seeking!");
flush();
+#ifdef LLVM_ON_WIN32
+ pos = ::_lseeki64(FD, off, SEEK_SET);
+#elif defined(HAVE_LSEEK64)
+ pos = ::lseek64(FD, off, SEEK_SET);
+#else
pos = ::lseek(FD, off, SEEK_SET);
+#endif
if (pos == (uint64_t)-1)
error_detected();
return pos;
@@ -740,7 +719,6 @@ raw_ostream &llvm::nulls() {
return S;
}
-
//===----------------------------------------------------------------------===//
// raw_string_ostream
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Support/xxhash.cpp b/contrib/llvm/lib/Support/xxhash.cpp
new file mode 100644
index 000000000000..a7d990bf6a4b
--- /dev/null
+++ b/contrib/llvm/lib/Support/xxhash.cpp
@@ -0,0 +1,134 @@
+/*
+* xxHash - Fast Hash algorithm
+* Copyright (C) 2012-2016, Yann Collet
+*
+* BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following disclaimer
+* in the documentation and/or other materials provided with the
+* distribution.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+* You can contact the author at :
+* - xxHash homepage: http://www.xxhash.com
+* - xxHash source repository : https://github.com/Cyan4973/xxHash
+*/
+
+/* based on revision d2df04efcbef7d7f6886d345861e5dfda4edacc1 Removed
+ * everything but a simple interface for computing XXh64. */
+
+#include "llvm/Support/xxhash.h"
+#include "llvm/Support/Endian.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+using namespace llvm;
+using namespace support;
+
+static uint64_t rotl64(uint64_t X, size_t R) {
+ return (X << R) | (X >> (64 - R));
+}
+
+static const uint64_t PRIME64_1 = 11400714785074694791ULL;
+static const uint64_t PRIME64_2 = 14029467366897019727ULL;
+static const uint64_t PRIME64_3 = 1609587929392839161ULL;
+static const uint64_t PRIME64_4 = 9650029242287828579ULL;
+static const uint64_t PRIME64_5 = 2870177450012600261ULL;
+
+static uint64_t round(uint64_t Acc, uint64_t Input) {
+ Acc += Input * PRIME64_2;
+ Acc = rotl64(Acc, 31);
+ Acc *= PRIME64_1;
+ return Acc;
+}
+
+static uint64_t mergeRound(uint64_t Acc, uint64_t Val) {
+ Val = round(0, Val);
+ Acc ^= Val;
+ Acc = Acc * PRIME64_1 + PRIME64_4;
+ return Acc;
+}
+
+uint64_t llvm::xxHash64(StringRef Data) {
+ size_t Len = Data.size();
+ uint64_t Seed = 0;
+ const char *P = Data.data();
+ const char *const BEnd = P + Len;
+ uint64_t H64;
+
+ if (Len >= 32) {
+ const char *const Limit = BEnd - 32;
+ uint64_t V1 = Seed + PRIME64_1 + PRIME64_2;
+ uint64_t V2 = Seed + PRIME64_2;
+ uint64_t V3 = Seed + 0;
+ uint64_t V4 = Seed - PRIME64_1;
+
+ do {
+ V1 = round(V1, endian::read64le(P));
+ P += 8;
+ V2 = round(V2, endian::read64le(P));
+ P += 8;
+ V3 = round(V3, endian::read64le(P));
+ P += 8;
+ V4 = round(V4, endian::read64le(P));
+ P += 8;
+ } while (P <= Limit);
+
+ H64 = rotl64(V1, 1) + rotl64(V2, 7) + rotl64(V3, 12) + rotl64(V4, 18);
+ H64 = mergeRound(H64, V1);
+ H64 = mergeRound(H64, V2);
+ H64 = mergeRound(H64, V3);
+ H64 = mergeRound(H64, V4);
+
+ } else {
+ H64 = Seed + PRIME64_5;
+ }
+
+ H64 += (uint64_t)Len;
+
+ while (P + 8 <= BEnd) {
+ uint64_t const K1 = round(0, endian::read64le(P));
+ H64 ^= K1;
+ H64 = rotl64(H64, 27) * PRIME64_1 + PRIME64_4;
+ P += 8;
+ }
+
+ if (P + 4 <= BEnd) {
+ H64 ^= (uint64_t)(endian::read32le(P)) * PRIME64_1;
+ H64 = rotl64(H64, 23) * PRIME64_2 + PRIME64_3;
+ P += 4;
+ }
+
+ while (P < BEnd) {
+ H64 ^= (*P) * PRIME64_5;
+ H64 = rotl64(H64, 11) * PRIME64_1;
+ P++;
+ }
+
+ H64 ^= H64 >> 33;
+ H64 *= PRIME64_2;
+ H64 ^= H64 >> 29;
+ H64 *= PRIME64_3;
+ H64 ^= H64 >> 32;
+
+ return H64;
+}
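[editor's note] A minimal caller for the new hash routine; llvm::xxHash64 and its StringRef parameter come from the file above, while the sample input and the printing are illustrative only:

  #include <cstdint>
  #include "llvm/Support/raw_ostream.h"
  #include "llvm/Support/xxhash.h"

  int main() {
    // The seed is fixed to 0 inside xxHash64, so equal inputs hash equally
    // across runs and hosts (the reads are explicitly little-endian).
    uint64_t H = llvm::xxHash64("The quick brown fox jumps over the lazy dog");
    llvm::outs() << "xxh64 = " << H << '\n';
    return 0;
  }
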
diff --git a/contrib/llvm/lib/TableGen/Main.cpp b/contrib/llvm/lib/TableGen/Main.cpp
index bb590c7d87f2..278b567fb220 100644
--- a/contrib/llvm/lib/TableGen/Main.cpp
+++ b/contrib/llvm/lib/TableGen/Main.cpp
@@ -17,6 +17,7 @@
#include "llvm/TableGen/Main.h"
#include "TGParser.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/MemoryBuffer.h"
@@ -45,22 +46,25 @@ static cl::list<std::string>
IncludeDirs("I", cl::desc("Directory of include files"),
cl::value_desc("directory"), cl::Prefix);
+static int reportError(const char *ProgName, Twine Msg) {
+ errs() << ProgName << ": " << Msg;
+ errs().flush();
+ return 1;
+}
+
/// \brief Create a dependency file for `-d` option.
///
/// This functionality is really only for the benefit of the build system.
/// It is similar to GCC's `-M*` family of options.
static int createDependencyFile(const TGParser &Parser, const char *argv0) {
- if (OutputFilename == "-") {
- errs() << argv0 << ": the option -d must be used together with -o\n";
- return 1;
- }
+ if (OutputFilename == "-")
+ return reportError(argv0, "the option -d must be used together with -o\n");
+
std::error_code EC;
tool_output_file DepOut(DependFilename, EC, sys::fs::F_Text);
- if (EC) {
- errs() << argv0 << ": error opening " << DependFilename << ":"
- << EC.message() << "\n";
- return 1;
- }
+ if (EC)
+ return reportError(argv0, "error opening " + DependFilename + ":" +
+ EC.message() + "\n");
DepOut.os() << OutputFilename << ":";
for (const auto &Dep : Parser.getDependencies()) {
DepOut.os() << ' ' << Dep.first;
@@ -76,11 +80,9 @@ int llvm::TableGenMain(char *argv0, TableGenMainFn *MainFn) {
// Parse the input file.
ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr =
MemoryBuffer::getFileOrSTDIN(InputFilename);
- if (std::error_code EC = FileOrErr.getError()) {
- errs() << "Could not open input file '" << InputFilename
- << "': " << EC.message() << "\n";
- return 1;
- }
+ if (std::error_code EC = FileOrErr.getError())
+ return reportError(argv0, "Could not open input file '" + InputFilename +
+ "': " + EC.message() + "\n");
// Tell SrcMgr about this buffer, which is what TGParser will pick up.
SrcMgr.AddNewSourceBuffer(std::move(*FileOrErr), SMLoc());
@@ -96,11 +98,9 @@ int llvm::TableGenMain(char *argv0, TableGenMainFn *MainFn) {
std::error_code EC;
tool_output_file Out(OutputFilename, EC, sys::fs::F_Text);
- if (EC) {
- errs() << argv0 << ": error opening " << OutputFilename << ":"
- << EC.message() << "\n";
- return 1;
- }
+ if (EC)
+ return reportError(argv0, "error opening " + OutputFilename + ":" +
+ EC.message() + "\n");
if (!DependFilename.empty()) {
if (int Ret = createDependencyFile(Parser, argv0))
return Ret;
@@ -109,10 +109,8 @@ int llvm::TableGenMain(char *argv0, TableGenMainFn *MainFn) {
if (MainFn(Out.os(), Records))
return 1;
- if (ErrorsPrinted > 0) {
- errs() << argv0 << ": " << ErrorsPrinted << " errors.\n";
- return 1;
- }
+ if (ErrorsPrinted > 0)
+ return reportError(argv0, utostr(ErrorsPrinted) + " errors.\n");
// Declare success.
Out.keep();
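[editor's note] The Main.cpp changes above fold four copies of the errs()-then-return-1 sequence into reportError. The same shape in isolation; the call-site function and the file name in the message are made up for illustration, only the helper itself mirrors the patch:

  #include <system_error>
  #include "llvm/ADT/Twine.h"
  #include "llvm/Support/raw_ostream.h"

  static int reportError(const char *ProgName, llvm::Twine Msg) {
    llvm::errs() << ProgName << ": " << Msg;
    llvm::errs().flush();
    return 1;
  }

  // Call sites shrink to a single return statement; the message is composed
  // with ordinary string/Twine concatenation as in the hunks above.
  int demoOpenFailure(const char *ProgName, std::error_code EC) {
    if (EC)
      return reportError(ProgName,
                         "error opening output.inc:" + EC.message() + "\n");
    return 0;
  }
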
diff --git a/contrib/llvm/lib/TableGen/Record.cpp b/contrib/llvm/lib/TableGen/Record.cpp
index 66fbc9a8c96b..ea9c9a19904e 100644
--- a/contrib/llvm/lib/TableGen/Record.cpp
+++ b/contrib/llvm/lib/TableGen/Record.cpp
@@ -15,67 +15,20 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/Hashing.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Format.h"
#include "llvm/TableGen/Error.h"
+#include <cassert>
+#include <cstdint>
+#include <new>
using namespace llvm;
-//===----------------------------------------------------------------------===//
-// std::string wrapper for DenseMap purposes
-//===----------------------------------------------------------------------===//
-
-namespace llvm {
-
-/// This is a wrapper for std::string suitable for using as a key to a DenseMap.
-/// Because there isn't a particularly
-/// good way to indicate tombstone or empty keys for strings, we want
-/// to wrap std::string to indicate that this is a "special" string
-/// not expected to take on certain values (those of the tombstone and
-/// empty keys). This makes things a little safer as it clarifies
-/// that DenseMap is really not appropriate for general strings.
-
-class TableGenStringKey {
-public:
- TableGenStringKey(const std::string &str) : data(str) {}
- TableGenStringKey(const char *str) : data(str) {}
-
- const std::string &str() const { return data; }
-
- friend hash_code hash_value(const TableGenStringKey &Value) {
- using llvm::hash_value;
- return hash_value(Value.str());
- }
-private:
- std::string data;
-};
-
-/// Specialize DenseMapInfo for TableGenStringKey.
-template<> struct DenseMapInfo<TableGenStringKey> {
- static inline TableGenStringKey getEmptyKey() {
- TableGenStringKey Empty("<<<EMPTY KEY>>>");
- return Empty;
- }
- static inline TableGenStringKey getTombstoneKey() {
- TableGenStringKey Tombstone("<<<TOMBSTONE KEY>>>");
- return Tombstone;
- }
- static unsigned getHashValue(const TableGenStringKey& Val) {
- using llvm::hash_value;
- return hash_value(Val);
- }
- static bool isEqual(const TableGenStringKey& LHS,
- const TableGenStringKey& RHS) {
- return LHS.str() == RHS.str();
- }
-};
-
-} // namespace llvm
+static BumpPtrAllocator Allocator;
//===----------------------------------------------------------------------===//
// Type implementations
@@ -91,8 +44,8 @@ LLVM_DUMP_METHOD void RecTy::dump() const { print(errs()); }
ListRecTy *RecTy::getListTy() {
if (!ListTy)
- ListTy.reset(new ListRecTy(this));
- return ListTy.get();
+ ListTy = new(Allocator) ListRecTy(this);
+ return ListTy;
}
bool RecTy::typeIsConvertibleTo(const RecTy *RHS) const {
@@ -109,13 +62,13 @@ bool BitRecTy::typeIsConvertibleTo(const RecTy *RHS) const{
}
BitsRecTy *BitsRecTy::get(unsigned Sz) {
- static std::vector<std::unique_ptr<BitsRecTy>> Shared;
+ static std::vector<BitsRecTy*> Shared;
if (Sz >= Shared.size())
Shared.resize(Sz + 1);
- std::unique_ptr<BitsRecTy> &Ty = Shared[Sz];
+ BitsRecTy *&Ty = Shared[Sz];
if (!Ty)
- Ty.reset(new BitsRecTy(Sz));
- return Ty.get();
+ Ty = new(Allocator) BitsRecTy(Sz);
+ return Ty;
}
std::string BitsRecTy::getAsString() const {
@@ -203,7 +156,6 @@ RecTy *llvm::resolveTypes(RecTy *T1, RecTy *T2) {
return nullptr;
}
-
//===----------------------------------------------------------------------===//
// Initializer implementations
//===----------------------------------------------------------------------===//
@@ -263,7 +215,7 @@ ProfileBitsInit(FoldingSetNodeID &ID, ArrayRef<Init *> Range) {
BitsInit *BitsInit::get(ArrayRef<Init *> Range) {
static FoldingSet<BitsInit> ThePool;
- static std::vector<std::unique_ptr<BitsInit>> TheActualPool;
+ static std::vector<BitsInit*> TheActualPool;
FoldingSetNodeID ID;
ProfileBitsInit(ID, Range);
@@ -272,12 +224,13 @@ BitsInit *BitsInit::get(ArrayRef<Init *> Range) {
if (BitsInit *I = ThePool.FindNodeOrInsertPos(ID, IP))
return I;
- void *Mem = ::operator new (totalSizeToAlloc<Init *>(Range.size()));
- BitsInit *I = new (Mem) BitsInit(Range.size());
+ void *Mem = Allocator.Allocate(totalSizeToAlloc<Init *>(Range.size()),
+ alignof(BitsInit));
+ BitsInit *I = new(Mem) BitsInit(Range.size());
std::uninitialized_copy(Range.begin(), Range.end(),
I->getTrailingObjects<Init *>());
ThePool.InsertNode(I, IP);
- TheActualPool.push_back(std::unique_ptr<BitsInit>(I));
+ TheActualPool.push_back(I);
return I;
}
@@ -312,7 +265,7 @@ Init *BitsInit::convertInitializerTo(RecTy *Ty) const {
}
Init *
-BitsInit::convertInitializerBitRange(const std::vector<unsigned> &Bits) const {
+BitsInit::convertInitializerBitRange(ArrayRef<unsigned> Bits) const {
SmallVector<Init *, 16> NewBits(Bits.size());
for (unsigned i = 0, e = Bits.size(); i != e; ++i) {
@@ -393,11 +346,11 @@ Init *BitsInit::resolveReferences(Record &R, const RecordVal *RV) const {
}
IntInit *IntInit::get(int64_t V) {
- static DenseMap<int64_t, std::unique_ptr<IntInit>> ThePool;
+ static DenseMap<int64_t, IntInit*> ThePool;
- std::unique_ptr<IntInit> &I = ThePool[V];
- if (!I) I.reset(new IntInit(V));
- return I.get();
+ IntInit *&I = ThePool[V];
+ if (!I) I = new(Allocator) IntInit(V);
+ return I;
}
std::string IntInit::getAsString() const {
@@ -437,7 +390,7 @@ Init *IntInit::convertInitializerTo(RecTy *Ty) const {
}
Init *
-IntInit::convertInitializerBitRange(const std::vector<unsigned> &Bits) const {
+IntInit::convertInitializerBitRange(ArrayRef<unsigned> Bits) const {
SmallVector<Init *, 16> NewBits(Bits.size());
for (unsigned i = 0, e = Bits.size(); i != e; ++i) {
@@ -450,19 +403,27 @@ IntInit::convertInitializerBitRange(const std::vector<unsigned> &Bits) const {
}
CodeInit *CodeInit::get(StringRef V) {
- static StringMap<std::unique_ptr<CodeInit>> ThePool;
+ static DenseMap<StringRef, CodeInit*> ThePool;
- std::unique_ptr<CodeInit> &I = ThePool[V];
- if (!I) I.reset(new CodeInit(V));
- return I.get();
+ auto I = ThePool.insert(std::make_pair(V, nullptr));
+ if (I.second) {
+ StringRef VCopy = V.copy(Allocator);
+ I.first->first = VCopy;
+ I.first->second = new(Allocator) CodeInit(VCopy);
+ }
+ return I.first->second;
}
StringInit *StringInit::get(StringRef V) {
- static StringMap<std::unique_ptr<StringInit>> ThePool;
+ static DenseMap<StringRef, StringInit*> ThePool;
- std::unique_ptr<StringInit> &I = ThePool[V];
- if (!I) I.reset(new StringInit(V));
- return I.get();
+ auto I = ThePool.insert(std::make_pair(V, nullptr));
+ if (I.second) {
+ StringRef VCopy = V.copy(Allocator);
+ I.first->first = VCopy;
+ I.first->second = new(Allocator) StringInit(VCopy);
+ }
+ return I.first->second;
}
Init *StringInit::convertInitializerTo(RecTy *Ty) const {
@@ -491,7 +452,7 @@ static void ProfileListInit(FoldingSetNodeID &ID,
ListInit *ListInit::get(ArrayRef<Init *> Range, RecTy *EltTy) {
static FoldingSet<ListInit> ThePool;
- static std::vector<std::unique_ptr<ListInit>> TheActualPool;
+ static std::vector<ListInit*> TheActualPool;
FoldingSetNodeID ID;
ProfileListInit(ID, Range, EltTy);
@@ -500,12 +461,13 @@ ListInit *ListInit::get(ArrayRef<Init *> Range, RecTy *EltTy) {
if (ListInit *I = ThePool.FindNodeOrInsertPos(ID, IP))
return I;
- void *Mem = ::operator new (totalSizeToAlloc<Init *>(Range.size()));
- ListInit *I = new (Mem) ListInit(Range.size(), EltTy);
+ void *Mem = Allocator.Allocate(totalSizeToAlloc<Init *>(Range.size()),
+ alignof(ListInit));
+ ListInit *I = new(Mem) ListInit(Range.size(), EltTy);
std::uninitialized_copy(Range.begin(), Range.end(),
I->getTrailingObjects<Init *>());
ThePool.InsertNode(I, IP);
- TheActualPool.push_back(std::unique_ptr<ListInit>(I));
+ TheActualPool.push_back(I);
return I;
}
@@ -516,31 +478,40 @@ void ListInit::Profile(FoldingSetNodeID &ID) const {
}
Init *ListInit::convertInitializerTo(RecTy *Ty) const {
+ if (getType() == Ty)
+ return const_cast<ListInit*>(this);
+
if (auto *LRT = dyn_cast<ListRecTy>(Ty)) {
- std::vector<Init*> Elements;
+ SmallVector<Init*, 8> Elements;
+ Elements.reserve(getValues().size());
// Verify that all of the elements of the list are subclasses of the
// appropriate class!
+ bool Changed = false;
+ RecTy *ElementType = LRT->getElementType();
for (Init *I : getValues())
- if (Init *CI = I->convertInitializerTo(LRT->getElementType()))
+ if (Init *CI = I->convertInitializerTo(ElementType)) {
Elements.push_back(CI);
- else
+ if (CI != I)
+ Changed = true;
+ } else
return nullptr;
- if (isa<ListRecTy>(getType()))
- return ListInit::get(Elements, Ty);
+ if (!Changed)
+ return const_cast<ListInit*>(this);
+ return ListInit::get(Elements, Ty);
}
return nullptr;
}
-Init *
-ListInit::convertInitListSlice(const std::vector<unsigned> &Elements) const {
- std::vector<Init*> Vals;
- for (unsigned i = 0, e = Elements.size(); i != e; ++i) {
- if (Elements[i] >= size())
+Init *ListInit::convertInitListSlice(ArrayRef<unsigned> Elements) const {
+ SmallVector<Init*, 8> Vals;
+ Vals.reserve(Elements.size());
+ for (unsigned Element : Elements) {
+ if (Element >= size())
return nullptr;
- Vals.push_back(getElement(Elements[i]));
+ Vals.push_back(getElement(Element));
}
return ListInit::get(Vals, getType());
}
@@ -554,7 +525,7 @@ Record *ListInit::getElementAsRecord(unsigned i) const {
}
Init *ListInit::resolveReferences(Record &R, const RecordVal *RV) const {
- std::vector<Init*> Resolved;
+ SmallVector<Init*, 8> Resolved;
Resolved.reserve(size());
bool Changed = false;
@@ -589,9 +560,11 @@ Init *ListInit::resolveListElementReference(Record &R, const RecordVal *IRV,
std::string ListInit::getAsString() const {
std::string Result = "[";
- for (unsigned i = 0, e = NumValues; i != e; ++i) {
- if (i) Result += ", ";
- Result += getElement(i)->getAsString();
+ const char *sep = "";
+ for (Init *Element : *this) {
+ Result += sep;
+ sep = ", ";
+ Result += Element->getAsString();
}
return Result + "]";
}
@@ -629,7 +602,7 @@ ProfileUnOpInit(FoldingSetNodeID &ID, unsigned Opcode, Init *Op, RecTy *Type) {
UnOpInit *UnOpInit::get(UnaryOp Opc, Init *LHS, RecTy *Type) {
static FoldingSet<UnOpInit> ThePool;
- static std::vector<std::unique_ptr<UnOpInit>> TheActualPool;
+ static std::vector<UnOpInit*> TheActualPool;
FoldingSetNodeID ID;
ProfileUnOpInit(ID, Opc, LHS, Type);
@@ -638,9 +611,9 @@ UnOpInit *UnOpInit::get(UnaryOp Opc, Init *LHS, RecTy *Type) {
if (UnOpInit *I = ThePool.FindNodeOrInsertPos(ID, IP))
return I;
- UnOpInit *I = new UnOpInit(Opc, LHS, Type);
+ UnOpInit *I = new(Allocator) UnOpInit(Opc, LHS, Type);
ThePool.InsertNode(I, IP);
- TheActualPool.push_back(std::unique_ptr<UnOpInit>(I));
+ TheActualPool.push_back(I);
return I;
}
@@ -661,9 +634,7 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
if (IntInit *LHSi = dyn_cast<IntInit>(LHS))
return StringInit::get(LHSi->getAsString());
} else {
- if (StringInit *LHSs = dyn_cast<StringInit>(LHS)) {
- const std::string &Name = LHSs->getValue();
-
+ if (StringInit *Name = dyn_cast<StringInit>(LHS)) {
// From TGParser::ParseIDValue
if (CurRec) {
if (const RecordVal *RV = CurRec->getValue(Name)) {
@@ -701,11 +672,11 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
}
}
assert(CurRec && "NULL pointer");
- if (Record *D = (CurRec->getRecords()).getDef(Name))
+ if (Record *D = (CurRec->getRecords()).getDef(Name->getValue()))
return DefInit::get(D);
PrintFatalError(CurRec->getLoc(),
- "Undefined reference:'" + Name + "'\n");
+ "Undefined reference:'" + Name->getValue() + "'\n");
}
if (isa<IntRecTy>(getType())) {
@@ -777,7 +748,7 @@ ProfileBinOpInit(FoldingSetNodeID &ID, unsigned Opcode, Init *LHS, Init *RHS,
BinOpInit *BinOpInit::get(BinaryOp Opc, Init *LHS,
Init *RHS, RecTy *Type) {
static FoldingSet<BinOpInit> ThePool;
- static std::vector<std::unique_ptr<BinOpInit>> TheActualPool;
+ static std::vector<BinOpInit*> TheActualPool;
FoldingSetNodeID ID;
ProfileBinOpInit(ID, Opc, LHS, RHS, Type);
@@ -786,9 +757,9 @@ BinOpInit *BinOpInit::get(BinaryOp Opc, Init *LHS,
if (BinOpInit *I = ThePool.FindNodeOrInsertPos(ID, IP))
return I;
- BinOpInit *I = new BinOpInit(Opc, LHS, RHS, Type);
+ BinOpInit *I = new(Allocator) BinOpInit(Opc, LHS, RHS, Type);
ThePool.InsertNode(I, IP);
- TheActualPool.push_back(std::unique_ptr<BinOpInit>(I));
+ TheActualPool.push_back(I);
return I;
}
@@ -796,6 +767,13 @@ void BinOpInit::Profile(FoldingSetNodeID &ID) const {
ProfileBinOpInit(ID, getOpcode(), getLHS(), getRHS(), getType());
}
+static StringInit *ConcatStringInits(const StringInit *I0,
+ const StringInit *I1) {
+ SmallString<80> Concat(I0->getValue());
+ Concat.append(I1->getValue());
+ return StringInit::get(Concat);
+}
+
Init *BinOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
switch (getOpcode()) {
case CONCAT: {
@@ -806,8 +784,8 @@ Init *BinOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
DefInit *ROp = dyn_cast<DefInit>(RHSs->getOperator());
if (!LOp || !ROp || LOp->getDef() != ROp->getDef())
PrintFatalError("Concated Dag operators do not match!");
- std::vector<Init*> Args;
- std::vector<std::string> ArgNames;
+ SmallVector<Init*, 8> Args;
+ SmallVector<StringInit*, 8> ArgNames;
for (unsigned i = 0, e = LHSs->getNumArgs(); i != e; ++i) {
Args.push_back(LHSs->getArg(i));
ArgNames.push_back(LHSs->getArgName(i));
@@ -816,7 +794,7 @@ Init *BinOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
Args.push_back(RHSs->getArg(i));
ArgNames.push_back(RHSs->getArgName(i));
}
- return DagInit::get(LHSs->getOperator(), "", Args, ArgNames);
+ return DagInit::get(LHSs->getOperator(), nullptr, Args, ArgNames);
}
break;
}
@@ -824,7 +802,7 @@ Init *BinOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
ListInit *LHSs = dyn_cast<ListInit>(LHS);
ListInit *RHSs = dyn_cast<ListInit>(RHS);
if (LHSs && RHSs) {
- std::vector<Init *> Args;
+ SmallVector<Init *, 8> Args;
Args.insert(Args.end(), LHSs->begin(), LHSs->end());
Args.insert(Args.end(), RHSs->begin(), RHSs->end());
return ListInit::get(
@@ -836,7 +814,7 @@ Init *BinOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
StringInit *LHSs = dyn_cast<StringInit>(LHS);
StringInit *RHSs = dyn_cast<StringInit>(RHS);
if (LHSs && RHSs)
- return StringInit::get(LHSs->getValue() + RHSs->getValue());
+ return ConcatStringInits(LHSs, RHSs);
break;
}
case EQ: {
@@ -861,6 +839,7 @@ Init *BinOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
}
case ADD:
case AND:
+ case OR:
case SHL:
case SRA:
case SRL: {
@@ -875,6 +854,7 @@ Init *BinOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const {
default: llvm_unreachable("Bad opcode!");
case ADD: Result = LHSv + RHSv; break;
case AND: Result = LHSv & RHSv; break;
+ case OR: Result = LHSv | RHSv; break;
case SHL: Result = LHSv << RHSv; break;
case SRA: Result = LHSv >> RHSv; break;
case SRL: Result = (uint64_t)LHSv >> (uint64_t)RHSv; break;
@@ -902,6 +882,7 @@ std::string BinOpInit::getAsString() const {
case CONCAT: Result = "!con"; break;
case ADD: Result = "!add"; break;
case AND: Result = "!and"; break;
+ case OR: Result = "!or"; break;
case SHL: Result = "!shl"; break;
case SRA: Result = "!sra"; break;
case SRL: Result = "!srl"; break;
@@ -925,7 +906,7 @@ ProfileTernOpInit(FoldingSetNodeID &ID, unsigned Opcode, Init *LHS, Init *MHS,
TernOpInit *TernOpInit::get(TernaryOp Opc, Init *LHS, Init *MHS, Init *RHS,
RecTy *Type) {
static FoldingSet<TernOpInit> ThePool;
- static std::vector<std::unique_ptr<TernOpInit>> TheActualPool;
+ static std::vector<TernOpInit*> TheActualPool;
FoldingSetNodeID ID;
ProfileTernOpInit(ID, Opc, LHS, MHS, RHS, Type);
@@ -934,9 +915,9 @@ TernOpInit *TernOpInit::get(TernaryOp Opc, Init *LHS, Init *MHS, Init *RHS,
if (TernOpInit *I = ThePool.FindNodeOrInsertPos(ID, IP))
return I;
- TernOpInit *I = new TernOpInit(Opc, LHS, MHS, RHS, Type);
+ TernOpInit *I = new(Allocator) TernOpInit(Opc, LHS, MHS, RHS, Type);
ThePool.InsertNode(I, IP);
- TheActualPool.push_back(std::unique_ptr<TernOpInit>(I));
+ TheActualPool.push_back(I);
return I;
}
@@ -955,8 +936,9 @@ static Init *EvaluateOperation(OpInit *RHSo, Init *LHS, Init *Arg,
if (isa<DagRecTy>(TArg->getType()))
return ForeachHelper(LHS, Arg, RHSo, Type, CurRec, CurMultiClass);
- std::vector<Init *> NewOperands;
- for (unsigned i = 0; i < RHSo->getNumOperands(); ++i) {
+ SmallVector<Init *, 8> NewOperands;
+ NewOperands.reserve(RHSo->getNumOperands());
+ for (unsigned i = 0, e = RHSo->getNumOperands(); i < e; ++i) {
if (auto *RHSoo = dyn_cast<OpInit>(RHSo->getOperand(i))) {
if (Init *Result = EvaluateOperation(RHSoo, LHS, Arg,
Type, CurRec, CurMultiClass))
@@ -996,10 +978,10 @@ static Init *ForeachHelper(Init *LHS, Init *MHS, Init *RHS, RecTy *Type,
Type, CurRec, CurMultiClass))
Val = Result;
- std::vector<std::pair<Init *, std::string> > args;
+ SmallVector<std::pair<Init *, StringInit*>, 8> args;
for (unsigned int i = 0; i < MHSd->getNumArgs(); ++i) {
Init *Arg = MHSd->getArg(i);
- std::string ArgName = MHSd->getArgName(i);
+ StringInit *ArgName = MHSd->getArgName(i);
// Process args
if (Init *Result = EvaluateOperation(RHSo, LHS, Arg, Type,
@@ -1010,13 +992,13 @@ static Init *ForeachHelper(Init *LHS, Init *MHS, Init *RHS, RecTy *Type,
args.push_back(std::make_pair(Arg, ArgName));
}
- return DagInit::get(Val, "", args);
+ return DagInit::get(Val, nullptr, args);
}
ListInit *MHSl = dyn_cast<ListInit>(MHS);
if (MHSl && isa<ListRecTy>(Type)) {
- std::vector<Init *> NewOperands;
- std::vector<Init *> NewList(MHSl->begin(), MHSl->end());
+ SmallVector<Init *, 8> NewOperands;
+ SmallVector<Init *, 8> NewList(MHSl->begin(), MHSl->end());
for (Init *&Item : NewList) {
NewOperands.clear();
@@ -1148,7 +1130,7 @@ std::string TernOpInit::getAsString() const {
RHS->getAsString() + ")";
}
-RecTy *TypedInit::getFieldType(const std::string &FieldName) const {
+RecTy *TypedInit::getFieldType(StringInit *FieldName) const {
if (RecordRecTy *RecordType = dyn_cast<RecordRecTy>(getType()))
if (RecordVal *Field = RecordType->getRecord()->getValue(FieldName))
return Field->getType();
@@ -1237,56 +1219,56 @@ TypedInit::convertInitializerTo(RecTy *Ty) const {
return nullptr;
}
-Init *
-TypedInit::convertInitializerBitRange(const std::vector<unsigned> &Bits) const {
+Init *TypedInit::convertInitializerBitRange(ArrayRef<unsigned> Bits) const {
BitsRecTy *T = dyn_cast<BitsRecTy>(getType());
if (!T) return nullptr; // Cannot subscript a non-bits variable.
unsigned NumBits = T->getNumBits();
- SmallVector<Init *, 16> NewBits(Bits.size());
- for (unsigned i = 0, e = Bits.size(); i != e; ++i) {
- if (Bits[i] >= NumBits)
+ SmallVector<Init *, 16> NewBits;
+ NewBits.reserve(Bits.size());
+ for (unsigned Bit : Bits) {
+ if (Bit >= NumBits)
return nullptr;
- NewBits[i] = VarBitInit::get(const_cast<TypedInit *>(this), Bits[i]);
+ NewBits.push_back(VarBitInit::get(const_cast<TypedInit *>(this), Bit));
}
return BitsInit::get(NewBits);
}
-Init *
-TypedInit::convertInitListSlice(const std::vector<unsigned> &Elements) const {
+Init *TypedInit::convertInitListSlice(ArrayRef<unsigned> Elements) const {
ListRecTy *T = dyn_cast<ListRecTy>(getType());
if (!T) return nullptr; // Cannot subscript a non-list variable.
if (Elements.size() == 1)
return VarListElementInit::get(const_cast<TypedInit *>(this), Elements[0]);
- std::vector<Init*> ListInits;
+ SmallVector<Init*, 8> ListInits;
ListInits.reserve(Elements.size());
- for (unsigned i = 0, e = Elements.size(); i != e; ++i)
+ for (unsigned Element : Elements)
ListInits.push_back(VarListElementInit::get(const_cast<TypedInit *>(this),
- Elements[i]));
+ Element));
return ListInit::get(ListInits, T);
}
-VarInit *VarInit::get(const std::string &VN, RecTy *T) {
+VarInit *VarInit::get(StringRef VN, RecTy *T) {
Init *Value = StringInit::get(VN);
return VarInit::get(Value, T);
}
VarInit *VarInit::get(Init *VN, RecTy *T) {
typedef std::pair<RecTy *, Init *> Key;
- static DenseMap<Key, std::unique_ptr<VarInit>> ThePool;
+ static DenseMap<Key, VarInit*> ThePool;
Key TheKey(std::make_pair(T, VN));
- std::unique_ptr<VarInit> &I = ThePool[TheKey];
- if (!I) I.reset(new VarInit(VN, T));
- return I.get();
+ VarInit *&I = ThePool[TheKey];
+ if (!I)
+ I = new(Allocator) VarInit(VN, T);
+ return I;
}
-const std::string &VarInit::getName() const {
+StringRef VarInit::getName() const {
StringInit *NameString = cast<StringInit>(getNameInit());
return NameString->getValue();
}
@@ -1320,8 +1302,7 @@ Init *VarInit::resolveListElementReference(Record &R,
return nullptr;
}
-
-RecTy *VarInit::getFieldType(const std::string &FieldName) const {
+RecTy *VarInit::getFieldType(StringInit *FieldName) const {
if (RecordRecTy *RTy = dyn_cast<RecordRecTy>(getType()))
if (const RecordVal *RV = RTy->getRecord()->getValue(FieldName))
return RV->getType();
@@ -1329,7 +1310,7 @@ RecTy *VarInit::getFieldType(const std::string &FieldName) const {
}
Init *VarInit::getFieldInit(Record &R, const RecordVal *RV,
- const std::string &FieldName) const {
+ StringInit *FieldName) const {
if (isa<RecordRecTy>(getType()))
if (const RecordVal *Val = R.getValue(VarName)) {
if (RV != Val && (RV || isa<UnsetInit>(Val->getValue())))
@@ -1352,13 +1333,14 @@ Init *VarInit::resolveReferences(Record &R, const RecordVal *RV) const {
VarBitInit *VarBitInit::get(TypedInit *T, unsigned B) {
typedef std::pair<TypedInit *, unsigned> Key;
- static DenseMap<Key, std::unique_ptr<VarBitInit>> ThePool;
+ static DenseMap<Key, VarBitInit*> ThePool;
Key TheKey(std::make_pair(T, B));
- std::unique_ptr<VarBitInit> &I = ThePool[TheKey];
- if (!I) I.reset(new VarBitInit(T, B));
- return I.get();
+ VarBitInit *&I = ThePool[TheKey];
+ if (!I)
+ I = new(Allocator) VarBitInit(T, B);
+ return I;
}
Init *VarBitInit::convertInitializerTo(RecTy *Ty) const {
@@ -1383,13 +1365,13 @@ Init *VarBitInit::resolveReferences(Record &R, const RecordVal *RV) const {
VarListElementInit *VarListElementInit::get(TypedInit *T,
unsigned E) {
typedef std::pair<TypedInit *, unsigned> Key;
- static DenseMap<Key, std::unique_ptr<VarListElementInit>> ThePool;
+ static DenseMap<Key, VarListElementInit*> ThePool;
Key TheKey(std::make_pair(T, E));
- std::unique_ptr<VarListElementInit> &I = ThePool[TheKey];
- if (!I) I.reset(new VarListElementInit(T, E));
- return I.get();
+ VarListElementInit *&I = ThePool[TheKey];
+ if (!I) I = new(Allocator) VarListElementInit(T, E);
+ return I;
}
std::string VarListElementInit::getAsString() const {
@@ -1436,31 +1418,30 @@ Init *DefInit::convertInitializerTo(RecTy *Ty) const {
return nullptr;
}
-RecTy *DefInit::getFieldType(const std::string &FieldName) const {
+RecTy *DefInit::getFieldType(StringInit *FieldName) const {
if (const RecordVal *RV = Def->getValue(FieldName))
return RV->getType();
return nullptr;
}
Init *DefInit::getFieldInit(Record &R, const RecordVal *RV,
- const std::string &FieldName) const {
+ StringInit *FieldName) const {
return Def->getValue(FieldName)->getValue();
}
-
std::string DefInit::getAsString() const {
return Def->getName();
}
-FieldInit *FieldInit::get(Init *R, const std::string &FN) {
- typedef std::pair<Init *, TableGenStringKey> Key;
- static DenseMap<Key, std::unique_ptr<FieldInit>> ThePool;
+FieldInit *FieldInit::get(Init *R, StringInit *FN) {
+ typedef std::pair<Init *, StringInit *> Key;
+ static DenseMap<Key, FieldInit*> ThePool;
Key TheKey(std::make_pair(R, FN));
- std::unique_ptr<FieldInit> &I = ThePool[TheKey];
- if (!I) I.reset(new FieldInit(R, FN));
- return I.get();
+ FieldInit *&I = ThePool[TheKey];
+ if (!I) I = new(Allocator) FieldInit(R, FN);
+ return I;
}
Init *FieldInit::getBit(unsigned Bit) const {
@@ -1498,28 +1479,27 @@ Init *FieldInit::resolveReferences(Record &R, const RecordVal *RV) const {
return const_cast<FieldInit *>(this);
}
-static void ProfileDagInit(FoldingSetNodeID &ID, Init *V, const std::string &VN,
+static void ProfileDagInit(FoldingSetNodeID &ID, Init *V, StringInit *VN,
ArrayRef<Init *> ArgRange,
- ArrayRef<std::string> NameRange) {
+ ArrayRef<StringInit *> NameRange) {
ID.AddPointer(V);
- ID.AddString(VN);
+ ID.AddPointer(VN);
- ArrayRef<Init *>::iterator Arg = ArgRange.begin();
- ArrayRef<std::string>::iterator Name = NameRange.begin();
+ ArrayRef<Init *>::iterator Arg = ArgRange.begin();
+ ArrayRef<StringInit *>::iterator Name = NameRange.begin();
while (Arg != ArgRange.end()) {
assert(Name != NameRange.end() && "Arg name underflow!");
ID.AddPointer(*Arg++);
- ID.AddString(*Name++);
+ ID.AddPointer(*Name++);
}
assert(Name == NameRange.end() && "Arg name overflow!");
}
DagInit *
-DagInit::get(Init *V, const std::string &VN,
- ArrayRef<Init *> ArgRange,
- ArrayRef<std::string> NameRange) {
+DagInit::get(Init *V, StringInit *VN, ArrayRef<Init *> ArgRange,
+ ArrayRef<StringInit *> NameRange) {
static FoldingSet<DagInit> ThePool;
- static std::vector<std::unique_ptr<DagInit>> TheActualPool;
+ static std::vector<DagInit*> TheActualPool;
FoldingSetNodeID ID;
ProfileDagInit(ID, V, VN, ArgRange, NameRange);
@@ -1528,17 +1508,17 @@ DagInit::get(Init *V, const std::string &VN,
if (DagInit *I = ThePool.FindNodeOrInsertPos(ID, IP))
return I;
- DagInit *I = new DagInit(V, VN, ArgRange, NameRange);
+ DagInit *I = new(Allocator) DagInit(V, VN, ArgRange, NameRange);
ThePool.InsertNode(I, IP);
- TheActualPool.push_back(std::unique_ptr<DagInit>(I));
+ TheActualPool.push_back(I);
return I;
}
DagInit *
-DagInit::get(Init *V, const std::string &VN,
- const std::vector<std::pair<Init*, std::string> > &args) {
- std::vector<Init *> Args;
- std::vector<std::string> Names;
+DagInit::get(Init *V, StringInit *VN,
+ ArrayRef<std::pair<Init*, StringInit*>> args) {
+ SmallVector<Init *, 8> Args;
+ SmallVector<StringInit *, 8> Names;
for (const auto &Arg : args) {
Args.push_back(Arg.first);
@@ -1560,52 +1540,54 @@ Init *DagInit::convertInitializerTo(RecTy *Ty) const {
}
Init *DagInit::resolveReferences(Record &R, const RecordVal *RV) const {
- std::vector<Init*> NewArgs;
- for (unsigned i = 0, e = Args.size(); i != e; ++i)
- NewArgs.push_back(Args[i]->resolveReferences(R, RV));
+ SmallVector<Init*, 8> NewArgs;
+ NewArgs.reserve(Args.size());
+ bool ArgsChanged = false;
+ for (const Init *Arg : Args) {
+ Init *NewArg = Arg->resolveReferences(R, RV);
+ NewArgs.push_back(NewArg);
+ ArgsChanged |= NewArg != Arg;
+ }
Init *Op = Val->resolveReferences(R, RV);
-
- if (Args != NewArgs || Op != Val)
+ if (Op != Val || ArgsChanged)
return DagInit::get(Op, ValName, NewArgs, ArgNames);
return const_cast<DagInit *>(this);
}
-
std::string DagInit::getAsString() const {
std::string Result = "(" + Val->getAsString();
- if (!ValName.empty())
- Result += ":" + ValName;
+ if (ValName)
+ Result += ":" + ValName->getAsUnquotedString();
if (!Args.empty()) {
Result += " " + Args[0]->getAsString();
- if (!ArgNames[0].empty()) Result += ":$" + ArgNames[0];
+ if (ArgNames[0]) Result += ":$" + ArgNames[0]->getAsUnquotedString();
for (unsigned i = 1, e = Args.size(); i != e; ++i) {
Result += ", " + Args[i]->getAsString();
- if (!ArgNames[i].empty()) Result += ":$" + ArgNames[i];
+ if (ArgNames[i]) Result += ":$" + ArgNames[i]->getAsUnquotedString();
}
}
return Result + ")";
}
-
//===----------------------------------------------------------------------===//
// Other implementations
//===----------------------------------------------------------------------===//
RecordVal::RecordVal(Init *N, RecTy *T, bool P)
- : NameAndPrefix(N, P), Ty(T) {
- Value = UnsetInit::get()->convertInitializerTo(Ty);
+ : Name(N), TyAndPrefix(T, P) {
+ Value = UnsetInit::get()->convertInitializerTo(T);
assert(Value && "Cannot create unset value for current type!");
}
-RecordVal::RecordVal(const std::string &N, RecTy *T, bool P)
- : NameAndPrefix(StringInit::get(N), P), Ty(T) {
- Value = UnsetInit::get()->convertInitializerTo(Ty);
+RecordVal::RecordVal(StringRef N, RecTy *T, bool P)
+ : Name(StringInit::get(N)), TyAndPrefix(T, P) {
+ Value = UnsetInit::get()->convertInitializerTo(T);
assert(Value && "Cannot create unset value for current type!");
}
-const std::string &RecordVal::getName() const {
+StringRef RecordVal::getName() const {
return cast<StringInit>(getNameInit())->getValue();
}
@@ -1628,7 +1610,7 @@ void Record::init() {
// Every record potentially has a def at the top. This value is
// replaced with the top-level def name at instantiation time.
- RecordVal DN("NAME", StringRecTy::get(), 0);
+ RecordVal DN("NAME", StringRecTy::get(), false);
addValue(DN);
}
@@ -1641,11 +1623,11 @@ void Record::checkName() {
DefInit *Record::getDefInit() {
if (!TheInit)
- TheInit.reset(new DefInit(this, new RecordRecTy(this)));
- return TheInit.get();
+ TheInit = new(Allocator) DefInit(this, new(Allocator) RecordRecTy(this));
+ return TheInit;
}
-const std::string &Record::getName() const {
+StringRef Record::getName() const {
return cast<StringInit>(Name)->getValue();
}
@@ -1665,18 +1647,18 @@ void Record::setName(Init *NewName) {
// this. See TGParser::ParseDef and TGParser::ParseDefm.
}
-void Record::setName(const std::string &Name) {
+void Record::setName(StringRef Name) {
setName(StringInit::get(Name));
}
void Record::resolveReferencesTo(const RecordVal *RV) {
- for (unsigned i = 0, e = Values.size(); i != e; ++i) {
- if (RV == &Values[i]) // Skip resolve the same field as the given one
+ for (RecordVal &Value : Values) {
+ if (RV == &Value) // Skip resolve the same field as the given one
continue;
- if (Init *V = Values[i].getValue())
- if (Values[i].setValue(V->resolveReferences(*this, RV)))
+ if (Init *V = Value.getValue())
+ if (Value.setValue(V->resolveReferences(*this, RV)))
PrintFatalError(getLoc(), "Invalid value is found when setting '" +
- Values[i].getNameInitAsString() +
+ Value.getNameInitAsString() +
"' after resolving references" +
(RV ? " against '" + RV->getNameInitAsString() +
"' of (" + RV->getValue()->getAsUnquotedString() +
@@ -1720,10 +1702,10 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const Record &R) {
OS << "\n";
for (const RecordVal &Val : R.getValues())
- if (Val.getPrefix() && !R.isTemplateArg(Val.getName()))
+ if (Val.getPrefix() && !R.isTemplateArg(Val.getNameInit()))
OS << Val;
for (const RecordVal &Val : R.getValues())
- if (!Val.getPrefix() && !R.isTemplateArg(Val.getName()))
+ if (!Val.getPrefix() && !R.isTemplateArg(Val.getNameInit()))
OS << Val;
return OS << "}\n";
@@ -1737,7 +1719,6 @@ Init *Record::getValueInit(StringRef FieldName) const {
return R->getValue();
}
-
std::string Record::getValueAsString(StringRef FieldName) const {
const RecordVal *R = getValue(FieldName);
if (!R || !R->getValue())
@@ -1884,7 +1865,6 @@ DagInit *Record::getValueAsDag(StringRef FieldName) const {
FieldName + "' does not have a dag initializer!");
}
-
LLVM_DUMP_METHOD void MultiClass::dump() const {
errs() << "Record:\n";
Rec.dump();
@@ -1894,7 +1874,6 @@ LLVM_DUMP_METHOD void MultiClass::dump() const {
Proto->dump();
}
-
LLVM_DUMP_METHOD void RecordKeeper::dump() const { errs() << *this; }
raw_ostream &llvm::operator<<(raw_ostream &OS, const RecordKeeper &RK) {
@@ -1909,7 +1888,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const RecordKeeper &RK) {
}
std::vector<Record *>
-RecordKeeper::getAllDerivedDefinitions(const std::string &ClassName) const {
+RecordKeeper::getAllDerivedDefinitions(StringRef ClassName) const {
Record *Class = getClass(ClassName);
if (!Class)
PrintFatalError("ERROR: Couldn't find the `" + ClassName + "' class!\n");
@@ -1922,35 +1901,25 @@ RecordKeeper::getAllDerivedDefinitions(const std::string &ClassName) const {
return Defs;
}
-Init *llvm::QualifyName(Record &CurRec, MultiClass *CurMultiClass,
- Init *Name, const std::string &Scoper) {
- RecTy *Type = cast<TypedInit>(Name)->getType();
-
- BinOpInit *NewName =
- BinOpInit::get(BinOpInit::STRCONCAT,
- BinOpInit::get(BinOpInit::STRCONCAT,
- CurRec.getNameInit(),
- StringInit::get(Scoper),
- Type)->Fold(&CurRec, CurMultiClass),
- Name,
- Type);
+static Init *GetStrConcat(Init *I0, Init *I1) {
+ // Shortcut for the common case of concatenating two strings.
+ if (const StringInit *I0s = dyn_cast<StringInit>(I0))
+ if (const StringInit *I1s = dyn_cast<StringInit>(I1))
+ return ConcatStringInits(I0s, I1s);
+ return BinOpInit::get(BinOpInit::STRCONCAT, I0, I1, StringRecTy::get());
+}
+Init *llvm::QualifyName(Record &CurRec, MultiClass *CurMultiClass,
+ Init *Name, StringRef Scoper) {
+ Init *NewName = GetStrConcat(CurRec.getNameInit(), StringInit::get(Scoper));
+ NewName = GetStrConcat(NewName, Name);
if (CurMultiClass && Scoper != "::") {
- NewName =
- BinOpInit::get(BinOpInit::STRCONCAT,
- BinOpInit::get(BinOpInit::STRCONCAT,
- CurMultiClass->Rec.getNameInit(),
- StringInit::get("::"),
- Type)->Fold(&CurRec, CurMultiClass),
- NewName->Fold(&CurRec, CurMultiClass),
- Type);
+ Init *Prefix = GetStrConcat(CurMultiClass->Rec.getNameInit(),
+ StringInit::get("::"));
+ NewName = GetStrConcat(Prefix, NewName);
}
- return NewName->Fold(&CurRec, CurMultiClass);
-}
-
-Init *llvm::QualifyName(Record &CurRec, MultiClass *CurMultiClass,
- const std::string &Name,
- const std::string &Scoper) {
- return QualifyName(CurRec, CurMultiClass, StringInit::get(Name), Scoper);
+ if (BinOpInit *BinOp = dyn_cast<BinOpInit>(NewName))
+ NewName = BinOp->Fold(&CurRec, CurMultiClass);
+ return NewName;
}
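[editor's note] The Record.cpp hunks above switch every Init pool from unique_ptr ownership to a single BumpPtrAllocator that is never freed, and the string-keyed pools (StringInit, CodeInit) now copy their key into that allocator so the DenseMap key stays valid. A stand-alone sketch of that interning pattern, using a made-up Interned node type in place of the Init classes:

  #include <utility>
  #include "llvm/ADT/DenseMap.h"
  #include "llvm/ADT/StringRef.h"
  #include "llvm/Support/Allocator.h"

  struct Interned { llvm::StringRef Value; };

  static llvm::BumpPtrAllocator Allocator;

  Interned *intern(llvm::StringRef V) {
    static llvm::DenseMap<llvm::StringRef, Interned *> Pool;
    auto I = Pool.insert(std::make_pair(V, nullptr));
    if (I.second) {
      // The caller's buffer may go away; copy the bytes into the allocator
      // and key the map on that stable copy, as StringInit::get does above.
      llvm::StringRef Copy = V.copy(Allocator);
      I.first->first = Copy;
      I.first->second = new (Allocator) Interned{Copy};
    }
    return I.first->second;
  }
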
diff --git a/contrib/llvm/lib/TableGen/TGLexer.cpp b/contrib/llvm/lib/TableGen/TGLexer.cpp
index 63b85842d6a9..5d6f7c23e0b6 100644
--- a/contrib/llvm/lib/TableGen/TGLexer.cpp
+++ b/contrib/llvm/lib/TableGen/TGLexer.cpp
@@ -15,11 +15,13 @@
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Config/config.h" // for strtoull()/strtoll() define
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/TableGen/Error.h"
#include <cctype>
#include <cerrno>
+#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
@@ -155,7 +157,7 @@ tgtok::TokKind TGLexer::LexToken() {
case '0': case '1':
if (NextChar == 'b')
return LexNumber();
- // Fallthrough
+ LLVM_FALLTHROUGH;
case '2': case '3': case '4': case '5':
case '6': case '7': case '8': case '9':
case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
@@ -222,7 +224,7 @@ tgtok::TokKind TGLexer::LexString() {
case '\0':
if (CurPtr == CurBuf.end())
return ReturnError(StrStart, "End of file in string literal");
- // FALL THROUGH
+ LLVM_FALLTHROUGH;
default:
return ReturnError(CurPtr, "invalid escape in string literal");
}
@@ -246,7 +248,6 @@ tgtok::TokKind TGLexer::LexVarName() {
return tgtok::VarName;
}
-
tgtok::TokKind TGLexer::LexIdentifier() {
// The first letter is [a-zA-Z_#].
const char *IdentStart = TokStart;
@@ -301,7 +302,6 @@ bool TGLexer::LexInclude() {
std::string Filename = CurStrVal;
std::string IncludedFile;
-
CurBuffer = SrcMgr.AddIncludeFile(Filename, SMLoc::getFromPointer(CurPtr),
IncludedFile);
if (!CurBuffer) {
@@ -326,7 +326,7 @@ bool TGLexer::LexInclude() {
void TGLexer::SkipBCPLComment() {
++CurPtr; // skip the second slash.
- while (1) {
+ while (true) {
switch (*CurPtr) {
case '\n':
case '\r':
@@ -348,7 +348,7 @@ bool TGLexer::SkipCComment() {
++CurPtr; // skip the star.
unsigned CommentDepth = 1;
- while (1) {
+ while (true) {
int CurChar = getNextChar();
switch (CurChar) {
case EOF:
@@ -436,7 +436,7 @@ tgtok::TokKind TGLexer::LexBracket() {
return tgtok::l_square;
++CurPtr;
const char *CodeStart = CurPtr;
- while (1) {
+ while (true) {
int Char = getNextChar();
if (Char == EOF) break;
@@ -472,6 +472,7 @@ tgtok::TokKind TGLexer::LexExclaim() {
.Case("con", tgtok::XConcat)
.Case("add", tgtok::XADD)
.Case("and", tgtok::XAND)
+ .Case("or", tgtok::XOR)
.Case("shl", tgtok::XSHL)
.Case("sra", tgtok::XSRA)
.Case("srl", tgtok::XSRL)
@@ -485,4 +486,3 @@ tgtok::TokKind TGLexer::LexExclaim() {
return Kind != tgtok::Error ? Kind : ReturnError(Start-1, "Unknown operator");
}
-
diff --git a/contrib/llvm/lib/TableGen/TGLexer.h b/contrib/llvm/lib/TableGen/TGLexer.h
index cbc30be8a572..b5b58161878b 100644
--- a/contrib/llvm/lib/TableGen/TGLexer.h
+++ b/contrib/llvm/lib/TableGen/TGLexer.h
@@ -45,9 +45,9 @@ namespace tgtok {
// Keywords.
Bit, Bits, Class, Code, Dag, Def, Foreach, Defm, Field, In, Int, Let, List,
MultiClass, String,
-
+
// !keywords.
- XConcat, XADD, XAND, XSRA, XSRL, XSHL, XListConcat, XStrConcat, XCast,
+ XConcat, XADD, XAND, XOR, XSRA, XSRL, XSHL, XListConcat, XStrConcat, XCast,
XSubst, XForEach, XHead, XTail, XEmpty, XIf, XEq,
// Integer value.
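[editor's note] The lexer and token changes above add !or alongside !and; the dispatch lives in a StringSwitch inside TGLexer::LexExclaim. A self-contained sketch of the same dispatch, with abbreviated, illustrative enumerators standing in for tgtok kinds:

  #include "llvm/ADT/StringRef.h"
  #include "llvm/ADT/StringSwitch.h"

  enum class BangOp { Concat, Add, And, Or, Shl, Unknown };

  // Maps the spelling that follows '!' to an operator kind; "or" is the
  // case introduced by this change.
  BangOp classifyBangOperator(llvm::StringRef Spelling) {
    return llvm::StringSwitch<BangOp>(Spelling)
        .Case("con", BangOp::Concat)
        .Case("add", BangOp::Add)
        .Case("and", BangOp::And)
        .Case("or", BangOp::Or)
        .Case("shl", BangOp::Shl)
        .Default(BangOp::Unknown);
  }
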
diff --git a/contrib/llvm/lib/TableGen/TGParser.cpp b/contrib/llvm/lib/TableGen/TGParser.cpp
index 34e90925e929..1a91b37b742b 100644
--- a/contrib/llvm/lib/TableGen/TGParser.cpp
+++ b/contrib/llvm/lib/TableGen/TGParser.cpp
@@ -12,11 +12,19 @@
//===----------------------------------------------------------------------===//
#include "TGParser.h"
+#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Record.h"
#include <algorithm>
+#include <cassert>
+#include <cstdint>
+
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -24,10 +32,12 @@ using namespace llvm;
//===----------------------------------------------------------------------===//
namespace llvm {
+
struct SubClassReference {
SMRange RefRange;
Record *Rec;
- std::vector<Init*> TemplateArgs;
+ SmallVector<Init*, 4> TemplateArgs;
+
SubClassReference() : Rec(nullptr) {}
bool isInvalid() const { return Rec == nullptr; }
@@ -36,7 +46,8 @@ struct SubClassReference {
struct SubMultiClassReference {
SMRange RefRange;
MultiClass *MC;
- std::vector<Init*> TemplateArgs;
+ SmallVector<Init*, 4> TemplateArgs;
+
SubMultiClassReference() : MC(nullptr) {}
bool isInvalid() const { return MC == nullptr; }
@@ -130,7 +141,7 @@ bool TGParser::SetValue(Record *CurRec, SMLoc Loc, Init *ValName,
}
if (RV->setValue(V)) {
- std::string InitType = "";
+ std::string InitType;
if (BitsInit *BI = dyn_cast<BitsInit>(V))
InitType = (Twine("' of type bit initializer with length ") +
Twine(BI->getNumBits())).str();
@@ -328,14 +339,14 @@ bool TGParser::ProcessForeachDefs(Record *CurRec, SMLoc Loc, IterSet &IterVals){
IterRec->addValue(RecordVal(IterVar->getName(), IVal->getType(), false));
- if (SetValue(IterRec.get(), Loc, IterVar->getName(), None, IVal))
+ if (SetValue(IterRec.get(), Loc, IterVar->getNameInit(), None, IVal))
return Error(Loc, "when instantiating this def");
// Resolve it next.
- IterRec->resolveReferencesTo(IterRec->getValue(IterVar->getName()));
+ IterRec->resolveReferencesTo(IterRec->getValue(IterVar->getNameInit()));
// Remove it.
- IterRec->removeValue(IterVar->getName());
+ IterRec->removeValue(IterVar->getNameInit());
}
if (Records.getDef(IterRec->getNameInitAsString())) {
@@ -474,7 +485,7 @@ ParseSubClassReference(Record *CurRec, bool isDefm) {
return Result;
}
- Result.TemplateArgs = ParseValueList(CurRec, Result.Rec);
+ ParseValueList(Result.TemplateArgs, CurRec, Result.Rec);
if (Result.TemplateArgs.empty()) {
Result.Rec = nullptr; // Error parsing value list.
return Result;
@@ -519,7 +530,7 @@ ParseSubMultiClassReference(MultiClass *CurMC) {
return Result;
}
- Result.TemplateArgs = ParseValueList(&CurMC->Rec, &Result.MC->Rec);
+ ParseValueList(Result.TemplateArgs, &CurMC->Rec, &Result.MC->Rec);
if (Result.TemplateArgs.empty()) {
Result.MC = nullptr; // Error parsing value list.
return Result;
@@ -540,7 +551,7 @@ ParseSubMultiClassReference(MultiClass *CurMC) {
/// RangePiece ::= INTVAL
/// RangePiece ::= INTVAL '-' INTVAL
/// RangePiece ::= INTVAL INTVAL
-bool TGParser::ParseRangePiece(std::vector<unsigned> &Ranges) {
+bool TGParser::ParseRangePiece(SmallVectorImpl<unsigned> &Ranges) {
if (Lex.getCode() != tgtok::IntVal) {
TokError("expected integer or bitrange");
return true;
@@ -584,26 +595,27 @@ bool TGParser::ParseRangePiece(std::vector<unsigned> &Ranges) {
///
/// RangeList ::= RangePiece (',' RangePiece)*
///
-std::vector<unsigned> TGParser::ParseRangeList() {
- std::vector<unsigned> Result;
-
+void TGParser::ParseRangeList(SmallVectorImpl<unsigned> &Result) {
// Parse the first piece.
- if (ParseRangePiece(Result))
- return std::vector<unsigned>();
+ if (ParseRangePiece(Result)) {
+ Result.clear();
+ return;
+ }
while (Lex.getCode() == tgtok::comma) {
Lex.Lex(); // Eat the comma.
// Parse the next range piece.
- if (ParseRangePiece(Result))
- return std::vector<unsigned>();
+ if (ParseRangePiece(Result)) {
+ Result.clear();
+ return;
+ }
}
- return Result;
}
/// ParseOptionalRangeList - Parse either a range list in <>'s or nothing.
/// OptionalRangeList ::= '<' RangeList '>'
/// OptionalRangeList ::= /*empty*/
-bool TGParser::ParseOptionalRangeList(std::vector<unsigned> &Ranges) {
+bool TGParser::ParseOptionalRangeList(SmallVectorImpl<unsigned> &Ranges) {
if (Lex.getCode() != tgtok::less)
return false;
@@ -611,7 +623,7 @@ bool TGParser::ParseOptionalRangeList(std::vector<unsigned> &Ranges) {
Lex.Lex(); // eat the '<'
// Parse the range list.
- Ranges = ParseRangeList();
+ ParseRangeList(Ranges);
if (Ranges.empty()) return true;
if (Lex.getCode() != tgtok::greater) {
@@ -625,7 +637,7 @@ bool TGParser::ParseOptionalRangeList(std::vector<unsigned> &Ranges) {
/// ParseOptionalBitList - Parse either a bit list in {}'s or nothing.
/// OptionalBitList ::= '{' RangeList '}'
/// OptionalBitList ::= /*empty*/
-bool TGParser::ParseOptionalBitList(std::vector<unsigned> &Ranges) {
+bool TGParser::ParseOptionalBitList(SmallVectorImpl<unsigned> &Ranges) {
if (Lex.getCode() != tgtok::l_brace)
return false;
@@ -633,7 +645,7 @@ bool TGParser::ParseOptionalBitList(std::vector<unsigned> &Ranges) {
Lex.Lex(); // eat the '{'
// Parse the range list.
- Ranges = ParseRangeList();
+ ParseRangeList(Ranges);
if (Ranges.empty()) return true;
if (Lex.getCode() != tgtok::r_brace) {
@@ -644,7 +656,6 @@ bool TGParser::ParseOptionalBitList(std::vector<unsigned> &Ranges) {
return false;
}
-
/// ParseType - Parse and return a tblgen type. This returns null on error.
///
/// Type ::= STRING // string type
@@ -705,8 +716,7 @@ RecTy *TGParser::ParseType() {
/// ParseIDValue - This is just like ParseIDValue above, but it assumes the ID
/// has already been read.
-Init *TGParser::ParseIDValue(Record *CurRec,
- const std::string &Name, SMLoc NameLoc,
+Init *TGParser::ParseIDValue(Record *CurRec, StringInit *Name, SMLoc NameLoc,
IDParseMode Mode) {
if (CurRec) {
if (const RecordVal *RV = CurRec->getValue(Name))
@@ -726,8 +736,7 @@ Init *TGParser::ParseIDValue(Record *CurRec,
}
if (CurMultiClass) {
- Init *MCName = QualifyName(CurMultiClass->Rec, CurMultiClass, Name,
- "::");
+ Init *MCName = QualifyName(CurMultiClass->Rec, CurMultiClass, Name, "::");
if (CurMultiClass->Rec.isTemplateArg(MCName)) {
const RecordVal *RV = CurMultiClass->Rec.getValue(MCName);
@@ -739,22 +748,22 @@ Init *TGParser::ParseIDValue(Record *CurRec,
// If this is in a foreach loop, make sure it's not a loop iterator
for (const auto &L : Loops) {
VarInit *IterVar = dyn_cast<VarInit>(L.IterVar);
- if (IterVar && IterVar->getName() == Name)
+ if (IterVar && IterVar->getNameInit() == Name)
return IterVar;
}
if (Mode == ParseNameMode)
- return StringInit::get(Name);
+ return Name;
- if (Record *D = Records.getDef(Name))
+ if (Record *D = Records.getDef(Name->getValue()))
return DefInit::get(D);
if (Mode == ParseValueMode) {
- Error(NameLoc, "Variable not defined: '" + Name + "'");
+ Error(NameLoc, "Variable not defined: '" + Name->getValue() + "'");
return nullptr;
}
- return StringInit::get(Name);
+ return Name;
}
/// ParseOperation - Parse an operator. This returns null on error.
@@ -871,6 +880,7 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
case tgtok::XConcat:
case tgtok::XADD:
case tgtok::XAND:
+ case tgtok::XOR:
case tgtok::XSRA:
case tgtok::XSRL:
case tgtok::XSHL:
@@ -889,6 +899,7 @@ Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) {
case tgtok::XConcat: Code = BinOpInit::CONCAT;Type = DagRecTy::get(); break;
case tgtok::XADD: Code = BinOpInit::ADD; Type = IntRecTy::get(); break;
case tgtok::XAND: Code = BinOpInit::AND; Type = IntRecTy::get(); break;
+ case tgtok::XOR: Code = BinOpInit::OR; Type = IntRecTy::get(); break;
case tgtok::XSRA: Code = BinOpInit::SRA; Type = IntRecTy::get(); break;
case tgtok::XSRL: Code = BinOpInit::SRL; Type = IntRecTy::get(); break;
case tgtok::XSHL: Code = BinOpInit::SHL; Type = IntRecTy::get(); break;
@@ -1110,7 +1121,6 @@ RecTy *TGParser::ParseOperatorType() {
return Type;
}
-
/// ParseSimpleValue - Parse a tblgen value. This returns null on error.
///
/// SimpleValue ::= IDValue
@@ -1173,7 +1183,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
break;
case tgtok::Id: {
SMLoc NameLoc = Lex.getLoc();
- std::string Name = Lex.getCurStrVal();
+ StringInit *Name = StringInit::get(Lex.getCurStrVal());
if (Lex.Lex() != tgtok::less) // consume the Id.
return ParseIDValue(CurRec, Name, NameLoc, Mode); // Value ::= IDValue
@@ -1186,14 +1196,15 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
// This is a CLASS<initvalslist> expression. This is supposed to synthesize
// a new anonymous definition, deriving from CLASS<initvalslist> with no
// body.
- Record *Class = Records.getClass(Name);
+ Record *Class = Records.getClass(Name->getValue());
if (!Class) {
- Error(NameLoc, "Expected a class name, got '" + Name + "'");
+ Error(NameLoc, "Expected a class name, got '" + Name->getValue() + "'");
return nullptr;
}
- std::vector<Init*> ValueList = ParseValueList(CurRec, Class);
- if (ValueList.empty()) return nullptr;
+ SubClassReference SCRef;
+ ParseValueList(SCRef.TemplateArgs, CurRec, Class);
+ if (SCRef.TemplateArgs.empty()) return nullptr;
if (Lex.getCode() != tgtok::greater) {
TokError("expected '>' at end of value list");
@@ -1206,10 +1217,8 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
auto NewRecOwner = llvm::make_unique<Record>(GetNewAnonymousName(), NameLoc,
Records, /*IsAnonymous=*/true);
Record *NewRec = NewRecOwner.get(); // Keep a copy since we may release.
- SubClassReference SCRef;
SCRef.RefRange = SMRange(NameLoc, EndLoc);
SCRef.Rec = Class;
- SCRef.TemplateArgs = ValueList;
// Add info about the subclass to NewRec.
if (AddSubClass(NewRec, SCRef))
return nullptr;
@@ -1251,10 +1260,10 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
case tgtok::l_brace: { // Value ::= '{' ValueList '}'
SMLoc BraceLoc = Lex.getLoc();
Lex.Lex(); // eat the '{'
- std::vector<Init*> Vals;
+ SmallVector<Init*, 16> Vals;
if (Lex.getCode() != tgtok::r_brace) {
- Vals = ParseValueList(CurRec);
+ ParseValueList(Vals, CurRec);
if (Vals.empty()) return nullptr;
}
if (Lex.getCode() != tgtok::r_brace) {
@@ -1301,7 +1310,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
}
case tgtok::l_square: { // Value ::= '[' ValueList ']'
Lex.Lex(); // eat the '['
- std::vector<Init*> Vals;
+ SmallVector<Init*, 16> Vals;
RecTy *DeducedEltTy = nullptr;
ListRecTy *GivenListTy = nullptr;
@@ -1317,8 +1326,8 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
}
if (Lex.getCode() != tgtok::r_square) {
- Vals = ParseValueList(CurRec, nullptr,
- GivenListTy ? GivenListTy->getElementType() : nullptr);
+ ParseValueList(Vals, CurRec, nullptr,
+ GivenListTy ? GivenListTy->getElementType() : nullptr);
if (Vals.empty()) return nullptr;
}
if (Lex.getCode() != tgtok::r_square) {
@@ -1405,19 +1414,19 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
if (!Operator) return nullptr;
// If the operator name is present, parse it.
- std::string OperatorName;
+ StringInit *OperatorName = nullptr;
if (Lex.getCode() == tgtok::colon) {
if (Lex.Lex() != tgtok::VarName) { // eat the ':'
TokError("expected variable name in dag operator");
return nullptr;
}
- OperatorName = Lex.getCurStrVal();
+ OperatorName = StringInit::get(Lex.getCurStrVal());
Lex.Lex(); // eat the VarName.
}
- std::vector<std::pair<llvm::Init*, std::string> > DagArgs;
+ SmallVector<std::pair<llvm::Init*, StringInit*>, 8> DagArgs;
if (Lex.getCode() != tgtok::r_paren) {
- DagArgs = ParseDagArgList(CurRec);
+ ParseDagArgList(DagArgs, CurRec);
if (DagArgs.empty()) return nullptr;
}
@@ -1437,6 +1446,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType,
case tgtok::XConcat:
case tgtok::XADD:
case tgtok::XAND:
+ case tgtok::XOR:
case tgtok::XSRA:
case tgtok::XSRL:
case tgtok::XSHL:
@@ -1465,7 +1475,7 @@ Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType, IDParseMode Mode) {
if (!Result) return nullptr;
// Parse the suffixes now if present.
- while (1) {
+ while (true) {
switch (Lex.getCode()) {
default: return Result;
case tgtok::l_brace: {
@@ -1475,7 +1485,8 @@ Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType, IDParseMode Mode) {
SMLoc CurlyLoc = Lex.getLoc();
Lex.Lex(); // eat the '{'
- std::vector<unsigned> Ranges = ParseRangeList();
+ SmallVector<unsigned, 16> Ranges;
+ ParseRangeList(Ranges);
if (Ranges.empty()) return nullptr;
// Reverse the bitlist.
@@ -1497,7 +1508,8 @@ Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType, IDParseMode Mode) {
case tgtok::l_square: {
SMLoc SquareLoc = Lex.getLoc();
Lex.Lex(); // eat the '['
- std::vector<unsigned> Ranges = ParseRangeList();
+ SmallVector<unsigned, 16> Ranges;
+ ParseRangeList(Ranges);
if (Ranges.empty()) return nullptr;
Result = Result->convertInitListSlice(Ranges);
@@ -1514,19 +1526,21 @@ Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType, IDParseMode Mode) {
Lex.Lex();
break;
}
- case tgtok::period:
+ case tgtok::period: {
if (Lex.Lex() != tgtok::Id) { // eat the .
TokError("expected field identifier after '.'");
return nullptr;
}
- if (!Result->getFieldType(Lex.getCurStrVal())) {
+ StringInit *FieldName = StringInit::get(Lex.getCurStrVal());
+ if (!Result->getFieldType(FieldName)) {
TokError("Cannot access field '" + Lex.getCurStrVal() + "' of value '" +
Result->getAsString() + "'");
return nullptr;
}
- Result = FieldInit::get(Result, Lex.getCurStrVal());
+ Result = FieldInit::get(Result, FieldName);
Lex.Lex(); // eat field name
break;
+ }
case tgtok::paste:
SMLoc PasteLoc = Lex.getLoc();
@@ -1587,30 +1601,34 @@ Init *TGParser::ParseValue(Record *CurRec, RecTy *ItemType, IDParseMode Mode) {
/// DagArg ::= VARNAME
/// DagArgList ::= DagArg
/// DagArgList ::= DagArgList ',' DagArg
-std::vector<std::pair<llvm::Init*, std::string> >
-TGParser::ParseDagArgList(Record *CurRec) {
- std::vector<std::pair<llvm::Init*, std::string> > Result;
+void TGParser::ParseDagArgList(
+ SmallVectorImpl<std::pair<llvm::Init*, StringInit*>> &Result,
+ Record *CurRec) {
- while (1) {
+ while (true) {
// DagArg ::= VARNAME
if (Lex.getCode() == tgtok::VarName) {
// A missing value is treated like '?'.
- Result.emplace_back(UnsetInit::get(), Lex.getCurStrVal());
+ StringInit *VarName = StringInit::get(Lex.getCurStrVal());
+ Result.emplace_back(UnsetInit::get(), VarName);
Lex.Lex();
} else {
// DagArg ::= Value (':' VARNAME)?
Init *Val = ParseValue(CurRec);
- if (!Val)
- return std::vector<std::pair<llvm::Init*, std::string> >();
+ if (!Val) {
+ Result.clear();
+ return;
+ }
// If the variable name is present, add it.
- std::string VarName;
+ StringInit *VarName = nullptr;
if (Lex.getCode() == tgtok::colon) {
if (Lex.Lex() != tgtok::VarName) { // eat the ':'
TokError("expected variable name in dag literal");
- return std::vector<std::pair<llvm::Init*, std::string> >();
+ Result.clear();
+ return;
}
- VarName = Lex.getCurStrVal();
+ VarName = StringInit::get(Lex.getCurStrVal());
Lex.Lex(); // eat the VarName.
}
@@ -1619,27 +1637,24 @@ TGParser::ParseDagArgList(Record *CurRec) {
if (Lex.getCode() != tgtok::comma) break;
Lex.Lex(); // eat the ','
}
-
- return Result;
}
-
/// ParseValueList - Parse a comma-separated list of values into Result. Note
/// that this always expects to be able to parse at least one value; on error
/// it leaves Result empty.
///
/// ValueList ::= Value (',' Value)*
///
-std::vector<Init*> TGParser::ParseValueList(Record *CurRec, Record *ArgsRec,
- RecTy *EltTy) {
- std::vector<Init*> Result;
+void TGParser::ParseValueList(SmallVectorImpl<Init*> &Result, Record *CurRec,
+ Record *ArgsRec, RecTy *EltTy) {
RecTy *ItemType = EltTy;
unsigned int ArgN = 0;
if (ArgsRec && !EltTy) {
ArrayRef<Init *> TArgs = ArgsRec->getTemplateArgs();
if (TArgs.empty()) {
TokError("template argument provided to non-template class");
- return std::vector<Init*>();
+ Result.clear();
+ return;
}
const RecordVal *RV = ArgsRec->getValue(TArgs[ArgN]);
if (!RV) {
@@ -1651,7 +1666,10 @@ std::vector<Init*> TGParser::ParseValueList(Record *CurRec, Record *ArgsRec,
++ArgN;
}
Result.push_back(ParseValue(CurRec, ItemType));
- if (!Result.back()) return std::vector<Init*>();
+ if (!Result.back()) {
+ Result.clear();
+ return;
+ }
while (Lex.getCode() == tgtok::comma) {
Lex.Lex(); // Eat the comma
@@ -1660,7 +1678,8 @@ std::vector<Init*> TGParser::ParseValueList(Record *CurRec, Record *ArgsRec,
ArrayRef<Init *> TArgs = ArgsRec->getTemplateArgs();
if (ArgN >= TArgs.size()) {
TokError("too many template arguments");
- return std::vector<Init*>();
+ Result.clear();
+ return;
}
const RecordVal *RV = ArgsRec->getValue(TArgs[ArgN]);
assert(RV && "Template argument record not found??");
@@ -1668,13 +1687,13 @@ std::vector<Init*> TGParser::ParseValueList(Record *CurRec, Record *ArgsRec,
++ArgN;
}
Result.push_back(ParseValue(CurRec, ItemType));
- if (!Result.back()) return std::vector<Init*>();
+ if (!Result.back()) {
+ Result.clear();
+ return;
+ }
}
-
- return Result;
}
-
/// ParseDeclaration - Read a declaration, returning the name of field ID, or an
/// empty string on error. This can happen in a number of different contexts,
/// including within a def or in the template args for a def (in which case
@@ -1758,7 +1777,7 @@ VarInit *TGParser::ParseForeachDeclaration(ListInit *&ForeachListValue) {
Lex.Lex(); // Eat the '='
RecTy *IterType = nullptr;
- std::vector<unsigned> Ranges;
+ SmallVector<unsigned, 16> Ranges;
switch (Lex.getCode()) {
default: TokError("Unknown token when expecting a range list"); return nullptr;
@@ -1787,7 +1806,7 @@ VarInit *TGParser::ParseForeachDeclaration(ListInit *&ForeachListValue) {
case tgtok::l_brace: { // '{' RangeList '}'
Lex.Lex(); // eat the '{'
- Ranges = ParseRangeList();
+ ParseRangeList(Ranges);
if (Lex.getCode() != tgtok::r_brace) {
TokError("expected '}' at end of bit range list");
return nullptr;
@@ -1848,7 +1867,6 @@ bool TGParser::ParseTemplateArgList(Record *CurRec) {
return false;
}
-
/// ParseBodyItem - Parse a single item at within the body of a def or class.
///
/// BodyItem ::= Declaration ';'
@@ -1869,10 +1887,10 @@ bool TGParser::ParseBodyItem(Record *CurRec) {
return TokError("expected field identifier after let");
SMLoc IdLoc = Lex.getLoc();
- std::string FieldName = Lex.getCurStrVal();
+ StringInit *FieldName = StringInit::get(Lex.getCurStrVal());
Lex.Lex(); // eat the field name.
- std::vector<unsigned> BitList;
+ SmallVector<unsigned, 16> BitList;
if (ParseOptionalBitList(BitList))
return true;
std::reverse(BitList.begin(), BitList.end());
@@ -1883,7 +1901,7 @@ bool TGParser::ParseBodyItem(Record *CurRec) {
RecordVal *Field = CurRec->getValue(FieldName);
if (!Field)
- return TokError("Value '" + FieldName + "' unknown!");
+ return TokError("Value '" + FieldName->getValue() + "' unknown!");
RecTy *Type = Field->getType();
@@ -1928,7 +1946,7 @@ bool TGParser::ParseBody(Record *CurRec) {
/// \brief Apply the current let bindings to \a CurRec.
/// \returns true on error, false otherwise.
bool TGParser::ApplyLetStack(Record *CurRec) {
- for (std::vector<LetRecord> &LetInfo : LetStack)
+ for (SmallVectorImpl<LetRecord> &LetInfo : LetStack)
for (LetRecord &LR : LetInfo)
if (SetValue(CurRec, LR.Loc, LR.Name, LR.Bits, LR.Value))
return true;
@@ -1951,7 +1969,7 @@ bool TGParser::ParseObjectBody(Record *CurRec) {
// Read all of the subclasses.
SubClassReference SubClass = ParseSubClassReference(CurRec, false);
- while (1) {
+ while (true) {
// Check for error.
if (!SubClass.Rec) return true;
@@ -2139,38 +2157,44 @@ bool TGParser::ParseClass() {
/// LetList ::= LetItem (',' LetItem)*
/// LetItem ::= ID OptionalRangeList '=' Value
///
-std::vector<LetRecord> TGParser::ParseLetList() {
- std::vector<LetRecord> Result;
-
- while (1) {
+void TGParser::ParseLetList(SmallVectorImpl<LetRecord> &Result) {
+ while (true) {
if (Lex.getCode() != tgtok::Id) {
TokError("expected identifier in let definition");
- return std::vector<LetRecord>();
+ Result.clear();
+ return;
}
- std::string Name = Lex.getCurStrVal();
+
+ StringInit *Name = StringInit::get(Lex.getCurStrVal());
SMLoc NameLoc = Lex.getLoc();
Lex.Lex(); // Eat the identifier.
// Check for an optional RangeList.
- std::vector<unsigned> Bits;
- if (ParseOptionalRangeList(Bits))
- return std::vector<LetRecord>();
+ SmallVector<unsigned, 16> Bits;
+ if (ParseOptionalRangeList(Bits)) {
+ Result.clear();
+ return;
+ }
std::reverse(Bits.begin(), Bits.end());
if (Lex.getCode() != tgtok::equal) {
TokError("expected '=' in let expression");
- return std::vector<LetRecord>();
+ Result.clear();
+ return;
}
Lex.Lex(); // eat the '='.
Init *Val = ParseValue(nullptr);
- if (!Val) return std::vector<LetRecord>();
+ if (!Val) {
+ Result.clear();
+ return;
+ }
// Now that we have everything, add the record.
- Result.emplace_back(std::move(Name), std::move(Bits), Val, NameLoc);
+ Result.emplace_back(Name, Bits, Val, NameLoc);
if (Lex.getCode() != tgtok::comma)
- return Result;
+ return;
Lex.Lex(); // eat the comma.
}
}
@@ -2186,7 +2210,8 @@ bool TGParser::ParseTopLevelLet(MultiClass *CurMultiClass) {
Lex.Lex();
// Add this entry to the let stack.
- std::vector<LetRecord> LetInfo = ParseLetList();
+ SmallVector<LetRecord, 8> LetInfo;
+ ParseLetList(LetInfo);
if (LetInfo.empty()) return true;
LetStack.push_back(std::move(LetInfo));
@@ -2264,7 +2289,7 @@ bool TGParser::ParseMultiClass() {
// Read all of the submulticlasses.
SubMultiClassReference SubMultiClass =
ParseSubMultiClassReference(CurMultiClass);
- while (1) {
+ while (true) {
// Check for error.
if (!SubMultiClass.MC) return true;
@@ -2312,7 +2337,7 @@ Record *TGParser::InstantiateMulticlassDef(MultiClass &MC, Record *DefProto,
Init *&DefmPrefix,
SMRange DefmPrefixRange,
ArrayRef<Init *> TArgs,
- std::vector<Init *> &TemplateVals) {
+ ArrayRef<Init *> TemplateVals) {
// We need to preserve DefProto so it can be reused for later
// instantiations, so create a new Record to inherit from it.
@@ -2353,8 +2378,8 @@ Record *TGParser::InstantiateMulticlassDef(MultiClass &MC, Record *DefProto,
// Set the value for NAME. We don't resolve references to it 'til later,
// though, so that uses in nested multiclass names don't get
// confused.
- if (SetValue(CurRec.get(), Ref.RefRange.Start, "NAME", None, DefmPrefix,
- /*AllowSelfAssignment*/true)) {
+ if (SetValue(CurRec.get(), Ref.RefRange.Start, StringInit::get("NAME"), None,
+ DefmPrefix, /*AllowSelfAssignment*/true)) {
Error(DefmPrefixRange.Start, "Could not resolve " +
CurRec->getNameInitAsString() + ":NAME to '" +
DefmPrefix->getAsUnquotedString() + "'");
@@ -2433,7 +2458,7 @@ Record *TGParser::InstantiateMulticlassDef(MultiClass &MC, Record *DefProto,
bool TGParser::ResolveMulticlassDefArgs(MultiClass &MC, Record *CurRec,
SMLoc DefmPrefixLoc, SMLoc SubClassLoc,
ArrayRef<Init *> TArgs,
- std::vector<Init *> &TemplateVals,
+ ArrayRef<Init *> TemplateVals,
bool DeleteArgs) {
// Loop over all of the template arguments, setting them to the specified
// value or leaving them as the default if necessary.
@@ -2519,7 +2544,7 @@ bool TGParser::ParseDefm(MultiClass *CurMultiClass) {
SMLoc SubClassLoc = Lex.getLoc();
SubClassReference Ref = ParseSubClassReference(nullptr, true);
- while (1) {
+ while (true) {
if (!Ref.Rec) return true;
// To instantiate a multiclass, we need to first get the multiclass, then
@@ -2527,7 +2552,7 @@ bool TGParser::ParseDefm(MultiClass *CurMultiClass) {
// template parameters.
MultiClass *MC = MultiClasses[Ref.Rec->getName()].get();
assert(MC && "Didn't lookup multiclass correctly?");
- std::vector<Init*> &TemplateVals = Ref.TemplateArgs;
+ ArrayRef<Init*> TemplateVals = Ref.TemplateArgs;
// Verify that the correct number of template arguments were specified.
ArrayRef<Init *> TArgs = MC->Rec.getTemplateArgs();
@@ -2589,7 +2614,7 @@ bool TGParser::ParseDefm(MultiClass *CurMultiClass) {
// Process all the classes to inherit as if they were part of a
// regular 'def' and inherit all record values.
SubClassReference SubClass = ParseSubClassReference(nullptr, false);
- while (1) {
+ while (true) {
// Check for error.
if (!SubClass.Rec) return true;
@@ -2664,4 +2689,3 @@ bool TGParser::ParseFile() {
return TokError("Unexpected input at top level");
}
-
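The TGParser.cpp hunks above repeatedly apply one refactoring: helpers that returned a std::vector by value now take a caller-owned SmallVectorImpl out-parameter, push into it, and clear it on error so callers keep testing empty(). A minimal sketch of that convention, using made-up names rather than the actual TGParser entry points:

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/ADT/StringRef.h"

    using namespace llvm;

    // Parse a comma-separated list of integers into Result. On failure the
    // partial results are cleared, so the caller can simply test empty() --
    // the same convention the rewritten ParseValueList/ParseRangeList use.
    static void parseIntList(StringRef Text, SmallVectorImpl<int> &Result) {
      while (!Text.empty()) {
        auto Split = Text.split(',');   // (token, rest of the string)
        Text = Split.second;
        int Value;
        if (Split.first.trim().getAsInteger(10, Value)) { // true => bad token
          Result.clear();               // discard partial results on error
          return;
        }
        Result.push_back(Value);
      }
    }

    // Usage: the caller owns the storage, so the inline buffer stays on its
    // stack and short lists never touch the heap.
    //   SmallVector<int, 16> Vals;
    //   parseIntList("1, 2, 3", Vals);
    //   if (Vals.empty()) { /* error or empty input */ }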
diff --git a/contrib/llvm/lib/TableGen/TGParser.h b/contrib/llvm/lib/TableGen/TGParser.h
index 739d9a9c5f37..76f7d8fe5026 100644
--- a/contrib/llvm/lib/TableGen/TGParser.h
+++ b/contrib/llvm/lib/TableGen/TGParser.h
@@ -32,12 +32,11 @@ namespace llvm {
struct SubMultiClassReference;
struct LetRecord {
- std::string Name;
+ StringInit *Name;
std::vector<unsigned> Bits;
Init *Value;
SMLoc Loc;
- LetRecord(const std::string &N, const std::vector<unsigned> &B, Init *V,
- SMLoc L)
+ LetRecord(StringInit *N, ArrayRef<unsigned> B, Init *V, SMLoc L)
: Name(N), Bits(B), Value(V), Loc(L) {
}
};
@@ -54,7 +53,7 @@ namespace llvm {
class TGParser {
TGLexer Lex;
- std::vector<std::vector<LetRecord> > LetStack;
+ std::vector<SmallVector<LetRecord, 4>> LetStack;
std::map<std::string, std::unique_ptr<MultiClass>> MultiClasses;
/// Loops - Keep track of any foreach loops we are within.
@@ -107,12 +106,6 @@ private: // Semantic analysis methods.
bool SetValue(Record *TheRec, SMLoc Loc, Init *ValName,
ArrayRef<unsigned> BitList, Init *V,
bool AllowSelfAssignment = false);
- bool SetValue(Record *TheRec, SMLoc Loc, const std::string &ValName,
- ArrayRef<unsigned> BitList, Init *V,
- bool AllowSelfAssignment = false) {
- return SetValue(TheRec, Loc, StringInit::get(ValName), BitList, V,
- AllowSelfAssignment);
- }
bool AddSubClass(Record *Rec, SubClassReference &SubClass);
bool AddSubMultiClass(MultiClass *CurMC,
SubMultiClassReference &SubMultiClass);
@@ -141,12 +134,11 @@ private: // Parser methods.
Record *InstantiateMulticlassDef(MultiClass &MC, Record *DefProto,
Init *&DefmPrefix, SMRange DefmPrefixRange,
ArrayRef<Init *> TArgs,
- std::vector<Init *> &TemplateVals);
+ ArrayRef<Init *> TemplateVals);
bool ResolveMulticlassDefArgs(MultiClass &MC, Record *DefProto,
SMLoc DefmPrefixLoc, SMLoc SubClassLoc,
ArrayRef<Init *> TArgs,
- std::vector<Init *> &TemplateVals,
- bool DeleteArgs);
+ ArrayRef<Init *> TemplateVals, bool DeleteArgs);
bool ResolveMulticlassDef(MultiClass &MC,
Record *CurRec,
Record *DefProto,
@@ -155,7 +147,7 @@ private: // Parser methods.
bool ParseDef(MultiClass *CurMultiClass);
bool ParseForeach(MultiClass *CurMultiClass);
bool ParseTopLevelLet(MultiClass *CurMultiClass);
- std::vector<LetRecord> ParseLetList();
+ void ParseLetList(SmallVectorImpl<LetRecord> &Result);
bool ParseObjectBody(Record *CurRec);
bool ParseBody(Record *CurRec);
@@ -168,19 +160,21 @@ private: // Parser methods.
SubClassReference ParseSubClassReference(Record *CurRec, bool isDefm);
SubMultiClassReference ParseSubMultiClassReference(MultiClass *CurMC);
- Init *ParseIDValue(Record *CurRec, const std::string &Name, SMLoc NameLoc,
+ Init *ParseIDValue(Record *CurRec, StringInit *Name, SMLoc NameLoc,
IDParseMode Mode = ParseValueMode);
Init *ParseSimpleValue(Record *CurRec, RecTy *ItemType = nullptr,
IDParseMode Mode = ParseValueMode);
Init *ParseValue(Record *CurRec, RecTy *ItemType = nullptr,
IDParseMode Mode = ParseValueMode);
- std::vector<Init*> ParseValueList(Record *CurRec, Record *ArgsRec = nullptr,
- RecTy *EltTy = nullptr);
- std::vector<std::pair<llvm::Init*, std::string> > ParseDagArgList(Record *);
- bool ParseOptionalRangeList(std::vector<unsigned> &Ranges);
- bool ParseOptionalBitList(std::vector<unsigned> &Ranges);
- std::vector<unsigned> ParseRangeList();
- bool ParseRangePiece(std::vector<unsigned> &Ranges);
+ void ParseValueList(SmallVectorImpl<llvm::Init*> &Result, Record *CurRec,
+ Record *ArgsRec = nullptr, RecTy *EltTy = nullptr);
+ void ParseDagArgList(
+ SmallVectorImpl<std::pair<llvm::Init*, StringInit*>> &Result,
+ Record *CurRec);
+ bool ParseOptionalRangeList(SmallVectorImpl<unsigned> &Ranges);
+ bool ParseOptionalBitList(SmallVectorImpl<unsigned> &Ranges);
+ void ParseRangeList(SmallVectorImpl<unsigned> &Result);
+ bool ParseRangePiece(SmallVectorImpl<unsigned> &Ranges);
RecTy *ParseType();
Init *ParseOperation(Record *CurRec, RecTy *ItemType);
RecTy *ParseOperatorType();
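A related theme in both TGParser files is replacing std::string names (LetRecord::Name, ParseIDValue, ParseDagArgList) with uniqued StringInit pointers, so that comparisons such as IterVar->getNameInit() == Name in the hunks above are single pointer compares and names are never copied. A toy interner illustrating the same property, not taken from the TableGen sources:

    #include <set>
    #include <string>

    // Every distinct name maps to one canonical object with a stable address,
    // so "same name" checks reduce to comparing pointers rather than comparing
    // characters -- the property the parser gets from uniqued StringInit::get.
    class NameInterner {
      std::set<std::string> Pool;

    public:
      const std::string *intern(const std::string &S) {
        return &*Pool.insert(S).first;  // std::set nodes never move
      }
    };

    // Two lookups of the same spelling yield the same pointer:
    //   NameInterner NI;
    //   assert(NI.intern("NAME") == NI.intern("NAME"));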
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64.h b/contrib/llvm/lib/Target/AArch64/AArch64.h
index c767c75fce57..fd106a8d9b0b 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64.h
@@ -30,12 +30,12 @@ FunctionPass *createAArch64DeadRegisterDefinitions();
FunctionPass *createAArch64RedundantCopyEliminationPass();
FunctionPass *createAArch64ConditionalCompares();
FunctionPass *createAArch64AdvSIMDScalar();
-FunctionPass *createAArch64BranchRelaxation();
FunctionPass *createAArch64ISelDag(AArch64TargetMachine &TM,
CodeGenOpt::Level OptLevel);
FunctionPass *createAArch64StorePairSuppressPass();
FunctionPass *createAArch64ExpandPseudoPass();
FunctionPass *createAArch64LoadStoreOptimizationPass();
+FunctionPass *createAArch64VectorByElementOptPass();
ModulePass *createAArch64PromoteConstantPass();
FunctionPass *createAArch64ConditionOptimizerPass();
FunctionPass *createAArch64AddressTypePromotionPass();
@@ -46,7 +46,21 @@ FunctionPass *createAArch64CleanupLocalDynamicTLSPass();
FunctionPass *createAArch64CollectLOHPass();
+void initializeAArch64A53Fix835769Pass(PassRegistry&);
+void initializeAArch64A57FPLoadBalancingPass(PassRegistry&);
+void initializeAArch64AddressTypePromotionPass(PassRegistry&);
+void initializeAArch64AdvSIMDScalarPass(PassRegistry&);
+void initializeAArch64CollectLOHPass(PassRegistry&);
+void initializeAArch64ConditionalComparesPass(PassRegistry&);
+void initializeAArch64ConditionOptimizerPass(PassRegistry&);
+void initializeAArch64DeadRegisterDefinitionsPass(PassRegistry&);
void initializeAArch64ExpandPseudoPass(PassRegistry&);
+void initializeAArch64LoadStoreOptPass(PassRegistry&);
+void initializeAArch64VectorByElementOptPass(PassRegistry&);
+void initializeAArch64PromoteConstantPass(PassRegistry&);
+void initializeAArch64RedundantCopyEliminationPass(PassRegistry&);
+void initializeAArch64StorePairSuppressPass(PassRegistry&);
+void initializeLDTLSCleanupPass(PassRegistry&);
} // end namespace llvm
#endif
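The block of initializeAArch64*Pass declarations added to AArch64.h pairs with changes in the individual pass files further down (see the A53Fix835769 hunks): each pass constructor now calls its initialize function and the file gains an INITIALIZE_PASS definition. Roughly the idiom, sketched for a hypothetical pass named DemoOpt rather than any pass in this commit:

    #include "llvm/CodeGen/MachineFunctionPass.h"
    #include "llvm/Pass.h"
    #include "llvm/PassRegistry.h"

    using namespace llvm;

    // Normally declared in the target's umbrella header, as AArch64.h does.
    namespace llvm {
    void initializeDemoOptPass(PassRegistry &);
    }

    namespace {
    class DemoOpt : public MachineFunctionPass {
    public:
      static char ID;
      DemoOpt() : MachineFunctionPass(ID) {
        // Register the pass metadata with the global registry the first time
        // an instance is created, even when it is constructed with plain new.
        initializeDemoOptPass(*PassRegistry::getPassRegistry());
      }
      StringRef getPassName() const override { return "Demo optimization"; }
      bool runOnMachineFunction(MachineFunction &MF) override { return false; }
    };
    } // end anonymous namespace

    char DemoOpt::ID = 0;

    // Defines llvm::initializeDemoOptPass to match the declaration above.
    INITIALIZE_PASS(DemoOpt, "demo-opt", "Demo optimization", false, false)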
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64.td b/contrib/llvm/lib/Target/AArch64/AArch64.td
index b97a0f155dc2..c40391d5ad9d 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64.td
@@ -35,6 +35,9 @@ def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true",
def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true",
"Enable ARMv8 Reliability, Availability and Serviceability Extensions">;
+def FeatureLSE : SubtargetFeature<"lse", "HasLSE", "true",
+ "Enable ARMv8.1 Large System Extension (LSE) atomic instructions">;
+
def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true",
"Enable ARMv8 PMUv3 Performance Monitors extension">;
@@ -61,10 +64,6 @@ def FeatureReserveX18 : SubtargetFeature<"reserve-x18", "ReserveX18", "true",
"Reserve X18, making it unavailable "
"as a GPR">;
-def FeatureMergeNarrowLd : SubtargetFeature<"merge-narrow-ld",
- "MergeNarrowLoads", "true",
- "Merge narrow load instructions">;
-
def FeatureUseAA : SubtargetFeature<"use-aa", "UseAA", "true",
"Use alias analysis during codegen">;
@@ -94,23 +93,28 @@ def FeatureAlternateSExtLoadCVTF32Pattern : SubtargetFeature<
"alternate-sextload-cvt-f32-pattern", "UseAlternateSExtLoadCVTF32Pattern",
"true", "Use alternative pattern for sextload convert to f32">;
-def FeatureMacroOpFusion : SubtargetFeature<
- "macroop-fusion", "HasMacroOpFusion", "true",
- "CPU supports macro op fusion">;
+def FeatureArithmeticBccFusion : SubtargetFeature<
+ "arith-bcc-fusion", "HasArithmeticBccFusion", "true",
+ "CPU fuses arithmetic+bcc operations">;
+
+def FeatureArithmeticCbzFusion : SubtargetFeature<
+ "arith-cbz-fusion", "HasArithmeticCbzFusion", "true",
+ "CPU fuses arithmetic + cbz/cbnz operations">;
def FeatureDisableLatencySchedHeuristic : SubtargetFeature<
"disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true",
"Disable latency scheduling heuristic">;
def FeatureUseRSqrt : SubtargetFeature<
- "use-reverse-square-root", "UseRSqrt", "true", "Use reverse square root">;
+ "use-reciprocal-square-root", "UseRSqrt", "true",
+ "Use the reciprocal square root approximation">;
//===----------------------------------------------------------------------===//
// Architectures.
//
def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true",
- "Support ARM v8.1a instructions", [FeatureCRC]>;
+ "Support ARM v8.1a instructions", [FeatureCRC, FeatureLSE]>;
def HasV8_2aOps : SubtargetFeature<"v8.2a", "HasV8_2aOps", "true",
"Support ARM v8.2a instructions", [HasV8_1aOps, FeatureRAS]>;
@@ -143,8 +147,9 @@ include "AArch64SystemOperands.td"
include "AArch64SchedA53.td"
include "AArch64SchedA57.td"
include "AArch64SchedCyclone.td"
-include "AArch64SchedM1.td"
+include "AArch64SchedFalkor.td"
include "AArch64SchedKryo.td"
+include "AArch64SchedM1.td"
include "AArch64SchedVulcan.td"
def ProcA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35",
@@ -176,7 +181,6 @@ def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
FeatureCrypto,
FeatureCustomCheapAsMoveHandling,
FeatureFPARMv8,
- FeatureMergeNarrowLd,
FeatureNEON,
FeaturePerfMon,
FeaturePostRAScheduler,
@@ -207,7 +211,8 @@ def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone",
FeatureCrypto,
FeatureDisableLatencySchedHeuristic,
FeatureFPARMv8,
- FeatureMacroOpFusion,
+ FeatureArithmeticBccFusion,
+ FeatureArithmeticCbzFusion,
FeatureNEON,
FeaturePerfMon,
FeatureSlowMisaligned128Store,
@@ -216,17 +221,31 @@ def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone",
]>;
def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",
- "Samsung Exynos-M1 processors", [
- FeatureAvoidQuadLdStPairs,
- FeatureCRC,
- FeatureCrypto,
- FeatureCustomCheapAsMoveHandling,
- FeatureFPARMv8,
- FeatureNEON,
- FeaturePerfMon,
- FeaturePostRAScheduler,
- FeatureUseRSqrt
- ]>;
+ "Samsung Exynos-M1 processors",
+ [FeatureAvoidQuadLdStPairs,
+ FeatureCRC,
+ FeatureCrypto,
+ FeatureCustomCheapAsMoveHandling,
+ FeatureFPARMv8,
+ FeatureNEON,
+ FeaturePerfMon,
+ FeaturePostRAScheduler,
+ FeatureSlowMisaligned128Store,
+ FeatureUseRSqrt,
+ FeatureZCZeroing]>;
+
+def ProcExynosM2 : SubtargetFeature<"exynosm2", "ARMProcFamily", "ExynosM1",
+ "Samsung Exynos-M2/M3 processors",
+ [FeatureAvoidQuadLdStPairs,
+ FeatureCRC,
+ FeatureCrypto,
+ FeatureCustomCheapAsMoveHandling,
+ FeatureFPARMv8,
+ FeatureNEON,
+ FeaturePerfMon,
+ FeaturePostRAScheduler,
+ FeatureSlowMisaligned128Store,
+ FeatureZCZeroing]>;
def ProcKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo",
"Qualcomm Kryo processors", [
@@ -234,7 +253,6 @@ def ProcKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo",
FeatureCrypto,
FeatureCustomCheapAsMoveHandling,
FeatureFPARMv8,
- FeatureMergeNarrowLd,
FeatureNEON,
FeaturePerfMon,
FeaturePostRAScheduler,
@@ -242,12 +260,21 @@ def ProcKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo",
FeatureZCZeroing
]>;
+def ProcFalkor : SubtargetFeature<"falkor", "ARMProcFamily", "Falkor",
+ "Qualcomm Falkor processors", [
+ FeatureCRC,
+ FeatureCrypto,
+ FeatureFPARMv8,
+ FeatureNEON,
+ FeaturePerfMon
+ ]>;
+
def ProcVulcan : SubtargetFeature<"vulcan", "ARMProcFamily", "Vulcan",
"Broadcom Vulcan processors", [
FeatureCRC,
FeatureCrypto,
FeatureFPARMv8,
- FeatureMacroOpFusion,
+ FeatureArithmeticBccFusion,
FeatureNEON,
FeaturePostRAScheduler,
FeaturePredictableSelectIsExpensive,
@@ -270,6 +297,9 @@ def : ProcessorModel<"cortex-a72", CortexA57Model, [ProcA72]>;
def : ProcessorModel<"cortex-a73", CortexA57Model, [ProcA73]>;
def : ProcessorModel<"cyclone", CycloneModel, [ProcCyclone]>;
def : ProcessorModel<"exynos-m1", ExynosM1Model, [ProcExynosM1]>;
+def : ProcessorModel<"exynos-m2", ExynosM1Model, [ProcExynosM2]>;
+def : ProcessorModel<"exynos-m3", ExynosM1Model, [ProcExynosM2]>;
+def : ProcessorModel<"falkor", FalkorModel, [ProcFalkor]>;
def : ProcessorModel<"kryo", KryoModel, [ProcKryo]>;
def : ProcessorModel<"vulcan", VulcanModel, [ProcVulcan]>;
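Each SubtargetFeature added above (FeatureLSE, the two fusion features) names a command-line flag string, a member of the generated subtarget class, and the value to set. On the C++ side this typically surfaces as a boolean member plus an accessor that code generation can query; a sketch under that assumption -- the class, member, and accessor names below are illustrative, not taken from this commit:

    // def FeatureLSE : SubtargetFeature<"lse", "HasLSE", "true",
    //     "Enable ARMv8.1 Large System Extension (LSE) atomic instructions">;
    class ExampleSubtarget {
      bool HasLSE = false;  // set when "+lse" appears in the feature string

    public:
      bool hasLSE() const { return HasLSE; }
    };

    // Code generation then predicates on the accessor, e.g.
    //   if (Subtarget->hasLSE()) { /* select LSE atomics instead of LL/SC */ }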
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp b/contrib/llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp
index c2cca63f4977..e6afb42440a7 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64A53Fix835769.cpp
@@ -82,16 +82,18 @@ class AArch64A53Fix835769 : public MachineFunctionPass {
public:
static char ID;
- explicit AArch64A53Fix835769() : MachineFunctionPass(ID) {}
+ explicit AArch64A53Fix835769() : MachineFunctionPass(ID) {
+ initializeAArch64A53Fix835769Pass(*PassRegistry::getPassRegistry());
+ }
bool runOnMachineFunction(MachineFunction &F) override;
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "Workaround A53 erratum 835769 pass";
}
@@ -107,6 +109,9 @@ char AArch64A53Fix835769::ID = 0;
} // end anonymous namespace
+INITIALIZE_PASS(AArch64A53Fix835769, "aarch64-fix-cortex-a53-835769-pass",
+ "AArch64 fix for A53 erratum 835769", false, false)
+
//===----------------------------------------------------------------------===//
bool
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp b/contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
index 0465e59dc54a..0aa597bcdc56 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
@@ -95,10 +95,6 @@ static bool isMla(MachineInstr *MI) {
}
}
-namespace llvm {
-static void initializeAArch64A57FPLoadBalancingPass(PassRegistry &);
-}
-
//===----------------------------------------------------------------------===//
namespace {
@@ -126,10 +122,10 @@ public:
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "A57 FP Anti-dependency breaker";
}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64AddressTypePromotion.cpp b/contrib/llvm/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
index 4846ef08c983..0cbb2db1134a 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64AddressTypePromotion.cpp
@@ -47,10 +47,6 @@ using namespace llvm;
#define DEBUG_TYPE "aarch64-type-promotion"
static cl::opt<bool>
-EnableAddressTypePromotion("aarch64-type-promotion", cl::Hidden,
- cl::desc("Enable the type promotion pass"),
- cl::init(true));
-static cl::opt<bool>
EnableMerge("aarch64-type-promotion-merge", cl::Hidden,
cl::desc("Enable merging of redundant sexts when one is dominating"
" the other."),
@@ -62,10 +58,6 @@ EnableMerge("aarch64-type-promotion-merge", cl::Hidden,
// AArch64AddressTypePromotion
//===----------------------------------------------------------------------===//
-namespace llvm {
-void initializeAArch64AddressTypePromotionPass(PassRegistry &);
-}
-
namespace {
class AArch64AddressTypePromotion : public FunctionPass {
@@ -76,9 +68,7 @@ public:
initializeAArch64AddressTypePromotionPass(*PassRegistry::getPassRegistry());
}
- const char *getPassName() const override {
- return AARCH64_TYPE_PROMO_NAME;
- }
+ StringRef getPassName() const override { return AARCH64_TYPE_PROMO_NAME; }
/// Iterate over the functions and promote the computation of interesting
// sext instructions.
@@ -481,7 +471,7 @@ bool AArch64AddressTypePromotion::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
- if (!EnableAddressTypePromotion || F.isDeclaration())
+ if (F.isDeclaration())
return false;
Func = &F;
ConsideredSExtType = Type::getInt64Ty(Func->getContext());
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp b/contrib/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
index d0a2dd3fa1fc..bc2320dd20b3 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64AdvSIMDScalarPass.cpp
@@ -61,10 +61,6 @@ STATISTIC(NumScalarInsnsUsed, "Number of scalar instructions used");
STATISTIC(NumCopiesDeleted, "Number of cross-class copies deleted");
STATISTIC(NumCopiesInserted, "Number of cross-class copies inserted");
-namespace llvm {
-void initializeAArch64AdvSIMDScalarPass(PassRegistry &);
-}
-
#define AARCH64_ADVSIMD_NAME "AdvSIMD Scalar Operation Optimization"
namespace {
@@ -94,9 +90,7 @@ public:
bool runOnMachineFunction(MachineFunction &F) override;
- const char *getPassName() const override {
- return AARCH64_ADVSIMD_NAME;
- }
+ StringRef getPassName() const override { return AARCH64_ADVSIMD_NAME; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
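Several of the pass files touched above (A53Fix835769, A57FPLoadBalancing, AddressTypePromotion, AdvSIMDScalar, and AArch64AsmPrinter below) carry the same mechanical change: getPassName now returns StringRef rather than const char *. Because a string literal converts implicitly to StringRef, the override bodies barely change; a minimal before/after sketch, not code from this commit:

    #include "llvm/ADT/StringRef.h"

    struct OldStylePass {
      const char *getPassName() const { return "Example pass"; }
    };

    struct NewStylePass {
      // StringRef carries the length along with the pointer, so callers that
      // print or hash the name no longer need a strlen() call.
      llvm::StringRef getPassName() const { return "Example pass"; }
    };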
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
index 22374f754603..b2d96a32fd3a 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -37,6 +37,9 @@
#include "llvm/MC/MCLinkerOptimizationHint.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
@@ -56,9 +59,7 @@ public:
: AsmPrinter(TM, std::move(Streamer)), MCInstLowering(OutContext, *this),
SM(*this), AArch64FI(nullptr) {}
- const char *getPassName() const override {
- return "AArch64 Assembly Printer";
- }
+ StringRef getPassName() const override { return "AArch64 Assembly Printer"; }
/// \brief Wrapper for MCInstLowering.lowerOperand() for the
/// tblgen'erated pseudo lowering.
@@ -70,6 +71,14 @@ public:
const MachineInstr &MI);
void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
const MachineInstr &MI);
+
+ void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI);
+ void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI);
+ void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI);
+
+ void EmitXRayTable();
+ void EmitSled(const MachineInstr &MI, SledKind Kind);
+
/// \brief tblgen'erated driver function for lowering simple MI->MC
/// pseudo instructions.
bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
@@ -85,7 +94,9 @@ public:
bool runOnMachineFunction(MachineFunction &F) override {
AArch64FI = F.getInfo<AArch64FunctionInfo>();
STI = static_cast<const AArch64Subtarget*>(&F.getSubtarget());
- return AsmPrinter::runOnMachineFunction(F);
+ bool Result = AsmPrinter::runOnMachineFunction(F);
+ EmitXRayTable();
+ return Result;
}
private:
@@ -124,6 +135,114 @@ private:
//===----------------------------------------------------------------------===//
+void AArch64AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI)
+{
+ EmitSled(MI, SledKind::FUNCTION_ENTER);
+}
+
+void AArch64AsmPrinter::LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI)
+{
+ EmitSled(MI, SledKind::FUNCTION_EXIT);
+}
+
+void AArch64AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI)
+{
+ EmitSled(MI, SledKind::TAIL_CALL);
+}
+
+void AArch64AsmPrinter::EmitXRayTable()
+{
+ // TODO: merge the logic for ELF XRay sleds at a higher level, to avoid the
+ // code duplication that currently exists for x86_64, ARM32 and AArch64.
+ if (Sleds.empty())
+ return;
+
+ auto PrevSection = OutStreamer->getCurrentSectionOnly();
+ auto Fn = MF->getFunction();
+ MCSection *Section;
+
+ if (STI->isTargetELF()) {
+ if (Fn->hasComdat())
+ Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_GROUP, 0,
+ Fn->getComdat()->getName());
+ else
+ Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC);
+ } else if (STI->isTargetMachO()) {
+ Section = OutContext.getMachOSection("__DATA", "xray_instr_map", 0,
+ SectionKind::getReadOnlyWithRel());
+ } else {
+ llvm_unreachable("Unsupported target");
+ }
+
+ // Before we switch over, we force a reference to a label inside the
+ // xray_instr_map section. Since EmitXRayTable() is always called just
+ // before the function's end, we assume that this is happening after the
+ // last return instruction.
+ //
+ // We then align the reference to 16 byte boundaries, which we determined
+ // experimentally to be beneficial to avoid causing decoder stalls.
+ MCSymbol *Tmp = OutContext.createTempSymbol("xray_synthetic_", true);
+ OutStreamer->EmitCodeAlignment(16);
+ OutStreamer->EmitSymbolValue(Tmp, 8, false);
+ OutStreamer->SwitchSection(Section);
+ OutStreamer->EmitLabel(Tmp);
+ for (const auto &Sled : Sleds) {
+ OutStreamer->EmitSymbolValue(Sled.Sled, 8);
+ OutStreamer->EmitSymbolValue(CurrentFnSym, 8);
+ auto Kind = static_cast<uint8_t>(Sled.Kind);
+ OutStreamer->EmitBytes(
+ StringRef(reinterpret_cast<const char *>(&Kind), 1));
+ OutStreamer->EmitBytes(
+ StringRef(reinterpret_cast<const char *>(&Sled.AlwaysInstrument), 1));
+ OutStreamer->EmitZeros(14);
+ }
+ OutStreamer->SwitchSection(PrevSection);
+
+ Sleds.clear();
+}
+
+void AArch64AsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind)
+{
+ static const int8_t NoopsInSledCount = 7;
+ // We want to emit the following pattern:
+ //
+ // .Lxray_sled_N:
+ // ALIGN
+ // B #32
+ // ; 7 NOP instructions (28 bytes)
+ // .tmpN
+ //
+ // We need the 28 bytes (7 instructions) because at runtime, we'd be patching
+ // over the full 32 bytes (8 instructions) with the following pattern:
+ //
+ // STP X0, X30, [SP, #-16]! ; push X0 and the link register to the stack
+ // LDR W0, #12 ; W0 := function ID
+ // LDR X16,#12 ; X16 := addr of __xray_FunctionEntry or __xray_FunctionExit
+ // BLR X16 ; call the tracing trampoline
+ // ;DATA: 32 bits of function ID
+ // ;DATA: lower 32 bits of the address of the trampoline
+ // ;DATA: higher 32 bits of the address of the trampoline
+ // LDP X0, X30, [SP], #16 ; pop X0 and the link register from the stack
+ //
+ OutStreamer->EmitCodeAlignment(4);
+ auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
+ OutStreamer->EmitLabel(CurSled);
+ auto Target = OutContext.createTempSymbol();
+
+ // Emit "B #32" instruction, which jumps over the next 28 bytes.
+ // The operand has to be the number of 4-byte instructions to jump over,
+ // including the current instruction.
+ EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::B).addImm(8));
+
+ for (int8_t I = 0; I < NoopsInSledCount; I++)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(AArch64::HINT).addImm(0));
+
+ OutStreamer->EmitLabel(Target);
+ recordSled(CurSled, MI, Kind);
+}
+
void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) {
const Triple &TT = TM.getTargetTriple();
if (TT.isOSBinFormatMachO()) {
@@ -162,7 +281,7 @@ MCSymbol *AArch64AsmPrinter::GetCPISymbol(unsigned CPID) const {
// Darwin uses a linker-private symbol name for constant-pools (to
// avoid addends on the relocation?), ELF has no such concept and
// uses a normal private symbol.
- if (getDataLayout().getLinkerPrivateGlobalPrefix()[0])
+ if (!getDataLayout().getLinkerPrivateGlobalPrefix().empty())
return OutContext.getOrCreateSymbol(
Twine(getDataLayout().getLinkerPrivateGlobalPrefix()) + "CPI" +
Twine(getFunctionNumber()) + "_" + Twine(CPID));
@@ -354,7 +473,7 @@ void AArch64AsmPrinter::PrintDebugValueComment(const MachineInstr *MI,
void AArch64AsmPrinter::LowerSTACKMAP(MCStreamer &OutStreamer, StackMaps &SM,
const MachineInstr &MI) {
- unsigned NumNOPBytes = MI.getOperand(1).getImm();
+ unsigned NumNOPBytes = StackMapOpers(&MI).getNumPatchBytes();
SM.recordStackMap(MI);
assert(NumNOPBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
@@ -386,7 +505,7 @@ void AArch64AsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
PatchPointOpers Opers(&MI);
- int64_t CallTarget = Opers.getMetaOper(PatchPointOpers::TargetPos).getImm();
+ int64_t CallTarget = Opers.getCallTarget().getImm();
unsigned EncodedBytes = 0;
if (CallTarget) {
assert((CallTarget & 0xFFFFFFFFFFFF) == CallTarget &&
@@ -411,7 +530,7 @@ void AArch64AsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
EmitToStreamer(OutStreamer, MCInstBuilder(AArch64::BLR).addReg(ScratchReg));
}
// Emit padding.
- unsigned NumBytes = Opers.getMetaOper(PatchPointOpers::NBytesPos).getImm();
+ unsigned NumBytes = Opers.getNumPatchBytes();
assert(NumBytes >= EncodedBytes &&
"Patchpoint can't request size less than the length of a call.");
assert((NumBytes - EncodedBytes) % 4 == 0 &&
@@ -569,6 +688,18 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
case TargetOpcode::PATCHPOINT:
return LowerPATCHPOINT(*OutStreamer, SM, *MI);
+
+ case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
+ LowerPATCHABLE_FUNCTION_ENTER(*MI);
+ return;
+
+ case TargetOpcode::PATCHABLE_FUNCTION_EXIT:
+ LowerPATCHABLE_FUNCTION_EXIT(*MI);
+ return;
+
+ case TargetOpcode::PATCHABLE_TAIL_CALL:
+ LowerPATCHABLE_TAIL_CALL(*MI);
+ return;
}
// Finally, do the automated lowerings for everything else.
@@ -579,7 +710,7 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) {
// Force static initialization.
extern "C" void LLVMInitializeAArch64AsmPrinter() {
- RegisterAsmPrinter<AArch64AsmPrinter> X(TheAArch64leTarget);
- RegisterAsmPrinter<AArch64AsmPrinter> Y(TheAArch64beTarget);
- RegisterAsmPrinter<AArch64AsmPrinter> Z(TheARM64Target);
+ RegisterAsmPrinter<AArch64AsmPrinter> X(getTheAArch64leTarget());
+ RegisterAsmPrinter<AArch64AsmPrinter> Y(getTheAArch64beTarget());
+ RegisterAsmPrinter<AArch64AsmPrinter> Z(getTheARM64Target());
}
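Each iteration of the Sleds loop in the new EmitXRayTable above emits a fixed 32-byte record into xray_instr_map: two 8-byte symbol values, two single bytes, then 14 bytes of zero padding. The same layout written out as a struct for clarity -- the struct and field names are illustrative, only the sizes and ordering come from the emission code:

    #include <cstdint>

    // 8 + 8 + 1 + 1 + 14 = 32 bytes, matching the two EmitSymbolValue(..., 8)
    // calls, the two one-byte EmitBytes calls, and EmitZeros(14) in the loop.
    struct XRaySledEntry {
      uint64_t SledAddress;      // address of the .Lxray_sled_N label
      uint64_t FunctionAddress;  // address of the enclosing function symbol
      uint8_t  Kind;             // FUNCTION_ENTER / FUNCTION_EXIT / TAIL_CALL
      uint8_t  AlwaysInstrument; // nonzero if instrumentation is forced
      uint8_t  Padding[14];      // zero-filled
    };
    static_assert(sizeof(XRaySledEntry) == 32, "entry must stay 32 bytes");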
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64BranchRelaxation.cpp b/contrib/llvm/lib/Target/AArch64/AArch64BranchRelaxation.cpp
deleted file mode 100644
index 9ec6ae4118a4..000000000000
--- a/contrib/llvm/lib/Target/AArch64/AArch64BranchRelaxation.cpp
+++ /dev/null
@@ -1,520 +0,0 @@
-//===-- AArch64BranchRelaxation.cpp - AArch64 branch relaxation -----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-//===----------------------------------------------------------------------===//
-
-#include "AArch64.h"
-#include "AArch64InstrInfo.h"
-#include "AArch64MachineFunctionInfo.h"
-#include "AArch64Subtarget.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-#define DEBUG_TYPE "aarch64-branch-relax"
-
-static cl::opt<bool>
-BranchRelaxation("aarch64-branch-relax", cl::Hidden, cl::init(true),
- cl::desc("Relax out of range conditional branches"));
-
-static cl::opt<unsigned>
-TBZDisplacementBits("aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
- cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
-
-static cl::opt<unsigned>
-CBZDisplacementBits("aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
- cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
-
-static cl::opt<unsigned>
-BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
- cl::desc("Restrict range of Bcc instructions (DEBUG)"));
-
-STATISTIC(NumSplit, "Number of basic blocks split");
-STATISTIC(NumRelaxed, "Number of conditional branches relaxed");
-
-namespace llvm {
-void initializeAArch64BranchRelaxationPass(PassRegistry &);
-}
-
-#define AARCH64_BR_RELAX_NAME "AArch64 branch relaxation pass"
-
-namespace {
-class AArch64BranchRelaxation : public MachineFunctionPass {
- /// BasicBlockInfo - Information about the offset and size of a single
- /// basic block.
- struct BasicBlockInfo {
- /// Offset - Distance from the beginning of the function to the beginning
- /// of this basic block.
- ///
- /// The offset is always aligned as required by the basic block.
- unsigned Offset;
-
- /// Size - Size of the basic block in bytes. If the block contains
- /// inline assembly, this is a worst case estimate.
- ///
- /// The size does not include any alignment padding whether from the
- /// beginning of the block, or from an aligned jump table at the end.
- unsigned Size;
-
- BasicBlockInfo() : Offset(0), Size(0) {}
-
- /// Compute the offset immediately following this block. If LogAlign is
- /// specified, return the offset the successor block will get if it has
- /// this alignment.
- unsigned postOffset(unsigned LogAlign = 0) const {
- unsigned PO = Offset + Size;
- unsigned Align = 1 << LogAlign;
- return (PO + Align - 1) / Align * Align;
- }
- };
-
- SmallVector<BasicBlockInfo, 16> BlockInfo;
-
- MachineFunction *MF;
- const AArch64InstrInfo *TII;
-
- bool relaxBranchInstructions();
- void scanFunction();
- MachineBasicBlock *splitBlockBeforeInstr(MachineInstr *MI);
- void adjustBlockOffsets(MachineBasicBlock &MBB);
- bool isBlockInRange(MachineInstr *MI, MachineBasicBlock *BB, unsigned Disp);
- bool fixupConditionalBranch(MachineInstr *MI);
- void computeBlockSize(const MachineBasicBlock &MBB);
- unsigned getInstrOffset(MachineInstr *MI) const;
- void dumpBBs();
- void verify();
-
-public:
- static char ID;
- AArch64BranchRelaxation() : MachineFunctionPass(ID) {
- initializeAArch64BranchRelaxationPass(*PassRegistry::getPassRegistry());
- }
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- const char *getPassName() const override {
- return AARCH64_BR_RELAX_NAME;
- }
-};
-char AArch64BranchRelaxation::ID = 0;
-}
-
-INITIALIZE_PASS(AArch64BranchRelaxation, "aarch64-branch-relax",
- AARCH64_BR_RELAX_NAME, false, false)
-
-/// verify - check BBOffsets, BBSizes, alignment of islands
-void AArch64BranchRelaxation::verify() {
-#ifndef NDEBUG
- unsigned PrevNum = MF->begin()->getNumber();
- for (MachineBasicBlock &MBB : *MF) {
- unsigned Align = MBB.getAlignment();
- unsigned Num = MBB.getNumber();
- assert(BlockInfo[Num].Offset % (1u << Align) == 0);
- assert(!Num || BlockInfo[PrevNum].postOffset() <= BlockInfo[Num].Offset);
- PrevNum = Num;
- }
-#endif
-}
-
-/// print block size and offset information - debugging
-void AArch64BranchRelaxation::dumpBBs() {
- for (auto &MBB : *MF) {
- const BasicBlockInfo &BBI = BlockInfo[MBB.getNumber()];
- dbgs() << format("BB#%u\toffset=%08x\t", MBB.getNumber(), BBI.Offset)
- << format("size=%#x\n", BBI.Size);
- }
-}
-
-/// BBHasFallthrough - Return true if the specified basic block can fallthrough
-/// into the block immediately after it.
-static bool BBHasFallthrough(MachineBasicBlock *MBB) {
- // Get the next machine basic block in the function.
- MachineFunction::iterator MBBI(MBB);
- // Can't fall off end of function.
- auto NextBB = std::next(MBBI);
- if (NextBB == MBB->getParent()->end())
- return false;
-
- for (MachineBasicBlock *S : MBB->successors())
- if (S == &*NextBB)
- return true;
-
- return false;
-}
-
-/// scanFunction - Do the initial scan of the function, building up
-/// information about each block.
-void AArch64BranchRelaxation::scanFunction() {
- BlockInfo.clear();
- BlockInfo.resize(MF->getNumBlockIDs());
-
- // First thing, compute the size of all basic blocks, and see if the function
- // has any inline assembly in it. If so, we have to be conservative about
- // alignment assumptions, as we don't know for sure the size of any
- // instructions in the inline assembly.
- for (MachineBasicBlock &MBB : *MF)
- computeBlockSize(MBB);
-
- // Compute block offsets and known bits.
- adjustBlockOffsets(*MF->begin());
-}
-
-/// computeBlockSize - Compute the size for MBB.
-/// This function updates BlockInfo directly.
-void AArch64BranchRelaxation::computeBlockSize(const MachineBasicBlock &MBB) {
- unsigned Size = 0;
- for (const MachineInstr &MI : MBB)
- Size += TII->GetInstSizeInBytes(MI);
- BlockInfo[MBB.getNumber()].Size = Size;
-}
-
-/// getInstrOffset - Return the current offset of the specified machine
-/// instruction from the start of the function. This offset changes as stuff is
-/// moved around inside the function.
-unsigned AArch64BranchRelaxation::getInstrOffset(MachineInstr *MI) const {
- MachineBasicBlock *MBB = MI->getParent();
-
- // The offset is composed of two things: the sum of the sizes of all MBB's
- // before this instruction's block, and the offset from the start of the block
- // it is in.
- unsigned Offset = BlockInfo[MBB->getNumber()].Offset;
-
- // Sum instructions before MI in MBB.
- for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) {
- assert(I != MBB->end() && "Didn't find MI in its own basic block?");
- Offset += TII->GetInstSizeInBytes(*I);
- }
- return Offset;
-}
-
-void AArch64BranchRelaxation::adjustBlockOffsets(MachineBasicBlock &Start) {
- unsigned PrevNum = Start.getNumber();
- for (auto &MBB : make_range(MachineFunction::iterator(Start), MF->end())) {
- unsigned Num = MBB.getNumber();
- if (!Num) // block zero is never changed from offset zero.
- continue;
- // Get the offset and known bits at the end of the layout predecessor.
- // Include the alignment of the current block.
- unsigned LogAlign = MBB.getAlignment();
- BlockInfo[Num].Offset = BlockInfo[PrevNum].postOffset(LogAlign);
- PrevNum = Num;
- }
-}
-
-/// Split the basic block containing MI into two blocks, which are joined by
-/// an unconditional branch. Update data structures and renumber blocks to
-/// account for this change and returns the newly created block.
-/// NOTE: Successor list of the original BB is out of date after this function,
-/// and must be updated by the caller! Other transforms follow using this
-/// utility function, so no point updating now rather than waiting.
-MachineBasicBlock *
-AArch64BranchRelaxation::splitBlockBeforeInstr(MachineInstr *MI) {
- MachineBasicBlock *OrigBB = MI->getParent();
-
- // Create a new MBB for the code after the OrigBB.
- MachineBasicBlock *NewBB =
- MF->CreateMachineBasicBlock(OrigBB->getBasicBlock());
- MF->insert(++OrigBB->getIterator(), NewBB);
-
- // Splice the instructions starting with MI over to NewBB.
- NewBB->splice(NewBB->end(), OrigBB, MI, OrigBB->end());
-
- // Add an unconditional branch from OrigBB to NewBB.
- // Note the new unconditional branch is not being recorded.
- // There doesn't seem to be meaningful DebugInfo available; this doesn't
- // correspond to anything in the source.
- BuildMI(OrigBB, DebugLoc(), TII->get(AArch64::B)).addMBB(NewBB);
-
- // Insert an entry into BlockInfo to align it properly with the block numbers.
- BlockInfo.insert(BlockInfo.begin() + NewBB->getNumber(), BasicBlockInfo());
-
- // Figure out how large the OrigBB is. As the first half of the original
- // block, it cannot contain a tablejump. The size includes
- // the new jump we added. (It should be possible to do this without
- // recounting everything, but it's very confusing, and this is rarely
- // executed.)
- computeBlockSize(*OrigBB);
-
- // Figure out how large the NewMBB is. As the second half of the original
- // block, it may contain a tablejump.
- computeBlockSize(*NewBB);
-
- // All BBOffsets following these blocks must be modified.
- adjustBlockOffsets(*OrigBB);
-
- ++NumSplit;
-
- return NewBB;
-}
-
-/// isBlockInRange - Returns true if the distance between specific MI and
-/// specific BB can fit in MI's displacement field.
-bool AArch64BranchRelaxation::isBlockInRange(MachineInstr *MI,
- MachineBasicBlock *DestBB,
- unsigned Bits) {
- unsigned MaxOffs = ((1 << (Bits - 1)) - 1) << 2;
- unsigned BrOffset = getInstrOffset(MI);
- unsigned DestOffset = BlockInfo[DestBB->getNumber()].Offset;
-
- DEBUG(dbgs() << "Branch of destination BB#" << DestBB->getNumber()
- << " from BB#" << MI->getParent()->getNumber()
- << " max delta=" << MaxOffs << " from " << getInstrOffset(MI)
- << " to " << DestOffset << " offset "
- << int(DestOffset - BrOffset) << "\t" << *MI);
-
- // Branch before the Dest.
- if (BrOffset <= DestOffset)
- return (DestOffset - BrOffset <= MaxOffs);
- return (BrOffset - DestOffset <= MaxOffs);
-}
-
-static bool isConditionalBranch(unsigned Opc) {
- switch (Opc) {
- default:
- return false;
- case AArch64::TBZW:
- case AArch64::TBNZW:
- case AArch64::TBZX:
- case AArch64::TBNZX:
- case AArch64::CBZW:
- case AArch64::CBNZW:
- case AArch64::CBZX:
- case AArch64::CBNZX:
- case AArch64::Bcc:
- return true;
- }
-}
-
-static MachineBasicBlock *getDestBlock(MachineInstr *MI) {
- switch (MI->getOpcode()) {
- default:
- llvm_unreachable("unexpected opcode!");
- case AArch64::TBZW:
- case AArch64::TBNZW:
- case AArch64::TBZX:
- case AArch64::TBNZX:
- return MI->getOperand(2).getMBB();
- case AArch64::CBZW:
- case AArch64::CBNZW:
- case AArch64::CBZX:
- case AArch64::CBNZX:
- case AArch64::Bcc:
- return MI->getOperand(1).getMBB();
- }
-}
-
-static unsigned getOppositeConditionOpcode(unsigned Opc) {
- switch (Opc) {
- default:
- llvm_unreachable("unexpected opcode!");
- case AArch64::TBNZW: return AArch64::TBZW;
- case AArch64::TBNZX: return AArch64::TBZX;
- case AArch64::TBZW: return AArch64::TBNZW;
- case AArch64::TBZX: return AArch64::TBNZX;
- case AArch64::CBNZW: return AArch64::CBZW;
- case AArch64::CBNZX: return AArch64::CBZX;
- case AArch64::CBZW: return AArch64::CBNZW;
- case AArch64::CBZX: return AArch64::CBNZX;
- case AArch64::Bcc: return AArch64::Bcc; // Condition is an operand for Bcc.
- }
-}
-
-static unsigned getBranchDisplacementBits(unsigned Opc) {
- switch (Opc) {
- default:
- llvm_unreachable("unexpected opcode!");
- case AArch64::TBNZW:
- case AArch64::TBZW:
- case AArch64::TBNZX:
- case AArch64::TBZX:
- return TBZDisplacementBits;
- case AArch64::CBNZW:
- case AArch64::CBZW:
- case AArch64::CBNZX:
- case AArch64::CBZX:
- return CBZDisplacementBits;
- case AArch64::Bcc:
- return BCCDisplacementBits;
- }
-}
-
-static inline void invertBccCondition(MachineInstr *MI) {
- assert(MI->getOpcode() == AArch64::Bcc && "Unexpected opcode!");
- AArch64CC::CondCode CC = (AArch64CC::CondCode)MI->getOperand(0).getImm();
- CC = AArch64CC::getInvertedCondCode(CC);
- MI->getOperand(0).setImm((int64_t)CC);
-}
-
-/// fixupConditionalBranch - Fix up a conditional branch whose destination is
-/// too far away to fit in its displacement field. It is converted to an inverse
-/// conditional branch + an unconditional branch to the destination.
-bool AArch64BranchRelaxation::fixupConditionalBranch(MachineInstr *MI) {
- MachineBasicBlock *DestBB = getDestBlock(MI);
-
- // Add an unconditional branch to the destination and invert the branch
- // condition to jump over it:
- // tbz L1
- // =>
- // tbnz L2
- // b L1
- // L2:
-
- // If the branch is at the end of its MBB and that has a fall-through block,
- // direct the updated conditional branch to the fall-through block. Otherwise,
- // split the MBB before the next instruction.
- MachineBasicBlock *MBB = MI->getParent();
- MachineInstr *BMI = &MBB->back();
- bool NeedSplit = (BMI != MI) || !BBHasFallthrough(MBB);
-
- if (BMI != MI) {
- if (std::next(MachineBasicBlock::iterator(MI)) ==
- std::prev(MBB->getLastNonDebugInstr()) &&
- BMI->getOpcode() == AArch64::B) {
- // Last MI in the BB is an unconditional branch. Can we simply invert the
- // condition and swap destinations:
- // beq L1
- // b L2
- // =>
- // bne L2
- // b L1
- MachineBasicBlock *NewDest = BMI->getOperand(0).getMBB();
- if (isBlockInRange(MI, NewDest,
- getBranchDisplacementBits(MI->getOpcode()))) {
- DEBUG(dbgs() << " Invert condition and swap its destination with "
- << *BMI);
- BMI->getOperand(0).setMBB(DestBB);
- unsigned OpNum = (MI->getOpcode() == AArch64::TBZW ||
- MI->getOpcode() == AArch64::TBNZW ||
- MI->getOpcode() == AArch64::TBZX ||
- MI->getOpcode() == AArch64::TBNZX)
- ? 2
- : 1;
- MI->getOperand(OpNum).setMBB(NewDest);
- MI->setDesc(TII->get(getOppositeConditionOpcode(MI->getOpcode())));
- if (MI->getOpcode() == AArch64::Bcc)
- invertBccCondition(MI);
- return true;
- }
- }
- }
-
- if (NeedSplit) {
- // Analyze the branch so we know how to update the successor lists.
- MachineBasicBlock *TBB, *FBB;
- SmallVector<MachineOperand, 2> Cond;
- TII->analyzeBranch(*MBB, TBB, FBB, Cond, false);
-
- MachineBasicBlock *NewBB = splitBlockBeforeInstr(MI);
- // No need for the branch to the next block. We're adding an unconditional
- // branch to the destination.
- int delta = TII->GetInstSizeInBytes(MBB->back());
- BlockInfo[MBB->getNumber()].Size -= delta;
- MBB->back().eraseFromParent();
- // BlockInfo[SplitBB].Offset is wrong temporarily, fixed below
-
- // Update the successor lists according to the transformation to follow.
- // Do it here since if there's no split, no update is needed.
- MBB->replaceSuccessor(FBB, NewBB);
- NewBB->addSuccessor(FBB);
- }
- MachineBasicBlock *NextBB = &*std::next(MachineFunction::iterator(MBB));
-
- DEBUG(dbgs() << " Insert B to BB#" << DestBB->getNumber()
- << ", invert condition and change dest. to BB#"
- << NextBB->getNumber() << "\n");
-
- // Insert a new conditional branch and a new unconditional branch.
- MachineInstrBuilder MIB = BuildMI(
- MBB, DebugLoc(), TII->get(getOppositeConditionOpcode(MI->getOpcode())))
- .addOperand(MI->getOperand(0));
- if (MI->getOpcode() == AArch64::TBZW || MI->getOpcode() == AArch64::TBNZW ||
- MI->getOpcode() == AArch64::TBZX || MI->getOpcode() == AArch64::TBNZX)
- MIB.addOperand(MI->getOperand(1));
- if (MI->getOpcode() == AArch64::Bcc)
- invertBccCondition(MIB);
- MIB.addMBB(NextBB);
- BlockInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(MBB->back());
- BuildMI(MBB, DebugLoc(), TII->get(AArch64::B)).addMBB(DestBB);
- BlockInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(MBB->back());
-
- // Remove the old conditional branch. It may or may not still be in MBB.
- BlockInfo[MI->getParent()->getNumber()].Size -= TII->GetInstSizeInBytes(*MI);
- MI->eraseFromParent();
-
- // Finally, keep the block offsets up to date.
- adjustBlockOffsets(*MBB);
- return true;
-}
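As an aside on the logic being removed here, the range test that decides whether a conditional branch needs this rewrite is just a signed, 4-byte-scaled displacement check. A minimal standalone sketch, illustrative only and not part of the patch, with bit widths taken from the AArch64 ISA rather than from this file:

#include <cstdint>

// AArch64 branch immediates are signed and scaled by 4 bytes: TBZ/TBNZ carry
// a 14-bit field, CBZ/CBNZ and B.cond a 19-bit field, and the unconditional
// B a 26-bit field.
static bool fitsInDisplacement(int64_t BrOffset, int64_t DestOffset,
                               unsigned Bits) {
  int64_t MaxPos = ((int64_t{1} << (Bits - 1)) - 1) * 4;
  int64_t MinNeg = -(int64_t{1} << (Bits - 1)) * 4;
  int64_t Delta = DestOffset - BrOffset;
  return Delta >= MinNeg && Delta <= MaxPos;
}

When the check fails for, say, "tbz w0, #1, L1", the code above rewrites it as "tbnz w0, #1, L2; b L1; L2:", exactly as the block comment describes.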
-
-bool AArch64BranchRelaxation::relaxBranchInstructions() {
- bool Changed = false;
- // Relaxing branches involves creating new basic blocks, so re-eval
- // end() for termination.
- for (MachineFunction::iterator I = MF->begin(); I != MF->end(); ++I) {
- MachineBasicBlock &MBB = *I;
- MachineInstr &MI = *MBB.getFirstTerminator();
- if (isConditionalBranch(MI.getOpcode()) &&
- !isBlockInRange(&MI, getDestBlock(&MI),
- getBranchDisplacementBits(MI.getOpcode()))) {
- fixupConditionalBranch(&MI);
- ++NumRelaxed;
- Changed = true;
- }
- }
- return Changed;
-}
-
-bool AArch64BranchRelaxation::runOnMachineFunction(MachineFunction &mf) {
- MF = &mf;
-
- // If the pass is disabled, just bail early.
- if (!BranchRelaxation)
- return false;
-
- DEBUG(dbgs() << "***** AArch64BranchRelaxation *****\n");
-
- TII = (const AArch64InstrInfo *)MF->getSubtarget().getInstrInfo();
-
- // Renumber all of the machine basic blocks in the function, guaranteeing that
- // the numbers agree with the position of the block in the function.
- MF->RenumberBlocks();
-
- // Do the initial scan of the function, building up information about the
- // sizes of each block.
- scanFunction();
-
- DEBUG(dbgs() << " Basic blocks before relaxation\n");
- DEBUG(dumpBBs());
-
- bool MadeChange = false;
- while (relaxBranchInstructions())
- MadeChange = true;
-
- // After a while, this might be made debug-only, but it is not expensive.
- verify();
-
- DEBUG(dbgs() << " Basic blocks after relaxation\n");
- DEBUG(dbgs() << '\n'; dumpBBs());
-
- BlockInfo.clear();
-
- return MadeChange;
-}
-
-/// Returns an instance of the AArch64 Branch Relaxation pass.
-FunctionPass *llvm::createAArch64BranchRelaxation() {
- return new AArch64BranchRelaxation();
-}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64CallLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64CallLowering.cpp
index e3522e63c21c..a4950af32097 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64CallLowering.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64CallLowering.cpp
@@ -16,9 +16,14 @@
#include "AArch64CallLowering.h"
#include "AArch64ISelLowering.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;
#ifndef LLVM_BUILD_GLOBAL_ISEL
@@ -29,76 +34,284 @@ AArch64CallLowering::AArch64CallLowering(const AArch64TargetLowering &TLI)
: CallLowering(&TLI) {
}
+struct IncomingArgHandler : public CallLowering::ValueHandler {
+ IncomingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
+ : ValueHandler(MIRBuilder, MRI) {}
+
+ unsigned getStackAddress(uint64_t Size, int64_t Offset,
+ MachinePointerInfo &MPO) override {
+ auto &MFI = MIRBuilder.getMF().getFrameInfo();
+ int FI = MFI.CreateFixedObject(Size, Offset, true);
+ MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
+ unsigned AddrReg = MRI.createGenericVirtualRegister(LLT::pointer(0, 64));
+ MIRBuilder.buildFrameIndex(AddrReg, FI);
+ return AddrReg;
+ }
+
+ void assignValueToReg(unsigned ValVReg, unsigned PhysReg,
+ CCValAssign &VA) override {
+ markPhysRegUsed(PhysReg);
+ MIRBuilder.buildCopy(ValVReg, PhysReg);
+ // FIXME: assert extension
+ }
+
+ void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size,
+ MachinePointerInfo &MPO, CCValAssign &VA) override {
+ auto MMO = MIRBuilder.getMF().getMachineMemOperand(
+ MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant, Size,
+ 0);
+ MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
+ }
+
+ /// How the physical register gets marked varies between formal
+ /// parameters (it's a basic-block live-in), and a call instruction
+ /// (it's an implicit-def of the BL).
+ virtual void markPhysRegUsed(unsigned PhysReg) = 0;
+};
+
+struct FormalArgHandler : public IncomingArgHandler {
+ FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
+ : IncomingArgHandler(MIRBuilder, MRI) {}
+
+ void markPhysRegUsed(unsigned PhysReg) override {
+ MIRBuilder.getMBB().addLiveIn(PhysReg);
+ }
+};
+
+struct CallReturnHandler : public IncomingArgHandler {
+ CallReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+ MachineInstrBuilder MIB)
+ : IncomingArgHandler(MIRBuilder, MRI), MIB(MIB) {}
+
+ void markPhysRegUsed(unsigned PhysReg) override {
+ MIB.addDef(PhysReg, RegState::Implicit);
+ }
+
+ MachineInstrBuilder MIB;
+};
+
+struct OutgoingArgHandler : public CallLowering::ValueHandler {
+ OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+ MachineInstrBuilder MIB)
+ : ValueHandler(MIRBuilder, MRI), MIB(MIB) {}
+
+ unsigned getStackAddress(uint64_t Size, int64_t Offset,
+ MachinePointerInfo &MPO) override {
+ LLT p0 = LLT::pointer(0, 64);
+ LLT s64 = LLT::scalar(64);
+ unsigned SPReg = MRI.createGenericVirtualRegister(p0);
+ MIRBuilder.buildCopy(SPReg, AArch64::SP);
+
+ unsigned OffsetReg = MRI.createGenericVirtualRegister(s64);
+ MIRBuilder.buildConstant(OffsetReg, Offset);
+
+ unsigned AddrReg = MRI.createGenericVirtualRegister(p0);
+ MIRBuilder.buildGEP(AddrReg, SPReg, OffsetReg);
+
+ MPO = MachinePointerInfo::getStack(MIRBuilder.getMF(), Offset);
+ return AddrReg;
+ }
+
+ void assignValueToReg(unsigned ValVReg, unsigned PhysReg,
+ CCValAssign &VA) override {
+ MIB.addUse(PhysReg, RegState::Implicit);
+ unsigned ExtReg = extendRegister(ValVReg, VA);
+ MIRBuilder.buildCopy(PhysReg, ExtReg);
+ }
+
+ void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size,
+ MachinePointerInfo &MPO, CCValAssign &VA) override {
+ auto MMO = MIRBuilder.getMF().getMachineMemOperand(
+ MPO, MachineMemOperand::MOStore, Size, 0);
+ MIRBuilder.buildStore(ValVReg, Addr, *MMO);
+ }
+
+ MachineInstrBuilder MIB;
+};
+
+void AArch64CallLowering::splitToValueTypes(const ArgInfo &OrigArg,
+ SmallVectorImpl<ArgInfo> &SplitArgs,
+ const DataLayout &DL,
+ MachineRegisterInfo &MRI,
+ SplitArgTy PerformArgSplit) const {
+ const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
+ LLVMContext &Ctx = OrigArg.Ty->getContext();
+
+ SmallVector<EVT, 4> SplitVTs;
+ SmallVector<uint64_t, 4> Offsets;
+ ComputeValueVTs(TLI, DL, OrigArg.Ty, SplitVTs, &Offsets, 0);
+
+ if (SplitVTs.size() == 1) {
+ // No splitting to do, but we want to replace the original type (e.g. [1 x
+ // double] -> double).
+ SplitArgs.emplace_back(OrigArg.Reg, SplitVTs[0].getTypeForEVT(Ctx),
+ OrigArg.Flags);
+ return;
+ }
+
+ unsigned FirstRegIdx = SplitArgs.size();
+ for (auto SplitVT : SplitVTs) {
+ // FIXME: set split flags if they're actually used (e.g. i128 on AAPCS).
+ Type *SplitTy = SplitVT.getTypeForEVT(Ctx);
+ SplitArgs.push_back(
+ ArgInfo{MRI.createGenericVirtualRegister(LLT{*SplitTy, DL}), SplitTy,
+ OrigArg.Flags});
+ }
+
+ SmallVector<uint64_t, 4> BitOffsets;
+ for (auto Offset : Offsets)
+ BitOffsets.push_back(Offset * 8);
+
+ SmallVector<unsigned, 8> SplitRegs;
+ for (auto I = &SplitArgs[FirstRegIdx]; I != SplitArgs.end(); ++I)
+ SplitRegs.push_back(I->Reg);
+
+ PerformArgSplit(SplitRegs, BitOffsets);
+}
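To make the PerformArgSplit contract concrete, here is a toy, LLVM-free sketch (the names and register numbers are invented for illustration) of what the callback receives for a two-field { i64, i64 } argument: one fresh register per leaf type plus that leaf's bit offset inside the original value. The caller then decides whether to emit an extract (outgoing values) or a sequence (incoming values).

#include <cstdint>
#include <functional>
#include <vector>

// Hypothetical stand-in for the real splitting; registers are opaque numbers.
using SplitCallback =
    std::function<void(const std::vector<unsigned> &Regs,
                       const std::vector<uint64_t> &BitOffsets)>;

static void splitTwoFieldStruct(unsigned FirstNewReg, const SplitCallback &CB) {
  // { i64, i64 }: leaves at byte offsets 0 and 8, i.e. bit offsets 0 and 64.
  CB({FirstNewReg, FirstNewReg + 1}, {0, 64});
}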
+
bool AArch64CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
- const Value *Val, unsigned VReg) const {
- MachineInstr *Return = MIRBuilder.buildInstr(AArch64::RET_ReallyLR);
- assert(Return && "Unable to build a return instruction?!");
+ const Value *Val, unsigned VReg) const {
+ MachineFunction &MF = MIRBuilder.getMF();
+ const Function &F = *MF.getFunction();
+ auto MIB = MIRBuilder.buildInstrNoInsert(AArch64::RET_ReallyLR);
assert(((Val && VReg) || (!Val && !VReg)) && "Return value without a vreg");
+ bool Success = true;
if (VReg) {
- assert(Val->getType()->isIntegerTy() && "Type not supported yet");
- unsigned Size = Val->getType()->getPrimitiveSizeInBits();
- assert((Size == 64 || Size == 32) && "Size not supported yet");
- unsigned ResReg = (Size == 32) ? AArch64::W0 : AArch64::X0;
- // Set the insertion point to be right before Return.
- MIRBuilder.setInstr(*Return, /* Before */ true);
- MachineInstr *Copy =
- MIRBuilder.buildInstr(TargetOpcode::COPY, ResReg, VReg);
- (void)Copy;
- assert(Copy->getNextNode() == Return &&
- "The insertion did not happen where we expected");
- MachineInstrBuilder(MIRBuilder.getMF(), Return)
- .addReg(ResReg, RegState::Implicit);
+ const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
+ CCAssignFn *AssignFn = TLI.CCAssignFnForReturn(F.getCallingConv());
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ auto &DL = F.getParent()->getDataLayout();
+
+ ArgInfo OrigArg{VReg, Val->getType()};
+ setArgFlags(OrigArg, AttributeSet::ReturnIndex, DL, F);
+
+ SmallVector<ArgInfo, 8> SplitArgs;
+ splitToValueTypes(OrigArg, SplitArgs, DL, MRI,
+ [&](ArrayRef<unsigned> Regs, ArrayRef<uint64_t> Offsets) {
+ MIRBuilder.buildExtract(Regs, Offsets, VReg);
+ });
+
+ OutgoingArgHandler Handler(MIRBuilder, MRI, MIB);
+ Success = handleAssignments(MIRBuilder, AssignFn, SplitArgs, Handler);
}
+
+ MIRBuilder.insertInstr(MIB);
+ return Success;
+}
+
+bool AArch64CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
+ const Function &F,
+ ArrayRef<unsigned> VRegs) const {
+ auto &Args = F.getArgumentList();
+ MachineFunction &MF = MIRBuilder.getMF();
+ MachineBasicBlock &MBB = MIRBuilder.getMBB();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ auto &DL = F.getParent()->getDataLayout();
+
+ SmallVector<ArgInfo, 8> SplitArgs;
+ unsigned i = 0;
+ for (auto &Arg : Args) {
+ ArgInfo OrigArg{VRegs[i], Arg.getType()};
+ setArgFlags(OrigArg, i + 1, DL, F);
+ splitToValueTypes(OrigArg, SplitArgs, DL, MRI,
+ [&](ArrayRef<unsigned> Regs, ArrayRef<uint64_t> Offsets) {
+ MIRBuilder.buildSequence(VRegs[i], Regs, Offsets);
+ });
+ ++i;
+ }
+
+ if (!MBB.empty())
+ MIRBuilder.setInstr(*MBB.begin());
+
+ const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
+ CCAssignFn *AssignFn =
+ TLI.CCAssignFnForCall(F.getCallingConv(), /*IsVarArg=*/false);
+
+ FormalArgHandler Handler(MIRBuilder, MRI);
+ if (!handleAssignments(MIRBuilder, AssignFn, SplitArgs, Handler))
+ return false;
+
+ // Move back to the end of the basic block.
+ MIRBuilder.setMBB(MBB);
+
return true;
}
-bool AArch64CallLowering::lowerFormalArguments(
- MachineIRBuilder &MIRBuilder, const Function::ArgumentListType &Args,
- const SmallVectorImpl<unsigned> &VRegs) const {
+bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
+ const MachineOperand &Callee,
+ const ArgInfo &OrigRet,
+ ArrayRef<ArgInfo> OrigArgs) const {
MachineFunction &MF = MIRBuilder.getMF();
const Function &F = *MF.getFunction();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ auto &DL = F.getParent()->getDataLayout();
- SmallVector<CCValAssign, 16> ArgLocs;
- CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
+ SmallVector<ArgInfo, 8> SplitArgs;
+ for (auto &OrigArg : OrigArgs) {
+ splitToValueTypes(OrigArg, SplitArgs, DL, MRI,
+ [&](ArrayRef<unsigned> Regs, ArrayRef<uint64_t> Offsets) {
+ MIRBuilder.buildExtract(Regs, Offsets, OrigArg.Reg);
+ });
+ }
- unsigned NumArgs = Args.size();
- Function::const_arg_iterator CurOrigArg = Args.begin();
+ // Find out which ABI gets to decide where things go.
const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
- for (unsigned i = 0; i != NumArgs; ++i, ++CurOrigArg) {
- MVT ValVT = MVT::getVT(CurOrigArg->getType());
- CCAssignFn *AssignFn =
- TLI.CCAssignFnForCall(F.getCallingConv(), /*IsVarArg=*/false);
- bool Res =
- AssignFn(i, ValVT, ValVT, CCValAssign::Full, ISD::ArgFlagsTy(), CCInfo);
- assert(!Res && "Call operand has unhandled type");
- (void)Res;
- }
- assert(ArgLocs.size() == Args.size() &&
- "We have a different number of location and args?!");
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
- CCValAssign &VA = ArgLocs[i];
-
- assert(VA.isRegLoc() && "Not yet implemented");
- // Transform the arguments in physical registers into virtual ones.
- MIRBuilder.getMBB().addLiveIn(VA.getLocReg());
- MIRBuilder.buildInstr(TargetOpcode::COPY, VRegs[i], VA.getLocReg());
-
- switch (VA.getLocInfo()) {
- default:
- llvm_unreachable("Unknown loc info!");
- case CCValAssign::Full:
- break;
- case CCValAssign::BCvt:
- // We don't care about bitcast.
- break;
- case CCValAssign::AExt:
- case CCValAssign::SExt:
- case CCValAssign::ZExt:
- // Zero/Sign extend the register.
- assert(0 && "Not yet implemented");
- break;
- }
+ CCAssignFn *CallAssignFn =
+ TLI.CCAssignFnForCall(F.getCallingConv(), /*IsVarArg=*/false);
+
+ // Create a temporarily-floating call instruction so we can add the implicit
+ // uses of arg registers.
+ auto MIB = MIRBuilder.buildInstrNoInsert(Callee.isReg() ? AArch64::BLR
+ : AArch64::BL);
+ MIB.addOperand(Callee);
+
+ // Tell the call which registers are clobbered.
+ auto TRI = MF.getSubtarget().getRegisterInfo();
+ MIB.addRegMask(TRI->getCallPreservedMask(MF, F.getCallingConv()));
+
+ // Do the actual argument marshalling.
+ SmallVector<unsigned, 8> PhysRegs;
+ OutgoingArgHandler Handler(MIRBuilder, MRI, MIB);
+ if (!handleAssignments(MIRBuilder, CallAssignFn, SplitArgs, Handler))
+ return false;
+
+ // Now we can add the actual call instruction to the correct basic block.
+ MIRBuilder.insertInstr(MIB);
+
+ // If Callee is a reg, since it is used by a target specific
+ // instruction, it must have a register class matching the
+ // constraint of that instruction.
+ if (Callee.isReg())
+ MIB->getOperand(0).setReg(constrainOperandRegClass(
+ MF, *TRI, MRI, *MF.getSubtarget().getInstrInfo(),
+ *MF.getSubtarget().getRegBankInfo(), *MIB, MIB->getDesc(),
+ Callee.getReg(), 0));
+
+ // Finally we can copy the returned value back into its virtual-register. In
+ // symmetry with the arguments, the physical register must be an
+ // symmetry with the arguments, the physical register must be an
+ // implicit-define of the call instruction.
+ CCAssignFn *RetAssignFn = TLI.CCAssignFnForReturn(F.getCallingConv());
+ if (OrigRet.Reg) {
+ SplitArgs.clear();
+
+ SmallVector<uint64_t, 8> RegOffsets;
+ SmallVector<unsigned, 8> SplitRegs;
+ splitToValueTypes(OrigRet, SplitArgs, DL, MRI,
+ [&](ArrayRef<unsigned> Regs, ArrayRef<uint64_t> Offsets) {
+ std::copy(Offsets.begin(), Offsets.end(),
+ std::back_inserter(RegOffsets));
+ std::copy(Regs.begin(), Regs.end(),
+ std::back_inserter(SplitRegs));
+ });
+
+ CallReturnHandler Handler(MIRBuilder, MRI, MIB);
+ if (!handleAssignments(MIRBuilder, RetAssignFn, SplitArgs, Handler))
+ return false;
+
+ if (!RegOffsets.empty())
+ MIRBuilder.buildSequence(OrigRet.Reg, SplitRegs, RegOffsets);
}
+
return true;
}
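For a feel of the inputs this path now handles, a small source-level example, illustrative only and not taken from the patch or its tests: under AAPCS64 a 16-byte aggregate of two i64s is passed and returned in x0/x1, so both the outgoing argument and the returned value end up going through the splitting path above.

// Illustrative caller/callee pair; the exact register assignment is decided
// by CC_AArch64_AAPCS, not by this snippet.
struct Pair { long a, b; };

static Pair twice(Pair p) { return {2 * p.a, 2 * p.b}; }

long caller(Pair p) { return twice(p).a; }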
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64CallLowering.h b/contrib/llvm/lib/Target/AArch64/AArch64CallLowering.h
index 411622803461..ce6676249df6 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64CallLowering.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64CallLowering.h
@@ -16,6 +16,7 @@
#define LLVM_LIB_TARGET_AARCH64_AARCH64CALLLOWERING
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/CodeGen/ValueTypes.h"
namespace llvm {
@@ -27,10 +28,29 @@ class AArch64CallLowering: public CallLowering {
bool lowerReturn(MachineIRBuilder &MIRBuiler, const Value *Val,
unsigned VReg) const override;
- bool
- lowerFormalArguments(MachineIRBuilder &MIRBuilder,
- const Function::ArgumentListType &Args,
- const SmallVectorImpl<unsigned> &VRegs) const override;
+
+ bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
+ ArrayRef<unsigned> VRegs) const override;
+
+ bool lowerCall(MachineIRBuilder &MIRBuilder, const MachineOperand &Callee,
+ const ArgInfo &OrigRet,
+ ArrayRef<ArgInfo> OrigArgs) const override;
+
+private:
+ typedef std::function<void(MachineIRBuilder &, Type *, unsigned,
+ CCValAssign &)>
+ RegHandler;
+
+ typedef std::function<void(MachineIRBuilder &, int, CCValAssign &)>
+ MemHandler;
+
+ typedef std::function<void(ArrayRef<unsigned>, ArrayRef<uint64_t>)>
+ SplitArgTy;
+
+ void splitToValueTypes(const ArgInfo &OrigArgInfo,
+ SmallVectorImpl<ArgInfo> &SplitArgs,
+ const DataLayout &DL, MachineRegisterInfo &MRI,
+ SplitArgTy SplitArg) const;
};
} // End of namespace llvm;
#endif
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.td
index 178e3971640e..9058617768dd 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64CallingConvention.td
@@ -23,6 +23,7 @@ class CCIfBigEndian<CCAction A> :
//===----------------------------------------------------------------------===//
def CC_AArch64_AAPCS : CallingConv<[
+ CCIfType<[iPTR], CCBitConvertToType<i64>>,
CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
CCIfType<[v2f64, v4f32], CCBitConvertToType<v2i64>>,
@@ -86,6 +87,7 @@ def CC_AArch64_AAPCS : CallingConv<[
]>;
def RetCC_AArch64_AAPCS : CallingConv<[
+ CCIfType<[iPTR], CCBitConvertToType<i64>>,
CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
CCIfType<[v2f64, v4f32], CCBitConvertToType<v2i64>>,
@@ -98,6 +100,7 @@ def RetCC_AArch64_AAPCS : CallingConv<[
CCIfBigEndian<CCIfType<[v2i64, v2f64, v4i32, v4f32, v8i16, v8f16, v16i8],
CCBitConvertToType<f128>>>,
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7],
[X0, X1, X2, X3, X4, X5, X6, X7]>>,
CCIfType<[i64], CCAssignToRegWithShadow<[X0, X1, X2, X3, X4, X5, X6, X7],
@@ -121,6 +124,7 @@ def RetCC_AArch64_AAPCS : CallingConv<[
// + i128s (i.e. split i64s) don't need even registers.
// + Stack slots are sized as needed rather than being at least 64-bit.
def CC_AArch64_DarwinPCS : CallingConv<[
+ CCIfType<[iPTR], CCBitConvertToType<i64>>,
CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>,
@@ -176,6 +180,7 @@ def CC_AArch64_DarwinPCS : CallingConv<[
]>;
def CC_AArch64_DarwinPCS_VarArg : CallingConv<[
+ CCIfType<[iPTR], CCBitConvertToType<i64>>,
CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>,
@@ -243,6 +248,8 @@ def RetCC_AArch64_WebKit_JS : CallingConv<[
// register mapping".
def CC_AArch64_GHC : CallingConv<[
+ CCIfType<[iPTR], CCBitConvertToType<i64>>,
+
// Handle all vector types as either f64 or v2f64.
CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType<f64>>,
CCIfType<[v2i64, v4i32, v8i16, v16i8, v4f32, f128], CCBitConvertToType<v2f64>>,
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp b/contrib/llvm/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
index 011a03622ba5..6f8dd3e3ac0c 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64CleanupLocalDynamicTLSPass.cpp
@@ -33,10 +33,14 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
using namespace llvm;
+#define TLSCLEANUP_PASS_NAME "AArch64 Local Dynamic TLS Access Clean-up"
+
namespace {
struct LDTLSCleanup : public MachineFunctionPass {
static char ID;
- LDTLSCleanup() : MachineFunctionPass(ID) {}
+ LDTLSCleanup() : MachineFunctionPass(ID) {
+ initializeLDTLSCleanupPass(*PassRegistry::getPassRegistry());
+ }
bool runOnMachineFunction(MachineFunction &MF) override {
if (skipFunction(*MF.getFunction()))
@@ -128,9 +132,7 @@ struct LDTLSCleanup : public MachineFunctionPass {
return Copy;
}
- const char *getPassName() const override {
- return "Local Dynamic TLS Access Clean-up";
- }
+ StringRef getPassName() const override { return TLSCLEANUP_PASS_NAME; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
@@ -140,6 +142,9 @@ struct LDTLSCleanup : public MachineFunctionPass {
};
}
+INITIALIZE_PASS(LDTLSCleanup, "aarch64-local-dynamic-tls-cleanup",
+ TLSCLEANUP_PASS_NAME, false, false)
+
char LDTLSCleanup::ID = 0;
FunctionPass *llvm::createAArch64CleanupLocalDynamicTLSPass() {
return new LDTLSCleanup();
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp b/contrib/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp
index 5eecb3a86856..7666011f75b6 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp
@@ -138,6 +138,7 @@ BasicBlockScopeOnly("aarch64-collect-loh-bb-only", cl::Hidden,
STATISTIC(NumADRPSimpleCandidate,
"Number of simplifiable ADRP dominate by another");
+#ifndef NDEBUG
STATISTIC(NumADRPComplexCandidate2,
"Number of simplifiable ADRP reachable by 2 defs");
STATISTIC(NumADRPComplexCandidate3,
@@ -156,18 +157,17 @@ STATISTIC(NumLDRToLDRWithImm,
"Number of simplifiable LDR with imm reachable by LDR");
STATISTIC(NumADDToLDR, "Number of simplifiable LDR reachable by ADD");
STATISTIC(NumLDRToLDR, "Number of simplifiable LDR reachable by LDR");
+#endif // NDEBUG
STATISTIC(NumADRPToLDR, "Number of simplifiable LDR reachable by ADRP");
+#ifndef NDEBUG
STATISTIC(NumCplxLvl1, "Number of complex case of level 1");
STATISTIC(NumTooCplxLvl1, "Number of too complex case of level 1");
STATISTIC(NumCplxLvl2, "Number of complex case of level 2");
STATISTIC(NumTooCplxLvl2, "Number of too complex case of level 2");
+#endif // NDEBUG
STATISTIC(NumADRSimpleCandidate, "Number of simplifiable ADRP + ADD");
STATISTIC(NumADRComplexCandidate, "Number of too complex ADRP + ADD");
-namespace llvm {
-void initializeAArch64CollectLOHPass(PassRegistry &);
-}
-
#define AARCH64_COLLECT_LOH_NAME "AArch64 Collect Linker Optimization Hint (LOH)"
namespace {
@@ -181,12 +181,10 @@ struct AArch64CollectLOH : public MachineFunctionPass {
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
- const char *getPassName() const override {
- return AARCH64_COLLECT_LOH_NAME;
- }
+ StringRef getPassName() const override { return AARCH64_COLLECT_LOH_NAME; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesAll();
@@ -631,7 +629,7 @@ static void computeADRP(const InstrToInstrs &UseToDefs,
AArch64FI.addLOHDirective(MCLOH_AdrpAdrp, {L2, L1});
++NumADRPSimpleCandidate;
}
-#ifdef DEBUG
+#ifndef NDEBUG
else if (Size == 2)
++NumADRPComplexCandidate2;
else if (Size == 3)
@@ -775,10 +773,10 @@ static void computeOthers(const InstrToInstrs &UseToDefs,
AArch64FunctionInfo &AArch64FI, const MapRegToId &RegToId,
const MachineDominatorTree *MDT) {
SetOfMachineInstr *InvolvedInLOHs = nullptr;
-#ifdef DEBUG
+#ifndef NDEBUG
SetOfMachineInstr InvolvedInLOHsStorage;
InvolvedInLOHs = &InvolvedInLOHsStorage;
-#endif // DEBUG
+#endif // NDEBUG
DEBUG(dbgs() << "*** Compute LOH for Others\n");
// ADRP -> ADD/LDR -> LDR/STR pattern.
// Fall back to ADRP -> ADD pattern if we fail to catch the bigger pattern.
@@ -819,7 +817,7 @@ static void computeOthers(const InstrToInstrs &UseToDefs,
// PotentialCandidates are result of a chain ADRP -> ADD/LDR ->
// A potential candidate becomes a candidate, if its current immediate
// operand is zero and all nodes of the chain have respectively only one user
-#ifdef DEBUG
+#ifndef NDEBUG
SetOfMachineInstr DefsOfPotentialCandidates;
#endif
for (const MachineInstr *Candidate : PotentialCandidates) {
@@ -835,7 +833,7 @@ static void computeOthers(const InstrToInstrs &UseToDefs,
getUses(DefsPerColorToUses,
RegToId.find(Def->getOperand(0).getReg())->second, *Def);
if (Users->size() > 1) {
-#ifdef DEBUG
+#ifndef NDEBUG
// if all the uses of this def are in potential candidate, this is
// a complex candidate of level 2.
bool IsLevel2 = true;
@@ -848,7 +846,7 @@ static void computeOthers(const InstrToInstrs &UseToDefs,
}
if (IsLevel2)
++NumCplxLvl2;
-#endif // DEBUG
+#endif // NDEBUG
PotentialADROpportunities.insert(Def);
continue;
}
@@ -863,7 +861,7 @@ static void computeOthers(const InstrToInstrs &UseToDefs,
getUses(DefsPerColorToUses,
RegToId.find(Def->getOperand(0).getReg())->second, *Def);
if (Users->size() > 1) {
-#ifdef DEBUG
+#ifndef NDEBUG
// if all the uses of this def are in the defs of the potential candidate,
// this is a complex candidate of level 1
if (DefsOfPotentialCandidates.empty()) {
@@ -885,7 +883,7 @@ static void computeOthers(const InstrToInstrs &UseToDefs,
}
if (!Found)
++NumCplxLvl1;
-#endif // DEBUG
+#endif // NDEBUG
continue;
}
@@ -932,7 +930,7 @@ static void computeOthers(const InstrToInstrs &UseToDefs,
"L2 already involved in LOH.");
assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) &&
"Candidate already involved in LOH.");
-#ifdef DEBUG
+#ifndef NDEBUG
// get the immediate of the load
if (Candidate->getOperand(2).getImm() == 0)
if (ImmediateDefOpc == AArch64::ADDXri)
@@ -943,7 +941,7 @@ static void computeOthers(const InstrToInstrs &UseToDefs,
++NumADDToLDRWithImm;
else
++NumLDRToLDRWithImm;
-#endif // DEBUG
+#endif // NDEBUG
}
} else {
if (ImmediateDefOpc == AArch64::ADRP)
@@ -966,7 +964,7 @@ static void computeOthers(const InstrToInstrs &UseToDefs,
"L2 already involved in LOH.");
assert((!InvolvedInLOHs || InvolvedInLOHs->insert(Candidate)) &&
"Candidate already involved in LOH.");
-#ifdef DEBUG
+#ifndef NDEBUG
// get the immediate of the store
if (Candidate->getOperand(2).getImm() == 0)
if (ImmediateDefOpc == AArch64::ADDXri)
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
index 8fff381d391e..8b186328d125 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp
@@ -95,7 +95,9 @@ public:
typedef std::tuple<int, unsigned, AArch64CC::CondCode> CmpInfo;
static char ID;
- AArch64ConditionOptimizer() : MachineFunctionPass(ID) {}
+ AArch64ConditionOptimizer() : MachineFunctionPass(ID) {
+ initializeAArch64ConditionOptimizerPass(*PassRegistry::getPassRegistry());
+ }
void getAnalysisUsage(AnalysisUsage &AU) const override;
MachineInstr *findSuitableCompare(MachineBasicBlock *MBB);
CmpInfo adjustCmp(MachineInstr *CmpMI, AArch64CC::CondCode Cmp);
@@ -103,7 +105,7 @@ public:
bool adjustTo(MachineInstr *CmpMI, AArch64CC::CondCode Cmp, MachineInstr *To,
int ToImm);
bool runOnMachineFunction(MachineFunction &MF) override;
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "AArch64 Condition Optimizer";
}
};
@@ -111,10 +113,6 @@ public:
char AArch64ConditionOptimizer::ID = 0;
-namespace llvm {
-void initializeAArch64ConditionOptimizerPass(PassRegistry &);
-}
-
INITIALIZE_PASS_BEGIN(AArch64ConditionOptimizer, "aarch64-condopt",
"AArch64 CondOpt Pass", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp
index e1b0dc724b39..da09b36cac9c 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ConditionalCompares.cpp
@@ -329,7 +329,7 @@ MachineInstr *SSACCmpConv::findConvertibleCompare(MachineBasicBlock *MBB) {
++NumImmRangeRejs;
return nullptr;
}
- // Fall through.
+ LLVM_FALLTHROUGH;
case AArch64::SUBSWrr:
case AArch64::SUBSXrr:
case AArch64::ADDSWrr:
@@ -568,7 +568,7 @@ void SSACCmpConv::convert(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks) {
CmpBB->removeSuccessor(Tail, true);
Head->transferSuccessorsAndUpdatePHIs(CmpBB);
DebugLoc TermDL = Head->getFirstTerminator()->getDebugLoc();
- TII->RemoveBranch(*Head);
+ TII->removeBranch(*Head);
// If the Head terminator was one of the cbz / tbz branches with built-in
// compare, we need to insert an explicit compare instruction in its place.
@@ -732,10 +732,12 @@ class AArch64ConditionalCompares : public MachineFunctionPass {
public:
static char ID;
- AArch64ConditionalCompares() : MachineFunctionPass(ID) {}
+ AArch64ConditionalCompares() : MachineFunctionPass(ID) {
+ initializeAArch64ConditionalComparesPass(*PassRegistry::getPassRegistry());
+ }
void getAnalysisUsage(AnalysisUsage &AU) const override;
bool runOnMachineFunction(MachineFunction &MF) override;
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "AArch64 Conditional Compares";
}
@@ -750,10 +752,6 @@ private:
char AArch64ConditionalCompares::ID = 0;
-namespace llvm {
-void initializeAArch64ConditionalComparesPass(PassRegistry &);
-}
-
INITIALIZE_PASS_BEGIN(AArch64ConditionalCompares, "aarch64-ccmp",
"AArch64 CCMP Pass", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp b/contrib/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
index 7a6f7669db5f..30e2b2310456 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64DeadRegisterDefinitionsPass.cpp
@@ -6,9 +6,9 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-// When allowed by the instruction, replace a dead definition of a GPR with
-// the zero register. This makes the code a bit friendlier towards the
-// hardware's register renamer.
+/// \file When allowed by the instruction, replace a dead definition of a GPR
+/// with the zero register. This makes the code a bit friendlier towards the
+/// hardware's register renamer.
//===----------------------------------------------------------------------===//
#include "AArch64.h"
@@ -17,43 +17,37 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;
#define DEBUG_TYPE "aarch64-dead-defs"
STATISTIC(NumDeadDefsReplaced, "Number of dead definitions replaced");
-namespace llvm {
-void initializeAArch64DeadRegisterDefinitionsPass(PassRegistry &);
-}
-
#define AARCH64_DEAD_REG_DEF_NAME "AArch64 Dead register definitions"
namespace {
class AArch64DeadRegisterDefinitions : public MachineFunctionPass {
private:
const TargetRegisterInfo *TRI;
- bool implicitlyDefinesOverlappingReg(unsigned Reg, const MachineInstr &MI);
- bool processMachineBasicBlock(MachineBasicBlock &MBB);
- bool usesFrameIndex(const MachineInstr &MI);
+ const MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
+ bool Changed;
+ void processMachineBasicBlock(MachineBasicBlock &MBB);
public:
static char ID; // Pass identification, replacement for typeid.
- explicit AArch64DeadRegisterDefinitions() : MachineFunctionPass(ID) {
+ AArch64DeadRegisterDefinitions() : MachineFunctionPass(ID) {
initializeAArch64DeadRegisterDefinitionsPass(
*PassRegistry::getPassRegistry());
}
bool runOnMachineFunction(MachineFunction &F) override;
- MachineFunctionProperties getRequiredProperties() const override {
- return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
- }
-
- const char *getPassName() const override { return AARCH64_DEAD_REG_DEF_NAME; }
+ StringRef getPassName() const override { return AARCH64_DEAD_REG_DEF_NAME; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
@@ -66,25 +60,16 @@ char AArch64DeadRegisterDefinitions::ID = 0;
INITIALIZE_PASS(AArch64DeadRegisterDefinitions, "aarch64-dead-defs",
AARCH64_DEAD_REG_DEF_NAME, false, false)
-bool AArch64DeadRegisterDefinitions::implicitlyDefinesOverlappingReg(
- unsigned Reg, const MachineInstr &MI) {
- for (const MachineOperand &MO : MI.implicit_operands())
- if (MO.isReg() && MO.isDef())
- if (TRI->regsOverlap(Reg, MO.getReg()))
- return true;
- return false;
-}
-
-bool AArch64DeadRegisterDefinitions::usesFrameIndex(const MachineInstr &MI) {
- for (const MachineOperand &Op : MI.uses())
- if (Op.isFI())
+static bool usesFrameIndex(const MachineInstr &MI) {
+ for (const MachineOperand &MO : MI.uses())
+ if (MO.isFI())
return true;
return false;
}
-bool AArch64DeadRegisterDefinitions::processMachineBasicBlock(
+void AArch64DeadRegisterDefinitions::processMachineBasicBlock(
MachineBasicBlock &MBB) {
- bool Changed = false;
+ const MachineFunction &MF = *MBB.getParent();
for (MachineInstr &MI : MBB) {
if (usesFrameIndex(MI)) {
// We need to skip this instruction because while it appears to have a
@@ -99,62 +84,63 @@ bool AArch64DeadRegisterDefinitions::processMachineBasicBlock(
DEBUG(dbgs() << " Ignoring, XZR or WZR already used by the instruction\n");
continue;
}
- for (int i = 0, e = MI.getDesc().getNumDefs(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
- if (MO.isReg() && MO.isDead() && MO.isDef()) {
- assert(!MO.isImplicit() && "Unexpected implicit def!");
- DEBUG(dbgs() << " Dead def operand #" << i << " in:\n ";
- MI.print(dbgs()));
- // Be careful not to change the register if it's a tied operand.
- if (MI.isRegTiedToUseOperand(i)) {
- DEBUG(dbgs() << " Ignoring, def is tied operand.\n");
- continue;
- }
- // Don't change the register if there's an implicit def of a subreg or
- // superreg.
- if (implicitlyDefinesOverlappingReg(MO.getReg(), MI)) {
- DEBUG(dbgs() << " Ignoring, implicitly defines overlap reg.\n");
- continue;
- }
- // Make sure the instruction take a register class that contains
- // the zero register and replace it if so.
- unsigned NewReg;
- switch (MI.getDesc().OpInfo[i].RegClass) {
- default:
- DEBUG(dbgs() << " Ignoring, register is not a GPR.\n");
- continue;
- case AArch64::GPR32RegClassID:
- NewReg = AArch64::WZR;
- break;
- case AArch64::GPR64RegClassID:
- NewReg = AArch64::XZR;
- break;
- }
- DEBUG(dbgs() << " Replacing with zero register. New:\n ");
- MO.setReg(NewReg);
- DEBUG(MI.print(dbgs()));
- ++NumDeadDefsReplaced;
- // Only replace one dead register, see check for zero register above.
- break;
+ const MCInstrDesc &Desc = MI.getDesc();
+ for (int I = 0, E = Desc.getNumDefs(); I != E; ++I) {
+ MachineOperand &MO = MI.getOperand(I);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ // We should not have any relevant physreg defs that are replaceable by
+ // zero before register allocation. So we just check for dead vreg defs.
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg) ||
+ (!MO.isDead() && !MRI->use_nodbg_empty(Reg)))
+ continue;
+ assert(!MO.isImplicit() && "Unexpected implicit def!");
+ DEBUG(dbgs() << " Dead def operand #" << I << " in:\n ";
+ MI.print(dbgs()));
+ // Be careful not to change the register if it's a tied operand.
+ if (MI.isRegTiedToUseOperand(I)) {
+ DEBUG(dbgs() << " Ignoring, def is tied operand.\n");
+ continue;
+ }
+ const TargetRegisterClass *RC = TII->getRegClass(Desc, I, TRI, MF);
+ unsigned NewReg;
+ if (RC == nullptr) {
+ DEBUG(dbgs() << " Ignoring, register is not a GPR.\n");
+ continue;
+ } else if (RC->contains(AArch64::WZR))
+ NewReg = AArch64::WZR;
+ else if (RC->contains(AArch64::XZR))
+ NewReg = AArch64::XZR;
+ else {
+ DEBUG(dbgs() << " Ignoring, register is not a GPR.\n");
+ continue;
}
+ DEBUG(dbgs() << " Replacing with zero register. New:\n ");
+ MO.setReg(NewReg);
+ MO.setIsDead();
+ DEBUG(MI.print(dbgs()));
+ ++NumDeadDefsReplaced;
+ Changed = true;
+ // Only replace one dead register, see check for zero register above.
+ break;
}
}
- return Changed;
}
// Scan the function for instructions that have a dead definition of a
// register. Replace that register with the zero register when possible.
bool AArch64DeadRegisterDefinitions::runOnMachineFunction(MachineFunction &MF) {
- TRI = MF.getSubtarget().getRegisterInfo();
- bool Changed = false;
- DEBUG(dbgs() << "***** AArch64DeadRegisterDefinitions *****\n");
-
if (skipFunction(*MF.getFunction()))
return false;
+ TRI = MF.getSubtarget().getRegisterInfo();
+ TII = MF.getSubtarget().getInstrInfo();
+ MRI = &MF.getRegInfo();
+ DEBUG(dbgs() << "***** AArch64DeadRegisterDefinitions *****\n");
+ Changed = false;
for (auto &MBB : MF)
- if (processMachineBasicBlock(MBB))
- Changed = true;
+ processMachineBasicBlock(MBB);
return Changed;
}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
index a1c98251cec4..fe1c0beee0eb 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp
@@ -23,10 +23,6 @@
#include "llvm/Support/MathExtras.h"
using namespace llvm;
-namespace llvm {
-void initializeAArch64ExpandPseudoPass(PassRegistry &);
-}
-
#define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass"
namespace {
@@ -41,9 +37,7 @@ public:
bool runOnMachineFunction(MachineFunction &Fn) override;
- const char *getPassName() const override {
- return AARCH64_EXPAND_PSEUDO_NAME;
- }
+ StringRef getPassName() const override { return AARCH64_EXPAND_PSEUDO_NAME; }
private:
bool expandMBB(MachineBasicBlock &MBB);
@@ -719,19 +713,19 @@ bool AArch64ExpandPseudo::expandCMP_SWAP_128(
.addOperand(DesiredLo)
.addImm(0);
BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
- .addReg(AArch64::WZR)
- .addReg(AArch64::WZR)
+ .addUse(AArch64::WZR)
+ .addUse(AArch64::WZR)
.addImm(AArch64CC::EQ);
BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
.addReg(DestHi.getReg(), getKillRegState(DestHi.isDead()))
.addOperand(DesiredHi)
.addImm(0);
BuildMI(LoadCmpBB, DL, TII->get(AArch64::CSINCWr), StatusReg)
- .addReg(StatusReg, RegState::Kill)
- .addReg(StatusReg, RegState::Kill)
+ .addUse(StatusReg, RegState::Kill)
+ .addUse(StatusReg, RegState::Kill)
.addImm(AArch64CC::EQ);
BuildMI(LoadCmpBB, DL, TII->get(AArch64::CBNZW))
- .addReg(StatusReg, RegState::Kill)
+ .addUse(StatusReg, RegState::Kill)
.addMBB(DoneBB);
LoadCmpBB->addSuccessor(DoneBB);
LoadCmpBB->addSuccessor(StoreBB);
@@ -903,9 +897,14 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
case AArch64::MOVi64imm:
return expandMOVImm(MBB, MBBI, 64);
case AArch64::RET_ReallyLR: {
+ // Hiding the LR use with RET_ReallyLR may lead to extra kills in the
+ // function and missing live-ins. We are fine in practice because callee
+ // saved register handling ensures the register value is restored before
+ // RET, but we need the undef flag here to appease the MachineVerifier
+ // liveness checks.
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
- .addReg(AArch64::LR);
+ .addReg(AArch64::LR, RegState::Undef);
transferImpOps(MI, MIB, MIB);
MI.eraseFromParent();
return true;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index e2ab7ab79be1..fe2c2d4550a7 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -134,6 +134,7 @@ private:
bool selectFRem(const Instruction *I);
bool selectSDiv(const Instruction *I);
bool selectGetElementPtr(const Instruction *I);
+ bool selectAtomicCmpXchg(const AtomicCmpXchgInst *I);
// Utility helper routines.
bool isTypeLegal(Type *Ty, MVT &VT);
@@ -185,6 +186,8 @@ private:
MachineMemOperand *MMO = nullptr);
bool emitStore(MVT VT, unsigned SrcReg, Address Addr,
MachineMemOperand *MMO = nullptr);
+ bool emitStoreRelease(MVT VT, unsigned SrcReg, unsigned AddrReg,
+ MachineMemOperand *MMO = nullptr);
unsigned emitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt);
unsigned emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt);
unsigned emitAdd(MVT RetVT, const Value *LHS, const Value *RHS,
@@ -554,7 +557,7 @@ bool AArch64FastISel::computeAddress(const Value *Obj, Address &Addr, Type *Ty)
for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
GTI != E; ++GTI) {
const Value *Op = GTI.getOperand();
- if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ if (StructType *STy = GTI.getStructTypeOrNull()) {
const StructLayout *SL = DL.getStructLayout(STy);
unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
TmpOffset += SL->getElementOffset(Idx);
@@ -1997,6 +2000,28 @@ bool AArch64FastISel::selectLoad(const Instruction *I) {
return true;
}
+bool AArch64FastISel::emitStoreRelease(MVT VT, unsigned SrcReg,
+ unsigned AddrReg,
+ MachineMemOperand *MMO) {
+ unsigned Opc;
+ switch (VT.SimpleTy) {
+ default: return false;
+ case MVT::i8: Opc = AArch64::STLRB; break;
+ case MVT::i16: Opc = AArch64::STLRH; break;
+ case MVT::i32: Opc = AArch64::STLRW; break;
+ case MVT::i64: Opc = AArch64::STLRX; break;
+ }
+
+ const MCInstrDesc &II = TII.get(Opc);
+ SrcReg = constrainOperandRegClass(II, SrcReg, 0);
+ AddrReg = constrainOperandRegClass(II, AddrReg, 1);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ .addReg(SrcReg)
+ .addReg(AddrReg)
+ .addMemOperand(MMO);
+ return true;
+}
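A usage sketch of what this enables (illustrative C++, not from the patch): together with the selectStore change further down, a release or seq_cst store of a legal integer width is now selected directly at -O0 instead of forcing a SelectionDAG fallback.

#include <atomic>

// Expected to go through emitStoreRelease and become an STLR when compiled
// at -O0; a relaxed store keeps using the ordinary STR path.
void publish(std::atomic<int> &flag) {
  flag.store(1, std::memory_order_release);
}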
+
bool AArch64FastISel::emitStore(MVT VT, unsigned SrcReg, Address Addr,
MachineMemOperand *MMO) {
if (!TLI.allowsMisalignedMemoryAccesses(VT))
@@ -2071,8 +2096,7 @@ bool AArch64FastISel::selectStore(const Instruction *I) {
// Verify we have a legal type before going any further. Currently, we handle
// simple types that will directly fit in a register (i32/f32/i64/f64) or
// those that can be sign or zero-extended to a basic operation (i1/i8/i16).
- if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true) ||
- cast<StoreInst>(I)->isAtomic())
+ if (!isTypeSupported(Op0->getType(), VT, /*IsVectorAllowed=*/true))
return false;
const Value *PtrV = I->getOperand(1);
@@ -2109,9 +2133,23 @@ bool AArch64FastISel::selectStore(const Instruction *I) {
if (!SrcReg)
return false;
+ auto *SI = cast<StoreInst>(I);
+
+ // Try to emit a STLR for seq_cst/release.
+ if (SI->isAtomic()) {
+ AtomicOrdering Ord = SI->getOrdering();
+ // The non-atomic instructions are sufficient for relaxed stores.
+ if (isReleaseOrStronger(Ord)) {
+ // The STLR addressing mode only supports a base reg; pass that directly.
+ unsigned AddrReg = getRegForValue(PtrV);
+ return emitStoreRelease(VT, SrcReg, AddrReg,
+ createMachineMemOperandFor(I));
+ }
+ }
+
// See if we can handle this address.
Address Addr;
- if (!computeAddress(I->getOperand(1), Addr, I->getOperand(0)->getType()))
+ if (!computeAddress(PtrV, Addr, Op0->getType()))
return false;
if (!emitStore(VT, SrcReg, Addr, createMachineMemOperandFor(I)))
@@ -2822,7 +2860,7 @@ bool AArch64FastISel::fastLowerArguments() {
return false;
CallingConv::ID CC = F->getCallingConv();
- if (CC != CallingConv::C)
+ if (CC != CallingConv::C && CC != CallingConv::Swift)
return false;
// Only handle simple cases of up to 8 GPR and FPR each.
@@ -3328,8 +3366,8 @@ bool AArch64FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
switch (II->getIntrinsicID()) {
default: return false;
case Intrinsic::frameaddress: {
- MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
- MFI->setFrameAddressIsTaken(true);
+ MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
+ MFI.setFrameAddressIsTaken(true);
const AArch64RegisterInfo *RegInfo =
static_cast<const AArch64RegisterInfo *>(Subtarget->getRegisterInfo());
@@ -4847,7 +4885,7 @@ bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
for (gep_type_iterator GTI = gep_type_begin(I), E = gep_type_end(I);
GTI != E; ++GTI) {
const Value *Idx = GTI.getOperand();
- if (auto *StTy = dyn_cast<StructType>(*GTI)) {
+ if (auto *StTy = GTI.getStructTypeOrNull()) {
unsigned Field = cast<ConstantInt>(Idx)->getZExtValue();
// N = N + Offset
if (Field)
@@ -4903,6 +4941,73 @@ bool AArch64FastISel::selectGetElementPtr(const Instruction *I) {
return true;
}
+bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) {
+ assert(TM.getOptLevel() == CodeGenOpt::None &&
+ "cmpxchg survived AtomicExpand at optlevel > -O0");
+
+ auto *RetPairTy = cast<StructType>(I->getType());
+ Type *RetTy = RetPairTy->getTypeAtIndex(0U);
+ assert(RetPairTy->getTypeAtIndex(1U)->isIntegerTy(1) &&
+ "cmpxchg has a non-i1 status result");
+
+ MVT VT;
+ if (!isTypeLegal(RetTy, VT))
+ return false;
+
+ const TargetRegisterClass *ResRC;
+ unsigned Opc, CmpOpc;
+ // This only supports i32/i64, because i8/i16 aren't legal, and the generic
+ // extractvalue selection doesn't support that.
+ if (VT == MVT::i32) {
+ Opc = AArch64::CMP_SWAP_32;
+ CmpOpc = AArch64::SUBSWrs;
+ ResRC = &AArch64::GPR32RegClass;
+ } else if (VT == MVT::i64) {
+ Opc = AArch64::CMP_SWAP_64;
+ CmpOpc = AArch64::SUBSXrs;
+ ResRC = &AArch64::GPR64RegClass;
+ } else {
+ return false;
+ }
+
+ const MCInstrDesc &II = TII.get(Opc);
+
+ const unsigned AddrReg = constrainOperandRegClass(
+ II, getRegForValue(I->getPointerOperand()), II.getNumDefs());
+ const unsigned DesiredReg = constrainOperandRegClass(
+ II, getRegForValue(I->getCompareOperand()), II.getNumDefs() + 1);
+ const unsigned NewReg = constrainOperandRegClass(
+ II, getRegForValue(I->getNewValOperand()), II.getNumDefs() + 2);
+
+ const unsigned ResultReg1 = createResultReg(ResRC);
+ const unsigned ResultReg2 = createResultReg(&AArch64::GPR32RegClass);
+ const unsigned ScratchReg = createResultReg(&AArch64::GPR32RegClass);
+
+ // FIXME: MachineMemOperand doesn't support cmpxchg yet.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ .addDef(ResultReg1)
+ .addDef(ScratchReg)
+ .addUse(AddrReg)
+ .addUse(DesiredReg)
+ .addUse(NewReg);
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CmpOpc))
+ .addDef(VT == MVT::i32 ? AArch64::WZR : AArch64::XZR)
+ .addUse(ResultReg1)
+ .addUse(DesiredReg)
+ .addImm(0);
+
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::CSINCWr))
+ .addDef(ResultReg2)
+ .addUse(AArch64::WZR)
+ .addUse(AArch64::WZR)
+ .addImm(AArch64CC::NE);
+
+ assert((ResultReg1 + 1) == ResultReg2 && "Nonconsecutive result registers.");
+ updateValueMap(I, ResultReg1, 2);
+ return true;
+}
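Again an illustrative C++ input for the kind of IR this covers: at -O0 AtomicExpand leaves the cmpxchg intact, FastISel selects it to the CMP_SWAP_32/64 pseudo, and the SUBS/CSINC pair above materializes the i1 success flag.

#include <atomic>

// 64-bit compare-and-swap; only i32 and i64 are handled here, smaller widths
// still fall back.
bool tryClaim(std::atomic<long> &slot, long expected, long desired) {
  return slot.compare_exchange_strong(expected, desired);
}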
+
bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
switch (I->getOpcode()) {
default:
@@ -4976,6 +5081,8 @@ bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
return selectFRem(I);
case Instruction::GetElementPtr:
return selectGetElementPtr(I);
+ case Instruction::AtomicCmpXchg:
+ return selectAtomicCmpXchg(cast<AtomicCmpXchgInst>(I));
}
// fall-back to target-independent instruction selection.
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 82111e5c7259..f5b8c35375f8 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -124,23 +124,23 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
if (MF.getFunction()->hasFnAttribute(Attribute::NoRedZone))
return false;
- const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
unsigned NumBytes = AFI->getLocalStackSize();
- return !(MFI->hasCalls() || hasFP(MF) || NumBytes > 128);
+ return !(MFI.hasCalls() || hasFP(MF) || NumBytes > 128);
}
/// hasFP - Return true if the specified function should have a dedicated frame
/// pointer register.
bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
// Retain behavior of always omitting the FP for leaf functions when possible.
- return (MFI->hasCalls() &&
+ return (MFI.hasCalls() &&
MF.getTarget().Options.DisableFramePointerElim(MF)) ||
- MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken() ||
- MFI->hasStackMap() || MFI->hasPatchPoint() ||
+ MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() ||
+ MFI.hasStackMap() || MFI.hasPatchPoint() ||
RegInfo->needsStackRealignment(MF);
}
@@ -151,7 +151,7 @@ bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
/// included as part of the stack frame.
bool
AArch64FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
- return !MF.getFrameInfo()->hasVarSizedObjects();
+ return !MF.getFrameInfo().hasVarSizedObjects();
}
MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
@@ -203,23 +203,23 @@ MachineBasicBlock::iterator AArch64FrameLowering::eliminateCallFramePseudoInstr(
void AArch64FrameLowering::emitCalleeSavedFrameMoves(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const {
MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- MachineModuleInfo &MMI = MF.getMMI();
- const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
- const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ const TargetSubtargetInfo &STI = MF.getSubtarget();
+ const MCRegisterInfo *MRI = STI.getRegisterInfo();
+ const TargetInstrInfo *TII = STI.getInstrInfo();
DebugLoc DL = MBB.findDebugLoc(MBBI);
// Add callee saved registers to move list.
- const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
if (CSI.empty())
return;
for (const auto &Info : CSI) {
unsigned Reg = Info.getReg();
int64_t Offset =
- MFI->getObjectOffset(Info.getFrameIdx()) - getOffsetOfLocalArea();
+ MFI.getObjectOffset(Info.getFrameIdx()) - getOffsetOfLocalArea();
unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
- unsigned CFIIndex = MMI.addFrameInst(
+ unsigned CFIIndex = MF.addFrameInst(
MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
@@ -286,7 +286,7 @@ bool AArch64FrameLowering::canUseAsPrologue(
bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
MachineFunction &MF, unsigned StackBumpBytes) const {
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
- const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
@@ -298,7 +298,7 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
if (StackBumpBytes >= 512)
return false;
- if (MFI->hasVarSizedObjects())
+ if (MFI.hasVarSizedObjects())
return false;
if (RegInfo->needsStackRealignment(MF))
@@ -407,7 +407,7 @@ static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
- const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
const Function *Fn = MF.getFunction();
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
@@ -426,7 +426,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
return;
- int NumBytes = (int)MFI->getStackSize();
+ int NumBytes = (int)MFI.getStackSize();
if (!AFI->hasStackFrame()) {
assert(!HasFP && "unexpected function without stack frame but with FP");
@@ -446,7 +446,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// Label used to tie together the PROLOG_LABEL and the MachineMoves.
MCSymbol *FrameLabel = MMI.getContext().createTempSymbol();
// Encode the stack size of the leaf function.
- unsigned CFIIndex = MMI.addFrameInst(
+ unsigned CFIIndex = MF.addFrameInst(
MCCFIInstruction::createDefCfaOffset(FrameLabel, -NumBytes));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
@@ -513,7 +513,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
MachineInstr::FrameSetup);
if (NeedsRealignment) {
- const unsigned Alignment = MFI->getMaxAlignment();
+ const unsigned Alignment = MFI.getMaxAlignment();
const unsigned NrBitsToZero = countTrailingZeros(Alignment);
assert(NrBitsToZero > 1);
assert(scratchSPReg != AArch64::SP);
@@ -621,15 +621,15 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
if (HasFP) {
// Define the current CFA rule to use the provided FP.
unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
- unsigned CFIIndex = MMI.addFrameInst(
+ unsigned CFIIndex = MF.addFrameInst(
MCCFIInstruction::createDefCfa(nullptr, Reg, 2 * StackGrowth));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
} else {
// Encode the stack size of the leaf function.
- unsigned CFIIndex = MMI.addFrameInst(
- MCCFIInstruction::createDefCfaOffset(nullptr, -MFI->getStackSize()));
+ unsigned CFIIndex = MF.addFrameInst(
+ MCCFIInstruction::createDefCfaOffset(nullptr, -MFI.getStackSize()));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
@@ -644,7 +644,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
DebugLoc DL;
@@ -655,7 +655,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
IsTailCallReturn = RetOpcode == AArch64::TCRETURNdi ||
RetOpcode == AArch64::TCRETURNri;
}
- int NumBytes = MFI->getStackSize();
+ int NumBytes = MFI.getStackSize();
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
// All calls are tail calls in GHC calling conv, and functions have no
@@ -762,7 +762,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// FIXME: Rather than doing the math here, we should instead just use
// non-post-indexed loads for the restores if we aren't actually going to
// be able to save any instructions.
- if (MFI->hasVarSizedObjects() || AFI->isStackRealigned())
+ if (MFI.hasVarSizedObjects() || AFI->isStackRealigned())
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
-CSStackSize + 16, TII, MachineInstr::FrameDestroy);
else if (NumBytes)
@@ -790,13 +790,13 @@ int AArch64FrameLowering::getFrameIndexReference(const MachineFunction &MF,
int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
int FI, unsigned &FrameReg,
bool PreferFP) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>(
MF.getSubtarget().getRegisterInfo());
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
- int FPOffset = MFI->getObjectOffset(FI) + 16;
- int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize();
- bool isFixed = MFI->isFixedObjectIndex(FI);
+ int FPOffset = MFI.getObjectOffset(FI) + 16;
+ int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();
+ bool isFixed = MFI.isFixedObjectIndex(FI);
// Use frame pointer to reference fixed objects. Use it for locals if
// there are VLAs or a dynamically realigned SP (and thus the SP isn't
@@ -821,7 +821,7 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
// using the FP regardless, though, as the SP offset is unknown
// and we don't have a base pointer available. If an offset is
// available via the FP and the SP, use whichever is closest.
- if (PreferFP || MFI->hasVarSizedObjects() || FPOffset >= 0 ||
+ if (PreferFP || MFI.hasVarSizedObjects() || FPOffset >= 0 ||
(FPOffset >= -256 && Offset > -FPOffset))
UseFP = true;
}
@@ -869,7 +869,7 @@ static bool produceCompactUnwindFrame(MachineFunction &MF) {
Attrs.hasAttrSomewhere(Attribute::SwiftError));
}
-
+namespace {
struct RegPairInfo {
RegPairInfo() : Reg1(AArch64::NoRegister), Reg2(AArch64::NoRegister) {}
unsigned Reg1;
@@ -879,6 +879,7 @@ struct RegPairInfo {
bool IsGPR;
bool isPaired() const { return Reg2 != AArch64::NoRegister; }
};
+} // end anonymous namespace
static void computeCalleeSaveRegisterPairs(
MachineFunction &MF, const std::vector<CalleeSavedInfo> &CSI,
@@ -888,7 +889,7 @@ static void computeCalleeSaveRegisterPairs(
return;
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
CallingConv::ID CC = MF.getFunction()->getCallingConv();
unsigned Count = CSI.size();
(void)CC;
@@ -941,8 +942,8 @@ static void computeCalleeSaveRegisterPairs(
// Round up size of non-pair to pair size if we need to pad the
// callee-save area to ensure 16-byte alignment.
Offset -= 16;
- assert(MFI->getObjectAlignment(RPI.FrameIdx) <= 16);
- MFI->setObjectAlignment(RPI.FrameIdx, 16);
+ assert(MFI.getObjectAlignment(RPI.FrameIdx) <= 16);
+ MFI.setObjectAlignment(RPI.FrameIdx, 16);
AFI->setCalleeSaveStackHasFreeSpace(true);
} else
Offset -= RPI.isPaired() ? 16 : 8;
@@ -1149,8 +1150,8 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// realistically that's not a big deal at this stage of the game.
// The CSR spill slots have not been allocated yet, so estimateStackSize
// won't include them.
- MachineFrameInfo *MFI = MF.getFrameInfo();
- unsigned CFSize = MFI->estimateStackSize(MF) + 8 * NumRegsSpilled;
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ unsigned CFSize = MFI.estimateStackSize(MF) + 8 * NumRegsSpilled;
DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
bool BigStack = (CFSize >= 256);
if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
@@ -1180,7 +1181,7 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// an emergency spill slot.
if (!ExtraCSSpill) {
const TargetRegisterClass *RC = &AArch64::GPR64RegClass;
- int FI = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), false);
+ int FI = MFI.CreateStackObject(RC->getSize(), RC->getAlignment(), false);
RS->addScavengingFrameIndex(FI);
DEBUG(dbgs() << "No available CS registers, allocated fi#" << FI
<< " as the emergency spill slot.\n");
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def b/contrib/llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def
new file mode 100644
index 000000000000..e927d58ad612
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64GenRegisterBankInfo.def
@@ -0,0 +1,173 @@
+//===- AArch64GenRegisterBankInfo.def ----------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file defines all the static objects used by AArch64RegisterBankInfo.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_BUILD_GLOBAL_ISEL
+#error "You shouldn't build this"
+#endif
+
+namespace llvm {
+namespace AArch64 {
+
+RegisterBank GPRRegBank;
+RegisterBank FPRRegBank;
+RegisterBank CCRRegBank;
+
+RegisterBank *RegBanks[] = {&GPRRegBank, &FPRRegBank, &CCRRegBank};
+
+// PartialMappings.
+enum PartialMappingIdx {
+ PMI_None = -1,
+ PMI_GPR32 = 1,
+ PMI_GPR64,
+ PMI_FPR32,
+ PMI_FPR64,
+ PMI_FPR128,
+ PMI_FPR256,
+ PMI_FPR512,
+ PMI_FirstGPR = PMI_GPR32,
+ PMI_LastGPR = PMI_GPR64,
+ PMI_FirstFPR = PMI_FPR32,
+ PMI_LastFPR = PMI_FPR512,
+ PMI_Min = PMI_FirstGPR,
+};
+
+static unsigned getRegBankBaseIdxOffset(unsigned Size) {
+ assert(Size && "0-sized type!!");
+ // Anything smaller than 32 gets 32.
+ Size = ((Size + 31) / 32) * 32;
+ // 32 is 0, 64 is 1, 128 is 2, and so on.
+ return Log2_32(Size) - /*Log2_32(32)=*/ 5;
+}
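As a quick sanity check of the rounding and indexing above, here is a minimal standalone sketch (plain C++; log2u is a local stand-in for llvm::Log2_32, and all names are illustrative only):

#include <cassert>
#include <cstdio>

// Stand-in for llvm::Log2_32: floor(log2(V)) for V > 0.
static unsigned log2u(unsigned V) {
  unsigned L = 0;
  while (V >>= 1)
    ++L;
  return L;
}

// Mirrors getRegBankBaseIdxOffset() above.
static unsigned baseIdxOffset(unsigned Size) {
  assert(Size && "0-sized type!!");
  Size = ((Size + 31) / 32) * 32; // round up to a multiple of 32
  return log2u(Size) - 5;         // 32 -> 0, 64 -> 1, 128 -> 2, ...
}

int main() {
  // Prints "0 1 2 4": Size 1 rounds up to 32, Size 512 maps to offset 4.
  std::printf("%u %u %u %u\n", baseIdxOffset(1), baseIdxOffset(64),
              baseIdxOffset(128), baseIdxOffset(512));
  return 0;
}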
+
+RegisterBankInfo::PartialMapping PartMappings[] {
+ /* StartIdx, Length, RegBank */
+ // 0: GPR 32-bit value.
+ {0, 32, GPRRegBank},
+ // 1: GPR 64-bit value.
+ {0, 64, GPRRegBank},
+ // 2: FPR 32-bit value.
+ {0, 32, FPRRegBank},
+ // 3: FPR 64-bit value.
+ {0, 64, FPRRegBank},
+ // 4: FPR 128-bit value.
+ {0, 128, FPRRegBank},
+ // 5: FPR 256-bit value.
+ {0, 256, FPRRegBank},
+ // 6: FPR 512-bit value.
+ {0, 512, FPRRegBank}
+};
+
+enum ValueMappingIdx {
+ First3OpsIdx = 0,
+ Last3OpsIdx = 18,
+ DistanceBetweenRegBanks = 3,
+ FirstCrossRegCpyIdx = 21,
+ LastCrossRegCpyIdx = 27,
+ DistanceBetweenCrossRegCpy = 2
+};
+
+// ValueMappings.
+RegisterBankInfo::ValueMapping ValMappings[]{
+ /* BreakDown, NumBreakDowns */
+ // 3-operand instructions (all binary operations should end up with one of
+ // those mappings).
+ // 0: GPR 32-bit value. <-- This must match First3OpsIdx.
+ {&PartMappings[PMI_GPR32 - PMI_Min], 1},
+ {&PartMappings[PMI_GPR32 - PMI_Min], 1},
+ {&PartMappings[PMI_GPR32 - PMI_Min], 1},
+ // 3: GPR 64-bit value.
+ {&PartMappings[PMI_GPR64 - PMI_Min], 1},
+ {&PartMappings[PMI_GPR64 - PMI_Min], 1},
+ {&PartMappings[PMI_GPR64 - PMI_Min], 1},
+ // 6: FPR 32-bit value.
+ {&PartMappings[PMI_FPR32 - PMI_Min], 1},
+ {&PartMappings[PMI_FPR32 - PMI_Min], 1},
+ {&PartMappings[PMI_FPR32 - PMI_Min], 1},
+ // 9: FPR 64-bit value.
+ {&PartMappings[PMI_FPR64 - PMI_Min], 1},
+ {&PartMappings[PMI_FPR64 - PMI_Min], 1},
+ {&PartMappings[PMI_FPR64 - PMI_Min], 1},
+ // 12: FPR 128-bit value.
+ {&PartMappings[PMI_FPR128 - PMI_Min], 1},
+ {&PartMappings[PMI_FPR128 - PMI_Min], 1},
+ {&PartMappings[PMI_FPR128 - PMI_Min], 1},
+ // 15: FPR 256-bit value.
+ {&PartMappings[PMI_FPR256 - PMI_Min], 1},
+ {&PartMappings[PMI_FPR256 - PMI_Min], 1},
+ {&PartMappings[PMI_FPR256 - PMI_Min], 1},
+ // 18: FPR 512-bit value. <-- This must match Last3OpsIdx.
+ {&PartMappings[PMI_FPR512 - PMI_Min], 1},
+ {&PartMappings[PMI_FPR512 - PMI_Min], 1},
+ {&PartMappings[PMI_FPR512 - PMI_Min], 1},
+ // Cross register bank copies.
+ // 21: GPR 32-bit value to FPR 32-bit value. <-- This must match
+ // FirstCrossRegCpyIdx.
+ {&PartMappings[PMI_GPR32 - PMI_Min], 1},
+ {&PartMappings[PMI_FPR32 - PMI_Min], 1},
+ // 23: GPR 64-bit value to FPR 64-bit value.
+ {&PartMappings[PMI_GPR64 - PMI_Min], 1},
+ {&PartMappings[PMI_FPR64 - PMI_Min], 1},
+ // 25: FPR 32-bit value to GPR 32-bit value.
+ {&PartMappings[PMI_FPR32 - PMI_Min], 1},
+ {&PartMappings[PMI_GPR32 - PMI_Min], 1},
+ // 27: FPR 64-bit value to GPR 64-bit value. <-- This must match
+ // LastCrossRegCpyIdx.
+ {&PartMappings[PMI_FPR64 - PMI_Min], 1},
+ {&PartMappings[PMI_GPR64 - PMI_Min], 1}
+};
+
+/// Get the pointer to the ValueMapping representing the RegisterBank
+/// at \p RBIdx with a size of \p Size.
+///
+/// The returned mapping works for instructions with the same kind of
+/// operands for up to 3 operands.
+///
+/// \pre \p RBIdx != PartialMappingIdx::None
+const RegisterBankInfo::ValueMapping *
+getValueMapping(PartialMappingIdx RBIdx, unsigned Size) {
+ assert(RBIdx != PartialMappingIdx::PMI_None && "No mapping needed for that");
+ unsigned ValMappingIdx = First3OpsIdx +
+ (RBIdx - AArch64::PartialMappingIdx::PMI_Min +
+ getRegBankBaseIdxOffset(Size)) *
+ ValueMappingIdx::DistanceBetweenRegBanks;
+ assert(ValMappingIdx >= AArch64::First3OpsIdx &&
+ ValMappingIdx <= AArch64::Last3OpsIdx && "Mapping out of bound");
+
+ return &ValMappings[ValMappingIdx];
+}
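A worked example of the indexing, using the enum values above (PMI_FirstGPR = 1, PMI_Min = 1): getValueMapping(PMI_FirstGPR, 64) computes getRegBankBaseIdxOffset(64) = 1, so ValMappingIdx = First3OpsIdx + (1 - 1 + 1) * DistanceBetweenRegBanks = 0 + 1 * 3 = 3, which lands exactly on the three "GPR 64-bit value" entries of ValMappings.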
+
+/// Get the pointer to the ValueMapping of the operands of a copy
+/// instruction from a GPR or FPR register to a GPR or FPR register
+/// with a size of \p Size.
+///
+/// If \p DstIsGPR is true, the destination of the copy is on GPR,
+/// otherwise it is on FPR. Same thing for \p SrcIsGPR.
+const RegisterBankInfo::ValueMapping *
+getCopyMapping(bool DstIsGPR, bool SrcIsGPR, unsigned Size) {
+ PartialMappingIdx DstRBIdx = DstIsGPR ? PMI_FirstGPR : PMI_FirstFPR;
+ PartialMappingIdx SrcRBIdx = SrcIsGPR ? PMI_FirstGPR : PMI_FirstFPR;
+ if (DstRBIdx == SrcRBIdx)
+ return getValueMapping(DstRBIdx, Size);
+ assert(Size <= 64 && "GPR cannot handle that size");
+ unsigned ValMappingIdx =
+ FirstCrossRegCpyIdx +
+ (DstRBIdx - PMI_Min + getRegBankBaseIdxOffset(Size)) *
+ ValueMappingIdx::DistanceBetweenCrossRegCpy;
+ assert(ValMappingIdx >= AArch64::FirstCrossRegCpyIdx &&
+ ValMappingIdx <= AArch64::LastCrossRegCpyIdx &&
+ "Mapping out of bound");
+ return &ValMappings[ValMappingIdx];
+}
+
+} // End AArch64 namespace.
+} // End llvm namespace.
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index 8d649250f656..3099383e5b32 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -47,7 +47,7 @@ public:
: SelectionDAGISel(tm, OptLevel), Subtarget(nullptr),
ForCodeSize(false) {}
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "AArch64 Instruction Selection";
}
@@ -349,7 +349,7 @@ bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
return false;
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
- unsigned BitSize = N.getValueType().getSizeInBits();
+ unsigned BitSize = N.getValueSizeInBits();
unsigned Val = RHS->getZExtValue() & (BitSize - 1);
unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
@@ -586,6 +586,11 @@ bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
return false;
Reg = N.getOperand(0);
+
+ // Don't match if free 32-bit -> 64-bit zext can be used instead.
+ if (Ext == AArch64_AM::UXTW &&
+ Reg->getValueType(0).getSizeInBits() == 32 && isDef32(*Reg.getNode()))
+ return false;
}
// AArch64 mandates that the RHS of the operation must use the smallest
@@ -1149,6 +1154,12 @@ void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
+
+ // Transfer memoperands.
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(Ld)->setMemRefs(MemOp, MemOp + 1);
+
CurDAG->RemoveDeadNode(N);
}
@@ -1197,6 +1208,11 @@ void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
+ // Transfer memoperands.
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemIntrinsicSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(St)->setMemRefs(MemOp, MemOp + 1);
+
ReplaceNode(N, St);
}
@@ -1266,7 +1282,7 @@ void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
if (Narrow)
- std::transform(Regs.begin(), Regs.end(), Regs.begin(),
+ transform(Regs, Regs.begin(),
WidenVector(*CurDAG));
SDValue RegSeq = createQTuple(Regs);
@@ -1305,7 +1321,7 @@ void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
if (Narrow)
- std::transform(Regs.begin(), Regs.end(), Regs.begin(),
+ transform(Regs, Regs.begin(),
WidenVector(*CurDAG));
SDValue RegSeq = createQTuple(Regs);
@@ -1360,7 +1376,7 @@ void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
if (Narrow)
- std::transform(Regs.begin(), Regs.end(), Regs.begin(),
+ transform(Regs, Regs.begin(),
WidenVector(*CurDAG));
SDValue RegSeq = createQTuple(Regs);
@@ -1390,7 +1406,7 @@ void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
if (Narrow)
- std::transform(Regs.begin(), Regs.end(), Regs.begin(),
+ transform(Regs, Regs.begin(),
WidenVector(*CurDAG));
SDValue RegSeq = createQTuple(Regs);
@@ -1859,23 +1875,52 @@ static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
uint64_t MSB =
cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
- if (Op.getOperand(1) == Orig)
- return getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
-
APInt OpUsefulBits(UsefulBits);
OpUsefulBits = 1;
+ APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
+ ResultUsefulBits.flipAllBits();
+ APInt Mask(UsefulBits.getBitWidth(), 0);
+
+ getUsefulBits(Op, ResultUsefulBits, Depth + 1);
+
if (MSB >= Imm) {
- OpUsefulBits = OpUsefulBits.shl(MSB - Imm + 1);
+ // The instruction is a BFXIL.
+ uint64_t Width = MSB - Imm + 1;
+ uint64_t LSB = Imm;
+
+ OpUsefulBits = OpUsefulBits.shl(Width);
--OpUsefulBits;
- UsefulBits &= ~OpUsefulBits;
- getUsefulBits(Op, UsefulBits, Depth + 1);
+
+ if (Op.getOperand(1) == Orig) {
+ // Copy the low bits from the result to bits starting from LSB.
+ Mask = ResultUsefulBits & OpUsefulBits;
+ Mask = Mask.shl(LSB);
+ }
+
+ if (Op.getOperand(0) == Orig)
+ // Bits starting from LSB in the input contribute to the result.
+ Mask |= (ResultUsefulBits & ~OpUsefulBits);
} else {
- OpUsefulBits = OpUsefulBits.shl(MSB + 1);
+ // The instruction is a BFI.
+ uint64_t Width = MSB + 1;
+ uint64_t LSB = UsefulBits.getBitWidth() - Imm;
+
+ OpUsefulBits = OpUsefulBits.shl(Width);
--OpUsefulBits;
- UsefulBits = ~(OpUsefulBits.shl(OpUsefulBits.getBitWidth() - Imm));
- getUsefulBits(Op, UsefulBits, Depth + 1);
+ OpUsefulBits = OpUsefulBits.shl(LSB);
+
+ if (Op.getOperand(1) == Orig) {
+ // Copy the bits from the result to the zero bits.
+ Mask = ResultUsefulBits & OpUsefulBits;
+ Mask = Mask.lshr(LSB);
+ }
+
+ if (Op.getOperand(0) == Orig)
+ Mask |= (ResultUsefulBits & ~OpUsefulBits);
}
+
+ UsefulBits &= Mask;
}
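A rough illustration of the BFXIL branch above (operand numbering as in the code, not assembler syntax): for a 32-bit value with Imm = 8 and MSB = 15, Width = 8 and OpUsefulBits = 0xFF. If only bits [3:0] of the result are useful (ResultUsefulBits = 0xF), then when operand 1 is Orig the mask becomes (0xF & 0xFF) << 8 = 0xF00, i.e. only bits [11:8] of Orig matter; when operand 0 is Orig the contribution is ResultUsefulBits & ~0xFF = 0, since every useful result bit lies inside the inserted field.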
static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
@@ -1931,7 +1976,7 @@ static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
return;
// Initialize UsefulBits
if (!Depth) {
- unsigned Bitwidth = Op.getValueType().getScalarType().getSizeInBits();
+ unsigned Bitwidth = Op.getScalarValueSizeInBits();
// At the beginning, assume every produced bits is useful
UsefulBits = APInt(Bitwidth, 0);
UsefulBits.flipAllBits();
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 06bfe340e758..4c98253878e4 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -53,13 +53,6 @@ cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration(
cl::desc("Allow AArch64 Local Dynamic TLS code generation"),
cl::init(false));
-// Disabled for causing self-hosting failures once returned-attribute inference
-// was enabled.
-static cl::opt<bool>
-EnableThisRetForwarding("aarch64-this-return-forwarding", cl::Hidden,
- cl::desc("Directly forward this return"),
- cl::init(false));
-
/// Value type used for condition codes.
static const MVT MVT_CC = MVT::i32;
@@ -520,6 +513,12 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setPrefFunctionAlignment(STI.getPrefFunctionAlignment());
setPrefLoopAlignment(STI.getPrefLoopAlignment());
+ // Only change the limit for entries in a jump table if specified by
+ // the subtarget, but not at the command line.
+ unsigned MaxJT = STI.getMaximumJumpTableSize();
+ if (MaxJT && getMaximumJumpTableSize() == 0)
+ setMaximumJumpTableSize(MaxJT);
+
setHasExtractBitsInsn(true);
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
@@ -764,7 +763,7 @@ void AArch64TargetLowering::computeKnownBitsForTargetNode(
case Intrinsic::aarch64_ldxr: {
unsigned BitWidth = KnownOne.getBitWidth();
EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
- unsigned MemBits = VT.getScalarType().getSizeInBits();
+ unsigned MemBits = VT.getScalarSizeInBits();
KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
return;
}
@@ -960,8 +959,10 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
case AArch64ISD::ST4LANEpost: return "AArch64ISD::ST4LANEpost";
case AArch64ISD::SMULL: return "AArch64ISD::SMULL";
case AArch64ISD::UMULL: return "AArch64ISD::UMULL";
- case AArch64ISD::FRSQRTE: return "AArch64ISD::FRSQRTE";
case AArch64ISD::FRECPE: return "AArch64ISD::FRECPE";
+ case AArch64ISD::FRECPS: return "AArch64ISD::FRECPS";
+ case AArch64ISD::FRSQRTE: return "AArch64ISD::FRSQRTE";
+ case AArch64ISD::FRSQRTS: return "AArch64ISD::FRSQRTS";
}
return nullptr;
}
@@ -1186,7 +1187,8 @@ static void changeVectorFPCCToAArch64CC(ISD::CondCode CC,
changeFPCCToAArch64CC(CC, CondCode, CondCode2);
break;
case ISD::SETUO:
- Invert = true; // Fallthrough
+ Invert = true;
+ LLVM_FALLTHROUGH;
case ISD::SETO:
CondCode = AArch64CC::MI;
CondCode2 = AArch64CC::GE;
@@ -2136,7 +2138,7 @@ static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
for (const SDValue &Elt : N->op_values()) {
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
- unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ unsigned EltSize = VT.getScalarSizeInBits();
unsigned HalfSize = EltSize / 2;
if (isSigned) {
if (!isIntN(HalfSize, C->getSExtValue()))
@@ -2163,7 +2165,7 @@ static SDValue skipExtensionForVectorMULL(SDNode *N, SelectionDAG &DAG) {
assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
EVT VT = N->getValueType(0);
SDLoc dl(N);
- unsigned EltSize = VT.getVectorElementType().getSizeInBits() / 2;
+ unsigned EltSize = VT.getScalarSizeInBits() / 2;
unsigned NumElts = VT.getVectorNumElements();
MVT TruncVT = MVT::getIntegerVT(EltSize);
SmallVector<SDValue, 8> Ops;
@@ -2435,18 +2437,25 @@ CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
case CallingConv::Fast:
case CallingConv::PreserveMost:
case CallingConv::CXX_FAST_TLS:
+ case CallingConv::Swift:
if (!Subtarget->isTargetDarwin())
return CC_AArch64_AAPCS;
return IsVarArg ? CC_AArch64_DarwinPCS_VarArg : CC_AArch64_DarwinPCS;
}
}
+CCAssignFn *
+AArch64TargetLowering::CCAssignFnForReturn(CallingConv::ID CC) const {
+ return CC == CallingConv::WebKit_JS ? RetCC_AArch64_WebKit_JS
+ : RetCC_AArch64_AAPCS;
+}
+
SDValue AArch64TargetLowering::LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
@@ -2499,7 +2508,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
// FIXME: This works on big-endian for composite byvals, which are the common
// case. It should also work for fundamental types too.
unsigned FrameIdx =
- MFI->CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
+ MFI.CreateFixedObject(8 * NumRegs, VA.getLocMemOffset(), false);
SDValue FrameIdxN = DAG.getFrameIndex(FrameIdx, PtrVT);
InVals.push_back(FrameIdxN);
@@ -2564,7 +2573,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
!Ins[i].Flags.isInConsecutiveRegs())
BEAlign = 8 - ArgSize;
- int FI = MFI->CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);
+ int FI = MFI.CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);
// Create load nodes to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
@@ -2614,7 +2623,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
unsigned StackOffset = CCInfo.getNextStackOffset();
// We currently pass all varargs at 8-byte alignment.
StackOffset = ((StackOffset + 7) & ~7);
- FuncInfo->setVarArgsStackIndex(MFI->CreateFixedObject(4, StackOffset, true));
+ FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
}
unsigned StackArgSize = CCInfo.getNextStackOffset();
@@ -2645,7 +2654,7 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
const SDLoc &DL,
SDValue &Chain) const {
MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
auto PtrVT = getPointerTy(DAG.getDataLayout());
@@ -2660,7 +2669,7 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
int GPRIdx = 0;
if (GPRSaveSize != 0) {
- GPRIdx = MFI->CreateStackObject(GPRSaveSize, 8, false);
+ GPRIdx = MFI.CreateStackObject(GPRSaveSize, 8, false);
SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT);
@@ -2688,7 +2697,7 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR);
int FPRIdx = 0;
if (FPRSaveSize != 0) {
- FPRIdx = MFI->CreateStackObject(FPRSaveSize, 16, false);
+ FPRIdx = MFI.CreateStackObject(FPRSaveSize, 16, false);
SDValue FIN = DAG.getFrameIndex(FPRIdx, PtrVT);
@@ -2735,7 +2744,7 @@ SDValue AArch64TargetLowering::LowerCallResult(
// Pass 'this' value directly from the argument to return value, to avoid
// reg unit interference
- if (i == 0 && isThisReturn && EnableThisRetForwarding) {
+ if (i == 0 && isThisReturn) {
assert(!VA.needsCustom() && VA.getLocVT() == MVT::i64 &&
"unexpected return calling convention register assignment");
InVals.push_back(ThisVal);
@@ -2763,15 +2772,29 @@ SDValue AArch64TargetLowering::LowerCallResult(
return Chain;
}
+/// Return true if the calling convention is one that we can guarantee TCO for.
+static bool canGuaranteeTCO(CallingConv::ID CC) {
+ return CC == CallingConv::Fast;
+}
+
+/// Return true if we might ever do TCO for calls with this calling convention.
+static bool mayTailCallThisCC(CallingConv::ID CC) {
+ switch (CC) {
+ case CallingConv::C:
+ case CallingConv::PreserveMost:
+ case CallingConv::Swift:
+ return true;
+ default:
+ return canGuaranteeTCO(CC);
+ }
+}
+
bool AArch64TargetLowering::isEligibleForTailCallOptimization(
SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
- // For CallingConv::C this function knows whether the ABI needs
- // changing. That's not true for other conventions so they will have to opt in
- // manually.
- if (!IsTailCallConvention(CalleeCC) && CalleeCC != CallingConv::C)
+ if (!mayTailCallThisCC(CalleeCC))
return false;
MachineFunction &MF = DAG.getMachineFunction();
@@ -2788,9 +2811,8 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
if (i->hasByValAttr())
return false;
- if (getTargetMachine().Options.GuaranteedTailCallOpt) {
- return IsTailCallConvention(CalleeCC) && CCMatch;
- }
+ if (getTargetMachine().Options.GuaranteedTailCallOpt)
+ return canGuaranteeTCO(CalleeCC) && CCMatch;
// Externally-defined functions with weak linkage should not be
// tail-called on AArch64 when the OS does not support dynamic
@@ -2872,11 +2894,11 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
SelectionDAG &DAG,
- MachineFrameInfo *MFI,
+ MachineFrameInfo &MFI,
int ClobberedFI) const {
SmallVector<SDValue, 8> ArgChains;
- int64_t FirstByte = MFI->getObjectOffset(ClobberedFI);
- int64_t LastByte = FirstByte + MFI->getObjectSize(ClobberedFI) - 1;
+ int64_t FirstByte = MFI.getObjectOffset(ClobberedFI);
+ int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1;
// Include the original chain at the beginning of the list. When this is
// used by target LowerCall hooks, this helps legalize find the
@@ -2890,9 +2912,9 @@ SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain,
if (LoadSDNode *L = dyn_cast<LoadSDNode>(*U))
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr()))
if (FI->getIndex() < 0) {
- int64_t InFirstByte = MFI->getObjectOffset(FI->getIndex());
+ int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex());
int64_t InLastByte = InFirstByte;
- InLastByte += MFI->getObjectSize(FI->getIndex()) - 1;
+ InLastByte += MFI.getObjectSize(FI->getIndex()) - 1;
if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) ||
(FirstByte <= InFirstByte && InFirstByte <= LastByte))
@@ -2908,11 +2930,6 @@ bool AArch64TargetLowering::DoesCalleeRestoreStack(CallingConv::ID CallCC,
return CallCC == CallingConv::Fast && TailCallOpt;
}
-bool AArch64TargetLowering::IsTailCallConvention(CallingConv::ID CallCC) const {
- return CallCC == CallingConv::Fast ||
- CallCC == CallingConv::PreserveMost;
-}
-
/// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain,
/// and add input and output parameter nodes.
SDValue
@@ -3119,7 +3136,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
if (IsTailCall) {
Offset = Offset + FPDiff;
- int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
+ int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
DstAddr = DAG.getFrameIndex(FI, PtrVT);
DstInfo =
@@ -3253,7 +3270,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
// If we're doing a tail call, use a TC_RETURN here rather than an
// actual call instruction.
if (IsTailCall) {
- MF.getFrameInfo()->setHasTailCall();
+ MF.getFrameInfo().setHasTailCall();
return DAG.getNode(AArch64ISD::TC_RETURN, DL, NodeTys, Ops);
}
@@ -3444,15 +3461,16 @@ AArch64TargetLowering::LowerDarwinGlobalTLSAddress(SDValue Op,
// The first entry in the descriptor is a function pointer that we must call
// to obtain the address of the variable.
SDValue Chain = DAG.getEntryNode();
- SDValue FuncTLVGet =
- DAG.getLoad(MVT::i64, DL, Chain, DescAddr,
- MachinePointerInfo::getGOT(DAG.getMachineFunction()),
- /* Alignment = */ 8, MachineMemOperand::MONonTemporal |
- MachineMemOperand::MOInvariant);
+ SDValue FuncTLVGet = DAG.getLoad(
+ MVT::i64, DL, Chain, DescAddr,
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()),
+ /* Alignment = */ 8,
+ MachineMemOperand::MONonTemporal | MachineMemOperand::MOInvariant |
+ MachineMemOperand::MODereferenceable);
Chain = FuncTLVGet.getValue(1);
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
- MFI->setAdjustsStack(true);
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI.setAdjustsStack(true);
// TLS calls preserve all registers except those that absolutely must be
// trashed: X0 (it takes an argument), LR (it's a call) and NZCV (let's not be
@@ -3705,7 +3723,7 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
// Don't combine AND since emitComparison converts the AND to an ANDS
// (a.k.a. TST) and the test in the test bit and branch instruction
// becomes redundant. This would also increase register pressure.
- uint64_t Mask = LHS.getValueType().getSizeInBits() - 1;
+ uint64_t Mask = LHS.getValueSizeInBits() - 1;
return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, LHS,
DAG.getConstant(Mask, dl, MVT::i64), Dest);
}
@@ -3715,7 +3733,7 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
// Don't combine AND since emitComparison converts the AND to an ANDS
// (a.k.a. TST) and the test in the test bit and branch instruction
// becomes redundant. This would also increase register pressure.
- uint64_t Mask = LHS.getValueType().getSizeInBits() - 1;
+ uint64_t Mask = LHS.getValueSizeInBits() - 1;
return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, LHS,
DAG.getConstant(Mask, dl, MVT::i64), Dest);
}
@@ -4036,6 +4054,33 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
}
}
+ // Avoid materializing a constant when possible by reusing a known value in
+ // a register. However, don't perform this optimization if the known value
+ // is one, zero or negative one in the case of a CSEL. We can always
+ // materialize these values using CSINC, CSEL and CSINV with wzr/xzr as the
+ // FVal, respectively.
+ ConstantSDNode *RHSVal = dyn_cast<ConstantSDNode>(RHS);
+ if (Opcode == AArch64ISD::CSEL && RHSVal && !RHSVal->isOne() &&
+ !RHSVal->isNullValue() && !RHSVal->isAllOnesValue()) {
+ AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
+ // Transform "a == C ? C : x" to "a == C ? a : x" and "a != C ? x : C" to
+ // "a != C ? x : a" to avoid materializing C.
+ if (CTVal && CTVal == RHSVal && AArch64CC == AArch64CC::EQ)
+ TVal = LHS;
+ else if (CFVal && CFVal == RHSVal && AArch64CC == AArch64CC::NE)
+ FVal = LHS;
+ } else if (Opcode == AArch64ISD::CSNEG && RHSVal && RHSVal->isOne()) {
+ assert (CTVal && CFVal && "Expected constant operands for CSNEG.");
+ // Use a CSINV to transform "a == C ? 1 : -1" to "a == C ? a : -1" to
+ // avoid materializing C.
+ AArch64CC::CondCode AArch64CC = changeIntCCToAArch64CC(CC);
+ if (CTVal == RHSVal && AArch64CC == AArch64CC::EQ) {
+ Opcode = AArch64ISD::CSINV;
+ TVal = LHS;
+ FVal = DAG.getConstant(0, dl, FVal.getValueType());
+ }
+ }
+
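One way to picture the rewrite above (illustrative registers only, not output captured from this code path): for "a == 7 ? 7 : x" the naive lowering needs an extra mov to materialize 7 as the CSEL's true operand, whereas "a == 7 ? a : x" can reuse the register that already holds a, roughly:

  cmp  w8, #7            // w8 holds a
  csel w0, w8, w1, eq    // w1 holds x; the result equals 7 exactly when eq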
SDValue CCVal;
SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
@@ -4053,6 +4098,26 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
// clean. Some of them require two CSELs to implement.
AArch64CC::CondCode CC1, CC2;
changeFPCCToAArch64CC(CC, CC1, CC2);
+
+ if (DAG.getTarget().Options.UnsafeFPMath) {
+ // Transform "a == 0.0 ? 0.0 : x" to "a == 0.0 ? a : x" and
+ // "a != 0.0 ? x : 0.0" to "a != 0.0 ? x : a" to avoid materializing 0.0.
+ ConstantFPSDNode *RHSVal = dyn_cast<ConstantFPSDNode>(RHS);
+ if (RHSVal && RHSVal->isZero()) {
+ ConstantFPSDNode *CFVal = dyn_cast<ConstantFPSDNode>(FVal);
+ ConstantFPSDNode *CTVal = dyn_cast<ConstantFPSDNode>(TVal);
+
+ if ((CC == ISD::SETEQ || CC == ISD::SETOEQ || CC == ISD::SETUEQ) &&
+ CTVal && CTVal->isZero() && TVal.getValueType() == LHS.getValueType())
+ TVal = LHS;
+ else if ((CC == ISD::SETNE || CC == ISD::SETONE || CC == ISD::SETUNE) &&
+ CFVal && CFVal->isZero() &&
+ FVal.getValueType() == LHS.getValueType())
+ FVal = LHS;
+ }
+ }
+
+ // Emit first, and possibly only, CSEL.
SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
SDValue CS1 = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
@@ -4378,8 +4443,8 @@ SDValue AArch64TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op,
SelectionDAG &DAG) const {
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
- MFI->setFrameAddressIsTaken(true);
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI.setFrameAddressIsTaken(true);
EVT VT = Op.getValueType();
SDLoc DL(Op);
@@ -4408,8 +4473,8 @@ unsigned AArch64TargetLowering::getRegisterByName(const char* RegName, EVT VT,
SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- MFI->setReturnAddressIsTaken(true);
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ MFI.setReturnAddressIsTaken(true);
EVT VT = Op.getValueType();
SDLoc DL(Op);
@@ -4559,38 +4624,96 @@ bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
// AArch64 Optimization Hooks
//===----------------------------------------------------------------------===//
-/// getEstimate - Return the appropriate estimate DAG for either the reciprocal
-/// or the reciprocal square root.
-static SDValue getEstimate(const AArch64Subtarget &ST,
- const AArch64TargetLowering::DAGCombinerInfo &DCI, unsigned Opcode,
- const SDValue &Operand, unsigned &ExtraSteps) {
- if (!ST.hasNEON())
- return SDValue();
-
+static SDValue getEstimate(const AArch64Subtarget *ST, unsigned Opcode,
+ SDValue Operand, SelectionDAG &DAG,
+ int &ExtraSteps) {
EVT VT = Operand.getValueType();
+ if (ST->hasNEON() &&
+ (VT == MVT::f64 || VT == MVT::v1f64 || VT == MVT::v2f64 ||
+ VT == MVT::f32 || VT == MVT::v1f32 ||
+ VT == MVT::v2f32 || VT == MVT::v4f32)) {
+ if (ExtraSteps == TargetLoweringBase::ReciprocalEstimate::Unspecified)
+ // For the reciprocal estimates, convergence is quadratic, so the number
+ // of digits is doubled after each iteration. In ARMv8, the accuracy of
+ // the initial estimate is 2^-8. Thus the number of extra steps to refine
+ // the result for float (23 mantissa bits) is 2 and for double (52
+ // mantissa bits) is 3.
+ ExtraSteps = VT == MVT::f64 ? 3 : 2;
- std::string RecipOp;
- RecipOp = Opcode == (AArch64ISD::FRECPE) ? "div": "sqrt";
- RecipOp = ((VT.isVector()) ? "vec-": "") + RecipOp;
- RecipOp += (VT.getScalarType() == MVT::f64) ? "d": "f";
+ return DAG.getNode(Opcode, SDLoc(Operand), VT, Operand);
+ }
- TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
- if (!Recips.isEnabled(RecipOp))
- return SDValue();
+ return SDValue();
+}
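The step counts chosen above follow from the quadratic convergence noted in the comment: starting from roughly 8 correct bits (accuracy 2^-8), each refinement step doubles the precision, so 8 -> 16 -> 32 covers a float's 23 mantissa bits in 2 steps and 8 -> 16 -> 32 -> 64 covers a double's 52 bits in 3.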
+
+SDValue AArch64TargetLowering::getSqrtEstimate(SDValue Operand,
+ SelectionDAG &DAG, int Enabled,
+ int &ExtraSteps,
+ bool &UseOneConst,
+ bool Reciprocal) const {
+ if (Enabled == ReciprocalEstimate::Enabled ||
+ (Enabled == ReciprocalEstimate::Unspecified && Subtarget->useRSqrt()))
+ if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRSQRTE, Operand,
+ DAG, ExtraSteps)) {
+ SDLoc DL(Operand);
+ EVT VT = Operand.getValueType();
+
+ SDNodeFlags Flags;
+ Flags.setUnsafeAlgebra(true);
+
+ // Newton reciprocal square root iteration: E * 0.5 * (3 - X * E^2)
+ // AArch64 reciprocal square root iteration instruction: 0.5 * (3 - M * N)
+ for (int i = ExtraSteps; i > 0; --i) {
+ SDValue Step = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Estimate,
+ &Flags);
+ Step = DAG.getNode(AArch64ISD::FRSQRTS, DL, VT, Operand, Step, &Flags);
+ Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, &Flags);
+ }
+
+ if (!Reciprocal) {
+ EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
+ VT);
+ SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
+ SDValue Eq = DAG.getSetCC(DL, CCVT, Operand, FPZero, ISD::SETEQ);
+
+ Estimate = DAG.getNode(ISD::FMUL, DL, VT, Operand, Estimate, &Flags);
+ // Correct the result if the operand is 0.0.
+ Estimate = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL,
+ VT, Eq, Operand, Estimate);
+ }
- ExtraSteps = Recips.getRefinementSteps(RecipOp);
- return DCI.DAG.getNode(Opcode, SDLoc(Operand), VT, Operand);
+ ExtraSteps = 0;
+ return Estimate;
+ }
+
+ return SDValue();
}
SDValue AArch64TargetLowering::getRecipEstimate(SDValue Operand,
- DAGCombinerInfo &DCI, unsigned &ExtraSteps) const {
- return getEstimate(*Subtarget, DCI, AArch64ISD::FRECPE, Operand, ExtraSteps);
-}
+ SelectionDAG &DAG, int Enabled,
+ int &ExtraSteps) const {
+ if (Enabled == ReciprocalEstimate::Enabled)
+ if (SDValue Estimate = getEstimate(Subtarget, AArch64ISD::FRECPE, Operand,
+ DAG, ExtraSteps)) {
+ SDLoc DL(Operand);
+ EVT VT = Operand.getValueType();
+
+ SDNodeFlags Flags;
+ Flags.setUnsafeAlgebra(true);
+
+ // Newton reciprocal iteration: E * (2 - X * E)
+ // AArch64 reciprocal iteration instruction: (2 - M * N)
+ for (int i = ExtraSteps; i > 0; --i) {
+ SDValue Step = DAG.getNode(AArch64ISD::FRECPS, DL, VT, Operand,
+ Estimate, &Flags);
+ Estimate = DAG.getNode(ISD::FMUL, DL, VT, Estimate, Step, &Flags);
+ }
-SDValue AArch64TargetLowering::getRsqrtEstimate(SDValue Operand,
- DAGCombinerInfo &DCI, unsigned &ExtraSteps, bool &UseOneConst) const {
- UseOneConst = true;
- return getEstimate(*Subtarget, DCI, AArch64ISD::FRSQRTE, Operand, ExtraSteps);
+ ExtraSteps = 0;
+ return Estimate;
+ }
+
+ return SDValue();
}
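Both refinement loops above are ordinary Newton iterations, as the in-line comments state: for the reciprocal, x(n+1) = x(n) * (2 - a * x(n)), with FRECPS producing the (2 - a*b) factor; for the reciprocal square root, x(n+1) = x(n) * 0.5 * (3 - a * x(n)^2), with FRSQRTS producing (3 - a*b)/2. In each case the dedicated instruction computes the correction factor and an FMUL applies it to the current estimate.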
//===----------------------------------------------------------------------===//
@@ -4704,7 +4827,9 @@ AArch64TargetLowering::getRegForInlineAsmConstraint(
return std::make_pair(0U, &AArch64::GPR64commonRegClass);
return std::make_pair(0U, &AArch64::GPR32commonRegClass);
case 'w':
- if (VT == MVT::f32)
+ if (VT.getSizeInBits() == 16)
+ return std::make_pair(0U, &AArch64::FPR16RegClass);
+ if (VT.getSizeInBits() == 32)
return std::make_pair(0U, &AArch64::FPR32RegClass);
if (VT.getSizeInBits() == 64)
return std::make_pair(0U, &AArch64::FPR64RegClass);
@@ -4971,7 +5096,7 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
// Add this element source to the list if it's not already there.
SDValue SourceVec = V.getOperand(0);
- auto Source = std::find(Sources.begin(), Sources.end(), SourceVec);
+ auto Source = find(Sources, SourceVec);
if (Source == Sources.end())
Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
@@ -4996,7 +5121,7 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
}
}
unsigned ResMultiplier =
- VT.getVectorElementType().getSizeInBits() / SmallestEltTy.getSizeInBits();
+ VT.getScalarSizeInBits() / SmallestEltTy.getSizeInBits();
NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
@@ -5081,21 +5206,21 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
// The stars all align, our next step is to produce the mask for the shuffle.
SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
- int BitsPerShuffleLane = ShuffleVT.getVectorElementType().getSizeInBits();
+ int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits();
for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
SDValue Entry = Op.getOperand(i);
if (Entry.isUndef())
continue;
- auto Src = std::find(Sources.begin(), Sources.end(), Entry.getOperand(0));
+ auto Src = find(Sources, Entry.getOperand(0));
int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
// EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
// trunc. So only std::min(SrcBits, DestBits) actually get defined in this
// segment.
EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
- int BitsDefined = std::min(OrigEltTy.getSizeInBits(),
- VT.getVectorElementType().getSizeInBits());
+ int BitsDefined =
+ std::min(OrigEltTy.getSizeInBits(), VT.getScalarSizeInBits());
int LanesDefined = BitsDefined / BitsPerShuffleLane;
// This source is expected to fill ResMultiplier lanes of the final shuffle,
@@ -5157,8 +5282,7 @@ static bool isSingletonEXTMask(ArrayRef<int> M, EVT VT, unsigned &Imm) {
static bool isEXTMask(ArrayRef<int> M, EVT VT, bool &ReverseEXT,
unsigned &Imm) {
// Look for the first non-undef element.
- const int *FirstRealElt = std::find_if(M.begin(), M.end(),
- [](int Elt) {return Elt >= 0;});
+ const int *FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
// Benefit form APInt to handle overflow when calculating expected element.
unsigned NumElts = VT.getVectorNumElements();
@@ -5200,7 +5324,7 @@ static bool isREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
"Only possible block sizes for REV are: 16, 32, 64");
- unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ unsigned EltSz = VT.getScalarSizeInBits();
if (EltSz == 64)
return false;
@@ -5381,7 +5505,7 @@ static SDValue tryFormConcatFromShuffle(SDValue Op, SelectionDAG &DAG) {
VT.getVectorElementType() != V1.getValueType().getVectorElementType())
return SDValue();
- bool SplitV0 = V0.getValueType().getSizeInBits() == 128;
+ bool SplitV0 = V0.getValueSizeInBits() == 128;
if (!isConcatMask(Mask, VT, SplitV0))
return SDValue();
@@ -5392,7 +5516,7 @@ static SDValue tryFormConcatFromShuffle(SDValue Op, SelectionDAG &DAG) {
V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V0,
DAG.getConstant(0, DL, MVT::i64));
}
- if (V1.getValueType().getSizeInBits() == 128) {
+ if (V1.getValueSizeInBits() == 128) {
V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, CastVT, V1,
DAG.getConstant(0, DL, MVT::i64));
}
@@ -5523,7 +5647,7 @@ static SDValue GenerateTBL(SDValue Op, ArrayRef<int> ShuffleMask,
MVT IndexVT = MVT::v8i8;
unsigned IndexLen = 8;
- if (Op.getValueType().getSizeInBits() == 128) {
+ if (Op.getValueSizeInBits() == 128) {
IndexVT = MVT::v16i8;
IndexLen = 16;
}
@@ -5918,7 +6042,7 @@ static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) {
// Is C1 == ~C2, taking into account how much one can shift elements of a
// particular size?
uint64_t C2 = C2node->getZExtValue();
- unsigned ElemSizeInBits = VT.getVectorElementType().getSizeInBits();
+ unsigned ElemSizeInBits = VT.getScalarSizeInBits();
if (C2 > ElemSizeInBits)
return SDValue();
unsigned ElemMask = (1 << ElemSizeInBits) - 1;
@@ -6351,7 +6475,7 @@ FailedModImm:
// DUPLANE works on 128-bit vectors, widen it if necessary.
SDValue Lane = Value.getOperand(1);
Value = Value.getOperand(0);
- if (Value.getValueType().getSizeInBits() == 64)
+ if (Value.getValueSizeInBits() == 64)
Value = WidenVector(Value, DAG);
unsigned Opcode = getDUPLANEOp(VT.getVectorElementType());
@@ -6414,7 +6538,7 @@ FailedModImm:
if (!isConstant && !usesOnlyOneValue) {
SDValue Vec = DAG.getUNDEF(VT);
SDValue Op0 = Op.getOperand(0);
- unsigned ElemSize = VT.getVectorElementType().getSizeInBits();
+ unsigned ElemSize = VT.getScalarSizeInBits();
unsigned i = 0;
// For 32 and 64 bit types, use INSERT_SUBREG for lane zero to
// a) Avoid a RMW dependency on the full vector register, and
@@ -6528,7 +6652,7 @@ SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
return SDValue();
unsigned Val = Cst->getZExtValue();
- unsigned Size = Op.getValueType().getSizeInBits();
+ unsigned Size = Op.getValueSizeInBits();
// This will get lowered to an appropriate EXTRACT_SUBREG in ISel.
if (Val == 0)
@@ -6536,7 +6660,7 @@ SDValue AArch64TargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
// If this is extracting the upper 64-bits of a 128-bit vector, we match
// that directly.
- if (Size == 64 && Val * VT.getVectorElementType().getSizeInBits() == 64)
+ if (Size == 64 && Val * VT.getScalarSizeInBits() == 64)
return Op;
return SDValue();
@@ -6606,7 +6730,7 @@ static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
/// 0 <= Value <= ElementBits for a long left shift.
static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
assert(VT.isVector() && "vector shift count is not a vector type");
- int64_t ElementBits = VT.getVectorElementType().getSizeInBits();
+ int64_t ElementBits = VT.getScalarSizeInBits();
if (!getVShiftImm(Op, ElementBits, Cnt))
return false;
return (Cnt >= 0 && (isLong ? Cnt - 1 : Cnt) < ElementBits);
@@ -6617,7 +6741,7 @@ static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
/// 1 <= Value <= ElementBits for a right shift; or
static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, int64_t &Cnt) {
assert(VT.isVector() && "vector shift count is not a vector type");
- int64_t ElementBits = VT.getVectorElementType().getSizeInBits();
+ int64_t ElementBits = VT.getScalarSizeInBits();
if (!getVShiftImm(Op, ElementBits, Cnt))
return false;
return (Cnt >= 1 && Cnt <= (isNarrow ? ElementBits / 2 : ElementBits));
@@ -6631,7 +6755,7 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
if (!Op.getOperand(1).getValueType().isVector())
return Op;
- unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ unsigned EltSize = VT.getScalarSizeInBits();
switch (Op.getOpcode()) {
default:
@@ -6716,8 +6840,8 @@ static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
case AArch64CC::LT:
if (!NoNans)
return SDValue();
- // If we ignore NaNs then we can use to the MI implementation.
- // Fallthrough.
+ // If we ignore NaNs then we can use the MI implementation.
+ LLVM_FALLTHROUGH;
case AArch64CC::MI:
if (IsZero)
return DAG.getNode(AArch64ISD::FCMLTz, dl, VT, LHS);
@@ -7033,8 +7157,8 @@ bool AArch64TargetLowering::isExtFreeImpl(const Instruction *Ext) const {
case Instruction::GetElementPtr: {
gep_type_iterator GTI = gep_type_begin(Instr);
auto &DL = Ext->getModule()->getDataLayout();
- std::advance(GTI, U.getOperandNo());
- Type *IdxTy = *GTI;
+ std::advance(GTI, U.getOperandNo()-1);
+ Type *IdxTy = GTI.getIndexedType();
// This extension will end up with a shift because of the scaling factor.
// 8-bit sized types have a scaling factor of 1, thus a shift amount of 0.
// Get the shift amount based on the scaling factor:
@@ -7052,7 +7176,7 @@ bool AArch64TargetLowering::isExtFreeImpl(const Instruction *Ext) const {
// trunc(sext ty1 to ty2) to ty1.
if (Instr->getType() == Ext->getOperand(0)->getType())
continue;
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
default:
return false;
}
@@ -7063,16 +7187,6 @@ bool AArch64TargetLowering::isExtFreeImpl(const Instruction *Ext) const {
return true;
}
-bool AArch64TargetLowering::hasPairedLoad(Type *LoadedType,
- unsigned &RequiredAligment) const {
- if (!LoadedType->isIntegerTy() && !LoadedType->isFloatTy())
- return false;
- // Cyclone supports unaligned accesses.
- RequiredAligment = 0;
- unsigned NumBits = LoadedType->getPrimitiveSizeInBits();
- return NumBits == 32 || NumBits == 64;
-}
-
bool AArch64TargetLowering::hasPairedLoad(EVT LoadedType,
unsigned &RequiredAligment) const {
if (!LoadedType.isSimple() ||
@@ -7167,7 +7281,7 @@ static Constant *getSequentialMask(IRBuilder<> &Builder, unsigned Start,
///
/// E.g. Lower an interleaved store (Factor = 3):
/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
-/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
+/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
/// store <12 x i32> %i.vec, <12 x i32>* %ptr
///
/// Into:
@@ -7178,6 +7292,17 @@ static Constant *getSequentialMask(IRBuilder<> &Builder, unsigned Start,
///
/// Note that the new shufflevectors will be removed and we'll only generate one
/// st3 instruction in CodeGen.
+///
+/// Example for a more general valid mask (Factor 3). Lower:
+/// %i.vec = shuffle <32 x i32> %v0, <32 x i32> %v1,
+/// <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
+/// store <12 x i32> %i.vec, <12 x i32>* %ptr
+///
+/// Into:
+/// %sub.v0 = shuffle <32 x i32> %v0, <32 x i32> v1, <4, 5, 6, 7>
+/// %sub.v1 = shuffle <32 x i32> %v0, <32 x i32> v1, <32, 33, 34, 35>
+/// %sub.v2 = shuffle <32 x i32> %v0, <32 x i32> v1, <16, 17, 18, 19>
+/// call void llvm.aarch64.neon.st3(%sub.v0, %sub.v1, %sub.v2, %ptr)
bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
ShuffleVectorInst *SVI,
unsigned Factor) const {
@@ -7188,9 +7313,9 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
assert(VecTy->getVectorNumElements() % Factor == 0 &&
"Invalid interleaved store");
- unsigned NumSubElts = VecTy->getVectorNumElements() / Factor;
+ unsigned LaneLen = VecTy->getVectorNumElements() / Factor;
Type *EltTy = VecTy->getVectorElementType();
- VectorType *SubVecTy = VectorType::get(EltTy, NumSubElts);
+ VectorType *SubVecTy = VectorType::get(EltTy, LaneLen);
const DataLayout &DL = SI->getModule()->getDataLayout();
unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy);
@@ -7215,7 +7340,7 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
- SubVecTy = VectorType::get(IntTy, NumSubElts);
+ SubVecTy = VectorType::get(IntTy, LaneLen);
}
Type *PtrTy = SubVecTy->getPointerTo(SI->getPointerAddressSpace());
@@ -7229,9 +7354,28 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
SmallVector<Value *, 5> Ops;
// Split the shufflevector operands into sub vectors for the new stN call.
- for (unsigned i = 0; i < Factor; i++)
- Ops.push_back(Builder.CreateShuffleVector(
- Op0, Op1, getSequentialMask(Builder, NumSubElts * i, NumSubElts)));
+ auto Mask = SVI->getShuffleMask();
+ for (unsigned i = 0; i < Factor; i++) {
+ if (Mask[i] >= 0) {
+ Ops.push_back(Builder.CreateShuffleVector(
+ Op0, Op1, getSequentialMask(Builder, Mask[i], LaneLen)));
+ } else {
+ unsigned StartMask = 0;
+ for (unsigned j = 1; j < LaneLen; j++) {
+ if (Mask[j*Factor + i] >= 0) {
+ StartMask = Mask[j*Factor + i] - j;
+ break;
+ }
+ }
+ // Note: If all elements in a chunk are undefs, StartMask=0!
+ // Note: Filling undef gaps with random elements is ok, since
+ // those elements were being written anyway (with undefs).
+ // In the case of all undefs we're defaulting to using elems from 0
+ // Note: StartMask cannot be negative, it's checked in isReInterleaveMask
+ Ops.push_back(Builder.CreateShuffleVector(
+ Op0, Op1, getSequentialMask(Builder, StartMask, LaneLen)));
+ }
+ }
Ops.push_back(Builder.CreateBitCast(SI->getPointerOperand(), PtrTy));
Builder.CreateCall(StNFunc, Ops);
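A concrete trace of the undef handling in the loop above, reusing the Factor = 3, LaneLen = 4 example from the header comment but with lane 0 of the first sub-vector undefined, i.e. Mask = <-1, 32, 16, 5, 33, 17, ...>: Mask[0] < 0, so the inner loop probes j = 1 and finds Mask[1 * Factor + 0] = 5, giving StartMask = 5 - 1 = 4; the first st3 operand is then built from sequential lanes 4..7, the same sub-vector the fully defined mask would have produced.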
@@ -7323,7 +7467,7 @@ bool AArch64TargetLowering::isLegalAddressingMode(const DataLayout &DL,
int64_t Offset = AM.BaseOffs;
// 9-bit signed offset
- if (Offset >= -(1LL << 9) && Offset <= (1LL << 9) - 1)
+ if (isInt<9>(Offset))
return true;
// 12-bit unsigned offset
@@ -7337,8 +7481,7 @@ bool AArch64TargetLowering::isLegalAddressingMode(const DataLayout &DL,
// Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2
- return !AM.Scale || AM.Scale == 1 ||
- (AM.Scale > 0 && (uint64_t)AM.Scale == NumBytes);
+ return AM.Scale == 1 || (AM.Scale > 0 && (uint64_t)AM.Scale == NumBytes);
}
int AArch64TargetLowering::getScalingFactorCost(const DataLayout &DL,
@@ -7544,57 +7687,98 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG,
if (DCI.isBeforeLegalizeOps())
return SDValue();
+ // The below optimizations require a constant RHS.
+ if (!isa<ConstantSDNode>(N->getOperand(1)))
+ return SDValue();
+
+ ConstantSDNode *C = cast<ConstantSDNode>(N->getOperand(1));
+ const APInt &ConstValue = C->getAPIntValue();
+
// Multiplication of a power of two plus/minus one can be done more
// cheaply as a shift+add/sub. For now, this is true unilaterally. If
// future CPUs have a cheaper MADD instruction, this may need to be
// gated on a subtarget feature. For Cyclone, 32-bit MADD is 4 cycles and
// 64-bit is 5 cycles, so this is always a win.
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
- const APInt &Value = C->getAPIntValue();
- EVT VT = N->getValueType(0);
- SDLoc DL(N);
- if (Value.isNonNegative()) {
- // (mul x, 2^N + 1) => (add (shl x, N), x)
- APInt VM1 = Value - 1;
- if (VM1.isPowerOf2()) {
- SDValue ShiftedVal =
- DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
- DAG.getConstant(VM1.logBase2(), DL, MVT::i64));
- return DAG.getNode(ISD::ADD, DL, VT, ShiftedVal,
- N->getOperand(0));
- }
- // (mul x, 2^N - 1) => (sub (shl x, N), x)
- APInt VP1 = Value + 1;
- if (VP1.isPowerOf2()) {
- SDValue ShiftedVal =
- DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
- DAG.getConstant(VP1.logBase2(), DL, MVT::i64));
- return DAG.getNode(ISD::SUB, DL, VT, ShiftedVal,
- N->getOperand(0));
- }
- } else {
- // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
- APInt VNP1 = -Value + 1;
- if (VNP1.isPowerOf2()) {
- SDValue ShiftedVal =
- DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
- DAG.getConstant(VNP1.logBase2(), DL, MVT::i64));
- return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0),
- ShiftedVal);
- }
- // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
- APInt VNM1 = -Value - 1;
- if (VNM1.isPowerOf2()) {
- SDValue ShiftedVal =
- DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
- DAG.getConstant(VNM1.logBase2(), DL, MVT::i64));
- SDValue Add =
- DAG.getNode(ISD::ADD, DL, VT, ShiftedVal, N->getOperand(0));
- return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Add);
- }
- }
+ // More aggressively, some multiplications N0 * C can be lowered to
+ // shift+add+shift if the constant C = A * B where A = 2^N + 1 and B = 2^M,
+ // e.g. 6=3*2=(2+1)*2.
+ // TODO: consider lowering more cases, e.g. C = 14, -6, -14 or even 45
+ // which equals to (1+2)*16-(1+2).
+ SDValue N0 = N->getOperand(0);
+ // TrailingZeroes is used to test if the mul can be lowered to
+ // shift+add+shift.
+ unsigned TrailingZeroes = ConstValue.countTrailingZeros();
+ if (TrailingZeroes) {
+ // Conservatively do not lower to shift+add+shift if the mul might be
+ // folded into smul or umul.
+ if (N0->hasOneUse() && (isSignExtended(N0.getNode(), DAG) ||
+ isZeroExtended(N0.getNode(), DAG)))
+ return SDValue();
+ // Conservatively do not lower to shift+add+shift if the mul might be
+ // folded into madd or msub.
+ if (N->hasOneUse() && (N->use_begin()->getOpcode() == ISD::ADD ||
+ N->use_begin()->getOpcode() == ISD::SUB))
+ return SDValue();
}
- return SDValue();
+ // Use ShiftedConstValue instead of ConstValue to support both shift+add/sub
+ // and shift+add+shift.
+ APInt ShiftedConstValue = ConstValue.ashr(TrailingZeroes);
+
+ unsigned ShiftAmt, AddSubOpc;
+ // Is the shifted value the LHS operand of the add/sub?
+ bool ShiftValUseIsN0 = true;
+ // Do we need to negate the result?
+ bool NegateResult = false;
+
+ if (ConstValue.isNonNegative()) {
+ // (mul x, 2^N + 1) => (add (shl x, N), x)
+ // (mul x, 2^N - 1) => (sub (shl x, N), x)
+ // (mul x, (2^N + 1) * 2^M) => (shl (add (shl x, N), x), M)
+ APInt SCVMinus1 = ShiftedConstValue - 1;
+ APInt CVPlus1 = ConstValue + 1;
+ if (SCVMinus1.isPowerOf2()) {
+ ShiftAmt = SCVMinus1.logBase2();
+ AddSubOpc = ISD::ADD;
+ } else if (CVPlus1.isPowerOf2()) {
+ ShiftAmt = CVPlus1.logBase2();
+ AddSubOpc = ISD::SUB;
+ } else
+ return SDValue();
+ } else {
+ // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
+ // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
+ APInt CVNegPlus1 = -ConstValue + 1;
+ APInt CVNegMinus1 = -ConstValue - 1;
+ if (CVNegPlus1.isPowerOf2()) {
+ ShiftAmt = CVNegPlus1.logBase2();
+ AddSubOpc = ISD::SUB;
+ ShiftValUseIsN0 = false;
+ } else if (CVNegMinus1.isPowerOf2()) {
+ ShiftAmt = CVNegMinus1.logBase2();
+ AddSubOpc = ISD::ADD;
+ NegateResult = true;
+ } else
+ return SDValue();
+ }
+
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ SDValue ShiftedVal = DAG.getNode(ISD::SHL, DL, VT, N0,
+ DAG.getConstant(ShiftAmt, DL, MVT::i64));
+
+ SDValue AddSubN0 = ShiftValUseIsN0 ? ShiftedVal : N0;
+ SDValue AddSubN1 = ShiftValUseIsN0 ? N0 : ShiftedVal;
+ SDValue Res = DAG.getNode(AddSubOpc, DL, VT, AddSubN0, AddSubN1);
+ assert(!(NegateResult && TrailingZeroes) &&
+ "NegateResult and TrailingZeroes cannot both be true for now.");
+ // Negate the result.
+ if (NegateResult)
+ return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
+ // Shift the result.
+ if (TrailingZeroes)
+ return DAG.getNode(ISD::SHL, DL, VT, Res,
+ DAG.getConstant(TrailingZeroes, DL, MVT::i64));
+ return Res;
}
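A quick trace of the new shift+add+shift path for (mul x, 6): ConstValue = 6 has one trailing zero, so ShiftedConstValue = 3 and SCVMinus1 = 2 is a power of two; hence ShiftAmt = 1, AddSubOpc = ISD::ADD, and Res = (x << 1) + x = 3*x. Because TrailingZeroes = 1, the combine returns Res << 1 = 6*x, which is the 6 = (2+1)*2 case called out in the comment.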
static SDValue performVectorCompareAndMaskUnaryOpCombine(SDNode *N,
@@ -7655,7 +7839,7 @@ static SDValue performIntToFpCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
// Only optimize when the source and destination types have the same width.
- if (VT.getSizeInBits() != N->getOperand(0).getValueType().getSizeInBits())
+ if (VT.getSizeInBits() != N->getOperand(0).getValueSizeInBits())
return SDValue();
// If the result of an integer load is only used by an integer-to-float
@@ -7757,13 +7941,15 @@ static SDValue performFpToIntCombine(SDNode *N, SelectionDAG &DAG,
/// Fold a floating-point divide by power of two into fixed-point to
/// floating-point conversion.
static SDValue performFDivCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
if (!Subtarget->hasNEON())
return SDValue();
SDValue Op = N->getOperand(0);
unsigned Opc = Op->getOpcode();
- if (!Op.getValueType().isVector() ||
+ if (!Op.getValueType().isVector() || !Op.getValueType().isSimple() ||
+ !Op.getOperand(0).getValueType().isSimple() ||
(Opc != ISD::SINT_TO_FP && Opc != ISD::UINT_TO_FP))
return SDValue();
@@ -7800,10 +7986,13 @@ static SDValue performFDivCombine(SDNode *N, SelectionDAG &DAG,
ResTy = FloatBits == 32 ? MVT::v2i32 : MVT::v2i64;
break;
case 4:
- ResTy = MVT::v4i32;
+ ResTy = FloatBits == 32 ? MVT::v4i32 : MVT::v4i64;
break;
}
+ if (ResTy == MVT::v4i64 && DCI.isBeforeLegalizeOps())
+ return SDValue();
+
SDLoc DL(N);
SDValue ConvInput = Op.getOperand(0);
bool IsSigned = Opc == ISD::SINT_TO_FP;
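Roughly what this combine is after (a sketch, not the exact DAG nodes it builds): a NEON divide such as (fdiv (sint_to_fp v4i32 %x), splat 16.0) can instead be emitted as a signed fixed-point convert with 4 fractional bits, an scvtf-by-#4 style operation, so the fdiv disappears and the power-of-two divisor turns into the fixed-point shift amount.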
@@ -7901,7 +8090,7 @@ static SDValue tryCombineToBSL(SDNode *N,
// We only have to look for constant vectors here since the general, variable
// case can be handled in TableGen.
- unsigned Bits = VT.getVectorElementType().getSizeInBits();
+ unsigned Bits = VT.getScalarSizeInBits();
uint64_t BitMask = Bits == 64 ? -1ULL : ((1ULL << Bits) - 1);
for (int i = 1; i >= 0; --i)
for (int j = 1; j >= 0; --j) {
@@ -8090,7 +8279,7 @@ static SDValue performConcatVectorsCombine(SDNode *N,
// splat. The indexed instructions are going to be expecting a DUPLANE64, so
// canonicalise to that.
if (N0 == N1 && VT.getVectorNumElements() == 2) {
- assert(VT.getVectorElementType().getSizeInBits() == 64);
+ assert(VT.getScalarSizeInBits() == 64);
return DAG.getNode(AArch64ISD::DUPLANE64, dl, VT, WidenVector(N0, DAG),
DAG.getConstant(0, dl, MVT::i64));
}
@@ -8153,7 +8342,7 @@ static SDValue tryCombineFixedPointConvert(SDNode *N,
// The vector width should be 128 bits by the time we get here, even
// if it started as 64 bits (the extract_vector handling will have
// done so).
- assert(Vec.getValueType().getSizeInBits() == 128 &&
+ assert(Vec.getValueSizeInBits() == 128 &&
"unexpected vector size on extract_vector_elt!");
if (Vec.getValueType() == MVT::v4i32)
VecResTy = MVT::v4f32;
@@ -8655,7 +8844,7 @@ static SDValue performExtendCombine(SDNode *N,
if (SrcVT.getSizeInBits() != 64)
return SDValue();
- unsigned SrcEltSize = SrcVT.getVectorElementType().getSizeInBits();
+ unsigned SrcEltSize = SrcVT.getScalarSizeInBits();
unsigned ElementCount = SrcVT.getVectorNumElements();
SrcVT = MVT::getVectorVT(MVT::getIntegerVT(SrcEltSize * 2), ElementCount);
SDLoc DL(N);
@@ -8684,13 +8873,100 @@ static SDValue performExtendCombine(SDNode *N,
return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
}
+static SDValue splitStoreSplat(SelectionDAG &DAG, StoreSDNode &St,
+ SDValue SplatVal, unsigned NumVecElts) {
+ unsigned OrigAlignment = St.getAlignment();
+ unsigned EltOffset = SplatVal.getValueType().getSizeInBits() / 8;
+
+ // Create scalar stores. This is at least as good as the code sequence for a
+ // split unaligned store which is a dup.s, ext.b, and two stores.
+ // Most of the time the three stores should be replaced by store pair
+ // instructions (stp).
+ SDLoc DL(&St);
+ SDValue BasePtr = St.getBasePtr();
+ SDValue NewST1 =
+ DAG.getStore(St.getChain(), DL, SplatVal, BasePtr, St.getPointerInfo(),
+ OrigAlignment, St.getMemOperand()->getFlags());
+
+ unsigned Offset = EltOffset;
+ while (--NumVecElts) {
+ unsigned Alignment = MinAlign(OrigAlignment, Offset);
+ SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
+ DAG.getConstant(Offset, DL, MVT::i64));
+ NewST1 = DAG.getStore(NewST1.getValue(0), DL, SplatVal, OffsetPtr,
+ St.getPointerInfo(), Alignment,
+ St.getMemOperand()->getFlags());
+ Offset += EltOffset;
+ }
+ return NewST1;
+}
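For illustration only (not part of the patch): with the MinAlign logic above, a v4i32 splat store that starts out 16-byte aligned becomes four scalar stores at offsets 0, 4, 8 and 12 with alignments 16, 4, 8 and 4. A minimal sketch checking that assumption:

#include <cassert>
#include "llvm/Support/MathExtras.h"

static void splatStoreAlignmentSketch() {
  unsigned OrigAlignment = 16, EltOffset = 4; // v4i32: 4-byte elements
  unsigned Expected[4] = {16, 4, 8, 4};       // offsets 0, 4, 8, 12
  unsigned Offset = 0;
  for (int I = 0; I < 4; ++I) {
    unsigned Align =
        I == 0 ? OrigAlignment : (unsigned)llvm::MinAlign(OrigAlignment, Offset);
    assert(Align == Expected[I]); // alignment shrinks with the element offset
    Offset += EltOffset;
  }
}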
+
+/// Replace a splat of zeros to a vector store by scalar stores of WZR/XZR. The
+/// load store optimizer pass will merge them to store pair stores. This should
+/// be better than a movi to create the vector zero followed by a vector store
+/// if the zero constant is not re-used, since one instruction and one register
+/// live range will be removed.
+///
+/// For example, the final generated code should be:
+///
+/// stp xzr, xzr, [x0]
+///
+/// instead of:
+///
+/// movi v0.2d, #0
+/// str q0, [x0]
+///
+static SDValue replaceZeroVectorStore(SelectionDAG &DAG, StoreSDNode &St) {
+ SDValue StVal = St.getValue();
+ EVT VT = StVal.getValueType();
+
+ // It is beneficial to scalarize a zero splat store for 2 or 3 i64 elements or
+ // 2, 3 or 4 i32 elements.
+ int NumVecElts = VT.getVectorNumElements();
+ if (!(((NumVecElts == 2 || NumVecElts == 3) &&
+ VT.getVectorElementType().getSizeInBits() == 64) ||
+ ((NumVecElts == 2 || NumVecElts == 3 || NumVecElts == 4) &&
+ VT.getVectorElementType().getSizeInBits() == 32)))
+ return SDValue();
+
+ if (StVal.getOpcode() != ISD::BUILD_VECTOR)
+ return SDValue();
+
+ // If the zero constant has more than one use then the vector store could be
+ // better since the constant mov will be amortized and stp q instructions
+ // should be able to be formed.
+ if (!StVal.hasOneUse())
+ return SDValue();
+
+ // If the immediate offset of the address operand is too large for the stp
+ // instruction, then bail out.
+ if (DAG.isBaseWithConstantOffset(St.getBasePtr())) {
+ int64_t Offset = St.getBasePtr()->getConstantOperandVal(1);
+ if (Offset < -512 || Offset > 504)
+ return SDValue();
+ }
+
+ for (int I = 0; I < NumVecElts; ++I) {
+ SDValue EltVal = StVal.getOperand(I);
+ if (!isNullConstant(EltVal) && !isNullFPConstant(EltVal))
+ return SDValue();
+ }
+
+ // Use WZR/XZR here to prevent DAGCombiner::MergeConsecutiveStores from
+ // undoing this transformation.
+ SDValue SplatVal = VT.getVectorElementType().getSizeInBits() == 32
+ ? DAG.getRegister(AArch64::WZR, MVT::i32)
+ : DAG.getRegister(AArch64::XZR, MVT::i64);
+ return splitStoreSplat(DAG, St, SplatVal, NumVecElts);
+}
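The -512/504 bounds above appear to come from the 64-bit STP encoding: its offset is a signed 7-bit immediate scaled by 8, so the reachable offsets are 8 * [-64, 63] = [-512, 504] bytes. An address offset outside that window could not be folded into the stp this combine is trying to enable.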
+
/// Replace a splat of a scalar to a vector store by scalar stores of the scalar
/// value. The load store optimizer pass will merge them to store pair stores.
/// This has better performance than a splat of the scalar followed by a split
/// vector store. Even if the stores are not merged it is four stores vs a dup,
/// followed by an ext.b and two stores.
-static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode *St) {
- SDValue StVal = St->getValue();
+static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode &St) {
+ SDValue StVal = St.getValue();
EVT VT = StVal.getValueType();
// Don't replace floating point stores, they possibly won't be transformed to
@@ -8698,55 +8974,48 @@ static SDValue replaceSplatVectorStore(SelectionDAG &DAG, StoreSDNode *St) {
if (VT.isFloatingPoint())
return SDValue();
- // Check for insert vector elements.
- if (StVal.getOpcode() != ISD::INSERT_VECTOR_ELT)
- return SDValue();
-
// We can express a splat as store pair(s) for 2 or 4 elements.
unsigned NumVecElts = VT.getVectorNumElements();
if (NumVecElts != 4 && NumVecElts != 2)
return SDValue();
- SDValue SplatVal = StVal.getOperand(1);
- unsigned RemainInsertElts = NumVecElts - 1;
// Check that this is a splat.
- while (--RemainInsertElts) {
- SDValue NextInsertElt = StVal.getOperand(0);
- if (NextInsertElt.getOpcode() != ISD::INSERT_VECTOR_ELT)
+ // Make sure that each of the relevant vector element locations are inserted
+ // to, i.e. 0 and 1 for v2i64 and 0, 1, 2, 3 for v4i32.
+ std::bitset<4> IndexNotInserted((1 << NumVecElts) - 1);
+ SDValue SplatVal;
+ for (unsigned I = 0; I < NumVecElts; ++I) {
+ // Check for insert vector elements.
+ if (StVal.getOpcode() != ISD::INSERT_VECTOR_ELT)
return SDValue();
- if (NextInsertElt.getOperand(1) != SplatVal)
+
+ // Check that same value is inserted at each vector element.
+ if (I == 0)
+ SplatVal = StVal.getOperand(1);
+ else if (StVal.getOperand(1) != SplatVal)
return SDValue();
- StVal = NextInsertElt;
- }
- unsigned OrigAlignment = St->getAlignment();
- unsigned EltOffset = NumVecElts == 4 ? 4 : 8;
- unsigned Alignment = std::min(OrigAlignment, EltOffset);
- // Create scalar stores. This is at least as good as the code sequence for a
- // split unaligned store which is a dup.s, ext.b, and two stores.
- // Most of the time the three stores should be replaced by store pair
- // instructions (stp).
- SDLoc DL(St);
- SDValue BasePtr = St->getBasePtr();
- SDValue NewST1 =
- DAG.getStore(St->getChain(), DL, SplatVal, BasePtr, St->getPointerInfo(),
- St->getAlignment(), St->getMemOperand()->getFlags());
+ // Check insert element index.
+ ConstantSDNode *CIndex = dyn_cast<ConstantSDNode>(StVal.getOperand(2));
+ if (!CIndex)
+ return SDValue();
+ uint64_t IndexVal = CIndex->getZExtValue();
+ if (IndexVal >= NumVecElts)
+ return SDValue();
+ IndexNotInserted.reset(IndexVal);
- unsigned Offset = EltOffset;
- while (--NumVecElts) {
- SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i64, BasePtr,
- DAG.getConstant(Offset, DL, MVT::i64));
- NewST1 = DAG.getStore(NewST1.getValue(0), DL, SplatVal, OffsetPtr,
- St->getPointerInfo(), Alignment,
- St->getMemOperand()->getFlags());
- Offset += EltOffset;
+ StVal = StVal.getOperand(0);
}
- return NewST1;
+ // Check that all vector element locations were inserted to.
+ if (IndexNotInserted.any())
+ return SDValue();
+
+ return splitStoreSplat(DAG, St, SplatVal, NumVecElts);
}
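A small sketch (not from the patch) of the lane-tracking idea used above, shown for the NumVecElts == 2 case:

#include <bitset>
#include <cassert>

static void laneTrackingSketch() {
  unsigned NumVecElts = 2;
  std::bitset<4> IndexNotInserted((1 << NumVecElts) - 1); // 0b0011
  IndexNotInserted.reset(0); // saw an insert into lane 0
  IndexNotInserted.reset(1); // saw an insert into lane 1
  assert(!IndexNotInserted.any()); // every lane was written, so this is a splat
}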
-static SDValue split16BStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
- SelectionDAG &DAG,
- const AArch64Subtarget *Subtarget) {
+static SDValue splitStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
+ SelectionDAG &DAG,
+ const AArch64Subtarget *Subtarget) {
if (!DCI.isBeforeLegalize())
return SDValue();
@@ -8754,6 +9023,17 @@ static SDValue split16BStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
if (S->isVolatile())
return SDValue();
+ SDValue StVal = S->getValue();
+ EVT VT = StVal.getValueType();
+ if (!VT.isVector())
+ return SDValue();
+
+ // If we get a splat of zeros, convert this vector store to a store of
+ // scalars. They will be merged into store pairs of xzr thereby removing one
+ // instruction and one register.
+ if (SDValue ReplacedZeroSplat = replaceZeroVectorStore(DAG, *S))
+ return ReplacedZeroSplat;
+
// FIXME: The logic for deciding if an unaligned store should be split should
// be included in TLI.allowsMisalignedMemoryAccesses(), and there should be
// a call to that function here.
@@ -8765,12 +9045,9 @@ static SDValue split16BStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
if (DAG.getMachineFunction().getFunction()->optForMinSize())
return SDValue();
- SDValue StVal = S->getValue();
- EVT VT = StVal.getValueType();
-
// Don't split v2i64 vectors. Memcpy lowering produces those and splitting
// those up regresses performance on micro-benchmarks and olden/bh.
- if (!VT.isVector() || VT.getVectorNumElements() < 2 || VT == MVT::v2i64)
+ if (VT.getVectorNumElements() < 2 || VT == MVT::v2i64)
return SDValue();
// Split unaligned 16B stores. They are terrible for performance.
@@ -8785,7 +9062,7 @@ static SDValue split16BStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
// If we get a splat of a scalar convert this vector store to a store of
// scalars. They will be merged into store pairs thereby removing two
// instructions.
- if (SDValue ReplacedSplat = replaceSplatVectorStore(DAG, S))
+ if (SDValue ReplacedSplat = replaceSplatVectorStore(DAG, *S))
return ReplacedSplat;
SDLoc DL(S);
@@ -8928,7 +9205,7 @@ static SDValue performSTORECombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG,
const AArch64Subtarget *Subtarget) {
- if (SDValue Split = split16BStores(N, DCI, DAG, Subtarget))
+ if (SDValue Split = splitStores(N, DCI, DAG, Subtarget))
return Split;
if (Subtarget->supportsAddressTopByteIgnored() &&
@@ -9862,7 +10139,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::FP_TO_UINT:
return performFpToIntCombine(N, DAG, DCI, Subtarget);
case ISD::FDIV:
- return performFDivCombine(N, DAG, Subtarget);
+ return performFDivCombine(N, DAG, DCI, Subtarget);
case ISD::OR:
return performORCombine(N, DCI, Subtarget);
case ISD::SRL:
@@ -9995,8 +10272,10 @@ bool AArch64TargetLowering::getIndexedAddressParts(SDNode *Op, SDValue &Base,
// All of the indexed addressing mode instructions take a signed
// 9 bit immediate offset.
if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
- int64_t RHSC = (int64_t)RHS->getZExtValue();
- if (RHSC >= 256 || RHSC <= -256)
+ int64_t RHSC = RHS->getSExtValue();
+ if (Op->getOpcode() == ISD::SUB)
+ RHSC = -(uint64_t)RHSC;
+ if (!isInt<9>(RHSC))
return false;
IsInc = (Op->getOpcode() == ISD::ADD);
Offset = Op->getOperand(1);
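A sketch of what the new check accepts (values chosen for illustration): isInt<9> covers exactly the signed 9-bit immediates the pre/post-indexed forms encode, and for ISD::SUB the immediate is negated before the test, so a subtracted constant behaves like a negative offset.

#include <cassert>
#include "llvm/Support/MathExtras.h"

static void indexedOffsetRangeSketch() {
  assert(llvm::isInt<9>(-256) && llvm::isInt<9>(255));   // in range
  assert(!llvm::isInt<9>(256) && !llvm::isInt<9>(-257)); // out of range
}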
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index c87cfed1f892..054ccc31674f 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -187,9 +187,9 @@ enum NodeType : unsigned {
SMULL,
UMULL,
- // Reciprocal estimates.
- FRECPE,
- FRSQRTE,
+ // Reciprocal estimates and steps.
+ FRECPE, FRECPS,
+ FRSQRTE, FRSQRTS,
// NEON Load/Store with post-increment base updates
LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
@@ -219,6 +219,21 @@ enum NodeType : unsigned {
} // end namespace AArch64ISD
+namespace {
+
+// Any instruction that defines a 32-bit result zeros out the high half of the
+// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
+// be copying from a truncate. But any other 32-bit operation will zero-extend
+// up to 64 bits.
+// FIXME: X86 also checks for CMOV here. Do we need something similar?
+static inline bool isDef32(const SDNode &N) {
+ unsigned Opc = N.getOpcode();
+ return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
+ Opc != ISD::CopyFromReg;
+}
+
+} // end anonymous namespace
+
class AArch64Subtarget;
class AArch64TargetMachine;
@@ -230,6 +245,9 @@ public:
/// Selects the correct CCAssignFn for a given CallingConvention value.
CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const;
+ /// Selects the correct CCAssignFn for return values for a given CallingConvention.
+ CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC) const;
+
/// Determine which of the bits specified in Mask are known to be either zero
/// or one and return them in the KnownZero/KnownOne bitsets.
void computeKnownBitsForTargetNode(const SDValue Op, APInt &KnownZero,
@@ -295,8 +313,6 @@ public:
bool isZExtFree(EVT VT1, EVT VT2) const override;
bool isZExtFree(SDValue Val, EVT VT2) const override;
- bool hasPairedLoad(Type *LoadedType,
- unsigned &RequiredAligment) const override;
bool hasPairedLoad(EVT LoadedType, unsigned &RequiredAligment) const override;
unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
@@ -396,6 +412,11 @@ public:
return true;
}
+ bool hasAndNotCompare(SDValue) const override {
+ // 'bics'
+ return true;
+ }
+
bool hasBitPreservingFPLogic(EVT VT) const override {
// FIXME: Is this always true? It should be true for vectors at least.
return VT == MVT::f32 || VT == MVT::f64;
@@ -453,12 +474,10 @@ private:
/// object and incorporates their load into the current chain. This prevents
/// an upcoming store from clobbering the stack argument before it's used.
SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG,
- MachineFrameInfo *MFI, int ClobberedFI) const;
+ MachineFrameInfo &MFI, int ClobberedFI) const;
bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const;
- bool IsTailCallConvention(CallingConv::ID CallCC) const;
-
void saveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, const SDLoc &DL,
SDValue &Chain) const;
@@ -520,11 +539,11 @@ private:
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
std::vector<SDNode *> *Created) const override;
- SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI,
- unsigned &RefinementSteps,
- bool &UseOneConstNR) const override;
- SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI,
- unsigned &RefinementSteps) const override;
+ SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
+ int &ExtraSteps, bool &UseOneConst,
+ bool Reciprocal) const override;
+ SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
+ int &ExtraSteps) const override;
unsigned combineRepeatedFPDivisors() const override;
ConstraintType getConstraintType(StringRef Constraint) const override;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrAtomics.td b/contrib/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
index 59de62ad2877..867074c3c374 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrAtomics.td
@@ -377,28 +377,28 @@ def : Pat<(int_aarch64_clrex), (CLREX 0xf)>;
// significantly more naive than the standard expansion: we conservatively
// assume seq_cst, strong cmpxchg and omit clrex on failure.
-let Constraints = "@earlyclobber $Rd,@earlyclobber $status",
+let Constraints = "@earlyclobber $Rd,@earlyclobber $scratch",
mayLoad = 1, mayStore = 1 in {
-def CMP_SWAP_8 : Pseudo<(outs GPR32:$Rd, GPR32:$status),
+def CMP_SWAP_8 : Pseudo<(outs GPR32:$Rd, GPR32:$scratch),
(ins GPR64:$addr, GPR32:$desired, GPR32:$new), []>,
Sched<[WriteAtomic]>;
-def CMP_SWAP_16 : Pseudo<(outs GPR32:$Rd, GPR32:$status),
+def CMP_SWAP_16 : Pseudo<(outs GPR32:$Rd, GPR32:$scratch),
(ins GPR64:$addr, GPR32:$desired, GPR32:$new), []>,
Sched<[WriteAtomic]>;
-def CMP_SWAP_32 : Pseudo<(outs GPR32:$Rd, GPR32:$status),
+def CMP_SWAP_32 : Pseudo<(outs GPR32:$Rd, GPR32:$scratch),
(ins GPR64:$addr, GPR32:$desired, GPR32:$new), []>,
Sched<[WriteAtomic]>;
-def CMP_SWAP_64 : Pseudo<(outs GPR64:$Rd, GPR32:$status),
+def CMP_SWAP_64 : Pseudo<(outs GPR64:$Rd, GPR32:$scratch),
(ins GPR64:$addr, GPR64:$desired, GPR64:$new), []>,
Sched<[WriteAtomic]>;
}
-let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi,@earlyclobber $status",
+let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi,@earlyclobber $scratch",
mayLoad = 1, mayStore = 1 in
-def CMP_SWAP_128 : Pseudo<(outs GPR64:$RdLo, GPR64:$RdHi, GPR32:$status),
+def CMP_SWAP_128 : Pseudo<(outs GPR64:$RdLo, GPR64:$RdHi, GPR32:$scratch),
(ins GPR64:$addr, GPR64:$desiredLo, GPR64:$desiredHi,
GPR64:$newLo, GPR64:$newHi), []>,
Sched<[WriteAtomic]>;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 34d35e961210..cefdf51b50d2 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -3936,27 +3936,27 @@ class BaseFPConversion<bits<2> type, bits<2> opcode, RegisterClass dstType,
multiclass FPConversion<string asm> {
// Double-precision to Half-precision
def HDr : BaseFPConversion<0b01, 0b11, FPR16, FPR64, asm,
- [(set FPR16:$Rd, (fround FPR64:$Rn))]>;
+ [(set FPR16:$Rd, (fpround FPR64:$Rn))]>;
// Double-precision to Single-precision
def SDr : BaseFPConversion<0b01, 0b00, FPR32, FPR64, asm,
- [(set FPR32:$Rd, (fround FPR64:$Rn))]>;
+ [(set FPR32:$Rd, (fpround FPR64:$Rn))]>;
// Half-precision to Double-precision
def DHr : BaseFPConversion<0b11, 0b01, FPR64, FPR16, asm,
- [(set FPR64:$Rd, (fextend FPR16:$Rn))]>;
+ [(set FPR64:$Rd, (fpextend FPR16:$Rn))]>;
// Half-precision to Single-precision
def SHr : BaseFPConversion<0b11, 0b00, FPR32, FPR16, asm,
- [(set FPR32:$Rd, (fextend FPR16:$Rn))]>;
+ [(set FPR32:$Rd, (fpextend FPR16:$Rn))]>;
// Single-precision to Double-precision
def DSr : BaseFPConversion<0b00, 0b01, FPR64, FPR32, asm,
- [(set FPR64:$Rd, (fextend FPR32:$Rn))]>;
+ [(set FPR64:$Rd, (fpextend FPR32:$Rn))]>;
// Single-precision to Half-precision
def HSr : BaseFPConversion<0b00, 0b11, FPR16, FPR32, asm,
- [(set FPR16:$Rd, (fround FPR32:$Rn))]>;
+ [(set FPR16:$Rd, (fpround FPR32:$Rn))]>;
}
//---
@@ -9348,7 +9348,7 @@ class SHAInstSS<bits<4> opc, string asm, Intrinsic OpNode>
// ST<OP>{<order>}[<size>] <Ws>, [<Xn|SP>]
// ST<OP>{<order>} <Xs>, [<Xn|SP>]
-let Predicates = [HasV8_1a], mayLoad = 1, mayStore = 1, hasSideEffects = 1 in
+let Predicates = [HasLSE], mayLoad = 1, mayStore = 1, hasSideEffects = 1 in
class BaseCASEncoding<dag oops, dag iops, string asm, string operands,
string cstr, list<dag> pattern>
: I<oops, iops, asm, operands, cstr, pattern> {
@@ -9369,6 +9369,7 @@ class BaseCASEncoding<dag oops, dag iops, string asm, string operands,
let Inst{14-10} = 0b11111;
let Inst{9-5} = Rn;
let Inst{4-0} = Rt;
+ let Predicates = [HasLSE];
}
class BaseCAS<string order, string size, RegisterClass RC>
@@ -9401,7 +9402,7 @@ multiclass CompareAndSwapPair<bits<1> Acq, bits<1> Rel, string order> {
def d : BaseCASP<order, "", XSeqPairClassOperand>;
}
-let Predicates = [HasV8_1a] in
+let Predicates = [HasLSE] in
class BaseSWP<string order, string size, RegisterClass RC>
: I<(outs RC:$Rt),(ins RC:$Rs, GPR64sp:$Rn), "swp" # order # size,
"\t$Rs, $Rt, [$Rn]","",[]>,
@@ -9424,6 +9425,7 @@ class BaseSWP<string order, string size, RegisterClass RC>
let Inst{11-10} = 0b00;
let Inst{9-5} = Rn;
let Inst{4-0} = Rt;
+ let Predicates = [HasLSE];
}
multiclass Swap<bits<1> Acq, bits<1> Rel, string order> {
@@ -9433,7 +9435,7 @@ multiclass Swap<bits<1> Acq, bits<1> Rel, string order> {
let Sz = 0b11, Acq = Acq, Rel = Rel in def d : BaseSWP<order, "", GPR64>;
}
-let Predicates = [HasV8_1a], mayLoad = 1, mayStore = 1, hasSideEffects = 1 in
+let Predicates = [HasLSE], mayLoad = 1, mayStore = 1, hasSideEffects = 1 in
class BaseLDOPregister<string op, string order, string size, RegisterClass RC>
: I<(outs RC:$Rt),(ins RC:$Rs, GPR64sp:$Rn), "ld" # op # order # size,
"\t$Rs, $Rt, [$Rn]","",[]>,
@@ -9456,6 +9458,7 @@ class BaseLDOPregister<string op, string order, string size, RegisterClass RC>
let Inst{11-10} = 0b00;
let Inst{9-5} = Rn;
let Inst{4-0} = Rt;
+ let Predicates = [HasLSE];
}
multiclass LDOPregister<bits<3> opc, string op, bits<1> Acq, bits<1> Rel,
@@ -9470,7 +9473,7 @@ multiclass LDOPregister<bits<3> opc, string op, bits<1> Acq, bits<1> Rel,
def d : BaseLDOPregister<op, order, "", GPR64>;
}
-let Predicates = [HasV8_1a] in
+let Predicates = [HasLSE] in
class BaseSTOPregister<string asm, RegisterClass OP, Register Reg,
Instruction inst> :
InstAlias<asm # "\t$Rs, [$Rn]", (inst Reg, OP:$Rs, GPR64sp:$Rn)>;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index d39542a8e4eb..b50749a29b89 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/StackMaps.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
@@ -29,16 +30,28 @@ using namespace llvm;
#define GET_INSTRINFO_CTOR_DTOR
#include "AArch64GenInstrInfo.inc"
-static LLVM_CONSTEXPR MachineMemOperand::Flags MOSuppressPair =
+static const MachineMemOperand::Flags MOSuppressPair =
MachineMemOperand::MOTargetFlag1;
+static cl::opt<unsigned>
+TBZDisplacementBits("aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
+ cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
+
+static cl::opt<unsigned>
+CBZDisplacementBits("aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
+ cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
+
+static cl::opt<unsigned>
+BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
+ cl::desc("Restrict range of Bcc instructions (DEBUG)"));
+
AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
: AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
RI(STI.getTargetTriple()), Subtarget(STI) {}
/// GetInstSize - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
-unsigned AArch64InstrInfo::GetInstSizeInBytes(const MachineInstr &MI) const {
+unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
const MachineBasicBlock &MBB = *MI.getParent();
const MachineFunction *MF = MBB.getParent();
const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
@@ -46,22 +59,41 @@ unsigned AArch64InstrInfo::GetInstSizeInBytes(const MachineInstr &MI) const {
if (MI.getOpcode() == AArch64::INLINEASM)
return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
+ // FIXME: We currently only handle pseudoinstructions that don't get expanded
+ // before the assembly printer.
+ unsigned NumBytes = 0;
const MCInstrDesc &Desc = MI.getDesc();
switch (Desc.getOpcode()) {
default:
- // Anything not explicitly designated otherwise is a nomal 4-byte insn.
- return 4;
+ // Anything not explicitly designated otherwise is a normal 4-byte insn.
+ NumBytes = 4;
+ break;
case TargetOpcode::DBG_VALUE:
case TargetOpcode::EH_LABEL:
case TargetOpcode::IMPLICIT_DEF:
case TargetOpcode::KILL:
- return 0;
+ NumBytes = 0;
+ break;
+ case TargetOpcode::STACKMAP:
+ // The upper bound for a stackmap intrinsic is the full length of its shadow
+ NumBytes = StackMapOpers(&MI).getNumPatchBytes();
+ assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
+ break;
+ case TargetOpcode::PATCHPOINT:
+ // The size of the patchpoint intrinsic is the number of bytes requested
+ NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
+ assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
+ break;
+ case AArch64::TLSDESC_CALLSEQ:
+ // This gets lowered to an instruction sequence which takes 16 bytes
+ NumBytes = 16;
+ break;
- case AArch64::TLSDESC_CALLSEQ:
- // This gets lowered to an instruction sequence which takes 16 bytes
- return 16;
}
- llvm_unreachable("GetInstSizeInBytes()- Unable to determin insn size");
+ return NumBytes;
}
static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
@@ -95,6 +127,56 @@ static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
}
}
+static unsigned getBranchDisplacementBits(unsigned Opc) {
+ switch (Opc) {
+ default:
+ llvm_unreachable("unexpected opcode!");
+ case AArch64::B:
+ return 64;
+ case AArch64::TBNZW:
+ case AArch64::TBZW:
+ case AArch64::TBNZX:
+ case AArch64::TBZX:
+ return TBZDisplacementBits;
+ case AArch64::CBNZW:
+ case AArch64::CBZW:
+ case AArch64::CBNZX:
+ case AArch64::CBZX:
+ return CBZDisplacementBits;
+ case AArch64::Bcc:
+ return BCCDisplacementBits;
+ }
+}
+
+bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
+ int64_t BrOffset) const {
+ unsigned Bits = getBranchDisplacementBits(BranchOp);
+ assert(Bits >= 3 && "max branch displacement must be enough to jump "
+ "over conditional branch expansion");
+ return isIntN(Bits, BrOffset / 4);
+}
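With the default option values above, the byte ranges work out as follows (the displacement is counted in 4-byte instructions, hence the division by 4): TB(N)Z gets 14 bits, roughly +/-32 KiB, while CB(N)Z and Bcc get 19 bits, roughly +/-1 MiB. A minimal sanity check of the TBZ case, assuming the defaults:

#include <cassert>
#include "llvm/Support/MathExtras.h"

static void branchRangeSketch() {
  assert(llvm::isIntN(14, 32764 / 4));  // farthest forward TBZ target
  assert(!llvm::isIntN(14, 32768 / 4)); // one instruction too far
}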
+
+MachineBasicBlock *AArch64InstrInfo::getBranchDestBlock(
+ const MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
+ default:
+ llvm_unreachable("unexpected opcode!");
+ case AArch64::B:
+ return MI.getOperand(0).getMBB();
+ case AArch64::TBZW:
+ case AArch64::TBNZW:
+ case AArch64::TBZX:
+ case AArch64::TBNZX:
+ return MI.getOperand(2).getMBB();
+ case AArch64::CBZW:
+ case AArch64::CBNZW:
+ case AArch64::CBZX:
+ case AArch64::CBNZX:
+ case AArch64::Bcc:
+ return MI.getOperand(1).getMBB();
+ }
+}
+
// Branch analysis.
bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TBB,
@@ -183,7 +265,7 @@ bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
return true;
}
-bool AArch64InstrInfo::ReverseBranchCondition(
+bool AArch64InstrInfo::reverseBranchCondition(
SmallVectorImpl<MachineOperand> &Cond) const {
if (Cond[0].getImm() != -1) {
// Regular Bcc
@@ -224,7 +306,8 @@ bool AArch64InstrInfo::ReverseBranchCondition(
return false;
}
-unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
+ int *BytesRemoved) const {
MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
if (I == MBB.end())
return 0;
@@ -238,14 +321,23 @@ unsigned AArch64InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
I = MBB.end();
- if (I == MBB.begin())
+ if (I == MBB.begin()) {
+ if (BytesRemoved)
+ *BytesRemoved = 4;
return 1;
+ }
--I;
- if (!isCondBranchOpcode(I->getOpcode()))
+ if (!isCondBranchOpcode(I->getOpcode())) {
+ if (BytesRemoved)
+ *BytesRemoved = 4;
return 1;
+ }
// Remove the branch.
I->eraseFromParent();
+ if (BytesRemoved)
+ *BytesRemoved = 8;
+
return 2;
}
@@ -266,25 +358,34 @@ void AArch64InstrInfo::instantiateCondBranch(
}
}
-unsigned AArch64InstrInfo::InsertBranch(MachineBasicBlock &MBB,
+unsigned AArch64InstrInfo::insertBranch(MachineBasicBlock &MBB,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
ArrayRef<MachineOperand> Cond,
- const DebugLoc &DL) const {
+ const DebugLoc &DL,
+ int *BytesAdded) const {
// Shouldn't be a fall through.
- assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert(TBB && "insertBranch must not be told to insert a fallthrough");
if (!FBB) {
if (Cond.empty()) // Unconditional branch?
BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
else
instantiateCondBranch(MBB, DL, TBB, Cond);
+
+ if (BytesAdded)
+ *BytesAdded = 4;
+
return 1;
}
// Two-way conditional branch.
instantiateCondBranch(MBB, DL, TBB, Cond);
BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
+
+ if (BytesAdded)
+ *BytesAdded = 8;
+
return 2;
}
@@ -318,7 +419,8 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
// if NZCV is used, do not fold.
if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
return 0;
- // fall-through to ADDXri and ADDWri.
+ // fall-through to ADDXri and ADDWri.
+ LLVM_FALLTHROUGH;
case AArch64::ADDXri:
case AArch64::ADDWri:
// add x, 1 -> csinc.
@@ -345,7 +447,8 @@ static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
// if NZCV is used, do not fold.
if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
return 0;
- // fall-through to SUBXrr and SUBWrr.
+ // fall-through to SUBXrr and SUBWrr.
+ LLVM_FALLTHROUGH;
case AArch64::SUBXrr:
case AArch64::SUBWrr: {
// neg x -> csneg, represented as sub dst, xzr, src.
@@ -861,9 +964,9 @@ static bool areCFlagsAccessedBetweenInstrs(
return true;
// From must be above To.
- assert(std::find_if(MachineBasicBlock::reverse_iterator(To),
- To->getParent()->rend(), [From](MachineInstr &MI) {
- return MachineBasicBlock::iterator(MI) == From;
+ assert(std::find_if(++To.getReverse(), To->getParent()->rend(),
+ [From](MachineInstr &MI) {
+ return MI.getIterator() == From;
}) != To->getParent()->rend());
// We iterate backward starting \p To until we hit \p From.
@@ -971,6 +1074,7 @@ static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) {
return false;
}
+namespace {
struct UsedNZCV {
bool N;
bool Z;
@@ -985,6 +1089,7 @@ struct UsedNZCV {
return *this;
}
};
+} // end anonymous namespace
/// Find a condition code used by the instruction.
/// Returns AArch64CC::Invalid if either the instruction does not use condition
@@ -1529,7 +1634,6 @@ bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
switch (MI.getOpcode()) {
default:
break;
-
case AArch64::LDURQi:
case AArch64::STURQi:
case AArch64::LDRQui:
@@ -1544,36 +1648,8 @@ bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
bool AArch64InstrInfo::getMemOpBaseRegImmOfs(
MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset,
const TargetRegisterInfo *TRI) const {
- switch (LdSt.getOpcode()) {
- default:
- return false;
- // Scaled instructions.
- case AArch64::STRSui:
- case AArch64::STRDui:
- case AArch64::STRQui:
- case AArch64::STRXui:
- case AArch64::STRWui:
- case AArch64::LDRSui:
- case AArch64::LDRDui:
- case AArch64::LDRQui:
- case AArch64::LDRXui:
- case AArch64::LDRWui:
- case AArch64::LDRSWui:
- // Unscaled instructions.
- case AArch64::STURSi:
- case AArch64::STURDi:
- case AArch64::STURQi:
- case AArch64::STURXi:
- case AArch64::STURWi:
- case AArch64::LDURSi:
- case AArch64::LDURDi:
- case AArch64::LDURQi:
- case AArch64::LDURWi:
- case AArch64::LDURXi:
- case AArch64::LDURSWi:
- unsigned Width;
- return getMemOpBaseRegImmOfsWidth(LdSt, BaseReg, Offset, Width, TRI);
- };
+ unsigned Width;
+ return getMemOpBaseRegImmOfsWidth(LdSt, BaseReg, Offset, Width, TRI);
}
bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
@@ -1772,6 +1848,9 @@ bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
if (NumLoads > 1)
return false;
+ if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
+ return false;
+
// Can we pair these instructions based on their opcodes?
unsigned FirstOpc = FirstLdSt.getOpcode();
unsigned SecondOpc = SecondLdSt.getOpcode();
@@ -1802,41 +1881,82 @@ bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
return Offset1 + 1 == Offset2;
}
-bool AArch64InstrInfo::shouldScheduleAdjacent(MachineInstr &First,
- MachineInstr &Second) const {
- if (Subtarget.hasMacroOpFusion()) {
+bool AArch64InstrInfo::shouldScheduleAdjacent(
+ const MachineInstr &First, const MachineInstr &Second) const {
+ if (Subtarget.hasArithmeticBccFusion()) {
// Fuse CMN, CMP, TST followed by Bcc.
unsigned SecondOpcode = Second.getOpcode();
if (SecondOpcode == AArch64::Bcc) {
switch (First.getOpcode()) {
default:
return false;
- case AArch64::SUBSWri:
case AArch64::ADDSWri:
- case AArch64::ANDSWri:
- case AArch64::SUBSXri:
+ case AArch64::ADDSWrr:
case AArch64::ADDSXri:
+ case AArch64::ADDSXrr:
+ case AArch64::ANDSWri:
+ case AArch64::ANDSWrr:
case AArch64::ANDSXri:
+ case AArch64::ANDSXrr:
+ case AArch64::SUBSWri:
+ case AArch64::SUBSWrr:
+ case AArch64::SUBSXri:
+ case AArch64::SUBSXrr:
+ case AArch64::BICSWrr:
+ case AArch64::BICSXrr:
return true;
+ case AArch64::ADDSWrs:
+ case AArch64::ADDSXrs:
+ case AArch64::ANDSWrs:
+ case AArch64::ANDSXrs:
+ case AArch64::SUBSWrs:
+ case AArch64::SUBSXrs:
+ case AArch64::BICSWrs:
+ case AArch64::BICSXrs:
+ // Shift value can be 0 making these behave like the "rr" variant...
+ return !hasShiftedReg(Second);
}
}
+ }
+ if (Subtarget.hasArithmeticCbzFusion()) {
// Fuse ALU operations followed by CBZ/CBNZ.
+ unsigned SecondOpcode = Second.getOpcode();
if (SecondOpcode == AArch64::CBNZW || SecondOpcode == AArch64::CBNZX ||
SecondOpcode == AArch64::CBZW || SecondOpcode == AArch64::CBZX) {
switch (First.getOpcode()) {
default:
return false;
case AArch64::ADDWri:
+ case AArch64::ADDWrr:
case AArch64::ADDXri:
+ case AArch64::ADDXrr:
case AArch64::ANDWri:
+ case AArch64::ANDWrr:
case AArch64::ANDXri:
+ case AArch64::ANDXrr:
case AArch64::EORWri:
+ case AArch64::EORWrr:
case AArch64::EORXri:
+ case AArch64::EORXrr:
case AArch64::ORRWri:
+ case AArch64::ORRWrr:
case AArch64::ORRXri:
+ case AArch64::ORRXrr:
case AArch64::SUBWri:
+ case AArch64::SUBWrr:
case AArch64::SUBXri:
+ case AArch64::SUBXrr:
return true;
+ case AArch64::ADDWrs:
+ case AArch64::ADDXrs:
+ case AArch64::ANDWrs:
+ case AArch64::ANDXrs:
+ case AArch64::SUBWrs:
+ case AArch64::SUBXrs:
+ case AArch64::BICWrs:
+ case AArch64::BICXrs:
+ // Shift value can be 0 making these behave like the "rr" variant...
+ return !hasShiftedReg(Second);
}
}
}
@@ -2189,7 +2309,7 @@ void AArch64InstrInfo::storeRegToStackSlot(
if (MBBI != MBB.end())
DL = MBBI->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
@@ -2293,7 +2413,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(
if (MBBI != MBB.end())
DL = MBBI->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
MachineMemOperand *MMO = MF.getMachineMemOperand(
@@ -2481,6 +2601,57 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
}
}
+ // Handle the case where a copy is being spilled or refilled but the source
+ // and destination register class don't match. For example:
+ //
+ // %vreg0<def> = COPY %XZR; GPR64common:%vreg0
+ //
+ // In this case we can still safely fold away the COPY and generate the
+ // following spill code:
+ //
+ // STRXui %XZR, <fi#0>
+ //
+ // This also eliminates spilled cross register class COPYs (e.g. between x and
+ // d regs) of the same size. For example:
+ //
+ // %vreg0<def> = COPY %vreg1; GPR64:%vreg0, FPR64:%vreg1
+ //
+ // will be refilled as
+ //
+ // LDRDui %vreg0, fi<#0>
+ //
+ // instead of
+ //
+ // LDRXui %vregTemp, fi<#0>
+ // %vreg0 = FMOV %vregTemp
+ //
+ if (MI.isFullCopy() && Ops.size() == 1 &&
+ // Make sure we're only folding the explicit COPY defs/uses.
+ (Ops[0] == 0 || Ops[0] == 1)) {
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ MachineBasicBlock &MBB = *MI.getParent();
+ const MachineOperand &DstMO = MI.getOperand(0);
+ const MachineOperand &SrcMO = MI.getOperand(1);
+ unsigned DstReg = DstMO.getReg();
+ unsigned SrcReg = SrcMO.getReg();
+ auto getRegClass = [&](unsigned Reg) {
+ return TargetRegisterInfo::isVirtualRegister(Reg)
+ ? MRI.getRegClass(Reg)
+ : TRI.getMinimalPhysRegClass(Reg);
+ };
+ const TargetRegisterClass &DstRC = *getRegClass(DstReg);
+ const TargetRegisterClass &SrcRC = *getRegClass(SrcReg);
+ if (DstRC.getSize() == SrcRC.getSize()) {
+ if (Ops[0] == 0)
+ storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
+ &SrcRC, &TRI);
+ else
+ loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, &DstRC, &TRI);
+ return &*--InsertPt;
+ }
+ }
+
// Cannot fold.
return nullptr;
}
@@ -2829,6 +3000,8 @@ static bool isCombineInstrCandidate64(unsigned Opc) {
// FP Opcodes that can be combined with a FMUL
static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
switch (Inst.getOpcode()) {
+ default:
+ break;
case AArch64::FADDSrr:
case AArch64::FADDDrr:
case AArch64::FADDv2f32:
@@ -2839,9 +3012,9 @@ static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
case AArch64::FSUBv2f32:
case AArch64::FSUBv2f64:
case AArch64::FSUBv4f32:
- return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
- default:
- break;
+ TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
+ return (Options.UnsafeFPMath ||
+ Options.AllowFPOpFusion == FPOpFusion::Fast);
}
return false;
}
@@ -3465,7 +3638,7 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
unsigned Val = Root.getOperand(3).getImm();
Imm = Imm << Val;
}
- uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
+ uint64_t UImm = SignExtend64(Imm, BitSize);
uint64_t Encoding;
if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
MachineInstrBuilder MIB1 =
@@ -3551,12 +3724,12 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
RC = &AArch64::GPR64RegClass;
}
unsigned NewVR = MRI.createVirtualRegister(OrrRC);
- int Imm = Root.getOperand(2).getImm();
+ uint64_t Imm = Root.getOperand(2).getImm();
if (Root.getOperand(3).isImm()) {
unsigned Val = Root.getOperand(3).getImm();
Imm = Imm << Val;
}
- uint64_t UImm = -Imm << (64 - BitSize) >> (64 - BitSize);
+ uint64_t UImm = SignExtend64(-Imm, BitSize);
uint64_t Encoding;
if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
MachineInstrBuilder MIB1 =
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index 24bc0e639747..90b2c0896872 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -27,7 +27,7 @@ namespace llvm {
class AArch64Subtarget;
class AArch64TargetMachine;
-class AArch64InstrInfo : public AArch64GenInstrInfo {
+class AArch64InstrInfo final : public AArch64GenInstrInfo {
const AArch64RegisterInfo RI;
const AArch64Subtarget &Subtarget;
@@ -39,7 +39,7 @@ public:
/// always be able to get register info as well (through this method).
const AArch64RegisterInfo &getRegisterInfo() const { return RI; }
- unsigned GetInstSizeInBytes(const MachineInstr &MI) const;
+ unsigned getInstSizeInBytes(const MachineInstr &MI) const override;
bool isAsCheapAsAMove(const MachineInstr &MI) const override;
@@ -87,6 +87,38 @@ public:
/// Return true if this is an unscaled load/store.
bool isUnscaledLdSt(MachineInstr &MI) const;
+ static bool isPairableLdStInst(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+ // Scaled instructions.
+ case AArch64::STRSui:
+ case AArch64::STRDui:
+ case AArch64::STRQui:
+ case AArch64::STRXui:
+ case AArch64::STRWui:
+ case AArch64::LDRSui:
+ case AArch64::LDRDui:
+ case AArch64::LDRQui:
+ case AArch64::LDRXui:
+ case AArch64::LDRWui:
+ case AArch64::LDRSWui:
+ // Unscaled instructions.
+ case AArch64::STURSi:
+ case AArch64::STURDi:
+ case AArch64::STURQi:
+ case AArch64::STURWi:
+ case AArch64::STURXi:
+ case AArch64::LDURSi:
+ case AArch64::LDURDi:
+ case AArch64::LDURQi:
+ case AArch64::LDURWi:
+ case AArch64::LDURXi:
+ case AArch64::LDURSWi:
+ return true;
+ }
+ }
+
/// Return true if this is a load/store that can be potentially paired/merged.
bool isCandidateToMergeOrPair(MachineInstr &MI) const;
@@ -101,15 +133,11 @@ public:
int64_t &Offset, unsigned &Width,
const TargetRegisterInfo *TRI) const;
- bool enableClusterLoads() const override { return true; }
-
- bool enableClusterStores() const override { return true; }
-
bool shouldClusterMemOps(MachineInstr &FirstLdSt, MachineInstr &SecondLdSt,
unsigned NumLoads) const override;
- bool shouldScheduleAdjacent(MachineInstr &First,
- MachineInstr &Second) const override;
+ bool shouldScheduleAdjacent(const MachineInstr &First,
+ const MachineInstr &Second) const override;
MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx,
uint64_t Offset, const MDNode *Var,
@@ -141,16 +169,25 @@ public:
MachineBasicBlock::iterator InsertPt, int FrameIndex,
LiveIntervals *LIS = nullptr) const override;
+ /// \returns true if a branch from an instruction with opcode \p BranchOpc
+ /// is capable of jumping to a position \p BrOffset bytes away.
+ bool isBranchOffsetInRange(unsigned BranchOpc,
+ int64_t BrOffset) const override;
+
+ MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;
+
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify = false) const override;
- unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
- unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ unsigned removeBranch(MachineBasicBlock &MBB,
+ int *BytesRemoved = nullptr) const override;
+ unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
- const DebugLoc &DL) const override;
+ const DebugLoc &DL,
+ int *BytesAdded = nullptr) const override;
bool
- ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
+ reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
bool canInsertSelect(const MachineBasicBlock &, ArrayRef<MachineOperand> Cond,
unsigned, unsigned, int &, int &, int &) const override;
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index af9ed812e6da..c5b95f282ea8 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -26,6 +26,8 @@ def HasCrypto : Predicate<"Subtarget->hasCrypto()">,
AssemblerPredicate<"FeatureCrypto", "crypto">;
def HasCRC : Predicate<"Subtarget->hasCRC()">,
AssemblerPredicate<"FeatureCRC", "crc">;
+def HasLSE : Predicate<"Subtarget->hasLSE()">,
+ AssemblerPredicate<"FeatureLSE", "lse">;
def HasRAS : Predicate<"Subtarget->hasRAS()">,
AssemblerPredicate<"FeatureRAS", "ras">;
def HasPerfMon : Predicate<"Subtarget->hasPerfMon()">;
@@ -287,7 +289,9 @@ def AArch64smull : SDNode<"AArch64ISD::SMULL", SDT_AArch64mull>;
def AArch64umull : SDNode<"AArch64ISD::UMULL", SDT_AArch64mull>;
def AArch64frecpe : SDNode<"AArch64ISD::FRECPE", SDTFPUnaryOp>;
+def AArch64frecps : SDNode<"AArch64ISD::FRECPS", SDTFPBinOp>;
def AArch64frsqrte : SDNode<"AArch64ISD::FRSQRTE", SDTFPUnaryOp>;
+def AArch64frsqrts : SDNode<"AArch64ISD::FRSQRTS", SDTFPBinOp>;
def AArch64saddv : SDNode<"AArch64ISD::SADDV", SDT_AArch64UnaryVec>;
def AArch64uaddv : SDNode<"AArch64ISD::UADDV", SDT_AArch64UnaryVec>;
@@ -1133,6 +1137,14 @@ def : Pat<(AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV),
(CSINCWr WZR, WZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV),
(CSINCXr XZR, XZR, (i32 imm:$cc))>;
+def : Pat<(AArch64csel GPR32:$tval, (i32 1), (i32 imm:$cc), NZCV),
+ (CSINCWr GPR32:$tval, WZR, (i32 imm:$cc))>;
+def : Pat<(AArch64csel GPR64:$tval, (i64 1), (i32 imm:$cc), NZCV),
+ (CSINCXr GPR64:$tval, XZR, (i32 imm:$cc))>;
+def : Pat<(AArch64csel (i32 1), GPR32:$fval, (i32 imm:$cc), NZCV),
+ (CSINCWr GPR32:$fval, WZR, (i32 (inv_cond_XFORM imm:$cc)))>;
+def : Pat<(AArch64csel (i64 1), GPR64:$fval, (i32 imm:$cc), NZCV),
+ (CSINCXr GPR64:$fval, XZR, (i32 (inv_cond_XFORM imm:$cc)))>;
def : Pat<(AArch64csel (i32 0), (i32 -1), (i32 imm:$cc), NZCV),
(CSINVWr WZR, WZR, (i32 imm:$cc))>;
def : Pat<(AArch64csel (i64 0), (i64 -1), (i32 imm:$cc), NZCV),
@@ -2545,8 +2557,8 @@ defm : FPToIntegerPats<fp_to_sint, ffloor, "FCVTMS">;
defm : FPToIntegerPats<fp_to_uint, ffloor, "FCVTMU">;
defm : FPToIntegerPats<fp_to_sint, ftrunc, "FCVTZS">;
defm : FPToIntegerPats<fp_to_uint, ftrunc, "FCVTZU">;
-defm : FPToIntegerPats<fp_to_sint, frnd, "FCVTAS">;
-defm : FPToIntegerPats<fp_to_uint, frnd, "FCVTAU">;
+defm : FPToIntegerPats<fp_to_sint, fround, "FCVTAS">;
+defm : FPToIntegerPats<fp_to_uint, fround, "FCVTAU">;
//===----------------------------------------------------------------------===//
// Scaled integer to floating point conversion instructions.
@@ -2582,7 +2594,7 @@ defm FCVT : FPConversion<"fcvt">;
defm FABS : SingleOperandFPData<0b0001, "fabs", fabs>;
defm FMOV : SingleOperandFPData<0b0000, "fmov">;
defm FNEG : SingleOperandFPData<0b0010, "fneg", fneg>;
-defm FRINTA : SingleOperandFPData<0b1100, "frinta", frnd>;
+defm FRINTA : SingleOperandFPData<0b1100, "frinta", fround>;
defm FRINTI : SingleOperandFPData<0b1111, "frinti", fnearbyint>;
defm FRINTM : SingleOperandFPData<0b1010, "frintm", ffloor>;
defm FRINTN : SingleOperandFPData<0b1000, "frintn", int_aarch64_neon_frintn>;
@@ -2788,13 +2800,13 @@ def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (v4i16 V64:$Rn))),
def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (extract_subvector (v8i16 V128:$Rn),
(i64 4)))),
(FCVTLv8i16 V128:$Rn)>;
-def : Pat<(v2f64 (fextend (v2f32 V64:$Rn))), (FCVTLv2i32 V64:$Rn)>;
-def : Pat<(v2f64 (fextend (v2f32 (extract_subvector (v4f32 V128:$Rn),
+def : Pat<(v2f64 (fpextend (v2f32 V64:$Rn))), (FCVTLv2i32 V64:$Rn)>;
+def : Pat<(v2f64 (fpextend (v2f32 (extract_subvector (v4f32 V128:$Rn),
(i64 2))))),
(FCVTLv4i32 V128:$Rn)>;
-def : Pat<(v4f32 (fextend (v4f16 V64:$Rn))), (FCVTLv4i16 V64:$Rn)>;
-def : Pat<(v4f32 (fextend (v4f16 (extract_subvector (v8f16 V128:$Rn),
+def : Pat<(v4f32 (fpextend (v4f16 V64:$Rn))), (FCVTLv4i16 V64:$Rn)>;
+def : Pat<(v4f32 (fpextend (v4f16 (extract_subvector (v8f16 V128:$Rn),
(i64 4))))),
(FCVTLv8i16 V128:$Rn)>;
@@ -2808,9 +2820,9 @@ def : Pat<(v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn))),
def : Pat<(concat_vectors V64:$Rd,
(v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn)))),
(FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
-def : Pat<(v2f32 (fround (v2f64 V128:$Rn))), (FCVTNv2i32 V128:$Rn)>;
-def : Pat<(v4f16 (fround (v4f32 V128:$Rn))), (FCVTNv4i16 V128:$Rn)>;
-def : Pat<(concat_vectors V64:$Rd, (v2f32 (fround (v2f64 V128:$Rn)))),
+def : Pat<(v2f32 (fpround (v2f64 V128:$Rn))), (FCVTNv2i32 V128:$Rn)>;
+def : Pat<(v4f16 (fpround (v4f32 V128:$Rn))), (FCVTNv4i16 V128:$Rn)>;
+def : Pat<(concat_vectors V64:$Rd, (v2f32 (fpround (v2f64 V128:$Rn)))),
(FCVTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
defm FCVTPS : SIMDTwoVectorFPToInt<0,1,0b11010, "fcvtps",int_aarch64_neon_fcvtps>;
defm FCVTPU : SIMDTwoVectorFPToInt<1,1,0b11010, "fcvtpu",int_aarch64_neon_fcvtpu>;
@@ -2833,7 +2845,7 @@ def : Pat<(v2i64 (int_aarch64_neon_fcvtzu v2f64:$Rn)), (FCVTZUv2f64 $Rn)>;
defm FNEG : SIMDTwoVectorFP<1, 1, 0b01111, "fneg", fneg>;
defm FRECPE : SIMDTwoVectorFP<0, 1, 0b11101, "frecpe", int_aarch64_neon_frecpe>;
-defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", frnd>;
+defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", fround>;
defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", fnearbyint>;
defm FRINTM : SIMDTwoVectorFP<0, 0, 0b11001, "frintm", ffloor>;
defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", int_aarch64_neon_frintn>;
@@ -3414,6 +3426,17 @@ def : Pat<(v1f64 (AArch64frecpe (v1f64 FPR64:$Rn))),
def : Pat<(v2f64 (AArch64frecpe (v2f64 FPR128:$Rn))),
(FRECPEv2f64 FPR128:$Rn)>;
+def : Pat<(f32 (AArch64frecps (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
+ (FRECPS32 FPR32:$Rn, FPR32:$Rm)>;
+def : Pat<(v2f32 (AArch64frecps (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
+ (FRECPSv2f32 V64:$Rn, V64:$Rm)>;
+def : Pat<(v4f32 (AArch64frecps (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm))),
+ (FRECPSv4f32 FPR128:$Rn, FPR128:$Rm)>;
+def : Pat<(f64 (AArch64frecps (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
+ (FRECPS64 FPR64:$Rn, FPR64:$Rm)>;
+def : Pat<(v2f64 (AArch64frecps (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
+ (FRECPSv2f64 FPR128:$Rn, FPR128:$Rm)>;
+
def : Pat<(f32 (int_aarch64_neon_frecpx (f32 FPR32:$Rn))),
(FRECPXv1i32 FPR32:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_frecpx (f64 FPR64:$Rn))),
@@ -3439,6 +3462,17 @@ def : Pat<(v1f64 (AArch64frsqrte (v1f64 FPR64:$Rn))),
def : Pat<(v2f64 (AArch64frsqrte (v2f64 FPR128:$Rn))),
(FRSQRTEv2f64 FPR128:$Rn)>;
+def : Pat<(f32 (AArch64frsqrts (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
+ (FRSQRTS32 FPR32:$Rn, FPR32:$Rm)>;
+def : Pat<(v2f32 (AArch64frsqrts (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
+ (FRSQRTSv2f32 V64:$Rn, V64:$Rm)>;
+def : Pat<(v4f32 (AArch64frsqrts (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm))),
+ (FRSQRTSv4f32 FPR128:$Rn, FPR128:$Rm)>;
+def : Pat<(f64 (AArch64frsqrts (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
+ (FRSQRTS64 FPR64:$Rn, FPR64:$Rm)>;
+def : Pat<(v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
+ (FRSQRTSv2f64 FPR128:$Rn, FPR128:$Rm)>;
+
// If an integer is about to be converted to a floating point value,
// just load it on the floating point unit.
// Here are the patterns for 8 and 16-bits to float.
@@ -5293,15 +5327,8 @@ def SHA256SU0rr : SHATiedInstVV<0b0010, "sha256su0",int_aarch64_crypto_sha256su0
//----------------------------------------------------------------------------
// FIXME: Like for X86, these should go in their own separate .td file.
-// Any instruction that defines a 32-bit result leaves the high half of the
-// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
-// be copying from a truncate. But any other 32-bit operation will zero-extend
-// up to 64 bits.
-// FIXME: X86 also checks for CMOV here. Do we need something similar?
def def32 : PatLeaf<(i32 GPR32:$src), [{
- return N->getOpcode() != ISD::TRUNCATE &&
- N->getOpcode() != TargetOpcode::EXTRACT_SUBREG &&
- N->getOpcode() != ISD::CopyFromReg;
+ return isDef32(*N);
}]>;
// In the case of a 32-bit def that is known to implicitly zero-extend,
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
new file mode 100644
index 000000000000..20de07424c53
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -0,0 +1,1161 @@
+//===- AArch64InstructionSelector.cpp ----------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the targeting of the InstructionSelector class for
+/// AArch64.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#include "AArch64InstructionSelector.h"
+#include "AArch64InstrInfo.h"
+#include "AArch64RegisterBankInfo.h"
+#include "AArch64RegisterInfo.h"
+#include "AArch64Subtarget.h"
+#include "AArch64TargetMachine.h"
+#include "MCTargetDesc/AArch64AddressingModes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "aarch64-isel"
+
+using namespace llvm;
+
+#ifndef LLVM_BUILD_GLOBAL_ISEL
+#error "You shouldn't build this"
+#endif
+
+#include "AArch64GenGlobalISel.inc"
+
+AArch64InstructionSelector::AArch64InstructionSelector(
+ const AArch64TargetMachine &TM, const AArch64Subtarget &STI,
+ const AArch64RegisterBankInfo &RBI)
+ : InstructionSelector(), TM(TM), STI(STI), TII(*STI.getInstrInfo()),
+ TRI(*STI.getRegisterInfo()), RBI(RBI) {}
+
+// FIXME: This should be target-independent, inferred from the types declared
+// for each class in the bank.
+static const TargetRegisterClass *
+getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
+ const RegisterBankInfo &RBI) {
+ if (RB.getID() == AArch64::GPRRegBankID) {
+ if (Ty.getSizeInBits() <= 32)
+ return &AArch64::GPR32RegClass;
+ if (Ty.getSizeInBits() == 64)
+ return &AArch64::GPR64RegClass;
+ return nullptr;
+ }
+
+ if (RB.getID() == AArch64::FPRRegBankID) {
+ if (Ty.getSizeInBits() == 32)
+ return &AArch64::FPR32RegClass;
+ if (Ty.getSizeInBits() == 64)
+ return &AArch64::FPR64RegClass;
+ if (Ty.getSizeInBits() == 128)
+ return &AArch64::FPR128RegClass;
+ return nullptr;
+ }
+
+ return nullptr;
+}
+
+/// Check whether \p I is a currently unsupported binary operation:
+/// - it has an unsized type
+/// - an operand is not a vreg
+/// - all operands are not in the same bank
+/// These are checks that should someday live in the verifier, but right now,
+/// these are mostly limitations of the aarch64 selector.
+static bool unsupportedBinOp(const MachineInstr &I,
+ const AArch64RegisterBankInfo &RBI,
+ const MachineRegisterInfo &MRI,
+ const AArch64RegisterInfo &TRI) {
+ LLT Ty = MRI.getType(I.getOperand(0).getReg());
+ if (!Ty.isValid()) {
+ DEBUG(dbgs() << "Generic binop register should be typed\n");
+ return true;
+ }
+
+ const RegisterBank *PrevOpBank = nullptr;
+ for (auto &MO : I.operands()) {
+ // FIXME: Support non-register operands.
+ if (!MO.isReg()) {
+ DEBUG(dbgs() << "Generic inst non-reg operands are unsupported\n");
+ return true;
+ }
+
+ // FIXME: Can generic operations have physical registers operands? If
+ // so, this will need to be taught about that, and we'll need to get the
+ // bank out of the minimal class for the register.
+ // Either way, this needs to be documented (and possibly verified).
+ if (!TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ DEBUG(dbgs() << "Generic inst has physical register operand\n");
+ return true;
+ }
+
+ const RegisterBank *OpBank = RBI.getRegBank(MO.getReg(), MRI, TRI);
+ if (!OpBank) {
+ DEBUG(dbgs() << "Generic register has no bank or class\n");
+ return true;
+ }
+
+ if (PrevOpBank && OpBank != PrevOpBank) {
+ DEBUG(dbgs() << "Generic inst operands have different banks\n");
+ return true;
+ }
+ PrevOpBank = OpBank;
+ }
+ return false;
+}
+
+/// Select the AArch64 opcode for the basic binary operation \p GenericOpc
+/// (such as G_OR or G_ADD), appropriate for the register bank \p RegBankID
+/// and of size \p OpSize.
+/// \returns \p GenericOpc if the combination is unsupported.
+static unsigned selectBinaryOp(unsigned GenericOpc, unsigned RegBankID,
+ unsigned OpSize) {
+ switch (RegBankID) {
+ case AArch64::GPRRegBankID:
+ if (OpSize <= 32) {
+ assert((OpSize == 32 || (GenericOpc != TargetOpcode::G_SDIV &&
+ GenericOpc != TargetOpcode::G_UDIV &&
+ GenericOpc != TargetOpcode::G_LSHR &&
+ GenericOpc != TargetOpcode::G_ASHR)) &&
+ "operation should have been legalized before now");
+
+ switch (GenericOpc) {
+ case TargetOpcode::G_OR:
+ return AArch64::ORRWrr;
+ case TargetOpcode::G_XOR:
+ return AArch64::EORWrr;
+ case TargetOpcode::G_AND:
+ return AArch64::ANDWrr;
+ case TargetOpcode::G_ADD:
+ assert(OpSize != 32 && "s32 G_ADD should have been selected");
+ return AArch64::ADDWrr;
+ case TargetOpcode::G_SUB:
+ return AArch64::SUBWrr;
+ case TargetOpcode::G_SHL:
+ return AArch64::LSLVWr;
+ case TargetOpcode::G_LSHR:
+ return AArch64::LSRVWr;
+ case TargetOpcode::G_ASHR:
+ return AArch64::ASRVWr;
+ case TargetOpcode::G_SDIV:
+ return AArch64::SDIVWr;
+ case TargetOpcode::G_UDIV:
+ return AArch64::UDIVWr;
+ default:
+ return GenericOpc;
+ }
+ } else if (OpSize == 64) {
+ switch (GenericOpc) {
+ case TargetOpcode::G_OR:
+ return AArch64::ORRXrr;
+ case TargetOpcode::G_XOR:
+ return AArch64::EORXrr;
+ case TargetOpcode::G_AND:
+ return AArch64::ANDXrr;
+ case TargetOpcode::G_GEP:
+ return AArch64::ADDXrr;
+ case TargetOpcode::G_SUB:
+ return AArch64::SUBXrr;
+ case TargetOpcode::G_SHL:
+ return AArch64::LSLVXr;
+ case TargetOpcode::G_LSHR:
+ return AArch64::LSRVXr;
+ case TargetOpcode::G_ASHR:
+ return AArch64::ASRVXr;
+ case TargetOpcode::G_SDIV:
+ return AArch64::SDIVXr;
+ case TargetOpcode::G_UDIV:
+ return AArch64::UDIVXr;
+ default:
+ return GenericOpc;
+ }
+ }
+ case AArch64::FPRRegBankID:
+ switch (OpSize) {
+ case 32:
+ switch (GenericOpc) {
+ case TargetOpcode::G_FADD:
+ return AArch64::FADDSrr;
+ case TargetOpcode::G_FSUB:
+ return AArch64::FSUBSrr;
+ case TargetOpcode::G_FMUL:
+ return AArch64::FMULSrr;
+ case TargetOpcode::G_FDIV:
+ return AArch64::FDIVSrr;
+ default:
+ return GenericOpc;
+ }
+ case 64:
+ switch (GenericOpc) {
+ case TargetOpcode::G_FADD:
+ return AArch64::FADDDrr;
+ case TargetOpcode::G_FSUB:
+ return AArch64::FSUBDrr;
+ case TargetOpcode::G_FMUL:
+ return AArch64::FMULDrr;
+ case TargetOpcode::G_FDIV:
+ return AArch64::FDIVDrr;
+ case TargetOpcode::G_OR:
+ return AArch64::ORRv8i8;
+ default:
+ return GenericOpc;
+ }
+ }
+ };
+ return GenericOpc;
+}
+
+/// Select the AArch64 opcode for the G_LOAD or G_STORE operation \p GenericOpc,
+/// appropriate for the (value) register bank \p RegBankID and of memory access
+/// size \p OpSize. This returns the variant with the base+unsigned-immediate
+/// addressing mode (e.g., LDRXui).
+/// \returns \p GenericOpc if the combination is unsupported.
+static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
+ unsigned OpSize) {
+ const bool isStore = GenericOpc == TargetOpcode::G_STORE;
+ switch (RegBankID) {
+ case AArch64::GPRRegBankID:
+ switch (OpSize) {
+ case 8:
+ return isStore ? AArch64::STRBBui : AArch64::LDRBBui;
+ case 16:
+ return isStore ? AArch64::STRHHui : AArch64::LDRHHui;
+ case 32:
+ return isStore ? AArch64::STRWui : AArch64::LDRWui;
+ case 64:
+ return isStore ? AArch64::STRXui : AArch64::LDRXui;
+ }
+ case AArch64::FPRRegBankID:
+ switch (OpSize) {
+ case 8:
+ return isStore ? AArch64::STRBui : AArch64::LDRBui;
+ case 16:
+ return isStore ? AArch64::STRHui : AArch64::LDRHui;
+ case 32:
+ return isStore ? AArch64::STRSui : AArch64::LDRSui;
+ case 64:
+ return isStore ? AArch64::STRDui : AArch64::LDRDui;
+ }
+ };
+ return GenericOpc;
+}
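A similar sketch for the load/store helper (assuming the value size and register bank come from the G_LOAD/G_STORE being selected):

  selectLoadStoreUIOp(TargetOpcode::G_STORE, AArch64::GPRRegBankID, 64); // AArch64::STRXui
  selectLoadStoreUIOp(TargetOpcode::G_LOAD, AArch64::FPRRegBankID, 32);  // AArch64::LDRSui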
+
+static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
+ MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
+ const RegisterBankInfo &RBI) {
+
+ unsigned DstReg = I.getOperand(0).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
+ assert(I.isCopy() && "Generic operators do not allow physical registers");
+ return true;
+ }
+
+ const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI);
+ const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
+ unsigned SrcReg = I.getOperand(1).getReg();
+ const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
+ (void)SrcSize;
+ assert((!TargetRegisterInfo::isPhysicalRegister(SrcReg) || I.isCopy()) &&
+ "No phys reg on generic operators");
+ assert(
+ (DstSize == SrcSize ||
+ // Copies are a means to set up the initial types; the number of
+ // bits may not exactly match.

+ (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
+ DstSize <= RBI.getSizeInBits(SrcReg, MRI, TRI)) ||
+ // Copies are a means to move bits around; as long as we stay
+ // on the same register class, that's fine. Otherwise, we need
+ // a SUBREG_TO_REG, an AND, or the like.
+ (((DstSize + 31) / 32 == (SrcSize + 31) / 32) && DstSize > SrcSize)) &&
+ "Copy with different width?!");
+ assert((DstSize <= 64 || RegBank.getID() == AArch64::FPRRegBankID) &&
+ "GPRs cannot get more than 64-bit width values");
+ const TargetRegisterClass *RC = nullptr;
+
+ if (RegBank.getID() == AArch64::FPRRegBankID) {
+ if (DstSize <= 32)
+ RC = &AArch64::FPR32RegClass;
+ else if (DstSize <= 64)
+ RC = &AArch64::FPR64RegClass;
+ else if (DstSize <= 128)
+ RC = &AArch64::FPR128RegClass;
+ else {
+ DEBUG(dbgs() << "Unexpected bitcast size " << DstSize << '\n');
+ return false;
+ }
+ } else {
+ assert(RegBank.getID() == AArch64::GPRRegBankID &&
+ "Bitcast for the flags?");
+ RC =
+ DstSize <= 32 ? &AArch64::GPR32allRegClass : &AArch64::GPR64allRegClass;
+ }
+
+ // No need to constrain SrcReg. It will get constrained when
+ // we hit another of its uses or defs.
+ // Copies do not have constraints.
+ if (!RBI.constrainGenericRegister(DstReg, *RC, MRI)) {
+ DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
+ << " operand\n");
+ return false;
+ }
+ I.setDesc(TII.get(AArch64::COPY));
+ return true;
+}
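In MIR-flavoured terms (a sketch; register names and flags are illustrative), selectCopy leaves a generic copy such as

  %1(s32) = COPY %w0

as a plain COPY but constrains %1 to GPR32all (or the matching FPR class for the FPR bank), so later passes can treat it like any ordinary target copy.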
+
+static unsigned selectFPConvOpc(unsigned GenericOpc, LLT DstTy, LLT SrcTy) {
+ if (!DstTy.isScalar() || !SrcTy.isScalar())
+ return GenericOpc;
+
+ const unsigned DstSize = DstTy.getSizeInBits();
+ const unsigned SrcSize = SrcTy.getSizeInBits();
+
+ switch (DstSize) {
+ case 32:
+ switch (SrcSize) {
+ case 32:
+ switch (GenericOpc) {
+ case TargetOpcode::G_SITOFP:
+ return AArch64::SCVTFUWSri;
+ case TargetOpcode::G_UITOFP:
+ return AArch64::UCVTFUWSri;
+ case TargetOpcode::G_FPTOSI:
+ return AArch64::FCVTZSUWSr;
+ case TargetOpcode::G_FPTOUI:
+ return AArch64::FCVTZUUWSr;
+ default:
+ return GenericOpc;
+ }
+ case 64:
+ switch (GenericOpc) {
+ case TargetOpcode::G_SITOFP:
+ return AArch64::SCVTFUXSri;
+ case TargetOpcode::G_UITOFP:
+ return AArch64::UCVTFUXSri;
+ case TargetOpcode::G_FPTOSI:
+ return AArch64::FCVTZSUWDr;
+ case TargetOpcode::G_FPTOUI:
+ return AArch64::FCVTZUUWDr;
+ default:
+ return GenericOpc;
+ }
+ default:
+ return GenericOpc;
+ }
+ case 64:
+ switch (SrcSize) {
+ case 32:
+ switch (GenericOpc) {
+ case TargetOpcode::G_SITOFP:
+ return AArch64::SCVTFUWDri;
+ case TargetOpcode::G_UITOFP:
+ return AArch64::UCVTFUWDri;
+ case TargetOpcode::G_FPTOSI:
+ return AArch64::FCVTZSUXSr;
+ case TargetOpcode::G_FPTOUI:
+ return AArch64::FCVTZUUXSr;
+ default:
+ return GenericOpc;
+ }
+ case 64:
+ switch (GenericOpc) {
+ case TargetOpcode::G_SITOFP:
+ return AArch64::SCVTFUXDri;
+ case TargetOpcode::G_UITOFP:
+ return AArch64::UCVTFUXDri;
+ case TargetOpcode::G_FPTOSI:
+ return AArch64::FCVTZSUXDr;
+ case TargetOpcode::G_FPTOUI:
+ return AArch64::FCVTZUUXDr;
+ default:
+ return GenericOpc;
+ }
+ default:
+ return GenericOpc;
+ }
+ default:
+ return GenericOpc;
+ };
+ return GenericOpc;
+}
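One hedged example of the conversion table (the comment shows the expected result; this variant converts a 64-bit GPR source to a 32-bit FPR destination):

  selectFPConvOpc(TargetOpcode::G_SITOFP, LLT::scalar(32), LLT::scalar(64)); // AArch64::SCVTFUXSri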
+
+static AArch64CC::CondCode changeICMPPredToAArch64CC(CmpInst::Predicate P) {
+ switch (P) {
+ default:
+ llvm_unreachable("Unknown condition code!");
+ case CmpInst::ICMP_NE:
+ return AArch64CC::NE;
+ case CmpInst::ICMP_EQ:
+ return AArch64CC::EQ;
+ case CmpInst::ICMP_SGT:
+ return AArch64CC::GT;
+ case CmpInst::ICMP_SGE:
+ return AArch64CC::GE;
+ case CmpInst::ICMP_SLT:
+ return AArch64CC::LT;
+ case CmpInst::ICMP_SLE:
+ return AArch64CC::LE;
+ case CmpInst::ICMP_UGT:
+ return AArch64CC::HI;
+ case CmpInst::ICMP_UGE:
+ return AArch64CC::HS;
+ case CmpInst::ICMP_ULT:
+ return AArch64CC::LO;
+ case CmpInst::ICMP_ULE:
+ return AArch64CC::LS;
+ }
+}
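For instance (mapping only; the resulting condition code feeds the CSINC emitted in the G_ICMP case further down):

  changeICMPPredToAArch64CC(CmpInst::ICMP_ULT); // AArch64CC::LO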
+
+static void changeFCMPPredToAArch64CC(CmpInst::Predicate P,
+ AArch64CC::CondCode &CondCode,
+ AArch64CC::CondCode &CondCode2) {
+ CondCode2 = AArch64CC::AL;
+ switch (P) {
+ default:
+ llvm_unreachable("Unknown FP condition!");
+ case CmpInst::FCMP_OEQ:
+ CondCode = AArch64CC::EQ;
+ break;
+ case CmpInst::FCMP_OGT:
+ CondCode = AArch64CC::GT;
+ break;
+ case CmpInst::FCMP_OGE:
+ CondCode = AArch64CC::GE;
+ break;
+ case CmpInst::FCMP_OLT:
+ CondCode = AArch64CC::MI;
+ break;
+ case CmpInst::FCMP_OLE:
+ CondCode = AArch64CC::LS;
+ break;
+ case CmpInst::FCMP_ONE:
+ CondCode = AArch64CC::MI;
+ CondCode2 = AArch64CC::GT;
+ break;
+ case CmpInst::FCMP_ORD:
+ CondCode = AArch64CC::VC;
+ break;
+ case CmpInst::FCMP_UNO:
+ CondCode = AArch64CC::VS;
+ break;
+ case CmpInst::FCMP_UEQ:
+ CondCode = AArch64CC::EQ;
+ CondCode2 = AArch64CC::VS;
+ break;
+ case CmpInst::FCMP_UGT:
+ CondCode = AArch64CC::HI;
+ break;
+ case CmpInst::FCMP_UGE:
+ CondCode = AArch64CC::PL;
+ break;
+ case CmpInst::FCMP_ULT:
+ CondCode = AArch64CC::LT;
+ break;
+ case CmpInst::FCMP_ULE:
+ CondCode = AArch64CC::LE;
+ break;
+ case CmpInst::FCMP_UNE:
+ CondCode = AArch64CC::NE;
+ break;
+ }
+}
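A small usage sketch (CC1 and CC2 are placeholder locals): only FCMP_ONE and FCMP_UEQ produce a second condition code, which is why the G_FCMP case below emits a second CSINC and an ORR:

  AArch64CC::CondCode CC1, CC2;
  changeFCMPPredToAArch64CC(CmpInst::FCMP_ONE, CC1, CC2); // CC1 == AArch64CC::MI, CC2 == AArch64CC::GT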
+
+bool AArch64InstructionSelector::select(MachineInstr &I) const {
+ assert(I.getParent() && "Instruction should be in a basic block!");
+ assert(I.getParent()->getParent() && "Instruction should be in a function!");
+
+ MachineBasicBlock &MBB = *I.getParent();
+ MachineFunction &MF = *MBB.getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ unsigned Opcode = I.getOpcode();
+ if (!isPreISelGenericOpcode(I.getOpcode())) {
+ // Certain non-generic instructions also need some special handling.
+
+ if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+
+ if (Opcode == TargetOpcode::PHI) {
+ const unsigned DefReg = I.getOperand(0).getReg();
+ const LLT DefTy = MRI.getType(DefReg);
+
+ const TargetRegisterClass *DefRC = nullptr;
+ if (TargetRegisterInfo::isPhysicalRegister(DefReg)) {
+ DefRC = TRI.getRegClass(DefReg);
+ } else {
+ const RegClassOrRegBank &RegClassOrBank =
+ MRI.getRegClassOrRegBank(DefReg);
+
+ DefRC = RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
+ if (!DefRC) {
+ if (!DefTy.isValid()) {
+ DEBUG(dbgs() << "PHI operand has no type, not a gvreg?\n");
+ return false;
+ }
+ const RegisterBank &RB = *RegClassOrBank.get<const RegisterBank *>();
+ DefRC = getRegClassForTypeOnBank(DefTy, RB, RBI);
+ if (!DefRC) {
+ DEBUG(dbgs() << "PHI operand has unexpected size/bank\n");
+ return false;
+ }
+ }
+ }
+
+ return RBI.constrainGenericRegister(DefReg, *DefRC, MRI);
+ }
+
+ if (I.isCopy())
+ return selectCopy(I, TII, MRI, TRI, RBI);
+
+ return true;
+ }
+
+ if (I.getNumOperands() != I.getNumExplicitOperands()) {
+ DEBUG(dbgs() << "Generic instruction has unexpected implicit operands\n");
+ return false;
+ }
+
+ if (selectImpl(I))
+ return true;
+
+ LLT Ty =
+ I.getOperand(0).isReg() ? MRI.getType(I.getOperand(0).getReg()) : LLT{};
+
+ switch (Opcode) {
+ case TargetOpcode::G_BRCOND: {
+ if (Ty.getSizeInBits() > 32) {
+ // We shouldn't need this on AArch64, but it would be implemented as an
+ // EXTRACT_SUBREG followed by a TBNZW because TBNZX has no encoding if the
+ // bit being tested is < 32.
+ DEBUG(dbgs() << "G_BRCOND has type: " << Ty
+ << ", expected at most 32-bits");
+ return false;
+ }
+
+ const unsigned CondReg = I.getOperand(0).getReg();
+ MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();
+
+ auto MIB = BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::TBNZW))
+ .addUse(CondReg)
+ .addImm(/*bit offset=*/0)
+ .addMBB(DestMBB);
+
+ I.eraseFromParent();
+ return constrainSelectedInstRegOperands(*MIB.getInstr(), TII, TRI, RBI);
+ }
+
+ case TargetOpcode::G_FCONSTANT:
+ case TargetOpcode::G_CONSTANT: {
+ const bool isFP = Opcode == TargetOpcode::G_FCONSTANT;
+
+ const LLT s32 = LLT::scalar(32);
+ const LLT s64 = LLT::scalar(64);
+ const LLT p0 = LLT::pointer(0, 64);
+
+ const unsigned DefReg = I.getOperand(0).getReg();
+ const LLT DefTy = MRI.getType(DefReg);
+ const unsigned DefSize = DefTy.getSizeInBits();
+ const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
+
+ // FIXME: Redundant check, but even less readable when factored out.
+ if (isFP) {
+ if (Ty != s32 && Ty != s64) {
+ DEBUG(dbgs() << "Unable to materialize FP " << Ty
+ << " constant, expected: " << s32 << " or " << s64
+ << '\n');
+ return false;
+ }
+
+ if (RB.getID() != AArch64::FPRRegBankID) {
+ DEBUG(dbgs() << "Unable to materialize FP " << Ty
+ << " constant on bank: " << RB << ", expected: FPR\n");
+ return false;
+ }
+ } else {
+ if (Ty != s32 && Ty != s64 && Ty != p0) {
+ DEBUG(dbgs() << "Unable to materialize integer " << Ty
+ << " constant, expected: " << s32 << ", " << s64 << ", or "
+ << p0 << '\n');
+ return false;
+ }
+
+ if (RB.getID() != AArch64::GPRRegBankID) {
+ DEBUG(dbgs() << "Unable to materialize integer " << Ty
+ << " constant on bank: " << RB << ", expected: GPR\n");
+ return false;
+ }
+ }
+
+ const unsigned MovOpc =
+ DefSize == 32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
+
+ I.setDesc(TII.get(MovOpc));
+
+ if (isFP) {
+ const TargetRegisterClass &GPRRC =
+ DefSize == 32 ? AArch64::GPR32RegClass : AArch64::GPR64RegClass;
+ const TargetRegisterClass &FPRRC =
+ DefSize == 32 ? AArch64::FPR32RegClass : AArch64::FPR64RegClass;
+
+ const unsigned DefGPRReg = MRI.createVirtualRegister(&GPRRC);
+ MachineOperand &RegOp = I.getOperand(0);
+ RegOp.setReg(DefGPRReg);
+
+ BuildMI(MBB, std::next(I.getIterator()), I.getDebugLoc(),
+ TII.get(AArch64::COPY))
+ .addDef(DefReg)
+ .addUse(DefGPRReg);
+
+ if (!RBI.constrainGenericRegister(DefReg, FPRRC, MRI)) {
+ DEBUG(dbgs() << "Failed to constrain G_FCONSTANT def operand\n");
+ return false;
+ }
+
+ MachineOperand &ImmOp = I.getOperand(1);
+ // FIXME: Is going through int64_t always correct?
+ ImmOp.ChangeToImmediate(
+ ImmOp.getFPImm()->getValueAPF().bitcastToAPInt().getZExtValue());
+ } else {
+ uint64_t Val = I.getOperand(1).getCImm()->getZExtValue();
+ I.getOperand(1).ChangeToImmediate(Val);
+ }
+
+ constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ return true;
+ }
+
+ case TargetOpcode::G_FRAME_INDEX: {
+ // allocas and G_FRAME_INDEX are only supported in addrspace(0).
+ if (Ty != LLT::pointer(0, 64)) {
+ DEBUG(dbgs() << "G_FRAME_INDEX pointer has type: " << Ty
+ << ", expected: " << LLT::pointer(0, 64) << '\n');
+ return false;
+ }
+
+ I.setDesc(TII.get(AArch64::ADDXri));
+
+ // MOs for a #0 shifted immediate.
+ I.addOperand(MachineOperand::CreateImm(0));
+ I.addOperand(MachineOperand::CreateImm(0));
+
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ }
+
+ case TargetOpcode::G_GLOBAL_VALUE: {
+ auto GV = I.getOperand(1).getGlobal();
+ if (GV->isThreadLocal()) {
+ // FIXME: we don't support TLS yet.
+ return false;
+ }
+ unsigned char OpFlags = STI.ClassifyGlobalReference(GV, TM);
+ if (OpFlags & AArch64II::MO_GOT) {
+ I.setDesc(TII.get(AArch64::LOADgot));
+ I.getOperand(1).setTargetFlags(OpFlags);
+ } else {
+ I.setDesc(TII.get(AArch64::MOVaddr));
+ I.getOperand(1).setTargetFlags(OpFlags | AArch64II::MO_PAGE);
+ MachineInstrBuilder MIB(MF, I);
+ MIB.addGlobalAddress(GV, I.getOperand(1).getOffset(),
+ OpFlags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
+ }
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ }
+
+ case TargetOpcode::G_LOAD:
+ case TargetOpcode::G_STORE: {
+ LLT MemTy = Ty;
+ LLT PtrTy = MRI.getType(I.getOperand(1).getReg());
+
+ if (PtrTy != LLT::pointer(0, 64)) {
+ DEBUG(dbgs() << "Load/Store pointer has type: " << PtrTy
+ << ", expected: " << LLT::pointer(0, 64) << '\n');
+ return false;
+ }
+
+#ifndef NDEBUG
+ // Sanity-check the pointer register.
+ const unsigned PtrReg = I.getOperand(1).getReg();
+ const RegisterBank &PtrRB = *RBI.getRegBank(PtrReg, MRI, TRI);
+ assert(PtrRB.getID() == AArch64::GPRRegBankID &&
+ "Load/Store pointer operand isn't a GPR");
+ assert(MRI.getType(PtrReg).isPointer() &&
+ "Load/Store pointer operand isn't a pointer");
+#endif
+
+ const unsigned ValReg = I.getOperand(0).getReg();
+ const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
+
+ const unsigned NewOpc =
+ selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemTy.getSizeInBits());
+ if (NewOpc == I.getOpcode())
+ return false;
+
+ I.setDesc(TII.get(NewOpc));
+
+ I.addOperand(MachineOperand::CreateImm(0));
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ }
+
+ case TargetOpcode::G_MUL: {
+ // Reject the various things we don't support yet.
+ if (unsupportedBinOp(I, RBI, MRI, TRI))
+ return false;
+
+ const unsigned DefReg = I.getOperand(0).getReg();
+ const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
+
+ if (RB.getID() != AArch64::GPRRegBankID) {
+ DEBUG(dbgs() << "G_MUL on bank: " << RB << ", expected: GPR\n");
+ return false;
+ }
+
+ unsigned ZeroReg;
+ unsigned NewOpc;
+ if (Ty.isScalar() && Ty.getSizeInBits() <= 32) {
+ NewOpc = AArch64::MADDWrrr;
+ ZeroReg = AArch64::WZR;
+ } else if (Ty == LLT::scalar(64)) {
+ NewOpc = AArch64::MADDXrrr;
+ ZeroReg = AArch64::XZR;
+ } else {
+ DEBUG(dbgs() << "G_MUL has type: " << Ty << ", expected: "
+ << LLT::scalar(32) << " or " << LLT::scalar(64) << '\n');
+ return false;
+ }
+
+ I.setDesc(TII.get(NewOpc));
+
+ I.addOperand(MachineOperand::CreateReg(ZeroReg, /*isDef=*/false));
+
+ // Now that we selected an opcode, we need to constrain the register
+ // operands to use appropriate classes.
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ }
+
+ case TargetOpcode::G_FADD:
+ case TargetOpcode::G_FSUB:
+ case TargetOpcode::G_FMUL:
+ case TargetOpcode::G_FDIV:
+
+ case TargetOpcode::G_OR:
+ case TargetOpcode::G_XOR:
+ case TargetOpcode::G_AND:
+ case TargetOpcode::G_SHL:
+ case TargetOpcode::G_LSHR:
+ case TargetOpcode::G_ASHR:
+ case TargetOpcode::G_SDIV:
+ case TargetOpcode::G_UDIV:
+ case TargetOpcode::G_ADD:
+ case TargetOpcode::G_SUB:
+ case TargetOpcode::G_GEP: {
+ // Reject the various things we don't support yet.
+ if (unsupportedBinOp(I, RBI, MRI, TRI))
+ return false;
+
+ const unsigned OpSize = Ty.getSizeInBits();
+
+ const unsigned DefReg = I.getOperand(0).getReg();
+ const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
+
+ const unsigned NewOpc = selectBinaryOp(I.getOpcode(), RB.getID(), OpSize);
+ if (NewOpc == I.getOpcode())
+ return false;
+
+ I.setDesc(TII.get(NewOpc));
+ // FIXME: Should the type be always reset in setDesc?
+
+ // Now that we selected an opcode, we need to constrain the register
+ // operands to use appropriate classes.
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ }
+
+ case TargetOpcode::G_PTRTOINT:
+ case TargetOpcode::G_TRUNC: {
+ const LLT DstTy = MRI.getType(I.getOperand(0).getReg());
+ const LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
+
+ const unsigned DstReg = I.getOperand(0).getReg();
+ const unsigned SrcReg = I.getOperand(1).getReg();
+
+ const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
+ const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);
+
+ if (DstRB.getID() != SrcRB.getID()) {
+ DEBUG(dbgs() << "G_TRUNC input/output on different banks\n");
+ return false;
+ }
+
+ if (DstRB.getID() == AArch64::GPRRegBankID) {
+ const TargetRegisterClass *DstRC =
+ getRegClassForTypeOnBank(DstTy, DstRB, RBI);
+ if (!DstRC)
+ return false;
+
+ const TargetRegisterClass *SrcRC =
+ getRegClassForTypeOnBank(SrcTy, SrcRB, RBI);
+ if (!SrcRC)
+ return false;
+
+ if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
+ !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
+ DEBUG(dbgs() << "Failed to constrain G_TRUNC\n");
+ return false;
+ }
+
+ if (DstRC == SrcRC) {
+ // Nothing to be done
+ } else if (DstRC == &AArch64::GPR32RegClass &&
+ SrcRC == &AArch64::GPR64RegClass) {
+ I.getOperand(1).setSubReg(AArch64::sub_32);
+ } else {
+ return false;
+ }
+
+ I.setDesc(TII.get(TargetOpcode::COPY));
+ return true;
+ } else if (DstRB.getID() == AArch64::FPRRegBankID) {
+ if (DstTy == LLT::vector(4, 16) && SrcTy == LLT::vector(4, 32)) {
+ I.setDesc(TII.get(AArch64::XTNv4i16));
+ constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ return true;
+ }
+ }
+
+ return false;
+ }
+
+ case TargetOpcode::G_ANYEXT: {
+ const unsigned DstReg = I.getOperand(0).getReg();
+ const unsigned SrcReg = I.getOperand(1).getReg();
+
+ const RegisterBank &RBDst = *RBI.getRegBank(DstReg, MRI, TRI);
+ if (RBDst.getID() != AArch64::GPRRegBankID) {
+ DEBUG(dbgs() << "G_ANYEXT on bank: " << RBDst << ", expected: GPR\n");
+ return false;
+ }
+
+ const RegisterBank &RBSrc = *RBI.getRegBank(SrcReg, MRI, TRI);
+ if (RBSrc.getID() != AArch64::GPRRegBankID) {
+ DEBUG(dbgs() << "G_ANYEXT on bank: " << RBSrc << ", expected: GPR\n");
+ return false;
+ }
+
+ const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
+
+ if (DstSize == 0) {
+ DEBUG(dbgs() << "G_ANYEXT operand has no size, not a gvreg?\n");
+ return false;
+ }
+
+ if (DstSize != 64 && DstSize > 32) {
+ DEBUG(dbgs() << "G_ANYEXT to size: " << DstSize
+ << ", expected: 32 or 64\n");
+ return false;
+ }
+ // At this point G_ANYEXT is just like a plain COPY, but we need
+ // to explicitly form the 64-bit value if one is needed.
+ if (DstSize > 32) {
+ unsigned ExtSrc = MRI.createVirtualRegister(&AArch64::GPR64allRegClass);
+ BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
+ .addDef(ExtSrc)
+ .addImm(0)
+ .addUse(SrcReg)
+ .addImm(AArch64::sub_32);
+ I.getOperand(1).setReg(ExtSrc);
+ }
+ return selectCopy(I, TII, MRI, TRI, RBI);
+ }
+
+ case TargetOpcode::G_ZEXT:
+ case TargetOpcode::G_SEXT: {
+ unsigned Opcode = I.getOpcode();
+ const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
+ SrcTy = MRI.getType(I.getOperand(1).getReg());
+ const bool isSigned = Opcode == TargetOpcode::G_SEXT;
+ const unsigned DefReg = I.getOperand(0).getReg();
+ const unsigned SrcReg = I.getOperand(1).getReg();
+ const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
+
+ if (RB.getID() != AArch64::GPRRegBankID) {
+ DEBUG(dbgs() << TII.getName(I.getOpcode()) << " on bank: " << RB
+ << ", expected: GPR\n");
+ return false;
+ }
+
+ MachineInstr *ExtI;
+ if (DstTy == LLT::scalar(64)) {
+ // FIXME: Can we avoid manually doing this?
+ if (!RBI.constrainGenericRegister(SrcReg, AArch64::GPR32RegClass, MRI)) {
+ DEBUG(dbgs() << "Failed to constrain " << TII.getName(Opcode)
+ << " operand\n");
+ return false;
+ }
+
+ const unsigned SrcXReg =
+ MRI.createVirtualRegister(&AArch64::GPR64RegClass);
+ BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::SUBREG_TO_REG))
+ .addDef(SrcXReg)
+ .addImm(0)
+ .addUse(SrcReg)
+ .addImm(AArch64::sub_32);
+
+ const unsigned NewOpc = isSigned ? AArch64::SBFMXri : AArch64::UBFMXri;
+ ExtI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(NewOpc))
+ .addDef(DefReg)
+ .addUse(SrcXReg)
+ .addImm(0)
+ .addImm(SrcTy.getSizeInBits() - 1);
+ } else if (DstTy.isScalar() && DstTy.getSizeInBits() <= 32) {
+ const unsigned NewOpc = isSigned ? AArch64::SBFMWri : AArch64::UBFMWri;
+ ExtI = BuildMI(MBB, I, I.getDebugLoc(), TII.get(NewOpc))
+ .addDef(DefReg)
+ .addUse(SrcReg)
+ .addImm(0)
+ .addImm(SrcTy.getSizeInBits() - 1);
+ } else {
+ return false;
+ }
+
+ constrainSelectedInstRegOperands(*ExtI, TII, TRI, RBI);
+
+ I.eraseFromParent();
+ return true;
+ }
+
+ case TargetOpcode::G_SITOFP:
+ case TargetOpcode::G_UITOFP:
+ case TargetOpcode::G_FPTOSI:
+ case TargetOpcode::G_FPTOUI: {
+ const LLT DstTy = MRI.getType(I.getOperand(0).getReg()),
+ SrcTy = MRI.getType(I.getOperand(1).getReg());
+ const unsigned NewOpc = selectFPConvOpc(Opcode, DstTy, SrcTy);
+ if (NewOpc == Opcode)
+ return false;
+
+ I.setDesc(TII.get(NewOpc));
+ constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+
+ return true;
+ }
+
+ case TargetOpcode::G_INTTOPTR:
+ case TargetOpcode::G_BITCAST:
+ return selectCopy(I, TII, MRI, TRI, RBI);
+
+ case TargetOpcode::G_FPEXT: {
+ if (MRI.getType(I.getOperand(0).getReg()) != LLT::scalar(64)) {
+ DEBUG(dbgs() << "G_FPEXT to type " << Ty
+ << ", expected: " << LLT::scalar(64) << '\n');
+ return false;
+ }
+
+ if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(32)) {
+ DEBUG(dbgs() << "G_FPEXT from type " << Ty
+ << ", expected: " << LLT::scalar(32) << '\n');
+ return false;
+ }
+
+ const unsigned DefReg = I.getOperand(0).getReg();
+ const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
+
+ if (RB.getID() != AArch64::FPRRegBankID) {
+ DEBUG(dbgs() << "G_FPEXT on bank: " << RB << ", expected: FPR\n");
+ return false;
+ }
+
+ I.setDesc(TII.get(AArch64::FCVTDSr));
+ constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+
+ return true;
+ }
+
+ case TargetOpcode::G_FPTRUNC: {
+ if (MRI.getType(I.getOperand(0).getReg()) != LLT::scalar(32)) {
+ DEBUG(dbgs() << "G_FPTRUNC to type " << Ty
+ << ", expected: " << LLT::scalar(32) << '\n');
+ return false;
+ }
+
+ if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(64)) {
+ DEBUG(dbgs() << "G_FPTRUNC from type " << Ty
+ << ", expected: " << LLT::scalar(64) << '\n');
+ return false;
+ }
+
+ const unsigned DefReg = I.getOperand(0).getReg();
+ const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);
+
+ if (RB.getID() != AArch64::FPRRegBankID) {
+ DEBUG(dbgs() << "G_FPTRUNC on bank: " << RB << ", expected: FPR\n");
+ return false;
+ }
+
+ I.setDesc(TII.get(AArch64::FCVTSDr));
+ constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+
+ return true;
+ }
+
+ case TargetOpcode::G_SELECT: {
+ if (MRI.getType(I.getOperand(1).getReg()) != LLT::scalar(1)) {
+ DEBUG(dbgs() << "G_SELECT cond has type: " << Ty
+ << ", expected: " << LLT::scalar(1) << '\n');
+ return false;
+ }
+
+ const unsigned CondReg = I.getOperand(1).getReg();
+ const unsigned TReg = I.getOperand(2).getReg();
+ const unsigned FReg = I.getOperand(3).getReg();
+
+ unsigned CSelOpc = 0;
+
+ if (Ty == LLT::scalar(32)) {
+ CSelOpc = AArch64::CSELWr;
+ } else if (Ty == LLT::scalar(64)) {
+ CSelOpc = AArch64::CSELXr;
+ } else {
+ return false;
+ }
+
+ MachineInstr &TstMI =
+ *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ANDSWri))
+ .addDef(AArch64::WZR)
+ .addUse(CondReg)
+ .addImm(AArch64_AM::encodeLogicalImmediate(1, 32));
+
+ MachineInstr &CSelMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CSelOpc))
+ .addDef(I.getOperand(0).getReg())
+ .addUse(TReg)
+ .addUse(FReg)
+ .addImm(AArch64CC::NE);
+
+ constrainSelectedInstRegOperands(TstMI, TII, TRI, RBI);
+ constrainSelectedInstRegOperands(CSelMI, TII, TRI, RBI);
+
+ I.eraseFromParent();
+ return true;
+ }
+ case TargetOpcode::G_ICMP: {
+ if (Ty != LLT::scalar(1)) {
+ DEBUG(dbgs() << "G_ICMP result has type: " << Ty
+ << ", expected: " << LLT::scalar(1) << '\n');
+ return false;
+ }
+
+ unsigned CmpOpc = 0;
+ unsigned ZReg = 0;
+
+ LLT CmpTy = MRI.getType(I.getOperand(2).getReg());
+ if (CmpTy == LLT::scalar(32)) {
+ CmpOpc = AArch64::SUBSWrr;
+ ZReg = AArch64::WZR;
+ } else if (CmpTy == LLT::scalar(64) || CmpTy.isPointer()) {
+ CmpOpc = AArch64::SUBSXrr;
+ ZReg = AArch64::XZR;
+ } else {
+ return false;
+ }
+
+ const AArch64CC::CondCode CC = changeICMPPredToAArch64CC(
+ (CmpInst::Predicate)I.getOperand(1).getPredicate());
+
+ MachineInstr &CmpMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
+ .addDef(ZReg)
+ .addUse(I.getOperand(2).getReg())
+ .addUse(I.getOperand(3).getReg());
+
+ MachineInstr &CSetMI =
+ *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
+ .addDef(I.getOperand(0).getReg())
+ .addUse(AArch64::WZR)
+ .addUse(AArch64::WZR)
+ .addImm(CC);
+
+ constrainSelectedInstRegOperands(CmpMI, TII, TRI, RBI);
+ constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
+
+ I.eraseFromParent();
+ return true;
+ }
+
+ case TargetOpcode::G_FCMP: {
+ if (Ty != LLT::scalar(1)) {
+ DEBUG(dbgs() << "G_FCMP result has type: " << Ty
+ << ", expected: " << LLT::scalar(1) << '\n');
+ return false;
+ }
+
+ unsigned CmpOpc = 0;
+ LLT CmpTy = MRI.getType(I.getOperand(2).getReg());
+ if (CmpTy == LLT::scalar(32)) {
+ CmpOpc = AArch64::FCMPSrr;
+ } else if (CmpTy == LLT::scalar(64)) {
+ CmpOpc = AArch64::FCMPDrr;
+ } else {
+ return false;
+ }
+
+ // FIXME: regbank
+
+ AArch64CC::CondCode CC1, CC2;
+ changeFCMPPredToAArch64CC(
+ (CmpInst::Predicate)I.getOperand(1).getPredicate(), CC1, CC2);
+
+ MachineInstr &CmpMI = *BuildMI(MBB, I, I.getDebugLoc(), TII.get(CmpOpc))
+ .addUse(I.getOperand(2).getReg())
+ .addUse(I.getOperand(3).getReg());
+
+ const unsigned DefReg = I.getOperand(0).getReg();
+ unsigned Def1Reg = DefReg;
+ if (CC2 != AArch64CC::AL)
+ Def1Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
+
+ MachineInstr &CSetMI =
+ *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
+ .addDef(Def1Reg)
+ .addUse(AArch64::WZR)
+ .addUse(AArch64::WZR)
+ .addImm(CC1);
+
+ if (CC2 != AArch64CC::AL) {
+ unsigned Def2Reg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
+ MachineInstr &CSet2MI =
+ *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::CSINCWr))
+ .addDef(Def2Reg)
+ .addUse(AArch64::WZR)
+ .addUse(AArch64::WZR)
+ .addImm(CC2);
+ MachineInstr &OrMI =
+ *BuildMI(MBB, I, I.getDebugLoc(), TII.get(AArch64::ORRWrr))
+ .addDef(DefReg)
+ .addUse(Def1Reg)
+ .addUse(Def2Reg);
+ constrainSelectedInstRegOperands(OrMI, TII, TRI, RBI);
+ constrainSelectedInstRegOperands(CSet2MI, TII, TRI, RBI);
+ }
+
+ constrainSelectedInstRegOperands(CmpMI, TII, TRI, RBI);
+ constrainSelectedInstRegOperands(CSetMI, TII, TRI, RBI);
+
+ I.eraseFromParent();
+ return true;
+ }
+ }
+
+ return false;
+}
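As a sketch of the overall effect (MIR-flavoured, virtual register names are placeholders, copies and constraints elided), the G_SELECT case above turns

  %r(s32) = G_SELECT %c(s1), %x(s32), %y(s32)

into roughly

  ANDSWri wzr, %c, #<logical imm 1>   ; test the low bit of the condition, setting NZCV
  %r = CSELWr %x, %y, NE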
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.h b/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.h
new file mode 100644
index 000000000000..0d44e696ac20
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.h
@@ -0,0 +1,47 @@
+//===- AArch64InstructionSelector --------------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file declares the targeting of the InstructionSelector class for
+/// AArch64.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64INSTRUCTIONSELECTOR_H
+#define LLVM_LIB_TARGET_AARCH64_AARCH64INSTRUCTIONSELECTOR_H
+
+#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
+
+namespace llvm {
+class AArch64InstrInfo;
+class AArch64RegisterBankInfo;
+class AArch64RegisterInfo;
+class AArch64Subtarget;
+class AArch64TargetMachine;
+
+class AArch64InstructionSelector : public InstructionSelector {
+public:
+ AArch64InstructionSelector(const AArch64TargetMachine &TM,
+ const AArch64Subtarget &STI,
+ const AArch64RegisterBankInfo &RBI);
+
+ virtual bool select(MachineInstr &I) const override;
+
+private:
+ /// tblgen-erated 'select' implementation, used as the initial selector for
+ /// the patterns that don't require complex C++.
+ bool selectImpl(MachineInstr &I) const;
+
+ const AArch64TargetMachine &TM;
+ const AArch64Subtarget &STI;
+ const AArch64InstrInfo &TII;
+ const AArch64RegisterInfo &TRI;
+ const AArch64RegisterBankInfo &RBI;
+};
+
+} // End llvm namespace.
+#endif
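A minimal usage sketch for the class declared above (TM, STI, RBI and MI stand in for objects the target machine already owns; the actual wiring into the GlobalISel pass pipeline is elided):

  AArch64InstructionSelector ISel(TM, STI, RBI);
  bool Selected = ISel.select(MI); // false if MI could not be constrained and selected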
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
new file mode 100644
index 000000000000..83f276a8161b
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.cpp
@@ -0,0 +1,204 @@
+//===- AArch64LegalizerInfo.cpp ----------------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the targeting of the MachineLegalizer class for
+/// AArch64.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#include "AArch64LegalizerInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/Target/TargetOpcodes.h"
+
+using namespace llvm;
+
+#ifndef LLVM_BUILD_GLOBAL_ISEL
+#error "You shouldn't build this"
+#endif
+
+AArch64LegalizerInfo::AArch64LegalizerInfo() {
+ using namespace TargetOpcode;
+ const LLT p0 = LLT::pointer(0, 64);
+ const LLT s1 = LLT::scalar(1);
+ const LLT s8 = LLT::scalar(8);
+ const LLT s16 = LLT::scalar(16);
+ const LLT s32 = LLT::scalar(32);
+ const LLT s64 = LLT::scalar(64);
+ const LLT v2s32 = LLT::vector(2, 32);
+ const LLT v4s32 = LLT::vector(4, 32);
+ const LLT v2s64 = LLT::vector(2, 64);
+
+ for (auto BinOp : {G_ADD, G_SUB, G_MUL, G_AND, G_OR, G_XOR, G_SHL}) {
+ // These operations naturally get the right answer when used on
+ // GPR32, even if the actual type is narrower.
+ for (auto Ty : {s1, s8, s16, s32, s64, v2s32, v4s32, v2s64})
+ setAction({BinOp, Ty}, Legal);
+ }
+
+ setAction({G_GEP, p0}, Legal);
+ setAction({G_GEP, 1, s64}, Legal);
+
+ for (auto Ty : {s1, s8, s16, s32})
+ setAction({G_GEP, 1, Ty}, WidenScalar);
+
+ for (auto BinOp : {G_LSHR, G_ASHR, G_SDIV, G_UDIV}) {
+ for (auto Ty : {s32, s64})
+ setAction({BinOp, Ty}, Legal);
+
+ for (auto Ty : {s1, s8, s16})
+ setAction({BinOp, Ty}, WidenScalar);
+ }
+
+ for (auto BinOp : { G_SREM, G_UREM })
+ for (auto Ty : { s1, s8, s16, s32, s64 })
+ setAction({BinOp, Ty}, Lower);
+
+ for (auto Op : { G_UADDE, G_USUBE, G_SADDO, G_SSUBO, G_SMULO, G_UMULO }) {
+ for (auto Ty : { s32, s64 })
+ setAction({Op, Ty}, Legal);
+
+ setAction({Op, 1, s1}, Legal);
+ }
+
+ for (auto BinOp : {G_FADD, G_FSUB, G_FMUL, G_FDIV})
+ for (auto Ty : {s32, s64})
+ setAction({BinOp, Ty}, Legal);
+
+ setAction({G_FREM, s32}, Libcall);
+ setAction({G_FREM, s64}, Libcall);
+
+ for (auto MemOp : {G_LOAD, G_STORE}) {
+ for (auto Ty : {s8, s16, s32, s64, p0, v2s32})
+ setAction({MemOp, Ty}, Legal);
+
+ setAction({MemOp, s1}, WidenScalar);
+
+ // And everything's fine in addrspace 0.
+ setAction({MemOp, 1, p0}, Legal);
+ }
+
+ // Constants
+ for (auto Ty : {s32, s64}) {
+ setAction({TargetOpcode::G_CONSTANT, Ty}, Legal);
+ setAction({TargetOpcode::G_FCONSTANT, Ty}, Legal);
+ }
+
+ setAction({G_CONSTANT, p0}, Legal);
+
+ for (auto Ty : {s1, s8, s16})
+ setAction({TargetOpcode::G_CONSTANT, Ty}, WidenScalar);
+
+ setAction({TargetOpcode::G_FCONSTANT, s16}, WidenScalar);
+
+ setAction({G_ICMP, s1}, Legal);
+ setAction({G_ICMP, 1, s32}, Legal);
+ setAction({G_ICMP, 1, s64}, Legal);
+ setAction({G_ICMP, 1, p0}, Legal);
+
+ for (auto Ty : {s1, s8, s16}) {
+ setAction({G_ICMP, 1, Ty}, WidenScalar);
+ }
+
+ setAction({G_FCMP, s1}, Legal);
+ setAction({G_FCMP, 1, s32}, Legal);
+ setAction({G_FCMP, 1, s64}, Legal);
+
+ // Extensions
+ for (auto Ty : { s1, s8, s16, s32, s64 }) {
+ setAction({G_ZEXT, Ty}, Legal);
+ setAction({G_SEXT, Ty}, Legal);
+ setAction({G_ANYEXT, Ty}, Legal);
+ }
+
+ for (auto Ty : { s1, s8, s16, s32 }) {
+ setAction({G_ZEXT, 1, Ty}, Legal);
+ setAction({G_SEXT, 1, Ty}, Legal);
+ setAction({G_ANYEXT, 1, Ty}, Legal);
+ }
+
+ setAction({G_FPEXT, s64}, Legal);
+ setAction({G_FPEXT, 1, s32}, Legal);
+
+ // Truncations
+ for (auto Ty : { s16, s32 })
+ setAction({G_FPTRUNC, Ty}, Legal);
+
+ for (auto Ty : { s32, s64 })
+ setAction({G_FPTRUNC, 1, Ty}, Legal);
+
+ for (auto Ty : { s1, s8, s16, s32 })
+ setAction({G_TRUNC, Ty}, Legal);
+
+ for (auto Ty : { s8, s16, s32, s64 })
+ setAction({G_TRUNC, 1, Ty}, Legal);
+
+ // Conversions
+ for (auto Ty : { s1, s8, s16, s32, s64 }) {
+ setAction({G_FPTOSI, 0, Ty}, Legal);
+ setAction({G_FPTOUI, 0, Ty}, Legal);
+ setAction({G_SITOFP, 1, Ty}, Legal);
+ setAction({G_UITOFP, 1, Ty}, Legal);
+ }
+
+ for (auto Ty : { s32, s64 }) {
+ setAction({G_FPTOSI, 1, Ty}, Legal);
+ setAction({G_FPTOUI, 1, Ty}, Legal);
+ setAction({G_SITOFP, 0, Ty}, Legal);
+ setAction({G_UITOFP, 0, Ty}, Legal);
+ }
+
+ // Control-flow
+ for (auto Ty : {s1, s8, s16, s32})
+ setAction({G_BRCOND, Ty}, Legal);
+
+ // Select
+ for (auto Ty : {s1, s8, s16, s32, s64})
+ setAction({G_SELECT, Ty}, Legal);
+
+ setAction({G_SELECT, 1, s1}, Legal);
+
+ // Pointer-handling
+ setAction({G_FRAME_INDEX, p0}, Legal);
+ setAction({G_GLOBAL_VALUE, p0}, Legal);
+
+ for (auto Ty : {s1, s8, s16, s32, s64})
+ setAction({G_PTRTOINT, 0, Ty}, Legal);
+
+ setAction({G_PTRTOINT, 1, p0}, Legal);
+
+ setAction({G_INTTOPTR, 0, p0}, Legal);
+ setAction({G_INTTOPTR, 1, s64}, Legal);
+
+ // Casts of 32- and 64-bit-wide types are just copies.
+ for (auto Ty : {s1, s8, s16, s32, s64}) {
+ setAction({G_BITCAST, 0, Ty}, Legal);
+ setAction({G_BITCAST, 1, Ty}, Legal);
+ }
+
+ // For the sake of copying bits around, the type does not really
+ // matter as long as it fits a register.
+ for (int EltSize = 8; EltSize <= 64; EltSize *= 2) {
+ setAction({G_BITCAST, 0, LLT::vector(128/EltSize, EltSize)}, Legal);
+ setAction({G_BITCAST, 1, LLT::vector(128/EltSize, EltSize)}, Legal);
+ if (EltSize >= 64)
+ continue;
+
+ setAction({G_BITCAST, 0, LLT::vector(64/EltSize, EltSize)}, Legal);
+ setAction({G_BITCAST, 1, LLT::vector(64/EltSize, EltSize)}, Legal);
+ if (EltSize >= 32)
+ continue;
+
+ setAction({G_BITCAST, 0, LLT::vector(32/EltSize, EltSize)}, Legal);
+ setAction({G_BITCAST, 1, LLT::vector(32/EltSize, EltSize)}, Legal);
+ }
+
+ computeTables();
+}
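To make the effect of the table concrete, a hedged example (MIR-flavoured, names are placeholders): s16 division is marked WidenScalar above, so the legalizer rewrites

  %d(s16) = G_SDIV %a(s16), %b(s16)

into roughly

  %a32(s32) = G_SEXT %a(s16)
  %b32(s32) = G_SEXT %b(s16)
  %d32(s32) = G_SDIV %a32, %b32
  %d(s16) = G_TRUNC %d32(s32)

while G_FREM on s32/s64 is marked Libcall and becomes a call to the corresponding runtime routine (fmodf/fmod).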
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.h
new file mode 100644
index 000000000000..feacbef9f147
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64LegalizerInfo.h
@@ -0,0 +1,30 @@
+//===- AArch64LegalizerInfo --------------------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file declares the targeting of the MachineLegalizer class for
+/// AArch64.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64MACHINELEGALIZER_H
+#define LLVM_LIB_TARGET_AARCH64_AARCH64MACHINELEGALIZER_H
+
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+
+namespace llvm {
+
+class LLVMContext;
+
+/// This class provides the legalization rules (LegalizerInfo) for AArch64.
+class AArch64LegalizerInfo : public LegalizerInfo {
+public:
+ AArch64LegalizerInfo();
+};
+} // End llvm namespace.
+#endif
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
index dd2ea6a9dbd6..dcb05601e5f4 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
@@ -38,7 +38,6 @@ STATISTIC(NumPostFolded, "Number of post-index updates folded");
STATISTIC(NumPreFolded, "Number of pre-index updates folded");
STATISTIC(NumUnscaledPairCreated,
"Number of load/store from unscaled generated");
-STATISTIC(NumNarrowLoadsPromoted, "Number of narrow loads promoted");
STATISTIC(NumZeroStoresPromoted, "Number of narrow zero stores promoted");
STATISTIC(NumLoadsFromStoresPromoted, "Number of loads from stores promoted");
@@ -51,14 +50,6 @@ static cl::opt<unsigned> LdStLimit("aarch64-load-store-scan-limit",
static cl::opt<unsigned> UpdateLimit("aarch64-update-scan-limit", cl::init(100),
cl::Hidden);
-static cl::opt<bool> EnableNarrowLdMerge("enable-narrow-ld-merge", cl::Hidden,
- cl::init(false),
- cl::desc("Enable narrow load merge"));
-
-namespace llvm {
-void initializeAArch64LoadStoreOptPass(PassRegistry &);
-}
-
#define AARCH64_LOAD_STORE_OPT_NAME "AArch64 load / store optimization pass"
namespace {
@@ -111,11 +102,11 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
bool findMatchingStore(MachineBasicBlock::iterator I, unsigned Limit,
MachineBasicBlock::iterator &StoreI);
- // Merge the two instructions indicated into a wider instruction.
+ // Merge the two narrow zero-store instructions indicated into a single wider store.
MachineBasicBlock::iterator
- mergeNarrowInsns(MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator MergeMI,
- const LdStPairFlags &Flags);
+ mergeNarrowZeroStores(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator MergeMI,
+ const LdStPairFlags &Flags);
// Merge the two instructions indicated into a single pair-wise instruction.
MachineBasicBlock::iterator
@@ -151,8 +142,8 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
mergeUpdateInsn(MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Update, bool IsPreIdx);
- // Find and merge foldable ldr/str instructions.
- bool tryToMergeLdStInst(MachineBasicBlock::iterator &MBBI);
+ // Find and merge zero store instructions.
+ bool tryToMergeZeroStInst(MachineBasicBlock::iterator &MBBI);
// Find and pair ldr/str instructions.
bool tryToPairLdStInst(MachineBasicBlock::iterator &MBBI);
@@ -160,18 +151,16 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass {
// Find and promote load instructions which read directly from store.
bool tryToPromoteLoadFromStore(MachineBasicBlock::iterator &MBBI);
- bool optimizeBlock(MachineBasicBlock &MBB, bool enableNarrowLdOpt);
+ bool optimizeBlock(MachineBasicBlock &MBB, bool EnableNarrowZeroStOpt);
bool runOnMachineFunction(MachineFunction &Fn) override;
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
- const char *getPassName() const override {
- return AARCH64_LOAD_STORE_OPT_NAME;
- }
+ StringRef getPassName() const override { return AARCH64_LOAD_STORE_OPT_NAME; }
};
char AArch64LoadStoreOpt::ID = 0;
} // namespace
@@ -179,23 +168,6 @@ char AArch64LoadStoreOpt::ID = 0;
INITIALIZE_PASS(AArch64LoadStoreOpt, "aarch64-ldst-opt",
AARCH64_LOAD_STORE_OPT_NAME, false, false)
-static unsigned getBitExtrOpcode(MachineInstr &MI) {
- switch (MI.getOpcode()) {
- default:
- llvm_unreachable("Unexpected opcode.");
- case AArch64::LDRBBui:
- case AArch64::LDURBBi:
- case AArch64::LDRHHui:
- case AArch64::LDURHHi:
- return AArch64::UBFMWri;
- case AArch64::LDRSBWui:
- case AArch64::LDURSBWi:
- case AArch64::LDRSHWui:
- case AArch64::LDURSHWi:
- return AArch64::SBFMWri;
- }
-}
-
static bool isNarrowStore(unsigned Opc) {
switch (Opc) {
default:
@@ -208,30 +180,6 @@ static bool isNarrowStore(unsigned Opc) {
}
}
-static bool isNarrowLoad(unsigned Opc) {
- switch (Opc) {
- default:
- return false;
- case AArch64::LDRHHui:
- case AArch64::LDURHHi:
- case AArch64::LDRBBui:
- case AArch64::LDURBBi:
- case AArch64::LDRSHWui:
- case AArch64::LDURSHWi:
- case AArch64::LDRSBWui:
- case AArch64::LDURSBWi:
- return true;
- }
-}
-
-static bool isNarrowLoad(MachineInstr &MI) {
- return isNarrowLoad(MI.getOpcode());
-}
-
-static bool isNarrowLoadOrStore(unsigned Opc) {
- return isNarrowLoad(Opc) || isNarrowStore(Opc);
-}
-
// Scaling factor for unscaled load or store.
static int getMemScale(MachineInstr &MI) {
switch (MI.getOpcode()) {
@@ -323,23 +271,11 @@ static unsigned getMatchingNonSExtOpcode(unsigned Opc,
case AArch64::STURSi:
case AArch64::LDRSui:
case AArch64::LDURSi:
- case AArch64::LDRHHui:
- case AArch64::LDURHHi:
- case AArch64::LDRBBui:
- case AArch64::LDURBBi:
return Opc;
case AArch64::LDRSWui:
return AArch64::LDRWui;
case AArch64::LDURSWi:
return AArch64::LDURWi;
- case AArch64::LDRSBWui:
- return AArch64::LDRBBui;
- case AArch64::LDRSHWui:
- return AArch64::LDRHHui;
- case AArch64::LDURSBWi:
- return AArch64::LDURBBi;
- case AArch64::LDURSHWi:
- return AArch64::LDURHHi;
}
}
@@ -359,18 +295,6 @@ static unsigned getMatchingWideOpcode(unsigned Opc) {
return AArch64::STURXi;
case AArch64::STRWui:
return AArch64::STRXui;
- case AArch64::LDRHHui:
- case AArch64::LDRSHWui:
- return AArch64::LDRWui;
- case AArch64::LDURHHi:
- case AArch64::LDURSHWi:
- return AArch64::LDURWi;
- case AArch64::LDRBBui:
- case AArch64::LDRSBWui:
- return AArch64::LDRHHui;
- case AArch64::LDURBBi:
- case AArch64::LDURSBWi:
- return AArch64::LDURHHi;
}
}
@@ -614,23 +538,20 @@ static bool isLdOffsetInRangeOfSt(MachineInstr &LoadInst,
(UnscaledLdOffset + LoadSize <= (UnscaledStOffset + StoreSize));
}
-static bool isPromotableZeroStoreOpcode(unsigned Opc) {
- return isNarrowStore(Opc) || Opc == AArch64::STRWui || Opc == AArch64::STURWi;
-}
-
-static bool isPromotableZeroStoreOpcode(MachineInstr &MI) {
- return isPromotableZeroStoreOpcode(MI.getOpcode());
-}
-
static bool isPromotableZeroStoreInst(MachineInstr &MI) {
- return (isPromotableZeroStoreOpcode(MI)) &&
+ unsigned Opc = MI.getOpcode();
+ return (Opc == AArch64::STRWui || Opc == AArch64::STURWi ||
+ isNarrowStore(Opc)) &&
getLdStRegOp(MI).getReg() == AArch64::WZR;
}
MachineBasicBlock::iterator
-AArch64LoadStoreOpt::mergeNarrowInsns(MachineBasicBlock::iterator I,
- MachineBasicBlock::iterator MergeMI,
- const LdStPairFlags &Flags) {
+AArch64LoadStoreOpt::mergeNarrowZeroStores(MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator MergeMI,
+ const LdStPairFlags &Flags) {
+ assert(isPromotableZeroStoreInst(*I) && isPromotableZeroStoreInst(*MergeMI) &&
+ "Expected promotable zero stores.");
+
MachineBasicBlock::iterator NextI = I;
++NextI;
// If NextI is the second of the two instructions to be merged, we need
@@ -654,15 +575,12 @@ AArch64LoadStoreOpt::mergeNarrowInsns(MachineBasicBlock::iterator I,
MergeForward ? getLdStBaseOp(*MergeMI) : getLdStBaseOp(*I);
// Which register is Rt and which is Rt2 depends on the offset order.
- MachineInstr *RtMI, *Rt2MI;
+ MachineInstr *RtMI;
if (getLdStOffsetOp(*I).getImm() ==
- getLdStOffsetOp(*MergeMI).getImm() + OffsetStride) {
+ getLdStOffsetOp(*MergeMI).getImm() + OffsetStride)
RtMI = &*MergeMI;
- Rt2MI = &*I;
- } else {
+ else
RtMI = &*I;
- Rt2MI = &*MergeMI;
- }
int OffsetImm = getLdStOffsetOp(*RtMI).getImm();
// Change the scaled offset from small to large type.
@@ -671,105 +589,9 @@ AArch64LoadStoreOpt::mergeNarrowInsns(MachineBasicBlock::iterator I,
OffsetImm /= 2;
}
+ // Construct the new instruction.
DebugLoc DL = I->getDebugLoc();
MachineBasicBlock *MBB = I->getParent();
- if (isNarrowLoad(Opc)) {
- MachineInstr *RtNewDest = &*(MergeForward ? I : MergeMI);
- // When merging small (< 32 bit) loads for big-endian targets, the order of
- // the component parts gets swapped.
- if (!Subtarget->isLittleEndian())
- std::swap(RtMI, Rt2MI);
- // Construct the new load instruction.
- MachineInstr *NewMemMI, *BitExtMI1, *BitExtMI2;
- NewMemMI =
- BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingWideOpcode(Opc)))
- .addOperand(getLdStRegOp(*RtNewDest))
- .addOperand(BaseRegOp)
- .addImm(OffsetImm)
- .setMemRefs(I->mergeMemRefsWith(*MergeMI));
- (void)NewMemMI;
-
- DEBUG(
- dbgs()
- << "Creating the new load and extract. Replacing instructions:\n ");
- DEBUG(I->print(dbgs()));
- DEBUG(dbgs() << " ");
- DEBUG(MergeMI->print(dbgs()));
- DEBUG(dbgs() << " with instructions:\n ");
- DEBUG((NewMemMI)->print(dbgs()));
-
- int Width = getMemScale(*I) == 1 ? 8 : 16;
- int LSBLow = 0;
- int LSBHigh = Width;
- int ImmsLow = LSBLow + Width - 1;
- int ImmsHigh = LSBHigh + Width - 1;
- MachineInstr *ExtDestMI = &*(MergeForward ? MergeMI : I);
- if ((ExtDestMI == Rt2MI) == Subtarget->isLittleEndian()) {
- // Create the bitfield extract for high bits.
- BitExtMI1 =
- BuildMI(*MBB, InsertionPoint, DL, TII->get(getBitExtrOpcode(*Rt2MI)))
- .addOperand(getLdStRegOp(*Rt2MI))
- .addReg(getLdStRegOp(*RtNewDest).getReg())
- .addImm(LSBHigh)
- .addImm(ImmsHigh);
- // Create the bitfield extract for low bits.
- if (RtMI->getOpcode() == getMatchingNonSExtOpcode(RtMI->getOpcode())) {
- // For unsigned, prefer to use AND for low bits.
- BitExtMI2 = BuildMI(*MBB, InsertionPoint, DL, TII->get(AArch64::ANDWri))
- .addOperand(getLdStRegOp(*RtMI))
- .addReg(getLdStRegOp(*RtNewDest).getReg())
- .addImm(ImmsLow);
- } else {
- BitExtMI2 =
- BuildMI(*MBB, InsertionPoint, DL, TII->get(getBitExtrOpcode(*RtMI)))
- .addOperand(getLdStRegOp(*RtMI))
- .addReg(getLdStRegOp(*RtNewDest).getReg())
- .addImm(LSBLow)
- .addImm(ImmsLow);
- }
- } else {
- // Create the bitfield extract for low bits.
- if (RtMI->getOpcode() == getMatchingNonSExtOpcode(RtMI->getOpcode())) {
- // For unsigned, prefer to use AND for low bits.
- BitExtMI1 = BuildMI(*MBB, InsertionPoint, DL, TII->get(AArch64::ANDWri))
- .addOperand(getLdStRegOp(*RtMI))
- .addReg(getLdStRegOp(*RtNewDest).getReg())
- .addImm(ImmsLow);
- } else {
- BitExtMI1 =
- BuildMI(*MBB, InsertionPoint, DL, TII->get(getBitExtrOpcode(*RtMI)))
- .addOperand(getLdStRegOp(*RtMI))
- .addReg(getLdStRegOp(*RtNewDest).getReg())
- .addImm(LSBLow)
- .addImm(ImmsLow);
- }
-
- // Create the bitfield extract for high bits.
- BitExtMI2 =
- BuildMI(*MBB, InsertionPoint, DL, TII->get(getBitExtrOpcode(*Rt2MI)))
- .addOperand(getLdStRegOp(*Rt2MI))
- .addReg(getLdStRegOp(*RtNewDest).getReg())
- .addImm(LSBHigh)
- .addImm(ImmsHigh);
- }
- (void)BitExtMI1;
- (void)BitExtMI2;
-
- DEBUG(dbgs() << " ");
- DEBUG((BitExtMI1)->print(dbgs()));
- DEBUG(dbgs() << " ");
- DEBUG((BitExtMI2)->print(dbgs()));
- DEBUG(dbgs() << "\n");
-
- // Erase the old instructions.
- I->eraseFromParent();
- MergeMI->eraseFromParent();
- return NextI;
- }
- assert(isPromotableZeroStoreInst(*I) && isPromotableZeroStoreInst(*MergeMI) &&
- "Expected promotable zero store");
-
- // Construct the new instruction.
MachineInstrBuilder MIB;
MIB = BuildMI(*MBB, InsertionPoint, DL, TII->get(getMatchingWideOpcode(Opc)))
.addReg(isNarrowStore(Opc) ? AArch64::WZR : AArch64::XZR)
@@ -778,7 +600,7 @@ AArch64LoadStoreOpt::mergeNarrowInsns(MachineBasicBlock::iterator I,
.setMemRefs(I->mergeMemRefsWith(*MergeMI));
(void)MIB;
- DEBUG(dbgs() << "Creating wider load/store. Replacing instructions:\n ");
+ DEBUG(dbgs() << "Creating wider store. Replacing instructions:\n ");
DEBUG(I->print(dbgs()));
DEBUG(dbgs() << " ");
DEBUG(MergeMI->print(dbgs()));
@@ -945,6 +767,7 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
// Remove the load, if the destination register of the loads is the same
// register for stored value.
if (StRt == LdRt && LoadSize == 8) {
+ StoreI->clearRegisterKills(StRt, TRI);
DEBUG(dbgs() << "Remove load instruction:\n ");
DEBUG(LoadI->print(dbgs()));
DEBUG(dbgs() << "\n");
@@ -1009,6 +832,8 @@ AArch64LoadStoreOpt::promoteLoadFromStore(MachineBasicBlock::iterator LoadI,
.addImm(Imms);
}
}
+ StoreI->clearRegisterKills(StRt, TRI);
+
(void)BitExtMI;
DEBUG(dbgs() << "Promoting load by replacing :\n ");
@@ -1041,8 +866,10 @@ static void trackRegDefsUses(const MachineInstr &MI, BitVector &ModifiedRegs,
if (!Reg)
continue;
if (MO.isDef()) {
- for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
- ModifiedRegs.set(*AI);
+ // WZR/XZR are not modified even when used as a destination register.
+ if (Reg != AArch64::WZR && Reg != AArch64::XZR)
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ ModifiedRegs.set(*AI);
} else {
assert(MO.isUse() && "Reg operand not a def and not a use?!?");
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
@@ -1118,8 +945,9 @@ bool AArch64LoadStoreOpt::findMatchingStore(
--MBBI;
MachineInstr &MI = *MBBI;
- // Don't count DBG_VALUE instructions towards the search limit.
- if (!MI.isDebugValue())
+ // Don't count transient instructions towards the search limit since there
+ // may be different numbers of them if e.g. debug information is present.
+ if (!MI.isTransient())
++Count;
// If the load instruction reads directly from the address to which the
@@ -1184,13 +1012,14 @@ static bool areCandidatesToMergeOrPair(MachineInstr &FirstMI, MachineInstr &MI,
return true;
}
- // If the second instruction isn't even a load/store, bail out.
+ // If the second instruction isn't even a mergable/pairable load/store, bail
+ // out.
if (!PairIsValidLdStrOpc)
return false;
- // FIXME: We don't support merging narrow loads/stores with mixed
- // scaled/unscaled offsets.
- if (isNarrowLoadOrStore(OpcA) || isNarrowLoadOrStore(OpcB))
+ // FIXME: We don't support merging narrow stores with mixed scaled/unscaled
+ // offsets.
+ if (isNarrowStore(OpcA) || isNarrowStore(OpcB))
return false;
// Try to match an unscaled load/store with a scaled load/store.
@@ -1229,13 +1058,11 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I,
for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) {
MachineInstr &MI = *MBBI;
- // Skip DBG_VALUE instructions. Otherwise debug info can affect the
- // optimization by changing how far we scan.
- if (MI.isDebugValue())
- continue;
- // Now that we know this is a real instruction, count it.
- ++Count;
+ // Don't count transient instructions towards the search limit since there
+ // may be different numbers of them if e.g. debug information is present.
+ if (!MI.isTransient())
+ ++Count;
Flags.setSExtIdx(-1);
if (areCandidatesToMergeOrPair(FirstMI, MI, Flags, TII) &&
@@ -1505,12 +1332,11 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnForward(
++MBBI;
for (unsigned Count = 0; MBBI != E && Count < Limit; ++MBBI) {
MachineInstr &MI = *MBBI;
- // Skip DBG_VALUE instructions.
- if (MI.isDebugValue())
- continue;
- // Now that we know this is a real instruction, count it.
- ++Count;
+ // Don't count transient instructions towards the search limit since there
+ // may be different numbers of them if e.g. debug information is present.
+ if (!MI.isTransient())
+ ++Count;
// If we found a match, return it.
if (isMatchingUpdateInsn(*I, MI, BaseReg, UnscaledOffset))
@@ -1559,8 +1385,9 @@ MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingUpdateInsnBackward(
--MBBI;
MachineInstr &MI = *MBBI;
- // Don't count DBG_VALUE instructions towards the search limit.
- if (!MI.isDebugValue())
+ // Don't count transient instructions towards the search limit since there
+ // may be different numbers of them if e.g. debug information is present.
+ if (!MI.isTransient())
++Count;
// If we found a match, return it.
@@ -1603,37 +1430,26 @@ bool AArch64LoadStoreOpt::tryToPromoteLoadFromStore(
return false;
}
-// Find narrow loads that can be converted into a single wider load with
-// bitfield extract instructions. Also merge adjacent zero stores into a wider
-// store.
-bool AArch64LoadStoreOpt::tryToMergeLdStInst(
+// Merge adjacent zero stores into a wider store.
+bool AArch64LoadStoreOpt::tryToMergeZeroStInst(
MachineBasicBlock::iterator &MBBI) {
- assert((isNarrowLoad(*MBBI) || isPromotableZeroStoreOpcode(*MBBI)) &&
- "Expected narrow op.");
+ assert(isPromotableZeroStoreInst(*MBBI) && "Expected narrow store.");
MachineInstr &MI = *MBBI;
MachineBasicBlock::iterator E = MI.getParent()->end();
if (!TII->isCandidateToMergeOrPair(MI))
return false;
- // For promotable zero stores, the stored value should be WZR.
- if (isPromotableZeroStoreOpcode(MI) &&
- getLdStRegOp(MI).getReg() != AArch64::WZR)
- return false;
-
// Look ahead up to LdStLimit instructions for a mergable instruction.
LdStPairFlags Flags;
MachineBasicBlock::iterator MergeMI =
findMatchingInsn(MBBI, Flags, LdStLimit, /* FindNarrowMerge = */ true);
if (MergeMI != E) {
- if (isNarrowLoad(MI)) {
- ++NumNarrowLoadsPromoted;
- } else if (isPromotableZeroStoreInst(MI)) {
- ++NumZeroStoresPromoted;
- }
+ ++NumZeroStoresPromoted;
+
// Keeping the iterator straight is a pain, so we let the merge routine tell
// us what the next instruction is after it's done mucking about.
- MBBI = mergeNarrowInsns(MBBI, MergeMI, Flags);
+ MBBI = mergeNarrowZeroStores(MBBI, MergeMI, Flags);
return true;
}
return false;
@@ -1674,7 +1490,7 @@ bool AArch64LoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) {
}
bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
- bool enableNarrowLdOpt) {
+ bool EnableNarrowZeroStOpt) {
bool Modified = false;
// Four transformations to do here:
// 1) Find loads that directly read from stores and promote them by
@@ -1713,29 +1529,21 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
}
}
}
- // 2) Find narrow loads that can be converted into a single wider load
- // with bitfield extract instructions.
- // e.g.,
- // ldrh w0, [x2]
- // ldrh w1, [x2, #2]
- // ; becomes
- // ldr w0, [x2]
- // ubfx w1, w0, #16, #16
- // and w0, w0, #ffff
- //
- // Also merge adjacent zero stores into a wider store.
+ // 2) Merge adjacent zero stores into a wider store.
// e.g.,
// strh wzr, [x0]
// strh wzr, [x0, #2]
// ; becomes
// str wzr, [x0]
+ // e.g.,
+ // str wzr, [x0]
+ // str wzr, [x0, #4]
+ // ; becomes
+ // str xzr, [x0]
for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
- enableNarrowLdOpt && MBBI != E;) {
- MachineInstr &MI = *MBBI;
- unsigned Opc = MI.getOpcode();
- if (isPromotableZeroStoreOpcode(Opc) ||
- (EnableNarrowLdMerge && isNarrowLoad(Opc))) {
- if (tryToMergeLdStInst(MBBI)) {
+ EnableNarrowZeroStOpt && MBBI != E;) {
+ if (isPromotableZeroStoreInst(*MBBI)) {
+ if (tryToMergeZeroStInst(MBBI)) {
Modified = true;
} else
++MBBI;
@@ -1752,44 +1560,10 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB,
// ldp x0, x1, [x2]
for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
MBBI != E;) {
- MachineInstr &MI = *MBBI;
- switch (MI.getOpcode()) {
- default:
- // Just move on to the next instruction.
+ if (TII->isPairableLdStInst(*MBBI) && tryToPairLdStInst(MBBI))
+ Modified = true;
+ else
++MBBI;
- break;
- // Scaled instructions.
- case AArch64::STRSui:
- case AArch64::STRDui:
- case AArch64::STRQui:
- case AArch64::STRXui:
- case AArch64::STRWui:
- case AArch64::LDRSui:
- case AArch64::LDRDui:
- case AArch64::LDRQui:
- case AArch64::LDRXui:
- case AArch64::LDRWui:
- case AArch64::LDRSWui:
- // Unscaled instructions.
- case AArch64::STURSi:
- case AArch64::STURDi:
- case AArch64::STURQi:
- case AArch64::STURWi:
- case AArch64::STURXi:
- case AArch64::LDURSi:
- case AArch64::LDURDi:
- case AArch64::LDURQi:
- case AArch64::LDURWi:
- case AArch64::LDURXi:
- case AArch64::LDURSWi: {
- if (tryToPairLdStInst(MBBI)) {
- Modified = true;
- break;
- }
- ++MBBI;
- break;
- }
- }
}
// 4) Find base register updates that can be merged into the load or store
// as a base-reg writeback.
@@ -1930,16 +1704,17 @@ bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
UsedRegs.resize(TRI->getNumRegs());
bool Modified = false;
- bool enableNarrowLdOpt =
- Subtarget->mergeNarrowLoads() && !Subtarget->requiresStrictAlign();
+ bool enableNarrowZeroStOpt = !Subtarget->requiresStrictAlign();
for (auto &MBB : Fn)
- Modified |= optimizeBlock(MBB, enableNarrowLdOpt);
+ Modified |= optimizeBlock(MBB, enableNarrowZeroStOpt);
return Modified;
}
-// FIXME: Do we need/want a pre-alloc pass like ARM has to try to keep
-// loads and stores near one another?
+// FIXME: Do we need/want a pre-alloc pass like ARM has to try to keep loads and
+// stores near one another? Note: The pre-RA instruction scheduler already has
+// hooks to try to schedule pairable loads/stores together to improve pairing
+// opportunities. Thus, a pre-RA pairing pass may not be worth the effort.
// FIXME: When pairing store instructions it's very possible for this pass to
// hoist a store with a KILL marker above another use (without a KILL marker).
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp b/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp
index 2b4cdf1083be..45083df7ab45 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp
@@ -29,7 +29,7 @@ using namespace llvm;
extern cl::opt<bool> EnableAArch64ELFLocalDynamicTLSGeneration;
AArch64MCInstLower::AArch64MCInstLower(MCContext &ctx, AsmPrinter &printer)
- : Ctx(ctx), Printer(printer), TargetTriple(printer.getTargetTriple()) {}
+ : Ctx(ctx), Printer(printer) {}
MCSymbol *
AArch64MCInstLower::GetGlobalAddressSymbol(const MachineOperand &MO) const {
@@ -153,10 +153,11 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandELF(const MachineOperand &MO,
MCOperand AArch64MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
MCSymbol *Sym) const {
- if (TargetTriple.isOSDarwin())
+ if (Printer.TM.getTargetTriple().isOSDarwin())
return lowerSymbolOperandDarwin(MO, Sym);
- assert(TargetTriple.isOSBinFormatELF() && "Expect Darwin or ELF target");
+ assert(Printer.TM.getTargetTriple().isOSBinFormatELF() &&
+ "Expect Darwin or ELF target");
return lowerSymbolOperandELF(MO, Sym);
}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index 49e7767741ea..ca2860afe13d 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -23,7 +23,7 @@ namespace llvm {
/// AArch64FunctionInfo - This class is derived from MachineFunctionInfo and
/// contains private AArch64-specific information for each MachineFunction.
-class AArch64FunctionInfo : public MachineFunctionInfo {
+class AArch64FunctionInfo final : public MachineFunctionInfo {
/// Number of bytes of arguments this function has on the stack. If the callee
/// is expected to restore the argument stack this should be a multiple of 16,
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64PromoteConstant.cpp b/contrib/llvm/lib/Target/AArch64/AArch64PromoteConstant.cpp
index b1e40510b2ae..8693f76d7c32 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64PromoteConstant.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64PromoteConstant.cpp
@@ -101,9 +101,11 @@ public:
};
static char ID;
- AArch64PromoteConstant() : ModulePass(ID) {}
+ AArch64PromoteConstant() : ModulePass(ID) {
+ initializeAArch64PromoteConstantPass(*PassRegistry::getPassRegistry());
+ }
- const char *getPassName() const override { return "AArch64 Promote Constant"; }
+ StringRef getPassName() const override { return "AArch64 Promote Constant"; }
/// Iterate over the functions and promote the interesting constants into
/// global variables with module scope.
@@ -214,10 +216,6 @@ private:
char AArch64PromoteConstant::ID = 0;
-namespace llvm {
-void initializeAArch64PromoteConstantPass(PassRegistry &);
-}
-
INITIALIZE_PASS_BEGIN(AArch64PromoteConstant, "aarch64-promote-const",
"AArch64 Promote Constant Pass", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp b/contrib/llvm/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp
index 60d8bbd260bb..8f45e6a80a36 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64RedundantCopyElimination.cpp
@@ -39,10 +39,6 @@ using namespace llvm;
STATISTIC(NumCopiesRemoved, "Number of copies removed.");
-namespace llvm {
-void initializeAArch64RedundantCopyEliminationPass(PassRegistry &);
-}
-
namespace {
class AArch64RedundantCopyElimination : public MachineFunctionPass {
const MachineRegisterInfo *MRI;
@@ -50,14 +46,17 @@ class AArch64RedundantCopyElimination : public MachineFunctionPass {
public:
static char ID;
- AArch64RedundantCopyElimination() : MachineFunctionPass(ID) {}
+ AArch64RedundantCopyElimination() : MachineFunctionPass(ID) {
+ initializeAArch64RedundantCopyEliminationPass(
+ *PassRegistry::getPassRegistry());
+ }
bool optimizeCopy(MachineBasicBlock *MBB);
bool runOnMachineFunction(MachineFunction &MF) override;
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "AArch64 Redundant Copy Elimination";
}
};
@@ -153,13 +152,11 @@ bool AArch64RedundantCopyElimination::optimizeCopy(MachineBasicBlock *MBB) {
// CBZ/CBNZ. Conservatively mark as much as we can live.
CompBr->clearRegisterKills(SmallestDef, TRI);
- if (std::none_of(TargetRegs.begin(), TargetRegs.end(),
- [&](unsigned Reg) { return MBB->isLiveIn(Reg); }))
+ if (none_of(TargetRegs, [&](unsigned Reg) { return MBB->isLiveIn(Reg); }))
MBB->addLiveIn(TargetReg);
// Clear any kills of TargetReg between CompBr and the last removed COPY.
- for (MachineInstr &MMI :
- make_range(MBB->begin()->getIterator(), LastChange->getIterator()))
+ for (MachineInstr &MMI : make_range(MBB->begin(), LastChange))
MMI.clearRegisterKills(SmallestDef, TRI);
return true;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
index 0a1831bd9a8c..a5fd2fbdde19 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.cpp
@@ -14,11 +14,16 @@
#include "AArch64RegisterBankInfo.h"
#include "AArch64InstrInfo.h" // For XXXRegClassID.
+#include "llvm/CodeGen/LowLevelType.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+// This file will be TableGen'ed at some point.
+#include "AArch64GenRegisterBankInfo.def"
+
using namespace llvm;
#ifndef LLVM_BUILD_GLOBAL_ISEL
@@ -26,7 +31,16 @@ using namespace llvm;
#endif
AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI)
- : RegisterBankInfo(AArch64::NumRegisterBanks) {
+ : RegisterBankInfo(AArch64::RegBanks, AArch64::NumRegisterBanks) {
+ static bool AlreadyInit = false;
+ // We have only one set of register banks, whatever the subtarget
+ // is. Therefore, the initialization of the RegBanks table should be
+ // done only once. Indeed, the table of all register banks
+ // (AArch64::RegBanks) is unique in the compiler. At some point, it
+ // will get TableGen'ed and this whole constructor will become empty.
+ if (AlreadyInit)
+ return;
+ AlreadyInit = true;
// Initialize the GPR bank.
createRegisterBank(AArch64::GPRRegBankID, "GPR");
// The GPR register bank is fully defined by all the registers in
@@ -34,6 +48,8 @@ AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI)
addRegBankCoverage(AArch64::GPRRegBankID, AArch64::GPR64allRegClassID, TRI);
const RegisterBank &RBGPR = getRegBank(AArch64::GPRRegBankID);
(void)RBGPR;
+ assert(&AArch64::GPRRegBank == &RBGPR &&
+ "The order in RegBanks is messed up");
assert(RBGPR.covers(*TRI.getRegClass(AArch64::GPR32RegClassID)) &&
"Subclass not added?");
assert(RBGPR.getSize() == 64 && "GPRs should hold up to 64-bit");
@@ -45,6 +61,8 @@ AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI)
addRegBankCoverage(AArch64::FPRRegBankID, AArch64::QQQQRegClassID, TRI);
const RegisterBank &RBFPR = getRegBank(AArch64::FPRRegBankID);
(void)RBFPR;
+ assert(&AArch64::FPRRegBank == &RBFPR &&
+ "The order in RegBanks is messed up");
assert(RBFPR.covers(*TRI.getRegClass(AArch64::QQRegClassID)) &&
"Subclass not added?");
assert(RBFPR.covers(*TRI.getRegClass(AArch64::FPR64RegClassID)) &&
@@ -57,10 +75,133 @@ AArch64RegisterBankInfo::AArch64RegisterBankInfo(const TargetRegisterInfo &TRI)
addRegBankCoverage(AArch64::CCRRegBankID, AArch64::CCRRegClassID, TRI);
const RegisterBank &RBCCR = getRegBank(AArch64::CCRRegBankID);
(void)RBCCR;
+ assert(&AArch64::CCRRegBank == &RBCCR &&
+ "The order in RegBanks is messed up");
assert(RBCCR.covers(*TRI.getRegClass(AArch64::CCRRegClassID)) &&
"Class not added?");
assert(RBCCR.getSize() == 32 && "CCR should hold up to 32-bit");
+ // Check that the TableGen'ed-like file is in sync with our expectations.
+ // First, the Idx.
+ assert(AArch64::PartialMappingIdx::PMI_GPR32 ==
+ AArch64::PartialMappingIdx::PMI_FirstGPR &&
+ "GPR32 index not first in the GPR list");
+ assert(AArch64::PartialMappingIdx::PMI_GPR64 ==
+ AArch64::PartialMappingIdx::PMI_LastGPR &&
+ "GPR64 index not last in the GPR list");
+ assert(AArch64::PartialMappingIdx::PMI_FirstGPR <=
+ AArch64::PartialMappingIdx::PMI_LastGPR &&
+ "GPR list is backward");
+ assert(AArch64::PartialMappingIdx::PMI_FPR32 ==
+ AArch64::PartialMappingIdx::PMI_FirstFPR &&
+ "FPR32 index not first in the FPR list");
+ assert(AArch64::PartialMappingIdx::PMI_FPR512 ==
+ AArch64::PartialMappingIdx::PMI_LastFPR &&
+ "FPR512 index not last in the FPR list");
+ assert(AArch64::PartialMappingIdx::PMI_FirstFPR <=
+ AArch64::PartialMappingIdx::PMI_LastFPR &&
+ "FPR list is backward");
+ assert(AArch64::PartialMappingIdx::PMI_FPR32 + 1 ==
+ AArch64::PartialMappingIdx::PMI_FPR64 &&
+ AArch64::PartialMappingIdx::PMI_FPR64 + 1 ==
+ AArch64::PartialMappingIdx::PMI_FPR128 &&
+ AArch64::PartialMappingIdx::PMI_FPR128 + 1 ==
+ AArch64::PartialMappingIdx::PMI_FPR256 &&
+ AArch64::PartialMappingIdx::PMI_FPR256 + 1 ==
+ AArch64::PartialMappingIdx::PMI_FPR512 &&
+ "FPR indices not properly ordered");
+// Now, the content.
+// Check partial mapping.
+#define CHECK_PARTIALMAP(Idx, ValStartIdx, ValLength, RB) \
+ do { \
+ const PartialMapping &Map = \
+ AArch64::PartMappings[AArch64::PartialMappingIdx::Idx - \
+ AArch64::PartialMappingIdx::PMI_Min]; \
+ (void)Map; \
+ assert(Map.StartIdx == ValStartIdx && Map.Length == ValLength && \
+ Map.RegBank == &RB && #Idx " is incorrectly initialized"); \
+ } while (0)
+
+ CHECK_PARTIALMAP(PMI_GPR32, 0, 32, RBGPR);
+ CHECK_PARTIALMAP(PMI_GPR64, 0, 64, RBGPR);
+ CHECK_PARTIALMAP(PMI_FPR32, 0, 32, RBFPR);
+ CHECK_PARTIALMAP(PMI_FPR64, 0, 64, RBFPR);
+ CHECK_PARTIALMAP(PMI_FPR128, 0, 128, RBFPR);
+ CHECK_PARTIALMAP(PMI_FPR256, 0, 256, RBFPR);
+ CHECK_PARTIALMAP(PMI_FPR512, 0, 512, RBFPR);
+
+// Check value mapping.
+#define CHECK_VALUEMAP_IMPL(RBName, Size, Offset) \
+ do { \
+ unsigned PartialMapBaseIdx = \
+ AArch64::PartialMappingIdx::PMI_##RBName##Size - \
+ AArch64::PartialMappingIdx::PMI_Min; \
+ (void)PartialMapBaseIdx; \
+ const ValueMapping &Map = AArch64::getValueMapping( \
+ AArch64::PartialMappingIdx::PMI_First##RBName, Size)[Offset]; \
+ (void)Map; \
+ assert(Map.BreakDown == &AArch64::PartMappings[PartialMapBaseIdx] && \
+ Map.NumBreakDowns == 1 && #RBName #Size \
+ " " #Offset " is incorrectly initialized"); \
+ } while (0)
+
+#define CHECK_VALUEMAP(RBName, Size) CHECK_VALUEMAP_IMPL(RBName, Size, 0)
+
+ CHECK_VALUEMAP(GPR, 32);
+ CHECK_VALUEMAP(GPR, 64);
+ CHECK_VALUEMAP(FPR, 32);
+ CHECK_VALUEMAP(FPR, 64);
+ CHECK_VALUEMAP(FPR, 128);
+ CHECK_VALUEMAP(FPR, 256);
+ CHECK_VALUEMAP(FPR, 512);
+
+// Check the value mapping for 3-operand instructions where all the operands
+// map to the same value mapping.
+#define CHECK_VALUEMAP_3OPS(RBName, Size) \
+ do { \
+ CHECK_VALUEMAP_IMPL(RBName, Size, 0); \
+ CHECK_VALUEMAP_IMPL(RBName, Size, 1); \
+ CHECK_VALUEMAP_IMPL(RBName, Size, 2); \
+ } while (0)
+
+ CHECK_VALUEMAP_3OPS(GPR, 32);
+ CHECK_VALUEMAP_3OPS(GPR, 64);
+ CHECK_VALUEMAP_3OPS(FPR, 32);
+ CHECK_VALUEMAP_3OPS(FPR, 64);
+ CHECK_VALUEMAP_3OPS(FPR, 128);
+ CHECK_VALUEMAP_3OPS(FPR, 256);
+ CHECK_VALUEMAP_3OPS(FPR, 512);
+
+#define CHECK_VALUEMAP_CROSSREGCPY(RBNameDst, RBNameSrc, Size) \
+ do { \
+ unsigned PartialMapDstIdx = \
+ AArch64::PMI_##RBNameDst##Size - AArch64::PMI_Min; \
+ unsigned PartialMapSrcIdx = \
+ AArch64::PMI_##RBNameSrc##Size - AArch64::PMI_Min; \
+ (void) PartialMapDstIdx; \
+ (void) PartialMapSrcIdx; \
+ const ValueMapping *Map = AArch64::getCopyMapping( \
+ AArch64::PMI_First##RBNameDst == AArch64::PMI_FirstGPR, \
+ AArch64::PMI_First##RBNameSrc == AArch64::PMI_FirstGPR, Size); \
+ (void) Map; \
+ assert(Map[0].BreakDown == &AArch64::PartMappings[PartialMapDstIdx] && \
+ Map[0].NumBreakDowns == 1 && #RBNameDst #Size \
+ " Dst is incorrectly initialized"); \
+ assert(Map[1].BreakDown == &AArch64::PartMappings[PartialMapSrcIdx] && \
+ Map[1].NumBreakDowns == 1 && #RBNameSrc #Size \
+ " Src is incorrectly initialized"); \
+ \
+ } while (0)
+
+ CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 32);
+ CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 32);
+ CHECK_VALUEMAP_CROSSREGCPY(GPR, GPR, 64);
+ CHECK_VALUEMAP_CROSSREGCPY(GPR, FPR, 64);
+ CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 32);
+ CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 32);
+ CHECK_VALUEMAP_CROSSREGCPY(FPR, FPR, 64);
+ CHECK_VALUEMAP_CROSSREGCPY(FPR, GPR, 64);
+
assert(verify(TRI) && "Invalid register bank information");
}
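The CHECK_PARTIALMAP and CHECK_VALUEMAP asserts above only verify that the statically allocated AArch64::PartMappings table (indexed by a PMI_* value minus PMI_Min) was laid out in the expected order: GPR entries first, then FPR entries by growing size. A small self-contained model of that table and its index arithmetic, with simplified types; the real rows also record the owning register bank and will eventually be emitted by TableGen:

    #include <cassert>

    // Index of each partial mapping; GPR entries first, then FPR by size.
    enum PartialMappingIdx {
      PMI_GPR32, PMI_GPR64,
      PMI_FPR32, PMI_FPR64, PMI_FPR128, PMI_FPR256, PMI_FPR512,
      PMI_Min = PMI_GPR32
    };

    struct PartialMapping {
      unsigned StartIdx; // first bit of the value covered by this mapping
      unsigned Length;   // number of bits covered
    };

    // One row per PartialMappingIdx, in the same order as the enum.
    static const PartialMapping PartMappings[] = {
        {0, 32}, {0, 64},                              // GPR32, GPR64
        {0, 32}, {0, 64}, {0, 128}, {0, 256}, {0, 512} // FPR32 .. FPR512
    };

    const PartialMapping &getPartialMapping(PartialMappingIdx Idx) {
      return PartMappings[Idx - PMI_Min];
    }

    int main() {
      // Same shape of check as CHECK_PARTIALMAP above.
      assert(getPartialMapping(PMI_FPR128).Length == 128);
    }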
@@ -72,7 +213,16 @@ unsigned AArch64RegisterBankInfo::copyCost(const RegisterBank &A,
// Will introduce other hooks for different size:
// * extract cost.
// * build_sequence cost.
- // TODO: Add more accurate cost for FPR to/from GPR.
+
+ // Copy from (resp. to) GPR to (resp. from) FPR involves FMOV.
+ // FIXME: This should be deduced from the scheduling model.
+ if (&A == &AArch64::GPRRegBank && &B == &AArch64::FPRRegBank)
+ // FMOVXDr or FMOVWSr.
+ return 5;
+ if (&A == &AArch64::FPRRegBank && &B == &AArch64::GPRRegBank)
+ // FMOVDXr or FMOVSWr.
+ return 4;
+
return RegisterBankInfo::copyCost(A, B, Size);
}
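The hook just above prices a cross-bank copy as an FMOV between the integer and FP register files (5 for a GPR destination fed from FPR, 4 the other way round) and keeps the generic cost otherwise. A standalone sketch of that heuristic, with a simplified Bank enum standing in for the real RegisterBank objects; the numbers are the ones hard-coded in the patch, not values derived from a scheduling model:

    enum class Bank { GPR, FPR, CCR };

    // Cost of materializing Dst = COPY Src.  Cross-bank copies need an FMOV
    // between register files; same-bank copies keep the default cost.
    unsigned copyCost(Bank Dst, Bank Src, unsigned /*SizeInBits*/) {
      if (Dst == Bank::GPR && Src == Bank::FPR)
        return 5; // e.g. FMOVXDr / FMOVWSr
      if (Dst == Bank::FPR && Src == Bank::GPR)
        return 4; // e.g. FMOVDXr / FMOVSWr
      return 1;   // plain same-bank register copy
    }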
@@ -116,15 +266,15 @@ const RegisterBank &AArch64RegisterBankInfo::getRegBankFromRegClass(
RegisterBankInfo::InstructionMappings
AArch64RegisterBankInfo::getInstrAlternativeMappings(
const MachineInstr &MI) const {
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ const TargetSubtargetInfo &STI = MF.getSubtarget();
+ const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+
switch (MI.getOpcode()) {
case TargetOpcode::G_OR: {
// 32 and 64-bit or can be mapped on either FPR or
// GPR for the same cost.
- const MachineFunction &MF = *MI.getParent()->getParent();
- const TargetSubtargetInfo &STI = MF.getSubtarget();
- const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
- const MachineRegisterInfo &MRI = MF.getRegInfo();
-
unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
if (Size != 32 && Size != 64)
break;
@@ -134,14 +284,81 @@ AArch64RegisterBankInfo::getInstrAlternativeMappings(
if (MI.getNumOperands() != 3)
break;
InstructionMappings AltMappings;
- InstructionMapping GPRMapping(/*ID*/ 1, /*Cost*/ 1, /*NumOperands*/ 3);
- InstructionMapping FPRMapping(/*ID*/ 2, /*Cost*/ 1, /*NumOperands*/ 3);
- for (unsigned Idx = 0; Idx != 3; ++Idx) {
- GPRMapping.setOperandMapping(Idx, Size,
- getRegBank(AArch64::GPRRegBankID));
- FPRMapping.setOperandMapping(Idx, Size,
- getRegBank(AArch64::FPRRegBankID));
- }
+ InstructionMapping GPRMapping(
+ /*ID*/ 1, /*Cost*/ 1,
+ AArch64::getValueMapping(AArch64::PMI_FirstGPR, Size),
+ /*NumOperands*/ 3);
+ InstructionMapping FPRMapping(
+ /*ID*/ 2, /*Cost*/ 1,
+ AArch64::getValueMapping(AArch64::PMI_FirstFPR, Size),
+ /*NumOperands*/ 3);
+
+ AltMappings.emplace_back(std::move(GPRMapping));
+ AltMappings.emplace_back(std::move(FPRMapping));
+ return AltMappings;
+ }
+ case TargetOpcode::G_BITCAST: {
+ unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
+ if (Size != 32 && Size != 64)
+ break;
+
+ // If the instruction has any implicit-defs or uses,
+ // do not mess with it.
+ if (MI.getNumOperands() != 2)
+ break;
+
+ InstructionMappings AltMappings;
+ InstructionMapping GPRMapping(
+ /*ID*/ 1, /*Cost*/ 1,
+ AArch64::getCopyMapping(/*DstIsGPR*/ true, /*SrcIsGPR*/ true, Size),
+ /*NumOperands*/ 2);
+ InstructionMapping FPRMapping(
+ /*ID*/ 2, /*Cost*/ 1,
+ AArch64::getCopyMapping(/*DstIsGPR*/ false, /*SrcIsGPR*/ false, Size),
+ /*NumOperands*/ 2);
+ InstructionMapping GPRToFPRMapping(
+ /*ID*/ 3,
+ /*Cost*/ copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank, Size),
+ AArch64::getCopyMapping(/*DstIsGPR*/ false, /*SrcIsGPR*/ true, Size),
+ /*NumOperands*/ 2);
+ InstructionMapping FPRToGPRMapping(
+ /*ID*/ 3,
+ /*Cost*/ copyCost(AArch64::GPRRegBank, AArch64::FPRRegBank, Size),
+ AArch64::getCopyMapping(/*DstIsGPR*/ true, /*SrcIsGPR*/ false, Size),
+ /*NumOperands*/ 2);
+
+ AltMappings.emplace_back(std::move(GPRMapping));
+ AltMappings.emplace_back(std::move(FPRMapping));
+ AltMappings.emplace_back(std::move(GPRToFPRMapping));
+ AltMappings.emplace_back(std::move(FPRToGPRMapping));
+ return AltMappings;
+ }
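For G_BITCAST the pass offers four alternative mappings: keep everything on GPR, keep everything on FPR, or cross banks in either direction, with the cross-bank variants priced via copyCost so the greedy register-bank selector can weigh them against each other (the patch itself charges both cross variants with the GPR/FPR cost). A toy enumeration of those alternatives, reusing the simplified copyCost sketch shown earlier:

    #include <initializer_list>
    #include <vector>

    enum class Bank { GPR, FPR };

    // Same numbers as the copyCost hook earlier in this patch.
    static unsigned copyCost(Bank Dst, Bank Src) {
      if (Dst == Bank::GPR && Src == Bank::FPR) return 5;
      if (Dst == Bank::FPR && Src == Bank::GPR) return 4;
      return 1;
    }

    struct Alternative {
      Bank Dst, Src;
      unsigned Cost;
    };

    // One alternative per (destination bank, source bank) combination.
    std::vector<Alternative> bitcastAlternatives() {
      std::vector<Alternative> Alts;
      for (Bank Dst : {Bank::GPR, Bank::FPR})
        for (Bank Src : {Bank::GPR, Bank::FPR})
          Alts.push_back({Dst, Src, copyCost(Dst, Src)});
      return Alts;
    }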
+ case TargetOpcode::G_LOAD: {
+ unsigned Size = getSizeInBits(MI.getOperand(0).getReg(), MRI, TRI);
+ if (Size != 64)
+ break;
+
+ // If the instruction has any implicit-defs or uses,
+ // do not mess with it.
+ if (MI.getNumOperands() != 2)
+ break;
+
+ InstructionMappings AltMappings;
+ InstructionMapping GPRMapping(
+ /*ID*/ 1, /*Cost*/ 1,
+ getOperandsMapping(
+ {AArch64::getValueMapping(AArch64::PMI_FirstGPR, Size),
+ // Addresses are GPR 64-bit.
+ AArch64::getValueMapping(AArch64::PMI_FirstGPR, 64)}),
+ /*NumOperands*/ 2);
+ InstructionMapping FPRMapping(
+ /*ID*/ 2, /*Cost*/ 1,
+ getOperandsMapping(
+ {AArch64::getValueMapping(AArch64::PMI_FirstFPR, Size),
+ // Addresses are GPR 64-bit.
+ AArch64::getValueMapping(AArch64::PMI_FirstGPR, 64)}),
+ /*NumOperands*/ 2);
+
AltMappings.emplace_back(std::move(GPRMapping));
AltMappings.emplace_back(std::move(FPRMapping));
return AltMappings;
@@ -155,10 +372,12 @@ AArch64RegisterBankInfo::getInstrAlternativeMappings(
void AArch64RegisterBankInfo::applyMappingImpl(
const OperandsMapper &OpdMapper) const {
switch (OpdMapper.getMI().getOpcode()) {
- case TargetOpcode::G_OR: {
+ case TargetOpcode::G_OR:
+ case TargetOpcode::G_BITCAST:
+ case TargetOpcode::G_LOAD: {
// Those ID must match getInstrAlternativeMappings.
- assert((OpdMapper.getInstrMapping().getID() == 1 ||
- OpdMapper.getInstrMapping().getID() == 2) &&
+ assert((OpdMapper.getInstrMapping().getID() >= 1 &&
+ OpdMapper.getInstrMapping().getID() <= 4) &&
"Don't know how to handle that ID");
return applyDefaultMapping(OpdMapper);
}
@@ -166,3 +385,193 @@ void AArch64RegisterBankInfo::applyMappingImpl(
llvm_unreachable("Don't know how to handle that operation");
}
}
+
+/// Returns whether opcode \p Opc is a pre-isel generic floating-point opcode,
+/// having only floating-point operands.
+static bool isPreISelGenericFloatingPointOpcode(unsigned Opc) {
+ switch (Opc) {
+ case TargetOpcode::G_FADD:
+ case TargetOpcode::G_FSUB:
+ case TargetOpcode::G_FMUL:
+ case TargetOpcode::G_FDIV:
+ case TargetOpcode::G_FCONSTANT:
+ case TargetOpcode::G_FPEXT:
+ case TargetOpcode::G_FPTRUNC:
+ return true;
+ }
+ return false;
+}
+
+RegisterBankInfo::InstructionMapping
+AArch64RegisterBankInfo::getSameKindOfOperandsMapping(const MachineInstr &MI) {
+ const unsigned Opc = MI.getOpcode();
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ unsigned NumOperands = MI.getNumOperands();
+ assert(NumOperands <= 3 &&
+ "This code is for instructions with 3 or less operands");
+
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ unsigned Size = Ty.getSizeInBits();
+ bool IsFPR = Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc);
+
+#ifndef NDEBUG
+ // Make sure all the operands are using similar size and type.
+ // Should probably be checked by the machine verifier.
+ // This code won't catch cases where the number of lanes is
+ // different between the operands.
+ // If we want to go to that level of detail, it is probably
+ // best to check that the types are the same, period.
+ // Currently, we just check that the register banks are the same
+ // for each type.
+ for (unsigned Idx = 1; Idx != NumOperands; ++Idx) {
+ LLT OpTy = MRI.getType(MI.getOperand(Idx).getReg());
+ assert(AArch64::getRegBankBaseIdxOffset(OpTy.getSizeInBits()) ==
+ AArch64::getRegBankBaseIdxOffset(Size) &&
+ "Operand has incompatible size");
+ bool OpIsFPR = OpTy.isVector() || isPreISelGenericFloatingPointOpcode(Opc);
+ (void)OpIsFPR;
+ assert(IsFPR == OpIsFPR && "Operand has incompatible type");
+ }
+#endif // End NDEBUG.
+
+ AArch64::PartialMappingIdx RBIdx =
+ IsFPR ? AArch64::PMI_FirstFPR : AArch64::PMI_FirstGPR;
+
+ return InstructionMapping{DefaultMappingID, 1,
+ AArch64::getValueMapping(RBIdx, Size), NumOperands};
+}
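getSameKindOfOperandsMapping picks one register bank for the whole instruction, FPR if the result type is a vector or the opcode is a floating-point one, GPR otherwise, and reuses that single value mapping for every operand. A hedged standalone sketch of just that decision, with toy Opcode and Type definitions in place of the real TargetOpcode and LLT:

    enum Opcode { G_ADD, G_OR, G_FADD, G_FMUL, G_FDIV };
    enum class Bank { GPR, FPR };

    struct Type {
      unsigned SizeInBits;
      bool IsVector;
    };

    static bool isFloatingPointOpcode(Opcode Opc) {
      return Opc == G_FADD || Opc == G_FMUL || Opc == G_FDIV;
    }

    // Bank shared by every operand of a "same kind of operands" instruction:
    // vectors and scalar FP ops live in the FP/SIMD registers, the rest in GPRs.
    Bank pickOperandBank(Opcode Opc, const Type &ResultTy) {
      if (ResultTy.IsVector || isFloatingPointOpcode(Opc))
        return Bank::FPR;
      return Bank::GPR;
    }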
+
+RegisterBankInfo::InstructionMapping
+AArch64RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
+ const unsigned Opc = MI.getOpcode();
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ // Try the default logic for non-generic instructions that are either copies
+ // or already have some operands assigned to banks.
+ if (!isPreISelGenericOpcode(Opc)) {
+ RegisterBankInfo::InstructionMapping Mapping = getInstrMappingImpl(MI);
+ if (Mapping.isValid())
+ return Mapping;
+ }
+
+ switch (Opc) {
+ // G_{F|S|U}REM are not listed because they are not legal.
+ // Arithmetic ops.
+ case TargetOpcode::G_ADD:
+ case TargetOpcode::G_SUB:
+ case TargetOpcode::G_GEP:
+ case TargetOpcode::G_MUL:
+ case TargetOpcode::G_SDIV:
+ case TargetOpcode::G_UDIV:
+ // Bitwise ops.
+ case TargetOpcode::G_AND:
+ case TargetOpcode::G_OR:
+ case TargetOpcode::G_XOR:
+ // Shifts.
+ case TargetOpcode::G_SHL:
+ case TargetOpcode::G_LSHR:
+ case TargetOpcode::G_ASHR:
+ // Floating point ops.
+ case TargetOpcode::G_FADD:
+ case TargetOpcode::G_FSUB:
+ case TargetOpcode::G_FMUL:
+ case TargetOpcode::G_FDIV:
+ return getSameKindOfOperandsMapping(MI);
+ case TargetOpcode::G_BITCAST: {
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
+ unsigned Size = DstTy.getSizeInBits();
+ bool DstIsGPR = !DstTy.isVector();
+ bool SrcIsGPR = !SrcTy.isVector();
+ const RegisterBank &DstRB =
+ DstIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
+ const RegisterBank &SrcRB =
+ SrcIsGPR ? AArch64::GPRRegBank : AArch64::FPRRegBank;
+ return InstructionMapping{DefaultMappingID, copyCost(DstRB, SrcRB, Size),
+ AArch64::getCopyMapping(DstIsGPR, SrcIsGPR, Size),
+ /*NumOperands*/ 2};
+ }
+ case TargetOpcode::G_SEQUENCE:
+ // FIXME: support this, but the generic code is really not going to do
+ // anything sane.
+ return InstructionMapping();
+ default:
+ break;
+ }
+
+ unsigned NumOperands = MI.getNumOperands();
+
+ // Track the size and bank of each register. We don't do partial mappings.
+ SmallVector<unsigned, 4> OpSize(NumOperands);
+ SmallVector<AArch64::PartialMappingIdx, 4> OpRegBankIdx(NumOperands);
+ for (unsigned Idx = 0; Idx < NumOperands; ++Idx) {
+ auto &MO = MI.getOperand(Idx);
+ if (!MO.isReg())
+ continue;
+
+ LLT Ty = MRI.getType(MO.getReg());
+ OpSize[Idx] = Ty.getSizeInBits();
+
+ // As a top-level guess, vectors go in FPRs, scalars and pointers in GPRs.
+ // For floating-point instructions, scalars go in FPRs.
+ if (Ty.isVector() || isPreISelGenericFloatingPointOpcode(Opc))
+ OpRegBankIdx[Idx] = AArch64::PMI_FirstFPR;
+ else
+ OpRegBankIdx[Idx] = AArch64::PMI_FirstGPR;
+ }
+
+ unsigned Cost = 1;
+ // Some of the floating-point instructions have mixed GPR and FPR operands:
+ // fine-tune the computed mapping.
+ switch (Opc) {
+ case TargetOpcode::G_SITOFP:
+ case TargetOpcode::G_UITOFP: {
+ OpRegBankIdx = {AArch64::PMI_FirstFPR, AArch64::PMI_FirstGPR};
+ break;
+ }
+ case TargetOpcode::G_FPTOSI:
+ case TargetOpcode::G_FPTOUI: {
+ OpRegBankIdx = {AArch64::PMI_FirstGPR, AArch64::PMI_FirstFPR};
+ break;
+ }
+ case TargetOpcode::G_FCMP: {
+ OpRegBankIdx = {AArch64::PMI_FirstGPR,
+ /* Predicate */ AArch64::PMI_None, AArch64::PMI_FirstFPR,
+ AArch64::PMI_FirstFPR};
+ break;
+ }
+ case TargetOpcode::G_BITCAST: {
+ // This is going to be a cross register bank copy and this is expensive.
+ if (OpRegBankIdx[0] != OpRegBankIdx[1])
+ Cost =
+ copyCost(*AArch64::PartMappings[OpRegBankIdx[0]].RegBank,
+ *AArch64::PartMappings[OpRegBankIdx[1]].RegBank, OpSize[0]);
+ break;
+ }
+ case TargetOpcode::G_LOAD: {
+ // Loading in the vector unit is slightly more expensive.
+ // This is actually only true for the LD1R and co. instructions,
+ // but for the fast mode this number does not matter, and
+ // for the greedy mode the cost of the cross-bank copy will
+ // offset this number.
+ // FIXME: Should be derived from the scheduling model.
+ if (OpRegBankIdx[0] >= AArch64::PMI_FirstFPR)
+ Cost = 2;
+ }
+ }
+
+ // Finally construct the computed mapping.
+ RegisterBankInfo::InstructionMapping Mapping =
+ InstructionMapping{DefaultMappingID, Cost, nullptr, NumOperands};
+ SmallVector<const ValueMapping *, 8> OpdsMapping(NumOperands);
+ for (unsigned Idx = 0; Idx < NumOperands; ++Idx)
+ if (MI.getOperand(Idx).isReg())
+ OpdsMapping[Idx] =
+ AArch64::getValueMapping(OpRegBankIdx[Idx], OpSize[Idx]);
+
+ Mapping.setOperandsMapping(getOperandsMapping(OpdsMapping));
+ return Mapping;
+}
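After the generic per-operand guess (vectors and FP values to FPR, scalars and pointers to GPR), a few opcodes get their operand banks overridden because they inherently mix the register files: integer-to-FP conversions read a GPR and write an FPR, FP-to-integer conversions do the opposite, and FP compares produce a GPR result from FPR inputs, with an immediate predicate operand in between. A compact sketch of that override table, using toy enums in place of TargetOpcode and the partial-mapping indices:

    #include <vector>

    enum Opcode { G_SITOFP, G_UITOFP, G_FPTOSI, G_FPTOUI, G_FCMP, G_OTHER };
    enum Bank { None, GPR, FPR };

    // Per-operand bank override for the opcodes that mix register files;
    // an empty vector means "keep the default vector/scalar guess".
    std::vector<Bank> mixedBankOperands(Opcode Opc) {
      switch (Opc) {
      case G_SITOFP:
      case G_UITOFP: return {FPR, GPR};            // dst, src
      case G_FPTOSI:
      case G_FPTOUI: return {GPR, FPR};            // dst, src
      case G_FCMP:   return {GPR, None, FPR, FPR}; // dst, predicate, lhs, rhs
      default:       return {};
      }
    }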
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.h
index 907bcfdea161..f763235049d4 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64RegisterBankInfo.h
@@ -27,43 +27,40 @@ enum {
CCRRegBankID = 2, /// Conditional register: NZCV.
NumRegisterBanks
};
+
+extern RegisterBank GPRRegBank;
+extern RegisterBank FPRRegBank;
+extern RegisterBank CCRRegBank;
} // End AArch64 namespace.
/// This class provides the information for the target register banks.
-class AArch64RegisterBankInfo : public RegisterBankInfo {
+class AArch64RegisterBankInfo final : public RegisterBankInfo {
/// See RegisterBankInfo::applyMapping.
void applyMappingImpl(const OperandsMapper &OpdMapper) const override;
+ /// Get an instruction mapping where all the operands map to
+ /// the same register bank and have similar size.
+ ///
+ /// \pre MI.getNumOperands() <= 3
+ ///
+ /// \return An InstructionMapping with a statically allocated
+ /// OperandsMapping.
+ static InstructionMapping
+ getSameKindOfOperandsMapping(const MachineInstr &MI);
+
public:
AArch64RegisterBankInfo(const TargetRegisterInfo &TRI);
- /// Get the cost of a copy from \p B to \p A, or put differently,
- /// get the cost of A = COPY B. Since register banks may cover
- /// different size, \p Size specifies what will be the size in bits
- /// that will be copied around.
- ///
- /// \note Since this is a copy, both registers have the same size.
+
unsigned copyCost(const RegisterBank &A, const RegisterBank &B,
unsigned Size) const override;
- /// Get a register bank that covers \p RC.
- ///
- /// \pre \p RC is a user-defined register class (as opposed as one
- /// generated by TableGen).
- ///
- /// \note The mapping RC -> RegBank could be built while adding the
- /// coverage for the register banks. However, we do not do it, because,
- /// at least for now, we only need this information for register classes
- /// that are used in the description of instruction. In other words,
- /// there are just a handful of them and we do not want to waste space.
- ///
- /// \todo This should be TableGen'ed.
const RegisterBank &
getRegBankFromRegClass(const TargetRegisterClass &RC) const override;
- /// Get the alternative mappings for \p MI.
- /// Alternative in the sense different from getInstrMapping.
InstructionMappings
getInstrAlternativeMappings(const MachineInstr &MI) const override;
+
+ InstructionMapping getInstrMapping(const MachineInstr &MI) const override;
};
} // End llvm namespace.
#endif
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index af867da4823d..98fad71aa18a 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -118,26 +118,27 @@ AArch64RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
// FIXME: avoid re-calculating this every time.
BitVector Reserved(getNumRegs());
- Reserved.set(AArch64::SP);
- Reserved.set(AArch64::XZR);
- Reserved.set(AArch64::WSP);
- Reserved.set(AArch64::WZR);
+ markSuperRegs(Reserved, AArch64::SP);
+ markSuperRegs(Reserved, AArch64::XZR);
+ markSuperRegs(Reserved, AArch64::WSP);
+ markSuperRegs(Reserved, AArch64::WZR);
if (TFI->hasFP(MF) || TT.isOSDarwin()) {
- Reserved.set(AArch64::FP);
- Reserved.set(AArch64::W29);
+ markSuperRegs(Reserved, AArch64::FP);
+ markSuperRegs(Reserved, AArch64::W29);
}
if (MF.getSubtarget<AArch64Subtarget>().isX18Reserved()) {
- Reserved.set(AArch64::X18); // Platform register
- Reserved.set(AArch64::W18);
+ markSuperRegs(Reserved, AArch64::X18); // Platform register
+ markSuperRegs(Reserved, AArch64::W18);
}
if (hasBasePointer(MF)) {
- Reserved.set(AArch64::X19);
- Reserved.set(AArch64::W19);
+ markSuperRegs(Reserved, AArch64::X19);
+ markSuperRegs(Reserved, AArch64::W19);
}
+ assert(checkAllSuperRegsMarked(Reserved));
return Reserved;
}
@@ -167,6 +168,10 @@ bool AArch64RegisterInfo::isReservedReg(const MachineFunction &MF,
return false;
}
+bool AArch64RegisterInfo::isConstantPhysReg(unsigned PhysReg) const {
+ return PhysReg == AArch64::WZR || PhysReg == AArch64::XZR;
+}
+
const TargetRegisterClass *
AArch64RegisterInfo::getPointerRegClass(const MachineFunction &MF,
unsigned Kind) const {
@@ -183,7 +188,7 @@ AArch64RegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
unsigned AArch64RegisterInfo::getBaseRegister() const { return AArch64::X19; }
bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
// In the presence of variable sized objects, if the fixed stack size is
// large enough that referencing from the FP won't result in things being
@@ -192,7 +197,7 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
// Furthermore, if both variable sized objects are present, and the
// stack needs to be dynamically re-aligned, the base pointer is the only
// reliable way to reference the locals.
- if (MFI->hasVarSizedObjects()) {
+ if (MFI.hasVarSizedObjects()) {
if (needsStackRealignment(MF))
return true;
// Conservatively estimate whether the negative offset from the frame
@@ -202,7 +207,7 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
// If it's wrong, we'll materialize the constant and still get to the
// object; it's just suboptimal. Negative offsets use the unscaled
// load/store instructions, which have a 9-bit signed immediate.
- return MFI->getLocalFrameSize() >= 256;
+ return MFI.getLocalFrameSize() >= 256;
}
return false;
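The 256-byte threshold above follows from the unscaled AArch64 load/store forms (LDUR/STUR), whose immediate is a signed 9-bit byte offset covering -256..+255; once the local frame reaches 256 bytes, a negative FP-relative offset is no longer guaranteed to fit in a single instruction. A tiny illustrative range check (not the LLVM API):

    #include <cstdint>

    // True if Offset fits the signed 9-bit immediate of an unscaled
    // AArch64 load/store (LDUR/STUR): -256 .. +255 bytes.
    bool fitsUnscaledImm9(int64_t Offset) {
      return Offset >= -256 && Offset <= 255;
    }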
@@ -226,11 +231,11 @@ bool AArch64RegisterInfo::requiresVirtualBaseRegisters(
bool
AArch64RegisterInfo::useFPForScavengingIndex(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
// AArch64FrameLowering::resolveFrameIndexReference() can always fall back
// to the stack pointer, so only put the emergency spill slot next to the
// FP when there's no better way to access it (SP or base pointer).
- return MFI->hasVarSizedObjects() && !hasBasePointer(MF);
+ return MFI.hasVarSizedObjects() && !hasBasePointer(MF);
}
bool AArch64RegisterInfo::requiresFrameIndexScavenging(
@@ -240,10 +245,10 @@ bool AArch64RegisterInfo::requiresFrameIndexScavenging(
bool
AArch64RegisterInfo::cannotEliminateFrame(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- if (MF.getTarget().Options.DisableFramePointerElim(MF) && MFI->adjustsStack())
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (MF.getTarget().Options.DisableFramePointerElim(MF) && MFI.adjustsStack())
return true;
- return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken();
+ return MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken();
}
/// needsFrameBaseReg - Returns true if the instruction's frame index
@@ -275,7 +280,7 @@ bool AArch64RegisterInfo::needsFrameBaseReg(MachineInstr *MI,
// so it'll be negative.
MachineFunction &MF = *MI->getParent()->getParent();
const AArch64FrameLowering *TFI = getFrameLowering(MF);
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
// Estimate an offset from the frame pointer.
// Conservatively assume all GPR callee-saved registers get pushed.
@@ -285,7 +290,7 @@ bool AArch64RegisterInfo::needsFrameBaseReg(MachineInstr *MI,
// The incoming offset is relating to the SP at the start of the function,
// but when we access the local it'll be relative to the SP after local
// allocation, so adjust our SP-relative offset by that allocation size.
- Offset += MFI->getLocalFrameSize();
+ Offset += MFI.getLocalFrameSize();
// Assume that we'll have at least some spill slots allocated.
// FIXME: This is a total SWAG number. We should run some statistics
// and pick a real one.
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
index f33f788fd437..8ce893516fe2 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
@@ -24,8 +24,7 @@ class RegScavenger;
class TargetRegisterClass;
class Triple;
-struct AArch64RegisterInfo : public AArch64GenRegisterInfo {
-private:
+class AArch64RegisterInfo final : public AArch64GenRegisterInfo {
const Triple &TT;
public:
@@ -36,7 +35,7 @@ public:
/// Code Generation virtual methods...
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
const MCPhysReg *
- getCalleeSavedRegsViaCopy(const MachineFunction *MF) const override;
+ getCalleeSavedRegsViaCopy(const MachineFunction *MF) const;
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID) const override;
@@ -63,6 +62,7 @@ public:
CallingConv::ID) const;
BitVector getReservedRegs(const MachineFunction &MF) const override;
+ bool isConstantPhysReg(unsigned PhysReg) const override;
const TargetRegisterClass *
getPointerRegClass(const MachineFunction &MF,
unsigned Kind = 0) const override;
@@ -95,6 +95,10 @@ public:
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const override;
+
+ bool trackLivenessAfterRegAlloc(const MachineFunction&) const override {
+ return true;
+ }
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
index 5fbaff00a5e7..7e29ee5e9baf 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td
@@ -422,7 +422,7 @@ def DD : RegisterClass<"AArch64", [untyped], 64, (add DSeqPairs)> {
let Size = 128;
}
def DDD : RegisterClass<"AArch64", [untyped], 64, (add DSeqTriples)> {
- let Size = 196;
+ let Size = 192;
}
def DDDD : RegisterClass<"AArch64", [untyped], 64, (add DSeqQuads)> {
let Size = 256;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SchedA57.td b/contrib/llvm/lib/Target/AArch64/AArch64SchedA57.td
index a266351f7ffc..99c48d0146e4 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64SchedA57.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SchedA57.td
@@ -92,7 +92,7 @@ def : SchedAlias<WriteFCvt, A57Write_5cyc_1V>;
def : SchedAlias<WriteFCopy, A57Write_5cyc_1L>;
def : SchedAlias<WriteFImm, A57Write_3cyc_1V>;
def : SchedAlias<WriteFMul, A57Write_5cyc_1V>;
-def : SchedAlias<WriteFDiv, A57Write_18cyc_1X>;
+def : SchedAlias<WriteFDiv, A57Write_17cyc_1W>;
def : SchedAlias<WriteV, A57Write_3cyc_1V>;
def : SchedAlias<WriteVLD, A57Write_5cyc_1L>;
def : SchedAlias<WriteVST, A57Write_1cyc_1S>;
@@ -444,19 +444,19 @@ def : InstRW<[A57Write_5cyc_1V], (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v2f
def : InstRW<[A57Write_5cyc_2V], (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v4f32|v2f64|v4i32|v2i64)")>;
// ASIMD FP divide, D-form, F32
-def : InstRW<[A57Write_18cyc_1X], (instregex "FDIVv2f32")>;
+def : InstRW<[A57Write_17cyc_1W], (instregex "FDIVv2f32")>;
// ASIMD FP divide, Q-form, F32
-def : InstRW<[A57Write_36cyc_2X], (instregex "FDIVv4f32")>;
+def : InstRW<[A57Write_34cyc_2W], (instregex "FDIVv4f32")>;
// ASIMD FP divide, Q-form, F64
-def : InstRW<[A57Write_64cyc_2X], (instregex "FDIVv2f64")>;
+def : InstRW<[A57Write_64cyc_2W], (instregex "FDIVv2f64")>;
// Note: These were simply duplicated from ASIMD FDIV because of missing documentation
// ASIMD FP square root, D-form, F32
-def : InstRW<[A57Write_18cyc_1X], (instregex "FSQRTv2f32")>;
+def : InstRW<[A57Write_17cyc_1W], (instregex "FSQRTv2f32")>;
// ASIMD FP square root, Q-form, F32
-def : InstRW<[A57Write_36cyc_2X], (instregex "FSQRTv4f32")>;
+def : InstRW<[A57Write_34cyc_2W], (instregex "FSQRTv4f32")>;
// ASIMD FP square root, Q-form, F64
-def : InstRW<[A57Write_64cyc_2X], (instregex "FSQRTv2f64")>;
+def : InstRW<[A57Write_64cyc_2W], (instregex "FSQRTv2f64")>;
// ASIMD FP max/min, normal, D-form
def : InstRW<[A57Write_5cyc_1V], (instregex "^(FMAX|FMIN)(NM)?(v2f32)")>;
@@ -551,15 +551,15 @@ def : InstRW<[A57WriteFPMA, A57ReadFPM, A57ReadFPM, A57ReadFPMA5], (instregex "^
def : InstRW<[A57Write_10cyc_1L_1V], (instregex "^[FSU]CVT[AMNPZ][SU](_Int)?[SU]?[XW]?[DS]?[rds]i?")>;
def : InstRW<[A57Write_10cyc_1L_1V], (instregex "^[SU]CVTF")>;
-def : InstRW<[A57Write_32cyc_1X], (instrs FDIVDrr)>;
-def : InstRW<[A57Write_18cyc_1X], (instrs FDIVSrr)>;
+def : InstRW<[A57Write_32cyc_1W], (instrs FDIVDrr)>;
+def : InstRW<[A57Write_17cyc_1W], (instrs FDIVSrr)>;
def : InstRW<[A57Write_5cyc_1V], (instregex "^F(MAX|MIN).+rr")>;
def : InstRW<[A57Write_5cyc_1V], (instregex "^FRINT.+r")>;
-def : InstRW<[A57Write_32cyc_1X], (instrs FSQRTDr)>;
-def : InstRW<[A57Write_18cyc_1X], (instrs FSQRTSr)>;
+def : InstRW<[A57Write_32cyc_1W], (instrs FSQRTDr)>;
+def : InstRW<[A57Write_17cyc_1W], (instrs FSQRTSr)>;
def : InstRW<[A57Write_5cyc_1L, WriteLDHi], (instrs LDNPDi)>;
def : InstRW<[A57Write_6cyc_2L, WriteLDHi], (instrs LDNPQi)>;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SchedA57WriteRes.td b/contrib/llvm/lib/Target/AArch64/AArch64SchedA57WriteRes.td
index 6f30108a23e3..55005e1d9ed1 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64SchedA57WriteRes.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SchedA57WriteRes.td
@@ -28,15 +28,15 @@ def A57Write_5cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 5; }
def A57Write_5cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 5; }
def A57Write_5cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 5; }
def A57Write_10cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 10; }
-def A57Write_18cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 18;
- let ResourceCycles = [18]; }
+def A57Write_17cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 17;
+ let ResourceCycles = [17]; }
def A57Write_19cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 19;
let ResourceCycles = [19]; }
def A57Write_1cyc_1B : SchedWriteRes<[A57UnitB]> { let Latency = 1; }
def A57Write_1cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 1; }
def A57Write_1cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 1; }
def A57Write_2cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 2; }
-def A57Write_32cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 32;
+def A57Write_32cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 32;
let ResourceCycles = [32]; }
def A57Write_35cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 35;
let ResourceCycles = [35]; }
@@ -54,7 +54,7 @@ def A57Write_6cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 6; }
//===----------------------------------------------------------------------===//
// Define Generic 2 micro-op types
-def A57Write_64cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> {
+def A57Write_64cyc_2W : SchedWriteRes<[A57UnitW, A57UnitW]> {
let Latency = 64;
let NumMicroOps = 2;
let ResourceCycles = [32, 32];
@@ -139,10 +139,10 @@ def A57Write_2cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
let Latency = 2;
let NumMicroOps = 2;
}
-def A57Write_36cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> {
- let Latency = 36;
+def A57Write_34cyc_2W : SchedWriteRes<[A57UnitW, A57UnitW]> {
+ let Latency = 34;
let NumMicroOps = 2;
- let ResourceCycles = [18, 18];
+ let ResourceCycles = [17, 17];
}
def A57Write_3cyc_1I_1M : SchedWriteRes<[A57UnitI,
A57UnitM]> {
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SchedFalkor.td b/contrib/llvm/lib/Target/AArch64/AArch64SchedFalkor.td
new file mode 100644
index 000000000000..19a6d6f2a1ad
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SchedFalkor.td
@@ -0,0 +1,26 @@
+//==- AArch64SchedFalkor.td - Falkor Scheduling Definitions -*- tablegen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for Qualcomm Falkor to support
+// instruction scheduling and other instruction cost heuristics.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Define the SchedMachineModel and provide basic properties for coarse grained
+// instruction cost model.
+
+def FalkorModel : SchedMachineModel {
+ let IssueWidth = 4; // 4-wide issue for expanded uops.
+ let MicroOpBufferSize = 128; // Out-of-order with temporary unified issue buffer.
+ let LoopMicroOpBufferSize = 16;
+ let LoadLatency = 3; // Optimistic load latency.
+ let MispredictPenalty = 11; // Minimum branch misprediction penalty.
+ let CompleteModel = 0;
+}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SchedM1.td b/contrib/llvm/lib/Target/AArch64/AArch64SchedM1.td
index 2288b8dfc223..14d6891253fa 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64SchedM1.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SchedM1.td
@@ -32,33 +32,31 @@ def ExynosM1Model : SchedMachineModel {
def M1UnitA : ProcResource<2>; // Simple integer
def M1UnitC : ProcResource<1>; // Simple and complex integer
+def M1UnitD : ProcResource<1>; // Integer division (inside C, serialized)
def M1UnitB : ProcResource<2>; // Branch
def M1UnitL : ProcResource<1>; // Load
def M1UnitS : ProcResource<1>; // Store
def M1PipeF0 : ProcResource<1>; // FP #0
-def M1PipeF1 : ProcResource<1>; // FP #1
-
let Super = M1PipeF0 in {
def M1UnitFMAC : ProcResource<1>; // FP multiplication
- def M1UnitFCVT : ProcResource<1>; // FP conversion
- def M1UnitNAL0 : ProcResource<1>; // Simple vector.
+ def M1UnitNAL0 : ProcResource<1>; // Simple vector
def M1UnitNMISC : ProcResource<1>; // Miscellanea
+ def M1UnitFCVT : ProcResource<1>; // FP conversion
def M1UnitNCRYPT : ProcResource<1>; // Cryptographic
}
-
+def M1PipeF1 : ProcResource<1>; // FP #1
let Super = M1PipeF1 in {
def M1UnitFADD : ProcResource<1>; // Simple FP
- let BufferSize = 1 in
+ def M1UnitNAL1 : ProcResource<1>; // Simple vector
def M1UnitFVAR : ProcResource<1>; // FP division & square root (serialized)
- def M1UnitNAL1 : ProcResource<1>; // Simple vector.
def M1UnitFST : ProcResource<1>; // FP store
}
let SchedModel = ExynosM1Model in {
def M1UnitALU : ProcResGroup<[M1UnitA,
- M1UnitC]>; // All simple integer.
+ M1UnitC]>; // All integer
def M1UnitNALU : ProcResGroup<[M1UnitNAL0,
- M1UnitNAL1]>; // All simple vector.
+ M1UnitNAL1]>; // All simple vector
}
let SchedModel = ExynosM1Model in {
@@ -66,11 +64,33 @@ let SchedModel = ExynosM1Model in {
//===----------------------------------------------------------------------===//
// Coarse scheduling model for the Exynos-M1.
+def M1WriteA1 : SchedWriteRes<[M1UnitALU]> { let Latency = 1; }
+def M1WriteA2 : SchedWriteRes<[M1UnitALU]> { let Latency = 2; }
+def M1WriteC1 : SchedWriteRes<[M1UnitC]> { let Latency = 1; }
+def M1WriteC2 : SchedWriteRes<[M1UnitC]> { let Latency = 2; }
+
+def M1WriteB1 : SchedWriteRes<[M1UnitB]> { let Latency = 1; }
+
+def M1WriteL5 : SchedWriteRes<[M1UnitL]> { let Latency = 5; }
+def M1WriteLA : SchedWriteVariant<[SchedVar<ScaledIdxPred, [M1WriteL5,
+ M1WriteA1]>,
+ SchedVar<NoSchedPred, [M1WriteL5]>]>;
+
+def M1WriteS1 : SchedWriteRes<[M1UnitS]> { let Latency = 1; }
+def M1WriteS2 : SchedWriteRes<[M1UnitS]> { let Latency = 2; }
+def M1WriteS4 : SchedWriteRes<[M1UnitS]> { let Latency = 4; }
+def M1WriteSA : SchedWriteVariant<[SchedVar<ScaledIdxPred, [M1WriteS2,
+ M1WriteA1]>,
+ SchedVar<NoSchedPred, [M1WriteS1]>]>;
+
+def M1ReadAdrBase : SchedReadVariant<[SchedVar<ScaledIdxPred, [ReadDefault]>,
+ SchedVar<NoSchedPred, [ReadDefault]>]>;
+def : SchedAlias<ReadAdrBase, M1ReadAdrBase>;
+
// Branch instructions.
-// TODO: Non-conditional direct branches take zero cycles and units.
+// NOTE: Unconditional direct branches actually take neither cycles nor units.
def : WriteRes<WriteBr, [M1UnitB]> { let Latency = 1; }
def : WriteRes<WriteBrReg, [M1UnitC]> { let Latency = 1; }
-// TODO: Branch and link is much different.
// Arithmetic and logical integer instructions.
def : WriteRes<WriteI, [M1UnitALU]> { let Latency = 1; }
@@ -83,9 +103,12 @@ def : WriteRes<WriteIS, [M1UnitALU]> { let Latency = 1; }
def : WriteRes<WriteImm, [M1UnitALU]> { let Latency = 1; }
// Divide and multiply instructions.
-// TODO: Division blocks the divider inside C.
-def : WriteRes<WriteID32, [M1UnitC]> { let Latency = 13; }
-def : WriteRes<WriteID64, [M1UnitC]> { let Latency = 21; }
+def : WriteRes<WriteID32, [M1UnitC,
+ M1UnitD]> { let Latency = 13;
+ let ResourceCycles = [1, 13]; }
+def : WriteRes<WriteID64, [M1UnitC,
+ M1UnitD]> { let Latency = 21;
+ let ResourceCycles = [1, 21]; }
// TODO: Long multiplication take 5 cycles and also the ALU.
// TODO: Multiplication with accumulation can be advanced.
def : WriteRes<WriteIM32, [M1UnitC]> { let Latency = 3; }
@@ -101,31 +124,27 @@ def : WriteRes<WriteAdr, []> { let Latency = 0; }
// Load instructions.
def : WriteRes<WriteLD, [M1UnitL]> { let Latency = 4; }
-// TODO: Extended address requires also the ALU.
-def : WriteRes<WriteLDIdx, [M1UnitL]> { let Latency = 5; }
def : WriteRes<WriteLDHi, [M1UnitALU]> { let Latency = 4; }
+def : SchedAlias<WriteLDIdx, M1WriteLA>;
// Store instructions.
def : WriteRes<WriteST, [M1UnitS]> { let Latency = 1; }
-// TODO: Extended address requires also the ALU.
-def : WriteRes<WriteSTIdx, [M1UnitS]> { let Latency = 1; }
def : WriteRes<WriteSTP, [M1UnitS]> { let Latency = 1; }
def : WriteRes<WriteSTX, [M1UnitS]> { let Latency = 1; }
+def : SchedAlias<WriteSTIdx, M1WriteSA>;
// FP data instructions.
def : WriteRes<WriteF, [M1UnitFADD]> { let Latency = 3; }
// TODO: FCCMP is much different.
def : WriteRes<WriteFCmp, [M1UnitNMISC]> { let Latency = 4; }
-// TODO: DP takes longer.
-def : WriteRes<WriteFDiv, [M1UnitFVAR]> { let Latency = 15; }
-// TODO: MACC takes longer.
+def : WriteRes<WriteFDiv, [M1UnitFVAR]> { let Latency = 15;
+ let ResourceCycles = [15]; }
def : WriteRes<WriteFMul, [M1UnitFMAC]> { let Latency = 4; }
// FP miscellaneous instructions.
// TODO: Conversion between register files is much different.
def : WriteRes<WriteFCvt, [M1UnitFCVT]> { let Latency = 3; }
def : WriteRes<WriteFImm, [M1UnitNALU]> { let Latency = 1; }
-// TODO: Copy from FPR to GPR is much different.
def : WriteRes<WriteFCopy, [M1UnitS]> { let Latency = 4; }
// FP load instructions.
@@ -137,7 +156,6 @@ def : WriteRes<WriteVLD, [M1UnitL]> { let Latency = 5; }
def : WriteRes<WriteVST, [M1UnitS, M1UnitFST]> { let Latency = 1; }
// ASIMD FP instructions.
-// TODO: Other operations are much different.
def : WriteRes<WriteV, [M1UnitFADD]> { let Latency = 3; }
// Other miscellaneous instructions.
@@ -191,24 +209,15 @@ def M1WriteNEONJ : SchedWriteRes<[M1UnitNMISC,
M1UnitFMAC]> { let Latency = 6; }
def M1WriteNEONK : SchedWriteRes<[M1UnitNMISC,
M1UnitFMAC]> { let Latency = 7; }
-def M1WriteALU1 : SchedWriteRes<[M1UnitALU]> { let Latency = 1; }
-def M1WriteB : SchedWriteRes<[M1UnitB]> { let Latency = 1; }
-// FIXME: This is the worst case, conditional branch and link.
-def M1WriteBL : SchedWriteRes<[M1UnitB,
- M1UnitALU]> { let Latency = 1; }
-// FIXME: This is the worst case, when using LR.
-def M1WriteBLR : SchedWriteRes<[M1UnitB,
- M1UnitALU,
- M1UnitALU]> { let Latency = 2; }
-def M1WriteC1 : SchedWriteRes<[M1UnitC]> { let Latency = 1; }
-def M1WriteC2 : SchedWriteRes<[M1UnitC]> { let Latency = 2; }
def M1WriteFADD3 : SchedWriteRes<[M1UnitFADD]> { let Latency = 3; }
def M1WriteFCVT3 : SchedWriteRes<[M1UnitFCVT]> { let Latency = 3; }
def M1WriteFCVT4 : SchedWriteRes<[M1UnitFCVT]> { let Latency = 4; }
def M1WriteFMAC4 : SchedWriteRes<[M1UnitFMAC]> { let Latency = 4; }
def M1WriteFMAC5 : SchedWriteRes<[M1UnitFMAC]> { let Latency = 5; }
-def M1WriteFVAR15 : SchedWriteRes<[M1UnitFVAR]> { let Latency = 15; }
-def M1WriteFVAR23 : SchedWriteRes<[M1UnitFVAR]> { let Latency = 23; }
+def M1WriteFVAR15 : SchedWriteRes<[M1UnitFVAR]> { let Latency = 15;
+ let ResourceCycles = [15]; }
+def M1WriteFVAR23 : SchedWriteRes<[M1UnitFVAR]> { let Latency = 23;
+ let ResourceCycles = [23]; }
def M1WriteNALU1 : SchedWriteRes<[M1UnitNALU]> { let Latency = 1; }
def M1WriteNALU2 : SchedWriteRes<[M1UnitNALU]> { let Latency = 2; }
def M1WriteNAL11 : SchedWriteRes<[M1UnitNAL1]> { let Latency = 1; }
@@ -220,19 +229,22 @@ def M1WriteNMISC1 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 1; }
def M1WriteNMISC2 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 2; }
def M1WriteNMISC3 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 3; }
def M1WriteNMISC4 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 4; }
-def M1WriteS4 : SchedWriteRes<[M1UnitS]> { let Latency = 4; }
def M1WriteTB : SchedWriteRes<[M1UnitC,
M1UnitALU]> { let Latency = 2; }
// Branch instructions
-def : InstRW<[M1WriteB ], (instrs Bcc)>;
-def : InstRW<[M1WriteBL], (instrs BL)>;
-def : InstRW<[M1WriteBLR], (instrs BLR)>;
-def : InstRW<[M1WriteC1], (instregex "^CBN?Z[WX]")>;
-def : InstRW<[M1WriteTB], (instregex "^TBN?Z[WX]")>;
+def : InstRW<[M1WriteB1], (instrs Bcc)>;
+// NOTE: Conditional branch and link adds a B uop.
+def : InstRW<[M1WriteA1], (instrs BL)>;
+// NOTE: Indirect branch and link with LR adds an ALU uop.
+def : InstRW<[M1WriteA1,
+ M1WriteC1], (instrs BLR)>;
+def : InstRW<[M1WriteC1], (instregex "^CBN?Z[WX]")>;
+def : InstRW<[M1WriteC1,
+ M1WriteA2], (instregex "^TBN?Z[WX]")>;
// Arithmetic and logical integer instructions.
-def : InstRW<[M1WriteALU1], (instrs COPY)>;
+def : InstRW<[M1WriteA1], (instrs COPY)>;
// Divide and multiply instructions.
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64SchedVulcan.td b/contrib/llvm/lib/Target/AArch64/AArch64SchedVulcan.td
index 0aa2462eba83..35a40c314bf4 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64SchedVulcan.td
+++ b/contrib/llvm/lib/Target/AArch64/AArch64SchedVulcan.td
@@ -49,15 +49,12 @@ def VulcanP5 : ProcResource<1>;
let SchedModel = VulcanModel in {
-// Define groups for the functional units on each
-// issue port. Each group created will be used
-// by a WriteRes later on.
+// Define groups for the functional units on each issue port. Each group
+// created will be used by a WriteRes later on.
//
-// NOTE: Some groups only contain one member. This
-// is a way to create names for the various functional
-// units that share a single issue port. For example,
-// "VulcanI1" for ALU ops on port 1 and "VulcanF1" for
-// FP ops on port 1.
+// NOTE: Some groups only contain one member. This is a way to create names for
+// the various functional units that share a single issue port. For example,
+// "VulcanI1" for ALU ops on port 1 and "VulcanF1" for FP ops on port 1.
// Integer divide and multiply micro-ops only on port 1.
def VulcanI1 : ProcResGroup<[VulcanP1]>;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp b/contrib/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp
index f904b2379416..fe984ccbaf1d 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp
@@ -25,6 +25,8 @@ using namespace llvm;
#define DEBUG_TYPE "aarch64-stp-suppress"
+#define STPSUPPRESS_PASS_NAME "AArch64 Store Pair Suppression"
+
namespace {
class AArch64StorePairSuppress : public MachineFunctionPass {
const AArch64InstrInfo *TII;
@@ -36,12 +38,12 @@ class AArch64StorePairSuppress : public MachineFunctionPass {
public:
static char ID;
- AArch64StorePairSuppress() : MachineFunctionPass(ID) {}
-
- const char *getPassName() const override {
- return "AArch64 Store Pair Suppression";
+ AArch64StorePairSuppress() : MachineFunctionPass(ID) {
+ initializeAArch64StorePairSuppressPass(*PassRegistry::getPassRegistry());
}
+ StringRef getPassName() const override { return STPSUPPRESS_PASS_NAME; }
+
bool runOnMachineFunction(MachineFunction &F) override;
private:
@@ -59,6 +61,9 @@ private:
char AArch64StorePairSuppress::ID = 0;
} // anonymous
+INITIALIZE_PASS(AArch64StorePairSuppress, "aarch64-stp-suppress",
+ STPSUPPRESS_PASS_NAME, false, false)
+
FunctionPass *llvm::createAArch64StorePairSuppressPass() {
return new AArch64StorePairSuppress();
}
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index 7dd8ccbe6c25..f58bbbd26132 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -36,7 +36,8 @@ UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of "
"an address is ignored"), cl::init(false), cl::Hidden);
AArch64Subtarget &
-AArch64Subtarget::initializeSubtargetDependencies(StringRef FS) {
+AArch64Subtarget::initializeSubtargetDependencies(StringRef FS,
+ StringRef CPUString) {
// Determine default and user-specified characteristics
if (CPUString.empty())
@@ -63,9 +64,14 @@ void AArch64Subtarget::initializeProperties() {
MaxInterleaveFactor = 4;
break;
case ExynosM1:
+ MaxInterleaveFactor = 4;
+ MaxJumpTableSize = 8;
PrefFunctionAlignment = 4;
PrefLoopAlignment = 3;
break;
+ case Falkor:
+ MaxInterleaveFactor = 4;
+ break;
case Kryo:
MaxInterleaveFactor = 4;
VectorInsertExtractBaseCost = 2;
@@ -89,8 +95,8 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU,
const std::string &FS,
const TargetMachine &TM, bool LittleEndian)
: AArch64GenSubtargetInfo(TT, CPU, FS), ReserveX18(TT.isOSDarwin()),
- IsLittle(LittleEndian), CPUString(CPU), TargetTriple(TT), FrameLowering(),
- InstrInfo(initializeSubtargetDependencies(FS)), TSInfo(),
+ IsLittle(LittleEndian), TargetTriple(TT), FrameLowering(),
+ InstrInfo(initializeSubtargetDependencies(FS, CPU)), TSInfo(),
TLInfo(TM, *this), GISel() {}
const CallLowering *AArch64Subtarget::getCallLowering() const {
@@ -98,6 +104,16 @@ const CallLowering *AArch64Subtarget::getCallLowering() const {
return GISel->getCallLowering();
}
+const InstructionSelector *AArch64Subtarget::getInstructionSelector() const {
+ assert(GISel && "Access to GlobalISel APIs not set");
+ return GISel->getInstructionSelector();
+}
+
+const LegalizerInfo *AArch64Subtarget::getLegalizerInfo() const {
+ assert(GISel && "Access to GlobalISel APIs not set");
+ return GISel->getLegalizerInfo();
+}
+
const RegisterBankInfo *AArch64Subtarget::getRegBankInfo() const {
assert(GISel && "Access to GlobalISel APIs not set");
return GISel->getRegBankInfo();
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h
index 16a35405c892..73f63b8b9f67 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -32,7 +32,7 @@ class GlobalValue;
class StringRef;
class Triple;
-class AArch64Subtarget : public AArch64GenSubtargetInfo {
+class AArch64Subtarget final : public AArch64GenSubtargetInfo {
public:
enum ARMProcFamilyEnum : uint8_t {
Others,
@@ -43,6 +43,7 @@ public:
CortexA73,
Cyclone,
ExynosM1,
+ Falkor,
Kryo,
Vulcan
};
@@ -58,6 +59,7 @@ protected:
bool HasNEON = false;
bool HasCrypto = false;
bool HasCRC = false;
+ bool HasLSE = false;
bool HasRAS = false;
bool HasPerfMon = false;
bool HasFullFP16 = false;
@@ -71,7 +73,6 @@ protected:
// StrictAlign - Disallow unaligned memory accesses.
bool StrictAlign = false;
- bool MergeNarrowLoads = false;
bool UseAA = false;
bool PredictableSelectIsExpensive = false;
bool BalanceFPOps = false;
@@ -80,7 +81,8 @@ protected:
bool Misaligned128StoreIsSlow = false;
bool AvoidQuadLdStPairs = false;
bool UseAlternateSExtLoadCVTF32Pattern = false;
- bool HasMacroOpFusion = false;
+ bool HasArithmeticBccFusion = false;
+ bool HasArithmeticCbzFusion = false;
bool DisableLatencySchedHeuristic = false;
bool UseRSqrt = false;
uint8_t MaxInterleaveFactor = 2;
@@ -91,15 +93,13 @@ protected:
unsigned MaxPrefetchIterationsAhead = UINT_MAX;
unsigned PrefFunctionAlignment = 0;
unsigned PrefLoopAlignment = 0;
+ unsigned MaxJumpTableSize = 0;
// ReserveX18 - X18 is not available as a general purpose register.
bool ReserveX18;
bool IsLittle;
- /// CPUString - String name of used CPU.
- std::string CPUString;
-
/// TargetTriple - What processor and OS we're targeting.
Triple TargetTriple;
@@ -116,7 +116,8 @@ private:
/// initializeSubtargetDependencies - Initializes using CPUString and the
/// passed in feature string so that we can use initializer lists for
/// subtarget initialization.
- AArch64Subtarget &initializeSubtargetDependencies(StringRef FS);
+ AArch64Subtarget &initializeSubtargetDependencies(StringRef FS,
+ StringRef CPUString);
/// Initialize properties based on the selected processor family.
void initializeProperties();
@@ -147,6 +148,8 @@ public:
return &getInstrInfo()->getRegisterInfo();
}
const CallLowering *getCallLowering() const override;
+ const InstructionSelector *getInstructionSelector() const override;
+ const LegalizerInfo *getLegalizerInfo() const override;
const RegisterBankInfo *getRegBankInfo() const override;
const Triple &getTargetTriple() const { return TargetTriple; }
bool enableMachineScheduler() const override { return true; }
@@ -171,13 +174,15 @@ public:
bool requiresStrictAlign() const { return StrictAlign; }
+ bool isXRaySupported() const override { return true; }
+
bool isX18Reserved() const { return ReserveX18; }
bool hasFPARMv8() const { return HasFPARMv8; }
bool hasNEON() const { return HasNEON; }
bool hasCrypto() const { return HasCrypto; }
bool hasCRC() const { return HasCRC; }
+ bool hasLSE() const { return HasLSE; }
bool hasRAS() const { return HasRAS; }
- bool mergeNarrowLoads() const { return MergeNarrowLoads; }
bool balanceFPOps() const { return BalanceFPOps; }
bool predictableSelectIsExpensive() const {
return PredictableSelectIsExpensive;
@@ -188,7 +193,8 @@ public:
bool useAlternateSExtLoadCVTF32Pattern() const {
return UseAlternateSExtLoadCVTF32Pattern;
}
- bool hasMacroOpFusion() const { return HasMacroOpFusion; }
+ bool hasArithmeticBccFusion() const { return HasArithmeticBccFusion; }
+ bool hasArithmeticCbzFusion() const { return HasArithmeticCbzFusion; }
bool useRSqrt() const { return UseRSqrt; }
unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
unsigned getVectorInsertExtractBaseCost() const {
@@ -203,6 +209,8 @@ public:
unsigned getPrefFunctionAlignment() const { return PrefFunctionAlignment; }
unsigned getPrefLoopAlignment() const { return PrefLoopAlignment; }
+ unsigned getMaximumJumpTableSize() const { return MaxJumpTableSize; }
+
/// CPU has TBI (top byte of addresses is ignored during HW address
/// translation) and OS enables it.
bool supportsAddressTopByteIgnored() const;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index 0b6345ff8011..e4ef0d4bb8db 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -12,12 +12,17 @@
#include "AArch64.h"
#include "AArch64CallLowering.h"
+#include "AArch64InstructionSelector.h"
+#include "AArch64LegalizerInfo.h"
#include "AArch64RegisterBankInfo.h"
#include "AArch64TargetMachine.h"
#include "AArch64TargetObjectFile.h"
#include "AArch64TargetTransformInfo.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
+#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
+#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -30,53 +35,56 @@
#include "llvm/Transforms/Scalar.h"
using namespace llvm;
-static cl::opt<bool>
-EnableCCMP("aarch64-ccmp", cl::desc("Enable the CCMP formation pass"),
- cl::init(true), cl::Hidden);
+static cl::opt<bool> EnableCCMP("aarch64-enable-ccmp",
+ cl::desc("Enable the CCMP formation pass"),
+ cl::init(true), cl::Hidden);
-static cl::opt<bool> EnableMCR("aarch64-mcr",
+static cl::opt<bool> EnableMCR("aarch64-enable-mcr",
cl::desc("Enable the machine combiner pass"),
cl::init(true), cl::Hidden);
-static cl::opt<bool>
-EnableStPairSuppress("aarch64-stp-suppress", cl::desc("Suppress STP for AArch64"),
- cl::init(true), cl::Hidden);
+static cl::opt<bool> EnableStPairSuppress("aarch64-enable-stp-suppress",
+ cl::desc("Suppress STP for AArch64"),
+ cl::init(true), cl::Hidden);
-static cl::opt<bool>
-EnableAdvSIMDScalar("aarch64-simd-scalar", cl::desc("Enable use of AdvSIMD scalar"
- " integer instructions"), cl::init(false), cl::Hidden);
-
-static cl::opt<bool>
-EnablePromoteConstant("aarch64-promote-const", cl::desc("Enable the promote "
- "constant pass"), cl::init(true), cl::Hidden);
-
-static cl::opt<bool>
-EnableCollectLOH("aarch64-collect-loh", cl::desc("Enable the pass that emits the"
- " linker optimization hints (LOH)"), cl::init(true),
- cl::Hidden);
-
-static cl::opt<bool>
-EnableDeadRegisterElimination("aarch64-dead-def-elimination", cl::Hidden,
- cl::desc("Enable the pass that removes dead"
- " definitons and replaces stores to"
- " them with stores to the zero"
- " register"),
- cl::init(true));
+static cl::opt<bool> EnableAdvSIMDScalar(
+ "aarch64-enable-simd-scalar",
+ cl::desc("Enable use of AdvSIMD scalar integer instructions"),
+ cl::init(false), cl::Hidden);
static cl::opt<bool>
-EnableRedundantCopyElimination("aarch64-redundant-copy-elim",
- cl::desc("Enable the redundant copy elimination pass"),
- cl::init(true), cl::Hidden);
+ EnablePromoteConstant("aarch64-enable-promote-const",
+ cl::desc("Enable the promote constant pass"),
+ cl::init(true), cl::Hidden);
-static cl::opt<bool>
-EnableLoadStoreOpt("aarch64-load-store-opt", cl::desc("Enable the load/store pair"
- " optimization pass"), cl::init(true), cl::Hidden);
+static cl::opt<bool> EnableCollectLOH(
+ "aarch64-enable-collect-loh",
+ cl::desc("Enable the pass that emits the linker optimization hints (LOH)"),
+ cl::init(true), cl::Hidden);
static cl::opt<bool>
-EnableAtomicTidy("aarch64-atomic-cfg-tidy", cl::Hidden,
- cl::desc("Run SimplifyCFG after expanding atomic operations"
- " to make use of cmpxchg flow-based information"),
- cl::init(true));
+ EnableDeadRegisterElimination("aarch64-enable-dead-defs", cl::Hidden,
+ cl::desc("Enable the pass that removes dead"
+ " definitons and replaces stores to"
+ " them with stores to the zero"
+ " register"),
+ cl::init(true));
+
+static cl::opt<bool> EnableRedundantCopyElimination(
+ "aarch64-enable-copyelim",
+ cl::desc("Enable the redundant copy elimination pass"), cl::init(true),
+ cl::Hidden);
+
+static cl::opt<bool> EnableLoadStoreOpt("aarch64-enable-ldst-opt",
+ cl::desc("Enable the load/store pair"
+ " optimization pass"),
+ cl::init(true), cl::Hidden);
+
+static cl::opt<bool> EnableAtomicTidy(
+ "aarch64-enable-atomic-cfg-tidy", cl::Hidden,
+ cl::desc("Run SimplifyCFG after expanding atomic operations"
+ " to make use of cmpxchg flow-based information"),
+ cl::init(true));
static cl::opt<bool>
EnableEarlyIfConversion("aarch64-enable-early-ifcvt", cl::Hidden,
@@ -84,9 +92,9 @@ EnableEarlyIfConversion("aarch64-enable-early-ifcvt", cl::Hidden,
cl::init(true));
static cl::opt<bool>
-EnableCondOpt("aarch64-condopt",
- cl::desc("Enable the condition optimizer pass"),
- cl::init(true), cl::Hidden);
+ EnableCondOpt("aarch64-enable-condopt",
+ cl::desc("Enable the condition optimizer pass"),
+ cl::init(true), cl::Hidden);
static cl::opt<bool>
EnableA53Fix835769("aarch64-fix-cortex-a53-835769", cl::Hidden,
@@ -94,28 +102,51 @@ EnableA53Fix835769("aarch64-fix-cortex-a53-835769", cl::Hidden,
cl::init(false));
static cl::opt<bool>
-EnableGEPOpt("aarch64-gep-opt", cl::Hidden,
- cl::desc("Enable optimizations on complex GEPs"),
- cl::init(false));
+ EnableAddressTypePromotion("aarch64-enable-type-promotion", cl::Hidden,
+ cl::desc("Enable the type promotion pass"),
+ cl::init(true));
+
+static cl::opt<bool>
+ EnableGEPOpt("aarch64-enable-gep-opt", cl::Hidden,
+ cl::desc("Enable optimizations on complex GEPs"),
+ cl::init(false));
+
+static cl::opt<bool>
+ BranchRelaxation("aarch64-enable-branch-relax", cl::Hidden, cl::init(true),
+ cl::desc("Relax out of range conditional branches"));
// FIXME: Unify control over GlobalMerge.
static cl::opt<cl::boolOrDefault>
-EnableGlobalMerge("aarch64-global-merge", cl::Hidden,
- cl::desc("Enable the global merge pass"));
+ EnableGlobalMerge("aarch64-enable-global-merge", cl::Hidden,
+ cl::desc("Enable the global merge pass"));
static cl::opt<bool>
- EnableLoopDataPrefetch("aarch64-loop-data-prefetch", cl::Hidden,
+ EnableLoopDataPrefetch("aarch64-enable-loop-data-prefetch", cl::Hidden,
cl::desc("Enable the loop data prefetch pass"),
cl::init(true));
extern "C" void LLVMInitializeAArch64Target() {
// Register the target.
- RegisterTargetMachine<AArch64leTargetMachine> X(TheAArch64leTarget);
- RegisterTargetMachine<AArch64beTargetMachine> Y(TheAArch64beTarget);
- RegisterTargetMachine<AArch64leTargetMachine> Z(TheARM64Target);
+ RegisterTargetMachine<AArch64leTargetMachine> X(getTheAArch64leTarget());
+ RegisterTargetMachine<AArch64beTargetMachine> Y(getTheAArch64beTarget());
+ RegisterTargetMachine<AArch64leTargetMachine> Z(getTheARM64Target());
auto PR = PassRegistry::getPassRegistry();
initializeGlobalISel(*PR);
+ initializeAArch64A53Fix835769Pass(*PR);
+ initializeAArch64A57FPLoadBalancingPass(*PR);
+ initializeAArch64AddressTypePromotionPass(*PR);
+ initializeAArch64AdvSIMDScalarPass(*PR);
+ initializeAArch64CollectLOHPass(*PR);
+ initializeAArch64ConditionalComparesPass(*PR);
+ initializeAArch64ConditionOptimizerPass(*PR);
+ initializeAArch64DeadRegisterDefinitionsPass(*PR);
initializeAArch64ExpandPseudoPass(*PR);
+ initializeAArch64LoadStoreOptPass(*PR);
+ initializeAArch64VectorByElementOptPass(*PR);
+ initializeAArch64PromoteConstantPass(*PR);
+ initializeAArch64RedundantCopyEliminationPass(*PR);
+ initializeAArch64StorePairSuppressPass(*PR);
+ initializeLDTLSCleanupPass(*PR);
}
//===----------------------------------------------------------------------===//
@@ -129,7 +160,11 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
}
// Helper function to build a DataLayout string
-static std::string computeDataLayout(const Triple &TT, bool LittleEndian) {
+static std::string computeDataLayout(const Triple &TT,
+ const MCTargetOptions &Options,
+ bool LittleEndian) {
+ if (Options.getABIName() == "ilp32")
+ return "e-m:e-p:32:32-i8:8-i16:16-i64:64-S128";
if (TT.isOSBinFormatMachO())
return "e-m:o-i64:64-i128:128-n32:64-S128";
if (LittleEndian)
@@ -137,29 +172,6 @@ static std::string computeDataLayout(const Triple &TT, bool LittleEndian) {
return "E-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128";
}
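A reading of the new ILP32 data layout string returned above (annotation only, following the usual LLVM data layout syntax; not part of the patch):
// "e-m:e-p:32:32-i8:8-i16:16-i64:64-S128"
//   e        little-endian
//   m:e      ELF-style name mangling
//   p:32:32  pointers are 32 bits wide with 32-bit ABI alignment
//   i8:8, i16:16, i64:64   natural ABI alignment for those integer widths
//   S128     natural stack alignment of 128 bits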
-// Helper function to set up the defaults for reciprocals.
-static void initReciprocals(AArch64TargetMachine& TM, AArch64Subtarget& ST)
-{
- // For the estimates, convergence is quadratic, so essentially the number of
- // digits is doubled after each iteration. ARMv8, the minimum architected
- // accuracy of the initial estimate is 2^-8. Therefore, the number of extra
- // steps to refine the result for float (23 mantissa bits) and for double
- // (52 mantissa bits) are 2 and 3, respectively.
- unsigned ExtraStepsF = 2,
- ExtraStepsD = ExtraStepsF + 1;
- bool UseRsqrt = ST.useRSqrt();
-
- TM.Options.Reciprocals.setDefaults("sqrtf", UseRsqrt, ExtraStepsF);
- TM.Options.Reciprocals.setDefaults("sqrtd", UseRsqrt, ExtraStepsD);
- TM.Options.Reciprocals.setDefaults("vec-sqrtf", UseRsqrt, ExtraStepsF);
- TM.Options.Reciprocals.setDefaults("vec-sqrtd", UseRsqrt, ExtraStepsD);
-
- TM.Options.Reciprocals.setDefaults("divf", false, ExtraStepsF);
- TM.Options.Reciprocals.setDefaults("divd", false, ExtraStepsD);
- TM.Options.Reciprocals.setDefaults("vec-divf", false, ExtraStepsF);
- TM.Options.Reciprocals.setDefaults("vec-divd", false, ExtraStepsD);
-}
-
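The step counts in the removed comment above follow from quadratic convergence: starting from the architected 2^-8 estimate, each Newton-Raphson step doubles the number of correct bits, so 2^-8 -> 2^-16 -> 2^-32 covers float's 24 significand bits after 2 extra steps, and one further step (2^-64) covers double's 53 bits.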
static Reloc::Model getEffectiveRelocModel(const Triple &TT,
Optional<Reloc::Model> RM) {
// AArch64 Darwin is always PIC.
@@ -181,11 +193,12 @@ AArch64TargetMachine::AArch64TargetMachine(
CodeModel::Model CM, CodeGenOpt::Level OL, bool LittleEndian)
// This nested ternary is horrible, but DL needs to be properly
// initialized before TLInfo is constructed.
- : LLVMTargetMachine(T, computeDataLayout(TT, LittleEndian), TT, CPU, FS,
- Options, getEffectiveRelocModel(TT, RM), CM, OL),
+ : LLVMTargetMachine(T, computeDataLayout(TT, Options.MCOptions,
+ LittleEndian),
+ TT, CPU, FS, Options,
+ getEffectiveRelocModel(TT, RM), CM, OL),
TLOF(createTLOF(getTargetTriple())),
- Subtarget(TT, CPU, FS, *this, LittleEndian) {
- initReciprocals(*this, Subtarget);
+ isLittle(LittleEndian) {
initAsmInfo();
}
@@ -195,10 +208,18 @@ AArch64TargetMachine::~AArch64TargetMachine() {}
namespace {
struct AArch64GISelActualAccessor : public GISelAccessor {
std::unique_ptr<CallLowering> CallLoweringInfo;
+ std::unique_ptr<InstructionSelector> InstSelector;
+ std::unique_ptr<LegalizerInfo> Legalizer;
std::unique_ptr<RegisterBankInfo> RegBankInfo;
const CallLowering *getCallLowering() const override {
return CallLoweringInfo.get();
}
+ const InstructionSelector *getInstructionSelector() const override {
+ return InstSelector.get();
+ }
+ const LegalizerInfo *getLegalizerInfo() const override {
+ return Legalizer.get();
+ }
const RegisterBankInfo *getRegBankInfo() const override {
return RegBankInfo.get();
}
@@ -225,16 +246,24 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
// function that reside in TargetOptions.
resetTargetOptions(F);
I = llvm::make_unique<AArch64Subtarget>(TargetTriple, CPU, FS, *this,
- Subtarget.isLittleEndian());
+ isLittle);
#ifndef LLVM_BUILD_GLOBAL_ISEL
- GISelAccessor *GISel = new GISelAccessor();
+ GISelAccessor *GISel = new GISelAccessor();
#else
AArch64GISelActualAccessor *GISel =
new AArch64GISelActualAccessor();
GISel->CallLoweringInfo.reset(
new AArch64CallLowering(*I->getTargetLowering()));
- GISel->RegBankInfo.reset(
- new AArch64RegisterBankInfo(*I->getRegisterInfo()));
+ GISel->Legalizer.reset(new AArch64LegalizerInfo());
+
+ auto *RBI = new AArch64RegisterBankInfo(*I->getRegisterInfo());
+
+ // FIXME: At this point, we can't rely on Subtarget having RBI.
+ // It's awkward to mix passing RBI and the Subtarget; should we pass
+ // TII/TRI as well?
+ GISel->InstSelector.reset(new AArch64InstructionSelector(*this, *I, *RBI));
+
+ GISel->RegBankInfo.reset(RBI);
#endif
I->setGISelAccessor(*GISel);
}
@@ -271,12 +300,23 @@ public:
return getTM<AArch64TargetMachine>();
}
+ ScheduleDAGInstrs *
+ createMachineScheduler(MachineSchedContext *C) const override {
+ ScheduleDAGMILive *DAG = createGenericSchedLive(C);
+ DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
+ DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
+ DAG->addMutation(createMacroFusionDAGMutation(DAG->TII));
+ return DAG;
+ }
+
void addIRPasses() override;
bool addPreISel() override;
bool addInstSelector() override;
#ifdef LLVM_BUILD_GLOBAL_ISEL
bool addIRTranslator() override;
+ bool addLegalizeMachineIR() override;
bool addRegBankSelect() override;
+ bool addGlobalInstructionSelect() override;
#endif
bool addILPOpts() override;
void addPreRegAlloc() override;
@@ -351,7 +391,7 @@ bool AArch64PassConfig::addPreISel() {
addPass(createGlobalMergePass(TM, 4095, OnlyOptimizeForSize));
}
- if (TM->getOptLevel() != CodeGenOpt::None)
+ if (TM->getOptLevel() != CodeGenOpt::None && EnableAddressTypePromotion)
addPass(createAArch64AddressTypePromotionPass());
return false;
@@ -374,10 +414,18 @@ bool AArch64PassConfig::addIRTranslator() {
addPass(new IRTranslator());
return false;
}
+bool AArch64PassConfig::addLegalizeMachineIR() {
+ addPass(new Legalizer());
+ return false;
+}
bool AArch64PassConfig::addRegBankSelect() {
addPass(new RegBankSelect());
return false;
}
+bool AArch64PassConfig::addGlobalInstructionSelect() {
+ addPass(new InstructionSelect());
+ return false;
+}
#endif
bool AArch64PassConfig::addILPOpts() {
@@ -391,10 +439,15 @@ bool AArch64PassConfig::addILPOpts() {
addPass(&EarlyIfConverterID);
if (EnableStPairSuppress)
addPass(createAArch64StorePairSuppressPass());
+ addPass(createAArch64VectorByElementOptPass());
return true;
}
void AArch64PassConfig::addPreRegAlloc() {
+ // Change dead register definitions to refer to the zero register.
+ if (TM->getOptLevel() != CodeGenOpt::None && EnableDeadRegisterElimination)
+ addPass(createAArch64DeadRegisterDefinitions());
+
// Use AdvSIMD scalar instructions whenever profitable.
if (TM->getOptLevel() != CodeGenOpt::None && EnableAdvSIMDScalar) {
addPass(createAArch64AdvSIMDScalar());
@@ -409,9 +462,6 @@ void AArch64PassConfig::addPostRegAlloc() {
if (TM->getOptLevel() != CodeGenOpt::None && EnableRedundantCopyElimination)
addPass(createAArch64RedundantCopyEliminationPass());
- // Change dead register definitions to refer to the zero register.
- if (TM->getOptLevel() != CodeGenOpt::None && EnableDeadRegisterElimination)
- addPass(createAArch64DeadRegisterDefinitions());
if (TM->getOptLevel() != CodeGenOpt::None && usingDefaultRegAlloc())
// Improve performance for some FP/SIMD code for A57.
addPass(createAArch64A57FPLoadBalancing());
@@ -430,7 +480,9 @@ void AArch64PassConfig::addPreEmitPass() {
addPass(createAArch64A53Fix835769());
// Relax conditional branch instructions if they're otherwise out of
// range of their destination.
- addPass(createAArch64BranchRelaxation());
+ if (BranchRelaxation)
+ addPass(&BranchRelaxationPassID);
+
if (TM->getOptLevel() != CodeGenOpt::None && EnableCollectLOH &&
TM->getTargetTriple().isOSBinFormatMachO())
addPass(createAArch64CollectLOHPass());
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.h b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.h
index b44107b065bd..6fa5e83957e1 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.h
@@ -46,7 +46,7 @@ public:
}
private:
- AArch64Subtarget Subtarget;
+ bool isLittle;
};
// AArch64 little endian target machine.
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp b/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp
index 18ee4a9c72b5..8875f9b72647 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.cpp
@@ -30,15 +30,14 @@ AArch64_MachoTargetObjectFile::AArch64_MachoTargetObjectFile()
}
const MCExpr *AArch64_MachoTargetObjectFile::getTTypeGlobalReference(
- const GlobalValue *GV, unsigned Encoding, Mangler &Mang,
- const TargetMachine &TM, MachineModuleInfo *MMI,
- MCStreamer &Streamer) const {
+ const GlobalValue *GV, unsigned Encoding, const TargetMachine &TM,
+ MachineModuleInfo *MMI, MCStreamer &Streamer) const {
// On Darwin, we can reference dwarf symbols with foo@GOT-., which
// is an indirect pc-relative reference. The default implementation
// won't reference using the GOT, so we need this target-specific
// version.
if (Encoding & (DW_EH_PE_indirect | DW_EH_PE_pcrel)) {
- const MCSymbol *Sym = TM.getSymbol(GV, Mang);
+ const MCSymbol *Sym = TM.getSymbol(GV);
const MCExpr *Res =
MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_GOT, getContext());
MCSymbol *PCSym = getContext().createTempSymbol();
@@ -48,13 +47,13 @@ const MCExpr *AArch64_MachoTargetObjectFile::getTTypeGlobalReference(
}
return TargetLoweringObjectFileMachO::getTTypeGlobalReference(
- GV, Encoding, Mang, TM, MMI, Streamer);
+ GV, Encoding, TM, MMI, Streamer);
}
MCSymbol *AArch64_MachoTargetObjectFile::getCFIPersonalitySymbol(
- const GlobalValue *GV, Mangler &Mang, const TargetMachine &TM,
+ const GlobalValue *GV, const TargetMachine &TM,
MachineModuleInfo *MMI) const {
- return TM.getSymbol(GV, Mang);
+ return TM.getSymbol(GV);
}
const MCExpr *AArch64_MachoTargetObjectFile::getIndirectSymViaGOTPCRel(
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.h b/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.h
index d41f445292cf..05e1dfa9e6c9 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetObjectFile.h
@@ -27,12 +27,12 @@ public:
AArch64_MachoTargetObjectFile();
const MCExpr *getTTypeGlobalReference(const GlobalValue *GV,
- unsigned Encoding, Mangler &Mang,
+ unsigned Encoding,
const TargetMachine &TM,
MachineModuleInfo *MMI,
MCStreamer &Streamer) const override;
- MCSymbol *getCFIPersonalitySymbol(const GlobalValue *GV, Mangler &Mang,
+ MCSymbol *getCFIPersonalitySymbol(const GlobalValue *GV,
const TargetMachine &TM,
MachineModuleInfo *MMI) const override;
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index ecf4d93068a4..88c98865bbc6 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -467,7 +467,8 @@ int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
unsigned Alignment, unsigned AddressSpace) {
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
- if (Opcode == Instruction::Store && Src->isVectorTy() && Alignment != 16 &&
+ if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
+ Src->isVectorTy() && Alignment != 16 &&
Src->getVectorElementType()->isIntegerTy(64)) {
// Unaligned stores are extremely inefficient. We don't split
// unaligned v2i64 stores because the negative impact that has shown in
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 4f2e8310d769..24642cb1698e 100644
--- a/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -52,13 +52,6 @@ public:
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}
- // Provide value semantics. MSVC requires that we spell all of these out.
- AArch64TTIImpl(const AArch64TTIImpl &Arg)
- : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {}
- AArch64TTIImpl(AArch64TTIImpl &&Arg)
- : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
- TLI(std::move(Arg.TLI)) {}
-
/// \name Scalar TTI Implementations
/// @{
diff --git a/contrib/llvm/lib/Target/AArch64/AArch64VectorByElementOpt.cpp b/contrib/llvm/lib/Target/AArch64/AArch64VectorByElementOpt.cpp
new file mode 100644
index 000000000000..e3b1d7cea48d
--- /dev/null
+++ b/contrib/llvm/lib/Target/AArch64/AArch64VectorByElementOpt.cpp
@@ -0,0 +1,371 @@
+//=- AArch64VectorByElementOpt.cpp - AArch64 vector by element inst opt pass =//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that performs optimization for vector by element
+// SIMD instructions.
+//
+// Certain SIMD instructions with vector element operand are not efficient.
+// Rewrite them into SIMD instructions with vector operands. This rewrite
+// is driven by the latency of the instructions.
+//
+// Example:
+// fmla v0.4s, v1.4s, v2.s[1]
+// is rewritten into
+// dup v3.4s, v2.s[1]
+// fmla v0.4s, v1.4s, v3.4s
+//===----------------------------------------------------------------------===//
+
+#include "AArch64InstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetSchedule.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "aarch64-vectorbyelement-opt"
+
+STATISTIC(NumModifiedInstr,
+ "Number of vector by element instructions modified");
+
+#define AARCH64_VECTOR_BY_ELEMENT_OPT_NAME \
+ "AArch64 vector by element instruction optimization pass"
+
+namespace {
+
+struct AArch64VectorByElementOpt : public MachineFunctionPass {
+ static char ID;
+ AArch64VectorByElementOpt() : MachineFunctionPass(ID) {
+ initializeAArch64VectorByElementOptPass(*PassRegistry::getPassRegistry());
+ }
+
+ const TargetInstrInfo *TII;
+ MachineRegisterInfo *MRI;
+ TargetSchedModel SchedModel;
+
+ /// Based only on latency of instructions, determine if it is cost efficient
+ /// to replace the instruction InstDesc by the two instructions InstDescRep1
+ /// and InstDescRep2.
+ /// Return true if replacement is recommended.
+ bool
+ shouldReplaceInstruction(MachineFunction *MF, const MCInstrDesc *InstDesc,
+ const MCInstrDesc *InstDescRep1,
+ const MCInstrDesc *InstDescRep2,
+ std::map<unsigned, bool> &VecInstElemTable) const;
+
+ /// Determine if we need to exit the vector by element instruction
+ /// optimization pass early. This makes sure that Targets with no need
+ /// for this optimization do not spend any compile time on this pass.
+ /// This check is done by comparing the latency of an indexed FMLA
+ /// instruction to the latency of the DUP + the latency of a vector
+ /// FMLA instruction. We do not check on other related instructions such
+ /// as FMLS as we assume that if the situation shows up for one
+ /// instruction, then it is likely to show up for the related ones.
+ /// Return true if early exit of the pass is recommended.
+ bool earlyExitVectElement(MachineFunction *MF);
+
+ /// Check whether an equivalent DUP instruction has already been
+ /// created or not.
+ /// Return true when the dup instruction already exists. In this case,
+ /// DestReg will point to the destination of the already created DUP.
+ bool reuseDUP(MachineInstr &MI, unsigned DupOpcode, unsigned SrcReg,
+ unsigned LaneNumber, unsigned *DestReg) const;
+
+ /// Certain SIMD instructions with vector element operand are not efficient.
+ /// Rewrite them into SIMD instructions with vector operands. This rewrite
+ /// is driven by the latency of the instructions.
+ /// Return true if the SIMD instruction is modified.
+ bool optimizeVectElement(MachineInstr &MI,
+ std::map<unsigned, bool> *VecInstElemTable) const;
+
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+
+ StringRef getPassName() const override {
+ return AARCH64_VECTOR_BY_ELEMENT_OPT_NAME;
+ }
+};
+char AArch64VectorByElementOpt::ID = 0;
+} // namespace
+
+INITIALIZE_PASS(AArch64VectorByElementOpt, "aarch64-vectorbyelement-opt",
+ AARCH64_VECTOR_BY_ELEMENT_OPT_NAME, false, false)
+
+/// Based only on latency of instructions, determine if it is cost efficient
+/// to replace the instruction InstDesc by the two instructions InstDescRep1
+/// and InstDescRep2. Note that it is assumed in this function that an
+/// instruction of type InstDesc is always replaced by the same two
+/// instructions as results are cached here.
+/// Return true if replacement is recommended.
+bool AArch64VectorByElementOpt::shouldReplaceInstruction(
+ MachineFunction *MF, const MCInstrDesc *InstDesc,
+ const MCInstrDesc *InstDescRep1, const MCInstrDesc *InstDescRep2,
+ std::map<unsigned, bool> &VecInstElemTable) const {
+ // Check if a replacement decision is already available in the cached table.
+ // If so, return it.
+ if (!VecInstElemTable.empty() &&
+ VecInstElemTable.find(InstDesc->getOpcode()) != VecInstElemTable.end())
+ return VecInstElemTable[InstDesc->getOpcode()];
+
+ unsigned SCIdx = InstDesc->getSchedClass();
+ unsigned SCIdxRep1 = InstDescRep1->getSchedClass();
+ unsigned SCIdxRep2 = InstDescRep2->getSchedClass();
+ const MCSchedClassDesc *SCDesc =
+ SchedModel.getMCSchedModel()->getSchedClassDesc(SCIdx);
+ const MCSchedClassDesc *SCDescRep1 =
+ SchedModel.getMCSchedModel()->getSchedClassDesc(SCIdxRep1);
+ const MCSchedClassDesc *SCDescRep2 =
+ SchedModel.getMCSchedModel()->getSchedClassDesc(SCIdxRep2);
+
+ // If a subtarget does not define resources for any of the instructions
+ // of interest, then return false for no replacement.
+ if (!SCDesc->isValid() || SCDesc->isVariant() || !SCDescRep1->isValid() ||
+ SCDescRep1->isVariant() || !SCDescRep2->isValid() ||
+ SCDescRep2->isVariant()) {
+ VecInstElemTable[InstDesc->getOpcode()] = false;
+ return false;
+ }
+
+ if (SchedModel.computeInstrLatency(InstDesc->getOpcode()) >
+ SchedModel.computeInstrLatency(InstDescRep1->getOpcode()) +
+ SchedModel.computeInstrLatency(InstDescRep2->getOpcode())) {
+ VecInstElemTable[InstDesc->getOpcode()] = true;
+ return true;
+ }
+ VecInstElemTable[InstDesc->getOpcode()] = false;
+ return false;
+}
+
+/// Determine if we need to exit the vector by element instruction
+/// optimization pass early. This makes sure that Targets with no need
+/// for this optimization do not spend any compile time on this pass.
+/// This check is done by comparing the latency of an indexed FMLA
+/// instruction to the latency of the DUP + the latency of a vector
+/// FMLA instruction. We do not check on other related instructions such
+/// as FMLS as we assume that if the situation shows up for one
+/// instruction, then it is likely to show up for the related ones.
+/// Return true if early exit of the pass is recommended.
+bool AArch64VectorByElementOpt::earlyExitVectElement(MachineFunction *MF) {
+ std::map<unsigned, bool> VecInstElemTable;
+ const MCInstrDesc *IndexMulMCID = &TII->get(AArch64::FMLAv4i32_indexed);
+ const MCInstrDesc *DupMCID = &TII->get(AArch64::DUPv4i32lane);
+ const MCInstrDesc *MulMCID = &TII->get(AArch64::FMULv4f32);
+
+ if (!shouldReplaceInstruction(MF, IndexMulMCID, DupMCID, MulMCID,
+ VecInstElemTable))
+ return true;
+ return false;
+}
+
+/// Check whether an equivalent DUP instruction has already been
+/// created or not.
+/// Return true when the dup instruction already exists. In this case,
+/// DestReg will point to the destination of the already created DUP.
+bool AArch64VectorByElementOpt::reuseDUP(MachineInstr &MI, unsigned DupOpcode,
+ unsigned SrcReg, unsigned LaneNumber,
+ unsigned *DestReg) const {
+ for (MachineBasicBlock::iterator MII = MI, MIE = MI.getParent()->begin();
+ MII != MIE;) {
+ MII--;
+ MachineInstr *CurrentMI = &*MII;
+
+ if (CurrentMI->getOpcode() == DupOpcode &&
+ CurrentMI->getNumOperands() == 3 &&
+ CurrentMI->getOperand(1).getReg() == SrcReg &&
+ CurrentMI->getOperand(2).getImm() == LaneNumber) {
+ *DestReg = CurrentMI->getOperand(0).getReg();
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// Certain SIMD instructions with vector element operand are not efficient.
+/// Rewrite them into SIMD instructions with vector operands. This rewrite
+/// is driven by the latency of the instructions.
+/// The instructions of concern are, for the time being, fmla, fmls, fmul,
+/// and fmulx and hence they are hardcoded.
+///
+/// Example:
+/// fmla v0.4s, v1.4s, v2.s[1]
+/// is rewritten into
+/// dup v3.4s, v2.s[1] // dup not necessary if redundant
+/// fmla v0.4s, v1.4s, v3.4s
+/// Return true if the SIMD instruction is modified.
+bool AArch64VectorByElementOpt::optimizeVectElement(
+ MachineInstr &MI, std::map<unsigned, bool> *VecInstElemTable) const {
+ const MCInstrDesc *MulMCID, *DupMCID;
+ const TargetRegisterClass *RC = &AArch64::FPR128RegClass;
+
+ switch (MI.getOpcode()) {
+ default:
+ return false;
+
+ // 4X32 instructions
+ case AArch64::FMLAv4i32_indexed:
+ DupMCID = &TII->get(AArch64::DUPv4i32lane);
+ MulMCID = &TII->get(AArch64::FMLAv4f32);
+ break;
+ case AArch64::FMLSv4i32_indexed:
+ DupMCID = &TII->get(AArch64::DUPv4i32lane);
+ MulMCID = &TII->get(AArch64::FMLSv4f32);
+ break;
+ case AArch64::FMULXv4i32_indexed:
+ DupMCID = &TII->get(AArch64::DUPv4i32lane);
+ MulMCID = &TII->get(AArch64::FMULXv4f32);
+ break;
+ case AArch64::FMULv4i32_indexed:
+ DupMCID = &TII->get(AArch64::DUPv4i32lane);
+ MulMCID = &TII->get(AArch64::FMULv4f32);
+ break;
+
+ // 2X64 instructions
+ case AArch64::FMLAv2i64_indexed:
+ DupMCID = &TII->get(AArch64::DUPv2i64lane);
+ MulMCID = &TII->get(AArch64::FMLAv2f64);
+ break;
+ case AArch64::FMLSv2i64_indexed:
+ DupMCID = &TII->get(AArch64::DUPv2i64lane);
+ MulMCID = &TII->get(AArch64::FMLSv2f64);
+ break;
+ case AArch64::FMULXv2i64_indexed:
+ DupMCID = &TII->get(AArch64::DUPv2i64lane);
+ MulMCID = &TII->get(AArch64::FMULXv2f64);
+ break;
+ case AArch64::FMULv2i64_indexed:
+ DupMCID = &TII->get(AArch64::DUPv2i64lane);
+ MulMCID = &TII->get(AArch64::FMULv2f64);
+ break;
+
+ // 2X32 instructions
+ case AArch64::FMLAv2i32_indexed:
+ RC = &AArch64::FPR64RegClass;
+ DupMCID = &TII->get(AArch64::DUPv2i32lane);
+ MulMCID = &TII->get(AArch64::FMLAv2f32);
+ break;
+ case AArch64::FMLSv2i32_indexed:
+ RC = &AArch64::FPR64RegClass;
+ DupMCID = &TII->get(AArch64::DUPv2i32lane);
+ MulMCID = &TII->get(AArch64::FMLSv2f32);
+ break;
+ case AArch64::FMULXv2i32_indexed:
+ RC = &AArch64::FPR64RegClass;
+ DupMCID = &TII->get(AArch64::DUPv2i32lane);
+ MulMCID = &TII->get(AArch64::FMULXv2f32);
+ break;
+ case AArch64::FMULv2i32_indexed:
+ RC = &AArch64::FPR64RegClass;
+ DupMCID = &TII->get(AArch64::DUPv2i32lane);
+ MulMCID = &TII->get(AArch64::FMULv2f32);
+ break;
+ }
+
+ if (!shouldReplaceInstruction(MI.getParent()->getParent(),
+ &TII->get(MI.getOpcode()), DupMCID, MulMCID,
+ *VecInstElemTable))
+ return false;
+
+ const DebugLoc &DL = MI.getDebugLoc();
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+
+ // get the operands of the current SIMD arithmetic instruction.
+ unsigned MulDest = MI.getOperand(0).getReg();
+ unsigned SrcReg0 = MI.getOperand(1).getReg();
+ unsigned Src0IsKill = getKillRegState(MI.getOperand(1).isKill());
+ unsigned SrcReg1 = MI.getOperand(2).getReg();
+ unsigned Src1IsKill = getKillRegState(MI.getOperand(2).isKill());
+ unsigned DupDest;
+
+ // Instructions of interest have either 4 or 5 operands.
+ if (MI.getNumOperands() == 5) {
+ unsigned SrcReg2 = MI.getOperand(3).getReg();
+ unsigned Src2IsKill = getKillRegState(MI.getOperand(3).isKill());
+ unsigned LaneNumber = MI.getOperand(4).getImm();
+
+ // Create a new DUP instruction. Note that if an equivalent DUP instruction
+ // has already been created before, then use that one instead of creating
+ // a new one.
+ if (!reuseDUP(MI, DupMCID->getOpcode(), SrcReg2, LaneNumber, &DupDest)) {
+ DupDest = MRI.createVirtualRegister(RC);
+ BuildMI(MBB, MI, DL, *DupMCID, DupDest)
+ .addReg(SrcReg2, Src2IsKill)
+ .addImm(LaneNumber);
+ }
+ BuildMI(MBB, MI, DL, *MulMCID, MulDest)
+ .addReg(SrcReg0, Src0IsKill)
+ .addReg(SrcReg1, Src1IsKill)
+ .addReg(DupDest, Src2IsKill);
+ } else if (MI.getNumOperands() == 4) {
+ unsigned LaneNumber = MI.getOperand(3).getImm();
+ if (!reuseDUP(MI, DupMCID->getOpcode(), SrcReg1, LaneNumber, &DupDest)) {
+ DupDest = MRI.createVirtualRegister(RC);
+ BuildMI(MBB, MI, DL, *DupMCID, DupDest)
+ .addReg(SrcReg1, Src1IsKill)
+ .addImm(LaneNumber);
+ }
+ BuildMI(MBB, MI, DL, *MulMCID, MulDest)
+ .addReg(SrcReg0, Src0IsKill)
+ .addReg(DupDest, Src1IsKill);
+ } else {
+ return false;
+ }
+
+ ++NumModifiedInstr;
+ return true;
+}
+
+bool AArch64VectorByElementOpt::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(*MF.getFunction()))
+ return false;
+
+ TII = MF.getSubtarget().getInstrInfo();
+ MRI = &MF.getRegInfo();
+ const TargetSubtargetInfo &ST = MF.getSubtarget();
+ const AArch64InstrInfo *AAII =
+ static_cast<const AArch64InstrInfo *>(ST.getInstrInfo());
+ if (!AAII)
+ return false;
+ SchedModel.init(ST.getSchedModel(), &ST, AAII);
+ if (!SchedModel.hasInstrSchedModel())
+ return false;
+
+ // A simple check to exit this pass early for targets that do not need it.
+ if (earlyExitVectElement(&MF))
+ return false;
+
+ bool Changed = false;
+ std::map<unsigned, bool> VecInstElemTable;
+ SmallVector<MachineInstr *, 8> RemoveMIs;
+
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end();
+ MII != MIE;) {
+ MachineInstr &MI = *MII;
+ if (optimizeVectElement(MI, &VecInstElemTable)) {
+ // Add MI to the list of instructions to be removed given that it has
+ // been replaced.
+ RemoveMIs.push_back(&MI);
+ Changed = true;
+ }
+ ++MII;
+ }
+ }
+
+ for (MachineInstr *MI : RemoveMIs)
+ MI->eraseFromParent();
+
+ return Changed;
+}
+
+/// createAArch64VectorByElementOptPass - returns an instance of the
+/// vector by element optimization pass.
+FunctionPass *llvm::createAArch64VectorByElementOptPass() {
+ return new AArch64VectorByElementOpt();
+}
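The new pass's rewrite decision reduces to a single latency comparison against the subtarget scheduling model; a minimal sketch of that rule (illustrative names, not part of the patch):

#include <functional>

// Sketch: rewrite an indexed SIMD multiply(-accumulate) as DUP + vector-form
// instruction only when the indexed form is strictly slower. "Latency" stands
// in for the TargetSchedModel::computeInstrLatency query the pass uses.
static bool worthRewriting(unsigned IndexedOpc, unsigned DupOpc, unsigned VecOpc,
                           const std::function<unsigned(unsigned)> &Latency) {
  return Latency(IndexedOpc) > Latency(DupOpc) + Latency(VecOpc);
}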
diff --git a/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
index aebc370333e3..db84afacf30e 100644
--- a/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp
@@ -14,6 +14,7 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCContext.h"
@@ -28,6 +29,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetParser.h"
@@ -66,8 +68,6 @@ private:
bool parseOperand(OperandVector &Operands, bool isCondCode,
bool invertCondCode);
- void Warning(SMLoc L, const Twine &Msg) { getParser().Warning(L, Msg); }
- bool Error(SMLoc L, const Twine &Msg) { return getParser().Error(L, Msg); }
bool showMatchError(SMLoc Loc, unsigned ErrCode);
bool parseDirectiveArch(SMLoc L);
@@ -117,9 +117,11 @@ public:
#define GET_OPERAND_DIAGNOSTIC_TYPES
#include "AArch64GenAsmMatcher.inc"
};
+ bool IsILP32;
AArch64AsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
const MCInstrInfo &MII, const MCTargetOptions &Options)
: MCTargetAsmParser(Options, STI) {
+ IsILP32 = Options.getABIName() == "ilp32";
MCAsmParserExtension::Initialize(Parser);
MCStreamer &S = getParser().getStreamer();
if (S.getTargetStreamer() == nullptr)
@@ -208,9 +210,9 @@ private:
};
struct BarrierOp {
- unsigned Val; // Not the enum since not all values have names.
const char *Data;
unsigned Length;
+ unsigned Val; // Not the enum since not all values have names.
};
struct SysRegOp {
@@ -226,15 +228,15 @@ private:
};
struct PrefetchOp {
- unsigned Val;
const char *Data;
unsigned Length;
+ unsigned Val;
};
struct PSBHintOp {
- unsigned Val;
const char *Data;
unsigned Length;
+ unsigned Val;
};
struct ShiftExtendOp {
@@ -727,9 +729,13 @@ public:
|| ELFRefKind == AArch64MCExpr::VK_TLSDESC_LO12;
}
- // Otherwise it should be a real immediate in range:
- const MCConstantExpr *CE = cast<MCConstantExpr>(Expr);
- return CE->getValue() >= 0 && CE->getValue() <= 0xfff;
+ // If it's a constant, it should be a real immediate in range:
+ if (auto *CE = dyn_cast<MCConstantExpr>(Expr))
+ return CE->getValue() >= 0 && CE->getValue() <= 0xfff;
+
+ // If it's an expression, we hope for the best and let the fixup/relocation
+ // code deal with it.
+ return true;
}
bool isAddSubImmNeg() const {
if (!isShiftedImm() && !isImm())
@@ -1964,7 +1970,8 @@ unsigned AArch64AsmParser::matchRegisterNameAlias(StringRef Name,
int AArch64AsmParser::tryParseRegister() {
MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
- assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier");
+ if (Tok.isNot(AsmToken::Identifier))
+ return -1;
std::string lowerCase = Tok.getString().lower();
unsigned RegNum = matchRegisterNameAlias(lowerCase, false);
@@ -2018,7 +2025,7 @@ int AArch64AsmParser::tryMatchVectorRegister(StringRef &Kind, bool expected) {
}
/// tryParseSysCROperand - Try to parse a system instruction CR operand name.
-AArch64AsmParser::OperandMatchResultTy
+OperandMatchResultTy
AArch64AsmParser::tryParseSysCROperand(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
SMLoc S = getLoc();
@@ -2048,16 +2055,15 @@ AArch64AsmParser::tryParseSysCROperand(OperandVector &Operands) {
}
/// tryParsePrefetch - Try to parse a prefetch operand.
-AArch64AsmParser::OperandMatchResultTy
+OperandMatchResultTy
AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
SMLoc S = getLoc();
const AsmToken &Tok = Parser.getTok();
// Either an identifier for named values or a 5-bit immediate.
- bool Hash = Tok.is(AsmToken::Hash);
- if (Hash || Tok.is(AsmToken::Integer)) {
- if (Hash)
- Parser.Lex(); // Eat hash token.
+ // Eat optional hash.
+ if (parseOptionalToken(AsmToken::Hash) ||
+ Tok.is(AsmToken::Integer)) {
const MCExpr *ImmVal;
if (getParser().parseExpression(ImmVal))
return MatchOperand_ParseFail;
@@ -2097,7 +2103,7 @@ AArch64AsmParser::tryParsePrefetch(OperandVector &Operands) {
}
/// tryParsePSBHint - Try to parse a PSB operand, mapped to Hint command
-AArch64AsmParser::OperandMatchResultTy
+OperandMatchResultTy
AArch64AsmParser::tryParsePSBHint(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
SMLoc S = getLoc();
@@ -2121,7 +2127,7 @@ AArch64AsmParser::tryParsePSBHint(OperandVector &Operands) {
/// tryParseAdrpLabel - Parse and validate a source label for the ADRP
/// instruction.
-AArch64AsmParser::OperandMatchResultTy
+OperandMatchResultTy
AArch64AsmParser::tryParseAdrpLabel(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
SMLoc S = getLoc();
@@ -2172,16 +2178,12 @@ AArch64AsmParser::tryParseAdrpLabel(OperandVector &Operands) {
/// tryParseAdrLabel - Parse and validate a source label for the ADR
/// instruction.
-AArch64AsmParser::OperandMatchResultTy
+OperandMatchResultTy
AArch64AsmParser::tryParseAdrLabel(OperandVector &Operands) {
- MCAsmParser &Parser = getParser();
SMLoc S = getLoc();
const MCExpr *Expr;
- if (Parser.getTok().is(AsmToken::Hash)) {
- Parser.Lex(); // Eat hash token.
- }
-
+ parseOptionalToken(AsmToken::Hash);
if (getParser().parseExpression(Expr))
return MatchOperand_ParseFail;
@@ -2192,26 +2194,19 @@ AArch64AsmParser::tryParseAdrLabel(OperandVector &Operands) {
}
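Throughout this file, explicit is()/Lex() token handling is folded into the generic asm-parser helpers; their semantics, sketched here for reference (not the actual MCAsmParser implementation):
//   parseOptionalToken(K): if the next token has kind K, consume it and return
//                          true; otherwise leave the token stream untouched
//                          and return false.
//   parseToken(K, Msg):    consume a token of kind K and return false; if the
//                          next token is not K, report the error Msg and
//                          return true.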
/// tryParseFPImm - A floating point immediate expression operand.
-AArch64AsmParser::OperandMatchResultTy
+OperandMatchResultTy
AArch64AsmParser::tryParseFPImm(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
SMLoc S = getLoc();
- bool Hash = false;
- if (Parser.getTok().is(AsmToken::Hash)) {
- Parser.Lex(); // Eat '#'
- Hash = true;
- }
+ bool Hash = parseOptionalToken(AsmToken::Hash);
// Handle negation, as that still comes through as a separate token.
- bool isNegative = false;
- if (Parser.getTok().is(AsmToken::Minus)) {
- isNegative = true;
- Parser.Lex();
- }
+ bool isNegative = parseOptionalToken(AsmToken::Minus);
+
const AsmToken &Tok = Parser.getTok();
if (Tok.is(AsmToken::Real)) {
- APFloat RealVal(APFloat::IEEEdouble, Tok.getString());
+ APFloat RealVal(APFloat::IEEEdouble(), Tok.getString());
if (isNegative)
RealVal.changeSign();
@@ -2237,7 +2232,7 @@ AArch64AsmParser::tryParseFPImm(OperandVector &Operands) {
return MatchOperand_ParseFail;
}
} else {
- APFloat RealVal(APFloat::IEEEdouble, Tok.getString());
+ APFloat RealVal(APFloat::IEEEdouble(), Tok.getString());
uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
// If we had a '-' in front, toggle the sign bit.
IntVal ^= (uint64_t)isNegative << 63;
@@ -2256,7 +2251,7 @@ AArch64AsmParser::tryParseFPImm(OperandVector &Operands) {
}
/// tryParseAddSubImm - Parse ADD/SUB shifted immediate operand
-AArch64AsmParser::OperandMatchResultTy
+OperandMatchResultTy
AArch64AsmParser::tryParseAddSubImm(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
SMLoc S = getLoc();
@@ -2299,9 +2294,7 @@ AArch64AsmParser::tryParseAddSubImm(OperandVector &Operands) {
// Eat 'lsl'
Parser.Lex();
- if (Parser.getTok().is(AsmToken::Hash)) {
- Parser.Lex();
- }
+ parseOptionalToken(AsmToken::Hash);
if (Parser.getTok().isNot(AsmToken::Integer)) {
Error(Parser.getTok().getLoc(), "only 'lsl #+N' valid after immediate");
@@ -2374,7 +2367,7 @@ bool AArch64AsmParser::parseCondCode(OperandVector &Operands,
/// tryParseOptionalShift - Some operands take an optional shift argument. Parse
/// them if present.
-AArch64AsmParser::OperandMatchResultTy
+OperandMatchResultTy
AArch64AsmParser::tryParseOptionalShiftExtend(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
@@ -2402,7 +2395,8 @@ AArch64AsmParser::tryParseOptionalShiftExtend(OperandVector &Operands) {
SMLoc S = Tok.getLoc();
Parser.Lex();
- bool Hash = getLexer().is(AsmToken::Hash);
+ bool Hash = parseOptionalToken(AsmToken::Hash);
+
if (!Hash && getLexer().isNot(AsmToken::Integer)) {
if (ShOp == AArch64_AM::LSL || ShOp == AArch64_AM::LSR ||
ShOp == AArch64_AM::ASR || ShOp == AArch64_AM::ROR ||
@@ -2412,20 +2406,19 @@ AArch64AsmParser::tryParseOptionalShiftExtend(OperandVector &Operands) {
return MatchOperand_ParseFail;
}
- // "extend" type operatoins don't need an immediate, #0 is implicit.
+ // "extend" type operations don't need an immediate, #0 is implicit.
SMLoc E = SMLoc::getFromPointer(getLoc().getPointer() - 1);
Operands.push_back(
AArch64Operand::CreateShiftExtend(ShOp, 0, false, S, E, getContext()));
return MatchOperand_Success;
}
- if (Hash)
- Parser.Lex(); // Eat the '#'.
-
- // Make sure we do actually have a number or a parenthesized expression.
+ // Make sure we do actually have a number, identifier or a parenthesized
+ // expression.
SMLoc E = Parser.getTok().getLoc();
if (!Parser.getTok().is(AsmToken::Integer) &&
- !Parser.getTok().is(AsmToken::LParen)) {
+ !Parser.getTok().is(AsmToken::LParen) &&
+ !Parser.getTok().is(AsmToken::Identifier)) {
Error(E, "expected integer shift amount");
return MatchOperand_ParseFail;
}
@@ -2690,20 +2683,12 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
bool HasRegister = false;
// Check for the optional register operand.
- if (getLexer().is(AsmToken::Comma)) {
- Parser.Lex(); // Eat comma.
-
+ if (parseOptionalToken(AsmToken::Comma)) {
if (Tok.isNot(AsmToken::Identifier) || parseRegister(Operands))
return TokError("expected register operand");
-
HasRegister = true;
}
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- Parser.eatToEndOfStatement();
- return TokError("unexpected token in argument list");
- }
-
if (ExpectRegister && !HasRegister) {
return TokError("specified " + Mnemonic + " op requires a register");
}
@@ -2711,21 +2696,21 @@ bool AArch64AsmParser::parseSysAlias(StringRef Name, SMLoc NameLoc,
return TokError("specified " + Mnemonic + " op does not use a register");
}
- Parser.Lex(); // Consume the EndOfStatement
+ if (parseToken(AsmToken::EndOfStatement, "unexpected token in argument list"))
+ return true;
+
return false;
}
-AArch64AsmParser::OperandMatchResultTy
+OperandMatchResultTy
AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
// Can be either a #imm style literal or an option name
- bool Hash = Tok.is(AsmToken::Hash);
- if (Hash || Tok.is(AsmToken::Integer)) {
+ if (parseOptionalToken(AsmToken::Hash) ||
+ Tok.is(AsmToken::Integer)) {
// Immediate operand.
- if (Hash)
- Parser.Lex(); // Eat the '#'
const MCExpr *ImmVal;
SMLoc ExprLoc = getLoc();
if (getParser().parseExpression(ImmVal))
@@ -2769,7 +2754,7 @@ AArch64AsmParser::tryParseBarrierOperand(OperandVector &Operands) {
return MatchOperand_Success;
}
-AArch64AsmParser::OperandMatchResultTy
+OperandMatchResultTy
AArch64AsmParser::tryParseSysReg(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
@@ -2819,10 +2804,8 @@ bool AArch64AsmParser::tryParseVectorRegister(OperandVector &Operands) {
AArch64Operand::CreateToken(Kind, false, S, getContext()));
// If there is an index specifier following the register, parse that too.
- if (Parser.getTok().is(AsmToken::LBrac)) {
- SMLoc SIdx = getLoc();
- Parser.Lex(); // Eat left bracket token.
-
+ SMLoc SIdx = getLoc();
+ if (parseOptionalToken(AsmToken::LBrac)) {
const MCExpr *ImmVal;
if (getParser().parseExpression(ImmVal))
return false;
@@ -2833,12 +2816,9 @@ bool AArch64AsmParser::tryParseVectorRegister(OperandVector &Operands) {
}
SMLoc E = getLoc();
- if (Parser.getTok().isNot(AsmToken::RBrac)) {
- Error(E, "']' expected");
- return false;
- }
- Parser.Lex(); // Eat right bracket token.
+ if (parseToken(AsmToken::RBrac, "']' expected"))
+ return false;
Operands.push_back(AArch64Operand::CreateVectorIndex(MCE->getValue(), SIdx,
E, getContext()));
@@ -2864,18 +2844,16 @@ bool AArch64AsmParser::parseRegister(OperandVector &Operands) {
// A small number of instructions (FMOVXDhighr, for example) have "[1]"
// as a string token in the instruction itself.
- if (getLexer().getKind() == AsmToken::LBrac) {
- SMLoc LBracS = getLoc();
- Parser.Lex();
- const AsmToken &Tok = Parser.getTok();
+ SMLoc LBracS = getLoc();
+ const AsmToken &Tok = Parser.getTok();
+ if (parseOptionalToken(AsmToken::LBrac)) {
if (Tok.is(AsmToken::Integer)) {
SMLoc IntS = getLoc();
int64_t Val = Tok.getIntVal();
if (Val == 1) {
Parser.Lex();
- if (getLexer().getKind() == AsmToken::RBrac) {
- SMLoc RBracS = getLoc();
- Parser.Lex();
+ SMLoc RBracS = getLoc();
+ if (parseOptionalToken(AsmToken::RBrac)) {
Operands.push_back(
AArch64Operand::CreateToken("[", false, LBracS, getContext()));
Operands.push_back(
@@ -2896,15 +2874,11 @@ bool AArch64AsmParser::parseSymbolicImmVal(const MCExpr *&ImmVal) {
bool HasELFModifier = false;
AArch64MCExpr::VariantKind RefKind;
- if (Parser.getTok().is(AsmToken::Colon)) {
- Parser.Lex(); // Eat ':"
+ if (parseOptionalToken(AsmToken::Colon)) {
HasELFModifier = true;
- if (Parser.getTok().isNot(AsmToken::Identifier)) {
- Error(Parser.getTok().getLoc(),
- "expect relocation specifier in operand after ':'");
- return true;
- }
+ if (Parser.getTok().isNot(AsmToken::Identifier))
+ return TokError("expect relocation specifier in operand after ':'");
std::string LowerCase = Parser.getTok().getIdentifier().lower();
RefKind = StringSwitch<AArch64MCExpr::VariantKind>(LowerCase)
@@ -2945,19 +2919,13 @@ bool AArch64AsmParser::parseSymbolicImmVal(const MCExpr *&ImmVal) {
.Case("tlsdesc", AArch64MCExpr::VK_TLSDESC_PAGE)
.Default(AArch64MCExpr::VK_INVALID);
- if (RefKind == AArch64MCExpr::VK_INVALID) {
- Error(Parser.getTok().getLoc(),
- "expect relocation specifier in operand after ':'");
- return true;
- }
+ if (RefKind == AArch64MCExpr::VK_INVALID)
+ return TokError("expect relocation specifier in operand after ':'");
Parser.Lex(); // Eat identifier
- if (Parser.getTok().isNot(AsmToken::Colon)) {
- Error(Parser.getTok().getLoc(), "expect ':' after relocation specifier");
+ if (parseToken(AsmToken::Colon, "expect ':' after relocation specifier"))
return true;
- }
- Parser.Lex(); // Eat ':'
}
if (getParser().parseExpression(ImmVal))
@@ -2982,9 +2950,7 @@ bool AArch64AsmParser::parseVectorList(OperandVector &Operands) {
int64_t PrevReg = FirstReg;
unsigned Count = 1;
- if (Parser.getTok().is(AsmToken::Minus)) {
- Parser.Lex(); // Eat the minus.
-
+ if (parseOptionalToken(AsmToken::Minus)) {
SMLoc Loc = getLoc();
StringRef NextKind;
int64_t Reg = tryMatchVectorRegister(NextKind, true);
@@ -3003,9 +2969,7 @@ bool AArch64AsmParser::parseVectorList(OperandVector &Operands) {
Count += Space;
}
else {
- while (Parser.getTok().is(AsmToken::Comma)) {
- Parser.Lex(); // Eat the comma token.
-
+ while (parseOptionalToken(AsmToken::Comma)) {
SMLoc Loc = getLoc();
StringRef NextKind;
int64_t Reg = tryMatchVectorRegister(NextKind, true);
@@ -3025,9 +2989,8 @@ bool AArch64AsmParser::parseVectorList(OperandVector &Operands) {
}
}
- if (Parser.getTok().isNot(AsmToken::RCurly))
- return Error(getLoc(), "'}' expected");
- Parser.Lex(); // Eat the '}' token.
+ if (parseToken(AsmToken::RCurly, "'}' expected"))
+ return true;
if (Count > 4)
return Error(S, "invalid number of vectors");
@@ -3041,10 +3004,8 @@ bool AArch64AsmParser::parseVectorList(OperandVector &Operands) {
FirstReg, Count, NumElements, ElementKind, S, getLoc(), getContext()));
// If there is an index specifier following the list, parse that too.
- if (Parser.getTok().is(AsmToken::LBrac)) {
- SMLoc SIdx = getLoc();
- Parser.Lex(); // Eat left bracket token.
-
+ SMLoc SIdx = getLoc();
+ if (parseOptionalToken(AsmToken::LBrac)) { // Eat left bracket token.
const MCExpr *ImmVal;
if (getParser().parseExpression(ImmVal))
return false;
@@ -3055,12 +3016,8 @@ bool AArch64AsmParser::parseVectorList(OperandVector &Operands) {
}
SMLoc E = getLoc();
- if (Parser.getTok().isNot(AsmToken::RBrac)) {
- Error(E, "']' expected");
+ if (parseToken(AsmToken::RBrac, "']' expected"))
return false;
- }
-
- Parser.Lex(); // Eat right bracket token.
Operands.push_back(AArch64Operand::CreateVectorIndex(MCE->getValue(), SIdx,
E, getContext()));
@@ -3068,7 +3025,7 @@ bool AArch64AsmParser::parseVectorList(OperandVector &Operands) {
return false;
}
-AArch64AsmParser::OperandMatchResultTy
+OperandMatchResultTy
AArch64AsmParser::tryParseGPR64sp0Operand(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
@@ -3085,15 +3042,13 @@ AArch64AsmParser::tryParseGPR64sp0Operand(OperandVector &Operands) {
SMLoc S = getLoc();
Parser.Lex(); // Eat register
- if (Parser.getTok().isNot(AsmToken::Comma)) {
+ if (!parseOptionalToken(AsmToken::Comma)) {
Operands.push_back(
AArch64Operand::CreateReg(RegNum, false, S, getLoc(), Ctx));
return MatchOperand_Success;
}
- Parser.Lex(); // Eat comma.
- if (Parser.getTok().is(AsmToken::Hash))
- Parser.Lex(); // Eat hash
+ parseOptionalToken(AsmToken::Hash);
if (Parser.getTok().isNot(AsmToken::Integer)) {
Error(getLoc(), "index must be absent or #0");
@@ -3174,7 +3129,6 @@ bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode,
S = getLoc();
if (getParser().parseExpression(IdVal))
return true;
-
E = SMLoc::getFromPointer(getLoc().getPointer() - 1);
Operands.push_back(AArch64Operand::CreateImm(IdVal, S, E, getContext()));
return false;
@@ -3184,8 +3138,8 @@ bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode,
case AsmToken::Hash: {
// #42 -> immediate.
S = getLoc();
- if (getLexer().is(AsmToken::Hash))
- Parser.Lex();
+
+ parseOptionalToken(AsmToken::Hash);
// Parse a negative sign
bool isNegative = false;
@@ -3202,7 +3156,7 @@ bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode,
// so convert the value.
const AsmToken &Tok = Parser.getTok();
if (Tok.is(AsmToken::Real)) {
- APFloat RealVal(APFloat::IEEEdouble, Tok.getString());
+ APFloat RealVal(APFloat::IEEEdouble(), Tok.getString());
uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
if (Mnemonic != "fcmp" && Mnemonic != "fcmpe" && Mnemonic != "fcmeq" &&
Mnemonic != "fcmge" && Mnemonic != "fcmgt" && Mnemonic != "fcmle" &&
@@ -3228,9 +3182,9 @@ bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode,
return false;
}
case AsmToken::Equal: {
- SMLoc Loc = Parser.getTok().getLoc();
+ SMLoc Loc = getLoc();
if (Mnemonic != "ldr") // only parse for ldr pseudo (e.g. ldr r0, =val)
- return Error(Loc, "unexpected token in operand");
+ return TokError("unexpected token in operand");
Parser.Lex(); // Eat '='
const MCExpr *SubExprVal;
if (getParser().parseExpression(SubExprVal))
@@ -3318,12 +3272,8 @@ bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info,
StringRef Head = Name.slice(Start, Next);
// IC, DC, AT, and TLBI instructions are aliases for the SYS instruction.
- if (Head == "ic" || Head == "dc" || Head == "at" || Head == "tlbi") {
- bool IsError = parseSysAlias(Head, NameLoc, Operands);
- if (IsError && getLexer().isNot(AsmToken::EndOfStatement))
- Parser.eatToEndOfStatement();
- return IsError;
- }
+ if (Head == "ic" || Head == "dc" || Head == "at" || Head == "tlbi")
+ return parseSysAlias(Head, NameLoc, Operands);
Operands.push_back(
AArch64Operand::CreateToken(Head, false, NameLoc, getContext()));
@@ -3378,20 +3328,16 @@ bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info,
if (getLexer().isNot(AsmToken::EndOfStatement)) {
// Read the first operand.
if (parseOperand(Operands, false, false)) {
- Parser.eatToEndOfStatement();
return true;
}
unsigned N = 2;
- while (getLexer().is(AsmToken::Comma)) {
- Parser.Lex(); // Eat the comma.
-
+ while (parseOptionalToken(AsmToken::Comma)) {
// Parse and remember the operand.
if (parseOperand(Operands, (N == 4 && condCodeFourthOperand) ||
(N == 3 && condCodeThirdOperand) ||
(N == 2 && condCodeSecondOperand),
condCodeSecondOperand || condCodeThirdOperand)) {
- Parser.eatToEndOfStatement();
return true;
}
@@ -3403,31 +3349,23 @@ bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info,
//
// It's someone else's responsibility to make sure these tokens are sane
// in the given context!
- if (Parser.getTok().is(AsmToken::RBrac)) {
- SMLoc Loc = Parser.getTok().getLoc();
- Operands.push_back(AArch64Operand::CreateToken("]", false, Loc,
- getContext()));
- Parser.Lex();
- }
- if (Parser.getTok().is(AsmToken::Exclaim)) {
- SMLoc Loc = Parser.getTok().getLoc();
- Operands.push_back(AArch64Operand::CreateToken("!", false, Loc,
- getContext()));
- Parser.Lex();
- }
+ SMLoc RLoc = Parser.getTok().getLoc();
+ if (parseOptionalToken(AsmToken::RBrac))
+ Operands.push_back(
+ AArch64Operand::CreateToken("]", false, RLoc, getContext()));
+ SMLoc ELoc = Parser.getTok().getLoc();
+ if (parseOptionalToken(AsmToken::Exclaim))
+ Operands.push_back(
+ AArch64Operand::CreateToken("!", false, ELoc, getContext()));
++N;
}
}
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- SMLoc Loc = Parser.getTok().getLoc();
- Parser.eatToEndOfStatement();
- return Error(Loc, "unexpected token in argument list");
- }
+ if (parseToken(AsmToken::EndOfStatement, "unexpected token in argument list"))
+ return true;
- Parser.Lex(); // Consume the EndOfStatement
return false;
}
@@ -3455,7 +3393,7 @@ bool AArch64AsmParser::validateInstruction(MCInst &Inst,
if (RI->isSubRegisterEq(Rn, Rt2))
return Error(Loc[1], "unpredictable LDP instruction, writeback base "
"is also a destination");
- // FALLTHROUGH
+ LLVM_FALLTHROUGH;
}
case AArch64::LDPDi:
case AArch64::LDPQi:
@@ -3572,31 +3510,34 @@ bool AArch64AsmParser::validateInstruction(MCInst &Inst,
AArch64MCExpr::VariantKind ELFRefKind;
MCSymbolRefExpr::VariantKind DarwinRefKind;
int64_t Addend;
- if (!classifySymbolRef(Expr, ELFRefKind, DarwinRefKind, Addend)) {
- return Error(Loc[2], "invalid immediate expression");
- }
+ if (classifySymbolRef(Expr, ELFRefKind, DarwinRefKind, Addend)) {
- // Only allow these with ADDXri.
- if ((DarwinRefKind == MCSymbolRefExpr::VK_PAGEOFF ||
- DarwinRefKind == MCSymbolRefExpr::VK_TLVPPAGEOFF) &&
- Inst.getOpcode() == AArch64::ADDXri)
- return false;
+ // Only allow these with ADDXri.
+ if ((DarwinRefKind == MCSymbolRefExpr::VK_PAGEOFF ||
+ DarwinRefKind == MCSymbolRefExpr::VK_TLVPPAGEOFF) &&
+ Inst.getOpcode() == AArch64::ADDXri)
+ return false;
- // Only allow these with ADDXri/ADDWri
- if ((ELFRefKind == AArch64MCExpr::VK_LO12 ||
- ELFRefKind == AArch64MCExpr::VK_DTPREL_HI12 ||
- ELFRefKind == AArch64MCExpr::VK_DTPREL_LO12 ||
- ELFRefKind == AArch64MCExpr::VK_DTPREL_LO12_NC ||
- ELFRefKind == AArch64MCExpr::VK_TPREL_HI12 ||
- ELFRefKind == AArch64MCExpr::VK_TPREL_LO12 ||
- ELFRefKind == AArch64MCExpr::VK_TPREL_LO12_NC ||
- ELFRefKind == AArch64MCExpr::VK_TLSDESC_LO12) &&
- (Inst.getOpcode() == AArch64::ADDXri ||
- Inst.getOpcode() == AArch64::ADDWri))
- return false;
+ // Only allow these with ADDXri/ADDWri
+ if ((ELFRefKind == AArch64MCExpr::VK_LO12 ||
+ ELFRefKind == AArch64MCExpr::VK_DTPREL_HI12 ||
+ ELFRefKind == AArch64MCExpr::VK_DTPREL_LO12 ||
+ ELFRefKind == AArch64MCExpr::VK_DTPREL_LO12_NC ||
+ ELFRefKind == AArch64MCExpr::VK_TPREL_HI12 ||
+ ELFRefKind == AArch64MCExpr::VK_TPREL_LO12 ||
+ ELFRefKind == AArch64MCExpr::VK_TPREL_LO12_NC ||
+ ELFRefKind == AArch64MCExpr::VK_TLSDESC_LO12) &&
+ (Inst.getOpcode() == AArch64::ADDXri ||
+ Inst.getOpcode() == AArch64::ADDWri))
+ return false;
- // Don't allow expressions in the immediate field otherwise
- return Error(Loc[2], "invalid immediate expression");
+ // Don't allow symbol refs in the immediate field otherwise
+ // Note: Loc.back() may be Loc[1] or Loc[2] depending on the number of
+ // operands of the original instruction (i.e. 'add w0, w1, borked' vs
+ // 'cmp w0, borked')
+ return Error(Loc.back(), "invalid immediate expression");
+ }
+ // We don't validate more complex expressions here
}
return false;
}
@@ -4075,7 +4016,8 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
if (ErrorInfo != ~0ULL) {
if (ErrorInfo >= Operands.size())
- return Error(IDLoc, "too few operands for instruction");
+ return Error(IDLoc, "too few operands for instruction",
+ SMRange(IDLoc, getTok().getLoc()));
ErrorLoc = ((AArch64Operand &)*Operands[ErrorInfo]).getStartLoc();
if (ErrorLoc == SMLoc())
@@ -4138,7 +4080,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_MSR:
case Match_MRS: {
if (ErrorInfo >= Operands.size())
- return Error(IDLoc, "too few operands for instruction");
+ return Error(IDLoc, "too few operands for instruction", SMRange(IDLoc, (*Operands.back()).getEndLoc()));
// Any time we get here, there's nothing fancy to do. Just get the
// operand SMLoc and display the diagnostic.
SMLoc ErrorLoc = ((AArch64Operand &)*Operands[ErrorInfo]).getStartLoc();
@@ -4161,28 +4103,31 @@ bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getIdentifier();
SMLoc Loc = DirectiveID.getLoc();
if (IDVal == ".arch")
- return parseDirectiveArch(Loc);
- if (IDVal == ".cpu")
- return parseDirectiveCPU(Loc);
- if (IDVal == ".hword")
- return parseDirectiveWord(2, Loc);
- if (IDVal == ".word")
- return parseDirectiveWord(4, Loc);
- if (IDVal == ".xword")
- return parseDirectiveWord(8, Loc);
- if (IDVal == ".tlsdesccall")
- return parseDirectiveTLSDescCall(Loc);
- if (IDVal == ".ltorg" || IDVal == ".pool")
- return parseDirectiveLtorg(Loc);
- if (IDVal == ".unreq")
- return parseDirectiveUnreq(Loc);
-
- if (!IsMachO && !IsCOFF) {
+ parseDirectiveArch(Loc);
+ else if (IDVal == ".cpu")
+ parseDirectiveCPU(Loc);
+ else if (IDVal == ".hword")
+ parseDirectiveWord(2, Loc);
+ else if (IDVal == ".word")
+ parseDirectiveWord(4, Loc);
+ else if (IDVal == ".xword")
+ parseDirectiveWord(8, Loc);
+ else if (IDVal == ".tlsdesccall")
+ parseDirectiveTLSDescCall(Loc);
+ else if (IDVal == ".ltorg" || IDVal == ".pool")
+ parseDirectiveLtorg(Loc);
+ else if (IDVal == ".unreq")
+ parseDirectiveUnreq(Loc);
+ else if (!IsMachO && !IsCOFF) {
if (IDVal == ".inst")
- return parseDirectiveInst(Loc);
- }
-
- return parseDirectiveLOH(IDVal, Loc);
+ parseDirectiveInst(Loc);
+ else
+ return true;
+ } else if (IDVal == MCLOHDirectiveName())
+ parseDirectiveLOH(IDVal, Loc);
+ else
+ return true;
+ return false;
}
static const struct {
@@ -4193,9 +4138,10 @@ static const struct {
{ "crypto", {AArch64::FeatureCrypto} },
{ "fp", {AArch64::FeatureFPARMv8} },
{ "simd", {AArch64::FeatureNEON} },
+ { "ras", {AArch64::FeatureRAS} },
+ { "lse", {AArch64::FeatureLSE} },
// FIXME: Unsupported extensions
- { "lse", {} },
{ "pan", {} },
{ "lor", {} },
{ "rdma", {} },
@@ -4212,17 +4158,51 @@ bool AArch64AsmParser::parseDirectiveArch(SMLoc L) {
getParser().parseStringToEndOfStatement().trim().split('+');
unsigned ID = AArch64::parseArch(Arch);
- if (ID == ARM::AK_INVALID) {
- Error(ArchLoc, "unknown arch name");
- return false;
- }
+ if (ID == static_cast<unsigned>(AArch64::ArchKind::AK_INVALID))
+ return Error(ArchLoc, "unknown arch name");
+
+ if (parseToken(AsmToken::EndOfStatement))
+ return true;
+
+ // Get the architecture and extension features.
+ std::vector<StringRef> AArch64Features;
+ AArch64::getArchFeatures(ID, AArch64Features);
+ AArch64::getExtensionFeatures(AArch64::getDefaultExtensions("generic", ID),
+ AArch64Features);
MCSubtargetInfo &STI = copySTI();
- STI.setDefaultFeatures("", "");
+ std::vector<std::string> ArchFeatures(AArch64Features.begin(), AArch64Features.end());
+ STI.setDefaultFeatures("generic", join(ArchFeatures.begin(), ArchFeatures.end(), ","));
+
+ SmallVector<StringRef, 4> RequestedExtensions;
if (!ExtensionString.empty())
- STI.setDefaultFeatures("", ("+" + ExtensionString).str());
- setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+ ExtensionString.split(RequestedExtensions, '+');
+
+ FeatureBitset Features = STI.getFeatureBits();
+ for (auto Name : RequestedExtensions) {
+ bool EnableFeature = true;
+ if (Name.startswith_lower("no")) {
+ EnableFeature = false;
+ Name = Name.substr(2);
+ }
+
+ for (const auto &Extension : ExtensionMap) {
+ if (Extension.Name != Name)
+ continue;
+
+ if (Extension.Features.none())
+ report_fatal_error("unsupported architectural extension: " + Name);
+
+ FeatureBitset ToggleFeatures = EnableFeature
+ ? (~Features & Extension.Features)
+ : ( Features & Extension.Features);
+ uint64_t Features =
+ ComputeAvailableFeatures(STI.ToggleFeature(ToggleFeatures));
+ setAvailableFeatures(Features);
+ break;
+ }
+ }
return false;
}
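
The loop above toggles subtarget features for each "+ext" / "+noext" suffix of the .arch directive. The following is a minimal standalone sketch of that toggle pattern only; the feature names and bit positions are invented here and do not use the real AArch64 feature tables.

#include <bitset>
#include <iostream>
#include <sstream>
#include <string>

// Toy feature bits (assumptions; the real list lives in the AArch64 .td files).
enum Feature { FP = 0, SIMD = 1, CRC = 2, NumFeatures = 3 };

// Toggle features for an extension string such as "crc+nofp", mirroring the
// "no" prefix handling in parseDirectiveArch above.
std::bitset<NumFeatures> applyExtensions(std::bitset<NumFeatures> Bits,
                                         const std::string &ExtensionString) {
  std::stringstream SS(ExtensionString);
  std::string Name;
  while (std::getline(SS, Name, '+')) {
    bool Enable = true;
    if (Name.rfind("no", 0) == 0) {   // starts with "no": disable the feature
      Enable = false;
      Name = Name.substr(2);
    }
    if (Name == "fp")        Bits.set(FP, Enable);
    else if (Name == "simd") Bits.set(SIMD, Enable);
    else if (Name == "crc")  Bits.set(CRC, Enable);
    // an unknown extension would be diagnosed in the real parser
  }
  return Bits;
}

int main() {
  std::bitset<NumFeatures> Default;        // fp+simd on by default in this sketch
  Default.set(FP).set(SIMD);
  std::cout << applyExtensions(Default, "crc+nofp") << "\n";  // prints 110 (simd+crc)
}
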
@@ -4235,6 +4215,9 @@ bool AArch64AsmParser::parseDirectiveCPU(SMLoc L) {
std::tie(CPU, ExtensionString) =
getParser().parseStringToEndOfStatement().trim().split('+');
+ if (parseToken(AsmToken::EndOfStatement))
+ return true;
+
SmallVector<StringRef, 4> RequestedExtensions;
if (!ExtensionString.empty())
ExtensionString.split(RequestedExtensions, '+');
@@ -4281,67 +4264,39 @@ bool AArch64AsmParser::parseDirectiveCPU(SMLoc L) {
/// parseDirectiveWord
/// ::= .word [ expression (, expression)* ]
bool AArch64AsmParser::parseDirectiveWord(unsigned Size, SMLoc L) {
- MCAsmParser &Parser = getParser();
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- for (;;) {
- const MCExpr *Value;
- if (getParser().parseExpression(Value))
- return true;
-
- getParser().getStreamer().EmitValue(Value, Size, L);
-
- if (getLexer().is(AsmToken::EndOfStatement))
- break;
-
- // FIXME: Improve diagnostic.
- if (getLexer().isNot(AsmToken::Comma))
- return Error(L, "unexpected token in directive");
- Parser.Lex();
- }
- }
+ auto parseOp = [&]() -> bool {
+ const MCExpr *Value;
+ if (getParser().parseExpression(Value))
+ return true;
+ getParser().getStreamer().EmitValue(Value, Size, L);
+ return false;
+ };
- Parser.Lex();
+ if (parseMany(parseOp))
+ return true;
return false;
}
/// parseDirectiveInst
/// ::= .inst opcode [, ...]
bool AArch64AsmParser::parseDirectiveInst(SMLoc Loc) {
- MCAsmParser &Parser = getParser();
- if (getLexer().is(AsmToken::EndOfStatement)) {
- Parser.eatToEndOfStatement();
- Error(Loc, "expected expression following directive");
- return false;
- }
+ if (getLexer().is(AsmToken::EndOfStatement))
+ return Error(Loc, "expected expression following '.inst' directive");
- for (;;) {
+ auto parseOp = [&]() -> bool {
+ SMLoc L = getLoc();
const MCExpr *Expr;
-
- if (getParser().parseExpression(Expr)) {
- Error(Loc, "expected expression");
- return false;
- }
-
+ if (check(getParser().parseExpression(Expr), L, "expected expression"))
+ return true;
const MCConstantExpr *Value = dyn_cast_or_null<MCConstantExpr>(Expr);
- if (!Value) {
- Error(Loc, "expected constant expression");
- return false;
- }
-
+ if (check(!Value, L, "expected constant expression"))
+ return true;
getTargetStreamer().emitInst(Value->getValue());
+ return false;
+ };
- if (getLexer().is(AsmToken::EndOfStatement))
- break;
-
- if (getLexer().isNot(AsmToken::Comma)) {
- Error(Loc, "unexpected token in directive");
- return false;
- }
-
- Parser.Lex(); // Eat comma.
- }
-
- Parser.Lex();
+ if (parseMany(parseOp))
+ return addErrorSuffix(" in '.inst' directive");
return false;
}
@@ -4349,8 +4304,10 @@ bool AArch64AsmParser::parseDirectiveInst(SMLoc Loc) {
// ::= .tlsdesccall symbol
bool AArch64AsmParser::parseDirectiveTLSDescCall(SMLoc L) {
StringRef Name;
- if (getParser().parseIdentifier(Name))
- return Error(L, "expected symbol after directive");
+ if (check(getParser().parseIdentifier(Name), L,
+ "expected symbol after directive") ||
+ parseToken(AsmToken::EndOfStatement))
+ return true;
MCSymbol *Sym = getContext().getOrCreateSymbol(Name);
const MCExpr *Expr = MCSymbolRefExpr::create(Sym, getContext());
@@ -4367,8 +4324,6 @@ bool AArch64AsmParser::parseDirectiveTLSDescCall(SMLoc L) {
/// ::= .loh <lohName | lohId> label1, ..., labelN
/// The number of arguments depends on the loh identifier.
bool AArch64AsmParser::parseDirectiveLOH(StringRef IDVal, SMLoc Loc) {
- if (IDVal != MCLOHDirectiveName())
- return true;
MCLOHType Kind;
if (getParser().getTok().isNot(AsmToken::Identifier)) {
if (getParser().getTok().isNot(AsmToken::Integer))
@@ -4405,12 +4360,13 @@ bool AArch64AsmParser::parseDirectiveLOH(StringRef IDVal, SMLoc Loc) {
if (Idx + 1 == NbArgs)
break;
- if (getLexer().isNot(AsmToken::Comma))
- return TokError("unexpected token in '" + Twine(IDVal) + "' directive");
- Lex();
+ if (parseToken(AsmToken::Comma,
+ "unexpected token in '" + Twine(IDVal) + "' directive"))
+ return true;
}
- if (getLexer().isNot(AsmToken::EndOfStatement))
- return TokError("unexpected token in '" + Twine(IDVal) + "' directive");
+ if (parseToken(AsmToken::EndOfStatement,
+ "unexpected token in '" + Twine(IDVal) + "' directive"))
+ return true;
getStreamer().EmitLOHDirective((MCLOHType)Kind, Args);
return false;
@@ -4419,6 +4375,8 @@ bool AArch64AsmParser::parseDirectiveLOH(StringRef IDVal, SMLoc Loc) {
/// parseDirectiveLtorg
/// ::= .ltorg | .pool
bool AArch64AsmParser::parseDirectiveLtorg(SMLoc L) {
+ if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive"))
+ return true;
getTargetStreamer().emitCurrentConstantPool();
return false;
}
@@ -4435,46 +4393,36 @@ bool AArch64AsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
if (RegNum == static_cast<unsigned>(-1)) {
StringRef Kind;
RegNum = tryMatchVectorRegister(Kind, false);
- if (!Kind.empty()) {
- Error(SRegLoc, "vector register without type specifier expected");
- return false;
- }
+ if (!Kind.empty())
+ return Error(SRegLoc, "vector register without type specifier expected");
IsVector = true;
}
- if (RegNum == static_cast<unsigned>(-1)) {
- Parser.eatToEndOfStatement();
- Error(SRegLoc, "register name or alias expected");
- return false;
- }
+ if (RegNum == static_cast<unsigned>(-1))
+ return Error(SRegLoc, "register name or alias expected");
// Shouldn't be anything else.
- if (Parser.getTok().isNot(AsmToken::EndOfStatement)) {
- Error(Parser.getTok().getLoc(), "unexpected input in .req directive");
- Parser.eatToEndOfStatement();
- return false;
- }
-
- Parser.Lex(); // Consume the EndOfStatement
+ if (parseToken(AsmToken::EndOfStatement,
+ "unexpected input in .req directive"))
+ return true;
auto pair = std::make_pair(IsVector, RegNum);
if (RegisterReqs.insert(std::make_pair(Name, pair)).first->second != pair)
Warning(L, "ignoring redefinition of register alias '" + Name + "'");
- return true;
+ return false;
}
/// parseDirectiveUneq
/// ::= .unreq registername
bool AArch64AsmParser::parseDirectiveUnreq(SMLoc L) {
MCAsmParser &Parser = getParser();
- if (Parser.getTok().isNot(AsmToken::Identifier)) {
- Error(Parser.getTok().getLoc(), "unexpected input in .unreq directive.");
- Parser.eatToEndOfStatement();
- return false;
- }
+ if (getTok().isNot(AsmToken::Identifier))
+ return TokError("unexpected input in .unreq directive.");
RegisterReqs.erase(Parser.getTok().getIdentifier().lower());
Parser.Lex(); // Eat the identifier.
+ if (parseToken(AsmToken::EndOfStatement))
+ return addErrorSuffix("in '.unreq' directive");
return false;
}
@@ -4530,9 +4478,9 @@ AArch64AsmParser::classifySymbolRef(const MCExpr *Expr,
/// Force static initialization.
extern "C" void LLVMInitializeAArch64AsmParser() {
- RegisterMCAsmParser<AArch64AsmParser> X(TheAArch64leTarget);
- RegisterMCAsmParser<AArch64AsmParser> Y(TheAArch64beTarget);
- RegisterMCAsmParser<AArch64AsmParser> Z(TheARM64Target);
+ RegisterMCAsmParser<AArch64AsmParser> X(getTheAArch64leTarget());
+ RegisterMCAsmParser<AArch64AsmParser> Y(getTheAArch64beTarget());
+ RegisterMCAsmParser<AArch64AsmParser> Z(getTheARM64Target());
}
#define GET_REGISTER_MATCHER
@@ -4603,7 +4551,7 @@ unsigned AArch64AsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
}
-AArch64AsmParser::OperandMatchResultTy
+OperandMatchResultTy
AArch64AsmParser::tryParseGPRSeqPair(OperandVector &Operands) {
SMLoc S = getLoc();
@@ -4660,7 +4608,7 @@ AArch64AsmParser::tryParseGPRSeqPair(OperandVector &Operands) {
"consecutive same-size even/odd register pair");
return MatchOperand_ParseFail;
}
-
+
unsigned Pair = 0;
if(isXReg) {
Pair = RI->getMatchingSuperReg(FirstReg, AArch64::sube64,
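
The AsmParser hunks above replace hand-rolled comma/EndOfStatement loops with the generic MCAsmParser helpers (parseOptionalToken, parseToken, parseMany, addErrorSuffix), which also removes the scattered eatToEndOfStatement calls. Below is a minimal standalone sketch of that pattern; the toy parser type and token strings are invented for illustration and are not the real LLVM classes, but they keep the same convention that false means success and true means error.

#include <functional>
#include <iostream>
#include <string>
#include <vector>

// Toy stand-in for the MC parser (assumption, not LLVM API).
struct ToyParser {
  std::vector<std::string> Toks;
  size_t Pos = 0;

  bool parseOptionalToken(const std::string &T) {    // consume T if present
    if (Pos < Toks.size() && Toks[Pos] == T) { ++Pos; return true; }
    return false;
  }
  bool parseToken(const std::string &T, const std::string &Msg) {
    if (parseOptionalToken(T)) return false;          // false == success
    std::cerr << "error: " << Msg << "\n";
    return true;
  }
  // Run ParseOne for a comma-separated list, then require end-of-statement.
  bool parseMany(const std::function<bool()> &ParseOne) {
    if (ParseOne()) return true;
    while (parseOptionalToken(","))
      if (ParseOne()) return true;
    return parseToken("<eos>", "unexpected token in directive");
  }
};

int main() {
  ToyParser P{{"1", ",", "2", ",", "3", "<eos>"}};
  std::vector<int> Values;
  auto ParseOp = [&]() -> bool {                      // one ".word"-style operand
    Values.push_back(std::stoi(P.Toks[P.Pos++]));
    return false;
  };
  if (P.parseMany(ParseOp)) return 1;
  for (int V : Values) std::cout << V << "\n";        // 1 2 3
}
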
diff --git a/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp b/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
index fe6ea31b9061..0d860a7eef79 100644
--- a/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
+++ b/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp
@@ -237,18 +237,18 @@ createAArch64ExternalSymbolizer(const Triple &TT, LLVMOpInfoCallback GetOpInfo,
}
extern "C" void LLVMInitializeAArch64Disassembler() {
- TargetRegistry::RegisterMCDisassembler(TheAArch64leTarget,
+ TargetRegistry::RegisterMCDisassembler(getTheAArch64leTarget(),
createAArch64Disassembler);
- TargetRegistry::RegisterMCDisassembler(TheAArch64beTarget,
+ TargetRegistry::RegisterMCDisassembler(getTheAArch64beTarget(),
createAArch64Disassembler);
- TargetRegistry::RegisterMCSymbolizer(TheAArch64leTarget,
+ TargetRegistry::RegisterMCSymbolizer(getTheAArch64leTarget(),
createAArch64ExternalSymbolizer);
- TargetRegistry::RegisterMCSymbolizer(TheAArch64beTarget,
+ TargetRegistry::RegisterMCSymbolizer(getTheAArch64beTarget(),
createAArch64ExternalSymbolizer);
- TargetRegistry::RegisterMCDisassembler(TheARM64Target,
+ TargetRegistry::RegisterMCDisassembler(getTheARM64Target(),
createAArch64Disassembler);
- TargetRegistry::RegisterMCSymbolizer(TheARM64Target,
+ TargetRegistry::RegisterMCSymbolizer(getTheARM64Target(),
createAArch64ExternalSymbolizer);
}
@@ -1097,7 +1097,7 @@ static DecodeStatus DecodeExclusiveLdStInstruction(llvm::MCInst &Inst,
case AArch64::STXRB:
case AArch64::STXRH:
DecodeGPR32RegisterClass(Inst, Rs, Addr, Decoder);
- // FALLTHROUGH
+ LLVM_FALLTHROUGH;
case AArch64::LDARW:
case AArch64::LDARB:
case AArch64::LDARH:
@@ -1121,7 +1121,7 @@ static DecodeStatus DecodeExclusiveLdStInstruction(llvm::MCInst &Inst,
case AArch64::STLXRX:
case AArch64::STXRX:
DecodeGPR32RegisterClass(Inst, Rs, Addr, Decoder);
- // FALLTHROUGH
+ LLVM_FALLTHROUGH;
case AArch64::LDARX:
case AArch64::LDAXRX:
case AArch64::LDXRX:
@@ -1133,7 +1133,7 @@ static DecodeStatus DecodeExclusiveLdStInstruction(llvm::MCInst &Inst,
case AArch64::STLXPW:
case AArch64::STXPW:
DecodeGPR32RegisterClass(Inst, Rs, Addr, Decoder);
- // FALLTHROUGH
+ LLVM_FALLTHROUGH;
case AArch64::LDAXPW:
case AArch64::LDXPW:
DecodeGPR32RegisterClass(Inst, Rt, Addr, Decoder);
@@ -1142,7 +1142,7 @@ static DecodeStatus DecodeExclusiveLdStInstruction(llvm::MCInst &Inst,
case AArch64::STLXPX:
case AArch64::STXPX:
DecodeGPR32RegisterClass(Inst, Rs, Addr, Decoder);
- // FALLTHROUGH
+ LLVM_FALLTHROUGH;
case AArch64::LDAXPX:
case AArch64::LDXPX:
DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder);
@@ -1218,7 +1218,7 @@ static DecodeStatus DecodePairLdStInstruction(llvm::MCInst &Inst, uint32_t insn,
case AArch64::STPXpre:
case AArch64::LDPSWpre:
NeedsDisjointWritebackTransfer = true;
- // Fallthrough
+ LLVM_FALLTHROUGH;
case AArch64::LDNPXi:
case AArch64::STNPXi:
case AArch64::LDPXi:
@@ -1232,7 +1232,7 @@ static DecodeStatus DecodePairLdStInstruction(llvm::MCInst &Inst, uint32_t insn,
case AArch64::LDPWpre:
case AArch64::STPWpre:
NeedsDisjointWritebackTransfer = true;
- // Fallthrough
+ LLVM_FALLTHROUGH;
case AArch64::LDNPWi:
case AArch64::STNPWi:
case AArch64::LDPWi:
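
The "// FALLTHROUGH" comments in the decoder are replaced by the LLVM_FALLTHROUGH macro (llvm/Support/Compiler.h), which expands to a fall-through attribute when the compiler provides one, so -Wimplicit-fallthrough can check the intent. A small standalone illustration of the same idea using the C++17 attribute directly; the opcodes and operand counts are invented, but the shape matches the switches above, where store-exclusive cases decode an extra status register and then fall into the shared destination decoding.

#include <iostream>

enum class Op { StoreExclusive, LoadAcquire };

int decodedOperands(Op O) {
  int N = 0;
  switch (O) {
  case Op::StoreExclusive:
    ++N;                  // status register (Ws), store-exclusive only
    [[fallthrough]];      // spelled LLVM_FALLTHROUGH in the LLVM sources
  case Op::LoadAcquire:
    N += 2;               // Rt and Rn, common to both forms
    return N;
  }
  return 0;
}

int main() {
  std::cout << decodedOperands(Op::StoreExclusive) << " "   // 3
            << decodedOperands(Op::LoadAcquire) << "\n";    // 2
}
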
diff --git a/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.h b/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.h
index e475e505e7d1..24e353cf4b96 100644
--- a/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.h
+++ b/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.h
@@ -18,7 +18,6 @@
namespace llvm {
class MCInst;
-class MemoryObject;
class raw_ostream;
class AArch64Disassembler : public MCDisassembler {
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
index 27993246eb07..14c0327f5fa8 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp
@@ -11,6 +11,7 @@
#include "AArch64RegisterInfo.h"
#include "MCTargetDesc/AArch64FixupKinds.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDirectives.h"
@@ -520,6 +521,17 @@ public:
return CompactUnwindEncoding;
}
+
+ void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFixup &Fixup, const MCFragment *DF,
+ const MCValue &Target, uint64_t &Value,
+ bool &IsResolved) override {
+ // Try to get the encoded value for the fixup as-if we're mapping it into
+ // the instruction. This allows adjustFixupValue() to issue a diagnostic
+ // if the value is invalid.
+ if (IsResolved)
+ (void)adjustFixupValue(Fixup, Value, &Asm.getContext());
+ }
};
} // end anonymous namespace
@@ -529,12 +541,14 @@ namespace {
class ELFAArch64AsmBackend : public AArch64AsmBackend {
public:
uint8_t OSABI;
+ bool IsILP32;
- ELFAArch64AsmBackend(const Target &T, uint8_t OSABI, bool IsLittleEndian)
- : AArch64AsmBackend(T, IsLittleEndian), OSABI(OSABI) {}
+ ELFAArch64AsmBackend(const Target &T, uint8_t OSABI, bool IsLittleEndian,
+ bool IsILP32)
+ : AArch64AsmBackend(T, IsLittleEndian), OSABI(OSABI), IsILP32(IsILP32) {}
MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override {
- return createAArch64ELFObjectWriter(OS, OSABI, IsLittleEndian);
+ return createAArch64ELFObjectWriter(OS, OSABI, IsLittleEndian, IsILP32);
}
void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout,
@@ -574,22 +588,25 @@ void ELFAArch64AsmBackend::processFixupValue(
MCAsmBackend *llvm::createAArch64leAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
const Triple &TheTriple,
- StringRef CPU) {
+ StringRef CPU,
+ const MCTargetOptions &Options) {
if (TheTriple.isOSBinFormatMachO())
return new DarwinAArch64AsmBackend(T, MRI);
assert(TheTriple.isOSBinFormatELF() && "Expect either MachO or ELF target");
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
- return new ELFAArch64AsmBackend(T, OSABI, /*IsLittleEndian=*/true);
+ bool IsILP32 = Options.getABIName() == "ilp32";
+ return new ELFAArch64AsmBackend(T, OSABI, /*IsLittleEndian=*/true, IsILP32);
}
MCAsmBackend *llvm::createAArch64beAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
const Triple &TheTriple,
- StringRef CPU) {
+ StringRef CPU,
+ const MCTargetOptions &Options) {
assert(TheTriple.isOSBinFormatELF() &&
"Big endian is only supported for ELF targets!");
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());
- return new ELFAArch64AsmBackend(T, OSABI,
- /*IsLittleEndian=*/false);
+ bool IsILP32 = Options.getABIName() == "ilp32";
+ return new ELFAArch64AsmBackend(T, OSABI, /*IsLittleEndian=*/false, IsILP32);
}
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
index 4b4c4097b97b..a1edb3cef46a 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp
@@ -25,25 +25,80 @@ using namespace llvm;
namespace {
class AArch64ELFObjectWriter : public MCELFObjectTargetWriter {
public:
- AArch64ELFObjectWriter(uint8_t OSABI, bool IsLittleEndian);
+ AArch64ELFObjectWriter(uint8_t OSABI, bool IsLittleEndian, bool IsILP32);
~AArch64ELFObjectWriter() override;
protected:
unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
const MCFixup &Fixup, bool IsPCRel) const override;
-
+ bool IsILP32;
private:
};
}
AArch64ELFObjectWriter::AArch64ELFObjectWriter(uint8_t OSABI,
- bool IsLittleEndian)
+ bool IsLittleEndian,
+ bool IsILP32)
: MCELFObjectTargetWriter(/*Is64Bit*/ true, OSABI, ELF::EM_AARCH64,
- /*HasRelocationAddend*/ true) {}
+ /*HasRelocationAddend*/ true),
+ IsILP32(IsILP32) {}
AArch64ELFObjectWriter::~AArch64ELFObjectWriter() {}
+#define R_CLS(rtype) \
+ IsILP32 ? ELF::R_AARCH64_P32_##rtype : ELF::R_AARCH64_##rtype
+#define BAD_ILP32_MOV(lp64rtype) "ILP32 absolute MOV relocation not "\
+ "supported (LP64 eqv: " #lp64rtype ")"
+
+// assumes IsILP32 is true
+static bool isNonILP32reloc(const MCFixup &Fixup,
+ AArch64MCExpr::VariantKind RefKind,
+ MCContext &Ctx) {
+ if ((unsigned)Fixup.getKind() != AArch64::fixup_aarch64_movw)
+ return false;
+ switch(RefKind) {
+ case AArch64MCExpr::VK_ABS_G3:
+ Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G3));
+ return true;
+ case AArch64MCExpr::VK_ABS_G2:
+ Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G2));
+ return true;
+ case AArch64MCExpr::VK_ABS_G2_S:
+ Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_SABS_G2));
+ return ELF::R_AARCH64_NONE;
+ case AArch64MCExpr::VK_ABS_G2_NC:
+ Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G2_NC));
+ return ELF::R_AARCH64_NONE;
+ case AArch64MCExpr::VK_ABS_G1_S:
+ Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_SABS_G1));
+ return ELF::R_AARCH64_NONE;
+ case AArch64MCExpr::VK_ABS_G1_NC:
+ Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(MOVW_UABS_G1_NC));
+ return ELF::R_AARCH64_NONE;
+ case AArch64MCExpr::VK_DTPREL_G2:
+ Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLD_MOVW_DTPREL_G2));
+ return ELF::R_AARCH64_NONE;
+ case AArch64MCExpr::VK_DTPREL_G1_NC:
+ Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLD_MOVW_DTPREL_G1_NC));
+ return ELF::R_AARCH64_NONE;
+ case AArch64MCExpr::VK_TPREL_G2:
+ Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLE_MOVW_TPREL_G2));
+ return ELF::R_AARCH64_NONE;
+ case AArch64MCExpr::VK_TPREL_G1_NC:
+ Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSLE_MOVW_TPREL_G1_NC));
+ return ELF::R_AARCH64_NONE;
+ case AArch64MCExpr::VK_GOTTPREL_G1:
+ Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSIE_MOVW_GOTTPREL_G1));
+ return ELF::R_AARCH64_NONE;
+ case AArch64MCExpr::VK_GOTTPREL_G0_NC:
+ Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(TLSIE_MOVW_GOTTPREL_G0_NC));
+ return ELF::R_AARCH64_NONE;
+ default: return false;
+ }
+ return false;
+}
+
unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
const MCValue &Target,
const MCFixup &Fixup,
@@ -67,147 +122,161 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
Ctx.reportError(Fixup.getLoc(), "1-byte data relocations not supported");
return ELF::R_AARCH64_NONE;
case FK_Data_2:
- return ELF::R_AARCH64_PREL16;
+ return R_CLS(PREL16);
case FK_Data_4:
- return ELF::R_AARCH64_PREL32;
+ return R_CLS(PREL32);
case FK_Data_8:
- return ELF::R_AARCH64_PREL64;
+ if (IsILP32) {
+ Ctx.reportError(Fixup.getLoc(), "ILP32 8 byte PC relative data "
+ "relocation not supported (LP64 eqv: PREL64)");
+ return ELF::R_AARCH64_NONE;
+ } else
+ return ELF::R_AARCH64_PREL64;
case AArch64::fixup_aarch64_pcrel_adr_imm21:
assert(SymLoc == AArch64MCExpr::VK_NONE && "unexpected ADR relocation");
- return ELF::R_AARCH64_ADR_PREL_LO21;
+ return R_CLS(ADR_PREL_LO21);
case AArch64::fixup_aarch64_pcrel_adrp_imm21:
if (SymLoc == AArch64MCExpr::VK_ABS && !IsNC)
- return ELF::R_AARCH64_ADR_PREL_PG_HI21;
+ return R_CLS(ADR_PREL_PG_HI21);
if (SymLoc == AArch64MCExpr::VK_GOT && !IsNC)
- return ELF::R_AARCH64_ADR_GOT_PAGE;
+ return R_CLS(ADR_GOT_PAGE);
if (SymLoc == AArch64MCExpr::VK_GOTTPREL && !IsNC)
- return ELF::R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21;
+ return R_CLS(TLSIE_ADR_GOTTPREL_PAGE21);
if (SymLoc == AArch64MCExpr::VK_TLSDESC && !IsNC)
- return ELF::R_AARCH64_TLSDESC_ADR_PAGE21;
+ return R_CLS(TLSDESC_ADR_PAGE21);
Ctx.reportError(Fixup.getLoc(),
"invalid symbol kind for ADRP relocation");
return ELF::R_AARCH64_NONE;
case AArch64::fixup_aarch64_pcrel_branch26:
- return ELF::R_AARCH64_JUMP26;
+ return R_CLS(JUMP26);
case AArch64::fixup_aarch64_pcrel_call26:
- return ELF::R_AARCH64_CALL26;
+ return R_CLS(CALL26);
case AArch64::fixup_aarch64_ldr_pcrel_imm19:
if (SymLoc == AArch64MCExpr::VK_GOTTPREL)
- return ELF::R_AARCH64_TLSIE_LD_GOTTPREL_PREL19;
- return ELF::R_AARCH64_LD_PREL_LO19;
+ return R_CLS(TLSIE_LD_GOTTPREL_PREL19);
+ return R_CLS(LD_PREL_LO19);
case AArch64::fixup_aarch64_pcrel_branch14:
- return ELF::R_AARCH64_TSTBR14;
+ return R_CLS(TSTBR14);
case AArch64::fixup_aarch64_pcrel_branch19:
- return ELF::R_AARCH64_CONDBR19;
+ return R_CLS(CONDBR19);
default:
Ctx.reportError(Fixup.getLoc(), "Unsupported pc-relative fixup kind");
return ELF::R_AARCH64_NONE;
}
} else {
+ if (IsILP32 && isNonILP32reloc(Fixup, RefKind, Ctx))
+ return ELF::R_AARCH64_NONE;
switch ((unsigned)Fixup.getKind()) {
case FK_Data_1:
Ctx.reportError(Fixup.getLoc(), "1-byte data relocations not supported");
return ELF::R_AARCH64_NONE;
case FK_Data_2:
- return ELF::R_AARCH64_ABS16;
+ return R_CLS(ABS16);
case FK_Data_4:
- return ELF::R_AARCH64_ABS32;
+ return R_CLS(ABS32);
case FK_Data_8:
- return ELF::R_AARCH64_ABS64;
+ if (IsILP32) {
+ Ctx.reportError(Fixup.getLoc(), BAD_ILP32_MOV(ABS64));
+ return ELF::R_AARCH64_NONE;
+ } else
+ return ELF::R_AARCH64_ABS64;
case AArch64::fixup_aarch64_add_imm12:
if (RefKind == AArch64MCExpr::VK_DTPREL_HI12)
- return ELF::R_AARCH64_TLSLD_ADD_DTPREL_HI12;
+ return R_CLS(TLSLD_ADD_DTPREL_HI12);
if (RefKind == AArch64MCExpr::VK_TPREL_HI12)
- return ELF::R_AARCH64_TLSLE_ADD_TPREL_HI12;
+ return R_CLS(TLSLE_ADD_TPREL_HI12);
if (RefKind == AArch64MCExpr::VK_DTPREL_LO12_NC)
- return ELF::R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC;
+ return R_CLS(TLSLD_ADD_DTPREL_LO12_NC);
if (RefKind == AArch64MCExpr::VK_DTPREL_LO12)
- return ELF::R_AARCH64_TLSLD_ADD_DTPREL_LO12;
+ return R_CLS(TLSLD_ADD_DTPREL_LO12);
if (RefKind == AArch64MCExpr::VK_TPREL_LO12_NC)
- return ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12_NC;
+ return R_CLS(TLSLE_ADD_TPREL_LO12_NC);
if (RefKind == AArch64MCExpr::VK_TPREL_LO12)
- return ELF::R_AARCH64_TLSLE_ADD_TPREL_LO12;
+ return R_CLS(TLSLE_ADD_TPREL_LO12);
if (RefKind == AArch64MCExpr::VK_TLSDESC_LO12)
- return ELF::R_AARCH64_TLSDESC_ADD_LO12_NC;
+ return R_CLS(TLSDESC_ADD_LO12_NC);
if (SymLoc == AArch64MCExpr::VK_ABS && IsNC)
- return ELF::R_AARCH64_ADD_ABS_LO12_NC;
+ return R_CLS(ADD_ABS_LO12_NC);
Ctx.reportError(Fixup.getLoc(),
"invalid fixup for add (uimm12) instruction");
return ELF::R_AARCH64_NONE;
case AArch64::fixup_aarch64_ldst_imm12_scale1:
if (SymLoc == AArch64MCExpr::VK_ABS && IsNC)
- return ELF::R_AARCH64_LDST8_ABS_LO12_NC;
+ return R_CLS(LDST8_ABS_LO12_NC);
if (SymLoc == AArch64MCExpr::VK_DTPREL && !IsNC)
- return ELF::R_AARCH64_TLSLD_LDST8_DTPREL_LO12;
+ return R_CLS(TLSLD_LDST8_DTPREL_LO12);
if (SymLoc == AArch64MCExpr::VK_DTPREL && IsNC)
- return ELF::R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC;
+ return R_CLS(TLSLD_LDST8_DTPREL_LO12_NC);
if (SymLoc == AArch64MCExpr::VK_TPREL && !IsNC)
- return ELF::R_AARCH64_TLSLE_LDST8_TPREL_LO12;
+ return R_CLS(TLSLE_LDST8_TPREL_LO12);
if (SymLoc == AArch64MCExpr::VK_TPREL && IsNC)
- return ELF::R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC;
+ return R_CLS(TLSLE_LDST8_TPREL_LO12_NC);
Ctx.reportError(Fixup.getLoc(),
"invalid fixup for 8-bit load/store instruction");
return ELF::R_AARCH64_NONE;
case AArch64::fixup_aarch64_ldst_imm12_scale2:
if (SymLoc == AArch64MCExpr::VK_ABS && IsNC)
- return ELF::R_AARCH64_LDST16_ABS_LO12_NC;
+ return R_CLS(LDST16_ABS_LO12_NC);
if (SymLoc == AArch64MCExpr::VK_DTPREL && !IsNC)
- return ELF::R_AARCH64_TLSLD_LDST16_DTPREL_LO12;
+ return R_CLS(TLSLD_LDST16_DTPREL_LO12);
if (SymLoc == AArch64MCExpr::VK_DTPREL && IsNC)
- return ELF::R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC;
+ return R_CLS(TLSLD_LDST16_DTPREL_LO12_NC);
if (SymLoc == AArch64MCExpr::VK_TPREL && !IsNC)
- return ELF::R_AARCH64_TLSLE_LDST16_TPREL_LO12;
+ return R_CLS(TLSLE_LDST16_TPREL_LO12);
if (SymLoc == AArch64MCExpr::VK_TPREL && IsNC)
- return ELF::R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC;
+ return R_CLS(TLSLE_LDST16_TPREL_LO12_NC);
Ctx.reportError(Fixup.getLoc(),
"invalid fixup for 16-bit load/store instruction");
return ELF::R_AARCH64_NONE;
case AArch64::fixup_aarch64_ldst_imm12_scale4:
if (SymLoc == AArch64MCExpr::VK_ABS && IsNC)
- return ELF::R_AARCH64_LDST32_ABS_LO12_NC;
+ return R_CLS(LDST32_ABS_LO12_NC);
if (SymLoc == AArch64MCExpr::VK_DTPREL && !IsNC)
- return ELF::R_AARCH64_TLSLD_LDST32_DTPREL_LO12;
+ return R_CLS(TLSLD_LDST32_DTPREL_LO12);
if (SymLoc == AArch64MCExpr::VK_DTPREL && IsNC)
- return ELF::R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC;
+ return R_CLS(TLSLD_LDST32_DTPREL_LO12_NC);
if (SymLoc == AArch64MCExpr::VK_TPREL && !IsNC)
- return ELF::R_AARCH64_TLSLE_LDST32_TPREL_LO12;
+ return R_CLS(TLSLE_LDST32_TPREL_LO12);
if (SymLoc == AArch64MCExpr::VK_TPREL && IsNC)
- return ELF::R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC;
+ return R_CLS(TLSLE_LDST32_TPREL_LO12_NC);
Ctx.reportError(Fixup.getLoc(),
"invalid fixup for 32-bit load/store instruction");
return ELF::R_AARCH64_NONE;
case AArch64::fixup_aarch64_ldst_imm12_scale8:
if (SymLoc == AArch64MCExpr::VK_ABS && IsNC)
- return ELF::R_AARCH64_LDST64_ABS_LO12_NC;
+ return R_CLS(LDST64_ABS_LO12_NC);
if (SymLoc == AArch64MCExpr::VK_GOT && IsNC)
- return ELF::R_AARCH64_LD64_GOT_LO12_NC;
+ return R_CLS(LD64_GOT_LO12_NC);
if (SymLoc == AArch64MCExpr::VK_DTPREL && !IsNC)
- return ELF::R_AARCH64_TLSLD_LDST64_DTPREL_LO12;
+ return R_CLS(TLSLD_LDST64_DTPREL_LO12);
if (SymLoc == AArch64MCExpr::VK_DTPREL && IsNC)
- return ELF::R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC;
+ return R_CLS(TLSLD_LDST64_DTPREL_LO12_NC);
if (SymLoc == AArch64MCExpr::VK_TPREL && !IsNC)
- return ELF::R_AARCH64_TLSLE_LDST64_TPREL_LO12;
+ return R_CLS(TLSLE_LDST64_TPREL_LO12);
if (SymLoc == AArch64MCExpr::VK_TPREL && IsNC)
- return ELF::R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC;
+ return R_CLS(TLSLE_LDST64_TPREL_LO12_NC);
if (SymLoc == AArch64MCExpr::VK_GOTTPREL && IsNC)
- return ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC;
+ return IsILP32 ? ELF::R_AARCH64_P32_TLSIE_LD32_GOTTPREL_LO12_NC
+ : ELF::R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC;
if (SymLoc == AArch64MCExpr::VK_TLSDESC && IsNC)
- return ELF::R_AARCH64_TLSDESC_LD64_LO12_NC;
+ return IsILP32 ? ELF::R_AARCH64_P32_TLSDESC_LD32_LO12_NC
+ : ELF::R_AARCH64_TLSDESC_LD64_LO12_NC;
Ctx.reportError(Fixup.getLoc(),
"invalid fixup for 64-bit load/store instruction");
return ELF::R_AARCH64_NONE;
case AArch64::fixup_aarch64_ldst_imm12_scale16:
if (SymLoc == AArch64MCExpr::VK_ABS && IsNC)
- return ELF::R_AARCH64_LDST128_ABS_LO12_NC;
+ return R_CLS(LDST128_ABS_LO12_NC);
Ctx.reportError(Fixup.getLoc(),
"invalid fixup for 128-bit load/store instruction");
return ELF::R_AARCH64_NONE;
+ // ILP32 case not reached here, tested with isNonILP32reloc
case AArch64::fixup_aarch64_movw:
if (RefKind == AArch64MCExpr::VK_ABS_G3)
return ELF::R_AARCH64_MOVW_UABS_G3;
@@ -218,37 +287,37 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
if (RefKind == AArch64MCExpr::VK_ABS_G2_NC)
return ELF::R_AARCH64_MOVW_UABS_G2_NC;
if (RefKind == AArch64MCExpr::VK_ABS_G1)
- return ELF::R_AARCH64_MOVW_UABS_G1;
+ return R_CLS(MOVW_UABS_G1);
if (RefKind == AArch64MCExpr::VK_ABS_G1_S)
return ELF::R_AARCH64_MOVW_SABS_G1;
if (RefKind == AArch64MCExpr::VK_ABS_G1_NC)
return ELF::R_AARCH64_MOVW_UABS_G1_NC;
if (RefKind == AArch64MCExpr::VK_ABS_G0)
- return ELF::R_AARCH64_MOVW_UABS_G0;
+ return R_CLS(MOVW_UABS_G0);
if (RefKind == AArch64MCExpr::VK_ABS_G0_S)
- return ELF::R_AARCH64_MOVW_SABS_G0;
+ return R_CLS(MOVW_SABS_G0);
if (RefKind == AArch64MCExpr::VK_ABS_G0_NC)
- return ELF::R_AARCH64_MOVW_UABS_G0_NC;
+ return R_CLS(MOVW_UABS_G0_NC);
if (RefKind == AArch64MCExpr::VK_DTPREL_G2)
return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G2;
if (RefKind == AArch64MCExpr::VK_DTPREL_G1)
- return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G1;
+ return R_CLS(TLSLD_MOVW_DTPREL_G1);
if (RefKind == AArch64MCExpr::VK_DTPREL_G1_NC)
return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC;
if (RefKind == AArch64MCExpr::VK_DTPREL_G0)
- return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G0;
+ return R_CLS(TLSLD_MOVW_DTPREL_G0);
if (RefKind == AArch64MCExpr::VK_DTPREL_G0_NC)
- return ELF::R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC;
+ return R_CLS(TLSLD_MOVW_DTPREL_G0_NC);
if (RefKind == AArch64MCExpr::VK_TPREL_G2)
return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G2;
if (RefKind == AArch64MCExpr::VK_TPREL_G1)
- return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G1;
+ return R_CLS(TLSLE_MOVW_TPREL_G1);
if (RefKind == AArch64MCExpr::VK_TPREL_G1_NC)
return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G1_NC;
if (RefKind == AArch64MCExpr::VK_TPREL_G0)
- return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G0;
+ return R_CLS(TLSLE_MOVW_TPREL_G0);
if (RefKind == AArch64MCExpr::VK_TPREL_G0_NC)
- return ELF::R_AARCH64_TLSLE_MOVW_TPREL_G0_NC;
+ return R_CLS(TLSLE_MOVW_TPREL_G0_NC);
if (RefKind == AArch64MCExpr::VK_GOTTPREL_G1)
return ELF::R_AARCH64_TLSIE_MOVW_GOTTPREL_G1;
if (RefKind == AArch64MCExpr::VK_GOTTPREL_G0_NC)
@@ -257,7 +326,7 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
"invalid fixup for movz/movk instruction");
return ELF::R_AARCH64_NONE;
case AArch64::fixup_aarch64_tlsdesc_call:
- return ELF::R_AARCH64_TLSDESC_CALL;
+ return R_CLS(TLSDESC_CALL);
default:
Ctx.reportError(Fixup.getLoc(), "Unknown ELF relocation type");
return ELF::R_AARCH64_NONE;
@@ -269,8 +338,9 @@ unsigned AArch64ELFObjectWriter::getRelocType(MCContext &Ctx,
MCObjectWriter *llvm::createAArch64ELFObjectWriter(raw_pwrite_stream &OS,
uint8_t OSABI,
- bool IsLittleEndian) {
+ bool IsLittleEndian,
+ bool IsILP32) {
MCELFObjectTargetWriter *MOTW =
- new AArch64ELFObjectWriter(OSABI, IsLittleEndian);
+ new AArch64ELFObjectWriter(OSABI, IsLittleEndian, IsILP32);
return createELFObjectWriter(MOTW, OS, IsLittleEndian);
}
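
Most of the relocation switch above is mechanical: R_CLS(x) selects ELF::R_AARCH64_P32_x when the writer was built for ILP32 and ELF::R_AARCH64_x otherwise, so each case spells the relocation suffix only once. A small self-contained sketch of that macro pattern follows; the enum values are placeholders, not the real ELF relocation numbers.

#include <cstdio>

// Stand-ins for the ELF relocation enums (placeholder values for the sketch).
namespace ELF {
enum : unsigned {
  R_AARCH64_ABS32 = 10,
  R_AARCH64_P32_ABS32 = 11,
  R_AARCH64_JUMP26 = 20,
  R_AARCH64_P32_JUMP26 = 21,
};
}

struct ObjectWriter {
  bool IsILP32;

// Expands to a runtime select between the LP64 and ILP32 flavour of a reloc.
#define R_CLS(rtype) \
  (IsILP32 ? ELF::R_AARCH64_P32_##rtype : ELF::R_AARCH64_##rtype)

  unsigned relocForData4() const { return R_CLS(ABS32); }
  unsigned relocForBranch26() const { return R_CLS(JUMP26); }
#undef R_CLS
};

int main() {
  ObjectWriter LP64{false}, ILP32{true};
  std::printf("%u %u\n", LP64.relocForData4(), ILP32.relocForData4());       // 10 11
  std::printf("%u %u\n", LP64.relocForBranch26(), ILP32.relocForBranch26()); // 20 21
}
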
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
index fbce26e1d9a1..8fc822329595 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp
@@ -29,8 +29,7 @@ static cl::opt<AsmWriterVariantTy> AsmWriterVariant(
"aarch64-neon-syntax", cl::init(Default),
cl::desc("Choose style of NEON code to emit from AArch64 backend:"),
cl::values(clEnumValN(Generic, "generic", "Emit generic NEON assembly"),
- clEnumValN(Apple, "apple", "Emit Apple-style NEON assembly"),
- clEnumValEnd));
+ clEnumValN(Apple, "apple", "Emit Apple-style NEON assembly")));
AArch64MCAsmInfoDarwin::AArch64MCAsmInfoDarwin() {
// We prefer NEON instructions to be printed in the short form.
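
The MCAsmInfo change reflects the cl::values() cleanup in this LLVM import: the list is now variadic, so the clEnumValEnd terminator is dropped and the last clEnumValN loses its trailing comma. A minimal sketch of the new form, assuming an LLVM build environment for the CommandLine header; the option name and enum below are invented for illustration.

#include "llvm/Support/CommandLine.h"

using namespace llvm;

enum DemoSyntaxKind { GenericSyntax, AppleSyntax };

// Post-cleanup style: no clEnumValEnd sentinel at the end of cl::values().
static cl::opt<DemoSyntaxKind> DemoSyntax(
    "demo-neon-syntax", cl::init(GenericSyntax),
    cl::desc("Choose style of NEON code to emit (illustrative option)"),
    cl::values(clEnumValN(GenericSyntax, "generic", "Emit generic assembly"),
               clEnumValN(AppleSyntax, "apple", "Emit Apple-style assembly")));

int main(int argc, char **argv) {
  cl::ParseCommandLineOptions(argc, argv);
  return DemoSyntax == AppleSyntax ? 1 : 0;
}
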
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
index 7b9ff8fa0503..f7058cdf2373 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp
@@ -35,11 +35,13 @@ namespace {
class AArch64MCCodeEmitter : public MCCodeEmitter {
MCContext &Ctx;
+ const MCInstrInfo &MCII;
AArch64MCCodeEmitter(const AArch64MCCodeEmitter &); // DO NOT IMPLEMENT
void operator=(const AArch64MCCodeEmitter &); // DO NOT IMPLEMENT
public:
- AArch64MCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx) : Ctx(ctx) {}
+ AArch64MCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx)
+ : Ctx(ctx), MCII(mcii) {}
~AArch64MCCodeEmitter() override {}
@@ -170,6 +172,11 @@ public:
unsigned fixOneOperandFPComparison(const MCInst &MI, unsigned EncodedValue,
const MCSubtargetInfo &STI) const;
+
+private:
+ uint64_t computeAvailableFeatures(const FeatureBitset &FB) const;
+ void verifyInstructionPredicates(const MCInst &MI,
+ uint64_t AvailableFeatures) const;
};
} // end anonymous namespace
@@ -253,7 +260,7 @@ AArch64MCCodeEmitter::getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx,
assert((ShiftVal == 0 || ShiftVal == 12) &&
"unexpected shift value for add/sub immediate");
if (MO.isImm())
- return MO.getImm() | (ShiftVal == 0 ? 0 : (1 << 12));
+ return MO.getImm() | (ShiftVal == 0 ? 0 : (1 << ShiftVal));
assert(MO.isExpr() && "Unable to encode MCOperand!");
const MCExpr *Expr = MO.getExpr();
@@ -263,7 +270,15 @@ AArch64MCCodeEmitter::getAddSubImmOpValue(const MCInst &MI, unsigned OpIdx,
++MCNumFixups;
- return 0;
+ // Set the shift bit of the add instruction for relocation types
+ // R_AARCH64_TLSLE_ADD_TPREL_HI12 and R_AARCH64_TLSLD_ADD_DTPREL_HI12.
+ if (const AArch64MCExpr *A64E = dyn_cast<AArch64MCExpr>(Expr)) {
+ AArch64MCExpr::VariantKind RefKind = A64E->getKind();
+ if (RefKind == AArch64MCExpr::VK_TPREL_HI12 ||
+ RefKind == AArch64MCExpr::VK_DTPREL_HI12)
+ ShiftVal = 12;
+ }
+ return ShiftVal == 0 ? 0 : (1 << ShiftVal);
}
/// getCondBranchTargetOpValue - Return the encoded value for a conditional
@@ -539,6 +554,9 @@ unsigned AArch64MCCodeEmitter::fixMOVZ(const MCInst &MI, unsigned EncodedValue,
void AArch64MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
+ verifyInstructionPredicates(MI,
+ computeAvailableFeatures(STI.getFeatureBits()));
+
if (MI.getOpcode() == AArch64::TLSDESCCALL) {
// This is a directive which applies an R_AARCH64_TLSDESC_CALL to the
// following (BLR) instruction. It doesn't emit any code itself so it
@@ -581,4 +599,5 @@ unsigned AArch64MCCodeEmitter::fixOneOperandFPComparison(
return EncodedValue;
}
+#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "AArch64GenMCCodeEmitter.inc"
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
index 702780621208..e9d38d3dcf10 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -15,6 +15,7 @@
#include "AArch64ELFStreamer.h"
#include "AArch64MCAsmInfo.h"
#include "InstPrinter/AArch64InstPrinter.h"
+#include "llvm/MC/MCInstrAnalysis.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
@@ -116,10 +117,14 @@ static MCStreamer *createMachOStreamer(MCContext &Ctx, MCAsmBackend &TAB,
/*LabelSections*/ true);
}
+static MCInstrAnalysis *createAArch64InstrAnalysis(const MCInstrInfo *Info) {
+ return new MCInstrAnalysis(Info);
+}
+
// Force static initialization.
extern "C" void LLVMInitializeAArch64TargetMC() {
- for (Target *T :
- {&TheAArch64leTarget, &TheAArch64beTarget, &TheARM64Target}) {
+ for (Target *T : {&getTheAArch64leTarget(), &getTheAArch64beTarget(),
+ &getTheARM64Target()}) {
// Register the MC asm info.
RegisterMCAsmInfoFn X(*T, createAArch64MCAsmInfo);
@@ -135,6 +140,9 @@ extern "C" void LLVMInitializeAArch64TargetMC() {
// Register the MC subtarget info.
TargetRegistry::RegisterMCSubtargetInfo(*T, createAArch64MCSubtargetInfo);
+ // Register the MC instruction analyzer.
+ TargetRegistry::RegisterMCInstrAnalysis(*T, createAArch64InstrAnalysis);
+
// Register the MC Code Emitter
TargetRegistry::RegisterMCCodeEmitter(*T, createAArch64MCCodeEmitter);
@@ -154,8 +162,8 @@ extern "C" void LLVMInitializeAArch64TargetMC() {
}
// Register the asm backend.
- for (Target *T : {&TheAArch64leTarget, &TheARM64Target})
+ for (Target *T : {&getTheAArch64leTarget(), &getTheARM64Target()})
TargetRegistry::RegisterMCAsmBackend(*T, createAArch64leAsmBackend);
- TargetRegistry::RegisterMCAsmBackend(TheAArch64beTarget,
+ TargetRegistry::RegisterMCAsmBackend(getTheAArch64beTarget(),
createAArch64beAsmBackend);
}
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
index 39414cc0c6a5..615d7dab2c51 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h
@@ -27,6 +27,7 @@ class MCRegisterInfo;
class MCObjectWriter;
class MCStreamer;
class MCSubtargetInfo;
+class MCTargetOptions;
class MCTargetStreamer;
class StringRef;
class Target;
@@ -34,23 +35,26 @@ class Triple;
class raw_ostream;
class raw_pwrite_stream;
-extern Target TheAArch64leTarget;
-extern Target TheAArch64beTarget;
-extern Target TheARM64Target;
+Target &getTheAArch64leTarget();
+Target &getTheAArch64beTarget();
+Target &getTheARM64Target();
MCCodeEmitter *createAArch64MCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx);
MCAsmBackend *createAArch64leAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU);
+ const Triple &TT, StringRef CPU,
+ const MCTargetOptions &Options);
MCAsmBackend *createAArch64beAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU);
+ const Triple &TT, StringRef CPU,
+ const MCTargetOptions &Options);
MCObjectWriter *createAArch64ELFObjectWriter(raw_pwrite_stream &OS,
uint8_t OSABI,
- bool IsLittleEndian);
+ bool IsLittleEndian,
+ bool IsILP32);
MCObjectWriter *createAArch64MachObjectWriter(raw_pwrite_stream &OS,
uint32_t CPUType,
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
index 61c96f1d93c1..53a68527ee8e 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp
@@ -75,7 +75,7 @@ bool AArch64MachObjectWriter::getAArch64FixupKindMachOInfo(
Log2Size = llvm::Log2_32(4);
switch (Sym->getKind()) {
default:
- llvm_unreachable("Unexpected symbol reference variant kind!");
+ return false;
case MCSymbolRefExpr::VK_PAGEOFF:
RelocType = unsigned(MachO::ARM64_RELOC_PAGEOFF12);
return true;
diff --git a/contrib/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp b/contrib/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp
index f42ecb1677de..7ac9a5a08484 100644
--- a/contrib/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp
+++ b/contrib/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp
@@ -10,23 +10,30 @@
#include "llvm/ADT/Triple.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
-
namespace llvm {
-Target TheAArch64leTarget;
-Target TheAArch64beTarget;
-Target TheARM64Target;
-} // end namespace llvm
+Target &getTheAArch64leTarget() {
+ static Target TheAArch64leTarget;
+ return TheAArch64leTarget;
+}
+Target &getTheAArch64beTarget() {
+ static Target TheAArch64beTarget;
+ return TheAArch64beTarget;
+}
+Target &getTheARM64Target() {
+ static Target TheARM64Target;
+ return TheARM64Target;
+}
+} // namespace llvm
extern "C" void LLVMInitializeAArch64TargetInfo() {
// Now register the "arm64" name for use with "-march". We don't want it to
// take possession of the Triple::aarch64 tag though.
- TargetRegistry::RegisterTarget(TheARM64Target, "arm64",
+ TargetRegistry::RegisterTarget(getTheARM64Target(), "arm64",
"ARM64 (little endian)",
[](Triple::ArchType) { return false; }, true);
RegisterTarget<Triple::aarch64, /*HasJIT=*/true> Z(
- TheAArch64leTarget, "aarch64", "AArch64 (little endian)");
+ getTheAArch64leTarget(), "aarch64", "AArch64 (little endian)");
RegisterTarget<Triple::aarch64_be, /*HasJIT=*/true> W(
- TheAArch64beTarget, "aarch64_be", "AArch64 (big endian)");
-
+ getTheAArch64beTarget(), "aarch64_be", "AArch64 (big endian)");
}
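
The TargetInfo rewrite swaps file-scope Target globals for accessor functions holding function-local statics, so each Target is constructed on first use. That sidesteps cross-translation-unit static initialization order problems when other static initializers (such as the MC registration code above) touch the Target. A small standalone sketch of the pattern, with a stand-in Target type:

#include <iostream>
#include <string>

// Stand-in for llvm::Target; in LLVM the registry fills the fields in.
struct Target {
  std::string Name;
};

// Old style (fragile): a namespace-scope global whose construction order
// relative to globals in other files is unspecified.
//   Target TheDemoTarget;
//
// New style: construct lazily, on first call, inside the accessor.
Target &getTheDemoTarget() {
  static Target TheDemoTarget;   // initialized the first time this runs
  return TheDemoTarget;
}

// A registration helper can run from any static initializer because the
// accessor guarantees the Target exists before it is touched.
void registerDemoTarget() { getTheDemoTarget().Name = "demo"; }

int main() {
  registerDemoTarget();
  std::cout << getTheDemoTarget().Name << "\n";   // demo
}
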
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h
index d4784b5463d7..7b0a7f4b6058 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPU.h
@@ -11,22 +11,18 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPU_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPU_H
-#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
-class AMDGPUInstrPrinter;
-class AMDGPUSubtarget;
class AMDGPUTargetMachine;
class FunctionPass;
class GCNTargetMachine;
-struct MachineSchedContext;
-class MCAsmInfo;
-class raw_ostream;
-class ScheduleDAGInstrs;
+class ModulePass;
+class Pass;
class Target;
class TargetMachine;
+class PassRegistry;
// R600 Passes
FunctionPass *createR600VectorRegMerger(TargetMachine &tm);
@@ -45,16 +41,12 @@ FunctionPass *createSILowerI1CopiesPass();
FunctionPass *createSIShrinkInstructionsPass();
FunctionPass *createSILoadStoreOptimizerPass(TargetMachine &tm);
FunctionPass *createSIWholeQuadModePass();
-FunctionPass *createSILowerControlFlowPass();
FunctionPass *createSIFixControlFlowLiveIntervalsPass();
FunctionPass *createSIFixSGPRCopiesPass();
-FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
FunctionPass *createSIDebuggerInsertNopsPass();
FunctionPass *createSIInsertWaitsPass();
FunctionPass *createAMDGPUCodeGenPreparePass(const GCNTargetMachine *TM = nullptr);
-ScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C);
-
ModulePass *createAMDGPUAnnotateKernelFeaturesPass();
void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &);
extern char &AMDGPUAnnotateKernelFeaturesID;
@@ -78,21 +70,30 @@ void initializeSIWholeQuadModePass(PassRegistry &);
extern char &SIWholeQuadModeID;
void initializeSILowerControlFlowPass(PassRegistry &);
-extern char &SILowerControlFlowPassID;
+extern char &SILowerControlFlowID;
+
+void initializeSIInsertSkipsPass(PassRegistry &);
+extern char &SIInsertSkipsPassID;
+void initializeSIOptimizeExecMaskingPass(PassRegistry &);
+extern char &SIOptimizeExecMaskingID;
// Passes common to R600 and SI
FunctionPass *createAMDGPUPromoteAlloca(const TargetMachine *TM = nullptr);
void initializeAMDGPUPromoteAllocaPass(PassRegistry&);
extern char &AMDGPUPromoteAllocaID;
-FunctionPass *createAMDGPUAddDivergenceMetadata(const AMDGPUSubtarget &ST);
Pass *createAMDGPUStructurizeCFGPass();
-FunctionPass *createAMDGPUISelDag(TargetMachine &tm);
+FunctionPass *createAMDGPUISelDag(TargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
ModulePass *createAMDGPUAlwaysInlinePass();
ModulePass *createAMDGPUOpenCLImageTypeLoweringPass();
FunctionPass *createAMDGPUAnnotateUniformValues();
+FunctionPass* createAMDGPUUnifyMetadataPass();
+void initializeAMDGPUUnifyMetadataPass(PassRegistry&);
+extern char &AMDGPUUnifyMetadataID;
+
void initializeSIFixControlFlowLiveIntervalsPass(PassRegistry&);
extern char &SIFixControlFlowLiveIntervalsID;
@@ -111,8 +112,8 @@ extern char &SIDebuggerInsertNopsID;
void initializeSIInsertWaitsPass(PassRegistry&);
extern char &SIInsertWaitsID;
-extern Target TheAMDGPUTarget;
-extern Target TheGCNTarget;
+Target &getTheAMDGPUTarget();
+Target &getTheGCNTarget();
namespace AMDGPU {
enum TargetIndex {
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td
index 72c455354411..0b2badff7ccf 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -67,6 +67,19 @@ def FeatureUnalignedBufferAccess : SubtargetFeature<"unaligned-buffer-access",
"Support unaligned global loads and stores"
>;
+def FeatureUnalignedScratchAccess : SubtargetFeature<"unaligned-scratch-access",
+ "UnalignedScratchAccess",
+ "true",
+ "Support unaligned scratch loads and stores"
+>;
+
+// XNACK is disabled if SH_MEM_CONFIG.ADDRESS_MODE = GPUVM on chips that support
+// XNACK. The current default kernel driver setting is:
+// - graphics ring: XNACK disabled
+// - compute ring: XNACK enabled
+//
+// If XNACK is enabled, the VMEM latency can be worse.
+// If XNACK is disabled, the 2 SGPRs can be used for general purposes.
def FeatureXNACK : SubtargetFeature<"xnack",
"EnableXNACK",
"true",
@@ -110,20 +123,6 @@ class SubtargetFeatureLDSBankCount <int Value> : SubtargetFeature <
def FeatureLDSBankCount16 : SubtargetFeatureLDSBankCount<16>;
def FeatureLDSBankCount32 : SubtargetFeatureLDSBankCount<32>;
-class SubtargetFeatureISAVersion <int Major, int Minor, int Stepping>
- : SubtargetFeature <
- "isaver"#Major#"."#Minor#"."#Stepping,
- "IsaVersion",
- "ISAVersion"#Major#"_"#Minor#"_"#Stepping,
- "Instruction set version number"
->;
-
-def FeatureISAVersion7_0_0 : SubtargetFeatureISAVersion <7,0,0>;
-def FeatureISAVersion7_0_1 : SubtargetFeatureISAVersion <7,0,1>;
-def FeatureISAVersion8_0_0 : SubtargetFeatureISAVersion <8,0,0>;
-def FeatureISAVersion8_0_1 : SubtargetFeatureISAVersion <8,0,1>;
-def FeatureISAVersion8_0_3 : SubtargetFeatureISAVersion <8,0,3>;
-
class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature<
"localmemorysize"#Value,
"LocalMemorySize",
@@ -161,16 +160,46 @@ def FeatureSMemRealTime : SubtargetFeature<"s-memrealtime",
"Has s_memrealtime instruction"
>;
+def FeatureInv2PiInlineImm : SubtargetFeature<"inv-2pi-inline-imm",
+ "HasInv2PiInlineImm",
+ "true",
+ "Has 1 / (2 * pi) as inline immediate"
+>;
+
def Feature16BitInsts : SubtargetFeature<"16-bit-insts",
"Has16BitInsts",
"true",
"Has i16/f16 instructions"
>;
+def FeatureMovrel : SubtargetFeature<"movrel",
+ "HasMovrel",
+ "true",
+ "Has v_movrel*_b32 instructions"
+>;
+
+def FeatureVGPRIndexMode : SubtargetFeature<"vgpr-index-mode",
+ "HasVGPRIndexMode",
+ "true",
+ "Has VGPR mode register indexing"
+>;
+
+def FeatureScalarStores : SubtargetFeature<"scalar-stores",
+ "HasScalarStores",
+ "true",
+ "Has store scalar memory instructions"
+>;
+
//===------------------------------------------------------------===//
// Subtarget Features (options and debugging)
//===------------------------------------------------------------===//
+def FeatureFP16Denormals : SubtargetFeature<"fp16-denormals",
+ "FP16Denormals",
+ "true",
+ "Enable half precision denormal handling"
+>;
+
// Some instructions do not support denormals despite this flag. Using
// fp32 denormals also causes instructions to run at the double
// precision rate for the device.
@@ -294,23 +323,76 @@ def FeatureNorthernIslands : SubtargetFeatureGeneration<"NORTHERN_ISLANDS",
def FeatureSouthernIslands : SubtargetFeatureGeneration<"SOUTHERN_ISLANDS",
[FeatureFP64, FeatureLocalMemorySize32768,
FeatureWavefrontSize64, FeatureGCN, FeatureGCN1Encoding,
- FeatureLDSBankCount32]
+ FeatureLDSBankCount32, FeatureMovrel]
>;
def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS",
[FeatureFP64, FeatureLocalMemorySize65536,
FeatureWavefrontSize64, FeatureGCN, FeatureFlatAddressSpace,
- FeatureGCN1Encoding, FeatureCIInsts]
+ FeatureGCN1Encoding, FeatureCIInsts, FeatureMovrel]
>;
def FeatureVolcanicIslands : SubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
[FeatureFP64, FeatureLocalMemorySize65536,
FeatureWavefrontSize64, FeatureFlatAddressSpace, FeatureGCN,
FeatureGCN3Encoding, FeatureCIInsts, Feature16BitInsts,
- FeatureSMemRealTime
+ FeatureSMemRealTime, FeatureVGPRIndexMode, FeatureMovrel,
+ FeatureScalarStores, FeatureInv2PiInlineImm
]
>;
+class SubtargetFeatureISAVersion <int Major, int Minor, int Stepping,
+ list<SubtargetFeature> Implies>
+ : SubtargetFeature <
+ "isaver"#Major#"."#Minor#"."#Stepping,
+ "IsaVersion",
+ "ISAVersion"#Major#"_"#Minor#"_"#Stepping,
+ "Instruction set version number",
+ Implies
+>;
+
+def FeatureISAVersion7_0_0 : SubtargetFeatureISAVersion <7,0,0,
+ [FeatureSeaIslands,
+ FeatureLDSBankCount32]>;
+
+def FeatureISAVersion7_0_1 : SubtargetFeatureISAVersion <7,0,1,
+ [FeatureSeaIslands,
+ HalfRate64Ops,
+ FeatureLDSBankCount32,
+ FeatureFastFMAF32]>;
+
+def FeatureISAVersion7_0_2 : SubtargetFeatureISAVersion <7,0,2,
+ [FeatureSeaIslands,
+ FeatureLDSBankCount16]>;
+
+def FeatureISAVersion8_0_0 : SubtargetFeatureISAVersion <8,0,0,
+ [FeatureVolcanicIslands,
+ FeatureLDSBankCount32,
+ FeatureSGPRInitBug]>;
+
+def FeatureISAVersion8_0_1 : SubtargetFeatureISAVersion <8,0,1,
+ [FeatureVolcanicIslands,
+ FeatureLDSBankCount32,
+ FeatureXNACK]>;
+
+def FeatureISAVersion8_0_2 : SubtargetFeatureISAVersion <8,0,2,
+ [FeatureVolcanicIslands,
+ FeatureLDSBankCount32,
+ FeatureSGPRInitBug]>;
+
+def FeatureISAVersion8_0_3 : SubtargetFeatureISAVersion <8,0,3,
+ [FeatureVolcanicIslands,
+ FeatureLDSBankCount32]>;
+
+def FeatureISAVersion8_0_4 : SubtargetFeatureISAVersion <8,0,4,
+ [FeatureVolcanicIslands,
+ FeatureLDSBankCount32]>;
+
+def FeatureISAVersion8_1_0 : SubtargetFeatureISAVersion <8,1,0,
+ [FeatureVolcanicIslands,
+ FeatureLDSBankCount16,
+ FeatureXNACK]>;
+
//===----------------------------------------------------------------------===//
// Debugger related subtarget features.
//===----------------------------------------------------------------------===//
@@ -349,10 +431,52 @@ def AMDGPUAsmParser : AsmParser {
let ShouldEmitMatchRegisterName = 0;
}
+def AMDGPUAsmWriter : AsmWriter {
+ int PassSubtarget = 1;
+}
+
+def AMDGPUAsmVariants {
+ string Default = "Default";
+ int Default_ID = 0;
+ string VOP3 = "VOP3";
+ int VOP3_ID = 1;
+ string SDWA = "SDWA";
+ int SDWA_ID = 2;
+ string DPP = "DPP";
+ int DPP_ID = 3;
+ string Disable = "Disable";
+ int Disable_ID = 4;
+}
+
+def DefaultAMDGPUAsmParserVariant : AsmParserVariant {
+ let Variant = AMDGPUAsmVariants.Default_ID;
+ let Name = AMDGPUAsmVariants.Default;
+}
+
+def VOP3AsmParserVariant : AsmParserVariant {
+ let Variant = AMDGPUAsmVariants.VOP3_ID;
+ let Name = AMDGPUAsmVariants.VOP3;
+}
+
+def SDWAAsmParserVariant : AsmParserVariant {
+ let Variant = AMDGPUAsmVariants.SDWA_ID;
+ let Name = AMDGPUAsmVariants.SDWA;
+}
+
+def DPPAsmParserVariant : AsmParserVariant {
+ let Variant = AMDGPUAsmVariants.DPP_ID;
+ let Name = AMDGPUAsmVariants.DPP;
+}
+
def AMDGPU : Target {
// Pull in Instruction Info:
let InstructionSet = AMDGPUInstrInfo;
let AssemblyParsers = [AMDGPUAsmParser];
+ let AssemblyParserVariants = [DefaultAMDGPUAsmParserVariant,
+ VOP3AsmParserVariant,
+ SDWAAsmParserVariant,
+ DPPAsmParserVariant];
+ let AssemblyWriters = [AMDGPUAsmWriter];
}
// Dummy Instruction itineraries for pseudo instructions
@@ -381,6 +505,8 @@ def isCIVI : Predicate <
def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">;
+def Has16BitInsts : Predicate<"Subtarget->has16BitInsts()">;
+
class PredicateControl {
Predicate SubtargetPredicate;
Predicate SIAssemblerPredicate = isSICI;
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
index 63f5fb3cdf00..067a16a2af7f 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
@@ -27,7 +27,7 @@ class AMDGPUAlwaysInline : public ModulePass {
public:
AMDGPUAlwaysInline() : ModulePass(ID) { }
bool runOnModule(Module &M) override;
- const char *getPassName() const override { return "AMDGPU Always Inline Pass"; }
+ StringRef getPassName() const override { return "AMDGPU Always Inline Pass"; }
};
} // End anonymous namespace
@@ -35,8 +35,20 @@ public:
char AMDGPUAlwaysInline::ID = 0;
bool AMDGPUAlwaysInline::runOnModule(Module &M) {
+ std::vector<GlobalAlias*> AliasesToRemove;
std::vector<Function *> FuncsToClone;
+ for (GlobalAlias &A : M.aliases()) {
+ if (Function* F = dyn_cast<Function>(A.getAliasee())) {
+ A.replaceAllUsesWith(F);
+ AliasesToRemove.push_back(&A);
+ }
+ }
+
+ for (GlobalAlias* A : AliasesToRemove) {
+ A->eraseFromParent();
+ }
+
for (Function &F : M) {
if (!F.hasLocalLinkage() && !F.isDeclaration() && !F.use_empty() &&
!F.hasFnAttribute(Attribute::NoInline))
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
index 0910b2877b09..c98d25e20185 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateKernelFeatures.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
@@ -35,7 +36,7 @@ public:
AMDGPUAnnotateKernelFeatures() : ModulePass(ID) { }
bool runOnModule(Module &M) override;
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "AMDGPU Annotate Kernel Features";
}
@@ -188,7 +189,8 @@ bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) {
static const StringRef HSAIntrinsicToAttr[][2] = {
{ "llvm.amdgcn.dispatch.ptr", "amdgpu-dispatch-ptr" },
- { "llvm.amdgcn.queue.ptr", "amdgpu-queue-ptr" }
+ { "llvm.amdgcn.queue.ptr", "amdgpu-queue-ptr" },
+ { "llvm.amdgcn.dispatch.id", "amdgpu-dispatch-id" }
};
// TODO: We should not add the attributes if the known compile time workgroup
@@ -200,7 +202,7 @@ bool AMDGPUAnnotateKernelFeatures::runOnModule(Module &M) {
// always initialized.
bool Changed = addAttrsForIntrinsics(M, IntrinsicToAttr);
- if (TT.getOS() == Triple::AMDHSA) {
+ if (TT.getOS() == Triple::AMDHSA || TT.getOS() == Triple::Mesa3D) {
Changed |= addAttrsForIntrinsics(M, HSAIntrinsicToAttr);
for (Function &F : M) {
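The hunk above adds llvm.amdgcn.dispatch.id to the intrinsic-to-attribute table and applies the HSA table to Mesa3D as well as AMDHSA. As a rough, self-contained model of that mapping (addAttrsForIntrinsics itself is not shown in this diff, so the behavior and all names below are assumptions for illustration):

#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

int main() {
  // Hypothetical table, mirroring HSAIntrinsicToAttr above.
  std::vector<std::pair<std::string, std::string>> intrinsicToAttr = {
      {"llvm.amdgcn.dispatch.ptr", "amdgpu-dispatch-ptr"},
      {"llvm.amdgcn.queue.ptr", "amdgpu-queue-ptr"},
      {"llvm.amdgcn.dispatch.id", "amdgpu-dispatch-id"},
  };
  // Hypothetical module: which intrinsics each function calls.
  std::map<std::string, std::set<std::string>> calls = {
      {"kernel_a", {"llvm.amdgcn.dispatch.id"}},
      {"kernel_b", {}},
  };
  // Attach the attribute string to every function that calls the intrinsic.
  for (const auto &fn : calls)
    for (const auto &entry : intrinsicToAttr)
      if (fn.second.count(entry.first))
        std::cout << fn.first << " gets attribute " << entry.second << "\n";
  return 0;
}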
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
index 2010cc952265..c011be6fa169 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAnnotateUniformValues.cpp
@@ -15,7 +15,10 @@
#include "AMDGPU.h"
#include "AMDGPUIntrinsicInfo.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/DivergenceAnalysis.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/Support/Debug.h"
@@ -30,6 +33,10 @@ namespace {
class AMDGPUAnnotateUniformValues : public FunctionPass,
public InstVisitor<AMDGPUAnnotateUniformValues> {
DivergenceAnalysis *DA;
+ MemoryDependenceResults *MDR;
+ LoopInfo *LI;
+ DenseMap<Value*, GetElementPtrInst*> noClobberClones;
+ bool isKernelFunc;
public:
static char ID;
@@ -37,15 +44,19 @@ public:
FunctionPass(ID) { }
bool doInitialization(Module &M) override;
bool runOnFunction(Function &F) override;
- const char *getPassName() const override { return "AMDGPU Annotate Uniform Values"; }
+ StringRef getPassName() const override {
+ return "AMDGPU Annotate Uniform Values";
+ }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<DivergenceAnalysis>();
+ AU.addRequired<MemoryDependenceWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
AU.setPreservesAll();
}
void visitBranchInst(BranchInst &I);
void visitLoadInst(LoadInst &I);
-
+ bool isClobberedInFunction(LoadInst * Load);
};
} // End anonymous namespace
@@ -53,6 +64,8 @@ public:
INITIALIZE_PASS_BEGIN(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
"Add AMDGPU uniform metadata", false, false)
INITIALIZE_PASS_DEPENDENCY(DivergenceAnalysis)
+INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(AMDGPUAnnotateUniformValues, DEBUG_TYPE,
"Add AMDGPU uniform metadata", false, false)
@@ -61,6 +74,46 @@ char AMDGPUAnnotateUniformValues::ID = 0;
static void setUniformMetadata(Instruction *I) {
I->setMetadata("amdgpu.uniform", MDNode::get(I->getContext(), {}));
}
+static void setNoClobberMetadata(Instruction *I) {
+ I->setMetadata("amdgpu.noclobber", MDNode::get(I->getContext(), {}));
+}
+
+static void DFS(BasicBlock *Root, SetVector<BasicBlock*> & Set) {
+ for (auto I : predecessors(Root))
+ if (Set.insert(I))
+ DFS(I, Set);
+}
+
+bool AMDGPUAnnotateUniformValues::isClobberedInFunction(LoadInst * Load) {
+ // 1. Get the loop for Load->getParent().
+ // 2. If it exists, collect all the BBs from the outermost loop and
+ // check them for writes; if not, start the DFS over all preds.
+ // 3. Start the DFS over all preds from the outermost loop header.
+ SetVector<BasicBlock *> Checklist;
+ BasicBlock *Start = Load->getParent();
+ Checklist.insert(Start);
+ const Value *Ptr = Load->getPointerOperand();
+ const Loop *L = LI->getLoopFor(Start);
+ if (L) {
+ const Loop *P = L;
+ do {
+ L = P;
+ P = P->getParentLoop();
+ } while (P);
+ Checklist.insert(L->block_begin(), L->block_end());
+ Start = L->getHeader();
+ }
+
+ DFS(Start, Checklist);
+ for (auto &BB : Checklist) {
+ BasicBlock::iterator StartIt = (BB == Load->getParent()) ?
+ BasicBlock::iterator(Load) : BB->end();
+ if (MDR->getPointerDependencyFrom(MemoryLocation(Ptr),
+ true, StartIt, BB, Load).isClobber())
+ return true;
+ }
+ return false;
+}
void AMDGPUAnnotateUniformValues::visitBranchInst(BranchInst &I) {
if (I.isUnconditional())
@@ -77,10 +130,39 @@ void AMDGPUAnnotateUniformValues::visitLoadInst(LoadInst &I) {
Value *Ptr = I.getPointerOperand();
if (!DA->isUniform(Ptr))
return;
-
- if (Instruction *PtrI = dyn_cast<Instruction>(Ptr))
+ auto isGlobalLoad = [](LoadInst &Load)->bool {
+ return Load.getPointerAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
+ };
+ // We only track up to the function boundary; a FunctionPass cannot look
+ // beyond it. Thus we can only guarantee that memory is not clobbered for
+ // memory operations that live in kernel functions.
+ bool NotClobbered = isKernelFunc && !isClobberedInFunction(&I);
+ Instruction *PtrI = dyn_cast<Instruction>(Ptr);
+ if (!PtrI && NotClobbered && isGlobalLoad(I)) {
+ if (isa<Argument>(Ptr) || isa<GlobalValue>(Ptr)) {
+ // Look up an existing GEP.
+ if (noClobberClones.count(Ptr)) {
+ PtrI = noClobberClones[Ptr];
+ } else {
+ // Create GEP of the Value
+ Function *F = I.getParent()->getParent();
+ Value *Idx = Constant::getIntegerValue(
+ Type::getInt32Ty(Ptr->getContext()), APInt(64, 0));
+ // Insert GEP at the entry to make it dominate all uses
+ PtrI = GetElementPtrInst::Create(
+ Ptr->getType()->getPointerElementType(), Ptr,
+ ArrayRef<Value*>(Idx), Twine(""), F->getEntryBlock().getFirstNonPHI());
+ }
+ I.replaceUsesOfWith(Ptr, PtrI);
+ }
+ }
+
+ if (PtrI) {
setUniformMetadata(PtrI);
-
+ if (NotClobbered)
+ setNoClobberMetadata(PtrI);
+ }
}
bool AMDGPUAnnotateUniformValues::doInitialization(Module &M) {
@@ -91,9 +173,13 @@ bool AMDGPUAnnotateUniformValues::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
- DA = &getAnalysis<DivergenceAnalysis>();
- visit(F);
+ DA = &getAnalysis<DivergenceAnalysis>();
+ MDR = &getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ isKernelFunc = F.getCallingConv() == CallingConv::AMDGPU_KERNEL;
+ visit(F);
+ noClobberClones.clear();
return true;
}
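For reference, the new isClobberedInFunction above visits every basic block that can reach the load (the outermost loop's blocks plus a DFS over predecessors) and asks MemoryDependenceResults whether the pointer is clobbered in any of them. A minimal standalone sketch of that predecessor walk, using a plain visited set instead of LLVM's SetVector (illustrative only; the graph below is made up):

#include <functional>
#include <set>
#include <vector>

int main() {
  // Hypothetical CFG: preds[b] lists the predecessors of block b.
  std::vector<std::vector<int>> preds = {{}, {0}, {0, 1}, {1, 2}};
  std::set<int> visited;
  std::function<void(int)> dfs = [&](int block) {
    for (int p : preds[block])
      if (visited.insert(p).second) // recurse only on the first visit
        dfs(p);
  };
  int loadBlock = 3;
  visited.insert(loadBlock);
  dfs(loadBlock);
  // visited now holds every block that can reach the load; each would then be
  // queried for a clobbering write before marking the load amdgpu.noclobber.
  return 0;
}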
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index c9c95c796a69..a8e6902c252b 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -39,9 +39,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
-#include "AMDGPURuntimeMetadata.h"
-using namespace ::AMDGPU;
using namespace llvm;
// TODO: This should get the default rounding mode from the kernel. We just set
@@ -87,13 +85,19 @@ createAMDGPUAsmPrinterPass(TargetMachine &tm,
}
extern "C" void LLVMInitializeAMDGPUAsmPrinter() {
- TargetRegistry::RegisterAsmPrinter(TheAMDGPUTarget, createAMDGPUAsmPrinterPass);
- TargetRegistry::RegisterAsmPrinter(TheGCNTarget, createAMDGPUAsmPrinterPass);
+ TargetRegistry::RegisterAsmPrinter(getTheAMDGPUTarget(),
+ createAMDGPUAsmPrinterPass);
+ TargetRegistry::RegisterAsmPrinter(getTheGCNTarget(),
+ createAMDGPUAsmPrinterPass);
}
AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer)
- : AsmPrinter(TM, std::move(Streamer)) {}
+ : AsmPrinter(TM, std::move(Streamer)) {}
+
+StringRef AMDGPUAsmPrinter::getPassName() const {
+ return "AMDGPU Assembly Printer";
+}
void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) {
if (TM.getTargetTriple().getOS() != Triple::AMDHSA)
@@ -113,13 +117,30 @@ void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) {
AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI->getFeatureBits());
TS->EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, ISA.Stepping,
"AMD", "AMDGPU");
- emitStartOfRuntimeMetadata(M);
+
+ // Emit runtime metadata.
+ TS->EmitRuntimeMetadata(M);
}
+bool AMDGPUAsmPrinter::isBlockOnlyReachableByFallthrough(
+ const MachineBasicBlock *MBB) const {
+ if (!AsmPrinter::isBlockOnlyReachableByFallthrough(MBB))
+ return false;
+
+ if (MBB->empty())
+ return true;
+
+ // If this is a block implementing a long branch, an expression relative to
+ // the start of the block is needed.
+ // XXX - Is there a smarter way to check this?
+ return (MBB->back().getOpcode() != AMDGPU::S_SETPC_B64);
+}
+
+
void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
SIProgramInfo KernelInfo;
- if (STM.isAmdHsaOS()) {
+ if (STM.isAmdCodeObjectV2()) {
getSIProgramInfo(KernelInfo, *MF);
EmitAmdKernelCodeT(*MF, KernelInfo);
}
@@ -128,11 +149,12 @@ void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
void AMDGPUAsmPrinter::EmitFunctionEntryLabel() {
const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
- if (MFI->isKernel() && STM.isAmdHsaOS()) {
+ if (MFI->isKernel() && STM.isAmdCodeObjectV2()) {
AMDGPUTargetStreamer *TS =
static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
- TS->EmitAMDGPUSymbolType(CurrentFnSym->getName(),
- ELF::STT_AMDGPU_HSA_KERNEL);
+ SmallString<128> SymbolName;
+ getNameWithPrefix(SymbolName, MF->getFunction());
+ TS->EmitAMDGPUSymbolType(SymbolName, ELF::STT_AMDGPU_HSA_KERNEL);
}
AsmPrinter::EmitFunctionEntryLabel();
@@ -198,6 +220,16 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
OutStreamer->emitRawComment(" LDSByteSize: " + Twine(KernelInfo.LDSSize) +
" bytes/workgroup (compile time only)", false);
+ OutStreamer->emitRawComment(" SGPRBlocks: " +
+ Twine(KernelInfo.SGPRBlocks), false);
+ OutStreamer->emitRawComment(" VGPRBlocks: " +
+ Twine(KernelInfo.VGPRBlocks), false);
+
+ OutStreamer->emitRawComment(" NumSGPRsForWavesPerEU: " +
+ Twine(KernelInfo.NumSGPRsForWavesPerEU), false);
+ OutStreamer->emitRawComment(" NumVGPRsForWavesPerEU: " +
+ Twine(KernelInfo.NumVGPRsForWavesPerEU), false);
+
OutStreamer->emitRawComment(" ReservedVGPRFirst: " + Twine(KernelInfo.ReservedVGPRFirst),
false);
OutStreamer->emitRawComment(" ReservedVGPRCount: " + Twine(KernelInfo.ReservedVGPRCount),
@@ -229,7 +261,7 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
} else {
R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
OutStreamer->emitRawComment(
- Twine("SQ_PGM_RESOURCES:STACK_SIZE = " + Twine(MFI->StackSize)));
+ Twine("SQ_PGM_RESOURCES:STACK_SIZE = " + Twine(MFI->CFStackSize)));
}
}
@@ -247,8 +279,6 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
}
}
- emitRuntimeMetadata(*MF.getFunction());
-
return false;
}
@@ -282,7 +312,7 @@ void AMDGPUAsmPrinter::EmitProgramInfoR600(const MachineFunction &MF) {
if (STM.getGeneration() >= R600Subtarget::EVERGREEN) {
// Evergreen / Northern Islands
switch (MF.getFunction()->getCallingConv()) {
- default: // Fall through
+ default: LLVM_FALLTHROUGH;
case CallingConv::AMDGPU_CS: RsrcReg = R_0288D4_SQ_PGM_RESOURCES_LS; break;
case CallingConv::AMDGPU_GS: RsrcReg = R_028878_SQ_PGM_RESOURCES_GS; break;
case CallingConv::AMDGPU_PS: RsrcReg = R_028844_SQ_PGM_RESOURCES_PS; break;
@@ -291,9 +321,9 @@ void AMDGPUAsmPrinter::EmitProgramInfoR600(const MachineFunction &MF) {
} else {
// R600 / R700
switch (MF.getFunction()->getCallingConv()) {
- default: // Fall through
- case CallingConv::AMDGPU_GS: // Fall through
- case CallingConv::AMDGPU_CS: // Fall through
+ default: LLVM_FALLTHROUGH;
+ case CallingConv::AMDGPU_GS: LLVM_FALLTHROUGH;
+ case CallingConv::AMDGPU_CS: LLVM_FALLTHROUGH;
case CallingConv::AMDGPU_VS: RsrcReg = R_028868_SQ_PGM_RESOURCES_VS; break;
case CallingConv::AMDGPU_PS: RsrcReg = R_028850_SQ_PGM_RESOURCES_PS; break;
}
@@ -301,13 +331,13 @@ void AMDGPUAsmPrinter::EmitProgramInfoR600(const MachineFunction &MF) {
OutStreamer->EmitIntValue(RsrcReg, 4);
OutStreamer->EmitIntValue(S_NUM_GPRS(MaxGPR + 1) |
- S_STACK_SIZE(MFI->StackSize), 4);
+ S_STACK_SIZE(MFI->CFStackSize), 4);
OutStreamer->EmitIntValue(R_02880C_DB_SHADER_CONTROL, 4);
OutStreamer->EmitIntValue(S_02880C_KILL_ENABLE(killPixel), 4);
if (AMDGPU::isCompute(MF.getFunction()->getCallingConv())) {
OutStreamer->EmitIntValue(R_0288E8_SQ_LDS_ALLOC, 4);
- OutStreamer->EmitIntValue(alignTo(MFI->LDSSize, 4) >> 2, 4);
+ OutStreamer->EmitIntValue(alignTo(MFI->getLDSSize(), 4) >> 2, 4);
}
}
@@ -331,7 +361,8 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
if (MI.isDebugValue())
continue;
- CodeSize += TII->getInstSizeInBytes(MI);
+ if (isVerbose())
+ CodeSize += TII->getInstSizeInBytes(MI);
unsigned numOperands = MI.getNumOperands();
for (unsigned op_idx = 0; op_idx < numOperands; op_idx++) {
@@ -360,7 +391,10 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
case AMDGPU::FLAT_SCR:
case AMDGPU::FLAT_SCR_LO:
case AMDGPU::FLAT_SCR_HI:
- FlatUsed = true;
+ // Even if FLAT_SCRATCH is implicitly used, it has no effect if flat
+ // instructions aren't used to access the scratch buffer.
+ if (MFI->hasFlatScratchInit())
+ FlatUsed = true;
continue;
case AMDGPU::TBA:
@@ -369,26 +403,23 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
case AMDGPU::TMA:
case AMDGPU::TMA_LO:
case AMDGPU::TMA_HI:
- llvm_unreachable("Trap Handler registers should not be used");
- continue;
+ llvm_unreachable("trap handler registers should not be used");
default:
break;
}
if (AMDGPU::SReg_32RegClass.contains(reg)) {
- if (AMDGPU::TTMP_32RegClass.contains(reg)) {
- llvm_unreachable("Trap Handler registers should not be used");
- }
+ assert(!AMDGPU::TTMP_32RegClass.contains(reg) &&
+ "trap handler registers should not be used");
isSGPR = true;
width = 1;
} else if (AMDGPU::VGPR_32RegClass.contains(reg)) {
isSGPR = false;
width = 1;
} else if (AMDGPU::SReg_64RegClass.contains(reg)) {
- if (AMDGPU::TTMP_64RegClass.contains(reg)) {
- llvm_unreachable("Trap Handler registers should not be used");
- }
+ assert(!AMDGPU::TTMP_64RegClass.contains(reg) &&
+ "trap handler registers should not be used");
isSGPR = true;
width = 2;
} else if (AMDGPU::VReg_64RegClass.contains(reg)) {
@@ -445,20 +476,15 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
ExtraSGPRs = 6;
}
- MaxSGPR += ExtraSGPRs;
-
// Record first reserved register and reserved register count fields, and
// update max register counts if "amdgpu-debugger-reserve-regs" attribute was
- // specified.
- if (STM.debuggerReserveRegs()) {
- ProgInfo.ReservedVGPRFirst = MaxVGPR + 1;
- ProgInfo.ReservedVGPRCount = MFI->getDebuggerReservedVGPRCount();
- MaxVGPR += MFI->getDebuggerReservedVGPRCount();
- }
+ // requested.
+ ProgInfo.ReservedVGPRFirst = STM.debuggerReserveRegs() ? MaxVGPR + 1 : 0;
+ ProgInfo.ReservedVGPRCount = RI->getNumDebuggerReservedVGPRs(STM);
// Update DebuggerWavefrontPrivateSegmentOffsetSGPR and
// DebuggerPrivateSegmentBufferSGPR fields if "amdgpu-debugger-emit-prologue"
- // attribute was specified.
+ // attribute was requested.
if (STM.debuggerEmitPrologue()) {
ProgInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR =
RI->getHWRegIndex(MFI->getScratchWaveOffsetReg());
@@ -466,21 +492,59 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
RI->getHWRegIndex(MFI->getScratchRSrcReg());
}
+ // Check the addressable register limit before we add ExtraSGPRs.
+ if (STM.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS &&
+ !STM.hasSGPRInitBug()) {
+ unsigned MaxAddressableNumSGPRs = STM.getMaxNumSGPRs();
+ if (MaxSGPR + 1 > MaxAddressableNumSGPRs) {
+ // This can happen due to a compiler bug or when using inline asm.
+ LLVMContext &Ctx = MF.getFunction()->getContext();
+ DiagnosticInfoResourceLimit Diag(*MF.getFunction(),
+ "addressable scalar registers",
+ MaxSGPR + 1, DS_Error,
+ DK_ResourceLimit, MaxAddressableNumSGPRs);
+ Ctx.diagnose(Diag);
+ MaxSGPR = MaxAddressableNumSGPRs - 1;
+ }
+ }
+
+ // Account for extra SGPRs and VGPRs reserved for debugger use.
+ MaxSGPR += ExtraSGPRs;
+ MaxVGPR += RI->getNumDebuggerReservedVGPRs(STM);
+
// We found the maximum register index. They start at 0, so add one to get the
// number of registers.
ProgInfo.NumVGPR = MaxVGPR + 1;
ProgInfo.NumSGPR = MaxSGPR + 1;
- if (STM.hasSGPRInitBug()) {
- if (ProgInfo.NumSGPR > SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG) {
+ // Adjust number of registers used to meet default/requested minimum/maximum
+ // number of waves per execution unit request.
+ ProgInfo.NumSGPRsForWavesPerEU = std::max(
+ ProgInfo.NumSGPR, RI->getMinNumSGPRs(STM, MFI->getMaxWavesPerEU()));
+ ProgInfo.NumVGPRsForWavesPerEU = std::max(
+ ProgInfo.NumVGPR, RI->getMinNumVGPRs(MFI->getMaxWavesPerEU()));
+
+ if (STM.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS ||
+ STM.hasSGPRInitBug()) {
+ unsigned MaxNumSGPRs = STM.getMaxNumSGPRs();
+ if (ProgInfo.NumSGPR > MaxNumSGPRs) {
+ // This can happen due to a compiler bug or when inline asm uses the
+ // registers that are usually reserved for VCC etc.
+
LLVMContext &Ctx = MF.getFunction()->getContext();
DiagnosticInfoResourceLimit Diag(*MF.getFunction(),
- "SGPRs with SGPR init bug",
- ProgInfo.NumSGPR, DS_Error);
+ "scalar registers",
+ ProgInfo.NumSGPR, DS_Error,
+ DK_ResourceLimit, MaxNumSGPRs);
Ctx.diagnose(Diag);
+ ProgInfo.NumSGPR = MaxNumSGPRs;
+ ProgInfo.NumSGPRsForWavesPerEU = MaxNumSGPRs;
}
+ }
+ if (STM.hasSGPRInitBug()) {
ProgInfo.NumSGPR = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
+ ProgInfo.NumSGPRsForWavesPerEU = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
}
if (MFI->NumUserSGPRs > STM.getMaxNumUserSGPRs()) {
@@ -490,26 +554,34 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
Ctx.diagnose(Diag);
}
- if (MFI->LDSSize > static_cast<unsigned>(STM.getLocalMemorySize())) {
+ if (MFI->getLDSSize() > static_cast<unsigned>(STM.getLocalMemorySize())) {
LLVMContext &Ctx = MF.getFunction()->getContext();
DiagnosticInfoResourceLimit Diag(*MF.getFunction(), "local memory",
- MFI->LDSSize, DS_Error);
+ MFI->getLDSSize(), DS_Error);
Ctx.diagnose(Diag);
}
- ProgInfo.VGPRBlocks = (ProgInfo.NumVGPR - 1) / 4;
- ProgInfo.SGPRBlocks = (ProgInfo.NumSGPR - 1) / 8;
+ // SGPRBlocks is the actual number of SGPR blocks minus 1.
+ ProgInfo.SGPRBlocks = alignTo(ProgInfo.NumSGPRsForWavesPerEU,
+ RI->getSGPRAllocGranule());
+ ProgInfo.SGPRBlocks = ProgInfo.SGPRBlocks / RI->getSGPRAllocGranule() - 1;
+
+ // VGPRBlocks is the actual number of VGPR blocks minus 1.
+ ProgInfo.VGPRBlocks = alignTo(ProgInfo.NumVGPRsForWavesPerEU,
+ RI->getVGPRAllocGranule());
+ ProgInfo.VGPRBlocks = ProgInfo.VGPRBlocks / RI->getVGPRAllocGranule() - 1;
+
// Set the value to initialize FP_ROUND and FP_DENORM parts of the mode
// register.
ProgInfo.FloatMode = getFPMode(MF);
- ProgInfo.IEEEMode = 0;
+ ProgInfo.IEEEMode = STM.enableIEEEBit(MF);
// Make clamp modifier on NaN input returns 0.
ProgInfo.DX10Clamp = 1;
- const MachineFrameInfo *FrameInfo = MF.getFrameInfo();
- ProgInfo.ScratchSize = FrameInfo->getStackSize();
+ const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
+ ProgInfo.ScratchSize = FrameInfo.getStackSize();
ProgInfo.FlatUsed = FlatUsed;
ProgInfo.VCCUsed = VCCUsed;
@@ -524,10 +596,10 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
LDSAlignShift = 9;
}
- unsigned LDSSpillSize = MFI->LDSWaveSpillSize *
- MFI->getMaximumWorkGroupSize(MF);
+ unsigned LDSSpillSize =
+ MFI->LDSWaveSpillSize * MFI->getMaxFlatWorkGroupSize();
- ProgInfo.LDSSize = MFI->LDSSize + LDSSpillSize;
+ ProgInfo.LDSSize = MFI->getLDSSize() + LDSSpillSize;
ProgInfo.LDSBlocks =
alignTo(ProgInfo.LDSSize, 1ULL << LDSAlignShift) >> LDSAlignShift;
@@ -573,7 +645,7 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
static unsigned getRsrcReg(CallingConv::ID CallConv) {
switch (CallConv) {
- default: // Fall through
+ default: LLVM_FALLTHROUGH;
case CallingConv::AMDGPU_CS: return R_00B848_COMPUTE_PGM_RSRC1;
case CallingConv::AMDGPU_GS: return R_00B228_SPI_SHADER_PGM_RSRC1_GS;
case CallingConv::AMDGPU_PS: return R_00B028_SPI_SHADER_PGM_RSRC1_PS;
@@ -703,7 +775,9 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
if (STM.isXNACKEnabled())
header.code_properties |= AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED;
- header.kernarg_segment_byte_size = MFI->ABIArgOffset;
+ // FIXME: Should use getKernArgSize
+ header.kernarg_segment_byte_size =
+ STM.getKernArgSegmentSize(MFI->getABIArgOffset());
header.wavefront_sgpr_count = KernelInfo.NumSGPR;
header.workitem_vgpr_count = KernelInfo.NumVGPR;
header.workitem_private_segment_byte_size = KernelInfo.ScratchSize;
@@ -711,6 +785,11 @@ void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
header.reserved_vgpr_first = KernelInfo.ReservedVGPRFirst;
header.reserved_vgpr_count = KernelInfo.ReservedVGPRCount;
+ // These alignment values are specified in powers of two, so alignment =
+ // 2^n. The minimum alignment is 2^4 = 16.
+ header.kernarg_segment_alignment = std::max((size_t)4,
+ countTrailingZeros(MFI->getMaxKernArgAlign()));
+
if (STM.debuggerEmitPrologue()) {
header.debug_wavefront_private_segment_offset_sgpr =
KernelInfo.DebuggerWavefrontPrivateSegmentOffsetSGPR;
@@ -745,231 +824,3 @@ bool AMDGPUAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
*TM.getSubtargetImpl(*MF->getFunction())->getRegisterInfo());
return false;
}
-
-// Emit a key and an integer value for runtime metadata.
-static void emitRuntimeMDIntValue(std::unique_ptr<MCStreamer> &Streamer,
- RuntimeMD::Key K, uint64_t V,
- unsigned Size) {
- Streamer->EmitIntValue(K, 1);
- Streamer->EmitIntValue(V, Size);
-}
-
-// Emit a key and a string value for runtime metadata.
-static void emitRuntimeMDStringValue(std::unique_ptr<MCStreamer> &Streamer,
- RuntimeMD::Key K, StringRef S) {
- Streamer->EmitIntValue(K, 1);
- Streamer->EmitIntValue(S.size(), 4);
- Streamer->EmitBytes(S);
-}
-
-// Emit a key and three integer values for runtime metadata.
-// The three integer values are obtained from MDNode \p Node;
-static void emitRuntimeMDThreeIntValues(std::unique_ptr<MCStreamer> &Streamer,
- RuntimeMD::Key K, MDNode *Node,
- unsigned Size) {
- Streamer->EmitIntValue(K, 1);
- Streamer->EmitIntValue(mdconst::extract<ConstantInt>(
- Node->getOperand(0))->getZExtValue(), Size);
- Streamer->EmitIntValue(mdconst::extract<ConstantInt>(
- Node->getOperand(1))->getZExtValue(), Size);
- Streamer->EmitIntValue(mdconst::extract<ConstantInt>(
- Node->getOperand(2))->getZExtValue(), Size);
-}
-
-void AMDGPUAsmPrinter::emitStartOfRuntimeMetadata(const Module &M) {
- OutStreamer->SwitchSection(getObjFileLowering().getContext()
- .getELFSection(RuntimeMD::SectionName, ELF::SHT_PROGBITS, 0));
-
- emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyMDVersion,
- RuntimeMD::MDVersion << 8 | RuntimeMD::MDRevision, 2);
- if (auto MD = M.getNamedMetadata("opencl.ocl.version")) {
- if (MD->getNumOperands()) {
- auto Node = MD->getOperand(0);
- if (Node->getNumOperands() > 1) {
- emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyLanguage,
- RuntimeMD::OpenCL_C, 1);
- uint16_t Major = mdconst::extract<ConstantInt>(Node->getOperand(0))
- ->getZExtValue();
- uint16_t Minor = mdconst::extract<ConstantInt>(Node->getOperand(1))
- ->getZExtValue();
- emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyLanguageVersion,
- Major * 100 + Minor * 10, 2);
- }
- }
- }
-}
-
-static std::string getOCLTypeName(Type *Ty, bool isSigned) {
- if (VectorType* VecTy = dyn_cast<VectorType>(Ty)) {
- Type* EleTy = VecTy->getElementType();
- unsigned Size = VecTy->getVectorNumElements();
- return (Twine(getOCLTypeName(EleTy, isSigned)) + Twine(Size)).str();
- }
- switch (Ty->getTypeID()) {
- case Type::HalfTyID: return "half";
- case Type::FloatTyID: return "float";
- case Type::DoubleTyID: return "double";
- case Type::IntegerTyID: {
- if (!isSigned)
- return (Twine('u') + Twine(getOCLTypeName(Ty, true))).str();
- auto IntTy = cast<IntegerType>(Ty);
- auto BW = IntTy->getIntegerBitWidth();
- switch (BW) {
- case 8:
- return "char";
- case 16:
- return "short";
- case 32:
- return "int";
- case 64:
- return "long";
- default:
- return (Twine('i') + Twine(BW)).str();
- }
- }
- default:
- llvm_unreachable("invalid type");
- }
-}
-
-static RuntimeMD::KernelArg::ValueType getRuntimeMDValueType(
- Type *Ty, StringRef TypeName) {
- if (auto VT = dyn_cast<VectorType>(Ty))
- return getRuntimeMDValueType(VT->getElementType(), TypeName);
- else if (auto PT = dyn_cast<PointerType>(Ty))
- return getRuntimeMDValueType(PT->getElementType(), TypeName);
- else if (Ty->isHalfTy())
- return RuntimeMD::KernelArg::F16;
- else if (Ty->isFloatTy())
- return RuntimeMD::KernelArg::F32;
- else if (Ty->isDoubleTy())
- return RuntimeMD::KernelArg::F64;
- else if (IntegerType* intTy = dyn_cast<IntegerType>(Ty)) {
- bool Signed = !TypeName.startswith("u");
- switch (intTy->getIntegerBitWidth()) {
- case 8:
- return Signed ? RuntimeMD::KernelArg::I8 : RuntimeMD::KernelArg::U8;
- case 16:
- return Signed ? RuntimeMD::KernelArg::I16 : RuntimeMD::KernelArg::U16;
- case 32:
- return Signed ? RuntimeMD::KernelArg::I32 : RuntimeMD::KernelArg::U32;
- case 64:
- return Signed ? RuntimeMD::KernelArg::I64 : RuntimeMD::KernelArg::U64;
- default:
- // Runtime does not recognize other integer types. Report as
- // struct type.
- return RuntimeMD::KernelArg::Struct;
- }
- } else
- return RuntimeMD::KernelArg::Struct;
-}
-
-void AMDGPUAsmPrinter::emitRuntimeMetadata(const Function &F) {
- if (!F.getMetadata("kernel_arg_type"))
- return;
-
- MCContext &Context = getObjFileLowering().getContext();
- OutStreamer->SwitchSection(
- Context.getELFSection(RuntimeMD::SectionName, ELF::SHT_PROGBITS, 0));
- OutStreamer->EmitIntValue(RuntimeMD::KeyKernelBegin, 1);
- emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyKernelName, F.getName());
-
- for (auto &Arg:F.args()) {
- // Emit KeyArgBegin.
- unsigned I = Arg.getArgNo();
- OutStreamer->EmitIntValue(RuntimeMD::KeyArgBegin, 1);
-
- // Emit KeyArgSize and KeyArgAlign.
- auto T = Arg.getType();
- auto DL = F.getParent()->getDataLayout();
- emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgSize,
- DL.getTypeAllocSize(T), 4);
- emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgAlign,
- DL.getABITypeAlignment(T), 4);
-
- // Emit KeyArgTypeName.
- auto TypeName = dyn_cast<MDString>(F.getMetadata(
- "kernel_arg_type")->getOperand(I))->getString();
- emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyArgTypeName, TypeName);
-
- // Emit KeyArgName.
- if (auto ArgNameMD = F.getMetadata("kernel_arg_name")) {
- auto ArgName = cast<MDString>(ArgNameMD->getOperand(
- I))->getString();
- emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyArgName, ArgName);
- }
-
- // Emit KeyArgIsVolatile, KeyArgIsRestrict, KeyArgIsConst and KeyArgIsPipe.
- auto TypeQual = cast<MDString>(F.getMetadata(
- "kernel_arg_type_qual")->getOperand(I))->getString();
- SmallVector<StringRef, 1> SplitQ;
- TypeQual.split(SplitQ, " ", -1, false/* drop empty entry*/);
- for (auto &I:SplitQ) {
- auto Key = StringSwitch<RuntimeMD::Key>(I)
- .Case("volatile", RuntimeMD::KeyArgIsVolatile)
- .Case("restrict", RuntimeMD::KeyArgIsRestrict)
- .Case("const", RuntimeMD::KeyArgIsConst)
- .Case("pipe", RuntimeMD::KeyArgIsPipe)
- .Default(RuntimeMD::KeyNull);
- OutStreamer->EmitIntValue(Key, 1);
- }
-
- // Emit KeyArgTypeKind.
- auto BaseTypeName = cast<MDString>(
- F.getMetadata("kernel_arg_base_type")->getOperand(I))->getString();
- auto TypeKind = StringSwitch<RuntimeMD::KernelArg::TypeKind>(BaseTypeName)
- .Case("sampler_t", RuntimeMD::KernelArg::Sampler)
- .Case("queue_t", RuntimeMD::KernelArg::Queue)
- .Cases("image1d_t", "image1d_array_t", "image1d_buffer_t",
- "image2d_t" , "image2d_array_t", RuntimeMD::KernelArg::Image)
- .Cases("image2d_depth_t", "image2d_array_depth_t",
- "image2d_msaa_t", "image2d_array_msaa_t",
- "image2d_msaa_depth_t", RuntimeMD::KernelArg::Image)
- .Cases("image2d_array_msaa_depth_t", "image3d_t",
- RuntimeMD::KernelArg::Image)
- .Default(isa<PointerType>(T) ? RuntimeMD::KernelArg::Pointer :
- RuntimeMD::KernelArg::Value);
- emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgTypeKind, TypeKind, 1);
-
- // Emit KeyArgValueType.
- emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgValueType,
- getRuntimeMDValueType(T, BaseTypeName), 2);
-
- // Emit KeyArgAccQual.
- auto AccQual = cast<MDString>(F.getMetadata(
- "kernel_arg_access_qual")->getOperand(I))->getString();
- auto AQ = StringSwitch<RuntimeMD::KernelArg::AccessQualifer>(AccQual)
- .Case("read_only", RuntimeMD::KernelArg::ReadOnly)
- .Case("write_only", RuntimeMD::KernelArg::WriteOnly)
- .Case("read_write", RuntimeMD::KernelArg::ReadWrite)
- .Default(RuntimeMD::KernelArg::None);
- emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgAccQual,
- AQ, 1);
-
- // Emit KeyArgAddrQual.
- if (isa<PointerType>(T))
- emitRuntimeMDIntValue(OutStreamer, RuntimeMD::KeyArgAddrQual,
- T->getPointerAddressSpace(), 1);
-
- // Emit KeyArgEnd
- OutStreamer->EmitIntValue(RuntimeMD::KeyArgEnd, 1);
- }
-
- // Emit KeyReqdWorkGroupSize, KeyWorkGroupSizeHint, and KeyVecTypeHint.
- if (auto RWGS = F.getMetadata("reqd_work_group_size"))
- emitRuntimeMDThreeIntValues(OutStreamer, RuntimeMD::KeyReqdWorkGroupSize,
- RWGS, 4);
- if (auto WGSH = F.getMetadata("work_group_size_hint"))
- emitRuntimeMDThreeIntValues(OutStreamer, RuntimeMD::KeyWorkGroupSizeHint,
- WGSH, 4);
- if (auto VTH = F.getMetadata("vec_type_hint")) {
- auto TypeName = getOCLTypeName(cast<ValueAsMetadata>(
- VTH->getOperand(0))->getType(), mdconst::extract<ConstantInt>(
- VTH->getOperand(1))->getZExtValue());
- emitRuntimeMDStringValue(OutStreamer, RuntimeMD::KeyVecTypeHint,
- TypeName);
- }
-
- // Emit KeyKernelEnd
- OutStreamer->EmitIntValue(RuntimeMD::KeyKernelEnd, 1);
-}
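The new SGPRBlocks/VGPRBlocks computation above rounds the per-wave register counts up to the allocation granule and encodes them as "number of blocks minus one". A minimal sketch of that arithmetic with made-up granule values (the real granules come from SIRegisterInfo and are not shown in this diff):

#include <cassert>
#include <cstdint>

static uint32_t alignToGranule(uint32_t value, uint32_t granule) {
  return (value + granule - 1) / granule * granule;
}

// Round the register count up to the granule, then encode as blocks - 1.
static uint32_t encodeBlocks(uint32_t numRegs, uint32_t granule) {
  return alignToGranule(numRegs, granule) / granule - 1;
}

int main() {
  // Assuming a granule of 8 SGPRs: 42 registers round up to 48 (6 blocks),
  // which is encoded as 5.
  assert(encodeBlocks(42, 8) == 5);
  // Assuming a granule of 4 VGPRs: 24 registers give 6 blocks, encoded as 5.
  assert(encodeBlocks(24, 4) == 5);
  return 0;
}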
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index 7b04c539520d..9a4bafef3a25 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -15,10 +15,13 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUASMPRINTER_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUASMPRINTER_H
+#include "AMDGPUMCInstLower.h"
+
#include "llvm/CodeGen/AsmPrinter.h"
#include <vector>
namespace llvm {
+class MCOperand;
class AMDGPUAsmPrinter final : public AsmPrinter {
private:
@@ -40,6 +43,8 @@ private:
NumVGPR(0),
NumSGPR(0),
FlatUsed(false),
+ NumSGPRsForWavesPerEU(0),
+ NumVGPRsForWavesPerEU(0),
ReservedVGPRFirst(0),
ReservedVGPRCount(0),
DebuggerWavefrontPrivateSegmentOffsetSGPR((uint16_t)-1),
@@ -71,15 +76,23 @@ private:
uint32_t LDSSize;
bool FlatUsed;
+ // Number of SGPRs that meets the number of waves per execution unit request.
+ uint32_t NumSGPRsForWavesPerEU;
+
+ // Number of VGPRs that meets the number of waves per execution unit request.
+ uint32_t NumVGPRsForWavesPerEU;
+
// If ReservedVGPRCount is 0 then this must be 0. Otherwise, this is the first
// fixed VGPR number reserved.
uint16_t ReservedVGPRFirst;
+
// The number of consecutive VGPRs reserved.
uint16_t ReservedVGPRCount;
// Fixed SGPR number used to hold wave scratch offset for entire kernel
// execution, or uint16_t(-1) if the register is not used or not known.
uint16_t DebuggerWavefrontPrivateSegmentOffsetSGPR;
+
// Fixed SGPR number of the first 4 SGPRs used to hold scratch V# for entire
// kernel execution, or uint16_t(-1) if the register is not used or not
// known.
@@ -108,9 +121,16 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
- const char *getPassName() const override {
- return "AMDGPU Assembly Printer";
- }
+ StringRef getPassName() const override;
+
+ /// \brief Wrapper for MCInstLowering.lowerOperand() for the tblgen'erated
+ /// pseudo lowering.
+ bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const;
+
+ /// \brief tblgen'erated driver function for lowering simple MI->MC pseudo
+ /// instructions.
+ bool emitPseudoExpansionLowering(MCStreamer &OutStreamer,
+ const MachineInstr *MI);
/// Implemented in AMDGPUMCInstLower.cpp
void EmitInstruction(const MachineInstr *MI) override;
@@ -123,14 +143,13 @@ public:
void EmitStartOfAsmFile(Module &M) override;
+ bool isBlockOnlyReachableByFallthrough(
+ const MachineBasicBlock *MBB) const override;
+
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode,
raw_ostream &O) override;
- void emitStartOfRuntimeMetadata(const Module &M);
-
- void emitRuntimeMetadata(const Function &F);
-
protected:
std::vector<std::string> DisasmLines, HexLines;
size_t DisasmLineMaxLen;
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
index 1a1da8a254a7..d53cc153dc9a 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp
@@ -1,4 +1,4 @@
-//===-- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp - Call lowering ---===//
+//===-- llvm/lib/Target/AMDGPU/AMDGPUCallLowering.cpp - Call lowering -----===//
//
// The LLVM Compiler Infrastructure
//
@@ -34,9 +34,9 @@ bool AMDGPUCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
return true;
}
-bool AMDGPUCallLowering::lowerFormalArguments(
- MachineIRBuilder &MIRBuilder, const Function::ArgumentListType &Args,
- const SmallVectorImpl<unsigned> &VRegs) const {
+bool AMDGPUCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
+ const Function &F,
+ ArrayRef<unsigned> VRegs) const {
// TODO: Implement once there are generic loads/stores.
return true;
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h
index 61174bacdac3..9ae87c9397ab 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUCallLowering.h
@@ -27,10 +27,8 @@ class AMDGPUCallLowering: public CallLowering {
bool lowerReturn(MachineIRBuilder &MIRBuiler, const Value *Val,
unsigned VReg) const override;
- bool
- lowerFormalArguments(MachineIRBuilder &MIRBuilder,
- const Function::ArgumentListType &Args,
- const SmallVectorImpl<unsigned> &VRegs) const override;
+ bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
+ ArrayRef<unsigned> VRegs) const override;
};
} // End of namespace llvm;
#endif
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
index b955e231699a..e6230547a9b3 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUCodeGenPrepare.cpp
@@ -39,6 +39,78 @@ class AMDGPUCodeGenPrepare : public FunctionPass,
Module *Mod;
bool HasUnsafeFPMath;
+ /// \brief Copies exact/nsw/nuw flags (if any) from binary operation \p I to
+ /// binary operation \p V.
+ ///
+ /// \returns Binary operation \p V.
+ Value *copyFlags(const BinaryOperator &I, Value *V) const;
+
+ /// \returns \p T's base element bit width.
+ unsigned getBaseElementBitWidth(const Type *T) const;
+
+ /// \returns Equivalent 32 bit integer type for given type \p T. For example,
+ /// if \p T is i7, then i32 is returned; if \p T is <3 x i12>, then <3 x i32>
+ /// is returned.
+ Type *getI32Ty(IRBuilder<> &B, const Type *T) const;
+
+ /// \returns True if binary operation \p I is a signed binary operation, false
+ /// otherwise.
+ bool isSigned(const BinaryOperator &I) const;
+
+ /// \returns True if the condition of 'select' operation \p I comes from a
+ /// signed 'icmp' operation, false otherwise.
+ bool isSigned(const SelectInst &I) const;
+
+ /// \returns True if type \p T needs to be promoted to 32 bit integer type,
+ /// false otherwise.
+ bool needsPromotionToI32(const Type *T) const;
+
+ /// \brief Promotes uniform binary operation \p I to equivalent 32 bit binary
+ /// operation.
+ ///
+ /// \details \p I's base element bit width must be greater than 1 and less
+ /// than or equal to 16. Promotion is done by sign or zero extending operands to
+ /// 32 bits, replacing \p I with equivalent 32 bit binary operation, and
+ /// truncating the result of 32 bit binary operation back to \p I's original
+ /// type. Division operation is not promoted.
+ ///
+ /// \returns True if \p I is promoted to equivalent 32 bit binary operation,
+ /// false otherwise.
+ bool promoteUniformOpToI32(BinaryOperator &I) const;
+
+ /// \brief Promotes uniform 'icmp' operation \p I to 32 bit 'icmp' operation.
+ ///
+ /// \details \p I's base element bit width must be greater than 1 and less
+ /// than or equal to 16. Promotion is done by sign or zero extending operands to
+ /// 32 bits, and replacing \p I with 32 bit 'icmp' operation.
+ ///
+ /// \returns True.
+ bool promoteUniformOpToI32(ICmpInst &I) const;
+
+ /// \brief Promotes uniform 'select' operation \p I to 32 bit 'select'
+ /// operation.
+ ///
+ /// \details \p I's base element bit width must be greater than 1 and less
+ /// than or equal to 16. Promotion is done by sign or zero extending operands to
+ /// 32 bits, replacing \p I with 32 bit 'select' operation, and truncating the
+ /// result of 32 bit 'select' operation back to \p I's original type.
+ ///
+ /// \returns True.
+ bool promoteUniformOpToI32(SelectInst &I) const;
+
+ /// \brief Promotes uniform 'bitreverse' intrinsic \p I to 32 bit 'bitreverse'
+ /// intrinsic.
+ ///
+ /// \details \p I's base element bit width must be greater than 1 and less
+ /// than or equal to 16. Promotion is done by zero extending the operand to 32
+ /// bits, replacing \p I with 32 bit 'bitreverse' intrinsic, shifting the
+ /// result of 32 bit 'bitreverse' intrinsic to the right with zero fill (the
+ /// shift amount is 32 minus \p I's base element bit width), and truncating
+ /// the result of the shift operation back to \p I's original type.
+ ///
+ /// \returns True.
+ bool promoteUniformBitreverseToI32(IntrinsicInst &I) const;
+
public:
static char ID;
AMDGPUCodeGenPrepare(const TargetMachine *TM = nullptr) :
@@ -51,16 +123,18 @@ public:
bool visitFDiv(BinaryOperator &I);
- bool visitInstruction(Instruction &I) {
- return false;
- }
+ bool visitInstruction(Instruction &I) { return false; }
+ bool visitBinaryOperator(BinaryOperator &I);
+ bool visitICmpInst(ICmpInst &I);
+ bool visitSelectInst(SelectInst &I);
+
+ bool visitIntrinsicInst(IntrinsicInst &I);
+ bool visitBitreverseIntrinsicInst(IntrinsicInst &I);
bool doInitialization(Module &M) override;
bool runOnFunction(Function &F) override;
- const char *getPassName() const override {
- return "AMDGPU IR optimizations";
- }
+ StringRef getPassName() const override { return "AMDGPU IR optimizations"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<DivergenceAnalysis>();
@@ -70,6 +144,171 @@ public:
} // End anonymous namespace
+Value *AMDGPUCodeGenPrepare::copyFlags(
+ const BinaryOperator &I, Value *V) const {
+ BinaryOperator *BinOp = dyn_cast<BinaryOperator>(V);
+ if (!BinOp) // Possibly constant expression.
+ return V;
+
+ if (isa<OverflowingBinaryOperator>(BinOp)) {
+ BinOp->setHasNoSignedWrap(I.hasNoSignedWrap());
+ BinOp->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
+ } else if (isa<PossiblyExactOperator>(BinOp))
+ BinOp->setIsExact(I.isExact());
+
+ return V;
+}
+
+unsigned AMDGPUCodeGenPrepare::getBaseElementBitWidth(const Type *T) const {
+ assert(needsPromotionToI32(T) && "T does not need promotion to i32");
+
+ if (T->isIntegerTy())
+ return T->getIntegerBitWidth();
+ return cast<VectorType>(T)->getElementType()->getIntegerBitWidth();
+}
+
+Type *AMDGPUCodeGenPrepare::getI32Ty(IRBuilder<> &B, const Type *T) const {
+ assert(needsPromotionToI32(T) && "T does not need promotion to i32");
+
+ if (T->isIntegerTy())
+ return B.getInt32Ty();
+ return VectorType::get(B.getInt32Ty(), cast<VectorType>(T)->getNumElements());
+}
+
+bool AMDGPUCodeGenPrepare::isSigned(const BinaryOperator &I) const {
+ return I.getOpcode() == Instruction::AShr ||
+ I.getOpcode() == Instruction::SDiv || I.getOpcode() == Instruction::SRem;
+}
+
+bool AMDGPUCodeGenPrepare::isSigned(const SelectInst &I) const {
+ return isa<ICmpInst>(I.getOperand(0)) ?
+ cast<ICmpInst>(I.getOperand(0))->isSigned() : false;
+}
+
+bool AMDGPUCodeGenPrepare::needsPromotionToI32(const Type *T) const {
+ if (T->isIntegerTy() && T->getIntegerBitWidth() > 1 &&
+ T->getIntegerBitWidth() <= 16)
+ return true;
+ if (!T->isVectorTy())
+ return false;
+ return needsPromotionToI32(cast<VectorType>(T)->getElementType());
+}
+
+bool AMDGPUCodeGenPrepare::promoteUniformOpToI32(BinaryOperator &I) const {
+ assert(needsPromotionToI32(I.getType()) &&
+ "I does not need promotion to i32");
+
+ if (I.getOpcode() == Instruction::SDiv ||
+ I.getOpcode() == Instruction::UDiv)
+ return false;
+
+ IRBuilder<> Builder(&I);
+ Builder.SetCurrentDebugLocation(I.getDebugLoc());
+
+ Type *I32Ty = getI32Ty(Builder, I.getType());
+ Value *ExtOp0 = nullptr;
+ Value *ExtOp1 = nullptr;
+ Value *ExtRes = nullptr;
+ Value *TruncRes = nullptr;
+
+ if (isSigned(I)) {
+ ExtOp0 = Builder.CreateSExt(I.getOperand(0), I32Ty);
+ ExtOp1 = Builder.CreateSExt(I.getOperand(1), I32Ty);
+ } else {
+ ExtOp0 = Builder.CreateZExt(I.getOperand(0), I32Ty);
+ ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32Ty);
+ }
+ ExtRes = copyFlags(I, Builder.CreateBinOp(I.getOpcode(), ExtOp0, ExtOp1));
+ TruncRes = Builder.CreateTrunc(ExtRes, I.getType());
+
+ I.replaceAllUsesWith(TruncRes);
+ I.eraseFromParent();
+
+ return true;
+}
+
+bool AMDGPUCodeGenPrepare::promoteUniformOpToI32(ICmpInst &I) const {
+ assert(needsPromotionToI32(I.getOperand(0)->getType()) &&
+ "I does not need promotion to i32");
+
+ IRBuilder<> Builder(&I);
+ Builder.SetCurrentDebugLocation(I.getDebugLoc());
+
+ Type *I32Ty = getI32Ty(Builder, I.getOperand(0)->getType());
+ Value *ExtOp0 = nullptr;
+ Value *ExtOp1 = nullptr;
+ Value *NewICmp = nullptr;
+
+ if (I.isSigned()) {
+ ExtOp0 = Builder.CreateSExt(I.getOperand(0), I32Ty);
+ ExtOp1 = Builder.CreateSExt(I.getOperand(1), I32Ty);
+ } else {
+ ExtOp0 = Builder.CreateZExt(I.getOperand(0), I32Ty);
+ ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32Ty);
+ }
+ NewICmp = Builder.CreateICmp(I.getPredicate(), ExtOp0, ExtOp1);
+
+ I.replaceAllUsesWith(NewICmp);
+ I.eraseFromParent();
+
+ return true;
+}
+
+bool AMDGPUCodeGenPrepare::promoteUniformOpToI32(SelectInst &I) const {
+ assert(needsPromotionToI32(I.getType()) &&
+ "I does not need promotion to i32");
+
+ IRBuilder<> Builder(&I);
+ Builder.SetCurrentDebugLocation(I.getDebugLoc());
+
+ Type *I32Ty = getI32Ty(Builder, I.getType());
+ Value *ExtOp1 = nullptr;
+ Value *ExtOp2 = nullptr;
+ Value *ExtRes = nullptr;
+ Value *TruncRes = nullptr;
+
+ if (isSigned(I)) {
+ ExtOp1 = Builder.CreateSExt(I.getOperand(1), I32Ty);
+ ExtOp2 = Builder.CreateSExt(I.getOperand(2), I32Ty);
+ } else {
+ ExtOp1 = Builder.CreateZExt(I.getOperand(1), I32Ty);
+ ExtOp2 = Builder.CreateZExt(I.getOperand(2), I32Ty);
+ }
+ ExtRes = Builder.CreateSelect(I.getOperand(0), ExtOp1, ExtOp2);
+ TruncRes = Builder.CreateTrunc(ExtRes, I.getType());
+
+ I.replaceAllUsesWith(TruncRes);
+ I.eraseFromParent();
+
+ return true;
+}
+
+bool AMDGPUCodeGenPrepare::promoteUniformBitreverseToI32(
+ IntrinsicInst &I) const {
+ assert(I.getIntrinsicID() == Intrinsic::bitreverse &&
+ "I must be bitreverse intrinsic");
+ assert(needsPromotionToI32(I.getType()) &&
+ "I does not need promotion to i32");
+
+ IRBuilder<> Builder(&I);
+ Builder.SetCurrentDebugLocation(I.getDebugLoc());
+
+ Type *I32Ty = getI32Ty(Builder, I.getType());
+ Function *I32 =
+ Intrinsic::getDeclaration(Mod, Intrinsic::bitreverse, { I32Ty });
+ Value *ExtOp = Builder.CreateZExt(I.getOperand(0), I32Ty);
+ Value *ExtRes = Builder.CreateCall(I32, { ExtOp });
+ Value *LShrOp =
+ Builder.CreateLShr(ExtRes, 32 - getBaseElementBitWidth(I.getType()));
+ Value *TruncRes =
+ Builder.CreateTrunc(LShrOp, I.getType());
+
+ I.replaceAllUsesWith(TruncRes);
+ I.eraseFromParent();
+
+ return true;
+}
+
static bool shouldKeepFDivF32(Value *Num, bool UnsafeDiv) {
const ConstantFP *CNum = dyn_cast<ConstantFP>(Num);
if (!CNum)
@@ -85,7 +324,6 @@ static bool shouldKeepFDivF32(Value *Num, bool UnsafeDiv) {
bool AMDGPUCodeGenPrepare::visitFDiv(BinaryOperator &FDiv) {
Type *Ty = FDiv.getType();
- // TODO: Handle half
if (!Ty->getScalarType()->isFloatTy())
return false;
@@ -154,6 +392,55 @@ static bool hasUnsafeFPMath(const Function &F) {
return Attr.getValueAsString() == "true";
}
+bool AMDGPUCodeGenPrepare::visitBinaryOperator(BinaryOperator &I) {
+ bool Changed = false;
+
+ if (ST->has16BitInsts() && needsPromotionToI32(I.getType()) &&
+ DA->isUniform(&I))
+ Changed |= promoteUniformOpToI32(I);
+
+ return Changed;
+}
+
+bool AMDGPUCodeGenPrepare::visitICmpInst(ICmpInst &I) {
+ bool Changed = false;
+
+ if (ST->has16BitInsts() && needsPromotionToI32(I.getOperand(0)->getType()) &&
+ DA->isUniform(&I))
+ Changed |= promoteUniformOpToI32(I);
+
+ return Changed;
+}
+
+bool AMDGPUCodeGenPrepare::visitSelectInst(SelectInst &I) {
+ bool Changed = false;
+
+ if (ST->has16BitInsts() && needsPromotionToI32(I.getType()) &&
+ DA->isUniform(&I))
+ Changed |= promoteUniformOpToI32(I);
+
+ return Changed;
+}
+
+bool AMDGPUCodeGenPrepare::visitIntrinsicInst(IntrinsicInst &I) {
+ switch (I.getIntrinsicID()) {
+ case Intrinsic::bitreverse:
+ return visitBitreverseIntrinsicInst(I);
+ default:
+ return false;
+ }
+}
+
+bool AMDGPUCodeGenPrepare::visitBitreverseIntrinsicInst(IntrinsicInst &I) {
+ bool Changed = false;
+
+ if (ST->has16BitInsts() && needsPromotionToI32(I.getType()) &&
+ DA->isUniform(&I))
+ Changed |= promoteUniformBitreverseToI32(I);
+
+ return Changed;
+}
+
bool AMDGPUCodeGenPrepare::doInitialization(Module &M) {
Mod = &M;
return false;
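The promoteUniform*ToI32 helpers added above widen sub-32-bit uniform operations to i32 and truncate the result back; for bitreverse the intermediate result additionally has to be shifted right by 32 minus the original bit width. A standalone sketch verifying that equivalence on plain integers (illustrative only, not the pass itself):

#include <cassert>
#include <cstdint>

static uint32_t reverseBits32(uint32_t v) {
  uint32_t r = 0;
  for (int i = 0; i < 32; ++i) {
    r = (r << 1) | (v & 1);
    v >>= 1;
  }
  return r;
}

static uint16_t reverseBits16(uint16_t v) {
  uint16_t r = 0;
  for (int i = 0; i < 16; ++i) {
    r = (r << 1) | (v & 1);
    v >>= 1;
  }
  return r;
}

int main() {
  uint16_t x = 0x1234;
  // Promote to 32 bits (zero extend), reverse, shift right by 32 - 16,
  // then truncate back to 16 bits.
  uint16_t promoted = static_cast<uint16_t>(reverseBits32(x) >> 16);
  assert(promoted == reverseBits16(x));
  return 0;
}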
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp
index bbc28b885721..805fb7102a35 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.cpp
@@ -10,23 +10,22 @@
// Interface to describe a layout of a stack frame on a AMDGPU target machine.
//
//===----------------------------------------------------------------------===//
+
#include "AMDGPUFrameLowering.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
-
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/Instructions.h"
+#include "llvm/Support/MathExtras.h"
using namespace llvm;
AMDGPUFrameLowering::AMDGPUFrameLowering(StackDirection D, unsigned StackAl,
int LAO, unsigned TransAl)
: TargetFrameLowering(D, StackAl, LAO, TransAl) { }
-AMDGPUFrameLowering::~AMDGPUFrameLowering() { }
+AMDGPUFrameLowering::~AMDGPUFrameLowering() = default;
unsigned AMDGPUFrameLowering::getStackWidth(const MachineFunction &MF) const {
-
// XXX: Hardcoding to 1 for now.
//
// I think the StackWidth should be stored as metadata associated with the
@@ -75,7 +74,7 @@ unsigned AMDGPUFrameLowering::getStackWidth(const MachineFunction &MF) const {
int AMDGPUFrameLowering::getFrameIndexReference(const MachineFunction &MF,
int FI,
unsigned &FrameReg) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
const AMDGPURegisterInfo *RI
= MF.getSubtarget<AMDGPUSubtarget>().getRegisterInfo();
@@ -86,19 +85,18 @@ int AMDGPUFrameLowering::getFrameIndexReference(const MachineFunction &MF,
// XXX: We should only do this when the shader actually uses this
// information.
unsigned OffsetBytes = 2 * (getStackWidth(MF) * 4);
- int UpperBound = FI == -1 ? MFI->getNumObjects() : FI;
+ int UpperBound = FI == -1 ? MFI.getNumObjects() : FI;
- for (int i = MFI->getObjectIndexBegin(); i < UpperBound; ++i) {
- OffsetBytes = alignTo(OffsetBytes, MFI->getObjectAlignment(i));
- OffsetBytes += MFI->getObjectSize(i);
+ for (int i = MFI.getObjectIndexBegin(); i < UpperBound; ++i) {
+ OffsetBytes = alignTo(OffsetBytes, MFI.getObjectAlignment(i));
+ OffsetBytes += MFI.getObjectSize(i);
// Each register holds 4 bytes, so we must always align the offset to at
// least 4 bytes, so that 2 frame objects won't share the same register.
OffsetBytes = alignTo(OffsetBytes, 4);
}
if (FI != -1)
- OffsetBytes = alignTo(OffsetBytes, MFI->getObjectAlignment(FI));
+ OffsetBytes = alignTo(OffsetBytes, MFI.getObjectAlignment(FI));
return OffsetBytes / (getStackWidth(MF) * 4);
}
-
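For context, getFrameIndexReference above (now taking MachineFrameInfo by reference) lays frame objects out by aligning a running byte offset to each object's alignment, adding its size, and re-aligning to 4 bytes so two objects never share a 32-bit register slot. A rough standalone sketch of that walk with made-up object sizes and alignments:

#include <cstdint>
#include <iostream>
#include <vector>

static uint32_t alignTo(uint32_t value, uint32_t align) {
  return (value + align - 1) / align * align;
}

int main() {
  struct Object { uint32_t size, align; };
  std::vector<Object> objects = {{4, 4}, {1, 1}, {8, 8}}; // hypothetical frame
  uint32_t offset = 2 * 4; // 2 * (stack width of 1 register * 4 bytes)
  for (const Object &obj : objects) {
    offset = alignTo(offset, obj.align); // place at the object's alignment
    offset += obj.size;
    offset = alignTo(offset, 4);         // keep register-sized slots distinct
  }
  std::cout << "offset in registers: " << offset / 4 << "\n";
  return 0;
}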
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.h
index 513848a1d887..5d51351a00d2 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUFrameLowering.h
@@ -11,6 +11,7 @@
/// \brief Interface to describe a layout of a stack frame on an AMDGPU target.
//
//===----------------------------------------------------------------------===//
+
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUFRAMELOWERING_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUFRAMELOWERING_H
@@ -27,7 +28,7 @@ class AMDGPUFrameLowering : public TargetFrameLowering {
public:
AMDGPUFrameLowering(StackDirection D, unsigned StackAl, int LAO,
unsigned TransAl = 1);
- virtual ~AMDGPUFrameLowering();
+ ~AMDGPUFrameLowering() override;
/// \returns The number of 32-bit sub-registers that are used when storing
/// values to the stack.
@@ -40,5 +41,7 @@ public:
return false;
}
};
-} // namespace llvm
-#endif
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUFRAMELOWERING_H
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 23c9352ce273..ef3b44f7c211 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -12,25 +12,48 @@
//
//===----------------------------------------------------------------------===//
+#include "AMDGPU.h"
#include "AMDGPUInstrInfo.h"
-#include "AMDGPUIntrinsicInfo.h"
+#include "AMDGPURegisterInfo.h"
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPUSubtarget.h"
+#include "SIDefines.h"
+#include "SIInstrInfo.h"
+#include "SIRegisterInfo.h"
#include "SIISelLowering.h"
#include "SIMachineFunctionInfo.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
-#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include <cassert>
+#include <cstdint>
+#include <new>
+#include <vector>
using namespace llvm;
namespace llvm {
+
class R600InstrInfo;
-}
+
+} // end namespace llvm
//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
@@ -38,18 +61,6 @@ class R600InstrInfo;
namespace {
-static bool isCBranchSCC(const SDNode *N) {
- assert(N->getOpcode() == ISD::BRCOND);
- if (!N->hasOneUse())
- return false;
-
- SDValue Cond = N->getOperand(1);
- if (Cond.getOpcode() == ISD::CopyToReg)
- Cond = Cond.getOperand(2);
- return Cond.getOpcode() == ISD::SETCC &&
- Cond.getOperand(0).getValueType() == MVT::i32 && Cond.hasOneUse();
-}
-
/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
@@ -58,16 +69,18 @@ class AMDGPUDAGToDAGISel : public SelectionDAGISel {
const AMDGPUSubtarget *Subtarget;
public:
- AMDGPUDAGToDAGISel(TargetMachine &TM);
- virtual ~AMDGPUDAGToDAGISel();
+ explicit AMDGPUDAGToDAGISel(TargetMachine &TM, CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(TM, OptLevel) {}
+ ~AMDGPUDAGToDAGISel() override = default;
+
bool runOnMachineFunction(MachineFunction &MF) override;
void Select(SDNode *N) override;
- const char *getPassName() const override;
- void PreprocessISelDAG() override;
+ StringRef getPassName() const override;
void PostprocessISelDAG() override;
private:
- bool isInlineImmediate(SDNode *N) const;
+ SDValue foldFrameIndex(SDValue N) const;
+ bool isInlineImmediate(const SDNode *N) const;
bool FoldOperand(SDValue &Src, SDValue &Sel, SDValue &Neg, SDValue &Abs,
const R600InstrInfo *TII);
bool FoldOperands(unsigned, const R600InstrInfo *, std::vector<SDValue> &);
@@ -145,40 +158,46 @@ private:
void SelectADD_SUB_I64(SDNode *N);
void SelectDIV_SCALE(SDNode *N);
+ void SelectFMA_W_CHAIN(SDNode *N);
+ void SelectFMUL_W_CHAIN(SDNode *N);
SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
uint32_t Offset, uint32_t Width);
void SelectS_BFEFromShifts(SDNode *N);
void SelectS_BFE(SDNode *N);
+ bool isCBranchSCC(const SDNode *N) const;
void SelectBRCOND(SDNode *N);
void SelectATOMIC_CMP_SWAP(SDNode *N);
// Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};
+
} // end anonymous namespace
/// \brief This pass converts a legalized DAG into an AMDGPU-specific
// DAG, ready for instruction scheduling.
-FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM) {
- return new AMDGPUDAGToDAGISel(TM);
+FunctionPass *llvm::createAMDGPUISelDag(TargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new AMDGPUDAGToDAGISel(TM, OptLevel);
}
-AMDGPUDAGToDAGISel::AMDGPUDAGToDAGISel(TargetMachine &TM)
- : SelectionDAGISel(TM) {}
-
bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
Subtarget = &MF.getSubtarget<AMDGPUSubtarget>();
return SelectionDAGISel::runOnMachineFunction(MF);
}
-AMDGPUDAGToDAGISel::~AMDGPUDAGToDAGISel() {
-}
+bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N) const {
+ const SIInstrInfo *TII
+ = static_cast<const SISubtarget *>(Subtarget)->getInstrInfo();
+
+ if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N))
+ return TII->isInlineConstant(C->getAPIntValue());
-bool AMDGPUDAGToDAGISel::isInlineImmediate(SDNode *N) const {
- const SITargetLowering *TL
- = static_cast<const SITargetLowering *>(getTargetLowering());
- return TL->analyzeImmediate(N) == 0;
+ if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N))
+ return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt());
+
+ return false;
}
/// \brief Determine the register class for \p OpNo
@@ -187,8 +206,21 @@ bool AMDGPUDAGToDAGISel::isInlineImmediate(SDNode *N) const {
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
unsigned OpNo) const {
- if (!N->isMachineOpcode())
+ if (!N->isMachineOpcode()) {
+ if (N->getOpcode() == ISD::CopyToReg) {
+ unsigned Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
+ return MRI.getRegClass(Reg);
+ }
+
+ const SIRegisterInfo *TRI
+ = static_cast<const SISubtarget *>(Subtarget)->getRegisterInfo();
+ return TRI->getPhysRegClass(Reg);
+ }
+
return nullptr;
+ }
switch (N->getMachineOpcode()) {
default: {
@@ -244,7 +276,7 @@ SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N) const {
static unsigned selectSGPRVectorRegClassID(unsigned NumVectorElts) {
switch (NumVectorElts) {
case 1:
- return AMDGPU::SReg_32RegClassID;
+ return AMDGPU::SReg_32_XM0RegClassID;
case 2:
return AMDGPU::SReg_64RegClassID;
case 4:
@@ -275,7 +307,11 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
// DAG legalization, so we can fold some i64 ADDs used for address
// calculation into the LOAD and STORE instructions.
case ISD::ADD:
- case ISD::SUB: {
+ case ISD::ADDC:
+ case ISD::ADDE:
+ case ISD::SUB:
+ case ISD::SUBC:
+ case ISD::SUBE: {
if (N->getValueType(0) != MVT::i64 ||
Subtarget->getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
break;
@@ -283,6 +319,15 @@ void AMDGPUDAGToDAGISel::Select(SDNode *N) {
SelectADD_SUB_I64(N);
return;
}
+ case AMDGPUISD::FMUL_W_CHAIN: {
+ SelectFMUL_W_CHAIN(N);
+ return;
+ }
+ case AMDGPUISD::FMA_W_CHAIN: {
+ SelectFMA_W_CHAIN(N);
+ return;
+ }
+
case ISD::SCALAR_TO_VECTOR:
case AMDGPUISD::BUILD_VERTICAL_VECTOR:
case ISD::BUILD_VECTOR: {
@@ -498,7 +543,7 @@ bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const {
Term->getMetadata("structurizecfg.uniform");
}
-const char *AMDGPUDAGToDAGISel::getPassName() const {
+StringRef AMDGPUDAGToDAGISel::getPassName() const {
return "AMDGPU DAG->DAG Pattern Instruction Selection";
}
@@ -580,7 +625,12 @@ void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
- bool IsAdd = (N->getOpcode() == ISD::ADD);
+ unsigned Opcode = N->getOpcode();
+ bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
+ bool ProduceCarry =
+ ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
+ bool IsAdd =
+ (Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE);
SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
@@ -596,25 +646,70 @@ void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
DL, MVT::i32, RHS, Sub1);
SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);
- SDValue AddLoArgs[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
unsigned Opc = IsAdd ? AMDGPU::S_ADD_U32 : AMDGPU::S_SUB_U32;
unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32;
- SDNode *AddLo = CurDAG->getMachineNode( Opc, DL, VTList, AddLoArgs);
- SDValue Carry(AddLo, 1);
- SDNode *AddHi
- = CurDAG->getMachineNode(CarryOpc, DL, MVT::i32,
- SDValue(Hi0, 0), SDValue(Hi1, 0), Carry);
+ SDNode *AddLo;
+ if (!ConsumeCarry) {
+ SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
+ AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
+ } else {
+ SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
+ AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
+ }
+ SDValue AddHiArgs[] = {
+ SDValue(Hi0, 0),
+ SDValue(Hi1, 0),
+ SDValue(AddLo, 1)
+ };
+ SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);
- SDValue Args[5] = {
+ SDValue RegSequenceArgs[] = {
CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
SDValue(AddLo,0),
Sub0,
SDValue(AddHi,0),
Sub1,
};
- CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
+ SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
+ MVT::i64, RegSequenceArgs);
+
+ if (ProduceCarry) {
+ // Replace the carry-use
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 1), SDValue(AddHi, 1));
+ }
+
+ // Replace the remaining uses.
+ CurDAG->ReplaceAllUsesWith(N, RegSequence);
+ CurDAG->RemoveDeadNode(N);
+}
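For reference, a scalar sketch of the expansion above (not part of the patch; plain C++ standing in for the SALU instructions, and the function name is made up): the low halves go through S_ADD_U32/S_SUB_U32, the carry held in SCC feeds S_ADDC_U32/S_SUBB_U32 for the high halves, and REG_SEQUENCE glues the two 32-bit results back into an i64.

#include <cstdint>

// Scalar model of the S_ADD_U32 + S_ADDC_U32 expansion of a 64-bit add.
uint64_t add64ViaCarryChain(uint64_t A, uint64_t B) {
  uint32_t Lo = uint32_t(A) + uint32_t(B);                      // S_ADD_U32 (writes SCC)
  uint32_t Carry = Lo < uint32_t(A);                            // carry-out held in SCC
  uint32_t Hi = uint32_t(A >> 32) + uint32_t(B >> 32) + Carry;  // S_ADDC_U32
  return (uint64_t(Hi) << 32) | Lo;                             // REG_SEQUENCE sub0/sub1
}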
+
+void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
+ SDLoc SL(N);
+ // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
+ SDValue Ops[10];
+
+ SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
+ SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
+ SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
+ Ops[8] = N->getOperand(0);
+ Ops[9] = N->getOperand(4);
+
+ CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32, N->getVTList(), Ops);
+}
+
+void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
+ SDLoc SL(N);
+ // src0_modifiers, src0, src1_modifiers, src1, clamp, omod
+ SDValue Ops[8];
+
+ SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
+ SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
+ Ops[6] = N->getOperand(0);
+ Ops[7] = N->getOperand(3);
+
+ CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
}
// We need to handle this here because tablegen doesn't support matching
@@ -779,6 +874,9 @@ bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
}
// default case
+
+ // FIXME: This is broken on SI where we still need to check if the base
+ // pointer is positive here.
Base = Addr;
Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
@@ -825,7 +923,6 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
Ptr = N2;
VAddr = N3;
} else {
-
// (add N0, C1) -> offset
VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
Ptr = N0;
@@ -903,6 +1000,12 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE);
}
+SDValue AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const {
+ if (auto FI = dyn_cast<FrameIndexSDNode>(N))
+ return CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0));
+ return N;
+}
+
bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
SDValue &VAddr, SDValue &SOffset,
SDValue &ImmOffset) const {
@@ -922,14 +1025,14 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratch(SDValue Addr, SDValue &Rsrc,
// Offsets in vaddr must be positive.
ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
if (isLegalMUBUFImmOffset(C1)) {
- VAddr = N0;
+ VAddr = foldFrameIndex(N0);
ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
return true;
}
}
// (node)
- VAddr = Addr;
+ VAddr = foldFrameIndex(Addr);
ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
return true;
}
@@ -1122,7 +1225,6 @@ bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
SDValue &Offset, bool &Imm) const {
-
SDLoc SL(Addr);
if (CurDAG->isBaseWithConstantOffset(Addr)) {
SDValue N0 = Addr.getOperand(0);
@@ -1327,36 +1429,53 @@ void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
SelectCode(N);
}
+bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const {
+ assert(N->getOpcode() == ISD::BRCOND);
+ if (!N->hasOneUse())
+ return false;
+
+ SDValue Cond = N->getOperand(1);
+ if (Cond.getOpcode() == ISD::CopyToReg)
+ Cond = Cond.getOperand(2);
+
+ if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse())
+ return false;
+
+ MVT VT = Cond.getOperand(0).getSimpleValueType();
+ if (VT == MVT::i32)
+ return true;
+
+ if (VT == MVT::i64) {
+ auto ST = static_cast<const SISubtarget *>(Subtarget);
+
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+ return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64();
+ }
+
+ return false;
+}
+
void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
SDValue Cond = N->getOperand(1);
+ if (Cond.isUndef()) {
+ CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
+ N->getOperand(2), N->getOperand(0));
+ return;
+ }
+
if (isCBranchSCC(N)) {
// This brcond will use S_CBRANCH_SCC*, so let tablegen handle it.
SelectCode(N);
return;
}
- // The result of VOPC instructions is or'd against ~EXEC before it is
- // written to vcc or another SGPR. This means that the value '1' is always
- // written to the corresponding bit for results that are masked. In order
- // to correctly check against vccz, we need to and VCC with the EXEC
- // register in order to clear the value from the masked bits.
-
SDLoc SL(N);
- SDNode *MaskedCond =
- CurDAG->getMachineNode(AMDGPU::S_AND_B64, SL, MVT::i1,
- CurDAG->getRegister(AMDGPU::EXEC, MVT::i1),
- Cond);
- SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, AMDGPU::VCC,
- SDValue(MaskedCond, 0),
- SDValue()); // Passing SDValue() adds a
- // glue output.
+ SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, AMDGPU::VCC, Cond);
CurDAG->SelectNodeTo(N, AMDGPU::S_CBRANCH_VCCNZ, MVT::Other,
N->getOperand(2), // Basic Block
- VCC.getValue(0), // Chain
- VCC.getValue(1)); // Glue
- return;
+ VCC.getValue(0));
}
// This is here because there isn't a way to use the generated sub0_sub1 as the
@@ -1427,7 +1546,6 @@ void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src,
SDValue &SrcMods) const {
-
unsigned Mods = 0;
Src = In;
@@ -1491,62 +1609,6 @@ bool AMDGPUDAGToDAGISel::SelectVOP3Mods0Clamp0OMod(SDValue In, SDValue &Src,
return SelectVOP3Mods(In, Src, SrcMods);
}
-void AMDGPUDAGToDAGISel::PreprocessISelDAG() {
- MachineFrameInfo *MFI = CurDAG->getMachineFunction().getFrameInfo();
-
- // Handle the perverse case where a frame index is being stored. We don't
- // want to see multiple frame index operands on the same instruction since
- // it complicates things and violates some assumptions about frame index
- // lowering.
- for (int I = MFI->getObjectIndexBegin(), E = MFI->getObjectIndexEnd();
- I != E; ++I) {
- SDValue FI = CurDAG->getTargetFrameIndex(I, MVT::i32);
-
- // It's possible that we have a frame index defined in the function that
- // isn't used in this block.
- if (FI.use_empty())
- continue;
-
- // Skip over the AssertZext inserted during lowering.
- SDValue EffectiveFI = FI;
- auto It = FI->use_begin();
- if (It->getOpcode() == ISD::AssertZext && FI->hasOneUse()) {
- EffectiveFI = SDValue(*It, 0);
- It = EffectiveFI->use_begin();
- }
-
- for (auto It = EffectiveFI->use_begin(); !It.atEnd(); ) {
- SDUse &Use = It.getUse();
- SDNode *User = Use.getUser();
- unsigned OpIdx = It.getOperandNo();
- ++It;
-
- if (MemSDNode *M = dyn_cast<MemSDNode>(User)) {
- unsigned PtrIdx = M->getOpcode() == ISD::STORE ? 2 : 1;
- if (OpIdx == PtrIdx)
- continue;
-
- unsigned OpN = M->getNumOperands();
- SDValue NewOps[8];
-
- assert(OpN < array_lengthof(NewOps));
- for (unsigned Op = 0; Op != OpN; ++Op) {
- if (Op != OpIdx) {
- NewOps[Op] = M->getOperand(Op);
- continue;
- }
-
- MachineSDNode *Mov = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
- SDLoc(M), MVT::i32, FI);
- NewOps[Op] = SDValue(Mov, 0);
- }
-
- CurDAG->UpdateNodeOperands(M, makeArrayRef(NewOps, OpN));
- }
- }
- }
-}
-
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
const AMDGPUTargetLowering& Lowering =
*static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 352423ed3ad6..a87204d46eae 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -37,7 +37,7 @@ static bool allocateKernArg(unsigned ValNo, MVT ValVT, MVT LocVT,
MachineFunction &MF = State.getMachineFunction();
AMDGPUMachineFunction *MFI = MF.getInfo<AMDGPUMachineFunction>();
- uint64_t Offset = MFI->allocateKernArg(ValVT.getStoreSize(),
+ uint64_t Offset = MFI->allocateKernArg(LocVT.getStoreSize(),
ArgFlags.getOrigAlign());
State.addLoc(CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
return true;
@@ -55,14 +55,6 @@ EVT AMDGPUTargetLowering::getEquivalentMemType(LLVMContext &Ctx, EVT VT) {
return EVT::getVectorVT(Ctx, MVT::i32, StoreSize / 32);
}
-EVT AMDGPUTargetLowering::getEquivalentBitType(LLVMContext &Ctx, EVT VT) {
- unsigned StoreSize = VT.getStoreSizeInBits();
- if (StoreSize <= 32)
- return EVT::getIntegerVT(Ctx, StoreSize);
-
- return EVT::getVectorVT(Ctx, MVT::i32, StoreSize / 32);
-}
-
AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
const AMDGPUSubtarget &STI)
: TargetLowering(TM), Subtarget(&STI) {
@@ -287,6 +279,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
}
setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand);
+ setOperationAction(ISD::FP_TO_FP16, MVT::f64, Custom);
const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
for (MVT VT : ScalarIntVTs) {
@@ -367,6 +360,8 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FP_TO_SINT, VT, Expand);
setOperationAction(ISD::FP_TO_UINT, VT, Expand);
setOperationAction(ISD::MUL, VT, Expand);
+ setOperationAction(ISD::MULHU, VT, Expand);
+ setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::OR, VT, Expand);
setOperationAction(ISD::SHL, VT, Expand);
setOperationAction(ISD::SRA, VT, Expand);
@@ -440,22 +435,24 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SELECT, MVT::v4f32, Promote);
AddPromotedToType(ISD::SELECT, MVT::v4f32, MVT::v4i32);
+ // There are no libcalls of any kind.
+ for (int I = 0; I < RTLIB::UNKNOWN_LIBCALL; ++I)
+ setLibcallName(static_cast<RTLIB::Libcall>(I), nullptr);
+
setBooleanContents(ZeroOrNegativeOneBooleanContent);
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
setSchedulingPreference(Sched::RegPressure);
setJumpIsExpensive(true);
+ setHasMultipleConditionRegisters(true);
// SI at least has hardware support for floating point exceptions, but no way
// of using or handling them is implemented. They are also optional in OpenCL
// (Section 7.3)
setHasFloatingPointExceptions(Subtarget->hasFPExceptions());
- setSelectIsExpensive(false);
PredictableSelectIsExpensive = false;
- setFsqrtIsCheap(true);
-
// We want to find all load dependencies for long chains of stores to enable
// merging into very wide vectors. The problem is with vectors with > 4
// elements. MergeConsecutiveStores will attempt to merge these because x8/x16
@@ -472,11 +469,12 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
MaxStoresPerMemset = 4096;
setTargetDAGCombine(ISD::BITCAST);
- setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::SRA);
setTargetDAGCombine(ISD::SRL);
setTargetDAGCombine(ISD::MUL);
+ setTargetDAGCombine(ISD::MULHU);
+ setTargetDAGCombine(ISD::MULHS);
setTargetDAGCombine(ISD::SELECT);
setTargetDAGCombine(ISD::SELECT_CC);
setTargetDAGCombine(ISD::STORE);
@@ -500,7 +498,8 @@ bool AMDGPUTargetLowering::isSelectSupported(SelectSupportKind SelType) const {
// FIXME: Why are we reporting vectors of FP immediates as legal?
bool AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
EVT ScalarVT = VT.getScalarType();
- return (ScalarVT == MVT::f32 || ScalarVT == MVT::f64);
+ return (ScalarVT == MVT::f32 || ScalarVT == MVT::f64 ||
+ (ScalarVT == MVT::f16 && Subtarget->has16BitInsts()));
}
// We don't want to shrink f64 / f32 constants.
@@ -565,12 +564,12 @@ bool AMDGPUTargetLowering::isCheapToSpeculateCtlz() const {
bool AMDGPUTargetLowering::isFAbsFree(EVT VT) const {
assert(VT.isFloatingPoint());
- return VT == MVT::f32 || VT == MVT::f64;
+ return VT == MVT::f32 || VT == MVT::f64 || (Subtarget->has16BitInsts() &&
+ VT == MVT::f16);
}
bool AMDGPUTargetLowering::isFNegFree(EVT VT) const {
- assert(VT.isFloatingPoint());
- return VT == MVT::f32 || VT == MVT::f64;
+ return isFAbsFree(VT);
}
bool AMDGPUTargetLowering:: storeOfVectorConstantIsCheap(EVT MemVT,
@@ -593,19 +592,32 @@ bool AMDGPUTargetLowering::aggressivelyPreferBuildVectorSources(EVT VecVT) const
bool AMDGPUTargetLowering::isTruncateFree(EVT Source, EVT Dest) const {
// Truncate is just accessing a subregister.
- return Dest.bitsLT(Source) && (Dest.getSizeInBits() % 32 == 0);
+
+ unsigned SrcSize = Source.getSizeInBits();
+ unsigned DestSize = Dest.getSizeInBits();
+
+ return DestSize < SrcSize && DestSize % 32 == 0;
}
bool AMDGPUTargetLowering::isTruncateFree(Type *Source, Type *Dest) const {
// Truncate is just accessing a subregister.
- return Dest->getPrimitiveSizeInBits() < Source->getPrimitiveSizeInBits() &&
- (Dest->getPrimitiveSizeInBits() % 32 == 0);
+
+ unsigned SrcSize = Source->getScalarSizeInBits();
+ unsigned DestSize = Dest->getScalarSizeInBits();
+
+ if (DestSize == 16 && Subtarget->has16BitInsts())
+ return SrcSize >= 32;
+
+ return DestSize < SrcSize && DestSize % 32 == 0;
}
bool AMDGPUTargetLowering::isZExtFree(Type *Src, Type *Dest) const {
unsigned SrcSize = Src->getScalarSizeInBits();
unsigned DestSize = Dest->getScalarSizeInBits();
+ if (SrcSize == 16 && Subtarget->has16BitInsts())
+ return DestSize >= 32;
+
return SrcSize == 32 && DestSize == 64;
}
@@ -614,6 +626,10 @@ bool AMDGPUTargetLowering::isZExtFree(EVT Src, EVT Dest) const {
// practical purposes, the extra mov 0 to load a 64-bit is free. As used,
// this will enable reducing 64-bit operations to 32-bit, which is always
// good.
+
+ if (Src == MVT::i16)
+ return Dest == MVT::i32 || Dest == MVT::i64;
+
return Src == MVT::i32 && Dest == MVT::i64;
}
@@ -635,9 +651,105 @@ bool AMDGPUTargetLowering::isNarrowingProfitable(EVT SrcVT, EVT DestVT) const {
// TargetLowering Callbacks
//===---------------------------------------------------------------------===//
-void AMDGPUTargetLowering::AnalyzeFormalArguments(CCState &State,
+/// The SelectionDAGBuilder will automatically promote function arguments
+/// with illegal types. However, this does not work for the AMDGPU targets
+/// since the function arguments are stored in memory as these illegal types.
+/// In order to handle this properly we need to get the original types sizes
+/// from the LLVM IR Function and fix up the ISD::InputArg values before
+/// passing them to AnalyzeFormalArguments()
+
+/// When the SelectionDAGBuilder computes the Ins, it takes care of splitting
+/// input values across multiple registers. Each item in the Ins array
+/// represents a single value that will be stored in registers. Ins[x].VT is
+/// the value type of the value that will be stored in the register, so
+/// whatever SDNode we lower the argument to needs to be this type.
+///
+/// In order to correctly lower the arguments we need to know the size of each
+/// argument. Since Ins[x].VT gives us the size of the register that will
+/// hold the value, we need to look at Ins[x].ArgVT to see the 'real' type
+/// for the original function argument so that we can deduce the correct memory
+/// type to use for Ins[x]. In most cases the correct memory type will be
+/// Ins[x].ArgVT. However, this will not always be the case. If, for example,
+/// we have a kernel argument of type v8i8, this argument will be split into
+/// 8 parts and each part will be represented by its own item in the Ins array.
+/// For each part the Ins[x].ArgVT will be the v8i8, which is the full type of
+/// the argument before it was split. From this, we deduce that the memory type
+/// for each individual part is i8. We pass the memory type as LocVT to the
+/// calling convention analysis function and the register type (Ins[x].VT) as
+/// the ValVT.
+void AMDGPUTargetLowering::analyzeFormalArgumentsCompute(CCState &State,
const SmallVectorImpl<ISD::InputArg> &Ins) const {
+ for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
+ const ISD::InputArg &In = Ins[i];
+ EVT MemVT;
+
+ unsigned NumRegs = getNumRegisters(State.getContext(), In.ArgVT);
+
+ if (!Subtarget->isAmdHsaOS() &&
+ (In.ArgVT == MVT::i16 || In.ArgVT == MVT::i8 || In.ArgVT == MVT::f16)) {
+ // The ABI says the caller will extend these values to 32-bits.
+ MemVT = In.ArgVT.isInteger() ? MVT::i32 : MVT::f32;
+ } else if (NumRegs == 1) {
+ // This argument is not split, so the IR type is the memory type.
+ assert(!In.Flags.isSplit());
+ if (In.ArgVT.isExtended()) {
+ // We have an extended type, like i24, so we should just use the register type
+ MemVT = In.VT;
+ } else {
+ MemVT = In.ArgVT;
+ }
+ } else if (In.ArgVT.isVector() && In.VT.isVector() &&
+ In.ArgVT.getScalarType() == In.VT.getScalarType()) {
+ assert(In.ArgVT.getVectorNumElements() > In.VT.getVectorNumElements());
+ // We have a vector value which has been split into a vector with
+ // the same scalar type, but fewer elements. This should handle
+ // all the floating-point vector types.
+ MemVT = In.VT;
+ } else if (In.ArgVT.isVector() &&
+ In.ArgVT.getVectorNumElements() == NumRegs) {
+ // This arg has been split so that each element is stored in a separate
+ // register.
+ MemVT = In.ArgVT.getScalarType();
+ } else if (In.ArgVT.isExtended()) {
+ // We have an extended type, like i65.
+ MemVT = In.VT;
+ } else {
+ unsigned MemoryBits = In.ArgVT.getStoreSizeInBits() / NumRegs;
+ assert(In.ArgVT.getStoreSizeInBits() % NumRegs == 0);
+ if (In.VT.isInteger()) {
+ MemVT = EVT::getIntegerVT(State.getContext(), MemoryBits);
+ } else if (In.VT.isVector()) {
+ assert(!In.VT.getScalarType().isFloatingPoint());
+ unsigned NumElements = In.VT.getVectorNumElements();
+ assert(MemoryBits % NumElements == 0);
+ // This vector type has been split into another vector type with
+ // a different elements size.
+ EVT ScalarVT = EVT::getIntegerVT(State.getContext(),
+ MemoryBits / NumElements);
+ MemVT = EVT::getVectorVT(State.getContext(), ScalarVT, NumElements);
+ } else {
+ llvm_unreachable("cannot deduce memory type.");
+ }
+ }
+
+ // Convert one element vectors to scalar.
+ if (MemVT.isVector() && MemVT.getVectorNumElements() == 1)
+ MemVT = MemVT.getScalarType();
+ if (MemVT.isExtended()) {
+ // This should really only happen if we have vec3 arguments
+ assert(MemVT.isVector() && MemVT.getVectorNumElements() == 3);
+ MemVT = MemVT.getPow2VectorType(State.getContext());
+ }
+
+ assert(MemVT.isSimple());
+ allocateKernArg(i, In.VT, MemVT.getSimpleVT(), CCValAssign::Full, In.Flags,
+ State);
+ }
+}
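For reference, a toy sketch of the v8i8 case described in the comment above (not part of the patch; plain bit widths stand in for LLVM's EVT machinery and the helper name is hypothetical): the argument is split into NumRegs parts, each carried in a 32-bit register, while the deduced in-memory element type is the 8-bit scalar.

#include <cassert>

// Toy model of the memory-type deduction; not LLVM API.
unsigned deduceMemEltBits(unsigned ArgVTElts, unsigned ArgVTEltBits,
                          unsigned RegBits, unsigned NumRegs) {
  if (ArgVTElts == NumRegs)
    return ArgVTEltBits; // one register per element: v8i8 split 8 ways -> i8
  return RegBits;        // otherwise the register type is the memory type
}

int main() {
  assert(deduceMemEltBits(8, 8, 32, 8) == 8);   // v8i8 kernel arg -> i8 parts
  assert(deduceMemEltBits(1, 32, 32, 1) == 32); // unsplit i32 stays i32
}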
+
+void AMDGPUTargetLowering::AnalyzeFormalArguments(CCState &State,
+ const SmallVectorImpl<ISD::InputArg> &Ins) const {
State.AnalyzeFormalArguments(Ins, CC_AMDGPU);
}
@@ -678,8 +790,10 @@ SDValue AMDGPUTargetLowering::LowerCall(CallLoweringInfo &CLI,
Fn, "unsupported call to function " + FuncName, CLI.DL.getDebugLoc());
DAG.getContext()->diagnose(NoCalls);
- for (unsigned I = 0, E = CLI.Ins.size(); I != E; ++I)
- InVals.push_back(DAG.getUNDEF(CLI.Ins[I].VT));
+ if (!CLI.IsTailCall) {
+ for (unsigned I = 0, E = CLI.Ins.size(); I != E; ++I)
+ InVals.push_back(DAG.getUNDEF(CLI.Ins[I].VT));
+ }
return DAG.getEntryNode();
}
@@ -718,6 +832,7 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op,
case ISD::FFLOOR: return LowerFFLOOR(Op, DAG);
case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
+ case ISD::FP_TO_FP16: return LowerFP_TO_FP16(Op, DAG);
case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG);
case ISD::CTLZ:
@@ -745,94 +860,6 @@ void AMDGPUTargetLowering::ReplaceNodeResults(SDNode *N,
}
}
-// FIXME: This implements accesses to initialized globals in the constant
-// address space by copying them to private and accessing that. It does not
-// properly handle illegal types or vectors. The private vector loads are not
-// scalarized, and the illegal scalars hit an assertion. This technique will not
-// work well with large initializers, and this should eventually be
-// removed. Initialized globals should be placed into a data section that the
-// runtime will load into a buffer before the kernel is executed. Uses of the
-// global need to be replaced with a pointer loaded from an implicit kernel
-// argument into this buffer holding the copy of the data, which will remove the
-// need for any of this.
-SDValue AMDGPUTargetLowering::LowerConstantInitializer(const Constant* Init,
- const GlobalValue *GV,
- const SDValue &InitPtr,
- SDValue Chain,
- SelectionDAG &DAG) const {
- const DataLayout &TD = DAG.getDataLayout();
- SDLoc DL(InitPtr);
- Type *InitTy = Init->getType();
-
- if (const ConstantInt *CI = dyn_cast<ConstantInt>(Init)) {
- EVT VT = EVT::getEVT(InitTy);
- PointerType *PtrTy = PointerType::get(InitTy, AMDGPUAS::PRIVATE_ADDRESS);
- return DAG.getStore(Chain, DL, DAG.getConstant(*CI, DL, VT), InitPtr,
- MachinePointerInfo(UndefValue::get(PtrTy)),
- TD.getPrefTypeAlignment(InitTy));
- }
-
- if (const ConstantFP *CFP = dyn_cast<ConstantFP>(Init)) {
- EVT VT = EVT::getEVT(CFP->getType());
- PointerType *PtrTy = PointerType::get(CFP->getType(), 0);
- return DAG.getStore(Chain, DL, DAG.getConstantFP(*CFP, DL, VT), InitPtr,
- MachinePointerInfo(UndefValue::get(PtrTy)),
- TD.getPrefTypeAlignment(CFP->getType()));
- }
-
- if (StructType *ST = dyn_cast<StructType>(InitTy)) {
- const StructLayout *SL = TD.getStructLayout(ST);
-
- EVT PtrVT = InitPtr.getValueType();
- SmallVector<SDValue, 8> Chains;
-
- for (unsigned I = 0, N = ST->getNumElements(); I != N; ++I) {
- SDValue Offset = DAG.getConstant(SL->getElementOffset(I), DL, PtrVT);
- SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, InitPtr, Offset);
-
- Constant *Elt = Init->getAggregateElement(I);
- Chains.push_back(LowerConstantInitializer(Elt, GV, Ptr, Chain, DAG));
- }
-
- return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
- }
-
- if (SequentialType *SeqTy = dyn_cast<SequentialType>(InitTy)) {
- EVT PtrVT = InitPtr.getValueType();
-
- unsigned NumElements;
- if (ArrayType *AT = dyn_cast<ArrayType>(SeqTy))
- NumElements = AT->getNumElements();
- else if (VectorType *VT = dyn_cast<VectorType>(SeqTy))
- NumElements = VT->getNumElements();
- else
- llvm_unreachable("Unexpected type");
-
- unsigned EltSize = TD.getTypeAllocSize(SeqTy->getElementType());
- SmallVector<SDValue, 8> Chains;
- for (unsigned i = 0; i < NumElements; ++i) {
- SDValue Offset = DAG.getConstant(i * EltSize, DL, PtrVT);
- SDValue Ptr = DAG.getNode(ISD::ADD, DL, PtrVT, InitPtr, Offset);
-
- Constant *Elt = Init->getAggregateElement(i);
- Chains.push_back(LowerConstantInitializer(Elt, GV, Ptr, Chain, DAG));
- }
-
- return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
- }
-
- if (isa<UndefValue>(Init)) {
- EVT VT = EVT::getEVT(InitTy);
- PointerType *PtrTy = PointerType::get(InitTy, AMDGPUAS::PRIVATE_ADDRESS);
- return DAG.getStore(Chain, DL, DAG.getUNDEF(VT), InitPtr,
- MachinePointerInfo(UndefValue::get(PtrTy)),
- TD.getPrefTypeAlignment(InitTy));
- }
-
- Init->dump();
- llvm_unreachable("Unhandled constant initializer");
-}
-
static bool hasDefinedInitializer(const GlobalValue *GV) {
const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
if (!GVar || !GVar->hasInitializer())
@@ -850,11 +877,6 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
const GlobalValue *GV = G->getGlobal();
switch (G->getAddressSpace()) {
- case AMDGPUAS::CONSTANT_ADDRESS: {
- MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
- SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(G), ConstPtrVT);
- return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(G), ConstPtrVT, GA);
- }
case AMDGPUAS::LOCAL_ADDRESS: {
// XXX: What does the value of G->getOffset() mean?
assert(G->getOffset() == 0 &&
@@ -864,24 +886,8 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
if (hasDefinedInitializer(GV))
break;
- unsigned Offset;
- if (MFI->LocalMemoryObjects.count(GV) == 0) {
- unsigned Align = GV->getAlignment();
- if (Align == 0)
- Align = DL.getABITypeAlignment(GV->getValueType());
-
- /// TODO: We should sort these to minimize wasted space due to alignment
- /// padding. Currently the padding is decided by the first encountered use
- /// during lowering.
- Offset = MFI->LDSSize = alignTo(MFI->LDSSize, Align);
- MFI->LocalMemoryObjects[GV] = Offset;
- MFI->LDSSize += DL.getTypeAllocSize(GV->getValueType());
- } else {
- Offset = MFI->LocalMemoryObjects[GV];
- }
-
- return DAG.getConstant(Offset, SDLoc(Op),
- getPointerTy(DL, AMDGPUAS::LOCAL_ADDRESS));
+ unsigned Offset = MFI->allocateLDSGlobal(DL, *GV);
+ return DAG.getConstant(Offset, SDLoc(Op), Op.getValueType());
}
}
@@ -1097,65 +1103,6 @@ SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue Op,
return DAG.getMergeValues(Ops, SL);
}
-// FIXME: This isn't doing anything for SI. This should be used in a target
-// combine during type legalization.
-SDValue AMDGPUTargetLowering::MergeVectorStore(const SDValue &Op,
- SelectionDAG &DAG) const {
- StoreSDNode *Store = cast<StoreSDNode>(Op);
- EVT MemVT = Store->getMemoryVT();
- unsigned MemBits = MemVT.getSizeInBits();
-
- // Byte stores are really expensive, so if possible, try to pack 32-bit vector
- // truncating store into an i32 store.
- // XXX: We could also handle optimize other vector bitwidths.
- if (!MemVT.isVector() || MemBits > 32) {
- return SDValue();
- }
-
- SDLoc DL(Op);
- SDValue Value = Store->getValue();
- EVT VT = Value.getValueType();
- EVT ElemVT = VT.getVectorElementType();
- SDValue Ptr = Store->getBasePtr();
- EVT MemEltVT = MemVT.getVectorElementType();
- unsigned MemEltBits = MemEltVT.getSizeInBits();
- unsigned MemNumElements = MemVT.getVectorNumElements();
- unsigned PackedSize = MemVT.getStoreSizeInBits();
- SDValue Mask = DAG.getConstant((1 << MemEltBits) - 1, DL, MVT::i32);
-
- assert(Value.getValueType().getScalarSizeInBits() >= 32);
-
- SDValue PackedValue;
- for (unsigned i = 0; i < MemNumElements; ++i) {
- SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT, Value,
- DAG.getConstant(i, DL, MVT::i32));
- Elt = DAG.getZExtOrTrunc(Elt, DL, MVT::i32);
- Elt = DAG.getNode(ISD::AND, DL, MVT::i32, Elt, Mask); // getZeroExtendInReg
-
- SDValue Shift = DAG.getConstant(MemEltBits * i, DL, MVT::i32);
- Elt = DAG.getNode(ISD::SHL, DL, MVT::i32, Elt, Shift);
-
- if (i == 0) {
- PackedValue = Elt;
- } else {
- PackedValue = DAG.getNode(ISD::OR, DL, MVT::i32, PackedValue, Elt);
- }
- }
-
- if (PackedSize < 32) {
- EVT PackedVT = EVT::getIntegerVT(*DAG.getContext(), PackedSize);
- return DAG.getTruncStore(Store->getChain(), DL, PackedValue, Ptr,
- Store->getMemOperand()->getPointerInfo(), PackedVT,
- Store->getAlignment(),
- Store->getMemOperand()->getFlags());
- }
-
- return DAG.getStore(Store->getChain(), DL, PackedValue, Ptr,
- Store->getMemOperand()->getPointerInfo(),
- Store->getAlignment(),
- Store->getMemOperand()->getFlags());
-}
-
SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
SelectionDAG &DAG) const {
StoreSDNode *Store = cast<StoreSDNode>(Op);
@@ -1670,7 +1617,7 @@ SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const {
assert(Op.getValueType() == MVT::f64);
- APFloat C1Val(APFloat::IEEEdouble, "0x1.0p+52");
+ APFloat C1Val(APFloat::IEEEdouble(), "0x1.0p+52");
SDValue C1 = DAG.getConstantFP(C1Val, SL, MVT::f64);
SDValue CopySign = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f64, C1, Src);
@@ -1681,7 +1628,7 @@ SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const {
SDValue Fabs = DAG.getNode(ISD::FABS, SL, MVT::f64, Src);
- APFloat C2Val(APFloat::IEEEdouble, "0x1.fffffffffffffp+51");
+ APFloat C2Val(APFloat::IEEEdouble(), "0x1.fffffffffffffp+51");
SDValue C2 = DAG.getConstantFP(C2Val, SL, MVT::f64);
EVT SetCCVT =
@@ -1988,14 +1935,26 @@ SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op,
assert(Op.getOperand(0).getValueType() == MVT::i64 &&
"operation should be legal");
+ // TODO: Factor out code common with LowerSINT_TO_FP.
+
EVT DestVT = Op.getValueType();
- if (DestVT == MVT::f64)
- return LowerINT_TO_FP64(Op, DAG, false);
+ if (Subtarget->has16BitInsts() && DestVT == MVT::f16) {
+ SDLoc DL(Op);
+ SDValue Src = Op.getOperand(0);
+
+ SDValue IntToFp32 = DAG.getNode(Op.getOpcode(), DL, MVT::f32, Src);
+ SDValue FPRoundFlag = DAG.getIntPtrConstant(0, SDLoc(Op));
+ SDValue FPRound =
+ DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, IntToFp32, FPRoundFlag);
+
+ return FPRound;
+ }
if (DestVT == MVT::f32)
return LowerINT_TO_FP32(Op, DAG, false);
- return SDValue();
+ assert(DestVT == MVT::f64);
+ return LowerINT_TO_FP64(Op, DAG, false);
}
SDValue AMDGPUTargetLowering::LowerSINT_TO_FP(SDValue Op,
@@ -2003,14 +1962,26 @@ SDValue AMDGPUTargetLowering::LowerSINT_TO_FP(SDValue Op,
assert(Op.getOperand(0).getValueType() == MVT::i64 &&
"operation should be legal");
+ // TODO: Factor out code common with LowerUINT_TO_FP.
+
EVT DestVT = Op.getValueType();
+ if (Subtarget->has16BitInsts() && DestVT == MVT::f16) {
+ SDLoc DL(Op);
+ SDValue Src = Op.getOperand(0);
+
+ SDValue IntToFp32 = DAG.getNode(Op.getOpcode(), DL, MVT::f32, Src);
+ SDValue FPRoundFlag = DAG.getIntPtrConstant(0, SDLoc(Op));
+ SDValue FPRound =
+ DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, IntToFp32, FPRoundFlag);
+
+ return FPRound;
+ }
+
if (DestVT == MVT::f32)
return LowerINT_TO_FP32(Op, DAG, true);
- if (DestVT == MVT::f64)
- return LowerINT_TO_FP64(Op, DAG, true);
-
- return SDValue();
+ assert(DestVT == MVT::f64);
+ return LowerINT_TO_FP64(Op, DAG, true);
}
SDValue AMDGPUTargetLowering::LowerFP64_TO_INT(SDValue Op, SelectionDAG &DAG,
@@ -2042,10 +2013,118 @@ SDValue AMDGPUTargetLowering::LowerFP64_TO_INT(SDValue Op, SelectionDAG &DAG,
return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Result);
}
+SDValue AMDGPUTargetLowering::LowerFP_TO_FP16(SDValue Op, SelectionDAG &DAG) const {
+
+ if (getTargetMachine().Options.UnsafeFPMath) {
+ // There is a generic expand for FP_TO_FP16 with unsafe fast math.
+ return SDValue();
+ }
+
+ SDLoc DL(Op);
+ SDValue N0 = Op.getOperand(0);
+ assert(N0.getSimpleValueType() == MVT::f64);
+
+ // f64 -> f16 conversion using round-to-nearest-even rounding mode.
+ const unsigned ExpMask = 0x7ff;
+ const unsigned ExpBiasf64 = 1023;
+ const unsigned ExpBiasf16 = 15;
+ SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
+ SDValue One = DAG.getConstant(1, DL, MVT::i32);
+ SDValue U = DAG.getNode(ISD::BITCAST, DL, MVT::i64, N0);
+ SDValue UH = DAG.getNode(ISD::SRL, DL, MVT::i64, U,
+ DAG.getConstant(32, DL, MVT::i64));
+ UH = DAG.getZExtOrTrunc(UH, DL, MVT::i32);
+ U = DAG.getZExtOrTrunc(U, DL, MVT::i32);
+ SDValue E = DAG.getNode(ISD::SRL, DL, MVT::i32, UH,
+ DAG.getConstant(20, DL, MVT::i64));
+ E = DAG.getNode(ISD::AND, DL, MVT::i32, E,
+ DAG.getConstant(ExpMask, DL, MVT::i32));
+ // Subtract the fp64 exponent bias (1023) to get the real exponent and
+ // add the f16 bias (15) to get the biased exponent for the f16 format.
+ E = DAG.getNode(ISD::ADD, DL, MVT::i32, E,
+ DAG.getConstant(-ExpBiasf64 + ExpBiasf16, DL, MVT::i32));
+
+ SDValue M = DAG.getNode(ISD::SRL, DL, MVT::i32, UH,
+ DAG.getConstant(8, DL, MVT::i32));
+ M = DAG.getNode(ISD::AND, DL, MVT::i32, M,
+ DAG.getConstant(0xffe, DL, MVT::i32));
+
+ SDValue MaskedSig = DAG.getNode(ISD::AND, DL, MVT::i32, UH,
+ DAG.getConstant(0x1ff, DL, MVT::i32));
+ MaskedSig = DAG.getNode(ISD::OR, DL, MVT::i32, MaskedSig, U);
+
+ SDValue Lo40Set = DAG.getSelectCC(DL, MaskedSig, Zero, Zero, One, ISD::SETEQ);
+ M = DAG.getNode(ISD::OR, DL, MVT::i32, M, Lo40Set);
+
+ // (M != 0 ? 0x0200 : 0) | 0x7c00;
+ SDValue I = DAG.getNode(ISD::OR, DL, MVT::i32,
+ DAG.getSelectCC(DL, M, Zero, DAG.getConstant(0x0200, DL, MVT::i32),
+ Zero, ISD::SETNE), DAG.getConstant(0x7c00, DL, MVT::i32));
+
+ // N = M | (E << 12);
+ SDValue N = DAG.getNode(ISD::OR, DL, MVT::i32, M,
+ DAG.getNode(ISD::SHL, DL, MVT::i32, E,
+ DAG.getConstant(12, DL, MVT::i32)));
+
+ // B = clamp(1-E, 0, 13);
+ SDValue OneSubExp = DAG.getNode(ISD::SUB, DL, MVT::i32,
+ One, E);
+ SDValue B = DAG.getNode(ISD::SMAX, DL, MVT::i32, OneSubExp, Zero);
+ B = DAG.getNode(ISD::SMIN, DL, MVT::i32, B,
+ DAG.getConstant(13, DL, MVT::i32));
+
+ SDValue SigSetHigh = DAG.getNode(ISD::OR, DL, MVT::i32, M,
+ DAG.getConstant(0x1000, DL, MVT::i32));
+
+ SDValue D = DAG.getNode(ISD::SRL, DL, MVT::i32, SigSetHigh, B);
+ SDValue D0 = DAG.getNode(ISD::SHL, DL, MVT::i32, D, B);
+ SDValue D1 = DAG.getSelectCC(DL, D0, SigSetHigh, One, Zero, ISD::SETNE);
+ D = DAG.getNode(ISD::OR, DL, MVT::i32, D, D1);
+
+ SDValue V = DAG.getSelectCC(DL, E, One, D, N, ISD::SETLT);
+ SDValue VLow3 = DAG.getNode(ISD::AND, DL, MVT::i32, V,
+ DAG.getConstant(0x7, DL, MVT::i32));
+ V = DAG.getNode(ISD::SRL, DL, MVT::i32, V,
+ DAG.getConstant(2, DL, MVT::i32));
+ SDValue V0 = DAG.getSelectCC(DL, VLow3, DAG.getConstant(3, DL, MVT::i32),
+ One, Zero, ISD::SETEQ);
+ SDValue V1 = DAG.getSelectCC(DL, VLow3, DAG.getConstant(5, DL, MVT::i32),
+ One, Zero, ISD::SETGT);
+ V1 = DAG.getNode(ISD::OR, DL, MVT::i32, V0, V1);
+ V = DAG.getNode(ISD::ADD, DL, MVT::i32, V, V1);
+
+ V = DAG.getSelectCC(DL, E, DAG.getConstant(30, DL, MVT::i32),
+ DAG.getConstant(0x7c00, DL, MVT::i32), V, ISD::SETGT);
+ V = DAG.getSelectCC(DL, E, DAG.getConstant(1039, DL, MVT::i32),
+ I, V, ISD::SETEQ);
+
+ // Extract the sign bit.
+ SDValue Sign = DAG.getNode(ISD::SRL, DL, MVT::i32, UH,
+ DAG.getConstant(16, DL, MVT::i32));
+ Sign = DAG.getNode(ISD::AND, DL, MVT::i32, Sign,
+ DAG.getConstant(0x8000, DL, MVT::i32));
+
+ V = DAG.getNode(ISD::OR, DL, MVT::i32, Sign, V);
+ return DAG.getZExtOrTrunc(V, DL, Op.getValueType());
+}
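For reference, a scalar mirror of the node sequence above (not part of the patch; the function name is made up, and each line corresponds to one of the DAG nodes built in LowerFP_TO_FP16): it converts an f64 to the raw f16 bit pattern with round-to-nearest-even.

#include <cstdint>
#include <cstring>

// Scalar mirror of the f64 -> f16 round-to-nearest-even lowering.
uint16_t f64ToF16Bits(double X) {
  uint64_t U64;
  std::memcpy(&U64, &X, sizeof(U64));            // ISD::BITCAST f64 -> i64

  uint32_t UH = uint32_t(U64 >> 32);             // high 32 bits
  uint32_t U  = uint32_t(U64);                   // low 32 bits

  int32_t E = int32_t((UH >> 20) & 0x7ff);       // biased f64 exponent
  E += -1023 + 15;                               // rebias for f16

  uint32_t M = (UH >> 8) & 0xffe;                // top mantissa bits
  uint32_t MaskedSig = (UH & 0x1ff) | U;         // remaining low mantissa bits
  M |= (MaskedSig != 0);                         // sticky bit

  uint32_t I = ((M != 0) ? 0x0200 : 0) | 0x7c00; // quiet-NaN / Inf pattern
  uint32_t N = M | (uint32_t(E) << 12);          // normal-path result

  int32_t B = 1 - E;                             // denormal shift, clamped to [0,13]
  B = B < 0 ? 0 : (B > 13 ? 13 : B);

  uint32_t SigSetHigh = M | 0x1000;              // restore the implicit leading 1
  uint32_t D = SigSetHigh >> B;
  D |= ((D << B) != SigSetHigh);                 // sticky bit for shifted-out bits

  uint32_t V = (E < 1) ? D : N;                  // denormal vs. normal path
  uint32_t VLow3 = V & 7;
  V >>= 2;
  V += (VLow3 == 3) | (VLow3 > 5);               // round to nearest even

  if (E > 30)    V = 0x7c00;                     // overflow -> infinity
  if (E == 1039) V = I;                          // source was Inf or NaN

  uint32_t Sign = (UH >> 16) & 0x8000;
  return uint16_t(Sign | V);
}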
+
SDValue AMDGPUTargetLowering::LowerFP_TO_SINT(SDValue Op,
SelectionDAG &DAG) const {
SDValue Src = Op.getOperand(0);
+ // TODO: Factor out code common with LowerFP_TO_UINT.
+
+ EVT SrcVT = Src.getValueType();
+ if (Subtarget->has16BitInsts() && SrcVT == MVT::f16) {
+ SDLoc DL(Op);
+
+ SDValue FPExtend = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
+ SDValue FpToInt32 =
+ DAG.getNode(Op.getOpcode(), DL, MVT::i64, FPExtend);
+
+ return FpToInt32;
+ }
+
if (Op.getValueType() == MVT::i64 && Src.getValueType() == MVT::f64)
return LowerFP64_TO_INT(Op, DAG, true);
@@ -2056,6 +2135,19 @@ SDValue AMDGPUTargetLowering::LowerFP_TO_UINT(SDValue Op,
SelectionDAG &DAG) const {
SDValue Src = Op.getOperand(0);
+ // TODO: Factor out code common with LowerFP_TO_SINT.
+
+ EVT SrcVT = Src.getValueType();
+ if (Subtarget->has16BitInsts() && SrcVT == MVT::f16) {
+ SDLoc DL(Op);
+
+ SDValue FPExtend = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src);
+ SDValue FpToInt32 =
+ DAG.getNode(Op.getOpcode(), DL, MVT::i64, FPExtend);
+
+ return FpToInt32;
+ }
+
if (Op.getValueType() == MVT::i64 && Src.getValueType() == MVT::f64)
return LowerFP64_TO_INT(Op, DAG, false);
@@ -2068,8 +2160,7 @@ SDValue AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
MVT VT = Op.getSimpleValueType();
MVT ScalarVT = VT.getScalarType();
- if (!VT.isVector())
- return SDValue();
+ assert(VT.isVector());
SDValue Src = Op.getOperand(0);
SDLoc DL(Op);
@@ -2108,17 +2199,20 @@ static bool isI24(SDValue Op, SelectionDAG &DAG) {
(VT.getSizeInBits() - DAG.ComputeNumSignBits(Op)) < 24;
}
-static void simplifyI24(SDValue Op, TargetLowering::DAGCombinerInfo &DCI) {
+static bool simplifyI24(SDNode *Node24, unsigned OpIdx,
+ TargetLowering::DAGCombinerInfo &DCI) {
SelectionDAG &DAG = DCI.DAG;
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Op = Node24->getOperand(OpIdx);
EVT VT = Op.getValueType();
APInt Demanded = APInt::getLowBitsSet(VT.getSizeInBits(), 24);
APInt KnownZero, KnownOne;
TargetLowering::TargetLoweringOpt TLO(DAG, true, true);
- if (TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO))
- DCI.CommitTargetLoweringOpt(TLO);
+ if (TLO.SimplifyDemandedBits(Node24, OpIdx, Demanded, DCI))
+ return true;
+
+ return false;
}
template <typename IntTy>
@@ -2188,6 +2282,9 @@ SDValue AMDGPUTargetLowering::performLoadCombine(SDNode *N,
// problems during legalization, the emitted instructions to pack and unpack
// the bytes again are not eliminated in the case of an unaligned copy.
if (!allowsMisalignedMemoryAccesses(VT, AS, Align, &IsFast)) {
+ if (VT.isVector())
+ return scalarizeVectorLoad(LN, DAG);
+
SDValue Ops[2];
std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(LN, DAG);
return DAG.getMergeValues(Ops, SDLoc(N));
@@ -2236,8 +2333,12 @@ SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N,
// order problems during legalization, the emitted instructions to pack and
// unpack the bytes again are not eliminated in the case of an unaligned
// copy.
- if (!allowsMisalignedMemoryAccesses(VT, AS, Align, &IsFast))
+ if (!allowsMisalignedMemoryAccesses(VT, AS, Align, &IsFast)) {
+ if (VT.isVector())
+ return scalarizeVectorStore(SN, DAG);
+
return expandUnalignedStore(SN, DAG);
+ }
if (!IsFast)
return SDValue();
@@ -2262,38 +2363,21 @@ SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N,
SN->getBasePtr(), SN->getMemOperand());
}
-// TODO: Should repeat for other bit ops.
-SDValue AMDGPUTargetLowering::performAndCombine(SDNode *N,
- DAGCombinerInfo &DCI) const {
- if (N->getValueType(0) != MVT::i64)
- return SDValue();
-
- // Break up 64-bit and of a constant into two 32-bit ands. This will typically
- // happen anyway for a VALU 64-bit and. This exposes other 32-bit integer
- // combine opportunities since most 64-bit operations are decomposed this way.
- // TODO: We won't want this for SALU especially if it is an inline immediate.
- const ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
- if (!RHS)
- return SDValue();
-
- uint64_t Val = RHS->getZExtValue();
- if (Lo_32(Val) != 0 && Hi_32(Val) != 0 && !RHS->hasOneUse()) {
- // If either half of the constant is 0, this is really a 32-bit and, so
- // split it. If we can re-use the full materialized constant, keep it.
- return SDValue();
- }
-
- SDLoc SL(N);
+/// Split the 64-bit value \p LHS into two 32-bit components, and perform the
+/// binary operation \p Opc to it with the corresponding constant operands.
+SDValue AMDGPUTargetLowering::splitBinaryBitConstantOpImpl(
+ DAGCombinerInfo &DCI, const SDLoc &SL,
+ unsigned Opc, SDValue LHS,
+ uint32_t ValLo, uint32_t ValHi) const {
SelectionDAG &DAG = DCI.DAG;
-
SDValue Lo, Hi;
- std::tie(Lo, Hi) = split64BitValue(N->getOperand(0), DAG);
+ std::tie(Lo, Hi) = split64BitValue(LHS, DAG);
- SDValue LoRHS = DAG.getConstant(Lo_32(Val), SL, MVT::i32);
- SDValue HiRHS = DAG.getConstant(Hi_32(Val), SL, MVT::i32);
+ SDValue LoRHS = DAG.getConstant(ValLo, SL, MVT::i32);
+ SDValue HiRHS = DAG.getConstant(ValHi, SL, MVT::i32);
- SDValue LoAnd = DAG.getNode(ISD::AND, SL, MVT::i32, Lo, LoRHS);
- SDValue HiAnd = DAG.getNode(ISD::AND, SL, MVT::i32, Hi, HiRHS);
+ SDValue LoAnd = DAG.getNode(Opc, SL, MVT::i32, Lo, LoRHS);
+ SDValue HiAnd = DAG.getNode(Opc, SL, MVT::i32, Hi, HiRHS);
// Re-visit the ands. It's possible we eliminated one of them and it could
// simplify the vector.
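For reference, a scalar sketch of what the split computes for the AND case (not part of the patch; the function name is made up): the 64-bit op with a constant decomposes into independent 32-bit ops on the sub0/sub1 halves.

#include <cstdint>

// Scalar model: a 64-bit AND with a constant equals two 32-bit ANDs on the
// low and high halves with the corresponding halves of the constant.
uint64_t and64Split(uint64_t LHS, uint64_t C) {
  uint32_t Lo = uint32_t(LHS) & uint32_t(C);               // op on sub0
  uint32_t Hi = uint32_t(LHS >> 32) & uint32_t(C >> 32);   // op on sub1
  return (uint64_t(Hi) << 32) | Lo;
}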
@@ -2408,11 +2492,40 @@ SDValue AMDGPUTargetLowering::performSrlCombine(SDNode *N,
return DAG.getNode(ISD::BITCAST, SL, MVT::i64, BuildPair);
}
+// We need to specifically handle i64 mul here to avoid unnecessary conversion
+// instructions. If we only match on the legalized i64 mul expansion,
+// SimplifyDemandedBits will be unable to remove them because there will be
+// multiple uses due to the separate mul + mulh[su].
+static SDValue getMul24(SelectionDAG &DAG, const SDLoc &SL,
+ SDValue N0, SDValue N1, unsigned Size, bool Signed) {
+ if (Size <= 32) {
+ unsigned MulOpc = Signed ? AMDGPUISD::MUL_I24 : AMDGPUISD::MUL_U24;
+ return DAG.getNode(MulOpc, SL, MVT::i32, N0, N1);
+ }
+
+ // Because we want to eliminate extension instructions before the
+ // operation, we need to create a single user here (i.e. not the separate
+ // mul_lo + mul_hi) so that SimplifyDemandedBits will deal with it.
+
+ unsigned MulOpc = Signed ? AMDGPUISD::MUL_LOHI_I24 : AMDGPUISD::MUL_LOHI_U24;
+
+ SDValue Mul = DAG.getNode(MulOpc, SL,
+ DAG.getVTList(MVT::i32, MVT::i32), N0, N1);
+
+ return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i64,
+ Mul.getValue(0), Mul.getValue(1));
+}
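For reference, a scalar sketch of what MUL_LOHI_U24 produces (not part of the patch; the function name is made up): for operands known to fit in 24 bits, the full product is at most 48 bits, and the two 32-bit halves are exactly what BUILD_PAIR reassembles into the i64 result.

#include <cstdint>

// Scalar model of MUL_U24 / MULHI_U24 feeding BUILD_PAIR for an i64 multiply.
uint64_t mulU24LoHi(uint32_t A, uint32_t B) {
  uint64_t Full = uint64_t(A & 0xffffff) * uint64_t(B & 0xffffff);
  uint32_t Lo = uint32_t(Full);         // MUL_U24   (low 32 bits)
  uint32_t Hi = uint32_t(Full >> 32);   // MULHI_U24 (high 32 bits)
  return (uint64_t(Hi) << 32) | Lo;     // ISD::BUILD_PAIR
}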
+
SDValue AMDGPUTargetLowering::performMulCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
EVT VT = N->getValueType(0);
- if (VT.isVector() || VT.getSizeInBits() > 32)
+ unsigned Size = VT.getSizeInBits();
+ if (VT.isVector() || Size > 64)
+ return SDValue();
+
+ // There are i16 integer mul/mad.
+ if (Subtarget->has16BitInsts() && VT.getScalarType().bitsLE(MVT::i16))
return SDValue();
SelectionDAG &DAG = DCI.DAG;
@@ -2425,11 +2538,11 @@ SDValue AMDGPUTargetLowering::performMulCombine(SDNode *N,
if (Subtarget->hasMulU24() && isU24(N0, DAG) && isU24(N1, DAG)) {
N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32);
N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32);
- Mul = DAG.getNode(AMDGPUISD::MUL_U24, DL, MVT::i32, N0, N1);
+ Mul = getMul24(DAG, DL, N0, N1, Size, false);
} else if (Subtarget->hasMulI24() && isI24(N0, DAG) && isI24(N1, DAG)) {
N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32);
N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32);
- Mul = DAG.getNode(AMDGPUISD::MUL_I24, DL, MVT::i32, N0, N1);
+ Mul = getMul24(DAG, DL, N0, N1, Size, true);
} else {
return SDValue();
}
@@ -2439,6 +2552,77 @@ SDValue AMDGPUTargetLowering::performMulCombine(SDNode *N,
return DAG.getSExtOrTrunc(Mul, DL, VT);
}
+SDValue AMDGPUTargetLowering::performMulhsCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ EVT VT = N->getValueType(0);
+
+ if (!Subtarget->hasMulI24() || VT.isVector())
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc DL(N);
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ if (!isI24(N0, DAG) || !isI24(N1, DAG))
+ return SDValue();
+
+ N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32);
+ N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32);
+
+ SDValue Mulhi = DAG.getNode(AMDGPUISD::MULHI_I24, DL, MVT::i32, N0, N1);
+ DCI.AddToWorklist(Mulhi.getNode());
+ return DAG.getSExtOrTrunc(Mulhi, DL, VT);
+}
+
+SDValue AMDGPUTargetLowering::performMulhuCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ EVT VT = N->getValueType(0);
+
+ if (!Subtarget->hasMulU24() || VT.isVector() || VT.getSizeInBits() > 32)
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc DL(N);
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ if (!isU24(N0, DAG) || !isU24(N1, DAG))
+ return SDValue();
+
+ N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32);
+ N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32);
+
+ SDValue Mulhi = DAG.getNode(AMDGPUISD::MULHI_U24, DL, MVT::i32, N0, N1);
+ DCI.AddToWorklist(Mulhi.getNode());
+ return DAG.getZExtOrTrunc(Mulhi, DL, VT);
+}
+
+SDValue AMDGPUTargetLowering::performMulLoHi24Combine(
+ SDNode *N, DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+
+ // Simplify demanded bits before splitting into multiple users.
+ if (simplifyI24(N, 0, DCI) || simplifyI24(N, 1, DCI))
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ bool Signed = (N->getOpcode() == AMDGPUISD::MUL_LOHI_I24);
+
+ unsigned MulLoOpc = Signed ? AMDGPUISD::MUL_I24 : AMDGPUISD::MUL_U24;
+ unsigned MulHiOpc = Signed ? AMDGPUISD::MULHI_I24 : AMDGPUISD::MULHI_U24;
+
+ SDLoc SL(N);
+
+ SDValue MulLo = DAG.getNode(MulLoOpc, SL, MVT::i32, N0, N1);
+ SDValue MulHi = DAG.getNode(MulHiOpc, SL, MVT::i32, N0, N1);
+ return DAG.getMergeValues({ MulLo, MulHi }, SL);
+}
+
static bool isNegativeOne(SDValue Val) {
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val))
return C->isAllOnesValue();
@@ -2449,23 +2633,21 @@ static bool isCtlzOpc(unsigned Opc) {
return Opc == ISD::CTLZ || Opc == ISD::CTLZ_ZERO_UNDEF;
}
-// Get FFBH node if the incoming op may have been type legalized from a smaller
-// type VT.
-// Need to match pre-legalized type because the generic legalization inserts the
-// add/sub between the select and compare.
-static SDValue getFFBH_U32(const TargetLowering &TLI, SelectionDAG &DAG,
- const SDLoc &SL, SDValue Op) {
+SDValue AMDGPUTargetLowering::getFFBH_U32(SelectionDAG &DAG,
+ SDValue Op,
+ const SDLoc &DL) const {
EVT VT = Op.getValueType();
- EVT LegalVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
- if (LegalVT != MVT::i32)
+ EVT LegalVT = getTypeToTransformTo(*DAG.getContext(), VT);
+ if (LegalVT != MVT::i32 && (Subtarget->has16BitInsts() &&
+ LegalVT != MVT::i16))
return SDValue();
if (VT != MVT::i32)
- Op = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Op);
+ Op = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Op);
- SDValue FFBH = DAG.getNode(AMDGPUISD::FFBH_U32, SL, MVT::i32, Op);
+ SDValue FFBH = DAG.getNode(AMDGPUISD::FFBH_U32, DL, MVT::i32, Op);
if (VT != MVT::i32)
- FFBH = DAG.getNode(ISD::TRUNCATE, SL, VT, FFBH);
+ FFBH = DAG.getNode(ISD::TRUNCATE, DL, VT, FFBH);
return FFBH;
}
@@ -2493,7 +2675,7 @@ SDValue AMDGPUTargetLowering::performCtlzCombine(const SDLoc &SL, SDValue Cond,
isCtlzOpc(RHS.getOpcode()) &&
RHS.getOperand(0) == CmpLHS &&
isNegativeOne(LHS)) {
- return getFFBH_U32(*this, DAG, SL, CmpLHS);
+ return getFFBH_U32(DAG, CmpLHS, SL);
}
// select (setcc x, 0, ne), (ctlz_zero_undef x), -1 -> ffbh_u32 x
@@ -2501,7 +2683,7 @@ SDValue AMDGPUTargetLowering::performCtlzCombine(const SDLoc &SL, SDValue Cond,
isCtlzOpc(LHS.getOpcode()) &&
LHS.getOperand(0) == CmpLHS &&
isNegativeOne(RHS)) {
- return getFFBH_U32(*this, DAG, SL, CmpLHS);
+ return getFFBH_U32(DAG, CmpLHS, SL);
}
return SDValue();
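For reference, a scalar sketch of the pattern these two checks fold away (not part of the patch; __builtin_clz is the GCC/Clang builtin standing in for CTLZ_ZERO_UNDEF, and the function name is made up): FFBH_U32 returns the leading-zero count, or all ones for a zero input, which is exactly the select-around-ctlz shape matched here.

#include <cstdint>

// Scalar model: select (setcc x, 0, eq), -1, (ctlz_zero_undef x) == ffbh_u32 x
uint32_t ffbhU32(uint32_t X) {
  return X == 0 ? 0xffffffffu : uint32_t(__builtin_clz(X));
}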
@@ -2521,6 +2703,25 @@ SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N,
SDValue True = N->getOperand(1);
SDValue False = N->getOperand(2);
+ if (Cond.hasOneUse()) { // TODO: Look for multiple select uses.
+ SelectionDAG &DAG = DCI.DAG;
+ if ((DAG.isConstantValueOfAnyType(True) ||
+ DAG.isConstantValueOfAnyType(True)) &&
+ (!DAG.isConstantValueOfAnyType(False) &&
+ !DAG.isConstantValueOfAnyType(False))) {
+ // Swap cmp + select pair to move constant to false input.
+ // This will allow using VOPC cndmasks more often.
+ // select (setcc x, y, cc), k, v -> select (setcc x, y, !cc), v, k
+
+ SDLoc SL(N);
+ ISD::CondCode NewCC = getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
+ LHS.getValueType().isInteger());
+
+ SDValue NewCond = DAG.getSetCC(SL, Cond.getValueType(), LHS, RHS, NewCC);
+ return DAG.getNode(ISD::SELECT, SL, VT, NewCond, False, True);
+ }
+ }
+
if (VT == MVT::f32 && Cond.hasOneUse()) {
SDValue MinMax
= CombineFMinMaxLegacy(SDLoc(N), VT, LHS, RHS, True, False, CC, DCI);
@@ -2543,6 +2744,33 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
break;
case ISD::BITCAST: {
EVT DestVT = N->getValueType(0);
+
+ // Push casts through vector builds. This helps avoid emitting a large
+ // number of copies when materializing floating point vector constants.
+ //
+ // vNt1 bitcast (vNt0 (build_vector t0:x, t0:y)) =>
+ // vnt1 = build_vector (t1 (bitcast t0:x)), (t1 (bitcast t0:y))
+ if (DestVT.isVector()) {
+ SDValue Src = N->getOperand(0);
+ if (Src.getOpcode() == ISD::BUILD_VECTOR) {
+ EVT SrcVT = Src.getValueType();
+ unsigned NElts = DestVT.getVectorNumElements();
+
+ if (SrcVT.getVectorNumElements() == NElts) {
+ EVT DestEltVT = DestVT.getVectorElementType();
+
+ SmallVector<SDValue, 8> CastedElts;
+ SDLoc SL(N);
+ for (unsigned I = 0, E = SrcVT.getVectorNumElements(); I != E; ++I) {
+ SDValue Elt = Src.getOperand(I);
+ CastedElts.push_back(DAG.getNode(ISD::BITCAST, DL, DestEltVT, Elt));
+ }
+
+ return DAG.getBuildVector(DestVT, SL, CastedElts);
+ }
+ }
+ }
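For reference, a scalar sketch of why the push-through is sound (not part of the patch; the function name is made up): when the element counts match, bitcasting the whole vector is the same as bitcasting each element, e.g. v2f32 (bitcast (v2i32 build_vector x, y)) becomes build_vector (f32 (bitcast x)), (f32 (bitcast y)).

#include <cstring>

// Scalar model: per-element bitcasts reproduce the whole-vector bitcast
// when the source and destination element counts match.
void bitcastElementwise(const unsigned In[2], float Out[2]) {
  for (int I = 0; I != 2; ++I)
    std::memcpy(&Out[I], &In[I], sizeof(float)); // one BITCAST per element
}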
+
if (DestVT.getSizeInBits() != 64 && !DestVT.isVector())
break;
@@ -2591,22 +2819,24 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
return performSraCombine(N, DCI);
}
- case ISD::AND: {
- if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
- break;
-
- return performAndCombine(N, DCI);
- }
case ISD::MUL:
return performMulCombine(N, DCI);
+ case ISD::MULHS:
+ return performMulhsCombine(N, DCI);
+ case ISD::MULHU:
+ return performMulhuCombine(N, DCI);
case AMDGPUISD::MUL_I24:
- case AMDGPUISD::MUL_U24: {
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
- simplifyI24(N0, DCI);
- simplifyI24(N1, DCI);
+ case AMDGPUISD::MUL_U24:
+ case AMDGPUISD::MULHI_I24:
+ case AMDGPUISD::MULHI_U24: {
+ // If the first call to simplify is successful, then N may end up being
+ // deleted, so we shouldn't call simplifyI24 again.
+ simplifyI24(N, 0, DCI) || simplifyI24(N, 1, DCI);
return SDValue();
}
+ case AMDGPUISD::MUL_LOHI_I24:
+ case AMDGPUISD::MUL_LOHI_U24:
+ return performMulLoHi24Combine(N, DCI);
case ISD::SELECT:
return performSelectCombine(N, DCI);
case AMDGPUISD::BFE_I32:
@@ -2705,38 +2935,6 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
// Helper functions
//===----------------------------------------------------------------------===//
-void AMDGPUTargetLowering::getOriginalFunctionArgs(
- SelectionDAG &DAG,
- const Function *F,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- SmallVectorImpl<ISD::InputArg> &OrigIns) const {
-
- for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
- if (Ins[i].ArgVT == Ins[i].VT) {
- OrigIns.push_back(Ins[i]);
- continue;
- }
-
- EVT VT;
- if (Ins[i].ArgVT.isVector() && !Ins[i].VT.isVector()) {
- // Vector has been split into scalars.
- VT = Ins[i].ArgVT.getVectorElementType();
- } else if (Ins[i].VT.isVector() && Ins[i].ArgVT.isVector() &&
- Ins[i].ArgVT.getVectorElementType() !=
- Ins[i].VT.getVectorElementType()) {
- // Vector elements have been promoted
- VT = Ins[i].ArgVT;
- } else {
- // Vector has been spilt into smaller vectors.
- VT = Ins[i].VT;
- }
-
- ISD::InputArg Arg(Ins[i].Flags, VT, VT, Ins[i].Used,
- Ins[i].OrigArgIndex, Ins[i].PartOffset);
- OrigIns.push_back(Arg);
- }
-}
-
SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
const TargetRegisterClass *RC,
unsigned Reg, EVT VT) const {
@@ -2754,7 +2952,8 @@ SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG,
uint32_t AMDGPUTargetLowering::getImplicitParameterOffset(
const AMDGPUMachineFunction *MFI, const ImplicitParameter Param) const {
- uint64_t ArgOffset = MFI->ABIArgOffset;
+ unsigned Alignment = Subtarget->getAlignmentForImplicitArgPtr();
+ uint64_t ArgOffset = alignTo(MFI->getABIArgOffset(), Alignment);
switch (Param) {
case GRID_DIM:
return ArgOffset;
@@ -2779,6 +2978,10 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(RETURN)
NODE_NAME_CASE(DWORDADDR)
NODE_NAME_CASE(FRACT)
+ NODE_NAME_CASE(SETCC)
+ NODE_NAME_CASE(SETREG)
+ NODE_NAME_CASE(FMA_W_CHAIN)
+ NODE_NAME_CASE(FMUL_W_CHAIN)
NODE_NAME_CASE(CLAMP)
NODE_NAME_CASE(COS_HW)
NODE_NAME_CASE(SIN_HW)
@@ -2800,7 +3003,9 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(TRIG_PREOP)
NODE_NAME_CASE(RCP)
NODE_NAME_CASE(RSQ)
+ NODE_NAME_CASE(RCP_LEGACY)
NODE_NAME_CASE(RSQ_LEGACY)
+ NODE_NAME_CASE(FMUL_LEGACY)
NODE_NAME_CASE(RSQ_CLAMP)
NODE_NAME_CASE(LDEXP)
NODE_NAME_CASE(FP_CLASS)
@@ -2812,12 +3017,19 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(BFI)
NODE_NAME_CASE(BFM)
NODE_NAME_CASE(FFBH_U32)
+ NODE_NAME_CASE(FFBH_I32)
NODE_NAME_CASE(MUL_U24)
NODE_NAME_CASE(MUL_I24)
+ NODE_NAME_CASE(MULHI_U24)
+ NODE_NAME_CASE(MULHI_I24)
+ NODE_NAME_CASE(MUL_LOHI_U24)
+ NODE_NAME_CASE(MUL_LOHI_I24)
NODE_NAME_CASE(MAD_U24)
NODE_NAME_CASE(MAD_I24)
NODE_NAME_CASE(TEXTURE_FETCH)
NODE_NAME_CASE(EXPORT)
+ NODE_NAME_CASE(EXPORT_DONE)
+ NODE_NAME_CASE(R600_EXPORT)
NODE_NAME_CASE(CONST_ADDRESS)
NODE_NAME_CASE(REGISTER_LOAD)
NODE_NAME_CASE(REGISTER_STORE)
@@ -2833,6 +3045,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(BUILD_VERTICAL_VECTOR)
NODE_NAME_CASE(CONST_DATA_PTR)
NODE_NAME_CASE(PC_ADD_REL_OFFSET)
+ NODE_NAME_CASE(KILL)
case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break;
NODE_NAME_CASE(SENDMSG)
NODE_NAME_CASE(INTERP_MOV)
@@ -2844,16 +3057,18 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(ATOMIC_CMP_SWAP)
NODE_NAME_CASE(ATOMIC_INC)
NODE_NAME_CASE(ATOMIC_DEC)
+ NODE_NAME_CASE(BUFFER_LOAD)
+ NODE_NAME_CASE(BUFFER_LOAD_FORMAT)
case AMDGPUISD::LAST_AMDGPU_ISD_NUMBER: break;
}
return nullptr;
}
-SDValue AMDGPUTargetLowering::getRsqrtEstimate(SDValue Operand,
- DAGCombinerInfo &DCI,
- unsigned &RefinementSteps,
- bool &UseOneConstNR) const {
- SelectionDAG &DAG = DCI.DAG;
+SDValue AMDGPUTargetLowering::getSqrtEstimate(SDValue Operand,
+ SelectionDAG &DAG, int Enabled,
+ int &RefinementSteps,
+ bool &UseOneConstNR,
+ bool Reciprocal) const {
EVT VT = Operand.getValueType();
if (VT == MVT::f32) {
@@ -2868,9 +3083,8 @@ SDValue AMDGPUTargetLowering::getRsqrtEstimate(SDValue Operand,
}
SDValue AMDGPUTargetLowering::getRecipEstimate(SDValue Operand,
- DAGCombinerInfo &DCI,
- unsigned &RefinementSteps) const {
- SelectionDAG &DAG = DCI.DAG;
+ SelectionDAG &DAG, int Enabled,
+ int &RefinementSteps) const {
EVT VT = Operand.getValueType();
if (VT == MVT::f32) {
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index c2c758592d1c..5cc5efb331e3 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -25,19 +25,19 @@ class AMDGPUSubtarget;
class MachineRegisterInfo;
class AMDGPUTargetLowering : public TargetLowering {
+private:
+ /// \returns AMDGPUISD::FFBH_U32 node if the incoming \p Op may have been
+ /// legalized from a smaller type VT. Need to match pre-legalized type because
+ /// the generic legalization inserts the add/sub between the select and
+ /// compare.
+ SDValue getFFBH_U32(SelectionDAG &DAG, SDValue Op, const SDLoc &DL) const;
+
protected:
const AMDGPUSubtarget *Subtarget;
- SDValue LowerConstantInitializer(const Constant* Init, const GlobalValue *GV,
- const SDValue &InitPtr,
- SDValue Chain,
- SelectionDAG &DAG) const;
SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
- /// \brief Lower vector stores by merging the vector elements into an integer
- /// of the same bitwidth.
- SDValue MergeVectorStore(const SDValue &Op, SelectionDAG &DAG) const;
/// \brief Split a vector store into multiple scalar stores.
/// \returns The resulting chain.
@@ -60,6 +60,7 @@ protected:
SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP64_TO_INT(SDValue Op, SelectionDAG &DAG, bool Signed) const;
+ SDValue LowerFP_TO_FP16(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
@@ -69,17 +70,22 @@ protected:
bool shouldCombineMemoryType(EVT VT) const;
SDValue performLoadCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const;
- SDValue performAndCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ SDValue splitBinaryBitConstantOpImpl(DAGCombinerInfo &DCI, const SDLoc &SL,
+ unsigned Opc, SDValue LHS,
+ uint32_t ValLo, uint32_t ValHi) const;
SDValue performShlCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performSraCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performSrlCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performMulCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performMulhsCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performMulhuCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performMulLoHi24Combine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performCtlzCombine(const SDLoc &SL, SDValue Cond, SDValue LHS,
SDValue RHS, DAGCombinerInfo &DCI) const;
SDValue performSelectCombine(SDNode *N, DAGCombinerInfo &DCI) const;
static EVT getEquivalentMemType(LLVMContext &Context, EVT VT);
- static EVT getEquivalentBitType(LLVMContext &Context, EVT VT);
virtual SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
SelectionDAG &DAG) const;
@@ -102,16 +108,8 @@ protected:
SDValue LowerDIVREM24(SDValue Op, SelectionDAG &DAG, bool sign) const;
void LowerUDIVREM64(SDValue Op, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &Results) const;
- /// The SelectionDAGBuilder will automatically promote function arguments
- /// with illegal types. However, this does not work for the AMDGPU targets
- /// since the function arguments are stored in memory as these illegal types.
- /// In order to handle this properly we need to get the origianl types sizes
- /// from the LLVM IR Function and fixup the ISD:InputArg values before
- /// passing them to AnalyzeFormalArguments()
- void getOriginalFunctionArgs(SelectionDAG &DAG,
- const Function *F,
- const SmallVectorImpl<ISD::InputArg> &Ins,
- SmallVectorImpl<ISD::InputArg> &OrigIns) const;
+ void analyzeFormalArgumentsCompute(CCState &State,
+ const SmallVectorImpl<ISD::InputArg> &Ins) const;
void AnalyzeFormalArguments(CCState &State,
const SmallVectorImpl<ISD::InputArg> &Ins) const;
void AnalyzeReturn(CCState &State,
@@ -171,13 +169,14 @@ public:
const char* getTargetNodeName(unsigned Opcode) const override;
- SDValue getRsqrtEstimate(SDValue Operand,
- DAGCombinerInfo &DCI,
- unsigned &RefinementSteps,
- bool &UseOneConstNR) const override;
- SDValue getRecipEstimate(SDValue Operand,
- DAGCombinerInfo &DCI,
- unsigned &RefinementSteps) const override;
+ bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override {
+ return true;
+ }
+ SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
+ int &RefinementSteps, bool &UseOneConstNR,
+ bool Reciprocal) const override;
+ SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
+ int &RefinementSteps) const override;
virtual SDNode *PostISelFolding(MachineSDNode *N,
SelectionDAG &DAG) const = 0;
@@ -228,6 +227,13 @@ enum NodeType : unsigned {
DWORDADDR,
FRACT,
CLAMP,
+ // This is SETCC with the full mask result which is used for a compare with a
+ // result bit per item in the wavefront.
+ SETCC,
+ SETREG,
+ // FP ops with input and output chain.
+ FMA_W_CHAIN,
+ FMUL_W_CHAIN,
// SIN_HW, COS_HW - f32 for SI, 1 ULP max error, valid from -100 pi to 100 pi.
// Denormals handled on some parts.
@@ -254,7 +260,9 @@ enum NodeType : unsigned {
// For f64, max error 2^29 ULP, handles denormals.
RCP,
RSQ,
+ RCP_LEGACY,
RSQ_LEGACY,
+ FMUL_LEGACY,
RSQ_CLAMP,
LDEXP,
FP_CLASS,
@@ -266,12 +274,19 @@ enum NodeType : unsigned {
BFI, // (src0 & src1) | (~src0 & src2)
BFM, // Insert a range of bits into a 32-bit word.
FFBH_U32, // ctlz with -1 if input is zero.
+ FFBH_I32,
MUL_U24,
MUL_I24,
+ MULHI_U24,
+ MULHI_I24,
MAD_U24,
MAD_I24,
+ MUL_LOHI_I24,
+ MUL_LOHI_U24,
TEXTURE_FETCH,
- EXPORT,
+ EXPORT, // exp on SI+
+ EXPORT_DONE, // exp on SI+ with done bit set
+ R600_EXPORT,
CONST_ADDRESS,
REGISTER_LOAD,
REGISTER_STORE,
@@ -302,6 +317,7 @@ enum NodeType : unsigned {
INTERP_P1,
INTERP_P2,
PC_ADD_REL_OFFSET,
+ KILL,
FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE,
STORE_MSKOR,
LOAD_CONSTANT,
@@ -309,6 +325,8 @@ enum NodeType : unsigned {
ATOMIC_CMP_SWAP,
ATOMIC_INC,
ATOMIC_DEC,
+ BUFFER_LOAD,
+ BUFFER_LOAD_FORMAT,
LAST_AMDGPU_ISD_NUMBER
};
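
The new AMDGPUISD::SETCC node added to this enum (see the comment in the hunk above) returns the full execution mask of a wavefront-wide compare, one result bit per lane, packed into an i64. A hedged scalar sketch of that shape, not LLVM code, assuming a 64-lane SI-style wavefront and invented per-lane inputs:

// Sketch: a wavefront-wide compare producing one bit per lane in a 64-bit mask,
// which is the result shape of AMDGPUISD::SETCC described above.
#include <cstdint>
#include <cstdio>

int main() {
  const int WavefrontSize = 64;            // assumption: 64-lane wavefront
  float Src0[WavefrontSize], Src1[WavefrontSize];
  for (int Lane = 0; Lane < WavefrontSize; ++Lane) {
    Src0[Lane] = static_cast<float>(Lane); // invented per-lane inputs
    Src1[Lane] = 31.5f;
  }

  uint64_t Mask = 0;
  for (int Lane = 0; Lane < WavefrontSize; ++Lane)
    if (Src0[Lane] > Src1[Lane])           // the per-lane SETCC condition
      Mask |= uint64_t(1) << Lane;

  // Lanes 32..63 compare true here, so the mask is 0xffffffff00000000.
  std::printf("exec mask: 0x%016llx\n", (unsigned long long)Mask);
}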
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
index 9a00ecb24ebe..e4dc6599e156 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.cpp
@@ -23,7 +23,6 @@
using namespace llvm;
#define GET_INSTRINFO_CTOR_DTOR
-#define GET_INSTRINFO_NAMED_OPS
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"
@@ -33,10 +32,6 @@ void AMDGPUInstrInfo::anchor() {}
AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST)
: AMDGPUGenInstrInfo(-1, -1), ST(ST) {}
-bool AMDGPUInstrInfo::enableClusterLoads() const {
- return true;
-}
-
// FIXME: This behaves strangely. If, for example, you have 32 load + stores,
// the first 16 loads will be interleaved with the stores, and the next 16 will
// be clustered as expected. It should really split into 2 16 store batches.
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
index a59eafadeb93..bd8e389639f5 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.h
@@ -17,17 +17,12 @@
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUINSTRINFO_H
#include "llvm/Target/TargetInstrInfo.h"
+#include "Utils/AMDGPUBaseInfo.h"
#define GET_INSTRINFO_HEADER
#define GET_INSTRINFO_ENUM
-#define GET_INSTRINFO_OPERAND_ENUM
#include "AMDGPUGenInstrInfo.inc"
-#define OPCODE_IS_ZERO_INT AMDGPU::PRED_SETE_INT
-#define OPCODE_IS_NOT_ZERO_INT AMDGPU::PRED_SETNE_INT
-#define OPCODE_IS_ZERO AMDGPU::PRED_SETE
-#define OPCODE_IS_NOT_ZERO AMDGPU::PRED_SETNE
-
namespace llvm {
class AMDGPUSubtarget;
@@ -44,8 +39,6 @@ private:
public:
explicit AMDGPUInstrInfo(const AMDGPUSubtarget &st);
- bool enableClusterLoads() const override;
-
bool shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
int64_t Offset1, int64_t Offset2,
unsigned NumLoads) const override;
@@ -59,15 +52,6 @@ public:
/// equivalent opcode that writes \p Channels Channels.
int getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const;
};
-
-namespace AMDGPU {
- LLVM_READONLY
- int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIndex);
-} // End namespace AMDGPU
-
} // End llvm namespace
-#define AMDGPU_FLAG_REGISTER_LOAD (UINT64_C(1) << 63)
-#define AMDGPU_FLAG_REGISTER_STORE (UINT64_C(1) << 62)
-
#endif
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index 2b13bb9079ea..e7b40016e272 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -40,6 +40,8 @@ def AMDGPUFmasOp : SDTypeProfile<1, 4,
[SDTCisFP<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisInt<4>]
>;
+def AMDGPUKillSDT : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+
//===----------------------------------------------------------------------===//
// AMDGPU DAG Nodes
//
@@ -65,6 +67,7 @@ def AMDGPUrcp : SDNode<"AMDGPUISD::RCP", SDTFPUnaryOp>;
def AMDGPUrsq : SDNode<"AMDGPUISD::RSQ", SDTFPUnaryOp>;
// out = 1.0 / sqrt(a)
+def AMDGPUrcp_legacy : SDNode<"AMDGPUISD::RCP_LEGACY", SDTFPUnaryOp>;
def AMDGPUrsq_legacy : SDNode<"AMDGPUISD::RSQ_LEGACY", SDTFPUnaryOp>;
// out = 1.0 / sqrt(a) result clamped to +/- max_float.
@@ -82,6 +85,10 @@ def AMDGPUfmax_legacy : SDNode<"AMDGPUISD::FMAX_LEGACY", SDTFPBinOp,
[]
>;
+def AMDGPUfmul_legacy : SDNode<"AMDGPUISD::FMUL_LEGACY", SDTFPBinOp,
+ [SDNPCommutative, SDNPAssociative]
+>;
+
def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPTernaryOp, []>;
// out = max(a, b) a and b are signed ints
@@ -137,6 +144,24 @@ def AMDGPUcarry : SDNode<"AMDGPUISD::CARRY", SDTIntBinOp, []>;
// out = (src1 > src0) ? 1 : 0
def AMDGPUborrow : SDNode<"AMDGPUISD::BORROW", SDTIntBinOp, []>;
+def AMDGPUSetCCOp : SDTypeProfile<1, 3, [ // setcc
+ SDTCisVT<0, i64>, SDTCisSameAs<1, 2>, SDTCisVT<3, OtherVT>
+]>;
+
+def AMDGPUsetcc : SDNode<"AMDGPUISD::SETCC", AMDGPUSetCCOp>;
+
+def AMDGPUSetRegOp : SDTypeProfile<0, 2, [
+ SDTCisInt<0>, SDTCisInt<1>
+]>;
+
+def AMDGPUsetreg : SDNode<"AMDGPUISD::SETREG", AMDGPUSetRegOp, [
+ SDNPHasChain, SDNPSideEffect, SDNPOptInGlue, SDNPOutGlue]>;
+
+def AMDGPUfma : SDNode<"AMDGPUISD::FMA_W_CHAIN", SDTFPTernaryOp, [
+ SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+def AMDGPUmul : SDNode<"AMDGPUISD::FMUL_W_CHAIN", SDTFPBinOp, [
+ SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def AMDGPUcvt_f32_ubyte0 : SDNode<"AMDGPUISD::CVT_F32_UBYTE0",
SDTIntToFPOp, []>;
@@ -202,14 +227,22 @@ def AMDGPUbfi : SDNode<"AMDGPUISD::BFI", AMDGPUDTIntTernaryOp>;
def AMDGPUbfm : SDNode<"AMDGPUISD::BFM", SDTIntBinOp>;
def AMDGPUffbh_u32 : SDNode<"AMDGPUISD::FFBH_U32", SDTIntUnaryOp>;
+def AMDGPUffbh_i32 : SDNode<"AMDGPUISD::FFBH_I32", SDTIntUnaryOp>;
-// Signed and unsigned 24-bit mulitply. The highest 8-bits are ignore when
-// performing the mulitply. The result is a 32-bit value.
+// Signed and unsigned 24-bit multiply. The highest 8 bits are ignored
+// when performing the multiply. The result is a 32-bit value.
def AMDGPUmul_u24 : SDNode<"AMDGPUISD::MUL_U24", SDTIntBinOp,
- [SDNPCommutative]
+ [SDNPCommutative, SDNPAssociative]
>;
def AMDGPUmul_i24 : SDNode<"AMDGPUISD::MUL_I24", SDTIntBinOp,
- [SDNPCommutative]
+ [SDNPCommutative, SDNPAssociative]
+>;
+
+def AMDGPUmulhi_u24 : SDNode<"AMDGPUISD::MULHI_U24", SDTIntBinOp,
+ [SDNPCommutative, SDNPAssociative]
+>;
+def AMDGPUmulhi_i24 : SDNode<"AMDGPUISD::MULHI_I24", SDTIntBinOp,
+ [SDNPCommutative, SDNPAssociative]
>;
def AMDGPUmad_u24 : SDNode<"AMDGPUISD::MAD_U24", AMDGPUDTIntTernaryOp,
@@ -245,6 +278,35 @@ def AMDGPUinterp_p2 : SDNode<"AMDGPUISD::INTERP_P2",
SDTypeProfile<1, 4, [SDTCisFP<0>]>,
[SDNPInGlue]>;
+
+def AMDGPUkill : SDNode<"AMDGPUISD::KILL", AMDGPUKillSDT,
+ [SDNPHasChain, SDNPSideEffect]>;
+
+// SI+ export
+def AMDGPUExportOp : SDTypeProfile<0, 8, [
+ SDTCisInt<0>, // i8 en
+ SDTCisInt<1>, // i1 vm
+ // skip done
+ SDTCisInt<2>, // i8 tgt
+ SDTCisSameAs<3, 1>, // i1 compr
+ SDTCisFP<4>, // f32 src0
+ SDTCisSameAs<5, 4>, // f32 src1
+ SDTCisSameAs<6, 4>, // f32 src2
+ SDTCisSameAs<7, 4> // f32 src3
+]>;
+
+def AMDGPUexport: SDNode<"AMDGPUISD::EXPORT", AMDGPUExportOp,
+ [SDNPHasChain, SDNPMayStore]>;
+
+def AMDGPUexport_done: SDNode<"AMDGPUISD::EXPORT_DONE", AMDGPUExportOp,
+ [SDNPHasChain, SDNPMayLoad, SDNPMayStore]>;
+
+
+def R600ExportOp : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>;
+
+def R600_EXPORT: SDNode<"AMDGPUISD::R600_EXPORT", R600ExportOp,
+ [SDNPHasChain, SDNPSideEffect]>;
+
//===----------------------------------------------------------------------===//
// Flow Control Profile Types
//===----------------------------------------------------------------------===//
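
The mul_u24/mulhi_u24 nodes defined in the hunk above multiply only the low 24 bits of each operand, as the comment notes. A hedged sketch of the unsigned variants, not LLVM code; the ">> 32" behaviour of the *hi* variant is an assumption made for illustration:

// Sketch of the 24-bit multiply semantics: the high 8 bits of each input are
// ignored, MUL_U24 keeps the low 32 bits of the 48-bit product, and MULHI_U24
// is assumed here to return the bits above the low 32.
#include <cassert>
#include <cstdint>

static uint32_t mul_u24(uint32_t A, uint32_t B) {
  uint64_t Prod = uint64_t(A & 0xffffff) * uint64_t(B & 0xffffff);
  return uint32_t(Prod);        // low 32 bits of the 48-bit product
}

static uint32_t mulhi_u24(uint32_t A, uint32_t B) {
  uint64_t Prod = uint64_t(A & 0xffffff) * uint64_t(B & 0xffffff);
  return uint32_t(Prod >> 32);  // assumed: remaining high bits
}

int main() {
  // The high 8 bits of the inputs are ignored: 0xff000003 behaves like 3.
  assert(mul_u24(0xff000003u, 5u) == 15u);
  // 0xffffff * 0xffffff = 0xfffffe000001, split into low/high parts.
  assert(mul_u24(0xffffffu, 0xffffffu) == 0xfe000001u);
  assert(mulhi_u24(0xffffffu, 0xffffffu) == 0xffffu);
}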
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
index 3944fdbd31e3..513df3a9cdf3 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUInstructions.td
@@ -42,6 +42,7 @@ class AMDGPUShaderInst <dag outs, dag ins, string asm = "",
field bits<32> Inst = 0xffffffff;
}
+def FP16Denormals : Predicate<"Subtarget.hasFP16Denormals()">;
def FP32Denormals : Predicate<"Subtarget.hasFP32Denormals()">;
def FP64Denormals : Predicate<"Subtarget.hasFP64Denormals()">;
def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;
@@ -49,13 +50,6 @@ def UnsafeFPMath : Predicate<"TM.Options.UnsafeFPMath">;
def InstFlag : OperandWithDefaultOps <i32, (ops (i32 0))>;
def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;
-// 32-bit VALU immediate operand that uses the constant bus.
-def u32kimm : Operand<i32> {
- let OperandNamespace = "AMDGPU";
- let OperandType = "OPERAND_KIMM32";
- let PrintMethod = "printU32ImmOperand";
-}
-
let OperandType = "OPERAND_IMMEDIATE" in {
def u32imm : Operand<i32> {
@@ -172,6 +166,12 @@ class HasOneUseBinOp<SDPatternOperator op> : PatFrag<
[{ return N->hasOneUse(); }]
>;
+class HasOneUseTernaryOp<SDPatternOperator op> : PatFrag<
+ (ops node:$src0, node:$src1, node:$src2),
+ (op $src0, $src1, $src2),
+ [{ return N->hasOneUse(); }]
+>;
+
//===----------------------------------------------------------------------===//
// Load/Store Pattern Fragments
//===----------------------------------------------------------------------===//
@@ -363,53 +363,54 @@ multiclass AtomicCmpSwapLocal <SDNode cmp_swap_node> {
defm atomic_cmp_swap : AtomicCmpSwapLocal <atomic_cmp_swap>;
-def mskor_flat : PatFrag<(ops node:$val, node:$ptr),
- (AMDGPUstore_mskor node:$val, node:$ptr), [{
- return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;
-}]>;
+multiclass global_binary_atomic_op<SDNode atomic_op> {
+ def "" : PatFrag<
+ (ops node:$ptr, node:$value),
+ (atomic_op node:$ptr, node:$value),
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]>;
+
+ def _noret : PatFrag<
+ (ops node:$ptr, node:$value),
+ (atomic_op node:$ptr, node:$value),
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;
+
+ def _ret : PatFrag<
+ (ops node:$ptr, node:$value),
+ (atomic_op node:$ptr, node:$value),
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
+}
-class global_binary_atomic_op<SDNode atomic_op> : PatFrag<
- (ops node:$ptr, node:$value),
- (atomic_op node:$ptr, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]
->;
-
-class flat_binary_atomic_op<SDNode atomic_op> : PatFrag<
- (ops node:$ptr, node:$value),
- (atomic_op node:$ptr, node:$value),
- [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;}]
->;
-
-def atomic_swap_global : global_binary_atomic_op<atomic_swap>;
-def atomic_add_global : global_binary_atomic_op<atomic_load_add>;
-def atomic_and_global : global_binary_atomic_op<atomic_load_and>;
-def atomic_max_global : global_binary_atomic_op<atomic_load_max>;
-def atomic_min_global : global_binary_atomic_op<atomic_load_min>;
-def atomic_or_global : global_binary_atomic_op<atomic_load_or>;
-def atomic_sub_global : global_binary_atomic_op<atomic_load_sub>;
-def atomic_umax_global : global_binary_atomic_op<atomic_load_umax>;
-def atomic_umin_global : global_binary_atomic_op<atomic_load_umin>;
-def atomic_xor_global : global_binary_atomic_op<atomic_load_xor>;
-
-def atomic_cmp_swap_global : global_binary_atomic_op<AMDGPUatomic_cmp_swap>;
-def atomic_cmp_swap_global_nortn : PatFrag<
- (ops node:$ptr, node:$value),
- (atomic_cmp_swap_global node:$ptr, node:$value),
- [{ return SDValue(N, 0).use_empty(); }]
->;
-
-def atomic_swap_flat : flat_binary_atomic_op<atomic_swap>;
-def atomic_add_flat : flat_binary_atomic_op<atomic_load_add>;
-def atomic_and_flat : flat_binary_atomic_op<atomic_load_and>;
-def atomic_max_flat : flat_binary_atomic_op<atomic_load_max>;
-def atomic_min_flat : flat_binary_atomic_op<atomic_load_min>;
-def atomic_or_flat : flat_binary_atomic_op<atomic_load_or>;
-def atomic_sub_flat : flat_binary_atomic_op<atomic_load_sub>;
-def atomic_umax_flat : flat_binary_atomic_op<atomic_load_umax>;
-def atomic_umin_flat : flat_binary_atomic_op<atomic_load_umin>;
-def atomic_xor_flat : flat_binary_atomic_op<atomic_load_xor>;
-
-def atomic_cmp_swap_flat : flat_binary_atomic_op<AMDGPUatomic_cmp_swap>;
+defm atomic_swap_global : global_binary_atomic_op<atomic_swap>;
+defm atomic_add_global : global_binary_atomic_op<atomic_load_add>;
+defm atomic_and_global : global_binary_atomic_op<atomic_load_and>;
+defm atomic_max_global : global_binary_atomic_op<atomic_load_max>;
+defm atomic_min_global : global_binary_atomic_op<atomic_load_min>;
+defm atomic_or_global : global_binary_atomic_op<atomic_load_or>;
+defm atomic_sub_global : global_binary_atomic_op<atomic_load_sub>;
+defm atomic_umax_global : global_binary_atomic_op<atomic_load_umax>;
+defm atomic_umin_global : global_binary_atomic_op<atomic_load_umin>;
+defm atomic_xor_global : global_binary_atomic_op<atomic_load_xor>;
+
+// Legacy
+def AMDGPUatomic_cmp_swap_global : PatFrag<
+ (ops node:$ptr, node:$value),
+ (AMDGPUatomic_cmp_swap node:$ptr, node:$value),
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]>;
+
+def atomic_cmp_swap_global : PatFrag<
+ (ops node:$ptr, node:$cmp, node:$value),
+ (atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]>;
+
+def atomic_cmp_swap_global_noret : PatFrag<
+ (ops node:$ptr, node:$cmp, node:$value),
+ (atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;
+
+def atomic_cmp_swap_global_ret : PatFrag<
+ (ops node:$ptr, node:$cmp, node:$value),
+ (atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
//===----------------------------------------------------------------------===//
// Misc Pattern Fragments
@@ -420,6 +421,7 @@ int TWO_PI = 0x40c90fdb;
int PI = 0x40490fdb;
int TWO_PI_INV = 0x3e22f983;
int FP_UINT_MAX_PLUS_1 = 0x4f800000; // 1 << 32 in floating point encoding
+int FP16_ONE = 0x3C00;
int FP32_ONE = 0x3f800000;
int FP32_NEG_ONE = 0xbf800000;
int FP64_ONE = 0x3ff0000000000000;
@@ -559,17 +561,26 @@ multiclass BFIPatterns <Instruction BFI_INT,
def : Pat <
(fcopysign f32:$src0, f32:$src1),
- (BFI_INT (LoadImm32 0x7fffffff), $src0, $src1)
+ (BFI_INT (LoadImm32 (i32 0x7fffffff)), $src0, $src1)
>;
def : Pat <
(f64 (fcopysign f64:$src0, f64:$src1)),
(REG_SEQUENCE RC64,
(i32 (EXTRACT_SUBREG $src0, sub0)), sub0,
- (BFI_INT (LoadImm32 0x7fffffff),
+ (BFI_INT (LoadImm32 (i32 0x7fffffff)),
(i32 (EXTRACT_SUBREG $src0, sub1)),
(i32 (EXTRACT_SUBREG $src1, sub1))), sub1)
>;
+
+ def : Pat <
+ (f64 (fcopysign f64:$src0, f32:$src1)),
+ (REG_SEQUENCE RC64,
+ (i32 (EXTRACT_SUBREG $src0, sub0)), sub0,
+ (BFI_INT (LoadImm32 (i32 0x7fffffff)),
+ (i32 (EXTRACT_SUBREG $src0, sub1)),
+ $src1), sub1)
+ >;
}
// SHA-256 Ma patterns
@@ -618,11 +629,10 @@ def smax_oneuse : HasOneUseBinOp<smax>;
def smin_oneuse : HasOneUseBinOp<smin>;
def umax_oneuse : HasOneUseBinOp<umax>;
def umin_oneuse : HasOneUseBinOp<umin>;
+def sub_oneuse : HasOneUseBinOp<sub>;
} // Properties = [SDNPCommutative, SDNPAssociative]
-
-// 24-bit arithmetic patterns
-def umul24 : PatFrag <(ops node:$x, node:$y), (mul node:$x, node:$y)>;
+def select_oneuse : HasOneUseTernaryOp<select>;
// Special conversion patterns
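
The BFIPatterns hunk above now wraps the 0x7fffffff mask in an explicit i32 and adds an f64/f32 fcopysign pattern. A hedged sketch, not LLVM code, of the integer-select that BFI performs in those patterns (BFI is (src0 & src1) | (~src0 & src2) per the node comment):

// Sketch: with mask = 0x7fffffff, bfi keeps the magnitude bits of the first
// value and the sign bit of the second, i.e. copysignf(a, b).
#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

static uint32_t bfi(uint32_t Mask, uint32_t A, uint32_t B) {
  return (Mask & A) | (~Mask & B);
}

int main() {
  float A = 3.5f, B = -0.0f;
  uint32_t IA, IB;
  std::memcpy(&IA, &A, sizeof(float));
  std::memcpy(&IB, &B, sizeof(float));
  uint32_t R = bfi(0x7fffffffu, IA, IB);
  float F;
  std::memcpy(&F, &R, sizeof(float));
  assert(F == std::copysign(A, B)); // -3.5f: magnitude of A, sign of B
}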
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsics.td b/contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsics.td
index 2127391f18e7..ceae0b575395 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsics.td
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUIntrinsics.td
@@ -16,6 +16,8 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in {
def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>;
def int_AMDGPU_kilp : Intrinsic<[], [], []>;
+
+ // Deprecated in favor of llvm.amdgcn.sffbh
def int_AMDGPU_flbit_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>;
// Deprecated in favor of separate int_amdgcn_cube* intrinsics.
@@ -29,9 +31,6 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in {
def int_AMDGPU_rsq : Intrinsic<
[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]
>;
-
- // Deprecated in favor of llvm.amdgcn.read.workdim
- def int_AMDGPU_read_workdim : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>;
}
include "SIIntrinsics.td"
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
index ad8d3e4d3545..7d56355074b1 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -36,13 +36,92 @@
using namespace llvm;
-AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx, const AMDGPUSubtarget &st):
- Ctx(ctx), ST(st) { }
+#include "AMDGPUGenMCPseudoLowering.inc"
+
+
+AMDGPUMCInstLower::AMDGPUMCInstLower(MCContext &ctx, const AMDGPUSubtarget &st,
+ const AsmPrinter &ap):
+ Ctx(ctx), ST(st), AP(ap) { }
static MCSymbolRefExpr::VariantKind getVariantKind(unsigned MOFlags) {
switch (MOFlags) {
- default: return MCSymbolRefExpr::VK_None;
- case SIInstrInfo::MO_GOTPCREL: return MCSymbolRefExpr::VK_GOTPCREL;
+ default:
+ return MCSymbolRefExpr::VK_None;
+ case SIInstrInfo::MO_GOTPCREL:
+ return MCSymbolRefExpr::VK_GOTPCREL;
+ case SIInstrInfo::MO_GOTPCREL32_LO:
+ return MCSymbolRefExpr::VK_AMDGPU_GOTPCREL32_LO;
+ case SIInstrInfo::MO_GOTPCREL32_HI:
+ return MCSymbolRefExpr::VK_AMDGPU_GOTPCREL32_HI;
+ case SIInstrInfo::MO_REL32_LO:
+ return MCSymbolRefExpr::VK_AMDGPU_REL32_LO;
+ case SIInstrInfo::MO_REL32_HI:
+ return MCSymbolRefExpr::VK_AMDGPU_REL32_HI;
+ }
+}
+
+const MCExpr *AMDGPUMCInstLower::getLongBranchBlockExpr(
+ const MachineBasicBlock &SrcBB,
+ const MachineOperand &MO) const {
+ const MCExpr *DestBBSym
+ = MCSymbolRefExpr::create(MO.getMBB()->getSymbol(), Ctx);
+ const MCExpr *SrcBBSym = MCSymbolRefExpr::create(SrcBB.getSymbol(), Ctx);
+
+ assert(SrcBB.front().getOpcode() == AMDGPU::S_GETPC_B64 &&
+ ST.getInstrInfo()->get(AMDGPU::S_GETPC_B64).Size == 4);
+
+ // s_getpc_b64 returns the address of next instruction.
+ const MCConstantExpr *One = MCConstantExpr::create(4, Ctx);
+ SrcBBSym = MCBinaryExpr::createAdd(SrcBBSym, One, Ctx);
+
+ if (MO.getTargetFlags() == AMDGPU::TF_LONG_BRANCH_FORWARD)
+ return MCBinaryExpr::createSub(DestBBSym, SrcBBSym, Ctx);
+
+ assert(MO.getTargetFlags() == AMDGPU::TF_LONG_BRANCH_BACKWARD);
+ return MCBinaryExpr::createSub(SrcBBSym, DestBBSym, Ctx);
+}
+
+bool AMDGPUMCInstLower::lowerOperand(const MachineOperand &MO,
+ MCOperand &MCOp) const {
+ switch (MO.getType()) {
+ default:
+ llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::createImm(MO.getImm());
+ return true;
+ case MachineOperand::MO_Register:
+ MCOp = MCOperand::createReg(AMDGPU::getMCReg(MO.getReg(), ST));
+ return true;
+ case MachineOperand::MO_MachineBasicBlock: {
+ if (MO.getTargetFlags() != 0) {
+ MCOp = MCOperand::createExpr(
+ getLongBranchBlockExpr(*MO.getParent()->getParent(), MO));
+ } else {
+ MCOp = MCOperand::createExpr(
+ MCSymbolRefExpr::create(MO.getMBB()->getSymbol(), Ctx));
+ }
+
+ return true;
+ }
+ case MachineOperand::MO_GlobalAddress: {
+ const GlobalValue *GV = MO.getGlobal();
+ SmallString<128> SymbolName;
+ AP.getNameWithPrefix(SymbolName, GV);
+ MCSymbol *Sym = Ctx.getOrCreateSymbol(SymbolName);
+ const MCExpr *SymExpr =
+ MCSymbolRefExpr::create(Sym, getVariantKind(MO.getTargetFlags()),Ctx);
+ const MCExpr *Expr = MCBinaryExpr::createAdd(SymExpr,
+ MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
+ MCOp = MCOperand::createExpr(Expr);
+ return true;
+ }
+ case MachineOperand::MO_ExternalSymbol: {
+ MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(MO.getSymbolName()));
+ Sym->setExternal(true);
+ const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx);
+ MCOp = MCOperand::createExpr(Expr);
+ return true;
+ }
}
}
@@ -60,44 +139,24 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const {
for (const MachineOperand &MO : MI->explicit_operands()) {
MCOperand MCOp;
- switch (MO.getType()) {
- default:
- llvm_unreachable("unknown operand type");
- case MachineOperand::MO_Immediate:
- MCOp = MCOperand::createImm(MO.getImm());
- break;
- case MachineOperand::MO_Register:
- MCOp = MCOperand::createReg(AMDGPU::getMCReg(MO.getReg(), ST));
- break;
- case MachineOperand::MO_MachineBasicBlock:
- MCOp = MCOperand::createExpr(MCSymbolRefExpr::create(
- MO.getMBB()->getSymbol(), Ctx));
- break;
- case MachineOperand::MO_GlobalAddress: {
- const GlobalValue *GV = MO.getGlobal();
- MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(GV->getName()));
- const MCExpr *SymExpr =
- MCSymbolRefExpr::create(Sym, getVariantKind(MO.getTargetFlags()),Ctx);
- const MCExpr *Expr = MCBinaryExpr::createAdd(SymExpr,
- MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
- MCOp = MCOperand::createExpr(Expr);
- break;
- }
- case MachineOperand::MO_ExternalSymbol: {
- MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(MO.getSymbolName()));
- Sym->setExternal(true);
- const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx);
- MCOp = MCOperand::createExpr(Expr);
- break;
- }
- }
+ lowerOperand(MO, MCOp);
OutMI.addOperand(MCOp);
}
}
+bool AMDGPUAsmPrinter::lowerOperand(const MachineOperand &MO,
+ MCOperand &MCOp) const {
+ const AMDGPUSubtarget &STI = MF->getSubtarget<AMDGPUSubtarget>();
+ AMDGPUMCInstLower MCInstLowering(OutContext, STI, *this);
+ return MCInstLowering.lowerOperand(MO, MCOp);
+}
+
void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ if (emitPseudoExpansionLowering(*OutStreamer, MI))
+ return;
+
const AMDGPUSubtarget &STI = MF->getSubtarget<AMDGPUSubtarget>();
- AMDGPUMCInstLower MCInstLowering(OutContext, STI);
+ AMDGPUMCInstLower MCInstLowering(OutContext, STI, *this);
StringRef Err;
if (!STI.getInstrInfo()->verifyInstruction(*MI, Err)) {
@@ -137,6 +196,12 @@ void AMDGPUAsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
}
+ if (MI->getOpcode() == AMDGPU::WAVE_BARRIER) {
+ if (isVerbose())
+ OutStreamer->emitRawComment(" wave barrier");
+ return;
+ }
+
MCInst TmpInst;
MCInstLowering.lower(MI, TmpInst);
EmitToStreamer(*OutStreamer, TmpInst);
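
getLongBranchBlockExpr in the hunk above builds DestBB - (SrcBB + 4) for forward long branches and (SrcBB + 4) - DestBB for backward ones, because s_getpc_b64 yields the address of the instruction that follows it. A hedged arithmetic sketch, not LLVM code, with invented block addresses:

// Sketch of the offset the MCExpr built above evaluates to, assuming
// s_getpc_b64 is the first (4-byte) instruction of the source block.
#include <cassert>
#include <cstdint>

int64_t longBranchOffset(uint64_t SrcBlockAddr, uint64_t DestBlockAddr,
                         bool Forward) {
  uint64_t PC = SrcBlockAddr + 4;               // PC materialized by s_getpc_b64
  return Forward ? int64_t(DestBlockAddr - PC)  // TF_LONG_BRANCH_FORWARD
                 : int64_t(PC - DestBlockAddr); // TF_LONG_BRANCH_BACKWARD
}

int main() {
  assert(longBranchOffset(0x1000, 0x2000, /*Forward=*/true) == 0xffc);
  assert(longBranchOffset(0x3000, 0x1000, /*Forward=*/false) == 0x2004);
}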
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.h
index 957dcd0de8ef..57d2d85daecd 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.h
@@ -5,7 +5,6 @@
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
-/// \file
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUMCINSTLOWER_H
@@ -14,16 +13,28 @@
namespace llvm {
class AMDGPUSubtarget;
+class AsmPrinter;
+class MachineBasicBlock;
class MachineInstr;
+class MachineOperand;
class MCContext;
+class MCExpr;
class MCInst;
+class MCOperand;
class AMDGPUMCInstLower {
MCContext &Ctx;
const AMDGPUSubtarget &ST;
+ const AsmPrinter &AP;
+
+ const MCExpr *getLongBranchBlockExpr(const MachineBasicBlock &SrcBB,
+ const MachineOperand &MO) const;
public:
- AMDGPUMCInstLower(MCContext &ctx, const AMDGPUSubtarget &ST);
+ AMDGPUMCInstLower(MCContext &ctx, const AMDGPUSubtarget &ST,
+ const AsmPrinter &AP);
+
+ bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const;
/// \brief Lower a MachineInstr to an MCInst
void lower(const MachineInstr *MI, MCInst &OutMI) const;
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
index 44516dab04f1..40c3327a98db 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.cpp
@@ -1,23 +1,47 @@
+//===-- AMDGPUMachineFunction.cpp -------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
#include "AMDGPUMachineFunction.h"
+#include "AMDGPUSubtarget.h"
using namespace llvm;
-// Pin the vtable to this file.
-void AMDGPUMachineFunction::anchor() {}
-
AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
MachineFunctionInfo(),
+ LocalMemoryObjects(),
KernArgSize(0),
MaxKernArgAlign(0),
LDSSize(0),
ABIArgOffset(0),
- ScratchSize(0),
- IsKernel(MF.getFunction()->getCallingConv() == llvm::CallingConv::AMDGPU_KERNEL ||
- MF.getFunction()->getCallingConv() == llvm::CallingConv::SPIR_KERNEL)
-{
+ IsKernel(MF.getFunction()->getCallingConv() == CallingConv::AMDGPU_KERNEL ||
+ MF.getFunction()->getCallingConv() == CallingConv::SPIR_KERNEL) {
+ // FIXME: Should initialize KernArgSize based on ExplicitKernelArgOffset,
+ // except reserved size is not correctly aligned.
}
-bool AMDGPUMachineFunction::isKernel() const
-{
- return IsKernel;
+unsigned AMDGPUMachineFunction::allocateLDSGlobal(const DataLayout &DL,
+ const GlobalValue &GV) {
+ auto Entry = LocalMemoryObjects.insert(std::make_pair(&GV, 0));
+ if (!Entry.second)
+ return Entry.first->second;
+
+ unsigned Align = GV.getAlignment();
+ if (Align == 0)
+ Align = DL.getABITypeAlignment(GV.getValueType());
+
+ /// TODO: We should sort these to minimize wasted space due to alignment
+ /// padding. Currently the padding is decided by the first encountered use
+ /// during lowering.
+ unsigned Offset = LDSSize = alignTo(LDSSize, Align);
+
+ Entry.first->second = Offset;
+ LDSSize += DL.getTypeAllocSize(GV.getValueType());
+
+ return Offset;
}
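
The new allocateLDSGlobal above records a per-global offset and grows LDSSize by the aligned allocation size. A hedged sketch, not LLVM code, of the same bookkeeping with invented globals and sizes:

// Sketch of the offset assignment done by allocateLDSGlobal: each global is
// placed at the current LDS size rounded up to its alignment.
#include <map>
#include <string>

static unsigned alignTo(unsigned Value, unsigned Align) {
  return (Value + Align - 1) / Align * Align;
}

int main() {
  std::map<std::string, unsigned> Offsets; // stands in for LocalMemoryObjects
  unsigned LDSSize = 0;
  struct { const char *Name; unsigned Size, Align; } GVs[] = {
      {"a", 4, 4}, {"b", 16, 16}, {"c", 1, 1}}; // invented globals
  for (const auto &GV : GVs) {
    unsigned Offset = LDSSize = alignTo(LDSSize, GV.Align);
    Offsets[GV.Name] = Offset;
    LDSSize += GV.Size;
  }
  // a -> 0, b -> 16 (12 bytes of padding), c -> 32; LDSSize ends at 33.
}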
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
index 6b31f63e1a9d..5d0640b816f3 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUMachineFunction.h
@@ -11,15 +11,26 @@
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUMACHINEFUNCTION_H
#include "llvm/CodeGen/MachineFunction.h"
-#include <map>
+#include "llvm/ADT/DenseMap.h"
namespace llvm {
class AMDGPUMachineFunction : public MachineFunctionInfo {
+ /// A map to keep track of local memory objects and their offsets within the
+ /// local memory space.
+ SmallDenseMap<const GlobalValue *, unsigned, 4> LocalMemoryObjects;
+
uint64_t KernArgSize;
unsigned MaxKernArgAlign;
- virtual void anchor();
+ /// Number of bytes in the LDS that are being used.
+ unsigned LDSSize;
+
+ // FIXME: This should probably be removed.
+ /// Start of implicit kernel args
+ unsigned ABIArgOffset;
+
+ bool IsKernel;
public:
AMDGPUMachineFunction(const MachineFunction &MF);
@@ -35,19 +46,31 @@ public:
return Result;
}
- /// A map to keep track of local memory objects and their offsets within
- /// the local memory space.
- std::map<const GlobalValue *, unsigned> LocalMemoryObjects;
- /// Number of bytes in the LDS that are being used.
- unsigned LDSSize;
+ uint64_t getKernArgSize() const {
+ return KernArgSize;
+ }
- /// Start of implicit kernel args
- unsigned ABIArgOffset;
+ unsigned getMaxKernArgAlign() const {
+ return MaxKernArgAlign;
+ }
- bool isKernel() const;
+ void setABIArgOffset(unsigned NewOffset) {
+ ABIArgOffset = NewOffset;
+ }
- unsigned ScratchSize;
- bool IsKernel;
+ unsigned getABIArgOffset() const {
+ return ABIArgOffset;
+ }
+
+ unsigned getLDSSize() const {
+ return LDSSize;
+ }
+
+ bool isKernel() const {
+ return IsKernel;
+ }
+
+ unsigned allocateLDSGlobal(const DataLayout &DL, const GlobalValue &GV);
};
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUOpenCLImageTypeLoweringPass.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUOpenCLImageTypeLoweringPass.cpp
index 8bc7b53435be..410bd52d9c21 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUOpenCLImageTypeLoweringPass.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUOpenCLImageTypeLoweringPass.cpp
@@ -358,7 +358,7 @@ class AMDGPUOpenCLImageTypeLoweringPass : public ModulePass {
return transformKernels(M);
}
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "AMDGPU OpenCL Image Type Pass";
}
};
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUPTNote.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUPTNote.h
new file mode 100644
index 000000000000..947d45b66969
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUPTNote.h
@@ -0,0 +1,42 @@
+//===-- AMDGPUPTNote.h - AMDGPU ELF PT_NOTE section info --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+///
+/// Enums and constants for AMDGPU PT_NOTE sections.
+///
+//
+//===----------------------------------------------------------------------===//
+//
+#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUPTNOTE_H
+#define LLVM_LIB_TARGET_AMDGPU_AMDGPUPTNOTE_H
+
+namespace AMDGPU {
+
+namespace PT_NOTE {
+
+const char SectionName[] = ".note";
+
+const char NoteName[] = "AMD";
+
+enum NoteType {
+ NT_AMDGPU_HSA_CODE_OBJECT_VERSION = 1,
+ NT_AMDGPU_HSA_HSAIL = 2,
+ NT_AMDGPU_HSA_ISA = 3,
+ NT_AMDGPU_HSA_PRODUCER = 4,
+ NT_AMDGPU_HSA_PRODUCER_OPTIONS = 5,
+ NT_AMDGPU_HSA_EXTENSION = 6,
+ NT_AMDGPU_HSA_RUNTIME_METADATA = 7,
+ NT_AMDGPU_HSA_HLDEBUG_DEBUG = 101,
+ NT_AMDGPU_HSA_HLDEBUG_TARGET = 102
+};
+}
+}
+
+#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUPTNOTE_H
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
index 0bad63fa77ad..baa28de7a770 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUPromoteAlloca.cpp
@@ -76,9 +76,7 @@ public:
bool doInitialization(Module &M) override;
bool runOnFunction(Function &F) override;
- const char *getPassName() const override {
- return "AMDGPU Promote Alloca";
- }
+ StringRef getPassName() const override { return "AMDGPU Promote Alloca"; }
void handleAlloca(AllocaInst &I);
@@ -184,13 +182,12 @@ bool AMDGPUPromoteAlloca::runOnFunction(Function &F) {
// TODO: Have some sort of hint or other heuristics to guess occupancy based
// on other factors..
- unsigned OccupancyHint
- = AMDGPU::getIntegerAttribute(F, "amdgpu-max-waves-per-eu", 0);
+ unsigned OccupancyHint = ST.getWavesPerEU(F).second;
if (OccupancyHint == 0)
OccupancyHint = 7;
// Clamp to max value.
- OccupancyHint = std::min(OccupancyHint, ST.getMaxWavesPerCU());
+ OccupancyHint = std::min(OccupancyHint, ST.getMaxWavesPerEU());
// Check the hint but ignore it if it's obviously wrong from the existing LDS
// usage.
@@ -535,7 +532,7 @@ bool AMDGPUPromoteAlloca::collectUsesWithPtrTypes(
std::vector<Value*> &WorkList) const {
for (User *User : Val->users()) {
- if (std::find(WorkList.begin(), WorkList.end(), User) != WorkList.end())
+ if (is_contained(WorkList, User))
continue;
if (CallInst *CI = dyn_cast<CallInst>(User)) {
@@ -550,7 +547,7 @@ bool AMDGPUPromoteAlloca::collectUsesWithPtrTypes(
if (UseInst->getOpcode() == Instruction::PtrToInt)
return false;
- if (LoadInst *LI = dyn_cast_or_null<LoadInst>(UseInst)) {
+ if (LoadInst *LI = dyn_cast<LoadInst>(UseInst)) {
if (LI->isVolatile())
return false;
@@ -564,11 +561,10 @@ bool AMDGPUPromoteAlloca::collectUsesWithPtrTypes(
// Reject if the stored value is not the pointer operand.
if (SI->getPointerOperand() != Val)
return false;
- } else if (AtomicRMWInst *RMW = dyn_cast_or_null<AtomicRMWInst>(UseInst)) {
+ } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(UseInst)) {
if (RMW->isVolatile())
return false;
- } else if (AtomicCmpXchgInst *CAS
- = dyn_cast_or_null<AtomicCmpXchgInst>(UseInst)) {
+ } else if (AtomicCmpXchgInst *CAS = dyn_cast<AtomicCmpXchgInst>(UseInst)) {
if (CAS->isVolatile())
return false;
}
@@ -583,6 +579,12 @@ bool AMDGPUPromoteAlloca::collectUsesWithPtrTypes(
WorkList.push_back(ICmp);
}
+ if (UseInst->getOpcode() == Instruction::AddrSpaceCast) {
+ // Don't collect the users of this.
+ WorkList.push_back(User);
+ continue;
+ }
+
if (!User->getType()->isPointerTy())
continue;
@@ -651,9 +653,11 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) {
if (AMDGPU::isShader(ContainingFunction.getCallingConv()))
return;
+ const AMDGPUSubtarget &ST =
+ TM->getSubtarget<AMDGPUSubtarget>(ContainingFunction);
// FIXME: We should also try to get this value from the reqd_work_group_size
// function attribute if it is available.
- unsigned WorkGroupSize = AMDGPU::getMaximumWorkGroupSize(ContainingFunction);
+ unsigned WorkGroupSize = ST.getFlatWorkGroupSizes(ContainingFunction).second;
const DataLayout &DL = Mod->getDataLayout();
@@ -741,7 +745,8 @@ void AMDGPUPromoteAlloca::handleAlloca(AllocaInst &I) {
continue;
}
- // The operand's value should be corrected on its own.
+ // The operand's value should be corrected on its own and we don't want to
+ // touch the users.
if (isa<AddrSpaceCastInst>(V))
continue;
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h
index 40f639434507..ecd2ac72bf1b 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPURuntimeMetadata.h
@@ -13,18 +13,13 @@
///
/// Runtime requests certain information (metadata) about kernels to be able
/// to execute the kernels and answer the queries about the kernels.
-/// The metadata is represented as a byte stream in an ELF section of a
-/// binary (code object). The byte stream consists of key-value pairs.
-/// Each key is an 8 bit unsigned integer. Each value can be an integer,
-/// a string, or a stream of key-value pairs. There are 3 levels of key-value
-/// pair streams. At the beginning of the ELF section is the top level
-/// key-value pair stream. A kernel-level key-value pair stream starts after
-/// encountering KeyKernelBegin and ends immediately before encountering
-/// KeyKernelEnd. A kernel-argument-level key-value pair stream starts
-/// after encountering KeyArgBegin and ends immediately before encountering
-/// KeyArgEnd. A kernel-level key-value pair stream can only appear in a top
-/// level key-value pair stream. A kernel-argument-level key-value pair stream
-/// can only appear in a kernel-level key-value pair stream.
+/// The metadata is represented as a note element in the .note ELF section of a
+/// binary (code object). The desc field of the note element is a YAML string
+/// consisting of key-value pairs. Each key is a string. Each value can be
+/// an integer, a string, or a YAML sequence. There are 3 levels of YAML maps.
+/// At the beginning of the YAML string is the module level YAML map. A
+/// kernel-level YAML map is in the amd.Kernels sequence. A
+/// kernel-argument-level map is in the amd.Args sequence.
///
/// The format should be kept backward compatible. New enum values and bit
/// fields should be appended at the end. It is suggested to bump up the
@@ -37,77 +32,64 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPURUNTIMEMETADATA_H
-#include <stdint.h>
+#include <cstdint>
+#include <vector>
+#include <string>
namespace AMDGPU {
namespace RuntimeMD {
// Version and revision of runtime metadata
- const unsigned char MDVersion = 1;
+ const unsigned char MDVersion = 2;
const unsigned char MDRevision = 0;
- // ELF section name containing runtime metadata
- const char SectionName[] = ".AMDGPU.runtime_metadata";
-
- // Enumeration values of keys in runtime metadata.
- enum Key {
- KeyNull = 0, // Place holder. Ignored when encountered
- KeyMDVersion = 1, // Runtime metadata version
- KeyLanguage = 2, // Language
- KeyLanguageVersion = 3, // Language version
- KeyKernelBegin = 4, // Beginning of kernel-level stream
- KeyKernelEnd = 5, // End of kernel-level stream
- KeyKernelName = 6, // Kernel name
- KeyArgBegin = 7, // Beginning of kernel-arg-level stream
- KeyArgEnd = 8, // End of kernel-arg-level stream
- KeyArgSize = 9, // Kernel arg size
- KeyArgAlign = 10, // Kernel arg alignment
- KeyArgTypeName = 11, // Kernel type name
- KeyArgName = 12, // Kernel name
- KeyArgTypeKind = 13, // Kernel argument type kind
- KeyArgValueType = 14, // Kernel argument value type
- KeyArgAddrQual = 15, // Kernel argument address qualifier
- KeyArgAccQual = 16, // Kernel argument access qualifier
- KeyArgIsConst = 17, // Kernel argument is const qualified
- KeyArgIsRestrict = 18, // Kernel argument is restrict qualified
- KeyArgIsVolatile = 19, // Kernel argument is volatile qualified
- KeyArgIsPipe = 20, // Kernel argument is pipe qualified
- KeyReqdWorkGroupSize = 21, // Required work group size
- KeyWorkGroupSizeHint = 22, // Work group size hint
- KeyVecTypeHint = 23, // Vector type hint
- KeyKernelIndex = 24, // Kernel index for device enqueue
- KeySGPRs = 25, // Number of SGPRs
- KeyVGPRs = 26, // Number of VGPRs
- KeyMinWavesPerSIMD = 27, // Minimum number of waves per SIMD
- KeyMaxWavesPerSIMD = 28, // Maximum number of waves per SIMD
- KeyFlatWorkGroupSizeLimits = 29, // Flat work group size limits
- KeyMaxWorkGroupSize = 30, // Maximum work group size
- KeyNoPartialWorkGroups = 31, // No partial work groups
- };
-
- enum Language : uint8_t {
- OpenCL_C = 0,
- HCC = 1,
- OpenMP = 2,
- OpenCL_CPP = 3,
-};
-
- enum LanguageVersion : uint16_t {
- V100 = 100,
- V110 = 110,
- V120 = 120,
- V200 = 200,
- V210 = 210,
- };
+ // Names of keys for runtime metadata.
+ namespace KeyName {
+ const char MDVersion[] = "amd.MDVersion"; // Runtime metadata version
+ const char Language[] = "amd.Language"; // Language
+ const char LanguageVersion[] = "amd.LanguageVersion"; // Language version
+ const char Kernels[] = "amd.Kernels"; // Kernels
+ const char KernelName[] = "amd.KernelName"; // Kernel name
+ const char Args[] = "amd.Args"; // Kernel arguments
+ const char ArgSize[] = "amd.ArgSize"; // Kernel arg size
+ const char ArgAlign[] = "amd.ArgAlign"; // Kernel arg alignment
+ const char ArgTypeName[] = "amd.ArgTypeName"; // Kernel argument type name
+ const char ArgName[] = "amd.ArgName"; // Kernel argument name
+ const char ArgKind[] = "amd.ArgKind"; // Kernel argument kind
+ const char ArgValueType[] = "amd.ArgValueType"; // Kernel argument value type
+ const char ArgAddrQual[] = "amd.ArgAddrQual"; // Kernel argument address qualifier
+ const char ArgAccQual[] = "amd.ArgAccQual"; // Kernel argument access qualifier
+ const char ArgIsConst[] = "amd.ArgIsConst"; // Kernel argument is const qualified
+ const char ArgIsRestrict[] = "amd.ArgIsRestrict"; // Kernel argument is restrict qualified
+ const char ArgIsVolatile[] = "amd.ArgIsVolatile"; // Kernel argument is volatile qualified
+ const char ArgIsPipe[] = "amd.ArgIsPipe"; // Kernel argument is pipe qualified
+ const char ReqdWorkGroupSize[] = "amd.ReqdWorkGroupSize"; // Required work group size
+ const char WorkGroupSizeHint[] = "amd.WorkGroupSizeHint"; // Work group size hint
+ const char VecTypeHint[] = "amd.VecTypeHint"; // Vector type hint
+ const char KernelIndex[] = "amd.KernelIndex"; // Kernel index for device enqueue
+ const char NoPartialWorkGroups[] = "amd.NoPartialWorkGroups"; // No partial work groups
+ const char PrintfInfo[] = "amd.PrintfInfo"; // Printf function call information
+ const char ArgActualAcc[] = "amd.ArgActualAcc"; // The actual kernel argument access qualifier
+ const char ArgPointeeAlign[] = "amd.ArgPointeeAlign"; // Alignment of pointee type
+ }
namespace KernelArg {
- enum TypeKind : uint8_t {
- Value = 0,
- Pointer = 1,
- Image = 2,
- Sampler = 3,
- Queue = 4,
+ enum Kind : uint8_t {
+ ByValue = 0,
+ GlobalBuffer = 1,
+ DynamicSharedPointer = 2,
+ Sampler = 3,
+ Image = 4,
+ Pipe = 5,
+ Queue = 6,
+ HiddenGlobalOffsetX = 7,
+ HiddenGlobalOffsetY = 8,
+ HiddenGlobalOffsetZ = 9,
+ HiddenNone = 10,
+ HiddenPrintfBuffer = 11,
+ HiddenDefaultQueue = 12,
+ HiddenCompletionAction = 13,
};
enum ValueType : uint16_t {
@@ -125,13 +107,86 @@ namespace RuntimeMD {
F64 = 11,
};
+ // Avoid using 'None' since it conflicts with a macro in an X11 header file.
enum AccessQualifer : uint8_t {
- None = 0,
+ AccNone = 0,
ReadOnly = 1,
WriteOnly = 2,
ReadWrite = 3,
};
+
+ enum AddressSpaceQualifer : uint8_t {
+ Private = 0,
+ Global = 1,
+ Constant = 2,
+ Local = 3,
+ Generic = 4,
+ Region = 5,
+ };
} // namespace KernelArg
+
+ // Invalid values are used to indicate an optional key should not be emitted.
+ const uint8_t INVALID_ADDR_QUAL = 0xff;
+ const uint8_t INVALID_ACC_QUAL = 0xff;
+ const uint32_t INVALID_KERNEL_INDEX = ~0U;
+
+ namespace KernelArg {
+ // In-memory representation of kernel argument information.
+ struct Metadata {
+ uint32_t Size;
+ uint32_t Align;
+ uint32_t PointeeAlign;
+ uint8_t Kind;
+ uint16_t ValueType;
+ std::string TypeName;
+ std::string Name;
+ uint8_t AddrQual;
+ uint8_t AccQual;
+ uint8_t IsVolatile;
+ uint8_t IsConst;
+ uint8_t IsRestrict;
+ uint8_t IsPipe;
+ Metadata() : Size(0), Align(0), PointeeAlign(0), Kind(0), ValueType(0),
+ AddrQual(INVALID_ADDR_QUAL), AccQual(INVALID_ACC_QUAL), IsVolatile(0),
+ IsConst(0), IsRestrict(0), IsPipe(0) {}
+ };
+ }
+
+ namespace Kernel {
+ // In-memory representation of kernel information.
+ struct Metadata {
+ std::string Name;
+ std::string Language;
+ std::vector<uint8_t> LanguageVersion;
+ std::vector<uint32_t> ReqdWorkGroupSize;
+ std::vector<uint32_t> WorkGroupSizeHint;
+ std::string VecTypeHint;
+ uint32_t KernelIndex;
+ uint8_t NoPartialWorkGroups;
+ std::vector<KernelArg::Metadata> Args;
+ Metadata() : KernelIndex(INVALID_KERNEL_INDEX), NoPartialWorkGroups(0) {}
+ };
+ }
+
+ namespace Program {
+ // In-memory representation of program information.
+ struct Metadata {
+ std::vector<uint8_t> MDVersionSeq;
+ std::vector<std::string> PrintfInfo;
+ std::vector<Kernel::Metadata> Kernels;
+
+ explicit Metadata(){}
+
+ // Construct from a YAML string.
+ explicit Metadata(const std::string &YAML);
+
+ // Convert to YAML string.
+ std::string toYAML();
+
+ // Convert from YAML string.
+ static Metadata fromYAML(const std::string &S);
+ };
+ }
} // namespace RuntimeMD
} // namespace AMDGPU
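
The in-memory structs above mirror the YAML nesting described in the file comment: a module-level map, an amd.Kernels sequence of kernel maps, and an amd.Args sequence of argument maps. A hedged sketch of how a producer might populate them; the kernel name, language string, and argument values are invented:

// Sketch (not part of the patch): building Program/Kernel/KernelArg metadata
// using only fields and enum values declared in AMDGPURuntimeMetadata.h.
#include "AMDGPURuntimeMetadata.h"

AMDGPU::RuntimeMD::Program::Metadata buildExampleMetadata() {
  using namespace AMDGPU::RuntimeMD;

  KernelArg::Metadata Arg;
  Arg.Size = 8;                       // invented: one global pointer argument
  Arg.Align = 8;
  Arg.Kind = KernelArg::GlobalBuffer;
  Arg.TypeName = "float*";
  Arg.Name = "out";
  Arg.AccQual = KernelArg::ReadWrite;

  Kernel::Metadata Kern;
  Kern.Name = "vector_add";           // invented kernel name
  Kern.Language = "OpenCL C";         // invented language string
  Kern.LanguageVersion = {2, 0};
  Kern.Args.push_back(Arg);

  Program::Metadata Prog;
  Prog.MDVersionSeq = {MDVersion, MDRevision};
  Prog.Kernels.push_back(Kern);
  return Prog;                        // Prog.toYAML() would serialize it
}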
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 10fa9cf46737..74851aedbb21 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -13,14 +13,10 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUSubtarget.h"
-#include "R600ISelLowering.h"
-#include "R600InstrInfo.h"
-#include "SIFrameLowering.h"
-#include "SIISelLowering.h"
-#include "SIInstrInfo.h"
-#include "SIMachineFunctionInfo.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include <algorithm>
using namespace llvm;
@@ -31,7 +27,7 @@ using namespace llvm;
#define GET_SUBTARGETINFO_CTOR
#include "AMDGPUGenSubtargetInfo.inc"
-AMDGPUSubtarget::~AMDGPUSubtarget() {}
+AMDGPUSubtarget::~AMDGPUSubtarget() = default;
AMDGPUSubtarget &
AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
@@ -56,6 +52,7 @@ AMDGPUSubtarget::initializeSubtargetDependencies(const Triple &TT,
// denormals, but should be checked. Should we issue a warning somewhere
// if someone tries to enable these?
if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
+ FP16Denormals = false;
FP32Denormals = false;
FP64Denormals = false;
}
@@ -81,10 +78,12 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
FastFMAF32(false),
HalfRate64Ops(false),
+ FP16Denormals(false),
FP32Denormals(false),
FP64Denormals(false),
FPExceptions(false),
FlatForGlobal(false),
+ UnalignedScratchAccess(false),
UnalignedBufferAccess(false),
EnableXNACK(false),
@@ -107,6 +106,10 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
SGPRInitBug(false),
HasSMemRealTime(false),
Has16BitInsts(false),
+ HasMovrel(false),
+ HasVGPRIndexMode(false),
+ HasScalarStores(false),
+ HasInv2PiInlineImm(false),
FlatAddressSpace(false),
R600ALUInst(false),
@@ -114,6 +117,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
CFALUBug(false),
HasVertexCache(false),
TexVTXClauseSize(0),
+ ScalarizeGlobal(false),
FeatureDisable(false),
InstrItins(getInstrItineraryForCPU(GPU)) {
@@ -178,6 +182,86 @@ unsigned AMDGPUSubtarget::getOccupancyWithLocalMemSize(uint32_t Bytes) const {
return 1;
}
+std::pair<unsigned, unsigned> AMDGPUSubtarget::getFlatWorkGroupSizes(
+ const Function &F) const {
+ // Default minimum/maximum flat work group sizes.
+ std::pair<unsigned, unsigned> Default =
+ AMDGPU::isCompute(F.getCallingConv()) ?
+ std::pair<unsigned, unsigned>(getWavefrontSize() * 2,
+ getWavefrontSize() * 4) :
+ std::pair<unsigned, unsigned>(1, getWavefrontSize());
+
+ // TODO: Do not process "amdgpu-max-work-group-size" attribute once mesa
+ // starts using "amdgpu-flat-work-group-size" attribute.
+ Default.second = AMDGPU::getIntegerAttribute(
+ F, "amdgpu-max-work-group-size", Default.second);
+ Default.first = std::min(Default.first, Default.second);
+
+ // Requested minimum/maximum flat work group sizes.
+ std::pair<unsigned, unsigned> Requested = AMDGPU::getIntegerPairAttribute(
+ F, "amdgpu-flat-work-group-size", Default);
+
+ // Make sure requested minimum is less than requested maximum.
+ if (Requested.first > Requested.second)
+ return Default;
+
+ // Make sure requested values do not violate subtarget's specifications.
+ if (Requested.first < getMinFlatWorkGroupSize())
+ return Default;
+ if (Requested.second > getMaxFlatWorkGroupSize())
+ return Default;
+
+ return Requested;
+}
+
+std::pair<unsigned, unsigned> AMDGPUSubtarget::getWavesPerEU(
+ const Function &F) const {
+ // Default minimum/maximum number of waves per execution unit.
+ std::pair<unsigned, unsigned> Default(1, 0);
+
+ // Default/requested minimum/maximum flat work group sizes.
+ std::pair<unsigned, unsigned> FlatWorkGroupSizes = getFlatWorkGroupSizes(F);
+
+ // If minimum/maximum flat work group sizes were explicitly requested using
+ // "amdgpu-flat-work-group-size" attribute, then set default minimum/maximum
+ // number of waves per execution unit to values implied by requested
+ // minimum/maximum flat work group sizes.
+ unsigned MinImpliedByFlatWorkGroupSize =
+ getMaxWavesPerEU(FlatWorkGroupSizes.second);
+ bool RequestedFlatWorkGroupSize = false;
+
+ // TODO: Do not process "amdgpu-max-work-group-size" attribute once mesa
+ // starts using "amdgpu-flat-work-group-size" attribute.
+ if (F.hasFnAttribute("amdgpu-max-work-group-size") ||
+ F.hasFnAttribute("amdgpu-flat-work-group-size")) {
+ Default.first = MinImpliedByFlatWorkGroupSize;
+ RequestedFlatWorkGroupSize = true;
+ }
+
+ // Requested minimum/maximum number of waves per execution unit.
+ std::pair<unsigned, unsigned> Requested = AMDGPU::getIntegerPairAttribute(
+ F, "amdgpu-waves-per-eu", Default, true);
+
+ // Make sure requested minimum is less than requested maximum.
+ if (Requested.second && Requested.first > Requested.second)
+ return Default;
+
+ // Make sure requested values do not violate subtarget's specifications.
+ if (Requested.first < getMinWavesPerEU() ||
+ Requested.first > getMaxWavesPerEU())
+ return Default;
+ if (Requested.second > getMaxWavesPerEU())
+ return Default;
+
+ // Make sure requested values are compatible with values implied by requested
+ // minimum/maximum flat work group sizes.
+ if (RequestedFlatWorkGroupSize &&
+ Requested.first > MinImpliedByFlatWorkGroupSize)
+ return Default;
+
+ return Requested;
+}
+
R600Subtarget::R600Subtarget(const Triple &TT, StringRef GPU, StringRef FS,
const TargetMachine &TM) :
AMDGPUSubtarget(TT, GPU, FS, TM),
@@ -190,21 +274,7 @@ SISubtarget::SISubtarget(const Triple &TT, StringRef GPU, StringRef FS,
AMDGPUSubtarget(TT, GPU, FS, TM),
InstrInfo(*this),
FrameLowering(TargetFrameLowering::StackGrowsUp, getStackAlignment(), 0),
- TLInfo(TM, *this),
- GISel() {}
-
-unsigned R600Subtarget::getStackEntrySize() const {
- switch (getWavefrontSize()) {
- case 16:
- return 8;
- case 32:
- return hasCaymanISA() ? 4 : 8;
- case 64:
- return 4;
- default:
- llvm_unreachable("Illegal wavefront size.");
- }
-}
+ TLInfo(TM, *this) {}
void SISubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
unsigned NumRegionInstrs) const {
@@ -227,15 +297,66 @@ bool SISubtarget::isVGPRSpillingEnabled(const Function& F) const {
return EnableVGPRSpilling || !AMDGPU::isShader(F.getCallingConv());
}
-unsigned SISubtarget::getAmdKernelCodeChipID() const {
- switch (getGeneration()) {
- case SEA_ISLANDS:
- return 12;
- default:
- llvm_unreachable("ChipID unknown");
+unsigned SISubtarget::getKernArgSegmentSize(unsigned ExplicitArgBytes) const {
+ unsigned ImplicitBytes = getImplicitArgNumBytes();
+ if (ImplicitBytes == 0)
+ return ExplicitArgBytes;
+
+ unsigned Alignment = getAlignmentForImplicitArgPtr();
+ return alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes;
+}
+
+unsigned SISubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
+ if (getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
+ if (SGPRs <= 80)
+ return 10;
+ if (SGPRs <= 88)
+ return 9;
+ if (SGPRs <= 100)
+ return 8;
+ return 7;
}
+ if (SGPRs <= 48)
+ return 10;
+ if (SGPRs <= 56)
+ return 9;
+ if (SGPRs <= 64)
+ return 8;
+ if (SGPRs <= 72)
+ return 7;
+ if (SGPRs <= 80)
+ return 6;
+ return 5;
+}
+
+unsigned SISubtarget::getOccupancyWithNumVGPRs(unsigned VGPRs) const {
+ if (VGPRs <= 24)
+ return 10;
+ if (VGPRs <= 28)
+ return 9;
+ if (VGPRs <= 32)
+ return 8;
+ if (VGPRs <= 36)
+ return 7;
+ if (VGPRs <= 40)
+ return 6;
+ if (VGPRs <= 48)
+ return 5;
+ if (VGPRs <= 64)
+ return 4;
+ if (VGPRs <= 84)
+ return 3;
+ if (VGPRs <= 128)
+ return 2;
+ return 1;
}
-AMDGPU::IsaVersion SISubtarget::getIsaVersion() const {
- return AMDGPU::getIsaVersion(getFeatureBits());
+unsigned SISubtarget::getMaxNumSGPRs() const {
+ if (hasSGPRInitBug())
+ return SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
+
+ if (getGeneration() >= VOLCANIC_ISLANDS)
+ return 102;
+
+ return 104;
}
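
The new getKernArgSegmentSize above pads the explicit kernel arguments to the implicit-argument pointer alignment before appending the implicit block. A hedged worked sketch, not LLVM code, assuming the amdhsa + OpenCL case from AMDGPUSubtarget.h in this patch (32 implicit bytes, 8-byte alignment):

// Sketch of the same arithmetic as getKernArgSegmentSize.
#include <cassert>

static unsigned alignTo(unsigned Value, unsigned Align) {
  return (Value + Align - 1) / Align * Align;
}

static unsigned kernArgSegmentSize(unsigned ExplicitArgBytes,
                                   unsigned ImplicitBytes, unsigned Alignment) {
  if (ImplicitBytes == 0)
    return ExplicitArgBytes;
  return alignTo(ExplicitArgBytes, Alignment) + ImplicitBytes;
}

int main() {
  // 20 explicit bytes round up to 24, then 32 implicit bytes follow.
  assert(kernArgSegmentSize(20, 32, 8) == 56);
  // Without implicit arguments (non-HSA, non-Mesa) nothing is appended.
  assert(kernArgSegmentSize(20, 0, 4) == 20);
}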
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 3fe61aa449e0..51ba501bddd1 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -23,15 +23,22 @@
#include "SIISelLowering.h"
#include "SIFrameLowering.h"
#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
-#include "llvm/Target/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+#include "llvm/MC/MCInstrItineraries.h"
+#include "llvm/Support/MathExtras.h"
+#include <cassert>
+#include <cstdint>
+#include <memory>
+#include <utility>
#define GET_SUBTARGETINFO_HEADER
#include "AMDGPUGenSubtargetInfo.inc"
namespace llvm {
-class SIMachineFunctionInfo;
class StringRef;
class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo {
@@ -50,9 +57,13 @@ public:
ISAVersion0_0_0,
ISAVersion7_0_0,
ISAVersion7_0_1,
+ ISAVersion7_0_2,
ISAVersion8_0_0,
ISAVersion8_0_1,
- ISAVersion8_0_3
+ ISAVersion8_0_2,
+ ISAVersion8_0_3,
+ ISAVersion8_0_4,
+ ISAVersion8_1_0,
};
protected:
@@ -70,10 +81,12 @@ protected:
bool HalfRate64Ops;
// Dynamially set bits that enable features.
+ bool FP16Denormals;
bool FP32Denormals;
bool FP64Denormals;
bool FPExceptions;
bool FlatForGlobal;
+ bool UnalignedScratchAccess;
bool UnalignedBufferAccess;
bool EnableXNACK;
bool DebuggerInsertNops;
@@ -97,40 +110,60 @@ protected:
bool SGPRInitBug;
bool HasSMemRealTime;
bool Has16BitInsts;
+ bool HasMovrel;
+ bool HasVGPRIndexMode;
+ bool HasScalarStores;
+ bool HasInv2PiInlineImm;
bool FlatAddressSpace;
bool R600ALUInst;
bool CaymanISA;
bool CFALUBug;
bool HasVertexCache;
short TexVTXClauseSize;
+ bool ScalarizeGlobal;
// Dummy feature to use for assembler in tablegen.
bool FeatureDisable;
InstrItineraryData InstrItins;
+ SelectionDAGTargetInfo TSInfo;
public:
AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
const TargetMachine &TM);
- virtual ~AMDGPUSubtarget();
+ ~AMDGPUSubtarget() override;
+
AMDGPUSubtarget &initializeSubtargetDependencies(const Triple &TT,
StringRef GPU, StringRef FS);
- const AMDGPUInstrInfo *getInstrInfo() const override;
- const AMDGPUFrameLowering *getFrameLowering() const override;
- const AMDGPUTargetLowering *getTargetLowering() const override;
- const AMDGPURegisterInfo *getRegisterInfo() const override;
+ const AMDGPUInstrInfo *getInstrInfo() const override = 0;
+ const AMDGPUFrameLowering *getFrameLowering() const override = 0;
+ const AMDGPUTargetLowering *getTargetLowering() const override = 0;
+ const AMDGPURegisterInfo *getRegisterInfo() const override = 0;
const InstrItineraryData *getInstrItineraryData() const override {
return &InstrItins;
}
+ // Nothing implemented, just prevent crashes on use.
+ const SelectionDAGTargetInfo *getSelectionDAGInfo() const override {
+ return &TSInfo;
+ }
+
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
bool isAmdHsaOS() const {
return TargetTriple.getOS() == Triple::AMDHSA;
}
+ bool isMesa3DOS() const {
+ return TargetTriple.getOS() == Triple::Mesa3D;
+ }
+
+ bool isOpenCLEnv() const {
+ return TargetTriple.getEnvironment() == Triple::OpenCL;
+ }
+
Generation getGeneration() const {
return Gen;
}
@@ -151,6 +184,10 @@ public:
return MaxPrivateElementSize;
}
+ bool has16BitInsts() const {
+ return Has16BitInsts;
+ }
+
bool hasHWFP64() const {
return FP64;
}
@@ -230,6 +267,10 @@ public:
return DumpCode;
}
+ bool enableIEEEBit(const MachineFunction &MF) const {
+ return AMDGPU::isCompute(MF.getFunction()->getCallingConv());
+ }
+
/// Return the amount of LDS that can be used that will not restrict the
/// occupancy lower than WaveCount.
unsigned getMaxLocalMemSizeWithWaveCount(unsigned WaveCount) const;
@@ -238,6 +279,9 @@ public:
/// the given LDS memory size is the only constraint.
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes) const;
+ bool hasFP16Denormals() const {
+ return FP16Denormals;
+ }
bool hasFP32Denormals() const {
return FP32Denormals;
@@ -259,22 +303,34 @@ public:
return UnalignedBufferAccess;
}
+ bool hasUnalignedScratchAccess() const {
+ return UnalignedScratchAccess;
+ }
+
bool isXNACKEnabled() const {
return EnableXNACK;
}
- unsigned getMaxWavesPerCU() const {
- if (getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS)
- return 10;
-
- // FIXME: Not sure what this is for other subtagets.
- return 8;
+ bool isAmdCodeObjectV2() const {
+ return isAmdHsaOS() || isMesa3DOS();
}
/// \brief Returns the offset in bytes from the start of the input buffer
/// of the first explicit kernel argument.
unsigned getExplicitKernelArgOffset() const {
- return isAmdHsaOS() ? 0 : 36;
+ return isAmdCodeObjectV2() ? 0 : 36;
+ }
+
+ unsigned getAlignmentForImplicitArgPtr() const {
+ return isAmdHsaOS() ? 8 : 4;
+ }
+
+ unsigned getImplicitArgNumBytes() const {
+ if (isMesa3DOS())
+ return 16;
+ if (isAmdHsaOS() && isOpenCLEnv())
+ return 32;
+ return 0;
}
unsigned getStackAlignment() const {
@@ -289,6 +345,92 @@ public:
bool enableSubRegLiveness() const override {
return true;
}
+
+ /// \returns Number of execution units per compute unit supported by the
+ /// subtarget.
+ unsigned getEUsPerCU() const {
+ return 4;
+ }
+
+ /// \returns Maximum number of work groups per compute unit supported by the
+ /// subtarget and limited by given flat work group size.
+ unsigned getMaxWorkGroupsPerCU(unsigned FlatWorkGroupSize) const {
+ if (getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
+ return 8;
+ return getWavesPerWorkGroup(FlatWorkGroupSize) == 1 ? 40 : 16;
+ }
+
+ /// \returns Maximum number of waves per compute unit supported by the
+ /// subtarget without any kind of limitation.
+ unsigned getMaxWavesPerCU() const {
+ return getMaxWavesPerEU() * getEUsPerCU();
+ }
+
+ /// \returns Maximum number of waves per compute unit supported by the
+ /// subtarget and limited by given flat work group size.
+ unsigned getMaxWavesPerCU(unsigned FlatWorkGroupSize) const {
+ return getWavesPerWorkGroup(FlatWorkGroupSize);
+ }
+
+ /// \returns Minimum number of waves per execution unit supported by the
+ /// subtarget.
+ unsigned getMinWavesPerEU() const {
+ return 1;
+ }
+
+ /// \returns Maximum number of waves per execution unit supported by the
+ /// subtarget without any kind of limitation.
+ unsigned getMaxWavesPerEU() const {
+ if (getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS)
+ return 8;
+ // FIXME: Need to take scratch memory into account.
+ return 10;
+ }
+
+ /// \returns Maximum number of waves per execution unit supported by the
+ /// subtarget and limited by given flat work group size.
+ unsigned getMaxWavesPerEU(unsigned FlatWorkGroupSize) const {
+ return alignTo(getMaxWavesPerCU(FlatWorkGroupSize), getEUsPerCU()) /
+ getEUsPerCU();
+ }
+
+ /// \returns Minimum flat work group size supported by the subtarget.
+ unsigned getMinFlatWorkGroupSize() const {
+ return 1;
+ }
+
+ /// \returns Maximum flat work group size supported by the subtarget.
+ unsigned getMaxFlatWorkGroupSize() const {
+ return 2048;
+ }
+
+ /// \returns Number of waves per work group given the flat work group size.
+ unsigned getWavesPerWorkGroup(unsigned FlatWorkGroupSize) const {
+ return alignTo(FlatWorkGroupSize, getWavefrontSize()) / getWavefrontSize();
+ }
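// Editor's note -- an illustrative sketch, not part of the patch: with the
// GCN wavefront size of 64, a flat work group size of 256 gives
//
//   getWavesPerWorkGroup(256) == alignTo(256, 64) / 64 == 4
//   getMaxWavesPerCU(256)     == 4
//   getMaxWavesPerEU(256)     == alignTo(4, 4) / 4     == 1
//
// i.e. a single 256-item work group amounts to four waves, or at most one
// wave per execution unit of the compute unit (getEUsPerCU() == 4).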
+
+ void setScalarizeGlobalBehavior(bool b) { ScalarizeGlobal = b; }
+ bool getScalarizeGlobalBehavior() const { return ScalarizeGlobal; }
+
+ /// \returns Subtarget's default pair of minimum/maximum flat work group sizes
+ /// for function \p F, or minimum/maximum flat work group sizes explicitly
+ /// requested using "amdgpu-flat-work-group-size" attribute attached to
+ /// function \p F.
+ ///
+ /// \returns Subtarget's default values if explicitly requested values cannot
+ /// be converted to integer, or violate subtarget's specifications.
+ std::pair<unsigned, unsigned> getFlatWorkGroupSizes(const Function &F) const;
+
+ /// \returns Subtarget's default pair of minimum/maximum number of waves per
+ /// execution unit for function \p F, or minimum/maximum number of waves per
+ /// execution unit explicitly requested using "amdgpu-waves-per-eu" attribute
+ /// attached to function \p F.
+ ///
+ /// \returns Subtarget's default values if explicitly requested values cannot
+ /// be converted to integer, violate subtarget's specifications, or are not
+ /// compatible with minimum/maximum number of waves limited by flat work group
+ /// size, register usage, and/or lds usage.
+ std::pair<unsigned, unsigned> getWavesPerEU(const Function &F) const;
};
class R600Subtarget final : public AMDGPUSubtarget {
@@ -328,14 +470,14 @@ public:
short getTexVTXClauseSize() const {
return TexVTXClauseSize;
}
-
- unsigned getStackEntrySize() const;
};
class SISubtarget final : public AMDGPUSubtarget {
public:
enum {
- FIXED_SGPR_COUNT_FOR_INIT_BUG = 80
+ // The closed Vulkan driver sets 96, which limits the wave count to 8 but
+ // doesn't spill SGPRs as much as when 80 is set.
+ FIXED_SGPR_COUNT_FOR_INIT_BUG = 96
};
private:
@@ -378,10 +520,6 @@ public:
bool isVGPRSpillingEnabled(const Function& F) const;
- unsigned getAmdKernelCodeChipID() const;
-
- AMDGPU::IsaVersion getIsaVersion() const;
-
unsigned getMaxNumUserSGPRs() const {
return 16;
}
@@ -394,8 +532,24 @@ public:
return HasSMemRealTime;
}
- bool has16BitInsts() const {
- return Has16BitInsts;
+ bool hasMovrel() const {
+ return HasMovrel;
+ }
+
+ bool hasVGPRIndexMode() const {
+ return HasVGPRIndexMode;
+ }
+
+ bool hasScalarCompareEq64() const {
+ return getGeneration() >= VOLCANIC_ISLANDS;
+ }
+
+ bool hasScalarStores() const {
+ return HasScalarStores;
+ }
+
+ bool hasInv2PiInlineImm() const {
+ return HasInv2PiInlineImm;
}
bool enableSIScheduler() const {
@@ -426,37 +580,28 @@ public:
bool hasSGPRInitBug() const {
return SGPRInitBug;
}
-};
-
-
-inline const AMDGPUInstrInfo *AMDGPUSubtarget::getInstrInfo() const {
- if (getGeneration() >= SOUTHERN_ISLANDS)
- return static_cast<const SISubtarget *>(this)->getInstrInfo();
-
- return static_cast<const R600Subtarget *>(this)->getInstrInfo();
-}
-inline const AMDGPUFrameLowering *AMDGPUSubtarget::getFrameLowering() const {
- if (getGeneration() >= SOUTHERN_ISLANDS)
- return static_cast<const SISubtarget *>(this)->getFrameLowering();
+ bool has12DWordStoreHazard() const {
+ return getGeneration() != AMDGPUSubtarget::SOUTHERN_ISLANDS;
+ }
- return static_cast<const R600Subtarget *>(this)->getFrameLowering();
-}
+ unsigned getKernArgSegmentSize(unsigned ExplicitArgBytes) const;
-inline const AMDGPUTargetLowering *AMDGPUSubtarget::getTargetLowering() const {
- if (getGeneration() >= SOUTHERN_ISLANDS)
- return static_cast<const SISubtarget *>(this)->getTargetLowering();
+ /// Return the maximum number of waves per SIMD for kernels using \p SGPRs SGPRs
+ unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
- return static_cast<const R600Subtarget *>(this)->getTargetLowering();
-}
+ /// Return the maximum number of waves per SIMD for kernels using \p VGPRs VGPRs
+ unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
-inline const AMDGPURegisterInfo *AMDGPUSubtarget::getRegisterInfo() const {
- if (getGeneration() >= SOUTHERN_ISLANDS)
- return static_cast<const SISubtarget *>(this)->getRegisterInfo();
+ /// \returns True if a waitcnt instruction is needed before a barrier
+ /// instruction, false otherwise.
+ bool needWaitcntBeforeBarrier() const {
+ return true;
+ }
- return static_cast<const R600Subtarget *>(this)->getRegisterInfo();
-}
+ unsigned getMaxNumSGPRs() const;
+};
-} // End namespace llvm
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUSUBTARGET_H
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index b2d4e1144c75..d8a0c716279c 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -18,28 +18,32 @@
#include "AMDGPUCallLowering.h"
#include "AMDGPUTargetObjectFile.h"
#include "AMDGPUTargetTransformInfo.h"
-#include "R600ISelLowering.h"
-#include "R600InstrInfo.h"
+#include "GCNSchedStrategy.h"
#include "R600MachineScheduler.h"
-#include "SIISelLowering.h"
-#include "SIInstrInfo.h"
-
-#include "llvm/Analysis/Passes.h"
+#include "SIMachineScheduler.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/TargetPassConfig.h"
-#include "llvm/IR/Verifier.h"
-#include "llvm/MC/MCAsmInfo.h"
-#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/raw_os_ostream.h"
#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Vectorize.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include <memory>
using namespace llvm;
@@ -64,13 +68,20 @@ static cl::opt<bool> EnableR600IfConvert(
static cl::opt<bool> EnableLoadStoreVectorizer(
"amdgpu-load-store-vectorizer",
cl::desc("Enable load store vectorizer"),
+ cl::init(true),
+ cl::Hidden);
+
+// Option to control global load scalarization
+static cl::opt<bool> ScalarizeGlobal(
+ "amdgpu-scalarize-global-loads",
+ cl::desc("Enable global load scalarization"),
cl::init(false),
cl::Hidden);
extern "C" void LLVMInitializeAMDGPUTarget() {
// Register the target
- RegisterTargetMachine<R600TargetMachine> X(TheAMDGPUTarget);
- RegisterTargetMachine<GCNTargetMachine> Y(TheGCNTarget);
+ RegisterTargetMachine<R600TargetMachine> X(getTheAMDGPUTarget());
+ RegisterTargetMachine<GCNTargetMachine> Y(getTheGCNTarget());
PassRegistry *PR = PassRegistry::getPassRegistry();
initializeSILowerI1CopiesPass(*PR);
@@ -83,20 +94,36 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
initializeAMDGPUAnnotateUniformValuesPass(*PR);
initializeAMDGPUPromoteAllocaPass(*PR);
initializeAMDGPUCodeGenPreparePass(*PR);
+ initializeAMDGPUUnifyMetadataPass(*PR);
initializeSIAnnotateControlFlowPass(*PR);
- initializeSIDebuggerInsertNopsPass(*PR);
initializeSIInsertWaitsPass(*PR);
initializeSIWholeQuadModePass(*PR);
initializeSILowerControlFlowPass(*PR);
+ initializeSIInsertSkipsPass(*PR);
initializeSIDebuggerInsertNopsPass(*PR);
+ initializeSIOptimizeExecMaskingPass(*PR);
}
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
- return make_unique<AMDGPUTargetObjectFile>();
+ return llvm::make_unique<AMDGPUTargetObjectFile>();
}
static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) {
- return new ScheduleDAGMILive(C, make_unique<R600SchedStrategy>());
+ return new ScheduleDAGMILive(C, llvm::make_unique<R600SchedStrategy>());
+}
+
+static ScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C) {
+ return new SIScheduleDAGMI(C);
+}
+
+static ScheduleDAGInstrs *
+createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
+ ScheduleDAGMILive *DAG =
+ new ScheduleDAGMILive(C,
+ llvm::make_unique<GCNMaxOccupancySchedStrategy>(C));
+ DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
+ DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
+ return DAG;
}
static MachineSchedRegistry
@@ -107,6 +134,11 @@ static MachineSchedRegistry
SISchedRegistry("si", "Run SI's custom scheduler",
createSIMachineScheduler);
+static MachineSchedRegistry
+GCNMaxOccupancySchedRegistry("gcn-max-occupancy",
+ "Run GCN scheduler to maximize occupancy",
+ createGCNMaxOccupancyMachineScheduler);
+
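// Editor's note -- usage sketch, not part of the patch: entries registered
// through MachineSchedRegistry are normally selected via llc's -misched
// option, so the strategy registered above should be reachable as, e.g.:
//
//   llc -march=amdgcn -mcpu=fiji -misched=gcn-max-occupancy input.ll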
static StringRef computeDataLayout(const Triple &TT) {
if (TT.getArch() == Triple::r600) {
// 32-bit pointers.
@@ -147,13 +179,11 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
CodeGenOpt::Level OptLevel)
: LLVMTargetMachine(T, computeDataLayout(TT), TT, getGPUOrDefault(TT, CPU),
FS, Options, getEffectiveRelocModel(RM), CM, OptLevel),
- TLOF(createTLOF(getTargetTriple())),
- IntrinsicInfo() {
- setRequiresStructuredCFG(true);
+ TLOF(createTLOF(getTargetTriple())) {
initAsmInfo();
}
-AMDGPUTargetMachine::~AMDGPUTargetMachine() { }
+AMDGPUTargetMachine::~AMDGPUTargetMachine() = default;
StringRef AMDGPUTargetMachine::getGPUName(const Function &F) const {
Attribute GPUAttr = F.getFnAttribute("target-cpu");
@@ -169,6 +199,10 @@ StringRef AMDGPUTargetMachine::getFeatureString(const Function &F) const {
FSAttr.getValueAsString();
}
+void AMDGPUTargetMachine::addEarlyAsPossiblePasses(PassManagerBase &PM) {
+ PM.add(createAMDGPUUnifyMetadataPass());
+}
+
//===----------------------------------------------------------------------===//
// R600 Target Machine (R600 -> Cayman)
//===----------------------------------------------------------------------===//
@@ -178,7 +212,9 @@ R600TargetMachine::R600TargetMachine(const Target &T, const Triple &TT,
TargetOptions Options,
Optional<Reloc::Model> RM,
CodeModel::Model CM, CodeGenOpt::Level OL)
- : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
+ : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {
+ setRequiresStructuredCFG(true);
+}
const R600Subtarget *R600TargetMachine::getSubtargetImpl(
const Function &F) const {
@@ -206,13 +242,15 @@ const R600Subtarget *R600TargetMachine::getSubtargetImpl(
#ifdef LLVM_BUILD_GLOBAL_ISEL
namespace {
+
struct SIGISelActualAccessor : public GISelAccessor {
std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
const AMDGPUCallLowering *getCallLowering() const override {
return CallLoweringInfo.get();
}
};
-} // End anonymous namespace.
+
+} // end anonymous namespace
#endif
GCNTargetMachine::GCNTargetMachine(const Target &T, const Triple &TT,
@@ -248,6 +286,8 @@ const SISubtarget *GCNTargetMachine::getSubtargetImpl(const Function &F) const {
I->setGISelAccessor(*GISel);
}
+ I->setScalarizeGlobalBehavior(ScalarizeGlobal);
+
return I.get();
}
@@ -261,7 +301,6 @@ class AMDGPUPassConfig : public TargetPassConfig {
public:
AMDGPUPassConfig(TargetMachine *TM, PassManagerBase &PM)
: TargetPassConfig(TM, PM) {
-
// Exceptions and StackMaps are not supported, so these passes will never do
// anything.
disablePass(&StackMapLivenessID);
@@ -272,6 +311,14 @@ public:
return getTM<AMDGPUTargetMachine>();
}
+ ScheduleDAGInstrs *
+ createMachineScheduler(MachineSchedContext *C) const override {
+ ScheduleDAGMILive *DAG = createGenericSchedLive(C);
+ DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
+ DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
+ return DAG;
+ }
+
void addEarlyCSEOrGVNPass();
void addStraightLineScalarOptimizationPasses();
void addIRPasses() override;
@@ -284,7 +331,7 @@ public:
class R600PassConfig final : public AMDGPUPassConfig {
public:
R600PassConfig(TargetMachine *TM, PassManagerBase &PM)
- : AMDGPUPassConfig(TM, PM) { }
+ : AMDGPUPassConfig(TM, PM) {}
ScheduleDAGInstrs *createMachineScheduler(
MachineSchedContext *C) const override {
@@ -300,7 +347,7 @@ public:
class GCNPassConfig final : public AMDGPUPassConfig {
public:
GCNPassConfig(TargetMachine *TM, PassManagerBase &PM)
- : AMDGPUPassConfig(TM, PM) { }
+ : AMDGPUPassConfig(TM, PM) {}
GCNTargetMachine &getGCNTargetMachine() const {
return getTM<GCNTargetMachine>();
@@ -315,16 +362,19 @@ public:
bool addInstSelector() override;
#ifdef LLVM_BUILD_GLOBAL_ISEL
bool addIRTranslator() override;
+ bool addLegalizeMachineIR() override;
bool addRegBankSelect() override;
+ bool addGlobalInstructionSelect() override;
#endif
void addFastRegAlloc(FunctionPass *RegAllocPass) override;
void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
void addPreRegAlloc() override;
+ void addPostRegAlloc() override;
void addPreSched2() override;
void addPreEmitPass() override;
};
-} // End of anonymous namespace
+} // end anonymous namespace
TargetIRAnalysis AMDGPUTargetMachine::getTargetIRAnalysis() {
return TargetIRAnalysis([this](const Function &F) {
@@ -363,7 +413,7 @@ void AMDGPUPassConfig::addIRPasses() {
// Function calls are not supported, so make sure we inline everything.
addPass(createAMDGPUAlwaysInlinePass());
- addPass(createAlwaysInlinerPass());
+ addPass(createAlwaysInlinerLegacyPass());
// We need to add the barrier noop pass, otherwise adding the function
// inlining pass will cause all of the PassConfigs passes to be run
// one function at a time, which means if we have a module with two
@@ -380,9 +430,9 @@ void AMDGPUPassConfig::addIRPasses() {
if (EnableSROA)
addPass(createSROAPass());
- }
- addStraightLineScalarOptimizationPasses();
+ addStraightLineScalarOptimizationPasses();
+ }
TargetPassConfig::addIRPasses();
@@ -415,7 +465,7 @@ bool AMDGPUPassConfig::addPreISel() {
}
bool AMDGPUPassConfig::addInstSelector() {
- addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));
+ addPass(createAMDGPUISelDag(getAMDGPUTargetMachine(), getOptLevel()));
return false;
}
@@ -468,7 +518,7 @@ ScheduleDAGInstrs *GCNPassConfig::createMachineScheduler(
const SISubtarget &ST = C->MF->getSubtarget<SISubtarget>();
if (ST.enableSIScheduler())
return createSIMachineScheduler(C);
- return nullptr;
+ return createGCNMaxOccupancyMachineScheduler(C);
}
bool GCNPassConfig::addPreISel() {
@@ -498,6 +548,7 @@ void GCNPassConfig::addMachineSSAOptimization() {
// XXX - Can we get away without running DeadMachineInstructionElim again?
addPass(&SIFoldOperandsID);
addPass(&DeadMachineInstructionElimID);
+ addPass(&SILoadStoreOptimizerID);
}
void GCNPassConfig::addIRPasses() {
@@ -520,43 +571,54 @@ bool GCNPassConfig::addIRTranslator() {
return false;
}
+bool GCNPassConfig::addLegalizeMachineIR() {
+ return false;
+}
+
bool GCNPassConfig::addRegBankSelect() {
return false;
}
+
+bool GCNPassConfig::addGlobalInstructionSelect() {
+ return false;
+}
#endif
void GCNPassConfig::addPreRegAlloc() {
- // This needs to be run directly before register allocation because
- // earlier passes might recompute live intervals.
- // TODO: handle CodeGenOpt::None; fast RA ignores spill weights set by the pass
- if (getOptLevel() > CodeGenOpt::None) {
- insertPass(&MachineSchedulerID, &SIFixControlFlowLiveIntervalsID);
- }
-
- if (getOptLevel() > CodeGenOpt::None) {
- // Don't do this with no optimizations since it throws away debug info by
- // merging nonadjacent loads.
-
- // This should be run after scheduling, but before register allocation. It
- // also need extra copies to the address operand to be eliminated.
-
- // FIXME: Move pre-RA and remove extra reg coalescer run.
- insertPass(&MachineSchedulerID, &SILoadStoreOptimizerID);
- insertPass(&MachineSchedulerID, &RegisterCoalescerID);
- }
-
addPass(createSIShrinkInstructionsPass());
addPass(createSIWholeQuadModePass());
}
void GCNPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
+ // FIXME: We have to disable the verifier here because of PHIElimination +
+ // TwoAddressInstructions disabling it.
+
+ // This must be run immediately after phi elimination and before
+ // TwoAddressInstructions, otherwise the processing of the tied operand of
+ // SI_ELSE will introduce a copy of the tied operand source after the else.
+ insertPass(&PHIEliminationID, &SILowerControlFlowID, false);
+
TargetPassConfig::addFastRegAlloc(RegAllocPass);
}
void GCNPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
+ // This needs to be run directly before register allocation because earlier
+ // passes might recompute live intervals.
+ insertPass(&MachineSchedulerID, &SIFixControlFlowLiveIntervalsID);
+
+ // This must be run immediately after phi elimination and before
+ // TwoAddressInstructions, otherwise the processing of the tied operand of
+ // SI_ELSE will introduce a copy of the tied operand source after the else.
+ insertPass(&PHIEliminationID, &SILowerControlFlowID, false);
+
TargetPassConfig::addOptimizedRegAlloc(RegAllocPass);
}
+void GCNPassConfig::addPostRegAlloc() {
+ addPass(&SIOptimizeExecMaskingID);
+ TargetPassConfig::addPostRegAlloc();
+}
+
void GCNPassConfig::addPreSched2() {
}
@@ -573,8 +635,9 @@ void GCNPassConfig::addPreEmitPass() {
addPass(createSIInsertWaitsPass());
addPass(createSIShrinkInstructionsPass());
- addPass(createSILowerControlFlowPass());
+ addPass(&SIInsertSkipsPassID);
addPass(createSIDebuggerInsertNopsPass());
+ addPass(&BranchRelaxationPassID);
}
TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) {
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
index b0eb3a9a15f7..9496773a073f 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.h
@@ -17,6 +17,13 @@
#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPUSubtarget.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Target/TargetMachine.h"
+#include <memory>
namespace llvm {
@@ -37,10 +44,10 @@ public:
StringRef FS, TargetOptions Options,
Optional<Reloc::Model> RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
- ~AMDGPUTargetMachine();
+ ~AMDGPUTargetMachine() override;
const AMDGPUSubtarget *getSubtargetImpl() const;
- const AMDGPUSubtarget *getSubtargetImpl(const Function &) const override;
+ const AMDGPUSubtarget *getSubtargetImpl(const Function &) const override = 0;
const AMDGPUIntrinsicInfo *getIntrinsicInfo() const override {
return &IntrinsicInfo;
@@ -50,6 +57,7 @@ public:
TargetLoweringObjectFile *getObjFileLowering() const override {
return TLOF.get();
}
+ void addEarlyAsPossiblePasses(PassManagerBase &PM) override;
};
//===----------------------------------------------------------------------===//
@@ -90,13 +98,6 @@ public:
const SISubtarget *getSubtargetImpl(const Function &) const override;
};
-inline const AMDGPUSubtarget *AMDGPUTargetMachine::getSubtargetImpl(
- const Function &F) const {
- if (getTargetTriple().getArch() == Triple::amdgcn)
- return static_cast<const GCNTargetMachine *>(this)->getSubtargetImpl(F);
- return static_cast<const R600TargetMachine *>(this)->getSubtargetImpl(F);
-}
+} // end namespace llvm
-} // End namespace llvm
-
-#endif
+#endif // LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETMACHINE_H
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp
index 03d1e2c764de..1fddc88a705a 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp
@@ -9,10 +9,10 @@
#include "AMDGPUTargetObjectFile.h"
#include "AMDGPU.h"
-#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/Support/ELF.h"
+#include "Utils/AMDGPUBaseInfo.h"
using namespace llvm;
@@ -20,12 +20,11 @@ using namespace llvm;
// Generic Object File
//===----------------------------------------------------------------------===//
-MCSection *AMDGPUTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
- SectionKind Kind,
- Mangler &Mang,
- const TargetMachine &TM) const {
- if (Kind.isReadOnly() && AMDGPU::isReadOnlySegment(GV))
+MCSection *AMDGPUTargetObjectFile::SelectSectionForGlobal(
+ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
+ if (Kind.isReadOnly() && AMDGPU::isReadOnlySegment(GO) &&
+ AMDGPU::shouldEmitConstantsToTextSection(TM.getTargetTriple()))
return TextSection;
- return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang, TM);
+ return TargetLoweringObjectFileELF::SelectSectionForGlobal(GO, Kind, TM);
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h
index f530e0952a74..de327786dff6 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h
@@ -23,8 +23,7 @@ namespace llvm {
class AMDGPUTargetObjectFile : public TargetLoweringObjectFileELF {
public:
- MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
- Mangler &Mang,
+ MCSection *SelectSectionForGlobal(const GlobalObject *GO, SectionKind Kind,
const TargetMachine &TM) const override;
};
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
index 3d630fe3ea9d..a1a352642242 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.cpp
@@ -80,7 +80,7 @@ unsigned AMDGPUTTIImpl::getRegisterBitWidth(bool Vector) {
return Vector ? 0 : 32;
}
-unsigned AMDGPUTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) {
+unsigned AMDGPUTTIImpl::getLoadStoreVecRegBitWidth(unsigned AddrSpace) const {
switch (AddrSpace) {
case AMDGPUAS::GLOBAL_ADDRESS:
case AMDGPUAS::CONSTANT_ADDRESS:
@@ -241,6 +241,7 @@ static bool isIntrinsicSourceOfDivergence(const TargetIntrinsicInfo *TII,
case Intrinsic::amdgcn_workitem_id_x:
case Intrinsic::amdgcn_workitem_id_y:
case Intrinsic::amdgcn_workitem_id_z:
+ case Intrinsic::amdgcn_interp_mov:
case Intrinsic::amdgcn_interp_p1:
case Intrinsic::amdgcn_interp_p2:
case Intrinsic::amdgcn_mbcnt_hi:
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
index a82a07458086..1177007644ff 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUTargetTransformInfo.h
@@ -64,13 +64,6 @@ public:
ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}
- // Provide value semantics. MSVC requires that we spell all of these out.
- AMDGPUTTIImpl(const AMDGPUTTIImpl &Arg)
- : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {}
- AMDGPUTTIImpl(AMDGPUTTIImpl &&Arg)
- : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
- TLI(std::move(Arg.TLI)) {}
-
bool hasBranchDivergence() { return true; }
void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);
@@ -82,7 +75,7 @@ public:
unsigned getNumberOfRegisters(bool Vector);
unsigned getRegisterBitWidth(bool Vector);
- unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace);
+ unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
unsigned getMaxInterleaveFactor(unsigned VF);
int getArithmeticInstrCost(
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDGPUUnifyMetadata.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDGPUUnifyMetadata.cpp
new file mode 100644
index 000000000000..bf501a1e8405
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDGPUUnifyMetadata.cpp
@@ -0,0 +1,149 @@
+//===-- AMDGPUUnifyMetadata.cpp - Unify OpenCL metadata -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// \file
+// \brief This pass unifies multiple OpenCL metadata due to linking.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+namespace {
+ namespace kOCLMD {
+ const char SpirVer[] = "opencl.spir.version";
+ const char OCLVer[] = "opencl.ocl.version";
+ const char UsedExt[] = "opencl.used.extensions";
+ const char UsedOptCoreFeat[] = "opencl.used.optional.core.features";
+ const char CompilerOptions[] = "opencl.compiler.options";
+ const char LLVMIdent[] = "llvm.ident";
+ }
+
+ /// \brief Unify multiple OpenCL metadata due to linking.
+ class AMDGPUUnifyMetadata : public FunctionPass {
+ public:
+ static char ID;
+ explicit AMDGPUUnifyMetadata() : FunctionPass(ID) {};
+
+ private:
+ // This should really be a module pass, but it has to run as early as
+ // possible. Since TargetMachine::addEarlyAsPossiblePasses() only accepts
+ // function passes (which are executed first), it is implemented as a
+ // function pass for now.
+ virtual bool runOnModule(Module &M);
+
+ // \todo: Convert to a module pass.
+ virtual bool runOnFunction(Function &F);
+
+ /// \brief Unify version metadata.
+ /// \return true if changes are made.
+ /// Assume the named metadata has operands, each of which is a pair of
+ /// integer constants, e.g.
+ /// !Name = {!n1, !n2}
+ /// !n1 = {i32 1, i32 2}
+ /// !n2 = {i32 2, i32 0}
+ /// Keep the largest version as the sole operand if PickFirst is false.
+ /// Otherwise pick it from the first value, representing the kernel module.
+ bool unifyVersionMD(Module &M, StringRef Name, bool PickFirst) {
+ auto NamedMD = M.getNamedMetadata(Name);
+ if (!NamedMD || NamedMD->getNumOperands() <= 1)
+ return false;
+ MDNode *MaxMD = nullptr;
+ auto MaxVer = 0U;
+ for (const auto &VersionMD : NamedMD->operands()) {
+ assert(VersionMD->getNumOperands() == 2);
+ auto CMajor = mdconst::extract<ConstantInt>(VersionMD->getOperand(0));
+ auto VersionMajor = CMajor->getZExtValue();
+ auto CMinor = mdconst::extract<ConstantInt>(VersionMD->getOperand(1));
+ auto VersionMinor = CMinor->getZExtValue();
+ auto Ver = (VersionMajor * 100) + (VersionMinor * 10);
+ if (Ver > MaxVer) {
+ MaxVer = Ver;
+ MaxMD = VersionMD;
+ }
+ if (PickFirst)
+ break;
+ }
+ NamedMD->eraseFromParent();
+ NamedMD = M.getOrInsertNamedMetadata(Name);
+ NamedMD->addOperand(MaxMD);
+ return true;
+ }
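// Editor's note -- illustrative, not part of the patch: the
// VersionMajor * 100 + VersionMinor * 10 expression above is only an ordering
// key, e.g. an OpenCL 2.0 operand (!{i32 2, i32 0} -> 200) outranks a 1.2
// operand (!{i32 1, i32 2} -> 120), so only the larger node survives the
// rebuild above unless PickFirst keeps the first (kernel) module's version.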
+
+ /// \brief Unify extension metadata.
+ /// \return true if changes are made.
+ /// Assume the named metadata has operands, each of which is a list, e.g.
+ /// !Name = {!n1, !n2}
+ /// !n1 = !{!"cl_khr_fp16", {!"cl_khr_fp64"}}
+ /// !n2 = !{!"cl_khr_image"}
+ /// Combine it into a single list with unique operands.
+ bool unifyExtensionMD(Module &M, StringRef Name) {
+ auto NamedMD = M.getNamedMetadata(Name);
+ if (!NamedMD || NamedMD->getNumOperands() == 1)
+ return false;
+
+ SmallVector<Metadata *, 4> All;
+ for (const auto &MD : NamedMD->operands())
+ for (const auto &Op : MD->operands())
+ if (std::find(All.begin(), All.end(), Op.get()) == All.end())
+ All.push_back(Op.get());
+
+ NamedMD->eraseFromParent();
+ NamedMD = M.getOrInsertNamedMetadata(Name);
+ for (const auto &MD : All)
+ NamedMD->addOperand(MDNode::get(M.getContext(), MD));
+
+ return true;
+ }
+};
+
+} // end anonymous namespace
+
+char AMDGPUUnifyMetadata::ID = 0;
+
+char &llvm::AMDGPUUnifyMetadataID = AMDGPUUnifyMetadata::ID;
+
+INITIALIZE_PASS(AMDGPUUnifyMetadata, "amdgpu-unify-metadata",
+ "Unify multiple OpenCL metadata due to linking",
+ false, false)
+
+FunctionPass* llvm::createAMDGPUUnifyMetadataPass() {
+ return new AMDGPUUnifyMetadata();
+}
+
+bool AMDGPUUnifyMetadata::runOnModule(Module &M) {
+ const char* Vers[] = {
+ kOCLMD::SpirVer,
+ kOCLMD::OCLVer
+ };
+ const char* Exts[] = {
+ kOCLMD::UsedExt,
+ kOCLMD::UsedOptCoreFeat,
+ kOCLMD::CompilerOptions,
+ kOCLMD::LLVMIdent
+ };
+
+ bool Changed = false;
+
+ for (auto &I : Vers)
+ Changed |= unifyVersionMD(M, I, true);
+
+ for (auto &I : Exts)
+ Changed |= unifyExtensionMD(M, I);
+
+ return Changed;
+}
+
+bool AMDGPUUnifyMetadata::runOnFunction(Function &F) {
+ return runOnModule(*F.getParent());
+}
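// Editor's sketch -- illustrative effect of the pass on metadata left behind
// by linking two OpenCL modules; the node numbers are hypothetical, not taken
// from the patch:
//
//   before: !opencl.ocl.version     = !{!0, !1}  ; !0 = !{i32 2, i32 0}, !1 = !{i32 1, i32 2}
//           !opencl.used.extensions = !{!2, !3}  ; !2 = !{!"cl_khr_fp64"}
//                                                ; !3 = !{!"cl_khr_fp64", !"cl_khr_fp16"}
//   after:  !opencl.ocl.version     = !{!0}      ; first (kernel) module's version kept,
//                                                ; since runOnModule passes PickFirst = true
//           !opencl.used.extensions = !{!4}      ; !4 = !{!"cl_khr_fp64", !"cl_khr_fp16"}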
diff --git a/contrib/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp b/contrib/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
index 21de76396b16..7faeccdc5df3 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AMDILCFGStructurizer.cpp
@@ -18,7 +18,6 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
@@ -139,16 +138,15 @@ public:
initializeAMDGPUCFGStructurizerPass(*PassRegistry::getPassRegistry());
}
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "AMDGPU Control Flow Graph structurizer Pass";
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addPreserved<MachineFunctionAnalysis>();
- AU.addRequired<MachineFunctionAnalysis>();
AU.addRequired<MachineDominatorTree>();
AU.addRequired<MachinePostDominatorTree>();
AU.addRequired<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
}
/// Perform the CFG structurization
@@ -220,7 +218,8 @@ protected:
bool needMigrateBlock(MachineBasicBlock *MBB) const;
// Utility Functions
- void reversePredicateSetter(MachineBasicBlock::iterator I);
+ void reversePredicateSetter(MachineBasicBlock::iterator I,
+ MachineBasicBlock &MBB);
/// Compute the reversed DFS post order of Blocks
void orderBlocks(MachineFunction *MF);
@@ -422,26 +421,24 @@ bool AMDGPUCFGStructurizer::needMigrateBlock(MachineBasicBlock *MBB) const {
}
void AMDGPUCFGStructurizer::reversePredicateSetter(
- MachineBasicBlock::iterator I) {
- assert(static_cast<MachineInstr *>(I) && "Expected valid iterator");
+ MachineBasicBlock::iterator I, MachineBasicBlock &MBB) {
+ assert(I.isValid() && "Expected valid iterator");
for (;; --I) {
+ if (I == MBB.end())
+ continue;
if (I->getOpcode() == AMDGPU::PRED_X) {
- switch (static_cast<MachineInstr *>(I)->getOperand(2).getImm()) {
- case OPCODE_IS_ZERO_INT:
- static_cast<MachineInstr *>(I)->getOperand(2)
- .setImm(OPCODE_IS_NOT_ZERO_INT);
+ switch (I->getOperand(2).getImm()) {
+ case AMDGPU::PRED_SETE_INT:
+ I->getOperand(2).setImm(AMDGPU::PRED_SETNE_INT);
return;
- case OPCODE_IS_NOT_ZERO_INT:
- static_cast<MachineInstr *>(I)->getOperand(2)
- .setImm(OPCODE_IS_ZERO_INT);
+ case AMDGPU::PRED_SETNE_INT:
+ I->getOperand(2).setImm(AMDGPU::PRED_SETE_INT);
return;
- case OPCODE_IS_ZERO:
- static_cast<MachineInstr *>(I)->getOperand(2)
- .setImm(OPCODE_IS_NOT_ZERO);
+ case AMDGPU::PRED_SETE:
+ I->getOperand(2).setImm(AMDGPU::PRED_SETNE);
return;
- case OPCODE_IS_NOT_ZERO:
- static_cast<MachineInstr *>(I)->getOperand(2)
- .setImm(OPCODE_IS_ZERO);
+ case AMDGPU::PRED_SETNE:
+ I->getOperand(2).setImm(AMDGPU::PRED_SETE);
return;
default:
llvm_unreachable("PRED_X Opcode invalid!");
@@ -841,7 +838,7 @@ bool AMDGPUCFGStructurizer::run() {
} //while, "one iteration" over the function.
MachineBasicBlock *EntryMBB =
- &*GraphTraits<MachineFunction *>::nodes_begin(FuncRep);
+ *GraphTraits<MachineFunction *>::nodes_begin(FuncRep);
if (EntryMBB->succ_size() == 0) {
Finish = true;
DEBUG(
@@ -864,7 +861,7 @@ bool AMDGPUCFGStructurizer::run() {
} while (!Finish && MakeProgress);
// Misc wrap up to maintain the consistency of the Function representation.
- wrapup(&*GraphTraits<MachineFunction *>::nodes_begin(FuncRep));
+ wrapup(*GraphTraits<MachineFunction *>::nodes_begin(FuncRep));
// Detach retired Block, release memory.
for (MBBInfoMap::iterator It = BlockInfoMap.begin(), E = BlockInfoMap.end();
@@ -908,9 +905,9 @@ void AMDGPUCFGStructurizer::orderBlocks(MachineFunction *MF) {
//walk through all the block in func to check for unreachable
typedef GraphTraits<MachineFunction *> GTM;
- MachineFunction::iterator It = GTM::nodes_begin(MF), E = GTM::nodes_end(MF);
+ auto It = GTM::nodes_begin(MF), E = GTM::nodes_end(MF);
for (; It != E; ++It) {
- MachineBasicBlock *MBB = &(*It);
+ MachineBasicBlock *MBB = *It;
SccNum = getSCCNum(MBB);
if (SccNum == INVALIDSCCNUM)
dbgs() << "unreachable block BB" << MBB->getNumber() << "\n";
@@ -995,7 +992,7 @@ int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) {
// Triangle pattern, true is empty
// We reverse the predicate to make a triangle, empty false pattern;
std::swap(TrueMBB, FalseMBB);
- reversePredicateSetter(MBB->end());
+ reversePredicateSetter(MBB->end(), *MBB);
LandBlk = FalseMBB;
FalseMBB = nullptr;
} else if (FalseMBB->succ_size() == 1
@@ -1505,7 +1502,7 @@ void AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
MachineBasicBlock *TrueBranch = getTrueBranch(BranchMI);
MachineBasicBlock::iterator I = BranchMI;
if (TrueBranch != LandMBB)
- reversePredicateSetter(I);
+ reversePredicateSetter(I, *I->getParent());
insertCondBranchBefore(ExitingMBB, I, AMDGPU::IF_PREDICATE_SET, AMDGPU::PREDICATE_BIT, DL);
insertInstrBefore(I, AMDGPU::BREAK);
insertInstrBefore(I, AMDGPU::ENDIF);
diff --git a/contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index efcf1b23adaa..a6c31629e7c4 100644
--- a/contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -15,38 +15,62 @@
#include "Utils/AMDKernelCodeTUtils.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "llvm/ADT/APFloat.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ELF.h"
-#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+#include <iterator>
+#include <map>
+#include <memory>
+#include <string>
+#include <vector>
using namespace llvm;
+using namespace llvm::AMDGPU;
namespace {
-struct OptionalOperand;
+class AMDGPUAsmParser;
enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_TTMP, IS_SPECIAL };
+//===----------------------------------------------------------------------===//
+// Operand
+//===----------------------------------------------------------------------===//
+
class AMDGPUOperand : public MCParsedAsmOperand {
enum KindTy {
Token,
@@ -56,16 +80,18 @@ class AMDGPUOperand : public MCParsedAsmOperand {
} Kind;
SMLoc StartLoc, EndLoc;
+ const AMDGPUAsmParser *AsmParser;
public:
- AMDGPUOperand(enum KindTy K) : MCParsedAsmOperand(), Kind(K) {}
+ AMDGPUOperand(enum KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
+ : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
typedef std::unique_ptr<AMDGPUOperand> Ptr;
struct Modifiers {
- bool Abs;
- bool Neg;
- bool Sext;
+ bool Abs = false;
+ bool Neg = false;
+ bool Sext = false;
bool hasFPModifiers() const { return Abs || Neg; }
bool hasIntModifiers() const { return Sext; }
@@ -126,8 +152,15 @@ public:
ImmTyDA,
ImmTyR128,
ImmTyLWE,
+ ImmTyExpTgt,
+ ImmTyExpCompr,
+ ImmTyExpVM,
ImmTyHwreg,
+ ImmTyOff,
ImmTySendMsg,
+ ImmTyInterpSlot,
+ ImmTyInterpAttr,
+ ImmTyAttrChan
};
struct TokOp {
@@ -136,18 +169,16 @@ public:
};
struct ImmOp {
- bool IsFPImm;
- ImmTy Type;
int64_t Val;
+ ImmTy Type;
+ bool IsFPImm;
Modifiers Mods;
};
struct RegOp {
unsigned RegNo;
- Modifiers Mods;
- const MCRegisterInfo *TRI;
- const MCSubtargetInfo *STI;
bool IsForcedVOP3;
+ Modifiers Mods;
};
union {
@@ -175,20 +206,8 @@ public:
return Kind == Immediate;
}
- bool isInlinableImm() const {
- if (!isImmTy(ImmTyNone)) {
- // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
- return false;
- }
- // TODO: We should avoid using host float here. It would be better to
- // check the float bit values which is what a few other places do.
- // We've had bot failures before due to weird NaN support on mips hosts.
- const float F = BitsToFloat(Imm.Val);
- // TODO: Add 1/(2*pi) for VI
- return (Imm.Val <= 64 && Imm.Val >= -16) ||
- (F == 0.0 || F == 0.5 || F == -0.5 || F == 1.0 || F == -1.0 ||
- F == 2.0 || F == -2.0 || F == 4.0 || F == -4.0);
- }
+ bool isInlinableImm(MVT type) const;
+ bool isLiteralImm(MVT type) const;
bool isRegKind() const {
return Kind == Register;
@@ -198,18 +217,46 @@ public:
return isRegKind() && !Reg.Mods.hasModifiers();
}
- bool isRegOrImmWithInputMods() const {
- return isRegKind() || isInlinableImm();
+ bool isRegOrImmWithInputMods(MVT type) const {
+ return isRegKind() || isInlinableImm(type);
+ }
+
+ bool isRegOrImmWithInt16InputMods() const {
+ return isRegOrImmWithInputMods(MVT::i16);
+ }
+
+ bool isRegOrImmWithInt32InputMods() const {
+ return isRegOrImmWithInputMods(MVT::i32);
+ }
+
+ bool isRegOrImmWithInt64InputMods() const {
+ return isRegOrImmWithInputMods(MVT::i64);
+ }
+
+ bool isRegOrImmWithFP16InputMods() const {
+ return isRegOrImmWithInputMods(MVT::f16);
+ }
+
+ bool isRegOrImmWithFP32InputMods() const {
+ return isRegOrImmWithInputMods(MVT::f32);
+ }
+
+ bool isRegOrImmWithFP64InputMods() const {
+ return isRegOrImmWithInputMods(MVT::f64);
+ }
+
+ bool isVReg32OrOff() const {
+ return isOff() || isRegClass(AMDGPU::VGPR_32RegClassID);
}
bool isImmTy(ImmTy ImmT) const {
return isImm() && Imm.Type == ImmT;
}
-
+
bool isImmModifier() const {
return isImm() && Imm.Type != ImmTyNone;
}
-
+
bool isClampSI() const { return isImmTy(ImmTyClampSI); }
bool isOModSI() const { return isImmTy(ImmTyOModSI); }
bool isDMask() const { return isImmTy(ImmTyDMask); }
@@ -217,6 +264,10 @@ public:
bool isDA() const { return isImmTy(ImmTyDA); }
bool isR128() const { return isImmTy(ImmTyUNorm); }
bool isLWE() const { return isImmTy(ImmTyLWE); }
+ bool isOff() const { return isImmTy(ImmTyOff); }
+ bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
+ bool isExpVM() const { return isImmTy(ImmTyExpVM); }
+ bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
bool isOffen() const { return isImmTy(ImmTyOffen); }
bool isIdxen() const { return isImmTy(ImmTyIdxen); }
bool isAddr64() const { return isImmTy(ImmTyAddr64); }
@@ -234,7 +285,10 @@ public:
bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
-
+ bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
+ bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
+ bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
+
bool isMod() const {
return isClampSI() || isOModSI();
}
@@ -243,47 +297,112 @@ public:
return isReg() || isImm();
}
- bool isRegClass(unsigned RCID) const {
- return isReg() && Reg.TRI->getRegClass(RCID).contains(getReg());
+ bool isRegClass(unsigned RCID) const;
+
+ bool isSCSrcB16() const {
+ return isRegClass(AMDGPU::SReg_32RegClassID) || isInlinableImm(MVT::i16);
+ }
+
+ bool isSCSrcB32() const {
+ return isRegClass(AMDGPU::SReg_32RegClassID) || isInlinableImm(MVT::i32);
+ }
+
+ bool isSCSrcB64() const {
+ return isRegClass(AMDGPU::SReg_64RegClassID) || isInlinableImm(MVT::i64);
+ }
+
+ bool isSCSrcF16() const {
+ return isRegClass(AMDGPU::SReg_32RegClassID) || isInlinableImm(MVT::f16);
+ }
+
+ bool isSCSrcF32() const {
+ return isRegClass(AMDGPU::SReg_32RegClassID) || isInlinableImm(MVT::f32);
}
- bool isSCSrc32() const {
- return isInlinableImm() || isRegClass(AMDGPU::SReg_32RegClassID);
+ bool isSCSrcF64() const {
+ return isRegClass(AMDGPU::SReg_64RegClassID) || isInlinableImm(MVT::f64);
}
- bool isSCSrc64() const {
- return isInlinableImm() || isRegClass(AMDGPU::SReg_64RegClassID);
+ bool isSSrcB32() const {
+ return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
}
- bool isSSrc32() const {
- return isImm() || isSCSrc32() || isExpr();
+ bool isSSrcB16() const {
+ return isSCSrcB16() || isLiteralImm(MVT::i16);
}
- bool isSSrc64() const {
+ bool isSSrcB64() const {
// TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
// See isVSrc64().
- return isImm() || isSCSrc64();
+ return isSCSrcB64() || isLiteralImm(MVT::i64);
+ }
+
+ bool isSSrcF32() const {
+ return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
+ }
+
+ bool isSSrcF64() const {
+ return isSCSrcB64() || isLiteralImm(MVT::f64);
+ }
+
+ bool isSSrcF16() const {
+ return isSCSrcB16() || isLiteralImm(MVT::f16);
+ }
+
+ bool isVCSrcB32() const {
+ return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(MVT::i32);
+ }
+
+ bool isVCSrcB64() const {
+ return isRegClass(AMDGPU::VS_64RegClassID) || isInlinableImm(MVT::i64);
+ }
+
+ bool isVCSrcB16() const {
+ return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(MVT::i16);
+ }
+
+ bool isVCSrcF32() const {
+ return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(MVT::f32);
+ }
+
+ bool isVCSrcF64() const {
+ return isRegClass(AMDGPU::VS_64RegClassID) || isInlinableImm(MVT::f64);
+ }
+
+ bool isVCSrcF16() const {
+ return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(MVT::f16);
+ }
+
+ bool isVSrcB32() const {
+ return isVCSrcF32() || isLiteralImm(MVT::i32);
}
- bool isVCSrc32() const {
- return isInlinableImm() || isRegClass(AMDGPU::VS_32RegClassID);
+ bool isVSrcB64() const {
+ return isVCSrcF64() || isLiteralImm(MVT::i64);
}
- bool isVCSrc64() const {
- return isInlinableImm() || isRegClass(AMDGPU::VS_64RegClassID);
+ bool isVSrcB16() const {
+ return isVCSrcF16() || isLiteralImm(MVT::i16);
}
- bool isVSrc32() const {
- return isImm() || isVCSrc32();
+ bool isVSrcF32() const {
+ return isVCSrcF32() || isLiteralImm(MVT::f32);
}
- bool isVSrc64() const {
- // TODO: Check if the 64-bit value (coming from assembly source) can be
- // narrowed to 32 bits (in the instruction stream). That require knowledge
- // of instruction type (unsigned/signed, floating or "untyped"/B64),
- // see [AMD GCN3 ISA 6.3.1].
- // TODO: How 64-bit values are formed from 32-bit literals in _B64 insns?
- return isImm() || isVCSrc64();
+ bool isVSrcF64() const {
+ return isVCSrcF64() || isLiteralImm(MVT::f64);
+ }
+
+ bool isVSrcF16() const {
+ return isVCSrcF16() || isLiteralImm(MVT::f16);
+ }
+
+ bool isKImmFP32() const {
+ return isLiteralImm(MVT::f32);
+ }
+
+ bool isKImmFP16() const {
+ return isLiteralImm(MVT::f16);
}
bool isMem() const override {
@@ -301,9 +420,11 @@ public:
bool isSWaitCnt() const;
bool isHwreg() const;
bool isSendMsg() const;
- bool isSMRDOffset() const;
+ bool isSMRDOffset8() const;
+ bool isSMRDOffset20() const;
bool isSMRDLiteralOffset() const;
bool isDPPCtrl() const;
+ bool isGPRIdxMode() const;
StringRef getExpressionAsToken() const {
assert(isExpr());
@@ -311,7 +432,6 @@ public:
return S->getSymbol().getName();
}
-
StringRef getToken() const {
assert(isToken());
@@ -359,7 +479,7 @@ public:
bool hasModifiers() const {
return getModifiers().hasModifiers();
}
-
+
bool hasFPModifiers() const {
return getModifiers().hasFPModifiers();
}
@@ -368,30 +488,23 @@ public:
return getModifiers().hasIntModifiers();
}
- void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const {
- if (isImmTy(ImmTyNone) && ApplyModifiers && Imm.Mods.hasFPModifiers()) {
- // Apply modifiers to immediate value
- int64_t Val = Imm.Val;
- bool Negate = Imm.Mods.Neg; // Only negate can get here
- if (Imm.IsFPImm) {
- APFloat F(BitsToFloat(Val));
- if (Negate) {
- F.changeSign();
- }
- Val = F.bitcastToAPInt().getZExtValue();
- } else {
- Val = Negate ? -Val : Val;
- }
- Inst.addOperand(MCOperand::createImm(Val));
- } else {
- Inst.addOperand(MCOperand::createImm(getImm()));
- }
+ void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;
+
+ void addLiteralImmOperand(MCInst &Inst, int64_t Val) const;
+
+ template <unsigned Bitwidth>
+ void addKImmFPOperands(MCInst &Inst, unsigned N) const;
+
+ void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
+ addKImmFPOperands<16>(Inst, N);
}
- void addRegOperands(MCInst &Inst, unsigned N) const {
- Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), *Reg.STI)));
+ void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
+ addKImmFPOperands<32>(Inst, N);
}
+ void addRegOperands(MCInst &Inst, unsigned N) const;
+
void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
if (isRegKind())
addRegOperands(Inst, N);
@@ -430,7 +543,7 @@ public:
}
}
- void printImmTy(raw_ostream& OS, ImmTy Type) const {
+ static void printImmTy(raw_ostream& OS, ImmTy Type) {
switch (Type) {
case ImmTyNone: OS << "None"; break;
case ImmTyGDS: OS << "GDS"; break;
@@ -458,8 +571,15 @@ public:
case ImmTyDA: OS << "DA"; break;
case ImmTyR128: OS << "R128"; break;
case ImmTyLWE: OS << "LWE"; break;
+ case ImmTyOff: OS << "Off"; break;
+ case ImmTyExpTgt: OS << "ExpTgt"; break;
+ case ImmTyExpCompr: OS << "ExpCompr"; break;
+ case ImmTyExpVM: OS << "ExpVM"; break;
case ImmTyHwreg: OS << "Hwreg"; break;
case ImmTySendMsg: OS << "SendMsg"; break;
+ case ImmTyInterpSlot: OS << "InterpSlot"; break;
+ case ImmTyInterpAttr: OS << "InterpAttr"; break;
+ case ImmTyAttrChan: OS << "AttrChan"; break;
}
}
@@ -484,22 +604,24 @@ public:
}
}
- static AMDGPUOperand::Ptr CreateImm(int64_t Val, SMLoc Loc,
+ static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
+ int64_t Val, SMLoc Loc,
enum ImmTy Type = ImmTyNone,
bool IsFPImm = false) {
- auto Op = llvm::make_unique<AMDGPUOperand>(Immediate);
+ auto Op = llvm::make_unique<AMDGPUOperand>(Immediate, AsmParser);
Op->Imm.Val = Val;
Op->Imm.IsFPImm = IsFPImm;
Op->Imm.Type = Type;
- Op->Imm.Mods = {false, false, false};
+ Op->Imm.Mods = Modifiers();
Op->StartLoc = Loc;
Op->EndLoc = Loc;
return Op;
}
- static AMDGPUOperand::Ptr CreateToken(StringRef Str, SMLoc Loc,
+ static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
+ StringRef Str, SMLoc Loc,
bool HasExplicitEncodingSize = true) {
- auto Res = llvm::make_unique<AMDGPUOperand>(Token);
+ auto Res = llvm::make_unique<AMDGPUOperand>(Token, AsmParser);
Res->Tok.Data = Str.data();
Res->Tok.Length = Str.size();
Res->StartLoc = Loc;
@@ -507,24 +629,22 @@ public:
return Res;
}
- static AMDGPUOperand::Ptr CreateReg(unsigned RegNo, SMLoc S,
+ static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
+ unsigned RegNo, SMLoc S,
SMLoc E,
- const MCRegisterInfo *TRI,
- const MCSubtargetInfo *STI,
bool ForceVOP3) {
- auto Op = llvm::make_unique<AMDGPUOperand>(Register);
+ auto Op = llvm::make_unique<AMDGPUOperand>(Register, AsmParser);
Op->Reg.RegNo = RegNo;
- Op->Reg.TRI = TRI;
- Op->Reg.STI = STI;
- Op->Reg.Mods = {false, false, false};
+ Op->Reg.Mods = Modifiers();
Op->Reg.IsForcedVOP3 = ForceVOP3;
Op->StartLoc = S;
Op->EndLoc = E;
return Op;
}
- static AMDGPUOperand::Ptr CreateExpr(const class MCExpr *Expr, SMLoc S) {
- auto Op = llvm::make_unique<AMDGPUOperand>(Expression);
+ static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
+ const class MCExpr *Expr, SMLoc S) {
+ auto Op = llvm::make_unique<AMDGPUOperand>(Expression, AsmParser);
Op->Expr = Expr;
Op->StartLoc = S;
Op->EndLoc = S;
@@ -537,6 +657,53 @@ raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
return OS;
}
+//===----------------------------------------------------------------------===//
+// AsmParser
+//===----------------------------------------------------------------------===//
+
+// Holds info related to the current kernel, e.g. count of SGPRs used.
+// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
+// .amdgpu_hsa_kernel or at EOF.
+class KernelScopeInfo {
+ int SgprIndexUnusedMin;
+ int VgprIndexUnusedMin;
+ MCContext *Ctx;
+
+ void usesSgprAt(int i) {
+ if (i >= SgprIndexUnusedMin) {
+ SgprIndexUnusedMin = ++i;
+ if (Ctx) {
+ MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
+ Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
+ }
+ }
+ }
+ void usesVgprAt(int i) {
+ if (i >= VgprIndexUnusedMin) {
+ VgprIndexUnusedMin = ++i;
+ if (Ctx) {
+ MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
+ Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
+ }
+ }
+ }
+public:
+ KernelScopeInfo() : SgprIndexUnusedMin(-1), VgprIndexUnusedMin(-1), Ctx(nullptr)
+ {}
+ void initialize(MCContext &Context) {
+ Ctx = &Context;
+ usesSgprAt(SgprIndexUnusedMin = -1);
+ usesVgprAt(VgprIndexUnusedMin = -1);
+ }
+ void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
+ switch (RegKind) {
+ case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
+ case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
+ default: break;
+ }
+ }
+};
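// Editor's note -- usage sketch, not part of the patch: while parsing an
// operand such as s[4:5], the parser would report the registers with
//
//   KernelScope.usesRegister(IS_SGPR, /*DwordRegIndex=*/4, /*RegWidth=*/2);
//
// which calls usesSgprAt(5) and bumps the .kernel.sgpr_count symbol to 6
// (highest index used is 5, so six SGPRs in total).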
+
class AMDGPUAsmParser : public MCTargetAsmParser {
const MCInstrInfo &MII;
MCAsmParser &Parser;
@@ -544,22 +711,7 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
unsigned ForcedEncodingSize;
bool ForcedDPP;
bool ForcedSDWA;
-
- bool isSI() const {
- return AMDGPU::isSI(getSTI());
- }
-
- bool isCI() const {
- return AMDGPU::isCI(getSTI());
- }
-
- bool isVI() const {
- return AMDGPU::isVI(getSTI());
- }
-
- bool hasSGPR102_SGPR103() const {
- return !isVI();
- }
+ KernelScopeInfo KernelScope;
/// @name Auto-generated Match Functions
/// {
@@ -570,9 +722,11 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
/// }
private:
+ bool ParseAsAbsoluteExpression(uint32_t &Ret);
bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
bool ParseDirectiveHSACodeObjectVersion();
bool ParseDirectiveHSACodeObjectISA();
+ bool ParseDirectiveRuntimeMetadata();
bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
bool ParseDirectiveAMDKernelCodeT();
bool ParseSectionDirectiveHSAText();
@@ -584,7 +738,7 @@ private:
bool ParseSectionDirectiveHSADataGlobalProgram();
bool ParseSectionDirectiveHSARodataReadonlyAgent();
bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, RegisterKind RegKind, unsigned Reg1, unsigned RegNum);
- bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg, unsigned& RegNum, unsigned& RegWidth);
+ bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg, unsigned& RegNum, unsigned& RegWidth, unsigned *DwordRegIndex);
void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, bool IsAtomic, bool IsAtomicReturn);
public:
@@ -622,6 +776,27 @@ public:
Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
Sym->setVariableValue(MCConstantExpr::create(Isa.Stepping, Ctx));
}
+ KernelScope.initialize(getContext());
+ }
+
+ bool isSI() const {
+ return AMDGPU::isSI(getSTI());
+ }
+
+ bool isCI() const {
+ return AMDGPU::isCI(getSTI());
+ }
+
+ bool isVI() const {
+ return AMDGPU::isVI(getSTI());
+ }
+
+ bool hasInv2PiInlineImm() const {
+ return getSTI().getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
+ }
+
+ bool hasSGPR102_SGPR103() const {
+ return !isVI();
}
AMDGPUTargetStreamer &getTargetStreamer() {
@@ -629,6 +804,16 @@ public:
return static_cast<AMDGPUTargetStreamer &>(TS);
}
+ const MCRegisterInfo *getMRI() const {
+ // We need this const_cast because for some reason getContext() is not const
+ // in MCAsmParser.
+ return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
+ }
+
+ const MCInstrInfo *getMII() const {
+ return &MII;
+ }
+
void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }
@@ -652,23 +837,28 @@ public:
StringRef parseMnemonicSuffix(StringRef Name);
bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
SMLoc NameLoc, OperandVector &Operands) override;
+ //bool ProcessInstruction(MCInst &Inst);
OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
- OperandMatchResultTy parseIntWithPrefix(const char *Prefix,
- OperandVector &Operands,
- enum AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
- bool (*ConvertResult)(int64_t&) = 0);
- OperandMatchResultTy parseNamedBit(const char *Name, OperandVector &Operands,
- enum AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
- OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, StringRef &Value);
+ OperandMatchResultTy
+ parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
+ enum AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
+ bool (*ConvertResult)(int64_t &) = nullptr);
+ OperandMatchResultTy
+ parseNamedBit(const char *Name, OperandVector &Operands,
+ enum AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
+ OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
+ StringRef &Value);
OperandMatchResultTy parseImm(OperandVector &Operands);
OperandMatchResultTy parseRegOrImm(OperandVector &Operands);
OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands);
OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands);
+ OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
void cvtDS(MCInst &Inst, const OperandVector &Operands);
+ void cvtExp(MCInst &Inst, const OperandVector &Operands);
bool parseCnt(int64_t &IntVal);
OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
@@ -683,10 +873,17 @@ private:
bool parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &Operation, int64_t &StreamId);
bool parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
+
+ void errorExpTgt();
+ OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
+
public:
OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
+ OperandMatchResultTy parseExpTgt(OperandVector &Operands);
OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
+ OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
+ OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
@@ -701,7 +898,8 @@ public:
AMDGPUOperand::Ptr defaultDA() const;
AMDGPUOperand::Ptr defaultR128() const;
AMDGPUOperand::Ptr defaultLWE() const;
- AMDGPUOperand::Ptr defaultSMRDOffset() const;
+ AMDGPUOperand::Ptr defaultSMRDOffset8() const;
+ AMDGPUOperand::Ptr defaultSMRDOffset20() const;
AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
OperandMatchResultTy parseOModOperand(OperandVector &Operands);
@@ -736,8 +934,274 @@ struct OptionalOperand {
bool (*ConvertResult)(int64_t&);
};
+} // end anonymous namespace
+
+// May also be called with an integer type of equivalent bit width.
+static const fltSemantics *getFltSemantics(unsigned Size) {
+ switch (Size) {
+ case 4:
+ return &APFloat::IEEEsingle();
+ case 8:
+ return &APFloat::IEEEdouble();
+ case 2:
+ return &APFloat::IEEEhalf();
+ default:
+ llvm_unreachable("unsupported fp type");
+ }
+}
+
+static const fltSemantics *getFltSemantics(MVT VT) {
+ return getFltSemantics(VT.getSizeInBits() / 8);
+}
+
+//===----------------------------------------------------------------------===//
+// Operand
+//===----------------------------------------------------------------------===//
+
+static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
+ bool Lost;
+
+ // Convert the literal to the operand's floating-point type
+ APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
+ APFloat::rmNearestTiesToEven,
+ &Lost);
+ // We allow precision loss but not overflow or underflow
+ if (Status != APFloat::opOK &&
+ Lost &&
+ ((Status & APFloat::opOverflow) != 0 ||
+ (Status & APFloat::opUnderflow) != 0)) {
+ return false;
+ }
+
+ return true;
+}
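+// Editorial note (illustrative values, not from the patch): converting the f64
+// literal 0.1 to f32 is merely inexact (precision loss), so it is accepted,
+// while converting 1.0e10 to f16 overflows and is rejected by the check above.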
+
+bool AMDGPUOperand::isInlinableImm(MVT type) const {
+ if (!isImmTy(ImmTyNone)) {
+ // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
+ return false;
+ }
+ // TODO: We should avoid using host float here. It would be better to
+ // check the float bit values which is what a few other places do.
+ // We've had bot failures before due to weird NaN support on mips hosts.
+
+ APInt Literal(64, Imm.Val);
+
+ if (Imm.IsFPImm) { // We got fp literal token
+ if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
+ return AMDGPU::isInlinableLiteral64(Imm.Val,
+ AsmParser->hasInv2PiInlineImm());
+ }
+
+ APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
+ if (!canLosslesslyConvertToFPType(FPLiteral, type))
+ return false;
+
+ // Check if single precision literal is inlinable
+ return AMDGPU::isInlinableLiteral32(
+ static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
+ AsmParser->hasInv2PiInlineImm());
+ }
+
+ // We got int literal token.
+ if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
+ return AMDGPU::isInlinableLiteral64(Imm.Val,
+ AsmParser->hasInv2PiInlineImm());
+ }
+
+ if (type.getScalarSizeInBits() == 16) {
+ return AMDGPU::isInlinableLiteral16(
+ static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
+ AsmParser->hasInv2PiInlineImm());
+ }
+
+ return AMDGPU::isInlinableLiteral32(
+ static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
+ AsmParser->hasInv2PiInlineImm());
}
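+// Editorial note (illustrative, assuming the SI/VI inline-constant rules
+// implemented by AMDGPU::isInlinableLiteral32/64): an integer token of 64 is
+// inlinable for a 32-bit operand while 65 is not, and an fp token of 0.5 is
+// inlinable because it converts losslessly to single precision first.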
+bool AMDGPUOperand::isLiteralImm(MVT type) const {
+ // Check that this immediate can be added as a literal
+ if (!isImmTy(ImmTyNone)) {
+ return false;
+ }
+
+ if (!Imm.IsFPImm) {
+ // We got int literal token.
+
+ unsigned Size = type.getSizeInBits();
+ if (Size == 64)
+ Size = 32;
+
+ // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
+ // types.
+ return isUIntN(Size, Imm.Val) || isIntN(Size, Imm.Val);
+ }
+
+ // We got fp literal token
+ if (type == MVT::f64) { // Expected 64-bit fp operand
+ // The low 32 bits of such a literal would be set to zero, but we still accept it
+ return true;
+ }
+
+ if (type == MVT::i64) { // Expected 64-bit int operand
+ // We don't allow fp literals in 64-bit integer instructions. It is
+ // unclear how we should encode them.
+ return false;
+ }
+
+ APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
+ return canLosslesslyConvertToFPType(FPLiteral, type);
+}
+
+bool AMDGPUOperand::isRegClass(unsigned RCID) const {
+ return isReg() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
+}
+
+void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
+ int64_t Val = Imm.Val;
+ if (isImmTy(ImmTyNone) && ApplyModifiers && Imm.Mods.hasFPModifiers() && Imm.Mods.Neg) {
+ // Apply modifiers to immediate value. Only negate can get here
+ if (Imm.IsFPImm) {
+ APFloat F(BitsToDouble(Val));
+ F.changeSign();
+ Val = F.bitcastToAPInt().getZExtValue();
+ } else {
+ Val = -Val;
+ }
+ }
+
+ if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
+ Inst.getNumOperands())) {
+ addLiteralImmOperand(Inst, Val);
+ } else {
+ Inst.addOperand(MCOperand::createImm(Val));
+ }
+}
+
+void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val) const {
+ const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
+ auto OpNum = Inst.getNumOperands();
+ // Check that this operand accepts literals
+ assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));
+
+ auto OpSize = AMDGPU::getOperandSize(InstDesc, OpNum); // expected operand size
+
+ if (Imm.IsFPImm) { // We got fp literal token
+ APInt Literal(64, Val);
+
+ switch (OpSize) {
+ case 8: {
+ if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
+ AsmParser->hasInv2PiInlineImm())) {
+ Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
+ return;
+ }
+
+ // Non-inlineable
+ if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
+ // For fp operands we check if low 32 bits are zeros
+ if (Literal.getLoBits(32) != 0) {
+ const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
+ "Can't encode literal as exact 64-bit floating-point operand. "
+ "Low 32-bits will be set to zero");
+ }
+
+ Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
+ return;
+ }
+
+ // We don't allow fp literals in 64-bit integer instructions. It is
+ // unclear how we should encode them. This case should be checked earlier
+ // in predicate methods (isLiteralImm())
+ llvm_unreachable("fp literal in 64-bit integer instruction.");
+ }
+ case 4:
+ case 2: {
+ bool lost;
+ APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
+ // Convert the literal to the expected operand's floating-point type
+ FPLiteral.convert(*getFltSemantics(OpSize),
+ APFloat::rmNearestTiesToEven, &lost);
+ // We allow precision loss but not overflow or underflow. This should have
+ // been checked earlier in isLiteralImm()
+ Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
+ return;
+ }
+ default:
+ llvm_unreachable("invalid operand size");
+ }
+
+ return;
+ }
+
+ // We got int literal token.
+ // Only sign extend inline immediates.
+ // FIXME: No errors on truncation
+ switch (OpSize) {
+ case 4: {
+ if (isInt<32>(Val) &&
+ AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
+ AsmParser->hasInv2PiInlineImm())) {
+ Inst.addOperand(MCOperand::createImm(Val));
+ return;
+ }
+
+ Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
+ return;
+ }
+ case 8: {
+ if (AMDGPU::isInlinableLiteral64(Val,
+ AsmParser->hasInv2PiInlineImm())) {
+ Inst.addOperand(MCOperand::createImm(Val));
+ return;
+ }
+
+ Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
+ return;
+ }
+ case 2: {
+ if (isInt<16>(Val) &&
+ AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
+ AsmParser->hasInv2PiInlineImm())) {
+ Inst.addOperand(MCOperand::createImm(Val));
+ return;
+ }
+
+ Inst.addOperand(MCOperand::createImm(Val & 0xffff));
+ return;
+ }
+ default:
+ llvm_unreachable("invalid operand size");
+ }
+}
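+// Editorial sketch (example values only): for a 32-bit fp operand, the fp token
+// 1234.5 is not inlinable, so its single-precision bit pattern is emitted as a
+// literal; for a 64-bit fp operand only the high 32 bits are encodable, so a
+// value with non-zero low bits triggers the warning above and is truncated.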
+
+template <unsigned Bitwidth>
+void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
+ APInt Literal(64, Imm.Val);
+
+ if (!Imm.IsFPImm) {
+ // We got int literal token.
+ Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
+ return;
+ }
+
+ bool Lost;
+ APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
+ FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
+ APFloat::rmNearestTiesToEven, &Lost);
+ Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
+}
+
+void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
+ Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
+}
+
+//===----------------------------------------------------------------------===//
+// AsmParser
+//===----------------------------------------------------------------------===//
+
static int getRegClass(RegisterKind Is, unsigned RegWidth) {
if (Is == IS_VGPR) {
switch (RegWidth) {
@@ -818,12 +1282,13 @@ bool AMDGPUAsmParser::AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, R
RegWidth++;
return true;
default:
- assert(false); return false;
+ llvm_unreachable("unexpected register kind");
}
}
-bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg, unsigned& RegNum, unsigned& RegWidth)
+bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg, unsigned& RegNum, unsigned& RegWidth, unsigned *DwordRegIndex)
{
+ if (DwordRegIndex) { *DwordRegIndex = 0; }
const MCRegisterInfo *TRI = getContext().getRegisterInfo();
if (getLexer().is(AsmToken::Identifier)) {
StringRef RegName = Parser.getTok().getString();
@@ -883,7 +1348,7 @@ bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
} else if (getLexer().is(AsmToken::LBrac)) {
// List of consecutive registers: [s0,s1,s2,s3]
Parser.Lex();
- if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
+ if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr))
return false;
if (RegWidth != 1)
return false;
@@ -895,7 +1360,7 @@ bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
} else if (getLexer().is(AsmToken::RBrac)) {
Parser.Lex();
break;
- } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1)) {
+ } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) {
if (RegWidth1 != 1) {
return false;
}
@@ -923,11 +1388,12 @@ bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
{
unsigned Size = 1;
if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
- // SGPR and TTMP registers must be are aligned. Max required alignment is 4 dwords.
+ // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords.
Size = std::min(RegWidth, 4u);
}
if (RegNum % Size != 0)
return false;
+ if (DwordRegIndex) { *DwordRegIndex = RegNum; }
RegNum = RegNum / Size;
int RCID = getRegClass(RegKind, RegWidth);
if (RCID == -1)
@@ -940,7 +1406,7 @@ bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
}
default:
- assert(false); return false;
+ llvm_unreachable("unexpected register kind");
}
if (!subtargetHasRegister(*TRI, Reg))
@@ -952,20 +1418,19 @@ std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() {
const auto &Tok = Parser.getTok();
SMLoc StartLoc = Tok.getLoc();
SMLoc EndLoc = Tok.getEndLoc();
- const MCRegisterInfo *TRI = getContext().getRegisterInfo();
-
RegisterKind RegKind;
- unsigned Reg, RegNum, RegWidth;
+ unsigned Reg, RegNum, RegWidth, DwordRegIndex;
- if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
+ if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) {
return nullptr;
}
- return AMDGPUOperand::CreateReg(Reg, StartLoc, EndLoc,
- TRI, &getSTI(), false);
+ KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth);
+ return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc, false);
}
-AMDGPUAsmParser::OperandMatchResultTy
+OperandMatchResultTy
AMDGPUAsmParser::parseImm(OperandVector &Operands) {
+ // TODO: add syntactic sugar for 1/(2*PI)
bool Minus = false;
if (getLexer().getKind() == AsmToken::Minus) {
Minus = true;
@@ -978,28 +1443,21 @@ AMDGPUAsmParser::parseImm(OperandVector &Operands) {
int64_t IntVal;
if (getParser().parseAbsoluteExpression(IntVal))
return MatchOperand_ParseFail;
- if (!isInt<32>(IntVal) && !isUInt<32>(IntVal)) {
- Error(S, "invalid immediate: only 32-bit values are legal");
- return MatchOperand_ParseFail;
- }
-
if (Minus)
IntVal *= -1;
- Operands.push_back(AMDGPUOperand::CreateImm(IntVal, S));
+ Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
return MatchOperand_Success;
}
case AsmToken::Real: {
- // FIXME: We should emit an error if a double precisions floating-point
- // value is used. I'm not sure the best way to detect this.
int64_t IntVal;
if (getParser().parseAbsoluteExpression(IntVal))
return MatchOperand_ParseFail;
- APFloat F((float)BitsToDouble(IntVal));
+ APFloat F(BitsToDouble(IntVal));
if (Minus)
F.changeSign();
Operands.push_back(
- AMDGPUOperand::CreateImm(F.bitcastToAPInt().getZExtValue(), S,
+ AMDGPUOperand::CreateImm(this, F.bitcastToAPInt().getZExtValue(), S,
AMDGPUOperand::ImmTyNone, true));
return MatchOperand_Success;
}
@@ -1008,7 +1466,7 @@ AMDGPUAsmParser::parseImm(OperandVector &Operands) {
}
}
-AMDGPUAsmParser::OperandMatchResultTy
+OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands) {
auto res = parseImm(Operands);
if (res != MatchOperand_NoMatch) {
@@ -1024,7 +1482,7 @@ AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands) {
return MatchOperand_ParseFail;
}
-AMDGPUAsmParser::OperandMatchResultTy
+OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands) {
// XXX: During parsing we can't determine if minus sign means
// negate-modifier or negative immediate value.
@@ -1060,7 +1518,7 @@ AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands) {
return Res;
}
- AMDGPUOperand::Modifiers Mods = {false, false, false};
+ AMDGPUOperand::Modifiers Mods;
if (Negate) {
Mods.Neg = true;
}
@@ -1088,7 +1546,7 @@ AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands) {
return MatchOperand_Success;
}
-AMDGPUAsmParser::OperandMatchResultTy
+OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands) {
bool Sext = false;
@@ -1107,7 +1565,7 @@ AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands) {
return Res;
}
- AMDGPUOperand::Modifiers Mods = {false, false, false};
+ AMDGPUOperand::Modifiers Mods;
if (Sext) {
if (getLexer().isNot(AsmToken::RParen)) {
Error(Parser.getTok().getLoc(), "expected closing parentheses");
@@ -1116,14 +1574,33 @@ AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands) {
Parser.Lex();
Mods.Sext = true;
}
-
+
if (Mods.hasIntModifiers()) {
AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
Op.setModifiers(Mods);
}
+
return MatchOperand_Success;
}
+OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
+ std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
+ if (Reg) {
+ Operands.push_back(std::move(Reg));
+ return MatchOperand_Success;
+ }
+
+ const AsmToken &Tok = Parser.getTok();
+ if (Tok.getString() == "off") {
+ Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Tok.getLoc(),
+ AMDGPUOperand::ImmTyOff, false));
+ Parser.Lex();
+ return MatchOperand_Success;
+ }
+
+ return MatchOperand_NoMatch;
+}
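+// Editorial note (hypothetical asm input): this accepts either a VGPR or the
+// bare token "off", e.g. "exp mrt0 v0, off, off, v1", where each "off" becomes
+// an immediate operand of type ImmTyOff with value 0.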
+
unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
@@ -1139,6 +1616,17 @@ unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
getForcedEncodingSize() != 64)
return Match_PreferE32;
+ if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
+ Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
+ // v_mac_f32/16 allow only dst_sel == DWORD.
+ auto OpNum =
+ AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
+ const auto &Op = Inst.getOperand(OpNum);
+ if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
+ return Match_InvalidOperand;
+ }
+ }
+
return Match_Success;
}
@@ -1147,57 +1635,105 @@ bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
MCStreamer &Out,
uint64_t &ErrorInfo,
bool MatchingInlineAsm) {
+ // What asm variants we should check
+ std::vector<unsigned> MatchedVariants;
+ if (getForcedEncodingSize() == 32) {
+ MatchedVariants = {AMDGPUAsmVariants::DEFAULT};
+ } else if (isForcedVOP3()) {
+ MatchedVariants = {AMDGPUAsmVariants::VOP3};
+ } else if (isForcedSDWA()) {
+ MatchedVariants = {AMDGPUAsmVariants::SDWA};
+ } else if (isForcedDPP()) {
+ MatchedVariants = {AMDGPUAsmVariants::DPP};
+ } else {
+ MatchedVariants = {AMDGPUAsmVariants::DEFAULT,
+ AMDGPUAsmVariants::VOP3,
+ AMDGPUAsmVariants::SDWA,
+ AMDGPUAsmVariants::DPP};
+ }
+
MCInst Inst;
+ unsigned Result = Match_Success;
+ for (auto Variant : MatchedVariants) {
+ uint64_t EI;
+ auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
+ Variant);
+ // Match statuses are ordered from least to most specific; keep the most
+ // specific status seen so far as the result:
+ // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
+ if ((R == Match_Success) ||
+ (R == Match_PreferE32) ||
+ (R == Match_MissingFeature && Result != Match_PreferE32) ||
+ (R == Match_InvalidOperand && Result != Match_MissingFeature
+ && Result != Match_PreferE32) ||
+ (R == Match_MnemonicFail && Result != Match_InvalidOperand
+ && Result != Match_MissingFeature
+ && Result != Match_PreferE32)) {
+ Result = R;
+ ErrorInfo = EI;
+ }
+ if (R == Match_Success)
+ break;
+ }
- switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) {
- default: break;
- case Match_Success:
- Inst.setLoc(IDLoc);
- Out.EmitInstruction(Inst, getSTI());
- return false;
- case Match_MissingFeature:
- return Error(IDLoc, "instruction not supported on this GPU");
+ switch (Result) {
+ default: break;
+ case Match_Success:
+ Inst.setLoc(IDLoc);
+ Out.EmitInstruction(Inst, getSTI());
+ return false;
- case Match_MnemonicFail:
- return Error(IDLoc, "unrecognized instruction mnemonic");
+ case Match_MissingFeature:
+ return Error(IDLoc, "instruction not supported on this GPU");
- case Match_InvalidOperand: {
- SMLoc ErrorLoc = IDLoc;
- if (ErrorInfo != ~0ULL) {
- if (ErrorInfo >= Operands.size()) {
- return Error(IDLoc, "too few operands for instruction");
- }
- ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
- if (ErrorLoc == SMLoc())
- ErrorLoc = IDLoc;
+ case Match_MnemonicFail:
+ return Error(IDLoc, "unrecognized instruction mnemonic");
+
+ case Match_InvalidOperand: {
+ SMLoc ErrorLoc = IDLoc;
+ if (ErrorInfo != ~0ULL) {
+ if (ErrorInfo >= Operands.size()) {
+ return Error(IDLoc, "too few operands for instruction");
}
- return Error(ErrorLoc, "invalid operand for instruction");
+ ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
+ if (ErrorLoc == SMLoc())
+ ErrorLoc = IDLoc;
}
- case Match_PreferE32:
- return Error(IDLoc, "internal error: instruction without _e64 suffix "
- "should be encoded as e32");
+ return Error(ErrorLoc, "invalid operand for instruction");
+ }
+
+ case Match_PreferE32:
+ return Error(IDLoc, "internal error: instruction without _e64 suffix "
+ "should be encoded as e32");
}
llvm_unreachable("Implement any new match types added!");
}
+bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
+ int64_t Tmp = -1;
+ if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) {
+ return true;
+ }
+ if (getParser().parseAbsoluteExpression(Tmp)) {
+ return true;
+ }
+ Ret = static_cast<uint32_t>(Tmp);
+ return false;
+}
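+// Editorial note (illustrative): directives that previously required a bare
+// integer token can now take a constant expression, e.g.
+// ".hsa_code_object_version 2, 1" as well as ".hsa_code_object_version 1+1, 1".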
+
bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
uint32_t &Minor) {
- if (getLexer().isNot(AsmToken::Integer))
+ if (ParseAsAbsoluteExpression(Major))
return TokError("invalid major version");
- Major = getLexer().getTok().getIntVal();
- Lex();
-
if (getLexer().isNot(AsmToken::Comma))
return TokError("minor version number required, comma expected");
Lex();
- if (getLexer().isNot(AsmToken::Integer))
+ if (ParseAsAbsoluteExpression(Minor))
return TokError("invalid minor version");
- Minor = getLexer().getTok().getIntVal();
- Lex();
-
return false;
}
@@ -1214,7 +1750,6 @@ bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
}
bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
-
uint32_t Major;
uint32_t Minor;
uint32_t Stepping;
@@ -1231,7 +1766,6 @@ bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
return false;
}
-
if (ParseDirectiveMajorMinor(Major, Minor))
return true;
@@ -1239,12 +1773,9 @@ bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
return TokError("stepping version number required, comma expected");
Lex();
- if (getLexer().isNot(AsmToken::Integer))
+ if (ParseAsAbsoluteExpression(Stepping))
return TokError("invalid stepping version");
- Stepping = getLexer().getTok().getIntVal();
- Lex();
-
if (getLexer().isNot(AsmToken::Comma))
return TokError("vendor name required, comma expected");
Lex();
@@ -1270,6 +1801,46 @@ bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
return false;
}
+bool AMDGPUAsmParser::ParseDirectiveRuntimeMetadata() {
+ std::string Metadata;
+ raw_string_ostream MS(Metadata);
+
+ getLexer().setSkipSpace(false);
+
+ bool FoundEnd = false;
+ while (!getLexer().is(AsmToken::Eof)) {
+ while (getLexer().is(AsmToken::Space)) {
+ MS << ' ';
+ Lex();
+ }
+
+ if (getLexer().is(AsmToken::Identifier)) {
+ StringRef ID = getLexer().getTok().getIdentifier();
+ if (ID == ".end_amdgpu_runtime_metadata") {
+ Lex();
+ FoundEnd = true;
+ break;
+ }
+ }
+
+ MS << Parser.parseStringToEndOfStatement()
+ << getContext().getAsmInfo()->getSeparatorString();
+
+ Parser.eatToEndOfStatement();
+ }
+
+ getLexer().setSkipSpace(true);
+
+ if (getLexer().is(AsmToken::Eof) && !FoundEnd)
+ return TokError("expected directive .end_amdgpu_runtime_metadata not found");
+
+ MS.flush();
+
+ getTargetStreamer().EmitRuntimeMetadata(Metadata);
+
+ return false;
+}
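+// Editorial sketch (the payload below is a made-up example, not a defined
+// schema): everything between the directives is captured verbatim and handed
+// to the target streamer, e.g.
+// .amdgpu_runtime_metadata
+// { amd.MDVersion: [ 2, 0 ] }
+// .end_amdgpu_runtime_metadata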
+
bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
amd_kernel_code_t &Header) {
SmallString<40> ErrStr;
@@ -1282,12 +1853,10 @@ bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
}
bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
-
amd_kernel_code_t Header;
AMDGPU::initDefaultAMDKernelCodeT(Header, getSTI().getFeatureBits());
while (true) {
-
// Lex EndOfStatement. This is in a while loop, because lexing a comment
// will set the current token to EndOfStatement.
while(getLexer().is(AsmToken::EndOfStatement))
@@ -1326,6 +1895,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
ELF::STT_AMDGPU_HSA_KERNEL);
Lex();
+ KernelScope.initialize(getContext());
return false;
}
@@ -1378,6 +1948,9 @@ bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
if (IDVal == ".hsa_code_object_isa")
return ParseDirectiveHSACodeObjectISA();
+ if (IDVal == ".amdgpu_runtime_metadata")
+ return ParseDirectiveRuntimeMetadata();
+
if (IDVal == ".amd_kernel_code_t")
return ParseDirectiveAMDKernelCodeT();
@@ -1433,7 +2006,7 @@ bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
return true;
}
-AMDGPUAsmParser::OperandMatchResultTy
+OperandMatchResultTy
AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
// Try to parse with a custom parser
@@ -1464,11 +2037,11 @@ AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
SMLoc S = Tok.getLoc();
const MCExpr *Expr = nullptr;
if (!Parser.parseExpression(Expr)) {
- Operands.push_back(AMDGPUOperand::CreateExpr(Expr, S));
+ Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
return MatchOperand_Success;
}
- Operands.push_back(AMDGPUOperand::CreateToken(Tok.getString(), Tok.getLoc()));
+ Operands.push_back(AMDGPUOperand::CreateToken(this, Tok.getString(), Tok.getLoc()));
Parser.Lex();
return MatchOperand_Success;
}
@@ -1502,10 +2075,10 @@ bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
SMLoc NameLoc, OperandVector &Operands) {
// Add the instruction mnemonic
Name = parseMnemonicSuffix(Name);
- Operands.push_back(AMDGPUOperand::CreateToken(Name, NameLoc));
+ Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));
while (!getLexer().is(AsmToken::EndOfStatement)) {
- AMDGPUAsmParser::OperandMatchResultTy Res = parseOperand(Operands, Name);
+ OperandMatchResultTy Res = parseOperand(Operands, Name);
// Eat the comma or space if there is one.
if (getLexer().is(AsmToken::Comma))
@@ -1535,7 +2108,7 @@ bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
// Utility functions
//===----------------------------------------------------------------------===//
-AMDGPUAsmParser::OperandMatchResultTy
+OperandMatchResultTy
AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) {
switch(getLexer().getKind()) {
default: return MatchOperand_NoMatch;
@@ -1561,15 +2134,14 @@ AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &Int) {
return MatchOperand_Success;
}
-AMDGPUAsmParser::OperandMatchResultTy
+OperandMatchResultTy
AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
enum AMDGPUOperand::ImmTy ImmTy,
bool (*ConvertResult)(int64_t&)) {
-
SMLoc S = Parser.getTok().getLoc();
int64_t Value = 0;
- AMDGPUAsmParser::OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
+ OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
if (Res != MatchOperand_Success)
return Res;
@@ -1577,11 +2149,11 @@ AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
return MatchOperand_ParseFail;
}
- Operands.push_back(AMDGPUOperand::CreateImm(Value, S, ImmTy));
+ Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
return MatchOperand_Success;
}
-AMDGPUAsmParser::OperandMatchResultTy
+OperandMatchResultTy
AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
enum AMDGPUOperand::ImmTy ImmTy) {
int64_t Bit = 0;
@@ -1609,7 +2181,7 @@ AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands,
}
}
- Operands.push_back(AMDGPUOperand::CreateImm(Bit, S, ImmTy));
+ Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
return MatchOperand_Success;
}
@@ -1627,7 +2199,7 @@ void addOptionalImmOperand(MCInst& Inst, const OperandVector& Operands,
}
}
-AMDGPUAsmParser::OperandMatchResultTy
+OperandMatchResultTy
AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
if (getLexer().isNot(AsmToken::Identifier)) {
return MatchOperand_NoMatch;
@@ -1657,7 +2229,6 @@ AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) {
void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
const OperandVector &Operands) {
-
OptionalImmIndexMap OptionalIdx;
for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
@@ -1681,7 +2252,6 @@ void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
}
void AMDGPUAsmParser::cvtDS(MCInst &Inst, const OperandVector &Operands) {
-
std::map<enum AMDGPUOperand::ImmTy, unsigned> OptionalIdx;
bool GDSOnly = false;
@@ -1712,6 +2282,46 @@ void AMDGPUAsmParser::cvtDS(MCInst &Inst, const OperandVector &Operands) {
Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
}
+void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
+ OptionalImmIndexMap OptionalIdx;
+
+ unsigned EnMask = 0;
+ int SrcIdx = 0;
+
+ for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
+ AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
+
+ // Add the register arguments
+ if (Op.isReg()) {
+ EnMask |= (1 << SrcIdx);
+ Op.addRegOperands(Inst, 1);
+ ++SrcIdx;
+ continue;
+ }
+
+ if (Op.isOff()) {
+ ++SrcIdx;
+ Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
+ continue;
+ }
+
+ if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
+ Op.addImmOperands(Inst, 1);
+ continue;
+ }
+
+ if (Op.isToken() && Op.getToken() == "done")
+ continue;
+
+ // Handle optional arguments
+ OptionalIdx[Op.getImmTy()] = i;
+ }
+
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);
+
+ Inst.addOperand(MCOperand::createImm(EnMask));
+}
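+// Editorial note (example only): for "exp mrt0 v0, off, off, v3", the register
+// sources set bits 0 and 3 while the "off" slots only advance SrcIdx, so the
+// trailing en operand added above is 0b1001.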
//===----------------------------------------------------------------------===//
// s_waitcnt
@@ -1739,52 +2349,41 @@ bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {
if (getLexer().is(AsmToken::Amp) || getLexer().is(AsmToken::Comma))
Parser.Lex();
- int CntShift;
- int CntMask;
-
- if (CntName == "vmcnt") {
- CntMask = 0xf;
- CntShift = 0;
- } else if (CntName == "expcnt") {
- CntMask = 0x7;
- CntShift = 4;
- } else if (CntName == "lgkmcnt") {
- CntMask = 0xf;
- CntShift = 8;
- } else {
+ IsaVersion IV = getIsaVersion(getSTI().getFeatureBits());
+ if (CntName == "vmcnt")
+ IntVal = encodeVmcnt(IV, IntVal, CntVal);
+ else if (CntName == "expcnt")
+ IntVal = encodeExpcnt(IV, IntVal, CntVal);
+ else if (CntName == "lgkmcnt")
+ IntVal = encodeLgkmcnt(IV, IntVal, CntVal);
+ else
return true;
- }
- IntVal &= ~(CntMask << CntShift);
- IntVal |= (CntVal << CntShift);
return false;
}
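+// Editorial sketch (SI/CI/VI field layout assumed: vmcnt [3:0], expcnt [6:4],
+// lgkmcnt [11:8]): starting from the all-ones default mask 0xf7f,
+// "s_waitcnt vmcnt(0) lgkmcnt(0)" clears those two fields and encodes as 0x70.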
-AMDGPUAsmParser::OperandMatchResultTy
+OperandMatchResultTy
AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
- // Disable all counters by default.
- // vmcnt [3:0]
- // expcnt [6:4]
- // lgkmcnt [11:8]
- int64_t CntVal = 0xf7f;
+ IsaVersion IV = getIsaVersion(getSTI().getFeatureBits());
+ int64_t Waitcnt = getWaitcntBitMask(IV);
SMLoc S = Parser.getTok().getLoc();
switch(getLexer().getKind()) {
default: return MatchOperand_ParseFail;
case AsmToken::Integer:
// The operand can be an integer value.
- if (getParser().parseAbsoluteExpression(CntVal))
+ if (getParser().parseAbsoluteExpression(Waitcnt))
return MatchOperand_ParseFail;
break;
case AsmToken::Identifier:
do {
- if (parseCnt(CntVal))
+ if (parseCnt(Waitcnt))
return MatchOperand_ParseFail;
} while(getLexer().isNot(AsmToken::EndOfStatement));
break;
}
- Operands.push_back(AMDGPUOperand::CreateImm(CntVal, S));
+ Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
return MatchOperand_Success;
}
@@ -1849,7 +2448,7 @@ bool AMDGPUAsmParser::parseHwregConstruct(OperandInfoTy &HwReg, int64_t &Offset,
return false;
}
-AMDGPUAsmParser::OperandMatchResultTy
+OperandMatchResultTy
AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
using namespace llvm::AMDGPU::Hwreg;
@@ -1889,7 +2488,7 @@ AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
}
break;
}
- Operands.push_back(AMDGPUOperand::CreateImm(Imm16Val, S, AMDGPUOperand::ImmTyHwreg));
+ Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTyHwreg));
return MatchOperand_Success;
}
@@ -1997,7 +2596,147 @@ bool AMDGPUAsmParser::parseSendMsgConstruct(OperandInfoTy &Msg, OperandInfoTy &O
return false;
}
-AMDGPUAsmParser::OperandMatchResultTy
+OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
+ if (getLexer().getKind() != AsmToken::Identifier)
+ return MatchOperand_NoMatch;
+
+ StringRef Str = Parser.getTok().getString();
+ int Slot = StringSwitch<int>(Str)
+ .Case("p10", 0)
+ .Case("p20", 1)
+ .Case("p0", 2)
+ .Default(-1);
+
+ SMLoc S = Parser.getTok().getLoc();
+ if (Slot == -1)
+ return MatchOperand_ParseFail;
+
+ Parser.Lex();
+ Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
+ AMDGPUOperand::ImmTyInterpSlot));
+ return MatchOperand_Success;
+}
+
+OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
+ if (getLexer().getKind() != AsmToken::Identifier)
+ return MatchOperand_NoMatch;
+
+ StringRef Str = Parser.getTok().getString();
+ if (!Str.startswith("attr"))
+ return MatchOperand_NoMatch;
+
+ StringRef Chan = Str.take_back(2);
+ int AttrChan = StringSwitch<int>(Chan)
+ .Case(".x", 0)
+ .Case(".y", 1)
+ .Case(".z", 2)
+ .Case(".w", 3)
+ .Default(-1);
+ if (AttrChan == -1)
+ return MatchOperand_ParseFail;
+
+ Str = Str.drop_back(2).drop_front(4);
+
+ uint8_t Attr;
+ if (Str.getAsInteger(10, Attr))
+ return MatchOperand_ParseFail;
+
+ SMLoc S = Parser.getTok().getLoc();
+ Parser.Lex();
+ if (Attr > 63) {
+ Error(S, "out of bounds attr");
+ return MatchOperand_Success;
+ }
+
+ SMLoc SChan = SMLoc::getFromPointer(Chan.data());
+
+ Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
+ AMDGPUOperand::ImmTyInterpAttr));
+ Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
+ AMDGPUOperand::ImmTyAttrChan));
+ return MatchOperand_Success;
+}
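+// Editorial note (hypothetical input): "attr32.w" parses as Attr = 32 with
+// AttrChan = 3, and the slot tokens map as p10 -> 0, p20 -> 1, p0 -> 2, e.g.
+// "v_interp_p1_f32 v0, v1, attr0.x".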
+
+void AMDGPUAsmParser::errorExpTgt() {
+ Error(Parser.getTok().getLoc(), "invalid exp target");
+}
+
+OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str,
+ uint8_t &Val) {
+ if (Str == "null") {
+ Val = 9;
+ return MatchOperand_Success;
+ }
+
+ if (Str.startswith("mrt")) {
+ Str = Str.drop_front(3);
+ if (Str == "z") { // == mrtz
+ Val = 8;
+ return MatchOperand_Success;
+ }
+
+ if (Str.getAsInteger(10, Val))
+ return MatchOperand_ParseFail;
+
+ if (Val > 7)
+ errorExpTgt();
+
+ return MatchOperand_Success;
+ }
+
+ if (Str.startswith("pos")) {
+ Str = Str.drop_front(3);
+ if (Str.getAsInteger(10, Val))
+ return MatchOperand_ParseFail;
+
+ if (Val > 3)
+ errorExpTgt();
+
+ Val += 12;
+ return MatchOperand_Success;
+ }
+
+ if (Str.startswith("param")) {
+ Str = Str.drop_front(5);
+ if (Str.getAsInteger(10, Val))
+ return MatchOperand_ParseFail;
+
+ if (Val >= 32)
+ errorExpTgt();
+
+ Val += 32;
+ return MatchOperand_Success;
+ }
+
+ if (Str.startswith("invalid_target_")) {
+ Str = Str.drop_front(15);
+ if (Str.getAsInteger(10, Val))
+ return MatchOperand_ParseFail;
+
+ errorExpTgt();
+ return MatchOperand_Success;
+ }
+
+ return MatchOperand_NoMatch;
+}
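+// Editorial note (values follow the code above): "mrt0".."mrt7" map to 0..7,
+// "mrtz" to 8, "null" to 9, "pos0".."pos3" to 12..15, and "param0".."param31"
+// to 32..63; anything larger reports "invalid exp target".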
+
+OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
+ uint8_t Val;
+ StringRef Str = Parser.getTok().getString();
+
+ auto Res = parseExpTgtImpl(Str, Val);
+ if (Res != MatchOperand_Success)
+ return Res;
+
+ SMLoc S = Parser.getTok().getLoc();
+ Parser.Lex();
+
+ Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S,
+ AMDGPUOperand::ImmTyExpTgt));
+ return MatchOperand_Success;
+}
+
+OperandMatchResultTy
AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
using namespace llvm::AMDGPU::SendMsg;
@@ -2068,11 +2807,11 @@ AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
}
Imm16Val |= (StreamId << STREAM_ID_SHIFT_);
}
- } while (0);
+ } while (false);
}
break;
}
- Operands.push_back(AMDGPUOperand::CreateImm(Imm16Val, S, AMDGPUOperand::ImmTySendMsg));
+ Operands.push_back(AMDGPUOperand::CreateImm(this, Imm16Val, S, AMDGPUOperand::ImmTySendMsg));
return MatchOperand_Success;
}
@@ -2084,7 +2823,7 @@ bool AMDGPUOperand::isSendMsg() const {
// sopp branch targets
//===----------------------------------------------------------------------===//
-AMDGPUAsmParser::OperandMatchResultTy
+OperandMatchResultTy
AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
SMLoc S = Parser.getTok().getLoc();
@@ -2094,12 +2833,12 @@ AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
int64_t Imm;
if (getParser().parseAbsoluteExpression(Imm))
return MatchOperand_ParseFail;
- Operands.push_back(AMDGPUOperand::CreateImm(Imm, S));
+ Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S));
return MatchOperand_Success;
}
case AsmToken::Identifier:
- Operands.push_back(AMDGPUOperand::CreateExpr(
+ Operands.push_back(AMDGPUOperand::CreateExpr(this,
MCSymbolRefExpr::create(getContext().getOrCreateSymbol(
Parser.getTok().getString()), getContext()), S));
Parser.Lex();
@@ -2112,15 +2851,15 @@ AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
//===----------------------------------------------------------------------===//
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const {
- return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyGLC);
+ return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC);
}
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const {
- return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTySLC);
+ return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC);
}
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultTFE() const {
- return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyTFE);
+ return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyTFE);
}
void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
@@ -2192,7 +2931,7 @@ void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands) {
} else if (Op.isImmModifier()) {
OptionalIdx[Op.getImmTy()] = I;
} else {
- assert(false);
+ llvm_unreachable("unexpected operand type");
}
}
@@ -2228,7 +2967,7 @@ void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands)
} else if (Op.isImmModifier()) {
OptionalIdx[Op.getImmTy()] = I;
} else {
- assert(false);
+ llvm_unreachable("unexpected operand type");
}
}
@@ -2243,48 +2982,53 @@ void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands)
}
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDMask() const {
- return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyDMask);
+ return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDMask);
}
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultUNorm() const {
- return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyUNorm);
+ return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyUNorm);
}
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDA() const {
- return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyDA);
+ return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDA);
}
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultR128() const {
- return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyR128);
+ return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyR128);
}
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultLWE() const {
- return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyLWE);
+ return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyLWE);
}
//===----------------------------------------------------------------------===//
// smrd
//===----------------------------------------------------------------------===//
-bool AMDGPUOperand::isSMRDOffset() const {
-
- // FIXME: Support 20-bit offsets on VI. We need to to pass subtarget
- // information here.
+bool AMDGPUOperand::isSMRDOffset8() const {
return isImm() && isUInt<8>(getImm());
}
+bool AMDGPUOperand::isSMRDOffset20() const {
+ return isImm() && isUInt<20>(getImm());
+}
+
bool AMDGPUOperand::isSMRDLiteralOffset() const {
// 32-bit literals are only supported on CI and we only want to use them
// when the offset is > 8-bits.
return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
}
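+// Editorial note (assuming the usual SMRD/SMEM encodings): isSMRDOffset8
+// matches the 8-bit immediate offsets used by SI/CI, while isSMRDOffset20
+// matches the wider 20-bit offsets available on VI, e.g. an offset of 0x1234
+// satisfies only the 20-bit predicate.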
-AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset() const {
- return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyOffset);
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
+ return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
+}
+
+AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const {
+ return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
- return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyOffset);
+ return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
}
//===----------------------------------------------------------------------===//
@@ -2317,10 +3061,13 @@ static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
if (BoundCtrl == 0) {
BoundCtrl = 1;
return true;
- } else if (BoundCtrl == -1) {
+ }
+
+ if (BoundCtrl == -1) {
BoundCtrl = 0;
return true;
}
+
return false;
}
@@ -2350,9 +3097,10 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
{"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
{"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
{"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
+ {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
};
-AMDGPUAsmParser::OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
+OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
OperandMatchResultTy res;
for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
// try to parse any optional operand here
@@ -2376,16 +3124,19 @@ AMDGPUAsmParser::OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(Oper
return MatchOperand_NoMatch;
}
-AMDGPUAsmParser::OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands)
-{
+OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
StringRef Name = Parser.getTok().getString();
if (Name == "mul") {
- return parseIntWithPrefix("mul", Operands, AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
- } else if (Name == "div") {
- return parseIntWithPrefix("div", Operands, AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
- } else {
- return MatchOperand_NoMatch;
+ return parseIntWithPrefix("mul", Operands,
+ AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
+ }
+
+ if (Name == "div") {
+ return parseIntWithPrefix("div", Operands,
+ AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
}
+
+ return MatchOperand_NoMatch;
}
void AMDGPUAsmParser::cvtId(MCInst &Inst, const OperandVector &Operands) {
@@ -2407,6 +3158,17 @@ void AMDGPUAsmParser::cvtVOP3_2_mod(MCInst &Inst, const OperandVector &Operands)
}
}
+static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
+ // 1. This operand is an input-modifiers operand
+ return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
+ // 2. It is not the last operand
+ && Desc.NumOperands > (OpNum + 1)
+ // 3. The next operand has a register class
+ && Desc.OpInfo[OpNum + 1].RegClass != -1
+ // 4. Next register is not tied to any other operand
+ && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
+}
+
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
OptionalImmIndexMap OptionalIdx;
unsigned I = 1;
@@ -2417,18 +3179,36 @@ void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
for (unsigned E = Operands.size(); I != E; ++I) {
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
- if (Op.isRegOrImmWithInputMods()) {
- // only fp modifiers allowed in VOP3
+ if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
} else if (Op.isImm()) {
OptionalIdx[Op.getImmTy()] = I;
} else {
- assert(false);
+ llvm_unreachable("unhandled operand type");
}
}
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
+
+ // Special case for v_mac_{f16, f32}: it has a src2 register operand that is
+ // tied to the dst operand. We don't allow modifiers for this operand in the
+ // assembler, so src2_modifiers should be 0.
+ if (Inst.getOpcode() == AMDGPU::V_MAC_F32_e64_si ||
+ Inst.getOpcode() == AMDGPU::V_MAC_F32_e64_vi ||
+ Inst.getOpcode() == AMDGPU::V_MAC_F16_e64_vi) {
+ auto it = Inst.begin();
+ std::advance(
+ it,
+ AMDGPU::getNamedOperandIdx(Inst.getOpcode() == AMDGPU::V_MAC_F16_e64_vi ?
+ AMDGPU::V_MAC_F16_e64 :
+ AMDGPU::V_MAC_F32_e64,
+ AMDGPU::OpName::src2_modifiers));
+ it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
+ ++it;
+ Inst.insert(it, Inst.getOperand(0)); // src2 = dst
+ }
}
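+// Editorial sketch (hypothetical source line): for "v_mac_f32_e64 v0, v1, v2"
+// the loop above never sees src2, so a zero src2_modifiers immediate is
+// inserted and the dst register v0 is duplicated as the tied src2 operand.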
//===----------------------------------------------------------------------===//
@@ -2455,7 +3235,11 @@ bool AMDGPUOperand::isDPPCtrl() const {
return false;
}
-AMDGPUAsmParser::OperandMatchResultTy
+bool AMDGPUOperand::isGPRIdxMode() const {
+ return isImm() && isUInt<4>(getImm());
+}
+
+OperandMatchResultTy
AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
SMLoc S = Parser.getTok().getLoc();
StringRef Prefix;
@@ -2469,8 +3253,10 @@ AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
if (Prefix == "row_mirror") {
Int = 0x140;
+ Parser.Lex();
} else if (Prefix == "row_half_mirror") {
Int = 0x141;
+ Parser.Lex();
} else {
// Check to prevent parseDPPCtrlOps from eating invalid tokens
if (Prefix != "quad_perm"
@@ -2494,60 +3280,46 @@ AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
Parser.Lex();
if (getLexer().isNot(AsmToken::LBrac))
return MatchOperand_ParseFail;
-
Parser.Lex();
- if (getLexer().isNot(AsmToken::Integer))
- return MatchOperand_ParseFail;
- Int = getLexer().getTok().getIntVal();
- Parser.Lex();
- if (getLexer().isNot(AsmToken::Comma))
- return MatchOperand_ParseFail;
- Parser.Lex();
- if (getLexer().isNot(AsmToken::Integer))
+ if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <= 3))
return MatchOperand_ParseFail;
- Int += (getLexer().getTok().getIntVal() << 2);
- Parser.Lex();
- if (getLexer().isNot(AsmToken::Comma))
- return MatchOperand_ParseFail;
- Parser.Lex();
- if (getLexer().isNot(AsmToken::Integer))
- return MatchOperand_ParseFail;
- Int += (getLexer().getTok().getIntVal() << 4);
+ for (int i = 0; i < 3; ++i) {
+ if (getLexer().isNot(AsmToken::Comma))
+ return MatchOperand_ParseFail;
+ Parser.Lex();
- Parser.Lex();
- if (getLexer().isNot(AsmToken::Comma))
- return MatchOperand_ParseFail;
- Parser.Lex();
- if (getLexer().isNot(AsmToken::Integer))
- return MatchOperand_ParseFail;
- Int += (getLexer().getTok().getIntVal() << 6);
+ int64_t Temp;
+ if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <= 3))
+ return MatchOperand_ParseFail;
+ const int shift = i*2 + 2;
+ Int += (Temp << shift);
+ }
- Parser.Lex();
if (getLexer().isNot(AsmToken::RBrac))
return MatchOperand_ParseFail;
+ Parser.Lex();
} else {
// sel:%d
Parser.Lex();
- if (getLexer().isNot(AsmToken::Integer))
+ if (getParser().parseAbsoluteExpression(Int))
return MatchOperand_ParseFail;
- Int = getLexer().getTok().getIntVal();
- if (Prefix == "row_shl") {
+ if (Prefix == "row_shl" && 1 <= Int && Int <= 15) {
Int |= 0x100;
- } else if (Prefix == "row_shr") {
+ } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) {
Int |= 0x110;
- } else if (Prefix == "row_ror") {
+ } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) {
Int |= 0x120;
- } else if (Prefix == "wave_shl") {
+ } else if (Prefix == "wave_shl" && 1 == Int) {
Int = 0x130;
- } else if (Prefix == "wave_rol") {
+ } else if (Prefix == "wave_rol" && 1 == Int) {
Int = 0x134;
- } else if (Prefix == "wave_shr") {
+ } else if (Prefix == "wave_shr" && 1 == Int) {
Int = 0x138;
- } else if (Prefix == "wave_ror") {
+ } else if (Prefix == "wave_ror" && 1 == Int) {
Int = 0x13C;
} else if (Prefix == "row_bcast") {
if (Int == 15) {
@@ -2562,23 +3334,21 @@ AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
}
}
}
- Parser.Lex(); // eat last token
- Operands.push_back(AMDGPUOperand::CreateImm(Int, S,
- AMDGPUOperand::ImmTyDppCtrl));
+ Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl));
return MatchOperand_Success;
}
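+// Editorial note (worked encoding): "quad_perm:[0,1,2,3]" packs as
+// 0 | (1 << 2) | (2 << 4) | (3 << 6) = 0xE4, "row_shl:1" becomes 0x101, and the
+// bare tokens row_mirror / row_half_mirror encode as 0x140 / 0x141.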
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
- return AMDGPUOperand::CreateImm(0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
+ return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
}
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
- return AMDGPUOperand::CreateImm(0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
+ return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
}
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
- return AMDGPUOperand::CreateImm(0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
+ return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
}
void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
@@ -2593,8 +3363,11 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
for (unsigned E = Operands.size(); I != E; ++I) {
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
// Add the register arguments
- if (Op.isRegOrImmWithInputMods()) {
- // Only float modifiers supported in DPP
+ if (Op.isReg() && Op.Reg.RegNo == AMDGPU::VCC) {
+ // VOP2b (v_add_u32, v_sub_u32, ...) dpp instructions use the "vcc" token.
+ // Skip it.
+ continue;
+ } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
} else if (Op.isDPPCtrl()) {
Op.addImmOperands(Inst, 1);
@@ -2609,18 +3382,30 @@ void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands) {
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
+
+ // Special case for v_mac_{f16, f32}: it has a src2 register operand that is
+ // tied to the dst operand.
+ if (Inst.getOpcode() == AMDGPU::V_MAC_F32_dpp ||
+ Inst.getOpcode() == AMDGPU::V_MAC_F16_dpp) {
+ auto it = Inst.begin();
+ std::advance(
+ it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
+ Inst.insert(it, Inst.getOperand(0)); // src2 = dst
+ }
}
//===----------------------------------------------------------------------===//
// sdwa
//===----------------------------------------------------------------------===//
-AMDGPUAsmParser::OperandMatchResultTy
+OperandMatchResultTy
AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
AMDGPUOperand::ImmTy Type) {
+ using namespace llvm::AMDGPU::SDWA;
+
SMLoc S = Parser.getTok().getLoc();
StringRef Value;
- AMDGPUAsmParser::OperandMatchResultTy res;
+ OperandMatchResultTy res;
res = parseStringWithPrefix(Prefix, Value);
if (res != MatchOperand_Success) {
@@ -2629,13 +3414,13 @@ AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
int64_t Int;
Int = StringSwitch<int64_t>(Value)
- .Case("BYTE_0", 0)
- .Case("BYTE_1", 1)
- .Case("BYTE_2", 2)
- .Case("BYTE_3", 3)
- .Case("WORD_0", 4)
- .Case("WORD_1", 5)
- .Case("DWORD", 6)
+ .Case("BYTE_0", SdwaSel::BYTE_0)
+ .Case("BYTE_1", SdwaSel::BYTE_1)
+ .Case("BYTE_2", SdwaSel::BYTE_2)
+ .Case("BYTE_3", SdwaSel::BYTE_3)
+ .Case("WORD_0", SdwaSel::WORD_0)
+ .Case("WORD_1", SdwaSel::WORD_1)
+ .Case("DWORD", SdwaSel::DWORD)
.Default(0xffffffff);
Parser.Lex(); // eat last token
@@ -2643,15 +3428,17 @@ AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
return MatchOperand_ParseFail;
}
- Operands.push_back(AMDGPUOperand::CreateImm(Int, S, Type));
+ Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
return MatchOperand_Success;
}
-AMDGPUAsmParser::OperandMatchResultTy
+OperandMatchResultTy
AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
+ using namespace llvm::AMDGPU::SDWA;
+
SMLoc S = Parser.getTok().getLoc();
StringRef Value;
- AMDGPUAsmParser::OperandMatchResultTy res;
+ OperandMatchResultTy res;
res = parseStringWithPrefix("dst_unused", Value);
if (res != MatchOperand_Success) {
@@ -2660,9 +3447,9 @@ AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
int64_t Int;
Int = StringSwitch<int64_t>(Value)
- .Case("UNUSED_PAD", 0)
- .Case("UNUSED_SEXT", 1)
- .Case("UNUSED_PRESERVE", 2)
+ .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
+ .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
+ .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
.Default(0xffffffff);
Parser.Lex(); // eat last token
@@ -2670,8 +3457,7 @@ AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
return MatchOperand_ParseFail;
}
- Operands.push_back(AMDGPUOperand::CreateImm(Int, S,
- AMDGPUOperand::ImmTySdwaDstUnused));
+ Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
return MatchOperand_Success;
}
@@ -2700,13 +3486,15 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
for (unsigned E = Operands.size(); I != E; ++I) {
AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
// Add the register arguments
- if (BasicInstType == SIInstrFlags::VOPC &&
+ if ((BasicInstType == SIInstrFlags::VOPC ||
+ BasicInstType == SIInstrFlags::VOP2) &&
Op.isReg() &&
Op.Reg.RegNo == AMDGPU::VCC) {
- // VOPC sdwa use "vcc" token as dst. Skip it.
+ // VOPC and VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
+ // Skip it.
continue;
- } else if (Op.isRegOrImmWithInputMods()) {
- Op.addRegOrImmWithInputModsOperands(Inst, 2);
+ } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
+ Op.addRegOrImmWithInputModsOperands(Inst, 2);
} else if (Op.isImm()) {
// Handle optional arguments
OptionalIdx[Op.getImmTy()] = I;
@@ -2716,46 +3504,55 @@ void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
}
addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
-
- if (Inst.getOpcode() == AMDGPU::V_NOP_sdwa) {
- // V_NOP_sdwa has no optional sdwa arguments
- return;
- }
- switch (BasicInstType) {
- case SIInstrFlags::VOP1: {
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, 6);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, 2);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, 6);
- break;
- }
- case SIInstrFlags::VOP2: {
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, 6);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, 2);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, 6);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, 6);
- break;
- }
- case SIInstrFlags::VOPC: {
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, 6);
- addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, 6);
- break;
+
+ if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
+ // V_NOP_sdwa_vi has no optional sdwa arguments
+ switch (BasicInstType) {
+ case SIInstrFlags::VOP1:
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, 6);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, 2);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, 6);
+ break;
+
+ case SIInstrFlags::VOP2:
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, 6);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, 2);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, 6);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, 6);
+ break;
+
+ case SIInstrFlags::VOPC:
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, 6);
+ addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, 6);
+ break;
+
+ default:
+ llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
+ }
}
- default:
- llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
+
+  // Special case for v_mac_{f16, f32}:
+  // it has a src2 register operand that is tied to the dst operand.
+ if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
+ Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
+ auto it = Inst.begin();
+ std::advance(
+ it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
+ Inst.insert(it, Inst.getOperand(0)); // src2 = dst
}
+
}
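The v_mac special case above boils down to copying the destination operand into the tied src2 slot; a rough standalone sketch, with SrcIdx standing in for the index that AMDGPU::getNamedOperandIdx reports for src2:

#include "llvm/MC/MCInst.h"
#include <iterator>

// Illustrative sketch only: insert a copy of operand 0 (the dst) at SrcIdx so
// the tied source operand ends up equal to the destination register.
static void tieSrc2ToDst(llvm::MCInst &Inst, int SrcIdx) {
  auto It = Inst.begin();
  std::advance(It, SrcIdx);
  Inst.insert(It, Inst.getOperand(0)); // src2 = dst
}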
/// Force static initialization.
extern "C" void LLVMInitializeAMDGPUAsmParser() {
- RegisterMCAsmParser<AMDGPUAsmParser> A(TheAMDGPUTarget);
- RegisterMCAsmParser<AMDGPUAsmParser> B(TheGCNTarget);
+ RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
+ RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}
#define GET_REGISTER_MATCHER
#define GET_MATCHER_IMPLEMENTATION
#include "AMDGPUGenAsmMatcher.inc"
-
// This function should be defined after auto-generated include so that we have
// MatchClassKind enum defined
unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
@@ -2776,16 +3573,27 @@ unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
case MCK_offen:
return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
- case MCK_SSrc32:
+ case MCK_SSrcB32:
// When operands have expression values, they will return true for isToken,
// because it is not possible to distinguish between a token and an
// expression at parse time. MatchInstructionImpl() will always try to
// match an operand as a token when isToken returns true, and when the
// name of the expression is not a valid token, the match will fail,
// so we need to handle it here.
- return Operand.isSSrc32() ? Match_Success : Match_InvalidOperand;
+ return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
+ case MCK_SSrcF32:
+ return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
case MCK_SoppBrTarget:
return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
- default: return Match_InvalidOperand;
+ case MCK_VReg32OrOff:
+ return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
+ case MCK_InterpSlot:
+ return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
+ case MCK_Attr:
+ return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
+ case MCK_AttrChan:
+ return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
+ default:
+ return Match_InvalidOperand;
}
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/BUFInstructions.td b/contrib/llvm/lib/Target/AMDGPU/BUFInstructions.td
new file mode 100644
index 000000000000..45a7fe6d3439
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -0,0 +1,1350 @@
+//===-- BUFInstructions.td - Buffer Instruction Definitions ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+def MUBUFAddr32 : ComplexPattern<i64, 9, "SelectMUBUFAddr32">;
+def MUBUFAddr64 : ComplexPattern<i64, 7, "SelectMUBUFAddr64">;
+def MUBUFAddr64Atomic : ComplexPattern<i64, 5, "SelectMUBUFAddr64">;
+
+def MUBUFScratch : ComplexPattern<i64, 4, "SelectMUBUFScratch">;
+def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">;
+def MUBUFOffsetNoGLC : ComplexPattern<i64, 3, "SelectMUBUFOffset">;
+def MUBUFOffsetAtomic : ComplexPattern<i64, 4, "SelectMUBUFOffset">;
+def MUBUFIntrinsicOffset : ComplexPattern<i32, 2, "SelectMUBUFIntrinsicOffset">;
+def MUBUFIntrinsicVOffset : ComplexPattern<i32, 3, "SelectMUBUFIntrinsicVOffset">;
+
+class MubufLoad <SDPatternOperator op> : PatFrag <
+ (ops node:$ptr), (op node:$ptr), [{
+ auto const AS = cast<MemSDNode>(N)->getAddressSpace();
+ return AS == AMDGPUAS::GLOBAL_ADDRESS ||
+ AS == AMDGPUAS::CONSTANT_ADDRESS;
+}]>;
+
+def mubuf_load : MubufLoad <load>;
+def mubuf_az_extloadi8 : MubufLoad <az_extloadi8>;
+def mubuf_sextloadi8 : MubufLoad <sextloadi8>;
+def mubuf_az_extloadi16 : MubufLoad <az_extloadi16>;
+def mubuf_sextloadi16 : MubufLoad <sextloadi16>;
+def mubuf_load_atomic : MubufLoad <atomic_load>;
+
+def BUFAddrKind {
+ int Offset = 0;
+ int OffEn = 1;
+ int IdxEn = 2;
+ int BothEn = 3;
+ int Addr64 = 4;
+}
+
+class getAddrName<int addrKind> {
+ string ret =
+ !if(!eq(addrKind, BUFAddrKind.Offset), "offset",
+ !if(!eq(addrKind, BUFAddrKind.OffEn), "offen",
+ !if(!eq(addrKind, BUFAddrKind.IdxEn), "idxen",
+ !if(!eq(addrKind, BUFAddrKind.BothEn), "bothen",
+ !if(!eq(addrKind, BUFAddrKind.Addr64), "addr64",
+ "")))));
+}
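The nested !if above is TableGen's spelling of a switch over the BUFAddrKind constants defined earlier; a rough C++ equivalent of the same mapping, purely for illustration:

#include <string>

// Illustrative sketch only: addressing-mode constant -> pseudo-name suffix.
static std::string getAddrNameCpp(int AddrKind) {
  switch (AddrKind) {
  case 0: return "offset"; // BUFAddrKind.Offset
  case 1: return "offen";  // BUFAddrKind.OffEn
  case 2: return "idxen";  // BUFAddrKind.IdxEn
  case 3: return "bothen"; // BUFAddrKind.BothEn
  case 4: return "addr64"; // BUFAddrKind.Addr64
  default: return "";
  }
}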
+
+class MUBUFAddr64Table <bit is_addr64, string suffix = ""> {
+ bit IsAddr64 = is_addr64;
+ string OpName = NAME # suffix;
+}
+
+//===----------------------------------------------------------------------===//
+// MTBUF classes
+//===----------------------------------------------------------------------===//
+
+class MTBUF_Pseudo <string opName, dag outs, dag ins,
+ string asmOps, list<dag> pattern=[]> :
+ InstSI<outs, ins, "", pattern>,
+ SIMCInstr<opName, SIEncodingFamily.NONE> {
+
+ let isPseudo = 1;
+ let isCodeGenOnly = 1;
+ let Size = 8;
+ let UseNamedOperandTable = 1;
+
+ string Mnemonic = opName;
+ string AsmOperands = asmOps;
+
+ let VM_CNT = 1;
+ let EXP_CNT = 1;
+ let MTBUF = 1;
+ let Uses = [EXEC];
+
+ let hasSideEffects = 0;
+ let SchedRW = [WriteVMEM];
+}
+
+class MTBUF_Real <MTBUF_Pseudo ps> :
+ InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
+ Enc64 {
+
+ let isPseudo = 0;
+ let isCodeGenOnly = 0;
+
+ // copy relevant pseudo op flags
+ let SubtargetPredicate = ps.SubtargetPredicate;
+ let AsmMatchConverter = ps.AsmMatchConverter;
+ let Constraints = ps.Constraints;
+ let DisableEncoding = ps.DisableEncoding;
+ let TSFlags = ps.TSFlags;
+
+ bits<8> vdata;
+ bits<12> offset;
+ bits<1> offen;
+ bits<1> idxen;
+ bits<1> glc;
+ bits<1> addr64;
+ bits<4> dfmt;
+ bits<3> nfmt;
+ bits<8> vaddr;
+ bits<7> srsrc;
+ bits<1> slc;
+ bits<1> tfe;
+ bits<8> soffset;
+
+ let Inst{11-0} = offset;
+ let Inst{12} = offen;
+ let Inst{13} = idxen;
+ let Inst{14} = glc;
+ let Inst{22-19} = dfmt;
+ let Inst{25-23} = nfmt;
+ let Inst{31-26} = 0x3a; //encoding
+ let Inst{39-32} = vaddr;
+ let Inst{47-40} = vdata;
+ let Inst{52-48} = srsrc{6-2};
+ let Inst{54} = slc;
+ let Inst{55} = tfe;
+ let Inst{63-56} = soffset;
+}
+
+class MTBUF_Load_Pseudo <string opName, RegisterClass regClass> : MTBUF_Pseudo <
+ opName, (outs regClass:$dst),
+ (ins u16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64,
+ i8imm:$dfmt, i8imm:$nfmt, VGPR_32:$vaddr, SReg_128:$srsrc,
+ i1imm:$slc, i1imm:$tfe, SCSrc_b32:$soffset),
+ " $dst, $offset, $offen, $idxen, $glc, $addr64, $dfmt,"#
+ " $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset"> {
+ let mayLoad = 1;
+ let mayStore = 0;
+}
+
+class MTBUF_Store_Pseudo <string opName, RegisterClass regClass> : MTBUF_Pseudo <
+ opName, (outs),
+ (ins regClass:$vdata, u16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc,
+ i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VGPR_32:$vaddr,
+ SReg_128:$srsrc, i1imm:$slc, i1imm:$tfe, SCSrc_b32:$soffset),
+ " $vdata, $offset, $offen, $idxen, $glc, $addr64, $dfmt,"#
+ " $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset"> {
+ let mayLoad = 0;
+ let mayStore = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// MUBUF classes
+//===----------------------------------------------------------------------===//
+
+class MUBUF_Pseudo <string opName, dag outs, dag ins,
+ string asmOps, list<dag> pattern=[]> :
+ InstSI<outs, ins, "", pattern>,
+ SIMCInstr<opName, SIEncodingFamily.NONE> {
+
+ let isPseudo = 1;
+ let isCodeGenOnly = 1;
+ let Size = 8;
+ let UseNamedOperandTable = 1;
+
+ string Mnemonic = opName;
+ string AsmOperands = asmOps;
+
+ let VM_CNT = 1;
+ let EXP_CNT = 1;
+ let MUBUF = 1;
+ let Uses = [EXEC];
+ let hasSideEffects = 0;
+ let SchedRW = [WriteVMEM];
+
+ let AsmMatchConverter = "cvtMubuf";
+
+ bits<1> offen = 0;
+ bits<1> idxen = 0;
+ bits<1> addr64 = 0;
+ bits<1> has_vdata = 1;
+ bits<1> has_vaddr = 1;
+ bits<1> has_glc = 1;
+ bits<1> glc_value = 0; // the value for glc if no such operand
+ bits<1> has_srsrc = 1;
+ bits<1> has_soffset = 1;
+ bits<1> has_offset = 1;
+ bits<1> has_slc = 1;
+ bits<1> has_tfe = 1;
+}
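// Note: the has_* and glc_value bits above do not change the pseudo itself;
// the encoding classes below (MUBUF_Real_si / MUBUF_Real_vi) test them with
// !if to decide whether each field comes from an operand, uses the fixed
// glc_value, or stays a don't-care ('?').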
+
+class MUBUF_Real <bits<7> op, MUBUF_Pseudo ps> :
+ InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []> {
+
+ let isPseudo = 0;
+ let isCodeGenOnly = 0;
+
+ // copy relevant pseudo op flags
+ let SubtargetPredicate = ps.SubtargetPredicate;
+ let AsmMatchConverter = ps.AsmMatchConverter;
+ let Constraints = ps.Constraints;
+ let DisableEncoding = ps.DisableEncoding;
+ let TSFlags = ps.TSFlags;
+
+ bits<12> offset;
+ bits<1> glc;
+ bits<1> lds = 0;
+ bits<8> vaddr;
+ bits<8> vdata;
+ bits<7> srsrc;
+ bits<1> slc;
+ bits<1> tfe;
+ bits<8> soffset;
+}
+
+
+// For cache invalidation instructions.
+class MUBUF_Invalidate <string opName, SDPatternOperator node> :
+ MUBUF_Pseudo<opName, (outs), (ins), "", [(node)]> {
+
+ let AsmMatchConverter = "";
+
+ let hasSideEffects = 1;
+ let mayStore = 1;
+
+ // Set everything to 0.
+ let offen = 0;
+ let idxen = 0;
+ let addr64 = 0;
+ let has_vdata = 0;
+ let has_vaddr = 0;
+ let has_glc = 0;
+ let glc_value = 0;
+ let has_srsrc = 0;
+ let has_soffset = 0;
+ let has_offset = 0;
+ let has_slc = 0;
+ let has_tfe = 0;
+}
+
+class getMUBUFInsDA<list<RegisterClass> vdataList,
+ list<RegisterClass> vaddrList=[]> {
+ RegisterClass vdataClass = !if(!empty(vdataList), ?, !head(vdataList));
+ RegisterClass vaddrClass = !if(!empty(vaddrList), ?, !head(vaddrList));
+ dag InsNoData = !if(!empty(vaddrList),
+ (ins SReg_128:$srsrc, SCSrc_b32:$soffset,
+ offset:$offset, GLC:$glc, slc:$slc, tfe:$tfe),
+ (ins vaddrClass:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset,
+ offset:$offset, GLC:$glc, slc:$slc, tfe:$tfe)
+ );
+ dag InsData = !if(!empty(vaddrList),
+ (ins vdataClass:$vdata, SReg_128:$srsrc,
+ SCSrc_b32:$soffset, offset:$offset, GLC:$glc, slc:$slc, tfe:$tfe),
+ (ins vdataClass:$vdata, vaddrClass:$vaddr, SReg_128:$srsrc,
+ SCSrc_b32:$soffset, offset:$offset, GLC:$glc, slc:$slc, tfe:$tfe)
+ );
+ dag ret = !if(!empty(vdataList), InsNoData, InsData);
+}
+
+class getMUBUFIns<int addrKind, list<RegisterClass> vdataList=[]> {
+ dag ret =
+ !if(!eq(addrKind, BUFAddrKind.Offset), getMUBUFInsDA<vdataList>.ret,
+ !if(!eq(addrKind, BUFAddrKind.OffEn), getMUBUFInsDA<vdataList, [VGPR_32]>.ret,
+ !if(!eq(addrKind, BUFAddrKind.IdxEn), getMUBUFInsDA<vdataList, [VGPR_32]>.ret,
+ !if(!eq(addrKind, BUFAddrKind.BothEn), getMUBUFInsDA<vdataList, [VReg_64]>.ret,
+ !if(!eq(addrKind, BUFAddrKind.Addr64), getMUBUFInsDA<vdataList, [VReg_64]>.ret,
+ (ins))))));
+}
+
+class getMUBUFAsmOps<int addrKind> {
+ string Pfx =
+ !if(!eq(addrKind, BUFAddrKind.Offset), "off, $srsrc, $soffset",
+ !if(!eq(addrKind, BUFAddrKind.OffEn), "$vaddr, $srsrc, $soffset offen",
+ !if(!eq(addrKind, BUFAddrKind.IdxEn), "$vaddr, $srsrc, $soffset idxen",
+ !if(!eq(addrKind, BUFAddrKind.BothEn), "$vaddr, $srsrc, $soffset idxen offen",
+ !if(!eq(addrKind, BUFAddrKind.Addr64), "$vaddr, $srsrc, $soffset addr64",
+ "")))));
+ string ret = Pfx # "$offset";
+}
+
+class MUBUF_SetupAddr<int addrKind> {
+ bits<1> offen = !if(!eq(addrKind, BUFAddrKind.OffEn), 1,
+ !if(!eq(addrKind, BUFAddrKind.BothEn), 1 , 0));
+
+ bits<1> idxen = !if(!eq(addrKind, BUFAddrKind.IdxEn), 1,
+ !if(!eq(addrKind, BUFAddrKind.BothEn), 1 , 0));
+
+ bits<1> addr64 = !if(!eq(addrKind, BUFAddrKind.Addr64), 1, 0);
+
+ bits<1> has_vaddr = !if(!eq(addrKind, BUFAddrKind.Offset), 0, 1);
+}
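// For example, BUFAddrKind.BothEn yields offen = 1 and idxen = 1,
// BUFAddrKind.Addr64 is the only kind with addr64 = 1, and
// BUFAddrKind.Offset is the only kind with has_vaddr = 0.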
+
+class MUBUF_Load_Pseudo <string opName,
+ int addrKind,
+ RegisterClass vdataClass,
+ list<dag> pattern=[],
+ // Workaround bug bz30254
+ int addrKindCopy = addrKind>
+ : MUBUF_Pseudo<opName,
+ (outs vdataClass:$vdata),
+ getMUBUFIns<addrKindCopy>.ret,
+ " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe",
+ pattern>,
+ MUBUF_SetupAddr<addrKindCopy> {
+ let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
+ let mayLoad = 1;
+ let mayStore = 0;
+}
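// For example, with addrKind = BUFAddrKind.OffEn the PseudoInstr name gets an
// "_offen" suffix and the operand string expands to
// " $vdata, $vaddr, $srsrc, $soffset offen$offset$glc$slc$tfe".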
+
+// FIXME: tfe can't be an operand because it requires a separate
+// opcode: it needs an N+1 register class dest register.
+multiclass MUBUF_Pseudo_Loads<string opName, RegisterClass vdataClass,
+ ValueType load_vt = i32,
+ SDPatternOperator ld = null_frag> {
+
+ def _OFFSET : MUBUF_Load_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
+ [(set load_vt:$vdata,
+ (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe)))]>,
+ MUBUFAddr64Table<0>;
+
+ def _ADDR64 : MUBUF_Load_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
+ [(set load_vt:$vdata,
+ (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe)))]>,
+ MUBUFAddr64Table<1>;
+
+ def _OFFEN : MUBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
+ def _IDXEN : MUBUF_Load_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>;
+ def _BOTHEN : MUBUF_Load_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;
+
+ let DisableWQM = 1 in {
+ def _OFFSET_exact : MUBUF_Load_Pseudo <opName, BUFAddrKind.Offset, vdataClass>;
+ def _OFFEN_exact : MUBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
+ def _IDXEN_exact : MUBUF_Load_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>;
+ def _BOTHEN_exact : MUBUF_Load_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;
+ }
+}
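// For example, the BUFFER_LOAD_DWORD defm below expands to
// BUFFER_LOAD_DWORD_OFFSET, _ADDR64, _OFFEN, _IDXEN and _BOTHEN plus the four
// _exact variants with DisableWQM = 1; only the _OFFSET and _ADDR64 forms
// carry selection patterns.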
+
+class MUBUF_Store_Pseudo <string opName,
+ int addrKind,
+ RegisterClass vdataClass,
+ list<dag> pattern=[],
+ // Workaround bug bz30254
+ int addrKindCopy = addrKind,
+ RegisterClass vdataClassCopy = vdataClass>
+ : MUBUF_Pseudo<opName,
+ (outs),
+ getMUBUFIns<addrKindCopy, [vdataClassCopy]>.ret,
+ " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe",
+ pattern>,
+ MUBUF_SetupAddr<addrKindCopy> {
+ let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
+ let mayLoad = 0;
+ let mayStore = 1;
+}
+
+multiclass MUBUF_Pseudo_Stores<string opName, RegisterClass vdataClass,
+ ValueType store_vt = i32,
+ SDPatternOperator st = null_frag> {
+
+ def _OFFSET : MUBUF_Store_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
+ [(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
+ i16:$offset, i1:$glc, i1:$slc, i1:$tfe))]>,
+ MUBUFAddr64Table<0>;
+
+ def _ADDR64 : MUBUF_Store_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
+ [(st store_vt:$vdata, (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
+ i16:$offset, i1:$glc, i1:$slc, i1:$tfe))]>,
+ MUBUFAddr64Table<1>;
+
+ def _OFFEN : MUBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
+ def _IDXEN : MUBUF_Store_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>;
+ def _BOTHEN : MUBUF_Store_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;
+
+ let DisableWQM = 1 in {
+ def _OFFSET_exact : MUBUF_Store_Pseudo <opName, BUFAddrKind.Offset, vdataClass>;
+ def _OFFEN_exact : MUBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
+ def _IDXEN_exact : MUBUF_Store_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>;
+ def _BOTHEN_exact : MUBUF_Store_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;
+ }
+}
+
+
+class getMUBUFAtomicInsDA<RegisterClass vdataClass, bit vdata_in,
+ list<RegisterClass> vaddrList=[]> {
+ RegisterClass vaddrClass = !if(!empty(vaddrList), ?, !head(vaddrList));
+ dag ret = !if(vdata_in,
+ !if(!empty(vaddrList),
+ (ins vdataClass:$vdata_in,
+ SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, slc:$slc),
+ (ins vdataClass:$vdata_in, vaddrClass:$vaddr,
+ SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, slc:$slc)
+ ),
+ !if(!empty(vaddrList),
+ (ins vdataClass:$vdata,
+ SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, slc:$slc),
+ (ins vdataClass:$vdata, vaddrClass:$vaddr,
+ SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, slc:$slc)
+ ));
+}
+
+class getMUBUFAtomicIns<int addrKind,
+ RegisterClass vdataClass,
+ bit vdata_in,
+ // Workaround bug bz30254
+ RegisterClass vdataClassCopy=vdataClass> {
+ dag ret =
+ !if(!eq(addrKind, BUFAddrKind.Offset),
+ getMUBUFAtomicInsDA<vdataClassCopy, vdata_in>.ret,
+ !if(!eq(addrKind, BUFAddrKind.OffEn),
+ getMUBUFAtomicInsDA<vdataClassCopy, vdata_in, [VGPR_32]>.ret,
+ !if(!eq(addrKind, BUFAddrKind.IdxEn),
+ getMUBUFAtomicInsDA<vdataClassCopy, vdata_in, [VGPR_32]>.ret,
+ !if(!eq(addrKind, BUFAddrKind.BothEn),
+ getMUBUFAtomicInsDA<vdataClassCopy, vdata_in, [VReg_64]>.ret,
+ !if(!eq(addrKind, BUFAddrKind.Addr64),
+ getMUBUFAtomicInsDA<vdataClassCopy, vdata_in, [VReg_64]>.ret,
+ (ins))))));
+}
+
+class MUBUF_Atomic_Pseudo<string opName,
+ int addrKind,
+ dag outs,
+ dag ins,
+ string asmOps,
+ list<dag> pattern=[],
+ // Workaround bug bz30254
+ int addrKindCopy = addrKind>
+ : MUBUF_Pseudo<opName, outs, ins, asmOps, pattern>,
+ MUBUF_SetupAddr<addrKindCopy> {
+ let mayStore = 1;
+ let mayLoad = 1;
+ let hasPostISelHook = 1;
+ let hasSideEffects = 1;
+ let DisableWQM = 1;
+ let has_glc = 0;
+ let has_tfe = 0;
+}
+
+class MUBUF_AtomicNoRet_Pseudo<string opName, int addrKind,
+ RegisterClass vdataClass,
+ list<dag> pattern=[],
+ // Workaround bug bz30254
+ int addrKindCopy = addrKind,
+ RegisterClass vdataClassCopy = vdataClass>
+ : MUBUF_Atomic_Pseudo<opName, addrKindCopy,
+ (outs),
+ getMUBUFAtomicIns<addrKindCopy, vdataClassCopy, 0>.ret,
+ " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$slc",
+ pattern>,
+ AtomicNoRet<opName # "_" # getAddrName<addrKindCopy>.ret, 0> {
+ let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
+ let glc_value = 0;
+ let AsmMatchConverter = "cvtMubufAtomic";
+}
+
+class MUBUF_AtomicRet_Pseudo<string opName, int addrKind,
+ RegisterClass vdataClass,
+ list<dag> pattern=[],
+ // Workaround bug bz30254
+ int addrKindCopy = addrKind,
+ RegisterClass vdataClassCopy = vdataClass>
+ : MUBUF_Atomic_Pseudo<opName, addrKindCopy,
+ (outs vdataClassCopy:$vdata),
+ getMUBUFAtomicIns<addrKindCopy, vdataClassCopy, 1>.ret,
+ " $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # " glc$slc",
+ pattern>,
+ AtomicNoRet<opName # "_" # getAddrName<addrKindCopy>.ret, 1> {
+ let PseudoInstr = opName # "_rtn_" # getAddrName<addrKindCopy>.ret;
+ let glc_value = 1;
+ let Constraints = "$vdata = $vdata_in";
+ let DisableEncoding = "$vdata_in";
+ let AsmMatchConverter = "cvtMubufAtomicReturn";
+}
+
+multiclass MUBUF_Pseudo_Atomics <string opName,
+ RegisterClass vdataClass,
+ ValueType vdataType,
+ SDPatternOperator atomic> {
+
+ def _OFFSET : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.Offset, vdataClass>,
+ MUBUFAddr64Table <0>;
+ def _ADDR64 : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.Addr64, vdataClass>,
+ MUBUFAddr64Table <1>;
+ def _OFFEN : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
+ def _IDXEN : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>;
+ def _BOTHEN : MUBUF_AtomicNoRet_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;
+
+ def _RTN_OFFSET : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
+ [(set vdataType:$vdata,
+ (atomic (MUBUFOffsetAtomic v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$slc),
+ vdataType:$vdata_in))]>,
+ MUBUFAddr64Table <0, "_RTN">;
+
+ def _RTN_ADDR64 : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
+ [(set vdataType:$vdata,
+ (atomic (MUBUFAddr64Atomic v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$slc),
+ vdataType:$vdata_in))]>,
+ MUBUFAddr64Table <1, "_RTN">;
+
+ def _RTN_OFFEN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
+ def _RTN_IDXEN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>;
+ def _RTN_BOTHEN : MUBUF_AtomicRet_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;
+}
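// The non-returning variants keep the plain _OFFSET/_ADDR64/... names, while
// the returning forms are defined as _RTN_* with glc_value = 1 and a
// "$vdata = $vdata_in" tie; as with loads, only the OFFSET and ADDR64 forms
// carry selection patterns.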
+
+
+//===----------------------------------------------------------------------===//
+// MUBUF Instructions
+//===----------------------------------------------------------------------===//
+
+let SubtargetPredicate = isGCN in {
+
+defm BUFFER_LOAD_FORMAT_X : MUBUF_Pseudo_Loads <
+ "buffer_load_format_x", VGPR_32
+>;
+defm BUFFER_LOAD_FORMAT_XY : MUBUF_Pseudo_Loads <
+ "buffer_load_format_xy", VReg_64
+>;
+defm BUFFER_LOAD_FORMAT_XYZ : MUBUF_Pseudo_Loads <
+ "buffer_load_format_xyz", VReg_96
+>;
+defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Pseudo_Loads <
+ "buffer_load_format_xyzw", VReg_128
+>;
+defm BUFFER_STORE_FORMAT_X : MUBUF_Pseudo_Stores <
+ "buffer_store_format_x", VGPR_32
+>;
+defm BUFFER_STORE_FORMAT_XY : MUBUF_Pseudo_Stores <
+ "buffer_store_format_xy", VReg_64
+>;
+defm BUFFER_STORE_FORMAT_XYZ : MUBUF_Pseudo_Stores <
+ "buffer_store_format_xyz", VReg_96
+>;
+defm BUFFER_STORE_FORMAT_XYZW : MUBUF_Pseudo_Stores <
+ "buffer_store_format_xyzw", VReg_128
+>;
+defm BUFFER_LOAD_UBYTE : MUBUF_Pseudo_Loads <
+ "buffer_load_ubyte", VGPR_32, i32, mubuf_az_extloadi8
+>;
+defm BUFFER_LOAD_SBYTE : MUBUF_Pseudo_Loads <
+ "buffer_load_sbyte", VGPR_32, i32, mubuf_sextloadi8
+>;
+defm BUFFER_LOAD_USHORT : MUBUF_Pseudo_Loads <
+ "buffer_load_ushort", VGPR_32, i32, mubuf_az_extloadi16
+>;
+defm BUFFER_LOAD_SSHORT : MUBUF_Pseudo_Loads <
+ "buffer_load_sshort", VGPR_32, i32, mubuf_sextloadi16
+>;
+defm BUFFER_LOAD_DWORD : MUBUF_Pseudo_Loads <
+ "buffer_load_dword", VGPR_32, i32, mubuf_load
+>;
+defm BUFFER_LOAD_DWORDX2 : MUBUF_Pseudo_Loads <
+ "buffer_load_dwordx2", VReg_64, v2i32, mubuf_load
+>;
+defm BUFFER_LOAD_DWORDX3 : MUBUF_Pseudo_Loads <
+ "buffer_load_dwordx3", VReg_96, untyped, mubuf_load
+>;
+defm BUFFER_LOAD_DWORDX4 : MUBUF_Pseudo_Loads <
+ "buffer_load_dwordx4", VReg_128, v4i32, mubuf_load
+>;
+defm BUFFER_STORE_BYTE : MUBUF_Pseudo_Stores <
+ "buffer_store_byte", VGPR_32, i32, truncstorei8_global
+>;
+defm BUFFER_STORE_SHORT : MUBUF_Pseudo_Stores <
+ "buffer_store_short", VGPR_32, i32, truncstorei16_global
+>;
+defm BUFFER_STORE_DWORD : MUBUF_Pseudo_Stores <
+ "buffer_store_dword", VGPR_32, i32, global_store
+>;
+defm BUFFER_STORE_DWORDX2 : MUBUF_Pseudo_Stores <
+ "buffer_store_dwordx2", VReg_64, v2i32, global_store
+>;
+defm BUFFER_STORE_DWORDX3 : MUBUF_Pseudo_Stores <
+ "buffer_store_dwordx3", VReg_96, untyped, global_store
+>;
+defm BUFFER_STORE_DWORDX4 : MUBUF_Pseudo_Stores <
+ "buffer_store_dwordx4", VReg_128, v4i32, global_store
+>;
+defm BUFFER_ATOMIC_SWAP : MUBUF_Pseudo_Atomics <
+ "buffer_atomic_swap", VGPR_32, i32, atomic_swap_global
+>;
+defm BUFFER_ATOMIC_CMPSWAP : MUBUF_Pseudo_Atomics <
+ "buffer_atomic_cmpswap", VReg_64, v2i32, null_frag
+>;
+defm BUFFER_ATOMIC_ADD : MUBUF_Pseudo_Atomics <
+ "buffer_atomic_add", VGPR_32, i32, atomic_add_global
+>;
+defm BUFFER_ATOMIC_SUB : MUBUF_Pseudo_Atomics <
+ "buffer_atomic_sub", VGPR_32, i32, atomic_sub_global
+>;
+defm BUFFER_ATOMIC_SMIN : MUBUF_Pseudo_Atomics <
+ "buffer_atomic_smin", VGPR_32, i32, atomic_min_global
+>;
+defm BUFFER_ATOMIC_UMIN : MUBUF_Pseudo_Atomics <
+ "buffer_atomic_umin", VGPR_32, i32, atomic_umin_global
+>;
+defm BUFFER_ATOMIC_SMAX : MUBUF_Pseudo_Atomics <
+ "buffer_atomic_smax", VGPR_32, i32, atomic_max_global
+>;
+defm BUFFER_ATOMIC_UMAX : MUBUF_Pseudo_Atomics <
+ "buffer_atomic_umax", VGPR_32, i32, atomic_umax_global
+>;
+defm BUFFER_ATOMIC_AND : MUBUF_Pseudo_Atomics <
+ "buffer_atomic_and", VGPR_32, i32, atomic_and_global
+>;
+defm BUFFER_ATOMIC_OR : MUBUF_Pseudo_Atomics <
+ "buffer_atomic_or", VGPR_32, i32, atomic_or_global
+>;
+defm BUFFER_ATOMIC_XOR : MUBUF_Pseudo_Atomics <
+ "buffer_atomic_xor", VGPR_32, i32, atomic_xor_global
+>;
+defm BUFFER_ATOMIC_INC : MUBUF_Pseudo_Atomics <
+ "buffer_atomic_inc", VGPR_32, i32, atomic_inc_global
+>;
+defm BUFFER_ATOMIC_DEC : MUBUF_Pseudo_Atomics <
+ "buffer_atomic_dec", VGPR_32, i32, atomic_dec_global
+>;
+defm BUFFER_ATOMIC_SWAP_X2 : MUBUF_Pseudo_Atomics <
+ "buffer_atomic_swap_x2", VReg_64, i64, atomic_swap_global
+>;
+defm BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_Pseudo_Atomics <
+ "buffer_atomic_cmpswap_x2", VReg_128, v2i64, null_frag
+>;
+defm BUFFER_ATOMIC_ADD_X2 : MUBUF_Pseudo_Atomics <
+ "buffer_atomic_add_x2", VReg_64, i64, atomic_add_global
+>;
+defm BUFFER_ATOMIC_SUB_X2 : MUBUF_Pseudo_Atomics <
+ "buffer_atomic_sub_x2", VReg_64, i64, atomic_sub_global
+>;
+defm BUFFER_ATOMIC_SMIN_X2 : MUBUF_Pseudo_Atomics <
+ "buffer_atomic_smin_x2", VReg_64, i64, atomic_min_global
+>;
+defm BUFFER_ATOMIC_UMIN_X2 : MUBUF_Pseudo_Atomics <
+ "buffer_atomic_umin_x2", VReg_64, i64, atomic_umin_global
+>;
+defm BUFFER_ATOMIC_SMAX_X2 : MUBUF_Pseudo_Atomics <
+ "buffer_atomic_smax_x2", VReg_64, i64, atomic_max_global
+>;
+defm BUFFER_ATOMIC_UMAX_X2 : MUBUF_Pseudo_Atomics <
+ "buffer_atomic_umax_x2", VReg_64, i64, atomic_umax_global
+>;
+defm BUFFER_ATOMIC_AND_X2 : MUBUF_Pseudo_Atomics <
+ "buffer_atomic_and_x2", VReg_64, i64, atomic_and_global
+>;
+defm BUFFER_ATOMIC_OR_X2 : MUBUF_Pseudo_Atomics <
+ "buffer_atomic_or_x2", VReg_64, i64, atomic_or_global
+>;
+defm BUFFER_ATOMIC_XOR_X2 : MUBUF_Pseudo_Atomics <
+ "buffer_atomic_xor_x2", VReg_64, i64, atomic_xor_global
+>;
+defm BUFFER_ATOMIC_INC_X2 : MUBUF_Pseudo_Atomics <
+ "buffer_atomic_inc_x2", VReg_64, i64, atomic_inc_global
+>;
+defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Pseudo_Atomics <
+ "buffer_atomic_dec_x2", VReg_64, i64, atomic_dec_global
+>;
+
+let SubtargetPredicate = isSI in { // isn't on CI & VI
+/*
+defm BUFFER_ATOMIC_RSUB : MUBUF_Pseudo_Atomics <"buffer_atomic_rsub">;
+defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Pseudo_Atomics <"buffer_atomic_fcmpswap">;
+defm BUFFER_ATOMIC_FMIN : MUBUF_Pseudo_Atomics <"buffer_atomic_fmin">;
+defm BUFFER_ATOMIC_FMAX : MUBUF_Pseudo_Atomics <"buffer_atomic_fmax">;
+defm BUFFER_ATOMIC_RSUB_X2 : MUBUF_Pseudo_Atomics <"buffer_atomic_rsub_x2">;
+defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Pseudo_Atomics <"buffer_atomic_fcmpswap_x2">;
+defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Pseudo_Atomics <"buffer_atomic_fmin_x2">;
+defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Pseudo_Atomics <"buffer_atomic_fmax_x2">;
+*/
+
+def BUFFER_WBINVL1_SC : MUBUF_Invalidate <"buffer_wbinvl1_sc",
+ int_amdgcn_buffer_wbinvl1_sc>;
+}
+
+def BUFFER_WBINVL1 : MUBUF_Invalidate <"buffer_wbinvl1",
+ int_amdgcn_buffer_wbinvl1>;
+
+//===----------------------------------------------------------------------===//
+// MTBUF Instructions
+//===----------------------------------------------------------------------===//
+
+//def TBUFFER_LOAD_FORMAT_X : MTBUF_ <0, "tbuffer_load_format_x", []>;
+//def TBUFFER_LOAD_FORMAT_XY : MTBUF_ <1, "tbuffer_load_format_xy", []>;
+//def TBUFFER_LOAD_FORMAT_XYZ : MTBUF_ <2, "tbuffer_load_format_xyz", []>;
+def TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Pseudo <"tbuffer_load_format_xyzw", VReg_128>;
+def TBUFFER_STORE_FORMAT_X : MTBUF_Store_Pseudo <"tbuffer_store_format_x", VGPR_32>;
+def TBUFFER_STORE_FORMAT_XY : MTBUF_Store_Pseudo <"tbuffer_store_format_xy", VReg_64>;
+def TBUFFER_STORE_FORMAT_XYZ : MTBUF_Store_Pseudo <"tbuffer_store_format_xyz", VReg_128>;
+def TBUFFER_STORE_FORMAT_XYZW : MTBUF_Store_Pseudo <"tbuffer_store_format_xyzw", VReg_128>;
+
+} // End let SubtargetPredicate = isGCN
+
+let SubtargetPredicate = isCIVI in {
+
+//===----------------------------------------------------------------------===//
+// Instruction definitions for CI and newer.
+//===----------------------------------------------------------------------===//
+// Remaining instructions:
+// BUFFER_LOAD_DWORDX3
+// BUFFER_STORE_DWORDX3
+
+def BUFFER_WBINVL1_VOL : MUBUF_Invalidate <"buffer_wbinvl1_vol",
+ int_amdgcn_buffer_wbinvl1_vol>;
+
+} // End let SubtargetPredicate = isCIVI
+
+//===----------------------------------------------------------------------===//
+// MUBUF Patterns
+//===----------------------------------------------------------------------===//
+
+let Predicates = [isGCN] in {
+
+// int_SI_vs_load_input
+def : Pat<
+ (SIload_input v4i32:$tlst, imm:$attr_offset, i32:$buf_idx_vgpr),
+ (BUFFER_LOAD_FORMAT_XYZW_IDXEN $buf_idx_vgpr, $tlst, (i32 0), imm:$attr_offset, 0, 0, 0)
+>;
+
+// Offset in a 32-bit VGPR
+def : Pat <
+ (SIload_constant v4i32:$sbase, i32:$voff),
+ (BUFFER_LOAD_DWORD_OFFEN $voff, $sbase, (i32 0), 0, 0, 0, 0)
+>;
+
+
+//===----------------------------------------------------------------------===//
+// buffer_load/store_format patterns
+//===----------------------------------------------------------------------===//
+
+multiclass MUBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
+ string opcode> {
+ def : Pat<
+ (vt (name v4i32:$rsrc, 0,
+ (MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
+ imm:$glc, imm:$slc)),
+ (!cast<MUBUF_Pseudo>(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset),
+ (as_i1imm $glc), (as_i1imm $slc), 0)
+ >;
+
+ def : Pat<
+ (vt (name v4i32:$rsrc, i32:$vindex,
+ (MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
+ imm:$glc, imm:$slc)),
+ (!cast<MUBUF_Pseudo>(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset),
+ (as_i1imm $glc), (as_i1imm $slc), 0)
+ >;
+
+ def : Pat<
+ (vt (name v4i32:$rsrc, 0,
+ (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
+ imm:$glc, imm:$slc)),
+ (!cast<MUBUF_Pseudo>(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset),
+ (as_i1imm $glc), (as_i1imm $slc), 0)
+ >;
+
+ def : Pat<
+ (vt (name v4i32:$rsrc, i32:$vindex,
+ (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
+ imm:$glc, imm:$slc)),
+ (!cast<MUBUF_Pseudo>(opcode # _BOTHEN)
+ (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
+ $rsrc, $soffset, (as_i16imm $offset),
+ (as_i1imm $glc), (as_i1imm $slc), 0)
+ >;
+}
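// The four patterns above cover the combinations of a zero vs. non-zero
// vindex and an immediate-only vs. VGPR voffset, selecting the _OFFSET,
// _IDXEN, _OFFEN and _BOTHEN forms respectively; the BOTHEN case packs vindex
// and voffset into a 64-bit REG_SEQUENCE.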
+
+defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, f32, "BUFFER_LOAD_FORMAT_X">;
+defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v2f32, "BUFFER_LOAD_FORMAT_XY">;
+defm : MUBUF_LoadIntrinsicPat<SIbuffer_load_format, v4f32, "BUFFER_LOAD_FORMAT_XYZW">;
+defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, f32, "BUFFER_LOAD_DWORD">;
+defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v2f32, "BUFFER_LOAD_DWORDX2">;
+defm : MUBUF_LoadIntrinsicPat<SIbuffer_load, v4f32, "BUFFER_LOAD_DWORDX4">;
+
+multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
+ string opcode> {
+ def : Pat<
+ (name vt:$vdata, v4i32:$rsrc, 0,
+ (MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
+ imm:$glc, imm:$slc),
+ (!cast<MUBUF_Pseudo>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset, (as_i16imm $offset),
+ (as_i1imm $glc), (as_i1imm $slc), 0)
+ >;
+
+ def : Pat<
+ (name vt:$vdata, v4i32:$rsrc, i32:$vindex,
+ (MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
+ imm:$glc, imm:$slc),
+ (!cast<MUBUF_Pseudo>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset,
+ (as_i16imm $offset), (as_i1imm $glc),
+ (as_i1imm $slc), 0)
+ >;
+
+ def : Pat<
+ (name vt:$vdata, v4i32:$rsrc, 0,
+ (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
+ imm:$glc, imm:$slc),
+ (!cast<MUBUF_Pseudo>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset,
+ (as_i16imm $offset), (as_i1imm $glc),
+ (as_i1imm $slc), 0)
+ >;
+
+ def : Pat<
+ (name vt:$vdata, v4i32:$rsrc, i32:$vindex,
+ (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
+ imm:$glc, imm:$slc),
+ (!cast<MUBUF_Pseudo>(opcode # _BOTHEN_exact)
+ $vdata,
+ (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
+ $rsrc, $soffset, (as_i16imm $offset),
+ (as_i1imm $glc), (as_i1imm $slc), 0)
+ >;
+}
+
+defm : MUBUF_StoreIntrinsicPat<int_amdgcn_buffer_store_format, f32, "BUFFER_STORE_FORMAT_X">;
+defm : MUBUF_StoreIntrinsicPat<int_amdgcn_buffer_store_format, v2f32, "BUFFER_STORE_FORMAT_XY">;
+defm : MUBUF_StoreIntrinsicPat<int_amdgcn_buffer_store_format, v4f32, "BUFFER_STORE_FORMAT_XYZW">;
+defm : MUBUF_StoreIntrinsicPat<int_amdgcn_buffer_store, f32, "BUFFER_STORE_DWORD">;
+defm : MUBUF_StoreIntrinsicPat<int_amdgcn_buffer_store, v2f32, "BUFFER_STORE_DWORDX2">;
+defm : MUBUF_StoreIntrinsicPat<int_amdgcn_buffer_store, v4f32, "BUFFER_STORE_DWORDX4">;
+
+//===----------------------------------------------------------------------===//
+// buffer_atomic patterns
+//===----------------------------------------------------------------------===//
+
+multiclass BufferAtomicPatterns<SDPatternOperator name, string opcode> {
+ def : Pat<
+ (name i32:$vdata_in, v4i32:$rsrc, 0,
+ (MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
+ imm:$slc),
+ (!cast<MUBUF_Pseudo>(opcode # _RTN_OFFSET) $vdata_in, $rsrc, $soffset,
+ (as_i16imm $offset), (as_i1imm $slc))
+ >;
+
+ def : Pat<
+ (name i32:$vdata_in, v4i32:$rsrc, i32:$vindex,
+ (MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
+ imm:$slc),
+ (!cast<MUBUF_Pseudo>(opcode # _RTN_IDXEN) $vdata_in, $vindex, $rsrc, $soffset,
+ (as_i16imm $offset), (as_i1imm $slc))
+ >;
+
+ def : Pat<
+ (name i32:$vdata_in, v4i32:$rsrc, 0,
+ (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
+ imm:$slc),
+ (!cast<MUBUF_Pseudo>(opcode # _RTN_OFFEN) $vdata_in, $voffset, $rsrc, $soffset,
+ (as_i16imm $offset), (as_i1imm $slc))
+ >;
+
+ def : Pat<
+ (name i32:$vdata_in, v4i32:$rsrc, i32:$vindex,
+ (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
+ imm:$slc),
+ (!cast<MUBUF_Pseudo>(opcode # _RTN_BOTHEN)
+ $vdata_in,
+ (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
+ $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $slc))
+ >;
+}
+
+defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_swap, "BUFFER_ATOMIC_SWAP">;
+defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_add, "BUFFER_ATOMIC_ADD">;
+defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_sub, "BUFFER_ATOMIC_SUB">;
+defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_smin, "BUFFER_ATOMIC_SMIN">;
+defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_umin, "BUFFER_ATOMIC_UMIN">;
+defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_smax, "BUFFER_ATOMIC_SMAX">;
+defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_umax, "BUFFER_ATOMIC_UMAX">;
+defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_and, "BUFFER_ATOMIC_AND">;
+defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_or, "BUFFER_ATOMIC_OR">;
+defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_xor, "BUFFER_ATOMIC_XOR">;
+
+def : Pat<
+ (int_amdgcn_buffer_atomic_cmpswap
+ i32:$data, i32:$cmp, v4i32:$rsrc, 0,
+ (MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
+ imm:$slc),
+ (EXTRACT_SUBREG
+ (BUFFER_ATOMIC_CMPSWAP_RTN_OFFSET
+ (REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
+ $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $slc)),
+ sub0)
+>;
+
+def : Pat<
+ (int_amdgcn_buffer_atomic_cmpswap
+ i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex,
+ (MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
+ imm:$slc),
+ (EXTRACT_SUBREG
+ (BUFFER_ATOMIC_CMPSWAP_RTN_IDXEN
+ (REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
+ $vindex, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $slc)),
+ sub0)
+>;
+
+def : Pat<
+ (int_amdgcn_buffer_atomic_cmpswap
+ i32:$data, i32:$cmp, v4i32:$rsrc, 0,
+ (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
+ imm:$slc),
+ (EXTRACT_SUBREG
+ (BUFFER_ATOMIC_CMPSWAP_RTN_OFFEN
+ (REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
+ $voffset, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $slc)),
+ sub0)
+>;
+
+def : Pat<
+ (int_amdgcn_buffer_atomic_cmpswap
+ i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex,
+ (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
+ imm:$slc),
+ (EXTRACT_SUBREG
+ (BUFFER_ATOMIC_CMPSWAP_RTN_BOTHEN
+ (REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
+ (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
+ $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $slc)),
+ sub0)
+>;
+
+
+class MUBUFLoad_PatternADDR64 <MUBUF_Pseudo Instr_ADDR64, ValueType vt,
+ PatFrag constant_ld> : Pat <
+ (vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
+ i16:$offset, i1:$glc, i1:$slc, i1:$tfe))),
+ (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, $glc, $slc, $tfe)
+ >;
+
+multiclass MUBUFLoad_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo Instr_OFFSET,
+ ValueType vt, PatFrag atomic_ld> {
+ def : Pat <
+ (vt (atomic_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
+ i16:$offset, i1:$slc))),
+ (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, 1, $slc, 0)
+ >;
+
+ def : Pat <
+ (vt (atomic_ld (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset))),
+ (Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset), 1, 0, 0)
+ >;
+}
+
+let Predicates = [isSICI] in {
+def : MUBUFLoad_PatternADDR64 <BUFFER_LOAD_SBYTE_ADDR64, i32, sextloadi8_constant>;
+def : MUBUFLoad_PatternADDR64 <BUFFER_LOAD_UBYTE_ADDR64, i32, az_extloadi8_constant>;
+def : MUBUFLoad_PatternADDR64 <BUFFER_LOAD_SSHORT_ADDR64, i32, sextloadi16_constant>;
+def : MUBUFLoad_PatternADDR64 <BUFFER_LOAD_USHORT_ADDR64, i32, az_extloadi16_constant>;
+
+defm : MUBUFLoad_Atomic_Pattern <BUFFER_LOAD_DWORD_ADDR64, BUFFER_LOAD_DWORD_OFFSET, i32, mubuf_load_atomic>;
+defm : MUBUFLoad_Atomic_Pattern <BUFFER_LOAD_DWORDX2_ADDR64, BUFFER_LOAD_DWORDX2_OFFSET, i64, mubuf_load_atomic>;
+} // End Predicates = [isSICI]
+
+multiclass MUBUFLoad_Pattern <MUBUF_Pseudo Instr_OFFSET, ValueType vt,
+ PatFrag ld> {
+
+ def : Pat <
+ (vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset,
+ i16:$offset, i1:$glc, i1:$slc, i1:$tfe))),
+ (Instr_OFFSET $srsrc, $soffset, $offset, $glc, $slc, $tfe)
+ >;
+}
+
+let Predicates = [Has16BitInsts] in {
+
+defm : MUBUFLoad_Pattern <BUFFER_LOAD_SBYTE_OFFSET, i16, sextloadi8_constant>;
+defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_OFFSET, i16, az_extloadi8_constant>;
+defm : MUBUFLoad_Pattern <BUFFER_LOAD_SBYTE_OFFSET, i16, mubuf_sextloadi8>;
+defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_OFFSET, i16, mubuf_az_extloadi8>;
+
+} // End Predicates = [Has16BitInsts]
+
+class MUBUFScratchLoadPat <MUBUF_Pseudo Instr, ValueType vt, PatFrag ld> : Pat <
+ (vt (ld (MUBUFScratch v4i32:$srsrc, i32:$vaddr,
+ i32:$soffset, u16imm:$offset))),
+ (Instr $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
+>;
+
+def : MUBUFScratchLoadPat <BUFFER_LOAD_SBYTE_OFFEN, i32, sextloadi8_private>;
+def : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, i32, extloadi8_private>;
+def : MUBUFScratchLoadPat <BUFFER_LOAD_SBYTE_OFFEN, i16, sextloadi8_private>;
+def : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, i16, extloadi8_private>;
+def : MUBUFScratchLoadPat <BUFFER_LOAD_SSHORT_OFFEN, i32, sextloadi16_private>;
+def : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, i32, extloadi16_private>;
+def : MUBUFScratchLoadPat <BUFFER_LOAD_DWORD_OFFEN, i32, load_private>;
+def : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX2_OFFEN, v2i32, load_private>;
+def : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX4_OFFEN, v4i32, load_private>;
+
+// BUFFER_LOAD_DWORD*, addr64=0
+multiclass MUBUF_Load_Dword <ValueType vt,
+ MUBUF_Pseudo offset,
+ MUBUF_Pseudo offen,
+ MUBUF_Pseudo idxen,
+ MUBUF_Pseudo bothen> {
+
+ def : Pat <
+ (vt (int_SI_buffer_load_dword v4i32:$rsrc, (i32 imm), i32:$soffset,
+ imm:$offset, 0, 0, imm:$glc, imm:$slc,
+ imm:$tfe)),
+ (offset $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc),
+ (as_i1imm $slc), (as_i1imm $tfe))
+ >;
+
+ def : Pat <
+ (vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset,
+ imm:$offset, 1, 0, imm:$glc, imm:$slc,
+ imm:$tfe)),
+ (offen $vaddr, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc),
+ (as_i1imm $tfe))
+ >;
+
+ def : Pat <
+ (vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset,
+ imm:$offset, 0, 1, imm:$glc, imm:$slc,
+ imm:$tfe)),
+ (idxen $vaddr, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc),
+ (as_i1imm $slc), (as_i1imm $tfe))
+ >;
+
+ def : Pat <
+ (vt (int_SI_buffer_load_dword v4i32:$rsrc, v2i32:$vaddr, i32:$soffset,
+ imm:$offset, 1, 1, imm:$glc, imm:$slc,
+ imm:$tfe)),
+ (bothen $vaddr, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc),
+ (as_i1imm $tfe))
+ >;
+}
+
+defm : MUBUF_Load_Dword <i32, BUFFER_LOAD_DWORD_OFFSET, BUFFER_LOAD_DWORD_OFFEN,
+ BUFFER_LOAD_DWORD_IDXEN, BUFFER_LOAD_DWORD_BOTHEN>;
+defm : MUBUF_Load_Dword <v2i32, BUFFER_LOAD_DWORDX2_OFFSET, BUFFER_LOAD_DWORDX2_OFFEN,
+ BUFFER_LOAD_DWORDX2_IDXEN, BUFFER_LOAD_DWORDX2_BOTHEN>;
+defm : MUBUF_Load_Dword <v4i32, BUFFER_LOAD_DWORDX4_OFFSET, BUFFER_LOAD_DWORDX4_OFFEN,
+ BUFFER_LOAD_DWORDX4_IDXEN, BUFFER_LOAD_DWORDX4_BOTHEN>;
+
+multiclass MUBUFStore_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo Instr_OFFSET,
+ ValueType vt, PatFrag atomic_st> {
+  // Stores follow the atomic op convention, so the address comes first.
+ def : Pat <
+ (atomic_st (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
+ i16:$offset, i1:$slc), vt:$val),
+ (Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset, 1, $slc, 0)
+ >;
+
+ def : Pat <
+ (atomic_st (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset), vt:$val),
+ (Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset), 1, 0, 0)
+ >;
+}
+let Predicates = [isSICI] in {
+defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_DWORD_ADDR64, BUFFER_STORE_DWORD_OFFSET, i32, global_store_atomic>;
+defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_DWORDX2_ADDR64, BUFFER_STORE_DWORDX2_OFFSET, i64, global_store_atomic>;
+} // End Predicates = [isSICI]
+
+
+multiclass MUBUFStore_Pattern <MUBUF_Pseudo Instr_OFFSET, ValueType vt,
+ PatFrag st> {
+
+ def : Pat <
+ (st vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
+ i16:$offset, i1:$glc, i1:$slc, i1:$tfe)),
+ (Instr_OFFSET $vdata, $srsrc, $soffset, $offset, $glc, $slc, $tfe)
+ >;
+}
+
+defm : MUBUFStore_Pattern <BUFFER_STORE_BYTE_OFFSET, i16, truncstorei8_global>;
+defm : MUBUFStore_Pattern <BUFFER_STORE_SHORT_OFFSET, i16, global_store>;
+
+class MUBUFScratchStorePat <MUBUF_Pseudo Instr, ValueType vt, PatFrag st> : Pat <
+ (st vt:$value, (MUBUFScratch v4i32:$srsrc, i32:$vaddr, i32:$soffset,
+ u16imm:$offset)),
+ (Instr $value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
+>;
+
+def : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, i32, truncstorei8_private>;
+def : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, i32, truncstorei16_private>;
+def : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, i16, truncstorei8_private>;
+def : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, i16, store_private>;
+def : MUBUFScratchStorePat <BUFFER_STORE_DWORD_OFFEN, i32, store_private>;
+def : MUBUFScratchStorePat <BUFFER_STORE_DWORDX2_OFFEN, v2i32, store_private>;
+def : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, v4i32, store_private>;
+
+//===----------------------------------------------------------------------===//
+// MTBUF Patterns
+//===----------------------------------------------------------------------===//
+
+// TBUFFER_STORE_FORMAT_*, addr64=0
+class MTBUF_StoreResource <ValueType vt, int num_channels, MTBUF_Pseudo opcode> : Pat<
+ (SItbuffer_store v4i32:$rsrc, vt:$vdata, num_channels, i32:$vaddr,
+ i32:$soffset, imm:$inst_offset, imm:$dfmt,
+ imm:$nfmt, imm:$offen, imm:$idxen,
+ imm:$glc, imm:$slc, imm:$tfe),
+ (opcode
+ $vdata, (as_i16imm $inst_offset), (as_i1imm $offen), (as_i1imm $idxen),
+ (as_i1imm $glc), 0, (as_i8imm $dfmt), (as_i8imm $nfmt), $vaddr, $rsrc,
+ (as_i1imm $slc), (as_i1imm $tfe), $soffset)
+>;
+
+def : MTBUF_StoreResource <i32, 1, TBUFFER_STORE_FORMAT_X>;
+def : MTBUF_StoreResource <v2i32, 2, TBUFFER_STORE_FORMAT_XY>;
+def : MTBUF_StoreResource <v4i32, 3, TBUFFER_STORE_FORMAT_XYZ>;
+def : MTBUF_StoreResource <v4i32, 4, TBUFFER_STORE_FORMAT_XYZW>;
+
+} // End let Predicates = [isGCN]
+
+//===----------------------------------------------------------------------===//
+// Target instructions, move to the appropriate target TD file
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SI
+//===----------------------------------------------------------------------===//
+
+class MUBUF_Real_si <bits<7> op, MUBUF_Pseudo ps> :
+ MUBUF_Real<op, ps>,
+ Enc64,
+ SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SI> {
+ let AssemblerPredicate=isSICI;
+ let DecoderNamespace="SICI";
+
+ let Inst{11-0} = !if(ps.has_offset, offset, ?);
+ let Inst{12} = ps.offen;
+ let Inst{13} = ps.idxen;
+ let Inst{14} = !if(ps.has_glc, glc, ps.glc_value);
+ let Inst{15} = ps.addr64;
+ let Inst{16} = lds;
+ let Inst{24-18} = op;
+ let Inst{31-26} = 0x38; //encoding
+ let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
+ let Inst{47-40} = !if(ps.has_vdata, vdata, ?);
+ let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?);
+ let Inst{54} = !if(ps.has_slc, slc, ?);
+ let Inst{55} = !if(ps.has_tfe, tfe, ?);
+ let Inst{63-56} = !if(ps.has_soffset, soffset, ?);
+}
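As a worked example of the layout above, the low 32 bits of an SI MUBUF encoding can be assembled as follows; a sketch that only covers the fields MUBUF_Real_si fixes unconditionally and treats every '?' case as zero:

#include <cstdint>

// Illustrative sketch only: pack the low word of an SI MUBUF instruction
// (offset in bits 11-0, offen 12, idxen 13, glc 14, addr64 15, lds 16,
// op in bits 24-18, and the fixed 0x38 group in bits 31-26).
static uint32_t encodeMUBUFLowWordSI(uint16_t Offset12, bool OffEn, bool IdxEn,
                                     bool GLC, bool Addr64, bool LDS,
                                     uint8_t Op7) {
  uint32_t W = 0;
  W |= uint32_t(Offset12 & 0xfff);
  W |= uint32_t(OffEn) << 12;
  W |= uint32_t(IdxEn) << 13;
  W |= uint32_t(GLC) << 14;
  W |= uint32_t(Addr64) << 15;
  W |= uint32_t(LDS) << 16;
  W |= uint32_t(Op7 & 0x7f) << 18;
  W |= 0x38u << 26;
  return W;
}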
+
+multiclass MUBUF_Real_AllAddr_si<bits<7> op> {
+ def _OFFSET_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>;
+ def _ADDR64_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_ADDR64")>;
+ def _OFFEN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>;
+ def _IDXEN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>;
+ def _BOTHEN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>;
+}
+
+multiclass MUBUF_Real_Atomic_si<bits<7> op> : MUBUF_Real_AllAddr_si<op> {
+ def _RTN_OFFSET_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_RTN_OFFSET")>;
+ def _RTN_ADDR64_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_RTN_ADDR64")>;
+ def _RTN_OFFEN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_RTN_OFFEN")>;
+ def _RTN_IDXEN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_RTN_IDXEN")>;
+ def _RTN_BOTHEN_si : MUBUF_Real_si <op, !cast<MUBUF_Pseudo>(NAME#"_RTN_BOTHEN")>;
+}
+
+defm BUFFER_LOAD_FORMAT_X : MUBUF_Real_AllAddr_si <0x00>;
+defm BUFFER_LOAD_FORMAT_XY : MUBUF_Real_AllAddr_si <0x01>;
+defm BUFFER_LOAD_FORMAT_XYZ : MUBUF_Real_AllAddr_si <0x02>;
+defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Real_AllAddr_si <0x03>;
+defm BUFFER_STORE_FORMAT_X : MUBUF_Real_AllAddr_si <0x04>;
+defm BUFFER_STORE_FORMAT_XY : MUBUF_Real_AllAddr_si <0x05>;
+defm BUFFER_STORE_FORMAT_XYZ : MUBUF_Real_AllAddr_si <0x06>;
+defm BUFFER_STORE_FORMAT_XYZW : MUBUF_Real_AllAddr_si <0x07>;
+defm BUFFER_LOAD_UBYTE : MUBUF_Real_AllAddr_si <0x08>;
+defm BUFFER_LOAD_SBYTE : MUBUF_Real_AllAddr_si <0x09>;
+defm BUFFER_LOAD_USHORT : MUBUF_Real_AllAddr_si <0x0a>;
+defm BUFFER_LOAD_SSHORT : MUBUF_Real_AllAddr_si <0x0b>;
+defm BUFFER_LOAD_DWORD : MUBUF_Real_AllAddr_si <0x0c>;
+defm BUFFER_LOAD_DWORDX2 : MUBUF_Real_AllAddr_si <0x0d>;
+defm BUFFER_LOAD_DWORDX4 : MUBUF_Real_AllAddr_si <0x0e>;
+defm BUFFER_LOAD_DWORDX3 : MUBUF_Real_AllAddr_si <0x0f>;
+defm BUFFER_STORE_BYTE : MUBUF_Real_AllAddr_si <0x18>;
+defm BUFFER_STORE_SHORT : MUBUF_Real_AllAddr_si <0x1a>;
+defm BUFFER_STORE_DWORD : MUBUF_Real_AllAddr_si <0x1c>;
+defm BUFFER_STORE_DWORDX2 : MUBUF_Real_AllAddr_si <0x1d>;
+defm BUFFER_STORE_DWORDX4 : MUBUF_Real_AllAddr_si <0x1e>;
+defm BUFFER_STORE_DWORDX3 : MUBUF_Real_AllAddr_si <0x1f>;
+
+defm BUFFER_ATOMIC_SWAP : MUBUF_Real_Atomic_si <0x30>;
+defm BUFFER_ATOMIC_CMPSWAP : MUBUF_Real_Atomic_si <0x31>;
+defm BUFFER_ATOMIC_ADD : MUBUF_Real_Atomic_si <0x32>;
+defm BUFFER_ATOMIC_SUB : MUBUF_Real_Atomic_si <0x33>;
+//defm BUFFER_ATOMIC_RSUB : MUBUF_Real_Atomic_si <0x34>; // isn't on CI & VI
+defm BUFFER_ATOMIC_SMIN : MUBUF_Real_Atomic_si <0x35>;
+defm BUFFER_ATOMIC_UMIN : MUBUF_Real_Atomic_si <0x36>;
+defm BUFFER_ATOMIC_SMAX : MUBUF_Real_Atomic_si <0x37>;
+defm BUFFER_ATOMIC_UMAX : MUBUF_Real_Atomic_si <0x38>;
+defm BUFFER_ATOMIC_AND : MUBUF_Real_Atomic_si <0x39>;
+defm BUFFER_ATOMIC_OR : MUBUF_Real_Atomic_si <0x3a>;
+defm BUFFER_ATOMIC_XOR : MUBUF_Real_Atomic_si <0x3b>;
+defm BUFFER_ATOMIC_INC : MUBUF_Real_Atomic_si <0x3c>;
+defm BUFFER_ATOMIC_DEC : MUBUF_Real_Atomic_si <0x3d>;
+
+//defm BUFFER_ATOMIC_FCMPSWAP : MUBUF_Real_Atomic_si <0x3e>; // isn't on VI
+//defm BUFFER_ATOMIC_FMIN : MUBUF_Real_Atomic_si <0x3f>; // isn't on VI
+//defm BUFFER_ATOMIC_FMAX : MUBUF_Real_Atomic_si <0x40>; // isn't on VI
+defm BUFFER_ATOMIC_SWAP_X2 : MUBUF_Real_Atomic_si <0x50>;
+defm BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_Real_Atomic_si <0x51>;
+defm BUFFER_ATOMIC_ADD_X2 : MUBUF_Real_Atomic_si <0x52>;
+defm BUFFER_ATOMIC_SUB_X2 : MUBUF_Real_Atomic_si <0x53>;
+//defm BUFFER_ATOMIC_RSUB_X2 : MUBUF_Real_Atomic_si <0x54>; // isn't on CI & VI
+defm BUFFER_ATOMIC_SMIN_X2 : MUBUF_Real_Atomic_si <0x55>;
+defm BUFFER_ATOMIC_UMIN_X2 : MUBUF_Real_Atomic_si <0x56>;
+defm BUFFER_ATOMIC_SMAX_X2 : MUBUF_Real_Atomic_si <0x57>;
+defm BUFFER_ATOMIC_UMAX_X2 : MUBUF_Real_Atomic_si <0x58>;
+defm BUFFER_ATOMIC_AND_X2 : MUBUF_Real_Atomic_si <0x59>;
+defm BUFFER_ATOMIC_OR_X2 : MUBUF_Real_Atomic_si <0x5a>;
+defm BUFFER_ATOMIC_XOR_X2 : MUBUF_Real_Atomic_si <0x5b>;
+defm BUFFER_ATOMIC_INC_X2 : MUBUF_Real_Atomic_si <0x5c>;
+defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Real_Atomic_si <0x5d>;
+// FIXME: Need to handle hazard for BUFFER_ATOMIC_FCMPSWAP_X2 on CI.
+//defm BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_Real_Atomic_si <0x5e>; // isn't on VI
+//defm BUFFER_ATOMIC_FMIN_X2 : MUBUF_Real_Atomic_si <0x5f>; // isn't on VI
+//defm BUFFER_ATOMIC_FMAX_X2 : MUBUF_Real_Atomic_si <0x60>; // isn't on VI
+
+def BUFFER_WBINVL1_SC_si : MUBUF_Real_si <0x70, BUFFER_WBINVL1_SC>;
+def BUFFER_WBINVL1_si : MUBUF_Real_si <0x71, BUFFER_WBINVL1>;
+
+class MTBUF_Real_si <bits<3> op, MTBUF_Pseudo ps> :
+ MTBUF_Real<ps>,
+ SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SI> {
+ let AssemblerPredicate=isSICI;
+ let DecoderNamespace="SICI";
+
+ bits<1> addr64;
+ let Inst{15} = addr64;
+ let Inst{18-16} = op;
+}
+
+def TBUFFER_LOAD_FORMAT_XYZW_si : MTBUF_Real_si <3, TBUFFER_LOAD_FORMAT_XYZW>;
+def TBUFFER_STORE_FORMAT_X_si : MTBUF_Real_si <4, TBUFFER_STORE_FORMAT_X>;
+def TBUFFER_STORE_FORMAT_XY_si : MTBUF_Real_si <5, TBUFFER_STORE_FORMAT_XY>;
+def TBUFFER_STORE_FORMAT_XYZ_si : MTBUF_Real_si <6, TBUFFER_STORE_FORMAT_XYZ>;
+def TBUFFER_STORE_FORMAT_XYZW_si : MTBUF_Real_si <7, TBUFFER_STORE_FORMAT_XYZW>;
+
+
+//===----------------------------------------------------------------------===//
+// CI
+//===----------------------------------------------------------------------===//
+
+class MUBUF_Real_ci <bits<7> op, MUBUF_Pseudo ps> :
+ MUBUF_Real_si<op, ps> {
+ let AssemblerPredicate=isCIOnly;
+ let DecoderNamespace="CI";
+}
+
+def BUFFER_WBINVL1_VOL_ci : MUBUF_Real_ci <0x70, BUFFER_WBINVL1_VOL>;
+
+
+//===----------------------------------------------------------------------===//
+// VI
+//===----------------------------------------------------------------------===//
+
+class MUBUF_Real_vi <bits<7> op, MUBUF_Pseudo ps> :
+ MUBUF_Real<op, ps>,
+ Enc64,
+ SIMCInstr<ps.PseudoInstr, SIEncodingFamily.VI> {
+ let AssemblerPredicate=isVI;
+ let DecoderNamespace="VI";
+
+ let Inst{11-0} = !if(ps.has_offset, offset, ?);
+ let Inst{12} = ps.offen;
+ let Inst{13} = ps.idxen;
+ let Inst{14} = !if(ps.has_glc, glc, ps.glc_value);
+ let Inst{16} = lds;
+ let Inst{17} = !if(ps.has_slc, slc, ?);
+ let Inst{24-18} = op;
+ let Inst{31-26} = 0x38; //encoding
+ let Inst{39-32} = !if(ps.has_vaddr, vaddr, ?);
+ let Inst{47-40} = !if(ps.has_vdata, vdata, ?);
+ let Inst{52-48} = !if(ps.has_srsrc, srsrc{6-2}, ?);
+ let Inst{55} = !if(ps.has_tfe, tfe, ?);
+ let Inst{63-56} = !if(ps.has_soffset, soffset, ?);
+}
+
+multiclass MUBUF_Real_AllAddr_vi<bits<7> op> {
+ def _OFFSET_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_OFFSET")>;
+ def _OFFEN_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_OFFEN")>;
+ def _IDXEN_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_IDXEN")>;
+ def _BOTHEN_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_BOTHEN")>;
+}
+
+multiclass MUBUF_Real_Atomic_vi<bits<7> op> :
+ MUBUF_Real_AllAddr_vi<op> {
+ def _RTN_OFFSET_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_RTN_OFFSET")>;
+ def _RTN_OFFEN_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_RTN_OFFEN")>;
+ def _RTN_IDXEN_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_RTN_IDXEN")>;
+ def _RTN_BOTHEN_vi : MUBUF_Real_vi <op, !cast<MUBUF_Pseudo>(NAME#"_RTN_BOTHEN")>;
+}
+
+defm BUFFER_LOAD_FORMAT_X : MUBUF_Real_AllAddr_vi <0x00>;
+defm BUFFER_LOAD_FORMAT_XY : MUBUF_Real_AllAddr_vi <0x01>;
+defm BUFFER_LOAD_FORMAT_XYZ : MUBUF_Real_AllAddr_vi <0x02>;
+defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Real_AllAddr_vi <0x03>;
+defm BUFFER_STORE_FORMAT_X : MUBUF_Real_AllAddr_vi <0x04>;
+defm BUFFER_STORE_FORMAT_XY : MUBUF_Real_AllAddr_vi <0x05>;
+defm BUFFER_STORE_FORMAT_XYZ : MUBUF_Real_AllAddr_vi <0x06>;
+defm BUFFER_STORE_FORMAT_XYZW : MUBUF_Real_AllAddr_vi <0x07>;
+defm BUFFER_LOAD_UBYTE : MUBUF_Real_AllAddr_vi <0x10>;
+defm BUFFER_LOAD_SBYTE : MUBUF_Real_AllAddr_vi <0x11>;
+defm BUFFER_LOAD_USHORT : MUBUF_Real_AllAddr_vi <0x12>;
+defm BUFFER_LOAD_SSHORT : MUBUF_Real_AllAddr_vi <0x13>;
+defm BUFFER_LOAD_DWORD : MUBUF_Real_AllAddr_vi <0x14>;
+defm BUFFER_LOAD_DWORDX2 : MUBUF_Real_AllAddr_vi <0x15>;
+defm BUFFER_LOAD_DWORDX3 : MUBUF_Real_AllAddr_vi <0x16>;
+defm BUFFER_LOAD_DWORDX4 : MUBUF_Real_AllAddr_vi <0x17>;
+defm BUFFER_STORE_BYTE : MUBUF_Real_AllAddr_vi <0x18>;
+defm BUFFER_STORE_SHORT : MUBUF_Real_AllAddr_vi <0x1a>;
+defm BUFFER_STORE_DWORD : MUBUF_Real_AllAddr_vi <0x1c>;
+defm BUFFER_STORE_DWORDX2 : MUBUF_Real_AllAddr_vi <0x1d>;
+defm BUFFER_STORE_DWORDX3 : MUBUF_Real_AllAddr_vi <0x1e>;
+defm BUFFER_STORE_DWORDX4 : MUBUF_Real_AllAddr_vi <0x1f>;
+
+defm BUFFER_ATOMIC_SWAP : MUBUF_Real_Atomic_vi <0x40>;
+defm BUFFER_ATOMIC_CMPSWAP : MUBUF_Real_Atomic_vi <0x41>;
+defm BUFFER_ATOMIC_ADD : MUBUF_Real_Atomic_vi <0x42>;
+defm BUFFER_ATOMIC_SUB : MUBUF_Real_Atomic_vi <0x43>;
+defm BUFFER_ATOMIC_SMIN : MUBUF_Real_Atomic_vi <0x44>;
+defm BUFFER_ATOMIC_UMIN : MUBUF_Real_Atomic_vi <0x45>;
+defm BUFFER_ATOMIC_SMAX : MUBUF_Real_Atomic_vi <0x46>;
+defm BUFFER_ATOMIC_UMAX : MUBUF_Real_Atomic_vi <0x47>;
+defm BUFFER_ATOMIC_AND : MUBUF_Real_Atomic_vi <0x48>;
+defm BUFFER_ATOMIC_OR : MUBUF_Real_Atomic_vi <0x49>;
+defm BUFFER_ATOMIC_XOR : MUBUF_Real_Atomic_vi <0x4a>;
+defm BUFFER_ATOMIC_INC : MUBUF_Real_Atomic_vi <0x4b>;
+defm BUFFER_ATOMIC_DEC : MUBUF_Real_Atomic_vi <0x4c>;
+
+defm BUFFER_ATOMIC_SWAP_X2 : MUBUF_Real_Atomic_vi <0x60>;
+defm BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_Real_Atomic_vi <0x61>;
+defm BUFFER_ATOMIC_ADD_X2 : MUBUF_Real_Atomic_vi <0x62>;
+defm BUFFER_ATOMIC_SUB_X2 : MUBUF_Real_Atomic_vi <0x63>;
+defm BUFFER_ATOMIC_SMIN_X2 : MUBUF_Real_Atomic_vi <0x64>;
+defm BUFFER_ATOMIC_UMIN_X2 : MUBUF_Real_Atomic_vi <0x65>;
+defm BUFFER_ATOMIC_SMAX_X2 : MUBUF_Real_Atomic_vi <0x66>;
+defm BUFFER_ATOMIC_UMAX_X2 : MUBUF_Real_Atomic_vi <0x67>;
+defm BUFFER_ATOMIC_AND_X2 : MUBUF_Real_Atomic_vi <0x68>;
+defm BUFFER_ATOMIC_OR_X2 : MUBUF_Real_Atomic_vi <0x69>;
+defm BUFFER_ATOMIC_XOR_X2 : MUBUF_Real_Atomic_vi <0x6a>;
+defm BUFFER_ATOMIC_INC_X2 : MUBUF_Real_Atomic_vi <0x6b>;
+defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Real_Atomic_vi <0x6c>;
+
+def BUFFER_WBINVL1_vi : MUBUF_Real_vi <0x3e, BUFFER_WBINVL1>;
+def BUFFER_WBINVL1_VOL_vi : MUBUF_Real_vi <0x3f, BUFFER_WBINVL1_VOL>;
+
+class MTBUF_Real_vi <bits<4> op, MTBUF_Pseudo ps> :
+ MTBUF_Real<ps>,
+ SIMCInstr<ps.PseudoInstr, SIEncodingFamily.VI> {
+ let AssemblerPredicate=isVI;
+ let DecoderNamespace="VI";
+
+ let Inst{18-15} = op;
+}
+
+def TBUFFER_LOAD_FORMAT_XYZW_vi : MTBUF_Real_vi <3, TBUFFER_LOAD_FORMAT_XYZW>;
+def TBUFFER_STORE_FORMAT_X_vi : MTBUF_Real_vi <4, TBUFFER_STORE_FORMAT_X>;
+def TBUFFER_STORE_FORMAT_XY_vi : MTBUF_Real_vi <5, TBUFFER_STORE_FORMAT_XY>;
+def TBUFFER_STORE_FORMAT_XYZ_vi : MTBUF_Real_vi <6, TBUFFER_STORE_FORMAT_XYZ>;
+def TBUFFER_STORE_FORMAT_XYZW_vi : MTBUF_Real_vi <7, TBUFFER_STORE_FORMAT_XYZW>;
+
diff --git a/contrib/llvm/lib/Target/AMDGPU/CIInstructions.td b/contrib/llvm/lib/Target/AMDGPU/CIInstructions.td
index f9a9f79126bd..26a483a8abf6 100644
--- a/contrib/llvm/lib/Target/AMDGPU/CIInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/CIInstructions.td
@@ -12,338 +12,4 @@
// S_CBRANCH_CDBGUSER
// S_CBRANCH_CDBGSYS
// S_CBRANCH_CDBGSYS_OR_USER
-// S_CBRANCH_CDBGSYS_AND_USER
-// DS_NOP
-// DS_GWS_SEMA_RELEASE_ALL
-// DS_WRAP_RTN_B32
-// DS_CNDXCHG32_RTN_B64
-// DS_WRITE_B96
-// DS_WRITE_B128
-// DS_CONDXCHG32_RTN_B128
-// DS_READ_B96
-// DS_READ_B128
-// BUFFER_LOAD_DWORDX3
-// BUFFER_STORE_DWORDX3
-
-//===----------------------------------------------------------------------===//
-// VOP1 Instructions
-//===----------------------------------------------------------------------===//
-
-let SubtargetPredicate = isCIVI in {
-
-let SchedRW = [WriteDoubleAdd] in {
-defm V_TRUNC_F64 : VOP1Inst <vop1<0x17>, "v_trunc_f64",
- VOP_F64_F64, ftrunc
->;
-defm V_CEIL_F64 : VOP1Inst <vop1<0x18>, "v_ceil_f64",
- VOP_F64_F64, fceil
->;
-defm V_FLOOR_F64 : VOP1Inst <vop1<0x1A>, "v_floor_f64",
- VOP_F64_F64, ffloor
->;
-defm V_RNDNE_F64 : VOP1Inst <vop1<0x19>, "v_rndne_f64",
- VOP_F64_F64, frint
->;
-} // End SchedRW = [WriteDoubleAdd]
-
-let SchedRW = [WriteQuarterRate32] in {
-defm V_LOG_LEGACY_F32 : VOP1Inst <vop1<0x45, 0x4c>, "v_log_legacy_f32",
- VOP_F32_F32
->;
-defm V_EXP_LEGACY_F32 : VOP1Inst <vop1<0x46, 0x4b>, "v_exp_legacy_f32",
- VOP_F32_F32
->;
-} // End SchedRW = [WriteQuarterRate32]
-
-//===----------------------------------------------------------------------===//
-// VOP3 Instructions
-//===----------------------------------------------------------------------===//
-
-defm V_QSAD_PK_U16_U8 : VOP3Inst <vop3<0x173>, "v_qsad_pk_u16_u8",
- VOP_I32_I32_I32
->;
-defm V_MQSAD_U16_U8 : VOP3Inst <vop3<0x172>, "v_mqsad_u16_u8",
- VOP_I32_I32_I32
->;
-defm V_MQSAD_U32_U8 : VOP3Inst <vop3<0x175>, "v_mqsad_u32_u8",
- VOP_I32_I32_I32
->;
-
-let isCommutable = 1 in {
-defm V_MAD_U64_U32 : VOP3Inst <vop3<0x176>, "v_mad_u64_u32",
- VOP_I64_I32_I32_I64
->;
-
-// XXX - Does this set VCC?
-defm V_MAD_I64_I32 : VOP3Inst <vop3<0x177>, "v_mad_i64_i32",
- VOP_I64_I32_I32_I64
->;
-} // End isCommutable = 1
-
-
-//===----------------------------------------------------------------------===//
-// DS Instructions
-//===----------------------------------------------------------------------===//
-defm DS_WRAP_RTN_F32 : DS_1A1D_RET <0x34, "ds_wrap_rtn_f32", VGPR_32, "ds_wrap_f32">;
-
-// DS_CONDXCHG32_RTN_B64
-// DS_CONDXCHG32_RTN_B128
-
-//===----------------------------------------------------------------------===//
-// SMRD Instructions
-//===----------------------------------------------------------------------===//
-
-defm S_DCACHE_INV_VOL : SMRD_Inval <smrd<0x1d, 0x22>,
- "s_dcache_inv_vol", int_amdgcn_s_dcache_inv_vol>;
-
-//===----------------------------------------------------------------------===//
-// MUBUF Instructions
-//===----------------------------------------------------------------------===//
-
-let DisableSIDecoder = 1 in {
-defm BUFFER_WBINVL1_VOL : MUBUF_Invalidate <mubuf<0x70, 0x3f>,
- "buffer_wbinvl1_vol", int_amdgcn_buffer_wbinvl1_vol
->;
-}
-
-//===----------------------------------------------------------------------===//
-// Flat Instructions
-//===----------------------------------------------------------------------===//
-
-defm FLAT_LOAD_UBYTE : FLAT_Load_Helper <
- flat<0x8, 0x10>, "flat_load_ubyte", VGPR_32
->;
-defm FLAT_LOAD_SBYTE : FLAT_Load_Helper <
- flat<0x9, 0x11>, "flat_load_sbyte", VGPR_32
->;
-defm FLAT_LOAD_USHORT : FLAT_Load_Helper <
- flat<0xa, 0x12>, "flat_load_ushort", VGPR_32
->;
-defm FLAT_LOAD_SSHORT : FLAT_Load_Helper <
- flat<0xb, 0x13>, "flat_load_sshort", VGPR_32>
-;
-defm FLAT_LOAD_DWORD : FLAT_Load_Helper <
- flat<0xc, 0x14>, "flat_load_dword", VGPR_32
->;
-defm FLAT_LOAD_DWORDX2 : FLAT_Load_Helper <
- flat<0xd, 0x15>, "flat_load_dwordx2", VReg_64
->;
-defm FLAT_LOAD_DWORDX4 : FLAT_Load_Helper <
- flat<0xe, 0x17>, "flat_load_dwordx4", VReg_128
->;
-defm FLAT_LOAD_DWORDX3 : FLAT_Load_Helper <
- flat<0xf, 0x16>, "flat_load_dwordx3", VReg_96
->;
-defm FLAT_STORE_BYTE : FLAT_Store_Helper <
- flat<0x18>, "flat_store_byte", VGPR_32
->;
-defm FLAT_STORE_SHORT : FLAT_Store_Helper <
- flat <0x1a>, "flat_store_short", VGPR_32
->;
-defm FLAT_STORE_DWORD : FLAT_Store_Helper <
- flat<0x1c>, "flat_store_dword", VGPR_32
->;
-defm FLAT_STORE_DWORDX2 : FLAT_Store_Helper <
- flat<0x1d>, "flat_store_dwordx2", VReg_64
->;
-defm FLAT_STORE_DWORDX4 : FLAT_Store_Helper <
- flat<0x1e, 0x1f>, "flat_store_dwordx4", VReg_128
->;
-defm FLAT_STORE_DWORDX3 : FLAT_Store_Helper <
- flat<0x1f, 0x1e>, "flat_store_dwordx3", VReg_96
->;
-defm FLAT_ATOMIC_SWAP : FLAT_ATOMIC <
- flat<0x30, 0x40>, "flat_atomic_swap", VGPR_32, i32, atomic_swap_flat
->;
-defm FLAT_ATOMIC_CMPSWAP : FLAT_ATOMIC <
- flat<0x31, 0x41>, "flat_atomic_cmpswap", VGPR_32, i32,
- atomic_cmp_swap_flat, v2i32, VReg_64
->;
-defm FLAT_ATOMIC_ADD : FLAT_ATOMIC <
- flat<0x32, 0x42>, "flat_atomic_add", VGPR_32, i32, atomic_add_flat
->;
-defm FLAT_ATOMIC_SUB : FLAT_ATOMIC <
- flat<0x33, 0x43>, "flat_atomic_sub", VGPR_32, i32, atomic_sub_flat
->;
-defm FLAT_ATOMIC_SMIN : FLAT_ATOMIC <
- flat<0x35, 0x44>, "flat_atomic_smin", VGPR_32, i32, atomic_min_flat
->;
-defm FLAT_ATOMIC_UMIN : FLAT_ATOMIC <
- flat<0x36, 0x45>, "flat_atomic_umin", VGPR_32, i32, atomic_umin_flat
->;
-defm FLAT_ATOMIC_SMAX : FLAT_ATOMIC <
- flat<0x37, 0x46>, "flat_atomic_smax", VGPR_32, i32, atomic_max_flat
->;
-defm FLAT_ATOMIC_UMAX : FLAT_ATOMIC <
- flat<0x38, 0x47>, "flat_atomic_umax", VGPR_32, i32, atomic_umax_flat
->;
-defm FLAT_ATOMIC_AND : FLAT_ATOMIC <
- flat<0x39, 0x48>, "flat_atomic_and", VGPR_32, i32, atomic_and_flat
->;
-defm FLAT_ATOMIC_OR : FLAT_ATOMIC <
- flat<0x3a, 0x49>, "flat_atomic_or", VGPR_32, i32, atomic_or_flat
->;
-defm FLAT_ATOMIC_XOR : FLAT_ATOMIC <
- flat<0x3b, 0x4a>, "flat_atomic_xor", VGPR_32, i32, atomic_xor_flat
->;
-defm FLAT_ATOMIC_INC : FLAT_ATOMIC <
- flat<0x3c, 0x4b>, "flat_atomic_inc", VGPR_32, i32, atomic_inc_flat
->;
-defm FLAT_ATOMIC_DEC : FLAT_ATOMIC <
- flat<0x3d, 0x4c>, "flat_atomic_dec", VGPR_32, i32, atomic_dec_flat
->;
-defm FLAT_ATOMIC_SWAP_X2 : FLAT_ATOMIC <
- flat<0x50, 0x60>, "flat_atomic_swap_x2", VReg_64, i64, atomic_swap_flat
->;
-defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_ATOMIC <
- flat<0x51, 0x61>, "flat_atomic_cmpswap_x2", VReg_64, i64,
- atomic_cmp_swap_flat, v2i64, VReg_128
->;
-defm FLAT_ATOMIC_ADD_X2 : FLAT_ATOMIC <
- flat<0x52, 0x62>, "flat_atomic_add_x2", VReg_64, i64, atomic_add_flat
->;
-defm FLAT_ATOMIC_SUB_X2 : FLAT_ATOMIC <
- flat<0x53, 0x63>, "flat_atomic_sub_x2", VReg_64, i64, atomic_sub_flat
->;
-defm FLAT_ATOMIC_SMIN_X2 : FLAT_ATOMIC <
- flat<0x55, 0x64>, "flat_atomic_smin_x2", VReg_64, i64, atomic_min_flat
->;
-defm FLAT_ATOMIC_UMIN_X2 : FLAT_ATOMIC <
- flat<0x56, 0x65>, "flat_atomic_umin_x2", VReg_64, i64, atomic_umin_flat
->;
-defm FLAT_ATOMIC_SMAX_X2 : FLAT_ATOMIC <
- flat<0x57, 0x66>, "flat_atomic_smax_x2", VReg_64, i64, atomic_max_flat
->;
-defm FLAT_ATOMIC_UMAX_X2 : FLAT_ATOMIC <
- flat<0x58, 0x67>, "flat_atomic_umax_x2", VReg_64, i64, atomic_umax_flat
->;
-defm FLAT_ATOMIC_AND_X2 : FLAT_ATOMIC <
- flat<0x59, 0x68>, "flat_atomic_and_x2", VReg_64, i64, atomic_and_flat
->;
-defm FLAT_ATOMIC_OR_X2 : FLAT_ATOMIC <
- flat<0x5a, 0x69>, "flat_atomic_or_x2", VReg_64, i64, atomic_or_flat
->;
-defm FLAT_ATOMIC_XOR_X2 : FLAT_ATOMIC <
- flat<0x5b, 0x6a>, "flat_atomic_xor_x2", VReg_64, i64, atomic_xor_flat
->;
-defm FLAT_ATOMIC_INC_X2 : FLAT_ATOMIC <
- flat<0x5c, 0x6b>, "flat_atomic_inc_x2", VReg_64, i64, atomic_inc_flat
->;
-defm FLAT_ATOMIC_DEC_X2 : FLAT_ATOMIC <
- flat<0x5d, 0x6c>, "flat_atomic_dec_x2", VReg_64, i64, atomic_dec_flat
->;
-
-} // End SubtargetPredicate = isCIVI
-
-// CI Only flat instructions
-
-let SubtargetPredicate = isCI, VIAssemblerPredicate = DisableInst, DisableVIDecoder = 1 in {
-
-defm FLAT_ATOMIC_FCMPSWAP : FLAT_ATOMIC <
- flat<0x3e>, "flat_atomic_fcmpswap", VGPR_32, f32,
- null_frag, v2f32, VReg_64
->;
-defm FLAT_ATOMIC_FMIN : FLAT_ATOMIC <
- flat<0x3f>, "flat_atomic_fmin", VGPR_32, f32
->;
-defm FLAT_ATOMIC_FMAX : FLAT_ATOMIC <
- flat<0x40>, "flat_atomic_fmax", VGPR_32, f32
->;
-defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_ATOMIC <
- flat<0x5e>, "flat_atomic_fcmpswap_x2", VReg_64, f64,
- null_frag, v2f64, VReg_128
->;
-defm FLAT_ATOMIC_FMIN_X2 : FLAT_ATOMIC <
- flat<0x5f>, "flat_atomic_fmin_x2", VReg_64, f64
->;
-defm FLAT_ATOMIC_FMAX_X2 : FLAT_ATOMIC <
- flat<0x60>, "flat_atomic_fmax_x2", VReg_64, f64
->;
-
-} // End SubtargetPredicate = isCI, VIAssemblerPredicate = DisableInst, DisableVIDecoder = 1
-
-//===----------------------------------------------------------------------===//
-// Flat Patterns
-//===----------------------------------------------------------------------===//
-
-let Predicates = [isCIVI] in {
-
-// Patterns for global loads with no offset.
-class FlatLoadPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
- (vt (node i64:$addr)),
- (inst $addr, 0, 0, 0)
->;
-
-class FlatLoadAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
- (vt (node i64:$addr)),
- (inst $addr, 1, 0, 0)
->;
-
-def : FlatLoadPat <FLAT_LOAD_UBYTE, flat_az_extloadi8, i32>;
-def : FlatLoadPat <FLAT_LOAD_SBYTE, flat_sextloadi8, i32>;
-def : FlatLoadPat <FLAT_LOAD_USHORT, flat_az_extloadi16, i32>;
-def : FlatLoadPat <FLAT_LOAD_SSHORT, flat_sextloadi16, i32>;
-def : FlatLoadPat <FLAT_LOAD_DWORD, flat_load, i32>;
-def : FlatLoadPat <FLAT_LOAD_DWORDX2, flat_load, v2i32>;
-def : FlatLoadPat <FLAT_LOAD_DWORDX4, flat_load, v4i32>;
-
-def : FlatLoadAtomicPat <FLAT_LOAD_DWORD, atomic_flat_load, i32>;
-def : FlatLoadAtomicPat <FLAT_LOAD_DWORDX2, atomic_flat_load, i64>;
-
-
-class FlatStorePat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
- (node vt:$data, i64:$addr),
- (inst $addr, $data, 0, 0, 0)
->;
-
-class FlatStoreAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt> : Pat <
- // atomic store follows atomic binop convention so the address comes
- // first.
- (node i64:$addr, vt:$data),
- (inst $addr, $data, 1, 0, 0)
->;
-
-def : FlatStorePat <FLAT_STORE_BYTE, flat_truncstorei8, i32>;
-def : FlatStorePat <FLAT_STORE_SHORT, flat_truncstorei16, i32>;
-def : FlatStorePat <FLAT_STORE_DWORD, flat_store, i32>;
-def : FlatStorePat <FLAT_STORE_DWORDX2, flat_store, v2i32>;
-def : FlatStorePat <FLAT_STORE_DWORDX4, flat_store, v4i32>;
-
-def : FlatStoreAtomicPat <FLAT_STORE_DWORD, atomic_flat_store, i32>;
-def : FlatStoreAtomicPat <FLAT_STORE_DWORDX2, atomic_flat_store, i64>;
-
-class FlatAtomicPat <FLAT inst, SDPatternOperator node, ValueType vt,
- ValueType data_vt = vt> : Pat <
- (vt (node i64:$addr, data_vt:$data)),
- (inst $addr, $data, 0, 0)
->;
-
-def : FlatAtomicPat <FLAT_ATOMIC_ADD_RTN, atomic_add_global, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_SUB_RTN, atomic_sub_global, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_INC_RTN, atomic_inc_global, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_DEC_RTN, atomic_dec_global, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_AND_RTN, atomic_and_global, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_SMAX_RTN, atomic_max_global, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_UMAX_RTN, atomic_umax_global, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_SMIN_RTN, atomic_min_global, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_UMIN_RTN, atomic_umin_global, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_OR_RTN, atomic_or_global, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN, atomic_swap_global, i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_RTN, atomic_cmp_swap_global, i32, v2i32>;
-def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN, atomic_xor_global, i32>;
-
-def : FlatAtomicPat <FLAT_ATOMIC_ADD_X2_RTN, atomic_add_global, i64>;
-def : FlatAtomicPat <FLAT_ATOMIC_SUB_X2_RTN, atomic_sub_global, i64>;
-def : FlatAtomicPat <FLAT_ATOMIC_INC_X2_RTN, atomic_inc_global, i64>;
-def : FlatAtomicPat <FLAT_ATOMIC_DEC_X2_RTN, atomic_dec_global, i64>;
-def : FlatAtomicPat <FLAT_ATOMIC_AND_X2_RTN, atomic_and_global, i64>;
-def : FlatAtomicPat <FLAT_ATOMIC_SMAX_X2_RTN, atomic_max_global, i64>;
-def : FlatAtomicPat <FLAT_ATOMIC_UMAX_X2_RTN, atomic_umax_global, i64>;
-def : FlatAtomicPat <FLAT_ATOMIC_SMIN_X2_RTN, atomic_min_global, i64>;
-def : FlatAtomicPat <FLAT_ATOMIC_UMIN_X2_RTN, atomic_umin_global, i64>;
-def : FlatAtomicPat <FLAT_ATOMIC_OR_X2_RTN, atomic_or_global, i64>;
-def : FlatAtomicPat <FLAT_ATOMIC_SWAP_X2_RTN, atomic_swap_global, i64>;
-def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_X2_RTN, atomic_cmp_swap_global, i64, v2i64>;
-def : FlatAtomicPat <FLAT_ATOMIC_XOR_X2_RTN, atomic_xor_global, i64>;
-
-} // End Predicates = [isCIVI]
+// S_CBRANCH_CDBGSYS_AND_USER
\ No newline at end of file
diff --git a/contrib/llvm/lib/Target/AMDGPU/CaymanInstructions.td b/contrib/llvm/lib/Target/AMDGPU/CaymanInstructions.td
index 98bc6e856ea2..6b8e85a73c73 100644
--- a/contrib/llvm/lib/Target/AMDGPU/CaymanInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/CaymanInstructions.td
@@ -37,6 +37,9 @@ def MULLO_INT_cm : MULLO_INT_Common<0x8F>;
def MULHI_INT_cm : MULHI_INT_Common<0x90>;
def MULLO_UINT_cm : MULLO_UINT_Common<0x91>;
def MULHI_UINT_cm : MULHI_UINT_Common<0x92>;
+def MULHI_INT_cm24 : MULHI_INT24_Common<0x5c>;
+def MULHI_UINT_cm24 : MULHI_UINT24_Common<0xb2>;
+
def RECIPSQRT_CLAMPED_cm : RECIPSQRT_CLAMPED_Common<0x87>;
def EXP_IEEE_cm : EXP_IEEE_Common<0x81>;
def LOG_IEEE_cm : LOG_IEEE_Common<0x83>;
@@ -85,14 +88,13 @@ def RAT_STORE_TYPED_cm: CF_MEM_RAT_STORE_TYPED<0> {
let eop = 0; // This bit is not used on Cayman.
}
-class VTX_READ_cm <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
- : VTX_WORD0_cm, VTX_READ<name, buffer_id, outs, pattern> {
+class VTX_READ_cm <string name, dag outs>
+ : VTX_WORD0_cm, VTX_READ<name, outs, []> {
// Static fields
let VC_INST = 0;
let FETCH_TYPE = 2;
let FETCH_WHOLE_QUAD = 0;
- let BUFFER_ID = buffer_id;
let SRC_REL = 0;
// XXX: We can infer this field based on the SRC_GPR. This would allow us
// to store vertex addresses in any channel, not just X.
@@ -105,9 +107,9 @@ class VTX_READ_cm <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
let Inst{31-0} = Word0;
}
-class VTX_READ_8_cm <bits<8> buffer_id, list<dag> pattern>
- : VTX_READ_cm <"VTX_READ_8 $dst_gpr, $src_gpr", buffer_id,
- (outs R600_TReg32_X:$dst_gpr), pattern> {
+def VTX_READ_8_cm
+ : VTX_READ_cm <"VTX_READ_8 $dst_gpr, $src_gpr",
+ (outs R600_TReg32_X:$dst_gpr)> {
let DST_SEL_X = 0;
let DST_SEL_Y = 7; // Masked
@@ -116,9 +118,9 @@ class VTX_READ_8_cm <bits<8> buffer_id, list<dag> pattern>
let DATA_FORMAT = 1; // FMT_8
}
-class VTX_READ_16_cm <bits<8> buffer_id, list<dag> pattern>
- : VTX_READ_cm <"VTX_READ_16 $dst_gpr, $src_gpr", buffer_id,
- (outs R600_TReg32_X:$dst_gpr), pattern> {
+def VTX_READ_16_cm
+ : VTX_READ_cm <"VTX_READ_16 $dst_gpr, $src_gpr",
+ (outs R600_TReg32_X:$dst_gpr)> {
let DST_SEL_X = 0;
let DST_SEL_Y = 7; // Masked
let DST_SEL_Z = 7; // Masked
@@ -127,9 +129,9 @@ class VTX_READ_16_cm <bits<8> buffer_id, list<dag> pattern>
}
-class VTX_READ_32_cm <bits<8> buffer_id, list<dag> pattern>
- : VTX_READ_cm <"VTX_READ_32 $dst_gpr, $src_gpr", buffer_id,
- (outs R600_TReg32_X:$dst_gpr), pattern> {
+def VTX_READ_32_cm
+ : VTX_READ_cm <"VTX_READ_32 $dst_gpr, $src_gpr",
+ (outs R600_TReg32_X:$dst_gpr)> {
let DST_SEL_X = 0;
let DST_SEL_Y = 7; // Masked
@@ -147,9 +149,9 @@ class VTX_READ_32_cm <bits<8> buffer_id, list<dag> pattern>
let Constraints = "$src_gpr.ptr = $dst_gpr";
}
-class VTX_READ_64_cm <bits<8> buffer_id, list<dag> pattern>
- : VTX_READ_cm <"VTX_READ_64 $dst_gpr, $src_gpr", buffer_id,
- (outs R600_Reg64:$dst_gpr), pattern> {
+def VTX_READ_64_cm
+ : VTX_READ_cm <"VTX_READ_64 $dst_gpr.XY, $src_gpr",
+ (outs R600_Reg64:$dst_gpr)> {
let DST_SEL_X = 0;
let DST_SEL_Y = 1;
@@ -158,9 +160,9 @@ class VTX_READ_64_cm <bits<8> buffer_id, list<dag> pattern>
let DATA_FORMAT = 0x1D; // COLOR_32_32
}
-class VTX_READ_128_cm <bits<8> buffer_id, list<dag> pattern>
- : VTX_READ_cm <"VTX_READ_128 $dst_gpr.XYZW, $src_gpr", buffer_id,
- (outs R600_Reg128:$dst_gpr), pattern> {
+def VTX_READ_128_cm
+ : VTX_READ_cm <"VTX_READ_128 $dst_gpr.XYZW, $src_gpr",
+ (outs R600_Reg128:$dst_gpr)> {
let DST_SEL_X = 0;
let DST_SEL_Y = 1;
@@ -177,79 +179,44 @@ class VTX_READ_128_cm <bits<8> buffer_id, list<dag> pattern>
//===----------------------------------------------------------------------===//
// VTX Read from parameter memory space
//===----------------------------------------------------------------------===//
-def VTX_READ_PARAM_8_cm : VTX_READ_8_cm <0,
- [(set i32:$dst_gpr, (load_param_exti8 ADDRVTX_READ:$src_gpr))]
->;
-
-def VTX_READ_PARAM_16_cm : VTX_READ_16_cm <0,
- [(set i32:$dst_gpr, (load_param_exti16 ADDRVTX_READ:$src_gpr))]
->;
-
-def VTX_READ_PARAM_32_cm : VTX_READ_32_cm <0,
- [(set i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
->;
-
-def VTX_READ_PARAM_64_cm : VTX_READ_64_cm <0,
- [(set v2i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
->;
+def : Pat<(i32:$dst_gpr (vtx_id3_az_extloadi8 ADDRVTX_READ:$src_gpr)),
+ (VTX_READ_8_cm MEMxi:$src_gpr, 3)>;
+def : Pat<(i32:$dst_gpr (vtx_id3_az_extloadi16 ADDRVTX_READ:$src_gpr)),
+ (VTX_READ_16_cm MEMxi:$src_gpr, 3)>;
+def : Pat<(i32:$dst_gpr (vtx_id3_load ADDRVTX_READ:$src_gpr)),
+ (VTX_READ_32_cm MEMxi:$src_gpr, 3)>;
+def : Pat<(v2i32:$dst_gpr (vtx_id3_load ADDRVTX_READ:$src_gpr)),
+ (VTX_READ_64_cm MEMxi:$src_gpr, 3)>;
+def : Pat<(v4i32:$dst_gpr (vtx_id3_load ADDRVTX_READ:$src_gpr)),
+ (VTX_READ_128_cm MEMxi:$src_gpr, 3)>;
-def VTX_READ_PARAM_128_cm : VTX_READ_128_cm <0,
- [(set v4i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
->;
+//===----------------------------------------------------------------------===//
+// VTX Read from constant memory space
+//===----------------------------------------------------------------------===//
+def : Pat<(i32:$dst_gpr (vtx_id2_az_extloadi8 ADDRVTX_READ:$src_gpr)),
+ (VTX_READ_8_cm MEMxi:$src_gpr, 2)>;
+def : Pat<(i32:$dst_gpr (vtx_id2_az_extloadi16 ADDRVTX_READ:$src_gpr)),
+ (VTX_READ_16_cm MEMxi:$src_gpr, 2)>;
+def : Pat<(i32:$dst_gpr (vtx_id2_load ADDRVTX_READ:$src_gpr)),
+ (VTX_READ_32_cm MEMxi:$src_gpr, 2)>;
+def : Pat<(v2i32:$dst_gpr (vtx_id2_load ADDRVTX_READ:$src_gpr)),
+ (VTX_READ_64_cm MEMxi:$src_gpr, 2)>;
+def : Pat<(v4i32:$dst_gpr (vtx_id2_load ADDRVTX_READ:$src_gpr)),
+ (VTX_READ_128_cm MEMxi:$src_gpr, 2)>;
//===----------------------------------------------------------------------===//
// VTX Read from global memory space
//===----------------------------------------------------------------------===//
-
-// 8-bit reads
-def VTX_READ_ID1_8_cm : VTX_READ_8_cm <1,
- [(set i32:$dst_gpr, (vtx_id1_az_extloadi8 ADDRVTX_READ:$src_gpr))]
->;
-
-// 16-bit reads
-def VTX_READ_ID1_16_cm : VTX_READ_16_cm <1,
- [(set i32:$dst_gpr, (vtx_id1_az_extloadi16 ADDRVTX_READ:$src_gpr))]
->;
-
-// 32-bit reads
-def VTX_READ_ID1_32_cm : VTX_READ_32_cm <1,
- [(set i32:$dst_gpr, (vtx_id1_load ADDRVTX_READ:$src_gpr))]
->;
-
-// 64-bit reads
-def VTX_READ_ID1_64_cm : VTX_READ_64_cm <1,
- [(set v2i32:$dst_gpr, (vtx_id1_load ADDRVTX_READ:$src_gpr))]
->;
-
-// 128-bit reads
-def VTX_READ_ID1_128_cm : VTX_READ_128_cm <1,
- [(set v4i32:$dst_gpr, (vtx_id1_load ADDRVTX_READ:$src_gpr))]
->;
-
-// 8-bit reads
-def VTX_READ_ID2_8_cm : VTX_READ_8_cm <2,
- [(set i32:$dst_gpr, (vtx_id2_az_extloadi8 ADDRVTX_READ:$src_gpr))]
->;
-
-// 16-bit reads
-def VTX_READ_ID2_16_cm : VTX_READ_16_cm <2,
- [(set i32:$dst_gpr, (vtx_id2_az_extloadi16 ADDRVTX_READ:$src_gpr))]
->;
-
-// 32-bit reads
-def VTX_READ_ID2_32_cm : VTX_READ_32_cm <2,
- [(set i32:$dst_gpr, (vtx_id2_load ADDRVTX_READ:$src_gpr))]
->;
-
-// 64-bit reads
-def VTX_READ_ID2_64_cm : VTX_READ_64_cm <2,
- [(set v2i32:$dst_gpr, (vtx_id2_load ADDRVTX_READ:$src_gpr))]
->;
-
-// 128-bit reads
-def VTX_READ_ID2_128_cm : VTX_READ_128_cm <2,
- [(set v4i32:$dst_gpr, (vtx_id2_load ADDRVTX_READ:$src_gpr))]
->;
+def : Pat<(i32:$dst_gpr (vtx_id1_az_extloadi8 ADDRVTX_READ:$src_gpr)),
+ (VTX_READ_8_cm MEMxi:$src_gpr, 1)>;
+def : Pat<(i32:$dst_gpr (vtx_id1_az_extloadi16 ADDRVTX_READ:$src_gpr)),
+ (VTX_READ_16_cm MEMxi:$src_gpr, 1)>;
+def : Pat<(i32:$dst_gpr (vtx_id1_load ADDRVTX_READ:$src_gpr)),
+ (VTX_READ_32_cm MEMxi:$src_gpr, 1)>;
+def : Pat<(v2i32:$dst_gpr (vtx_id1_load ADDRVTX_READ:$src_gpr)),
+ (VTX_READ_64_cm MEMxi:$src_gpr, 1)>;
+def : Pat<(v4i32:$dst_gpr (vtx_id1_load ADDRVTX_READ:$src_gpr)),
+ (VTX_READ_128_cm MEMxi:$src_gpr, 1)>;
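+
+// In the selection patterns above, the trailing immediate (3, 2 or 1) selects
+// the buffer, taking over the role of the buffer_id class parameter that has
+// been removed from VTX_READ_cm.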
} // End isCayman
diff --git a/contrib/llvm/lib/Target/AMDGPU/DSInstructions.td b/contrib/llvm/lib/Target/AMDGPU/DSInstructions.td
new file mode 100644
index 000000000000..a077001df6bd
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -0,0 +1,906 @@
+//===-- DSInstructions.td - DS Instruction Definitions --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+class DS_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag> pattern=[]> :
+ InstSI <outs, ins, "", pattern>,
+ SIMCInstr <opName, SIEncodingFamily.NONE> {
+
+ let SubtargetPredicate = isGCN;
+
+ let LGKM_CNT = 1;
+ let DS = 1;
+ let Size = 8;
+ let UseNamedOperandTable = 1;
+ let Uses = [M0, EXEC];
+
+ // Most instructions load and store data, so set this as the default.
+ let mayLoad = 1;
+ let mayStore = 1;
+
+ let hasSideEffects = 0;
+ let SchedRW = [WriteLDS];
+
+ let isPseudo = 1;
+ let isCodeGenOnly = 1;
+
+ let AsmMatchConverter = "cvtDS";
+
+ string Mnemonic = opName;
+ string AsmOperands = asmOps;
+
+ // These bits are a bit of a hack: it would be more natural to test the
+ // "outs" and "ins" dags for the presence of particular operands.
+ bits<1> has_vdst = 1;
+ bits<1> has_addr = 1;
+ bits<1> has_data0 = 1;
+ bits<1> has_data1 = 1;
+
+ bits<1> has_offset = 1; // has an "offset" operand that is split into offset0 and offset1
+ bits<1> has_offset0 = 1;
+ bits<1> has_offset1 = 1;
+
+ bits<1> has_gds = 1;
+ bits<1> gdsValue = 0; // if has_gds == 0, encode the gds bit as this value
+}
+
+class DS_Real <DS_Pseudo ds> :
+ InstSI <ds.OutOperandList, ds.InOperandList, ds.Mnemonic # " " # ds.AsmOperands, []>,
+ Enc64 {
+
+ let isPseudo = 0;
+ let isCodeGenOnly = 0;
+
+ // copy relevant pseudo op flags
+ let SubtargetPredicate = ds.SubtargetPredicate;
+ let AsmMatchConverter = ds.AsmMatchConverter;
+
+ // encoding fields
+ bits<8> vdst;
+ bits<1> gds;
+ bits<8> addr;
+ bits<8> data0;
+ bits<8> data1;
+ bits<8> offset0;
+ bits<8> offset1;
+
+ bits<16> offset;
+ let offset0 = !if(ds.has_offset, offset{7-0}, ?);
+ let offset1 = !if(ds.has_offset, offset{15-8}, ?);
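+ // For example, a pseudo whose single 16-bit operand is offset = 0x1234 is
+ // encoded with offset0 = 0x34 (bits 7-0) and offset1 = 0x12 (bits 15-8) by
+ // the two !if lines above.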
+}
+
+
+// DS Pseudo instructions
+
+class DS_1A1D_NORET<string opName, RegisterClass rc = VGPR_32>
+: DS_Pseudo<opName,
+ (outs),
+ (ins VGPR_32:$addr, rc:$data0, offset:$offset, gds:$gds),
+ "$addr, $data0$offset$gds">,
+ AtomicNoRet<opName, 0> {
+
+ let has_data1 = 0;
+ let has_vdst = 0;
+}
+
+class DS_1A_Off8_NORET<string opName> : DS_Pseudo<opName,
+ (outs),
+ (ins VGPR_32:$addr, offset0:$offset0, offset1:$offset1, gds:$gds),
+ "$addr $offset0$offset1$gds"> {
+
+ let has_data0 = 0;
+ let has_data1 = 0;
+ let has_vdst = 0;
+ let has_offset = 0;
+ let AsmMatchConverter = "cvtDSOffset01";
+}
+
+class DS_1A2D_NORET<string opName, RegisterClass rc = VGPR_32>
+: DS_Pseudo<opName,
+ (outs),
+ (ins VGPR_32:$addr, rc:$data0, rc:$data1, offset:$offset, gds:$gds),
+ "$addr, $data0, $data1"#"$offset"#"$gds">,
+ AtomicNoRet<opName, 0> {
+
+ let has_vdst = 0;
+}
+
+class DS_1A2D_Off8_NORET <string opName, RegisterClass rc = VGPR_32>
+: DS_Pseudo<opName,
+ (outs),
+ (ins VGPR_32:$addr, rc:$data0, rc:$data1,
+ offset0:$offset0, offset1:$offset1, gds:$gds),
+ "$addr, $data0, $data1$offset0$offset1$gds"> {
+
+ let has_vdst = 0;
+ let has_offset = 0;
+ let AsmMatchConverter = "cvtDSOffset01";
+}
+
+class DS_1A1D_RET <string opName, RegisterClass rc = VGPR_32>
+: DS_Pseudo<opName,
+ (outs rc:$vdst),
+ (ins VGPR_32:$addr, rc:$data0, offset:$offset, gds:$gds),
+ "$vdst, $addr, $data0$offset$gds"> {
+
+ let hasPostISelHook = 1;
+ let has_data1 = 0;
+}
+
+class DS_1A2D_RET<string opName,
+ RegisterClass rc = VGPR_32,
+ RegisterClass src = rc>
+: DS_Pseudo<opName,
+ (outs rc:$vdst),
+ (ins VGPR_32:$addr, src:$data0, src:$data1, offset:$offset, gds:$gds),
+ "$vdst, $addr, $data0, $data1$offset$gds"> {
+
+ let hasPostISelHook = 1;
+}
+
+class DS_1A_RET<string opName, RegisterClass rc = VGPR_32>
+: DS_Pseudo<opName,
+ (outs rc:$vdst),
+ (ins VGPR_32:$addr, offset:$offset, gds:$gds),
+ "$vdst, $addr$offset$gds"> {
+
+ let has_data0 = 0;
+ let has_data1 = 0;
+}
+
+class DS_1A_Off8_RET <string opName, RegisterClass rc = VGPR_32>
+: DS_Pseudo<opName,
+ (outs rc:$vdst),
+ (ins VGPR_32:$addr, offset0:$offset0, offset1:$offset1, gds:$gds),
+ "$vdst, $addr$offset0$offset1$gds"> {
+
+ let has_offset = 0;
+ let has_data0 = 0;
+ let has_data1 = 0;
+ let AsmMatchConverter = "cvtDSOffset01";
+}
+
+class DS_1A_RET_GDS <string opName> : DS_Pseudo<opName,
+ (outs VGPR_32:$vdst),
+ (ins VGPR_32:$addr, offset:$offset),
+ "$vdst, $addr$offset gds"> {
+
+ let has_data0 = 0;
+ let has_data1 = 0;
+ let has_gds = 0;
+ let gdsValue = 1;
+}
+
+class DS_0A_RET <string opName> : DS_Pseudo<opName,
+ (outs VGPR_32:$vdst),
+ (ins offset:$offset, gds:$gds),
+ "$vdst$offset$gds"> {
+
+ let mayLoad = 1;
+ let mayStore = 1;
+
+ let has_addr = 0;
+ let has_data0 = 0;
+ let has_data1 = 0;
+}
+
+class DS_1A <string opName> : DS_Pseudo<opName,
+ (outs),
+ (ins VGPR_32:$addr, offset:$offset, gds:$gds),
+ "$addr$offset$gds"> {
+
+ let mayLoad = 1;
+ let mayStore = 1;
+
+ let has_vdst = 0;
+ let has_data0 = 0;
+ let has_data1 = 0;
+}
+
+class DS_1A_GDS <string opName> : DS_Pseudo<opName,
+ (outs),
+ (ins VGPR_32:$addr),
+ "$addr gds"> {
+
+ let has_vdst = 0;
+ let has_data0 = 0;
+ let has_data1 = 0;
+ let has_offset = 0;
+ let has_offset0 = 0;
+ let has_offset1 = 0;
+
+ let has_gds = 0;
+ let gdsValue = 1;
+}
+
+class DS_1A1D_PERMUTE <string opName, SDPatternOperator node = null_frag>
+: DS_Pseudo<opName,
+ (outs VGPR_32:$vdst),
+ (ins VGPR_32:$addr, VGPR_32:$data0, offset:$offset),
+ "$vdst, $addr, $data0$offset",
+ [(set i32:$vdst,
+ (node (DS1Addr1Offset i32:$addr, i16:$offset), i32:$data0))] > {
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let isConvergent = 1;
+
+ let has_data1 = 0;
+ let has_gds = 0;
+}
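+
+// The pattern above selects the permute intrinsics directly: for example,
+// int_amdgcn_ds_permute applied to ($addr, $data0) becomes
+// "ds_permute_b32 $vdst, $addr, $data0", with a constant add on the address
+// folded into the 16-bit offset field by DS1Addr1Offset.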
+
+def DS_ADD_U32 : DS_1A1D_NORET<"ds_add_u32">;
+def DS_SUB_U32 : DS_1A1D_NORET<"ds_sub_u32">;
+def DS_RSUB_U32 : DS_1A1D_NORET<"ds_rsub_u32">;
+def DS_INC_U32 : DS_1A1D_NORET<"ds_inc_u32">;
+def DS_DEC_U32 : DS_1A1D_NORET<"ds_dec_u32">;
+def DS_MIN_I32 : DS_1A1D_NORET<"ds_min_i32">;
+def DS_MAX_I32 : DS_1A1D_NORET<"ds_max_i32">;
+def DS_MIN_U32 : DS_1A1D_NORET<"ds_min_u32">;
+def DS_MAX_U32 : DS_1A1D_NORET<"ds_max_u32">;
+def DS_AND_B32 : DS_1A1D_NORET<"ds_and_b32">;
+def DS_OR_B32 : DS_1A1D_NORET<"ds_or_b32">;
+def DS_XOR_B32 : DS_1A1D_NORET<"ds_xor_b32">;
+def DS_ADD_F32 : DS_1A1D_NORET<"ds_add_f32">;
+def DS_MIN_F32 : DS_1A1D_NORET<"ds_min_f32">;
+def DS_MAX_F32 : DS_1A1D_NORET<"ds_max_f32">;
+
+let mayLoad = 0 in {
+def DS_WRITE_B8 : DS_1A1D_NORET<"ds_write_b8">;
+def DS_WRITE_B16 : DS_1A1D_NORET<"ds_write_b16">;
+def DS_WRITE_B32 : DS_1A1D_NORET<"ds_write_b32">;
+def DS_WRITE2_B32 : DS_1A2D_Off8_NORET<"ds_write2_b32">;
+def DS_WRITE2ST64_B32 : DS_1A2D_Off8_NORET<"ds_write2st64_b32">;
+}
+
+def DS_MSKOR_B32 : DS_1A2D_NORET<"ds_mskor_b32">;
+def DS_CMPST_B32 : DS_1A2D_NORET<"ds_cmpst_b32">;
+def DS_CMPST_F32 : DS_1A2D_NORET<"ds_cmpst_f32">;
+
+def DS_ADD_U64 : DS_1A1D_NORET<"ds_add_u64", VReg_64>;
+def DS_SUB_U64 : DS_1A1D_NORET<"ds_sub_u64", VReg_64>;
+def DS_RSUB_U64 : DS_1A1D_NORET<"ds_rsub_u64", VReg_64>;
+def DS_INC_U64 : DS_1A1D_NORET<"ds_inc_u64", VReg_64>;
+def DS_DEC_U64 : DS_1A1D_NORET<"ds_dec_u64", VReg_64>;
+def DS_MIN_I64 : DS_1A1D_NORET<"ds_min_i64", VReg_64>;
+def DS_MAX_I64 : DS_1A1D_NORET<"ds_max_i64", VReg_64>;
+def DS_MIN_U64 : DS_1A1D_NORET<"ds_min_u64", VReg_64>;
+def DS_MAX_U64 : DS_1A1D_NORET<"ds_max_u64", VReg_64>;
+def DS_AND_B64 : DS_1A1D_NORET<"ds_and_b64", VReg_64>;
+def DS_OR_B64 : DS_1A1D_NORET<"ds_or_b64", VReg_64>;
+def DS_XOR_B64 : DS_1A1D_NORET<"ds_xor_b64", VReg_64>;
+def DS_MSKOR_B64 : DS_1A2D_NORET<"ds_mskor_b64", VReg_64>;
+let mayLoad = 0 in {
+def DS_WRITE_B64 : DS_1A1D_NORET<"ds_write_b64", VReg_64>;
+def DS_WRITE2_B64 : DS_1A2D_Off8_NORET<"ds_write2_b64", VReg_64>;
+def DS_WRITE2ST64_B64 : DS_1A2D_Off8_NORET<"ds_write2st64_b64", VReg_64>;
+}
+def DS_CMPST_B64 : DS_1A2D_NORET<"ds_cmpst_b64", VReg_64>;
+def DS_CMPST_F64 : DS_1A2D_NORET<"ds_cmpst_f64", VReg_64>;
+def DS_MIN_F64 : DS_1A1D_NORET<"ds_min_f64", VReg_64>;
+def DS_MAX_F64 : DS_1A1D_NORET<"ds_max_f64", VReg_64>;
+
+def DS_ADD_RTN_U32 : DS_1A1D_RET<"ds_add_rtn_u32">,
+ AtomicNoRet<"ds_add_u32", 1>;
+def DS_ADD_RTN_F32 : DS_1A1D_RET<"ds_add_rtn_f32">,
+ AtomicNoRet<"ds_add_f32", 1>;
+def DS_SUB_RTN_U32 : DS_1A1D_RET<"ds_sub_rtn_u32">,
+ AtomicNoRet<"ds_sub_u32", 1>;
+def DS_RSUB_RTN_U32 : DS_1A1D_RET<"ds_rsub_rtn_u32">,
+ AtomicNoRet<"ds_rsub_u32", 1>;
+def DS_INC_RTN_U32 : DS_1A1D_RET<"ds_inc_rtn_u32">,
+ AtomicNoRet<"ds_inc_u32", 1>;
+def DS_DEC_RTN_U32 : DS_1A1D_RET<"ds_dec_rtn_u32">,
+ AtomicNoRet<"ds_dec_u32", 1>;
+def DS_MIN_RTN_I32 : DS_1A1D_RET<"ds_min_rtn_i32">,
+ AtomicNoRet<"ds_min_i32", 1>;
+def DS_MAX_RTN_I32 : DS_1A1D_RET<"ds_max_rtn_i32">,
+ AtomicNoRet<"ds_max_i32", 1>;
+def DS_MIN_RTN_U32 : DS_1A1D_RET<"ds_min_rtn_u32">,
+ AtomicNoRet<"ds_min_u32", 1>;
+def DS_MAX_RTN_U32 : DS_1A1D_RET<"ds_max_rtn_u32">,
+ AtomicNoRet<"ds_max_u32", 1>;
+def DS_AND_RTN_B32 : DS_1A1D_RET<"ds_and_rtn_b32">,
+ AtomicNoRet<"ds_and_b32", 1>;
+def DS_OR_RTN_B32 : DS_1A1D_RET<"ds_or_rtn_b32">,
+ AtomicNoRet<"ds_or_b32", 1>;
+def DS_XOR_RTN_B32 : DS_1A1D_RET<"ds_xor_rtn_b32">,
+ AtomicNoRet<"ds_xor_b32", 1>;
+def DS_MSKOR_RTN_B32 : DS_1A2D_RET<"ds_mskor_rtn_b32">,
+ AtomicNoRet<"ds_mskor_b32", 1>;
+def DS_CMPST_RTN_B32 : DS_1A2D_RET <"ds_cmpst_rtn_b32">,
+ AtomicNoRet<"ds_cmpst_b32", 1>;
+def DS_CMPST_RTN_F32 : DS_1A2D_RET <"ds_cmpst_rtn_f32">,
+ AtomicNoRet<"ds_cmpst_f32", 1>;
+def DS_MIN_RTN_F32 : DS_1A1D_RET <"ds_min_rtn_f32">,
+ AtomicNoRet<"ds_min_f32", 1>;
+def DS_MAX_RTN_F32 : DS_1A1D_RET <"ds_max_rtn_f32">,
+ AtomicNoRet<"ds_max_f32", 1>;
+
+def DS_WRXCHG_RTN_B32 : DS_1A1D_RET<"ds_wrxchg_rtn_b32">,
+ AtomicNoRet<"", 1>;
+def DS_WRXCHG2_RTN_B32 : DS_1A2D_RET<"ds_wrxchg2_rtn_b32", VReg_64, VGPR_32>,
+ AtomicNoRet<"", 1>;
+def DS_WRXCHG2ST64_RTN_B32 : DS_1A2D_RET<"ds_wrxchg2st64_rtn_b32", VReg_64, VGPR_32>,
+ AtomicNoRet<"", 1>;
+
+def DS_ADD_RTN_U64 : DS_1A1D_RET<"ds_add_rtn_u64", VReg_64>,
+ AtomicNoRet<"ds_add_u64", 1>;
+def DS_SUB_RTN_U64 : DS_1A1D_RET<"ds_sub_rtn_u64", VReg_64>,
+ AtomicNoRet<"ds_sub_u64", 1>;
+def DS_RSUB_RTN_U64 : DS_1A1D_RET<"ds_rsub_rtn_u64", VReg_64>,
+ AtomicNoRet<"ds_rsub_u64", 1>;
+def DS_INC_RTN_U64 : DS_1A1D_RET<"ds_inc_rtn_u64", VReg_64>,
+ AtomicNoRet<"ds_inc_u64", 1>;
+def DS_DEC_RTN_U64 : DS_1A1D_RET<"ds_dec_rtn_u64", VReg_64>,
+ AtomicNoRet<"ds_dec_u64", 1>;
+def DS_MIN_RTN_I64 : DS_1A1D_RET<"ds_min_rtn_i64", VReg_64>,
+ AtomicNoRet<"ds_min_i64", 1>;
+def DS_MAX_RTN_I64 : DS_1A1D_RET<"ds_max_rtn_i64", VReg_64>,
+ AtomicNoRet<"ds_max_i64", 1>;
+def DS_MIN_RTN_U64 : DS_1A1D_RET<"ds_min_rtn_u64", VReg_64>,
+ AtomicNoRet<"ds_min_u64", 1>;
+def DS_MAX_RTN_U64 : DS_1A1D_RET<"ds_max_rtn_u64", VReg_64>,
+ AtomicNoRet<"ds_max_u64", 1>;
+def DS_AND_RTN_B64 : DS_1A1D_RET<"ds_and_rtn_b64", VReg_64>,
+ AtomicNoRet<"ds_and_b64", 1>;
+def DS_OR_RTN_B64 : DS_1A1D_RET<"ds_or_rtn_b64", VReg_64>,
+ AtomicNoRet<"ds_or_b64", 1>;
+def DS_XOR_RTN_B64 : DS_1A1D_RET<"ds_xor_rtn_b64", VReg_64>,
+ AtomicNoRet<"ds_xor_b64", 1>;
+def DS_MSKOR_RTN_B64 : DS_1A2D_RET<"ds_mskor_rtn_b64", VReg_64>,
+ AtomicNoRet<"ds_mskor_b64", 1>;
+def DS_CMPST_RTN_B64 : DS_1A2D_RET<"ds_cmpst_rtn_b64", VReg_64>,
+ AtomicNoRet<"ds_cmpst_b64", 1>;
+def DS_CMPST_RTN_F64 : DS_1A2D_RET<"ds_cmpst_rtn_f64", VReg_64>,
+ AtomicNoRet<"ds_cmpst_f64", 1>;
+def DS_MIN_RTN_F64 : DS_1A1D_RET<"ds_min_rtn_f64", VReg_64>,
+ AtomicNoRet<"ds_min_f64", 1>;
+def DS_MAX_RTN_F64 : DS_1A1D_RET<"ds_max_rtn_f64", VReg_64>,
+ AtomicNoRet<"ds_max_f64", 1>;
+
+def DS_WRXCHG_RTN_B64 : DS_1A1D_RET<"ds_wrxchg_rtn_b64", VReg_64>,
+ AtomicNoRet<"ds_wrxchg_b64", 1>;
+def DS_WRXCHG2_RTN_B64 : DS_1A2D_RET<"ds_wrxchg2_rtn_b64", VReg_128, VReg_64>,
+ AtomicNoRet<"ds_wrxchg2_b64", 1>;
+def DS_WRXCHG2ST64_RTN_B64 : DS_1A2D_RET<"ds_wrxchg2st64_rtn_b64", VReg_128, VReg_64>,
+ AtomicNoRet<"ds_wrxchg2st64_b64", 1>;
+
+def DS_GWS_INIT : DS_1A_GDS<"ds_gws_init">;
+def DS_GWS_SEMA_V : DS_1A_GDS<"ds_gws_sema_v">;
+def DS_GWS_SEMA_BR : DS_1A_GDS<"ds_gws_sema_br">;
+def DS_GWS_SEMA_P : DS_1A_GDS<"ds_gws_sema_p">;
+def DS_GWS_BARRIER : DS_1A_GDS<"ds_gws_barrier">;
+
+def DS_ADD_SRC2_U32 : DS_1A<"ds_add_src2_u32">;
+def DS_SUB_SRC2_U32 : DS_1A<"ds_sub_src2_u32">;
+def DS_RSUB_SRC2_U32 : DS_1A<"ds_rsub_src2_u32">;
+def DS_INC_SRC2_U32 : DS_1A<"ds_inc_src2_u32">;
+def DS_DEC_SRC2_U32 : DS_1A<"ds_dec_src2_u32">;
+def DS_MIN_SRC2_I32 : DS_1A<"ds_min_src2_i32">;
+def DS_MAX_SRC2_I32 : DS_1A<"ds_max_src2_i32">;
+def DS_MIN_SRC2_U32 : DS_1A<"ds_min_src2_u32">;
+def DS_MAX_SRC2_U32 : DS_1A<"ds_max_src2_u32">;
+def DS_AND_SRC2_B32 : DS_1A<"ds_and_src2_b32">;
+def DS_OR_SRC2_B32 : DS_1A<"ds_or_src2_b32">;
+def DS_XOR_SRC2_B32 : DS_1A<"ds_xor_src2_b32">;
+def DS_MIN_SRC2_F32 : DS_1A<"ds_min_src2_f32">;
+def DS_MAX_SRC2_F32 : DS_1A<"ds_max_src2_f32">;
+
+def DS_ADD_SRC2_U64 : DS_1A<"ds_add_src2_u64">;
+def DS_SUB_SRC2_U64 : DS_1A<"ds_sub_src2_u64">;
+def DS_RSUB_SRC2_U64 : DS_1A<"ds_rsub_src2_u64">;
+def DS_INC_SRC2_U64 : DS_1A<"ds_inc_src2_u64">;
+def DS_DEC_SRC2_U64 : DS_1A<"ds_dec_src2_u64">;
+def DS_MIN_SRC2_I64 : DS_1A<"ds_min_src2_i64">;
+def DS_MAX_SRC2_I64 : DS_1A<"ds_max_src2_i64">;
+def DS_MIN_SRC2_U64 : DS_1A<"ds_min_src2_u64">;
+def DS_MAX_SRC2_U64 : DS_1A<"ds_max_src2_u64">;
+def DS_AND_SRC2_B64 : DS_1A<"ds_and_src2_b64">;
+def DS_OR_SRC2_B64 : DS_1A<"ds_or_src2_b64">;
+def DS_XOR_SRC2_B64 : DS_1A<"ds_xor_src2_b64">;
+def DS_MIN_SRC2_F64 : DS_1A<"ds_min_src2_f64">;
+def DS_MAX_SRC2_F64 : DS_1A<"ds_max_src2_f64">;
+
+def DS_WRITE_SRC2_B32 : DS_1A_Off8_NORET<"ds_write_src2_b32">;
+def DS_WRITE_SRC2_B64 : DS_1A_Off8_NORET<"ds_write_src2_b64">;
+
+let Uses = [EXEC], mayLoad = 0, mayStore = 0, isConvergent = 1 in {
+def DS_SWIZZLE_B32 : DS_1A_RET <"ds_swizzle_b32">;
+}
+
+let mayStore = 0 in {
+def DS_READ_I8 : DS_1A_RET<"ds_read_i8">;
+def DS_READ_U8 : DS_1A_RET<"ds_read_u8">;
+def DS_READ_I16 : DS_1A_RET<"ds_read_i16">;
+def DS_READ_U16 : DS_1A_RET<"ds_read_u16">;
+def DS_READ_B32 : DS_1A_RET<"ds_read_b32">;
+def DS_READ_B64 : DS_1A_RET<"ds_read_b64", VReg_64>;
+
+def DS_READ2_B32 : DS_1A_Off8_RET<"ds_read2_b32", VReg_64>;
+def DS_READ2ST64_B32 : DS_1A_Off8_RET<"ds_read2st64_b32", VReg_64>;
+
+def DS_READ2_B64 : DS_1A_Off8_RET<"ds_read2_b64", VReg_128>;
+def DS_READ2ST64_B64 : DS_1A_Off8_RET<"ds_read2st64_b64", VReg_128>;
+}
+
+let SubtargetPredicate = isSICI in {
+def DS_CONSUME : DS_0A_RET<"ds_consume">;
+def DS_APPEND : DS_0A_RET<"ds_append">;
+def DS_ORDERED_COUNT : DS_1A_RET_GDS<"ds_ordered_count">;
+}
+
+//===----------------------------------------------------------------------===//
+// Instruction definitions for CI and newer.
+//===----------------------------------------------------------------------===//
+// Remaining instructions:
+// DS_NOP
+// DS_GWS_SEMA_RELEASE_ALL
+// DS_WRAP_RTN_B32
+// DS_CNDXCHG32_RTN_B64
+// DS_WRITE_B96
+// DS_WRITE_B128
+// DS_CONDXCHG32_RTN_B128
+// DS_READ_B96
+// DS_READ_B128
+
+let SubtargetPredicate = isCIVI in {
+
+def DS_WRAP_RTN_F32 : DS_1A1D_RET <"ds_wrap_rtn_f32">,
+ AtomicNoRet<"ds_wrap_f32", 1>;
+
+} // let SubtargetPredicate = isCIVI
+
+//===----------------------------------------------------------------------===//
+// Instruction definitions for VI and newer.
+//===----------------------------------------------------------------------===//
+
+let SubtargetPredicate = isVI in {
+
+let Uses = [EXEC] in {
+def DS_PERMUTE_B32 : DS_1A1D_PERMUTE <"ds_permute_b32",
+ int_amdgcn_ds_permute>;
+def DS_BPERMUTE_B32 : DS_1A1D_PERMUTE <"ds_bpermute_b32",
+ int_amdgcn_ds_bpermute>;
+}
+
+} // let SubtargetPredicate = isVI
+
+//===----------------------------------------------------------------------===//
+// DS Patterns
+//===----------------------------------------------------------------------===//
+
+let Predicates = [isGCN] in {
+
+def : Pat <
+ (int_amdgcn_ds_swizzle i32:$src, imm:$offset16),
+ (DS_SWIZZLE_B32 $src, (as_i16imm $offset16), (i1 0))
+>;
+
+class DSReadPat <DS_Pseudo inst, ValueType vt, PatFrag frag> : Pat <
+ (vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset))),
+ (inst $ptr, (as_i16imm $offset), (i1 0))
+>;
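+
+// For example, the DS_READ_B32 instance below matches an i32 load from local
+// address ($ptr + immediate offset) and selects "DS_READ_B32 $ptr, offset"
+// with the gds bit cleared.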
+
+def : DSReadPat <DS_READ_I8, i32, si_sextload_local_i8>;
+def : DSReadPat <DS_READ_U8, i32, si_az_extload_local_i8>;
+def : DSReadPat <DS_READ_I8, i16, si_sextload_local_i8>;
+def : DSReadPat <DS_READ_U8, i16, si_az_extload_local_i8>;
+def : DSReadPat <DS_READ_I16, i32, si_sextload_local_i16>;
+def : DSReadPat <DS_READ_I16, i32, si_sextload_local_i16>;
+def : DSReadPat <DS_READ_U16, i32, si_az_extload_local_i16>;
+def : DSReadPat <DS_READ_U16, i16, si_load_local>;
+def : DSReadPat <DS_READ_B32, i32, si_load_local>;
+
+let AddedComplexity = 100 in {
+
+def : DSReadPat <DS_READ_B64, v2i32, si_load_local_align8>;
+
+} // End AddedComplexity = 100
+
+def : Pat <
+ (v2i32 (si_load_local (DS64Bit4ByteAligned i32:$ptr, i8:$offset0,
+ i8:$offset1))),
+ (DS_READ2_B32 $ptr, $offset0, $offset1, (i1 0))
+>;
+
+class DSWritePat <DS_Pseudo inst, ValueType vt, PatFrag frag> : Pat <
+ (frag vt:$value, (DS1Addr1Offset i32:$ptr, i32:$offset)),
+ (inst $ptr, $value, (as_i16imm $offset), (i1 0))
+>;
+
+def : DSWritePat <DS_WRITE_B8, i32, si_truncstore_local_i8>;
+def : DSWritePat <DS_WRITE_B16, i32, si_truncstore_local_i16>;
+def : DSWritePat <DS_WRITE_B8, i16, si_truncstore_local_i8>;
+def : DSWritePat <DS_WRITE_B16, i16, si_store_local>;
+def : DSWritePat <DS_WRITE_B32, i32, si_store_local>;
+
+let AddedComplexity = 100 in {
+
+def : DSWritePat <DS_WRITE_B64, v2i32, si_store_local_align8>;
+} // End AddedComplexity = 100
+
+def : Pat <
+ (si_store_local v2i32:$value, (DS64Bit4ByteAligned i32:$ptr, i8:$offset0,
+ i8:$offset1)),
+ (DS_WRITE2_B32 $ptr, (i32 (EXTRACT_SUBREG $value, sub0)),
+ (i32 (EXTRACT_SUBREG $value, sub1)), $offset0, $offset1,
+ (i1 0))
+>;
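+
+// The pattern above covers v2i32 local stores that lack the 8-byte alignment
+// required by the DS_WRITE_B64 pattern: the value is split with EXTRACT_SUBREG
+// and written as two dwords through ds_write2_b32.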
+
+class DSAtomicRetPat<DS_Pseudo inst, ValueType vt, PatFrag frag> : Pat <
+ (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value),
+ (inst $ptr, $value, (as_i16imm $offset), (i1 0))
+>;
+
+class DSAtomicCmpXChg<DS_Pseudo inst, ValueType vt, PatFrag frag> : Pat <
+ (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$cmp, vt:$swap),
+ (inst $ptr, $cmp, $swap, (as_i16imm $offset), (i1 0))
+>;
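+
+// For example, DSAtomicRetPat<DS_ADD_RTN_U32, i32, si_atomic_load_add_local>
+// below selects a 32-bit LDS atomic add that returns the previous value into
+// "DS_ADD_RTN_U32 $ptr, $value, offset" with the gds bit cleared.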
+
+
+// 32-bit atomics.
+def : DSAtomicRetPat<DS_WRXCHG_RTN_B32, i32, si_atomic_swap_local>;
+def : DSAtomicRetPat<DS_ADD_RTN_U32, i32, si_atomic_load_add_local>;
+def : DSAtomicRetPat<DS_SUB_RTN_U32, i32, si_atomic_load_sub_local>;
+def : DSAtomicRetPat<DS_INC_RTN_U32, i32, si_atomic_inc_local>;
+def : DSAtomicRetPat<DS_DEC_RTN_U32, i32, si_atomic_dec_local>;
+def : DSAtomicRetPat<DS_AND_RTN_B32, i32, si_atomic_load_and_local>;
+def : DSAtomicRetPat<DS_OR_RTN_B32, i32, si_atomic_load_or_local>;
+def : DSAtomicRetPat<DS_XOR_RTN_B32, i32, si_atomic_load_xor_local>;
+def : DSAtomicRetPat<DS_MIN_RTN_I32, i32, si_atomic_load_min_local>;
+def : DSAtomicRetPat<DS_MAX_RTN_I32, i32, si_atomic_load_max_local>;
+def : DSAtomicRetPat<DS_MIN_RTN_U32, i32, si_atomic_load_umin_local>;
+def : DSAtomicRetPat<DS_MAX_RTN_U32, i32, si_atomic_load_umax_local>;
+def : DSAtomicCmpXChg<DS_CMPST_RTN_B32, i32, si_atomic_cmp_swap_32_local>;
+
+// 64-bit atomics.
+def : DSAtomicRetPat<DS_WRXCHG_RTN_B64, i64, si_atomic_swap_local>;
+def : DSAtomicRetPat<DS_ADD_RTN_U64, i64, si_atomic_load_add_local>;
+def : DSAtomicRetPat<DS_SUB_RTN_U64, i64, si_atomic_load_sub_local>;
+def : DSAtomicRetPat<DS_INC_RTN_U64, i64, si_atomic_inc_local>;
+def : DSAtomicRetPat<DS_DEC_RTN_U64, i64, si_atomic_dec_local>;
+def : DSAtomicRetPat<DS_AND_RTN_B64, i64, si_atomic_load_and_local>;
+def : DSAtomicRetPat<DS_OR_RTN_B64, i64, si_atomic_load_or_local>;
+def : DSAtomicRetPat<DS_XOR_RTN_B64, i64, si_atomic_load_xor_local>;
+def : DSAtomicRetPat<DS_MIN_RTN_I64, i64, si_atomic_load_min_local>;
+def : DSAtomicRetPat<DS_MAX_RTN_I64, i64, si_atomic_load_max_local>;
+def : DSAtomicRetPat<DS_MIN_RTN_U64, i64, si_atomic_load_umin_local>;
+def : DSAtomicRetPat<DS_MAX_RTN_U64, i64, si_atomic_load_umax_local>;
+
+def : DSAtomicCmpXChg<DS_CMPST_RTN_B64, i64, si_atomic_cmp_swap_64_local>;
+
+} // let Predicates = [isGCN]
+
+//===----------------------------------------------------------------------===//
+// Real instructions
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SIInstructions.td
+//===----------------------------------------------------------------------===//
+
+class DS_Real_si <bits<8> op, DS_Pseudo ds> :
+ DS_Real <ds>,
+ SIMCInstr <ds.Mnemonic, SIEncodingFamily.SI> {
+ let AssemblerPredicates=[isSICI];
+ let DecoderNamespace="SICI";
+
+ // encoding
+ let Inst{7-0} = !if(ds.has_offset0, offset0, 0);
+ let Inst{15-8} = !if(ds.has_offset1, offset1, 0);
+ let Inst{17} = !if(ds.has_gds, gds, ds.gdsValue);
+ let Inst{25-18} = op;
+ let Inst{31-26} = 0x36; // ds prefix
+ let Inst{39-32} = !if(ds.has_addr, addr, 0);
+ let Inst{47-40} = !if(ds.has_data0, data0, 0);
+ let Inst{55-48} = !if(ds.has_data1, data1, 0);
+ let Inst{63-56} = !if(ds.has_vdst, vdst, 0);
+}
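+
+// For illustration: "ds_write_b32 v1, v2 offset:16" (SI opcode 0xd, defined
+// below) encodes offset0 = 16, offset1 = 0, gds = 0 and the 0x36 DS prefix in
+// the low dword, with addr = 1 and data0 = 2 in the high dword; data1 and
+// vdst are unused for this opcode and encode as 0.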
+
+def DS_ADD_U32_si : DS_Real_si<0x0, DS_ADD_U32>;
+def DS_SUB_U32_si : DS_Real_si<0x1, DS_SUB_U32>;
+def DS_RSUB_U32_si : DS_Real_si<0x2, DS_RSUB_U32>;
+def DS_INC_U32_si : DS_Real_si<0x3, DS_INC_U32>;
+def DS_DEC_U32_si : DS_Real_si<0x4, DS_DEC_U32>;
+def DS_MIN_I32_si : DS_Real_si<0x5, DS_MIN_I32>;
+def DS_MAX_I32_si : DS_Real_si<0x6, DS_MAX_I32>;
+def DS_MIN_U32_si : DS_Real_si<0x7, DS_MIN_U32>;
+def DS_MAX_U32_si : DS_Real_si<0x8, DS_MAX_U32>;
+def DS_AND_B32_si : DS_Real_si<0x9, DS_AND_B32>;
+def DS_OR_B32_si : DS_Real_si<0xa, DS_OR_B32>;
+def DS_XOR_B32_si : DS_Real_si<0xb, DS_XOR_B32>;
+def DS_MSKOR_B32_si : DS_Real_si<0xc, DS_MSKOR_B32>;
+def DS_WRITE_B32_si : DS_Real_si<0xd, DS_WRITE_B32>;
+def DS_WRITE2_B32_si : DS_Real_si<0xe, DS_WRITE2_B32>;
+def DS_WRITE2ST64_B32_si : DS_Real_si<0xf, DS_WRITE2ST64_B32>;
+def DS_CMPST_B32_si : DS_Real_si<0x10, DS_CMPST_B32>;
+def DS_CMPST_F32_si : DS_Real_si<0x11, DS_CMPST_F32>;
+def DS_MIN_F32_si : DS_Real_si<0x12, DS_MIN_F32>;
+def DS_MAX_F32_si : DS_Real_si<0x13, DS_MAX_F32>;
+def DS_GWS_INIT_si : DS_Real_si<0x19, DS_GWS_INIT>;
+def DS_GWS_SEMA_V_si : DS_Real_si<0x1a, DS_GWS_SEMA_V>;
+def DS_GWS_SEMA_BR_si : DS_Real_si<0x1b, DS_GWS_SEMA_BR>;
+def DS_GWS_SEMA_P_si : DS_Real_si<0x1c, DS_GWS_SEMA_P>;
+def DS_GWS_BARRIER_si : DS_Real_si<0x1d, DS_GWS_BARRIER>;
+def DS_WRITE_B8_si : DS_Real_si<0x1e, DS_WRITE_B8>;
+def DS_WRITE_B16_si : DS_Real_si<0x1f, DS_WRITE_B16>;
+def DS_ADD_RTN_U32_si : DS_Real_si<0x20, DS_ADD_RTN_U32>;
+def DS_SUB_RTN_U32_si : DS_Real_si<0x21, DS_SUB_RTN_U32>;
+def DS_RSUB_RTN_U32_si : DS_Real_si<0x22, DS_RSUB_RTN_U32>;
+def DS_INC_RTN_U32_si : DS_Real_si<0x23, DS_INC_RTN_U32>;
+def DS_DEC_RTN_U32_si : DS_Real_si<0x24, DS_DEC_RTN_U32>;
+def DS_MIN_RTN_I32_si : DS_Real_si<0x25, DS_MIN_RTN_I32>;
+def DS_MAX_RTN_I32_si : DS_Real_si<0x26, DS_MAX_RTN_I32>;
+def DS_MIN_RTN_U32_si : DS_Real_si<0x27, DS_MIN_RTN_U32>;
+def DS_MAX_RTN_U32_si : DS_Real_si<0x28, DS_MAX_RTN_U32>;
+def DS_AND_RTN_B32_si : DS_Real_si<0x29, DS_AND_RTN_B32>;
+def DS_OR_RTN_B32_si : DS_Real_si<0x2a, DS_OR_RTN_B32>;
+def DS_XOR_RTN_B32_si : DS_Real_si<0x2b, DS_XOR_RTN_B32>;
+def DS_MSKOR_RTN_B32_si : DS_Real_si<0x2c, DS_MSKOR_RTN_B32>;
+def DS_WRXCHG_RTN_B32_si : DS_Real_si<0x2d, DS_WRXCHG_RTN_B32>;
+def DS_WRXCHG2_RTN_B32_si : DS_Real_si<0x2e, DS_WRXCHG2_RTN_B32>;
+def DS_WRXCHG2ST64_RTN_B32_si : DS_Real_si<0x2f, DS_WRXCHG2ST64_RTN_B32>;
+def DS_CMPST_RTN_B32_si : DS_Real_si<0x30, DS_CMPST_RTN_B32>;
+def DS_CMPST_RTN_F32_si : DS_Real_si<0x31, DS_CMPST_RTN_F32>;
+def DS_MIN_RTN_F32_si : DS_Real_si<0x32, DS_MIN_RTN_F32>;
+def DS_MAX_RTN_F32_si : DS_Real_si<0x33, DS_MAX_RTN_F32>;
+
+// FIXME: this instruction is actually CI/VI
+def DS_WRAP_RTN_F32_si : DS_Real_si<0x34, DS_WRAP_RTN_F32>;
+
+def DS_SWIZZLE_B32_si : DS_Real_si<0x35, DS_SWIZZLE_B32>;
+def DS_READ_B32_si : DS_Real_si<0x36, DS_READ_B32>;
+def DS_READ2_B32_si : DS_Real_si<0x37, DS_READ2_B32>;
+def DS_READ2ST64_B32_si : DS_Real_si<0x38, DS_READ2ST64_B32>;
+def DS_READ_I8_si : DS_Real_si<0x39, DS_READ_I8>;
+def DS_READ_U8_si : DS_Real_si<0x3a, DS_READ_U8>;
+def DS_READ_I16_si : DS_Real_si<0x3b, DS_READ_I16>;
+def DS_READ_U16_si : DS_Real_si<0x3c, DS_READ_U16>;
+def DS_CONSUME_si : DS_Real_si<0x3d, DS_CONSUME>;
+def DS_APPEND_si : DS_Real_si<0x3e, DS_APPEND>;
+def DS_ORDERED_COUNT_si : DS_Real_si<0x3f, DS_ORDERED_COUNT>;
+def DS_ADD_U64_si : DS_Real_si<0x40, DS_ADD_U64>;
+def DS_SUB_U64_si : DS_Real_si<0x41, DS_SUB_U64>;
+def DS_RSUB_U64_si : DS_Real_si<0x42, DS_RSUB_U64>;
+def DS_INC_U64_si : DS_Real_si<0x43, DS_INC_U64>;
+def DS_DEC_U64_si : DS_Real_si<0x44, DS_DEC_U64>;
+def DS_MIN_I64_si : DS_Real_si<0x45, DS_MIN_I64>;
+def DS_MAX_I64_si : DS_Real_si<0x46, DS_MAX_I64>;
+def DS_MIN_U64_si : DS_Real_si<0x47, DS_MIN_U64>;
+def DS_MAX_U64_si : DS_Real_si<0x48, DS_MAX_U64>;
+def DS_AND_B64_si : DS_Real_si<0x49, DS_AND_B64>;
+def DS_OR_B64_si : DS_Real_si<0x4a, DS_OR_B64>;
+def DS_XOR_B64_si : DS_Real_si<0x4b, DS_XOR_B64>;
+def DS_MSKOR_B64_si : DS_Real_si<0x4c, DS_MSKOR_B64>;
+def DS_WRITE_B64_si : DS_Real_si<0x4d, DS_WRITE_B64>;
+def DS_WRITE2_B64_si : DS_Real_si<0x4E, DS_WRITE2_B64>;
+def DS_WRITE2ST64_B64_si : DS_Real_si<0x4f, DS_WRITE2ST64_B64>;
+def DS_CMPST_B64_si : DS_Real_si<0x50, DS_CMPST_B64>;
+def DS_CMPST_F64_si : DS_Real_si<0x51, DS_CMPST_F64>;
+def DS_MIN_F64_si : DS_Real_si<0x52, DS_MIN_F64>;
+def DS_MAX_F64_si : DS_Real_si<0x53, DS_MAX_F64>;
+
+def DS_ADD_RTN_U64_si : DS_Real_si<0x60, DS_ADD_RTN_U64>;
+def DS_SUB_RTN_U64_si : DS_Real_si<0x61, DS_SUB_RTN_U64>;
+def DS_RSUB_RTN_U64_si : DS_Real_si<0x62, DS_RSUB_RTN_U64>;
+def DS_INC_RTN_U64_si : DS_Real_si<0x63, DS_INC_RTN_U64>;
+def DS_DEC_RTN_U64_si : DS_Real_si<0x64, DS_DEC_RTN_U64>;
+def DS_MIN_RTN_I64_si : DS_Real_si<0x65, DS_MIN_RTN_I64>;
+def DS_MAX_RTN_I64_si : DS_Real_si<0x66, DS_MAX_RTN_I64>;
+def DS_MIN_RTN_U64_si : DS_Real_si<0x67, DS_MIN_RTN_U64>;
+def DS_MAX_RTN_U64_si : DS_Real_si<0x68, DS_MAX_RTN_U64>;
+def DS_AND_RTN_B64_si : DS_Real_si<0x69, DS_AND_RTN_B64>;
+def DS_OR_RTN_B64_si : DS_Real_si<0x6a, DS_OR_RTN_B64>;
+def DS_XOR_RTN_B64_si : DS_Real_si<0x6b, DS_XOR_RTN_B64>;
+def DS_MSKOR_RTN_B64_si : DS_Real_si<0x6c, DS_MSKOR_RTN_B64>;
+def DS_WRXCHG_RTN_B64_si : DS_Real_si<0x6d, DS_WRXCHG_RTN_B64>;
+def DS_WRXCHG2_RTN_B64_si : DS_Real_si<0x6e, DS_WRXCHG2_RTN_B64>;
+def DS_WRXCHG2ST64_RTN_B64_si : DS_Real_si<0x6f, DS_WRXCHG2ST64_RTN_B64>;
+def DS_CMPST_RTN_B64_si : DS_Real_si<0x70, DS_CMPST_RTN_B64>;
+def DS_CMPST_RTN_F64_si : DS_Real_si<0x71, DS_CMPST_RTN_F64>;
+def DS_MIN_RTN_F64_si : DS_Real_si<0x72, DS_MIN_RTN_F64>;
+def DS_MAX_RTN_F64_si : DS_Real_si<0x73, DS_MAX_RTN_F64>;
+
+def DS_READ_B64_si : DS_Real_si<0x76, DS_READ_B64>;
+def DS_READ2_B64_si : DS_Real_si<0x77, DS_READ2_B64>;
+def DS_READ2ST64_B64_si : DS_Real_si<0x78, DS_READ2ST64_B64>;
+
+def DS_ADD_SRC2_U32_si : DS_Real_si<0x80, DS_ADD_SRC2_U32>;
+def DS_SUB_SRC2_U32_si : DS_Real_si<0x81, DS_SUB_SRC2_U32>;
+def DS_RSUB_SRC2_U32_si : DS_Real_si<0x82, DS_RSUB_SRC2_U32>;
+def DS_INC_SRC2_U32_si : DS_Real_si<0x83, DS_INC_SRC2_U32>;
+def DS_DEC_SRC2_U32_si : DS_Real_si<0x84, DS_DEC_SRC2_U32>;
+def DS_MIN_SRC2_I32_si : DS_Real_si<0x85, DS_MIN_SRC2_I32>;
+def DS_MAX_SRC2_I32_si : DS_Real_si<0x86, DS_MAX_SRC2_I32>;
+def DS_MIN_SRC2_U32_si : DS_Real_si<0x87, DS_MIN_SRC2_U32>;
+def DS_MAX_SRC2_U32_si : DS_Real_si<0x88, DS_MAX_SRC2_U32>;
+def DS_AND_SRC2_B32_si : DS_Real_si<0x89, DS_AND_SRC2_B32>;
+def DS_OR_SRC2_B32_si : DS_Real_si<0x8a, DS_OR_SRC2_B32>;
+def DS_XOR_SRC2_B32_si : DS_Real_si<0x8b, DS_XOR_SRC2_B32>;
+def DS_WRITE_SRC2_B32_si : DS_Real_si<0x8d, DS_WRITE_SRC2_B32>;
+
+def DS_MIN_SRC2_F32_si : DS_Real_si<0x92, DS_MIN_SRC2_F32>;
+def DS_MAX_SRC2_F32_si : DS_Real_si<0x93, DS_MAX_SRC2_F32>;
+
+def DS_ADD_SRC2_U64_si : DS_Real_si<0xc0, DS_ADD_SRC2_U64>;
+def DS_SUB_SRC2_U64_si : DS_Real_si<0xc1, DS_SUB_SRC2_U64>;
+def DS_RSUB_SRC2_U64_si : DS_Real_si<0xc2, DS_RSUB_SRC2_U64>;
+def DS_INC_SRC2_U64_si : DS_Real_si<0xc3, DS_INC_SRC2_U64>;
+def DS_DEC_SRC2_U64_si : DS_Real_si<0xc4, DS_DEC_SRC2_U64>;
+def DS_MIN_SRC2_I64_si : DS_Real_si<0xc5, DS_MIN_SRC2_I64>;
+def DS_MAX_SRC2_I64_si : DS_Real_si<0xc6, DS_MAX_SRC2_I64>;
+def DS_MIN_SRC2_U64_si : DS_Real_si<0xc7, DS_MIN_SRC2_U64>;
+def DS_MAX_SRC2_U64_si : DS_Real_si<0xc8, DS_MAX_SRC2_U64>;
+def DS_AND_SRC2_B64_si : DS_Real_si<0xc9, DS_AND_SRC2_B64>;
+def DS_OR_SRC2_B64_si : DS_Real_si<0xca, DS_OR_SRC2_B64>;
+def DS_XOR_SRC2_B64_si : DS_Real_si<0xcb, DS_XOR_SRC2_B64>;
+def DS_WRITE_SRC2_B64_si : DS_Real_si<0xcd, DS_WRITE_SRC2_B64>;
+
+def DS_MIN_SRC2_F64_si : DS_Real_si<0xd2, DS_MIN_SRC2_F64>;
+def DS_MAX_SRC2_F64_si : DS_Real_si<0xd3, DS_MAX_SRC2_F64>;
+
+//===----------------------------------------------------------------------===//
+// VIInstructions.td
+//===----------------------------------------------------------------------===//
+
+class DS_Real_vi <bits<8> op, DS_Pseudo ds> :
+ DS_Real <ds>,
+ SIMCInstr <ds.Mnemonic, SIEncodingFamily.VI> {
+ let AssemblerPredicates = [isVI];
+ let DecoderNamespace="VI";
+
+ // encoding
+ let Inst{7-0} = !if(ds.has_offset0, offset0, 0);
+ let Inst{15-8} = !if(ds.has_offset1, offset1, 0);
+ let Inst{16} = !if(ds.has_gds, gds, ds.gdsValue);
+ let Inst{24-17} = op;
+ let Inst{31-26} = 0x36; // ds prefix
+ let Inst{39-32} = !if(ds.has_addr, addr, 0);
+ let Inst{47-40} = !if(ds.has_data0, data0, 0);
+ let Inst{55-48} = !if(ds.has_data1, data1, 0);
+ let Inst{63-56} = !if(ds.has_vdst, vdst, 0);
+}
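+
+// Note: relative to the SI layout above, VI moves the gds bit down to Inst{16}
+// and the 8-bit opcode field down to Inst{24-17}; the remaining fields are
+// unchanged.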
+
+def DS_ADD_U32_vi : DS_Real_vi<0x0, DS_ADD_U32>;
+def DS_SUB_U32_vi : DS_Real_vi<0x1, DS_SUB_U32>;
+def DS_RSUB_U32_vi : DS_Real_vi<0x2, DS_RSUB_U32>;
+def DS_INC_U32_vi : DS_Real_vi<0x3, DS_INC_U32>;
+def DS_DEC_U32_vi : DS_Real_vi<0x4, DS_DEC_U32>;
+def DS_MIN_I32_vi : DS_Real_vi<0x5, DS_MIN_I32>;
+def DS_MAX_I32_vi : DS_Real_vi<0x6, DS_MAX_I32>;
+def DS_MIN_U32_vi : DS_Real_vi<0x7, DS_MIN_U32>;
+def DS_MAX_U32_vi : DS_Real_vi<0x8, DS_MAX_U32>;
+def DS_AND_B32_vi : DS_Real_vi<0x9, DS_AND_B32>;
+def DS_OR_B32_vi : DS_Real_vi<0xa, DS_OR_B32>;
+def DS_XOR_B32_vi : DS_Real_vi<0xb, DS_XOR_B32>;
+def DS_MSKOR_B32_vi : DS_Real_vi<0xc, DS_MSKOR_B32>;
+def DS_WRITE_B32_vi : DS_Real_vi<0xd, DS_WRITE_B32>;
+def DS_WRITE2_B32_vi : DS_Real_vi<0xe, DS_WRITE2_B32>;
+def DS_WRITE2ST64_B32_vi : DS_Real_vi<0xf, DS_WRITE2ST64_B32>;
+def DS_CMPST_B32_vi : DS_Real_vi<0x10, DS_CMPST_B32>;
+def DS_CMPST_F32_vi : DS_Real_vi<0x11, DS_CMPST_F32>;
+def DS_MIN_F32_vi : DS_Real_vi<0x12, DS_MIN_F32>;
+def DS_MAX_F32_vi : DS_Real_vi<0x13, DS_MAX_F32>;
+def DS_ADD_F32_vi : DS_Real_vi<0x15, DS_ADD_F32>;
+def DS_GWS_INIT_vi : DS_Real_vi<0x19, DS_GWS_INIT>;
+def DS_GWS_SEMA_V_vi : DS_Real_vi<0x1a, DS_GWS_SEMA_V>;
+def DS_GWS_SEMA_BR_vi : DS_Real_vi<0x1b, DS_GWS_SEMA_BR>;
+def DS_GWS_SEMA_P_vi : DS_Real_vi<0x1c, DS_GWS_SEMA_P>;
+def DS_GWS_BARRIER_vi : DS_Real_vi<0x1d, DS_GWS_BARRIER>;
+def DS_WRITE_B8_vi : DS_Real_vi<0x1e, DS_WRITE_B8>;
+def DS_WRITE_B16_vi : DS_Real_vi<0x1f, DS_WRITE_B16>;
+def DS_ADD_RTN_U32_vi : DS_Real_vi<0x20, DS_ADD_RTN_U32>;
+def DS_SUB_RTN_U32_vi : DS_Real_vi<0x21, DS_SUB_RTN_U32>;
+def DS_RSUB_RTN_U32_vi : DS_Real_vi<0x22, DS_RSUB_RTN_U32>;
+def DS_INC_RTN_U32_vi : DS_Real_vi<0x23, DS_INC_RTN_U32>;
+def DS_DEC_RTN_U32_vi : DS_Real_vi<0x24, DS_DEC_RTN_U32>;
+def DS_MIN_RTN_I32_vi : DS_Real_vi<0x25, DS_MIN_RTN_I32>;
+def DS_MAX_RTN_I32_vi : DS_Real_vi<0x26, DS_MAX_RTN_I32>;
+def DS_MIN_RTN_U32_vi : DS_Real_vi<0x27, DS_MIN_RTN_U32>;
+def DS_MAX_RTN_U32_vi : DS_Real_vi<0x28, DS_MAX_RTN_U32>;
+def DS_AND_RTN_B32_vi : DS_Real_vi<0x29, DS_AND_RTN_B32>;
+def DS_OR_RTN_B32_vi : DS_Real_vi<0x2a, DS_OR_RTN_B32>;
+def DS_XOR_RTN_B32_vi : DS_Real_vi<0x2b, DS_XOR_RTN_B32>;
+def DS_MSKOR_RTN_B32_vi : DS_Real_vi<0x2c, DS_MSKOR_RTN_B32>;
+def DS_WRXCHG_RTN_B32_vi : DS_Real_vi<0x2d, DS_WRXCHG_RTN_B32>;
+def DS_WRXCHG2_RTN_B32_vi : DS_Real_vi<0x2e, DS_WRXCHG2_RTN_B32>;
+def DS_WRXCHG2ST64_RTN_B32_vi : DS_Real_vi<0x2f, DS_WRXCHG2ST64_RTN_B32>;
+def DS_CMPST_RTN_B32_vi : DS_Real_vi<0x30, DS_CMPST_RTN_B32>;
+def DS_CMPST_RTN_F32_vi : DS_Real_vi<0x31, DS_CMPST_RTN_F32>;
+def DS_MIN_RTN_F32_vi : DS_Real_vi<0x32, DS_MIN_RTN_F32>;
+def DS_MAX_RTN_F32_vi : DS_Real_vi<0x33, DS_MAX_RTN_F32>;
+def DS_WRAP_RTN_F32_vi : DS_Real_vi<0x34, DS_WRAP_RTN_F32>;
+def DS_ADD_RTN_F32_vi : DS_Real_vi<0x35, DS_ADD_RTN_F32>;
+def DS_READ_B32_vi : DS_Real_vi<0x36, DS_READ_B32>;
+def DS_READ2_B32_vi : DS_Real_vi<0x37, DS_READ2_B32>;
+def DS_READ2ST64_B32_vi : DS_Real_vi<0x38, DS_READ2ST64_B32>;
+def DS_READ_I8_vi : DS_Real_vi<0x39, DS_READ_I8>;
+def DS_READ_U8_vi : DS_Real_vi<0x3a, DS_READ_U8>;
+def DS_READ_I16_vi : DS_Real_vi<0x3b, DS_READ_I16>;
+def DS_READ_U16_vi : DS_Real_vi<0x3c, DS_READ_U16>;
+def DS_SWIZZLE_B32_vi : DS_Real_vi<0x3d, DS_SWIZZLE_B32>;
+def DS_PERMUTE_B32_vi : DS_Real_vi<0x3e, DS_PERMUTE_B32>;
+def DS_BPERMUTE_B32_vi : DS_Real_vi<0x3f, DS_BPERMUTE_B32>;
+
+def DS_ADD_U64_vi : DS_Real_vi<0x40, DS_ADD_U64>;
+def DS_SUB_U64_vi : DS_Real_vi<0x41, DS_SUB_U64>;
+def DS_RSUB_U64_vi : DS_Real_vi<0x42, DS_RSUB_U64>;
+def DS_INC_U64_vi : DS_Real_vi<0x43, DS_INC_U64>;
+def DS_DEC_U64_vi : DS_Real_vi<0x44, DS_DEC_U64>;
+def DS_MIN_I64_vi : DS_Real_vi<0x45, DS_MIN_I64>;
+def DS_MAX_I64_vi : DS_Real_vi<0x46, DS_MAX_I64>;
+def DS_MIN_U64_vi : DS_Real_vi<0x47, DS_MIN_U64>;
+def DS_MAX_U64_vi : DS_Real_vi<0x48, DS_MAX_U64>;
+def DS_AND_B64_vi : DS_Real_vi<0x49, DS_AND_B64>;
+def DS_OR_B64_vi : DS_Real_vi<0x4a, DS_OR_B64>;
+def DS_XOR_B64_vi : DS_Real_vi<0x4b, DS_XOR_B64>;
+def DS_MSKOR_B64_vi : DS_Real_vi<0x4c, DS_MSKOR_B64>;
+def DS_WRITE_B64_vi : DS_Real_vi<0x4d, DS_WRITE_B64>;
+def DS_WRITE2_B64_vi : DS_Real_vi<0x4E, DS_WRITE2_B64>;
+def DS_WRITE2ST64_B64_vi : DS_Real_vi<0x4f, DS_WRITE2ST64_B64>;
+def DS_CMPST_B64_vi : DS_Real_vi<0x50, DS_CMPST_B64>;
+def DS_CMPST_F64_vi : DS_Real_vi<0x51, DS_CMPST_F64>;
+def DS_MIN_F64_vi : DS_Real_vi<0x52, DS_MIN_F64>;
+def DS_MAX_F64_vi : DS_Real_vi<0x53, DS_MAX_F64>;
+
+def DS_ADD_RTN_U64_vi : DS_Real_vi<0x60, DS_ADD_RTN_U64>;
+def DS_SUB_RTN_U64_vi : DS_Real_vi<0x61, DS_SUB_RTN_U64>;
+def DS_RSUB_RTN_U64_vi : DS_Real_vi<0x62, DS_RSUB_RTN_U64>;
+def DS_INC_RTN_U64_vi : DS_Real_vi<0x63, DS_INC_RTN_U64>;
+def DS_DEC_RTN_U64_vi : DS_Real_vi<0x64, DS_DEC_RTN_U64>;
+def DS_MIN_RTN_I64_vi : DS_Real_vi<0x65, DS_MIN_RTN_I64>;
+def DS_MAX_RTN_I64_vi : DS_Real_vi<0x66, DS_MAX_RTN_I64>;
+def DS_MIN_RTN_U64_vi : DS_Real_vi<0x67, DS_MIN_RTN_U64>;
+def DS_MAX_RTN_U64_vi : DS_Real_vi<0x68, DS_MAX_RTN_U64>;
+def DS_AND_RTN_B64_vi : DS_Real_vi<0x69, DS_AND_RTN_B64>;
+def DS_OR_RTN_B64_vi : DS_Real_vi<0x6a, DS_OR_RTN_B64>;
+def DS_XOR_RTN_B64_vi : DS_Real_vi<0x6b, DS_XOR_RTN_B64>;
+def DS_MSKOR_RTN_B64_vi : DS_Real_vi<0x6c, DS_MSKOR_RTN_B64>;
+def DS_WRXCHG_RTN_B64_vi : DS_Real_vi<0x6d, DS_WRXCHG_RTN_B64>;
+def DS_WRXCHG2_RTN_B64_vi : DS_Real_vi<0x6e, DS_WRXCHG2_RTN_B64>;
+def DS_WRXCHG2ST64_RTN_B64_vi : DS_Real_vi<0x6f, DS_WRXCHG2ST64_RTN_B64>;
+def DS_CMPST_RTN_B64_vi : DS_Real_vi<0x70, DS_CMPST_RTN_B64>;
+def DS_CMPST_RTN_F64_vi : DS_Real_vi<0x71, DS_CMPST_RTN_F64>;
+def DS_MIN_RTN_F64_vi : DS_Real_vi<0x72, DS_MIN_RTN_F64>;
+def DS_MAX_RTN_F64_vi : DS_Real_vi<0x73, DS_MAX_RTN_F64>;
+
+def DS_READ_B64_vi : DS_Real_vi<0x76, DS_READ_B64>;
+def DS_READ2_B64_vi : DS_Real_vi<0x77, DS_READ2_B64>;
+def DS_READ2ST64_B64_vi : DS_Real_vi<0x78, DS_READ2ST64_B64>;
+
+def DS_ADD_SRC2_U32_vi : DS_Real_vi<0x80, DS_ADD_SRC2_U32>;
+def DS_SUB_SRC2_U32_vi : DS_Real_vi<0x81, DS_SUB_SRC2_U32>;
+def DS_RSUB_SRC2_U32_vi : DS_Real_vi<0x82, DS_RSUB_SRC2_U32>;
+def DS_INC_SRC2_U32_vi : DS_Real_vi<0x83, DS_INC_SRC2_U32>;
+def DS_DEC_SRC2_U32_vi : DS_Real_vi<0x84, DS_DEC_SRC2_U32>;
+def DS_MIN_SRC2_I32_vi : DS_Real_vi<0x85, DS_MIN_SRC2_I32>;
+def DS_MAX_SRC2_I32_vi : DS_Real_vi<0x86, DS_MAX_SRC2_I32>;
+def DS_MIN_SRC2_U32_vi : DS_Real_vi<0x87, DS_MIN_SRC2_U32>;
+def DS_MAX_SRC2_U32_vi : DS_Real_vi<0x88, DS_MAX_SRC2_U32>;
+def DS_AND_SRC2_B32_vi : DS_Real_vi<0x89, DS_AND_SRC2_B32>;
+def DS_OR_SRC2_B32_vi : DS_Real_vi<0x8a, DS_OR_SRC2_B32>;
+def DS_XOR_SRC2_B32_vi : DS_Real_vi<0x8b, DS_XOR_SRC2_B32>;
+def DS_WRITE_SRC2_B32_vi : DS_Real_vi<0x8d, DS_WRITE_SRC2_B32>;
+def DS_MIN_SRC2_F32_vi : DS_Real_vi<0x92, DS_MIN_SRC2_F32>;
+def DS_MAX_SRC2_F32_vi : DS_Real_vi<0x93, DS_MAX_SRC2_F32>;
+def DS_ADD_SRC2_U64_vi : DS_Real_vi<0xc0, DS_ADD_SRC2_U64>;
+def DS_SUB_SRC2_U64_vi : DS_Real_vi<0xc1, DS_SUB_SRC2_U64>;
+def DS_RSUB_SRC2_U64_vi : DS_Real_vi<0xc2, DS_RSUB_SRC2_U64>;
+def DS_INC_SRC2_U64_vi : DS_Real_vi<0xc3, DS_INC_SRC2_U64>;
+def DS_DEC_SRC2_U64_vi : DS_Real_vi<0xc4, DS_DEC_SRC2_U64>;
+def DS_MIN_SRC2_I64_vi : DS_Real_vi<0xc5, DS_MIN_SRC2_I64>;
+def DS_MAX_SRC2_I64_vi : DS_Real_vi<0xc6, DS_MAX_SRC2_I64>;
+def DS_MIN_SRC2_U64_vi : DS_Real_vi<0xc7, DS_MIN_SRC2_U64>;
+def DS_MAX_SRC2_U64_vi : DS_Real_vi<0xc8, DS_MAX_SRC2_U64>;
+def DS_AND_SRC2_B64_vi : DS_Real_vi<0xc9, DS_AND_SRC2_B64>;
+def DS_OR_SRC2_B64_vi : DS_Real_vi<0xca, DS_OR_SRC2_B64>;
+def DS_XOR_SRC2_B64_vi : DS_Real_vi<0xcb, DS_XOR_SRC2_B64>;
+def DS_WRITE_SRC2_B64_vi : DS_Real_vi<0xcd, DS_WRITE_SRC2_B64>;
+def DS_MIN_SRC2_F64_vi : DS_Real_vi<0xd2, DS_MIN_SRC2_F64>;
+def DS_MAX_SRC2_F64_vi : DS_Real_vi<0xd3, DS_MAX_SRC2_F64>;
diff --git a/contrib/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/contrib/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index e11de855fe5f..2247cad7bb51 100644
--- a/contrib/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -28,6 +28,7 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ELF.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/TargetRegistry.h"
@@ -48,6 +49,18 @@ addOperand(MCInst &Inst, const MCOperand& Opnd) {
MCDisassembler::SoftFail;
}
+static DecodeStatus decodeSoppBrTarget(MCInst &Inst, unsigned Imm,
+ uint64_t Addr, const void *Decoder) {
+ auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
+
+ APInt SignedOffset(18, Imm * 4, true);
+ int64_t Offset = (SignedOffset.sext(64) + 4 + Addr).getSExtValue();
+
+ if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2))
+ return MCDisassembler::Success;
+ return addOperand(Inst, MCOperand::createImm(Imm));
+}
+
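The APInt arithmetic above follows the usual SOPP branch rule: the 16-bit immediate is a signed count of 4-byte words, taken relative to the instruction after the branch (Addr + 4). A minimal stand-alone sketch of the same arithmetic, with made-up example addresses and without LLVM types:

#include <cassert>
#include <cstdint>

// Sketch of the branch-target arithmetic in decodeSoppBrTarget: sign-extend
// the 16-bit word offset, scale to bytes, and add it to Addr + 4.
static int64_t soppBranchTarget(uint64_t Addr, uint16_t Imm) {
  int64_t Words = static_cast<int16_t>(Imm); // sign-extend the 16-bit field
  return static_cast<int64_t>(Addr) + 4 + Words * 4;
}

int main() {
  assert(soppBranchTarget(0x100, 0x0002) == 0x10c); // two words forward
  assert(soppBranchTarget(0x100, 0xffff) == 0x100); // -1 word: back to itself
  return 0;
}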
#define DECODE_OPERAND2(RegClass, DecName) \
static DecodeStatus Decode##RegClass##RegisterClass(MCInst &Inst, \
unsigned Imm, \
@@ -68,12 +81,22 @@ DECODE_OPERAND(VReg_96)
DECODE_OPERAND(VReg_128)
DECODE_OPERAND(SReg_32)
-DECODE_OPERAND(SReg_32_XM0)
+DECODE_OPERAND(SReg_32_XM0_XEXEC)
DECODE_OPERAND(SReg_64)
+DECODE_OPERAND(SReg_64_XEXEC)
DECODE_OPERAND(SReg_128)
DECODE_OPERAND(SReg_256)
DECODE_OPERAND(SReg_512)
+
+static DecodeStatus decodeOperand_VSrc16(MCInst &Inst,
+ unsigned Imm,
+ uint64_t Addr,
+ const void *Decoder) {
+ auto DAsm = static_cast<const AMDGPUDisassembler*>(Decoder);
+ return addOperand(Inst, DAsm->decodeOperand_VSrc16(Imm));
+}
+
#define GET_SUBTARGETINFO_ENUM
#include "AMDGPUGenSubtargetInfo.inc"
#undef GET_SUBTARGETINFO_ENUM
@@ -217,12 +240,14 @@ MCOperand AMDGPUDisassembler::createSRegOperand(unsigned SRegClassID,
// ToDo: unclear if s[88:104] is available on VI. Can we use VCC as SGPR in
// this bundle?
default:
- assert(false);
- break;
+ llvm_unreachable("unhandled register class");
}
- if (Val % (1 << shift))
+
+ if (Val % (1 << shift)) {
*CommentStream << "Warning: " << getRegClassName(SRegClassID)
<< ": scalar reg isn't aligned " << Val;
+ }
+
return createRegOperand(SRegClassID, Val >> shift);
}
@@ -234,7 +259,16 @@ MCOperand AMDGPUDisassembler::decodeOperand_VS_64(unsigned Val) const {
return decodeSrcOp(OPW64, Val);
}
+MCOperand AMDGPUDisassembler::decodeOperand_VSrc16(unsigned Val) const {
+ return decodeSrcOp(OPW16, Val);
+}
+
MCOperand AMDGPUDisassembler::decodeOperand_VGPR_32(unsigned Val) const {
+ // Some instructions have operand restrictions beyond what the encoding
+ // allows. Some ordinarily VSrc_32 operands are VGPR_32, so clear the extra
+ // high bit.
+ Val &= 255;
+
return createRegOperand(AMDGPU::VGPR_32RegClassID, Val);
}
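For the masking above: in the VSrc-style operand encoding, values of 256 and up are assumed to name VGPRs, so clearing the extra high bit maps such a value back onto a plain VGPR index. A tiny illustration (the encoding values are assumptions for the example, not an official decoder):

#include <cassert>

// Hedged sketch: VSrc-style operand values >= 256 name VGPRs, so masking with
// 255 recovers the index handed to createRegOperand.
static unsigned vgprIndexFromVSrc(unsigned Val) { return Val & 255; }

int main() {
  assert(vgprIndexFromVSrc(261) == 5); // VSrc value 261 -> VGPR5
  assert(vgprIndexFromVSrc(5) == 5);   // already a plain VGPR index
  return 0;
}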
@@ -257,13 +291,17 @@ MCOperand AMDGPUDisassembler::decodeOperand_SReg_32(unsigned Val) const {
return decodeSrcOp(OPW32, Val);
}
-MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XM0(unsigned Val) const {
- // SReg_32_XM0 is SReg_32 without M0
+MCOperand AMDGPUDisassembler::decodeOperand_SReg_32_XM0_XEXEC(
+ unsigned Val) const {
+  // SReg_32_XM0_XEXEC is SReg_32 without M0 or EXEC_LO/EXEC_HI
return decodeOperand_SReg_32(Val);
}
MCOperand AMDGPUDisassembler::decodeOperand_SReg_64(unsigned Val) const {
- // see decodeOperand_SReg_32 comment
+ return decodeSrcOp(OPW64, Val);
+}
+
+MCOperand AMDGPUDisassembler::decodeOperand_SReg_64_XEXEC(unsigned Val) const {
return decodeSrcOp(OPW64, Val);
}
@@ -299,28 +337,96 @@ MCOperand AMDGPUDisassembler::decodeIntImmed(unsigned Imm) {
// Cast prevents negative overflow.
}
-MCOperand AMDGPUDisassembler::decodeFPImmed(bool Is32, unsigned Imm) {
+static int64_t getInlineImmVal32(unsigned Imm) {
+ switch (Imm) {
+ case 240:
+ return FloatToBits(0.5f);
+ case 241:
+ return FloatToBits(-0.5f);
+ case 242:
+ return FloatToBits(1.0f);
+ case 243:
+ return FloatToBits(-1.0f);
+ case 244:
+ return FloatToBits(2.0f);
+ case 245:
+ return FloatToBits(-2.0f);
+ case 246:
+ return FloatToBits(4.0f);
+ case 247:
+ return FloatToBits(-4.0f);
+ case 248: // 1 / (2 * PI)
+ return 0x3e22f983;
+ default:
+ llvm_unreachable("invalid fp inline imm");
+ }
+}
+
+static int64_t getInlineImmVal64(unsigned Imm) {
+ switch (Imm) {
+ case 240:
+ return DoubleToBits(0.5);
+ case 241:
+ return DoubleToBits(-0.5);
+ case 242:
+ return DoubleToBits(1.0);
+ case 243:
+ return DoubleToBits(-1.0);
+ case 244:
+ return DoubleToBits(2.0);
+ case 245:
+ return DoubleToBits(-2.0);
+ case 246:
+ return DoubleToBits(4.0);
+ case 247:
+ return DoubleToBits(-4.0);
+ case 248: // 1 / (2 * PI)
+ return 0x3fc45f306dc9c882;
+ default:
+ llvm_unreachable("invalid fp inline imm");
+ }
+}
+
+static int64_t getInlineImmVal16(unsigned Imm) {
+ switch (Imm) {
+ case 240:
+ return 0x3800;
+ case 241:
+ return 0xB800;
+ case 242:
+ return 0x3C00;
+ case 243:
+ return 0xBC00;
+ case 244:
+ return 0x4000;
+ case 245:
+ return 0xC000;
+ case 246:
+ return 0x4400;
+ case 247:
+ return 0xC400;
+ case 248: // 1 / (2 * PI)
+ return 0x3118;
+ default:
+ llvm_unreachable("invalid fp inline imm");
+ }
+}
+
+MCOperand AMDGPUDisassembler::decodeFPImmed(OpWidthTy Width, unsigned Imm) {
assert(Imm >= AMDGPU::EncValues::INLINE_FLOATING_C_MIN
&& Imm <= AMDGPU::EncValues::INLINE_FLOATING_C_MAX);
+
// ToDo: case 248: 1/(2*PI) - is allowed only on VI
- // ToDo: AMDGPUInstPrinter does not support 1/(2*PI). It consider 1/(2*PI) as
- // literal constant.
- float V = 0.0f;
- switch (Imm) {
- case 240: V = 0.5f; break;
- case 241: V = -0.5f; break;
- case 242: V = 1.0f; break;
- case 243: V = -1.0f; break;
- case 244: V = 2.0f; break;
- case 245: V = -2.0f; break;
- case 246: V = 4.0f; break;
- case 247: V = -4.0f; break;
- case 248: return MCOperand::createImm(Is32 ? // 1/(2*PI)
- 0x3e22f983 :
- 0x3fc45f306dc9c882);
- default: break;
+ switch (Width) {
+ case OPW32:
+ return MCOperand::createImm(getInlineImmVal32(Imm));
+ case OPW64:
+ return MCOperand::createImm(getInlineImmVal64(Imm));
+ case OPW16:
+ return MCOperand::createImm(getInlineImmVal16(Imm));
+ default:
+ llvm_unreachable("implement me");
}
- return MCOperand::createImm(Is32? FloatToBits(V) : DoubleToBits(V));
}
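The three tables above return raw IEEE bit patterns for the same nine inline constants at 32-, 64- and 16-bit operand widths; the 16-bit table stores half-precision bits directly (e.g. 1.0 is 0x3C00 for encoding 242). A quick stand-alone check of two of the 32-bit entries:

#include <cassert>
#include <cstdint>
#include <cstring>

// Sketch verifying two entries of the 32-bit inline-constant table above by
// reinterpreting the float's bits (the same thing FloatToBits does).
static uint32_t floatBits(float F) {
  uint32_t B;
  std::memcpy(&B, &F, sizeof(B));
  return B;
}

int main() {
  assert(floatBits(1.0f) == 0x3f800000u);  // encoding 242
  assert(floatBits(-4.0f) == 0xc0800000u); // encoding 247
  return 0;
}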
unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const {
@@ -328,7 +434,9 @@ unsigned AMDGPUDisassembler::getVgprClassId(const OpWidthTy Width) const {
assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
switch (Width) {
default: // fall
- case OPW32: return VGPR_32RegClassID;
+ case OPW32:
+ case OPW16:
+ return VGPR_32RegClassID;
case OPW64: return VReg_64RegClassID;
case OPW128: return VReg_128RegClassID;
}
@@ -339,7 +447,9 @@ unsigned AMDGPUDisassembler::getSgprClassId(const OpWidthTy Width) const {
assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
switch (Width) {
default: // fall
- case OPW32: return SGPR_32RegClassID;
+ case OPW32:
+ case OPW16:
+ return SGPR_32RegClassID;
case OPW64: return SGPR_64RegClassID;
case OPW128: return SGPR_128RegClassID;
}
@@ -350,7 +460,9 @@ unsigned AMDGPUDisassembler::getTtmpClassId(const OpWidthTy Width) const {
assert(OPW_FIRST_ <= Width && Width < OPW_LAST_);
switch (Width) {
default: // fall
- case OPW32: return TTMP_32RegClassID;
+ case OPW32:
+ case OPW16:
+ return TTMP_32RegClassID;
case OPW64: return TTMP_64RegClassID;
case OPW128: return TTMP_128RegClassID;
}
@@ -371,19 +483,26 @@ MCOperand AMDGPUDisassembler::decodeSrcOp(const OpWidthTy Width, unsigned Val) c
return createSRegOperand(getTtmpClassId(Width), Val - TTMP_MIN);
}
- assert(Width == OPW32 || Width == OPW64);
- const bool Is32 = (Width == OPW32);
+ assert(Width == OPW16 || Width == OPW32 || Width == OPW64);
if (INLINE_INTEGER_C_MIN <= Val && Val <= INLINE_INTEGER_C_MAX)
return decodeIntImmed(Val);
if (INLINE_FLOATING_C_MIN <= Val && Val <= INLINE_FLOATING_C_MAX)
- return decodeFPImmed(Is32, Val);
+ return decodeFPImmed(Width, Val);
if (Val == LITERAL_CONST)
return decodeLiteralConstant();
- return Is32 ? decodeSpecialReg32(Val) : decodeSpecialReg64(Val);
+ switch (Width) {
+ case OPW32:
+ case OPW16:
+ return decodeSpecialReg32(Val);
+ case OPW64:
+ return decodeSpecialReg64(Val);
+ default:
+ llvm_unreachable("unexpected immediate type");
+ }
}
MCOperand AMDGPUDisassembler::decodeSpecialReg32(unsigned Val) const {
@@ -426,6 +545,56 @@ MCOperand AMDGPUDisassembler::decodeSpecialReg64(unsigned Val) const {
return errOperand(Val, "unknown operand encoding " + Twine(Val));
}
+//===----------------------------------------------------------------------===//
+// AMDGPUSymbolizer
+//===----------------------------------------------------------------------===//
+
+// Try to find a symbol name for the specified label
+bool AMDGPUSymbolizer::tryAddingSymbolicOperand(MCInst &Inst,
+ raw_ostream &/*cStream*/, int64_t Value,
+ uint64_t /*Address*/, bool IsBranch,
+ uint64_t /*Offset*/, uint64_t /*InstSize*/) {
+ typedef std::tuple<uint64_t, StringRef, uint8_t> SymbolInfoTy;
+ typedef std::vector<SymbolInfoTy> SectionSymbolsTy;
+
+ if (!IsBranch) {
+ return false;
+ }
+
+ auto *Symbols = static_cast<SectionSymbolsTy *>(DisInfo);
+ auto Result = std::find_if(Symbols->begin(), Symbols->end(),
+ [Value](const SymbolInfoTy& Val) {
+ return std::get<0>(Val) == static_cast<uint64_t>(Value)
+ && std::get<2>(Val) == ELF::STT_NOTYPE;
+ });
+ if (Result != Symbols->end()) {
+ auto *Sym = Ctx.getOrCreateSymbol(std::get<1>(*Result));
+ const auto *Add = MCSymbolRefExpr::create(Sym, Ctx);
+ Inst.addOperand(MCOperand::createExpr(Add));
+ return true;
+ }
+ return false;
+}
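The symbolizer above assumes the opaque DisInfo pointer refers to a vector of (address, name, ELF symbol type) tuples supplied by the disassembler's caller. A hedged sketch of that assumed shape; the label name and address are invented for illustration:

#include <cstdint>
#include <tuple>
#include <vector>
#include "llvm/ADT/StringRef.h"

// Assumed layout of the table reached through DisInfo: one entry per symbol,
// matching the SymbolInfoTy typedef inside tryAddingSymbolicOperand.
typedef std::tuple<uint64_t, llvm::StringRef, uint8_t> SymbolInfoTy;

static std::vector<SymbolInfoTy> makeExampleSymbols() {
  std::vector<SymbolInfoTy> Symbols;
  // STT_NOTYPE == 0; a branch resolving to 0x100 would then print as BB0_1.
  Symbols.emplace_back(UINT64_C(0x100), "BB0_1", 0);
  return Symbols;
}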
+
+void AMDGPUSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream,
+ int64_t Value,
+ uint64_t Address) {
+ llvm_unreachable("unimplemented");
+}
+
+//===----------------------------------------------------------------------===//
+// Initialization
+//===----------------------------------------------------------------------===//
+
+static MCSymbolizer *createAMDGPUSymbolizer(const Triple &/*TT*/,
+ LLVMOpInfoCallback /*GetOpInfo*/,
+ LLVMSymbolLookupCallback /*SymbolLookUp*/,
+ void *DisInfo,
+ MCContext *Ctx,
+ std::unique_ptr<MCRelocationInfo> &&RelInfo) {
+ return new AMDGPUSymbolizer(*Ctx, std::move(RelInfo), DisInfo);
+}
+
static MCDisassembler *createAMDGPUDisassembler(const Target &T,
const MCSubtargetInfo &STI,
MCContext &Ctx) {
@@ -433,5 +602,8 @@ static MCDisassembler *createAMDGPUDisassembler(const Target &T,
}
extern "C" void LLVMInitializeAMDGPUDisassembler() {
- TargetRegistry::RegisterMCDisassembler(TheGCNTarget, createAMDGPUDisassembler);
+ TargetRegistry::RegisterMCDisassembler(getTheGCNTarget(),
+ createAMDGPUDisassembler);
+ TargetRegistry::RegisterMCSymbolizer(getTheGCNTarget(),
+ createAMDGPUSymbolizer);
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/contrib/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index dff26a044bf5..ee5883a984e0 100644
--- a/contrib/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/contrib/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -18,76 +18,113 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
+#include "llvm/MC/MCDisassembler/MCRelocationInfo.h"
+#include "llvm/MC/MCDisassembler/MCSymbolizer.h"
+#include <cstdint>
+#include <algorithm>
+#include <memory>
namespace llvm {
- class MCContext;
- class MCInst;
- class MCOperand;
- class MCSubtargetInfo;
- class Twine;
-
- class AMDGPUDisassembler : public MCDisassembler {
- private:
- mutable ArrayRef<uint8_t> Bytes;
-
- public:
- AMDGPUDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) :
- MCDisassembler(STI, Ctx) {}
-
- ~AMDGPUDisassembler() {}
-
- DecodeStatus getInstruction(MCInst &MI, uint64_t &Size,
- ArrayRef<uint8_t> Bytes, uint64_t Address,
- raw_ostream &WS, raw_ostream &CS) const override;
-
- const char* getRegClassName(unsigned RegClassID) const;
-
- MCOperand createRegOperand(unsigned int RegId) const;
- MCOperand createRegOperand(unsigned RegClassID, unsigned Val) const;
- MCOperand createSRegOperand(unsigned SRegClassID, unsigned Val) const;
-
- MCOperand errOperand(unsigned V, const llvm::Twine& ErrMsg) const;
-
- DecodeStatus tryDecodeInst(const uint8_t* Table,
- MCInst &MI,
- uint64_t Inst,
- uint64_t Address) const;
-
- MCOperand decodeOperand_VGPR_32(unsigned Val) const;
- MCOperand decodeOperand_VS_32(unsigned Val) const;
- MCOperand decodeOperand_VS_64(unsigned Val) const;
-
- MCOperand decodeOperand_VReg_64(unsigned Val) const;
- MCOperand decodeOperand_VReg_96(unsigned Val) const;
- MCOperand decodeOperand_VReg_128(unsigned Val) const;
-
- MCOperand decodeOperand_SReg_32(unsigned Val) const;
- MCOperand decodeOperand_SReg_32_XM0(unsigned Val) const;
- MCOperand decodeOperand_SReg_64(unsigned Val) const;
- MCOperand decodeOperand_SReg_128(unsigned Val) const;
- MCOperand decodeOperand_SReg_256(unsigned Val) const;
- MCOperand decodeOperand_SReg_512(unsigned Val) const;
-
- enum OpWidthTy {
- OPW32,
- OPW64,
- OPW128,
- OPW_LAST_,
- OPW_FIRST_ = OPW32
- };
- unsigned getVgprClassId(const OpWidthTy Width) const;
- unsigned getSgprClassId(const OpWidthTy Width) const;
- unsigned getTtmpClassId(const OpWidthTy Width) const;
-
- static MCOperand decodeIntImmed(unsigned Imm);
- static MCOperand decodeFPImmed(bool Is32, unsigned Imm);
- MCOperand decodeLiteralConstant() const;
-
- MCOperand decodeSrcOp(const OpWidthTy Width, unsigned Val) const;
- MCOperand decodeSpecialReg32(unsigned Val) const;
- MCOperand decodeSpecialReg64(unsigned Val) const;
+class MCContext;
+class MCInst;
+class MCOperand;
+class MCSubtargetInfo;
+class Twine;
+
+//===----------------------------------------------------------------------===//
+// AMDGPUDisassembler
+//===----------------------------------------------------------------------===//
+
+class AMDGPUDisassembler : public MCDisassembler {
+private:
+ mutable ArrayRef<uint8_t> Bytes;
+
+public:
+ AMDGPUDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx) :
+ MCDisassembler(STI, Ctx) {}
+
+ ~AMDGPUDisassembler() override = default;
+
+ DecodeStatus getInstruction(MCInst &MI, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &WS, raw_ostream &CS) const override;
+
+ const char* getRegClassName(unsigned RegClassID) const;
+
+ MCOperand createRegOperand(unsigned int RegId) const;
+ MCOperand createRegOperand(unsigned RegClassID, unsigned Val) const;
+ MCOperand createSRegOperand(unsigned SRegClassID, unsigned Val) const;
+
+ MCOperand errOperand(unsigned V, const Twine& ErrMsg) const;
+
+ DecodeStatus tryDecodeInst(const uint8_t* Table,
+ MCInst &MI,
+ uint64_t Inst,
+ uint64_t Address) const;
+
+ MCOperand decodeOperand_VGPR_32(unsigned Val) const;
+ MCOperand decodeOperand_VS_32(unsigned Val) const;
+ MCOperand decodeOperand_VS_64(unsigned Val) const;
+ MCOperand decodeOperand_VSrc16(unsigned Val) const;
+
+ MCOperand decodeOperand_VReg_64(unsigned Val) const;
+ MCOperand decodeOperand_VReg_96(unsigned Val) const;
+ MCOperand decodeOperand_VReg_128(unsigned Val) const;
+
+ MCOperand decodeOperand_SReg_32(unsigned Val) const;
+ MCOperand decodeOperand_SReg_32_XM0_XEXEC(unsigned Val) const;
+ MCOperand decodeOperand_SReg_64(unsigned Val) const;
+ MCOperand decodeOperand_SReg_64_XEXEC(unsigned Val) const;
+ MCOperand decodeOperand_SReg_128(unsigned Val) const;
+ MCOperand decodeOperand_SReg_256(unsigned Val) const;
+ MCOperand decodeOperand_SReg_512(unsigned Val) const;
+
+ enum OpWidthTy {
+ OPW32,
+ OPW64,
+ OPW128,
+ OPW16,
+ OPW_LAST_,
+ OPW_FIRST_ = OPW32
};
-} // namespace llvm
-#endif //LLVM_LIB_TARGET_AMDGPU_DISASSEMBLER_AMDGPUDISASSEMBLER_H
+ unsigned getVgprClassId(const OpWidthTy Width) const;
+ unsigned getSgprClassId(const OpWidthTy Width) const;
+ unsigned getTtmpClassId(const OpWidthTy Width) const;
+
+ static MCOperand decodeIntImmed(unsigned Imm);
+ static MCOperand decodeFPImmed(OpWidthTy Width, unsigned Imm);
+ MCOperand decodeLiteralConstant() const;
+
+ MCOperand decodeSrcOp(const OpWidthTy Width, unsigned Val) const;
+ MCOperand decodeSpecialReg32(unsigned Val) const;
+ MCOperand decodeSpecialReg64(unsigned Val) const;
+};
+
+//===----------------------------------------------------------------------===//
+// AMDGPUSymbolizer
+//===----------------------------------------------------------------------===//
+
+class AMDGPUSymbolizer : public MCSymbolizer {
+private:
+ void *DisInfo;
+
+public:
+ AMDGPUSymbolizer(MCContext &Ctx, std::unique_ptr<MCRelocationInfo> &&RelInfo,
+ void *disInfo)
+ : MCSymbolizer(Ctx, std::move(RelInfo)), DisInfo(disInfo) {}
+
+ bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &cStream,
+ int64_t Value, uint64_t Address,
+ bool IsBranch, uint64_t Offset,
+ uint64_t InstSize) override;
+
+ void tryAddingPcLoadReferenceComment(raw_ostream &cStream,
+ int64_t Value,
+ uint64_t Address) override;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_DISASSEMBLER_AMDGPUDISASSEMBLER_H
diff --git a/contrib/llvm/lib/Target/AMDGPU/EvergreenInstructions.td b/contrib/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
index 94f05cc41aff..4112ad100584 100644
--- a/contrib/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/EvergreenInstructions.td
@@ -72,6 +72,8 @@ def MULLO_INT_eg : MULLO_INT_Common<0x8F>;
def MULHI_INT_eg : MULHI_INT_Common<0x90>;
def MULLO_UINT_eg : MULLO_UINT_Common<0x91>;
def MULHI_UINT_eg : MULHI_UINT_Common<0x92>;
+def MULHI_UINT24_eg : MULHI_UINT24_Common<0xb2>;
+
def RECIP_UINT_eg : RECIP_UINT_Common<0x94>;
def RECIPSQRT_CLAMPED_eg : RECIPSQRT_CLAMPED_Common<0x87>;
def EXP_IEEE_eg : EXP_IEEE_Common<0x81>;
@@ -116,14 +118,13 @@ def RAT_STORE_TYPED_eg: CF_MEM_RAT_STORE_TYPED<1>;
} // End usesCustomInserter = 1
-class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
- : VTX_WORD0_eg, VTX_READ<name, buffer_id, outs, pattern> {
+class VTX_READ_eg <string name, dag outs>
+ : VTX_WORD0_eg, VTX_READ<name, outs, []> {
// Static fields
let VC_INST = 0;
let FETCH_TYPE = 2;
let FETCH_WHOLE_QUAD = 0;
- let BUFFER_ID = buffer_id;
let SRC_REL = 0;
// XXX: We can infer this field based on the SRC_GPR. This would allow us
// to store vertex addresses in any channel, not just X.
@@ -132,9 +133,9 @@ class VTX_READ_eg <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
let Inst{31-0} = Word0;
}
-class VTX_READ_8_eg <bits<8> buffer_id, list<dag> pattern>
- : VTX_READ_eg <"VTX_READ_8 $dst_gpr, $src_gpr", buffer_id,
- (outs R600_TReg32_X:$dst_gpr), pattern> {
+def VTX_READ_8_eg
+ : VTX_READ_eg <"VTX_READ_8 $dst_gpr, $src_gpr",
+ (outs R600_TReg32_X:$dst_gpr)> {
let MEGA_FETCH_COUNT = 1;
let DST_SEL_X = 0;
@@ -144,9 +145,9 @@ class VTX_READ_8_eg <bits<8> buffer_id, list<dag> pattern>
let DATA_FORMAT = 1; // FMT_8
}
-class VTX_READ_16_eg <bits<8> buffer_id, list<dag> pattern>
- : VTX_READ_eg <"VTX_READ_16 $dst_gpr, $src_gpr", buffer_id,
- (outs R600_TReg32_X:$dst_gpr), pattern> {
+def VTX_READ_16_eg
+ : VTX_READ_eg <"VTX_READ_16 $dst_gpr, $src_gpr",
+ (outs R600_TReg32_X:$dst_gpr)> {
let MEGA_FETCH_COUNT = 2;
let DST_SEL_X = 0;
let DST_SEL_Y = 7; // Masked
@@ -156,9 +157,9 @@ class VTX_READ_16_eg <bits<8> buffer_id, list<dag> pattern>
}
-class VTX_READ_32_eg <bits<8> buffer_id, list<dag> pattern>
- : VTX_READ_eg <"VTX_READ_32 $dst_gpr, $src_gpr", buffer_id,
- (outs R600_TReg32_X:$dst_gpr), pattern> {
+def VTX_READ_32_eg
+ : VTX_READ_eg <"VTX_READ_32 $dst_gpr, $src_gpr",
+ (outs R600_TReg32_X:$dst_gpr)> {
let MEGA_FETCH_COUNT = 4;
let DST_SEL_X = 0;
@@ -177,9 +178,9 @@ class VTX_READ_32_eg <bits<8> buffer_id, list<dag> pattern>
let Constraints = "$src_gpr.ptr = $dst_gpr";
}
-class VTX_READ_64_eg <bits<8> buffer_id, list<dag> pattern>
- : VTX_READ_eg <"VTX_READ_64 $dst_gpr.XY, $src_gpr", buffer_id,
- (outs R600_Reg64:$dst_gpr), pattern> {
+def VTX_READ_64_eg
+ : VTX_READ_eg <"VTX_READ_64 $dst_gpr.XY, $src_gpr",
+ (outs R600_Reg64:$dst_gpr)> {
let MEGA_FETCH_COUNT = 8;
let DST_SEL_X = 0;
@@ -189,9 +190,9 @@ class VTX_READ_64_eg <bits<8> buffer_id, list<dag> pattern>
let DATA_FORMAT = 0x1D; // COLOR_32_32
}
-class VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern>
- : VTX_READ_eg <"VTX_READ_128 $dst_gpr.XYZW, $src_gpr", buffer_id,
- (outs R600_Reg128:$dst_gpr), pattern> {
+def VTX_READ_128_eg
+ : VTX_READ_eg <"VTX_READ_128 $dst_gpr.XYZW, $src_gpr",
+ (outs R600_Reg128:$dst_gpr)> {
let MEGA_FETCH_COUNT = 16;
let DST_SEL_X = 0;
@@ -209,80 +210,44 @@ class VTX_READ_128_eg <bits<8> buffer_id, list<dag> pattern>
//===----------------------------------------------------------------------===//
// VTX Read from parameter memory space
//===----------------------------------------------------------------------===//
+def : Pat<(i32:$dst_gpr (vtx_id3_az_extloadi8 ADDRVTX_READ:$src_gpr)),
+ (VTX_READ_8_eg MEMxi:$src_gpr, 3)>;
+def : Pat<(i32:$dst_gpr (vtx_id3_az_extloadi16 ADDRVTX_READ:$src_gpr)),
+ (VTX_READ_16_eg MEMxi:$src_gpr, 3)>;
+def : Pat<(i32:$dst_gpr (vtx_id3_load ADDRVTX_READ:$src_gpr)),
+ (VTX_READ_32_eg MEMxi:$src_gpr, 3)>;
+def : Pat<(v2i32:$dst_gpr (vtx_id3_load ADDRVTX_READ:$src_gpr)),
+ (VTX_READ_64_eg MEMxi:$src_gpr, 3)>;
+def : Pat<(v4i32:$dst_gpr (vtx_id3_load ADDRVTX_READ:$src_gpr)),
+ (VTX_READ_128_eg MEMxi:$src_gpr, 3)>;
-def VTX_READ_PARAM_8_eg : VTX_READ_8_eg <3,
- [(set i32:$dst_gpr, (load_param_exti8 ADDRVTX_READ:$src_gpr))]
->;
-
-def VTX_READ_PARAM_16_eg : VTX_READ_16_eg <3,
- [(set i32:$dst_gpr, (load_param_exti16 ADDRVTX_READ:$src_gpr))]
->;
-
-def VTX_READ_PARAM_32_eg : VTX_READ_32_eg <3,
- [(set i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
->;
-
-def VTX_READ_PARAM_64_eg : VTX_READ_64_eg <3,
- [(set v2i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
->;
-
-def VTX_READ_PARAM_128_eg : VTX_READ_128_eg <3,
- [(set v4i32:$dst_gpr, (load_param ADDRVTX_READ:$src_gpr))]
->;
+//===----------------------------------------------------------------------===//
+// VTX Read from constant memory space
+//===----------------------------------------------------------------------===//
+def : Pat<(i32:$dst_gpr (vtx_id2_az_extloadi8 ADDRVTX_READ:$src_gpr)),
+ (VTX_READ_8_eg MEMxi:$src_gpr, 2)>;
+def : Pat<(i32:$dst_gpr (vtx_id2_az_extloadi16 ADDRVTX_READ:$src_gpr)),
+ (VTX_READ_16_eg MEMxi:$src_gpr, 2)>;
+def : Pat<(i32:$dst_gpr (vtx_id2_load ADDRVTX_READ:$src_gpr)),
+ (VTX_READ_32_eg MEMxi:$src_gpr, 2)>;
+def : Pat<(v2i32:$dst_gpr (vtx_id2_load ADDRVTX_READ:$src_gpr)),
+ (VTX_READ_64_eg MEMxi:$src_gpr, 2)>;
+def : Pat<(v4i32:$dst_gpr (vtx_id2_load ADDRVTX_READ:$src_gpr)),
+ (VTX_READ_128_eg MEMxi:$src_gpr, 2)>;
//===----------------------------------------------------------------------===//
// VTX Read from global memory space
//===----------------------------------------------------------------------===//
-
-// 8-bit reads
-def VTX_READ_ID1_8_eg : VTX_READ_8_eg <1,
- [(set i32:$dst_gpr, (vtx_id1_az_extloadi8 ADDRVTX_READ:$src_gpr))]
->;
-
-// 16-bit reads
-def VTX_READ_ID1_16_eg : VTX_READ_16_eg <1,
- [(set i32:$dst_gpr, (vtx_id1_az_extloadi16 ADDRVTX_READ:$src_gpr))]
->;
-
-// 32-bit reads
-def VTX_READ_ID1_32_eg : VTX_READ_32_eg <1,
- [(set i32:$dst_gpr, (vtx_id1_load ADDRVTX_READ:$src_gpr))]
->;
-
-// 64-bit reads
-def VTX_READ_ID1_64_eg : VTX_READ_64_eg <1,
- [(set v2i32:$dst_gpr, (vtx_id1_load ADDRVTX_READ:$src_gpr))]
->;
-
-// 128-bit reads
-def VTX_READ_ID1_128_eg : VTX_READ_128_eg <1,
- [(set v4i32:$dst_gpr, (vtx_id1_load ADDRVTX_READ:$src_gpr))]
->;
-
-// 8-bit reads
-def VTX_READ_ID2_8_eg : VTX_READ_8_eg <2,
- [(set i32:$dst_gpr, (vtx_id2_az_extloadi8 ADDRVTX_READ:$src_gpr))]
->;
-
-// 16-bit reads
-def VTX_READ_ID2_16_eg : VTX_READ_16_eg <2,
- [(set i32:$dst_gpr, (vtx_id2_az_extloadi16 ADDRVTX_READ:$src_gpr))]
->;
-
-// 32-bit reads
-def VTX_READ_ID2_32_eg : VTX_READ_32_eg <2,
- [(set i32:$dst_gpr, (vtx_id2_load ADDRVTX_READ:$src_gpr))]
->;
-
-// 64-bit reads
-def VTX_READ_ID2_64_eg : VTX_READ_64_eg <2,
- [(set v2i32:$dst_gpr, (vtx_id2_load ADDRVTX_READ:$src_gpr))]
->;
-
-// 128-bit reads
-def VTX_READ_ID2_128_eg : VTX_READ_128_eg <2,
- [(set v4i32:$dst_gpr, (vtx_id2_load ADDRVTX_READ:$src_gpr))]
->;
+def : Pat<(i32:$dst_gpr (vtx_id1_az_extloadi8 ADDRVTX_READ:$src_gpr)),
+ (VTX_READ_8_eg MEMxi:$src_gpr, 1)>;
+def : Pat<(i32:$dst_gpr (vtx_id1_az_extloadi16 ADDRVTX_READ:$src_gpr)),
+ (VTX_READ_16_eg MEMxi:$src_gpr, 1)>;
+def : Pat<(i32:$dst_gpr (vtx_id1_load ADDRVTX_READ:$src_gpr)),
+ (VTX_READ_32_eg MEMxi:$src_gpr, 1)>;
+def : Pat<(v2i32:$dst_gpr (vtx_id1_load ADDRVTX_READ:$src_gpr)),
+ (VTX_READ_64_eg MEMxi:$src_gpr, 1)>;
+def : Pat<(v4i32:$dst_gpr (vtx_id1_load ADDRVTX_READ:$src_gpr)),
+ (VTX_READ_128_eg MEMxi:$src_gpr, 1)>;
} // End Predicates = [isEG]
diff --git a/contrib/llvm/lib/Target/AMDGPU/FLATInstructions.td b/contrib/llvm/lib/Target/AMDGPU/FLATInstructions.td
new file mode 100644
index 000000000000..849fb8ad50f5
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -0,0 +1,530 @@
+//===-- FLATInstructions.td - FLAT Instruction Definitions ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+def FLATAtomic : ComplexPattern<i64, 3, "SelectFlat">;
+
+//===----------------------------------------------------------------------===//
+// FLAT classes
+//===----------------------------------------------------------------------===//
+
+class FLAT_Pseudo<string opName, dag outs, dag ins,
+ string asmOps, list<dag> pattern=[]> :
+ InstSI<outs, ins, "", pattern>,
+ SIMCInstr<opName, SIEncodingFamily.NONE> {
+
+ let isPseudo = 1;
+ let isCodeGenOnly = 1;
+
+ let SubtargetPredicate = isCIVI;
+
+ let FLAT = 1;
+  // Internally, FLAT instructions are executed as both an LDS and a Buffer
+  // instruction, so they increment both VM_CNT and LGKM_CNT and are not
+  // considered done until both have been decremented.
+ let VM_CNT = 1;
+ let LGKM_CNT = 1;
+
+ let Uses = [EXEC, FLAT_SCR]; // M0
+
+ let UseNamedOperandTable = 1;
+ let hasSideEffects = 0;
+ let SchedRW = [WriteVMEM];
+
+ string Mnemonic = opName;
+ string AsmOperands = asmOps;
+
+ bits<1> has_vdst = 1;
+ bits<1> has_data = 1;
+ bits<1> has_glc = 1;
+ bits<1> glcValue = 0;
+}
+
+class FLAT_Real <bits<7> op, FLAT_Pseudo ps> :
+ InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
+ Enc64 {
+
+ let isPseudo = 0;
+ let isCodeGenOnly = 0;
+
+ // copy relevant pseudo op flags
+ let SubtargetPredicate = ps.SubtargetPredicate;
+ let AsmMatchConverter = ps.AsmMatchConverter;
+
+ // encoding fields
+ bits<8> vaddr;
+ bits<8> vdata;
+ bits<8> vdst;
+ bits<1> slc;
+ bits<1> glc;
+ bits<1> tfe;
+
+ // 15-0 is reserved.
+ let Inst{16} = !if(ps.has_glc, glc, ps.glcValue);
+ let Inst{17} = slc;
+ let Inst{24-18} = op;
+ let Inst{31-26} = 0x37; // Encoding.
+ let Inst{39-32} = vaddr;
+ let Inst{47-40} = !if(ps.has_data, vdata, ?);
+ // 54-48 is reserved.
+ let Inst{55} = tfe;
+ let Inst{63-56} = !if(ps.has_vdst, vdst, ?);
+}
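FLAT_Real above fixes the 64-bit encoding layout shared by all FLAT instructions. A small stand-alone sketch of the same field positions, for reading the fields back out of an encoded doubleword (illustrative only, not the TableGen-generated decoder):

#include <cstdint>

// Field positions mirror the Inst{...} assignments in FLAT_Real above.
struct FlatFields {
  unsigned Glc, Slc, Op, VAddr, VData, Tfe, VDst;
};

static FlatFields decodeFlatWord(uint64_t Inst) {
  FlatFields F;
  F.Glc   = (Inst >> 16) & 0x1;
  F.Slc   = (Inst >> 17) & 0x1;
  F.Op    = (Inst >> 18) & 0x7f;  // bits 24-18
  // bits 31-26 hold the fixed FLAT encoding value 0x37
  F.VAddr = (Inst >> 32) & 0xff;
  F.VData = (Inst >> 40) & 0xff;
  F.Tfe   = (Inst >> 55) & 0x1;
  F.VDst  = (Inst >> 56) & 0xff;
  return F;
}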
+
+class FLAT_Load_Pseudo <string opName, RegisterClass regClass> : FLAT_Pseudo<
+ opName,
+ (outs regClass:$vdst),
+ (ins VReg_64:$vaddr, GLC:$glc, slc:$slc, tfe:$tfe),
+ " $vdst, $vaddr$glc$slc$tfe"> {
+ let has_data = 0;
+ let mayLoad = 1;
+}
+
+class FLAT_Store_Pseudo <string opName, RegisterClass vdataClass> : FLAT_Pseudo<
+ opName,
+ (outs),
+ (ins VReg_64:$vaddr, vdataClass:$vdata, GLC:$glc, slc:$slc, tfe:$tfe),
+ " $vaddr, $vdata$glc$slc$tfe"> {
+ let mayLoad = 0;
+ let mayStore = 1;
+ let has_vdst = 0;
+}
+
+multiclass FLAT_Atomic_Pseudo<
+ string opName,
+ RegisterClass vdst_rc,
+ ValueType vt,
+ SDPatternOperator atomic = null_frag,
+ ValueType data_vt = vt,
+ RegisterClass data_rc = vdst_rc> {
+
+ def "" : FLAT_Pseudo <opName,
+ (outs),
+ (ins VReg_64:$vaddr, data_rc:$vdata, slc:$slc, tfe:$tfe),
+ " $vaddr, $vdata$slc$tfe",
+ []>,
+ AtomicNoRet <NAME, 0> {
+ let mayLoad = 1;
+ let mayStore = 1;
+ let has_glc = 0;
+ let glcValue = 0;
+ let has_vdst = 0;
+ let PseudoInstr = NAME;
+ }
+
+ def _RTN : FLAT_Pseudo <opName,
+ (outs vdst_rc:$vdst),
+ (ins VReg_64:$vaddr, data_rc:$vdata, slc:$slc, tfe:$tfe),
+ " $vdst, $vaddr, $vdata glc$slc$tfe",
+ [(set vt:$vdst,
+ (atomic (FLATAtomic i64:$vaddr, i1:$slc, i1:$tfe), data_vt:$vdata))]>,
+ AtomicNoRet <NAME, 1> {
+ let mayLoad = 1;
+ let mayStore = 1;
+ let hasPostISelHook = 1;
+ let has_glc = 0;
+ let glcValue = 1;
+ let PseudoInstr = NAME # "_RTN";
+ }
+}
+
+class flat_binary_atomic_op<SDNode atomic_op> : PatFrag<
+ (ops node:$ptr, node:$value),
+ (atomic_op node:$ptr, node:$value),
+ [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;}]
+>;
+
+def atomic_cmp_swap_flat : flat_binary_atomic_op<AMDGPUatomic_cmp_swap>;
+def atomic_swap_flat : flat_binary_atomic_op<atomic_swap>;
+def atomic_add_flat : flat_binary_atomic_op<atomic_load_add>;
+def atomic_and_flat : flat_binary_atomic_op<atomic_load_and>;
+def atomic_max_flat : flat_binary_atomic_op<atomic_load_max>;
+def atomic_min_flat : flat_binary_atomic_op<atomic_load_min>;
+def atomic_or_flat : flat_binary_atomic_op<atomic_load_or>;
+def atomic_sub_flat : flat_binary_atomic_op<atomic_load_sub>;
+def atomic_umax_flat : flat_binary_atomic_op<atomic_load_umax>;
+def atomic_umin_flat : flat_binary_atomic_op<atomic_load_umin>;
+def atomic_xor_flat : flat_binary_atomic_op<atomic_load_xor>;
+def atomic_inc_flat : flat_binary_atomic_op<SIatomic_inc>;
+def atomic_dec_flat : flat_binary_atomic_op<SIatomic_dec>;
+
+
+
+//===----------------------------------------------------------------------===//
+// Flat Instructions
+//===----------------------------------------------------------------------===//
+
+def FLAT_LOAD_UBYTE : FLAT_Load_Pseudo <"flat_load_ubyte", VGPR_32>;
+def FLAT_LOAD_SBYTE : FLAT_Load_Pseudo <"flat_load_sbyte", VGPR_32>;
+def FLAT_LOAD_USHORT : FLAT_Load_Pseudo <"flat_load_ushort", VGPR_32>;
+def FLAT_LOAD_SSHORT : FLAT_Load_Pseudo <"flat_load_sshort", VGPR_32>;
+def FLAT_LOAD_DWORD : FLAT_Load_Pseudo <"flat_load_dword", VGPR_32>;
+def FLAT_LOAD_DWORDX2 : FLAT_Load_Pseudo <"flat_load_dwordx2", VReg_64>;
+def FLAT_LOAD_DWORDX4 : FLAT_Load_Pseudo <"flat_load_dwordx4", VReg_128>;
+def FLAT_LOAD_DWORDX3 : FLAT_Load_Pseudo <"flat_load_dwordx3", VReg_96>;
+
+def FLAT_STORE_BYTE : FLAT_Store_Pseudo <"flat_store_byte", VGPR_32>;
+def FLAT_STORE_SHORT : FLAT_Store_Pseudo <"flat_store_short", VGPR_32>;
+def FLAT_STORE_DWORD : FLAT_Store_Pseudo <"flat_store_dword", VGPR_32>;
+def FLAT_STORE_DWORDX2 : FLAT_Store_Pseudo <"flat_store_dwordx2", VReg_64>;
+def FLAT_STORE_DWORDX4 : FLAT_Store_Pseudo <"flat_store_dwordx4", VReg_128>;
+def FLAT_STORE_DWORDX3 : FLAT_Store_Pseudo <"flat_store_dwordx3", VReg_96>;
+
+defm FLAT_ATOMIC_CMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap",
+ VGPR_32, i32, atomic_cmp_swap_flat,
+ v2i32, VReg_64>;
+
+defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_cmpswap_x2",
+ VReg_64, i64, atomic_cmp_swap_flat,
+ v2i64, VReg_128>;
+
+defm FLAT_ATOMIC_SWAP : FLAT_Atomic_Pseudo <"flat_atomic_swap",
+ VGPR_32, i32, atomic_swap_flat>;
+
+defm FLAT_ATOMIC_SWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_swap_x2",
+ VReg_64, i64, atomic_swap_flat>;
+
+defm FLAT_ATOMIC_ADD : FLAT_Atomic_Pseudo <"flat_atomic_add",
+ VGPR_32, i32, atomic_add_flat>;
+
+defm FLAT_ATOMIC_SUB : FLAT_Atomic_Pseudo <"flat_atomic_sub",
+ VGPR_32, i32, atomic_sub_flat>;
+
+defm FLAT_ATOMIC_SMIN : FLAT_Atomic_Pseudo <"flat_atomic_smin",
+ VGPR_32, i32, atomic_min_flat>;
+
+defm FLAT_ATOMIC_UMIN : FLAT_Atomic_Pseudo <"flat_atomic_umin",
+ VGPR_32, i32, atomic_umin_flat>;
+
+defm FLAT_ATOMIC_SMAX : FLAT_Atomic_Pseudo <"flat_atomic_smax",
+ VGPR_32, i32, atomic_max_flat>;
+
+defm FLAT_ATOMIC_UMAX : FLAT_Atomic_Pseudo <"flat_atomic_umax",
+ VGPR_32, i32, atomic_umax_flat>;
+
+defm FLAT_ATOMIC_AND : FLAT_Atomic_Pseudo <"flat_atomic_and",
+ VGPR_32, i32, atomic_and_flat>;
+
+defm FLAT_ATOMIC_OR : FLAT_Atomic_Pseudo <"flat_atomic_or",
+ VGPR_32, i32, atomic_or_flat>;
+
+defm FLAT_ATOMIC_XOR : FLAT_Atomic_Pseudo <"flat_atomic_xor",
+ VGPR_32, i32, atomic_xor_flat>;
+
+defm FLAT_ATOMIC_INC : FLAT_Atomic_Pseudo <"flat_atomic_inc",
+ VGPR_32, i32, atomic_inc_flat>;
+
+defm FLAT_ATOMIC_DEC : FLAT_Atomic_Pseudo <"flat_atomic_dec",
+ VGPR_32, i32, atomic_dec_flat>;
+
+defm FLAT_ATOMIC_ADD_X2 : FLAT_Atomic_Pseudo <"flat_atomic_add_x2",
+ VReg_64, i64, atomic_add_flat>;
+
+defm FLAT_ATOMIC_SUB_X2 : FLAT_Atomic_Pseudo <"flat_atomic_sub_x2",
+ VReg_64, i64, atomic_sub_flat>;
+
+defm FLAT_ATOMIC_SMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smin_x2",
+ VReg_64, i64, atomic_min_flat>;
+
+defm FLAT_ATOMIC_UMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umin_x2",
+ VReg_64, i64, atomic_umin_flat>;
+
+defm FLAT_ATOMIC_SMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_smax_x2",
+ VReg_64, i64, atomic_max_flat>;
+
+defm FLAT_ATOMIC_UMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_umax_x2",
+ VReg_64, i64, atomic_umax_flat>;
+
+defm FLAT_ATOMIC_AND_X2 : FLAT_Atomic_Pseudo <"flat_atomic_and_x2",
+ VReg_64, i64, atomic_and_flat>;
+
+defm FLAT_ATOMIC_OR_X2 : FLAT_Atomic_Pseudo <"flat_atomic_or_x2",
+ VReg_64, i64, atomic_or_flat>;
+
+defm FLAT_ATOMIC_XOR_X2 : FLAT_Atomic_Pseudo <"flat_atomic_xor_x2",
+ VReg_64, i64, atomic_xor_flat>;
+
+defm FLAT_ATOMIC_INC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_inc_x2",
+ VReg_64, i64, atomic_inc_flat>;
+
+defm FLAT_ATOMIC_DEC_X2 : FLAT_Atomic_Pseudo <"flat_atomic_dec_x2",
+ VReg_64, i64, atomic_dec_flat>;
+
+let SubtargetPredicate = isCI in { // CI Only flat instructions : FIXME Only?
+
+defm FLAT_ATOMIC_FCMPSWAP : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap",
+ VGPR_32, f32, null_frag, v2f32, VReg_64>;
+
+defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fcmpswap_x2",
+ VReg_64, f64, null_frag, v2f64, VReg_128>;
+
+defm FLAT_ATOMIC_FMIN : FLAT_Atomic_Pseudo <"flat_atomic_fmin",
+ VGPR_32, f32>;
+
+defm FLAT_ATOMIC_FMAX : FLAT_Atomic_Pseudo <"flat_atomic_fmax",
+ VGPR_32, f32>;
+
+defm FLAT_ATOMIC_FMIN_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmin_x2",
+ VReg_64, f64>;
+
+defm FLAT_ATOMIC_FMAX_X2 : FLAT_Atomic_Pseudo <"flat_atomic_fmax_x2",
+ VReg_64, f64>;
+
+} // End SubtargetPredicate = isCI
+
+//===----------------------------------------------------------------------===//
+// Flat Patterns
+//===----------------------------------------------------------------------===//
+
+class flat_ld <SDPatternOperator ld> : PatFrag<(ops node:$ptr),
+ (ld node:$ptr), [{
+ auto const AS = cast<MemSDNode>(N)->getAddressSpace();
+ return AS == AMDGPUAS::FLAT_ADDRESS ||
+ AS == AMDGPUAS::GLOBAL_ADDRESS ||
+ AS == AMDGPUAS::CONSTANT_ADDRESS;
+}]>;
+
+class flat_st <SDPatternOperator st> : PatFrag<(ops node:$val, node:$ptr),
+ (st node:$val, node:$ptr), [{
+ auto const AS = cast<MemSDNode>(N)->getAddressSpace();
+ return AS == AMDGPUAS::FLAT_ADDRESS ||
+ AS == AMDGPUAS::GLOBAL_ADDRESS;
+}]>;
+
+def atomic_flat_load : flat_ld <atomic_load>;
+def flat_load : flat_ld <load>;
+def flat_az_extloadi8 : flat_ld <az_extloadi8>;
+def flat_sextloadi8 : flat_ld <sextloadi8>;
+def flat_az_extloadi16 : flat_ld <az_extloadi16>;
+def flat_sextloadi16 : flat_ld <sextloadi16>;
+
+def atomic_flat_store : flat_st <atomic_store>;
+def flat_store : flat_st <store>;
+def flat_truncstorei8 : flat_st <truncstorei8>;
+def flat_truncstorei16 : flat_st <truncstorei16>;
+
+// Patterns for flat loads with no offset.
+class FlatLoadPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat <
+ (vt (node i64:$addr)),
+ (inst $addr, 0, 0, 0)
+>;
+
+class FlatLoadAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat <
+ (vt (node i64:$addr)),
+ (inst $addr, 1, 0, 0)
+>;
+
+class FlatStorePat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat <
+ (node vt:$data, i64:$addr),
+ (inst $addr, $data, 0, 0, 0)
+>;
+
+class FlatStoreAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt> : Pat <
+ // atomic store follows atomic binop convention so the address comes
+ // first.
+ (node i64:$addr, vt:$data),
+ (inst $addr, $data, 1, 0, 0)
+>;
+
+class FlatAtomicPat <FLAT_Pseudo inst, SDPatternOperator node, ValueType vt,
+ ValueType data_vt = vt> : Pat <
+ (vt (node i64:$addr, data_vt:$data)),
+ (inst $addr, $data, 0, 0)
+>;
+
+let Predicates = [isCIVI] in {
+
+def : FlatLoadPat <FLAT_LOAD_UBYTE, flat_az_extloadi8, i32>;
+def : FlatLoadPat <FLAT_LOAD_SBYTE, flat_sextloadi8, i32>;
+def : FlatLoadPat <FLAT_LOAD_UBYTE, flat_az_extloadi8, i16>;
+def : FlatLoadPat <FLAT_LOAD_SBYTE, flat_sextloadi8, i16>;
+def : FlatLoadPat <FLAT_LOAD_USHORT, flat_az_extloadi16, i32>;
+def : FlatLoadPat <FLAT_LOAD_SSHORT, flat_sextloadi16, i32>;
+def : FlatLoadPat <FLAT_LOAD_DWORD, flat_load, i32>;
+def : FlatLoadPat <FLAT_LOAD_DWORDX2, flat_load, v2i32>;
+def : FlatLoadPat <FLAT_LOAD_DWORDX4, flat_load, v4i32>;
+
+def : FlatLoadAtomicPat <FLAT_LOAD_DWORD, atomic_flat_load, i32>;
+def : FlatLoadAtomicPat <FLAT_LOAD_DWORDX2, atomic_flat_load, i64>;
+
+def : FlatStorePat <FLAT_STORE_BYTE, flat_truncstorei8, i32>;
+def : FlatStorePat <FLAT_STORE_SHORT, flat_truncstorei16, i32>;
+def : FlatStorePat <FLAT_STORE_DWORD, flat_store, i32>;
+def : FlatStorePat <FLAT_STORE_DWORDX2, flat_store, v2i32>;
+def : FlatStorePat <FLAT_STORE_DWORDX4, flat_store, v4i32>;
+
+def : FlatStoreAtomicPat <FLAT_STORE_DWORD, atomic_flat_store, i32>;
+def : FlatStoreAtomicPat <FLAT_STORE_DWORDX2, atomic_flat_store, i64>;
+
+def : FlatAtomicPat <FLAT_ATOMIC_ADD_RTN, atomic_add_global, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_SUB_RTN, atomic_sub_global, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_INC_RTN, atomic_inc_global, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_DEC_RTN, atomic_dec_global, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_AND_RTN, atomic_and_global, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_SMAX_RTN, atomic_max_global, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_UMAX_RTN, atomic_umax_global, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_SMIN_RTN, atomic_min_global, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_UMIN_RTN, atomic_umin_global, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_OR_RTN, atomic_or_global, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_SWAP_RTN, atomic_swap_global, i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_RTN, AMDGPUatomic_cmp_swap_global, i32, v2i32>;
+def : FlatAtomicPat <FLAT_ATOMIC_XOR_RTN, atomic_xor_global, i32>;
+
+def : FlatAtomicPat <FLAT_ATOMIC_ADD_X2_RTN, atomic_add_global, i64>;
+def : FlatAtomicPat <FLAT_ATOMIC_SUB_X2_RTN, atomic_sub_global, i64>;
+def : FlatAtomicPat <FLAT_ATOMIC_INC_X2_RTN, atomic_inc_global, i64>;
+def : FlatAtomicPat <FLAT_ATOMIC_DEC_X2_RTN, atomic_dec_global, i64>;
+def : FlatAtomicPat <FLAT_ATOMIC_AND_X2_RTN, atomic_and_global, i64>;
+def : FlatAtomicPat <FLAT_ATOMIC_SMAX_X2_RTN, atomic_max_global, i64>;
+def : FlatAtomicPat <FLAT_ATOMIC_UMAX_X2_RTN, atomic_umax_global, i64>;
+def : FlatAtomicPat <FLAT_ATOMIC_SMIN_X2_RTN, atomic_min_global, i64>;
+def : FlatAtomicPat <FLAT_ATOMIC_UMIN_X2_RTN, atomic_umin_global, i64>;
+def : FlatAtomicPat <FLAT_ATOMIC_OR_X2_RTN, atomic_or_global, i64>;
+def : FlatAtomicPat <FLAT_ATOMIC_SWAP_X2_RTN, atomic_swap_global, i64>;
+def : FlatAtomicPat <FLAT_ATOMIC_CMPSWAP_X2_RTN, AMDGPUatomic_cmp_swap_global, i64, v2i64>;
+def : FlatAtomicPat <FLAT_ATOMIC_XOR_X2_RTN, atomic_xor_global, i64>;
+
+} // End Predicates = [isCIVI]
+
+let Predicates = [isVI] in {
+ def : FlatStorePat <FLAT_STORE_BYTE, flat_truncstorei8, i16>;
+ def : FlatStorePat <FLAT_STORE_SHORT, flat_store, i16>;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Target
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// CI
+//===----------------------------------------------------------------------===//
+
+class FLAT_Real_ci <bits<7> op, FLAT_Pseudo ps> :
+ FLAT_Real <op, ps>,
+ SIMCInstr <ps.PseudoInstr, SIEncodingFamily.SI> {
+ let AssemblerPredicate = isCIOnly;
+ let DecoderNamespace="CI";
+}
+
+def FLAT_LOAD_UBYTE_ci : FLAT_Real_ci <0x8, FLAT_LOAD_UBYTE>;
+def FLAT_LOAD_SBYTE_ci : FLAT_Real_ci <0x9, FLAT_LOAD_SBYTE>;
+def FLAT_LOAD_USHORT_ci : FLAT_Real_ci <0xa, FLAT_LOAD_USHORT>;
+def FLAT_LOAD_SSHORT_ci : FLAT_Real_ci <0xb, FLAT_LOAD_SSHORT>;
+def FLAT_LOAD_DWORD_ci : FLAT_Real_ci <0xc, FLAT_LOAD_DWORD>;
+def FLAT_LOAD_DWORDX2_ci : FLAT_Real_ci <0xd, FLAT_LOAD_DWORDX2>;
+def FLAT_LOAD_DWORDX4_ci : FLAT_Real_ci <0xe, FLAT_LOAD_DWORDX4>;
+def FLAT_LOAD_DWORDX3_ci : FLAT_Real_ci <0xf, FLAT_LOAD_DWORDX3>;
+
+def FLAT_STORE_BYTE_ci : FLAT_Real_ci <0x18, FLAT_STORE_BYTE>;
+def FLAT_STORE_SHORT_ci : FLAT_Real_ci <0x1a, FLAT_STORE_SHORT>;
+def FLAT_STORE_DWORD_ci : FLAT_Real_ci <0x1c, FLAT_STORE_DWORD>;
+def FLAT_STORE_DWORDX2_ci : FLAT_Real_ci <0x1d, FLAT_STORE_DWORDX2>;
+def FLAT_STORE_DWORDX4_ci : FLAT_Real_ci <0x1e, FLAT_STORE_DWORDX4>;
+def FLAT_STORE_DWORDX3_ci : FLAT_Real_ci <0x1f, FLAT_STORE_DWORDX3>;
+
+multiclass FLAT_Real_Atomics_ci <bits<7> op, FLAT_Pseudo ps> {
+ def _ci : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>;
+ def _RTN_ci : FLAT_Real_ci<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>;
+}
+
+defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_ci <0x30, FLAT_ATOMIC_SWAP>;
+defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_ci <0x31, FLAT_ATOMIC_CMPSWAP>;
+defm FLAT_ATOMIC_ADD : FLAT_Real_Atomics_ci <0x32, FLAT_ATOMIC_ADD>;
+defm FLAT_ATOMIC_SUB : FLAT_Real_Atomics_ci <0x33, FLAT_ATOMIC_SUB>;
+defm FLAT_ATOMIC_SMIN : FLAT_Real_Atomics_ci <0x35, FLAT_ATOMIC_SMIN>;
+defm FLAT_ATOMIC_UMIN : FLAT_Real_Atomics_ci <0x36, FLAT_ATOMIC_UMIN>;
+defm FLAT_ATOMIC_SMAX : FLAT_Real_Atomics_ci <0x37, FLAT_ATOMIC_SMAX>;
+defm FLAT_ATOMIC_UMAX : FLAT_Real_Atomics_ci <0x38, FLAT_ATOMIC_UMAX>;
+defm FLAT_ATOMIC_AND : FLAT_Real_Atomics_ci <0x39, FLAT_ATOMIC_AND>;
+defm FLAT_ATOMIC_OR : FLAT_Real_Atomics_ci <0x3a, FLAT_ATOMIC_OR>;
+defm FLAT_ATOMIC_XOR : FLAT_Real_Atomics_ci <0x3b, FLAT_ATOMIC_XOR>;
+defm FLAT_ATOMIC_INC : FLAT_Real_Atomics_ci <0x3c, FLAT_ATOMIC_INC>;
+defm FLAT_ATOMIC_DEC : FLAT_Real_Atomics_ci <0x3d, FLAT_ATOMIC_DEC>;
+defm FLAT_ATOMIC_SWAP_X2 : FLAT_Real_Atomics_ci <0x50, FLAT_ATOMIC_SWAP_X2>;
+defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_ci <0x51, FLAT_ATOMIC_CMPSWAP_X2>;
+defm FLAT_ATOMIC_ADD_X2 : FLAT_Real_Atomics_ci <0x52, FLAT_ATOMIC_ADD_X2>;
+defm FLAT_ATOMIC_SUB_X2 : FLAT_Real_Atomics_ci <0x53, FLAT_ATOMIC_SUB_X2>;
+defm FLAT_ATOMIC_SMIN_X2 : FLAT_Real_Atomics_ci <0x55, FLAT_ATOMIC_SMIN_X2>;
+defm FLAT_ATOMIC_UMIN_X2 : FLAT_Real_Atomics_ci <0x56, FLAT_ATOMIC_UMIN_X2>;
+defm FLAT_ATOMIC_SMAX_X2 : FLAT_Real_Atomics_ci <0x57, FLAT_ATOMIC_SMAX_X2>;
+defm FLAT_ATOMIC_UMAX_X2 : FLAT_Real_Atomics_ci <0x58, FLAT_ATOMIC_UMAX_X2>;
+defm FLAT_ATOMIC_AND_X2 : FLAT_Real_Atomics_ci <0x59, FLAT_ATOMIC_AND_X2>;
+defm FLAT_ATOMIC_OR_X2 : FLAT_Real_Atomics_ci <0x5a, FLAT_ATOMIC_OR_X2>;
+defm FLAT_ATOMIC_XOR_X2 : FLAT_Real_Atomics_ci <0x5b, FLAT_ATOMIC_XOR_X2>;
+defm FLAT_ATOMIC_INC_X2 : FLAT_Real_Atomics_ci <0x5c, FLAT_ATOMIC_INC_X2>;
+defm FLAT_ATOMIC_DEC_X2 : FLAT_Real_Atomics_ci <0x5d, FLAT_ATOMIC_DEC_X2>;
+
+// CI Only flat instructions
+defm FLAT_ATOMIC_FCMPSWAP : FLAT_Real_Atomics_ci <0x3e, FLAT_ATOMIC_FCMPSWAP>;
+defm FLAT_ATOMIC_FMIN : FLAT_Real_Atomics_ci <0x3f, FLAT_ATOMIC_FMIN>;
+defm FLAT_ATOMIC_FMAX : FLAT_Real_Atomics_ci <0x40, FLAT_ATOMIC_FMAX>;
+defm FLAT_ATOMIC_FCMPSWAP_X2 : FLAT_Real_Atomics_ci <0x5e, FLAT_ATOMIC_FCMPSWAP_X2>;
+defm FLAT_ATOMIC_FMIN_X2 : FLAT_Real_Atomics_ci <0x5f, FLAT_ATOMIC_FMIN_X2>;
+defm FLAT_ATOMIC_FMAX_X2 : FLAT_Real_Atomics_ci <0x60, FLAT_ATOMIC_FMAX_X2>;
+
+
+//===----------------------------------------------------------------------===//
+// VI
+//===----------------------------------------------------------------------===//
+
+class FLAT_Real_vi <bits<7> op, FLAT_Pseudo ps> :
+ FLAT_Real <op, ps>,
+ SIMCInstr <ps.PseudoInstr, SIEncodingFamily.VI> {
+ let AssemblerPredicate = isVI;
+ let DecoderNamespace="VI";
+}
+
+def FLAT_LOAD_UBYTE_vi : FLAT_Real_vi <0x10, FLAT_LOAD_UBYTE>;
+def FLAT_LOAD_SBYTE_vi : FLAT_Real_vi <0x11, FLAT_LOAD_SBYTE>;
+def FLAT_LOAD_USHORT_vi : FLAT_Real_vi <0x12, FLAT_LOAD_USHORT>;
+def FLAT_LOAD_SSHORT_vi : FLAT_Real_vi <0x13, FLAT_LOAD_SSHORT>;
+def FLAT_LOAD_DWORD_vi : FLAT_Real_vi <0x14, FLAT_LOAD_DWORD>;
+def FLAT_LOAD_DWORDX2_vi : FLAT_Real_vi <0x15, FLAT_LOAD_DWORDX2>;
+def FLAT_LOAD_DWORDX4_vi : FLAT_Real_vi <0x17, FLAT_LOAD_DWORDX4>;
+def FLAT_LOAD_DWORDX3_vi : FLAT_Real_vi <0x16, FLAT_LOAD_DWORDX3>;
+
+def FLAT_STORE_BYTE_vi : FLAT_Real_vi <0x18, FLAT_STORE_BYTE>;
+def FLAT_STORE_SHORT_vi : FLAT_Real_vi <0x1a, FLAT_STORE_SHORT>;
+def FLAT_STORE_DWORD_vi : FLAT_Real_vi <0x1c, FLAT_STORE_DWORD>;
+def FLAT_STORE_DWORDX2_vi : FLAT_Real_vi <0x1d, FLAT_STORE_DWORDX2>;
+def FLAT_STORE_DWORDX4_vi : FLAT_Real_vi <0x1f, FLAT_STORE_DWORDX4>;
+def FLAT_STORE_DWORDX3_vi : FLAT_Real_vi <0x1e, FLAT_STORE_DWORDX3>;
+
+multiclass FLAT_Real_Atomics_vi <bits<7> op, FLAT_Pseudo ps> {
+ def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>;
+ def _RTN_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>;
+}
+
+defm FLAT_ATOMIC_SWAP : FLAT_Real_Atomics_vi <0x40, FLAT_ATOMIC_SWAP>;
+defm FLAT_ATOMIC_CMPSWAP : FLAT_Real_Atomics_vi <0x41, FLAT_ATOMIC_CMPSWAP>;
+defm FLAT_ATOMIC_ADD : FLAT_Real_Atomics_vi <0x42, FLAT_ATOMIC_ADD>;
+defm FLAT_ATOMIC_SUB : FLAT_Real_Atomics_vi <0x43, FLAT_ATOMIC_SUB>;
+defm FLAT_ATOMIC_SMIN : FLAT_Real_Atomics_vi <0x44, FLAT_ATOMIC_SMIN>;
+defm FLAT_ATOMIC_UMIN : FLAT_Real_Atomics_vi <0x45, FLAT_ATOMIC_UMIN>;
+defm FLAT_ATOMIC_SMAX : FLAT_Real_Atomics_vi <0x46, FLAT_ATOMIC_SMAX>;
+defm FLAT_ATOMIC_UMAX : FLAT_Real_Atomics_vi <0x47, FLAT_ATOMIC_UMAX>;
+defm FLAT_ATOMIC_AND : FLAT_Real_Atomics_vi <0x48, FLAT_ATOMIC_AND>;
+defm FLAT_ATOMIC_OR : FLAT_Real_Atomics_vi <0x49, FLAT_ATOMIC_OR>;
+defm FLAT_ATOMIC_XOR : FLAT_Real_Atomics_vi <0x4a, FLAT_ATOMIC_XOR>;
+defm FLAT_ATOMIC_INC : FLAT_Real_Atomics_vi <0x4b, FLAT_ATOMIC_INC>;
+defm FLAT_ATOMIC_DEC : FLAT_Real_Atomics_vi <0x4c, FLAT_ATOMIC_DEC>;
+defm FLAT_ATOMIC_SWAP_X2 : FLAT_Real_Atomics_vi <0x60, FLAT_ATOMIC_SWAP_X2>;
+defm FLAT_ATOMIC_CMPSWAP_X2 : FLAT_Real_Atomics_vi <0x61, FLAT_ATOMIC_CMPSWAP_X2>;
+defm FLAT_ATOMIC_ADD_X2 : FLAT_Real_Atomics_vi <0x62, FLAT_ATOMIC_ADD_X2>;
+defm FLAT_ATOMIC_SUB_X2 : FLAT_Real_Atomics_vi <0x63, FLAT_ATOMIC_SUB_X2>;
+defm FLAT_ATOMIC_SMIN_X2 : FLAT_Real_Atomics_vi <0x64, FLAT_ATOMIC_SMIN_X2>;
+defm FLAT_ATOMIC_UMIN_X2 : FLAT_Real_Atomics_vi <0x65, FLAT_ATOMIC_UMIN_X2>;
+defm FLAT_ATOMIC_SMAX_X2 : FLAT_Real_Atomics_vi <0x66, FLAT_ATOMIC_SMAX_X2>;
+defm FLAT_ATOMIC_UMAX_X2 : FLAT_Real_Atomics_vi <0x67, FLAT_ATOMIC_UMAX_X2>;
+defm FLAT_ATOMIC_AND_X2 : FLAT_Real_Atomics_vi <0x68, FLAT_ATOMIC_AND_X2>;
+defm FLAT_ATOMIC_OR_X2 : FLAT_Real_Atomics_vi <0x69, FLAT_ATOMIC_OR_X2>;
+defm FLAT_ATOMIC_XOR_X2 : FLAT_Real_Atomics_vi <0x6a, FLAT_ATOMIC_XOR_X2>;
+defm FLAT_ATOMIC_INC_X2 : FLAT_Real_Atomics_vi <0x6b, FLAT_ATOMIC_INC_X2>;
+defm FLAT_ATOMIC_DEC_X2 : FLAT_Real_Atomics_vi <0x6c, FLAT_ATOMIC_DEC_X2>;
+
diff --git a/contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 29b1f79187d5..dd3b46f13921 100644
--- a/contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -38,6 +38,33 @@ void GCNHazardRecognizer::EmitInstruction(MachineInstr *MI) {
CurrCycleInstr = MI;
}
+static bool isDivFMas(unsigned Opcode) {
+ return Opcode == AMDGPU::V_DIV_FMAS_F32 || Opcode == AMDGPU::V_DIV_FMAS_F64;
+}
+
+static bool isSGetReg(unsigned Opcode) {
+ return Opcode == AMDGPU::S_GETREG_B32;
+}
+
+static bool isSSetReg(unsigned Opcode) {
+ return Opcode == AMDGPU::S_SETREG_B32 || Opcode == AMDGPU::S_SETREG_IMM32_B32;
+}
+
+static bool isRWLane(unsigned Opcode) {
+ return Opcode == AMDGPU::V_READLANE_B32 || Opcode == AMDGPU::V_WRITELANE_B32;
+}
+
+static bool isRFE(unsigned Opcode) {
+ return Opcode == AMDGPU::S_RFE_B64;
+}
+
+static unsigned getHWReg(const SIInstrInfo *TII, const MachineInstr &RegInstr) {
+
+ const MachineOperand *RegOp = TII->getNamedOperand(RegInstr,
+ AMDGPU::OpName::simm16);
+ return RegOp->getImm() & AMDGPU::Hwreg::ID_MASK_;
+}
+
ScheduleHazardRecognizer::HazardType
GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
MachineInstr *MI = SU->getInstr();
@@ -48,9 +75,27 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
if (SIInstrInfo::isVMEM(*MI) && checkVMEMHazards(MI) > 0)
return NoopHazard;
+ if (SIInstrInfo::isVALU(*MI) && checkVALUHazards(MI) > 0)
+ return NoopHazard;
+
if (SIInstrInfo::isDPP(*MI) && checkDPPHazards(MI) > 0)
return NoopHazard;
+ if (isDivFMas(MI->getOpcode()) && checkDivFMasHazards(MI) > 0)
+ return NoopHazard;
+
+ if (isRWLane(MI->getOpcode()) && checkRWLaneHazards(MI) > 0)
+ return NoopHazard;
+
+ if (isSGetReg(MI->getOpcode()) && checkGetRegHazards(MI) > 0)
+ return NoopHazard;
+
+ if (isSSetReg(MI->getOpcode()) && checkSetRegHazards(MI) > 0)
+ return NoopHazard;
+
+ if (isRFE(MI->getOpcode()) && checkRFEHazards(MI) > 0)
+ return NoopHazard;
+
return NoHazard;
}
@@ -62,11 +107,32 @@ unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
if (SIInstrInfo::isSMRD(*MI))
return std::max(0, checkSMRDHazards(MI));
- if (SIInstrInfo::isVMEM(*MI))
- return std::max(0, checkVMEMHazards(MI));
+ if (SIInstrInfo::isVALU(*MI)) {
+ int WaitStates = std::max(0, checkVALUHazards(MI));
- if (SIInstrInfo::isDPP(*MI))
- return std::max(0, checkDPPHazards(MI));
+ if (SIInstrInfo::isVMEM(*MI))
+ WaitStates = std::max(WaitStates, checkVMEMHazards(MI));
+
+ if (SIInstrInfo::isDPP(*MI))
+ WaitStates = std::max(WaitStates, checkDPPHazards(MI));
+
+ if (isDivFMas(MI->getOpcode()))
+ WaitStates = std::max(WaitStates, checkDivFMasHazards(MI));
+
+ if (isRWLane(MI->getOpcode()))
+ WaitStates = std::max(WaitStates, checkRWLaneHazards(MI));
+
+ return WaitStates;
+ }
+
+ if (isSGetReg(MI->getOpcode()))
+ return std::max(0, checkGetRegHazards(MI));
+
+ if (isSSetReg(MI->getOpcode()))
+ return std::max(0, checkSetRegHazards(MI));
+
+ if (isRFE(MI->getOpcode()))
+ return std::max(0, checkRFEHazards(MI));
return 0;
}
@@ -112,21 +178,40 @@ void GCNHazardRecognizer::RecedeCycle() {
// Helper Functions
//===----------------------------------------------------------------------===//
-int GCNHazardRecognizer::getWaitStatesSinceDef(
- unsigned Reg, function_ref<bool(MachineInstr *)> IsHazardDef) {
- const SIRegisterInfo *TRI = ST.getRegisterInfo();
+int GCNHazardRecognizer::getWaitStatesSince(
+ function_ref<bool(MachineInstr *)> IsHazard) {
int WaitStates = -1;
for (MachineInstr *MI : EmittedInstrs) {
++WaitStates;
- if (!MI || !IsHazardDef(MI))
+ if (!MI || !IsHazard(MI))
continue;
- if (MI->modifiesRegister(Reg, TRI))
- return WaitStates;
+ return WaitStates;
}
return std::numeric_limits<int>::max();
}
+int GCNHazardRecognizer::getWaitStatesSinceDef(
+ unsigned Reg, function_ref<bool(MachineInstr *)> IsHazardDef) {
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+
+ auto IsHazardFn = [IsHazardDef, TRI, Reg] (MachineInstr *MI) {
+ return IsHazardDef(MI) && MI->modifiesRegister(Reg, TRI);
+ };
+
+ return getWaitStatesSince(IsHazardFn);
+}
+
+int GCNHazardRecognizer::getWaitStatesSinceSetReg(
+ function_ref<bool(MachineInstr *)> IsHazard) {
+
+ auto IsHazardFn = [IsHazard] (MachineInstr *MI) {
+ return isSSetReg(MI->getOpcode()) && IsHazard(MI);
+ };
+
+ return getWaitStatesSince(IsHazardFn);
+}
+
//===----------------------------------------------------------------------===//
// No-op Hazard Detection
//===----------------------------------------------------------------------===//
@@ -262,3 +347,156 @@ int GCNHazardRecognizer::checkDPPHazards(MachineInstr *DPP) {
return WaitStatesNeeded;
}
+
+int GCNHazardRecognizer::checkDivFMasHazards(MachineInstr *DivFMas) {
+ const SIInstrInfo *TII = ST.getInstrInfo();
+
+ // v_div_fmas requires 4 wait states after a write to vcc from a VALU
+ // instruction.
+ const int DivFMasWaitStates = 4;
+ auto IsHazardDefFn = [TII] (MachineInstr *MI) { return TII->isVALU(*MI); };
+ int WaitStatesNeeded = getWaitStatesSinceDef(AMDGPU::VCC, IsHazardDefFn);
+
+ return DivFMasWaitStates - WaitStatesNeeded;
+}
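The check* routines here share one piece of arithmetic: the number of no-ops still required is the hazard's wait-state requirement minus the wait states that have already elapsed, clamped to zero by the callers in getHazardType and PreEmitNoops. A minimal sketch with an invented helper name:

#include <algorithm>
#include <cassert>

// Hypothetical helper illustrating the pattern used by checkDivFMasHazards
// and friends; the clamping to zero happens in the callers.
static int noopsStillNeeded(int RequiredWaitStates, int ElapsedWaitStates) {
  return std::max(0, RequiredWaitStates - ElapsedWaitStates);
}

int main() {
  // v_div_fmas one instruction after a VALU write to VCC: 4 - 1 = 3 no-ops.
  assert(noopsStillNeeded(4, 1) == 3);
  // If the write happened long ago, no extra wait states are needed.
  assert(noopsStillNeeded(4, 100) == 0);
  return 0;
}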
+
+int GCNHazardRecognizer::checkGetRegHazards(MachineInstr *GetRegInstr) {
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ unsigned GetRegHWReg = getHWReg(TII, *GetRegInstr);
+
+ const int GetRegWaitStates = 2;
+ auto IsHazardFn = [TII, GetRegHWReg] (MachineInstr *MI) {
+ return GetRegHWReg == getHWReg(TII, *MI);
+ };
+ int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn);
+
+ return GetRegWaitStates - WaitStatesNeeded;
+}
+
+int GCNHazardRecognizer::checkSetRegHazards(MachineInstr *SetRegInstr) {
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ unsigned HWReg = getHWReg(TII, *SetRegInstr);
+
+ const int SetRegWaitStates =
+ ST.getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS ? 1 : 2;
+ auto IsHazardFn = [TII, HWReg] (MachineInstr *MI) {
+ return HWReg == getHWReg(TII, *MI);
+ };
+ int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn);
+ return SetRegWaitStates - WaitStatesNeeded;
+}
+
+int GCNHazardRecognizer::createsVALUHazard(const MachineInstr &MI) {
+ if (!MI.mayStore())
+ return -1;
+
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ unsigned Opcode = MI.getOpcode();
+ const MCInstrDesc &Desc = MI.getDesc();
+
+ int VDataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
+ int VDataRCID = -1;
+ if (VDataIdx != -1)
+ VDataRCID = Desc.OpInfo[VDataIdx].RegClass;
+
+ if (TII->isMUBUF(MI) || TII->isMTBUF(MI)) {
+ // There is no hazard if the instruction does not use vector regs
+ // (like wbinvl1)
+ if (VDataIdx == -1)
+ return -1;
+ // For MUBUF/MTBUF instructions this hazard only exists if the
+ // instruction is not using a register in the soffset field.
+ const MachineOperand *SOffset =
+ TII->getNamedOperand(MI, AMDGPU::OpName::soffset);
+ // If we have no soffset operand, then assume this field has been
+ // hardcoded to zero.
+ if (AMDGPU::getRegBitWidth(VDataRCID) > 64 &&
+ (!SOffset || !SOffset->isReg()))
+ return VDataIdx;
+ }
+
+ // MIMG instructions create a hazard if they don't use a 256-bit T# and
+ // the store size is greater than 8 bytes and they have more than two bits
+ // of their dmask set.
+ // All our MIMG definitions use a 256-bit T#, so we can skip checking for them.
+ if (TII->isMIMG(MI)) {
+ int SRsrcIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::srsrc);
+ assert(SRsrcIdx != -1 &&
+ AMDGPU::getRegBitWidth(Desc.OpInfo[SRsrcIdx].RegClass) == 256);
+ (void)SRsrcIdx;
+ }
+
+ if (TII->isFLAT(MI)) {
+ int DataIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdata);
+ if (AMDGPU::getRegBitWidth(Desc.OpInfo[DataIdx].RegClass) > 64)
+ return DataIdx;
+ }
+
+ return -1;
+}
+
+int GCNHazardRecognizer::checkVALUHazards(MachineInstr *VALU) {
+  // This checks for the hazard where VMEM instructions that store more than
+  // 8 bytes can have their store data overwritten by the next instruction.
+ if (!ST.has12DWordStoreHazard())
+ return 0;
+
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ const MachineRegisterInfo &MRI = VALU->getParent()->getParent()->getRegInfo();
+
+ const int VALUWaitStates = 1;
+ int WaitStatesNeeded = 0;
+
+ for (const MachineOperand &Def : VALU->defs()) {
+ if (!TRI->isVGPR(MRI, Def.getReg()))
+ continue;
+ unsigned Reg = Def.getReg();
+ auto IsHazardFn = [this, Reg, TRI] (MachineInstr *MI) {
+ int DataIdx = createsVALUHazard(*MI);
+ return DataIdx >= 0 &&
+ TRI->regsOverlap(MI->getOperand(DataIdx).getReg(), Reg);
+ };
+ int WaitStatesNeededForDef =
+ VALUWaitStates - getWaitStatesSince(IsHazardFn);
+ WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForDef);
+ }
+ return WaitStatesNeeded;
+}
+
+int GCNHazardRecognizer::checkRWLaneHazards(MachineInstr *RWLane) {
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ const MachineRegisterInfo &MRI =
+ RWLane->getParent()->getParent()->getRegInfo();
+
+ const MachineOperand *LaneSelectOp =
+ TII->getNamedOperand(*RWLane, AMDGPU::OpName::src1);
+
+ if (!LaneSelectOp->isReg() || !TRI->isSGPRReg(MRI, LaneSelectOp->getReg()))
+ return 0;
+
+ unsigned LaneSelectReg = LaneSelectOp->getReg();
+ auto IsHazardFn = [TII] (MachineInstr *MI) {
+ return TII->isVALU(*MI);
+ };
+
+ const int RWLaneWaitStates = 4;
+ int WaitStatesSince = getWaitStatesSinceDef(LaneSelectReg, IsHazardFn);
+ return RWLaneWaitStates - WaitStatesSince;
+}
+
+int GCNHazardRecognizer::checkRFEHazards(MachineInstr *RFE) {
+
+ if (ST.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ return 0;
+
+ const SIInstrInfo *TII = ST.getInstrInfo();
+
+ const int RFEWaitStates = 1;
+
+ auto IsHazardFn = [TII] (MachineInstr *MI) {
+ return getHWReg(TII, *MI) == AMDGPU::Hwreg::ID_TRAPSTS;
+ };
+ int WaitStatesNeeded = getWaitStatesSinceSetReg(IsHazardFn);
+ return RFEWaitStates - WaitStatesNeeded;
+}
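All of these helpers share one convention, sketched below with hypothetical names (none of this code is in the patch): they return the number of wait states the hardware still requires, and a zero or negative result means no hazard remains.

  // Illustrative sketch of the shared return-value convention.
  static int waitStatesStillNeeded(int RequiredWaitStates,
                                   int WaitStatesSinceHazard) {
    // Enough instructions may already separate the hazardous instruction from
    // the one being checked, in which case nothing needs to be inserted.
    return RequiredWaitStates - WaitStatesSinceHazard;
  }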
diff --git a/contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
index d82041c5f174..0ab82ff4635b 100644
--- a/contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ b/contrib/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -35,14 +35,23 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
const MachineFunction &MF;
const SISubtarget &ST;
+ int getWaitStatesSince(function_ref<bool(MachineInstr *)> IsHazard);
int getWaitStatesSinceDef(unsigned Reg,
function_ref<bool(MachineInstr *)> IsHazardDef =
[](MachineInstr *) { return true; });
+ int getWaitStatesSinceSetReg(function_ref<bool(MachineInstr *)> IsHazard);
int checkSMEMSoftClauseHazards(MachineInstr *SMEM);
int checkSMRDHazards(MachineInstr *SMRD);
int checkVMEMHazards(MachineInstr* VMEM);
int checkDPPHazards(MachineInstr *DPP);
+ int checkDivFMasHazards(MachineInstr *DivFMas);
+ int checkGetRegHazards(MachineInstr *GetRegInstr);
+ int checkSetRegHazards(MachineInstr *SetRegInstr);
+ int createsVALUHazard(const MachineInstr &MI);
+ int checkVALUHazards(MachineInstr *VALU);
+ int checkRWLaneHazards(MachineInstr *RWLane);
+ int checkRFEHazards(MachineInstr *RFE);
public:
GCNHazardRecognizer(const MachineFunction &MF);
// We can only issue one instruction per cycle.
diff --git a/contrib/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/contrib/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
new file mode 100644
index 000000000000..2f88033c807f
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -0,0 +1,312 @@
+//===-- GCNSchedStrategy.cpp - GCN Scheduler Strategy ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This contains a MachineSchedStrategy implementation for maximizing wave
+/// occupancy on GCN hardware.
+//===----------------------------------------------------------------------===//
+
+#include "GCNSchedStrategy.h"
+#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "SIRegisterInfo.h"
+#include "llvm/CodeGen/RegisterClassInfo.h"
+
+#define DEBUG_TYPE "misched"
+
+using namespace llvm;
+
+GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
+ const MachineSchedContext *C) :
+ GenericScheduler(C) { }
+
+static unsigned getMaxWaves(unsigned SGPRs, unsigned VGPRs,
+ const MachineFunction &MF) {
+
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ unsigned MinRegOccupancy = std::min(ST.getOccupancyWithNumSGPRs(SGPRs),
+ ST.getOccupancyWithNumVGPRs(VGPRs));
+ return std::min(MinRegOccupancy,
+ ST.getOccupancyWithLocalMemSize(MFI->getLDSSize()));
+}
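A worked example of this computation, with made-up occupancy numbers rather than values from any real subtarget:

  #include <algorithm>

  unsigned exampleMaxWaves() {
    unsigned WavesBySGPRs = 8;  // e.g. ST.getOccupancyWithNumSGPRs(SGPRs)
    unsigned WavesByVGPRs = 5;  // e.g. ST.getOccupancyWithNumVGPRs(VGPRs)
    unsigned WavesByLDS = 10;   // e.g. ST.getOccupancyWithLocalMemSize(LDSSize)
    // The most constrained resource wins, so this example yields 5 waves.
    return std::min(std::min(WavesBySGPRs, WavesByVGPRs), WavesByLDS);
  }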
+
+void GCNMaxOccupancySchedStrategy::initCandidate(SchedCandidate &Cand, SUnit *SU,
+ bool AtTop, const RegPressureTracker &RPTracker,
+ const SIRegisterInfo *SRI,
+ int SGPRPressure,
+ int VGPRPressure,
+ int SGPRExcessLimit,
+ int VGPRExcessLimit,
+ int SGPRCriticalLimit,
+ int VGPRCriticalLimit) {
+
+ Cand.SU = SU;
+ Cand.AtTop = AtTop;
+
+ // getDownwardPressure() and getUpwardPressure() make temporary changes to
+ // the tracker, so we need to pass those functions a non-const copy.
+ RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
+
+ std::vector<unsigned> Pressure;
+ std::vector<unsigned> MaxPressure;
+
+ if (AtTop)
+ TempTracker.getDownwardPressure(SU->getInstr(), Pressure, MaxPressure);
+ else {
+ // FIXME: I think for bottom up scheduling, the register pressure is cached
+ // and can be retrieved by DAG->getPressureDiff(SU).
+ TempTracker.getUpwardPressure(SU->getInstr(), Pressure, MaxPressure);
+ }
+
+ int NewSGPRPressure = Pressure[SRI->getSGPRPressureSet()];
+ int NewVGPRPressure = Pressure[SRI->getVGPRPressureSet()];
+
+ // If two instructions increase the pressure of different register sets
+ // by the same amount, the generic scheduler will prefer to schedule the
+ // instruction that increases the set with the least amount of registers,
+ // which in our case would be SGPRs. This is rarely what we want, so
+ // when we report excess/critical register pressure, we do it either
+ // only for VGPRs or only for SGPRs.
+
+ // FIXME: Better heuristics to determine whether to prefer SGPRs or VGPRs.
+ const int MaxVGPRPressureInc = 16;
+ bool ShouldTrackVGPRs = VGPRPressure + MaxVGPRPressureInc >= VGPRExcessLimit;
+ bool ShouldTrackSGPRs = !ShouldTrackVGPRs && SGPRPressure >= SGPRExcessLimit;
+
+
+ // FIXME: We have to enter REG-EXCESS before we reach the actual threshold
+ // to increase the likelihood we don't go over the limits. We should improve
+ // the analysis to look through dependencies to find the path with the least
+ // register pressure.
+ // FIXME: This is also necessary, because some passes that run after
+ // scheduling and before regalloc increase register pressure.
+ const int ErrorMargin = 3;
+ VGPRExcessLimit -= ErrorMargin;
+ SGPRExcessLimit -= ErrorMargin;
+
+ // We only need to update the RPDelta for instructions that increase
+ // register pressure. Instructions that decrease or keep reg pressure
+ // the same will be marked as RegExcess in tryCandidate() when they
+ // are compared with instructions that increase the register pressure.
+ if (ShouldTrackVGPRs && NewVGPRPressure >= VGPRExcessLimit) {
+ Cand.RPDelta.Excess = PressureChange(SRI->getVGPRPressureSet());
+ Cand.RPDelta.Excess.setUnitInc(NewVGPRPressure - VGPRExcessLimit);
+ }
+
+ if (ShouldTrackSGPRs && NewSGPRPressure >= SGPRExcessLimit) {
+ Cand.RPDelta.Excess = PressureChange(SRI->getSGPRPressureSet());
+ Cand.RPDelta.Excess.setUnitInc(NewSGPRPressure - SGPRExcessLimit);
+ }
+
+ // Register pressure is considered 'CRITICAL' if it is approaching a value
+ // that would reduce the wave occupancy for the execution unit. When
+ // register pressure is 'CRITICAL', increases in SGPR and VGPR pressure
+ // have the same cost, so we don't need to prefer one over the other.
+
+ VGPRCriticalLimit -= ErrorMargin;
+ SGPRCriticalLimit -= ErrorMargin;
+
+ int SGPRDelta = NewSGPRPressure - SGPRCriticalLimit;
+ int VGPRDelta = NewVGPRPressure - VGPRCriticalLimit;
+
+ if (SGPRDelta >= 0 || VGPRDelta >= 0) {
+ if (SGPRDelta > VGPRDelta) {
+ Cand.RPDelta.CriticalMax = PressureChange(SRI->getSGPRPressureSet());
+ Cand.RPDelta.CriticalMax.setUnitInc(SGPRDelta);
+ } else {
+ Cand.RPDelta.CriticalMax = PressureChange(SRI->getVGPRPressureSet());
+ Cand.RPDelta.CriticalMax.setUnitInc(VGPRDelta);
+ }
+ }
+}
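A worked numeric example of the excess-pressure marking above; the limit and pressure values are invented for illustration:

  int exampleExcessDelta() {
    int VGPRExcessLimit = 24;       // assumed number of allocatable VGPR_32 regs
    const int ErrorMargin = 3;      // same margin applied above
    VGPRExcessLimit -= ErrorMargin; // effective limit: 21
    int NewVGPRPressure = 26;       // VGPR pressure after this candidate
    // The candidate would be tagged RegExcess with a unit increment of
    // 26 - 21 = 5, so tryCandidate() prefers nodes with a smaller excess.
    return NewVGPRPressure - VGPRExcessLimit;
  }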
+
+// This function is mostly cut and pasted from
+// GenericScheduler::pickNodeFromQueue()
+void GCNMaxOccupancySchedStrategy::pickNodeFromQueue(SchedBoundary &Zone,
+ const CandPolicy &ZonePolicy,
+ const RegPressureTracker &RPTracker,
+ SchedCandidate &Cand) {
+ const SISubtarget &ST = DAG->MF.getSubtarget<SISubtarget>();
+ const SIRegisterInfo *SRI = static_cast<const SIRegisterInfo*>(TRI);
+ ArrayRef<unsigned> Pressure = RPTracker.getRegSetPressureAtPos();
+ unsigned SGPRPressure = Pressure[SRI->getSGPRPressureSet()];
+ unsigned VGPRPressure = Pressure[SRI->getVGPRPressureSet()];
+ unsigned SGPRExcessLimit =
+ Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::SGPR_32RegClass);
+ unsigned VGPRExcessLimit =
+ Context->RegClassInfo->getNumAllocatableRegs(&AMDGPU::VGPR_32RegClass);
+ unsigned MaxWaves = getMaxWaves(SGPRPressure, VGPRPressure, DAG->MF);
+ unsigned SGPRCriticalLimit = SRI->getMaxNumSGPRs(ST, MaxWaves, true);
+ unsigned VGPRCriticalLimit = SRI->getMaxNumVGPRs(MaxWaves);
+
+ ReadyQueue &Q = Zone.Available;
+ for (SUnit *SU : Q) {
+
+ SchedCandidate TryCand(ZonePolicy);
+ initCandidate(TryCand, SU, Zone.isTop(), RPTracker, SRI,
+ SGPRPressure, VGPRPressure,
+ SGPRExcessLimit, VGPRExcessLimit,
+ SGPRCriticalLimit, VGPRCriticalLimit);
+ // Pass SchedBoundary only when comparing nodes from the same boundary.
+ SchedBoundary *ZoneArg = Cand.AtTop == TryCand.AtTop ? &Zone : nullptr;
+ GenericScheduler::tryCandidate(Cand, TryCand, ZoneArg);
+ if (TryCand.Reason != NoCand) {
+ // Initialize resource delta if needed in case future heuristics query it.
+ if (TryCand.ResDelta == SchedResourceDelta())
+ TryCand.initResourceDelta(Zone.DAG, SchedModel);
+ Cand.setBest(TryCand);
+ }
+ }
+}
+
+static int getBidirectionalReasonRank(GenericSchedulerBase::CandReason Reason) {
+ switch (Reason) {
+ default:
+ return Reason;
+ case GenericSchedulerBase::RegCritical:
+ case GenericSchedulerBase::RegExcess:
+ return -Reason;
+ }
+}
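The negation only affects the register-pressure reasons; a small illustration of the arithmetic, using hypothetical enum positions (the real values come from GenericSchedulerBase::CandReason and can differ between LLVM versions):

  bool exampleBottomCandidateWins() {
    const int RegExcess = 3, NodeOrder = 16; // assumed enum positions
    int TopRank = -RegExcess;  // top candidate was chosen for excess pressure
    int BotRank = NodeOrder;   // bottom candidate was chosen by node order
    // TopRank > BotRank is false here, so pickNodeBidirectional() keeps the
    // bottom candidate in this situation.
    return TopRank > BotRank;
  }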
+
+// This function is mostly cut and pasted from
+// GenericScheduler::pickNodeBidirectional()
+SUnit *GCNMaxOccupancySchedStrategy::pickNodeBidirectional(bool &IsTopNode) {
+ // Schedule as far as possible in the direction of no choice. This is most
+ // efficient, but also provides the best heuristics for CriticalPSets.
+ if (SUnit *SU = Bot.pickOnlyChoice()) {
+ IsTopNode = false;
+ return SU;
+ }
+ if (SUnit *SU = Top.pickOnlyChoice()) {
+ IsTopNode = true;
+ return SU;
+ }
+ // Set the bottom-up policy based on the state of the current bottom zone and
+ // the instructions outside the zone, including the top zone.
+ CandPolicy BotPolicy;
+ setPolicy(BotPolicy, /*IsPostRA=*/false, Bot, &Top);
+ // Set the top-down policy based on the state of the current top zone and
+ // the instructions outside the zone, including the bottom zone.
+ CandPolicy TopPolicy;
+ setPolicy(TopPolicy, /*IsPostRA=*/false, Top, &Bot);
+
+ // See if BotCand is still valid (because we previously scheduled from Top).
+ DEBUG(dbgs() << "Picking from Bot:\n");
+ if (!BotCand.isValid() || BotCand.SU->isScheduled ||
+ BotCand.Policy != BotPolicy) {
+ BotCand.reset(CandPolicy());
+ pickNodeFromQueue(Bot, BotPolicy, DAG->getBotRPTracker(), BotCand);
+ assert(BotCand.Reason != NoCand && "failed to find the first candidate");
+ } else {
+ DEBUG(traceCandidate(BotCand));
+ }
+
+ // Check if the top Q has a better candidate.
+ DEBUG(dbgs() << "Picking from Top:\n");
+ if (!TopCand.isValid() || TopCand.SU->isScheduled ||
+ TopCand.Policy != TopPolicy) {
+ TopCand.reset(CandPolicy());
+ pickNodeFromQueue(Top, TopPolicy, DAG->getTopRPTracker(), TopCand);
+ assert(TopCand.Reason != NoCand && "failed to find the first candidate");
+ } else {
+ DEBUG(traceCandidate(TopCand));
+ }
+
+ // Pick best from BotCand and TopCand.
+ DEBUG(
+ dbgs() << "Top Cand: ";
+ traceCandidate(BotCand);
+ dbgs() << "Bot Cand: ";
+ traceCandidate(TopCand);
+ );
+ SchedCandidate Cand;
+ if (TopCand.Reason == BotCand.Reason) {
+ Cand = BotCand;
+ GenericSchedulerBase::CandReason TopReason = TopCand.Reason;
+ TopCand.Reason = NoCand;
+ GenericScheduler::tryCandidate(Cand, TopCand, nullptr);
+ if (TopCand.Reason != NoCand) {
+ Cand.setBest(TopCand);
+ } else {
+ TopCand.Reason = TopReason;
+ }
+ } else {
+ if (TopCand.Reason == RegExcess && TopCand.RPDelta.Excess.getUnitInc() <= 0) {
+ Cand = TopCand;
+ } else if (BotCand.Reason == RegExcess && BotCand.RPDelta.Excess.getUnitInc() <= 0) {
+ Cand = BotCand;
+ } else if (TopCand.Reason == RegCritical && TopCand.RPDelta.CriticalMax.getUnitInc() <= 0) {
+ Cand = TopCand;
+ } else if (BotCand.Reason == RegCritical && BotCand.RPDelta.CriticalMax.getUnitInc() <= 0) {
+ Cand = BotCand;
+ } else {
+ int TopRank = getBidirectionalReasonRank(TopCand.Reason);
+ int BotRank = getBidirectionalReasonRank(BotCand.Reason);
+ if (TopRank > BotRank) {
+ Cand = TopCand;
+ } else {
+ Cand = BotCand;
+ }
+ }
+ }
+ DEBUG(
+ dbgs() << "Picking: ";
+ traceCandidate(Cand);
+ );
+
+ IsTopNode = Cand.AtTop;
+ return Cand.SU;
+}
+
+// This function is mostly cut and pasted from
+// GenericScheduler::pickNode()
+SUnit *GCNMaxOccupancySchedStrategy::pickNode(bool &IsTopNode) {
+ if (DAG->top() == DAG->bottom()) {
+ assert(Top.Available.empty() && Top.Pending.empty() &&
+ Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
+ return nullptr;
+ }
+ SUnit *SU;
+ do {
+ if (RegionPolicy.OnlyTopDown) {
+ SU = Top.pickOnlyChoice();
+ if (!SU) {
+ CandPolicy NoPolicy;
+ TopCand.reset(NoPolicy);
+ pickNodeFromQueue(Top, NoPolicy, DAG->getTopRPTracker(), TopCand);
+ assert(TopCand.Reason != NoCand && "failed to find a candidate");
+ SU = TopCand.SU;
+ }
+ IsTopNode = true;
+ } else if (RegionPolicy.OnlyBottomUp) {
+ SU = Bot.pickOnlyChoice();
+ if (!SU) {
+ CandPolicy NoPolicy;
+ BotCand.reset(NoPolicy);
+ pickNodeFromQueue(Bot, NoPolicy, DAG->getBotRPTracker(), BotCand);
+ assert(BotCand.Reason != NoCand && "failed to find a candidate");
+ SU = BotCand.SU;
+ }
+ IsTopNode = false;
+ } else {
+ SU = pickNodeBidirectional(IsTopNode);
+ }
+ } while (SU->isScheduled);
+
+ if (SU->isTopReady())
+ Top.removeReady(SU);
+ if (SU->isBottomReady())
+ Bot.removeReady(SU);
+
+ DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr());
+ return SU;
+}
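The registration of this strategy is not part of the hunks shown here; as a hedged sketch, a strategy like this is typically installed through a factory that hands it to a ScheduleDAGMILive (the factory name below is hypothetical):

  #include "GCNSchedStrategy.h"
  #include "llvm/ADT/STLExtras.h"            // llvm::make_unique in this LLVM era
  #include "llvm/CodeGen/MachineScheduler.h"

  static llvm::ScheduleDAGInstrs *
  createGCNMaxOccupancyMachineScheduler(llvm::MachineSchedContext *C) {
    return new llvm::ScheduleDAGMILive(
        C, llvm::make_unique<GCNMaxOccupancySchedStrategy>(C));
  }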
diff --git a/contrib/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/contrib/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
new file mode 100644
index 000000000000..4cfc0cea81fb
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -0,0 +1,54 @@
+//===-- GCNSchedStrategy.h - GCN Scheduler Strategy -*- C++ -*-------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
+#define LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
+
+#include "llvm/CodeGen/MachineScheduler.h"
+
+namespace llvm {
+
+class SIRegisterInfo;
+
+/// This is a minimal scheduler strategy. The main difference between this
+/// and the GenericScheduler is that GCNSchedStrategy uses different
+/// heuristics to determine excess/critical pressure sets. Its goal is to
+/// maximize kernel occupancy (i.e. maximum number of waves per simd).
+class GCNMaxOccupancySchedStrategy : public GenericScheduler {
+
+ SUnit *pickNodeBidirectional(bool &IsTopNode);
+
+ void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy,
+ const RegPressureTracker &RPTracker,
+ SchedCandidate &Cand);
+
+ void initCandidate(SchedCandidate &Cand, SUnit *SU,
+ bool AtTop, const RegPressureTracker &RPTracker,
+ const SIRegisterInfo *SRI,
+ int SGPRPressure, int VGPRPressure,
+ int SGPRExcessLimit, int VGPRExcessLimit,
+ int SGPRCriticalLimit, int VGPRCriticalLimit);
+
+ void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
+ SchedBoundary *Zone, const SIRegisterInfo *SRI,
+ unsigned SGPRPressure, unsigned VGPRPressure);
+
+public:
+ GCNMaxOccupancySchedStrategy(const MachineSchedContext *C);
+
+ SUnit *pickNode(bool &IsTopNode) override;
+};
+
+} // End namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
diff --git a/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp b/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
index 2932d3bb1580..7172a0aa7167 100644
--- a/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.cpp
@@ -9,46 +9,52 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUInstPrinter.h"
-#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIDefines.h"
+#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "Utils/AMDGPUAsmUtils.h"
+#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-
-#include <string>
+#include <cassert>
using namespace llvm;
+using namespace llvm::AMDGPU;
void AMDGPUInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
StringRef Annot, const MCSubtargetInfo &STI) {
OS.flush();
- printInstruction(MI, OS);
-
+ printInstruction(MI, STI, OS);
printAnnotation(OS, Annot);
}
void AMDGPUInstPrinter::printU4ImmOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
O << formatHex(MI->getOperand(OpNo).getImm() & 0xf);
}
void AMDGPUInstPrinter::printU8ImmOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
+ raw_ostream &O) {
O << formatHex(MI->getOperand(OpNo).getImm() & 0xff);
}
void AMDGPUInstPrinter::printU16ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
- O << formatHex(MI->getOperand(OpNo).getImm() & 0xffff);
-}
-
-void AMDGPUInstPrinter::printU32ImmOperand(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
- O << formatHex(MI->getOperand(OpNo).getImm() & 0xffffffff);
+ // It's possible to end up with a 32-bit literal used with a 16-bit operand
+ // with ignored high bits. Print as 32-bit anyway in that case.
+ int64_t Imm = MI->getOperand(OpNo).getImm();
+ if (isInt<16>(Imm) || isUInt<16>(Imm))
+ O << formatHex(static_cast<uint64_t>(Imm & 0xffff));
+ else
+ printU32ImmOperand(MI, OpNo, STI, O);
}
void AMDGPUInstPrinter::printU4ImmDecOperand(const MCInst *MI, unsigned OpNo,
@@ -66,8 +72,14 @@ void AMDGPUInstPrinter::printU16ImmDecOperand(const MCInst *MI, unsigned OpNo,
O << formatDec(MI->getOperand(OpNo).getImm() & 0xffff);
}
-void AMDGPUInstPrinter::printNamedBit(const MCInst* MI, unsigned OpNo,
- raw_ostream& O, StringRef BitName) {
+void AMDGPUInstPrinter::printU32ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ O << formatHex(MI->getOperand(OpNo).getImm() & 0xffffffff);
+}
+
+void AMDGPUInstPrinter::printNamedBit(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O, StringRef BitName) {
if (MI->getOperand(OpNo).getImm()) {
O << ' ' << BitName;
}
@@ -97,7 +109,8 @@ void AMDGPUInstPrinter::printMBUFOffset(const MCInst *MI, unsigned OpNo,
}
void AMDGPUInstPrinter::printOffset(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
uint16_t Imm = MI->getOperand(OpNo).getImm();
if (Imm != 0) {
O << " offset:";
@@ -106,7 +119,8 @@ void AMDGPUInstPrinter::printOffset(const MCInst *MI, unsigned OpNo,
}
void AMDGPUInstPrinter::printOffset0(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
if (MI->getOperand(OpNo).getImm()) {
O << " offset0:";
printU8ImmDecOperand(MI, OpNo, O);
@@ -114,74 +128,97 @@ void AMDGPUInstPrinter::printOffset0(const MCInst *MI, unsigned OpNo,
}
void AMDGPUInstPrinter::printOffset1(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
if (MI->getOperand(OpNo).getImm()) {
O << " offset1:";
printU8ImmDecOperand(MI, OpNo, O);
}
}
-void AMDGPUInstPrinter::printSMRDOffset(const MCInst *MI, unsigned OpNo,
+void AMDGPUInstPrinter::printSMRDOffset8(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ printU32ImmOperand(MI, OpNo, STI, O);
+}
+
+void AMDGPUInstPrinter::printSMRDOffset20(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
- printU32ImmOperand(MI, OpNo, O);
+ printU32ImmOperand(MI, OpNo, STI, O);
}
void AMDGPUInstPrinter::printSMRDLiteralOffset(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
- printU32ImmOperand(MI, OpNo, O);
+ printU32ImmOperand(MI, OpNo, STI, O);
}
void AMDGPUInstPrinter::printGDS(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI, raw_ostream &O) {
printNamedBit(MI, OpNo, O, "gds");
}
void AMDGPUInstPrinter::printGLC(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI, raw_ostream &O) {
printNamedBit(MI, OpNo, O, "glc");
}
void AMDGPUInstPrinter::printSLC(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI, raw_ostream &O) {
printNamedBit(MI, OpNo, O, "slc");
}
void AMDGPUInstPrinter::printTFE(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI, raw_ostream &O) {
printNamedBit(MI, OpNo, O, "tfe");
}
void AMDGPUInstPrinter::printDMask(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI, raw_ostream &O) {
if (MI->getOperand(OpNo).getImm()) {
O << " dmask:";
- printU16ImmOperand(MI, OpNo, O);
+ printU16ImmOperand(MI, OpNo, STI, O);
}
}
void AMDGPUInstPrinter::printUNorm(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI, raw_ostream &O) {
printNamedBit(MI, OpNo, O, "unorm");
}
void AMDGPUInstPrinter::printDA(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI, raw_ostream &O) {
printNamedBit(MI, OpNo, O, "da");
}
void AMDGPUInstPrinter::printR128(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI, raw_ostream &O) {
printNamedBit(MI, OpNo, O, "r128");
}
void AMDGPUInstPrinter::printLWE(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI, raw_ostream &O) {
printNamedBit(MI, OpNo, O, "lwe");
}
-void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O,
+void AMDGPUInstPrinter::printExpCompr(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).getImm())
+ O << " compr";
+}
+
+void AMDGPUInstPrinter::printExpVM(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ if (MI->getOperand(OpNo).getImm())
+ O << " vm";
+}
+
+void AMDGPUInstPrinter::printRegOperand(unsigned RegNo, raw_ostream &O,
const MCRegisterInfo &MRI) {
- switch (reg) {
+ switch (RegNo) {
case AMDGPU::VCC:
O << "vcc";
return;
@@ -233,52 +270,54 @@ void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O,
// The low 8 bits of the encoding value is the register index, for both VGPRs
// and SGPRs.
- unsigned RegIdx = MRI.getEncodingValue(reg) & ((1 << 8) - 1);
+ unsigned RegIdx = MRI.getEncodingValue(RegNo) & ((1 << 8) - 1);
unsigned NumRegs;
- if (MRI.getRegClass(AMDGPU::VGPR_32RegClassID).contains(reg)) {
+ if (MRI.getRegClass(AMDGPU::VGPR_32RegClassID).contains(RegNo)) {
O << 'v';
NumRegs = 1;
- } else if (MRI.getRegClass(AMDGPU::SGPR_32RegClassID).contains(reg)) {
+ } else if (MRI.getRegClass(AMDGPU::SGPR_32RegClassID).contains(RegNo)) {
O << 's';
NumRegs = 1;
- } else if (MRI.getRegClass(AMDGPU::VReg_64RegClassID).contains(reg)) {
+ } else if (MRI.getRegClass(AMDGPU::VReg_64RegClassID).contains(RegNo)) {
O <<'v';
NumRegs = 2;
- } else if (MRI.getRegClass(AMDGPU::SGPR_64RegClassID).contains(reg)) {
+ } else if (MRI.getRegClass(AMDGPU::SGPR_64RegClassID).contains(RegNo)) {
O << 's';
NumRegs = 2;
- } else if (MRI.getRegClass(AMDGPU::VReg_128RegClassID).contains(reg)) {
+ } else if (MRI.getRegClass(AMDGPU::VReg_128RegClassID).contains(RegNo)) {
O << 'v';
NumRegs = 4;
- } else if (MRI.getRegClass(AMDGPU::SGPR_128RegClassID).contains(reg)) {
+ } else if (MRI.getRegClass(AMDGPU::SGPR_128RegClassID).contains(RegNo)) {
O << 's';
NumRegs = 4;
- } else if (MRI.getRegClass(AMDGPU::VReg_96RegClassID).contains(reg)) {
+ } else if (MRI.getRegClass(AMDGPU::VReg_96RegClassID).contains(RegNo)) {
O << 'v';
NumRegs = 3;
- } else if (MRI.getRegClass(AMDGPU::VReg_256RegClassID).contains(reg)) {
+ } else if (MRI.getRegClass(AMDGPU::VReg_256RegClassID).contains(RegNo)) {
O << 'v';
NumRegs = 8;
- } else if (MRI.getRegClass(AMDGPU::SReg_256RegClassID).contains(reg)) {
+ } else if (MRI.getRegClass(AMDGPU::SReg_256RegClassID).contains(RegNo)) {
O << 's';
NumRegs = 8;
- } else if (MRI.getRegClass(AMDGPU::VReg_512RegClassID).contains(reg)) {
+ } else if (MRI.getRegClass(AMDGPU::VReg_512RegClassID).contains(RegNo)) {
O << 'v';
NumRegs = 16;
- } else if (MRI.getRegClass(AMDGPU::SReg_512RegClassID).contains(reg)) {
+ } else if (MRI.getRegClass(AMDGPU::SReg_512RegClassID).contains(RegNo)) {
O << 's';
NumRegs = 16;
- } else if (MRI.getRegClass(AMDGPU::TTMP_64RegClassID).contains(reg)) {
+ } else if (MRI.getRegClass(AMDGPU::TTMP_64RegClassID).contains(RegNo)) {
O << "ttmp";
NumRegs = 2;
- RegIdx -= 112; // Trap temps start at offset 112. TODO: Get this from tablegen.
- } else if (MRI.getRegClass(AMDGPU::TTMP_128RegClassID).contains(reg)) {
+ // Trap temps start at offset 112. TODO: Get this from tablegen.
+ RegIdx -= 112;
+ } else if (MRI.getRegClass(AMDGPU::TTMP_128RegClassID).contains(RegNo)) {
O << "ttmp";
NumRegs = 4;
- RegIdx -= 112; // Trap temps start at offset 112. TODO: Get this from tablegen.
+ // Trap temps start at offset 112. TODO: Get this from tablegen.
+ RegIdx -= 112;
} else {
- O << getRegisterName(reg);
+ O << getRegisterName(RegNo);
return;
}
@@ -291,7 +330,7 @@ void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O,
}
void AMDGPUInstPrinter::printVOPDst(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI, raw_ostream &O) {
if (MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::VOP3)
O << "_e64 ";
else if (MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::DPP)
@@ -301,10 +340,44 @@ void AMDGPUInstPrinter::printVOPDst(const MCInst *MI, unsigned OpNo,
else
O << "_e32 ";
- printOperand(MI, OpNo, O);
+ printOperand(MI, OpNo, STI, O);
+}
+
+void AMDGPUInstPrinter::printImmediate16(uint32_t Imm,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ int16_t SImm = static_cast<int16_t>(Imm);
+ if (SImm >= -16 && SImm <= 64) {
+ O << SImm;
+ return;
+ }
+
+ if (Imm == 0x3C00)
+ O << "1.0";
+ else if (Imm == 0xBC00)
+ O << "-1.0";
+ else if (Imm == 0x3800)
+ O << "0.5";
+ else if (Imm == 0xB800)
+ O << "-0.5";
+ else if (Imm == 0x4000)
+ O << "2.0";
+ else if (Imm == 0xC000)
+ O << "-2.0";
+ else if (Imm == 0x4400)
+ O << "4.0";
+ else if (Imm == 0xC400)
+ O << "-4.0";
+ else if (Imm == 0x3118) {
+ assert(STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]);
+ O << "0.15915494";
+ } else
+ O << formatHex(static_cast<uint64_t>(Imm));
}
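The magic constants matched above are IEEE half-precision bit patterns. A small, self-contained decoder for the normal range (no subnormal/Inf/NaN handling, illustrative only) shows why, for example, 0x3C00 prints as "1.0" and 0xB800 as "-0.5":

  #include <cmath>
  #include <cstdint>

  static float decodeNormalHalf(uint16_t Bits) {
    int Sign = (Bits >> 15) & 0x1;
    int Exp = (Bits >> 10) & 0x1F;  // biased exponent, bias 15
    int Mant = Bits & 0x3FF;        // 10 fraction bits
    float Value = std::ldexp(1.0f + Mant / 1024.0f, Exp - 15);
    return Sign ? -Value : Value;   // 0x3118 decodes to ~0.1592, i.e. 1/(2*pi)
  }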
-void AMDGPUInstPrinter::printImmediate32(uint32_t Imm, raw_ostream &O) {
+void AMDGPUInstPrinter::printImmediate32(uint32_t Imm,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
int32_t SImm = static_cast<int32_t>(Imm);
if (SImm >= -16 && SImm <= 64) {
O << SImm;
@@ -329,11 +402,16 @@ void AMDGPUInstPrinter::printImmediate32(uint32_t Imm, raw_ostream &O) {
O << "4.0";
else if (Imm == FloatToBits(-4.0f))
O << "-4.0";
+ else if (Imm == 0x3e22f983 &&
+ STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm])
+ O << "0.15915494";
else
O << formatHex(static_cast<uint64_t>(Imm));
}
-void AMDGPUInstPrinter::printImmediate64(uint64_t Imm, raw_ostream &O) {
+void AMDGPUInstPrinter::printImmediate64(uint64_t Imm,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
int64_t SImm = static_cast<int64_t>(Imm);
if (SImm >= -16 && SImm <= 64) {
O << SImm;
@@ -358,8 +436,11 @@ void AMDGPUInstPrinter::printImmediate64(uint64_t Imm, raw_ostream &O) {
O << "4.0";
else if (Imm == DoubleToBits(-4.0))
O << "-4.0";
+ else if (Imm == 0x3fc45f306dc9c882 &&
+ STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm])
+ O << "0.15915494";
else {
- assert(isUInt<32>(Imm));
+ assert(isUInt<32>(Imm) || Imm == 0x3fc45f306dc9c882);
// In rare situations, we will have a 32-bit literal in a 64-bit
// operand. This is technically allowed for the encoding of s_mov_b64.
@@ -368,7 +449,12 @@ void AMDGPUInstPrinter::printImmediate64(uint64_t Imm, raw_ostream &O) {
}
void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
+ if (OpNo >= MI->getNumOperands()) {
+ O << "/*Missing OP" << OpNo << "*/";
+ return;
+ }
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.isReg()) {
@@ -383,22 +469,39 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
}
} else if (Op.isImm()) {
const MCInstrDesc &Desc = MII.get(MI->getOpcode());
- int RCID = Desc.OpInfo[OpNo].RegClass;
- if (RCID != -1) {
- const MCRegisterClass &ImmRC = MRI.getRegClass(RCID);
- if (ImmRC.getSize() == 4)
- printImmediate32(Op.getImm(), O);
- else if (ImmRC.getSize() == 8)
- printImmediate64(Op.getImm(), O);
- else
- llvm_unreachable("Invalid register class size");
- } else if (Desc.OpInfo[OpNo].OperandType == MCOI::OPERAND_IMMEDIATE) {
- printImmediate32(Op.getImm(), O);
- } else {
+ switch (Desc.OpInfo[OpNo].OperandType) {
+ case AMDGPU::OPERAND_REG_IMM_INT32:
+ case AMDGPU::OPERAND_REG_IMM_FP32:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT32:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP32:
+ case MCOI::OPERAND_IMMEDIATE:
+ printImmediate32(Op.getImm(), STI, O);
+ break;
+ case AMDGPU::OPERAND_REG_IMM_INT64:
+ case AMDGPU::OPERAND_REG_IMM_FP64:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT64:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP64:
+ printImmediate64(Op.getImm(), STI, O);
+ break;
+ case AMDGPU::OPERAND_REG_INLINE_C_INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP16:
+ case AMDGPU::OPERAND_REG_IMM_INT16:
+ case AMDGPU::OPERAND_REG_IMM_FP16:
+ printImmediate16(Op.getImm(), STI, O);
+ break;
+ case MCOI::OPERAND_UNKNOWN:
+ case MCOI::OPERAND_PCREL:
+ O << formatDec(Op.getImm());
+ break;
+ case MCOI::OPERAND_REGISTER:
+ // FIXME: This should be removed and handled somewhere else. Seems to come
+ // from a disassembler bug.
+ O << "/*invalid immediate*/";
+ break;
+ default:
// We hit this for the immediate instruction bits that don't yet have a
// custom printer.
- // TODO: Eventually this should be unnecessary.
- O << formatDec(Op.getImm());
+ llvm_unreachable("unexpected immediate operand type");
}
} else if (Op.isFPImm()) {
// We special case 0.0 because otherwise it will be printed as an integer.
@@ -406,12 +509,12 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
O << "0.0";
else {
const MCInstrDesc &Desc = MII.get(MI->getOpcode());
- const MCRegisterClass &ImmRC = MRI.getRegClass(Desc.OpInfo[OpNo].RegClass);
-
- if (ImmRC.getSize() == 4)
- printImmediate32(FloatToBits(Op.getFPImm()), O);
- else if (ImmRC.getSize() == 8)
- printImmediate64(DoubleToBits(Op.getFPImm()), O);
+ int RCID = Desc.OpInfo[OpNo].RegClass;
+ unsigned RCBits = AMDGPU::getRegBitWidth(MRI.getRegClass(RCID));
+ if (RCBits == 32)
+ printImmediate32(FloatToBits(Op.getFPImm()), STI, O);
+ else if (RCBits == 64)
+ printImmediate64(DoubleToBits(Op.getFPImm()), STI, O);
else
llvm_unreachable("Invalid register class size");
}
@@ -424,32 +527,34 @@ void AMDGPUInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
}
void AMDGPUInstPrinter::printOperandAndFPInputMods(const MCInst *MI,
- unsigned OpNo,
- raw_ostream &O) {
+ unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
unsigned InputModifiers = MI->getOperand(OpNo).getImm();
if (InputModifiers & SISrcMods::NEG)
O << '-';
if (InputModifiers & SISrcMods::ABS)
O << '|';
- printOperand(MI, OpNo + 1, O);
+ printOperand(MI, OpNo + 1, STI, O);
if (InputModifiers & SISrcMods::ABS)
O << '|';
}
void AMDGPUInstPrinter::printOperandAndIntInputMods(const MCInst *MI,
- unsigned OpNo,
- raw_ostream &O) {
+ unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
unsigned InputModifiers = MI->getOperand(OpNo).getImm();
if (InputModifiers & SISrcMods::SEXT)
O << "sext(";
- printOperand(MI, OpNo + 1, O);
+ printOperand(MI, OpNo + 1, STI, O);
if (InputModifiers & SISrcMods::SEXT)
O << ')';
}
-
void AMDGPUInstPrinter::printDPPCtrl(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
unsigned Imm = MI->getOperand(OpNo).getImm();
if (Imm <= 0x0ff) {
O << " quad_perm:[";
@@ -488,19 +593,22 @@ void AMDGPUInstPrinter::printDPPCtrl(const MCInst *MI, unsigned OpNo,
}
void AMDGPUInstPrinter::printRowMask(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
O << " row_mask:";
- printU4ImmOperand(MI, OpNo, O);
+ printU4ImmOperand(MI, OpNo, STI, O);
}
void AMDGPUInstPrinter::printBankMask(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
O << " bank_mask:";
- printU4ImmOperand(MI, OpNo, O);
+ printU4ImmOperand(MI, OpNo, STI, O);
}
void AMDGPUInstPrinter::printBoundCtrl(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
unsigned Imm = MI->getOperand(OpNo).getImm();
if (Imm) {
O << " bound_ctrl:0"; // XXX - this syntax is used in sp3
@@ -509,69 +617,180 @@ void AMDGPUInstPrinter::printBoundCtrl(const MCInst *MI, unsigned OpNo,
void AMDGPUInstPrinter::printSDWASel(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
+ using namespace llvm::AMDGPU::SDWA;
+
unsigned Imm = MI->getOperand(OpNo).getImm();
switch (Imm) {
- case 0: O << "BYTE_0"; break;
- case 1: O << "BYTE_1"; break;
- case 2: O << "BYTE_2"; break;
- case 3: O << "BYTE_3"; break;
- case 4: O << "WORD_0"; break;
- case 5: O << "WORD_1"; break;
- case 6: O << "DWORD"; break;
+ case SdwaSel::BYTE_0: O << "BYTE_0"; break;
+ case SdwaSel::BYTE_1: O << "BYTE_1"; break;
+ case SdwaSel::BYTE_2: O << "BYTE_2"; break;
+ case SdwaSel::BYTE_3: O << "BYTE_3"; break;
+ case SdwaSel::WORD_0: O << "WORD_0"; break;
+ case SdwaSel::WORD_1: O << "WORD_1"; break;
+ case SdwaSel::DWORD: O << "DWORD"; break;
default: llvm_unreachable("Invalid SDWA data select operand");
}
}
void AMDGPUInstPrinter::printSDWADstSel(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
O << "dst_sel:";
printSDWASel(MI, OpNo, O);
}
void AMDGPUInstPrinter::printSDWASrc0Sel(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
O << "src0_sel:";
printSDWASel(MI, OpNo, O);
}
void AMDGPUInstPrinter::printSDWASrc1Sel(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
O << "src1_sel:";
printSDWASel(MI, OpNo, O);
}
void AMDGPUInstPrinter::printSDWADstUnused(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
+ using namespace llvm::AMDGPU::SDWA;
+
O << "dst_unused:";
unsigned Imm = MI->getOperand(OpNo).getImm();
switch (Imm) {
- case 0: O << "UNUSED_PAD"; break;
- case 1: O << "UNUSED_SEXT"; break;
- case 2: O << "UNUSED_PRESERVE"; break;
+ case DstUnused::UNUSED_PAD: O << "UNUSED_PAD"; break;
+ case DstUnused::UNUSED_SEXT: O << "UNUSED_SEXT"; break;
+ case DstUnused::UNUSED_PRESERVE: O << "UNUSED_PRESERVE"; break;
default: llvm_unreachable("Invalid SDWA dest_unused operand");
}
}
+template <unsigned N>
+void AMDGPUInstPrinter::printExpSrcN(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ int EnIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(), AMDGPU::OpName::en);
+ unsigned En = MI->getOperand(EnIdx).getImm();
+
+ // FIXME: What do we do with compr? The meaning of en changes depending on if
+ // compr is set.
+
+ if (En & (1 << N))
+ printRegOperand(MI->getOperand(OpNo).getReg(), O, MRI);
+ else
+ O << "off";
+}
+
+void AMDGPUInstPrinter::printExpSrc0(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ printExpSrcN<0>(MI, OpNo, STI, O);
+}
+
+void AMDGPUInstPrinter::printExpSrc1(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ printExpSrcN<1>(MI, OpNo, STI, O);
+}
+
+void AMDGPUInstPrinter::printExpSrc2(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ printExpSrcN<2>(MI, OpNo, STI, O);
+}
+
+void AMDGPUInstPrinter::printExpSrc3(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ printExpSrcN<3>(MI, OpNo, STI, O);
+}
+
+void AMDGPUInstPrinter::printExpTgt(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ // This is really a 6 bit field.
+ uint32_t Tgt = MI->getOperand(OpNo).getImm() & ((1 << 6) - 1);
+
+ if (Tgt <= 7)
+ O << " mrt" << Tgt;
+ else if (Tgt == 8)
+ O << " mrtz";
+ else if (Tgt == 9)
+ O << " null";
+ else if (Tgt >= 12 && Tgt <= 15)
+ O << " pos" << Tgt - 12;
+ else if (Tgt >= 32 && Tgt <= 63)
+ O << " param" << Tgt - 32;
+ else {
+ // Reserved values 10, 11
+ O << " invalid_target_" << Tgt;
+ }
+}
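As a sanity check on the encoding, a stand-alone mimic of this mapping (illustrative only, not part of the patch) gives, for example, "mrt5" for 5, "mrtz" for 8, "pos1" for 13 and "param3" for 35:

  #include <string>

  static std::string expTgtName(unsigned Tgt) {
    Tgt &= 63;                                      // 6-bit field
    if (Tgt <= 7)  return "mrt" + std::to_string(Tgt);
    if (Tgt == 8)  return "mrtz";
    if (Tgt == 9)  return "null";
    if (Tgt >= 12 && Tgt <= 15) return "pos" + std::to_string(Tgt - 12);
    if (Tgt >= 32 && Tgt <= 63) return "param" + std::to_string(Tgt - 32);
    return "invalid_target_" + std::to_string(Tgt); // 10, 11 and 16-31 are reserved/invalid
  }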
+
void AMDGPUInstPrinter::printInterpSlot(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
unsigned Imm = MI->getOperand(OpNum).getImm();
+ switch (Imm) {
+ case 0:
+ O << "p10";
+ break;
+ case 1:
+ O << "p20";
+ break;
+ case 2:
+ O << "p0";
+ break;
+ default:
+ O << "invalid_param_" << Imm;
+ }
+}
- if (Imm == 2) {
- O << "P0";
- } else if (Imm == 1) {
- O << "P20";
- } else if (Imm == 0) {
- O << "P10";
- } else {
- llvm_unreachable("Invalid interpolation parameter slot");
+void AMDGPUInstPrinter::printInterpAttr(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ unsigned Attr = MI->getOperand(OpNum).getImm();
+ O << "attr" << Attr;
+}
+
+void AMDGPUInstPrinter::printInterpAttrChan(const MCInst *MI, unsigned OpNum,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ unsigned Chan = MI->getOperand(OpNum).getImm();
+ O << '.' << "xyzw"[Chan & 0x3];
+}
+
+void AMDGPUInstPrinter::printVGPRIndexMode(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
+ unsigned Val = MI->getOperand(OpNo).getImm();
+ if (Val == 0) {
+ O << " 0";
+ return;
}
+
+ if (Val & VGPRIndexMode::DST_ENABLE)
+ O << " dst";
+
+ if (Val & VGPRIndexMode::SRC0_ENABLE)
+ O << " src0";
+
+ if (Val & VGPRIndexMode::SRC1_ENABLE)
+ O << " src1";
+
+ if (Val & VGPRIndexMode::SRC2_ENABLE)
+ O << " src2";
}
void AMDGPUInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
- printOperand(MI, OpNo, O);
+ printOperand(MI, OpNo, STI, O);
O << ", ";
- printOperand(MI, OpNo + 1, O);
+ printOperand(MI, OpNo + 1, STI, O);
}
void AMDGPUInstPrinter::printIfSet(const MCInst *MI, unsigned OpNo,
@@ -595,23 +814,25 @@ void AMDGPUInstPrinter::printIfSet(const MCInst *MI, unsigned OpNo,
}
void AMDGPUInstPrinter::printAbs(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI, raw_ostream &O) {
printIfSet(MI, OpNo, O, '|');
}
void AMDGPUInstPrinter::printClamp(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI, raw_ostream &O) {
printIfSet(MI, OpNo, O, "_SAT");
}
void AMDGPUInstPrinter::printClampSI(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
if (MI->getOperand(OpNo).getImm())
O << " clamp";
}
void AMDGPUInstPrinter::printOModSI(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI,
+ raw_ostream &O) {
int Imm = MI->getOperand(OpNo).getImm();
if (Imm == SIOutMods::MUL2)
O << " mul:2";
@@ -622,6 +843,7 @@ void AMDGPUInstPrinter::printOModSI(const MCInst *MI, unsigned OpNo,
}
void AMDGPUInstPrinter::printLiteral(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
assert(Op.isImm() || Op.isExpr());
@@ -635,17 +857,17 @@ void AMDGPUInstPrinter::printLiteral(const MCInst *MI, unsigned OpNo,
}
void AMDGPUInstPrinter::printLast(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI, raw_ostream &O) {
printIfSet(MI, OpNo, O, "*", " ");
}
void AMDGPUInstPrinter::printNeg(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI, raw_ostream &O) {
printIfSet(MI, OpNo, O, '-');
}
void AMDGPUInstPrinter::printOMOD(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI, raw_ostream &O) {
switch (MI->getOperand(OpNo).getImm()) {
default: break;
case 1:
@@ -661,22 +883,24 @@ void AMDGPUInstPrinter::printOMOD(const MCInst *MI, unsigned OpNo,
}
void AMDGPUInstPrinter::printRel(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI, raw_ostream &O) {
printIfSet(MI, OpNo, O, '+');
}
void AMDGPUInstPrinter::printUpdateExecMask(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
printIfSet(MI, OpNo, O, "ExecMask,");
}
void AMDGPUInstPrinter::printUpdatePred(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
printIfSet(MI, OpNo, O, "Pred,");
}
void AMDGPUInstPrinter::printWrite(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI, raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.getImm() == 0) {
O << " (MASKED)";
@@ -684,7 +908,7 @@ void AMDGPUInstPrinter::printWrite(const MCInst *MI, unsigned OpNo,
}
void AMDGPUInstPrinter::printSel(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
+ raw_ostream &O) {
const char * chans = "XYZW";
int sel = MI->getOperand(OpNo).getImm();
@@ -708,6 +932,7 @@ void AMDGPUInstPrinter::printSel(const MCInst *MI, unsigned OpNo,
}
void AMDGPUInstPrinter::printBankSwizzle(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
int BankSwizzle = MI->getOperand(OpNo).getImm();
switch (BankSwizzle) {
@@ -729,11 +954,10 @@ void AMDGPUInstPrinter::printBankSwizzle(const MCInst *MI, unsigned OpNo,
default:
break;
}
- return;
}
void AMDGPUInstPrinter::printRSel(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI, raw_ostream &O) {
unsigned Sel = MI->getOperand(OpNo).getImm();
switch (Sel) {
case 0:
@@ -763,7 +987,7 @@ void AMDGPUInstPrinter::printRSel(const MCInst *MI, unsigned OpNo,
}
void AMDGPUInstPrinter::printCT(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI, raw_ostream &O) {
unsigned CT = MI->getOperand(OpNo).getImm();
switch (CT) {
case 0:
@@ -778,7 +1002,7 @@ void AMDGPUInstPrinter::printCT(const MCInst *MI, unsigned OpNo,
}
void AMDGPUInstPrinter::printKCache(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI, raw_ostream &O) {
int KCacheMode = MI->getOperand(OpNo).getImm();
if (KCacheMode > 0) {
int KCacheBank = MI->getOperand(OpNo - 2).getImm();
@@ -790,6 +1014,7 @@ void AMDGPUInstPrinter::printKCache(const MCInst *MI, unsigned OpNo,
}
void AMDGPUInstPrinter::printSendMsg(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
using namespace llvm::AMDGPU::SendMsg;
@@ -825,32 +1050,34 @@ void AMDGPUInstPrinter::printSendMsg(const MCInst *MI, unsigned OpNo,
O << "sendmsg(" << IdSymbolic[Id] << ", " << OpSysSymbolic[OpSys] << ')';
return;
}
- } while (0);
+ } while (false);
O << SImm16; // Unknown simm16 code.
}
void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI,
raw_ostream &O) {
+ IsaVersion IV = getIsaVersion(STI.getFeatureBits());
+
unsigned SImm16 = MI->getOperand(OpNo).getImm();
- unsigned Vmcnt = SImm16 & 0xF;
- unsigned Expcnt = (SImm16 >> 4) & 0x7;
- unsigned Lgkmcnt = (SImm16 >> 8) & 0xF;
+ unsigned Vmcnt, Expcnt, Lgkmcnt;
+ decodeWaitcnt(IV, SImm16, Vmcnt, Expcnt, Lgkmcnt);
bool NeedSpace = false;
- if (Vmcnt != 0xF) {
+ if (Vmcnt != getVmcntBitMask(IV)) {
O << "vmcnt(" << Vmcnt << ')';
NeedSpace = true;
}
- if (Expcnt != 0x7) {
+ if (Expcnt != getExpcntBitMask(IV)) {
if (NeedSpace)
O << ' ';
O << "expcnt(" << Expcnt << ')';
NeedSpace = true;
}
- if (Lgkmcnt != 0xF) {
+ if (Lgkmcnt != getLgkmcntBitMask(IV)) {
if (NeedSpace)
O << ' ';
O << "lgkmcnt(" << Lgkmcnt << ')';
@@ -858,7 +1085,7 @@ void AMDGPUInstPrinter::printWaitFlag(const MCInst *MI, unsigned OpNo,
}
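For reference, the removed masks implemented the legacy field layout: vmcnt in bits 3:0, expcnt in bits 6:4 and lgkmcnt in bits 11:8 of the simm16 operand. So, for example, an operand of 0x0f70 decodes to vmcnt = 0, expcnt = 7 and lgkmcnt = 0xf; since expcnt and lgkmcnt sit at their "don't wait" bitmask values, only "vmcnt(0)" is printed. With decodeWaitcnt() and the get*BitMask() helpers the same logic now adapts to ISAs whose counter fields are wider or placed differently.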
void AMDGPUInstPrinter::printHwreg(const MCInst *MI, unsigned OpNo,
- raw_ostream &O) {
+ const MCSubtargetInfo &STI, raw_ostream &O) {
using namespace llvm::AMDGPU::Hwreg;
unsigned SImm16 = MI->getOperand(OpNo).getImm();
diff --git a/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h b/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
index f5a290f16045..a6d348ff0f12 100644
--- a/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
+++ b/contrib/llvm/lib/Target/AMDGPU/InstPrinter/AMDGPUInstPrinter.h
@@ -24,7 +24,8 @@ public:
: MCInstPrinter(MAI, MII, MRI) {}
//Autogenerated by tblgen
- void printInstruction(const MCInst *MI, raw_ostream &O);
+ void printInstruction(const MCInst *MI, const MCSubtargetInfo &STI,
+ raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
@@ -33,76 +34,159 @@ public:
const MCRegisterInfo &MRI);
private:
- void printU4ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU4ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printU8ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printU16ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printU16ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printU4ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU8ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU16ImmDecOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printU32ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printNamedBit(const MCInst* MI, unsigned OpNo, raw_ostream& O,
+ void printU32ImmOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printNamedBit(const MCInst *MI, unsigned OpNo, raw_ostream &O,
StringRef BitName);
void printOffen(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printIdxen(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printAddr64(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printMBUFOffset(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printOffset(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printOffset0(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printOffset1(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printSMRDOffset(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printSMRDLiteralOffset(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printGDS(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printGLC(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printSLC(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printTFE(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printDMask(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printUNorm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printDA(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printR128(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printLWE(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printOffset(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printOffset0(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printOffset1(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printSMRDOffset8(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printSMRDOffset20(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printSMRDLiteralOffset(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printGDS(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printGLC(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printSLC(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printTFE(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printDMask(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printUNorm(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printDA(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printR128(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printLWE(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printExpCompr(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printExpVM(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+
void printRegOperand(unsigned RegNo, raw_ostream &O);
- void printVOPDst(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printImmediate32(uint32_t I, raw_ostream &O);
- void printImmediate64(uint64_t I, raw_ostream &O);
- void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printOperandAndFPInputMods(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printOperandAndIntInputMods(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printDPPCtrl(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printRowMask(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printBankMask(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printBoundCtrl(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printVOPDst(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printImmediate16(uint32_t Imm, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printImmediate32(uint32_t Imm, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printImmediate64(uint64_t Imm, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printOperand(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printOperandAndFPInputMods(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printOperandAndIntInputMods(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printDPPCtrl(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printRowMask(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printBankMask(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printBoundCtrl(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
void printSDWASel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printSDWADstSel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printSDWASrc0Sel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printSDWASrc1Sel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printSDWADstUnused(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- static void printInterpSlot(const MCInst *MI, unsigned OpNum, raw_ostream &O);
- void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printSDWADstSel(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printSDWASrc0Sel(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printSDWASrc1Sel(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printSDWADstUnused(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printInterpSlot(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printInterpAttr(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printInterpAttrChan(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+
+ void printVGPRIndexMode(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printMemOperand(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+
+
+ template <unsigned N>
+ void printExpSrcN(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printExpSrc0(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printExpSrc1(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printExpSrc2(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printExpSrc3(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printExpTgt(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+
static void printIfSet(const MCInst *MI, unsigned OpNo, raw_ostream &O,
StringRef Asm, StringRef Default = "");
- static void printIfSet(const MCInst *MI, unsigned OpNo,
- raw_ostream &O, char Asm);
- static void printAbs(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- static void printClamp(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- static void printClampSI(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- static void printOModSI(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- void printLiteral(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- static void printLast(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- static void printNeg(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- static void printOMOD(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- static void printRel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- static void printUpdateExecMask(const MCInst *MI, unsigned OpNo,
- raw_ostream &O);
- static void printUpdatePred(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- static void printWrite(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- static void printSel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- static void printBankSwizzle(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- static void printRSel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- static void printCT(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- static void printKCache(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- static void printSendMsg(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- static void printWaitFlag(const MCInst *MI, unsigned OpNo, raw_ostream &O);
- static void printHwreg(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ static void printIfSet(const MCInst *MI, unsigned OpNo, raw_ostream &O,
+ char Asm);
+ void printAbs(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printClamp(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printClampSI(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printOModSI(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printLiteral(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printLast(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printNeg(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printOMOD(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printRel(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printUpdateExecMask(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printUpdatePred(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printWrite(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printSel(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printBankSwizzle(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printRSel(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printCT(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printKCache(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printSendMsg(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
+ void printWaitFlag(const MCInst *MI, unsigned OpNo,
+ const MCSubtargetInfo &STI, raw_ostream &O);
+ void printHwreg(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
+ raw_ostream &O);
};
} // End namespace llvm
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
index 1cb9d21408c6..ffb92aae599e 100644
--- a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp
@@ -13,6 +13,7 @@
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCValue.h"
@@ -22,30 +23,19 @@ using namespace llvm;
namespace {
-class AMDGPUMCObjectWriter : public MCObjectWriter {
-public:
- AMDGPUMCObjectWriter(raw_pwrite_stream &OS) : MCObjectWriter(OS, true) {}
- void executePostLayoutBinding(MCAssembler &Asm,
- const MCAsmLayout &Layout) override {
- //XXX: Implement if necessary.
- }
- void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout,
- const MCFragment *Fragment, const MCFixup &Fixup,
- MCValue Target, bool &IsPCRel,
- uint64_t &FixedValue) override {
- assert(!"Not implemented");
- }
-
- void writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override;
-
-};
-
class AMDGPUAsmBackend : public MCAsmBackend {
public:
AMDGPUAsmBackend(const Target &T)
: MCAsmBackend() {}
unsigned getNumFixupKinds() const override { return AMDGPU::NumTargetFixupKinds; };
+
+ void processFixupValue(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFixup &Fixup, const MCFragment *DF,
+ const MCValue &Target, uint64_t &Value,
+ bool &IsResolved) override;
+
void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
uint64_t Value, bool IsPCRel) const override;
bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
@@ -55,7 +45,7 @@ public:
}
void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
MCInst &Res) const override {
- assert(!"Not implemented");
+ llvm_unreachable("Not implemented");
}
bool mayNeedRelaxation(const MCInst &Inst) const override { return false; }
bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override;
@@ -65,15 +55,10 @@ public:
} //End anonymous namespace
-void AMDGPUMCObjectWriter::writeObject(MCAssembler &Asm,
- const MCAsmLayout &Layout) {
- for (MCAssembler::iterator I = Asm.begin(), E = Asm.end(); I != E; ++I) {
- Asm.writeSectionData(&*I, Layout);
- }
-}
-
static unsigned getFixupKindNumBytes(unsigned Kind) {
switch (Kind) {
+ case AMDGPU::fixup_si_sopp_br:
+ return 2;
case FK_SecRel_1:
case FK_Data_1:
return 1;
@@ -92,40 +77,77 @@ static unsigned getFixupKindNumBytes(unsigned Kind) {
}
}
+static uint64_t adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
+ MCContext *Ctx) {
+ int64_t SignedValue = static_cast<int64_t>(Value);
+
+ switch (Fixup.getKind()) {
+ case AMDGPU::fixup_si_sopp_br: {
+ int64_t BrImm = (SignedValue - 4) / 4;
+
+ if (Ctx && !isInt<16>(BrImm))
+ Ctx->reportError(Fixup.getLoc(), "branch size exceeds simm16");
+
+ return BrImm;
+ }
+ case FK_Data_1:
+ case FK_Data_2:
+ case FK_Data_4:
+ case FK_Data_8:
+ case FK_PCRel_4:
+ case FK_SecRel_4:
+ return Value;
+ default:
+ llvm_unreachable("unhandled fixup kind");
+ }
+}
+
+void AMDGPUAsmBackend::processFixupValue(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFixup &Fixup, const MCFragment *DF,
+ const MCValue &Target, uint64_t &Value,
+ bool &IsResolved) {
+ MCValue Res;
+
+ // When we have complex expressions like: BB0_1 + (BB0_2 - 4), which are
+ // used for long branches, this function will be called with
+ // IsResolved = false and Value set to some pre-computed value. In
+ // the example above, the value would be:
+ // (BB0_1 + (BB0_2 - 4)) - CurrentOffsetFromStartOfFunction.
+ // This is not what we want. We just want the expression computation
+ // only. The reason the MC layer subtracts the current offset from the
+ // expression is because the fixup is of kind FK_PCRel_4.
+ // For these scenarios, evaluateAsValue gives us the computation that we
+ // want.
+ if (!IsResolved && Fixup.getValue()->evaluateAsValue(Res, Layout) &&
+ Res.isAbsolute()) {
+ Value = Res.getConstant();
+ IsResolved = true;
+
+ }
+ if (IsResolved)
+ Value = adjustFixupValue(Fixup, Value, &Asm.getContext());
+}
+
void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
unsigned DataSize, uint64_t Value,
bool IsPCRel) const {
+ if (!Value)
+ return; // Doesn't change encoding.
- switch ((unsigned)Fixup.getKind()) {
- case AMDGPU::fixup_si_sopp_br: {
- int64_t BrImm = ((int64_t)Value - 4) / 4;
- if (!isInt<16>(BrImm))
- report_fatal_error("branch size exceeds simm16");
-
- uint16_t *Dst = (uint16_t*)(Data + Fixup.getOffset());
- *Dst = BrImm;
- break;
- }
-
- default: {
- // FIXME: Copied from AArch64
- unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
- if (!Value)
- return; // Doesn't change encoding.
- MCFixupKindInfo Info = getFixupKindInfo(Fixup.getKind());
-
- // Shift the value into position.
- Value <<= Info.TargetOffset;
-
- unsigned Offset = Fixup.getOffset();
- assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!");
-
- // For each byte of the fragment that the fixup touches, mask in the
- // bits from the fixup value.
- for (unsigned i = 0; i != NumBytes; ++i)
- Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff);
- }
- }
+ MCFixupKindInfo Info = getFixupKindInfo(Fixup.getKind());
+
+ // Shift the value into position.
+ Value <<= Info.TargetOffset;
+
+ unsigned NumBytes = getFixupKindNumBytes(Fixup.getKind());
+ uint32_t Offset = Fixup.getOffset();
+ assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!");
+
+ // For each byte of the fragment that the fixup touches, mask in the bits from
+ // the fixup value.
+ for (unsigned i = 0; i != NumBytes; ++i)
+ Data[Offset + i] |= static_cast<uint8_t>((Value >> (i * 8)) & 0xff);
}
const MCFixupKindInfo &AMDGPUAsmBackend::getFixupKindInfo(
@@ -171,7 +193,8 @@ public:
MCAsmBackend *llvm::createAMDGPUAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU) {
+ const Triple &TT, StringRef CPU,
+ const MCTargetOptions &Options) {
// Use 64-bit ELF for amdgcn
return new ELFAMDGPUAsmBackend(T, TT);
}
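A minimal standalone sketch of the sopp_br adjustment performed by adjustFixupValue() above; the 68-byte distance is a made-up example, not a value taken from this change:

  #include <cassert>
  #include <cstdint>

  int main() {
    // Hypothetical PC-relative fixup value, measured in bytes from the
    // start of the s_branch instruction to its target.
    int64_t Value = 68;
    // Same formula as the fixup_si_sopp_br case: the simm16 operand counts
    // 4-byte words starting at the instruction that follows the branch.
    int64_t BrImm = (Value - 4) / 4;
    assert(BrImm == 16);
    return 0;
  }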
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
index b4e3b8e896bd..1847d7a67328 100644
--- a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUELFObjectWriter.cpp
@@ -38,26 +38,40 @@ unsigned AMDGPUELFObjectWriter::getRelocType(MCContext &Ctx,
const MCValue &Target,
const MCFixup &Fixup,
bool IsPCRel) const {
- // SCRATCH_RSRC_DWORD[01] is a special global variable that represents
- // the scratch buffer.
- if (Target.getSymA()->getSymbol().getName() == "SCRATCH_RSRC_DWORD0")
- return ELF::R_AMDGPU_ABS32_LO;
- if (Target.getSymA()->getSymbol().getName() == "SCRATCH_RSRC_DWORD1")
- return ELF::R_AMDGPU_ABS32_HI;
+ if (const auto *SymA = Target.getSymA()) {
+ // SCRATCH_RSRC_DWORD[01] is a special global variable that represents
+ // the scratch buffer.
+ if (SymA->getSymbol().getName() == "SCRATCH_RSRC_DWORD0")
+ return ELF::R_AMDGPU_ABS32_LO;
+
+ if (SymA->getSymbol().getName() == "SCRATCH_RSRC_DWORD1")
+ return ELF::R_AMDGPU_ABS32_HI;
+ }
switch (Target.getAccessVariant()) {
default:
break;
case MCSymbolRefExpr::VK_GOTPCREL:
return ELF::R_AMDGPU_GOTPCREL;
+ case MCSymbolRefExpr::VK_AMDGPU_GOTPCREL32_LO:
+ return ELF::R_AMDGPU_GOTPCREL32_LO;
+ case MCSymbolRefExpr::VK_AMDGPU_GOTPCREL32_HI:
+ return ELF::R_AMDGPU_GOTPCREL32_HI;
+ case MCSymbolRefExpr::VK_AMDGPU_REL32_LO:
+ return ELF::R_AMDGPU_REL32_LO;
+ case MCSymbolRefExpr::VK_AMDGPU_REL32_HI:
+ return ELF::R_AMDGPU_REL32_HI;
}
switch (Fixup.getKind()) {
default: break;
case FK_PCRel_4:
return ELF::R_AMDGPU_REL32;
+ case FK_Data_4:
case FK_SecRel_4:
return ELF::R_AMDGPU_ABS32;
+ case FK_Data_8:
+ return ELF::R_AMDGPU_ABS64;
}
llvm_unreachable("unhandled relocation type");
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h
index c942ea904085..3d3858ab47ec 100644
--- a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCCodeEmitter.h
@@ -21,11 +21,19 @@
namespace llvm {
class MCInst;
+class MCInstrInfo;
class MCOperand;
class MCSubtargetInfo;
+class FeatureBitset;
class AMDGPUMCCodeEmitter : public MCCodeEmitter {
virtual void anchor();
+
+protected:
+ const MCInstrInfo &MCII;
+
+ AMDGPUMCCodeEmitter(const MCInstrInfo &mcii) : MCII(mcii) {}
+
public:
uint64_t getBinaryCodeForInstr(const MCInst &MI,
@@ -43,6 +51,11 @@ public:
const MCSubtargetInfo &STI) const {
return 0;
}
+
+protected:
+ uint64_t computeAvailableFeatures(const FeatureBitset &FB) const;
+ void verifyInstructionPredicates(const MCInst &MI,
+ uint64_t AvailableFeatures) const;
};
} // End namespace llvm
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
index a0d9aab114fc..136e6ec4ceb5 100644
--- a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
@@ -86,7 +86,7 @@ static MCStreamer *createMCStreamer(const Triple &T, MCContext &Context,
}
extern "C" void LLVMInitializeAMDGPUTargetMC() {
- for (Target *T : {&TheAMDGPUTarget, &TheGCNTarget}) {
+ for (Target *T : {&getTheAMDGPUTarget(), &getTheGCNTarget()}) {
RegisterMCAsmInfo<AMDGPUMCAsmInfo> X(*T);
TargetRegistry::RegisterMCInstrInfo(*T, createAMDGPUMCInstrInfo);
@@ -98,14 +98,15 @@ extern "C" void LLVMInitializeAMDGPUTargetMC() {
}
// R600 specific registration
- TargetRegistry::RegisterMCCodeEmitter(TheAMDGPUTarget,
+ TargetRegistry::RegisterMCCodeEmitter(getTheAMDGPUTarget(),
createR600MCCodeEmitter);
// GCN specific registration
- TargetRegistry::RegisterMCCodeEmitter(TheGCNTarget, createSIMCCodeEmitter);
+ TargetRegistry::RegisterMCCodeEmitter(getTheGCNTarget(),
+ createSIMCCodeEmitter);
- TargetRegistry::RegisterAsmTargetStreamer(TheGCNTarget,
+ TargetRegistry::RegisterAsmTargetStreamer(getTheGCNTarget(),
createAMDGPUAsmTargetStreamer);
- TargetRegistry::RegisterObjectTargetStreamer(TheGCNTarget,
- createAMDGPUObjectTargetStreamer);
+ TargetRegistry::RegisterObjectTargetStreamer(
+ getTheGCNTarget(), createAMDGPUObjectTargetStreamer);
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
index 9ab7940812ba..548bad56e174 100644
--- a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.h
@@ -19,7 +19,6 @@
#include "llvm/Support/DataTypes.h"
namespace llvm {
-class StringRef;
class MCAsmBackend;
class MCCodeEmitter;
class MCContext;
@@ -27,13 +26,14 @@ class MCInstrInfo;
class MCObjectWriter;
class MCRegisterInfo;
class MCSubtargetInfo;
+class MCTargetOptions;
+class StringRef;
class Target;
class Triple;
class raw_pwrite_stream;
-class raw_ostream;
-extern Target TheAMDGPUTarget;
-extern Target TheGCNTarget;
+Target &getTheAMDGPUTarget();
+Target &getTheGCNTarget();
MCCodeEmitter *createR600MCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
@@ -44,7 +44,8 @@ MCCodeEmitter *createSIMCCodeEmitter(const MCInstrInfo &MCII,
MCContext &Ctx);
MCAsmBackend *createAMDGPUAsmBackend(const Target &T, const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU);
+ const Triple &TT, StringRef CPU,
+ const MCTargetOptions &Options);
MCObjectWriter *createAMDGPUELFObjectWriter(bool Is64Bit,
bool HasRelocationAddend,
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp
new file mode 100644
index 000000000000..95387ad1627c
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.cpp
@@ -0,0 +1,408 @@
+//===-- AMDGPURuntimeMD.cpp - Generates runtime metadata ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+///
+/// Generates AMDGPU runtime metadata for YAML mapping.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#include "AMDGPU.h"
+#include "AMDGPURuntimeMetadata.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/YAMLTraits.h"
+#include <vector>
+#include "AMDGPURuntimeMD.h"
+
+using namespace llvm;
+using namespace ::AMDGPU::RuntimeMD;
+
+static cl::opt<bool>
+DumpRuntimeMD("amdgpu-dump-rtmd",
+ cl::desc("Dump AMDGPU runtime metadata"));
+
+static cl::opt<bool>
+CheckRuntimeMDParser("amdgpu-check-rtmd-parser", cl::Hidden,
+ cl::desc("Check AMDGPU runtime metadata YAML parser"));
+
+LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint8_t)
+LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint32_t)
+LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(std::string)
+LLVM_YAML_IS_SEQUENCE_VECTOR(Kernel::Metadata)
+LLVM_YAML_IS_SEQUENCE_VECTOR(KernelArg::Metadata)
+
+namespace llvm {
+namespace yaml {
+
+template <> struct MappingTraits<KernelArg::Metadata> {
+ static void mapping(IO &YamlIO, KernelArg::Metadata &A) {
+ YamlIO.mapRequired(KeyName::ArgSize, A.Size);
+ YamlIO.mapRequired(KeyName::ArgAlign, A.Align);
+ YamlIO.mapOptional(KeyName::ArgPointeeAlign, A.PointeeAlign, 0U);
+ YamlIO.mapRequired(KeyName::ArgKind, A.Kind);
+ YamlIO.mapRequired(KeyName::ArgValueType, A.ValueType);
+ YamlIO.mapOptional(KeyName::ArgTypeName, A.TypeName, std::string());
+ YamlIO.mapOptional(KeyName::ArgName, A.Name, std::string());
+ YamlIO.mapOptional(KeyName::ArgAddrQual, A.AddrQual, INVALID_ADDR_QUAL);
+ YamlIO.mapOptional(KeyName::ArgAccQual, A.AccQual, INVALID_ACC_QUAL);
+ YamlIO.mapOptional(KeyName::ArgIsVolatile, A.IsVolatile, uint8_t(0));
+ YamlIO.mapOptional(KeyName::ArgIsConst, A.IsConst, uint8_t(0));
+ YamlIO.mapOptional(KeyName::ArgIsRestrict, A.IsRestrict, uint8_t(0));
+ YamlIO.mapOptional(KeyName::ArgIsPipe, A.IsPipe, uint8_t(0));
+ }
+ static const bool flow = true;
+};
+
+template <> struct MappingTraits<Kernel::Metadata> {
+ static void mapping(IO &YamlIO, Kernel::Metadata &K) {
+ YamlIO.mapRequired(KeyName::KernelName, K.Name);
+ YamlIO.mapOptional(KeyName::Language, K.Language, std::string());
+ YamlIO.mapOptional(KeyName::LanguageVersion, K.LanguageVersion);
+ YamlIO.mapOptional(KeyName::ReqdWorkGroupSize, K.ReqdWorkGroupSize);
+ YamlIO.mapOptional(KeyName::WorkGroupSizeHint, K.WorkGroupSizeHint);
+ YamlIO.mapOptional(KeyName::VecTypeHint, K.VecTypeHint, std::string());
+ YamlIO.mapOptional(KeyName::KernelIndex, K.KernelIndex,
+ INVALID_KERNEL_INDEX);
+ YamlIO.mapOptional(KeyName::NoPartialWorkGroups, K.NoPartialWorkGroups,
+ uint8_t(0));
+ YamlIO.mapRequired(KeyName::Args, K.Args);
+ }
+ static const bool flow = true;
+};
+
+template <> struct MappingTraits<Program::Metadata> {
+ static void mapping(IO &YamlIO, Program::Metadata &Prog) {
+ YamlIO.mapRequired(KeyName::MDVersion, Prog.MDVersionSeq);
+ YamlIO.mapOptional(KeyName::PrintfInfo, Prog.PrintfInfo);
+ YamlIO.mapOptional(KeyName::Kernels, Prog.Kernels);
+ }
+ static const bool flow = true;
+};
+
+} // end namespace yaml
+} // end namespace llvm
+
+// Get a vector of three integer values from MDNode \p Node.
+static std::vector<uint32_t> getThreeInt32(MDNode *Node) {
+ assert(Node->getNumOperands() == 3);
+ std::vector<uint32_t> V;
+ for (const MDOperand &Op : Node->operands()) {
+ const ConstantInt *CI = mdconst::extract<ConstantInt>(Op);
+ V.push_back(CI->getZExtValue());
+ }
+ return V;
+}
+
+static std::string getOCLTypeName(Type *Ty, bool Signed) {
+ switch (Ty->getTypeID()) {
+ case Type::HalfTyID:
+ return "half";
+ case Type::FloatTyID:
+ return "float";
+ case Type::DoubleTyID:
+ return "double";
+ case Type::IntegerTyID: {
+ if (!Signed)
+ return (Twine('u') + getOCLTypeName(Ty, true)).str();
+ unsigned BW = Ty->getIntegerBitWidth();
+ switch (BW) {
+ case 8:
+ return "char";
+ case 16:
+ return "short";
+ case 32:
+ return "int";
+ case 64:
+ return "long";
+ default:
+ return (Twine('i') + Twine(BW)).str();
+ }
+ }
+ case Type::VectorTyID: {
+ VectorType *VecTy = cast<VectorType>(Ty);
+ Type *EleTy = VecTy->getElementType();
+ unsigned Size = VecTy->getVectorNumElements();
+ return (Twine(getOCLTypeName(EleTy, Signed)) + Twine(Size)).str();
+ }
+ default:
+ return "unknown";
+ }
+}
+
+static KernelArg::ValueType getRuntimeMDValueType(
+ Type *Ty, StringRef TypeName) {
+ switch (Ty->getTypeID()) {
+ case Type::HalfTyID:
+ return KernelArg::F16;
+ case Type::FloatTyID:
+ return KernelArg::F32;
+ case Type::DoubleTyID:
+ return KernelArg::F64;
+ case Type::IntegerTyID: {
+ bool Signed = !TypeName.startswith("u");
+ switch (Ty->getIntegerBitWidth()) {
+ case 8:
+ return Signed ? KernelArg::I8 : KernelArg::U8;
+ case 16:
+ return Signed ? KernelArg::I16 : KernelArg::U16;
+ case 32:
+ return Signed ? KernelArg::I32 : KernelArg::U32;
+ case 64:
+ return Signed ? KernelArg::I64 : KernelArg::U64;
+ default:
+ // Runtime does not recognize other integer types. Report as struct type.
+ return KernelArg::Struct;
+ }
+ }
+ case Type::VectorTyID:
+ return getRuntimeMDValueType(Ty->getVectorElementType(), TypeName);
+ case Type::PointerTyID:
+ return getRuntimeMDValueType(Ty->getPointerElementType(), TypeName);
+ default:
+ return KernelArg::Struct;
+ }
+}
+
+static KernelArg::AddressSpaceQualifer getRuntimeAddrSpace(
+ AMDGPUAS::AddressSpaces A) {
+ switch (A) {
+ case AMDGPUAS::GLOBAL_ADDRESS:
+ return KernelArg::Global;
+ case AMDGPUAS::CONSTANT_ADDRESS:
+ return KernelArg::Constant;
+ case AMDGPUAS::LOCAL_ADDRESS:
+ return KernelArg::Local;
+ case AMDGPUAS::FLAT_ADDRESS:
+ return KernelArg::Generic;
+ case AMDGPUAS::REGION_ADDRESS:
+ return KernelArg::Region;
+ default:
+ return KernelArg::Private;
+ }
+}
+
+static KernelArg::Metadata getRuntimeMDForKernelArg(const DataLayout &DL,
+ Type *T, KernelArg::Kind Kind, StringRef BaseTypeName = "",
+ StringRef TypeName = "", StringRef ArgName = "", StringRef TypeQual = "",
+ StringRef AccQual = "") {
+
+ KernelArg::Metadata Arg;
+
+ // Set ArgSize and ArgAlign.
+ Arg.Size = DL.getTypeAllocSize(T);
+ Arg.Align = DL.getABITypeAlignment(T);
+ if (auto PT = dyn_cast<PointerType>(T)) {
+ auto ET = PT->getElementType();
+ if (PT->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && ET->isSized())
+ Arg.PointeeAlign = DL.getABITypeAlignment(ET);
+ }
+
+ // Set ArgTypeName.
+ Arg.TypeName = TypeName;
+
+ // Set ArgName.
+ Arg.Name = ArgName;
+
+ // Set ArgIsVolatile, ArgIsRestrict, ArgIsConst and ArgIsPipe.
+ SmallVector<StringRef, 1> SplitQ;
+ TypeQual.split(SplitQ, " ", -1, false /* Drop empty entry */);
+
+ for (StringRef KeyName : SplitQ) {
+ auto *P = StringSwitch<uint8_t *>(KeyName)
+ .Case("volatile", &Arg.IsVolatile)
+ .Case("restrict", &Arg.IsRestrict)
+ .Case("const", &Arg.IsConst)
+ .Case("pipe", &Arg.IsPipe)
+ .Default(nullptr);
+ if (P)
+ *P = 1;
+ }
+
+ // Set ArgKind.
+ Arg.Kind = Kind;
+
+ // Set ArgValueType.
+ Arg.ValueType = getRuntimeMDValueType(T, BaseTypeName);
+
+ // Set ArgAccQual.
+ if (!AccQual.empty()) {
+ Arg.AccQual = StringSwitch<KernelArg::AccessQualifer>(AccQual)
+ .Case("read_only", KernelArg::ReadOnly)
+ .Case("write_only", KernelArg::WriteOnly)
+ .Case("read_write", KernelArg::ReadWrite)
+ .Default(KernelArg::AccNone);
+ }
+
+ // Set ArgAddrQual.
+ if (auto *PT = dyn_cast<PointerType>(T)) {
+ Arg.AddrQual = getRuntimeAddrSpace(static_cast<AMDGPUAS::AddressSpaces>(
+ PT->getAddressSpace()));
+ }
+
+ return Arg;
+}
+
+static Kernel::Metadata getRuntimeMDForKernel(const Function &F) {
+ Kernel::Metadata Kernel;
+ Kernel.Name = F.getName();
+ auto &M = *F.getParent();
+
+ // Set Language and LanguageVersion.
+ if (auto MD = M.getNamedMetadata("opencl.ocl.version")) {
+ if (MD->getNumOperands() != 0) {
+ auto Node = MD->getOperand(0);
+ if (Node->getNumOperands() > 1) {
+ Kernel.Language = "OpenCL C";
+ uint16_t Major = mdconst::extract<ConstantInt>(Node->getOperand(0))
+ ->getZExtValue();
+ uint16_t Minor = mdconst::extract<ConstantInt>(Node->getOperand(1))
+ ->getZExtValue();
+ Kernel.LanguageVersion.push_back(Major);
+ Kernel.LanguageVersion.push_back(Minor);
+ }
+ }
+ }
+
+ const DataLayout &DL = F.getParent()->getDataLayout();
+ for (auto &Arg : F.args()) {
+ unsigned I = Arg.getArgNo();
+ Type *T = Arg.getType();
+ auto TypeName = dyn_cast<MDString>(F.getMetadata(
+ "kernel_arg_type")->getOperand(I))->getString();
+ auto BaseTypeName = cast<MDString>(F.getMetadata(
+ "kernel_arg_base_type")->getOperand(I))->getString();
+ StringRef ArgName;
+ if (auto ArgNameMD = F.getMetadata("kernel_arg_name"))
+ ArgName = cast<MDString>(ArgNameMD->getOperand(I))->getString();
+ auto TypeQual = cast<MDString>(F.getMetadata(
+ "kernel_arg_type_qual")->getOperand(I))->getString();
+ auto AccQual = cast<MDString>(F.getMetadata(
+ "kernel_arg_access_qual")->getOperand(I))->getString();
+ KernelArg::Kind Kind;
+ if (TypeQual.find("pipe") != StringRef::npos)
+ Kind = KernelArg::Pipe;
+ else Kind = StringSwitch<KernelArg::Kind>(BaseTypeName)
+ .Case("sampler_t", KernelArg::Sampler)
+ .Case("queue_t", KernelArg::Queue)
+ .Cases("image1d_t", "image1d_array_t", "image1d_buffer_t",
+ "image2d_t" , "image2d_array_t", KernelArg::Image)
+ .Cases("image2d_depth_t", "image2d_array_depth_t",
+ "image2d_msaa_t", "image2d_array_msaa_t",
+ "image2d_msaa_depth_t", KernelArg::Image)
+ .Cases("image2d_array_msaa_depth_t", "image3d_t",
+ KernelArg::Image)
+ .Default(isa<PointerType>(T) ?
+ (T->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ?
+ KernelArg::DynamicSharedPointer :
+ KernelArg::GlobalBuffer) :
+ KernelArg::ByValue);
+ Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, T, Kind,
+ BaseTypeName, TypeName, ArgName, TypeQual, AccQual));
+ }
+
+ // Emit hidden kernel arguments for OpenCL kernels.
+ if (F.getParent()->getNamedMetadata("opencl.ocl.version")) {
+ auto Int64T = Type::getInt64Ty(F.getContext());
+ Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int64T,
+ KernelArg::HiddenGlobalOffsetX));
+ Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int64T,
+ KernelArg::HiddenGlobalOffsetY));
+ Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int64T,
+ KernelArg::HiddenGlobalOffsetZ));
+ if (F.getParent()->getNamedMetadata("llvm.printf.fmts")) {
+ auto Int8PtrT = Type::getInt8PtrTy(F.getContext(),
+ KernelArg::Global);
+ Kernel.Args.emplace_back(getRuntimeMDForKernelArg(DL, Int8PtrT,
+ KernelArg::HiddenPrintfBuffer));
+ }
+ }
+
+ // Set ReqdWorkGroupSize, WorkGroupSizeHint, and VecTypeHint.
+ if (auto RWGS = F.getMetadata("reqd_work_group_size"))
+ Kernel.ReqdWorkGroupSize = getThreeInt32(RWGS);
+
+ if (auto WGSH = F.getMetadata("work_group_size_hint"))
+ Kernel.WorkGroupSizeHint = getThreeInt32(WGSH);
+
+ if (auto VTH = F.getMetadata("vec_type_hint"))
+ Kernel.VecTypeHint = getOCLTypeName(cast<ValueAsMetadata>(
+ VTH->getOperand(0))->getType(), mdconst::extract<ConstantInt>(
+ VTH->getOperand(1))->getZExtValue());
+
+ return Kernel;
+}
+
+Program::Metadata::Metadata(const std::string &YAML) {
+ yaml::Input Input(YAML);
+ Input >> *this;
+}
+
+std::string Program::Metadata::toYAML(void) {
+ std::string Text;
+ raw_string_ostream Stream(Text);
+ yaml::Output Output(Stream, nullptr, INT_MAX /* do not wrap line */);
+ Output << *this;
+ return Stream.str();
+}
+
+Program::Metadata Program::Metadata::fromYAML(const std::string &S) {
+ return Program::Metadata(S);
+}
+
+// Check if the YAML string can be parsed.
+static void checkRuntimeMDYAMLString(const std::string &YAML) {
+ auto P = Program::Metadata::fromYAML(YAML);
+ auto S = P.toYAML();
+ llvm::errs() << "AMDGPU runtime metadata parser test "
+ << (YAML == S ? "passes" : "fails") << ".\n";
+ if (YAML != S) {
+ llvm::errs() << "First output: " << YAML << '\n'
+ << "Second output: " << S << '\n';
+ }
+}
+
+std::string llvm::getRuntimeMDYAMLString(Module &M) {
+ Program::Metadata Prog;
+ Prog.MDVersionSeq.push_back(MDVersion);
+ Prog.MDVersionSeq.push_back(MDRevision);
+
+ // Set PrintfInfo.
+ if (auto MD = M.getNamedMetadata("llvm.printf.fmts")) {
+ for (unsigned I = 0; I < MD->getNumOperands(); ++I) {
+ auto Node = MD->getOperand(I);
+ if (Node->getNumOperands() > 0)
+ Prog.PrintfInfo.push_back(cast<MDString>(Node->getOperand(0))
+ ->getString());
+ }
+ }
+
+ // Set Kernels.
+ for (auto &F: M.functions()) {
+ if (!F.getMetadata("kernel_arg_type"))
+ continue;
+ Prog.Kernels.emplace_back(getRuntimeMDForKernel(F));
+ }
+
+ auto YAML = Prog.toYAML();
+
+ if (DumpRuntimeMD)
+ llvm::errs() << "AMDGPU runtime metadata:\n" << YAML << '\n';
+
+ if (CheckRuntimeMDParser)
+ checkRuntimeMDYAMLString(YAML);
+
+ return YAML;
+}
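As an illustration of the naming scheme implemented by getOCLTypeName() above, a small standalone sketch; the helper below is hypothetical and only mirrors the integer and vector paths:

  #include <cassert>
  #include <string>

  // Unsigned integers get a 'u' prefix and vectors append the lane count,
  // matching the recursion in getOCLTypeName().
  static std::string oclNameSketch(unsigned Bits, bool Signed,
                                   unsigned Lanes = 1) {
    std::string Base;
    switch (Bits) {
    case 8:  Base = "char";  break;
    case 16: Base = "short"; break;
    case 32: Base = "int";   break;
    case 64: Base = "long";  break;
    default: Base = "i" + std::to_string(Bits); break;
    }
    if (!Signed)
      Base = "u" + Base;
    if (Lanes > 1)
      Base += std::to_string(Lanes);
    return Base;
  }

  int main() {
    assert(oclNameSketch(32, false) == "uint");      // 'u' + "int"
    assert(oclNameSketch(16, true, 4) == "short4");  // element name + lanes
    return 0;
  }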
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h
new file mode 100644
index 000000000000..a92fdd4bebc2
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPURuntimeMD.h
@@ -0,0 +1,26 @@
+//===- AMDGPURuntimeMD.h - Generate runtime metadata ---------------*- C++ -*-//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares functions for generating runtime metadata.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPURUNTIMEMD_H
+#define LLVM_LIB_TARGET_AMDGPU_MCTARGETDESC_AMDGPURUNTIMEMD_H
+
+#include <string>
+
+namespace llvm {
+class Module;
+
+// Get runtime metadata as YAML string.
+std::string getRuntimeMDYAMLString(Module &M);
+
+}
+#endif
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 83dcaacb738f..3392183d33c3 100644
--- a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -11,21 +11,33 @@
//
//===----------------------------------------------------------------------===//
+#include "AMDGPU.h"
#include "AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "Utils/AMDGPUBaseInfo.h"
+#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCObjectFileInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/FormattedStream.h"
+#include "AMDGPURuntimeMD.h"
+
+namespace llvm {
+#include "AMDGPUPTNote.h"
+}
using namespace llvm;
+using namespace llvm::AMDGPU;
AMDGPUTargetStreamer::AMDGPUTargetStreamer(MCStreamer &S)
- : MCTargetStreamer(S) { }
+ : MCTargetStreamer(S) {}
//===----------------------------------------------------------------------===//
// AMDGPUTargetAsmStreamer
@@ -56,169 +68,9 @@ AMDGPUTargetAsmStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
void
AMDGPUTargetAsmStreamer::EmitAMDKernelCodeT(const amd_kernel_code_t &Header) {
- uint64_t ComputePgmRsrc2 = (Header.compute_pgm_resource_registers >> 32);
- bool EnableSGPRPrivateSegmentBuffer = (Header.code_properties &
- AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER);
- bool EnableSGPRDispatchPtr = (Header.code_properties &
- AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR);
- bool EnableSGPRQueuePtr = (Header.code_properties &
- AMD_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR);
- bool EnableSGPRKernargSegmentPtr = (Header.code_properties &
- AMD_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR);
- bool EnableSGPRDispatchID = (Header.code_properties &
- AMD_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID);
- bool EnableSGPRFlatScratchInit = (Header.code_properties &
- AMD_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT);
- bool EnableSGPRPrivateSegmentSize = (Header.code_properties &
- AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
- bool EnableSGPRGridWorkgroupCountX = (Header.code_properties &
- AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_X);
- bool EnableSGPRGridWorkgroupCountY = (Header.code_properties &
- AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y);
- bool EnableSGPRGridWorkgroupCountZ = (Header.code_properties &
- AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z);
- bool EnableOrderedAppendGDS = (Header.code_properties &
- AMD_CODE_PROPERTY_ENABLE_ORDERED_APPEND_GDS);
- uint32_t PrivateElementSize = (Header.code_properties &
- AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE) >>
- AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE_SHIFT;
- bool IsPtr64 = (Header.code_properties & AMD_CODE_PROPERTY_IS_PTR64);
- bool IsDynamicCallstack = (Header.code_properties &
- AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK);
- bool IsDebugEnabled = (Header.code_properties &
- AMD_CODE_PROPERTY_IS_DEBUG_SUPPORTED);
- bool IsXNackEnabled = (Header.code_properties &
- AMD_CODE_PROPERTY_IS_XNACK_SUPPORTED);
-
- OS << "\t.amd_kernel_code_t\n" <<
- "\t\tkernel_code_version_major = " <<
- Header.amd_kernel_code_version_major << '\n' <<
- "\t\tkernel_code_version_minor = " <<
- Header.amd_kernel_code_version_minor << '\n' <<
- "\t\tmachine_kind = " <<
- Header.amd_machine_kind << '\n' <<
- "\t\tmachine_version_major = " <<
- Header.amd_machine_version_major << '\n' <<
- "\t\tmachine_version_minor = " <<
- Header.amd_machine_version_minor << '\n' <<
- "\t\tmachine_version_stepping = " <<
- Header.amd_machine_version_stepping << '\n' <<
- "\t\tkernel_code_entry_byte_offset = " <<
- Header.kernel_code_entry_byte_offset << '\n' <<
- "\t\tkernel_code_prefetch_byte_size = " <<
- Header.kernel_code_prefetch_byte_size << '\n' <<
- "\t\tmax_scratch_backing_memory_byte_size = " <<
- Header.max_scratch_backing_memory_byte_size << '\n' <<
- "\t\tcompute_pgm_rsrc1_vgprs = " <<
- G_00B848_VGPRS(Header.compute_pgm_resource_registers) << '\n' <<
- "\t\tcompute_pgm_rsrc1_sgprs = " <<
- G_00B848_SGPRS(Header.compute_pgm_resource_registers) << '\n' <<
- "\t\tcompute_pgm_rsrc1_priority = " <<
- G_00B848_PRIORITY(Header.compute_pgm_resource_registers) << '\n' <<
- "\t\tcompute_pgm_rsrc1_float_mode = " <<
- G_00B848_FLOAT_MODE(Header.compute_pgm_resource_registers) << '\n' <<
- "\t\tcompute_pgm_rsrc1_priv = " <<
- G_00B848_PRIV(Header.compute_pgm_resource_registers) << '\n' <<
- "\t\tcompute_pgm_rsrc1_dx10_clamp = " <<
- G_00B848_DX10_CLAMP(Header.compute_pgm_resource_registers) << '\n' <<
- "\t\tcompute_pgm_rsrc1_debug_mode = " <<
- G_00B848_DEBUG_MODE(Header.compute_pgm_resource_registers) << '\n' <<
- "\t\tcompute_pgm_rsrc1_ieee_mode = " <<
- G_00B848_IEEE_MODE(Header.compute_pgm_resource_registers) << '\n' <<
- "\t\tcompute_pgm_rsrc2_scratch_en = " <<
- G_00B84C_SCRATCH_EN(ComputePgmRsrc2) << '\n' <<
- "\t\tcompute_pgm_rsrc2_user_sgpr = " <<
- G_00B84C_USER_SGPR(ComputePgmRsrc2) << '\n' <<
- "\t\tcompute_pgm_rsrc2_tgid_x_en = " <<
- G_00B84C_TGID_X_EN(ComputePgmRsrc2) << '\n' <<
- "\t\tcompute_pgm_rsrc2_tgid_y_en = " <<
- G_00B84C_TGID_Y_EN(ComputePgmRsrc2) << '\n' <<
- "\t\tcompute_pgm_rsrc2_tgid_z_en = " <<
- G_00B84C_TGID_Z_EN(ComputePgmRsrc2) << '\n' <<
- "\t\tcompute_pgm_rsrc2_tg_size_en = " <<
- G_00B84C_TG_SIZE_EN(ComputePgmRsrc2) << '\n' <<
- "\t\tcompute_pgm_rsrc2_tidig_comp_cnt = " <<
- G_00B84C_TIDIG_COMP_CNT(ComputePgmRsrc2) << '\n' <<
- "\t\tcompute_pgm_rsrc2_excp_en_msb = " <<
- G_00B84C_EXCP_EN_MSB(ComputePgmRsrc2) << '\n' <<
- "\t\tcompute_pgm_rsrc2_lds_size = " <<
- G_00B84C_LDS_SIZE(ComputePgmRsrc2) << '\n' <<
- "\t\tcompute_pgm_rsrc2_excp_en = " <<
- G_00B84C_EXCP_EN(ComputePgmRsrc2) << '\n' <<
-
- "\t\tenable_sgpr_private_segment_buffer = " <<
- EnableSGPRPrivateSegmentBuffer << '\n' <<
- "\t\tenable_sgpr_dispatch_ptr = " <<
- EnableSGPRDispatchPtr << '\n' <<
- "\t\tenable_sgpr_queue_ptr = " <<
- EnableSGPRQueuePtr << '\n' <<
- "\t\tenable_sgpr_kernarg_segment_ptr = " <<
- EnableSGPRKernargSegmentPtr << '\n' <<
- "\t\tenable_sgpr_dispatch_id = " <<
- EnableSGPRDispatchID << '\n' <<
- "\t\tenable_sgpr_flat_scratch_init = " <<
- EnableSGPRFlatScratchInit << '\n' <<
- "\t\tenable_sgpr_private_segment_size = " <<
- EnableSGPRPrivateSegmentSize << '\n' <<
- "\t\tenable_sgpr_grid_workgroup_count_x = " <<
- EnableSGPRGridWorkgroupCountX << '\n' <<
- "\t\tenable_sgpr_grid_workgroup_count_y = " <<
- EnableSGPRGridWorkgroupCountY << '\n' <<
- "\t\tenable_sgpr_grid_workgroup_count_z = " <<
- EnableSGPRGridWorkgroupCountZ << '\n' <<
- "\t\tenable_ordered_append_gds = " <<
- EnableOrderedAppendGDS << '\n' <<
- "\t\tprivate_element_size = " <<
- PrivateElementSize << '\n' <<
- "\t\tis_ptr64 = " <<
- IsPtr64 << '\n' <<
- "\t\tis_dynamic_callstack = " <<
- IsDynamicCallstack << '\n' <<
- "\t\tis_debug_enabled = " <<
- IsDebugEnabled << '\n' <<
- "\t\tis_xnack_enabled = " <<
- IsXNackEnabled << '\n' <<
- "\t\tworkitem_private_segment_byte_size = " <<
- Header.workitem_private_segment_byte_size << '\n' <<
- "\t\tworkgroup_group_segment_byte_size = " <<
- Header.workgroup_group_segment_byte_size << '\n' <<
- "\t\tgds_segment_byte_size = " <<
- Header.gds_segment_byte_size << '\n' <<
- "\t\tkernarg_segment_byte_size = " <<
- Header.kernarg_segment_byte_size << '\n' <<
- "\t\tworkgroup_fbarrier_count = " <<
- Header.workgroup_fbarrier_count << '\n' <<
- "\t\twavefront_sgpr_count = " <<
- Header.wavefront_sgpr_count << '\n' <<
- "\t\tworkitem_vgpr_count = " <<
- Header.workitem_vgpr_count << '\n' <<
- "\t\treserved_vgpr_first = " <<
- Header.reserved_vgpr_first << '\n' <<
- "\t\treserved_vgpr_count = " <<
- Header.reserved_vgpr_count << '\n' <<
- "\t\treserved_sgpr_first = " <<
- Header.reserved_sgpr_first << '\n' <<
- "\t\treserved_sgpr_count = " <<
- Header.reserved_sgpr_count << '\n' <<
- "\t\tdebug_wavefront_private_segment_offset_sgpr = " <<
- Header.debug_wavefront_private_segment_offset_sgpr << '\n' <<
- "\t\tdebug_private_segment_buffer_sgpr = " <<
- Header.debug_private_segment_buffer_sgpr << '\n' <<
- "\t\tkernarg_segment_alignment = " <<
- (uint32_t)Header.kernarg_segment_alignment << '\n' <<
- "\t\tgroup_segment_alignment = " <<
- (uint32_t)Header.group_segment_alignment << '\n' <<
- "\t\tprivate_segment_alignment = " <<
- (uint32_t)Header.private_segment_alignment << '\n' <<
- "\t\twavefront_size = " <<
- (uint32_t)Header.wavefront_size << '\n' <<
- "\t\tcall_convention = " <<
- Header.call_convention << '\n' <<
- "\t\truntime_loader_kernel_symbol = " <<
- Header.runtime_loader_kernel_symbol << '\n' <<
- // TODO: control_directives
- "\t.end_amd_kernel_code_t\n";
-
+ OS << "\t.amd_kernel_code_t\n";
+ dumpAmdKernelCode(&Header, OS, "\t\t");
+ OS << "\t.end_amd_kernel_code_t\n";
}
void AMDGPUTargetAsmStreamer::EmitAMDGPUSymbolType(StringRef SymbolName,
@@ -241,35 +93,63 @@ void AMDGPUTargetAsmStreamer::EmitAMDGPUHsaProgramScopeGlobal(
OS << "\t.amdgpu_hsa_program_global " << GlobalName << '\n';
}
+void AMDGPUTargetAsmStreamer::EmitRuntimeMetadata(Module &M) {
+ OS << "\t.amdgpu_runtime_metadata\n";
+ OS << getRuntimeMDYAMLString(M);
+ OS << "\n\t.end_amdgpu_runtime_metadata\n";
+}
+
+void AMDGPUTargetAsmStreamer::EmitRuntimeMetadata(StringRef Metadata) {
+ OS << "\t.amdgpu_runtime_metadata";
+ OS << Metadata;
+ OS << "\t.end_amdgpu_runtime_metadata\n";
+}
+
//===----------------------------------------------------------------------===//
// AMDGPUTargetELFStreamer
//===----------------------------------------------------------------------===//
AMDGPUTargetELFStreamer::AMDGPUTargetELFStreamer(MCStreamer &S)
- : AMDGPUTargetStreamer(S), Streamer(S) { }
+ : AMDGPUTargetStreamer(S), Streamer(S) {}
MCELFStreamer &AMDGPUTargetELFStreamer::getStreamer() {
return static_cast<MCELFStreamer &>(Streamer);
}
void
+AMDGPUTargetELFStreamer::EmitAMDGPUNote(const MCExpr* DescSZ,
+ PT_NOTE::NoteType Type,
+ std::function<void(MCELFStreamer &)> EmitDesc) {
+ auto &S = getStreamer();
+ auto &Context = S.getContext();
+
+ auto NameSZ = sizeof(PT_NOTE::NoteName);
+
+ S.PushSection();
+ S.SwitchSection(Context.getELFSection(
+ PT_NOTE::SectionName, ELF::SHT_NOTE, ELF::SHF_ALLOC));
+ S.EmitIntValue(NameSZ, 4); // namesz
+ S.EmitValue(DescSZ, 4); // descz
+ S.EmitIntValue(Type, 4); // type
+ S.EmitBytes(StringRef(PT_NOTE::NoteName, NameSZ)); // name
+ S.EmitValueToAlignment(4, 0, 1, 0); // padding 0
+ EmitDesc(S); // desc
+ S.EmitValueToAlignment(4, 0, 1, 0); // padding 0
+ S.PopSection();
+}
+
+void
AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion(uint32_t Major,
uint32_t Minor) {
- MCStreamer &OS = getStreamer();
- MCSectionELF *Note = OS.getContext().getELFSection(".note", ELF::SHT_NOTE, 0);
-
- unsigned NameSZ = 4;
- OS.PushSection();
- OS.SwitchSection(Note);
- OS.EmitIntValue(NameSZ, 4); // namesz
- OS.EmitIntValue(8, 4); // descz
- OS.EmitIntValue(NT_AMDGPU_HSA_CODE_OBJECT_VERSION, 4); // type
- OS.EmitBytes(StringRef("AMD", NameSZ)); // name
- OS.EmitIntValue(Major, 4); // desc
- OS.EmitIntValue(Minor, 4);
- OS.EmitValueToAlignment(4);
- OS.PopSection();
+ EmitAMDGPUNote(
+ MCConstantExpr::create(8, getContext()),
+ PT_NOTE::NT_AMDGPU_HSA_CODE_OBJECT_VERSION,
+ [&](MCELFStreamer &OS){
+ OS.EmitIntValue(Major, 4);
+ OS.EmitIntValue(Minor, 4);
+ }
+ );
}
void
@@ -278,33 +158,28 @@ AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectISA(uint32_t Major,
uint32_t Stepping,
StringRef VendorName,
StringRef ArchName) {
- MCStreamer &OS = getStreamer();
- MCSectionELF *Note = OS.getContext().getELFSection(".note", ELF::SHT_NOTE, 0);
-
- unsigned NameSZ = 4;
uint16_t VendorNameSize = VendorName.size() + 1;
uint16_t ArchNameSize = ArchName.size() + 1;
+
unsigned DescSZ = sizeof(VendorNameSize) + sizeof(ArchNameSize) +
- sizeof(Major) + sizeof(Minor) + sizeof(Stepping) +
- VendorNameSize + ArchNameSize;
-
- OS.PushSection();
- OS.SwitchSection(Note);
- OS.EmitIntValue(NameSZ, 4); // namesz
- OS.EmitIntValue(DescSZ, 4); // descsz
- OS.EmitIntValue(NT_AMDGPU_HSA_ISA, 4); // type
- OS.EmitBytes(StringRef("AMD", 4)); // name
- OS.EmitIntValue(VendorNameSize, 2); // desc
- OS.EmitIntValue(ArchNameSize, 2);
- OS.EmitIntValue(Major, 4);
- OS.EmitIntValue(Minor, 4);
- OS.EmitIntValue(Stepping, 4);
- OS.EmitBytes(VendorName);
- OS.EmitIntValue(0, 1); // NULL terminate VendorName
- OS.EmitBytes(ArchName);
- OS.EmitIntValue(0, 1); // NULL terminte ArchName
- OS.EmitValueToAlignment(4);
- OS.PopSection();
+ sizeof(Major) + sizeof(Minor) + sizeof(Stepping) +
+ VendorNameSize + ArchNameSize;
+
+ EmitAMDGPUNote(
+ MCConstantExpr::create(DescSZ, getContext()),
+ PT_NOTE::NT_AMDGPU_HSA_ISA,
+ [&](MCELFStreamer &OS) {
+ OS.EmitIntValue(VendorNameSize, 2);
+ OS.EmitIntValue(ArchNameSize, 2);
+ OS.EmitIntValue(Major, 4);
+ OS.EmitIntValue(Minor, 4);
+ OS.EmitIntValue(Stepping, 4);
+ OS.EmitBytes(VendorName);
+ OS.EmitIntValue(0, 1); // NULL terminate VendorName
+ OS.EmitBytes(ArchName);
+ OS.EmitIntValue(0, 1); // NULL terminate ArchName
+ }
+ );
}
void
@@ -340,3 +215,28 @@ void AMDGPUTargetELFStreamer::EmitAMDGPUHsaProgramScopeGlobal(
Symbol->setType(ELF::STT_OBJECT);
Symbol->setBinding(ELF::STB_GLOBAL);
}
+
+void AMDGPUTargetELFStreamer::EmitRuntimeMetadata(StringRef Metadata) {
+ // Create two labels to mark the beginning and end of the desc field
+ // and a MCExpr to calculate the size of the desc field.
+ auto &Context = getContext();
+ auto *DescBegin = Context.createTempSymbol();
+ auto *DescEnd = Context.createTempSymbol();
+ auto *DescSZ = MCBinaryExpr::createSub(
+ MCSymbolRefExpr::create(DescEnd, Context),
+ MCSymbolRefExpr::create(DescBegin, Context), Context);
+
+ EmitAMDGPUNote(
+ DescSZ,
+ PT_NOTE::NT_AMDGPU_HSA_RUNTIME_METADATA,
+ [&](MCELFStreamer &OS) {
+ OS.EmitLabel(DescBegin);
+ OS.EmitBytes(Metadata);
+ OS.EmitLabel(DescEnd);
+ }
+ );
+}
+
+void AMDGPUTargetELFStreamer::EmitRuntimeMetadata(Module &M) {
+ EmitRuntimeMetadata(getRuntimeMDYAMLString(M));
+}
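A rough, standalone model of the ELF note record that the new EmitAMDGPUNote() helper emits; the field order and padding follow the code above, while the concrete sizes in main() are only an illustration:

  #include <cassert>
  #include <cstdint>

  // 4-byte namesz, descsz and type words, then the note name and the desc
  // payload, each padded to a 4-byte boundary.
  static uint64_t noteSizeSketch(uint64_t NameSZ, uint64_t DescSZ) {
    auto Align4 = [](uint64_t N) { return (N + 3) & ~uint64_t(3); };
    return 12 + Align4(NameSZ) + Align4(DescSZ);
  }

  int main() {
    // Hypothetical: a 4-byte note name plus the 8-byte code-object-version
    // desc (major and minor words) gives a 24-byte record.
    assert(noteSizeSketch(4, 8) == 24);
    return 0;
  }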
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
index b3d59e8f396e..e2f20586903d 100644
--- a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
@@ -14,11 +14,20 @@
#include "llvm/MC/MCStreamer.h"
namespace llvm {
+#include "AMDGPUPTNote.h"
+class DataLayout;
+class Function;
class MCELFStreamer;
class MCSymbol;
+class MDNode;
+class Module;
+class Type;
class AMDGPUTargetStreamer : public MCTargetStreamer {
+protected:
+ MCContext &getContext() const { return Streamer.getContext(); }
+
public:
AMDGPUTargetStreamer(MCStreamer &S);
virtual void EmitDirectiveHSACodeObjectVersion(uint32_t Major,
@@ -36,6 +45,10 @@ public:
virtual void EmitAMDGPUHsaModuleScopeGlobal(StringRef GlobalName) = 0;
virtual void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) = 0;
+
+ virtual void EmitRuntimeMetadata(Module &M) = 0;
+
+ virtual void EmitRuntimeMetadata(StringRef Metadata) = 0;
};
class AMDGPUTargetAsmStreamer : public AMDGPUTargetStreamer {
@@ -56,23 +69,19 @@ public:
void EmitAMDGPUHsaModuleScopeGlobal(StringRef GlobalName) override;
void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override;
-};
-class AMDGPUTargetELFStreamer : public AMDGPUTargetStreamer {
+ void EmitRuntimeMetadata(Module &M) override;
- enum NoteType {
- NT_AMDGPU_HSA_CODE_OBJECT_VERSION = 1,
- NT_AMDGPU_HSA_HSAIL = 2,
- NT_AMDGPU_HSA_ISA = 3,
- NT_AMDGPU_HSA_PRODUCER = 4,
- NT_AMDGPU_HSA_PRODUCER_OPTIONS = 5,
- NT_AMDGPU_HSA_EXTENSION = 6,
- NT_AMDGPU_HSA_HLDEBUG_DEBUG = 101,
- NT_AMDGPU_HSA_HLDEBUG_TARGET = 102
- };
+ void EmitRuntimeMetadata(StringRef Metadata) override;
+};
+class AMDGPUTargetELFStreamer : public AMDGPUTargetStreamer {
MCStreamer &Streamer;
+ void EmitAMDGPUNote(const MCExpr* DescSize,
+ AMDGPU::PT_NOTE::NoteType Type,
+ std::function<void(MCELFStreamer &)> EmitDesc);
+
public:
AMDGPUTargetELFStreamer(MCStreamer &S);
@@ -92,6 +101,10 @@ public:
void EmitAMDGPUHsaModuleScopeGlobal(StringRef GlobalName) override;
void EmitAMDGPUHsaProgramScopeGlobal(StringRef GlobalName) override;
+
+ void EmitRuntimeMetadata(Module &M) override;
+
+ void EmitRuntimeMetadata(StringRef Metadata) override;
};
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
index 5e8e6ceb7ca2..6015ec190fd4 100644
--- a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCCodeEmitter.cpp
@@ -20,26 +20,30 @@
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/EndianStream.h"
#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdint>
using namespace llvm;
namespace {
class R600MCCodeEmitter : public AMDGPUMCCodeEmitter {
- R600MCCodeEmitter(const R600MCCodeEmitter &) = delete;
- void operator=(const R600MCCodeEmitter &) = delete;
- const MCInstrInfo &MCII;
const MCRegisterInfo &MRI;
public:
R600MCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri)
- : MCII(mcii), MRI(mri) { }
+ : AMDGPUMCCodeEmitter(mcii), MRI(mri) {}
+ R600MCCodeEmitter(const R600MCCodeEmitter &) = delete;
+ R600MCCodeEmitter &operator=(const R600MCCodeEmitter &) = delete;
/// \brief Encode the instruction and write it to the OS.
void encodeInstruction(const MCInst &MI, raw_ostream &OS,
@@ -58,7 +62,7 @@ private:
unsigned getHWReg(unsigned regNo) const;
};
-} // End anonymous namespace
+} // end anonymous namespace
enum RegElement {
ELEMENT_X = 0,
@@ -86,6 +90,9 @@ MCCodeEmitter *llvm::createR600MCCodeEmitter(const MCInstrInfo &MCII,
void R600MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
+ verifyInstructionPredicates(MI,
+ computeAvailableFeatures(STI.getFeatureBits()));
+
const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
if (MI.getOpcode() == AMDGPU::RETURN ||
MI.getOpcode() == AMDGPU::FETCH_CLAUSE ||
@@ -178,4 +185,5 @@ uint64_t R600MCCodeEmitter::getMachineOpValue(const MCInst &MI,
return MO.getImm();
}
+#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "AMDGPUGenMCCodeEmitter.inc"
diff --git a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
index 71b585c25ac5..0c5bb0648a16 100644
--- a/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp
@@ -1,4 +1,4 @@
-//===-- SIMCCodeEmitter.cpp - SI Code Emitter -------------------------------===//
+//===-- SIMCCodeEmitter.cpp - SI Code Emitter -----------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -17,38 +17,42 @@
#include "MCTargetDesc/AMDGPUFixupKinds.h"
#include "MCTargetDesc/AMDGPUMCCodeEmitter.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
-#include "SIDefines.h"
+#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixup.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdint>
+#include <cstdlib>
using namespace llvm;
namespace {
class SIMCCodeEmitter : public AMDGPUMCCodeEmitter {
- SIMCCodeEmitter(const SIMCCodeEmitter &) = delete;
- void operator=(const SIMCCodeEmitter &) = delete;
- const MCInstrInfo &MCII;
const MCRegisterInfo &MRI;
- /// \brief Can this operand also contain immediate values?
- bool isSrcOperand(const MCInstrDesc &Desc, unsigned OpNo) const;
-
/// \brief Encode an fp or int literal
- uint32_t getLitEncoding(const MCOperand &MO, unsigned OpSize) const;
+ uint32_t getLitEncoding(const MCOperand &MO, const MCOperandInfo &OpInfo,
+ const MCSubtargetInfo &STI) const;
public:
SIMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri,
MCContext &ctx)
- : MCII(mcii), MRI(mri) { }
-
- ~SIMCCodeEmitter() override {}
+ : AMDGPUMCCodeEmitter(mcii), MRI(mri) {}
+ SIMCCodeEmitter(const SIMCCodeEmitter &) = delete;
+ SIMCCodeEmitter &operator=(const SIMCCodeEmitter &) = delete;
/// \brief Encode the instruction and write it to the OS.
void encodeInstruction(const MCInst &MI, raw_ostream &OS,
@@ -67,7 +71,7 @@ public:
const MCSubtargetInfo &STI) const override;
};
-} // End anonymous namespace
+} // end anonymous namespace
MCCodeEmitter *llvm::createSIMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
@@ -75,14 +79,6 @@ MCCodeEmitter *llvm::createSIMCCodeEmitter(const MCInstrInfo &MCII,
return new SIMCCodeEmitter(MCII, MRI, Ctx);
}
-bool SIMCCodeEmitter::isSrcOperand(const MCInstrDesc &Desc,
- unsigned OpNo) const {
- unsigned OpType = Desc.OpInfo[OpNo].OperandType;
-
- return OpType == AMDGPU::OPERAND_REG_IMM32 ||
- OpType == AMDGPU::OPERAND_REG_INLINE_C;
-}
-
// Returns the encoding value to use if the given integer is an integer inline
// immediate value, or 0 if it is not.
template <typename IntTy>
@@ -96,7 +92,43 @@ static uint32_t getIntInlineImmEncoding(IntTy Imm) {
return 0;
}
-static uint32_t getLit32Encoding(uint32_t Val) {
+static uint32_t getLit16Encoding(uint16_t Val, const MCSubtargetInfo &STI) {
+ uint16_t IntImm = getIntInlineImmEncoding(static_cast<int16_t>(Val));
+ if (IntImm != 0)
+ return IntImm;
+
+ if (Val == 0x3800) // 0.5
+ return 240;
+
+ if (Val == 0xB800) // -0.5
+ return 241;
+
+ if (Val == 0x3C00) // 1.0
+ return 242;
+
+ if (Val == 0xBC00) // -1.0
+ return 243;
+
+ if (Val == 0x4000) // 2.0
+ return 244;
+
+ if (Val == 0xC000) // -2.0
+ return 245;
+
+ if (Val == 0x4400) // 4.0
+ return 246;
+
+ if (Val == 0xC400) // -4.0
+ return 247;
+
+ if (Val == 0x3118 && // 1.0 / (2.0 * pi)
+ STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm])
+ return 248;
+
+ return 255;
+}
+
+static uint32_t getLit32Encoding(uint32_t Val, const MCSubtargetInfo &STI) {
uint32_t IntImm = getIntInlineImmEncoding(static_cast<int32_t>(Val));
if (IntImm != 0)
return IntImm;
@@ -125,10 +157,14 @@ static uint32_t getLit32Encoding(uint32_t Val) {
if (Val == FloatToBits(-4.0f))
return 247;
+ if (Val == 0x3e22f983 && // 1.0 / (2.0 * pi)
+ STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm])
+ return 248;
+
return 255;
}
-static uint32_t getLit64Encoding(uint64_t Val) {
+static uint32_t getLit64Encoding(uint64_t Val, const MCSubtargetInfo &STI) {
uint32_t IntImm = getIntInlineImmEncoding(static_cast<int64_t>(Val));
if (IntImm != 0)
return IntImm;
@@ -157,15 +193,19 @@ static uint32_t getLit64Encoding(uint64_t Val) {
if (Val == DoubleToBits(-4.0))
return 247;
+ if (Val == 0x3fc45f306dc9c882 && // 1.0 / (2.0 * pi)
+ STI.getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm])
+ return 248;
+
return 255;
}
uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO,
- unsigned OpSize) const {
-
+ const MCOperandInfo &OpInfo,
+ const MCSubtargetInfo &STI) const {
int64_t Imm;
if (MO.isExpr()) {
- const MCConstantExpr *C = dyn_cast<MCConstantExpr>(MO.getExpr());
+ const auto *C = dyn_cast<MCConstantExpr>(MO.getExpr());
if (!C)
return 255;
@@ -180,17 +220,23 @@ uint32_t SIMCCodeEmitter::getLitEncoding(const MCOperand &MO,
Imm = MO.getImm();
}
- if (OpSize == 4)
- return getLit32Encoding(static_cast<uint32_t>(Imm));
-
- assert(OpSize == 8);
-
- return getLit64Encoding(static_cast<uint64_t>(Imm));
+ switch (AMDGPU::getOperandSize(OpInfo)) {
+ case 4:
+ return getLit32Encoding(static_cast<uint32_t>(Imm), STI);
+ case 8:
+ return getLit64Encoding(static_cast<uint64_t>(Imm), STI);
+ case 2:
+ return getLit16Encoding(static_cast<uint16_t>(Imm), STI);
+ default:
+ llvm_unreachable("invalid operand size");
+ }
}
void SIMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
+ verifyInstructionPredicates(MI,
+ computeAvailableFeatures(STI.getFeatureBits()));
uint64_t Encoding = getBinaryCodeForInstr(MI, Fixups, STI);
const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
@@ -207,15 +253,12 @@ void SIMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
for (unsigned i = 0, e = MI.getNumOperands(); i < e; ++i) {
// Check if this operand should be encoded as [SV]Src
- if (!isSrcOperand(Desc, i))
+ if (!AMDGPU::isSISrcOperand(Desc, i))
continue;
- int RCID = Desc.OpInfo[i].RegClass;
- const MCRegisterClass &RC = MRI.getRegClass(RCID);
-
// Is this operand a literal immediate?
const MCOperand &Op = MI.getOperand(i);
- if (getLitEncoding(Op, RC.getSize()) != 255)
+ if (getLitEncoding(Op, Desc.OpInfo[i], STI) != 255)
continue;
// Yes! Encode it
@@ -224,7 +267,7 @@ void SIMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
if (Op.isImm())
Imm = Op.getImm();
else if (Op.isExpr()) {
- if (const MCConstantExpr *C = dyn_cast<MCConstantExpr>(Op.getExpr()))
+ if (const auto *C = dyn_cast<MCConstantExpr>(Op.getExpr()))
Imm = C->getValue();
} else if (!Op.isExpr()) // Exprs will be replaced with a fixup value.
@@ -262,7 +305,7 @@ uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
return MRI.getEncodingValue(MO.getReg());
if (MO.isExpr() && MO.getExpr()->getKind() != MCExpr::Constant) {
- const MCSymbolRefExpr *Expr = dyn_cast<MCSymbolRefExpr>(MO.getExpr());
+ const auto *Expr = dyn_cast<MCSymbolRefExpr>(MO.getExpr());
MCFixupKind Kind;
if (Expr && Expr->getSymbol().isExternal())
Kind = FK_Data_4;
@@ -279,11 +322,8 @@ uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
}
const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
- if (isSrcOperand(Desc, OpNo)) {
- int RCID = Desc.OpInfo[OpNo].RegClass;
- const MCRegisterClass &RC = MRI.getRegClass(RCID);
-
- uint32_t Enc = getLitEncoding(MO, RC.getSize());
+ if (AMDGPU::isSISrcOperand(Desc, OpNo)) {
+ uint32_t Enc = getLitEncoding(MO, Desc.OpInfo[OpNo], STI);
if (Enc != ~0U && (Enc != 255 || Desc.getSize() == 4))
return Enc;
@@ -293,4 +333,3 @@ uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI,
llvm_unreachable("Encoding of this operand type is not supported yet.");
return 0;
}
-
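A small standalone sanity check mirroring a few of the half-precision inline-constant cases added by getLit16Encoding() above; the helper name is illustrative, and only the listed bit patterns come from this change:

  #include <cassert>
  #include <cstdint>

  // 0x3800, 0x3C00 and 0x4000 are the IEEE-754 half bit patterns for 0.5,
  // 1.0 and 2.0; they map to the inline-constant encodings 240, 242 and 244,
  // and anything unrecognised falls back to 255 (a literal constant).
  static uint32_t lit16Sketch(uint16_t Val) {
    switch (Val) {
    case 0x3800: return 240;
    case 0x3C00: return 242;
    case 0x4000: return 244;
    default:     return 255;
    }
  }

  int main() {
    assert(lit16Sketch(0x3C00) == 242);
    assert(lit16Sketch(0x1234) == 255);
    return 0;
  }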
diff --git a/contrib/llvm/lib/Target/AMDGPU/MIMGInstructions.td b/contrib/llvm/lib/Target/AMDGPU/MIMGInstructions.td
new file mode 100644
index 000000000000..46803e555711
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/MIMGInstructions.td
@@ -0,0 +1,763 @@
+//===-- MIMGInstructions.td - MIMG Instruction Definitions ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+class MIMG_Mask <string op, int channels> {
+ string Op = op;
+ int Channels = channels;
+}
+
+class mimg <bits<7> si, bits<7> vi = si> {
+ field bits<7> SI = si;
+ field bits<7> VI = vi;
+}
+
+class MIMG_Helper <dag outs, dag ins, string asm,
+ string dns=""> : MIMG<outs, ins, asm,[]> {
+ let mayLoad = 1;
+ let mayStore = 0;
+ let hasPostISelHook = 1;
+ let DecoderNamespace = dns;
+ let isAsmParserOnly = !if(!eq(dns,""), 1, 0);
+ let AsmMatchConverter = "cvtMIMG";
+ let usesCustomInserter = 1;
+}
+
+class MIMG_NoSampler_Helper <bits<7> op, string asm,
+ RegisterClass dst_rc,
+ RegisterClass addr_rc,
+ string dns=""> : MIMG_Helper <
+ (outs dst_rc:$vdata),
+ (ins addr_rc:$vaddr, SReg_256:$srsrc,
+ dmask:$dmask, unorm:$unorm, GLC:$glc, slc:$slc,
+ r128:$r128, tfe:$tfe, lwe:$lwe, da:$da),
+ asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da",
+ dns>, MIMGe<op> {
+ let ssamp = 0;
+}
+
+multiclass MIMG_NoSampler_Src_Helper <bits<7> op, string asm,
+ RegisterClass dst_rc,
+ int channels> {
+ def _V1 : MIMG_NoSampler_Helper <op, asm, dst_rc, VGPR_32,
+ !if(!eq(channels, 1), "AMDGPU", "")>,
+ MIMG_Mask<asm#"_V1", channels>;
+ def _V2 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_64>,
+ MIMG_Mask<asm#"_V2", channels>;
+ def _V4 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_128>,
+ MIMG_Mask<asm#"_V4", channels>;
+}
+
+multiclass MIMG_NoSampler <bits<7> op, string asm> {
+ defm _V1 : MIMG_NoSampler_Src_Helper <op, asm, VGPR_32, 1>;
+ defm _V2 : MIMG_NoSampler_Src_Helper <op, asm, VReg_64, 2>;
+ defm _V3 : MIMG_NoSampler_Src_Helper <op, asm, VReg_96, 3>;
+ defm _V4 : MIMG_NoSampler_Src_Helper <op, asm, VReg_128, 4>;
+}
+
+class MIMG_Store_Helper <bits<7> op, string asm,
+ RegisterClass data_rc,
+ RegisterClass addr_rc> : MIMG_Helper <
+ (outs),
+ (ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
+ dmask:$dmask, unorm:$unorm, GLC:$glc, slc:$slc,
+ r128:$r128, tfe:$tfe, lwe:$lwe, da:$da),
+ asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"
+ >, MIMGe<op> {
+ let ssamp = 0;
+ let mayLoad = 1; // TableGen requires this for matching with the intrinsics
+ let mayStore = 1;
+ let hasSideEffects = 1;
+ let hasPostISelHook = 0;
+ let DisableWQM = 1;
+}
+
+multiclass MIMG_Store_Addr_Helper <bits<7> op, string asm,
+ RegisterClass data_rc,
+ int channels> {
+ def _V1 : MIMG_Store_Helper <op, asm, data_rc, VGPR_32>,
+ MIMG_Mask<asm#"_V1", channels>;
+ def _V2 : MIMG_Store_Helper <op, asm, data_rc, VReg_64>,
+ MIMG_Mask<asm#"_V2", channels>;
+ def _V4 : MIMG_Store_Helper <op, asm, data_rc, VReg_128>,
+ MIMG_Mask<asm#"_V4", channels>;
+}
+
+multiclass MIMG_Store <bits<7> op, string asm> {
+ defm _V1 : MIMG_Store_Addr_Helper <op, asm, VGPR_32, 1>;
+ defm _V2 : MIMG_Store_Addr_Helper <op, asm, VReg_64, 2>;
+ defm _V3 : MIMG_Store_Addr_Helper <op, asm, VReg_96, 3>;
+ defm _V4 : MIMG_Store_Addr_Helper <op, asm, VReg_128, 4>;
+}
+
+class MIMG_Atomic_Helper <string asm, RegisterClass data_rc,
+ RegisterClass addr_rc> : MIMG_Helper <
+ (outs data_rc:$vdst),
+ (ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
+ dmask:$dmask, unorm:$unorm, GLC:$glc, slc:$slc,
+ r128:$r128, tfe:$tfe, lwe:$lwe, da:$da),
+ asm#" $vdst, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"
+ > {
+ let mayStore = 1;
+ let hasSideEffects = 1;
+ let hasPostISelHook = 0;
+ let DisableWQM = 1;
+ let Constraints = "$vdst = $vdata";
+ let AsmMatchConverter = "cvtMIMGAtomic";
+}
+
+class MIMG_Atomic_Real_si<mimg op, string name, string asm,
+ RegisterClass data_rc, RegisterClass addr_rc> :
+ MIMG_Atomic_Helper<asm, data_rc, addr_rc>,
+ SIMCInstr<name, SIEncodingFamily.SI>,
+ MIMGe<op.SI> {
+ let isCodeGenOnly = 0;
+ let AssemblerPredicates = [isSICI];
+ let DecoderNamespace = "SICI";
+ let DisableDecoder = DisableSIDecoder;
+}
+
+class MIMG_Atomic_Real_vi<mimg op, string name, string asm,
+ RegisterClass data_rc, RegisterClass addr_rc> :
+ MIMG_Atomic_Helper<asm, data_rc, addr_rc>,
+ SIMCInstr<name, SIEncodingFamily.VI>,
+ MIMGe<op.VI> {
+ let isCodeGenOnly = 0;
+ let AssemblerPredicates = [isVI];
+ let DecoderNamespace = "VI";
+ let DisableDecoder = DisableVIDecoder;
+}
+
+multiclass MIMG_Atomic_Helper_m <mimg op, string name, string asm,
+ RegisterClass data_rc, RegisterClass addr_rc> {
+ let isPseudo = 1, isCodeGenOnly = 1 in {
+ def "" : MIMG_Atomic_Helper<asm, data_rc, addr_rc>,
+ SIMCInstr<name, SIEncodingFamily.NONE>;
+ }
+
+ let ssamp = 0 in {
+ def _si : MIMG_Atomic_Real_si<op, name, asm, data_rc, addr_rc>;
+
+ def _vi : MIMG_Atomic_Real_vi<op, name, asm, data_rc, addr_rc>;
+ }
+}
+
+multiclass MIMG_Atomic <mimg op, string asm, RegisterClass data_rc = VGPR_32> {
+ defm _V1 : MIMG_Atomic_Helper_m <op, asm # "_V1", asm, data_rc, VGPR_32>;
+ defm _V2 : MIMG_Atomic_Helper_m <op, asm # "_V2", asm, data_rc, VReg_64>;
+ defm _V4 : MIMG_Atomic_Helper_m <op, asm # "_V3", asm, data_rc, VReg_128>;
+}
+
+class MIMG_Sampler_Helper <bits<7> op, string asm,
+ RegisterClass dst_rc,
+ RegisterClass src_rc,
+ bit wqm,
+ string dns=""> : MIMG_Helper <
+ (outs dst_rc:$vdata),
+ (ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,
+ dmask:$dmask, unorm:$unorm, GLC:$glc, slc:$slc,
+ r128:$r128, tfe:$tfe, lwe:$lwe, da:$da),
+ asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$glc$slc$r128$tfe$lwe$da",
+ dns>, MIMGe<op> {
+ let WQM = wqm;
+}
+
+multiclass MIMG_Sampler_Src_Helper <bits<7> op, string asm,
+ RegisterClass dst_rc,
+ int channels, bit wqm> {
+ def _V1 : MIMG_Sampler_Helper <op, asm, dst_rc, VGPR_32, wqm,
+ !if(!eq(channels, 1), "AMDGPU", "")>,
+ MIMG_Mask<asm#"_V1", channels>;
+ def _V2 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_64, wqm>,
+ MIMG_Mask<asm#"_V2", channels>;
+ def _V4 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_128, wqm>,
+ MIMG_Mask<asm#"_V4", channels>;
+ def _V8 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_256, wqm>,
+ MIMG_Mask<asm#"_V8", channels>;
+ def _V16 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_512, wqm>,
+ MIMG_Mask<asm#"_V16", channels>;
+}
+
+multiclass MIMG_Sampler <bits<7> op, string asm, bit wqm=0> {
+ defm _V1 : MIMG_Sampler_Src_Helper<op, asm, VGPR_32, 1, wqm>;
+ defm _V2 : MIMG_Sampler_Src_Helper<op, asm, VReg_64, 2, wqm>;
+ defm _V3 : MIMG_Sampler_Src_Helper<op, asm, VReg_96, 3, wqm>;
+ defm _V4 : MIMG_Sampler_Src_Helper<op, asm, VReg_128, 4, wqm>;
+}
+
+multiclass MIMG_Sampler_WQM <bits<7> op, string asm> : MIMG_Sampler<op, asm, 1>;
+
+class MIMG_Gather_Helper <bits<7> op, string asm,
+ RegisterClass dst_rc,
+ RegisterClass src_rc, bit wqm> : MIMG <
+ (outs dst_rc:$vdata),
+ (ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,
+ dmask:$dmask, unorm:$unorm, GLC:$glc, slc:$slc,
+ r128:$r128, tfe:$tfe, lwe:$lwe, da:$da),
+ asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$glc$slc$r128$tfe$lwe$da",
+ []>, MIMGe<op> {
+ let mayLoad = 1;
+ let mayStore = 0;
+
+ // DMASK was repurposed for GATHER4. 4 components are always
+ // returned and DMASK works like a swizzle - it selects
+ // the component to fetch. The only useful DMASK values are
+ // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
+ // (red,red,red,red) etc.) The ISA document doesn't mention
+ // this.
+ // Therefore, disable all code which updates DMASK by setting this:
+ let Gather4 = 1;
+ let hasPostISelHook = 0;
+ let WQM = wqm;
+
+ let isAsmParserOnly = 1; // TBD: fix it later
+}
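(Editorial note, not part of the patch.) The DMASK comment above can be made concrete with a small sketch: for gather4 the mask acts as a component selector, and the one selected component is returned from each of the four texels that the fetch covers (the 2x2 footprint is an assumption based on how gather4 is usually described; the comment itself only states the component selection). A hedged C++ model:

#include <array>
#include <cstdio>

// Component selected by a gather4 dmask; only 1, 2, 4 and 8 are useful,
// exactly as the comment above notes.
int gather4Component(unsigned DMask) {
  switch (DMask) {
  case 1: return 0; // red
  case 2: return 1; // green
  case 4: return 2; // blue
  case 8: return 3; // alpha
  default: return -1; // not a useful mask for gather4
  }
}

int main() {
  // Four texels of an assumed 2x2 footprint, each holding (r, g, b, a).
  std::array<std::array<float, 4>, 4> Texels = {{
      {0.1f, 0.5f, 0.0f, 1.0f},
      {0.2f, 0.6f, 0.0f, 1.0f},
      {0.3f, 0.7f, 0.0f, 1.0f},
      {0.4f, 0.8f, 0.0f, 1.0f},
  }};
  int C = gather4Component(/*DMask=*/1); // select red
  for (const auto &T : Texels)           // result: (red, red, red, red)
    std::printf("%.1f\n", T[C]);
  return 0;
}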
+
+multiclass MIMG_Gather_Src_Helper <bits<7> op, string asm,
+ RegisterClass dst_rc,
+ int channels, bit wqm> {
+ def _V1 : MIMG_Gather_Helper <op, asm, dst_rc, VGPR_32, wqm>,
+ MIMG_Mask<asm#"_V1", channels>;
+ def _V2 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_64, wqm>,
+ MIMG_Mask<asm#"_V2", channels>;
+ def _V4 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_128, wqm>,
+ MIMG_Mask<asm#"_V4", channels>;
+ def _V8 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_256, wqm>,
+ MIMG_Mask<asm#"_V8", channels>;
+ def _V16 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_512, wqm>,
+ MIMG_Mask<asm#"_V16", channels>;
+}
+
+multiclass MIMG_Gather <bits<7> op, string asm, bit wqm=0> {
+ defm _V1 : MIMG_Gather_Src_Helper<op, asm, VGPR_32, 1, wqm>;
+ defm _V2 : MIMG_Gather_Src_Helper<op, asm, VReg_64, 2, wqm>;
+ defm _V3 : MIMG_Gather_Src_Helper<op, asm, VReg_96, 3, wqm>;
+ defm _V4 : MIMG_Gather_Src_Helper<op, asm, VReg_128, 4, wqm>;
+}
+
+multiclass MIMG_Gather_WQM <bits<7> op, string asm> : MIMG_Gather<op, asm, 1>;
+
+//===----------------------------------------------------------------------===//
+// MIMG Instructions
+//===----------------------------------------------------------------------===//
+let SubtargetPredicate = isGCN in {
+defm IMAGE_LOAD : MIMG_NoSampler <0x00000000, "image_load">;
+defm IMAGE_LOAD_MIP : MIMG_NoSampler <0x00000001, "image_load_mip">;
+//def IMAGE_LOAD_PCK : MIMG_NoPattern_ <"image_load_pck", 0x00000002>;
+//def IMAGE_LOAD_PCK_SGN : MIMG_NoPattern_ <"image_load_pck_sgn", 0x00000003>;
+//def IMAGE_LOAD_MIP_PCK : MIMG_NoPattern_ <"image_load_mip_pck", 0x00000004>;
+//def IMAGE_LOAD_MIP_PCK_SGN : MIMG_NoPattern_ <"image_load_mip_pck_sgn", 0x00000005>;
+defm IMAGE_STORE : MIMG_Store <0x00000008, "image_store">;
+defm IMAGE_STORE_MIP : MIMG_Store <0x00000009, "image_store_mip">;
+//def IMAGE_STORE_PCK : MIMG_NoPattern_ <"image_store_pck", 0x0000000a>;
+//def IMAGE_STORE_MIP_PCK : MIMG_NoPattern_ <"image_store_mip_pck", 0x0000000b>;
+defm IMAGE_GET_RESINFO : MIMG_NoSampler <0x0000000e, "image_get_resinfo">;
+defm IMAGE_ATOMIC_SWAP : MIMG_Atomic <mimg<0x0f, 0x10>, "image_atomic_swap">;
+defm IMAGE_ATOMIC_CMPSWAP : MIMG_Atomic <mimg<0x10, 0x11>, "image_atomic_cmpswap", VReg_64>;
+defm IMAGE_ATOMIC_ADD : MIMG_Atomic <mimg<0x11, 0x12>, "image_atomic_add">;
+defm IMAGE_ATOMIC_SUB : MIMG_Atomic <mimg<0x12, 0x13>, "image_atomic_sub">;
+//def IMAGE_ATOMIC_RSUB : MIMG_NoPattern_ <"image_atomic_rsub", 0x00000013>; -- not on VI
+defm IMAGE_ATOMIC_SMIN : MIMG_Atomic <mimg<0x14>, "image_atomic_smin">;
+defm IMAGE_ATOMIC_UMIN : MIMG_Atomic <mimg<0x15>, "image_atomic_umin">;
+defm IMAGE_ATOMIC_SMAX : MIMG_Atomic <mimg<0x16>, "image_atomic_smax">;
+defm IMAGE_ATOMIC_UMAX : MIMG_Atomic <mimg<0x17>, "image_atomic_umax">;
+defm IMAGE_ATOMIC_AND : MIMG_Atomic <mimg<0x18>, "image_atomic_and">;
+defm IMAGE_ATOMIC_OR : MIMG_Atomic <mimg<0x19>, "image_atomic_or">;
+defm IMAGE_ATOMIC_XOR : MIMG_Atomic <mimg<0x1a>, "image_atomic_xor">;
+defm IMAGE_ATOMIC_INC : MIMG_Atomic <mimg<0x1b>, "image_atomic_inc">;
+defm IMAGE_ATOMIC_DEC : MIMG_Atomic <mimg<0x1c>, "image_atomic_dec">;
+//def IMAGE_ATOMIC_FCMPSWAP : MIMG_NoPattern_ <"image_atomic_fcmpswap", 0x0000001d>; -- not on VI
+//def IMAGE_ATOMIC_FMIN : MIMG_NoPattern_ <"image_atomic_fmin", 0x0000001e>; -- not on VI
+//def IMAGE_ATOMIC_FMAX : MIMG_NoPattern_ <"image_atomic_fmax", 0x0000001f>; -- not on VI
+defm IMAGE_SAMPLE : MIMG_Sampler_WQM <0x00000020, "image_sample">;
+defm IMAGE_SAMPLE_CL : MIMG_Sampler_WQM <0x00000021, "image_sample_cl">;
+defm IMAGE_SAMPLE_D : MIMG_Sampler <0x00000022, "image_sample_d">;
+defm IMAGE_SAMPLE_D_CL : MIMG_Sampler <0x00000023, "image_sample_d_cl">;
+defm IMAGE_SAMPLE_L : MIMG_Sampler <0x00000024, "image_sample_l">;
+defm IMAGE_SAMPLE_B : MIMG_Sampler_WQM <0x00000025, "image_sample_b">;
+defm IMAGE_SAMPLE_B_CL : MIMG_Sampler_WQM <0x00000026, "image_sample_b_cl">;
+defm IMAGE_SAMPLE_LZ : MIMG_Sampler <0x00000027, "image_sample_lz">;
+defm IMAGE_SAMPLE_C : MIMG_Sampler_WQM <0x00000028, "image_sample_c">;
+defm IMAGE_SAMPLE_C_CL : MIMG_Sampler_WQM <0x00000029, "image_sample_c_cl">;
+defm IMAGE_SAMPLE_C_D : MIMG_Sampler <0x0000002a, "image_sample_c_d">;
+defm IMAGE_SAMPLE_C_D_CL : MIMG_Sampler <0x0000002b, "image_sample_c_d_cl">;
+defm IMAGE_SAMPLE_C_L : MIMG_Sampler <0x0000002c, "image_sample_c_l">;
+defm IMAGE_SAMPLE_C_B : MIMG_Sampler_WQM <0x0000002d, "image_sample_c_b">;
+defm IMAGE_SAMPLE_C_B_CL : MIMG_Sampler_WQM <0x0000002e, "image_sample_c_b_cl">;
+defm IMAGE_SAMPLE_C_LZ : MIMG_Sampler <0x0000002f, "image_sample_c_lz">;
+defm IMAGE_SAMPLE_O : MIMG_Sampler_WQM <0x00000030, "image_sample_o">;
+defm IMAGE_SAMPLE_CL_O : MIMG_Sampler_WQM <0x00000031, "image_sample_cl_o">;
+defm IMAGE_SAMPLE_D_O : MIMG_Sampler <0x00000032, "image_sample_d_o">;
+defm IMAGE_SAMPLE_D_CL_O : MIMG_Sampler <0x00000033, "image_sample_d_cl_o">;
+defm IMAGE_SAMPLE_L_O : MIMG_Sampler <0x00000034, "image_sample_l_o">;
+defm IMAGE_SAMPLE_B_O : MIMG_Sampler_WQM <0x00000035, "image_sample_b_o">;
+defm IMAGE_SAMPLE_B_CL_O : MIMG_Sampler_WQM <0x00000036, "image_sample_b_cl_o">;
+defm IMAGE_SAMPLE_LZ_O : MIMG_Sampler <0x00000037, "image_sample_lz_o">;
+defm IMAGE_SAMPLE_C_O : MIMG_Sampler_WQM <0x00000038, "image_sample_c_o">;
+defm IMAGE_SAMPLE_C_CL_O : MIMG_Sampler_WQM <0x00000039, "image_sample_c_cl_o">;
+defm IMAGE_SAMPLE_C_D_O : MIMG_Sampler <0x0000003a, "image_sample_c_d_o">;
+defm IMAGE_SAMPLE_C_D_CL_O : MIMG_Sampler <0x0000003b, "image_sample_c_d_cl_o">;
+defm IMAGE_SAMPLE_C_L_O : MIMG_Sampler <0x0000003c, "image_sample_c_l_o">;
+defm IMAGE_SAMPLE_C_B_O : MIMG_Sampler_WQM <0x0000003d, "image_sample_c_b_o">;
+defm IMAGE_SAMPLE_C_B_CL_O : MIMG_Sampler_WQM <0x0000003e, "image_sample_c_b_cl_o">;
+defm IMAGE_SAMPLE_C_LZ_O : MIMG_Sampler <0x0000003f, "image_sample_c_lz_o">;
+defm IMAGE_GATHER4 : MIMG_Gather_WQM <0x00000040, "image_gather4">;
+defm IMAGE_GATHER4_CL : MIMG_Gather_WQM <0x00000041, "image_gather4_cl">;
+defm IMAGE_GATHER4_L : MIMG_Gather <0x00000044, "image_gather4_l">;
+defm IMAGE_GATHER4_B : MIMG_Gather_WQM <0x00000045, "image_gather4_b">;
+defm IMAGE_GATHER4_B_CL : MIMG_Gather_WQM <0x00000046, "image_gather4_b_cl">;
+defm IMAGE_GATHER4_LZ : MIMG_Gather <0x00000047, "image_gather4_lz">;
+defm IMAGE_GATHER4_C : MIMG_Gather_WQM <0x00000048, "image_gather4_c">;
+defm IMAGE_GATHER4_C_CL : MIMG_Gather_WQM <0x00000049, "image_gather4_c_cl">;
+defm IMAGE_GATHER4_C_L : MIMG_Gather <0x0000004c, "image_gather4_c_l">;
+defm IMAGE_GATHER4_C_B : MIMG_Gather_WQM <0x0000004d, "image_gather4_c_b">;
+defm IMAGE_GATHER4_C_B_CL : MIMG_Gather_WQM <0x0000004e, "image_gather4_c_b_cl">;
+defm IMAGE_GATHER4_C_LZ : MIMG_Gather <0x0000004f, "image_gather4_c_lz">;
+defm IMAGE_GATHER4_O : MIMG_Gather_WQM <0x00000050, "image_gather4_o">;
+defm IMAGE_GATHER4_CL_O : MIMG_Gather_WQM <0x00000051, "image_gather4_cl_o">;
+defm IMAGE_GATHER4_L_O : MIMG_Gather <0x00000054, "image_gather4_l_o">;
+defm IMAGE_GATHER4_B_O : MIMG_Gather_WQM <0x00000055, "image_gather4_b_o">;
+defm IMAGE_GATHER4_B_CL_O : MIMG_Gather <0x00000056, "image_gather4_b_cl_o">;
+defm IMAGE_GATHER4_LZ_O : MIMG_Gather <0x00000057, "image_gather4_lz_o">;
+defm IMAGE_GATHER4_C_O : MIMG_Gather_WQM <0x00000058, "image_gather4_c_o">;
+defm IMAGE_GATHER4_C_CL_O : MIMG_Gather_WQM <0x00000059, "image_gather4_c_cl_o">;
+defm IMAGE_GATHER4_C_L_O : MIMG_Gather <0x0000005c, "image_gather4_c_l_o">;
+defm IMAGE_GATHER4_C_B_O : MIMG_Gather_WQM <0x0000005d, "image_gather4_c_b_o">;
+defm IMAGE_GATHER4_C_B_CL_O : MIMG_Gather_WQM <0x0000005e, "image_gather4_c_b_cl_o">;
+defm IMAGE_GATHER4_C_LZ_O : MIMG_Gather <0x0000005f, "image_gather4_c_lz_o">;
+defm IMAGE_GET_LOD : MIMG_Sampler_WQM <0x00000060, "image_get_lod">;
+defm IMAGE_SAMPLE_CD : MIMG_Sampler <0x00000068, "image_sample_cd">;
+defm IMAGE_SAMPLE_CD_CL : MIMG_Sampler <0x00000069, "image_sample_cd_cl">;
+defm IMAGE_SAMPLE_C_CD : MIMG_Sampler <0x0000006a, "image_sample_c_cd">;
+defm IMAGE_SAMPLE_C_CD_CL : MIMG_Sampler <0x0000006b, "image_sample_c_cd_cl">;
+defm IMAGE_SAMPLE_CD_O : MIMG_Sampler <0x0000006c, "image_sample_cd_o">;
+defm IMAGE_SAMPLE_CD_CL_O : MIMG_Sampler <0x0000006d, "image_sample_cd_cl_o">;
+defm IMAGE_SAMPLE_C_CD_O : MIMG_Sampler <0x0000006e, "image_sample_c_cd_o">;
+defm IMAGE_SAMPLE_C_CD_CL_O : MIMG_Sampler <0x0000006f, "image_sample_c_cd_cl_o">;
+//def IMAGE_RSRC256 : MIMG_NoPattern_RSRC256 <"image_rsrc256", 0x0000007e>;
+//def IMAGE_SAMPLER : MIMG_NoPattern_ <"image_sampler", 0x0000007f>;
+}
+
+/********** ======================= **********/
+/********** Image sampling patterns **********/
+/********** ======================= **********/
+
+// Image + sampler
+class SampleRawPattern<SDPatternOperator name, MIMG opcode, ValueType vt> : Pat <
+ (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, i32:$dmask, i32:$unorm,
+ i32:$r128, i32:$da, i32:$glc, i32:$slc, i32:$tfe, i32:$lwe),
+ (opcode $addr, $rsrc, $sampler,
+ (as_i32imm $dmask), (as_i1imm $unorm), (as_i1imm $glc), (as_i1imm $slc),
+ (as_i1imm $r128), (as_i1imm $tfe), (as_i1imm $lwe), (as_i1imm $da))
+>;
+
+multiclass SampleRawPatterns<SDPatternOperator name, string opcode> {
+ def : SampleRawPattern<name, !cast<MIMG>(opcode # _V4_V1), i32>;
+ def : SampleRawPattern<name, !cast<MIMG>(opcode # _V4_V2), v2i32>;
+ def : SampleRawPattern<name, !cast<MIMG>(opcode # _V4_V4), v4i32>;
+ def : SampleRawPattern<name, !cast<MIMG>(opcode # _V4_V8), v8i32>;
+ def : SampleRawPattern<name, !cast<MIMG>(opcode # _V4_V16), v16i32>;
+}
+
+// Image + sampler for amdgcn
+// TODO:
+// 1. Handle half data types such as v4f16, and add D16 bit support;
+// 2. Handle a v4i32 rsrc type (the rsrc operand would then use SReg_128);
+// 3. Add A16 support for addresses passed as half types.
+multiclass AMDGCNSamplePattern<SDPatternOperator name, MIMG opcode, ValueType dt, ValueType vt> {
+ def : Pat<
+ (dt (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, i32:$dmask, i1:$unorm, i1:$glc,
+ i1:$slc, i1:$lwe, i1:$da)),
+ (opcode $addr, $rsrc, $sampler,
+ (as_i32imm $dmask), (as_i1imm $unorm), (as_i1imm $glc), (as_i1imm $slc),
+ 0, 0, (as_i1imm $lwe), (as_i1imm $da))
+ >;
+}
+
+multiclass AMDGCNSampleDataPatterns<SDPatternOperator name, string opcode, ValueType dt> {
+ defm : AMDGCNSamplePattern<name, !cast<MIMG>(opcode # _V1), dt, f32>;
+ defm : AMDGCNSamplePattern<name, !cast<MIMG>(opcode # _V2), dt, v2f32>;
+ defm : AMDGCNSamplePattern<name, !cast<MIMG>(opcode # _V4), dt, v4f32>;
+ defm : AMDGCNSamplePattern<name, !cast<MIMG>(opcode # _V8), dt, v8f32>;
+ defm : AMDGCNSamplePattern<name, !cast<MIMG>(opcode # _V16), dt, v16f32>;
+}
+
+// TODO: support v3f32.
+multiclass AMDGCNSamplePatterns<SDPatternOperator name, string opcode> {
+ defm : AMDGCNSampleDataPatterns<name, !cast<string>(opcode # _V1), f32>;
+ defm : AMDGCNSampleDataPatterns<name, !cast<string>(opcode # _V2), v2f32>;
+ defm : AMDGCNSampleDataPatterns<name, !cast<string>(opcode # _V4), v4f32>;
+}
+
+// Image only
+class ImagePattern<SDPatternOperator name, MIMG opcode, ValueType vt> : Pat <
+ (name vt:$addr, v8i32:$rsrc, imm:$dmask, imm:$unorm,
+ imm:$r128, imm:$da, imm:$glc, imm:$slc, imm:$tfe, imm:$lwe),
+ (opcode $addr, $rsrc,
+ (as_i32imm $dmask), (as_i1imm $unorm), (as_i1imm $glc), (as_i1imm $slc),
+ (as_i1imm $r128), (as_i1imm $tfe), (as_i1imm $lwe), (as_i1imm $da))
+>;
+
+multiclass ImagePatterns<SDPatternOperator name, string opcode> {
+ def : ImagePattern<name, !cast<MIMG>(opcode # _V4_V1), i32>;
+ def : ImagePattern<name, !cast<MIMG>(opcode # _V4_V2), v2i32>;
+ def : ImagePattern<name, !cast<MIMG>(opcode # _V4_V4), v4i32>;
+}
+
+multiclass ImageLoadPattern<SDPatternOperator name, MIMG opcode, ValueType dt, ValueType vt> {
+ def : Pat <
+ (dt (name vt:$addr, v8i32:$rsrc, i32:$dmask, i1:$glc, i1:$slc, i1:$lwe,
+ i1:$da)),
+ (opcode $addr, $rsrc,
+ (as_i32imm $dmask), 1, (as_i1imm $glc), (as_i1imm $slc),
+ 0, 0, (as_i1imm $lwe), (as_i1imm $da))
+ >;
+}
+
+multiclass ImageLoadDataPatterns<SDPatternOperator name, string opcode, ValueType dt> {
+ defm : ImageLoadPattern<name, !cast<MIMG>(opcode # _V1), dt, i32>;
+ defm : ImageLoadPattern<name, !cast<MIMG>(opcode # _V2), dt, v2i32>;
+ defm : ImageLoadPattern<name, !cast<MIMG>(opcode # _V4), dt, v4i32>;
+}
+
+// TODO: support v3f32.
+multiclass ImageLoadPatterns<SDPatternOperator name, string opcode> {
+ defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V1), f32>;
+ defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V2), v2f32>;
+ defm : ImageLoadDataPatterns<name, !cast<string>(opcode # _V4), v4f32>;
+}
+
+multiclass ImageStorePattern<SDPatternOperator name, MIMG opcode, ValueType dt, ValueType vt> {
+ def : Pat <
+ (name dt:$data, vt:$addr, v8i32:$rsrc, i32:$dmask, i1:$glc, i1:$slc,
+ i1:$lwe, i1:$da),
+ (opcode $data, $addr, $rsrc,
+ (as_i32imm $dmask), 1, (as_i1imm $glc), (as_i1imm $slc),
+ 0, 0, (as_i1imm $lwe), (as_i1imm $da))
+ >;
+}
+
+multiclass ImageStoreDataPatterns<SDPatternOperator name, string opcode, ValueType dt> {
+ defm : ImageStorePattern<name, !cast<MIMG>(opcode # _V1), dt, i32>;
+ defm : ImageStorePattern<name, !cast<MIMG>(opcode # _V2), dt, v2i32>;
+ defm : ImageStorePattern<name, !cast<MIMG>(opcode # _V4), dt, v4i32>;
+}
+
+// TODO: support v3f32.
+multiclass ImageStorePatterns<SDPatternOperator name, string opcode> {
+ defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V1), f32>;
+ defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V2), v2f32>;
+ defm : ImageStoreDataPatterns<name, !cast<string>(opcode # _V4), v4f32>;
+}
+
+class ImageAtomicPattern<SDPatternOperator name, MIMG opcode, ValueType vt> : Pat <
+ (name i32:$vdata, vt:$addr, v8i32:$rsrc, imm:$r128, imm:$da, imm:$slc),
+ (opcode $vdata, $addr, $rsrc, 1, 1, 1, (as_i1imm $slc), (as_i1imm $r128), 0, 0, (as_i1imm $da))
+>;
+
+multiclass ImageAtomicPatterns<SDPatternOperator name, string opcode> {
+ def : ImageAtomicPattern<name, !cast<MIMG>(opcode # _V1), i32>;
+ def : ImageAtomicPattern<name, !cast<MIMG>(opcode # _V2), v2i32>;
+ def : ImageAtomicPattern<name, !cast<MIMG>(opcode # _V4), v4i32>;
+}
+
+class ImageAtomicCmpSwapPattern<MIMG opcode, ValueType vt> : Pat <
+ (int_amdgcn_image_atomic_cmpswap i32:$vsrc, i32:$vcmp, vt:$addr, v8i32:$rsrc,
+ imm:$r128, imm:$da, imm:$slc),
+ (EXTRACT_SUBREG
+ (opcode (REG_SEQUENCE VReg_64, $vsrc, sub0, $vcmp, sub1),
+ $addr, $rsrc, 3, 1, 1, (as_i1imm $slc), (as_i1imm $r128), 0, 0, (as_i1imm $da)),
+ sub0)
+>;
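(Editorial note, not part of the patch.) The cmpswap pattern above packs the new value ($vsrc) and the compare value ($vcmp) into one 64-bit pair with REG_SEQUENCE, and since the old memory value comes back in the low half of that pair, the intrinsic's i32 result is recovered with EXTRACT_SUBREG ... sub0. A hedged C++ model of that value flow, assuming the usual compare-and-swap semantics on the memory side:

#include <cstdint>

struct VRegPair {
  uint32_t Lo; // sub0: $vsrc, the value to store on a successful compare
  uint32_t Hi; // sub1: $vcmp, the value compared against memory
};

// Hypothetical model of the operation, not an LLVM or hardware API.
uint32_t imageAtomicCmpSwap(uint32_t &Texel, VRegPair &Pair) {
  uint32_t Old = Texel;
  if (Old == Pair.Hi)
    Texel = Pair.Lo;
  Pair.Lo = Old;  // the old value ends up in the low dword...
  return Pair.Lo; // ...which is what EXTRACT_SUBREG sub0 selects
}

int main() {
  uint32_t Texel = 7;
  VRegPair P = {42, 7}; // store 42 if the texel still equals 7
  uint32_t Old = imageAtomicCmpSwap(Texel, P);
  return (Old == 7 && Texel == 42) ? 0 : 1;
}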
+
+// ======= SI Image Intrinsics ================
+
+// Image load
+defm : ImagePatterns<int_SI_image_load, "IMAGE_LOAD">;
+defm : ImagePatterns<int_SI_image_load_mip, "IMAGE_LOAD_MIP">;
+def : ImagePattern<int_SI_getresinfo, IMAGE_GET_RESINFO_V4_V1, i32>;
+
+// Basic sample
+defm : SampleRawPatterns<int_SI_image_sample, "IMAGE_SAMPLE">;
+defm : SampleRawPatterns<int_SI_image_sample_cl, "IMAGE_SAMPLE_CL">;
+defm : SampleRawPatterns<int_SI_image_sample_d, "IMAGE_SAMPLE_D">;
+defm : SampleRawPatterns<int_SI_image_sample_d_cl, "IMAGE_SAMPLE_D_CL">;
+defm : SampleRawPatterns<int_SI_image_sample_l, "IMAGE_SAMPLE_L">;
+defm : SampleRawPatterns<int_SI_image_sample_b, "IMAGE_SAMPLE_B">;
+defm : SampleRawPatterns<int_SI_image_sample_b_cl, "IMAGE_SAMPLE_B_CL">;
+defm : SampleRawPatterns<int_SI_image_sample_lz, "IMAGE_SAMPLE_LZ">;
+defm : SampleRawPatterns<int_SI_image_sample_cd, "IMAGE_SAMPLE_CD">;
+defm : SampleRawPatterns<int_SI_image_sample_cd_cl, "IMAGE_SAMPLE_CD_CL">;
+
+// Sample with comparison
+defm : SampleRawPatterns<int_SI_image_sample_c, "IMAGE_SAMPLE_C">;
+defm : SampleRawPatterns<int_SI_image_sample_c_cl, "IMAGE_SAMPLE_C_CL">;
+defm : SampleRawPatterns<int_SI_image_sample_c_d, "IMAGE_SAMPLE_C_D">;
+defm : SampleRawPatterns<int_SI_image_sample_c_d_cl, "IMAGE_SAMPLE_C_D_CL">;
+defm : SampleRawPatterns<int_SI_image_sample_c_l, "IMAGE_SAMPLE_C_L">;
+defm : SampleRawPatterns<int_SI_image_sample_c_b, "IMAGE_SAMPLE_C_B">;
+defm : SampleRawPatterns<int_SI_image_sample_c_b_cl, "IMAGE_SAMPLE_C_B_CL">;
+defm : SampleRawPatterns<int_SI_image_sample_c_lz, "IMAGE_SAMPLE_C_LZ">;
+defm : SampleRawPatterns<int_SI_image_sample_c_cd, "IMAGE_SAMPLE_C_CD">;
+defm : SampleRawPatterns<int_SI_image_sample_c_cd_cl, "IMAGE_SAMPLE_C_CD_CL">;
+
+// Sample with offsets
+defm : SampleRawPatterns<int_SI_image_sample_o, "IMAGE_SAMPLE_O">;
+defm : SampleRawPatterns<int_SI_image_sample_cl_o, "IMAGE_SAMPLE_CL_O">;
+defm : SampleRawPatterns<int_SI_image_sample_d_o, "IMAGE_SAMPLE_D_O">;
+defm : SampleRawPatterns<int_SI_image_sample_d_cl_o, "IMAGE_SAMPLE_D_CL_O">;
+defm : SampleRawPatterns<int_SI_image_sample_l_o, "IMAGE_SAMPLE_L_O">;
+defm : SampleRawPatterns<int_SI_image_sample_b_o, "IMAGE_SAMPLE_B_O">;
+defm : SampleRawPatterns<int_SI_image_sample_b_cl_o, "IMAGE_SAMPLE_B_CL_O">;
+defm : SampleRawPatterns<int_SI_image_sample_lz_o, "IMAGE_SAMPLE_LZ_O">;
+defm : SampleRawPatterns<int_SI_image_sample_cd_o, "IMAGE_SAMPLE_CD_O">;
+defm : SampleRawPatterns<int_SI_image_sample_cd_cl_o, "IMAGE_SAMPLE_CD_CL_O">;
+
+// Sample with comparison and offsets
+defm : SampleRawPatterns<int_SI_image_sample_c_o, "IMAGE_SAMPLE_C_O">;
+defm : SampleRawPatterns<int_SI_image_sample_c_cl_o, "IMAGE_SAMPLE_C_CL_O">;
+defm : SampleRawPatterns<int_SI_image_sample_c_d_o, "IMAGE_SAMPLE_C_D_O">;
+defm : SampleRawPatterns<int_SI_image_sample_c_d_cl_o, "IMAGE_SAMPLE_C_D_CL_O">;
+defm : SampleRawPatterns<int_SI_image_sample_c_l_o, "IMAGE_SAMPLE_C_L_O">;
+defm : SampleRawPatterns<int_SI_image_sample_c_b_o, "IMAGE_SAMPLE_C_B_O">;
+defm : SampleRawPatterns<int_SI_image_sample_c_b_cl_o, "IMAGE_SAMPLE_C_B_CL_O">;
+defm : SampleRawPatterns<int_SI_image_sample_c_lz_o, "IMAGE_SAMPLE_C_LZ_O">;
+defm : SampleRawPatterns<int_SI_image_sample_c_cd_o, "IMAGE_SAMPLE_C_CD_O">;
+defm : SampleRawPatterns<int_SI_image_sample_c_cd_cl_o, "IMAGE_SAMPLE_C_CD_CL_O">;
+
+// Gather opcodes
+// Only the variants which make sense are defined.
+def : SampleRawPattern<int_SI_gather4, IMAGE_GATHER4_V4_V2, v2i32>;
+def : SampleRawPattern<int_SI_gather4, IMAGE_GATHER4_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_cl, IMAGE_GATHER4_CL_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_l, IMAGE_GATHER4_L_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_b, IMAGE_GATHER4_B_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_b_cl, IMAGE_GATHER4_B_CL_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_b_cl, IMAGE_GATHER4_B_CL_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_lz, IMAGE_GATHER4_LZ_V4_V2, v2i32>;
+def : SampleRawPattern<int_SI_gather4_lz, IMAGE_GATHER4_LZ_V4_V4, v4i32>;
+
+def : SampleRawPattern<int_SI_gather4_c, IMAGE_GATHER4_C_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_c_cl, IMAGE_GATHER4_C_CL_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_c_cl, IMAGE_GATHER4_C_CL_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_c_l, IMAGE_GATHER4_C_L_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_c_l, IMAGE_GATHER4_C_L_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_c_b, IMAGE_GATHER4_C_B_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_c_b, IMAGE_GATHER4_C_B_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_c_b_cl, IMAGE_GATHER4_C_B_CL_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_c_lz, IMAGE_GATHER4_C_LZ_V4_V4, v4i32>;
+
+def : SampleRawPattern<int_SI_gather4_o, IMAGE_GATHER4_O_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_cl_o, IMAGE_GATHER4_CL_O_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_cl_o, IMAGE_GATHER4_CL_O_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_l_o, IMAGE_GATHER4_L_O_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_l_o, IMAGE_GATHER4_L_O_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_b_o, IMAGE_GATHER4_B_O_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_b_o, IMAGE_GATHER4_B_O_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_b_cl_o, IMAGE_GATHER4_B_CL_O_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_lz_o, IMAGE_GATHER4_LZ_O_V4_V4, v4i32>;
+
+def : SampleRawPattern<int_SI_gather4_c_o, IMAGE_GATHER4_C_O_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_c_o, IMAGE_GATHER4_C_O_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_c_cl_o, IMAGE_GATHER4_C_CL_O_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_c_l_o, IMAGE_GATHER4_C_L_O_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_c_b_o, IMAGE_GATHER4_C_B_O_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_c_b_cl_o, IMAGE_GATHER4_C_B_CL_O_V4_V8, v8i32>;
+def : SampleRawPattern<int_SI_gather4_c_lz_o, IMAGE_GATHER4_C_LZ_O_V4_V4, v4i32>;
+def : SampleRawPattern<int_SI_gather4_c_lz_o, IMAGE_GATHER4_C_LZ_O_V4_V8, v8i32>;
+
+def : SampleRawPattern<int_SI_getlod, IMAGE_GET_LOD_V4_V1, i32>;
+def : SampleRawPattern<int_SI_getlod, IMAGE_GET_LOD_V4_V2, v2i32>;
+def : SampleRawPattern<int_SI_getlod, IMAGE_GET_LOD_V4_V4, v4i32>;
+
+// ======= amdgcn Image Intrinsics ==============
+
+// Image load
+defm : ImageLoadPatterns<int_amdgcn_image_load, "IMAGE_LOAD">;
+defm : ImageLoadPatterns<int_amdgcn_image_load_mip, "IMAGE_LOAD_MIP">;
+defm : ImageLoadPatterns<int_amdgcn_image_getresinfo, "IMAGE_GET_RESINFO">;
+
+// Image store
+defm : ImageStorePatterns<int_amdgcn_image_store, "IMAGE_STORE">;
+defm : ImageStorePatterns<int_amdgcn_image_store_mip, "IMAGE_STORE_MIP">;
+
+// Basic sample
+defm : AMDGCNSamplePatterns<int_amdgcn_image_sample, "IMAGE_SAMPLE">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_cl, "IMAGE_SAMPLE_CL">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_d, "IMAGE_SAMPLE_D">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_d_cl, "IMAGE_SAMPLE_D_CL">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_l, "IMAGE_SAMPLE_L">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_b, "IMAGE_SAMPLE_B">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_b_cl, "IMAGE_SAMPLE_B_CL">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_lz, "IMAGE_SAMPLE_LZ">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_cd, "IMAGE_SAMPLE_CD">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_cd_cl, "IMAGE_SAMPLE_CD_CL">;
+
+// Sample with comparison
+defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c, "IMAGE_SAMPLE_C">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_cl, "IMAGE_SAMPLE_C_CL">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_d, "IMAGE_SAMPLE_C_D">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_d_cl, "IMAGE_SAMPLE_C_D_CL">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_l, "IMAGE_SAMPLE_C_L">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_b, "IMAGE_SAMPLE_C_B">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_b_cl, "IMAGE_SAMPLE_C_B_CL">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_lz, "IMAGE_SAMPLE_C_LZ">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_cd, "IMAGE_SAMPLE_C_CD">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_cd_cl, "IMAGE_SAMPLE_C_CD_CL">;
+
+// Sample with offsets
+defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_o, "IMAGE_SAMPLE_O">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_cl_o, "IMAGE_SAMPLE_CL_O">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_d_o, "IMAGE_SAMPLE_D_O">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_d_cl_o, "IMAGE_SAMPLE_D_CL_O">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_l_o, "IMAGE_SAMPLE_L_O">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_b_o, "IMAGE_SAMPLE_B_O">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_b_cl_o, "IMAGE_SAMPLE_B_CL_O">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_lz_o, "IMAGE_SAMPLE_LZ_O">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_cd_o, "IMAGE_SAMPLE_CD_O">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_cd_cl_o, "IMAGE_SAMPLE_CD_CL_O">;
+
+// Sample with comparison and offsets
+defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_o, "IMAGE_SAMPLE_C_O">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_cl_o, "IMAGE_SAMPLE_C_CL_O">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_d_o, "IMAGE_SAMPLE_C_D_O">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_d_cl_o, "IMAGE_SAMPLE_C_D_CL_O">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_l_o, "IMAGE_SAMPLE_C_L_O">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_b_o, "IMAGE_SAMPLE_C_B_O">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_b_cl_o, "IMAGE_SAMPLE_C_B_CL_O">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_lz_o, "IMAGE_SAMPLE_C_LZ_O">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_cd_o, "IMAGE_SAMPLE_C_CD_O">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_sample_c_cd_cl_o, "IMAGE_SAMPLE_C_CD_CL_O">;
+
+// Gather opcodes
+defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4, "IMAGE_GATHER4">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_cl, "IMAGE_GATHER4_CL">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_l, "IMAGE_GATHER4_L">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_b, "IMAGE_GATHER4_B">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_b_cl, "IMAGE_GATHER4_B_CL">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_lz, "IMAGE_GATHER4_LZ">;
+
+defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_c, "IMAGE_GATHER4_C">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_c_cl, "IMAGE_GATHER4_C_CL">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_c_l, "IMAGE_GATHER4_C_L">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_c_b, "IMAGE_GATHER4_C_B">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_c_b_cl, "IMAGE_GATHER4_C_B_CL">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_c_lz, "IMAGE_GATHER4_C_LZ">;
+
+defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_o, "IMAGE_GATHER4_O">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_cl_o, "IMAGE_GATHER4_CL_O">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_l_o, "IMAGE_GATHER4_L_O">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_b_o, "IMAGE_GATHER4_B_O">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_b_cl_o, "IMAGE_GATHER4_B_CL_O">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_lz_o, "IMAGE_GATHER4_LZ_O">;
+
+defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_c_o, "IMAGE_GATHER4_C_O">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_c_cl_o, "IMAGE_GATHER4_C_CL_O">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_c_l_o, "IMAGE_GATHER4_C_L_O">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_c_b_o, "IMAGE_GATHER4_C_B_O">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_c_b_cl_o, "IMAGE_GATHER4_C_B_CL_O">;
+defm : AMDGCNSamplePatterns<int_amdgcn_image_gather4_c_lz_o, "IMAGE_GATHER4_C_LZ_O">;
+
+defm : AMDGCNSamplePatterns<int_amdgcn_image_getlod, "IMAGE_GET_LOD">;
+
+// Image atomics
+defm : ImageAtomicPatterns<int_amdgcn_image_atomic_swap, "IMAGE_ATOMIC_SWAP">;
+def : ImageAtomicCmpSwapPattern<IMAGE_ATOMIC_CMPSWAP_V1, i32>;
+def : ImageAtomicCmpSwapPattern<IMAGE_ATOMIC_CMPSWAP_V2, v2i32>;
+def : ImageAtomicCmpSwapPattern<IMAGE_ATOMIC_CMPSWAP_V4, v4i32>;
+defm : ImageAtomicPatterns<int_amdgcn_image_atomic_add, "IMAGE_ATOMIC_ADD">;
+defm : ImageAtomicPatterns<int_amdgcn_image_atomic_sub, "IMAGE_ATOMIC_SUB">;
+defm : ImageAtomicPatterns<int_amdgcn_image_atomic_smin, "IMAGE_ATOMIC_SMIN">;
+defm : ImageAtomicPatterns<int_amdgcn_image_atomic_umin, "IMAGE_ATOMIC_UMIN">;
+defm : ImageAtomicPatterns<int_amdgcn_image_atomic_smax, "IMAGE_ATOMIC_SMAX">;
+defm : ImageAtomicPatterns<int_amdgcn_image_atomic_umax, "IMAGE_ATOMIC_UMAX">;
+defm : ImageAtomicPatterns<int_amdgcn_image_atomic_and, "IMAGE_ATOMIC_AND">;
+defm : ImageAtomicPatterns<int_amdgcn_image_atomic_or, "IMAGE_ATOMIC_OR">;
+defm : ImageAtomicPatterns<int_amdgcn_image_atomic_xor, "IMAGE_ATOMIC_XOR">;
+defm : ImageAtomicPatterns<int_amdgcn_image_atomic_inc, "IMAGE_ATOMIC_INC">;
+defm : ImageAtomicPatterns<int_amdgcn_image_atomic_dec, "IMAGE_ATOMIC_DEC">;
+
+/* SIsample for simple 1D texture lookup */
+def : Pat <
+ (SIsample i32:$addr, v8i32:$rsrc, v4i32:$sampler, imm),
+ (IMAGE_SAMPLE_V4_V1 $addr, $rsrc, $sampler, 0xf, 0, 0, 0, 0, 0, 0, 0)
+>;
+
+class SamplePattern<SDNode name, MIMG opcode, ValueType vt> : Pat <
+ (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, imm),
+ (opcode $addr, $rsrc, $sampler, 0xf, 0, 0, 0, 0, 0, 0, 0)
+>;
+
+class SampleRectPattern<SDNode name, MIMG opcode, ValueType vt> : Pat <
+ (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, TEX_RECT),
+ (opcode $addr, $rsrc, $sampler, 0xf, 1, 0, 0, 0, 0, 0, 0)
+>;
+
+class SampleArrayPattern<SDNode name, MIMG opcode, ValueType vt> : Pat <
+ (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, TEX_ARRAY),
+ (opcode $addr, $rsrc, $sampler, 0xf, 0, 0, 0, 0, 0, 0, 1)
+>;
+
+class SampleShadowPattern<SDNode name, MIMG opcode,
+ ValueType vt> : Pat <
+ (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, TEX_SHADOW),
+ (opcode $addr, $rsrc, $sampler, 0xf, 0, 0, 0, 0, 0, 0, 0)
+>;
+
+class SampleShadowArrayPattern<SDNode name, MIMG opcode,
+ ValueType vt> : Pat <
+ (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, TEX_SHADOW_ARRAY),
+ (opcode $addr, $rsrc, $sampler, 0xf, 0, 0, 0, 0, 0, 0, 1)
+>;
+
+/* SIsample* for texture lookups consuming more address parameters */
+multiclass SamplePatterns<MIMG sample, MIMG sample_c, MIMG sample_l,
+ MIMG sample_c_l, MIMG sample_b, MIMG sample_c_b,
+MIMG sample_d, MIMG sample_c_d, ValueType addr_type> {
+ def : SamplePattern <SIsample, sample, addr_type>;
+ def : SampleRectPattern <SIsample, sample, addr_type>;
+ def : SampleArrayPattern <SIsample, sample, addr_type>;
+ def : SampleShadowPattern <SIsample, sample_c, addr_type>;
+ def : SampleShadowArrayPattern <SIsample, sample_c, addr_type>;
+
+ def : SamplePattern <SIsamplel, sample_l, addr_type>;
+ def : SampleArrayPattern <SIsamplel, sample_l, addr_type>;
+ def : SampleShadowPattern <SIsamplel, sample_c_l, addr_type>;
+ def : SampleShadowArrayPattern <SIsamplel, sample_c_l, addr_type>;
+
+ def : SamplePattern <SIsampleb, sample_b, addr_type>;
+ def : SampleArrayPattern <SIsampleb, sample_b, addr_type>;
+ def : SampleShadowPattern <SIsampleb, sample_c_b, addr_type>;
+ def : SampleShadowArrayPattern <SIsampleb, sample_c_b, addr_type>;
+
+ def : SamplePattern <SIsampled, sample_d, addr_type>;
+ def : SampleArrayPattern <SIsampled, sample_d, addr_type>;
+ def : SampleShadowPattern <SIsampled, sample_c_d, addr_type>;
+ def : SampleShadowArrayPattern <SIsampled, sample_c_d, addr_type>;
+}
+
+defm : SamplePatterns<IMAGE_SAMPLE_V4_V2, IMAGE_SAMPLE_C_V4_V2,
+ IMAGE_SAMPLE_L_V4_V2, IMAGE_SAMPLE_C_L_V4_V2,
+ IMAGE_SAMPLE_B_V4_V2, IMAGE_SAMPLE_C_B_V4_V2,
+ IMAGE_SAMPLE_D_V4_V2, IMAGE_SAMPLE_C_D_V4_V2,
+ v2i32>;
+defm : SamplePatterns<IMAGE_SAMPLE_V4_V4, IMAGE_SAMPLE_C_V4_V4,
+ IMAGE_SAMPLE_L_V4_V4, IMAGE_SAMPLE_C_L_V4_V4,
+ IMAGE_SAMPLE_B_V4_V4, IMAGE_SAMPLE_C_B_V4_V4,
+ IMAGE_SAMPLE_D_V4_V4, IMAGE_SAMPLE_C_D_V4_V4,
+ v4i32>;
+defm : SamplePatterns<IMAGE_SAMPLE_V4_V8, IMAGE_SAMPLE_C_V4_V8,
+ IMAGE_SAMPLE_L_V4_V8, IMAGE_SAMPLE_C_L_V4_V8,
+ IMAGE_SAMPLE_B_V4_V8, IMAGE_SAMPLE_C_B_V4_V8,
+ IMAGE_SAMPLE_D_V4_V8, IMAGE_SAMPLE_C_D_V4_V8,
+ v8i32>;
+defm : SamplePatterns<IMAGE_SAMPLE_V4_V16, IMAGE_SAMPLE_C_V4_V16,
+ IMAGE_SAMPLE_L_V4_V16, IMAGE_SAMPLE_C_L_V4_V16,
+ IMAGE_SAMPLE_B_V4_V16, IMAGE_SAMPLE_C_B_V4_V16,
+ IMAGE_SAMPLE_D_V4_V16, IMAGE_SAMPLE_C_D_V4_V16,
+ v16i32>;
diff --git a/contrib/llvm/lib/Target/AMDGPU/Processors.td b/contrib/llvm/lib/Target/AMDGPU/Processors.td
index f5f1eb14e993..3c07cc76b9a1 100644
--- a/contrib/llvm/lib/Target/AMDGPU/Processors.td
+++ b/contrib/llvm/lib/Target/AMDGPU/Processors.td
@@ -101,55 +101,89 @@ def : ProcessorModel<"hainan", SIQuarterSpeedModel, [FeatureSouthernIslands]>;
//===----------------------------------------------------------------------===//
def : ProcessorModel<"bonaire", SIQuarterSpeedModel,
- [FeatureSeaIslands, FeatureLDSBankCount32, FeatureISAVersion7_0_0]
+ [FeatureISAVersion7_0_0]
>;
def : ProcessorModel<"kabini", SIQuarterSpeedModel,
- [FeatureSeaIslands, FeatureLDSBankCount16]
+ [FeatureISAVersion7_0_2]
>;
def : ProcessorModel<"kaveri", SIQuarterSpeedModel,
- [FeatureSeaIslands, FeatureLDSBankCount32, FeatureISAVersion7_0_0]
+ [FeatureISAVersion7_0_0]
>;
-def : ProcessorModel<"hawaii", SIFullSpeedModel,
- [FeatureSeaIslands, FeatureFastFMAF32, HalfRate64Ops,
- FeatureLDSBankCount32, FeatureISAVersion7_0_1]
+def : ProcessorModel<"hawaii", SIFullSpeedModel,
+ [FeatureISAVersion7_0_1]
>;
def : ProcessorModel<"mullins", SIQuarterSpeedModel,
- [FeatureSeaIslands, FeatureLDSBankCount16]>;
+ [FeatureISAVersion7_0_2]>;
+
+def : ProcessorModel<"gfx700", SIQuarterSpeedModel,
+ [FeatureISAVersion7_0_0]
+>;
+
+def : ProcessorModel<"gfx701", SIFullSpeedModel,
+ [FeatureISAVersion7_0_1]
+>;
+
+def : ProcessorModel<"gfx702", SIQuarterSpeedModel,
+ [FeatureISAVersion7_0_2]
+>;
//===----------------------------------------------------------------------===//
// Volcanic Islands
//===----------------------------------------------------------------------===//
def : ProcessorModel<"tonga", SIQuarterSpeedModel,
- [FeatureVolcanicIslands, FeatureSGPRInitBug, FeatureISAVersion8_0_0,
- FeatureLDSBankCount32]
+ [FeatureISAVersion8_0_2]
>;
def : ProcessorModel<"iceland", SIQuarterSpeedModel,
- [FeatureVolcanicIslands, FeatureSGPRInitBug, FeatureISAVersion8_0_0,
- FeatureLDSBankCount32]
+ [FeatureISAVersion8_0_0]
>;
def : ProcessorModel<"carrizo", SIQuarterSpeedModel,
- [FeatureVolcanicIslands, FeatureISAVersion8_0_1, FeatureLDSBankCount32]
+ [FeatureISAVersion8_0_1]
>;
-def : ProcessorModel<"fiji", SIQuarterSpeedModel,
- [FeatureVolcanicIslands, FeatureISAVersion8_0_3, FeatureLDSBankCount32]
+def : ProcessorModel<"fiji", SIQuarterSpeedModel,
+ [FeatureISAVersion8_0_3]
>;
-def : ProcessorModel<"stoney", SIQuarterSpeedModel,
- [FeatureVolcanicIslands, FeatureISAVersion8_0_1, FeatureLDSBankCount16]
+def : ProcessorModel<"stoney", SIQuarterSpeedModel,
+ [FeatureISAVersion8_1_0]
>;
def : ProcessorModel<"polaris10", SIQuarterSpeedModel,
- [FeatureVolcanicIslands, FeatureISAVersion8_0_1, FeatureLDSBankCount32]
+ [FeatureISAVersion8_0_3]
>;
def : ProcessorModel<"polaris11", SIQuarterSpeedModel,
- [FeatureVolcanicIslands, FeatureISAVersion8_0_1, FeatureLDSBankCount32]
+ [FeatureISAVersion8_0_3]
+>;
+
+def : ProcessorModel<"gfx800", SIQuarterSpeedModel,
+ [FeatureISAVersion8_0_0]
+>;
+
+def : ProcessorModel<"gfx801", SIQuarterSpeedModel,
+ [FeatureISAVersion8_0_1]
>;
+
+def : ProcessorModel<"gfx802", SIQuarterSpeedModel,
+ [FeatureISAVersion8_0_2]
+>;
+
+def : ProcessorModel<"gfx803", SIQuarterSpeedModel,
+ [FeatureISAVersion8_0_3]
+>;
+
+def : ProcessorModel<"gfx804", SIQuarterSpeedModel,
+ [FeatureISAVersion8_0_4]
+>;
+
+def : ProcessorModel<"gfx810", SIQuarterSpeedModel,
+ [FeatureISAVersion8_1_0]
+>;
+
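(Editorial note, not part of the patch.) The new generic processor entries follow the gfxMms naming, where the digits are the ISA version's major, minor and stepping numbers, matching the single FeatureISAVersionM_m_s feature each entry now carries (for example, gfx803 pairs with FeatureISAVersion8_0_3 above). A trivial sketch of that correspondence:

#include <cstdio>

int main() {
  // Example taken from the entries above: gfx803 <-> FeatureISAVersion8_0_3.
  unsigned Major = 8, Minor = 0, Stepping = 3;
  std::printf("gfx%u%u%u -> FeatureISAVersion%u_%u_%u\n",
              Major, Minor, Stepping, Major, Minor, Stepping);
  return 0;
}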
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp b/contrib/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp
index 3ccde79e2df4..d0aba38f786d 100644
--- a/contrib/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/R600ClauseMergePass.cpp
@@ -66,7 +66,7 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
- const char *getPassName() const override;
+ StringRef getPassName() const override;
};
char R600ClauseMergePass::ID = 0;
@@ -201,7 +201,7 @@ bool R600ClauseMergePass::runOnMachineFunction(MachineFunction &MF) {
return false;
}
-const char *R600ClauseMergePass::getPassName() const {
+StringRef R600ClauseMergePass::getPassName() const {
return "R600 Merge Clause Markers Pass";
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp b/contrib/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
index d5bda4a8303e..45b36d3d3ebb 100644
--- a/contrib/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/R600ControlFlowFinalizer.cpp
@@ -354,10 +354,10 @@ private:
if (Src.first->getReg() != AMDGPU::ALU_LITERAL_X)
continue;
int64_t Imm = Src.second;
- std::vector<MachineOperand*>::iterator It =
- std::find_if(Lits.begin(), Lits.end(),
- [&](MachineOperand* val)
- { return val->isImm() && (val->getImm() == Imm);});
+ std::vector<MachineOperand *>::iterator It =
+ find_if(Lits, [&](MachineOperand *val) {
+ return val->isImm() && (val->getImm() == Imm);
+ });
// Get corresponding Operand
MachineOperand &Operand = MI.getOperand(
@@ -450,27 +450,24 @@ private:
return ClauseFile(&ClauseHead, std::move(ClauseContent));
}
- void
- EmitFetchClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause,
- unsigned &CfCount) {
+ void EmitFetchClause(MachineBasicBlock::iterator InsertPos,
+ const DebugLoc &DL, ClauseFile &Clause,
+ unsigned &CfCount) {
CounterPropagateAddr(*Clause.first, CfCount);
MachineBasicBlock *BB = Clause.first->getParent();
- BuildMI(BB, InsertPos->getDebugLoc(), TII->get(AMDGPU::FETCH_CLAUSE))
- .addImm(CfCount);
+ BuildMI(BB, DL, TII->get(AMDGPU::FETCH_CLAUSE)).addImm(CfCount);
for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
BB->splice(InsertPos, BB, Clause.second[i]);
}
CfCount += 2 * Clause.second.size();
}
- void
- EmitALUClause(MachineBasicBlock::iterator InsertPos, ClauseFile &Clause,
- unsigned &CfCount) {
+ void EmitALUClause(MachineBasicBlock::iterator InsertPos, const DebugLoc &DL,
+ ClauseFile &Clause, unsigned &CfCount) {
Clause.first->getOperand(0).setImm(0);
CounterPropagateAddr(*Clause.first, CfCount);
MachineBasicBlock *BB = Clause.first->getParent();
- BuildMI(BB, InsertPos->getDebugLoc(), TII->get(AMDGPU::ALU_CLAUSE))
- .addImm(CfCount);
+ BuildMI(BB, DL, TII->get(AMDGPU::ALU_CLAUSE)).addImm(CfCount);
for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
BB->splice(InsertPos, BB, Clause.second[i]);
}
@@ -644,17 +641,18 @@ public:
break;
}
case AMDGPU::RETURN: {
- BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END));
+ DebugLoc DL = MBB.findDebugLoc(MI);
+ BuildMI(MBB, MI, DL, getHWInstrDesc(CF_END));
CfCount++;
if (CfCount % 2) {
- BuildMI(MBB, I, MBB.findDebugLoc(MI), TII->get(AMDGPU::PAD));
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::PAD));
CfCount++;
}
MI->eraseFromParent();
for (unsigned i = 0, e = FetchClauses.size(); i < e; i++)
- EmitFetchClause(I, FetchClauses[i], CfCount);
+ EmitFetchClause(I, DL, FetchClauses[i], CfCount);
for (unsigned i = 0, e = AluClauses.size(); i < e; i++)
- EmitALUClause(I, AluClauses[i], CfCount);
+ EmitALUClause(I, DL, AluClauses[i], CfCount);
break;
}
default:
@@ -680,13 +678,13 @@ public:
.addImm(Alu->getOperand(8).getImm());
Alu->eraseFromParent();
}
- MFI->StackSize = CFStack.MaxStackSize;
+ MFI->CFStackSize = CFStack.MaxStackSize;
}
return false;
}
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "R600 Control Flow Finalizer Pass";
}
};
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp b/contrib/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
index 93ed5be94a54..9a5db6ccc672 100644
--- a/contrib/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/R600EmitClauseMarkers.cpp
@@ -307,7 +307,7 @@ public:
BB != BB_E; ++BB) {
MachineBasicBlock &MBB = *BB;
MachineBasicBlock::iterator I = MBB.begin();
- if (I->getOpcode() == AMDGPU::CF_ALU)
+ if (I != MBB.end() && I->getOpcode() == AMDGPU::CF_ALU)
continue; // BB was already parsed
for (MachineBasicBlock::iterator E = MBB.end(); I != E;) {
if (isALU(*I))
@@ -319,7 +319,7 @@ public:
return false;
}
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "R600 Emit Clause Markers Pass";
}
};
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp b/contrib/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
index 0385b6283f37..3e46e6387614 100644
--- a/contrib/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/R600ExpandSpecialInstrs.cpp
@@ -42,7 +42,7 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "R600 Expand special instructions pass";
}
};
@@ -116,85 +116,6 @@ bool R600ExpandSpecialInstrsPass::runOnMachineFunction(MachineFunction &MF) {
MI.eraseFromParent();
continue;
}
-
- case AMDGPU::INTERP_PAIR_XY: {
- MachineInstr *BMI;
- unsigned PReg = AMDGPU::R600_ArrayBaseRegClass.getRegister(
- MI.getOperand(2).getImm());
-
- for (unsigned Chan = 0; Chan < 4; ++Chan) {
- unsigned DstReg;
-
- if (Chan < 2)
- DstReg = MI.getOperand(Chan).getReg();
- else
- DstReg = Chan == 2 ? AMDGPU::T0_Z : AMDGPU::T0_W;
-
- BMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::INTERP_XY,
- DstReg, MI.getOperand(3 + (Chan % 2)).getReg(), PReg);
-
- if (Chan > 0) {
- BMI->bundleWithPred();
- }
- if (Chan >= 2)
- TII->addFlag(*BMI, 0, MO_FLAG_MASK);
- if (Chan != 3)
- TII->addFlag(*BMI, 0, MO_FLAG_NOT_LAST);
- }
-
- MI.eraseFromParent();
- continue;
- }
-
- case AMDGPU::INTERP_PAIR_ZW: {
- MachineInstr *BMI;
- unsigned PReg = AMDGPU::R600_ArrayBaseRegClass.getRegister(
- MI.getOperand(2).getImm());
-
- for (unsigned Chan = 0; Chan < 4; ++Chan) {
- unsigned DstReg;
-
- if (Chan < 2)
- DstReg = Chan == 0 ? AMDGPU::T0_X : AMDGPU::T0_Y;
- else
- DstReg = MI.getOperand(Chan-2).getReg();
-
- BMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::INTERP_ZW,
- DstReg, MI.getOperand(3 + (Chan % 2)).getReg(), PReg);
-
- if (Chan > 0) {
- BMI->bundleWithPred();
- }
- if (Chan < 2)
- TII->addFlag(*BMI, 0, MO_FLAG_MASK);
- if (Chan != 3)
- TII->addFlag(*BMI, 0, MO_FLAG_NOT_LAST);
- }
-
- MI.eraseFromParent();
- continue;
- }
-
- case AMDGPU::INTERP_VEC_LOAD: {
- const R600RegisterInfo &TRI = TII->getRegisterInfo();
- MachineInstr *BMI;
- unsigned PReg = AMDGPU::R600_ArrayBaseRegClass.getRegister(
- MI.getOperand(1).getImm());
- unsigned DstReg = MI.getOperand(0).getReg();
-
- for (unsigned Chan = 0; Chan < 4; ++Chan) {
- BMI = TII->buildDefaultInstruction(MBB, I, AMDGPU::INTERP_LOAD_P0,
- TRI.getSubReg(DstReg, TRI.getSubRegFromChannel(Chan)), PReg);
- if (Chan > 0) {
- BMI->bundleWithPred();
- }
- if (Chan != 3)
- TII->addFlag(*BMI, 0, MO_FLAG_NOT_LAST);
- }
-
- MI.eraseFromParent();
- continue;
- }
case AMDGPU::DOT_4: {
const R600RegisterInfo &TRI = TII->getRegisterInfo();
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600FrameLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/R600FrameLowering.cpp
index dd5681ff5e8b..5813786abe01 100644
--- a/contrib/llvm/lib/Target/AMDGPU/R600FrameLowering.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/R600FrameLowering.cpp
@@ -11,5 +11,4 @@
using namespace llvm;
-R600FrameLowering::~R600FrameLowering() {
-}
+R600FrameLowering::~R600FrameLowering() = default;
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600FrameLowering.h b/contrib/llvm/lib/Target/AMDGPU/R600FrameLowering.h
index 5fe4e0d201ac..874435f35ce4 100644
--- a/contrib/llvm/lib/Target/AMDGPU/R600FrameLowering.h
+++ b/contrib/llvm/lib/Target/AMDGPU/R600FrameLowering.h
@@ -19,12 +19,14 @@ public:
R600FrameLowering(StackDirection D, unsigned StackAl, int LAO,
unsigned TransAl = 1) :
AMDGPUFrameLowering(D, StackAl, LAO, TransAl) {}
- virtual ~R600FrameLowering();
+ ~R600FrameLowering() override;
- void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const {}
- void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {}
+ void emitPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const override {}
+ void emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const override {}
};
-}
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_AMDGPU_R600FRAMELOWERING_H
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
index 8ccd176930a6..89c9266746ac 100644
--- a/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/R600ISelLowering.cpp
@@ -17,16 +17,36 @@
#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
+#include "R600FrameLowering.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
-#include "llvm/Analysis/ValueTracking.h"
+#include "Utils/AMDGPUBaseInfo.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/DAGCombine.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/IR/Argument.h"
-#include "llvm/IR/Function.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/ErrorHandling.h"
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <utility>
+#include <vector>
using namespace llvm;
@@ -72,7 +92,6 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i1, Expand);
setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i1, Expand);
-
setOperationAction(ISD::STORE, MVT::i8, Custom);
setOperationAction(ISD::STORE, MVT::i32, Custom);
setOperationAction(ISD::STORE, MVT::v2i32, Custom);
@@ -192,12 +211,12 @@ R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
setSchedulingPreference(Sched::Source);
-
setTargetDAGCombine(ISD::FP_ROUND);
setTargetDAGCombine(ISD::FP_TO_SINT);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
setTargetDAGCombine(ISD::SELECT_CC);
setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
+ setTargetDAGCombine(ISD::LOAD);
}
const R600Subtarget *R600TargetLowering::getSubtarget() const {
@@ -205,13 +224,15 @@ const R600Subtarget *R600TargetLowering::getSubtarget() const {
}
static inline bool isEOP(MachineBasicBlock::iterator I) {
+ if (std::next(I) == I->getParent()->end())
+ return false;
return std::next(I)->getOpcode() == AMDGPU::RETURN;
}
MachineBasicBlock *
R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const {
- MachineFunction * MF = BB->getParent();
+ MachineFunction *MF = BB->getParent();
MachineRegisterInfo &MRI = MF->getRegInfo();
MachineBasicBlock::iterator I = MI;
const R600InstrInfo *TII = getSubtarget()->getInstrInfo();
@@ -278,10 +299,12 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
.bitcastToAPInt()
.getZExtValue());
break;
+
case AMDGPU::MOV_IMM_I32:
TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(),
MI.getOperand(1).getImm());
break;
+
case AMDGPU::MOV_IMM_GLOBAL_ADDR: {
//TODO: Perhaps combine this instruction with the next if possible
auto MIB = TII->buildDefaultInstruction(
@@ -291,6 +314,7 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MIB->getOperand(Idx) = MI.getOperand(1);
break;
}
+
case AMDGPU::CONST_COPY: {
MachineInstr *NewMI = TII->buildDefaultInstruction(
*BB, MI, AMDGPU::MOV, MI.getOperand(0).getReg(), AMDGPU::ALU_CONST);
@@ -301,228 +325,20 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
- case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
+ case AMDGPU::RAT_WRITE_CACHELESS_128_eg:
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
.addOperand(MI.getOperand(0))
.addOperand(MI.getOperand(1))
.addImm(isEOP(I)); // Set End of program bit
break;
- }
- case AMDGPU::RAT_STORE_TYPED_eg: {
+
+ case AMDGPU::RAT_STORE_TYPED_eg:
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
.addOperand(MI.getOperand(0))
.addOperand(MI.getOperand(1))
.addOperand(MI.getOperand(2))
.addImm(isEOP(I)); // Set End of program bit
break;
- }
-
- case AMDGPU::TXD: {
- unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
- unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
- MachineOperand &RID = MI.getOperand(4);
- MachineOperand &SID = MI.getOperand(5);
- unsigned TextureId = MI.getOperand(6).getImm();
- unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
- unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;
-
- switch (TextureId) {
- case 5: // Rect
- CTX = CTY = 0;
- break;
- case 6: // Shadow1D
- SrcW = SrcZ;
- break;
- case 7: // Shadow2D
- SrcW = SrcZ;
- break;
- case 8: // ShadowRect
- CTX = CTY = 0;
- SrcW = SrcZ;
- break;
- case 9: // 1DArray
- SrcZ = SrcY;
- CTZ = 0;
- break;
- case 10: // 2DArray
- CTZ = 0;
- break;
- case 11: // Shadow1DArray
- SrcZ = SrcY;
- CTZ = 0;
- break;
- case 12: // Shadow2DArray
- CTZ = 0;
- break;
- }
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H),
- T0)
- .addOperand(MI.getOperand(3))
- .addImm(SrcX)
- .addImm(SrcY)
- .addImm(SrcZ)
- .addImm(SrcW)
- .addImm(0)
- .addImm(0)
- .addImm(0)
- .addImm(0)
- .addImm(1)
- .addImm(2)
- .addImm(3)
- .addOperand(RID)
- .addOperand(SID)
- .addImm(CTX)
- .addImm(CTY)
- .addImm(CTZ)
- .addImm(CTW);
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V),
- T1)
- .addOperand(MI.getOperand(2))
- .addImm(SrcX)
- .addImm(SrcY)
- .addImm(SrcZ)
- .addImm(SrcW)
- .addImm(0)
- .addImm(0)
- .addImm(0)
- .addImm(0)
- .addImm(1)
- .addImm(2)
- .addImm(3)
- .addOperand(RID)
- .addOperand(SID)
- .addImm(CTX)
- .addImm(CTY)
- .addImm(CTZ)
- .addImm(CTW);
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
- .addOperand(MI.getOperand(0))
- .addOperand(MI.getOperand(1))
- .addImm(SrcX)
- .addImm(SrcY)
- .addImm(SrcZ)
- .addImm(SrcW)
- .addImm(0)
- .addImm(0)
- .addImm(0)
- .addImm(0)
- .addImm(1)
- .addImm(2)
- .addImm(3)
- .addOperand(RID)
- .addOperand(SID)
- .addImm(CTX)
- .addImm(CTY)
- .addImm(CTZ)
- .addImm(CTW)
- .addReg(T0, RegState::Implicit)
- .addReg(T1, RegState::Implicit);
- break;
- }
-
- case AMDGPU::TXD_SHADOW: {
- unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
- unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
- MachineOperand &RID = MI.getOperand(4);
- MachineOperand &SID = MI.getOperand(5);
- unsigned TextureId = MI.getOperand(6).getImm();
- unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
- unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;
-
- switch (TextureId) {
- case 5: // Rect
- CTX = CTY = 0;
- break;
- case 6: // Shadow1D
- SrcW = SrcZ;
- break;
- case 7: // Shadow2D
- SrcW = SrcZ;
- break;
- case 8: // ShadowRect
- CTX = CTY = 0;
- SrcW = SrcZ;
- break;
- case 9: // 1DArray
- SrcZ = SrcY;
- CTZ = 0;
- break;
- case 10: // 2DArray
- CTZ = 0;
- break;
- case 11: // Shadow1DArray
- SrcZ = SrcY;
- CTZ = 0;
- break;
- case 12: // Shadow2DArray
- CTZ = 0;
- break;
- }
-
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H),
- T0)
- .addOperand(MI.getOperand(3))
- .addImm(SrcX)
- .addImm(SrcY)
- .addImm(SrcZ)
- .addImm(SrcW)
- .addImm(0)
- .addImm(0)
- .addImm(0)
- .addImm(0)
- .addImm(1)
- .addImm(2)
- .addImm(3)
- .addOperand(RID)
- .addOperand(SID)
- .addImm(CTX)
- .addImm(CTY)
- .addImm(CTZ)
- .addImm(CTW);
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V),
- T1)
- .addOperand(MI.getOperand(2))
- .addImm(SrcX)
- .addImm(SrcY)
- .addImm(SrcZ)
- .addImm(SrcW)
- .addImm(0)
- .addImm(0)
- .addImm(0)
- .addImm(0)
- .addImm(1)
- .addImm(2)
- .addImm(3)
- .addOperand(RID)
- .addOperand(SID)
- .addImm(CTX)
- .addImm(CTY)
- .addImm(CTZ)
- .addImm(CTW);
- BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
- .addOperand(MI.getOperand(0))
- .addOperand(MI.getOperand(1))
- .addImm(SrcX)
- .addImm(SrcY)
- .addImm(SrcZ)
- .addImm(SrcW)
- .addImm(0)
- .addImm(0)
- .addImm(0)
- .addImm(0)
- .addImm(1)
- .addImm(2)
- .addImm(3)
- .addOperand(RID)
- .addOperand(SID)
- .addImm(CTX)
- .addImm(CTY)
- .addImm(CTZ)
- .addImm(CTW)
- .addReg(T0, RegState::Implicit)
- .addReg(T1, RegState::Implicit);
- break;
- }
case AMDGPU::BRANCH:
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
@@ -534,7 +350,7 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
AMDGPU::PREDICATE_BIT)
.addOperand(MI.getOperand(1))
- .addImm(OPCODE_IS_NOT_ZERO)
+ .addImm(AMDGPU::PRED_SETNE)
.addImm(0); // Flags
TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
@@ -548,7 +364,7 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
AMDGPU::PREDICATE_BIT)
.addOperand(MI.getOperand(1))
- .addImm(OPCODE_IS_NOT_ZERO_INT)
+ .addImm(AMDGPU::PRED_SETNE_INT)
.addImm(0); // Flags
TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
@@ -592,12 +408,6 @@ R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
break;
}
case AMDGPU::RETURN: {
- // RETURN instructions must have the live-out registers as implicit uses,
- // otherwise they appear dead.
- R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
- MachineInstrBuilder MIB(*MF, MI);
- for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
- MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
return BB;
}
}
@@ -654,7 +464,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
DAG.getConstant(3, DL, MVT::i32) // SWZ_W
};
- return DAG.getNode(AMDGPUISD::EXPORT, DL, Op.getValueType(), Args);
+ return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, Op.getValueType(), Args);
}
// default for switch(IntrinsicID)
@@ -671,15 +481,7 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
switch(IntrinsicID) {
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
case AMDGPUIntrinsic::r600_tex:
- case AMDGPUIntrinsic::r600_texc:
- case AMDGPUIntrinsic::r600_txl:
- case AMDGPUIntrinsic::r600_txlc:
- case AMDGPUIntrinsic::r600_txb:
- case AMDGPUIntrinsic::r600_txbc:
- case AMDGPUIntrinsic::r600_txf:
- case AMDGPUIntrinsic::r600_txq:
- case AMDGPUIntrinsic::r600_ddx:
- case AMDGPUIntrinsic::r600_ddy: {
+ case AMDGPUIntrinsic::r600_texc: {
unsigned TextureOp;
switch (IntrinsicID) {
case AMDGPUIntrinsic::r600_tex:
@@ -688,32 +490,8 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
case AMDGPUIntrinsic::r600_texc:
TextureOp = 1;
break;
- case AMDGPUIntrinsic::r600_txl:
- TextureOp = 2;
- break;
- case AMDGPUIntrinsic::r600_txlc:
- TextureOp = 3;
- break;
- case AMDGPUIntrinsic::r600_txb:
- TextureOp = 4;
- break;
- case AMDGPUIntrinsic::r600_txbc:
- TextureOp = 5;
- break;
- case AMDGPUIntrinsic::r600_txf:
- TextureOp = 6;
- break;
- case AMDGPUIntrinsic::r600_txq:
- TextureOp = 7;
- break;
- case AMDGPUIntrinsic::r600_ddx:
- TextureOp = 8;
- break;
- case AMDGPUIntrinsic::r600_ddy:
- TextureOp = 9;
- break;
default:
- llvm_unreachable("Unknow Texture Operation");
+ llvm_unreachable("unhandled texture operation");
}
SDValue TexArgs[19] = {
@@ -785,12 +563,6 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const
case Intrinsic::r600_read_local_size_z:
return LowerImplicitParameter(DAG, VT, DL, 8);
- case Intrinsic::r600_read_workdim:
- case AMDGPUIntrinsic::AMDGPU_read_workdim: { // Legacy name.
- uint32_t ByteOffset = getImplicitParameterOffset(MFI, GRID_DIM);
- return LowerImplicitParameter(DAG, VT, DL, ByteOffset / 4);
- }
-
case Intrinsic::r600_read_tgid_x:
return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
AMDGPU::T1_X, VT);
@@ -836,9 +608,10 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(lowerFP_TO_UINT(N->getOperand(0), DAG));
return;
}
- // Fall-through. Since we don't care about out of bounds values
- // we can use FP_TO_SINT for uints too. The DAGLegalizer code for uint
- // considers some extra cases which are not necessary here.
+ // Since we don't care about out of bounds values we can use FP_TO_SINT for
+ // uints too. The DAGLegalizer code for uint considers some extra cases
+ // which are not necessary here.
+ LLVM_FALLTHROUGH;
case ISD::FP_TO_SINT: {
if (N->getValueType(0) == MVT::i1) {
Results.push_back(lowerFP_TO_SINT(N->getOperand(0), DAG));
@@ -867,14 +640,12 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N,
SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
SDValue Vector) const {
-
SDLoc DL(Vector);
EVT VecVT = Vector.getValueType();
EVT EltVT = VecVT.getVectorElementType();
SmallVector<SDValue, 8> Args;
- for (unsigned i = 0, e = VecVT.getVectorNumElements();
- i != e; ++i) {
+ for (unsigned i = 0, e = VecVT.getVectorNumElements(); i != e; ++i) {
Args.push_back(DAG.getNode(
ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
DAG.getConstant(i, DL, getVectorIdxTy(DAG.getDataLayout()))));
@@ -885,7 +656,6 @@ SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
SelectionDAG &DAG) const {
-
SDLoc DL(Op);
SDValue Vector = Op.getOperand(0);
SDValue Index = Op.getOperand(1);
@@ -919,7 +689,6 @@ SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
SDValue Op,
SelectionDAG &DAG) const {
-
GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
@@ -1364,33 +1133,40 @@ SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
}
SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
- if (SDValue Result = AMDGPUTargetLowering::MergeVectorStore(Op, DAG))
- return Result;
-
StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
unsigned AS = StoreNode->getAddressSpace();
SDValue Value = StoreNode->getValue();
EVT ValueVT = Value.getValueType();
+ EVT MemVT = StoreNode->getMemoryVT();
+ unsigned Align = StoreNode->getAlignment();
if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) &&
ValueVT.isVector()) {
return SplitVectorStore(Op, DAG);
}
+ // Private AS needs special fixes
+ if (Align < MemVT.getStoreSize() && (AS != AMDGPUAS::PRIVATE_ADDRESS) &&
+ !allowsMisalignedMemoryAccesses(MemVT, AS, Align, nullptr)) {
+ return expandUnalignedStore(StoreNode, DAG);
+ }
+
SDLoc DL(Op);
SDValue Chain = StoreNode->getChain();
SDValue Ptr = StoreNode->getBasePtr();
if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
+    // It is beneficial to create MSKOR here instead of in the combiner to
+    // avoid the artificial dependencies introduced by RMW.
if (StoreNode->isTruncatingStore()) {
EVT VT = Value.getValueType();
assert(VT.bitsLE(MVT::i32));
- EVT MemVT = StoreNode->getMemoryVT();
SDValue MaskConstant;
if (MemVT == MVT::i8) {
MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
} else {
assert(MemVT == MVT::i16);
+ assert(StoreNode->getAlignment() >= 2);
MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
}
SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
@@ -1434,7 +1210,6 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
if (AS != AMDGPUAS::PRIVATE_ADDRESS)
return SDValue();
- EVT MemVT = StoreNode->getMemoryVT();
if (MemVT.bitsLT(MVT::i32))
return lowerPrivateTruncStore(StoreNode, DAG);
@@ -1754,9 +1529,11 @@ SDValue R600TargetLowering::LowerFormalArguments(
SmallVector<ISD::InputArg, 8> LocalIns;
- getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);
-
- AnalyzeFormalArguments(CCInfo, LocalIns);
+ if (AMDGPU::isShader(CallConv)) {
+ AnalyzeFormalArguments(CCInfo, Ins);
+ } else {
+ analyzeFormalArgumentsCompute(CCInfo, Ins);
+ }
for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
CCValAssign &VA = ArgLocs[i];
@@ -1800,18 +1577,19 @@ SDValue R600TargetLowering::LowerFormalArguments(
unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
unsigned PartOffset = VA.getLocMemOffset();
- unsigned Offset = 36 + VA.getLocMemOffset();
+ unsigned Offset = Subtarget->getExplicitKernelArgOffset() + VA.getLocMemOffset();
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
SDValue Arg = DAG.getLoad(
ISD::UNINDEXED, Ext, VT, DL, Chain,
DAG.getConstant(Offset, DL, MVT::i32), DAG.getUNDEF(MVT::i32), PtrInfo,
- MemVT, /* Alignment = */ 4,
- MachineMemOperand::MONonTemporal | MachineMemOperand::MOInvariant);
+ MemVT, /* Alignment = */ 4, MachineMemOperand::MONonTemporal |
+ MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant);
// 4 is the preferred alignment for the CONSTANT memory space.
InVals.push_back(Arg);
- MFI->ABIArgOffset = Offset + MemVT.getStoreSize();
+ MFI->setABIArgOffset(Offset + MemVT.getStoreSize());
}
return Chain;
}
@@ -1949,7 +1727,6 @@ SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[4],
return BuildVector;
}
-
//===----------------------------------------------------------------------===//
// Custom DAG Optimizations
//===----------------------------------------------------------------------===//
@@ -1957,14 +1734,14 @@ SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[4],
SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
+ SDLoc DL(N);
switch (N->getOpcode()) {
- default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
// (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
case ISD::FP_ROUND: {
SDValue Arg = N->getOperand(0);
if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
- return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
+ return DAG.getNode(ISD::UINT_TO_FP, DL, N->getValueType(0),
Arg.getOperand(0));
}
break;
@@ -1989,12 +1766,11 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
return SDValue();
}
- SDLoc dl(N);
- return DAG.getNode(ISD::SELECT_CC, dl, N->getValueType(0),
+ return DAG.getNode(ISD::SELECT_CC, DL, N->getValueType(0),
SelectCC.getOperand(0), // LHS
SelectCC.getOperand(1), // RHS
- DAG.getConstant(-1, dl, MVT::i32), // True
- DAG.getConstant(0, dl, MVT::i32), // False
+ DAG.getConstant(-1, DL, MVT::i32), // True
+ DAG.getConstant(0, DL, MVT::i32), // False
SelectCC.getOperand(4)); // CC
break;
@@ -2006,7 +1782,6 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
SDValue InVec = N->getOperand(0);
SDValue InVal = N->getOperand(1);
SDValue EltNo = N->getOperand(2);
- SDLoc dl(N);
// If the inserted element is an UNDEF, just use the input vector.
if (InVal.isUndef())
@@ -2044,13 +1819,13 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
EVT OpVT = Ops[0].getValueType();
if (InVal.getValueType() != OpVT)
InVal = OpVT.bitsGT(InVal.getValueType()) ?
- DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
- DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
+ DAG.getNode(ISD::ANY_EXTEND, DL, OpVT, InVal) :
+ DAG.getNode(ISD::TRUNCATE, DL, OpVT, InVal);
Ops[Elt] = InVal;
}
// Return the new vector
- return DAG.getBuildVector(VT, dl, Ops);
+ return DAG.getBuildVector(VT, DL, Ops);
}
// Extract_vec (Build_vector) generated by custom lowering
@@ -2064,11 +1839,13 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
}
}
if (Arg.getOpcode() == ISD::BITCAST &&
- Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
+ Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
+ (Arg.getOperand(0).getValueType().getVectorNumElements() ==
+ Arg.getValueType().getVectorNumElements())) {
if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
unsigned Element = Const->getZExtValue();
- return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
- Arg->getOperand(0).getOperand(Element));
+ return DAG.getNode(ISD::BITCAST, DL, N->getVTList(),
+ Arg->getOperand(0).getOperand(Element));
}
}
break;
@@ -2109,7 +1886,7 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
LHS.getOperand(0).getValueType().isInteger());
if (DCI.isBeforeLegalizeOps() ||
isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
- return DAG.getSelectCC(SDLoc(N),
+ return DAG.getSelectCC(DL,
LHS.getOperand(0),
LHS.getOperand(1),
LHS.getOperand(2),
@@ -2121,7 +1898,7 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
return SDValue();
}
- case AMDGPUISD::EXPORT: {
+ case AMDGPUISD::R600_EXPORT: {
SDValue Arg = N->getOperand(1);
if (Arg.getOpcode() != ISD::BUILD_VECTOR)
break;
@@ -2136,9 +1913,8 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
N->getOperand(6), // SWZ_Z
N->getOperand(7) // SWZ_W
};
- SDLoc DL(N);
NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
- return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs);
+ return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, N->getVTList(), NewArgs);
}
case AMDGPUISD::TEXTURE_FETCH: {
SDValue Arg = N->getOperand(1);
@@ -2166,10 +1942,10 @@ SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
N->getOperand(17),
N->getOperand(18),
};
- SDLoc DL(N);
NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
}
+ default: break;
}
return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
@@ -2262,7 +2038,6 @@ bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
uint64_t ImmValue = 0;
-
if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
float FloatValue = FPC->getValueAPF().convertToFloat();
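
The truncating-store path in LowerSTORE above rewrites sub-dword global stores as a masked read-modify-write of the containing dword (the MSKOR pattern). The following is a minimal standalone sketch of just the masking arithmetic, assuming little-endian byte order within the dword; the function and variable names are illustrative only and this is not the MSKOR node itself.

    #include <cassert>
    #include <cstdint>

    // Simplified scalar model of the i8/i16 truncating-store lowering: clear the
    // target byte/half-word within the dword, then OR in the shifted value.
    uint32_t storeTruncated(uint32_t dword, uint32_t byteAddr, uint32_t value,
                            unsigned storeBits /* 8 or 16 */) {
      uint32_t mask = (storeBits == 8) ? 0xFFu : 0xFFFFu;
      unsigned shift = (byteAddr & 3) * 8;   // byte offset within the dword
      uint32_t cleared = dword & ~(mask << shift);
      return cleared | ((value & mask) << shift);
    }

    int main() {
      // Store the byte 0xAB at byte offset 2 of the dword 0x11223344.
      assert(storeTruncated(0x11223344u, 2, 0xABu, 8) == 0x11AB3344u);
      // Store the half-word 0xBEEF at byte offset 0.
      assert(storeTruncated(0x11223344u, 0, 0xBEEFu, 16) == 0x1122BEEFu);
      return 0;
    }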
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600InstrFormats.td b/contrib/llvm/lib/Target/AMDGPU/R600InstrFormats.td
index 0ffd485476ec..68fcc545916a 100644
--- a/contrib/llvm/lib/Target/AMDGPU/R600InstrFormats.td
+++ b/contrib/llvm/lib/Target/AMDGPU/R600InstrFormats.td
@@ -210,14 +210,14 @@ class VTX_WORD0 {
bits<5> VC_INST;
bits<2> FETCH_TYPE;
bits<1> FETCH_WHOLE_QUAD;
- bits<8> BUFFER_ID;
+ bits<8> buffer_id;
bits<1> SRC_REL;
bits<2> SRC_SEL_X;
let Word0{4-0} = VC_INST;
let Word0{6-5} = FETCH_TYPE;
let Word0{7} = FETCH_WHOLE_QUAD;
- let Word0{15-8} = BUFFER_ID;
+ let Word0{15-8} = buffer_id;
let Word0{22-16} = src_gpr;
let Word0{23} = SRC_REL;
let Word0{25-24} = SRC_SEL_X;
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
index 1c5f7ec1b6ef..e88bd076718e 100644
--- a/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.cpp
@@ -320,12 +320,12 @@ R600InstrInfo::ExtractSrcs(MachineInstr &MI,
const DenseMap<unsigned, unsigned> &PV,
unsigned &ConstCount) const {
ConstCount = 0;
- ArrayRef<std::pair<MachineOperand *, int64_t>> Srcs = getSrcs(MI);
const std::pair<int, unsigned> DummyPair(-1, 0);
std::vector<std::pair<int, unsigned> > Result;
unsigned i = 0;
- for (unsigned n = Srcs.size(); i < n; ++i) {
- unsigned Reg = Srcs[i].first->getReg();
+ for (const auto &Src : getSrcs(MI)) {
+ ++i;
+ unsigned Reg = Src.first->getReg();
int Index = RI.getEncodingValue(Reg) & 0xff;
if (Reg == AMDGPU::OQAP) {
Result.push_back(std::make_pair(Index, 0U));
@@ -592,9 +592,7 @@ R600InstrInfo::fitsConstReadLimitations(const std::vector<MachineInstr *> &MIs)
if (!isALUInstr(MI.getOpcode()))
continue;
- ArrayRef<std::pair<MachineOperand *, int64_t>> Srcs = getSrcs(MI);
-
- for (const auto &Src:Srcs) {
+ for (const auto &Src : getSrcs(MI)) {
if (Src.first->getReg() == AMDGPU::ALU_LITERAL_X)
Literals.insert(Src.second);
if (Literals.size() > 4)
@@ -667,7 +665,7 @@ bool R600InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
// handled
if (isBranch(I->getOpcode()))
return true;
- if (!isJump(static_cast<MachineInstr *>(I)->getOpcode())) {
+ if (!isJump(I->getOpcode())) {
return false;
}
@@ -682,8 +680,7 @@ bool R600InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
// If there is only one terminator instruction, process it.
unsigned LastOpc = LastInst.getOpcode();
- if (I == MBB.begin() ||
- !isJump(static_cast<MachineInstr *>(--I)->getOpcode())) {
+ if (I == MBB.begin() || !isJump((--I)->getOpcode())) {
if (LastOpc == AMDGPU::JUMP) {
TBB = LastInst.getOperand(0).getMBB();
return false;
@@ -729,17 +726,19 @@ MachineBasicBlock::iterator FindLastAluClause(MachineBasicBlock &MBB) {
It != E; ++It) {
if (It->getOpcode() == AMDGPU::CF_ALU ||
It->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE)
- return std::prev(It.base());
+ return It.getReverse();
}
return MBB.end();
}
-unsigned R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
+unsigned R600InstrInfo::insertBranch(MachineBasicBlock &MBB,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
ArrayRef<MachineOperand> Cond,
- const DebugLoc &DL) const {
- assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ const DebugLoc &DL,
+ int *BytesAdded) const {
+ assert(TBB && "insertBranch must not be told to insert a fallthrough");
+ assert(!BytesAdded && "code size not handled");
if (!FBB) {
if (Cond.empty()) {
@@ -779,8 +778,9 @@ unsigned R600InstrInfo::InsertBranch(MachineBasicBlock &MBB,
}
}
-unsigned
-R600InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+unsigned R600InstrInfo::removeBranch(MachineBasicBlock &MBB,
+ int *BytesRemoved) const {
+ assert(!BytesRemoved && "code size not handled");
// Note : we leave PRED* instructions there.
// They may be needed when predicating instructions.
@@ -910,20 +910,20 @@ R600InstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
bool
-R600InstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+R600InstrInfo::reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
MachineOperand &MO = Cond[1];
switch (MO.getImm()) {
- case OPCODE_IS_ZERO_INT:
- MO.setImm(OPCODE_IS_NOT_ZERO_INT);
+ case AMDGPU::PRED_SETE_INT:
+ MO.setImm(AMDGPU::PRED_SETNE_INT);
break;
- case OPCODE_IS_NOT_ZERO_INT:
- MO.setImm(OPCODE_IS_ZERO_INT);
+ case AMDGPU::PRED_SETNE_INT:
+ MO.setImm(AMDGPU::PRED_SETE_INT);
break;
- case OPCODE_IS_ZERO:
- MO.setImm(OPCODE_IS_NOT_ZERO);
+ case AMDGPU::PRED_SETE:
+ MO.setImm(AMDGPU::PRED_SETNE);
break;
- case OPCODE_IS_NOT_ZERO:
- MO.setImm(OPCODE_IS_ZERO);
+ case AMDGPU::PRED_SETNE:
+ MO.setImm(AMDGPU::PRED_SETE);
break;
default:
return true;
@@ -1160,10 +1160,10 @@ MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB,
int R600InstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
const MachineRegisterInfo &MRI = MF.getRegInfo();
- const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
int Offset = -1;
- if (MFI->getNumObjects() == 0) {
+ if (MFI.getNumObjects() == 0) {
return -1;
}
@@ -1195,14 +1195,14 @@ int R600InstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
int R600InstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
int Offset = 0;
- const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
// Variable sized objects are not supported
- if (MFI->hasVarSizedObjects()) {
+ if (MFI.hasVarSizedObjects()) {
return -1;
}
- if (MFI->getNumObjects() == 0) {
+ if (MFI.getNumObjects() == 0) {
return -1;
}
@@ -1481,11 +1481,3 @@ void R600InstrInfo::clearFlag(MachineInstr &MI, unsigned Operand,
FlagOp.setImm(InstFlags);
}
}
-
-bool R600InstrInfo::isRegisterStore(const MachineInstr &MI) const {
- return get(MI.getOpcode()).TSFlags & AMDGPU_FLAG_REGISTER_STORE;
-}
-
-bool R600InstrInfo::isRegisterLoad(const MachineInstr &MI) const {
- return get(MI.getOpcode()).TSFlags & AMDGPU_FLAG_REGISTER_LOAD;
-}
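
The reverseBranchCondition hunk above swaps each AMDGPU::PRED_SET* predicate for its negation and returns true only when the condition cannot be reversed. A small self-contained sketch of that symmetric swap follows; the enumerators are placeholders, not the real AMDGPU opcode values, and the return convention matches TargetInstrInfo.

    #include <cassert>

    enum Pred { PRED_SETE, PRED_SETNE, PRED_SETE_INT, PRED_SETNE_INT, PRED_OTHER };

    // Returns true (failure) when the predicate has no known negation.
    bool reversePred(Pred &P) {
      switch (P) {
      case PRED_SETE:      P = PRED_SETNE;     return false;
      case PRED_SETNE:     P = PRED_SETE;      return false;
      case PRED_SETE_INT:  P = PRED_SETNE_INT; return false;
      case PRED_SETNE_INT: P = PRED_SETE_INT;  return false;
      default:             return true;
      }
    }

    int main() {
      Pred P = PRED_SETE;
      assert(!reversePred(P) && P == PRED_SETNE);
      P = PRED_OTHER;
      assert(reversePred(P));
      return 0;
    }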
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.h b/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.h
index feaca98def44..a280052dbd4a 100644
--- a/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/R600InstrInfo.h
@@ -19,6 +19,14 @@
#include "R600RegisterInfo.h"
namespace llvm {
+
+namespace R600InstrFlags {
+enum : uint64_t {
+ REGISTER_STORE = UINT64_C(1) << 62,
+ REGISTER_LOAD = UINT64_C(1) << 63
+};
+}
+
class AMDGPUTargetMachine;
class DFAPacketizer;
class MachineFunction;
@@ -151,7 +159,7 @@ public:
DFAPacketizer *
CreateTargetScheduleState(const TargetSubtargetInfo &) const override;
- bool ReverseBranchCondition(
+ bool reverseBranchCondition(
SmallVectorImpl<MachineOperand> &Cond) const override;
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
@@ -159,11 +167,13 @@ public:
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const override;
- unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
- const DebugLoc &DL) const override;
+ const DebugLoc &DL,
+ int *BytesAdded = nullptr) const override;
- unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
+ unsigned removeBranch(MachineBasicBlock &MBB,
+                        int *BytesRemoved = nullptr) const override;
bool isPredicated(const MachineInstr &MI) const override;
@@ -301,8 +311,13 @@ public:
void clearFlag(MachineInstr &MI, unsigned Operand, unsigned Flag) const;
// Helper functions that check the opcode for status information
- bool isRegisterStore(const MachineInstr &MI) const;
- bool isRegisterLoad(const MachineInstr &MI) const;
+ bool isRegisterStore(const MachineInstr &MI) const {
+ return get(MI.getOpcode()).TSFlags & R600InstrFlags::REGISTER_STORE;
+ }
+
+ bool isRegisterLoad(const MachineInstr &MI) const {
+ return get(MI.getOpcode()).TSFlags & R600InstrFlags::REGISTER_LOAD;
+ }
};
namespace AMDGPU {
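
The header change above replaces the out-of-line isRegisterStore/isRegisterLoad helpers with inline tests of TSFlags against a new R600InstrFlags enum occupying bits 62 and 63. A minimal standalone sketch of the same flag-word pattern, using illustrative (non-LLVM) type names:

    #include <cassert>
    #include <cstdint>

    // Stand-ins for MCInstrDesc::TSFlags and the new R600InstrFlags bits.
    namespace FlagsSketch {
    enum : uint64_t {
      REGISTER_STORE = UINT64_C(1) << 62,
      REGISTER_LOAD  = UINT64_C(1) << 63
    };
    }

    struct InstrDescSketch {
      uint64_t TSFlags;   // per-opcode flag word, like MCInstrDesc::TSFlags
    };

    // Mirrors the new inline helpers: a query is a single masked read.
    bool isRegisterStore(const InstrDescSketch &D) {
      return (D.TSFlags & FlagsSketch::REGISTER_STORE) != 0;
    }

    int main() {
      InstrDescSketch Store{FlagsSketch::REGISTER_STORE};
      InstrDescSketch Load{FlagsSketch::REGISTER_LOAD};
      assert(isRegisterStore(Store));
      assert(!isRegisterStore(Load));
      return 0;
    }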
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600Instructions.td b/contrib/llvm/lib/Target/AMDGPU/R600Instructions.td
index b6b576d95278..3a72e0791fd6 100644
--- a/contrib/llvm/lib/Target/AMDGPU/R600Instructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/R600Instructions.td
@@ -242,20 +242,6 @@ def TEX_SHADOW_ARRAY : PatLeaf<
}]
>;
-def TEX_MSAA : PatLeaf<
- (imm),
- [{uint32_t TType = (uint32_t)N->getZExtValue();
- return TType == 14;
- }]
->;
-
-def TEX_ARRAY_MSAA : PatLeaf<
- (imm),
- [{uint32_t TType = (uint32_t)N->getZExtValue();
- return TType == 15;
- }]
->;
-
class EG_CF_RAT <bits <8> cfinst, bits <6> ratinst, bits<4> ratid, bits<4> mask,
dag outs, dag ins, string asm, list<dag> pattern> :
InstR600ISA <outs, ins, asm, pattern>,
@@ -283,8 +269,8 @@ class EG_CF_RAT <bits <8> cfinst, bits <6> ratinst, bits<4> ratid, bits<4> mask,
}
-class VTX_READ <string name, bits<8> buffer_id, dag outs, list<dag> pattern>
- : InstR600ISA <outs, (ins MEMxi:$src_gpr), !strconcat(" ", name), pattern>,
+class VTX_READ <string name, dag outs, list<dag> pattern>
+ : InstR600ISA <outs, (ins MEMxi:$src_gpr, i8imm:$buffer_id), !strconcat(" ", name, ", #$buffer_id"), pattern>,
VTX_WORD1_GPR {
// Static fields
@@ -333,9 +319,9 @@ class LoadParamFrag <PatFrag load_type> : PatFrag <
(cast<LoadSDNode>(N)->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS); }]
>;
-def load_param : LoadParamFrag<load>;
-def load_param_exti8 : LoadParamFrag<az_extloadi8>;
-def load_param_exti16 : LoadParamFrag<az_extloadi16>;
+def vtx_id3_az_extloadi8 : LoadParamFrag<az_extloadi8>;
+def vtx_id3_az_extloadi16 : LoadParamFrag<az_extloadi16>;
+def vtx_id3_load : LoadParamFrag<load>;
class LoadVtxId1 <PatFrag load> : PatFrag <
(ops node:$ptr), (load node:$ptr), [{
@@ -450,11 +436,6 @@ def INTERP_LOAD_P0 : R600_1OP <0xE0, "INTERP_LOAD_P0", []>;
// Export Instructions
//===----------------------------------------------------------------------===//
-def ExportType : SDTypeProfile<0, 7, [SDTCisFP<0>, SDTCisInt<1>]>;
-
-def EXPORT: SDNode<"AMDGPUISD::EXPORT", ExportType,
- [SDNPHasChain, SDNPSideEffect]>;
-
class ExportWord0 {
field bits<32> Word0;
@@ -500,7 +481,7 @@ class ExportBufWord1 {
}
multiclass ExportPattern<Instruction ExportInst, bits<8> cf_inst> {
- def : Pat<(EXPORT (v4f32 R600_Reg128:$src), (i32 imm:$base), (i32 imm:$type),
+ def : Pat<(R600_EXPORT (v4f32 R600_Reg128:$src), (i32 imm:$base), (i32 imm:$type),
(i32 imm:$swz_x), (i32 imm:$swz_y), (i32 imm:$swz_z), (i32 imm:$swz_w)),
(ExportInst R600_Reg128:$src, imm:$type, imm:$base,
imm:$swz_x, imm:$swz_y, imm:$swz_z, imm:$swz_w, cf_inst, 0)
@@ -1073,18 +1054,27 @@ class LSHL_Common <bits<11> inst> : R600_2OP_Helper <inst, "LSHL", shl>;
class LSHR_Common <bits<11> inst> : R600_2OP_Helper <inst, "LSHR", srl>;
class ASHR_Common <bits<11> inst> : R600_2OP_Helper <inst, "ASHR", sra>;
class MULHI_INT_Common <bits<11> inst> : R600_2OP_Helper <
- inst, "MULHI_INT", mulhs
-> {
+ inst, "MULHI_INT", mulhs> {
let Itinerary = TransALU;
}
+
+class MULHI_INT24_Common <bits<11> inst> : R600_2OP_Helper <
+ inst, "MULHI_INT24", AMDGPUmulhi_i24> {
+ let Itinerary = VecALU;
+}
+
class MULHI_UINT_Common <bits<11> inst> : R600_2OP_Helper <
- inst, "MULHI", mulhu
-> {
+ inst, "MULHI", mulhu> {
let Itinerary = TransALU;
}
+
+class MULHI_UINT24_Common <bits<11> inst> : R600_2OP_Helper <
+ inst, "MULHI_UINT24", AMDGPUmulhi_u24> {
+ let Itinerary = VecALU;
+}
+
class MULLO_INT_Common <bits<11> inst> : R600_2OP_Helper <
- inst, "MULLO_INT", mul
-> {
+ inst, "MULLO_INT", mul> {
let Itinerary = TransALU;
}
class MULLO_UINT_Common <bits<11> inst> : R600_2OP <inst, "MULLO_UINT", []> {
@@ -1366,8 +1356,8 @@ def CONST_COPY : Instruction {
} // end usesCustomInserter = 1, isCodeGenOnly = 1, isPseudo = 1, Namespace = "AMDGPU"
def TEX_VTX_CONSTBUF :
- InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "VTX_READ_eg $dst, $ptr",
- [(set v4i32:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr, (i32 imm:$BUFFER_ID)))]>,
+ InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$buffer_id), "VTX_READ_eg $dst, $ptr",
+ [(set v4i32:$dst, (CONST_ADDRESS ADDRGA_VAR_OFFSET:$ptr, (i32 imm:$buffer_id)))]>,
VTX_WORD1_GPR, VTX_WORD0_eg {
let VC_INST = 0;
@@ -1420,7 +1410,7 @@ def TEX_VTX_CONSTBUF :
}
def TEX_VTX_TEXBUF:
- InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$BUFFER_ID), "TEX_VTX_EXPLICIT_READ $dst, $ptr">,
+ InstR600ISA <(outs R600_Reg128:$dst), (ins MEMxi:$ptr, i32imm:$buffer_id), "TEX_VTX_EXPLICIT_READ $dst, $ptr">,
VTX_WORD1_GPR, VTX_WORD0_eg {
let VC_INST = 0;
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp
index 01105c614c55..3ca319c6c6c2 100644
--- a/contrib/llvm/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/R600MachineFunctionInfo.cpp
@@ -12,9 +12,5 @@
using namespace llvm;
-
-// Pin the vtable to this file.
-void R600MachineFunctionInfo::anchor() {}
-
R600MachineFunctionInfo::R600MachineFunctionInfo(const MachineFunction &MF)
: AMDGPUMachineFunction(MF) { }
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600MachineFunctionInfo.h b/contrib/llvm/lib/Target/AMDGPU/R600MachineFunctionInfo.h
index 04a4436ebe03..29ac0920f997 100644
--- a/contrib/llvm/lib/Target/AMDGPU/R600MachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/R600MachineFunctionInfo.h
@@ -14,18 +14,13 @@
#define LLVM_LIB_TARGET_AMDGPU_R600MACHINEFUNCTIONINFO_H
#include "AMDGPUMachineFunction.h"
-#include "llvm/CodeGen/SelectionDAG.h"
-#include <vector>
namespace llvm {
class R600MachineFunctionInfo final : public AMDGPUMachineFunction {
- void anchor() override;
public:
R600MachineFunctionInfo(const MachineFunction &MF);
- SmallVector<unsigned, 4> LiveOuts;
- std::vector<unsigned> IndirectRegs;
- unsigned StackSize;
+ unsigned CFStackSize;
};
} // End llvm namespace
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600MachineScheduler.h b/contrib/llvm/lib/Target/AMDGPU/R600MachineScheduler.h
index 16d5d939708c..9a6770570477 100644
--- a/contrib/llvm/lib/Target/AMDGPU/R600MachineScheduler.h
+++ b/contrib/llvm/lib/Target/AMDGPU/R600MachineScheduler.h
@@ -16,6 +16,7 @@
#define LLVM_LIB_TARGET_AMDGPU_R600MACHINESCHEDULER_H
#include "llvm/CodeGen/MachineScheduler.h"
+#include <vector>
using namespace llvm;
@@ -25,10 +26,10 @@ class R600InstrInfo;
struct R600RegisterInfo;
class R600SchedStrategy final : public MachineSchedStrategy {
- const ScheduleDAGMILive *DAG;
- const R600InstrInfo *TII;
- const R600RegisterInfo *TRI;
- MachineRegisterInfo *MRI;
+ const ScheduleDAGMILive *DAG = nullptr;
+ const R600InstrInfo *TII = nullptr;
+ const R600RegisterInfo *TRI = nullptr;
+ MachineRegisterInfo *MRI = nullptr;
enum InstKind {
IDAlu,
@@ -66,11 +67,8 @@ class R600SchedStrategy final : public MachineSchedStrategy {
int OccupedSlotsMask;
public:
- R600SchedStrategy() :
- DAG(nullptr), TII(nullptr), TRI(nullptr), MRI(nullptr) {
- }
-
- virtual ~R600SchedStrategy() {}
+ R600SchedStrategy() = default;
+ ~R600SchedStrategy() override = default;
void initialize(ScheduleDAGMI *dag) override;
SUnit *pickNode(bool &IsTopNode) override;
@@ -97,6 +95,6 @@ private:
void MoveUnits(std::vector<SUnit *> &QSrc, std::vector<SUnit *> &QDst);
};
-} // namespace llvm
+} // end namespace llvm
-#endif /* R600MACHINESCHEDULER_H_ */
+#endif // LLVM_LIB_TARGET_AMDGPU_R600MACHINESCHEDULER_H
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp b/contrib/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
index ecae27d2233d..d90008a550ae 100644
--- a/contrib/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/R600OptimizeVectorRegisters.cpp
@@ -31,22 +31,31 @@
#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
-#include "llvm/CodeGen/DFAPacketizer.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/PassAnalysisSupport.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <utility>
+#include <vector>
using namespace llvm;
#define DEBUG_TYPE "vec-merger"
-namespace {
-
static bool
isImplicitlyDef(MachineRegisterInfo &MRI, unsigned Reg) {
for (MachineRegisterInfo::def_instr_iterator It = MRI.def_instr_begin(Reg),
@@ -60,11 +69,14 @@ isImplicitlyDef(MachineRegisterInfo &MRI, unsigned Reg) {
return false;
}
+namespace {
+
class RegSeqInfo {
public:
MachineInstr *Instr;
DenseMap<unsigned, unsigned> RegToChan;
std::vector<unsigned> UndefReg;
+
RegSeqInfo(MachineRegisterInfo &MRI, MachineInstr *MI) : Instr(MI) {
assert(MI->getOpcode() == AMDGPU::REG_SEQUENCE);
for (unsigned i = 1, e = Instr->getNumOperands(); i < e; i+=2) {
@@ -76,7 +88,8 @@ public:
RegToChan[MO.getReg()] = Chan;
}
}
- RegSeqInfo() {}
+
+ RegSeqInfo() = default;
bool operator==(const RegSeqInfo &RSI) const {
return RSI.Instr == Instr;
@@ -87,28 +100,30 @@ class R600VectorRegMerger : public MachineFunctionPass {
private:
MachineRegisterInfo *MRI;
const R600InstrInfo *TII;
- bool canSwizzle(const MachineInstr &) const;
+
+ bool canSwizzle(const MachineInstr &MI) const;
bool areAllUsesSwizzeable(unsigned Reg) const;
void SwizzleInput(MachineInstr &,
- const std::vector<std::pair<unsigned, unsigned> > &) const;
- bool tryMergeVector(const RegSeqInfo *, RegSeqInfo *,
- std::vector<std::pair<unsigned, unsigned> > &Remap) const;
+ const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const;
+ bool tryMergeVector(const RegSeqInfo *Untouched, RegSeqInfo *ToMerge,
+ std::vector<std::pair<unsigned, unsigned>> &Remap) const;
bool tryMergeUsingCommonSlot(RegSeqInfo &RSI, RegSeqInfo &CompatibleRSI,
- std::vector<std::pair<unsigned, unsigned> > &RemapChan);
+ std::vector<std::pair<unsigned, unsigned>> &RemapChan);
bool tryMergeUsingFreeSlot(RegSeqInfo &RSI, RegSeqInfo &CompatibleRSI,
- std::vector<std::pair<unsigned, unsigned> > &RemapChan);
- MachineInstr *RebuildVector(RegSeqInfo *MI,
- const RegSeqInfo *BaseVec,
- const std::vector<std::pair<unsigned, unsigned> > &RemapChan) const;
+ std::vector<std::pair<unsigned, unsigned>> &RemapChan);
+ MachineInstr *RebuildVector(RegSeqInfo *MI, const RegSeqInfo *BaseVec,
+ const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const;
void RemoveMI(MachineInstr *);
void trackRSI(const RegSeqInfo &RSI);
- typedef DenseMap<unsigned, std::vector<MachineInstr *> > InstructionSetMap;
+ typedef DenseMap<unsigned, std::vector<MachineInstr *>> InstructionSetMap;
DenseMap<MachineInstr *, RegSeqInfo> PreviousRegSeq;
InstructionSetMap PreviousRegSeqByReg;
InstructionSetMap PreviousRegSeqByUndefCount;
+
public:
static char ID;
+
R600VectorRegMerger(TargetMachine &tm) : MachineFunctionPass(ID),
TII(nullptr) { }
@@ -121,13 +136,15 @@ public:
MachineFunctionPass::getAnalysisUsage(AU);
}
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "R600 Vector Registers Merge Pass";
}
bool runOnMachineFunction(MachineFunction &Fn) override;
};
+} // end anonymous namespace.
+
char R600VectorRegMerger::ID = 0;
bool R600VectorRegMerger::canSwizzle(const MachineInstr &MI)
@@ -144,7 +161,7 @@ bool R600VectorRegMerger::canSwizzle(const MachineInstr &MI)
}
bool R600VectorRegMerger::tryMergeVector(const RegSeqInfo *Untouched,
- RegSeqInfo *ToMerge, std::vector< std::pair<unsigned, unsigned> > &Remap)
+ RegSeqInfo *ToMerge, std::vector< std::pair<unsigned, unsigned>> &Remap)
const {
unsigned CurrentUndexIdx = 0;
for (DenseMap<unsigned, unsigned>::iterator It = ToMerge->RegToChan.begin(),
@@ -167,7 +184,7 @@ bool R600VectorRegMerger::tryMergeVector(const RegSeqInfo *Untouched,
static
unsigned getReassignedChan(
- const std::vector<std::pair<unsigned, unsigned> > &RemapChan,
+ const std::vector<std::pair<unsigned, unsigned>> &RemapChan,
unsigned Chan) {
for (unsigned j = 0, je = RemapChan.size(); j < je; j++) {
if (RemapChan[j].first == Chan)
@@ -178,7 +195,7 @@ unsigned getReassignedChan(
MachineInstr *R600VectorRegMerger::RebuildVector(
RegSeqInfo *RSI, const RegSeqInfo *BaseRSI,
- const std::vector<std::pair<unsigned, unsigned> > &RemapChan) const {
+ const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const {
unsigned Reg = RSI->Instr->getOperand(0).getReg();
MachineBasicBlock::iterator Pos = RSI->Instr;
MachineBasicBlock &MBB = *Pos->getParent();
@@ -200,12 +217,10 @@ MachineInstr *R600VectorRegMerger::RebuildVector(
.addReg(SubReg)
.addImm(Chan);
UpdatedRegToChan[SubReg] = Chan;
- std::vector<unsigned>::iterator ChanPos =
- std::find(UpdatedUndef.begin(), UpdatedUndef.end(), Chan);
+ std::vector<unsigned>::iterator ChanPos = llvm::find(UpdatedUndef, Chan);
if (ChanPos != UpdatedUndef.end())
UpdatedUndef.erase(ChanPos);
- assert(std::find(UpdatedUndef.begin(), UpdatedUndef.end(), Chan) ==
- UpdatedUndef.end() &&
+ assert(!is_contained(UpdatedUndef, Chan) &&
"UpdatedUndef shouldn't contain Chan more than once!");
DEBUG(dbgs() << " ->"; Tmp->dump(););
(void)Tmp;
@@ -236,17 +251,17 @@ void R600VectorRegMerger::RemoveMI(MachineInstr *MI) {
for (InstructionSetMap::iterator It = PreviousRegSeqByReg.begin(),
E = PreviousRegSeqByReg.end(); It != E; ++It) {
std::vector<MachineInstr *> &MIs = (*It).second;
- MIs.erase(std::find(MIs.begin(), MIs.end(), MI), MIs.end());
+ MIs.erase(llvm::find(MIs, MI), MIs.end());
}
for (InstructionSetMap::iterator It = PreviousRegSeqByUndefCount.begin(),
E = PreviousRegSeqByUndefCount.end(); It != E; ++It) {
std::vector<MachineInstr *> &MIs = (*It).second;
- MIs.erase(std::find(MIs.begin(), MIs.end(), MI), MIs.end());
+ MIs.erase(llvm::find(MIs, MI), MIs.end());
}
}
void R600VectorRegMerger::SwizzleInput(MachineInstr &MI,
- const std::vector<std::pair<unsigned, unsigned> > &RemapChan) const {
+ const std::vector<std::pair<unsigned, unsigned>> &RemapChan) const {
unsigned Offset;
if (TII->get(MI.getOpcode()).TSFlags & R600_InstFlag::TEX_INST)
Offset = 2;
@@ -274,7 +289,7 @@ bool R600VectorRegMerger::areAllUsesSwizzeable(unsigned Reg) const {
bool R600VectorRegMerger::tryMergeUsingCommonSlot(RegSeqInfo &RSI,
RegSeqInfo &CompatibleRSI,
- std::vector<std::pair<unsigned, unsigned> > &RemapChan) {
+ std::vector<std::pair<unsigned, unsigned>> &RemapChan) {
for (MachineInstr::mop_iterator MOp = RSI.Instr->operands_begin(),
MOE = RSI.Instr->operands_end(); MOp != MOE; ++MOp) {
if (!MOp->isReg())
@@ -294,7 +309,7 @@ bool R600VectorRegMerger::tryMergeUsingCommonSlot(RegSeqInfo &RSI,
bool R600VectorRegMerger::tryMergeUsingFreeSlot(RegSeqInfo &RSI,
RegSeqInfo &CompatibleRSI,
- std::vector<std::pair<unsigned, unsigned> > &RemapChan) {
+ std::vector<std::pair<unsigned, unsigned>> &RemapChan) {
unsigned NeededUndefs = 4 - RSI.UndefReg.size();
if (PreviousRegSeqByUndefCount[NeededUndefs].empty())
return false;
@@ -357,7 +372,7 @@ bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) {
});
RegSeqInfo CandidateRSI;
- std::vector<std::pair<unsigned, unsigned> > RemapChan;
+ std::vector<std::pair<unsigned, unsigned>> RemapChan;
DEBUG(dbgs() << "Using common slots...\n";);
if (tryMergeUsingCommonSlot(RSI, CandidateRSI, RemapChan)) {
// Remove CandidateRSI mapping
@@ -381,8 +396,6 @@ bool R600VectorRegMerger::runOnMachineFunction(MachineFunction &Fn) {
return false;
}
-}
-
llvm::FunctionPass *llvm::createR600VectorRegMerger(TargetMachine &tm) {
return new R600VectorRegMerger(tm);
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/R600Packetizer.cpp b/contrib/llvm/lib/Target/AMDGPU/R600Packetizer.cpp
index c84866469ae8..5b6dd1ed128d 100644
--- a/contrib/llvm/lib/Target/AMDGPU/R600Packetizer.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/R600Packetizer.cpp
@@ -47,9 +47,7 @@ public:
MachineFunctionPass::getAnalysisUsage(AU);
}
- const char *getPassName() const override {
- return "R600 Packetizer";
- }
+ StringRef getPassName() const override { return "R600 Packetizer"; }
bool runOnMachineFunction(MachineFunction &Fn) override;
};
@@ -283,7 +281,7 @@ public:
return false;
}
- // We cannot read LDS source registrs from the Trans slot.
+ // We cannot read LDS source registers from the Trans slot.
if (isTransSlot && TII->readsLDSSrcReg(MI))
return false;
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp b/contrib/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
index 5f182c5304c6..d70f52e0f295 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIAnnotateControlFlow.cpp
@@ -102,9 +102,7 @@ public:
bool runOnFunction(Function &F) override;
- const char *getPassName() const override {
- return "SI annotate control flow";
- }
+ StringRef getPassName() const override { return "SI annotate control flow"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<LoopInfoWrapperPass>();
@@ -148,12 +146,15 @@ bool SIAnnotateControlFlow::doInitialization(Module &M) {
Break = M.getOrInsertFunction(
BreakIntrinsic, Int64, Int64, (Type *)nullptr);
+ cast<Function>(Break)->setDoesNotAccessMemory();
IfBreak = M.getOrInsertFunction(
IfBreakIntrinsic, Int64, Boolean, Int64, (Type *)nullptr);
+  cast<Function>(IfBreak)->setDoesNotAccessMemory();
ElseBreak = M.getOrInsertFunction(
ElseBreakIntrinsic, Int64, Int64, Int64, (Type *)nullptr);
+ cast<Function>(ElseBreak)->setDoesNotAccessMemory();
Loop = M.getOrInsertFunction(
LoopIntrinsic, Boolean, Int64, (Type *)nullptr);
@@ -331,6 +332,8 @@ void SIAnnotateControlFlow::handleLoop(BranchInst *Term) {
BasicBlock *BB = Term->getParent();
llvm::Loop *L = LI->getLoopFor(BB);
+ if (!L)
+ return;
BasicBlock *Target = Term->getSuccessor(1);
PHINode *Broken = PHINode::Create(Int64, 0, "", &Target->front());
@@ -361,7 +364,7 @@ void SIAnnotateControlFlow::closeControlFlow(BasicBlock *BB) {
std::vector<BasicBlock*> Preds;
for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); PI != PE; ++PI) {
- if (std::find(Latches.begin(), Latches.end(), *PI) == Latches.end())
+ if (!is_contained(Latches, *PI))
Preds.push_back(*PI);
}
BB = llvm::SplitBlockPredecessors(BB, Preds, "endcf.split", DT, LI, false);
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp b/contrib/llvm/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp
index 65ceff3930ac..62ebef8e91af 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIDebuggerInsertNops.cpp
@@ -38,7 +38,7 @@ public:
static char ID;
SIDebuggerInsertNops() : MachineFunctionPass(ID) { }
- const char *getPassName() const override { return PASS_NAME; }
+ StringRef getPassName() const override { return PASS_NAME; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIDefines.h b/contrib/llvm/lib/Target/AMDGPU/SIDefines.h
index f4b04e3631a5..ff4e32147184 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/contrib/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -13,76 +13,111 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_SIDEFINES_H
#define LLVM_LIB_TARGET_AMDGPU_SIDEFINES_H
+namespace llvm {
+
namespace SIInstrFlags {
// This needs to be kept in sync with the field bits in InstSI.
-enum {
- SALU = 1 << 3,
- VALU = 1 << 4,
-
- SOP1 = 1 << 5,
- SOP2 = 1 << 6,
- SOPC = 1 << 7,
- SOPK = 1 << 8,
- SOPP = 1 << 9,
-
- VOP1 = 1 << 10,
- VOP2 = 1 << 11,
- VOP3 = 1 << 12,
- VOPC = 1 << 13,
+enum : uint64_t {
+ // Low bits - basic encoding information.
+ SALU = 1 << 0,
+ VALU = 1 << 1,
+
+ // SALU instruction formats.
+ SOP1 = 1 << 2,
+ SOP2 = 1 << 3,
+ SOPC = 1 << 4,
+ SOPK = 1 << 5,
+ SOPP = 1 << 6,
+
+ // VALU instruction formats.
+ VOP1 = 1 << 7,
+ VOP2 = 1 << 8,
+ VOPC = 1 << 9,
+
+  // TODO: Should this be split into VOP3a and VOP3b?
+ VOP3 = 1 << 10,
+
+ VINTRP = 1 << 13,
SDWA = 1 << 14,
DPP = 1 << 15,
+ // Memory instruction formats.
MUBUF = 1 << 16,
MTBUF = 1 << 17,
SMRD = 1 << 18,
- DS = 1 << 19,
- MIMG = 1 << 20,
+ MIMG = 1 << 19,
+ EXP = 1 << 20,
FLAT = 1 << 21,
- WQM = 1 << 22,
+ DS = 1 << 22,
+
+ // Pseudo instruction formats.
VGPRSpill = 1 << 23,
- VOPAsmPrefer32Bit = 1 << 24,
- Gather4 = 1 << 25,
- DisableWQM = 1 << 26
+ SGPRSpill = 1 << 24,
+
+ // High bits - other information.
+ VM_CNT = UINT64_C(1) << 32,
+ EXP_CNT = UINT64_C(1) << 33,
+ LGKM_CNT = UINT64_C(1) << 34,
+
+ WQM = UINT64_C(1) << 35,
+ DisableWQM = UINT64_C(1) << 36,
+ Gather4 = UINT64_C(1) << 37,
+ SOPK_ZEXT = UINT64_C(1) << 38,
+ SCALAR_STORE = UINT64_C(1) << 39,
+ FIXED_SIZE = UINT64_C(1) << 40,
+ VOPAsmPrefer32Bit = UINT64_C(1) << 41
+
+};
+
+// v_cmp_class_* etc. use a 10-bit mask for what operation is checked.
+// The result is true if any of these tests are true.
+enum ClassFlags {
+ S_NAN = 1 << 0, // Signaling NaN
+ Q_NAN = 1 << 1, // Quiet NaN
+ N_INFINITY = 1 << 2, // Negative infinity
+ N_NORMAL = 1 << 3, // Negative normal
+ N_SUBNORMAL = 1 << 4, // Negative subnormal
+ N_ZERO = 1 << 5, // Negative zero
+ P_ZERO = 1 << 6, // Positive zero
+ P_SUBNORMAL = 1 << 7, // Positive subnormal
+ P_NORMAL = 1 << 8, // Positive normal
+ P_INFINITY = 1 << 9 // Positive infinity
};
}
-namespace llvm {
namespace AMDGPU {
enum OperandType {
- /// Operand with register or 32-bit immediate
- OPERAND_REG_IMM32 = MCOI::OPERAND_FIRST_TARGET,
- /// Operand with register or inline constant
- OPERAND_REG_INLINE_C,
-
- /// Operand with 32-bit immediate that uses the constant bus. The standard
- /// OPERAND_IMMEDIATE should be used for special immediates such as source
- /// modifiers.
- OPERAND_KIMM32
- };
-}
-}
-
-namespace SIInstrFlags {
- enum Flags {
- // First 4 bits are the instruction encoding
- VM_CNT = 1 << 0,
- EXP_CNT = 1 << 1,
- LGKM_CNT = 1 << 2
- };
-
- // v_cmp_class_* etc. use a 10-bit mask for what operation is checked.
- // The result is true if any of these tests are true.
- enum ClassFlags {
- S_NAN = 1 << 0, // Signaling NaN
- Q_NAN = 1 << 1, // Quiet NaN
- N_INFINITY = 1 << 2, // Negative infinity
- N_NORMAL = 1 << 3, // Negative normal
- N_SUBNORMAL = 1 << 4, // Negative subnormal
- N_ZERO = 1 << 5, // Negative zero
- P_ZERO = 1 << 6, // Positive zero
- P_SUBNORMAL = 1 << 7, // Positive subnormal
- P_NORMAL = 1 << 8, // Positive normal
- P_INFINITY = 1 << 9 // Positive infinity
+ /// Operands with register or 32-bit immediate
+ OPERAND_REG_IMM_INT32 = MCOI::OPERAND_FIRST_TARGET,
+ OPERAND_REG_IMM_INT64,
+ OPERAND_REG_IMM_INT16,
+ OPERAND_REG_IMM_FP32,
+ OPERAND_REG_IMM_FP64,
+ OPERAND_REG_IMM_FP16,
+
+ /// Operands with register or inline constant
+ OPERAND_REG_INLINE_C_INT16,
+ OPERAND_REG_INLINE_C_INT32,
+ OPERAND_REG_INLINE_C_INT64,
+ OPERAND_REG_INLINE_C_FP16,
+ OPERAND_REG_INLINE_C_FP32,
+ OPERAND_REG_INLINE_C_FP64,
+
+ OPERAND_REG_IMM_FIRST = OPERAND_REG_IMM_INT32,
+ OPERAND_REG_IMM_LAST = OPERAND_REG_IMM_FP16,
+
+ OPERAND_REG_INLINE_C_FIRST = OPERAND_REG_INLINE_C_INT16,
+ OPERAND_REG_INLINE_C_LAST = OPERAND_REG_INLINE_C_FP64,
+
+ OPERAND_SRC_FIRST = OPERAND_REG_IMM_INT32,
+ OPERAND_SRC_LAST = OPERAND_REG_INLINE_C_LAST,
+
+ // Operand for source modifiers for VOP instructions
+ OPERAND_INPUT_MODS,
+
+ /// Operand with 32-bit immediate that uses the constant bus.
+ OPERAND_KIMM32,
+ OPERAND_KIMM16
};
}
@@ -105,7 +140,24 @@ namespace SIOutMods {
};
}
-namespace llvm {
+namespace VGPRIndexMode {
+ enum {
+ SRC0_ENABLE = 1 << 0,
+ SRC1_ENABLE = 1 << 1,
+ SRC2_ENABLE = 1 << 2,
+ DST_ENABLE = 1 << 3
+ };
+}
+
+namespace AMDGPUAsmVariants {
+ enum {
+ DEFAULT = 0,
+ VOP3 = 1,
+ SDWA = 2,
+ DPP = 3
+ };
+}
+
namespace AMDGPU {
namespace EncValues { // Encoding values of enum9/8/7 operands
@@ -126,9 +178,7 @@ enum {
} // namespace EncValues
} // namespace AMDGPU
-} // namespace llvm
-namespace llvm {
namespace AMDGPU {
namespace SendMsg { // Encoding of SIMM16 used in s_sendmsg* insns.
@@ -184,6 +234,13 @@ namespace Hwreg { // Encoding of SIMM16 used in s_setreg/getreg* insns.
enum Id { // HwRegCode, (6) [5:0]
ID_UNKNOWN_ = -1,
ID_SYMBOLIC_FIRST_ = 1, // There are corresponding symbolic names defined.
+ ID_MODE = 1,
+ ID_STATUS = 2,
+ ID_TRAPSTS = 3,
+ ID_HW_ID = 4,
+ ID_GPR_ALLOC = 5,
+ ID_LDS_ALLOC = 6,
+ ID_IB_STS = 7,
ID_SYMBOLIC_LAST_ = 8,
ID_SHIFT_ = 0,
ID_WIDTH_ = 6,
@@ -205,8 +262,27 @@ enum WidthMinusOne { // WidthMinusOne, (5) [15:11]
};
} // namespace Hwreg
+
+namespace SDWA {
+
+enum SdwaSel {
+ BYTE_0 = 0,
+ BYTE_1 = 1,
+ BYTE_2 = 2,
+ BYTE_3 = 3,
+ WORD_0 = 4,
+ WORD_1 = 5,
+ DWORD = 6,
+};
+
+enum DstUnused {
+ UNUSED_PAD = 0,
+ UNUSED_SEXT = 1,
+ UNUSED_PRESERVE = 2,
+};
+
+} // namespace SDWA
} // namespace AMDGPU
-} // namespace llvm
#define R_00B028_SPI_SHADER_PGM_RSRC1_PS 0x00B028
#define R_00B02C_SPI_SHADER_PGM_RSRC2_PS 0x00B02C
@@ -312,4 +388,6 @@ enum WidthMinusOne { // WidthMinusOne, (5) [15:11]
#define R_SPILLED_SGPRS 0x4
#define R_SPILLED_VGPRS 0x8
+} // End namespace llvm
+
#endif
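
The SIDefines.h hunk above widens SIInstrFlags to a uint64_t enum so that the counter and modifier flags (VM_CNT, EXP_CNT, WQM, ...) can live above bit 31; a plain int shift of 32 or more bits would be undefined behaviour, hence the UINT64_C(1) << N constants. A small self-contained sketch of a high-bit flag test, with placeholder enumerator names:

    #include <cassert>
    #include <cstdint>

    // The values mirror the diff; the names here are placeholders.
    enum : uint64_t {
      VM_CNT_SKETCH   = UINT64_C(1) << 32,
      EXP_CNT_SKETCH  = UINT64_C(1) << 33,
      LGKM_CNT_SKETCH = UINT64_C(1) << 34
    };

    int main() {
      uint64_t TSFlags = VM_CNT_SKETCH | LGKM_CNT_SKETCH;
      assert(TSFlags & VM_CNT_SKETCH);
      assert(!(TSFlags & EXP_CNT_SKETCH));
      return 0;
    }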
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFixControlFlowLiveIntervals.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFixControlFlowLiveIntervals.cpp
index 636750dcfba2..d4d3959658e7 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIFixControlFlowLiveIntervals.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIFixControlFlowLiveIntervals.cpp
@@ -37,9 +37,7 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
- const char *getPassName() const override {
- return "SI Fix CF Live Intervals";
- }
+ StringRef getPassName() const override { return "SI Fix CF Live Intervals"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<LiveIntervals>();
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 9e0086b79087..6a422e70fe1f 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -68,6 +68,7 @@
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -82,6 +83,9 @@ using namespace llvm;
namespace {
class SIFixSGPRCopies : public MachineFunctionPass {
+
+ MachineDominatorTree *MDT;
+
public:
static char ID;
@@ -89,11 +93,11 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
- const char *getPassName() const override {
- return "SI Fix SGPR copies";
- }
+ StringRef getPassName() const override { return "SI Fix SGPR copies"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -101,8 +105,12 @@ public:
} // End anonymous namespace
-INITIALIZE_PASS(SIFixSGPRCopies, DEBUG_TYPE,
- "SI Fix SGPR copies", false, false)
+INITIALIZE_PASS_BEGIN(SIFixSGPRCopies, DEBUG_TYPE,
+ "SI Fix SGPR copies", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
+INITIALIZE_PASS_END(SIFixSGPRCopies, DEBUG_TYPE,
+ "SI Fix SGPR copies", false, false)
+
char SIFixSGPRCopies::ID = 0;
@@ -236,11 +244,94 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
return true;
}
+static bool phiHasVGPROperands(const MachineInstr &PHI,
+ const MachineRegisterInfo &MRI,
+ const SIRegisterInfo *TRI,
+ const SIInstrInfo *TII) {
+
+ for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) {
+ unsigned Reg = PHI.getOperand(i).getReg();
+ if (TRI->hasVGPRs(MRI.getRegClass(Reg)))
+ return true;
+ }
+ return false;
+}
+static bool phiHasBreakDef(const MachineInstr &PHI,
+ const MachineRegisterInfo &MRI,
+ SmallSet<unsigned, 8> &Visited) {
+
+ for (unsigned i = 1; i < PHI.getNumOperands(); i += 2) {
+ unsigned Reg = PHI.getOperand(i).getReg();
+ if (Visited.count(Reg))
+ continue;
+
+ Visited.insert(Reg);
+
+ MachineInstr *DefInstr = MRI.getUniqueVRegDef(Reg);
+ assert(DefInstr);
+ switch (DefInstr->getOpcode()) {
+ default:
+ break;
+ case AMDGPU::SI_BREAK:
+ case AMDGPU::SI_IF_BREAK:
+ case AMDGPU::SI_ELSE_BREAK:
+ return true;
+ case AMDGPU::PHI:
+ if (phiHasBreakDef(*DefInstr, MRI, Visited))
+ return true;
+ }
+ }
+ return false;
+}
+
+static bool hasTerminatorThatModifiesExec(const MachineBasicBlock &MBB,
+ const TargetRegisterInfo &TRI) {
+ for (MachineBasicBlock::const_iterator I = MBB.getFirstTerminator(),
+ E = MBB.end(); I != E; ++I) {
+ if (I->modifiesRegister(AMDGPU::EXEC, &TRI))
+ return true;
+ }
+ return false;
+}
+
+static bool isSafeToFoldImmIntoCopy(const MachineInstr *Copy,
+ const MachineInstr *MoveImm,
+ const SIInstrInfo *TII,
+ unsigned &SMovOp,
+ int64_t &Imm) {
+
+ if (!MoveImm->isMoveImmediate())
+ return false;
+
+ const MachineOperand *ImmOp =
+ TII->getNamedOperand(*MoveImm, AMDGPU::OpName::src0);
+ if (!ImmOp->isImm())
+ return false;
+
+ // FIXME: Handle copies with sub-regs.
+ if (Copy->getOperand(0).getSubReg())
+ return false;
+
+ switch (MoveImm->getOpcode()) {
+ default:
+ return false;
+ case AMDGPU::V_MOV_B32_e32:
+ SMovOp = AMDGPU::S_MOV_B32;
+ break;
+ case AMDGPU::V_MOV_B64_PSEUDO:
+ SMovOp = AMDGPU::S_MOV_B64;
+ break;
+ }
+ Imm = ImmOp->getImm();
+ return true;
+}
+
bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
MachineRegisterInfo &MRI = MF.getRegInfo();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
const SIInstrInfo *TII = ST.getInstrInfo();
+ MDT = &getAnalysis<MachineDominatorTree>();
SmallVector<MachineInstr *, 16> Worklist;
@@ -264,18 +355,40 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
const TargetRegisterClass *SrcRC, *DstRC;
std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, MRI);
if (isVGPRToSGPRCopy(SrcRC, DstRC, *TRI)) {
- DEBUG(dbgs() << "Fixing VGPR -> SGPR copy: " << MI);
+ MachineInstr *DefMI = MRI.getVRegDef(MI.getOperand(1).getReg());
+ unsigned SMovOp;
+ int64_t Imm;
+ // If we are just copying an immediate, we can replace the copy with
+ // s_mov_b32.
+ if (isSafeToFoldImmIntoCopy(&MI, DefMI, TII, SMovOp, Imm)) {
+ MI.getOperand(1).ChangeToImmediate(Imm);
+ MI.addImplicitDefUseOperands(MF);
+ MI.setDesc(TII->get(SMovOp));
+ break;
+ }
TII->moveToVALU(MI);
}
break;
}
case AMDGPU::PHI: {
- DEBUG(dbgs() << "Fixing PHI: " << MI);
unsigned Reg = MI.getOperand(0).getReg();
if (!TRI->isSGPRClass(MRI.getRegClass(Reg)))
break;
+ // We don't need to fix the PHI if the common dominator of the
+ // two incoming blocks terminates with a uniform branch.
+ if (MI.getNumExplicitOperands() == 5) {
+ MachineBasicBlock *MBB0 = MI.getOperand(2).getMBB();
+ MachineBasicBlock *MBB1 = MI.getOperand(4).getMBB();
+
+ MachineBasicBlock *NCD = MDT->findNearestCommonDominator(MBB0, MBB1);
+ if (NCD && !hasTerminatorThatModifiesExec(*NCD, *TRI)) {
+ DEBUG(dbgs() << "Not fixing PHI for uniform branch: " << MI << '\n');
+ break;
+ }
+ }
+
// If a PHI node defines an SGPR and any of its operands are VGPRs,
// then we need to move it to the VALU.
//
@@ -302,10 +415,6 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
// ...
// use sgpr2
//
- // FIXME: This is OK if the branching decision is made based on an
- // SGPR value.
- bool SGPRBranch = false;
-
// The one exception to this rule is when one of the operands
// is defined by a SI_BREAK, SI_IF_BREAK, or SI_ELSE_BREAK
// instruction. In this case, there we know the program will
@@ -313,31 +422,12 @@ bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
// the first block (where the condition is computed), so there
// is no chance for values to be over-written.
- bool HasBreakDef = false;
- for (unsigned i = 1; i < MI.getNumOperands(); i+=2) {
- unsigned Reg = MI.getOperand(i).getReg();
- if (TRI->hasVGPRs(MRI.getRegClass(Reg))) {
- TII->moveToVALU(MI);
- break;
- }
- MachineInstr *DefInstr = MRI.getUniqueVRegDef(Reg);
- assert(DefInstr);
- switch(DefInstr->getOpcode()) {
-
- case AMDGPU::SI_BREAK:
- case AMDGPU::SI_IF_BREAK:
- case AMDGPU::SI_ELSE_BREAK:
- // If we see a PHI instruction that defines an SGPR, then that PHI
- // instruction has already been considered and should have
- // a *_BREAK as an operand.
- case AMDGPU::PHI:
- HasBreakDef = true;
- break;
- }
- }
-
- if (!SGPRBranch && !HasBreakDef)
+ SmallSet<unsigned, 8> Visited;
+ if (phiHasVGPROperands(MI, MRI, TRI, TII) ||
+ !phiHasBreakDef(MI, MRI, Visited)) {
+ DEBUG(dbgs() << "Fixing PHI: " << MI);
TII->moveToVALU(MI);
+ }
break;
}
case AMDGPU::REG_SEQUENCE: {
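
phiHasBreakDef above walks the definitions feeding a PHI and carries a visited set so that mutually referencing PHIs do not recurse forever. The following is a generic, self-contained sketch of that cycle-safe walk; DefNode and its flags are stand-ins for MachineInstr and the SI_*_BREAK opcodes, not LLVM types.

    #include <cassert>
    #include <set>
    #include <vector>

    struct DefNode {
      bool isBreak = false;                   // stands in for SI_*_BREAK defs
      bool isPhi = false;
      std::vector<const DefNode *> incoming;  // stands in for PHI operands
    };

    bool hasBreakDef(const DefNode &N, std::set<const DefNode *> &Visited) {
      if (N.isBreak)
        return true;
      if (!N.isPhi)
        return false;
      for (const DefNode *Op : N.incoming) {
        if (!Visited.insert(Op).second)
          continue;                           // already seen: avoids cycles
        if (hasBreakDef(*Op, Visited))
          return true;
      }
      return false;
    }

    int main() {
      DefNode Break;
      Break.isBreak = true;
      DefNode A, B;
      A.isPhi = B.isPhi = true;
      A.incoming = {&B};                      // A and B form a PHI cycle
      B.incoming = {&A, &Break};
      std::set<const DefNode *> Visited;
      assert(hasBreakDef(A, Visited));
      return 0;
    }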
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 4ecc0fcc6232..831ac5948a68 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -36,9 +36,7 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
- const char *getPassName() const override {
- return "SI Fold Operands";
- }
+ StringRef getPassName() const override { return "SI Fold Operands"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
@@ -48,24 +46,36 @@ public:
struct FoldCandidate {
MachineInstr *UseMI;
- unsigned UseOpNo;
- MachineOperand *OpToFold;
- uint64_t ImmToFold;
+ union {
+ MachineOperand *OpToFold;
+ uint64_t ImmToFold;
+ int FrameIndexToFold;
+ };
+ unsigned char UseOpNo;
+ MachineOperand::MachineOperandType Kind;
FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp) :
- UseMI(MI), UseOpNo(OpNo) {
-
+ UseMI(MI), OpToFold(nullptr), UseOpNo(OpNo), Kind(FoldOp->getType()) {
if (FoldOp->isImm()) {
- OpToFold = nullptr;
ImmToFold = FoldOp->getImm();
+ } else if (FoldOp->isFI()) {
+ FrameIndexToFold = FoldOp->getIndex();
} else {
assert(FoldOp->isReg());
OpToFold = FoldOp;
}
}
+ bool isFI() const {
+ return Kind == MachineOperand::MO_FrameIndex;
+ }
+
bool isImm() const {
- return !OpToFold;
+ return Kind == MachineOperand::MO_Immediate;
+ }
+
+ bool isReg() const {
+ return Kind == MachineOperand::MO_Register;
}
};
@@ -82,11 +92,18 @@ FunctionPass *llvm::createSIFoldOperandsPass() {
return new SIFoldOperands();
}
-static bool isSafeToFold(unsigned Opcode) {
- switch(Opcode) {
+static bool isSafeToFold(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
case AMDGPU::V_MOV_B32_e32:
case AMDGPU::V_MOV_B32_e64:
- case AMDGPU::V_MOV_B64_PSEUDO:
+ case AMDGPU::V_MOV_B64_PSEUDO: {
+ // If there are additional implicit register operands, this may be used for
+ // register indexing so the source register operand isn't simply copied.
+ unsigned NumOps = MI.getDesc().getNumOperands() +
+ MI.getDesc().getNumImplicitUses();
+
+ return MI.getNumOperands() == NumOps;
+ }
case AMDGPU::S_MOV_B32:
case AMDGPU::S_MOV_B64:
case AMDGPU::COPY:
@@ -107,6 +124,11 @@ static bool updateOperand(FoldCandidate &Fold,
return true;
}
+ if (Fold.isFI()) {
+ Old.ChangeToFrameIndex(Fold.FrameIndexToFold);
+ return true;
+ }
+
MachineOperand *New = Fold.OpToFold;
if (TargetRegisterInfo::isVirtualRegister(Old.getReg()) &&
TargetRegisterInfo::isVirtualRegister(New->getReg())) {
@@ -134,13 +156,15 @@ static bool tryAddToFoldList(std::vector<FoldCandidate> &FoldList,
const SIInstrInfo *TII) {
if (!TII->isOperandLegal(*MI, OpNo, OpToFold)) {
- // Special case for v_mac_f32_e64 if we are trying to fold into src2
+ // Special case for v_mac_{f16, f32}_e64 if we are trying to fold into src2
unsigned Opc = MI->getOpcode();
- if (Opc == AMDGPU::V_MAC_F32_e64 &&
+ if ((Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64) &&
(int)OpNo == AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2)) {
- // Check if changing this to a v_mad_f32 instruction will allow us to
- // fold the operand.
- MI->setDesc(TII->get(AMDGPU::V_MAD_F32));
+ bool IsF32 = Opc == AMDGPU::V_MAC_F32_e64;
+
+ // Check if changing this to a v_mad_{f16, f32} instruction will allow us
+ // to fold the operand.
+ MI->setDesc(TII->get(IsF32 ? AMDGPU::V_MAD_F32 : AMDGPU::V_MAD_F16));
bool FoldAsMAD = tryAddToFoldList(FoldList, MI, OpNo, OpToFold, TII);
if (FoldAsMAD) {
MI->untieRegOperand(OpNo);
@@ -149,6 +173,13 @@ static bool tryAddToFoldList(std::vector<FoldCandidate> &FoldList,
MI->setDesc(TII->get(Opc));
}
+ // Special case for s_setreg_b32
+ if (Opc == AMDGPU::S_SETREG_B32 && OpToFold->isImm()) {
+ MI->setDesc(TII->get(AMDGPU::S_SETREG_IMM32_B32));
+ FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
+ return true;
+ }
+
// If we are already folding into another operand of MI, then
// we can't commute the instruction, otherwise we risk making the
// other fold illegal.
@@ -188,6 +219,14 @@ static bool tryAddToFoldList(std::vector<FoldCandidate> &FoldList,
return true;
}
+// If the use operand doesn't care about the value, this may be an operand only
+// used for register indexing, in which case it is unsafe to fold.
+static bool isUseSafeToFold(const MachineInstr &MI,
+ const MachineOperand &UseMO) {
+ return !UseMO.isUndef();
+ //return !MI.hasRegisterImplicitUseOperand(UseMO.getReg());
+}
+
static void foldOperand(MachineOperand &OpToFold, MachineInstr *UseMI,
unsigned UseOpIdx,
std::vector<FoldCandidate> &FoldList,
@@ -196,100 +235,256 @@ static void foldOperand(MachineOperand &OpToFold, MachineInstr *UseMI,
MachineRegisterInfo &MRI) {
const MachineOperand &UseOp = UseMI->getOperand(UseOpIdx);
+ if (!isUseSafeToFold(*UseMI, UseOp))
+ return;
+
// FIXME: Fold operands with subregs.
- if (UseOp.isReg() && ((UseOp.getSubReg() && OpToFold.isReg()) ||
- UseOp.isImplicit())) {
+ if (UseOp.isReg() && OpToFold.isReg()) {
+ if (UseOp.isImplicit() || UseOp.getSubReg() != AMDGPU::NoSubRegister)
+ return;
+
+ // Don't fold subregister extracts into tied operands unless the source is a
+ // full copy, since a subregister use tied to a full register def doesn't
+ // really make sense. e.g. don't fold:
+ //
+ // %vreg1 = COPY %vreg0:sub1
+ // %vreg2<tied3> = V_MAC_{F16, F32} %vreg3, %vreg4, %vreg1<tied0>
+ //
+ // into
+ // %vreg2<tied3> = V_MAC_{F16, F32} %vreg3, %vreg4, %vreg0:sub1<tied0>
+ if (UseOp.isTied() && OpToFold.getSubReg() != AMDGPU::NoSubRegister)
+ return;
+ }
+
+ // Special case for REG_SEQUENCE: We can't fold literals into
+ // REG_SEQUENCE instructions, so we have to fold them into the
+ // uses of REG_SEQUENCE.
+ if (UseMI->isRegSequence()) {
+ unsigned RegSeqDstReg = UseMI->getOperand(0).getReg();
+ unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();
+
+ for (MachineRegisterInfo::use_iterator
+ RSUse = MRI.use_begin(RegSeqDstReg), RSE = MRI.use_end();
+ RSUse != RSE; ++RSUse) {
+
+ MachineInstr *RSUseMI = RSUse->getParent();
+ if (RSUse->getSubReg() != RegSeqDstSubReg)
+ continue;
+
+ foldOperand(OpToFold, RSUseMI, RSUse.getOperandNo(), FoldList,
+ CopiesToReplace, TII, TRI, MRI);
+ }
+
return;
}
+
bool FoldingImm = OpToFold.isImm();
- APInt Imm;
- if (FoldingImm) {
+ // In order to fold immediates into copies, we need to change the
+ // copy to a MOV.
+ if (FoldingImm && UseMI->isCopy()) {
+ unsigned DestReg = UseMI->getOperand(0).getReg();
+ const TargetRegisterClass *DestRC
+ = TargetRegisterInfo::isVirtualRegister(DestReg) ?
+ MRI.getRegClass(DestReg) :
+ TRI.getPhysRegClass(DestReg);
+
+ unsigned MovOp = TII->getMovOpcode(DestRC);
+ if (MovOp == AMDGPU::COPY)
+ return;
+
+ UseMI->setDesc(TII->get(MovOp));
+ CopiesToReplace.push_back(UseMI);
+ } else {
+ const MCInstrDesc &UseDesc = UseMI->getDesc();
+
+ // Don't fold into target independent nodes. Target independent opcodes
+ // don't have defined register classes.
+ if (UseDesc.isVariadic() ||
+ UseDesc.OpInfo[UseOpIdx].RegClass == -1)
+ return;
+ }
+
+ if (!FoldingImm) {
+ tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);
+
+ // FIXME: We could try to change the instruction from 64-bit to 32-bit
+ // to enable more folding opportunities. The shrink operands pass
+ // already does this.
+ return;
+ }
+
+
+ const MCInstrDesc &FoldDesc = OpToFold.getParent()->getDesc();
+ const TargetRegisterClass *FoldRC =
+ TRI.getRegClass(FoldDesc.OpInfo[0].RegClass);
+
+ APInt Imm(TII->operandBitWidth(FoldDesc.OpInfo[1].OperandType),
+ OpToFold.getImm());
+
+ // Split 64-bit constants into 32-bits for folding.
+ if (UseOp.getSubReg() && AMDGPU::getRegBitWidth(FoldRC->getID()) == 64) {
unsigned UseReg = UseOp.getReg();
const TargetRegisterClass *UseRC
= TargetRegisterInfo::isVirtualRegister(UseReg) ?
MRI.getRegClass(UseReg) :
TRI.getPhysRegClass(UseReg);
- Imm = APInt(64, OpToFold.getImm());
-
- const MCInstrDesc &FoldDesc = TII->get(OpToFold.getParent()->getOpcode());
- const TargetRegisterClass *FoldRC =
- TRI.getRegClass(FoldDesc.OpInfo[0].RegClass);
+ assert(Imm.getBitWidth() == 64);
- // Split 64-bit constants into 32-bits for folding.
- if (FoldRC->getSize() == 8 && UseOp.getSubReg()) {
- if (UseRC->getSize() != 8)
- return;
+ if (AMDGPU::getRegBitWidth(UseRC->getID()) != 64)
+ return;
- if (UseOp.getSubReg() == AMDGPU::sub0) {
- Imm = Imm.getLoBits(32);
- } else {
- assert(UseOp.getSubReg() == AMDGPU::sub1);
- Imm = Imm.getHiBits(32);
- }
+ if (UseOp.getSubReg() == AMDGPU::sub0) {
+ Imm = Imm.getLoBits(32);
+ } else {
+ assert(UseOp.getSubReg() == AMDGPU::sub1);
+ Imm = Imm.getHiBits(32);
}
+ }
- // In order to fold immediates into copies, we need to change the
- // copy to a MOV.
- if (UseMI->getOpcode() == AMDGPU::COPY) {
- unsigned DestReg = UseMI->getOperand(0).getReg();
- const TargetRegisterClass *DestRC
- = TargetRegisterInfo::isVirtualRegister(DestReg) ?
- MRI.getRegClass(DestReg) :
- TRI.getPhysRegClass(DestReg);
-
- unsigned MovOp = TII->getMovOpcode(DestRC);
- if (MovOp == AMDGPU::COPY)
- return;
-
- UseMI->setDesc(TII->get(MovOp));
- CopiesToReplace.push_back(UseMI);
- }
+ MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
+ tryAddToFoldList(FoldList, UseMI, UseOpIdx, &ImmOp, TII);
+}
+
+static bool evalBinaryInstruction(unsigned Opcode, int32_t &Result,
+ int32_t LHS, int32_t RHS) {
+ switch (Opcode) {
+ case AMDGPU::V_AND_B32_e64:
+ case AMDGPU::S_AND_B32:
+ Result = LHS & RHS;
+ return true;
+ case AMDGPU::V_OR_B32_e64:
+ case AMDGPU::S_OR_B32:
+ Result = LHS | RHS;
+ return true;
+ case AMDGPU::V_XOR_B32_e64:
+ case AMDGPU::S_XOR_B32:
+ Result = LHS ^ RHS;
+ return true;
+ default:
+ return false;
}
+}
- // Special case for REG_SEQUENCE: We can't fold literals into
- // REG_SEQUENCE instructions, so we have to fold them into the
- // uses of REG_SEQUENCE.
- if (UseMI->getOpcode() == AMDGPU::REG_SEQUENCE) {
- unsigned RegSeqDstReg = UseMI->getOperand(0).getReg();
- unsigned RegSeqDstSubReg = UseMI->getOperand(UseOpIdx + 1).getImm();
+static unsigned getMovOpc(bool IsScalar) {
+ return IsScalar ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
+}
- for (MachineRegisterInfo::use_iterator
- RSUse = MRI.use_begin(RegSeqDstReg),
- RSE = MRI.use_end(); RSUse != RSE; ++RSUse) {
+/// Remove any leftover implicit operands from mutating the instruction. e.g.
+/// if we replace an s_and_b32 with a copy, we don't need the implicit scc def
+/// anymore.
+static void stripExtraCopyOperands(MachineInstr &MI) {
+ const MCInstrDesc &Desc = MI.getDesc();
+ unsigned NumOps = Desc.getNumOperands() +
+ Desc.getNumImplicitUses() +
+ Desc.getNumImplicitDefs();
+
+ for (unsigned I = MI.getNumOperands() - 1; I >= NumOps; --I)
+ MI.RemoveOperand(I);
+}
- MachineInstr *RSUseMI = RSUse->getParent();
- if (RSUse->getSubReg() != RegSeqDstSubReg)
- continue;
+static void mutateCopyOp(MachineInstr &MI, const MCInstrDesc &NewDesc) {
+ MI.setDesc(NewDesc);
+ stripExtraCopyOperands(MI);
+}
- foldOperand(OpToFold, RSUseMI, RSUse.getOperandNo(), FoldList,
- CopiesToReplace, TII, TRI, MRI);
+// Try to simplify operations with a constant that may appear after instruction
+// selection.
+static bool tryConstantFoldOp(MachineRegisterInfo &MRI,
+ const SIInstrInfo *TII,
+ MachineInstr *MI) {
+ unsigned Opc = MI->getOpcode();
+
+ if (Opc == AMDGPU::V_NOT_B32_e64 || Opc == AMDGPU::V_NOT_B32_e32 ||
+ Opc == AMDGPU::S_NOT_B32) {
+ MachineOperand &Src0 = MI->getOperand(1);
+ if (Src0.isImm()) {
+ Src0.setImm(~Src0.getImm());
+ mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_NOT_B32)));
+ return true;
}
- return;
+
+ return false;
}
- const MCInstrDesc &UseDesc = UseMI->getDesc();
+ if (!MI->isCommutable())
+ return false;
- // Don't fold into target independent nodes. Target independent opcodes
- // don't have defined register classes.
- if (UseDesc.isVariadic() ||
- UseDesc.OpInfo[UseOpIdx].RegClass == -1)
- return;
+ int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
+ int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
- if (FoldingImm) {
- MachineOperand ImmOp = MachineOperand::CreateImm(Imm.getSExtValue());
- tryAddToFoldList(FoldList, UseMI, UseOpIdx, &ImmOp, TII);
- return;
+ MachineOperand *Src0 = &MI->getOperand(Src0Idx);
+ MachineOperand *Src1 = &MI->getOperand(Src1Idx);
+ if (!Src0->isImm() && !Src1->isImm())
+ return false;
+
+ // and k0, k1 -> v_mov_b32 (k0 & k1)
+ // or k0, k1 -> v_mov_b32 (k0 | k1)
+ // xor k0, k1 -> v_mov_b32 (k0 ^ k1)
+ if (Src0->isImm() && Src1->isImm()) {
+ int32_t NewImm;
+ if (!evalBinaryInstruction(Opc, NewImm, Src0->getImm(), Src1->getImm()))
+ return false;
+
+ const SIRegisterInfo &TRI = TII->getRegisterInfo();
+ bool IsSGPR = TRI.isSGPRReg(MRI, MI->getOperand(0).getReg());
+
+ Src0->setImm(NewImm);
+ MI->RemoveOperand(Src1Idx);
+ mutateCopyOp(*MI, TII->get(getMovOpc(IsSGPR)));
+ return true;
+ }
+
+ if (Src0->isImm() && !Src1->isImm()) {
+ std::swap(Src0, Src1);
+ std::swap(Src0Idx, Src1Idx);
+ }
+
+ int32_t Src1Val = static_cast<int32_t>(Src1->getImm());
+ if (Opc == AMDGPU::V_OR_B32_e64 || Opc == AMDGPU::S_OR_B32) {
+ if (Src1Val == 0) {
+ // y = or x, 0 => y = copy x
+ MI->RemoveOperand(Src1Idx);
+ mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
+ } else if (Src1Val == -1) {
+ // y = or x, -1 => y = v_mov_b32 -1
+ MI->RemoveOperand(Src1Idx);
+ mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_OR_B32)));
+ } else
+ return false;
+
+ return true;
+ }
+
+ if (MI->getOpcode() == AMDGPU::V_AND_B32_e64 ||
+ MI->getOpcode() == AMDGPU::S_AND_B32) {
+ if (Src1Val == 0) {
+ // y = and x, 0 => y = v_mov_b32 0
+ MI->RemoveOperand(Src0Idx);
+ mutateCopyOp(*MI, TII->get(getMovOpc(Opc == AMDGPU::S_AND_B32)));
+ } else if (Src1Val == -1) {
+ // y = and x, -1 => y = copy x
+ MI->RemoveOperand(Src1Idx);
+ mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
+ stripExtraCopyOperands(*MI);
+ } else
+ return false;
+
+ return true;
}
- tryAddToFoldList(FoldList, UseMI, UseOpIdx, &OpToFold, TII);
+ if (MI->getOpcode() == AMDGPU::V_XOR_B32_e64 ||
+ MI->getOpcode() == AMDGPU::S_XOR_B32) {
+ if (Src1Val == 0) {
+ // y = xor x, 0 => y = copy x
+ MI->RemoveOperand(Src1Idx);
+ mutateCopyOp(*MI, TII->get(AMDGPU::COPY));
+ }
+ }
- // FIXME: We could try to change the instruction from 64-bit to 32-bit
- // to enable more folding opportunites. The shrink operands pass
- // already does this.
- return;
+ return false;
}
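// Illustrative example, not part of the commit: the kind of cleanup
// tryConstantFoldOp performs once an immediate has been folded into a use.
// Virtual register names are made up.
//
//   before:
//     %vreg1 = V_MOV_B32_e32 0
//     %vreg2 = V_OR_B32_e64  %vreg0, %vreg1
//     %vreg3 = V_AND_B32_e64 %vreg0, %vreg1
//   after the 0 is folded and tryConstantFoldOp runs:
//     %vreg2 = COPY %vreg0
//     %vreg3 = V_MOV_B32_e32 0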
bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
@@ -311,25 +506,17 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
Next = std::next(I);
MachineInstr &MI = *I;
- if (!isSafeToFold(MI.getOpcode()))
+ if (!isSafeToFold(MI))
continue;
- unsigned OpSize = TII->getOpSize(MI, 1);
MachineOperand &OpToFold = MI.getOperand(1);
- bool FoldingImm = OpToFold.isImm();
+ bool FoldingImm = OpToFold.isImm() || OpToFold.isFI();
// FIXME: We could also be folding things like FrameIndexes and
// TargetIndexes.
if (!FoldingImm && !OpToFold.isReg())
continue;
- // Folding immediates with more than one use will increase program size.
- // FIXME: This will also reduce register usage, which may be better
- // in some cases. A better heuristic is needed.
- if (FoldingImm && !TII->isInlineConstant(OpToFold, OpSize) &&
- !MRI.hasOneUse(MI.getOperand(0).getReg()))
- continue;
-
if (OpToFold.isReg() &&
!TargetRegisterInfo::isVirtualRegister(OpToFold.getReg()))
continue;
@@ -351,14 +538,58 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
SmallVector<MachineInstr *, 4> CopiesToReplace;
std::vector<FoldCandidate> FoldList;
- for (MachineRegisterInfo::use_iterator
- Use = MRI.use_begin(MI.getOperand(0).getReg()), E = MRI.use_end();
- Use != E; ++Use) {
-
- MachineInstr *UseMI = Use->getParent();
+ if (FoldingImm) {
+ unsigned NumLiteralUses = 0;
+ MachineOperand *NonInlineUse = nullptr;
+ int NonInlineUseOpNo = -1;
+
+ // Try to fold any inline immediate uses, and then only fold other
+ // constants if they have one use.
+ //
+ // The legality of the inline immediate must be checked based on the use
+ // operand, not the defining instruction, because 32-bit instructions
+ // with 32-bit inline immediate sources may be used to materialize
+ // constants used in 16-bit operands.
+ //
+ // e.g. it is unsafe to fold:
+ // s_mov_b32 s0, 1.0 // materializes 0x3f800000
+ // v_add_f16 v0, v1, s0 // 1.0 f16 inline immediate sees 0x00003c00
+
+ // Folding immediates with more than one use will increase program size.
+ // FIXME: This will also reduce register usage, which may be better
+ // in some cases. A better heuristic is needed.
+ for (MachineRegisterInfo::use_iterator
+ Use = MRI.use_begin(Dst.getReg()), E = MRI.use_end();
+ Use != E; ++Use) {
+ MachineInstr *UseMI = Use->getParent();
+ unsigned OpNo = Use.getOperandNo();
+
+ if (TII->isInlineConstant(*UseMI, OpNo, OpToFold)) {
+ foldOperand(OpToFold, UseMI, OpNo, FoldList,
+ CopiesToReplace, TII, TRI, MRI);
+ } else {
+ if (++NumLiteralUses == 1) {
+ NonInlineUse = &*Use;
+ NonInlineUseOpNo = OpNo;
+ }
+ }
+ }
- foldOperand(OpToFold, UseMI, Use.getOperandNo(), FoldList,
- CopiesToReplace, TII, TRI, MRI);
+ if (NumLiteralUses == 1) {
+ MachineInstr *UseMI = NonInlineUse->getParent();
+ foldOperand(OpToFold, UseMI, NonInlineUseOpNo, FoldList,
+ CopiesToReplace, TII, TRI, MRI);
+ }
+ } else {
+ // Folding register.
+ for (MachineRegisterInfo::use_iterator
+ Use = MRI.use_begin(Dst.getReg()), E = MRI.use_end();
+ Use != E; ++Use) {
+ MachineInstr *UseMI = Use->getParent();
+
+ foldOperand(OpToFold, UseMI, Use.getOperandNo(), FoldList,
+ CopiesToReplace, TII, TRI, MRI);
+ }
}
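// Illustrative example, not part of the commit, of the heuristic above:
//   %vreg0 = S_MOV_B32 64       ; 64 is an inline immediate
//     -> folded into every use that accepts it, regardless of use count.
//   %vreg0 = S_MOV_B32 123456   ; needs a 32-bit literal
//     -> folded only when exactly one use requires the literal, so program
//        size does not grow.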
// Make sure we add EXEC uses to any new v_mov instructions created.
@@ -368,7 +599,7 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
for (FoldCandidate &Fold : FoldList) {
if (updateOperand(Fold, TRI)) {
// Clear kill flags.
- if (!Fold.isImm()) {
+ if (Fold.isReg()) {
assert(Fold.OpToFold && Fold.OpToFold->isReg());
// FIXME: Probably shouldn't bother trying to fold if not an
// SGPR. PeepholeOptimizer can eliminate redundant VGPR->VGPR
@@ -376,7 +607,13 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
MRI.clearKillFlags(Fold.OpToFold->getReg());
}
DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<
- Fold.UseOpNo << " of " << *Fold.UseMI << '\n');
+ static_cast<int>(Fold.UseOpNo) << " of " << *Fold.UseMI << '\n');
+
+ // Folding the immediate may reveal operations that can be constant
+ // folded or replaced with a copy. This can happen for example after
+ // frame indices are lowered to constants or from splitting 64-bit
+ // constants.
+ tryConstantFoldOp(MRI, TII, Fold.UseMI);
}
}
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 03b11f0fd38d..d0a69eafc58e 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -21,20 +21,168 @@
using namespace llvm;
-static bool hasOnlySGPRSpills(const SIMachineFunctionInfo *FuncInfo,
- const MachineFrameInfo *FrameInfo) {
- return FuncInfo->hasSpilledSGPRs() &&
- (!FuncInfo->hasSpilledVGPRs() && !FuncInfo->hasNonSpillStackObjects());
-}
-
-static ArrayRef<MCPhysReg> getAllSGPR128() {
+static ArrayRef<MCPhysReg> getAllSGPR128(const MachineFunction &MF,
+ const SIRegisterInfo *TRI) {
return makeArrayRef(AMDGPU::SGPR_128RegClass.begin(),
- AMDGPU::SGPR_128RegClass.getNumRegs());
+ TRI->getMaxNumSGPRs(MF) / 4);
}
-static ArrayRef<MCPhysReg> getAllSGPRs() {
+static ArrayRef<MCPhysReg> getAllSGPRs(const MachineFunction &MF,
+ const SIRegisterInfo *TRI) {
return makeArrayRef(AMDGPU::SGPR_32RegClass.begin(),
- AMDGPU::SGPR_32RegClass.getNumRegs());
+ TRI->getMaxNumSGPRs(MF));
+}
+
+void SIFrameLowering::emitFlatScratchInit(const SIInstrInfo *TII,
+ const SIRegisterInfo* TRI,
+ MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ // We don't need this if we only have spills since there is no user facing
+ // scratch.
+
+ // TODO: If we know we don't have flat instructions earlier, we can omit
+ // this from the input registers.
+ //
+ // TODO: We only need to know if we access scratch space through a flat
+ // pointer. Because we only detect if flat instructions are used at all,
+ // this will be used more often than necessary on VI.
+
+ // Debug location must be unknown since the first debug location is used to
+ // determine the end of the prologue.
+ DebugLoc DL;
+ MachineBasicBlock::iterator I = MBB.begin();
+
+ unsigned FlatScratchInitReg
+ = TRI->getPreloadedValue(MF, SIRegisterInfo::FLAT_SCRATCH_INIT);
+
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ MRI.addLiveIn(FlatScratchInitReg);
+ MBB.addLiveIn(FlatScratchInitReg);
+
+ // Copy the size in bytes.
+ unsigned FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), AMDGPU::FLAT_SCR_LO)
+ .addReg(FlatScrInitHi, RegState::Kill);
+
+ unsigned FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
+
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
+
+ // Add wave offset in bytes to private base offset.
+ // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
+ .addReg(FlatScrInitLo)
+ .addReg(ScratchWaveOffsetReg);
+
+ // Convert offset to 256-byte units.
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32), AMDGPU::FLAT_SCR_HI)
+ .addReg(FlatScrInitLo, RegState::Kill)
+ .addImm(8);
+}
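// Illustrative summary, not part of the commit: net effect of the sequence
// above, going by the comments on the FLAT_SCRATCH_INIT input pair
// (sub0 = private segment base offset in bytes, sub1 = scratch size in bytes):
//
//   flat_scratch_lo = size in bytes
//   flat_scratch_hi = (base offset + scratch wave offset) >> 8  ; 256-byte units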
+
+unsigned SIFrameLowering::getReservedPrivateSegmentBufferReg(
+ const SISubtarget &ST,
+ const SIInstrInfo *TII,
+ const SIRegisterInfo *TRI,
+ SIMachineFunctionInfo *MFI,
+ MachineFunction &MF) const {
+
+ // We need to insert initialization of the scratch resource descriptor.
+ unsigned ScratchRsrcReg = MFI->getScratchRSrcReg();
+ if (ScratchRsrcReg == AMDGPU::NoRegister)
+ return AMDGPU::NoRegister;
+
+ if (ST.hasSGPRInitBug() ||
+ ScratchRsrcReg != TRI->reservedPrivateSegmentBufferReg(MF))
+ return ScratchRsrcReg;
+
+ // We reserved the last registers for this. Shift it down to the end of those
+ // which were actually used.
+ //
+ // FIXME: It might be safer to use a pseudoregister before replacement.
+
+ // FIXME: We should be able to eliminate unused input registers. We only
+ // cannot do this for the resources required for scratch access. For now we
+ // skip over user SGPRs and may leave unused holes.
+
+ // We find the resource first because it has an alignment requirement.
+
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ unsigned NumPreloaded = (MFI->getNumPreloadedSGPRs() + 3) / 4;
+ ArrayRef<MCPhysReg> AllSGPR128s = getAllSGPR128(MF, TRI);
+ AllSGPR128s = AllSGPR128s.slice(std::min(static_cast<unsigned>(AllSGPR128s.size()), NumPreloaded));
+
+ // Skip the last 2 elements because the last one is reserved for VCC, and
+ // this is the 2nd to last element already.
+ for (MCPhysReg Reg : AllSGPR128s) {
+ // Pick the first unallocated one. Make sure we don't clobber the other
+ // reserved input we needed.
+ if (!MRI.isPhysRegUsed(Reg) && MRI.isAllocatable(Reg)) {
+ //assert(MRI.isAllocatable(Reg));
+ MRI.replaceRegWith(ScratchRsrcReg, Reg);
+ MFI->setScratchRSrcReg(Reg);
+ return Reg;
+ }
+ }
+
+ return ScratchRsrcReg;
+}
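// Illustrative example, not part of the commit: with, say, 6 preloaded user
// SGPRs, NumPreloaded = (6 + 3) / 4 = 2, so (assuming SGPR_128 lists the
// aligned quads s[0:3], s[4:7], ... in order) the first two quads are skipped
// and the first unused, allocatable quad from s[8:11] onwards becomes the
// scratch resource descriptor.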
+
+unsigned SIFrameLowering::getReservedPrivateSegmentWaveByteOffsetReg(
+ const SISubtarget &ST,
+ const SIInstrInfo *TII,
+ const SIRegisterInfo *TRI,
+ SIMachineFunctionInfo *MFI,
+ MachineFunction &MF) const {
+ unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
+ if (ST.hasSGPRInitBug() ||
+ ScratchWaveOffsetReg != TRI->reservedPrivateSegmentWaveByteOffsetReg(MF))
+ return ScratchWaveOffsetReg;
+
+ unsigned ScratchRsrcReg = MFI->getScratchRSrcReg();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
+
+ ArrayRef<MCPhysReg> AllSGPRs = getAllSGPRs(MF, TRI);
+ if (NumPreloaded > AllSGPRs.size())
+ return ScratchWaveOffsetReg;
+
+ AllSGPRs = AllSGPRs.slice(NumPreloaded);
+
+ // We need to drop register from the end of the list that we cannot use
+ // for the scratch wave offset.
+ // + 2 s102 and s103 do not exist on VI.
+ // + 2 for vcc
+ // + 2 for xnack_mask
+ // + 2 for flat_scratch
+ // + 4 for registers reserved for scratch resource register
+ // + 1 for register reserved for scratch wave offset. (By excluding this
+ // register from the list to consider, it means that when this
+ // register is being used for the scratch wave offset and there
+ // are no other free SGPRs, then the value will stay in this register.)
+ // ----
+ // 13
+ if (AllSGPRs.size() < 13)
+ return ScratchWaveOffsetReg;
+
+ for (MCPhysReg Reg : AllSGPRs.drop_back(13)) {
+ // Pick the first unallocated SGPR. Be careful not to pick an alias of the
+ // scratch descriptor, since we haven't added its uses yet.
+ if (!MRI.isPhysRegUsed(Reg)) {
+ if (!MRI.isAllocatable(Reg) ||
+ TRI->isSubRegisterEq(ScratchRsrcReg, Reg))
+ continue;
+
+ MRI.replaceRegWith(ScratchWaveOffsetReg, Reg);
+ MFI->setScratchWaveOffsetReg(Reg);
+ return Reg;
+ }
+ }
+
+ return ScratchWaveOffsetReg;
}
void SIFrameLowering::emitPrologue(MachineFunction &MF,
@@ -45,9 +193,6 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
if (ST.debuggerEmitPrologue())
emitDebuggerPrologue(MF, MBB);
- if (!MF.getFrameInfo()->hasStackObjects())
- return;
-
assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
@@ -57,186 +202,111 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
//
// FIXME: We should be cleaning up these unused SGPR spill frame indices
// somewhere.
- if (hasOnlySGPRSpills(MFI, MF.getFrameInfo()))
- return;
const SIInstrInfo *TII = ST.getInstrInfo();
const SIRegisterInfo *TRI = &TII->getRegisterInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
- MachineBasicBlock::iterator I = MBB.begin();
- // We need to insert initialization of the scratch resource descriptor.
- unsigned ScratchRsrcReg = MFI->getScratchRSrcReg();
- assert(ScratchRsrcReg != AMDGPU::NoRegister);
-
- unsigned ScratchWaveOffsetReg = MFI->getScratchWaveOffsetReg();
- assert(ScratchWaveOffsetReg != AMDGPU::NoRegister);
+ unsigned ScratchRsrcReg
+ = getReservedPrivateSegmentBufferReg(ST, TII, TRI, MFI, MF);
+ unsigned ScratchWaveOffsetReg
+ = getReservedPrivateSegmentWaveByteOffsetReg(ST, TII, TRI, MFI, MF);
- unsigned PreloadedScratchWaveOffsetReg = TRI->getPreloadedValue(
- MF, SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
-
- unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister;
- if (ST.isAmdHsaOS()) {
- PreloadedPrivateBufferReg = TRI->getPreloadedValue(
- MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER);
+ if (ScratchRsrcReg == AMDGPU::NoRegister) {
+ assert(ScratchWaveOffsetReg == AMDGPU::NoRegister);
+ return;
}
- if (MFI->hasFlatScratchInit()) {
- // We don't need this if we only have spills since there is no user facing
- // scratch.
-
- // TODO: If we know we don't have flat instructions earlier, we can omit
- // this from the input registers.
- //
- // TODO: We only need to know if we access scratch space through a flat
- // pointer. Because we only detect if flat instructions are used at all,
- // this will be used more often than necessary on VI.
-
- // Debug location must be unknown since the first debug location is used to
- // determine the end of the prologue.
- DebugLoc DL;
-
- unsigned FlatScratchInitReg
- = TRI->getPreloadedValue(MF, SIRegisterInfo::FLAT_SCRATCH_INIT);
+ assert(!TRI->isSubRegister(ScratchRsrcReg, ScratchWaveOffsetReg));
- MRI.addLiveIn(FlatScratchInitReg);
- MBB.addLiveIn(FlatScratchInitReg);
+ // We need to do the replacement of the private segment buffer and wave offset
+ // register even if there are no stack objects. There could be stores to undef
+ // or a constant without an associated object.
- // Copy the size in bytes.
- unsigned FlatScrInitHi = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub1);
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::FLAT_SCR_LO)
- .addReg(FlatScrInitHi, RegState::Kill);
+ // FIXME: We still have implicit uses on SGPR spill instructions in case they
+ // need to spill to vector memory. It's likely that this will not happen, but at
+ // this point it appears we need the setup. This part of the prolog should be
+ // emitted after frame indices are eliminated.
- unsigned FlatScrInitLo = TRI->getSubReg(FlatScratchInitReg, AMDGPU::sub0);
+ if (MF.getFrameInfo().hasStackObjects() && MFI->hasFlatScratchInit())
+ emitFlatScratchInit(TII, TRI, MF, MBB);
- // Add wave offset in bytes to private base offset.
- // See comment in AMDKernelCodeT.h for enable_sgpr_flat_scratch_init.
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_U32), FlatScrInitLo)
- .addReg(FlatScrInitLo)
- .addReg(ScratchWaveOffsetReg);
+ // We need to insert initialization of the scratch resource descriptor.
+ unsigned PreloadedScratchWaveOffsetReg = TRI->getPreloadedValue(
+ MF, SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET);
- // Convert offset to 256-byte units.
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_LSHR_B32), AMDGPU::FLAT_SCR_HI)
- .addReg(FlatScrInitLo, RegState::Kill)
- .addImm(8);
- }
- // If we reserved the original input registers, we don't need to copy to the
- // reserved registers.
- if (ScratchRsrcReg == PreloadedPrivateBufferReg) {
- // We should always reserve these 5 registers at the same time.
- assert(ScratchWaveOffsetReg == PreloadedScratchWaveOffsetReg &&
- "scratch wave offset and private segment buffer inconsistent");
- return;
+ unsigned PreloadedPrivateBufferReg = AMDGPU::NoRegister;
+ if (ST.isAmdCodeObjectV2()) {
+ PreloadedPrivateBufferReg = TRI->getPreloadedValue(
+ MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER);
}
+ bool OffsetRegUsed = !MRI.use_empty(ScratchWaveOffsetReg);
+ bool ResourceRegUsed = !MRI.use_empty(ScratchRsrcReg);
// We added live-ins during argument lowering, but since they were not used
// they were deleted. We're adding the uses now, so add them back.
- MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
- MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
+ if (OffsetRegUsed) {
+ assert(PreloadedScratchWaveOffsetReg != AMDGPU::NoRegister &&
+ "scratch wave offset input is required");
+ MRI.addLiveIn(PreloadedScratchWaveOffsetReg);
+ MBB.addLiveIn(PreloadedScratchWaveOffsetReg);
+ }
- if (ST.isAmdHsaOS()) {
+ if (ResourceRegUsed && PreloadedPrivateBufferReg != AMDGPU::NoRegister) {
+ assert(ST.isAmdCodeObjectV2());
MRI.addLiveIn(PreloadedPrivateBufferReg);
MBB.addLiveIn(PreloadedPrivateBufferReg);
}
- if (!ST.hasSGPRInitBug()) {
- // We reserved the last registers for this. Shift it down to the end of those
- // which were actually used.
- //
- // FIXME: It might be safer to use a pseudoregister before replacement.
-
- // FIXME: We should be able to eliminate unused input registers. We only
- // cannot do this for the resources required for scratch access. For now we
- // skip over user SGPRs and may leave unused holes.
-
- // We find the resource first because it has an alignment requirement.
- if (ScratchRsrcReg == TRI->reservedPrivateSegmentBufferReg(MF)) {
- MachineRegisterInfo &MRI = MF.getRegInfo();
-
- unsigned NumPreloaded = MFI->getNumPreloadedSGPRs() / 4;
- // Skip the last 2 elements because the last one is reserved for VCC, and
- // this is the 2nd to last element already.
- for (MCPhysReg Reg : getAllSGPR128().drop_back(2).slice(NumPreloaded)) {
- // Pick the first unallocated one. Make sure we don't clobber the other
- // reserved input we needed.
- if (!MRI.isPhysRegUsed(Reg)) {
- assert(MRI.isAllocatable(Reg));
- MRI.replaceRegWith(ScratchRsrcReg, Reg);
- ScratchRsrcReg = Reg;
- MFI->setScratchRSrcReg(ScratchRsrcReg);
- break;
- }
- }
- }
+ // Make the register selected live throughout the function.
+ for (MachineBasicBlock &OtherBB : MF) {
+ if (&OtherBB == &MBB)
+ continue;
- if (ScratchWaveOffsetReg == TRI->reservedPrivateSegmentWaveByteOffsetReg(MF)) {
- MachineRegisterInfo &MRI = MF.getRegInfo();
- unsigned NumPreloaded = MFI->getNumPreloadedSGPRs();
-
- // We need to drop register from the end of the list that we cannot use
- // for the scratch wave offset.
- // + 2 s102 and s103 do not exist on VI.
- // + 2 for vcc
- // + 2 for xnack_mask
- // + 2 for flat_scratch
- // + 4 for registers reserved for scratch resource register
- // + 1 for register reserved for scratch wave offset. (By exluding this
- // register from the list to consider, it means that when this
- // register is being used for the scratch wave offset and there
- // are no other free SGPRs, then the value will stay in this register.
- // ----
- // 13
- for (MCPhysReg Reg : getAllSGPRs().drop_back(13).slice(NumPreloaded)) {
- // Pick the first unallocated SGPR. Be careful not to pick an alias of the
- // scratch descriptor, since we haven’t added its uses yet.
- if (!MRI.isPhysRegUsed(Reg)) {
- if (!MRI.isAllocatable(Reg) ||
- TRI->isSubRegisterEq(ScratchRsrcReg, Reg))
- continue;
-
- MRI.replaceRegWith(ScratchWaveOffsetReg, Reg);
- ScratchWaveOffsetReg = Reg;
- MFI->setScratchWaveOffsetReg(ScratchWaveOffsetReg);
- break;
- }
- }
- }
+ if (OffsetRegUsed)
+ OtherBB.addLiveIn(ScratchWaveOffsetReg);
+
+ if (ResourceRegUsed)
+ OtherBB.addLiveIn(ScratchRsrcReg);
}
+ DebugLoc DL;
+ MachineBasicBlock::iterator I = MBB.begin();
- assert(!TRI->isSubRegister(ScratchRsrcReg, ScratchWaveOffsetReg));
+ // If we reserved the original input registers, we don't need to copy to the
+ // reserved registers.
- const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
- DebugLoc DL;
+ bool CopyBuffer = ResourceRegUsed &&
+ PreloadedPrivateBufferReg != AMDGPU::NoRegister &&
+ ScratchRsrcReg != PreloadedPrivateBufferReg;
+
+ // This needs to be careful of the copying order to avoid overwriting one of
+ // the input registers before it's been copied to its final
+ // destination. Usually the offset should be copied first.
+ bool CopyBufferFirst = TRI->isSubRegisterEq(PreloadedPrivateBufferReg,
+ ScratchWaveOffsetReg);
+ if (CopyBuffer && CopyBufferFirst) {
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
+ .addReg(PreloadedPrivateBufferReg, RegState::Kill);
+ }
- if (PreloadedScratchWaveOffsetReg != ScratchWaveOffsetReg) {
- // Make sure we emit the copy for the offset first. We may have chosen to copy
- // the buffer resource into a register that aliases the input offset register.
- BuildMI(MBB, I, DL, SMovB32, ScratchWaveOffsetReg)
+ if (OffsetRegUsed &&
+ PreloadedScratchWaveOffsetReg != ScratchWaveOffsetReg) {
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchWaveOffsetReg)
.addReg(PreloadedScratchWaveOffsetReg, RegState::Kill);
}
- if (ST.isAmdHsaOS()) {
- // Insert copies from argument register.
- assert(
- !TRI->isSubRegisterEq(PreloadedPrivateBufferReg, ScratchRsrcReg) &&
- !TRI->isSubRegisterEq(PreloadedPrivateBufferReg, ScratchWaveOffsetReg));
-
- unsigned Rsrc01 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0_sub1);
- unsigned Rsrc23 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2_sub3);
-
- unsigned Lo = TRI->getSubReg(PreloadedPrivateBufferReg, AMDGPU::sub0_sub1);
- unsigned Hi = TRI->getSubReg(PreloadedPrivateBufferReg, AMDGPU::sub2_sub3);
+ if (CopyBuffer && !CopyBufferFirst) {
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), ScratchRsrcReg)
+ .addReg(PreloadedPrivateBufferReg, RegState::Kill);
+ }
- const MCInstrDesc &SMovB64 = TII->get(AMDGPU::S_MOV_B64);
+ if (ResourceRegUsed && PreloadedPrivateBufferReg == AMDGPU::NoRegister) {
+ assert(!ST.isAmdCodeObjectV2());
+ const MCInstrDesc &SMovB32 = TII->get(AMDGPU::S_MOV_B32);
- BuildMI(MBB, I, DL, SMovB64, Rsrc01)
- .addReg(Lo, RegState::Kill);
- BuildMI(MBB, I, DL, SMovB64, Rsrc23)
- .addReg(Hi, RegState::Kill);
- } else {
unsigned Rsrc0 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub0);
unsigned Rsrc1 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub1);
unsigned Rsrc2 = TRI->getSubReg(ScratchRsrcReg, AMDGPU::sub2);
@@ -260,15 +330,6 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
.addImm(Rsrc23 >> 32)
.addReg(ScratchRsrcReg, RegState::ImplicitDefine);
}
-
- // Make the register selected live throughout the function.
- for (MachineBasicBlock &OtherBB : MF) {
- if (&OtherBB == &MBB)
- continue;
-
- OtherBB.addLiveIn(ScratchRsrcReg);
- OtherBB.addLiveIn(ScratchWaveOffsetReg);
- }
}
void SIFrameLowering::emitEpilogue(MachineFunction &MF,
@@ -279,20 +340,20 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
void SIFrameLowering::processFunctionBeforeFrameFinalized(
MachineFunction &MF,
RegScavenger *RS) const {
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
- if (!MFI->hasStackObjects())
+ if (!MFI.hasStackObjects())
return;
- bool MayNeedScavengingEmergencySlot = MFI->hasStackObjects();
+ bool MayNeedScavengingEmergencySlot = MFI.hasStackObjects();
assert((RS || !MayNeedScavengingEmergencySlot) &&
"RegScavenger required if spilling");
if (MayNeedScavengingEmergencySlot) {
- int ScavengeFI = MFI->CreateSpillStackObject(
+ int ScavengeFI = MFI.CreateStackObject(
AMDGPU::SGPR_32RegClass.getSize(),
- AMDGPU::SGPR_32RegClass.getAlignment());
+ AMDGPU::SGPR_32RegClass.getAlignment(), false);
RS->addScavengingFrameIndex(ScavengeFI);
}
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.h b/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.h
index 37417d098f31..7657b4e03864 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.h
+++ b/contrib/llvm/lib/Target/AMDGPU/SIFrameLowering.h
@@ -14,12 +14,17 @@
namespace llvm {
+class SIInstrInfo;
+class SIMachineFunctionInfo;
+class SIRegisterInfo;
+class SISubtarget;
+
class SIFrameLowering final : public AMDGPUFrameLowering {
public:
SIFrameLowering(StackDirection D, unsigned StackAl, int LAO,
unsigned TransAl = 1) :
AMDGPUFrameLowering(D, StackAl, LAO, TransAl) {}
- ~SIFrameLowering() override {}
+ ~SIFrameLowering() override = default;
void emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const override;
@@ -31,10 +36,29 @@ public:
RegScavenger *RS = nullptr) const override;
private:
+ void emitFlatScratchInit(const SIInstrInfo *TII,
+ const SIRegisterInfo* TRI,
+ MachineFunction &MF,
+ MachineBasicBlock &MBB) const;
+
+ unsigned getReservedPrivateSegmentBufferReg(
+ const SISubtarget &ST,
+ const SIInstrInfo *TII,
+ const SIRegisterInfo *TRI,
+ SIMachineFunctionInfo *MFI,
+ MachineFunction &MF) const;
+
+ unsigned getReservedPrivateSegmentWaveByteOffsetReg(
+ const SISubtarget &ST,
+ const SIInstrInfo *TII,
+ const SIRegisterInfo *TRI,
+ SIMachineFunctionInfo *MFI,
+ MachineFunction &MF) const;
+
/// \brief Emits debugger prologue.
void emitDebuggerPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const;
};
-}
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_AMDGPU_SIFRAMELOWERING_H
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 80d44351267d..fa53831cbe16 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -21,6 +21,7 @@
#include "AMDGPU.h"
#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPUSubtarget.h"
+#include "SIDefines.h"
#include "SIISelLowering.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
@@ -31,17 +32,18 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
using namespace llvm;
-// -amdgpu-fast-fdiv - Command line option to enable faster 2.5 ulp fdiv.
-static cl::opt<bool> EnableAMDGPUFastFDIV(
- "amdgpu-fast-fdiv",
- cl::desc("Enable faster 2.5 ulp fdiv"),
+static cl::opt<bool> EnableVGPRIndexMode(
+ "amdgpu-vgpr-index-mode",
+ cl::desc("Use GPR indexing mode instead of movrel for vector indexing"),
cl::init(false));
+
static unsigned findFirstFreeSGPR(CCState &CCInfo) {
unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
for (unsigned Reg = 0; Reg < NumSGPRs; ++Reg) {
@@ -58,7 +60,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::i1, &AMDGPU::VReg_1RegClass);
addRegisterClass(MVT::i64, &AMDGPU::SReg_64RegClass);
- addRegisterClass(MVT::i32, &AMDGPU::SReg_32RegClass);
+ addRegisterClass(MVT::i32, &AMDGPU::SReg_32_XM0RegClass);
addRegisterClass(MVT::f32, &AMDGPU::VGPR_32RegClass);
addRegisterClass(MVT::f64, &AMDGPU::VReg_64RegClass);
@@ -77,6 +79,11 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::v16i32, &AMDGPU::SReg_512RegClass);
addRegisterClass(MVT::v16f32, &AMDGPU::VReg_512RegClass);
+ if (Subtarget->has16BitInsts()) {
+ addRegisterClass(MVT::i16, &AMDGPU::SReg_32_XM0RegClass);
+ addRegisterClass(MVT::f16, &AMDGPU::SReg_32_XM0RegClass);
+ }
+
computeRegisterProperties(STI.getRegisterInfo());
// We need to custom lower vector stores from local memory
@@ -94,7 +101,6 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
- setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
setOperationAction(ISD::ConstantPool, MVT::v2i64, Expand);
setOperationAction(ISD::SELECT, MVT::i1, Promote);
@@ -111,6 +117,7 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SETCC, MVT::i1, Promote);
setOperationAction(ISD::SETCC, MVT::v2i1, Expand);
setOperationAction(ISD::SETCC, MVT::v4i1, Expand);
+ AddPromotedToType(ISD::SETCC, MVT::i1, MVT::i32);
setOperationAction(ISD::TRUNCATE, MVT::v2i32, Expand);
setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand);
@@ -159,6 +166,9 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
}
}
+ // TODO: For dynamic 64-bit vector inserts/extracts, should emit a pseudo that
+ // is expanded to avoid having two separate loops in case the index is a VGPR.
+
// Most operations are naturally 32-bit vector operations. We only support
// load and store of i64 vectors, so promote v2i64 vector operations to v4i32.
for (MVT Vec64 : { MVT::v2i64, MVT::v2f64 }) {
@@ -218,6 +228,83 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FDIV, MVT::f32, Custom);
setOperationAction(ISD::FDIV, MVT::f64, Custom);
+ if (Subtarget->has16BitInsts()) {
+ setOperationAction(ISD::Constant, MVT::i16, Legal);
+
+ setOperationAction(ISD::SMIN, MVT::i16, Legal);
+ setOperationAction(ISD::SMAX, MVT::i16, Legal);
+
+ setOperationAction(ISD::UMIN, MVT::i16, Legal);
+ setOperationAction(ISD::UMAX, MVT::i16, Legal);
+
+ setOperationAction(ISD::SIGN_EXTEND, MVT::i16, Promote);
+ AddPromotedToType(ISD::SIGN_EXTEND, MVT::i16, MVT::i32);
+
+ setOperationAction(ISD::ROTR, MVT::i16, Promote);
+ setOperationAction(ISD::ROTL, MVT::i16, Promote);
+
+ setOperationAction(ISD::SDIV, MVT::i16, Promote);
+ setOperationAction(ISD::UDIV, MVT::i16, Promote);
+ setOperationAction(ISD::SREM, MVT::i16, Promote);
+ setOperationAction(ISD::UREM, MVT::i16, Promote);
+
+ setOperationAction(ISD::BSWAP, MVT::i16, Promote);
+ setOperationAction(ISD::BITREVERSE, MVT::i16, Promote);
+
+ setOperationAction(ISD::CTTZ, MVT::i16, Promote);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16, Promote);
+ setOperationAction(ISD::CTLZ, MVT::i16, Promote);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i16, Promote);
+
+ setOperationAction(ISD::SELECT_CC, MVT::i16, Expand);
+
+ setOperationAction(ISD::BR_CC, MVT::i16, Expand);
+
+ setOperationAction(ISD::LOAD, MVT::i16, Custom);
+
+ setTruncStoreAction(MVT::i64, MVT::i16, Expand);
+
+ setOperationAction(ISD::FP16_TO_FP, MVT::i16, Promote);
+ AddPromotedToType(ISD::FP16_TO_FP, MVT::i16, MVT::i32);
+ setOperationAction(ISD::FP_TO_FP16, MVT::i16, Promote);
+ AddPromotedToType(ISD::FP_TO_FP16, MVT::i16, MVT::i32);
+
+ setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
+
+ // F16 - Constant Actions.
+ setOperationAction(ISD::ConstantFP, MVT::f16, Legal);
+
+ // F16 - Load/Store Actions.
+ setOperationAction(ISD::LOAD, MVT::f16, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::f16, MVT::i16);
+ setOperationAction(ISD::STORE, MVT::f16, Promote);
+ AddPromotedToType(ISD::STORE, MVT::f16, MVT::i16);
+
+ // F16 - VOP1 Actions.
+ setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
+ setOperationAction(ISD::FCOS, MVT::f16, Promote);
+ setOperationAction(ISD::FSIN, MVT::f16, Promote);
+ setOperationAction(ISD::FP_TO_SINT, MVT::f16, Promote);
+ setOperationAction(ISD::FP_TO_UINT, MVT::f16, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::f16, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::f16, Promote);
+
+ // F16 - VOP2 Actions.
+ setOperationAction(ISD::BR_CC, MVT::f16, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
+ setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
+ setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
+ setOperationAction(ISD::FDIV, MVT::f16, Custom);
+
+ // F16 - VOP3 Actions.
+ setOperationAction(ISD::FMA, MVT::f16, Legal);
+ if (!Subtarget->hasFP16Denormals())
+ setOperationAction(ISD::FMAD, MVT::f16, Legal);
+ }
+
setTargetDAGCombine(ISD::FADD);
setTargetDAGCombine(ISD::FSUB);
setTargetDAGCombine(ISD::FMINNUM);
@@ -229,6 +316,8 @@ SITargetLowering::SITargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::SETCC);
setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::OR);
+ setTargetDAGCombine(ISD::XOR);
+ setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::UINT_TO_FP);
setTargetDAGCombine(ISD::FCANONICALIZE);
@@ -357,6 +446,7 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
case AMDGPUAS::CONSTANT_ADDRESS: {
// If the offset isn't a multiple of 4, it probably isn't going to be
// correctly aligned.
+ // FIXME: Can we get the real alignment here?
if (AM.BaseOffs % 4 != 0)
return isLegalMUBUFAddressingMode(AM);
@@ -435,8 +525,12 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
// TODO: I think v3i32 should allow unaligned accesses on CI with DS_READ_B96,
// which isn't a simple VT.
- if (!VT.isSimple() || VT == MVT::Other)
+ // Until MVT is extended to handle this, simply check for the size and
+ // rely on the condition below: allow accesses if the size is a multiple of 4.
+ if (VT == MVT::Other || (VT != MVT::Other && VT.getSizeInBits() > 1024 &&
+ VT.getStoreSize() > 16)) {
return false;
+ }
if (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
AddrSpace == AMDGPUAS::REGION_ADDRESS) {
@@ -450,6 +544,15 @@ bool SITargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
return AlignedBy4;
}
+ // FIXME: We have to be conservative here and assume that flat operations
+ // will access scratch. If we had access to the IR function, then we
+ // could determine if any private memory was used in the function.
+ if (!Subtarget->hasUnalignedScratchAccess() &&
+ (AddrSpace == AMDGPUAS::PRIVATE_ADDRESS ||
+ AddrSpace == AMDGPUAS::FLAT_ADDRESS)) {
+ return false;
+ }
+
if (Subtarget->hasUnalignedBufferAccess()) {
// If we have an uniform constant load, it still requires using a slow
// buffer instruction if unaligned.
@@ -496,8 +599,8 @@ EVT SITargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
static bool isFlatGlobalAddrSpace(unsigned AS) {
return AS == AMDGPUAS::GLOBAL_ADDRESS ||
- AS == AMDGPUAS::FLAT_ADDRESS ||
- AS == AMDGPUAS::CONSTANT_ADDRESS;
+ AS == AMDGPUAS::FLAT_ADDRESS ||
+ AS == AMDGPUAS::CONSTANT_ADDRESS;
}
bool SITargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
@@ -505,6 +608,23 @@ bool SITargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
return isFlatGlobalAddrSpace(SrcAS) && isFlatGlobalAddrSpace(DestAS);
}
+bool SITargetLowering::isMemOpHasNoClobberedMemOperand(const SDNode *N) const {
+ const MemSDNode *MemNode = cast<MemSDNode>(N);
+ const Value *Ptr = MemNode->getMemOperand()->getValue();
+ const Instruction *I = dyn_cast<Instruction>(Ptr);
+ return I && I->getMetadata("amdgpu.noclobber");
+}
+
+bool SITargetLowering::isCheapAddrSpaceCast(unsigned SrcAS,
+ unsigned DestAS) const {
+ // Flat -> private/local is a simple truncate.
+ // Flat -> global is no-op
+ if (SrcAS == AMDGPUAS::FLAT_ADDRESS)
+ return true;
+
+ return isNoopAddrSpaceCast(SrcAS, DestAS);
+}
+
bool SITargetLowering::isMemOpUniform(const SDNode *N) const {
const MemSDNode *MemNode = cast<MemSDNode>(N);
const Value *Ptr = MemNode->getMemOperand()->getValue();
@@ -531,11 +651,27 @@ SITargetLowering::getPreferredVectorAction(EVT VT) const {
bool SITargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const {
- const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
- return TII->isInlineConstant(Imm);
+ // FIXME: Could be smarter if called for vector constants.
+ return true;
}
bool SITargetLowering::isTypeDesirableForOp(unsigned Op, EVT VT) const {
+ if (Subtarget->has16BitInsts() && VT == MVT::i16) {
+ switch (Op) {
+ case ISD::LOAD:
+ case ISD::STORE:
+
+ // These operations are done with 32-bit instructions anyway.
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case ISD::SELECT:
+ // TODO: Extensions?
+ return true;
+ default:
+ return false;
+ }
+ }
// SimplifySetCC uses this function to determine whether or not it should
// create setcc with i1 operands. We don't have instructions for i1 setcc.
@@ -560,26 +696,37 @@ SDValue SITargetLowering::LowerParameterPtr(SelectionDAG &DAG,
return DAG.getNode(ISD::ADD, SL, PtrVT, BasePtr,
DAG.getConstant(Offset, SL, PtrVT));
}
+
SDValue SITargetLowering::LowerParameter(SelectionDAG &DAG, EVT VT, EVT MemVT,
const SDLoc &SL, SDValue Chain,
unsigned Offset, bool Signed) const {
const DataLayout &DL = DAG.getDataLayout();
- Type *Ty = VT.getTypeForEVT(*DAG.getContext());
- MVT PtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
+ Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
- SDValue PtrOffset = DAG.getUNDEF(PtrVT);
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
unsigned Align = DL.getABITypeAlignment(Ty);
- ISD::LoadExtType ExtTy = Signed ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
+ SDValue Ptr = LowerParameterPtr(DAG, SL, Chain, Offset);
+ SDValue Load = DAG.getLoad(MemVT, SL, Chain, Ptr, PtrInfo, Align,
+ MachineMemOperand::MONonTemporal |
+ MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant);
+
+ SDValue Val;
if (MemVT.isFloatingPoint())
- ExtTy = ISD::EXTLOAD;
+ Val = getFPExtOrFPTrunc(DAG, Load, SL, VT);
+ else if (Signed)
+ Val = DAG.getSExtOrTrunc(Load, SL, VT);
+ else
+ Val = DAG.getZExtOrTrunc(Load, SL, VT);
+
+ SDValue Ops[] = {
+ Val,
+ Load.getValue(1)
+ };
- SDValue Ptr = LowerParameterPtr(DAG, SL, Chain, Offset);
- return DAG.getLoad(ISD::UNINDEXED, ExtTy, VT, SL, Chain, Ptr, PtrOffset,
- PtrInfo, MemVT, Align, MachineMemOperand::MONonTemporal |
- MachineMemOperand::MOInvariant);
+ return DAG.getMergeValues(Ops, SL);
}
SDValue SITargetLowering::LowerFormalArguments(
@@ -679,9 +826,6 @@ SDValue SITargetLowering::LowerFormalArguments(
}
if (!AMDGPU::isShader(CallConv)) {
- getOriginalFunctionArgs(DAG, DAG.getMachineFunction().getFunction(), Ins,
- Splits);
-
assert(Info->hasWorkGroupIDX() && Info->hasWorkItemIDX());
} else {
assert(!Info->hasPrivateSegmentBuffer() && !Info->hasDispatchPtr() &&
@@ -701,29 +845,38 @@ SDValue SITargetLowering::LowerFormalArguments(
if (Info->hasDispatchPtr()) {
unsigned DispatchPtrReg = Info->addDispatchPtr(*TRI);
- MF.addLiveIn(DispatchPtrReg, &AMDGPU::SReg_64RegClass);
+ MF.addLiveIn(DispatchPtrReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(DispatchPtrReg);
}
if (Info->hasQueuePtr()) {
unsigned QueuePtrReg = Info->addQueuePtr(*TRI);
- MF.addLiveIn(QueuePtrReg, &AMDGPU::SReg_64RegClass);
+ MF.addLiveIn(QueuePtrReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(QueuePtrReg);
}
if (Info->hasKernargSegmentPtr()) {
unsigned InputPtrReg = Info->addKernargSegmentPtr(*TRI);
- MF.addLiveIn(InputPtrReg, &AMDGPU::SReg_64RegClass);
+ MF.addLiveIn(InputPtrReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(InputPtrReg);
}
+ if (Info->hasDispatchID()) {
+ unsigned DispatchIDReg = Info->addDispatchID(*TRI);
+ MF.addLiveIn(DispatchIDReg, &AMDGPU::SGPR_64RegClass);
+ CCInfo.AllocateReg(DispatchIDReg);
+ }
+
if (Info->hasFlatScratchInit()) {
unsigned FlatScratchInitReg = Info->addFlatScratchInit(*TRI);
- MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SReg_64RegClass);
+ MF.addLiveIn(FlatScratchInitReg, &AMDGPU::SGPR_64RegClass);
CCInfo.AllocateReg(FlatScratchInitReg);
}
- AnalyzeFormalArguments(CCInfo, Splits);
+ if (!AMDGPU::isShader(CallConv))
+ analyzeFormalArgumentsCompute(CCInfo, Ins);
+ else
+ AnalyzeFormalArguments(CCInfo, Splits);
SmallVector<SDValue, 16> Chains;
@@ -740,7 +893,7 @@ SDValue SITargetLowering::LowerFormalArguments(
if (VA.isMemLoc()) {
VT = Ins[i].VT;
- EVT MemVT = Splits[i].VT;
+ EVT MemVT = VA.getLocVT();
const unsigned Offset = Subtarget->getExplicitKernelArgOffset() +
VA.getLocMemOffset();
// The first 36 bytes of the input buffer contains information about
@@ -761,7 +914,7 @@ SDValue SITargetLowering::LowerFormalArguments(
}
InVals.push_back(Arg);
- Info->ABIArgOffset = Offset + MemVT.getStoreSize();
+ Info->setABIArgOffset(Offset + MemVT.getStoreSize());
continue;
}
assert(VA.isRegLoc() && "Parameter must be in a register!");
@@ -771,8 +924,8 @@ SDValue SITargetLowering::LowerFormalArguments(
if (VT == MVT::i64) {
// For now assume it is a pointer
Reg = TRI->getMatchingSuperReg(Reg, AMDGPU::sub0,
- &AMDGPU::SReg_64RegClass);
- Reg = MF.addLiveIn(Reg, &AMDGPU::SReg_64RegClass);
+ &AMDGPU::SGPR_64RegClass);
+ Reg = MF.addLiveIn(Reg, &AMDGPU::SGPR_64RegClass);
SDValue Copy = DAG.getCopyFromReg(Chain, DL, Reg, VT);
InVals.push_back(Copy);
continue;
@@ -816,25 +969,25 @@ SDValue SITargetLowering::LowerFormalArguments(
// Start adding system SGPRs.
if (Info->hasWorkGroupIDX()) {
unsigned Reg = Info->addWorkGroupIDX();
- MF.addLiveIn(Reg, &AMDGPU::SReg_32RegClass);
+ MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass);
CCInfo.AllocateReg(Reg);
}
if (Info->hasWorkGroupIDY()) {
unsigned Reg = Info->addWorkGroupIDY();
- MF.addLiveIn(Reg, &AMDGPU::SReg_32RegClass);
+ MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass);
CCInfo.AllocateReg(Reg);
}
if (Info->hasWorkGroupIDZ()) {
unsigned Reg = Info->addWorkGroupIDZ();
- MF.addLiveIn(Reg, &AMDGPU::SReg_32RegClass);
+ MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass);
CCInfo.AllocateReg(Reg);
}
if (Info->hasWorkGroupInfo()) {
unsigned Reg = Info->addWorkGroupInfo();
- MF.addLiveIn(Reg, &AMDGPU::SReg_32RegClass);
+ MF.addLiveIn(Reg, &AMDGPU::SReg_32_XM0RegClass);
CCInfo.AllocateReg(Reg);
}
@@ -854,18 +1007,22 @@ SDValue SITargetLowering::LowerFormalArguments(
// Now that we've figured out where the scratch register inputs are, see if we
// should reserve the arguments and use them directly.
- bool HasStackObjects = MF.getFrameInfo()->hasStackObjects();
+ bool HasStackObjects = MF.getFrameInfo().hasStackObjects();
// Record that we know we have non-spill stack objects so we don't need to
// check all stack objects later.
if (HasStackObjects)
Info->setHasNonSpillStackObjects(true);
- if (ST.isAmdHsaOS()) {
- // TODO: Assume we will spill without optimizations.
+ // Everything live out of a block is spilled with fast regalloc, so it's
+ // almost certain that spilling will be required.
+ if (getTargetMachine().getOptLevel() == CodeGenOpt::None)
+ HasStackObjects = true;
+
+ if (ST.isAmdCodeObjectV2()) {
if (HasStackObjects) {
// If we have stack objects, we unquestionably need the private buffer
- // resource. For the HSA ABI, this will be the first 4 user SGPR
- // inputs. We can reserve those and use them directly.
+ // resource. For the Code Object V2 ABI, this will be the first 4 user
+ // SGPR inputs. We can reserve those and use them directly.
unsigned PrivateSegmentBufferReg = TRI->getPreloadedValue(
MF, SIRegisterInfo::PRIVATE_SEGMENT_BUFFER);
@@ -1088,64 +1245,551 @@ MachineBasicBlock *SITargetLowering::splitKillBlock(MachineInstr &MI,
MachineBasicBlock *SplitBB
= MF->CreateMachineBasicBlock(BB->getBasicBlock());
- // Fix the block phi references to point to the new block for the defs in the
- // second piece of the block.
- for (MachineBasicBlock *Succ : BB->successors()) {
- for (MachineInstr &MI : *Succ) {
- if (!MI.isPHI())
- break;
-
- for (unsigned I = 2, E = MI.getNumOperands(); I != E; I += 2) {
- MachineOperand &FromBB = MI.getOperand(I);
- if (BB == FromBB.getMBB()) {
- FromBB.setMBB(SplitBB);
- break;
- }
- }
- }
- }
-
MF->insert(++MachineFunction::iterator(BB), SplitBB);
SplitBB->splice(SplitBB->begin(), BB, SplitPoint, BB->end());
- SplitBB->transferSuccessors(BB);
+ SplitBB->transferSuccessorsAndUpdatePHIs(BB);
BB->addSuccessor(SplitBB);
MI.setDesc(TII->get(AMDGPU::SI_KILL_TERMINATOR));
return SplitBB;
}
+// Do a v_movrels_b32 or v_movreld_b32 for each unique value of \p IdxReg in the
+// wavefront. If the value is uniform and just happens to be in a VGPR, this
+// will only do one iteration. In the worst case, this will loop 64 times.
+//
+// TODO: Just use v_readlane_b32 if we know the VGPR has a uniform value.
+static MachineBasicBlock::iterator emitLoadM0FromVGPRLoop(
+ const SIInstrInfo *TII,
+ MachineRegisterInfo &MRI,
+ MachineBasicBlock &OrigBB,
+ MachineBasicBlock &LoopBB,
+ const DebugLoc &DL,
+ const MachineOperand &IdxReg,
+ unsigned InitReg,
+ unsigned ResultReg,
+ unsigned PhiReg,
+ unsigned InitSaveExecReg,
+ int Offset,
+ bool UseGPRIdxMode) {
+ MachineBasicBlock::iterator I = LoopBB.begin();
+
+ unsigned PhiExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ unsigned NewExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ unsigned CurrentIdxReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ unsigned CondReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+
+ BuildMI(LoopBB, I, DL, TII->get(TargetOpcode::PHI), PhiReg)
+ .addReg(InitReg)
+ .addMBB(&OrigBB)
+ .addReg(ResultReg)
+ .addMBB(&LoopBB);
+
+ BuildMI(LoopBB, I, DL, TII->get(TargetOpcode::PHI), PhiExec)
+ .addReg(InitSaveExecReg)
+ .addMBB(&OrigBB)
+ .addReg(NewExec)
+ .addMBB(&LoopBB);
+
+ // Read the next variant <- also loop target.
+ BuildMI(LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), CurrentIdxReg)
+ .addReg(IdxReg.getReg(), getUndefRegState(IdxReg.isUndef()));
+
+ // Compare the just read M0 value to all possible Idx values.
+ BuildMI(LoopBB, I, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e64), CondReg)
+ .addReg(CurrentIdxReg)
+ .addReg(IdxReg.getReg(), 0, IdxReg.getSubReg());
+
+ if (UseGPRIdxMode) {
+ unsigned IdxReg;
+ if (Offset == 0) {
+ IdxReg = CurrentIdxReg;
+ } else {
+ IdxReg = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
+ BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_ADD_I32), IdxReg)
+ .addReg(CurrentIdxReg, RegState::Kill)
+ .addImm(Offset);
+ }
+
+ MachineInstr *SetIdx =
+ BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_SET_GPR_IDX_IDX))
+ .addReg(IdxReg, RegState::Kill);
+ SetIdx->getOperand(2).setIsUndef();
+ } else {
+ // Move index from VCC into M0
+ if (Offset == 0) {
+ BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
+ .addReg(CurrentIdxReg, RegState::Kill);
+ } else {
+ BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0)
+ .addReg(CurrentIdxReg, RegState::Kill)
+ .addImm(Offset);
+ }
+ }
+
+ // Update EXEC, save the original EXEC value to VCC.
+ BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), NewExec)
+ .addReg(CondReg, RegState::Kill);
+
+ MRI.setSimpleHint(NewExec, CondReg);
+
+ // Update EXEC, switch all done bits to 0 and all todo bits to 1.
+ MachineInstr *InsertPt =
+ BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC)
+ .addReg(NewExec);
+
+ // XXX - s_xor_b64 sets scc to 1 if the result is nonzero, so can we use
+ // s_cbranch_scc0?
+
+ // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover.
+ BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
+ .addMBB(&LoopBB);
+
+ return InsertPt->getIterator();
+}
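// Illustrative sketch, not part of the commit: shape of the waterfall loop
// emitted above (shown for the M0 path with offset 0); register names and the
// label are made up.
//
//   loop:
//     v_readfirstlane_b32 s_cur, v_idx           ; pick one lane's index
//     v_cmp_eq_u32_e64    s[c:c+1], s_cur, v_idx ; lanes sharing that index
//     s_mov_b32           m0, s_cur
//     s_and_saveexec_b64  s[n:n+1], s[c:c+1]     ; restrict exec to those lanes
//     ;; the indexed access is inserted here by the caller
//     s_xor_b64           exec, exec, s[n:n+1]   ; retire the handled lanes
//     s_cbranch_execnz    loop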
+
+// This has slightly sub-optimal regalloc when the source vector is killed by
+// the read. The register allocator does not understand that the kill is
+// per-workitem, so the source is kept alive for the whole loop and we end up
+// not re-using a subregister from it, using 1 more VGPR than necessary. This
+// extra VGPR was saved when this was expanded after register allocation.
+static MachineBasicBlock::iterator loadM0FromVGPR(const SIInstrInfo *TII,
+ MachineBasicBlock &MBB,
+ MachineInstr &MI,
+ unsigned InitResultReg,
+ unsigned PhiReg,
+ int Offset,
+ bool UseGPRIdxMode) {
+ MachineFunction *MF = MBB.getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ const DebugLoc &DL = MI.getDebugLoc();
+ MachineBasicBlock::iterator I(&MI);
+
+ unsigned DstReg = MI.getOperand(0).getReg();
+ unsigned SaveExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ unsigned TmpExec = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+
+ BuildMI(MBB, I, DL, TII->get(TargetOpcode::IMPLICIT_DEF), TmpExec);
+
+ // Save the EXEC mask
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B64), SaveExec)
+ .addReg(AMDGPU::EXEC);
+
+ // To insert the loop we need to split the block. Move everything after this
+ // point to a new block, and insert a new empty block between the two.
+ MachineBasicBlock *LoopBB = MF->CreateMachineBasicBlock();
+ MachineBasicBlock *RemainderBB = MF->CreateMachineBasicBlock();
+ MachineFunction::iterator MBBI(MBB);
+ ++MBBI;
+
+ MF->insert(MBBI, LoopBB);
+ MF->insert(MBBI, RemainderBB);
+
+ LoopBB->addSuccessor(LoopBB);
+ LoopBB->addSuccessor(RemainderBB);
+
+ // Move the rest of the block into a new block.
+ RemainderBB->transferSuccessorsAndUpdatePHIs(&MBB);
+ RemainderBB->splice(RemainderBB->begin(), &MBB, I, MBB.end());
+
+ MBB.addSuccessor(LoopBB);
+
+ const MachineOperand *Idx = TII->getNamedOperand(MI, AMDGPU::OpName::idx);
+
+ auto InsPt = emitLoadM0FromVGPRLoop(TII, MRI, MBB, *LoopBB, DL, *Idx,
+ InitResultReg, DstReg, PhiReg, TmpExec,
+ Offset, UseGPRIdxMode);
+
+ MachineBasicBlock::iterator First = RemainderBB->begin();
+ BuildMI(*RemainderBB, First, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
+ .addReg(SaveExec);
+
+ return InsPt;
+}
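+
+// The control flow produced above is roughly:
+//
+//   MBB         -> saves EXEC and falls through to the loop
+//   LoopBB      -> waterfall loop from emitLoadM0FromVGPRLoop; branches back to
+//                  itself while any lane still has an unhandled index
+//   RemainderBB -> restores the saved EXEC mask and holds the instructions that
+//                  originally followed MI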
+
+// Returns subreg index, offset
+static std::pair<unsigned, int>
+computeIndirectRegAndOffset(const SIRegisterInfo &TRI,
+ const TargetRegisterClass *SuperRC,
+ unsigned VecReg,
+ int Offset) {
+ int NumElts = SuperRC->getSize() / 4;
+
+ // Skip out of bounds offsets, or else we would end up using an undefined
+ // register.
+ if (Offset >= NumElts || Offset < 0)
+ return std::make_pair(AMDGPU::sub0, Offset);
+
+ return std::make_pair(AMDGPU::sub0 + Offset, 0);
+}
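+
+// For example, with a 128-bit (four element) register class, Offset 2 yields
+// (sub2, 0) so the element is addressed purely by subregister, while an
+// out-of-range Offset such as 7 is returned unchanged as (sub0, 7).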
+
+// Return true if the index is an SGPR and was set.
+static bool setM0ToIndexFromSGPR(const SIInstrInfo *TII,
+ MachineRegisterInfo &MRI,
+ MachineInstr &MI,
+ int Offset,
+ bool UseGPRIdxMode,
+ bool IsIndirectSrc) {
+ MachineBasicBlock *MBB = MI.getParent();
+ const DebugLoc &DL = MI.getDebugLoc();
+ MachineBasicBlock::iterator I(&MI);
+
+ const MachineOperand *Idx = TII->getNamedOperand(MI, AMDGPU::OpName::idx);
+ const TargetRegisterClass *IdxRC = MRI.getRegClass(Idx->getReg());
+
+ assert(Idx->getReg() != AMDGPU::NoRegister);
+
+ if (!TII->getRegisterInfo().isSGPRClass(IdxRC))
+ return false;
+
+ if (UseGPRIdxMode) {
+ unsigned IdxMode = IsIndirectSrc ?
+ VGPRIndexMode::SRC0_ENABLE : VGPRIndexMode::DST_ENABLE;
+ if (Offset == 0) {
+ MachineInstr *SetOn =
+ BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_SET_GPR_IDX_ON))
+ .addOperand(*Idx)
+ .addImm(IdxMode);
+
+ SetOn->getOperand(3).setIsUndef();
+ } else {
+ unsigned Tmp = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), Tmp)
+ .addOperand(*Idx)
+ .addImm(Offset);
+ MachineInstr *SetOn =
+ BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_SET_GPR_IDX_ON))
+ .addReg(Tmp, RegState::Kill)
+ .addImm(IdxMode);
+
+ SetOn->getOperand(3).setIsUndef();
+ }
+
+ return true;
+ }
+
+ if (Offset == 0) {
+ BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
+ .addOperand(*Idx);
+ } else {
+ BuildMI(*MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0)
+ .addOperand(*Idx)
+ .addImm(Offset);
+ }
+
+ return true;
+}
+
+// Control flow needs to be inserted if indexing with a VGPR.
+static MachineBasicBlock *emitIndirectSrc(MachineInstr &MI,
+ MachineBasicBlock &MBB,
+ const SISubtarget &ST) {
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ const SIRegisterInfo &TRI = TII->getRegisterInfo();
+ MachineFunction *MF = MBB.getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ unsigned Dst = MI.getOperand(0).getReg();
+ unsigned SrcReg = TII->getNamedOperand(MI, AMDGPU::OpName::src)->getReg();
+ int Offset = TII->getNamedOperand(MI, AMDGPU::OpName::offset)->getImm();
+
+ const TargetRegisterClass *VecRC = MRI.getRegClass(SrcReg);
+
+ unsigned SubReg;
+ std::tie(SubReg, Offset)
+ = computeIndirectRegAndOffset(TRI, VecRC, SrcReg, Offset);
+
+ bool UseGPRIdxMode = ST.hasVGPRIndexMode() && EnableVGPRIndexMode;
+
+ if (setM0ToIndexFromSGPR(TII, MRI, MI, Offset, UseGPRIdxMode, true)) {
+ MachineBasicBlock::iterator I(&MI);
+ const DebugLoc &DL = MI.getDebugLoc();
+
+ if (UseGPRIdxMode) {
+ // TODO: Look at the uses to avoid the copy. This may require rescheduling
+ // to avoid interfering with other uses, so probably requires a new
+ // optimization pass.
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::V_MOV_B32_e32), Dst)
+ .addReg(SrcReg, RegState::Undef, SubReg)
+ .addReg(SrcReg, RegState::Implicit)
+ .addReg(AMDGPU::M0, RegState::Implicit);
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SET_GPR_IDX_OFF));
+ } else {
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
+ .addReg(SrcReg, RegState::Undef, SubReg)
+ .addReg(SrcReg, RegState::Implicit);
+ }
+
+ MI.eraseFromParent();
+
+ return &MBB;
+ }
+
+
+ const DebugLoc &DL = MI.getDebugLoc();
+ MachineBasicBlock::iterator I(&MI);
+
+ unsigned PhiReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ unsigned InitReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+
+ BuildMI(MBB, I, DL, TII->get(TargetOpcode::IMPLICIT_DEF), InitReg);
+
+ if (UseGPRIdxMode) {
+ MachineInstr *SetOn = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SET_GPR_IDX_ON))
+ .addImm(0) // Reset inside loop.
+ .addImm(VGPRIndexMode::SRC0_ENABLE);
+ SetOn->getOperand(3).setIsUndef();
+
+ // Disable again after the loop.
+ BuildMI(MBB, std::next(I), DL, TII->get(AMDGPU::S_SET_GPR_IDX_OFF));
+ }
+
+ auto InsPt = loadM0FromVGPR(TII, MBB, MI, InitReg, PhiReg, Offset, UseGPRIdxMode);
+ MachineBasicBlock *LoopBB = InsPt->getParent();
+
+ if (UseGPRIdxMode) {
+ BuildMI(*LoopBB, InsPt, DL, TII->get(AMDGPU::V_MOV_B32_e32), Dst)
+ .addReg(SrcReg, RegState::Undef, SubReg)
+ .addReg(SrcReg, RegState::Implicit)
+ .addReg(AMDGPU::M0, RegState::Implicit);
+ } else {
+ BuildMI(*LoopBB, InsPt, DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
+ .addReg(SrcReg, RegState::Undef, SubReg)
+ .addReg(SrcReg, RegState::Implicit);
+ }
+
+ MI.eraseFromParent();
+
+ return LoopBB;
+}
+
+static unsigned getMOVRELDPseudo(const TargetRegisterClass *VecRC) {
+ switch (VecRC->getSize()) {
+ case 4:
+ return AMDGPU::V_MOVRELD_B32_V1;
+ case 8:
+ return AMDGPU::V_MOVRELD_B32_V2;
+ case 16:
+ return AMDGPU::V_MOVRELD_B32_V4;
+ case 32:
+ return AMDGPU::V_MOVRELD_B32_V8;
+ case 64:
+ return AMDGPU::V_MOVRELD_B32_V16;
+ default:
+ llvm_unreachable("unsupported size for MOVRELD pseudos");
+ }
+}
+
+static MachineBasicBlock *emitIndirectDst(MachineInstr &MI,
+ MachineBasicBlock &MBB,
+ const SISubtarget &ST) {
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ const SIRegisterInfo &TRI = TII->getRegisterInfo();
+ MachineFunction *MF = MBB.getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ unsigned Dst = MI.getOperand(0).getReg();
+ const MachineOperand *SrcVec = TII->getNamedOperand(MI, AMDGPU::OpName::src);
+ const MachineOperand *Idx = TII->getNamedOperand(MI, AMDGPU::OpName::idx);
+ const MachineOperand *Val = TII->getNamedOperand(MI, AMDGPU::OpName::val);
+ int Offset = TII->getNamedOperand(MI, AMDGPU::OpName::offset)->getImm();
+ const TargetRegisterClass *VecRC = MRI.getRegClass(SrcVec->getReg());
+
+ // This can be an immediate, but will be folded later.
+ assert(Val->getReg());
+
+ unsigned SubReg;
+ std::tie(SubReg, Offset) = computeIndirectRegAndOffset(TRI, VecRC,
+ SrcVec->getReg(),
+ Offset);
+ bool UseGPRIdxMode = ST.hasVGPRIndexMode() && EnableVGPRIndexMode;
+
+ if (Idx->getReg() == AMDGPU::NoRegister) {
+ MachineBasicBlock::iterator I(&MI);
+ const DebugLoc &DL = MI.getDebugLoc();
+
+ assert(Offset == 0);
+
+ BuildMI(MBB, I, DL, TII->get(TargetOpcode::INSERT_SUBREG), Dst)
+ .addOperand(*SrcVec)
+ .addOperand(*Val)
+ .addImm(SubReg);
+
+ MI.eraseFromParent();
+ return &MBB;
+ }
+
+ if (setM0ToIndexFromSGPR(TII, MRI, MI, Offset, UseGPRIdxMode, false)) {
+ MachineBasicBlock::iterator I(&MI);
+ const DebugLoc &DL = MI.getDebugLoc();
+
+ if (UseGPRIdxMode) {
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::V_MOV_B32_indirect))
+ .addReg(SrcVec->getReg(), RegState::Undef, SubReg) // vdst
+ .addOperand(*Val)
+ .addReg(Dst, RegState::ImplicitDefine)
+ .addReg(SrcVec->getReg(), RegState::Implicit)
+ .addReg(AMDGPU::M0, RegState::Implicit);
+
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SET_GPR_IDX_OFF));
+ } else {
+ const MCInstrDesc &MovRelDesc = TII->get(getMOVRELDPseudo(VecRC));
+
+ BuildMI(MBB, I, DL, MovRelDesc)
+ .addReg(Dst, RegState::Define)
+ .addReg(SrcVec->getReg())
+ .addOperand(*Val)
+ .addImm(SubReg - AMDGPU::sub0);
+ }
+
+ MI.eraseFromParent();
+ return &MBB;
+ }
+
+ if (Val->isReg())
+ MRI.clearKillFlags(Val->getReg());
+
+ const DebugLoc &DL = MI.getDebugLoc();
+
+ if (UseGPRIdxMode) {
+ MachineBasicBlock::iterator I(&MI);
+
+ MachineInstr *SetOn = BuildMI(MBB, I, DL, TII->get(AMDGPU::S_SET_GPR_IDX_ON))
+ .addImm(0) // Reset inside loop.
+ .addImm(VGPRIndexMode::DST_ENABLE);
+ SetOn->getOperand(3).setIsUndef();
+
+ // Disable again after the loop.
+ BuildMI(MBB, std::next(I), DL, TII->get(AMDGPU::S_SET_GPR_IDX_OFF));
+ }
+
+ unsigned PhiReg = MRI.createVirtualRegister(VecRC);
+
+ auto InsPt = loadM0FromVGPR(TII, MBB, MI, SrcVec->getReg(), PhiReg,
+ Offset, UseGPRIdxMode);
+ MachineBasicBlock *LoopBB = InsPt->getParent();
+
+ if (UseGPRIdxMode) {
+ BuildMI(*LoopBB, InsPt, DL, TII->get(AMDGPU::V_MOV_B32_indirect))
+ .addReg(PhiReg, RegState::Undef, SubReg) // vdst
+ .addOperand(*Val) // src0
+ .addReg(Dst, RegState::ImplicitDefine)
+ .addReg(PhiReg, RegState::Implicit)
+ .addReg(AMDGPU::M0, RegState::Implicit);
+ } else {
+ const MCInstrDesc &MovRelDesc = TII->get(getMOVRELDPseudo(VecRC));
+
+ BuildMI(*LoopBB, InsPt, DL, MovRelDesc)
+ .addReg(Dst, RegState::Define)
+ .addReg(PhiReg)
+ .addOperand(*Val)
+ .addImm(SubReg - AMDGPU::sub0);
+ }
+
+ MI.eraseFromParent();
+
+ return LoopBB;
+}
+
MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
MachineInstr &MI, MachineBasicBlock *BB) const {
+
+ const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
+ MachineFunction *MF = BB->getParent();
+ SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
+
+ if (TII->isMIMG(MI)) {
+ if (!MI.memoperands_empty())
+ return BB;
+ // Add a memoperand for mimg instructions so that they aren't assumed to
+ // be ordered memory instructions.
+
+ MachinePointerInfo PtrInfo(MFI->getImagePSV());
+ MachineMemOperand::Flags Flags = MachineMemOperand::MODereferenceable;
+ if (MI.mayStore())
+ Flags |= MachineMemOperand::MOStore;
+
+ if (MI.mayLoad())
+ Flags |= MachineMemOperand::MOLoad;
+
+ auto MMO = MF->getMachineMemOperand(PtrInfo, Flags, 0, 0);
+ MI.addMemOperand(*MF, MMO);
+ return BB;
+ }
+
switch (MI.getOpcode()) {
case AMDGPU::SI_INIT_M0: {
- const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
BuildMI(*BB, MI.getIterator(), MI.getDebugLoc(),
TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
- .addOperand(MI.getOperand(0));
+ .addOperand(MI.getOperand(0));
MI.eraseFromParent();
- break;
- }
- case AMDGPU::BRANCH:
return BB;
+ }
case AMDGPU::GET_GROUPSTATICSIZE: {
- const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
-
- MachineFunction *MF = BB->getParent();
- SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
DebugLoc DL = MI.getDebugLoc();
BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_MOV_B32))
.addOperand(MI.getOperand(0))
- .addImm(MFI->LDSSize);
+ .addImm(MFI->getLDSSize());
MI.eraseFromParent();
return BB;
}
+ case AMDGPU::SI_INDIRECT_SRC_V1:
+ case AMDGPU::SI_INDIRECT_SRC_V2:
+ case AMDGPU::SI_INDIRECT_SRC_V4:
+ case AMDGPU::SI_INDIRECT_SRC_V8:
+ case AMDGPU::SI_INDIRECT_SRC_V16:
+ return emitIndirectSrc(MI, *BB, *getSubtarget());
+ case AMDGPU::SI_INDIRECT_DST_V1:
+ case AMDGPU::SI_INDIRECT_DST_V2:
+ case AMDGPU::SI_INDIRECT_DST_V4:
+ case AMDGPU::SI_INDIRECT_DST_V8:
+ case AMDGPU::SI_INDIRECT_DST_V16:
+ return emitIndirectDst(MI, *BB, *getSubtarget());
case AMDGPU::SI_KILL:
return splitKillBlock(MI, BB);
+ case AMDGPU::V_CNDMASK_B64_PSEUDO: {
+ MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
+
+ unsigned Dst = MI.getOperand(0).getReg();
+ unsigned Src0 = MI.getOperand(1).getReg();
+ unsigned Src1 = MI.getOperand(2).getReg();
+ const DebugLoc &DL = MI.getDebugLoc();
+ unsigned SrcCond = MI.getOperand(3).getReg();
+
+ unsigned DstLo = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ unsigned DstHi = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+
+ BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstLo)
+ .addReg(Src0, 0, AMDGPU::sub0)
+ .addReg(Src1, 0, AMDGPU::sub0)
+ .addReg(SrcCond);
+ BuildMI(*BB, MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64), DstHi)
+ .addReg(Src0, 0, AMDGPU::sub1)
+ .addReg(Src1, 0, AMDGPU::sub1)
+ .addReg(SrcCond);
+
+ BuildMI(*BB, MI, DL, TII->get(AMDGPU::REG_SEQUENCE), Dst)
+ .addReg(DstLo)
+ .addImm(AMDGPU::sub0)
+ .addReg(DstHi)
+ .addImm(AMDGPU::sub1);
+ MI.eraseFromParent();
+ return BB;
+ }
+ case AMDGPU::SI_BR_UNDEF: {
+ const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
+ const DebugLoc &DL = MI.getDebugLoc();
+ MachineInstr *Br = BuildMI(*BB, MI, DL, TII->get(AMDGPU::S_CBRANCH_SCC1))
+ .addOperand(MI.getOperand(0));
+ Br->getOperand(1).setIsUndef(true); // read undef SCC
+ MI.eraseFromParent();
+ return BB;
+ }
default:
return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
}
- return BB;
}
bool SITargetLowering::enableAggressiveFMAFusion(EVT VT) const {
@@ -1167,8 +1811,10 @@ EVT SITargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx,
return EVT::getVectorVT(Ctx, MVT::i1, VT.getVectorNumElements());
}
-MVT SITargetLowering::getScalarShiftAmountTy(const DataLayout &, EVT) const {
- return MVT::i32;
+MVT SITargetLowering::getScalarShiftAmountTy(const DataLayout &, EVT VT) const {
+ // TODO: Should i16 always be used when legal? For now it would force VALU
+ // shifts.
+ return (VT == MVT::i16) ? MVT::i16 : MVT::i32;
}
// Answering this is somewhat tricky and depends on the specific device which
@@ -1201,6 +1847,8 @@ bool SITargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
return Subtarget->hasFP32Denormals() && Subtarget->hasFastFMAF32();
case MVT::f64:
return true;
+ case MVT::f16:
+ return Subtarget->has16BitInsts() && Subtarget->hasFP16Denormals();
default:
break;
}
@@ -1215,7 +1863,6 @@ bool SITargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
- case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
case ISD::LOAD: {
SDValue Result = LowerLOAD(Op, DAG);
@@ -1242,6 +1889,8 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG);
case ISD::ADDRSPACECAST: return lowerADDRSPACECAST(Op, DAG);
case ISD::TRAP: return lowerTRAP(Op, DAG);
+ case ISD::FP_ROUND:
+ return lowerFP_ROUND(Op, DAG);
}
return SDValue();
}
@@ -1262,58 +1911,31 @@ static SDNode *findUser(SDValue Value, unsigned Opcode) {
return nullptr;
}
-SDValue SITargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
-
- SDLoc SL(Op);
- FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Op);
- unsigned FrameIndex = FINode->getIndex();
-
- // A FrameIndex node represents a 32-bit offset into scratch memory. If the
- // high bit of a frame index offset were to be set, this would mean that it
- // represented an offset of ~2GB * 64 = ~128GB from the start of the scratch
- // buffer, with 64 being the number of threads per wave.
- //
- // The maximum private allocation for the entire GPU is 4G, and we are
- // concerned with the largest the index could ever be for an individual
- // workitem. This will occur with the minmum dispatch size. If a program
- // requires more, the dispatch size will be reduced.
- //
- // With this limit, we can mark the high bit of the FrameIndex node as known
- // zero, which is important, because it means in most situations we can prove
- // that values derived from FrameIndex nodes are non-negative. This enables us
- // to take advantage of more addressing modes when accessing scratch buffers,
- // since for scratch reads/writes, the register offset must always be
- // positive.
-
- uint64_t MaxGPUAlloc = UINT64_C(4) * 1024 * 1024 * 1024;
-
- // XXX - It is unclear if partial dispatch works. Assume it works at half wave
- // granularity. It is probably a full wave.
- uint64_t MinGranularity = 32;
-
- unsigned KnownBits = Log2_64(MaxGPUAlloc / MinGranularity);
- EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), KnownBits);
-
- SDValue TFI = DAG.getTargetFrameIndex(FrameIndex, MVT::i32);
- return DAG.getNode(ISD::AssertZext, SL, MVT::i32, TFI,
- DAG.getValueType(ExtVT));
-}
-
bool SITargetLowering::isCFIntrinsic(const SDNode *Intr) const {
- if (Intr->getOpcode() != ISD::INTRINSIC_W_CHAIN)
- return false;
+ if (Intr->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
+ switch (cast<ConstantSDNode>(Intr->getOperand(1))->getZExtValue()) {
+ case AMDGPUIntrinsic::amdgcn_if:
+ case AMDGPUIntrinsic::amdgcn_else:
+ case AMDGPUIntrinsic::amdgcn_end_cf:
+ case AMDGPUIntrinsic::amdgcn_loop:
+ return true;
+ default:
+ return false;
+ }
+ }
- switch (cast<ConstantSDNode>(Intr->getOperand(1))->getZExtValue()) {
- default: return false;
- case AMDGPUIntrinsic::amdgcn_if:
- case AMDGPUIntrinsic::amdgcn_else:
- case AMDGPUIntrinsic::amdgcn_break:
- case AMDGPUIntrinsic::amdgcn_if_break:
- case AMDGPUIntrinsic::amdgcn_else_break:
- case AMDGPUIntrinsic::amdgcn_loop:
- case AMDGPUIntrinsic::amdgcn_end_cf:
- return true;
+ if (Intr->getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
+ switch (cast<ConstantSDNode>(Intr->getOperand(0))->getZExtValue()) {
+ case AMDGPUIntrinsic::amdgcn_break:
+ case AMDGPUIntrinsic::amdgcn_if_break:
+ case AMDGPUIntrinsic::amdgcn_else_break:
+ return true;
+ default:
+ return false;
+ }
}
+
+ return false;
}
void SITargetLowering::createDebuggerPrologueStackObjects(
@@ -1334,14 +1956,31 @@ void SITargetLowering::createDebuggerPrologueStackObjects(
// For each dimension:
for (unsigned i = 0; i < 3; ++i) {
// Create fixed stack object for work group ID.
- ObjectIdx = MF.getFrameInfo()->CreateFixedObject(4, i * 4, true);
+ ObjectIdx = MF.getFrameInfo().CreateFixedObject(4, i * 4, true);
Info->setDebuggerWorkGroupIDStackObjectIndex(i, ObjectIdx);
// Create fixed stack object for work item ID.
- ObjectIdx = MF.getFrameInfo()->CreateFixedObject(4, i * 4 + 16, true);
+ ObjectIdx = MF.getFrameInfo().CreateFixedObject(4, i * 4 + 16, true);
Info->setDebuggerWorkItemIDStackObjectIndex(i, ObjectIdx);
}
}
+bool SITargetLowering::shouldEmitFixup(const GlobalValue *GV) const {
+ const Triple &TT = getTargetMachine().getTargetTriple();
+ return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
+ AMDGPU::shouldEmitConstantsToTextSection(TT);
+}
+
+bool SITargetLowering::shouldEmitGOTReloc(const GlobalValue *GV) const {
+ return (GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
+ GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) &&
+ !shouldEmitFixup(GV) &&
+ !getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
+}
+
+bool SITargetLowering::shouldEmitPCReloc(const GlobalValue *GV) const {
+ return !shouldEmitFixup(GV) && !shouldEmitGOTReloc(GV);
+}
+
/// This transforms the control flow intrinsics to get the branch destination as
/// the last parameter, and also switches the branch target with BR if the need arises.
SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
@@ -1365,30 +2004,50 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
Target = BR->getOperand(1);
}
+ // FIXME: This changes the types of the intrinsics instead of introducing new
+ // nodes with the correct types.
+ // e.g. llvm.amdgcn.loop
+
+ // eg: i1,ch = llvm.amdgcn.loop t0, TargetConstant:i32<6271>, t3
+ // => t9: ch = llvm.amdgcn.loop t0, TargetConstant:i32<6271>, t3, BasicBlock:ch<bb1 0x7fee5286d088>
+
if (!isCFIntrinsic(Intr)) {
// This is a uniform branch so we don't need to legalize.
return BRCOND;
}
+ bool HaveChain = Intr->getOpcode() == ISD::INTRINSIC_VOID ||
+ Intr->getOpcode() == ISD::INTRINSIC_W_CHAIN;
+
assert(!SetCC ||
(SetCC->getConstantOperandVal(1) == 1 &&
cast<CondCodeSDNode>(SetCC->getOperand(2).getNode())->get() ==
ISD::SETNE));
- // Build the result and
- ArrayRef<EVT> Res(Intr->value_begin() + 1, Intr->value_end());
-
// operands of the new intrinsic call
SmallVector<SDValue, 4> Ops;
- Ops.push_back(BRCOND.getOperand(0));
- Ops.append(Intr->op_begin() + 1, Intr->op_end());
+ if (HaveChain)
+ Ops.push_back(BRCOND.getOperand(0));
+
+ Ops.append(Intr->op_begin() + (HaveChain ? 1 : 0), Intr->op_end());
Ops.push_back(Target);
+ ArrayRef<EVT> Res(Intr->value_begin() + 1, Intr->value_end());
+
// build the new intrinsic call
SDNode *Result = DAG.getNode(
Res.size() > 1 ? ISD::INTRINSIC_W_CHAIN : ISD::INTRINSIC_VOID, DL,
DAG.getVTList(Res), Ops).getNode();
+ if (!HaveChain) {
+ SDValue Ops[] = {
+ SDValue(Result, 0),
+ BRCOND.getOperand(0)
+ };
+
+ Result = DAG.getMergeValues(Ops, DL).getNode();
+ }
+
if (BR) {
// Give the branch instruction our target
SDValue Ops[] = {
@@ -1425,6 +2084,31 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
return Chain;
}
+SDValue SITargetLowering::getFPExtOrFPTrunc(SelectionDAG &DAG,
+ SDValue Op,
+ const SDLoc &DL,
+ EVT VT) const {
+ return Op.getValueType().bitsLE(VT) ?
+ DAG.getNode(ISD::FP_EXTEND, DL, VT, Op) :
+ DAG.getNode(ISD::FTRUNC, DL, VT, Op);
+}
+
+SDValue SITargetLowering::lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
+ assert(Op.getValueType() == MVT::f16 &&
+ "Do not know how to custom lower FP_ROUND for non-f16 type");
+
+ SDValue Src = Op.getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ if (SrcVT != MVT::f64)
+ return Op;
+
+ SDLoc DL(Op);
+
+ SDValue FpToFp16 = DAG.getNode(ISD::FP_TO_FP16, DL, MVT::i32, Src);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FpToFp16);
+ return DAG.getNode(ISD::BITCAST, DL, MVT::f16, Trunc);
+}
+
SDValue SITargetLowering::getSegmentAperture(unsigned AS,
SelectionDAG &DAG) const {
SDLoc SL;
@@ -1452,7 +2136,8 @@ SDValue SITargetLowering::getSegmentAperture(unsigned AS,
MachinePointerInfo PtrInfo(V, StructOffset);
return DAG.getLoad(MVT::i32, SL, QueuePtr.getValue(1), Ptr, PtrInfo,
MinAlign(64, StructOffset),
- MachineMemOperand::MOInvariant);
+ MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant);
}
SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op,
@@ -1505,17 +2190,12 @@ SDValue SITargetLowering::lowerADDRSPACECAST(SDValue Op,
return DAG.getUNDEF(ASC->getValueType(0));
}
-static bool shouldEmitGOTReloc(const GlobalValue *GV,
- const TargetMachine &TM) {
- return GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
- !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
-}
-
bool
SITargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// We can fold offsets for anything that doesn't require a GOT relocation.
- return GA->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
- !shouldEmitGOTReloc(GA->getGlobal(), getTargetMachine());
+ return (GA->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
+ GA->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS) &&
+ !shouldEmitGOTReloc(GA->getGlobal());
}
static SDValue buildPCRelGlobalAddress(SelectionDAG &DAG, const GlobalValue *GV,
@@ -1523,14 +2203,27 @@ static SDValue buildPCRelGlobalAddress(SelectionDAG &DAG, const GlobalValue *GV,
unsigned GAFlags = SIInstrInfo::MO_NONE) {
// In order to support pc-relative addressing, the PC_ADD_REL_OFFSET SDNode is
// lowered to the following code sequence:
- // s_getpc_b64 s[0:1]
- // s_add_u32 s0, s0, $symbol
- // s_addc_u32 s1, s1, 0
//
- // s_getpc_b64 returns the address of the s_add_u32 instruction and then
- // a fixup or relocation is emitted to replace $symbol with a literal
- // constant, which is a pc-relative offset from the encoding of the $symbol
- // operand to the global variable.
+ // For constant address space:
+ // s_getpc_b64 s[0:1]
+ // s_add_u32 s0, s0, $symbol
+ // s_addc_u32 s1, s1, 0
+ //
+ // s_getpc_b64 returns the address of the s_add_u32 instruction and then
+ // a fixup or relocation is emitted to replace $symbol with a literal
+ // constant, which is a pc-relative offset from the encoding of the $symbol
+ // operand to the global variable.
+ //
+ // For global address space:
+ // s_getpc_b64 s[0:1]
+ // s_add_u32 s0, s0, $symbol@{gotpc}rel32@lo
+ // s_addc_u32 s1, s1, $symbol@{gotpc}rel32@hi
+ //
+ // s_getpc_b64 returns the address of the s_add_u32 instruction and then
+ // fixups or relocations are emitted to replace $symbol@*@lo and
+ // $symbol@*@hi with lower 32 bits and higher 32 bits of a literal constant,
+ // which is a 64-bit pc-relative offset from the encoding of the $symbol
+ // operand to the global variable.
//
// What we want here is an offset from the value returned by s_getpc
// (which is the address of the s_add_u32 instruction) to the global
@@ -1538,9 +2231,12 @@ static SDValue buildPCRelGlobalAddress(SelectionDAG &DAG, const GlobalValue *GV,
// of the s_add_u32 instruction, we end up with an offset that is 4 bytes too
// small. This requires us to add 4 to the global variable offset in order to
// compute the correct address.
- SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, Offset + 4,
- GAFlags);
- return DAG.getNode(AMDGPUISD::PC_ADD_REL_OFFSET, DL, PtrVT, GA);
+ SDValue PtrLo = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, Offset + 4,
+ GAFlags);
+ SDValue PtrHi = DAG.getTargetGlobalAddress(GV, DL, MVT::i32, Offset + 4,
+ GAFlags == SIInstrInfo::MO_NONE ?
+ GAFlags : GAFlags + 1);
+ return DAG.getNode(AMDGPUISD::PC_ADD_REL_OFFSET, DL, PtrVT, PtrLo, PtrHi);
}
SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
@@ -1556,11 +2252,14 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
const GlobalValue *GV = GSD->getGlobal();
EVT PtrVT = Op.getValueType();
- if (!shouldEmitGOTReloc(GV, getTargetMachine()))
+ if (shouldEmitFixup(GV))
return buildPCRelGlobalAddress(DAG, GV, DL, GSD->getOffset(), PtrVT);
+ else if (shouldEmitPCReloc(GV))
+ return buildPCRelGlobalAddress(DAG, GV, DL, GSD->getOffset(), PtrVT,
+ SIInstrInfo::MO_REL32);
SDValue GOTAddr = buildPCRelGlobalAddress(DAG, GV, DL, 0, PtrVT,
- SIInstrInfo::MO_GOTPCREL);
+ SIInstrInfo::MO_GOTPCREL32);
Type *Ty = PtrVT.getTypeForEVT(*DAG.getContext());
PointerType *PtrTy = PointerType::get(Ty, AMDGPUAS::CONSTANT_ADDRESS);
@@ -1570,7 +2269,8 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
MachinePointerInfo PtrInfo(UndefValue::get(PtrTy));
return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), GOTAddr, PtrInfo, Align,
- MachineMemOperand::MOInvariant);
+ MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant);
}
SDValue SITargetLowering::lowerTRAP(SDValue Op,
@@ -1649,7 +2349,7 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
switch (IntrinsicID) {
case Intrinsic::amdgcn_dispatch_ptr:
case Intrinsic::amdgcn_queue_ptr: {
- if (!Subtarget->isAmdHsaOS()) {
+ if (!Subtarget->isAmdCodeObjectV2()) {
DiagnosticInfoUnsupported BadIntrin(
*MF.getFunction(), "unsupported hsa intrinsic without hsa target",
DL.getDebugLoc());
@@ -1671,6 +2371,10 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
= TRI->getPreloadedValue(MF, SIRegisterInfo::KERNARG_SEGMENT_PTR);
return CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass, Reg, VT);
}
+ case Intrinsic::amdgcn_dispatch_id: {
+ unsigned Reg = TRI->getPreloadedValue(MF, SIRegisterInfo::DISPATCH_ID);
+ return CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass, Reg, VT);
+ }
case Intrinsic::amdgcn_rcp:
return DAG.getNode(AMDGPUISD::RCP, DL, VT, Op.getOperand(1));
case Intrinsic::amdgcn_rsq:
@@ -1682,6 +2386,11 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));
}
+ case Intrinsic::amdgcn_rcp_legacy: {
+ if (Subtarget->getGeneration() >= SISubtarget::VOLCANIC_ISLANDS)
+ return emitRemovedIntrinsicError(DAG, DL, VT);
+ return DAG.getNode(AMDGPUISD::RCP_LEGACY, DL, VT, Op.getOperand(1));
+ }
case Intrinsic::amdgcn_rsq_clamp: {
if (Subtarget->getGeneration() < SISubtarget::VOLCANIC_ISLANDS)
return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
@@ -1750,22 +2459,17 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return lowerImplicitZextParam(DAG, Op, MVT::i16,
SI::KernelInputOffsets::LOCAL_SIZE_Z);
- case Intrinsic::amdgcn_read_workdim:
- case AMDGPUIntrinsic::AMDGPU_read_workdim: // Legacy name.
- // Really only 2 bits.
- return lowerImplicitZextParam(DAG, Op, MVT::i8,
- getImplicitParameterOffset(MFI, GRID_DIM));
case Intrinsic::amdgcn_workgroup_id_x:
case Intrinsic::r600_read_tgid_x:
- return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
+ return CreateLiveInRegister(DAG, &AMDGPU::SReg_32_XM0RegClass,
TRI->getPreloadedValue(MF, SIRegisterInfo::WORKGROUP_ID_X), VT);
case Intrinsic::amdgcn_workgroup_id_y:
case Intrinsic::r600_read_tgid_y:
- return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
+ return CreateLiveInRegister(DAG, &AMDGPU::SReg_32_XM0RegClass,
TRI->getPreloadedValue(MF, SIRegisterInfo::WORKGROUP_ID_Y), VT);
case Intrinsic::amdgcn_workgroup_id_z:
case Intrinsic::r600_read_tgid_z:
- return CreateLiveInRegister(DAG, &AMDGPU::SReg_32RegClass,
+ return CreateLiveInRegister(DAG, &AMDGPU::SReg_32_XM0RegClass,
TRI->getPreloadedValue(MF, SIRegisterInfo::WORKGROUP_ID_Z), VT);
case Intrinsic::amdgcn_workitem_id_x:
case Intrinsic::r600_read_tidig_x:
@@ -1786,9 +2490,10 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
};
MachineMemOperand *MMO = MF.getMachineMemOperand(
- MachinePointerInfo(),
- MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant,
- VT.getStoreSize(), 4);
+ MachinePointerInfo(),
+ MachineMemOperand::MOLoad | MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant,
+ VT.getStoreSize(), 4);
return DAG.getMemIntrinsicNode(AMDGPUISD::LOAD_CONSTANT, DL,
Op->getVTList(), Ops, VT, MMO);
}
@@ -1818,6 +2523,8 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
DAG.getConstant(0, DL, MVT::i32));
SDValue J = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, IJ,
DAG.getConstant(1, DL, MVT::i32));
+ I = DAG.getNode(ISD::BITCAST, DL, MVT::f32, I);
+ J = DAG.getNode(ISD::BITCAST, DL, MVT::f32, J);
SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(3));
SDValue Glue = M0.getValue(1);
SDValue P1 = DAG.getNode(AMDGPUISD::INTERP_P1, DL,
@@ -1827,6 +2534,12 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(AMDGPUISD::INTERP_P2, DL, MVT::f32, P1, J,
Op.getOperand(1), Op.getOperand(2), Glue);
}
+ case Intrinsic::amdgcn_interp_mov: {
+ SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(4));
+ SDValue Glue = M0.getValue(1);
+ return DAG.getNode(AMDGPUISD::INTERP_MOV, DL, MVT::f32, Op.getOperand(1),
+ Op.getOperand(2), Op.getOperand(3), Glue);
+ }
case Intrinsic::amdgcn_interp_p1: {
SDValue M0 = copyToM0(DAG, DAG.getEntryNode(), DL, Op.getOperand(4));
SDValue Glue = M0.getValue(1);
@@ -1899,6 +2612,38 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(AMDGPUISD::DIV_SCALE, DL, Op->getVTList(), Src0,
Denominator, Numerator);
}
+ case Intrinsic::amdgcn_icmp: {
+ const auto *CD = dyn_cast<ConstantSDNode>(Op.getOperand(3));
+ int CondCode = CD->getSExtValue();
+
+ if (CondCode < ICmpInst::Predicate::FIRST_ICMP_PREDICATE ||
+ CondCode >= ICmpInst::Predicate::BAD_ICMP_PREDICATE)
+ return DAG.getUNDEF(VT);
+
+ ICmpInst::Predicate IcInput = static_cast<ICmpInst::Predicate>(CondCode);
+ ISD::CondCode CCOpcode = getICmpCondCode(IcInput);
+ return DAG.getNode(AMDGPUISD::SETCC, DL, VT, Op.getOperand(1),
+ Op.getOperand(2), DAG.getCondCode(CCOpcode));
+ }
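+ // For example, in the amdgcn_icmp case above a condition operand of 32
+ // (ICmpInst::ICMP_EQ) selects ISD::SETEQ, and the resulting AMDGPUISD::SETCC
+ // produces a lane mask in the intrinsic's (typically i64) result type.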
+ case Intrinsic::amdgcn_fcmp: {
+ const auto *CD = dyn_cast<ConstantSDNode>(Op.getOperand(3));
+ int CondCode = CD->getSExtValue();
+
+ if (CondCode <= FCmpInst::Predicate::FCMP_FALSE ||
+ CondCode >= FCmpInst::Predicate::FCMP_TRUE)
+ return DAG.getUNDEF(VT);
+
+ FCmpInst::Predicate IcInput = static_cast<FCmpInst::Predicate>(CondCode);
+ ISD::CondCode CCOpcode = getFCmpCondCode(IcInput);
+ return DAG.getNode(AMDGPUISD::SETCC, DL, VT, Op.getOperand(1),
+ Op.getOperand(2), DAG.getCondCode(CCOpcode));
+ }
+ case Intrinsic::amdgcn_fmul_legacy:
+ return DAG.getNode(AMDGPUISD::FMUL_LEGACY, DL, VT,
+ Op.getOperand(1), Op.getOperand(2));
+ case Intrinsic::amdgcn_sffbh:
+ case AMDGPUIntrinsic::AMDGPU_flbit_i32: // Legacy name.
+ return DAG.getNode(AMDGPUISD::FFBH_I32, DL, VT, Op.getOperand(1));
default:
return AMDGPUTargetLowering::LowerOperation(Op, DAG);
}
@@ -1907,6 +2652,7 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
unsigned IntrID = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
+ SDLoc DL(Op);
switch (IntrID) {
case Intrinsic::amdgcn_atomic_inc:
case Intrinsic::amdgcn_atomic_dec: {
@@ -1922,6 +2668,31 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
return DAG.getMemIntrinsicNode(Opc, SDLoc(Op), M->getVTList(), Ops,
M->getMemoryVT(), M->getMemOperand());
}
+ case Intrinsic::amdgcn_buffer_load:
+ case Intrinsic::amdgcn_buffer_load_format: {
+ SDValue Ops[] = {
+ Op.getOperand(0), // Chain
+ Op.getOperand(2), // rsrc
+ Op.getOperand(3), // vindex
+ Op.getOperand(4), // offset
+ Op.getOperand(5), // glc
+ Op.getOperand(6) // slc
+ };
+ MachineFunction &MF = DAG.getMachineFunction();
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+
+ unsigned Opc = (IntrID == Intrinsic::amdgcn_buffer_load) ?
+ AMDGPUISD::BUFFER_LOAD : AMDGPUISD::BUFFER_LOAD_FORMAT;
+ EVT VT = Op.getValueType();
+ EVT IntVT = VT.changeTypeToInteger();
+
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo(MFI->getBufferPSV()),
+ MachineMemOperand::MOLoad,
+ VT.getStoreSize(), VT.getStoreSize());
+
+ return DAG.getMemIntrinsicNode(Opc, DL, Op->getVTList(), Ops, IntVT, MMO);
+ }
default:
return SDValue();
}
@@ -1969,12 +2740,40 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
Op->getVTList(), Ops, VT, MMO);
}
case AMDGPUIntrinsic::AMDGPU_kill: {
- if (const ConstantFPSDNode *K = dyn_cast<ConstantFPSDNode>(Op.getOperand(2))) {
+ SDValue Src = Op.getOperand(2);
+ if (const ConstantFPSDNode *K = dyn_cast<ConstantFPSDNode>(Src)) {
if (!K->isNegative())
return Chain;
+
+ SDValue NegOne = DAG.getTargetConstant(FloatToBits(-1.0f), DL, MVT::i32);
+ return DAG.getNode(AMDGPUISD::KILL, DL, MVT::Other, Chain, NegOne);
}
- return Op;
+ SDValue Cast = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Src);
+ return DAG.getNode(AMDGPUISD::KILL, DL, MVT::Other, Chain, Cast);
+ }
+ case AMDGPUIntrinsic::SI_export: {
+ const ConstantSDNode *En = cast<ConstantSDNode>(Op.getOperand(2));
+ const ConstantSDNode *VM = cast<ConstantSDNode>(Op.getOperand(3));
+ const ConstantSDNode *Done = cast<ConstantSDNode>(Op.getOperand(4));
+ const ConstantSDNode *Tgt = cast<ConstantSDNode>(Op.getOperand(5));
+ const ConstantSDNode *Compr = cast<ConstantSDNode>(Op.getOperand(6));
+
+ const SDValue Ops[] = {
+ Chain,
+ DAG.getTargetConstant(En->getZExtValue(), DL, MVT::i8),
+ DAG.getTargetConstant(VM->getZExtValue(), DL, MVT::i1),
+ DAG.getTargetConstant(Tgt->getZExtValue(), DL, MVT::i8),
+ DAG.getTargetConstant(Compr->getZExtValue(), DL, MVT::i1),
+ Op.getOperand(7), // src0
+ Op.getOperand(8), // src1
+ Op.getOperand(9), // src2
+ Op.getOperand(10) // src3
+ };
+
+ unsigned Opc = Done->isNullValue() ?
+ AMDGPUISD::EXPORT : AMDGPUISD::EXPORT_DONE;
+ return DAG.getNode(Opc, DL, Op->getVTList(), Ops);
}
default:
return SDValue();
@@ -1988,7 +2787,6 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
EVT MemVT = Load->getMemoryVT();
if (ExtType == ISD::NON_EXTLOAD && MemVT.getSizeInBits() < 32) {
- assert(MemVT == MVT::i1 && "Only i1 non-extloads expected");
// FIXME: Copied from PPC
// First, load into 32 bits, then truncate to 1 bit.
@@ -1996,8 +2794,10 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SDValue BasePtr = Load->getBasePtr();
MachineMemOperand *MMO = Load->getMemOperand();
+ EVT RealMemVT = (MemVT == MVT::i1) ? MVT::i8 : MVT::i16;
+
SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i32, Chain,
- BasePtr, MVT::i8, MMO);
+ BasePtr, RealMemVT, MMO);
SDValue Ops[] = {
DAG.getNode(ISD::TRUNCATE, DL, MemVT, NewLD),
@@ -2021,17 +2821,34 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
return DAG.getMergeValues(Ops, DL);
}
+ MachineFunction &MF = DAG.getMachineFunction();
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ // If there is a possibility that flat instructions access scratch memory,
+ // then we need to use the same legalization rules we use for private.
+ if (AS == AMDGPUAS::FLAT_ADDRESS)
+ AS = MFI->hasFlatScratchInit() ?
+ AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS;
+
unsigned NumElements = MemVT.getVectorNumElements();
switch (AS) {
case AMDGPUAS::CONSTANT_ADDRESS:
if (isMemOpUniform(Load))
return SDValue();
// Non-uniform loads will be selected to MUBUF instructions, so they
- // have the same legalization requires ments as global and private
+ // have the same legalization requirements as global and private
// loads.
//
- // Fall-through
- case AMDGPUAS::GLOBAL_ADDRESS:
+ LLVM_FALLTHROUGH;
+ case AMDGPUAS::GLOBAL_ADDRESS: {
+ if (Subtarget->getScalarizeGlobalBehavior() && isMemOpUniform(Load) &&
+ isMemOpHasNoClobberedMemOperand(Load))
+ return SDValue();
+ // Non-uniform loads will be selected to MUBUF instructions, so they
+ // have the same legalization requirements as global and private
+ // loads.
+ //
+ }
+ LLVM_FALLTHROUGH;
case AMDGPUAS::FLAT_ADDRESS:
if (NumElements > 4)
return SplitVectorLoad(Op, DAG);
@@ -2110,22 +2927,33 @@ SDValue SITargetLowering::lowerFastUnsafeFDIV(SDValue Op,
bool Unsafe = DAG.getTarget().Options.UnsafeFPMath;
if (const ConstantFPSDNode *CLHS = dyn_cast<ConstantFPSDNode>(LHS)) {
- if ((Unsafe || (VT == MVT::f32 && !Subtarget->hasFP32Denormals())) &&
- CLHS->isExactlyValue(1.0)) {
- // v_rcp_f32 and v_rsq_f32 do not support denormals, and according to
- // the CI documentation has a worst case error of 1 ulp.
- // OpenCL requires <= 2.5 ulp for 1.0 / x, so it should always be OK to
- // use it as long as we aren't trying to use denormals.
-
- // 1.0 / sqrt(x) -> rsq(x)
- //
- // XXX - Is UnsafeFPMath sufficient to do this for f64? The maximum ULP
- // error seems really high at 2^29 ULP.
- if (RHS.getOpcode() == ISD::FSQRT)
- return DAG.getNode(AMDGPUISD::RSQ, SL, VT, RHS.getOperand(0));
-
- // 1.0 / x -> rcp(x)
- return DAG.getNode(AMDGPUISD::RCP, SL, VT, RHS);
+ if (Unsafe || (VT == MVT::f32 && !Subtarget->hasFP32Denormals()) ||
+ VT == MVT::f16) {
+ if (CLHS->isExactlyValue(1.0)) {
+ // v_rcp_f32 and v_rsq_f32 do not support denormals, and according to
+ // the CI documentation has a worst case error of 1 ulp.
+ // OpenCL requires <= 2.5 ulp for 1.0 / x, so it should always be OK to
+ // use it as long as we aren't trying to use denormals.
+ //
+ // v_rcp_f16 and v_rsq_f16 DO support denormals.
+
+ // 1.0 / sqrt(x) -> rsq(x)
+
+ // XXX - Is UnsafeFPMath sufficient to do this for f64? The maximum ULP
+ // error seems really high at 2^29 ULP.
+ if (RHS.getOpcode() == ISD::FSQRT)
+ return DAG.getNode(AMDGPUISD::RSQ, SL, VT, RHS.getOperand(0));
+
+ // 1.0 / x -> rcp(x)
+ return DAG.getNode(AMDGPUISD::RCP, SL, VT, RHS);
+ }
+
+ // Same as for 1.0, but expand the sign out of the constant.
+ if (CLHS->isExactlyValue(-1.0)) {
+ // -1.0 / x -> rcp (fneg x)
+ SDValue FNegRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
+ return DAG.getNode(AMDGPUISD::RCP, SL, VT, FNegRHS);
+ }
}
}
@@ -2143,6 +2971,67 @@ SDValue SITargetLowering::lowerFastUnsafeFDIV(SDValue Op,
return SDValue();
}
+static SDValue getFPBinOp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &SL,
+ EVT VT, SDValue A, SDValue B, SDValue GlueChain) {
+ if (GlueChain->getNumValues() <= 1) {
+ return DAG.getNode(Opcode, SL, VT, A, B);
+ }
+
+ assert(GlueChain->getNumValues() == 3);
+
+ SDVTList VTList = DAG.getVTList(VT, MVT::Other, MVT::Glue);
+ switch (Opcode) {
+ default: llvm_unreachable("no chain equivalent for opcode");
+ case ISD::FMUL:
+ Opcode = AMDGPUISD::FMUL_W_CHAIN;
+ break;
+ }
+
+ return DAG.getNode(Opcode, SL, VTList, GlueChain.getValue(1), A, B,
+ GlueChain.getValue(2));
+}
+
+static SDValue getFPTernOp(SelectionDAG &DAG, unsigned Opcode, const SDLoc &SL,
+ EVT VT, SDValue A, SDValue B, SDValue C,
+ SDValue GlueChain) {
+ if (GlueChain->getNumValues() <= 1) {
+ return DAG.getNode(Opcode, SL, VT, A, B, C);
+ }
+
+ assert(GlueChain->getNumValues() == 3);
+
+ SDVTList VTList = DAG.getVTList(VT, MVT::Other, MVT::Glue);
+ switch (Opcode) {
+ default: llvm_unreachable("no chain equivalent for opcode");
+ case ISD::FMA:
+ Opcode = AMDGPUISD::FMA_W_CHAIN;
+ break;
+ }
+
+ return DAG.getNode(Opcode, SL, VTList, GlueChain.getValue(1), A, B, C,
+ GlueChain.getValue(2));
+}
+
+SDValue SITargetLowering::LowerFDIV16(SDValue Op, SelectionDAG &DAG) const {
+ if (SDValue FastLowered = lowerFastUnsafeFDIV(Op, DAG))
+ return FastLowered;
+
+ SDLoc SL(Op);
+ SDValue Src0 = Op.getOperand(0);
+ SDValue Src1 = Op.getOperand(1);
+
+ SDValue CvtSrc0 = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Src0);
+ SDValue CvtSrc1 = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Src1);
+
+ SDValue RcpSrc1 = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, CvtSrc1);
+ SDValue Quot = DAG.getNode(ISD::FMUL, SL, MVT::f32, CvtSrc0, RcpSrc1);
+
+ SDValue FPRoundFlag = DAG.getTargetConstant(0, SL, MVT::i32);
+ SDValue BestQuot = DAG.getNode(ISD::FP_ROUND, SL, MVT::f16, Quot, FPRoundFlag);
+
+ return DAG.getNode(AMDGPUISD::DIV_FIXUP, SL, MVT::f16, BestQuot, Src1, Src0);
+}
+
// Faster 2.5 ULP division that does not support denormals.
SDValue SITargetLowering::lowerFDIV_FAST(SDValue Op, SelectionDAG &DAG) const {
SDLoc SL(Op);
@@ -2189,25 +3078,73 @@ SDValue SITargetLowering::LowerFDIV32(SDValue Op, SelectionDAG &DAG) const {
SDVTList ScaleVT = DAG.getVTList(MVT::f32, MVT::i1);
- SDValue DenominatorScaled = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT, RHS, RHS, LHS);
- SDValue NumeratorScaled = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT, LHS, RHS, LHS);
+ SDValue DenominatorScaled = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT,
+ RHS, RHS, LHS);
+ SDValue NumeratorScaled = DAG.getNode(AMDGPUISD::DIV_SCALE, SL, ScaleVT,
+ LHS, RHS, LHS);
// Denominator is scaled to not be denormal, so using rcp is ok.
- SDValue ApproxRcp = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32, DenominatorScaled);
+ SDValue ApproxRcp = DAG.getNode(AMDGPUISD::RCP, SL, MVT::f32,
+ DenominatorScaled);
+ SDValue NegDivScale0 = DAG.getNode(ISD::FNEG, SL, MVT::f32,
+ DenominatorScaled);
+
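+ // The hwreg operand built below selects the two-bit field at offset 4 of the
+ // MODE register, i.e. the f32 denormal controls: when the subtarget lacks f32
+ // denormal support, denormals are temporarily enabled around the FMA chain
+ // (FP_DENORM_FLUSH_NONE) and flushing is restored afterwards
+ // (FP_DENORM_FLUSH_IN_FLUSH_OUT).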
+ const unsigned Denorm32Reg = AMDGPU::Hwreg::ID_MODE |
+ (4 << AMDGPU::Hwreg::OFFSET_SHIFT_) |
+ (1 << AMDGPU::Hwreg::WIDTH_M1_SHIFT_);
+
+ const SDValue BitField = DAG.getTargetConstant(Denorm32Reg, SL, MVT::i16);
+
+ if (!Subtarget->hasFP32Denormals()) {
+ SDVTList BindParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
+ const SDValue EnableDenormValue = DAG.getConstant(FP_DENORM_FLUSH_NONE,
+ SL, MVT::i32);
+ SDValue EnableDenorm = DAG.getNode(AMDGPUISD::SETREG, SL, BindParamVTs,
+ DAG.getEntryNode(),
+ EnableDenormValue, BitField);
+ SDValue Ops[3] = {
+ NegDivScale0,
+ EnableDenorm.getValue(0),
+ EnableDenorm.getValue(1)
+ };
- SDValue NegDivScale0 = DAG.getNode(ISD::FNEG, SL, MVT::f32, DenominatorScaled);
+ NegDivScale0 = DAG.getMergeValues(Ops, SL);
+ }
- SDValue Fma0 = DAG.getNode(ISD::FMA, SL, MVT::f32, NegDivScale0, ApproxRcp, One);
- SDValue Fma1 = DAG.getNode(ISD::FMA, SL, MVT::f32, Fma0, ApproxRcp, ApproxRcp);
+ SDValue Fma0 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, NegDivScale0,
+ ApproxRcp, One, NegDivScale0);
- SDValue Mul = DAG.getNode(ISD::FMUL, SL, MVT::f32, NumeratorScaled, Fma1);
+ SDValue Fma1 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, Fma0, ApproxRcp,
+ ApproxRcp, Fma0);
- SDValue Fma2 = DAG.getNode(ISD::FMA, SL, MVT::f32, NegDivScale0, Mul, NumeratorScaled);
- SDValue Fma3 = DAG.getNode(ISD::FMA, SL, MVT::f32, Fma2, Fma1, Mul);
- SDValue Fma4 = DAG.getNode(ISD::FMA, SL, MVT::f32, NegDivScale0, Fma3, NumeratorScaled);
+ SDValue Mul = getFPBinOp(DAG, ISD::FMUL, SL, MVT::f32, NumeratorScaled,
+ Fma1, Fma1);
+
+ SDValue Fma2 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, NegDivScale0, Mul,
+ NumeratorScaled, Mul);
+
+ SDValue Fma3 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, Fma2, Fma1, Mul, Fma2);
+
+ SDValue Fma4 = getFPTernOp(DAG, ISD::FMA, SL, MVT::f32, NegDivScale0, Fma3,
+ NumeratorScaled, Fma3);
+
+ if (!Subtarget->hasFP32Denormals()) {
+ const SDValue DisableDenormValue =
+ DAG.getConstant(FP_DENORM_FLUSH_IN_FLUSH_OUT, SL, MVT::i32);
+ SDValue DisableDenorm = DAG.getNode(AMDGPUISD::SETREG, SL, MVT::Other,
+ Fma4.getValue(1),
+ DisableDenormValue,
+ BitField,
+ Fma4.getValue(2));
+
+ SDValue OutputChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other,
+ DisableDenorm, DAG.getRoot());
+ DAG.setRoot(OutputChain);
+ }
SDValue Scale = NumeratorScaled.getValue(1);
- SDValue Fmas = DAG.getNode(AMDGPUISD::DIV_FMAS, SL, MVT::f32, Fma4, Fma1, Fma3, Scale);
+ SDValue Fmas = DAG.getNode(AMDGPUISD::DIV_FMAS, SL, MVT::f32,
+ Fma4, Fma1, Fma3, Scale);
return DAG.getNode(AMDGPUISD::DIV_FIXUP, SL, MVT::f32, Fmas, RHS, LHS);
}
@@ -2288,6 +3225,9 @@ SDValue SITargetLowering::LowerFDIV(SDValue Op, SelectionDAG &DAG) const {
if (VT == MVT::f64)
return LowerFDIV64(Op, DAG);
+ if (VT == MVT::f16)
+ return LowerFDIV16(Op, DAG);
+
llvm_unreachable("Unexpected type for fdiv");
}
@@ -2311,6 +3251,14 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
return expandUnalignedStore(Store, DAG);
}
+ MachineFunction &MF = DAG.getMachineFunction();
+ SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+ // If there is a possibility that flat instructions access scratch memory,
+ // then we need to use the same legalization rules we use for private.
+ if (AS == AMDGPUAS::FLAT_ADDRESS)
+ AS = MFI->hasFlatScratchInit() ?
+ AMDGPUAS::PRIVATE_ADDRESS : AMDGPUAS::GLOBAL_ADDRESS;
+
unsigned NumElements = VT.getVectorNumElements();
switch (AS) {
case AMDGPUAS::GLOBAL_ADDRESS:
@@ -2504,23 +3452,83 @@ SDValue SITargetLowering::performSHLPtrCombine(SDNode *N,
return DAG.getNode(ISD::ADD, SL, VT, ShlX, COffset);
}
+SDValue SITargetLowering::performMemSDNodeCombine(MemSDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SDValue Ptr = N->getBasePtr();
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc SL(N);
+
+ // TODO: We could also do this for multiplies.
+ unsigned AS = N->getAddressSpace();
+ if (Ptr.getOpcode() == ISD::SHL && AS != AMDGPUAS::PRIVATE_ADDRESS) {
+ SDValue NewPtr = performSHLPtrCombine(Ptr.getNode(), AS, DCI);
+ if (NewPtr) {
+ SmallVector<SDValue, 8> NewOps(N->op_begin(), N->op_end());
+
+ NewOps[N->getOpcode() == ISD::STORE ? 2 : 1] = NewPtr;
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+ }
+ }
+
+ return SDValue();
+}
+
+static bool bitOpWithConstantIsReducible(unsigned Opc, uint32_t Val) {
+ return (Opc == ISD::AND && (Val == 0 || Val == 0xffffffff)) ||
+ (Opc == ISD::OR && (Val == 0xffffffff || Val == 0)) ||
+ (Opc == ISD::XOR && Val == 0);
+}
+
+// Break up 64-bit bit operation of a constant into two 32-bit and/or/xor. This
+// will typically happen anyway for a VALU 64-bit and. This exposes other 32-bit
+// integer combine opportunities since most 64-bit operations are decomposed
+// this way. TODO: We won't want this for SALU especially if it is an inline
+// immediate.
+SDValue SITargetLowering::splitBinaryBitConstantOp(
+ DAGCombinerInfo &DCI,
+ const SDLoc &SL,
+ unsigned Opc, SDValue LHS,
+ const ConstantSDNode *CRHS) const {
+ uint64_t Val = CRHS->getZExtValue();
+ uint32_t ValLo = Lo_32(Val);
+ uint32_t ValHi = Hi_32(Val);
+ const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
+
+ if ((bitOpWithConstantIsReducible(Opc, ValLo) ||
+ bitOpWithConstantIsReducible(Opc, ValHi)) ||
+ (CRHS->hasOneUse() && !TII->isInlineConstant(CRHS->getAPIntValue()))) {
+ // If we need to materialize a 64-bit immediate, it will be split up later
+ // anyway. Avoid creating the harder to understand 64-bit immediate
+ // materialization.
+ return splitBinaryBitConstantOpImpl(DCI, SL, Opc, LHS, ValLo, ValHi);
+ }
+
+ return SDValue();
+}
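+
+// For example, (and i64:x, 0x00000000ffffffff) is split into two 32-bit
+// operations where the low half is ANDed with -1 and the high half with 0;
+// both are reducible, so the combined result is effectively a zero-extend of
+// lo_32(x).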
+
SDValue SITargetLowering::performAndCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
if (DCI.isBeforeLegalize())
return SDValue();
- if (SDValue Base = AMDGPUTargetLowering::performAndCombine(N, DCI))
- return Base;
-
SelectionDAG &DAG = DCI.DAG;
-
- // (and (fcmp ord x, x), (fcmp une (fabs x), inf)) ->
- // fp_class x, ~(s_nan | q_nan | n_infinity | p_infinity)
+ EVT VT = N->getValueType(0);
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
- if (LHS.getOpcode() == ISD::SETCC &&
- RHS.getOpcode() == ISD::SETCC) {
+
+ if (VT == MVT::i64) {
+ const ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(RHS);
+ if (CRHS) {
+ if (SDValue Split
+ = splitBinaryBitConstantOp(DCI, SDLoc(N), ISD::AND, LHS, CRHS))
+ return Split;
+ }
+ }
+
+ // (and (fcmp ord x, x), (fcmp une (fabs x), inf)) ->
+ // fp_class x, ~(s_nan | q_nan | n_infinity | p_infinity)
+ if (LHS.getOpcode() == ISD::SETCC && RHS.getOpcode() == ISD::SETCC) {
ISD::CondCode LCC = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
ISD::CondCode RCC = cast<CondCodeSDNode>(RHS.getOperand(2))->get();
@@ -2568,54 +3576,85 @@ SDValue SITargetLowering::performOrCombine(SDNode *N,
SDValue RHS = N->getOperand(1);
EVT VT = N->getValueType(0);
- if (VT == MVT::i64) {
- // TODO: This could be a generic combine with a predicate for extracting the
- // high half of an integer being free.
-
- // (or i64:x, (zero_extend i32:y)) ->
- // i64 (bitcast (v2i32 build_vector (or i32:y, lo_32(x)), hi_32(x)))
- if (LHS.getOpcode() == ISD::ZERO_EXTEND &&
- RHS.getOpcode() != ISD::ZERO_EXTEND)
- std::swap(LHS, RHS);
-
- if (RHS.getOpcode() == ISD::ZERO_EXTEND) {
- SDValue ExtSrc = RHS.getOperand(0);
- EVT SrcVT = ExtSrc.getValueType();
- if (SrcVT == MVT::i32) {
- SDLoc SL(N);
- SDValue LowLHS, HiBits;
- std::tie(LowLHS, HiBits) = split64BitValue(LHS, DAG);
- SDValue LowOr = DAG.getNode(ISD::OR, SL, MVT::i32, LowLHS, ExtSrc);
-
- DCI.AddToWorklist(LowOr.getNode());
- DCI.AddToWorklist(HiBits.getNode());
-
- SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32,
- LowOr, HiBits);
- return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec);
- }
+ if (VT == MVT::i1) {
+ // or (fp_class x, c1), (fp_class x, c2) -> fp_class x, (c1 | c2)
+ if (LHS.getOpcode() == AMDGPUISD::FP_CLASS &&
+ RHS.getOpcode() == AMDGPUISD::FP_CLASS) {
+ SDValue Src = LHS.getOperand(0);
+ if (Src != RHS.getOperand(0))
+ return SDValue();
+
+ const ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
+ const ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(RHS.getOperand(1));
+ if (!CLHS || !CRHS)
+ return SDValue();
+
+ // Only 10 bits are used.
+ static const uint32_t MaxMask = 0x3ff;
+
+ uint32_t NewMask = (CLHS->getZExtValue() | CRHS->getZExtValue()) & MaxMask;
+ SDLoc DL(N);
+ return DAG.getNode(AMDGPUISD::FP_CLASS, DL, MVT::i1,
+ Src, DAG.getConstant(NewMask, DL, MVT::i32));
}
+
+ return SDValue();
}
- // or (fp_class x, c1), (fp_class x, c2) -> fp_class x, (c1 | c2)
- if (LHS.getOpcode() == AMDGPUISD::FP_CLASS &&
- RHS.getOpcode() == AMDGPUISD::FP_CLASS) {
- SDValue Src = LHS.getOperand(0);
- if (Src != RHS.getOperand(0))
- return SDValue();
+ if (VT != MVT::i64)
+ return SDValue();
- const ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
- const ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(RHS.getOperand(1));
- if (!CLHS || !CRHS)
- return SDValue();
+ // TODO: This could be a generic combine with a predicate for extracting the
+ // high half of an integer being free.
+
+ // (or i64:x, (zero_extend i32:y)) ->
+ // i64 (bitcast (v2i32 build_vector (or i32:y, lo_32(x)), hi_32(x)))
+ if (LHS.getOpcode() == ISD::ZERO_EXTEND &&
+ RHS.getOpcode() != ISD::ZERO_EXTEND)
+ std::swap(LHS, RHS);
+
+ if (RHS.getOpcode() == ISD::ZERO_EXTEND) {
+ SDValue ExtSrc = RHS.getOperand(0);
+ EVT SrcVT = ExtSrc.getValueType();
+ if (SrcVT == MVT::i32) {
+ SDLoc SL(N);
+ SDValue LowLHS, HiBits;
+ std::tie(LowLHS, HiBits) = split64BitValue(LHS, DAG);
+ SDValue LowOr = DAG.getNode(ISD::OR, SL, MVT::i32, LowLHS, ExtSrc);
+
+ DCI.AddToWorklist(LowOr.getNode());
+ DCI.AddToWorklist(HiBits.getNode());
+
+ SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32,
+ LowOr, HiBits);
+ return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec);
+ }
+ }
- // Only 10 bits are used.
- static const uint32_t MaxMask = 0x3ff;
+ const ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (CRHS) {
+ if (SDValue Split
+ = splitBinaryBitConstantOp(DCI, SDLoc(N), ISD::OR, LHS, CRHS))
+ return Split;
+ }
- uint32_t NewMask = (CLHS->getZExtValue() | CRHS->getZExtValue()) & MaxMask;
- SDLoc DL(N);
- return DAG.getNode(AMDGPUISD::FP_CLASS, DL, MVT::i1,
- Src, DAG.getConstant(NewMask, DL, MVT::i32));
+ return SDValue();
+}
+
+SDValue SITargetLowering::performXorCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i64)
+ return SDValue();
+
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ const ConstantSDNode *CRHS = dyn_cast<ConstantSDNode>(RHS);
+ if (CRHS) {
+ if (SDValue Split
+ = splitBinaryBitConstantOp(DCI, SDLoc(N), ISD::XOR, LHS, CRHS))
+ return Split;
}
return SDValue();
@@ -2657,6 +3696,9 @@ SDValue SITargetLowering::performFCanonicalizeCombine(
if (VT == MVT::f64 && !Subtarget->hasFP64Denormals())
return DAG.getConstantFP(0.0, SDLoc(N), VT);
+
+ if (VT == MVT::f16 && !Subtarget->hasFP16Denormals())
+ return DAG.getConstantFP(0.0, SDLoc(N), VT);
}
if (C.isNaN()) {
@@ -2716,8 +3758,23 @@ static SDValue performIntMed3ImmCombine(SelectionDAG &DAG, const SDLoc &SL,
}
EVT VT = K0->getValueType(0);
- return DAG.getNode(Signed ? AMDGPUISD::SMED3 : AMDGPUISD::UMED3, SL, VT,
- Op0.getOperand(0), SDValue(K0, 0), SDValue(K1, 0));
+
+ MVT NVT = MVT::i32;
+ unsigned ExtOp = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
+
+ SDValue Tmp1, Tmp2, Tmp3;
+ Tmp1 = DAG.getNode(ExtOp, SL, NVT, Op0->getOperand(0));
+ Tmp2 = DAG.getNode(ExtOp, SL, NVT, Op0->getOperand(1));
+ Tmp3 = DAG.getNode(ExtOp, SL, NVT, Op1);
+
+ if (VT == MVT::i16) {
+ Tmp1 = DAG.getNode(Signed ? AMDGPUISD::SMED3 : AMDGPUISD::UMED3, SL, NVT,
+ Tmp1, Tmp2, Tmp3);
+
+ return DAG.getNode(ISD::TRUNCATE, SL, VT, Tmp1);
+ } else
+ return DAG.getNode(Signed ? AMDGPUISD::SMED3 : AMDGPUISD::UMED3, SL, VT,
+ Op0.getOperand(0), SDValue(K0, 0), SDValue(K1, 0));
}
static bool isKnownNeverSNan(SelectionDAG &DAG, SDValue Op) {
@@ -2814,6 +3871,119 @@ SDValue SITargetLowering::performMinMaxCombine(SDNode *N,
return SDValue();
}
+unsigned SITargetLowering::getFusedOpcode(const SelectionDAG &DAG,
+ const SDNode *N0,
+ const SDNode *N1) const {
+ EVT VT = N0->getValueType(0);
+
+ // Only do this if we are not trying to support denormals. v_mad_f32 does not
+ // support denormals ever.
+ if ((VT == MVT::f32 && !Subtarget->hasFP32Denormals()) ||
+ (VT == MVT::f16 && !Subtarget->hasFP16Denormals()))
+ return ISD::FMAD;
+
+ const TargetOptions &Options = DAG.getTarget().Options;
+ if ((Options.AllowFPOpFusion == FPOpFusion::Fast ||
+ Options.UnsafeFPMath ||
+ (cast<BinaryWithFlagsSDNode>(N0)->Flags.hasUnsafeAlgebra() &&
+ cast<BinaryWithFlagsSDNode>(N1)->Flags.hasUnsafeAlgebra())) &&
+ isFMAFasterThanFMulAndFAdd(VT)) {
+ return ISD::FMA;
+ }
+
+ return 0;
+}
+
+SDValue SITargetLowering::performFAddCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ EVT VT = N->getValueType(0);
+ assert(!VT.isVector());
+
+ SDLoc SL(N);
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ // These should really be instruction patterns, but writing patterns with
+ // source modifiers is a pain.
+
+ // fadd (fadd (a, a), b) -> mad 2.0, a, b
+ if (LHS.getOpcode() == ISD::FADD) {
+ SDValue A = LHS.getOperand(0);
+ if (A == LHS.getOperand(1)) {
+ unsigned FusedOp = getFusedOpcode(DAG, N, LHS.getNode());
+ if (FusedOp != 0) {
+ const SDValue Two = DAG.getConstantFP(2.0, SL, VT);
+ return DAG.getNode(FusedOp, SL, VT, A, Two, RHS);
+ }
+ }
+ }
+
+ // fadd (b, fadd (a, a)) -> mad 2.0, a, b
+ if (RHS.getOpcode() == ISD::FADD) {
+ SDValue A = RHS.getOperand(0);
+ if (A == RHS.getOperand(1)) {
+ unsigned FusedOp = getFusedOpcode(DAG, N, RHS.getNode());
+ if (FusedOp != 0) {
+ const SDValue Two = DAG.getConstantFP(2.0, SL, VT);
+ return DAG.getNode(FusedOp, SL, VT, A, Two, LHS);
+ }
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue SITargetLowering::performFSubCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc SL(N);
+ EVT VT = N->getValueType(0);
+ assert(!VT.isVector());
+
+ // Try to get the fneg to fold into the source modifier. This undoes generic
+ // DAG combines and folds them into the mad.
+ //
+ // Only do this if we are not trying to support denormals. v_mad_f32 does
+ // not support denormals ever.
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ if (LHS.getOpcode() == ISD::FADD) {
+ // (fsub (fadd a, a), c) -> mad 2.0, a, (fneg c)
+ SDValue A = LHS.getOperand(0);
+ if (A == LHS.getOperand(1)) {
+ unsigned FusedOp = getFusedOpcode(DAG, N, LHS.getNode());
+ if (FusedOp != 0) {
+ const SDValue Two = DAG.getConstantFP(2.0, SL, VT);
+ SDValue NegRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
+
+ return DAG.getNode(FusedOp, SL, VT, A, Two, NegRHS);
+ }
+ }
+ }
+
+ if (RHS.getOpcode() == ISD::FADD) {
+ // (fsub c, (fadd a, a)) -> mad -2.0, a, c
+
+ SDValue A = RHS.getOperand(0);
+ if (A == RHS.getOperand(1)) {
+ unsigned FusedOp = getFusedOpcode(DAG, N, RHS.getNode());
+ if (FusedOp != 0) {
+ const SDValue NegTwo = DAG.getConstantFP(-2.0, SL, VT);
+ return DAG.getNode(FusedOp, SL, VT, A, NegTwo, LHS);
+ }
+ }
+ }
+
+ return SDValue();
+}
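
The fsub forms fold the negation into the multiply-add as well: (a + a) - c becomes mad 2.0, a, (fneg c) and c - (a + a) becomes mad -2.0, a, c. A minimal sketch with exactly representable values, again using <cmath> fma as a stand-in:

#include <cassert>
#include <cmath>

int main() {
  float a = 1.5f, c = 0.5f;
  assert((a + a) - c == std::fma(a, 2.0f, -c)); // mad 2.0, a, (fneg c)
  assert(c - (a + a) == std::fma(a, -2.0f, c)); // mad -2.0, a, c
  return 0;
}
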
+
SDValue SITargetLowering::performSetCCCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -2823,7 +3993,8 @@ SDValue SITargetLowering::performSetCCCombine(SDNode *N,
SDValue RHS = N->getOperand(1);
EVT VT = LHS.getValueType();
- if (VT != MVT::f32 && VT != MVT::f64)
+ if (VT != MVT::f32 && VT != MVT::f64 && (Subtarget->has16BitInsts() &&
+ VT != MVT::f16))
return SDValue();
// Match isinf pattern
@@ -2845,14 +4016,59 @@ SDValue SITargetLowering::performSetCCCombine(SDNode *N,
return SDValue();
}
-SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
- DAGCombinerInfo &DCI) const {
+SDValue SITargetLowering::performCvtF32UByteNCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
- SDLoc DL(N);
+ SDLoc SL(N);
+ unsigned Offset = N->getOpcode() - AMDGPUISD::CVT_F32_UBYTE0;
+
+ SDValue Src = N->getOperand(0);
+ SDValue Srl = N->getOperand(0);
+ if (Srl.getOpcode() == ISD::ZERO_EXTEND)
+ Srl = Srl.getOperand(0);
+
+ // TODO: Handle (or x, (srl y, 8)) pattern when known bits are zero.
+ if (Srl.getOpcode() == ISD::SRL) {
+ // cvt_f32_ubyte0 (srl x, 16) -> cvt_f32_ubyte2 x
+ // cvt_f32_ubyte1 (srl x, 16) -> cvt_f32_ubyte3 x
+ // cvt_f32_ubyte0 (srl x, 8) -> cvt_f32_ubyte1 x
+
+ if (const ConstantSDNode *C =
+ dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
+ Srl = DAG.getZExtOrTrunc(Srl.getOperand(0), SDLoc(Srl.getOperand(0)),
+ EVT(MVT::i32));
+
+ unsigned SrcOffset = C->getZExtValue() + 8 * Offset;
+ if (SrcOffset < 32 && SrcOffset % 8 == 0) {
+ return DAG.getNode(AMDGPUISD::CVT_F32_UBYTE0 + SrcOffset / 8, SL,
+ MVT::f32, Srl);
+ }
+ }
+ }
+ APInt Demanded = APInt::getBitsSet(32, 8 * Offset, 8 * Offset + 8);
+
+ APInt KnownZero, KnownOne;
+ TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
+ !DCI.isBeforeLegalizeOps());
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLO.ShrinkDemandedConstant(Src, Demanded) ||
+ TLI.SimplifyDemandedBits(Src, Demanded, KnownZero, KnownOne, TLO)) {
+ DCI.CommitTargetLoweringOpt(TLO);
+ }
+
+ return SDValue();
+}
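
A minimal standalone sketch of the byte-index arithmetic used above; the asserts replay the rewrites listed in the comments, and the helper name is illustrative only.

#include <cassert>

// Returns the folded ubyte index for cvt_f32_ubyteN (srl x, ShiftAmt),
// or -1 when the combine does not apply.
static int foldUByteIndex(unsigned N, unsigned ShiftAmt) {
  unsigned SrcOffset = ShiftAmt + 8 * N;  // bit offset of the selected byte
  if (SrcOffset < 32 && SrcOffset % 8 == 0)
    return SrcOffset / 8;
  return -1;
}

int main() {
  assert(foldUByteIndex(0, 16) == 2); // cvt_f32_ubyte0 (srl x, 16) -> ubyte2
  assert(foldUByteIndex(1, 16) == 3); // cvt_f32_ubyte1 (srl x, 16) -> ubyte3
  assert(foldUByteIndex(0, 8)  == 1); // cvt_f32_ubyte0 (srl x, 8)  -> ubyte1
  return 0;
}
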
+
+SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
switch (N->getOpcode()) {
default:
return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
+ case ISD::FADD:
+ return performFAddCombine(N, DCI);
+ case ISD::FSUB:
+ return performFSubCombine(N, DCI);
case ISD::SETCC:
return performSetCCCombine(N, DCI);
case ISD::FMAXNUM:
@@ -2869,127 +4085,6 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
return performMinMaxCombine(N, DCI);
break;
}
-
- case AMDGPUISD::CVT_F32_UBYTE0:
- case AMDGPUISD::CVT_F32_UBYTE1:
- case AMDGPUISD::CVT_F32_UBYTE2:
- case AMDGPUISD::CVT_F32_UBYTE3: {
- unsigned Offset = N->getOpcode() - AMDGPUISD::CVT_F32_UBYTE0;
- SDValue Src = N->getOperand(0);
-
- // TODO: Handle (or x, (srl y, 8)) pattern when known bits are zero.
- if (Src.getOpcode() == ISD::SRL) {
- // cvt_f32_ubyte0 (srl x, 16) -> cvt_f32_ubyte2 x
- // cvt_f32_ubyte1 (srl x, 16) -> cvt_f32_ubyte3 x
- // cvt_f32_ubyte0 (srl x, 8) -> cvt_f32_ubyte1 x
-
- if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(1))) {
- unsigned SrcOffset = C->getZExtValue() + 8 * Offset;
- if (SrcOffset < 32 && SrcOffset % 8 == 0) {
- return DAG.getNode(AMDGPUISD::CVT_F32_UBYTE0 + SrcOffset / 8, DL,
- MVT::f32, Src.getOperand(0));
- }
- }
- }
-
- APInt Demanded = APInt::getBitsSet(32, 8 * Offset, 8 * Offset + 8);
-
- APInt KnownZero, KnownOne;
- TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
- !DCI.isBeforeLegalizeOps());
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (TLO.ShrinkDemandedConstant(Src, Demanded) ||
- TLI.SimplifyDemandedBits(Src, Demanded, KnownZero, KnownOne, TLO)) {
- DCI.CommitTargetLoweringOpt(TLO);
- }
-
- break;
- }
-
- case ISD::UINT_TO_FP: {
- return performUCharToFloatCombine(N, DCI);
- }
- case ISD::FADD: {
- if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
- break;
-
- EVT VT = N->getValueType(0);
- if (VT != MVT::f32)
- break;
-
- // Only do this if we are not trying to support denormals. v_mad_f32 does
- // not support denormals ever.
- if (Subtarget->hasFP32Denormals())
- break;
-
- SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
-
- // These should really be instruction patterns, but writing patterns with
- // source modiifiers is a pain.
-
- // fadd (fadd (a, a), b) -> mad 2.0, a, b
- if (LHS.getOpcode() == ISD::FADD) {
- SDValue A = LHS.getOperand(0);
- if (A == LHS.getOperand(1)) {
- const SDValue Two = DAG.getConstantFP(2.0, DL, MVT::f32);
- return DAG.getNode(ISD::FMAD, DL, VT, Two, A, RHS);
- }
- }
-
- // fadd (b, fadd (a, a)) -> mad 2.0, a, b
- if (RHS.getOpcode() == ISD::FADD) {
- SDValue A = RHS.getOperand(0);
- if (A == RHS.getOperand(1)) {
- const SDValue Two = DAG.getConstantFP(2.0, DL, MVT::f32);
- return DAG.getNode(ISD::FMAD, DL, VT, Two, A, LHS);
- }
- }
-
- return SDValue();
- }
- case ISD::FSUB: {
- if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
- break;
-
- EVT VT = N->getValueType(0);
-
- // Try to get the fneg to fold into the source modifier. This undoes generic
- // DAG combines and folds them into the mad.
- //
- // Only do this if we are not trying to support denormals. v_mad_f32 does
- // not support denormals ever.
- if (VT == MVT::f32 &&
- !Subtarget->hasFP32Denormals()) {
- SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
- if (LHS.getOpcode() == ISD::FADD) {
- // (fsub (fadd a, a), c) -> mad 2.0, a, (fneg c)
-
- SDValue A = LHS.getOperand(0);
- if (A == LHS.getOperand(1)) {
- const SDValue Two = DAG.getConstantFP(2.0, DL, MVT::f32);
- SDValue NegRHS = DAG.getNode(ISD::FNEG, DL, VT, RHS);
-
- return DAG.getNode(ISD::FMAD, DL, VT, Two, A, NegRHS);
- }
- }
-
- if (RHS.getOpcode() == ISD::FADD) {
- // (fsub c, (fadd a, a)) -> mad -2.0, a, c
-
- SDValue A = RHS.getOperand(0);
- if (A == RHS.getOperand(1)) {
- const SDValue NegTwo = DAG.getConstantFP(-2.0, DL, MVT::f32);
- return DAG.getNode(ISD::FMAD, DL, VT, NegTwo, A, LHS);
- }
- }
-
- return SDValue();
- }
-
- break;
- }
case ISD::LOAD:
case ISD::STORE:
case ISD::ATOMIC_LOAD:
@@ -3011,27 +4106,14 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
case AMDGPUISD::ATOMIC_DEC: { // TODO: Target mem intrinsics.
if (DCI.isBeforeLegalize())
break;
-
- MemSDNode *MemNode = cast<MemSDNode>(N);
- SDValue Ptr = MemNode->getBasePtr();
-
- // TODO: We could also do this for multiplies.
- unsigned AS = MemNode->getAddressSpace();
- if (Ptr.getOpcode() == ISD::SHL && AS != AMDGPUAS::PRIVATE_ADDRESS) {
- SDValue NewPtr = performSHLPtrCombine(Ptr.getNode(), AS, DCI);
- if (NewPtr) {
- SmallVector<SDValue, 8> NewOps(MemNode->op_begin(), MemNode->op_end());
-
- NewOps[N->getOpcode() == ISD::STORE ? 2 : 1] = NewPtr;
- return SDValue(DAG.UpdateNodeOperands(MemNode, NewOps), 0);
- }
- }
- break;
+ return performMemSDNodeCombine(cast<MemSDNode>(N), DCI);
}
case ISD::AND:
return performAndCombine(N, DCI);
case ISD::OR:
return performOrCombine(N, DCI);
+ case ISD::XOR:
+ return performXorCombine(N, DCI);
case AMDGPUISD::FP_CLASS:
return performClassCombine(N, DCI);
case ISD::FCANONICALIZE:
@@ -3039,6 +4121,7 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
case AMDGPUISD::FRACT:
case AMDGPUISD::RCP:
case AMDGPUISD::RSQ:
+ case AMDGPUISD::RCP_LEGACY:
case AMDGPUISD::RSQ_LEGACY:
case AMDGPUISD::RSQ_CLAMP:
case AMDGPUISD::LDEXP: {
@@ -3047,38 +4130,18 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
return Src;
break;
}
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ return performUCharToFloatCombine(N, DCI);
+ case AMDGPUISD::CVT_F32_UBYTE0:
+ case AMDGPUISD::CVT_F32_UBYTE1:
+ case AMDGPUISD::CVT_F32_UBYTE2:
+ case AMDGPUISD::CVT_F32_UBYTE3:
+ return performCvtF32UByteNCombine(N, DCI);
}
return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
}
-/// \brief Analyze the possible immediate value Op
-///
-/// Returns -1 if it isn't an immediate, 0 if it's and inline immediate
-/// and the immediate value if it's a literal immediate
-int32_t SITargetLowering::analyzeImmediate(const SDNode *N) const {
- const SIInstrInfo *TII = getSubtarget()->getInstrInfo();
-
- if (const ConstantSDNode *Node = dyn_cast<ConstantSDNode>(N)) {
- if (TII->isInlineConstant(Node->getAPIntValue()))
- return 0;
-
- uint64_t Val = Node->getZExtValue();
- return isUInt<32>(Val) ? Val : -1;
- }
-
- if (const ConstantFPSDNode *Node = dyn_cast<ConstantFPSDNode>(N)) {
- if (TII->isInlineConstant(Node->getValueAPF().bitcastToAPInt()))
- return 0;
-
- if (Node->getValueType(0) == MVT::f32)
- return FloatToBits(Node->getValueAPF().convertToFloat());
-
- return -1;
- }
-
- return -1;
-}
-
/// \brief Helper function for adjustWritemask
static unsigned SubIdx2Lane(unsigned Idx) {
switch (Idx) {
@@ -3235,13 +4298,16 @@ void SITargetLowering::AdjustInstrPostInstrSelection(MachineInstr &MI,
if (TII->isMIMG(MI)) {
unsigned VReg = MI.getOperand(0).getReg();
+ const TargetRegisterClass *RC = MRI.getRegClass(VReg);
+ // TODO: Need mapping tables to handle other cases (register classes).
+ if (RC != &AMDGPU::VReg_128RegClass)
+ return;
+
unsigned DmaskIdx = MI.getNumOperands() == 12 ? 3 : 4;
unsigned Writemask = MI.getOperand(DmaskIdx).getImm();
unsigned BitsSet = 0;
for (unsigned i = 0; i < 4; ++i)
BitsSet += Writemask & (1 << i) ? 1 : 0;
-
- const TargetRegisterClass *RC;
switch (BitsSet) {
default: return;
case 1: RC = &AMDGPU::VGPR_32RegClass; break;
@@ -3379,6 +4445,8 @@ std::pair<unsigned, const TargetRegisterClass *>
SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
StringRef Constraint,
MVT VT) const {
+ if (!isTypeLegal(VT))
+ return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
if (Constraint.size() == 1) {
switch (Constraint[0]) {
@@ -3388,7 +4456,8 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
default:
return std::make_pair(0U, nullptr);
case 32:
- return std::make_pair(0U, &AMDGPU::SGPR_32RegClass);
+ case 16:
+ return std::make_pair(0U, &AMDGPU::SReg_32_XM0RegClass);
case 64:
return std::make_pair(0U, &AMDGPU::SGPR_64RegClass);
case 128:
@@ -3402,6 +4471,7 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
default:
return std::make_pair(0U, nullptr);
case 32:
+ case 16:
return std::make_pair(0U, &AMDGPU::VGPR_32RegClass);
case 64:
return std::make_pair(0U, &AMDGPU::VReg_64RegClass);
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h
index 1d349faa592c..9583f6db6faa 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ b/contrib/llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -33,11 +33,11 @@ class SITargetLowering final : public AMDGPUTargetLowering {
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFastUnsafeFDIV(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerFDIV_FAST(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFDIV16(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFDIV32(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFDIV64(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFDIV(SDValue Op, SelectionDAG &DAG) const;
@@ -47,6 +47,16 @@ class SITargetLowering final : public AMDGPUTargetLowering {
SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
+ /// \brief Converts \p Op, which must be of floating point type, to the
+ /// floating point type \p VT, by either extending or truncating it.
+ SDValue getFPExtOrFPTrunc(SelectionDAG &DAG,
+ SDValue Op,
+ const SDLoc &DL,
+ EVT VT) const;
+
+ /// \brief Custom lowering for ISD::FP_ROUND for MVT::f16.
+ SDValue lowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
+
SDValue getSegmentAperture(unsigned AS, SelectionDAG &DAG) const;
SDValue lowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerTRAP(SDValue Op, SelectionDAG &DAG) const;
@@ -58,14 +68,27 @@ class SITargetLowering final : public AMDGPUTargetLowering {
SDValue performSHLPtrCombine(SDNode *N,
unsigned AS,
DAGCombinerInfo &DCI) const;
+
+ SDValue performMemSDNodeCombine(MemSDNode *N, DAGCombinerInfo &DCI) const;
+
+ SDValue splitBinaryBitConstantOp(DAGCombinerInfo &DCI, const SDLoc &SL,
+ unsigned Opc, SDValue LHS,
+ const ConstantSDNode *CRHS) const;
+
SDValue performAndCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performOrCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performXorCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performClassCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performFCanonicalizeCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performMinMaxCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ unsigned getFusedOpcode(const SelectionDAG &DAG,
+ const SDNode *N0, const SDNode *N1) const;
+ SDValue performFAddCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performFSubCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performSetCCCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performCvtF32UByteNCombine(SDNode *N, DAGCombinerInfo &DCI) const;
bool isLegalFlatAddressingMode(const AddrMode &AM) const;
bool isLegalMUBUFAddressingMode(const AddrMode &AM) const;
@@ -73,6 +96,19 @@ class SITargetLowering final : public AMDGPUTargetLowering {
bool isCFIntrinsic(const SDNode *Intr) const;
void createDebuggerPrologueStackObjects(MachineFunction &MF) const;
+
+ /// \returns True if fixup needs to be emitted for given global value \p GV,
+ /// false otherwise.
+ bool shouldEmitFixup(const GlobalValue *GV) const;
+
+ /// \returns True if GOT relocation needs to be emitted for given global value
+ /// \p GV, false otherwise.
+ bool shouldEmitGOTReloc(const GlobalValue *GV) const;
+
+ /// \returns True if PC-relative relocation needs to be emitted for given
+ /// global value \p GV, false otherwise.
+ bool shouldEmitPCReloc(const GlobalValue *GV) const;
+
public:
SITargetLowering(const TargetMachine &tm, const SISubtarget &STI);
@@ -98,7 +134,9 @@ public:
MachineFunction &MF) const override;
bool isMemOpUniform(const SDNode *N) const;
+ bool isMemOpHasNoClobberedMemOperand(const SDNode *N) const;
bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
+ bool isCheapAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
TargetLoweringBase::LegalizeTypeAction
getPreferredVectorAction(EVT VT) const override;
@@ -141,7 +179,6 @@ public:
void AdjustInstrPostInstrSelection(MachineInstr &MI,
SDNode *Node) const override;
- int32_t analyzeImmediate(const SDNode *N) const;
SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC,
unsigned Reg, EVT VT) const override;
void legalizeTargetIndependentNode(SDNode *Node, SelectionDAG &DAG) const;
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp b/contrib/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
new file mode 100644
index 000000000000..91e4bf755c53
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
@@ -0,0 +1,329 @@
+//===-- SIInsertSkips.cpp - Use predicates for control flow ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief This pass inserts branches on the 0 exec mask over divergent branches
+/// when it's expected that jumping over the untaken control flow will
+/// be cheaper than having every workitem no-op through it.
+//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
+#include "SIMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/MC/MCAsmInfo.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "si-insert-skips"
+
+namespace {
+
+static cl::opt<unsigned> SkipThresholdFlag(
+ "amdgpu-skip-threshold",
+ cl::desc("Number of instructions before jumping over divergent control flow"),
+ cl::init(12), cl::Hidden);
+
+class SIInsertSkips : public MachineFunctionPass {
+private:
+ const SIRegisterInfo *TRI;
+ const SIInstrInfo *TII;
+ unsigned SkipThreshold;
+
+ bool shouldSkip(const MachineBasicBlock &From,
+ const MachineBasicBlock &To) const;
+
+ bool skipIfDead(MachineInstr &MI, MachineBasicBlock &NextBB);
+
+ void kill(MachineInstr &MI);
+
+ MachineBasicBlock *insertSkipBlock(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ bool skipMaskBranch(MachineInstr &MI, MachineBasicBlock &MBB);
+
+public:
+ static char ID;
+
+ SIInsertSkips() :
+ MachineFunctionPass(ID), TRI(nullptr), TII(nullptr), SkipThreshold(0) { }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ StringRef getPassName() const override {
+ return "SI insert s_cbranch_execz instructions";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
+} // End anonymous namespace
+
+char SIInsertSkips::ID = 0;
+
+INITIALIZE_PASS(SIInsertSkips, DEBUG_TYPE,
+ "SI insert s_cbranch_execz instructions", false, false)
+
+char &llvm::SIInsertSkipsPassID = SIInsertSkips::ID;
+
+static bool opcodeEmitsNoInsts(unsigned Opc) {
+ switch (Opc) {
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::KILL:
+ case TargetOpcode::BUNDLE:
+ case TargetOpcode::CFI_INSTRUCTION:
+ case TargetOpcode::EH_LABEL:
+ case TargetOpcode::GC_LABEL:
+ case TargetOpcode::DBG_VALUE:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool SIInsertSkips::shouldSkip(const MachineBasicBlock &From,
+ const MachineBasicBlock &To) const {
+ if (From.succ_empty())
+ return false;
+
+ unsigned NumInstr = 0;
+ const MachineFunction *MF = From.getParent();
+
+ for (MachineFunction::const_iterator MBBI(&From), ToI(&To), End = MF->end();
+ MBBI != End && MBBI != ToI; ++MBBI) {
+ const MachineBasicBlock &MBB = *MBBI;
+
+ for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end();
+ NumInstr < SkipThreshold && I != E; ++I) {
+ if (opcodeEmitsNoInsts(I->getOpcode()))
+ continue;
+
+ // FIXME: Since this is required for correctness, this should be inserted
+ // during SILowerControlFlow.
+
+ // When a uniform loop is inside non-uniform control flow, the branch
+ // leaving the loop might be an S_CBRANCH_VCCNZ, which is never taken
+ // when EXEC = 0. We should skip the loop lest it become infinite.
+ if (I->getOpcode() == AMDGPU::S_CBRANCH_VCCNZ ||
+ I->getOpcode() == AMDGPU::S_CBRANCH_VCCZ)
+ return true;
+
+ if (I->isInlineAsm()) {
+ const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
+ const char *AsmStr = I->getOperand(0).getSymbolName();
+
+ // The inline asm length estimate is the number of bytes assuming the
+ // longest instruction.
+ uint64_t MaxAsmSize = TII->getInlineAsmLength(AsmStr, *MAI);
+ NumInstr += MaxAsmSize / MAI->getMaxInstLength();
+ } else {
+ ++NumInstr;
+ }
+
+ if (NumInstr >= SkipThreshold)
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool SIInsertSkips::skipIfDead(MachineInstr &MI, MachineBasicBlock &NextBB) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ MachineFunction *MF = MBB.getParent();
+
+ if (MF->getFunction()->getCallingConv() != CallingConv::AMDGPU_PS ||
+ !shouldSkip(MBB, MBB.getParent()->back()))
+ return false;
+
+ MachineBasicBlock *SkipBB = insertSkipBlock(MBB, MI.getIterator());
+
+ const DebugLoc &DL = MI.getDebugLoc();
+
+ // If the exec mask is non-zero, skip the next two instructions
+ BuildMI(&MBB, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
+ .addMBB(&NextBB);
+
+ MachineBasicBlock::iterator Insert = SkipBB->begin();
+
+ // Exec mask is zero: Export to NULL target...
+ BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::EXP_DONE))
+ .addImm(0x09) // V_008DFC_SQ_EXP_NULL
+ .addReg(AMDGPU::VGPR0, RegState::Undef)
+ .addReg(AMDGPU::VGPR0, RegState::Undef)
+ .addReg(AMDGPU::VGPR0, RegState::Undef)
+ .addReg(AMDGPU::VGPR0, RegState::Undef)
+ .addImm(1) // vm
+ .addImm(0) // compr
+ .addImm(0); // en
+
+ // ... and terminate wavefront.
+ BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM));
+
+ return true;
+}
+
+void SIInsertSkips::kill(MachineInstr &MI) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+ const MachineOperand &Op = MI.getOperand(0);
+
+#ifndef NDEBUG
+ CallingConv::ID CallConv = MBB.getParent()->getFunction()->getCallingConv();
+ // Kill is only allowed in pixel / geometry shaders.
+ assert(CallConv == CallingConv::AMDGPU_PS ||
+ CallConv == CallingConv::AMDGPU_GS);
+#endif
+ // Clear this thread from the exec mask if the operand is negative.
+ if (Op.isImm()) {
+ // Constant operand: Set exec mask to 0 or do nothing
+ if (Op.getImm() & 0x80000000) {
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
+ .addImm(0);
+ }
+ } else {
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32))
+ .addImm(0)
+ .addOperand(Op);
+ }
+}
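
For the constant-operand case, the immediate carries a 32-bit float bit pattern and the kill only needs to act when that value is negative, i.e. when the sign bit is set. A minimal sketch of just that test (the function name is illustrative):

#include <cstdint>

static bool killClearsExec(uint32_t FloatBits) {
  // Negative (sign bit set) -> EXEC is cleared; otherwise the kill is a no-op.
  return (FloatBits & 0x80000000u) != 0;
}
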
+
+MachineBasicBlock *SIInsertSkips::insertSkipBlock(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const {
+ MachineFunction *MF = MBB.getParent();
+
+ MachineBasicBlock *SkipBB = MF->CreateMachineBasicBlock();
+ MachineFunction::iterator MBBI(MBB);
+ ++MBBI;
+
+ MF->insert(MBBI, SkipBB);
+ MBB.addSuccessor(SkipBB);
+
+ return SkipBB;
+}
+
+// Returns true if a branch over the block was inserted.
+bool SIInsertSkips::skipMaskBranch(MachineInstr &MI,
+ MachineBasicBlock &SrcMBB) {
+ MachineBasicBlock *DestBB = MI.getOperand(0).getMBB();
+
+ if (!shouldSkip(**SrcMBB.succ_begin(), *DestBB))
+ return false;
+
+ const DebugLoc &DL = MI.getDebugLoc();
+ MachineBasicBlock::iterator InsPt = std::next(MI.getIterator());
+
+ BuildMI(SrcMBB, InsPt, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
+ .addMBB(DestBB);
+
+ return true;
+}
+
+bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+ TII = ST.getInstrInfo();
+ TRI = &TII->getRegisterInfo();
+ SkipThreshold = SkipThresholdFlag;
+
+ bool HaveKill = false;
+ bool MadeChange = false;
+
+ // Track depth of exec mask, divergent branches.
+ SmallVector<MachineBasicBlock *, 16> ExecBranchStack;
+
+ MachineFunction::iterator NextBB;
+
+ MachineBasicBlock *EmptyMBBAtEnd = nullptr;
+
+ for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
+ BI != BE; BI = NextBB) {
+ NextBB = std::next(BI);
+ MachineBasicBlock &MBB = *BI;
+
+ if (!ExecBranchStack.empty() && ExecBranchStack.back() == &MBB) {
+ // Reached convergence point for last divergent branch.
+ ExecBranchStack.pop_back();
+ }
+
+ if (HaveKill && ExecBranchStack.empty()) {
+ HaveKill = false;
+
+ // TODO: Insert skip if exec is 0?
+ }
+
+ MachineBasicBlock::iterator I, Next;
+ for (I = MBB.begin(); I != MBB.end(); I = Next) {
+ Next = std::next(I);
+
+ MachineInstr &MI = *I;
+
+ switch (MI.getOpcode()) {
+ case AMDGPU::SI_MASK_BRANCH: {
+ ExecBranchStack.push_back(MI.getOperand(0).getMBB());
+ MadeChange |= skipMaskBranch(MI, MBB);
+ break;
+ }
+ case AMDGPU::S_BRANCH: {
+ // Optimize out branches to the next block.
+ // FIXME: Shouldn't this be handled by BranchFolding?
+ if (MBB.isLayoutSuccessor(MI.getOperand(0).getMBB()))
+ MI.eraseFromParent();
+ break;
+ }
+ case AMDGPU::SI_KILL_TERMINATOR: {
+ MadeChange = true;
+ kill(MI);
+
+ if (ExecBranchStack.empty()) {
+ if (skipIfDead(MI, *NextBB)) {
+ NextBB = std::next(BI);
+ BE = MF.end();
+ Next = MBB.end();
+ }
+ } else {
+ HaveKill = true;
+ }
+
+ MI.eraseFromParent();
+ break;
+ }
+ case AMDGPU::SI_RETURN: {
+ // FIXME: Should move somewhere else
+ assert(!MF.getInfo<SIMachineFunctionInfo>()->returnsVoid());
+
+ // Graphics shaders returning non-void shouldn't contain S_ENDPGM,
+ // because external bytecode will be appended at the end.
+ if (BI != --MF.end() || I != MBB.getFirstTerminator()) {
+ // SI_RETURN is not the last instruction. Add an empty block at
+ // the end and jump there.
+ if (!EmptyMBBAtEnd) {
+ EmptyMBBAtEnd = MF.CreateMachineBasicBlock();
+ MF.insert(MF.end(), EmptyMBBAtEnd);
+ }
+
+ MBB.addSuccessor(EmptyMBBAtEnd);
+ BuildMI(*BI, I, MI.getDebugLoc(), TII->get(AMDGPU::S_BRANCH))
+ .addMBB(EmptyMBBAtEnd);
+ I->eraseFromParent();
+ }
+ }
+ default:
+ break;
+ }
+ }
+ }
+
+ return MadeChange;
+}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp b/contrib/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
index d24588d6c143..202a1e9ed8ac 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
@@ -21,6 +21,7 @@
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
+#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -29,6 +30,7 @@
#define DEBUG_TYPE "si-insert-waits"
using namespace llvm;
+using namespace llvm::AMDGPU;
namespace {
@@ -59,13 +61,14 @@ private:
const SIInstrInfo *TII;
const SIRegisterInfo *TRI;
const MachineRegisterInfo *MRI;
-
- /// \brief Constant hardware limits
- static const Counters WaitCounts;
+ IsaVersion IV;
/// \brief Constant zero value
static const Counters ZeroCounts;
+ /// \brief Hardware limits
+ Counters HardwareLimits;
+
/// \brief Counter values we have already waited on.
Counters WaitedOn;
@@ -90,6 +93,9 @@ private:
bool LastInstWritesM0;
+ /// Whether or not we have flat operations outstanding.
+ bool IsFlatOutstanding;
+
/// \brief Whether the machine function returns void
bool ReturnsVoid;
@@ -145,7 +151,7 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "SI insert wait instructions";
}
@@ -170,11 +176,12 @@ FunctionPass *llvm::createSIInsertWaitsPass() {
return new SIInsertWaits();
}
-const Counters SIInsertWaits::WaitCounts = { { 15, 7, 15 } };
const Counters SIInsertWaits::ZeroCounts = { { 0, 0, 0 } };
-static bool readsVCCZ(unsigned Opcode) {
- return Opcode == AMDGPU::S_CBRANCH_VCCNZ || Opcode == AMDGPU::S_CBRANCH_VCCZ;
+static bool readsVCCZ(const MachineInstr &MI) {
+ unsigned Opc = MI.getOpcode();
+ return (Opc == AMDGPU::S_CBRANCH_VCCNZ || Opc == AMDGPU::S_CBRANCH_VCCZ) &&
+ !MI.getOperand(1).isUndef();
}
bool SIInsertWaits::hasOutstandingLGKM() const {
@@ -188,8 +195,7 @@ Counters SIInsertWaits::getHwCounts(MachineInstr &MI) {
Result.Named.VM = !!(TSFlags & SIInstrFlags::VM_CNT);
// Only consider stores or EXP for EXP_CNT
- Result.Named.EXP = !!(TSFlags & SIInstrFlags::EXP_CNT &&
- (MI.getOpcode() == AMDGPU::EXP || MI.getDesc().mayStore()));
+ Result.Named.EXP = !!(TSFlags & SIInstrFlags::EXP_CNT) && MI.mayStore();
  // LGKM may use larger values
if (TSFlags & SIInstrFlags::LGKM_CNT) {
@@ -231,9 +237,10 @@ bool SIInsertWaits::isOpRelevant(MachineOperand &Op) {
if (Op.isDef())
return true;
- // For exports all registers are relevant
+ // For exports all registers are relevant.
+ // TODO: Skip undef/disabled registers.
MachineInstr &MI = *Op.getParent();
- if (MI.getOpcode() == AMDGPU::EXP)
+ if (TII->isEXP(MI))
return true;
// For stores the stored value is also relevant
@@ -245,12 +252,6 @@ bool SIInsertWaits::isOpRelevant(MachineOperand &Op) {
// operand comes before the value operand and it may have
// multiple data operands.
- if (TII->isDS(MI) || TII->isFLAT(MI)) {
- MachineOperand *Data = TII->getNamedOperand(MI, AMDGPU::OpName::data);
- if (Data && Op.isIdenticalTo(*Data))
- return true;
- }
-
if (TII->isDS(MI)) {
MachineOperand *Data0 = TII->getNamedOperand(MI, AMDGPU::OpName::data0);
if (Data0 && Op.isIdenticalTo(*Data0))
@@ -260,6 +261,12 @@ bool SIInsertWaits::isOpRelevant(MachineOperand &Op) {
return Data1 && Op.isIdenticalTo(*Data1);
}
+ if (TII->isFLAT(MI)) {
+ MachineOperand *Data = TII->getNamedOperand(MI, AMDGPU::OpName::vdata);
+ if (Data && Op.isIdenticalTo(*Data))
+ return true;
+ }
+
// NOTE: This assumes that the value operand is before the
// address operand, and that there is only one value operand.
for (MachineInstr::mop_iterator I = MI.operands_begin(),
@@ -292,6 +299,9 @@ void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB,
Counters Limit = ZeroCounts;
unsigned Sum = 0;
+ if (TII->mayAccessFlatAddressSpace(*I))
+ IsFlatOutstanding = true;
+
for (unsigned i = 0; i < 3; ++i) {
LastIssued.Array[i] += Increment.Array[i];
if (Increment.Array[i])
@@ -330,7 +340,7 @@ void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB,
// Remember which export instructions we have seen
if (Increment.Named.EXP) {
- ExpInstrTypesSeen |= I->getOpcode() == AMDGPU::EXP ? 1 : 2;
+ ExpInstrTypesSeen |= TII->isEXP(*I) ? 1 : 2;
}
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
@@ -366,8 +376,9 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
// Figure out if the async instructions execute in order
bool Ordered[3];
- // VM_CNT is always ordered
- Ordered[0] = true;
+ // VM_CNT is always ordered except when there are flat instructions, which
+ // can return out of order.
+ Ordered[0] = !IsFlatOutstanding;
// EXP_CNT is unordered if we have both EXP & VM-writes
Ordered[1] = ExpInstrTypesSeen == 3;
@@ -376,7 +387,7 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
Ordered[2] = false;
// The values we are going to put into the S_WAITCNT instruction
- Counters Counts = WaitCounts;
+ Counters Counts = HardwareLimits;
// Do we really need to wait?
bool NeedWait = false;
@@ -392,7 +403,7 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
unsigned Value = LastIssued.Array[i] - Required.Array[i];
// Adjust the value to the real hardware possibilities.
- Counts.Array[i] = std::min(Value, WaitCounts.Array[i]);
+ Counts.Array[i] = std::min(Value, HardwareLimits.Array[i]);
} else
Counts.Array[i] = 0;
@@ -410,12 +421,14 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
// Build the wait instruction
BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
- .addImm((Counts.Named.VM & 0xF) |
- ((Counts.Named.EXP & 0x7) << 4) |
- ((Counts.Named.LGKM & 0xF) << 8));
+ .addImm(encodeWaitcnt(IV,
+ Counts.Named.VM,
+ Counts.Named.EXP,
+ Counts.Named.LGKM));
LastOpcodeType = OTHER;
LastInstWritesM0 = false;
+ IsFlatOutstanding = false;
return true;
}
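
The removed lines show the fixed S_WAITCNT field layout the pass used to hard-code: vmcnt in bits [3:0], expcnt in [6:4], lgkmcnt in [11:8]. encodeWaitcnt() now produces the layout appropriate to the ISA version; a sketch of the old packing makes the change easy to follow:

#include <cstdint>

// Legacy S_WAITCNT immediate packing, as in the code this patch replaces.
static uint32_t encodeWaitcntLegacy(unsigned VM, unsigned EXP, unsigned LGKM) {
  return (VM & 0xF) | ((EXP & 0x7) << 4) | ((LGKM & 0xF) << 8);
}
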
@@ -440,9 +453,9 @@ void SIInsertWaits::handleExistingWait(MachineBasicBlock::iterator I) {
unsigned Imm = I->getOperand(0).getImm();
Counters Counts, WaitOn;
- Counts.Named.VM = Imm & 0xF;
- Counts.Named.EXP = (Imm >> 4) & 0x7;
- Counts.Named.LGKM = (Imm >> 8) & 0xF;
+ Counts.Named.VM = decodeVmcnt(IV, Imm);
+ Counts.Named.EXP = decodeExpcnt(IV, Imm);
+ Counts.Named.LGKM = decodeLgkmcnt(IV, Imm);
for (unsigned i = 0; i < 3; ++i) {
if (Counts.Array[i] <= LastIssued.Array[i])
@@ -518,26 +531,40 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
TII = ST->getInstrInfo();
TRI = &TII->getRegisterInfo();
MRI = &MF.getRegInfo();
+ IV = getIsaVersion(ST->getFeatureBits());
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+
+ HardwareLimits.Named.VM = getVmcntBitMask(IV);
+ HardwareLimits.Named.EXP = getExpcntBitMask(IV);
+ HardwareLimits.Named.LGKM = getLgkmcntBitMask(IV);
WaitedOn = ZeroCounts;
DelayedWaitOn = ZeroCounts;
LastIssued = ZeroCounts;
LastOpcodeType = OTHER;
LastInstWritesM0 = false;
- ReturnsVoid = MF.getInfo<SIMachineFunctionInfo>()->returnsVoid();
+ IsFlatOutstanding = false;
+ ReturnsVoid = MFI->returnsVoid();
memset(&UsedRegs, 0, sizeof(UsedRegs));
memset(&DefinedRegs, 0, sizeof(DefinedRegs));
SmallVector<MachineInstr *, 4> RemoveMI;
+ SmallVector<MachineBasicBlock *, 4> EndPgmBlocks;
+
+ bool HaveScalarStores = false;
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
BI != BE; ++BI) {
MachineBasicBlock &MBB = *BI;
+
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
I != E; ++I) {
+ if (!HaveScalarStores && TII->isScalarStore(*I))
+ HaveScalarStores = true;
+
if (ST->getGeneration() <= SISubtarget::SEA_ISLANDS) {
// There is a hardware bug on CI/SI where SMRD instruction may corrupt
// vccz bit, so when we detect that an instruction may read from a
@@ -557,7 +584,7 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
}
// Check if we need to apply the bug work-around
- if (readsVCCZ(I->getOpcode()) && VCCZCorrupt) {
+ if (VCCZCorrupt && readsVCCZ(*I)) {
DEBUG(dbgs() << "Inserting vccz bug work-around before: " << *I << '\n');
// Wait on everything, not just LGKM. vccz reads usually come from
@@ -572,7 +599,7 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
// vcc and then writing it back to the register.
BuildMI(MBB, I, I->getDebugLoc(), TII->get(AMDGPU::S_MOV_B64),
AMDGPU::VCC)
- .addReg(AMDGPU::VCC);
+ .addReg(AMDGPU::VCC);
}
}
@@ -590,8 +617,9 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
// S_SENDMSG implicitly waits for all outstanding LGKM transfers to finish,
// but we also want to wait for any other outstanding transfers before
// signalling other hardware blocks
- if (I->getOpcode() == AMDGPU::S_BARRIER ||
- I->getOpcode() == AMDGPU::S_SENDMSG)
+ if ((I->getOpcode() == AMDGPU::S_BARRIER &&
+ ST->needWaitcntBeforeBarrier()) ||
+ I->getOpcode() == AMDGPU::S_SENDMSG)
Required = LastIssued;
else
Required = handleOperands(*I);
@@ -605,12 +633,45 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
pushInstruction(MBB, I, Increment);
handleSendMsg(MBB, I);
+
+ if (I->getOpcode() == AMDGPU::S_ENDPGM ||
+ I->getOpcode() == AMDGPU::SI_RETURN)
+ EndPgmBlocks.push_back(&MBB);
}
// Wait for everything at the end of the MBB
Changes |= insertWait(MBB, MBB.getFirstTerminator(), LastIssued);
}
+ if (HaveScalarStores) {
+ // If scalar writes are used, the cache must be flushed or else the next
+ // wave to reuse the same scratch memory can be clobbered.
+ //
+ // Insert s_dcache_wb at wave termination points if there were any scalar
+ // stores, and only if the cache hasn't already been flushed. This could be
+ // improved by looking across blocks for flushes in postdominating blocks
+ // from the stores, but an explicitly requested flush is probably very rare.
+ for (MachineBasicBlock *MBB : EndPgmBlocks) {
+ bool SeenDCacheWB = false;
+
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
+ I != E; ++I) {
+
+ if (I->getOpcode() == AMDGPU::S_DCACHE_WB)
+ SeenDCacheWB = true;
+ else if (TII->isScalarStore(*I))
+ SeenDCacheWB = false;
+
+ // FIXME: It would be better to insert this before a waitcnt if any.
+ if ((I->getOpcode() == AMDGPU::S_ENDPGM ||
+ I->getOpcode() == AMDGPU::SI_RETURN) && !SeenDCacheWB) {
+ Changes = true;
+ BuildMI(*MBB, I, I->getDebugLoc(), TII->get(AMDGPU::S_DCACHE_WB));
+ }
+ }
+ }
+ }
+
for (MachineInstr *I : RemoveMI)
I->eraseFromParent();
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/contrib/llvm/lib/Target/AMDGPU/SIInstrFormats.td
index 6163f0547bd5..5523ec142ba7 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInstrFormats.td
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrFormats.td
@@ -15,78 +15,111 @@ class InstSI <dag outs, dag ins, string asm = "",
list<dag> pattern = []> :
AMDGPUInst<outs, ins, asm, pattern>, PredicateControl {
- field bits<1> VM_CNT = 0;
- field bits<1> EXP_CNT = 0;
- field bits<1> LGKM_CNT = 0;
-
- field bits<1> SALU = 0;
- field bits<1> VALU = 0;
-
- field bits<1> SOP1 = 0;
- field bits<1> SOP2 = 0;
- field bits<1> SOPC = 0;
- field bits<1> SOPK = 0;
- field bits<1> SOPP = 0;
-
- field bits<1> VOP1 = 0;
- field bits<1> VOP2 = 0;
- field bits<1> VOP3 = 0;
- field bits<1> VOPC = 0;
- field bits<1> SDWA = 0;
- field bits<1> DPP = 0;
-
- field bits<1> MUBUF = 0;
- field bits<1> MTBUF = 0;
- field bits<1> SMRD = 0;
- field bits<1> DS = 0;
- field bits<1> MIMG = 0;
- field bits<1> FLAT = 0;
+ // Low bits - basic encoding information.
+ field bit SALU = 0;
+ field bit VALU = 0;
+
+ // SALU instruction formats.
+ field bit SOP1 = 0;
+ field bit SOP2 = 0;
+ field bit SOPC = 0;
+ field bit SOPK = 0;
+ field bit SOPP = 0;
+
+ // VALU instruction formats.
+ field bit VOP1 = 0;
+ field bit VOP2 = 0;
+ field bit VOPC = 0;
+ field bit VOP3 = 0;
+ field bit VINTRP = 0;
+ field bit SDWA = 0;
+ field bit DPP = 0;
+
+ // Memory instruction formats.
+ field bit MUBUF = 0;
+ field bit MTBUF = 0;
+ field bit SMRD = 0;
+ field bit MIMG = 0;
+ field bit EXP = 0;
+ field bit FLAT = 0;
+ field bit DS = 0;
+
+ // Pseudo instruction formats.
+ field bit VGPRSpill = 0;
+ field bit SGPRSpill = 0;
+
+ // High bits - other information.
+ field bit VM_CNT = 0;
+ field bit EXP_CNT = 0;
+ field bit LGKM_CNT = 0;
// Whether WQM _must_ be enabled for this instruction.
- field bits<1> WQM = 0;
- field bits<1> VGPRSpill = 0;
+ field bit WQM = 0;
+
+ // Whether WQM _must_ be disabled for this instruction.
+ field bit DisableWQM = 0;
+
+ field bit Gather4 = 0;
+
+ // Most SOPK instructions treat the immediate as a signed 16-bit value;
+ // however, some use it as unsigned.
+ field bit SOPKZext = 0;
+
+ // This is an s_store_dword* instruction that requires a cache flush
+ // on wave termination. It is necessary to distinguish it from other
+ // mayStore SMEM instructions, such as the cache flush ones.
+ field bit ScalarStore = 0;
+
+ // Whether the operands can be ignored when computing the
+ // instruction size.
+ field bit FixedSize = 0;
// This bit tells the assembler to use the 32-bit encoding in case it
// is unable to infer the encoding from the operands.
- field bits<1> VOPAsmPrefer32Bit = 0;
+ field bit VOPAsmPrefer32Bit = 0;
- field bits<1> Gather4 = 0;
+ // These need to be kept in sync with the enum in SIInstrFlags.
+ let TSFlags{0} = SALU;
+ let TSFlags{1} = VALU;
- // Whether WQM _must_ be disabled for this instruction.
- field bits<1> DisableWQM = 0;
+ let TSFlags{2} = SOP1;
+ let TSFlags{3} = SOP2;
+ let TSFlags{4} = SOPC;
+ let TSFlags{5} = SOPK;
+ let TSFlags{6} = SOPP;
- // These need to be kept in sync with the enum in SIInstrFlags.
- let TSFlags{0} = VM_CNT;
- let TSFlags{1} = EXP_CNT;
- let TSFlags{2} = LGKM_CNT;
-
- let TSFlags{3} = SALU;
- let TSFlags{4} = VALU;
-
- let TSFlags{5} = SOP1;
- let TSFlags{6} = SOP2;
- let TSFlags{7} = SOPC;
- let TSFlags{8} = SOPK;
- let TSFlags{9} = SOPP;
-
- let TSFlags{10} = VOP1;
- let TSFlags{11} = VOP2;
- let TSFlags{12} = VOP3;
- let TSFlags{13} = VOPC;
+ let TSFlags{7} = VOP1;
+ let TSFlags{8} = VOP2;
+ let TSFlags{9} = VOPC;
+ let TSFlags{10} = VOP3;
+
+ let TSFlags{13} = VINTRP;
let TSFlags{14} = SDWA;
let TSFlags{15} = DPP;
let TSFlags{16} = MUBUF;
let TSFlags{17} = MTBUF;
let TSFlags{18} = SMRD;
- let TSFlags{19} = DS;
- let TSFlags{20} = MIMG;
+ let TSFlags{19} = MIMG;
+ let TSFlags{20} = EXP;
let TSFlags{21} = FLAT;
- let TSFlags{22} = WQM;
+ let TSFlags{22} = DS;
+
let TSFlags{23} = VGPRSpill;
- let TSFlags{24} = VOPAsmPrefer32Bit;
- let TSFlags{25} = Gather4;
- let TSFlags{26} = DisableWQM;
+ let TSFlags{24} = SGPRSpill;
+
+ let TSFlags{32} = VM_CNT;
+ let TSFlags{33} = EXP_CNT;
+ let TSFlags{34} = LGKM_CNT;
+
+ let TSFlags{35} = WQM;
+ let TSFlags{36} = DisableWQM;
+ let TSFlags{37} = Gather4;
+
+ let TSFlags{38} = SOPKZext;
+ let TSFlags{39} = ScalarStore;
+ let TSFlags{40} = FixedSize;
+ let TSFlags{41} = VOPAsmPrefer32Bit;
let SchedRW = [Write32Bit];
@@ -95,6 +128,7 @@ class InstSI <dag outs, dag ins, string asm = "",
field bits<1> DisableDecoder = 0;
let isAsmParserOnly = !if(!eq(DisableDecoder{0}, {0}), 0, 1);
+ let AsmVariantName = AMDGPUAsmVariants.Default;
}
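
The TSFlags lets above define the bit positions that the C++ side reads back, with the format bits in the low word and the "other information" bits starting at 32. A minimal sketch of how such a query looks; the namespace and constant names are illustrative, not the actual SIInstrFlags definitions.

#include <cstdint>

namespace SIInstrFlagsSketch {
  const uint64_t SALU   = UINT64_C(1) << 0;
  const uint64_t VALU   = UINT64_C(1) << 1;
  const uint64_t VM_CNT = UINT64_C(1) << 32; // "high bits" begin at 32
}

static bool isVALUEncoding(uint64_t TSFlags) {
  return (TSFlags & SIInstrFlagsSketch::VALU) != 0;
}
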
class PseudoInstSI<dag outs, dag ins, list<dag> pattern = []>
@@ -103,376 +137,39 @@ class PseudoInstSI<dag outs, dag ins, list<dag> pattern = []>
let isCodeGenOnly = 1;
}
-class Enc32 {
- field bits<32> Inst;
- int Size = 4;
-}
-
-class Enc64 {
- field bits<64> Inst;
- int Size = 8;
-}
-
-class VOPDstOperand <RegisterClass rc> : RegisterOperand <rc, "printVOPDst">;
-
-let Uses = [EXEC] in {
-
-class VOPAnyCommon <dag outs, dag ins, string asm, list<dag> pattern> :
- InstSI <outs, ins, asm, pattern> {
-
- let mayLoad = 0;
- let mayStore = 0;
- let hasSideEffects = 0;
- let UseNamedOperandTable = 1;
- let VALU = 1;
-}
-
-class VOPCCommon <dag ins, string asm, list<dag> pattern> :
- VOPAnyCommon <(outs), ins, asm, pattern> {
-
- let VOPC = 1;
- let Size = 4;
- let Defs = [VCC];
-}
-
-class VOP1Common <dag outs, dag ins, string asm, list<dag> pattern> :
- VOPAnyCommon <outs, ins, asm, pattern> {
-
- let VOP1 = 1;
- let Size = 4;
-}
-
-class VOP2Common <dag outs, dag ins, string asm, list<dag> pattern> :
- VOPAnyCommon <outs, ins, asm, pattern> {
-
- let VOP2 = 1;
- let Size = 4;
-}
-
-class VOP3Common <dag outs, dag ins, string asm = "",
- list<dag> pattern = [], bit HasMods = 0,
- bit VOP3Only = 0> :
- VOPAnyCommon <outs, ins, asm, pattern> {
-
- // Using complex patterns gives VOP3 patterns a very high complexity rating,
- // but standalone patterns are almost always prefered, so we need to adjust the
- // priority lower. The goal is to use a high number to reduce complexity to
- // zero (or less than zero).
- let AddedComplexity = -1000;
-
- let VOP3 = 1;
- let VALU = 1;
-
- let AsmMatchConverter =
- !if(!eq(VOP3Only,1),
- "cvtVOP3",
- !if(!eq(HasMods,1), "cvtVOP3_2_mod", ""));
-
- let isCodeGenOnly = 0;
-
- int Size = 8;
-
- // Because SGPRs may be allowed if there are multiple operands, we
- // need a post-isel hook to insert copies in order to avoid
- // violating constant bus requirements.
- let hasPostISelHook = 1;
-}
-
-} // End Uses = [EXEC]
-
-//===----------------------------------------------------------------------===//
-// Scalar operations
-//===----------------------------------------------------------------------===//
-
-class SOP1e <bits<8> op> : Enc32 {
- bits<7> sdst;
- bits<8> src0;
-
- let Inst{7-0} = src0;
- let Inst{15-8} = op;
- let Inst{22-16} = sdst;
- let Inst{31-23} = 0x17d; //encoding;
-}
-
-class SOP2e <bits<7> op> : Enc32 {
- bits<7> sdst;
- bits<8> src0;
- bits<8> src1;
-
- let Inst{7-0} = src0;
- let Inst{15-8} = src1;
- let Inst{22-16} = sdst;
- let Inst{29-23} = op;
- let Inst{31-30} = 0x2; // encoding
-}
-
-class SOPCe <bits<7> op> : Enc32 {
- bits<8> src0;
- bits<8> src1;
-
- let Inst{7-0} = src0;
- let Inst{15-8} = src1;
- let Inst{22-16} = op;
- let Inst{31-23} = 0x17e;
-}
-
-class SOPKe <bits<5> op> : Enc32 {
- bits <7> sdst;
- bits <16> simm16;
-
- let Inst{15-0} = simm16;
- let Inst{22-16} = sdst;
- let Inst{27-23} = op;
- let Inst{31-28} = 0xb; //encoding
-}
-
-class SOPK64e <bits<5> op> : Enc64 {
- bits <7> sdst = 0;
- bits <16> simm16;
- bits <32> imm;
-
- let Inst{15-0} = simm16;
- let Inst{22-16} = sdst;
- let Inst{27-23} = op;
- let Inst{31-28} = 0xb;
-
- let Inst{63-32} = imm;
-}
-
-class SOPPe <bits<7> op> : Enc32 {
- bits <16> simm16;
-
- let Inst{15-0} = simm16;
- let Inst{22-16} = op;
- let Inst{31-23} = 0x17f; // encoding
-}
-
-class SMRDe <bits<5> op, bits<1> imm> : Enc32 {
- bits<7> sdst;
- bits<7> sbase;
-
- let Inst{8} = imm;
- let Inst{14-9} = sbase{6-1};
- let Inst{21-15} = sdst;
- let Inst{26-22} = op;
- let Inst{31-27} = 0x18; //encoding
-}
-
-class SMRD_IMMe <bits<5> op> : SMRDe<op, 1> {
- bits<8> offset;
- let Inst{7-0} = offset;
-}
-
-class SMRD_SOFFe <bits<5> op> : SMRDe<op, 0> {
- bits<8> soff;
- let Inst{7-0} = soff;
-}
-
-
-
-class SMRD_IMMe_ci <bits<5> op> : Enc64 {
- bits<7> sdst;
- bits<7> sbase;
- bits<32> offset;
-
- let Inst{7-0} = 0xff;
- let Inst{8} = 0;
- let Inst{14-9} = sbase{6-1};
- let Inst{21-15} = sdst;
- let Inst{26-22} = op;
- let Inst{31-27} = 0x18; //encoding
- let Inst{63-32} = offset;
-}
-
-let SchedRW = [WriteSALU] in {
-class SOP1 <dag outs, dag ins, string asm, list<dag> pattern> :
- InstSI<outs, ins, asm, pattern> {
- let mayLoad = 0;
- let mayStore = 0;
- let hasSideEffects = 0;
- let isCodeGenOnly = 0;
- let SALU = 1;
- let SOP1 = 1;
-}
-
-class SOP2 <dag outs, dag ins, string asm, list<dag> pattern> :
- InstSI <outs, ins, asm, pattern> {
-
- let mayLoad = 0;
- let mayStore = 0;
- let hasSideEffects = 0;
- let isCodeGenOnly = 0;
- let SALU = 1;
- let SOP2 = 1;
-
- let UseNamedOperandTable = 1;
-}
-
-class SOPC <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
- InstSI<outs, ins, asm, pattern>, SOPCe <op> {
-
- let mayLoad = 0;
- let mayStore = 0;
- let hasSideEffects = 0;
+class SPseudoInstSI<dag outs, dag ins, list<dag> pattern = []>
+ : PseudoInstSI<outs, ins, pattern> {
let SALU = 1;
- let SOPC = 1;
- let isCodeGenOnly = 0;
- let Defs = [SCC];
-
- let UseNamedOperandTable = 1;
}
-class SOPK <dag outs, dag ins, string asm, list<dag> pattern> :
- InstSI <outs, ins , asm, pattern> {
-
- let mayLoad = 0;
- let mayStore = 0;
- let hasSideEffects = 0;
- let SALU = 1;
- let SOPK = 1;
-
- let UseNamedOperandTable = 1;
+class VPseudoInstSI<dag outs, dag ins, list<dag> pattern = []>
+ : PseudoInstSI<outs, ins, pattern> {
+ let VALU = 1;
+ let Uses = [EXEC];
}
-class SOPP <bits<7> op, dag ins, string asm, list<dag> pattern = []> :
- InstSI <(outs), ins, asm, pattern >, SOPPe <op> {
+class CFPseudoInstSI<dag outs, dag ins, list<dag> pattern = [],
+ bit UseExec = 0, bit DefExec = 0> :
+ SPseudoInstSI<outs, ins, pattern> {
+ let Uses = !if(UseExec, [EXEC], []);
+ let Defs = !if(DefExec, [EXEC, SCC], [SCC]);
let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
- let SALU = 1;
- let SOPP = 1;
-
- let UseNamedOperandTable = 1;
-}
-
-} // let SchedRW = [WriteSALU]
-
-class SMRD <dag outs, dag ins, string asm, list<dag> pattern> :
- InstSI<outs, ins, asm, pattern> {
-
- let LGKM_CNT = 1;
- let SMRD = 1;
- let mayStore = 0;
- let mayLoad = 1;
- let hasSideEffects = 0;
- let UseNamedOperandTable = 1;
- let SchedRW = [WriteSMEM];
-}
-
-//===----------------------------------------------------------------------===//
-// Vector ALU operations
-//===----------------------------------------------------------------------===//
-
-class VOP1e <bits<8> op> : Enc32 {
- bits<8> vdst;
- bits<9> src0;
-
- let Inst{8-0} = src0;
- let Inst{16-9} = op;
- let Inst{24-17} = vdst;
- let Inst{31-25} = 0x3f; //encoding
-}
-
-class VOP2e <bits<6> op> : Enc32 {
- bits<8> vdst;
- bits<9> src0;
- bits<8> src1;
-
- let Inst{8-0} = src0;
- let Inst{16-9} = src1;
- let Inst{24-17} = vdst;
- let Inst{30-25} = op;
- let Inst{31} = 0x0; //encoding
-}
-
-class VOP2_MADKe <bits<6> op> : Enc64 {
-
- bits<8> vdst;
- bits<9> src0;
- bits<8> src1;
- bits<32> imm;
-
- let Inst{8-0} = src0;
- let Inst{16-9} = src1;
- let Inst{24-17} = vdst;
- let Inst{30-25} = op;
- let Inst{31} = 0x0; // encoding
- let Inst{63-32} = imm;
-}
-
-class VOP3a <bits<9> op> : Enc64 {
- bits<2> src0_modifiers;
- bits<9> src0;
- bits<2> src1_modifiers;
- bits<9> src1;
- bits<2> src2_modifiers;
- bits<9> src2;
- bits<1> clamp;
- bits<2> omod;
-
- let Inst{8} = src0_modifiers{1};
- let Inst{9} = src1_modifiers{1};
- let Inst{10} = src2_modifiers{1};
- let Inst{11} = clamp;
- let Inst{25-17} = op;
- let Inst{31-26} = 0x34; //encoding
- let Inst{40-32} = src0;
- let Inst{49-41} = src1;
- let Inst{58-50} = src2;
- let Inst{60-59} = omod;
- let Inst{61} = src0_modifiers{0};
- let Inst{62} = src1_modifiers{0};
- let Inst{63} = src2_modifiers{0};
-}
-
-class VOP3e <bits<9> op> : VOP3a <op> {
- bits<8> vdst;
-
- let Inst{7-0} = vdst;
}
-// Encoding used for VOPC instructions encoded as VOP3
-// Differs from VOP3e by destination name (sdst) as VOPC doesn't have vector dst
-class VOP3ce <bits<9> op> : VOP3a <op> {
- bits<8> sdst;
-
- let Inst{7-0} = sdst;
+class Enc32 {
+ field bits<32> Inst;
+ int Size = 4;
}
-class VOP3be <bits<9> op> : Enc64 {
- bits<8> vdst;
- bits<2> src0_modifiers;
- bits<9> src0;
- bits<2> src1_modifiers;
- bits<9> src1;
- bits<2> src2_modifiers;
- bits<9> src2;
- bits<7> sdst;
- bits<2> omod;
-
- let Inst{7-0} = vdst;
- let Inst{14-8} = sdst;
- let Inst{25-17} = op;
- let Inst{31-26} = 0x34; //encoding
- let Inst{40-32} = src0;
- let Inst{49-41} = src1;
- let Inst{58-50} = src2;
- let Inst{60-59} = omod;
- let Inst{61} = src0_modifiers{0};
- let Inst{62} = src1_modifiers{0};
- let Inst{63} = src2_modifiers{0};
+class Enc64 {
+ field bits<64> Inst;
+ int Size = 8;
}
-class VOPCe <bits<8> op> : Enc32 {
- bits<9> src0;
- bits<8> src1;
-
- let Inst{8-0} = src0;
- let Inst{16-9} = src1;
- let Inst{24-17} = op;
- let Inst{31-25} = 0x3e;
-}
+class VOPDstOperand <RegisterClass rc> : RegisterOperand <rc, "printVOPDst">;
class VINTRPe <bits<2> op> : Enc32 {
bits<8> vdst;
@@ -488,88 +185,6 @@ class VINTRPe <bits<2> op> : Enc32 {
let Inst{31-26} = 0x32; // encoding
}
-class DSe <bits<8> op> : Enc64 {
- bits<8> vdst;
- bits<1> gds;
- bits<8> addr;
- bits<8> data0;
- bits<8> data1;
- bits<8> offset0;
- bits<8> offset1;
-
- let Inst{7-0} = offset0;
- let Inst{15-8} = offset1;
- let Inst{17} = gds;
- let Inst{25-18} = op;
- let Inst{31-26} = 0x36; //encoding
- let Inst{39-32} = addr;
- let Inst{47-40} = data0;
- let Inst{55-48} = data1;
- let Inst{63-56} = vdst;
-}
-
-class MUBUFe <bits<7> op> : Enc64 {
- bits<12> offset;
- bits<1> offen;
- bits<1> idxen;
- bits<1> glc;
- bits<1> addr64;
- bits<1> lds;
- bits<8> vaddr;
- bits<8> vdata;
- bits<7> srsrc;
- bits<1> slc;
- bits<1> tfe;
- bits<8> soffset;
-
- let Inst{11-0} = offset;
- let Inst{12} = offen;
- let Inst{13} = idxen;
- let Inst{14} = glc;
- let Inst{15} = addr64;
- let Inst{16} = lds;
- let Inst{24-18} = op;
- let Inst{31-26} = 0x38; //encoding
- let Inst{39-32} = vaddr;
- let Inst{47-40} = vdata;
- let Inst{52-48} = srsrc{6-2};
- let Inst{54} = slc;
- let Inst{55} = tfe;
- let Inst{63-56} = soffset;
-}
-
-class MTBUFe <bits<3> op> : Enc64 {
- bits<8> vdata;
- bits<12> offset;
- bits<1> offen;
- bits<1> idxen;
- bits<1> glc;
- bits<1> addr64;
- bits<4> dfmt;
- bits<3> nfmt;
- bits<8> vaddr;
- bits<7> srsrc;
- bits<1> slc;
- bits<1> tfe;
- bits<8> soffset;
-
- let Inst{11-0} = offset;
- let Inst{12} = offen;
- let Inst{13} = idxen;
- let Inst{14} = glc;
- let Inst{15} = addr64;
- let Inst{18-16} = op;
- let Inst{22-19} = dfmt;
- let Inst{25-23} = nfmt;
- let Inst{31-26} = 0x3a; //encoding
- let Inst{39-32} = vaddr;
- let Inst{47-40} = vdata;
- let Inst{52-48} = srsrc{6-2};
- let Inst{54} = slc;
- let Inst{55} = tfe;
- let Inst{63-56} = soffset;
-}
-
class MIMGe <bits<7> op> : Enc64 {
bits<8> vdata;
bits<4> dmask;
@@ -600,26 +215,6 @@ class MIMGe <bits<7> op> : Enc64 {
let Inst{57-53} = ssamp{6-2};
}
-class FLATe<bits<7> op> : Enc64 {
- bits<8> addr;
- bits<8> data;
- bits<8> vdst;
- bits<1> slc;
- bits<1> glc;
- bits<1> tfe;
-
- // 15-0 is reserved.
- let Inst{16} = glc;
- let Inst{17} = slc;
- let Inst{24-18} = op;
- let Inst{31-26} = 0x37; // Encoding.
- let Inst{39-32} = addr;
- let Inst{47-40} = data;
- // 54-48 is reserved.
- let Inst{55} = tfe;
- let Inst{63-56} = vdst;
-}
-
class EXPe : Enc64 {
bits<4> en;
bits<6> tgt;
@@ -645,92 +240,37 @@ class EXPe : Enc64 {
let Uses = [EXEC] in {
-class VOP1 <bits<8> op, dag outs, dag ins, string asm, list<dag> pattern> :
- VOP1Common <outs, ins, asm, pattern>,
- VOP1e<op> {
- let isCodeGenOnly = 0;
-}
-
-class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> :
- VOP2Common <outs, ins, asm, pattern>, VOP2e<op> {
- let isCodeGenOnly = 0;
-}
-
-class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> :
- VOPCCommon <ins, asm, pattern>, VOPCe <op>;
-
class VINTRPCommon <dag outs, dag ins, string asm, list<dag> pattern> :
InstSI <outs, ins, asm, pattern> {
- let mayLoad = 1;
+ let VINTRP = 1;
+ // VINTRP instructions read parameter values from LDS, but these parameter
+ // values are stored outside of the LDS memory that is allocated to the
+ // shader for general purpose use.
+ //
+ // While it may be possible for ds_read/ds_write instructions to access
+ // the parameter values in LDS, this would essentially be an out-of-bounds
+ // memory access which we consider to be undefined behavior.
+ //
+ // So even though these instructions read memory, this memory is outside the
+ // addressable memory space for the shader, and we consider these instructions
+ // to be readnone.
+ let mayLoad = 0;
let mayStore = 0;
let hasSideEffects = 0;
}
-} // End Uses = [EXEC]
-
-//===----------------------------------------------------------------------===//
-// Vector I/O operations
-//===----------------------------------------------------------------------===//
-
-class DS <dag outs, dag ins, string asm, list<dag> pattern> :
- InstSI <outs, ins, asm, pattern> {
-
- let LGKM_CNT = 1;
- let DS = 1;
- let UseNamedOperandTable = 1;
- let Uses = [M0, EXEC];
-
- // Most instruction load and store data, so set this as the default.
- let mayLoad = 1;
- let mayStore = 1;
-
- let hasSideEffects = 0;
- let AsmMatchConverter = "cvtDS";
- let SchedRW = [WriteLDS];
-}
-
-class MUBUF <dag outs, dag ins, string asm, list<dag> pattern> :
- InstSI<outs, ins, asm, pattern> {
-
- let VM_CNT = 1;
+class EXPCommon<dag outs, dag ins, string asm, list<dag> pattern> :
+ InstSI<outs, ins, asm, pattern> {
+ let EXP = 1;
let EXP_CNT = 1;
- let MUBUF = 1;
- let Uses = [EXEC];
-
- let hasSideEffects = 0;
+ let mayLoad = 0; // Set to 1 if done bit is set.
+ let mayStore = 1;
let UseNamedOperandTable = 1;
- let AsmMatchConverter = "cvtMubuf";
- let SchedRW = [WriteVMEM];
-}
-
-class MTBUF <dag outs, dag ins, string asm, list<dag> pattern> :
- InstSI<outs, ins, asm, pattern> {
-
- let VM_CNT = 1;
- let EXP_CNT = 1;
- let MTBUF = 1;
let Uses = [EXEC];
-
- let hasSideEffects = 0;
- let UseNamedOperandTable = 1;
- let SchedRW = [WriteVMEM];
+ let SchedRW = [WriteExport];
}
-class FLAT <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
- InstSI<outs, ins, asm, pattern>, FLATe <op> {
- let FLAT = 1;
- // Internally, FLAT instruction are executed as both an LDS and a
- // Buffer instruction; so, they increment both VM_CNT and LGKM_CNT
- // and are not considered done until both have been decremented.
- let VM_CNT = 1;
- let LGKM_CNT = 1;
-
- let Uses = [EXEC, FLAT_SCR]; // M0
-
- let UseNamedOperandTable = 1;
- let hasSideEffects = 0;
- let SchedRW = [WriteVMEM];
-}
+} // End Uses = [EXEC]
class MIMG <dag outs, dag ins, string asm, list<dag> pattern> :
InstSI <outs, ins, asm, pattern> {
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 919081902a9c..26a8d22062a9 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -28,6 +28,13 @@
using namespace llvm;
+// Must be at least 4 to be able to branch over minimum unconditional branch
+// code. This is only for making it possible to write reasonably small tests for
+// long branches.
+static cl::opt<unsigned>
+BranchOffsetBits("amdgpu-s-branch-bits", cl::ReallyHidden, cl::init(16),
+ cl::desc("Restrict range of branch instructions (DEBUG)"));
+
SIInstrInfo::SIInstrInfo(const SISubtarget &ST)
: AMDGPUInstrInfo(ST), RI(), ST(ST) {}
@@ -258,7 +265,8 @@ bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg,
}
if (isMUBUF(LdSt) || isMTBUF(LdSt)) {
- if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::soffset) != -1)
+ const MachineOperand *SOffset = getNamedOperand(LdSt, AMDGPU::OpName::soffset);
+ if (SOffset && SOffset->isReg())
return false;
const MachineOperand *AddrReg =
@@ -270,6 +278,10 @@ bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg,
getNamedOperand(LdSt, AMDGPU::OpName::offset);
BaseReg = AddrReg->getReg();
Offset = OffsetImm->getImm();
+
+ if (SOffset) // soffset can be an inline immediate.
+ Offset += SOffset->getImm();
+
return true;
}
@@ -287,7 +299,7 @@ bool SIInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg,
}
if (isFLAT(LdSt)) {
- const MachineOperand *AddrReg = getNamedOperand(LdSt, AMDGPU::OpName::addr);
+ const MachineOperand *AddrReg = getNamedOperand(LdSt, AMDGPU::OpName::vaddr);
BaseReg = AddrReg->getReg();
Offset = 0;
return true;
@@ -302,20 +314,16 @@ bool SIInstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
const MachineOperand *FirstDst = nullptr;
const MachineOperand *SecondDst = nullptr;
- if (isDS(FirstLdSt) && isDS(SecondLdSt)) {
- FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdst);
- SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdst);
- }
-
- if (isSMRD(FirstLdSt) && isSMRD(SecondLdSt)) {
- FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::sdst);
- SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::sdst);
- }
-
if ((isMUBUF(FirstLdSt) && isMUBUF(SecondLdSt)) ||
(isMTBUF(FirstLdSt) && isMTBUF(SecondLdSt))) {
FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdata);
SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdata);
+ } else if (isSMRD(FirstLdSt) && isSMRD(SecondLdSt)) {
+ FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::sdst);
+ SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::sdst);
+ } else if (isDS(FirstLdSt) && isDS(SecondLdSt)) {
+ FirstDst = getNamedOperand(FirstLdSt, AMDGPU::OpName::vdst);
+ SecondDst = getNamedOperand(SecondLdSt, AMDGPU::OpName::vdst);
}
if (!FirstDst || !SecondDst)
@@ -342,62 +350,32 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const DebugLoc &DL, unsigned DestReg,
unsigned SrcReg, bool KillSrc) const {
+ const TargetRegisterClass *RC = RI.getPhysRegClass(DestReg);
- // If we are trying to copy to or from SCC, there is a bug somewhere else in
- // the backend. While it may be theoretically possible to do this, it should
- // never be necessary.
- assert(DestReg != AMDGPU::SCC && SrcReg != AMDGPU::SCC);
-
- static const int16_t Sub0_15[] = {
- AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
- AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
- AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
- AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
- };
-
- static const int16_t Sub0_15_64[] = {
- AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
- AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
- AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
- AMDGPU::sub12_sub13, AMDGPU::sub14_sub15,
- };
-
- static const int16_t Sub0_7[] = {
- AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
- AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
- };
-
- static const int16_t Sub0_7_64[] = {
- AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
- AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
- };
-
- static const int16_t Sub0_3[] = {
- AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
- };
-
- static const int16_t Sub0_3_64[] = {
- AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
- };
-
- static const int16_t Sub0_2[] = {
- AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2,
- };
-
- static const int16_t Sub0_1[] = {
- AMDGPU::sub0, AMDGPU::sub1,
- };
+ if (RC == &AMDGPU::VGPR_32RegClass) {
+ assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
+ AMDGPU::SReg_32RegClass.contains(SrcReg));
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ return;
+ }
- unsigned Opcode;
- ArrayRef<int16_t> SubIndices;
+ if (RC == &AMDGPU::SReg_32_XM0RegClass ||
+ RC == &AMDGPU::SReg_32RegClass) {
+ if (SrcReg == AMDGPU::SCC) {
+ BuildMI(MBB, MI, DL, get(AMDGPU::S_CSELECT_B32), DestReg)
+ .addImm(-1)
+ .addImm(0);
+ return;
+ }
- if (AMDGPU::SReg_32RegClass.contains(DestReg)) {
assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
return;
+ }
- } else if (AMDGPU::SReg_64RegClass.contains(DestReg)) {
+ if (RC == &AMDGPU::SReg_64RegClass) {
if (DestReg == AMDGPU::VCC) {
if (AMDGPU::SReg_64RegClass.contains(SrcReg)) {
BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), AMDGPU::VCC)
@@ -405,7 +383,7 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
} else {
// FIXME: Hack until VReg_1 removed.
assert(AMDGPU::VGPR_32RegClass.contains(SrcReg));
- BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_I32_e32))
+ BuildMI(MBB, MI, DL, get(AMDGPU::V_CMP_NE_U32_e32))
.addImm(0)
.addReg(SrcReg, getKillRegState(KillSrc));
}
@@ -417,62 +395,29 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B64), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
return;
+ }
- } else if (AMDGPU::SReg_128RegClass.contains(DestReg)) {
- assert(AMDGPU::SReg_128RegClass.contains(SrcReg));
- Opcode = AMDGPU::S_MOV_B64;
- SubIndices = Sub0_3_64;
-
- } else if (AMDGPU::SReg_256RegClass.contains(DestReg)) {
- assert(AMDGPU::SReg_256RegClass.contains(SrcReg));
- Opcode = AMDGPU::S_MOV_B64;
- SubIndices = Sub0_7_64;
-
- } else if (AMDGPU::SReg_512RegClass.contains(DestReg)) {
- assert(AMDGPU::SReg_512RegClass.contains(SrcReg));
- Opcode = AMDGPU::S_MOV_B64;
- SubIndices = Sub0_15_64;
-
- } else if (AMDGPU::VGPR_32RegClass.contains(DestReg)) {
- assert(AMDGPU::VGPR_32RegClass.contains(SrcReg) ||
- AMDGPU::SReg_32RegClass.contains(SrcReg));
- BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ if (DestReg == AMDGPU::SCC) {
+ assert(AMDGPU::SReg_32RegClass.contains(SrcReg));
+ BuildMI(MBB, MI, DL, get(AMDGPU::S_CMP_LG_U32))
+ .addReg(SrcReg, getKillRegState(KillSrc))
+ .addImm(0);
return;
+ }
- } else if (AMDGPU::VReg_64RegClass.contains(DestReg)) {
- assert(AMDGPU::VReg_64RegClass.contains(SrcReg) ||
- AMDGPU::SReg_64RegClass.contains(SrcReg));
- Opcode = AMDGPU::V_MOV_B32_e32;
- SubIndices = Sub0_1;
-
- } else if (AMDGPU::VReg_96RegClass.contains(DestReg)) {
- assert(AMDGPU::VReg_96RegClass.contains(SrcReg));
- Opcode = AMDGPU::V_MOV_B32_e32;
- SubIndices = Sub0_2;
-
- } else if (AMDGPU::VReg_128RegClass.contains(DestReg)) {
- assert(AMDGPU::VReg_128RegClass.contains(SrcReg) ||
- AMDGPU::SReg_128RegClass.contains(SrcReg));
- Opcode = AMDGPU::V_MOV_B32_e32;
- SubIndices = Sub0_3;
-
- } else if (AMDGPU::VReg_256RegClass.contains(DestReg)) {
- assert(AMDGPU::VReg_256RegClass.contains(SrcReg) ||
- AMDGPU::SReg_256RegClass.contains(SrcReg));
- Opcode = AMDGPU::V_MOV_B32_e32;
- SubIndices = Sub0_7;
-
- } else if (AMDGPU::VReg_512RegClass.contains(DestReg)) {
- assert(AMDGPU::VReg_512RegClass.contains(SrcReg) ||
- AMDGPU::SReg_512RegClass.contains(SrcReg));
- Opcode = AMDGPU::V_MOV_B32_e32;
- SubIndices = Sub0_15;
-
- } else {
- llvm_unreachable("Can't copy register!");
+ unsigned EltSize = 4;
+ unsigned Opcode = AMDGPU::V_MOV_B32_e32;
+ if (RI.isSGPRClass(RC)) {
+ if (RC->getSize() > 4) {
+ Opcode = AMDGPU::S_MOV_B64;
+ EltSize = 8;
+ } else {
+ Opcode = AMDGPU::S_MOV_B32;
+ EltSize = 4;
+ }
}
+ ArrayRef<int16_t> SubIndices = RI.getRegSplitParts(RC, EltSize);
bool Forward = RI.getHWRegIndex(DestReg) <= RI.getHWRegIndex(SrcReg);
for (unsigned Idx = 0; Idx < SubIndices.size(); ++Idx) {
@@ -497,9 +442,7 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
}
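With this rewrite the per-class SubIndices tables are gone: RI.getRegSplitParts picks the split for any register class, so, as the removed tables show, an SReg_128 copy is emitted as two S_MOV_B64 over sub0_sub1/sub2_sub3 and a VReg_64 copy as two V_MOV_B32_e32 over sub0/sub1, with Forward choosing the copy direction so an overlapping source is read before it is overwritten.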
-int SIInstrInfo::commuteOpcode(const MachineInstr &MI) const {
- const unsigned Opcode = MI.getOpcode();
-
+int SIInstrInfo::commuteOpcode(unsigned Opcode) const {
int NewOpc;
// Try to map original to commuted opcode
@@ -573,11 +516,11 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
const TargetRegisterInfo *TRI) const {
MachineFunction *MF = MBB.getParent();
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
- MachineFrameInfo *FrameInfo = MF->getFrameInfo();
+ MachineFrameInfo &FrameInfo = MF->getFrameInfo();
DebugLoc DL = MBB.findDebugLoc(MI);
- unsigned Size = FrameInfo->getObjectSize(FrameIndex);
- unsigned Align = FrameInfo->getObjectAlignment(FrameIndex);
+ unsigned Size = FrameInfo.getObjectSize(FrameIndex);
+ unsigned Align = FrameInfo.getObjectAlignment(FrameIndex);
MachinePointerInfo PtrInfo
= MachinePointerInfo::getFixedStack(*MF, FrameIndex);
MachineMemOperand *MMO
@@ -587,20 +530,31 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
if (RI.isSGPRClass(RC)) {
MFI->setHasSpilledSGPRs();
+ // We are only allowed to create one new instruction when spilling
+ // registers, so we need to use a pseudo instruction for spilling SGPRs.
+ const MCInstrDesc &OpDesc = get(getSGPRSpillSaveOpcode(RC->getSize()));
+
+ // The SGPR spill/restore instructions only work on numbered SGPRs, so we need
+ // to make sure we are using the correct register class.
if (TargetRegisterInfo::isVirtualRegister(SrcReg) && RC->getSize() == 4) {
- // m0 may not be allowed for readlane.
MachineRegisterInfo &MRI = MF->getRegInfo();
MRI.constrainRegClass(SrcReg, &AMDGPU::SReg_32_XM0RegClass);
}
- // We are only allowed to create one new instruction when spilling
- // registers, so we need to use pseudo instruction for spilling
- // SGPRs.
- unsigned Opcode = getSGPRSpillSaveOpcode(RC->getSize());
- BuildMI(MBB, MI, DL, get(Opcode))
- .addReg(SrcReg, getKillRegState(isKill)) // src
- .addFrameIndex(FrameIndex) // frame_idx
- .addMemOperand(MMO);
+ MachineInstrBuilder Spill = BuildMI(MBB, MI, DL, OpDesc)
+ .addReg(SrcReg, getKillRegState(isKill)) // data
+ .addFrameIndex(FrameIndex) // addr
+ .addMemOperand(MMO)
+ .addReg(MFI->getScratchRSrcReg(), RegState::Implicit)
+ .addReg(MFI->getScratchWaveOffsetReg(), RegState::Implicit);
+ // Add the scratch resource registers as implicit uses because we may end up
+ // needing them, and need to ensure that the reserved registers are
+ // correctly handled.
+
+ if (ST.hasScalarStores()) {
+ // m0 is used as the offset register for scalar stores when spilling.
+ Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine);
+ }
return;
}
@@ -620,11 +574,11 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
unsigned Opcode = getVGPRSpillSaveOpcode(RC->getSize());
MFI->setHasSpilledVGPRs();
BuildMI(MBB, MI, DL, get(Opcode))
- .addReg(SrcReg, getKillRegState(isKill)) // src
- .addFrameIndex(FrameIndex) // frame_idx
- .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
- .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset
- .addImm(0) // offset
+ .addReg(SrcReg, getKillRegState(isKill)) // data
+ .addFrameIndex(FrameIndex) // addr
+ .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
+ .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset
+ .addImm(0) // offset
.addMemOperand(MMO);
}
@@ -671,10 +625,10 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
const TargetRegisterInfo *TRI) const {
MachineFunction *MF = MBB.getParent();
const SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
- MachineFrameInfo *FrameInfo = MF->getFrameInfo();
+ MachineFrameInfo &FrameInfo = MF->getFrameInfo();
DebugLoc DL = MBB.findDebugLoc(MI);
- unsigned Align = FrameInfo->getObjectAlignment(FrameIndex);
- unsigned Size = FrameInfo->getObjectSize(FrameIndex);
+ unsigned Align = FrameInfo.getObjectAlignment(FrameIndex);
+ unsigned Size = FrameInfo.getObjectSize(FrameIndex);
MachinePointerInfo PtrInfo
= MachinePointerInfo::getFixedStack(*MF, FrameIndex);
@@ -685,17 +639,22 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
if (RI.isSGPRClass(RC)) {
// FIXME: Maybe this should not include a memoperand because it will be
// lowered to non-memory instructions.
- unsigned Opcode = getSGPRSpillRestoreOpcode(RC->getSize());
-
+ const MCInstrDesc &OpDesc = get(getSGPRSpillRestoreOpcode(RC->getSize()));
if (TargetRegisterInfo::isVirtualRegister(DestReg) && RC->getSize() == 4) {
- // m0 may not be allowed for readlane.
MachineRegisterInfo &MRI = MF->getRegInfo();
MRI.constrainRegClass(DestReg, &AMDGPU::SReg_32_XM0RegClass);
}
- BuildMI(MBB, MI, DL, get(Opcode), DestReg)
- .addFrameIndex(FrameIndex) // frame_idx
- .addMemOperand(MMO);
+ MachineInstrBuilder Spill = BuildMI(MBB, MI, DL, OpDesc, DestReg)
+ .addFrameIndex(FrameIndex) // addr
+ .addMemOperand(MMO)
+ .addReg(MFI->getScratchRSrcReg(), RegState::Implicit)
+ .addReg(MFI->getScratchWaveOffsetReg(), RegState::Implicit);
+
+ if (ST.hasScalarStores()) {
+ // m0 is used as the offset register for scalar stores when spilling.
+ Spill.addReg(AMDGPU::M0, RegState::ImplicitDefine);
+ }
return;
}
@@ -713,7 +672,7 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
unsigned Opcode = getVGPRSpillRestoreOpcode(RC->getSize());
BuildMI(MBB, MI, DL, get(Opcode), DestReg)
- .addFrameIndex(FrameIndex) // frame_idx
+ .addFrameIndex(FrameIndex) // vaddr
.addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
.addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset
.addImm(0) // offset
@@ -729,7 +688,7 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(
const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
DebugLoc DL = MBB.findDebugLoc(MI);
- unsigned WorkGroupSize = MFI->getMaximumWorkGroupSize(*MF);
+ unsigned WorkGroupSize = MFI->getMaxFlatWorkGroupSize();
unsigned WavefrontSize = ST.getWavefrontSize();
unsigned TIDReg = MFI->getTIDReg();
@@ -808,7 +767,7 @@ unsigned SIInstrInfo::calculateLDSSpillAddress(
}
// Add FrameIndex to LDS offset
- unsigned LDSOffset = MFI->LDSSize + (FrameOffset * WorkGroupSize);
+ unsigned LDSOffset = MFI->getLDSSize() + (FrameOffset * WorkGroupSize);
BuildMI(MBB, MI, DL, get(AMDGPU::V_ADD_I32_e32), TmpReg)
.addImm(LDSOffset)
.addReg(TIDReg);
@@ -851,7 +810,24 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
DebugLoc DL = MBB.findDebugLoc(MI);
switch (MI.getOpcode()) {
default: return AMDGPUInstrInfo::expandPostRAPseudo(MI);
-
+ case AMDGPU::S_MOV_B64_term: {
+ // This is only a terminator to get the correct spill code placement during
+ // register allocation.
+ MI.setDesc(get(AMDGPU::S_MOV_B64));
+ break;
+ }
+ case AMDGPU::S_XOR_B64_term: {
+ // This is only a terminator to get the correct spill code placement during
+ // register allocation.
+ MI.setDesc(get(AMDGPU::S_XOR_B64));
+ break;
+ }
+ case AMDGPU::S_ANDN2_B64_term: {
+ // This is only a terminator to get the correct spill code placement during
+ // register allocation.
+ MI.setDesc(get(AMDGPU::S_ANDN2_B64));
+ break;
+ }
case AMDGPU::V_MOV_B64_PSEUDO: {
unsigned Dst = MI.getOperand(0).getReg();
unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
@@ -880,36 +856,37 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MI.eraseFromParent();
break;
}
+ case AMDGPU::V_MOVRELD_B32_V1:
+ case AMDGPU::V_MOVRELD_B32_V2:
+ case AMDGPU::V_MOVRELD_B32_V4:
+ case AMDGPU::V_MOVRELD_B32_V8:
+ case AMDGPU::V_MOVRELD_B32_V16: {
+ const MCInstrDesc &MovRelDesc = get(AMDGPU::V_MOVRELD_B32_e32);
+ unsigned VecReg = MI.getOperand(0).getReg();
+ bool IsUndef = MI.getOperand(1).isUndef();
+ unsigned SubReg = AMDGPU::sub0 + MI.getOperand(3).getImm();
+ assert(VecReg == MI.getOperand(1).getReg());
+
+ MachineInstr *MovRel =
+ BuildMI(MBB, MI, DL, MovRelDesc)
+ .addReg(RI.getSubReg(VecReg, SubReg), RegState::Undef)
+ .addOperand(MI.getOperand(2))
+ .addReg(VecReg, RegState::ImplicitDefine)
+ .addReg(VecReg, RegState::Implicit | (IsUndef ? RegState::Undef : 0));
+
+ const int ImpDefIdx =
+ MovRelDesc.getNumOperands() + MovRelDesc.getNumImplicitUses();
+ const int ImpUseIdx = ImpDefIdx + 1;
+ MovRel->tieOperands(ImpDefIdx, ImpUseIdx);
- case AMDGPU::V_CNDMASK_B64_PSEUDO: {
- unsigned Dst = MI.getOperand(0).getReg();
- unsigned DstLo = RI.getSubReg(Dst, AMDGPU::sub0);
- unsigned DstHi = RI.getSubReg(Dst, AMDGPU::sub1);
- unsigned Src0 = MI.getOperand(1).getReg();
- unsigned Src1 = MI.getOperand(2).getReg();
- const MachineOperand &SrcCond = MI.getOperand(3);
-
- BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstLo)
- .addReg(RI.getSubReg(Src0, AMDGPU::sub0))
- .addReg(RI.getSubReg(Src1, AMDGPU::sub0))
- .addReg(SrcCond.getReg())
- .addReg(Dst, RegState::Implicit | RegState::Define);
- BuildMI(MBB, MI, DL, get(AMDGPU::V_CNDMASK_B32_e64), DstHi)
- .addReg(RI.getSubReg(Src0, AMDGPU::sub1))
- .addReg(RI.getSubReg(Src1, AMDGPU::sub1))
- .addReg(SrcCond.getReg(), getKillRegState(SrcCond.isKill()))
- .addReg(Dst, RegState::Implicit | RegState::Define);
MI.eraseFromParent();
break;
}
-
case AMDGPU::SI_PC_ADD_REL_OFFSET: {
- const SIRegisterInfo *TRI
- = static_cast<const SIRegisterInfo *>(ST.getRegisterInfo());
MachineFunction &MF = *MBB.getParent();
unsigned Reg = MI.getOperand(0).getReg();
- unsigned RegLo = TRI->getSubReg(Reg, AMDGPU::sub0);
- unsigned RegHi = TRI->getSubReg(Reg, AMDGPU::sub1);
+ unsigned RegLo = RI.getSubReg(Reg, AMDGPU::sub0);
+ unsigned RegHi = RI.getSubReg(Reg, AMDGPU::sub1);
// Create a bundle so these instructions won't be re-ordered by the
// post-RA scheduler.
@@ -921,10 +898,15 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo)
.addReg(RegLo)
.addOperand(MI.getOperand(1)));
- Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADDC_U32), RegHi)
- .addReg(RegHi)
- .addImm(0));
+ MachineInstrBuilder MIB = BuildMI(MF, DL, get(AMDGPU::S_ADDC_U32), RegHi)
+ .addReg(RegHi);
+ if (MI.getOperand(2).getTargetFlags() == SIInstrInfo::MO_NONE)
+ MIB.addImm(0);
+ else
+ MIB.addOperand(MI.getOperand(2));
+
+ Bundler.append(MIB);
llvm::finalizeBundle(MBB, Bundler.begin());
MI.eraseFromParent();
@@ -934,91 +916,96 @@ bool SIInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
return true;
}
-/// Commutes the operands in the given instruction.
-/// The commutable operands are specified by their indices OpIdx0 and OpIdx1.
-///
-/// Do not call this method for a non-commutable instruction or for
-/// non-commutable pair of operand indices OpIdx0 and OpIdx1.
-/// Even though the instruction is commutable, the method may still
-/// fail to commute the operands, null pointer is returned in such cases.
-MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
- unsigned OpIdx0,
- unsigned OpIdx1) const {
- int CommutedOpcode = commuteOpcode(MI);
- if (CommutedOpcode == -1)
- return nullptr;
+bool SIInstrInfo::swapSourceModifiers(MachineInstr &MI,
+ MachineOperand &Src0,
+ unsigned Src0OpName,
+ MachineOperand &Src1,
+ unsigned Src1OpName) const {
+ MachineOperand *Src0Mods = getNamedOperand(MI, Src0OpName);
+ if (!Src0Mods)
+ return false;
- int Src0Idx =
- AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
- MachineOperand &Src0 = MI.getOperand(Src0Idx);
- if (!Src0.isReg())
+ MachineOperand *Src1Mods = getNamedOperand(MI, Src1OpName);
+ assert(Src1Mods &&
+ "All commutable instructions have both src0 and src1 modifiers");
+
+ int Src0ModsVal = Src0Mods->getImm();
+ int Src1ModsVal = Src1Mods->getImm();
+
+ Src1Mods->setImm(Src0ModsVal);
+ Src0Mods->setImm(Src1ModsVal);
+ return true;
+}
+
+static MachineInstr *swapRegAndNonRegOperand(MachineInstr &MI,
+ MachineOperand &RegOp,
+ MachineOperand &NonRegOp) {
+ unsigned Reg = RegOp.getReg();
+ unsigned SubReg = RegOp.getSubReg();
+ bool IsKill = RegOp.isKill();
+ bool IsDead = RegOp.isDead();
+ bool IsUndef = RegOp.isUndef();
+ bool IsDebug = RegOp.isDebug();
+
+ if (NonRegOp.isImm())
+ RegOp.ChangeToImmediate(NonRegOp.getImm());
+ else if (NonRegOp.isFI())
+ RegOp.ChangeToFrameIndex(NonRegOp.getIndex());
+ else
return nullptr;
- int Src1Idx =
- AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src1);
+ NonRegOp.ChangeToRegister(Reg, false, false, IsKill, IsDead, IsUndef, IsDebug);
+ NonRegOp.setSubReg(SubReg);
- if ((OpIdx0 != static_cast<unsigned>(Src0Idx) ||
- OpIdx1 != static_cast<unsigned>(Src1Idx)) &&
- (OpIdx0 != static_cast<unsigned>(Src1Idx) ||
- OpIdx1 != static_cast<unsigned>(Src0Idx)))
+ return &MI;
+}
+
+MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
+ unsigned Src0Idx,
+ unsigned Src1Idx) const {
+ assert(!NewMI && "this should never be used");
+
+ unsigned Opc = MI.getOpcode();
+ int CommutedOpcode = commuteOpcode(Opc);
+ if (CommutedOpcode == -1)
return nullptr;
- MachineOperand &Src1 = MI.getOperand(Src1Idx);
+ assert(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0) ==
+ static_cast<int>(Src0Idx) &&
+ AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1) ==
+ static_cast<int>(Src1Idx) &&
+ "inconsistency with findCommutedOpIndices");
- if (isVOP2(MI) || isVOPC(MI)) {
- const MCInstrDesc &InstrDesc = MI.getDesc();
- // For VOP2 and VOPC instructions, any operand type is valid to use for
- // src0. Make sure we can use the src0 as src1.
- //
- // We could be stricter here and only allow commuting if there is a reason
- // to do so. i.e. if both operands are VGPRs there is no real benefit,
- // although MachineCSE attempts to find matches by commuting.
- const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
- if (!isLegalRegOperand(MRI, InstrDesc.OpInfo[Src1Idx], Src0))
- return nullptr;
- }
+ MachineOperand &Src0 = MI.getOperand(Src0Idx);
+ MachineOperand &Src1 = MI.getOperand(Src1Idx);
- MachineInstr *CommutedMI = &MI;
- if (!Src1.isReg()) {
- // Allow commuting instructions with Imm operands.
- if (NewMI || !Src1.isImm() || (!isVOP2(MI) && !isVOP3(MI))) {
- return nullptr;
+ MachineInstr *CommutedMI = nullptr;
+ if (Src0.isReg() && Src1.isReg()) {
+ if (isOperandLegal(MI, Src1Idx, &Src0)) {
+ // Be sure to copy the source modifiers to the right place.
+ CommutedMI
+ = TargetInstrInfo::commuteInstructionImpl(MI, NewMI, Src0Idx, Src1Idx);
}
- // Be sure to copy the source modifiers to the right place.
- if (MachineOperand *Src0Mods =
- getNamedOperand(MI, AMDGPU::OpName::src0_modifiers)) {
- MachineOperand *Src1Mods =
- getNamedOperand(MI, AMDGPU::OpName::src1_modifiers);
-
- int Src0ModsVal = Src0Mods->getImm();
- if (!Src1Mods && Src0ModsVal != 0)
- return nullptr;
-
- // XXX - This assert might be a lie. It might be useful to have a neg
- // modifier with 0.0.
- int Src1ModsVal = Src1Mods->getImm();
- assert((Src1ModsVal == 0) && "Not expecting modifiers with immediates");
-
- Src1Mods->setImm(Src0ModsVal);
- Src0Mods->setImm(Src1ModsVal);
- }
-
- unsigned Reg = Src0.getReg();
- unsigned SubReg = Src0.getSubReg();
- if (Src1.isImm())
- Src0.ChangeToImmediate(Src1.getImm());
- else
- llvm_unreachable("Should only have immediates");
- Src1.ChangeToRegister(Reg, false);
- Src1.setSubReg(SubReg);
+ } else if (Src0.isReg() && !Src1.isReg()) {
+ // src0 should always be able to support any operand type, so no need to
+ // check operand legality.
+ CommutedMI = swapRegAndNonRegOperand(MI, Src0, Src1);
+ } else if (!Src0.isReg() && Src1.isReg()) {
+ if (isOperandLegal(MI, Src1Idx, &Src0))
+ CommutedMI = swapRegAndNonRegOperand(MI, Src1, Src0);
} else {
- CommutedMI =
- TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx0, OpIdx1);
+ // FIXME: Found two non-register operands to commute. This does happen.
+ return nullptr;
}
- if (CommutedMI)
+
+ if (CommutedMI) {
+ swapSourceModifiers(MI, Src0, AMDGPU::OpName::src0_modifiers,
+ Src1, AMDGPU::OpName::src1_modifiers);
+
CommutedMI->setDesc(get(CommutedOpcode));
+ }
return CommutedMI;
}
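In the rewritten form the register/immediate case goes through swapRegAndNonRegOperand: the register operand is turned into the immediate (or frame index) while the non-register operand takes over the register along with its kill/dead/undef flags; swapSourceModifiers then exchanges src0_modifiers and src1_modifiers before setDesc installs the opcode returned by commuteOpcode (for example the sub/subrev pairs). The only case that still bails out is two non-register operands.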
@@ -1028,8 +1015,7 @@ MachineInstr *SIInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
// TargetInstrInfo::commuteInstruction uses it.
bool SIInstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx0,
unsigned &SrcOpIdx1) const {
- const MCInstrDesc &MCID = MI.getDesc();
- if (!MCID.isCommutable())
+ if (!MI.isCommutable())
return false;
unsigned Opc = MI.getOpcode();
@@ -1037,34 +1023,135 @@ bool SIInstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx0,
if (Src0Idx == -1)
return false;
- // FIXME: Workaround TargetInstrInfo::commuteInstruction asserting on
- // immediate. Also, immediate src0 operand is not handled in
- // SIInstrInfo::commuteInstruction();
- if (!MI.getOperand(Src0Idx).isReg())
- return false;
-
int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
if (Src1Idx == -1)
return false;
- MachineOperand &Src1 = MI.getOperand(Src1Idx);
- if (Src1.isImm()) {
- // SIInstrInfo::commuteInstruction() does support commuting the immediate
- // operand src1 in 2 and 3 operand instructions.
- if (!isVOP2(MI.getOpcode()) && !isVOP3(MI.getOpcode()))
- return false;
- } else if (Src1.isReg()) {
- // If any source modifiers are set, the generic instruction commuting won't
- // understand how to copy the source modifiers.
- if (hasModifiersSet(MI, AMDGPU::OpName::src0_modifiers) ||
- hasModifiersSet(MI, AMDGPU::OpName::src1_modifiers))
- return false;
- } else
- return false;
-
return fixCommutedOpIndices(SrcOpIdx0, SrcOpIdx1, Src0Idx, Src1Idx);
}
+bool SIInstrInfo::isBranchOffsetInRange(unsigned BranchOp,
+ int64_t BrOffset) const {
+ // BranchRelaxation should never have to check s_setpc_b64 because its dest
+ // block is unanalyzable.
+ assert(BranchOp != AMDGPU::S_SETPC_B64);
+
+ // Convert to dwords.
+ BrOffset /= 4;
+
+ // The branch instructions do PC += signext(SIMM16 * 4) + 4, so the offset is
+ // from the next instruction.
+ BrOffset -= 1;
+
+ return isIntN(BranchOffsetBits, BrOffset);
+}
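The check above can be exercised in isolation. A minimal standalone sketch (not part of the patch; the helper names are invented) mirroring the assumed encoding, PC += signext(SIMM16 * 4) + 4, i.e. dword offsets measured from the instruction after the branch:

    #include <cassert>
    #include <cstdint>

    // Does Val fit in a signed field of Bits bits? (same as llvm::isIntN)
    static bool fitsInSignedBits(int64_t Val, unsigned Bits) {
      int64_t Min = -(int64_t(1) << (Bits - 1));
      int64_t Max = (int64_t(1) << (Bits - 1)) - 1;
      return Val >= Min && Val <= Max;
    }

    // Mirrors isBranchOffsetInRange: byte offset -> dwords, minus one for
    // the implicit +4 of the branch itself.
    static bool branchOffsetInRange(int64_t ByteOffset, unsigned OffsetBits = 16) {
      return fitsInSignedBits(ByteOffset / 4 - 1, OffsetBits);
    }

    int main() {
      assert(branchOffsetInRange(128));       // well within ~ +/-128 KiB
      assert(!branchOffsetInRange(1 << 20));  // ~1 MiB needs a long branch
      assert(!branchOffsetInRange(128, 4));   // a shrunken field, as the hidden
                                              // option allows tests to request
      return 0;
    }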
+
+MachineBasicBlock *SIInstrInfo::getBranchDestBlock(
+ const MachineInstr &MI) const {
+ if (MI.getOpcode() == AMDGPU::S_SETPC_B64) {
+ // This would be a difficult analysis to perform, but can always be legal so
+ // there's no need to analyze it.
+ return nullptr;
+ }
+
+ return MI.getOperand(0).getMBB();
+}
+
+unsigned SIInstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock &DestBB,
+ const DebugLoc &DL,
+ int64_t BrOffset,
+ RegScavenger *RS) const {
+ assert(RS && "RegScavenger required for long branching");
+ assert(MBB.empty() &&
+ "new block should be inserted for expanding unconditional branch");
+ assert(MBB.pred_size() == 1);
+
+ MachineFunction *MF = MBB.getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ // FIXME: Virtual register workaround for RegScavenger not working with empty
+ // blocks.
+ unsigned PCReg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
+
+ auto I = MBB.end();
+
+ // We need to compute the offset relative to the instruction immediately after
+ // s_getpc_b64. Insert the pc arithmetic code before the last terminator.
+ MachineInstr *GetPC = BuildMI(MBB, I, DL, get(AMDGPU::S_GETPC_B64), PCReg);
+
+ // TODO: Handle > 32-bit block address.
+ if (BrOffset >= 0) {
+ BuildMI(MBB, I, DL, get(AMDGPU::S_ADD_U32))
+ .addReg(PCReg, RegState::Define, AMDGPU::sub0)
+ .addReg(PCReg, 0, AMDGPU::sub0)
+ .addMBB(&DestBB, AMDGPU::TF_LONG_BRANCH_FORWARD);
+ BuildMI(MBB, I, DL, get(AMDGPU::S_ADDC_U32))
+ .addReg(PCReg, RegState::Define, AMDGPU::sub1)
+ .addReg(PCReg, 0, AMDGPU::sub1)
+ .addImm(0);
+ } else {
+ // Backwards branch.
+ BuildMI(MBB, I, DL, get(AMDGPU::S_SUB_U32))
+ .addReg(PCReg, RegState::Define, AMDGPU::sub0)
+ .addReg(PCReg, 0, AMDGPU::sub0)
+ .addMBB(&DestBB, AMDGPU::TF_LONG_BRANCH_BACKWARD);
+ BuildMI(MBB, I, DL, get(AMDGPU::S_SUBB_U32))
+ .addReg(PCReg, RegState::Define, AMDGPU::sub1)
+ .addReg(PCReg, 0, AMDGPU::sub1)
+ .addImm(0);
+ }
+
+ // Insert the indirect branch after the other terminator.
+ BuildMI(&MBB, DL, get(AMDGPU::S_SETPC_B64))
+ .addReg(PCReg);
+
+ // FIXME: If spilling is necessary, this will fail because this scavenger has
+ // no emergency stack slots. It is non-trivial to spill in this situation,
+ // because the restore code needs to be specially placed after the
+ // jump. BranchRelaxation then needs to be made aware of the newly inserted
+ // block.
+ //
+ // If a spill is needed for the pc register pair, we need to insert a spill
+ // restore block right before the destination block, and insert a short branch
+ // into the old destination block's fallthrough predecessor.
+ // e.g.:
+ //
+ // s_cbranch_scc0 skip_long_branch:
+ //
+ // long_branch_bb:
+ // spill s[8:9]
+ // s_getpc_b64 s[8:9]
+ // s_add_u32 s8, s8, restore_bb
+ // s_addc_u32 s9, s9, 0
+ // s_setpc_b64 s[8:9]
+ //
+ // skip_long_branch:
+ // foo;
+ //
+ // .....
+ //
+ // dest_bb_fallthrough_predecessor:
+ // bar;
+ // s_branch dest_bb
+ //
+ // restore_bb:
+ // restore s[8:9]
+ // fallthrough dest_bb
+ //
+ // dest_bb:
+ // buzz;
+
+ RS->enterBasicBlockEnd(MBB);
+ unsigned Scav = RS->scavengeRegister(&AMDGPU::SReg_64RegClass,
+ MachineBasicBlock::iterator(GetPC), 0);
+ MRI.replaceRegWith(PCReg, Scav);
+ MRI.clearVirtRegs();
+ RS->setRegUsed(Scav);
+
+ return 4 + 8 + 4 + 4;
+}
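For reference, the returned 4 + 8 + 4 + 4 matches the bytes emitted above under the usual encoding assumptions: s_getpc_b64 is 4 bytes, the low s_add_u32/s_sub_u32 carries a 32-bit literal for the block address (8 bytes), the carry s_addc_u32/s_subb_u32 with an inline 0 is 4 bytes, and s_setpc_b64 is 4 bytes, for 20 bytes in total.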
+
unsigned SIInstrInfo::getBranchOpcode(SIInstrInfo::BranchPredicate Cond) {
switch (Cond) {
case SIInstrInfo::SCC_TRUE:
@@ -1103,15 +1190,12 @@ SIInstrInfo::BranchPredicate SIInstrInfo::getBranchPredicate(unsigned Opcode) {
}
}
-bool SIInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const {
- MachineBasicBlock::iterator I = MBB.getFirstTerminator();
-
- if (I == MBB.end())
- return false;
-
+bool SIInstrInfo::analyzeBranchImpl(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
if (I->getOpcode() == AMDGPU::S_BRANCH) {
// Unconditional Branch
TBB = I->getOperand(0).getMBB();
@@ -1124,6 +1208,7 @@ bool SIInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
MachineBasicBlock *CondBB = I->getOperand(0).getMBB();
Cond.push_back(MachineOperand::CreateImm(Pred));
+ Cond.push_back(I->getOperand(1)); // Save the branch register.
++I;
@@ -1142,29 +1227,81 @@ bool SIInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
return true;
}
-unsigned SIInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+bool SIInstrInfo::analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ MachineBasicBlock::iterator I = MBB.getFirstTerminator();
+ if (I == MBB.end())
+ return false;
+
+ if (I->getOpcode() != AMDGPU::SI_MASK_BRANCH)
+ return analyzeBranchImpl(MBB, I, TBB, FBB, Cond, AllowModify);
+
+ ++I;
+
+ // TODO: Should be able to treat as fallthrough?
+ if (I == MBB.end())
+ return true;
+
+ if (analyzeBranchImpl(MBB, I, TBB, FBB, Cond, AllowModify))
+ return true;
+
+ MachineBasicBlock *MaskBrDest = I->getOperand(0).getMBB();
+
+ // Specifically handle the case where the conditional branch is to the same
+ // destination as the mask branch. e.g.
+ //
+ // si_mask_branch BB8
+ // s_cbranch_execz BB8
+ // s_cbranch BB9
+ //
+ // This is required to understand divergent loops which may need the branches
+ // to be relaxed.
+ if (TBB != MaskBrDest || Cond.empty())
+ return true;
+
+ auto Pred = Cond[0].getImm();
+ return (Pred != EXECZ && Pred != EXECNZ);
+}
+
+unsigned SIInstrInfo::removeBranch(MachineBasicBlock &MBB,
+ int *BytesRemoved) const {
MachineBasicBlock::iterator I = MBB.getFirstTerminator();
unsigned Count = 0;
+ unsigned RemovedSize = 0;
while (I != MBB.end()) {
MachineBasicBlock::iterator Next = std::next(I);
+ if (I->getOpcode() == AMDGPU::SI_MASK_BRANCH) {
+ I = Next;
+ continue;
+ }
+
+ RemovedSize += getInstSizeInBytes(*I);
I->eraseFromParent();
++Count;
I = Next;
}
+ if (BytesRemoved)
+ *BytesRemoved = RemovedSize;
+
return Count;
}
-unsigned SIInstrInfo::InsertBranch(MachineBasicBlock &MBB,
+unsigned SIInstrInfo::insertBranch(MachineBasicBlock &MBB,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
ArrayRef<MachineOperand> Cond,
- const DebugLoc &DL) const {
+ const DebugLoc &DL,
+ int *BytesAdded) const {
if (!FBB && Cond.empty()) {
BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH))
.addMBB(TBB);
+ if (BytesAdded)
+ *BytesAdded = 4;
return 1;
}
@@ -1174,24 +1311,42 @@ unsigned SIInstrInfo::InsertBranch(MachineBasicBlock &MBB,
= getBranchOpcode(static_cast<BranchPredicate>(Cond[0].getImm()));
if (!FBB) {
- BuildMI(&MBB, DL, get(Opcode))
+ Cond[1].isUndef();
+ MachineInstr *CondBr =
+ BuildMI(&MBB, DL, get(Opcode))
.addMBB(TBB);
+
+ // Copy the flags onto the implicit condition register operand.
+ MachineOperand &CondReg = CondBr->getOperand(1);
+ CondReg.setIsUndef(Cond[1].isUndef());
+ CondReg.setIsKill(Cond[1].isKill());
+
+ if (BytesAdded)
+ *BytesAdded = 4;
return 1;
}
assert(TBB && FBB);
- BuildMI(&MBB, DL, get(Opcode))
+ MachineInstr *CondBr =
+ BuildMI(&MBB, DL, get(Opcode))
.addMBB(TBB);
BuildMI(&MBB, DL, get(AMDGPU::S_BRANCH))
.addMBB(FBB);
+ MachineOperand &CondReg = CondBr->getOperand(1);
+ CondReg.setIsUndef(Cond[1].isUndef());
+ CondReg.setIsKill(Cond[1].isKill());
+
+ if (BytesAdded)
+ *BytesAdded = 8;
+
return 2;
}
-bool SIInstrInfo::ReverseBranchCondition(
+bool SIInstrInfo::reverseBranchCondition(
SmallVectorImpl<MachineOperand> &Cond) const {
- assert(Cond.size() == 1);
+ assert(Cond.size() == 2);
Cond[0].setImm(-Cond[0].getImm());
return false;
}
@@ -1210,15 +1365,43 @@ static void removeModOperands(MachineInstr &MI) {
MI.RemoveOperand(Src0ModIdx);
}
-// TODO: Maybe this should be removed this and custom fold everything in
-// SIFoldOperands?
bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
unsigned Reg, MachineRegisterInfo *MRI) const {
if (!MRI->hasOneNonDBGUse(Reg))
return false;
unsigned Opc = UseMI.getOpcode();
- if (Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64) {
+ if (Opc == AMDGPU::COPY) {
+ bool isVGPRCopy = RI.isVGPR(*MRI, UseMI.getOperand(0).getReg());
+ switch (DefMI.getOpcode()) {
+ default:
+ return false;
+ case AMDGPU::S_MOV_B64:
+ // TODO: We could fold 64-bit immediates, but this gets complicated
+ // when there are sub-registers.
+ return false;
+
+ case AMDGPU::V_MOV_B32_e32:
+ case AMDGPU::S_MOV_B32:
+ break;
+ }
+ unsigned NewOpc = isVGPRCopy ? AMDGPU::V_MOV_B32_e32 : AMDGPU::S_MOV_B32;
+ const MachineOperand *ImmOp = getNamedOperand(DefMI, AMDGPU::OpName::src0);
+ assert(ImmOp);
+ // FIXME: We could handle FrameIndex values here.
+ if (!ImmOp->isImm()) {
+ return false;
+ }
+ UseMI.setDesc(get(NewOpc));
+ UseMI.getOperand(1).ChangeToImmediate(ImmOp->getImm());
+ UseMI.addImplicitDefUseOperands(*UseMI.getParent()->getParent());
+ return true;
+ }
+
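For example, given %a = S_MOV_B32 42 followed by a single use %b:vgpr_32 = COPY %a, the copy itself is rewritten into %b = V_MOV_B32_e32 42 (register names here are purely illustrative), and the now-dead S_MOV is left for the usual dead-code cleanup.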
+ if (Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64 ||
+ Opc == AMDGPU::V_MAD_F16 || Opc == AMDGPU::V_MAC_F16_e64) {
+ bool IsF32 = Opc == AMDGPU::V_MAD_F32 || Opc == AMDGPU::V_MAC_F32_e64;
+
// Don't fold if we are using source modifiers. The new VOP2 instructions
// don't have them.
if (hasModifiersSet(UseMI, AMDGPU::OpName::src0_modifiers) ||
@@ -1232,14 +1415,16 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
// If this is a free constant, there's no reason to do this.
// TODO: We could fold this here instead of letting SIFoldOperands do it
// later.
- if (isInlineConstant(ImmOp, 4))
+ MachineOperand *Src0 = getNamedOperand(UseMI, AMDGPU::OpName::src0);
+
+ // Any src operand can be used for the legality check.
+ if (isInlineConstant(UseMI, *Src0, ImmOp))
return false;
- MachineOperand *Src0 = getNamedOperand(UseMI, AMDGPU::OpName::src0);
MachineOperand *Src1 = getNamedOperand(UseMI, AMDGPU::OpName::src1);
MachineOperand *Src2 = getNamedOperand(UseMI, AMDGPU::OpName::src2);
- // Multiplied part is the constant: Use v_madmk_f32
+ // Multiplied part is the constant: Use v_madmk_{f16, f32}.
// We should only expect these to be on src0 due to canonicalizations.
if (Src0->isReg() && Src0->getReg() == Reg) {
if (!Src1->isReg() || RI.isSGPRClass(MRI->getRegClass(Src1->getReg())))
@@ -1267,15 +1452,15 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
Src0->setSubReg(Src1SubReg);
Src0->setIsKill(Src1->isKill());
- if (Opc == AMDGPU::V_MAC_F32_e64) {
+ if (Opc == AMDGPU::V_MAC_F32_e64 ||
+ Opc == AMDGPU::V_MAC_F16_e64)
UseMI.untieRegOperand(
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
- }
Src1->ChangeToImmediate(Imm);
removeModOperands(UseMI);
- UseMI.setDesc(get(AMDGPU::V_MADMK_F32));
+ UseMI.setDesc(get(IsF32 ? AMDGPU::V_MADMK_F32 : AMDGPU::V_MADMK_F16));
bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
if (DeleteDef)
@@ -1284,7 +1469,7 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
return true;
}
- // Added part is the constant: Use v_madak_f32
+ // Added part is the constant: Use v_madak_{f16, f32}.
if (Src2->isReg() && Src2->getReg() == Reg) {
// Not allowed to use constant bus for another operand.
// We can however allow an inline immediate as src0.
@@ -1306,17 +1491,17 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
UseMI.RemoveOperand(
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp));
- if (Opc == AMDGPU::V_MAC_F32_e64) {
+ if (Opc == AMDGPU::V_MAC_F32_e64 ||
+ Opc == AMDGPU::V_MAC_F16_e64)
UseMI.untieRegOperand(
AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2));
- }
// ChangingToImmediate adds Src2 back to the instruction.
Src2->ChangeToImmediate(Imm);
// These come before src2.
removeModOperands(UseMI);
- UseMI.setDesc(get(AMDGPU::V_MADAK_F32));
+ UseMI.setDesc(get(IsF32 ? AMDGPU::V_MADAK_F32 : AMDGPU::V_MADAK_F16));
bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
if (DeleteDef)
@@ -1375,6 +1560,17 @@ bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr &MIa,
if (MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
return false;
+ if (AA && MIa.hasOneMemOperand() && MIb.hasOneMemOperand()) {
+ const MachineMemOperand *MMOa = *MIa.memoperands_begin();
+ const MachineMemOperand *MMOb = *MIb.memoperands_begin();
+ if (MMOa->getValue() && MMOb->getValue()) {
+ MemoryLocation LocA(MMOa->getValue(), MMOa->getSize(), MMOa->getAAInfo());
+ MemoryLocation LocB(MMOb->getValue(), MMOb->getSize(), MMOb->getAAInfo());
+ if (!AA->alias(LocA, LocB))
+ return true;
+ }
+ }
+
// TODO: Should we check the address space from the MachineMemOperand? That
// would allow us to distinguish objects we know don't alias based on the
// underlying address space, even if it was lowered to a different one,
@@ -1414,15 +1610,22 @@ bool SIInstrInfo::areMemAccessesTriviallyDisjoint(MachineInstr &MIa,
MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
MachineInstr &MI,
LiveVariables *LV) const {
+ bool IsF16 = false;
switch (MI.getOpcode()) {
default:
return nullptr;
+ case AMDGPU::V_MAC_F16_e64:
+ IsF16 = true;
case AMDGPU::V_MAC_F32_e64:
break;
+ case AMDGPU::V_MAC_F16_e32:
+ IsF16 = true;
case AMDGPU::V_MAC_F32_e32: {
- const MachineOperand *Src0 = getNamedOperand(MI, AMDGPU::OpName::src0);
- if (Src0->isImm() && !isInlineConstant(*Src0, 4))
+ int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
+ AMDGPU::OpName::src0);
+ const MachineOperand *Src0 = &MI.getOperand(Src0Idx);
+ if (Src0->isImm() && !isInlineConstant(MI, Src0Idx, *Src0))
return nullptr;
break;
}
@@ -1433,7 +1636,8 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
const MachineOperand *Src1 = getNamedOperand(MI, AMDGPU::OpName::src1);
const MachineOperand *Src2 = getNamedOperand(MI, AMDGPU::OpName::src2);
- return BuildMI(*MBB, MI, MI.getDebugLoc(), get(AMDGPU::V_MAD_F32))
+ return BuildMI(*MBB, MI, MI.getDebugLoc(),
+ get(IsF16 ? AMDGPU::V_MAD_F16 : AMDGPU::V_MAD_F32))
.addOperand(*Dst)
.addImm(0) // Src0 mods
.addOperand(*Src0)
@@ -1445,6 +1649,20 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineFunction::iterator &MBB,
.addImm(0); // omod
}
+// It's not generally safe to move VALU instructions across these since it will
+// start using the register as a base index rather than directly.
+// XXX - Why isn't hasSideEffects sufficient for these?
+static bool changesVGPRIndexingMode(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ case AMDGPU::S_SET_GPR_IDX_ON:
+ case AMDGPU::S_SET_GPR_IDX_MODE:
+ case AMDGPU::S_SET_GPR_IDX_OFF:
+ return true;
+ default:
+ return false;
+ }
+}
+
bool SIInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
const MachineBasicBlock *MBB,
const MachineFunction &MF) const {
@@ -1454,67 +1672,78 @@ bool SIInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
// when they operate on VGPRs. Treating EXEC modifications as scheduling
// boundaries prevents incorrect movements of such instructions.
return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF) ||
- MI.modifiesRegister(AMDGPU::EXEC, &RI);
+ MI.modifiesRegister(AMDGPU::EXEC, &RI) ||
+ MI.getOpcode() == AMDGPU::S_SETREG_IMM32_B32 ||
+ MI.getOpcode() == AMDGPU::S_SETREG_B32 ||
+ changesVGPRIndexingMode(MI);
}
bool SIInstrInfo::isInlineConstant(const APInt &Imm) const {
- int64_t SVal = Imm.getSExtValue();
- if (SVal >= -16 && SVal <= 64)
- return true;
-
- if (Imm.getBitWidth() == 64) {
- uint64_t Val = Imm.getZExtValue();
- return (DoubleToBits(0.0) == Val) ||
- (DoubleToBits(1.0) == Val) ||
- (DoubleToBits(-1.0) == Val) ||
- (DoubleToBits(0.5) == Val) ||
- (DoubleToBits(-0.5) == Val) ||
- (DoubleToBits(2.0) == Val) ||
- (DoubleToBits(-2.0) == Val) ||
- (DoubleToBits(4.0) == Val) ||
- (DoubleToBits(-4.0) == Val);
- }
-
- // The actual type of the operand does not seem to matter as long
- // as the bits match one of the inline immediate values. For example:
- //
- // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
- // so it is a legal inline immediate.
- //
- // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
- // floating-point, so it is a legal inline immediate.
- uint32_t Val = Imm.getZExtValue();
-
- return (FloatToBits(0.0f) == Val) ||
- (FloatToBits(1.0f) == Val) ||
- (FloatToBits(-1.0f) == Val) ||
- (FloatToBits(0.5f) == Val) ||
- (FloatToBits(-0.5f) == Val) ||
- (FloatToBits(2.0f) == Val) ||
- (FloatToBits(-2.0f) == Val) ||
- (FloatToBits(4.0f) == Val) ||
- (FloatToBits(-4.0f) == Val);
+ switch (Imm.getBitWidth()) {
+ case 32:
+ return AMDGPU::isInlinableLiteral32(Imm.getSExtValue(),
+ ST.hasInv2PiInlineImm());
+ case 64:
+ return AMDGPU::isInlinableLiteral64(Imm.getSExtValue(),
+ ST.hasInv2PiInlineImm());
+ case 16:
+ return AMDGPU::isInlinableLiteral16(Imm.getSExtValue(),
+ ST.hasInv2PiInlineImm());
+ default:
+ llvm_unreachable("invalid bitwidth");
+ }
}
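The removed enumeration above makes the 32-bit case easy to spell out. A rough standalone sketch of what AMDGPU::isInlinableLiteral32 accepts (the real helper lives in AMDGPUBaseInfo; HasInv2Pi stands in for ST.hasInv2PiInlineImm(), and the 1/(2*pi) bit pattern is quoted from memory, so treat it as an assumption):

    #include "llvm/Support/MathExtras.h"
    #include <cstdint>
    using llvm::FloatToBits;

    static bool isInlinableLiteral32Sketch(int32_t Literal, bool HasInv2Pi) {
      if (Literal >= -16 && Literal <= 64)
        return true; // small integers (and 0.0) are always inline

      uint32_t Val = static_cast<uint32_t>(Literal);
      return Val == FloatToBits(0.5f)  || Val == FloatToBits(-0.5f) ||
             Val == FloatToBits(1.0f)  || Val == FloatToBits(-1.0f) ||
             Val == FloatToBits(2.0f)  || Val == FloatToBits(-2.0f) ||
             Val == FloatToBits(4.0f)  || Val == FloatToBits(-4.0f) ||
             (Val == 0x3e22f983 && HasInv2Pi); // 1/(2*pi), newer subtargets only
    }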
bool SIInstrInfo::isInlineConstant(const MachineOperand &MO,
- unsigned OpSize) const {
- if (MO.isImm()) {
- // MachineOperand provides no way to tell the true operand size, since it
- // only records a 64-bit value. We need to know the size to determine if a
- // 32-bit floating point immediate bit pattern is legal for an integer
- // immediate. It would be for any 32-bit integer operand, but would not be
- // for a 64-bit one.
+ uint8_t OperandType) const {
+ if (!MO.isImm() || OperandType < MCOI::OPERAND_FIRST_TARGET)
+ return false;
- unsigned BitSize = 8 * OpSize;
- return isInlineConstant(APInt(BitSize, MO.getImm(), true));
- }
+ // MachineOperand provides no way to tell the true operand size, since it only
+ // records a 64-bit value. We need to know the size to determine if a 32-bit
+ // floating point immediate bit pattern is legal for an integer immediate. It
+ // would be for any 32-bit integer operand, but would not be for a 64-bit one.
+
+ int64_t Imm = MO.getImm();
+ switch (operandBitWidth(OperandType)) {
+ case 32: {
+ int32_t Trunc = static_cast<int32_t>(Imm);
+ return Trunc == Imm &&
+ AMDGPU::isInlinableLiteral32(Trunc, ST.hasInv2PiInlineImm());
+ }
+ case 64: {
+ return AMDGPU::isInlinableLiteral64(MO.getImm(),
+ ST.hasInv2PiInlineImm());
+ }
+ case 16: {
+ if (isInt<16>(Imm) || isUInt<16>(Imm)) {
+ int16_t Trunc = static_cast<int16_t>(Imm);
+ return AMDGPU::isInlinableLiteral16(Trunc, ST.hasInv2PiInlineImm());
+ }
- return false;
+ return false;
+ }
+ default:
+ llvm_unreachable("invalid bitwidth");
+ }
}
-bool SIInstrInfo::isLiteralConstant(const MachineOperand &MO,
- unsigned OpSize) const {
- return MO.isImm() && !isInlineConstant(MO, OpSize);
+bool SIInstrInfo::isLiteralConstantLike(const MachineOperand &MO,
+ const MCOperandInfo &OpInfo) const {
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ return false;
+ case MachineOperand::MO_Immediate:
+ return !isInlineConstant(MO, OpInfo);
+ case MachineOperand::MO_FrameIndex:
+ case MachineOperand::MO_MachineBasicBlock:
+ case MachineOperand::MO_ExternalSymbol:
+ case MachineOperand::MO_GlobalAddress:
+ case MachineOperand::MO_MCSymbol:
+ return true;
+ default:
+ llvm_unreachable("unexpected operand type");
+ }
}
static bool compareMachineOp(const MachineOperand &Op0,
@@ -1544,11 +1773,10 @@ bool SIInstrInfo::isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
if (OpInfo.RegClass < 0)
return false;
- unsigned OpSize = RI.getRegClass(OpInfo.RegClass)->getSize();
- if (isLiteralConstant(MO, OpSize))
- return RI.opCanUseLiteralConstant(OpInfo.OperandType);
+ if (MO.isImm() && isInlineConstant(MO, OpInfo))
+ return RI.opCanUseInlineConstant(OpInfo.OperandType);
- return RI.opCanUseInlineConstant(OpInfo.OperandType);
+ return RI.opCanUseLiteralConstant(OpInfo.OperandType);
}
bool SIInstrInfo::hasVALU32BitEncoding(unsigned Opcode) const {
@@ -1575,12 +1803,17 @@ bool SIInstrInfo::hasModifiersSet(const MachineInstr &MI,
bool SIInstrInfo::usesConstantBus(const MachineRegisterInfo &MRI,
const MachineOperand &MO,
- unsigned OpSize) const {
+ const MCOperandInfo &OpInfo) const {
// Literal constants use the constant bus.
- if (isLiteralConstant(MO, OpSize))
- return true;
+ //if (isLiteralConstantLike(MO, OpInfo))
+ // return true;
+ if (MO.isImm())
+ return !isInlineConstant(MO, OpInfo);
+
+ if (!MO.isReg())
+ return true; // Misc other operands like FrameIndex
- if (!MO.isReg() || !MO.isUse())
+ if (!MO.isUse())
return false;
if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
@@ -1644,6 +1877,16 @@ static bool shouldReadExec(const MachineInstr &MI) {
return true;
}
+static bool isSubRegOf(const SIRegisterInfo &TRI,
+ const MachineOperand &SuperVec,
+ const MachineOperand &SubReg) {
+ if (TargetRegisterInfo::isPhysicalRegister(SubReg.getReg()))
+ return TRI.isSubRegister(SuperVec.getReg(), SubReg.getReg());
+
+ return SubReg.getSubReg() != AMDGPU::NoSubRegister &&
+ SubReg.getReg() == SuperVec.getReg();
+}
+
bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
StringRef &ErrInfo) const {
uint16_t Opcode = MI.getOpcode();
@@ -1660,6 +1903,28 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
return false;
}
+ if (MI.isInlineAsm()) {
+ // Verify register classes for inlineasm constraints.
+ for (unsigned I = InlineAsm::MIOp_FirstOperand, E = MI.getNumOperands();
+ I != E; ++I) {
+ const TargetRegisterClass *RC = MI.getRegClassConstraint(I, this, &RI);
+ if (!RC)
+ continue;
+
+ const MachineOperand &Op = MI.getOperand(I);
+ if (!Op.isReg())
+ continue;
+
+ unsigned Reg = Op.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg) && !RC->contains(Reg)) {
+ ErrInfo = "inlineasm operand has incorrect register class.";
+ return false;
+ }
+ }
+
+ return true;
+ }
+
// Make sure the register classes are correct.
for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) {
if (MI.getOperand(i).isFPImm()) {
@@ -1677,15 +1942,22 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
return false;
}
break;
- case AMDGPU::OPERAND_REG_IMM32:
+ case AMDGPU::OPERAND_REG_IMM_INT32:
+ case AMDGPU::OPERAND_REG_IMM_FP32:
break;
- case AMDGPU::OPERAND_REG_INLINE_C:
- if (isLiteralConstant(MI.getOperand(i),
- RI.getRegClass(RegClass)->getSize())) {
+ case AMDGPU::OPERAND_REG_INLINE_C_INT32:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP32:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT64:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP64:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP16: {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() && (!MO.isImm() || !isInlineConstant(MI, i))) {
ErrInfo = "Illegal immediate value for operand.";
return false;
}
break;
+ }
case MCOI::OPERAND_IMMEDIATE:
case AMDGPU::OPERAND_KIMM32:
// Check if this operand is an immediate.
@@ -1695,7 +1967,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
ErrInfo = "Expected immediate, but got non-immediate";
return false;
}
- // Fall-through
+ LLVM_FALLTHROUGH;
default:
continue;
}
@@ -1737,7 +2009,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
if (OpIdx == -1)
break;
const MachineOperand &MO = MI.getOperand(OpIdx);
- if (usesConstantBus(MRI, MO, getOpSize(Opcode, OpIdx))) {
+ if (usesConstantBus(MRI, MO, MI.getDesc().OpInfo[OpIdx])) {
if (MO.isReg()) {
if (MO.getReg() != SGPRUsed)
++ConstantBusCount;
@@ -1768,6 +2040,65 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
}
}
+ if (isSOPK(MI)) {
+ int64_t Imm = getNamedOperand(MI, AMDGPU::OpName::simm16)->getImm();
+ if (sopkIsZext(MI)) {
+ if (!isUInt<16>(Imm)) {
+ ErrInfo = "invalid immediate for SOPK instruction";
+ return false;
+ }
+ } else {
+ if (!isInt<16>(Imm)) {
+ ErrInfo = "invalid immediate for SOPK instruction";
+ return false;
+ }
+ }
+ }
+
+ if (Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e32 ||
+ Desc.getOpcode() == AMDGPU::V_MOVRELS_B32_e64 ||
+ Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
+ Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64) {
+ const bool IsDst = Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e32 ||
+ Desc.getOpcode() == AMDGPU::V_MOVRELD_B32_e64;
+
+ const unsigned StaticNumOps = Desc.getNumOperands() +
+ Desc.getNumImplicitUses();
+ const unsigned NumImplicitOps = IsDst ? 2 : 1;
+
+ // Allow additional implicit operands. This allows a fixup done by the post
+ // RA scheduler where the main implicit operand is killed and implicit-defs
+ // are added for sub-registers that remain live after this instruction.
+ if (MI.getNumOperands() < StaticNumOps + NumImplicitOps) {
+ ErrInfo = "missing implicit register operands";
+ return false;
+ }
+
+ const MachineOperand *Dst = getNamedOperand(MI, AMDGPU::OpName::vdst);
+ if (IsDst) {
+ if (!Dst->isUse()) {
+ ErrInfo = "v_movreld_b32 vdst should be a use operand";
+ return false;
+ }
+
+ unsigned UseOpIdx;
+ if (!MI.isRegTiedToUseOperand(StaticNumOps, &UseOpIdx) ||
+ UseOpIdx != StaticNumOps + 1) {
+ ErrInfo = "movrel implicit operands should be tied";
+ return false;
+ }
+ }
+
+ const MachineOperand &Src0 = MI.getOperand(Src0Idx);
+ const MachineOperand &ImpUse
+ = MI.getOperand(StaticNumOps + NumImplicitOps - 1);
+ if (!ImpUse.isReg() || !ImpUse.isUse() ||
+ !isSubRegOf(RI, ImpUse, IsDst ? *Dst : Src0)) {
+ ErrInfo = "src0 should be subreg of implicit vector use";
+ return false;
+ }
+ }
+
// Make sure we aren't losing exec uses in the td files. This mostly requires
// being careful when using let Uses to try to add other use registers.
if (shouldReadExec(MI)) {
@@ -1777,6 +2108,18 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
}
}
+ if (isSMRD(MI)) {
+ if (MI.mayStore()) {
+ // The register offset form of scalar stores may only use m0 as the
+ // soffset register.
+ const MachineOperand *Soff = getNamedOperand(MI, AMDGPU::OpName::soff);
+ if (Soff && Soff->getReg() != AMDGPU::M0) {
+ ErrInfo = "scalar stores must use m0 as offset register";
+ return false;
+ }
+ }
+ }
+
return true;
}
@@ -1797,13 +2140,13 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
case AMDGPU::S_SUB_U32: return AMDGPU::V_SUB_I32_e32;
case AMDGPU::S_SUBB_U32: return AMDGPU::V_SUBB_U32_e32;
case AMDGPU::S_MUL_I32: return AMDGPU::V_MUL_LO_I32;
- case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e32;
- case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e32;
- case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e32;
- case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e32;
- case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e32;
- case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e32;
- case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e32;
+ case AMDGPU::S_AND_B32: return AMDGPU::V_AND_B32_e64;
+ case AMDGPU::S_OR_B32: return AMDGPU::V_OR_B32_e64;
+ case AMDGPU::S_XOR_B32: return AMDGPU::V_XOR_B32_e64;
+ case AMDGPU::S_MIN_I32: return AMDGPU::V_MIN_I32_e64;
+ case AMDGPU::S_MIN_U32: return AMDGPU::V_MIN_U32_e64;
+ case AMDGPU::S_MAX_I32: return AMDGPU::V_MAX_I32_e64;
+ case AMDGPU::S_MAX_U32: return AMDGPU::V_MAX_U32_e64;
case AMDGPU::S_ASHR_I32: return AMDGPU::V_ASHR_I32_e32;
case AMDGPU::S_ASHR_I64: return AMDGPU::V_ASHR_I64;
case AMDGPU::S_LSHL_B32: return AMDGPU::V_LSHL_B32_e32;
@@ -1830,6 +2173,8 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) {
case AMDGPU::S_CMP_GE_U32: return AMDGPU::V_CMP_GE_U32_e32;
case AMDGPU::S_CMP_LT_U32: return AMDGPU::V_CMP_LT_U32_e32;
case AMDGPU::S_CMP_LE_U32: return AMDGPU::V_CMP_LE_U32_e32;
+ case AMDGPU::S_CMP_EQ_U64: return AMDGPU::V_CMP_EQ_U64_e32;
+ case AMDGPU::S_CMP_LG_U64: return AMDGPU::V_CMP_NE_U64_e32;
case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e64;
case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32;
case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32;
@@ -1937,11 +2282,10 @@ MachineOperand SIInstrInfo::buildExtractSubRegOrImm(
unsigned SubIdx,
const TargetRegisterClass *SubRC) const {
if (Op.isImm()) {
- // XXX - Is there a better way to do this?
if (SubIdx == AMDGPU::sub0)
- return MachineOperand::CreateImm(Op.getImm() & 0xFFFFFFFF);
+ return MachineOperand::CreateImm(static_cast<int32_t>(Op.getImm()));
if (SubIdx == AMDGPU::sub1)
- return MachineOperand::CreateImm(Op.getImm() >> 32);
+ return MachineOperand::CreateImm(static_cast<int32_t>(Op.getImm() >> 32));
llvm_unreachable("Unhandled register index for immediate");
}
@@ -1978,8 +2322,8 @@ bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI,
// In order to be legal, the common sub-class must be equal to the
// class of the current operand. For example:
//
- // v_mov_b32 s0 ; Operand defined as vsrc_32
- // ; RI.getCommonSubClass(s0,vsrc_32) = sgpr ; LEGAL
+ // v_mov_b32 s0 ; Operand defined as vsrc_b32
+ // ; RI.getCommonSubClass(s0,vsrc_b32) = sgpr ; LEGAL
//
// s_sendmsg 0, s0 ; Operand defined as m0reg
// ; RI.getCommonSubClass(s0,m0reg) = m0reg ; NOT LEGAL
@@ -2008,7 +2352,7 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
if (!MO)
MO = &MI.getOperand(OpIdx);
- if (isVALU(MI) && usesConstantBus(MRI, *MO, DefinedRC->getSize())) {
+ if (isVALU(MI) && usesConstantBus(MRI, *MO, OpInfo)) {
RegSubRegPair SGPRUsed;
if (MO->isReg())
@@ -2020,7 +2364,7 @@ bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
const MachineOperand &Op = MI.getOperand(i);
if (Op.isReg()) {
if ((Op.getReg() != SGPRUsed.Reg || Op.getSubReg() != SGPRUsed.SubReg) &&
- usesConstantBus(MRI, Op, getOpSize(MI, i))) {
+ usesConstantBus(MRI, Op, InstDesc.OpInfo[i])) {
return false;
}
} else if (InstDesc.OpInfo[i].OperandType == AMDGPU::OPERAND_KIMM32) {
@@ -2202,6 +2546,39 @@ void SIInstrInfo::legalizeOperandsSMRD(MachineRegisterInfo &MRI,
}
}
+void SIInstrInfo::legalizeGenericOperand(MachineBasicBlock &InsertMBB,
+ MachineBasicBlock::iterator I,
+ const TargetRegisterClass *DstRC,
+ MachineOperand &Op,
+ MachineRegisterInfo &MRI,
+ const DebugLoc &DL) const {
+
+ unsigned OpReg = Op.getReg();
+ unsigned OpSubReg = Op.getSubReg();
+
+ const TargetRegisterClass *OpRC = RI.getSubClassWithSubReg(
+ RI.getRegClassForReg(MRI, OpReg), OpSubReg);
+
+ // Check if operand is already the correct register class.
+ if (DstRC == OpRC)
+ return;
+
+ unsigned DstReg = MRI.createVirtualRegister(DstRC);
+ MachineInstr *Copy = BuildMI(InsertMBB, I, DL, get(AMDGPU::COPY), DstReg)
+ .addOperand(Op);
+
+ Op.setReg(DstReg);
+ Op.setSubReg(0);
+
+ MachineInstr *Def = MRI.getVRegDef(OpReg);
+ if (!Def)
+ return;
+
+ // Try to eliminate the copy if it is copying an immediate value.
+ if (Def->isMoveImmediate())
+ FoldImmediate(*Copy, *Def, OpReg, &MRI);
+}
+
void SIInstrInfo::legalizeOperands(MachineInstr &MI) const {
MachineFunction &MF = *MI.getParent()->getParent();
MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -2260,15 +2637,14 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI) const {
MachineOperand &Op = MI.getOperand(I);
if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg()))
continue;
- unsigned DstReg = MRI.createVirtualRegister(RC);
// MI is a PHI instruction.
MachineBasicBlock *InsertBB = MI.getOperand(I + 1).getMBB();
MachineBasicBlock::iterator Insert = InsertBB->getFirstTerminator();
- BuildMI(*InsertBB, Insert, MI.getDebugLoc(), get(AMDGPU::COPY), DstReg)
- .addOperand(Op);
- Op.setReg(DstReg);
+ // Avoid creating no-op copies with the same src and dst reg class. These
+ // confuse some of the machine passes.
+ legalizeGenericOperand(*InsertBB, Insert, RC, Op, MRI, MI.getDebugLoc());
}
}
@@ -2292,12 +2668,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI) const {
if (VRC == OpRC)
continue;
- unsigned DstReg = MRI.createVirtualRegister(VRC);
-
- BuildMI(*MBB, MI, MI.getDebugLoc(), get(AMDGPU::COPY), DstReg)
- .addOperand(Op);
-
- Op.setReg(DstReg);
+ legalizeGenericOperand(*MBB, MI, VRC, Op, MRI, MI.getDebugLoc());
Op.setIsKill();
}
}
@@ -2313,11 +2684,9 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI) const {
const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);
const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0);
if (DstRC != Src0RC) {
- MachineBasicBlock &MBB = *MI.getParent();
- unsigned NewSrc0 = MRI.createVirtualRegister(DstRC);
- BuildMI(MBB, MI, MI.getDebugLoc(), get(AMDGPU::COPY), NewSrc0)
- .addReg(Src0);
- MI.getOperand(1).setReg(NewSrc0);
+ MachineBasicBlock *MBB = MI.getParent();
+ MachineOperand &Op = MI.getOperand(1);
+ legalizeGenericOperand(*MBB, MI, DstRC, Op, MRI, MI.getDebugLoc());
}
return;
}
@@ -2664,6 +3033,22 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
continue;
unsigned DstReg = Inst.getOperand(0).getReg();
+ if (Inst.isCopy() &&
+ TargetRegisterInfo::isVirtualRegister(Inst.getOperand(1).getReg()) &&
+ NewDstRC == RI.getRegClassForReg(MRI, Inst.getOperand(1).getReg())) {
+ // Instead of creating a copy where src and dst are the same register
+ // class, we just replace all uses of dst with src. These kinds of
+ // copies interfere with the heuristics MachineSink uses to decide
+      // whether or not to split a critical edge, since the pass assumes
+      // that copies will end up as machine instructions and not be
+      // eliminated.
+ addUsersToMoveToVALUWorklist(DstReg, MRI, Worklist);
+ MRI.replaceRegWith(DstReg, Inst.getOperand(1).getReg());
+ MRI.clearKillFlags(Inst.getOperand(1).getReg());
+ Inst.getOperand(0).setReg(DstReg);
+ continue;
+ }
+
NewDstReg = MRI.createVirtualRegister(NewDstRC);
MRI.replaceRegWith(DstReg, NewDstReg);
}
@@ -2927,10 +3312,16 @@ void SIInstrInfo::addUsersToMoveToVALUWorklist(
MachineRegisterInfo &MRI,
SmallVectorImpl<MachineInstr *> &Worklist) const {
for (MachineRegisterInfo::use_iterator I = MRI.use_begin(DstReg),
- E = MRI.use_end(); I != E; ++I) {
+ E = MRI.use_end(); I != E;) {
MachineInstr &UseMI = *I->getParent();
if (!canReadVGPR(UseMI, I.getOperandNo())) {
Worklist.push_back(&UseMI);
+
+ do {
+ ++I;
+ } while (I != E && I->getParent() == &UseMI);
+ } else {
+ ++I;
}
}
}
@@ -3098,6 +3489,56 @@ bool SIInstrInfo::isHighLatencyInstruction(const MachineInstr &MI) const {
return isMUBUF(Opc) || isMTBUF(Opc) || isMIMG(Opc);
}
+unsigned SIInstrInfo::isStackAccess(const MachineInstr &MI,
+ int &FrameIndex) const {
+ const MachineOperand *Addr = getNamedOperand(MI, AMDGPU::OpName::vaddr);
+ if (!Addr || !Addr->isFI())
+ return AMDGPU::NoRegister;
+
+ assert(!MI.memoperands_empty() &&
+ (*MI.memoperands_begin())->getAddrSpace() == AMDGPUAS::PRIVATE_ADDRESS);
+
+ FrameIndex = Addr->getIndex();
+ return getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
+}
+
+unsigned SIInstrInfo::isSGPRStackAccess(const MachineInstr &MI,
+ int &FrameIndex) const {
+ const MachineOperand *Addr = getNamedOperand(MI, AMDGPU::OpName::addr);
+ assert(Addr && Addr->isFI());
+ FrameIndex = Addr->getIndex();
+ return getNamedOperand(MI, AMDGPU::OpName::data)->getReg();
+}
+
+unsigned SIInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
+ int &FrameIndex) const {
+
+ if (!MI.mayLoad())
+ return AMDGPU::NoRegister;
+
+ if (isMUBUF(MI) || isVGPRSpill(MI))
+ return isStackAccess(MI, FrameIndex);
+
+ if (isSGPRSpill(MI))
+ return isSGPRStackAccess(MI, FrameIndex);
+
+ return AMDGPU::NoRegister;
+}
+
+unsigned SIInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
+ int &FrameIndex) const {
+ if (!MI.mayStore())
+ return AMDGPU::NoRegister;
+
+ if (isMUBUF(MI) || isVGPRSpill(MI))
+ return isStackAccess(MI, FrameIndex);
+
+ if (isSGPRSpill(MI))
+ return isSGPRStackAccess(MI, FrameIndex);
+
+ return AMDGPU::NoRegister;
+}
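A minimal sketch of how these hooks are typically consumed by target-independent passes (the loop and the TII pointer are assumed here for illustration; the hook signatures come from TargetInstrInfo):
  int FI;
  if (unsigned Reg = TII->isLoadFromStackSlot(MI, FI)) {
    // MI restores Reg from stack slot FI (a spill reload).
  } else if (unsigned Reg = TII->isStoreToStackSlot(MI, FI)) {
    // MI spills Reg to stack slot FI.
  }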
+
unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
unsigned Opc = MI.getOpcode();
const MCInstrDesc &Desc = getMCOpcodeFromPseudo(Opc);
@@ -3105,32 +3546,45 @@ unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
// If we have a definitive size, we can use it. Otherwise we need to inspect
// the operands to know the size.
- if (DescSize == 8 || DescSize == 4)
+ //
+ // FIXME: Instructions that have a base 32-bit encoding report their size as
+ // 4, even though they are really 8 bytes if they have a literal operand.
+ if (DescSize != 0 && DescSize != 4)
return DescSize;
- assert(DescSize == 0);
+ if (Opc == AMDGPU::WAVE_BARRIER)
+ return 0;
// 4-byte instructions may have a 32-bit literal encoded after them. Check
// operands that could ever be literals.
if (isVALU(MI) || isSALU(MI)) {
+ if (isFixedSize(MI)) {
+ assert(DescSize == 4);
+ return DescSize;
+ }
+
int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
if (Src0Idx == -1)
return 4; // No operands.
- if (isLiteralConstant(MI.getOperand(Src0Idx), getOpSize(MI, Src0Idx)))
+ if (isLiteralConstantLike(MI.getOperand(Src0Idx), Desc.OpInfo[Src0Idx]))
return 8;
int Src1Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src1);
if (Src1Idx == -1)
return 4;
- if (isLiteralConstant(MI.getOperand(Src1Idx), getOpSize(MI, Src1Idx)))
+ if (isLiteralConstantLike(MI.getOperand(Src1Idx), Desc.OpInfo[Src1Idx]))
return 8;
return 4;
}
+ if (DescSize == 4)
+ return 4;
+
switch (Opc) {
+ case AMDGPU::SI_MASK_BRANCH:
case TargetOpcode::IMPLICIT_DEF:
case TargetOpcode::KILL:
case TargetOpcode::DBG_VALUE:
@@ -3147,6 +3601,20 @@ unsigned SIInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
}
}
+bool SIInstrInfo::mayAccessFlatAddressSpace(const MachineInstr &MI) const {
+ if (!isFLAT(MI))
+ return false;
+
+ if (MI.memoperands_empty())
+ return true;
+
+ for (const MachineMemOperand *MMO : MI.memoperands()) {
+ if (MMO->getAddrSpace() == AMDGPUAS::FLAT_ADDRESS)
+ return true;
+ }
+ return false;
+}
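One plausible consumer (a sketch only, not part of this patch): a wait-count or memory-legalizer style pass that treats FLAT accesses conservatively, since a FLAT instruction without memoperands may touch either LDS or global memory:
  if (TII->mayAccessFlatAddressSpace(MI)) {
    // Conservatively assume both LDS and VMEM traffic for this access.
  }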
+
ArrayRef<std::pair<int, const char *>>
SIInstrInfo::getSerializableTargetIndices() const {
static const std::pair<int, const char *> TargetIndices[] = {
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index fef8904a6c87..e68f6f92ba96 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -86,6 +86,10 @@ private:
unsigned findUsedSGPR(const MachineInstr &MI, int OpIndices[3]) const;
protected:
+ bool swapSourceModifiers(MachineInstr &MI,
+ MachineOperand &Src0, unsigned Src0OpName,
+ MachineOperand &Src1, unsigned Src1OpName) const;
+
MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI,
unsigned OpIdx0,
unsigned OpIdx1) const override;
@@ -94,7 +98,18 @@ public:
enum TargetOperandFlags {
MO_NONE = 0,
- MO_GOTPCREL = 1
+ // MO_GOTPCREL -> symbol@GOTPCREL -> R_AMDGPU_GOTPCREL.
+ MO_GOTPCREL = 1,
+ // MO_GOTPCREL32_LO -> symbol@gotpcrel32@lo -> R_AMDGPU_GOTPCREL32_LO.
+ MO_GOTPCREL32 = 2,
+ MO_GOTPCREL32_LO = 2,
+ // MO_GOTPCREL32_HI -> symbol@gotpcrel32@hi -> R_AMDGPU_GOTPCREL32_HI.
+ MO_GOTPCREL32_HI = 3,
+ // MO_REL32_LO -> symbol@rel32@lo -> R_AMDGPU_REL32_LO.
+ MO_REL32 = 4,
+ MO_REL32_LO = 4,
+ // MO_REL32_HI -> symbol@rel32@hi -> R_AMDGPU_REL32_HI.
+ MO_REL32_HI = 5
};
explicit SIInstrInfo(const SISubtarget &);
@@ -144,23 +159,48 @@ public:
unsigned getMovOpcode(const TargetRegisterClass *DstRC) const;
LLVM_READONLY
- int commuteOpcode(const MachineInstr &MI) const;
+ int commuteOpcode(unsigned Opc) const;
+
+ LLVM_READONLY
+ inline int commuteOpcode(const MachineInstr &MI) const {
+ return commuteOpcode(MI.getOpcode());
+ }
bool findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const override;
+ bool isBranchOffsetInRange(unsigned BranchOpc,
+ int64_t BrOffset) const override;
+
+ MachineBasicBlock *getBranchDestBlock(const MachineInstr &MI) const override;
+
+ unsigned insertIndirectBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock &NewDestBB,
+ const DebugLoc &DL,
+ int64_t BrOffset,
+ RegScavenger *RS = nullptr) const override;
+
+ bool analyzeBranchImpl(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const;
+
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const override;
- unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
+ unsigned removeBranch(MachineBasicBlock &MBB,
+ int *BytesRemoved = nullptr) const override;
- unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
- const DebugLoc &DL) const override;
+ const DebugLoc &DL,
+ int *BytesAdded = nullptr) const override;
- bool ReverseBranchCondition(
+ bool reverseBranchCondition(
SmallVectorImpl<MachineOperand> &Cond) const override;
bool
@@ -332,6 +372,14 @@ public:
return get(Opcode).TSFlags & SIInstrFlags::FLAT;
}
+ static bool isEXP(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::EXP;
+ }
+
+ bool isEXP(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::EXP;
+ }
+
static bool isWQM(const MachineInstr &MI) {
return MI.getDesc().TSFlags & SIInstrFlags::WQM;
}
@@ -356,6 +404,14 @@ public:
return get(Opcode).TSFlags & SIInstrFlags::VGPRSpill;
}
+ static bool isSGPRSpill(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::SGPRSpill;
+ }
+
+ bool isSGPRSpill(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::SGPRSpill;
+ }
+
static bool isDPP(const MachineInstr &MI) {
return MI.getDesc().TSFlags & SIInstrFlags::DPP;
}
@@ -372,6 +428,32 @@ public:
return MI.getDesc().TSFlags & SIInstrFlags::VM_CNT;
}
+ static bool sopkIsZext(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::SOPK_ZEXT;
+ }
+
+ bool sopkIsZext(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::SOPK_ZEXT;
+ }
+
+ /// \returns true if this is an s_store_dword* instruction. This is more
+  /// specific than isSMEM && mayStore.
+ static bool isScalarStore(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::SCALAR_STORE;
+ }
+
+ bool isScalarStore(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::SCALAR_STORE;
+ }
+
+ static bool isFixedSize(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::FIXED_SIZE;
+ }
+
+ bool isFixedSize(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::FIXED_SIZE;
+ }
+
bool isVGPRCopy(const MachineInstr &MI) const {
assert(MI.isCopy());
unsigned Dest = MI.getOperand(0).getReg();
@@ -380,9 +462,96 @@ public:
return !RI.isSGPRReg(MRI, Dest);
}
+ static int operandBitWidth(uint8_t OperandType) {
+ switch (OperandType) {
+ case AMDGPU::OPERAND_REG_IMM_INT32:
+ case AMDGPU::OPERAND_REG_IMM_FP32:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT32:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP32:
+ return 32;
+ case AMDGPU::OPERAND_REG_IMM_INT64:
+ case AMDGPU::OPERAND_REG_IMM_FP64:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT64:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP64:
+ return 64;
+ case AMDGPU::OPERAND_REG_INLINE_C_INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP16:
+ case AMDGPU::OPERAND_REG_IMM_INT16:
+ case AMDGPU::OPERAND_REG_IMM_FP16:
+ return 16;
+ default:
+ llvm_unreachable("unexpected operand type");
+ }
+ }
+
bool isInlineConstant(const APInt &Imm) const;
- bool isInlineConstant(const MachineOperand &MO, unsigned OpSize) const;
- bool isLiteralConstant(const MachineOperand &MO, unsigned OpSize) const;
+
+ bool isInlineConstant(const MachineOperand &MO, uint8_t OperandType) const;
+
+ bool isInlineConstant(const MachineOperand &MO,
+ const MCOperandInfo &OpInfo) const {
+ return isInlineConstant(MO, OpInfo.OperandType);
+ }
+
+  /// \returns true if, were \p UseMO substituted with \p DefMO in \p MI, it
+  /// would be an inline immediate.
+ bool isInlineConstant(const MachineInstr &MI,
+ const MachineOperand &UseMO,
+ const MachineOperand &DefMO) const {
+ assert(UseMO.getParent() == &MI);
+ int OpIdx = MI.getOperandNo(&UseMO);
+ if (!MI.getDesc().OpInfo || OpIdx >= MI.getDesc().NumOperands) {
+ return false;
+ }
+
+ return isInlineConstant(DefMO, MI.getDesc().OpInfo[OpIdx]);
+ }
+
+  /// \returns true if the operand \p OpIdx in \p MI is a valid inline
+ /// immediate.
+ bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx) const {
+ const MachineOperand &MO = MI.getOperand(OpIdx);
+ return isInlineConstant(MO, MI.getDesc().OpInfo[OpIdx].OperandType);
+ }
+
+ bool isInlineConstant(const MachineInstr &MI, unsigned OpIdx,
+ const MachineOperand &MO) const {
+ if (!MI.getDesc().OpInfo || OpIdx >= MI.getDesc().NumOperands)
+ return false;
+
+ if (MI.isCopy()) {
+ unsigned Size = getOpSize(MI, OpIdx);
+ assert(Size == 8 || Size == 4);
+
+ uint8_t OpType = (Size == 8) ?
+ AMDGPU::OPERAND_REG_IMM_INT64 : AMDGPU::OPERAND_REG_IMM_INT32;
+ return isInlineConstant(MO, OpType);
+ }
+
+ return isInlineConstant(MO, MI.getDesc().OpInfo[OpIdx].OperandType);
+ }
+
+ bool isInlineConstant(const MachineOperand &MO) const {
+ const MachineInstr *Parent = MO.getParent();
+ return isInlineConstant(*Parent, Parent->getOperandNo(&MO));
+ }
+
+ bool isLiteralConstant(const MachineOperand &MO,
+ const MCOperandInfo &OpInfo) const {
+ return MO.isImm() && !isInlineConstant(MO, OpInfo.OperandType);
+ }
+
+ bool isLiteralConstant(const MachineInstr &MI, int OpIdx) const {
+ const MachineOperand &MO = MI.getOperand(OpIdx);
+ return MO.isImm() && !isInlineConstant(MI, OpIdx);
+ }
+
+  // Returns true if this operand could potentially require a 32-bit literal
+  // operand, but not necessarily. A FrameIndex, for example, could resolve to
+  // an inline immediate value that will not require an additional 4 bytes;
+  // this assumes that it will.
+ bool isLiteralConstantLike(const MachineOperand &MO,
+ const MCOperandInfo &OpInfo) const;
bool isImmOperandLegal(const MachineInstr &MI, unsigned OpNo,
const MachineOperand &MO) const;
@@ -394,7 +563,7 @@ public:
/// \brief Returns true if this operand uses the constant bus.
bool usesConstantBus(const MachineRegisterInfo &MRI,
const MachineOperand &MO,
- unsigned OpSize) const;
+ const MCOperandInfo &OpInfo) const;
/// \brief Return true if this instruction has any modifiers.
/// e.g. src[012]_mod, omod, clamp.
@@ -487,6 +656,12 @@ public:
void legalizeOperandsSMRD(MachineRegisterInfo &MRI, MachineInstr &MI) const;
+ void legalizeGenericOperand(MachineBasicBlock &InsertMBB,
+ MachineBasicBlock::iterator I,
+ const TargetRegisterClass *DstRC,
+ MachineOperand &Op, MachineRegisterInfo &MRI,
+ const DebugLoc &DL) const;
+
/// \brief Legalize all operands in this instruction. This function may
/// create new instruction and insert them before \p MI.
void legalizeOperands(MachineInstr &MI) const;
@@ -535,7 +710,17 @@ public:
return get(pseudoToMCOpcode(Opcode));
}
- unsigned getInstSizeInBytes(const MachineInstr &MI) const;
+ unsigned isStackAccess(const MachineInstr &MI, int &FrameIndex) const;
+ unsigned isSGPRStackAccess(const MachineInstr &MI, int &FrameIndex) const;
+
+ unsigned isLoadFromStackSlot(const MachineInstr &MI,
+ int &FrameIndex) const override;
+ unsigned isStoreToStackSlot(const MachineInstr &MI,
+ int &FrameIndex) const override;
+
+ unsigned getInstSizeInBytes(const MachineInstr &MI) const override;
+
+ bool mayAccessFlatAddressSpace(const MachineInstr &MI) const;
ArrayRef<std::pair<int, const char *>>
getSerializableTargetIndices() const override;
@@ -570,10 +755,19 @@ namespace AMDGPU {
LLVM_READONLY
int getAtomicNoRetOp(uint16_t Opcode);
+ LLVM_READONLY
+ int getSOPKOp(uint16_t Opcode);
+
const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19);
const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21);
const uint64_t RSRC_TID_ENABLE = UINT64_C(1) << (32 + 23);
+
+ // For MachineOperands.
+ enum TargetFlags {
+ TF_LONG_BRANCH_FORWARD = 1 << 0,
+ TF_LONG_BRANCH_BACKWARD = 1 << 1
+ };
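These flags are presumably attached to branch-target operands when long branches are expanded; a hedged sketch (the MachineInstrBuilder MIB and DestBB are assumptions, not from this patch):
  // Hypothetical: tag the destination block of a long forward branch.
  MIB.addMBB(&DestBB, AMDGPU::TF_LONG_BRANCH_FORWARD);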
} // End namespace AMDGPU
namespace SI {
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 00f53e846db4..34096e158039 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -14,75 +14,6 @@ def isCIOnly : Predicate<"Subtarget->getGeneration() =="
def DisableInst : Predicate <"false">, AssemblerPredicate<"FeatureDisable">;
-class vop {
- field bits<9> SI3;
- field bits<10> VI3;
-}
-
-class vopc <bits<8> si, bits<8> vi = !add(0x40, si)> : vop {
- field bits<8> SI = si;
- field bits<8> VI = vi;
-
- field bits<9> SI3 = {0, si{7-0}};
- field bits<10> VI3 = {0, 0, vi{7-0}};
-}
-
-class vop1 <bits<8> si, bits<8> vi = si> : vop {
- field bits<8> SI = si;
- field bits<8> VI = vi;
-
- field bits<9> SI3 = {1, 1, si{6-0}};
- field bits<10> VI3 = !add(0x140, vi);
-}
-
-class vop2 <bits<6> si, bits<6> vi = si> : vop {
- field bits<6> SI = si;
- field bits<6> VI = vi;
-
- field bits<9> SI3 = {1, 0, 0, si{5-0}};
- field bits<10> VI3 = {0, 1, 0, 0, vi{5-0}};
-}
-
-// Specify a VOP2 opcode for SI and VOP3 opcode for VI
-// that doesn't have VOP2 encoding on VI
-class vop23 <bits<6> si, bits<10> vi> : vop2 <si> {
- let VI3 = vi;
-}
-
-class vop3 <bits<9> si, bits<10> vi = {0, si}> : vop {
- let SI3 = si;
- let VI3 = vi;
-}
-
-class sop1 <bits<8> si, bits<8> vi = si> {
- field bits<8> SI = si;
- field bits<8> VI = vi;
-}
-
-class sop2 <bits<7> si, bits<7> vi = si> {
- field bits<7> SI = si;
- field bits<7> VI = vi;
-}
-
-class sopk <bits<5> si, bits<5> vi = si> {
- field bits<5> SI = si;
- field bits<5> VI = vi;
-}
-
-class dsop <bits<8> si, bits<8> vi = si> {
- field bits<8> SI = si;
- field bits<8> VI = vi;
-}
-
-// Specify an SMRD opcode for SI and SMEM opcode for VI
-
-// FIXME: This should really be bits<5> si, Tablegen crashes if
-// parameter default value is other parameter with different bit size
-class smrd<bits<8> si, bits<8> vi = si> {
- field bits<5> SI = si{4-0};
- field bits<8> VI = vi;
-}
-
// Except for the NONE field, this must be kept in sync with the
// SIEncodingFamily enum in AMDGPUInstrInfo.cpp
def SIEncodingFamily {
@@ -127,6 +58,19 @@ def SItbuffer_store : SDNode<"AMDGPUISD::TBUFFER_STORE_FORMAT",
[SDNPMayStore, SDNPMemOperand, SDNPHasChain]
>;
+def SDTBufferLoad : SDTypeProfile<1, 5,
+ [ // vdata
+ SDTCisVT<1, v4i32>, // rsrc
+ SDTCisVT<2, i32>, // vindex
+ SDTCisVT<3, i32>, // offset
+ SDTCisVT<4, i1>, // glc
+ SDTCisVT<5, i1>]>; // slc
+
+def SIbuffer_load : SDNode <"AMDGPUISD::BUFFER_LOAD", SDTBufferLoad,
+ [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
+def SIbuffer_load_format : SDNode <"AMDGPUISD::BUFFER_LOAD_FORMAT", SDTBufferLoad,
+ [SDNPMemOperand, SDNPHasChain, SDNPMayLoad]>;
+
def SIload_input : SDNode<"AMDGPUISD::LOAD_INPUT",
SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisVT<1, v4i32>, SDTCisVT<2, i16>,
SDTCisVT<3, i32>]>
@@ -143,72 +87,15 @@ def SIsampled : SDSample<"AMDGPUISD::SAMPLED">;
def SIsamplel : SDSample<"AMDGPUISD::SAMPLEL">;
def SIpc_add_rel_offset : SDNode<"AMDGPUISD::PC_ADD_REL_OFFSET",
- SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>]>
+ SDTypeProfile<1, 2, [SDTCisVT<0, iPTR>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>
>;
//===----------------------------------------------------------------------===//
-// PatFrags for FLAT instructions
-//===----------------------------------------------------------------------===//
-
-class flat_ld <SDPatternOperator ld> : PatFrag<(ops node:$ptr),
- (ld node:$ptr), [{
- const MemSDNode *LD = cast<MemSDNode>(N);
- return LD->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS ||
- LD->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
- LD->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
-}]>;
-
-def flat_load : flat_ld <load>;
-def atomic_flat_load : flat_ld<atomic_load>;
-def flat_az_extloadi8 : flat_ld <az_extloadi8>;
-def flat_sextloadi8 : flat_ld <sextloadi8>;
-def flat_az_extloadi16 : flat_ld <az_extloadi16>;
-def flat_sextloadi16 : flat_ld <sextloadi16>;
-
-class flat_st <SDPatternOperator st> : PatFrag<(ops node:$val, node:$ptr),
- (st node:$val, node:$ptr), [{
- const MemSDNode *ST = cast<MemSDNode>(N);
- return ST->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS ||
- ST->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
-}]>;
-
-def flat_store: flat_st <store>;
-def atomic_flat_store: flat_st <atomic_store>;
-def flat_truncstorei8 : flat_st <truncstorei8>;
-def flat_truncstorei16 : flat_st <truncstorei16>;
-
-class MubufLoad <SDPatternOperator op> : PatFrag <
- (ops node:$ptr), (op node:$ptr), [{
-
- const MemSDNode *LD = cast<MemSDNode>(N);
- return LD->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS ||
- LD->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
-}]>;
-
-def mubuf_load : MubufLoad <load>;
-def mubuf_az_extloadi8 : MubufLoad <az_extloadi8>;
-def mubuf_sextloadi8 : MubufLoad <sextloadi8>;
-def mubuf_az_extloadi16 : MubufLoad <az_extloadi16>;
-def mubuf_sextloadi16 : MubufLoad <sextloadi16>;
-
-def mubuf_load_atomic : MubufLoad <atomic_load>;
-
-def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{
- auto Ld = cast<LoadSDNode>(N);
- return Ld->getAlignment() >= 4 &&
- Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
- static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpUniform(N);
-}]>;
-
-//===----------------------------------------------------------------------===//
// PatFrags for global memory operations
//===----------------------------------------------------------------------===//
-def atomic_inc_global : global_binary_atomic_op<SIatomic_inc>;
-def atomic_dec_global : global_binary_atomic_op<SIatomic_dec>;
-
-def atomic_inc_flat : flat_binary_atomic_op<SIatomic_inc>;
-def atomic_dec_flat : flat_binary_atomic_op<SIatomic_dec>;
+defm atomic_inc_global : global_binary_atomic_op<SIatomic_inc>;
+defm atomic_dec_global : global_binary_atomic_op<SIatomic_dec>;
//===----------------------------------------------------------------------===//
// SDNodes and PatFrag for local loads and stores to enable s_mov_b32 m0, -1
@@ -338,36 +225,6 @@ def si_atomic_cmp_swap_glue : SDNode <"ISD::ATOMIC_CMP_SWAP", SDTAtomic3,
defm si_atomic_cmp_swap : AtomicCmpSwapLocal <si_atomic_cmp_swap_glue>;
-// Transformation function, extract the lower 32bit of a 64bit immediate
-def LO32 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getZExtValue() & 0xffffffff, SDLoc(N),
- MVT::i32);
-}]>;
-
-def LO32f : SDNodeXForm<fpimm, [{
- APInt V = N->getValueAPF().bitcastToAPInt().trunc(32);
- return CurDAG->getTargetConstantFP(APFloat(APFloat::IEEEsingle, V), MVT::f32);
-}]>;
-
-// Transformation function, extract the upper 32bit of a 64bit immediate
-def HI32 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getZExtValue() >> 32, SDLoc(N), MVT::i32);
-}]>;
-
-def HI32f : SDNodeXForm<fpimm, [{
- APInt V = N->getValueAPF().bitcastToAPInt().lshr(32).trunc(32);
- return CurDAG->getTargetConstantFP(APFloat(APFloat::IEEEsingle, V), SDLoc(N),
- MVT::f32);
-}]>;
-
-def IMM8bitDWORD : PatLeaf <(imm),
- [{return (N->getZExtValue() & ~0x3FC) == 0;}]
->;
-
-def as_dword_i32imm : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(N->getZExtValue() >> 2, SDLoc(N), MVT::i32);
-}]>;
-
def as_i1imm : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i1);
}]>;
@@ -394,24 +251,17 @@ return CurDAG->getTargetConstant(
N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32);
}]>;
+def frameindex_to_targetframeindex : SDNodeXForm<frameindex, [{
+ auto FI = cast<FrameIndexSDNode>(N);
+ return CurDAG->getTargetFrameIndex(FI->getIndex(), MVT::i32);
+}]>;
+
// Copied from the AArch64 backend:
def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{
return CurDAG->getTargetConstant(
N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64);
}]>;
-def IMM8bit : PatLeaf <(imm),
- [{return isUInt<8>(N->getZExtValue());}]
->;
-
-def IMM12bit : PatLeaf <(imm),
- [{return isUInt<12>(N->getZExtValue());}]
->;
-
-def IMM16bit : PatLeaf <(imm),
- [{return isUInt<16>(N->getZExtValue());}]
->;
-
def SIMM16bit : PatLeaf <(imm),
[{return isInt<16>(N->getSExtValue());}]
>;
@@ -420,15 +270,6 @@ def IMM20bit : PatLeaf <(imm),
[{return isUInt<20>(N->getZExtValue());}]
>;
-def IMM32bit : PatLeaf <(imm),
- [{return isUInt<32>(N->getZExtValue());}]
->;
-
-def mubuf_vaddr_offset : PatFrag<
- (ops node:$ptr, node:$offset, node:$imm_offset),
- (add (add node:$ptr, node:$offset), node:$imm_offset)
->;
-
class InlineImm <ValueType vt> : PatLeaf <(vt imm), [{
return isInlineImmediate(N);
}]>;
@@ -437,29 +278,31 @@ class InlineFPImm <ValueType vt> : PatLeaf <(vt fpimm), [{
return isInlineImmediate(N);
}]>;
-class SGPRImm <dag frag> : PatLeaf<frag, [{
+class VGPRImm <dag frag> : PatLeaf<frag, [{
if (Subtarget->getGeneration() < SISubtarget::SOUTHERN_ISLANDS) {
return false;
}
const SIRegisterInfo *SIRI =
static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
+ unsigned Limit = 0;
for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
- U != E; ++U) {
+ Limit < 10 && U != E; ++U, ++Limit) {
const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
- if (RC && SIRI->isSGPRClass(RC))
- return true;
+
+    // If the register class is unknown, it could be one that needs to be an
+    // SGPR, e.g. an inline asm constraint.
+ if (!RC || SIRI->isSGPRClass(RC))
+ return false;
}
- return false;
+
+ return Limit < 10;
}]>;
//===----------------------------------------------------------------------===//
// Custom Operands
//===----------------------------------------------------------------------===//
-def FRAMEri32 : Operand<iPTR> {
- let MIOperandInfo = (ops i32:$ptr, i32imm:$index);
-}
-
def SoppBrTarget : AsmOperandClass {
let Name = "SoppBrTarget";
let ParserMethod = "parseSOppBrTarget";
@@ -467,14 +310,51 @@ def SoppBrTarget : AsmOperandClass {
def sopp_brtarget : Operand<OtherVT> {
let EncoderMethod = "getSOPPBrEncoding";
+ let DecoderMethod = "decodeSoppBrTarget";
let OperandType = "OPERAND_PCREL";
let ParserMatchClass = SoppBrTarget;
}
def si_ga : Operand<iPTR>;
+def InterpSlotMatchClass : AsmOperandClass {
+ let Name = "InterpSlot";
+ let PredicateMethod = "isInterpSlot";
+ let ParserMethod = "parseInterpSlot";
+ let RenderMethod = "addImmOperands";
+}
+
def InterpSlot : Operand<i32> {
let PrintMethod = "printInterpSlot";
+ let ParserMatchClass = InterpSlotMatchClass;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
+def AttrMatchClass : AsmOperandClass {
+ let Name = "Attr";
+ let PredicateMethod = "isInterpAttr";
+ let ParserMethod = "parseInterpAttr";
+ let RenderMethod = "addImmOperands";
+}
+
+// It appears to be necessary to create a separate operand for this to
+// be able to parse attr<num> with no space.
+def Attr : Operand<i32> {
+ let PrintMethod = "printInterpAttr";
+ let ParserMatchClass = AttrMatchClass;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
+def AttrChanMatchClass : AsmOperandClass {
+ let Name = "AttrChan";
+ let PredicateMethod = "isAttrChan";
+ let RenderMethod = "addImmOperands";
+}
+
+def AttrChan : Operand<i32> {
+ let PrintMethod = "printInterpAttrChan";
+ let ParserMatchClass = AttrChanMatchClass;
+ let OperandType = "OPERAND_IMMEDIATE";
}
def SendMsgMatchClass : AsmOperandClass {
@@ -484,6 +364,13 @@ def SendMsgMatchClass : AsmOperandClass {
let RenderMethod = "addImmOperands";
}
+def ExpTgtMatchClass : AsmOperandClass {
+ let Name = "ExpTgt";
+ let PredicateMethod = "isExpTgt";
+ let ParserMethod = "parseExpTgt";
+ let RenderMethod = "printExpTgt";
+}
+
def SendMsgImm : Operand<i32> {
let PrintMethod = "printSendMsg";
let ParserMatchClass = SendMsgMatchClass;
@@ -495,6 +382,11 @@ def SWaitMatchClass : AsmOperandClass {
let ParserMethod = "parseSWaitCntOps";
}
+def VReg32OrOffClass : AsmOperandClass {
+ let Name = "VReg32OrOff";
+ let ParserMethod = "parseVReg32OrOff";
+}
+
def WAIT_FLAG : Operand <i32> {
let ParserMatchClass = SWaitMatchClass;
let PrintMethod = "printWaitFlag";
@@ -503,6 +395,31 @@ def WAIT_FLAG : Operand <i32> {
include "SIInstrFormats.td"
include "VIInstrFormats.td"
+//===----------------------------------------------------------------------===//
+// ExpSrc*: special cases for exp src operands, which are printed as
+// "off" depending on the en operand.
+//===----------------------------------------------------------------------===//
+
+def ExpSrc0 : RegisterOperand<VGPR_32> {
+ let PrintMethod = "printExpSrc0";
+ let ParserMatchClass = VReg32OrOffClass;
+}
+
+def ExpSrc1 : RegisterOperand<VGPR_32> {
+ let PrintMethod = "printExpSrc1";
+ let ParserMatchClass = VReg32OrOffClass;
+}
+
+def ExpSrc2 : RegisterOperand<VGPR_32> {
+ let PrintMethod = "printExpSrc2";
+ let ParserMatchClass = VReg32OrOffClass;
+}
+
+def ExpSrc3 : RegisterOperand<VGPR_32> {
+ let PrintMethod = "printExpSrc3";
+ let ParserMatchClass = VReg32OrOffClass;
+}
+
class NamedMatchClass<string CName, bit Optional = 1> : AsmOperandClass {
let Name = "Imm"#CName;
let PredicateMethod = "is"#CName;
@@ -547,16 +464,15 @@ def gds : NamedOperandBit<"GDS", NamedMatchClass<"GDS">>;
def omod : NamedOperandU32<"OModSI", NamedMatchClass<"OModSI">>;
def clampmod : NamedOperandBit<"ClampSI", NamedMatchClass<"ClampSI">>;
-def smrd_offset : NamedOperandU32<"SMRDOffset", NamedMatchClass<"SMRDOffset">>;
-def smrd_literal_offset : NamedOperandU32<"SMRDLiteralOffset", NamedMatchClass<"SMRDLiteralOffset">>;
-
-def glc : NamedOperandBit<"GLC", NamedMatchClass<"GLC">>;
+def GLC : NamedOperandBit<"GLC", NamedMatchClass<"GLC">>;
def slc : NamedOperandBit<"SLC", NamedMatchClass<"SLC">>;
def tfe : NamedOperandBit<"TFE", NamedMatchClass<"TFE">>;
def unorm : NamedOperandBit<"UNorm", NamedMatchClass<"UNorm">>;
def da : NamedOperandBit<"DA", NamedMatchClass<"DA">>;
def r128 : NamedOperandBit<"R128", NamedMatchClass<"R128">>;
def lwe : NamedOperandBit<"LWE", NamedMatchClass<"LWE">>;
+def exp_compr : NamedOperandBit<"ExpCompr", NamedMatchClass<"ExpCompr">>;
+def exp_vm : NamedOperandBit<"ExpVM", NamedMatchClass<"ExpVM">>;
def dmask : NamedOperandU16<"DMask", NamedMatchClass<"DMask">>;
@@ -572,32 +488,74 @@ def dst_unused : NamedOperandU32<"SDWADstUnused", NamedMatchClass<"SDWADstUnused
def hwreg : NamedOperandU16<"Hwreg", NamedMatchClass<"Hwreg", 0>>;
+def exp_tgt : NamedOperandU8<"ExpTgt", NamedMatchClass<"ExpTgt", 0>>;
+
} // End OperandType = "OPERAND_IMMEDIATE"
+class KImmMatchClass<int size> : AsmOperandClass {
+ let Name = "KImmFP"#size;
+ let PredicateMethod = "isKImmFP"#size;
+ let ParserMethod = "parseImm";
+ let RenderMethod = "addKImmFP"#size#"Operands";
+}
+
+class kimmOperand<ValueType vt> : Operand<vt> {
+ let OperandNamespace = "AMDGPU";
+ let OperandType = "OPERAND_KIMM"#vt.Size;
+ let PrintMethod = "printU"#vt.Size#"ImmOperand";
+ let ParserMatchClass = !cast<AsmOperandClass>("KImmFP"#vt.Size#"MatchClass");
+}
+
+// 32-bit VALU immediate operand that uses the constant bus.
+def KImmFP32MatchClass : KImmMatchClass<32>;
+def f32kimm : kimmOperand<i32>;
+
+// 32-bit VALU immediate operand with a 16-bit value that uses the
+// constant bus.
+def KImmFP16MatchClass : KImmMatchClass<16>;
+def f16kimm : kimmOperand<i16>;
+
def VOPDstS64 : VOPDstOperand <SReg_64>;
-def FPInputModsMatchClass : AsmOperandClass {
- let Name = "RegOrImmWithFPInputMods";
+class FPInputModsMatchClass <int opSize> : AsmOperandClass {
+ let Name = "RegOrImmWithFP"#opSize#"InputMods";
let ParserMethod = "parseRegOrImmWithFPInputMods";
- let PredicateMethod = "isRegOrImmWithInputMods";
+ let PredicateMethod = "isRegOrImmWithFP"#opSize#"InputMods";
}
+def FP16InputModsMatchClass : FPInputModsMatchClass<16>;
+def FP32InputModsMatchClass : FPInputModsMatchClass<32>;
+def FP64InputModsMatchClass : FPInputModsMatchClass<64>;
-def FPInputMods : Operand <i32> {
+class InputMods <AsmOperandClass matchClass> : Operand <i32> {
+ let OperandNamespace = "AMDGPU";
+ let OperandType = "OPERAND_INPUT_MODS";
+ let ParserMatchClass = matchClass;
+}
+
+class FPInputMods <FPInputModsMatchClass matchClass> : InputMods <matchClass> {
let PrintMethod = "printOperandAndFPInputMods";
- let ParserMatchClass = FPInputModsMatchClass;
}
-def IntInputModsMatchClass : AsmOperandClass {
- let Name = "RegOrImmWithIntInputMods";
+def FP16InputMods : FPInputMods<FP16InputModsMatchClass>;
+def FP32InputMods : FPInputMods<FP32InputModsMatchClass>;
+def FP64InputMods : FPInputMods<FP64InputModsMatchClass>;
+
+class IntInputModsMatchClass <int opSize> : AsmOperandClass {
+ let Name = "RegOrImmWithInt"#opSize#"InputMods";
let ParserMethod = "parseRegOrImmWithIntInputMods";
- let PredicateMethod = "isRegOrImmWithInputMods";
+ let PredicateMethod = "isRegOrImmWithInt"#opSize#"InputMods";
}
+def Int32InputModsMatchClass : IntInputModsMatchClass<32>;
+def Int64InputModsMatchClass : IntInputModsMatchClass<64>;
-def IntInputMods: Operand <i32> {
+class IntInputMods <IntInputModsMatchClass matchClass> : InputMods <matchClass> {
let PrintMethod = "printOperandAndIntInputMods";
- let ParserMatchClass = IntInputModsMatchClass;
}
+def Int32InputMods : IntInputMods<Int32InputModsMatchClass>;
+def Int64InputMods : IntInputMods<Int64InputModsMatchClass>;
//===----------------------------------------------------------------------===//
// Complex patterns
@@ -606,24 +564,6 @@ def IntInputMods: Operand <i32> {
def DS1Addr1Offset : ComplexPattern<i32, 2, "SelectDS1Addr1Offset">;
def DS64Bit4ByteAligned : ComplexPattern<i32, 3, "SelectDS64Bit4ByteAligned">;
-def MUBUFAddr32 : ComplexPattern<i64, 9, "SelectMUBUFAddr32">;
-def MUBUFAddr64 : ComplexPattern<i64, 7, "SelectMUBUFAddr64">;
-def MUBUFAddr64Atomic : ComplexPattern<i64, 5, "SelectMUBUFAddr64">;
-def FLATAtomic : ComplexPattern<i64, 3, "SelectFlat">;
-def MUBUFScratch : ComplexPattern<i64, 4, "SelectMUBUFScratch">;
-def MUBUFOffset : ComplexPattern<i64, 6, "SelectMUBUFOffset">;
-def MUBUFOffsetNoGLC : ComplexPattern<i64, 3, "SelectMUBUFOffset">;
-def MUBUFOffsetAtomic : ComplexPattern<i64, 4, "SelectMUBUFOffset">;
-def MUBUFIntrinsicOffset : ComplexPattern<i32, 2, "SelectMUBUFIntrinsicOffset">;
-def MUBUFIntrinsicVOffset : ComplexPattern<i32, 3, "SelectMUBUFIntrinsicVOffset">;
-
-def SMRDImm : ComplexPattern<i64, 2, "SelectSMRDImm">;
-def SMRDImm32 : ComplexPattern<i64, 2, "SelectSMRDImm32">;
-def SMRDSgpr : ComplexPattern<i64, 2, "SelectSMRDSgpr">;
-def SMRDBufferImm : ComplexPattern<i32, 1, "SelectSMRDBufferImm">;
-def SMRDBufferImm32 : ComplexPattern<i32, 1, "SelectSMRDBufferImm32">;
-def SMRDBufferSgpr : ComplexPattern<i32, 1, "SelectSMRDBufferSgpr">;
-
def MOVRELOffset : ComplexPattern<i32, 2, "SelectMOVRELOffset">;
def VOP3Mods0 : ComplexPattern<untyped, 4, "SelectVOP3Mods0">;
@@ -681,455 +621,44 @@ class SIMCInstr <string pseudo, int subtarget> {
// EXP classes
//===----------------------------------------------------------------------===//
-class EXPCommon : InstSI<
+class EXP_Helper<bit done, SDPatternOperator node = null_frag> : EXPCommon<
(outs),
- (ins i32imm:$en, i32imm:$tgt, i32imm:$compr, i32imm:$done, i32imm:$vm,
- VGPR_32:$src0, VGPR_32:$src1, VGPR_32:$src2, VGPR_32:$src3),
- "exp $en, $tgt, $compr, $done, $vm, $src0, $src1, $src2, $src3",
- [] > {
-
- let EXP_CNT = 1;
- let Uses = [EXEC];
- let SchedRW = [WriteExport];
-}
-
-multiclass EXP_m {
-
- let isPseudo = 1, isCodeGenOnly = 1 in {
- def "" : EXPCommon, SIMCInstr <"exp", SIEncodingFamily.NONE> ;
- }
-
- def _si : EXPCommon, SIMCInstr <"exp", SIEncodingFamily.SI>, EXPe {
- let DecoderNamespace="SICI";
- let DisableDecoder = DisableSIDecoder;
- }
-
- def _vi : EXPCommon, SIMCInstr <"exp", SIEncodingFamily.VI>, EXPe_vi {
- let DecoderNamespace="VI";
- let DisableDecoder = DisableVIDecoder;
- }
-}
-
-//===----------------------------------------------------------------------===//
-// Scalar classes
-//===----------------------------------------------------------------------===//
-
-class SOP1_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
- SOP1 <outs, ins, "", pattern>,
- SIMCInstr<opName, SIEncodingFamily.NONE> {
- let isPseudo = 1;
- let isCodeGenOnly = 1;
-}
-
-class SOP1_Real_si <sop1 op, string opName, dag outs, dag ins, string asm> :
- SOP1 <outs, ins, asm, []>,
- SOP1e <op.SI>,
- SIMCInstr<opName, SIEncodingFamily.SI> {
- let isCodeGenOnly = 0;
- let AssemblerPredicates = [isSICI];
- let DecoderNamespace = "SICI";
- let DisableDecoder = DisableSIDecoder;
-}
-
-class SOP1_Real_vi <sop1 op, string opName, dag outs, dag ins, string asm> :
- SOP1 <outs, ins, asm, []>,
- SOP1e <op.VI>,
- SIMCInstr<opName, SIEncodingFamily.VI> {
- let isCodeGenOnly = 0;
- let AssemblerPredicates = [isVI];
- let DecoderNamespace = "VI";
- let DisableDecoder = DisableVIDecoder;
-}
-
-multiclass SOP1_m <sop1 op, string opName, dag outs, dag ins, string asm,
- list<dag> pattern> {
-
- def "" : SOP1_Pseudo <opName, outs, ins, pattern>;
-
- def _si : SOP1_Real_si <op, opName, outs, ins, asm>;
-
- def _vi : SOP1_Real_vi <op, opName, outs, ins, asm>;
-
-}
-
-multiclass SOP1_32 <sop1 op, string opName, list<dag> pattern> : SOP1_m <
- op, opName, (outs SReg_32:$sdst), (ins SSrc_32:$src0),
- opName#" $sdst, $src0", pattern
->;
-
-multiclass SOP1_64 <sop1 op, string opName, list<dag> pattern> : SOP1_m <
- op, opName, (outs SReg_64:$sdst), (ins SSrc_64:$src0),
- opName#" $sdst, $src0", pattern
->;
-
-// no input, 64-bit output.
-multiclass SOP1_64_0 <sop1 op, string opName, list<dag> pattern> {
- def "" : SOP1_Pseudo <opName, (outs SReg_64:$sdst), (ins), pattern>;
-
- def _si : SOP1_Real_si <op, opName, (outs SReg_64:$sdst), (ins),
- opName#" $sdst"> {
- let src0 = 0;
- }
-
- def _vi : SOP1_Real_vi <op, opName, (outs SReg_64:$sdst), (ins),
- opName#" $sdst"> {
- let src0 = 0;
- }
-}
-
-// 64-bit input, no output
-multiclass SOP1_1 <sop1 op, string opName, list<dag> pattern> {
- def "" : SOP1_Pseudo <opName, (outs), (ins SReg_64:$src0), pattern>;
-
- def _si : SOP1_Real_si <op, opName, (outs), (ins SReg_64:$src0),
- opName#" $src0"> {
- let sdst = 0;
- }
-
- def _vi : SOP1_Real_vi <op, opName, (outs), (ins SReg_64:$src0),
- opName#" $src0"> {
- let sdst = 0;
- }
-}
-
-// 64-bit input, 32-bit output.
-multiclass SOP1_32_64 <sop1 op, string opName, list<dag> pattern> : SOP1_m <
- op, opName, (outs SReg_32:$sdst), (ins SSrc_64:$src0),
- opName#" $sdst, $src0", pattern
->;
-
-// 32-bit input, 64-bit output.
-multiclass SOP1_64_32 <sop1 op, string opName, list<dag> pattern> : SOP1_m <
- op, opName, (outs SReg_64:$sdst), (ins SSrc_32:$src0),
- opName#" $sdst, $src0", pattern
->;
-
-class SOP2_Pseudo<string opName, dag outs, dag ins, list<dag> pattern> :
- SOP2<outs, ins, "", pattern>,
- SIMCInstr<opName, SIEncodingFamily.NONE> {
- let isPseudo = 1;
- let isCodeGenOnly = 1;
- let Size = 4;
-
- // Pseudo instructions have no encodings, but adding this field here allows
- // us to do:
- // let sdst = xxx in {
- // for multiclasses that include both real and pseudo instructions.
- field bits<7> sdst = 0;
-}
-
-class SOP2_Real_si<sop2 op, string opName, dag outs, dag ins, string asm> :
- SOP2<outs, ins, asm, []>,
- SOP2e<op.SI>,
- SIMCInstr<opName, SIEncodingFamily.SI> {
- let AssemblerPredicates = [isSICI];
- let DecoderNamespace = "SICI";
- let DisableDecoder = DisableSIDecoder;
-}
-
-class SOP2_Real_vi<sop2 op, string opName, dag outs, dag ins, string asm> :
- SOP2<outs, ins, asm, []>,
- SOP2e<op.VI>,
- SIMCInstr<opName, SIEncodingFamily.VI> {
- let AssemblerPredicates = [isVI];
- let DecoderNamespace = "VI";
- let DisableDecoder = DisableVIDecoder;
-}
-
-multiclass SOP2_m <sop2 op, string opName, dag outs, dag ins, string asm,
- list<dag> pattern> {
-
- def "" : SOP2_Pseudo <opName, outs, ins, pattern>;
-
- def _si : SOP2_Real_si <op, opName, outs, ins, asm>;
-
- def _vi : SOP2_Real_vi <op, opName, outs, ins, asm>;
-
-}
-
-multiclass SOP2_32 <sop2 op, string opName, list<dag> pattern> : SOP2_m <
- op, opName, (outs SReg_32:$sdst), (ins SSrc_32:$src0, SSrc_32:$src1),
- opName#" $sdst, $src0, $src1", pattern
->;
-
-multiclass SOP2_64 <sop2 op, string opName, list<dag> pattern> : SOP2_m <
- op, opName, (outs SReg_64:$sdst), (ins SSrc_64:$src0, SSrc_64:$src1),
- opName#" $sdst, $src0, $src1", pattern
->;
-
-multiclass SOP2_64_32 <sop2 op, string opName, list<dag> pattern> : SOP2_m <
- op, opName, (outs SReg_64:$sdst), (ins SSrc_64:$src0, SSrc_32:$src1),
- opName#" $sdst, $src0, $src1", pattern
->;
-
-multiclass SOP2_64_32_32 <sop2 op, string opName, list<dag> pattern> : SOP2_m <
- op, opName, (outs SReg_64:$sdst), (ins SSrc_32:$src0, SSrc_32:$src1),
- opName#" $sdst, $src0, $src1", pattern
->;
-
-class SOPC_Base <bits<7> op, RegisterOperand rc0, RegisterOperand rc1,
- string opName, list<dag> pattern = []> : SOPC <
- op, (outs), (ins rc0:$src0, rc1:$src1),
- opName#" $src0, $src1", pattern > {
- let Defs = [SCC];
-}
-class SOPC_Helper <bits<7> op, RegisterOperand rc, ValueType vt,
- string opName, PatLeaf cond> : SOPC_Base <
- op, rc, rc, opName,
- [(set SCC, (si_setcc_uniform vt:$src0, vt:$src1, cond))] > {
-}
-
-class SOPC_CMP_32<bits<7> op, string opName, PatLeaf cond = COND_NULL>
- : SOPC_Helper<op, SSrc_32, i32, opName, cond>;
-
-class SOPC_32<bits<7> op, string opName, list<dag> pattern = []>
- : SOPC_Base<op, SSrc_32, SSrc_32, opName, pattern>;
-
-class SOPC_64_32<bits<7> op, string opName, list<dag> pattern = []>
- : SOPC_Base<op, SSrc_64, SSrc_32, opName, pattern>;
-
-class SOPK_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
- SOPK <outs, ins, "", pattern>,
- SIMCInstr<opName, SIEncodingFamily.NONE> {
- let isPseudo = 1;
- let isCodeGenOnly = 1;
-}
-
-class SOPK_Real_si <sopk op, string opName, dag outs, dag ins, string asm> :
- SOPK <outs, ins, asm, []>,
- SOPKe <op.SI>,
- SIMCInstr<opName, SIEncodingFamily.SI> {
- let AssemblerPredicates = [isSICI];
- let DecoderNamespace = "SICI";
- let DisableDecoder = DisableSIDecoder;
- let isCodeGenOnly = 0;
-}
-
-class SOPK_Real_vi <sopk op, string opName, dag outs, dag ins, string asm> :
- SOPK <outs, ins, asm, []>,
- SOPKe <op.VI>,
- SIMCInstr<opName, SIEncodingFamily.VI> {
- let AssemblerPredicates = [isVI];
- let DecoderNamespace = "VI";
- let DisableDecoder = DisableVIDecoder;
- let isCodeGenOnly = 0;
-}
-
-multiclass SOPK_m <sopk op, string opName, dag outs, dag ins, string opAsm,
- string asm = opName#opAsm> {
- def "" : SOPK_Pseudo <opName, outs, ins, []>;
-
- def _si : SOPK_Real_si <op, opName, outs, ins, asm>;
-
- def _vi : SOPK_Real_vi <op, opName, outs, ins, asm>;
-
-}
-
-multiclass SOPK_32 <sopk op, string opName, list<dag> pattern> {
- def "" : SOPK_Pseudo <opName, (outs SReg_32:$sdst), (ins u16imm:$simm16),
- pattern>;
-
- def _si : SOPK_Real_si <op, opName, (outs SReg_32:$sdst), (ins u16imm:$simm16),
- opName#" $sdst, $simm16">;
-
- def _vi : SOPK_Real_vi <op, opName, (outs SReg_32:$sdst), (ins u16imm:$simm16),
- opName#" $sdst, $simm16">;
-}
-
-multiclass SOPK_SCC <sopk op, string opName, list<dag> pattern> {
- def "" : SOPK_Pseudo <opName, (outs),
- (ins SReg_32:$src0, u16imm:$src1), pattern> {
- let Defs = [SCC];
- }
-
-
- def _si : SOPK_Real_si <op, opName, (outs),
- (ins SReg_32:$sdst, u16imm:$simm16), opName#" $sdst, $simm16"> {
- let Defs = [SCC];
- }
-
- def _vi : SOPK_Real_vi <op, opName, (outs),
- (ins SReg_32:$sdst, u16imm:$simm16), opName#" $sdst, $simm16"> {
- let Defs = [SCC];
- }
-}
-
-multiclass SOPK_32TIE <sopk op, string opName, list<dag> pattern> : SOPK_m <
- op, opName, (outs SReg_32:$sdst), (ins SReg_32:$src0, u16imm:$simm16),
- " $sdst, $simm16"
->;
-
-multiclass SOPK_IMM32 <sopk op, string opName, dag outs, dag ins,
- string argAsm, string asm = opName#argAsm> {
-
- def "" : SOPK_Pseudo <opName, outs, ins, []>;
-
- def _si : SOPK <outs, ins, asm, []>,
- SOPK64e <op.SI>,
- SIMCInstr<opName, SIEncodingFamily.SI> {
- let AssemblerPredicates = [isSICI];
- let DecoderNamespace = "SICI";
- let DisableDecoder = DisableSIDecoder;
- let isCodeGenOnly = 0;
- }
-
- def _vi : SOPK <outs, ins, asm, []>,
- SOPK64e <op.VI>,
- SIMCInstr<opName, SIEncodingFamily.VI> {
- let AssemblerPredicates = [isVI];
- let DecoderNamespace = "VI";
- let DisableDecoder = DisableVIDecoder;
- let isCodeGenOnly = 0;
- }
-}
-//===----------------------------------------------------------------------===//
-// SMRD classes
-//===----------------------------------------------------------------------===//
-
-class SMRD_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
- SMRD <outs, ins, "", pattern>,
- SIMCInstr<opName, SIEncodingFamily.NONE> {
- let isPseudo = 1;
- let isCodeGenOnly = 1;
-}
-
-class SMRD_IMM_Real_si <bits<5> op, string opName, dag outs, dag ins,
- string asm> :
- SMRD <outs, ins, asm, []>,
- SMRD_IMMe <op>,
- SIMCInstr<opName, SIEncodingFamily.SI> {
- let AssemblerPredicates = [isSICI];
- let DecoderNamespace = "SICI";
- let DisableDecoder = DisableSIDecoder;
-}
-
-class SMRD_SOFF_Real_si <bits<5> op, string opName, dag outs, dag ins,
- string asm> :
- SMRD <outs, ins, asm, []>,
- SMRD_SOFFe <op>,
- SIMCInstr<opName, SIEncodingFamily.SI> {
- let AssemblerPredicates = [isSICI];
- let DecoderNamespace = "SICI";
- let DisableDecoder = DisableSIDecoder;
-}
-
-
-class SMRD_IMM_Real_vi <bits<8> op, string opName, dag outs, dag ins,
- string asm, list<dag> pattern = []> :
- SMRD <outs, ins, asm, pattern>,
- SMEM_IMMe_vi <op>,
- SIMCInstr<opName, SIEncodingFamily.VI> {
- let AssemblerPredicates = [isVI];
- let DecoderNamespace = "VI";
- let DisableDecoder = DisableVIDecoder;
-}
-
-class SMRD_SOFF_Real_vi <bits<8> op, string opName, dag outs, dag ins,
- string asm, list<dag> pattern = []> :
- SMRD <outs, ins, asm, pattern>,
- SMEM_SOFFe_vi <op>,
- SIMCInstr<opName, SIEncodingFamily.VI> {
- let AssemblerPredicates = [isVI];
- let DecoderNamespace = "VI";
- let DisableDecoder = DisableVIDecoder;
-}
-
-
-multiclass SMRD_IMM_m <smrd op, string opName, dag outs, dag ins,
- string asm, list<dag> pattern> {
-
- def "" : SMRD_Pseudo <opName, outs, ins, pattern>;
-
- def _si : SMRD_IMM_Real_si <op.SI, opName, outs, ins, asm>;
-
- // glc is only applicable to scalar stores, which are not yet
- // implemented.
- let glc = 0 in {
- def _vi : SMRD_IMM_Real_vi <op.VI, opName, outs, ins, asm>;
- }
-}
-
-multiclass SMRD_SOFF_m <smrd op, string opName, dag outs, dag ins,
- string asm, list<dag> pattern> {
-
- def "" : SMRD_Pseudo <opName, outs, ins, pattern>;
-
- def _si : SMRD_SOFF_Real_si <op.SI, opName, outs, ins, asm>;
-
- // glc is only applicable to scalar stores, which are not yet
- // implemented.
- let glc = 0 in {
- def _vi : SMRD_SOFF_Real_vi <op.VI, opName, outs, ins, asm>;
- }
-}
-
-multiclass SMRD_Special <smrd op, string opName, dag outs,
- int sdst_ = ?,
- string opStr = "",
- list<dag> pattern = []> {
- let hasSideEffects = 1 in {
- def "" : SMRD_Pseudo <opName, outs, (ins), pattern>;
+ (ins exp_tgt:$tgt,
+ ExpSrc0:$src0, ExpSrc1:$src1, ExpSrc2:$src2, ExpSrc3:$src3,
+ exp_vm:$vm, exp_compr:$compr, i8imm:$en),
+ "exp$tgt $src0, $src1, $src2, $src3"#!if(done, " done", "")#"$compr$vm",
+ [(node (i8 timm:$en), (i1 timm:$vm), (i8 timm:$tgt), (i1 timm:$compr),
+ f32:$src0, f32:$src1, f32:$src2, f32:$src3)]> {
+ let AsmMatchConverter = "cvtExp";
+}
+
+// Split EXP instruction into EXP and EXP_DONE so we can set
+// mayLoad for done=1.
+multiclass EXP_m<bit done, SDPatternOperator node> {
+ let mayLoad = done in {
+ let isPseudo = 1, isCodeGenOnly = 1 in {
+ def "" : EXP_Helper<done, node>,
+ SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.NONE>;
+ }
- let sbase = 0, soff = 0, sdst = sdst_ in {
- def _si : SMRD_SOFF_Real_si <op.SI, opName, outs, (ins), opName#opStr>;
+ let done = done in {
+ def _si : EXP_Helper<done>,
+ SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.SI>,
+ EXPe {
+ let DecoderNamespace = "SICI";
+ let DisableDecoder = DisableSIDecoder;
+ }
- let glc = 0 in {
- def _vi : SMRD_SOFF_Real_vi <op.VI, opName, outs, (ins), opName#opStr>;
+ def _vi : EXP_Helper<done>,
+ SIMCInstr <"exp"#!if(done, "_done", ""), SIEncodingFamily.VI>,
+ EXPe_vi {
+ let DecoderNamespace = "VI";
+ let DisableDecoder = DisableVIDecoder;
}
}
}
}
-multiclass SMRD_Inval <smrd op, string opName,
- SDPatternOperator node> {
- let mayStore = 1 in {
- defm : SMRD_Special<op, opName, (outs), 0, "", [(node)]>;
- }
-}
-
-class SMEM_Inval <bits<8> op, string opName, SDPatternOperator node> :
- SMRD_SOFF_Real_vi<op, opName, (outs), (ins), opName, [(node)]> {
- let hasSideEffects = 1;
- let mayStore = 1;
- let sbase = 0;
- let sdst = 0;
- let glc = 0;
- let soff = 0;
-}
-
-class SMEM_Ret <bits<8> op, string opName, SDPatternOperator node> :
- SMRD_SOFF_Real_vi<op, opName, (outs SReg_64:$sdst), (ins),
- opName#" $sdst", [(set i64:$sdst, (node))]> {
- let hasSideEffects = 1;
- let mayStore = ?;
- let mayLoad = ?;
- let sbase = 0;
- let glc = 0;
- let soff = 0;
-}
-
-multiclass SMRD_Helper <smrd op, string opName, RegisterClass baseClass,
- RegisterClass dstClass> {
- defm _IMM : SMRD_IMM_m <
- op, opName#"_IMM", (outs dstClass:$sdst),
- (ins baseClass:$sbase, smrd_offset:$offset),
- opName#" $sdst, $sbase, $offset", []
- >;
-
- def _IMM_ci : SMRD <
- (outs dstClass:$sdst), (ins baseClass:$sbase, smrd_literal_offset:$offset),
- opName#" $sdst, $sbase, $offset", []>, SMRD_IMMe_ci <op.SI> {
- let AssemblerPredicates = [isCIOnly];
- let DecoderNamespace = "CI";
- }
-
- defm _SGPR : SMRD_SOFF_m <
- op, opName#"_SGPR", (outs dstClass:$sdst),
- (ins baseClass:$sbase, SReg_32:$soff),
- opName#" $sdst, $sbase, $soff", []
- >;
-}
-
//===----------------------------------------------------------------------===//
// Vector ALU classes
//===----------------------------------------------------------------------===//
@@ -1146,43 +675,90 @@ class getNumSrcArgs<ValueType Src0, ValueType Src1, ValueType Src2> {
// instructions for the given VT.
class getVALUDstForVT<ValueType VT> {
RegisterOperand ret = !if(!eq(VT.Size, 32), VOPDstOperand<VGPR_32>,
- !if(!eq(VT.Size, 64), VOPDstOperand<VReg_64>,
- !if(!eq(VT.Size, 16), VOPDstOperand<VGPR_32>,
- VOPDstOperand<SReg_64>))); // else VT == i1
+ !if(!eq(VT.Size, 128), VOPDstOperand<VReg_128>,
+ !if(!eq(VT.Size, 64), VOPDstOperand<VReg_64>,
+ !if(!eq(VT.Size, 16), VOPDstOperand<VGPR_32>,
+ VOPDstOperand<SReg_64>)))); // else VT == i1
}
// Returns the register class to use for source 0 of VOP[12C]
// instructions for the given VT.
class getVOPSrc0ForVT<ValueType VT> {
- RegisterOperand ret = !if(!eq(VT.Size, 64), VSrc_64, VSrc_32);
+ bit isFP = !if(!eq(VT.Value, f16.Value), 1,
+ !if(!eq(VT.Value, f32.Value), 1,
+ !if(!eq(VT.Value, f64.Value), 1,
+ 0)));
+ RegisterOperand ret = !if(isFP,
+ !if(!eq(VT.Size, 64), VSrc_f64, !if(!eq(VT.Size, 16), VSrc_f16, VSrc_f32)),
+ !if(!eq(VT.Size, 64), VSrc_b64, !if(!eq(VT.Size, 16), VSrc_b16, VSrc_b32)));
}
// Returns the vreg register class to use for source operand given VT
class getVregSrcForVT<ValueType VT> {
- RegisterClass ret = !if(!eq(VT.Size, 64), VReg_64, VGPR_32);
+ RegisterClass ret = !if(!eq(VT.Size, 128), VReg_128,
+ !if(!eq(VT.Size, 64), VReg_64, VGPR_32));
}
// Returns the register class to use for sources of VOP3 instructions for the
// given VT.
class getVOP3SrcForVT<ValueType VT> {
+ bit isFP = !if(!eq(VT.Value, f16.Value), 1,
+ !if(!eq(VT.Value, f32.Value), 1,
+ !if(!eq(VT.Value, f64.Value), 1,
+ 0)));
RegisterOperand ret =
- !if(!eq(VT.Size, 64),
- VCSrc_64,
- !if(!eq(VT.Value, i1.Value),
- SCSrc_64,
- VCSrc_32
- )
- );
+ !if(!eq(VT.Size, 128),
+ VSrc_128,
+ !if(!eq(VT.Size, 64),
+ !if(isFP,
+ VCSrc_f64,
+ VCSrc_b64),
+ !if(!eq(VT.Value, i1.Value),
+ SCSrc_b64,
+ !if(isFP,
+ !if(!eq(VT.Size, 16), VCSrc_f16, VCSrc_f32),
+ !if(!eq(VT.Size, 16), VCSrc_b16, VCSrc_b32)
+ )
+ )
+ )
+ );
}
// Returns 1 if the source arguments have modifiers, 0 if they do not.
// XXX - do f16 instructions?
-class hasModifiers<ValueType SrcVT> {
+class isFloatType<ValueType SrcVT> {
bit ret =
+ !if(!eq(SrcVT.Value, f16.Value), 1,
!if(!eq(SrcVT.Value, f32.Value), 1,
!if(!eq(SrcVT.Value, f64.Value), 1,
- 0));
+ 0)));
+}
+
+class isIntType<ValueType SrcVT> {
+ bit ret =
+ !if(!eq(SrcVT.Value, i16.Value), 1,
+ !if(!eq(SrcVT.Value, i32.Value), 1,
+ !if(!eq(SrcVT.Value, i64.Value), 1,
+ 0)));
+}
+
+
+// Returns the input modifiers operand type for the specified input operand.
+class getSrcMod <ValueType VT> {
+ bit isFP = !if(!eq(VT.Value, f16.Value), 1,
+ !if(!eq(VT.Value, f32.Value), 1,
+ !if(!eq(VT.Value, f64.Value), 1,
+ 0)));
+ Operand ret = !if(!eq(VT.Size, 64),
+ !if(isFP, FP64InputMods, Int64InputMods),
+ !if(isFP,
+ !if(!eq(VT.Value, f16.Value),
+ FP16InputMods,
+ FP32InputMods
+ ),
+ Int32InputMods)
+ );
}
// Returns the input arguments for VOP[12C] instructions for the given SrcVT.
@@ -1195,7 +771,8 @@ class getIns32 <RegisterOperand Src0RC, RegisterClass Src1RC, int NumSrcArgs> {
// Returns the input arguments for VOP3 instructions for the given SrcVT.
class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
RegisterOperand Src2RC, int NumSrcArgs,
- bit HasModifiers> {
+ bit HasModifiers, Operand Src0Mod, Operand Src1Mod,
+ Operand Src2Mod> {
dag ret =
!if (!eq(NumSrcArgs, 0),
@@ -1205,7 +782,7 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
!if (!eq(NumSrcArgs, 1),
!if (!eq(HasModifiers, 1),
// VOP1 with modifiers
- (ins FPInputMods:$src0_modifiers, Src0RC:$src0,
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
clampmod:$clamp, omod:$omod)
/* else */,
// VOP1 without modifiers
@@ -1214,8 +791,8 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
!if (!eq(NumSrcArgs, 2),
!if (!eq(HasModifiers, 1),
// VOP 2 with modifiers
- (ins FPInputMods:$src0_modifiers, Src0RC:$src0,
- FPInputMods:$src1_modifiers, Src1RC:$src1,
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
clampmod:$clamp, omod:$omod)
/* else */,
// VOP2 without modifiers
@@ -1224,9 +801,9 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
/* NumSrcArgs == 3 */,
!if (!eq(HasModifiers, 1),
// VOP3 with modifiers
- (ins FPInputMods:$src0_modifiers, Src0RC:$src0,
- FPInputMods:$src1_modifiers, Src1RC:$src1,
- FPInputMods:$src2_modifiers, Src2RC:$src2,
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ Src2Mod:$src2_modifiers, Src2RC:$src2,
clampmod:$clamp, omod:$omod)
/* else */,
// VOP3 without modifiers
@@ -1235,7 +812,7 @@ class getIns64 <RegisterOperand Src0RC, RegisterOperand Src1RC,
}
class getInsDPP <RegisterClass Src0RC, RegisterClass Src1RC, int NumSrcArgs,
- bit HasModifiers> {
+ bit HasModifiers, Operand Src0Mod, Operand Src1Mod> {
dag ret = !if (!eq(NumSrcArgs, 0),
// VOP1 without input operands (V_NOP)
@@ -1244,7 +821,7 @@ class getInsDPP <RegisterClass Src0RC, RegisterClass Src1RC, int NumSrcArgs,
!if (!eq(NumSrcArgs, 1),
!if (!eq(HasModifiers, 1),
// VOP1_DPP with modifiers
- (ins FPInputMods:$src0_modifiers, Src0RC:$src0,
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl)
/* else */,
@@ -1255,8 +832,8 @@ class getInsDPP <RegisterClass Src0RC, RegisterClass Src1RC, int NumSrcArgs,
/* NumSrcArgs == 2 */,
!if (!eq(HasModifiers, 1),
// VOP2_DPP with modifiers
- (ins FPInputMods:$src0_modifiers, Src0RC:$src0,
- FPInputMods:$src1_modifiers, Src1RC:$src1,
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
bank_mask:$bank_mask, bound_ctrl:$bound_ctrl)
/* else */,
@@ -1268,49 +845,28 @@ class getInsDPP <RegisterClass Src0RC, RegisterClass Src1RC, int NumSrcArgs,
}
class getInsSDWA <RegisterClass Src0RC, RegisterClass Src1RC, int NumSrcArgs,
- bit HasFloatModifiers, ValueType DstVT> {
+ bit HasFloatModifiers, Operand Src0Mod, Operand Src1Mod,
+ ValueType DstVT> {
dag ret = !if(!eq(NumSrcArgs, 0),
// VOP1 without input operands (V_NOP)
(ins),
!if(!eq(NumSrcArgs, 1),
- !if(HasFloatModifiers,
- // VOP1_SDWA with float modifiers
- (ins FPInputMods:$src0_fmodifiers, Src0RC:$src0,
- clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
- src0_sel:$src0_sel)
- /* else */,
- // VOP1_SDWA with sext modifier
- (ins IntInputMods:$src0_imodifiers, Src0RC:$src0,
- clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
- src0_sel:$src0_sel)
- /* endif */)
- /* NumSrcArgs == 2 */,
- !if(HasFloatModifiers,
- !if(!eq(DstVT.Size, 1),
- // VOPC_SDWA with float modifiers
- (ins FPInputMods:$src0_fmodifiers, Src0RC:$src0,
- FPInputMods:$src1_fmodifiers, Src1RC:$src1,
- clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel),
- // VOP2_SDWA or VOPC_SDWA with float modifiers
- (ins FPInputMods:$src0_fmodifiers, Src0RC:$src0,
- FPInputMods:$src1_fmodifiers, Src1RC:$src1,
- clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
- src0_sel:$src0_sel, src1_sel:$src1_sel)
- ),
- /* else */
- !if(!eq(DstVT.Size, 1),
- // VOPC_SDWA with sext modifiers
- (ins IntInputMods:$src0_imodifiers, Src0RC:$src0,
- IntInputMods:$src1_imodifiers, Src1RC:$src1,
- clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel),
- // VOP2_SDWA or VOPC_SDWA with sext modifier
- (ins IntInputMods:$src0_imodifiers, Src0RC:$src0,
- IntInputMods:$src1_imodifiers, Src1RC:$src1,
- clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
- src0_sel:$src0_sel, src1_sel:$src1_sel)
- )
- /* endif */)));
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
+ src0_sel:$src0_sel),
+ !if(!eq(NumSrcArgs, 2),
+ !if(!eq(DstVT.Size, 1),
+ // VOPC_SDWA with modifiers
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel),
+ // VOP2_SDWA or VOPC_SDWA with modifiers
+ (ins Src0Mod:$src0_modifiers, Src0RC:$src0,
+ Src1Mod:$src1_modifiers, Src1RC:$src1,
+ clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
+ src0_sel:$src0_sel, src1_sel:$src1_sel)),
+ (ins)/* endif */)));
}
// Outs for DPP and SDWA
@@ -1374,8 +930,8 @@ class getAsmSDWA <bit HasDst, int NumSrcArgs, bit HasFloatModifiers,
" vcc", // use vcc token as dst for VOPC instructioins
"$vdst"),
"");
- string src0 = !if(HasFloatModifiers, "$src0_fmodifiers", "$src0_imodifiers");
- string src1 = !if(HasFloatModifiers, "$src1_fmodifiers", "$src1_imodifiers");
+ string src0 = "$src0_modifiers";
+ string src1 = "$src1_modifiers";
string args = !if(!eq(NumSrcArgs, 0),
"",
!if(!eq(NumSrcArgs, 1),
@@ -1414,6 +970,14 @@ class getHasExt <int NumSrcArgs, ValueType DstVT = i32, ValueType Src0VT = i32,
);
}
+class BitOr<bit a, bit b> {
+ bit ret = !if(a, 1, !if(b, 1, 0));
+}
+
+class BitAnd<bit a, bit b> {
+ bit ret = !if(a, !if(b, 1, 0), 0);
+}
+
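The two helpers just added express logical OR and AND over TableGen bit values with nested !if. For reference only, a C++ restatement of the same truth tables (mine, not part of the change):

// Same truth tables as the BitOr/BitAnd TableGen helpers above.
constexpr bool bitOr(bool a, bool b)  { return a ? true  : (b ? true : false); }
constexpr bool bitAnd(bool a, bool b) { return a ? (b ? true : false) : false; }
static_assert(bitOr(false, true) && !bitAnd(false, true), "truth tables match");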
class VOPProfile <list<ValueType> _ArgVT> {
field list<ValueType> ArgVT = _ArgVT;
@@ -1434,11 +998,36 @@ class VOPProfile <list<ValueType> _ArgVT> {
field RegisterClass Src1DPP = getVregSrcForVT<Src1VT>.ret;
field RegisterClass Src0SDWA = getVregSrcForVT<Src0VT>.ret;
field RegisterClass Src1SDWA = getVregSrcForVT<Src1VT>.ret;
+ field Operand Src0Mod = getSrcMod<Src0VT>.ret;
+ field Operand Src1Mod = getSrcMod<Src1VT>.ret;
+ field Operand Src2Mod = getSrcMod<Src2VT>.ret;
field bit HasDst = !if(!eq(DstVT.Value, untyped.Value), 0, 1);
field bit HasDst32 = HasDst;
+ field bit EmitDst = HasDst; // force dst encoding, see v_movreld_b32 special case
field int NumSrcArgs = getNumSrcArgs<Src0VT, Src1VT, Src2VT>.ret;
- field bit HasModifiers = hasModifiers<Src0VT>.ret;
+ field bit HasSrc0 = !if(!eq(Src0VT.Value, untyped.Value), 0, 1);
+ field bit HasSrc1 = !if(!eq(Src1VT.Value, untyped.Value), 0, 1);
+ field bit HasSrc2 = !if(!eq(Src2VT.Value, untyped.Value), 0, 1);
+
+  // TODO: Modifiers logic is somewhat ad hoc here, to be refined later
+ field bit HasModifiers = isFloatType<Src0VT>.ret;
+
+ field bit HasSrc0FloatMods = isFloatType<Src0VT>.ret;
+ field bit HasSrc1FloatMods = isFloatType<Src1VT>.ret;
+ field bit HasSrc2FloatMods = isFloatType<Src2VT>.ret;
+
+ field bit HasSrc0IntMods = isIntType<Src0VT>.ret;
+ field bit HasSrc1IntMods = isIntType<Src1VT>.ret;
+ field bit HasSrc2IntMods = isIntType<Src2VT>.ret;
+
+ field bit HasSrc0Mods = HasModifiers;
+ field bit HasSrc1Mods = !if(HasModifiers, BitOr<HasSrc1FloatMods, HasSrc1IntMods>.ret, 0);
+ field bit HasSrc2Mods = !if(HasModifiers, BitOr<HasSrc2FloatMods, HasSrc2IntMods>.ret, 0);
+
+ field bit HasOMod = HasModifiers;
+ field bit HasClamp = HasModifiers;
+ field bit HasSDWAClamp = HasSrc0;
field bit HasExt = getHasExt<NumSrcArgs, DstVT, Src0VT, Src1VT>.ret;
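The new fields above derive, per source operand, whether input modifiers are available: the profile-wide HasModifiers switch is keyed off src0 being a float type, and each later source gets modifiers only when that switch is on and the source is a float or integer type (getSrcMod<VT> then picks the matching modifier operand class). A small C++ sketch of that derivation, with names mirroring the TableGen but otherwise my own:

// Sketch of the HasSrcNMods derivation above (not LLVM code).
struct SrcType { bool isFloat; bool isInt; };

struct ModFlags { bool hasModifiers, src0, src1, src2; };

constexpr ModFlags deriveModFlags(SrcType s0, SrcType s1, SrcType s2) {
  ModFlags f{};
  f.hasModifiers = s0.isFloat;                          // HasModifiers = isFloatType<Src0VT>
  f.src0 = f.hasModifiers;                              // HasSrc0Mods
  f.src1 = f.hasModifiers && (s1.isFloat || s1.isInt);  // HasSrc1Mods (BitOr)
  f.src2 = f.hasModifiers && (s2.isFloat || s2.isInt);  // HasSrc2Mods (BitOr)
  return f;
}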
@@ -1453,9 +1042,11 @@ class VOPProfile <list<ValueType> _ArgVT> {
field dag Ins32 = getIns32<Src0RC32, Src1RC32, NumSrcArgs>.ret;
field dag Ins64 = getIns64<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
- HasModifiers>.ret;
- field dag InsDPP = getInsDPP<Src0DPP, Src1DPP, NumSrcArgs, HasModifiers>.ret;
- field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs, HasModifiers, DstVT>.ret;
+ HasModifiers, Src0Mod, Src1Mod, Src2Mod>.ret;
+ field dag InsDPP = getInsDPP<Src0DPP, Src1DPP, NumSrcArgs,
+ HasModifiers, Src0Mod, Src1Mod>.ret;
+ field dag InsSDWA = getInsSDWA<Src0SDWA, Src1SDWA, NumSrcArgs,
+ HasModifiers, Src0Mod, Src1Mod, DstVT>.ret;
field string Asm32 = getAsm32<HasDst, NumSrcArgs, DstVT>.ret;
field string Asm64 = getAsm64<HasDst, NumSrcArgs, HasModifiers, DstVT>.ret;
@@ -1467,14 +1058,13 @@ class VOP_NO_EXT <VOPProfile p> : VOPProfile <p.ArgVT> {
let HasExt = 0;
}
-// FIXME: I think these F16/I16 profiles will need to use f16/i16 types in order
-// for the instruction patterns to work.
def VOP_F16_F16 : VOPProfile <[f16, f16, untyped, untyped]>;
-def VOP_F16_I16 : VOPProfile <[f16, i32, untyped, untyped]>;
-def VOP_I16_F16 : VOPProfile <[i32, f16, untyped, untyped]>;
+def VOP_F16_I16 : VOPProfile <[f16, i16, untyped, untyped]>;
+def VOP_I16_F16 : VOPProfile <[i16, f16, untyped, untyped]>;
def VOP_F16_F16_F16 : VOPProfile <[f16, f16, f16, untyped]>;
-def VOP_F16_F16_I16 : VOPProfile <[f16, f16, i32, untyped]>;
+def VOP_F16_F16_I16 : VOPProfile <[f16, f16, i16, untyped]>;
+def VOP_F16_F16_I32 : VOPProfile <[f16, f16, i32, untyped]>;
def VOP_I16_I16_I16 : VOPProfile <[i32, i32, i32, untyped]>;
def VOP_I16_I16_I16_I16 : VOPProfile <[i32, i32, i32, i32, untyped]>;
@@ -1492,6 +1082,7 @@ def VOP_I32_F32 : VOPProfile <[i32, f32, untyped, untyped]>;
def VOP_I32_F64 : VOPProfile <[i32, f64, untyped, untyped]>;
def VOP_I32_I32 : VOPProfile <[i32, i32, untyped, untyped]>;
+def VOP_F32_F32_F16 : VOPProfile <[f32, f32, f16, untyped]>;
def VOP_F32_F32_F32 : VOPProfile <[f32, f32, f32, untyped]>;
def VOP_F32_F32_I32 : VOPProfile <[f32, f32, i32, untyped]>;
def VOP_F64_F64_F64 : VOPProfile <[f64, f64, f64, untyped]>;
@@ -1500,181 +1091,21 @@ def VOP_I32_F32_F32 : VOPProfile <[i32, f32, f32, untyped]>;
def VOP_I32_F32_I32 : VOPProfile <[i32, f32, i32, untyped]>;
def VOP_I32_I32_I32 : VOPProfile <[i32, i32, i32, untyped]>;
-// Write out to vcc or arbitrary SGPR.
-def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped]> {
- let Asm32 = "$vdst, vcc, $src0, $src1";
- let Asm64 = "$vdst, $sdst, $src0, $src1";
- let Outs32 = (outs DstRC:$vdst);
- let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
-}
-
-// Write out to vcc or arbitrary SGPR and read in from vcc or
-// arbitrary SGPR.
-def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
- // We use VCSrc_32 to exclude literal constants, even though the
- // encoding normally allows them since the implicit VCC use means
- // using one would always violate the constant bus
- // restriction. SGPRs are still allowed because it should
- // technically be possible to use VCC again as src0.
- let Src0RC32 = VCSrc_32;
- let Asm32 = "$vdst, vcc, $src0, $src1, vcc";
- let Asm64 = "$vdst, $sdst, $src0, $src1, $src2";
- let Outs32 = (outs DstRC:$vdst);
- let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
-
- // Suppress src2 implied by type since the 32-bit encoding uses an
- // implicit VCC use.
- let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);
-}
-
-// Read in from vcc or arbitrary SGPR
-def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
- let Src0RC32 = VCSrc_32; // See comment in def VOP2b_I32_I1_I32_I32_I1 above.
- let Asm32 = "$vdst, $src0, $src1, vcc";
- let Asm64 = "$vdst, $src0, $src1, $src2";
- let Outs32 = (outs DstRC:$vdst);
- let Outs64 = (outs DstRC:$vdst);
-
- // Suppress src2 implied by type since the 32-bit encoding uses an
- // implicit VCC use.
- let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);
-}
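The constant-bus comment in the profiles removed above can be made concrete. This is a sketch under my reading of the SI/VI rule (at most one scalar value -- SGPR, VCC or a literal constant -- may feed a VALU instruction); the helper and types are hypothetical, not backend code:

// Why VOP2b/VOP2e restrict src0 to VCSrc_32 (no literals): the implicit VCC
// read already occupies the single constant-bus slot, so a literal can never
// be legal, while an SGPR src0 can still be legal when it is VCC itself.
#include <optional>

struct Src { bool isLiteral = false; std::optional<unsigned> sgpr; };

bool fitsConstantBus(const Src &src0, bool readsVCC, unsigned vccReg) {
  unsigned uses = readsVCC ? 1 : 0;
  if (src0.isLiteral)
    ++uses;                              // a literal always takes a slot
  else if (src0.sgpr && *src0.sgpr != vccReg)
    ++uses;                              // a distinct SGPR takes another slot
  return uses <= 1;                      // SI/VI: one constant-bus read per VALU op
}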
-
-class VOP3b_Profile<ValueType vt> : VOPProfile<[vt, vt, vt, vt]> {
- let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
- let Asm64 = "$vdst, $sdst, $src0_modifiers, $src1_modifiers, $src2_modifiers"#"$clamp"#"$omod";
-}
-
-def VOP3b_F32_I1_F32_F32_F32 : VOP3b_Profile<f32> {
- // FIXME: Hack to stop printing _e64
- let DstRC = RegisterOperand<VGPR_32>;
-}
-
-def VOP3b_F64_I1_F64_F64_F64 : VOP3b_Profile<f64> {
- // FIXME: Hack to stop printing _e64
- let DstRC = RegisterOperand<VReg_64>;
-}
-
-// VOPC instructions are a special case because for the 32-bit
-// encoding, we want to display the implicit vcc write as if it were
-// an explicit $dst.
-class VOPC_Profile<ValueType vt0, ValueType vt1 = vt0> : VOPProfile <[i1, vt0, vt1, untyped]> {
- let Asm32 = "vcc, $src0, $src1";
- // The destination for 32-bit encoding is implicit.
- let HasDst32 = 0;
- let Outs64 = (outs DstRC:$sdst);
-}
-
-class VOPC_Class_Profile<ValueType vt> : VOPC_Profile<vt, i32> {
- let Ins64 = (ins FPInputMods:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1);
- let Asm64 = "$sdst, $src0_modifiers, $src1";
- let InsSDWA = (ins FPInputMods:$src0_fmodifiers, Src0RC64:$src0,
- IntInputMods:$src1_imodifiers, Src1RC64:$src1,
- clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel);
- let AsmSDWA = " vcc, $src0_fmodifiers, $src1_imodifiers$clamp $src0_sel $src1_sel";
-
-}
-
-def VOPC_I1_F32_F32 : VOPC_Profile<f32>;
-def VOPC_I1_F64_F64 : VOPC_Profile<f64>;
-def VOPC_I1_I32_I32 : VOPC_Profile<i32>;
-def VOPC_I1_I64_I64 : VOPC_Profile<i64>;
-
-def VOPC_I1_F32_I32 : VOPC_Class_Profile<f32>;
-def VOPC_I1_F64_I32 : VOPC_Class_Profile<f64>;
-
def VOP_I64_I64_I32 : VOPProfile <[i64, i64, i32, untyped]>;
def VOP_I64_I32_I64 : VOPProfile <[i64, i32, i64, untyped]>;
def VOP_I64_I64_I64 : VOPProfile <[i64, i64, i64, untyped]>;
+def VOP_F16_F32_F16_F32 : VOPProfile <[f16, f32, f16, f32]>;
+def VOP_F32_F32_F16_F16 : VOPProfile <[f32, f32, f16, f16]>;
def VOP_F32_F32_F32_F32 : VOPProfile <[f32, f32, f32, f32]>;
-def VOP_MADAK : VOPProfile <[f32, f32, f32, f32]> {
- field dag Ins32 = (ins VCSrc_32:$src0, VGPR_32:$src1, u32kimm:$imm);
- field string Asm32 = "$vdst, $src0, $src1, $imm";
- field bit HasExt = 0;
-}
-def VOP_MADMK : VOPProfile <[f32, f32, f32, f32]> {
- field dag Ins32 = (ins VCSrc_32:$src0, u32kimm:$imm, VGPR_32:$src1);
- field string Asm32 = "$vdst, $src0, $imm, $src1";
- field bit HasExt = 0;
-}
-def VOP_MAC : VOPProfile <[f32, f32, f32, f32]> {
- let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2);
- let Ins64 = getIns64<Src0RC64, Src1RC64, RegisterOperand<VGPR_32>, 3,
- HasModifiers>.ret;
- let InsDPP = (ins FPInputMods:$src0_modifiers, Src0RC32:$src0,
- FPInputMods:$src1_modifiers, Src1RC32:$src1,
- VGPR_32:$src2, // stub argument
- dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
- bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
- let InsSDWA = (ins FPInputMods:$src0_fmodifiers, Src0RC32:$src0,
- FPInputMods:$src1_fmodifiers, Src1RC32:$src1,
- VGPR_32:$src2, // stub argument
- clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
- src0_sel:$src0_sel, src1_sel:$src1_sel);
- let Asm32 = getAsm32<1, 2, f32>.ret;
- let Asm64 = getAsm64<1, 2, HasModifiers, f32>.ret;
- let AsmDPP = getAsmDPP<1, 2, HasModifiers, f32>.ret;
- let AsmSDWA = getAsmSDWA<1, 2, HasModifiers, f32>.ret;
-}
def VOP_F64_F64_F64_F64 : VOPProfile <[f64, f64, f64, f64]>;
def VOP_I32_I32_I32_I32 : VOPProfile <[i32, i32, i32, i32]>;
def VOP_I64_I32_I32_I64 : VOPProfile <[i64, i32, i32, i64]>;
+def VOP_I32_F32_I32_I32 : VOPProfile <[i32, f32, i32, i32]>;
+def VOP_I64_I64_I32_I64 : VOPProfile <[i64, i64, i32, i64]>;
+def VOP_V4I32_I64_I32_V4I32 : VOPProfile <[v4i32, i64, i32, v4i32]>;
-// This class is used only with VOPC instructions. Use $sdst for out operand
-class SIInstAlias <string asm, Instruction inst, VOPProfile p> :
- InstAlias <asm, (inst)>, PredicateControl {
-
- field bit isCompare;
- field bit isCommutable;
-
- let ResultInst =
- !if (p.HasDst32,
- !if (!eq(p.NumSrcArgs, 0),
- // 1 dst, 0 src
- (inst p.DstRC:$sdst),
- !if (!eq(p.NumSrcArgs, 1),
- // 1 dst, 1 src
- (inst p.DstRC:$sdst, p.Src0RC32:$src0),
- !if (!eq(p.NumSrcArgs, 2),
- // 1 dst, 2 src
- (inst p.DstRC:$sdst, p.Src0RC32:$src0, p.Src1RC32:$src1),
- // else - unreachable
- (inst)))),
- // else
- !if (!eq(p.NumSrcArgs, 2),
- // 0 dst, 2 src
- (inst p.Src0RC32:$src0, p.Src1RC32:$src1),
- !if (!eq(p.NumSrcArgs, 1),
- // 0 dst, 1 src
- (inst p.Src0RC32:$src1),
- // else
- // 0 dst, 0 src
- (inst))));
-}
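The nested !if in the removed ResultInst just picks the alias operand list from whether the 32-bit encoding has an explicit destination and how many sources the profile declares. A rough C++ restatement of that selection, mine and for readability only:

// Operand-list selection roughly equivalent to the ResultInst nest above.
#include <string>
#include <vector>

std::vector<std::string> aliasOperands(bool hasDst32, int numSrcArgs) {
  std::vector<std::string> ops;
  if (hasDst32)
    ops.push_back("$sdst");              // VOPC aliases print the SGPR dst
  if (numSrcArgs >= 1)
    ops.push_back("$src0");
  if (numSrcArgs >= 2)
    ops.push_back("$src1");
  return ops;
}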
-
-class SIInstAliasSI <string asm, string op_name, VOPProfile p> :
- SIInstAlias <asm, !cast<Instruction>(op_name#"_e32_si"), p> {
- let AssemblerPredicate = SIAssemblerPredicate;
-}
-
-class SIInstAliasVI <string asm, string op_name, VOPProfile p> :
- SIInstAlias <asm, !cast<Instruction>(op_name#"_e32_vi"), p> {
- let AssemblerPredicates = [isVI];
-}
-
-multiclass SIInstAliasBuilder <string asm, VOPProfile p> {
-
- def : SIInstAliasSI <asm, NAME, p>;
-
- def : SIInstAliasVI <asm, NAME, p>;
-}
-
-class VOP <string opName> {
- string OpName = opName;
-}
-
-class VOP2_REV <string revOp, bit isOrig> {
+class Commutable_REV <string revOp, bit isOrig> {
string RevOp = revOp;
bit IsOrig = isOrig;
}
@@ -1684,832 +1115,6 @@ class AtomicNoRet <string noRetOp, bit isRet> {
bit IsRet = isRet;
}
-class VOP1_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
- VOP1Common <outs, ins, "", pattern>,
- VOP <opName>,
- SIMCInstr <opName#"_e32", SIEncodingFamily.NONE>,
- MnemonicAlias<opName#"_e32", opName> {
- let isPseudo = 1;
- let isCodeGenOnly = 1;
-
- field bits<8> vdst;
- field bits<9> src0;
-}
-
-class VOP1_Real_si <string opName, vop1 op, dag outs, dag ins, string asm> :
- VOP1<op.SI, outs, ins, asm, []>,
- SIMCInstr <opName#"_e32", SIEncodingFamily.SI> {
- let AssemblerPredicate = SIAssemblerPredicate;
- let DecoderNamespace = "SICI";
- let DisableDecoder = DisableSIDecoder;
-}
-
-class VOP1_Real_vi <string opName, vop1 op, dag outs, dag ins, string asm> :
- VOP1<op.VI, outs, ins, asm, []>,
- SIMCInstr <opName#"_e32", SIEncodingFamily.VI> {
- let AssemblerPredicates = [isVI];
- let DecoderNamespace = "VI";
- let DisableDecoder = DisableVIDecoder;
-}
-
-multiclass VOP1_m <vop1 op, string opName, VOPProfile p, list<dag> pattern,
- string asm = opName#p.Asm32> {
- def "" : VOP1_Pseudo <p.Outs, p.Ins32, pattern, opName>;
-
- def _si : VOP1_Real_si <opName, op, p.Outs, p.Ins32, asm>;
-
- def _vi : VOP1_Real_vi <opName, op, p.Outs, p.Ins32, asm>;
-
-}
-
-class VOP1_DPP <vop1 op, string opName, VOPProfile p> :
- VOP1_DPPe <op.VI>,
- VOP_DPP <p.OutsDPP, p.InsDPP, opName#p.AsmDPP, [], p.HasModifiers> {
- let AssemblerPredicates = !if(p.HasExt, [isVI], [DisableInst]);
- let DecoderNamespace = "DPP";
- let DisableDecoder = DisableVIDecoder;
- let src0_modifiers = !if(p.HasModifiers, ?, 0);
- let src1_modifiers = 0;
-}
-
-class SDWADisableFields <VOPProfile p> {
- bits<8> src0 = !if(!eq(p.NumSrcArgs, 0), 0, ?);
- bits<3> src0_sel = !if(!eq(p.NumSrcArgs, 0), 6, ?);
- bits<2> src0_fmodifiers = !if(!eq(p.NumSrcArgs, 0),
- 0,
- !if(p.HasModifiers, ?, 0));
- bits<1> src0_imodifiers = !if(!eq(p.NumSrcArgs, 0),
- 0,
- !if(p.HasModifiers, 0, ?));
- bits<3> src1_sel = !if(!eq(p.NumSrcArgs, 0), 6,
- !if(!eq(p.NumSrcArgs, 1), 6,
- ?));
- bits<2> src1_fmodifiers = !if(!eq(p.NumSrcArgs, 0), 0,
- !if(!eq(p.NumSrcArgs, 1), 0,
- !if(p.HasModifiers, ?, 0)));
- bits<1> src1_imodifiers = !if(!eq(p.NumSrcArgs, 0), 0,
- !if(!eq(p.NumSrcArgs, 1), 0,
- !if(p.HasModifiers, 0, ?)));
- bits<3> dst_sel = !if(p.HasDst, ?, 6);
- bits<2> dst_unused = !if(p.HasDst, ?, 2);
- bits<1> clamp = !if(!eq(p.NumSrcArgs, 0), 0, ?);
-}
-
-class VOP1_SDWA <vop1 op, string opName, VOPProfile p> :
- VOP1_SDWAe <op.VI>,
- VOP_SDWA <p.OutsSDWA, p.InsSDWA, opName#p.AsmSDWA, [], p.HasModifiers>,
- SDWADisableFields <p> {
- let AsmMatchConverter = "cvtSdwaVOP1";
- let AssemblerPredicates = !if(p.HasExt, [isVI], [DisableInst]);
- let DecoderNamespace = "SDWA";
- let DisableDecoder = DisableVIDecoder;
-}
-
-multiclass VOP1SI_m <vop1 op, string opName, VOPProfile p, list<dag> pattern,
- string asm = opName#p.Asm32> {
-
- def "" : VOP1_Pseudo <p.Outs, p.Ins32, pattern, opName>;
-
- def _si : VOP1_Real_si <opName, op, p.Outs, p.Ins32, asm>;
-}
-
-class VOP2_Pseudo <dag outs, dag ins, list<dag> pattern, string opName> :
- VOP2Common <outs, ins, "", pattern>,
- VOP <opName>,
- SIMCInstr<opName#"_e32", SIEncodingFamily.NONE>,
- MnemonicAlias<opName#"_e32", opName> {
- let isPseudo = 1;
- let isCodeGenOnly = 1;
-}
-
-class VOP2_Real_si <string opName, vop2 op, dag outs, dag ins, string asm> :
- VOP2 <op.SI, outs, ins, opName#asm, []>,
- SIMCInstr <opName#"_e32", SIEncodingFamily.SI> {
- let AssemblerPredicates = [isSICI];
- let DecoderNamespace = "SICI";
- let DisableDecoder = DisableSIDecoder;
-}
-
-class VOP2_Real_vi <string opName, vop2 op, dag outs, dag ins, string asm> :
- VOP2 <op.VI, outs, ins, opName#asm, []>,
- SIMCInstr <opName#"_e32", SIEncodingFamily.VI> {
- let AssemblerPredicates = [isVI];
- let DecoderNamespace = "VI";
- let DisableDecoder = DisableVIDecoder;
-}
-
-multiclass VOP2SI_m <vop2 op, string opName, VOPProfile p, list<dag> pattern,
- string revOp> {
-
- def "" : VOP2_Pseudo <p.Outs32, p.Ins32, pattern, opName>,
- VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
-
- def _si : VOP2_Real_si <opName, op, p.Outs32, p.Ins32, p.Asm32>;
-}
-
-multiclass VOP2_m <vop2 op, string opName, VOPProfile p, list <dag> pattern,
- string revOp> {
-
- def "" : VOP2_Pseudo <p.Outs32, p.Ins32, pattern, opName>,
- VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
-
- def _si : VOP2_Real_si <opName, op, p.Outs32, p.Ins32, p.Asm32>;
-
- def _vi : VOP2_Real_vi <opName, op, p.Outs32, p.Ins32, p.Asm32>;
-
-}
-
-class VOP2_DPP <vop2 op, string opName, VOPProfile p> :
- VOP2_DPPe <op.VI>,
- VOP_DPP <p.OutsDPP, p.InsDPP, opName#p.AsmDPP, [], p.HasModifiers> {
- let AssemblerPredicates = !if(p.HasExt, [isVI], [DisableInst]);
- let DecoderNamespace = "DPP";
- let DisableDecoder = DisableVIDecoder;
- let src0_modifiers = !if(p.HasModifiers, ?, 0);
- let src1_modifiers = !if(p.HasModifiers, ?, 0);
-}
-
-class VOP2_SDWA <vop2 op, string opName, VOPProfile p> :
- VOP2_SDWAe <op.VI>,
- VOP_SDWA <p.OutsSDWA, p.InsSDWA, opName#p.AsmSDWA, [], p.HasModifiers>,
- SDWADisableFields <p> {
- let AsmMatchConverter = "cvtSdwaVOP2";
- let AssemblerPredicates = !if(p.HasExt, [isVI], [DisableInst]);
- let DecoderNamespace = "SDWA";
- let DisableDecoder = DisableVIDecoder;
-}
-
-class VOP3DisableFields <bit HasSrc1, bit HasSrc2, bit HasModifiers> {
-
- bits<2> src0_modifiers = !if(HasModifiers, ?, 0);
- bits<2> src1_modifiers = !if(HasModifiers, !if(HasSrc1, ?, 0), 0);
- bits<2> src2_modifiers = !if(HasModifiers, !if(HasSrc2, ?, 0), 0);
- bits<2> omod = !if(HasModifiers, ?, 0);
- bits<1> clamp = !if(HasModifiers, ?, 0);
- bits<9> src1 = !if(HasSrc1, ?, 0);
- bits<9> src2 = !if(HasSrc2, ?, 0);
-}
-
-class VOP3DisableModFields <bit HasSrc0Mods,
- bit HasSrc1Mods = 0,
- bit HasSrc2Mods = 0,
- bit HasOutputMods = 0> {
- bits<2> src0_modifiers = !if(HasSrc0Mods, ?, 0);
- bits<2> src1_modifiers = !if(HasSrc1Mods, ?, 0);
- bits<2> src2_modifiers = !if(HasSrc2Mods, ?, 0);
- bits<2> omod = !if(HasOutputMods, ?, 0);
- bits<1> clamp = !if(HasOutputMods, ?, 0);
-}
-
-class VOP3_Pseudo <dag outs, dag ins, list<dag> pattern, string opName,
- bit HasMods = 0, bit VOP3Only = 0> :
- VOP3Common <outs, ins, "", pattern, HasMods, VOP3Only>,
- VOP <opName>,
- SIMCInstr<opName#"_e64", SIEncodingFamily.NONE>,
- MnemonicAlias<opName#"_e64", opName> {
- let isPseudo = 1;
- let isCodeGenOnly = 1;
-
- field bit vdst;
- field bit src0;
-}
-
-class VOP3_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName,
- bit HasMods = 0, bit VOP3Only = 0> :
- VOP3Common <outs, ins, asm, [], HasMods, VOP3Only>,
- VOP3e <op>,
- SIMCInstr<opName#"_e64", SIEncodingFamily.SI> {
- let AssemblerPredicates = [isSICI];
- let DecoderNamespace = "SICI";
- let DisableDecoder = DisableSIDecoder;
-}
-
-class VOP3_Real_vi <bits<10> op, dag outs, dag ins, string asm, string opName,
- bit HasMods = 0, bit VOP3Only = 0> :
- VOP3Common <outs, ins, asm, [], HasMods, VOP3Only>,
- VOP3e_vi <op>,
- SIMCInstr <opName#"_e64", SIEncodingFamily.VI> {
- let AssemblerPredicates = [isVI];
- let DecoderNamespace = "VI";
- let DisableDecoder = DisableVIDecoder;
-}
-
-class VOP3_C_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName,
- bit HasMods = 0, bit VOP3Only = 0> :
- VOP3Common <outs, ins, asm, [], HasMods, VOP3Only>,
- VOP3ce <op>,
- SIMCInstr<opName#"_e64", SIEncodingFamily.SI> {
- let AssemblerPredicates = [isSICI];
- let DecoderNamespace = "SICI";
- let DisableDecoder = DisableSIDecoder;
-}
-
-class VOP3_C_Real_vi <bits<10> op, dag outs, dag ins, string asm, string opName,
- bit HasMods = 0, bit VOP3Only = 0> :
- VOP3Common <outs, ins, asm, [], HasMods, VOP3Only>,
- VOP3ce_vi <op>,
- SIMCInstr <opName#"_e64", SIEncodingFamily.VI> {
- let AssemblerPredicates = [isVI];
- let DecoderNamespace = "VI";
- let DisableDecoder = DisableVIDecoder;
-}
-
-class VOP3b_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName,
- bit HasMods = 0, bit VOP3Only = 0> :
- VOP3Common <outs, ins, asm, [], HasMods, VOP3Only>,
- VOP3be <op>,
- SIMCInstr<opName#"_e64", SIEncodingFamily.SI> {
- let AssemblerPredicates = [isSICI];
- let DecoderNamespace = "SICI";
- let DisableDecoder = DisableSIDecoder;
-}
-
-class VOP3b_Real_vi <bits<10> op, dag outs, dag ins, string asm, string opName,
- bit HasMods = 0, bit VOP3Only = 0> :
- VOP3Common <outs, ins, asm, [], HasMods, VOP3Only>,
- VOP3be_vi <op>,
- SIMCInstr <opName#"_e64", SIEncodingFamily.VI> {
- let AssemblerPredicates = [isVI];
- let DecoderNamespace = "VI";
- let DisableDecoder = DisableVIDecoder;
-}
-
-class VOP3e_Real_si <bits<9> op, dag outs, dag ins, string asm, string opName,
- bit HasMods = 0, bit VOP3Only = 0> :
- VOP3Common <outs, ins, asm, [], HasMods, VOP3Only>,
- VOP3e <op>,
- SIMCInstr<opName#"_e64", SIEncodingFamily.SI> {
- let AssemblerPredicates = [isSICI];
- let DecoderNamespace = "SICI";
- let DisableDecoder = DisableSIDecoder;
-}
-
-class VOP3e_Real_vi <bits<10> op, dag outs, dag ins, string asm, string opName,
- bit HasMods = 0, bit VOP3Only = 0> :
- VOP3Common <outs, ins, asm, [], HasMods, VOP3Only>,
- VOP3e_vi <op>,
- SIMCInstr <opName#"_e64", SIEncodingFamily.VI> {
- let AssemblerPredicates = [isVI];
- let DecoderNamespace = "VI";
- let DisableDecoder = DisableVIDecoder;
-}
-
-multiclass VOP3_m <vop op, dag outs, dag ins, string asm, list<dag> pattern,
- string opName, int NumSrcArgs, bit HasMods = 1, bit VOP3Only = 0> {
-
- def "" : VOP3_Pseudo <outs, ins, pattern, opName>;
-
- def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName, HasMods, VOP3Only>,
- VOP3DisableFields<!if(!eq(NumSrcArgs, 1), 0, 1),
- !if(!eq(NumSrcArgs, 2), 0, 1),
- HasMods>;
- def _vi : VOP3_Real_vi <op.VI3, outs, ins, asm, opName, HasMods, VOP3Only>,
- VOP3DisableFields<!if(!eq(NumSrcArgs, 1), 0, 1),
- !if(!eq(NumSrcArgs, 2), 0, 1),
- HasMods>;
-}
-
-multiclass VOP3_1_m <vop op, dag outs, dag ins, string asm,
- list<dag> pattern, string opName, bit HasMods = 1> {
-
- def "" : VOP3_Pseudo <outs, ins, pattern, opName, HasMods>;
-
- def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName, HasMods>,
- VOP3DisableFields<0, 0, HasMods>;
-
- def _vi : VOP3_Real_vi <op.VI3, outs, ins, asm, opName, HasMods>,
- VOP3DisableFields<0, 0, HasMods>;
-}
-
-multiclass VOP3SI_1_m <vop op, dag outs, dag ins, string asm,
- list<dag> pattern, string opName, bit HasMods = 1> {
-
- def "" : VOP3_Pseudo <outs, ins, pattern, opName, HasMods>;
-
- def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName, HasMods>,
- VOP3DisableFields<0, 0, HasMods>;
- // No VI instruction. This class is for SI only.
-}
-
-multiclass VOP3_2_m <vop op, dag outs, dag ins, string asm,
- list<dag> pattern, string opName, string revOp,
- bit HasMods = 1> {
-
- def "" : VOP3_Pseudo <outs, ins, pattern, opName, HasMods>,
- VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
-
- def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName, HasMods>,
- VOP3DisableFields<1, 0, HasMods>;
-
- def _vi : VOP3_Real_vi <op.VI3, outs, ins, asm, opName, HasMods>,
- VOP3DisableFields<1, 0, HasMods>;
-}
-
-multiclass VOP3SI_2_m <vop op, dag outs, dag ins, string asm,
- list<dag> pattern, string opName, string revOp,
- bit HasMods = 1> {
-
- def "" : VOP3_Pseudo <outs, ins, pattern, opName, HasMods>,
- VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
-
- def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName, HasMods>,
- VOP3DisableFields<1, 0, HasMods>;
-
- // No VI instruction. This class is for SI only.
-}
-
-// Two operand VOP3b instruction that may have a 3rd SGPR bool operand
-// instead of an implicit VCC as in the VOP2b format.
-multiclass VOP3b_2_3_m <vop op, dag outs, dag ins, string asm,
- list<dag> pattern, string opName, string revOp,
- bit HasMods = 1, bit useSrc2Input = 0, bit VOP3Only = 0> {
- def "" : VOP3_Pseudo <outs, ins, pattern, opName, HasMods, VOP3Only>;
-
- def _si : VOP3b_Real_si <op.SI3, outs, ins, asm, opName, HasMods, VOP3Only>,
- VOP3DisableFields<1, useSrc2Input, HasMods>;
-
- def _vi : VOP3b_Real_vi <op.VI3, outs, ins, asm, opName, HasMods, VOP3Only>,
- VOP3DisableFields<1, useSrc2Input, HasMods>;
-}
-
-// Same as VOP3b_2_3_m but no 2nd destination (sdst), e.g. v_cndmask_b32.
-multiclass VOP3e_2_3_m <vop op, dag outs, dag ins, string asm,
- list<dag> pattern, string opName, string revOp,
- bit HasMods = 1, bit useSrc2Input = 0, bit VOP3Only = 0> {
- def "" : VOP3_Pseudo <outs, ins, pattern, opName, HasMods, VOP3Only>;
-
- def _si : VOP3e_Real_si <op.SI3, outs, ins, asm, opName, HasMods, VOP3Only>,
- VOP3DisableFields<1, useSrc2Input, HasMods>;
-
- def _vi : VOP3e_Real_vi <op.VI3, outs, ins, asm, opName, HasMods, VOP3Only>,
- VOP3DisableFields<1, useSrc2Input, HasMods>;
-}
-
-multiclass VOP3_C_m <vop op, dag outs, dag ins, string asm,
- list<dag> pattern, string opName,
- bit HasMods, bit defExec,
- string revOp, list<SchedReadWrite> sched> {
-
- def "" : VOP3_Pseudo <outs, ins, pattern, opName, HasMods>,
- VOP2_REV<revOp#"_e64", !eq(revOp, opName)> {
- let Defs = !if(defExec, [EXEC], []);
- let SchedRW = sched;
- }
-
- def _si : VOP3_C_Real_si <op.SI3, outs, ins, asm, opName, HasMods>,
- VOP3DisableFields<1, 0, HasMods> {
- let Defs = !if(defExec, [EXEC], []);
- let SchedRW = sched;
- }
-
- def _vi : VOP3_C_Real_vi <op.VI3, outs, ins, asm, opName, HasMods>,
- VOP3DisableFields<1, 0, HasMods> {
- let Defs = !if(defExec, [EXEC], []);
- let SchedRW = sched;
- }
-}
-
-// An instruction that is VOP2 on SI and VOP3 on VI, no modifiers.
-multiclass VOP2SI_3VI_m <vop3 op, string opName, dag outs, dag ins,
- string asm, list<dag> pattern = []> {
- let isPseudo = 1, isCodeGenOnly = 1 in {
- def "" : VOPAnyCommon <outs, ins, "", pattern>,
- SIMCInstr<opName, SIEncodingFamily.NONE>;
- }
-
- def _si : VOP2 <op.SI3{5-0}, outs, ins, asm, []>,
- SIMCInstr <opName, SIEncodingFamily.SI> {
- let AssemblerPredicates = [isSICI];
- let DecoderNamespace = "SICI";
- let DisableDecoder = DisableSIDecoder;
- }
-
- def _vi : VOP3Common <outs, ins, asm, []>,
- VOP3e_vi <op.VI3>,
- VOP3DisableFields <1, 0, 0>,
- SIMCInstr <opName, SIEncodingFamily.VI> {
- let AssemblerPredicates = [isVI];
- let DecoderNamespace = "VI";
- let DisableDecoder = DisableVIDecoder;
- }
-}
-
-multiclass VOP1_Helper <vop1 op, string opName, VOPProfile p, list<dag> pat32,
- list<dag> pat64> {
-
- defm _e32 : VOP1_m <op, opName, p, pat32>;
-
- defm _e64 : VOP3_1_m <op, p.Outs, p.Ins64, opName#p.Asm64, pat64, opName,
- p.HasModifiers>;
-
- def _dpp : VOP1_DPP <op, opName, p>;
-
- def _sdwa : VOP1_SDWA <op, opName, p>;
-}
-
-multiclass VOP1Inst <vop1 op, string opName, VOPProfile P,
- SDPatternOperator node = null_frag> : VOP1_Helper <
- op, opName, P, [],
- !if(P.HasModifiers,
- [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0,
- i32:$src0_modifiers, i1:$clamp, i32:$omod))))],
- [(set P.DstVT:$vdst, (node P.Src0VT:$src0))])
->;
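The HasModifiers branch above wraps each source in VOP3Mods/VOP3Mods0 so the selected instruction carries per-source modifier, clamp and omod operands. As a conceptual sketch of what those operands mean at run time -- my reading of the source/output modifiers, not backend code, and the ordering of clamp relative to omod is an assumption here:

// Conceptual meaning of $srcN_modifiers, $clamp and $omod in the patterns.
#include <algorithm>
#include <cmath>

float applySrcMods(float x, bool abs, bool neg) {
  if (abs) x = std::fabs(x);             // |x| is applied before negation
  if (neg) x = -x;
  return x;
}

float applyOutMods(float r, float omod /* 1, 2, 4 or 0.5 */, bool clamp) {
  r *= omod;                                      // output modifier scale
  return clamp ? std::clamp(r, 0.0f, 1.0f) : r;   // clamp result to [0,1]
}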
-
-multiclass VOP1InstSI <vop1 op, string opName, VOPProfile P,
- SDPatternOperator node = null_frag> {
-
- defm _e32 : VOP1SI_m <op, opName, P, []>;
-
- defm _e64 : VOP3SI_1_m <op, P.Outs, P.Ins64, opName#P.Asm64,
- !if(P.HasModifiers,
- [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0,
- i32:$src0_modifiers, i1:$clamp, i32:$omod))))],
- [(set P.DstVT:$vdst, (node P.Src0VT:$src0))]),
- opName, P.HasModifiers>;
-}
-
-multiclass VOP2_Helper <vop2 op, string opName, VOPProfile p, list<dag> pat32,
- list<dag> pat64, string revOp> {
-
- defm _e32 : VOP2_m <op, opName, p, pat32, revOp>;
-
- defm _e64 : VOP3_2_m <op, p.Outs, p.Ins64, opName#p.Asm64, pat64, opName,
- revOp, p.HasModifiers>;
-
- def _dpp : VOP2_DPP <op, opName, p>;
-
- def _sdwa : VOP2_SDWA <op, opName, p>;
-}
-
-multiclass VOP2Inst <vop2 op, string opName, VOPProfile P,
- SDPatternOperator node = null_frag,
- string revOp = opName> : VOP2_Helper <
- op, opName, P, [],
- !if(P.HasModifiers,
- [(set P.DstVT:$vdst,
- (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
- i1:$clamp, i32:$omod)),
- (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
- [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
- revOp
->;
-
-multiclass VOP2InstSI <vop2 op, string opName, VOPProfile P,
- SDPatternOperator node = null_frag,
- string revOp = opName> {
-
- defm _e32 : VOP2SI_m <op, opName, P, [], revOp>;
-
- defm _e64 : VOP3SI_2_m <op, P.Outs, P.Ins64, opName#P.Asm64,
- !if(P.HasModifiers,
- [(set P.DstVT:$vdst,
- (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
- i1:$clamp, i32:$omod)),
- (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
- [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
- opName, revOp, P.HasModifiers>;
-}
-
-multiclass VOP2e_Helper <vop2 op, string opName, VOPProfile p,
- list<dag> pat32, list<dag> pat64,
- string revOp, bit useSGPRInput> {
-
- let SchedRW = [Write32Bit] in {
- let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]) in {
- defm _e32 : VOP2_m <op, opName, p, pat32, revOp>;
- }
-
- defm _e64 : VOP3e_2_3_m <op, p.Outs64, p.Ins64, opName#p.Asm64, pat64,
- opName, revOp, p.HasModifiers, useSGPRInput>;
- }
-}
-
-multiclass VOP2eInst <vop2 op, string opName, VOPProfile P,
- SDPatternOperator node = null_frag,
- string revOp = opName> : VOP2e_Helper <
- op, opName, P, [],
- !if(P.HasModifiers,
- [(set P.DstVT:$vdst,
- (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
- i1:$clamp, i32:$omod)),
- (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
- [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
- revOp, !eq(P.NumSrcArgs, 3)
->;
-
-multiclass VOP2b_Helper <vop2 op, string opName, VOPProfile p,
- list<dag> pat32, list<dag> pat64,
- string revOp, bit useSGPRInput> {
-
- let SchedRW = [Write32Bit, WriteSALU] in {
- let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in {
- defm _e32 : VOP2_m <op, opName, p, pat32, revOp>;
- }
-
- defm _e64 : VOP3b_2_3_m <op, p.Outs64, p.Ins64, opName#p.Asm64, pat64,
- opName, revOp, p.HasModifiers, useSGPRInput>;
- }
-}
-
-multiclass VOP2bInst <vop2 op, string opName, VOPProfile P,
- SDPatternOperator node = null_frag,
- string revOp = opName> : VOP2b_Helper <
- op, opName, P, [],
- !if(P.HasModifiers,
- [(set P.DstVT:$vdst,
- (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
- i1:$clamp, i32:$omod)),
- (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
- [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
- revOp, !eq(P.NumSrcArgs, 3)
->;
-
-// A VOP2 instruction that is VOP3-only on VI.
-multiclass VOP2_VI3_Helper <vop23 op, string opName, VOPProfile p,
- list<dag> pat32, list<dag> pat64, string revOp> {
-
- defm _e32 : VOP2SI_m <op, opName, p, pat32, revOp>;
-
- defm _e64 : VOP3_2_m <op, p.Outs, p.Ins64, opName#p.Asm64, pat64, opName,
- revOp, p.HasModifiers>;
-}
-
-multiclass VOP2_VI3_Inst <vop23 op, string opName, VOPProfile P,
- SDPatternOperator node = null_frag,
- string revOp = opName>
- : VOP2_VI3_Helper <
- op, opName, P, [],
- !if(P.HasModifiers,
- [(set P.DstVT:$vdst,
- (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
- i1:$clamp, i32:$omod)),
- (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
- [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]),
- revOp
->;
-
-multiclass VOP2MADK <vop2 op, string opName, VOPProfile P, list<dag> pattern = []> {
-
- def "" : VOP2_Pseudo <P.Outs, P.Ins32, pattern, opName>;
-
-let isCodeGenOnly = 0 in {
- def _si : VOP2Common <P.Outs, P.Ins32,
- !strconcat(opName, P.Asm32), []>,
- SIMCInstr <opName#"_e32", SIEncodingFamily.SI>,
- VOP2_MADKe <op.SI> {
- let AssemblerPredicates = [isSICI];
- let DecoderNamespace = "SICI";
- let DisableDecoder = DisableSIDecoder;
- }
-
- def _vi : VOP2Common <P.Outs, P.Ins32,
- !strconcat(opName, P.Asm32), []>,
- SIMCInstr <opName#"_e32", SIEncodingFamily.VI>,
- VOP2_MADKe <op.VI> {
- let AssemblerPredicates = [isVI];
- let DecoderNamespace = "VI";
- let DisableDecoder = DisableVIDecoder;
- }
-} // End isCodeGenOnly = 0
-}
-
-class VOPC_Pseudo <dag ins, list<dag> pattern, string opName> :
- VOPCCommon <ins, "", pattern>,
- VOP <opName>,
- SIMCInstr<opName#"_e32", SIEncodingFamily.NONE> {
- let isPseudo = 1;
- let isCodeGenOnly = 1;
-}
-
-class VOPC_SDWA <vopc op, string opName, bit DefExec, VOPProfile p> :
- VOPC_SDWAe <op.VI>,
- VOP_SDWA <p.OutsSDWA, p.InsSDWA, opName#p.AsmSDWA, [], p.HasModifiers>,
- SDWADisableFields <p> {
- let Defs = !if(DefExec, [VCC, EXEC], [VCC]);
- let hasSideEffects = DefExec;
- let AsmMatchConverter = "cvtSdwaVOPC";
- let AssemblerPredicates = !if(p.HasExt, [isVI], [DisableInst]);
- let DecoderNamespace = "SDWA";
- let DisableDecoder = DisableVIDecoder;
-}
-
-multiclass VOPC_m <vopc op, dag ins, string op_asm, list<dag> pattern,
- string opName, bit DefExec, VOPProfile p,
- list<SchedReadWrite> sched,
- string revOpName = "", string asm = opName#"_e32 "#op_asm,
- string alias_asm = opName#" "#op_asm> {
- def "" : VOPC_Pseudo <ins, pattern, opName>,
- VOP2_REV<revOpName#"_e32", !eq(revOpName, opName)> {
- let Defs = !if(DefExec, [VCC, EXEC], [VCC]);
- let SchedRW = sched;
- let isConvergent = DefExec;
- }
-
- let AssemblerPredicates = [isSICI] in {
- def _si : VOPC<op.SI, ins, asm, []>,
- SIMCInstr <opName#"_e32", SIEncodingFamily.SI> {
- let Defs = !if(DefExec, [VCC, EXEC], [VCC]);
- let isConvergent = DefExec;
- let SchedRW = sched;
- let DecoderNamespace = "SICI";
- let DisableDecoder = DisableSIDecoder;
- }
-
- } // End AssemblerPredicates = [isSICI]
-
- let AssemblerPredicates = [isVI] in {
- def _vi : VOPC<op.VI, ins, asm, []>,
- SIMCInstr <opName#"_e32", SIEncodingFamily.VI> {
- let Defs = !if(DefExec, [VCC, EXEC], [VCC]);
- let isConvergent = DefExec;
- let SchedRW = sched;
- let DecoderNamespace = "VI";
- let DisableDecoder = DisableVIDecoder;
- }
-
- } // End AssemblerPredicates = [isVI]
-
- defm : SIInstAliasBuilder<alias_asm, p>;
-}
-
-multiclass VOPC_Helper <vopc op, string opName, list<dag> pat32,
- list<dag> pat64, bit DefExec, string revOp,
- VOPProfile p, list<SchedReadWrite> sched> {
- defm _e32 : VOPC_m <op, p.Ins32, p.Asm32, pat32, opName, DefExec, p, sched,
- revOp>;
-
- defm _e64 : VOP3_C_m <op, (outs VOPDstS64:$sdst), p.Ins64, opName#p.Asm64, pat64,
- opName, p.HasModifiers, DefExec, revOp, sched>;
-
- def _sdwa : VOPC_SDWA <op, opName, DefExec, p>;
-}
-
-// Special case for class instructions which only have modifiers on
-// the 1st source operand.
-multiclass VOPC_Class_Helper <vopc op, string opName, list<dag> pat32,
- list<dag> pat64, bit DefExec, string revOp,
- VOPProfile p, list<SchedReadWrite> sched> {
- defm _e32 : VOPC_m <op, p.Ins32, p.Asm32, pat32, opName, DefExec, p, sched>;
-
- defm _e64 : VOP3_C_m <op, (outs VOPDstS64:$sdst), p.Ins64, opName#p.Asm64, pat64,
- opName, p.HasModifiers, DefExec, revOp, sched>,
- VOP3DisableModFields<1, 0, 0>;
-
- def _sdwa : VOPC_SDWA <op, opName, DefExec, p> {
- let src1_fmodifiers = 0;
- let src1_imodifiers = ?;
- }
-}
-
-multiclass VOPCInst <vopc op, string opName,
- VOPProfile P, PatLeaf cond = COND_NULL,
- string revOp = opName,
- bit DefExec = 0,
- list<SchedReadWrite> sched = [Write32Bit]> :
- VOPC_Helper <
- op, opName, [],
- !if(P.HasModifiers,
- [(set i1:$sdst,
- (setcc (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
- i1:$clamp, i32:$omod)),
- (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
- cond))],
- [(set i1:$sdst, (setcc P.Src0VT:$src0, P.Src1VT:$src1, cond))]),
- DefExec, revOp, P, sched
->;
-
-multiclass VOPCClassInst <vopc op, string opName, VOPProfile P,
- bit DefExec = 0,
- list<SchedReadWrite> sched> : VOPC_Class_Helper <
- op, opName, [],
- !if(P.HasModifiers,
- [(set i1:$sdst,
- (AMDGPUfp_class (P.Src0VT (VOP3Mods0Clamp0OMod P.Src0VT:$src0, i32:$src0_modifiers)), P.Src1VT:$src1))],
- [(set i1:$sdst, (AMDGPUfp_class P.Src0VT:$src0, P.Src1VT:$src1))]),
- DefExec, opName, P, sched
->;
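The AMDGPUfp_class pattern above compares a float against a class mask in src1, which is why only src0 carries float modifiers. A sketch of the test as I understand it; the mask bit layout below is an assumption based on the usual GCN class bits, not something stated in this patch:

// Hypothetical restatement of the fp-class test: true when x falls in any
// class selected by the mask (NaNs, +/-inf, +/-normal, +/-denormal, +/-0).
#include <cmath>
#include <cstdint>

bool fpClassTest(float x, uint32_t mask) {
  enum : uint32_t {
    SNAN = 1u << 0, QNAN = 1u << 1, NINF = 1u << 2, NNORM = 1u << 3,
    NDEN = 1u << 4, NZERO = 1u << 5, PZERO = 1u << 6, PDEN = 1u << 7,
    PNORM = 1u << 8, PINF = 1u << 9
  };
  uint32_t cls;
  if (std::isnan(x))
    cls = QNAN | SNAN;                   // sketch: not distinguishing NaN kinds
  else if (std::isinf(x))
    cls = std::signbit(x) ? NINF : PINF;
  else if (x == 0.0f)
    cls = std::signbit(x) ? NZERO : PZERO;
  else if (std::fpclassify(x) == FP_SUBNORMAL)
    cls = std::signbit(x) ? NDEN : PDEN;
  else
    cls = std::signbit(x) ? NNORM : PNORM;
  return (cls & mask) != 0;
}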
-
-
-multiclass VOPC_F32 <vopc op, string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
- VOPCInst <op, opName, VOPC_I1_F32_F32, cond, revOp>;
-
-multiclass VOPC_F64 <vopc op, string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
- VOPCInst <op, opName, VOPC_I1_F64_F64, cond, revOp, 0, [WriteDoubleAdd]>;
-
-multiclass VOPC_I32 <vopc op, string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
- VOPCInst <op, opName, VOPC_I1_I32_I32, cond, revOp>;
-
-multiclass VOPC_I64 <vopc op, string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
- VOPCInst <op, opName, VOPC_I1_I64_I64, cond, revOp, 0, [Write64Bit]>;
-
-
-multiclass VOPCX <vopc op, string opName, VOPProfile P,
- PatLeaf cond = COND_NULL,
- list<SchedReadWrite> sched,
- string revOp = "">
- : VOPCInst <op, opName, P, cond, revOp, 1, sched>;
-
-multiclass VOPCX_F32 <vopc op, string opName, string revOp = opName> :
- VOPCX <op, opName, VOPC_I1_F32_F32, COND_NULL, [Write32Bit], revOp>;
-
-multiclass VOPCX_F64 <vopc op, string opName, string revOp = opName> :
- VOPCX <op, opName, VOPC_I1_F64_F64, COND_NULL, [WriteDoubleAdd], revOp>;
-
-multiclass VOPCX_I32 <vopc op, string opName, string revOp = opName> :
- VOPCX <op, opName, VOPC_I1_I32_I32, COND_NULL, [Write32Bit], revOp>;
-
-multiclass VOPCX_I64 <vopc op, string opName, string revOp = opName> :
- VOPCX <op, opName, VOPC_I1_I64_I64, COND_NULL, [Write64Bit], revOp>;
-
-
-multiclass VOPC_CLASS_F32 <vopc op, string opName> :
- VOPCClassInst <op, opName, VOPC_I1_F32_I32, 0, [Write32Bit]>;
-
-multiclass VOPCX_CLASS_F32 <vopc op, string opName> :
- VOPCClassInst <op, opName, VOPC_I1_F32_I32, 1, [Write32Bit]>;
-
-multiclass VOPC_CLASS_F64 <vopc op, string opName> :
- VOPCClassInst <op, opName, VOPC_I1_F64_I32, 0, [WriteDoubleAdd]>;
-
-multiclass VOPCX_CLASS_F64 <vopc op, string opName> :
- VOPCClassInst <op, opName, VOPC_I1_F64_I32, 1, [WriteDoubleAdd]>;
-
-
-multiclass VOP3_Helper <vop3 op, string opName, dag outs, dag ins, string asm,
- list<dag> pat, int NumSrcArgs, bit HasMods,
- bit VOP3Only = 0> : VOP3_m <
- op, outs, ins, opName#" "#asm, pat, opName, NumSrcArgs, HasMods, VOP3Only
->;
-
-multiclass VOP3Inst <vop3 op, string opName, VOPProfile P,
- SDPatternOperator node = null_frag, bit VOP3Only = 0> :
- VOP3_Helper <
- op, opName, (outs P.DstRC.RegClass:$vdst), P.Ins64, P.Asm64,
- !if(!eq(P.NumSrcArgs, 3),
- !if(P.HasModifiers,
- [(set P.DstVT:$vdst,
- (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
- i1:$clamp, i32:$omod)),
- (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
- (P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers))))],
- [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1,
- P.Src2VT:$src2))]),
- !if(!eq(P.NumSrcArgs, 2),
- !if(P.HasModifiers,
- [(set P.DstVT:$vdst,
- (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
- i1:$clamp, i32:$omod)),
- (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
- [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))])
- /* P.NumSrcArgs == 1 */,
- !if(P.HasModifiers,
- [(set P.DstVT:$vdst,
- (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
- i1:$clamp, i32:$omod))))],
- [(set P.DstVT:$vdst, (node P.Src0VT:$src0))]))),
- P.NumSrcArgs, P.HasModifiers, VOP3Only
->;
-
-// Special case for v_div_fmas_{f32|f64}, since it seems to be the
-// only VOP instruction that implicitly reads VCC.
-multiclass VOP3_VCC_Inst <vop3 op, string opName,
- VOPProfile P,
- SDPatternOperator node = null_frag> : VOP3_Helper <
- op, opName,
- (outs P.DstRC.RegClass:$vdst),
- (ins FPInputMods:$src0_modifiers, P.Src0RC64:$src0,
- FPInputMods:$src1_modifiers, P.Src1RC64:$src1,
- FPInputMods:$src2_modifiers, P.Src2RC64:$src2,
- clampmod:$clamp,
- omod:$omod),
- "$vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers"#"$clamp"#"$omod",
- [(set P.DstVT:$vdst,
- (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
- i1:$clamp, i32:$omod)),
- (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
- (P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers)),
- (i1 VCC)))],
- 3, 1
->;
-
-multiclass VOP3bInst <vop op, string opName, VOPProfile P, list<dag> pattern = [], bit VOP3Only = 0> :
- VOP3b_2_3_m <
- op, P.Outs64, P.Ins64,
- opName#" "#P.Asm64, pattern,
- opName, "", 1, 1, VOP3Only
->;
-
-class Vop3ModPat<Instruction Inst, VOPProfile P, SDPatternOperator node> : Pat<
- (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)),
- (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
- (P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers))),
- (Inst i32:$src0_modifiers, P.Src0VT:$src0,
- i32:$src1_modifiers, P.Src1VT:$src1,
- i32:$src2_modifiers, P.Src2VT:$src2,
- i1:$clamp,
- i32:$omod)>;
-
//===----------------------------------------------------------------------===//
// Interpolation opcodes
//===----------------------------------------------------------------------===//
@@ -2551,1052 +1156,6 @@ multiclass VINTRP_m <bits <2> op, dag outs, dag ins, string asm,
}
//===----------------------------------------------------------------------===//
-// Vector I/O classes
-//===----------------------------------------------------------------------===//
-
-class DS_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
- DS <outs, ins, "", pattern>,
- SIMCInstr <opName, SIEncodingFamily.NONE> {
- let isPseudo = 1;
- let isCodeGenOnly = 1;
-}
-
-class DS_Real_si <bits<8> op, string opName, dag outs, dag ins, string asm> :
- DS <outs, ins, asm, []>,
- DSe <op>,
- SIMCInstr <opName, SIEncodingFamily.SI> {
- let isCodeGenOnly = 0;
- let AssemblerPredicates = [isSICI];
- let DecoderNamespace="SICI";
- let DisableDecoder = DisableSIDecoder;
-}
-
-class DS_Real_vi <bits<8> op, string opName, dag outs, dag ins, string asm> :
- DS <outs, ins, asm, []>,
- DSe_vi <op>,
- SIMCInstr <opName, SIEncodingFamily.VI> {
- let isCodeGenOnly = 0;
- let AssemblerPredicates = [isVI];
- let DecoderNamespace="VI";
- let DisableDecoder = DisableVIDecoder;
-}
-
-class DS_Off16_Real_si <bits<8> op, string opName, dag outs, dag ins, string asm> :
- DS_Real_si <op,opName, outs, ins, asm> {
-
-  // A single load interprets the two i8imm operands as a single i16 offset.
- bits<16> offset;
- let offset0 = offset{7-0};
- let offset1 = offset{15-8};
-}
-
-class DS_Off16_Real_vi <bits<8> op, string opName, dag outs, dag ins, string asm> :
- DS_Real_vi <op, opName, outs, ins, asm> {
-
-  // A single load interprets the two i8imm operands as a single i16 offset.
- bits<16> offset;
- let offset0 = offset{7-0};
- let offset1 = offset{15-8};
-}
-
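The offset{7-0}/offset{15-8} slices in the two classes above are just a byte split of the single 16-bit offset. A trivial standalone illustration (mine, not from the patch):

// Byte split matching offset0 = offset{7-0} and offset1 = offset{15-8}.
#include <cstdint>
#include <cstdio>

int main() {
  uint16_t offset = 0x1234;                 // a 16-bit DS byte offset
  uint8_t offset0 = offset & 0xFF;          // offset{7-0}
  uint8_t offset1 = (offset >> 8) & 0xFF;   // offset{15-8}
  std::printf("offset0=0x%02x offset1=0x%02x\n", offset0, offset1);
  return 0;
}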
-multiclass DS_1A_RET_ <dsop op, string opName, RegisterClass rc,
- dag outs = (outs rc:$vdst),
- dag ins = (ins VGPR_32:$addr, offset:$offset, gds:$gds),
- string asm = opName#" $vdst, $addr"#"$offset$gds"> {
-
- def "" : DS_Pseudo <opName, outs, ins, []>;
-
- let data0 = 0, data1 = 0 in {
- def _si : DS_Off16_Real_si <op.SI, opName, outs, ins, asm>;
- def _vi : DS_Off16_Real_vi <op.VI, opName, outs, ins, asm>;
- }
-}
-
-// TODO: DS_1A_RET can be inherited from DS_1A_RET_, but it's not working
-// for some reason. In fact, we can remove this class if we use dsop everywhere.
-multiclass DS_1A_RET <bits<8> op, string opName, RegisterClass rc,
- dag outs = (outs rc:$vdst),
- dag ins = (ins VGPR_32:$addr, offset:$offset, gds:$gds),
- string asm = opName#" $vdst, $addr"#"$offset$gds"> {
-
- def "" : DS_Pseudo <opName, outs, ins, []>;
-
- let data0 = 0, data1 = 0 in {
- def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
- }
-}
-
-multiclass DS_1A_Off8_RET <bits<8> op, string opName, RegisterClass rc,
- dag outs = (outs rc:$vdst),
- dag ins = (ins VGPR_32:$addr, offset0:$offset0, offset1:$offset1,
- gds:$gds),
- string asm = opName#" $vdst, $addr"#"$offset0"#"$offset1$gds"> {
-
- def "" : DS_Pseudo <opName, outs, ins, []>;
-
- let data0 = 0, data1 = 0, AsmMatchConverter = "cvtDSOffset01" in {
- def _si : DS_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_Real_vi <op, opName, outs, ins, asm>;
- }
-}
-
-multiclass DS_1A1D_NORET <bits<8> op, string opName, RegisterClass rc,
- dag outs = (outs),
- dag ins = (ins VGPR_32:$addr, rc:$data0, offset:$offset, gds:$gds),
- string asm = opName#" $addr, $data0"#"$offset$gds"> {
-
- def "" : DS_Pseudo <opName, outs, ins, []>,
- AtomicNoRet<opName, 0>;
-
- let data1 = 0, vdst = 0 in {
- def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
- }
-}
-
-multiclass DS_1A_Off8_NORET <bits<8> op, string opName,
- dag outs = (outs),
- dag ins = (ins VGPR_32:$addr,
- offset0:$offset0, offset1:$offset1, gds:$gds),
- string asm = opName#" $addr $offset0"#"$offset1$gds"> {
-
- def "" : DS_Pseudo <opName, outs, ins, []>;
-
- let data0 = 0, data1 = 0, vdst = 0, AsmMatchConverter = "cvtDSOffset01" in {
- def _si : DS_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_Real_vi <op, opName, outs, ins, asm>;
- }
-}
-
-multiclass DS_1A2D_Off8_NORET <bits<8> op, string opName, RegisterClass rc,
- dag outs = (outs),
- dag ins = (ins VGPR_32:$addr, rc:$data0, rc:$data1,
- offset0:$offset0, offset1:$offset1, gds:$gds),
- string asm = opName#" $addr, $data0, $data1$offset0$offset1$gds"> {
-
- def "" : DS_Pseudo <opName, outs, ins, []>;
-
- let vdst = 0, AsmMatchConverter = "cvtDSOffset01" in {
- def _si : DS_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_Real_vi <op, opName, outs, ins, asm>;
- }
-}
-
-multiclass DS_1A1D_RET <bits<8> op, string opName, RegisterClass rc,
- string noRetOp = "",
- dag outs = (outs rc:$vdst),
- dag ins = (ins VGPR_32:$addr, rc:$data0, offset:$offset, gds:$gds),
- string asm = opName#" $vdst, $addr, $data0"#"$offset$gds"> {
-
- let hasPostISelHook = 1 in {
- def "" : DS_Pseudo <opName, outs, ins, []>,
- AtomicNoRet<noRetOp, 1>;
-
- let data1 = 0 in {
- def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
- }
- }
-}
-
-multiclass DS_1A1D_PERMUTE <bits<8> op, string opName, RegisterClass rc,
- SDPatternOperator node = null_frag,
- dag outs = (outs rc:$vdst),
- dag ins = (ins VGPR_32:$addr, rc:$data0, offset:$offset),
- string asm = opName#" $vdst, $addr, $data0"#"$offset"> {
-
- let mayLoad = 0, mayStore = 0, isConvergent = 1 in {
- def "" : DS_Pseudo <opName, outs, ins,
- [(set i32:$vdst,
- (node (DS1Addr1Offset i32:$addr, i16:$offset), i32:$data0))]>;
-
- let data1 = 0, gds = 0 in {
- def "_vi" : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
- }
- }
-}
-
-multiclass DS_1A2D_RET_m <bits<8> op, string opName, RegisterClass rc,
- string noRetOp = "", dag ins,
- dag outs = (outs rc:$vdst),
- string asm = opName#" $vdst, $addr, $data0, $data1"#"$offset"#"$gds"> {
-
- let hasPostISelHook = 1 in {
- def "" : DS_Pseudo <opName, outs, ins, []>,
- AtomicNoRet<noRetOp, 1>;
-
- def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
- }
-}
-
-multiclass DS_1A2D_RET <bits<8> op, string asm, RegisterClass rc,
- string noRetOp = "", RegisterClass src = rc> :
- DS_1A2D_RET_m <op, asm, rc, noRetOp,
- (ins VGPR_32:$addr, src:$data0, src:$data1,
- offset:$offset, gds:$gds)
->;
-
-multiclass DS_1A2D_NORET <bits<8> op, string opName, RegisterClass rc,
- string noRetOp = opName,
- dag outs = (outs),
- dag ins = (ins VGPR_32:$addr, rc:$data0, rc:$data1,
- offset:$offset, gds:$gds),
- string asm = opName#" $addr, $data0, $data1"#"$offset"#"$gds"> {
-
- def "" : DS_Pseudo <opName, outs, ins, []>,
- AtomicNoRet<noRetOp, 0>;
-
- let vdst = 0 in {
- def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
- }
-}
-
-multiclass DS_0A_RET <bits<8> op, string opName,
- dag outs = (outs VGPR_32:$vdst),
- dag ins = (ins offset:$offset, gds:$gds),
- string asm = opName#" $vdst"#"$offset"#"$gds"> {
-
- let mayLoad = 1, mayStore = 1 in {
- def "" : DS_Pseudo <opName, outs, ins, []>;
-
- let addr = 0, data0 = 0, data1 = 0 in {
- def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
- } // end addr = 0, data0 = 0, data1 = 0
- } // end mayLoad = 1, mayStore = 1
-}
-
-multiclass DS_1A_RET_GDS <bits<8> op, string opName,
- dag outs = (outs VGPR_32:$vdst),
- dag ins = (ins VGPR_32:$addr, offset:$offset),
- string asm = opName#" $vdst, $addr"#"$offset gds"> {
-
- def "" : DS_Pseudo <opName, outs, ins, []>;
-
- let data0 = 0, data1 = 0, gds = 1 in {
- def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
- } // end data0 = 0, data1 = 0, gds = 1
-}
-
-multiclass DS_1A_GDS <bits<8> op, string opName,
- dag outs = (outs),
- dag ins = (ins VGPR_32:$addr),
- string asm = opName#" $addr gds"> {
-
- def "" : DS_Pseudo <opName, outs, ins, []>;
-
- let vdst = 0, data0 = 0, data1 = 0, offset0 = 0, offset1 = 0, gds = 1 in {
- def _si : DS_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_Real_vi <op, opName, outs, ins, asm>;
-  } // end vdst = 0, data0 = 0, data1 = 0, gds = 1
-}
-
-multiclass DS_1A <bits<8> op, string opName,
- dag outs = (outs),
- dag ins = (ins VGPR_32:$addr, offset:$offset, gds:$gds),
- string asm = opName#" $addr"#"$offset"#"$gds"> {
-
- let mayLoad = 1, mayStore = 1 in {
- def "" : DS_Pseudo <opName, outs, ins, []>;
-
- let vdst = 0, data0 = 0, data1 = 0 in {
- def _si : DS_Off16_Real_si <op, opName, outs, ins, asm>;
- def _vi : DS_Off16_Real_vi <op, opName, outs, ins, asm>;
- } // let vdst = 0, data0 = 0, data1 = 0
- } // end mayLoad = 1, mayStore = 1
-}
-
-//===----------------------------------------------------------------------===//
-// MTBUF classes
-//===----------------------------------------------------------------------===//
-
-class MTBUF_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
- MTBUF <outs, ins, "", pattern>,
- SIMCInstr<opName, SIEncodingFamily.NONE> {
- let isPseudo = 1;
- let isCodeGenOnly = 1;
-}
-
-class MTBUF_Real_si <bits<3> op, string opName, dag outs, dag ins,
- string asm> :
- MTBUF <outs, ins, asm, []>,
- MTBUFe <op>,
- SIMCInstr<opName, SIEncodingFamily.SI> {
- let DecoderNamespace="SICI";
- let DisableDecoder = DisableSIDecoder;
-}
-
-class MTBUF_Real_vi <bits<4> op, string opName, dag outs, dag ins, string asm> :
- MTBUF <outs, ins, asm, []>,
- MTBUFe_vi <op>,
- SIMCInstr <opName, SIEncodingFamily.VI> {
- let DecoderNamespace="VI";
- let DisableDecoder = DisableVIDecoder;
-}
-
-multiclass MTBUF_m <bits<3> op, string opName, dag outs, dag ins, string asm,
- list<dag> pattern> {
-
- def "" : MTBUF_Pseudo <opName, outs, ins, pattern>;
-
- def _si : MTBUF_Real_si <op, opName, outs, ins, asm>;
-
- def _vi : MTBUF_Real_vi <{0, op{2}, op{1}, op{0}}, opName, outs, ins, asm>;
-
-}
-
-let mayStore = 1, mayLoad = 0 in {
-
-multiclass MTBUF_Store_Helper <bits<3> op, string opName,
- RegisterClass regClass> : MTBUF_m <
- op, opName, (outs),
- (ins regClass:$vdata, u16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc,
- i1imm:$addr64, i8imm:$dfmt, i8imm:$nfmt, VGPR_32:$vaddr,
- SReg_128:$srsrc, i1imm:$slc, i1imm:$tfe, SCSrc_32:$soffset),
- opName#" $vdata, $offset, $offen, $idxen, $glc, $addr64, $dfmt,"
- #" $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset", []
->;
-
-} // mayStore = 1, mayLoad = 0
-
-let mayLoad = 1, mayStore = 0 in {
-
-multiclass MTBUF_Load_Helper <bits<3> op, string opName,
- RegisterClass regClass> : MTBUF_m <
- op, opName, (outs regClass:$dst),
- (ins u16imm:$offset, i1imm:$offen, i1imm:$idxen, i1imm:$glc, i1imm:$addr64,
- i8imm:$dfmt, i8imm:$nfmt, VGPR_32:$vaddr, SReg_128:$srsrc,
- i1imm:$slc, i1imm:$tfe, SCSrc_32:$soffset),
- opName#" $dst, $offset, $offen, $idxen, $glc, $addr64, $dfmt,"
- #" $nfmt, $vaddr, $srsrc, $slc, $tfe, $soffset", []
->;
-
-} // mayLoad = 1, mayStore = 0
-
-//===----------------------------------------------------------------------===//
-// MUBUF classes
-//===----------------------------------------------------------------------===//
-
-class mubuf <bits<7> si, bits<7> vi = si> {
- field bits<7> SI = si;
- field bits<7> VI = vi;
-}
-
-let isCodeGenOnly = 0 in {
-
-class MUBUF_si <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
- MUBUF <outs, ins, asm, pattern>, MUBUFe <op> {
- let lds = 0;
-}
-
-} // End let isCodeGenOnly = 0
-
-class MUBUF_vi <bits<7> op, dag outs, dag ins, string asm, list<dag> pattern> :
- MUBUF <outs, ins, asm, pattern>, MUBUFe_vi <op> {
- let lds = 0;
-}
-
-class MUBUFAddr64Table <bit is_addr64, string suffix = ""> {
- bit IsAddr64 = is_addr64;
- string OpName = NAME # suffix;
-}
-
-class MUBUF_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
- MUBUF <outs, ins, "", pattern>,
- SIMCInstr<opName, SIEncodingFamily.NONE> {
- let isPseudo = 1;
- let isCodeGenOnly = 1;
-
- // dummy fields, so that we can use let statements around multiclasses
- bits<1> offen;
- bits<1> idxen;
- bits<8> vaddr;
- bits<1> glc;
- bits<1> slc;
- bits<1> tfe;
- bits<8> soffset;
-}
-
-class MUBUF_Real_si <mubuf op, string opName, dag outs, dag ins,
- string asm> :
- MUBUF <outs, ins, asm, []>,
- MUBUFe <op.SI>,
- SIMCInstr<opName, SIEncodingFamily.SI> {
- let lds = 0;
- let AssemblerPredicate = SIAssemblerPredicate;
- let DecoderNamespace="SICI";
- let DisableDecoder = DisableSIDecoder;
-}
-
-class MUBUF_Real_vi <mubuf op, string opName, dag outs, dag ins,
- string asm> :
- MUBUF <outs, ins, asm, []>,
- MUBUFe_vi <op.VI>,
- SIMCInstr<opName, SIEncodingFamily.VI> {
- let lds = 0;
- let AssemblerPredicate = VIAssemblerPredicate;
- let DecoderNamespace="VI";
- let DisableDecoder = DisableVIDecoder;
-}
-
-multiclass MUBUF_m <mubuf op, string opName, dag outs, dag ins, string asm,
- list<dag> pattern> {
-
- def "" : MUBUF_Pseudo <opName, outs, ins, pattern>,
- MUBUFAddr64Table <0>;
-
- let DisableWQM = 1 in {
- def "_exact" : MUBUF_Pseudo <opName, outs, ins, []>;
- }
-
- let addr64 = 0, isCodeGenOnly = 0 in {
- def _si : MUBUF_Real_si <op, opName, outs, ins, asm>;
- }
-
- def _vi : MUBUF_Real_vi <op, opName, outs, ins, asm>;
-}
-
-multiclass MUBUFAddr64_m <mubuf op, string opName, dag outs,
- dag ins, string asm, list<dag> pattern> {
-
- def "" : MUBUF_Pseudo <opName, outs, ins, pattern>,
- MUBUFAddr64Table <1>;
-
- let addr64 = 1, isCodeGenOnly = 0 in {
- def _si : MUBUF_Real_si <op, opName, outs, ins, asm>;
- }
-
- // There is no VI version. If the pseudo is selected, it should be lowered
- // for VI appropriately.
-}
-
-multiclass MUBUFAtomicOffset_m <mubuf op, string opName, dag outs, dag ins,
- string asm, list<dag> pattern, bit is_return> {
-
- def "" : MUBUF_Pseudo <opName, outs, ins, pattern>,
- MUBUFAddr64Table <0, !if(is_return, "_RTN", "")>,
- AtomicNoRet<NAME#"_OFFSET", is_return>;
-
- let offen = 0, idxen = 0, tfe = 0, vaddr = 0 in {
- let addr64 = 0 in {
- def _si : MUBUF_Real_si <op, opName, outs, ins, asm>;
- }
-
- def _vi : MUBUF_Real_vi <op, opName, outs, ins, asm>;
- }
-}
-
-multiclass MUBUFAtomicAddr64_m <mubuf op, string opName, dag outs, dag ins,
- string asm, list<dag> pattern, bit is_return> {
-
- def "" : MUBUF_Pseudo <opName, outs, ins, pattern>,
- MUBUFAddr64Table <1, !if(is_return, "_RTN", "")>,
- AtomicNoRet<NAME#"_ADDR64", is_return>;
-
- let offen = 0, idxen = 0, addr64 = 1, tfe = 0 in {
- def _si : MUBUF_Real_si <op, opName, outs, ins, asm>;
- }
-
- // There is no VI version. If the pseudo is selected, it should be lowered
- // for VI appropriately.
-}
-
-multiclass MUBUFAtomicOther_m <mubuf op, string opName, dag outs, dag ins,
- string asm, list<dag> pattern, bit is_return> {
-
- def "" : MUBUF_Pseudo <opName, outs, ins, pattern>,
- AtomicNoRet<opName, is_return>;
-
- let tfe = 0 in {
- let addr64 = 0 in {
- def _si : MUBUF_Real_si <op, opName, outs, ins, asm>;
- }
-
- def _vi : MUBUF_Real_vi <op, opName, outs, ins, asm>;
- }
-}
-
-multiclass MUBUF_Atomic <mubuf op, string name, RegisterClass rc,
- ValueType vt, SDPatternOperator atomic> {
-
- let mayStore = 1, mayLoad = 1, hasPostISelHook = 1, hasSideEffects = 1,
- DisableWQM = 1 in {
-
- // No return variants
- let glc = 0, AsmMatchConverter = "cvtMubufAtomic" in {
-
- defm _ADDR64 : MUBUFAtomicAddr64_m <
- op, name#"_addr64", (outs),
- (ins rc:$vdata, VReg_64:$vaddr, SReg_128:$srsrc,
- SCSrc_32:$soffset, offset:$offset, slc:$slc),
- name#" $vdata, $vaddr, $srsrc, $soffset addr64$offset$slc", [], 0
- >;
-
- defm _OFFSET : MUBUFAtomicOffset_m <
- op, name#"_offset", (outs),
- (ins rc:$vdata, SReg_128:$srsrc, SCSrc_32:$soffset, offset:$offset,
- slc:$slc),
- name#" $vdata, off, $srsrc, $soffset$offset$slc", [], 0
- >;
-
- let offen = 1, idxen = 0 in {
- defm _OFFEN : MUBUFAtomicOther_m <
- op, name#"_offen", (outs),
- (ins rc:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
- offset:$offset, slc:$slc),
- name#" $vdata, $vaddr, $srsrc, $soffset offen$offset$slc", [], 0
- >;
- }
-
- let offen = 0, idxen = 1 in {
- defm _IDXEN : MUBUFAtomicOther_m <
- op, name#"_idxen", (outs),
- (ins rc:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
- offset:$offset, slc:$slc),
- name#" $vdata, $vaddr, $srsrc, $soffset idxen$offset$slc", [], 0
- >;
- }
-
- let offen = 1, idxen = 1 in {
- defm _BOTHEN : MUBUFAtomicOther_m <
- op, name#"_bothen", (outs),
- (ins rc:$vdata, VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
- offset:$offset, slc:$slc),
- name#" $vdata, $vaddr, $srsrc, $soffset idxen offen$offset$slc",
- [], 0
- >;
- }
- } // glc = 0
-
- // Variants that return values
- let glc = 1, Constraints = "$vdata = $vdata_in",
- AsmMatchConverter = "cvtMubufAtomicReturn",
- DisableEncoding = "$vdata_in" in {
-
- defm _RTN_ADDR64 : MUBUFAtomicAddr64_m <
- op, name#"_rtn_addr64", (outs rc:$vdata),
- (ins rc:$vdata_in, VReg_64:$vaddr, SReg_128:$srsrc,
- SCSrc_32:$soffset, offset:$offset, slc:$slc),
- name#" $vdata, $vaddr, $srsrc, $soffset addr64$offset glc$slc",
- [(set vt:$vdata,
- (atomic (MUBUFAddr64Atomic v4i32:$srsrc, i64:$vaddr, i32:$soffset,
- i16:$offset, i1:$slc), vt:$vdata_in))], 1
- >;
-
- defm _RTN_OFFSET : MUBUFAtomicOffset_m <
- op, name#"_rtn_offset", (outs rc:$vdata),
- (ins rc:$vdata_in, SReg_128:$srsrc, SCSrc_32:$soffset,
- offset:$offset, slc:$slc),
- name#" $vdata, off, $srsrc, $soffset$offset glc$slc",
- [(set vt:$vdata,
- (atomic (MUBUFOffsetAtomic v4i32:$srsrc, i32:$soffset, i16:$offset,
- i1:$slc), vt:$vdata_in))], 1
- >;
-
- let offen = 1, idxen = 0 in {
- defm _RTN_OFFEN : MUBUFAtomicOther_m <
- op, name#"_rtn_offen", (outs rc:$vdata),
- (ins rc:$vdata_in, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
- offset:$offset, slc:$slc),
- name#" $vdata, $vaddr, $srsrc, $soffset offen$offset glc$slc",
- [], 1
- >;
- }
-
- let offen = 0, idxen = 1 in {
- defm _RTN_IDXEN : MUBUFAtomicOther_m <
- op, name#"_rtn_idxen", (outs rc:$vdata),
- (ins rc:$vdata_in, VGPR_32:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
- offset:$offset, slc:$slc),
- name#" $vdata, $vaddr, $srsrc, $soffset idxen$offset glc$slc",
- [], 1
- >;
- }
-
- let offen = 1, idxen = 1 in {
- defm _RTN_BOTHEN : MUBUFAtomicOther_m <
- op, name#"_rtn_bothen", (outs rc:$vdata),
- (ins rc:$vdata_in, VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
- offset:$offset, slc:$slc),
- name#" $vdata, $vaddr, $srsrc, $soffset idxen offen$offset glc$slc",
- [], 1
- >;
- }
- } // glc = 1
-
- } // mayStore = 1, mayLoad = 1, hasPostISelHook = 1
-}
-
-// FIXME: tfe can't be an operand because it requires a separate
-// opcode because it needs an N+1 register class dest register.
-multiclass MUBUF_Load_Helper <mubuf op, string name, RegisterClass regClass,
- ValueType load_vt = i32,
- SDPatternOperator ld = null_frag> {
-
- let mayLoad = 1, mayStore = 0 in {
- let offen = 0, idxen = 0, vaddr = 0 in {
- defm _OFFSET : MUBUF_m <op, name#"_offset", (outs regClass:$vdata),
- (ins SReg_128:$srsrc, SCSrc_32:$soffset,
- offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
- name#" $vdata, off, $srsrc, $soffset$offset$glc$slc$tfe",
- [(set load_vt:$vdata, (ld (MUBUFOffset v4i32:$srsrc,
- i32:$soffset, i16:$offset,
- i1:$glc, i1:$slc, i1:$tfe)))]>;
- }
-
- let offen = 1, idxen = 0 in {
- defm _OFFEN : MUBUF_m <op, name#"_offen", (outs regClass:$vdata),
- (ins VGPR_32:$vaddr, SReg_128:$srsrc,
- SCSrc_32:$soffset, offset:$offset, glc:$glc, slc:$slc,
- tfe:$tfe),
- name#" $vdata, $vaddr, $srsrc, $soffset offen$offset$glc$slc$tfe", []>;
- }
-
- let offen = 0, idxen = 1 in {
- defm _IDXEN : MUBUF_m <op, name#"_idxen", (outs regClass:$vdata),
- (ins VGPR_32:$vaddr, SReg_128:$srsrc,
- SCSrc_32:$soffset, offset:$offset, glc:$glc,
- slc:$slc, tfe:$tfe),
- name#" $vdata, $vaddr, $srsrc, $soffset idxen$offset$glc$slc$tfe", []>;
- }
-
- let offen = 1, idxen = 1 in {
- defm _BOTHEN : MUBUF_m <op, name#"_bothen", (outs regClass:$vdata),
- (ins VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
- offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
- name#" $vdata, $vaddr, $srsrc, $soffset idxen offen$offset$glc$slc$tfe", []>;
- }
-
- let offen = 0, idxen = 0 in {
- defm _ADDR64 : MUBUFAddr64_m <op, name#"_addr64", (outs regClass:$vdata),
- (ins VReg_64:$vaddr, SReg_128:$srsrc,
- SCSrc_32:$soffset, offset:$offset,
- glc:$glc, slc:$slc, tfe:$tfe),
- name#" $vdata, $vaddr, $srsrc, $soffset addr64$offset$glc$slc$tfe",
- [(set load_vt:$vdata, (ld (MUBUFAddr64 v4i32:$srsrc,
- i64:$vaddr, i32:$soffset,
- i16:$offset, i1:$glc, i1:$slc,
- i1:$tfe)))]>;
- }
- }
-}
-
-multiclass MUBUF_Store_Helper <mubuf op, string name, RegisterClass vdataClass,
- ValueType store_vt = i32, SDPatternOperator st = null_frag> {
- let mayLoad = 0, mayStore = 1 in {
- let offen = 0, idxen = 0, vaddr = 0 in {
- defm _OFFSET : MUBUF_m <op, name#"_offset",(outs),
- (ins vdataClass:$vdata, SReg_128:$srsrc, SCSrc_32:$soffset,
- offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
- name#" $vdata, off, $srsrc, $soffset$offset$glc$slc$tfe",
- [(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
- i16:$offset, i1:$glc, i1:$slc, i1:$tfe))]>;
- } // offen = 0, idxen = 0, vaddr = 0
-
- let offen = 1, idxen = 0 in {
- defm _OFFEN : MUBUF_m <op, name#"_offen", (outs),
- (ins vdataClass:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc,
- SCSrc_32:$soffset, offset:$offset, glc:$glc,
- slc:$slc, tfe:$tfe),
- name#" $vdata, $vaddr, $srsrc, $soffset offen"#
- "$offset$glc$slc$tfe", []>;
- } // end offen = 1, idxen = 0
-
- let offen = 0, idxen = 1 in {
- defm _IDXEN : MUBUF_m <op, name#"_idxen", (outs),
- (ins vdataClass:$vdata, VGPR_32:$vaddr, SReg_128:$srsrc,
- SCSrc_32:$soffset, offset:$offset, glc:$glc,
- slc:$slc, tfe:$tfe),
- name#" $vdata, $vaddr, $srsrc, $soffset idxen$offset$glc$slc$tfe", []>;
- }
-
- let offen = 1, idxen = 1 in {
- defm _BOTHEN : MUBUF_m <op, name#"_bothen", (outs),
- (ins vdataClass:$vdata, VReg_64:$vaddr, SReg_128:$srsrc, SCSrc_32:$soffset,
- offset:$offset, glc:$glc, slc:$slc, tfe:$tfe),
- name#" $vdata, $vaddr, $srsrc, $soffset idxen offen$offset$glc$slc$tfe", []>;
- }
-
- let offen = 0, idxen = 0 in {
- defm _ADDR64 : MUBUFAddr64_m <op, name#"_addr64", (outs),
- (ins vdataClass:$vdata, VReg_64:$vaddr, SReg_128:$srsrc,
- SCSrc_32:$soffset,
- offset:$offset, glc:$glc, slc:$slc,
- tfe:$tfe),
- name#" $vdata, $vaddr, $srsrc, $soffset addr64"#
- "$offset$glc$slc$tfe",
- [(st store_vt:$vdata,
- (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr,
- i32:$soffset, i16:$offset,
- i1:$glc, i1:$slc, i1:$tfe))]>;
- }
- } // End mayLoad = 0, mayStore = 1
-}
-
-// For cache invalidation instructions.
-multiclass MUBUF_Invalidate <mubuf op, string opName, SDPatternOperator node> {
- let hasSideEffects = 1, mayStore = 1, AsmMatchConverter = "" in {
- def "" : MUBUF_Pseudo <opName, (outs), (ins), [(node)]>;
-
- // Set everything to 0.
- let offset = 0, offen = 0, idxen = 0, glc = 0, vaddr = 0,
- vdata = 0, srsrc = 0, slc = 0, tfe = 0, soffset = 0 in {
- let addr64 = 0 in {
- def _si : MUBUF_Real_si <op, opName, (outs), (ins), opName>;
- }
-
- def _vi : MUBUF_Real_vi <op, opName, (outs), (ins), opName>;
- }
- } // End hasSideEffects = 1, mayStore = 1, AsmMatchConverter = ""
-}
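For reference, a multiclass like this is meant to be instantiated once per cache-invalidate instruction. A minimal sketch (the mubuf opcode values below are placeholders chosen only for illustration, not taken from this diff):

  // Sketch only: the opcode values are illustrative placeholders.
  defm BUFFER_WBINVL1 : MUBUF_Invalidate <
    mubuf<0x71, 0x3e>, "buffer_wbinvl1", int_amdgcn_buffer_wbinvl1
  >;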
-
-//===----------------------------------------------------------------------===//
-// FLAT classes
-//===----------------------------------------------------------------------===//
-
-class flat <bits<7> ci, bits<7> vi = ci> {
- field bits<7> CI = ci;
- field bits<7> VI = vi;
-}
-
-class FLAT_Pseudo <string opName, dag outs, dag ins, list<dag> pattern> :
- FLAT <0, outs, ins, "", pattern>,
- SIMCInstr<opName, SIEncodingFamily.NONE> {
- let isPseudo = 1;
- let isCodeGenOnly = 1;
-}
-
-class FLAT_Real_ci <bits<7> op, string opName, dag outs, dag ins, string asm> :
- FLAT <op, outs, ins, asm, []>,
- SIMCInstr<opName, SIEncodingFamily.SI> {
- let AssemblerPredicate = isCIOnly;
- let DecoderNamespace="CI";
-}
-
-class FLAT_Real_vi <bits<7> op, string opName, dag outs, dag ins, string asm> :
- FLAT <op, outs, ins, asm, []>,
- SIMCInstr<opName, SIEncodingFamily.VI> {
- let AssemblerPredicate = VIAssemblerPredicate;
- let DecoderNamespace="VI";
- let DisableDecoder = DisableVIDecoder;
-}
-
-multiclass FLAT_AtomicRet_m <flat op, dag outs, dag ins, string asm,
- list<dag> pattern> {
- def "" : FLAT_Pseudo <NAME#"_RTN", outs, ins, pattern>,
- AtomicNoRet <NAME, 1>;
-
- def _ci : FLAT_Real_ci <op.CI, NAME#"_RTN", outs, ins, asm>;
-
- def _vi : FLAT_Real_vi <op.VI, NAME#"_RTN", outs, ins, asm>;
-}
-
-multiclass FLAT_Load_Helper <flat op, string asm_name,
- RegisterClass regClass,
- dag outs = (outs regClass:$vdst),
- dag ins = (ins VReg_64:$addr, glc:$glc, slc:$slc, tfe:$tfe),
- string asm = asm_name#" $vdst, $addr$glc$slc$tfe"> {
-
- let data = 0, mayLoad = 1 in {
-
- def "" : FLAT_Pseudo <NAME, outs, ins, []>;
-
- def _ci : FLAT_Real_ci <op.CI, NAME, outs, ins, asm>;
-
- def _vi : FLAT_Real_vi <op.VI, NAME, outs, ins, asm>;
- }
-}
-
-multiclass FLAT_Store_Helper <flat op, string asm_name,
- RegisterClass vdataClass,
- dag outs = (outs),
- dag ins = (ins VReg_64:$addr, vdataClass:$data, glc:$glc,
- slc:$slc, tfe:$tfe),
- string asm = asm_name#" $addr, $data$glc$slc$tfe"> {
-
- let mayLoad = 0, mayStore = 1, vdst = 0 in {
-
- def "" : FLAT_Pseudo <NAME, outs, ins, []>;
-
- def _ci : FLAT_Real_ci <op.CI, NAME, outs, ins, asm>;
-
- def _vi : FLAT_Real_vi <op.VI, NAME, outs, ins, asm>;
- }
-}
-
-multiclass FLAT_ATOMIC <flat op, string asm_name, RegisterClass vdst_rc,
- ValueType vt, SDPatternOperator atomic = null_frag,
- ValueType data_vt = vt,
- RegisterClass data_rc = vdst_rc,
- string asm_noret = asm_name#" $addr, $data"#"$slc"#"$tfe"> {
-
- let mayLoad = 1, mayStore = 1, glc = 0, vdst = 0 in {
- def "" : FLAT_Pseudo <NAME, (outs),
- (ins VReg_64:$addr, data_rc:$data,
- slc:$slc, tfe:$tfe), []>,
- AtomicNoRet <NAME, 0>;
-
- def _ci : FLAT_Real_ci <op.CI, NAME, (outs),
- (ins VReg_64:$addr, data_rc:$data,
- slc:$slc, tfe:$tfe),
- asm_noret>;
-
- def _vi : FLAT_Real_vi <op.VI, NAME, (outs),
- (ins VReg_64:$addr, data_rc:$data,
- slc:$slc, tfe:$tfe),
- asm_noret>;
- }
-
- let glc = 1, hasPostISelHook = 1 in {
- defm _RTN : FLAT_AtomicRet_m <
- op, (outs vdst_rc:$vdst),
- (ins VReg_64:$addr, data_rc:$data, slc:$slc, tfe:$tfe),
- asm_name#" $vdst, $addr, $data glc$slc$tfe",
- [(set vt:$vdst,
- (atomic (FLATAtomic i64:$addr, i1:$slc, i1:$tfe), data_vt:$data))]
- >;
- }
-}
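Each FLAT atomic opcode instantiates this multiclass, producing both the no-return (glc = 0) variant and the returning _RTN (glc = 1) variant. A minimal sketch, with the flat opcode pair as placeholder values and the pattern operator left at its null_frag default:

  // Sketch only: the opcode values are illustrative placeholders.
  defm FLAT_ATOMIC_SWAP : FLAT_ATOMIC <
    flat<0x30, 0x40>, "flat_atomic_swap", VGPR_32, i32
  >;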
-
-class MIMG_Mask <string op, int channels> {
- string Op = op;
- int Channels = channels;
-}
-
-class mimg <bits<7> si, bits<7> vi = si> {
- field bits<7> SI = si;
- field bits<7> VI = vi;
-}
-
-class MIMG_Helper <dag outs, dag ins, string asm,
- string dns=""> : MIMG<outs, ins, asm,[]> {
- let mayLoad = 1;
- let mayStore = 0;
- let hasPostISelHook = 1;
- let DecoderNamespace = dns;
- let isAsmParserOnly = !if(!eq(dns,""), 1, 0);
- let AsmMatchConverter = "cvtMIMG";
-}
-
-class MIMG_NoSampler_Helper <bits<7> op, string asm,
- RegisterClass dst_rc,
- RegisterClass addr_rc,
- string dns=""> : MIMG_Helper <
- (outs dst_rc:$vdata),
- (ins addr_rc:$vaddr, SReg_256:$srsrc,
- dmask:$dmask, unorm:$unorm, glc:$glc, slc:$slc,
- r128:$r128, tfe:$tfe, lwe:$lwe, da:$da),
- asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da",
- dns>, MIMGe<op> {
- let ssamp = 0;
-}
-
-multiclass MIMG_NoSampler_Src_Helper <bits<7> op, string asm,
- RegisterClass dst_rc,
- int channels> {
- def _V1 : MIMG_NoSampler_Helper <op, asm, dst_rc, VGPR_32,
- !if(!eq(channels, 1), "AMDGPU", "")>,
- MIMG_Mask<asm#"_V1", channels>;
- def _V2 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_64>,
- MIMG_Mask<asm#"_V2", channels>;
- def _V4 : MIMG_NoSampler_Helper <op, asm, dst_rc, VReg_128>,
- MIMG_Mask<asm#"_V4", channels>;
-}
-
-multiclass MIMG_NoSampler <bits<7> op, string asm> {
- defm _V1 : MIMG_NoSampler_Src_Helper <op, asm, VGPR_32, 1>;
- defm _V2 : MIMG_NoSampler_Src_Helper <op, asm, VReg_64, 2>;
- defm _V3 : MIMG_NoSampler_Src_Helper <op, asm, VReg_96, 3>;
- defm _V4 : MIMG_NoSampler_Src_Helper <op, asm, VReg_128, 4>;
-}
-
-class MIMG_Store_Helper <bits<7> op, string asm,
- RegisterClass data_rc,
- RegisterClass addr_rc> : MIMG_Helper <
- (outs),
- (ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
- dmask:$dmask, unorm:$unorm, glc:$glc, slc:$slc,
- r128:$r128, tfe:$tfe, lwe:$lwe, da:$da),
- asm#" $vdata, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"
- >, MIMGe<op> {
- let ssamp = 0;
- let mayLoad = 1; // TableGen requires this for matching with the intrinsics
- let mayStore = 1;
- let hasSideEffects = 1;
- let hasPostISelHook = 0;
- let DisableWQM = 1;
-}
-
-multiclass MIMG_Store_Addr_Helper <bits<7> op, string asm,
- RegisterClass data_rc,
- int channels> {
- def _V1 : MIMG_Store_Helper <op, asm, data_rc, VGPR_32>,
- MIMG_Mask<asm#"_V1", channels>;
- def _V2 : MIMG_Store_Helper <op, asm, data_rc, VReg_64>,
- MIMG_Mask<asm#"_V2", channels>;
- def _V4 : MIMG_Store_Helper <op, asm, data_rc, VReg_128>,
- MIMG_Mask<asm#"_V4", channels>;
-}
-
-multiclass MIMG_Store <bits<7> op, string asm> {
- defm _V1 : MIMG_Store_Addr_Helper <op, asm, VGPR_32, 1>;
- defm _V2 : MIMG_Store_Addr_Helper <op, asm, VReg_64, 2>;
- defm _V3 : MIMG_Store_Addr_Helper <op, asm, VReg_96, 3>;
- defm _V4 : MIMG_Store_Addr_Helper <op, asm, VReg_128, 4>;
-}
-
-class MIMG_Atomic_Helper <string asm, RegisterClass data_rc,
- RegisterClass addr_rc> : MIMG_Helper <
- (outs data_rc:$vdst),
- (ins data_rc:$vdata, addr_rc:$vaddr, SReg_256:$srsrc,
- dmask:$dmask, unorm:$unorm, glc:$glc, slc:$slc,
- r128:$r128, tfe:$tfe, lwe:$lwe, da:$da),
- asm#" $vdst, $vaddr, $srsrc$dmask$unorm$glc$slc$r128$tfe$lwe$da"
- > {
- let mayStore = 1;
- let hasSideEffects = 1;
- let hasPostISelHook = 0;
- let DisableWQM = 1;
- let Constraints = "$vdst = $vdata";
- let AsmMatchConverter = "cvtMIMGAtomic";
-}
-
-class MIMG_Atomic_Real_si<mimg op, string name, string asm,
- RegisterClass data_rc, RegisterClass addr_rc> :
- MIMG_Atomic_Helper<asm, data_rc, addr_rc>,
- SIMCInstr<name, SIEncodingFamily.SI>,
- MIMGe<op.SI> {
- let isCodeGenOnly = 0;
- let AssemblerPredicates = [isSICI];
- let DecoderNamespace = "SICI";
- let DisableDecoder = DisableSIDecoder;
-}
-
-class MIMG_Atomic_Real_vi<mimg op, string name, string asm,
- RegisterClass data_rc, RegisterClass addr_rc> :
- MIMG_Atomic_Helper<asm, data_rc, addr_rc>,
- SIMCInstr<name, SIEncodingFamily.VI>,
- MIMGe<op.VI> {
- let isCodeGenOnly = 0;
- let AssemblerPredicates = [isVI];
- let DecoderNamespace = "VI";
- let DisableDecoder = DisableVIDecoder;
-}
-
-multiclass MIMG_Atomic_Helper_m <mimg op, string name, string asm,
- RegisterClass data_rc, RegisterClass addr_rc> {
- let isPseudo = 1, isCodeGenOnly = 1 in {
- def "" : MIMG_Atomic_Helper<asm, data_rc, addr_rc>,
- SIMCInstr<name, SIEncodingFamily.NONE>;
- }
-
- let ssamp = 0 in {
- def _si : MIMG_Atomic_Real_si<op, name, asm, data_rc, addr_rc>;
-
- def _vi : MIMG_Atomic_Real_vi<op, name, asm, data_rc, addr_rc>;
- }
-}
-
-multiclass MIMG_Atomic <mimg op, string asm, RegisterClass data_rc = VGPR_32> {
- defm _V1 : MIMG_Atomic_Helper_m <op, asm # "_V1", asm, data_rc, VGPR_32>;
- defm _V2 : MIMG_Atomic_Helper_m <op, asm # "_V2", asm, data_rc, VReg_64>;
- defm _V4 : MIMG_Atomic_Helper_m <op, asm # "_V4", asm, data_rc, VReg_128>;
-}
-
-class MIMG_Sampler_Helper <bits<7> op, string asm,
- RegisterClass dst_rc,
- RegisterClass src_rc,
- int wqm,
- string dns=""> : MIMG_Helper <
- (outs dst_rc:$vdata),
- (ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,
- dmask:$dmask, unorm:$unorm, glc:$glc, slc:$slc,
- r128:$r128, tfe:$tfe, lwe:$lwe, da:$da),
- asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$glc$slc$r128$tfe$lwe$da",
- dns>, MIMGe<op> {
- let WQM = wqm;
-}
-
-multiclass MIMG_Sampler_Src_Helper <bits<7> op, string asm,
- RegisterClass dst_rc,
- int channels, int wqm> {
- def _V1 : MIMG_Sampler_Helper <op, asm, dst_rc, VGPR_32, wqm,
- !if(!eq(channels, 1), "AMDGPU", "")>,
- MIMG_Mask<asm#"_V1", channels>;
- def _V2 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_64, wqm>,
- MIMG_Mask<asm#"_V2", channels>;
- def _V4 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_128, wqm>,
- MIMG_Mask<asm#"_V4", channels>;
- def _V8 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_256, wqm>,
- MIMG_Mask<asm#"_V8", channels>;
- def _V16 : MIMG_Sampler_Helper <op, asm, dst_rc, VReg_512, wqm>,
- MIMG_Mask<asm#"_V16", channels>;
-}
-
-multiclass MIMG_Sampler <bits<7> op, string asm, int wqm=0> {
- defm _V1 : MIMG_Sampler_Src_Helper<op, asm, VGPR_32, 1, wqm>;
- defm _V2 : MIMG_Sampler_Src_Helper<op, asm, VReg_64, 2, wqm>;
- defm _V3 : MIMG_Sampler_Src_Helper<op, asm, VReg_96, 3, wqm>;
- defm _V4 : MIMG_Sampler_Src_Helper<op, asm, VReg_128, 4, wqm>;
-}
-
-multiclass MIMG_Sampler_WQM <bits<7> op, string asm> : MIMG_Sampler<op, asm, 1>;
-
-class MIMG_Gather_Helper <bits<7> op, string asm,
- RegisterClass dst_rc,
- RegisterClass src_rc, int wqm> : MIMG <
- (outs dst_rc:$vdata),
- (ins src_rc:$vaddr, SReg_256:$srsrc, SReg_128:$ssamp,
- dmask:$dmask, unorm:$unorm, glc:$glc, slc:$slc,
- r128:$r128, tfe:$tfe, lwe:$lwe, da:$da),
- asm#" $vdata, $vaddr, $srsrc, $ssamp$dmask$unorm$glc$slc$r128$tfe$lwe$da",
- []>, MIMGe<op> {
- let mayLoad = 1;
- let mayStore = 0;
-
- // DMASK was repurposed for GATHER4. 4 components are always
- // returned and DMASK works like a swizzle - it selects
- // the component to fetch. The only useful DMASK values are
- // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
- // (red,red,red,red) etc.) The ISA document doesn't mention
- // this.
- // Therefore, disable all code which updates DMASK by setting this:
- let Gather4 = 1;
- let hasPostISelHook = 0;
- let WQM = wqm;
-
- let isAsmParserOnly = 1; // TBD: fix it later
-}
-
-multiclass MIMG_Gather_Src_Helper <bits<7> op, string asm,
- RegisterClass dst_rc,
- int channels, int wqm> {
- def _V1 : MIMG_Gather_Helper <op, asm, dst_rc, VGPR_32, wqm>,
- MIMG_Mask<asm#"_V1", channels>;
- def _V2 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_64, wqm>,
- MIMG_Mask<asm#"_V2", channels>;
- def _V4 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_128, wqm>,
- MIMG_Mask<asm#"_V4", channels>;
- def _V8 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_256, wqm>,
- MIMG_Mask<asm#"_V8", channels>;
- def _V16 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_512, wqm>,
- MIMG_Mask<asm#"_V16", channels>;
-}
-
-multiclass MIMG_Gather <bits<7> op, string asm, int wqm=0> {
- defm _V1 : MIMG_Gather_Src_Helper<op, asm, VGPR_32, 1, wqm>;
- defm _V2 : MIMG_Gather_Src_Helper<op, asm, VReg_64, 2, wqm>;
- defm _V3 : MIMG_Gather_Src_Helper<op, asm, VReg_96, 3, wqm>;
- defm _V4 : MIMG_Gather_Src_Helper<op, asm, VReg_128, 4, wqm>;
-}
-
-multiclass MIMG_Gather_WQM <bits<7> op, string asm> : MIMG_Gather<op, asm, 1>;
-
-//===----------------------------------------------------------------------===//
// Vector instruction mappings
//===----------------------------------------------------------------------===//
@@ -3604,18 +1163,18 @@ multiclass MIMG_Gather_WQM <bits<7> op, string asm> : MIMG_Gather<op, asm, 1>;
def getVOPe64 : InstrMapping {
let FilterClass = "VOP";
let RowFields = ["OpName"];
- let ColFields = ["Size"];
- let KeyCol = ["4"];
- let ValueCols = [["8"]];
+ let ColFields = ["Size", "VOP3"];
+ let KeyCol = ["4", "0"];
+ let ValueCols = [["8", "1"]];
}
// Maps an opcode in e64 form to its e32 equivalent
def getVOPe32 : InstrMapping {
let FilterClass = "VOP";
let RowFields = ["OpName"];
- let ColFields = ["Size"];
- let KeyCol = ["8"];
- let ValueCols = [["4"]];
+ let ColFields = ["Size", "VOP3"];
+ let KeyCol = ["8", "1"];
+ let ValueCols = [["4", "0"]];
}
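With this change the two tables key on both fields: records in the VOP filter class share an OpName row and are told apart by the ("Size", "VOP3") column, so the 4-byte e32 encoding maps to its 8-byte e64 (VOP3) counterpart and back. A self-contained sketch of the record shape the mapping reads (a stand-in, not the actual VOP class hierarchy):

  // Stand-in for the fields getVOPe64/getVOPe32 read from VOP records.
  class VOPSketch<string name, int size, bit vop3> {
    string OpName = name; // row key shared by both encodings
    int Size = size;      // 4 = e32 encoding, 8 = e64 encoding
    bit VOP3 = vop3;      // 0 = e32 form, 1 = e64 (VOP3) form
  }
  def V_EXAMPLE_e32 : VOPSketch<"v_example", 4, 0>;
  def V_EXAMPLE_e64 : VOPSketch<"v_example", 8, 1>;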
def getMaskedMIMGOp : InstrMapping {
@@ -3628,7 +1187,7 @@ def getMaskedMIMGOp : InstrMapping {
// Maps a commuted opcode to its original version
def getCommuteOrig : InstrMapping {
- let FilterClass = "VOP2_REV";
+ let FilterClass = "Commutable_REV";
let RowFields = ["RevOp"];
let ColFields = ["IsOrig"];
let KeyCol = ["0"];
@@ -3637,31 +1196,13 @@ def getCommuteOrig : InstrMapping {
// Maps an original opcode to its commuted version
def getCommuteRev : InstrMapping {
- let FilterClass = "VOP2_REV";
+ let FilterClass = "Commutable_REV";
let RowFields = ["RevOp"];
let ColFields = ["IsOrig"];
let KeyCol = ["1"];
let ValueCols = [["0"]];
}
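Switching the filter class from VOP2_REV to Commutable_REV lets these two tables cover any commutable pair, not just VOP2: the original instruction and its reversed form carry the same RevOp row key and differ in IsOrig. A minimal sketch, assuming only the RevOp/IsOrig fields implied by the RowFields/ColFields above:

  // Sketch: getCommuteOrig looks up IsOrig = 0 and returns the IsOrig = 1 twin;
  // getCommuteRev goes the other way.
  class CommutableRevSketch<string revOp, bit isOrig> {
    string RevOp = revOp; // row key shared by the commuted pair
    bit IsOrig = isOrig;  // 1 on the original opcode, 0 on the reversed one
  }
  def V_EXAMPLE     : CommutableRevSketch<"v_example_rev", 1>;
  def V_EXAMPLE_REV : CommutableRevSketch<"v_example_rev", 0>;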
-def getCommuteCmpOrig : InstrMapping {
- let FilterClass = "VOP2_REV";
- let RowFields = ["RevOp"];
- let ColFields = ["IsOrig"];
- let KeyCol = ["0"];
- let ValueCols = [["1"]];
-}
-
-// Maps an original opcode to its commuted version
-def getCommuteCmpRev : InstrMapping {
- let FilterClass = "VOP2_REV";
- let RowFields = ["RevOp"];
- let ColFields = ["IsOrig"];
- let KeyCol = ["1"];
- let ValueCols = [["0"]];
-}
-
-
def getMCOpcodeGen : InstrMapping {
let FilterClass = "SIMCInstr";
let RowFields = ["PseudoInstr"];
@@ -3671,6 +1212,15 @@ def getMCOpcodeGen : InstrMapping {
[!cast<string>(SIEncodingFamily.VI)]];
}
+// Get equivalent SOPK instruction.
+def getSOPKOp : InstrMapping {
+ let FilterClass = "SOPKInstTable";
+ let RowFields = ["BaseCmpOp"];
+ let ColFields = ["IsSOPK"];
+ let KeyCol = ["0"];
+ let ValueCols = [["1"]];
+}
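The new table pairs a 32-bit SOPC compare with its immediate-form SOPK counterpart: both carry the same BaseCmpOp row key, and getSOPKOp looks up the IsSOPK = 0 record to find its IsSOPK = 1 twin. A minimal sketch, assuming only the fields implied by the RowFields/ColFields above (the tag class name is a stand-in for SOPKInstTable):

  // Sketch: the SOPC form and the SOPK form share a BaseCmpOp row key.
  class SOPKInstTableSketch<bit isSOPK, string baseCmpOp> {
    bit IsSOPK = isSOPK;          // 0 = SOPC compare, 1 = SOPK compare
    string BaseCmpOp = baseCmpOp;
  }
  def S_CMP_EQ_I32_tag  : SOPKInstTableSketch<0, "s_cmp_eq_i32">;
  def S_CMPK_EQ_I32_tag : SOPKInstTableSketch<1, "s_cmp_eq_i32">;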
+
def getAddr64Inst : InstrMapping {
let FilterClass = "MUBUFAddr64Table";
let RowFields = ["OpName"];
@@ -3699,4 +1249,6 @@ def getAtomicNoRetOp : InstrMapping {
include "SIInstructions.td"
include "CIInstructions.td"
-include "VIInstructions.td"
+
+include "DSInstructions.td"
+include "MIMGInstructions.td"
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td b/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td
index dde5f2fc6b40..bc35c2edc8d3 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -11,13 +11,6 @@
// that are not yet supported remain commented out.
//===----------------------------------------------------------------------===//
-class InterpSlots {
-int P0 = 2;
-int P10 = 0;
-int P20 = 1;
-}
-def INTERP : InterpSlots;
-
def isGCN : Predicate<"Subtarget->getGeneration() "
">= SISubtarget::SOUTHERN_ISLANDS">,
AssemblerPredicate<"FeatureGCN">;
@@ -25,9 +18,18 @@ def isSI : Predicate<"Subtarget->getGeneration() "
"== SISubtarget::SOUTHERN_ISLANDS">,
AssemblerPredicate<"FeatureSouthernIslands">;
-
def has16BankLDS : Predicate<"Subtarget->getLDSBankCount() == 16">;
def has32BankLDS : Predicate<"Subtarget->getLDSBankCount() == 32">;
+def HasVGPRIndexMode : Predicate<"Subtarget->hasVGPRIndexMode()">,
+ AssemblerPredicate<"FeatureVGPRIndexMode">;
+def HasMovrel : Predicate<"Subtarget->hasMovrel()">,
+ AssemblerPredicate<"FeatureMovrel">;
+
+include "VOPInstructions.td"
+include "SOPInstructions.td"
+include "SMInstructions.td"
+include "FLATInstructions.td"
+include "BUFInstructions.td"
let SubtargetPredicate = isGCN in {
@@ -35,1393 +37,8 @@ let SubtargetPredicate = isGCN in {
// EXP Instructions
//===----------------------------------------------------------------------===//
-defm EXP : EXP_m;
-
-//===----------------------------------------------------------------------===//
-// SMRD Instructions
-//===----------------------------------------------------------------------===//
-
-// We are using the SReg_32_XM0 and not the SReg_32 register class for 32-bit
-// SMRD instructions, because the SReg_32_XM0 register class does not include M0
-// and writing to M0 from an SMRD instruction will hang the GPU.
-defm S_LOAD_DWORD : SMRD_Helper <smrd<0x00>, "s_load_dword", SReg_64, SReg_32_XM0>;
-defm S_LOAD_DWORDX2 : SMRD_Helper <smrd<0x01>, "s_load_dwordx2", SReg_64, SReg_64>;
-defm S_LOAD_DWORDX4 : SMRD_Helper <smrd<0x02>, "s_load_dwordx4", SReg_64, SReg_128>;
-defm S_LOAD_DWORDX8 : SMRD_Helper <smrd<0x03>, "s_load_dwordx8", SReg_64, SReg_256>;
-defm S_LOAD_DWORDX16 : SMRD_Helper <smrd<0x04>, "s_load_dwordx16", SReg_64, SReg_512>;
-
-defm S_BUFFER_LOAD_DWORD : SMRD_Helper <
- smrd<0x08>, "s_buffer_load_dword", SReg_128, SReg_32_XM0
->;
-
-defm S_BUFFER_LOAD_DWORDX2 : SMRD_Helper <
- smrd<0x09>, "s_buffer_load_dwordx2", SReg_128, SReg_64
->;
-
-defm S_BUFFER_LOAD_DWORDX4 : SMRD_Helper <
- smrd<0x0a>, "s_buffer_load_dwordx4", SReg_128, SReg_128
->;
-
-defm S_BUFFER_LOAD_DWORDX8 : SMRD_Helper <
- smrd<0x0b>, "s_buffer_load_dwordx8", SReg_128, SReg_256
->;
-
-defm S_BUFFER_LOAD_DWORDX16 : SMRD_Helper <
- smrd<0x0c>, "s_buffer_load_dwordx16", SReg_128, SReg_512
->;
-
-let mayStore = ? in {
-// FIXME: mayStore = ? is a workaround for tablegen bug for different
-// inferred mayStore flags for the instruction pattern vs. standalone
-// Pat. Each considers the other contradictory.
-
-defm S_MEMTIME : SMRD_Special <smrd<0x1e, 0x24>, "s_memtime",
- (outs SReg_64:$sdst), ?, " $sdst", [(set i64:$sdst, (int_amdgcn_s_memtime))]
->;
-}
-
-defm S_DCACHE_INV : SMRD_Inval <smrd<0x1f, 0x20>, "s_dcache_inv",
- int_amdgcn_s_dcache_inv>;
-
-//===----------------------------------------------------------------------===//
-// SOP1 Instructions
-//===----------------------------------------------------------------------===//
-
-let isMoveImm = 1 in {
- let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
- defm S_MOV_B32 : SOP1_32 <sop1<0x03, 0x00>, "s_mov_b32", []>;
- defm S_MOV_B64 : SOP1_64 <sop1<0x04, 0x01>, "s_mov_b64", []>;
- } // End isReMaterializable = 1
-
- let Uses = [SCC] in {
- defm S_CMOV_B32 : SOP1_32 <sop1<0x05, 0x02>, "s_cmov_b32", []>;
- defm S_CMOV_B64 : SOP1_64 <sop1<0x06, 0x03>, "s_cmov_b64", []>;
- } // End Uses = [SCC]
-} // End isMoveImm = 1
-
-let Defs = [SCC] in {
- defm S_NOT_B32 : SOP1_32 <sop1<0x07, 0x04>, "s_not_b32",
- [(set i32:$sdst, (not i32:$src0))]
- >;
-
- defm S_NOT_B64 : SOP1_64 <sop1<0x08, 0x05>, "s_not_b64",
- [(set i64:$sdst, (not i64:$src0))]
- >;
- defm S_WQM_B32 : SOP1_32 <sop1<0x09, 0x06>, "s_wqm_b32", []>;
- defm S_WQM_B64 : SOP1_64 <sop1<0x0a, 0x07>, "s_wqm_b64", []>;
-} // End Defs = [SCC]
-
-
-defm S_BREV_B32 : SOP1_32 <sop1<0x0b, 0x08>, "s_brev_b32",
- [(set i32:$sdst, (bitreverse i32:$src0))]
->;
-defm S_BREV_B64 : SOP1_64 <sop1<0x0c, 0x09>, "s_brev_b64", []>;
-
-let Defs = [SCC] in {
- defm S_BCNT0_I32_B32 : SOP1_32 <sop1<0x0d, 0x0a>, "s_bcnt0_i32_b32", []>;
- defm S_BCNT0_I32_B64 : SOP1_32_64 <sop1<0x0e, 0x0b>, "s_bcnt0_i32_b64", []>;
- defm S_BCNT1_I32_B32 : SOP1_32 <sop1<0x0f, 0x0c>, "s_bcnt1_i32_b32",
- [(set i32:$sdst, (ctpop i32:$src0))]
- >;
- defm S_BCNT1_I32_B64 : SOP1_32_64 <sop1<0x10, 0x0d>, "s_bcnt1_i32_b64", []>;
-} // End Defs = [SCC]
-
-defm S_FF0_I32_B32 : SOP1_32 <sop1<0x11, 0x0e>, "s_ff0_i32_b32", []>;
-defm S_FF0_I32_B64 : SOP1_32_64 <sop1<0x12, 0x0f>, "s_ff0_i32_b64", []>;
-defm S_FF1_I32_B32 : SOP1_32 <sop1<0x13, 0x10>, "s_ff1_i32_b32",
- [(set i32:$sdst, (cttz_zero_undef i32:$src0))]
->;
-defm S_FF1_I32_B64 : SOP1_32_64 <sop1<0x14, 0x11>, "s_ff1_i32_b64", []>;
-
-defm S_FLBIT_I32_B32 : SOP1_32 <sop1<0x15, 0x12>, "s_flbit_i32_b32",
- [(set i32:$sdst, (AMDGPUffbh_u32 i32:$src0))]
->;
-
-defm S_FLBIT_I32_B64 : SOP1_32_64 <sop1<0x16, 0x13>, "s_flbit_i32_b64", []>;
-defm S_FLBIT_I32 : SOP1_32 <sop1<0x17, 0x14>, "s_flbit_i32",
- [(set i32:$sdst, (int_AMDGPU_flbit_i32 i32:$src0))]
->;
-defm S_FLBIT_I32_I64 : SOP1_32_64 <sop1<0x18, 0x15>, "s_flbit_i32_i64", []>;
-defm S_SEXT_I32_I8 : SOP1_32 <sop1<0x19, 0x16>, "s_sext_i32_i8",
- [(set i32:$sdst, (sext_inreg i32:$src0, i8))]
->;
-defm S_SEXT_I32_I16 : SOP1_32 <sop1<0x1a, 0x17>, "s_sext_i32_i16",
- [(set i32:$sdst, (sext_inreg i32:$src0, i16))]
->;
-
-defm S_BITSET0_B32 : SOP1_32 <sop1<0x1b, 0x18>, "s_bitset0_b32", []>;
-defm S_BITSET0_B64 : SOP1_64_32 <sop1<0x1c, 0x19>, "s_bitset0_b64", []>;
-defm S_BITSET1_B32 : SOP1_32 <sop1<0x1d, 0x1a>, "s_bitset1_b32", []>;
-defm S_BITSET1_B64 : SOP1_64_32 <sop1<0x1e, 0x1b>, "s_bitset1_b64", []>;
-defm S_GETPC_B64 : SOP1_64_0 <sop1<0x1f, 0x1c>, "s_getpc_b64", []>;
-defm S_SETPC_B64 : SOP1_1 <sop1<0x20, 0x1d>, "s_setpc_b64", []>;
-defm S_SWAPPC_B64 : SOP1_64 <sop1<0x21, 0x1e>, "s_swappc_b64", []>;
-defm S_RFE_B64 : SOP1_1 <sop1<0x22, 0x1f>, "s_rfe_b64", []>;
-
-let hasSideEffects = 1, Uses = [EXEC], Defs = [EXEC, SCC] in {
-
-defm S_AND_SAVEEXEC_B64 : SOP1_64 <sop1<0x24, 0x20>, "s_and_saveexec_b64", []>;
-defm S_OR_SAVEEXEC_B64 : SOP1_64 <sop1<0x25, 0x21>, "s_or_saveexec_b64", []>;
-defm S_XOR_SAVEEXEC_B64 : SOP1_64 <sop1<0x26, 0x22>, "s_xor_saveexec_b64", []>;
-defm S_ANDN2_SAVEEXEC_B64 : SOP1_64 <sop1<0x27, 0x23>, "s_andn2_saveexec_b64", []>;
-defm S_ORN2_SAVEEXEC_B64 : SOP1_64 <sop1<0x28, 0x24>, "s_orn2_saveexec_b64", []>;
-defm S_NAND_SAVEEXEC_B64 : SOP1_64 <sop1<0x29, 0x25>, "s_nand_saveexec_b64", []>;
-defm S_NOR_SAVEEXEC_B64 : SOP1_64 <sop1<0x2a, 0x26>, "s_nor_saveexec_b64", []>;
-defm S_XNOR_SAVEEXEC_B64 : SOP1_64 <sop1<0x2b, 0x27>, "s_xnor_saveexec_b64", []>;
-
-} // End hasSideEffects = 1, Uses = [EXEC], Defs = [EXEC, SCC]
-
-defm S_QUADMASK_B32 : SOP1_32 <sop1<0x2c, 0x28>, "s_quadmask_b32", []>;
-defm S_QUADMASK_B64 : SOP1_64 <sop1<0x2d, 0x29>, "s_quadmask_b64", []>;
-
-let Uses = [M0] in {
-defm S_MOVRELS_B32 : SOP1_32 <sop1<0x2e, 0x2a>, "s_movrels_b32", []>;
-defm S_MOVRELS_B64 : SOP1_64 <sop1<0x2f, 0x2b>, "s_movrels_b64", []>;
-defm S_MOVRELD_B32 : SOP1_32 <sop1<0x30, 0x2c>, "s_movreld_b32", []>;
-defm S_MOVRELD_B64 : SOP1_64 <sop1<0x31, 0x2d>, "s_movreld_b64", []>;
-} // End Uses = [M0]
-
-defm S_CBRANCH_JOIN : SOP1_1 <sop1<0x32, 0x2e>, "s_cbranch_join", []>;
-defm S_MOV_REGRD_B32 : SOP1_32 <sop1<0x33, 0x2f>, "s_mov_regrd_b32", []>;
-let Defs = [SCC] in {
- defm S_ABS_I32 : SOP1_32 <sop1<0x34, 0x30>, "s_abs_i32", []>;
-} // End Defs = [SCC]
-defm S_MOV_FED_B32 : SOP1_32 <sop1<0x35, 0x31>, "s_mov_fed_b32", []>;
-
-//===----------------------------------------------------------------------===//
-// SOP2 Instructions
-//===----------------------------------------------------------------------===//
-
-let Defs = [SCC] in { // Carry out goes to SCC
-let isCommutable = 1 in {
-defm S_ADD_U32 : SOP2_32 <sop2<0x00>, "s_add_u32", []>;
-defm S_ADD_I32 : SOP2_32 <sop2<0x02>, "s_add_i32",
- [(set i32:$sdst, (add SSrc_32:$src0, SSrc_32:$src1))]
->;
-} // End isCommutable = 1
-
-defm S_SUB_U32 : SOP2_32 <sop2<0x01>, "s_sub_u32", []>;
-defm S_SUB_I32 : SOP2_32 <sop2<0x03>, "s_sub_i32",
- [(set i32:$sdst, (sub SSrc_32:$src0, SSrc_32:$src1))]
->;
-
-let Uses = [SCC] in { // Carry in comes from SCC
-let isCommutable = 1 in {
-defm S_ADDC_U32 : SOP2_32 <sop2<0x04>, "s_addc_u32",
- [(set i32:$sdst, (adde (i32 SSrc_32:$src0), (i32 SSrc_32:$src1)))]>;
-} // End isCommutable = 1
-
-defm S_SUBB_U32 : SOP2_32 <sop2<0x05>, "s_subb_u32",
- [(set i32:$sdst, (sube (i32 SSrc_32:$src0), (i32 SSrc_32:$src1)))]>;
-} // End Uses = [SCC]
-
-defm S_MIN_I32 : SOP2_32 <sop2<0x06>, "s_min_i32",
- [(set i32:$sdst, (smin i32:$src0, i32:$src1))]
->;
-defm S_MIN_U32 : SOP2_32 <sop2<0x07>, "s_min_u32",
- [(set i32:$sdst, (umin i32:$src0, i32:$src1))]
->;
-defm S_MAX_I32 : SOP2_32 <sop2<0x08>, "s_max_i32",
- [(set i32:$sdst, (smax i32:$src0, i32:$src1))]
->;
-defm S_MAX_U32 : SOP2_32 <sop2<0x09>, "s_max_u32",
- [(set i32:$sdst, (umax i32:$src0, i32:$src1))]
->;
-} // End Defs = [SCC]
-
-
-let Uses = [SCC] in {
- defm S_CSELECT_B32 : SOP2_32 <sop2<0x0a>, "s_cselect_b32", []>;
- defm S_CSELECT_B64 : SOP2_64 <sop2<0x0b>, "s_cselect_b64", []>;
-} // End Uses = [SCC]
-
-let Defs = [SCC] in {
-defm S_AND_B32 : SOP2_32 <sop2<0x0e, 0x0c>, "s_and_b32",
- [(set i32:$sdst, (and i32:$src0, i32:$src1))]
->;
-
-defm S_AND_B64 : SOP2_64 <sop2<0x0f, 0x0d>, "s_and_b64",
- [(set i64:$sdst, (and i64:$src0, i64:$src1))]
->;
-
-defm S_OR_B32 : SOP2_32 <sop2<0x10, 0x0e>, "s_or_b32",
- [(set i32:$sdst, (or i32:$src0, i32:$src1))]
->;
-
-defm S_OR_B64 : SOP2_64 <sop2<0x11, 0x0f>, "s_or_b64",
- [(set i64:$sdst, (or i64:$src0, i64:$src1))]
->;
-
-defm S_XOR_B32 : SOP2_32 <sop2<0x12, 0x10>, "s_xor_b32",
- [(set i32:$sdst, (xor i32:$src0, i32:$src1))]
->;
-
-defm S_XOR_B64 : SOP2_64 <sop2<0x13, 0x11>, "s_xor_b64",
- [(set i64:$sdst, (xor i64:$src0, i64:$src1))]
->;
-defm S_ANDN2_B32 : SOP2_32 <sop2<0x14, 0x12>, "s_andn2_b32", []>;
-defm S_ANDN2_B64 : SOP2_64 <sop2<0x15, 0x13>, "s_andn2_b64", []>;
-defm S_ORN2_B32 : SOP2_32 <sop2<0x16, 0x14>, "s_orn2_b32", []>;
-defm S_ORN2_B64 : SOP2_64 <sop2<0x17, 0x15>, "s_orn2_b64", []>;
-defm S_NAND_B32 : SOP2_32 <sop2<0x18, 0x16>, "s_nand_b32", []>;
-defm S_NAND_B64 : SOP2_64 <sop2<0x19, 0x17>, "s_nand_b64", []>;
-defm S_NOR_B32 : SOP2_32 <sop2<0x1a, 0x18>, "s_nor_b32", []>;
-defm S_NOR_B64 : SOP2_64 <sop2<0x1b, 0x19>, "s_nor_b64", []>;
-defm S_XNOR_B32 : SOP2_32 <sop2<0x1c, 0x1a>, "s_xnor_b32", []>;
-defm S_XNOR_B64 : SOP2_64 <sop2<0x1d, 0x1b>, "s_xnor_b64", []>;
-} // End Defs = [SCC]
-
-// Use added complexity so these patterns are preferred to the VALU patterns.
-let AddedComplexity = 1 in {
-let Defs = [SCC] in {
-
-defm S_LSHL_B32 : SOP2_32 <sop2<0x1e, 0x1c>, "s_lshl_b32",
- [(set i32:$sdst, (shl i32:$src0, i32:$src1))]
->;
-defm S_LSHL_B64 : SOP2_64_32 <sop2<0x1f, 0x1d>, "s_lshl_b64",
- [(set i64:$sdst, (shl i64:$src0, i32:$src1))]
->;
-defm S_LSHR_B32 : SOP2_32 <sop2<0x20, 0x1e>, "s_lshr_b32",
- [(set i32:$sdst, (srl i32:$src0, i32:$src1))]
->;
-defm S_LSHR_B64 : SOP2_64_32 <sop2<0x21, 0x1f>, "s_lshr_b64",
- [(set i64:$sdst, (srl i64:$src0, i32:$src1))]
->;
-defm S_ASHR_I32 : SOP2_32 <sop2<0x22, 0x20>, "s_ashr_i32",
- [(set i32:$sdst, (sra i32:$src0, i32:$src1))]
->;
-defm S_ASHR_I64 : SOP2_64_32 <sop2<0x23, 0x21>, "s_ashr_i64",
- [(set i64:$sdst, (sra i64:$src0, i32:$src1))]
->;
-} // End Defs = [SCC]
-
-defm S_BFM_B32 : SOP2_32 <sop2<0x24, 0x22>, "s_bfm_b32",
- [(set i32:$sdst, (AMDGPUbfm i32:$src0, i32:$src1))]>;
-defm S_BFM_B64 : SOP2_64_32_32 <sop2<0x25, 0x23>, "s_bfm_b64", []>;
-defm S_MUL_I32 : SOP2_32 <sop2<0x26, 0x24>, "s_mul_i32",
- [(set i32:$sdst, (mul i32:$src0, i32:$src1))]
->;
-
-} // End AddedComplexity = 1
-
-let Defs = [SCC] in {
-defm S_BFE_U32 : SOP2_32 <sop2<0x27, 0x25>, "s_bfe_u32", []>;
-defm S_BFE_I32 : SOP2_32 <sop2<0x28, 0x26>, "s_bfe_i32", []>;
-defm S_BFE_U64 : SOP2_64_32 <sop2<0x29, 0x27>, "s_bfe_u64", []>;
-defm S_BFE_I64 : SOP2_64_32 <sop2<0x2a, 0x28>, "s_bfe_i64", []>;
-} // End Defs = [SCC]
-
-let sdst = 0 in {
-defm S_CBRANCH_G_FORK : SOP2_m <
- sop2<0x2b, 0x29>, "s_cbranch_g_fork", (outs),
- (ins SReg_64:$src0, SReg_64:$src1), "s_cbranch_g_fork $src0, $src1", []
->;
-}
-
-let Defs = [SCC] in {
-defm S_ABSDIFF_I32 : SOP2_32 <sop2<0x2c, 0x2a>, "s_absdiff_i32", []>;
-} // End Defs = [SCC]
-
-//===----------------------------------------------------------------------===//
-// SOPC Instructions
-//===----------------------------------------------------------------------===//
-
-def S_CMP_EQ_I32 : SOPC_CMP_32 <0x00000000, "s_cmp_eq_i32", COND_EQ>;
-def S_CMP_LG_I32 : SOPC_CMP_32 <0x00000001, "s_cmp_lg_i32", COND_NE>;
-def S_CMP_GT_I32 : SOPC_CMP_32 <0x00000002, "s_cmp_gt_i32", COND_SGT>;
-def S_CMP_GE_I32 : SOPC_CMP_32 <0x00000003, "s_cmp_ge_i32", COND_SGE>;
-def S_CMP_LT_I32 : SOPC_CMP_32 <0x00000004, "s_cmp_lt_i32", COND_SLT>;
-def S_CMP_LE_I32 : SOPC_CMP_32 <0x00000005, "s_cmp_le_i32", COND_SLE>;
-def S_CMP_EQ_U32 : SOPC_CMP_32 <0x00000006, "s_cmp_eq_u32", COND_EQ>;
-def S_CMP_LG_U32 : SOPC_CMP_32 <0x00000007, "s_cmp_lg_u32", COND_NE >;
-def S_CMP_GT_U32 : SOPC_CMP_32 <0x00000008, "s_cmp_gt_u32", COND_UGT>;
-def S_CMP_GE_U32 : SOPC_CMP_32 <0x00000009, "s_cmp_ge_u32", COND_UGE>;
-def S_CMP_LT_U32 : SOPC_CMP_32 <0x0000000a, "s_cmp_lt_u32", COND_ULT>;
-def S_CMP_LE_U32 : SOPC_CMP_32 <0x0000000b, "s_cmp_le_u32", COND_ULE>;
-def S_BITCMP0_B32 : SOPC_32 <0x0000000c, "s_bitcmp0_b32">;
-def S_BITCMP1_B32 : SOPC_32 <0x0000000d, "s_bitcmp1_b32">;
-def S_BITCMP0_B64 : SOPC_64_32 <0x0000000e, "s_bitcmp0_b64">;
-def S_BITCMP1_B64 : SOPC_64_32 <0x0000000f, "s_bitcmp1_b64">;
-def S_SETVSKIP : SOPC_32 <0x00000010, "s_setvskip">;
-
-//===----------------------------------------------------------------------===//
-// SOPK Instructions
-//===----------------------------------------------------------------------===//
-
-let isReMaterializable = 1, isMoveImm = 1 in {
-defm S_MOVK_I32 : SOPK_32 <sopk<0x00>, "s_movk_i32", []>;
-} // End isReMaterializable = 1
-let Uses = [SCC] in {
- defm S_CMOVK_I32 : SOPK_32 <sopk<0x02, 0x01>, "s_cmovk_i32", []>;
-}
-
-let isCompare = 1 in {
-
-/*
-This instruction is disabled for now until we can figure out how to teach
-the instruction selector to correctly use the S_CMP* vs V_CMP*
-instructions.
-
-When this instruction is enabled the code generator sometimes produces this
-invalid sequence:
-
-SCC = S_CMPK_EQ_I32 SGPR0, imm
-VCC = COPY SCC
-VGPR0 = V_CNDMASK VCC, VGPR0, VGPR1
-
-defm S_CMPK_EQ_I32 : SOPK_SCC <sopk<0x03, 0x02>, "s_cmpk_eq_i32",
- [(set i1:$dst, (setcc i32:$src0, imm:$src1, SETEQ))]
->;
-*/
-
-defm S_CMPK_EQ_I32 : SOPK_SCC <sopk<0x03, 0x02>, "s_cmpk_eq_i32", []>;
-defm S_CMPK_LG_I32 : SOPK_SCC <sopk<0x04, 0x03>, "s_cmpk_lg_i32", []>;
-defm S_CMPK_GT_I32 : SOPK_SCC <sopk<0x05, 0x04>, "s_cmpk_gt_i32", []>;
-defm S_CMPK_GE_I32 : SOPK_SCC <sopk<0x06, 0x05>, "s_cmpk_ge_i32", []>;
-defm S_CMPK_LT_I32 : SOPK_SCC <sopk<0x07, 0x06>, "s_cmpk_lt_i32", []>;
-defm S_CMPK_LE_I32 : SOPK_SCC <sopk<0x08, 0x07>, "s_cmpk_le_i32", []>;
-defm S_CMPK_EQ_U32 : SOPK_SCC <sopk<0x09, 0x08>, "s_cmpk_eq_u32", []>;
-defm S_CMPK_LG_U32 : SOPK_SCC <sopk<0x0a, 0x09>, "s_cmpk_lg_u32", []>;
-defm S_CMPK_GT_U32 : SOPK_SCC <sopk<0x0b, 0x0a>, "s_cmpk_gt_u32", []>;
-defm S_CMPK_GE_U32 : SOPK_SCC <sopk<0x0c, 0x0b>, "s_cmpk_ge_u32", []>;
-defm S_CMPK_LT_U32 : SOPK_SCC <sopk<0x0d, 0x0c>, "s_cmpk_lt_u32", []>;
-defm S_CMPK_LE_U32 : SOPK_SCC <sopk<0x0e, 0x0d>, "s_cmpk_le_u32", []>;
-} // End isCompare = 1
-
-let Defs = [SCC], isCommutable = 1, DisableEncoding = "$src0",
- Constraints = "$sdst = $src0" in {
- defm S_ADDK_I32 : SOPK_32TIE <sopk<0x0f, 0x0e>, "s_addk_i32", []>;
- defm S_MULK_I32 : SOPK_32TIE <sopk<0x10, 0x0f>, "s_mulk_i32", []>;
-}
-
-defm S_CBRANCH_I_FORK : SOPK_m <
- sopk<0x11, 0x10>, "s_cbranch_i_fork", (outs),
- (ins SReg_64:$sdst, u16imm:$simm16), " $sdst, $simm16"
->;
-
-let mayLoad = 1 in {
-defm S_GETREG_B32 : SOPK_m <
- sopk<0x12, 0x11>, "s_getreg_b32", (outs SReg_32:$sdst),
- (ins hwreg:$simm16), " $sdst, $simm16"
->;
-}
-
-defm S_SETREG_B32 : SOPK_m <
- sopk<0x13, 0x12>, "s_setreg_b32", (outs),
- (ins SReg_32:$sdst, hwreg:$simm16), " $simm16, $sdst"
->;
-// FIXME: Not on SI?
-//defm S_GETREG_REGRD_B32 : SOPK_32 <sopk<0x14, 0x13>, "s_getreg_regrd_b32", []>;
-defm S_SETREG_IMM32_B32 : SOPK_IMM32 <
- sopk<0x15, 0x14>, "s_setreg_imm32_b32", (outs),
- (ins i32imm:$imm, hwreg:$simm16), " $simm16, $imm"
->;
-
-//===----------------------------------------------------------------------===//
-// SOPP Instructions
-//===----------------------------------------------------------------------===//
-
-def S_NOP : SOPP <0x00000000, (ins i16imm:$simm16), "s_nop $simm16">;
-
-let isTerminator = 1 in {
-
-def S_ENDPGM : SOPP <0x00000001, (ins), "s_endpgm",
- [(AMDGPUendpgm)]> {
- let simm16 = 0;
- let isBarrier = 1;
- let hasCtrlDep = 1;
- let hasSideEffects = 1;
-}
-
-let isBranch = 1 in {
-def S_BRANCH : SOPP <
- 0x00000002, (ins sopp_brtarget:$simm16), "s_branch $simm16",
- [(br bb:$simm16)]> {
- let isBarrier = 1;
-}
-
-let Uses = [SCC] in {
-def S_CBRANCH_SCC0 : SOPP <
- 0x00000004, (ins sopp_brtarget:$simm16),
- "s_cbranch_scc0 $simm16"
->;
-def S_CBRANCH_SCC1 : SOPP <
- 0x00000005, (ins sopp_brtarget:$simm16),
- "s_cbranch_scc1 $simm16",
- [(si_uniform_br_scc SCC, bb:$simm16)]
->;
-} // End Uses = [SCC]
-
-let Uses = [VCC] in {
-def S_CBRANCH_VCCZ : SOPP <
- 0x00000006, (ins sopp_brtarget:$simm16),
- "s_cbranch_vccz $simm16"
->;
-def S_CBRANCH_VCCNZ : SOPP <
- 0x00000007, (ins sopp_brtarget:$simm16),
- "s_cbranch_vccnz $simm16"
->;
-} // End Uses = [VCC]
-
-let Uses = [EXEC] in {
-def S_CBRANCH_EXECZ : SOPP <
- 0x00000008, (ins sopp_brtarget:$simm16),
- "s_cbranch_execz $simm16"
->;
-def S_CBRANCH_EXECNZ : SOPP <
- 0x00000009, (ins sopp_brtarget:$simm16),
- "s_cbranch_execnz $simm16"
->;
-} // End Uses = [EXEC]
-
-
-} // End isBranch = 1
-} // End isTerminator = 1
-
-let hasSideEffects = 1 in {
-def S_BARRIER : SOPP <0x0000000a, (ins), "s_barrier",
- [(int_amdgcn_s_barrier)]
-> {
- let SchedRW = [WriteBarrier];
- let simm16 = 0;
- let mayLoad = 1;
- let mayStore = 1;
- let isConvergent = 1;
-}
-
-let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in
-def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "s_waitcnt $simm16">;
-def S_SETHALT : SOPP <0x0000000d, (ins i16imm:$simm16), "s_sethalt $simm16">;
-
-// On SI the documentation says sleep for approximately 64 * low 2
-// bits, consistent with the reported maximum of 448. On VI the
-// maximum reported is 960 cycles, so 960 / 64 = 15 max, so is the
-// maximum really 15 on VI?
-def S_SLEEP : SOPP <0x0000000e, (ins i32imm:$simm16),
- "s_sleep $simm16", [(int_amdgcn_s_sleep SIMM16bit:$simm16)]> {
- let hasSideEffects = 1;
- let mayLoad = 1;
- let mayStore = 1;
-}
-
-def S_SETPRIO : SOPP <0x0000000f, (ins i16imm:$simm16), "s_setprio $simm16">;
-
-let Uses = [EXEC, M0] in {
- // FIXME: Should this be mayLoad+mayStore?
- def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16), "s_sendmsg $simm16",
- [(AMDGPUsendmsg (i32 imm:$simm16))]
- >;
-} // End Uses = [EXEC, M0]
-
-def S_SENDMSGHALT : SOPP <0x00000011, (ins SendMsgImm:$simm16), "s_sendmsghalt $simm16">;
-def S_TRAP : SOPP <0x00000012, (ins i16imm:$simm16), "s_trap $simm16">;
-def S_ICACHE_INV : SOPP <0x00000013, (ins), "s_icache_inv"> {
- let simm16 = 0;
-}
-def S_INCPERFLEVEL : SOPP <0x00000014, (ins i16imm:$simm16), "s_incperflevel $simm16">;
-def S_DECPERFLEVEL : SOPP <0x00000015, (ins i16imm:$simm16), "s_decperflevel $simm16">;
-def S_TTRACEDATA : SOPP <0x00000016, (ins), "s_ttracedata"> {
- let simm16 = 0;
-}
-} // End hasSideEffects
-
-//===----------------------------------------------------------------------===//
-// VOPC Instructions
-//===----------------------------------------------------------------------===//
-
-let isCompare = 1, isCommutable = 1 in {
-
-defm V_CMP_F_F32 : VOPC_F32 <vopc<0x0, 0x40>, "v_cmp_f_f32">;
-defm V_CMP_LT_F32 : VOPC_F32 <vopc<0x1, 0x41>, "v_cmp_lt_f32", COND_OLT, "v_cmp_gt_f32">;
-defm V_CMP_EQ_F32 : VOPC_F32 <vopc<0x2, 0x42>, "v_cmp_eq_f32", COND_OEQ>;
-defm V_CMP_LE_F32 : VOPC_F32 <vopc<0x3, 0x43>, "v_cmp_le_f32", COND_OLE, "v_cmp_ge_f32">;
-defm V_CMP_GT_F32 : VOPC_F32 <vopc<0x4, 0x44>, "v_cmp_gt_f32", COND_OGT>;
-defm V_CMP_LG_F32 : VOPC_F32 <vopc<0x5, 0x45>, "v_cmp_lg_f32", COND_ONE>;
-defm V_CMP_GE_F32 : VOPC_F32 <vopc<0x6, 0x46>, "v_cmp_ge_f32", COND_OGE>;
-defm V_CMP_O_F32 : VOPC_F32 <vopc<0x7, 0x47>, "v_cmp_o_f32", COND_O>;
-defm V_CMP_U_F32 : VOPC_F32 <vopc<0x8, 0x48>, "v_cmp_u_f32", COND_UO>;
-defm V_CMP_NGE_F32 : VOPC_F32 <vopc<0x9, 0x49>, "v_cmp_nge_f32", COND_ULT, "v_cmp_nle_f32">;
-defm V_CMP_NLG_F32 : VOPC_F32 <vopc<0xa, 0x4a>, "v_cmp_nlg_f32", COND_UEQ>;
-defm V_CMP_NGT_F32 : VOPC_F32 <vopc<0xb, 0x4b>, "v_cmp_ngt_f32", COND_ULE, "v_cmp_nlt_f32">;
-defm V_CMP_NLE_F32 : VOPC_F32 <vopc<0xc, 0x4c>, "v_cmp_nle_f32", COND_UGT>;
-defm V_CMP_NEQ_F32 : VOPC_F32 <vopc<0xd, 0x4d>, "v_cmp_neq_f32", COND_UNE>;
-defm V_CMP_NLT_F32 : VOPC_F32 <vopc<0xe, 0x4e>, "v_cmp_nlt_f32", COND_UGE>;
-defm V_CMP_TRU_F32 : VOPC_F32 <vopc<0xf, 0x4f>, "v_cmp_tru_f32">;
-
-
-defm V_CMPX_F_F32 : VOPCX_F32 <vopc<0x10, 0x50>, "v_cmpx_f_f32">;
-defm V_CMPX_LT_F32 : VOPCX_F32 <vopc<0x11, 0x51>, "v_cmpx_lt_f32", "v_cmpx_gt_f32">;
-defm V_CMPX_EQ_F32 : VOPCX_F32 <vopc<0x12, 0x52>, "v_cmpx_eq_f32">;
-defm V_CMPX_LE_F32 : VOPCX_F32 <vopc<0x13, 0x53>, "v_cmpx_le_f32", "v_cmpx_ge_f32">;
-defm V_CMPX_GT_F32 : VOPCX_F32 <vopc<0x14, 0x54>, "v_cmpx_gt_f32">;
-defm V_CMPX_LG_F32 : VOPCX_F32 <vopc<0x15, 0x55>, "v_cmpx_lg_f32">;
-defm V_CMPX_GE_F32 : VOPCX_F32 <vopc<0x16, 0x56>, "v_cmpx_ge_f32">;
-defm V_CMPX_O_F32 : VOPCX_F32 <vopc<0x17, 0x57>, "v_cmpx_o_f32">;
-defm V_CMPX_U_F32 : VOPCX_F32 <vopc<0x18, 0x58>, "v_cmpx_u_f32">;
-defm V_CMPX_NGE_F32 : VOPCX_F32 <vopc<0x19, 0x59>, "v_cmpx_nge_f32">;
-defm V_CMPX_NLG_F32 : VOPCX_F32 <vopc<0x1a, 0x5a>, "v_cmpx_nlg_f32">;
-defm V_CMPX_NGT_F32 : VOPCX_F32 <vopc<0x1b, 0x5b>, "v_cmpx_ngt_f32">;
-defm V_CMPX_NLE_F32 : VOPCX_F32 <vopc<0x1c, 0x5c>, "v_cmpx_nle_f32">;
-defm V_CMPX_NEQ_F32 : VOPCX_F32 <vopc<0x1d, 0x5d>, "v_cmpx_neq_f32">;
-defm V_CMPX_NLT_F32 : VOPCX_F32 <vopc<0x1e, 0x5e>, "v_cmpx_nlt_f32">;
-defm V_CMPX_TRU_F32 : VOPCX_F32 <vopc<0x1f, 0x5f>, "v_cmpx_tru_f32">;
-
-
-defm V_CMP_F_F64 : VOPC_F64 <vopc<0x20, 0x60>, "v_cmp_f_f64">;
-defm V_CMP_LT_F64 : VOPC_F64 <vopc<0x21, 0x61>, "v_cmp_lt_f64", COND_OLT, "v_cmp_gt_f64">;
-defm V_CMP_EQ_F64 : VOPC_F64 <vopc<0x22, 0x62>, "v_cmp_eq_f64", COND_OEQ>;
-defm V_CMP_LE_F64 : VOPC_F64 <vopc<0x23, 0x63>, "v_cmp_le_f64", COND_OLE, "v_cmp_ge_f64">;
-defm V_CMP_GT_F64 : VOPC_F64 <vopc<0x24, 0x64>, "v_cmp_gt_f64", COND_OGT>;
-defm V_CMP_LG_F64 : VOPC_F64 <vopc<0x25, 0x65>, "v_cmp_lg_f64", COND_ONE>;
-defm V_CMP_GE_F64 : VOPC_F64 <vopc<0x26, 0x66>, "v_cmp_ge_f64", COND_OGE>;
-defm V_CMP_O_F64 : VOPC_F64 <vopc<0x27, 0x67>, "v_cmp_o_f64", COND_O>;
-defm V_CMP_U_F64 : VOPC_F64 <vopc<0x28, 0x68>, "v_cmp_u_f64", COND_UO>;
-defm V_CMP_NGE_F64 : VOPC_F64 <vopc<0x29, 0x69>, "v_cmp_nge_f64", COND_ULT, "v_cmp_nle_f64">;
-defm V_CMP_NLG_F64 : VOPC_F64 <vopc<0x2a, 0x6a>, "v_cmp_nlg_f64", COND_UEQ>;
-defm V_CMP_NGT_F64 : VOPC_F64 <vopc<0x2b, 0x6b>, "v_cmp_ngt_f64", COND_ULE, "v_cmp_nlt_f64">;
-defm V_CMP_NLE_F64 : VOPC_F64 <vopc<0x2c, 0x6c>, "v_cmp_nle_f64", COND_UGT>;
-defm V_CMP_NEQ_F64 : VOPC_F64 <vopc<0x2d, 0x6d>, "v_cmp_neq_f64", COND_UNE>;
-defm V_CMP_NLT_F64 : VOPC_F64 <vopc<0x2e, 0x6e>, "v_cmp_nlt_f64", COND_UGE>;
-defm V_CMP_TRU_F64 : VOPC_F64 <vopc<0x2f, 0x6f>, "v_cmp_tru_f64">;
-
-
-defm V_CMPX_F_F64 : VOPCX_F64 <vopc<0x30, 0x70>, "v_cmpx_f_f64">;
-defm V_CMPX_LT_F64 : VOPCX_F64 <vopc<0x31, 0x71>, "v_cmpx_lt_f64", "v_cmpx_gt_f64">;
-defm V_CMPX_EQ_F64 : VOPCX_F64 <vopc<0x32, 0x72>, "v_cmpx_eq_f64">;
-defm V_CMPX_LE_F64 : VOPCX_F64 <vopc<0x33, 0x73>, "v_cmpx_le_f64", "v_cmpx_ge_f64">;
-defm V_CMPX_GT_F64 : VOPCX_F64 <vopc<0x34, 0x74>, "v_cmpx_gt_f64">;
-defm V_CMPX_LG_F64 : VOPCX_F64 <vopc<0x35, 0x75>, "v_cmpx_lg_f64">;
-defm V_CMPX_GE_F64 : VOPCX_F64 <vopc<0x36, 0x76>, "v_cmpx_ge_f64">;
-defm V_CMPX_O_F64 : VOPCX_F64 <vopc<0x37, 0x77>, "v_cmpx_o_f64">;
-defm V_CMPX_U_F64 : VOPCX_F64 <vopc<0x38, 0x78>, "v_cmpx_u_f64">;
-defm V_CMPX_NGE_F64 : VOPCX_F64 <vopc<0x39, 0x79>, "v_cmpx_nge_f64", "v_cmpx_nle_f64">;
-defm V_CMPX_NLG_F64 : VOPCX_F64 <vopc<0x3a, 0x7a>, "v_cmpx_nlg_f64">;
-defm V_CMPX_NGT_F64 : VOPCX_F64 <vopc<0x3b, 0x7b>, "v_cmpx_ngt_f64", "v_cmpx_nlt_f64">;
-defm V_CMPX_NLE_F64 : VOPCX_F64 <vopc<0x3c, 0x7c>, "v_cmpx_nle_f64">;
-defm V_CMPX_NEQ_F64 : VOPCX_F64 <vopc<0x3d, 0x7d>, "v_cmpx_neq_f64">;
-defm V_CMPX_NLT_F64 : VOPCX_F64 <vopc<0x3e, 0x7e>, "v_cmpx_nlt_f64">;
-defm V_CMPX_TRU_F64 : VOPCX_F64 <vopc<0x3f, 0x7f>, "v_cmpx_tru_f64">;
-
-
-let SubtargetPredicate = isSICI in {
-
-defm V_CMPS_F_F32 : VOPC_F32 <vopc<0x40>, "v_cmps_f_f32">;
-defm V_CMPS_LT_F32 : VOPC_F32 <vopc<0x41>, "v_cmps_lt_f32", COND_NULL, "v_cmps_gt_f32">;
-defm V_CMPS_EQ_F32 : VOPC_F32 <vopc<0x42>, "v_cmps_eq_f32">;
-defm V_CMPS_LE_F32 : VOPC_F32 <vopc<0x43>, "v_cmps_le_f32", COND_NULL, "v_cmps_ge_f32">;
-defm V_CMPS_GT_F32 : VOPC_F32 <vopc<0x44>, "v_cmps_gt_f32">;
-defm V_CMPS_LG_F32 : VOPC_F32 <vopc<0x45>, "v_cmps_lg_f32">;
-defm V_CMPS_GE_F32 : VOPC_F32 <vopc<0x46>, "v_cmps_ge_f32">;
-defm V_CMPS_O_F32 : VOPC_F32 <vopc<0x47>, "v_cmps_o_f32">;
-defm V_CMPS_U_F32 : VOPC_F32 <vopc<0x48>, "v_cmps_u_f32">;
-defm V_CMPS_NGE_F32 : VOPC_F32 <vopc<0x49>, "v_cmps_nge_f32", COND_NULL, "v_cmps_nle_f32">;
-defm V_CMPS_NLG_F32 : VOPC_F32 <vopc<0x4a>, "v_cmps_nlg_f32">;
-defm V_CMPS_NGT_F32 : VOPC_F32 <vopc<0x4b>, "v_cmps_ngt_f32", COND_NULL, "v_cmps_nlt_f32">;
-defm V_CMPS_NLE_F32 : VOPC_F32 <vopc<0x4c>, "v_cmps_nle_f32">;
-defm V_CMPS_NEQ_F32 : VOPC_F32 <vopc<0x4d>, "v_cmps_neq_f32">;
-defm V_CMPS_NLT_F32 : VOPC_F32 <vopc<0x4e>, "v_cmps_nlt_f32">;
-defm V_CMPS_TRU_F32 : VOPC_F32 <vopc<0x4f>, "v_cmps_tru_f32">;
-
-
-defm V_CMPSX_F_F32 : VOPCX_F32 <vopc<0x50>, "v_cmpsx_f_f32">;
-defm V_CMPSX_LT_F32 : VOPCX_F32 <vopc<0x51>, "v_cmpsx_lt_f32", "v_cmpsx_gt_f32">;
-defm V_CMPSX_EQ_F32 : VOPCX_F32 <vopc<0x52>, "v_cmpsx_eq_f32">;
-defm V_CMPSX_LE_F32 : VOPCX_F32 <vopc<0x53>, "v_cmpsx_le_f32", "v_cmpsx_ge_f32">;
-defm V_CMPSX_GT_F32 : VOPCX_F32 <vopc<0x54>, "v_cmpsx_gt_f32">;
-defm V_CMPSX_LG_F32 : VOPCX_F32 <vopc<0x55>, "v_cmpsx_lg_f32">;
-defm V_CMPSX_GE_F32 : VOPCX_F32 <vopc<0x56>, "v_cmpsx_ge_f32">;
-defm V_CMPSX_O_F32 : VOPCX_F32 <vopc<0x57>, "v_cmpsx_o_f32">;
-defm V_CMPSX_U_F32 : VOPCX_F32 <vopc<0x58>, "v_cmpsx_u_f32">;
-defm V_CMPSX_NGE_F32 : VOPCX_F32 <vopc<0x59>, "v_cmpsx_nge_f32", "v_cmpsx_nle_f32">;
-defm V_CMPSX_NLG_F32 : VOPCX_F32 <vopc<0x5a>, "v_cmpsx_nlg_f32">;
-defm V_CMPSX_NGT_F32 : VOPCX_F32 <vopc<0x5b>, "v_cmpsx_ngt_f32", "v_cmpsx_nlt_f32">;
-defm V_CMPSX_NLE_F32 : VOPCX_F32 <vopc<0x5c>, "v_cmpsx_nle_f32">;
-defm V_CMPSX_NEQ_F32 : VOPCX_F32 <vopc<0x5d>, "v_cmpsx_neq_f32">;
-defm V_CMPSX_NLT_F32 : VOPCX_F32 <vopc<0x5e>, "v_cmpsx_nlt_f32">;
-defm V_CMPSX_TRU_F32 : VOPCX_F32 <vopc<0x5f>, "v_cmpsx_tru_f32">;
-
-
-defm V_CMPS_F_F64 : VOPC_F64 <vopc<0x60>, "v_cmps_f_f64">;
-defm V_CMPS_LT_F64 : VOPC_F64 <vopc<0x61>, "v_cmps_lt_f64", COND_NULL, "v_cmps_gt_f64">;
-defm V_CMPS_EQ_F64 : VOPC_F64 <vopc<0x62>, "v_cmps_eq_f64">;
-defm V_CMPS_LE_F64 : VOPC_F64 <vopc<0x63>, "v_cmps_le_f64", COND_NULL, "v_cmps_ge_f64">;
-defm V_CMPS_GT_F64 : VOPC_F64 <vopc<0x64>, "v_cmps_gt_f64">;
-defm V_CMPS_LG_F64 : VOPC_F64 <vopc<0x65>, "v_cmps_lg_f64">;
-defm V_CMPS_GE_F64 : VOPC_F64 <vopc<0x66>, "v_cmps_ge_f64">;
-defm V_CMPS_O_F64 : VOPC_F64 <vopc<0x67>, "v_cmps_o_f64">;
-defm V_CMPS_U_F64 : VOPC_F64 <vopc<0x68>, "v_cmps_u_f64">;
-defm V_CMPS_NGE_F64 : VOPC_F64 <vopc<0x69>, "v_cmps_nge_f64", COND_NULL, "v_cmps_nle_f64">;
-defm V_CMPS_NLG_F64 : VOPC_F64 <vopc<0x6a>, "v_cmps_nlg_f64">;
-defm V_CMPS_NGT_F64 : VOPC_F64 <vopc<0x6b>, "v_cmps_ngt_f64", COND_NULL, "v_cmps_nlt_f64">;
-defm V_CMPS_NLE_F64 : VOPC_F64 <vopc<0x6c>, "v_cmps_nle_f64">;
-defm V_CMPS_NEQ_F64 : VOPC_F64 <vopc<0x6d>, "v_cmps_neq_f64">;
-defm V_CMPS_NLT_F64 : VOPC_F64 <vopc<0x6e>, "v_cmps_nlt_f64">;
-defm V_CMPS_TRU_F64 : VOPC_F64 <vopc<0x6f>, "v_cmps_tru_f64">;
-
-
-defm V_CMPSX_F_F64 : VOPCX_F64 <vopc<0x70>, "v_cmpsx_f_f64">;
-defm V_CMPSX_LT_F64 : VOPCX_F64 <vopc<0x71>, "v_cmpsx_lt_f64", "v_cmpsx_gt_f64">;
-defm V_CMPSX_EQ_F64 : VOPCX_F64 <vopc<0x72>, "v_cmpsx_eq_f64">;
-defm V_CMPSX_LE_F64 : VOPCX_F64 <vopc<0x73>, "v_cmpsx_le_f64", "v_cmpsx_ge_f64">;
-defm V_CMPSX_GT_F64 : VOPCX_F64 <vopc<0x74>, "v_cmpsx_gt_f64">;
-defm V_CMPSX_LG_F64 : VOPCX_F64 <vopc<0x75>, "v_cmpsx_lg_f64">;
-defm V_CMPSX_GE_F64 : VOPCX_F64 <vopc<0x76>, "v_cmpsx_ge_f64">;
-defm V_CMPSX_O_F64 : VOPCX_F64 <vopc<0x77>, "v_cmpsx_o_f64">;
-defm V_CMPSX_U_F64 : VOPCX_F64 <vopc<0x78>, "v_cmpsx_u_f64">;
-defm V_CMPSX_NGE_F64 : VOPCX_F64 <vopc<0x79>, "v_cmpsx_nge_f64", "v_cmpsx_nle_f64">;
-defm V_CMPSX_NLG_F64 : VOPCX_F64 <vopc<0x7a>, "v_cmpsx_nlg_f64">;
-defm V_CMPSX_NGT_F64 : VOPCX_F64 <vopc<0x7b>, "v_cmpsx_ngt_f64", "v_cmpsx_nlt_f64">;
-defm V_CMPSX_NLE_F64 : VOPCX_F64 <vopc<0x7c>, "v_cmpsx_nle_f64">;
-defm V_CMPSX_NEQ_F64 : VOPCX_F64 <vopc<0x7d>, "v_cmpsx_neq_f64">;
-defm V_CMPSX_NLT_F64 : VOPCX_F64 <vopc<0x7e>, "v_cmpsx_nlt_f64">;
-defm V_CMPSX_TRU_F64 : VOPCX_F64 <vopc<0x7f>, "v_cmpsx_tru_f64">;
-
-} // End SubtargetPredicate = isSICI
-
-defm V_CMP_F_I32 : VOPC_I32 <vopc<0x80, 0xc0>, "v_cmp_f_i32">;
-defm V_CMP_LT_I32 : VOPC_I32 <vopc<0x81, 0xc1>, "v_cmp_lt_i32", COND_SLT, "v_cmp_gt_i32">;
-defm V_CMP_EQ_I32 : VOPC_I32 <vopc<0x82, 0xc2>, "v_cmp_eq_i32", COND_EQ>;
-defm V_CMP_LE_I32 : VOPC_I32 <vopc<0x83, 0xc3>, "v_cmp_le_i32", COND_SLE, "v_cmp_ge_i32">;
-defm V_CMP_GT_I32 : VOPC_I32 <vopc<0x84, 0xc4>, "v_cmp_gt_i32", COND_SGT>;
-defm V_CMP_NE_I32 : VOPC_I32 <vopc<0x85, 0xc5>, "v_cmp_ne_i32", COND_NE>;
-defm V_CMP_GE_I32 : VOPC_I32 <vopc<0x86, 0xc6>, "v_cmp_ge_i32", COND_SGE>;
-defm V_CMP_T_I32 : VOPC_I32 <vopc<0x87, 0xc7>, "v_cmp_t_i32">;
-
-
-defm V_CMPX_F_I32 : VOPCX_I32 <vopc<0x90, 0xd0>, "v_cmpx_f_i32">;
-defm V_CMPX_LT_I32 : VOPCX_I32 <vopc<0x91, 0xd1>, "v_cmpx_lt_i32", "v_cmpx_gt_i32">;
-defm V_CMPX_EQ_I32 : VOPCX_I32 <vopc<0x92, 0xd2>, "v_cmpx_eq_i32">;
-defm V_CMPX_LE_I32 : VOPCX_I32 <vopc<0x93, 0xd3>, "v_cmpx_le_i32", "v_cmpx_ge_i32">;
-defm V_CMPX_GT_I32 : VOPCX_I32 <vopc<0x94, 0xd4>, "v_cmpx_gt_i32">;
-defm V_CMPX_NE_I32 : VOPCX_I32 <vopc<0x95, 0xd5>, "v_cmpx_ne_i32">;
-defm V_CMPX_GE_I32 : VOPCX_I32 <vopc<0x96, 0xd6>, "v_cmpx_ge_i32">;
-defm V_CMPX_T_I32 : VOPCX_I32 <vopc<0x97, 0xd7>, "v_cmpx_t_i32">;
-
-
-defm V_CMP_F_I64 : VOPC_I64 <vopc<0xa0, 0xe0>, "v_cmp_f_i64">;
-defm V_CMP_LT_I64 : VOPC_I64 <vopc<0xa1, 0xe1>, "v_cmp_lt_i64", COND_SLT, "v_cmp_gt_i64">;
-defm V_CMP_EQ_I64 : VOPC_I64 <vopc<0xa2, 0xe2>, "v_cmp_eq_i64", COND_EQ>;
-defm V_CMP_LE_I64 : VOPC_I64 <vopc<0xa3, 0xe3>, "v_cmp_le_i64", COND_SLE, "v_cmp_ge_i64">;
-defm V_CMP_GT_I64 : VOPC_I64 <vopc<0xa4, 0xe4>, "v_cmp_gt_i64", COND_SGT>;
-defm V_CMP_NE_I64 : VOPC_I64 <vopc<0xa5, 0xe5>, "v_cmp_ne_i64", COND_NE>;
-defm V_CMP_GE_I64 : VOPC_I64 <vopc<0xa6, 0xe6>, "v_cmp_ge_i64", COND_SGE>;
-defm V_CMP_T_I64 : VOPC_I64 <vopc<0xa7, 0xe7>, "v_cmp_t_i64">;
-
-
-defm V_CMPX_F_I64 : VOPCX_I64 <vopc<0xb0, 0xf0>, "v_cmpx_f_i64">;
-defm V_CMPX_LT_I64 : VOPCX_I64 <vopc<0xb1, 0xf1>, "v_cmpx_lt_i64", "v_cmpx_gt_i64">;
-defm V_CMPX_EQ_I64 : VOPCX_I64 <vopc<0xb2, 0xf2>, "v_cmpx_eq_i64">;
-defm V_CMPX_LE_I64 : VOPCX_I64 <vopc<0xb3, 0xf3>, "v_cmpx_le_i64", "v_cmpx_ge_i64">;
-defm V_CMPX_GT_I64 : VOPCX_I64 <vopc<0xb4, 0xf4>, "v_cmpx_gt_i64">;
-defm V_CMPX_NE_I64 : VOPCX_I64 <vopc<0xb5, 0xf5>, "v_cmpx_ne_i64">;
-defm V_CMPX_GE_I64 : VOPCX_I64 <vopc<0xb6, 0xf6>, "v_cmpx_ge_i64">;
-defm V_CMPX_T_I64 : VOPCX_I64 <vopc<0xb7, 0xf7>, "v_cmpx_t_i64">;
-
-
-defm V_CMP_F_U32 : VOPC_I32 <vopc<0xc0, 0xc8>, "v_cmp_f_u32">;
-defm V_CMP_LT_U32 : VOPC_I32 <vopc<0xc1, 0xc9>, "v_cmp_lt_u32", COND_ULT, "v_cmp_gt_u32">;
-defm V_CMP_EQ_U32 : VOPC_I32 <vopc<0xc2, 0xca>, "v_cmp_eq_u32", COND_EQ>;
-defm V_CMP_LE_U32 : VOPC_I32 <vopc<0xc3, 0xcb>, "v_cmp_le_u32", COND_ULE, "v_cmp_ge_u32">;
-defm V_CMP_GT_U32 : VOPC_I32 <vopc<0xc4, 0xcc>, "v_cmp_gt_u32", COND_UGT>;
-defm V_CMP_NE_U32 : VOPC_I32 <vopc<0xc5, 0xcd>, "v_cmp_ne_u32", COND_NE>;
-defm V_CMP_GE_U32 : VOPC_I32 <vopc<0xc6, 0xce>, "v_cmp_ge_u32", COND_UGE>;
-defm V_CMP_T_U32 : VOPC_I32 <vopc<0xc7, 0xcf>, "v_cmp_t_u32">;
-
-
-defm V_CMPX_F_U32 : VOPCX_I32 <vopc<0xd0, 0xd8>, "v_cmpx_f_u32">;
-defm V_CMPX_LT_U32 : VOPCX_I32 <vopc<0xd1, 0xd9>, "v_cmpx_lt_u32", "v_cmpx_gt_u32">;
-defm V_CMPX_EQ_U32 : VOPCX_I32 <vopc<0xd2, 0xda>, "v_cmpx_eq_u32">;
-defm V_CMPX_LE_U32 : VOPCX_I32 <vopc<0xd3, 0xdb>, "v_cmpx_le_u32", "v_cmpx_ge_u32">;
-defm V_CMPX_GT_U32 : VOPCX_I32 <vopc<0xd4, 0xdc>, "v_cmpx_gt_u32">;
-defm V_CMPX_NE_U32 : VOPCX_I32 <vopc<0xd5, 0xdd>, "v_cmpx_ne_u32">;
-defm V_CMPX_GE_U32 : VOPCX_I32 <vopc<0xd6, 0xde>, "v_cmpx_ge_u32">;
-defm V_CMPX_T_U32 : VOPCX_I32 <vopc<0xd7, 0xdf>, "v_cmpx_t_u32">;
-
-
-defm V_CMP_F_U64 : VOPC_I64 <vopc<0xe0, 0xe8>, "v_cmp_f_u64">;
-defm V_CMP_LT_U64 : VOPC_I64 <vopc<0xe1, 0xe9>, "v_cmp_lt_u64", COND_ULT, "v_cmp_gt_u64">;
-defm V_CMP_EQ_U64 : VOPC_I64 <vopc<0xe2, 0xea>, "v_cmp_eq_u64", COND_EQ>;
-defm V_CMP_LE_U64 : VOPC_I64 <vopc<0xe3, 0xeb>, "v_cmp_le_u64", COND_ULE, "v_cmp_ge_u64">;
-defm V_CMP_GT_U64 : VOPC_I64 <vopc<0xe4, 0xec>, "v_cmp_gt_u64", COND_UGT>;
-defm V_CMP_NE_U64 : VOPC_I64 <vopc<0xe5, 0xed>, "v_cmp_ne_u64", COND_NE>;
-defm V_CMP_GE_U64 : VOPC_I64 <vopc<0xe6, 0xee>, "v_cmp_ge_u64", COND_UGE>;
-defm V_CMP_T_U64 : VOPC_I64 <vopc<0xe7, 0xef>, "v_cmp_t_u64">;
-
-defm V_CMPX_F_U64 : VOPCX_I64 <vopc<0xf0, 0xf8>, "v_cmpx_f_u64">;
-defm V_CMPX_LT_U64 : VOPCX_I64 <vopc<0xf1, 0xf9>, "v_cmpx_lt_u64", "v_cmpx_gt_u64">;
-defm V_CMPX_EQ_U64 : VOPCX_I64 <vopc<0xf2, 0xfa>, "v_cmpx_eq_u64">;
-defm V_CMPX_LE_U64 : VOPCX_I64 <vopc<0xf3, 0xfb>, "v_cmpx_le_u64", "v_cmpx_ge_u64">;
-defm V_CMPX_GT_U64 : VOPCX_I64 <vopc<0xf4, 0xfc>, "v_cmpx_gt_u64">;
-defm V_CMPX_NE_U64 : VOPCX_I64 <vopc<0xf5, 0xfd>, "v_cmpx_ne_u64">;
-defm V_CMPX_GE_U64 : VOPCX_I64 <vopc<0xf6, 0xfe>, "v_cmpx_ge_u64">;
-defm V_CMPX_T_U64 : VOPCX_I64 <vopc<0xf7, 0xff>, "v_cmpx_t_u64">;
-
-} // End isCompare = 1, isCommutable = 1
-
-defm V_CMP_CLASS_F32 : VOPC_CLASS_F32 <vopc<0x88, 0x10>, "v_cmp_class_f32">;
-defm V_CMPX_CLASS_F32 : VOPCX_CLASS_F32 <vopc<0x98, 0x11>, "v_cmpx_class_f32">;
-defm V_CMP_CLASS_F64 : VOPC_CLASS_F64 <vopc<0xa8, 0x12>, "v_cmp_class_f64">;
-defm V_CMPX_CLASS_F64 : VOPCX_CLASS_F64 <vopc<0xb8, 0x13>, "v_cmpx_class_f64">;
-
-//===----------------------------------------------------------------------===//
-// DS Instructions
-//===----------------------------------------------------------------------===//
-
-defm DS_ADD_U32 : DS_1A1D_NORET <0x0, "ds_add_u32", VGPR_32>;
-defm DS_SUB_U32 : DS_1A1D_NORET <0x1, "ds_sub_u32", VGPR_32>;
-defm DS_RSUB_U32 : DS_1A1D_NORET <0x2, "ds_rsub_u32", VGPR_32>;
-defm DS_INC_U32 : DS_1A1D_NORET <0x3, "ds_inc_u32", VGPR_32>;
-defm DS_DEC_U32 : DS_1A1D_NORET <0x4, "ds_dec_u32", VGPR_32>;
-defm DS_MIN_I32 : DS_1A1D_NORET <0x5, "ds_min_i32", VGPR_32>;
-defm DS_MAX_I32 : DS_1A1D_NORET <0x6, "ds_max_i32", VGPR_32>;
-defm DS_MIN_U32 : DS_1A1D_NORET <0x7, "ds_min_u32", VGPR_32>;
-defm DS_MAX_U32 : DS_1A1D_NORET <0x8, "ds_max_u32", VGPR_32>;
-defm DS_AND_B32 : DS_1A1D_NORET <0x9, "ds_and_b32", VGPR_32>;
-defm DS_OR_B32 : DS_1A1D_NORET <0xa, "ds_or_b32", VGPR_32>;
-defm DS_XOR_B32 : DS_1A1D_NORET <0xb, "ds_xor_b32", VGPR_32>;
-defm DS_MSKOR_B32 : DS_1A2D_NORET <0xc, "ds_mskor_b32", VGPR_32>;
-let mayLoad = 0 in {
-defm DS_WRITE_B32 : DS_1A1D_NORET <0xd, "ds_write_b32", VGPR_32>;
-defm DS_WRITE2_B32 : DS_1A2D_Off8_NORET <0xe, "ds_write2_b32", VGPR_32>;
-defm DS_WRITE2ST64_B32 : DS_1A2D_Off8_NORET <0xf, "ds_write2st64_b32", VGPR_32>;
-}
-defm DS_CMPST_B32 : DS_1A2D_NORET <0x10, "ds_cmpst_b32", VGPR_32>;
-defm DS_CMPST_F32 : DS_1A2D_NORET <0x11, "ds_cmpst_f32", VGPR_32>;
-defm DS_MIN_F32 : DS_1A2D_NORET <0x12, "ds_min_f32", VGPR_32>;
-defm DS_MAX_F32 : DS_1A2D_NORET <0x13, "ds_max_f32", VGPR_32>;
-
-defm DS_GWS_INIT : DS_1A_GDS <0x19, "ds_gws_init">;
-defm DS_GWS_SEMA_V : DS_1A_GDS <0x1a, "ds_gws_sema_v">;
-defm DS_GWS_SEMA_BR : DS_1A_GDS <0x1b, "ds_gws_sema_br">;
-defm DS_GWS_SEMA_P : DS_1A_GDS <0x1c, "ds_gws_sema_p">;
-defm DS_GWS_BARRIER : DS_1A_GDS <0x1d, "ds_gws_barrier">;
-let mayLoad = 0 in {
-defm DS_WRITE_B8 : DS_1A1D_NORET <0x1e, "ds_write_b8", VGPR_32>;
-defm DS_WRITE_B16 : DS_1A1D_NORET <0x1f, "ds_write_b16", VGPR_32>;
-}
-defm DS_ADD_RTN_U32 : DS_1A1D_RET <0x20, "ds_add_rtn_u32", VGPR_32, "ds_add_u32">;
-defm DS_SUB_RTN_U32 : DS_1A1D_RET <0x21, "ds_sub_rtn_u32", VGPR_32, "ds_sub_u32">;
-defm DS_RSUB_RTN_U32 : DS_1A1D_RET <0x22, "ds_rsub_rtn_u32", VGPR_32, "ds_rsub_u32">;
-defm DS_INC_RTN_U32 : DS_1A1D_RET <0x23, "ds_inc_rtn_u32", VGPR_32, "ds_inc_u32">;
-defm DS_DEC_RTN_U32 : DS_1A1D_RET <0x24, "ds_dec_rtn_u32", VGPR_32, "ds_dec_u32">;
-defm DS_MIN_RTN_I32 : DS_1A1D_RET <0x25, "ds_min_rtn_i32", VGPR_32, "ds_min_i32">;
-defm DS_MAX_RTN_I32 : DS_1A1D_RET <0x26, "ds_max_rtn_i32", VGPR_32, "ds_max_i32">;
-defm DS_MIN_RTN_U32 : DS_1A1D_RET <0x27, "ds_min_rtn_u32", VGPR_32, "ds_min_u32">;
-defm DS_MAX_RTN_U32 : DS_1A1D_RET <0x28, "ds_max_rtn_u32", VGPR_32, "ds_max_u32">;
-defm DS_AND_RTN_B32 : DS_1A1D_RET <0x29, "ds_and_rtn_b32", VGPR_32, "ds_and_b32">;
-defm DS_OR_RTN_B32 : DS_1A1D_RET <0x2a, "ds_or_rtn_b32", VGPR_32, "ds_or_b32">;
-defm DS_XOR_RTN_B32 : DS_1A1D_RET <0x2b, "ds_xor_rtn_b32", VGPR_32, "ds_xor_b32">;
-defm DS_MSKOR_RTN_B32 : DS_1A2D_RET <0x2c, "ds_mskor_rtn_b32", VGPR_32, "ds_mskor_b32">;
-defm DS_WRXCHG_RTN_B32 : DS_1A1D_RET <0x2d, "ds_wrxchg_rtn_b32", VGPR_32>;
-defm DS_WRXCHG2_RTN_B32 : DS_1A2D_RET <
- 0x2e, "ds_wrxchg2_rtn_b32", VReg_64, "", VGPR_32
->;
-defm DS_WRXCHG2ST64_RTN_B32 : DS_1A2D_RET <
- 0x2f, "ds_wrxchg2st64_rtn_b32", VReg_64, "", VGPR_32
->;
-defm DS_CMPST_RTN_B32 : DS_1A2D_RET <0x30, "ds_cmpst_rtn_b32", VGPR_32, "ds_cmpst_b32">;
-defm DS_CMPST_RTN_F32 : DS_1A2D_RET <0x31, "ds_cmpst_rtn_f32", VGPR_32, "ds_cmpst_f32">;
-defm DS_MIN_RTN_F32 : DS_1A2D_RET <0x32, "ds_min_rtn_f32", VGPR_32, "ds_min_f32">;
-defm DS_MAX_RTN_F32 : DS_1A2D_RET <0x33, "ds_max_rtn_f32", VGPR_32, "ds_max_f32">;
-
-let Uses = [EXEC], mayLoad =0, mayStore = 0, isConvergent = 1 in {
-defm DS_SWIZZLE_B32 : DS_1A_RET_ <dsop<0x35, 0x3d>, "ds_swizzle_b32", VGPR_32>;
-}
-
-let mayStore = 0 in {
-defm DS_READ_B32 : DS_1A_RET <0x36, "ds_read_b32", VGPR_32>;
-defm DS_READ2_B32 : DS_1A_Off8_RET <0x37, "ds_read2_b32", VReg_64>;
-defm DS_READ2ST64_B32 : DS_1A_Off8_RET <0x38, "ds_read2st64_b32", VReg_64>;
-defm DS_READ_I8 : DS_1A_RET <0x39, "ds_read_i8", VGPR_32>;
-defm DS_READ_U8 : DS_1A_RET <0x3a, "ds_read_u8", VGPR_32>;
-defm DS_READ_I16 : DS_1A_RET <0x3b, "ds_read_i16", VGPR_32>;
-defm DS_READ_U16 : DS_1A_RET <0x3c, "ds_read_u16", VGPR_32>;
-}
-defm DS_CONSUME : DS_0A_RET <0x3d, "ds_consume">;
-defm DS_APPEND : DS_0A_RET <0x3e, "ds_append">;
-defm DS_ORDERED_COUNT : DS_1A_RET_GDS <0x3f, "ds_ordered_count">;
-defm DS_ADD_U64 : DS_1A1D_NORET <0x40, "ds_add_u64", VReg_64>;
-defm DS_SUB_U64 : DS_1A1D_NORET <0x41, "ds_sub_u64", VReg_64>;
-defm DS_RSUB_U64 : DS_1A1D_NORET <0x42, "ds_rsub_u64", VReg_64>;
-defm DS_INC_U64 : DS_1A1D_NORET <0x43, "ds_inc_u64", VReg_64>;
-defm DS_DEC_U64 : DS_1A1D_NORET <0x44, "ds_dec_u64", VReg_64>;
-defm DS_MIN_I64 : DS_1A1D_NORET <0x45, "ds_min_i64", VReg_64>;
-defm DS_MAX_I64 : DS_1A1D_NORET <0x46, "ds_max_i64", VReg_64>;
-defm DS_MIN_U64 : DS_1A1D_NORET <0x47, "ds_min_u64", VReg_64>;
-defm DS_MAX_U64 : DS_1A1D_NORET <0x48, "ds_max_u64", VReg_64>;
-defm DS_AND_B64 : DS_1A1D_NORET <0x49, "ds_and_b64", VReg_64>;
-defm DS_OR_B64 : DS_1A1D_NORET <0x4a, "ds_or_b64", VReg_64>;
-defm DS_XOR_B64 : DS_1A1D_NORET <0x4b, "ds_xor_b64", VReg_64>;
-defm DS_MSKOR_B64 : DS_1A2D_NORET <0x4c, "ds_mskor_b64", VReg_64>;
-let mayLoad = 0 in {
-defm DS_WRITE_B64 : DS_1A1D_NORET <0x4d, "ds_write_b64", VReg_64>;
-defm DS_WRITE2_B64 : DS_1A2D_Off8_NORET <0x4E, "ds_write2_b64", VReg_64>;
-defm DS_WRITE2ST64_B64 : DS_1A2D_Off8_NORET <0x4f, "ds_write2st64_b64", VReg_64>;
-}
-defm DS_CMPST_B64 : DS_1A2D_NORET <0x50, "ds_cmpst_b64", VReg_64>;
-defm DS_CMPST_F64 : DS_1A2D_NORET <0x51, "ds_cmpst_f64", VReg_64>;
-defm DS_MIN_F64 : DS_1A1D_NORET <0x52, "ds_min_f64", VReg_64>;
-defm DS_MAX_F64 : DS_1A1D_NORET <0x53, "ds_max_f64", VReg_64>;
-
-defm DS_ADD_RTN_U64 : DS_1A1D_RET <0x60, "ds_add_rtn_u64", VReg_64, "ds_add_u64">;
-defm DS_SUB_RTN_U64 : DS_1A1D_RET <0x61, "ds_sub_rtn_u64", VReg_64, "ds_sub_u64">;
-defm DS_RSUB_RTN_U64 : DS_1A1D_RET <0x62, "ds_rsub_rtn_u64", VReg_64, "ds_rsub_u64">;
-defm DS_INC_RTN_U64 : DS_1A1D_RET <0x63, "ds_inc_rtn_u64", VReg_64, "ds_inc_u64">;
-defm DS_DEC_RTN_U64 : DS_1A1D_RET <0x64, "ds_dec_rtn_u64", VReg_64, "ds_dec_u64">;
-defm DS_MIN_RTN_I64 : DS_1A1D_RET <0x65, "ds_min_rtn_i64", VReg_64, "ds_min_i64">;
-defm DS_MAX_RTN_I64 : DS_1A1D_RET <0x66, "ds_max_rtn_i64", VReg_64, "ds_max_i64">;
-defm DS_MIN_RTN_U64 : DS_1A1D_RET <0x67, "ds_min_rtn_u64", VReg_64, "ds_min_u64">;
-defm DS_MAX_RTN_U64 : DS_1A1D_RET <0x68, "ds_max_rtn_u64", VReg_64, "ds_max_u64">;
-defm DS_AND_RTN_B64 : DS_1A1D_RET <0x69, "ds_and_rtn_b64", VReg_64, "ds_and_b64">;
-defm DS_OR_RTN_B64 : DS_1A1D_RET <0x6a, "ds_or_rtn_b64", VReg_64, "ds_or_b64">;
-defm DS_XOR_RTN_B64 : DS_1A1D_RET <0x6b, "ds_xor_rtn_b64", VReg_64, "ds_xor_b64">;
-defm DS_MSKOR_RTN_B64 : DS_1A2D_RET <0x6c, "ds_mskor_rtn_b64", VReg_64, "ds_mskor_b64">;
-defm DS_WRXCHG_RTN_B64 : DS_1A1D_RET <0x6d, "ds_wrxchg_rtn_b64", VReg_64, "ds_wrxchg_b64">;
-defm DS_WRXCHG2_RTN_B64 : DS_1A2D_RET <0x6e, "ds_wrxchg2_rtn_b64", VReg_128, "ds_wrxchg2_b64", VReg_64>;
-defm DS_WRXCHG2ST64_RTN_B64 : DS_1A2D_RET <0x6f, "ds_wrxchg2st64_rtn_b64", VReg_128, "ds_wrxchg2st64_b64", VReg_64>;
-defm DS_CMPST_RTN_B64 : DS_1A2D_RET <0x70, "ds_cmpst_rtn_b64", VReg_64, "ds_cmpst_b64">;
-defm DS_CMPST_RTN_F64 : DS_1A2D_RET <0x71, "ds_cmpst_rtn_f64", VReg_64, "ds_cmpst_f64">;
-defm DS_MIN_RTN_F64 : DS_1A1D_RET <0x72, "ds_min_rtn_f64", VReg_64, "ds_min_f64">;
-defm DS_MAX_RTN_F64 : DS_1A1D_RET <0x73, "ds_max_rtn_f64", VReg_64, "ds_max_f64">;
-
-let mayStore = 0 in {
-defm DS_READ_B64 : DS_1A_RET <0x76, "ds_read_b64", VReg_64>;
-defm DS_READ2_B64 : DS_1A_Off8_RET <0x77, "ds_read2_b64", VReg_128>;
-defm DS_READ2ST64_B64 : DS_1A_Off8_RET <0x78, "ds_read2st64_b64", VReg_128>;
-}
-
-defm DS_ADD_SRC2_U32 : DS_1A <0x80, "ds_add_src2_u32">;
-defm DS_SUB_SRC2_U32 : DS_1A <0x81, "ds_sub_src2_u32">;
-defm DS_RSUB_SRC2_U32 : DS_1A <0x82, "ds_rsub_src2_u32">;
-defm DS_INC_SRC2_U32 : DS_1A <0x83, "ds_inc_src2_u32">;
-defm DS_DEC_SRC2_U32 : DS_1A <0x84, "ds_dec_src2_u32">;
-defm DS_MIN_SRC2_I32 : DS_1A <0x85, "ds_min_src2_i32">;
-defm DS_MAX_SRC2_I32 : DS_1A <0x86, "ds_max_src2_i32">;
-defm DS_MIN_SRC2_U32 : DS_1A <0x87, "ds_min_src2_u32">;
-defm DS_MAX_SRC2_U32 : DS_1A <0x88, "ds_max_src2_u32">;
-defm DS_AND_SRC2_B32 : DS_1A <0x89, "ds_and_src_b32">;
-defm DS_OR_SRC2_B32 : DS_1A <0x8a, "ds_or_src2_b32">;
-defm DS_XOR_SRC2_B32 : DS_1A <0x8b, "ds_xor_src2_b32">;
-defm DS_WRITE_SRC2_B32 : DS_1A_Off8_NORET <0x8d, "ds_write_src2_b32">;
-
-defm DS_MIN_SRC2_F32 : DS_1A <0x92, "ds_min_src2_f32">;
-defm DS_MAX_SRC2_F32 : DS_1A <0x93, "ds_max_src2_f32">;
-
-defm DS_ADD_SRC2_U64 : DS_1A <0xc0, "ds_add_src2_u64">;
-defm DS_SUB_SRC2_U64 : DS_1A <0xc1, "ds_sub_src2_u64">;
-defm DS_RSUB_SRC2_U64 : DS_1A <0xc2, "ds_rsub_src2_u64">;
-defm DS_INC_SRC2_U64 : DS_1A <0xc3, "ds_inc_src2_u64">;
-defm DS_DEC_SRC2_U64 : DS_1A <0xc4, "ds_dec_src2_u64">;
-defm DS_MIN_SRC2_I64 : DS_1A <0xc5, "ds_min_src2_i64">;
-defm DS_MAX_SRC2_I64 : DS_1A <0xc6, "ds_max_src2_i64">;
-defm DS_MIN_SRC2_U64 : DS_1A <0xc7, "ds_min_src2_u64">;
-defm DS_MAX_SRC2_U64 : DS_1A <0xc8, "ds_max_src2_u64">;
-defm DS_AND_SRC2_B64 : DS_1A <0xc9, "ds_and_src2_b64">;
-defm DS_OR_SRC2_B64 : DS_1A <0xca, "ds_or_src2_b64">;
-defm DS_XOR_SRC2_B64 : DS_1A <0xcb, "ds_xor_src2_b64">;
-defm DS_WRITE_SRC2_B64 : DS_1A_Off8_NORET <0xcd, "ds_write_src2_b64">;
-
-defm DS_MIN_SRC2_F64 : DS_1A <0xd2, "ds_min_src2_f64">;
-defm DS_MAX_SRC2_F64 : DS_1A <0xd3, "ds_max_src2_f64">;
-
-//===----------------------------------------------------------------------===//
-// MUBUF Instructions
-//===----------------------------------------------------------------------===//
-
-defm BUFFER_LOAD_FORMAT_X : MUBUF_Load_Helper <
- mubuf<0x00>, "buffer_load_format_x", VGPR_32
->;
-defm BUFFER_LOAD_FORMAT_XY : MUBUF_Load_Helper <
- mubuf<0x01>, "buffer_load_format_xy", VReg_64
->;
-defm BUFFER_LOAD_FORMAT_XYZ : MUBUF_Load_Helper <
- mubuf<0x02>, "buffer_load_format_xyz", VReg_96
->;
-defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <
- mubuf<0x03>, "buffer_load_format_xyzw", VReg_128
->;
-defm BUFFER_STORE_FORMAT_X : MUBUF_Store_Helper <
- mubuf<0x04>, "buffer_store_format_x", VGPR_32
->;
-defm BUFFER_STORE_FORMAT_XY : MUBUF_Store_Helper <
- mubuf<0x05>, "buffer_store_format_xy", VReg_64
->;
-defm BUFFER_STORE_FORMAT_XYZ : MUBUF_Store_Helper <
- mubuf<0x06>, "buffer_store_format_xyz", VReg_96
->;
-defm BUFFER_STORE_FORMAT_XYZW : MUBUF_Store_Helper <
- mubuf<0x07>, "buffer_store_format_xyzw", VReg_128
->;
-defm BUFFER_LOAD_UBYTE : MUBUF_Load_Helper <
- mubuf<0x08, 0x10>, "buffer_load_ubyte", VGPR_32, i32, mubuf_az_extloadi8
->;
-defm BUFFER_LOAD_SBYTE : MUBUF_Load_Helper <
- mubuf<0x09, 0x11>, "buffer_load_sbyte", VGPR_32, i32, mubuf_sextloadi8
->;
-defm BUFFER_LOAD_USHORT : MUBUF_Load_Helper <
- mubuf<0x0a, 0x12>, "buffer_load_ushort", VGPR_32, i32, mubuf_az_extloadi16
->;
-defm BUFFER_LOAD_SSHORT : MUBUF_Load_Helper <
- mubuf<0x0b, 0x13>, "buffer_load_sshort", VGPR_32, i32, mubuf_sextloadi16
->;
-defm BUFFER_LOAD_DWORD : MUBUF_Load_Helper <
- mubuf<0x0c, 0x14>, "buffer_load_dword", VGPR_32, i32, mubuf_load
->;
-defm BUFFER_LOAD_DWORDX2 : MUBUF_Load_Helper <
- mubuf<0x0d, 0x15>, "buffer_load_dwordx2", VReg_64, v2i32, mubuf_load
->;
-defm BUFFER_LOAD_DWORDX4 : MUBUF_Load_Helper <
- mubuf<0x0e, 0x17>, "buffer_load_dwordx4", VReg_128, v4i32, mubuf_load
->;
-
-defm BUFFER_STORE_BYTE : MUBUF_Store_Helper <
- mubuf<0x18>, "buffer_store_byte", VGPR_32, i32, truncstorei8_global
->;
-
-defm BUFFER_STORE_SHORT : MUBUF_Store_Helper <
- mubuf<0x1a>, "buffer_store_short", VGPR_32, i32, truncstorei16_global
->;
-
-defm BUFFER_STORE_DWORD : MUBUF_Store_Helper <
- mubuf<0x1c>, "buffer_store_dword", VGPR_32, i32, global_store
->;
-
-defm BUFFER_STORE_DWORDX2 : MUBUF_Store_Helper <
- mubuf<0x1d>, "buffer_store_dwordx2", VReg_64, v2i32, global_store
->;
-
-defm BUFFER_STORE_DWORDX4 : MUBUF_Store_Helper <
- mubuf<0x1e, 0x1f>, "buffer_store_dwordx4", VReg_128, v4i32, global_store
->;
-
-defm BUFFER_ATOMIC_SWAP : MUBUF_Atomic <
- mubuf<0x30, 0x40>, "buffer_atomic_swap", VGPR_32, i32, atomic_swap_global
->;
-defm BUFFER_ATOMIC_CMPSWAP : MUBUF_Atomic <
- mubuf<0x31, 0x41>, "buffer_atomic_cmpswap", VReg_64, v2i32, null_frag
->;
-defm BUFFER_ATOMIC_ADD : MUBUF_Atomic <
- mubuf<0x32, 0x42>, "buffer_atomic_add", VGPR_32, i32, atomic_add_global
->;
-defm BUFFER_ATOMIC_SUB : MUBUF_Atomic <
- mubuf<0x33, 0x43>, "buffer_atomic_sub", VGPR_32, i32, atomic_sub_global
->;
-//def BUFFER_ATOMIC_RSUB : MUBUF_ <mubuf<0x34>, "buffer_atomic_rsub", []>; // isn't on CI & VI
-defm BUFFER_ATOMIC_SMIN : MUBUF_Atomic <
- mubuf<0x35, 0x44>, "buffer_atomic_smin", VGPR_32, i32, atomic_min_global
->;
-defm BUFFER_ATOMIC_UMIN : MUBUF_Atomic <
- mubuf<0x36, 0x45>, "buffer_atomic_umin", VGPR_32, i32, atomic_umin_global
->;
-defm BUFFER_ATOMIC_SMAX : MUBUF_Atomic <
- mubuf<0x37, 0x46>, "buffer_atomic_smax", VGPR_32, i32, atomic_max_global
->;
-defm BUFFER_ATOMIC_UMAX : MUBUF_Atomic <
- mubuf<0x38, 0x47>, "buffer_atomic_umax", VGPR_32, i32, atomic_umax_global
->;
-defm BUFFER_ATOMIC_AND : MUBUF_Atomic <
- mubuf<0x39, 0x48>, "buffer_atomic_and", VGPR_32, i32, atomic_and_global
->;
-defm BUFFER_ATOMIC_OR : MUBUF_Atomic <
- mubuf<0x3a, 0x49>, "buffer_atomic_or", VGPR_32, i32, atomic_or_global
->;
-defm BUFFER_ATOMIC_XOR : MUBUF_Atomic <
- mubuf<0x3b, 0x4a>, "buffer_atomic_xor", VGPR_32, i32, atomic_xor_global
->;
-defm BUFFER_ATOMIC_INC : MUBUF_Atomic <
- mubuf<0x3c, 0x4b>, "buffer_atomic_inc", VGPR_32, i32, atomic_inc_global
->;
-defm BUFFER_ATOMIC_DEC : MUBUF_Atomic <
- mubuf<0x3d, 0x4c>, "buffer_atomic_dec", VGPR_32, i32, atomic_dec_global
->;
-
-//def BUFFER_ATOMIC_FCMPSWAP : MUBUF_Atomic <mubuf<0x3e>, "buffer_atomic_fcmpswap", []>; // isn't on VI
-//def BUFFER_ATOMIC_FMIN : MUBUF_Atomic <mubuf<0x3f>, "buffer_atomic_fmin", []>; // isn't on VI
-//def BUFFER_ATOMIC_FMAX : MUBUF_Atomic <mubuf<0x40>, "buffer_atomic_fmax", []>; // isn't on VI
-defm BUFFER_ATOMIC_SWAP_X2 : MUBUF_Atomic <
- mubuf<0x50, 0x60>, "buffer_atomic_swap_x2", VReg_64, i64, atomic_swap_global
->;
-defm BUFFER_ATOMIC_CMPSWAP_X2 : MUBUF_Atomic <
- mubuf<0x51, 0x61>, "buffer_atomic_cmpswap_x2", VReg_128, v2i64, null_frag
->;
-defm BUFFER_ATOMIC_ADD_X2 : MUBUF_Atomic <
- mubuf<0x52, 0x62>, "buffer_atomic_add_x2", VReg_64, i64, atomic_add_global
->;
-defm BUFFER_ATOMIC_SUB_X2 : MUBUF_Atomic <
- mubuf<0x53, 0x63>, "buffer_atomic_sub_x2", VReg_64, i64, atomic_sub_global
->;
-//defm BUFFER_ATOMIC_RSUB_X2 : MUBUF_Atomic <mubuf<0x54>, "buffer_atomic_rsub_x2", []>; // isn't on CI & VI
-defm BUFFER_ATOMIC_SMIN_X2 : MUBUF_Atomic <
- mubuf<0x55, 0x64>, "buffer_atomic_smin_x2", VReg_64, i64, atomic_min_global
->;
-defm BUFFER_ATOMIC_UMIN_X2 : MUBUF_Atomic <
- mubuf<0x56, 0x65>, "buffer_atomic_umin_x2", VReg_64, i64, atomic_umin_global
->;
-defm BUFFER_ATOMIC_SMAX_X2 : MUBUF_Atomic <
- mubuf<0x57, 0x66>, "buffer_atomic_smax_x2", VReg_64, i64, atomic_max_global
->;
-defm BUFFER_ATOMIC_UMAX_X2 : MUBUF_Atomic <
- mubuf<0x58, 0x67>, "buffer_atomic_umax_x2", VReg_64, i64, atomic_umax_global
->;
-defm BUFFER_ATOMIC_AND_X2 : MUBUF_Atomic <
- mubuf<0x59, 0x68>, "buffer_atomic_and_x2", VReg_64, i64, atomic_and_global
->;
-defm BUFFER_ATOMIC_OR_X2 : MUBUF_Atomic <
- mubuf<0x5a, 0x69>, "buffer_atomic_or_x2", VReg_64, i64, atomic_or_global
->;
-defm BUFFER_ATOMIC_XOR_X2 : MUBUF_Atomic <
- mubuf<0x5b, 0x6a>, "buffer_atomic_xor_x2", VReg_64, i64, atomic_xor_global
->;
-defm BUFFER_ATOMIC_INC_X2 : MUBUF_Atomic <
- mubuf<0x5c, 0x6b>, "buffer_atomic_inc_x2", VReg_64, i64, atomic_inc_global
->;
-defm BUFFER_ATOMIC_DEC_X2 : MUBUF_Atomic <
- mubuf<0x5d, 0x6c>, "buffer_atomic_dec_x2", VReg_64, i64, atomic_dec_global
->;
-//def BUFFER_ATOMIC_FCMPSWAP_X2 : MUBUF_X2 <mubuf<0x5e>, "buffer_atomic_fcmpswap_x2", []>; // isn't on VI
-//def BUFFER_ATOMIC_FMIN_X2 : MUBUF_X2 <mubuf<0x5f>, "buffer_atomic_fmin_x2", []>; // isn't on VI
-//def BUFFER_ATOMIC_FMAX_X2 : MUBUF_X2 <mubuf<0x60>, "buffer_atomic_fmax_x2", []>; // isn't on VI
-
-let SubtargetPredicate = isSI, DisableVIDecoder = 1 in {
-defm BUFFER_WBINVL1_SC : MUBUF_Invalidate <mubuf<0x70>, "buffer_wbinvl1_sc", int_amdgcn_buffer_wbinvl1_sc>; // isn't on CI & VI
-}
-
-defm BUFFER_WBINVL1 : MUBUF_Invalidate <mubuf<0x71, 0x3e>, "buffer_wbinvl1", int_amdgcn_buffer_wbinvl1>;
-
-//===----------------------------------------------------------------------===//
-// MTBUF Instructions
-//===----------------------------------------------------------------------===//
-
-//def TBUFFER_LOAD_FORMAT_X : MTBUF_ <0x00000000, "tbuffer_load_format_x", []>;
-//def TBUFFER_LOAD_FORMAT_XY : MTBUF_ <0x00000001, "tbuffer_load_format_xy", []>;
-//def TBUFFER_LOAD_FORMAT_XYZ : MTBUF_ <0x00000002, "tbuffer_load_format_xyz", []>;
-defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Load_Helper <0x00000003, "tbuffer_load_format_xyzw", VReg_128>;
-defm TBUFFER_STORE_FORMAT_X : MTBUF_Store_Helper <0x00000004, "tbuffer_store_format_x", VGPR_32>;
-defm TBUFFER_STORE_FORMAT_XY : MTBUF_Store_Helper <0x00000005, "tbuffer_store_format_xy", VReg_64>;
-defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Store_Helper <0x00000006, "tbuffer_store_format_xyz", VReg_128>;
-defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Store_Helper <0x00000007, "tbuffer_store_format_xyzw", VReg_128>;
-
-//===----------------------------------------------------------------------===//
-// MIMG Instructions
-//===----------------------------------------------------------------------===//
-
-defm IMAGE_LOAD : MIMG_NoSampler <0x00000000, "image_load">;
-defm IMAGE_LOAD_MIP : MIMG_NoSampler <0x00000001, "image_load_mip">;
-//def IMAGE_LOAD_PCK : MIMG_NoPattern_ <"image_load_pck", 0x00000002>;
-//def IMAGE_LOAD_PCK_SGN : MIMG_NoPattern_ <"image_load_pck_sgn", 0x00000003>;
-//def IMAGE_LOAD_MIP_PCK : MIMG_NoPattern_ <"image_load_mip_pck", 0x00000004>;
-//def IMAGE_LOAD_MIP_PCK_SGN : MIMG_NoPattern_ <"image_load_mip_pck_sgn", 0x00000005>;
-defm IMAGE_STORE : MIMG_Store <0x00000008, "image_store">;
-defm IMAGE_STORE_MIP : MIMG_Store <0x00000009, "image_store_mip">;
-//def IMAGE_STORE_PCK : MIMG_NoPattern_ <"image_store_pck", 0x0000000a>;
-//def IMAGE_STORE_MIP_PCK : MIMG_NoPattern_ <"image_store_mip_pck", 0x0000000b>;
-defm IMAGE_GET_RESINFO : MIMG_NoSampler <0x0000000e, "image_get_resinfo">;
-defm IMAGE_ATOMIC_SWAP : MIMG_Atomic <mimg<0x0f, 0x10>, "image_atomic_swap">;
-defm IMAGE_ATOMIC_CMPSWAP : MIMG_Atomic <mimg<0x10, 0x11>, "image_atomic_cmpswap", VReg_64>;
-defm IMAGE_ATOMIC_ADD : MIMG_Atomic <mimg<0x11, 0x12>, "image_atomic_add">;
-defm IMAGE_ATOMIC_SUB : MIMG_Atomic <mimg<0x12, 0x13>, "image_atomic_sub">;
-//def IMAGE_ATOMIC_RSUB : MIMG_NoPattern_ <"image_atomic_rsub", 0x00000013>; -- not on VI
-defm IMAGE_ATOMIC_SMIN : MIMG_Atomic <mimg<0x14>, "image_atomic_smin">;
-defm IMAGE_ATOMIC_UMIN : MIMG_Atomic <mimg<0x15>, "image_atomic_umin">;
-defm IMAGE_ATOMIC_SMAX : MIMG_Atomic <mimg<0x16>, "image_atomic_smax">;
-defm IMAGE_ATOMIC_UMAX : MIMG_Atomic <mimg<0x17>, "image_atomic_umax">;
-defm IMAGE_ATOMIC_AND : MIMG_Atomic <mimg<0x18>, "image_atomic_and">;
-defm IMAGE_ATOMIC_OR : MIMG_Atomic <mimg<0x19>, "image_atomic_or">;
-defm IMAGE_ATOMIC_XOR : MIMG_Atomic <mimg<0x1a>, "image_atomic_xor">;
-defm IMAGE_ATOMIC_INC : MIMG_Atomic <mimg<0x1b>, "image_atomic_inc">;
-defm IMAGE_ATOMIC_DEC : MIMG_Atomic <mimg<0x1c>, "image_atomic_dec">;
-//def IMAGE_ATOMIC_FCMPSWAP : MIMG_NoPattern_ <"image_atomic_fcmpswap", 0x0000001d>; -- not on VI
-//def IMAGE_ATOMIC_FMIN : MIMG_NoPattern_ <"image_atomic_fmin", 0x0000001e>; -- not on VI
-//def IMAGE_ATOMIC_FMAX : MIMG_NoPattern_ <"image_atomic_fmax", 0x0000001f>; -- not on VI
-defm IMAGE_SAMPLE : MIMG_Sampler_WQM <0x00000020, "image_sample">;
-defm IMAGE_SAMPLE_CL : MIMG_Sampler_WQM <0x00000021, "image_sample_cl">;
-defm IMAGE_SAMPLE_D : MIMG_Sampler <0x00000022, "image_sample_d">;
-defm IMAGE_SAMPLE_D_CL : MIMG_Sampler <0x00000023, "image_sample_d_cl">;
-defm IMAGE_SAMPLE_L : MIMG_Sampler <0x00000024, "image_sample_l">;
-defm IMAGE_SAMPLE_B : MIMG_Sampler_WQM <0x00000025, "image_sample_b">;
-defm IMAGE_SAMPLE_B_CL : MIMG_Sampler_WQM <0x00000026, "image_sample_b_cl">;
-defm IMAGE_SAMPLE_LZ : MIMG_Sampler <0x00000027, "image_sample_lz">;
-defm IMAGE_SAMPLE_C : MIMG_Sampler_WQM <0x00000028, "image_sample_c">;
-defm IMAGE_SAMPLE_C_CL : MIMG_Sampler_WQM <0x00000029, "image_sample_c_cl">;
-defm IMAGE_SAMPLE_C_D : MIMG_Sampler <0x0000002a, "image_sample_c_d">;
-defm IMAGE_SAMPLE_C_D_CL : MIMG_Sampler <0x0000002b, "image_sample_c_d_cl">;
-defm IMAGE_SAMPLE_C_L : MIMG_Sampler <0x0000002c, "image_sample_c_l">;
-defm IMAGE_SAMPLE_C_B : MIMG_Sampler_WQM <0x0000002d, "image_sample_c_b">;
-defm IMAGE_SAMPLE_C_B_CL : MIMG_Sampler_WQM <0x0000002e, "image_sample_c_b_cl">;
-defm IMAGE_SAMPLE_C_LZ : MIMG_Sampler <0x0000002f, "image_sample_c_lz">;
-defm IMAGE_SAMPLE_O : MIMG_Sampler_WQM <0x00000030, "image_sample_o">;
-defm IMAGE_SAMPLE_CL_O : MIMG_Sampler_WQM <0x00000031, "image_sample_cl_o">;
-defm IMAGE_SAMPLE_D_O : MIMG_Sampler <0x00000032, "image_sample_d_o">;
-defm IMAGE_SAMPLE_D_CL_O : MIMG_Sampler <0x00000033, "image_sample_d_cl_o">;
-defm IMAGE_SAMPLE_L_O : MIMG_Sampler <0x00000034, "image_sample_l_o">;
-defm IMAGE_SAMPLE_B_O : MIMG_Sampler_WQM <0x00000035, "image_sample_b_o">;
-defm IMAGE_SAMPLE_B_CL_O : MIMG_Sampler_WQM <0x00000036, "image_sample_b_cl_o">;
-defm IMAGE_SAMPLE_LZ_O : MIMG_Sampler <0x00000037, "image_sample_lz_o">;
-defm IMAGE_SAMPLE_C_O : MIMG_Sampler_WQM <0x00000038, "image_sample_c_o">;
-defm IMAGE_SAMPLE_C_CL_O : MIMG_Sampler_WQM <0x00000039, "image_sample_c_cl_o">;
-defm IMAGE_SAMPLE_C_D_O : MIMG_Sampler <0x0000003a, "image_sample_c_d_o">;
-defm IMAGE_SAMPLE_C_D_CL_O : MIMG_Sampler <0x0000003b, "image_sample_c_d_cl_o">;
-defm IMAGE_SAMPLE_C_L_O : MIMG_Sampler <0x0000003c, "image_sample_c_l_o">;
-defm IMAGE_SAMPLE_C_B_O : MIMG_Sampler_WQM <0x0000003d, "image_sample_c_b_o">;
-defm IMAGE_SAMPLE_C_B_CL_O : MIMG_Sampler_WQM <0x0000003e, "image_sample_c_b_cl_o">;
-defm IMAGE_SAMPLE_C_LZ_O : MIMG_Sampler <0x0000003f, "image_sample_c_lz_o">;
-defm IMAGE_GATHER4 : MIMG_Gather_WQM <0x00000040, "image_gather4">;
-defm IMAGE_GATHER4_CL : MIMG_Gather_WQM <0x00000041, "image_gather4_cl">;
-defm IMAGE_GATHER4_L : MIMG_Gather <0x00000044, "image_gather4_l">;
-defm IMAGE_GATHER4_B : MIMG_Gather_WQM <0x00000045, "image_gather4_b">;
-defm IMAGE_GATHER4_B_CL : MIMG_Gather_WQM <0x00000046, "image_gather4_b_cl">;
-defm IMAGE_GATHER4_LZ : MIMG_Gather <0x00000047, "image_gather4_lz">;
-defm IMAGE_GATHER4_C : MIMG_Gather_WQM <0x00000048, "image_gather4_c">;
-defm IMAGE_GATHER4_C_CL : MIMG_Gather_WQM <0x00000049, "image_gather4_c_cl">;
-defm IMAGE_GATHER4_C_L : MIMG_Gather <0x0000004c, "image_gather4_c_l">;
-defm IMAGE_GATHER4_C_B : MIMG_Gather_WQM <0x0000004d, "image_gather4_c_b">;
-defm IMAGE_GATHER4_C_B_CL : MIMG_Gather_WQM <0x0000004e, "image_gather4_c_b_cl">;
-defm IMAGE_GATHER4_C_LZ : MIMG_Gather <0x0000004f, "image_gather4_c_lz">;
-defm IMAGE_GATHER4_O : MIMG_Gather_WQM <0x00000050, "image_gather4_o">;
-defm IMAGE_GATHER4_CL_O : MIMG_Gather_WQM <0x00000051, "image_gather4_cl_o">;
-defm IMAGE_GATHER4_L_O : MIMG_Gather <0x00000054, "image_gather4_l_o">;
-defm IMAGE_GATHER4_B_O : MIMG_Gather_WQM <0x00000055, "image_gather4_b_o">;
-defm IMAGE_GATHER4_B_CL_O : MIMG_Gather <0x00000056, "image_gather4_b_cl_o">;
-defm IMAGE_GATHER4_LZ_O : MIMG_Gather <0x00000057, "image_gather4_lz_o">;
-defm IMAGE_GATHER4_C_O : MIMG_Gather_WQM <0x00000058, "image_gather4_c_o">;
-defm IMAGE_GATHER4_C_CL_O : MIMG_Gather_WQM <0x00000059, "image_gather4_c_cl_o">;
-defm IMAGE_GATHER4_C_L_O : MIMG_Gather <0x0000005c, "image_gather4_c_l_o">;
-defm IMAGE_GATHER4_C_B_O : MIMG_Gather_WQM <0x0000005d, "image_gather4_c_b_o">;
-defm IMAGE_GATHER4_C_B_CL_O : MIMG_Gather_WQM <0x0000005e, "image_gather4_c_b_cl_o">;
-defm IMAGE_GATHER4_C_LZ_O : MIMG_Gather <0x0000005f, "image_gather4_c_lz_o">;
-defm IMAGE_GET_LOD : MIMG_Sampler_WQM <0x00000060, "image_get_lod">;
-defm IMAGE_SAMPLE_CD : MIMG_Sampler <0x00000068, "image_sample_cd">;
-defm IMAGE_SAMPLE_CD_CL : MIMG_Sampler <0x00000069, "image_sample_cd_cl">;
-defm IMAGE_SAMPLE_C_CD : MIMG_Sampler <0x0000006a, "image_sample_c_cd">;
-defm IMAGE_SAMPLE_C_CD_CL : MIMG_Sampler <0x0000006b, "image_sample_c_cd_cl">;
-defm IMAGE_SAMPLE_CD_O : MIMG_Sampler <0x0000006c, "image_sample_cd_o">;
-defm IMAGE_SAMPLE_CD_CL_O : MIMG_Sampler <0x0000006d, "image_sample_cd_cl_o">;
-defm IMAGE_SAMPLE_C_CD_O : MIMG_Sampler <0x0000006e, "image_sample_c_cd_o">;
-defm IMAGE_SAMPLE_C_CD_CL_O : MIMG_Sampler <0x0000006f, "image_sample_c_cd_cl_o">;
-//def IMAGE_RSRC256 : MIMG_NoPattern_RSRC256 <"image_rsrc256", 0x0000007e>;
-//def IMAGE_SAMPLER : MIMG_NoPattern_ <"image_sampler", 0x0000007f>;
-
-//===----------------------------------------------------------------------===//
-// VOP1 Instructions
-//===----------------------------------------------------------------------===//
-
-let vdst = 0, src0 = 0, VOPAsmPrefer32Bit = 1 in {
-defm V_NOP : VOP1Inst <vop1<0x0>, "v_nop", VOP_NONE>;
-}
-
-let isMoveImm = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in {
-defm V_MOV_B32 : VOP1Inst <vop1<0x1>, "v_mov_b32", VOP_I32_I32>;
-} // End isMoveImm = 1
-
-let Uses = [EXEC] in {
-
-// FIXME: Specify SchedRW for READFIRSTLANE_B32
-
-def V_READFIRSTLANE_B32 : VOP1 <
- 0x00000002,
- (outs SReg_32:$vdst),
- (ins VS_32:$src0),
- "v_readfirstlane_b32 $vdst, $src0",
- []
-> {
- let isConvergent = 1;
-}
-
-}
-
-let SchedRW = [WriteQuarterRate32] in {
-
-defm V_CVT_I32_F64 : VOP1Inst <vop1<0x3>, "v_cvt_i32_f64",
- VOP_I32_F64, fp_to_sint
->;
-defm V_CVT_F64_I32 : VOP1Inst <vop1<0x4>, "v_cvt_f64_i32",
- VOP_F64_I32, sint_to_fp
->;
-defm V_CVT_F32_I32 : VOP1Inst <vop1<0x5>, "v_cvt_f32_i32",
- VOP_F32_I32, sint_to_fp
->;
-defm V_CVT_F32_U32 : VOP1Inst <vop1<0x6>, "v_cvt_f32_u32",
- VOP_F32_I32, uint_to_fp
->;
-defm V_CVT_U32_F32 : VOP1Inst <vop1<0x7>, "v_cvt_u32_f32",
- VOP_I32_F32, fp_to_uint
->;
-defm V_CVT_I32_F32 : VOP1Inst <vop1<0x8>, "v_cvt_i32_f32",
- VOP_I32_F32, fp_to_sint
->;
-defm V_CVT_F16_F32 : VOP1Inst <vop1<0xa>, "v_cvt_f16_f32",
- VOP_I32_F32, fp_to_f16
->;
-defm V_CVT_F32_F16 : VOP1Inst <vop1<0xb>, "v_cvt_f32_f16",
- VOP_F32_I32, f16_to_fp
->;
-defm V_CVT_RPI_I32_F32 : VOP1Inst <vop1<0xc>, "v_cvt_rpi_i32_f32",
- VOP_I32_F32, cvt_rpi_i32_f32>;
-defm V_CVT_FLR_I32_F32 : VOP1Inst <vop1<0xd>, "v_cvt_flr_i32_f32",
- VOP_I32_F32, cvt_flr_i32_f32>;
-defm V_CVT_OFF_F32_I4 : VOP1Inst <vop1<0x0e>, "v_cvt_off_f32_i4", VOP_F32_I32>;
-defm V_CVT_F32_F64 : VOP1Inst <vop1<0xf>, "v_cvt_f32_f64",
- VOP_F32_F64, fround
->;
-defm V_CVT_F64_F32 : VOP1Inst <vop1<0x10>, "v_cvt_f64_f32",
- VOP_F64_F32, fextend
->;
-defm V_CVT_F32_UBYTE0 : VOP1Inst <vop1<0x11>, "v_cvt_f32_ubyte0",
- VOP_F32_I32, AMDGPUcvt_f32_ubyte0
->;
-defm V_CVT_F32_UBYTE1 : VOP1Inst <vop1<0x12>, "v_cvt_f32_ubyte1",
- VOP_F32_I32, AMDGPUcvt_f32_ubyte1
->;
-defm V_CVT_F32_UBYTE2 : VOP1Inst <vop1<0x13>, "v_cvt_f32_ubyte2",
- VOP_F32_I32, AMDGPUcvt_f32_ubyte2
->;
-defm V_CVT_F32_UBYTE3 : VOP1Inst <vop1<0x14>, "v_cvt_f32_ubyte3",
- VOP_F32_I32, AMDGPUcvt_f32_ubyte3
->;
-defm V_CVT_U32_F64 : VOP1Inst <vop1<0x15>, "v_cvt_u32_f64",
- VOP_I32_F64, fp_to_uint
->;
-defm V_CVT_F64_U32 : VOP1Inst <vop1<0x16>, "v_cvt_f64_u32",
- VOP_F64_I32, uint_to_fp
->;
-
-} // End SchedRW = [WriteQuarterRate32]
-
-defm V_FRACT_F32 : VOP1Inst <vop1<0x20, 0x1b>, "v_fract_f32",
- VOP_F32_F32, AMDGPUfract
->;
-defm V_TRUNC_F32 : VOP1Inst <vop1<0x21, 0x1c>, "v_trunc_f32",
- VOP_F32_F32, ftrunc
->;
-defm V_CEIL_F32 : VOP1Inst <vop1<0x22, 0x1d>, "v_ceil_f32",
- VOP_F32_F32, fceil
->;
-defm V_RNDNE_F32 : VOP1Inst <vop1<0x23, 0x1e>, "v_rndne_f32",
- VOP_F32_F32, frint
->;
-defm V_FLOOR_F32 : VOP1Inst <vop1<0x24, 0x1f>, "v_floor_f32",
- VOP_F32_F32, ffloor
->;
-defm V_EXP_F32 : VOP1Inst <vop1<0x25, 0x20>, "v_exp_f32",
- VOP_F32_F32, fexp2
->;
-
-let SchedRW = [WriteQuarterRate32] in {
-
-defm V_LOG_F32 : VOP1Inst <vop1<0x27, 0x21>, "v_log_f32",
- VOP_F32_F32, flog2
->;
-defm V_RCP_F32 : VOP1Inst <vop1<0x2a, 0x22>, "v_rcp_f32",
- VOP_F32_F32, AMDGPUrcp
->;
-defm V_RCP_IFLAG_F32 : VOP1Inst <vop1<0x2b, 0x23>, "v_rcp_iflag_f32",
- VOP_F32_F32
->;
-defm V_RSQ_F32 : VOP1Inst <vop1<0x2e, 0x24>, "v_rsq_f32",
- VOP_F32_F32, AMDGPUrsq
->;
-
-} // End SchedRW = [WriteQuarterRate32]
-
-let SchedRW = [WriteDouble] in {
-
-defm V_RCP_F64 : VOP1Inst <vop1<0x2f, 0x25>, "v_rcp_f64",
- VOP_F64_F64, AMDGPUrcp
->;
-defm V_RSQ_F64 : VOP1Inst <vop1<0x31, 0x26>, "v_rsq_f64",
- VOP_F64_F64, AMDGPUrsq
->;
-
-} // End SchedRW = [WriteDouble];
-
-defm V_SQRT_F32 : VOP1Inst <vop1<0x33, 0x27>, "v_sqrt_f32",
- VOP_F32_F32, fsqrt
->;
-
-let SchedRW = [WriteDouble] in {
-
-defm V_SQRT_F64 : VOP1Inst <vop1<0x34, 0x28>, "v_sqrt_f64",
- VOP_F64_F64, fsqrt
->;
-
-} // End SchedRW = [WriteDouble]
-
-let SchedRW = [WriteQuarterRate32] in {
-
-defm V_SIN_F32 : VOP1Inst <vop1<0x35, 0x29>, "v_sin_f32",
- VOP_F32_F32, AMDGPUsin
->;
-defm V_COS_F32 : VOP1Inst <vop1<0x36, 0x2a>, "v_cos_f32",
- VOP_F32_F32, AMDGPUcos
->;
-
-} // End SchedRW = [WriteQuarterRate32]
-
-defm V_NOT_B32 : VOP1Inst <vop1<0x37, 0x2b>, "v_not_b32", VOP_I32_I32>;
-defm V_BFREV_B32 : VOP1Inst <vop1<0x38, 0x2c>, "v_bfrev_b32", VOP_I32_I32>;
-defm V_FFBH_U32 : VOP1Inst <vop1<0x39, 0x2d>, "v_ffbh_u32", VOP_I32_I32>;
-defm V_FFBL_B32 : VOP1Inst <vop1<0x3a, 0x2e>, "v_ffbl_b32", VOP_I32_I32>;
-defm V_FFBH_I32 : VOP1Inst <vop1<0x3b, 0x2f>, "v_ffbh_i32", VOP_I32_I32>;
-defm V_FREXP_EXP_I32_F64 : VOP1Inst <vop1<0x3c,0x30>, "v_frexp_exp_i32_f64",
- VOP_I32_F64, int_amdgcn_frexp_exp
->;
-
-let SchedRW = [WriteDoubleAdd] in {
-defm V_FREXP_MANT_F64 : VOP1Inst <vop1<0x3d, 0x31>, "v_frexp_mant_f64",
- VOP_F64_F64, int_amdgcn_frexp_mant
->;
-
-defm V_FRACT_F64 : VOP1Inst <vop1<0x3e, 0x32>, "v_fract_f64",
- VOP_F64_F64, AMDGPUfract
->;
-} // End SchedRW = [WriteDoubleAdd]
-
-
-defm V_FREXP_EXP_I32_F32 : VOP1Inst <vop1<0x3f, 0x33>, "v_frexp_exp_i32_f32",
- VOP_I32_F32, int_amdgcn_frexp_exp
->;
-defm V_FREXP_MANT_F32 : VOP1Inst <vop1<0x40, 0x34>, "v_frexp_mant_f32",
- VOP_F32_F32, int_amdgcn_frexp_mant
->;
-let vdst = 0, src0 = 0, VOPAsmPrefer32Bit = 1 in {
-defm V_CLREXCP : VOP1Inst <vop1<0x41,0x35>, "v_clrexcp", VOP_NO_EXT<VOP_NONE>>;
-}
-
-let Uses = [M0, EXEC] in {
-defm V_MOVRELD_B32 : VOP1Inst <vop1<0x42, 0x36>, "v_movreld_b32", VOP_NO_EXT<VOP_I32_I32>>;
-defm V_MOVRELS_B32 : VOP1Inst <vop1<0x43, 0x37>, "v_movrels_b32", VOP_NO_EXT<VOP_I32_I32>>;
-defm V_MOVRELSD_B32 : VOP1Inst <vop1<0x44, 0x38>, "v_movrelsd_b32", VOP_NO_EXT<VOP_I32_I32>>;
-} // End Uses = [M0, EXEC]
-
-// These instructions only exist on SI and CI
-let SubtargetPredicate = isSICI in {
-
-let SchedRW = [WriteQuarterRate32] in {
-
-defm V_MOV_FED_B32 : VOP1InstSI <vop1<0x9>, "v_mov_fed_b32", VOP_I32_I32>;
-defm V_LOG_CLAMP_F32 : VOP1InstSI <vop1<0x26>, "v_log_clamp_f32",
- VOP_F32_F32, int_amdgcn_log_clamp>;
-defm V_RCP_CLAMP_F32 : VOP1InstSI <vop1<0x28>, "v_rcp_clamp_f32", VOP_F32_F32>;
-defm V_RCP_LEGACY_F32 : VOP1InstSI <vop1<0x29>, "v_rcp_legacy_f32", VOP_F32_F32>;
-defm V_RSQ_CLAMP_F32 : VOP1InstSI <vop1<0x2c>, "v_rsq_clamp_f32",
- VOP_F32_F32, AMDGPUrsq_clamp
->;
-defm V_RSQ_LEGACY_F32 : VOP1InstSI <vop1<0x2d>, "v_rsq_legacy_f32",
- VOP_F32_F32, AMDGPUrsq_legacy
->;
-
-} // End SchedRW = [WriteQuarterRate32]
-
-let SchedRW = [WriteDouble] in {
-
-defm V_RCP_CLAMP_F64 : VOP1InstSI <vop1<0x30>, "v_rcp_clamp_f64", VOP_F64_F64>;
-defm V_RSQ_CLAMP_F64 : VOP1InstSI <vop1<0x32>, "v_rsq_clamp_f64",
- VOP_F64_F64, AMDGPUrsq_clamp
->;
-
-} // End SchedRW = [WriteDouble]
-
-} // End SubtargetPredicate = isSICI
+defm EXP : EXP_m<0, AMDGPUexport>;
+defm EXP_DONE : EXP_m<1, AMDGPUexport_done>;
//===----------------------------------------------------------------------===//
// VINTRP Instructions
@@ -1433,11 +50,11 @@ let Uses = [M0, EXEC] in {
multiclass V_INTERP_P1_F32_m : VINTRP_m <
0x00000000,
- (outs VGPR_32:$dst),
- (ins VGPR_32:$i, i32imm:$attr_chan, i32imm:$attr),
- "v_interp_p1_f32 $dst, $i, $attr_chan, $attr, [m0]",
- [(set f32:$dst, (AMDGPUinterp_p1 i32:$i, (i32 imm:$attr_chan),
- (i32 imm:$attr)))]
+ (outs VGPR_32:$vdst),
+ (ins VGPR_32:$vsrc, Attr:$attr, AttrChan:$attrchan),
+ "v_interp_p1_f32 $vdst, $vsrc, $attr$attrchan",
+ [(set f32:$vdst, (AMDGPUinterp_p1 f32:$vsrc, (i32 imm:$attrchan),
+ (i32 imm:$attr)))]
>;
let OtherPredicates = [has32BankLDS] in {
@@ -1446,459 +63,33 @@ defm V_INTERP_P1_F32 : V_INTERP_P1_F32_m;
} // End OtherPredicates = [has32BankLDS]
-let OtherPredicates = [has16BankLDS], Constraints = "@earlyclobber $dst", isAsmParserOnly=1 in {
+let OtherPredicates = [has16BankLDS], Constraints = "@earlyclobber $vdst", isAsmParserOnly=1 in {
defm V_INTERP_P1_F32_16bank : V_INTERP_P1_F32_m;
-} // End OtherPredicates = [has32BankLDS], Constraints = "@earlyclobber $dst", isAsmParserOnly=1
+} // End OtherPredicates = [has16BankLDS], Constraints = "@earlyclobber $vdst", isAsmParserOnly=1
-let DisableEncoding = "$src0", Constraints = "$src0 = $dst" in {
+let DisableEncoding = "$src0", Constraints = "$src0 = $vdst" in {
defm V_INTERP_P2_F32 : VINTRP_m <
0x00000001,
- (outs VGPR_32:$dst),
- (ins VGPR_32:$src0, VGPR_32:$j, i32imm:$attr_chan, i32imm:$attr),
- "v_interp_p2_f32 $dst, [$src0], $j, $attr_chan, $attr, [m0]",
- [(set f32:$dst, (AMDGPUinterp_p2 f32:$src0, i32:$j, (i32 imm:$attr_chan),
- (i32 imm:$attr)))]>;
+ (outs VGPR_32:$vdst),
+ (ins VGPR_32:$src0, VGPR_32:$vsrc, Attr:$attr, AttrChan:$attrchan),
+ "v_interp_p2_f32 $vdst, $vsrc, $attr$attrchan",
+ [(set f32:$vdst, (AMDGPUinterp_p2 f32:$src0, f32:$vsrc, (i32 imm:$attrchan),
+ (i32 imm:$attr)))]>;
-} // End DisableEncoding = "$src0", Constraints = "$src0 = $dst"
+} // End DisableEncoding = "$src0", Constraints = "$src0 = $vdst"
defm V_INTERP_MOV_F32 : VINTRP_m <
0x00000002,
- (outs VGPR_32:$dst),
- (ins InterpSlot:$src0, i32imm:$attr_chan, i32imm:$attr),
- "v_interp_mov_f32 $dst, $src0, $attr_chan, $attr, [m0]",
- [(set f32:$dst, (AMDGPUinterp_mov (i32 imm:$src0), (i32 imm:$attr_chan),
- (i32 imm:$attr)))]>;
-
-} // End Uses = [M0, EXEC]
-
-//===----------------------------------------------------------------------===//
-// VOP2 Instructions
-//===----------------------------------------------------------------------===//
-
-defm V_CNDMASK_B32 : VOP2eInst <vop2<0x0, 0x0>, "v_cndmask_b32",
- VOP2e_I32_I32_I32_I1
->;
-
-let isCommutable = 1 in {
-defm V_ADD_F32 : VOP2Inst <vop2<0x3, 0x1>, "v_add_f32",
- VOP_F32_F32_F32, fadd
->;
-
-defm V_SUB_F32 : VOP2Inst <vop2<0x4, 0x2>, "v_sub_f32", VOP_F32_F32_F32, fsub>;
-defm V_SUBREV_F32 : VOP2Inst <vop2<0x5, 0x3>, "v_subrev_f32",
- VOP_F32_F32_F32, null_frag, "v_sub_f32"
->;
-} // End isCommutable = 1
-
-let isCommutable = 1 in {
-
-defm V_MUL_LEGACY_F32 : VOP2Inst <vop2<0x7, 0x4>, "v_mul_legacy_f32",
- VOP_F32_F32_F32
->;
-
-defm V_MUL_F32 : VOP2Inst <vop2<0x8, 0x5>, "v_mul_f32",
- VOP_F32_F32_F32, fmul
->;
-
-defm V_MUL_I32_I24 : VOP2Inst <vop2<0x9, 0x6>, "v_mul_i32_i24",
- VOP_I32_I32_I32, AMDGPUmul_i24
->;
-
-defm V_MUL_HI_I32_I24 : VOP2Inst <vop2<0xa,0x7>, "v_mul_hi_i32_i24",
- VOP_I32_I32_I32
->;
-
-defm V_MUL_U32_U24 : VOP2Inst <vop2<0xb, 0x8>, "v_mul_u32_u24",
- VOP_I32_I32_I32, AMDGPUmul_u24
->;
-
-defm V_MUL_HI_U32_U24 : VOP2Inst <vop2<0xc,0x9>, "v_mul_hi_u32_u24",
- VOP_I32_I32_I32
->;
-
-defm V_MIN_F32 : VOP2Inst <vop2<0xf, 0xa>, "v_min_f32", VOP_F32_F32_F32,
- fminnum>;
-defm V_MAX_F32 : VOP2Inst <vop2<0x10, 0xb>, "v_max_f32", VOP_F32_F32_F32,
- fmaxnum>;
-defm V_MIN_I32 : VOP2Inst <vop2<0x11, 0xc>, "v_min_i32", VOP_I32_I32_I32>;
-defm V_MAX_I32 : VOP2Inst <vop2<0x12, 0xd>, "v_max_i32", VOP_I32_I32_I32>;
-defm V_MIN_U32 : VOP2Inst <vop2<0x13, 0xe>, "v_min_u32", VOP_I32_I32_I32>;
-defm V_MAX_U32 : VOP2Inst <vop2<0x14, 0xf>, "v_max_u32", VOP_I32_I32_I32>;
-
-defm V_LSHRREV_B32 : VOP2Inst <
- vop2<0x16, 0x10>, "v_lshrrev_b32", VOP_I32_I32_I32, null_frag,
- "v_lshr_b32"
->;
-
-defm V_ASHRREV_I32 : VOP2Inst <
- vop2<0x18, 0x11>, "v_ashrrev_i32", VOP_I32_I32_I32, null_frag,
- "v_ashr_i32"
->;
-
-defm V_LSHLREV_B32 : VOP2Inst <
- vop2<0x1a, 0x12>, "v_lshlrev_b32", VOP_I32_I32_I32, null_frag,
- "v_lshl_b32"
->;
-
-defm V_AND_B32 : VOP2Inst <vop2<0x1b, 0x13>, "v_and_b32", VOP_I32_I32_I32>;
-defm V_OR_B32 : VOP2Inst <vop2<0x1c, 0x14>, "v_or_b32", VOP_I32_I32_I32>;
-defm V_XOR_B32 : VOP2Inst <vop2<0x1d, 0x15>, "v_xor_b32", VOP_I32_I32_I32>;
-
-let Constraints = "$vdst = $src2", DisableEncoding="$src2",
- isConvertibleToThreeAddress = 1 in {
-defm V_MAC_F32 : VOP2Inst <vop2<0x1f, 0x16>, "v_mac_f32", VOP_MAC>;
-}
-} // End isCommutable = 1
-
-defm V_MADMK_F32 : VOP2MADK <vop2<0x20, 0x17>, "v_madmk_f32", VOP_MADMK>;
-
-let isCommutable = 1 in {
-defm V_MADAK_F32 : VOP2MADK <vop2<0x21, 0x18>, "v_madak_f32", VOP_MADAK>;
-} // End isCommutable = 1
-
-let isCommutable = 1 in {
-// No patterns so that the scalar instructions are always selected.
-// The scalar versions will be replaced with vector when needed later.
-
-// V_ADD_I32, V_SUB_I32, and V_SUBREV_I32 were renamed to *_U32 in VI,
-// but the VI instructions behave the same as the SI versions.
-defm V_ADD_I32 : VOP2bInst <vop2<0x25, 0x19>, "v_add_i32",
- VOP2b_I32_I1_I32_I32
->;
-defm V_SUB_I32 : VOP2bInst <vop2<0x26, 0x1a>, "v_sub_i32", VOP2b_I32_I1_I32_I32>;
-
-defm V_SUBREV_I32 : VOP2bInst <vop2<0x27, 0x1b>, "v_subrev_i32",
- VOP2b_I32_I1_I32_I32, null_frag, "v_sub_i32"
->;
-
-defm V_ADDC_U32 : VOP2bInst <vop2<0x28, 0x1c>, "v_addc_u32",
- VOP2b_I32_I1_I32_I32_I1
->;
-defm V_SUBB_U32 : VOP2bInst <vop2<0x29, 0x1d>, "v_subb_u32",
- VOP2b_I32_I1_I32_I32_I1
->;
-defm V_SUBBREV_U32 : VOP2bInst <vop2<0x2a, 0x1e>, "v_subbrev_u32",
- VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32"
->;
-
-} // End isCommutable = 1
-
-// These are special and do not read the exec mask.
-let isConvergent = 1, Uses = []<Register> in {
-
-defm V_READLANE_B32 : VOP2SI_3VI_m <
- vop3 <0x001, 0x289>,
- "v_readlane_b32",
- (outs SReg_32:$vdst),
- (ins VS_32:$src0, SCSrc_32:$src1),
- "v_readlane_b32 $vdst, $src0, $src1"
->;
-
-defm V_WRITELANE_B32 : VOP2SI_3VI_m <
- vop3 <0x002, 0x28a>,
- "v_writelane_b32",
(outs VGPR_32:$vdst),
- (ins SReg_32:$src0, SCSrc_32:$src1),
- "v_writelane_b32 $vdst, $src0, $src1"
->;
-
-} // End isConvergent = 1
-
-// These instructions only exist on SI and CI
-let SubtargetPredicate = isSICI in {
-
-let isCommutable = 1 in {
-defm V_MAC_LEGACY_F32 : VOP2InstSI <vop2<0x6>, "v_mac_legacy_f32",
- VOP_F32_F32_F32
->;
-} // End isCommutable = 1
-
-defm V_MIN_LEGACY_F32 : VOP2InstSI <vop2<0xd>, "v_min_legacy_f32",
- VOP_F32_F32_F32, AMDGPUfmin_legacy
->;
-defm V_MAX_LEGACY_F32 : VOP2InstSI <vop2<0xe>, "v_max_legacy_f32",
- VOP_F32_F32_F32, AMDGPUfmax_legacy
->;
-
-let isCommutable = 1 in {
-defm V_LSHR_B32 : VOP2InstSI <vop2<0x15>, "v_lshr_b32", VOP_I32_I32_I32>;
-defm V_ASHR_I32 : VOP2InstSI <vop2<0x17>, "v_ashr_i32", VOP_I32_I32_I32>;
-defm V_LSHL_B32 : VOP2InstSI <vop2<0x19>, "v_lshl_b32", VOP_I32_I32_I32>;
-} // End isCommutable = 1
-} // End let SubtargetPredicate = SICI
-
-defm V_BFM_B32 : VOP2_VI3_Inst <vop23<0x1e, 0x293>, "v_bfm_b32",
- VOP_I32_I32_I32
->;
-defm V_BCNT_U32_B32 : VOP2_VI3_Inst <vop23<0x22, 0x28b>, "v_bcnt_u32_b32",
- VOP_I32_I32_I32
->;
-defm V_MBCNT_LO_U32_B32 : VOP2_VI3_Inst <vop23<0x23, 0x28c>, "v_mbcnt_lo_u32_b32",
- VOP_I32_I32_I32, int_amdgcn_mbcnt_lo
->;
-defm V_MBCNT_HI_U32_B32 : VOP2_VI3_Inst <vop23<0x24, 0x28d>, "v_mbcnt_hi_u32_b32",
- VOP_I32_I32_I32, int_amdgcn_mbcnt_hi
->;
-defm V_LDEXP_F32 : VOP2_VI3_Inst <vop23<0x2b, 0x288>, "v_ldexp_f32",
- VOP_F32_F32_I32, AMDGPUldexp
->;
-
-defm V_CVT_PKACCUM_U8_F32 : VOP2_VI3_Inst <vop23<0x2c, 0x1f0>, "v_cvt_pkaccum_u8_f32",
- VOP_I32_F32_I32>; // TODO: set "Uses = dst"
-
-defm V_CVT_PKNORM_I16_F32 : VOP2_VI3_Inst <vop23<0x2d, 0x294>, "v_cvt_pknorm_i16_f32",
- VOP_I32_F32_F32
->;
-defm V_CVT_PKNORM_U16_F32 : VOP2_VI3_Inst <vop23<0x2e, 0x295>, "v_cvt_pknorm_u16_f32",
- VOP_I32_F32_F32
->;
-defm V_CVT_PKRTZ_F16_F32 : VOP2_VI3_Inst <vop23<0x2f, 0x296>, "v_cvt_pkrtz_f16_f32",
- VOP_I32_F32_F32, int_SI_packf16
->;
-defm V_CVT_PK_U16_U32 : VOP2_VI3_Inst <vop23<0x30, 0x297>, "v_cvt_pk_u16_u32",
- VOP_I32_I32_I32
->;
-defm V_CVT_PK_I16_I32 : VOP2_VI3_Inst <vop23<0x31, 0x298>, "v_cvt_pk_i16_i32",
- VOP_I32_I32_I32
->;
-
-//===----------------------------------------------------------------------===//
-// VOP3 Instructions
-//===----------------------------------------------------------------------===//
-
-let isCommutable = 1 in {
-defm V_MAD_LEGACY_F32 : VOP3Inst <vop3<0x140, 0x1c0>, "v_mad_legacy_f32",
- VOP_F32_F32_F32_F32
->;
-
-defm V_MAD_F32 : VOP3Inst <vop3<0x141, 0x1c1>, "v_mad_f32",
- VOP_F32_F32_F32_F32, fmad
->;
-
-defm V_MAD_I32_I24 : VOP3Inst <vop3<0x142, 0x1c2>, "v_mad_i32_i24",
- VOP_I32_I32_I32_I32, AMDGPUmad_i24
->;
-defm V_MAD_U32_U24 : VOP3Inst <vop3<0x143, 0x1c3>, "v_mad_u32_u24",
- VOP_I32_I32_I32_I32, AMDGPUmad_u24
->;
-} // End isCommutable = 1
-
-defm V_CUBEID_F32 : VOP3Inst <vop3<0x144, 0x1c4>, "v_cubeid_f32",
- VOP_F32_F32_F32_F32, int_amdgcn_cubeid
->;
-defm V_CUBESC_F32 : VOP3Inst <vop3<0x145, 0x1c5>, "v_cubesc_f32",
- VOP_F32_F32_F32_F32, int_amdgcn_cubesc
->;
-defm V_CUBETC_F32 : VOP3Inst <vop3<0x146, 0x1c6>, "v_cubetc_f32",
- VOP_F32_F32_F32_F32, int_amdgcn_cubetc
->;
-defm V_CUBEMA_F32 : VOP3Inst <vop3<0x147, 0x1c7>, "v_cubema_f32",
- VOP_F32_F32_F32_F32, int_amdgcn_cubema
->;
-
-defm V_BFE_U32 : VOP3Inst <vop3<0x148, 0x1c8>, "v_bfe_u32",
- VOP_I32_I32_I32_I32, AMDGPUbfe_u32
->;
-defm V_BFE_I32 : VOP3Inst <vop3<0x149, 0x1c9>, "v_bfe_i32",
- VOP_I32_I32_I32_I32, AMDGPUbfe_i32
->;
-
-defm V_BFI_B32 : VOP3Inst <vop3<0x14a, 0x1ca>, "v_bfi_b32",
- VOP_I32_I32_I32_I32, AMDGPUbfi
->;
-
-let isCommutable = 1 in {
-defm V_FMA_F32 : VOP3Inst <vop3<0x14b, 0x1cb>, "v_fma_f32",
- VOP_F32_F32_F32_F32, fma
->;
-defm V_FMA_F64 : VOP3Inst <vop3<0x14c, 0x1cc>, "v_fma_f64",
- VOP_F64_F64_F64_F64, fma
->;
-
-defm V_LERP_U8 : VOP3Inst <vop3<0x14d, 0x1cd>, "v_lerp_u8",
- VOP_I32_I32_I32_I32, int_amdgcn_lerp
->;
-} // End isCommutable = 1
-
-//def V_LERP_U8 : VOP3_U8 <0x0000014d, "v_lerp_u8", []>;
-defm V_ALIGNBIT_B32 : VOP3Inst <vop3<0x14e, 0x1ce>, "v_alignbit_b32",
- VOP_I32_I32_I32_I32
->;
-defm V_ALIGNBYTE_B32 : VOP3Inst <vop3<0x14f, 0x1cf>, "v_alignbyte_b32",
- VOP_I32_I32_I32_I32
->;
-
-defm V_MIN3_F32 : VOP3Inst <vop3<0x151, 0x1d0>, "v_min3_f32",
- VOP_F32_F32_F32_F32, AMDGPUfmin3>;
+ (ins InterpSlot:$vsrc, Attr:$attr, AttrChan:$attrchan),
+ "v_interp_mov_f32 $vdst, $vsrc, $attr$attrchan",
+ [(set f32:$vdst, (AMDGPUinterp_mov (i32 imm:$vsrc), (i32 imm:$attrchan),
+ (i32 imm:$attr)))]>;
-defm V_MIN3_I32 : VOP3Inst <vop3<0x152, 0x1d1>, "v_min3_i32",
- VOP_I32_I32_I32_I32, AMDGPUsmin3
->;
-defm V_MIN3_U32 : VOP3Inst <vop3<0x153, 0x1d2>, "v_min3_u32",
- VOP_I32_I32_I32_I32, AMDGPUumin3
->;
-defm V_MAX3_F32 : VOP3Inst <vop3<0x154, 0x1d3>, "v_max3_f32",
- VOP_F32_F32_F32_F32, AMDGPUfmax3
->;
-defm V_MAX3_I32 : VOP3Inst <vop3<0x155, 0x1d4>, "v_max3_i32",
- VOP_I32_I32_I32_I32, AMDGPUsmax3
->;
-defm V_MAX3_U32 : VOP3Inst <vop3<0x156, 0x1d5>, "v_max3_u32",
- VOP_I32_I32_I32_I32, AMDGPUumax3
->;
-defm V_MED3_F32 : VOP3Inst <vop3<0x157, 0x1d6>, "v_med3_f32",
- VOP_F32_F32_F32_F32, AMDGPUfmed3
->;
-defm V_MED3_I32 : VOP3Inst <vop3<0x158, 0x1d7>, "v_med3_i32",
- VOP_I32_I32_I32_I32, AMDGPUsmed3
->;
-defm V_MED3_U32 : VOP3Inst <vop3<0x159, 0x1d8>, "v_med3_u32",
- VOP_I32_I32_I32_I32, AMDGPUumed3
->;
-
-//def V_SAD_U8 : VOP3_U8 <0x0000015a, "v_sad_u8", []>;
-//def V_SAD_HI_U8 : VOP3_U8 <0x0000015b, "v_sad_hi_u8", []>;
-//def V_SAD_U16 : VOP3_U16 <0x0000015c, "v_sad_u16", []>;
-defm V_SAD_U32 : VOP3Inst <vop3<0x15d, 0x1dc>, "v_sad_u32",
- VOP_I32_I32_I32_I32
->;
-//def V_CVT_PK_U8_F32 : VOP3_U8 <0x0000015e, "v_cvt_pk_u8_f32", []>;
-defm V_DIV_FIXUP_F32 : VOP3Inst <
- vop3<0x15f, 0x1de>, "v_div_fixup_f32", VOP_F32_F32_F32_F32, AMDGPUdiv_fixup
->;
-
-let SchedRW = [WriteDoubleAdd] in {
-
-defm V_DIV_FIXUP_F64 : VOP3Inst <
- vop3<0x160, 0x1df>, "v_div_fixup_f64", VOP_F64_F64_F64_F64, AMDGPUdiv_fixup
->;
-
-} // End SchedRW = [WriteDouble]
-
-let SchedRW = [WriteDoubleAdd] in {
-let isCommutable = 1 in {
-
-defm V_ADD_F64 : VOP3Inst <vop3<0x164, 0x280>, "v_add_f64",
- VOP_F64_F64_F64, fadd, 1
->;
-defm V_MUL_F64 : VOP3Inst <vop3<0x165, 0x281>, "v_mul_f64",
- VOP_F64_F64_F64, fmul, 1
->;
-
-defm V_MIN_F64 : VOP3Inst <vop3<0x166, 0x282>, "v_min_f64",
- VOP_F64_F64_F64, fminnum, 1
->;
-defm V_MAX_F64 : VOP3Inst <vop3<0x167, 0x283>, "v_max_f64",
- VOP_F64_F64_F64, fmaxnum, 1
->;
-
-} // End isCommutable = 1
-
-defm V_LDEXP_F64 : VOP3Inst <vop3<0x168, 0x284>, "v_ldexp_f64",
- VOP_F64_F64_I32, AMDGPUldexp, 1
->;
-
-} // End let SchedRW = [WriteDoubleAdd]
-
-let isCommutable = 1, SchedRW = [WriteQuarterRate32] in {
-
-defm V_MUL_LO_U32 : VOP3Inst <vop3<0x169, 0x285>, "v_mul_lo_u32",
- VOP_I32_I32_I32
->;
-defm V_MUL_HI_U32 : VOP3Inst <vop3<0x16a, 0x286>, "v_mul_hi_u32",
- VOP_I32_I32_I32, mulhu
->;
-
-let DisableVIDecoder=1 in { // removed from VI as identical to V_MUL_LO_U32
-defm V_MUL_LO_I32 : VOP3Inst <vop3<0x16b, 0x285>, "v_mul_lo_i32",
- VOP_I32_I32_I32
->;
-}
-
-defm V_MUL_HI_I32 : VOP3Inst <vop3<0x16c, 0x287>, "v_mul_hi_i32",
- VOP_I32_I32_I32, mulhs
->;
-
-} // End isCommutable = 1, SchedRW = [WriteQuarterRate32]
-
-let SchedRW = [WriteFloatFMA, WriteSALU] in {
-defm V_DIV_SCALE_F32 : VOP3bInst <vop3<0x16d, 0x1e0>, "v_div_scale_f32",
- VOP3b_F32_I1_F32_F32_F32, [], 1
->;
-}
-
-let SchedRW = [WriteDouble, WriteSALU] in {
-// Double precision division pre-scale.
-defm V_DIV_SCALE_F64 : VOP3bInst <vop3<0x16e, 0x1e1>, "v_div_scale_f64",
- VOP3b_F64_I1_F64_F64_F64, [], 1
->;
-} // End SchedRW = [WriteDouble]
-
-let isCommutable = 1, Uses = [VCC, EXEC] in {
-
-let SchedRW = [WriteFloatFMA] in {
-// v_div_fmas_f32:
-// result = src0 * src1 + src2
-// if (vcc)
-// result *= 2^32
-//
-defm V_DIV_FMAS_F32 : VOP3_VCC_Inst <vop3<0x16f, 0x1e2>, "v_div_fmas_f32",
- VOP_F32_F32_F32_F32, AMDGPUdiv_fmas
->;
-}
-
-let SchedRW = [WriteDouble] in {
-// v_div_fmas_f64:
-// result = src0 * src1 + src2
-// if (vcc)
-// result *= 2^64
-//
-defm V_DIV_FMAS_F64 : VOP3_VCC_Inst <vop3<0x170, 0x1e3>, "v_div_fmas_f64",
- VOP_F64_F64_F64_F64, AMDGPUdiv_fmas
->;
-
-} // End SchedRW = [WriteDouble]
-} // End isCommutable = 1, Uses = [VCC, EXEC]
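// Note: the v_div_fmas comments above describe an FMA whose result is
// conditionally scaled by VCC. A minimal C++ model of the f32 case they
// describe (illustrative only, not part of the LLVM sources):
//
//   #include <cmath>
//   float div_fmas_f32(float src0, float src1, float src2, bool vcc) {
//     float result = std::fma(src0, src1, src2);    // result = src0 * src1 + src2
//     return vcc ? std::ldexp(result, 32) : result; // if (vcc) result *= 2^32
//   }
//
// The f64 variant follows the same shape with a 2^64 scale when VCC is set.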
-
-//def V_MSAD_U8 : VOP3_U8 <0x00000171, "v_msad_u8", []>;
-//def V_QSAD_U8 : VOP3_U8 <0x00000172, "v_qsad_u8", []>;
-//def V_MQSAD_U8 : VOP3_U8 <0x00000173, "v_mqsad_u8", []>;
-
-let SchedRW = [WriteDouble] in {
-defm V_TRIG_PREOP_F64 : VOP3Inst <
- vop3<0x174, 0x292>, "v_trig_preop_f64", VOP_F64_F64_I32, AMDGPUtrig_preop
->;
-
-} // End SchedRW = [WriteDouble]
-
-// These instructions only exist on SI and CI
-let SubtargetPredicate = isSICI in {
-
-defm V_LSHL_B64 : VOP3Inst <vop3<0x161>, "v_lshl_b64", VOP_I64_I64_I32>;
-defm V_LSHR_B64 : VOP3Inst <vop3<0x162>, "v_lshr_b64", VOP_I64_I64_I32>;
-defm V_ASHR_I64 : VOP3Inst <vop3<0x163>, "v_ashr_i64", VOP_I64_I64_I32>;
-
-defm V_MULLIT_F32 : VOP3Inst <vop3<0x150>, "v_mullit_f32",
- VOP_F32_F32_F32_F32>;
-
-} // End SubtargetPredicate = isSICI
-
-let SubtargetPredicate = isVI, DisableSIDecoder = 1 in {
-
-defm V_LSHLREV_B64 : VOP3Inst <vop3<0, 0x28f>, "v_lshlrev_b64",
- VOP_I64_I32_I64
->;
-defm V_LSHRREV_B64 : VOP3Inst <vop3<0, 0x290>, "v_lshrrev_b64",
- VOP_I64_I32_I64
->;
-defm V_ASHRREV_I64 : VOP3Inst <vop3<0, 0x291>, "v_ashrrev_i64",
- VOP_I64_I32_I64
->;
-
-} // End SubtargetPredicate = isVI
+} // End Uses = [M0, EXEC]
//===----------------------------------------------------------------------===//
// Pseudo Instructions
@@ -1908,16 +99,16 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC] in {
// For use in patterns
def V_CNDMASK_B64_PSEUDO : VOP3Common <(outs VReg_64:$vdst),
- (ins VSrc_64:$src0, VSrc_64:$src1, SSrc_64:$src2), "", []> {
+ (ins VSrc_b64:$src0, VSrc_b64:$src1, SSrc_b64:$src2), "", []> {
let isPseudo = 1;
let isCodeGenOnly = 1;
+ let usesCustomInserter = 1;
}
// 64-bit vector move instruction. This is mainly used by the SIFoldOperands
// pass to enable folding of inline immediates.
-def V_MOV_B64_PSEUDO : PseudoInstSI <(outs VReg_64:$vdst), (ins VSrc_64:$src0)> {
- let VALU = 1;
-}
+def V_MOV_B64_PSEUDO : VPseudoInstSI <(outs VReg_64:$vdst),
+ (ins VSrc_b64:$src0)>;
} // End let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Uses = [EXEC]
let usesCustomInserter = 1, SALU = 1 in {
@@ -1925,83 +116,142 @@ def GET_GROUPSTATICSIZE : PseudoInstSI <(outs SReg_32:$sdst), (ins),
[(set SReg_32:$sdst, (int_amdgcn_groupstaticsize))]>;
} // End let usesCustomInserter = 1, SALU = 1
+def S_MOV_B64_term : PseudoInstSI<(outs SReg_64:$dst),
+ (ins SSrc_b64:$src0)> {
+ let SALU = 1;
+ let isAsCheapAsAMove = 1;
+ let isTerminator = 1;
+}
+
+def S_XOR_B64_term : PseudoInstSI<(outs SReg_64:$dst),
+ (ins SSrc_b64:$src0, SSrc_b64:$src1)> {
+ let SALU = 1;
+ let isAsCheapAsAMove = 1;
+ let isTerminator = 1;
+}
+
+def S_ANDN2_B64_term : PseudoInstSI<(outs SReg_64:$dst),
+ (ins SSrc_b64:$src0, SSrc_b64:$src1)> {
+ let SALU = 1;
+ let isAsCheapAsAMove = 1;
+ let isTerminator = 1;
+}
+
+def WAVE_BARRIER : SPseudoInstSI<(outs), (ins),
+ [(int_amdgcn_wave_barrier)]> {
+ let SchedRW = [];
+ let hasNoSchedulingInfo = 1;
+ let hasSideEffects = 1;
+ let mayLoad = 1;
+ let mayStore = 1;
+ let isBarrier = 1;
+ let isConvergent = 1;
+}
+
// SI pseudo instructions. These are used by the CFG structurizer pass
// and should be lowered to ISA instructions prior to codegen.
-let hasSideEffects = 1 in {
-
// Dummy terminator instruction to use after control flow instructions
// replaced with exec mask operations.
def SI_MASK_BRANCH : PseudoInstSI <
- (outs), (ins brtarget:$target, SReg_64:$dst)> {
- let isBranch = 1;
+ (outs), (ins brtarget:$target)> {
+ let isBranch = 0;
let isTerminator = 1;
- let isBarrier = 1;
- let SALU = 1;
+ let isBarrier = 0;
+ let Uses = [EXEC];
+ let SchedRW = [];
+ let hasNoSchedulingInfo = 1;
}
-let Uses = [EXEC], Defs = [EXEC, SCC] in {
-
-let isBranch = 1, isTerminator = 1 in {
+let isTerminator = 1 in {
-def SI_IF: PseudoInstSI <
+def SI_IF: CFPseudoInstSI <
(outs SReg_64:$dst), (ins SReg_64:$vcc, brtarget:$target),
- [(set i64:$dst, (int_amdgcn_if i1:$vcc, bb:$target))]> {
+ [(set i64:$dst, (int_amdgcn_if i1:$vcc, bb:$target))], 1, 1> {
let Constraints = "";
+ let Size = 12;
+ let mayLoad = 1;
+ let mayStore = 1;
+ let hasSideEffects = 1;
}
-def SI_ELSE : PseudoInstSI <
- (outs SReg_64:$dst), (ins SReg_64:$src, brtarget:$target),
- [(set i64:$dst, (int_amdgcn_else i64:$src, bb:$target))]> {
+def SI_ELSE : CFPseudoInstSI <
+ (outs SReg_64:$dst), (ins SReg_64:$src, brtarget:$target, i1imm:$execfix), [], 1, 1> {
let Constraints = "$src = $dst";
+ let Size = 12;
+ let mayStore = 1;
+ let mayLoad = 1;
+ let hasSideEffects = 1;
}
-def SI_LOOP : PseudoInstSI <
+def SI_LOOP : CFPseudoInstSI <
(outs), (ins SReg_64:$saved, brtarget:$target),
- [(int_amdgcn_loop i64:$saved, bb:$target)]
->;
+ [(int_amdgcn_loop i64:$saved, bb:$target)], 1, 1> {
+ let Size = 8;
+ let isBranch = 1;
+ let hasSideEffects = 1;
+ let mayLoad = 1;
+ let mayStore = 1;
+}
} // End isBranch = 1, isTerminator = 1
+def SI_END_CF : CFPseudoInstSI <
+ (outs), (ins SReg_64:$saved),
+ [(int_amdgcn_end_cf i64:$saved)], 1, 1> {
+ let Size = 4;
+ let isAsCheapAsAMove = 1;
+ let isReMaterializable = 1;
+ let mayLoad = 1;
+ let mayStore = 1;
+ let hasSideEffects = 1;
+}
-def SI_BREAK : PseudoInstSI <
+def SI_BREAK : CFPseudoInstSI <
(outs SReg_64:$dst), (ins SReg_64:$src),
- [(set i64:$dst, (int_amdgcn_break i64:$src))]
->;
+ [(set i64:$dst, (int_amdgcn_break i64:$src))], 1> {
+ let Size = 4;
+ let isAsCheapAsAMove = 1;
+ let isReMaterializable = 1;
+}
-def SI_IF_BREAK : PseudoInstSI <
+def SI_IF_BREAK : CFPseudoInstSI <
(outs SReg_64:$dst), (ins SReg_64:$vcc, SReg_64:$src),
- [(set i64:$dst, (int_amdgcn_if_break i1:$vcc, i64:$src))]
->;
+ [(set i64:$dst, (int_amdgcn_if_break i1:$vcc, i64:$src))]> {
+ let Size = 4;
+ let isAsCheapAsAMove = 1;
+ let isReMaterializable = 1;
+}
-def SI_ELSE_BREAK : PseudoInstSI <
+def SI_ELSE_BREAK : CFPseudoInstSI <
(outs SReg_64:$dst), (ins SReg_64:$src0, SReg_64:$src1),
- [(set i64:$dst, (int_amdgcn_else_break i64:$src0, i64:$src1))]
->;
-
-def SI_END_CF : PseudoInstSI <
- (outs), (ins SReg_64:$saved),
- [(int_amdgcn_end_cf i64:$saved)]
->;
-
-} // End Uses = [EXEC], Defs = [EXEC, SCC]
+ [(set i64:$dst, (int_amdgcn_else_break i64:$src0, i64:$src1))]> {
+ let Size = 4;
+ let isAsCheapAsAMove = 1;
+ let isReMaterializable = 1;
+}
let Uses = [EXEC], Defs = [EXEC,VCC] in {
def SI_KILL : PseudoInstSI <
- (outs), (ins VSrc_32:$src),
- [(int_AMDGPU_kill f32:$src)]> {
+ (outs), (ins VSrc_b32:$src),
+ [(AMDGPUkill i32:$src)]> {
let isConvergent = 1;
let usesCustomInserter = 1;
}
-def SI_KILL_TERMINATOR : PseudoInstSI <
- (outs), (ins VSrc_32:$src)> {
+def SI_KILL_TERMINATOR : SPseudoInstSI <
+ (outs), (ins VSrc_b32:$src)> {
let isTerminator = 1;
}
} // End Uses = [EXEC], Defs = [EXEC,VCC]
-} // End mayLoad = 1, mayStore = 1, hasSideEffects = 1
+// Branch on undef scc. Used to avoid intermediate copy from
+// IMPLICIT_DEF to SCC.
+def SI_BR_UNDEF : SPseudoInstSI <(outs), (ins sopp_brtarget:$simm16)> {
+ let isTerminator = 1;
+ let usesCustomInserter = 1;
+}
def SI_PS_LIVE : PseudoInstSI <
(outs SReg_64:$dst), (ins),
@@ -2013,36 +263,37 @@ def SI_PS_LIVE : PseudoInstSI <
// s_mov_b32 rather than a copy of another initialized
// register. MachineCSE skips copies, and we don't want to have to
// fold operands before it runs.
-def SI_INIT_M0 : PseudoInstSI <(outs), (ins SSrc_32:$src)> {
+def SI_INIT_M0 : SPseudoInstSI <(outs), (ins SSrc_b32:$src)> {
let Defs = [M0];
let usesCustomInserter = 1;
let isAsCheapAsAMove = 1;
- let SALU = 1;
let isReMaterializable = 1;
}
-def SI_RETURN : PseudoInstSI <
+def SI_RETURN : SPseudoInstSI <
(outs), (ins variable_ops), [(AMDGPUreturn)]> {
let isTerminator = 1;
let isBarrier = 1;
let isReturn = 1;
let hasSideEffects = 1;
- let SALU = 1;
let hasNoSchedulingInfo = 1;
let DisableWQM = 1;
}
-let Uses = [EXEC], Defs = [EXEC, VCC, M0],
+let Defs = [M0, EXEC],
UseNamedOperandTable = 1 in {
-class SI_INDIRECT_SRC<RegisterClass rc> : PseudoInstSI <
- (outs VGPR_32:$vdst, SReg_64:$sdst),
- (ins rc:$src, VS_32:$idx, i32imm:$offset)>;
+class SI_INDIRECT_SRC<RegisterClass rc> : VPseudoInstSI <
+ (outs VGPR_32:$vdst),
+ (ins rc:$src, VS_32:$idx, i32imm:$offset)> {
+ let usesCustomInserter = 1;
+}
-class SI_INDIRECT_DST<RegisterClass rc> : PseudoInstSI <
- (outs rc:$vdst, SReg_64:$sdst),
- (ins unknown:$src, VS_32:$idx, i32imm:$offset, VGPR_32:$val)> {
+class SI_INDIRECT_DST<RegisterClass rc> : VPseudoInstSI <
+ (outs rc:$vdst),
+ (ins rc:$src, VS_32:$idx, i32imm:$offset, VGPR_32:$val)> {
let Constraints = "$src = $vdst";
+ let usesCustomInserter = 1;
}
// TODO: We can support indirect SGPR access.
@@ -2058,53 +309,60 @@ def SI_INDIRECT_DST_V4 : SI_INDIRECT_DST<VReg_128>;
def SI_INDIRECT_DST_V8 : SI_INDIRECT_DST<VReg_256>;
def SI_INDIRECT_DST_V16 : SI_INDIRECT_DST<VReg_512>;
-} // End Uses = [EXEC], Defs = [EXEC,VCC,M0]
+} // End Uses = [EXEC], Defs = [M0, EXEC]
multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> {
- let UseNamedOperandTable = 1, Uses = [EXEC] in {
+ let UseNamedOperandTable = 1, SGPRSpill = 1, Uses = [EXEC] in {
def _SAVE : PseudoInstSI <
(outs),
- (ins sgpr_class:$src, i32imm:$frame_idx)> {
+ (ins sgpr_class:$data, i32imm:$addr)> {
let mayStore = 1;
let mayLoad = 0;
}
def _RESTORE : PseudoInstSI <
- (outs sgpr_class:$dst),
- (ins i32imm:$frame_idx)> {
+ (outs sgpr_class:$data),
+ (ins i32imm:$addr)> {
let mayStore = 0;
let mayLoad = 1;
}
} // End UseNamedOperandTable = 1
}
-// It's unclear whether you can use M0 as the output of v_readlane_b32
-// instructions, so use SReg_32_XM0 register class for spills to prevent
-// this from happening.
-defm SI_SPILL_S32 : SI_SPILL_SGPR <SReg_32_XM0>;
+// You cannot use M0 as the output of v_readlane_b32 instructions or
+// use it in the sdata operand of SMEM instructions. We still need to
+// be able to spill the physical register m0, so allow it for
+// SI_SPILL_32_* instructions.
+defm SI_SPILL_S32 : SI_SPILL_SGPR <SReg_32>;
defm SI_SPILL_S64 : SI_SPILL_SGPR <SReg_64>;
defm SI_SPILL_S128 : SI_SPILL_SGPR <SReg_128>;
defm SI_SPILL_S256 : SI_SPILL_SGPR <SReg_256>;
defm SI_SPILL_S512 : SI_SPILL_SGPR <SReg_512>;
multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> {
- let UseNamedOperandTable = 1, VGPRSpill = 1, Uses = [EXEC] in {
- def _SAVE : PseudoInstSI <
+ let UseNamedOperandTable = 1, VGPRSpill = 1,
+ SchedRW = [WriteVMEM] in {
+ def _SAVE : VPseudoInstSI <
(outs),
- (ins vgpr_class:$src, i32imm:$frame_idx, SReg_128:$scratch_rsrc,
- SReg_32:$scratch_offset, i32imm:$offset)> {
+ (ins vgpr_class:$vdata, i32imm:$vaddr, SReg_128:$srsrc,
+ SReg_32:$soffset, i32imm:$offset)> {
let mayStore = 1;
let mayLoad = 0;
+ // (2 * 4) + (8 * num_subregs) bytes maximum
+ let Size = !add(!shl(!srl(vgpr_class.Size, 5), 3), 8);
}
- def _RESTORE : PseudoInstSI <
- (outs vgpr_class:$dst),
- (ins i32imm:$frame_idx, SReg_128:$scratch_rsrc, SReg_32:$scratch_offset,
+ def _RESTORE : VPseudoInstSI <
+ (outs vgpr_class:$vdata),
+ (ins i32imm:$vaddr, SReg_128:$srsrc, SReg_32:$soffset,
i32imm:$offset)> {
let mayStore = 0;
let mayLoad = 1;
+
+ // (2 * 4) + (8 * num_subregs) bytes maximum
+ let Size = !add(!shl(!srl(vgpr_class.Size, 5), 3), 8);
}
- } // End UseNamedOperandTable = 1, VGPRSpill = 1
+ } // End UseNamedOperandTable = 1, VGPRSpill = 1, SchedRW = [WriteVMEM]
}
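// Note: the Size expression above encodes the "(2 * 4) + (8 * num_subregs)"
// bound from the comments: vgpr_class.Size is in bits, so !srl(Size, 5) gives
// the number of 32-bit sub-registers and !shl(..., 3) multiplies by 8 bytes.
// A small C++ sanity check of that arithmetic (register widths assumed from
// the class names) would be:
//
//   constexpr unsigned spillSizeBytes(unsigned regSizeInBits) {
//     return ((regSizeInBits >> 5) << 3) + 8; // 8 bytes per 32-bit subreg + 8
//   }
//   static_assert(spillSizeBytes(32)  == 16,  "VGPR_32: one subreg");
//   static_assert(spillSizeBytes(128) == 40,  "VReg_128: four subregs");
//   static_assert(spillSizeBytes(512) == 136, "VReg_512: sixteen subregs");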
defm SI_SPILL_V32 : SI_SPILL_VGPR <VGPR_32>;
@@ -2114,344 +372,26 @@ defm SI_SPILL_V128 : SI_SPILL_VGPR <VReg_128>;
defm SI_SPILL_V256 : SI_SPILL_VGPR <VReg_256>;
defm SI_SPILL_V512 : SI_SPILL_VGPR <VReg_512>;
-let Defs = [SCC] in {
-
-def SI_PC_ADD_REL_OFFSET : PseudoInstSI <
+def SI_PC_ADD_REL_OFFSET : SPseudoInstSI <
(outs SReg_64:$dst),
- (ins si_ga:$ptr),
- [(set SReg_64:$dst, (i64 (SIpc_add_rel_offset (tglobaladdr:$ptr))))]> {
- let SALU = 1;
+ (ins si_ga:$ptr_lo, si_ga:$ptr_hi),
+ [(set SReg_64:$dst,
+ (i64 (SIpc_add_rel_offset (tglobaladdr:$ptr_lo), (tglobaladdr:$ptr_hi))))]> {
+ let Defs = [SCC];
}
-} // End Defs = [SCC]
-
} // End SubtargetPredicate = isGCN
let Predicates = [isGCN] in {
-def : Pat <
- (int_AMDGPU_kilp),
- (SI_KILL 0xbf800000)
->;
-
-/* int_SI_vs_load_input */
def : Pat<
- (SIload_input v4i32:$tlst, imm:$attr_offset, i32:$buf_idx_vgpr),
- (BUFFER_LOAD_FORMAT_XYZW_IDXEN $buf_idx_vgpr, $tlst, 0, imm:$attr_offset, 0, 0, 0)
+ (int_amdgcn_else i64:$src, bb:$target),
+ (SI_ELSE $src, $target, 0)
>;
def : Pat <
- (int_SI_export imm:$en, imm:$vm, imm:$done, imm:$tgt, imm:$compr,
- f32:$src0, f32:$src1, f32:$src2, f32:$src3),
- (EXP imm:$en, imm:$tgt, imm:$compr, imm:$done, imm:$vm,
- $src0, $src1, $src2, $src3)
->;
-
-//===----------------------------------------------------------------------===//
-// buffer_load/store_format patterns
-//===----------------------------------------------------------------------===//
-
-multiclass MUBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
- string opcode> {
- def : Pat<
- (vt (name v4i32:$rsrc, 0,
- (MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
- imm:$glc, imm:$slc)),
- (!cast<MUBUF>(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset),
- (as_i1imm $glc), (as_i1imm $slc), 0)
- >;
-
- def : Pat<
- (vt (name v4i32:$rsrc, i32:$vindex,
- (MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
- imm:$glc, imm:$slc)),
- (!cast<MUBUF>(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset),
- (as_i1imm $glc), (as_i1imm $slc), 0)
- >;
-
- def : Pat<
- (vt (name v4i32:$rsrc, 0,
- (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
- imm:$glc, imm:$slc)),
- (!cast<MUBUF>(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset),
- (as_i1imm $glc), (as_i1imm $slc), 0)
- >;
-
- def : Pat<
- (vt (name v4i32:$rsrc, i32:$vindex,
- (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
- imm:$glc, imm:$slc)),
- (!cast<MUBUF>(opcode # _BOTHEN)
- (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
- $rsrc, $soffset, (as_i16imm $offset),
- (as_i1imm $glc), (as_i1imm $slc), 0)
- >;
-}
-
-defm : MUBUF_LoadIntrinsicPat<int_amdgcn_buffer_load_format, f32, "BUFFER_LOAD_FORMAT_X">;
-defm : MUBUF_LoadIntrinsicPat<int_amdgcn_buffer_load_format, v2f32, "BUFFER_LOAD_FORMAT_XY">;
-defm : MUBUF_LoadIntrinsicPat<int_amdgcn_buffer_load_format, v4f32, "BUFFER_LOAD_FORMAT_XYZW">;
-defm : MUBUF_LoadIntrinsicPat<int_amdgcn_buffer_load, f32, "BUFFER_LOAD_DWORD">;
-defm : MUBUF_LoadIntrinsicPat<int_amdgcn_buffer_load, v2f32, "BUFFER_LOAD_DWORDX2">;
-defm : MUBUF_LoadIntrinsicPat<int_amdgcn_buffer_load, v4f32, "BUFFER_LOAD_DWORDX4">;
-
-multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
- string opcode> {
- def : Pat<
- (name vt:$vdata, v4i32:$rsrc, 0,
- (MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
- imm:$glc, imm:$slc),
- (!cast<MUBUF>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset, (as_i16imm $offset),
- (as_i1imm $glc), (as_i1imm $slc), 0)
- >;
-
- def : Pat<
- (name vt:$vdata, v4i32:$rsrc, i32:$vindex,
- (MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
- imm:$glc, imm:$slc),
- (!cast<MUBUF>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset,
- (as_i16imm $offset), (as_i1imm $glc),
- (as_i1imm $slc), 0)
- >;
-
- def : Pat<
- (name vt:$vdata, v4i32:$rsrc, 0,
- (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
- imm:$glc, imm:$slc),
- (!cast<MUBUF>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset,
- (as_i16imm $offset), (as_i1imm $glc),
- (as_i1imm $slc), 0)
- >;
-
- def : Pat<
- (name vt:$vdata, v4i32:$rsrc, i32:$vindex,
- (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
- imm:$glc, imm:$slc),
- (!cast<MUBUF>(opcode # _BOTHEN_exact)
- $vdata,
- (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
- $rsrc, $soffset, (as_i16imm $offset),
- (as_i1imm $glc), (as_i1imm $slc), 0)
- >;
-}
-
-defm : MUBUF_StoreIntrinsicPat<int_amdgcn_buffer_store_format, f32, "BUFFER_STORE_FORMAT_X">;
-defm : MUBUF_StoreIntrinsicPat<int_amdgcn_buffer_store_format, v2f32, "BUFFER_STORE_FORMAT_XY">;
-defm : MUBUF_StoreIntrinsicPat<int_amdgcn_buffer_store_format, v4f32, "BUFFER_STORE_FORMAT_XYZW">;
-defm : MUBUF_StoreIntrinsicPat<int_amdgcn_buffer_store, f32, "BUFFER_STORE_DWORD">;
-defm : MUBUF_StoreIntrinsicPat<int_amdgcn_buffer_store, v2f32, "BUFFER_STORE_DWORDX2">;
-defm : MUBUF_StoreIntrinsicPat<int_amdgcn_buffer_store, v4f32, "BUFFER_STORE_DWORDX4">;
-
-//===----------------------------------------------------------------------===//
-// buffer_atomic patterns
-//===----------------------------------------------------------------------===//
-multiclass BufferAtomicPatterns<SDPatternOperator name, string opcode> {
- def : Pat<
- (name i32:$vdata_in, v4i32:$rsrc, 0,
- (MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
- imm:$slc),
- (!cast<MUBUF>(opcode # _RTN_OFFSET) $vdata_in, $rsrc, $soffset,
- (as_i16imm $offset), (as_i1imm $slc))
- >;
-
- def : Pat<
- (name i32:$vdata_in, v4i32:$rsrc, i32:$vindex,
- (MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
- imm:$slc),
- (!cast<MUBUF>(opcode # _RTN_IDXEN) $vdata_in, $vindex, $rsrc, $soffset,
- (as_i16imm $offset), (as_i1imm $slc))
- >;
-
- def : Pat<
- (name i32:$vdata_in, v4i32:$rsrc, 0,
- (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
- imm:$slc),
- (!cast<MUBUF>(opcode # _RTN_OFFEN) $vdata_in, $voffset, $rsrc, $soffset,
- (as_i16imm $offset), (as_i1imm $slc))
- >;
-
- def : Pat<
- (name i32:$vdata_in, v4i32:$rsrc, i32:$vindex,
- (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
- imm:$slc),
- (!cast<MUBUF>(opcode # _RTN_BOTHEN)
- $vdata_in,
- (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
- $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $slc))
- >;
-}
-
-defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_swap, "BUFFER_ATOMIC_SWAP">;
-defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_add, "BUFFER_ATOMIC_ADD">;
-defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_sub, "BUFFER_ATOMIC_SUB">;
-defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_smin, "BUFFER_ATOMIC_SMIN">;
-defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_umin, "BUFFER_ATOMIC_UMIN">;
-defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_smax, "BUFFER_ATOMIC_SMAX">;
-defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_umax, "BUFFER_ATOMIC_UMAX">;
-defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_and, "BUFFER_ATOMIC_AND">;
-defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_or, "BUFFER_ATOMIC_OR">;
-defm : BufferAtomicPatterns<int_amdgcn_buffer_atomic_xor, "BUFFER_ATOMIC_XOR">;
-
-def : Pat<
- (int_amdgcn_buffer_atomic_cmpswap
- i32:$data, i32:$cmp, v4i32:$rsrc, 0,
- (MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
- imm:$slc),
- (EXTRACT_SUBREG
- (BUFFER_ATOMIC_CMPSWAP_RTN_OFFSET
- (REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
- $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $slc)),
- sub0)
->;
-
-def : Pat<
- (int_amdgcn_buffer_atomic_cmpswap
- i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex,
- (MUBUFIntrinsicOffset i32:$soffset, i16:$offset),
- imm:$slc),
- (EXTRACT_SUBREG
- (BUFFER_ATOMIC_CMPSWAP_RTN_IDXEN
- (REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
- $vindex, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $slc)),
- sub0)
->;
-
-def : Pat<
- (int_amdgcn_buffer_atomic_cmpswap
- i32:$data, i32:$cmp, v4i32:$rsrc, 0,
- (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
- imm:$slc),
- (EXTRACT_SUBREG
- (BUFFER_ATOMIC_CMPSWAP_RTN_OFFEN
- (REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
- $voffset, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $slc)),
- sub0)
->;
-
-def : Pat<
- (int_amdgcn_buffer_atomic_cmpswap
- i32:$data, i32:$cmp, v4i32:$rsrc, i32:$vindex,
- (MUBUFIntrinsicVOffset i32:$soffset, i16:$offset, i32:$voffset),
- imm:$slc),
- (EXTRACT_SUBREG
- (BUFFER_ATOMIC_CMPSWAP_RTN_BOTHEN
- (REG_SEQUENCE VReg_64, $data, sub0, $cmp, sub1),
- (REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
- $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $slc)),
- sub0)
->;
-
-
-//===----------------------------------------------------------------------===//
-// S_GETREG_B32 Intrinsic Pattern.
-//===----------------------------------------------------------------------===//
-def : Pat <
- (int_amdgcn_s_getreg imm:$simm16),
- (S_GETREG_B32 (as_i16imm $simm16))
->;
-
-//===----------------------------------------------------------------------===//
-// DS_SWIZZLE Intrinsic Pattern.
-//===----------------------------------------------------------------------===//
-def : Pat <
- (int_amdgcn_ds_swizzle i32:$src, imm:$offset16),
- (DS_SWIZZLE_B32 $src, (as_i16imm $offset16), (i1 0))
->;
-
-//===----------------------------------------------------------------------===//
-// SMRD Patterns
-//===----------------------------------------------------------------------===//
-
-multiclass SMRD_Pattern <string Instr, ValueType vt> {
-
- // 1. IMM offset
- def : Pat <
- (smrd_load (SMRDImm i64:$sbase, i32:$offset)),
- (vt (!cast<SMRD>(Instr#"_IMM") $sbase, $offset))
- >;
-
- // 2. SGPR offset
- def : Pat <
- (smrd_load (SMRDSgpr i64:$sbase, i32:$offset)),
- (vt (!cast<SMRD>(Instr#"_SGPR") $sbase, $offset))
- >;
-
- def : Pat <
- (smrd_load (SMRDImm32 i64:$sbase, i32:$offset)),
- (vt (!cast<SMRD>(Instr#"_IMM_ci") $sbase, $offset))
- > {
- let Predicates = [isCIOnly];
- }
-}
-
-// Global and constant loads can be selected to either MUBUF or SMRD
-// instructions, but SMRD instructions are faster so we want the instruction
-// selector to prefer those.
-let AddedComplexity = 100 in {
-
-defm : SMRD_Pattern <"S_LOAD_DWORD", i32>;
-defm : SMRD_Pattern <"S_LOAD_DWORDX2", v2i32>;
-defm : SMRD_Pattern <"S_LOAD_DWORDX4", v4i32>;
-defm : SMRD_Pattern <"S_LOAD_DWORDX8", v8i32>;
-defm : SMRD_Pattern <"S_LOAD_DWORDX16", v16i32>;
-
-// 1. Offset as an immediate
-def : Pat <
- (SIload_constant v4i32:$sbase, (SMRDBufferImm i32:$offset)),
- (S_BUFFER_LOAD_DWORD_IMM $sbase, $offset)
->;
-
-// 2. Offset loaded in an 32bit SGPR
-def : Pat <
- (SIload_constant v4i32:$sbase, (SMRDBufferSgpr i32:$offset)),
- (S_BUFFER_LOAD_DWORD_SGPR $sbase, $offset)
->;
-
-let Predicates = [isCI] in {
-
-def : Pat <
- (SIload_constant v4i32:$sbase, (SMRDBufferImm32 i32:$offset)),
- (S_BUFFER_LOAD_DWORD_IMM_ci $sbase, $offset)
->;
-
-} // End Predicates = [isCI]
-
-} // End let AddedComplexity = 10000
-
-//===----------------------------------------------------------------------===//
-// SOP1 Patterns
-//===----------------------------------------------------------------------===//
-
-def : Pat <
- (i64 (ctpop i64:$src)),
- (i64 (REG_SEQUENCE SReg_64,
- (i32 (COPY_TO_REGCLASS (S_BCNT1_I32_B64 $src), SReg_32)), sub0,
- (S_MOV_B32 0), sub1))
->;
-
-def : Pat <
- (i32 (smax i32:$x, (i32 (ineg i32:$x)))),
- (S_ABS_I32 $x)
->;
-
-//===----------------------------------------------------------------------===//
-// SOP2 Patterns
-//===----------------------------------------------------------------------===//
-
-// V_ADD_I32_e32/S_ADD_U32 produces carry in VCC/SCC. For the vector
-// case, the sgpr-copies pass will fix this to use the vector version.
-def : Pat <
- (i32 (addc i32:$src0, i32:$src1)),
- (S_ADD_U32 $src0, $src1)
->;
-
-//===----------------------------------------------------------------------===//
-// SOPP Patterns
-//===----------------------------------------------------------------------===//
-
-def : Pat <
- (int_amdgcn_s_waitcnt i32:$simm16),
- (S_WAITCNT (as_i16imm $simm16))
+ (int_AMDGPU_kilp),
+ (SI_KILL (i32 0xbf800000))
>;
//===----------------------------------------------------------------------===//
@@ -2483,308 +423,79 @@ def : Pat <
} // End Predicates = [UnsafeFPMath]
-//===----------------------------------------------------------------------===//
-// VOP2 Patterns
-//===----------------------------------------------------------------------===//
-
def : Pat <
- (i32 (add (i32 (ctpop i32:$popcnt)), i32:$val)),
- (V_BCNT_U32_B32_e64 $popcnt, $val)
+ (f32 (fpextend f16:$src)),
+ (V_CVT_F32_F16_e32 $src)
>;
def : Pat <
- (i32 (select i1:$src0, i32:$src1, i32:$src2)),
- (V_CNDMASK_B32_e64 $src2, $src1, $src0)
+ (f64 (fpextend f16:$src)),
+ (V_CVT_F64_F32_e32 (V_CVT_F32_F16_e32 $src))
>;
-// Pattern for V_MAC_F32
def : Pat <
- (fmad (VOP3NoMods0 f32:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod),
- (VOP3NoMods f32:$src1, i32:$src1_modifiers),
- (VOP3NoMods f32:$src2, i32:$src2_modifiers)),
- (V_MAC_F32_e64 $src0_modifiers, $src0, $src1_modifiers, $src1,
- $src2_modifiers, $src2, $clamp, $omod)
->;
-
-/********** ======================= **********/
-/********** Image sampling patterns **********/
-/********** ======================= **********/
-
-// Image + sampler
-class SampleRawPattern<SDPatternOperator name, MIMG opcode, ValueType vt> : Pat <
- (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, i32:$dmask, i32:$unorm,
- i32:$r128, i32:$da, i32:$glc, i32:$slc, i32:$tfe, i32:$lwe),
- (opcode $addr, $rsrc, $sampler,
- (as_i32imm $dmask), (as_i1imm $unorm), (as_i1imm $glc), (as_i1imm $slc),
- (as_i1imm $r128), (as_i1imm $tfe), (as_i1imm $lwe), (as_i1imm $da))
->;
-
-multiclass SampleRawPatterns<SDPatternOperator name, string opcode> {
- def : SampleRawPattern<name, !cast<MIMG>(opcode # _V4_V1), i32>;
- def : SampleRawPattern<name, !cast<MIMG>(opcode # _V4_V2), v2i32>;
- def : SampleRawPattern<name, !cast<MIMG>(opcode # _V4_V4), v4i32>;
- def : SampleRawPattern<name, !cast<MIMG>(opcode # _V4_V8), v8i32>;
- def : SampleRawPattern<name, !cast<MIMG>(opcode # _V4_V16), v16i32>;
-}
-
-// Image only
-class ImagePattern<SDPatternOperator name, MIMG opcode, ValueType vt> : Pat <
- (name vt:$addr, v8i32:$rsrc, imm:$dmask, imm:$unorm,
- imm:$r128, imm:$da, imm:$glc, imm:$slc, imm:$tfe, imm:$lwe),
- (opcode $addr, $rsrc,
- (as_i32imm $dmask), (as_i1imm $unorm), (as_i1imm $glc), (as_i1imm $slc),
- (as_i1imm $r128), (as_i1imm $tfe), (as_i1imm $lwe), (as_i1imm $da))
+ (f16 (fpround f32:$src)),
+ (V_CVT_F16_F32_e32 $src)
>;
-multiclass ImagePatterns<SDPatternOperator name, string opcode> {
- def : ImagePattern<name, !cast<MIMG>(opcode # _V4_V1), i32>;
- def : ImagePattern<name, !cast<MIMG>(opcode # _V4_V2), v2i32>;
- def : ImagePattern<name, !cast<MIMG>(opcode # _V4_V4), v4i32>;
-}
-
-class ImageLoadPattern<SDPatternOperator name, MIMG opcode, ValueType vt> : Pat <
- (name vt:$addr, v8i32:$rsrc, imm:$dmask, imm:$r128, imm:$da, imm:$glc,
- imm:$slc),
- (opcode $addr, $rsrc,
- (as_i32imm $dmask), 1, (as_i1imm $glc), (as_i1imm $slc),
- (as_i1imm $r128), 0, 0, (as_i1imm $da))
->;
-
-multiclass ImageLoadPatterns<SDPatternOperator name, string opcode> {
- def : ImageLoadPattern<name, !cast<MIMG>(opcode # _V4_V1), i32>;
- def : ImageLoadPattern<name, !cast<MIMG>(opcode # _V4_V2), v2i32>;
- def : ImageLoadPattern<name, !cast<MIMG>(opcode # _V4_V4), v4i32>;
-}
-
-class ImageStorePattern<SDPatternOperator name, MIMG opcode, ValueType vt> : Pat <
- (name v4f32:$data, vt:$addr, v8i32:$rsrc, i32:$dmask, imm:$r128, imm:$da,
- imm:$glc, imm:$slc),
- (opcode $data, $addr, $rsrc,
- (as_i32imm $dmask), 1, (as_i1imm $glc), (as_i1imm $slc),
- (as_i1imm $r128), 0, 0, (as_i1imm $da))
->;
-
-multiclass ImageStorePatterns<SDPatternOperator name, string opcode> {
- def : ImageStorePattern<name, !cast<MIMG>(opcode # _V4_V1), i32>;
- def : ImageStorePattern<name, !cast<MIMG>(opcode # _V4_V2), v2i32>;
- def : ImageStorePattern<name, !cast<MIMG>(opcode # _V4_V4), v4i32>;
-}
-
-class ImageAtomicPattern<SDPatternOperator name, MIMG opcode, ValueType vt> : Pat <
- (name i32:$vdata, vt:$addr, v8i32:$rsrc, imm:$r128, imm:$da, imm:$slc),
- (opcode $vdata, $addr, $rsrc, 1, 1, 1, (as_i1imm $slc), (as_i1imm $r128), 0, 0, (as_i1imm $da))
->;
-
-multiclass ImageAtomicPatterns<SDPatternOperator name, string opcode> {
- def : ImageAtomicPattern<name, !cast<MIMG>(opcode # _V1), i32>;
- def : ImageAtomicPattern<name, !cast<MIMG>(opcode # _V2), v2i32>;
- def : ImageAtomicPattern<name, !cast<MIMG>(opcode # _V4), v4i32>;
-}
-
-class ImageAtomicCmpSwapPattern<MIMG opcode, ValueType vt> : Pat <
- (int_amdgcn_image_atomic_cmpswap i32:$vsrc, i32:$vcmp, vt:$addr, v8i32:$rsrc,
- imm:$r128, imm:$da, imm:$slc),
- (EXTRACT_SUBREG
- (opcode (REG_SEQUENCE VReg_64, $vsrc, sub0, $vcmp, sub1),
- $addr, $rsrc, 3, 1, 1, (as_i1imm $slc), (as_i1imm $r128), 0, 0, (as_i1imm $da)),
- sub0)
->;
-
-// Basic sample
-defm : SampleRawPatterns<int_SI_image_sample, "IMAGE_SAMPLE">;
-defm : SampleRawPatterns<int_SI_image_sample_cl, "IMAGE_SAMPLE_CL">;
-defm : SampleRawPatterns<int_SI_image_sample_d, "IMAGE_SAMPLE_D">;
-defm : SampleRawPatterns<int_SI_image_sample_d_cl, "IMAGE_SAMPLE_D_CL">;
-defm : SampleRawPatterns<int_SI_image_sample_l, "IMAGE_SAMPLE_L">;
-defm : SampleRawPatterns<int_SI_image_sample_b, "IMAGE_SAMPLE_B">;
-defm : SampleRawPatterns<int_SI_image_sample_b_cl, "IMAGE_SAMPLE_B_CL">;
-defm : SampleRawPatterns<int_SI_image_sample_lz, "IMAGE_SAMPLE_LZ">;
-defm : SampleRawPatterns<int_SI_image_sample_cd, "IMAGE_SAMPLE_CD">;
-defm : SampleRawPatterns<int_SI_image_sample_cd_cl, "IMAGE_SAMPLE_CD_CL">;
-
-// Sample with comparison
-defm : SampleRawPatterns<int_SI_image_sample_c, "IMAGE_SAMPLE_C">;
-defm : SampleRawPatterns<int_SI_image_sample_c_cl, "IMAGE_SAMPLE_C_CL">;
-defm : SampleRawPatterns<int_SI_image_sample_c_d, "IMAGE_SAMPLE_C_D">;
-defm : SampleRawPatterns<int_SI_image_sample_c_d_cl, "IMAGE_SAMPLE_C_D_CL">;
-defm : SampleRawPatterns<int_SI_image_sample_c_l, "IMAGE_SAMPLE_C_L">;
-defm : SampleRawPatterns<int_SI_image_sample_c_b, "IMAGE_SAMPLE_C_B">;
-defm : SampleRawPatterns<int_SI_image_sample_c_b_cl, "IMAGE_SAMPLE_C_B_CL">;
-defm : SampleRawPatterns<int_SI_image_sample_c_lz, "IMAGE_SAMPLE_C_LZ">;
-defm : SampleRawPatterns<int_SI_image_sample_c_cd, "IMAGE_SAMPLE_C_CD">;
-defm : SampleRawPatterns<int_SI_image_sample_c_cd_cl, "IMAGE_SAMPLE_C_CD_CL">;
-
-// Sample with offsets
-defm : SampleRawPatterns<int_SI_image_sample_o, "IMAGE_SAMPLE_O">;
-defm : SampleRawPatterns<int_SI_image_sample_cl_o, "IMAGE_SAMPLE_CL_O">;
-defm : SampleRawPatterns<int_SI_image_sample_d_o, "IMAGE_SAMPLE_D_O">;
-defm : SampleRawPatterns<int_SI_image_sample_d_cl_o, "IMAGE_SAMPLE_D_CL_O">;
-defm : SampleRawPatterns<int_SI_image_sample_l_o, "IMAGE_SAMPLE_L_O">;
-defm : SampleRawPatterns<int_SI_image_sample_b_o, "IMAGE_SAMPLE_B_O">;
-defm : SampleRawPatterns<int_SI_image_sample_b_cl_o, "IMAGE_SAMPLE_B_CL_O">;
-defm : SampleRawPatterns<int_SI_image_sample_lz_o, "IMAGE_SAMPLE_LZ_O">;
-defm : SampleRawPatterns<int_SI_image_sample_cd_o, "IMAGE_SAMPLE_CD_O">;
-defm : SampleRawPatterns<int_SI_image_sample_cd_cl_o, "IMAGE_SAMPLE_CD_CL_O">;
-
-// Sample with comparison and offsets
-defm : SampleRawPatterns<int_SI_image_sample_c_o, "IMAGE_SAMPLE_C_O">;
-defm : SampleRawPatterns<int_SI_image_sample_c_cl_o, "IMAGE_SAMPLE_C_CL_O">;
-defm : SampleRawPatterns<int_SI_image_sample_c_d_o, "IMAGE_SAMPLE_C_D_O">;
-defm : SampleRawPatterns<int_SI_image_sample_c_d_cl_o, "IMAGE_SAMPLE_C_D_CL_O">;
-defm : SampleRawPatterns<int_SI_image_sample_c_l_o, "IMAGE_SAMPLE_C_L_O">;
-defm : SampleRawPatterns<int_SI_image_sample_c_b_o, "IMAGE_SAMPLE_C_B_O">;
-defm : SampleRawPatterns<int_SI_image_sample_c_b_cl_o, "IMAGE_SAMPLE_C_B_CL_O">;
-defm : SampleRawPatterns<int_SI_image_sample_c_lz_o, "IMAGE_SAMPLE_C_LZ_O">;
-defm : SampleRawPatterns<int_SI_image_sample_c_cd_o, "IMAGE_SAMPLE_C_CD_O">;
-defm : SampleRawPatterns<int_SI_image_sample_c_cd_cl_o, "IMAGE_SAMPLE_C_CD_CL_O">;
-
-// Gather opcodes
-// Only the variants which make sense are defined.
-def : SampleRawPattern<int_SI_gather4, IMAGE_GATHER4_V4_V2, v2i32>;
-def : SampleRawPattern<int_SI_gather4, IMAGE_GATHER4_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_cl, IMAGE_GATHER4_CL_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_l, IMAGE_GATHER4_L_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_b, IMAGE_GATHER4_B_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_b_cl, IMAGE_GATHER4_B_CL_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_b_cl, IMAGE_GATHER4_B_CL_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_lz, IMAGE_GATHER4_LZ_V4_V2, v2i32>;
-def : SampleRawPattern<int_SI_gather4_lz, IMAGE_GATHER4_LZ_V4_V4, v4i32>;
-
-def : SampleRawPattern<int_SI_gather4_c, IMAGE_GATHER4_C_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_c_cl, IMAGE_GATHER4_C_CL_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_c_cl, IMAGE_GATHER4_C_CL_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_c_l, IMAGE_GATHER4_C_L_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_c_l, IMAGE_GATHER4_C_L_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_c_b, IMAGE_GATHER4_C_B_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_c_b, IMAGE_GATHER4_C_B_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_c_b_cl, IMAGE_GATHER4_C_B_CL_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_c_lz, IMAGE_GATHER4_C_LZ_V4_V4, v4i32>;
-
-def : SampleRawPattern<int_SI_gather4_o, IMAGE_GATHER4_O_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_cl_o, IMAGE_GATHER4_CL_O_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_cl_o, IMAGE_GATHER4_CL_O_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_l_o, IMAGE_GATHER4_L_O_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_l_o, IMAGE_GATHER4_L_O_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_b_o, IMAGE_GATHER4_B_O_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_b_o, IMAGE_GATHER4_B_O_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_b_cl_o, IMAGE_GATHER4_B_CL_O_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_lz_o, IMAGE_GATHER4_LZ_O_V4_V4, v4i32>;
-
-def : SampleRawPattern<int_SI_gather4_c_o, IMAGE_GATHER4_C_O_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_c_o, IMAGE_GATHER4_C_O_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_c_cl_o, IMAGE_GATHER4_C_CL_O_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_c_l_o, IMAGE_GATHER4_C_L_O_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_c_b_o, IMAGE_GATHER4_C_B_O_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_c_b_cl_o, IMAGE_GATHER4_C_B_CL_O_V4_V8, v8i32>;
-def : SampleRawPattern<int_SI_gather4_c_lz_o, IMAGE_GATHER4_C_LZ_O_V4_V4, v4i32>;
-def : SampleRawPattern<int_SI_gather4_c_lz_o, IMAGE_GATHER4_C_LZ_O_V4_V8, v8i32>;
-
-def : SampleRawPattern<int_SI_getlod, IMAGE_GET_LOD_V4_V1, i32>;
-def : SampleRawPattern<int_SI_getlod, IMAGE_GET_LOD_V4_V2, v2i32>;
-def : SampleRawPattern<int_SI_getlod, IMAGE_GET_LOD_V4_V4, v4i32>;
-
-def : ImagePattern<int_SI_getresinfo, IMAGE_GET_RESINFO_V4_V1, i32>;
-defm : ImagePatterns<int_SI_image_load, "IMAGE_LOAD">;
-defm : ImagePatterns<int_SI_image_load_mip, "IMAGE_LOAD_MIP">;
-defm : ImageLoadPatterns<int_amdgcn_image_load, "IMAGE_LOAD">;
-defm : ImageLoadPatterns<int_amdgcn_image_load_mip, "IMAGE_LOAD_MIP">;
-defm : ImageStorePatterns<int_amdgcn_image_store, "IMAGE_STORE">;
-defm : ImageStorePatterns<int_amdgcn_image_store_mip, "IMAGE_STORE_MIP">;
-defm : ImageAtomicPatterns<int_amdgcn_image_atomic_swap, "IMAGE_ATOMIC_SWAP">;
-def : ImageAtomicCmpSwapPattern<IMAGE_ATOMIC_CMPSWAP_V1, i32>;
-def : ImageAtomicCmpSwapPattern<IMAGE_ATOMIC_CMPSWAP_V2, v2i32>;
-def : ImageAtomicCmpSwapPattern<IMAGE_ATOMIC_CMPSWAP_V4, v4i32>;
-defm : ImageAtomicPatterns<int_amdgcn_image_atomic_add, "IMAGE_ATOMIC_ADD">;
-defm : ImageAtomicPatterns<int_amdgcn_image_atomic_sub, "IMAGE_ATOMIC_SUB">;
-defm : ImageAtomicPatterns<int_amdgcn_image_atomic_smin, "IMAGE_ATOMIC_SMIN">;
-defm : ImageAtomicPatterns<int_amdgcn_image_atomic_umin, "IMAGE_ATOMIC_UMIN">;
-defm : ImageAtomicPatterns<int_amdgcn_image_atomic_smax, "IMAGE_ATOMIC_SMAX">;
-defm : ImageAtomicPatterns<int_amdgcn_image_atomic_umax, "IMAGE_ATOMIC_UMAX">;
-defm : ImageAtomicPatterns<int_amdgcn_image_atomic_and, "IMAGE_ATOMIC_AND">;
-defm : ImageAtomicPatterns<int_amdgcn_image_atomic_or, "IMAGE_ATOMIC_OR">;
-defm : ImageAtomicPatterns<int_amdgcn_image_atomic_xor, "IMAGE_ATOMIC_XOR">;
-defm : ImageAtomicPatterns<int_amdgcn_image_atomic_inc, "IMAGE_ATOMIC_INC">;
-defm : ImageAtomicPatterns<int_amdgcn_image_atomic_dec, "IMAGE_ATOMIC_DEC">;
-
-/* SIsample for simple 1D texture lookup */
def : Pat <
- (SIsample i32:$addr, v8i32:$rsrc, v4i32:$sampler, imm),
- (IMAGE_SAMPLE_V4_V1 $addr, $rsrc, $sampler, 0xf, 0, 0, 0, 0, 0, 0, 0)
->;
-
-class SamplePattern<SDNode name, MIMG opcode, ValueType vt> : Pat <
- (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, imm),
- (opcode $addr, $rsrc, $sampler, 0xf, 0, 0, 0, 0, 0, 0, 0)
+ (f16 (fpround f64:$src)),
+ (V_CVT_F16_F32_e32 (V_CVT_F32_F64_e32 $src))
>;
-class SampleRectPattern<SDNode name, MIMG opcode, ValueType vt> : Pat <
- (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, TEX_RECT),
- (opcode $addr, $rsrc, $sampler, 0xf, 1, 0, 0, 0, 0, 0, 0)
+def : Pat <
+ (i32 (fp_to_sint f16:$src)),
+ (V_CVT_I32_F32_e32 (V_CVT_F32_F16_e32 $src))
>;
-class SampleArrayPattern<SDNode name, MIMG opcode, ValueType vt> : Pat <
- (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, TEX_ARRAY),
- (opcode $addr, $rsrc, $sampler, 0xf, 0, 0, 0, 0, 0, 0, 1)
+def : Pat <
+ (i32 (fp_to_uint f16:$src)),
+ (V_CVT_U32_F32_e32 (V_CVT_F32_F16_e32 $src))
>;
-class SampleShadowPattern<SDNode name, MIMG opcode,
- ValueType vt> : Pat <
- (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, TEX_SHADOW),
- (opcode $addr, $rsrc, $sampler, 0xf, 0, 0, 0, 0, 0, 0, 0)
+def : Pat <
+ (f16 (sint_to_fp i32:$src)),
+ (V_CVT_F16_F32_e32 (V_CVT_F32_I32_e32 $src))
>;
-class SampleShadowArrayPattern<SDNode name, MIMG opcode,
- ValueType vt> : Pat <
- (name vt:$addr, v8i32:$rsrc, v4i32:$sampler, TEX_SHADOW_ARRAY),
- (opcode $addr, $rsrc, $sampler, 0xf, 0, 0, 0, 0, 0, 0, 1)
+def : Pat <
+ (f16 (uint_to_fp i32:$src)),
+ (V_CVT_F16_F32_e32 (V_CVT_F32_U32_e32 $src))
>;
-/* SIsample* for texture lookups consuming more address parameters */
-multiclass SamplePatterns<MIMG sample, MIMG sample_c, MIMG sample_l,
- MIMG sample_c_l, MIMG sample_b, MIMG sample_c_b,
-MIMG sample_d, MIMG sample_c_d, ValueType addr_type> {
- def : SamplePattern <SIsample, sample, addr_type>;
- def : SampleRectPattern <SIsample, sample, addr_type>;
- def : SampleArrayPattern <SIsample, sample, addr_type>;
- def : SampleShadowPattern <SIsample, sample_c, addr_type>;
- def : SampleShadowArrayPattern <SIsample, sample_c, addr_type>;
+//===----------------------------------------------------------------------===//
+// VOP2 Patterns
+//===----------------------------------------------------------------------===//
- def : SamplePattern <SIsamplel, sample_l, addr_type>;
- def : SampleArrayPattern <SIsamplel, sample_l, addr_type>;
- def : SampleShadowPattern <SIsamplel, sample_c_l, addr_type>;
- def : SampleShadowArrayPattern <SIsamplel, sample_c_l, addr_type>;
+multiclass FMADPat <ValueType vt, Instruction inst> {
+ def : Pat <
+ (vt (fmad (VOP3NoMods0 vt:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod),
+ (VOP3NoMods vt:$src1, i32:$src1_modifiers),
+ (VOP3NoMods vt:$src2, i32:$src2_modifiers))),
+ (inst $src0_modifiers, $src0, $src1_modifiers, $src1,
+ $src2_modifiers, $src2, $clamp, $omod)
+ >;
+}
- def : SamplePattern <SIsampleb, sample_b, addr_type>;
- def : SampleArrayPattern <SIsampleb, sample_b, addr_type>;
- def : SampleShadowPattern <SIsampleb, sample_c_b, addr_type>;
- def : SampleShadowArrayPattern <SIsampleb, sample_c_b, addr_type>;
+defm : FMADPat <f16, V_MAC_F16_e64>;
+defm : FMADPat <f32, V_MAC_F32_e64>;
- def : SamplePattern <SIsampled, sample_d, addr_type>;
- def : SampleArrayPattern <SIsampled, sample_d, addr_type>;
- def : SampleShadowPattern <SIsampled, sample_c_d, addr_type>;
- def : SampleShadowArrayPattern <SIsampled, sample_c_d, addr_type>;
+multiclass SelectPat <ValueType vt, Instruction inst> {
+ def : Pat <
+ (vt (select i1:$src0, vt:$src1, vt:$src2)),
+ (inst $src2, $src1, $src0)
+ >;
}
-defm : SamplePatterns<IMAGE_SAMPLE_V4_V2, IMAGE_SAMPLE_C_V4_V2,
- IMAGE_SAMPLE_L_V4_V2, IMAGE_SAMPLE_C_L_V4_V2,
- IMAGE_SAMPLE_B_V4_V2, IMAGE_SAMPLE_C_B_V4_V2,
- IMAGE_SAMPLE_D_V4_V2, IMAGE_SAMPLE_C_D_V4_V2,
- v2i32>;
-defm : SamplePatterns<IMAGE_SAMPLE_V4_V4, IMAGE_SAMPLE_C_V4_V4,
- IMAGE_SAMPLE_L_V4_V4, IMAGE_SAMPLE_C_L_V4_V4,
- IMAGE_SAMPLE_B_V4_V4, IMAGE_SAMPLE_C_B_V4_V4,
- IMAGE_SAMPLE_D_V4_V4, IMAGE_SAMPLE_C_D_V4_V4,
- v4i32>;
-defm : SamplePatterns<IMAGE_SAMPLE_V4_V8, IMAGE_SAMPLE_C_V4_V8,
- IMAGE_SAMPLE_L_V4_V8, IMAGE_SAMPLE_C_L_V4_V8,
- IMAGE_SAMPLE_B_V4_V8, IMAGE_SAMPLE_C_B_V4_V8,
- IMAGE_SAMPLE_D_V4_V8, IMAGE_SAMPLE_C_D_V4_V8,
- v8i32>;
-defm : SamplePatterns<IMAGE_SAMPLE_V4_V16, IMAGE_SAMPLE_C_V4_V16,
- IMAGE_SAMPLE_L_V4_V16, IMAGE_SAMPLE_C_L_V4_V16,
- IMAGE_SAMPLE_B_V4_V16, IMAGE_SAMPLE_C_B_V4_V16,
- IMAGE_SAMPLE_D_V4_V16, IMAGE_SAMPLE_C_D_V4_V16,
- v16i32>;
+defm : SelectPat <i16, V_CNDMASK_B32_e64>;
+defm : SelectPat <i32, V_CNDMASK_B32_e64>;
+defm : SelectPat <f16, V_CNDMASK_B32_e64>;
+defm : SelectPat <f32, V_CNDMASK_B32_e64>;
+
+def : Pat <
+ (i32 (add (i32 (ctpop i32:$popcnt)), i32:$val)),
+ (V_BCNT_U32_B32_e64 $popcnt, $val)
+>;
/********** ============================================ **********/
/********** Extraction, Insertion, Building and Casting **********/
@@ -2856,6 +567,12 @@ foreach Index = 0-15 in {
// FIXME: Why do only some of these type combinations for SReg and
// VReg?
+// 16-bit bitcast
+def : BitConvert <i16, f16, VGPR_32>;
+def : BitConvert <f16, i16, VGPR_32>;
+def : BitConvert <i16, f16, SReg_32>;
+def : BitConvert <f16, i16, SReg_32>;
+
// 32-bit bitcast
def : BitConvert <i32, f32, VGPR_32>;
def : BitConvert <f32, i32, VGPR_32>;
@@ -2905,7 +622,7 @@ def : BitConvert <v16f32, v16i32, VReg_512>;
def : Pat <
(AMDGPUclamp (VOP3Mods0Clamp f32:$src0, i32:$src0_modifiers, i32:$omod),
(f32 FP_ZERO), (f32 FP_ONE)),
- (V_ADD_F32_e64 $src0_modifiers, $src0, 0, 0, 1, $omod)
+ (V_ADD_F32_e64 $src0_modifiers, $src0, 0, (i32 0), 1, $omod)
>;
/********** ================================ **********/
@@ -2916,7 +633,7 @@ def : Pat <
def : Pat <
(fneg (fabs f32:$src)),
- (S_OR_B32 $src, 0x80000000) // Set sign bit
+ (S_OR_B32 $src, (S_MOV_B32 (i32 0x80000000))) // Set sign bit
>;
// FIXME: Should use S_OR_B32
@@ -2925,19 +642,19 @@ def : Pat <
(REG_SEQUENCE VReg_64,
(i32 (EXTRACT_SUBREG f64:$src, sub0)),
sub0,
- (V_OR_B32_e32 (EXTRACT_SUBREG f64:$src, sub1),
- (V_MOV_B32_e32 0x80000000)), // Set sign bit.
+ (V_OR_B32_e32 (i32 (EXTRACT_SUBREG f64:$src, sub1)),
+ (V_MOV_B32_e32 (i32 0x80000000))), // Set sign bit.
sub1)
>;
def : Pat <
(fabs f32:$src),
- (V_AND_B32_e32 $src, (V_MOV_B32_e32 0x7fffffff))
+ (V_AND_B32_e64 $src, (V_MOV_B32_e32 (i32 0x7fffffff)))
>;
def : Pat <
(fneg f32:$src),
- (V_XOR_B32_e32 $src, (V_MOV_B32_e32 0x80000000))
+ (V_XOR_B32_e32 $src, (V_MOV_B32_e32 (i32 0x80000000)))
>;
def : Pat <
@@ -2945,8 +662,8 @@ def : Pat <
(REG_SEQUENCE VReg_64,
(i32 (EXTRACT_SUBREG f64:$src, sub0)),
sub0,
- (V_AND_B32_e32 (EXTRACT_SUBREG f64:$src, sub1),
- (V_MOV_B32_e32 0x7fffffff)), // Set sign bit.
+ (V_AND_B32_e64 (i32 (EXTRACT_SUBREG f64:$src, sub1)),
+ (V_MOV_B32_e32 (i32 0x7fffffff))), // Clear sign bit.
sub1)
>;
@@ -2955,33 +672,66 @@ def : Pat <
(REG_SEQUENCE VReg_64,
(i32 (EXTRACT_SUBREG f64:$src, sub0)),
sub0,
- (V_XOR_B32_e32 (EXTRACT_SUBREG f64:$src, sub1),
- (V_MOV_B32_e32 0x80000000)),
+ (V_XOR_B32_e32 (i32 (EXTRACT_SUBREG f64:$src, sub1)),
+ (i32 (V_MOV_B32_e32 (i32 0x80000000)))),
sub1)
>;
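+// f16 values occupy the low 16 bits of a 32-bit register, so the sign bit is
+// bit 15: xor-ing with 0x8000 flips it (fneg), and-ing with 0x7fff clears it (fabs).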
+def : Pat <
+ (fneg f16:$src),
+ (V_XOR_B32_e32 $src, (V_MOV_B32_e32 (i32 0x00008000)))
+>;
+
+def : Pat <
+ (fabs f16:$src),
+ (V_AND_B32_e64 $src, (V_MOV_B32_e32 (i32 0x00007fff)))
+>;
+
+def : Pat <
+ (fneg (fabs f16:$src)),
+ (S_OR_B32 $src, (S_MOV_B32 (i32 0x00008000))) // Set sign bit
+>;
+
/********** ================== **********/
/********** Immediate Patterns **********/
/********** ================== **********/
def : Pat <
- (SGPRImm<(i32 imm)>:$imm),
- (S_MOV_B32 imm:$imm)
+ (VGPRImm<(i32 imm)>:$imm),
+ (V_MOV_B32_e32 imm:$imm)
>;
def : Pat <
- (SGPRImm<(f32 fpimm)>:$imm),
- (S_MOV_B32 (f32 (bitcast_fpimm_to_i32 $imm)))
+ (VGPRImm<(f32 fpimm)>:$imm),
+ (V_MOV_B32_e32 (f32 (bitcast_fpimm_to_i32 $imm)))
>;
def : Pat <
(i32 imm:$imm),
- (V_MOV_B32_e32 imm:$imm)
+ (S_MOV_B32 imm:$imm)
+>;
+
+// FIXME: Workaround for ordering issue with peephole optimizer where
+// a register class copy interferes with immediate folding. Should
+// use s_mov_b32, which can be shrunk to s_movk_i32
+def : Pat <
+ (VGPRImm<(f16 fpimm)>:$imm),
+ (V_MOV_B32_e32 (f16 (bitcast_fpimm_to_i32 $imm)))
>;
def : Pat <
(f32 fpimm:$imm),
- (V_MOV_B32_e32 (f32 (bitcast_fpimm_to_i32 $imm)))
+ (S_MOV_B32 (f32 (bitcast_fpimm_to_i32 $imm)))
+>;
+
+def : Pat <
+ (f16 fpimm:$imm),
+ (S_MOV_B32 (i32 (bitcast_fpimm_to_i32 $imm)))
+>;
+
+def : Pat <
+ (i32 frameindex:$fi),
+ (V_MOV_B32_e32 (i32 (frameindex_to_targetframeindex $fi)))
>;
def : Pat <
@@ -3011,21 +761,21 @@ def : POW_Common <V_LOG_F32_e32, V_EXP_F32_e32, V_MUL_LEGACY_F32_e32>;
def : Pat <
(int_AMDGPU_cube v4f32:$src),
(REG_SEQUENCE VReg_128,
- (V_CUBETC_F32 0 /* src0_modifiers */, (EXTRACT_SUBREG $src, sub0),
- 0 /* src1_modifiers */, (EXTRACT_SUBREG $src, sub1),
- 0 /* src2_modifiers */, (EXTRACT_SUBREG $src, sub2),
+ (V_CUBETC_F32 0 /* src0_modifiers */, (f32 (EXTRACT_SUBREG $src, sub0)),
+ 0 /* src1_modifiers */, (f32 (EXTRACT_SUBREG $src, sub1)),
+ 0 /* src2_modifiers */, (f32 (EXTRACT_SUBREG $src, sub2)),
0 /* clamp */, 0 /* omod */), sub0,
- (V_CUBESC_F32 0 /* src0_modifiers */, (EXTRACT_SUBREG $src, sub0),
- 0 /* src1_modifiers */,(EXTRACT_SUBREG $src, sub1),
- 0 /* src2_modifiers */,(EXTRACT_SUBREG $src, sub2),
+ (V_CUBESC_F32 0 /* src0_modifiers */, (f32 (EXTRACT_SUBREG $src, sub0)),
+ 0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub1)),
+ 0 /* src2_modifiers */,(f32 (EXTRACT_SUBREG $src, sub2)),
0 /* clamp */, 0 /* omod */), sub1,
- (V_CUBEMA_F32 0 /* src1_modifiers */,(EXTRACT_SUBREG $src, sub0),
- 0 /* src1_modifiers */,(EXTRACT_SUBREG $src, sub1),
- 0 /* src1_modifiers */,(EXTRACT_SUBREG $src, sub2),
+ (V_CUBEMA_F32 0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub0)),
+ 0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub1)),
+ 0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub2)),
0 /* clamp */, 0 /* omod */), sub2,
- (V_CUBEID_F32 0 /* src1_modifiers */,(EXTRACT_SUBREG $src, sub0),
- 0 /* src1_modifiers */,(EXTRACT_SUBREG $src, sub1),
- 0 /* src1_modifiers */,(EXTRACT_SUBREG $src, sub2),
+ (V_CUBEID_F32 0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub0)),
+ 0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub1)),
+ 0 /* src1_modifiers */,(f32 (EXTRACT_SUBREG $src, sub2)),
0 /* clamp */, 0 /* omod */), sub3)
>;
@@ -3042,17 +792,11 @@ class Ext32Pat <SDNode ext> : Pat <
def : Ext32Pat <zext>;
def : Ext32Pat <anyext>;
-// Offset in an 32-bit VGPR
-def : Pat <
- (SIload_constant v4i32:$sbase, i32:$voff),
- (BUFFER_LOAD_DWORD_OFFEN $voff, $sbase, 0, 0, 0, 0, 0)
->;
-
// The multiplication scales from [0,1] to the unsigned integer range
def : Pat <
(AMDGPUurecip i32:$src0),
(V_CVT_U32_F32_e32
- (V_MUL_F32_e32 CONST.FP_UINT_MAX_PLUS_1,
+ (V_MUL_F32_e32 (i32 CONST.FP_UINT_MAX_PLUS_1),
(V_RCP_IFLAG_F32_e32 (V_CVT_F32_U32_e32 $src0))))
>;
@@ -3066,245 +810,8 @@ def : UMad24Pat<V_MAD_U32_U24>;
defm : BFIPatterns <V_BFI_B32, S_MOV_B32, SReg_64>;
def : ROTRPattern <V_ALIGNBIT_B32>;
-/********** ======================= **********/
-/********** Load/Store Patterns **********/
-/********** ======================= **********/
-
-class DSReadPat <DS inst, ValueType vt, PatFrag frag> : Pat <
- (vt (frag (DS1Addr1Offset i32:$ptr, i32:$offset))),
- (inst $ptr, (as_i16imm $offset), (i1 0))
->;
-
-def : DSReadPat <DS_READ_I8, i32, si_sextload_local_i8>;
-def : DSReadPat <DS_READ_U8, i32, si_az_extload_local_i8>;
-def : DSReadPat <DS_READ_I16, i32, si_sextload_local_i16>;
-def : DSReadPat <DS_READ_U16, i32, si_az_extload_local_i16>;
-def : DSReadPat <DS_READ_B32, i32, si_load_local>;
-
-let AddedComplexity = 100 in {
-
-def : DSReadPat <DS_READ_B64, v2i32, si_load_local_align8>;
-
-} // End AddedComplexity = 100
-
-def : Pat <
- (v2i32 (si_load_local (DS64Bit4ByteAligned i32:$ptr, i8:$offset0,
- i8:$offset1))),
- (DS_READ2_B32 $ptr, $offset0, $offset1, (i1 0))
->;
-
-class DSWritePat <DS inst, ValueType vt, PatFrag frag> : Pat <
- (frag vt:$value, (DS1Addr1Offset i32:$ptr, i32:$offset)),
- (inst $ptr, $value, (as_i16imm $offset), (i1 0))
->;
-
-def : DSWritePat <DS_WRITE_B8, i32, si_truncstore_local_i8>;
-def : DSWritePat <DS_WRITE_B16, i32, si_truncstore_local_i16>;
-def : DSWritePat <DS_WRITE_B32, i32, si_store_local>;
-
-let AddedComplexity = 100 in {
-
-def : DSWritePat <DS_WRITE_B64, v2i32, si_store_local_align8>;
-} // End AddedComplexity = 100
-
-def : Pat <
- (si_store_local v2i32:$value, (DS64Bit4ByteAligned i32:$ptr, i8:$offset0,
- i8:$offset1)),
- (DS_WRITE2_B32 $ptr, (EXTRACT_SUBREG $value, sub0),
- (EXTRACT_SUBREG $value, sub1), $offset0, $offset1,
- (i1 0))
->;
-
-class DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> : Pat <
- (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$value),
- (inst $ptr, $value, (as_i16imm $offset), (i1 0))
->;
-
-class DSAtomicCmpXChg <DS inst, ValueType vt, PatFrag frag> : Pat <
- (frag (DS1Addr1Offset i32:$ptr, i32:$offset), vt:$cmp, vt:$swap),
- (inst $ptr, $cmp, $swap, (as_i16imm $offset), (i1 0))
->;
-
-
-// 32-bit atomics.
-def : DSAtomicRetPat<DS_WRXCHG_RTN_B32, i32, si_atomic_swap_local>;
-def : DSAtomicRetPat<DS_ADD_RTN_U32, i32, si_atomic_load_add_local>;
-def : DSAtomicRetPat<DS_SUB_RTN_U32, i32, si_atomic_load_sub_local>;
-def : DSAtomicRetPat<DS_INC_RTN_U32, i32, si_atomic_inc_local>;
-def : DSAtomicRetPat<DS_DEC_RTN_U32, i32, si_atomic_dec_local>;
-def : DSAtomicRetPat<DS_AND_RTN_B32, i32, si_atomic_load_and_local>;
-def : DSAtomicRetPat<DS_OR_RTN_B32, i32, si_atomic_load_or_local>;
-def : DSAtomicRetPat<DS_XOR_RTN_B32, i32, si_atomic_load_xor_local>;
-def : DSAtomicRetPat<DS_MIN_RTN_I32, i32, si_atomic_load_min_local>;
-def : DSAtomicRetPat<DS_MAX_RTN_I32, i32, si_atomic_load_max_local>;
-def : DSAtomicRetPat<DS_MIN_RTN_U32, i32, si_atomic_load_umin_local>;
-def : DSAtomicRetPat<DS_MAX_RTN_U32, i32, si_atomic_load_umax_local>;
-def : DSAtomicCmpXChg<DS_CMPST_RTN_B32, i32, si_atomic_cmp_swap_32_local>;
-
-// 64-bit atomics.
-def : DSAtomicRetPat<DS_WRXCHG_RTN_B64, i64, si_atomic_swap_local>;
-def : DSAtomicRetPat<DS_ADD_RTN_U64, i64, si_atomic_load_add_local>;
-def : DSAtomicRetPat<DS_SUB_RTN_U64, i64, si_atomic_load_sub_local>;
-def : DSAtomicRetPat<DS_INC_RTN_U64, i64, si_atomic_inc_local>;
-def : DSAtomicRetPat<DS_DEC_RTN_U64, i64, si_atomic_dec_local>;
-def : DSAtomicRetPat<DS_AND_RTN_B64, i64, si_atomic_load_and_local>;
-def : DSAtomicRetPat<DS_OR_RTN_B64, i64, si_atomic_load_or_local>;
-def : DSAtomicRetPat<DS_XOR_RTN_B64, i64, si_atomic_load_xor_local>;
-def : DSAtomicRetPat<DS_MIN_RTN_I64, i64, si_atomic_load_min_local>;
-def : DSAtomicRetPat<DS_MAX_RTN_I64, i64, si_atomic_load_max_local>;
-def : DSAtomicRetPat<DS_MIN_RTN_U64, i64, si_atomic_load_umin_local>;
-def : DSAtomicRetPat<DS_MAX_RTN_U64, i64, si_atomic_load_umax_local>;
-
-def : DSAtomicCmpXChg<DS_CMPST_RTN_B64, i64, si_atomic_cmp_swap_64_local>;
-
-
-//===----------------------------------------------------------------------===//
-// MUBUF Patterns
-//===----------------------------------------------------------------------===//
-
-class MUBUFLoad_Pattern <MUBUF Instr_ADDR64, ValueType vt,
- PatFrag constant_ld> : Pat <
- (vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
- i16:$offset, i1:$glc, i1:$slc, i1:$tfe))),
- (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, $glc, $slc, $tfe)
- >;
-
-multiclass MUBUFLoad_Atomic_Pattern <MUBUF Instr_ADDR64, MUBUF Instr_OFFSET,
- ValueType vt, PatFrag atomic_ld> {
- def : Pat <
- (vt (atomic_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
- i16:$offset, i1:$slc))),
- (Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, 1, $slc, 0)
- >;
-
- def : Pat <
- (vt (atomic_ld (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset))),
- (Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset), 1, 0, 0)
- >;
-}
-
-let Predicates = [isSICI] in {
-def : MUBUFLoad_Pattern <BUFFER_LOAD_SBYTE_ADDR64, i32, sextloadi8_constant>;
-def : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_ADDR64, i32, az_extloadi8_constant>;
-def : MUBUFLoad_Pattern <BUFFER_LOAD_SSHORT_ADDR64, i32, sextloadi16_constant>;
-def : MUBUFLoad_Pattern <BUFFER_LOAD_USHORT_ADDR64, i32, az_extloadi16_constant>;
-
-defm : MUBUFLoad_Atomic_Pattern <BUFFER_LOAD_DWORD_ADDR64, BUFFER_LOAD_DWORD_OFFSET, i32, mubuf_load_atomic>;
-defm : MUBUFLoad_Atomic_Pattern <BUFFER_LOAD_DWORDX2_ADDR64, BUFFER_LOAD_DWORDX2_OFFSET, i64, mubuf_load_atomic>;
-} // End Predicates = [isSICI]
-
-class MUBUFScratchLoadPat <MUBUF Instr, ValueType vt, PatFrag ld> : Pat <
- (vt (ld (MUBUFScratch v4i32:$srsrc, i32:$vaddr,
- i32:$soffset, u16imm:$offset))),
- (Instr $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
->;
-
-def : MUBUFScratchLoadPat <BUFFER_LOAD_SBYTE_OFFEN, i32, sextloadi8_private>;
-def : MUBUFScratchLoadPat <BUFFER_LOAD_UBYTE_OFFEN, i32, extloadi8_private>;
-def : MUBUFScratchLoadPat <BUFFER_LOAD_SSHORT_OFFEN, i32, sextloadi16_private>;
-def : MUBUFScratchLoadPat <BUFFER_LOAD_USHORT_OFFEN, i32, extloadi16_private>;
-def : MUBUFScratchLoadPat <BUFFER_LOAD_DWORD_OFFEN, i32, load_private>;
-def : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX2_OFFEN, v2i32, load_private>;
-def : MUBUFScratchLoadPat <BUFFER_LOAD_DWORDX4_OFFEN, v4i32, load_private>;
-
-// BUFFER_LOAD_DWORD*, addr64=0
-multiclass MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxen,
- MUBUF bothen> {
-
- def : Pat <
- (vt (int_SI_buffer_load_dword v4i32:$rsrc, (i32 imm), i32:$soffset,
- imm:$offset, 0, 0, imm:$glc, imm:$slc,
- imm:$tfe)),
- (offset $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc),
- (as_i1imm $slc), (as_i1imm $tfe))
- >;
-
- def : Pat <
- (vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset,
- imm:$offset, 1, 0, imm:$glc, imm:$slc,
- imm:$tfe)),
- (offen $vaddr, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc),
- (as_i1imm $tfe))
- >;
-
- def : Pat <
- (vt (int_SI_buffer_load_dword v4i32:$rsrc, i32:$vaddr, i32:$soffset,
- imm:$offset, 0, 1, imm:$glc, imm:$slc,
- imm:$tfe)),
- (idxen $vaddr, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc),
- (as_i1imm $slc), (as_i1imm $tfe))
- >;
-
- def : Pat <
- (vt (int_SI_buffer_load_dword v4i32:$rsrc, v2i32:$vaddr, i32:$soffset,
- imm:$offset, 1, 1, imm:$glc, imm:$slc,
- imm:$tfe)),
- (bothen $vaddr, $rsrc, $soffset, (as_i16imm $offset), (as_i1imm $glc), (as_i1imm $slc),
- (as_i1imm $tfe))
- >;
-}
-
-defm : MUBUF_Load_Dword <i32, BUFFER_LOAD_DWORD_OFFSET, BUFFER_LOAD_DWORD_OFFEN,
- BUFFER_LOAD_DWORD_IDXEN, BUFFER_LOAD_DWORD_BOTHEN>;
-defm : MUBUF_Load_Dword <v2i32, BUFFER_LOAD_DWORDX2_OFFSET, BUFFER_LOAD_DWORDX2_OFFEN,
- BUFFER_LOAD_DWORDX2_IDXEN, BUFFER_LOAD_DWORDX2_BOTHEN>;
-defm : MUBUF_Load_Dword <v4i32, BUFFER_LOAD_DWORDX4_OFFSET, BUFFER_LOAD_DWORDX4_OFFEN,
- BUFFER_LOAD_DWORDX4_IDXEN, BUFFER_LOAD_DWORDX4_BOTHEN>;
-
-multiclass MUBUFStore_Atomic_Pattern <MUBUF Instr_ADDR64, MUBUF Instr_OFFSET,
- ValueType vt, PatFrag atomic_st> {
-  // Store follows atomic op convention so address is first
- def : Pat <
- (atomic_st (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
- i16:$offset, i1:$slc), vt:$val),
- (Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset, 1, $slc, 0)
- >;
-
- def : Pat <
- (atomic_st (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset), vt:$val),
- (Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset), 1, 0, 0)
- >;
-}
-let Predicates = [isSICI] in {
-defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_DWORD_ADDR64, BUFFER_STORE_DWORD_OFFSET, i32, global_store_atomic>;
-defm : MUBUFStore_Atomic_Pattern <BUFFER_STORE_DWORDX2_ADDR64, BUFFER_STORE_DWORDX2_OFFSET, i64, global_store_atomic>;
-} // End Predicates = [isSICI]
-
-class MUBUFScratchStorePat <MUBUF Instr, ValueType vt, PatFrag st> : Pat <
- (st vt:$value, (MUBUFScratch v4i32:$srsrc, i32:$vaddr, i32:$soffset,
- u16imm:$offset)),
- (Instr $value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0)
->;
-
-def : MUBUFScratchStorePat <BUFFER_STORE_BYTE_OFFEN, i32, truncstorei8_private>;
-def : MUBUFScratchStorePat <BUFFER_STORE_SHORT_OFFEN, i32, truncstorei16_private>;
-def : MUBUFScratchStorePat <BUFFER_STORE_DWORD_OFFEN, i32, store_private>;
-def : MUBUFScratchStorePat <BUFFER_STORE_DWORDX2_OFFEN, v2i32, store_private>;
-def : MUBUFScratchStorePat <BUFFER_STORE_DWORDX4_OFFEN, v4i32, store_private>;
-
-//===----------------------------------------------------------------------===//
-// MTBUF Patterns
-//===----------------------------------------------------------------------===//
-
-// TBUFFER_STORE_FORMAT_*, addr64=0
-class MTBUF_StoreResource <ValueType vt, int num_channels, MTBUF opcode> : Pat<
- (SItbuffer_store v4i32:$rsrc, vt:$vdata, num_channels, i32:$vaddr,
- i32:$soffset, imm:$inst_offset, imm:$dfmt,
- imm:$nfmt, imm:$offen, imm:$idxen,
- imm:$glc, imm:$slc, imm:$tfe),
- (opcode
- $vdata, (as_i16imm $inst_offset), (as_i1imm $offen), (as_i1imm $idxen),
- (as_i1imm $glc), 0, (as_i8imm $dfmt), (as_i8imm $nfmt), $vaddr, $rsrc,
- (as_i1imm $slc), (as_i1imm $tfe), $soffset)
->;
-
-def : MTBUF_StoreResource <i32, 1, TBUFFER_STORE_FORMAT_X>;
-def : MTBUF_StoreResource <v2i32, 2, TBUFFER_STORE_FORMAT_XY>;
-def : MTBUF_StoreResource <v4i32, 3, TBUFFER_STORE_FORMAT_XYZ>;
-def : MTBUF_StoreResource <v4i32, 4, TBUFFER_STORE_FORMAT_XYZW>;
-
/********** ====================== **********/
-/********** Indirect adressing **********/
+/********** Indirect addressing **********/
/********** ====================== **********/
multiclass SI_INDIRECT_Pattern <ValueType vt, ValueType eltvt, string VecSize> {
@@ -3332,48 +839,75 @@ defm : SI_INDIRECT_Pattern <v8i32, i32, "V8">;
defm : SI_INDIRECT_Pattern <v16i32, i32, "V16">;
//===----------------------------------------------------------------------===//
+// SAD Patterns
+//===----------------------------------------------------------------------===//
+
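+// V_SAD_U32 computes |src0 - src1| + src2, so both ways of writing the
+// unsigned absolute difference below fold into a single instruction.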
+def : Pat <
+ (add (sub_oneuse (umax i32:$src0, i32:$src1),
+ (umin i32:$src0, i32:$src1)),
+ i32:$src2),
+ (V_SAD_U32 $src0, $src1, $src2)
+>;
+
+def : Pat <
+ (add (select_oneuse (i1 (setugt i32:$src0, i32:$src1)),
+ (sub i32:$src0, i32:$src1),
+ (sub i32:$src1, i32:$src0)),
+ i32:$src2),
+ (V_SAD_U32 $src0, $src1, $src2)
+>;
+
+//===----------------------------------------------------------------------===//
// Conversion Patterns
//===----------------------------------------------------------------------===//
def : Pat<(i32 (sext_inreg i32:$src, i1)),
- (S_BFE_I32 i32:$src, 65536)>; // 0 | 1 << 16
+ (S_BFE_I32 i32:$src, (i32 65536))>; // 0 | 1 << 16
// Handle sext_inreg in i64
def : Pat <
(i64 (sext_inreg i64:$src, i1)),
- (S_BFE_I64 i64:$src, 0x10000) // 0 | 1 << 16
+ (S_BFE_I64 i64:$src, (i32 0x10000)) // 0 | 1 << 16
+>;
+
+def : Pat <
+ (i16 (sext_inreg i16:$src, i8)),
+ (S_BFE_I32 $src, (i32 0x80000)) // 0 | 8 << 16
>;
def : Pat <
(i64 (sext_inreg i64:$src, i8)),
- (S_BFE_I64 i64:$src, 0x80000) // 0 | 8 << 16
+ (S_BFE_I64 i64:$src, (i32 0x80000)) // 0 | 8 << 16
>;
def : Pat <
(i64 (sext_inreg i64:$src, i16)),
- (S_BFE_I64 i64:$src, 0x100000) // 0 | 16 << 16
+ (S_BFE_I64 i64:$src, (i32 0x100000)) // 0 | 16 << 16
>;
def : Pat <
(i64 (sext_inreg i64:$src, i32)),
- (S_BFE_I64 i64:$src, 0x200000) // 0 | 32 << 16
+ (S_BFE_I64 i64:$src, (i32 0x200000)) // 0 | 32 << 16
>;
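+// The S_BFE immediate packs the bit field as (offset | width << 16); the
+// sext_inreg patterns above always use offset 0, so e.g. a 16-bit source
+// field becomes 16 << 16 = 0x100000.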
-class ZExt_i64_i32_Pat <SDNode ext> : Pat <
- (i64 (ext i32:$src)),
- (REG_SEQUENCE SReg_64, $src, sub0, (S_MOV_B32 0), sub1)
+def : Pat <
+ (i64 (zext i32:$src)),
+ (REG_SEQUENCE SReg_64, $src, sub0, (S_MOV_B32 (i32 0)), sub1)
+>;
+
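+// For anyext the upper 32 bits are undefined, so an IMPLICIT_DEF is enough
+// for the high half; zext above must explicitly zero it with s_mov_b32.
+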
+def : Pat <
+ (i64 (anyext i32:$src)),
+ (REG_SEQUENCE SReg_64, $src, sub0, (i32 (IMPLICIT_DEF)), sub1)
>;
class ZExt_i64_i1_Pat <SDNode ext> : Pat <
(i64 (ext i1:$src)),
(REG_SEQUENCE VReg_64,
(V_CNDMASK_B32_e64 (i32 0), (i32 1), $src), sub0,
- (S_MOV_B32 0), sub1)
+ (S_MOV_B32 (i32 0)), sub1)
>;
-def : ZExt_i64_i32_Pat<zext>;
-def : ZExt_i64_i32_Pat<anyext>;
def : ZExt_i64_i1_Pat<zext>;
def : ZExt_i64_i1_Pat<anyext>;
@@ -3382,29 +916,29 @@ def : ZExt_i64_i1_Pat<anyext>;
def : Pat <
(i64 (sext i32:$src)),
(REG_SEQUENCE SReg_64, $src, sub0,
- (i32 (COPY_TO_REGCLASS (S_ASHR_I32 $src, 31), SReg_32_XM0)), sub1)
+ (i32 (COPY_TO_REGCLASS (S_ASHR_I32 $src, (i32 31)), SReg_32_XM0)), sub1)
>;
def : Pat <
(i64 (sext i1:$src)),
(REG_SEQUENCE VReg_64,
- (V_CNDMASK_B32_e64 0, -1, $src), sub0,
- (V_CNDMASK_B32_e64 0, -1, $src), sub1)
+ (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src), sub0,
+ (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src), sub1)
>;
-class FPToI1Pat<Instruction Inst, int KOne, ValueType vt, SDPatternOperator fp_to_int> : Pat <
+class FPToI1Pat<Instruction Inst, int KOne, ValueType kone_type, ValueType vt, SDPatternOperator fp_to_int> : Pat <
(i1 (fp_to_int (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)))),
- (i1 (Inst 0, KOne, $src0_modifiers, $src0, DSTCLAMP.NONE, DSTOMOD.NONE))
+ (i1 (Inst 0, (kone_type KOne), $src0_modifiers, $src0, DSTCLAMP.NONE, DSTOMOD.NONE))
>;
-def : FPToI1Pat<V_CMP_EQ_F32_e64, CONST.FP32_ONE, f32, fp_to_uint>;
-def : FPToI1Pat<V_CMP_EQ_F32_e64, CONST.FP32_NEG_ONE, f32, fp_to_sint>;
-def : FPToI1Pat<V_CMP_EQ_F64_e64, CONST.FP64_ONE, f64, fp_to_uint>;
-def : FPToI1Pat<V_CMP_EQ_F64_e64, CONST.FP64_NEG_ONE, f64, fp_to_sint>;
+def : FPToI1Pat<V_CMP_EQ_F32_e64, CONST.FP32_ONE, i32, f32, fp_to_uint>;
+def : FPToI1Pat<V_CMP_EQ_F32_e64, CONST.FP32_NEG_ONE, i32, f32, fp_to_sint>;
+def : FPToI1Pat<V_CMP_EQ_F64_e64, CONST.FP64_ONE, i64, f64, fp_to_uint>;
+def : FPToI1Pat<V_CMP_EQ_F64_e64, CONST.FP64_NEG_ONE, i64, f64, fp_to_sint>;
// If we need to perform a logical operation on i1 values, we need to
// use vector comparisons since there is only one SCC register. Vector
-// comparisions still write to a pair of SGPRs, so treat these as
+// comparisons still write to a pair of SGPRs, so treat these as
// 64-bit comparisons. When legalizing SGPR copies, instructions
// resulting in the copies from SCC to these instructions will be
// moved to the VALU.
@@ -3425,12 +959,12 @@ def : Pat <
def : Pat <
(f32 (sint_to_fp i1:$src)),
- (V_CNDMASK_B32_e64 (i32 0), CONST.FP32_NEG_ONE, $src)
+ (V_CNDMASK_B32_e64 (i32 0), (i32 CONST.FP32_NEG_ONE), $src)
>;
def : Pat <
(f32 (uint_to_fp i1:$src)),
- (V_CNDMASK_B32_e64 (i32 0), CONST.FP32_ONE, $src)
+ (V_CNDMASK_B32_e64 (i32 0), (i32 CONST.FP32_ONE), $src)
>;
def : Pat <
@@ -3454,25 +988,20 @@ def : Pat <
def : Pat <
(i1 (trunc i32:$a)),
- (V_CMP_EQ_I32_e64 (S_AND_B32 (i32 1), $a), 1)
+ (V_CMP_EQ_U32_e64 (S_AND_B32 (i32 1), $a), (i32 1))
>;
def : Pat <
(i1 (trunc i64:$a)),
- (V_CMP_EQ_I32_e64 (S_AND_B32 (i32 1),
- (EXTRACT_SUBREG $a, sub0)), 1)
+ (V_CMP_EQ_U32_e64 (S_AND_B32 (i32 1),
+ (i32 (EXTRACT_SUBREG $a, sub0))), (i32 1))
>;
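+// bswap is expanded to two rotates plus a bitfield insert: v_alignbit_b32
+// with equal sources is a rotate right (here by 24 and by 8), and v_bfi_b32
+// with mask 0x00ff00ff takes bytes 0 and 2 from the first rotate and
+// bytes 1 and 3 from the second, yielding the byte-swapped value.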
def : Pat <
(i32 (bswap i32:$a)),
- (V_BFI_B32 (S_MOV_B32 0x00ff00ff),
- (V_ALIGNBIT_B32 $a, $a, 24),
- (V_ALIGNBIT_B32 $a, $a, 8))
->;
-
-def : Pat <
- (f32 (select i1:$src2, f32:$src1, f32:$src0)),
- (V_CNDMASK_B32_e64 $src0, $src1, $src2)
+ (V_BFI_B32 (S_MOV_B32 (i32 0x00ff00ff)),
+ (V_ALIGNBIT_B32 $a, $a, (i32 24)),
+ (V_ALIGNBIT_B32 $a, $a, (i32 8)))
>;
multiclass BFMPatterns <ValueType vt, InstSI BFM, InstSI MOV> {
@@ -3483,7 +1012,7 @@ multiclass BFMPatterns <ValueType vt, InstSI BFM, InstSI MOV> {
def : Pat <
(vt (add (vt (shl 1, vt:$a)), -1)),
- (BFM $a, (MOV 0))
+ (BFM $a, (MOV (i32 0)))
>;
}
@@ -3492,16 +1021,14 @@ defm : BFMPatterns <i32, S_BFM_B32, S_MOV_B32>;
def : BFEPattern <V_BFE_U32, S_MOV_B32>;
-let Predicates = [isSICI] in {
-def : Pat <
- (i64 (readcyclecounter)),
- (S_MEMTIME)
+def : Pat<
+ (fcanonicalize f16:$src),
+ (V_MUL_F16_e64 0, (i32 CONST.FP16_ONE), 0, $src, 0, 0)
>;
-}
def : Pat<
(fcanonicalize f32:$src),
- (V_MUL_F32_e64 0, CONST.FP32_ONE, 0, $src, 0, 0)
+ (V_MUL_F32_e64 0, (i32 CONST.FP32_ONE), 0, $src, 0, 0)
>;
def : Pat<
@@ -3536,7 +1063,7 @@ def : Pat <
(V_MOV_B64_PSEUDO 0x3fefffffffffffff),
DSTCLAMP.NONE, DSTOMOD.NONE),
$x,
- (V_CMP_CLASS_F64_e64 SRCMODS.NONE, $x, 3/*NaN*/)),
+ (V_CMP_CLASS_F64_e64 SRCMODS.NONE, $x, (i32 3 /*NaN*/))),
DSTCLAMP.NONE, DSTOMOD.NONE)
>;
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIIntrinsics.td b/contrib/llvm/lib/Target/AMDGPU/SIIntrinsics.td
index 9d06ccfc6c7f..5da375468713 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIIntrinsics.td
+++ b/contrib/llvm/lib/Target/AMDGPU/SIIntrinsics.td
@@ -15,7 +15,20 @@
let TargetPrefix = "SI", isTarget = 1 in {
def int_SI_packf16 : Intrinsic <[llvm_i32_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
- def int_SI_export : Intrinsic <[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty, llvm_float_ty], []>;
+
+ def int_SI_export : Intrinsic <[],
+ [llvm_i32_ty, // en
+ llvm_i32_ty, // vm (FIXME: should be i1)
+ llvm_i32_ty, // done (FIXME: should be i1)
+ llvm_i32_ty, // tgt
+ llvm_i32_ty, // compr (FIXME: should be i1)
+ llvm_float_ty, // src0
+ llvm_float_ty, // src1
+ llvm_float_ty, // src2
+ llvm_float_ty], // src3
+ []
+ >;
+
def int_SI_load_const : Intrinsic <[llvm_float_ty], [llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>;
def int_SI_vs_load_input : Intrinsic <[llvm_v4f32_ty], [llvm_anyint_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]> ;
@@ -186,11 +199,11 @@ let TargetPrefix = "amdgcn", isTarget = 1 in {
/* Control flow Intrinsics */
- def int_amdgcn_if : Intrinsic<[llvm_i64_ty], [llvm_i1_ty, llvm_empty_ty], []>;
- def int_amdgcn_else : Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_empty_ty], []>;
- def int_amdgcn_break : Intrinsic<[llvm_i64_ty], [llvm_i64_ty], []>;
- def int_amdgcn_if_break : Intrinsic<[llvm_i64_ty], [llvm_i1_ty, llvm_i64_ty], []>;
- def int_amdgcn_else_break : Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], []>;
- def int_amdgcn_loop : Intrinsic<[], [llvm_i64_ty, llvm_empty_ty], []>;
- def int_amdgcn_end_cf : Intrinsic<[], [llvm_i64_ty], []>;
+ def int_amdgcn_if : Intrinsic<[llvm_i64_ty], [llvm_i1_ty, llvm_empty_ty], [IntrConvergent]>;
+ def int_amdgcn_else : Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_empty_ty], [IntrConvergent]>;
+ def int_amdgcn_break : Intrinsic<[llvm_i64_ty], [llvm_i64_ty], [IntrNoMem, IntrConvergent]>;
+ def int_amdgcn_if_break : Intrinsic<[llvm_i64_ty], [llvm_i1_ty, llvm_i64_ty], [IntrNoMem, IntrConvergent]>;
+ def int_amdgcn_else_break : Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem, IntrConvergent]>;
+ def int_amdgcn_loop : Intrinsic<[], [llvm_i64_ty, llvm_empty_ty], [IntrConvergent]>;
+ def int_amdgcn_end_cf : Intrinsic<[], [llvm_i64_ty], [IntrConvergent]>;
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index 9e972a569a0f..99fe96c0be22 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -60,31 +60,35 @@ private:
const SIInstrInfo *TII;
const SIRegisterInfo *TRI;
MachineRegisterInfo *MRI;
- LiveIntervals *LIS;
+ AliasAnalysis *AA;
static bool offsetsCanBeCombined(unsigned Offset0,
unsigned Offset1,
unsigned EltSize);
- MachineBasicBlock::iterator findMatchingDSInst(MachineBasicBlock::iterator I,
- unsigned EltSize);
+ MachineBasicBlock::iterator findMatchingDSInst(
+ MachineBasicBlock::iterator I,
+ unsigned EltSize,
+ SmallVectorImpl<MachineInstr*> &InstsToMove);
MachineBasicBlock::iterator mergeRead2Pair(
MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Paired,
- unsigned EltSize);
+ unsigned EltSize,
+ ArrayRef<MachineInstr*> InstsToMove);
MachineBasicBlock::iterator mergeWrite2Pair(
MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Paired,
- unsigned EltSize);
+ unsigned EltSize,
+ ArrayRef<MachineInstr*> InstsToMove);
public:
static char ID;
SILoadStoreOptimizer()
: MachineFunctionPass(ID), TII(nullptr), TRI(nullptr), MRI(nullptr),
- LIS(nullptr) {}
+ AA(nullptr) {}
SILoadStoreOptimizer(const TargetMachine &TM_) : MachineFunctionPass(ID) {
initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry());
@@ -94,16 +98,11 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
- const char *getPassName() const override {
- return "SI Load / Store Optimizer";
- }
+ StringRef getPassName() const override { return "SI Load / Store Optimizer"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
- AU.addPreserved<SlotIndexes>();
- AU.addPreserved<LiveIntervals>();
- AU.addPreserved<LiveVariables>();
- AU.addRequired<LiveIntervals>();
+ AU.addRequired<AAResultsWrapperPass>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -113,9 +112,7 @@ public:
INITIALIZE_PASS_BEGIN(SILoadStoreOptimizer, DEBUG_TYPE,
"SI Load / Store Optimizer", false, false)
-INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
-INITIALIZE_PASS_DEPENDENCY(LiveVariables)
-INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(SILoadStoreOptimizer, DEBUG_TYPE,
"SI Load / Store Optimizer", false, false)
@@ -127,6 +124,73 @@ FunctionPass *llvm::createSILoadStoreOptimizerPass(TargetMachine &TM) {
return new SILoadStoreOptimizer(TM);
}
+static void moveInstsAfter(MachineBasicBlock::iterator I,
+ ArrayRef<MachineInstr*> InstsToMove) {
+ MachineBasicBlock *MBB = I->getParent();
+ ++I;
+ for (MachineInstr *MI : InstsToMove) {
+ MI->removeFromParent();
+ MBB->insert(I, MI);
+ }
+}
+
+static void addDefsToList(const MachineInstr &MI,
+ SmallVectorImpl<const MachineOperand *> &Defs) {
+ for (const MachineOperand &Def : MI.defs()) {
+ Defs.push_back(&Def);
+ }
+}
+
+static bool memAccessesCanBeReordered(
+ MachineBasicBlock::iterator A,
+ MachineBasicBlock::iterator B,
+ const SIInstrInfo *TII,
+ llvm::AliasAnalysis * AA) {
+ return (TII->areMemAccessesTriviallyDisjoint(*A, *B, AA) ||
+ // RAW or WAR - cannot reorder
+ // WAW - cannot reorder
+ // RAR - safe to reorder
+ !(A->mayStore() || B->mayStore()));
+}
+
+// Add MI and its defs to the lists if MI reads one of the defs that are
+// already in the list. Returns true in that case.
+static bool
+addToListsIfDependent(MachineInstr &MI,
+ SmallVectorImpl<const MachineOperand *> &Defs,
+ SmallVectorImpl<MachineInstr*> &Insts) {
+ for (const MachineOperand *Def : Defs) {
+ bool ReadDef = MI.readsVirtualRegister(Def->getReg());
+ // If ReadDef is true, then there is a use of Def between I
+ // and the instruction that I will potentially be merged with. We
+ // will need to move this instruction after the merged instructions.
+ if (ReadDef) {
+ Insts.push_back(&MI);
+ addDefsToList(MI, Defs);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+static bool
+canMoveInstsAcrossMemOp(MachineInstr &MemOp,
+ ArrayRef<MachineInstr*> InstsToMove,
+ const SIInstrInfo *TII,
+ AliasAnalysis *AA) {
+
+ assert(MemOp.mayLoadOrStore());
+
+ for (MachineInstr *InstToMove : InstsToMove) {
+ if (!InstToMove->mayLoadOrStore())
+ continue;
+ if (!memAccessesCanBeReordered(MemOp, *InstToMove, TII, AA))
+ return false;
+ }
+ return true;
+}
+
bool SILoadStoreOptimizer::offsetsCanBeCombined(unsigned Offset0,
unsigned Offset1,
unsigned Size) {
@@ -156,43 +220,99 @@ bool SILoadStoreOptimizer::offsetsCanBeCombined(unsigned Offset0,
MachineBasicBlock::iterator
SILoadStoreOptimizer::findMatchingDSInst(MachineBasicBlock::iterator I,
- unsigned EltSize){
+ unsigned EltSize,
+ SmallVectorImpl<MachineInstr*> &InstsToMove) {
MachineBasicBlock::iterator E = I->getParent()->end();
MachineBasicBlock::iterator MBBI = I;
++MBBI;
- if (MBBI->getOpcode() != I->getOpcode())
- return E;
-
- // Don't merge volatiles.
- if (MBBI->hasOrderedMemoryRef())
- return E;
-
- int AddrIdx = AMDGPU::getNamedOperandIdx(I->getOpcode(), AMDGPU::OpName::addr);
- const MachineOperand &AddrReg0 = I->getOperand(AddrIdx);
- const MachineOperand &AddrReg1 = MBBI->getOperand(AddrIdx);
-
- // Check same base pointer. Be careful of subregisters, which can occur with
- // vectors of pointers.
- if (AddrReg0.getReg() == AddrReg1.getReg() &&
- AddrReg0.getSubReg() == AddrReg1.getSubReg()) {
- int OffsetIdx = AMDGPU::getNamedOperandIdx(I->getOpcode(),
- AMDGPU::OpName::offset);
- unsigned Offset0 = I->getOperand(OffsetIdx).getImm() & 0xffff;
- unsigned Offset1 = MBBI->getOperand(OffsetIdx).getImm() & 0xffff;
-
- // Check both offsets fit in the reduced range.
- if (offsetsCanBeCombined(Offset0, Offset1, EltSize))
- return MBBI;
- }
+ SmallVector<const MachineOperand *, 8> DefsToMove;
+ addDefsToList(*I, DefsToMove);
+ for ( ; MBBI != E; ++MBBI) {
+
+ if (MBBI->getOpcode() != I->getOpcode()) {
+
+ // This is not a matching DS instruction, but we can keep looking as
+      // long as one of these conditions is met:
+ // 1. It is safe to move I down past MBBI.
+ // 2. It is safe to move MBBI down past the instruction that I will
+ // be merged into.
+
+ if (MBBI->hasUnmodeledSideEffects())
+ // We can't re-order this instruction with respect to other memory
+        // operations, so we fail both conditions mentioned above.
+ return E;
+
+ if (MBBI->mayLoadOrStore() &&
+ !memAccessesCanBeReordered(*I, *MBBI, TII, AA)) {
+ // We fail condition #1, but we may still be able to satisfy condition
+ // #2. Add this instruction to the move list and then we will check
+ // if condition #2 holds once we have selected the matching instruction.
+ InstsToMove.push_back(&*MBBI);
+ addDefsToList(*MBBI, DefsToMove);
+ continue;
+ }
+
+      // When we match I with another DS instruction we will be moving I down
+      // to the location of the matched instruction, and any uses of I will
+      // need to be moved down as well.
+ addToListsIfDependent(*MBBI, DefsToMove, InstsToMove);
+ continue;
+ }
+
+ // Don't merge volatiles.
+ if (MBBI->hasOrderedMemoryRef())
+ return E;
+
+ // Handle a case like
+ // DS_WRITE_B32 addr, v, idx0
+ // w = DS_READ_B32 addr, idx0
+ // DS_WRITE_B32 addr, f(w), idx1
+ // where the DS_READ_B32 ends up in InstsToMove and therefore prevents
+ // merging of the two writes.
+ if (addToListsIfDependent(*MBBI, DefsToMove, InstsToMove))
+ continue;
+
+ int AddrIdx = AMDGPU::getNamedOperandIdx(I->getOpcode(), AMDGPU::OpName::addr);
+ const MachineOperand &AddrReg0 = I->getOperand(AddrIdx);
+ const MachineOperand &AddrReg1 = MBBI->getOperand(AddrIdx);
+
+ // Check same base pointer. Be careful of subregisters, which can occur with
+ // vectors of pointers.
+ if (AddrReg0.getReg() == AddrReg1.getReg() &&
+ AddrReg0.getSubReg() == AddrReg1.getSubReg()) {
+ int OffsetIdx = AMDGPU::getNamedOperandIdx(I->getOpcode(),
+ AMDGPU::OpName::offset);
+ unsigned Offset0 = I->getOperand(OffsetIdx).getImm() & 0xffff;
+ unsigned Offset1 = MBBI->getOperand(OffsetIdx).getImm() & 0xffff;
+
+ // Check both offsets fit in the reduced range.
+ // We also need to go through the list of instructions that we plan to
+ // move and make sure they are all safe to move down past the merged
+ // instruction.
+ if (offsetsCanBeCombined(Offset0, Offset1, EltSize) &&
+ canMoveInstsAcrossMemOp(*MBBI, InstsToMove, TII, AA))
+ return MBBI;
+ }
+
+ // We've found a load/store that we couldn't merge for some reason.
+ // We could potentially keep looking, but we'd need to make sure that
+    // it was safe to move I and also all the instructions in InstsToMove
+ // down past this instruction.
+ if (!memAccessesCanBeReordered(*I, *MBBI, TII, AA) || // check if we can move I across MBBI
+ !canMoveInstsAcrossMemOp(*MBBI, InstsToMove, TII, AA) // check if we can move all I's users
+ )
+ break;
+ }
return E;
}
MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair(
MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Paired,
- unsigned EltSize) {
+ unsigned EltSize,
+ ArrayRef<MachineInstr*> InstsToMove) {
MachineBasicBlock *MBB = I->getParent();
// Be careful, since the addresses could be subregisters themselves in weird
@@ -220,6 +340,15 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair(
Opc = (EltSize == 4) ? AMDGPU::DS_READ2ST64_B32 : AMDGPU::DS_READ2ST64_B64;
}
+ unsigned SubRegIdx0 = (EltSize == 4) ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
+ unsigned SubRegIdx1 = (EltSize == 4) ? AMDGPU::sub1 : AMDGPU::sub2_sub3;
+
+ if (NewOffset0 > NewOffset1) {
+ // Canonicalize the merged instruction so the smaller offset comes first.
+ std::swap(NewOffset0, NewOffset1);
+ std::swap(SubRegIdx0, SubRegIdx1);
+ }
+
assert((isUInt<8>(NewOffset0) && isUInt<8>(NewOffset1)) &&
(NewOffset0 != NewOffset1) &&
"Computed offset doesn't fit");
@@ -232,62 +361,40 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeRead2Pair(
DebugLoc DL = I->getDebugLoc();
MachineInstrBuilder Read2
- = BuildMI(*MBB, I, DL, Read2Desc, DestReg)
+ = BuildMI(*MBB, Paired, DL, Read2Desc, DestReg)
.addOperand(*AddrReg) // addr
.addImm(NewOffset0) // offset0
.addImm(NewOffset1) // offset1
.addImm(0) // gds
.addMemOperand(*I->memoperands_begin())
.addMemOperand(*Paired->memoperands_begin());
-
- unsigned SubRegIdx0 = (EltSize == 4) ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
- unsigned SubRegIdx1 = (EltSize == 4) ? AMDGPU::sub1 : AMDGPU::sub2_sub3;
+ (void)Read2;
const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY);
// Copy to the old destination registers.
- MachineInstr *Copy0 = BuildMI(*MBB, I, DL, CopyDesc)
+ BuildMI(*MBB, Paired, DL, CopyDesc)
.addOperand(*Dest0) // Copy to same destination including flags and sub reg.
.addReg(DestReg, 0, SubRegIdx0);
- MachineInstr *Copy1 = BuildMI(*MBB, I, DL, CopyDesc)
+ MachineInstr *Copy1 = BuildMI(*MBB, Paired, DL, CopyDesc)
.addOperand(*Dest1)
.addReg(DestReg, RegState::Kill, SubRegIdx1);
- LIS->InsertMachineInstrInMaps(*Read2);
-
- // repairLiveintervalsInRange() doesn't handle physical register, so we have
- // to update the M0 range manually.
- SlotIndex PairedIndex = LIS->getInstructionIndex(*Paired);
- LiveRange &M0Range = LIS->getRegUnit(*MCRegUnitIterator(AMDGPU::M0, TRI));
- LiveRange::Segment *M0Segment = M0Range.getSegmentContaining(PairedIndex);
- bool UpdateM0Range = M0Segment->end == PairedIndex.getRegSlot();
-
- // The new write to the original destination register is now the copy. Steal
- // the old SlotIndex.
- LIS->ReplaceMachineInstrInMaps(*I, *Copy0);
- LIS->ReplaceMachineInstrInMaps(*Paired, *Copy1);
+ moveInstsAfter(Copy1, InstsToMove);
+ MachineBasicBlock::iterator Next = std::next(I);
I->eraseFromParent();
Paired->eraseFromParent();
- LiveInterval &AddrRegLI = LIS->getInterval(AddrReg->getReg());
- LIS->shrinkToUses(&AddrRegLI);
-
- LIS->createAndComputeVirtRegInterval(DestReg);
-
- if (UpdateM0Range) {
- SlotIndex Read2Index = LIS->getInstructionIndex(*Read2);
- M0Segment->end = Read2Index.getRegSlot();
- }
-
DEBUG(dbgs() << "Inserted read2: " << *Read2 << '\n');
- return Read2.getInstr();
+ return Next;
}
MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair(
MachineBasicBlock::iterator I,
MachineBasicBlock::iterator Paired,
- unsigned EltSize) {
+ unsigned EltSize,
+ ArrayRef<MachineInstr*> InstsToMove) {
MachineBasicBlock *MBB = I->getParent();
// Be sure to use .addOperand(), and not .addReg() with these. We want to be
@@ -316,6 +423,12 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair(
Opc = (EltSize == 4) ? AMDGPU::DS_WRITE2ST64_B32 : AMDGPU::DS_WRITE2ST64_B64;
}
+ if (NewOffset0 > NewOffset1) {
+ // Canonicalize the merged instruction so the smaller offset comes first.
+ std::swap(NewOffset0, NewOffset1);
+ std::swap(Data0, Data1);
+ }
+
assert((isUInt<8>(NewOffset0) && isUInt<8>(NewOffset1)) &&
(NewOffset0 != NewOffset1) &&
"Computed offset doesn't fit");
@@ -323,15 +436,8 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair(
const MCInstrDesc &Write2Desc = TII->get(Opc);
DebugLoc DL = I->getDebugLoc();
- // repairLiveintervalsInRange() doesn't handle physical register, so we have
- // to update the M0 range manually.
- SlotIndex PairedIndex = LIS->getInstructionIndex(*Paired);
- LiveRange &M0Range = LIS->getRegUnit(*MCRegUnitIterator(AMDGPU::M0, TRI));
- LiveRange::Segment *M0Segment = M0Range.getSegmentContaining(PairedIndex);
- bool UpdateM0Range = M0Segment->end == PairedIndex.getRegSlot();
-
MachineInstrBuilder Write2
- = BuildMI(*MBB, I, DL, Write2Desc)
+ = BuildMI(*MBB, Paired, DL, Write2Desc)
.addOperand(*Addr) // addr
.addOperand(*Data0) // data0
.addOperand(*Data1) // data1
@@ -341,24 +447,14 @@ MachineBasicBlock::iterator SILoadStoreOptimizer::mergeWrite2Pair(
.addMemOperand(*I->memoperands_begin())
.addMemOperand(*Paired->memoperands_begin());
- // XXX - How do we express subregisters here?
- unsigned OrigRegs[] = { Data0->getReg(), Data1->getReg(), Addr->getReg() };
+ moveInstsAfter(Write2, InstsToMove);
- LIS->RemoveMachineInstrFromMaps(*I);
- LIS->RemoveMachineInstrFromMaps(*Paired);
+ MachineBasicBlock::iterator Next = std::next(I);
I->eraseFromParent();
Paired->eraseFromParent();
- // This doesn't handle physical registers like M0
- LIS->repairIntervalsInRange(MBB, Write2, Write2, OrigRegs);
-
- if (UpdateM0Range) {
- SlotIndex Write2Index = LIS->getInstructionIndex(*Write2);
- M0Segment->end = Write2Index.getRegSlot();
- }
-
DEBUG(dbgs() << "Inserted write2 inst: " << *Write2 << '\n');
- return Write2.getInstr();
+ return Next;
}
// Scan through looking for adjacent LDS operations with constant offsets from
@@ -376,13 +472,15 @@ bool SILoadStoreOptimizer::optimizeBlock(MachineBasicBlock &MBB) {
continue;
}
+ SmallVector<MachineInstr*, 8> InstsToMove;
unsigned Opc = MI.getOpcode();
if (Opc == AMDGPU::DS_READ_B32 || Opc == AMDGPU::DS_READ_B64) {
unsigned Size = (Opc == AMDGPU::DS_READ_B64) ? 8 : 4;
- MachineBasicBlock::iterator Match = findMatchingDSInst(I, Size);
+ MachineBasicBlock::iterator Match = findMatchingDSInst(I, Size,
+ InstsToMove);
if (Match != E) {
Modified = true;
- I = mergeRead2Pair(I, Match, Size);
+ I = mergeRead2Pair(I, Match, Size, InstsToMove);
} else {
++I;
}
@@ -390,10 +488,11 @@ bool SILoadStoreOptimizer::optimizeBlock(MachineBasicBlock &MBB) {
continue;
} else if (Opc == AMDGPU::DS_WRITE_B32 || Opc == AMDGPU::DS_WRITE_B64) {
unsigned Size = (Opc == AMDGPU::DS_WRITE_B64) ? 8 : 4;
- MachineBasicBlock::iterator Match = findMatchingDSInst(I, Size);
+ MachineBasicBlock::iterator Match = findMatchingDSInst(I, Size,
+ InstsToMove);
if (Match != E) {
Modified = true;
- I = mergeWrite2Pair(I, Match, Size);
+ I = mergeWrite2Pair(I, Match, Size, InstsToMove);
} else {
++I;
}
@@ -419,13 +518,10 @@ bool SILoadStoreOptimizer::runOnMachineFunction(MachineFunction &MF) {
TRI = &TII->getRegisterInfo();
MRI = &MF.getRegInfo();
-
- LIS = &getAnalysis<LiveIntervals>();
+ AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
DEBUG(dbgs() << "Running SILoadStoreOptimizer\n");
- assert(!MRI->isSSA());
-
bool Modified = false;
for (MachineBasicBlock &MBB : MF)
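
The rewrite in this file drops the LiveIntervals bookkeeping and instead asks alias analysis (through areMemAccessesTriviallyDisjoint) whether two DS accesses may be reordered, with the usual dependence rules: read-after-read is always safe, while RAW, WAR and WAW are only safe when the accesses are provably disjoint. A standalone restatement of that predicate, with plain booleans standing in for mayStore() and for the disjointness query (the names are illustrative):

    #include <cassert>

    // Mirrors memAccessesCanBeReordered: reordering is allowed if the accesses
    // are disjoint, or if neither one writes (the read-after-read case).
    static bool canReorder(bool AStores, bool BStores, bool TriviallyDisjoint) {
      return TriviallyDisjoint || (!AStores && !BStores);
    }

    int main() {
      assert(canReorder(false, false, false));  // neither writes: always safe
      assert(!canReorder(false, true, false));  // one writes: needs disjointness
      assert(!canReorder(true, true, false));   // both write: needs disjointness
      assert(canReorder(true, true, true));     // provably disjoint: safe
      return 0;
    }
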
diff --git a/contrib/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/contrib/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index ee1d5dae70b7..7ed18f27e591 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -58,7 +58,6 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/Constants.h"
using namespace llvm;
@@ -68,63 +67,50 @@ namespace {
class SILowerControlFlow : public MachineFunctionPass {
private:
- static const unsigned SkipThreshold = 12;
-
const SIRegisterInfo *TRI;
const SIInstrInfo *TII;
+ LiveIntervals *LIS;
+ MachineRegisterInfo *MRI;
- bool shouldSkip(MachineBasicBlock *From, MachineBasicBlock *To);
-
- void Skip(MachineInstr &From, MachineOperand &To);
- bool skipIfDead(MachineInstr &MI, MachineBasicBlock &NextBB);
-
- void If(MachineInstr &MI);
- void Else(MachineInstr &MI, bool ExecModified);
- void Break(MachineInstr &MI);
- void IfBreak(MachineInstr &MI);
- void ElseBreak(MachineInstr &MI);
- void Loop(MachineInstr &MI);
- void EndCf(MachineInstr &MI);
-
- void Kill(MachineInstr &MI);
- void Branch(MachineInstr &MI);
-
- MachineBasicBlock *insertSkipBlock(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) const;
-
- std::pair<MachineBasicBlock *, MachineBasicBlock *>
- splitBlock(MachineBasicBlock &MBB, MachineBasicBlock::iterator I);
+ void emitIf(MachineInstr &MI);
+ void emitElse(MachineInstr &MI);
+ void emitBreak(MachineInstr &MI);
+ void emitIfBreak(MachineInstr &MI);
+ void emitElseBreak(MachineInstr &MI);
+ void emitLoop(MachineInstr &MI);
+ void emitEndCf(MachineInstr &MI);
- void splitLoadM0BlockLiveIns(LivePhysRegs &RemainderLiveRegs,
- const MachineRegisterInfo &MRI,
- const MachineInstr &MI,
- MachineBasicBlock &LoopBB,
- MachineBasicBlock &RemainderBB,
- unsigned SaveReg,
- const MachineOperand &IdxReg);
+ void findMaskOperands(MachineInstr &MI, unsigned OpNo,
+ SmallVectorImpl<MachineOperand> &Src) const;
- void emitLoadM0FromVGPRLoop(MachineBasicBlock &LoopBB, DebugLoc DL,
- MachineInstr *MovRel,
- const MachineOperand &IdxReg,
- int Offset);
-
- bool loadM0(MachineInstr &MI, MachineInstr *MovRel, int Offset = 0);
- std::pair<unsigned, int> computeIndirectRegAndOffset(unsigned VecReg,
- int Offset) const;
- bool indirectSrc(MachineInstr &MI);
- bool indirectDst(MachineInstr &MI);
+ void combineMasks(MachineInstr &MI);
public:
static char ID;
SILowerControlFlow() :
- MachineFunctionPass(ID), TRI(nullptr), TII(nullptr) { }
+ MachineFunctionPass(ID),
+ TRI(nullptr),
+ TII(nullptr),
+ LIS(nullptr),
+ MRI(nullptr) {}
bool runOnMachineFunction(MachineFunction &MF) override;
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "SI Lower control flow pseudo instructions";
}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ // Should preserve the same set that TwoAddressInstructions does.
+ AU.addPreserved<SlotIndexes>();
+ AU.addPreserved<LiveIntervals>();
+ AU.addPreservedID(LiveVariablesID);
+ AU.addPreservedID(MachineLoopInfoID);
+ AU.addPreservedID(MachineDominatorsID);
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
};
} // End anonymous namespace
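
The new emitIf, defined further down in this diff, lowers SI_IF into a copy of exec, an S_AND_B64 with the branch condition, an S_XOR_B64 that computes the saved mask, and a terminator move that installs the narrowed exec. The saved bits and the new exec always partition the old exec, which is what lets emitElse and emitEndCf restore it later. A standalone check of that mask algebra on plain 64-bit values (illustrative only):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint64_t Exec = 0xff00ff00ff00ff00ull; // lanes currently active
      const uint64_t Cond = 0x0ff00ff00ff00ff0ull; // lanes taking the 'then' side
      const uint64_t Then = Exec & Cond;           // S_AND_B64: exec for 'then'
      const uint64_t Save = Then ^ Exec;           // S_XOR_B64: lanes parked for 'else'
      assert((Then | Save) == Exec);               // together they restore exec
      assert((Then & Save) == 0);                  // and they never overlap
      return 0;
    }
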
@@ -132,555 +118,283 @@ public:
char SILowerControlFlow::ID = 0;
INITIALIZE_PASS(SILowerControlFlow, DEBUG_TYPE,
- "SI lower control flow", false, false)
+ "SI lower control flow", false, false)
-char &llvm::SILowerControlFlowPassID = SILowerControlFlow::ID;
+static void setImpSCCDefDead(MachineInstr &MI, bool IsDead) {
+ MachineOperand &ImpDefSCC = MI.getOperand(3);
+ assert(ImpDefSCC.getReg() == AMDGPU::SCC && ImpDefSCC.isDef());
-
-FunctionPass *llvm::createSILowerControlFlowPass() {
- return new SILowerControlFlow();
+ ImpDefSCC.setIsDead(IsDead);
}
-static bool opcodeEmitsNoInsts(unsigned Opc) {
- switch (Opc) {
- case TargetOpcode::IMPLICIT_DEF:
- case TargetOpcode::KILL:
- case TargetOpcode::BUNDLE:
- case TargetOpcode::CFI_INSTRUCTION:
- case TargetOpcode::EH_LABEL:
- case TargetOpcode::GC_LABEL:
- case TargetOpcode::DBG_VALUE:
- return true;
- default:
- return false;
- }
-}
-
-bool SILowerControlFlow::shouldSkip(MachineBasicBlock *From,
- MachineBasicBlock *To) {
- if (From->succ_empty())
- return false;
-
- unsigned NumInstr = 0;
- MachineFunction *MF = From->getParent();
-
- for (MachineFunction::iterator MBBI(From), ToI(To), End = MF->end();
- MBBI != End && MBBI != ToI; ++MBBI) {
- MachineBasicBlock &MBB = *MBBI;
-
- for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
- NumInstr < SkipThreshold && I != E; ++I) {
- if (opcodeEmitsNoInsts(I->getOpcode()))
- continue;
-
- // When a uniform loop is inside non-uniform control flow, the branch
- // leaving the loop might be an S_CBRANCH_VCCNZ, which is never taken
- // when EXEC = 0. We should skip the loop lest it becomes infinite.
- if (I->getOpcode() == AMDGPU::S_CBRANCH_VCCNZ ||
- I->getOpcode() == AMDGPU::S_CBRANCH_VCCZ)
- return true;
-
- if (I->isInlineAsm()) {
- const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
- const char *AsmStr = I->getOperand(0).getSymbolName();
-
- // inlineasm length estimate is number of bytes assuming the longest
- // instruction.
- uint64_t MaxAsmSize = TII->getInlineAsmLength(AsmStr, *MAI);
- NumInstr += MaxAsmSize / MAI->getMaxInstLength();
- } else {
- ++NumInstr;
- }
+char &llvm::SILowerControlFlowID = SILowerControlFlow::ID;
- if (NumInstr >= SkipThreshold)
- return true;
- }
- }
-
- return false;
-}
-
-void SILowerControlFlow::Skip(MachineInstr &From, MachineOperand &To) {
-
- if (!shouldSkip(*From.getParent()->succ_begin(), To.getMBB()))
- return;
-
- DebugLoc DL = From.getDebugLoc();
- BuildMI(*From.getParent(), &From, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
- .addOperand(To);
-}
-
-bool SILowerControlFlow::skipIfDead(MachineInstr &MI, MachineBasicBlock &NextBB) {
+void SILowerControlFlow::emitIf(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
- MachineFunction *MF = MBB.getParent();
-
- if (MF->getFunction()->getCallingConv() != CallingConv::AMDGPU_PS ||
- !shouldSkip(&MBB, &MBB.getParent()->back()))
- return false;
-
- MachineBasicBlock *SkipBB = insertSkipBlock(MBB, MI.getIterator());
- MBB.addSuccessor(SkipBB);
-
const DebugLoc &DL = MI.getDebugLoc();
+ MachineBasicBlock::iterator I(&MI);
- // If the exec mask is non-zero, skip the next two instructions
- BuildMI(&MBB, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
- .addMBB(&NextBB);
-
- MachineBasicBlock::iterator Insert = SkipBB->begin();
-
- // Exec mask is zero: Export to NULL target...
- BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::EXP))
- .addImm(0)
- .addImm(0x09) // V_008DFC_SQ_EXP_NULL
- .addImm(0)
- .addImm(1)
- .addImm(1)
- .addReg(AMDGPU::VGPR0, RegState::Undef)
- .addReg(AMDGPU::VGPR0, RegState::Undef)
- .addReg(AMDGPU::VGPR0, RegState::Undef)
- .addReg(AMDGPU::VGPR0, RegState::Undef);
-
- // ... and terminate wavefront.
- BuildMI(*SkipBB, Insert, DL, TII->get(AMDGPU::S_ENDPGM));
-
- return true;
-}
-
-void SILowerControlFlow::If(MachineInstr &MI) {
- MachineBasicBlock &MBB = *MI.getParent();
- DebugLoc DL = MI.getDebugLoc();
- unsigned Reg = MI.getOperand(0).getReg();
- unsigned Vcc = MI.getOperand(1).getReg();
-
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), Reg)
- .addReg(Vcc);
-
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), Reg)
- .addReg(AMDGPU::EXEC)
- .addReg(Reg);
-
- Skip(MI, MI.getOperand(2));
+ MachineOperand &SaveExec = MI.getOperand(0);
+ MachineOperand &Cond = MI.getOperand(1);
+ assert(SaveExec.getSubReg() == AMDGPU::NoSubRegister &&
+ Cond.getSubReg() == AMDGPU::NoSubRegister);
- // Insert a pseudo terminator to help keep the verifier happy.
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
- .addOperand(MI.getOperand(2))
- .addReg(Reg);
+ unsigned SaveExecReg = SaveExec.getReg();
- MI.eraseFromParent();
-}
+ MachineOperand &ImpDefSCC = MI.getOperand(4);
+ assert(ImpDefSCC.getReg() == AMDGPU::SCC && ImpDefSCC.isDef());
-void SILowerControlFlow::Else(MachineInstr &MI, bool ExecModified) {
- MachineBasicBlock &MBB = *MI.getParent();
- DebugLoc DL = MI.getDebugLoc();
- unsigned Dst = MI.getOperand(0).getReg();
- unsigned Src = MI.getOperand(1).getReg();
-
- BuildMI(MBB, MBB.getFirstNonPHI(), DL,
- TII->get(AMDGPU::S_OR_SAVEEXEC_B64), Dst)
- .addReg(Src); // Saved EXEC
-
- if (ExecModified) {
- // Adjust the saved exec to account for the modifications during the flow
- // block that contains the ELSE. This can happen when WQM mode is switched
- // off.
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_B64), Dst)
- .addReg(AMDGPU::EXEC)
- .addReg(Dst);
+  // Add an implicit def of exec to discourage scheduling VALU after this
+  // point, which would interfere with trying to form s_and_saveexec_b64 later.
+ unsigned CopyReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ MachineInstr *CopyExec =
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::COPY), CopyReg)
+ .addReg(AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC, RegState::ImplicitDefine);
+
+ unsigned Tmp = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
+
+ MachineInstr *And =
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_AND_B64), Tmp)
+ .addReg(CopyReg)
+ //.addReg(AMDGPU::EXEC)
+ .addReg(Cond.getReg());
+ setImpSCCDefDead(*And, true);
+
+ MachineInstr *Xor =
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_XOR_B64), SaveExecReg)
+ .addReg(Tmp)
+ .addReg(CopyReg);
+ setImpSCCDefDead(*Xor, ImpDefSCC.isDead());
+
+  // Use a copy that is a terminator to get correct spill code placement with
+  // the fast register allocator.
+ MachineInstr *SetExec =
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B64_term), AMDGPU::EXEC)
+ .addReg(Tmp, RegState::Kill);
+
+ // Insert a pseudo terminator to help keep the verifier happy. This will also
+ // be used later when inserting skips.
+ MachineInstr *NewBr =
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
+ .addOperand(MI.getOperand(2));
+
+ if (!LIS) {
+ MI.eraseFromParent();
+ return;
}
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC)
- .addReg(AMDGPU::EXEC)
- .addReg(Dst);
+ LIS->InsertMachineInstrInMaps(*CopyExec);
- Skip(MI, MI.getOperand(2));
+  // Replace MI with the And in the maps so we don't need to fix the live
+  // interval for the condition register.
+ LIS->ReplaceMachineInstrInMaps(MI, *And);
- // Insert a pseudo terminator to help keep the verifier happy.
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
- .addOperand(MI.getOperand(2))
- .addReg(Dst);
+ LIS->InsertMachineInstrInMaps(*Xor);
+ LIS->InsertMachineInstrInMaps(*SetExec);
+ LIS->InsertMachineInstrInMaps(*NewBr);
+ LIS->removeRegUnit(*MCRegUnitIterator(AMDGPU::EXEC, TRI));
MI.eraseFromParent();
-}
-
-void SILowerControlFlow::Break(MachineInstr &MI) {
- MachineBasicBlock &MBB = *MI.getParent();
- DebugLoc DL = MI.getDebugLoc();
-
- unsigned Dst = MI.getOperand(0).getReg();
- unsigned Src = MI.getOperand(1).getReg();
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst)
- .addReg(AMDGPU::EXEC)
- .addReg(Src);
-
- MI.eraseFromParent();
+ // FIXME: Is there a better way of adjusting the liveness? It shouldn't be
+ // hard to add another def here but I'm not sure how to correctly update the
+ // valno.
+ LIS->removeInterval(SaveExecReg);
+ LIS->createAndComputeVirtRegInterval(SaveExecReg);
+ LIS->createAndComputeVirtRegInterval(Tmp);
+ LIS->createAndComputeVirtRegInterval(CopyReg);
}
-void SILowerControlFlow::IfBreak(MachineInstr &MI) {
+void SILowerControlFlow::emitElse(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
- DebugLoc DL = MI.getDebugLoc();
-
- unsigned Dst = MI.getOperand(0).getReg();
- unsigned Vcc = MI.getOperand(1).getReg();
- unsigned Src = MI.getOperand(2).getReg();
-
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst)
- .addReg(Vcc)
- .addReg(Src);
-
- MI.eraseFromParent();
-}
+ const DebugLoc &DL = MI.getDebugLoc();
-void SILowerControlFlow::ElseBreak(MachineInstr &MI) {
- MachineBasicBlock &MBB = *MI.getParent();
- DebugLoc DL = MI.getDebugLoc();
+ unsigned DstReg = MI.getOperand(0).getReg();
+ assert(MI.getOperand(0).getSubReg() == AMDGPU::NoSubRegister);
- unsigned Dst = MI.getOperand(0).getReg();
- unsigned Saved = MI.getOperand(1).getReg();
- unsigned Src = MI.getOperand(2).getReg();
+ bool ExecModified = MI.getOperand(3).getImm() != 0;
+ MachineBasicBlock::iterator Start = MBB.begin();
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst)
- .addReg(Saved)
- .addReg(Src);
+  // We are running before TwoAddressInstructions, and si_else's operands are
+  // tied. In order to correctly tie the registers, split this into a copy of
+  // the src like that pass would do.
+ unsigned CopyReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ BuildMI(MBB, Start, DL, TII->get(AMDGPU::COPY), CopyReg)
+ .addOperand(MI.getOperand(1)); // Saved EXEC
- MI.eraseFromParent();
-}
+ // This must be inserted before phis and any spill code inserted before the
+ // else.
+ unsigned SaveReg = ExecModified ?
+ MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass) : DstReg;
+ MachineInstr *OrSaveExec =
+ BuildMI(MBB, Start, DL, TII->get(AMDGPU::S_OR_SAVEEXEC_B64), SaveReg)
+ .addReg(CopyReg);
-void SILowerControlFlow::Loop(MachineInstr &MI) {
- MachineBasicBlock &MBB = *MI.getParent();
- DebugLoc DL = MI.getDebugLoc();
- unsigned Src = MI.getOperand(0).getReg();
+ MachineBasicBlock *DestBB = MI.getOperand(2).getMBB();
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ANDN2_B64), AMDGPU::EXEC)
- .addReg(AMDGPU::EXEC)
- .addReg(Src);
+ MachineBasicBlock::iterator ElsePt(MI);
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
- .addOperand(MI.getOperand(1));
+ if (ExecModified) {
+ MachineInstr *And =
+ BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::S_AND_B64), DstReg)
+ .addReg(AMDGPU::EXEC)
+ .addReg(SaveReg);
- MI.eraseFromParent();
-}
+ if (LIS)
+ LIS->InsertMachineInstrInMaps(*And);
+ }
-void SILowerControlFlow::EndCf(MachineInstr &MI) {
- MachineBasicBlock &MBB = *MI.getParent();
- DebugLoc DL = MI.getDebugLoc();
- unsigned Reg = MI.getOperand(0).getReg();
+ MachineInstr *Xor =
+ BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::S_XOR_B64_term), AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC)
+ .addReg(DstReg);
- BuildMI(MBB, MBB.getFirstNonPHI(), DL,
- TII->get(AMDGPU::S_OR_B64), AMDGPU::EXEC)
- .addReg(AMDGPU::EXEC)
- .addReg(Reg);
+ MachineInstr *Branch =
+ BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
+ .addMBB(DestBB);
- MI.eraseFromParent();
-}
-
-void SILowerControlFlow::Branch(MachineInstr &MI) {
- MachineBasicBlock *MBB = MI.getOperand(0).getMBB();
- if (MBB == MI.getParent()->getNextNode())
+ if (!LIS) {
MI.eraseFromParent();
-
- // If these aren't equal, this is probably an infinite loop.
-}
-
-void SILowerControlFlow::Kill(MachineInstr &MI) {
- MachineBasicBlock &MBB = *MI.getParent();
- DebugLoc DL = MI.getDebugLoc();
- const MachineOperand &Op = MI.getOperand(0);
-
-#ifndef NDEBUG
- CallingConv::ID CallConv = MBB.getParent()->getFunction()->getCallingConv();
- // Kill is only allowed in pixel / geometry shaders.
- assert(CallConv == CallingConv::AMDGPU_PS ||
- CallConv == CallingConv::AMDGPU_GS);
-#endif
-
- // Clear this thread from the exec mask if the operand is negative
- if ((Op.isImm())) {
- // Constant operand: Set exec mask to 0 or do nothing
- if (Op.getImm() & 0x80000000) {
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
- .addImm(0);
- }
- } else {
- BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMPX_LE_F32_e32))
- .addImm(0)
- .addOperand(Op);
+ return;
}
+ LIS->RemoveMachineInstrFromMaps(MI);
MI.eraseFromParent();
-}
-// All currently live registers must remain so in the remainder block.
-void SILowerControlFlow::splitLoadM0BlockLiveIns(LivePhysRegs &RemainderLiveRegs,
- const MachineRegisterInfo &MRI,
- const MachineInstr &MI,
- MachineBasicBlock &LoopBB,
- MachineBasicBlock &RemainderBB,
- unsigned SaveReg,
- const MachineOperand &IdxReg) {
- // Add reg defined in loop body.
- RemainderLiveRegs.addReg(SaveReg);
-
- if (const MachineOperand *Val = TII->getNamedOperand(MI, AMDGPU::OpName::val)) {
- if (!Val->isUndef()) {
- RemainderLiveRegs.addReg(Val->getReg());
- LoopBB.addLiveIn(Val->getReg());
- }
- }
+ LIS->InsertMachineInstrInMaps(*OrSaveExec);
- for (unsigned Reg : RemainderLiveRegs) {
- if (MRI.isAllocatable(Reg))
- RemainderBB.addLiveIn(Reg);
- }
+ LIS->InsertMachineInstrInMaps(*Xor);
+ LIS->InsertMachineInstrInMaps(*Branch);
- const MachineOperand *Src = TII->getNamedOperand(MI, AMDGPU::OpName::src);
- if (!Src->isUndef())
- LoopBB.addLiveIn(Src->getReg());
+ // src reg is tied to dst reg.
+ LIS->removeInterval(DstReg);
+ LIS->createAndComputeVirtRegInterval(DstReg);
+ LIS->createAndComputeVirtRegInterval(CopyReg);
+ if (ExecModified)
+ LIS->createAndComputeVirtRegInterval(SaveReg);
- if (!IdxReg.isUndef())
- LoopBB.addLiveIn(IdxReg.getReg());
- LoopBB.sortUniqueLiveIns();
+ // Let this be recomputed.
+ LIS->removeRegUnit(*MCRegUnitIterator(AMDGPU::EXEC, TRI));
}
-void SILowerControlFlow::emitLoadM0FromVGPRLoop(MachineBasicBlock &LoopBB,
- DebugLoc DL,
- MachineInstr *MovRel,
- const MachineOperand &IdxReg,
- int Offset) {
- MachineBasicBlock::iterator I = LoopBB.begin();
-
- // Read the next variant into VCC (lower 32 bits) <- also loop target
- BuildMI(LoopBB, I, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), AMDGPU::VCC_LO)
- .addReg(IdxReg.getReg(), getUndefRegState(IdxReg.isUndef()));
-
- // Move index from VCC into M0
- BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
- .addReg(AMDGPU::VCC_LO);
-
- // Compare the just read M0 value to all possible Idx values
- BuildMI(LoopBB, I, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32))
- .addReg(AMDGPU::M0)
- .addReg(IdxReg.getReg(), getUndefRegState(IdxReg.isUndef()));
-
- // Update EXEC, save the original EXEC value to VCC
- BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC)
- .addReg(AMDGPU::VCC);
-
- if (Offset != 0) {
- BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0)
- .addReg(AMDGPU::M0)
- .addImm(Offset);
- }
-
- // Do the actual move
- LoopBB.insert(I, MovRel);
+void SILowerControlFlow::emitBreak(MachineInstr &MI) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ const DebugLoc &DL = MI.getDebugLoc();
+ unsigned Dst = MI.getOperand(0).getReg();
- // Update EXEC, switch all done bits to 0 and all todo bits to 1
- BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC)
+ MachineInstr *Or =
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_OR_B64), Dst)
.addReg(AMDGPU::EXEC)
- .addReg(AMDGPU::VCC);
+ .addOperand(MI.getOperand(1));
- // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover
- BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
- .addMBB(&LoopBB);
+ if (LIS)
+ LIS->ReplaceMachineInstrInMaps(MI, *Or);
+ MI.eraseFromParent();
}
-MachineBasicBlock *SILowerControlFlow::insertSkipBlock(
- MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const {
- MachineFunction *MF = MBB.getParent();
-
- MachineBasicBlock *SkipBB = MF->CreateMachineBasicBlock();
- MachineFunction::iterator MBBI(MBB);
- ++MBBI;
-
- MF->insert(MBBI, SkipBB);
-
- return SkipBB;
+void SILowerControlFlow::emitIfBreak(MachineInstr &MI) {
+ MI.setDesc(TII->get(AMDGPU::S_OR_B64));
}
-std::pair<MachineBasicBlock *, MachineBasicBlock *>
-SILowerControlFlow::splitBlock(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) {
- MachineFunction *MF = MBB.getParent();
-
- // To insert the loop we need to split the block. Move everything after this
- // point to a new block, and insert a new empty block between the two.
- MachineBasicBlock *LoopBB = MF->CreateMachineBasicBlock();
- MachineBasicBlock *RemainderBB = MF->CreateMachineBasicBlock();
- MachineFunction::iterator MBBI(MBB);
- ++MBBI;
-
- MF->insert(MBBI, LoopBB);
- MF->insert(MBBI, RemainderBB);
-
- // Move the rest of the block into a new block.
- RemainderBB->transferSuccessors(&MBB);
- RemainderBB->splice(RemainderBB->begin(), &MBB, I, MBB.end());
-
- MBB.addSuccessor(LoopBB);
-
- return std::make_pair(LoopBB, RemainderBB);
+void SILowerControlFlow::emitElseBreak(MachineInstr &MI) {
+ MI.setDesc(TII->get(AMDGPU::S_OR_B64));
}
-// Returns true if a new block was inserted.
-bool SILowerControlFlow::loadM0(MachineInstr &MI, MachineInstr *MovRel, int Offset) {
+void SILowerControlFlow::emitLoop(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
- DebugLoc DL = MI.getDebugLoc();
- MachineBasicBlock::iterator I(&MI);
+ const DebugLoc &DL = MI.getDebugLoc();
- const MachineOperand *Idx = TII->getNamedOperand(MI, AMDGPU::OpName::idx);
+ MachineInstr *AndN2 =
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_ANDN2_B64_term), AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC)
+ .addOperand(MI.getOperand(0));
- if (AMDGPU::SReg_32RegClass.contains(Idx->getReg())) {
- if (Offset != 0) {
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ADD_I32), AMDGPU::M0)
- .addReg(Idx->getReg(), getUndefRegState(Idx->isUndef()))
- .addImm(Offset);
- } else {
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0)
- .addReg(Idx->getReg(), getUndefRegState(Idx->isUndef()));
- }
+ MachineInstr *Branch =
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ))
+ .addOperand(MI.getOperand(1));
- MBB.insert(I, MovRel);
- MI.eraseFromParent();
- return false;
+ if (LIS) {
+ LIS->ReplaceMachineInstrInMaps(MI, *AndN2);
+ LIS->InsertMachineInstrInMaps(*Branch);
}
- MachineOperand *SaveOp = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
- SaveOp->setIsDead(false);
- unsigned Save = SaveOp->getReg();
-
- // Reading from a VGPR requires looping over all workitems in the wavefront.
- assert(AMDGPU::SReg_64RegClass.contains(Save) &&
- AMDGPU::VGPR_32RegClass.contains(Idx->getReg()));
-
- // Save the EXEC mask
- BuildMI(MBB, I, DL, TII->get(AMDGPU::S_MOV_B64), Save)
- .addReg(AMDGPU::EXEC);
-
- LivePhysRegs RemainderLiveRegs(TRI);
-
- RemainderLiveRegs.addLiveOuts(MBB);
-
- MachineBasicBlock *LoopBB;
- MachineBasicBlock *RemainderBB;
-
- std::tie(LoopBB, RemainderBB) = splitBlock(MBB, I);
-
- for (const MachineInstr &Inst : reverse(*RemainderBB))
- RemainderLiveRegs.stepBackward(Inst);
-
- MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
- LoopBB->addSuccessor(RemainderBB);
- LoopBB->addSuccessor(LoopBB);
-
- splitLoadM0BlockLiveIns(RemainderLiveRegs, MRI, MI, *LoopBB,
- *RemainderBB, Save, *Idx);
-
- emitLoadM0FromVGPRLoop(*LoopBB, DL, MovRel, *Idx, Offset);
-
- MachineBasicBlock::iterator First = RemainderBB->begin();
- BuildMI(*RemainderBB, First, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC)
- .addReg(Save);
-
MI.eraseFromParent();
- return true;
-}
-
-/// \param @VecReg The register which holds element zero of the vector being
-/// addressed into.
-//
-/// \param[in] @Idx The index operand from the movrel instruction. This must be
-// a register, but may be NoRegister.
-///
-/// \param[in] @Offset As an input, this is the constant offset part of the
-// indirect Index. e.g. v0 = v[VecReg + Offset] As an output, this is a constant
-// value that needs to be added to the value stored in M0.
-std::pair<unsigned, int>
-SILowerControlFlow::computeIndirectRegAndOffset(unsigned VecReg, int Offset) const {
- unsigned SubReg = TRI->getSubReg(VecReg, AMDGPU::sub0);
- if (!SubReg)
- SubReg = VecReg;
-
- const TargetRegisterClass *SuperRC = TRI->getPhysRegClass(VecReg);
- const TargetRegisterClass *RC = TRI->getPhysRegClass(SubReg);
- int NumElts = SuperRC->getSize() / RC->getSize();
-
- int BaseRegIdx = TRI->getHWRegIndex(SubReg);
-
- // Skip out of bounds offsets, or else we would end up using an undefined
- // register.
- if (Offset >= NumElts)
- return std::make_pair(RC->getRegister(BaseRegIdx), Offset);
-
- int RegIdx = BaseRegIdx + Offset;
- if (RegIdx < 0) {
- Offset = RegIdx;
- RegIdx = 0;
- } else {
- Offset = 0;
- }
-
- unsigned Reg = RC->getRegister(RegIdx);
- return std::make_pair(Reg, Offset);
}
-// Return true if a new block was inserted.
-bool SILowerControlFlow::indirectSrc(MachineInstr &MI) {
+void SILowerControlFlow::emitEndCf(MachineInstr &MI) {
MachineBasicBlock &MBB = *MI.getParent();
const DebugLoc &DL = MI.getDebugLoc();
- unsigned Dst = MI.getOperand(0).getReg();
- const MachineOperand *SrcVec = TII->getNamedOperand(MI, AMDGPU::OpName::src);
- int Offset = TII->getNamedOperand(MI, AMDGPU::OpName::offset)->getImm();
- unsigned Reg;
-
- std::tie(Reg, Offset) = computeIndirectRegAndOffset(SrcVec->getReg(), Offset);
+ MachineBasicBlock::iterator InsPt = MBB.begin();
+ MachineInstr *NewMI =
+ BuildMI(MBB, InsPt, DL, TII->get(AMDGPU::S_OR_B64), AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC)
+ .addOperand(MI.getOperand(0));
- const MachineOperand *Idx = TII->getNamedOperand(MI, AMDGPU::OpName::idx);
- if (Idx->getReg() == AMDGPU::NoRegister) {
- // Only had a constant offset, copy the register directly.
- BuildMI(MBB, MI.getIterator(), DL, TII->get(AMDGPU::V_MOV_B32_e32), Dst)
- .addReg(Reg, getUndefRegState(SrcVec->isUndef()));
- MI.eraseFromParent();
- return false;
- }
+ if (LIS)
+ LIS->ReplaceMachineInstrInMaps(MI, *NewMI);
- MachineInstr *MovRel =
- BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELS_B32_e32), Dst)
- .addReg(Reg, getUndefRegState(SrcVec->isUndef()))
- .addReg(SrcVec->getReg(), RegState::Implicit);
+ MI.eraseFromParent();
- return loadM0(MI, MovRel, Offset);
+ if (LIS)
+ LIS->handleMove(*NewMI);
}
-// Return true if a new block was inserted.
-bool SILowerControlFlow::indirectDst(MachineInstr &MI) {
- MachineBasicBlock &MBB = *MI.getParent();
- const DebugLoc &DL = MI.getDebugLoc();
-
- unsigned Dst = MI.getOperand(0).getReg();
- int Offset = TII->getNamedOperand(MI, AMDGPU::OpName::offset)->getImm();
- unsigned Reg;
-
- const MachineOperand *Val = TII->getNamedOperand(MI, AMDGPU::OpName::val);
- std::tie(Reg, Offset) = computeIndirectRegAndOffset(Dst, Offset);
-
- MachineOperand *Idx = TII->getNamedOperand(MI, AMDGPU::OpName::idx);
- if (Idx->getReg() == AMDGPU::NoRegister) {
- // Only had a constant offset, copy the register directly.
- BuildMI(MBB, MI.getIterator(), DL, TII->get(AMDGPU::V_MOV_B32_e32), Reg)
- .addOperand(*Val);
- MI.eraseFromParent();
- return false;
+// Returns the replacement operands for a logical operation: a single result
+// for exec, or two operands if the source was another equivalent operation.
+void SILowerControlFlow::findMaskOperands(MachineInstr &MI, unsigned OpNo,
+ SmallVectorImpl<MachineOperand> &Src) const {
+ MachineOperand &Op = MI.getOperand(OpNo);
+ if (!Op.isReg() || !TargetRegisterInfo::isVirtualRegister(Op.getReg())) {
+ Src.push_back(Op);
+ return;
}
- MachineInstr *MovRel =
- BuildMI(*MBB.getParent(), DL, TII->get(AMDGPU::V_MOVRELD_B32_e32), Reg)
- .addReg(Val->getReg(), getUndefRegState(Val->isUndef()))
- .addReg(Dst, RegState::Implicit);
+ MachineInstr *Def = MRI->getUniqueVRegDef(Op.getReg());
+ if (!Def || Def->getParent() != MI.getParent() ||
+ !(Def->isFullCopy() || (Def->getOpcode() == MI.getOpcode())))
+ return;
- return loadM0(MI, MovRel, Offset);
+ // Make sure we do not modify exec between def and use.
+  // A copy with implicitly defined exec inserted earlier is an exception; it
+  // does not really modify exec.
+ for (auto I = Def->getIterator(); I != MI.getIterator(); ++I)
+ if (I->modifiesRegister(AMDGPU::EXEC, TRI) &&
+ !(I->isCopy() && I->getOperand(0).getReg() != AMDGPU::EXEC))
+ return;
+
+ for (const auto &SrcOp : Def->explicit_operands())
+ if (SrcOp.isUse() && (!SrcOp.isReg() ||
+ TargetRegisterInfo::isVirtualRegister(SrcOp.getReg()) ||
+ SrcOp.getReg() == AMDGPU::EXEC))
+ Src.push_back(SrcOp);
+}
+
+// Search and combine pairs of equivalent instructions, like
+// S_AND_B64 x, (S_AND_B64 x, y) => S_AND_B64 x, y
+// S_OR_B64 x, (S_OR_B64 x, y) => S_OR_B64 x, y
+// One of the operands is the exec mask.
+void SILowerControlFlow::combineMasks(MachineInstr &MI) {
+ assert(MI.getNumExplicitOperands() == 3);
+ SmallVector<MachineOperand, 4> Ops;
+ unsigned OpToReplace = 1;
+ findMaskOperands(MI, 1, Ops);
+ if (Ops.size() == 1) OpToReplace = 2; // First operand can be exec or its copy
+ findMaskOperands(MI, 2, Ops);
+ if (Ops.size() != 3) return;
+
+ unsigned UniqueOpndIdx;
+ if (Ops[0].isIdenticalTo(Ops[1])) UniqueOpndIdx = 2;
+ else if (Ops[0].isIdenticalTo(Ops[2])) UniqueOpndIdx = 1;
+ else if (Ops[1].isIdenticalTo(Ops[2])) UniqueOpndIdx = 1;
+ else return;
+
+ unsigned Reg = MI.getOperand(OpToReplace).getReg();
+ MI.RemoveOperand(OpToReplace);
+ MI.addOperand(Ops[UniqueOpndIdx]);
+ if (MRI->use_empty(Reg))
+ MRI->getUniqueVRegDef(Reg)->eraseFromParent();
}
bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
@@ -688,148 +402,66 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
TII = ST.getInstrInfo();
TRI = &TII->getRegisterInfo();
- SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
-
- bool HaveKill = false;
- bool NeedFlat = false;
- unsigned Depth = 0;
+ // This doesn't actually need LiveIntervals, but we can preserve them.
+ LIS = getAnalysisIfAvailable<LiveIntervals>();
+ MRI = &MF.getRegInfo();
MachineFunction::iterator NextBB;
-
for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
BI != BE; BI = NextBB) {
NextBB = std::next(BI);
MachineBasicBlock &MBB = *BI;
- MachineBasicBlock *EmptyMBBAtEnd = nullptr;
- MachineBasicBlock::iterator I, Next;
- bool ExecModified = false;
+ MachineBasicBlock::iterator I, Next, Last;
- for (I = MBB.begin(); I != MBB.end(); I = Next) {
+ for (I = MBB.begin(), Last = MBB.end(); I != MBB.end(); I = Next) {
Next = std::next(I);
-
MachineInstr &MI = *I;
- // Flat uses m0 in case it needs to access LDS.
- if (TII->isFLAT(MI))
- NeedFlat = true;
-
- if (I->modifiesRegister(AMDGPU::EXEC, TRI))
- ExecModified = true;
-
switch (MI.getOpcode()) {
- default: break;
- case AMDGPU::SI_IF:
- ++Depth;
- If(MI);
- break;
-
- case AMDGPU::SI_ELSE:
- Else(MI, ExecModified);
- break;
-
- case AMDGPU::SI_BREAK:
- Break(MI);
- break;
-
- case AMDGPU::SI_IF_BREAK:
- IfBreak(MI);
- break;
-
- case AMDGPU::SI_ELSE_BREAK:
- ElseBreak(MI);
- break;
-
- case AMDGPU::SI_LOOP:
- ++Depth;
- Loop(MI);
- break;
-
- case AMDGPU::SI_END_CF:
- if (--Depth == 0 && HaveKill) {
- HaveKill = false;
- // TODO: Insert skip if exec is 0?
- }
-
- EndCf(MI);
- break;
-
- case AMDGPU::SI_KILL_TERMINATOR:
- if (Depth == 0) {
- if (skipIfDead(MI, *NextBB)) {
- NextBB = std::next(BI);
- BE = MF.end();
- }
- } else
- HaveKill = true;
- Kill(MI);
- break;
-
- case AMDGPU::S_BRANCH:
- Branch(MI);
- break;
-
- case AMDGPU::SI_INDIRECT_SRC_V1:
- case AMDGPU::SI_INDIRECT_SRC_V2:
- case AMDGPU::SI_INDIRECT_SRC_V4:
- case AMDGPU::SI_INDIRECT_SRC_V8:
- case AMDGPU::SI_INDIRECT_SRC_V16:
- if (indirectSrc(MI)) {
- // The block was split at this point. We can safely skip the middle
- // inserted block to the following which contains the rest of this
- // block's instructions.
- NextBB = std::next(BI);
- BE = MF.end();
- Next = MBB.end();
- }
-
- break;
-
- case AMDGPU::SI_INDIRECT_DST_V1:
- case AMDGPU::SI_INDIRECT_DST_V2:
- case AMDGPU::SI_INDIRECT_DST_V4:
- case AMDGPU::SI_INDIRECT_DST_V8:
- case AMDGPU::SI_INDIRECT_DST_V16:
- if (indirectDst(MI)) {
- // The block was split at this point. We can safely skip the middle
- // inserted block to the following which contains the rest of this
- // block's instructions.
- NextBB = std::next(BI);
- BE = MF.end();
- Next = MBB.end();
- }
-
- break;
-
- case AMDGPU::SI_RETURN: {
- assert(!MF.getInfo<SIMachineFunctionInfo>()->returnsVoid());
-
- // Graphics shaders returning non-void shouldn't contain S_ENDPGM,
- // because external bytecode will be appended at the end.
- if (BI != --MF.end() || I != MBB.getFirstTerminator()) {
- // SI_RETURN is not the last instruction. Add an empty block at
- // the end and jump there.
- if (!EmptyMBBAtEnd) {
- EmptyMBBAtEnd = MF.CreateMachineBasicBlock();
- MF.insert(MF.end(), EmptyMBBAtEnd);
- }
-
- MBB.addSuccessor(EmptyMBBAtEnd);
- BuildMI(*BI, I, MI.getDebugLoc(), TII->get(AMDGPU::S_BRANCH))
- .addMBB(EmptyMBBAtEnd);
- I->eraseFromParent();
- }
- break;
- }
+ case AMDGPU::SI_IF:
+ emitIf(MI);
+ break;
+
+ case AMDGPU::SI_ELSE:
+ emitElse(MI);
+ break;
+
+ case AMDGPU::SI_BREAK:
+ emitBreak(MI);
+ break;
+
+ case AMDGPU::SI_IF_BREAK:
+ emitIfBreak(MI);
+ break;
+
+ case AMDGPU::SI_ELSE_BREAK:
+ emitElseBreak(MI);
+ break;
+
+ case AMDGPU::SI_LOOP:
+ emitLoop(MI);
+ break;
+
+ case AMDGPU::SI_END_CF:
+ emitEndCf(MI);
+ break;
+
+ case AMDGPU::S_AND_B64:
+ case AMDGPU::S_OR_B64:
+ // Cleanup bit manipulations on exec mask
+ combineMasks(MI);
+ Last = I;
+ continue;
+
+ default:
+ Last = I;
+ continue;
}
- }
- }
- if (NeedFlat && MFI->IsKernel) {
- // TODO: What to use with function calls?
- // We will need to Initialize the flat scratch register pair.
- if (NeedFlat)
- MFI->setHasFlatInstructions(true);
+ // Replay newly inserted code to combine masks
+ Next = (Last == MBB.end()) ? MBB.begin() : Last;
+ }
}
return true;
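
The new combineMasks/findMaskOperands peephole in this file removes redundant re-application of the exec mask: S_AND_B64 x, (S_AND_B64 x, y) collapses to the inner S_AND_B64 x, y, and likewise for S_OR_B64, where one operand is exec or a copy of it. The rewrite is justified by the idempotence of the mask operations; a tiny standalone check of that identity on plain 64-bit values (illustrative only):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint64_t Exec = 0xffff0000ffff0000ull; // stand-in for the exec mask
      const uint64_t Y    = 0x0f0f0f0f0f0f0f0full; // arbitrary second operand
      assert((Exec & (Exec & Y)) == (Exec & Y));   // S_AND_B64 combine is sound
      assert((Exec | (Exec | Y)) == (Exec | Y));   // S_OR_B64 combine is sound
      return 0;
    }
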
diff --git a/contrib/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp b/contrib/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
index dc1d20ddb274..be2e14fd4623 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp
@@ -41,9 +41,7 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
- const char *getPassName() const override {
- return "SI Lower i1 Copies";
- }
+ StringRef getPassName() const override { return "SI Lower i1 Copies"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
@@ -102,12 +100,12 @@ bool SILowerI1Copies::runOnMachineFunction(MachineFunction &MF) {
const TargetRegisterClass *DstRC = MRI.getRegClass(Dst.getReg());
const TargetRegisterClass *SrcRC = MRI.getRegClass(Src.getReg());
+ DebugLoc DL = MI.getDebugLoc();
+ MachineInstr *DefInst = MRI.getUniqueVRegDef(Src.getReg());
if (DstRC == &AMDGPU::VReg_1RegClass &&
TRI->getCommonSubClass(SrcRC, &AMDGPU::SGPR_64RegClass)) {
I1Defs.push_back(Dst.getReg());
- DebugLoc DL = MI.getDebugLoc();
- MachineInstr *DefInst = MRI.getUniqueVRegDef(Src.getReg());
if (DefInst->getOpcode() == AMDGPU::S_MOV_B64) {
if (DefInst->getOperand(1).isImm()) {
I1Defs.push_back(Dst.getReg());
@@ -131,10 +129,26 @@ bool SILowerI1Copies::runOnMachineFunction(MachineFunction &MF) {
MI.eraseFromParent();
} else if (TRI->getCommonSubClass(DstRC, &AMDGPU::SGPR_64RegClass) &&
SrcRC == &AMDGPU::VReg_1RegClass) {
- BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(AMDGPU::V_CMP_NE_I32_e64))
- .addOperand(Dst)
- .addOperand(Src)
- .addImm(0);
+ if (DefInst->getOpcode() == AMDGPU::V_CNDMASK_B32_e64 &&
+ DefInst->getOperand(1).isImm() && DefInst->getOperand(2).isImm() &&
+ DefInst->getOperand(1).getImm() == 0 &&
+ DefInst->getOperand(2).getImm() != 0 &&
+ DefInst->getOperand(3).isReg() &&
+ TargetRegisterInfo::isVirtualRegister(
+ DefInst->getOperand(3).getReg()) &&
+ TRI->getCommonSubClass(
+ MRI.getRegClass(DefInst->getOperand(3).getReg()),
+ &AMDGPU::SGPR_64RegClass)) {
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_B64))
+ .addOperand(Dst)
+ .addReg(AMDGPU::EXEC)
+ .addOperand(DefInst->getOperand(3));
+ } else {
+ BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_NE_U32_e64))
+ .addOperand(Dst)
+ .addOperand(Src)
+ .addImm(0);
+ }
MI.eraseFromParent();
}
}
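
The new fast path above notices when the i1 value was produced by V_CNDMASK_B32_e64 0, K, cond with a non-zero K and a 64-bit scalar cond; copying it back to an SGPR pair then only needs S_AND_B64 dst, exec, cond instead of a VALU compare, and the fallback now emits the unsigned V_CMP_NE_U32 form. A per-lane restatement of why the shortcut is sound, using made-up values (illustrative only):

    #include <cassert>

    int main() {
      const int K = 7;                    // any non-zero immediate
      for (int Cond = 0; Cond <= 1; ++Cond) {
        int V = Cond ? K : 0;             // per-lane result of V_CNDMASK_B32 0, K
        assert((V != 0) == (Cond != 0));  // comparing V against 0 recovers Cond,
      }                                   // so ANDing cond with exec is enough
      return 0;
    }
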
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index 848be32cd515..e911817c451d 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -26,9 +26,6 @@ static cl::opt<bool> EnableSpillSGPRToVGPR(
cl::ReallyHidden,
cl::init(true));
-// Pin the vtable to this file.
-void SIMachineFunctionInfo::anchor() {}
-
SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
: AMDGPUMachineFunction(MF),
TIDReg(AMDGPU::NoRegister),
@@ -51,8 +48,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister),
PSInputAddr(0),
ReturnsVoid(true),
- MaximumWorkGroupSize(0),
- DebuggerReservedVGPRCount(0),
+ FlatWorkGroupSizes(0, 0),
+ WavesPerEU(0, 0),
DebuggerWorkGroupIDStackObjectIndices({{0, 0, 0}}),
DebuggerWorkItemIDStackObjectIndices({{0, 0, 0}}),
LDSWaveSpillSize(0),
@@ -62,14 +59,13 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
HasSpilledSGPRs(false),
HasSpilledVGPRs(false),
HasNonSpillStackObjects(false),
- HasFlatInstructions(false),
NumSpilledSGPRs(0),
NumSpilledVGPRs(0),
PrivateSegmentBuffer(false),
DispatchPtr(false),
QueuePtr(false),
- DispatchID(false),
KernargSegmentPtr(false),
+ DispatchID(false),
FlatScratchInit(false),
GridWorkgroupCountX(false),
GridWorkgroupCountY(false),
@@ -87,7 +83,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);
- const MachineFrameInfo *FrameInfo = MF.getFrameInfo();
+ const MachineFrameInfo &FrameInfo = MF.getFrameInfo();
if (!AMDGPU::isShader(F->getCallingConv())) {
KernargSegmentPtr = true;
@@ -113,12 +109,12 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
WorkItemIDY = true;
bool MaySpill = ST.isVGPRSpillingEnabled(*F);
- bool HasStackObjects = FrameInfo->hasStackObjects();
+ bool HasStackObjects = FrameInfo.hasStackObjects();
if (HasStackObjects || MaySpill)
PrivateSegmentWaveByteOffset = true;
- if (ST.isAmdHsaOS()) {
+ if (ST.isAmdCodeObjectV2()) {
if (HasStackObjects || MaySpill)
PrivateSegmentBuffer = true;
@@ -127,6 +123,9 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
if (F->hasFnAttribute("amdgpu-queue-ptr"))
QueuePtr = true;
+
+ if (F->hasFnAttribute("amdgpu-dispatch-id"))
+ DispatchID = true;
}
// We don't need to worry about accessing spills with flat instructions.
@@ -136,13 +135,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
ST.isAmdHsaOS())
FlatScratchInit = true;
- if (AMDGPU::isCompute(F->getCallingConv()))
- MaximumWorkGroupSize = AMDGPU::getMaximumWorkGroupSize(*F);
- else
- MaximumWorkGroupSize = ST.getWavefrontSize();
-
- if (ST.debuggerReserveRegs())
- DebuggerReservedVGPRCount = 4;
+ FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(*F);
+ WavesPerEU = ST.getWavesPerEU(*F);
}
unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
@@ -174,6 +168,13 @@ unsigned SIMachineFunctionInfo::addKernargSegmentPtr(const SIRegisterInfo &TRI)
return KernargSegmentPtrUserSGPR;
}
+unsigned SIMachineFunctionInfo::addDispatchID(const SIRegisterInfo &TRI) {
+ DispatchIDUserSGPR = TRI.getMatchingSuperReg(
+ getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
+ NumUserSGPRs += 2;
+ return DispatchIDUserSGPR;
+}
+
unsigned SIMachineFunctionInfo::addFlatScratchInit(const SIRegisterInfo &TRI) {
FlatScratchInitUserSGPR = TRI.getMatchingSuperReg(
getNextUserSGPR(), AMDGPU::sub0, &AMDGPU::SReg_64RegClass);
@@ -191,9 +192,9 @@ SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg (
const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
- MachineFrameInfo *FrameInfo = MF->getFrameInfo();
+ MachineFrameInfo &FrameInfo = MF->getFrameInfo();
MachineRegisterInfo &MRI = MF->getRegInfo();
- int64_t Offset = FrameInfo->getObjectOffset(FrameIndex);
+ int64_t Offset = FrameInfo.getObjectOffset(FrameIndex);
Offset += SubIdx * 4;
unsigned LaneVGPRIdx = Offset / (64 * 4);
@@ -223,8 +224,3 @@ SIMachineFunctionInfo::SpilledReg SIMachineFunctionInfo::getSpilledReg (
Spill.VGPR = LaneVGPRs[LaneVGPRIdx];
return Spill;
}
-
-unsigned SIMachineFunctionInfo::getMaximumWorkGroupSize(
- const MachineFunction &MF) const {
- return MaximumWorkGroupSize;
-}
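
SIMachineFunctionInfo now tracks (minimum, maximum) pairs for the flat work group size and for waves per execution unit, replacing the single MaximumWorkGroupSize and the reserved debugger VGPR count. As a hedged sketch of how such a pair is typically consumed (the helper below is made up for illustration and is not part of the patch):

    #include <algorithm>
    #include <utility>

    // Clamp a requested wave count into the [min, max] range carried by the pair.
    static unsigned clampWavesPerEU(std::pair<unsigned, unsigned> WavesPerEU,
                                    unsigned Requested) {
      return std::min(std::max(Requested, WavesPerEU.first), WavesPerEU.second);
    }
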
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index f5bd6366c717..3b4e233cd787 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -23,12 +23,59 @@ namespace llvm {
class MachineRegisterInfo;
+class AMDGPUImagePseudoSourceValue : public PseudoSourceValue {
+public:
+ explicit AMDGPUImagePseudoSourceValue() :
+ PseudoSourceValue(PseudoSourceValue::TargetCustom) { }
+
+ bool isConstant(const MachineFrameInfo *) const override {
+ // This should probably be true for most images, but we will start by being
+ // conservative.
+ return false;
+ }
+
+ bool isAliased(const MachineFrameInfo *) const override {
+ // FIXME: If we ever change image intrinsics to accept fat pointers, then
+ // this could be true for some cases.
+ return false;
+ }
+
+ bool mayAlias(const MachineFrameInfo*) const override {
+ // FIXME: If we ever change image intrinsics to accept fat pointers, then
+ // this could be true for some cases.
+ return false;
+ }
+};
+
+class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue {
+public:
+ explicit AMDGPUBufferPseudoSourceValue() :
+ PseudoSourceValue(PseudoSourceValue::TargetCustom) { }
+
+ bool isConstant(const MachineFrameInfo *) const override {
+    // This should probably be true for most buffers, but we will start by being
+ // conservative.
+ return false;
+ }
+
+ bool isAliased(const MachineFrameInfo *) const override {
+ // FIXME: If we ever change image intrinsics to accept fat pointers, then
+ // this could be true for some cases.
+ return false;
+ }
+
+ bool mayAlias(const MachineFrameInfo*) const override {
+ // FIXME: If we ever change image intrinsics to accept fat pointers, then
+ // this could be true for some cases.
+ return false;
+ }
+};
+
/// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
/// tells the hardware which interpolation parameters to load.
class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
// FIXME: This should be removed and getPreloadedValue moved here.
- friend struct SIRegisterInfo;
- void anchor() override;
+ friend class SIRegisterInfo;
unsigned TIDReg;
@@ -61,15 +108,22 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
unsigned PSInputAddr;
bool ReturnsVoid;
- unsigned MaximumWorkGroupSize;
+ // A pair of default/requested minimum/maximum flat work group sizes.
+ // Minimum - first, maximum - second.
+ std::pair<unsigned, unsigned> FlatWorkGroupSizes;
+
+ // A pair of default/requested minimum/maximum number of waves per execution
+ // unit. Minimum - first, maximum - second.
+ std::pair<unsigned, unsigned> WavesPerEU;
- // Number of reserved VGPRs for debugger usage.
- unsigned DebuggerReservedVGPRCount;
// Stack object indices for work group IDs.
std::array<int, 3> DebuggerWorkGroupIDStackObjectIndices;
// Stack object indices for work item IDs.
std::array<int, 3> DebuggerWorkItemIDStackObjectIndices;
+ AMDGPUBufferPseudoSourceValue BufferPSV;
+ AMDGPUImagePseudoSourceValue ImagePSV;
+
public:
// FIXME: Make private
unsigned LDSWaveSpillSize;
@@ -83,7 +137,6 @@ private:
bool HasSpilledSGPRs;
bool HasSpilledVGPRs;
bool HasNonSpillStackObjects;
- bool HasFlatInstructions;
unsigned NumSpilledSGPRs;
unsigned NumSpilledVGPRs;
@@ -92,8 +145,8 @@ private:
bool PrivateSegmentBuffer : 1;
bool DispatchPtr : 1;
bool QueuePtr : 1;
- bool DispatchID : 1;
bool KernargSegmentPtr : 1;
+ bool DispatchID : 1;
bool FlatScratchInit : 1;
bool GridWorkgroupCountX : 1;
bool GridWorkgroupCountY : 1;
@@ -143,6 +196,7 @@ public:
unsigned addDispatchPtr(const SIRegisterInfo &TRI);
unsigned addQueuePtr(const SIRegisterInfo &TRI);
unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
+ unsigned addDispatchID(const SIRegisterInfo &TRI);
unsigned addFlatScratchInit(const SIRegisterInfo &TRI);
// Add system SGPRs.
@@ -192,14 +246,14 @@ public:
return QueuePtr;
}
- bool hasDispatchID() const {
- return DispatchID;
- }
-
bool hasKernargSegmentPtr() const {
return KernargSegmentPtr;
}
+ bool hasDispatchID() const {
+ return DispatchID;
+ }
+
bool hasFlatScratchInit() const {
return FlatScratchInit;
}
@@ -308,14 +362,6 @@ public:
HasNonSpillStackObjects = StackObject;
}
- bool hasFlatInstructions() const {
- return HasFlatInstructions;
- }
-
- void setHasFlatInstructions(bool UseFlat = true) {
- HasFlatInstructions = UseFlat;
- }
-
unsigned getNumSpilledSGPRs() const {
return NumSpilledSGPRs;
}
@@ -352,9 +398,36 @@ public:
ReturnsVoid = Value;
}
- /// \returns Number of reserved VGPRs for debugger usage.
- unsigned getDebuggerReservedVGPRCount() const {
- return DebuggerReservedVGPRCount;
+ /// \returns A pair of default/requested minimum/maximum flat work group sizes
+ /// for this function.
+ std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
+ return FlatWorkGroupSizes;
+ }
+
+ /// \returns Default/requested minimum flat work group size for this function.
+ unsigned getMinFlatWorkGroupSize() const {
+ return FlatWorkGroupSizes.first;
+ }
+
+ /// \returns Default/requested maximum flat work group size for this function.
+ unsigned getMaxFlatWorkGroupSize() const {
+ return FlatWorkGroupSizes.second;
+ }
+
+ /// \returns A pair of default/requested minimum/maximum number of waves per
+ /// execution unit.
+ std::pair<unsigned, unsigned> getWavesPerEU() const {
+ return WavesPerEU;
+ }
+
+ /// \returns Default/requested minimum number of waves per execution unit.
+ unsigned getMinWavesPerEU() const {
+ return WavesPerEU.first;
+ }
+
+ /// \returns Default/requested maximum number of waves per execution unit.
+ unsigned getMaxWavesPerEU() const {
+ return WavesPerEU.second;
}
/// \returns Stack object index for \p Dim's work group ID.
@@ -413,7 +486,13 @@ public:
llvm_unreachable("unexpected dimension");
}
- unsigned getMaximumWorkGroupSize(const MachineFunction &MF) const;
+ const AMDGPUBufferPseudoSourceValue *getBufferPSV() const {
+ return &BufferPSV;
+ }
+
+ const AMDGPUImagePseudoSourceValue *getImagePSV() const {
+ return &ImagePSV;
+ }
};
} // End namespace llvm
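
The new FlatWorkGroupSizes and WavesPerEU members introduced above are plain std::pair values using the "minimum first, maximum second" convention that the getMin*/getMax* accessors unpack. A minimal standalone sketch of that convention (the clampWaves helper and the 1..10 range are illustrative assumptions, not part of the patch):

    #include <algorithm>
    #include <utility>

    // Sketch of the "minimum first, maximum second" convention behind
    // getFlatWorkGroupSizes() / getWavesPerEU(); a standalone model, not the
    // backend class itself.
    using MinMax = std::pair<unsigned, unsigned>;

    // Clamp a requested wave count into the allowed [min, max] range.
    static unsigned clampWaves(MinMax WavesPerEU, unsigned Requested) {
      return std::min(std::max(Requested, WavesPerEU.first), WavesPerEU.second);
    }

    int main() {
      MinMax WavesPerEU{1, 10}; // hypothetical default/requested range
      return clampWaves(WavesPerEU, 12) == 10 ? 0 : 1;
    }
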
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp b/contrib/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
index 7125b411c603..da86bbf9dd2a 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
@@ -1,4 +1,4 @@
-//===-- SIMachineScheduler.cpp - SI Scheduler Interface -*- C++ -*-----===//
+//===-- SIMachineScheduler.cpp - SI Scheduler Interface -------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -13,12 +13,28 @@
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
+#include "SIInstrInfo.h"
#include "SIMachineScheduler.h"
+#include "SIRegisterInfo.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <map>
+#include <set>
+#include <utility>
+#include <vector>
using namespace llvm;
@@ -77,11 +93,11 @@ using namespace llvm;
// The block creation algorithm is divided into several steps, and several
// variants can be tried during the scheduling process.
//
-// Second the order of the instructions inside the blocks is choosen.
+// Second the order of the instructions inside the blocks is chosen.
// At that step we do take into account only register usage and hiding
// low latency instructions
//
-// Third the block order is choosen, there we try to hide high latencies
+// Third the block order is chosen, there we try to hide high latencies
// and keep register usage low.
//
// After the third step, a pass is done to improve the hiding of low
@@ -89,7 +105,7 @@ using namespace llvm;
//
// Actually when talking about 'low latency' or 'high latency' it includes
// both the latency to get the cache (or global mem) data go to the register,
-// and the bandwith limitations.
+// and the bandwidth limitations.
// Increasing the number of active wavefronts helps hide the former, but it
// doesn't solve the latter, thus why even if wavefront count is high, we have
// to try have as many instructions hiding high latencies as possible.
@@ -120,7 +136,6 @@ using namespace llvm;
// 300-600 cycles. We do not specially take that into account when scheduling
// As we expect the driver to be able to preload the constants soon.
-
// common code //
#ifndef NDEBUG
@@ -181,7 +196,6 @@ void SIScheduleBlock::addUnit(SUnit *SU) {
}
#ifndef NDEBUG
-
void SIScheduleBlock::traceCandidate(const SISchedCandidate &Cand) {
dbgs() << " SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
@@ -209,7 +223,7 @@ void SIScheduleBlock::tryCandidateTopDown(SISchedCandidate &Cand,
// we haven't waited for
// . Low latencies
// . All other instructions
- // Goal is to get: low latency instructions - independant instructions
+ // Goal is to get: low latency instructions - independent instructions
// - (eventually some more low latency instructions)
// - instructions that depend on the first low latency instructions.
// If in the block there is a lot of constant loads, the SGPR usage
@@ -479,8 +493,7 @@ void SIScheduleBlock::releaseSuccessors(SUnit *SU, bool InOrOutBlock) {
void SIScheduleBlock::nodeScheduled(SUnit *SU) {
// Is in TopReadySUs
assert (!SU->NumPredsLeft);
- std::vector<SUnit*>::iterator I =
- std::find(TopReadySUs.begin(), TopReadySUs.end(), SU);
+ std::vector<SUnit *>::iterator I = llvm::find(TopReadySUs, SU);
if (I == TopReadySUs.end()) {
dbgs() << "Data Structure Bug in SI Scheduler\n";
llvm_unreachable(nullptr);
@@ -589,9 +602,8 @@ void SIScheduleBlock::printDebug(bool full) {
}
}
- dbgs() << "///////////////////////\n";
+ dbgs() << "///////////////////////\n";
}
-
#endif
// SIScheduleBlockCreator //
@@ -600,8 +612,7 @@ SIScheduleBlockCreator::SIScheduleBlockCreator(SIScheduleDAGMI *DAG) :
DAG(DAG) {
}
-SIScheduleBlockCreator::~SIScheduleBlockCreator() {
-}
+SIScheduleBlockCreator::~SIScheduleBlockCreator() = default;
SIScheduleBlocks
SIScheduleBlockCreator::getBlocks(SISchedulerBlockCreatorVariant BlockVariant) {
@@ -1059,8 +1070,7 @@ void SIScheduleBlockCreator::createBlocksForVariant(SISchedulerBlockCreatorVaria
unsigned Color = CurrentColoring[SU->NodeNum];
if (RealID.find(Color) == RealID.end()) {
int ID = CurrentBlocks.size();
- BlockPtrs.push_back(
- make_unique<SIScheduleBlock>(DAG, this, ID));
+ BlockPtrs.push_back(llvm::make_unique<SIScheduleBlock>(DAG, this, ID));
CurrentBlocks.push_back(BlockPtrs.rbegin()->get());
RealID[Color] = ID;
}
@@ -1104,30 +1114,17 @@ void SIScheduleBlockCreator::createBlocksForVariant(SISchedulerBlockCreatorVaria
// Two functions taken from Codegen/MachineScheduler.cpp
-/// If this iterator is a debug value, increment until reaching the End or a
-/// non-debug instruction.
-static MachineBasicBlock::const_iterator
-nextIfDebug(MachineBasicBlock::const_iterator I,
+/// Non-const version.
+static MachineBasicBlock::iterator
+nextIfDebug(MachineBasicBlock::iterator I,
MachineBasicBlock::const_iterator End) {
- for(; I != End; ++I) {
+ for (; I != End; ++I) {
if (!I->isDebugValue())
break;
}
return I;
}
-/// Non-const version.
-static MachineBasicBlock::iterator
-nextIfDebug(MachineBasicBlock::iterator I,
- MachineBasicBlock::const_iterator End) {
- // Cast the return value to nonconst MachineInstr, then cast to an
- // instr_iterator, which does not check for null, finally return a
- // bundle_iterator.
- return MachineBasicBlock::instr_iterator(
- const_cast<MachineInstr*>(
- &*nextIfDebug(MachineBasicBlock::const_iterator(I), End)));
-}
-
void SIScheduleBlockCreator::topologicalSort() {
unsigned DAGSize = CurrentBlocks.size();
std::vector<int> WorkList;
@@ -1217,7 +1214,7 @@ void SIScheduleBlockCreator::scheduleInsideBlocks() {
DAG->getBB()->splice(CurrentTopFastSched, DAG->getBB(), MI);
// Update LiveIntervals.
- // Note: Moving all instructions and calling handleMove everytime
+ // Note: Moving all instructions and calling handleMove every time
// is the most cpu intensive operation of the scheduler.
// It would gain a lot if there was a way to recompute the
// LiveIntervals for the entire scheduling region.
@@ -1265,7 +1262,7 @@ void SIScheduleBlockCreator::fillStats() {
for (unsigned i = 0, e = DAGSize; i != e; ++i) {
int BlockIndice = TopDownIndex2Block[i];
SIScheduleBlock *Block = CurrentBlocks[BlockIndice];
- if (Block->getPreds().size() == 0)
+ if (Block->getPreds().empty())
Block->Depth = 0;
else {
unsigned Depth = 0;
@@ -1280,7 +1277,7 @@ void SIScheduleBlockCreator::fillStats() {
for (unsigned i = 0, e = DAGSize; i != e; ++i) {
int BlockIndice = BottomUpIndex2Block[i];
SIScheduleBlock *Block = CurrentBlocks[BlockIndice];
- if (Block->getSuccs().size() == 0)
+ if (Block->getSuccs().empty())
Block->Height = 0;
else {
unsigned Height = 0;
@@ -1654,20 +1651,15 @@ SIScheduler::scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,
// SIScheduleDAGMI //
SIScheduleDAGMI::SIScheduleDAGMI(MachineSchedContext *C) :
- ScheduleDAGMILive(C, make_unique<GenericScheduler>(C)) {
+ ScheduleDAGMILive(C, llvm::make_unique<GenericScheduler>(C)) {
SITII = static_cast<const SIInstrInfo*>(TII);
SITRI = static_cast<const SIRegisterInfo*>(TRI);
- VGPRSetID = SITRI->getVGPR32PressureSet();
- SGPRSetID = SITRI->getSGPR32PressureSet();
-}
-
-SIScheduleDAGMI::~SIScheduleDAGMI() {
+ VGPRSetID = SITRI->getVGPRPressureSet();
+ SGPRSetID = SITRI->getSGPRPressureSet();
}
-ScheduleDAGInstrs *llvm::createSIMachineScheduler(MachineSchedContext *C) {
- return new SIScheduleDAGMI(C);
-}
+SIScheduleDAGMI::~SIScheduleDAGMI() = default;
// Code adapted from scheduleDAG.cpp
// Does a topological sort over the SUs.
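
Several hunks above replace explicit std::find(begin, end, value) calls with the range-based llvm::find from ADT/STLExtras.h and spell out llvm::make_unique. A standalone sketch of the range-based find idiom, using a local range_find helper so the example compiles without LLVM headers (the helper name is made up):

    #include <algorithm>
    #include <iterator>
    #include <vector>

    // Local stand-in for llvm::find (ADT/STLExtras.h): forwards a whole range
    // to std::find instead of passing begin()/end() explicitly.
    template <typename Range, typename T>
    auto range_find(Range &&R, const T &Val) {
      return std::find(std::begin(R), std::end(R), Val);
    }

    int main() {
      std::vector<int> TopReadySUs = {3, 1, 4};
      auto I = range_find(TopReadySUs, 4); // same result as std::find(b, e, 4)
      return I != TopReadySUs.end() ? 0 : 1;
    }
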
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIMachineScheduler.h b/contrib/llvm/lib/Target/AMDGPU/SIMachineScheduler.h
index 117aed497cc2..77c07350d325 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIMachineScheduler.h
+++ b/contrib/llvm/lib/Target/AMDGPU/SIMachineScheduler.h
@@ -1,4 +1,4 @@
-//===-- SIMachineScheduler.h - SI Scheduler Interface -*- C++ -*-------===//
+//===-- SIMachineScheduler.h - SI Scheduler Interface -----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -16,10 +16,16 @@
#define LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
#include "SIInstrInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/RegisterPressure.h"
-
-using namespace llvm;
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include <cassert>
+#include <cstdint>
+#include <map>
+#include <memory>
+#include <set>
+#include <vector>
namespace llvm {
@@ -93,12 +99,10 @@ class SIScheduleBlock {
public:
SIScheduleBlock(SIScheduleDAGMI *DAG, SIScheduleBlockCreator *BC,
unsigned ID):
- DAG(DAG), BC(BC), SUnits(), TopReadySUs(), ScheduledSUnits(),
- TopRPTracker(TopPressure), Scheduled(false),
- HighLatencyBlock(false), ID(ID),
- Preds(), Succs(), NumHighLatencySuccessors(0) {};
+ DAG(DAG), BC(BC), TopRPTracker(TopPressure), Scheduled(false),
+ HighLatencyBlock(false), ID(ID), NumHighLatencySuccessors(0) {}
- ~SIScheduleBlock() {};
+ ~SIScheduleBlock() = default;
unsigned getID() const { return ID; }
@@ -146,7 +150,6 @@ public:
bool isScheduled() { return Scheduled; }
-
// Needs the block to be scheduled inside
// TODO: find a way to compute it.
std::vector<unsigned> &getInternalAdditionnalRegUsage() {
@@ -161,7 +164,7 @@ public:
private:
struct SISchedCandidate : SISchedulerCandidate {
// The best SUnit candidate.
- SUnit *SU;
+ SUnit *SU = nullptr;
unsigned SGPRUsage;
unsigned VGPRUsage;
@@ -169,8 +172,7 @@ private:
unsigned LowLatencyOffset;
bool HasLowLatencyNonWaitedParent;
- SISchedCandidate()
- : SU(nullptr) {}
+ SISchedCandidate() = default;
bool isValid() const { return SU; }
@@ -341,17 +343,17 @@ public:
SIScheduleBlockScheduler(SIScheduleDAGMI *DAG,
SISchedulerBlockSchedulerVariant Variant,
SIScheduleBlocks BlocksStruct);
- ~SIScheduleBlockScheduler() {};
+ ~SIScheduleBlockScheduler() = default;
- std::vector<SIScheduleBlock*> getBlocks() { return BlocksScheduled; };
+ std::vector<SIScheduleBlock*> getBlocks() { return BlocksScheduled; }
- unsigned getVGPRUsage() { return maxVregUsage; };
- unsigned getSGPRUsage() { return maxSregUsage; };
+ unsigned getVGPRUsage() { return maxVregUsage; }
+ unsigned getSGPRUsage() { return maxSregUsage; }
private:
struct SIBlockSchedCandidate : SISchedulerCandidate {
// The best Block candidate.
- SIScheduleBlock *Block;
+ SIScheduleBlock *Block = nullptr;
bool IsHighLatency;
int VGPRUsageDiff;
@@ -360,8 +362,7 @@ private:
unsigned LastPosHighLatParentScheduled;
unsigned Height;
- SIBlockSchedCandidate()
- : Block(nullptr) {}
+ SIBlockSchedCandidate() = default;
bool isValid() const { return Block; }
@@ -409,9 +410,9 @@ class SIScheduler {
SIScheduleBlockCreator BlockCreator;
public:
- SIScheduler(SIScheduleDAGMI *DAG) : DAG(DAG), BlockCreator(DAG) {};
+ SIScheduler(SIScheduleDAGMI *DAG) : DAG(DAG), BlockCreator(DAG) {}
- ~SIScheduler() {};
+ ~SIScheduler() = default;
struct SIScheduleBlockResult
scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,
@@ -445,13 +446,13 @@ public:
}
MachineBasicBlock *getBB() { return BB; }
- MachineBasicBlock::iterator getCurrentTop() { return CurrentTop; };
- MachineBasicBlock::iterator getCurrentBottom() { return CurrentBottom; };
+ MachineBasicBlock::iterator getCurrentTop() { return CurrentTop; }
+ MachineBasicBlock::iterator getCurrentBottom() { return CurrentBottom; }
LiveIntervals *getLIS() { return LIS; }
MachineRegisterInfo *getMRI() { return &MRI; }
const TargetRegisterInfo *getTRI() { return TRI; }
- SUnit& getEntrySU() { return EntrySU; };
- SUnit& getExitSU() { return ExitSU; };
+ SUnit& getEntrySU() { return EntrySU; }
+ SUnit& getExitSU() { return ExitSU; }
void restoreSULinksLeft();
@@ -459,13 +460,14 @@ public:
_Iterator End,
unsigned &VgprUsage,
unsigned &SgprUsage);
+
std::set<unsigned> getInRegs() {
std::set<unsigned> InRegs;
for (const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) {
InRegs.insert(RegMaskPair.RegUnit);
}
return InRegs;
- };
+ }
unsigned getVGPRSetID() const { return VGPRSetID; }
unsigned getSGPRSetID() const { return SGPRSetID; }
@@ -486,6 +488,6 @@ public:
std::vector<int> BottomUpIndex2SU;
};
-} // namespace llvm
+} // end namespace llvm
-#endif /* SIMACHINESCHEDULER_H_ */
+#endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
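
The header cleanup above swaps empty user-written constructors and destructors for "= default" and moves the SU/Block pointer initialization into in-class default member initializers. A minimal standalone illustration of the same idiom (the Candidate struct below is a made-up stand-in, not the scheduler type):

    #include <cassert>

    // Before: Candidate() : SU(nullptr) {}  After: an in-class initializer plus
    // a defaulted constructor, mirroring the SISchedCandidate change above.
    struct Candidate {
      int *SU = nullptr;     // default member initializer
      Candidate() = default; // no user-provided body needed any more
      bool isValid() const { return SU != nullptr; }
    };

    int main() {
      Candidate C;
      assert(!C.isValid()); // SU starts out null without an explicit ctor body
      return 0;
    }
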
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp b/contrib/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
new file mode 100644
index 000000000000..4d2f917278e9
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/SIOptimizeExecMasking.cpp
@@ -0,0 +1,304 @@
+//===-- SIOptimizeExecMasking.cpp -----------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPU.h"
+#include "AMDGPUSubtarget.h"
+#include "SIInstrInfo.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "si-optimize-exec-masking"
+
+namespace {
+
+class SIOptimizeExecMasking : public MachineFunctionPass {
+public:
+ static char ID;
+
+ SIOptimizeExecMasking() : MachineFunctionPass(ID) {
+ initializeSIOptimizeExecMaskingPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ StringRef getPassName() const override {
+ return "SI optimize exec mask operations";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+
+} // End anonymous namespace.
+
+INITIALIZE_PASS_BEGIN(SIOptimizeExecMasking, DEBUG_TYPE,
+ "SI optimize exec mask operations", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(SIOptimizeExecMasking, DEBUG_TYPE,
+ "SI optimize exec mask operations", false, false)
+
+char SIOptimizeExecMasking::ID = 0;
+
+char &llvm::SIOptimizeExecMaskingID = SIOptimizeExecMasking::ID;
+
+/// If \p MI is a copy from exec, return the register copied to.
+static unsigned isCopyFromExec(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ case AMDGPU::COPY:
+ case AMDGPU::S_MOV_B64:
+ case AMDGPU::S_MOV_B64_term: {
+ const MachineOperand &Src = MI.getOperand(1);
+ if (Src.isReg() && Src.getReg() == AMDGPU::EXEC)
+ return MI.getOperand(0).getReg();
+ }
+ }
+
+ return AMDGPU::NoRegister;
+}
+
+/// If \p MI is a copy to exec, return the register copied from.
+static unsigned isCopyToExec(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ case AMDGPU::COPY:
+ case AMDGPU::S_MOV_B64: {
+ const MachineOperand &Dst = MI.getOperand(0);
+ if (Dst.isReg() && Dst.getReg() == AMDGPU::EXEC)
+ return MI.getOperand(1).getReg();
+ break;
+ }
+ case AMDGPU::S_MOV_B64_term:
+ llvm_unreachable("should have been replaced");
+ }
+
+ return AMDGPU::NoRegister;
+}
+
+static unsigned getSaveExecOp(unsigned Opc) {
+ switch (Opc) {
+ case AMDGPU::S_AND_B64:
+ return AMDGPU::S_AND_SAVEEXEC_B64;
+ case AMDGPU::S_OR_B64:
+ return AMDGPU::S_OR_SAVEEXEC_B64;
+ case AMDGPU::S_XOR_B64:
+ return AMDGPU::S_XOR_SAVEEXEC_B64;
+ case AMDGPU::S_ANDN2_B64:
+ return AMDGPU::S_ANDN2_SAVEEXEC_B64;
+ case AMDGPU::S_ORN2_B64:
+ return AMDGPU::S_ORN2_SAVEEXEC_B64;
+ case AMDGPU::S_NAND_B64:
+ return AMDGPU::S_NAND_SAVEEXEC_B64;
+ case AMDGPU::S_NOR_B64:
+ return AMDGPU::S_NOR_SAVEEXEC_B64;
+ case AMDGPU::S_XNOR_B64:
+ return AMDGPU::S_XNOR_SAVEEXEC_B64;
+ default:
+ return AMDGPU::INSTRUCTION_LIST_END;
+ }
+}
+
+// These are only terminators to get correct spill code placement during
+// register allocation, so turn them back into normal instructions. Only one of
+// these is expected per block.
+static bool removeTerminatorBit(const SIInstrInfo &TII, MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ case AMDGPU::S_MOV_B64_term: {
+ MI.setDesc(TII.get(AMDGPU::COPY));
+ return true;
+ }
+ case AMDGPU::S_XOR_B64_term: {
+ // This is only a terminator to get the correct spill code placement during
+ // register allocation.
+ MI.setDesc(TII.get(AMDGPU::S_XOR_B64));
+ return true;
+ }
+ case AMDGPU::S_ANDN2_B64_term: {
+ // This is only a terminator to get the correct spill code placement during
+ // register allocation.
+ MI.setDesc(TII.get(AMDGPU::S_ANDN2_B64));
+ return true;
+ }
+ default:
+ return false;
+ }
+}
+
+static MachineBasicBlock::reverse_iterator fixTerminators(
+ const SIInstrInfo &TII,
+ MachineBasicBlock &MBB) {
+ MachineBasicBlock::reverse_iterator I = MBB.rbegin(), E = MBB.rend();
+ for (; I != E; ++I) {
+ if (!I->isTerminator())
+ return I;
+
+ if (removeTerminatorBit(TII, *I))
+ return I;
+ }
+
+ return E;
+}
+
+static MachineBasicBlock::reverse_iterator findExecCopy(
+ const SIInstrInfo &TII,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::reverse_iterator I,
+ unsigned CopyToExec) {
+ const unsigned InstLimit = 25;
+
+ auto E = MBB.rend();
+ for (unsigned N = 0; N <= InstLimit && I != E; ++I, ++N) {
+ unsigned CopyFromExec = isCopyFromExec(*I);
+ if (CopyFromExec != AMDGPU::NoRegister)
+ return I;
+ }
+
+ return E;
+}
+
+// XXX - Seems LivePhysRegs doesn't work correctly since it will incorrectly
+// report the register as unavailable because a super-register with a lane mask
+// is unavailable.
+static bool isLiveOut(const MachineBasicBlock &MBB, unsigned Reg) {
+ for (MachineBasicBlock *Succ : MBB.successors()) {
+ if (Succ->isLiveIn(Reg))
+ return true;
+ }
+
+ return false;
+}
+
+bool SIOptimizeExecMasking::runOnMachineFunction(MachineFunction &MF) {
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+ const SIRegisterInfo *TRI = ST.getRegisterInfo();
+ const SIInstrInfo *TII = ST.getInstrInfo();
+
+ // Optimize sequences emitted for control flow lowering. They are originally
+  // emitted as separate operations because spill code may need to be
+ // inserted for the saved copy of exec.
+ //
+ // x = copy exec
+ // z = s_<op>_b64 x, y
+ // exec = copy z
+ // =>
+ // x = s_<op>_saveexec_b64 y
+ //
+
+ for (MachineBasicBlock &MBB : MF) {
+ MachineBasicBlock::reverse_iterator I = fixTerminators(*TII, MBB);
+ MachineBasicBlock::reverse_iterator E = MBB.rend();
+ if (I == E)
+ continue;
+
+ unsigned CopyToExec = isCopyToExec(*I);
+ if (CopyToExec == AMDGPU::NoRegister)
+ continue;
+
+ // Scan backwards to find the def.
+ auto CopyToExecInst = &*I;
+ auto CopyFromExecInst = findExecCopy(*TII, MBB, I, CopyToExec);
+ if (CopyFromExecInst == E)
+ continue;
+
+ if (isLiveOut(MBB, CopyToExec)) {
+ // The copied register is live out and has a second use in another block.
+ DEBUG(dbgs() << "Exec copy source register is live out\n");
+ continue;
+ }
+
+ unsigned CopyFromExec = CopyFromExecInst->getOperand(0).getReg();
+ MachineInstr *SaveExecInst = nullptr;
+ SmallVector<MachineInstr *, 4> OtherUseInsts;
+
+ for (MachineBasicBlock::iterator J
+ = std::next(CopyFromExecInst->getIterator()), JE = I->getIterator();
+ J != JE; ++J) {
+ if (SaveExecInst && J->readsRegister(AMDGPU::EXEC, TRI)) {
+ DEBUG(dbgs() << "exec read prevents saveexec: " << *J << '\n');
+ // Make sure this is inserted after any VALU ops that may have been
+ // scheduled in between.
+ SaveExecInst = nullptr;
+ break;
+ }
+
+ if (J->modifiesRegister(CopyToExec, TRI)) {
+ if (SaveExecInst) {
+ DEBUG(dbgs() << "Multiple instructions modify "
+ << PrintReg(CopyToExec, TRI) << '\n');
+ SaveExecInst = nullptr;
+ break;
+ }
+
+ unsigned SaveExecOp = getSaveExecOp(J->getOpcode());
+ if (SaveExecOp == AMDGPU::INSTRUCTION_LIST_END)
+ break;
+
+ if (J->readsRegister(CopyFromExec, TRI)) {
+ SaveExecInst = &*J;
+ DEBUG(dbgs() << "Found save exec op: " << *SaveExecInst << '\n');
+ continue;
+ } else {
+ DEBUG(dbgs() << "Instruction does not read exec copy: " << *J << '\n');
+ break;
+ }
+ }
+
+ if (SaveExecInst && J->readsRegister(CopyToExec, TRI)) {
+ assert(SaveExecInst != &*J);
+ OtherUseInsts.push_back(&*J);
+ }
+ }
+
+ if (!SaveExecInst)
+ continue;
+
+ DEBUG(dbgs() << "Insert save exec op: " << *SaveExecInst << '\n');
+
+ MachineOperand &Src0 = SaveExecInst->getOperand(1);
+ MachineOperand &Src1 = SaveExecInst->getOperand(2);
+
+ MachineOperand *OtherOp = nullptr;
+
+ if (Src0.isReg() && Src0.getReg() == CopyFromExec) {
+ OtherOp = &Src1;
+ } else if (Src1.isReg() && Src1.getReg() == CopyFromExec) {
+ if (!SaveExecInst->isCommutable())
+ break;
+
+ OtherOp = &Src0;
+ } else
+ llvm_unreachable("unexpected");
+
+ CopyFromExecInst->eraseFromParent();
+
+ auto InsPt = SaveExecInst->getIterator();
+ const DebugLoc &DL = SaveExecInst->getDebugLoc();
+
+ BuildMI(MBB, InsPt, DL, TII->get(getSaveExecOp(SaveExecInst->getOpcode())),
+ CopyFromExec)
+ .addReg(OtherOp->getReg());
+ SaveExecInst->eraseFromParent();
+
+ CopyToExecInst->eraseFromParent();
+
+ for (MachineInstr *OtherInst : OtherUseInsts) {
+ OtherInst->substituteRegister(CopyToExec, AMDGPU::EXEC,
+ AMDGPU::NoSubRegister, *TRI);
+ }
+ }
+
+  return true;
+}
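
The new pass folds the three-instruction sequence "x = COPY exec; z = s_<op>_b64 x, y; exec = COPY z" into a single s_<op>_saveexec_b64 by looking the opcode up in getSaveExecOp(). A standalone sketch of that opcode-mapping idea, using string names as a hypothetical stand-in for the real AMDGPU opcode enums:

    #include <cassert>
    #include <map>
    #include <string>

    // Hypothetical stand-in for getSaveExecOp(): map a scalar logical opcode to
    // its *_SAVEEXEC form, or return an empty string when no fold exists.
    static std::string saveExecOpFor(const std::string &Opc) {
      static const std::map<std::string, std::string> Table = {
          {"S_AND_B64", "S_AND_SAVEEXEC_B64"},
          {"S_OR_B64", "S_OR_SAVEEXEC_B64"},
          {"S_XOR_B64", "S_XOR_SAVEEXEC_B64"},
      };
      auto It = Table.find(Opc);
      return It == Table.end() ? std::string() : It->second;
    }

    int main() {
      // x = COPY exec; z = S_AND_B64 x, y; exec = COPY z
      //   ==>  x = S_AND_SAVEEXEC_B64 y
      assert(saveExecOpFor("S_AND_B64") == "S_AND_SAVEEXEC_B64");
      assert(saveExecOpFor("S_ADD_U32").empty()); // not a foldable opcode
      return 0;
    }
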
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 347c33fb3760..8c4b24a4504d 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -24,52 +24,11 @@
using namespace llvm;
-static unsigned getMaxWaveCountPerSIMD(const MachineFunction &MF) {
- const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
- const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
- unsigned SIMDPerCU = 4;
-
- unsigned MaxInvocationsPerWave = SIMDPerCU * ST.getWavefrontSize();
- return alignTo(MFI.getMaximumWorkGroupSize(MF), MaxInvocationsPerWave) /
- MaxInvocationsPerWave;
-}
-
-static unsigned getMaxWorkGroupSGPRCount(const MachineFunction &MF) {
- const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
- unsigned MaxWaveCountPerSIMD = getMaxWaveCountPerSIMD(MF);
-
- unsigned TotalSGPRCountPerSIMD, AddressableSGPRCount, SGPRUsageAlignment;
- unsigned ReservedSGPRCount;
-
- if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
- TotalSGPRCountPerSIMD = 800;
- AddressableSGPRCount = 102;
- SGPRUsageAlignment = 16;
- ReservedSGPRCount = 6; // VCC, FLAT_SCRATCH, XNACK
- } else {
- TotalSGPRCountPerSIMD = 512;
- AddressableSGPRCount = 104;
- SGPRUsageAlignment = 8;
- ReservedSGPRCount = 2; // VCC
- }
+static cl::opt<bool> EnableSpillSGPRToSMEM(
+ "amdgpu-spill-sgpr-to-smem",
+ cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"),
+ cl::init(false));
- unsigned MaxSGPRCount = (TotalSGPRCountPerSIMD / MaxWaveCountPerSIMD);
- MaxSGPRCount = alignDown(MaxSGPRCount, SGPRUsageAlignment);
-
- if (ST.hasSGPRInitBug())
- MaxSGPRCount = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
-
- return std::min(MaxSGPRCount - ReservedSGPRCount, AddressableSGPRCount);
-}
-
-static unsigned getMaxWorkGroupVGPRCount(const MachineFunction &MF) {
- unsigned MaxWaveCountPerSIMD = getMaxWaveCountPerSIMD(MF);
- unsigned TotalVGPRCountPerSIMD = 256;
- unsigned VGPRUsageAlignment = 4;
-
- return alignDown(TotalVGPRCountPerSIMD / MaxWaveCountPerSIMD,
- VGPRUsageAlignment);
-}
static bool hasPressureSet(const int *PSets, unsigned PSetID) {
for (unsigned i = 0; PSets[i] != -1; ++i) {
@@ -95,19 +54,38 @@ SIRegisterInfo::SIRegisterInfo() : AMDGPURegisterInfo(),
VGPRPressureSets(getNumRegPressureSets()) {
unsigned NumRegPressureSets = getNumRegPressureSets();
- SGPR32SetID = NumRegPressureSets;
- VGPR32SetID = NumRegPressureSets;
- for (unsigned i = 0; i < NumRegPressureSets; ++i) {
- if (strncmp("SGPR_32", getRegPressureSetName(i), 7) == 0)
- SGPR32SetID = i;
- else if (strncmp("VGPR_32", getRegPressureSetName(i), 7) == 0)
- VGPR32SetID = i;
+ SGPRSetID = NumRegPressureSets;
+ VGPRSetID = NumRegPressureSets;
+ for (unsigned i = 0; i < NumRegPressureSets; ++i) {
classifyPressureSet(i, AMDGPU::SGPR0, SGPRPressureSets);
classifyPressureSet(i, AMDGPU::VGPR0, VGPRPressureSets);
}
- assert(SGPR32SetID < NumRegPressureSets &&
- VGPR32SetID < NumRegPressureSets);
+
+ // Determine the number of reg units for each pressure set.
+ std::vector<unsigned> PressureSetRegUnits(NumRegPressureSets, 0);
+ for (unsigned i = 0, e = getNumRegUnits(); i != e; ++i) {
+ const int *PSets = getRegUnitPressureSets(i);
+ for (unsigned j = 0; PSets[j] != -1; ++j) {
+ ++PressureSetRegUnits[PSets[j]];
+ }
+ }
+
+ unsigned VGPRMax = 0, SGPRMax = 0;
+ for (unsigned i = 0; i < NumRegPressureSets; ++i) {
+ if (isVGPRPressureSet(i) && PressureSetRegUnits[i] > VGPRMax) {
+ VGPRSetID = i;
+ VGPRMax = PressureSetRegUnits[i];
+ continue;
+ }
+ if (isSGPRPressureSet(i) && PressureSetRegUnits[i] > SGPRMax) {
+ SGPRSetID = i;
+ SGPRMax = PressureSetRegUnits[i];
+ }
+ }
+
+ assert(SGPRSetID < NumRegPressureSets &&
+ VGPRSetID < NumRegPressureSets);
}
void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved, unsigned Reg) const {
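
Instead of matching pressure sets by name ("SGPR_32"/"VGPR_32"), the constructor above now counts register units per pressure set and keeps the largest set that is flagged as SGPR- or VGPR-related. A standalone sketch of that largest-set selection, with made-up unit counts in place of the generated register tables:

    #include <cassert>
    #include <vector>

    // Pick the index of the largest pressure set among the eligible ones, as the
    // constructor above does; RegUnitsPerSet stands in for the counts gathered
    // from getRegUnitPressureSets().
    static unsigned pickLargestSet(const std::vector<unsigned> &RegUnitsPerSet,
                                   const std::vector<bool> &Eligible) {
      unsigned Best = RegUnitsPerSet.size(); // invalid sentinel, like NumRegPressureSets
      unsigned Max = 0;
      for (unsigned i = 0; i < RegUnitsPerSet.size(); ++i) {
        if (Eligible[i] && RegUnitsPerSet[i] > Max) {
          Best = i;
          Max = RegUnitsPerSet[i];
        }
      }
      return Best;
    }

    int main() {
      std::vector<unsigned> Units = {32, 256, 512};  // made-up unit counts
      std::vector<bool> IsVGPRSet = {false, true, true};
      assert(pickLargestSet(Units, IsVGPRSet) == 2); // the biggest eligible set wins
      return 0;
    }
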
@@ -119,14 +97,14 @@ void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved, unsigned Reg) co
unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
const MachineFunction &MF) const {
- unsigned BaseIdx = alignDown(getMaxWorkGroupSGPRCount(MF), 4) - 4;
+ unsigned BaseIdx = alignDown(getMaxNumSGPRs(MF), 4) - 4;
unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
}
unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
const MachineFunction &MF) const {
- unsigned RegCount = getMaxWorkGroupSGPRCount(MF);
+ unsigned RegCount = getMaxNumSGPRs(MF);
unsigned Reg;
// Try to place it in a hole after PrivateSegmentbufferReg.
@@ -161,18 +139,16 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
- unsigned MaxWorkGroupSGPRCount = getMaxWorkGroupSGPRCount(MF);
- unsigned MaxWorkGroupVGPRCount = getMaxWorkGroupVGPRCount(MF);
-
- unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
- unsigned NumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
- for (unsigned i = MaxWorkGroupSGPRCount; i < NumSGPRs; ++i) {
+ unsigned MaxNumSGPRs = getMaxNumSGPRs(MF);
+ unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
+ for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
reserveRegisterTuples(Reserved, Reg);
}
-
- for (unsigned i = MaxWorkGroupVGPRCount; i < NumVGPRs; ++i) {
+ unsigned MaxNumVGPRs = getMaxNumVGPRs(MF);
+ unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
+ for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
reserveRegisterTuples(Reserved, Reg);
}
@@ -194,49 +170,26 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
assert(!isSubRegister(ScratchRSrcReg, ScratchWaveOffsetReg));
}
- // Reserve registers for debugger usage if "amdgpu-debugger-reserve-trap-regs"
- // attribute was specified.
- const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
- if (ST.debuggerReserveRegs()) {
- unsigned ReservedVGPRFirst =
- MaxWorkGroupVGPRCount - MFI->getDebuggerReservedVGPRCount();
- for (unsigned i = ReservedVGPRFirst; i < MaxWorkGroupVGPRCount; ++i) {
- unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
- reserveRegisterTuples(Reserved, Reg);
- }
- }
-
return Reserved;
}
-unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
- unsigned Idx) const {
- const SISubtarget &STI = MF.getSubtarget<SISubtarget>();
- // FIXME: We should adjust the max number of waves based on LDS size.
- unsigned SGPRLimit = getNumSGPRsAllowed(STI, STI.getMaxWavesPerCU());
- unsigned VGPRLimit = getNumVGPRsAllowed(STI.getMaxWavesPerCU());
-
- unsigned VSLimit = SGPRLimit + VGPRLimit;
-
- if (SGPRPressureSets.test(Idx) && VGPRPressureSets.test(Idx)) {
- // FIXME: This is a hack. We should never be considering the pressure of
- // these since no virtual register should ever have this class.
- return VSLimit;
- }
-
- if (SGPRPressureSets.test(Idx))
- return SGPRLimit;
-
- return VGPRLimit;
-}
-
bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
- return Fn.getFrameInfo()->hasStackObjects();
+ return Fn.getFrameInfo().hasStackObjects();
}
bool
SIRegisterInfo::requiresFrameIndexScavenging(const MachineFunction &MF) const {
- return MF.getFrameInfo()->hasStackObjects();
+ return MF.getFrameInfo().hasStackObjects();
+}
+
+bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
+ const MachineFunction &MF) const {
+ // m0 is needed for the scalar store offset. m0 is unallocatable, so we can't
+ // create a virtual register for it during frame index elimination, so the
+ // scavenger is directly needed.
+ return MF.getFrameInfo().hasStackObjects() &&
+ MF.getSubtarget<SISubtarget>().hasScalarStores() &&
+ MF.getInfo<SIMachineFunctionInfo>()->hasSpilledSGPRs();
}
bool SIRegisterInfo::requiresVirtualBaseRegisters(
@@ -250,6 +203,14 @@ bool SIRegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const
return true;
}
+int64_t SIRegisterInfo::getMUBUFInstrOffset(const MachineInstr *MI) const {
+ assert(SIInstrInfo::isMUBUF(*MI));
+
+ int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::offset);
+ return MI->getOperand(OffIdx).getImm();
+}
+
int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
int Idx) const {
if (!SIInstrInfo::isMUBUF(*MI))
@@ -259,13 +220,16 @@ int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
AMDGPU::OpName::vaddr) &&
"Should never see frame index on non-address operand");
- int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
- AMDGPU::OpName::offset);
- return MI->getOperand(OffIdx).getImm();
+ return getMUBUFInstrOffset(MI);
}
bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
- return MI->mayLoadOrStore();
+ if (!MI->mayLoadOrStore())
+ return false;
+
+ int64_t FullOffset = Offset + getMUBUFInstrOffset(MI);
+
+ return !isUInt<12>(FullOffset);
}
void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
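
Both needsFrameBaseReg() and isFrameOffsetLegal() above reduce to the same question: does the frame offset plus the offset already encoded in the MUBUF instruction still fit the unsigned 12-bit immediate field? A small worked check, with a local isUInt12 helper standing in for llvm::isUInt<12> (the offset values are made up):

    #include <cassert>
    #include <cstdint>

    // Local stand-in for llvm::isUInt<12>(): true iff 0 <= V < 4096.
    static bool isUInt12(int64_t V) { return V >= 0 && V < (1 << 12); }

    int main() {
      int64_t InstOffset = 16;              // offset already encoded in the MUBUF op
      assert(isUInt12(InstOffset + 4000));  // 4016 still fits; no base register needed
      assert(!isUInt12(InstOffset + 4090)); // 4106 overflows 12 bits; needs a base reg
      return 0;
    }
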
@@ -290,14 +254,19 @@ void SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
MachineRegisterInfo &MRI = MF->getRegInfo();
unsigned UnusedCarry = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
- unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+
+ unsigned FIReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
.addImm(Offset);
+ BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_MOV_B32_e32), FIReg)
+ .addFrameIndex(FrameIdx);
+
BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::V_ADD_I32_e64), BaseReg)
.addReg(UnusedCarry, RegState::Define | RegState::Dead)
.addReg(OffsetReg, RegState::Kill)
- .addFrameIndex(FrameIdx);
+ .addReg(FIReg);
}
void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
@@ -328,40 +297,21 @@ void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg,
MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
int64_t NewOffset = OffsetOp->getImm() + Offset;
- if (isUInt<12>(NewOffset)) {
- // If we have a legal offset, fold it directly into the instruction.
- FIOp->ChangeToRegister(BaseReg, false);
- OffsetOp->setImm(NewOffset);
- return;
- }
-
- // The offset is not legal, so we must insert an add of the offset.
- MachineRegisterInfo &MRI = MF->getRegInfo();
- unsigned NewReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- DebugLoc DL = MI.getDebugLoc();
-
- assert(Offset != 0 && "Non-zero offset expected");
-
- unsigned UnusedCarry = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass);
- unsigned OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32RegClass);
+ assert(isUInt<12>(NewOffset) && "offset should be legal");
- // In the case the instruction already had an immediate offset, here only
- // the requested new offset is added because we are leaving the original
- // immediate in place.
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
- .addImm(Offset);
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_ADD_I32_e64), NewReg)
- .addReg(UnusedCarry, RegState::Define | RegState::Dead)
- .addReg(OffsetReg, RegState::Kill)
- .addReg(BaseReg);
-
- FIOp->ChangeToRegister(NewReg, false);
+ FIOp->ChangeToRegister(BaseReg, false);
+ OffsetOp->setImm(NewOffset);
}
bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
unsigned BaseReg,
int64_t Offset) const {
- return SIInstrInfo::isMUBUF(*MI) && isUInt<12>(Offset);
+ if (!SIInstrInfo::isMUBUF(*MI))
+ return false;
+
+ int64_t NewOffset = Offset + getMUBUFInstrOffset(MI);
+
+ return isUInt<12>(NewOffset);
}
const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
@@ -407,31 +357,107 @@ static unsigned getNumSubRegsForSpillOp(unsigned Op) {
}
}
-void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
- unsigned LoadStoreOp,
- const MachineOperand *SrcDst,
- unsigned ScratchRsrcReg,
- unsigned ScratchOffset,
- int64_t Offset,
- RegScavenger *RS) const {
+static int getOffsetMUBUFStore(unsigned Opc) {
+ switch (Opc) {
+ case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
+ return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
+ case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
+ return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
+ case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
+ return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
+ case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
+ return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
+ case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
+ return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
+ default:
+ return -1;
+ }
+}
+
+static int getOffsetMUBUFLoad(unsigned Opc) {
+ switch (Opc) {
+ case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
+ return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
+ case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
+ return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
+ case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
+ return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
+ case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
+ return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
+ case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
+ return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
+ case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
+ return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
+ case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
+ return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
+ default:
+ return -1;
+ }
+}
- unsigned Value = SrcDst->getReg();
- bool IsKill = SrcDst->isKill();
+// This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
+// need to handle the case where an SGPR may need to be spilled while spilling.
+static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
+ MachineFrameInfo &MFI,
+ MachineBasicBlock::iterator MI,
+ int Index,
+ int64_t Offset) {
+ MachineBasicBlock *MBB = MI->getParent();
+ const DebugLoc &DL = MI->getDebugLoc();
+ bool IsStore = MI->mayStore();
+
+ unsigned Opc = MI->getOpcode();
+ int LoadStoreOp = IsStore ?
+ getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
+ if (LoadStoreOp == -1)
+ return false;
+
+ unsigned Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata)->getReg();
+
+ BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
+ .addReg(Reg, getDefRegState(!IsStore))
+ .addOperand(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
+ .addOperand(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
+ .addImm(Offset)
+ .addImm(0) // glc
+ .addImm(0) // slc
+ .addImm(0) // tfe
+ .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ return true;
+}
+
+void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
+ unsigned LoadStoreOp,
+ int Index,
+ unsigned ValueReg,
+ bool IsKill,
+ unsigned ScratchRsrcReg,
+ unsigned ScratchOffsetReg,
+ int64_t InstOffset,
+ MachineMemOperand *MMO,
+ RegScavenger *RS) const {
MachineBasicBlock *MBB = MI->getParent();
MachineFunction *MF = MI->getParent()->getParent();
const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
+ const MachineFrameInfo &MFI = MF->getFrameInfo();
- DebugLoc DL = MI->getDebugLoc();
- bool IsStore = MI->mayStore();
+ const MCInstrDesc &Desc = TII->get(LoadStoreOp);
+ const DebugLoc &DL = MI->getDebugLoc();
+ bool IsStore = Desc.mayStore();
bool RanOutOfSGPRs = false;
bool Scavenged = false;
- unsigned SOffset = ScratchOffset;
- unsigned OriginalImmOffset = Offset;
+ unsigned SOffset = ScratchOffsetReg;
- unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
+ const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
+ unsigned NumSubRegs = AMDGPU::getRegBitWidth(RC->getID()) / 32;
unsigned Size = NumSubRegs * 4;
+ int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
+ const int64_t OriginalImmOffset = Offset;
+
+ unsigned Align = MFI.getObjectAlignment(Index);
+ const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();
if (!isUInt<12>(Offset + Size)) {
SOffset = AMDGPU::NoRegister;
@@ -450,20 +476,23 @@ void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
// subtract the offset after the spill to return ScratchOffset to it's
// original value.
RanOutOfSGPRs = true;
- SOffset = ScratchOffset;
+ SOffset = ScratchOffsetReg;
} else {
Scavenged = true;
}
+
BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), SOffset)
- .addReg(ScratchOffset)
- .addImm(Offset);
+ .addReg(ScratchOffsetReg)
+ .addImm(Offset);
+
Offset = 0;
}
- for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += 4) {
- unsigned SubReg = NumSubRegs > 1 ?
- getPhysRegSubReg(Value, &AMDGPU::VGPR_32RegClass, i) :
- Value;
+ const unsigned EltSize = 4;
+
+ for (unsigned i = 0, e = NumSubRegs; i != e; ++i, Offset += EltSize) {
+ unsigned SubReg = NumSubRegs == 1 ?
+ ValueReg : getSubReg(ValueReg, getSubRegFromChannel(i));
unsigned SOffsetRegState = 0;
unsigned SrcDstRegState = getDefRegState(!IsStore);
@@ -473,23 +502,324 @@ void SIRegisterInfo::buildScratchLoadStore(MachineBasicBlock::iterator MI,
SrcDstRegState |= getKillRegState(IsKill);
}
- BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
- .addReg(SubReg, getDefRegState(!IsStore))
+ MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(EltSize * i);
+ MachineMemOperand *NewMMO
+ = MF->getMachineMemOperand(PInfo, MMO->getFlags(),
+ EltSize, MinAlign(Align, EltSize * i));
+
+ auto MIB = BuildMI(*MBB, MI, DL, Desc)
+ .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill))
.addReg(ScratchRsrcReg)
.addReg(SOffset, SOffsetRegState)
.addImm(Offset)
.addImm(0) // glc
.addImm(0) // slc
.addImm(0) // tfe
- .addReg(Value, RegState::Implicit | SrcDstRegState)
- .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ .addMemOperand(NewMMO);
+
+ if (NumSubRegs > 1)
+ MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
}
+
if (RanOutOfSGPRs) {
// Subtract the offset we added to the ScratchOffset register.
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScratchOffset)
- .addReg(ScratchOffset)
- .addImm(OriginalImmOffset);
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_SUB_U32), ScratchOffsetReg)
+ .addReg(ScratchOffsetReg)
+ .addImm(OriginalImmOffset);
+ }
+}
+
+static std::pair<unsigned, unsigned> getSpillEltSize(unsigned SuperRegSize,
+ bool Store) {
+ if (SuperRegSize % 16 == 0) {
+ return { 16, Store ? AMDGPU::S_BUFFER_STORE_DWORDX4_SGPR :
+ AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR };
+ }
+
+ if (SuperRegSize % 8 == 0) {
+ return { 8, Store ? AMDGPU::S_BUFFER_STORE_DWORDX2_SGPR :
+ AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR };
+ }
+
+ return { 4, Store ? AMDGPU::S_BUFFER_STORE_DWORD_SGPR :
+ AMDGPU::S_BUFFER_LOAD_DWORD_SGPR};
+}
+
+void SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
+ int Index,
+ RegScavenger *RS) const {
+ MachineBasicBlock *MBB = MI->getParent();
+ MachineFunction *MF = MBB->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
+ const SIInstrInfo *TII = ST.getInstrInfo();
+
+ unsigned SuperReg = MI->getOperand(0).getReg();
+ bool IsKill = MI->getOperand(0).isKill();
+ const DebugLoc &DL = MI->getDebugLoc();
+
+ SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
+ MachineFrameInfo &FrameInfo = MF->getFrameInfo();
+
+ bool SpillToSMEM = ST.hasScalarStores() && EnableSpillSGPRToSMEM;
+
+ assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
+
+ unsigned OffsetReg = AMDGPU::M0;
+ unsigned M0CopyReg = AMDGPU::NoRegister;
+
+ if (SpillToSMEM) {
+ if (RS->isRegUsed(AMDGPU::M0)) {
+ M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
+ .addReg(AMDGPU::M0);
+ }
+ }
+
+ unsigned ScalarStoreOp;
+ unsigned EltSize = 4;
+ const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
+ if (SpillToSMEM && isSGPRClass(RC)) {
+ // XXX - if private_element_size is larger than 4 it might be useful to be
+ // able to spill wider vmem spills.
+ std::tie(EltSize, ScalarStoreOp) = getSpillEltSize(RC->getSize(), true);
+ }
+
+ ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
+ unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
+
+ // SubReg carries the "Kill" flag when SubReg == SuperReg.
+ unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
+ for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
+ unsigned SubReg = NumSubRegs == 1 ?
+ SuperReg : getSubReg(SuperReg, SplitParts[i]);
+
+ if (SpillToSMEM) {
+ int64_t FrOffset = FrameInfo.getObjectOffset(Index);
+
+ // The allocated memory size is really the wavefront size * the frame
+ // index size. The widest register class is 64 bytes, so a 4-byte scratch
+ // allocation is enough to spill this in a single stack object.
+ //
+ // FIXME: Frame size/offsets are computed earlier than this, so the extra
+ // space is still unnecessarily allocated.
+
+ unsigned Align = FrameInfo.getObjectAlignment(Index);
+ MachinePointerInfo PtrInfo
+ = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
+ MachineMemOperand *MMO
+ = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
+ EltSize, MinAlign(Align, EltSize * i));
+
+ // SMEM instructions only support a single offset, so increment the wave
+ // offset.
+
+ int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
+ if (Offset != 0) {
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
+ .addReg(MFI->getScratchWaveOffsetReg())
+ .addImm(Offset);
+ } else {
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
+ .addReg(MFI->getScratchWaveOffsetReg());
+ }
+
+ BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp))
+ .addReg(SubReg, getKillRegState(IsKill)) // sdata
+ .addReg(MFI->getScratchRSrcReg()) // sbase
+ .addReg(OffsetReg, RegState::Kill) // soff
+ .addImm(0) // glc
+ .addMemOperand(MMO);
+
+ continue;
+ }
+
+ struct SIMachineFunctionInfo::SpilledReg Spill =
+ MFI->getSpilledReg(MF, Index, i);
+ if (Spill.hasReg()) {
+ BuildMI(*MBB, MI, DL,
+ TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
+ Spill.VGPR)
+ .addReg(SubReg, getKillRegState(IsKill))
+ .addImm(Spill.Lane);
+
+ // FIXME: Since this spills to another register instead of an actual
+ // frame index, we should delete the frame index when all references to
+ // it are fixed.
+ } else {
+ // Spill SGPR to a frame index.
+ // TODO: Should VI try to spill to VGPR and then spill to SMEM?
+ unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+
+ MachineInstrBuilder Mov
+ = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
+ .addReg(SubReg, SubKillState);
+
+
+ // There could be undef components of a spilled super register.
+ // TODO: Can we detect this and skip the spill?
+ if (NumSubRegs > 1) {
+ // The last implicit use of the SuperReg carries the "Kill" flag.
+ unsigned SuperKillState = 0;
+ if (i + 1 == e)
+ SuperKillState |= getKillRegState(IsKill);
+ Mov.addReg(SuperReg, RegState::Implicit | SuperKillState);
+ }
+
+ unsigned Align = FrameInfo.getObjectAlignment(Index);
+ MachinePointerInfo PtrInfo
+ = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
+ MachineMemOperand *MMO
+ = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
+ EltSize, MinAlign(Align, EltSize * i));
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
+ .addReg(TmpReg, RegState::Kill) // src
+ .addFrameIndex(Index) // vaddr
+        .addReg(MFI->getScratchRSrcReg())       // srsrc
+ .addReg(MFI->getScratchWaveOffsetReg()) // soffset
+ .addImm(i * 4) // offset
+ .addMemOperand(MMO);
+ }
+ }
+
+ if (M0CopyReg != AMDGPU::NoRegister) {
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
+ .addReg(M0CopyReg, RegState::Kill);
+ }
+
+ MI->eraseFromParent();
+ MFI->addToSpilledSGPRs(NumSubRegs);
+}
+
+void SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
+ int Index,
+ RegScavenger *RS) const {
+ MachineFunction *MF = MI->getParent()->getParent();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ MachineBasicBlock *MBB = MI->getParent();
+ SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
+ MachineFrameInfo &FrameInfo = MF->getFrameInfo();
+ const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
+ const SIInstrInfo *TII = ST.getInstrInfo();
+ const DebugLoc &DL = MI->getDebugLoc();
+
+ unsigned SuperReg = MI->getOperand(0).getReg();
+ bool SpillToSMEM = ST.hasScalarStores() && EnableSpillSGPRToSMEM;
+
+ assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
+
+ unsigned OffsetReg = AMDGPU::M0;
+ unsigned M0CopyReg = AMDGPU::NoRegister;
+
+ if (SpillToSMEM) {
+ if (RS->isRegUsed(AMDGPU::M0)) {
+ M0CopyReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
+ .addReg(AMDGPU::M0);
+ }
+ }
+
+ unsigned EltSize = 4;
+ unsigned ScalarLoadOp;
+
+ const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
+ if (SpillToSMEM && isSGPRClass(RC)) {
+ // XXX - if private_element_size is larger than 4 it might be useful to be
+ // able to spill wider vmem spills.
+ std::tie(EltSize, ScalarLoadOp) = getSpillEltSize(RC->getSize(), false);
+ }
+
+ ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
+ unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
+
+ // SubReg carries the "Kill" flag when SubReg == SuperReg.
+ int64_t FrOffset = FrameInfo.getObjectOffset(Index);
+
+ for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
+ unsigned SubReg = NumSubRegs == 1 ?
+ SuperReg : getSubReg(SuperReg, SplitParts[i]);
+
+ if (SpillToSMEM) {
+ // FIXME: Size may be > 4 but extra bytes wasted.
+ unsigned Align = FrameInfo.getObjectAlignment(Index);
+ MachinePointerInfo PtrInfo
+ = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
+ MachineMemOperand *MMO
+ = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
+ EltSize, MinAlign(Align, EltSize * i));
+
+ // Add i * 4 offset
+ int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
+ if (Offset != 0) {
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
+ .addReg(MFI->getScratchWaveOffsetReg())
+ .addImm(Offset);
+ } else {
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
+ .addReg(MFI->getScratchWaveOffsetReg());
+ }
+
+ auto MIB =
+ BuildMI(*MBB, MI, DL, TII->get(ScalarLoadOp), SubReg)
+ .addReg(MFI->getScratchRSrcReg()) // sbase
+ .addReg(OffsetReg, RegState::Kill) // soff
+ .addImm(0) // glc
+ .addMemOperand(MMO);
+
+ if (NumSubRegs > 1)
+ MIB.addReg(SuperReg, RegState::ImplicitDefine);
+
+ continue;
+ }
+
+ SIMachineFunctionInfo::SpilledReg Spill
+ = MFI->getSpilledReg(MF, Index, i);
+
+ if (Spill.hasReg()) {
+ auto MIB =
+ BuildMI(*MBB, MI, DL, TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
+ SubReg)
+ .addReg(Spill.VGPR)
+ .addImm(Spill.Lane);
+
+ if (NumSubRegs > 1)
+ MIB.addReg(SuperReg, RegState::ImplicitDefine);
+ } else {
+ // Restore SGPR from a stack slot.
+ // FIXME: We should use S_LOAD_DWORD here for VI.
+ unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ unsigned Align = FrameInfo.getObjectAlignment(Index);
+
+ MachinePointerInfo PtrInfo
+ = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
+
+ MachineMemOperand *MMO = MF->getMachineMemOperand(PtrInfo,
+ MachineMemOperand::MOLoad, EltSize,
+ MinAlign(Align, EltSize * i));
+
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
+ .addFrameIndex(Index) // vaddr
+ .addReg(MFI->getScratchRSrcReg()) // srsrc
+ .addReg(MFI->getScratchWaveOffsetReg()) // soffset
+ .addImm(i * 4) // offset
+ .addMemOperand(MMO);
+
+ auto MIB =
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
+ .addReg(TmpReg, RegState::Kill);
+
+ if (NumSubRegs > 1)
+ MIB.addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
+ }
+ }
+
+ if (M0CopyReg != AMDGPU::NoRegister) {
+ BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
+ .addReg(M0CopyReg, RegState::Kill);
}
+
+ MI->eraseFromParent();
}
void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
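
In the SGPR-to-SMEM spill path above, the per-lane slot address is computed as wavefront_size * frame_offset + element_size * i and then added to the scratch wave offset register. A worked example of that arithmetic with representative numbers (wavefront size 64; the frame offset is a made-up value):

    #include <cassert>
    #include <cstdint>

    int main() {
      const int64_t WavefrontSize = 64; // typical wavefront size on SI/VI
      const int64_t FrOffset = 8;       // hypothetical frame object offset
      const int64_t EltSize = 4;        // one 32-bit register per element

      // Offset of the i-th sub-register, as computed in spillSGPR()/restoreSGPR()
      // before being added to the scratch wave offset register.
      for (int64_t i = 0; i < 2; ++i) {
        int64_t Offset = WavefrontSize * FrOffset + EltSize * i;
        assert(Offset == 512 + 4 * i); // 512 for i == 0, 516 for i == 1
      }
      return 0;
    }
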
@@ -499,7 +829,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
MachineRegisterInfo &MRI = MF->getRegInfo();
MachineBasicBlock *MBB = MI->getParent();
SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
- MachineFrameInfo *FrameInfo = MF->getFrameInfo();
+ MachineFrameInfo &FrameInfo = MF->getFrameInfo();
const SISubtarget &ST = MF->getSubtarget<SISubtarget>();
const SIInstrInfo *TII = ST.getInstrInfo();
DebugLoc DL = MI->getDebugLoc();
@@ -514,66 +844,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
case AMDGPU::SI_SPILL_S128_SAVE:
case AMDGPU::SI_SPILL_S64_SAVE:
case AMDGPU::SI_SPILL_S32_SAVE: {
- unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
- unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
-
- unsigned SuperReg = MI->getOperand(0).getReg();
- bool IsKill = MI->getOperand(0).isKill();
- // SubReg carries the "Kill" flag when SubReg == SuperReg.
- unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill);
- for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
- unsigned SubReg = getPhysRegSubReg(SuperReg,
- &AMDGPU::SGPR_32RegClass, i);
-
- struct SIMachineFunctionInfo::SpilledReg Spill =
- MFI->getSpilledReg(MF, Index, i);
-
- if (Spill.hasReg()) {
- BuildMI(*MBB, MI, DL,
- TII->getMCOpcodeFromPseudo(AMDGPU::V_WRITELANE_B32),
- Spill.VGPR)
- .addReg(SubReg, getKillRegState(IsKill))
- .addImm(Spill.Lane);
-
- // FIXME: Since this spills to another register instead of an actual
- // frame index, we should delete the frame index when all references to
- // it are fixed.
- } else {
- // Spill SGPR to a frame index.
- // FIXME we should use S_STORE_DWORD here for VI.
- MachineInstrBuilder Mov
- = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
- .addReg(SubReg, SubKillState);
-
-
- // There could be undef components of a spilled super register.
- // TODO: Can we detect this and skip the spill?
- if (NumSubRegs > 1) {
- // The last implicit use of the SuperReg carries the "Kill" flag.
- unsigned SuperKillState = 0;
- if (i + 1 == e)
- SuperKillState |= getKillRegState(IsKill);
- Mov.addReg(SuperReg, RegState::Implicit | SuperKillState);
- }
-
- unsigned Size = FrameInfo->getObjectSize(Index);
- unsigned Align = FrameInfo->getObjectAlignment(Index);
- MachinePointerInfo PtrInfo
- = MachinePointerInfo::getFixedStack(*MF, Index);
- MachineMemOperand *MMO
- = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
- Size, Align);
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_SAVE))
- .addReg(TmpReg, RegState::Kill) // src
- .addFrameIndex(Index) // frame_idx
- .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
- .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset
- .addImm(i * 4) // offset
- .addMemOperand(MMO);
- }
- }
- MI->eraseFromParent();
- MFI->addToSpilledSGPRs(NumSubRegs);
+ spillSGPR(MI, Index, RS);
break;
}
@@ -583,49 +854,7 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
case AMDGPU::SI_SPILL_S128_RESTORE:
case AMDGPU::SI_SPILL_S64_RESTORE:
case AMDGPU::SI_SPILL_S32_RESTORE: {
- unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode());
- unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
-
- for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
- unsigned SubReg = getPhysRegSubReg(MI->getOperand(0).getReg(),
- &AMDGPU::SGPR_32RegClass, i);
- struct SIMachineFunctionInfo::SpilledReg Spill =
- MFI->getSpilledReg(MF, Index, i);
-
- if (Spill.hasReg()) {
- BuildMI(*MBB, MI, DL,
- TII->getMCOpcodeFromPseudo(AMDGPU::V_READLANE_B32),
- SubReg)
- .addReg(Spill.VGPR)
- .addImm(Spill.Lane)
- .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
- } else {
- // Restore SGPR from a stack slot.
- // FIXME: We should use S_LOAD_DWORD here for VI.
-
- unsigned Align = FrameInfo->getObjectAlignment(Index);
- unsigned Size = FrameInfo->getObjectSize(Index);
-
- MachinePointerInfo PtrInfo
- = MachinePointerInfo::getFixedStack(*MF, Index);
-
- MachineMemOperand *MMO = MF->getMachineMemOperand(
- PtrInfo, MachineMemOperand::MOLoad, Size, Align);
-
- BuildMI(*MBB, MI, DL, TII->get(AMDGPU::SI_SPILL_V32_RESTORE), TmpReg)
- .addFrameIndex(Index) // frame_idx
- .addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
- .addReg(MFI->getScratchWaveOffsetReg()) // scratch_offset
- .addImm(i * 4) // offset
- .addMemOperand(MMO);
- BuildMI(*MBB, MI, DL,
- TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
- .addReg(TmpReg, RegState::Kill)
- .addReg(MI->getOperand(0).getReg(), RegState::ImplicitDefine);
- }
- }
-
- MI->eraseFromParent();
+ restoreSGPR(MI, Index, RS);
break;
}
@@ -635,34 +864,62 @@ void SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
case AMDGPU::SI_SPILL_V128_SAVE:
case AMDGPU::SI_SPILL_V96_SAVE:
case AMDGPU::SI_SPILL_V64_SAVE:
- case AMDGPU::SI_SPILL_V32_SAVE:
- buildScratchLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
- TII->getNamedOperand(*MI, AMDGPU::OpName::src),
- TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
- TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
- FrameInfo->getObjectOffset(Index) +
- TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(), RS);
- MI->eraseFromParent();
+ case AMDGPU::SI_SPILL_V32_SAVE: {
+ const MachineOperand *VData = TII->getNamedOperand(*MI,
+ AMDGPU::OpName::vdata);
+ buildSpillLoadStore(MI, AMDGPU::BUFFER_STORE_DWORD_OFFSET,
+ Index,
+ VData->getReg(), VData->isKill(),
+ TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
+ TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
+ TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
+ *MI->memoperands_begin(),
+ RS);
MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
+ MI->eraseFromParent();
break;
+ }
case AMDGPU::SI_SPILL_V32_RESTORE:
case AMDGPU::SI_SPILL_V64_RESTORE:
case AMDGPU::SI_SPILL_V96_RESTORE:
case AMDGPU::SI_SPILL_V128_RESTORE:
case AMDGPU::SI_SPILL_V256_RESTORE:
case AMDGPU::SI_SPILL_V512_RESTORE: {
- buildScratchLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
- TII->getNamedOperand(*MI, AMDGPU::OpName::dst),
- TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_rsrc)->getReg(),
- TII->getNamedOperand(*MI, AMDGPU::OpName::scratch_offset)->getReg(),
- FrameInfo->getObjectOffset(Index) +
- TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(), RS);
+ const MachineOperand *VData = TII->getNamedOperand(*MI,
+ AMDGPU::OpName::vdata);
+
+ buildSpillLoadStore(MI, AMDGPU::BUFFER_LOAD_DWORD_OFFSET,
+ Index,
+ VData->getReg(), VData->isKill(),
+ TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc)->getReg(),
+ TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg(),
+ TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
+ *MI->memoperands_begin(),
+ RS);
MI->eraseFromParent();
break;
}
default: {
- int64_t Offset = FrameInfo->getObjectOffset(Index);
+ if (TII->isMUBUF(*MI)) {
+ // Disable offen so we don't need a 0 vgpr base.
+ assert(static_cast<int>(FIOperandNum) ==
+ AMDGPU::getNamedOperandIdx(MI->getOpcode(),
+ AMDGPU::OpName::vaddr));
+
+ int64_t Offset = FrameInfo.getObjectOffset(Index);
+ int64_t OldImm
+ = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
+ int64_t NewOffset = OldImm + Offset;
+
+ if (isUInt<12>(NewOffset) &&
+ buildMUBUFOffsetLoadStore(TII, FrameInfo, MI, Index, NewOffset)) {
+ MI->eraseFromParent();
+ break;
+ }
+ }
+
+ int64_t Offset = FrameInfo.getObjectOffset(Index);
FIOp.ChangeToImmediate(Offset);
if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
@@ -770,7 +1027,8 @@ const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
return RC;
// We can assume that each lane corresponds to one 32-bit register.
- unsigned Count = countPopulation(getSubRegIndexLaneMask(SubIdx));
+ LaneBitmask::Type Mask = getSubRegIndexLaneMask(SubIdx).getAsInteger();
+ unsigned Count = countPopulation(Mask);
if (isSGPRClass(RC)) {
switch (Count) {
case 1:
@@ -812,7 +1070,7 @@ bool SIRegisterInfo::shouldRewriteCopySrc(
// We want to prefer the smallest register class possible, so we don't want to
// stop and rewrite on anything that looks like a subregister
// extract. Operations mostly don't care about the super register class, so we
- // only want to stop on the most basic of copies between the smae register
+ // only want to stop on the most basic of copies between the same register
// class.
//
// e.g. if we have something like
@@ -828,80 +1086,6 @@ bool SIRegisterInfo::shouldRewriteCopySrc(
return getCommonSubClass(DefRC, SrcRC) != nullptr;
}
-unsigned SIRegisterInfo::getPhysRegSubReg(unsigned Reg,
- const TargetRegisterClass *SubRC,
- unsigned Channel) const {
-
- switch (Reg) {
- case AMDGPU::VCC:
- switch(Channel) {
- case 0: return AMDGPU::VCC_LO;
- case 1: return AMDGPU::VCC_HI;
- default: llvm_unreachable("Invalid SubIdx for VCC"); break;
- }
-
- case AMDGPU::TBA:
- switch(Channel) {
- case 0: return AMDGPU::TBA_LO;
- case 1: return AMDGPU::TBA_HI;
- default: llvm_unreachable("Invalid SubIdx for TBA"); break;
- }
-
- case AMDGPU::TMA:
- switch(Channel) {
- case 0: return AMDGPU::TMA_LO;
- case 1: return AMDGPU::TMA_HI;
- default: llvm_unreachable("Invalid SubIdx for TMA"); break;
- }
-
- case AMDGPU::FLAT_SCR:
- switch (Channel) {
- case 0:
- return AMDGPU::FLAT_SCR_LO;
- case 1:
- return AMDGPU::FLAT_SCR_HI;
- default:
- llvm_unreachable("Invalid SubIdx for FLAT_SCR");
- }
- break;
-
- case AMDGPU::EXEC:
- switch (Channel) {
- case 0:
- return AMDGPU::EXEC_LO;
- case 1:
- return AMDGPU::EXEC_HI;
- default:
- llvm_unreachable("Invalid SubIdx for EXEC");
- }
- break;
- }
-
- const TargetRegisterClass *RC = getPhysRegClass(Reg);
- // 32-bit registers don't have sub-registers, so we can just return the
- // Reg. We need to have this check here, because the calculation below
- // using getHWRegIndex() will fail with special 32-bit registers like
- // VCC_LO, VCC_HI, EXEC_LO, EXEC_HI and M0.
- if (RC->getSize() == 4) {
- assert(Channel == 0);
- return Reg;
- }
-
- unsigned Index = getHWRegIndex(Reg);
- return SubRC->getRegister(Index + Channel);
-}
-
-bool SIRegisterInfo::opCanUseLiteralConstant(unsigned OpType) const {
- return OpType == AMDGPU::OPERAND_REG_IMM32;
-}
-
-bool SIRegisterInfo::opCanUseInlineConstant(unsigned OpType) const {
- if (opCanUseLiteralConstant(OpType))
- return true;
-
- return OpType == AMDGPU::OPERAND_REG_INLINE_C;
-}
-
// FIXME: Most of these are flexible with HSA and we don't need to reserve them
// as input registers if unused. Whether the dispatch ptr is necessary should be
// easy to detect from used intrinsics. Scratch setup is harder to know.
@@ -924,14 +1108,16 @@ unsigned SIRegisterInfo::getPreloadedValue(const MachineFunction &MF,
case SIRegisterInfo::PRIVATE_SEGMENT_WAVE_BYTE_OFFSET:
return MFI->PrivateSegmentWaveByteOffsetSystemSGPR;
case SIRegisterInfo::PRIVATE_SEGMENT_BUFFER:
- assert(ST.isAmdHsaOS() && "Non-HSA ABI currently uses relocations");
+ assert(ST.isAmdCodeObjectV2() &&
+ "Non-CodeObjectV2 ABI currently uses relocations");
assert(MFI->hasPrivateSegmentBuffer());
return MFI->PrivateSegmentBufferUserSGPR;
case SIRegisterInfo::KERNARG_SEGMENT_PTR:
assert(MFI->hasKernargSegmentPtr());
return MFI->KernargSegmentPtrUserSGPR;
case SIRegisterInfo::DISPATCH_ID:
- llvm_unreachable("unimplemented");
+ assert(MFI->hasDispatchID());
+ return MFI->DispatchIDUserSGPR;
case SIRegisterInfo::FLAT_SCRATCH_INIT:
assert(MFI->hasFlatScratchInit());
return MFI->FlatScratchInitUserSGPR;
@@ -968,50 +1154,323 @@ SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
return AMDGPU::NoRegister;
}
-unsigned SIRegisterInfo::getNumVGPRsAllowed(unsigned WaveCount) const {
- switch(WaveCount) {
- case 10: return 24;
- case 9: return 28;
- case 8: return 32;
- case 7: return 36;
- case 6: return 40;
- case 5: return 48;
- case 4: return 64;
- case 3: return 84;
- case 2: return 128;
- default: return 256;
+unsigned SIRegisterInfo::getTotalNumSGPRs(const SISubtarget &ST) const {
+ if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ return 800;
+ return 512;
+}
+
+unsigned SIRegisterInfo::getNumAddressableSGPRs(const SISubtarget &ST) const {
+ if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ return 102;
+ return 104;
+}
+
+unsigned SIRegisterInfo::getNumReservedSGPRs(const SISubtarget &ST,
+ const SIMachineFunctionInfo &MFI) const {
+ if (MFI.hasFlatScratchInit()) {
+ if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
+ return 6; // FLAT_SCRATCH, XNACK, VCC (in that order)
+
+ if (ST.getGeneration() == AMDGPUSubtarget::SEA_ISLANDS)
+ return 4; // FLAT_SCRATCH, VCC (in that order)
}
+
+ if (ST.isXNACKEnabled())
+ return 4; // XNACK, VCC (in that order)
+
+ return 2; // VCC.
}
-unsigned SIRegisterInfo::getNumSGPRsAllowed(const SISubtarget &ST,
- unsigned WaveCount) const {
- if (ST.getGeneration() >= SISubtarget::VOLCANIC_ISLANDS) {
- switch (WaveCount) {
+unsigned SIRegisterInfo::getMinNumSGPRs(const SISubtarget &ST,
+ unsigned WavesPerEU) const {
+ if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ switch (WavesPerEU) {
+ case 0: return 0;
+ case 10: return 0;
+ case 9: return 0;
+ case 8: return 81;
+ default: return 97;
+ }
+ } else {
+ switch (WavesPerEU) {
+ case 0: return 0;
+ case 10: return 0;
+ case 9: return 49;
+ case 8: return 57;
+ case 7: return 65;
+ case 6: return 73;
+ case 5: return 81;
+ default: return 97;
+ }
+ }
+}
+
+unsigned SIRegisterInfo::getMaxNumSGPRs(const SISubtarget &ST,
+ unsigned WavesPerEU,
+ bool Addressable) const {
+ if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ switch (WavesPerEU) {
+ case 0: return 80;
case 10: return 80;
case 9: return 80;
case 8: return 96;
- default: return 102;
+ default: return Addressable ? getNumAddressableSGPRs(ST) : 112;
}
} else {
- switch(WaveCount) {
+ switch (WavesPerEU) {
+ case 0: return 48;
case 10: return 48;
case 9: return 56;
case 8: return 64;
case 7: return 72;
case 6: return 80;
case 5: return 96;
- default: return 103;
+ default: return getNumAddressableSGPRs(ST);
}
}
}
-bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
- unsigned Reg) const {
- const TargetRegisterClass *RC;
+unsigned SIRegisterInfo::getMaxNumSGPRs(const MachineFunction &MF) const {
+ const Function &F = *MF.getFunction();
+
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+ const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
+
+ // Compute maximum number of SGPRs function can use using default/requested
+ // minimum number of waves per execution unit.
+ std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
+ unsigned MaxNumSGPRs = getMaxNumSGPRs(ST, WavesPerEU.first, false);
+ unsigned MaxNumAddressableSGPRs = getMaxNumSGPRs(ST, WavesPerEU.first, true);
+
+ // Check if maximum number of SGPRs was explicitly requested using
+ // "amdgpu-num-sgpr" attribute.
+ if (F.hasFnAttribute("amdgpu-num-sgpr")) {
+ unsigned Requested = AMDGPU::getIntegerAttribute(
+ F, "amdgpu-num-sgpr", MaxNumSGPRs);
+
+ // Make sure requested value does not violate subtarget's specifications.
+ if (Requested && (Requested <= getNumReservedSGPRs(ST, MFI)))
+ Requested = 0;
+
+ // If more SGPRs are required to support the input user/system SGPRs,
+ // increase to accommodate them.
+ //
+ // FIXME: This really ends up using the requested number of SGPRs + number
+ // of reserved special registers in total. Theoretically you could re-use
+ // the last input registers for these special registers, but this would
+ // require a lot of complexity to deal with the weird aliasing.
+ unsigned NumInputSGPRs = MFI.getNumPreloadedSGPRs();
+ if (Requested && Requested < NumInputSGPRs)
+ Requested = NumInputSGPRs;
+
+ // Make sure requested value is compatible with values implied by
+ // default/requested minimum/maximum number of waves per execution unit.
+ if (Requested && Requested > getMaxNumSGPRs(ST, WavesPerEU.first, false))
+ Requested = 0;
+ if (WavesPerEU.second &&
+ Requested && Requested < getMinNumSGPRs(ST, WavesPerEU.second))
+ Requested = 0;
+
+ if (Requested)
+ MaxNumSGPRs = Requested;
+ }
+
+ if (ST.hasSGPRInitBug())
+ MaxNumSGPRs = SISubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG;
+
+ return std::min(MaxNumSGPRs - getNumReservedSGPRs(ST, MFI),
+ MaxNumAddressableSGPRs);
+}
+
+unsigned SIRegisterInfo::getNumDebuggerReservedVGPRs(
+ const SISubtarget &ST) const {
+ if (ST.debuggerReserveRegs())
+ return 4;
+ return 0;
+}
+
+unsigned SIRegisterInfo::getMinNumVGPRs(unsigned WavesPerEU) const {
+ switch (WavesPerEU) {
+ case 0: return 0;
+ case 10: return 0;
+ case 9: return 25;
+ case 8: return 29;
+ case 7: return 33;
+ case 6: return 37;
+ case 5: return 41;
+ case 4: return 49;
+ case 3: return 65;
+ case 2: return 85;
+ default: return 129;
+ }
+}
+
+unsigned SIRegisterInfo::getMaxNumVGPRs(unsigned WavesPerEU) const {
+ switch (WavesPerEU) {
+ case 0: return 24;
+ case 10: return 24;
+ case 9: return 28;
+ case 8: return 32;
+ case 7: return 36;
+ case 6: return 40;
+ case 5: return 48;
+ case 4: return 64;
+ case 3: return 84;
+ case 2: return 128;
+ default: return getTotalNumVGPRs();
+ }
+}
+
+unsigned SIRegisterInfo::getMaxNumVGPRs(const MachineFunction &MF) const {
+ const Function &F = *MF.getFunction();
+
+ const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
+ const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
+
+ // Compute maximum number of VGPRs function can use using default/requested
+ // minimum number of waves per execution unit.
+ std::pair<unsigned, unsigned> WavesPerEU = MFI.getWavesPerEU();
+ unsigned MaxNumVGPRs = getMaxNumVGPRs(WavesPerEU.first);
+
+ // Check if maximum number of VGPRs was explicitly requested using
+ // "amdgpu-num-vgpr" attribute.
+ if (F.hasFnAttribute("amdgpu-num-vgpr")) {
+ unsigned Requested = AMDGPU::getIntegerAttribute(
+ F, "amdgpu-num-vgpr", MaxNumVGPRs);
+
+ // Make sure requested value does not violate subtarget's specifications.
+ if (Requested && Requested <= getNumDebuggerReservedVGPRs(ST))
+ Requested = 0;
+
+ // Make sure requested value is compatible with values implied by
+ // default/requested minimum/maximum number of waves per execution unit.
+ if (Requested && Requested > getMaxNumVGPRs(WavesPerEU.first))
+ Requested = 0;
+ if (WavesPerEU.second &&
+ Requested && Requested < getMinNumVGPRs(WavesPerEU.second))
+ Requested = 0;
+
+ if (Requested)
+ MaxNumVGPRs = Requested;
+ }
+
+ return MaxNumVGPRs - getNumDebuggerReservedVGPRs(ST);
+}
+
+ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
+ unsigned EltSize) const {
+ if (EltSize == 4) {
+ static const int16_t Sub0_15[] = {
+ AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
+ AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
+ AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
+ AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
+ };
+
+ static const int16_t Sub0_7[] = {
+ AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
+ AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
+ };
+
+ static const int16_t Sub0_3[] = {
+ AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
+ };
+
+ static const int16_t Sub0_2[] = {
+ AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2,
+ };
+
+ static const int16_t Sub0_1[] = {
+ AMDGPU::sub0, AMDGPU::sub1,
+ };
+
+ switch (AMDGPU::getRegBitWidth(*RC->MC)) {
+ case 32:
+ return {};
+ case 64:
+ return makeArrayRef(Sub0_1);
+ case 96:
+ return makeArrayRef(Sub0_2);
+ case 128:
+ return makeArrayRef(Sub0_3);
+ case 256:
+ return makeArrayRef(Sub0_7);
+ case 512:
+ return makeArrayRef(Sub0_15);
+ default:
+ llvm_unreachable("unhandled register size");
+ }
+ }
+
+ if (EltSize == 8) {
+ static const int16_t Sub0_15_64[] = {
+ AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
+ AMDGPU::sub4_sub5, AMDGPU::sub6_sub7,
+ AMDGPU::sub8_sub9, AMDGPU::sub10_sub11,
+ AMDGPU::sub12_sub13, AMDGPU::sub14_sub15
+ };
+
+ static const int16_t Sub0_7_64[] = {
+ AMDGPU::sub0_sub1, AMDGPU::sub2_sub3,
+ AMDGPU::sub4_sub5, AMDGPU::sub6_sub7
+ };
+
+
+ static const int16_t Sub0_3_64[] = {
+ AMDGPU::sub0_sub1, AMDGPU::sub2_sub3
+ };
+
+ switch (AMDGPU::getRegBitWidth(*RC->MC)) {
+ case 64:
+ return {};
+ case 128:
+ return makeArrayRef(Sub0_3_64);
+ case 256:
+ return makeArrayRef(Sub0_7_64);
+ case 512:
+ return makeArrayRef(Sub0_15_64);
+ default:
+ llvm_unreachable("unhandled register size");
+ }
+ }
+
+ assert(EltSize == 16 && "unhandled register spill split size");
+
+ static const int16_t Sub0_15_128[] = {
+ AMDGPU::sub0_sub1_sub2_sub3,
+ AMDGPU::sub4_sub5_sub6_sub7,
+ AMDGPU::sub8_sub9_sub10_sub11,
+ AMDGPU::sub12_sub13_sub14_sub15
+ };
+
+ static const int16_t Sub0_7_128[] = {
+ AMDGPU::sub0_sub1_sub2_sub3,
+ AMDGPU::sub4_sub5_sub6_sub7
+ };
+
+ switch (AMDGPU::getRegBitWidth(*RC->MC)) {
+ case 128:
+ return {};
+ case 256:
+ return makeArrayRef(Sub0_7_128);
+ case 512:
+ return makeArrayRef(Sub0_15_128);
+ default:
+ llvm_unreachable("unhandled register size");
+ }
+}
+
+const TargetRegisterClass*
+SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
+ unsigned Reg) const {
if (TargetRegisterInfo::isVirtualRegister(Reg))
- RC = MRI.getRegClass(Reg);
- else
- RC = getPhysRegClass(Reg);
+ return MRI.getRegClass(Reg);
- return hasVGPRs(RC);
+ return getPhysRegClass(Reg);
+}
+
+bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
+ unsigned Reg) const {
+ return hasVGPRs(getRegClassForReg(MRI, Reg));
}
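The new SIRegisterInfo::getMaxNumSGPRs(const MachineFunction &) above validates an explicit "amdgpu-num-sgpr" request against the occupancy-derived bounds before subtracting the reserved registers. The following is a minimal standalone sketch of that clamping order; the helper functions and numeric limits are simplified stand-ins rather than the target's real values, and the addressable-SGPR clamp and the SGPR-init-bug case are omitted.

// Standalone sketch (not the LLVM implementation) of how an explicit
// register-count request is validated against occupancy-derived bounds
// before reserved registers are subtracted. Numbers are illustrative.
#include <cstdio>

struct Budget {
  unsigned MinWavesPerEU;  // default/requested minimum waves per EU
  unsigned MaxWavesPerEU;  // 0 if no explicit maximum
  unsigned Requested;      // explicit request; 0 means none
  unsigned NumInputSGPRs;  // preloaded user/system registers
  unsigned NumReserved;    // VCC, FLAT_SCRATCH, XNACK, ...
};

// Stand-ins for getMaxNumSGPRs(ST, Waves) / getMinNumSGPRs(ST, Waves).
static unsigned maxForWaves(unsigned Waves) {
  return Waves >= 9 ? 80 : Waves == 8 ? 96 : 102;
}
static unsigned minForWaves(unsigned Waves) {
  return Waves >= 9 ? 0 : Waves == 8 ? 81 : 97;
}

unsigned computeMaxSGPRs(const Budget &B) {
  unsigned Max = maxForWaves(B.MinWavesPerEU);
  unsigned Req = B.Requested;
  // Reject requests that leave no room for the reserved registers.
  if (Req && Req <= B.NumReserved)
    Req = 0;
  // Grow the request to cover the preloaded input registers.
  if (Req && Req < B.NumInputSGPRs)
    Req = B.NumInputSGPRs;
  // Reject requests incompatible with the waves-per-EU range.
  if (Req && Req > maxForWaves(B.MinWavesPerEU))
    Req = 0;
  if (Req && B.MaxWavesPerEU && Req < minForWaves(B.MaxWavesPerEU))
    Req = 0;
  if (Req)
    Max = Req;
  return Max - B.NumReserved;
}

int main() {
  Budget B{/*MinWavesPerEU=*/8, /*MaxWavesPerEU=*/10, /*Requested=*/90,
           /*NumInputSGPRs=*/16, /*NumReserved=*/4};
  std::printf("usable SGPRs: %u\n", computeMaxSGPRs(B)); // prints 86
}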
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index d8b2d9f4e975..0bcae7d9840c 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -16,17 +16,19 @@
#define LLVM_LIB_TARGET_AMDGPU_SIREGISTERINFO_H
#include "AMDGPURegisterInfo.h"
+#include "SIDefines.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
namespace llvm {
class SISubtarget;
class MachineRegisterInfo;
+class SIMachineFunctionInfo;
-struct SIRegisterInfo final : public AMDGPURegisterInfo {
+class SIRegisterInfo final : public AMDGPURegisterInfo {
private:
- unsigned SGPR32SetID;
- unsigned VGPR32SetID;
+ unsigned SGPRSetID;
+ unsigned VGPRSetID;
BitVector SGPRPressureSets;
BitVector VGPRPressureSets;
@@ -48,17 +50,16 @@ public:
BitVector getReservedRegs(const MachineFunction &MF) const override;
- unsigned getRegPressureSetLimit(const MachineFunction &MF,
- unsigned Idx) const override;
-
-
bool requiresRegisterScavenging(const MachineFunction &Fn) const override;
-
bool requiresFrameIndexScavenging(const MachineFunction &MF) const override;
+ bool requiresFrameIndexReplacementScavenging(
+ const MachineFunction &MF) const override;
bool requiresVirtualBaseRegisters(const MachineFunction &Fn) const override;
bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override;
+ int64_t getMUBUFInstrOffset(const MachineInstr *MI) const;
+
int64_t getFrameIndexInstrOffset(const MachineInstr *MI,
int Idx) const override;
@@ -77,6 +78,12 @@ public:
const TargetRegisterClass *getPointerRegClass(
const MachineFunction &MF, unsigned Kind = 0) const override;
+ void spillSGPR(MachineBasicBlock::iterator MI,
+ int FI, RegScavenger *RS) const;
+
+ void restoreSGPR(MachineBasicBlock::iterator MI,
+ int FI, RegScavenger *RS) const;
+
void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
unsigned FIOperandNum,
RegScavenger *RS) const override;
@@ -111,13 +118,6 @@ public:
/// \returns true if this class contains VGPR registers.
bool hasVGPRs(const TargetRegisterClass *RC) const;
- /// returns true if this is a pseudoregister class combination of VGPRs and
- /// SGPRs for operand modeling. FIXME: We should set isAllocatable = 0 on
- /// them.
- static bool isPseudoRegClass(const TargetRegisterClass *RC) {
- return RC == &AMDGPU::VS_32RegClass || RC == &AMDGPU::VS_64RegClass;
- }
-
/// \returns A VGPR reg class with the same width as \p SRC
const TargetRegisterClass *getEquivalentVGPRClass(
const TargetRegisterClass *SRC) const;
@@ -137,20 +137,21 @@ public:
const TargetRegisterClass *SrcRC,
unsigned SrcSubReg) const override;
- /// \p Channel This is the register channel (e.g. a value from 0-16), not the
- /// SubReg index.
- /// \returns The sub-register of Reg that is in Channel.
- unsigned getPhysRegSubReg(unsigned Reg, const TargetRegisterClass *SubRC,
- unsigned Channel) const;
-
/// \returns True if operands defined with this operand type can accept
/// a literal constant (i.e. any 32-bit immediate).
- bool opCanUseLiteralConstant(unsigned OpType) const;
+ bool opCanUseLiteralConstant(unsigned OpType) const {
+ // TODO: 64-bit operands have extending behavior from 32-bit literal.
+ return OpType >= AMDGPU::OPERAND_REG_IMM_FIRST &&
+ OpType <= AMDGPU::OPERAND_REG_IMM_LAST;
+ }
/// \returns True if operands defined with this operand type can accept
/// an inline constant. i.e. An integer value in the range (-16, 64) or
/// -4.0f, -2.0f, -1.0f, -0.5f, 0.0f, 0.5f, 1.0f, 2.0f, 4.0f.
- bool opCanUseInlineConstant(unsigned OpType) const;
+ bool opCanUseInlineConstant(unsigned OpType) const {
+ return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
+ OpType <= AMDGPU::OPERAND_SRC_LAST;
+ }
enum PreloadedValue {
// SGPRS:
@@ -176,29 +177,104 @@ public:
unsigned getPreloadedValue(const MachineFunction &MF,
enum PreloadedValue Value) const;
- /// \brief Give the maximum number of VGPRs that can be used by \p WaveCount
- /// concurrent waves.
- unsigned getNumVGPRsAllowed(unsigned WaveCount) const;
-
- /// \brief Give the maximum number of SGPRs that can be used by \p WaveCount
- /// concurrent waves.
- unsigned getNumSGPRsAllowed(const SISubtarget &ST, unsigned WaveCount) const;
-
unsigned findUnusedRegister(const MachineRegisterInfo &MRI,
const TargetRegisterClass *RC,
const MachineFunction &MF) const;
- unsigned getSGPR32PressureSet() const { return SGPR32SetID; };
- unsigned getVGPR32PressureSet() const { return VGPR32SetID; };
+ unsigned getSGPRPressureSet() const { return SGPRSetID; };
+ unsigned getVGPRPressureSet() const { return VGPRSetID; };
+ const TargetRegisterClass *getRegClassForReg(const MachineRegisterInfo &MRI,
+ unsigned Reg) const;
bool isVGPR(const MachineRegisterInfo &MRI, unsigned Reg) const;
+ bool isSGPRPressureSet(unsigned SetID) const {
+ return SGPRPressureSets.test(SetID) && !VGPRPressureSets.test(SetID);
+ }
+ bool isVGPRPressureSet(unsigned SetID) const {
+ return VGPRPressureSets.test(SetID) && !SGPRPressureSets.test(SetID);
+ }
+
+ /// \returns SGPR allocation granularity supported by the subtarget.
+ unsigned getSGPRAllocGranule() const {
+ return 8;
+ }
+
+ /// \returns Total number of SGPRs supported by the subtarget.
+ unsigned getTotalNumSGPRs(const SISubtarget &ST) const;
+
+ /// \returns Number of addressable SGPRs supported by the subtarget.
+ unsigned getNumAddressableSGPRs(const SISubtarget &ST) const;
+
+ /// \returns Number of reserved SGPRs supported by the subtarget.
+ unsigned getNumReservedSGPRs(const SISubtarget &ST,
+ const SIMachineFunctionInfo &MFI) const;
+
+ /// \returns Minimum number of SGPRs that meets given number of waves per
+ /// execution unit requirement for given subtarget.
+ unsigned getMinNumSGPRs(const SISubtarget &ST, unsigned WavesPerEU) const;
+
+ /// \returns Maximum number of SGPRs that meets given number of waves per
+ /// execution unit requirement for given subtarget.
+ unsigned getMaxNumSGPRs(const SISubtarget &ST, unsigned WavesPerEU,
+ bool Addressable) const;
+
+ /// \returns Maximum number of SGPRs that meets number of waves per execution
+ /// unit requirement for function \p MF, or number of SGPRs explicitly
+ /// requested using "amdgpu-num-sgpr" attribute attached to function \p MF.
+ ///
+ /// \returns Value that meets number of waves per execution unit requirement
+ /// if explicitly requested value cannot be converted to integer, violates
+ /// subtarget's specifications, or does not meet number of waves per execution
+ /// unit requirement.
+ unsigned getMaxNumSGPRs(const MachineFunction &MF) const;
+
+ /// \returns VGPR allocation granularity supported by the subtarget.
+ unsigned getVGPRAllocGranule() const {
+ return 4;
+ }
+
+ /// \returns Total number of VGPRs supported by the subtarget.
+ unsigned getTotalNumVGPRs() const {
+ return 256;
+ }
+
+ /// \returns Number of reserved VGPRs for debugger use supported by the
+ /// subtarget.
+ unsigned getNumDebuggerReservedVGPRs(const SISubtarget &ST) const;
+
+ /// \returns Minimum number of VGPRs that meets given number of waves per
+ /// execution unit requirement.
+ unsigned getMinNumVGPRs(unsigned WavesPerEU) const;
+
+ /// \returns Maximum number of VGPRs that meets given number of waves per
+ /// execution unit requirement.
+ unsigned getMaxNumVGPRs(unsigned WavesPerEU) const;
+
+ /// \returns Maximum number of VGPRs that meets number of waves per execution
+ /// unit requirement for function \p MF, or number of VGPRs explicitly
+ /// requested using "amdgpu-num-vgpr" attribute attached to function \p MF.
+ ///
+ /// \returns Value that meets number of waves per execution unit requirement
+ /// if explicitly requested value cannot be converted to integer, violates
+ /// subtarget's specifications, or does not meet number of waves per execution
+ /// unit requirement.
+ unsigned getMaxNumVGPRs(const MachineFunction &MF) const;
+
+ ArrayRef<int16_t> getRegSplitParts(const TargetRegisterClass *RC,
+ unsigned EltSize) const;
+
private:
- void buildScratchLoadStore(MachineBasicBlock::iterator MI,
- unsigned LoadStoreOp, const MachineOperand *SrcDst,
- unsigned ScratchRsrcReg, unsigned ScratchOffset,
- int64_t Offset,
- RegScavenger *RS) const;
+ void buildSpillLoadStore(MachineBasicBlock::iterator MI,
+ unsigned LoadStoreOp,
+ int Index,
+ unsigned ValueReg,
+ bool ValueIsKill,
+ unsigned ScratchRsrcReg,
+ unsigned ScratchOffsetReg,
+ int64_t InstrOffset,
+ MachineMemOperand *MMO,
+ RegScavenger *RS) const;
};
} // End namespace llvm
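opCanUseLiteralConstant and opCanUseInlineConstant above are now plain range checks over the operand-type enum, which only works because the related enumerators are laid out contiguously. Below is a small self-contained sketch of that pattern; the enum layout is hypothetical and does not reflect the real AMDGPU operand-type values.

// Sketch of classifying operand types by enum range. The layout is
// hypothetical; only the contiguous-range pattern is the point.
#include <cassert>

enum OperandType {
  OPERAND_UNKNOWN,             // not a source operand
  OPERAND_REG_IMM_INT32,       // accepts any 32-bit literal
  OPERAND_REG_IMM_FP32,
  OPERAND_REG_INLINE_C_INT32,  // restricted to inline constants
  OPERAND_REG_INLINE_C_FP32,

  OPERAND_REG_IMM_FIRST = OPERAND_REG_IMM_INT32,
  OPERAND_REG_IMM_LAST = OPERAND_REG_IMM_FP32,
  OPERAND_SRC_FIRST = OPERAND_REG_IMM_INT32,
  OPERAND_SRC_LAST = OPERAND_REG_INLINE_C_FP32
};

bool opCanUseLiteralConstant(unsigned OpType) {
  return OpType >= OPERAND_REG_IMM_FIRST && OpType <= OPERAND_REG_IMM_LAST;
}

bool opCanUseInlineConstant(unsigned OpType) {
  return OpType >= OPERAND_SRC_FIRST && OpType <= OPERAND_SRC_LAST;
}

int main() {
  assert(opCanUseLiteralConstant(OPERAND_REG_IMM_FP32));
  assert(!opCanUseLiteralConstant(OPERAND_REG_INLINE_C_INT32));
  assert(opCanUseInlineConstant(OPERAND_REG_INLINE_C_INT32));
  return 0;
}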
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index c427874d467a..31e714b9f6b9 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/contrib/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -120,12 +120,19 @@ def SCC_CLASS : RegisterClass<"AMDGPU", [i1], 1, (add SCC)> {
let isAllocatable = 0;
}
+def M0_CLASS : RegisterClass<"AMDGPU", [i32], 32, (add M0)> {
+ let CopyCost = 1;
+ let isAllocatable = 0;
+}
+
// TODO: Do we need to set DwarfRegAlias on register tuples?
// SGPR 32-bit registers
-def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
+def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16], 32,
(add (sequence "SGPR%u", 0, 103))> {
- let AllocationPriority = 1;
+ // Give all SGPR classes higher priority than VGPR classes, because
+ // we want to spill SGPRs to VGPRs.
+ let AllocationPriority = 7;
}
// SGPR 64-bit registers
@@ -190,9 +197,10 @@ def TTMP_128Regs : RegisterTuples<[sub0, sub1, sub2, sub3],
(add (decimate (shl TTMP_32, 3), 4))]>;
// VGPR 32-bit registers
-def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
+def VGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16], 32,
(add (sequence "VGPR%u", 0, 255))> {
let AllocationPriority = 1;
+ let Size = 32;
}
// VGPR 64-bit registers
@@ -248,43 +256,51 @@ def VGPR_512 : RegisterTuples<[sub0, sub1, sub2, sub3, sub4, sub5, sub6, sub7,
// Register classes used as source and destination
//===----------------------------------------------------------------------===//
-class RegImmMatcher<string name> : AsmOperandClass {
- let Name = name;
- let RenderMethod = "addRegOrImmOperands";
-}
-
// Subset of SReg_32 without M0 for SMRD instructions and the like.
// See comments in SIInstructions.td for more info.
-def SReg_32_XM0 : RegisterClass<"AMDGPU", [i32, f32], 32,
- (add SGPR_32, VCC_LO, VCC_HI, EXEC_LO, EXEC_HI, FLAT_SCR_LO, FLAT_SCR_HI,
+def SReg_32_XM0_XEXEC : RegisterClass<"AMDGPU", [i32, f32, i16, f16], 32,
+ (add SGPR_32, VCC_LO, VCC_HI, FLAT_SCR_LO, FLAT_SCR_HI,
TTMP_32, TMA_LO, TMA_HI, TBA_LO, TBA_HI)> {
- let AllocationPriority = 1;
+ let AllocationPriority = 7;
+}
+
+def SReg_32_XM0 : RegisterClass<"AMDGPU", [i32, f32, i16, f16], 32,
+ (add SReg_32_XM0_XEXEC, EXEC_LO, EXEC_HI)> {
+ let AllocationPriority = 7;
}
// Register class for all scalar registers (SGPRs + Special Registers)
-def SReg_32 : RegisterClass<"AMDGPU", [i32, f32], 32,
- (add SReg_32_XM0, M0)> {
- let AllocationPriority = 1;
+def SReg_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16], 32,
+ (add SReg_32_XM0, M0_CLASS, EXEC_LO, EXEC_HI)> {
+ let AllocationPriority = 7;
}
def SGPR_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 32, (add SGPR_64Regs)> {
- let AllocationPriority = 2;
+ let CopyCost = 1;
+ let AllocationPriority = 8;
}
def TTMP_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64], 32, (add TTMP_64Regs)> {
let isAllocatable = 0;
}
+def SReg_64_XEXEC : RegisterClass<"AMDGPU", [v2i32, i64, f64, i1], 32,
+ (add SGPR_64, VCC, FLAT_SCR, TTMP_64, TBA, TMA)> {
+ let CopyCost = 1;
+ let AllocationPriority = 8;
+}
+
def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, f64, i1], 32,
- (add SGPR_64, VCC, EXEC, FLAT_SCR, TTMP_64, TBA, TMA)> {
- let AllocationPriority = 2;
+ (add SReg_64_XEXEC, EXEC)> {
+ let CopyCost = 1;
+ let AllocationPriority = 8;
}
// Requires 2 s_mov_b64 to copy
let CopyCost = 2 in {
def SGPR_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add SGPR_128Regs)> {
- let AllocationPriority = 4;
+ let AllocationPriority = 10;
}
def TTMP_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add TTMP_128Regs)> {
@@ -292,7 +308,7 @@ def TTMP_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add TTMP_128R
}
def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add SGPR_128, TTMP_128)> {
- let AllocationPriority = 4;
+ let AllocationPriority = 10;
}
} // End CopyCost = 2
@@ -300,17 +316,19 @@ def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8, v2i64], 32, (add SGPR_128,
def SReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add SGPR_256)> {
// Requires 4 s_mov_b64 to copy
let CopyCost = 4;
- let AllocationPriority = 5;
+ let AllocationPriority = 11;
}
def SReg_512 : RegisterClass<"AMDGPU", [v64i8, v16i32], 32, (add SGPR_512)> {
// Requires 8 s_mov_b64 to copy
let CopyCost = 8;
- let AllocationPriority = 6;
+ let AllocationPriority = 12;
}
// Register class for all vector registers (VGPRs + Interpolation Registers)
def VReg_64 : RegisterClass<"AMDGPU", [i64, f64, v2i32, v2f32], 32, (add VGPR_64)> {
+ let Size = 64;
+
// Requires 2 v_mov_b32 to copy
let CopyCost = 2;
let AllocationPriority = 2;
@@ -325,17 +343,21 @@ def VReg_96 : RegisterClass<"AMDGPU", [untyped], 32, (add VGPR_96)> {
}
def VReg_128 : RegisterClass<"AMDGPU", [v4i32, v4f32, v2i64, v2f64], 32, (add VGPR_128)> {
+ let Size = 128;
+
// Requires 4 v_mov_b32 to copy
let CopyCost = 4;
let AllocationPriority = 4;
}
def VReg_256 : RegisterClass<"AMDGPU", [v8i32, v8f32], 32, (add VGPR_256)> {
+ let Size = 256;
let CopyCost = 8;
let AllocationPriority = 5;
}
def VReg_512 : RegisterClass<"AMDGPU", [v16i32, v16f32], 32, (add VGPR_512)> {
+ let Size = 512;
let CopyCost = 16;
let AllocationPriority = 6;
}
@@ -344,80 +366,100 @@ def VReg_1 : RegisterClass<"AMDGPU", [i1], 32, (add VGPR_32)> {
let Size = 32;
}
-class RegImmOperand <RegisterClass rc> : RegisterOperand<rc> {
- let OperandNamespace = "AMDGPU";
- let OperandType = "OPERAND_REG_IMM32";
+def VS_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16], 32,
+ (add VGPR_32, SReg_32)> {
+ let isAllocatable = 0;
}
-class RegInlineOperand <RegisterClass rc> : RegisterOperand<rc> {
- let OperandNamespace = "AMDGPU";
- let OperandType = "OPERAND_REG_INLINE_C";
+def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 32, (add VReg_64, SReg_64)> {
+ let isAllocatable = 0;
}
//===----------------------------------------------------------------------===//
-// SSrc_* Operands with an SGPR or a 32-bit immediate
+// Register operands
//===----------------------------------------------------------------------===//
-def SSrc_32 : RegImmOperand<SReg_32> {
- let ParserMatchClass = RegImmMatcher<"SSrc32">;
+class RegImmMatcher<string name> : AsmOperandClass {
+ let Name = name;
+ let RenderMethod = "addRegOrImmOperands";
}
-def SSrc_64 : RegImmOperand<SReg_64> {
- let ParserMatchClass = RegImmMatcher<"SSrc64">;
+multiclass SIRegOperand <string rc, string MatchName, string opType> {
+ let OperandNamespace = "AMDGPU" in {
+ def _b16 : RegisterOperand<!cast<RegisterClass>(rc#"_32")> {
+ let OperandType = opType#"_INT16";
+ let ParserMatchClass = RegImmMatcher<MatchName#"B16">;
+ let DecoderMethod = "decodeOperand_VSrc16";
+ }
+
+ def _f16 : RegisterOperand<!cast<RegisterClass>(rc#"_32")> {
+ let OperandType = opType#"_FP16";
+ let ParserMatchClass = RegImmMatcher<MatchName#"F16">;
+ let DecoderMethod = "decodeOperand_VSrc16";
+ }
+
+ def _b32 : RegisterOperand<!cast<RegisterClass>(rc#"_32")> {
+ let OperandType = opType#"_INT32";
+ let ParserMatchClass = RegImmMatcher<MatchName#"B32">;
+ }
+
+ def _f32 : RegisterOperand<!cast<RegisterClass>(rc#"_32")> {
+ let OperandType = opType#"_FP32";
+ let ParserMatchClass = RegImmMatcher<MatchName#"F32">;
+ }
+
+ def _b64 : RegisterOperand<!cast<RegisterClass>(rc#"_64")> {
+ let OperandType = opType#"_INT64";
+ let ParserMatchClass = RegImmMatcher<MatchName#"B64">;
+ }
+
+ def _f64 : RegisterOperand<!cast<RegisterClass>(rc#"_64")> {
+ let OperandType = opType#"_FP64";
+ let ParserMatchClass = RegImmMatcher<MatchName#"F64">;
+ }
+ }
}
+// FIXME: 64-bit sources can sometimes use 32-bit constants.
+multiclass RegImmOperand <string rc, string MatchName>
+ : SIRegOperand<rc, MatchName, "OPERAND_REG_IMM">;
+
+multiclass RegInlineOperand <string rc, string MatchName>
+ : SIRegOperand<rc, MatchName, "OPERAND_REG_INLINE_C">;
+
//===----------------------------------------------------------------------===//
-// SCSrc_* Operands with an SGPR or a inline constant
+// SSrc_* Operands with an SGPR or a 32-bit immediate
//===----------------------------------------------------------------------===//
-def SCSrc_32 : RegInlineOperand<SReg_32> {
- let ParserMatchClass = RegImmMatcher<"SCSrc32">;
-}
+defm SSrc : RegImmOperand<"SReg", "SSrc">;
//===----------------------------------------------------------------------===//
-// VSrc_* Operands with an SGPR, VGPR or a 32-bit immediate
+// SCSrc_* Operands with an SGPR or an inline constant
//===----------------------------------------------------------------------===//
-def VS_32 : RegisterClass<"AMDGPU", [i32, f32], 32, (add VGPR_32, SReg_32)>;
+defm SCSrc : RegInlineOperand<"SReg", "SCSrc">;
-def VS_64 : RegisterClass<"AMDGPU", [i64, f64], 32, (add VReg_64, SReg_64)> {
- let CopyCost = 2;
-}
+//===----------------------------------------------------------------------===//
+// VSrc_* Operands with an SGPR, VGPR or a 32-bit immediate
+//===----------------------------------------------------------------------===//
-def VSrc_32 : RegisterOperand<VS_32> {
- let OperandNamespace = "AMDGPU";
- let OperandType = "OPERAND_REG_IMM32";
- let ParserMatchClass = RegImmMatcher<"VSrc32">;
-}
+defm VSrc : RegImmOperand<"VS", "VSrc">;
-def VSrc_64 : RegisterOperand<VS_64> {
- let OperandNamespace = "AMDGPU";
- let OperandType = "OPERAND_REG_IMM32";
- let ParserMatchClass = RegImmMatcher<"VSrc64">;
-}
+def VSrc_128 : RegisterOperand<VReg_128>;
//===----------------------------------------------------------------------===//
-// VCSrc_* Operands with an SGPR, VGPR or an inline constant
+// VSrc_* Operands with a VGPR
//===----------------------------------------------------------------------===//
-def VCSrc_32 : RegisterOperand<VS_32> {
- let OperandNamespace = "AMDGPU";
- let OperandType = "OPERAND_REG_INLINE_C";
- let ParserMatchClass = RegImmMatcher<"VCSrc32">;
-}
-
-def VCSrc_64 : RegisterOperand<VS_64> {
- let OperandNamespace = "AMDGPU";
- let OperandType = "OPERAND_REG_INLINE_C";
- let ParserMatchClass = RegImmMatcher<"VCSrc64">;
+// This is for operands with the enum(9), VSrc encoding restriction,
+// but which only allow VGPRs.
+def VRegSrc_32 : RegisterOperand<VGPR_32> {
+ //let ParserMatchClass = RegImmMatcher<"VRegSrc32">;
+ let DecoderMethod = "DecodeVS_32RegisterClass";
}
//===----------------------------------------------------------------------===//
-// SCSrc_* Operands with an SGPR or an inline constant
+// VCSrc_* Operands with an SGPR, VGPR or an inline constant
//===----------------------------------------------------------------------===//
-def SCSrc_64 : RegisterOperand<SReg_64> {
- let OperandNamespace = "AMDGPU";
- let OperandType = "OPERAND_REG_INLINE_C";
- let ParserMatchClass = RegImmMatcher<"SCSrc64">;
-}
+defm VCSrc : RegInlineOperand<"VS", "VCSrc">;
diff --git a/contrib/llvm/lib/Target/AMDGPU/SISchedule.td b/contrib/llvm/lib/Target/AMDGPU/SISchedule.td
index ed19217226b8..be27966fd5f1 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SISchedule.td
+++ b/contrib/llvm/lib/Target/AMDGPU/SISchedule.td
@@ -46,7 +46,11 @@ def Write64Bit : SchedWrite;
// instructions)
class SISchedMachineModel : SchedMachineModel {
- let CompleteModel = 0;
+ let CompleteModel = 1;
+ // MicroOpBufferSize = 1 means that instructions will always be added
+ // to the ready queue when they become available. This exposes them
+ // to the register pressure analysis.
+ let MicroOpBufferSize = 1;
let IssueWidth = 1;
let PostRAScheduler = 1;
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/contrib/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 6cba55300a8c..b27d7c691032 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -45,9 +45,7 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
- const char *getPassName() const override {
- return "SI Shrink Instructions";
- }
+ StringRef getPassName() const override { return "SI Shrink Instructions"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
@@ -93,6 +91,7 @@ static bool canShrink(MachineInstr &MI, const SIInstrInfo *TII,
default: return false;
case AMDGPU::V_MAC_F32_e64:
+ case AMDGPU::V_MAC_F16_e64:
if (!isVGPR(Src2, TRI, MRI) ||
TII->hasModifiersSet(MI, AMDGPU::OpName::src2_modifiers))
return false;
@@ -134,23 +133,15 @@ static void foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
assert(TII->isVOP1(MI) || TII->isVOP2(MI) || TII->isVOPC(MI));
- const SIRegisterInfo &TRI = TII->getRegisterInfo();
int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
- MachineOperand &Src0 = MI.getOperand(Src0Idx);
// Only one literal constant is allowed per instruction, so if src0 is a
// literal constant then we can't do any folding.
- if (Src0.isImm() &&
- TII->isLiteralConstant(Src0, TII->getOpSize(MI, Src0Idx)))
- return;
-
- // Literal constants and SGPRs can only be used in Src0, so if Src0 is an
- // SGPR, we cannot commute the instruction, so we can't fold any literal
- // constants.
- if (Src0.isReg() && !isVGPR(&Src0, TRI, MRI))
+ if (TII->isLiteralConstant(MI, Src0Idx))
return;
// Try to fold Src0
+ MachineOperand &Src0 = MI.getOperand(Src0Idx);
if (Src0.isReg() && MRI.hasOneUse(Src0.getReg())) {
unsigned Reg = Src0.getReg();
MachineInstr *Def = MRI.getUniqueVRegDef(Reg);
@@ -158,7 +149,8 @@ static void foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
MachineOperand &MovSrc = Def->getOperand(1);
bool ConstantFolded = false;
- if (MovSrc.isImm() && isUInt<32>(MovSrc.getImm())) {
+ if (MovSrc.isImm() && (isInt<32>(MovSrc.getImm()) ||
+ isUInt<32>(MovSrc.getImm()))) {
Src0.ChangeToImmediate(MovSrc.getImm());
ConstantFolded = true;
}
@@ -191,7 +183,95 @@ static void copyFlagsToImplicitVCC(MachineInstr &MI,
}
static bool isKImmOperand(const SIInstrInfo *TII, const MachineOperand &Src) {
- return isInt<16>(Src.getImm()) && !TII->isInlineConstant(Src, 4);
+ return isInt<16>(Src.getImm()) &&
+ !TII->isInlineConstant(*Src.getParent(),
+ Src.getParent()->getOperandNo(&Src));
+}
+
+static bool isKUImmOperand(const SIInstrInfo *TII, const MachineOperand &Src) {
+ return isUInt<16>(Src.getImm()) &&
+ !TII->isInlineConstant(*Src.getParent(),
+ Src.getParent()->getOperandNo(&Src));
+}
+
+static bool isKImmOrKUImmOperand(const SIInstrInfo *TII,
+ const MachineOperand &Src,
+ bool &IsUnsigned) {
+ if (isInt<16>(Src.getImm())) {
+ IsUnsigned = false;
+ return !TII->isInlineConstant(Src);
+ }
+
+ if (isUInt<16>(Src.getImm())) {
+ IsUnsigned = true;
+ return !TII->isInlineConstant(Src);
+ }
+
+ return false;
+}
+
+/// \returns true if the constant in \p Src should be replaced with a bitreverse
+/// of an inline immediate.
+static bool isReverseInlineImm(const SIInstrInfo *TII,
+ const MachineOperand &Src,
+ int32_t &ReverseImm) {
+ if (!isInt<32>(Src.getImm()) || TII->isInlineConstant(Src))
+ return false;
+
+ ReverseImm = reverseBits<int32_t>(static_cast<int32_t>(Src.getImm()));
+ return ReverseImm >= -16 && ReverseImm <= 64;
+}
+
+/// Copy implicit register operands from specified instruction to this
+/// instruction that are not part of the instruction definition.
+static void copyExtraImplicitOps(MachineInstr &NewMI, MachineFunction &MF,
+ const MachineInstr &MI) {
+ for (unsigned i = MI.getDesc().getNumOperands() +
+ MI.getDesc().getNumImplicitUses() +
+ MI.getDesc().getNumImplicitDefs(), e = MI.getNumOperands();
+ i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if ((MO.isReg() && MO.isImplicit()) || MO.isRegMask())
+ NewMI.addOperand(MF, MO);
+ }
+}
+
+static void shrinkScalarCompare(const SIInstrInfo *TII, MachineInstr &MI) {
+ // cmpk instructions do scc = dst <cc op> imm16, so commute the instruction to
+ // get constants on the RHS.
+ if (!MI.getOperand(0).isReg())
+ TII->commuteInstruction(MI, false, 0, 1);
+
+ const MachineOperand &Src1 = MI.getOperand(1);
+ if (!Src1.isImm())
+ return;
+
+ int SOPKOpc = AMDGPU::getSOPKOp(MI.getOpcode());
+ if (SOPKOpc == -1)
+ return;
+
+ // eq/ne is special because the imm16 can be treated as signed or unsigned,
+ // and initially selected to the unsigned versions.
+ if (SOPKOpc == AMDGPU::S_CMPK_EQ_U32 || SOPKOpc == AMDGPU::S_CMPK_LG_U32) {
+ bool HasUImm;
+ if (isKImmOrKUImmOperand(TII, Src1, HasUImm)) {
+ if (!HasUImm) {
+ SOPKOpc = (SOPKOpc == AMDGPU::S_CMPK_EQ_U32) ?
+ AMDGPU::S_CMPK_EQ_I32 : AMDGPU::S_CMPK_LG_I32;
+ }
+
+ MI.setDesc(TII->get(SOPKOpc));
+ }
+
+ return;
+ }
+
+ const MCInstrDesc &NewDesc = TII->get(SOPKOpc);
+
+ if ((TII->sopkIsZext(SOPKOpc) && isKUImmOperand(TII, Src1)) ||
+ (!TII->sopkIsZext(SOPKOpc) && isKImmOperand(TII, Src1))) {
+ MI.setDesc(NewDesc);
+ }
}
bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
@@ -226,14 +306,11 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
MachineOperand &Src = MI.getOperand(1);
if (Src.isImm() &&
TargetRegisterInfo::isPhysicalRegister(MI.getOperand(0).getReg())) {
- int64_t Imm = Src.getImm();
- if (isInt<32>(Imm) && !TII->isInlineConstant(Src, 4)) {
- int32_t ReverseImm = reverseBits<int32_t>(static_cast<int32_t>(Imm));
- if (ReverseImm >= -16 && ReverseImm <= 64) {
- MI.setDesc(TII->get(AMDGPU::V_BFREV_B32_e32));
- Src.setImm(ReverseImm);
- continue;
- }
+ int32_t ReverseImm;
+ if (isReverseInlineImm(TII, Src, ReverseImm)) {
+ MI.setDesc(TII->get(AMDGPU::V_BFREV_B32_e32));
+ Src.setImm(ReverseImm);
+ continue;
}
}
}
@@ -272,21 +349,27 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
// satisfied.
if (MI.getOpcode() == AMDGPU::S_ADD_I32 ||
MI.getOpcode() == AMDGPU::S_MUL_I32) {
- const MachineOperand &Dest = MI.getOperand(0);
- const MachineOperand &Src0 = MI.getOperand(1);
- const MachineOperand &Src1 = MI.getOperand(2);
+ const MachineOperand *Dest = &MI.getOperand(0);
+ MachineOperand *Src0 = &MI.getOperand(1);
+ MachineOperand *Src1 = &MI.getOperand(2);
+
+ if (!Src0->isReg() && Src1->isReg()) {
+ if (TII->commuteInstruction(MI, false, 1, 2))
+ std::swap(Src0, Src1);
+ }
// FIXME: This could work better if hints worked with subregisters. If
// we have a vector add of a constant, we usually don't get the correct
// allocation due to the subregister usage.
- if (TargetRegisterInfo::isVirtualRegister(Dest.getReg()) &&
- Src0.isReg()) {
- MRI.setRegAllocationHint(Dest.getReg(), 0, Src0.getReg());
+ if (TargetRegisterInfo::isVirtualRegister(Dest->getReg()) &&
+ Src0->isReg()) {
+ MRI.setRegAllocationHint(Dest->getReg(), 0, Src0->getReg());
+ MRI.setRegAllocationHint(Src0->getReg(), 0, Dest->getReg());
continue;
}
- if (Src0.isReg() && Src0.getReg() == Dest.getReg()) {
- if (Src1.isImm() && isKImmOperand(TII, Src1)) {
+ if (Src0->isReg() && Src0->getReg() == Dest->getReg()) {
+ if (Src1->isImm() && isKImmOperand(TII, *Src1)) {
unsigned Opc = (MI.getOpcode() == AMDGPU::S_ADD_I32) ?
AMDGPU::S_ADDK_I32 : AMDGPU::S_MULK_I32;
@@ -296,12 +379,27 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
}
}
+ // Try to use s_cmpk_*
+ if (MI.isCompare() && TII->isSOPC(MI)) {
+ shrinkScalarCompare(TII, MI);
+ continue;
+ }
+
// Try to use S_MOVK_I32, which will save 4 bytes for small immediates.
if (MI.getOpcode() == AMDGPU::S_MOV_B32) {
- const MachineOperand &Src = MI.getOperand(1);
+ const MachineOperand &Dst = MI.getOperand(0);
+ MachineOperand &Src = MI.getOperand(1);
- if (Src.isImm() && isKImmOperand(TII, Src))
- MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
+ if (Src.isImm() &&
+ TargetRegisterInfo::isPhysicalRegister(Dst.getReg())) {
+ int32_t ReverseImm;
+ if (isKImmOperand(TII, Src))
+ MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
+ else if (isReverseInlineImm(TII, Src, ReverseImm)) {
+ MI.setDesc(TII->get(AMDGPU::S_BREV_B32));
+ Src.setImm(ReverseImm);
+ }
+ }
continue;
}
@@ -398,9 +496,13 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
}
++NumInstructionsShrunk;
- MI.eraseFromParent();
+ // Copy extra operands not present in the instruction definition.
+ copyExtraImplicitOps(*Inst32, MF, MI);
+
+ MI.eraseFromParent();
foldImmediates(*Inst32, TII, MRI);
+
DEBUG(dbgs() << "e32 MI = " << *Inst32 << '\n');
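isReverseInlineImm above drives the V_BFREV_B32/S_BREV_B32 rewrites: a 32-bit literal that is not an inline constant can still be encoded cheaply when its bit-reversed value is one. The following is a standalone sketch of that check, assuming a simplified integer inline-constant range of -16..64 and ignoring the floating-point inline values.

// Sketch of the bit-reverse immediate trick. reverseBits32 and the
// inline-constant test are simplified stand-ins for the LLVM helpers.
#include <cstdint>
#include <cstdio>

static uint32_t reverseBits32(uint32_t V) {
  uint32_t R = 0;
  for (int i = 0; i < 32; ++i)
    R |= ((V >> i) & 1u) << (31 - i);
  return R;
}

static bool isInlineConstant(int32_t Imm) { return Imm >= -16 && Imm <= 64; }

// Returns true and sets ReverseImm if the literal should instead be
// materialized by bit-reversing an inline constant.
static bool isReverseInlineImm(int32_t Imm, int32_t &ReverseImm) {
  if (isInlineConstant(Imm))
    return false; // already cheap to encode
  ReverseImm = static_cast<int32_t>(reverseBits32(static_cast<uint32_t>(Imm)));
  return isInlineConstant(ReverseImm);
}

int main() {
  int32_t Rev;
  // 0x80000000 bit-reverses to 1, which is an inline constant.
  if (isReverseInlineImm(INT32_MIN, Rev))
    std::printf("materialize via reverse of inline constant %d\n", Rev);
  return 0;
}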
diff --git a/contrib/llvm/lib/Target/AMDGPU/SITypeRewriter.cpp b/contrib/llvm/lib/Target/AMDGPU/SITypeRewriter.cpp
index facc0c7df1dc..aad68537f779 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SITypeRewriter.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SITypeRewriter.cpp
@@ -42,9 +42,7 @@ public:
SITypeRewriter() : FunctionPass(ID) { }
bool doInitialization(Module &M) override;
bool runOnFunction(Function &F) override;
- const char *getPassName() const override {
- return "SI Type Rewriter";
- }
+ StringRef getPassName() const override { return "SI Type Rewriter"; }
void visitLoadInst(LoadInst &I);
void visitCallInst(CallInst &I);
void visitBitCast(BitCastInst &I);
diff --git a/contrib/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/contrib/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index 1534d5825696..a613a220e29d 100644
--- a/contrib/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -53,10 +53,28 @@
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "SIMachineFunctionInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <cassert>
+#include <vector>
using namespace llvm;
@@ -69,6 +87,25 @@ enum {
StateExact = 0x2,
};
+struct PrintState {
+public:
+ int State;
+
+ explicit PrintState(int State) : State(State) {}
+};
+
+static raw_ostream &operator<<(raw_ostream &OS, const PrintState &PS) {
+ if (PS.State & StateWQM)
+ OS << "WQM";
+ if (PS.State & StateExact) {
+ if (PS.State & StateWQM)
+ OS << '|';
+ OS << "Exact";
+ }
+
+ return OS;
+}
+
struct InstrInfo {
char Needs = 0;
char OutNeeds = 0;
@@ -84,7 +121,7 @@ struct WorkItem {
MachineBasicBlock *MBB = nullptr;
MachineInstr *MI = nullptr;
- WorkItem() {}
+ WorkItem() = default;
WorkItem(MachineBasicBlock *MBB) : MBB(MBB) {}
WorkItem(MachineInstr *MI) : MI(MI) {}
};
@@ -98,16 +135,26 @@ private:
DenseMap<const MachineInstr *, InstrInfo> Instructions;
DenseMap<MachineBasicBlock *, BlockInfo> Blocks;
- SmallVector<const MachineInstr *, 2> ExecExports;
SmallVector<MachineInstr *, 1> LiveMaskQueries;
+ void printInfo();
+
void markInstruction(MachineInstr &MI, char Flag,
std::vector<WorkItem> &Worklist);
+ void markUsesWQM(const MachineInstr &MI, std::vector<WorkItem> &Worklist);
char scanInstructions(MachineFunction &MF, std::vector<WorkItem> &Worklist);
void propagateInstruction(MachineInstr &MI, std::vector<WorkItem> &Worklist);
void propagateBlock(MachineBasicBlock &MBB, std::vector<WorkItem> &Worklist);
char analyzeFunction(MachineFunction &MF);
+ bool requiresCorrectState(const MachineInstr &MI) const;
+
+ MachineBasicBlock::iterator saveSCC(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator Before);
+ MachineBasicBlock::iterator
+ prepareInsertion(MachineBasicBlock &MBB, MachineBasicBlock::iterator First,
+ MachineBasicBlock::iterator Last, bool PreferLast,
+ bool SaveSCC);
void toExact(MachineBasicBlock &MBB, MachineBasicBlock::iterator Before,
unsigned SaveWQM, unsigned LiveMaskReg);
void toWQM(MachineBasicBlock &MBB, MachineBasicBlock::iterator Before,
@@ -124,9 +171,7 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
- const char *getPassName() const override {
- return "SI Whole Quad Mode";
- }
+ StringRef getPassName() const override { return "SI Whole Quad Mode"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<LiveIntervals>();
@@ -135,7 +180,7 @@ public:
}
};
-} // End anonymous namespace
+} // end anonymous namespace
char SIWholeQuadMode::ID = 0;
@@ -151,6 +196,24 @@ FunctionPass *llvm::createSIWholeQuadModePass() {
return new SIWholeQuadMode;
}
+void SIWholeQuadMode::printInfo() {
+ for (const auto &BII : Blocks) {
+ dbgs() << "\nBB#" << BII.first->getNumber() << ":\n"
+ << " InNeeds = " << PrintState(BII.second.InNeeds)
+ << ", Needs = " << PrintState(BII.second.Needs)
+ << ", OutNeeds = " << PrintState(BII.second.OutNeeds) << "\n\n";
+
+ for (const MachineInstr &MI : *BII.first) {
+ auto III = Instructions.find(&MI);
+ if (III == Instructions.end())
+ continue;
+
+ dbgs() << " " << MI << " Needs = " << PrintState(III->second.Needs)
+ << ", OutNeeds = " << PrintState(III->second.OutNeeds) << '\n';
+ }
+ }
+}
+
void SIWholeQuadMode::markInstruction(MachineInstr &MI, char Flag,
std::vector<WorkItem> &Worklist) {
InstrInfo &II = Instructions[&MI];
@@ -168,6 +231,45 @@ void SIWholeQuadMode::markInstruction(MachineInstr &MI, char Flag,
Worklist.push_back(&MI);
}
+/// Mark all instructions defining the uses in \p MI as WQM.
+void SIWholeQuadMode::markUsesWQM(const MachineInstr &MI,
+ std::vector<WorkItem> &Worklist) {
+ for (const MachineOperand &Use : MI.uses()) {
+ if (!Use.isReg() || !Use.isUse())
+ continue;
+
+ unsigned Reg = Use.getReg();
+
+ // Handle physical registers that we need to track; this is mostly relevant
+ // for VCC, which can appear as the (implicit) input of a uniform branch,
+ // e.g. when a loop counter is stored in a VGPR.
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Reg == AMDGPU::EXEC)
+ continue;
+
+ for (MCRegUnitIterator RegUnit(Reg, TRI); RegUnit.isValid(); ++RegUnit) {
+ LiveRange &LR = LIS->getRegUnit(*RegUnit);
+ const VNInfo *Value = LR.Query(LIS->getInstructionIndex(MI)).valueIn();
+ if (!Value)
+ continue;
+
+ // Since we're in machine SSA, we do not need to track physical
+ // registers across basic blocks.
+ if (Value->isPHIDef())
+ continue;
+
+ markInstruction(*LIS->getInstructionFromIndex(Value->def), StateWQM,
+ Worklist);
+ }
+
+ continue;
+ }
+
+ for (MachineInstr &DefMI : MRI->def_instructions(Use.getReg()))
+ markInstruction(DefMI, StateWQM, Worklist);
+ }
+}
+
// Scan instructions to determine which ones require an Exact execmask and
// which ones seed WQM requirements.
char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
@@ -183,16 +285,19 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
unsigned Opcode = MI.getOpcode();
char Flags = 0;
- if (TII->isWQM(Opcode) || TII->isDS(Opcode)) {
+ if (TII->isDS(Opcode)) {
Flags = StateWQM;
+ } else if (TII->isWQM(Opcode)) {
+ // Sampling instructions don't need to produce results for all pixels
+ // in a quad; they just require all inputs of a quad to have been
+ // computed for derivatives.
+ markUsesWQM(MI, Worklist);
+ GlobalFlags |= StateWQM;
+ continue;
} else if (TII->isDisableWQM(MI)) {
Flags = StateExact;
} else {
- // Handle export instructions with the exec mask valid flag set
- if (Opcode == AMDGPU::EXP) {
- if (MI.getOperand(4).getImm() != 0)
- ExecExports.push_back(&MI);
- } else if (Opcode == AMDGPU::SI_PS_LIVE) {
+ if (Opcode == AMDGPU::SI_PS_LIVE) {
LiveMaskQueries.push_back(&MI);
} else if (WQMOutputs) {
// The function is in machine SSA form, which means that physical
@@ -259,43 +364,9 @@ void SIWholeQuadMode::propagateInstruction(MachineInstr &MI,
// Propagate WQM flag to instruction inputs
assert(II.Needs != (StateWQM | StateExact));
- if (II.Needs != StateWQM)
- return;
-
- for (const MachineOperand &Use : MI.uses()) {
- if (!Use.isReg() || !Use.isUse())
- continue;
-
- unsigned Reg = Use.getReg();
-
- // Handle physical registers that we need to track; this is mostly relevant
- // for VCC, which can appear as the (implicit) input of a uniform branch,
- // e.g. when a loop counter is stored in a VGPR.
- if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
- if (Reg == AMDGPU::EXEC)
- continue;
- for (MCRegUnitIterator RegUnit(Reg, TRI); RegUnit.isValid(); ++RegUnit) {
- LiveRange &LR = LIS->getRegUnit(*RegUnit);
- const VNInfo *Value = LR.Query(LIS->getInstructionIndex(MI)).valueIn();
- if (!Value)
- continue;
-
- // Since we're in machine SSA, we do not need to track physical
- // registers across basic blocks.
- if (Value->isPHIDef())
- continue;
-
- markInstruction(*LIS->getInstructionFromIndex(Value->def), StateWQM,
- Worklist);
- }
-
- continue;
- }
-
- for (MachineInstr &DefMI : MRI->def_instructions(Use.getReg()))
- markInstruction(DefMI, StateWQM, Worklist);
- }
+ if (II.Needs == StateWQM)
+ markUsesWQM(MI, Worklist);
}
void SIWholeQuadMode::propagateBlock(MachineBasicBlock &MBB,
@@ -351,32 +422,140 @@ char SIWholeQuadMode::analyzeFunction(MachineFunction &MF) {
return GlobalFlags;
}
+/// Whether \p MI really requires the exec state computed during analysis.
+///
+/// Scalar instructions must occasionally be marked WQM for correct propagation
+/// (e.g. thread masks leading up to branches), but when it comes to actual
+/// execution, they don't care about EXEC.
+bool SIWholeQuadMode::requiresCorrectState(const MachineInstr &MI) const {
+ if (MI.isTerminator())
+ return true;
+
+ // Skip instructions that are not affected by EXEC
+ if (TII->isScalarUnit(MI))
+ return false;
+
+ // Generic instructions such as COPY will either disappear by register
+ // coalescing or be lowered to SALU or VALU instructions.
+ if (MI.isTransient()) {
+ if (MI.getNumExplicitOperands() >= 1) {
+ const MachineOperand &Op = MI.getOperand(0);
+ if (Op.isReg()) {
+ if (TRI->isSGPRReg(*MRI, Op.getReg())) {
+ // SGPR instructions are not affected by EXEC
+ return false;
+ }
+ }
+ }
+ }
+
+ return true;
+}
+
+MachineBasicBlock::iterator
+SIWholeQuadMode::saveSCC(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator Before) {
+ unsigned SaveReg = MRI->createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
+
+ MachineInstr *Save =
+ BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::COPY), SaveReg)
+ .addReg(AMDGPU::SCC);
+ MachineInstr *Restore =
+ BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::COPY), AMDGPU::SCC)
+ .addReg(SaveReg);
+
+ LIS->InsertMachineInstrInMaps(*Save);
+ LIS->InsertMachineInstrInMaps(*Restore);
+ LIS->createAndComputeVirtRegInterval(SaveReg);
+
+ return Restore;
+}
+
+// Return an iterator in the (inclusive) range [First, Last] at which
+// instructions can be safely inserted, keeping in mind that some of the
+// instructions we want to add necessarily clobber SCC.
+MachineBasicBlock::iterator SIWholeQuadMode::prepareInsertion(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator First,
+ MachineBasicBlock::iterator Last, bool PreferLast, bool SaveSCC) {
+ if (!SaveSCC)
+ return PreferLast ? Last : First;
+
+ LiveRange &LR = LIS->getRegUnit(*MCRegUnitIterator(AMDGPU::SCC, TRI));
+ auto MBBE = MBB.end();
+ SlotIndex FirstIdx = First != MBBE ? LIS->getInstructionIndex(*First)
+ : LIS->getMBBEndIdx(&MBB);
+ SlotIndex LastIdx =
+ Last != MBBE ? LIS->getInstructionIndex(*Last) : LIS->getMBBEndIdx(&MBB);
+ SlotIndex Idx = PreferLast ? LastIdx : FirstIdx;
+ const LiveRange::Segment *S;
+
+ for (;;) {
+ S = LR.getSegmentContaining(Idx);
+ if (!S)
+ break;
+
+ if (PreferLast) {
+ SlotIndex Next = S->start.getBaseIndex();
+ if (Next < FirstIdx)
+ break;
+ Idx = Next;
+ } else {
+ SlotIndex Next = S->end.getNextIndex().getBaseIndex();
+ if (Next > LastIdx)
+ break;
+ Idx = Next;
+ }
+ }
+
+ MachineBasicBlock::iterator MBBI;
+
+ if (MachineInstr *MI = LIS->getInstructionFromIndex(Idx))
+ MBBI = MI;
+ else {
+ assert(Idx == LIS->getMBBEndIdx(&MBB));
+ MBBI = MBB.end();
+ }
+
+ if (S)
+ MBBI = saveSCC(MBB, MBBI);
+
+ return MBBI;
+}
+
void SIWholeQuadMode::toExact(MachineBasicBlock &MBB,
MachineBasicBlock::iterator Before,
unsigned SaveWQM, unsigned LiveMaskReg) {
+ MachineInstr *MI;
+
if (SaveWQM) {
- BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::S_AND_SAVEEXEC_B64),
- SaveWQM)
- .addReg(LiveMaskReg);
+ MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::S_AND_SAVEEXEC_B64),
+ SaveWQM)
+ .addReg(LiveMaskReg);
} else {
- BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::S_AND_B64),
- AMDGPU::EXEC)
- .addReg(AMDGPU::EXEC)
- .addReg(LiveMaskReg);
+ MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::S_AND_B64),
+ AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC)
+ .addReg(LiveMaskReg);
}
+
+ LIS->InsertMachineInstrInMaps(*MI);
}
void SIWholeQuadMode::toWQM(MachineBasicBlock &MBB,
MachineBasicBlock::iterator Before,
unsigned SavedWQM) {
+ MachineInstr *MI;
+
if (SavedWQM) {
- BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::COPY), AMDGPU::EXEC)
- .addReg(SavedWQM);
+ MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::COPY), AMDGPU::EXEC)
+ .addReg(SavedWQM);
} else {
- BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
- AMDGPU::EXEC)
- .addReg(AMDGPU::EXEC);
+ MI = BuildMI(MBB, Before, DebugLoc(), TII->get(AMDGPU::S_WQM_B64),
+ AMDGPU::EXEC)
+ .addReg(AMDGPU::EXEC);
}
+
+ LIS->InsertMachineInstrInMaps(*MI);
}
void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,
@@ -395,72 +574,82 @@ void SIWholeQuadMode::processBlock(MachineBasicBlock &MBB, unsigned LiveMaskReg,
if (!isEntry && !(BI.Needs & StateExact) && BI.OutNeeds != StateExact)
return;
+ DEBUG(dbgs() << "\nProcessing block BB#" << MBB.getNumber() << ":\n");
+
unsigned SavedWQMReg = 0;
bool WQMFromExec = isEntry;
char State = isEntry ? StateExact : StateWQM;
auto II = MBB.getFirstNonPHI(), IE = MBB.end();
- while (II != IE) {
- MachineInstr &MI = *II;
- ++II;
+ if (isEntry)
+ ++II; // Skip the instruction that saves LiveMask
- // Skip instructions that are not affected by EXEC
- if (TII->isScalarUnit(MI) && !MI.isTerminator())
- continue;
+ MachineBasicBlock::iterator First = IE;
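+ // First marks the beginning of the range (since the last instruction with
+ // known exec requirements) in which a state switch may be placed; it is
+ // the lower bound passed to prepareInsertion below.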
+ for (;;) {
+ MachineBasicBlock::iterator Next = II;
+ char Needs = 0;
+ char OutNeeds = 0;
- // Generic instructions such as COPY will either disappear by register
- // coalescing or be lowered to SALU or VALU instructions.
- if (TargetInstrInfo::isGenericOpcode(MI.getOpcode())) {
- if (MI.getNumExplicitOperands() >= 1) {
- const MachineOperand &Op = MI.getOperand(0);
- if (Op.isReg()) {
- if (TRI->isSGPRReg(*MRI, Op.getReg())) {
- // SGPR instructions are not affected by EXEC
- continue;
- }
+ if (First == IE)
+ First = II;
+
+ if (II != IE) {
+ MachineInstr &MI = *II;
+
+ if (requiresCorrectState(MI)) {
+ auto III = Instructions.find(&MI);
+ if (III != Instructions.end()) {
+ Needs = III->second.Needs;
+ OutNeeds = III->second.OutNeeds;
}
}
- }
- char Needs = 0;
- char OutNeeds = 0;
- auto InstrInfoIt = Instructions.find(&MI);
- if (InstrInfoIt != Instructions.end()) {
- Needs = InstrInfoIt->second.Needs;
- OutNeeds = InstrInfoIt->second.OutNeeds;
-
- // Make sure to switch to Exact mode before the end of the block when
- // Exact and only Exact is needed further downstream.
- if (OutNeeds == StateExact && MI.isTerminator()) {
- assert(Needs == 0);
+ if (MI.isTerminator() && !Needs && OutNeeds == StateExact)
+ Needs = StateExact;
+
+ if (MI.getOpcode() == AMDGPU::SI_ELSE && BI.OutNeeds == StateExact)
+ MI.getOperand(3).setImm(1);
+
+ ++Next;
+ } else {
+ // End of basic block
+ if (BI.OutNeeds & StateWQM)
+ Needs = StateWQM;
+ else if (BI.OutNeeds == StateExact)
Needs = StateExact;
- }
}
- // State switching
- if (Needs && State != Needs) {
- if (Needs == StateExact) {
- assert(!SavedWQMReg);
+ if (Needs) {
+ if (Needs != State) {
+ MachineBasicBlock::iterator Before =
+ prepareInsertion(MBB, First, II, Needs == StateWQM,
+ Needs == StateExact || WQMFromExec);
- if (!WQMFromExec && (OutNeeds & StateWQM))
- SavedWQMReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
+ if (Needs == StateExact) {
+ if (!WQMFromExec && (OutNeeds & StateWQM))
+ SavedWQMReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
- toExact(MBB, &MI, SavedWQMReg, LiveMaskReg);
- } else {
- assert(WQMFromExec == (SavedWQMReg == 0));
- toWQM(MBB, &MI, SavedWQMReg);
- SavedWQMReg = 0;
+ toExact(MBB, Before, SavedWQMReg, LiveMaskReg);
+ } else {
+ assert(WQMFromExec == (SavedWQMReg == 0));
+
+ toWQM(MBB, Before, SavedWQMReg);
+
+ if (SavedWQMReg) {
+ LIS->createAndComputeVirtRegInterval(SavedWQMReg);
+ SavedWQMReg = 0;
+ }
+ }
+
+ State = Needs;
}
- State = Needs;
+ First = IE;
}
- }
- if ((BI.OutNeeds & StateWQM) && State != StateWQM) {
- assert(WQMFromExec == (SavedWQMReg == 0));
- toWQM(MBB, MBB.end(), SavedWQMReg);
- } else if (BI.OutNeeds == StateExact && State != StateExact) {
- toExact(MBB, MBB.end(), 0, LiveMaskReg);
+ if (II == IE)
+ break;
+ II = Next;
}
}
@@ -468,8 +657,11 @@ void SIWholeQuadMode::lowerLiveMaskQueries(unsigned LiveMaskReg) {
for (MachineInstr *MI : LiveMaskQueries) {
const DebugLoc &DL = MI->getDebugLoc();
unsigned Dest = MI->getOperand(0).getReg();
- BuildMI(*MI->getParent(), MI, DL, TII->get(AMDGPU::COPY), Dest)
- .addReg(LiveMaskReg);
+ MachineInstr *Copy =
+ BuildMI(*MI->getParent(), MI, DL, TII->get(AMDGPU::COPY), Dest)
+ .addReg(LiveMaskReg);
+
+ LIS->ReplaceMachineInstrInMaps(*MI, *Copy);
MI->eraseFromParent();
}
}
@@ -480,7 +672,6 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
Instructions.clear();
Blocks.clear();
- ExecExports.clear();
LiveMaskQueries.clear();
const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
@@ -504,8 +695,10 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
if (GlobalFlags & StateExact || !LiveMaskQueries.empty()) {
LiveMaskReg = MRI->createVirtualRegister(&AMDGPU::SReg_64RegClass);
- BuildMI(Entry, EntryMI, DebugLoc(), TII->get(AMDGPU::COPY), LiveMaskReg)
- .addReg(AMDGPU::EXEC);
+ MachineInstr *MI = BuildMI(Entry, EntryMI, DebugLoc(),
+ TII->get(AMDGPU::COPY), LiveMaskReg)
+ .addReg(AMDGPU::EXEC);
+ LIS->InsertMachineInstrInMaps(*MI);
}
if (GlobalFlags == StateWQM) {
@@ -520,11 +713,18 @@ bool SIWholeQuadMode::runOnMachineFunction(MachineFunction &MF) {
}
}
+ DEBUG(printInfo());
+
lowerLiveMaskQueries(LiveMaskReg);
// Handle the general case
for (auto BII : Blocks)
processBlock(*BII.first, LiveMaskReg, BII.first == &*MF.begin());
+ // Physical registers like SCC aren't tracked by default anyway, so just
+ // removing the ranges we computed is the simplest option for maintaining
+ // the analysis results.
+ LIS->removeRegUnit(*MCRegUnitIterator(AMDGPU::SCC, TRI));
+
return true;
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/SMInstructions.td b/contrib/llvm/lib/Target/AMDGPU/SMInstructions.td
new file mode 100644
index 000000000000..02656483cd74
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/SMInstructions.td
@@ -0,0 +1,535 @@
+//===---- SMInstructions.td - Scalar Memory Instruction Definitions -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+def smrd_offset_8 : NamedOperandU32<"SMRDOffset8",
+ NamedMatchClass<"SMRDOffset8">> {
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
+def smrd_offset_20 : NamedOperandU32<"SMRDOffset20",
+ NamedMatchClass<"SMRDOffset20">> {
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
+//===----------------------------------------------------------------------===//
+// Scalar Memory classes
+//===----------------------------------------------------------------------===//
+
+class SM_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag> pattern=[]> :
+ InstSI <outs, ins, "", pattern>,
+ SIMCInstr<opName, SIEncodingFamily.NONE> {
+ let isPseudo = 1;
+ let isCodeGenOnly = 1;
+
+ let LGKM_CNT = 1;
+ let SMRD = 1;
+ let mayStore = 0;
+ let mayLoad = 1;
+ let hasSideEffects = 0;
+ let UseNamedOperandTable = 1;
+ let SchedRW = [WriteSMEM];
+ let SubtargetPredicate = isGCN;
+
+ string Mnemonic = opName;
+ string AsmOperands = asmOps;
+
+ bits<1> has_sbase = 1;
+ bits<1> has_sdst = 1;
+ bit has_glc = 0;
+ bits<1> has_offset = 1;
+ bits<1> offset_is_imm = 0;
+}
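+// The has_* and offset_is_imm flags above are consumed by the encoding
+// classes that follow, e.g. SMRD_Real_si uses
+// !if(ps.has_offset, offset{7-0}, ?) to leave unused fields undefined.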
+
+class SM_Real <SM_Pseudo ps>
+ : InstSI<ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []> {
+
+ let isPseudo = 0;
+ let isCodeGenOnly = 0;
+
+ // copy relevant pseudo op flags
+ let SubtargetPredicate = ps.SubtargetPredicate;
+ let AsmMatchConverter = ps.AsmMatchConverter;
+
+ // encoding
+ bits<7> sbase;
+ bits<7> sdst;
+ bits<32> offset;
+ bits<1> imm = !if(ps.has_offset, ps.offset_is_imm, 0);
+}
+
+class SM_Load_Pseudo <string opName, dag outs, dag ins, string asmOps, list<dag> pattern=[]>
+ : SM_Pseudo<opName, outs, ins, asmOps, pattern> {
+ RegisterClass BaseClass;
+ let mayLoad = 1;
+ let mayStore = 0;
+ let has_glc = 1;
+}
+
+class SM_Store_Pseudo <string opName, dag ins, string asmOps, list<dag> pattern = []>
+ : SM_Pseudo<opName, (outs), ins, asmOps, pattern> {
+ RegisterClass BaseClass;
+ RegisterClass SrcClass;
+ let mayLoad = 0;
+ let mayStore = 1;
+ let has_glc = 1;
+ let ScalarStore = 1;
+}
+
+multiclass SM_Pseudo_Loads<string opName,
+ RegisterClass baseClass,
+ RegisterClass dstClass> {
+ def _IMM : SM_Load_Pseudo <opName,
+ (outs dstClass:$sdst),
+ (ins baseClass:$sbase, i32imm:$offset, i1imm:$glc),
+ " $sdst, $sbase, $offset$glc", []> {
+ let offset_is_imm = 1;
+ let BaseClass = baseClass;
+ let PseudoInstr = opName # "_IMM";
+ let has_glc = 1;
+ }
+
+ def _SGPR : SM_Load_Pseudo <opName,
+ (outs dstClass:$sdst),
+ (ins baseClass:$sbase, SReg_32:$soff, i1imm:$glc),
+ " $sdst, $sbase, $offset$glc", []> {
+ let BaseClass = baseClass;
+ let PseudoInstr = opName # "_SGPR";
+ let has_glc = 1;
+ }
+}
+
+multiclass SM_Pseudo_Stores<string opName,
+ RegisterClass baseClass,
+ RegisterClass srcClass> {
+ def _IMM : SM_Store_Pseudo <opName,
+ (ins srcClass:$sdata, baseClass:$sbase, i32imm:$offset, i1imm:$glc),
+ " $sdata, $sbase, $offset$glc", []> {
+ let offset_is_imm = 1;
+ let BaseClass = baseClass;
+ let SrcClass = srcClass;
+ let PseudoInstr = opName # "_IMM";
+ }
+
+ def _SGPR : SM_Store_Pseudo <opName,
+ (ins srcClass:$sdata, baseClass:$sbase, SReg_32:$soff, i1imm:$glc),
+ " $sdata, $sbase, $offset$glc", []> {
+ let BaseClass = baseClass;
+ let SrcClass = srcClass;
+ let PseudoInstr = opName # "_SGPR";
+ }
+}
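+// Each multiclass above produces an _IMM and an _SGPR pseudo; the
+// per-subtarget encodings further down look them up by that suffix, e.g.
+// !cast<SM_Load_Pseudo>(ps#_IMM) in SM_Real_Loads_si.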
+
+class SM_Time_Pseudo<string opName, SDPatternOperator node> : SM_Pseudo<
+ opName, (outs SReg_64_XEXEC:$sdst), (ins),
+ " $sdst", [(set i64:$sdst, (node))]> {
+ let hasSideEffects = 1;
+ // FIXME: mayStore = ? is a workaround for a tablegen bug: the mayStore
+ // flag inferred for the instruction pattern differs from the one inferred
+ // for the standalone Pat, and each considers the other contradictory.
+ let mayStore = ?;
+ let mayLoad = ?;
+ let has_sbase = 0;
+ let has_offset = 0;
+}
+
+class SM_Inval_Pseudo <string opName, SDPatternOperator node> : SM_Pseudo<
+ opName, (outs), (ins), "", [(node)]> {
+ let hasSideEffects = 1;
+ let mayStore = 1;
+ let has_sdst = 0;
+ let has_sbase = 0;
+ let has_offset = 0;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Scalar Memory Instructions
+//===----------------------------------------------------------------------===//
+
+// We are using the SReg_32_XM0 and not the SReg_32 register class for 32-bit
+// SMRD instructions, because the SReg_32_XM0 register class does not include M0
+// and writing to M0 from an SMRD instruction will hang the GPU.
+
+// XXX - SMEM instructions do not allow exec as the data operand, but is it
+// allowed as sdst for SMRD on SI/CI?
+defm S_LOAD_DWORD : SM_Pseudo_Loads <"s_load_dword", SReg_64, SReg_32_XM0_XEXEC>;
+defm S_LOAD_DWORDX2 : SM_Pseudo_Loads <"s_load_dwordx2", SReg_64, SReg_64_XEXEC>;
+defm S_LOAD_DWORDX4 : SM_Pseudo_Loads <"s_load_dwordx4", SReg_64, SReg_128>;
+defm S_LOAD_DWORDX8 : SM_Pseudo_Loads <"s_load_dwordx8", SReg_64, SReg_256>;
+defm S_LOAD_DWORDX16 : SM_Pseudo_Loads <"s_load_dwordx16", SReg_64, SReg_512>;
+
+defm S_BUFFER_LOAD_DWORD : SM_Pseudo_Loads <
+ "s_buffer_load_dword", SReg_128, SReg_32_XM0_XEXEC
+>;
+
+// FIXME: exec_lo/exec_hi appear to be allowed for SMRD loads on
+// SI/CI, but disallowed for SMEM on VI.
+defm S_BUFFER_LOAD_DWORDX2 : SM_Pseudo_Loads <
+ "s_buffer_load_dwordx2", SReg_128, SReg_64_XEXEC
+>;
+
+defm S_BUFFER_LOAD_DWORDX4 : SM_Pseudo_Loads <
+ "s_buffer_load_dwordx4", SReg_128, SReg_128
+>;
+
+defm S_BUFFER_LOAD_DWORDX8 : SM_Pseudo_Loads <
+ "s_buffer_load_dwordx8", SReg_128, SReg_256
+>;
+
+defm S_BUFFER_LOAD_DWORDX16 : SM_Pseudo_Loads <
+ "s_buffer_load_dwordx16", SReg_128, SReg_512
+>;
+
+defm S_STORE_DWORD : SM_Pseudo_Stores <"s_store_dword", SReg_64, SReg_32_XM0_XEXEC>;
+defm S_STORE_DWORDX2 : SM_Pseudo_Stores <"s_store_dwordx2", SReg_64, SReg_64_XEXEC>;
+defm S_STORE_DWORDX4 : SM_Pseudo_Stores <"s_store_dwordx4", SReg_64, SReg_128>;
+
+defm S_BUFFER_STORE_DWORD : SM_Pseudo_Stores <
+ "s_buffer_store_dword", SReg_128, SReg_32_XM0_XEXEC
+>;
+
+defm S_BUFFER_STORE_DWORDX2 : SM_Pseudo_Stores <
+ "s_buffer_store_dwordx2", SReg_128, SReg_64_XEXEC
+>;
+
+defm S_BUFFER_STORE_DWORDX4 : SM_Pseudo_Stores <
+ "s_buffer_store_dwordx4", SReg_128, SReg_128
+>;
+
+
+def S_MEMTIME : SM_Time_Pseudo <"s_memtime", int_amdgcn_s_memtime>;
+def S_DCACHE_INV : SM_Inval_Pseudo <"s_dcache_inv", int_amdgcn_s_dcache_inv>;
+
+let SubtargetPredicate = isCIVI in {
+def S_DCACHE_INV_VOL : SM_Inval_Pseudo <"s_dcache_inv_vol", int_amdgcn_s_dcache_inv_vol>;
+} // let SubtargetPredicate = isCIVI
+
+let SubtargetPredicate = isVI in {
+def S_DCACHE_WB : SM_Inval_Pseudo <"s_dcache_wb", int_amdgcn_s_dcache_wb>;
+def S_DCACHE_WB_VOL : SM_Inval_Pseudo <"s_dcache_wb_vol", int_amdgcn_s_dcache_wb_vol>;
+def S_MEMREALTIME : SM_Time_Pseudo <"s_memrealtime", int_amdgcn_s_memrealtime>;
+} // SubtargetPredicate = isVI
+
+
+
+//===----------------------------------------------------------------------===//
+// Scalar Memory Patterns
+//===----------------------------------------------------------------------===//
+
+
+def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{
+ auto Ld = cast<LoadSDNode>(N);
+ return Ld->getAlignment() >= 4 &&
+ ((Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
+ static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpUniform(N)) ||
+ (Subtarget->getScalarizeGlobalBehavior() && Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS &&
+ static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpUniform(N) &&
+ static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpHasNoClobberedMemOperand(N)));
+}]>;
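+// In other words, a load is only selected to SMRD if it is at least
+// dword-aligned and is either a uniform constant-address load or, when the
+// subtarget scalarizes global loads, a uniform global load whose memory
+// operand is known not to be clobbered.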
+
+def SMRDImm : ComplexPattern<i64, 2, "SelectSMRDImm">;
+def SMRDImm32 : ComplexPattern<i64, 2, "SelectSMRDImm32">;
+def SMRDSgpr : ComplexPattern<i64, 2, "SelectSMRDSgpr">;
+def SMRDBufferImm : ComplexPattern<i32, 1, "SelectSMRDBufferImm">;
+def SMRDBufferImm32 : ComplexPattern<i32, 1, "SelectSMRDBufferImm32">;
+def SMRDBufferSgpr : ComplexPattern<i32, 1, "SelectSMRDBufferSgpr">;
+
+let Predicates = [isGCN] in {
+
+multiclass SMRD_Pattern <string Instr, ValueType vt> {
+
+ // 1. IMM offset
+ def : Pat <
+ (smrd_load (SMRDImm i64:$sbase, i32:$offset)),
+ (vt (!cast<SM_Pseudo>(Instr#"_IMM") $sbase, $offset, 0))
+ >;
+
+ // 2. SGPR offset
+ def : Pat <
+ (smrd_load (SMRDSgpr i64:$sbase, i32:$offset)),
+ (vt (!cast<SM_Pseudo>(Instr#"_SGPR") $sbase, $offset, 0))
+ >;
+}
+
+let Predicates = [isSICI] in {
+def : Pat <
+ (i64 (readcyclecounter)),
+ (S_MEMTIME)
+>;
+}
+
+// Global and constant loads can be selected to either MUBUF or SMRD
+// instructions, but SMRD instructions are faster so we want the instruction
+// selector to prefer those.
+let AddedComplexity = 100 in {
+
+defm : SMRD_Pattern <"S_LOAD_DWORD", i32>;
+defm : SMRD_Pattern <"S_LOAD_DWORDX2", v2i32>;
+defm : SMRD_Pattern <"S_LOAD_DWORDX4", v4i32>;
+defm : SMRD_Pattern <"S_LOAD_DWORDX8", v8i32>;
+defm : SMRD_Pattern <"S_LOAD_DWORDX16", v16i32>;
+
+// 1. Offset as an immediate
+def SM_LOAD_PATTERN : Pat < // name this pattern to reuse AddedComplexity on CI
+ (SIload_constant v4i32:$sbase, (SMRDBufferImm i32:$offset)),
+ (S_BUFFER_LOAD_DWORD_IMM $sbase, $offset, 0)
+>;
+
+// 2. Offset loaded in a 32-bit SGPR
+def : Pat <
+ (SIload_constant v4i32:$sbase, (SMRDBufferSgpr i32:$offset)),
+ (S_BUFFER_LOAD_DWORD_SGPR $sbase, $offset, 0)
+>;
+
+} // End let AddedComplexity = 100
+
+} // let Predicates = [isGCN]
+
+let Predicates = [isVI] in {
+
+// 1. Offset as a 20-bit DWORD immediate
+def : Pat <
+ (SIload_constant v4i32:$sbase, IMM20bit:$offset),
+ (S_BUFFER_LOAD_DWORD_IMM $sbase, (as_i32imm $offset), 0)
+>;
+
+def : Pat <
+ (i64 (readcyclecounter)),
+ (S_MEMREALTIME)
+>;
+
+} // let Predicates = [isVI]
+
+
+//===----------------------------------------------------------------------===//
+// Targets
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SI
+//===----------------------------------------------------------------------===//
+
+class SMRD_Real_si <bits<5> op, SM_Pseudo ps>
+ : SM_Real<ps>
+ , SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SI>
+ , Enc32 {
+
+ let AssemblerPredicates = [isSICI];
+ let DecoderNamespace = "SICI";
+
+ let Inst{7-0} = !if(ps.has_offset, offset{7-0}, ?);
+ let Inst{8} = imm;
+ let Inst{14-9} = !if(ps.has_sbase, sbase{6-1}, ?);
+ let Inst{21-15} = !if(ps.has_sdst, sdst{6-0}, ?);
+ let Inst{26-22} = op;
+ let Inst{31-27} = 0x18; //encoding
+}
+
+// FIXME: Assembler should reject trying to use glc on SMRD
+// instructions on SI.
+multiclass SM_Real_Loads_si<bits<5> op, string ps,
+ SM_Load_Pseudo immPs = !cast<SM_Load_Pseudo>(ps#_IMM),
+ SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> {
+
+ def _IMM_si : SMRD_Real_si <op, immPs> {
+ let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_8:$offset, GLC:$glc);
+ }
+
+ // FIXME: The operand name $offset is inconsistent with $soff used
+ // in the pseudo
+ def _SGPR_si : SMRD_Real_si <op, sgprPs> {
+ let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc);
+ }
+
+}
+
+defm S_LOAD_DWORD : SM_Real_Loads_si <0x00, "S_LOAD_DWORD">;
+defm S_LOAD_DWORDX2 : SM_Real_Loads_si <0x01, "S_LOAD_DWORDX2">;
+defm S_LOAD_DWORDX4 : SM_Real_Loads_si <0x02, "S_LOAD_DWORDX4">;
+defm S_LOAD_DWORDX8 : SM_Real_Loads_si <0x03, "S_LOAD_DWORDX8">;
+defm S_LOAD_DWORDX16 : SM_Real_Loads_si <0x04, "S_LOAD_DWORDX16">;
+defm S_BUFFER_LOAD_DWORD : SM_Real_Loads_si <0x08, "S_BUFFER_LOAD_DWORD">;
+defm S_BUFFER_LOAD_DWORDX2 : SM_Real_Loads_si <0x09, "S_BUFFER_LOAD_DWORDX2">;
+defm S_BUFFER_LOAD_DWORDX4 : SM_Real_Loads_si <0x0a, "S_BUFFER_LOAD_DWORDX4">;
+defm S_BUFFER_LOAD_DWORDX8 : SM_Real_Loads_si <0x0b, "S_BUFFER_LOAD_DWORDX8">;
+defm S_BUFFER_LOAD_DWORDX16 : SM_Real_Loads_si <0x0c, "S_BUFFER_LOAD_DWORDX16">;
+
+def S_MEMTIME_si : SMRD_Real_si <0x1e, S_MEMTIME>;
+def S_DCACHE_INV_si : SMRD_Real_si <0x1f, S_DCACHE_INV>;
+
+
+//===----------------------------------------------------------------------===//
+// VI
+//===----------------------------------------------------------------------===//
+
+class SMEM_Real_vi <bits<8> op, SM_Pseudo ps>
+ : SM_Real<ps>
+ , SIMCInstr<ps.PseudoInstr, SIEncodingFamily.VI>
+ , Enc64 {
+ bit glc;
+
+ let AssemblerPredicates = [isVI];
+ let DecoderNamespace = "VI";
+
+ let Inst{5-0} = !if(ps.has_sbase, sbase{6-1}, ?);
+ let Inst{12-6} = !if(ps.has_sdst, sdst{6-0}, ?);
+
+ let Inst{16} = !if(ps.has_glc, glc, ?);
+ let Inst{17} = imm;
+ let Inst{25-18} = op;
+ let Inst{31-26} = 0x30; //encoding
+ let Inst{51-32} = !if(ps.has_offset, offset{19-0}, ?);
+}
+
+multiclass SM_Real_Loads_vi<bits<8> op, string ps,
+ SM_Load_Pseudo immPs = !cast<SM_Load_Pseudo>(ps#_IMM),
+ SM_Load_Pseudo sgprPs = !cast<SM_Load_Pseudo>(ps#_SGPR)> {
+ def _IMM_vi : SMEM_Real_vi <op, immPs> {
+ let InOperandList = (ins immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc);
+ }
+ def _SGPR_vi : SMEM_Real_vi <op, sgprPs> {
+ let InOperandList = (ins sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc);
+ }
+}
+
+class SMEM_Real_Store_vi <bits<8> op, SM_Pseudo ps> : SMEM_Real_vi <op, ps> {
+ // encoding
+ bits<7> sdata;
+
+ let sdst = ?;
+ let Inst{12-6} = !if(ps.has_sdst, sdata{6-0}, ?);
+}
+
+multiclass SM_Real_Stores_vi<bits<8> op, string ps,
+ SM_Store_Pseudo immPs = !cast<SM_Store_Pseudo>(ps#_IMM),
+ SM_Store_Pseudo sgprPs = !cast<SM_Store_Pseudo>(ps#_SGPR)> {
+ // FIXME: The operand name $offset is inconsistent with $soff used
+ // in the pseudo
+ def _IMM_vi : SMEM_Real_Store_vi <op, immPs> {
+ let InOperandList = (ins immPs.SrcClass:$sdata, immPs.BaseClass:$sbase, smrd_offset_20:$offset, GLC:$glc);
+ }
+
+ def _SGPR_vi : SMEM_Real_Store_vi <op, sgprPs> {
+ let InOperandList = (ins sgprPs.SrcClass:$sdata, sgprPs.BaseClass:$sbase, SReg_32:$offset, GLC:$glc);
+ }
+}
+
+defm S_LOAD_DWORD : SM_Real_Loads_vi <0x00, "S_LOAD_DWORD">;
+defm S_LOAD_DWORDX2 : SM_Real_Loads_vi <0x01, "S_LOAD_DWORDX2">;
+defm S_LOAD_DWORDX4 : SM_Real_Loads_vi <0x02, "S_LOAD_DWORDX4">;
+defm S_LOAD_DWORDX8 : SM_Real_Loads_vi <0x03, "S_LOAD_DWORDX8">;
+defm S_LOAD_DWORDX16 : SM_Real_Loads_vi <0x04, "S_LOAD_DWORDX16">;
+defm S_BUFFER_LOAD_DWORD : SM_Real_Loads_vi <0x08, "S_BUFFER_LOAD_DWORD">;
+defm S_BUFFER_LOAD_DWORDX2 : SM_Real_Loads_vi <0x09, "S_BUFFER_LOAD_DWORDX2">;
+defm S_BUFFER_LOAD_DWORDX4 : SM_Real_Loads_vi <0x0a, "S_BUFFER_LOAD_DWORDX4">;
+defm S_BUFFER_LOAD_DWORDX8 : SM_Real_Loads_vi <0x0b, "S_BUFFER_LOAD_DWORDX8">;
+defm S_BUFFER_LOAD_DWORDX16 : SM_Real_Loads_vi <0x0c, "S_BUFFER_LOAD_DWORDX16">;
+
+defm S_STORE_DWORD : SM_Real_Stores_vi <0x10, "S_STORE_DWORD">;
+defm S_STORE_DWORDX2 : SM_Real_Stores_vi <0x11, "S_STORE_DWORDX2">;
+defm S_STORE_DWORDX4 : SM_Real_Stores_vi <0x12, "S_STORE_DWORDX4">;
+
+defm S_BUFFER_STORE_DWORD : SM_Real_Stores_vi <0x18, "S_BUFFER_STORE_DWORD">;
+defm S_BUFFER_STORE_DWORDX2 : SM_Real_Stores_vi <0x19, "S_BUFFER_STORE_DWORDX2">;
+defm S_BUFFER_STORE_DWORDX4 : SM_Real_Stores_vi <0x1a, "S_BUFFER_STORE_DWORDX4">;
+
+// These instructions use the same encoding
+def S_DCACHE_INV_vi : SMEM_Real_vi <0x20, S_DCACHE_INV>;
+def S_DCACHE_WB_vi : SMEM_Real_vi <0x21, S_DCACHE_WB>;
+def S_DCACHE_INV_VOL_vi : SMEM_Real_vi <0x22, S_DCACHE_INV_VOL>;
+def S_DCACHE_WB_VOL_vi : SMEM_Real_vi <0x23, S_DCACHE_WB_VOL>;
+def S_MEMTIME_vi : SMEM_Real_vi <0x24, S_MEMTIME>;
+def S_MEMREALTIME_vi : SMEM_Real_vi <0x25, S_MEMREALTIME>;
+
+
+//===----------------------------------------------------------------------===//
+// CI
+//===----------------------------------------------------------------------===//
+
+def smrd_literal_offset : NamedOperandU32<"SMRDLiteralOffset",
+ NamedMatchClass<"SMRDLiteralOffset">> {
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
+class SMRD_Real_Load_IMM_ci <bits<5> op, SM_Load_Pseudo ps> :
+ SM_Real<ps>,
+ Enc64 {
+
+ let AssemblerPredicates = [isCIOnly];
+ let DecoderNamespace = "CI";
+ let InOperandList = (ins ps.BaseClass:$sbase, smrd_literal_offset:$offset, GLC:$glc);
+
+ let LGKM_CNT = ps.LGKM_CNT;
+ let SMRD = ps.SMRD;
+ let mayLoad = ps.mayLoad;
+ let mayStore = ps.mayStore;
+ let hasSideEffects = ps.hasSideEffects;
+ let SchedRW = ps.SchedRW;
+ let UseNamedOperandTable = ps.UseNamedOperandTable;
+
+ let Inst{7-0} = 0xff;
+ let Inst{8} = 0;
+ let Inst{14-9} = sbase{6-1};
+ let Inst{21-15} = sdst{6-0};
+ let Inst{26-22} = op;
+ let Inst{31-27} = 0x18; //encoding
+ let Inst{63-32} = offset{31-0};
+}
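+// The CI literal form sets the 8-bit offset field (Inst{7-0}) to 0xff and
+// carries the 32-bit literal offset in the second dword (Inst{63-32}).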
+
+def S_LOAD_DWORD_IMM_ci : SMRD_Real_Load_IMM_ci <0x00, S_LOAD_DWORD_IMM>;
+def S_LOAD_DWORDX2_IMM_ci : SMRD_Real_Load_IMM_ci <0x01, S_LOAD_DWORDX2_IMM>;
+def S_LOAD_DWORDX4_IMM_ci : SMRD_Real_Load_IMM_ci <0x02, S_LOAD_DWORDX4_IMM>;
+def S_LOAD_DWORDX8_IMM_ci : SMRD_Real_Load_IMM_ci <0x03, S_LOAD_DWORDX8_IMM>;
+def S_LOAD_DWORDX16_IMM_ci : SMRD_Real_Load_IMM_ci <0x04, S_LOAD_DWORDX16_IMM>;
+def S_BUFFER_LOAD_DWORD_IMM_ci : SMRD_Real_Load_IMM_ci <0x08, S_BUFFER_LOAD_DWORD_IMM>;
+def S_BUFFER_LOAD_DWORDX2_IMM_ci : SMRD_Real_Load_IMM_ci <0x09, S_BUFFER_LOAD_DWORDX2_IMM>;
+def S_BUFFER_LOAD_DWORDX4_IMM_ci : SMRD_Real_Load_IMM_ci <0x0a, S_BUFFER_LOAD_DWORDX4_IMM>;
+def S_BUFFER_LOAD_DWORDX8_IMM_ci : SMRD_Real_Load_IMM_ci <0x0b, S_BUFFER_LOAD_DWORDX8_IMM>;
+def S_BUFFER_LOAD_DWORDX16_IMM_ci : SMRD_Real_Load_IMM_ci <0x0c, S_BUFFER_LOAD_DWORDX16_IMM>;
+
+class SMRD_Real_ci <bits<5> op, SM_Pseudo ps>
+ : SM_Real<ps>
+ , SIMCInstr<ps.PseudoInstr, SIEncodingFamily.SI>
+ , Enc32 {
+
+ let AssemblerPredicates = [isCIOnly];
+ let DecoderNamespace = "CI";
+
+ let Inst{7-0} = !if(ps.has_offset, offset{7-0}, ?);
+ let Inst{8} = imm;
+ let Inst{14-9} = !if(ps.has_sbase, sbase{6-1}, ?);
+ let Inst{21-15} = !if(ps.has_sdst, sdst{6-0}, ?);
+ let Inst{26-22} = op;
+ let Inst{31-27} = 0x18; //encoding
+}
+
+def S_DCACHE_INV_VOL_ci : SMRD_Real_ci <0x1d, S_DCACHE_INV_VOL>;
+
+let AddedComplexity = SM_LOAD_PATTERN.AddedComplexity in {
+
+class SMRD_Pattern_ci <string Instr, ValueType vt> : Pat <
+ (smrd_load (SMRDImm32 i64:$sbase, i32:$offset)),
+ (vt (!cast<SM_Pseudo>(Instr#"_IMM_ci") $sbase, $offset, 0))> {
+ let Predicates = [isCIOnly];
+}
+
+def : SMRD_Pattern_ci <"S_LOAD_DWORD", i32>;
+def : SMRD_Pattern_ci <"S_LOAD_DWORDX2", v2i32>;
+def : SMRD_Pattern_ci <"S_LOAD_DWORDX4", v4i32>;
+def : SMRD_Pattern_ci <"S_LOAD_DWORDX8", v8i32>;
+def : SMRD_Pattern_ci <"S_LOAD_DWORDX16", v16i32>;
+
+def : Pat <
+ (SIload_constant v4i32:$sbase, (SMRDBufferImm32 i32:$offset)),
+ (S_BUFFER_LOAD_DWORD_IMM_ci $sbase, $offset, 0)> {
+ let Predicates = [isCI]; // should this be isCIOnly?
+}
+
+} // End let AddedComplexity = SM_LOAD_PATTERN.AddedComplexity
+
diff --git a/contrib/llvm/lib/Target/AMDGPU/SOPInstructions.td b/contrib/llvm/lib/Target/AMDGPU/SOPInstructions.td
new file mode 100644
index 000000000000..0aeb1297d3a7
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -0,0 +1,1229 @@
+//===-- SOPInstructions.td - SOP Instruction Definitions ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+def GPRIdxModeMatchClass : AsmOperandClass {
+ let Name = "GPRIdxMode";
+ let PredicateMethod = "isGPRIdxMode";
+ let RenderMethod = "addImmOperands";
+}
+
+def GPRIdxMode : Operand<i32> {
+ let PrintMethod = "printVGPRIndexMode";
+ let ParserMatchClass = GPRIdxModeMatchClass;
+ let OperandType = "OPERAND_IMMEDIATE";
+}
+
+//===----------------------------------------------------------------------===//
+// SOP1 Instructions
+//===----------------------------------------------------------------------===//
+
+class SOP1_Pseudo <string opName, dag outs, dag ins,
+ string asmOps, list<dag> pattern=[]> :
+ InstSI <outs, ins, "", pattern>,
+ SIMCInstr<opName, SIEncodingFamily.NONE> {
+ let isPseudo = 1;
+ let isCodeGenOnly = 1;
+ let SubtargetPredicate = isGCN;
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let SALU = 1;
+ let SOP1 = 1;
+ let SchedRW = [WriteSALU];
+ let Size = 4;
+ let UseNamedOperandTable = 1;
+
+ string Mnemonic = opName;
+ string AsmOperands = asmOps;
+
+ bits<1> has_src0 = 1;
+ bits<1> has_sdst = 1;
+}
+
+class SOP1_Real<bits<8> op, SOP1_Pseudo ps> :
+ InstSI <ps.OutOperandList, ps.InOperandList,
+ ps.Mnemonic # " " # ps.AsmOperands, []>,
+ Enc32 {
+
+ let isPseudo = 0;
+ let isCodeGenOnly = 0;
+ let Size = 4;
+
+ // copy relevant pseudo op flags
+ let SubtargetPredicate = ps.SubtargetPredicate;
+ let AsmMatchConverter = ps.AsmMatchConverter;
+
+ // encoding
+ bits<7> sdst;
+ bits<8> src0;
+
+ let Inst{7-0} = !if(ps.has_src0, src0, ?);
+ let Inst{15-8} = op;
+ let Inst{22-16} = !if(ps.has_sdst, sdst, ?);
+ let Inst{31-23} = 0x17d; //encoding;
+}
+
+class SOP1_32 <string opName, list<dag> pattern=[]> : SOP1_Pseudo <
+ opName, (outs SReg_32:$sdst), (ins SSrc_b32:$src0),
+ "$sdst, $src0", pattern
+>;
+
+// 32-bit input, no output.
+class SOP1_0_32 <string opName, list<dag> pattern = []> : SOP1_Pseudo <
+ opName, (outs), (ins SSrc_b32:$src0),
+ "$src0", pattern> {
+ let has_sdst = 0;
+}
+
+class SOP1_64 <string opName, list<dag> pattern=[]> : SOP1_Pseudo <
+ opName, (outs SReg_64:$sdst), (ins SSrc_b64:$src0),
+ "$sdst, $src0", pattern
+>;
+
+// 64-bit input, 32-bit output.
+class SOP1_32_64 <string opName, list<dag> pattern=[]> : SOP1_Pseudo <
+ opName, (outs SReg_32:$sdst), (ins SSrc_b64:$src0),
+ "$sdst, $src0", pattern
+>;
+
+// 32-bit input, 64-bit output.
+class SOP1_64_32 <string opName, list<dag> pattern=[]> : SOP1_Pseudo <
+ opName, (outs SReg_64:$sdst), (ins SSrc_b32:$src0),
+ "$sdst, $src0", pattern
+>;
+
+// no input, 64-bit output.
+class SOP1_64_0 <string opName, list<dag> pattern=[]> : SOP1_Pseudo <
+ opName, (outs SReg_64:$sdst), (ins), "$sdst", pattern> {
+ let has_src0 = 0;
+}
+
+// 64-bit input, no output
+class SOP1_1 <string opName, list<dag> pattern=[]> : SOP1_Pseudo <
+ opName, (outs), (ins SReg_64:$src0), "$src0", pattern> {
+ let has_sdst = 0;
+}
+
+
+let isMoveImm = 1 in {
+ let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+ def S_MOV_B32 : SOP1_32 <"s_mov_b32">;
+ def S_MOV_B64 : SOP1_64 <"s_mov_b64">;
+ } // End isReMaterializable = 1
+
+ let Uses = [SCC] in {
+ def S_CMOV_B32 : SOP1_32 <"s_cmov_b32">;
+ def S_CMOV_B64 : SOP1_64 <"s_cmov_b64">;
+ } // End Uses = [SCC]
+} // End isMoveImm = 1
+
+let Defs = [SCC] in {
+ def S_NOT_B32 : SOP1_32 <"s_not_b32",
+ [(set i32:$sdst, (not i32:$src0))]
+ >;
+
+ def S_NOT_B64 : SOP1_64 <"s_not_b64",
+ [(set i64:$sdst, (not i64:$src0))]
+ >;
+ def S_WQM_B32 : SOP1_32 <"s_wqm_b32">;
+ def S_WQM_B64 : SOP1_64 <"s_wqm_b64">;
+} // End Defs = [SCC]
+
+
+def S_BREV_B32 : SOP1_32 <"s_brev_b32",
+ [(set i32:$sdst, (bitreverse i32:$src0))]
+>;
+def S_BREV_B64 : SOP1_64 <"s_brev_b64">;
+
+let Defs = [SCC] in {
+def S_BCNT0_I32_B32 : SOP1_32 <"s_bcnt0_i32_b32">;
+def S_BCNT0_I32_B64 : SOP1_32_64 <"s_bcnt0_i32_b64">;
+def S_BCNT1_I32_B32 : SOP1_32 <"s_bcnt1_i32_b32",
+ [(set i32:$sdst, (ctpop i32:$src0))]
+>;
+def S_BCNT1_I32_B64 : SOP1_32_64 <"s_bcnt1_i32_b64">;
+} // End Defs = [SCC]
+
+def S_FF0_I32_B32 : SOP1_32 <"s_ff0_i32_b32">;
+def S_FF0_I32_B64 : SOP1_32_64 <"s_ff0_i32_b64">;
+def S_FF1_I32_B32 : SOP1_32 <"s_ff1_i32_b32",
+ [(set i32:$sdst, (cttz_zero_undef i32:$src0))]
+>;
+def S_FF1_I32_B64 : SOP1_32_64 <"s_ff1_i32_b64">;
+
+def S_FLBIT_I32_B32 : SOP1_32 <"s_flbit_i32_b32",
+ [(set i32:$sdst, (AMDGPUffbh_u32 i32:$src0))]
+>;
+
+def S_FLBIT_I32_B64 : SOP1_32_64 <"s_flbit_i32_b64">;
+def S_FLBIT_I32 : SOP1_32 <"s_flbit_i32",
+ [(set i32:$sdst, (AMDGPUffbh_i32 i32:$src0))]
+>;
+def S_FLBIT_I32_I64 : SOP1_32_64 <"s_flbit_i32_i64">;
+def S_SEXT_I32_I8 : SOP1_32 <"s_sext_i32_i8",
+ [(set i32:$sdst, (sext_inreg i32:$src0, i8))]
+>;
+def S_SEXT_I32_I16 : SOP1_32 <"s_sext_i32_i16",
+ [(set i32:$sdst, (sext_inreg i32:$src0, i16))]
+>;
+
+def S_BITSET0_B32 : SOP1_32 <"s_bitset0_b32">;
+def S_BITSET0_B64 : SOP1_64_32 <"s_bitset0_b64">;
+def S_BITSET1_B32 : SOP1_32 <"s_bitset1_b32">;
+def S_BITSET1_B64 : SOP1_64_32 <"s_bitset1_b64">;
+def S_GETPC_B64 : SOP1_64_0 <"s_getpc_b64">;
+
+let isTerminator = 1, isBarrier = 1,
+ isBranch = 1, isIndirectBranch = 1 in {
+def S_SETPC_B64 : SOP1_1 <"s_setpc_b64">;
+}
+def S_SWAPPC_B64 : SOP1_64 <"s_swappc_b64">;
+def S_RFE_B64 : SOP1_1 <"s_rfe_b64">;
+
+let hasSideEffects = 1, Uses = [EXEC], Defs = [EXEC, SCC] in {
+
+def S_AND_SAVEEXEC_B64 : SOP1_64 <"s_and_saveexec_b64">;
+def S_OR_SAVEEXEC_B64 : SOP1_64 <"s_or_saveexec_b64">;
+def S_XOR_SAVEEXEC_B64 : SOP1_64 <"s_xor_saveexec_b64">;
+def S_ANDN2_SAVEEXEC_B64 : SOP1_64 <"s_andn2_saveexec_b64">;
+def S_ORN2_SAVEEXEC_B64 : SOP1_64 <"s_orn2_saveexec_b64">;
+def S_NAND_SAVEEXEC_B64 : SOP1_64 <"s_nand_saveexec_b64">;
+def S_NOR_SAVEEXEC_B64 : SOP1_64 <"s_nor_saveexec_b64">;
+def S_XNOR_SAVEEXEC_B64 : SOP1_64 <"s_xnor_saveexec_b64">;
+
+} // End hasSideEffects = 1, Uses = [EXEC], Defs = [EXEC, SCC]
+
+def S_QUADMASK_B32 : SOP1_32 <"s_quadmask_b32">;
+def S_QUADMASK_B64 : SOP1_64 <"s_quadmask_b64">;
+
+let Uses = [M0] in {
+def S_MOVRELS_B32 : SOP1_32 <"s_movrels_b32">;
+def S_MOVRELS_B64 : SOP1_64 <"s_movrels_b64">;
+def S_MOVRELD_B32 : SOP1_32 <"s_movreld_b32">;
+def S_MOVRELD_B64 : SOP1_64 <"s_movreld_b64">;
+} // End Uses = [M0]
+
+def S_CBRANCH_JOIN : SOP1_1 <"s_cbranch_join">;
+def S_MOV_REGRD_B32 : SOP1_32 <"s_mov_regrd_b32">;
+let Defs = [SCC] in {
+def S_ABS_I32 : SOP1_32 <"s_abs_i32">;
+} // End Defs = [SCC]
+def S_MOV_FED_B32 : SOP1_32 <"s_mov_fed_b32">;
+
+let SubtargetPredicate = HasVGPRIndexMode in {
+def S_SET_GPR_IDX_IDX : SOP1_0_32<"s_set_gpr_idx_idx"> {
+ let Uses = [M0];
+ let Defs = [M0];
+}
+}
+
+//===----------------------------------------------------------------------===//
+// SOP2 Instructions
+//===----------------------------------------------------------------------===//
+
+class SOP2_Pseudo<string opName, dag outs, dag ins,
+ string asmOps, list<dag> pattern=[]> :
+ InstSI<outs, ins, "", pattern>,
+ SIMCInstr<opName, SIEncodingFamily.NONE> {
+ let isPseudo = 1;
+ let isCodeGenOnly = 1;
+ let SubtargetPredicate = isGCN;
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let SALU = 1;
+ let SOP2 = 1;
+ let SchedRW = [WriteSALU];
+ let UseNamedOperandTable = 1;
+
+ string Mnemonic = opName;
+ string AsmOperands = asmOps;
+
+ bits<1> has_sdst = 1;
+
+ // Pseudo instructions have no encodings, but adding this field here allows
+ // us to do:
+ // let sdst = xxx in {
+ // for multiclasses that include both real and pseudo instructions.
+ // field bits<7> sdst = 0;
+ // let Size = 4; // Do we need size here?
+}
+
+class SOP2_Real<bits<7> op, SOP2_Pseudo ps> :
+ InstSI <ps.OutOperandList, ps.InOperandList,
+ ps.Mnemonic # " " # ps.AsmOperands, []>,
+ Enc32 {
+ let isPseudo = 0;
+ let isCodeGenOnly = 0;
+
+ // copy relevant pseudo op flags
+ let SubtargetPredicate = ps.SubtargetPredicate;
+ let AsmMatchConverter = ps.AsmMatchConverter;
+
+ // encoding
+ bits<7> sdst;
+ bits<8> src0;
+ bits<8> src1;
+
+ let Inst{7-0} = src0;
+ let Inst{15-8} = src1;
+ let Inst{22-16} = !if(ps.has_sdst, sdst, ?);
+ let Inst{29-23} = op;
+ let Inst{31-30} = 0x2; // encoding
+}
+
+
+class SOP2_32 <string opName, list<dag> pattern=[]> : SOP2_Pseudo <
+ opName, (outs SReg_32:$sdst), (ins SSrc_b32:$src0, SSrc_b32:$src1),
+ "$sdst, $src0, $src1", pattern
+>;
+
+class SOP2_64 <string opName, list<dag> pattern=[]> : SOP2_Pseudo <
+ opName, (outs SReg_64:$sdst), (ins SSrc_b64:$src0, SSrc_b64:$src1),
+ "$sdst, $src0, $src1", pattern
+>;
+
+class SOP2_64_32 <string opName, list<dag> pattern=[]> : SOP2_Pseudo <
+ opName, (outs SReg_64:$sdst), (ins SSrc_b64:$src0, SSrc_b32:$src1),
+ "$sdst, $src0, $src1", pattern
+>;
+
+class SOP2_64_32_32 <string opName, list<dag> pattern=[]> : SOP2_Pseudo <
+ opName, (outs SReg_64:$sdst), (ins SSrc_b32:$src0, SSrc_b32:$src1),
+ "$sdst, $src0, $src1", pattern
+>;
+
+let Defs = [SCC] in { // Carry out goes to SCC
+let isCommutable = 1 in {
+def S_ADD_U32 : SOP2_32 <"s_add_u32">;
+def S_ADD_I32 : SOP2_32 <"s_add_i32",
+ [(set i32:$sdst, (add SSrc_b32:$src0, SSrc_b32:$src1))]
+>;
+} // End isCommutable = 1
+
+def S_SUB_U32 : SOP2_32 <"s_sub_u32">;
+def S_SUB_I32 : SOP2_32 <"s_sub_i32",
+ [(set i32:$sdst, (sub SSrc_b32:$src0, SSrc_b32:$src1))]
+>;
+
+let Uses = [SCC] in { // Carry in comes from SCC
+let isCommutable = 1 in {
+def S_ADDC_U32 : SOP2_32 <"s_addc_u32",
+ [(set i32:$sdst, (adde (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]>;
+} // End isCommutable = 1
+
+def S_SUBB_U32 : SOP2_32 <"s_subb_u32",
+ [(set i32:$sdst, (sube (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]>;
+} // End Uses = [SCC]
+
+
+let isCommutable = 1 in {
+def S_MIN_I32 : SOP2_32 <"s_min_i32",
+ [(set i32:$sdst, (smin i32:$src0, i32:$src1))]
+>;
+def S_MIN_U32 : SOP2_32 <"s_min_u32",
+ [(set i32:$sdst, (umin i32:$src0, i32:$src1))]
+>;
+def S_MAX_I32 : SOP2_32 <"s_max_i32",
+ [(set i32:$sdst, (smax i32:$src0, i32:$src1))]
+>;
+def S_MAX_U32 : SOP2_32 <"s_max_u32",
+ [(set i32:$sdst, (umax i32:$src0, i32:$src1))]
+>;
+} // End isCommutable = 1
+} // End Defs = [SCC]
+
+
+let Uses = [SCC] in {
+ def S_CSELECT_B32 : SOP2_32 <"s_cselect_b32">;
+ def S_CSELECT_B64 : SOP2_64 <"s_cselect_b64">;
+} // End Uses = [SCC]
+
+let Defs = [SCC] in {
+let isCommutable = 1 in {
+def S_AND_B32 : SOP2_32 <"s_and_b32",
+ [(set i32:$sdst, (and i32:$src0, i32:$src1))]
+>;
+
+def S_AND_B64 : SOP2_64 <"s_and_b64",
+ [(set i64:$sdst, (and i64:$src0, i64:$src1))]
+>;
+
+def S_OR_B32 : SOP2_32 <"s_or_b32",
+ [(set i32:$sdst, (or i32:$src0, i32:$src1))]
+>;
+
+def S_OR_B64 : SOP2_64 <"s_or_b64",
+ [(set i64:$sdst, (or i64:$src0, i64:$src1))]
+>;
+
+def S_XOR_B32 : SOP2_32 <"s_xor_b32",
+ [(set i32:$sdst, (xor i32:$src0, i32:$src1))]
+>;
+
+def S_XOR_B64 : SOP2_64 <"s_xor_b64",
+ [(set i64:$sdst, (xor i64:$src0, i64:$src1))]
+>;
+} // End isCommutable = 1
+
+def S_ANDN2_B32 : SOP2_32 <"s_andn2_b32">;
+def S_ANDN2_B64 : SOP2_64 <"s_andn2_b64">;
+def S_ORN2_B32 : SOP2_32 <"s_orn2_b32">;
+def S_ORN2_B64 : SOP2_64 <"s_orn2_b64">;
+def S_NAND_B32 : SOP2_32 <"s_nand_b32">;
+def S_NAND_B64 : SOP2_64 <"s_nand_b64">;
+def S_NOR_B32 : SOP2_32 <"s_nor_b32">;
+def S_NOR_B64 : SOP2_64 <"s_nor_b64">;
+def S_XNOR_B32 : SOP2_32 <"s_xnor_b32">;
+def S_XNOR_B64 : SOP2_64 <"s_xnor_b64">;
+} // End Defs = [SCC]
+
+// Use added complexity so these patterns are preferred to the VALU patterns.
+let AddedComplexity = 1 in {
+
+let Defs = [SCC] in {
+def S_LSHL_B32 : SOP2_32 <"s_lshl_b32",
+ [(set i32:$sdst, (shl i32:$src0, i32:$src1))]
+>;
+def S_LSHL_B64 : SOP2_64_32 <"s_lshl_b64",
+ [(set i64:$sdst, (shl i64:$src0, i32:$src1))]
+>;
+def S_LSHR_B32 : SOP2_32 <"s_lshr_b32",
+ [(set i32:$sdst, (srl i32:$src0, i32:$src1))]
+>;
+def S_LSHR_B64 : SOP2_64_32 <"s_lshr_b64",
+ [(set i64:$sdst, (srl i64:$src0, i32:$src1))]
+>;
+def S_ASHR_I32 : SOP2_32 <"s_ashr_i32",
+ [(set i32:$sdst, (sra i32:$src0, i32:$src1))]
+>;
+def S_ASHR_I64 : SOP2_64_32 <"s_ashr_i64",
+ [(set i64:$sdst, (sra i64:$src0, i32:$src1))]
+>;
+} // End Defs = [SCC]
+
+def S_BFM_B32 : SOP2_32 <"s_bfm_b32",
+ [(set i32:$sdst, (AMDGPUbfm i32:$src0, i32:$src1))]>;
+def S_BFM_B64 : SOP2_64_32_32 <"s_bfm_b64">;
+def S_MUL_I32 : SOP2_32 <"s_mul_i32",
+ [(set i32:$sdst, (mul i32:$src0, i32:$src1))]> {
+ let isCommutable = 1;
+}
+
+} // End AddedComplexity = 1
+
+let Defs = [SCC] in {
+def S_BFE_U32 : SOP2_32 <"s_bfe_u32">;
+def S_BFE_I32 : SOP2_32 <"s_bfe_i32">;
+def S_BFE_U64 : SOP2_64_32 <"s_bfe_u64">;
+def S_BFE_I64 : SOP2_64_32 <"s_bfe_i64">;
+} // End Defs = [SCC]
+
+def S_CBRANCH_G_FORK : SOP2_Pseudo <
+ "s_cbranch_g_fork", (outs),
+ (ins SReg_64:$src0, SReg_64:$src1),
+ "$src0, $src1"
+> {
+ let has_sdst = 0;
+}
+
+let Defs = [SCC] in {
+def S_ABSDIFF_I32 : SOP2_32 <"s_absdiff_i32">;
+} // End Defs = [SCC]
+
+
+//===----------------------------------------------------------------------===//
+// SOPK Instructions
+//===----------------------------------------------------------------------===//
+
+class SOPK_Pseudo <string opName, dag outs, dag ins,
+ string asmOps, list<dag> pattern=[]> :
+ InstSI <outs, ins, "", pattern>,
+ SIMCInstr<opName, SIEncodingFamily.NONE> {
+ let isPseudo = 1;
+ let isCodeGenOnly = 1;
+ let SubtargetPredicate = isGCN;
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let SALU = 1;
+ let SOPK = 1;
+ let SchedRW = [WriteSALU];
+ let UseNamedOperandTable = 1;
+ string Mnemonic = opName;
+ string AsmOperands = asmOps;
+
+ bits<1> has_sdst = 1;
+}
+
+class SOPK_Real<bits<5> op, SOPK_Pseudo ps> :
+ InstSI <ps.OutOperandList, ps.InOperandList,
+ ps.Mnemonic # " " # ps.AsmOperands, []> {
+ let isPseudo = 0;
+ let isCodeGenOnly = 0;
+
+ // copy relevant pseudo op flags
+ let SubtargetPredicate = ps.SubtargetPredicate;
+ let AsmMatchConverter = ps.AsmMatchConverter;
+ let DisableEncoding = ps.DisableEncoding;
+ let Constraints = ps.Constraints;
+
+ // encoding
+ bits<7> sdst;
+ bits<16> simm16;
+ bits<32> imm;
+}
+
+class SOPK_Real32<bits<5> op, SOPK_Pseudo ps> :
+ SOPK_Real <op, ps>,
+ Enc32 {
+ let Inst{15-0} = simm16;
+ let Inst{22-16} = !if(ps.has_sdst, sdst, ?);
+ let Inst{27-23} = op;
+ let Inst{31-28} = 0xb; //encoding
+}
+
+class SOPK_Real64<bits<5> op, SOPK_Pseudo ps> :
+ SOPK_Real<op, ps>,
+ Enc64 {
+ let Inst{15-0} = simm16;
+ let Inst{22-16} = !if(ps.has_sdst, sdst, ?);
+ let Inst{27-23} = op;
+ let Inst{31-28} = 0xb; //encoding
+ let Inst{63-32} = imm;
+}
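+// The 64-bit form carries the extra 32-bit literal needed by
+// s_setreg_imm32_b32, the only SOPK instruction with Size = 8 (see below).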
+
+class SOPKInstTable <bit is_sopk, string cmpOp = ""> {
+ bit IsSOPK = is_sopk;
+ string BaseCmpOp = cmpOp;
+}
+
+class SOPK_32 <string opName, list<dag> pattern=[]> : SOPK_Pseudo <
+ opName,
+ (outs SReg_32:$sdst),
+ (ins u16imm:$simm16),
+ "$sdst, $simm16",
+ pattern>;
+
+class SOPK_SCC <string opName, string base_op = ""> : SOPK_Pseudo <
+ opName,
+ (outs),
+ (ins SReg_32:$sdst, u16imm:$simm16),
+ "$sdst, $simm16", []>,
+ SOPKInstTable<1, base_op>{
+ let Defs = [SCC];
+}
+
+class SOPK_32TIE <string opName, list<dag> pattern=[]> : SOPK_Pseudo <
+ opName,
+ (outs SReg_32:$sdst),
+ (ins SReg_32:$src0, u16imm:$simm16),
+ "$sdst, $simm16",
+ pattern
+>;
+
+let isReMaterializable = 1, isMoveImm = 1 in {
+def S_MOVK_I32 : SOPK_32 <"s_movk_i32">;
+} // End isReMaterializable = 1
+let Uses = [SCC] in {
+def S_CMOVK_I32 : SOPK_32 <"s_cmovk_i32">;
+}
+
+let isCompare = 1 in {
+
+// This instruction is disabled for now until we can figure out how to teach
+// the instruction selector to correctly use the S_CMP* vs V_CMP*
+// instructions.
+//
+// When this instruction is enabled the code generator sometimes produces this
+// invalid sequence:
+//
+// SCC = S_CMPK_EQ_I32 SGPR0, imm
+// VCC = COPY SCC
+// VGPR0 = V_CNDMASK VCC, VGPR0, VGPR1
+//
+// def S_CMPK_EQ_I32 : SOPK_SCC <"s_cmpk_eq_i32",
+// [(set i1:$dst, (setcc i32:$src0, imm:$src1, SETEQ))]
+// >;
+
+def S_CMPK_EQ_I32 : SOPK_SCC <"s_cmpk_eq_i32", "s_cmp_eq_i32">;
+def S_CMPK_LG_I32 : SOPK_SCC <"s_cmpk_lg_i32", "s_cmp_lg_i32">;
+def S_CMPK_GT_I32 : SOPK_SCC <"s_cmpk_gt_i32", "s_cmp_gt_i32">;
+def S_CMPK_GE_I32 : SOPK_SCC <"s_cmpk_ge_i32", "s_cmp_ge_i32">;
+def S_CMPK_LT_I32 : SOPK_SCC <"s_cmpk_lt_i32", "s_cmp_lt_i32">;
+def S_CMPK_LE_I32 : SOPK_SCC <"s_cmpk_le_i32", "s_cmp_le_i32">;
+
+let SOPKZext = 1 in {
+def S_CMPK_EQ_U32 : SOPK_SCC <"s_cmpk_eq_u32", "s_cmp_eq_u32">;
+def S_CMPK_LG_U32 : SOPK_SCC <"s_cmpk_lg_u32", "s_cmp_lg_u32">;
+def S_CMPK_GT_U32 : SOPK_SCC <"s_cmpk_gt_u32", "s_cmp_gt_u32">;
+def S_CMPK_GE_U32 : SOPK_SCC <"s_cmpk_ge_u32", "s_cmp_ge_u32">;
+def S_CMPK_LT_U32 : SOPK_SCC <"s_cmpk_lt_u32", "s_cmp_lt_u32">;
+def S_CMPK_LE_U32 : SOPK_SCC <"s_cmpk_le_u32", "s_cmp_le_u32">;
+} // End SOPKZext = 1
+} // End isCompare = 1
+
+let Defs = [SCC], isCommutable = 1, DisableEncoding = "$src0",
+ Constraints = "$sdst = $src0" in {
+ def S_ADDK_I32 : SOPK_32TIE <"s_addk_i32">;
+ def S_MULK_I32 : SOPK_32TIE <"s_mulk_i32">;
+}
+
+def S_CBRANCH_I_FORK : SOPK_Pseudo <
+ "s_cbranch_i_fork",
+ (outs), (ins SReg_64:$sdst, u16imm:$simm16),
+ "$sdst, $simm16"
+>;
+
+let mayLoad = 1 in {
+def S_GETREG_B32 : SOPK_Pseudo <
+ "s_getreg_b32",
+ (outs SReg_32:$sdst), (ins hwreg:$simm16),
+ "$sdst, $simm16"
+>;
+}
+
+let hasSideEffects = 1 in {
+
+def S_SETREG_B32 : SOPK_Pseudo <
+ "s_setreg_b32",
+ (outs), (ins SReg_32:$sdst, hwreg:$simm16),
+ "$simm16, $sdst",
+ [(AMDGPUsetreg i32:$sdst, (i16 timm:$simm16))]
+>;
+
+// FIXME: Not on SI?
+//def S_GETREG_REGRD_B32 : SOPK_32 <sopk<0x14, 0x13>, "s_getreg_regrd_b32">;
+
+def S_SETREG_IMM32_B32 : SOPK_Pseudo <
+ "s_setreg_imm32_b32",
+ (outs), (ins i32imm:$imm, hwreg:$simm16),
+ "$simm16, $imm"> {
+ let Size = 8; // Unlike every other SOPK instruction.
+ let has_sdst = 0;
+}
+
+} // End hasSideEffects = 1
+
+//===----------------------------------------------------------------------===//
+// SOPC Instructions
+//===----------------------------------------------------------------------===//
+
+class SOPCe <bits<7> op> : Enc32 {
+ bits<8> src0;
+ bits<8> src1;
+
+ let Inst{7-0} = src0;
+ let Inst{15-8} = src1;
+ let Inst{22-16} = op;
+ let Inst{31-23} = 0x17e;
+}
+
+class SOPC <bits<7> op, dag outs, dag ins, string asm,
+ list<dag> pattern = []> :
+ InstSI<outs, ins, asm, pattern>, SOPCe <op> {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let SALU = 1;
+ let SOPC = 1;
+ let isCodeGenOnly = 0;
+ let Defs = [SCC];
+ let SchedRW = [WriteSALU];
+ let UseNamedOperandTable = 1;
+ let SubtargetPredicate = isGCN;
+}
+
+class SOPC_Base <bits<7> op, RegisterOperand rc0, RegisterOperand rc1,
+ string opName, list<dag> pattern = []> : SOPC <
+ op, (outs), (ins rc0:$src0, rc1:$src1),
+ opName#" $src0, $src1", pattern > {
+ let Defs = [SCC];
+}
+class SOPC_Helper <bits<7> op, RegisterOperand rc, ValueType vt,
+ string opName, PatLeaf cond> : SOPC_Base <
+ op, rc, rc, opName,
+ [(set SCC, (si_setcc_uniform vt:$src0, vt:$src1, cond))] > {
+}
+
+class SOPC_CMP_32<bits<7> op, string opName,
+ PatLeaf cond = COND_NULL, string revOp = opName>
+ : SOPC_Helper<op, SSrc_b32, i32, opName, cond>,
+ Commutable_REV<revOp, !eq(revOp, opName)>,
+ SOPKInstTable<0, opName> {
+ let isCompare = 1;
+ let isCommutable = 1;
+}
+
+class SOPC_CMP_64<bits<7> op, string opName,
+ PatLeaf cond = COND_NULL, string revOp = opName>
+ : SOPC_Helper<op, SSrc_b64, i64, opName, cond>,
+ Commutable_REV<revOp, !eq(revOp, opName)> {
+ let isCompare = 1;
+ let isCommutable = 1;
+}
+
+class SOPC_32<bits<7> op, string opName, list<dag> pattern = []>
+ : SOPC_Base<op, SSrc_b32, SSrc_b32, opName, pattern>;
+
+class SOPC_64_32<bits<7> op, string opName, list<dag> pattern = []>
+ : SOPC_Base<op, SSrc_b64, SSrc_b32, opName, pattern>;
+
+def S_CMP_EQ_I32 : SOPC_CMP_32 <0x00, "s_cmp_eq_i32">;
+def S_CMP_LG_I32 : SOPC_CMP_32 <0x01, "s_cmp_lg_i32">;
+def S_CMP_GT_I32 : SOPC_CMP_32 <0x02, "s_cmp_gt_i32", COND_SGT>;
+def S_CMP_GE_I32 : SOPC_CMP_32 <0x03, "s_cmp_ge_i32", COND_SGE>;
+def S_CMP_LT_I32 : SOPC_CMP_32 <0x04, "s_cmp_lt_i32", COND_SLT, "s_cmp_gt_i32">;
+def S_CMP_LE_I32 : SOPC_CMP_32 <0x05, "s_cmp_le_i32", COND_SLE, "s_cmp_ge_i32">;
+def S_CMP_EQ_U32 : SOPC_CMP_32 <0x06, "s_cmp_eq_u32", COND_EQ>;
+def S_CMP_LG_U32 : SOPC_CMP_32 <0x07, "s_cmp_lg_u32", COND_NE>;
+def S_CMP_GT_U32 : SOPC_CMP_32 <0x08, "s_cmp_gt_u32", COND_UGT>;
+def S_CMP_GE_U32 : SOPC_CMP_32 <0x09, "s_cmp_ge_u32", COND_UGE>;
+def S_CMP_LT_U32 : SOPC_CMP_32 <0x0a, "s_cmp_lt_u32", COND_ULT, "s_cmp_gt_u32">;
+def S_CMP_LE_U32 : SOPC_CMP_32 <0x0b, "s_cmp_le_u32", COND_ULE, "s_cmp_ge_u32">;
+
+def S_BITCMP0_B32 : SOPC_32 <0x0c, "s_bitcmp0_b32">;
+def S_BITCMP1_B32 : SOPC_32 <0x0d, "s_bitcmp1_b32">;
+def S_BITCMP0_B64 : SOPC_64_32 <0x0e, "s_bitcmp0_b64">;
+def S_BITCMP1_B64 : SOPC_64_32 <0x0f, "s_bitcmp1_b64">;
+def S_SETVSKIP : SOPC_32 <0x10, "s_setvskip">;
+
+let SubtargetPredicate = isVI in {
+def S_CMP_EQ_U64 : SOPC_CMP_64 <0x12, "s_cmp_eq_u64", COND_EQ>;
+def S_CMP_LG_U64 : SOPC_CMP_64 <0x13, "s_cmp_lg_u64", COND_NE>;
+}
+
+let SubtargetPredicate = HasVGPRIndexMode in {
+def S_SET_GPR_IDX_ON : SOPC <0x11,
+ (outs),
+ (ins SSrc_b32:$src0, GPRIdxMode:$src1),
+ "s_set_gpr_idx_on $src0,$src1"> {
+ let Defs = [M0]; // No scc def
+ let Uses = [M0]; // Other bits of m0 unmodified.
+ let hasSideEffects = 1; // Sets mode.gpr_idx_en
+ let FixedSize = 1;
+}
+}
+
+//===----------------------------------------------------------------------===//
+// SOPP Instructions
+//===----------------------------------------------------------------------===//
+
+class SOPPe <bits<7> op> : Enc32 {
+ bits <16> simm16;
+
+ let Inst{15-0} = simm16;
+ let Inst{22-16} = op;
+ let Inst{31-23} = 0x17f; // encoding
+}
+
+class SOPP <bits<7> op, dag ins, string asm, list<dag> pattern = []> :
+ InstSI <(outs), ins, asm, pattern >, SOPPe <op> {
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let SALU = 1;
+ let SOPP = 1;
+ let Size = 4;
+ let SchedRW = [WriteSALU];
+
+ let UseNamedOperandTable = 1;
+ let SubtargetPredicate = isGCN;
+}
+
+
+def S_NOP : SOPP <0x00000000, (ins i16imm:$simm16), "s_nop $simm16">;
+
+let isTerminator = 1 in {
+
+def S_ENDPGM : SOPP <0x00000001, (ins), "s_endpgm",
+ [(AMDGPUendpgm)]> {
+ let simm16 = 0;
+ let isBarrier = 1;
+ let isReturn = 1;
+}
+
+let isBranch = 1, SchedRW = [WriteBranch] in {
+def S_BRANCH : SOPP <
+ 0x00000002, (ins sopp_brtarget:$simm16), "s_branch $simm16",
+ [(br bb:$simm16)]> {
+ let isBarrier = 1;
+}
+
+let Uses = [SCC] in {
+def S_CBRANCH_SCC0 : SOPP <
+ 0x00000004, (ins sopp_brtarget:$simm16),
+ "s_cbranch_scc0 $simm16"
+>;
+def S_CBRANCH_SCC1 : SOPP <
+ 0x00000005, (ins sopp_brtarget:$simm16),
+ "s_cbranch_scc1 $simm16",
+ [(si_uniform_br_scc SCC, bb:$simm16)]
+>;
+} // End Uses = [SCC]
+
+let Uses = [VCC] in {
+def S_CBRANCH_VCCZ : SOPP <
+ 0x00000006, (ins sopp_brtarget:$simm16),
+ "s_cbranch_vccz $simm16"
+>;
+def S_CBRANCH_VCCNZ : SOPP <
+ 0x00000007, (ins sopp_brtarget:$simm16),
+ "s_cbranch_vccnz $simm16"
+>;
+} // End Uses = [VCC]
+
+let Uses = [EXEC] in {
+def S_CBRANCH_EXECZ : SOPP <
+ 0x00000008, (ins sopp_brtarget:$simm16),
+ "s_cbranch_execz $simm16"
+>;
+def S_CBRANCH_EXECNZ : SOPP <
+ 0x00000009, (ins sopp_brtarget:$simm16),
+ "s_cbranch_execnz $simm16"
+>;
+} // End Uses = [EXEC]
+
+
+} // End isBranch = 1
+} // End isTerminator = 1
+
+let hasSideEffects = 1 in {
+def S_BARRIER : SOPP <0x0000000a, (ins), "s_barrier",
+ [(int_amdgcn_s_barrier)]> {
+ let SchedRW = [WriteBarrier];
+ let simm16 = 0;
+ let mayLoad = 1;
+ let mayStore = 1;
+ let isConvergent = 1;
+}
+
+let mayLoad = 1, mayStore = 1, hasSideEffects = 1 in
+def S_WAITCNT : SOPP <0x0000000c, (ins WAIT_FLAG:$simm16), "s_waitcnt $simm16">;
+def S_SETHALT : SOPP <0x0000000d, (ins i16imm:$simm16), "s_sethalt $simm16">;
+
+// On SI the documentation says sleep for approximately 64 * low 2
+// bits, consistent with the reported maximum of 448. On VI the
+// maximum reported is 960 cycles, which gives 960 / 64 = 15; is the
+// maximum really 15 on VI?
+def S_SLEEP : SOPP <0x0000000e, (ins i32imm:$simm16),
+ "s_sleep $simm16", [(int_amdgcn_s_sleep SIMM16bit:$simm16)]> {
+ let hasSideEffects = 1;
+ let mayLoad = 1;
+ let mayStore = 1;
+}
+
+def S_SETPRIO : SOPP <0x0000000f, (ins i16imm:$simm16), "s_setprio $simm16">;
+
+let Uses = [EXEC, M0] in {
+// FIXME: Should this be mayLoad+mayStore?
+def S_SENDMSG : SOPP <0x00000010, (ins SendMsgImm:$simm16), "s_sendmsg $simm16",
+ [(AMDGPUsendmsg (i32 imm:$simm16))]
+>;
+} // End Uses = [EXEC, M0]
+
+def S_SENDMSGHALT : SOPP <0x00000011, (ins SendMsgImm:$simm16), "s_sendmsghalt $simm16">;
+def S_TRAP : SOPP <0x00000012, (ins i16imm:$simm16), "s_trap $simm16">;
+def S_ICACHE_INV : SOPP <0x00000013, (ins), "s_icache_inv"> {
+ let simm16 = 0;
+}
+def S_INCPERFLEVEL : SOPP <0x00000014, (ins i32imm:$simm16), "s_incperflevel $simm16",
+ [(int_amdgcn_s_incperflevel SIMM16bit:$simm16)]> {
+ let hasSideEffects = 1;
+ let mayLoad = 1;
+ let mayStore = 1;
+}
+def S_DECPERFLEVEL : SOPP <0x00000015, (ins i32imm:$simm16), "s_decperflevel $simm16",
+ [(int_amdgcn_s_decperflevel SIMM16bit:$simm16)]> {
+ let hasSideEffects = 1;
+ let mayLoad = 1;
+ let mayStore = 1;
+}
+def S_TTRACEDATA : SOPP <0x00000016, (ins), "s_ttracedata"> {
+ let simm16 = 0;
+}
+
+let SubtargetPredicate = HasVGPRIndexMode in {
+def S_SET_GPR_IDX_OFF : SOPP<0x1c, (ins), "s_set_gpr_idx_off"> {
+ let simm16 = 0;
+}
+}
+} // End hasSideEffects
+
+let SubtargetPredicate = HasVGPRIndexMode in {
+def S_SET_GPR_IDX_MODE : SOPP<0x1d, (ins GPRIdxMode:$simm16),
+ "s_set_gpr_idx_mode$simm16"> {
+ let Defs = [M0];
+}
+}
+
+let Predicates = [isGCN] in {
+
+//===----------------------------------------------------------------------===//
+// S_GETREG_B32 Intrinsic Pattern.
+//===----------------------------------------------------------------------===//
+def : Pat <
+ (int_amdgcn_s_getreg imm:$simm16),
+ (S_GETREG_B32 (as_i16imm $simm16))
+>;
+
+//===----------------------------------------------------------------------===//
+// SOP1 Patterns
+//===----------------------------------------------------------------------===//
+
+def : Pat <
+ (i64 (ctpop i64:$src)),
+ (i64 (REG_SEQUENCE SReg_64,
+ (i32 (COPY_TO_REGCLASS (S_BCNT1_I32_B64 $src), SReg_32)), sub0,
+ (S_MOV_B32 (i32 0)), sub1))
+>;
+
+def : Pat <
+ (i32 (smax i32:$x, (i32 (ineg i32:$x)))),
+ (S_ABS_I32 $x)
+>;
+
+def : Pat <
+ (i16 imm:$imm),
+ (S_MOV_B32 imm:$imm)
+>;
+
+// Same as a 32-bit inreg
+def : Pat<
+ (i32 (sext i16:$src)),
+ (S_SEXT_I32_I16 $src)
+>;
+
+
+//===----------------------------------------------------------------------===//
+// SOP2 Patterns
+//===----------------------------------------------------------------------===//
+
+// V_ADD_I32_e32/S_ADD_U32 produce the carry in VCC/SCC, respectively. For
+// the vector case, the sgpr-copies pass will fix this to use the vector
+// version.
+def : Pat <
+ (i32 (addc i32:$src0, i32:$src1)),
+ (S_ADD_U32 $src0, $src1)
+>;
+
+// FIXME: We need to use COPY_TO_REGCLASS to work-around the fact that
+// REG_SEQUENCE patterns don't support instructions with multiple
+// outputs.
+def : Pat<
+ (i64 (zext i16:$src)),
+ (REG_SEQUENCE SReg_64,
+ (i32 (COPY_TO_REGCLASS (S_AND_B32 $src, (S_MOV_B32 (i32 0xffff))), SGPR_32)), sub0,
+ (S_MOV_B32 (i32 0)), sub1)
+>;
+
+def : Pat <
+ (i64 (sext i16:$src)),
+ (REG_SEQUENCE SReg_64, (i32 (S_SEXT_I32_I16 $src)), sub0,
+ (i32 (COPY_TO_REGCLASS (S_ASHR_I32 (i32 (S_SEXT_I32_I16 $src)), (S_MOV_B32 (i32 31))), SGPR_32)), sub1)
+>;
+
+def : Pat<
+ (i32 (zext i16:$src)),
+ (S_AND_B32 (S_MOV_B32 (i32 0xffff)), $src)
+>;
+
+
+
+//===----------------------------------------------------------------------===//
+// SOPP Patterns
+//===----------------------------------------------------------------------===//
+
+def : Pat <
+ (int_amdgcn_s_waitcnt i32:$simm16),
+ (S_WAITCNT (as_i16imm $simm16))
+>;
+
+} // End isGCN predicate
+
+
+//===----------------------------------------------------------------------===//
+// Real target instructions, move this to the appropriate subtarget TD file
+//===----------------------------------------------------------------------===//
+
+class Select_si<string opName> :
+ SIMCInstr<opName, SIEncodingFamily.SI> {
+ list<Predicate> AssemblerPredicates = [isSICI];
+ string DecoderNamespace = "SICI";
+}
+
+class SOP1_Real_si<bits<8> op, SOP1_Pseudo ps> :
+ SOP1_Real<op, ps>,
+ Select_si<ps.Mnemonic>;
+
+class SOP2_Real_si<bits<7> op, SOP2_Pseudo ps> :
+ SOP2_Real<op, ps>,
+ Select_si<ps.Mnemonic>;
+
+class SOPK_Real_si<bits<5> op, SOPK_Pseudo ps> :
+ SOPK_Real32<op, ps>,
+ Select_si<ps.Mnemonic>;
+
+def S_MOV_B32_si : SOP1_Real_si <0x03, S_MOV_B32>;
+def S_MOV_B64_si : SOP1_Real_si <0x04, S_MOV_B64>;
+def S_CMOV_B32_si : SOP1_Real_si <0x05, S_CMOV_B32>;
+def S_CMOV_B64_si : SOP1_Real_si <0x06, S_CMOV_B64>;
+def S_NOT_B32_si : SOP1_Real_si <0x07, S_NOT_B32>;
+def S_NOT_B64_si : SOP1_Real_si <0x08, S_NOT_B64>;
+def S_WQM_B32_si : SOP1_Real_si <0x09, S_WQM_B32>;
+def S_WQM_B64_si : SOP1_Real_si <0x0a, S_WQM_B64>;
+def S_BREV_B32_si : SOP1_Real_si <0x0b, S_BREV_B32>;
+def S_BREV_B64_si : SOP1_Real_si <0x0c, S_BREV_B64>;
+def S_BCNT0_I32_B32_si : SOP1_Real_si <0x0d, S_BCNT0_I32_B32>;
+def S_BCNT0_I32_B64_si : SOP1_Real_si <0x0e, S_BCNT0_I32_B64>;
+def S_BCNT1_I32_B32_si : SOP1_Real_si <0x0f, S_BCNT1_I32_B32>;
+def S_BCNT1_I32_B64_si : SOP1_Real_si <0x10, S_BCNT1_I32_B64>;
+def S_FF0_I32_B32_si : SOP1_Real_si <0x11, S_FF0_I32_B32>;
+def S_FF0_I32_B64_si : SOP1_Real_si <0x12, S_FF0_I32_B64>;
+def S_FF1_I32_B32_si : SOP1_Real_si <0x13, S_FF1_I32_B32>;
+def S_FF1_I32_B64_si : SOP1_Real_si <0x14, S_FF1_I32_B64>;
+def S_FLBIT_I32_B32_si : SOP1_Real_si <0x15, S_FLBIT_I32_B32>;
+def S_FLBIT_I32_B64_si : SOP1_Real_si <0x16, S_FLBIT_I32_B64>;
+def S_FLBIT_I32_si : SOP1_Real_si <0x17, S_FLBIT_I32>;
+def S_FLBIT_I32_I64_si : SOP1_Real_si <0x18, S_FLBIT_I32_I64>;
+def S_SEXT_I32_I8_si : SOP1_Real_si <0x19, S_SEXT_I32_I8>;
+def S_SEXT_I32_I16_si : SOP1_Real_si <0x1a, S_SEXT_I32_I16>;
+def S_BITSET0_B32_si : SOP1_Real_si <0x1b, S_BITSET0_B32>;
+def S_BITSET0_B64_si : SOP1_Real_si <0x1c, S_BITSET0_B64>;
+def S_BITSET1_B32_si : SOP1_Real_si <0x1d, S_BITSET1_B32>;
+def S_BITSET1_B64_si : SOP1_Real_si <0x1e, S_BITSET1_B64>;
+def S_GETPC_B64_si : SOP1_Real_si <0x1f, S_GETPC_B64>;
+def S_SETPC_B64_si : SOP1_Real_si <0x20, S_SETPC_B64>;
+def S_SWAPPC_B64_si : SOP1_Real_si <0x21, S_SWAPPC_B64>;
+def S_RFE_B64_si : SOP1_Real_si <0x22, S_RFE_B64>;
+def S_AND_SAVEEXEC_B64_si : SOP1_Real_si <0x24, S_AND_SAVEEXEC_B64>;
+def S_OR_SAVEEXEC_B64_si : SOP1_Real_si <0x25, S_OR_SAVEEXEC_B64>;
+def S_XOR_SAVEEXEC_B64_si : SOP1_Real_si <0x26, S_XOR_SAVEEXEC_B64>;
+def S_ANDN2_SAVEEXEC_B64_si: SOP1_Real_si <0x27, S_ANDN2_SAVEEXEC_B64>;
+def S_ORN2_SAVEEXEC_B64_si : SOP1_Real_si <0x28, S_ORN2_SAVEEXEC_B64>;
+def S_NAND_SAVEEXEC_B64_si : SOP1_Real_si <0x29, S_NAND_SAVEEXEC_B64>;
+def S_NOR_SAVEEXEC_B64_si : SOP1_Real_si <0x2a, S_NOR_SAVEEXEC_B64>;
+def S_XNOR_SAVEEXEC_B64_si : SOP1_Real_si <0x2b, S_XNOR_SAVEEXEC_B64>;
+def S_QUADMASK_B32_si : SOP1_Real_si <0x2c, S_QUADMASK_B32>;
+def S_QUADMASK_B64_si : SOP1_Real_si <0x2d, S_QUADMASK_B64>;
+def S_MOVRELS_B32_si : SOP1_Real_si <0x2e, S_MOVRELS_B32>;
+def S_MOVRELS_B64_si : SOP1_Real_si <0x2f, S_MOVRELS_B64>;
+def S_MOVRELD_B32_si : SOP1_Real_si <0x30, S_MOVRELD_B32>;
+def S_MOVRELD_B64_si : SOP1_Real_si <0x31, S_MOVRELD_B64>;
+def S_CBRANCH_JOIN_si : SOP1_Real_si <0x32, S_CBRANCH_JOIN>;
+def S_MOV_REGRD_B32_si : SOP1_Real_si <0x33, S_MOV_REGRD_B32>;
+def S_ABS_I32_si : SOP1_Real_si <0x34, S_ABS_I32>;
+def S_MOV_FED_B32_si : SOP1_Real_si <0x35, S_MOV_FED_B32>;
+
+def S_ADD_U32_si : SOP2_Real_si <0x00, S_ADD_U32>;
+def S_ADD_I32_si : SOP2_Real_si <0x02, S_ADD_I32>;
+def S_SUB_U32_si : SOP2_Real_si <0x01, S_SUB_U32>;
+def S_SUB_I32_si : SOP2_Real_si <0x03, S_SUB_I32>;
+def S_ADDC_U32_si : SOP2_Real_si <0x04, S_ADDC_U32>;
+def S_SUBB_U32_si : SOP2_Real_si <0x05, S_SUBB_U32>;
+def S_MIN_I32_si : SOP2_Real_si <0x06, S_MIN_I32>;
+def S_MIN_U32_si : SOP2_Real_si <0x07, S_MIN_U32>;
+def S_MAX_I32_si : SOP2_Real_si <0x08, S_MAX_I32>;
+def S_MAX_U32_si : SOP2_Real_si <0x09, S_MAX_U32>;
+def S_CSELECT_B32_si : SOP2_Real_si <0x0a, S_CSELECT_B32>;
+def S_CSELECT_B64_si : SOP2_Real_si <0x0b, S_CSELECT_B64>;
+def S_AND_B32_si : SOP2_Real_si <0x0e, S_AND_B32>;
+def S_AND_B64_si : SOP2_Real_si <0x0f, S_AND_B64>;
+def S_OR_B32_si : SOP2_Real_si <0x10, S_OR_B32>;
+def S_OR_B64_si : SOP2_Real_si <0x11, S_OR_B64>;
+def S_XOR_B32_si : SOP2_Real_si <0x12, S_XOR_B32>;
+def S_XOR_B64_si : SOP2_Real_si <0x13, S_XOR_B64>;
+def S_ANDN2_B32_si : SOP2_Real_si <0x14, S_ANDN2_B32>;
+def S_ANDN2_B64_si : SOP2_Real_si <0x15, S_ANDN2_B64>;
+def S_ORN2_B32_si : SOP2_Real_si <0x16, S_ORN2_B32>;
+def S_ORN2_B64_si : SOP2_Real_si <0x17, S_ORN2_B64>;
+def S_NAND_B32_si : SOP2_Real_si <0x18, S_NAND_B32>;
+def S_NAND_B64_si : SOP2_Real_si <0x19, S_NAND_B64>;
+def S_NOR_B32_si : SOP2_Real_si <0x1a, S_NOR_B32>;
+def S_NOR_B64_si : SOP2_Real_si <0x1b, S_NOR_B64>;
+def S_XNOR_B32_si : SOP2_Real_si <0x1c, S_XNOR_B32>;
+def S_XNOR_B64_si : SOP2_Real_si <0x1d, S_XNOR_B64>;
+def S_LSHL_B32_si : SOP2_Real_si <0x1e, S_LSHL_B32>;
+def S_LSHL_B64_si : SOP2_Real_si <0x1f, S_LSHL_B64>;
+def S_LSHR_B32_si : SOP2_Real_si <0x20, S_LSHR_B32>;
+def S_LSHR_B64_si : SOP2_Real_si <0x21, S_LSHR_B64>;
+def S_ASHR_I32_si : SOP2_Real_si <0x22, S_ASHR_I32>;
+def S_ASHR_I64_si : SOP2_Real_si <0x23, S_ASHR_I64>;
+def S_BFM_B32_si : SOP2_Real_si <0x24, S_BFM_B32>;
+def S_BFM_B64_si : SOP2_Real_si <0x25, S_BFM_B64>;
+def S_MUL_I32_si : SOP2_Real_si <0x26, S_MUL_I32>;
+def S_BFE_U32_si : SOP2_Real_si <0x27, S_BFE_U32>;
+def S_BFE_I32_si : SOP2_Real_si <0x28, S_BFE_I32>;
+def S_BFE_U64_si : SOP2_Real_si <0x29, S_BFE_U64>;
+def S_BFE_I64_si : SOP2_Real_si <0x2a, S_BFE_I64>;
+def S_CBRANCH_G_FORK_si : SOP2_Real_si <0x2b, S_CBRANCH_G_FORK>;
+def S_ABSDIFF_I32_si : SOP2_Real_si <0x2c, S_ABSDIFF_I32>;
+
+def S_MOVK_I32_si : SOPK_Real_si <0x00, S_MOVK_I32>;
+def S_CMOVK_I32_si : SOPK_Real_si <0x02, S_CMOVK_I32>;
+def S_CMPK_EQ_I32_si : SOPK_Real_si <0x03, S_CMPK_EQ_I32>;
+def S_CMPK_LG_I32_si : SOPK_Real_si <0x04, S_CMPK_LG_I32>;
+def S_CMPK_GT_I32_si : SOPK_Real_si <0x05, S_CMPK_GT_I32>;
+def S_CMPK_GE_I32_si : SOPK_Real_si <0x06, S_CMPK_GE_I32>;
+def S_CMPK_LT_I32_si : SOPK_Real_si <0x07, S_CMPK_LT_I32>;
+def S_CMPK_LE_I32_si : SOPK_Real_si <0x08, S_CMPK_LE_I32>;
+def S_CMPK_EQ_U32_si : SOPK_Real_si <0x09, S_CMPK_EQ_U32>;
+def S_CMPK_LG_U32_si : SOPK_Real_si <0x0a, S_CMPK_LG_U32>;
+def S_CMPK_GT_U32_si : SOPK_Real_si <0x0b, S_CMPK_GT_U32>;
+def S_CMPK_GE_U32_si : SOPK_Real_si <0x0c, S_CMPK_GE_U32>;
+def S_CMPK_LT_U32_si : SOPK_Real_si <0x0d, S_CMPK_LT_U32>;
+def S_CMPK_LE_U32_si : SOPK_Real_si <0x0e, S_CMPK_LE_U32>;
+def S_ADDK_I32_si : SOPK_Real_si <0x0f, S_ADDK_I32>;
+def S_MULK_I32_si : SOPK_Real_si <0x10, S_MULK_I32>;
+def S_CBRANCH_I_FORK_si : SOPK_Real_si <0x11, S_CBRANCH_I_FORK>;
+def S_GETREG_B32_si : SOPK_Real_si <0x12, S_GETREG_B32>;
+def S_SETREG_B32_si : SOPK_Real_si <0x13, S_SETREG_B32>;
+//def S_GETREG_REGRD_B32_si : SOPK_Real_si <0x14, S_GETREG_REGRD_B32>; // see pseudo for comments
+def S_SETREG_IMM32_B32_si : SOPK_Real64<0x15, S_SETREG_IMM32_B32>,
+ Select_si<S_SETREG_IMM32_B32.Mnemonic>;
+
+
+class Select_vi<string opName> :
+ SIMCInstr<opName, SIEncodingFamily.VI> {
+ list<Predicate> AssemblerPredicates = [isVI];
+ string DecoderNamespace = "VI";
+}
+
+class SOP1_Real_vi<bits<8> op, SOP1_Pseudo ps> :
+ SOP1_Real<op, ps>,
+ Select_vi<ps.Mnemonic>;
+
+
+class SOP2_Real_vi<bits<7> op, SOP2_Pseudo ps> :
+ SOP2_Real<op, ps>,
+ Select_vi<ps.Mnemonic>;
+
+class SOPK_Real_vi<bits<5> op, SOPK_Pseudo ps> :
+ SOPK_Real32<op, ps>,
+ Select_vi<ps.Mnemonic>;
+
+def S_MOV_B32_vi : SOP1_Real_vi <0x00, S_MOV_B32>;
+def S_MOV_B64_vi : SOP1_Real_vi <0x01, S_MOV_B64>;
+def S_CMOV_B32_vi : SOP1_Real_vi <0x02, S_CMOV_B32>;
+def S_CMOV_B64_vi : SOP1_Real_vi <0x03, S_CMOV_B64>;
+def S_NOT_B32_vi : SOP1_Real_vi <0x04, S_NOT_B32>;
+def S_NOT_B64_vi : SOP1_Real_vi <0x05, S_NOT_B64>;
+def S_WQM_B32_vi : SOP1_Real_vi <0x06, S_WQM_B32>;
+def S_WQM_B64_vi : SOP1_Real_vi <0x07, S_WQM_B64>;
+def S_BREV_B32_vi : SOP1_Real_vi <0x08, S_BREV_B32>;
+def S_BREV_B64_vi : SOP1_Real_vi <0x09, S_BREV_B64>;
+def S_BCNT0_I32_B32_vi : SOP1_Real_vi <0x0a, S_BCNT0_I32_B32>;
+def S_BCNT0_I32_B64_vi : SOP1_Real_vi <0x0b, S_BCNT0_I32_B64>;
+def S_BCNT1_I32_B32_vi : SOP1_Real_vi <0x0c, S_BCNT1_I32_B32>;
+def S_BCNT1_I32_B64_vi : SOP1_Real_vi <0x0d, S_BCNT1_I32_B64>;
+def S_FF0_I32_B32_vi : SOP1_Real_vi <0x0e, S_FF0_I32_B32>;
+def S_FF0_I32_B64_vi : SOP1_Real_vi <0x0f, S_FF0_I32_B64>;
+def S_FF1_I32_B32_vi : SOP1_Real_vi <0x10, S_FF1_I32_B32>;
+def S_FF1_I32_B64_vi : SOP1_Real_vi <0x11, S_FF1_I32_B64>;
+def S_FLBIT_I32_B32_vi : SOP1_Real_vi <0x12, S_FLBIT_I32_B32>;
+def S_FLBIT_I32_B64_vi : SOP1_Real_vi <0x13, S_FLBIT_I32_B64>;
+def S_FLBIT_I32_vi : SOP1_Real_vi <0x14, S_FLBIT_I32>;
+def S_FLBIT_I32_I64_vi : SOP1_Real_vi <0x15, S_FLBIT_I32_I64>;
+def S_SEXT_I32_I8_vi : SOP1_Real_vi <0x16, S_SEXT_I32_I8>;
+def S_SEXT_I32_I16_vi : SOP1_Real_vi <0x17, S_SEXT_I32_I16>;
+def S_BITSET0_B32_vi : SOP1_Real_vi <0x18, S_BITSET0_B32>;
+def S_BITSET0_B64_vi : SOP1_Real_vi <0x19, S_BITSET0_B64>;
+def S_BITSET1_B32_vi : SOP1_Real_vi <0x1a, S_BITSET1_B32>;
+def S_BITSET1_B64_vi : SOP1_Real_vi <0x1b, S_BITSET1_B64>;
+def S_GETPC_B64_vi : SOP1_Real_vi <0x1c, S_GETPC_B64>;
+def S_SETPC_B64_vi : SOP1_Real_vi <0x1d, S_SETPC_B64>;
+def S_SWAPPC_B64_vi : SOP1_Real_vi <0x1e, S_SWAPPC_B64>;
+def S_RFE_B64_vi : SOP1_Real_vi <0x1f, S_RFE_B64>;
+def S_AND_SAVEEXEC_B64_vi : SOP1_Real_vi <0x20, S_AND_SAVEEXEC_B64>;
+def S_OR_SAVEEXEC_B64_vi : SOP1_Real_vi <0x21, S_OR_SAVEEXEC_B64>;
+def S_XOR_SAVEEXEC_B64_vi : SOP1_Real_vi <0x22, S_XOR_SAVEEXEC_B64>;
+def S_ANDN2_SAVEEXEC_B64_vi: SOP1_Real_vi <0x23, S_ANDN2_SAVEEXEC_B64>;
+def S_ORN2_SAVEEXEC_B64_vi : SOP1_Real_vi <0x24, S_ORN2_SAVEEXEC_B64>;
+def S_NAND_SAVEEXEC_B64_vi : SOP1_Real_vi <0x25, S_NAND_SAVEEXEC_B64>;
+def S_NOR_SAVEEXEC_B64_vi : SOP1_Real_vi <0x26, S_NOR_SAVEEXEC_B64>;
+def S_XNOR_SAVEEXEC_B64_vi : SOP1_Real_vi <0x27, S_XNOR_SAVEEXEC_B64>;
+def S_QUADMASK_B32_vi : SOP1_Real_vi <0x28, S_QUADMASK_B32>;
+def S_QUADMASK_B64_vi : SOP1_Real_vi <0x29, S_QUADMASK_B64>;
+def S_MOVRELS_B32_vi : SOP1_Real_vi <0x2a, S_MOVRELS_B32>;
+def S_MOVRELS_B64_vi : SOP1_Real_vi <0x2b, S_MOVRELS_B64>;
+def S_MOVRELD_B32_vi : SOP1_Real_vi <0x2c, S_MOVRELD_B32>;
+def S_MOVRELD_B64_vi : SOP1_Real_vi <0x2d, S_MOVRELD_B64>;
+def S_CBRANCH_JOIN_vi : SOP1_Real_vi <0x2e, S_CBRANCH_JOIN>;
+def S_MOV_REGRD_B32_vi : SOP1_Real_vi <0x2f, S_MOV_REGRD_B32>;
+def S_ABS_I32_vi : SOP1_Real_vi <0x30, S_ABS_I32>;
+def S_MOV_FED_B32_vi : SOP1_Real_vi <0x31, S_MOV_FED_B32>;
+def S_SET_GPR_IDX_IDX_vi : SOP1_Real_vi <0x32, S_SET_GPR_IDX_IDX>;
+
+def S_ADD_U32_vi : SOP2_Real_vi <0x00, S_ADD_U32>;
+def S_ADD_I32_vi : SOP2_Real_vi <0x02, S_ADD_I32>;
+def S_SUB_U32_vi : SOP2_Real_vi <0x01, S_SUB_U32>;
+def S_SUB_I32_vi : SOP2_Real_vi <0x03, S_SUB_I32>;
+def S_ADDC_U32_vi : SOP2_Real_vi <0x04, S_ADDC_U32>;
+def S_SUBB_U32_vi : SOP2_Real_vi <0x05, S_SUBB_U32>;
+def S_MIN_I32_vi : SOP2_Real_vi <0x06, S_MIN_I32>;
+def S_MIN_U32_vi : SOP2_Real_vi <0x07, S_MIN_U32>;
+def S_MAX_I32_vi : SOP2_Real_vi <0x08, S_MAX_I32>;
+def S_MAX_U32_vi : SOP2_Real_vi <0x09, S_MAX_U32>;
+def S_CSELECT_B32_vi : SOP2_Real_vi <0x0a, S_CSELECT_B32>;
+def S_CSELECT_B64_vi : SOP2_Real_vi <0x0b, S_CSELECT_B64>;
+def S_AND_B32_vi : SOP2_Real_vi <0x0c, S_AND_B32>;
+def S_AND_B64_vi : SOP2_Real_vi <0x0d, S_AND_B64>;
+def S_OR_B32_vi : SOP2_Real_vi <0x0e, S_OR_B32>;
+def S_OR_B64_vi : SOP2_Real_vi <0x0f, S_OR_B64>;
+def S_XOR_B32_vi : SOP2_Real_vi <0x10, S_XOR_B32>;
+def S_XOR_B64_vi : SOP2_Real_vi <0x11, S_XOR_B64>;
+def S_ANDN2_B32_vi : SOP2_Real_vi <0x12, S_ANDN2_B32>;
+def S_ANDN2_B64_vi : SOP2_Real_vi <0x13, S_ANDN2_B64>;
+def S_ORN2_B32_vi : SOP2_Real_vi <0x14, S_ORN2_B32>;
+def S_ORN2_B64_vi : SOP2_Real_vi <0x15, S_ORN2_B64>;
+def S_NAND_B32_vi : SOP2_Real_vi <0x16, S_NAND_B32>;
+def S_NAND_B64_vi : SOP2_Real_vi <0x17, S_NAND_B64>;
+def S_NOR_B32_vi : SOP2_Real_vi <0x18, S_NOR_B32>;
+def S_NOR_B64_vi : SOP2_Real_vi <0x19, S_NOR_B64>;
+def S_XNOR_B32_vi : SOP2_Real_vi <0x1a, S_XNOR_B32>;
+def S_XNOR_B64_vi : SOP2_Real_vi <0x1b, S_XNOR_B64>;
+def S_LSHL_B32_vi : SOP2_Real_vi <0x1c, S_LSHL_B32>;
+def S_LSHL_B64_vi : SOP2_Real_vi <0x1d, S_LSHL_B64>;
+def S_LSHR_B32_vi : SOP2_Real_vi <0x1e, S_LSHR_B32>;
+def S_LSHR_B64_vi : SOP2_Real_vi <0x1f, S_LSHR_B64>;
+def S_ASHR_I32_vi : SOP2_Real_vi <0x20, S_ASHR_I32>;
+def S_ASHR_I64_vi : SOP2_Real_vi <0x21, S_ASHR_I64>;
+def S_BFM_B32_vi : SOP2_Real_vi <0x22, S_BFM_B32>;
+def S_BFM_B64_vi : SOP2_Real_vi <0x23, S_BFM_B64>;
+def S_MUL_I32_vi : SOP2_Real_vi <0x24, S_MUL_I32>;
+def S_BFE_U32_vi : SOP2_Real_vi <0x25, S_BFE_U32>;
+def S_BFE_I32_vi : SOP2_Real_vi <0x26, S_BFE_I32>;
+def S_BFE_U64_vi : SOP2_Real_vi <0x27, S_BFE_U64>;
+def S_BFE_I64_vi : SOP2_Real_vi <0x28, S_BFE_I64>;
+def S_CBRANCH_G_FORK_vi : SOP2_Real_vi <0x29, S_CBRANCH_G_FORK>;
+def S_ABSDIFF_I32_vi : SOP2_Real_vi <0x2a, S_ABSDIFF_I32>;
+
+def S_MOVK_I32_vi : SOPK_Real_vi <0x00, S_MOVK_I32>;
+def S_CMOVK_I32_vi : SOPK_Real_vi <0x01, S_CMOVK_I32>;
+def S_CMPK_EQ_I32_vi : SOPK_Real_vi <0x02, S_CMPK_EQ_I32>;
+def S_CMPK_LG_I32_vi : SOPK_Real_vi <0x03, S_CMPK_LG_I32>;
+def S_CMPK_GT_I32_vi : SOPK_Real_vi <0x04, S_CMPK_GT_I32>;
+def S_CMPK_GE_I32_vi : SOPK_Real_vi <0x05, S_CMPK_GE_I32>;
+def S_CMPK_LT_I32_vi : SOPK_Real_vi <0x06, S_CMPK_LT_I32>;
+def S_CMPK_LE_I32_vi : SOPK_Real_vi <0x07, S_CMPK_LE_I32>;
+def S_CMPK_EQ_U32_vi : SOPK_Real_vi <0x08, S_CMPK_EQ_U32>;
+def S_CMPK_LG_U32_vi : SOPK_Real_vi <0x09, S_CMPK_LG_U32>;
+def S_CMPK_GT_U32_vi : SOPK_Real_vi <0x0A, S_CMPK_GT_U32>;
+def S_CMPK_GE_U32_vi : SOPK_Real_vi <0x0B, S_CMPK_GE_U32>;
+def S_CMPK_LT_U32_vi : SOPK_Real_vi <0x0C, S_CMPK_LT_U32>;
+def S_CMPK_LE_U32_vi : SOPK_Real_vi <0x0D, S_CMPK_LE_U32>;
+def S_ADDK_I32_vi : SOPK_Real_vi <0x0E, S_ADDK_I32>;
+def S_MULK_I32_vi : SOPK_Real_vi <0x0F, S_MULK_I32>;
+def S_CBRANCH_I_FORK_vi : SOPK_Real_vi <0x10, S_CBRANCH_I_FORK>;
+def S_GETREG_B32_vi : SOPK_Real_vi <0x11, S_GETREG_B32>;
+def S_SETREG_B32_vi : SOPK_Real_vi <0x12, S_SETREG_B32>;
+//def S_GETREG_REGRD_B32_vi : SOPK_Real_vi <0x13, S_GETREG_REGRD_B32>; // see pseudo for comments
+def S_SETREG_IMM32_B32_vi : SOPK_Real64<0x14, S_SETREG_IMM32_B32>,
+ Select_vi<S_SETREG_IMM32_B32.Mnemonic>;
diff --git a/contrib/llvm/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp
index 2112135aa5d4..9908fc003ce7 100644
--- a/contrib/llvm/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp
@@ -18,13 +18,20 @@ using namespace llvm;
 /// \brief The target which supports all AMD GPUs. This will eventually
 /// be deprecated and there will be an R600 target and a GCN target.
-Target llvm::TheAMDGPUTarget;
+Target &llvm::getTheAMDGPUTarget() {
+ static Target TheAMDGPUTarget;
+ return TheAMDGPUTarget;
+}
/// \brief The target for GCN GPUs
-Target llvm::TheGCNTarget;
+Target &llvm::getTheGCNTarget() {
+ static Target TheGCNTarget;
+ return TheGCNTarget;
+}
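+// The accessors above return function-local statics, so each Target is
+// constructed on first use; this avoids static initialization order issues
+// across translation units.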
/// \brief Extern function to initialize the targets for the AMDGPU backend
extern "C" void LLVMInitializeAMDGPUTargetInfo() {
- RegisterTarget<Triple::r600, false>
- R600(TheAMDGPUTarget, "r600", "AMD GPUs HD2XXX-HD6XXX");
- RegisterTarget<Triple::amdgcn, false> GCN(TheGCNTarget, "amdgcn", "AMD GCN GPUs");
+ RegisterTarget<Triple::r600, false> R600(getTheAMDGPUTarget(), "r600",
+ "AMD GPUs HD2XXX-HD6XXX");
+ RegisterTarget<Triple::amdgcn, false> GCN(getTheGCNTarget(), "amdgcn",
+ "AMD GCN GPUs");
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index c6f9142c0aa5..85cbadf0a570 100644
--- a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -8,10 +8,13 @@
//===----------------------------------------------------------------------===//
#include "AMDGPUBaseInfo.h"
#include "AMDGPU.h"
+#include "SIDefines.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/SubtargetFeature.h"
@@ -24,6 +27,55 @@
#include "AMDGPUGenRegisterInfo.inc"
#undef GET_REGINFO_ENUM
+#define GET_INSTRINFO_NAMED_OPS
+#define GET_INSTRINFO_ENUM
+#include "AMDGPUGenInstrInfo.inc"
+#undef GET_INSTRINFO_NAMED_OPS
+#undef GET_INSTRINFO_ENUM
+
+namespace {
+
+/// \returns Bit mask for given bit \p Shift and bit \p Width.
+unsigned getBitMask(unsigned Shift, unsigned Width) {
+ return ((1 << Width) - 1) << Shift;
+}
+
+/// \brief Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
+///
+/// \returns Packed \p Dst.
+unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
+ Dst &= ~(1 << Shift) & ~getBitMask(Shift, Width);
+ Dst |= (Src << Shift) & getBitMask(Shift, Width);
+ return Dst;
+}
+
+/// \brief Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
+///
+/// \returns Unpacked bits.
+unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
+ return (Src & getBitMask(Shift, Width)) >> Shift;
+}
+
+/// \returns Vmcnt bit shift.
+unsigned getVmcntBitShift() { return 0; }
+
+/// \returns Vmcnt bit width.
+unsigned getVmcntBitWidth() { return 4; }
+
+/// \returns Expcnt bit shift.
+unsigned getExpcntBitShift() { return 4; }
+
+/// \returns Expcnt bit width.
+unsigned getExpcntBitWidth() { return 3; }
+
+/// \returns Lgkmcnt bit shift.
+unsigned getLgkmcntBitShift() { return 8; }
+
+/// \returns Lgkmcnt bit width.
+unsigned getLgkmcntBitWidth() { return 4; }
+
+} // anonymous namespace
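+
+// Quick sanity check of the helpers above (values are illustrative):
+//   getBitMask(getExpcntBitShift(), getExpcntBitWidth()) == ((1 << 3) - 1) << 4 == 0x70
+//   unpackBits(0x123, getExpcntBitShift(), getExpcntBitWidth()) == (0x123 & 0x70) >> 4 == 2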
+
namespace llvm {
namespace AMDGPU {
@@ -35,15 +87,27 @@ IsaVersion getIsaVersion(const FeatureBitset &Features) {
if (Features.test(FeatureISAVersion7_0_1))
return {7, 0, 1};
+ if (Features.test(FeatureISAVersion7_0_2))
+ return {7, 0, 2};
+
if (Features.test(FeatureISAVersion8_0_0))
return {8, 0, 0};
if (Features.test(FeatureISAVersion8_0_1))
return {8, 0, 1};
+ if (Features.test(FeatureISAVersion8_0_2))
+ return {8, 0, 2};
+
if (Features.test(FeatureISAVersion8_0_3))
return {8, 0, 3};
+ if (Features.test(FeatureISAVersion8_0_4))
+ return {8, 0, 4};
+
+ if (Features.test(FeatureISAVersion8_1_0))
+ return {8, 1, 0};
+
return {0, 0, 0};
}
@@ -109,6 +173,10 @@ bool isReadOnlySegment(const GlobalValue *GV) {
return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
}
+bool shouldEmitConstantsToTextSection(const Triple &TT) {
+ return TT.getOS() != Triple::AMDHSA;
+}
+
int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
Attribute A = F.getFnAttribute(Name);
int Result = Default;
@@ -124,8 +192,88 @@ int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
return Result;
}
-unsigned getMaximumWorkGroupSize(const Function &F) {
- return getIntegerAttribute(F, "amdgpu-max-work-group-size", 256);
+std::pair<int, int> getIntegerPairAttribute(const Function &F,
+ StringRef Name,
+ std::pair<int, int> Default,
+ bool OnlyFirstRequired) {
+ Attribute A = F.getFnAttribute(Name);
+ if (!A.isStringAttribute())
+ return Default;
+
+ LLVMContext &Ctx = F.getContext();
+ std::pair<int, int> Ints = Default;
+ std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
+ if (Strs.first.trim().getAsInteger(0, Ints.first)) {
+ Ctx.emitError("can't parse first integer attribute " + Name);
+ return Default;
+ }
+ if (Strs.second.trim().getAsInteger(0, Ints.second)) {
+ if (!OnlyFirstRequired || Strs.second.trim().size()) {
+ Ctx.emitError("can't parse second integer attribute " + Name);
+ return Default;
+ }
+ }
+
+ return Ints;
+}
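+
+// Usage sketch (attribute name and values are made up for illustration):
+//   "my-pair-attr"="128,256" -> getIntegerPairAttribute(F, "my-pair-attr", {0, 0}) == {128, 256}
+//   "my-pair-attr"="64" with OnlyFirstRequired == true
+//                            -> getIntegerPairAttribute(F, "my-pair-attr", {0, 0}, true) == {64, 0}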
+
+unsigned getWaitcntBitMask(IsaVersion Version) {
+ unsigned Vmcnt = getBitMask(getVmcntBitShift(), getVmcntBitWidth());
+ unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
+ unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
+ return Vmcnt | Expcnt | Lgkmcnt;
+}
+
+unsigned getVmcntBitMask(IsaVersion Version) {
+ return (1 << getVmcntBitWidth()) - 1;
+}
+
+unsigned getExpcntBitMask(IsaVersion Version) {
+ return (1 << getExpcntBitWidth()) - 1;
+}
+
+unsigned getLgkmcntBitMask(IsaVersion Version) {
+ return (1 << getLgkmcntBitWidth()) - 1;
+}
+
+unsigned decodeVmcnt(IsaVersion Version, unsigned Waitcnt) {
+ return unpackBits(Waitcnt, getVmcntBitShift(), getVmcntBitWidth());
+}
+
+unsigned decodeExpcnt(IsaVersion Version, unsigned Waitcnt) {
+ return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
+}
+
+unsigned decodeLgkmcnt(IsaVersion Version, unsigned Waitcnt) {
+ return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
+}
+
+void decodeWaitcnt(IsaVersion Version, unsigned Waitcnt,
+ unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
+ Vmcnt = decodeVmcnt(Version, Waitcnt);
+ Expcnt = decodeExpcnt(Version, Waitcnt);
+ Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
+}
+
+unsigned encodeVmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Vmcnt) {
+ return packBits(Vmcnt, Waitcnt, getVmcntBitShift(), getVmcntBitWidth());
+}
+
+unsigned encodeExpcnt(IsaVersion Version, unsigned Waitcnt, unsigned Expcnt) {
+ return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
+}
+
+unsigned encodeLgkmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Lgkmcnt) {
+ return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
+}
+
+unsigned encodeWaitcnt(IsaVersion Version,
+ unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
+ unsigned Waitcnt = getWaitcntBitMask(Version);
+ Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
+ Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
+ Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
+ return Waitcnt;
}
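+
+// Worked example of the bit packing above (illustrative values):
+//   encodeWaitcnt(Version, /*Vmcnt=*/3, /*Expcnt=*/2, /*Lgkmcnt=*/1) == 0x123
+//   (Vmcnt occupies bits [3:0], Expcnt bits [6:4], Lgkmcnt bits [11:8]);
+//   decodeWaitcnt(Version, 0x123, Vmcnt, Expcnt, Lgkmcnt) recovers 3, 2 and 1.
+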
unsigned getInitialPSInputAddr(const Function &F) {
@@ -179,5 +327,135 @@ unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
return Reg;
}
+bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
+ unsigned OpType = Desc.OpInfo[OpNo].OperandType;
+ return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
+ OpType <= AMDGPU::OPERAND_SRC_LAST;
+}
+
+bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
+ unsigned OpType = Desc.OpInfo[OpNo].OperandType;
+ switch (OpType) {
+ case AMDGPU::OPERAND_REG_IMM_FP32:
+ case AMDGPU::OPERAND_REG_IMM_FP64:
+ case AMDGPU::OPERAND_REG_IMM_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP32:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP64:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP16:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
+ unsigned OpType = Desc.OpInfo[OpNo].OperandType;
+ return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
+ OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
+}
+
+// Avoid using MCRegisterClass::getSize, since that function will go away
+// (move from MC* level to Target* level). Return size in bits.
+unsigned getRegBitWidth(unsigned RCID) {
+ switch (RCID) {
+ case AMDGPU::SGPR_32RegClassID:
+ case AMDGPU::VGPR_32RegClassID:
+ case AMDGPU::VS_32RegClassID:
+ case AMDGPU::SReg_32RegClassID:
+ case AMDGPU::SReg_32_XM0RegClassID:
+ return 32;
+ case AMDGPU::SGPR_64RegClassID:
+ case AMDGPU::VS_64RegClassID:
+ case AMDGPU::SReg_64RegClassID:
+ case AMDGPU::VReg_64RegClassID:
+ return 64;
+ case AMDGPU::VReg_96RegClassID:
+ return 96;
+ case AMDGPU::SGPR_128RegClassID:
+ case AMDGPU::SReg_128RegClassID:
+ case AMDGPU::VReg_128RegClassID:
+ return 128;
+ case AMDGPU::SReg_256RegClassID:
+ case AMDGPU::VReg_256RegClassID:
+ return 256;
+ case AMDGPU::SReg_512RegClassID:
+ case AMDGPU::VReg_512RegClassID:
+ return 512;
+ default:
+ llvm_unreachable("Unexpected register class");
+ }
+}
+
+unsigned getRegBitWidth(const MCRegisterClass &RC) {
+ return getRegBitWidth(RC.getID());
+}
+
+unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
+ unsigned OpNo) {
+ unsigned RCID = Desc.OpInfo[OpNo].RegClass;
+ return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
+}
+
+bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
+ if (Literal >= -16 && Literal <= 64)
+ return true;
+
+ uint64_t Val = static_cast<uint64_t>(Literal);
+ return (Val == DoubleToBits(0.0)) ||
+ (Val == DoubleToBits(1.0)) ||
+ (Val == DoubleToBits(-1.0)) ||
+ (Val == DoubleToBits(0.5)) ||
+ (Val == DoubleToBits(-0.5)) ||
+ (Val == DoubleToBits(2.0)) ||
+ (Val == DoubleToBits(-2.0)) ||
+ (Val == DoubleToBits(4.0)) ||
+ (Val == DoubleToBits(-4.0)) ||
+ (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
+}
+
+bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
+ if (Literal >= -16 && Literal <= 64)
+ return true;
+
+ // The actual type of the operand does not seem to matter as long
+ // as the bits match one of the inline immediate values. For example:
+ //
+ // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
+ // so it is a legal inline immediate.
+ //
+ // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
+ // floating-point, so it is a legal inline immediate.
+
+ uint32_t Val = static_cast<uint32_t>(Literal);
+ return (Val == FloatToBits(0.0f)) ||
+ (Val == FloatToBits(1.0f)) ||
+ (Val == FloatToBits(-1.0f)) ||
+ (Val == FloatToBits(0.5f)) ||
+ (Val == FloatToBits(-0.5f)) ||
+ (Val == FloatToBits(2.0f)) ||
+ (Val == FloatToBits(-2.0f)) ||
+ (Val == FloatToBits(4.0f)) ||
+ (Val == FloatToBits(-4.0f)) ||
+ (Val == 0x3e22f983 && HasInv2Pi);
+}
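+
+// Illustrative checks of the 32-bit case:
+//   isInlinableLiteral32(63, HasInv2Pi)                -> true  (integer in [-16, 64])
+//   isInlinableLiteral32(FloatToBits(1.0f), HasInv2Pi) -> true  (0x3f800000)
+//   isInlinableLiteral32(FloatToBits(3.0f), HasInv2Pi) -> false (0x40400000 has no inline encoding)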
+
+bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
+ assert(HasInv2Pi);
+
+ if (Literal >= -16 && Literal <= 64)
+ return true;
+
+ uint16_t Val = static_cast<uint16_t>(Literal);
+ return Val == 0x3C00 || // 1.0
+ Val == 0xBC00 || // -1.0
+ Val == 0x3800 || // 0.5
+ Val == 0xB800 || // -0.5
+ Val == 0x4000 || // 2.0
+ Val == 0xC000 || // -2.0
+ Val == 0x4400 || // 4.0
+ Val == 0xC400 || // -4.0
+ Val == 0x3118; // 1/2pi
+}
+
} // End namespace AMDGPU
} // End namespace llvm
diff --git a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 995a9041fb36..ea5fc366d205 100644
--- a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -13,17 +13,29 @@
#include "AMDKernelCodeT.h"
#include "llvm/IR/CallingConv.h"
+#include "SIDefines.h"
+
+#define GET_INSTRINFO_OPERAND_ENUM
+#include "AMDGPUGenInstrInfo.inc"
+#undef GET_INSTRINFO_OPERAND_ENUM
+
namespace llvm {
class FeatureBitset;
class Function;
class GlobalValue;
class MCContext;
+class MCInstrDesc;
+class MCRegisterClass;
+class MCRegisterInfo;
class MCSection;
class MCSubtargetInfo;
namespace AMDGPU {
+LLVM_READONLY
+int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx);
+
struct IsaVersion {
unsigned Major;
unsigned Minor;
@@ -45,9 +57,86 @@ bool isGroupSegment(const GlobalValue *GV);
bool isGlobalSegment(const GlobalValue *GV);
bool isReadOnlySegment(const GlobalValue *GV);
+/// \returns True if constants should be emitted to .text section for given
+/// target triple \p TT, false otherwise.
+bool shouldEmitConstantsToTextSection(const Triple &TT);
+
+/// \returns Integer value requested using \p F's \p Name attribute.
+///
+/// \returns \p Default if attribute is not present.
+///
+/// \returns \p Default and emits error if requested value cannot be converted
+/// to integer.
int getIntegerAttribute(const Function &F, StringRef Name, int Default);
-unsigned getMaximumWorkGroupSize(const Function &F);
+/// \returns A pair of integer values requested using \p F's \p Name attribute
+/// in "first[,second]" format ("second" is optional unless \p OnlyFirstRequired
+/// is false).
+///
+/// \returns \p Default if attribute is not present.
+///
+/// \returns \p Default and emits error if one of the requested values cannot be
+/// converted to integer, or \p OnlyFirstRequired is false and "second" value is
+/// not present.
+std::pair<int, int> getIntegerPairAttribute(const Function &F,
+ StringRef Name,
+ std::pair<int, int> Default,
+ bool OnlyFirstRequired = false);
+
+/// \returns Waitcnt bit mask for given isa \p Version.
+unsigned getWaitcntBitMask(IsaVersion Version);
+
+/// \returns Vmcnt bit mask for given isa \p Version.
+unsigned getVmcntBitMask(IsaVersion Version);
+
+/// \returns Expcnt bit mask for given isa \p Version.
+unsigned getExpcntBitMask(IsaVersion Version);
+
+/// \returns Lgkmcnt bit mask for given isa \p Version.
+unsigned getLgkmcntBitMask(IsaVersion Version);
+
+/// \returns Decoded Vmcnt from given \p Waitcnt for given isa \p Version.
+unsigned decodeVmcnt(IsaVersion Version, unsigned Waitcnt);
+
+/// \returns Decoded Expcnt from given \p Waitcnt for given isa \p Version.
+unsigned decodeExpcnt(IsaVersion Version, unsigned Waitcnt);
+
+/// \returns Decoded Lgkmcnt from given \p Waitcnt for given isa \p Version.
+unsigned decodeLgkmcnt(IsaVersion Version, unsigned Waitcnt);
+
+/// \brief Decodes Vmcnt, Expcnt and Lgkmcnt from given \p Waitcnt for given isa
+/// \p Version, and writes decoded values into \p Vmcnt, \p Expcnt and
+/// \p Lgkmcnt respectively.
+///
+/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are decoded as follows:
+/// \p Vmcnt = \p Waitcnt[3:0]
+/// \p Expcnt = \p Waitcnt[6:4]
+/// \p Lgkmcnt = \p Waitcnt[11:8]
+void decodeWaitcnt(IsaVersion Version, unsigned Waitcnt,
+ unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
+
+/// \returns \p Waitcnt with encoded \p Vmcnt for given isa \p Version.
+unsigned encodeVmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Vmcnt);
+
+/// \returns \p Waitcnt with encoded \p Expcnt for given isa \p Version.
+unsigned encodeExpcnt(IsaVersion Version, unsigned Waitcnt, unsigned Expcnt);
+
+/// \returns \p Waitcnt with encoded \p Lgkmcnt for given isa \p Version.
+unsigned encodeLgkmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Lgkmcnt);
+
+/// \brief Encodes \p Vmcnt, \p Expcnt and \p Lgkmcnt into Waitcnt for given isa
+/// \p Version.
+///
+/// \details \p Vmcnt, \p Expcnt and \p Lgkmcnt are encoded as follows:
+/// Waitcnt[3:0] = \p Vmcnt
+/// Waitcnt[6:4] = \p Expcnt
+/// Waitcnt[11:8] = \p Lgkmcnt
+///
+/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
+/// isa \p Version.
+unsigned encodeWaitcnt(IsaVersion Version,
+ unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt);
+
unsigned getInitialPSInputAddr(const Function &F);
bool isShader(CallingConv::ID cc);
@@ -61,6 +150,66 @@ bool isVI(const MCSubtargetInfo &STI);
/// \p STI otherwise return \p Reg.
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI);
+/// \brief Can this operand also contain immediate values?
+bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo);
+
+/// \brief Is this floating-point operand?
+bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo);
+
+/// \brief Does this operand support only inlinable literals?
+bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo);
+
+/// \brief Get the size in bits of a register from the register class ID \p RCID.
+unsigned getRegBitWidth(unsigned RCID);
+
+/// \brief Get the size in bits of a register from the register class \p RC.
+unsigned getRegBitWidth(const MCRegisterClass &RC);
+
+/// \brief Get the size in bytes of a register operand.
+unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
+ unsigned OpNo);
+
+LLVM_READNONE
+inline unsigned getOperandSize(const MCOperandInfo &OpInfo) {
+ switch (OpInfo.OperandType) {
+ case AMDGPU::OPERAND_REG_IMM_INT32:
+ case AMDGPU::OPERAND_REG_IMM_FP32:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT32:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP32:
+ return 4;
+
+ case AMDGPU::OPERAND_REG_IMM_INT64:
+ case AMDGPU::OPERAND_REG_IMM_FP64:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT64:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP64:
+ return 8;
+
+ case AMDGPU::OPERAND_REG_IMM_INT16:
+ case AMDGPU::OPERAND_REG_IMM_FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_INT16:
+ case AMDGPU::OPERAND_REG_INLINE_C_FP16:
+ return 2;
+
+ default:
+ llvm_unreachable("unhandled operand type");
+ }
+}
+
+LLVM_READNONE
+inline unsigned getOperandSize(const MCInstrDesc &Desc, unsigned OpNo) {
+ return getOperandSize(Desc.OpInfo[OpNo]);
+}
+
+/// \brief Is this literal inlinable?
+LLVM_READNONE
+bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi);
+
+LLVM_READNONE
+bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi);
+
+LLVM_READNONE
+bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi);
+
} // end namespace AMDGPU
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
index 3a5ff60601d0..c55eaab077d1 100644
--- a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
+++ b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
@@ -16,10 +16,10 @@
#define QNAME(name) amd_kernel_code_t::name
#define FLD_T(name) decltype(QNAME(name)), &QNAME(name)
-#define FIELD2(sname, name) \
- RECORD(sname, printField<FLD_T(name)>, parseField<FLD_T(name)>)
+#define FIELD2(sname, aname, name) \
+ RECORD(sname, aname, printField<FLD_T(name)>, parseField<FLD_T(name)>)
-#define FIELD(name) FIELD2(name, name)
+#define FIELD(name) FIELD2(name, name, name)
#define PRINTCODEPROP(name) \
@@ -33,7 +33,7 @@
AMD_CODE_PROPERTY_##name##_WIDTH>
#define CODEPROP(name, shift) \
- RECORD(name, PRINTCODEPROP(shift), PARSECODEPROP(shift))
+ RECORD(name, name, PRINTCODEPROP(shift), PARSECODEPROP(shift))
// have to define these lambdas because of Set/GetMacro
#define PRINTCOMP(GetMacro, Shift) \
@@ -50,32 +50,70 @@
return true; \
}
-#define COMPPGM(name, GetMacro, SetMacro, Shift) \
- RECORD(name, PRINTCOMP(GetMacro, Shift), PARSECOMP(SetMacro, Shift))
+#define COMPPGM(name, aname, GetMacro, SetMacro, Shift) \
+ RECORD(name, aname, PRINTCOMP(GetMacro, Shift), PARSECOMP(SetMacro, Shift))
-#define COMPPGM1(name, AccMacro) \
- COMPPGM(compute_pgm_rsrc1_##name, \
- G_00B848_##AccMacro, S_00B848_##AccMacro, 0)
+#define COMPPGM1(name, aname, AccMacro) \
+ COMPPGM(name, aname, G_00B848_##AccMacro, S_00B848_##AccMacro, 0)
-#define COMPPGM2(name, AccMacro) \
- COMPPGM(compute_pgm_rsrc2_##name, \
- G_00B84C_##AccMacro, S_00B84C_##AccMacro, 32)
+#define COMPPGM2(name, aname, AccMacro) \
+ COMPPGM(name, aname, G_00B84C_##AccMacro, S_00B84C_##AccMacro, 32)
///////////////////////////////////////////////////////////////////////////////
// Begin of the table
// Define RECORD(name, print, parse) in your code to get field definitions
// and include this file
-FIELD2(kernel_code_version_major, amd_kernel_code_version_major),
-FIELD2(kernel_code_version_minor, amd_kernel_code_version_minor),
-FIELD2(machine_kind, amd_machine_kind),
-FIELD2(machine_version_major, amd_machine_version_major),
-FIELD2(machine_version_minor, amd_machine_version_minor),
-FIELD2(machine_version_stepping, amd_machine_version_stepping),
+FIELD2(amd_code_version_major, kernel_code_version_major, amd_kernel_code_version_major),
+FIELD2(amd_code_version_minor, kernel_code_version_minor, amd_kernel_code_version_minor),
+FIELD2(amd_machine_kind, machine_kind, amd_machine_kind),
+FIELD2(amd_machine_version_major, machine_version_major, amd_machine_version_major),
+FIELD2(amd_machine_version_minor, machine_version_minor, amd_machine_version_minor),
+FIELD2(amd_machine_version_stepping, machine_version_stepping, amd_machine_version_stepping),
+
FIELD(kernel_code_entry_byte_offset),
FIELD(kernel_code_prefetch_byte_size),
FIELD(max_scratch_backing_memory_byte_size),
-FIELD(compute_pgm_resource_registers),
+
+COMPPGM1(granulated_workitem_vgpr_count, compute_pgm_rsrc1_vgprs, VGPRS),
+COMPPGM1(granulated_wavefront_sgpr_count, compute_pgm_rsrc1_sgprs, SGPRS),
+COMPPGM1(priority, compute_pgm_rsrc1_priority, PRIORITY),
+COMPPGM1(float_mode, compute_pgm_rsrc1_float_mode, FLOAT_MODE), // TODO: split float_mode
+COMPPGM1(priv, compute_pgm_rsrc1_priv, PRIV),
+COMPPGM1(enable_dx10_clamp, compute_pgm_rsrc1_dx10_clamp, DX10_CLAMP),
+COMPPGM1(debug_mode, compute_pgm_rsrc1_debug_mode, DEBUG_MODE),
+COMPPGM1(enable_ieee_mode, compute_pgm_rsrc1_ieee_mode, IEEE_MODE),
+// TODO: bulky
+// TODO: cdbg_user
+COMPPGM2(enable_sgpr_private_segment_wave_byte_offset, compute_pgm_rsrc2_scratch_en, SCRATCH_EN),
+COMPPGM2(user_sgpr_count, compute_pgm_rsrc2_user_sgpr, USER_SGPR),
+// TODO: enable_trap_handler
+COMPPGM2(enable_sgpr_workgroup_id_x, compute_pgm_rsrc2_tgid_x_en, TGID_X_EN),
+COMPPGM2(enable_sgpr_workgroup_id_y, compute_pgm_rsrc2_tgid_y_en, TGID_Y_EN),
+COMPPGM2(enable_sgpr_workgroup_id_z, compute_pgm_rsrc2_tgid_z_en, TGID_Z_EN),
+COMPPGM2(enable_sgpr_workgroup_info, compute_pgm_rsrc2_tg_size_en, TG_SIZE_EN),
+COMPPGM2(enable_vgpr_workitem_id, compute_pgm_rsrc2_tidig_comp_cnt, TIDIG_COMP_CNT),
+COMPPGM2(enable_exception_msb, compute_pgm_rsrc2_excp_en_msb, EXCP_EN_MSB), // TODO: split enable_exception_msb
+COMPPGM2(granulated_lds_size, compute_pgm_rsrc2_lds_size, LDS_SIZE),
+COMPPGM2(enable_exception, compute_pgm_rsrc2_excp_en, EXCP_EN), // TODO: split enable_exception
+
+CODEPROP(enable_sgpr_private_segment_buffer, ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER),
+CODEPROP(enable_sgpr_dispatch_ptr, ENABLE_SGPR_DISPATCH_PTR),
+CODEPROP(enable_sgpr_queue_ptr, ENABLE_SGPR_QUEUE_PTR),
+CODEPROP(enable_sgpr_kernarg_segment_ptr, ENABLE_SGPR_KERNARG_SEGMENT_PTR),
+CODEPROP(enable_sgpr_dispatch_id, ENABLE_SGPR_DISPATCH_ID),
+CODEPROP(enable_sgpr_flat_scratch_init, ENABLE_SGPR_FLAT_SCRATCH_INIT),
+CODEPROP(enable_sgpr_private_segment_size, ENABLE_SGPR_PRIVATE_SEGMENT_SIZE),
+CODEPROP(enable_sgpr_grid_workgroup_count_x, ENABLE_SGPR_GRID_WORKGROUP_COUNT_X),
+CODEPROP(enable_sgpr_grid_workgroup_count_y, ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y),
+CODEPROP(enable_sgpr_grid_workgroup_count_z, ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z),
+CODEPROP(enable_ordered_append_gds, ENABLE_ORDERED_APPEND_GDS),
+CODEPROP(private_element_size, PRIVATE_ELEMENT_SIZE),
+CODEPROP(is_ptr64, IS_PTR64),
+CODEPROP(is_dynamic_callstack, IS_DYNAMIC_CALLSTACK),
+CODEPROP(is_debug_enabled, IS_DEBUG_SUPPORTED),
+CODEPROP(is_xnack_enabled, IS_XNACK_SUPPORTED),
+
FIELD(workitem_private_segment_byte_size),
FIELD(workgroup_group_segment_byte_size),
FIELD(gds_segment_byte_size),
@@ -94,59 +132,8 @@ FIELD(group_segment_alignment),
FIELD(private_segment_alignment),
FIELD(wavefront_size),
FIELD(call_convention),
-FIELD(runtime_loader_kernel_symbol),
-
-COMPPGM1(vgprs, VGPRS),
-COMPPGM1(sgprs, SGPRS),
-COMPPGM1(priority, PRIORITY),
-COMPPGM1(float_mode, FLOAT_MODE),
-COMPPGM1(priv, PRIV),
-COMPPGM1(dx10_clamp, DX10_CLAMP),
-COMPPGM1(debug_mode, DEBUG_MODE),
-COMPPGM1(ieee_mode, IEEE_MODE),
-COMPPGM2(scratch_en, SCRATCH_EN),
-COMPPGM2(user_sgpr, USER_SGPR),
-COMPPGM2(tgid_x_en, TGID_X_EN),
-COMPPGM2(tgid_y_en, TGID_Y_EN),
-COMPPGM2(tgid_z_en, TGID_Z_EN),
-COMPPGM2(tg_size_en, TG_SIZE_EN),
-COMPPGM2(tidig_comp_cnt, TIDIG_COMP_CNT),
-COMPPGM2(excp_en_msb, EXCP_EN_MSB),
-COMPPGM2(lds_size, LDS_SIZE),
-COMPPGM2(excp_en, EXCP_EN),
-
-CODEPROP(enable_sgpr_private_segment_buffer,
- ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER),
-CODEPROP(enable_sgpr_dispatch_ptr,
- ENABLE_SGPR_DISPATCH_PTR),
-CODEPROP(enable_sgpr_queue_ptr,
- ENABLE_SGPR_QUEUE_PTR),
-CODEPROP(enable_sgpr_kernarg_segment_ptr,
- ENABLE_SGPR_KERNARG_SEGMENT_PTR),
-CODEPROP(enable_sgpr_dispatch_id,
- ENABLE_SGPR_DISPATCH_ID),
-CODEPROP(enable_sgpr_flat_scratch_init,
- ENABLE_SGPR_FLAT_SCRATCH_INIT),
-CODEPROP(enable_sgpr_private_segment_size,
- ENABLE_SGPR_PRIVATE_SEGMENT_SIZE),
-CODEPROP(enable_sgpr_grid_workgroup_count_x,
- ENABLE_SGPR_GRID_WORKGROUP_COUNT_X),
-CODEPROP(enable_sgpr_grid_workgroup_count_y,
- ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y),
-CODEPROP(enable_sgpr_grid_workgroup_count_z,
- ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z),
-CODEPROP(enable_ordered_append_gds,
- ENABLE_ORDERED_APPEND_GDS),
-CODEPROP(private_element_size,
- PRIVATE_ELEMENT_SIZE),
-CODEPROP(is_ptr64,
- IS_PTR64),
-CODEPROP(is_dynamic_callstack,
- IS_DYNAMIC_CALLSTACK),
-CODEPROP(is_debug_enabled,
- IS_DEBUG_SUPPORTED),
-CODEPROP(is_xnack_enabled,
- IS_XNACK_SUPPORTED)
+FIELD(runtime_loader_kernel_symbol)
+// TODO: control_directive
// end of the table
///////////////////////////////////////////////////////////////////////////////
diff --git a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp
index f64973afa44f..0333b0a14d29 100644
--- a/contrib/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp
@@ -24,22 +24,37 @@ using namespace llvm;
static ArrayRef<StringRef> get_amd_kernel_code_t_FldNames() {
static StringRef const Table[] = {
"", // not found placeholder
-#define RECORD(name, print, parse) #name
+#define RECORD(name, altName, print, parse) #name
#include "AMDKernelCodeTInfo.h"
#undef RECORD
};
return makeArrayRef(Table);
}
-static StringMap<int> createIndexMap(const ArrayRef<StringRef> &a) {
+static ArrayRef<StringRef> get_amd_kernel_code_t_FldAltNames() {
+ static StringRef const Table[] = {
+ "", // not found placeholder
+#define RECORD(name, altName, print, parse) #altName
+#include "AMDKernelCodeTInfo.h"
+#undef RECORD
+ };
+ return makeArrayRef(Table);
+}
+
+static StringMap<int> createIndexMap(const ArrayRef<StringRef> &names,
+ const ArrayRef<StringRef> &altNames) {
StringMap<int> map;
- for (auto Name : a)
- map.insert(std::make_pair(Name, map.size()));
+ assert(names.size() == altNames.size());
+ for (unsigned i = 0; i < names.size(); ++i) {
+ map.insert(std::make_pair(names[i], i));
+ map.insert(std::make_pair(altNames[i], i));
+ }
return map;
}
static int get_amd_kernel_code_t_FieldIndex(StringRef name) {
- static const auto map = createIndexMap(get_amd_kernel_code_t_FldNames());
+ static const auto map = createIndexMap(get_amd_kernel_code_t_FldNames(),
+ get_amd_kernel_code_t_FldAltNames());
return map.lookup(name) - 1; // returns -1 if not found
}
@@ -73,7 +88,7 @@ typedef void(*PrintFx)(StringRef,
static ArrayRef<PrintFx> getPrinterTable() {
static const PrintFx Table[] = {
-#define RECORD(name, print, parse) print
+#define RECORD(name, altName, print, parse) print
#include "AMDKernelCodeTInfo.h"
#undef RECORD
};
@@ -145,7 +160,7 @@ typedef bool(*ParseFx)(amd_kernel_code_t &,
static ArrayRef<ParseFx> getParserTable() {
static const ParseFx Table[] = {
-#define RECORD(name, print, parse) parse
+#define RECORD(name, altName, print, parse) parse
#include "AMDKernelCodeTInfo.h"
#undef RECORD
};
diff --git a/contrib/llvm/lib/Target/AMDGPU/VIInstrFormats.td b/contrib/llvm/lib/Target/AMDGPU/VIInstrFormats.td
index 912ed5329bfe..1fd1c1e21527 100644
--- a/contrib/llvm/lib/Target/AMDGPU/VIInstrFormats.td
+++ b/contrib/llvm/lib/Target/AMDGPU/VIInstrFormats.td
@@ -11,283 +11,6 @@
//
//===----------------------------------------------------------------------===//
-class DSe_vi <bits<8> op> : Enc64 {
- bits<8> vdst;
- bits<1> gds;
- bits<8> addr;
- bits<8> data0;
- bits<8> data1;
- bits<8> offset0;
- bits<8> offset1;
-
- let Inst{7-0} = offset0;
- let Inst{15-8} = offset1;
- let Inst{16} = gds;
- let Inst{24-17} = op;
- let Inst{31-26} = 0x36; //encoding
- let Inst{39-32} = addr;
- let Inst{47-40} = data0;
- let Inst{55-48} = data1;
- let Inst{63-56} = vdst;
-}
-
-class MUBUFe_vi <bits<7> op> : Enc64 {
- bits<12> offset;
- bits<1> offen;
- bits<1> idxen;
- bits<1> glc;
- bits<1> lds;
- bits<8> vaddr;
- bits<8> vdata;
- bits<7> srsrc;
- bits<1> slc;
- bits<1> tfe;
- bits<8> soffset;
-
- let Inst{11-0} = offset;
- let Inst{12} = offen;
- let Inst{13} = idxen;
- let Inst{14} = glc;
- let Inst{16} = lds;
- let Inst{17} = slc;
- let Inst{24-18} = op;
- let Inst{31-26} = 0x38; //encoding
- let Inst{39-32} = vaddr;
- let Inst{47-40} = vdata;
- let Inst{52-48} = srsrc{6-2};
- let Inst{55} = tfe;
- let Inst{63-56} = soffset;
-}
-
-class MTBUFe_vi <bits<4> op> : Enc64 {
- bits<12> offset;
- bits<1> offen;
- bits<1> idxen;
- bits<1> glc;
- bits<4> dfmt;
- bits<3> nfmt;
- bits<8> vaddr;
- bits<8> vdata;
- bits<7> srsrc;
- bits<1> slc;
- bits<1> tfe;
- bits<8> soffset;
-
- let Inst{11-0} = offset;
- let Inst{12} = offen;
- let Inst{13} = idxen;
- let Inst{14} = glc;
- let Inst{18-15} = op;
- let Inst{22-19} = dfmt;
- let Inst{25-23} = nfmt;
- let Inst{31-26} = 0x3a; //encoding
- let Inst{39-32} = vaddr;
- let Inst{47-40} = vdata;
- let Inst{52-48} = srsrc{6-2};
- let Inst{54} = slc;
- let Inst{55} = tfe;
- let Inst{63-56} = soffset;
-}
-
-class SMEMe_vi <bits<8> op, bit imm> : Enc64 {
- bits<7> sbase;
- bits<7> sdst;
- bits<1> glc;
-
- let Inst{5-0} = sbase{6-1};
- let Inst{12-6} = sdst;
- let Inst{16} = glc;
- let Inst{17} = imm;
- let Inst{25-18} = op;
- let Inst{31-26} = 0x30; //encoding
-}
-
-class SMEM_IMMe_vi <bits<8> op> : SMEMe_vi<op, 1> {
- bits<20> offset;
- let Inst{51-32} = offset;
-}
-
-class SMEM_SOFFe_vi <bits<8> op> : SMEMe_vi<op, 0> {
- bits<20> soff;
- let Inst{51-32} = soff;
-}
-
-class VOP3a_vi <bits<10> op> : Enc64 {
- bits<2> src0_modifiers;
- bits<9> src0;
- bits<2> src1_modifiers;
- bits<9> src1;
- bits<2> src2_modifiers;
- bits<9> src2;
- bits<1> clamp;
- bits<2> omod;
-
- let Inst{8} = src0_modifiers{1};
- let Inst{9} = src1_modifiers{1};
- let Inst{10} = src2_modifiers{1};
- let Inst{15} = clamp;
- let Inst{25-16} = op;
- let Inst{31-26} = 0x34; //encoding
- let Inst{40-32} = src0;
- let Inst{49-41} = src1;
- let Inst{58-50} = src2;
- let Inst{60-59} = omod;
- let Inst{61} = src0_modifiers{0};
- let Inst{62} = src1_modifiers{0};
- let Inst{63} = src2_modifiers{0};
-}
-
-class VOP3e_vi <bits<10> op> : VOP3a_vi <op> {
- bits<8> vdst;
-
- let Inst{7-0} = vdst;
-}
-
-// Encoding used for VOPC instructions encoded as VOP3
-// Differs from VOP3e by destination name (sdst) as VOPC doesn't have vector dst
-class VOP3ce_vi <bits<10> op> : VOP3a_vi <op> {
- bits<8> sdst;
-
- let Inst{7-0} = sdst;
-}
-
-class VOP3be_vi <bits<10> op> : Enc64 {
- bits<8> vdst;
- bits<2> src0_modifiers;
- bits<9> src0;
- bits<2> src1_modifiers;
- bits<9> src1;
- bits<2> src2_modifiers;
- bits<9> src2;
- bits<7> sdst;
- bits<2> omod;
- bits<1> clamp;
-
- let Inst{7-0} = vdst;
- let Inst{14-8} = sdst;
- let Inst{15} = clamp;
- let Inst{25-16} = op;
- let Inst{31-26} = 0x34; //encoding
- let Inst{40-32} = src0;
- let Inst{49-41} = src1;
- let Inst{58-50} = src2;
- let Inst{60-59} = omod;
- let Inst{61} = src0_modifiers{0};
- let Inst{62} = src1_modifiers{0};
- let Inst{63} = src2_modifiers{0};
-}
-
-class VOP_DPP <dag outs, dag ins, string asm, list<dag> pattern, bit HasMods = 0> :
- VOPAnyCommon <outs, ins, asm, pattern> {
- let DPP = 1;
- let Size = 8;
-
- let AsmMatchConverter = !if(!eq(HasMods,1), "cvtDPP", "");
-}
-
-class VOP_DPPe : Enc64 {
- bits<2> src0_modifiers;
- bits<8> src0;
- bits<2> src1_modifiers;
- bits<9> dpp_ctrl;
- bits<1> bound_ctrl;
- bits<4> bank_mask;
- bits<4> row_mask;
-
- let Inst{39-32} = src0;
- let Inst{48-40} = dpp_ctrl;
- let Inst{51} = bound_ctrl;
- let Inst{52} = src0_modifiers{0}; // src0_neg
- let Inst{53} = src0_modifiers{1}; // src0_abs
- let Inst{54} = src1_modifiers{0}; // src1_neg
- let Inst{55} = src1_modifiers{1}; // src1_abs
- let Inst{59-56} = bank_mask;
- let Inst{63-60} = row_mask;
-}
-
-class VOP1_DPPe <bits<8> op> : VOP_DPPe {
- bits<8> vdst;
-
- let Inst{8-0} = 0xfa; // dpp
- let Inst{16-9} = op;
- let Inst{24-17} = vdst;
- let Inst{31-25} = 0x3f; //encoding
-}
-
-class VOP2_DPPe <bits<6> op> : VOP_DPPe {
- bits<8> vdst;
- bits<8> src1;
-
- let Inst{8-0} = 0xfa; //dpp
- let Inst{16-9} = src1;
- let Inst{24-17} = vdst;
- let Inst{30-25} = op;
- let Inst{31} = 0x0; //encoding
-}
-
-class VOP_SDWA <dag outs, dag ins, string asm, list<dag> pattern, bit HasMods = 0> :
- VOPAnyCommon <outs, ins, asm, pattern> {
- let SDWA = 1;
- let Size = 8;
-}
-
-class VOP_SDWAe : Enc64 {
- bits<8> src0;
- bits<3> src0_sel;
- bits<2> src0_fmodifiers; // {abs,neg}
- bits<1> src0_imodifiers; // sext
- bits<3> src1_sel;
- bits<2> src1_fmodifiers;
- bits<1> src1_imodifiers;
- bits<3> dst_sel;
- bits<2> dst_unused;
- bits<1> clamp;
-
- let Inst{39-32} = src0;
- let Inst{42-40} = dst_sel;
- let Inst{44-43} = dst_unused;
- let Inst{45} = clamp;
- let Inst{50-48} = src0_sel;
- let Inst{53-52} = src0_fmodifiers;
- let Inst{51} = src0_imodifiers;
- let Inst{58-56} = src1_sel;
- let Inst{61-60} = src1_fmodifiers;
- let Inst{59} = src1_imodifiers;
-}
-
-class VOP1_SDWAe <bits<8> op> : VOP_SDWAe {
- bits<8> vdst;
-
- let Inst{8-0} = 0xf9; // sdwa
- let Inst{16-9} = op;
- let Inst{24-17} = vdst;
- let Inst{31-25} = 0x3f; // encoding
-}
-
-class VOP2_SDWAe <bits<6> op> : VOP_SDWAe {
- bits<8> vdst;
- bits<8> src1;
-
- let Inst{8-0} = 0xf9; // sdwa
- let Inst{16-9} = src1;
- let Inst{24-17} = vdst;
- let Inst{30-25} = op;
- let Inst{31} = 0x0; // encoding
-}
-
-class VOPC_SDWAe <bits<8> op> : VOP_SDWAe {
- bits<8> src1;
-
- let Inst{8-0} = 0xf9; // sdwa
- let Inst{16-9} = src1;
- let Inst{24-17} = op;
- let Inst{31-25} = 0x3e; // encoding
-
- // VOPC disallows dst_sel and dst_unused as they have no effect on destination
- let Inst{42-40} = 0x6;
- let Inst{44-43} = 0x2;
-}
-
class EXPe_vi : EXPe {
let Inst{31-26} = 0x31; //encoding
}
diff --git a/contrib/llvm/lib/Target/AMDGPU/VIInstructions.td b/contrib/llvm/lib/Target/AMDGPU/VIInstructions.td
index 5c490ab900f2..b45c8fc9c7d5 100644
--- a/contrib/llvm/lib/Target/AMDGPU/VIInstructions.td
+++ b/contrib/llvm/lib/Target/AMDGPU/VIInstructions.td
@@ -9,150 +9,6 @@
// Instruction definitions for VI and newer.
//===----------------------------------------------------------------------===//
-let SIAssemblerPredicate = DisableInst, SubtargetPredicate = isVI in {
-
-let DisableSIDecoder = 1 in {
-
-//===----------------------------------------------------------------------===//
-// VOP1 Instructions
-//===----------------------------------------------------------------------===//
-
-defm V_CVT_F16_U16 : VOP1Inst <vop1<0, 0x39>, "v_cvt_f16_u16", VOP_F16_I16>;
-defm V_CVT_F16_I16 : VOP1Inst <vop1<0, 0x3a>, "v_cvt_f16_i16", VOP_F16_I16>;
-defm V_CVT_U16_F16 : VOP1Inst <vop1<0, 0x3b>, "v_cvt_u16_f16", VOP_I16_F16>;
-defm V_CVT_I16_F16 : VOP1Inst <vop1<0, 0x3c>, "v_cvt_i16_f16", VOP_I16_F16>;
-defm V_RCP_F16 : VOP1Inst <vop1<0, 0x3d>, "v_rcp_f16", VOP_F16_F16>;
-defm V_SQRT_F16 : VOP1Inst <vop1<0, 0x3e>, "v_sqrt_f16", VOP_F16_F16>;
-defm V_RSQ_F16 : VOP1Inst <vop1<0, 0x3f>, "v_rsq_f16", VOP_F16_F16>;
-defm V_LOG_F16 : VOP1Inst <vop1<0, 0x40>, "v_log_f16", VOP_F16_F16>;
-defm V_EXP_F16 : VOP1Inst <vop1<0, 0x41>, "v_exp_f16", VOP_F16_F16>;
-defm V_FREXP_MANT_F16 : VOP1Inst <vop1<0, 0x42>, "v_frexp_mant_f16",
- VOP_F16_F16
->;
-defm V_FREXP_EXP_I16_F16 : VOP1Inst <vop1<0, 0x43>, "v_frexp_exp_i16_f16",
- VOP_I16_F16
->;
-defm V_FLOOR_F16 : VOP1Inst <vop1<0, 0x44>, "v_floor_f16", VOP_F16_F16>;
-defm V_CEIL_F16 : VOP1Inst <vop1<0, 0x45>, "v_ceil_f16", VOP_F16_F16>;
-defm V_TRUNC_F16 : VOP1Inst <vop1<0, 0x46>, "v_trunc_f16", VOP_F16_F16>;
-defm V_RNDNE_F16 : VOP1Inst <vop1<0, 0x47>, "v_rndne_f16", VOP_F16_F16>;
-defm V_FRACT_F16 : VOP1Inst <vop1<0, 0x48>, "v_fract_f16", VOP_F16_F16>;
-defm V_SIN_F16 : VOP1Inst <vop1<0, 0x49>, "v_sin_f16", VOP_F16_F16>;
-defm V_COS_F16 : VOP1Inst <vop1<0, 0x4a>, "v_cos_f16", VOP_F16_F16>;
-
-//===----------------------------------------------------------------------===//
-// VOP2 Instructions
-//===----------------------------------------------------------------------===//
-
-let isCommutable = 1 in {
-
-defm V_ADD_F16 : VOP2Inst <vop2<0, 0x1f>, "v_add_f16", VOP_F16_F16_F16>;
-defm V_SUB_F16 : VOP2Inst <vop2<0, 0x20>, "v_sub_f16", VOP_F16_F16_F16>;
-defm V_SUBREV_F16 : VOP2Inst <vop2<0, 0x21>, "v_subrev_f16", VOP_F16_F16_F16,
- null_frag, "v_sub_f16"
->;
-defm V_MUL_F16 : VOP2Inst <vop2<0, 0x22>, "v_mul_f16", VOP_F16_F16_F16>;
-defm V_MAC_F16 : VOP2Inst <vop2<0, 0x23>, "v_mac_f16", VOP_F16_F16_F16>;
-} // End isCommutable = 1
-defm V_MADMK_F16 : VOP2MADK <vop2<0,0x24>, "v_madmk_f16", VOP_MADMK>;
-let isCommutable = 1 in {
-defm V_MADAK_F16 : VOP2MADK <vop2<0,0x25>, "v_madak_f16", VOP_MADAK>;
-defm V_ADD_U16 : VOP2Inst <vop2<0,0x26>, "v_add_u16", VOP_I16_I16_I16>;
-defm V_SUB_U16 : VOP2Inst <vop2<0,0x27>, "v_sub_u16" , VOP_I16_I16_I16>;
-defm V_SUBREV_U16 : VOP2Inst <vop2<0,0x28>, "v_subrev_u16", VOP_I16_I16_I16>;
-defm V_MUL_LO_U16 : VOP2Inst <vop2<0,0x29>, "v_mul_lo_u16", VOP_I16_I16_I16>;
-} // End isCommutable = 1
-defm V_LSHLREV_B16 : VOP2Inst <vop2<0,0x2a>, "v_lshlrev_b16", VOP_I16_I16_I16>;
-defm V_LSHRREV_B16 : VOP2Inst <vop2<0,0x2b>, "v_lshrrev_b16", VOP_I16_I16_I16>;
-defm V_ASHRREV_B16 : VOP2Inst <vop2<0,0x2c>, "v_ashrrev_b16", VOP_I16_I16_I16>;
-let isCommutable = 1 in {
-defm V_MAX_F16 : VOP2Inst <vop2<0,0x2d>, "v_max_f16", VOP_F16_F16_F16>;
-defm V_MIN_F16 : VOP2Inst <vop2<0,0x2e>, "v_min_f16", VOP_F16_F16_F16>;
-defm V_MAX_U16 : VOP2Inst <vop2<0,0x2f>, "v_max_u16", VOP_I16_I16_I16>;
-defm V_MAX_I16 : VOP2Inst <vop2<0,0x30>, "v_max_i16", VOP_I16_I16_I16>;
-defm V_MIN_U16 : VOP2Inst <vop2<0,0x31>, "v_min_u16", VOP_I16_I16_I16>;
-defm V_MIN_I16 : VOP2Inst <vop2<0,0x32>, "v_min_i16", VOP_I16_I16_I16>;
-} // End isCommutable = 1
-defm V_LDEXP_F16 : VOP2Inst <vop2<0,0x33>, "v_ldexp_f16", VOP_F16_F16_I16>;
-
-//===----------------------------------------------------------------------===//
-// VOP3 Instructions
-//===----------------------------------------------------------------------===//
-let isCommutable = 1 in {
- defm V_MAD_F16 : VOP3Inst <vop3<0, 0x1ea>, "v_mad_f16", VOP_F16_F16_F16_F16>;
- defm V_MAD_U16 : VOP3Inst <vop3<0, 0x1eb>, "v_mad_u16", VOP_I16_I16_I16_I16>;
- defm V_MAD_I16 : VOP3Inst <vop3<0, 0x1ec>, "v_mad_i16", VOP_I16_I16_I16_I16>;
-}
-} // let DisableSIDecoder = 1
-
-// Aliases to simplify matching of floating-point instructions that
-// are VOP2 on SI and VOP3 on VI.
-
-class SI2_VI3Alias <string name, Instruction inst> : InstAlias <
- name#" $dst, $src0, $src1",
- (inst VGPR_32:$dst, 0, VCSrc_32:$src0, 0, VCSrc_32:$src1, 0, 0)
->, PredicateControl {
- let UseInstAsmMatchConverter = 0;
-}
-
-def : SI2_VI3Alias <"v_ldexp_f32", V_LDEXP_F32_e64_vi>;
-def : SI2_VI3Alias <"v_cvt_pkaccum_u8_f32", V_CVT_PKACCUM_U8_F32_e64_vi>;
-def : SI2_VI3Alias <"v_cvt_pknorm_i16_f32", V_CVT_PKNORM_I16_F32_e64_vi>;
-def : SI2_VI3Alias <"v_cvt_pknorm_u16_f32", V_CVT_PKNORM_U16_F32_e64_vi>;
-def : SI2_VI3Alias <"v_cvt_pkrtz_f16_f32", V_CVT_PKRTZ_F16_F32_e64_vi>;
-
-//===----------------------------------------------------------------------===//
-// SMEM Instructions
-//===----------------------------------------------------------------------===//
-
-def S_DCACHE_WB : SMEM_Inval <0x21,
- "s_dcache_wb", int_amdgcn_s_dcache_wb>;
-
-def S_DCACHE_WB_VOL : SMEM_Inval <0x23,
- "s_dcache_wb_vol", int_amdgcn_s_dcache_wb_vol>;
-
-def S_MEMREALTIME : SMEM_Ret<0x25,
- "s_memrealtime", int_amdgcn_s_memrealtime>;
-
-} // End SIAssemblerPredicate = DisableInst, SubtargetPredicate = isVI
-
-let Predicates = [isVI] in {
-
-// 1. Offset as 20bit DWORD immediate
-def : Pat <
- (SIload_constant v4i32:$sbase, IMM20bit:$offset),
- (S_BUFFER_LOAD_DWORD_IMM $sbase, (as_i32imm $offset))
->;
-
-//===----------------------------------------------------------------------===//
-// DPP Patterns
-//===----------------------------------------------------------------------===//
-
-def : Pat <
- (int_amdgcn_mov_dpp i32:$src, imm:$dpp_ctrl, imm:$row_mask, imm:$bank_mask,
- imm:$bound_ctrl),
- (V_MOV_B32_dpp $src, (as_i32imm $dpp_ctrl), (as_i32imm $row_mask),
- (as_i32imm $bank_mask), (as_i1imm $bound_ctrl))
->;
-
-//===----------------------------------------------------------------------===//
-// Misc Patterns
-//===----------------------------------------------------------------------===//
-
-def : Pat <
- (i64 (readcyclecounter)),
- (S_MEMREALTIME)
->;
-
-//===----------------------------------------------------------------------===//
-// DS_PERMUTE/DS_BPERMUTE Instructions.
-//===----------------------------------------------------------------------===//
-
-let Uses = [EXEC] in {
-defm DS_PERMUTE_B32 : DS_1A1D_PERMUTE <0x3e, "ds_permute_b32", VGPR_32,
- int_amdgcn_ds_permute>;
-defm DS_BPERMUTE_B32 : DS_1A1D_PERMUTE <0x3f, "ds_bpermute_b32", VGPR_32,
- int_amdgcn_ds_bpermute>;
-}
-
-} // End Predicates = [isVI]
+FIXME: Deleting this file broke buildbots that don't do full rebuilds. This
+file is no longer used by the backend, so it can be deleted once all
+the buildbots update their dependencies.
diff --git a/contrib/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/contrib/llvm/lib/Target/AMDGPU/VOP1Instructions.td
new file mode 100644
index 000000000000..bff706cdc1dc
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -0,0 +1,621 @@
+//===-- VOP1Instructions.td - Vector Instruction Definitions --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// VOP1 Classes
+//===----------------------------------------------------------------------===//
+
+class VOP1e <bits<8> op, VOPProfile P> : Enc32 {
+ bits<8> vdst;
+ bits<9> src0;
+
+ let Inst{8-0} = !if(P.HasSrc0, src0{8-0}, 0);
+ let Inst{16-9} = op;
+ let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
+ let Inst{31-25} = 0x3f; //encoding
+}
+
+class VOP1_SDWAe <bits<8> op, VOPProfile P> : VOP_SDWAe <P> {
+ bits<8> vdst;
+
+ let Inst{8-0} = 0xf9; // sdwa
+ let Inst{16-9} = op;
+ let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
+ let Inst{31-25} = 0x3f; // encoding
+}
+
+class VOP1_Pseudo <string opName, VOPProfile P, list<dag> pattern=[]> :
+ InstSI <P.Outs32, P.Ins32, "", pattern>,
+ VOP <opName>,
+ SIMCInstr <opName#"_e32", SIEncodingFamily.NONE>,
+ MnemonicAlias<opName#"_e32", opName> {
+
+ let isPseudo = 1;
+ let isCodeGenOnly = 1;
+ let UseNamedOperandTable = 1;
+
+ string Mnemonic = opName;
+ string AsmOperands = P.Asm32;
+
+ let Size = 4;
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let SubtargetPredicate = isGCN;
+
+ let VOP1 = 1;
+ let VALU = 1;
+ let Uses = [EXEC];
+
+ let AsmVariantName = AMDGPUAsmVariants.Default;
+
+ VOPProfile Pfl = P;
+}
+
+class VOP1_Real <VOP1_Pseudo ps, int EncodingFamily> :
+ InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
+ SIMCInstr <ps.PseudoInstr, EncodingFamily> {
+
+ let isPseudo = 0;
+ let isCodeGenOnly = 0;
+
+ let Constraints = ps.Constraints;
+ let DisableEncoding = ps.DisableEncoding;
+
+ // copy relevant pseudo op flags
+ let SubtargetPredicate = ps.SubtargetPredicate;
+ let AsmMatchConverter = ps.AsmMatchConverter;
+ let AsmVariantName = ps.AsmVariantName;
+ let Constraints = ps.Constraints;
+ let DisableEncoding = ps.DisableEncoding;
+ let TSFlags = ps.TSFlags;
+}
+
+class VOP1_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
+ VOP_SDWA_Pseudo <OpName, P, pattern> {
+ let AsmMatchConverter = "cvtSdwaVOP1";
+}
+
+class getVOP1Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
+ list<dag> ret = !if(P.HasModifiers,
+ [(set P.DstVT:$vdst, (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0,
+ i32:$src0_modifiers, i1:$clamp, i32:$omod))))],
+ [(set P.DstVT:$vdst, (node P.Src0VT:$src0))]);
+}
+
+multiclass VOP1Inst <string opName, VOPProfile P,
+ SDPatternOperator node = null_frag> {
+ def _e32 : VOP1_Pseudo <opName, P>;
+ def _e64 : VOP3_Pseudo <opName, P, getVOP1Pat64<node, P>.ret>;
+ def _sdwa : VOP1_SDWA_Pseudo <opName, P>;
+}
+
+//===----------------------------------------------------------------------===//
+// VOP1 Instructions
+//===----------------------------------------------------------------------===//
+
+let VOPAsmPrefer32Bit = 1 in {
+defm V_NOP : VOP1Inst <"v_nop", VOP_NONE>;
+}
+
+let isMoveImm = 1, isReMaterializable = 1, isAsCheapAsAMove = 1 in {
+defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOP_I32_I32>;
+} // End isMoveImm = 1
+
+// FIXME: Specify SchedRW for READFIRSTLANE_B32
+// TODO: Make profile for this, there is VOP3 encoding also
+def V_READFIRSTLANE_B32 :
+ InstSI <(outs SReg_32:$vdst),
+ (ins VGPR_32:$src0),
+ "v_readfirstlane_b32 $vdst, $src0",
+ [(set i32:$vdst, (int_amdgcn_readfirstlane i32:$src0))]>,
+ Enc32 {
+
+ let isCodeGenOnly = 0;
+ let UseNamedOperandTable = 1;
+
+ let Size = 4;
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let SubtargetPredicate = isGCN;
+
+ let VOP1 = 1;
+ let VALU = 1;
+ let Uses = [EXEC];
+ let isConvergent = 1;
+
+ bits<8> vdst;
+ bits<9> src0;
+
+ let Inst{8-0} = src0;
+ let Inst{16-9} = 0x2;
+ let Inst{24-17} = vdst;
+ let Inst{31-25} = 0x3f; //encoding
+}
+
+let SchedRW = [WriteQuarterRate32] in {
+defm V_CVT_I32_F64 : VOP1Inst <"v_cvt_i32_f64", VOP_I32_F64, fp_to_sint>;
+defm V_CVT_F64_I32 : VOP1Inst <"v_cvt_f64_i32", VOP_F64_I32, sint_to_fp>;
+defm V_CVT_F32_I32 : VOP1Inst <"v_cvt_f32_i32", VOP_F32_I32, sint_to_fp>;
+defm V_CVT_F32_U32 : VOP1Inst <"v_cvt_f32_u32", VOP_F32_I32, uint_to_fp>;
+defm V_CVT_U32_F32 : VOP1Inst <"v_cvt_u32_f32", VOP_I32_F32, fp_to_uint>;
+defm V_CVT_I32_F32 : VOP1Inst <"v_cvt_i32_f32", VOP_I32_F32, fp_to_sint>;
+defm V_CVT_F16_F32 : VOP1Inst <"v_cvt_f16_f32", VOP_I32_F32, fp_to_f16>;
+defm V_CVT_F32_F16 : VOP1Inst <"v_cvt_f32_f16", VOP_F32_I32, f16_to_fp>;
+defm V_CVT_RPI_I32_F32 : VOP1Inst <"v_cvt_rpi_i32_f32", VOP_I32_F32, cvt_rpi_i32_f32>;
+defm V_CVT_FLR_I32_F32 : VOP1Inst <"v_cvt_flr_i32_f32", VOP_I32_F32, cvt_flr_i32_f32>;
+defm V_CVT_OFF_F32_I4 : VOP1Inst <"v_cvt_off_f32_i4", VOP_F32_I32>;
+defm V_CVT_F32_F64 : VOP1Inst <"v_cvt_f32_f64", VOP_F32_F64, fpround>;
+defm V_CVT_F64_F32 : VOP1Inst <"v_cvt_f64_f32", VOP_F64_F32, fpextend>;
+defm V_CVT_F32_UBYTE0 : VOP1Inst <"v_cvt_f32_ubyte0", VOP_F32_I32, AMDGPUcvt_f32_ubyte0>;
+defm V_CVT_F32_UBYTE1 : VOP1Inst <"v_cvt_f32_ubyte1", VOP_F32_I32, AMDGPUcvt_f32_ubyte1>;
+defm V_CVT_F32_UBYTE2 : VOP1Inst <"v_cvt_f32_ubyte2", VOP_F32_I32, AMDGPUcvt_f32_ubyte2>;
+defm V_CVT_F32_UBYTE3 : VOP1Inst <"v_cvt_f32_ubyte3", VOP_F32_I32, AMDGPUcvt_f32_ubyte3>;
+defm V_CVT_U32_F64 : VOP1Inst <"v_cvt_u32_f64", VOP_I32_F64, fp_to_uint>;
+defm V_CVT_F64_U32 : VOP1Inst <"v_cvt_f64_u32", VOP_F64_I32, uint_to_fp>;
+} // End SchedRW = [WriteQuarterRate32]
+
+defm V_FRACT_F32 : VOP1Inst <"v_fract_f32", VOP_F32_F32, AMDGPUfract>;
+defm V_TRUNC_F32 : VOP1Inst <"v_trunc_f32", VOP_F32_F32, ftrunc>;
+defm V_CEIL_F32 : VOP1Inst <"v_ceil_f32", VOP_F32_F32, fceil>;
+defm V_RNDNE_F32 : VOP1Inst <"v_rndne_f32", VOP_F32_F32, frint>;
+defm V_FLOOR_F32 : VOP1Inst <"v_floor_f32", VOP_F32_F32, ffloor>;
+defm V_EXP_F32 : VOP1Inst <"v_exp_f32", VOP_F32_F32, fexp2>;
+
+let SchedRW = [WriteQuarterRate32] in {
+defm V_LOG_F32 : VOP1Inst <"v_log_f32", VOP_F32_F32, flog2>;
+defm V_RCP_F32 : VOP1Inst <"v_rcp_f32", VOP_F32_F32, AMDGPUrcp>;
+defm V_RCP_IFLAG_F32 : VOP1Inst <"v_rcp_iflag_f32", VOP_F32_F32>;
+defm V_RSQ_F32 : VOP1Inst <"v_rsq_f32", VOP_F32_F32, AMDGPUrsq>;
+} // End SchedRW = [WriteQuarterRate32]
+
+let SchedRW = [WriteDouble] in {
+defm V_RCP_F64 : VOP1Inst <"v_rcp_f64", VOP_F64_F64, AMDGPUrcp>;
+defm V_RSQ_F64 : VOP1Inst <"v_rsq_f64", VOP_F64_F64, AMDGPUrsq>;
+} // End SchedRW = [WriteDouble]
+
+defm V_SQRT_F32 : VOP1Inst <"v_sqrt_f32", VOP_F32_F32, fsqrt>;
+
+let SchedRW = [WriteDouble] in {
+defm V_SQRT_F64 : VOP1Inst <"v_sqrt_f64", VOP_F64_F64, fsqrt>;
+} // End SchedRW = [WriteDouble]
+
+let SchedRW = [WriteQuarterRate32] in {
+defm V_SIN_F32 : VOP1Inst <"v_sin_f32", VOP_F32_F32, AMDGPUsin>;
+defm V_COS_F32 : VOP1Inst <"v_cos_f32", VOP_F32_F32, AMDGPUcos>;
+} // End SchedRW = [WriteQuarterRate32]
+
+defm V_NOT_B32 : VOP1Inst <"v_not_b32", VOP_I32_I32>;
+defm V_BFREV_B32 : VOP1Inst <"v_bfrev_b32", VOP_I32_I32>;
+defm V_FFBH_U32 : VOP1Inst <"v_ffbh_u32", VOP_I32_I32>;
+defm V_FFBL_B32 : VOP1Inst <"v_ffbl_b32", VOP_I32_I32>;
+defm V_FFBH_I32 : VOP1Inst <"v_ffbh_i32", VOP_I32_I32>;
+defm V_FREXP_EXP_I32_F64 : VOP1Inst <"v_frexp_exp_i32_f64", VOP_I32_F64, int_amdgcn_frexp_exp>;
+
+let SchedRW = [WriteDoubleAdd] in {
+defm V_FREXP_MANT_F64 : VOP1Inst <"v_frexp_mant_f64", VOP_F64_F64, int_amdgcn_frexp_mant>;
+defm V_FRACT_F64 : VOP1Inst <"v_fract_f64", VOP_F64_F64, AMDGPUfract>;
+} // End SchedRW = [WriteDoubleAdd]
+
+defm V_FREXP_EXP_I32_F32 : VOP1Inst <"v_frexp_exp_i32_f32", VOP_I32_F32, int_amdgcn_frexp_exp>;
+defm V_FREXP_MANT_F32 : VOP1Inst <"v_frexp_mant_f32", VOP_F32_F32, int_amdgcn_frexp_mant>;
+
+let VOPAsmPrefer32Bit = 1 in {
+defm V_CLREXCP : VOP1Inst <"v_clrexcp", VOP_NO_EXT<VOP_NONE>>;
+}
+
+// Restrict src0 to be VGPR
+def VOP_I32_VI32_NO_EXT : VOPProfile<[i32, i32, untyped, untyped]> {
+ let Src0RC32 = VRegSrc_32;
+ let Src0RC64 = VRegSrc_32;
+
+ let HasExt = 0;
+}
+
+// Special case because there are no true output operands. Hack vdst
+// to be a src operand. The custom inserter must add a tied implicit
+// def and use of the super register since there seems to be no way to
+// add an implicit def of a virtual register in tablegen.
+def VOP_MOVRELD : VOPProfile<[untyped, i32, untyped, untyped]> {
+ let Src0RC32 = VOPDstOperand<VGPR_32>;
+ let Src0RC64 = VOPDstOperand<VGPR_32>;
+
+ let Outs = (outs);
+ let Ins32 = (ins Src0RC32:$vdst, VSrc_b32:$src0);
+ let Ins64 = (ins Src0RC64:$vdst, VSrc_b32:$src0);
+ let InsDPP = (ins Src0RC32:$vdst, Src0RC32:$src0, dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
+ bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
+ let InsSDWA = (ins Src0RC32:$vdst, Int32InputMods:$src0_modifiers, VCSrc_b32:$src0,
+ clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
+ src0_sel:$src0_sel);
+
+ let Asm32 = getAsm32<1, 1>.ret;
+ let Asm64 = getAsm64<1, 1, 0>.ret;
+ let AsmDPP = getAsmDPP<1, 1, 0>.ret;
+ let AsmSDWA = getAsmSDWA<1, 1, 0>.ret;
+
+ let HasExt = 0;
+ let HasDst = 0;
+ let EmitDst = 1; // force vdst emission
+}
+
+let SubtargetPredicate = HasMovrel, Uses = [M0, EXEC] in {
+// v_movreld_b32 is a special case because the destination output
+// register is really a source. It isn't actually read (but may be
+// written), and is only there to provide the base register to start
+// indexing from. Tablegen seems to not let you define an implicit
+// virtual register output for the super register being written into,
+// so this must have an implicit def of the register added to it.
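+// Illustrative sketch: with M0 = 2, "v_movreld_b32 v4, v1" writes v6
+// (v[4 + M0]) = v1; v4 itself only supplies the base of the indexed write.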
+defm V_MOVRELD_B32 : VOP1Inst <"v_movreld_b32", VOP_MOVRELD>;
+defm V_MOVRELS_B32 : VOP1Inst <"v_movrels_b32", VOP_I32_VI32_NO_EXT>;
+defm V_MOVRELSD_B32 : VOP1Inst <"v_movrelsd_b32", VOP_NO_EXT<VOP_I32_I32>>;
+} // End Uses = [M0, EXEC]
+
+// These instructions only exist on SI and CI
+let SubtargetPredicate = isSICI in {
+
+let SchedRW = [WriteQuarterRate32] in {
+defm V_MOV_FED_B32 : VOP1Inst <"v_mov_fed_b32", VOP_I32_I32>;
+defm V_LOG_CLAMP_F32 : VOP1Inst <"v_log_clamp_f32", VOP_F32_F32, int_amdgcn_log_clamp>;
+defm V_RCP_CLAMP_F32 : VOP1Inst <"v_rcp_clamp_f32", VOP_F32_F32>;
+defm V_RCP_LEGACY_F32 : VOP1Inst <"v_rcp_legacy_f32", VOP_F32_F32, AMDGPUrcp_legacy>;
+defm V_RSQ_CLAMP_F32 : VOP1Inst <"v_rsq_clamp_f32", VOP_F32_F32, AMDGPUrsq_clamp>;
+defm V_RSQ_LEGACY_F32 : VOP1Inst <"v_rsq_legacy_f32", VOP_F32_F32, AMDGPUrsq_legacy>;
+} // End SchedRW = [WriteQuarterRate32]
+
+let SchedRW = [WriteDouble] in {
+defm V_RCP_CLAMP_F64 : VOP1Inst <"v_rcp_clamp_f64", VOP_F64_F64>;
+defm V_RSQ_CLAMP_F64 : VOP1Inst <"v_rsq_clamp_f64", VOP_F64_F64, AMDGPUrsq_clamp>;
+} // End SchedRW = [WriteDouble]
+
+} // End SubtargetPredicate = isSICI
+
+
+let SubtargetPredicate = isCIVI in {
+
+let SchedRW = [WriteDoubleAdd] in {
+defm V_TRUNC_F64 : VOP1Inst <"v_trunc_f64", VOP_F64_F64, ftrunc>;
+defm V_CEIL_F64 : VOP1Inst <"v_ceil_f64", VOP_F64_F64, fceil>;
+defm V_FLOOR_F64 : VOP1Inst <"v_floor_f64", VOP_F64_F64, ffloor>;
+defm V_RNDNE_F64 : VOP1Inst <"v_rndne_f64", VOP_F64_F64, frint>;
+} // End SchedRW = [WriteDoubleAdd]
+
+let SchedRW = [WriteQuarterRate32] in {
+defm V_LOG_LEGACY_F32 : VOP1Inst <"v_log_legacy_f32", VOP_F32_F32>;
+defm V_EXP_LEGACY_F32 : VOP1Inst <"v_exp_legacy_f32", VOP_F32_F32>;
+} // End SchedRW = [WriteQuarterRate32]
+
+} // End SubtargetPredicate = isCIVI
+
+
+let SubtargetPredicate = isVI in {
+
+defm V_CVT_F16_U16 : VOP1Inst <"v_cvt_f16_u16", VOP_F16_I16, uint_to_fp>;
+defm V_CVT_F16_I16 : VOP1Inst <"v_cvt_f16_i16", VOP_F16_I16, sint_to_fp>;
+defm V_CVT_U16_F16 : VOP1Inst <"v_cvt_u16_f16", VOP_I16_F16, fp_to_uint>;
+defm V_CVT_I16_F16 : VOP1Inst <"v_cvt_i16_f16", VOP_I16_F16, fp_to_sint>;
+defm V_RCP_F16 : VOP1Inst <"v_rcp_f16", VOP_F16_F16, AMDGPUrcp>;
+defm V_SQRT_F16 : VOP1Inst <"v_sqrt_f16", VOP_F16_F16, fsqrt>;
+defm V_RSQ_F16 : VOP1Inst <"v_rsq_f16", VOP_F16_F16, AMDGPUrsq>;
+defm V_LOG_F16 : VOP1Inst <"v_log_f16", VOP_F16_F16, flog2>;
+defm V_EXP_F16 : VOP1Inst <"v_exp_f16", VOP_F16_F16, fexp2>;
+defm V_FREXP_MANT_F16 : VOP1Inst <"v_frexp_mant_f16", VOP_F16_F16, int_amdgcn_frexp_mant>;
+defm V_FREXP_EXP_I16_F16 : VOP1Inst <"v_frexp_exp_i16_f16", VOP_I16_F16, int_amdgcn_frexp_exp>;
+defm V_FLOOR_F16 : VOP1Inst <"v_floor_f16", VOP_F16_F16, ffloor>;
+defm V_CEIL_F16 : VOP1Inst <"v_ceil_f16", VOP_F16_F16, fceil>;
+defm V_TRUNC_F16 : VOP1Inst <"v_trunc_f16", VOP_F16_F16, ftrunc>;
+defm V_RNDNE_F16 : VOP1Inst <"v_rndne_f16", VOP_F16_F16, frint>;
+defm V_FRACT_F16 : VOP1Inst <"v_fract_f16", VOP_F16_F16, AMDGPUfract>;
+defm V_SIN_F16 : VOP1Inst <"v_sin_f16", VOP_F16_F16, AMDGPUsin>;
+defm V_COS_F16 : VOP1Inst <"v_cos_f16", VOP_F16_F16, AMDGPUcos>;
+
+}
+
+let Predicates = [isVI] in {
+
+def : Pat<
+ (f32 (f16_to_fp i16:$src)),
+ (V_CVT_F32_F16_e32 $src)
+>;
+
+def : Pat<
+ (i16 (fp_to_f16 f32:$src)),
+ (V_CVT_F16_F32_e32 $src)
+>;
+
+}
+
+//===----------------------------------------------------------------------===//
+// Target
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SI
+//===----------------------------------------------------------------------===//
+
+multiclass VOP1_Real_si <bits<9> op> {
+ let AssemblerPredicates = [isSICI], DecoderNamespace = "SICI" in {
+ def _e32_si :
+ VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>,
+ VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;
+ def _e64_si :
+ VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
+ VOP3e_si <{1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
+ }
+}
+
+defm V_NOP : VOP1_Real_si <0x0>;
+defm V_MOV_B32 : VOP1_Real_si <0x1>;
+defm V_CVT_I32_F64 : VOP1_Real_si <0x3>;
+defm V_CVT_F64_I32 : VOP1_Real_si <0x4>;
+defm V_CVT_F32_I32 : VOP1_Real_si <0x5>;
+defm V_CVT_F32_U32 : VOP1_Real_si <0x6>;
+defm V_CVT_U32_F32 : VOP1_Real_si <0x7>;
+defm V_CVT_I32_F32 : VOP1_Real_si <0x8>;
+defm V_MOV_FED_B32 : VOP1_Real_si <0x9>;
+defm V_CVT_F16_F32 : VOP1_Real_si <0xa>;
+defm V_CVT_F32_F16 : VOP1_Real_si <0xb>;
+defm V_CVT_RPI_I32_F32 : VOP1_Real_si <0xc>;
+defm V_CVT_FLR_I32_F32 : VOP1_Real_si <0xd>;
+defm V_CVT_OFF_F32_I4 : VOP1_Real_si <0xe>;
+defm V_CVT_F32_F64 : VOP1_Real_si <0xf>;
+defm V_CVT_F64_F32 : VOP1_Real_si <0x10>;
+defm V_CVT_F32_UBYTE0 : VOP1_Real_si <0x11>;
+defm V_CVT_F32_UBYTE1 : VOP1_Real_si <0x12>;
+defm V_CVT_F32_UBYTE2 : VOP1_Real_si <0x13>;
+defm V_CVT_F32_UBYTE3 : VOP1_Real_si <0x14>;
+defm V_CVT_U32_F64 : VOP1_Real_si <0x15>;
+defm V_CVT_F64_U32 : VOP1_Real_si <0x16>;
+defm V_FRACT_F32 : VOP1_Real_si <0x20>;
+defm V_TRUNC_F32 : VOP1_Real_si <0x21>;
+defm V_CEIL_F32 : VOP1_Real_si <0x22>;
+defm V_RNDNE_F32 : VOP1_Real_si <0x23>;
+defm V_FLOOR_F32 : VOP1_Real_si <0x24>;
+defm V_EXP_F32 : VOP1_Real_si <0x25>;
+defm V_LOG_CLAMP_F32 : VOP1_Real_si <0x26>;
+defm V_LOG_F32 : VOP1_Real_si <0x27>;
+defm V_RCP_CLAMP_F32 : VOP1_Real_si <0x28>;
+defm V_RCP_LEGACY_F32 : VOP1_Real_si <0x29>;
+defm V_RCP_F32 : VOP1_Real_si <0x2a>;
+defm V_RCP_IFLAG_F32 : VOP1_Real_si <0x2b>;
+defm V_RSQ_CLAMP_F32 : VOP1_Real_si <0x2c>;
+defm V_RSQ_LEGACY_F32 : VOP1_Real_si <0x2d>;
+defm V_RSQ_F32 : VOP1_Real_si <0x2e>;
+defm V_RCP_F64 : VOP1_Real_si <0x2f>;
+defm V_RCP_CLAMP_F64 : VOP1_Real_si <0x30>;
+defm V_RSQ_F64 : VOP1_Real_si <0x31>;
+defm V_RSQ_CLAMP_F64 : VOP1_Real_si <0x32>;
+defm V_SQRT_F32 : VOP1_Real_si <0x33>;
+defm V_SQRT_F64 : VOP1_Real_si <0x34>;
+defm V_SIN_F32 : VOP1_Real_si <0x35>;
+defm V_COS_F32 : VOP1_Real_si <0x36>;
+defm V_NOT_B32 : VOP1_Real_si <0x37>;
+defm V_BFREV_B32 : VOP1_Real_si <0x38>;
+defm V_FFBH_U32 : VOP1_Real_si <0x39>;
+defm V_FFBL_B32 : VOP1_Real_si <0x3a>;
+defm V_FFBH_I32 : VOP1_Real_si <0x3b>;
+defm V_FREXP_EXP_I32_F64 : VOP1_Real_si <0x3c>;
+defm V_FREXP_MANT_F64 : VOP1_Real_si <0x3d>;
+defm V_FRACT_F64 : VOP1_Real_si <0x3e>;
+defm V_FREXP_EXP_I32_F32 : VOP1_Real_si <0x3f>;
+defm V_FREXP_MANT_F32 : VOP1_Real_si <0x40>;
+defm V_CLREXCP : VOP1_Real_si <0x41>;
+defm V_MOVRELD_B32 : VOP1_Real_si <0x42>;
+defm V_MOVRELS_B32 : VOP1_Real_si <0x43>;
+defm V_MOVRELSD_B32 : VOP1_Real_si <0x44>;
+
+//===----------------------------------------------------------------------===//
+// CI
+//===----------------------------------------------------------------------===//
+
+multiclass VOP1_Real_ci <bits<9> op> {
+ let AssemblerPredicates = [isCIOnly], DecoderNamespace = "CI" in {
+ def _e32_ci :
+ VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>,
+ VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;
+ def _e64_ci :
+ VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
+ VOP3e_si <{1, 1, op{6-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
+ }
+}
+
+defm V_TRUNC_F64 : VOP1_Real_ci <0x17>;
+defm V_CEIL_F64 : VOP1_Real_ci <0x18>;
+defm V_FLOOR_F64 : VOP1_Real_ci <0x1A>;
+defm V_RNDNE_F64 : VOP1_Real_ci <0x19>;
+defm V_LOG_LEGACY_F32 : VOP1_Real_ci <0x45>;
+defm V_EXP_LEGACY_F32 : VOP1_Real_ci <0x46>;
+
+//===----------------------------------------------------------------------===//
+// VI
+//===----------------------------------------------------------------------===//
+
+class VOP1_DPP <bits<8> op, VOP1_Pseudo ps, VOPProfile P = ps.Pfl> :
+ VOP_DPP <ps.OpName, P> {
+ let Defs = ps.Defs;
+ let Uses = ps.Uses;
+ let SchedRW = ps.SchedRW;
+ let hasSideEffects = ps.hasSideEffects;
+ let Constraints = ps.Constraints;
+ let DisableEncoding = ps.DisableEncoding;
+
+ bits<8> vdst;
+ let Inst{8-0} = 0xfa; // dpp
+ let Inst{16-9} = op;
+ let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
+ let Inst{31-25} = 0x3f; //encoding
+}
+
+multiclass VOP1_Real_vi <bits<10> op> {
+ let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in {
+ def _e32_vi :
+ VOP1_Real<!cast<VOP1_Pseudo>(NAME#"_e32"), SIEncodingFamily.VI>,
+ VOP1e<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32").Pfl>;
+ def _e64_vi :
+ VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
+ VOP3e_vi <!add(0x140, op), !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
+ }
+
+ def _sdwa_vi :
+ VOP_SDWA_Real <!cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa")>,
+ VOP1_SDWAe <op{7-0}, !cast<VOP1_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
+
+ // For now, the DPP variant is only provided for asm/disasm
+ // TODO: add corresponding pseudo
+ def _dpp : VOP1_DPP<op{7-0}, !cast<VOP1_Pseudo>(NAME#"_e32")>;
+}
+
+defm V_NOP : VOP1_Real_vi <0x0>;
+defm V_MOV_B32 : VOP1_Real_vi <0x1>;
+defm V_CVT_I32_F64 : VOP1_Real_vi <0x3>;
+defm V_CVT_F64_I32 : VOP1_Real_vi <0x4>;
+defm V_CVT_F32_I32 : VOP1_Real_vi <0x5>;
+defm V_CVT_F32_U32 : VOP1_Real_vi <0x6>;
+defm V_CVT_U32_F32 : VOP1_Real_vi <0x7>;
+defm V_CVT_I32_F32 : VOP1_Real_vi <0x8>;
+defm V_CVT_F16_F32 : VOP1_Real_vi <0xa>;
+defm V_CVT_F32_F16 : VOP1_Real_vi <0xb>;
+defm V_CVT_RPI_I32_F32 : VOP1_Real_vi <0xc>;
+defm V_CVT_FLR_I32_F32 : VOP1_Real_vi <0xd>;
+defm V_CVT_OFF_F32_I4 : VOP1_Real_vi <0xe>;
+defm V_CVT_F32_F64 : VOP1_Real_vi <0xf>;
+defm V_CVT_F64_F32 : VOP1_Real_vi <0x10>;
+defm V_CVT_F32_UBYTE0 : VOP1_Real_vi <0x11>;
+defm V_CVT_F32_UBYTE1 : VOP1_Real_vi <0x12>;
+defm V_CVT_F32_UBYTE2 : VOP1_Real_vi <0x13>;
+defm V_CVT_F32_UBYTE3 : VOP1_Real_vi <0x14>;
+defm V_CVT_U32_F64 : VOP1_Real_vi <0x15>;
+defm V_CVT_F64_U32 : VOP1_Real_vi <0x16>;
+defm V_FRACT_F32 : VOP1_Real_vi <0x1b>;
+defm V_TRUNC_F32 : VOP1_Real_vi <0x1c>;
+defm V_CEIL_F32 : VOP1_Real_vi <0x1d>;
+defm V_RNDNE_F32 : VOP1_Real_vi <0x1e>;
+defm V_FLOOR_F32 : VOP1_Real_vi <0x1f>;
+defm V_EXP_F32 : VOP1_Real_vi <0x20>;
+defm V_LOG_F32 : VOP1_Real_vi <0x21>;
+defm V_RCP_F32 : VOP1_Real_vi <0x22>;
+defm V_RCP_IFLAG_F32 : VOP1_Real_vi <0x23>;
+defm V_RSQ_F32 : VOP1_Real_vi <0x24>;
+defm V_RCP_F64 : VOP1_Real_vi <0x25>;
+defm V_RSQ_F64 : VOP1_Real_vi <0x26>;
+defm V_SQRT_F32 : VOP1_Real_vi <0x27>;
+defm V_SQRT_F64 : VOP1_Real_vi <0x28>;
+defm V_SIN_F32 : VOP1_Real_vi <0x29>;
+defm V_COS_F32 : VOP1_Real_vi <0x2a>;
+defm V_NOT_B32 : VOP1_Real_vi <0x2b>;
+defm V_BFREV_B32 : VOP1_Real_vi <0x2c>;
+defm V_FFBH_U32 : VOP1_Real_vi <0x2d>;
+defm V_FFBL_B32 : VOP1_Real_vi <0x2e>;
+defm V_FFBH_I32 : VOP1_Real_vi <0x2f>;
+defm V_FREXP_EXP_I32_F64 : VOP1_Real_vi <0x30>;
+defm V_FREXP_MANT_F64 : VOP1_Real_vi <0x31>;
+defm V_FRACT_F64 : VOP1_Real_vi <0x32>;
+defm V_FREXP_EXP_I32_F32 : VOP1_Real_vi <0x33>;
+defm V_FREXP_MANT_F32 : VOP1_Real_vi <0x34>;
+defm V_CLREXCP : VOP1_Real_vi <0x35>;
+defm V_MOVRELD_B32 : VOP1_Real_vi <0x36>;
+defm V_MOVRELS_B32 : VOP1_Real_vi <0x37>;
+defm V_MOVRELSD_B32 : VOP1_Real_vi <0x38>;
+defm V_TRUNC_F64 : VOP1_Real_vi <0x17>;
+defm V_CEIL_F64 : VOP1_Real_vi <0x18>;
+defm V_FLOOR_F64 : VOP1_Real_vi <0x1A>;
+defm V_RNDNE_F64 : VOP1_Real_vi <0x19>;
+defm V_LOG_LEGACY_F32 : VOP1_Real_vi <0x4c>;
+defm V_EXP_LEGACY_F32 : VOP1_Real_vi <0x4b>;
+defm V_CVT_F16_U16 : VOP1_Real_vi <0x39>;
+defm V_CVT_F16_I16 : VOP1_Real_vi <0x3a>;
+defm V_CVT_U16_F16 : VOP1_Real_vi <0x3b>;
+defm V_CVT_I16_F16 : VOP1_Real_vi <0x3c>;
+defm V_RCP_F16 : VOP1_Real_vi <0x3d>;
+defm V_SQRT_F16 : VOP1_Real_vi <0x3e>;
+defm V_RSQ_F16 : VOP1_Real_vi <0x3f>;
+defm V_LOG_F16 : VOP1_Real_vi <0x40>;
+defm V_EXP_F16 : VOP1_Real_vi <0x41>;
+defm V_FREXP_MANT_F16 : VOP1_Real_vi <0x42>;
+defm V_FREXP_EXP_I16_F16 : VOP1_Real_vi <0x43>;
+defm V_FLOOR_F16 : VOP1_Real_vi <0x44>;
+defm V_CEIL_F16 : VOP1_Real_vi <0x45>;
+defm V_TRUNC_F16 : VOP1_Real_vi <0x46>;
+defm V_RNDNE_F16 : VOP1_Real_vi <0x47>;
+defm V_FRACT_F16 : VOP1_Real_vi <0x48>;
+defm V_SIN_F16 : VOP1_Real_vi <0x49>;
+defm V_COS_F16 : VOP1_Real_vi <0x4a>;
+
+
+// Copy of v_mov_b32 with $vdst as a use operand for use with VGPR
+// indexing mode. vdst can't be treated as a def for codegen purposes,
+// and an implicit use and def of the super register should be added.
+def V_MOV_B32_indirect : VPseudoInstSI<(outs),
+ (ins getVALUDstForVT<i32>.ret:$vdst, getVOPSrc0ForVT<i32>.ret:$src0)>,
+ PseudoInstExpansion<(V_MOV_B32_e32_vi getVALUDstForVT<i32>.ret:$vdst,
+ getVOPSrc0ForVT<i32>.ret:$src0)> {
+ let VOP1 = 1;
+ let SubtargetPredicate = isVI;
+}
+
+// This is a pseudo variant of the v_movreld_b32 instruction in which the
+// vector operand appears only twice, once as def and once as use. Using this
+// pseudo avoids problems with the Two Address instructions pass.
+class V_MOVRELD_B32_pseudo<RegisterClass rc> : VPseudoInstSI <
+ (outs rc:$vdst),
+ (ins rc:$vsrc, VSrc_b32:$val, i32imm:$offset)> {
+ let VOP1 = 1;
+
+ let Constraints = "$vsrc = $vdst";
+ let Uses = [M0, EXEC];
+
+ let SubtargetPredicate = HasMovrel;
+}
+
+def V_MOVRELD_B32_V1 : V_MOVRELD_B32_pseudo<VGPR_32>;
+def V_MOVRELD_B32_V2 : V_MOVRELD_B32_pseudo<VReg_64>;
+def V_MOVRELD_B32_V4 : V_MOVRELD_B32_pseudo<VReg_128>;
+def V_MOVRELD_B32_V8 : V_MOVRELD_B32_pseudo<VReg_256>;
+def V_MOVRELD_B32_V16 : V_MOVRELD_B32_pseudo<VReg_512>;
+
+let Predicates = [isVI] in {
+
+def : Pat <
+ (i32 (int_amdgcn_mov_dpp i32:$src, imm:$dpp_ctrl, imm:$row_mask, imm:$bank_mask,
+ imm:$bound_ctrl)),
+ (V_MOV_B32_dpp $src, (as_i32imm $dpp_ctrl), (as_i32imm $row_mask),
+ (as_i32imm $bank_mask), (as_i1imm $bound_ctrl))
+>;
+
+
+def : Pat<
+ (i32 (anyext i16:$src)),
+ (COPY $src)
+>;
+
+def : Pat<
+ (i64 (anyext i16:$src)),
+ (REG_SEQUENCE VReg_64,
+ (i32 (COPY $src)), sub0,
+ (V_MOV_B32_e32 (i32 0)), sub1)
+>;
+
+def : Pat<
+ (i16 (trunc i32:$src)),
+ (COPY $src)
+>;
+
+def : Pat<
+ (i1 (trunc i16:$src)),
+ (COPY $src)
+>;
+
+
+def : Pat <
+ (i16 (trunc i64:$src)),
+ (EXTRACT_SUBREG $src, sub0)
+>;
+
+} // End Predicates = [isVI]
diff --git a/contrib/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/contrib/llvm/lib/Target/AMDGPU/VOP2Instructions.td
new file mode 100644
index 000000000000..20fb7f7bcab7
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -0,0 +1,757 @@
+//===-- VOP2Instructions.td - Vector Instruction Definitions --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// VOP2 Classes
+//===----------------------------------------------------------------------===//
+
+class VOP2e <bits<6> op, VOPProfile P> : Enc32 {
+ bits<8> vdst;
+ bits<9> src0;
+ bits<8> src1;
+
+ let Inst{8-0} = !if(P.HasSrc0, src0, 0);
+ let Inst{16-9} = !if(P.HasSrc1, src1, 0);
+ let Inst{24-17} = !if(P.EmitDst, vdst, 0);
+ let Inst{30-25} = op;
+ let Inst{31} = 0x0; //encoding
+}
+
+class VOP2_MADKe <bits<6> op, VOPProfile P> : Enc64 {
+ bits<8> vdst;
+ bits<9> src0;
+ bits<8> src1;
+ bits<32> imm;
+
+ let Inst{8-0} = !if(P.HasSrc0, src0, 0);
+ let Inst{16-9} = !if(P.HasSrc1, src1, 0);
+ let Inst{24-17} = !if(P.EmitDst, vdst, 0);
+ let Inst{30-25} = op;
+ let Inst{31} = 0x0; // encoding
+ let Inst{63-32} = imm;
+}
+
+class VOP2_SDWAe <bits<6> op, VOPProfile P> : VOP_SDWAe <P> {
+ bits<8> vdst;
+ bits<8> src1;
+
+ let Inst{8-0} = 0xf9; // sdwa
+ let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0);
+ let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
+ let Inst{30-25} = op;
+ let Inst{31} = 0x0; // encoding
+}
+
+class VOP2_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], string suffix = "_e32"> :
+ InstSI <P.Outs32, P.Ins32, "", pattern>,
+ VOP <opName>,
+ SIMCInstr <opName#suffix, SIEncodingFamily.NONE>,
+ MnemonicAlias<opName#suffix, opName> {
+
+ let isPseudo = 1;
+ let isCodeGenOnly = 1;
+ let UseNamedOperandTable = 1;
+
+ string Mnemonic = opName;
+ string AsmOperands = P.Asm32;
+
+ let Size = 4;
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let SubtargetPredicate = isGCN;
+
+ let VOP2 = 1;
+ let VALU = 1;
+ let Uses = [EXEC];
+
+ let AsmVariantName = AMDGPUAsmVariants.Default;
+
+ VOPProfile Pfl = P;
+}
+
+class VOP2_Real <VOP2_Pseudo ps, int EncodingFamily> :
+ InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
+ SIMCInstr <ps.PseudoInstr, EncodingFamily> {
+
+ let isPseudo = 0;
+ let isCodeGenOnly = 0;
+
+ let Constraints = ps.Constraints;
+ let DisableEncoding = ps.DisableEncoding;
+
+ // copy relevant pseudo op flags
+ let SubtargetPredicate = ps.SubtargetPredicate;
+ let AsmMatchConverter = ps.AsmMatchConverter;
+ let AsmVariantName = ps.AsmVariantName;
+ let Constraints = ps.Constraints;
+ let DisableEncoding = ps.DisableEncoding;
+ let TSFlags = ps.TSFlags;
+}
+
+class VOP2_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
+ VOP_SDWA_Pseudo <OpName, P, pattern> {
+ let AsmMatchConverter = "cvtSdwaVOP2";
+}
+
+class getVOP2Pat64 <SDPatternOperator node, VOPProfile P> : LetDummies {
+ list<dag> ret = !if(P.HasModifiers,
+ [(set P.DstVT:$vdst,
+ (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)),
+ (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))],
+ [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))]);
+}
+
+multiclass VOP2Inst <string opName,
+ VOPProfile P,
+ SDPatternOperator node = null_frag,
+ string revOp = opName> {
+
+ def _e32 : VOP2_Pseudo <opName, P>,
+ Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
+
+ def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
+ Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
+
+ def _sdwa : VOP2_SDWA_Pseudo <opName, P>,
+ Commutable_REV<revOp#"_sdwa", !eq(revOp, opName)>;
+}
+
+// TODO: add SDWA pseudo instructions for VOP2bInst and VOP2eInst
+multiclass VOP2bInst <string opName,
+ VOPProfile P,
+ SDPatternOperator node = null_frag,
+ string revOp = opName,
+ bit useSGPRInput = !eq(P.NumSrcArgs, 3)> {
+
+ let SchedRW = [Write32Bit, WriteSALU] in {
+ let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]), Defs = [VCC] in {
+ def _e32 : VOP2_Pseudo <opName, P>,
+ Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
+
+ def _sdwa : VOP2_SDWA_Pseudo <opName, P>,
+ Commutable_REV<revOp#"_sdwa", !eq(revOp, opName)>;
+ }
+ def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
+ Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
+ }
+}
+
+multiclass VOP2eInst <string opName,
+ VOPProfile P,
+ SDPatternOperator node = null_frag,
+ string revOp = opName,
+ bit useSGPRInput = !eq(P.NumSrcArgs, 3)> {
+
+ let SchedRW = [Write32Bit] in {
+ let Uses = !if(useSGPRInput, [VCC, EXEC], [EXEC]) in {
+ def _e32 : VOP2_Pseudo <opName, P>,
+ Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
+ }
+ def _e64 : VOP3_Pseudo <opName, P, getVOP2Pat64<node, P>.ret>,
+ Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
+ }
+}
+
+class VOP_MADAK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
+ field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm);
+ field dag Ins32 = (ins VCSrc_f32:$src0, VGPR_32:$src1, ImmOpType:$imm);
+ field string Asm32 = "$vdst, $src0, $src1, $imm";
+ field bit HasExt = 0;
+}
+
+def VOP_MADAK_F16 : VOP_MADAK <f16>;
+def VOP_MADAK_F32 : VOP_MADAK <f32>;
+
+class VOP_MADMK <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
+ field Operand ImmOpType = !if(!eq(vt.Size, 32), f32kimm, f16kimm);
+ field dag Ins32 = (ins VCSrc_f32:$src0, ImmOpType:$imm, VGPR_32:$src1);
+ field string Asm32 = "$vdst, $src0, $imm, $src1";
+ field bit HasExt = 0;
+}
+
+def VOP_MADMK_F16 : VOP_MADMK <f16>;
+def VOP_MADMK_F32 : VOP_MADMK <f32>;
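+
+// Rough illustration of the two literal-operand forms (operand values are
+// made-up examples; 0x40400000 is 3.0f):
+//   v_madmk_f32 v0, v1, 0x40400000, v2   ; v0 = v1 * 3.0 + v2
+//   v_madak_f32 v0, v1, v2, 0x40400000   ; v0 = v1 * v2 + 3.0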
+
+class VOP_MAC <ValueType vt> : VOPProfile <[vt, vt, vt, vt]> {
+ let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1, VGPR_32:$src2);
+ let Ins64 = getIns64<Src0RC64, Src1RC64, RegisterOperand<VGPR_32>, 3,
+ HasModifiers, Src0Mod, Src1Mod, Src2Mod>.ret;
+ let InsDPP = (ins FP32InputMods:$src0_modifiers, Src0DPP:$src0,
+ FP32InputMods:$src1_modifiers, Src1DPP:$src1,
+ VGPR_32:$src2, // stub argument
+ dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
+ bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
+ let InsSDWA = (ins FP32InputMods:$src0_modifiers, Src0SDWA:$src0,
+ FP32InputMods:$src1_modifiers, Src1SDWA:$src1,
+ VGPR_32:$src2, // stub argument
+ clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
+ src0_sel:$src0_sel, src1_sel:$src1_sel);
+ let Asm32 = getAsm32<1, 2, vt>.ret;
+ let AsmDPP = getAsmDPP<1, 2, HasModifiers, vt>.ret;
+ let AsmSDWA = getAsmSDWA<1, 2, HasModifiers, vt>.ret;
+ let HasSrc2 = 0;
+ let HasSrc2Mods = 0;
+ let HasExt = 1;
+}
+
+def VOP_MAC_F16 : VOP_MAC <f16> {
+ // FIXME: Move 'Asm64' definition to VOP_MAC, and use 'vt'. Currently it gives
+ // 'not a string initializer' error.
+ let Asm64 = getAsm64<1, 2, HasModifiers, f16>.ret;
+}
+
+def VOP_MAC_F32 : VOP_MAC <f32> {
+ // FIXME: Move 'Asm64' definition to VOP_MAC, and use 'vt'. Currently it gives
+ // 'not a string initializer' error.
+ let Asm64 = getAsm64<1, 2, HasModifiers, f32>.ret;
+}
+
+// Write out to vcc or arbitrary SGPR.
+def VOP2b_I32_I1_I32_I32 : VOPProfile<[i32, i32, i32, untyped]> {
+ let Asm32 = "$vdst, vcc, $src0, $src1";
+ let Asm64 = "$vdst, $sdst, $src0, $src1";
+ let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers$clamp $dst_sel $dst_unused $src0_sel $src1_sel";
+ let AsmDPP = "$vdst, vcc, $src0, $src1 $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
+ let Outs32 = (outs DstRC:$vdst);
+ let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
+}
+
+// Write out to vcc or arbitrary SGPR and read in from vcc or
+// arbitrary SGPR.
+def VOP2b_I32_I1_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
+ // We use VCSrc_b32 to exclude literal constants, even though the
+ // encoding normally allows them since the implicit VCC use means
+ // using one would always violate the constant bus
+ // restriction. SGPRs are still allowed because it should
+ // technically be possible to use VCC again as src0.
+ let Src0RC32 = VCSrc_b32;
+ let Asm32 = "$vdst, vcc, $src0, $src1, vcc";
+ let Asm64 = "$vdst, $sdst, $src0, $src1, $src2";
+ let AsmSDWA = "$vdst, vcc, $src0_modifiers, $src1_modifiers, vcc $clamp $dst_sel $dst_unused $src0_sel $src1_sel";
+ let AsmDPP = "$vdst, vcc, $src0, $src1, vcc $dpp_ctrl$row_mask$bank_mask$bound_ctrl";
+ let Outs32 = (outs DstRC:$vdst);
+ let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
+
+ // Suppress src2 implied by type since the 32-bit encoding uses an
+ // implicit VCC use.
+ let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);
+
+ let InsSDWA = (ins Src0Mod:$src0_modifiers, Src0SDWA:$src0,
+ Src1Mod:$src1_modifiers, Src1SDWA:$src1,
+ clampmod:$clamp, dst_sel:$dst_sel, dst_unused:$dst_unused,
+ src0_sel:$src0_sel, src1_sel:$src1_sel);
+
+ let InsDPP = (ins Src0Mod:$src0_modifiers, Src0DPP:$src0,
+ Src1Mod:$src1_modifiers, Src1DPP:$src1,
+ dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
+ bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
+ let HasExt = 1;
+}
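+
+// Usage sketch: the carry-out/carry-in pair chains 32-bit adds into wider
+// ones, e.g. a 64-bit add (register numbers are arbitrary):
+//   v_add_i32_e32  v0, vcc, v2, v4
+//   v_addc_u32_e32 v1, vcc, v3, v5, vcc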
+
+// Read in from vcc or arbitrary SGPR
+def VOP2e_I32_I32_I32_I1 : VOPProfile<[i32, i32, i32, i1]> {
+ let Src0RC32 = VCSrc_b32; // See comment in def VOP2b_I32_I1_I32_I32_I1 above.
+ let Asm32 = "$vdst, $src0, $src1, vcc";
+ let Asm64 = "$vdst, $src0, $src1, $src2";
+ let Outs32 = (outs DstRC:$vdst);
+ let Outs64 = (outs DstRC:$vdst);
+
+ // Suppress src2 implied by type since the 32-bit encoding uses an
+ // implicit VCC use.
+ let Ins32 = (ins Src0RC32:$src0, Src1RC32:$src1);
+}
+
+def VOP_READLANE : VOPProfile<[i32, i32, i32]> {
+ let Outs32 = (outs SReg_32:$vdst);
+ let Outs64 = Outs32;
+ let Ins32 = (ins VGPR_32:$src0, SCSrc_b32:$src1);
+ let Ins64 = Ins32;
+ let Asm32 = " $vdst, $src0, $src1";
+ let Asm64 = Asm32;
+}
+
+def VOP_WRITELANE : VOPProfile<[i32, i32, i32]> {
+ let Outs32 = (outs VGPR_32:$vdst);
+ let Outs64 = Outs32;
+ let Ins32 = (ins SReg_32:$src0, SCSrc_b32:$src1);
+ let Ins64 = Ins32;
+ let Asm32 = " $vdst, $src0, $src1";
+ let Asm64 = Asm32;
+}
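+
+// Usage sketch (arbitrary registers): these move a single lane between a
+// VGPR and an SGPR:
+//   v_readlane_b32  s4, v0, s5   ; s4 = lane s5 of v0
+//   v_writelane_b32 v0, s4, s5   ; lane s5 of v0 = s4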
+
+//===----------------------------------------------------------------------===//
+// VOP2 Instructions
+//===----------------------------------------------------------------------===//
+
+let SubtargetPredicate = isGCN in {
+
+defm V_CNDMASK_B32 : VOP2eInst <"v_cndmask_b32", VOP2e_I32_I32_I32_I1>;
+def V_MADMK_F32 : VOP2_Pseudo <"v_madmk_f32", VOP_MADMK_F32>;
+
+let isCommutable = 1 in {
+defm V_ADD_F32 : VOP2Inst <"v_add_f32", VOP_F32_F32_F32, fadd>;
+defm V_SUB_F32 : VOP2Inst <"v_sub_f32", VOP_F32_F32_F32, fsub>;
+defm V_SUBREV_F32 : VOP2Inst <"v_subrev_f32", VOP_F32_F32_F32, null_frag, "v_sub_f32">;
+defm V_MUL_LEGACY_F32 : VOP2Inst <"v_mul_legacy_f32", VOP_F32_F32_F32, AMDGPUfmul_legacy>;
+defm V_MUL_F32 : VOP2Inst <"v_mul_f32", VOP_F32_F32_F32, fmul>;
+defm V_MUL_I32_I24 : VOP2Inst <"v_mul_i32_i24", VOP_I32_I32_I32, AMDGPUmul_i24>;
+defm V_MUL_HI_I32_I24 : VOP2Inst <"v_mul_hi_i32_i24", VOP_I32_I32_I32, AMDGPUmulhi_i24>;
+defm V_MUL_U32_U24 : VOP2Inst <"v_mul_u32_u24", VOP_I32_I32_I32, AMDGPUmul_u24>;
+defm V_MUL_HI_U32_U24 : VOP2Inst <"v_mul_hi_u32_u24", VOP_I32_I32_I32, AMDGPUmulhi_u24>;
+defm V_MIN_F32 : VOP2Inst <"v_min_f32", VOP_F32_F32_F32, fminnum>;
+defm V_MAX_F32 : VOP2Inst <"v_max_f32", VOP_F32_F32_F32, fmaxnum>;
+defm V_MIN_I32 : VOP2Inst <"v_min_i32", VOP_I32_I32_I32>;
+defm V_MAX_I32 : VOP2Inst <"v_max_i32", VOP_I32_I32_I32>;
+defm V_MIN_U32 : VOP2Inst <"v_min_u32", VOP_I32_I32_I32>;
+defm V_MAX_U32 : VOP2Inst <"v_max_u32", VOP_I32_I32_I32>;
+defm V_LSHRREV_B32 : VOP2Inst <"v_lshrrev_b32", VOP_I32_I32_I32, null_frag, "v_lshr_b32">;
+defm V_ASHRREV_I32 : VOP2Inst <"v_ashrrev_i32", VOP_I32_I32_I32, null_frag, "v_ashr_i32">;
+defm V_LSHLREV_B32 : VOP2Inst <"v_lshlrev_b32", VOP_I32_I32_I32, null_frag, "v_lshl_b32">;
+defm V_AND_B32 : VOP2Inst <"v_and_b32", VOP_I32_I32_I32>;
+defm V_OR_B32 : VOP2Inst <"v_or_b32", VOP_I32_I32_I32>;
+defm V_XOR_B32 : VOP2Inst <"v_xor_b32", VOP_I32_I32_I32>;
+
+let Constraints = "$vdst = $src2", DisableEncoding="$src2",
+ isConvertibleToThreeAddress = 1 in {
+defm V_MAC_F32 : VOP2Inst <"v_mac_f32", VOP_MAC_F32>;
+}
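+// (Sketch of the semantics: v_mac_f32 accumulates in place, vdst = src0 * src1 + vdst,
+// which is why $src2 is tied to $vdst above.)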
+
+def V_MADAK_F32 : VOP2_Pseudo <"v_madak_f32", VOP_MADAK_F32>;
+
+// No patterns so that the scalar instructions are always selected.
+// The scalar versions will be replaced with vector when needed later.
+
+// V_ADD_I32, V_SUB_I32, and V_SUBREV_I32 were renamed to *_U32 in VI,
+// but the VI instructions behave the same as the SI versions.
+defm V_ADD_I32 : VOP2bInst <"v_add_i32", VOP2b_I32_I1_I32_I32>;
+defm V_SUB_I32 : VOP2bInst <"v_sub_i32", VOP2b_I32_I1_I32_I32>;
+defm V_SUBREV_I32 : VOP2bInst <"v_subrev_i32", VOP2b_I32_I1_I32_I32, null_frag, "v_sub_i32">;
+defm V_ADDC_U32 : VOP2bInst <"v_addc_u32", VOP2b_I32_I1_I32_I32_I1>;
+defm V_SUBB_U32 : VOP2bInst <"v_subb_u32", VOP2b_I32_I1_I32_I32_I1>;
+defm V_SUBBREV_U32 : VOP2bInst <"v_subbrev_u32", VOP2b_I32_I1_I32_I32_I1, null_frag, "v_subb_u32">;
+} // End isCommutable = 1
+
+// These are special and do not read the exec mask.
+let isConvergent = 1, Uses = []<Register> in {
+def V_READLANE_B32 : VOP2_Pseudo<"v_readlane_b32", VOP_READLANE,
+ [(set i32:$vdst, (int_amdgcn_readlane i32:$src0, i32:$src1))], "">;
+
+def V_WRITELANE_B32 : VOP2_Pseudo<"v_writelane_b32", VOP_WRITELANE, [], "">;
+} // End isConvergent = 1
+
+defm V_BFM_B32 : VOP2Inst <"v_bfm_b32", VOP_I32_I32_I32>;
+defm V_BCNT_U32_B32 : VOP2Inst <"v_bcnt_u32_b32", VOP_I32_I32_I32>;
+defm V_MBCNT_LO_U32_B32 : VOP2Inst <"v_mbcnt_lo_u32_b32", VOP_I32_I32_I32, int_amdgcn_mbcnt_lo>;
+defm V_MBCNT_HI_U32_B32 : VOP2Inst <"v_mbcnt_hi_u32_b32", VOP_I32_I32_I32, int_amdgcn_mbcnt_hi>;
+defm V_LDEXP_F32 : VOP2Inst <"v_ldexp_f32", VOP_F32_F32_I32, AMDGPUldexp>;
+defm V_CVT_PKACCUM_U8_F32 : VOP2Inst <"v_cvt_pkaccum_u8_f32", VOP_I32_F32_I32>; // TODO: set "Uses = dst"
+defm V_CVT_PKNORM_I16_F32 : VOP2Inst <"v_cvt_pknorm_i16_f32", VOP_I32_F32_F32>;
+defm V_CVT_PKNORM_U16_F32 : VOP2Inst <"v_cvt_pknorm_u16_f32", VOP_I32_F32_F32>;
+defm V_CVT_PKRTZ_F16_F32 : VOP2Inst <"v_cvt_pkrtz_f16_f32", VOP_I32_F32_F32, int_SI_packf16>;
+defm V_CVT_PK_U16_U32 : VOP2Inst <"v_cvt_pk_u16_u32", VOP_I32_I32_I32>;
+defm V_CVT_PK_I16_I32 : VOP2Inst <"v_cvt_pk_i16_i32", VOP_I32_I32_I32>;
+
+} // End SubtargetPredicate = isGCN
+
+
+// These instructions only exist on SI and CI
+let SubtargetPredicate = isSICI in {
+
+defm V_MIN_LEGACY_F32 : VOP2Inst <"v_min_legacy_f32", VOP_F32_F32_F32, AMDGPUfmin_legacy>;
+defm V_MAX_LEGACY_F32 : VOP2Inst <"v_max_legacy_f32", VOP_F32_F32_F32, AMDGPUfmax_legacy>;
+
+let isCommutable = 1 in {
+defm V_MAC_LEGACY_F32 : VOP2Inst <"v_mac_legacy_f32", VOP_F32_F32_F32>;
+defm V_LSHR_B32 : VOP2Inst <"v_lshr_b32", VOP_I32_I32_I32>;
+defm V_ASHR_I32 : VOP2Inst <"v_ashr_i32", VOP_I32_I32_I32>;
+defm V_LSHL_B32 : VOP2Inst <"v_lshl_b32", VOP_I32_I32_I32>;
+} // End isCommutable = 1
+
+} // End SubtargetPredicate = isSICI
+
+let SubtargetPredicate = isVI in {
+
+def V_MADMK_F16 : VOP2_Pseudo <"v_madmk_f16", VOP_MADMK_F16>;
+defm V_LSHLREV_B16 : VOP2Inst <"v_lshlrev_b16", VOP_I16_I16_I16>;
+defm V_LSHRREV_B16 : VOP2Inst <"v_lshrrev_b16", VOP_I16_I16_I16>;
+defm V_ASHRREV_I16 : VOP2Inst <"v_ashrrev_i16", VOP_I16_I16_I16>;
+defm V_LDEXP_F16 : VOP2Inst <"v_ldexp_f16", VOP_F16_F16_I32, AMDGPUldexp>;
+
+let isCommutable = 1 in {
+defm V_ADD_F16 : VOP2Inst <"v_add_f16", VOP_F16_F16_F16, fadd>;
+defm V_SUB_F16 : VOP2Inst <"v_sub_f16", VOP_F16_F16_F16, fsub>;
+defm V_SUBREV_F16 : VOP2Inst <"v_subrev_f16", VOP_F16_F16_F16, null_frag, "v_sub_f16">;
+defm V_MUL_F16 : VOP2Inst <"v_mul_f16", VOP_F16_F16_F16, fmul>;
+def V_MADAK_F16 : VOP2_Pseudo <"v_madak_f16", VOP_MADAK_F16>;
+defm V_ADD_U16 : VOP2Inst <"v_add_u16", VOP_I16_I16_I16>;
+defm V_SUB_U16 : VOP2Inst <"v_sub_u16" , VOP_I16_I16_I16>;
+defm V_SUBREV_U16 : VOP2Inst <"v_subrev_u16", VOP_I16_I16_I16, null_frag, "v_sub_u16">;
+defm V_MUL_LO_U16 : VOP2Inst <"v_mul_lo_u16", VOP_I16_I16_I16>;
+defm V_MAX_F16 : VOP2Inst <"v_max_f16", VOP_F16_F16_F16, fmaxnum>;
+defm V_MIN_F16 : VOP2Inst <"v_min_f16", VOP_F16_F16_F16, fminnum>;
+defm V_MAX_U16 : VOP2Inst <"v_max_u16", VOP_I16_I16_I16>;
+defm V_MAX_I16 : VOP2Inst <"v_max_i16", VOP_I16_I16_I16>;
+defm V_MIN_U16 : VOP2Inst <"v_min_u16", VOP_I16_I16_I16>;
+defm V_MIN_I16 : VOP2Inst <"v_min_i16", VOP_I16_I16_I16>;
+
+let Constraints = "$vdst = $src2", DisableEncoding="$src2",
+ isConvertibleToThreeAddress = 1 in {
+defm V_MAC_F16 : VOP2Inst <"v_mac_f16", VOP_MAC_F16>;
+}
+} // End isCommutable = 1
+
+} // End SubtargetPredicate = isVI
+
+// Note: 16-bit instructions produce a 0 result in the high 16-bits.
+multiclass Arithmetic_i16_Pats <SDPatternOperator op, Instruction inst> {
+
+def : Pat<
+ (op i16:$src0, i16:$src1),
+ (inst $src0, $src1)
+>;
+
+def : Pat<
+ (i32 (zext (op i16:$src0, i16:$src1))),
+ (inst $src0, $src1)
+>;
+
+def : Pat<
+ (i64 (zext (op i16:$src0, i16:$src1))),
+ (REG_SEQUENCE VReg_64,
+ (inst $src0, $src1), sub0,
+ (V_MOV_B32_e32 (i32 0)), sub1)
+>;
+
+}
+
+multiclass Bits_OpsRev_i16_Pats <SDPatternOperator op, Instruction inst> {
+
+def : Pat<
+ (op i16:$src0, i16:$src1),
+ (inst $src1, $src0)
+>;
+
+def : Pat<
+ (i32 (zext (op i16:$src0, i16:$src1))),
+ (inst $src1, $src0)
+>;
+
+
+def : Pat<
+ (i64 (zext (op i16:$src0, i16:$src1))),
+ (REG_SEQUENCE VReg_64,
+ (inst $src1, $src0), sub0,
+ (V_MOV_B32_e32 (i32 0)), sub1)
+>;
+}
+
+class ZExt_i16_i1_Pat <SDNode ext> : Pat <
+ (i16 (ext i1:$src)),
+ (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src)
+>;
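+
+// (Sketch: V_CNDMASK_B32_e64 0, 1, $src materializes 1 where $src is set and
+// 0 elsewhere, i.e. the zero-/any-extension of an i1.)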
+
+let Predicates = [isVI] in {
+
+defm : Arithmetic_i16_Pats<add, V_ADD_U16_e64>;
+defm : Arithmetic_i16_Pats<mul, V_MUL_LO_U16_e64>;
+defm : Arithmetic_i16_Pats<sub, V_SUB_U16_e64>;
+defm : Arithmetic_i16_Pats<smin, V_MIN_I16_e64>;
+defm : Arithmetic_i16_Pats<smax, V_MAX_I16_e64>;
+defm : Arithmetic_i16_Pats<umin, V_MIN_U16_e64>;
+defm : Arithmetic_i16_Pats<umax, V_MAX_U16_e64>;
+
+def : Pat <
+ (and i16:$src0, i16:$src1),
+ (V_AND_B32_e64 $src0, $src1)
+>;
+
+def : Pat <
+ (or i16:$src0, i16:$src1),
+ (V_OR_B32_e64 $src0, $src1)
+>;
+
+def : Pat <
+ (xor i16:$src0, i16:$src1),
+ (V_XOR_B32_e64 $src0, $src1)
+>;
+
+defm : Bits_OpsRev_i16_Pats<shl, V_LSHLREV_B16_e64>;
+defm : Bits_OpsRev_i16_Pats<srl, V_LSHRREV_B16_e64>;
+defm : Bits_OpsRev_i16_Pats<sra, V_ASHRREV_I16_e64>;
+
+def : ZExt_i16_i1_Pat<zext>;
+def : ZExt_i16_i1_Pat<anyext>;
+
+def : Pat <
+ (i16 (sext i1:$src)),
+ (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src)
+>;
+
+} // End Predicates = [isVI]
+
+//===----------------------------------------------------------------------===//
+// SI
+//===----------------------------------------------------------------------===//
+
+let AssemblerPredicates = [isSICI], DecoderNamespace = "SICI" in {
+
+multiclass VOP2_Real_si <bits<6> op> {
+ def _si :
+ VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.SI>,
+ VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
+}
+
+multiclass VOP2_Real_MADK_si <bits<6> op> {
+ def _si : VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.SI>,
+ VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
+}
+
+multiclass VOP2_Real_e32_si <bits<6> op> {
+ def _e32_si :
+ VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>,
+ VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>;
+}
+
+multiclass VOP2_Real_e32e64_si <bits<6> op> : VOP2_Real_e32_si<op> {
+ def _e64_si :
+ VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
+ VOP3e_si <{1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
+}
+
+multiclass VOP2be_Real_e32e64_si <bits<6> op> : VOP2_Real_e32_si<op> {
+ def _e64_si :
+ VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
+ VOP3be_si <{1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
+}
+
+} // End AssemblerPredicates = [isSICI], DecoderNamespace = "SICI"
+
+defm V_CNDMASK_B32 : VOP2_Real_e32e64_si <0x0>;
+defm V_ADD_F32 : VOP2_Real_e32e64_si <0x3>;
+defm V_SUB_F32 : VOP2_Real_e32e64_si <0x4>;
+defm V_SUBREV_F32 : VOP2_Real_e32e64_si <0x5>;
+defm V_MUL_LEGACY_F32 : VOP2_Real_e32e64_si <0x7>;
+defm V_MUL_F32 : VOP2_Real_e32e64_si <0x8>;
+defm V_MUL_I32_I24 : VOP2_Real_e32e64_si <0x9>;
+defm V_MUL_HI_I32_I24 : VOP2_Real_e32e64_si <0xa>;
+defm V_MUL_U32_U24 : VOP2_Real_e32e64_si <0xb>;
+defm V_MUL_HI_U32_U24 : VOP2_Real_e32e64_si <0xc>;
+defm V_MIN_F32 : VOP2_Real_e32e64_si <0xf>;
+defm V_MAX_F32 : VOP2_Real_e32e64_si <0x10>;
+defm V_MIN_I32 : VOP2_Real_e32e64_si <0x11>;
+defm V_MAX_I32 : VOP2_Real_e32e64_si <0x12>;
+defm V_MIN_U32 : VOP2_Real_e32e64_si <0x13>;
+defm V_MAX_U32 : VOP2_Real_e32e64_si <0x14>;
+defm V_LSHRREV_B32 : VOP2_Real_e32e64_si <0x16>;
+defm V_ASHRREV_I32 : VOP2_Real_e32e64_si <0x18>;
+defm V_LSHLREV_B32 : VOP2_Real_e32e64_si <0x1a>;
+defm V_AND_B32 : VOP2_Real_e32e64_si <0x1b>;
+defm V_OR_B32 : VOP2_Real_e32e64_si <0x1c>;
+defm V_XOR_B32 : VOP2_Real_e32e64_si <0x1d>;
+defm V_MAC_F32 : VOP2_Real_e32e64_si <0x1f>;
+defm V_MADMK_F32 : VOP2_Real_MADK_si <0x20>;
+defm V_MADAK_F32 : VOP2_Real_MADK_si <0x21>;
+defm V_ADD_I32 : VOP2be_Real_e32e64_si <0x25>;
+defm V_SUB_I32 : VOP2be_Real_e32e64_si <0x26>;
+defm V_SUBREV_I32 : VOP2be_Real_e32e64_si <0x27>;
+defm V_ADDC_U32 : VOP2be_Real_e32e64_si <0x28>;
+defm V_SUBB_U32 : VOP2be_Real_e32e64_si <0x29>;
+defm V_SUBBREV_U32 : VOP2be_Real_e32e64_si <0x2a>;
+
+defm V_READLANE_B32 : VOP2_Real_si <0x01>;
+defm V_WRITELANE_B32 : VOP2_Real_si <0x02>;
+
+defm V_MAC_LEGACY_F32 : VOP2_Real_e32e64_si <0x6>;
+defm V_MIN_LEGACY_F32 : VOP2_Real_e32e64_si <0xd>;
+defm V_MAX_LEGACY_F32 : VOP2_Real_e32e64_si <0xe>;
+defm V_LSHR_B32 : VOP2_Real_e32e64_si <0x15>;
+defm V_ASHR_I32 : VOP2_Real_e32e64_si <0x17>;
+defm V_LSHL_B32 : VOP2_Real_e32e64_si <0x19>;
+
+defm V_BFM_B32 : VOP2_Real_e32e64_si <0x1e>;
+defm V_BCNT_U32_B32 : VOP2_Real_e32e64_si <0x22>;
+defm V_MBCNT_LO_U32_B32 : VOP2_Real_e32e64_si <0x23>;
+defm V_MBCNT_HI_U32_B32 : VOP2_Real_e32e64_si <0x24>;
+defm V_LDEXP_F32 : VOP2_Real_e32e64_si <0x2b>;
+defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_e32e64_si <0x2c>;
+defm V_CVT_PKNORM_I16_F32 : VOP2_Real_e32e64_si <0x2d>;
+defm V_CVT_PKNORM_U16_F32 : VOP2_Real_e32e64_si <0x2e>;
+defm V_CVT_PKRTZ_F16_F32 : VOP2_Real_e32e64_si <0x2f>;
+defm V_CVT_PK_U16_U32 : VOP2_Real_e32e64_si <0x30>;
+defm V_CVT_PK_I16_I32 : VOP2_Real_e32e64_si <0x31>;
+
+
+//===----------------------------------------------------------------------===//
+// VI
+//===----------------------------------------------------------------------===//
+
+class VOP2_DPP <bits<6> op, VOP2_Pseudo ps, VOPProfile P = ps.Pfl> :
+ VOP_DPP <ps.OpName, P> {
+ let Defs = ps.Defs;
+ let Uses = ps.Uses;
+ let SchedRW = ps.SchedRW;
+ let hasSideEffects = ps.hasSideEffects;
+ let Constraints = ps.Constraints;
+ let DisableEncoding = ps.DisableEncoding;
+
+ bits<8> vdst;
+ bits<8> src1;
+ let Inst{8-0} = 0xfa; //dpp
+ let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0);
+ let Inst{24-17} = !if(P.EmitDst, vdst{7-0}, 0);
+ let Inst{30-25} = op;
+ let Inst{31} = 0x0; //encoding
+}
+
+let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in {
+
+multiclass VOP32_Real_vi <bits<10> op> {
+ def _vi :
+ VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.VI>,
+ VOP3e_vi<op, !cast<VOP2_Pseudo>(NAME).Pfl>;
+}
+
+multiclass VOP2_Real_MADK_vi <bits<6> op> {
+ def _vi : VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.VI>,
+ VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
+}
+
+multiclass VOP2_Real_e32_vi <bits<6> op> {
+ def _e32_vi :
+ VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.VI>,
+ VOP2e<op{5-0}, !cast<VOP2_Pseudo>(NAME#"_e32").Pfl>;
+}
+
+multiclass VOP2_Real_e64_vi <bits<10> op> {
+ def _e64_vi :
+ VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
+ VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
+}
+
+multiclass Base_VOP2be_Real_e32e64_vi <bits<6> op> : VOP2_Real_e32_vi<op> {
+ def _e64_vi :
+ VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
+ VOP3be_vi <{0, 1, 0, 0, op{5-0}}, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl>;
+}
+
+multiclass Base_VOP2_Real_e32e64_vi <bits<6> op> :
+ VOP2_Real_e32_vi<op>,
+ VOP2_Real_e64_vi<{0, 1, 0, 0, op{5-0}}>;
+
+} // End AssemblerPredicates = [isVI], DecoderNamespace = "VI"
+
+multiclass VOP2_SDWA_Real <bits<6> op> {
+ def _sdwa_vi :
+ VOP_SDWA_Real <!cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa")>,
+ VOP2_SDWAe <op{5-0}, !cast<VOP2_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
+}
+
+multiclass VOP2be_Real_e32e64_vi <bits<6> op> :
+ Base_VOP2be_Real_e32e64_vi<op>, VOP2_SDWA_Real<op> {
+ // For now, the DPP variant is only provided for asm/disasm
+ // TODO: add corresponding pseudo
+ def _dpp : VOP2_DPP<op, !cast<VOP2_Pseudo>(NAME#"_e32")>;
+}
+
+multiclass VOP2_Real_e32e64_vi <bits<6> op> :
+ Base_VOP2_Real_e32e64_vi<op>, VOP2_SDWA_Real<op> {
+ // For now, the DPP variant is only provided for asm/disasm
+ // TODO: add corresponding pseudo
+ def _dpp : VOP2_DPP<op, !cast<VOP2_Pseudo>(NAME#"_e32")>;
+}
+
+defm V_CNDMASK_B32 : Base_VOP2_Real_e32e64_vi <0x0>;
+defm V_ADD_F32 : VOP2_Real_e32e64_vi <0x1>;
+defm V_SUB_F32 : VOP2_Real_e32e64_vi <0x2>;
+defm V_SUBREV_F32 : VOP2_Real_e32e64_vi <0x3>;
+defm V_MUL_LEGACY_F32 : VOP2_Real_e32e64_vi <0x4>;
+defm V_MUL_F32 : VOP2_Real_e32e64_vi <0x5>;
+defm V_MUL_I32_I24 : VOP2_Real_e32e64_vi <0x6>;
+defm V_MUL_HI_I32_I24 : VOP2_Real_e32e64_vi <0x7>;
+defm V_MUL_U32_U24 : VOP2_Real_e32e64_vi <0x8>;
+defm V_MUL_HI_U32_U24 : VOP2_Real_e32e64_vi <0x9>;
+defm V_MIN_F32 : VOP2_Real_e32e64_vi <0xa>;
+defm V_MAX_F32 : VOP2_Real_e32e64_vi <0xb>;
+defm V_MIN_I32 : VOP2_Real_e32e64_vi <0xc>;
+defm V_MAX_I32 : VOP2_Real_e32e64_vi <0xd>;
+defm V_MIN_U32 : VOP2_Real_e32e64_vi <0xe>;
+defm V_MAX_U32 : VOP2_Real_e32e64_vi <0xf>;
+defm V_LSHRREV_B32 : VOP2_Real_e32e64_vi <0x10>;
+defm V_ASHRREV_I32 : VOP2_Real_e32e64_vi <0x11>;
+defm V_LSHLREV_B32 : VOP2_Real_e32e64_vi <0x12>;
+defm V_AND_B32 : VOP2_Real_e32e64_vi <0x13>;
+defm V_OR_B32 : VOP2_Real_e32e64_vi <0x14>;
+defm V_XOR_B32 : VOP2_Real_e32e64_vi <0x15>;
+defm V_MAC_F32 : VOP2_Real_e32e64_vi <0x16>;
+defm V_MADMK_F32 : VOP2_Real_MADK_vi <0x17>;
+defm V_MADAK_F32 : VOP2_Real_MADK_vi <0x18>;
+defm V_ADD_I32 : VOP2be_Real_e32e64_vi <0x19>;
+defm V_SUB_I32 : VOP2be_Real_e32e64_vi <0x1a>;
+defm V_SUBREV_I32 : VOP2be_Real_e32e64_vi <0x1b>;
+defm V_ADDC_U32 : VOP2be_Real_e32e64_vi <0x1c>;
+defm V_SUBB_U32 : VOP2be_Real_e32e64_vi <0x1d>;
+defm V_SUBBREV_U32 : VOP2be_Real_e32e64_vi <0x1e>;
+
+defm V_READLANE_B32 : VOP32_Real_vi <0x289>;
+defm V_WRITELANE_B32 : VOP32_Real_vi <0x28a>;
+
+defm V_BFM_B32 : VOP2_Real_e64_vi <0x293>;
+defm V_BCNT_U32_B32 : VOP2_Real_e64_vi <0x28b>;
+defm V_MBCNT_LO_U32_B32 : VOP2_Real_e64_vi <0x28c>;
+defm V_MBCNT_HI_U32_B32 : VOP2_Real_e64_vi <0x28d>;
+defm V_LDEXP_F32 : VOP2_Real_e64_vi <0x288>;
+defm V_CVT_PKACCUM_U8_F32 : VOP2_Real_e64_vi <0x1f0>;
+defm V_CVT_PKNORM_I16_F32 : VOP2_Real_e64_vi <0x294>;
+defm V_CVT_PKNORM_U16_F32 : VOP2_Real_e64_vi <0x295>;
+defm V_CVT_PKRTZ_F16_F32 : VOP2_Real_e64_vi <0x296>;
+defm V_CVT_PK_U16_U32 : VOP2_Real_e64_vi <0x297>;
+defm V_CVT_PK_I16_I32 : VOP2_Real_e64_vi <0x298>;
+
+defm V_ADD_F16 : VOP2_Real_e32e64_vi <0x1f>;
+defm V_SUB_F16 : VOP2_Real_e32e64_vi <0x20>;
+defm V_SUBREV_F16 : VOP2_Real_e32e64_vi <0x21>;
+defm V_MUL_F16 : VOP2_Real_e32e64_vi <0x22>;
+defm V_MAC_F16 : VOP2_Real_e32e64_vi <0x23>;
+defm V_MADMK_F16 : VOP2_Real_MADK_vi <0x24>;
+defm V_MADAK_F16 : VOP2_Real_MADK_vi <0x25>;
+defm V_ADD_U16 : VOP2_Real_e32e64_vi <0x26>;
+defm V_SUB_U16 : VOP2_Real_e32e64_vi <0x27>;
+defm V_SUBREV_U16 : VOP2_Real_e32e64_vi <0x28>;
+defm V_MUL_LO_U16 : VOP2_Real_e32e64_vi <0x29>;
+defm V_LSHLREV_B16 : VOP2_Real_e32e64_vi <0x2a>;
+defm V_LSHRREV_B16 : VOP2_Real_e32e64_vi <0x2b>;
+defm V_ASHRREV_I16 : VOP2_Real_e32e64_vi <0x2c>;
+defm V_MAX_F16 : VOP2_Real_e32e64_vi <0x2d>;
+defm V_MIN_F16 : VOP2_Real_e32e64_vi <0x2e>;
+defm V_MAX_U16 : VOP2_Real_e32e64_vi <0x2f>;
+defm V_MAX_I16 : VOP2_Real_e32e64_vi <0x30>;
+defm V_MIN_U16 : VOP2_Real_e32e64_vi <0x31>;
+defm V_MIN_I16 : VOP2_Real_e32e64_vi <0x32>;
+defm V_LDEXP_F16 : VOP2_Real_e32e64_vi <0x33>;
+
+let SubtargetPredicate = isVI in {
+
+// Aliases to simplify matching of floating-point instructions that
+// are VOP2 on SI and VOP3 on VI.
+class SI2_VI3Alias <string name, Instruction inst> : InstAlias <
+ name#" $dst, $src0, $src1",
+ (inst VGPR_32:$dst, 0, VCSrc_f32:$src0, 0, VCSrc_f32:$src1, 0, 0)
+>, PredicateControl {
+ let UseInstAsmMatchConverter = 0;
+ let AsmVariantName = AMDGPUAsmVariants.VOP3;
+}
+
+def : SI2_VI3Alias <"v_ldexp_f32", V_LDEXP_F32_e64_vi>;
+def : SI2_VI3Alias <"v_cvt_pkaccum_u8_f32", V_CVT_PKACCUM_U8_F32_e64_vi>;
+def : SI2_VI3Alias <"v_cvt_pknorm_i16_f32", V_CVT_PKNORM_I16_F32_e64_vi>;
+def : SI2_VI3Alias <"v_cvt_pknorm_u16_f32", V_CVT_PKNORM_U16_F32_e64_vi>;
+def : SI2_VI3Alias <"v_cvt_pkrtz_f16_f32", V_CVT_PKRTZ_F16_F32_e64_vi>;
+
+} // End SubtargetPredicate = isVI
diff --git a/contrib/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/contrib/llvm/lib/Target/AMDGPU/VOP3Instructions.td
new file mode 100644
index 000000000000..5efa64d25ce1
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -0,0 +1,447 @@
+//===-- VOP3Instructions.td - Vector Instruction Definitions --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// VOP3 Classes
+//===----------------------------------------------------------------------===//
+
+class getVOP3ModPat<VOPProfile P, SDPatternOperator node> {
+ list<dag> ret3 = [(set P.DstVT:$vdst,
+ (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)),
+ (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
+ (P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers))))];
+
+ list<dag> ret2 = [(set P.DstVT:$vdst,
+ (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)),
+ (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers))))];
+
+ list<dag> ret1 = [(set P.DstVT:$vdst,
+ (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod))))];
+
+ list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
+ !if(!eq(P.NumSrcArgs, 2), ret2,
+ ret1));
+}
+
+class getVOP3Pat<VOPProfile P, SDPatternOperator node> {
+ list<dag> ret3 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1, P.Src2VT:$src2))];
+ list<dag> ret2 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0, P.Src1VT:$src1))];
+ list<dag> ret1 = [(set P.DstVT:$vdst, (node P.Src0VT:$src0))];
+ list<dag> ret = !if(!eq(P.NumSrcArgs, 3), ret3,
+ !if(!eq(P.NumSrcArgs, 2), ret2,
+ ret1));
+}
+
+class VOP3Inst<string OpName, VOPProfile P, SDPatternOperator node = null_frag, bit VOP3Only = 0> :
+ VOP3_Pseudo<OpName, P,
+ !if(P.HasModifiers, getVOP3ModPat<P, node>.ret, getVOP3Pat<P, node>.ret),
+ VOP3Only>;
+
+// Special case for v_div_fmas_{f32|f64}, since it seems to be the
+// only VOP instruction that implicitly reads VCC.
+let Asm64 = " $vdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$omod" in {
+def VOP_F32_F32_F32_F32_VCC : VOPProfile<[f32, f32, f32, f32]> {
+ let Outs64 = (outs DstRC.RegClass:$vdst);
+}
+def VOP_F64_F64_F64_F64_VCC : VOPProfile<[f64, f64, f64, f64]> {
+ let Outs64 = (outs DstRC.RegClass:$vdst);
+}
+}
+
+class getVOP3VCC<VOPProfile P, SDPatternOperator node> {
+ list<dag> ret =
+ [(set P.DstVT:$vdst,
+ (node (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers, i1:$clamp, i32:$omod)),
+ (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
+ (P.Src2VT (VOP3Mods P.Src2VT:$src2, i32:$src2_modifiers)),
+ (i1 VCC)))];
+}
+
+class VOP3_Profile<VOPProfile P> : VOPProfile<P.ArgVT> {
+ // FIXME: Hack to stop printing _e64
+ let Outs64 = (outs DstRC.RegClass:$vdst);
+ let Asm64 = " " # P.Asm64;
+}
+
+class VOP3b_Profile<ValueType vt> : VOPProfile<[vt, vt, vt, vt]> {
+ let Outs64 = (outs DstRC:$vdst, SReg_64:$sdst);
+ let Asm64 = " $vdst, $sdst, $src0_modifiers, $src1_modifiers, $src2_modifiers$clamp$omod";
+}
+
+def VOP3b_F32_I1_F32_F32_F32 : VOP3b_Profile<f32> {
+ // FIXME: Hack to stop printing _e64
+ let DstRC = RegisterOperand<VGPR_32>;
+}
+
+def VOP3b_F64_I1_F64_F64_F64 : VOP3b_Profile<f64> {
+ // FIXME: Hack to stop printing _e64
+ let DstRC = RegisterOperand<VReg_64>;
+}
+
+//===----------------------------------------------------------------------===//
+// VOP3 Instructions
+//===----------------------------------------------------------------------===//
+
+let isCommutable = 1 in {
+
+def V_MAD_LEGACY_F32 : VOP3Inst <"v_mad_legacy_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
+def V_MAD_F32 : VOP3Inst <"v_mad_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, fmad>;
+def V_MAD_I32_I24 : VOP3Inst <"v_mad_i32_i24", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUmad_i24>;
+def V_MAD_U32_U24 : VOP3Inst <"v_mad_u32_u24", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUmad_u24>;
+def V_FMA_F32 : VOP3Inst <"v_fma_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, fma>;
+def V_FMA_F64 : VOP3Inst <"v_fma_f64", VOP3_Profile<VOP_F64_F64_F64_F64>, fma>;
+def V_LERP_U8 : VOP3Inst <"v_lerp_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_lerp>;
+
+let SchedRW = [WriteDoubleAdd] in {
+def V_ADD_F64 : VOP3Inst <"v_add_f64", VOP3_Profile<VOP_F64_F64_F64>, fadd, 1>;
+def V_MUL_F64 : VOP3Inst <"v_mul_f64", VOP3_Profile<VOP_F64_F64_F64>, fmul, 1>;
+def V_MIN_F64 : VOP3Inst <"v_min_f64", VOP3_Profile<VOP_F64_F64_F64>, fminnum, 1>;
+def V_MAX_F64 : VOP3Inst <"v_max_f64", VOP3_Profile<VOP_F64_F64_F64>, fmaxnum, 1>;
+} // End SchedRW = [WriteDoubleAdd]
+
+let SchedRW = [WriteQuarterRate32] in {
+def V_MUL_LO_U32 : VOP3Inst <"v_mul_lo_u32", VOP3_Profile<VOP_I32_I32_I32>>;
+def V_MUL_HI_U32 : VOP3Inst <"v_mul_hi_u32", VOP3_Profile<VOP_I32_I32_I32>, mulhu>;
+def V_MUL_LO_I32 : VOP3Inst <"v_mul_lo_i32", VOP3_Profile<VOP_I32_I32_I32>>;
+def V_MUL_HI_I32 : VOP3Inst <"v_mul_hi_i32", VOP3_Profile<VOP_I32_I32_I32>, mulhs>;
+} // End SchedRW = [WriteQuarterRate32]
+
+let Uses = [VCC, EXEC] in {
+// v_div_fmas_f32:
+// result = src0 * src1 + src2
+// if (vcc)
+// result *= 2^32
+//
+def V_DIV_FMAS_F32 : VOP3_Pseudo <"v_div_fmas_f32", VOP_F32_F32_F32_F32_VCC,
+ getVOP3VCC<VOP_F32_F32_F32_F32_VCC, AMDGPUdiv_fmas>.ret> {
+ let SchedRW = [WriteFloatFMA];
+}
+// v_div_fmas_f64:
+// result = src0 * src1 + src2
+// if (vcc)
+// result *= 2^64
+//
+def V_DIV_FMAS_F64 : VOP3_Pseudo <"v_div_fmas_f64", VOP_F64_F64_F64_F64_VCC,
+ getVOP3VCC<VOP_F64_F64_F64_F64_VCC, AMDGPUdiv_fmas>.ret> {
+ let SchedRW = [WriteDouble];
+}
+} // End Uses = [VCC, EXEC]
+
+} // End isCommutable = 1
+
+def V_CUBEID_F32 : VOP3Inst <"v_cubeid_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubeid>;
+def V_CUBESC_F32 : VOP3Inst <"v_cubesc_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubesc>;
+def V_CUBETC_F32 : VOP3Inst <"v_cubetc_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubetc>;
+def V_CUBEMA_F32 : VOP3Inst <"v_cubema_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, int_amdgcn_cubema>;
+def V_BFE_U32 : VOP3Inst <"v_bfe_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_u32>;
+def V_BFE_I32 : VOP3Inst <"v_bfe_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfe_i32>;
+def V_BFI_B32 : VOP3Inst <"v_bfi_b32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUbfi>;
+def V_ALIGNBIT_B32 : VOP3Inst <"v_alignbit_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
+def V_ALIGNBYTE_B32 : VOP3Inst <"v_alignbyte_b32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
+def V_MIN3_F32 : VOP3Inst <"v_min3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmin3>;
+def V_MIN3_I32 : VOP3Inst <"v_min3_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUsmin3>;
+def V_MIN3_U32 : VOP3Inst <"v_min3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUumin3>;
+def V_MAX3_F32 : VOP3Inst <"v_max3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmax3>;
+def V_MAX3_I32 : VOP3Inst <"v_max3_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUsmax3>;
+def V_MAX3_U32 : VOP3Inst <"v_max3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUumax3>;
+def V_MED3_F32 : VOP3Inst <"v_med3_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUfmed3>;
+def V_MED3_I32 : VOP3Inst <"v_med3_i32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUsmed3>;
+def V_MED3_U32 : VOP3Inst <"v_med3_u32", VOP3_Profile<VOP_I32_I32_I32_I32>, AMDGPUumed3>;
+def V_SAD_U8 : VOP3Inst <"v_sad_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_sad_u8>;
+def V_SAD_HI_U8 : VOP3Inst <"v_sad_hi_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_sad_hi_u8>;
+def V_SAD_U16 : VOP3Inst <"v_sad_u16", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_sad_u16>;
+def V_SAD_U32 : VOP3Inst <"v_sad_u32", VOP3_Profile<VOP_I32_I32_I32_I32>>;
+def V_CVT_PK_U8_F32 : VOP3Inst<"v_cvt_pk_u8_f32", VOP3_Profile<VOP_I32_F32_I32_I32>, int_amdgcn_cvt_pk_u8_f32>;
+def V_DIV_FIXUP_F32 : VOP3Inst <"v_div_fixup_f32", VOP3_Profile<VOP_F32_F32_F32_F32>, AMDGPUdiv_fixup>;
+
+let SchedRW = [WriteDoubleAdd] in {
+def V_DIV_FIXUP_F64 : VOP3Inst <"v_div_fixup_f64", VOP3_Profile<VOP_F64_F64_F64_F64>, AMDGPUdiv_fixup>;
+def V_LDEXP_F64 : VOP3Inst <"v_ldexp_f64", VOP3_Profile<VOP_F64_F64_I32>, AMDGPUldexp, 1>;
+} // End SchedRW = [WriteDoubleAdd]
+
+def V_DIV_SCALE_F32 : VOP3_Pseudo <"v_div_scale_f32", VOP3b_F32_I1_F32_F32_F32, [], 1> {
+ let SchedRW = [WriteFloatFMA, WriteSALU];
+ let hasExtraSrcRegAllocReq = 1;
+}
+
+// Double precision division pre-scale.
+def V_DIV_SCALE_F64 : VOP3_Pseudo <"v_div_scale_f64", VOP3b_F64_I1_F64_F64_F64, [], 1> {
+ let SchedRW = [WriteDouble, WriteSALU];
+ let hasExtraSrcRegAllocReq = 1;
+}
+
+def V_MSAD_U8 : VOP3Inst <"v_msad_u8", VOP3_Profile<VOP_I32_I32_I32_I32>, int_amdgcn_msad_u8>;
+def V_MQSAD_PK_U16_U8 : VOP3Inst <"v_mqsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64>, int_amdgcn_mqsad_pk_u16_u8>;
+
+def V_TRIG_PREOP_F64 : VOP3Inst <"v_trig_preop_f64", VOP3_Profile<VOP_F64_F64_I32>, AMDGPUtrig_preop> {
+ let SchedRW = [WriteDouble];
+}
+
+// These instructions only exist on SI and CI
+let SubtargetPredicate = isSICI in {
+def V_LSHL_B64 : VOP3Inst <"v_lshl_b64", VOP3_Profile<VOP_I64_I64_I32>>;
+def V_LSHR_B64 : VOP3Inst <"v_lshr_b64", VOP3_Profile<VOP_I64_I64_I32>>;
+def V_ASHR_I64 : VOP3Inst <"v_ashr_i64", VOP3_Profile<VOP_I64_I64_I32>>;
+def V_MULLIT_F32 : VOP3Inst <"v_mullit_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
+} // End SubtargetPredicate = isSICI
+
+let SubtargetPredicate = isVI in {
+def V_LSHLREV_B64 : VOP3Inst <"v_lshlrev_b64", VOP3_Profile<VOP_I64_I32_I64>>;
+def V_LSHRREV_B64 : VOP3Inst <"v_lshrrev_b64", VOP3_Profile<VOP_I64_I32_I64>>;
+def V_ASHRREV_I64 : VOP3Inst <"v_ashrrev_i64", VOP3_Profile<VOP_I64_I32_I64>>;
+} // End SubtargetPredicate = isVI
+
+
+let SubtargetPredicate = isCIVI in {
+
+def V_MQSAD_U16_U8 : VOP3Inst <"v_mqsad_u16_u8", VOP3_Profile<VOP_I32_I32_I32>>;
+def V_QSAD_PK_U16_U8 : VOP3Inst <"v_qsad_pk_u16_u8", VOP3_Profile<VOP_I64_I64_I32_I64>, int_amdgcn_qsad_pk_u16_u8>;
+def V_MQSAD_U32_U8 : VOP3Inst <"v_mqsad_u32_u8", VOP3_Profile<VOP_V4I32_I64_I32_V4I32>, int_amdgcn_mqsad_u32_u8>;
+
+let isCommutable = 1 in {
+def V_MAD_U64_U32 : VOP3Inst <"v_mad_u64_u32", VOP3_Profile<VOP_I64_I32_I32_I64>>;
+
+// XXX - Does this set VCC?
+def V_MAD_I64_I32 : VOP3Inst <"v_mad_i64_i32", VOP3_Profile<VOP_I64_I32_I32_I64>>;
+} // End isCommutable = 1
+
+} // End SubtargetPredicate = isCIVI
+
+
+let SubtargetPredicate = isVI in {
+
+let isCommutable = 1 in {
+
+def V_DIV_FIXUP_F16 : VOP3Inst <"v_div_fixup_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, AMDGPUdiv_fixup>;
+def V_FMA_F16 : VOP3Inst <"v_fma_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fma>;
+def V_INTERP_P1LL_F16 : VOP3Inst <"v_interp_p1ll_f16", VOP3_Profile<VOP_F32_F32_F16>>;
+def V_INTERP_P1LV_F16 : VOP3Inst <"v_interp_p1lv_f16", VOP3_Profile<VOP_F32_F32_F16_F16>>;
+def V_INTERP_P2_F16 : VOP3Inst <"v_interp_p2_f16", VOP3_Profile<VOP_F16_F32_F16_F32>>;
+def V_MAD_F16 : VOP3Inst <"v_mad_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fmad>;
+
+def V_MAD_U16 : VOP3Inst <"v_mad_u16", VOP3_Profile<VOP_I16_I16_I16_I16>>;
+def V_MAD_I16 : VOP3Inst <"v_mad_i16", VOP3_Profile<VOP_I16_I16_I16_I16>>;
+
+} // End isCommutable = 1
+
+} // End SubtargetPredicate = isVI
+
+let Predicates = [isVI] in {
+
+multiclass Ternary_i16_Pats <SDPatternOperator op1, SDPatternOperator op2,
+ Instruction inst, SDPatternOperator op3> {
+def : Pat<
+ (op2 (op1 i16:$src0, i16:$src1), i16:$src2),
+ (inst i16:$src0, i16:$src1, i16:$src2)
+>;
+
+def : Pat<
+ (i32 (op3 (op2 (op1 i16:$src0, i16:$src1), i16:$src2))),
+ (inst i16:$src0, i16:$src1, i16:$src2)
+>;
+
+def : Pat<
+ (i64 (op3 (op2 (op1 i16:$src0, i16:$src1), i16:$src2))),
+ (REG_SEQUENCE VReg_64,
+ (inst i16:$src0, i16:$src1, i16:$src2), sub0,
+ (V_MOV_B32_e32 (i32 0)), sub1)
+>;
+}
+
+defm: Ternary_i16_Pats<mul, add, V_MAD_U16, zext>;
+defm: Ternary_i16_Pats<mul, add, V_MAD_I16, sext>;
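+
+// For illustration only (hypothetical IR, not taken from this file): with
+// op1 = mul, op2 = add and op3 = zext/sext, the patterns above roughly cover
+//   (add (mul i16:$a, i16:$b), i16:$c)               -> v_mad_u16 / v_mad_i16
+//   (i32 (zext (add (mul i16:$a, i16:$b), i16:$c)))  -> the same MAD
+//   (i64 (...))  -> REG_SEQUENCE of the MAD in sub0 and a zeroed VGPR in sub1.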
+
+} // End Predicates = [isVI]
+
+
+//===----------------------------------------------------------------------===//
+// Target
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SI
+//===----------------------------------------------------------------------===//
+
+let AssemblerPredicates = [isSICI], DecoderNamespace = "SICI" in {
+
+multiclass VOP3_Real_si<bits<9> op> {
+ def _si : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.SI>,
+ VOP3e_si <op, !cast<VOP3_Pseudo>(NAME).Pfl>;
+}
+
+multiclass VOP3be_Real_si<bits<9> op> {
+ def _si : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.SI>,
+ VOP3be_si <op, !cast<VOP3_Pseudo>(NAME).Pfl>;
+}
+
+} // End AssemblerPredicates = [isSICI], DecoderNamespace = "SICI"
+
+defm V_MAD_LEGACY_F32 : VOP3_Real_si <0x140>;
+defm V_MAD_F32 : VOP3_Real_si <0x141>;
+defm V_MAD_I32_I24 : VOP3_Real_si <0x142>;
+defm V_MAD_U32_U24 : VOP3_Real_si <0x143>;
+defm V_CUBEID_F32 : VOP3_Real_si <0x144>;
+defm V_CUBESC_F32 : VOP3_Real_si <0x145>;
+defm V_CUBETC_F32 : VOP3_Real_si <0x146>;
+defm V_CUBEMA_F32 : VOP3_Real_si <0x147>;
+defm V_BFE_U32 : VOP3_Real_si <0x148>;
+defm V_BFE_I32 : VOP3_Real_si <0x149>;
+defm V_BFI_B32 : VOP3_Real_si <0x14a>;
+defm V_FMA_F32 : VOP3_Real_si <0x14b>;
+defm V_FMA_F64 : VOP3_Real_si <0x14c>;
+defm V_LERP_U8 : VOP3_Real_si <0x14d>;
+defm V_ALIGNBIT_B32 : VOP3_Real_si <0x14e>;
+defm V_ALIGNBYTE_B32 : VOP3_Real_si <0x14f>;
+defm V_MULLIT_F32 : VOP3_Real_si <0x150>;
+defm V_MIN3_F32 : VOP3_Real_si <0x151>;
+defm V_MIN3_I32 : VOP3_Real_si <0x152>;
+defm V_MIN3_U32 : VOP3_Real_si <0x153>;
+defm V_MAX3_F32 : VOP3_Real_si <0x154>;
+defm V_MAX3_I32 : VOP3_Real_si <0x155>;
+defm V_MAX3_U32 : VOP3_Real_si <0x156>;
+defm V_MED3_F32 : VOP3_Real_si <0x157>;
+defm V_MED3_I32 : VOP3_Real_si <0x158>;
+defm V_MED3_U32 : VOP3_Real_si <0x159>;
+defm V_SAD_U8 : VOP3_Real_si <0x15a>;
+defm V_SAD_HI_U8 : VOP3_Real_si <0x15b>;
+defm V_SAD_U16 : VOP3_Real_si <0x15c>;
+defm V_SAD_U32 : VOP3_Real_si <0x15d>;
+defm V_CVT_PK_U8_F32 : VOP3_Real_si <0x15e>;
+defm V_DIV_FIXUP_F32 : VOP3_Real_si <0x15f>;
+defm V_DIV_FIXUP_F64 : VOP3_Real_si <0x160>;
+defm V_LSHL_B64 : VOP3_Real_si <0x161>;
+defm V_LSHR_B64 : VOP3_Real_si <0x162>;
+defm V_ASHR_I64 : VOP3_Real_si <0x163>;
+defm V_ADD_F64 : VOP3_Real_si <0x164>;
+defm V_MUL_F64 : VOP3_Real_si <0x165>;
+defm V_MIN_F64 : VOP3_Real_si <0x166>;
+defm V_MAX_F64 : VOP3_Real_si <0x167>;
+defm V_LDEXP_F64 : VOP3_Real_si <0x168>;
+defm V_MUL_LO_U32 : VOP3_Real_si <0x169>;
+defm V_MUL_HI_U32 : VOP3_Real_si <0x16a>;
+defm V_MUL_LO_I32 : VOP3_Real_si <0x16b>;
+defm V_MUL_HI_I32 : VOP3_Real_si <0x16c>;
+defm V_DIV_SCALE_F32 : VOP3be_Real_si <0x16d>;
+defm V_DIV_SCALE_F64 : VOP3be_Real_si <0x16e>;
+defm V_DIV_FMAS_F32 : VOP3_Real_si <0x16f>;
+defm V_DIV_FMAS_F64 : VOP3_Real_si <0x170>;
+defm V_MSAD_U8 : VOP3_Real_si <0x171>;
+defm V_MQSAD_PK_U16_U8 : VOP3_Real_si <0x173>;
+defm V_TRIG_PREOP_F64 : VOP3_Real_si <0x174>;
+
+//===----------------------------------------------------------------------===//
+// CI
+//===----------------------------------------------------------------------===//
+
+multiclass VOP3_Real_ci<bits<9> op> {
+ def _ci : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.SI>,
+ VOP3e_si <op, !cast<VOP3_Pseudo>(NAME).Pfl> {
+ let AssemblerPredicates = [isCIOnly];
+ let DecoderNamespace = "CI";
+ }
+}
+
+defm V_MQSAD_U16_U8 : VOP3_Real_ci <0x172>;
+defm V_QSAD_PK_U16_U8 : VOP3_Real_ci <0x172>;
+defm V_MQSAD_U32_U8 : VOP3_Real_ci <0x174>;
+defm V_MAD_U64_U32 : VOP3_Real_ci <0x176>;
+defm V_MAD_I64_I32 : VOP3_Real_ci <0x177>;
+
+//===----------------------------------------------------------------------===//
+// VI
+//===----------------------------------------------------------------------===//
+
+let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in {
+
+multiclass VOP3_Real_vi<bits<10> op> {
+ def _vi : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
+ VOP3e_vi <op, !cast<VOP3_Pseudo>(NAME).Pfl>;
+}
+
+multiclass VOP3be_Real_vi<bits<10> op> {
+ def _vi : VOP3_Real<!cast<VOP3_Pseudo>(NAME), SIEncodingFamily.VI>,
+ VOP3be_vi <op, !cast<VOP3_Pseudo>(NAME).Pfl>;
+}
+
+} // End AssemblerPredicates = [isVI], DecoderNamespace = "VI"
+
+defm V_MQSAD_U16_U8 : VOP3_Real_vi <0x172>;
+defm V_MAD_U64_U32 : VOP3_Real_vi <0x176>;
+defm V_MAD_I64_I32 : VOP3_Real_vi <0x177>;
+
+defm V_MAD_LEGACY_F32 : VOP3_Real_vi <0x1c0>;
+defm V_MAD_F32 : VOP3_Real_vi <0x1c1>;
+defm V_MAD_I32_I24 : VOP3_Real_vi <0x1c2>;
+defm V_MAD_U32_U24 : VOP3_Real_vi <0x1c3>;
+defm V_CUBEID_F32 : VOP3_Real_vi <0x1c4>;
+defm V_CUBESC_F32 : VOP3_Real_vi <0x1c5>;
+defm V_CUBETC_F32 : VOP3_Real_vi <0x1c6>;
+defm V_CUBEMA_F32 : VOP3_Real_vi <0x1c7>;
+defm V_BFE_U32 : VOP3_Real_vi <0x1c8>;
+defm V_BFE_I32 : VOP3_Real_vi <0x1c9>;
+defm V_BFI_B32 : VOP3_Real_vi <0x1ca>;
+defm V_FMA_F32 : VOP3_Real_vi <0x1cb>;
+defm V_FMA_F64 : VOP3_Real_vi <0x1cc>;
+defm V_LERP_U8 : VOP3_Real_vi <0x1cd>;
+defm V_ALIGNBIT_B32 : VOP3_Real_vi <0x1ce>;
+defm V_ALIGNBYTE_B32 : VOP3_Real_vi <0x1cf>;
+defm V_MIN3_F32 : VOP3_Real_vi <0x1d0>;
+defm V_MIN3_I32 : VOP3_Real_vi <0x1d1>;
+defm V_MIN3_U32 : VOP3_Real_vi <0x1d2>;
+defm V_MAX3_F32 : VOP3_Real_vi <0x1d3>;
+defm V_MAX3_I32 : VOP3_Real_vi <0x1d4>;
+defm V_MAX3_U32 : VOP3_Real_vi <0x1d5>;
+defm V_MED3_F32 : VOP3_Real_vi <0x1d6>;
+defm V_MED3_I32 : VOP3_Real_vi <0x1d7>;
+defm V_MED3_U32 : VOP3_Real_vi <0x1d8>;
+defm V_SAD_U8 : VOP3_Real_vi <0x1d9>;
+defm V_SAD_HI_U8 : VOP3_Real_vi <0x1da>;
+defm V_SAD_U16 : VOP3_Real_vi <0x1db>;
+defm V_SAD_U32 : VOP3_Real_vi <0x1dc>;
+defm V_CVT_PK_U8_F32 : VOP3_Real_vi <0x1dd>;
+defm V_DIV_FIXUP_F32 : VOP3_Real_vi <0x1de>;
+defm V_DIV_FIXUP_F64 : VOP3_Real_vi <0x1df>;
+defm V_DIV_SCALE_F32 : VOP3be_Real_vi <0x1e0>;
+defm V_DIV_SCALE_F64 : VOP3be_Real_vi <0x1e1>;
+defm V_DIV_FMAS_F32 : VOP3_Real_vi <0x1e2>;
+defm V_DIV_FMAS_F64 : VOP3_Real_vi <0x1e3>;
+defm V_MSAD_U8 : VOP3_Real_vi <0x1e4>;
+defm V_QSAD_PK_U16_U8 : VOP3_Real_vi <0x1e5>;
+defm V_MQSAD_PK_U16_U8 : VOP3_Real_vi <0x1e6>;
+defm V_MQSAD_U32_U8 : VOP3_Real_vi <0x1e7>;
+
+defm V_MAD_F16 : VOP3_Real_vi <0x1ea>;
+defm V_MAD_U16 : VOP3_Real_vi <0x1eb>;
+defm V_MAD_I16 : VOP3_Real_vi <0x1ec>;
+
+defm V_FMA_F16 : VOP3_Real_vi <0x1ee>;
+defm V_DIV_FIXUP_F16 : VOP3_Real_vi <0x1ef>;
+
+defm V_INTERP_P1LL_F16 : VOP3_Real_vi <0x274>;
+defm V_INTERP_P1LV_F16 : VOP3_Real_vi <0x275>;
+defm V_INTERP_P2_F16 : VOP3_Real_vi <0x276>;
+defm V_ADD_F64 : VOP3_Real_vi <0x280>;
+defm V_MUL_F64 : VOP3_Real_vi <0x281>;
+defm V_MIN_F64 : VOP3_Real_vi <0x282>;
+defm V_MAX_F64 : VOP3_Real_vi <0x283>;
+defm V_LDEXP_F64 : VOP3_Real_vi <0x284>;
+defm V_MUL_LO_U32 : VOP3_Real_vi <0x285>;
+
+// Removed from VI as it is identical to V_MUL_LO_U32.
+let isAsmParserOnly = 1 in {
+defm V_MUL_LO_I32 : VOP3_Real_vi <0x285>;
+}
+
+defm V_MUL_HI_U32 : VOP3_Real_vi <0x286>;
+defm V_MUL_HI_I32 : VOP3_Real_vi <0x287>;
+
+defm V_LSHLREV_B64 : VOP3_Real_vi <0x28f>;
+defm V_LSHRREV_B64 : VOP3_Real_vi <0x290>;
+defm V_ASHRREV_I64 : VOP3_Real_vi <0x291>;
+defm V_TRIG_PREOP_F64 : VOP3_Real_vi <0x292>;
diff --git a/contrib/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/contrib/llvm/lib/Target/AMDGPU/VOPCInstructions.td
new file mode 100644
index 000000000000..c431d9db801e
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/VOPCInstructions.td
@@ -0,0 +1,1144 @@
+//===-- VOPCInstructions.td - Vector Instruction Definitions --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Encodings
+//===----------------------------------------------------------------------===//
+
+class VOPCe <bits<8> op> : Enc32 {
+ bits<9> src0;
+ bits<8> src1;
+
+ let Inst{8-0} = src0;
+ let Inst{16-9} = src1;
+ let Inst{24-17} = op;
+ let Inst{31-25} = 0x3e;
+}
+
+class VOPC_SDWAe <bits<8> op, VOPProfile P> : VOP_SDWAe <P> {
+ bits<8> src1;
+
+ let Inst{8-0} = 0xf9; // sdwa
+ let Inst{16-9} = !if(P.HasSrc1, src1{7-0}, 0);
+ let Inst{24-17} = op;
+ let Inst{31-25} = 0x3e; // encoding
+
+ // VOPC disallows dst_sel and dst_unused as they have no effect on destination
+ let Inst{42-40} = SDWA.DWORD;
+ let Inst{44-43} = SDWA.UNUSED_PRESERVE;
+}
+
+//===----------------------------------------------------------------------===//
+// VOPC classes
+//===----------------------------------------------------------------------===//
+
+// VOPC instructions are a special case because for the 32-bit
+// encoding, we want to display the implicit vcc write as if it were
+// an explicit $dst.
+class VOPC_Profile<list<SchedReadWrite> sched, ValueType vt0, ValueType vt1 = vt0> :
+ VOPProfile <[i1, vt0, vt1, untyped]> {
+ let Asm32 = "vcc, $src0, $src1";
+ // The destination for 32-bit encoding is implicit.
+ let HasDst32 = 0;
+ let Outs64 = (outs VOPDstS64:$sdst);
+ list<SchedReadWrite> Schedule = sched;
+}
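+
+// For illustration (register operands are hypothetical): with Asm32 above the
+// 32-bit encoding prints its implicit VCC write explicitly, e.g.
+//   v_cmp_lt_f32_e32 vcc, v0, v1
+// while the 64-bit (VOP3) form carries the explicit $sdst SGPR-pair result.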
+
+class VOPC_Pseudo <string opName, VOPC_Profile P, list<dag> pattern=[]> :
+ InstSI<(outs), P.Ins32, "", pattern>,
+ VOP <opName>,
+ SIMCInstr<opName#"_e32", SIEncodingFamily.NONE> {
+
+ let isPseudo = 1;
+ let isCodeGenOnly = 1;
+ let UseNamedOperandTable = 1;
+
+ string Mnemonic = opName;
+ string AsmOperands = P.Asm32;
+
+ let Size = 4;
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+
+ let VALU = 1;
+ let VOPC = 1;
+ let Uses = [EXEC];
+ let Defs = [VCC];
+
+ let SubtargetPredicate = isGCN;
+
+ VOPProfile Pfl = P;
+}
+
+class VOPC_Real <VOPC_Pseudo ps, int EncodingFamily> :
+ InstSI <ps.OutOperandList, ps.InOperandList, ps.PseudoInstr # " " # ps.AsmOperands, []>,
+ SIMCInstr <ps.PseudoInstr, EncodingFamily> {
+
+ let isPseudo = 0;
+ let isCodeGenOnly = 0;
+
+ let Constraints = ps.Constraints;
+ let DisableEncoding = ps.DisableEncoding;
+
+ // copy relevant pseudo op flags
+ let SubtargetPredicate = ps.SubtargetPredicate;
+ let AsmMatchConverter = ps.AsmMatchConverter;
+ let Constraints = ps.Constraints;
+ let DisableEncoding = ps.DisableEncoding;
+ let TSFlags = ps.TSFlags;
+}
+
+class VOPC_SDWA_Pseudo <string OpName, VOPProfile P, list<dag> pattern=[]> :
+ VOP_SDWA_Pseudo <OpName, P, pattern> {
+ let AsmMatchConverter = "cvtSdwaVOPC";
+}
+
+// This class is used only with VOPC instructions. Use $sdst for the output
+// operand.
+class VOPCInstAlias <VOP3_Pseudo ps, Instruction inst, VOPProfile p = ps.Pfl> :
+ InstAlias <ps.OpName#" "#p.Asm32, (inst)>, PredicateControl {
+
+ field bit isCompare;
+ field bit isCommutable;
+
+ let ResultInst =
+ !if (p.HasDst32,
+ !if (!eq(p.NumSrcArgs, 0),
+ // 1 dst, 0 src
+ (inst p.DstRC:$sdst),
+ !if (!eq(p.NumSrcArgs, 1),
+ // 1 dst, 1 src
+ (inst p.DstRC:$sdst, p.Src0RC32:$src0),
+ !if (!eq(p.NumSrcArgs, 2),
+ // 1 dst, 2 src
+ (inst p.DstRC:$sdst, p.Src0RC32:$src0, p.Src1RC32:$src1),
+ // else - unreachable
+ (inst)))),
+ // else
+ !if (!eq(p.NumSrcArgs, 2),
+ // 0 dst, 2 src
+ (inst p.Src0RC32:$src0, p.Src1RC32:$src1),
+ !if (!eq(p.NumSrcArgs, 1),
+ // 0 dst, 1 src
+ (inst p.Src0RC32:$src1),
+ // else
+ // 0 dst, 0 src
+ (inst))));
+
+ let AsmVariantName = AMDGPUAsmVariants.Default;
+ let SubtargetPredicate = AssemblerPredicate;
+}
+
+multiclass VOPC_Pseudos <string opName,
+ VOPC_Profile P,
+ PatLeaf cond = COND_NULL,
+ string revOp = opName,
+ bit DefExec = 0> {
+
+ def _e32 : VOPC_Pseudo <opName, P>,
+ Commutable_REV<revOp#"_e32", !eq(revOp, opName)> {
+ let Defs = !if(DefExec, [VCC, EXEC], [VCC]);
+ let SchedRW = P.Schedule;
+ let isConvergent = DefExec;
+ let isCompare = 1;
+ let isCommutable = 1;
+ }
+
+ def _e64 : VOP3_Pseudo<opName, P,
+ !if(P.HasModifiers,
+ [(set i1:$sdst,
+ (setcc (P.Src0VT (VOP3Mods0 P.Src0VT:$src0, i32:$src0_modifiers,
+ i1:$clamp, i32:$omod)),
+ (P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
+ cond))],
+ [(set i1:$sdst, (setcc P.Src0VT:$src0, P.Src1VT:$src1, cond))])>,
+ Commutable_REV<revOp#"_e64", !eq(revOp, opName)> {
+ let Defs = !if(DefExec, [EXEC], []);
+ let SchedRW = P.Schedule;
+ let isCompare = 1;
+ let isCommutable = 1;
+ }
+
+ def _sdwa : VOPC_SDWA_Pseudo <opName, P>,
+ Commutable_REV<revOp#"_sdwa", !eq(revOp, opName)> {
+ let Defs = !if(DefExec, [VCC, EXEC], [VCC]);
+ let SchedRW = P.Schedule;
+ let isConvergent = DefExec;
+ let isCompare = 1;
+ let isCommutable = 1;
+ }
+}
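+
+// For illustration: each defm built from this multiclass expands to three
+// pseudos; e.g. V_CMP_LT_F32 below yields V_CMP_LT_F32_e32 (VOPC encoding,
+// implicit VCC def), V_CMP_LT_F32_e64 (VOP3 encoding with setcc selection
+// patterns and an explicit SGPR-pair result) and V_CMP_LT_F32_sdwa.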
+
+def VOPC_I1_F16_F16 : VOPC_Profile<[Write32Bit], f16>;
+def VOPC_I1_F32_F32 : VOPC_Profile<[Write32Bit], f32>;
+def VOPC_I1_F64_F64 : VOPC_Profile<[WriteDoubleAdd], f64>;
+def VOPC_I1_I16_I16 : VOPC_Profile<[Write32Bit], i16>;
+def VOPC_I1_I32_I32 : VOPC_Profile<[Write32Bit], i32>;
+def VOPC_I1_I64_I64 : VOPC_Profile<[Write64Bit], i64>;
+
+multiclass VOPC_F16 <string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
+ VOPC_Pseudos <opName, VOPC_I1_F16_F16, cond, revOp, 0>;
+
+multiclass VOPC_F32 <string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
+ VOPC_Pseudos <opName, VOPC_I1_F32_F32, cond, revOp, 0>;
+
+multiclass VOPC_F64 <string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
+ VOPC_Pseudos <opName, VOPC_I1_F64_F64, cond, revOp, 0>;
+
+multiclass VOPC_I16 <string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
+ VOPC_Pseudos <opName, VOPC_I1_I16_I16, cond, revOp, 0>;
+
+multiclass VOPC_I32 <string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
+ VOPC_Pseudos <opName, VOPC_I1_I32_I32, cond, revOp, 0>;
+
+multiclass VOPC_I64 <string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
+ VOPC_Pseudos <opName, VOPC_I1_I64_I64, cond, revOp, 0>;
+
+multiclass VOPCX_F16 <string opName, string revOp = opName> :
+ VOPC_Pseudos <opName, VOPC_I1_F16_F16, COND_NULL, revOp, 1>;
+
+multiclass VOPCX_F32 <string opName, string revOp = opName> :
+ VOPC_Pseudos <opName, VOPC_I1_F32_F32, COND_NULL, revOp, 1>;
+
+multiclass VOPCX_F64 <string opName, string revOp = opName> :
+ VOPC_Pseudos <opName, VOPC_I1_F64_F64, COND_NULL, revOp, 1>;
+
+multiclass VOPCX_I16 <string opName, string revOp = opName> :
+ VOPC_Pseudos <opName, VOPC_I1_I16_I16, COND_NULL, revOp, 1>;
+
+multiclass VOPCX_I32 <string opName, string revOp = opName> :
+ VOPC_Pseudos <opName, VOPC_I1_I32_I32, COND_NULL, revOp, 1>;
+
+multiclass VOPCX_I64 <string opName, string revOp = opName> :
+ VOPC_Pseudos <opName, VOPC_I1_I64_I64, COND_NULL, revOp, 1>;
+
+
+//===----------------------------------------------------------------------===//
+// Compare instructions
+//===----------------------------------------------------------------------===//
+
+defm V_CMP_F_F32 : VOPC_F32 <"v_cmp_f_f32">;
+defm V_CMP_LT_F32 : VOPC_F32 <"v_cmp_lt_f32", COND_OLT, "v_cmp_gt_f32">;
+defm V_CMP_EQ_F32 : VOPC_F32 <"v_cmp_eq_f32", COND_OEQ>;
+defm V_CMP_LE_F32 : VOPC_F32 <"v_cmp_le_f32", COND_OLE, "v_cmp_ge_f32">;
+defm V_CMP_GT_F32 : VOPC_F32 <"v_cmp_gt_f32", COND_OGT>;
+defm V_CMP_LG_F32 : VOPC_F32 <"v_cmp_lg_f32", COND_ONE>;
+defm V_CMP_GE_F32 : VOPC_F32 <"v_cmp_ge_f32", COND_OGE>;
+defm V_CMP_O_F32 : VOPC_F32 <"v_cmp_o_f32", COND_O>;
+defm V_CMP_U_F32 : VOPC_F32 <"v_cmp_u_f32", COND_UO>;
+defm V_CMP_NGE_F32 : VOPC_F32 <"v_cmp_nge_f32", COND_ULT, "v_cmp_nle_f32">;
+defm V_CMP_NLG_F32 : VOPC_F32 <"v_cmp_nlg_f32", COND_UEQ>;
+defm V_CMP_NGT_F32 : VOPC_F32 <"v_cmp_ngt_f32", COND_ULE, "v_cmp_nlt_f32">;
+defm V_CMP_NLE_F32 : VOPC_F32 <"v_cmp_nle_f32", COND_UGT>;
+defm V_CMP_NEQ_F32 : VOPC_F32 <"v_cmp_neq_f32", COND_UNE>;
+defm V_CMP_NLT_F32 : VOPC_F32 <"v_cmp_nlt_f32", COND_UGE>;
+defm V_CMP_TRU_F32 : VOPC_F32 <"v_cmp_tru_f32">;
+
+defm V_CMPX_F_F32 : VOPCX_F32 <"v_cmpx_f_f32">;
+defm V_CMPX_LT_F32 : VOPCX_F32 <"v_cmpx_lt_f32", "v_cmpx_gt_f32">;
+defm V_CMPX_EQ_F32 : VOPCX_F32 <"v_cmpx_eq_f32">;
+defm V_CMPX_LE_F32 : VOPCX_F32 <"v_cmpx_le_f32", "v_cmpx_ge_f32">;
+defm V_CMPX_GT_F32 : VOPCX_F32 <"v_cmpx_gt_f32">;
+defm V_CMPX_LG_F32 : VOPCX_F32 <"v_cmpx_lg_f32">;
+defm V_CMPX_GE_F32 : VOPCX_F32 <"v_cmpx_ge_f32">;
+defm V_CMPX_O_F32 : VOPCX_F32 <"v_cmpx_o_f32">;
+defm V_CMPX_U_F32 : VOPCX_F32 <"v_cmpx_u_f32">;
+defm V_CMPX_NGE_F32 : VOPCX_F32 <"v_cmpx_nge_f32", "v_cmpx_nle_f32">;
+defm V_CMPX_NLG_F32 : VOPCX_F32 <"v_cmpx_nlg_f32">;
+defm V_CMPX_NGT_F32 : VOPCX_F32 <"v_cmpx_ngt_f32", "v_cmpx_nlt_f32">;
+defm V_CMPX_NLE_F32 : VOPCX_F32 <"v_cmpx_nle_f32">;
+defm V_CMPX_NEQ_F32 : VOPCX_F32 <"v_cmpx_neq_f32">;
+defm V_CMPX_NLT_F32 : VOPCX_F32 <"v_cmpx_nlt_f32">;
+defm V_CMPX_TRU_F32 : VOPCX_F32 <"v_cmpx_tru_f32">;
+
+defm V_CMP_F_F64 : VOPC_F64 <"v_cmp_f_f64">;
+defm V_CMP_LT_F64 : VOPC_F64 <"v_cmp_lt_f64", COND_OLT, "v_cmp_gt_f64">;
+defm V_CMP_EQ_F64 : VOPC_F64 <"v_cmp_eq_f64", COND_OEQ>;
+defm V_CMP_LE_F64 : VOPC_F64 <"v_cmp_le_f64", COND_OLE, "v_cmp_ge_f64">;
+defm V_CMP_GT_F64 : VOPC_F64 <"v_cmp_gt_f64", COND_OGT>;
+defm V_CMP_LG_F64 : VOPC_F64 <"v_cmp_lg_f64", COND_ONE>;
+defm V_CMP_GE_F64 : VOPC_F64 <"v_cmp_ge_f64", COND_OGE>;
+defm V_CMP_O_F64 : VOPC_F64 <"v_cmp_o_f64", COND_O>;
+defm V_CMP_U_F64 : VOPC_F64 <"v_cmp_u_f64", COND_UO>;
+defm V_CMP_NGE_F64 : VOPC_F64 <"v_cmp_nge_f64", COND_ULT, "v_cmp_nle_f64">;
+defm V_CMP_NLG_F64 : VOPC_F64 <"v_cmp_nlg_f64", COND_UEQ>;
+defm V_CMP_NGT_F64 : VOPC_F64 <"v_cmp_ngt_f64", COND_ULE, "v_cmp_nlt_f64">;
+defm V_CMP_NLE_F64 : VOPC_F64 <"v_cmp_nle_f64", COND_UGT>;
+defm V_CMP_NEQ_F64 : VOPC_F64 <"v_cmp_neq_f64", COND_UNE>;
+defm V_CMP_NLT_F64 : VOPC_F64 <"v_cmp_nlt_f64", COND_UGE>;
+defm V_CMP_TRU_F64 : VOPC_F64 <"v_cmp_tru_f64">;
+
+defm V_CMPX_F_F64 : VOPCX_F64 <"v_cmpx_f_f64">;
+defm V_CMPX_LT_F64 : VOPCX_F64 <"v_cmpx_lt_f64", "v_cmpx_gt_f64">;
+defm V_CMPX_EQ_F64 : VOPCX_F64 <"v_cmpx_eq_f64">;
+defm V_CMPX_LE_F64 : VOPCX_F64 <"v_cmpx_le_f64", "v_cmpx_ge_f64">;
+defm V_CMPX_GT_F64 : VOPCX_F64 <"v_cmpx_gt_f64">;
+defm V_CMPX_LG_F64 : VOPCX_F64 <"v_cmpx_lg_f64">;
+defm V_CMPX_GE_F64 : VOPCX_F64 <"v_cmpx_ge_f64">;
+defm V_CMPX_O_F64 : VOPCX_F64 <"v_cmpx_o_f64">;
+defm V_CMPX_U_F64 : VOPCX_F64 <"v_cmpx_u_f64">;
+defm V_CMPX_NGE_F64 : VOPCX_F64 <"v_cmpx_nge_f64", "v_cmpx_nle_f64">;
+defm V_CMPX_NLG_F64 : VOPCX_F64 <"v_cmpx_nlg_f64">;
+defm V_CMPX_NGT_F64 : VOPCX_F64 <"v_cmpx_ngt_f64", "v_cmpx_nlt_f64">;
+defm V_CMPX_NLE_F64 : VOPCX_F64 <"v_cmpx_nle_f64">;
+defm V_CMPX_NEQ_F64 : VOPCX_F64 <"v_cmpx_neq_f64">;
+defm V_CMPX_NLT_F64 : VOPCX_F64 <"v_cmpx_nlt_f64">;
+defm V_CMPX_TRU_F64 : VOPCX_F64 <"v_cmpx_tru_f64">;
+
+let SubtargetPredicate = isSICI in {
+
+defm V_CMPS_F_F32 : VOPC_F32 <"v_cmps_f_f32">;
+defm V_CMPS_LT_F32 : VOPC_F32 <"v_cmps_lt_f32", COND_NULL, "v_cmps_gt_f32">;
+defm V_CMPS_EQ_F32 : VOPC_F32 <"v_cmps_eq_f32">;
+defm V_CMPS_LE_F32 : VOPC_F32 <"v_cmps_le_f32", COND_NULL, "v_cmps_ge_f32">;
+defm V_CMPS_GT_F32 : VOPC_F32 <"v_cmps_gt_f32">;
+defm V_CMPS_LG_F32 : VOPC_F32 <"v_cmps_lg_f32">;
+defm V_CMPS_GE_F32 : VOPC_F32 <"v_cmps_ge_f32">;
+defm V_CMPS_O_F32 : VOPC_F32 <"v_cmps_o_f32">;
+defm V_CMPS_U_F32 : VOPC_F32 <"v_cmps_u_f32">;
+defm V_CMPS_NGE_F32 : VOPC_F32 <"v_cmps_nge_f32", COND_NULL, "v_cmps_nle_f32">;
+defm V_CMPS_NLG_F32 : VOPC_F32 <"v_cmps_nlg_f32">;
+defm V_CMPS_NGT_F32 : VOPC_F32 <"v_cmps_ngt_f32", COND_NULL, "v_cmps_nlt_f32">;
+defm V_CMPS_NLE_F32 : VOPC_F32 <"v_cmps_nle_f32">;
+defm V_CMPS_NEQ_F32 : VOPC_F32 <"v_cmps_neq_f32">;
+defm V_CMPS_NLT_F32 : VOPC_F32 <"v_cmps_nlt_f32">;
+defm V_CMPS_TRU_F32 : VOPC_F32 <"v_cmps_tru_f32">;
+
+defm V_CMPSX_F_F32 : VOPCX_F32 <"v_cmpsx_f_f32">;
+defm V_CMPSX_LT_F32 : VOPCX_F32 <"v_cmpsx_lt_f32", "v_cmpsx_gt_f32">;
+defm V_CMPSX_EQ_F32 : VOPCX_F32 <"v_cmpsx_eq_f32">;
+defm V_CMPSX_LE_F32 : VOPCX_F32 <"v_cmpsx_le_f32", "v_cmpsx_ge_f32">;
+defm V_CMPSX_GT_F32 : VOPCX_F32 <"v_cmpsx_gt_f32">;
+defm V_CMPSX_LG_F32 : VOPCX_F32 <"v_cmpsx_lg_f32">;
+defm V_CMPSX_GE_F32 : VOPCX_F32 <"v_cmpsx_ge_f32">;
+defm V_CMPSX_O_F32 : VOPCX_F32 <"v_cmpsx_o_f32">;
+defm V_CMPSX_U_F32 : VOPCX_F32 <"v_cmpsx_u_f32">;
+defm V_CMPSX_NGE_F32 : VOPCX_F32 <"v_cmpsx_nge_f32", "v_cmpsx_nle_f32">;
+defm V_CMPSX_NLG_F32 : VOPCX_F32 <"v_cmpsx_nlg_f32">;
+defm V_CMPSX_NGT_F32 : VOPCX_F32 <"v_cmpsx_ngt_f32", "v_cmpsx_nlt_f32">;
+defm V_CMPSX_NLE_F32 : VOPCX_F32 <"v_cmpsx_nle_f32">;
+defm V_CMPSX_NEQ_F32 : VOPCX_F32 <"v_cmpsx_neq_f32">;
+defm V_CMPSX_NLT_F32 : VOPCX_F32 <"v_cmpsx_nlt_f32">;
+defm V_CMPSX_TRU_F32 : VOPCX_F32 <"v_cmpsx_tru_f32">;
+
+defm V_CMPS_F_F64 : VOPC_F64 <"v_cmps_f_f64">;
+defm V_CMPS_LT_F64 : VOPC_F64 <"v_cmps_lt_f64", COND_NULL, "v_cmps_gt_f64">;
+defm V_CMPS_EQ_F64 : VOPC_F64 <"v_cmps_eq_f64">;
+defm V_CMPS_LE_F64 : VOPC_F64 <"v_cmps_le_f64", COND_NULL, "v_cmps_ge_f64">;
+defm V_CMPS_GT_F64 : VOPC_F64 <"v_cmps_gt_f64">;
+defm V_CMPS_LG_F64 : VOPC_F64 <"v_cmps_lg_f64">;
+defm V_CMPS_GE_F64 : VOPC_F64 <"v_cmps_ge_f64">;
+defm V_CMPS_O_F64 : VOPC_F64 <"v_cmps_o_f64">;
+defm V_CMPS_U_F64 : VOPC_F64 <"v_cmps_u_f64">;
+defm V_CMPS_NGE_F64 : VOPC_F64 <"v_cmps_nge_f64", COND_NULL, "v_cmps_nle_f64">;
+defm V_CMPS_NLG_F64 : VOPC_F64 <"v_cmps_nlg_f64">;
+defm V_CMPS_NGT_F64 : VOPC_F64 <"v_cmps_ngt_f64", COND_NULL, "v_cmps_nlt_f64">;
+defm V_CMPS_NLE_F64 : VOPC_F64 <"v_cmps_nle_f64">;
+defm V_CMPS_NEQ_F64 : VOPC_F64 <"v_cmps_neq_f64">;
+defm V_CMPS_NLT_F64 : VOPC_F64 <"v_cmps_nlt_f64">;
+defm V_CMPS_TRU_F64 : VOPC_F64 <"v_cmps_tru_f64">;
+
+defm V_CMPSX_F_F64 : VOPCX_F64 <"v_cmpsx_f_f64">;
+defm V_CMPSX_LT_F64 : VOPCX_F64 <"v_cmpsx_lt_f64", "v_cmpsx_gt_f64">;
+defm V_CMPSX_EQ_F64 : VOPCX_F64 <"v_cmpsx_eq_f64">;
+defm V_CMPSX_LE_F64 : VOPCX_F64 <"v_cmpsx_le_f64", "v_cmpsx_ge_f64">;
+defm V_CMPSX_GT_F64 : VOPCX_F64 <"v_cmpsx_gt_f64">;
+defm V_CMPSX_LG_F64 : VOPCX_F64 <"v_cmpsx_lg_f64">;
+defm V_CMPSX_GE_F64 : VOPCX_F64 <"v_cmpsx_ge_f64">;
+defm V_CMPSX_O_F64 : VOPCX_F64 <"v_cmpsx_o_f64">;
+defm V_CMPSX_U_F64 : VOPCX_F64 <"v_cmpsx_u_f64">;
+defm V_CMPSX_NGE_F64 : VOPCX_F64 <"v_cmpsx_nge_f64", "v_cmpsx_nle_f64">;
+defm V_CMPSX_NLG_F64 : VOPCX_F64 <"v_cmpsx_nlg_f64">;
+defm V_CMPSX_NGT_F64 : VOPCX_F64 <"v_cmpsx_ngt_f64", "v_cmpsx_nlt_f64">;
+defm V_CMPSX_NLE_F64 : VOPCX_F64 <"v_cmpsx_nle_f64">;
+defm V_CMPSX_NEQ_F64 : VOPCX_F64 <"v_cmpsx_neq_f64">;
+defm V_CMPSX_NLT_F64 : VOPCX_F64 <"v_cmpsx_nlt_f64">;
+defm V_CMPSX_TRU_F64 : VOPCX_F64 <"v_cmpsx_tru_f64">;
+
+} // End SubtargetPredicate = isSICI
+
+let SubtargetPredicate = Has16BitInsts in {
+
+defm V_CMP_F_F16 : VOPC_F16 <"v_cmp_f_f16">;
+defm V_CMP_LT_F16 : VOPC_F16 <"v_cmp_lt_f16", COND_OLT, "v_cmp_gt_f16">;
+defm V_CMP_EQ_F16 : VOPC_F16 <"v_cmp_eq_f16", COND_OEQ>;
+defm V_CMP_LE_F16 : VOPC_F16 <"v_cmp_le_f16", COND_OLE, "v_cmp_ge_f16">;
+defm V_CMP_GT_F16 : VOPC_F16 <"v_cmp_gt_f16", COND_OGT>;
+defm V_CMP_LG_F16 : VOPC_F16 <"v_cmp_lg_f16", COND_ONE>;
+defm V_CMP_GE_F16 : VOPC_F16 <"v_cmp_ge_f16", COND_OGE>;
+defm V_CMP_O_F16 : VOPC_F16 <"v_cmp_o_f16", COND_O>;
+defm V_CMP_U_F16 : VOPC_F16 <"v_cmp_u_f16", COND_UO>;
+defm V_CMP_NGE_F16 : VOPC_F16 <"v_cmp_nge_f16", COND_ULT, "v_cmp_nle_f16">;
+defm V_CMP_NLG_F16 : VOPC_F16 <"v_cmp_nlg_f16", COND_UEQ>;
+defm V_CMP_NGT_F16 : VOPC_F16 <"v_cmp_ngt_f16", COND_ULE, "v_cmp_nlt_f16">;
+defm V_CMP_NLE_F16 : VOPC_F16 <"v_cmp_nle_f16", COND_UGT>;
+defm V_CMP_NEQ_F16 : VOPC_F16 <"v_cmp_neq_f16", COND_UNE>;
+defm V_CMP_NLT_F16 : VOPC_F16 <"v_cmp_nlt_f16", COND_UGE>;
+defm V_CMP_TRU_F16 : VOPC_F16 <"v_cmp_tru_f16">;
+
+defm V_CMPX_F_F16 : VOPCX_F16 <"v_cmpx_f_f16">;
+defm V_CMPX_LT_F16 : VOPCX_F16 <"v_cmpx_lt_f16", "v_cmpx_gt_f16">;
+defm V_CMPX_EQ_F16 : VOPCX_F16 <"v_cmpx_eq_f16">;
+defm V_CMPX_LE_F16 : VOPCX_F16 <"v_cmpx_le_f16", "v_cmpx_ge_f16">;
+defm V_CMPX_GT_F16 : VOPCX_F16 <"v_cmpx_gt_f16">;
+defm V_CMPX_LG_F16 : VOPCX_F16 <"v_cmpx_lg_f16">;
+defm V_CMPX_GE_F16 : VOPCX_F16 <"v_cmpx_ge_f16">;
+defm V_CMPX_O_F16 : VOPCX_F16 <"v_cmpx_o_f16">;
+defm V_CMPX_U_F16 : VOPCX_F16 <"v_cmpx_u_f16">;
+defm V_CMPX_NGE_F16 : VOPCX_F16 <"v_cmpx_nge_f16", "v_cmpx_nle_f16">;
+defm V_CMPX_NLG_F16 : VOPCX_F16 <"v_cmpx_nlg_f16">;
+defm V_CMPX_NGT_F16 : VOPCX_F16 <"v_cmpx_ngt_f16", "v_cmpx_nlt_f16">;
+defm V_CMPX_NLE_F16 : VOPCX_F16 <"v_cmpx_nle_f16">;
+defm V_CMPX_NEQ_F16 : VOPCX_F16 <"v_cmpx_neq_f16">;
+defm V_CMPX_NLT_F16 : VOPCX_F16 <"v_cmpx_nlt_f16">;
+defm V_CMPX_TRU_F16 : VOPCX_F16 <"v_cmpx_tru_f16">;
+
+defm V_CMP_F_I16 : VOPC_I16 <"v_cmp_f_i16">;
+defm V_CMP_LT_I16 : VOPC_I16 <"v_cmp_lt_i16", COND_SLT, "v_cmp_gt_i16">;
+defm V_CMP_EQ_I16 : VOPC_I16 <"v_cmp_eq_i16">;
+defm V_CMP_LE_I16 : VOPC_I16 <"v_cmp_le_i16", COND_SLE, "v_cmp_ge_i16">;
+defm V_CMP_GT_I16 : VOPC_I16 <"v_cmp_gt_i16", COND_SGT>;
+defm V_CMP_NE_I16 : VOPC_I16 <"v_cmp_ne_i16">;
+defm V_CMP_GE_I16 : VOPC_I16 <"v_cmp_ge_i16", COND_SGE>;
+defm V_CMP_T_I16 : VOPC_I16 <"v_cmp_t_i16">;
+
+defm V_CMP_F_U16 : VOPC_I16 <"v_cmp_f_u16">;
+defm V_CMP_LT_U16 : VOPC_I16 <"v_cmp_lt_u16", COND_ULT, "v_cmp_gt_u16">;
+defm V_CMP_EQ_U16 : VOPC_I16 <"v_cmp_eq_u16", COND_EQ>;
+defm V_CMP_LE_U16 : VOPC_I16 <"v_cmp_le_u16", COND_ULE, "v_cmp_ge_u16">;
+defm V_CMP_GT_U16 : VOPC_I16 <"v_cmp_gt_u16", COND_UGT>;
+defm V_CMP_NE_U16 : VOPC_I16 <"v_cmp_ne_u16", COND_NE>;
+defm V_CMP_GE_U16 : VOPC_I16 <"v_cmp_ge_u16", COND_UGE>;
+defm V_CMP_T_U16 : VOPC_I16 <"v_cmp_t_u16">;
+
+defm V_CMPX_F_I16 : VOPCX_I16 <"v_cmpx_f_i16">;
+defm V_CMPX_LT_I16 : VOPCX_I16 <"v_cmpx_lt_i16", "v_cmpx_gt_i16">;
+defm V_CMPX_EQ_I16 : VOPCX_I16 <"v_cmpx_eq_i16">;
+defm V_CMPX_LE_I16 : VOPCX_I16 <"v_cmpx_le_i16", "v_cmpx_ge_i16">;
+defm V_CMPX_GT_I16 : VOPCX_I16 <"v_cmpx_gt_i16">;
+defm V_CMPX_NE_I16 : VOPCX_I16 <"v_cmpx_ne_i16">;
+defm V_CMPX_GE_I16 : VOPCX_I16 <"v_cmpx_ge_i16">;
+defm V_CMPX_T_I16 : VOPCX_I16 <"v_cmpx_t_i16">;
+defm V_CMPX_F_U16 : VOPCX_I16 <"v_cmpx_f_u16">;
+
+defm V_CMPX_LT_U16 : VOPCX_I16 <"v_cmpx_lt_u16", "v_cmpx_gt_u16">;
+defm V_CMPX_EQ_U16 : VOPCX_I16 <"v_cmpx_eq_u16">;
+defm V_CMPX_LE_U16 : VOPCX_I16 <"v_cmpx_le_u16", "v_cmpx_ge_u16">;
+defm V_CMPX_GT_U16 : VOPCX_I16 <"v_cmpx_gt_u16">;
+defm V_CMPX_NE_U16 : VOPCX_I16 <"v_cmpx_ne_u16">;
+defm V_CMPX_GE_U16 : VOPCX_I16 <"v_cmpx_ge_u16">;
+defm V_CMPX_T_U16 : VOPCX_I16 <"v_cmpx_t_u16">;
+
+} // End SubtargetPredicate = Has16BitInsts
+
+defm V_CMP_F_I32 : VOPC_I32 <"v_cmp_f_i32">;
+defm V_CMP_LT_I32 : VOPC_I32 <"v_cmp_lt_i32", COND_SLT, "v_cmp_gt_i32">;
+defm V_CMP_EQ_I32 : VOPC_I32 <"v_cmp_eq_i32">;
+defm V_CMP_LE_I32 : VOPC_I32 <"v_cmp_le_i32", COND_SLE, "v_cmp_ge_i32">;
+defm V_CMP_GT_I32 : VOPC_I32 <"v_cmp_gt_i32", COND_SGT>;
+defm V_CMP_NE_I32 : VOPC_I32 <"v_cmp_ne_i32">;
+defm V_CMP_GE_I32 : VOPC_I32 <"v_cmp_ge_i32", COND_SGE>;
+defm V_CMP_T_I32 : VOPC_I32 <"v_cmp_t_i32">;
+
+defm V_CMPX_F_I32 : VOPCX_I32 <"v_cmpx_f_i32">;
+defm V_CMPX_LT_I32 : VOPCX_I32 <"v_cmpx_lt_i32", "v_cmpx_gt_i32">;
+defm V_CMPX_EQ_I32 : VOPCX_I32 <"v_cmpx_eq_i32">;
+defm V_CMPX_LE_I32 : VOPCX_I32 <"v_cmpx_le_i32", "v_cmpx_ge_i32">;
+defm V_CMPX_GT_I32 : VOPCX_I32 <"v_cmpx_gt_i32">;
+defm V_CMPX_NE_I32 : VOPCX_I32 <"v_cmpx_ne_i32">;
+defm V_CMPX_GE_I32 : VOPCX_I32 <"v_cmpx_ge_i32">;
+defm V_CMPX_T_I32 : VOPCX_I32 <"v_cmpx_t_i32">;
+
+defm V_CMP_F_I64 : VOPC_I64 <"v_cmp_f_i64">;
+defm V_CMP_LT_I64 : VOPC_I64 <"v_cmp_lt_i64", COND_SLT, "v_cmp_gt_i64">;
+defm V_CMP_EQ_I64 : VOPC_I64 <"v_cmp_eq_i64">;
+defm V_CMP_LE_I64 : VOPC_I64 <"v_cmp_le_i64", COND_SLE, "v_cmp_ge_i64">;
+defm V_CMP_GT_I64 : VOPC_I64 <"v_cmp_gt_i64", COND_SGT>;
+defm V_CMP_NE_I64 : VOPC_I64 <"v_cmp_ne_i64">;
+defm V_CMP_GE_I64 : VOPC_I64 <"v_cmp_ge_i64", COND_SGE>;
+defm V_CMP_T_I64 : VOPC_I64 <"v_cmp_t_i64">;
+
+defm V_CMPX_F_I64 : VOPCX_I64 <"v_cmpx_f_i64">;
+defm V_CMPX_LT_I64 : VOPCX_I64 <"v_cmpx_lt_i64", "v_cmpx_gt_i64">;
+defm V_CMPX_EQ_I64 : VOPCX_I64 <"v_cmpx_eq_i64">;
+defm V_CMPX_LE_I64 : VOPCX_I64 <"v_cmpx_le_i64", "v_cmpx_ge_i64">;
+defm V_CMPX_GT_I64 : VOPCX_I64 <"v_cmpx_gt_i64">;
+defm V_CMPX_NE_I64 : VOPCX_I64 <"v_cmpx_ne_i64">;
+defm V_CMPX_GE_I64 : VOPCX_I64 <"v_cmpx_ge_i64">;
+defm V_CMPX_T_I64 : VOPCX_I64 <"v_cmpx_t_i64">;
+
+defm V_CMP_F_U32 : VOPC_I32 <"v_cmp_f_u32">;
+defm V_CMP_LT_U32 : VOPC_I32 <"v_cmp_lt_u32", COND_ULT, "v_cmp_gt_u32">;
+defm V_CMP_EQ_U32 : VOPC_I32 <"v_cmp_eq_u32", COND_EQ>;
+defm V_CMP_LE_U32 : VOPC_I32 <"v_cmp_le_u32", COND_ULE, "v_cmp_ge_u32">;
+defm V_CMP_GT_U32 : VOPC_I32 <"v_cmp_gt_u32", COND_UGT>;
+defm V_CMP_NE_U32 : VOPC_I32 <"v_cmp_ne_u32", COND_NE>;
+defm V_CMP_GE_U32 : VOPC_I32 <"v_cmp_ge_u32", COND_UGE>;
+defm V_CMP_T_U32 : VOPC_I32 <"v_cmp_t_u32">;
+
+defm V_CMPX_F_U32 : VOPCX_I32 <"v_cmpx_f_u32">;
+defm V_CMPX_LT_U32 : VOPCX_I32 <"v_cmpx_lt_u32", "v_cmpx_gt_u32">;
+defm V_CMPX_EQ_U32 : VOPCX_I32 <"v_cmpx_eq_u32">;
+defm V_CMPX_LE_U32 : VOPCX_I32 <"v_cmpx_le_u32", "v_cmpx_ge_u32">;
+defm V_CMPX_GT_U32 : VOPCX_I32 <"v_cmpx_gt_u32">;
+defm V_CMPX_NE_U32 : VOPCX_I32 <"v_cmpx_ne_u32">;
+defm V_CMPX_GE_U32 : VOPCX_I32 <"v_cmpx_ge_u32">;
+defm V_CMPX_T_U32 : VOPCX_I32 <"v_cmpx_t_u32">;
+
+defm V_CMP_F_U64 : VOPC_I64 <"v_cmp_f_u64">;
+defm V_CMP_LT_U64 : VOPC_I64 <"v_cmp_lt_u64", COND_ULT, "v_cmp_gt_u64">;
+defm V_CMP_EQ_U64 : VOPC_I64 <"v_cmp_eq_u64", COND_EQ>;
+defm V_CMP_LE_U64 : VOPC_I64 <"v_cmp_le_u64", COND_ULE, "v_cmp_ge_u64">;
+defm V_CMP_GT_U64 : VOPC_I64 <"v_cmp_gt_u64", COND_UGT>;
+defm V_CMP_NE_U64 : VOPC_I64 <"v_cmp_ne_u64", COND_NE>;
+defm V_CMP_GE_U64 : VOPC_I64 <"v_cmp_ge_u64", COND_UGE>;
+defm V_CMP_T_U64 : VOPC_I64 <"v_cmp_t_u64">;
+
+defm V_CMPX_F_U64 : VOPCX_I64 <"v_cmpx_f_u64">;
+defm V_CMPX_LT_U64 : VOPCX_I64 <"v_cmpx_lt_u64", "v_cmpx_gt_u64">;
+defm V_CMPX_EQ_U64 : VOPCX_I64 <"v_cmpx_eq_u64">;
+defm V_CMPX_LE_U64 : VOPCX_I64 <"v_cmpx_le_u64", "v_cmpx_ge_u64">;
+defm V_CMPX_GT_U64 : VOPCX_I64 <"v_cmpx_gt_u64">;
+defm V_CMPX_NE_U64 : VOPCX_I64 <"v_cmpx_ne_u64">;
+defm V_CMPX_GE_U64 : VOPCX_I64 <"v_cmpx_ge_u64">;
+defm V_CMPX_T_U64 : VOPCX_I64 <"v_cmpx_t_u64">;
+
+//===----------------------------------------------------------------------===//
+// Class instructions
+//===----------------------------------------------------------------------===//
+
+class VOPC_Class_Profile<list<SchedReadWrite> sched, ValueType vt> :
+ VOPC_Profile<sched, vt, i32> {
+ let Ins64 = (ins Src0Mod:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1);
+ let Asm64 = "$sdst, $src0_modifiers, $src1";
+ let InsSDWA = (ins Src0Mod:$src0_modifiers, Src0RC64:$src0,
+ Int32InputMods:$src1_modifiers, Src1RC64:$src1,
+ clampmod:$clamp, src0_sel:$src0_sel, src1_sel:$src1_sel);
+ let AsmSDWA = " vcc, $src0_modifiers, $src1_modifiers$clamp $src0_sel $src1_sel";
+ let HasSrc1Mods = 0;
+ let HasClamp = 0;
+ let HasOMod = 0;
+}
+
+class getVOPCClassPat64 <VOPProfile P> {
+ list<dag> ret =
+ [(set i1:$sdst,
+ (AMDGPUfp_class
+ (P.Src0VT (VOP3Mods0Clamp0OMod P.Src0VT:$src0, i32:$src0_modifiers)),
+ P.Src1VT:$src1))];
+}
+
+// Special case for class instructions which only have modifiers on
+// the 1st source operand.
+multiclass VOPC_Class_Pseudos <string opName, VOPC_Profile p, bit DefExec> {
+ def _e32 : VOPC_Pseudo <opName, p> {
+ let Defs = !if(DefExec, [VCC, EXEC], [VCC]);
+ let SchedRW = p.Schedule;
+ let isConvergent = DefExec;
+ }
+
+ def _e64 : VOP3_Pseudo<opName, p, getVOPCClassPat64<p>.ret> {
+ let Defs = !if(DefExec, [EXEC], []);
+ let SchedRW = p.Schedule;
+ }
+
+ def _sdwa : VOPC_SDWA_Pseudo <opName, p> {
+ let Defs = !if(DefExec, [VCC, EXEC], [VCC]);
+ let SchedRW = p.Schedule;
+ let isConvergent = DefExec;
+ }
+}
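+
+// For illustration: the class compares below receive the same _e32/_e64/_sdwa
+// triple as the ordinary compares, but take an i32 class mask as src1 and,
+// per VOPC_Class_Profile, carry modifiers only on src0.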
+
+def VOPC_I1_F16_I32 : VOPC_Class_Profile<[Write32Bit], f16>;
+def VOPC_I1_F32_I32 : VOPC_Class_Profile<[Write32Bit], f32>;
+def VOPC_I1_F64_I32 : VOPC_Class_Profile<[WriteDoubleAdd], f64>;
+
+multiclass VOPC_CLASS_F16 <string opName> :
+ VOPC_Class_Pseudos <opName, VOPC_I1_F16_I32, 0>;
+
+multiclass VOPCX_CLASS_F16 <string opName> :
+ VOPC_Class_Pseudos <opName, VOPC_I1_F32_I32, 1>;
+
+multiclass VOPC_CLASS_F32 <string opName> :
+ VOPC_Class_Pseudos <opName, VOPC_I1_F32_I32, 0>;
+
+multiclass VOPCX_CLASS_F32 <string opName> :
+ VOPC_Class_Pseudos <opName, VOPC_I1_F32_I32, 1>;
+
+multiclass VOPC_CLASS_F64 <string opName> :
+ VOPC_Class_Pseudos <opName, VOPC_I1_F64_I32, 0>;
+
+multiclass VOPCX_CLASS_F64 <string opName> :
+ VOPC_Class_Pseudos <opName, VOPC_I1_F64_I32, 1>;
+
+defm V_CMP_CLASS_F32 : VOPC_CLASS_F32 <"v_cmp_class_f32">;
+defm V_CMPX_CLASS_F32 : VOPCX_CLASS_F32 <"v_cmpx_class_f32">;
+defm V_CMP_CLASS_F64 : VOPC_CLASS_F64 <"v_cmp_class_f64">;
+defm V_CMPX_CLASS_F64 : VOPCX_CLASS_F64 <"v_cmpx_class_f64">;
+defm V_CMP_CLASS_F16 : VOPC_CLASS_F16 <"v_cmp_class_f16">;
+defm V_CMPX_CLASS_F16 : VOPCX_CLASS_F16 <"v_cmpx_class_f16">;
+
+//===----------------------------------------------------------------------===//
+// V_ICMP Intrinsic Pattern.
+//===----------------------------------------------------------------------===//
+
+let Predicates = [isGCN] in {
+
+class ICMP_Pattern <PatLeaf cond, Instruction inst, ValueType vt> : Pat <
+ (AMDGPUsetcc vt:$src0, vt:$src1, cond),
+ (inst $src0, $src1)
+>;
+
+def : ICMP_Pattern <COND_EQ, V_CMP_EQ_U32_e64, i32>;
+def : ICMP_Pattern <COND_NE, V_CMP_NE_U32_e64, i32>;
+def : ICMP_Pattern <COND_UGT, V_CMP_GT_U32_e64, i32>;
+def : ICMP_Pattern <COND_UGE, V_CMP_GE_U32_e64, i32>;
+def : ICMP_Pattern <COND_ULT, V_CMP_LT_U32_e64, i32>;
+def : ICMP_Pattern <COND_ULE, V_CMP_LE_U32_e64, i32>;
+def : ICMP_Pattern <COND_SGT, V_CMP_GT_I32_e64, i32>;
+def : ICMP_Pattern <COND_SGE, V_CMP_GE_I32_e64, i32>;
+def : ICMP_Pattern <COND_SLT, V_CMP_LT_I32_e64, i32>;
+def : ICMP_Pattern <COND_SLE, V_CMP_LE_I32_e64, i32>;
+
+def : ICMP_Pattern <COND_EQ, V_CMP_EQ_U64_e64, i64>;
+def : ICMP_Pattern <COND_NE, V_CMP_NE_U64_e64, i64>;
+def : ICMP_Pattern <COND_UGT, V_CMP_GT_U64_e64, i64>;
+def : ICMP_Pattern <COND_UGE, V_CMP_GE_U64_e64, i64>;
+def : ICMP_Pattern <COND_ULT, V_CMP_LT_U64_e64, i64>;
+def : ICMP_Pattern <COND_ULE, V_CMP_LE_U64_e64, i64>;
+def : ICMP_Pattern <COND_SGT, V_CMP_GT_I64_e64, i64>;
+def : ICMP_Pattern <COND_SGE, V_CMP_GE_I64_e64, i64>;
+def : ICMP_Pattern <COND_SLT, V_CMP_LT_I64_e64, i64>;
+def : ICMP_Pattern <COND_SLE, V_CMP_LE_I64_e64, i64>;
+
+class FCMP_Pattern <PatLeaf cond, Instruction inst, ValueType vt> : Pat <
+ (i64 (AMDGPUsetcc (vt (VOP3Mods vt:$src0, i32:$src0_modifiers)),
+ (vt (VOP3Mods vt:$src1, i32:$src1_modifiers)), cond)),
+ (inst $src0_modifiers, $src0, $src1_modifiers, $src1,
+ DSTCLAMP.NONE, DSTOMOD.NONE)
+>;
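+
+// A rough example (hypothetical IR, not taken from this file): because both
+// sources go through VOP3Mods, a compare such as
+//   (AMDGPUsetcc (fneg f32:$a), f32:$b, COND_OLT)
+// can fold the negation into $src0_modifiers of V_CMP_LT_F32_e64 instead of
+// materializing it with a separate instruction.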
+
+def : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F32_e64, f32>;
+def : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F32_e64, f32>;
+def : FCMP_Pattern <COND_OGT, V_CMP_GT_F32_e64, f32>;
+def : FCMP_Pattern <COND_OGE, V_CMP_GE_F32_e64, f32>;
+def : FCMP_Pattern <COND_OLT, V_CMP_LT_F32_e64, f32>;
+def : FCMP_Pattern <COND_OLE, V_CMP_LE_F32_e64, f32>;
+
+def : FCMP_Pattern <COND_OEQ, V_CMP_EQ_F64_e64, f64>;
+def : FCMP_Pattern <COND_ONE, V_CMP_NEQ_F64_e64, f64>;
+def : FCMP_Pattern <COND_OGT, V_CMP_GT_F64_e64, f64>;
+def : FCMP_Pattern <COND_OGE, V_CMP_GE_F64_e64, f64>;
+def : FCMP_Pattern <COND_OLT, V_CMP_LT_F64_e64, f64>;
+def : FCMP_Pattern <COND_OLE, V_CMP_LE_F64_e64, f64>;
+
+def : FCMP_Pattern <COND_UEQ, V_CMP_NLG_F32_e64, f32>;
+def : FCMP_Pattern <COND_UNE, V_CMP_NEQ_F32_e64, f32>;
+def : FCMP_Pattern <COND_UGT, V_CMP_NLE_F32_e64, f32>;
+def : FCMP_Pattern <COND_UGE, V_CMP_NLT_F32_e64, f32>;
+def : FCMP_Pattern <COND_ULT, V_CMP_NGE_F32_e64, f32>;
+def : FCMP_Pattern <COND_ULE, V_CMP_NGT_F32_e64, f32>;
+
+def : FCMP_Pattern <COND_UEQ, V_CMP_NLG_F64_e64, f64>;
+def : FCMP_Pattern <COND_UNE, V_CMP_NEQ_F64_e64, f64>;
+def : FCMP_Pattern <COND_UGT, V_CMP_NLE_F64_e64, f64>;
+def : FCMP_Pattern <COND_UGE, V_CMP_NLT_F64_e64, f64>;
+def : FCMP_Pattern <COND_ULT, V_CMP_NGE_F64_e64, f64>;
+def : FCMP_Pattern <COND_ULE, V_CMP_NGT_F64_e64, f64>;
+
+} // End Predicates = [isGCN]
+
+//===----------------------------------------------------------------------===//
+// Target
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// SI
+//===----------------------------------------------------------------------===//
+
+multiclass VOPC_Real_si <bits<9> op> {
+ let AssemblerPredicates = [isSICI], DecoderNamespace = "SICI" in {
+ def _e32_si :
+ VOPC_Real<!cast<VOPC_Pseudo>(NAME#"_e32"), SIEncodingFamily.SI>,
+ VOPCe<op{7-0}>;
+
+ def _e64_si :
+ VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.SI>,
+ VOP3a_si <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> {
+ // Encoding used for VOPC instructions encoded as VOP3
+ // Differs from VOP3e by destination name (sdst) as VOPC doesn't have vector dst
+ bits<8> sdst;
+ let Inst{7-0} = sdst;
+ }
+ }
+ def : VOPCInstAlias <!cast<VOP3_Pseudo>(NAME#"_e64"),
+ !cast<Instruction>(NAME#"_e32_si")> {
+ let AssemblerPredicate = isSICI;
+ }
+}
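+
+// For illustration: each defm below produces NAME_e32_si and NAME_e64_si real
+// encodings, plus an alias that lets the bare mnemonic with the 32-bit operand
+// list (e.g. "v_cmp_lt_f32 vcc, v0, v1") assemble to the _e32_si form.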
+
+defm V_CMP_F_F32 : VOPC_Real_si <0x0>;
+defm V_CMP_LT_F32 : VOPC_Real_si <0x1>;
+defm V_CMP_EQ_F32 : VOPC_Real_si <0x2>;
+defm V_CMP_LE_F32 : VOPC_Real_si <0x3>;
+defm V_CMP_GT_F32 : VOPC_Real_si <0x4>;
+defm V_CMP_LG_F32 : VOPC_Real_si <0x5>;
+defm V_CMP_GE_F32 : VOPC_Real_si <0x6>;
+defm V_CMP_O_F32 : VOPC_Real_si <0x7>;
+defm V_CMP_U_F32 : VOPC_Real_si <0x8>;
+defm V_CMP_NGE_F32 : VOPC_Real_si <0x9>;
+defm V_CMP_NLG_F32 : VOPC_Real_si <0xa>;
+defm V_CMP_NGT_F32 : VOPC_Real_si <0xb>;
+defm V_CMP_NLE_F32 : VOPC_Real_si <0xc>;
+defm V_CMP_NEQ_F32 : VOPC_Real_si <0xd>;
+defm V_CMP_NLT_F32 : VOPC_Real_si <0xe>;
+defm V_CMP_TRU_F32 : VOPC_Real_si <0xf>;
+
+defm V_CMPX_F_F32 : VOPC_Real_si <0x10>;
+defm V_CMPX_LT_F32 : VOPC_Real_si <0x11>;
+defm V_CMPX_EQ_F32 : VOPC_Real_si <0x12>;
+defm V_CMPX_LE_F32 : VOPC_Real_si <0x13>;
+defm V_CMPX_GT_F32 : VOPC_Real_si <0x14>;
+defm V_CMPX_LG_F32 : VOPC_Real_si <0x15>;
+defm V_CMPX_GE_F32 : VOPC_Real_si <0x16>;
+defm V_CMPX_O_F32 : VOPC_Real_si <0x17>;
+defm V_CMPX_U_F32 : VOPC_Real_si <0x18>;
+defm V_CMPX_NGE_F32 : VOPC_Real_si <0x19>;
+defm V_CMPX_NLG_F32 : VOPC_Real_si <0x1a>;
+defm V_CMPX_NGT_F32 : VOPC_Real_si <0x1b>;
+defm V_CMPX_NLE_F32 : VOPC_Real_si <0x1c>;
+defm V_CMPX_NEQ_F32 : VOPC_Real_si <0x1d>;
+defm V_CMPX_NLT_F32 : VOPC_Real_si <0x1e>;
+defm V_CMPX_TRU_F32 : VOPC_Real_si <0x1f>;
+
+defm V_CMP_F_F64 : VOPC_Real_si <0x20>;
+defm V_CMP_LT_F64 : VOPC_Real_si <0x21>;
+defm V_CMP_EQ_F64 : VOPC_Real_si <0x22>;
+defm V_CMP_LE_F64 : VOPC_Real_si <0x23>;
+defm V_CMP_GT_F64 : VOPC_Real_si <0x24>;
+defm V_CMP_LG_F64 : VOPC_Real_si <0x25>;
+defm V_CMP_GE_F64 : VOPC_Real_si <0x26>;
+defm V_CMP_O_F64 : VOPC_Real_si <0x27>;
+defm V_CMP_U_F64 : VOPC_Real_si <0x28>;
+defm V_CMP_NGE_F64 : VOPC_Real_si <0x29>;
+defm V_CMP_NLG_F64 : VOPC_Real_si <0x2a>;
+defm V_CMP_NGT_F64 : VOPC_Real_si <0x2b>;
+defm V_CMP_NLE_F64 : VOPC_Real_si <0x2c>;
+defm V_CMP_NEQ_F64 : VOPC_Real_si <0x2d>;
+defm V_CMP_NLT_F64 : VOPC_Real_si <0x2e>;
+defm V_CMP_TRU_F64 : VOPC_Real_si <0x2f>;
+
+defm V_CMPX_F_F64 : VOPC_Real_si <0x30>;
+defm V_CMPX_LT_F64 : VOPC_Real_si <0x31>;
+defm V_CMPX_EQ_F64 : VOPC_Real_si <0x32>;
+defm V_CMPX_LE_F64 : VOPC_Real_si <0x33>;
+defm V_CMPX_GT_F64 : VOPC_Real_si <0x34>;
+defm V_CMPX_LG_F64 : VOPC_Real_si <0x35>;
+defm V_CMPX_GE_F64 : VOPC_Real_si <0x36>;
+defm V_CMPX_O_F64 : VOPC_Real_si <0x37>;
+defm V_CMPX_U_F64 : VOPC_Real_si <0x38>;
+defm V_CMPX_NGE_F64 : VOPC_Real_si <0x39>;
+defm V_CMPX_NLG_F64 : VOPC_Real_si <0x3a>;
+defm V_CMPX_NGT_F64 : VOPC_Real_si <0x3b>;
+defm V_CMPX_NLE_F64 : VOPC_Real_si <0x3c>;
+defm V_CMPX_NEQ_F64 : VOPC_Real_si <0x3d>;
+defm V_CMPX_NLT_F64 : VOPC_Real_si <0x3e>;
+defm V_CMPX_TRU_F64 : VOPC_Real_si <0x3f>;
+
+defm V_CMPS_F_F32 : VOPC_Real_si <0x40>;
+defm V_CMPS_LT_F32 : VOPC_Real_si <0x41>;
+defm V_CMPS_EQ_F32 : VOPC_Real_si <0x42>;
+defm V_CMPS_LE_F32 : VOPC_Real_si <0x43>;
+defm V_CMPS_GT_F32 : VOPC_Real_si <0x44>;
+defm V_CMPS_LG_F32 : VOPC_Real_si <0x45>;
+defm V_CMPS_GE_F32 : VOPC_Real_si <0x46>;
+defm V_CMPS_O_F32 : VOPC_Real_si <0x47>;
+defm V_CMPS_U_F32 : VOPC_Real_si <0x48>;
+defm V_CMPS_NGE_F32 : VOPC_Real_si <0x49>;
+defm V_CMPS_NLG_F32 : VOPC_Real_si <0x4a>;
+defm V_CMPS_NGT_F32 : VOPC_Real_si <0x4b>;
+defm V_CMPS_NLE_F32 : VOPC_Real_si <0x4c>;
+defm V_CMPS_NEQ_F32 : VOPC_Real_si <0x4d>;
+defm V_CMPS_NLT_F32 : VOPC_Real_si <0x4e>;
+defm V_CMPS_TRU_F32 : VOPC_Real_si <0x4f>;
+
+defm V_CMPSX_F_F32 : VOPC_Real_si <0x50>;
+defm V_CMPSX_LT_F32 : VOPC_Real_si <0x51>;
+defm V_CMPSX_EQ_F32 : VOPC_Real_si <0x52>;
+defm V_CMPSX_LE_F32 : VOPC_Real_si <0x53>;
+defm V_CMPSX_GT_F32 : VOPC_Real_si <0x54>;
+defm V_CMPSX_LG_F32 : VOPC_Real_si <0x55>;
+defm V_CMPSX_GE_F32 : VOPC_Real_si <0x56>;
+defm V_CMPSX_O_F32 : VOPC_Real_si <0x57>;
+defm V_CMPSX_U_F32 : VOPC_Real_si <0x58>;
+defm V_CMPSX_NGE_F32 : VOPC_Real_si <0x59>;
+defm V_CMPSX_NLG_F32 : VOPC_Real_si <0x5a>;
+defm V_CMPSX_NGT_F32 : VOPC_Real_si <0x5b>;
+defm V_CMPSX_NLE_F32 : VOPC_Real_si <0x5c>;
+defm V_CMPSX_NEQ_F32 : VOPC_Real_si <0x5d>;
+defm V_CMPSX_NLT_F32 : VOPC_Real_si <0x5e>;
+defm V_CMPSX_TRU_F32 : VOPC_Real_si <0x5f>;
+
+defm V_CMPS_F_F64 : VOPC_Real_si <0x60>;
+defm V_CMPS_LT_F64 : VOPC_Real_si <0x61>;
+defm V_CMPS_EQ_F64 : VOPC_Real_si <0x62>;
+defm V_CMPS_LE_F64 : VOPC_Real_si <0x63>;
+defm V_CMPS_GT_F64 : VOPC_Real_si <0x64>;
+defm V_CMPS_LG_F64 : VOPC_Real_si <0x65>;
+defm V_CMPS_GE_F64 : VOPC_Real_si <0x66>;
+defm V_CMPS_O_F64 : VOPC_Real_si <0x67>;
+defm V_CMPS_U_F64 : VOPC_Real_si <0x68>;
+defm V_CMPS_NGE_F64 : VOPC_Real_si <0x69>;
+defm V_CMPS_NLG_F64 : VOPC_Real_si <0x6a>;
+defm V_CMPS_NGT_F64 : VOPC_Real_si <0x6b>;
+defm V_CMPS_NLE_F64 : VOPC_Real_si <0x6c>;
+defm V_CMPS_NEQ_F64 : VOPC_Real_si <0x6d>;
+defm V_CMPS_NLT_F64 : VOPC_Real_si <0x6e>;
+defm V_CMPS_TRU_F64 : VOPC_Real_si <0x6f>;
+
+defm V_CMPSX_F_F64 : VOPC_Real_si <0x70>;
+defm V_CMPSX_LT_F64 : VOPC_Real_si <0x71>;
+defm V_CMPSX_EQ_F64 : VOPC_Real_si <0x72>;
+defm V_CMPSX_LE_F64 : VOPC_Real_si <0x73>;
+defm V_CMPSX_GT_F64 : VOPC_Real_si <0x74>;
+defm V_CMPSX_LG_F64 : VOPC_Real_si <0x75>;
+defm V_CMPSX_GE_F64 : VOPC_Real_si <0x76>;
+defm V_CMPSX_O_F64 : VOPC_Real_si <0x77>;
+defm V_CMPSX_U_F64 : VOPC_Real_si <0x78>;
+defm V_CMPSX_NGE_F64 : VOPC_Real_si <0x79>;
+defm V_CMPSX_NLG_F64 : VOPC_Real_si <0x7a>;
+defm V_CMPSX_NGT_F64 : VOPC_Real_si <0x7b>;
+defm V_CMPSX_NLE_F64 : VOPC_Real_si <0x7c>;
+defm V_CMPSX_NEQ_F64 : VOPC_Real_si <0x7d>;
+defm V_CMPSX_NLT_F64 : VOPC_Real_si <0x7e>;
+defm V_CMPSX_TRU_F64 : VOPC_Real_si <0x7f>;
+
+defm V_CMP_F_I32 : VOPC_Real_si <0x80>;
+defm V_CMP_LT_I32 : VOPC_Real_si <0x81>;
+defm V_CMP_EQ_I32 : VOPC_Real_si <0x82>;
+defm V_CMP_LE_I32 : VOPC_Real_si <0x83>;
+defm V_CMP_GT_I32 : VOPC_Real_si <0x84>;
+defm V_CMP_NE_I32 : VOPC_Real_si <0x85>;
+defm V_CMP_GE_I32 : VOPC_Real_si <0x86>;
+defm V_CMP_T_I32 : VOPC_Real_si <0x87>;
+
+defm V_CMPX_F_I32 : VOPC_Real_si <0x90>;
+defm V_CMPX_LT_I32 : VOPC_Real_si <0x91>;
+defm V_CMPX_EQ_I32 : VOPC_Real_si <0x92>;
+defm V_CMPX_LE_I32 : VOPC_Real_si <0x93>;
+defm V_CMPX_GT_I32 : VOPC_Real_si <0x94>;
+defm V_CMPX_NE_I32 : VOPC_Real_si <0x95>;
+defm V_CMPX_GE_I32 : VOPC_Real_si <0x96>;
+defm V_CMPX_T_I32 : VOPC_Real_si <0x97>;
+
+defm V_CMP_F_I64 : VOPC_Real_si <0xa0>;
+defm V_CMP_LT_I64 : VOPC_Real_si <0xa1>;
+defm V_CMP_EQ_I64 : VOPC_Real_si <0xa2>;
+defm V_CMP_LE_I64 : VOPC_Real_si <0xa3>;
+defm V_CMP_GT_I64 : VOPC_Real_si <0xa4>;
+defm V_CMP_NE_I64 : VOPC_Real_si <0xa5>;
+defm V_CMP_GE_I64 : VOPC_Real_si <0xa6>;
+defm V_CMP_T_I64 : VOPC_Real_si <0xa7>;
+
+defm V_CMPX_F_I64 : VOPC_Real_si <0xb0>;
+defm V_CMPX_LT_I64 : VOPC_Real_si <0xb1>;
+defm V_CMPX_EQ_I64 : VOPC_Real_si <0xb2>;
+defm V_CMPX_LE_I64 : VOPC_Real_si <0xb3>;
+defm V_CMPX_GT_I64 : VOPC_Real_si <0xb4>;
+defm V_CMPX_NE_I64 : VOPC_Real_si <0xb5>;
+defm V_CMPX_GE_I64 : VOPC_Real_si <0xb6>;
+defm V_CMPX_T_I64 : VOPC_Real_si <0xb7>;
+
+defm V_CMP_F_U32 : VOPC_Real_si <0xc0>;
+defm V_CMP_LT_U32 : VOPC_Real_si <0xc1>;
+defm V_CMP_EQ_U32 : VOPC_Real_si <0xc2>;
+defm V_CMP_LE_U32 : VOPC_Real_si <0xc3>;
+defm V_CMP_GT_U32 : VOPC_Real_si <0xc4>;
+defm V_CMP_NE_U32 : VOPC_Real_si <0xc5>;
+defm V_CMP_GE_U32 : VOPC_Real_si <0xc6>;
+defm V_CMP_T_U32 : VOPC_Real_si <0xc7>;
+
+defm V_CMPX_F_U32 : VOPC_Real_si <0xd0>;
+defm V_CMPX_LT_U32 : VOPC_Real_si <0xd1>;
+defm V_CMPX_EQ_U32 : VOPC_Real_si <0xd2>;
+defm V_CMPX_LE_U32 : VOPC_Real_si <0xd3>;
+defm V_CMPX_GT_U32 : VOPC_Real_si <0xd4>;
+defm V_CMPX_NE_U32 : VOPC_Real_si <0xd5>;
+defm V_CMPX_GE_U32 : VOPC_Real_si <0xd6>;
+defm V_CMPX_T_U32 : VOPC_Real_si <0xd7>;
+
+defm V_CMP_F_U64 : VOPC_Real_si <0xe0>;
+defm V_CMP_LT_U64 : VOPC_Real_si <0xe1>;
+defm V_CMP_EQ_U64 : VOPC_Real_si <0xe2>;
+defm V_CMP_LE_U64 : VOPC_Real_si <0xe3>;
+defm V_CMP_GT_U64 : VOPC_Real_si <0xe4>;
+defm V_CMP_NE_U64 : VOPC_Real_si <0xe5>;
+defm V_CMP_GE_U64 : VOPC_Real_si <0xe6>;
+defm V_CMP_T_U64 : VOPC_Real_si <0xe7>;
+
+defm V_CMPX_F_U64 : VOPC_Real_si <0xf0>;
+defm V_CMPX_LT_U64 : VOPC_Real_si <0xf1>;
+defm V_CMPX_EQ_U64 : VOPC_Real_si <0xf2>;
+defm V_CMPX_LE_U64 : VOPC_Real_si <0xf3>;
+defm V_CMPX_GT_U64 : VOPC_Real_si <0xf4>;
+defm V_CMPX_NE_U64 : VOPC_Real_si <0xf5>;
+defm V_CMPX_GE_U64 : VOPC_Real_si <0xf6>;
+defm V_CMPX_T_U64 : VOPC_Real_si <0xf7>;
+
+defm V_CMP_CLASS_F32 : VOPC_Real_si <0x88>;
+defm V_CMPX_CLASS_F32 : VOPC_Real_si <0x98>;
+defm V_CMP_CLASS_F64 : VOPC_Real_si <0xa8>;
+defm V_CMPX_CLASS_F64 : VOPC_Real_si <0xb8>;
+
+//===----------------------------------------------------------------------===//
+// VI
+//===----------------------------------------------------------------------===//
+
+multiclass VOPC_Real_vi <bits<10> op> {
+ let AssemblerPredicates = [isVI], DecoderNamespace = "VI" in {
+ def _e32_vi :
+ VOPC_Real<!cast<VOPC_Pseudo>(NAME#"_e32"), SIEncodingFamily.VI>,
+ VOPCe<op{7-0}>;
+
+ def _e64_vi :
+ VOP3_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
+ VOP3a_vi <op, !cast<VOP3_Pseudo>(NAME#"_e64").Pfl> {
+ // Encoding used for VOPC instructions encoded as VOP3
+ // Differs from VOP3e by destination name (sdst) as VOPC doesn't have vector dst
+ bits<8> sdst;
+ let Inst{7-0} = sdst;
+ }
+ }
+
+ def _sdwa_vi :
+ VOP_SDWA_Real <!cast<VOPC_SDWA_Pseudo>(NAME#"_sdwa")>,
+ VOPC_SDWAe <op{7-0}, !cast<VOPC_SDWA_Pseudo>(NAME#"_sdwa").Pfl>;
+
+ def : VOPCInstAlias <!cast<VOP3_Pseudo>(NAME#"_e64"),
+ !cast<Instruction>(NAME#"_e32_vi")> {
+ let AssemblerPredicate = isVI;
+ }
+}
+
+defm V_CMP_CLASS_F32 : VOPC_Real_vi <0x10>;
+defm V_CMPX_CLASS_F32 : VOPC_Real_vi <0x11>;
+defm V_CMP_CLASS_F64 : VOPC_Real_vi <0x12>;
+defm V_CMPX_CLASS_F64 : VOPC_Real_vi <0x13>;
+defm V_CMP_CLASS_F16 : VOPC_Real_vi <0x14>;
+defm V_CMPX_CLASS_F16 : VOPC_Real_vi <0x15>;
+
+defm V_CMP_F_F16 : VOPC_Real_vi <0x20>;
+defm V_CMP_LT_F16 : VOPC_Real_vi <0x21>;
+defm V_CMP_EQ_F16 : VOPC_Real_vi <0x22>;
+defm V_CMP_LE_F16 : VOPC_Real_vi <0x23>;
+defm V_CMP_GT_F16 : VOPC_Real_vi <0x24>;
+defm V_CMP_LG_F16 : VOPC_Real_vi <0x25>;
+defm V_CMP_GE_F16 : VOPC_Real_vi <0x26>;
+defm V_CMP_O_F16 : VOPC_Real_vi <0x27>;
+defm V_CMP_U_F16 : VOPC_Real_vi <0x28>;
+defm V_CMP_NGE_F16 : VOPC_Real_vi <0x29>;
+defm V_CMP_NLG_F16 : VOPC_Real_vi <0x2a>;
+defm V_CMP_NGT_F16 : VOPC_Real_vi <0x2b>;
+defm V_CMP_NLE_F16 : VOPC_Real_vi <0x2c>;
+defm V_CMP_NEQ_F16 : VOPC_Real_vi <0x2d>;
+defm V_CMP_NLT_F16 : VOPC_Real_vi <0x2e>;
+defm V_CMP_TRU_F16 : VOPC_Real_vi <0x2f>;
+
+defm V_CMPX_F_F16 : VOPC_Real_vi <0x30>;
+defm V_CMPX_LT_F16 : VOPC_Real_vi <0x31>;
+defm V_CMPX_EQ_F16 : VOPC_Real_vi <0x32>;
+defm V_CMPX_LE_F16 : VOPC_Real_vi <0x33>;
+defm V_CMPX_GT_F16 : VOPC_Real_vi <0x34>;
+defm V_CMPX_LG_F16 : VOPC_Real_vi <0x35>;
+defm V_CMPX_GE_F16 : VOPC_Real_vi <0x36>;
+defm V_CMPX_O_F16 : VOPC_Real_vi <0x37>;
+defm V_CMPX_U_F16 : VOPC_Real_vi <0x38>;
+defm V_CMPX_NGE_F16 : VOPC_Real_vi <0x39>;
+defm V_CMPX_NLG_F16 : VOPC_Real_vi <0x3a>;
+defm V_CMPX_NGT_F16 : VOPC_Real_vi <0x3b>;
+defm V_CMPX_NLE_F16 : VOPC_Real_vi <0x3c>;
+defm V_CMPX_NEQ_F16 : VOPC_Real_vi <0x3d>;
+defm V_CMPX_NLT_F16 : VOPC_Real_vi <0x3e>;
+defm V_CMPX_TRU_F16 : VOPC_Real_vi <0x3f>;
+
+defm V_CMP_F_F32 : VOPC_Real_vi <0x40>;
+defm V_CMP_LT_F32 : VOPC_Real_vi <0x41>;
+defm V_CMP_EQ_F32 : VOPC_Real_vi <0x42>;
+defm V_CMP_LE_F32 : VOPC_Real_vi <0x43>;
+defm V_CMP_GT_F32 : VOPC_Real_vi <0x44>;
+defm V_CMP_LG_F32 : VOPC_Real_vi <0x45>;
+defm V_CMP_GE_F32 : VOPC_Real_vi <0x46>;
+defm V_CMP_O_F32 : VOPC_Real_vi <0x47>;
+defm V_CMP_U_F32 : VOPC_Real_vi <0x48>;
+defm V_CMP_NGE_F32 : VOPC_Real_vi <0x49>;
+defm V_CMP_NLG_F32 : VOPC_Real_vi <0x4a>;
+defm V_CMP_NGT_F32 : VOPC_Real_vi <0x4b>;
+defm V_CMP_NLE_F32 : VOPC_Real_vi <0x4c>;
+defm V_CMP_NEQ_F32 : VOPC_Real_vi <0x4d>;
+defm V_CMP_NLT_F32 : VOPC_Real_vi <0x4e>;
+defm V_CMP_TRU_F32 : VOPC_Real_vi <0x4f>;
+
+defm V_CMPX_F_F32 : VOPC_Real_vi <0x50>;
+defm V_CMPX_LT_F32 : VOPC_Real_vi <0x51>;
+defm V_CMPX_EQ_F32 : VOPC_Real_vi <0x52>;
+defm V_CMPX_LE_F32 : VOPC_Real_vi <0x53>;
+defm V_CMPX_GT_F32 : VOPC_Real_vi <0x54>;
+defm V_CMPX_LG_F32 : VOPC_Real_vi <0x55>;
+defm V_CMPX_GE_F32 : VOPC_Real_vi <0x56>;
+defm V_CMPX_O_F32 : VOPC_Real_vi <0x57>;
+defm V_CMPX_U_F32 : VOPC_Real_vi <0x58>;
+defm V_CMPX_NGE_F32 : VOPC_Real_vi <0x59>;
+defm V_CMPX_NLG_F32 : VOPC_Real_vi <0x5a>;
+defm V_CMPX_NGT_F32 : VOPC_Real_vi <0x5b>;
+defm V_CMPX_NLE_F32 : VOPC_Real_vi <0x5c>;
+defm V_CMPX_NEQ_F32 : VOPC_Real_vi <0x5d>;
+defm V_CMPX_NLT_F32 : VOPC_Real_vi <0x5e>;
+defm V_CMPX_TRU_F32 : VOPC_Real_vi <0x5f>;
+
+defm V_CMP_F_F64 : VOPC_Real_vi <0x60>;
+defm V_CMP_LT_F64 : VOPC_Real_vi <0x61>;
+defm V_CMP_EQ_F64 : VOPC_Real_vi <0x62>;
+defm V_CMP_LE_F64 : VOPC_Real_vi <0x63>;
+defm V_CMP_GT_F64 : VOPC_Real_vi <0x64>;
+defm V_CMP_LG_F64 : VOPC_Real_vi <0x65>;
+defm V_CMP_GE_F64 : VOPC_Real_vi <0x66>;
+defm V_CMP_O_F64 : VOPC_Real_vi <0x67>;
+defm V_CMP_U_F64 : VOPC_Real_vi <0x68>;
+defm V_CMP_NGE_F64 : VOPC_Real_vi <0x69>;
+defm V_CMP_NLG_F64 : VOPC_Real_vi <0x6a>;
+defm V_CMP_NGT_F64 : VOPC_Real_vi <0x6b>;
+defm V_CMP_NLE_F64 : VOPC_Real_vi <0x6c>;
+defm V_CMP_NEQ_F64 : VOPC_Real_vi <0x6d>;
+defm V_CMP_NLT_F64 : VOPC_Real_vi <0x6e>;
+defm V_CMP_TRU_F64 : VOPC_Real_vi <0x6f>;
+
+defm V_CMPX_F_F64 : VOPC_Real_vi <0x70>;
+defm V_CMPX_LT_F64 : VOPC_Real_vi <0x71>;
+defm V_CMPX_EQ_F64 : VOPC_Real_vi <0x72>;
+defm V_CMPX_LE_F64 : VOPC_Real_vi <0x73>;
+defm V_CMPX_GT_F64 : VOPC_Real_vi <0x74>;
+defm V_CMPX_LG_F64 : VOPC_Real_vi <0x75>;
+defm V_CMPX_GE_F64 : VOPC_Real_vi <0x76>;
+defm V_CMPX_O_F64 : VOPC_Real_vi <0x77>;
+defm V_CMPX_U_F64 : VOPC_Real_vi <0x78>;
+defm V_CMPX_NGE_F64 : VOPC_Real_vi <0x79>;
+defm V_CMPX_NLG_F64 : VOPC_Real_vi <0x7a>;
+defm V_CMPX_NGT_F64 : VOPC_Real_vi <0x7b>;
+defm V_CMPX_NLE_F64 : VOPC_Real_vi <0x7c>;
+defm V_CMPX_NEQ_F64 : VOPC_Real_vi <0x7d>;
+defm V_CMPX_NLT_F64 : VOPC_Real_vi <0x7e>;
+defm V_CMPX_TRU_F64 : VOPC_Real_vi <0x7f>;
+
+defm V_CMP_F_I16 : VOPC_Real_vi <0xa0>;
+defm V_CMP_LT_I16 : VOPC_Real_vi <0xa1>;
+defm V_CMP_EQ_I16 : VOPC_Real_vi <0xa2>;
+defm V_CMP_LE_I16 : VOPC_Real_vi <0xa3>;
+defm V_CMP_GT_I16 : VOPC_Real_vi <0xa4>;
+defm V_CMP_NE_I16 : VOPC_Real_vi <0xa5>;
+defm V_CMP_GE_I16 : VOPC_Real_vi <0xa6>;
+defm V_CMP_T_I16 : VOPC_Real_vi <0xa7>;
+
+defm V_CMP_F_U16 : VOPC_Real_vi <0xa8>;
+defm V_CMP_LT_U16 : VOPC_Real_vi <0xa9>;
+defm V_CMP_EQ_U16 : VOPC_Real_vi <0xaa>;
+defm V_CMP_LE_U16 : VOPC_Real_vi <0xab>;
+defm V_CMP_GT_U16 : VOPC_Real_vi <0xac>;
+defm V_CMP_NE_U16 : VOPC_Real_vi <0xad>;
+defm V_CMP_GE_U16 : VOPC_Real_vi <0xae>;
+defm V_CMP_T_U16 : VOPC_Real_vi <0xaf>;
+
+defm V_CMPX_F_I16 : VOPC_Real_vi <0xb0>;
+defm V_CMPX_LT_I16 : VOPC_Real_vi <0xb1>;
+defm V_CMPX_EQ_I16 : VOPC_Real_vi <0xb2>;
+defm V_CMPX_LE_I16 : VOPC_Real_vi <0xb3>;
+defm V_CMPX_GT_I16 : VOPC_Real_vi <0xb4>;
+defm V_CMPX_NE_I16 : VOPC_Real_vi <0xb5>;
+defm V_CMPX_GE_I16 : VOPC_Real_vi <0xb6>;
+defm V_CMPX_T_I16 : VOPC_Real_vi <0xb7>;
+
+defm V_CMPX_F_U16 : VOPC_Real_vi <0xb8>;
+defm V_CMPX_LT_U16 : VOPC_Real_vi <0xb9>;
+defm V_CMPX_EQ_U16 : VOPC_Real_vi <0xba>;
+defm V_CMPX_LE_U16 : VOPC_Real_vi <0xbb>;
+defm V_CMPX_GT_U16 : VOPC_Real_vi <0xbc>;
+defm V_CMPX_NE_U16 : VOPC_Real_vi <0xbd>;
+defm V_CMPX_GE_U16 : VOPC_Real_vi <0xbe>;
+defm V_CMPX_T_U16 : VOPC_Real_vi <0xbf>;
+
+defm V_CMP_F_I32 : VOPC_Real_vi <0xc0>;
+defm V_CMP_LT_I32 : VOPC_Real_vi <0xc1>;
+defm V_CMP_EQ_I32 : VOPC_Real_vi <0xc2>;
+defm V_CMP_LE_I32 : VOPC_Real_vi <0xc3>;
+defm V_CMP_GT_I32 : VOPC_Real_vi <0xc4>;
+defm V_CMP_NE_I32 : VOPC_Real_vi <0xc5>;
+defm V_CMP_GE_I32 : VOPC_Real_vi <0xc6>;
+defm V_CMP_T_I32 : VOPC_Real_vi <0xc7>;
+
+defm V_CMPX_F_I32 : VOPC_Real_vi <0xd0>;
+defm V_CMPX_LT_I32 : VOPC_Real_vi <0xd1>;
+defm V_CMPX_EQ_I32 : VOPC_Real_vi <0xd2>;
+defm V_CMPX_LE_I32 : VOPC_Real_vi <0xd3>;
+defm V_CMPX_GT_I32 : VOPC_Real_vi <0xd4>;
+defm V_CMPX_NE_I32 : VOPC_Real_vi <0xd5>;
+defm V_CMPX_GE_I32 : VOPC_Real_vi <0xd6>;
+defm V_CMPX_T_I32 : VOPC_Real_vi <0xd7>;
+
+defm V_CMP_F_I64 : VOPC_Real_vi <0xe0>;
+defm V_CMP_LT_I64 : VOPC_Real_vi <0xe1>;
+defm V_CMP_EQ_I64 : VOPC_Real_vi <0xe2>;
+defm V_CMP_LE_I64 : VOPC_Real_vi <0xe3>;
+defm V_CMP_GT_I64 : VOPC_Real_vi <0xe4>;
+defm V_CMP_NE_I64 : VOPC_Real_vi <0xe5>;
+defm V_CMP_GE_I64 : VOPC_Real_vi <0xe6>;
+defm V_CMP_T_I64 : VOPC_Real_vi <0xe7>;
+
+defm V_CMPX_F_I64 : VOPC_Real_vi <0xf0>;
+defm V_CMPX_LT_I64 : VOPC_Real_vi <0xf1>;
+defm V_CMPX_EQ_I64 : VOPC_Real_vi <0xf2>;
+defm V_CMPX_LE_I64 : VOPC_Real_vi <0xf3>;
+defm V_CMPX_GT_I64 : VOPC_Real_vi <0xf4>;
+defm V_CMPX_NE_I64 : VOPC_Real_vi <0xf5>;
+defm V_CMPX_GE_I64 : VOPC_Real_vi <0xf6>;
+defm V_CMPX_T_I64 : VOPC_Real_vi <0xf7>;
+
+defm V_CMP_F_U32 : VOPC_Real_vi <0xc8>;
+defm V_CMP_LT_U32 : VOPC_Real_vi <0xc9>;
+defm V_CMP_EQ_U32 : VOPC_Real_vi <0xca>;
+defm V_CMP_LE_U32 : VOPC_Real_vi <0xcb>;
+defm V_CMP_GT_U32 : VOPC_Real_vi <0xcc>;
+defm V_CMP_NE_U32 : VOPC_Real_vi <0xcd>;
+defm V_CMP_GE_U32 : VOPC_Real_vi <0xce>;
+defm V_CMP_T_U32 : VOPC_Real_vi <0xcf>;
+
+defm V_CMPX_F_U32 : VOPC_Real_vi <0xd8>;
+defm V_CMPX_LT_U32 : VOPC_Real_vi <0xd9>;
+defm V_CMPX_EQ_U32 : VOPC_Real_vi <0xda>;
+defm V_CMPX_LE_U32 : VOPC_Real_vi <0xdb>;
+defm V_CMPX_GT_U32 : VOPC_Real_vi <0xdc>;
+defm V_CMPX_NE_U32 : VOPC_Real_vi <0xdd>;
+defm V_CMPX_GE_U32 : VOPC_Real_vi <0xde>;
+defm V_CMPX_T_U32 : VOPC_Real_vi <0xdf>;
+
+defm V_CMP_F_U64 : VOPC_Real_vi <0xe8>;
+defm V_CMP_LT_U64 : VOPC_Real_vi <0xe9>;
+defm V_CMP_EQ_U64 : VOPC_Real_vi <0xea>;
+defm V_CMP_LE_U64 : VOPC_Real_vi <0xeb>;
+defm V_CMP_GT_U64 : VOPC_Real_vi <0xec>;
+defm V_CMP_NE_U64 : VOPC_Real_vi <0xed>;
+defm V_CMP_GE_U64 : VOPC_Real_vi <0xee>;
+defm V_CMP_T_U64 : VOPC_Real_vi <0xef>;
+
+defm V_CMPX_F_U64 : VOPC_Real_vi <0xf8>;
+defm V_CMPX_LT_U64 : VOPC_Real_vi <0xf9>;
+defm V_CMPX_EQ_U64 : VOPC_Real_vi <0xfa>;
+defm V_CMPX_LE_U64 : VOPC_Real_vi <0xfb>;
+defm V_CMPX_GT_U64 : VOPC_Real_vi <0xfc>;
+defm V_CMPX_NE_U64 : VOPC_Real_vi <0xfd>;
+defm V_CMPX_GE_U64 : VOPC_Real_vi <0xfe>;
+defm V_CMPX_T_U64 : VOPC_Real_vi <0xff>;
diff --git a/contrib/llvm/lib/Target/AMDGPU/VOPInstructions.td b/contrib/llvm/lib/Target/AMDGPU/VOPInstructions.td
new file mode 100644
index 000000000000..5f72f97d9e28
--- /dev/null
+++ b/contrib/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -0,0 +1,350 @@
+//===-- VOPInstructions.td - Vector Instruction Definitions ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// Dummy fields so that an outer "let" wrapping a whole multiclass can set
+// these properties even on classes that do not otherwise declare them.
+class LetDummies {
+ bit isCommutable;
+ bit isConvertibleToThreeAddress;
+ bit isMoveImm;
+ bit isReMaterializable;
+ bit isAsCheapAsAMove;
+ bit VOPAsmPrefer32Bit;
+ Predicate SubtargetPredicate;
+ string Constraints;
+ string DisableEncoding;
+ list<SchedReadWrite> SchedRW;
+ list<Register> Uses;
+ list<Register> Defs;
+}
+
+class VOP <string opName> {
+ string OpName = opName;
+}
+
+class VOPAnyCommon <dag outs, dag ins, string asm, list<dag> pattern> :
+ InstSI <outs, ins, asm, pattern> {
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let UseNamedOperandTable = 1;
+ let VALU = 1;
+ let Uses = [EXEC];
+}
+
+class VOP3Common <dag outs, dag ins, string asm = "",
+ list<dag> pattern = [], bit HasMods = 0,
+ bit VOP3Only = 0> :
+ VOPAnyCommon <outs, ins, asm, pattern> {
+
+ // Using complex patterns gives VOP3 patterns a very high complexity rating,
+ // but standalone patterns are almost always preferred, so we need to lower
+ // their priority. The goal is a negative value large enough to bring the net
+ // complexity down to zero (or below).
+ let AddedComplexity = -1000;
+
+ let VOP3 = 1;
+
+ let AsmMatchConverter =
+ !if(!eq(VOP3Only,1),
+ "cvtVOP3",
+ !if(!eq(HasMods,1), "cvtVOP3_2_mod", ""));
+
+ let AsmVariantName = AMDGPUAsmVariants.VOP3;
+
+ let isCodeGenOnly = 0;
+
+ int Size = 8;
+
+ // Because SGPRs may be allowed if there are multiple operands, we
+ // need a post-isel hook to insert copies in order to avoid
+ // violating constant bus requirements.
+ let hasPostISelHook = 1;
+}
+
+class VOP3_Pseudo <string opName, VOPProfile P, list<dag> pattern=[], bit VOP3Only = 0> :
+ InstSI <P.Outs64, P.Ins64, "", pattern>,
+ VOP <opName>,
+ SIMCInstr<opName#"_e64", SIEncodingFamily.NONE>,
+ MnemonicAlias<opName#"_e64", opName> {
+
+ let isPseudo = 1;
+ let isCodeGenOnly = 1;
+ let UseNamedOperandTable = 1;
+
+ string Mnemonic = opName;
+ string AsmOperands = P.Asm64;
+
+ let Size = 8;
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let SubtargetPredicate = isGCN;
+
+ // Because SGPRs may be allowed if there are multiple operands, we
+ // need a post-isel hook to insert copies in order to avoid
+ // violating constant bus requirements.
+ let hasPostISelHook = 1;
+
+ // Using complex patterns gives VOP3 patterns a very high complexity rating,
+ // but standalone patterns are almost always preferred, so we need to lower
+ // their priority. The goal is a negative value large enough to bring the net
+ // complexity down to zero (or below).
+ let AddedComplexity = -1000;
+
+ let VOP3 = 1;
+ let VALU = 1;
+ let Uses = [EXEC];
+
+ let AsmVariantName = AMDGPUAsmVariants.VOP3;
+ let AsmMatchConverter =
+ !if(!eq(VOP3Only,1),
+ "cvtVOP3",
+ !if(!eq(P.HasModifiers, 1), "cvtVOP3_2_mod", ""));
+
+ VOPProfile Pfl = P;
+}
+
+class VOP3_Real <VOP3_Pseudo ps, int EncodingFamily> :
+ InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
+ SIMCInstr <ps.PseudoInstr, EncodingFamily> {
+
+ let isPseudo = 0;
+ let isCodeGenOnly = 0;
+
+ let Constraints = ps.Constraints;
+ let DisableEncoding = ps.DisableEncoding;
+
+ // copy relevant pseudo op flags
+ let SubtargetPredicate = ps.SubtargetPredicate;
+ let AsmMatchConverter = ps.AsmMatchConverter;
+ let AsmVariantName = ps.AsmVariantName;
+ let Constraints = ps.Constraints;
+ let DisableEncoding = ps.DisableEncoding;
+ let TSFlags = ps.TSFlags;
+}
+
+class VOP3a<VOPProfile P> : Enc64 {
+ bits<2> src0_modifiers;
+ bits<9> src0;
+ bits<2> src1_modifiers;
+ bits<9> src1;
+ bits<2> src2_modifiers;
+ bits<9> src2;
+ bits<1> clamp;
+ bits<2> omod;
+
+ let Inst{8} = !if(P.HasSrc0Mods, src0_modifiers{1}, 0);
+ let Inst{9} = !if(P.HasSrc1Mods, src1_modifiers{1}, 0);
+ let Inst{10} = !if(P.HasSrc2Mods, src2_modifiers{1}, 0);
+
+ let Inst{31-26} = 0x34; //encoding
+ let Inst{40-32} = !if(P.HasSrc0, src0, 0);
+ let Inst{49-41} = !if(P.HasSrc1, src1, 0);
+ let Inst{58-50} = !if(P.HasSrc2, src2, 0);
+ let Inst{60-59} = !if(P.HasOMod, omod, 0);
+ let Inst{61} = !if(P.HasSrc0Mods, src0_modifiers{0}, 0);
+ let Inst{62} = !if(P.HasSrc1Mods, src1_modifiers{0}, 0);
+ let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0);
+}
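+
+// Illustrative summary of the VOP3a layout defined above (modifiers bit 0 is
+// neg, bit 1 is abs, following the convention spelled out in the DPP encoding
+// comments further down):
+//   [63]=src2_neg [62]=src1_neg [61]=src0_neg [60:59]=omod
+//   [58:50]=src2  [49:41]=src1  [40:32]=src0  [31:26]=0x34 (VOP3)
+//   [10]=src2_abs [9]=src1_abs  [8]=src0_abs
+// The _si/_vi subclasses below add the opcode and clamp bit positions.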
+
+class VOP3a_si <bits<9> op, VOPProfile P> : VOP3a<P> {
+ let Inst{25-17} = op;
+ let Inst{11} = !if(P.HasClamp, clamp{0}, 0);
+}
+
+class VOP3a_vi <bits<10> op, VOPProfile P> : VOP3a<P> {
+ let Inst{25-16} = op;
+ let Inst{15} = !if(P.HasClamp, clamp{0}, 0);
+}
+
+class VOP3e_si <bits<9> op, VOPProfile P> : VOP3a_si <op, P> {
+ bits<8> vdst;
+ let Inst{7-0} = !if(P.EmitDst, vdst{7-0}, 0);
+}
+
+class VOP3e_vi <bits<10> op, VOPProfile P> : VOP3a_vi <op, P> {
+ bits<8> vdst;
+ let Inst{7-0} = !if(P.EmitDst, vdst{7-0}, 0);
+}
+
+class VOP3be <VOPProfile P> : Enc64 {
+ bits<8> vdst;
+ bits<2> src0_modifiers;
+ bits<9> src0;
+ bits<2> src1_modifiers;
+ bits<9> src1;
+ bits<2> src2_modifiers;
+ bits<9> src2;
+ bits<7> sdst;
+ bits<2> omod;
+
+ let Inst{7-0} = vdst;
+ let Inst{14-8} = sdst;
+ let Inst{31-26} = 0x34; //encoding
+ let Inst{40-32} = !if(P.HasSrc0, src0, 0);
+ let Inst{49-41} = !if(P.HasSrc1, src1, 0);
+ let Inst{58-50} = !if(P.HasSrc2, src2, 0);
+ let Inst{60-59} = !if(P.HasOMod, omod, 0);
+ let Inst{61} = !if(P.HasSrc0Mods, src0_modifiers{0}, 0);
+ let Inst{62} = !if(P.HasSrc1Mods, src1_modifiers{0}, 0);
+ let Inst{63} = !if(P.HasSrc2Mods, src2_modifiers{0}, 0);
+}
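+
+// Unlike VOP3a above, this "b" encoding carries an explicit 7-bit scalar
+// destination (sdst) in bits 14-8 and has no per-source abs or clamp bits;
+// it is the form used when an instruction also produces an SGPR result,
+// typically a carry-out.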
+
+class VOP3be_si <bits<9> op, VOPProfile P> : VOP3be<P> {
+ let Inst{25-17} = op;
+}
+
+class VOP3be_vi <bits<10> op, VOPProfile P> : VOP3be<P> {
+ bits<1> clamp;
+ let Inst{25-16} = op;
+ let Inst{15} = !if(P.HasClamp, clamp{0}, 0);
+}
+
+def SDWA {
+ // sdwa_sel
+ int BYTE_0 = 0;
+ int BYTE_1 = 1;
+ int BYTE_2 = 2;
+ int BYTE_3 = 3;
+ int WORD_0 = 4;
+ int WORD_1 = 5;
+ int DWORD = 6;
+
+ // dst_unused
+ int UNUSED_PAD = 0;
+ int UNUSED_SEXT = 1;
+ int UNUSED_PRESERVE = 2;
+}
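+
+// For reference: BYTE_0-BYTE_3 select one byte and WORD_0/WORD_1 one 16-bit
+// half of the 32-bit operand, while DWORD uses all 32 bits. The dst_unused
+// values control how bits outside dst_sel are written back: zero padding,
+// sign extension, or preserving the previous destination contents.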
+
+class VOP_SDWAe<VOPProfile P> : Enc64 {
+ bits<8> src0;
+ bits<3> src0_sel;
+ bits<2> src0_modifiers; // float: {abs,neg}, int {sext}
+ bits<3> src1_sel;
+ bits<2> src1_modifiers;
+ bits<3> dst_sel;
+ bits<2> dst_unused;
+ bits<1> clamp;
+
+ let Inst{39-32} = !if(P.HasSrc0, src0{7-0}, 0);
+ let Inst{42-40} = !if(P.EmitDst, dst_sel{2-0}, SDWA.DWORD);
+ let Inst{44-43} = !if(P.EmitDst, dst_unused{1-0}, SDWA.UNUSED_PRESERVE);
+ let Inst{45} = !if(P.HasSDWAClamp, clamp{0}, 0);
+ let Inst{50-48} = !if(P.HasSrc0, src0_sel{2-0}, SDWA.DWORD);
+ let Inst{53-52} = !if(P.HasSrc0FloatMods, src0_modifiers{1-0}, 0);
+ let Inst{51} = !if(P.HasSrc0IntMods, src0_modifiers{0}, 0);
+ let Inst{58-56} = !if(P.HasSrc1, src1_sel{2-0}, SDWA.DWORD);
+ let Inst{61-60} = !if(P.HasSrc1FloatMods, src1_modifiers{1-0}, 0);
+ let Inst{59} = !if(P.HasSrc1IntMods, src1_modifiers{0}, 0);
+}
+
+class VOP_SDWA_Pseudo <string opName, VOPProfile P, list<dag> pattern=[]> :
+ InstSI <P.OutsSDWA, P.InsSDWA, "", pattern>,
+ VOP <opName>,
+ SIMCInstr <opName#"_sdwa", SIEncodingFamily.NONE>,
+ MnemonicAlias <opName#"_sdwa", opName> {
+
+ let isPseudo = 1;
+ let isCodeGenOnly = 1;
+ let UseNamedOperandTable = 1;
+
+ string Mnemonic = opName;
+ string AsmOperands = P.AsmSDWA;
+
+ let Size = 8;
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+
+ let VALU = 1;
+ let SDWA = 1;
+ let Uses = [EXEC];
+
+ let SubtargetPredicate = isVI;
+ let AssemblerPredicate = !if(P.HasExt, isVI, DisableInst);
+ let AsmVariantName = !if(P.HasExt, AMDGPUAsmVariants.SDWA,
+ AMDGPUAsmVariants.Disable);
+ let DecoderNamespace = "SDWA";
+
+ VOPProfile Pfl = P;
+}
+
+class VOP_SDWA_Real <VOP_SDWA_Pseudo ps> :
+ InstSI <ps.OutOperandList, ps.InOperandList, ps.Mnemonic # ps.AsmOperands, []>,
+ SIMCInstr <ps.PseudoInstr, SIEncodingFamily.VI> {
+
+ let isPseudo = 0;
+ let isCodeGenOnly = 0;
+
+ let Defs = ps.Defs;
+ let Uses = ps.Uses;
+ let SchedRW = ps.SchedRW;
+ let hasSideEffects = ps.hasSideEffects;
+
+ let Constraints = ps.Constraints;
+ let DisableEncoding = ps.DisableEncoding;
+
+ // Copy relevant pseudo op flags
+ let SubtargetPredicate = ps.SubtargetPredicate;
+ let AssemblerPredicate = ps.AssemblerPredicate;
+ let AsmMatchConverter = ps.AsmMatchConverter;
+ let AsmVariantName = ps.AsmVariantName;
+ let UseNamedOperandTable = ps.UseNamedOperandTable;
+ let DecoderNamespace = ps.DecoderNamespace;
+ let Constraints = ps.Constraints;
+ let DisableEncoding = ps.DisableEncoding;
+ let TSFlags = ps.TSFlags;
+}
+
+class VOP_DPPe<VOPProfile P> : Enc64 {
+ bits<2> src0_modifiers;
+ bits<8> src0;
+ bits<2> src1_modifiers;
+ bits<9> dpp_ctrl;
+ bits<1> bound_ctrl;
+ bits<4> bank_mask;
+ bits<4> row_mask;
+
+ let Inst{39-32} = !if(P.HasSrc0, src0{7-0}, 0);
+ let Inst{48-40} = dpp_ctrl;
+ let Inst{51} = bound_ctrl;
+ let Inst{52} = !if(P.HasSrc0Mods, src0_modifiers{0}, 0); // src0_neg
+ let Inst{53} = !if(P.HasSrc0Mods, src0_modifiers{1}, 0); // src0_abs
+ let Inst{54} = !if(P.HasSrc1Mods, src1_modifiers{0}, 0); // src1_neg
+ let Inst{55} = !if(P.HasSrc1Mods, src1_modifiers{1}, 0); // src1_abs
+ let Inst{59-56} = bank_mask;
+ let Inst{63-60} = row_mask;
+}
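+
+// dpp_ctrl (bits 48-40) encodes the cross-lane data movement pattern, while
+// bound_ctrl, bank_mask and row_mask (bits 51, 59-56 and 63-60) control how
+// lanes without valid source data and masked-out banks/rows are handled.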
+
+class VOP_DPP <string OpName, VOPProfile P> :
+ InstSI <P.OutsDPP, P.InsDPP, OpName#P.AsmDPP, []>,
+ VOP_DPPe<P> {
+
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let UseNamedOperandTable = 1;
+
+ let VALU = 1;
+ let DPP = 1;
+ let Size = 8;
+
+ let AsmMatchConverter = !if(!eq(P.HasModifiers,1), "cvtDPP", "");
+ let SubtargetPredicate = isVI;
+ let AssemblerPredicate = !if(P.HasExt, isVI, DisableInst);
+ let AsmVariantName = !if(P.HasExt, AMDGPUAsmVariants.DPP,
+ AMDGPUAsmVariants.Disable);
+ let DecoderNamespace = "DPP";
+}
+
+include "VOPCInstructions.td"
+include "VOP1Instructions.td"
+include "VOP2Instructions.td"
+include "VOP3Instructions.td"
diff --git a/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp b/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp
index 9228cc2d7a9c..89859ba063d9 100644
--- a/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp
+++ b/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp
@@ -52,9 +52,7 @@ namespace {
bool runOnMachineFunction(MachineFunction &Fn) override;
- const char *getPassName() const override {
- return "ARM A15 S->D optimizer";
- }
+ StringRef getPassName() const override { return "ARM A15 S->D optimizer"; }
private:
const ARMBaseInstrInfo *TII;
diff --git a/contrib/llvm/lib/Target/ARM/ARM.h b/contrib/llvm/lib/Target/ARM/ARM.h
index 690ff86a0c86..be3048252bbc 100644
--- a/contrib/llvm/lib/Target/ARM/ARM.h
+++ b/contrib/llvm/lib/Target/ARM/ARM.h
@@ -16,6 +16,7 @@
#define LLVM_LIB_TARGET_ARM_ARM_H
#include "llvm/Support/CodeGen.h"
+#include "ARMBasicBlockInfo.h"
#include <functional>
namespace llvm {
@@ -46,6 +47,10 @@ FunctionPass *createThumb2SizeReductionPass(
void LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
ARMAsmPrinter &AP);
+void computeBlockSize(MachineFunction *MF, MachineBasicBlock *MBB,
+ BasicBlockInfo &BBI);
+std::vector<BasicBlockInfo> computeAllBlockSizes(MachineFunction *MF);
+
void initializeARMLoadStoreOptPass(PassRegistry &);
void initializeARMPreAllocLoadStoreOptPass(PassRegistry &);
diff --git a/contrib/llvm/lib/Target/ARM/ARM.td b/contrib/llvm/lib/Target/ARM/ARM.td
index ef626b66a1e7..2a090faeee6a 100644
--- a/contrib/llvm/lib/Target/ARM/ARM.td
+++ b/contrib/llvm/lib/Target/ARM/ARM.td
@@ -99,6 +99,8 @@ def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true",
// Not to be confused with FeatureHasRetAddrStack (return address stack)
def FeatureRAS : SubtargetFeature<"ras", "HasRAS", "true",
"Enable Reliability, Availability and Serviceability extensions">;
+def FeatureFPAO : SubtargetFeature<"fpao", "HasFPAO", "true",
+ "Enable fast computation of positive address offsets">;
// Cyclone has preferred instructions for zeroing VFP registers, which can
@@ -295,7 +297,8 @@ def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true",
FeatureV7Clrex]>;
def HasV8Ops : SubtargetFeature<"v8", "HasV8Ops", "true",
"Support ARM v8 instructions",
- [HasV7Ops, FeatureAcquireRelease]>;
+ [HasV7Ops, FeatureAcquireRelease,
+ FeatureT2XtPk]>;
def HasV8_1aOps : SubtargetFeature<"v8.1a", "HasV8_1aOps", "true",
"Support ARM v8.1a instructions",
[HasV8Ops]>;
@@ -352,6 +355,8 @@ def ProcR5 : SubtargetFeature<"r5", "ARMProcFamily", "CortexR5",
"Cortex-R5 ARM processors", []>;
def ProcR7 : SubtargetFeature<"r7", "ARMProcFamily", "CortexR7",
"Cortex-R7 ARM processors", []>;
+def ProcR52 : SubtargetFeature<"r52", "ARMProcFamily", "CortexR52",
+ "Cortex-R52 ARM processors", []>;
def ProcM3 : SubtargetFeature<"m3", "ARMProcFamily", "CortexM3",
"Cortex-M3 ARM processors", []>;
@@ -388,7 +393,8 @@ def ARMv5tej : Architecture<"armv5tej", "ARMv5tej", [HasV5TEOps]>;
def ARMv6 : Architecture<"armv6", "ARMv6", [HasV6Ops]>;
def ARMv6t2 : Architecture<"armv6t2", "ARMv6t2", [HasV6T2Ops,
- FeatureDSP]>;
+ FeatureDSP,
+ FeatureT2XtPk]>;
def ARMv6k : Architecture<"armv6k", "ARMv6k", [HasV6KOps]>;
@@ -409,13 +415,15 @@ def ARMv7a : Architecture<"armv7-a", "ARMv7a", [HasV7Ops,
FeatureNEON,
FeatureDB,
FeatureDSP,
- FeatureAClass]>;
+ FeatureAClass,
+ FeatureT2XtPk]>;
def ARMv7r : Architecture<"armv7-r", "ARMv7r", [HasV7Ops,
FeatureDB,
FeatureDSP,
FeatureHWDiv,
- FeatureRClass]>;
+ FeatureRClass,
+ FeatureT2XtPk]>;
def ARMv7m : Architecture<"armv7-m", "ARMv7m", [HasV7Ops,
FeatureThumb2,
@@ -470,6 +478,19 @@ def ARMv82a : Architecture<"armv8.2-a", "ARMv82a", [HasV8_2aOps,
FeatureCRC,
FeatureRAS]>;
+def ARMv8r : Architecture<"armv8-r", "ARMv8r", [HasV8Ops,
+ FeatureRClass,
+ FeatureDB,
+ FeatureHWDiv,
+ FeatureHWDivARM,
+ FeatureT2XtPk,
+ FeatureDSP,
+ FeatureCRC,
+ FeatureMP,
+ FeatureVirtualization,
+ FeatureFPARMv8,
+ FeatureNEON]>;
+
def ARMv8mBaseline : Architecture<"armv8-m.base", "ARMv8mBaseline",
[HasV8MBaselineOps,
FeatureNoARM,
@@ -570,7 +591,6 @@ def : ProcessorModel<"cortex-a5", CortexA8Model, [ARMv7a, ProcA5,
FeatureSlowFPBrcc,
FeatureHasSlowFPVMLx,
FeatureVMLxForwarding,
- FeatureT2XtPk,
FeatureMP,
FeatureVFP4]>;
@@ -581,7 +601,6 @@ def : ProcessorModel<"cortex-a7", CortexA8Model, [ARMv7a, ProcA7,
FeatureHasVMLxHazards,
FeatureHasSlowFPVMLx,
FeatureVMLxForwarding,
- FeatureT2XtPk,
FeatureMP,
FeatureVFP4,
FeatureHWDiv,
@@ -595,15 +614,13 @@ def : ProcessorModel<"cortex-a8", CortexA8Model, [ARMv7a, ProcA8,
FeatureSlowFPBrcc,
FeatureHasVMLxHazards,
FeatureHasSlowFPVMLx,
- FeatureVMLxForwarding,
- FeatureT2XtPk]>;
+ FeatureVMLxForwarding]>;
def : ProcessorModel<"cortex-a9", CortexA9Model, [ARMv7a, ProcA9,
FeatureHasRetAddrStack,
FeatureTrustZone,
FeatureHasVMLxHazards,
FeatureVMLxForwarding,
- FeatureT2XtPk,
FeatureFP16,
FeatureAvoidPartialCPSR,
FeatureExpandMLx,
@@ -618,7 +635,6 @@ def : ProcessorModel<"cortex-a12", CortexA9Model, [ARMv7a, ProcA12,
FeatureHasRetAddrStack,
FeatureTrustZone,
FeatureVMLxForwarding,
- FeatureT2XtPk,
FeatureVFP4,
FeatureHWDiv,
FeatureHWDivARM,
@@ -632,7 +648,6 @@ def : ProcessorModel<"cortex-a15", CortexA9Model, [ARMv7a, ProcA15,
FeatureHasRetAddrStack,
FeatureMuxedUnits,
FeatureTrustZone,
- FeatureT2XtPk,
FeatureVFP4,
FeatureMP,
FeatureCheckVLDnAlign,
@@ -647,7 +662,6 @@ def : ProcessorModel<"cortex-a17", CortexA9Model, [ARMv7a, ProcA17,
FeatureTrustZone,
FeatureMP,
FeatureVMLxForwarding,
- FeatureT2XtPk,
FeatureVFP4,
FeatureHWDiv,
FeatureHWDivARM,
@@ -662,7 +676,6 @@ def : ProcessorModel<"krait", CortexA9Model, [ARMv7a, ProcKrait,
FeatureMuxedUnits,
FeatureCheckVLDnAlign,
FeatureVMLxForwarding,
- FeatureT2XtPk,
FeatureFP16,
FeatureAvoidPartialCPSR,
FeatureVFP4,
@@ -672,7 +685,6 @@ def : ProcessorModel<"krait", CortexA9Model, [ARMv7a, ProcKrait,
def : ProcessorModel<"swift", SwiftModel, [ARMv7a, ProcSwift,
FeatureHasRetAddrStack,
FeatureNEONForFP,
- FeatureT2XtPk,
FeatureVFP4,
FeatureMP,
FeatureHWDiv,
@@ -691,8 +703,7 @@ def : ProcessorModel<"swift", SwiftModel, [ARMv7a, ProcSwift,
// FIXME: R4 has currently the same ProcessorModel as A8.
def : ProcessorModel<"cortex-r4", CortexA8Model, [ARMv7r, ProcR4,
FeatureHasRetAddrStack,
- FeatureAvoidPartialCPSR,
- FeatureT2XtPk]>;
+ FeatureAvoidPartialCPSR]>;
// FIXME: R4F has currently the same ProcessorModel as A8.
def : ProcessorModel<"cortex-r4f", CortexA8Model, [ARMv7r, ProcR4,
@@ -701,8 +712,7 @@ def : ProcessorModel<"cortex-r4f", CortexA8Model, [ARMv7r, ProcR4,
FeatureHasSlowFPVMLx,
FeatureVFP3,
FeatureD16,
- FeatureAvoidPartialCPSR,
- FeatureT2XtPk]>;
+ FeatureAvoidPartialCPSR]>;
// FIXME: R5 has currently the same ProcessorModel as A8.
def : ProcessorModel<"cortex-r5", CortexA8Model, [ARMv7r, ProcR5,
@@ -712,8 +722,7 @@ def : ProcessorModel<"cortex-r5", CortexA8Model, [ARMv7r, ProcR5,
FeatureSlowFPBrcc,
FeatureHWDivARM,
FeatureHasSlowFPVMLx,
- FeatureAvoidPartialCPSR,
- FeatureT2XtPk]>;
+ FeatureAvoidPartialCPSR]>;
// FIXME: R7 has currently the same ProcessorModel as A8 and is modelled as R5.
def : ProcessorModel<"cortex-r7", CortexA8Model, [ARMv7r, ProcR7,
@@ -725,8 +734,7 @@ def : ProcessorModel<"cortex-r7", CortexA8Model, [ARMv7r, ProcR7,
FeatureSlowFPBrcc,
FeatureHWDivARM,
FeatureHasSlowFPVMLx,
- FeatureAvoidPartialCPSR,
- FeatureT2XtPk]>;
+ FeatureAvoidPartialCPSR]>;
def : ProcessorModel<"cortex-r8", CortexA8Model, [ARMv7r,
FeatureHasRetAddrStack,
@@ -737,8 +745,7 @@ def : ProcessorModel<"cortex-r8", CortexA8Model, [ARMv7r,
FeatureSlowFPBrcc,
FeatureHWDivARM,
FeatureHasSlowFPVMLx,
- FeatureAvoidPartialCPSR,
- FeatureT2XtPk]>;
+ FeatureAvoidPartialCPSR]>;
def : ProcNoItin<"cortex-m3", [ARMv7m, ProcM3]>;
def : ProcNoItin<"sc300", [ARMv7m, ProcM3]>;
@@ -755,42 +762,38 @@ def : ProcNoItin<"cortex-m7", [ARMv7em,
def : ProcNoItin<"cortex-a32", [ARMv8a,
FeatureHWDiv,
FeatureHWDivARM,
- FeatureT2XtPk,
FeatureCrypto,
FeatureCRC]>;
def : ProcNoItin<"cortex-a35", [ARMv8a, ProcA35,
FeatureHWDiv,
FeatureHWDivARM,
- FeatureT2XtPk,
FeatureCrypto,
FeatureCRC]>;
def : ProcNoItin<"cortex-a53", [ARMv8a, ProcA53,
FeatureHWDiv,
FeatureHWDivARM,
- FeatureT2XtPk,
FeatureCrypto,
- FeatureCRC]>;
+ FeatureCRC,
+ FeatureFPAO]>;
def : ProcNoItin<"cortex-a57", [ARMv8a, ProcA57,
FeatureHWDiv,
FeatureHWDivARM,
- FeatureT2XtPk,
FeatureCrypto,
- FeatureCRC]>;
+ FeatureCRC,
+ FeatureFPAO]>;
def : ProcNoItin<"cortex-a72", [ARMv8a, ProcA72,
FeatureHWDiv,
FeatureHWDivARM,
- FeatureT2XtPk,
FeatureCrypto,
FeatureCRC]>;
def : ProcNoItin<"cortex-a73", [ARMv8a, ProcA73,
FeatureHWDiv,
FeatureHWDivARM,
- FeatureT2XtPk,
FeatureCrypto,
FeatureCRC]>;
@@ -798,7 +801,6 @@ def : ProcNoItin<"cortex-a73", [ARMv8a, ProcA73,
def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift,
FeatureHasRetAddrStack,
FeatureNEONForFP,
- FeatureT2XtPk,
FeatureVFP4,
FeatureMP,
FeatureHWDiv,
@@ -812,10 +814,24 @@ def : ProcessorModel<"cyclone", SwiftModel, [ARMv8a, ProcSwift,
def : ProcNoItin<"exynos-m1", [ARMv8a, ProcExynosM1,
FeatureHWDiv,
FeatureHWDivARM,
- FeatureT2XtPk,
FeatureCrypto,
FeatureCRC]>;
+def : ProcNoItin<"exynos-m2", [ARMv8a, ProcExynosM1,
+ FeatureHWDiv,
+ FeatureHWDivARM,
+ FeatureCrypto,
+ FeatureCRC]>;
+
+def : ProcNoItin<"exynos-m3", [ARMv8a, ProcExynosM1,
+ FeatureHWDiv,
+ FeatureHWDivARM,
+ FeatureCrypto,
+ FeatureCRC]>;
+
+def : ProcessorModel<"cortex-r52", CortexR52Model, [ARMv8r, ProcR52,
+ FeatureFPAO]>;
+
//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
index 04863a7ecf8f..f20768ab77a5 100644
--- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp
@@ -74,8 +74,9 @@ void ARMAsmPrinter::EmitFunctionEntryLabel() {
if (AFI->isThumbFunction()) {
OutStreamer->EmitAssemblerFlag(MCAF_Code16);
OutStreamer->EmitThumbFunc(CurrentFnSym);
+ } else {
+ OutStreamer->EmitAssemblerFlag(MCAF_Code32);
}
-
OutStreamer->EmitLabel(CurrentFnSym);
}
@@ -96,6 +97,13 @@ void ARMAsmPrinter::EmitXXStructor(const DataLayout &DL, const Constant *CV) {
OutStreamer->EmitValue(E, Size);
}
+void ARMAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
+ if (PromotedGlobals.count(GV))
+ // The global was promoted into a constant pool. It should not be emitted.
+ return;
+ AsmPrinter::EmitGlobalVariable(GV);
+}
+
/// runOnMachineFunction - This uses the EmitInstruction()
/// method to print assembly for each instruction.
///
@@ -108,6 +116,12 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
const Function* F = MF.getFunction();
const TargetMachine& TM = MF.getTarget();
+ // Collect all globals that had their storage promoted to a constant pool.
+ // Functions are emitted before variables, so this accumulates promoted
+ // globals from all functions in PromotedGlobals.
+ for (auto *GV : AFI->getGlobalsPromotedToConstantPool())
+ PromotedGlobals.insert(GV);
+
// Calculate this function's optimization goal.
unsigned OptimizationGoal;
if (F->hasFnAttribute(Attribute::OptimizeNone))
@@ -150,6 +164,9 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
// Emit the rest of the function body.
EmitFunctionBody();
+ // Emit the XRay table for this function.
+ EmitXRayTable();
+
// If we need V4T thumb mode Register Indirect Jump pads, emit them.
// These are created per function, rather than per TU, since it's
// relatively easy to exceed the thumb branch range within a TU.
@@ -215,6 +232,8 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
break;
}
case MachineOperand::MO_ConstantPoolIndex:
+ if (Subtarget->genExecuteOnly())
+ llvm_unreachable("execute-only should not generate constant pools");
GetCPISymbol(MO.getIndex())->print(O, MAI);
break;
}
@@ -249,7 +268,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
<< "]";
return false;
}
- // Fallthrough
+ LLVM_FALLTHROUGH;
case 'c': // Don't print "#" before an immediate operand.
if (!MI->getOperand(OpNum).isImm())
return true;
@@ -542,11 +561,11 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) {
raw_string_ostream OS(Flags);
for (const auto &Function : M)
- TLOF.emitLinkerFlagsForGlobal(OS, &Function, *Mang);
+ TLOF.emitLinkerFlagsForGlobal(OS, &Function);
for (const auto &Global : M.globals())
- TLOF.emitLinkerFlagsForGlobal(OS, &Global, *Mang);
+ TLOF.emitLinkerFlagsForGlobal(OS, &Global);
for (const auto &Alias : M.aliases())
- TLOF.emitLinkerFlagsForGlobal(OS, &Alias, *Mang);
+ TLOF.emitLinkerFlagsForGlobal(OS, &Alias);
OS.flush();
@@ -588,9 +607,11 @@ static ARMBuildAttrs::CPUArch getArchForCPU(StringRef CPU,
if (CPU == "xscale")
return ARMBuildAttrs::v5TEJ;
- if (Subtarget->hasV8Ops())
+ if (Subtarget->hasV8Ops()) {
+ if (Subtarget->isRClass())
+ return ARMBuildAttrs::v8_R;
return ARMBuildAttrs::v8_A;
- else if (Subtarget->hasV8MMainlineOps())
+ } else if (Subtarget->hasV8MMainlineOps())
return ARMBuildAttrs::v8_M_Main;
else if (Subtarget->hasV7Ops()) {
if (Subtarget->isMClass() && Subtarget->hasDSP())
@@ -614,6 +635,15 @@ static ARMBuildAttrs::CPUArch getArchForCPU(StringRef CPU,
return ARMBuildAttrs::v4;
}
+// Returns true if every function in the module has the given string attribute
+// set to the given value. It also returns true when the module has no
+// functions.
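+// For example, it is used below with ("no-trapping-math", "true") to decide
+// whether ABI_FP_exceptions can be set to Not_Allowed for the whole module.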
+static bool checkFunctionsAttributeConsistency(const Module &M, StringRef Attr,
+ StringRef Value) {
+ return !any_of(M, [&](const Function &F) {
+ return F.getFnAttribute(Attr).getValueAsString() != Value;
+ });
+}
+
void ARMAsmPrinter::emitAttributes() {
MCTargetStreamer &TS = *OutStreamer->getTargetStreamer();
ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS);
@@ -725,31 +755,48 @@ void ARMAsmPrinter::emitAttributes() {
ATS.emitFPU(ARM::FK_VFPV2);
}
+ // RW data addressing.
if (isPositionIndependent()) {
- // PIC specific attributes.
ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_RW_data,
ARMBuildAttrs::AddressRWPCRel);
+ } else if (STI.isRWPI()) {
+ // RWPI specific attributes.
+ ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_RW_data,
+ ARMBuildAttrs::AddressRWSBRel);
+ }
+
+ // RO data addressing.
+ if (isPositionIndependent() || STI.isROPI()) {
ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_RO_data,
ARMBuildAttrs::AddressROPCRel);
+ }
+
+ // GOT use.
+ if (isPositionIndependent()) {
ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_GOT_use,
ARMBuildAttrs::AddressGOT);
} else {
- // Allow direct addressing of imported data for all other relocation models.
ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_GOT_use,
ARMBuildAttrs::AddressDirect);
}
- // Signal various FP modes.
- if (!TM.Options.UnsafeFPMath) {
+ // Set FP Denormals.
+ if (checkFunctionsAttributeConsistency(*MMI->getModule(),
+ "denormal-fp-math",
+ "preserve-sign") ||
+ TM.Options.FPDenormalMode == FPDenormal::PreserveSign)
+ ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal,
+ ARMBuildAttrs::PreserveFPSign);
+ else if (checkFunctionsAttributeConsistency(*MMI->getModule(),
+ "denormal-fp-math",
+ "positive-zero") ||
+ TM.Options.FPDenormalMode == FPDenormal::PositiveZero)
+ ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal,
+ ARMBuildAttrs::PositiveZero);
+ else if (!TM.Options.UnsafeFPMath)
ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal,
ARMBuildAttrs::IEEEDenormals);
- ATS.emitAttribute(ARMBuildAttrs::ABI_FP_exceptions, ARMBuildAttrs::Allowed);
-
- // If the user has permitted this code to choose the IEEE 754
- // rounding at run-time, emit the rounding attribute.
- if (TM.Options.HonorSignDependentRoundingFPMathOption)
- ATS.emitAttribute(ARMBuildAttrs::ABI_FP_rounding, ARMBuildAttrs::Allowed);
- } else {
+ else {
if (!STI.hasVFP2()) {
// When the target doesn't have an FPU (by design or
// intention), the assumptions made on the software support
@@ -775,6 +822,21 @@ void ARMAsmPrinter::emitAttributes() {
// absence of its emission implies zero).
}
+ // Set FP exceptions and rounding
+ if (checkFunctionsAttributeConsistency(*MMI->getModule(),
+ "no-trapping-math", "true") ||
+ TM.Options.NoTrappingFPMath)
+ ATS.emitAttribute(ARMBuildAttrs::ABI_FP_exceptions,
+ ARMBuildAttrs::Not_Allowed);
+ else if (!TM.Options.UnsafeFPMath) {
+ ATS.emitAttribute(ARMBuildAttrs::ABI_FP_exceptions, ARMBuildAttrs::Allowed);
+
+ // If the user has permitted this code to choose the IEEE 754
+ // rounding at run-time, emit the rounding attribute.
+ if (TM.Options.HonorSignDependentRoundingFPMathOption)
+ ATS.emitAttribute(ARMBuildAttrs::ABI_FP_rounding, ARMBuildAttrs::Allowed);
+ }
+
// TM.Options.NoInfsFPMath && TM.Options.NoNaNsFPMath is the
// equivalent of GCC's -ffinite-math-only flag.
if (TM.Options.NoInfsFPMath && TM.Options.NoNaNsFPMath)
@@ -858,14 +920,16 @@ void ARMAsmPrinter::emitAttributes() {
}
}
- // TODO: We currently only support either reserving the register, or treating
- // it as another callee-saved register, but not as SB or a TLS pointer; It
- // would instead be nicer to push this from the frontend as metadata, as we do
- // for the wchar and enum size tags
- if (STI.isR9Reserved())
- ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_R9_use, ARMBuildAttrs::R9Reserved);
+ // We currently do not support using R9 as the TLS pointer.
+ if (STI.isRWPI())
+ ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_R9_use,
+ ARMBuildAttrs::R9IsSB);
+ else if (STI.isR9Reserved())
+ ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_R9_use,
+ ARMBuildAttrs::R9Reserved);
else
- ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_R9_use, ARMBuildAttrs::R9IsGPR);
+ ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_R9_use,
+ ARMBuildAttrs::R9IsGPR);
if (STI.hasTrustZone() && STI.hasVirtualization())
ATS.emitAttribute(ARMBuildAttrs::Virtualization_use,
@@ -880,7 +944,7 @@ void ARMAsmPrinter::emitAttributes() {
//===----------------------------------------------------------------------===//
-static MCSymbol *getPICLabel(const char *Prefix, unsigned FunctionNumber,
+static MCSymbol *getPICLabel(StringRef Prefix, unsigned FunctionNumber,
unsigned LabelId, MCContext &Ctx) {
MCSymbol *Label = Ctx.getOrCreateSymbol(Twine(Prefix)
@@ -899,6 +963,8 @@ getModifierVariantKind(ARMCP::ARMCPModifier Modifier) {
return MCSymbolRefExpr::VK_TPOFF;
case ARMCP::GOTTPOFF:
return MCSymbolRefExpr::VK_GOTTPOFF;
+ case ARMCP::SBREL:
+ return MCSymbolRefExpr::VK_ARM_SBREL;
case ARMCP::GOT_PREL:
return MCSymbolRefExpr::VK_ARM_GOT_PREL;
case ARMCP::SECREL:
@@ -954,6 +1020,26 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
ARMConstantPoolValue *ACPV = static_cast<ARMConstantPoolValue*>(MCPV);
+ if (ACPV->isPromotedGlobal()) {
+ // This constant pool entry is actually a global whose storage has been
+ // promoted into the constant pool. This global may be referenced still
+ // by debug information, and due to the way AsmPrinter is set up, the debug
+ // info is immutable by the time we decide to promote globals to constant
+ // pools. Because of this, we need to ensure we emit a symbol for the global
+ // with private linkage (the default) so debug info can refer to it.
+ //
+ // However, if this global is promoted into several functions we must ensure
+ // we don't try and emit duplicate symbols!
+ auto *ACPC = cast<ARMConstantPoolConstant>(ACPV);
+ auto *GV = ACPC->getPromotedGlobal();
+ if (!EmittedPromotedGlobalLabels.count(GV)) {
+ MCSymbol *GVSym = getSymbol(GV);
+ OutStreamer->EmitLabel(GVSym);
+ EmittedPromotedGlobalLabels.insert(GV);
+ }
+ return EmitGlobalConstant(DL, ACPC->getPromotedGlobalInit());
+ }
+
MCSymbol *MCSym;
if (ACPV->isLSDA()) {
MCSym = getCurExceptionSym();
@@ -973,7 +1059,7 @@ EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) {
MCSym = MBB->getSymbol();
} else {
assert(ACPV->isExtSymbol() && "unrecognized constant pool value");
- const char *Sym = cast<ARMConstantPoolSymbol>(ACPV)->getSymbol();
+ auto Sym = cast<ARMConstantPoolSymbol>(ACPV)->getSymbol();
MCSym = GetExternalSymbolSymbol(Sym);
}
@@ -1037,7 +1123,7 @@ void ARMAsmPrinter::EmitJumpTableAddrs(const MachineInstr *MI) {
// .word (LBB1 - LJTI_0_0)
const MCExpr *Expr = MCSymbolRefExpr::create(MBB->getSymbol(), OutContext);
- if (isPositionIndependent())
+ if (isPositionIndependent() || Subtarget->isROPI())
Expr = MCBinaryExpr::createSub(Expr, MCSymbolRefExpr::create(JTISymbol,
OutContext),
OutContext);
@@ -1082,6 +1168,9 @@ void ARMAsmPrinter::EmitJumpTableTBInst(const MachineInstr *MI,
const MachineOperand &MO1 = MI->getOperand(1);
unsigned JTI = MO1.getIndex();
+ if (Subtarget->isThumb1Only())
+ EmitAlignment(2);
+
MCSymbol *JTISymbol = GetARMJTIPICJumpTableLabel(JTI);
OutStreamer->EmitLabel(JTISymbol);
@@ -1628,6 +1717,91 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
.addReg(0));
return;
}
+ case ARM::tTBB_JT:
+ case ARM::tTBH_JT: {
+
+ bool Is8Bit = MI->getOpcode() == ARM::tTBB_JT;
+ unsigned Base = MI->getOperand(0).getReg();
+ unsigned Idx = MI->getOperand(1).getReg();
+ assert(MI->getOperand(1).isKill() && "We need the index register as scratch!");
+
+ // Multiply up idx if necessary.
+ if (!Is8Bit)
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tLSLri)
+ .addReg(Idx)
+ .addReg(ARM::CPSR)
+ .addReg(Idx)
+ .addImm(1)
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ if (Base == ARM::PC) {
+ // TBB [base, idx] =
+ // ADDS idx, idx, base
+ // LDRB idx, [idx, #4] ; or LDRH if TBH
+ // LSLS idx, #1
+ // ADDS pc, pc, idx
+
+ // When using PC as the base, it's important that there is no padding
+ // between the last ADDS and the start of the jump table. The jump table
+ // is 4-byte aligned, so we ensure we're 4 byte aligned here too.
+ //
+ // FIXME: Ideally we could vary the LDRB index based on the padding
+ // between the sequence and jump table, however that relies on MCExprs
+ // for load indexes which are currently not supported.
+ OutStreamer->EmitCodeAlignment(4);
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tADDhirr)
+ .addReg(Idx)
+ .addReg(Idx)
+ .addReg(Base)
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ unsigned Opc = Is8Bit ? ARM::tLDRBi : ARM::tLDRHi;
+ EmitToStreamer(*OutStreamer, MCInstBuilder(Opc)
+ .addReg(Idx)
+ .addReg(Idx)
+ .addImm(Is8Bit ? 4 : 2)
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+ } else {
+ // TBB [base, idx] =
+ // LDRB idx, [base, idx] ; or LDRH if TBH
+ // LSLS idx, #1
+ // ADDS pc, pc, idx
+
+ unsigned Opc = Is8Bit ? ARM::tLDRBr : ARM::tLDRHr;
+ EmitToStreamer(*OutStreamer, MCInstBuilder(Opc)
+ .addReg(Idx)
+ .addReg(Base)
+ .addReg(Idx)
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+ }
+
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tLSLri)
+ .addReg(Idx)
+ .addReg(ARM::CPSR)
+ .addReg(Idx)
+ .addImm(1)
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+
+ OutStreamer->EmitLabel(GetCPISymbol(MI->getOperand(3).getImm()));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::tADDhirr)
+ .addReg(ARM::PC)
+ .addReg(ARM::PC)
+ .addReg(Idx)
+ // Add predicate operands.
+ .addImm(ARMCC::AL)
+ .addReg(0));
+ return;
+ }
case ARM::tBR_JTr:
case ARM::BR_JTr: {
// Lower and emit the instruction itself, then the jump table following it.
@@ -1961,6 +2135,15 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
.addReg(0));
return;
}
+ case ARM::PATCHABLE_FUNCTION_ENTER:
+ LowerPATCHABLE_FUNCTION_ENTER(*MI);
+ return;
+ case ARM::PATCHABLE_FUNCTION_EXIT:
+ LowerPATCHABLE_FUNCTION_EXIT(*MI);
+ return;
+ case ARM::PATCHABLE_TAIL_CALL:
+ LowerPATCHABLE_TAIL_CALL(*MI);
+ return;
}
MCInst TmpInst;
@@ -1975,8 +2158,8 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// Force static initialization.
extern "C" void LLVMInitializeARMAsmPrinter() {
- RegisterAsmPrinter<ARMAsmPrinter> X(TheARMLETarget);
- RegisterAsmPrinter<ARMAsmPrinter> Y(TheARMBETarget);
- RegisterAsmPrinter<ARMAsmPrinter> A(TheThumbLETarget);
- RegisterAsmPrinter<ARMAsmPrinter> B(TheThumbBETarget);
+ RegisterAsmPrinter<ARMAsmPrinter> X(getTheARMLETarget());
+ RegisterAsmPrinter<ARMAsmPrinter> Y(getTheARMBETarget());
+ RegisterAsmPrinter<ARMAsmPrinter> A(getTheThumbLETarget());
+ RegisterAsmPrinter<ARMAsmPrinter> B(getTheThumbBETarget());
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h
index 97f5ca0ecbc2..ce0b04d56d9e 100644
--- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h
+++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h
@@ -56,12 +56,22 @@ class LLVM_LIBRARY_VISIBILITY ARMAsmPrinter : public AsmPrinter {
/// -1 if uninitialized, 0 if conflicting goals
int OptimizationGoals;
+ /// List of globals that have had their storage promoted to a constant
+ /// pool. This lives between calls to runOnMachineFunction and collects
+ /// data from every MachineFunction. It is used during doFinalization
+ /// when all non-function globals are emitted.
+ SmallPtrSet<const GlobalVariable*,2> PromotedGlobals;
+ /// Set of globals in PromotedGlobals that we've emitted labels for.
+ /// We need to emit labels even for promoted globals so that DWARF
+ /// debug info can link properly.
+ SmallPtrSet<const GlobalVariable*,2> EmittedPromotedGlobalLabels;
+
public:
explicit ARMAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer);
- const char *getPassName() const override {
- return "ARM Assembly / Object Emitter";
+ StringRef getPassName() const override {
+ return "ARM Assembly Printer";
}
void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O);
@@ -90,11 +100,25 @@ public:
void EmitStartOfAsmFile(Module &M) override;
void EmitEndOfAsmFile(Module &M) override;
void EmitXXStructor(const DataLayout &DL, const Constant *CV) override;
-
+ void EmitGlobalVariable(const GlobalVariable *GV) override;
+
// lowerOperand - Convert a MachineOperand into the equivalent MCOperand.
bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp);
+ //===------------------------------------------------------------------===//
+ // XRay implementation
+ //===------------------------------------------------------------------===//
+public:
+ // XRay-specific lowering for ARM.
+ void LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI);
+ void LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI);
+ void LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI);
+ // Helper function that emits the XRay sleds we've collected for a particular
+ // function.
+ void EmitXRayTable();
+
private:
+ void EmitSled(const MachineInstr &MI, SledKind Kind);
// Helpers for EmitStartOfAsmFile() and EmitEndOfAsmFile()
void emitAttributes();
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 693f16499717..70a3246e34f1 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -382,7 +382,10 @@ bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
}
-unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB,
+ int *BytesRemoved) const {
+ assert(!BytesRemoved && "code size not handled");
+
MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
if (I == MBB.end())
return 0;
@@ -406,11 +409,13 @@ unsigned ARMBaseInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
return 2;
}
-unsigned ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB,
+unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
ArrayRef<MachineOperand> Cond,
- const DebugLoc &DL) const {
+ const DebugLoc &DL,
+ int *BytesAdded) const {
+ assert(!BytesAdded && "code size not handled");
ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
int BOpc = !AFI->isThumbFunction()
? ARM::B : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
@@ -419,7 +424,7 @@ unsigned ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB,
bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function();
// Shouldn't be a fall through.
- assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert(TBB && "insertBranch must not be told to insert a fallthrough");
assert((Cond.size() == 2 || Cond.size() == 0) &&
"ARM branch conditions have two components!");
@@ -448,7 +453,7 @@ unsigned ARMBaseInstrInfo::InsertBranch(MachineBasicBlock &MBB,
}
bool ARMBaseInstrInfo::
-ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
Cond[0].setImm(ARMCC::getOppositeCondition(CC));
return false;
@@ -575,6 +580,9 @@ bool ARMBaseInstrInfo::isPredicable(MachineInstr &MI) const {
if (!MI.isPredicable())
return false;
+ if (MI.isBundle())
+ return false;
+
if (!isEligibleForITBlock(&MI))
return false;
@@ -610,7 +618,7 @@ template <> bool IsCPSRDead<MachineInstr>(MachineInstr *MI) {
/// GetInstSize - Return the size of the specified MachineInstr.
///
-unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr &MI) const {
+unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
const MachineBasicBlock &MBB = *MI.getParent();
const MachineFunction *MF = MBB.getParent();
const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
@@ -669,7 +677,7 @@ unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr &MI) const {
MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
while (++I != E && I->isInsideBundle()) {
assert(!I->isBundle() && "No nested bundle!");
- Size += GetInstSizeInBytes(*I);
+ Size += getInstSizeInBytes(*I);
}
return Size;
}
@@ -868,7 +876,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
MachineMemOperand *MMO = MF.getMachineMemOperand(
@@ -1051,7 +1059,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
DebugLoc DL;
if (I != MBB.end()) DL = I->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
@@ -2069,29 +2077,40 @@ bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
int RegListIdx = IsT1PushPop ? 2 : 4;
// Calculate the space we'll need in terms of registers.
- unsigned FirstReg = MI->getOperand(RegListIdx).getReg();
- unsigned RD0Reg, RegsNeeded;
+ unsigned RegsNeeded;
+ const TargetRegisterClass *RegClass;
if (IsVFPPushPop) {
- RD0Reg = ARM::D0;
RegsNeeded = NumBytes / 8;
+ RegClass = &ARM::DPRRegClass;
} else {
- RD0Reg = ARM::R0;
RegsNeeded = NumBytes / 4;
+ RegClass = &ARM::GPRRegClass;
}
// We're going to have to strip all list operands off before
// re-adding them since the order matters, so save the existing ones
// for later.
SmallVector<MachineOperand, 4> RegList;
- for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
- RegList.push_back(MI->getOperand(i));
+
+ // We're also going to need the first register transferred by this
+ // instruction, which won't necessarily be the first register in the list.
+ unsigned FirstRegEnc = -1;
const TargetRegisterInfo *TRI = MF.getRegInfo().getTargetRegisterInfo();
+ for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) {
+ MachineOperand &MO = MI->getOperand(i);
+ RegList.push_back(MO);
+
+ if (MO.isReg() && TRI->getEncodingValue(MO.getReg()) < FirstRegEnc)
+ FirstRegEnc = TRI->getEncodingValue(MO.getReg());
+ }
+
const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);
// Now try to find enough space in the reglist to allocate NumBytes.
- for (unsigned CurReg = FirstReg - 1; CurReg >= RD0Reg && RegsNeeded;
- --CurReg) {
+ for (int CurRegEnc = FirstRegEnc - 1; CurRegEnc >= 0 && RegsNeeded;
+ --CurRegEnc) {
+ unsigned CurReg = RegClass->getRegister(CurRegEnc);
if (!IsPop) {
// Pushing any register is completely harmless, mark the
// register involved as undef since we don't care about it in
@@ -2291,6 +2310,7 @@ bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
default: break;
case ARM::CMPri:
case ARM::t2CMPri:
+ case ARM::tCMPi8:
SrcReg = MI.getOperand(0).getReg();
SrcReg2 = 0;
CmpMask = ~0;
@@ -2477,8 +2497,21 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
if (isPredicated(*MI))
return false;
+ bool IsThumb1 = false;
switch (MI->getOpcode()) {
default: break;
+ case ARM::tLSLri:
+ case ARM::tLSRri:
+ case ARM::tLSLrr:
+ case ARM::tLSRrr:
+ case ARM::tSUBrr:
+ case ARM::tADDrr:
+ case ARM::tADDi3:
+ case ARM::tADDi8:
+ case ARM::tSUBi3:
+ case ARM::tSUBi8:
+ IsThumb1 = true;
+ LLVM_FALLTHROUGH;
case ARM::RSBrr:
case ARM::RSBri:
case ARM::RSCrr:
@@ -2511,7 +2544,11 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
case ARM::EORrr:
case ARM::EORri:
case ARM::t2EORrr:
- case ARM::t2EORri: {
+ case ARM::t2EORri:
+ case ARM::t2LSRri:
+ case ARM::t2LSRrr:
+ case ARM::t2LSLri:
+ case ARM::t2LSLrr: {
// Scan forward for the use of CPSR
// When checking against MI: if it's a conditional code that requires
// checking of the V bit or C bit, then this is not safe to do.
@@ -2618,9 +2655,12 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
return false;
}
- // Toggle the optional operand to CPSR.
- MI->getOperand(5).setReg(ARM::CPSR);
- MI->getOperand(5).setIsDef(true);
+ // Toggle the optional operand to CPSR (if it exists - in Thumb1 we always
+ // set CPSR so this is represented as an explicit output)
+ if (!IsThumb1) {
+ MI->getOperand(5).setReg(ARM::CPSR);
+ MI->getOperand(5).setIsDef(true);
+ }
assert(!isPredicated(*MI) && "Can't use flags from predicated instruction");
CmpInstr.eraseFromParent();
@@ -2632,7 +2672,7 @@ bool ARMBaseInstrInfo::optimizeCompareInstr(
return true;
}
}
-
+
return false;
}
@@ -4119,6 +4159,9 @@ bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI,
void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
unsigned LoadImmOpc,
unsigned LoadOpc) const {
+ assert(!Subtarget.isROPI() && !Subtarget.isRWPI() &&
+ "ROPI/RWPI not currently supported with stack guard");
+
MachineBasicBlock &MBB = *MI->getParent();
DebugLoc DL = MI->getDebugLoc();
unsigned Reg = MI->getOperand(0).getReg();
@@ -4132,7 +4175,9 @@ void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,
if (Subtarget.isGVIndirectSymbol(GV)) {
MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);
MIB.addReg(Reg, RegState::Kill).addImm(0);
- auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant;
+ auto Flags = MachineMemOperand::MOLoad |
+ MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant;
MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, 4);
MIB.addMemOperand(MMO);
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index 52b0ff17dea2..b01d5c8ec85f 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -100,6 +100,10 @@ public:
// Return whether the target has an explicit NOP encoding.
bool hasNOP() const;
+ virtual void getNoopForElfTarget(MCInst &NopInst) const {
+ getNoopForMachoTarget(NopInst);
+ }
+
// Return the non-pre/post incrementing version of 'Opc'. Return 0
// if there is not such an opcode.
virtual unsigned getUnindexedOpcode(unsigned Opc) const =0;
@@ -124,13 +128,15 @@ public:
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify = false) const override;
- unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
- unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ unsigned removeBranch(MachineBasicBlock &MBB,
+ int *BytesRemoved = nullptr) const override;
+ unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
- const DebugLoc &DL) const override;
+ const DebugLoc &DL,
+ int *BytesAdded = nullptr) const override;
bool
- ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
+ reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
// Predication support.
bool isPredicated(const MachineInstr &MI) const override;
@@ -154,7 +160,7 @@ public:
/// GetInstSize - Returns the size of the specified MachineInstr.
///
- virtual unsigned GetInstSizeInBytes(const MachineInstr &MI) const;
+ unsigned getInstSizeInBytes(const MachineInstr &MI) const override;
unsigned isLoadFromStackSlot(const MachineInstr &MI,
int &FrameIndex) const override;
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index aa968efc37d4..d995c631dd1c 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -49,18 +49,13 @@ ARMBaseRegisterInfo::ARMBaseRegisterInfo()
: ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), BasePtr(ARM::R6) {}
static unsigned getFramePointerReg(const ARMSubtarget &STI) {
- if (STI.isTargetMachO())
- return ARM::R7;
- else if (STI.isTargetWindows())
- return ARM::R11;
- else // ARM EABI
- return STI.isThumb() ? ARM::R7 : ARM::R11;
+ return STI.useR7AsFramePointer() ? ARM::R7 : ARM::R11;
}
const MCPhysReg*
ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
const ARMSubtarget &STI = MF->getSubtarget<ARMSubtarget>();
- bool UseSplitPush = STI.splitFramePushPop();
+ bool UseSplitPush = STI.splitFramePushPop(*MF);
const MCPhysReg *RegList =
STI.isTargetDarwin()
? CSR_iOS_SaveList
@@ -136,6 +131,15 @@ ARMBaseRegisterInfo::getTLSCallPreservedMask(const MachineFunction &MF) const {
return CSR_iOS_TLSCall_RegMask;
}
+const uint32_t *
+ARMBaseRegisterInfo::getSjLjDispatchPreservedMask(const MachineFunction &MF) const {
+ const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
+ if (!STI.useSoftFloat() && STI.hasVFP2() && !STI.isThumb1Only())
+ return CSR_NoRegs_RegMask;
+ else
+ return CSR_FPRegs_RegMask;
+}
+
const uint32_t *
ARMBaseRegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF,
@@ -163,27 +167,29 @@ getReservedRegs(const MachineFunction &MF) const {
// FIXME: avoid re-calculating this every time.
BitVector Reserved(getNumRegs());
- Reserved.set(ARM::SP);
- Reserved.set(ARM::PC);
- Reserved.set(ARM::FPSCR);
- Reserved.set(ARM::APSR_NZCV);
+ markSuperRegs(Reserved, ARM::SP);
+ markSuperRegs(Reserved, ARM::PC);
+ markSuperRegs(Reserved, ARM::FPSCR);
+ markSuperRegs(Reserved, ARM::APSR_NZCV);
if (TFI->hasFP(MF))
- Reserved.set(getFramePointerReg(STI));
+ markSuperRegs(Reserved, getFramePointerReg(STI));
if (hasBasePointer(MF))
- Reserved.set(BasePtr);
+ markSuperRegs(Reserved, BasePtr);
// Some targets reserve R9.
if (STI.isR9Reserved())
- Reserved.set(ARM::R9);
+ markSuperRegs(Reserved, ARM::R9);
// Reserve D16-D31 if the subtarget doesn't support them.
if (!STI.hasVFP3() || STI.hasD16()) {
static_assert(ARM::D31 == ARM::D16 + 15, "Register list not consecutive!");
- Reserved.set(ARM::D16, ARM::D31 + 1);
+ for (unsigned R = 0; R < 16; ++R)
+ markSuperRegs(Reserved, ARM::D16 + R);
}
const TargetRegisterClass *RC = &ARM::GPRPairRegClass;
for(TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I!=E; ++I)
for (MCSubRegIterator SI(*I, this); SI.isValid(); ++SI)
- if (Reserved.test(*SI)) Reserved.set(*I);
+ if (Reserved.test(*SI)) markSuperRegs(Reserved, *I);
+ assert(checkAllSuperRegsMarked(Reserved));
return Reserved;
}
@@ -289,8 +295,7 @@ ARMBaseRegisterInfo::getRegAllocationHints(unsigned VirtReg,
}
// First prefer the paired physreg.
- if (PairedPhys &&
- std::find(Order.begin(), Order.end(), PairedPhys) != Order.end())
+ if (PairedPhys && is_contained(Order, PairedPhys))
Hints.push_back(PairedPhys);
// Then prefer even or odd registers.
@@ -332,7 +337,7 @@ ARMBaseRegisterInfo::updateRegAllocHint(unsigned Reg, unsigned NewReg,
}
bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
const ARMFrameLowering *TFI = getFrameLowering(MF);
@@ -347,14 +352,14 @@ bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
// It's going to be better to use the SP or Base Pointer instead. When there
// are variable sized objects, we can't reference off of the SP, so we
// reserve a Base Pointer.
- if (AFI->isThumbFunction() && MFI->hasVarSizedObjects()) {
+ if (AFI->isThumbFunction() && MFI.hasVarSizedObjects()) {
// Conservatively estimate whether the negative offset from the frame
// pointer will be sufficient to reach. If a function has a smallish
// frame, it's less likely to have lots of spills and callee saved
// space, so it's all more likely to be within range of the frame pointer.
// If it's wrong, the scavenger will still enable access to work, it just
// won't be optimal.
- if (AFI->isThumb2Function() && MFI->getLocalFrameSize() < 128)
+ if (AFI->isThumb2Function() && MFI.getLocalFrameSize() < 128)
return false;
return true;
}
@@ -389,10 +394,10 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const {
bool ARMBaseRegisterInfo::
cannotEliminateFrame(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- if (MF.getTarget().Options.DisableFramePointerElim(MF) && MFI->adjustsStack())
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (MF.getTarget().Options.DisableFramePointerElim(MF) && MFI.adjustsStack())
return true;
- return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken()
+ return MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken()
|| needsStackRealignment(MF);
}
@@ -536,7 +541,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
// so it'll be negative.
MachineFunction &MF = *MI->getParent()->getParent();
const ARMFrameLowering *TFI = getFrameLowering(MF);
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
// Estimate an offset from the frame pointer.
@@ -551,7 +556,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
// The incoming offset is relating to the SP at the start of the function,
// but when we access the local it'll be relative to the SP after local
// allocation, so adjust our SP-relative offset by that allocation size.
- Offset += MFI->getLocalFrameSize();
+ Offset += MFI.getLocalFrameSize();
// Assume that we'll have at least some spill slots allocated.
// FIXME: This is a total SWAG number. We should run some statistics
// and pick a real one.
@@ -563,7 +568,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
// on whether there are any local variables that would trigger it.
unsigned StackAlign = TFI->getStackAlignment();
if (TFI->hasFP(MF) &&
- !((MFI->getLocalFrameMaxAlign() > StackAlign) && canRealignStack(MF))) {
+ !((MFI.getLocalFrameMaxAlign() > StackAlign) && canRealignStack(MF))) {
if (isFrameOffsetLegal(MI, getFrameRegister(MF), FPOffset))
return false;
}
@@ -572,7 +577,7 @@ needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
// to only disallow SP relative references in the live range of
// the VLA(s). In practice, it's unclear how much difference that
// would make, but it may be worth doing.
- if (!MFI->hasVarSizedObjects() && isFrameOffsetLegal(MI, ARM::SP, Offset))
+ if (!MFI.hasVarSizedObjects() && isFrameOffsetLegal(MI, ARM::SP, Offset))
return false;
// The offset likely isn't legal, we want to allocate a virtual base register.
@@ -730,7 +735,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
assert(TFI->hasReservedCallFrame(MF) &&
"Cannot use SP to access the emergency spill slot in "
"functions without a reserved call frame");
- assert(!MF.getFrameInfo()->hasVarSizedObjects() &&
+ assert(!MF.getFrameInfo().hasVarSizedObjects() &&
"Cannot use SP to access the emergency spill slot in "
"functions with variable sized frame objects");
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
index 1eee94857e05..330e1535e863 100644
--- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -99,11 +99,12 @@ public:
/// Code Generation virtual methods...
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
const MCPhysReg *
- getCalleeSavedRegsViaCopy(const MachineFunction *MF) const override;
+ getCalleeSavedRegsViaCopy(const MachineFunction *MF) const;
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID) const override;
const uint32_t *getNoPreservedMask() const override;
const uint32_t *getTLSCallPreservedMask(const MachineFunction &MF) const;
+ const uint32_t *getSjLjDispatchPreservedMask(const MachineFunction &MF) const;
/// getThisReturnPreservedMask - Returns a call preserved mask specific to the
/// case that 'returned' is on an i32 first argument if the calling convention
diff --git a/contrib/llvm/lib/Target/ARM/ARMBasicBlockInfo.h b/contrib/llvm/lib/Target/ARM/ARMBasicBlockInfo.h
new file mode 100644
index 000000000000..780544f865df
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMBasicBlockInfo.h
@@ -0,0 +1,110 @@
+//===-- ARMBasicBlockInfo.h - Basic Block Information -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Utility functions and data structure for computing block size.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_ARMBASICBLOCKINFO_H
+#define LLVM_LIB_TARGET_ARM_ARMBASICBLOCKINFO_H
+
+#include "ARM.h"
+#include "ARMMachineFunctionInfo.h"
+using namespace llvm;
+
+namespace llvm {
+
+/// UnknownPadding - Return the worst case padding that could result from
+/// unknown offset bits. This does not include alignment padding caused by
+/// known offset bits.
+///
+/// @param LogAlign log2(alignment)
+/// @param KnownBits Number of known low offset bits.
+inline unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) {
+ if (KnownBits < LogAlign)
+ return (1u << LogAlign) - (1u << KnownBits);
+ return 0;
+}
+
+/// BasicBlockInfo - Information about the offset and size of a single
+/// basic block.
+struct BasicBlockInfo {
+ /// Offset - Distance from the beginning of the function to the beginning
+ /// of this basic block.
+ ///
+ /// Offsets are computed assuming worst case padding before an aligned
+ /// block. This means that subtracting basic block offsets always gives a
+ /// conservative estimate of the real distance which may be smaller.
+ ///
+ /// Because worst case padding is used, the computed offset of an aligned
+ /// block may not actually be aligned.
+ unsigned Offset;
+
+ /// Size - Size of the basic block in bytes. If the block contains
+ /// inline assembly, this is a worst case estimate.
+ ///
+ /// The size does not include any alignment padding whether from the
+ /// beginning of the block, or from an aligned jump table at the end.
+ unsigned Size;
+
+ /// KnownBits - The number of low bits in Offset that are known to be
+ /// exact. The remaining bits of Offset are an upper bound.
+ uint8_t KnownBits;
+
+ /// Unalign - When non-zero, the block contains instructions (inline asm)
+ /// of unknown size. The real size may be smaller than Size bytes by a
+ /// multiple of 1 << Unalign.
+ uint8_t Unalign;
+
+ /// PostAlign - When non-zero, the block terminator contains a .align
+ /// directive, so the end of the block is aligned to 1 << PostAlign
+ /// bytes.
+ uint8_t PostAlign;
+
+ BasicBlockInfo() : Offset(0), Size(0), KnownBits(0), Unalign(0),
+ PostAlign(0) {}
+
+ /// Compute the number of known offset bits internally to this block.
+ /// This number should be used to predict worst case padding when
+ /// splitting the block.
+ unsigned internalKnownBits() const {
+ unsigned Bits = Unalign ? Unalign : KnownBits;
+ // If the block size isn't a multiple of the known bits, assume the
+ // worst case padding.
+ if (Size & ((1u << Bits) - 1))
+ Bits = countTrailingZeros(Size);
+ return Bits;
+ }
+
+ /// Compute the offset immediately following this block. If LogAlign is
+ /// specified, return the offset the successor block will get if it has
+ /// this alignment.
+ unsigned postOffset(unsigned LogAlign = 0) const {
+ unsigned PO = Offset + Size;
+ unsigned LA = std::max(unsigned(PostAlign), LogAlign);
+ if (!LA)
+ return PO;
+ // Add alignment padding from the terminator.
+ return PO + UnknownPadding(LA, internalKnownBits());
+ }
+
+ /// Compute the number of known low bits of postOffset. If this block
+ /// contains inline asm, the number of known bits drops to the
+ /// instruction alignment. An aligned terminator may increase the number
+  /// of known bits.
+ /// If LogAlign is given, also consider the alignment of the next block.
+ unsigned postKnownBits(unsigned LogAlign = 0) const {
+ return std::max(std::max(unsigned(PostAlign), LogAlign),
+ internalKnownBits());
+ }
+};
+
+} // end namespace llvm
+
+#endif
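The worst-case padding arithmetic above (UnknownPadding plus BasicBlockInfo::postOffset) is easiest to follow with concrete numbers. The standalone sketch below is editorial illustration only, not part of the patch; the block's Offset, Size and KnownBits values are hypothetical, and it simply reuses the UnknownPadding formula shown in the header.

// Standalone sketch of the worst-case padding arithmetic used by
// BasicBlockInfo; mirrors UnknownPadding()/postOffset() above with
// made-up field values.
#include <algorithm>
#include <cassert>
#include <cstdio>

static unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) {
  if (KnownBits < LogAlign)
    return (1u << LogAlign) - (1u << KnownBits);
  return 0;
}

int main() {
  // Hypothetical block: starts at offset 0x104, is 6 bytes long, and only
  // the low bit of the offset is known exact (KnownBits = 1, i.e. 2-byte
  // aligned). No inline asm, so internalKnownBits() would equal KnownBits
  // here (Size is a multiple of 1 << KnownBits).
  unsigned Offset = 0x104, Size = 6, KnownBits = 1, PostAlign = 0;

  // Successor wants 4-byte alignment (LogAlign = 2). Worst case we may need
  // (1 << 2) - (1 << 1) = 2 bytes of padding before it.
  unsigned LogAlign = 2;
  unsigned LA = std::max(PostAlign, LogAlign);
  unsigned PO = Offset + Size + (LA ? UnknownPadding(LA, KnownBits) : 0);

  assert(UnknownPadding(LogAlign, KnownBits) == 2);
  std::printf("postOffset upper bound: %#x\n", PO); // prints 0x10c
  return 0;
}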
diff --git a/contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp
new file mode 100644
index 000000000000..52c95b6244ac
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMCallLowering.cpp
@@ -0,0 +1,203 @@
+//===-- llvm/lib/Target/ARM/ARMCallLowering.cpp - Call lowering -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements the lowering of LLVM calls to machine code calls for
+/// GlobalISel.
+///
+//===----------------------------------------------------------------------===//
+
+#include "ARMCallLowering.h"
+
+#include "ARMBaseInstrInfo.h"
+#include "ARMISelLowering.h"
+
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+#ifndef LLVM_BUILD_GLOBAL_ISEL
+#error "This shouldn't be built without GISel"
+#endif
+
+ARMCallLowering::ARMCallLowering(const ARMTargetLowering &TLI)
+ : CallLowering(&TLI) {}
+
+static bool isSupportedType(const DataLayout DL, const ARMTargetLowering &TLI,
+ Type *T) {
+ EVT VT = TLI.getValueType(DL, T);
+ if (!VT.isSimple() || !VT.isInteger() || VT.isVector())
+ return false;
+
+ unsigned VTSize = VT.getSimpleVT().getSizeInBits();
+ return VTSize == 8 || VTSize == 16 || VTSize == 32;
+}
+
+namespace {
+struct FuncReturnHandler : public CallLowering::ValueHandler {
+ FuncReturnHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+ MachineInstrBuilder &MIB)
+ : ValueHandler(MIRBuilder, MRI), MIB(MIB) {}
+
+ unsigned getStackAddress(uint64_t Size, int64_t Offset,
+ MachinePointerInfo &MPO) override {
+ llvm_unreachable("Don't know how to get a stack address yet");
+ }
+
+ void assignValueToReg(unsigned ValVReg, unsigned PhysReg,
+ CCValAssign &VA) override {
+    assert(VA.isRegLoc() && "Value should be assigned to reg");
+ assert(VA.getLocReg() == PhysReg && "Assigning to the wrong reg?");
+
+ assert(VA.getValVT().getSizeInBits() <= 32 && "Unsupported value size");
+ assert(VA.getLocVT().getSizeInBits() == 32 && "Unsupported location size");
+
+ assert(VA.getLocInfo() != CCValAssign::SExt &&
+ VA.getLocInfo() != CCValAssign::ZExt &&
+ "ABI extensions not supported yet");
+
+ MIRBuilder.buildCopy(PhysReg, ValVReg);
+ MIB.addUse(PhysReg, RegState::Implicit);
+ }
+
+ void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size,
+ MachinePointerInfo &MPO, CCValAssign &VA) override {
+ llvm_unreachable("Don't know how to assign a value to an address yet");
+ }
+
+ MachineInstrBuilder &MIB;
+};
+} // End anonymous namespace.
+
+/// Lower the return value for the already existing \p Ret. This assumes that
+/// \p MIRBuilder's insertion point is correct.
+bool ARMCallLowering::lowerReturnVal(MachineIRBuilder &MIRBuilder,
+ const Value *Val, unsigned VReg,
+ MachineInstrBuilder &Ret) const {
+ if (!Val)
+ // Nothing to do here.
+ return true;
+
+ auto &MF = MIRBuilder.getMF();
+ const auto &F = *MF.getFunction();
+
+ auto DL = MF.getDataLayout();
+ auto &TLI = *getTLI<ARMTargetLowering>();
+ if (!isSupportedType(DL, TLI, Val->getType()))
+ return false;
+
+ CCAssignFn *AssignFn =
+ TLI.CCAssignFnForReturn(F.getCallingConv(), F.isVarArg());
+
+ ArgInfo RetInfo(VReg, Val->getType());
+ setArgFlags(RetInfo, AttributeSet::ReturnIndex, DL, F);
+
+ FuncReturnHandler RetHandler(MIRBuilder, MF.getRegInfo(), Ret);
+ return handleAssignments(MIRBuilder, AssignFn, RetInfo, RetHandler);
+}
+
+bool ARMCallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
+ const Value *Val, unsigned VReg) const {
+ assert(!Val == !VReg && "Return value without a vreg");
+
+ auto Ret = AddDefaultPred(MIRBuilder.buildInstrNoInsert(ARM::BX_RET));
+
+ if (!lowerReturnVal(MIRBuilder, Val, VReg, Ret))
+ return false;
+
+ MIRBuilder.insertInstr(Ret);
+ return true;
+}
+
+namespace {
+struct FormalArgHandler : public CallLowering::ValueHandler {
+ FormalArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
+ : ValueHandler(MIRBuilder, MRI) {}
+
+ unsigned getStackAddress(uint64_t Size, int64_t Offset,
+ MachinePointerInfo &MPO) override {
+ assert(Size == 4 && "Unsupported size");
+
+ auto &MFI = MIRBuilder.getMF().getFrameInfo();
+
+ int FI = MFI.CreateFixedObject(Size, Offset, true);
+ MPO = MachinePointerInfo::getFixedStack(MIRBuilder.getMF(), FI);
+
+ unsigned AddrReg =
+ MRI.createGenericVirtualRegister(LLT::pointer(MPO.getAddrSpace(), 32));
+ MIRBuilder.buildFrameIndex(AddrReg, FI);
+
+ return AddrReg;
+ }
+
+ void assignValueToAddress(unsigned ValVReg, unsigned Addr, uint64_t Size,
+ MachinePointerInfo &MPO, CCValAssign &VA) override {
+ assert(Size == 4 && "Unsupported size");
+
+ auto MMO = MIRBuilder.getMF().getMachineMemOperand(
+ MPO, MachineMemOperand::MOLoad, Size, /* Alignment */ 0);
+ MIRBuilder.buildLoad(ValVReg, Addr, *MMO);
+ }
+
+ void assignValueToReg(unsigned ValVReg, unsigned PhysReg,
+ CCValAssign &VA) override {
+    assert(VA.isRegLoc() && "Value should be assigned to reg");
+ assert(VA.getLocReg() == PhysReg && "Assigning to the wrong reg?");
+
+ assert(VA.getValVT().getSizeInBits() <= 32 && "Unsupported value size");
+ assert(VA.getLocVT().getSizeInBits() == 32 && "Unsupported location size");
+
+ MIRBuilder.getMBB().addLiveIn(PhysReg);
+ MIRBuilder.buildCopy(ValVReg, PhysReg);
+ }
+};
+} // End anonymous namespace
+
+bool ARMCallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
+ const Function &F,
+ ArrayRef<unsigned> VRegs) const {
+ // Quick exit if there aren't any args
+ if (F.arg_empty())
+ return true;
+
+ if (F.isVarArg())
+ return false;
+
+ auto DL = MIRBuilder.getMF().getDataLayout();
+ auto &TLI = *getTLI<ARMTargetLowering>();
+
+ auto &Args = F.getArgumentList();
+ unsigned ArgIdx = 0;
+ for (auto &Arg : Args) {
+ ArgIdx++;
+ if (!isSupportedType(DL, TLI, Arg.getType()))
+ return false;
+
+ // FIXME: This check as well as ArgIdx are going away as soon as we support
+ // loading values < 32 bits.
+ if (ArgIdx > 4 && Arg.getType()->getIntegerBitWidth() != 32)
+ return false;
+ }
+
+ CCAssignFn *AssignFn =
+ TLI.CCAssignFnForCall(F.getCallingConv(), F.isVarArg());
+
+ SmallVector<ArgInfo, 8> ArgInfos;
+ unsigned Idx = 0;
+ for (auto &Arg : Args) {
+ ArgInfo AInfo(VRegs[Idx], Arg.getType());
+ setArgFlags(AInfo, Idx + 1, DL, F);
+ ArgInfos.push_back(AInfo);
+ Idx++;
+ }
+
+ FormalArgHandler ArgHandler(MIRBuilder, MIRBuilder.getMF().getRegInfo());
+ return handleAssignments(MIRBuilder, AssignFn, ArgInfos, ArgHandler);
+}
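As a side note on isSupportedType above: at this stage the GlobalISel call lowering only accepts simple integer values of 8, 16 or 32 bits and rejects vectors and wider integers. The tiny standalone check below mirrors that width predicate without the LLVM EVT/TargetLowering machinery; widthIsSupported is a made-up helper for illustration, not an LLVM API.

// Standalone model of the width check in isSupportedType(): only scalar
// integer types of 8, 16 or 32 bits pass; vectors and i64 are rejected.
#include <cassert>

static bool widthIsSupported(unsigned Bits, bool IsVector) {
  if (IsVector)
    return false;
  return Bits == 8 || Bits == 16 || Bits == 32;
}

int main() {
  assert(widthIsSupported(32, false));  // i32 is fine
  assert(!widthIsSupported(64, false)); // i64 makes the lowering bail out
  assert(!widthIsSupported(32, true));  // vector types are rejected as well
  return 0;
}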
diff --git a/contrib/llvm/lib/Target/ARM/ARMCallLowering.h b/contrib/llvm/lib/Target/ARM/ARMCallLowering.h
new file mode 100644
index 000000000000..6a1b886b501f
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMCallLowering.h
@@ -0,0 +1,42 @@
+//===-- llvm/lib/Target/ARM/ARMCallLowering.h - Call lowering -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file describes how to lower LLVM calls to machine code calls.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_ARMCALLLOWERING
+#define LLVM_LIB_TARGET_ARM_ARMCALLLOWERING
+
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/CodeGen/ValueTypes.h"
+
+namespace llvm {
+
+class ARMTargetLowering;
+class MachineInstrBuilder;
+
+class ARMCallLowering : public CallLowering {
+public:
+ ARMCallLowering(const ARMTargetLowering &TLI);
+
+  bool lowerReturn(MachineIRBuilder &MIRBuilder, const Value *Val,
+ unsigned VReg) const override;
+
+ bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
+ ArrayRef<unsigned> VRegs) const override;
+
+private:
+ bool lowerReturnVal(MachineIRBuilder &MIRBuilder, const Value *Val,
+ unsigned VReg, MachineInstrBuilder &Ret) const;
+};
+} // End of namespace llvm
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
index edb69581b9d3..9c278a52a7ff 100644
--- a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
+++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td
@@ -242,6 +242,7 @@ def RetCC_ARM_AAPCS_VFP : CallingConv<[
//===----------------------------------------------------------------------===//
def CSR_NoRegs : CalleeSavedRegs<(add)>;
+def CSR_FPRegs : CalleeSavedRegs<(add (sequence "D%u", 0, 31))>;
def CSR_AAPCS : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, R5, R4,
(sequence "D%u", 15, 8))>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMComputeBlockSize.cpp b/contrib/llvm/lib/Target/ARM/ARMComputeBlockSize.cpp
new file mode 100644
index 000000000000..64f187d17e64
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMComputeBlockSize.cpp
@@ -0,0 +1,72 @@
+//===--- ARMComputeBlockSize.cpp - Compute machine block sizes ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "ARM.h"
+#include "ARMBasicBlockInfo.h"
+using namespace llvm;
+
+namespace llvm {
+
+// mayOptimizeThumb2Instruction - Returns true if optimizeThumb2Instructions
+// below may shrink MI.
+static bool
+mayOptimizeThumb2Instruction(const MachineInstr *MI) {
+ switch(MI->getOpcode()) {
+ // optimizeThumb2Instructions.
+ case ARM::t2LEApcrel:
+ case ARM::t2LDRpci:
+ // optimizeThumb2Branches.
+ case ARM::t2B:
+ case ARM::t2Bcc:
+ case ARM::tBcc:
+ // optimizeThumb2JumpTables.
+ case ARM::t2BR_JT:
+ return true;
+ }
+ return false;
+}
+
+void computeBlockSize(MachineFunction *MF, MachineBasicBlock *MBB,
+ BasicBlockInfo &BBI) {
+ const ARMBaseInstrInfo *TII =
+ static_cast<const ARMBaseInstrInfo *>(MF->getSubtarget().getInstrInfo());
+ bool isThumb = MF->getInfo<ARMFunctionInfo>()->isThumbFunction();
+ BBI.Size = 0;
+ BBI.Unalign = 0;
+ BBI.PostAlign = 0;
+
+ for (MachineInstr &I : *MBB) {
+ BBI.Size += TII->getInstSizeInBytes(I);
+ // For inline asm, getInstSizeInBytes returns a conservative estimate.
+ // The actual size may be smaller, but still a multiple of the instr size.
+ if (I.isInlineAsm())
+ BBI.Unalign = isThumb ? 1 : 2;
+ // Also consider instructions that may be shrunk later.
+ else if (isThumb && mayOptimizeThumb2Instruction(&I))
+ BBI.Unalign = 1;
+ }
+
+ // tBR_JTr contains a .align 2 directive.
+ if (!MBB->empty() && MBB->back().getOpcode() == ARM::tBR_JTr) {
+ BBI.PostAlign = 2;
+ MBB->getParent()->ensureAlignment(2);
+ }
+}
+
+std::vector<BasicBlockInfo> computeAllBlockSizes(MachineFunction *MF) {
+ std::vector<BasicBlockInfo> BBInfo;
+ BBInfo.resize(MF->getNumBlockIDs());
+
+ for (MachineBasicBlock &MBB : *MF)
+ computeBlockSize(MF, &MBB, BBInfo[MBB.getNumber()]);
+
+ return BBInfo;
+}
+
+} // end namespace
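To make the Size/Unalign bookkeeping in computeBlockSize concrete, here is a minimal standalone model, again editorial and not part of the patch. It stands in for MachineInstr with (worst-case size, is-inline-asm) pairs and assumes a hypothetical Thumb block; the real pass obtains the per-instruction sizes from ARMBaseInstrInfo::getInstSizeInBytes.

// Minimal model of the Size/Unalign accounting in computeBlockSize();
// illustrates the bookkeeping only, not the real LLVM API.
#include <cstdio>
#include <utility>
#include <vector>

int main() {
  struct Info { unsigned Size = 0; unsigned Unalign = 0; } BBI;
  bool IsThumb = true;

  // Hypothetical Thumb block: two 2-byte instructions, one 4-byte
  // instruction, and an inline-asm blob conservatively estimated at 8 bytes.
  std::vector<std::pair<unsigned, bool>> Insts = {
      {2, false}, {2, false}, {4, false}, {8, true}};

  for (auto &I : Insts) {
    BBI.Size += I.first;             // accumulate worst-case byte counts
    if (I.second)                    // inline asm: real size may be smaller
      BBI.Unalign = IsThumb ? 1 : 2; // by a multiple of 1 << Unalign
  }

  std::printf("Size=%u Unalign=%u\n", BBI.Size, BBI.Unalign); // Size=16 Unalign=1
  return 0;
}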
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
index 8511f67dccd5..be1a37e3e362 100644
--- a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
#include "ARM.h"
+#include "ARMBasicBlockInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "Thumb2InstrInfo.h"
@@ -57,18 +58,10 @@ static cl::opt<unsigned>
CPMaxIteration("arm-constant-island-max-iteration", cl::Hidden, cl::init(30),
cl::desc("The max number of iteration for converge"));
-
-/// UnknownPadding - Return the worst case padding that could result from
-/// unknown offset bits. This does not include alignment padding caused by
-/// known offset bits.
-///
-/// @param LogAlign log2(alignment)
-/// @param KnownBits Number of known low offset bits.
-static inline unsigned UnknownPadding(unsigned LogAlign, unsigned KnownBits) {
- if (KnownBits < LogAlign)
- return (1u << LogAlign) - (1u << KnownBits);
- return 0;
-}
+static cl::opt<bool> SynthesizeThumb1TBB(
+ "arm-synthesize-thumb-1-tbb", cl::Hidden, cl::init(true),
+ cl::desc("Use compressed jump tables in Thumb-1 by synthesizing an "
+ "equivalent to the TBB/TBH instructions"));
namespace {
/// ARMConstantIslands - Due to limited PC-relative displacements, ARM
@@ -83,78 +76,6 @@ namespace {
/// CPE - A constant pool entry that has been placed somewhere, which
/// tracks a list of users.
class ARMConstantIslands : public MachineFunctionPass {
- /// BasicBlockInfo - Information about the offset and size of a single
- /// basic block.
- struct BasicBlockInfo {
- /// Offset - Distance from the beginning of the function to the beginning
- /// of this basic block.
- ///
- /// Offsets are computed assuming worst case padding before an aligned
- /// block. This means that subtracting basic block offsets always gives a
- /// conservative estimate of the real distance which may be smaller.
- ///
- /// Because worst case padding is used, the computed offset of an aligned
- /// block may not actually be aligned.
- unsigned Offset;
-
- /// Size - Size of the basic block in bytes. If the block contains
- /// inline assembly, this is a worst case estimate.
- ///
- /// The size does not include any alignment padding whether from the
- /// beginning of the block, or from an aligned jump table at the end.
- unsigned Size;
-
- /// KnownBits - The number of low bits in Offset that are known to be
- /// exact. The remaining bits of Offset are an upper bound.
- uint8_t KnownBits;
-
- /// Unalign - When non-zero, the block contains instructions (inline asm)
- /// of unknown size. The real size may be smaller than Size bytes by a
- /// multiple of 1 << Unalign.
- uint8_t Unalign;
-
- /// PostAlign - When non-zero, the block terminator contains a .align
- /// directive, so the end of the block is aligned to 1 << PostAlign
- /// bytes.
- uint8_t PostAlign;
-
- BasicBlockInfo() : Offset(0), Size(0), KnownBits(0), Unalign(0),
- PostAlign(0) {}
-
- /// Compute the number of known offset bits internally to this block.
- /// This number should be used to predict worst case padding when
- /// splitting the block.
- unsigned internalKnownBits() const {
- unsigned Bits = Unalign ? Unalign : KnownBits;
- // If the block size isn't a multiple of the known bits, assume the
- // worst case padding.
- if (Size & ((1u << Bits) - 1))
- Bits = countTrailingZeros(Size);
- return Bits;
- }
-
- /// Compute the offset immediately following this block. If LogAlign is
- /// specified, return the offset the successor block will get if it has
- /// this alignment.
- unsigned postOffset(unsigned LogAlign = 0) const {
- unsigned PO = Offset + Size;
- unsigned LA = std::max(unsigned(PostAlign), LogAlign);
- if (!LA)
- return PO;
- // Add alignment padding from the terminator.
- return PO + UnknownPadding(LA, internalKnownBits());
- }
-
- /// Compute the number of known low bits of postOffset. If this block
- /// contains inline asm, the number of known bits drops to the
- /// instruction alignment. An aligned terminator may increase the number
- /// of know bits.
- /// If LogAlign is given, also consider the alignment of the next block.
- unsigned postKnownBits(unsigned LogAlign = 0) const {
- return std::max(std::max(unsigned(PostAlign), LogAlign),
- internalKnownBits());
- }
- };
std::vector<BasicBlockInfo> BBInfo;
@@ -273,6 +194,7 @@ namespace {
bool isThumb;
bool isThumb1;
bool isThumb2;
+ bool isPositionIndependentOrROPI;
public:
static char ID;
ARMConstantIslands() : MachineFunctionPass(ID) {}
@@ -281,10 +203,10 @@ namespace {
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "ARM constant island placement and branch shortening pass";
}
@@ -319,7 +241,6 @@ namespace {
bool fixupConditionalBr(ImmBranch &Br);
bool fixupUnconditionalBr(ImmBranch &Br);
bool undoLRSpillRestore();
- bool mayOptimizeThumb2Instruction(const MachineInstr *MI) const;
bool optimizeThumb2Instructions();
bool optimizeThumb2Branches();
bool reorderThumb2JumpTables();
@@ -330,7 +251,6 @@ namespace {
MachineBasicBlock *adjustJTTargetBlockForward(MachineBasicBlock *BB,
MachineBasicBlock *JTBB);
- void computeBlockSize(MachineBasicBlock *MBB);
unsigned getOffsetOf(MachineInstr *MI) const;
unsigned getUserOffset(CPUser&) const;
void dumpBBs();
@@ -405,6 +325,8 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
STI = &static_cast<const ARMSubtarget &>(MF->getSubtarget());
TII = STI->getInstrInfo();
+ isPositionIndependentOrROPI =
+ STI->getTargetLowering()->isPositionIndependent() || STI->isROPI();
AFI = MF->getInfo<ARMFunctionInfo>();
isThumb = AFI->isThumbFunction();
@@ -412,6 +334,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
isThumb2 = AFI->isThumb2Function();
HasFarJump = false;
+ bool GenerateTBB = isThumb2 || (isThumb1 && SynthesizeThumb1TBB);
// This pass invalidates liveness information when it splits basic blocks.
MF->getRegInfo().invalidateLiveness();
@@ -423,7 +346,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
// Try to reorder and otherwise adjust the block layout to make good use
// of the TB[BH] instructions.
bool MadeChange = false;
- if (isThumb2 && AdjustJumpTableBlocks) {
+ if (GenerateTBB && AdjustJumpTableBlocks) {
scanFunctionJumpTables();
MadeChange |= reorderThumb2JumpTables();
// Data is out of date, so clear it. It'll be re-computed later.
@@ -500,7 +423,7 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &mf) {
MadeChange |= optimizeThumb2Branches();
// Optimize jump tables using TBB / TBH.
- if (isThumb2)
+ if (GenerateTBB && !STI->genExecuteOnly())
MadeChange |= optimizeThumb2JumpTables();
// After a while, this might be made debug-only, but it is not expensive.
@@ -626,9 +549,11 @@ void ARMConstantIslands::doInitialJumpTablePlacement(
case ARM::t2BR_JT:
JTOpcode = ARM::JUMPTABLE_INSTS;
break;
+ case ARM::tTBB_JT:
case ARM::t2TBB_JT:
JTOpcode = ARM::JUMPTABLE_TBB;
break;
+ case ARM::tTBH_JT:
case ARM::t2TBH_JT:
JTOpcode = ARM::JUMPTABLE_TBH;
break;
@@ -668,7 +593,7 @@ bool ARMConstantIslands::BBHasFallthrough(MachineBasicBlock *MBB) {
return false;
MachineBasicBlock *NextBB = &*std::next(MBBI);
- if (std::find(MBB->succ_begin(), MBB->succ_end(), NextBB) == MBB->succ_end())
+ if (!MBB->isSuccessor(NextBB))
return false;
// Try to analyze the end of the block. A potential fallthrough may already
@@ -701,8 +626,9 @@ unsigned ARMConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) {
case ARM::CONSTPOOL_ENTRY:
break;
case ARM::JUMPTABLE_TBB:
- return 0;
+ return isThumb1 ? 2 : 0;
case ARM::JUMPTABLE_TBH:
+ return isThumb1 ? 2 : 1;
case ARM::JUMPTABLE_INSTS:
return 1;
case ARM::JUMPTABLE_ADDRS:
@@ -724,7 +650,8 @@ unsigned ARMConstantIslands::getCPELogAlign(const MachineInstr *CPEMI) {
void ARMConstantIslands::scanFunctionJumpTables() {
for (MachineBasicBlock &MBB : *MF) {
for (MachineInstr &I : MBB)
- if (I.isBranch() && I.getOpcode() == ARM::t2BR_JT)
+ if (I.isBranch() &&
+ (I.getOpcode() == ARM::t2BR_JT || I.getOpcode() == ARM::tBR_JTr))
T2JumpTables.push_back(&I);
}
}
@@ -734,15 +661,8 @@ void ARMConstantIslands::scanFunctionJumpTables() {
/// and finding all of the constant pool users.
void ARMConstantIslands::
initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
- BBInfo.clear();
- BBInfo.resize(MF->getNumBlockIDs());
- // First thing, compute the size of all basic blocks, and see if the function
- // has any inline assembly in it. If so, we have to be conservative about
- // alignment assumptions, as we don't know for sure the size of any
- // instructions in the inline assembly.
- for (MachineBasicBlock &MBB : *MF)
- computeBlockSize(&MBB);
+ BBInfo = computeAllBlockSizes(MF);
// The known bits of the entry block offset are determined by the function
// alignment.
@@ -772,12 +692,13 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
default:
continue; // Ignore other JT branches
case ARM::t2BR_JT:
+ case ARM::tBR_JTr:
T2JumpTables.push_back(&I);
continue; // Does not get an entry in ImmBranches
case ARM::Bcc:
isCond = true;
UOpc = ARM::B;
- // Fallthrough
+ LLVM_FALLTHROUGH;
case ARM::B:
Bits = 24;
Scale = 4;
@@ -860,6 +781,7 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
case ARM::LDRi12:
case ARM::LDRcp:
case ARM::t2LDRpci:
+ case ARM::t2LDRHpci:
Bits = 12; // +-offset_12
NegOk = true;
break;
@@ -875,6 +797,11 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
Scale = 4; // +-(offset_8*4)
NegOk = true;
break;
+
+ case ARM::tLDRHi:
+ Bits = 5;
+ Scale = 2; // +(offset_5*2)
+ break;
}
// Remember that this is a user of a CP entry.
@@ -901,32 +828,6 @@ initializeFunctionInfo(const std::vector<MachineInstr*> &CPEMIs) {
}
}
-/// computeBlockSize - Compute the size and some alignment information for MBB.
-/// This function updates BBInfo directly.
-void ARMConstantIslands::computeBlockSize(MachineBasicBlock *MBB) {
- BasicBlockInfo &BBI = BBInfo[MBB->getNumber()];
- BBI.Size = 0;
- BBI.Unalign = 0;
- BBI.PostAlign = 0;
-
- for (MachineInstr &I : *MBB) {
- BBI.Size += TII->GetInstSizeInBytes(I);
- // For inline asm, GetInstSizeInBytes returns a conservative estimate.
- // The actual size may be smaller, but still a multiple of the instr size.
- if (I.isInlineAsm())
- BBI.Unalign = isThumb ? 1 : 2;
- // Also consider instructions that may be shrunk later.
- else if (isThumb && mayOptimizeThumb2Instruction(&I))
- BBI.Unalign = 1;
- }
-
- // tBR_JTr contains a .align 2 directive.
- if (!MBB->empty() && MBB->back().getOpcode() == ARM::tBR_JTr) {
- BBI.PostAlign = 2;
- MBB->getParent()->ensureAlignment(2);
- }
-}
-
/// getOffsetOf - Return the current offset of the specified machine instruction
/// from the start of the function. This offset changes as stuff is moved
/// around inside the function.
@@ -941,7 +842,7 @@ unsigned ARMConstantIslands::getOffsetOf(MachineInstr *MI) const {
// Sum instructions before MI in MBB.
for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) {
assert(I != MBB->end() && "Didn't find MI in its own basic block?");
- Offset += TII->GetInstSizeInBytes(*I);
+ Offset += TII->getInstSizeInBytes(*I);
}
return Offset;
}
@@ -1034,11 +935,11 @@ MachineBasicBlock *ARMConstantIslands::splitBlockBeforeInstr(MachineInstr *MI) {
// the new jump we added. (It should be possible to do this without
// recounting everything, but it's very confusing, and this is rarely
// executed.)
- computeBlockSize(OrigBB);
+ computeBlockSize(MF, OrigBB, BBInfo[OrigBB->getNumber()]);
// Figure out how large the NewMBB is. As the second half of the original
// block, it may contain a tablejump.
- computeBlockSize(NewBB);
+ computeBlockSize(MF, NewBB, BBInfo[NewBB->getNumber()]);
// All BBOffsets following these blocks must be modified.
adjustBBOffsetsAfter(OrigBB);
@@ -1400,7 +1301,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
unsigned MaxDisp = getUnconditionalBrDisp(UncondBr);
ImmBranches.push_back(ImmBranch(&UserMBB->back(),
MaxDisp, false, UncondBr));
- computeBlockSize(UserMBB);
+ computeBlockSize(MF, UserMBB, BBInfo[UserMBB->getNumber()]);
adjustBBOffsetsAfter(UserMBB);
return;
}
@@ -1449,7 +1350,7 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
// iterates at least once.
BaseInsertOffset =
std::max(UserBBI.postOffset() - UPad - 8,
- UserOffset + TII->GetInstSizeInBytes(*UserMI) + 1);
+ UserOffset + TII->getInstSizeInBytes(*UserMI) + 1);
DEBUG(dbgs() << format("Move inside block: %#x\n", BaseInsertOffset));
}
unsigned EndInsertOffset = BaseInsertOffset + 4 + UPad +
@@ -1459,9 +1360,9 @@ void ARMConstantIslands::createNewWater(unsigned CPUserIndex,
unsigned CPUIndex = CPUserIndex+1;
unsigned NumCPUsers = CPUsers.size();
MachineInstr *LastIT = nullptr;
- for (unsigned Offset = UserOffset + TII->GetInstSizeInBytes(*UserMI);
+ for (unsigned Offset = UserOffset + TII->getInstSizeInBytes(*UserMI);
Offset < BaseInsertOffset;
- Offset += TII->GetInstSizeInBytes(*MI), MI = std::next(MI)) {
+ Offset += TII->getInstSizeInBytes(*MI), MI = std::next(MI)) {
assert(MI != UserMBB->end() && "Fell off end of block");
if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == &*MI) {
CPUser &U = CPUsers[CPUIndex];
@@ -1551,7 +1452,7 @@ bool ARMConstantIslands::handleConstantPoolUser(unsigned CPUserIndex,
// it. Check for this so it will be removed from the WaterList.
// Also remove any entry from NewWaterList.
MachineBasicBlock *WaterBB = &*--NewMBB->getIterator();
- IP = std::find(WaterList.begin(), WaterList.end(), WaterBB);
+ IP = find(WaterList, WaterBB);
if (IP != WaterList.end())
NewWaterList.erase(WaterBB);
@@ -1762,7 +1663,7 @@ ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) {
splitBlockBeforeInstr(MI);
// No need for the branch to the next block. We're adding an unconditional
// branch to the destination.
- int delta = TII->GetInstSizeInBytes(MBB->back());
+ int delta = TII->getInstSizeInBytes(MBB->back());
BBInfo[MBB->getNumber()].Size -= delta;
MBB->back().eraseFromParent();
// BBInfo[SplitBB].Offset is wrong temporarily, fixed below
@@ -1778,18 +1679,18 @@ ARMConstantIslands::fixupConditionalBr(ImmBranch &Br) {
BuildMI(MBB, DebugLoc(), TII->get(MI->getOpcode()))
.addMBB(NextBB).addImm(CC).addReg(CCReg);
Br.MI = &MBB->back();
- BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(MBB->back());
+ BBInfo[MBB->getNumber()].Size += TII->getInstSizeInBytes(MBB->back());
if (isThumb)
BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB)
.addImm(ARMCC::AL).addReg(0);
else
BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB);
- BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(MBB->back());
+ BBInfo[MBB->getNumber()].Size += TII->getInstSizeInBytes(MBB->back());
unsigned MaxDisp = getUnconditionalBrDisp(Br.UncondBr);
ImmBranches.push_back(ImmBranch(&MBB->back(), MaxDisp, false, Br.UncondBr));
// Remove the old conditional branch. It may or may not still be in MBB.
- BBInfo[MI->getParent()->getNumber()].Size -= TII->GetInstSizeInBytes(*MI);
+ BBInfo[MI->getParent()->getNumber()].Size -= TII->getInstSizeInBytes(*MI);
MI->eraseFromParent();
adjustBBOffsetsAfter(MBB);
return true;
@@ -1817,25 +1718,6 @@ bool ARMConstantIslands::undoLRSpillRestore() {
return MadeChange;
}
-// mayOptimizeThumb2Instruction - Returns true if optimizeThumb2Instructions
-// below may shrink MI.
-bool
-ARMConstantIslands::mayOptimizeThumb2Instruction(const MachineInstr *MI) const {
- switch(MI->getOpcode()) {
- // optimizeThumb2Instructions.
- case ARM::t2LEApcrel:
- case ARM::t2LDRpci:
- // optimizeThumb2Branches.
- case ARM::t2B:
- case ARM::t2Bcc:
- case ARM::tBcc:
- // optimizeThumb2JumpTables.
- case ARM::t2BR_JT:
- return true;
- }
- return false;
-}
-
bool ARMConstantIslands::optimizeThumb2Instructions() {
bool MadeChange = false;
@@ -2075,7 +1957,7 @@ bool ARMConstantIslands::preserveBaseRegister(MachineInstr *JumpMI,
if (RemovableAdd) {
RemovableAdd->eraseFromParent();
- DeadSize += 4;
+ DeadSize += isThumb2 ? 4 : 2;
} else if (BaseReg == EntryReg) {
// The add wasn't removable, but clobbered the base for the TBB. So we can't
// preserve it.
@@ -2142,25 +2024,82 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() {
if (!ByteOk && !HalfWordOk)
continue;
+ CPUser &User = CPUsers[JumpTableUserIndices[JTI]];
MachineBasicBlock *MBB = MI->getParent();
if (!MI->getOperand(0).isKill()) // FIXME: needed now?
continue;
- unsigned IdxReg = MI->getOperand(1).getReg();
- bool IdxRegKill = MI->getOperand(1).isKill();
- CPUser &User = CPUsers[JumpTableUserIndices[JTI]];
unsigned DeadSize = 0;
bool CanDeleteLEA = false;
bool BaseRegKill = false;
- bool PreservedBaseReg =
+
+ unsigned IdxReg = ~0U;
+ bool IdxRegKill = true;
+ if (isThumb2) {
+ IdxReg = MI->getOperand(1).getReg();
+ IdxRegKill = MI->getOperand(1).isKill();
+
+ bool PreservedBaseReg =
preserveBaseRegister(MI, User.MI, DeadSize, CanDeleteLEA, BaseRegKill);
+ if (!jumpTableFollowsTB(MI, User.CPEMI) && !PreservedBaseReg)
+ continue;
+ } else {
+ // We're in thumb-1 mode, so we must have something like:
+ // %idx = tLSLri %idx, 2
+ // %base = tLEApcrelJT
+ // %t = tLDRr %idx, %base
+ unsigned BaseReg = User.MI->getOperand(0).getReg();
+
+ if (User.MI->getIterator() == User.MI->getParent()->begin())
+ continue;
+ MachineInstr *Shift = User.MI->getPrevNode();
+ if (Shift->getOpcode() != ARM::tLSLri ||
+ Shift->getOperand(3).getImm() != 2 ||
+ !Shift->getOperand(2).isKill())
+ continue;
+ IdxReg = Shift->getOperand(2).getReg();
+ unsigned ShiftedIdxReg = Shift->getOperand(0).getReg();
- if (!jumpTableFollowsTB(MI, User.CPEMI) && !PreservedBaseReg)
- continue;
+ MachineInstr *Load = User.MI->getNextNode();
+ if (Load->getOpcode() != ARM::tLDRr)
+ continue;
+ if (Load->getOperand(1).getReg() != ShiftedIdxReg ||
+ Load->getOperand(2).getReg() != BaseReg ||
+ !Load->getOperand(1).isKill())
+ continue;
+ // If we're in PIC mode, there should be another ADD following.
+ if (isPositionIndependentOrROPI) {
+ MachineInstr *Add = Load->getNextNode();
+ if (Add->getOpcode() != ARM::tADDrr ||
+ Add->getOperand(2).getReg() != Load->getOperand(0).getReg() ||
+ Add->getOperand(3).getReg() != BaseReg ||
+ !Add->getOperand(2).isKill())
+ continue;
+ if (Add->getOperand(0).getReg() != MI->getOperand(0).getReg())
+ continue;
+
+ Add->eraseFromParent();
+ DeadSize += 2;
+ } else {
+ if (Load->getOperand(0).getReg() != MI->getOperand(0).getReg())
+ continue;
+ }
+
+ // Now safe to delete the load and lsl. The LEA will be removed later.
+ CanDeleteLEA = true;
+ Shift->eraseFromParent();
+ Load->eraseFromParent();
+ DeadSize += 4;
+ }
+
DEBUG(dbgs() << "Shrink JT: " << *MI);
MachineInstr *CPEMI = User.CPEMI;
unsigned Opc = ByteOk ? ARM::t2TBB_JT : ARM::t2TBH_JT;
+ if (!isThumb2)
+ Opc = ByteOk ? ARM::tTBB_JT : ARM::tTBH_JT;
+
MachineBasicBlock::iterator MI_JT = MI;
MachineInstr *NewJTMI =
BuildMI(*MBB, MI_JT, MI->getDebugLoc(), TII->get(Opc))
@@ -2180,7 +2119,7 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() {
if (CanDeleteLEA) {
User.MI->eraseFromParent();
- DeadSize += 4;
+ DeadSize += isThumb2 ? 4 : 2;
// The LEA was eliminated, the TBB instruction becomes the only new user
// of the jump table.
@@ -2194,16 +2133,15 @@ bool ARMConstantIslands::optimizeThumb2JumpTables() {
// record the TBB or TBH use.
int CPEntryIdx = JumpTableEntryIndices[JTI];
auto &CPEs = CPEntries[CPEntryIdx];
- auto Entry = std::find_if(CPEs.begin(), CPEs.end(), [&](CPEntry &E) {
- return E.CPEMI == User.CPEMI;
- });
+ auto Entry =
+ find_if(CPEs, [&](CPEntry &E) { return E.CPEMI == User.CPEMI; });
++Entry->RefCount;
CPUsers.emplace_back(CPUser(NewJTMI, User.CPEMI, 4, false, false));
}
}
- unsigned NewSize = TII->GetInstSizeInBytes(*NewJTMI);
- unsigned OrigSize = TII->GetInstSizeInBytes(*MI);
+ unsigned NewSize = TII->getInstSizeInBytes(*NewJTMI);
+ unsigned OrigSize = TII->getInstSizeInBytes(*MI);
MI->eraseFromParent();
int Delta = OrigSize - NewSize + DeadSize;
@@ -2297,9 +2235,16 @@ adjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB) {
// Add an unconditional branch from NewBB to BB.
// There doesn't seem to be meaningful DebugInfo available; this doesn't
// correspond directly to anything in the source.
- assert (isThumb2 && "Adjusting for TB[BH] but not in Thumb2?");
- BuildMI(NewBB, DebugLoc(), TII->get(ARM::t2B)).addMBB(BB)
- .addImm(ARMCC::AL).addReg(0);
+ if (isThumb2)
+ BuildMI(NewBB, DebugLoc(), TII->get(ARM::t2B))
+ .addMBB(BB)
+ .addImm(ARMCC::AL)
+ .addReg(0);
+ else
+ BuildMI(NewBB, DebugLoc(), TII->get(ARM::tB))
+ .addMBB(BB)
+ .addImm(ARMCC::AL)
+ .addReg(0);
// Update internal data structures to account for the newly inserted MBB.
MF->RenumberBlocks(NewBB);
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp
index c0db001cb6f1..2d1602873ce0 100644
--- a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp
@@ -46,7 +46,7 @@ ARMConstantPoolValue::ARMConstantPoolValue(LLVMContext &C, unsigned id,
ARMConstantPoolValue::~ARMConstantPoolValue() {}
-const char *ARMConstantPoolValue::getModifierText() const {
+StringRef ARMConstantPoolValue::getModifierText() const {
switch (Modifier) {
// FIXME: Are these case sensitive? It'd be nice to lower-case all the
// strings if that's legal.
@@ -60,6 +60,8 @@ const char *ARMConstantPoolValue::getModifierText() const {
return "gottpoff";
case ARMCP::TPOFF:
return "tpoff";
+ case ARMCP::SBREL:
+ return "SBREL";
case ARMCP::SECREL:
return "secrel32";
}
@@ -129,6 +131,12 @@ ARMConstantPoolConstant::ARMConstantPoolConstant(const Constant *C,
AddCurrentAddress),
CVal(C) {}
+ARMConstantPoolConstant::ARMConstantPoolConstant(const GlobalVariable *GV,
+ const Constant *C)
+ : ARMConstantPoolValue((Type *)C->getType(), 0, ARMCP::CPPromotedGlobal, 0,
+ ARMCP::no_modifier, false),
+ CVal(C), GVar(GV) {}
+
ARMConstantPoolConstant *
ARMConstantPoolConstant::Create(const Constant *C, unsigned ID) {
return new ARMConstantPoolConstant(C, ID, ARMCP::CPValue, 0,
@@ -136,6 +144,12 @@ ARMConstantPoolConstant::Create(const Constant *C, unsigned ID) {
}
ARMConstantPoolConstant *
+ARMConstantPoolConstant::Create(const GlobalVariable *GVar,
+ const Constant *Initializer) {
+ return new ARMConstantPoolConstant(GVar, Initializer);
+}
+
+ARMConstantPoolConstant *
ARMConstantPoolConstant::Create(const GlobalValue *GV,
ARMCP::ARMCPModifier Modifier) {
return new ARMConstantPoolConstant((Type*)Type::getInt32Ty(GV->getContext()),
@@ -191,18 +205,17 @@ void ARMConstantPoolConstant::print(raw_ostream &O) const {
// ARMConstantPoolSymbol
//===----------------------------------------------------------------------===//
-ARMConstantPoolSymbol::ARMConstantPoolSymbol(LLVMContext &C, const char *s,
- unsigned id,
- unsigned char PCAdj,
+ARMConstantPoolSymbol::ARMConstantPoolSymbol(LLVMContext &C, StringRef s,
+ unsigned id, unsigned char PCAdj,
ARMCP::ARMCPModifier Modifier,
bool AddCurrentAddress)
- : ARMConstantPoolValue(C, id, ARMCP::CPExtSymbol, PCAdj, Modifier,
- AddCurrentAddress),
- S(s) {}
+ : ARMConstantPoolValue(C, id, ARMCP::CPExtSymbol, PCAdj, Modifier,
+ AddCurrentAddress),
+ S(s) {}
-ARMConstantPoolSymbol *
-ARMConstantPoolSymbol::Create(LLVMContext &C, const char *s,
- unsigned ID, unsigned char PCAdj) {
+ARMConstantPoolSymbol *ARMConstantPoolSymbol::Create(LLVMContext &C,
+ StringRef s, unsigned ID,
+ unsigned char PCAdj) {
return new ARMConstantPoolSymbol(C, s, ID, PCAdj, ARMCP::no_modifier, false);
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h
index c07331d71dad..5f61832aa740 100644
--- a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h
+++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h
@@ -24,6 +24,7 @@ namespace llvm {
class BlockAddress;
class Constant;
class GlobalValue;
+class GlobalVariable;
class LLVMContext;
class MachineBasicBlock;
@@ -33,7 +34,8 @@ namespace ARMCP {
CPExtSymbol,
CPBlockAddress,
CPLSDA,
- CPMachineBasicBlock
+ CPMachineBasicBlock,
+ CPPromotedGlobal
};
enum ARMCPModifier {
@@ -43,6 +45,7 @@ namespace ARMCP {
GOTTPOFF, /// Global Offset Table, Thread Pointer Offset
TPOFF, /// Thread Pointer Offset
SECREL, /// Section Relative (Windows TLS)
+ SBREL, /// Static Base Relative (RWPI)
};
}
@@ -89,7 +92,7 @@ public:
~ARMConstantPoolValue() override;
ARMCP::ARMCPModifier getModifier() const { return Modifier; }
- const char *getModifierText() const;
+ StringRef getModifierText() const;
bool hasModifier() const { return Modifier != ARMCP::no_modifier; }
bool mustAddCurrentAddress() const { return AddCurrentAddress; }
@@ -102,7 +105,8 @@ public:
bool isBlockAddress() const { return Kind == ARMCP::CPBlockAddress; }
bool isLSDA() const { return Kind == ARMCP::CPLSDA; }
bool isMachineBasicBlock() const{ return Kind == ARMCP::CPMachineBasicBlock; }
-
+ bool isPromotedGlobal() const{ return Kind == ARMCP::CPPromotedGlobal; }
+
int getExistingMachineCPValue(MachineConstantPool *CP,
unsigned Alignment) override;
@@ -132,6 +136,7 @@ inline raw_ostream &operator<<(raw_ostream &O, const ARMConstantPoolValue &V) {
/// Functions, and BlockAddresses.
class ARMConstantPoolConstant : public ARMConstantPoolValue {
const Constant *CVal; // Constant being loaded.
+ const GlobalVariable *GVar = nullptr;
ARMConstantPoolConstant(const Constant *C,
unsigned ID,
@@ -145,11 +150,14 @@ class ARMConstantPoolConstant : public ARMConstantPoolValue {
unsigned char PCAdj,
ARMCP::ARMCPModifier Modifier,
bool AddCurrentAddress);
+ ARMConstantPoolConstant(const GlobalVariable *GV, const Constant *Init);
public:
static ARMConstantPoolConstant *Create(const Constant *C, unsigned ID);
static ARMConstantPoolConstant *Create(const GlobalValue *GV,
ARMCP::ARMCPModifier Modifier);
+ static ARMConstantPoolConstant *Create(const GlobalVariable *GV,
+ const Constant *Initializer);
static ARMConstantPoolConstant *Create(const Constant *C, unsigned ID,
ARMCP::ARMCPKind Kind,
unsigned char PCAdj);
@@ -161,6 +169,12 @@ public:
const GlobalValue *getGV() const;
const BlockAddress *getBlockAddress() const;
+ const GlobalVariable *getPromotedGlobal() const {
+ return dyn_cast_or_null<GlobalVariable>(GVar);
+ }
+ const Constant *getPromotedGlobalInit() const {
+ return CVal;
+ }
int getExistingMachineCPValue(MachineConstantPool *CP,
unsigned Alignment) override;
@@ -173,7 +187,8 @@ public:
void print(raw_ostream &O) const override;
static bool classof(const ARMConstantPoolValue *APV) {
- return APV->isGlobalValue() || APV->isBlockAddress() || APV->isLSDA();
+ return APV->isGlobalValue() || APV->isBlockAddress() || APV->isLSDA() ||
+ APV->isPromotedGlobal();
}
bool equals(const ARMConstantPoolConstant *A) const {
@@ -186,15 +201,15 @@ public:
class ARMConstantPoolSymbol : public ARMConstantPoolValue {
const std::string S; // ExtSymbol being loaded.
- ARMConstantPoolSymbol(LLVMContext &C, const char *s, unsigned id,
+ ARMConstantPoolSymbol(LLVMContext &C, StringRef s, unsigned id,
unsigned char PCAdj, ARMCP::ARMCPModifier Modifier,
bool AddCurrentAddress);
public:
- static ARMConstantPoolSymbol *Create(LLVMContext &C, const char *s,
- unsigned ID, unsigned char PCAdj);
+ static ARMConstantPoolSymbol *Create(LLVMContext &C, StringRef s, unsigned ID,
+ unsigned char PCAdj);
- const char *getSymbol() const { return S.c_str(); }
+ StringRef getSymbol() const { return S; }
int getExistingMachineCPValue(MachineConstantPool *CP,
unsigned Alignment) override;
diff --git a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index a7b299677c1c..95fcc8dcb453 100644
--- a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -53,10 +53,10 @@ namespace {
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "ARM pseudo instruction expansion pass";
}
@@ -657,6 +657,9 @@ static bool IsAnAddressOperand(const MachineOperand &MO) {
return true;
case MachineOperand::MO_CFIIndex:
return false;
+ case MachineOperand::MO_IntrinsicID:
+ case MachineOperand::MO_Predicate:
+ llvm_unreachable("should not exist post-isel");
}
llvm_unreachable("unhandled machine operand type");
}
@@ -1175,8 +1178,8 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
}
// If there's dynamic realignment, adjust for it.
if (RI.needsStackRealignment(MF)) {
- MachineFrameInfo *MFI = MF.getFrameInfo();
- unsigned MaxAlign = MFI->getMaxAlignment();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ unsigned MaxAlign = MFI.getMaxAlignment();
assert (!AFI->isThumb1OnlyFunction());
// Emit bic r6, r6, MaxAlign
assert(MaxAlign <= 256 && "The BIC instruction cannot encode "
diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
index 13724da5d4f7..df4dcb375750 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp
@@ -112,11 +112,6 @@ class ARMFastISel final : public FastISel {
const TargetRegisterClass *RC,
unsigned Op0, bool Op0IsKill,
uint64_t Imm);
- unsigned fastEmitInst_rri(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- unsigned Op0, bool Op0IsKill,
- unsigned Op1, bool Op1IsKill,
- uint64_t Imm);
unsigned fastEmitInst_i(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
uint64_t Imm);
@@ -351,36 +346,6 @@ unsigned ARMFastISel::fastEmitInst_ri(unsigned MachineInstOpcode,
return ResultReg;
}
-unsigned ARMFastISel::fastEmitInst_rri(unsigned MachineInstOpcode,
- const TargetRegisterClass *RC,
- unsigned Op0, bool Op0IsKill,
- unsigned Op1, bool Op1IsKill,
- uint64_t Imm) {
- unsigned ResultReg = createResultReg(RC);
- const MCInstrDesc &II = TII.get(MachineInstOpcode);
-
- // Make sure the input operands are sufficiently constrained to be legal
- // for this instruction.
- Op0 = constrainOperandRegClass(II, Op0, 1);
- Op1 = constrainOperandRegClass(II, Op1, 2);
- if (II.getNumDefs() >= 1) {
- AddOptionalDefs(
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
- .addReg(Op0, Op0IsKill * RegState::Kill)
- .addReg(Op1, Op1IsKill * RegState::Kill)
- .addImm(Imm));
- } else {
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
- .addReg(Op0, Op0IsKill * RegState::Kill)
- .addReg(Op1, Op1IsKill * RegState::Kill)
- .addImm(Imm));
- AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::COPY), ResultReg)
- .addReg(II.ImplicitDefs[0]));
- }
- return ResultReg;
-}
-
unsigned ARMFastISel::fastEmitInst_i(unsigned MachineInstOpcode,
const TargetRegisterClass *RC,
uint64_t Imm) {
@@ -546,6 +511,10 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
// For now 32-bit only.
if (VT != MVT::i32 || GV->isThreadLocal()) return 0;
+ // ROPI/RWPI not currently supported.
+ if (Subtarget->isROPI() || Subtarget->isRWPI())
+ return 0;
+
bool IsIndirect = Subtarget->isGVIndirectSymbol(GV);
const TargetRegisterClass *RC = isThumb2 ? &ARM::rGPRRegClass
: &ARM::GPRRegClass;
@@ -764,7 +733,7 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) {
for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
i != e; ++i, ++GTI) {
const Value *Op = *i;
- if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ if (StructType *STy = GTI.getStructTypeOrNull()) {
const StructLayout *SL = DL.getStructLayout(STy);
unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
TmpOffset += SL->getElementOffset(Idx);
@@ -1071,7 +1040,8 @@ bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr,
TII.get(Opc), Res)
.addReg(SrcReg).addImm(1));
SrcReg = Res;
- } // Fallthrough here.
+ LLVM_FALLTHROUGH;
+ }
case MVT::i8:
if (isThumb2) {
if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops())
@@ -1844,7 +1814,7 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
// For AAPCS ABI targets, just use VFP variant of the calling convention.
return (Return ? RetCC_ARM_AAPCS_VFP : CC_ARM_AAPCS_VFP);
}
- // Fallthrough
+ LLVM_FALLTHROUGH;
case CallingConv::C:
case CallingConv::CXX_FAST_TLS:
// Use target triple & subtarget features to do actual dispatch.
@@ -1863,6 +1833,7 @@ CCAssignFn *ARMFastISel::CCAssignFnForCall(CallingConv::ID CC,
return (Return ? RetCC_ARM_AAPCS_VFP: CC_ARM_AAPCS_VFP);
// Fall through to soft float variant, variadic functions don't
// use hard floating point ABI.
+ LLVM_FALLTHROUGH;
case CallingConv::ARM_AAPCS:
return (Return ? RetCC_ARM_AAPCS: CC_ARM_AAPCS);
case CallingConv::ARM_APCS:
@@ -2481,8 +2452,8 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) {
switch (I.getIntrinsicID()) {
default: return false;
case Intrinsic::frameaddress: {
- MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo();
- MFI->setFrameAddressIsTaken(true);
+ MachineFrameInfo &MFI = FuncInfo.MF->getFrameInfo();
+ MFI.setFrameAddressIsTaken(true);
unsigned LdrOpc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12;
const TargetRegisterClass *RC = isThumb2 ? &ARM::tGPRRegClass
diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
index e8c9f610ea64..c72db8aca108 100644
--- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp
@@ -30,6 +30,8 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetOptions.h"
+#define DEBUG_TYPE "arm-frame-lowering"
+
using namespace llvm;
static cl::opt<bool>
@@ -57,18 +59,16 @@ bool ARMFrameLowering::noFramePointerElim(const MachineFunction &MF) const {
/// or if frame pointer elimination is disabled.
bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
- // iOS requires FP not to be clobbered for backtracing purpose.
- if (STI.isTargetIOS() || STI.isTargetWatchOS())
+ // ABI-required frame pointer.
+ if (MF.getTarget().Options.DisableFramePointerElim(MF))
return true;
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- // Always eliminate non-leaf frame pointers.
- return ((MF.getTarget().Options.DisableFramePointerElim(MF) &&
- MFI->hasCalls()) ||
- RegInfo->needsStackRealignment(MF) ||
- MFI->hasVarSizedObjects() ||
- MFI->isFrameAddressTaken());
+ // Frame pointer required for use within this function.
+ return (RegInfo->needsStackRealignment(MF) ||
+ MFI.hasVarSizedObjects() ||
+ MFI.isFrameAddressTaken());
}
/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
@@ -77,8 +77,8 @@ bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {
/// add/sub sp brackets around call sites. Returns true if the call frame is
/// included as part of the stack frame.
bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
- const MachineFrameInfo *FFI = MF.getFrameInfo();
- unsigned CFSize = FFI->getMaxCallFrameSize();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ unsigned CFSize = MFI.getMaxCallFrameSize();
// It's not always a good idea to include the call frame as part of the
// stack frame. ARM (especially Thumb) has small immediate offset to
// address the stack frame. So a large call frame can cause poor codegen
@@ -86,7 +86,7 @@ bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12
return false;
- return !MF.getFrameInfo()->hasVarSizedObjects();
+ return !MFI.hasVarSizedObjects();
}
/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
@@ -95,7 +95,7 @@ bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
/// even when FP is available in Thumb2 mode.
bool
ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
- return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects();
+ return hasReservedCallFrame(MF) || MF.getFrameInfo().hasVarSizedObjects();
}
static bool isCSRestore(MachineInstr &MI, const ARMBaseInstrInfo &TII,
@@ -169,9 +169,9 @@ static int sizeOfSPAdjustment(const MachineInstr &MI) {
static bool WindowsRequiresStackProbe(const MachineFunction &MF,
size_t StackSizeInBytes) {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
const Function *F = MF.getFunction();
- unsigned StackProbeSize = (MFI->getStackProtectorIndex() > 0) ? 4080 : 4096;
+ unsigned StackProbeSize = (MFI.getStackProtectorIndex() > 0) ? 4080 : 4096;
if (F->hasFnAttribute("stack-probe-size"))
F->getFnAttribute("stack-probe-size")
.getValueAsString()
@@ -196,22 +196,21 @@ struct StackAdjustingInsts {
}
void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) {
- auto Info = std::find_if(Insts.begin(), Insts.end(),
- [&](InstInfo &Info) { return Info.I == I; });
+ auto Info = find_if(Insts, [&](InstInfo &Info) { return Info.I == I; });
assert(Info != Insts.end() && "invalid sp adjusting instruction");
Info->SPAdjust += ExtraBytes;
}
- void emitDefCFAOffsets(MachineModuleInfo &MMI, MachineBasicBlock &MBB,
- const DebugLoc &dl, const ARMBaseInstrInfo &TII,
- bool HasFP) {
+ void emitDefCFAOffsets(MachineBasicBlock &MBB, const DebugLoc &dl,
+ const ARMBaseInstrInfo &TII, bool HasFP) {
+ MachineFunction &MF = *MBB.getParent();
unsigned CFAOffset = 0;
for (auto &Info : Insts) {
if (HasFP && !Info.BeforeFPSet)
return;
CFAOffset -= Info.SPAdjust;
- unsigned CFIIndex = MMI.addFrameInst(
+ unsigned CFIIndex = MF.addFrameInst(
MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
BuildMI(MBB, std::next(Info.I), dl,
TII.get(TargetOpcode::CFI_INSTRUCTION))
@@ -288,7 +287,7 @@ static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
void ARMFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
MachineModuleInfo &MMI = MF.getMMI();
MCContext &Context = MMI.getContext();
@@ -301,8 +300,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
bool isARM = !AFI->isThumbFunction();
unsigned Align = STI.getFrameLowering()->getStackAlignment();
unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
- unsigned NumBytes = MFI->getStackSize();
- const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ unsigned NumBytes = MFI.getStackSize();
+ const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
// Debug location must be unknown since the first debug location is used
// to determine the end of the prologue.
@@ -339,7 +338,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
DefCFAOffsetCandidates.addInst(std::prev(MBBI),
NumBytes - ArgRegsSaveSize, true);
}
- DefCFAOffsetCandidates.emitDefCFAOffsets(MMI, MBB, dl, TII, HasFP);
+ DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
return;
}
@@ -353,11 +352,11 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
case ARM::R10:
case ARM::R11:
case ARM::R12:
- if (STI.splitFramePushPop()) {
+ if (STI.splitFramePushPop(MF)) {
GPRCS2Size += 4;
break;
}
- // fallthrough
+ LLVM_FALLTHROUGH;
case ARM::R0:
case ARM::R1:
case ARM::R2:
@@ -396,8 +395,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
int FramePtrOffsetInPush = 0;
if (HasFP) {
FramePtrOffsetInPush =
- MFI->getObjectOffset(FramePtrSpillFI) + ArgRegsSaveSize;
- AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) +
+ MFI.getObjectOffset(FramePtrSpillFI) + ArgRegsSaveSize;
+ AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
NumBytes);
}
AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
@@ -414,7 +413,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
// .cfi_offset operations will reflect that.
if (DPRGapSize) {
assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs");
- if (tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, DPRGapSize))
+ if (LastPush != MBB.end() &&
+ tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, DPRGapSize))
DefCFAOffsetCandidates.addExtraBytes(LastPush, DPRGapSize);
else {
emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize,
@@ -440,7 +440,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
// leaves the stack pointer pointing to the DPRCS2 area.
//
// Adjust NumBytes to represent the stack slots below the DPRCS2 area.
- NumBytes += MFI->getObjectOffset(D8SpillFI);
+ NumBytes += MFI.getObjectOffset(D8SpillFI);
} else
NumBytes = DPRCSOffset;
@@ -526,7 +526,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
PushSize + FramePtrOffsetInPush,
MachineInstr::FrameSetup);
if (FramePtrOffsetInPush + PushSize != 0) {
- unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfa(
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
nullptr, MRI->getDwarfRegNum(FramePtr, true),
-(ArgRegsSaveSize - FramePtrOffsetInPush)));
BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
@@ -534,7 +534,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
.setMIFlags(MachineInstr::FrameSetup);
} else {
unsigned CFIIndex =
- MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister(
+ MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(
nullptr, MRI->getDwarfRegNum(FramePtr, true)));
BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
@@ -557,9 +557,9 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
case ARM::R10:
case ARM::R11:
case ARM::R12:
- if (STI.splitFramePushPop())
+ if (STI.splitFramePushPop(MF))
break;
- // fallthrough
+ LLVM_FALLTHROUGH;
case ARM::R0:
case ARM::R1:
case ARM::R2:
@@ -569,8 +569,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
case ARM::R6:
case ARM::R7:
case ARM::LR:
- CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
- nullptr, MRI->getDwarfRegNum(Reg, true), MFI->getObjectOffset(FI)));
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
+ nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
@@ -590,10 +590,10 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
case ARM::R10:
case ARM::R11:
case ARM::R12:
- if (STI.splitFramePushPop()) {
+ if (STI.splitFramePushPop(MF)) {
unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
- unsigned Offset = MFI->getObjectOffset(FI);
- unsigned CFIIndex = MMI.addFrameInst(
+ unsigned Offset = MFI.getObjectOffset(FI);
+ unsigned CFIIndex = MF.addFrameInst(
MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
@@ -614,8 +614,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
if ((Reg >= ARM::D0 && Reg <= ARM::D31) &&
(Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())) {
unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
- unsigned Offset = MFI->getObjectOffset(FI);
- unsigned CFIIndex = MMI.addFrameInst(
+ unsigned Offset = MFI.getObjectOffset(FI);
+ unsigned CFIIndex = MF.addFrameInst(
MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
@@ -628,11 +628,11 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
// throughout the process. If we have a frame pointer, it takes over the job
// half-way through, so only the first few .cfi_def_cfa_offset instructions
// actually get emitted.
- DefCFAOffsetCandidates.emitDefCFAOffsets(MMI, MBB, dl, TII, HasFP);
+ DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
if (STI.isTargetELF() && hasFP(MF))
- MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
- AFI->getFramePtrSpillOffset());
+ MFI.setOffsetAdjustment(MFI.getOffsetAdjustment() -
+ AFI->getFramePtrSpillOffset());
AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
@@ -644,7 +644,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
// If aligned NEON registers were spilled, the stack has already been
// realigned.
if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->needsStackRealignment(MF)) {
- unsigned MaxAlign = MFI->getMaxAlignment();
+ unsigned MaxAlign = MFI.getMaxAlignment();
assert(!AFI->isThumb1OnlyFunction());
if (!AFI->isThumbFunction()) {
emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::SP, MaxAlign,
@@ -688,13 +688,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF,
// If the frame has variable sized objects then the epilogue must restore
// the sp from fp. We can assume there's an FP here since hasFP already
// checks for hasVarSizedObjects.
- if (MFI->hasVarSizedObjects())
+ if (MFI.hasVarSizedObjects())
AFI->setShouldRestoreSPFromFP(true);
}
void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
const ARMBaseInstrInfo &TII =
@@ -704,7 +704,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
bool isARM = !AFI->isThumbFunction();
unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
- int NumBytes = (int)MFI->getStackSize();
+ int NumBytes = (int)MFI.getStackSize();
unsigned FramePtr = RegInfo->getFrameRegister(MF);
// All calls are tail calls in GHC calling conv, and functions have no
@@ -753,7 +753,7 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
// This is bad: if an interrupt is taken after the mov, sp is in an
// inconsistent state.
// Use the first callee-saved register as a scratch register.
- assert(!MFI->getPristineRegs(MF).test(ARM::R4) &&
+ assert(!MFI.getPristineRegs(MF).test(ARM::R4) &&
"No scratch register to restore SP from FP!");
emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
ARMCC::AL, 0, TII);
@@ -776,11 +776,11 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF,
emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
// Increment past our save areas.
- if (AFI->getDPRCalleeSavedAreaSize()) {
+ if (MBBI != MBB.end() && AFI->getDPRCalleeSavedAreaSize()) {
MBBI++;
// Since vpop register list cannot have gaps, there may be multiple vpop
// instructions in the epilogue.
- while (MBBI->getOpcode() == ARM::VLDMDIA_UPD)
+ while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VLDMDIA_UPD)
MBBI++;
}
if (AFI->getDPRCalleeSavedGapSize()) {
@@ -811,13 +811,13 @@ int
ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,
int FI, unsigned &FrameReg,
int SPAdj) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
MF.getSubtarget().getRegisterInfo());
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize();
+ int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();
int FPOffset = Offset - AFI->getFramePtrSpillOffset();
- bool isFixed = MFI->isFixedObjectIndex(FI);
+ bool isFixed = MFI.isFixedObjectIndex(FI);
FrameReg = ARM::SP;
Offset += SPAdj;
@@ -893,16 +893,18 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
unsigned MIFlags) const {
MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
DebugLoc DL;
- SmallVector<std::pair<unsigned,bool>, 4> Regs;
+ typedef std::pair<unsigned, bool> RegAndKill;
+ SmallVector<RegAndKill, 4> Regs;
unsigned i = CSI.size();
while (i != 0) {
unsigned LastReg = 0;
for (; i != 0; --i) {
unsigned Reg = CSI[i-1].getReg();
- if (!(Func)(Reg, STI.splitFramePushPop())) continue;
+ if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;
// D-registers in the aligned area DPRCS2 are NOT spilled here.
if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
@@ -927,6 +929,12 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
if (Regs.empty())
continue;
+
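+ // Sort the registers by hardware encoding so the push register list is
+ // emitted in ascending order.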
+ std::sort(Regs.begin(), Regs.end(), [&](const RegAndKill &LHS,
+ const RegAndKill &RHS) {
+ return TRI.getEncodingValue(LHS.first) < TRI.getEncodingValue(RHS.first);
+ });
+
if (Regs.size() > 1 || StrOpc== 0) {
MachineInstrBuilder MIB =
AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
@@ -960,6 +968,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
unsigned NumAlignedDPRCS2Regs) const {
MachineFunction &MF = *MBB.getParent();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
+ const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
DebugLoc DL;
bool isTailCall = false;
@@ -983,7 +992,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
bool DeleteRet = false;
for (; i != 0; --i) {
unsigned Reg = CSI[i-1].getReg();
- if (!(Func)(Reg, STI.splitFramePushPop())) continue;
+ if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;
// The aligned reloads from area DPRCS2 are not inserted here.
if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
@@ -1012,6 +1021,11 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
if (Regs.empty())
continue;
+
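+ // Sort by hardware encoding so the pop register list is emitted in
+ // ascending order.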
+ std::sort(Regs.begin(), Regs.end(), [&](unsigned LHS, unsigned RHS) {
+ return TRI.getEncodingValue(LHS) < TRI.getEncodingValue(RHS);
+ });
+
if (Regs.size() > 1 || LdrOpc == 0) {
MachineInstrBuilder MIB =
AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)
@@ -1062,7 +1076,7 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
- MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
// Mark the D-register spill slots as properly aligned. Since MFI computes
// stack slot layout backwards, this can actually mean that the d-reg stack
@@ -1104,7 +1118,7 @@ static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,
.addReg(ARM::SP)
.addImm(8 * NumAlignedDPRCS2Regs)));
- unsigned MaxAlign = MF.getFrameInfo()->getMaxAlignment();
+ unsigned MaxAlign = MF.getFrameInfo().getMaxAlignment();
// We must set parameter MustBeSingleInstruction to true, since
// skipAlignedDPRCS2Spills expects exactly 3 instructions to perform
// stack alignment. Luckily, this can always be done since all ARM
@@ -1359,7 +1373,7 @@ static unsigned GetFunctionSizeInBytes(const MachineFunction &MF,
unsigned FnSize = 0;
for (auto &MBB : MF) {
for (auto &MI : MBB)
- FnSize += TII.GetInstSizeInBytes(MI);
+ FnSize += TII.getInstSizeInBytes(MI);
}
return FnSize;
}
@@ -1485,8 +1499,10 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
const ARMBaseInstrInfo &TII =
*static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ (void)TRI; // Silence unused warning in non-assert builds.
unsigned FramePtr = RegInfo->getFrameRegister(MF);
// Spill R4 if Thumb2 function requires stack realignment - it will be used as
@@ -1495,7 +1511,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
// instruction.
// FIXME: It will be better just to find spare register here.
if (AFI->isThumb2Function() &&
- (MFI->hasVarSizedObjects() || RegInfo->needsStackRealignment(MF)))
+ (MFI.hasVarSizedObjects() || RegInfo->needsStackRealignment(MF)))
SavedRegs.set(ARM::R4);
if (AFI->isThumb1OnlyFunction()) {
@@ -1509,8 +1525,8 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
// we've used all the registers and so R4 is already used, so not marking
// it here will be OK.
// FIXME: It will be better just to find spare register here.
- unsigned StackSize = MFI->estimateStackSize(MF);
- if (MFI->hasVarSizedObjects() || StackSize > 508)
+ unsigned StackSize = MFI.estimateStackSize(MF);
+ if (MFI.hasVarSizedObjects() || StackSize > 508)
SavedRegs.set(ARM::R4);
}
@@ -1547,7 +1563,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
if (Spilled) {
NumGPRSpills++;
- if (!STI.splitFramePushPop()) {
+ if (!STI.splitFramePushPop(MF)) {
if (Reg == ARM::LR)
LRSpilled = true;
CS1Spilled = true;
@@ -1558,7 +1574,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
switch (Reg) {
case ARM::LR:
LRSpilled = true;
- // Fallthrough
+ LLVM_FALLTHROUGH;
case ARM::R0: case ARM::R1:
case ARM::R2: case ARM::R3:
case ARM::R4: case ARM::R5:
@@ -1569,7 +1585,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
break;
}
} else {
- if (!STI.splitFramePushPop()) {
+ if (!STI.splitFramePushPop(MF)) {
UnspilledCS1GPRs.push_back(Reg);
continue;
}
@@ -1616,7 +1632,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
// and which instructions will need a scratch register for them. Is it
// worth the effort and added fragility?
unsigned EstimatedStackSize =
- MFI->estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills);
+ MFI.estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills);
if (hasFP(MF)) {
if (AFI->hasStackFrame())
EstimatedStackSize += 4;
@@ -1628,20 +1644,149 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
EstimatedStackSize += 16; // For possible paddings.
bool BigStack = EstimatedStackSize >= estimateRSStackSizeLimit(MF, this) ||
- MFI->hasVarSizedObjects() ||
- (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF));
+ MFI.hasVarSizedObjects() ||
+ (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF));
bool ExtraCSSpill = false;
if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
AFI->setHasStackFrame(true);
+ if (hasFP(MF)) {
+ SavedRegs.set(FramePtr);
+ // If the frame pointer is required by the ABI, also spill LR so that we
+ // emit a complete frame record.
+ if (MF.getTarget().Options.DisableFramePointerElim(MF) && !LRSpilled) {
+ SavedRegs.set(ARM::LR);
+ LRSpilled = true;
+ NumGPRSpills++;
+ auto LRPos = find(UnspilledCS1GPRs, ARM::LR);
+ if (LRPos != UnspilledCS1GPRs.end())
+ UnspilledCS1GPRs.erase(LRPos);
+ }
+ auto FPPos = find(UnspilledCS1GPRs, FramePtr);
+ if (FPPos != UnspilledCS1GPRs.end())
+ UnspilledCS1GPRs.erase(FPPos);
+ NumGPRSpills++;
+ if (FramePtr == ARM::R7)
+ CS1Spilled = true;
+ }
+
+ if (AFI->isThumb1OnlyFunction()) {
+ // For Thumb1-only targets, we need some low registers when we save and
+ // restore the high registers (which aren't allocatable, but could be
+ // used by inline assembly) because the push/pop instructions cannot
+ // access high registers. If necessary, we might need to push more low
+ // registers to ensure that there is at least one free that can be used
+ // for the saving & restoring, and preferably we should ensure that as
+ // many as are needed are available so that fewer push/pop instructions
+ // are required.
+
+ // Low registers which are not currently pushed, but could be (r4-r7).
+ SmallVector<unsigned, 4> AvailableRegs;
+
+ // Unused argument registers (r0-r3) can be clobbered in the prologue for
+ // free.
+ int EntryRegDeficit = 0;
+ for (unsigned Reg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) {
+ if (!MF.getRegInfo().isLiveIn(Reg)) {
+ --EntryRegDeficit;
+ DEBUG(dbgs() << PrintReg(Reg, TRI)
+ << " is unused argument register, EntryRegDeficit = "
+ << EntryRegDeficit << "\n");
+ }
+ }
+
+ // Unused return registers can be clobbered in the epilogue for free.
+ int ExitRegDeficit = AFI->getReturnRegsCount() - 4;
+ DEBUG(dbgs() << AFI->getReturnRegsCount()
+ << " return regs used, ExitRegDeficit = " << ExitRegDeficit
+ << "\n");
+
+ int RegDeficit = std::max(EntryRegDeficit, ExitRegDeficit);
+ DEBUG(dbgs() << "RegDeficit = " << RegDeficit << "\n");
+
+ // r4-r6 can be used in the prologue if they are pushed by the first push
+ // instruction.
+ for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6}) {
+ if (SavedRegs.test(Reg)) {
+ --RegDeficit;
+ DEBUG(dbgs() << PrintReg(Reg, TRI)
+ << " is saved low register, RegDeficit = " << RegDeficit
+ << "\n");
+ } else {
+ AvailableRegs.push_back(Reg);
+ DEBUG(dbgs()
+ << PrintReg(Reg, TRI)
+ << " is non-saved low register, adding to AvailableRegs\n");
+ }
+ }
+
+ // r7 can be used if it is not being used as the frame pointer.
+ if (!hasFP(MF)) {
+ if (SavedRegs.test(ARM::R7)) {
+ --RegDeficit;
+ DEBUG(dbgs() << "%R7 is saved low register, RegDeficit = "
+ << RegDeficit << "\n");
+ } else {
+ AvailableRegs.push_back(ARM::R7);
+ DEBUG(dbgs()
+ << "%R7 is non-saved low register, adding to AvailableRegs\n");
+ }
+ }
+
+ // Each of r8-r11 needs to be copied to a low register, then pushed.
+ for (unsigned Reg : {ARM::R8, ARM::R9, ARM::R10, ARM::R11}) {
+ if (SavedRegs.test(Reg)) {
+ ++RegDeficit;
+ DEBUG(dbgs() << PrintReg(Reg, TRI)
+ << " is saved high register, RegDeficit = " << RegDeficit
+ << "\n");
+ }
+ }
+
+ // LR can only be used by PUSH, not POP, and can't be used at all if the
+ // llvm.returnaddress intrinsic is used. This is only worth doing if we
+ // are more limited at function entry than exit.
+ if ((EntryRegDeficit > ExitRegDeficit) &&
+ !(MF.getRegInfo().isLiveIn(ARM::LR) &&
+ MF.getFrameInfo().isReturnAddressTaken())) {
+ if (SavedRegs.test(ARM::LR)) {
+ --RegDeficit;
+ DEBUG(dbgs() << "%LR is saved register, RegDeficit = " << RegDeficit
+ << "\n");
+ } else {
+ AvailableRegs.push_back(ARM::LR);
+ DEBUG(dbgs() << "%LR is not saved, adding to AvailableRegs\n");
+ }
+ }
+
+ // If there are more high registers that need pushing than low registers
+ // available, push some more low registers so that we can use fewer push
+ // instructions. This might not reduce RegDeficit all the way to zero,
+ // because we can only guarantee that r4-r6 are available, but r8-r11 may
+ // need saving.
+ DEBUG(dbgs() << "Final RegDeficit = " << RegDeficit << "\n");
+ for (; RegDeficit > 0 && !AvailableRegs.empty(); --RegDeficit) {
+ unsigned Reg = AvailableRegs.pop_back_val();
+ DEBUG(dbgs() << "Spilling " << PrintReg(Reg, TRI)
+ << " to make up reg deficit\n");
+ SavedRegs.set(Reg);
+ NumGPRSpills++;
+ CS1Spilled = true;
+ ExtraCSSpill = true;
+ UnspilledCS1GPRs.erase(find(UnspilledCS1GPRs, Reg));
+ if (Reg == ARM::LR)
+ LRSpilled = true;
+ }
+ DEBUG(dbgs() << "After adding spills, RegDeficit = " << RegDeficit << "\n");
+ }
+
// If LR is not spilled, but at least one of R4, R5, R6 or R7 is, spill LR as
// well so we can fold BX_RET into the register restore (LDM).
if (!LRSpilled && CS1Spilled) {
SavedRegs.set(ARM::LR);
NumGPRSpills++;
SmallVectorImpl<unsigned>::iterator LRPos;
- LRPos = std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(),
- (unsigned)ARM::LR);
+ LRPos = find(UnspilledCS1GPRs, (unsigned)ARM::LR);
if (LRPos != UnspilledCS1GPRs.end())
UnspilledCS1GPRs.erase(LRPos);
@@ -1649,18 +1794,10 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
ExtraCSSpill = true;
}
- if (hasFP(MF)) {
- SavedRegs.set(FramePtr);
- auto FPPos = std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(),
- FramePtr);
- if (FPPos != UnspilledCS1GPRs.end())
- UnspilledCS1GPRs.erase(FPPos);
- NumGPRSpills++;
- }
-
// If stack and double are 8-byte aligned and we are spilling an odd number
// of GPRs, spill one extra callee save GPR so we won't have to pad between
// the integer and double callee save areas.
+ DEBUG(dbgs() << "NumGPRSpills = " << NumGPRSpills << "\n");
unsigned TargetAlign = getStackAlignment();
if (TargetAlign >= 8 && (NumGPRSpills & 1)) {
if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
@@ -1672,6 +1809,8 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
(STI.isTargetWindows() && Reg == ARM::R11) ||
isARMLowRegister(Reg) || Reg == ARM::LR) {
SavedRegs.set(Reg);
+ DEBUG(dbgs() << "Spilling " << PrintReg(Reg, TRI)
+ << " to make up alignment\n");
if (!MRI.isReserved(Reg))
ExtraCSSpill = true;
break;
@@ -1680,6 +1819,8 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
} else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
unsigned Reg = UnspilledCS2GPRs.front();
SavedRegs.set(Reg);
+ DEBUG(dbgs() << "Spilling " << PrintReg(Reg, TRI)
+ << " to make up alignment\n");
if (!MRI.isReserved(Reg))
ExtraCSSpill = true;
}
@@ -1725,9 +1866,9 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,
// closest to SP or frame pointer.
assert(RS && "Register scavenging not provided");
const TargetRegisterClass *RC = &ARM::GPRRegClass;
- RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
- RC->getAlignment(),
- false));
+ RS->addScavengingFrameIndex(MFI.CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
}
}
}
@@ -1855,7 +1996,7 @@ void ARMFrameLowering::adjustForSegmentedStacks(
if (!ST->isTargetAndroid() && !ST->isTargetLinux())
report_fatal_error("Segmented stacks not supported on this platform.");
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
MachineModuleInfo &MMI = MF.getMMI();
MCContext &Context = MMI.getContext();
const MCRegisterInfo *MRI = Context.getRegisterInfo();
@@ -1864,7 +2005,7 @@ void ARMFrameLowering::adjustForSegmentedStacks(
ARMFunctionInfo *ARMFI = MF.getInfo<ARMFunctionInfo>();
DebugLoc DL;
- uint64_t StackSize = MFI->getStackSize();
+ uint64_t StackSize = MFI.getStackSize();
// Do not generate a prologue for functions with a stack of size zero
if (StackSize == 0)
@@ -1951,14 +2092,14 @@ void ARMFrameLowering::adjustForSegmentedStacks(
// Emit the relevant DWARF information about the change in stack pointer as
// well as where to find both r4 and r5 (the callee-save registers)
CFIIndex =
- MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -8));
+ MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -8));
BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
- CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
nullptr, MRI->getDwarfRegNum(ScratchReg1, true), -4));
BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
- CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
nullptr, MRI->getDwarfRegNum(ScratchReg0, true), -8));
BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
@@ -2069,10 +2210,10 @@ void ARMFrameLowering::adjustForSegmentedStacks(
// Emit the DWARF info about the change in stack as well as where to find the
// previous link register
CFIIndex =
- MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -12));
+ MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -12));
BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
- CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
nullptr, MRI->getDwarfRegNum(ARM::LR, true), -12));
BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
@@ -2124,7 +2265,7 @@ void ARMFrameLowering::adjustForSegmentedStacks(
}
// Update the CFA offset now that we've popped
- CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0));
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0));
BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
@@ -2147,17 +2288,17 @@ void ARMFrameLowering::adjustForSegmentedStacks(
}
// Update the CFA offset now that we've popped
- CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0));
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 0));
BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
// Tell debuggers that r4 and r5 are now the same as they were in the
// previous function, that they're the "Same Value".
- CFIIndex = MMI.addFrameInst(MCCFIInstruction::createSameValue(
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(
nullptr, MRI->getDwarfRegNum(ScratchReg0, true)));
BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
- CFIIndex = MMI.addFrameInst(MCCFIInstruction::createSameValue(
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(
nullptr, MRI->getDwarfRegNum(ScratchReg1, true)));
BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 20db3d39bcae..c3e9591d5c70 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -70,9 +70,7 @@ public:
return true;
}
- const char *getPassName() const override {
- return "ARM Instruction Selection";
- }
+ StringRef getPassName() const override { return "ARM Instruction Selection"; }
void PreprocessISelDAG() override;
@@ -193,6 +191,8 @@ public:
#include "ARMGenDAGISel.inc"
private:
+ void transferMemOperands(SDNode *Src, SDNode *Dst);
+
/// Indexed (pre/post inc/dec) load matching code for ARM.
bool tryARMIndexedLoad(SDNode *N);
bool tryT1IndexedLoad(SDNode *N);
@@ -222,10 +222,11 @@ private:
const uint16_t *QOpcodes);
/// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs
- /// should be 2, 3 or 4. The opcode array specifies the instructions used
+ /// should be 1, 2, 3 or 4. The opcode array specifies the instructions used
/// for loading D registers. (Q registers are not supported.)
void SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
- const uint16_t *Opcodes);
+ const uint16_t *DOpcodes,
+ const uint16_t *QOpcodes = nullptr);
/// SelectVTBL - Select NEON VTBL and VTBX intrinsics. NumVecs should be 2,
/// 3 or 4. These are custom-selected so that a REG_SEQUENCE can be
@@ -244,6 +245,7 @@ private:
bool tryInlineAsm(SDNode *N);
void SelectConcatVector(SDNode *N);
+ void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);
bool trySMLAWSMULW(SDNode *N);
@@ -476,7 +478,9 @@ bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
unsigned ARMDAGToDAGISel::ConstantMaterializationCost(unsigned Val) const {
if (Subtarget->isThumb()) {
if (Val <= 255) return 1; // MOV
- if (Subtarget->hasV6T2Ops() && Val <= 0xffff) return 1; // MOVW
+ if (Subtarget->hasV6T2Ops() &&
+ (Val <= 0xffff || ARM_AM::getT2SOImmValSplatVal(Val) != -1))
+ return 1; // MOVW
if (Val <= 510) return 2; // MOV + ADDi8
if (~Val <= 255) return 2; // MOV + MVN
if (ARM_AM::isThumbImmShiftedVal(Val)) return 2; // MOV + LSL
@@ -1186,6 +1190,7 @@ ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
} else if (N.getOpcode() == ARMISD::Wrapper &&
N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
+ N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
Base = N.getOperand(0);
} else {
@@ -1232,9 +1237,9 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
int FI = cast<FrameIndexSDNode>(N)->getIndex();
// Only multiples of 4 are allowed for the offset, so the frame object
// alignment must be at least 4.
- MachineFrameInfo *MFI = MF->getFrameInfo();
- if (MFI->getObjectAlignment(FI) < 4)
- MFI->setObjectAlignment(FI, 4);
+ MachineFrameInfo &MFI = MF->getFrameInfo();
+ if (MFI.getObjectAlignment(FI) < 4)
+ MFI.setObjectAlignment(FI, 4);
Base = CurDAG->getTargetFrameIndex(
FI, TLI->getPointerTy(CurDAG->getDataLayout()));
OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
@@ -1255,9 +1260,9 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
int FI = cast<FrameIndexSDNode>(Base)->getIndex();
// For LHS+RHS to result in an offset that's a multiple of 4 the object
// indexed by the LHS must be 4-byte aligned.
- MachineFrameInfo *MFI = MF->getFrameInfo();
- if (MFI->getObjectAlignment(FI) < 4)
- MFI->setObjectAlignment(FI, 4);
+ MachineFrameInfo &MFI = MF->getFrameInfo();
+ if (MFI.getObjectAlignment(FI) < 4)
+ MFI.setObjectAlignment(FI, 4);
Base = CurDAG->getTargetFrameIndex(
FI, TLI->getPointerTy(CurDAG->getDataLayout()));
}
@@ -1469,6 +1474,12 @@ static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
}
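+// Copy the memory operand of the original node onto the newly created machine
+// node so its memory reference is preserved.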
+void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
+ cast<MachineSDNode>(Result)->setMemRefs(MemOp, MemOp + 1);
+}
+
bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
LoadSDNode *LD = cast<LoadSDNode>(N);
ISD::MemIndexedMode AM = LD->getAddressingMode();
@@ -1527,16 +1538,20 @@ bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
SDValue Base = LD->getBasePtr();
SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
CurDAG->getRegister(0, MVT::i32), Chain };
- ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
- MVT::i32, MVT::Other, Ops));
+ SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
+ MVT::Other, Ops);
+ transferMemOperands(N, New);
+ ReplaceNode(N, New);
return true;
} else {
SDValue Chain = LD->getChain();
SDValue Base = LD->getBasePtr();
SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
CurDAG->getRegister(0, MVT::i32), Chain };
- ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
- MVT::i32, MVT::Other, Ops));
+ SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
+ MVT::Other, Ops);
+ transferMemOperands(N, New);
+ ReplaceNode(N, New);
return true;
}
}
@@ -1548,8 +1563,8 @@ bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
LoadSDNode *LD = cast<LoadSDNode>(N);
EVT LoadedVT = LD->getMemoryVT();
ISD::MemIndexedMode AM = LD->getAddressingMode();
- if (AM == ISD::UNINDEXED || LD->getExtensionType() != ISD::NON_EXTLOAD ||
- AM != ISD::POST_INC || LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
+ if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
+ LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
return false;
auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
@@ -1564,8 +1579,10 @@ bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
SDValue Base = LD->getBasePtr();
SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
CurDAG->getRegister(0, MVT::i32), Chain };
- ReplaceNode(N, CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32, MVT::i32,
- MVT::Other, Ops));
+ SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
+ MVT::i32, MVT::Other, Ops);
+ transferMemOperands(N, New);
+ ReplaceNode(N, New);
return true;
}
@@ -1610,8 +1627,10 @@ bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
SDValue Base = LD->getBasePtr();
SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
CurDAG->getRegister(0, MVT::i32), Chain };
- ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
- MVT::Other, Ops));
+ SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
+ MVT::Other, Ops);
+ transferMemOperands(N, New);
+ ReplaceNode(N, New);
return true;
}
@@ -1744,6 +1763,12 @@ static bool isVLDfixed(unsigned Opc)
case ARM::VLD1q16wb_fixed : return true;
case ARM::VLD1q32wb_fixed : return true;
case ARM::VLD1q64wb_fixed : return true;
+ case ARM::VLD1DUPd8wb_fixed : return true;
+ case ARM::VLD1DUPd16wb_fixed : return true;
+ case ARM::VLD1DUPd32wb_fixed : return true;
+ case ARM::VLD1DUPq8wb_fixed : return true;
+ case ARM::VLD1DUPq16wb_fixed : return true;
+ case ARM::VLD1DUPq32wb_fixed : return true;
case ARM::VLD2d8wb_fixed : return true;
case ARM::VLD2d16wb_fixed : return true;
case ARM::VLD2d32wb_fixed : return true;
@@ -1798,6 +1823,12 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
+ case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
+ case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
+ case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
+ case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
+ case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
+ case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;
case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
@@ -2140,7 +2171,7 @@ void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
unsigned Alignment = 0;
if (NumVecs != 3) {
Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
- unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
+ unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
if (Alignment > NumBytes)
Alignment = NumBytes;
if (Alignment < 8 && Alignment < NumBytes)
@@ -2238,8 +2269,9 @@ void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
}
void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
- const uint16_t *Opcodes) {
- assert(NumVecs >=2 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
+ const uint16_t *DOpcodes,
+ const uint16_t *QOpcodes) {
+ assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
SDLoc dl(N);
SDValue MemAddr, Align;
@@ -2255,7 +2287,7 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
unsigned Alignment = 0;
if (NumVecs != 3) {
Alignment = cast<ConstantSDNode>(Align)->getZExtValue();
- unsigned NumBytes = NumVecs * VT.getVectorElementType().getSizeInBits()/8;
+ unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
if (Alignment > NumBytes)
Alignment = NumBytes;
if (Alignment < 8 && Alignment < NumBytes)
@@ -2267,19 +2299,21 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
}
Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
- unsigned OpcodeIndex;
+ unsigned Opc;
switch (VT.getSimpleVT().SimpleTy) {
default: llvm_unreachable("unhandled vld-dup type");
- case MVT::v8i8: OpcodeIndex = 0; break;
- case MVT::v4i16: OpcodeIndex = 1; break;
+ case MVT::v8i8: Opc = DOpcodes[0]; break;
+ case MVT::v16i8: Opc = QOpcodes[0]; break;
+ case MVT::v4i16: Opc = DOpcodes[1]; break;
+ case MVT::v8i16: Opc = QOpcodes[1]; break;
case MVT::v2f32:
- case MVT::v2i32: OpcodeIndex = 2; break;
+ case MVT::v2i32: Opc = DOpcodes[2]; break;
+ case MVT::v4f32:
+ case MVT::v4i32: Opc = QOpcodes[2]; break;
}
SDValue Pred = getAL(CurDAG, dl);
SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
- SDValue SuperReg;
- unsigned Opc = Opcodes[OpcodeIndex];
SmallVector<SDValue, 6> Ops;
Ops.push_back(MemAddr);
Ops.push_back(Align);
@@ -2287,6 +2321,8 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
// fixed-stride update instructions don't have an explicit writeback
// operand. It's implicit in the opcode itself.
SDValue Inc = N->getOperand(2);
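+ // VLD1DUP and VLD2DUP have a _register writeback form; switch to it when
+ // the increment is a register rather than the fixed stride.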
+ if (NumVecs <= 2 && !isa<ConstantSDNode>(Inc.getNode()))
+ Opc = getVLDSTRegisterUpdateOpcode(Opc);
if (!isa<ConstantSDNode>(Inc.getNode()))
Ops.push_back(Inc);
// FIXME: VLD3 and VLD4 haven't been updated to that form yet.
@@ -2305,14 +2341,18 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs,
ResTys.push_back(MVT::Other);
SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
cast<MachineSDNode>(VLdDup)->setMemRefs(MemOp, MemOp + 1);
- SuperReg = SDValue(VLdDup, 0);
// Extract the subregisters.
- static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
- unsigned SubIdx = ARM::dsub_0;
- for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- ReplaceUses(SDValue(N, Vec),
- CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
+ if (NumVecs == 1) {
+ ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
+ } else {
+ SDValue SuperReg = SDValue(VLdDup, 0);
+ static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
+ unsigned SubIdx = ARM::dsub_0;
+ for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
+ ReplaceUses(SDValue(N, Vec),
+ CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
+ }
ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
if (isUpdating)
ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
@@ -2612,6 +2652,10 @@ static bool SearchSignedMulLong(SDValue OR, unsigned *Opc, SDValue &Src0,
}
bool ARMDAGToDAGISel::trySMLAWSMULW(SDNode *N) {
+ if (!Subtarget->hasV6Ops() ||
+ (Subtarget->isThumb() && !Subtarget->hasThumb2()))
+ return false;
+
SDLoc dl(N);
SDValue Src0 = N->getOperand(0);
SDValue Src1 = N->getOperand(1);
@@ -2687,6 +2731,87 @@ void ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
ReplaceNode(N, createDRegPairNode(VT, N->getOperand(0), N->getOperand(1)));
}
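+// If the set bits of A form a single contiguous run, return the positions of
+// its most- and least-significant set bits; otherwise return None.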
+static Optional<std::pair<unsigned, unsigned>>
+getContiguousRangeOfSetBits(const APInt &A) {
+ unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1;
+ unsigned LastOne = A.countTrailingZeros();
+ if (A.countPopulation() != (FirstOne - LastOne + 1))
+ return Optional<std::pair<unsigned,unsigned>>();
+ return std::make_pair(FirstOne, LastOne);
+}
+
+void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
+ assert(N->getOpcode() == ARMISD::CMPZ);
+ SwitchEQNEToPLMI = false;
+
+ if (!Subtarget->isThumb())
+ // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
+ // LSR don't exist as standalone instructions - they need the barrel shifter.
+ return;
+
+ // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
+ SDValue And = N->getOperand(0);
+ if (!And->hasOneUse())
+ return;
+
+ SDValue Zero = N->getOperand(1);
+ if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isNullValue() ||
+ And->getOpcode() != ISD::AND)
+ return;
+ SDValue X = And.getOperand(0);
+ auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));
+
+ if (!C || !X->hasOneUse())
+ return;
+ auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
+ if (!Range)
+ return;
+
+ // There are several ways to lower this:
+ SDNode *NewN;
+ SDLoc dl(N);
+
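+ // Emit a flag-setting shift: tLSLri/tLSRri for Thumb1, or the equivalent
+ // t2LSLri/t2LSRri when targeting Thumb2.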
+ auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
+ if (Subtarget->isThumb2()) {
+ Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
+ SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
+ getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
+ } else {
+ SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
+ CurDAG->getTargetConstant(Imm, dl, MVT::i32),
+ getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
+ return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
+ }
+ };
+
+ if (Range->second == 0) {
+ // 1. Mask includes the LSB -> Simply shift the top N bits off
+ NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
+ ReplaceNode(And.getNode(), NewN);
+ } else if (Range->first == 31) {
+ // 2. Mask includes the MSB -> Simply shift the bottom N bits off
+ NewN = EmitShift(ARM::tLSRri, X, Range->second);
+ ReplaceNode(And.getNode(), NewN);
+ } else if (Range->first == Range->second) {
+ // 3. Only one bit is set. We can shift this into the sign bit and use a
+ // PL/MI comparison.
+ NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
+ ReplaceNode(And.getNode(), NewN);
+
+ SwitchEQNEToPLMI = true;
+ } else if (!Subtarget->hasV6T2Ops()) {
+ // 4. Do a double shift to clear bottom and top bits, but only in
+ // thumb-1 mode as in thumb-2 we can use UBFX.
+ NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
+ NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
+ Range->second + (31 - Range->first));
+ ReplaceNode(And.getNode(), NewN);
+ }
+
+}
+
void ARMDAGToDAGISel::Select(SDNode *N) {
SDLoc dl(N);
@@ -2761,9 +2886,9 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
if (Subtarget->isThumb1Only()) {
// Set the alignment of the frame object to 4, to avoid having to generate
// more than one ADD
- MachineFrameInfo *MFI = MF->getFrameInfo();
- if (MFI->getObjectAlignment(FI) < 4)
- MFI->setObjectAlignment(FI, 4);
+ MachineFrameInfo &MFI = MF->getFrameInfo();
+ if (MFI.getObjectAlignment(FI) < 4)
+ MFI.setObjectAlignment(FI, 4);
CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
CurDAG->getTargetConstant(0, dl, MVT::i32));
return;
@@ -2914,6 +3039,7 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
return;
}
}
+
break;
}
case ARMISD::VMOVRRD:
@@ -2971,7 +3097,8 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
case ARMISD::UMLAL:{
// UMAAL is similar to UMLAL but it adds two 32-bit values to the
// 64-bit multiplication result.
- if (Subtarget->hasV6Ops() && N->getOperand(2).getOpcode() == ARMISD::ADDC &&
+ if (Subtarget->hasV6Ops() && Subtarget->hasDSP() &&
+ N->getOperand(2).getOpcode() == ARMISD::ADDC &&
N->getOperand(3).getOpcode() == ARMISD::ADDE) {
SDValue Addc = N->getOperand(2);
@@ -3037,6 +3164,37 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
return;
}
}
+ case ARMISD::SUBE: {
+ if (!Subtarget->hasV6Ops())
+ break;
+ // Look for a pattern to match SMMLS
+ // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
+ if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
+ N->getOperand(2).getOpcode() != ARMISD::SUBC ||
+ !SDValue(N, 1).use_empty())
+ break;
+
+ if (Subtarget->isThumb())
+ assert(Subtarget->hasThumb2() &&
+ "This pattern should not be generated for Thumb");
+
+ SDValue SmulLoHi = N->getOperand(1);
+ SDValue Subc = N->getOperand(2);
+ auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0));
+
+ if (!Zero || Zero->getZExtValue() != 0 ||
+ Subc.getOperand(1) != SmulLoHi.getValue(0) ||
+ N->getOperand(1) != SmulLoHi.getValue(1) ||
+ N->getOperand(2) != Subc.getValue(1))
+ break;
+
+ unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
+ SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
+ N->getOperand(0), getAL(CurDAG, dl),
+ CurDAG->getRegister(0, MVT::i32) };
+ ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
+ return;
+ }
case ISD::LOAD: {
if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
if (tryT2IndexedLoad(N))
@@ -3073,9 +3231,27 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
assert(N2.getOpcode() == ISD::Constant);
assert(N3.getOpcode() == ISD::Register);
- SDValue Tmp2 = CurDAG->getTargetConstant(((unsigned)
- cast<ConstantSDNode>(N2)->getZExtValue()), dl,
- MVT::i32);
+ unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue();
+
+ if (InFlag.getOpcode() == ARMISD::CMPZ) {
+ bool SwitchEQNEToPLMI;
+ SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
+ InFlag = N->getOperand(4);
+
+ if (SwitchEQNEToPLMI) {
+ switch ((ARMCC::CondCodes)CC) {
+ default: llvm_unreachable("CMPZ must be either NE or EQ!");
+ case ARMCC::NE:
+ CC = (unsigned)ARMCC::MI;
+ break;
+ case ARMCC::EQ:
+ CC = (unsigned)ARMCC::PL;
+ break;
+ }
+ }
+ }
+
+ SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag };
SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,
MVT::Glue, Ops);
@@ -3089,6 +3265,80 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
CurDAG->RemoveDeadNode(N);
return;
}
+
+ case ARMISD::CMPZ: {
+ // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
+ // This allows us to avoid materializing the expensive negative constant.
+ // The CMPZ #0 is useless and will be peepholed away but we need to keep it
+ // for its glue output.
+ SDValue X = N->getOperand(0);
+ auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
+ if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
+ int64_t Addend = -C->getSExtValue();
+
+ SDNode *Add = nullptr;
+ // In T2 mode, ADDS can be better than CMN if the immediate fits in a
+ // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
+ // Outside that range we can just use a CMN which is 32-bit but has a
+ // 12-bit immediate range.
+ if (Subtarget->isThumb2() && Addend < 1<<8) {
+ SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
+ getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
+ CurDAG->getRegister(0, MVT::i32) };
+ Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
+ } else if (!Subtarget->isThumb2() && Addend < 1<<8) {
+ // FIXME: Add T1 tADDi8 code.
+ SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
+ CurDAG->getTargetConstant(Addend, dl, MVT::i32),
+ getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
+ Add = CurDAG->getMachineNode(ARM::tADDi8, dl, MVT::i32, Ops);
+ } else if (!Subtarget->isThumb2() && Addend < 1<<3) {
+ SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
+ CurDAG->getTargetConstant(Addend, dl, MVT::i32),
+ getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
+ Add = CurDAG->getMachineNode(ARM::tADDi3, dl, MVT::i32, Ops);
+ }
+ if (Add) {
+ SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
+ CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2);
+ }
+ }
+ // Other cases are autogenerated.
+ break;
+ }
+
+ case ARMISD::CMOV: {
+ SDValue InFlag = N->getOperand(4);
+
+ if (InFlag.getOpcode() == ARMISD::CMPZ) {
+ bool SwitchEQNEToPLMI;
+ SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI);
+
+ if (SwitchEQNEToPLMI) {
+ SDValue ARMcc = N->getOperand(2);
+ ARMCC::CondCodes CC =
+ (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue();
+
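+ // SelectCMPZ replaced the AND with a flag-setting shift, so an EQ/NE
+ // test of the zero flag becomes a PL/MI test of the sign bit.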
+ switch (CC) {
+ default: llvm_unreachable("CMPZ must be either NE or EQ!");
+ case ARMCC::NE:
+ CC = ARMCC::MI;
+ break;
+ case ARMCC::EQ:
+ CC = ARMCC::PL;
+ break;
+ }
+ SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
+ SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
+ N->getOperand(3), N->getOperand(4)};
+ CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
+ }
+
+ }
+ // Other cases are autogenerated.
+ break;
+ }
+
case ARMISD::VZIP: {
unsigned Opc = 0;
EVT VT = N->getValueType(0);
@@ -3174,6 +3424,15 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
return;
}
+ case ARMISD::VLD1DUP: {
+ static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
+ ARM::VLD1DUPd32 };
+ static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
+ ARM::VLD1DUPq32 };
+ SelectVLDDup(N, false, 1, DOpcodes, QOpcodes);
+ return;
+ }
+
case ARMISD::VLD2DUP: {
static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
ARM::VLD2DUPd32 };
@@ -3197,6 +3456,17 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
return;
}
+ case ARMISD::VLD1DUP_UPD: {
+ static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
+ ARM::VLD1DUPd16wb_fixed,
+ ARM::VLD1DUPd32wb_fixed };
+ static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
+ ARM::VLD1DUPq16wb_fixed,
+ ARM::VLD1DUPq32wb_fixed };
+ SelectVLDDup(N, true, 1, DOpcodes, QOpcodes);
+ return;
+ }
+
case ARMISD::VLD2DUP_UPD: {
static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed,
ARM::VLD2DUPd16wb_fixed,
@@ -4383,7 +4653,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
case InlineAsm::Constraint_i:
// FIXME: It seems strange that 'i' is needed here since it's supposed to
// be an immediate and not a memory constraint.
- // Fallthrough.
+ LLVM_FALLTHROUGH;
case InlineAsm::Constraint_m:
case InlineAsm::Constraint_o:
case InlineAsm::Constraint_Q:
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 3cfcb1e09f0b..afba1587a743 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -37,6 +37,7 @@
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
@@ -59,18 +60,27 @@ using namespace llvm;
STATISTIC(NumTailCalls, "Number of tail calls");
STATISTIC(NumMovwMovt, "Number of GAs materialized with movw + movt");
STATISTIC(NumLoopByVals, "Number of loops generated for byval arguments");
+STATISTIC(NumConstpoolPromoted,
+ "Number of constants with their storage promoted into constant pools");
static cl::opt<bool>
ARMInterworking("arm-interworking", cl::Hidden,
cl::desc("Enable / disable ARM interworking (for debugging only)"),
cl::init(true));
-// Disabled for causing self-hosting failures once returned-attribute inference
-// was enabled.
-static cl::opt<bool>
-EnableThisRetForwarding("arm-this-return-forwarding", cl::Hidden,
- cl::desc("Directly forward this return"),
- cl::init(false));
+static cl::opt<bool> EnableConstpoolPromotion(
+ "arm-promote-constant", cl::Hidden,
+ cl::desc("Enable / disable promotion of unnamed_addr constants into "
+ "constant pools"),
+ cl::init(true));
+static cl::opt<unsigned> ConstpoolPromotionMaxSize(
+ "arm-promote-constant-max-size", cl::Hidden,
+ cl::desc("Maximum size of constant to promote into a constant pool"),
+ cl::init(64));
+static cl::opt<unsigned> ConstpoolPromotionMaxTotal(
+ "arm-promote-constant-max-total", cl::Hidden,
+ cl::desc("Maximum size of ALL constants to promote into a constant pool"),
+ cl::init(128));
namespace {
class ARMCCState : public CCState {
@@ -87,6 +97,171 @@ namespace {
};
}
+void ARMTargetLowering::InitLibcallCallingConvs() {
+ // The builtins on ARM always use AAPCS, irrespective of whether C is AAPCS or
+ // AAPCS_VFP.
+ for (const auto LC : {
+ RTLIB::SHL_I16,
+ RTLIB::SHL_I32,
+ RTLIB::SHL_I64,
+ RTLIB::SHL_I128,
+ RTLIB::SRL_I16,
+ RTLIB::SRL_I32,
+ RTLIB::SRL_I64,
+ RTLIB::SRL_I128,
+ RTLIB::SRA_I16,
+ RTLIB::SRA_I32,
+ RTLIB::SRA_I64,
+ RTLIB::SRA_I128,
+ RTLIB::MUL_I8,
+ RTLIB::MUL_I16,
+ RTLIB::MUL_I32,
+ RTLIB::MUL_I64,
+ RTLIB::MUL_I128,
+ RTLIB::MULO_I32,
+ RTLIB::MULO_I64,
+ RTLIB::MULO_I128,
+ RTLIB::SDIV_I8,
+ RTLIB::SDIV_I16,
+ RTLIB::SDIV_I32,
+ RTLIB::SDIV_I64,
+ RTLIB::SDIV_I128,
+ RTLIB::UDIV_I8,
+ RTLIB::UDIV_I16,
+ RTLIB::UDIV_I32,
+ RTLIB::UDIV_I64,
+ RTLIB::UDIV_I128,
+ RTLIB::SREM_I8,
+ RTLIB::SREM_I16,
+ RTLIB::SREM_I32,
+ RTLIB::SREM_I64,
+ RTLIB::SREM_I128,
+ RTLIB::UREM_I8,
+ RTLIB::UREM_I16,
+ RTLIB::UREM_I32,
+ RTLIB::UREM_I64,
+ RTLIB::UREM_I128,
+ RTLIB::SDIVREM_I8,
+ RTLIB::SDIVREM_I16,
+ RTLIB::SDIVREM_I32,
+ RTLIB::SDIVREM_I64,
+ RTLIB::SDIVREM_I128,
+ RTLIB::UDIVREM_I8,
+ RTLIB::UDIVREM_I16,
+ RTLIB::UDIVREM_I32,
+ RTLIB::UDIVREM_I64,
+ RTLIB::UDIVREM_I128,
+ RTLIB::NEG_I32,
+ RTLIB::NEG_I64,
+ RTLIB::ADD_F32,
+ RTLIB::ADD_F64,
+ RTLIB::ADD_F80,
+ RTLIB::ADD_F128,
+ RTLIB::SUB_F32,
+ RTLIB::SUB_F64,
+ RTLIB::SUB_F80,
+ RTLIB::SUB_F128,
+ RTLIB::MUL_F32,
+ RTLIB::MUL_F64,
+ RTLIB::MUL_F80,
+ RTLIB::MUL_F128,
+ RTLIB::DIV_F32,
+ RTLIB::DIV_F64,
+ RTLIB::DIV_F80,
+ RTLIB::DIV_F128,
+ RTLIB::POWI_F32,
+ RTLIB::POWI_F64,
+ RTLIB::POWI_F80,
+ RTLIB::POWI_F128,
+ RTLIB::FPEXT_F64_F128,
+ RTLIB::FPEXT_F32_F128,
+ RTLIB::FPEXT_F32_F64,
+ RTLIB::FPEXT_F16_F32,
+ RTLIB::FPROUND_F32_F16,
+ RTLIB::FPROUND_F64_F16,
+ RTLIB::FPROUND_F80_F16,
+ RTLIB::FPROUND_F128_F16,
+ RTLIB::FPROUND_F64_F32,
+ RTLIB::FPROUND_F80_F32,
+ RTLIB::FPROUND_F128_F32,
+ RTLIB::FPROUND_F80_F64,
+ RTLIB::FPROUND_F128_F64,
+ RTLIB::FPTOSINT_F32_I32,
+ RTLIB::FPTOSINT_F32_I64,
+ RTLIB::FPTOSINT_F32_I128,
+ RTLIB::FPTOSINT_F64_I32,
+ RTLIB::FPTOSINT_F64_I64,
+ RTLIB::FPTOSINT_F64_I128,
+ RTLIB::FPTOSINT_F80_I32,
+ RTLIB::FPTOSINT_F80_I64,
+ RTLIB::FPTOSINT_F80_I128,
+ RTLIB::FPTOSINT_F128_I32,
+ RTLIB::FPTOSINT_F128_I64,
+ RTLIB::FPTOSINT_F128_I128,
+ RTLIB::FPTOUINT_F32_I32,
+ RTLIB::FPTOUINT_F32_I64,
+ RTLIB::FPTOUINT_F32_I128,
+ RTLIB::FPTOUINT_F64_I32,
+ RTLIB::FPTOUINT_F64_I64,
+ RTLIB::FPTOUINT_F64_I128,
+ RTLIB::FPTOUINT_F80_I32,
+ RTLIB::FPTOUINT_F80_I64,
+ RTLIB::FPTOUINT_F80_I128,
+ RTLIB::FPTOUINT_F128_I32,
+ RTLIB::FPTOUINT_F128_I64,
+ RTLIB::FPTOUINT_F128_I128,
+ RTLIB::SINTTOFP_I32_F32,
+ RTLIB::SINTTOFP_I32_F64,
+ RTLIB::SINTTOFP_I32_F80,
+ RTLIB::SINTTOFP_I32_F128,
+ RTLIB::SINTTOFP_I64_F32,
+ RTLIB::SINTTOFP_I64_F64,
+ RTLIB::SINTTOFP_I64_F80,
+ RTLIB::SINTTOFP_I64_F128,
+ RTLIB::SINTTOFP_I128_F32,
+ RTLIB::SINTTOFP_I128_F64,
+ RTLIB::SINTTOFP_I128_F80,
+ RTLIB::SINTTOFP_I128_F128,
+ RTLIB::UINTTOFP_I32_F32,
+ RTLIB::UINTTOFP_I32_F64,
+ RTLIB::UINTTOFP_I32_F80,
+ RTLIB::UINTTOFP_I32_F128,
+ RTLIB::UINTTOFP_I64_F32,
+ RTLIB::UINTTOFP_I64_F64,
+ RTLIB::UINTTOFP_I64_F80,
+ RTLIB::UINTTOFP_I64_F128,
+ RTLIB::UINTTOFP_I128_F32,
+ RTLIB::UINTTOFP_I128_F64,
+ RTLIB::UINTTOFP_I128_F80,
+ RTLIB::UINTTOFP_I128_F128,
+ RTLIB::OEQ_F32,
+ RTLIB::OEQ_F64,
+ RTLIB::OEQ_F128,
+ RTLIB::UNE_F32,
+ RTLIB::UNE_F64,
+ RTLIB::UNE_F128,
+ RTLIB::OGE_F32,
+ RTLIB::OGE_F64,
+ RTLIB::OGE_F128,
+ RTLIB::OLT_F32,
+ RTLIB::OLT_F64,
+ RTLIB::OLT_F128,
+ RTLIB::OLE_F32,
+ RTLIB::OLE_F64,
+ RTLIB::OLE_F128,
+ RTLIB::OGT_F32,
+ RTLIB::OGT_F64,
+ RTLIB::OGT_F128,
+ RTLIB::UO_F32,
+ RTLIB::UO_F64,
+ RTLIB::UO_F128,
+ RTLIB::O_F32,
+ RTLIB::O_F64,
+ RTLIB::O_F128,
+ })
+ setLibcallCallingConv(LC, CallingConv::ARM_AAPCS);
+}
+
// The APCS parameter registers.
static const MCPhysReg GPRArgRegs[] = {
ARM::R0, ARM::R1, ARM::R2, ARM::R3
@@ -103,7 +278,7 @@ void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT,
}
MVT ElemTy = VT.getVectorElementType();
- if (ElemTy != MVT::i64 && ElemTy != MVT::f64)
+ if (ElemTy != MVT::f64)
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
@@ -174,6 +349,8 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
+ InitLibcallCallingConvs();
+
if (Subtarget->isTargetMachO()) {
// Uses VFP for Thumb libfuncs if available.
if (Subtarget->isThumb() && Subtarget->hasVFP2() &&
@@ -565,8 +742,6 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SDIV, MVT::v8i8, Custom);
setOperationAction(ISD::UDIV, MVT::v4i16, Custom);
setOperationAction(ISD::UDIV, MVT::v8i8, Custom);
- setOperationAction(ISD::SETCC, MVT::v1i64, Expand);
- setOperationAction(ISD::SETCC, MVT::v2i64, Expand);
// Neon does not have single instruction SINT_TO_FP and UINT_TO_FP with
// a destination type that is wider than the source, and nor does
// it have a FP_TO_[SU]INT instruction with a narrower destination than
@@ -803,19 +978,26 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UREM, MVT::i32, Expand);
// Register based DivRem for AEABI (RTABI 4.2)
if (Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
- Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI()) {
+ Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
+ Subtarget->isTargetWindows()) {
setOperationAction(ISD::SREM, MVT::i64, Custom);
setOperationAction(ISD::UREM, MVT::i64, Custom);
HasStandaloneRem = false;
- setLibcallName(RTLIB::SDIVREM_I8, "__aeabi_idivmod");
- setLibcallName(RTLIB::SDIVREM_I16, "__aeabi_idivmod");
- setLibcallName(RTLIB::SDIVREM_I32, "__aeabi_idivmod");
- setLibcallName(RTLIB::SDIVREM_I64, "__aeabi_ldivmod");
- setLibcallName(RTLIB::UDIVREM_I8, "__aeabi_uidivmod");
- setLibcallName(RTLIB::UDIVREM_I16, "__aeabi_uidivmod");
- setLibcallName(RTLIB::UDIVREM_I32, "__aeabi_uidivmod");
- setLibcallName(RTLIB::UDIVREM_I64, "__aeabi_uldivmod");
+ for (const auto &LC :
+ {RTLIB::SDIVREM_I8, RTLIB::SDIVREM_I16, RTLIB::SDIVREM_I32})
+ setLibcallName(LC, Subtarget->isTargetWindows() ? "__rt_sdiv"
+ : "__aeabi_idivmod");
+ setLibcallName(RTLIB::SDIVREM_I64, Subtarget->isTargetWindows()
+ ? "__rt_sdiv64"
+ : "__aeabi_ldivmod");
+ for (const auto &LC :
+ {RTLIB::UDIVREM_I8, RTLIB::UDIVREM_I16, RTLIB::UDIVREM_I32})
+ setLibcallName(LC, Subtarget->isTargetWindows() ? "__rt_udiv"
+ : "__aeabi_uidivmod");
+ setLibcallName(RTLIB::UDIVREM_I64, Subtarget->isTargetWindows()
+ ? "__rt_udiv64"
+ : "__aeabi_uldivmod");
setLibcallCallingConv(RTLIB::SDIVREM_I8, CallingConv::ARM_AAPCS);
setLibcallCallingConv(RTLIB::SDIVREM_I16, CallingConv::ARM_AAPCS);
@@ -835,6 +1017,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
}
+ if (Subtarget->isTargetWindows() && Subtarget->getTargetTriple().isOSMSVCRT())
+ for (auto &VT : {MVT::f32, MVT::f64})
+ setOperationAction(ISD::FPOWI, VT, Custom);
+
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
@@ -875,6 +1061,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
} else {
// If there's anything we can use as a barrier, go through custom lowering
// for ATOMIC_FENCE.
+ // If target has DMB in thumb, Fences can be inserted.
+ if (Subtarget->hasDataBarrier())
+ InsertFencesForAtomic = true;
+
setOperationAction(ISD::ATOMIC_FENCE, MVT::Other,
Subtarget->hasAnyDataBarrier() ? Custom : Expand);
@@ -893,8 +1083,10 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i32, Expand);
// Mark ATOMIC_LOAD and ATOMIC_STORE custom so we can handle the
// Unordered/Monotonic case.
- setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
- setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
+ if (!InsertFencesForAtomic) {
+ setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Custom);
+ setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Custom);
+ }
}
setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
@@ -1177,7 +1369,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::PRELOAD: return "ARMISD::PRELOAD";
- case ARMISD::WIN__CHKSTK: return "ARMISD:::WIN__CHKSTK";
+ case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK";
case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK";
case ARMISD::VCEQ: return "ARMISD::VCEQ";
@@ -1236,6 +1428,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
case ARMISD::VBSL: return "ARMISD::VBSL";
case ARMISD::MEMCPY: return "ARMISD::MEMCPY";
+ case ARMISD::VLD1DUP: return "ARMISD::VLD1DUP";
case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
case ARMISD::VLD3DUP: return "ARMISD::VLD3DUP";
case ARMISD::VLD4DUP: return "ARMISD::VLD4DUP";
@@ -1246,6 +1439,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::VLD2LN_UPD: return "ARMISD::VLD2LN_UPD";
case ARMISD::VLD3LN_UPD: return "ARMISD::VLD3LN_UPD";
case ARMISD::VLD4LN_UPD: return "ARMISD::VLD4LN_UPD";
+ case ARMISD::VLD1DUP_UPD: return "ARMISD::VLD1DUP_UPD";
case ARMISD::VLD2DUP_UPD: return "ARMISD::VLD2DUP_UPD";
case ARMISD::VLD3DUP_UPD: return "ARMISD::VLD3DUP_UPD";
case ARMISD::VLD4DUP_UPD: return "ARMISD::VLD4DUP_UPD";
@@ -1429,6 +1623,16 @@ ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
}
}
+CCAssignFn *ARMTargetLowering::CCAssignFnForCall(CallingConv::ID CC,
+ bool isVarArg) const {
+ return CCAssignFnForNode(CC, false, isVarArg);
+}
+
+CCAssignFn *ARMTargetLowering::CCAssignFnForReturn(CallingConv::ID CC,
+ bool isVarArg) const {
+ return CCAssignFnForNode(CC, true, isVarArg);
+}
+
/// CCAssignFnForNode - Selects the correct CCAssignFn for the given
/// CallingConvention.
CCAssignFn *ARMTargetLowering::CCAssignFnForNode(CallingConv::ID CC,
@@ -1464,9 +1668,7 @@ SDValue ARMTargetLowering::LowerCallResult(
SmallVector<CCValAssign, 16> RVLocs;
ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
*DAG.getContext(), Call);
- CCInfo.AnalyzeCallResult(Ins,
- CCAssignFnForNode(CallConv, /* Return*/ true,
- isVarArg));
+ CCInfo.AnalyzeCallResult(Ins, CCAssignFnForReturn(CallConv, isVarArg));
// Copy all of the result registers out of their specified physreg.
for (unsigned i = 0; i != RVLocs.size(); ++i) {
@@ -1474,7 +1676,7 @@ SDValue ARMTargetLowering::LowerCallResult(
// Pass 'this' value directly from the argument to return value, to avoid
// reg unit interference
- if (i == 0 && isThisReturn && EnableThisRetForwarding) {
+ if (i == 0 && isThisReturn) {
assert(!VA.needsCustom() && VA.getLocVT() == MVT::i32 &&
"unexpected return calling convention register assignment");
InVals.push_back(ThisVal);
@@ -1627,9 +1829,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVector<CCValAssign, 16> ArgLocs;
ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
*DAG.getContext(), Call);
- CCInfo.AnalyzeCallOperands(Outs,
- CCAssignFnForNode(CallConv, /* Return*/ false,
- isVarArg));
+ CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CallConv, isVarArg));
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getNextStackOffset();
@@ -1864,7 +2064,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
auto *BB = CLI.CS->getParent();
bool PreferIndirect =
Subtarget->isThumb() && MF.getFunction()->optForMinSize() &&
- std::count_if(GV->user_begin(), GV->user_end(), [&BB](const User *U) {
+ count_if(GV->users(), [&BB](const User *U) {
return isa<Instruction>(U) && cast<Instruction>(U)->getParent() == BB;
}) > 2;
@@ -1880,10 +2080,11 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
Callee = DAG.getNode(
ARMISD::WrapperPIC, dl, PtrVt,
DAG.getTargetGlobalAddress(GV, dl, PtrVt, 0, ARMII::MO_NONLAZY));
- Callee =
- DAG.getLoad(PtrVt, dl, DAG.getEntryNode(), Callee,
- MachinePointerInfo::getGOT(DAG.getMachineFunction()),
- /* Alignment = */ 0, MachineMemOperand::MOInvariant);
+ Callee = DAG.getLoad(
+ PtrVt, dl, DAG.getEntryNode(), Callee,
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()),
+ /* Alignment = */ 0, MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant);
} else if (Subtarget->isTargetCOFF()) {
assert(Subtarget->isTargetWindows() &&
"Windows is the only supported COFF target");
@@ -1977,7 +2178,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
if (isTailCall) {
- MF.getFrameInfo()->setHasTailCall();
+ MF.getFrameInfo().setHasTailCall();
return DAG.getNode(ARMISD::TC_RETURN, dl, NodeTys, Ops);
}
@@ -2060,9 +2261,9 @@ void ARMTargetLowering::HandleByVal(CCState *State, unsigned &Size,
/// incoming argument stack.
static
bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
- MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
+ MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
const TargetInstrInfo *TII) {
- unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
+ unsigned Bytes = Arg.getValueSizeInBits() / 8;
int FI = INT_MAX;
if (Arg.getOpcode() == ISD::CopyFromReg) {
unsigned VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
@@ -2094,9 +2295,9 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
return false;
assert(FI != INT_MAX);
- if (!MFI->isFixedObjectIndex(FI))
+ if (!MFI.isFixedObjectIndex(FI))
return false;
- return Offset == MFI->getObjectOffset(FI) && Bytes == MFI->getObjectSize(FI);
+ return Offset == MFI.getObjectOffset(FI) && Bytes == MFI.getObjectSize(FI);
}
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
@@ -2121,11 +2322,6 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// Look for obvious safe cases to perform tail call optimization that do not
// require ABI changes. This is what gcc calls sibcall.
- // Do not sibcall optimize vararg calls unless the call site is not passing
- // any arguments.
- if (isVarArg && !Outs.empty())
- return false;
-
// Exception-handling functions need a special set of instructions to indicate
// a return to the hardware. Tail-calling another function would probably
// break this.
@@ -2155,8 +2351,8 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// Check that the call results are passed in the same way.
LLVMContext &C = *DAG.getContext();
if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
- CCAssignFnForNode(CalleeCC, true, isVarArg),
- CCAssignFnForNode(CallerCC, true, isVarArg)))
+ CCAssignFnForReturn(CalleeCC, isVarArg),
+ CCAssignFnForReturn(CallerCC, isVarArg)))
return false;
// The callee has to preserve all registers the caller needs to preserve.
const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
@@ -2181,12 +2377,11 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
// argument is passed on the stack.
SmallVector<CCValAssign, 16> ArgLocs;
ARMCCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C, Call);
- CCInfo.AnalyzeCallOperands(Outs,
- CCAssignFnForNode(CalleeCC, false, isVarArg));
+ CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg));
if (CCInfo.getNextStackOffset()) {
// Check if the arguments are already laid out in the right way as
// the caller's fixed stack objects.
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
const MachineRegisterInfo *MRI = &MF.getRegInfo();
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
for (unsigned i = 0, realArgIdx = 0, e = ArgLocs.size();
@@ -2236,8 +2431,7 @@ ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
LLVMContext &Context) const {
SmallVector<CCValAssign, 16> RVLocs;
CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
- return CCInfo.CheckReturn(Outs, CCAssignFnForNode(CallConv, /*Return=*/true,
- isVarArg));
+ return CCInfo.CheckReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
}
static SDValue LowerInterruptReturn(SmallVectorImpl<SDValue> &RetOps,
@@ -2288,8 +2482,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
*DAG.getContext(), Call);
// Analyze outgoing return values.
- CCInfo.AnalyzeReturn(Outs, CCAssignFnForNode(CallConv, /* Return */ true,
- isVarArg));
+ CCInfo.AnalyzeReturn(Outs, CCAssignFnForReturn(CallConv, isVarArg));
SDValue Flag;
SmallVector<SDValue, 4> RetOps;
@@ -2537,7 +2730,7 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
EVT PtrVT = getPointerTy(DAG.getDataLayout());
const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
SDValue CPAddr;
- bool IsPositionIndependent = isPositionIndependent();
+ bool IsPositionIndependent = isPositionIndependent() || Subtarget->isROPI();
if (!IsPositionIndependent) {
CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
} else {
@@ -2595,16 +2788,17 @@ ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
// The first entry in the descriptor is a function pointer that we must call
// to obtain the address of the variable.
SDValue Chain = DAG.getEntryNode();
- SDValue FuncTLVGet =
- DAG.getLoad(MVT::i32, DL, Chain, DescAddr,
- MachinePointerInfo::getGOT(DAG.getMachineFunction()),
- /* Alignment = */ 4, MachineMemOperand::MONonTemporal |
- MachineMemOperand::MOInvariant);
+ SDValue FuncTLVGet = DAG.getLoad(
+ MVT::i32, DL, Chain, DescAddr,
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()),
+ /* Alignment = */ 4,
+ MachineMemOperand::MONonTemporal | MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant);
Chain = FuncTLVGet.getValue(1);
MachineFunction &F = DAG.getMachineFunction();
- MachineFrameInfo *MFI = F.getFrameInfo();
- MFI->setAdjustsStack(true);
+ MachineFrameInfo &MFI = F.getFrameInfo();
+ MFI.setAdjustsStack(true);
// TLS calls preserve all registers except those that absolutely must be
// trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
@@ -2801,12 +2995,171 @@ ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable("bogus TLS model");
}
+/// Return true if all users of V are within function F, looking through
+/// ConstantExprs.
+static bool allUsersAreInFunction(const Value *V, const Function *F) {
+ SmallVector<const User*,4> Worklist;
+ for (auto *U : V->users())
+ Worklist.push_back(U);
+ while (!Worklist.empty()) {
+ auto *U = Worklist.pop_back_val();
+ if (isa<ConstantExpr>(U)) {
+ for (auto *UU : U->users())
+ Worklist.push_back(UU);
+ continue;
+ }
+
+ auto *I = dyn_cast<Instruction>(U);
+ if (!I || I->getParent()->getParent() != F)
+ return false;
+ }
+ return true;
+}
+
+/// Return true if all users of V are within some (any) function, looking through
+/// ConstantExprs. In other words, return false if V has any global constant users.
+static bool allUsersAreInFunctions(const Value *V) {
+ SmallVector<const User*,4> Worklist;
+ for (auto *U : V->users())
+ Worklist.push_back(U);
+ while (!Worklist.empty()) {
+ auto *U = Worklist.pop_back_val();
+ if (isa<ConstantExpr>(U)) {
+ for (auto *UU : U->users())
+ Worklist.push_back(UU);
+ continue;
+ }
+
+ if (!isa<Instruction>(U))
+ return false;
+ }
+ return true;
+}
+
+// Return true if T is an integer, float or an array/vector of either.
+static bool isSimpleType(Type *T) {
+ if (T->isIntegerTy() || T->isFloatingPointTy())
+ return true;
+ Type *SubT = nullptr;
+ if (T->isArrayTy())
+ SubT = T->getArrayElementType();
+ else if (T->isVectorTy())
+ SubT = T->getVectorElementType();
+ else
+ return false;
+ return SubT->isIntegerTy() || SubT->isFloatingPointTy();
+}
+
+static SDValue promoteToConstantPool(const GlobalValue *GV, SelectionDAG &DAG,
+ EVT PtrVT, SDLoc dl) {
+ // If we're creating a pool entry for a constant global with unnamed address,
+ // and the global is small enough, we can emit it inline into the constant pool
+ // to save ourselves an indirection.
+ //
+ // This is a win if the constant is only used in one function (so it doesn't
+ // need to be duplicated) or duplicating the constant wouldn't increase code
+ // size (implying the constant is no larger than 4 bytes).
+ const Function *F = DAG.getMachineFunction().getFunction();
+
+ // We rely on this decision to inline being idempotent and unrelated to the
+ // use-site. We know that if we inline a variable at one use site, we'll
+ // inline it elsewhere too (and reuse the constant pool entry). Fast-isel
+ // doesn't know about this optimization, so bail out if it's enabled;
+ // otherwise we could decide to inline here (and thus never emit the GV) while
+ // fast-isel generated code still requires the GV.
+ if (!EnableConstpoolPromotion ||
+ DAG.getMachineFunction().getTarget().Options.EnableFastISel)
+ return SDValue();
+
+ auto *GVar = dyn_cast<GlobalVariable>(GV);
+ if (!GVar || !GVar->hasInitializer() ||
+ !GVar->isConstant() || !GVar->hasGlobalUnnamedAddr() ||
+ !GVar->hasLocalLinkage())
+ return SDValue();
+
+ // Ensure that we don't try and inline any type that contains pointers. If
+ // we inline a value that contains relocations, we move the relocations from
+ // .data to .text which is not ideal.
+ auto *Init = GVar->getInitializer();
+ if (!isSimpleType(Init->getType()))
+ return SDValue();
+
+ // The constant islands pass can only really deal with alignment requests
+ // <= 4 bytes and cannot pad constants itself. Therefore we cannot promote
+ // any type wanting greater alignment requirements than 4 bytes. We also
+ // can only promote constants that are multiples of 4 bytes in size or
+ // are paddable to a multiple of 4. Currently we only try and pad constants
+ // that are strings for simplicity.
+ auto *CDAInit = dyn_cast<ConstantDataArray>(Init);
+ unsigned Size = DAG.getDataLayout().getTypeAllocSize(Init->getType());
+ unsigned Align = GVar->getAlignment();
+ unsigned RequiredPadding = 4 - (Size % 4);
+ bool PaddingPossible =
+ RequiredPadding == 4 || (CDAInit && CDAInit->isString());
+ if (!PaddingPossible || Align > 4 || Size > ConstpoolPromotionMaxSize)
+ return SDValue();
+
+ unsigned PaddedSize = Size + ((RequiredPadding == 4) ? 0 : RequiredPadding);
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+
+ // We can't bloat the constant pool too much, else the ConstantIslands pass
+ // may fail to converge. If we haven't promoted this global yet (it may have
+ // multiple uses), and promoting it would increase the constant pool size
+ // (Size > 4), ensure we have space to do so up to ConstpoolPromotionMaxTotal.
+ if (!AFI->getGlobalsPromotedToConstantPool().count(GVar) && Size > 4)
+ if (AFI->getPromotedConstpoolIncrease() + PaddedSize - 4 >=
+ ConstpoolPromotionMaxTotal)
+ return SDValue();
+
+ // This is only valid if all users are in a single function OR it has users
+ // in multiple functions but is no larger than a pointer. We also check if
+ // GVar has constant (non-ConstantExpr) users. If so, it essentially has its
+ // address taken.
+ if (!allUsersAreInFunction(GVar, F) &&
+ !(Size <= 4 && allUsersAreInFunctions(GVar)))
+ return SDValue();
+
+ // We're going to inline this global. Pad it out if needed.
+ if (RequiredPadding != 4) {
+ StringRef S = CDAInit->getAsString();
+
+ SmallVector<uint8_t,16> V(S.size());
+ std::copy(S.bytes_begin(), S.bytes_end(), V.begin());
+ while (RequiredPadding--)
+ V.push_back(0);
+ Init = ConstantDataArray::get(*DAG.getContext(), V);
+ }
+
+ auto CPVal = ARMConstantPoolConstant::Create(GVar, Init);
+ SDValue CPAddr =
+ DAG.getTargetConstantPool(CPVal, PtrVT, /*Align=*/4);
+ if (!AFI->getGlobalsPromotedToConstantPool().count(GVar)) {
+ AFI->markGlobalAsPromotedToConstantPool(GVar);
+ AFI->setPromotedConstpoolIncrease(AFI->getPromotedConstpoolIncrease() +
+ PaddedSize - 4);
+ }
+ ++NumConstpoolPromoted;
+ return DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+}
+
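A small standalone check of the padding arithmetic used above; the helper name is hypothetical, only the Size/RequiredPadding computation is taken from the patch.

#include <cassert>

// Pad a byte size up to a multiple of 4, treating a remainder of 0 as
// "no padding needed" -- mirrors RequiredPadding/PaddedSize above.
static unsigned paddedConstpoolSize(unsigned Size) {
  unsigned RequiredPadding = 4 - (Size % 4);
  return Size + (RequiredPadding == 4 ? 0 : RequiredPadding);
}

int main() {
  assert(paddedConstpoolSize(5) == 8);   // e.g. a 5-byte string, padded
  assert(paddedConstpoolSize(8) == 8);   // already a multiple of 4
  assert(paddedConstpoolSize(1) == 4);
  return 0;
}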
SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
SelectionDAG &DAG) const {
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc dl(Op);
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
const TargetMachine &TM = getTargetMachine();
+ if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
+ GV = GA->getBaseObject();
+ bool IsRO =
+ (isa<GlobalVariable>(GV) && cast<GlobalVariable>(GV)->isConstant()) ||
+ isa<Function>(GV);
+
+ // Try promoteToConstantPool only when we are not generating an execute-only
+ // (XO) text section.
+ if (TM.shouldAssumeDSOLocal(*GV->getParent(), GV) && !Subtarget->genExecuteOnly())
+ if (SDValue V = promoteToConstantPool(GV, DAG, PtrVT, dl))
+ return V;
+
if (isPositionIndependent()) {
bool UseGOT_PREL = !TM.shouldAssumeDSOLocal(*GV->getParent(), GV);
@@ -2833,6 +3186,23 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
DAG.getLoad(PtrVT, dl, Chain, Result,
MachinePointerInfo::getGOT(DAG.getMachineFunction()));
return Result;
+ } else if (Subtarget->isROPI() && IsRO) {
+ // PC-relative.
+ SDValue G = DAG.getTargetGlobalAddress(GV, dl, PtrVT);
+ SDValue Result = DAG.getNode(ARMISD::WrapperPIC, dl, PtrVT, G);
+ return Result;
+ } else if (Subtarget->isRWPI() && !IsRO) {
+ // SB-relative.
+ ARMConstantPoolValue *CPV =
+ ARMConstantPoolConstant::Create(GV, ARMCP::SBREL);
+ SDValue CPAddr = DAG.getTargetConstantPool(CPV, PtrVT, 4);
+ CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
+ SDValue G = DAG.getLoad(
+ PtrVT, dl, DAG.getEntryNode(), CPAddr,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()));
+ SDValue SB = DAG.getCopyFromReg(DAG.getEntryNode(), dl, ARM::R9, PtrVT);
+ SDValue Result = DAG.getNode(ISD::ADD, dl, PtrVT, SB, G);
+ return Result;
}
// If we have T2 ops, we can materialize the address directly via movt/movw
@@ -2854,6 +3224,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
SelectionDAG &DAG) const {
+ assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
+ "ROPI/RWPI not currently supported for Darwin");
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDLoc dl(Op);
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
@@ -2880,6 +3252,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op,
assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported");
assert(Subtarget->useMovt(DAG.getMachineFunction()) &&
"Windows on ARM expects to use movw/movt");
+ assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
+ "ROPI/RWPI not currently supported for Windows");
const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
const ARMII::TOF TargetFlags =
@@ -3097,8 +3471,8 @@ SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA,
SDValue ArgValue2;
if (NextVA.isMemLoc()) {
- MachineFrameInfo *MFI = MF.getFrameInfo();
- int FI = MFI->CreateFixedObject(4, NextVA.getLocMemOffset(), true);
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ int FI = MFI.CreateFixedObject(4, NextVA.getLocMemOffset(), true);
// Create load node to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
@@ -3139,7 +3513,7 @@ int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
// initialize stack frame.
MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
unsigned RBegin, REnd;
if (InRegsParamRecordIdx < CCInfo.getInRegsParamsCount()) {
@@ -3154,7 +3528,7 @@ int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG,
ArgOffset = -4 * (ARM::R4 - RBegin);
auto PtrVT = getPointerTy(DAG.getDataLayout());
- int FrameIndex = MFI->CreateFixedObject(ArgSize, ArgOffset, false);
+ int FrameIndex = MFI.CreateFixedObject(ArgSize, ArgOffset, false);
SDValue FIN = DAG.getFrameIndex(FrameIndex, PtrVT);
SmallVector<SDValue, 4> MemOps;
@@ -3200,7 +3574,7 @@ SDValue ARMTargetLowering::LowerFormalArguments(
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
@@ -3208,9 +3582,7 @@ SDValue ARMTargetLowering::LowerFormalArguments(
SmallVector<CCValAssign, 16> ArgLocs;
ARMCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
*DAG.getContext(), Prologue);
- CCInfo.AnalyzeFormalArguments(Ins,
- CCAssignFnForNode(CallConv, /* Return*/ false,
- isVarArg));
+ CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
SmallVector<SDValue, 16> ArgValues;
SDValue ArgValue;
@@ -3248,7 +3620,7 @@ SDValue ARMTargetLowering::LowerFormalArguments(
CCInfo.rewindByValRegsInfo();
int lastInsIndex = -1;
- if (isVarArg && MFI->hasVAStart()) {
+ if (isVarArg && MFI.hasVAStart()) {
unsigned RegIdx = CCInfo.getFirstUnallocated(GPRArgRegs);
if (RegIdx != array_lengthof(GPRArgRegs))
ArgRegBegin = std::min(ArgRegBegin, (unsigned)GPRArgRegs[RegIdx]);
@@ -3278,7 +3650,7 @@ SDValue ARMTargetLowering::LowerFormalArguments(
VA = ArgLocs[++i]; // skip ahead to next loc
SDValue ArgValue2;
if (VA.isMemLoc()) {
- int FI = MFI->CreateFixedObject(8, VA.getLocMemOffset(), true);
+ int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN,
MachinePointerInfo::getFixedStack(
@@ -3370,8 +3742,8 @@ SDValue ARMTargetLowering::LowerFormalArguments(
CCInfo.nextInRegsParam();
} else {
unsigned FIOffset = VA.getLocMemOffset();
- int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
- FIOffset, true);
+ int FI = MFI.CreateFixedObject(VA.getLocVT().getSizeInBits()/8,
+ FIOffset, true);
// Create load nodes to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
@@ -3385,7 +3757,7 @@ SDValue ARMTargetLowering::LowerFormalArguments(
}
// varargs
- if (isVarArg && MFI->hasVAStart())
+ if (isVarArg && MFI.hasVAStart())
VarArgStyleRegisters(CCInfo, DAG, dl, Chain,
CCInfo.getNextStackOffset(),
TotalArgRegsSaveSize);
@@ -4122,15 +4494,15 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
Table = DAG.getNode(ARMISD::WrapperJT, dl, MVT::i32, JTI);
Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(4, dl, PTy));
SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
- if (Subtarget->isThumb2()) {
- // Thumb2 uses a two-level jump. That is, it jumps into the jump table
+ if (Subtarget->isThumb2() || (Subtarget->hasV8MBaselineOps() && Subtarget->isThumb())) {
+ // Thumb2 and ARMv8-M use a two-level jump. That is, they jump into the jump table
// which does another jump to the destination. This also makes it easier
- // to translate it to TBB / TBH later.
+ // to translate it to TBB / TBH later (Thumb2 only).
// FIXME: This might not work if the function is extremely large.
return DAG.getNode(ARMISD::BR2_JT, dl, MVT::Other, Chain,
Addr, Op.getOperand(2), JTI);
}
- if (isPositionIndependent()) {
+ if (isPositionIndependent() || Subtarget->isROPI()) {
Addr =
DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr,
MachinePointerInfo::getJumpTable(DAG.getMachineFunction()));
@@ -4320,8 +4692,8 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- MFI->setReturnAddressIsTaken(true);
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ MFI.setReturnAddressIsTaken(true);
if (verifyReturnAddressArgumentIsConstant(Op, DAG))
return SDValue();
@@ -4346,8 +4718,8 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
const ARMBaseRegisterInfo &ARI =
*static_cast<const ARMBaseRegisterInfo*>(RegInfo);
MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- MFI->setFrameAddressIsTaken(true);
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ MFI.setFrameAddressIsTaken(true);
EVT VT = Op.getValueType();
SDLoc dl(Op); // FIXME probably not meaningful
@@ -4520,6 +4892,7 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
SDValue ShOpHi = Op.getOperand(1);
SDValue ShAmt = Op.getOperand(2);
SDValue ARMcc;
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
@@ -4530,15 +4903,23 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
DAG.getConstant(VTBits, dl, MVT::i32));
SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt);
- SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
- SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
-
- SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
- ISD::SETGE, ARMcc, DAG, dl);
- SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
- SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc,
- CCR, Cmp);
+ SDValue LoSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
+ SDValue LoBigShift = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt);
+ SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
+ ISD::SETGE, ARMcc, DAG, dl);
+ SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift, LoBigShift,
+ ARMcc, CCR, CmpLo);
+
+ SDValue HiSmallShift = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
+ SDValue HiBigShift = Opc == ISD::SRA
+ ? DAG.getNode(Opc, dl, VT, ShOpHi,
+ DAG.getConstant(VTBits - 1, dl, VT))
+ : DAG.getConstant(0, dl, VT);
+ SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
+ ISD::SETGE, ARMcc, DAG, dl);
+ SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
+ ARMcc, CCR, CmpHi);
SDValue Ops[2] = { Lo, Hi };
return DAG.getMergeValues(Ops, dl);
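A scalar sketch of the two-word right shift being built above, assuming a shift amount strictly between 0 and 64; the DAG version computes both candidates and selects with CMOV instead of branching, and the SRA_PARTS case uses an arithmetic shift with a sign-fill (ShOpHi >> 31) for the big-shift Hi value.

#include <cassert>
#include <cstdint>

// Logical right shift of a 64-bit value held as two 32-bit halves.
static void srl64parts(uint32_t Lo, uint32_t Hi, unsigned Amt,
                       uint32_t &OutLo, uint32_t &OutHi) {
  assert(Amt > 0 && Amt < 64 && "sketch ignores shifts of 0 and >= 64");
  if (Amt < 32) {
    // "Small" shift: bits move from Hi into the top of Lo.
    OutLo = (Lo >> Amt) | (Hi << (32 - Amt));
    OutHi = Hi >> Amt;
  } else {
    // "Big" shift (ExtraShAmt = Amt - 32 >= 0): Lo comes entirely from Hi,
    // and Hi becomes zero (SRA_PARTS would sign-fill instead).
    OutLo = Hi >> (Amt - 32);
    OutHi = 0;
  }
}

int main() {
  uint32_t Lo, Hi;
  srl64parts(0x0000000Fu, 0x80000000u, 36, Lo, Hi);
  assert(Lo == 0x08000000u && Hi == 0);  // 0x800000000000000F >> 36
  return 0;
}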
@@ -4556,23 +4937,28 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
SDValue ShOpHi = Op.getOperand(1);
SDValue ShAmt = Op.getOperand(2);
SDValue ARMcc;
+ SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
assert(Op.getOpcode() == ISD::SHL_PARTS);
SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
+ SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
+ SDValue HiSmallShift = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
+
SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
DAG.getConstant(VTBits, dl, MVT::i32));
- SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
- SDValue Tmp3 = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
+ SDValue HiBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
+ SDValue CmpHi = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
+ ISD::SETGE, ARMcc, DAG, dl);
+ SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, HiSmallShift, HiBigShift,
+ ARMcc, CCR, CmpHi);
- SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
- SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
+ SDValue CmpLo = getARMCmp(ExtraShAmt, DAG.getConstant(0, dl, MVT::i32),
ISD::SETGE, ARMcc, DAG, dl);
- SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
- SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc,
- CCR, Cmp);
+ SDValue LoSmallShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
+ SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, LoSmallShift,
+ DAG.getConstant(0, dl, VT), ARMcc, CCR, CmpLo);
SDValue Ops[2] = { Lo, Hi };
return DAG.getMergeValues(Ops, dl);
@@ -4877,32 +5263,49 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
SDLoc dl(Op);
+ if (Op0.getValueType().getVectorElementType() == MVT::i64 &&
+ (SetCCOpcode == ISD::SETEQ || SetCCOpcode == ISD::SETNE)) {
+ // Special-case integer 64-bit equality comparisons. They aren't legal,
+ // but they can be lowered with a few vector instructions.
+ unsigned CmpElements = CmpVT.getVectorNumElements() * 2;
+ EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, CmpElements);
+ SDValue CastOp0 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op0);
+ SDValue CastOp1 = DAG.getNode(ISD::BITCAST, dl, SplitVT, Op1);
+ SDValue Cmp = DAG.getNode(ISD::SETCC, dl, SplitVT, CastOp0, CastOp1,
+ DAG.getCondCode(ISD::SETEQ));
+ SDValue Reversed = DAG.getNode(ARMISD::VREV64, dl, SplitVT, Cmp);
+ SDValue Merged = DAG.getNode(ISD::AND, dl, SplitVT, Cmp, Reversed);
+ Merged = DAG.getNode(ISD::BITCAST, dl, CmpVT, Merged);
+ if (SetCCOpcode == ISD::SETNE)
+ Merged = DAG.getNOT(dl, Merged, CmpVT);
+ Merged = DAG.getSExtOrTrunc(Merged, dl, VT);
+ return Merged;
+ }
+
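A scalar sketch of what the SETEQ special case above computes per 64-bit lane; the vector code does the same thing with a 32-bit element compare, a VREV64 to line the other half's result up, and a VAND. Names below are illustrative.

#include <cassert>
#include <cstdint>

// Compare two 64-bit lanes for equality using only 32-bit compares: each half
// yields all-ones or all-zeros, and the AND of the two half-results is
// all-ones exactly when both halves match.
static uint64_t laneEq64(uint64_t A, uint64_t B) {
  uint32_t LoEq = (uint32_t)A == (uint32_t)B ? 0xFFFFFFFFu : 0u;
  uint32_t HiEq = (uint32_t)(A >> 32) == (uint32_t)(B >> 32) ? 0xFFFFFFFFu : 0u;
  uint32_t Both = LoEq & HiEq;
  return ((uint64_t)Both << 32) | Both;  // replicate across the whole lane
}

int main() {
  assert(laneEq64(42, 42) == ~0ULL);
  assert(laneEq64(42, 43) == 0);
  assert(laneEq64(1ULL << 40, 0) == 0);  // differs only in the high half
  return 0;
}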
if (CmpVT.getVectorElementType() == MVT::i64)
- // 64-bit comparisons are not legal. We've marked SETCC as non-Custom,
- // but it's possible that our operands are 64-bit but our result is 32-bit.
- // Bail in this case.
+ // 64-bit comparisons are not legal in general.
return SDValue();
if (Op1.getValueType().isFloatingPoint()) {
switch (SetCCOpcode) {
default: llvm_unreachable("Illegal FP comparison");
case ISD::SETUNE:
- case ISD::SETNE: Invert = true; // Fallthrough
+ case ISD::SETNE: Invert = true; LLVM_FALLTHROUGH;
case ISD::SETOEQ:
case ISD::SETEQ: Opc = ARMISD::VCEQ; break;
case ISD::SETOLT:
- case ISD::SETLT: Swap = true; // Fallthrough
+ case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETOGT:
case ISD::SETGT: Opc = ARMISD::VCGT; break;
case ISD::SETOLE:
- case ISD::SETLE: Swap = true; // Fallthrough
+ case ISD::SETLE: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETOGE:
case ISD::SETGE: Opc = ARMISD::VCGE; break;
- case ISD::SETUGE: Swap = true; // Fallthrough
+ case ISD::SETUGE: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break;
- case ISD::SETUGT: Swap = true; // Fallthrough
+ case ISD::SETUGT: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break;
- case ISD::SETUEQ: Invert = true; // Fallthrough
+ case ISD::SETUEQ: Invert = true; LLVM_FALLTHROUGH;
case ISD::SETONE:
// Expand this to (OLT | OGT).
TmpOp0 = Op0;
@@ -4911,7 +5314,9 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) {
Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0);
Op1 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp0, TmpOp1);
break;
- case ISD::SETUO: Invert = true; // Fallthrough
+ case ISD::SETUO:
+ Invert = true;
+ LLVM_FALLTHROUGH;
case ISD::SETO:
// Expand this to (OLT | OGE).
TmpOp0 = Op0;
@@ -5168,11 +5573,28 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef,
SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
const ARMSubtarget *ST) const {
- if (!ST->hasVFP3())
- return SDValue();
-
bool IsDouble = Op.getValueType() == MVT::f64;
ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Op);
+ const APFloat &FPVal = CFP->getValueAPF();
+
+ // Prevent floating-point constants from using literal loads
+ // when execute-only is enabled.
+ if (ST->genExecuteOnly()) {
+ APInt INTVal = FPVal.bitcastToAPInt();
+ SDLoc DL(CFP);
+ if (IsDouble) {
+ SDValue Lo = DAG.getConstant(INTVal.trunc(32), DL, MVT::i32);
+ SDValue Hi = DAG.getConstant(INTVal.lshr(32).trunc(32), DL, MVT::i32);
+ if (!ST->isLittle())
+ std::swap(Lo, Hi);
+ return DAG.getNode(ARMISD::VMOVDRR, DL, MVT::f64, Lo, Hi);
+ } else {
+ return DAG.getConstant(INTVal, DL, MVT::i32);
+ }
+ }
+
+ if (!ST->hasVFP3())
+ return SDValue();
// Use the default (constant pool) lowering for double constants when we have
// an SP-only FPU
@@ -5180,7 +5602,6 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG,
return SDValue();
// Try splatting with a VMOV.f32...
- const APFloat &FPVal = CFP->getValueAPF();
int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal);
if (ImmVal != -1) {
@@ -5325,7 +5746,7 @@ static bool isVREVMask(ArrayRef<int> M, EVT VT, unsigned BlockSize) {
assert((BlockSize==16 || BlockSize==32 || BlockSize==64) &&
"Only possible block sizes for VREV are: 16, 32, 64");
- unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ unsigned EltSz = VT.getScalarSizeInBits();
if (EltSz == 64)
return false;
@@ -5376,7 +5797,7 @@ static bool isVTBLMask(ArrayRef<int> M, EVT VT) {
// want to check the low half and high half of the shuffle mask as if it were
// the other case
static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
- unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ unsigned EltSz = VT.getScalarSizeInBits();
if (EltSz == 64)
return false;
@@ -5411,7 +5832,7 @@ static bool isVTRNMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
- unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ unsigned EltSz = VT.getScalarSizeInBits();
if (EltSz == 64)
return false;
@@ -5446,7 +5867,7 @@ static bool isVTRN_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
// Requires similar checks to that of isVTRNMask with
// respect the how results are returned.
static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
- unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ unsigned EltSz = VT.getScalarSizeInBits();
if (EltSz == 64)
return false;
@@ -5476,7 +5897,7 @@ static bool isVUZPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>,
static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
- unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ unsigned EltSz = VT.getScalarSizeInBits();
if (EltSz == 64)
return false;
@@ -5517,7 +5938,7 @@ static bool isVUZP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
// Requires similar checks to that of isVTRNMask with respect the how results
// are returned.
static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
- unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ unsigned EltSz = VT.getScalarSizeInBits();
if (EltSz == 64)
return false;
@@ -5550,7 +5971,7 @@ static bool isVZIPMask(ArrayRef<int> M, EVT VT, unsigned &WhichResult) {
/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
static bool isVZIP_v_undef_Mask(ArrayRef<int> M, EVT VT, unsigned &WhichResult){
- unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ unsigned EltSz = VT.getScalarSizeInBits();
if (EltSz == 64)
return false;
@@ -5650,6 +6071,9 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
unsigned SplatBitSize;
bool HasAnyUndefs;
if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) {
+ if (SplatUndef.isAllOnesValue())
+ return DAG.getUNDEF(VT);
+
if (SplatBitSize <= 64) {
// Check if an immediate VMOV works.
EVT VmovVT;
@@ -5732,7 +6156,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode()))
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
- unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ unsigned EltSize = VT.getScalarSizeInBits();
// Use VDUP for non-constant splats. For f32 constant splats, reduce to
// i32 and try again.
@@ -5811,6 +6235,24 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
return shuffle;
}
+ if (VT.is128BitVector() && VT != MVT::v2f64 && VT != MVT::v4f32) {
+ // If we haven't found an efficient lowering, try splitting a 128-bit vector
+ // into two 64-bit vectors; we might discover a better way to lower it.
+ SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElts);
+ EVT ExtVT = VT.getVectorElementType();
+ EVT HVT = EVT::getVectorVT(*DAG.getContext(), ExtVT, NumElts / 2);
+ SDValue Lower =
+ DAG.getBuildVector(HVT, dl, makeArrayRef(&Ops[0], NumElts / 2));
+ if (Lower.getOpcode() == ISD::BUILD_VECTOR)
+ Lower = LowerBUILD_VECTOR(Lower, DAG, ST);
+ SDValue Upper = DAG.getBuildVector(
+ HVT, dl, makeArrayRef(&Ops[NumElts / 2], NumElts / 2));
+ if (Upper.getOpcode() == ISD::BUILD_VECTOR)
+ Upper = LowerBUILD_VECTOR(Upper, DAG, ST);
+ if (Lower && Upper)
+ return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lower, Upper);
+ }
+
// Vectors with 32- or 64-bit elements can be built by directly assigning
// the subregisters. Lower it to an ARMISD::BUILD_VECTOR so the operands
// will be legalized.
@@ -5896,7 +6338,7 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
// Add this element source to the list if it's not already there.
SDValue SourceVec = V.getOperand(0);
- auto Source = std::find(Sources.begin(), Sources.end(), SourceVec);
+ auto Source = find(Sources, SourceVec);
if (Source == Sources.end())
Source = Sources.insert(Sources.end(), ShuffleSourceInfo(SourceVec));
@@ -5920,7 +6362,7 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
SmallestEltTy = SrcEltTy;
}
unsigned ResMultiplier =
- VT.getVectorElementType().getSizeInBits() / SmallestEltTy.getSizeInBits();
+ VT.getScalarSizeInBits() / SmallestEltTy.getSizeInBits();
NumElts = VT.getSizeInBits() / SmallestEltTy.getSizeInBits();
EVT ShuffleVT = EVT::getVectorVT(*DAG.getContext(), SmallestEltTy, NumElts);
@@ -6006,13 +6448,13 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
// The stars all align, our next step is to produce the mask for the shuffle.
SmallVector<int, 8> Mask(ShuffleVT.getVectorNumElements(), -1);
- int BitsPerShuffleLane = ShuffleVT.getVectorElementType().getSizeInBits();
+ int BitsPerShuffleLane = ShuffleVT.getScalarSizeInBits();
for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) {
SDValue Entry = Op.getOperand(i);
if (Entry.isUndef())
continue;
- auto Src = std::find(Sources.begin(), Sources.end(), Entry.getOperand(0));
+ auto Src = find(Sources, Entry.getOperand(0));
int EltNo = cast<ConstantSDNode>(Entry.getOperand(1))->getSExtValue();
// EXTRACT_VECTOR_ELT performs an implicit any_ext; BUILD_VECTOR an implicit
@@ -6020,7 +6462,7 @@ SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op,
// segment.
EVT OrigEltTy = Entry.getOperand(0).getValueType().getVectorElementType();
int BitsDefined = std::min(OrigEltTy.getSizeInBits(),
- VT.getVectorElementType().getSizeInBits());
+ VT.getScalarSizeInBits());
int LanesDefined = BitsDefined / BitsPerShuffleLane;
// This source is expected to fill ResMultiplier lanes of the final shuffle,
@@ -6080,7 +6522,7 @@ ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
bool ReverseVEXT, isV_UNDEF;
unsigned Imm, WhichResult;
- unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ unsigned EltSize = VT.getScalarSizeInBits();
return (EltSize >= 32 ||
ShuffleVectorSDNode::isSplatMask(&M[0], VT) ||
isVREVMask(M, VT, 64) ||
@@ -6223,7 +6665,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
// of the same time so that they get CSEd properly.
ArrayRef<int> ShuffleMask = SVN->getMask();
- unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ unsigned EltSize = VT.getScalarSizeInBits();
if (EltSize <= 32) {
if (SVN->isSplat()) {
int Lane = SVN->getSplatIndex();
@@ -6309,7 +6751,7 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
EVT SubVT = SubV1.getValueType();
// We expect these to have been canonicalized to -1.
- assert(std::all_of(ShuffleMask.begin(), ShuffleMask.end(), [&](int i) {
+ assert(all_of(ShuffleMask, [&](int i) {
return i < (int)VT.getVectorNumElements();
}) && "Unexpected shuffle index into UNDEF operand!");
@@ -6397,8 +6839,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
return SDValue();
SDValue Vec = Op.getOperand(0);
- if (Op.getValueType() == MVT::i32 &&
- Vec.getValueType().getVectorElementType().getSizeInBits() < 32) {
+ if (Op.getValueType() == MVT::i32 && Vec.getScalarValueSizeInBits() < 32) {
SDLoc dl(Op);
return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane);
}
@@ -6463,7 +6904,7 @@ static bool isExtendedBUILD_VECTOR(SDNode *N, SelectionDAG &DAG,
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
SDNode *Elt = N->getOperand(i).getNode();
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Elt)) {
- unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ unsigned EltSize = VT.getScalarSizeInBits();
unsigned HalfSize = EltSize / 2;
if (isSigned) {
if (!isIntN(HalfSize, C->getSExtValue()))
@@ -6590,7 +7031,7 @@ static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) {
// Construct a new BUILD_VECTOR with elements truncated to half the size.
assert(N->getOpcode() == ISD::BUILD_VECTOR && "expected BUILD_VECTOR");
EVT VT = N->getValueType(0);
- unsigned EltSize = VT.getVectorElementType().getSizeInBits() / 2;
+ unsigned EltSize = VT.getScalarSizeInBits() / 2;
unsigned NumElts = VT.getVectorNumElements();
MVT TruncVT = MVT::getIntegerVT(EltSize);
SmallVector<SDValue, 8> Ops;
@@ -6915,7 +7356,7 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
auto PtrVT = getPointerTy(DAG.getDataLayout());
- MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// Pair of floats / doubles used to pass the result.
@@ -6929,7 +7370,7 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const {
// Create stack object for sret.
const uint64_t ByteSize = DL.getTypeAllocSize(RetTy);
const unsigned StackAlign = DL.getPrefTypeAlignment(RetTy);
- int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false);
+ int FrameIdx = MFI.CreateStackObject(ByteSize, StackAlign, false);
SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy(DL));
ArgListEntry Entry;
@@ -7029,6 +7470,19 @@ SDValue ARMTargetLowering::LowerDIV_Windows(SDValue Op, SelectionDAG &DAG,
return LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
}
+static SDValue WinDBZCheckDenominator(SelectionDAG &DAG, SDNode *N, SDValue InChain) {
+ SDLoc DL(N);
+ SDValue Op = N->getOperand(1);
+ if (N->getValueType(0) == MVT::i32)
+ return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain, Op);
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Op,
+ DAG.getConstant(0, DL, MVT::i32));
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Op,
+ DAG.getConstant(1, DL, MVT::i32));
+ return DAG.getNode(ARMISD::WIN__DBZCHK, DL, MVT::Other, InChain,
+ DAG.getNode(ISD::OR, DL, MVT::i32, Lo, Hi));
+}
+
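The zero test above relies on a simple identity, sketched here on plain integers: the OR of the two halves of an i64 is zero exactly when the whole value is zero, so a single 32-bit WIN__DBZCHK covers the 64-bit case.

#include <cassert>
#include <cstdint>

static bool isZero64ViaOr(uint64_t Denom) {
  uint32_t Lo = (uint32_t)Denom;
  uint32_t Hi = (uint32_t)(Denom >> 32);
  return (Lo | Hi) == 0;  // zero iff both halves are zero
}

int main() {
  assert(isZero64ViaOr(0));
  assert(!isZero64ViaOr(1));
  assert(!isZero64ViaOr(1ULL << 40));
  return 0;
}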
void ARMTargetLowering::ExpandDIV_Windows(
SDValue Op, SelectionDAG &DAG, bool Signed,
SmallVectorImpl<SDValue> &Results) const {
@@ -7039,14 +7493,7 @@ void ARMTargetLowering::ExpandDIV_Windows(
"unexpected type for custom lowering DIV");
SDLoc dl(Op);
- SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op.getOperand(1),
- DAG.getConstant(0, dl, MVT::i32));
- SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op.getOperand(1),
- DAG.getConstant(1, dl, MVT::i32));
- SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i32, Lo, Hi);
-
- SDValue DBZCHK =
- DAG.getNode(ARMISD::WIN__DBZCHK, dl, MVT::Other, DAG.getEntryNode(), Or);
+ SDValue DBZCHK = WinDBZCheckDenominator(DAG, Op.getNode(), DAG.getEntryNode());
SDValue Result = LowerWindowsDIVLibCall(Op, DAG, Signed, DBZCHK);
@@ -7132,11 +7579,66 @@ static void ReplaceCMP_SWAP_64Results(SDNode *N,
Results.push_back(SDValue(CmpSwap, 2));
}
+static SDValue LowerFPOWI(SDValue Op, const ARMSubtarget &Subtarget,
+ SelectionDAG &DAG) {
+ const auto &TLI = DAG.getTargetLoweringInfo();
+
+ assert(Subtarget.getTargetTriple().isOSMSVCRT() &&
+ "Custom lowering is MSVCRT specific!");
+
+ SDLoc dl(Op);
+ SDValue Val = Op.getOperand(0);
+ MVT Ty = Val->getSimpleValueType(0);
+ SDValue Exponent = DAG.getNode(ISD::SINT_TO_FP, dl, Ty, Op.getOperand(1));
+ SDValue Callee = DAG.getExternalSymbol(Ty == MVT::f32 ? "powf" : "pow",
+ TLI.getPointerTy(DAG.getDataLayout()));
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+
+ Entry.Node = Val;
+ Entry.Ty = Val.getValueType().getTypeForEVT(*DAG.getContext());
+ Entry.isZExt = true;
+ Args.push_back(Entry);
+
+ Entry.Node = Exponent;
+ Entry.Ty = Exponent.getValueType().getTypeForEVT(*DAG.getContext());
+ Entry.isZExt = true;
+ Args.push_back(Entry);
+
+ Type *LCRTy = Val.getValueType().getTypeForEVT(*DAG.getContext());
+
+ // The in-chain to the call is the entry node. If we are emitting a
+ // tail call, the chain will be mutated if the node has a non-entry input
+ // chain.
+ SDValue InChain = DAG.getEntryNode();
+ SDValue TCChain = InChain;
+
+ const auto *F = DAG.getMachineFunction().getFunction();
+ bool IsTC = TLI.isInTailCallPosition(DAG, Op.getNode(), TCChain) &&
+ F->getReturnType() == LCRTy;
+ if (IsTC)
+ InChain = TCChain;
+
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl)
+ .setChain(InChain)
+ .setCallee(CallingConv::ARM_AAPCS_VFP, LCRTy, Callee, std::move(Args))
+ .setTailCall(IsTC);
+ std::pair<SDValue, SDValue> CI = TLI.LowerCallTo(CLI);
+
+ // Return the chain (the DAG root) if it is a tail call
+ return !CI.second.getNode() ? DAG.getRoot() : CI.first;
+}
+
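Semantically, the lowering above rewrites powi(x, n) into a pow/powf call on the FP-converted exponent, since MSVCRT provides no powi helper; a minimal host-side sketch of that rewrite follows.

#include <cassert>
#include <cmath>

// powi with an integer exponent, expressed as SINT_TO_FP + a pow call.
static double powiViaPow(double X, int N) {
  return std::pow(X, static_cast<double>(N));
}

int main() {
  assert(powiViaPow(2.0, 10) == 1024.0);
  assert(powiViaPow(2.0, -1) == 0.5);
  return 0;
}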
SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default: llvm_unreachable("Don't know how to custom lower this!");
case ISD::WRITE_REGISTER: return LowerWRITE_REGISTER(Op, DAG);
- case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::ConstantPool:
+ if (Subtarget->genExecuteOnly())
+ llvm_unreachable("execute-only should not generate constant pools");
+ return LowerConstantPool(Op, DAG);
case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::GlobalAddress:
switch (Subtarget->getTargetTriple().getObjectFormat()) {
@@ -7218,6 +7720,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
llvm_unreachable("Don't know how to custom lower this!");
case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG);
case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
+ case ISD::FPOWI: return LowerFPOWI(Op, *Subtarget, DAG);
case ARMISD::WIN__DBZCHK: return SDValue();
}
}
@@ -7278,6 +7781,8 @@ void ARMTargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
MachineBasicBlock *MBB,
MachineBasicBlock *DispatchBB,
int FI) const {
+ assert(!Subtarget->isROPI() && !Subtarget->isRWPI() &&
+ "ROPI/RWPI not currently supported with SjLj");
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
DebugLoc dl = MI.getDebugLoc();
MachineFunction *MF = MBB->getParent();
@@ -7396,8 +7901,8 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
DebugLoc dl = MI.getDebugLoc();
MachineFunction *MF = MBB->getParent();
MachineRegisterInfo *MRI = &MF->getRegInfo();
- MachineFrameInfo *MFI = MF->getFrameInfo();
- int FI = MFI->getFunctionContextIndex();
+ MachineFrameInfo &MFI = MF->getFrameInfo();
+ int FI = MFI.getFunctionContextIndex();
const TargetRegisterClass *TRC = Subtarget->isThumb() ? &ARM::tGPRRegClass
: &ARM::GPRnopcRegClass;
@@ -7406,7 +7911,6 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
// associated with.
DenseMap<unsigned, SmallVector<MachineBasicBlock*, 2> > CallSiteNumToLPad;
unsigned MaxCSNum = 0;
- MachineModuleInfo &MMI = MF->getMMI();
for (MachineFunction::iterator BB = MF->begin(), E = MF->end(); BB != E;
++BB) {
if (!BB->isEHPad()) continue;
@@ -7418,9 +7922,9 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
if (!II->isEHLabel()) continue;
MCSymbol *Sym = II->getOperand(0).getMCSymbol();
- if (!MMI.hasCallSiteLandingPad(Sym)) continue;
+ if (!MF->hasCallSiteLandingPad(Sym)) continue;
- SmallVectorImpl<unsigned> &CallSiteIdxs = MMI.getCallSiteLandingPad(Sym);
+ SmallVectorImpl<unsigned> &CallSiteIdxs = MF->getCallSiteLandingPad(Sym);
for (SmallVectorImpl<unsigned>::iterator
CSI = CallSiteIdxs.begin(), CSE = CallSiteIdxs.end();
CSI != CSE; ++CSI) {
@@ -7491,8 +7995,10 @@ void ARMTargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
// Add a register mask with no preserved registers. This results in all
- // registers being marked as clobbered.
- MIB.addRegMask(RI.getNoPreservedMask());
+ // registers being marked as clobbered. This can't work if the dispatch block
+ // is in a Thumb1 function and is linked with ARM code which uses the FP
+ // registers, as there is no way to preserve the FP registers in Thumb1 mode.
+ MIB.addRegMask(RI.getSjLjDispatchPreservedMask(*MF));
bool IsPositionIndependent = isPositionIndependent();
unsigned NumLPads = LPadList.size();
@@ -7911,6 +8417,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI,
bool IsThumb1 = Subtarget->isThumb1Only();
bool IsThumb2 = Subtarget->isThumb2();
+ bool IsThumb = Subtarget->isThumb();
if (Align & 1) {
UnitSize = 1;
@@ -7932,7 +8439,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI,
// Select the correct opcode and register class for unit size load/store
bool IsNeon = UnitSize >= 8;
- TRC = (IsThumb1 || IsThumb2) ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
+ TRC = IsThumb ? &ARM::tGPRRegClass : &ARM::GPRRegClass;
if (IsNeon)
VecTRC = UnitSize == 16 ? &ARM::DPairRegClass
: UnitSize == 8 ? &ARM::DPRRegClass
@@ -8014,12 +8521,12 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI,
if ((LoopSize & 0xFFFF0000) != 0)
Vtmp = MRI.createVirtualRegister(TRC);
AddDefaultPred(BuildMI(BB, dl,
- TII->get(IsThumb2 ? ARM::t2MOVi16 : ARM::MOVi16),
+ TII->get(IsThumb ? ARM::t2MOVi16 : ARM::MOVi16),
Vtmp).addImm(LoopSize & 0xFFFF));
if ((LoopSize & 0xFFFF0000) != 0)
AddDefaultPred(BuildMI(BB, dl,
- TII->get(IsThumb2 ? ARM::t2MOVTi16 : ARM::MOVTi16),
+ TII->get(IsThumb ? ARM::t2MOVTi16 : ARM::MOVTi16),
varEnd)
.addReg(Vtmp)
.addImm(LoopSize >> 16));
@@ -8034,7 +8541,7 @@ ARMTargetLowering::EmitStructByval(MachineInstr &MI,
Align = MF->getDataLayout().getTypeAllocSize(C->getType());
unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align);
- if (IsThumb1)
+ if (IsThumb)
AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci)).addReg(
varEnd, RegState::Define).addConstantPoolIndex(Idx));
else
@@ -8201,17 +8708,20 @@ ARMTargetLowering::EmitLowered__dbzchk(MachineInstr &MI,
ContBB->splice(ContBB->begin(), MBB,
std::next(MachineBasicBlock::iterator(MI)), MBB->end());
ContBB->transferSuccessorsAndUpdatePHIs(MBB);
+ MBB->addSuccessor(ContBB);
MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
+ BuildMI(TrapBB, DL, TII->get(ARM::t__brkdiv0));
MF->push_back(TrapBB);
- BuildMI(TrapBB, DL, TII->get(ARM::t2UDF)).addImm(249);
MBB->addSuccessor(TrapBB);
- BuildMI(*MBB, MI, DL, TII->get(ARM::tCBZ))
- .addReg(MI.getOperand(0).getReg())
- .addMBB(TrapBB);
- AddDefaultPred(BuildMI(*MBB, MI, DL, TII->get(ARM::t2B)).addMBB(ContBB));
- MBB->addSuccessor(ContBB);
+ AddDefaultPred(BuildMI(*MBB, MI, DL, TII->get(ARM::tCMPi8))
+ .addReg(MI.getOperand(0).getReg())
+ .addImm(0));
+ BuildMI(*MBB, MI, DL, TII->get(ARM::t2Bcc))
+ .addMBB(TrapBB)
+ .addImm(ARMCC::EQ)
+ .addReg(ARM::CPSR);
MI.eraseFromParent();
return ContBB;
@@ -8635,7 +9145,7 @@ static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes,
// (zext cc) can never be the all ones value.
if (AllOnes)
return false;
- // Fall through.
+ LLVM_FALLTHROUGH;
case ISD::SIGN_EXTEND: {
SDLoc dl(N);
EVT VT = N->getValueType(0);
@@ -8962,7 +9472,8 @@ static SDValue AddCombineTo64bitUMAAL(SDNode *AddcNode,
// be combined into a UMLAL. The other pattern is AddcNode being combined
// into an UMLAL and then using another addc is handled in ISelDAGToDAG.
- if (!Subtarget->hasV6Ops())
+ if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP() ||
+ (Subtarget->isThumb() && !Subtarget->hasThumb2()))
return AddCombineTo64bitMLAL(AddcNode, DCI, Subtarget);
SDNode *PrevAddc = nullptr;
@@ -9964,6 +10475,7 @@ static SDValue CombineBaseUpdate(SDNode *N,
isLaneOp = true;
switch (N->getOpcode()) {
default: llvm_unreachable("unexpected opcode for Neon base update");
+ case ARMISD::VLD1DUP: NewOpc = ARMISD::VLD1DUP_UPD; NumVecs = 1; break;
case ARMISD::VLD2DUP: NewOpc = ARMISD::VLD2DUP_UPD; NumVecs = 2; break;
case ARMISD::VLD3DUP: NewOpc = ARMISD::VLD3DUP_UPD; NumVecs = 3; break;
case ARMISD::VLD4DUP: NewOpc = ARMISD::VLD4DUP_UPD; NumVecs = 4; break;
@@ -10078,8 +10590,8 @@ static SDValue CombineBaseUpdate(SDNode *N,
StVal = DAG.getNode(ISD::BITCAST, dl, AlignedVecTy, StVal);
}
- SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys,
- Ops, AlignedVecTy,
+ EVT LoadVT = isLaneOp ? VecTy.getVectorElementType() : AlignedVecTy;
+ SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, dl, SDTys, Ops, LoadVT,
MemN->getMemOperand());
// Update the uses.
@@ -10211,19 +10723,44 @@ static SDValue PerformVDUPLANECombine(SDNode *N,
return SDValue();
// Make sure the VMOV element size is not bigger than the VDUPLANE elements.
- unsigned EltSize = Op.getValueType().getVectorElementType().getSizeInBits();
+ unsigned EltSize = Op.getScalarValueSizeInBits();
// The canonical VMOV for a zero vector uses a 32-bit element size.
unsigned Imm = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
unsigned EltBits;
if (ARM_AM::decodeNEONModImm(Imm, EltBits) == 0)
EltSize = 8;
EVT VT = N->getValueType(0);
- if (EltSize > VT.getVectorElementType().getSizeInBits())
+ if (EltSize > VT.getScalarSizeInBits())
return SDValue();
return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
}
+/// PerformVDUPCombine - Target-specific dag combine xforms for ARMISD::VDUP.
+static SDValue PerformVDUPCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue Op = N->getOperand(0);
+
+ // Match VDUP(LOAD) -> VLD1DUP.
+ // We match this pattern here rather than waiting for isel because the
+ // transform is only legal for unindexed loads.
+ LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode());
+ if (LD && Op.hasOneUse() && LD->isUnindexed() &&
+ LD->getMemoryVT() == N->getValueType(0).getVectorElementType()) {
+ SDValue Ops[] = { LD->getOperand(0), LD->getOperand(1),
+ DAG.getConstant(LD->getAlignment(), SDLoc(N), MVT::i32) };
+ SDVTList SDTys = DAG.getVTList(N->getValueType(0), MVT::Other);
+ SDValue VLDDup = DAG.getMemIntrinsicNode(ARMISD::VLD1DUP, SDLoc(N), SDTys,
+ Ops, LD->getMemoryVT(),
+ LD->getMemOperand());
+ DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), VLDDup.getValue(1));
+ return VLDDup;
+ }
+
+ return SDValue();
+}
+
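What the VDUP(LOAD) -> VLD1DUP rewrite computes, modelled on scalars (the helper below is purely illustrative): one load feeds every lane of the result instead of a load followed by a separate duplicate.

#include <array>
#include <cassert>
#include <cstdint>

template <unsigned Lanes>
static std::array<uint32_t, Lanes> loadAndDup(const uint32_t *Ptr) {
  std::array<uint32_t, Lanes> V;
  V.fill(*Ptr);  // a single memory access replicated into all lanes
  return V;
}

int main() {
  uint32_t Mem = 7;
  auto V = loadAndDup<4>(&Mem);
  assert(V.front() == 7 && V.back() == 7);
  return 0;
}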
static SDValue PerformLOADCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
EVT VT = N->getValueType(0);
@@ -10255,8 +10792,8 @@ static SDValue PerformSTORECombine(SDNode *N,
EVT StVT = St->getMemoryVT();
unsigned NumElems = VT.getVectorNumElements();
assert(StVT != VT && "Cannot truncate to the same type");
- unsigned FromEltSz = VT.getVectorElementType().getSizeInBits();
- unsigned ToEltSz = StVT.getVectorElementType().getSizeInBits();
+ unsigned FromEltSz = VT.getScalarSizeInBits();
+ unsigned ToEltSz = StVT.getScalarSizeInBits();
// From, To sizes and ElemCount must be pow of two
if (!isPowerOf2_32(NumElems * FromEltSz * ToEltSz)) return SDValue();
@@ -10524,7 +11061,7 @@ static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) {
/// 0 <= Value <= ElementBits for a long left shift.
static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
assert(VT.isVector() && "vector shift count is not a vector type");
- int64_t ElementBits = VT.getVectorElementType().getSizeInBits();
+ int64_t ElementBits = VT.getScalarSizeInBits();
if (! getVShiftImm(Op, ElementBits, Cnt))
return false;
return (Cnt >= 0 && (isLong ? Cnt-1 : Cnt) < ElementBits);
@@ -10539,7 +11076,7 @@ static bool isVShiftLImm(SDValue Op, EVT VT, bool isLong, int64_t &Cnt) {
static bool isVShiftRImm(SDValue Op, EVT VT, bool isNarrow, bool isIntrinsic,
int64_t &Cnt) {
assert(VT.isVector() && "vector shift count is not a vector type");
- int64_t ElementBits = VT.getVectorElementType().getSizeInBits();
+ int64_t ElementBits = VT.getScalarSizeInBits();
if (! getVShiftImm(Op, ElementBits, Cnt))
return false;
if (!isIntrinsic)
@@ -11051,6 +11588,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI);
case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG);
case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
+ case ARMISD::VDUP: return PerformVDUPCombine(N, DCI);
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
return PerformVCVTCombine(N, DCI.DAG, Subtarget);
@@ -11066,6 +11604,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ARMISD::CMOV: return PerformCMOVCombine(N, DCI.DAG);
case ARMISD::BRCOND: return PerformBRCONDCombine(N, DCI.DAG);
case ISD::LOAD: return PerformLOADCombine(N, DCI);
+ case ARMISD::VLD1DUP:
case ARMISD::VLD2DUP:
case ARMISD::VLD3DUP:
case ARMISD::VLD4DUP:
@@ -11234,6 +11773,17 @@ bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
return true;
}
+int ARMTargetLowering::getScalingFactorCost(const DataLayout &DL,
+ const AddrMode &AM, Type *Ty,
+ unsigned AS) const {
+ if (isLegalAddressingMode(DL, AM, Ty, AS)) {
+ if (Subtarget->hasFPAO())
+ return AM.Scale < 0 ? 1 : 0; // positive offsets execute faster
+ return 0;
+ }
+ return -1;
+}
+
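A minimal standalone restatement of the cost rule introduced above (illustrative; the helper name and boolean parameters are assumptions, not LLVM API): legal addressing modes cost 0, except that a negative scale costs 1 on subtargets with the FPAO feature, while illegal modes cost -1.

// Sketch of the scaling-factor cost policy; not the real hook.
int scalingFactorCostSketch(bool ModeIsLegal, bool HasFPAO, int Scale) {
  if (!ModeIsLegal)
    return -1;                     // unsupported addressing mode
  if (HasFPAO && Scale < 0)
    return 1;                      // negative offsets execute slower here
  return 0;                        // otherwise the scale is free
}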
static bool isLegalT1AddressImmediate(int64_t V, EVT VT) {
if (V < 0)
@@ -11384,7 +11934,7 @@ bool ARMTargetLowering::isLegalAddressingMode(const DataLayout &DL,
case 1:
if (Subtarget->isThumb1Only())
return false;
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
default:
// ARM doesn't support any R+R*scale+imm addr modes.
if (AM.BaseOffs)
@@ -11682,7 +12232,7 @@ void ARMTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
case Intrinsic::arm_ldaex:
case Intrinsic::arm_ldrex: {
EVT VT = cast<MemIntrinsicSDNode>(Op)->getMemoryVT();
- unsigned MemBits = VT.getScalarType().getSizeInBits();
+ unsigned MemBits = VT.getScalarSizeInBits();
KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits);
return;
}
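A small self-contained check of the arithmetic above (the helper is hypothetical): for an i8 ldrex/ldaex the loaded value is zero-extended, so the top 32 - 8 bits of the result are known zero.

#include <cstdint>
// Mirrors APInt::getHighBitsSet(32, 32 - MemBits) for MemBits == 8 (sketch).
uint32_t knownZeroForLdrex8() {
  const unsigned MemBits = 8;
  return ~uint32_t(0) << MemBits;  // 0xFFFFFF00: bits 8..31 are known zero
}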
@@ -12043,7 +12593,7 @@ static RTLIB::Libcall getDivRemLibcall(
}
static TargetLowering::ArgListTy getDivRemArgList(
- const SDNode *N, LLVMContext *Context) {
+ const SDNode *N, LLVMContext *Context, const ARMSubtarget *Subtarget) {
assert((N->getOpcode() == ISD::SDIVREM || N->getOpcode() == ISD::UDIVREM ||
N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM) &&
"Unhandled Opcode in getDivRemArgList");
@@ -12060,12 +12610,15 @@ static TargetLowering::ArgListTy getDivRemArgList(
Entry.isZExt = !isSigned;
Args.push_back(Entry);
}
+ if (Subtarget->isTargetWindows() && Args.size() >= 2)
+ std::swap(Args[0], Args[1]);
return Args;
}
SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
assert((Subtarget->isTargetAEABI() || Subtarget->isTargetAndroid() ||
- Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI()) &&
+ Subtarget->isTargetGNUAEABI() || Subtarget->isTargetMuslAEABI() ||
+ Subtarget->isTargetWindows()) &&
"Register-based DivRem lowering only");
unsigned Opcode = Op->getOpcode();
assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
@@ -12073,20 +12626,42 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
bool isSigned = (Opcode == ISD::SDIVREM);
EVT VT = Op->getValueType(0);
Type *Ty = VT.getTypeForEVT(*DAG.getContext());
+ SDLoc dl(Op);
+
+ // If the target has hardware divide, use divide + multiply + subtract:
+ // div = a / b
+ // rem = a - b * div
+ // return {div, rem}
+ // This should be lowered into UDIV/SDIV + MLS later on.
+ if (Subtarget->hasDivide() && Op->getValueType(0).isSimple() &&
+ Op->getSimpleValueType(0) == MVT::i32) {
+ unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
+ const SDValue Dividend = Op->getOperand(0);
+ const SDValue Divisor = Op->getOperand(1);
+ SDValue Div = DAG.getNode(DivOpcode, dl, VT, Dividend, Divisor);
+ SDValue Mul = DAG.getNode(ISD::MUL, dl, VT, Div, Divisor);
+ SDValue Rem = DAG.getNode(ISD::SUB, dl, VT, Dividend, Mul);
+
+ SDValue Values[2] = {Div, Rem};
+ return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VT, VT), Values);
+ }
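The comment above corresponds to the following source-level identity (a sketch of what the nodes compute, not the actual SelectionDAG lowering):

// With hardware divide, i32 SDIVREM expands to SDIV followed by MLS.
struct DivRem { int Quot, Rem; };
DivRem sdivrem32(int A, int B) {
  DivRem R;
  R.Quot = A / B;                 // SDIV
  R.Rem  = A - B * R.Quot;        // MUL + SUB, later folded into MLS
  return R;
}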
RTLIB::Libcall LC = getDivRemLibcall(Op.getNode(),
VT.getSimpleVT().SimpleTy);
SDValue InChain = DAG.getEntryNode();
TargetLowering::ArgListTy Args = getDivRemArgList(Op.getNode(),
- DAG.getContext());
+ DAG.getContext(),
+ Subtarget);
SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
getPointerTy(DAG.getDataLayout()));
Type *RetTy = (Type*)StructType::get(Ty, Ty, nullptr);
- SDLoc dl(Op);
+ if (Subtarget->isTargetWindows())
+ InChain = WinDBZCheckDenominator(DAG, Op.getNode(), InChain);
+
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(InChain)
.setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
@@ -12119,11 +12694,15 @@ SDValue ARMTargetLowering::LowerREM(SDNode *N, SelectionDAG &DAG) const {
RTLIB::Libcall LC = getDivRemLibcall(N, N->getValueType(0).getSimpleVT().
SimpleTy);
SDValue InChain = DAG.getEntryNode();
- TargetLowering::ArgListTy Args = getDivRemArgList(N, DAG.getContext());
+ TargetLowering::ArgListTy Args = getDivRemArgList(N, DAG.getContext(),
+ Subtarget);
bool isSigned = N->getOpcode() == ISD::SREM;
SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
getPointerTy(DAG.getDataLayout()));
+ if (Subtarget->isTargetWindows())
+ InChain = WinDBZCheckDenominator(DAG, N, InChain);
+
// Lower call
CallLoweringInfo CLI(DAG);
CLI.setChain(InChain)
@@ -12342,6 +12921,14 @@ bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
return true;
}
+bool ARMTargetLowering::isExtractSubvectorCheap(EVT ResVT,
+ unsigned Index) const {
+ if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
+ return false;
+
+ return (Index == 0 || Index == ResVT.getVectorNumElements());
+}
+
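A standalone restatement of the new hook's check (illustrative; the helper name and parameters are assumptions): extraction is only considered cheap at index 0 or at an index equal to the result's element count, i.e. the low or high half that maps directly onto a D subregister of a Q register.

// Sketch of isExtractSubvectorCheap's decision, not the real override.
bool isExtractSubvectorCheapSketch(bool ExtractLegalOrCustom, unsigned Index,
                                   unsigned ResNumElts) {
  if (!ExtractLegalOrCustom)
    return false;
  return Index == 0 || Index == ResNumElts;
}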
Instruction* ARMTargetLowering::makeDMB(IRBuilder<> &Builder,
ARM_MB::MemBOpt Domain) const {
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
@@ -12443,7 +13030,8 @@ ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
TargetLowering::AtomicExpansionKind
ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
- return (Size <= (Subtarget->isMClass() ? 32U : 64U))
+ bool hasAtomicRMW = !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps();
+ return (Size <= (Subtarget->isMClass() ? 32U : 64U) && hasAtomicRMW)
? AtomicExpansionKind::LLSC
: AtomicExpansionKind::None;
}
@@ -12455,7 +13043,9 @@ bool ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(
// on the stack and close enough to the spill slot, this can lead to a
// situation where the monitor always gets cleared and the atomic operation
// can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
- return getTargetMachine().getOptLevel() != 0;
+ bool hasAtomicCmpXchg =
+ !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps();
+ return getTargetMachine().getOptLevel() != 0 && hasAtomicCmpXchg;
}
bool ARMTargetLowering::shouldInsertFencesForAtomic(
@@ -12681,6 +13271,17 @@ static Constant *getSequentialMask(IRBuilder<> &Builder, unsigned Start,
///
/// Note that the new shufflevectors will be removed and we'll only generate one
/// vst3 instruction in CodeGen.
+///
+/// Example for a more general valid mask (Factor 3). Lower:
+/// %i.vec = shuffle <32 x i32> %v0, <32 x i32> %v1,
+/// <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
+/// store <12 x i32> %i.vec, <12 x i32>* %ptr
+///
+/// Into:
+/// %sub.v0 = shuffle <32 x i32> %v0, <32 x i32> v1, <4, 5, 6, 7>
+/// %sub.v1 = shuffle <32 x i32> %v0, <32 x i32> v1, <32, 33, 34, 35>
+/// %sub.v2 = shuffle <32 x i32> %v0, <32 x i32> v1, <16, 17, 18, 19>
+/// call void llvm.arm.neon.vst3(%ptr, %sub.v0, %sub.v1, %sub.v2, 4)
bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
ShuffleVectorInst *SVI,
unsigned Factor) const {
@@ -12691,9 +13292,9 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
assert(VecTy->getVectorNumElements() % Factor == 0 &&
"Invalid interleaved store");
- unsigned NumSubElts = VecTy->getVectorNumElements() / Factor;
+ unsigned LaneLen = VecTy->getVectorNumElements() / Factor;
Type *EltTy = VecTy->getVectorElementType();
- VectorType *SubVecTy = VectorType::get(EltTy, NumSubElts);
+ VectorType *SubVecTy = VectorType::get(EltTy, LaneLen);
const DataLayout &DL = SI->getModule()->getDataLayout();
unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy);
@@ -12720,7 +13321,7 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);
- SubVecTy = VectorType::get(IntTy, NumSubElts);
+ SubVecTy = VectorType::get(IntTy, LaneLen);
}
static const Intrinsic::ID StoreInts[3] = {Intrinsic::arm_neon_vst2,
@@ -12736,9 +13337,28 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI,
SI->getModule(), StoreInts[Factor - 2], Tys);
// Split the shufflevector operands into sub vectors for the new vstN call.
- for (unsigned i = 0; i < Factor; i++)
- Ops.push_back(Builder.CreateShuffleVector(
- Op0, Op1, getSequentialMask(Builder, NumSubElts * i, NumSubElts)));
+ auto Mask = SVI->getShuffleMask();
+ for (unsigned i = 0; i < Factor; i++) {
+ if (Mask[i] >= 0) {
+ Ops.push_back(Builder.CreateShuffleVector(
+ Op0, Op1, getSequentialMask(Builder, Mask[i], LaneLen)));
+ } else {
+ unsigned StartMask = 0;
+ for (unsigned j = 1; j < LaneLen; j++) {
+ if (Mask[j*Factor + i] >= 0) {
+ StartMask = Mask[j*Factor + i] - j;
+ break;
+ }
+ }
+ // Note: If all elements in a chunk are undefs, StartMask=0!
+ // Note: Filling undef gaps with random elements is ok, since
+ // those elements were being written anyway (with undefs).
+ // If every element in the chunk is undef, we default to starting from element 0.
+ // Note: StartMask cannot be negative; that is checked in isReInterleaveMask.
+ Ops.push_back(Builder.CreateShuffleVector(
+ Op0, Op1, getSequentialMask(Builder, StartMask, LaneLen)));
+ }
+ }
Ops.push_back(Builder.getInt32(SI->getAlignment()));
Builder.CreateCall(VstNFunc, Ops);
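A minimal, self-contained reproduction of the StartMask scan above for one concrete mask (Factor 3, LaneLen 4, undef in lane 0); the helper name is made up for the example.

#include <vector>
// Returns the sequential sub-mask start for lane i, defaulting to 0 when the
// whole lane is undef (-1), exactly as the loop above does.
int startMaskForLane(const std::vector<int> &Mask, unsigned Factor,
                     unsigned LaneLen, unsigned i) {
  if (Mask[i] >= 0)
    return Mask[i];
  for (unsigned j = 1; j < LaneLen; ++j)
    if (Mask[j * Factor + i] >= 0)
      return Mask[j * Factor + i] - j;
  return 0;
}
// startMaskForLane({-1,32,16, 5,33,17, 6,34,18, 7,35,19}, 3, 4, 0) == 4,
// so lane 0 still stores the sequential chunk <4, 5, 6, 7>.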
diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
index 4906686616bc..5255d82d647a 100644
--- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -190,7 +190,8 @@ namespace llvm {
MEMCPY,
// Vector load N-element structure to all lanes:
- VLD2DUP = ISD::FIRST_TARGET_MEMORY_OPCODE,
+ VLD1DUP = ISD::FIRST_TARGET_MEMORY_OPCODE,
+ VLD2DUP,
VLD3DUP,
VLD4DUP,
@@ -202,6 +203,7 @@ namespace llvm {
VLD2LN_UPD,
VLD3LN_UPD,
VLD4LN_UPD,
+ VLD1DUP_UPD,
VLD2DUP_UPD,
VLD3DUP_UPD,
VLD4DUP_UPD,
@@ -291,6 +293,14 @@ namespace llvm {
/// by AM is legal for this target, for a load/store of the specified type.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
Type *Ty, unsigned AS) const override;
+
+ /// getScalingFactorCost - Return the cost of the scaling used in
+ /// addressing mode represented by AM.
+ /// If the AM is supported, the return value must be >= 0.
+ /// If the AM is not supported, the return value must be negative.
+ int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
+ unsigned AS) const override;
+
bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const;
/// isLegalICmpImmediate - Return true if the specified immediate is legal
@@ -421,6 +431,10 @@ namespace llvm {
bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
Type *Ty) const override;
+ /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
+ /// with this index.
+ bool isExtractSubvectorCheap(EVT ResVT, unsigned Index) const override;
+
/// \brief Returns true if an argument of type Ty needs to be passed in a
/// contiguous block of registers in calling convention CallConv.
bool functionArgumentNeedsConsecutiveRegisters(
@@ -482,6 +496,9 @@ namespace llvm {
return HasStandaloneRem;
}
+ CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool isVarArg) const;
+ CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool isVarArg) const;
+
protected:
std::pair<const TargetRegisterClass *, uint8_t>
findRepresentativeClass(const TargetRegisterInfo *TRI,
@@ -506,6 +523,8 @@ namespace llvm {
bool HasStandaloneRem = true;
+ void InitLibcallCallingConvs();
+
void addTypeForNEON(MVT VT, MVT PromotedLdStVT, MVT PromotedBitwiseVT);
void addDRTypeForNEON(MVT VT);
void addQRTypeForNEON(MVT VT);
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td
index 37a83f70a1fb..488439fc24e0 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td
@@ -398,6 +398,14 @@ class tPseudoInst<dag oops, dag iops, int sz, InstrItinClass itin,
list<Predicate> Predicates = [IsThumb];
}
+// PseudoInst that's in ARMv8-M Baseline (somewhere between Thumb and Thumb2)
+class t2basePseudoInst<dag oops, dag iops, int sz, InstrItinClass itin,
+ list<dag> pattern>
+ : PseudoInst<oops, iops, itin, pattern> {
+ let Size = sz;
+ list<Predicate> Predicates = [IsThumb,HasV8MBaseline];
+}
+
// PseudoInst that's Thumb2-mode only.
class t2PseudoInst<dag oops, dag iops, int sz, InstrItinClass itin,
list<dag> pattern>
@@ -999,6 +1007,15 @@ class VFPPat<dag pattern, dag result> : Pat<pattern, result> {
class VFPNoNEONPat<dag pattern, dag result> : Pat<pattern, result> {
list<Predicate> Predicates = [HasVFP2, DontUseNEONForFP];
}
+class Thumb2DSPPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsThumb2, HasDSP];
+}
+class Thumb2DSPMulPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsThumb2, UseMulOps, HasDSP];
+}
+class Thumb2ExtractPat<dag pattern, dag result> : Pat<pattern, result> {
+ list<Predicate> Predicates = [IsThumb2, HasT2ExtractPack];
+}
//===----------------------------------------------------------------------===//
// Thumb Instruction Format Definitions.
//
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
index 98b1b4ca4272..27b64322dfa9 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp
@@ -123,7 +123,9 @@ void ARMInstrInfo::expandLoadStackGuard(MachineBasicBlock::iterator MI) const {
MIB = BuildMI(MBB, MI, DL, get(ARM::MOV_ga_pcrel_ldr), Reg)
.addGlobalAddress(GV, 0, ARMII::MO_NONLAZY);
- auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant;
+ auto Flags = MachineMemOperand::MOLoad |
+ MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant;
MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, 4);
MIB.addMemOperand(MMO);
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
index c9735f3ec277..c47393990e97 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -330,6 +330,8 @@ def DontUseVMOVSR : Predicate<"!Subtarget->preferVMOVSR() &&"
def IsLE : Predicate<"MF->getDataLayout().isLittleEndian()">;
def IsBE : Predicate<"MF->getDataLayout().isBigEndian()">;
+def GenExecuteOnly : Predicate<"Subtarget->genExecuteOnly()">;
+
//===----------------------------------------------------------------------===//
// ARM Flag Definitions.
@@ -358,7 +360,23 @@ def imm16_31 : ImmLeaf<i32, [{
// sext_16_node predicate - True if the SDNode is sign-extended 16 or more bits.
def sext_16_node : PatLeaf<(i32 GPR:$a), [{
- return CurDAG->ComputeNumSignBits(SDValue(N,0)) >= 17;
+ if (CurDAG->ComputeNumSignBits(SDValue(N,0)) >= 17)
+ return true;
+
+ if (N->getOpcode() != ISD::SRA)
+ return false;
+ if (N->getOperand(0).getOpcode() != ISD::SHL)
+ return false;
+
+ auto *ShiftVal = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!ShiftVal || ShiftVal->getZExtValue() != 16)
+ return false;
+
+ ShiftVal = dyn_cast<ConstantSDNode>(N->getOperand(0)->getOperand(1));
+ if (!ShiftVal || ShiftVal->getZExtValue() != 16)
+ return false;
+
+ return true;
}]>;
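The extra cases accepted by the predicate correspond to an explicit shift-pair sign extension in the source; a hedged C-level illustration (ignoring signed-overflow pedantry):

// Both forms are now treated as sign-extended 16-bit operands, so they can
// feed SMULBB/SMLABB-style selection.
int sextViaType(short x) { return x; }                  // >= 17 sign bits
int sextViaShifts(int x) { return (x << 16) >> 16; }    // (sra (shl x, 16), 16)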
/// Split a 32-bit immediate into two 16 bit parts.
@@ -3400,6 +3418,12 @@ def SXTAB : AI_exta_rrot<0b01101010,
def SXTAH : AI_exta_rrot<0b01101011,
"sxtah", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>;
+def : ARMV6Pat<(add rGPR:$Rn, (sext_inreg (srl rGPR:$Rm, rot_imm:$rot), i8)),
+ (SXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
+def : ARMV6Pat<(add rGPR:$Rn, (sext_inreg (srl rGPR:$Rm, imm8_or_16:$rot),
+ i16)),
+ (SXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
+
def SXTB16 : AI_ext_rrot_np<0b01101000, "sxtb16">;
def SXTAB16 : AI_exta_rrot_np<0b01101000, "sxtab16">;
@@ -3427,6 +3451,11 @@ def UXTAB : AI_exta_rrot<0b01101110, "uxtab",
BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>;
def UXTAH : AI_exta_rrot<0b01101111, "uxtah",
BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>;
+
+def : ARMV6Pat<(add rGPR:$Rn, (and (srl rGPR:$Rm, rot_imm:$rot), 0xFF)),
+ (UXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
+def : ARMV6Pat<(add rGPR:$Rn, (and (srl rGPR:$Rm, imm8_or_16:$rot), 0xFFFF)),
+ (UXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
}
// This isn't safe in general, the add is two 16-bit units, not a 32-bit add.
@@ -3471,6 +3500,7 @@ def UBFX : I<(outs GPRnopc:$Rd),
// Arithmetic Instructions.
//
+let isAdd = 1 in
defm ADD : AsI1_bin_irs<0b0100, "add",
IIC_iALUi, IIC_iALUr, IIC_iALUsr, add, 1>;
defm SUB : AsI1_bin_irs<0b0010, "sub",
@@ -3486,9 +3516,11 @@ defm SUB : AsI1_bin_irs<0b0010, "sub",
// FIXME: Eliminate ADDS/SUBS pseudo opcodes after adding tablegen
// support for an optional CPSR definition that corresponds to the DAG
// node's second value. We can then eliminate the implicit def of CPSR.
+let isAdd = 1 in
defm ADDS : AsI1_bin_s_irs<IIC_iALUi, IIC_iALUr, IIC_iALUsr, ARMaddc, 1>;
defm SUBS : AsI1_bin_s_irs<IIC_iALUi, IIC_iALUr, IIC_iALUsr, ARMsubc>;
+let isAdd = 1 in
defm ADC : AI1_adde_sube_irs<0b0101, "adc", ARMadde, 1>;
defm SBC : AI1_adde_sube_irs<0b0110, "sbc", ARMsube>;
@@ -5492,45 +5524,22 @@ def : ARMPat<(extloadi8 addrmodepc:$addr), (PICLDRB addrmodepc:$addr)>;
def : ARMPat<(extloadi16 addrmodepc:$addr), (PICLDRH addrmodepc:$addr)>;
// smul* and smla*
-def : ARMV5TEPat<(mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
- (sra (shl GPR:$b, (i32 16)), (i32 16))),
- (SMULBB GPR:$a, GPR:$b)>;
def : ARMV5TEPat<(mul sext_16_node:$a, sext_16_node:$b),
(SMULBB GPR:$a, GPR:$b)>;
-def : ARMV5TEPat<(mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
- (sra GPR:$b, (i32 16))),
- (SMULBT GPR:$a, GPR:$b)>;
def : ARMV5TEPat<(mul sext_16_node:$a, (sra GPR:$b, (i32 16))),
(SMULBT GPR:$a, GPR:$b)>;
-def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)),
- (sra (shl GPR:$b, (i32 16)), (i32 16))),
- (SMULTB GPR:$a, GPR:$b)>;
def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)), sext_16_node:$b),
(SMULTB GPR:$a, GPR:$b)>;
-
-def : ARMV5MOPat<(add GPR:$acc,
- (mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
- (sra (shl GPR:$b, (i32 16)), (i32 16)))),
- (SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
def : ARMV5MOPat<(add GPR:$acc,
(mul sext_16_node:$a, sext_16_node:$b)),
(SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
def : ARMV5MOPat<(add GPR:$acc,
- (mul (sra (shl GPR:$a, (i32 16)), (i32 16)),
- (sra GPR:$b, (i32 16)))),
- (SMLABT GPR:$a, GPR:$b, GPR:$acc)>;
-def : ARMV5MOPat<(add GPR:$acc,
(mul sext_16_node:$a, (sra GPR:$b, (i32 16)))),
(SMLABT GPR:$a, GPR:$b, GPR:$acc)>;
def : ARMV5MOPat<(add GPR:$acc,
- (mul (sra GPR:$a, (i32 16)),
- (sra (shl GPR:$b, (i32 16)), (i32 16)))),
- (SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
-def : ARMV5MOPat<(add GPR:$acc,
(mul (sra GPR:$a, (i32 16)), sext_16_node:$b)),
(SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
-
// Pre-v7 uses MCR for synchronization barriers.
def : ARMPat<(ARMMemBarrierMCR GPR:$zero), (MCR 15, 0, GPR:$zero, 7, 10, 5)>,
Requires<[IsARM, HasV6]>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
index defef4ea9073..b5fa8e999e2a 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -610,14 +610,14 @@ def NEONimmAllOnesV: PatLeaf<(NEONvmovImm (i32 timm)), [{
def VLDMQIA
: PseudoVFPLdStM<(outs DPair:$dst), (ins GPR:$Rn),
IIC_fpLoad_m, "",
- [(set DPair:$dst, (v2f64 (load GPR:$Rn)))]>;
+ [(set DPair:$dst, (v2f64 (word_alignedload GPR:$Rn)))]>;
// Use VSTM to store a Q register as a D register pair.
// This is a pseudo instruction that is expanded to VSTMD after reg alloc.
def VSTMQIA
: PseudoVFPLdStM<(outs), (ins DPair:$src, GPR:$Rn),
IIC_fpStore_m, "",
- [(store (v2f64 DPair:$src), GPR:$Rn)]>;
+ [(word_alignedstore (v2f64 DPair:$src), GPR:$Rn)]>;
// Classes for VLD* pseudo-instructions with multi-register operands.
// These are expanded to real instructions after register allocation.
@@ -6849,6 +6849,16 @@ let Predicates = [IsBE] in {
def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (VREV64q32 QPR:$src)>;
}
+// Use VLD1/VST1 + VREV for non-word-aligned v2f64 load/store on Big Endian
+def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
+ (VREV64q8 (VLD1q8 addrmode6:$addr))>, Requires<[IsBE]>;
+def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
+ (VST1q8 addrmode6:$addr, (VREV64q8 QPR:$value))>, Requires<[IsBE]>;
+def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
+ (VREV64q16 (VLD1q16 addrmode6:$addr))>, Requires<[IsBE]>;
+def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
+ (VST1q16 addrmode6:$addr, (VREV64q16 QPR:$value))>, Requires<[IsBE]>;
+
// Fold extracting an element out of a v2i32 into a vfp register.
def : Pat<(f32 (bitconvert (i32 (extractelt (v2i32 DPR:$src), imm:$lane)))),
(f32 (EXTRACT_SUBREG DPR:$src, (SSubReg_f32_reg imm:$lane)))>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
index 93a174f3678a..a681f64b05e6 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td
@@ -904,49 +904,51 @@ class T1sItGenEncodeImm<bits<5> opA, dag oops, dag iops, InstrItinClass itin,
let Inst{7-0} = imm8;
}
-// Add with carry register
-let isCommutable = 1, Uses = [CPSR] in
-def tADC : // A8.6.2
- T1sItDPEncode<0b0101, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), IIC_iALUr,
- "adc", "\t$Rdn, $Rm",
- [(set tGPR:$Rdn, (adde tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>;
-
-// Add immediate
-def tADDi3 : // A8.6.4 T1
- T1sIGenEncodeImm<0b01110, (outs tGPR:$Rd), (ins tGPR:$Rm, imm0_7:$imm3),
- IIC_iALUi,
- "add", "\t$Rd, $Rm, $imm3",
- [(set tGPR:$Rd, (add tGPR:$Rm, imm0_7:$imm3))]>,
- Sched<[WriteALU]> {
- bits<3> imm3;
- let Inst{8-6} = imm3;
-}
-
-def tADDi8 : // A8.6.4 T2
- T1sItGenEncodeImm<{1,1,0,?,?}, (outs tGPR:$Rdn),
- (ins tGPR:$Rn, imm0_255:$imm8), IIC_iALUi,
- "add", "\t$Rdn, $imm8",
- [(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255:$imm8))]>,
- Sched<[WriteALU]>;
+let isAdd = 1 in {
+ // Add with carry register
+ let isCommutable = 1, Uses = [CPSR] in
+ def tADC : // A8.6.2
+ T1sItDPEncode<0b0101, (outs tGPR:$Rdn), (ins tGPR:$Rn, tGPR:$Rm), IIC_iALUr,
+ "adc", "\t$Rdn, $Rm",
+ [(set tGPR:$Rdn, (adde tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>;
+
+ // Add immediate
+ def tADDi3 : // A8.6.4 T1
+ T1sIGenEncodeImm<0b01110, (outs tGPR:$Rd), (ins tGPR:$Rm, imm0_7:$imm3),
+ IIC_iALUi,
+ "add", "\t$Rd, $Rm, $imm3",
+ [(set tGPR:$Rd, (add tGPR:$Rm, imm0_7:$imm3))]>,
+ Sched<[WriteALU]> {
+ bits<3> imm3;
+ let Inst{8-6} = imm3;
+ }
-// Add register
-let isCommutable = 1 in
-def tADDrr : // A8.6.6 T1
- T1sIGenEncode<0b01100, (outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm),
- IIC_iALUr,
- "add", "\t$Rd, $Rn, $Rm",
- [(set tGPR:$Rd, (add tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>;
-
-let hasSideEffects = 0 in
-def tADDhirr : T1pIt<(outs GPR:$Rdn), (ins GPR:$Rn, GPR:$Rm), IIC_iALUr,
- "add", "\t$Rdn, $Rm", []>,
- T1Special<{0,0,?,?}>, Sched<[WriteALU]> {
- // A8.6.6 T2
- bits<4> Rdn;
- bits<4> Rm;
- let Inst{7} = Rdn{3};
- let Inst{6-3} = Rm;
- let Inst{2-0} = Rdn{2-0};
+ def tADDi8 : // A8.6.4 T2
+ T1sItGenEncodeImm<{1,1,0,?,?}, (outs tGPR:$Rdn),
+ (ins tGPR:$Rn, imm0_255:$imm8), IIC_iALUi,
+ "add", "\t$Rdn, $imm8",
+ [(set tGPR:$Rdn, (add tGPR:$Rn, imm8_255:$imm8))]>,
+ Sched<[WriteALU]>;
+
+ // Add register
+ let isCommutable = 1 in
+ def tADDrr : // A8.6.6 T1
+ T1sIGenEncode<0b01100, (outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm),
+ IIC_iALUr,
+ "add", "\t$Rd, $Rn, $Rm",
+ [(set tGPR:$Rd, (add tGPR:$Rn, tGPR:$Rm))]>, Sched<[WriteALU]>;
+
+ let hasSideEffects = 0 in
+ def tADDhirr : T1pIt<(outs GPR:$Rdn), (ins GPR:$Rn, GPR:$Rm), IIC_iALUr,
+ "add", "\t$Rdn, $Rm", []>,
+ T1Special<{0,0,?,?}>, Sched<[WriteALU]> {
+ // A8.6.6 T2
+ bits<4> Rdn;
+ bits<4> Rm;
+ let Inst{7} = Rdn{3};
+ let Inst{6-3} = Rm;
+ let Inst{2-0} = Rdn{2-0};
+ }
}
// AND register
@@ -1259,6 +1261,13 @@ def tUDF : TI<(outs), (ins imm0_255:$imm8), IIC_Br, "udf\t$imm8",
let Inst{7-0} = imm8;
}
+def t__brkdiv0 : TI<(outs), (ins), IIC_Br, "__brkdiv0",
+ [(int_arm_undefined 249)]>, Encoding16,
+ Requires<[IsThumb, IsWindows]> {
+ let Inst = 0xdef9;
+ let isTerminator = 1;
+}
+
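For context, and as an assumption not stated in the patch: 0xdef9 appears to be the Thumb UDF encoding carrying imm8 = 249, which matches the int_arm_undefined operand above and the immediate Windows reserves for its integer divide-by-zero break.

// Thumb UDF #imm8 encodes as 0xDE00 | imm8, so 0xdef9 is UDF #249 (sketch check).
static_assert((0xdef9 & 0xff00) == 0xde00 && (0xdef9 & 0x00ff) == 249,
              "__brkdiv0 is UDF #249");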
// Zero-extend byte
def tUXTB : // A8.6.262
T1pIMiscEncode<{0,0,1,0,1,1,?}, (outs tGPR:$Rd), (ins tGPR:$Rm),
@@ -1306,6 +1315,18 @@ def tLEApcrelJT : tPseudoInst<(outs tGPR:$Rd),
(ins i32imm:$label, pred:$p),
2, IIC_iALUi, []>, Sched<[WriteALU]>;
+// Thumb-1 doesn't have the TBB or TBH instructions, but we can synthesize them
+// and make use of the same compressed jump table format as Thumb-2.
+let Size = 2 in {
+def tTBB_JT : tPseudoInst<(outs),
+ (ins tGPR:$base, tGPR:$index, i32imm:$jt, i32imm:$pclbl), 0, IIC_Br, []>,
+ Sched<[WriteBr]>;
+
+def tTBH_JT : tPseudoInst<(outs),
+ (ins tGPR:$base, tGPR:$index, i32imm:$jt, i32imm:$pclbl), 0, IIC_Br, []>,
+ Sched<[WriteBr]>;
+}
+
//===----------------------------------------------------------------------===//
// TLS Instructions
//
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
index db8b9fb923bf..603d66403e65 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td
@@ -536,9 +536,9 @@ class T2FourReg<dag oops, dag iops, InstrItinClass itin,
}
class T2MulLong<bits<3> opc22_20, bits<4> opc7_4,
- dag oops, dag iops, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
- : T2I<oops, iops, itin, opc, asm, pattern> {
+ string opc, list<dag> pattern>
+ : T2I<(outs rGPR:$RdLo, rGPR:$RdHi), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL64,
+ opc, "\t$RdLo, $RdHi, $Rn, $Rm", pattern> {
bits<4> RdLo;
bits<4> RdHi;
bits<4> Rn;
@@ -552,10 +552,11 @@ class T2MulLong<bits<3> opc22_20, bits<4> opc7_4,
let Inst{7-4} = opc7_4;
let Inst{3-0} = Rm;
}
-class T2MlaLong<bits<3> opc22_20, bits<4> opc7_4,
- dag oops, dag iops, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
- : T2I<oops, iops, itin, opc, asm, pattern> {
+class T2MlaLong<bits<3> opc22_20, bits<4> opc7_4, string opc>
+ : T2I<(outs rGPR:$RdLo, rGPR:$RdHi),
+ (ins rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), IIC_iMAC64,
+ opc, "\t$RdLo, $RdHi, $Rn, $Rm", []>,
+ RegConstraint<"$RLo = $RdLo, $RHi = $RdHi"> {
bits<4> RdLo;
bits<4> RdHi;
bits<4> Rn;
@@ -1983,12 +1984,19 @@ def t2SXTAB16 : T2I_exta_rrot_np<0b010, "sxtab16">;
// A simple right-shift can also be used in most cases (the exception is the
// SXTH operations with a rotate of 24: there the non-contiguous bits are
// relevant).
-def : Pat<(add rGPR:$Rn, (sext_inreg (srl rGPR:$Rm, rot_imm:$rot), i8)),
- (t2SXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
-def : Pat<(add rGPR:$Rn, (sext_inreg (srl rGPR:$Rm, imm8_or_16:$rot), i16)),
- (t2SXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+def : Thumb2ExtractPat<(add rGPR:$Rn, (sext_inreg
+ (srl rGPR:$Rm, rot_imm:$rot), i8)),
+ (t2SXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
+def : Thumb2ExtractPat<(add rGPR:$Rn, (sext_inreg
+ (srl rGPR:$Rm, imm8_or_16:$rot), i16)),
+ (t2SXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
+def : Thumb2ExtractPat<(add rGPR:$Rn, (sext_inreg
+ (rotr rGPR:$Rm, (i32 24)), i16)),
+ (t2SXTAH rGPR:$Rn, rGPR:$Rm, (i32 3))>;
+def : Thumb2ExtractPat<(add rGPR:$Rn, (sext_inreg
+ (or (srl rGPR:$Rm, (i32 24)),
+ (shl rGPR:$Rm, (i32 8))), i16)),
+ (t2SXTAH rGPR:$Rn, rGPR:$Rm, (i32 3))>;
// Zero extenders
@@ -2017,12 +2025,12 @@ def t2UXTAH : T2I_exta_rrot<0b001, "uxtah",
BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>;
def t2UXTAB16 : T2I_exta_rrot_np<0b011, "uxtab16">;
-def : Pat<(add rGPR:$Rn, (and (srl rGPR:$Rm, rot_imm:$rot), 0xFF)),
- (t2UXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
-def : Pat<(add rGPR:$Rn, (and (srl rGPR:$Rm, imm8_or_16:$rot), 0xFFFF)),
- (t2UXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>,
- Requires<[HasT2ExtractPack, IsThumb2]>;
+def : Thumb2ExtractPat<(add rGPR:$Rn, (and (srl rGPR:$Rm, rot_imm:$rot),
+ 0xFF)),
+ (t2UXTAB rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
+def : Thumb2ExtractPat<(add rGPR:$Rn, (and (srl rGPR:$Rm, imm8_or_16:$rot),
+ 0xFFFF)),
+ (t2UXTAH rGPR:$Rn, rGPR:$Rm, rot_imm:$rot)>;
}
@@ -2030,6 +2038,7 @@ def : Pat<(add rGPR:$Rn, (and (srl rGPR:$Rm, imm8_or_16:$rot), 0xFFFF)),
// Arithmetic Instructions.
//
+let isAdd = 1 in
defm t2ADD : T2I_bin_ii12rs<0b000, "add", add, 1>;
defm t2SUB : T2I_bin_ii12rs<0b101, "sub", sub>;
@@ -2546,367 +2555,194 @@ def t2MUL: T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
let Inst{7-4} = 0b0000; // Multiply
}
-def t2MLA: T2FourReg<
- (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
- "mla", "\t$Rd, $Rn, $Rm, $Ra",
- [(set rGPR:$Rd, (add (mul rGPR:$Rn, rGPR:$Rm), rGPR:$Ra))]>,
- Requires<[IsThumb2, UseMulOps]> {
+class T2FourRegMLA<bits<4> op7_4, string opc, list<dag> pattern>
+ : T2FourReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
+ opc, "\t$Rd, $Rn, $Rm, $Ra", pattern>,
+ Requires<[IsThumb2, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b000;
- let Inst{7-4} = 0b0000; // Multiply
+ let Inst{7-4} = op7_4;
}
-def t2MLS: T2FourReg<
- (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
- "mls", "\t$Rd, $Rn, $Rm, $Ra",
- [(set rGPR:$Rd, (sub rGPR:$Ra, (mul rGPR:$Rn, rGPR:$Rm)))]>,
- Requires<[IsThumb2, UseMulOps]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0110;
- let Inst{22-20} = 0b000;
- let Inst{7-4} = 0b0001; // Multiply and Subtract
-}
+def t2MLA : T2FourRegMLA<0b0000, "mla",
+ [(set rGPR:$Rd, (add (mul rGPR:$Rn, rGPR:$Rm),
+ rGPR:$Ra))]>;
+def t2MLS: T2FourRegMLA<0b0001, "mls",
+ [(set rGPR:$Rd, (sub rGPR:$Ra, (mul rGPR:$Rn,
+ rGPR:$Rm)))]>;
// Extra precision multiplies with low / high results
let hasSideEffects = 0 in {
let isCommutable = 1 in {
-def t2SMULL : T2MulLong<0b000, 0b0000,
- (outs rGPR:$RdLo, rGPR:$RdHi),
- (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL64,
- "smull", "\t$RdLo, $RdHi, $Rn, $Rm", []>;
-
-def t2UMULL : T2MulLong<0b010, 0b0000,
- (outs rGPR:$RdLo, rGPR:$RdHi),
- (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL64,
- "umull", "\t$RdLo, $RdHi, $Rn, $Rm", []>;
+def t2SMULL : T2MulLong<0b000, 0b0000, "smull", []>;
+def t2UMULL : T2MulLong<0b010, 0b0000, "umull", []>;
} // isCommutable
// Multiply + accumulate
-def t2SMLAL : T2MlaLong<0b100, 0b0000,
- (outs rGPR:$RdLo, rGPR:$RdHi),
- (ins rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), IIC_iMAC64,
- "smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
- RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">;
-
-def t2UMLAL : T2MlaLong<0b110, 0b0000,
- (outs rGPR:$RdLo, rGPR:$RdHi),
- (ins rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), IIC_iMAC64,
- "umlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
- RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">;
-
-def t2UMAAL : T2MulLong<0b110, 0b0110,
- (outs rGPR:$RdLo, rGPR:$RdHi),
- (ins rGPR:$Rn, rGPR:$Rm, rGPR:$RLo, rGPR:$RHi), IIC_iMAC64,
- "umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
- RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">,
- Requires<[IsThumb2, HasDSP]>;
+def t2SMLAL : T2MlaLong<0b100, 0b0000, "smlal">;
+def t2UMLAL : T2MlaLong<0b110, 0b0000, "umlal">;
+def t2UMAAL : T2MlaLong<0b110, 0b0110, "umaal">, Requires<[IsThumb2, HasDSP]>;
} // hasSideEffects
// Rounding variants of the below included for disassembly only
// Most significant word multiply
-def t2SMMUL : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
- "smmul", "\t$Rd, $Rn, $Rm",
- [(set rGPR:$Rd, (mulhs rGPR:$Rn, rGPR:$Rm))]>,
- Requires<[IsThumb2, HasDSP]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0110;
- let Inst{22-20} = 0b101;
- let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
- let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0)
-}
-
-def t2SMMULR : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
- "smmulr", "\t$Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasDSP]> {
+class T2SMMUL<bits<4> op7_4, string opc, list<dag> pattern>
+ : T2ThreeReg<(outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL32,
+ opc, "\t$Rd, $Rn, $Rm", pattern>,
+ Requires<[IsThumb2, HasDSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b101;
let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
- let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1)
+ let Inst{7-4} = op7_4;
}
+def t2SMMUL : T2SMMUL<0b0000, "smmul", [(set rGPR:$Rd, (mulhs rGPR:$Rn,
+ rGPR:$Rm))]>;
+def t2SMMULR : T2SMMUL<0b0001, "smmulr", []>;
-def t2SMMLA : T2FourReg<
- (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
- "smmla", "\t$Rd, $Rn, $Rm, $Ra",
- [(set rGPR:$Rd, (add (mulhs rGPR:$Rm, rGPR:$Rn), rGPR:$Ra))]>,
+class T2FourRegSMMLA<bits<3> op22_20, bits<4> op7_4, string opc,
+ list<dag> pattern>
+ : T2FourReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
+ opc, "\t$Rd, $Rn, $Rm, $Ra", pattern>,
Requires<[IsThumb2, HasDSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
- let Inst{22-20} = 0b101;
- let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0)
-}
-
-def t2SMMLAR: T2FourReg<
- (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
- "smmlar", "\t$Rd, $Rn, $Rm, $Ra", []>,
- Requires<[IsThumb2, HasDSP]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0110;
- let Inst{22-20} = 0b101;
- let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1)
-}
-
-def t2SMMLS: T2FourReg<
- (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
- "smmls", "\t$Rd, $Rn, $Rm, $Ra",
- [(set rGPR:$Rd, (sub rGPR:$Ra, (mulhs rGPR:$Rn, rGPR:$Rm)))]>,
- Requires<[IsThumb2, HasDSP, UseMulOps]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0110;
- let Inst{22-20} = 0b110;
- let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0)
-}
-
-def t2SMMLSR:T2FourReg<
- (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32,
- "smmlsr", "\t$Rd, $Rn, $Rm, $Ra", []>,
- Requires<[IsThumb2, HasDSP]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0110;
- let Inst{22-20} = 0b110;
- let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1)
+ let Inst{22-20} = op22_20;
+ let Inst{7-4} = op7_4;
}
-multiclass T2I_smul<string opc, SDNode opnode> {
- def BB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
- !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm",
- [(set rGPR:$Rd, (opnode (sext_inreg rGPR:$Rn, i16),
- (sext_inreg rGPR:$Rm, i16)))]>,
- Requires<[IsThumb2, HasDSP]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0110;
- let Inst{22-20} = 0b001;
- let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
- let Inst{7-6} = 0b00;
- let Inst{5-4} = 0b00;
- }
-
- def BT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
- !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm",
- [(set rGPR:$Rd, (opnode (sext_inreg rGPR:$Rn, i16),
- (sra rGPR:$Rm, (i32 16))))]>,
- Requires<[IsThumb2, HasDSP]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0110;
- let Inst{22-20} = 0b001;
- let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
- let Inst{7-6} = 0b00;
- let Inst{5-4} = 0b01;
- }
-
- def TB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
- !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm",
- [(set rGPR:$Rd, (opnode (sra rGPR:$Rn, (i32 16)),
- (sext_inreg rGPR:$Rm, i16)))]>,
- Requires<[IsThumb2, HasDSP]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0110;
- let Inst{22-20} = 0b001;
- let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
- let Inst{7-6} = 0b00;
- let Inst{5-4} = 0b10;
- }
+def t2SMMLA : T2FourRegSMMLA<0b101, 0b0000, "smmla",
+ [(set rGPR:$Rd, (add (mulhs rGPR:$Rm, rGPR:$Rn), rGPR:$Ra))]>;
+def t2SMMLAR: T2FourRegSMMLA<0b101, 0b0001, "smmlar", []>;
+def t2SMMLS: T2FourRegSMMLA<0b110, 0b0000, "smmls", []>;
+def t2SMMLSR: T2FourRegSMMLA<0b110, 0b0001, "smmlsr", []>;
- def TT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
- !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm",
- [(set rGPR:$Rd, (opnode (sra rGPR:$Rn, (i32 16)),
- (sra rGPR:$Rm, (i32 16))))]>,
- Requires<[IsThumb2, HasDSP]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0110;
- let Inst{22-20} = 0b001;
- let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
- let Inst{7-6} = 0b00;
- let Inst{5-4} = 0b11;
- }
-
- def WB : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
- !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm",
- []>,
- Requires<[IsThumb2, HasDSP]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0110;
- let Inst{22-20} = 0b011;
- let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
- let Inst{7-6} = 0b00;
- let Inst{5-4} = 0b00;
- }
-
- def WT : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16,
- !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm",
- []>,
- Requires<[IsThumb2, HasDSP]> {
+class T2ThreeRegSMUL<bits<3> op22_20, bits<2> op5_4, string opc,
+ list<dag> pattern>
+ : T2ThreeReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm), IIC_iMUL16, opc,
+ "\t$Rd, $Rn, $Rm", pattern>,
+ Requires<[IsThumb2, HasDSP]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
- let Inst{22-20} = 0b011;
+ let Inst{22-20} = op22_20;
let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
let Inst{7-6} = 0b00;
- let Inst{5-4} = 0b01;
- }
-}
-
-
-multiclass T2I_smla<string opc, SDNode opnode> {
- def BB : T2FourReg<
- (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
- !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm, $Ra",
- [(set rGPR:$Rd, (add rGPR:$Ra,
- (opnode (sext_inreg rGPR:$Rn, i16),
- (sext_inreg rGPR:$Rm, i16))))]>,
- Requires<[IsThumb2, HasDSP, UseMulOps]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0110;
- let Inst{22-20} = 0b001;
- let Inst{7-6} = 0b00;
- let Inst{5-4} = 0b00;
- }
-
- def BT : T2FourReg<
- (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
- !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm, $Ra",
- [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sext_inreg rGPR:$Rn, i16),
- (sra rGPR:$Rm, (i32 16)))))]>,
- Requires<[IsThumb2, HasDSP, UseMulOps]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0110;
- let Inst{22-20} = 0b001;
- let Inst{7-6} = 0b00;
- let Inst{5-4} = 0b01;
- }
-
- def TB : T2FourReg<
- (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
- !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm, $Ra",
- [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)),
- (sext_inreg rGPR:$Rm, i16))))]>,
- Requires<[IsThumb2, HasDSP, UseMulOps]> {
+ let Inst{5-4} = op5_4;
+}
+
+def t2SMULBB : T2ThreeRegSMUL<0b001, 0b00, "smulbb",
+ [(set rGPR:$Rd, (mul (sext_inreg rGPR:$Rn, i16),
+ (sext_inreg rGPR:$Rm, i16)))]>;
+def t2SMULBT : T2ThreeRegSMUL<0b001, 0b01, "smulbt",
+ [(set rGPR:$Rd, (mul (sext_inreg rGPR:$Rn, i16),
+ (sra rGPR:$Rm, (i32 16))))]>;
+def t2SMULTB : T2ThreeRegSMUL<0b001, 0b10, "smultb",
+ [(set rGPR:$Rd, (mul (sra rGPR:$Rn, (i32 16)),
+ (sext_inreg rGPR:$Rm, i16)))]>;
+def t2SMULTT : T2ThreeRegSMUL<0b001, 0b11, "smultt",
+ [(set rGPR:$Rd, (mul (sra rGPR:$Rn, (i32 16)),
+ (sra rGPR:$Rm, (i32 16))))]>;
+def t2SMULWB : T2ThreeRegSMUL<0b011, 0b00, "smulwb", []>;
+def t2SMULWT : T2ThreeRegSMUL<0b011, 0b01, "smulwt", []>;
+
+def : Thumb2DSPPat<(mul sext_16_node:$Rm, sext_16_node:$Rn),
+ (t2SMULBB rGPR:$Rm, rGPR:$Rn)>;
+def : Thumb2DSPPat<(mul sext_16_node:$Rn, (sra rGPR:$Rm, (i32 16))),
+ (t2SMULBT rGPR:$Rn, rGPR:$Rm)>;
+def : Thumb2DSPPat<(mul (sra rGPR:$Rn, (i32 16)), sext_16_node:$Rm),
+ (t2SMULTB rGPR:$Rn, rGPR:$Rm)>;
+
+class T2FourRegSMLA<bits<3> op22_20, bits<2> op5_4, string opc,
+ list<dag> pattern>
+ : T2FourReg<(outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMUL16,
+ opc, "\t$Rd, $Rn, $Rm, $Ra", pattern>,
+ Requires<[IsThumb2, HasDSP, UseMulOps]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
- let Inst{22-20} = 0b001;
+ let Inst{22-20} = op22_20;
let Inst{7-6} = 0b00;
- let Inst{5-4} = 0b10;
- }
-
- def TT : T2FourReg<
- (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
- !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm, $Ra",
- [(set rGPR:$Rd, (add rGPR:$Ra, (opnode (sra rGPR:$Rn, (i32 16)),
- (sra rGPR:$Rm, (i32 16)))))]>,
- Requires<[IsThumb2, HasDSP, UseMulOps]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0110;
- let Inst{22-20} = 0b001;
- let Inst{7-6} = 0b00;
- let Inst{5-4} = 0b11;
- }
-
- def WB : T2FourReg<
- (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
- !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra",
- []>,
- Requires<[IsThumb2, HasDSP, UseMulOps]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0110;
- let Inst{22-20} = 0b011;
- let Inst{7-6} = 0b00;
- let Inst{5-4} = 0b00;
- }
-
- def WT : T2FourReg<
- (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC16,
- !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra",
- []>,
- Requires<[IsThumb2, HasDSP, UseMulOps]> {
- let Inst{31-27} = 0b11111;
- let Inst{26-23} = 0b0110;
- let Inst{22-20} = 0b011;
- let Inst{7-6} = 0b00;
- let Inst{5-4} = 0b01;
- }
-}
-
-defm t2SMUL : T2I_smul<"smul", mul>;
-defm t2SMLA : T2I_smla<"smla", mul>;
+ let Inst{5-4} = op5_4;
+}
+
+def t2SMLABB : T2FourRegSMLA<0b001, 0b00, "smlabb",
+ [(set rGPR:$Rd, (add rGPR:$Ra,
+ (mul (sext_inreg rGPR:$Rn, i16),
+ (sext_inreg rGPR:$Rm, i16))))]>;
+def t2SMLABT : T2FourRegSMLA<0b001, 0b01, "smlabt",
+ [(set rGPR:$Rd, (add rGPR:$Ra, (mul (sext_inreg rGPR:$Rn, i16),
+ (sra rGPR:$Rm, (i32 16)))))]>;
+def t2SMLATB : T2FourRegSMLA<0b001, 0b10, "smlatb",
+ [(set rGPR:$Rd, (add rGPR:$Ra, (mul (sra rGPR:$Rn, (i32 16)),
+ (sext_inreg rGPR:$Rm, i16))))]>;
+def t2SMLATT : T2FourRegSMLA<0b001, 0b11, "smlatt",
+ [(set rGPR:$Rd, (add rGPR:$Ra, (mul (sra rGPR:$Rn, (i32 16)),
+ (sra rGPR:$Rm, (i32 16)))))]>;
+def t2SMLAWB : T2FourRegSMLA<0b011, 0b00, "smlawb", []>;
+def t2SMLAWT : T2FourRegSMLA<0b011, 0b01, "smlawt", []>;
+
+def : Thumb2DSPMulPat<(add rGPR:$Ra, (mul sext_16_node:$Rn, sext_16_node:$Rm)),
+ (t2SMLABB rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>;
+def : Thumb2DSPMulPat<(add rGPR:$Ra,
+ (mul sext_16_node:$Rn, (sra rGPR:$Rm, (i32 16)))),
+ (t2SMLABT rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>;
+def : Thumb2DSPMulPat<(add rGPR:$Ra,
+ (mul (sra rGPR:$Rn, (i32 16)), sext_16_node:$Rm)),
+ (t2SMLATB rGPR:$Rn, rGPR:$Rm, rGPR:$Ra)>;
+
+class T2SMLAL<bits<3> op22_20, bits<4> op7_4, string opc, list<dag> pattern>
+ : T2FourReg_mac<1, op22_20, op7_4,
+ (outs rGPR:$Ra, rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm),
+ IIC_iMAC64, opc, "\t$Ra, $Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasDSP]>;
// Halfword multiple accumulate long: SMLAL<x><y>
-def t2SMLALBB : T2FourReg_mac<1, 0b100, 0b1000, (outs rGPR:$Ra,rGPR:$Rd),
- (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlalbb", "\t$Ra, $Rd, $Rn, $Rm",
- [/* For disassembly only; pattern left blank */]>,
- Requires<[IsThumb2, HasDSP]>;
-def t2SMLALBT : T2FourReg_mac<1, 0b100, 0b1001, (outs rGPR:$Ra,rGPR:$Rd),
- (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlalbt", "\t$Ra, $Rd, $Rn, $Rm",
- [/* For disassembly only; pattern left blank */]>,
- Requires<[IsThumb2, HasDSP]>;
-def t2SMLALTB : T2FourReg_mac<1, 0b100, 0b1010, (outs rGPR:$Ra,rGPR:$Rd),
- (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaltb", "\t$Ra, $Rd, $Rn, $Rm",
- [/* For disassembly only; pattern left blank */]>,
- Requires<[IsThumb2, HasDSP]>;
-def t2SMLALTT : T2FourReg_mac<1, 0b100, 0b1011, (outs rGPR:$Ra,rGPR:$Rd),
- (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaltt", "\t$Ra, $Rd, $Rn, $Rm",
- [/* For disassembly only; pattern left blank */]>,
- Requires<[IsThumb2, HasDSP]>;
-
-// Dual halfword multiple: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD
-def t2SMUAD: T2ThreeReg_mac<
- 0, 0b010, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
- IIC_iMAC32, "smuad", "\t$Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasDSP]> {
- let Inst{15-12} = 0b1111;
-}
-def t2SMUADX:T2ThreeReg_mac<
- 0, 0b010, 0b0001, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
- IIC_iMAC32, "smuadx", "\t$Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasDSP]> {
+def t2SMLALBB : T2SMLAL<0b100, 0b1000, "smlalbb", []>;
+def t2SMLALBT : T2SMLAL<0b100, 0b1001, "smlalbt", []>;
+def t2SMLALTB : T2SMLAL<0b100, 0b1010, "smlaltb", []>;
+def t2SMLALTT : T2SMLAL<0b100, 0b1011, "smlaltt", []>;
+
+class T2DualHalfMul<bits<3> op22_20, bits<4> op7_4, string opc>
+ : T2ThreeReg_mac<0, op22_20, op7_4,
+ (outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm),
+ IIC_iMAC32, opc, "\t$Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasDSP]> {
let Inst{15-12} = 0b1111;
}
-def t2SMUSD: T2ThreeReg_mac<
- 0, 0b100, 0b0000, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
- IIC_iMAC32, "smusd", "\t$Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasDSP]> {
- let Inst{15-12} = 0b1111;
-}
-def t2SMUSDX:T2ThreeReg_mac<
- 0, 0b100, 0b0001, (outs rGPR:$Rd), (ins rGPR:$Rn, rGPR:$Rm),
- IIC_iMAC32, "smusdx", "\t$Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasDSP]> {
- let Inst{15-12} = 0b1111;
-}
-def t2SMLAD : T2FourReg_mac<
- 0, 0b010, 0b0000, (outs rGPR:$Rd),
- (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlad",
- "\t$Rd, $Rn, $Rm, $Ra", []>,
- Requires<[IsThumb2, HasDSP]>;
-def t2SMLADX : T2FourReg_mac<
- 0, 0b010, 0b0001, (outs rGPR:$Rd),
- (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smladx",
- "\t$Rd, $Rn, $Rm, $Ra", []>,
- Requires<[IsThumb2, HasDSP]>;
-def t2SMLSD : T2FourReg_mac<0, 0b100, 0b0000, (outs rGPR:$Rd),
- (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlsd",
- "\t$Rd, $Rn, $Rm, $Ra", []>,
- Requires<[IsThumb2, HasDSP]>;
-def t2SMLSDX : T2FourReg_mac<0, 0b100, 0b0001, (outs rGPR:$Rd),
- (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra), IIC_iMAC32, "smlsdx",
- "\t$Rd, $Rn, $Rm, $Ra", []>,
- Requires<[IsThumb2, HasDSP]>;
-def t2SMLALD : T2FourReg_mac<1, 0b100, 0b1100, (outs rGPR:$Ra,rGPR:$Rd),
- (ins rGPR:$Rn, rGPR:$Rm), IIC_iMAC64, "smlald",
- "\t$Ra, $Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasDSP]>;
-def t2SMLALDX : T2FourReg_mac<1, 0b100, 0b1101, (outs rGPR:$Ra,rGPR:$Rd),
- (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlaldx",
- "\t$Ra, $Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasDSP]>;
-def t2SMLSLD : T2FourReg_mac<1, 0b101, 0b1100, (outs rGPR:$Ra,rGPR:$Rd),
- (ins rGPR:$Rn,rGPR:$Rm), IIC_iMAC64, "smlsld",
- "\t$Ra, $Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasDSP]>;
-def t2SMLSLDX : T2FourReg_mac<1, 0b101, 0b1101, (outs rGPR:$Ra,rGPR:$Rd),
- (ins rGPR:$Rm,rGPR:$Rn), IIC_iMAC64, "smlsldx",
- "\t$Ra, $Rd, $Rn, $Rm", []>,
- Requires<[IsThumb2, HasDSP]>;
+
+// Dual halfword multiple: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD
+def t2SMUAD: T2DualHalfMul<0b010, 0b0000, "smuad">;
+def t2SMUADX: T2DualHalfMul<0b010, 0b0001, "smuadx">;
+def t2SMUSD: T2DualHalfMul<0b100, 0b0000, "smusd">;
+def t2SMUSDX: T2DualHalfMul<0b100, 0b0001, "smusdx">;
+
+class T2DualHalfMulAdd<bits<3> op22_20, bits<4> op7_4, string opc>
+ : T2FourReg_mac<0, op22_20, op7_4,
+ (outs rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm, rGPR:$Ra),
+ IIC_iMAC32, opc, "\t$Rd, $Rn, $Rm, $Ra", []>,
+ Requires<[IsThumb2, HasDSP]>;
+
+def t2SMLAD : T2DualHalfMulAdd<0b010, 0b0000, "smlad">;
+def t2SMLADX : T2DualHalfMulAdd<0b010, 0b0001, "smladx">;
+def t2SMLSD : T2DualHalfMulAdd<0b100, 0b0000, "smlsd">;
+def t2SMLSDX : T2DualHalfMulAdd<0b100, 0b0001, "smlsdx">;
+
+class T2DualHalfMulAddLong<bits<3> op22_20, bits<4> op7_4, string opc>
+ : T2FourReg_mac<1, op22_20, op7_4,
+ (outs rGPR:$Ra, rGPR:$Rd),
+ (ins rGPR:$Rn, rGPR:$Rm),
+ IIC_iMAC64, opc, "\t$Ra, $Rd, $Rn, $Rm", []>,
+ Requires<[IsThumb2, HasDSP]>;
+
+def t2SMLALD : T2DualHalfMulAddLong<0b100, 0b1100, "smlald">;
+def t2SMLALDX : T2DualHalfMulAddLong<0b100, 0b1101, "smlaldx">;
+def t2SMLSLD : T2DualHalfMulAddLong<0b101, 0b1100, "smlsld">;
+def t2SMLSLDX : T2DualHalfMulAddLong<0b101, 0b1101, "smlsldx">;
//===----------------------------------------------------------------------===//
// Division Instructions.
@@ -3545,7 +3381,9 @@ def t2B : T2I<(outs), (ins thumb_br_target:$target), IIC_Br,
}
let Size = 4, isNotDuplicable = 1, isIndirectBranch = 1 in {
-def t2BR_JT : t2PseudoInst<(outs),
+
+// Available in both v8-M.Baseline and Thumb2 targets
+def t2BR_JT : t2basePseudoInst<(outs),
(ins GPR:$target, GPR:$index, i32imm:$jt),
0, IIC_Br,
[(ARMbr2jt GPR:$target, GPR:$index, tjumptable:$jt)]>,
@@ -3645,6 +3483,7 @@ def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask),
// Branch and Exchange Jazelle -- for disassembly only
// Rm = Inst{19-16}
+let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in
def t2BXJ : T2I<(outs), (ins GPRnopc:$func), NoItinerary, "bxj", "\t$func", []>,
Sched<[WriteBr]>, Requires<[IsThumb2, IsNotMClass]> {
bits<4> func;
@@ -3753,6 +3592,7 @@ def t2DBG : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "dbg", "\t$opt",
// Secure Monitor Call is a system instruction.
// Option = Inst{19-16}
+let isCall = 1, Uses = [SP] in
def t2SMC : T2I<(outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt",
[]>, Requires<[IsThumb2, HasTrustZone]> {
let Inst{31-27} = 0b11110;
@@ -3809,6 +3649,7 @@ def : t2InstAlias<"srsia${p} $mode", (t2SRSIA imm0_31:$mode, pred:$p)>;
def : t2InstAlias<"srsia${p} $mode!", (t2SRSIA_UPD imm0_31:$mode, pred:$p)>;
// Return From Exception is a system instruction.
+let isReturn = 1, isBarrier = 1, isTerminator = 1, Defs = [PC] in
class T2RFE<bits<12> op31_20, dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: T2I<oops, iops, itin, opc, asm, pattern>,
@@ -4568,7 +4409,7 @@ def : t2InstAlias<"ldrsh${p} $Rt, $addr",
(t2LDRSHs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>;
def : t2InstAlias<"ldr${p} $Rt, $addr",
- (t2LDRpci GPRnopc:$Rt, t2ldrlabel:$addr, pred:$p)>;
+ (t2LDRpci GPR:$Rt, t2ldrlabel:$addr, pred:$p)>;
def : t2InstAlias<"ldrb${p} $Rt, $addr",
(t2LDRBpci rGPR:$Rt, t2ldrlabel:$addr, pred:$p)>;
def : t2InstAlias<"ldrh${p} $Rt, $addr",
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
index e29d265ae3d1..e99048645685 100644
--- a/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/contrib/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -624,7 +624,7 @@ def VCMPZH : AHuI<0b11101, 0b11, 0b0101, 0b01, 0,
def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0,
(outs DPR:$Dd), (ins SPR:$Sm),
IIC_fpCVTDS, "vcvt", ".f64.f32\t$Dd, $Sm",
- [(set DPR:$Dd, (fextend SPR:$Sm))]> {
+ [(set DPR:$Dd, (fpextend SPR:$Sm))]> {
// Instruction operands.
bits<5> Dd;
bits<5> Sm;
@@ -641,7 +641,7 @@ def VCVTDS : ASuI<0b11101, 0b11, 0b0111, 0b11, 0,
// Special case encoding: bits 11-8 is 0b1011.
def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm,
IIC_fpCVTSD, "vcvt", ".f32.f64\t$Sd, $Dm",
- [(set SPR:$Sd, (fround DPR:$Dm))]> {
+ [(set SPR:$Sd, (fpround DPR:$Dm))]> {
// Instruction operands.
bits<5> Sd;
bits<5> Dm;
@@ -838,7 +838,7 @@ multiclass vcvt_inst<string opc, bits<2> rm,
}
}
-defm VCVTA : vcvt_inst<"a", 0b00, frnd>;
+defm VCVTA : vcvt_inst<"a", 0b00, fround>;
defm VCVTN : vcvt_inst<"n", 0b01>;
defm VCVTP : vcvt_inst<"p", 0b10, fceil>;
defm VCVTM : vcvt_inst<"m", 0b11, ffloor>;
@@ -938,7 +938,7 @@ multiclass vrint_inst_anpm<string opc, bits<2> rm,
Requires<[HasFPARMv8,HasDPVFP]>;
}
-defm VRINTA : vrint_inst_anpm<"a", 0b00, frnd>;
+defm VRINTA : vrint_inst_anpm<"a", 0b00, fround>;
defm VRINTN : vrint_inst_anpm<"n", 0b01>;
defm VRINTP : vrint_inst_anpm<"p", 0b10, fceil>;
defm VRINTM : vrint_inst_anpm<"m", 0b11, ffloor>;
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp b/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
new file mode 100644
index 000000000000..2bdbe4fca3de
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.cpp
@@ -0,0 +1,109 @@
+//===- ARMInstructionSelector.cpp ----------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the targeting of the InstructionSelector class for ARM.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#include "ARMInstructionSelector.h"
+#include "ARMRegisterBankInfo.h"
+#include "ARMSubtarget.h"
+#include "ARMTargetMachine.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "arm-isel"
+
+using namespace llvm;
+
+#ifndef LLVM_BUILD_GLOBAL_ISEL
+#error "You shouldn't build this"
+#endif
+
+ARMInstructionSelector::ARMInstructionSelector(const ARMSubtarget &STI,
+ const ARMRegisterBankInfo &RBI)
+ : InstructionSelector(), TII(*STI.getInstrInfo()),
+ TRI(*STI.getRegisterInfo()), RBI(RBI) {}
+
+static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
+ MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
+ const RegisterBankInfo &RBI) {
+ unsigned DstReg = I.getOperand(0).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(DstReg))
+ return true;
+
+ const RegisterBank *RegBank = RBI.getRegBank(DstReg, MRI, TRI);
+ (void)RegBank;
+ assert(RegBank && "Can't get reg bank for virtual register");
+
+ const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
+ (void)DstSize;
+ unsigned SrcReg = I.getOperand(1).getReg();
+ const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
+ (void)SrcSize;
+ assert((DstSize == SrcSize ||
+ // Copies are a means to setup initial types, the number of
+ // bits may not exactly match.
+ (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
+ DstSize <= SrcSize)) &&
+ "Copy with different width?!");
+
+ assert(RegBank->getID() == ARM::GPRRegBankID && "Unsupported reg bank");
+ const TargetRegisterClass *RC = &ARM::GPRRegClass;
+
+ // No need to constrain SrcReg. It will get constrained when
+ // we hit another of its uses or its defs.
+ // Copies do not have constraints.
+ if (!RBI.constrainGenericRegister(DstReg, *RC, MRI)) {
+ DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
+ << " operand\n");
+ return false;
+ }
+ return true;
+}
+
+bool ARMInstructionSelector::select(MachineInstr &I) const {
+ assert(I.getParent() && "Instruction should be in a basic block!");
+ assert(I.getParent()->getParent() && "Instruction should be in a function!");
+
+ auto &MBB = *I.getParent();
+ auto &MF = *MBB.getParent();
+ auto &MRI = MF.getRegInfo();
+
+ if (!isPreISelGenericOpcode(I.getOpcode())) {
+ if (I.isCopy())
+ return selectCopy(I, TII, MRI, TRI, RBI);
+
+ return true;
+ }
+
+ MachineInstrBuilder MIB{MF, I};
+
+ using namespace TargetOpcode;
+ switch (I.getOpcode()) {
+ case G_ADD:
+ I.setDesc(TII.get(ARM::ADDrr));
+ AddDefaultCC(AddDefaultPred(MIB));
+ break;
+ case G_FRAME_INDEX:
+ // Add 0 to the given frame index and hope it will eventually be folded into
+ // the user(s).
+ I.setDesc(TII.get(ARM::ADDri));
+ AddDefaultCC(AddDefaultPred(MIB.addImm(0)));
+ break;
+ case G_LOAD:
+ I.setDesc(TII.get(ARM::LDRi12));
+ AddDefaultPred(MIB.addImm(0));
+ break;
+ default:
+ return false;
+ }
+
+ return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.h b/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.h
new file mode 100644
index 000000000000..5072cdd60ce4
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMInstructionSelector.h
@@ -0,0 +1,39 @@
+//===- ARMInstructionSelector ------------------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file declares the targeting of the InstructionSelector class for ARM.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_ARMINSTRUCTIONSELECTOR_H
+#define LLVM_LIB_TARGET_ARM_ARMINSTRUCTIONSELECTOR_H
+
+#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
+
+namespace llvm {
+class ARMBaseInstrInfo;
+class ARMBaseRegisterInfo;
+class ARMBaseTargetMachine;
+class ARMRegisterBankInfo;
+class ARMSubtarget;
+
+class ARMInstructionSelector : public InstructionSelector {
+public:
+ ARMInstructionSelector(const ARMSubtarget &STI,
+ const ARMRegisterBankInfo &RBI);
+
+ virtual bool select(MachineInstr &I) const override;
+
+private:
+ const ARMBaseInstrInfo &TII;
+ const ARMBaseRegisterInfo &TRI;
+ const ARMRegisterBankInfo &RBI;
+};
+
+} // End llvm namespace.
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
new file mode 100644
index 000000000000..255ea4bc7198
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.cpp
@@ -0,0 +1,44 @@
+//===- ARMLegalizerInfo.cpp --------------------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the targeting of the MachineLegalizer class for ARM.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#include "ARMLegalizerInfo.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Target/TargetOpcodes.h"
+
+using namespace llvm;
+
+#ifndef LLVM_BUILD_GLOBAL_ISEL
+#error "You shouldn't build this"
+#endif
+
+ARMLegalizerInfo::ARMLegalizerInfo() {
+ using namespace TargetOpcode;
+
+ const LLT p0 = LLT::pointer(0, 32);
+
+ const LLT s8 = LLT::scalar(8);
+ const LLT s16 = LLT::scalar(16);
+ const LLT s32 = LLT::scalar(32);
+
+ setAction({G_FRAME_INDEX, p0}, Legal);
+
+ setAction({G_LOAD, s32}, Legal);
+ setAction({G_LOAD, 1, p0}, Legal);
+
+ for (auto Ty : {s8, s16, s32})
+ setAction({G_ADD, Ty}, Legal);
+
+ computeTables();
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.h b/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.h
new file mode 100644
index 000000000000..ca3eea81271b
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMLegalizerInfo.h
@@ -0,0 +1,29 @@
+//===- ARMLegalizerInfo ------------------------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file declares the targeting of the MachineLegalizer class for ARM.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_ARMMACHINELEGALIZER_H
+#define LLVM_LIB_TARGET_ARM_ARMMACHINELEGALIZER_H
+
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+
+namespace llvm {
+
+class LLVMContext;
+
+/// This class provides the information for the target register banks.
+class ARMLegalizerInfo : public LegalizerInfo {
+public:
+ ARMLegalizerInfo();
+};
+} // End llvm namespace.
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 62d57f3f4986..48ab491b5be9 100644
--- a/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -95,12 +95,10 @@ namespace {
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
- const char *getPassName() const override {
- return ARM_LOAD_STORE_OPT_NAME;
- }
+ StringRef getPassName() const override { return ARM_LOAD_STORE_OPT_NAME; }
private:
/// A set of load/store MachineInstrs with same base register sorted by
@@ -562,7 +560,7 @@ void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,
MachineBasicBlock::const_iterator Before) {
// Initialize if we never queried in this block.
if (!LiveRegsValid) {
- LiveRegs.init(TRI);
+ LiveRegs.init(*TRI);
LiveRegs.addLiveOuts(MBB);
LiveRegPos = MBB.end();
LiveRegsValid = true;
@@ -834,7 +832,7 @@ MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
assert(MO.isImplicit());
unsigned DefReg = MO.getReg();
- if (std::find(ImpDefs.begin(), ImpDefs.end(), DefReg) != ImpDefs.end())
+ if (is_contained(ImpDefs, DefReg))
continue;
// We can ignore cases where the super-reg is read and written.
if (MI->readsRegister(DefReg))
@@ -1851,7 +1849,7 @@ bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
if (MBB.empty()) return false;
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
- if (MBBI != MBB.begin() &&
+ if (MBBI != MBB.begin() && MBBI != MBB.end() &&
(MBBI->getOpcode() == ARM::BX_RET ||
MBBI->getOpcode() == ARM::tBX_RET ||
MBBI->getOpcode() == ARM::MOVPCLR)) {
@@ -1953,7 +1951,7 @@ namespace {
bool runOnMachineFunction(MachineFunction &Fn) override;
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return ARM_PREALLOC_LOAD_STORE_OPT_NAME;
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp b/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp
index 7429acdb09ad..293a527b09e8 100644
--- a/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp
@@ -21,6 +21,12 @@
#include "llvm/IR/Mangler.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCStreamer.h"
using namespace llvm;
@@ -85,6 +91,8 @@ bool ARMAsmPrinter::lowerOperand(const MachineOperand &MO,
MCOp = GetSymbolRef(MO, GetJTISymbol(MO.getIndex()));
break;
case MachineOperand::MO_ConstantPoolIndex:
+ if (Subtarget->genExecuteOnly())
+ llvm_unreachable("execute-only should not generate constant pools");
MCOp = GetSymbolRef(MO, GetCPISymbol(MO.getIndex()));
break;
case MachineOperand::MO_BlockAddress:
@@ -93,7 +101,7 @@ bool ARMAsmPrinter::lowerOperand(const MachineOperand &MO,
case MachineOperand::MO_FPImmediate: {
APFloat Val = MO.getFPImm()->getValueAPF();
bool ignored;
- Val.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &ignored);
+ Val.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &ignored);
MCOp = MCOperand::createFPImm(Val.convertToDouble());
break;
}
@@ -150,3 +158,106 @@ void llvm::LowerARMMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
}
}
}
+
+void ARMAsmPrinter::EmitSled(const MachineInstr &MI, SledKind Kind)
+{
+ if (MI.getParent()->getParent()->getInfo<ARMFunctionInfo>()
+ ->isThumbFunction())
+ {
+ MI.emitError("An attempt to perform XRay instrumentation for a"
+ " Thumb function (not supported). Detected when emitting a sled.");
+ return;
+ }
+ static const int8_t NoopsInSledCount = 6;
+ // We want to emit the following pattern:
+ //
+ // .Lxray_sled_N:
+ // ALIGN
+ // B #20
+ // ; 6 NOP instructions (24 bytes)
+ // .tmpN
+ //
+ // We need the 24 bytes (6 instructions) because at runtime, we'd be patching
+ // over the full 28 bytes (7 instructions) with the following pattern:
+ //
+ // PUSH{ r0, lr }
+ // MOVW r0, #<lower 16 bits of function ID>
+ // MOVT r0, #<higher 16 bits of function ID>
+ // MOVW ip, #<lower 16 bits of address of __xray_FunctionEntry/Exit>
+ // MOVT ip, #<higher 16 bits of address of __xray_FunctionEntry/Exit>
+ // BLX ip
+ // POP{ r0, lr }
+ //
+ OutStreamer->EmitCodeAlignment(4);
+ auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
+ OutStreamer->EmitLabel(CurSled);
+ auto Target = OutContext.createTempSymbol();
+
+ // Emit "B #20" instruction, which jumps over the next 24 bytes (because
+ // register pc is 8 bytes ahead of the jump instruction by the time the
+ // CPU executes it).
+ // By analogy to ARMAsmPrinter::emitPseudoExpansionLowering() |case ARM::B|.
+ // It is not clear why |addReg(0)| is needed (the last operand).
+ EmitToStreamer(*OutStreamer, MCInstBuilder(ARM::Bcc).addImm(20)
+ .addImm(ARMCC::AL).addReg(0));
+
+ MCInst Noop;
+ Subtarget->getInstrInfo()->getNoopForElfTarget(Noop);
+ for (int8_t I = 0; I < NoopsInSledCount; I++)
+ {
+ OutStreamer->EmitInstruction(Noop, getSubtargetInfo());
+ }
+
+ OutStreamer->EmitLabel(Target);
+ recordSled(CurSled, MI, Kind);
+}
+
+void ARMAsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI)
+{
+ EmitSled(MI, SledKind::FUNCTION_ENTER);
+}
+
+void ARMAsmPrinter::LowerPATCHABLE_FUNCTION_EXIT(const MachineInstr &MI)
+{
+ EmitSled(MI, SledKind::FUNCTION_EXIT);
+}
+
+void ARMAsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI)
+{
+ EmitSled(MI, SledKind::TAIL_CALL);
+}
+
+void ARMAsmPrinter::EmitXRayTable()
+{
+ if (Sleds.empty())
+ return;
+
+ MCSection *Section = nullptr;
+ if (Subtarget->isTargetELF()) {
+ Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_GROUP |
+ ELF::SHF_MERGE,
+ 0, CurrentFnSym->getName());
+ } else if (Subtarget->isTargetMachO()) {
+ Section = OutContext.getMachOSection("__DATA", "xray_instr_map", 0,
+ SectionKind::getReadOnlyWithRel());
+ } else {
+ llvm_unreachable("Unsupported target");
+ }
+
+ auto PrevSection = OutStreamer->getCurrentSectionOnly();
+ OutStreamer->SwitchSection(Section);
+ for (const auto &Sled : Sleds) {
+ OutStreamer->EmitSymbolValue(Sled.Sled, 4);
+ OutStreamer->EmitSymbolValue(CurrentFnSym, 4);
+ auto Kind = static_cast<uint8_t>(Sled.Kind);
+ OutStreamer->EmitBytes(
+ StringRef(reinterpret_cast<const char *>(&Kind), 1));
+ OutStreamer->EmitBytes(
+ StringRef(reinterpret_cast<const char *>(&Sled.AlwaysInstrument), 1));
+ OutStreamer->EmitZeros(6);
+ }
+ OutStreamer->SwitchSection(PrevSection);
+
+ Sleds.clear();
+}
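
For reference, the loop in EmitXRayTable() above emits one fixed-size record per sled into the xray_instr_map section: a 4-byte sled address, a 4-byte function address, one byte for the sled kind, one byte for the always-instrument flag, and six bytes of zero padding. A minimal C++ sketch of that 16-byte entry (illustrative only; the struct and field names are not part of this change):

#include <cstdint>

// Hypothetical mirror of the record written by EmitXRayTable() on a 32-bit
// ARM target; names are illustrative, not taken from the patch.
struct XRaySledEntry32 {
  uint32_t Sled;            // EmitSymbolValue(Sled.Sled, 4)
  uint32_t Function;        // EmitSymbolValue(CurrentFnSym, 4)
  uint8_t Kind;             // FUNCTION_ENTER, FUNCTION_EXIT or TAIL_CALL
  uint8_t AlwaysInstrument; // non-zero if instrumentation is unconditional
  uint8_t Padding[6];       // the trailing EmitZeros(6)
};

static_assert(sizeof(XRaySledEntry32) == 16, "one 16-byte entry per sled");
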
diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp
index b6dee9ff8385..50d8f0941460 100644
--- a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.cpp
@@ -11,14 +11,14 @@
using namespace llvm;
-void ARMFunctionInfo::anchor() { }
+void ARMFunctionInfo::anchor() {}
ARMFunctionInfo::ARMFunctionInfo(MachineFunction &MF)
: isThumb(MF.getSubtarget<ARMSubtarget>().isThumb()),
hasThumb2(MF.getSubtarget<ARMSubtarget>().hasThumb2()),
- StByValParamsPadding(0), ArgRegsSaveSize(0), HasStackFrame(false),
- RestoreSPFromFP(false), LRSpilledForFarJump(false),
+ StByValParamsPadding(0), ArgRegsSaveSize(0), ReturnRegsCount(0),
+ HasStackFrame(false), RestoreSPFromFP(false), LRSpilledForFarJump(false),
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
- GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
- PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false),
- ArgumentStackSize(0), IsSplitCSR(false) {}
+ GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), PICLabelUId(0),
+ VarArgsFrameIndex(0), HasITBlocks(false), ArgumentStackSize(0),
+ IsSplitCSR(false), PromotedGlobalsIncrease(0) {}
diff --git a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
index f71497240ff3..8c485e89bf54 100644
--- a/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -121,6 +121,12 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// copies.
bool IsSplitCSR;
+ /// Globals that have had their storage promoted into the constant pool.
+ SmallPtrSet<const GlobalVariable*,2> PromotedGlobals;
+
+ /// The amount by which the literal pool has been increased due to promoted globals.
+ int PromotedGlobalsIncrease;
+
public:
ARMFunctionInfo() :
isThumb(false),
@@ -131,7 +137,8 @@ public:
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSAlignGapSize(0), DPRCSSize(0),
NumAlignedDPRCS2Regs(0), PICLabelUId(0),
- VarArgsFrameIndex(0), HasITBlocks(false), IsSplitCSR(false) {}
+ VarArgsFrameIndex(0), HasITBlocks(false), IsSplitCSR(false),
+ PromotedGlobalsIncrease(0) {}
explicit ARMFunctionInfo(MachineFunction &MF);
@@ -226,6 +233,22 @@ public:
}
return It;
}
+
+ /// Indicate to the backend that \c GV has had its storage changed to inside
+ /// a constant pool. This means it no longer needs to be emitted as a
+ /// global variable.
+ void markGlobalAsPromotedToConstantPool(const GlobalVariable *GV) {
+ PromotedGlobals.insert(GV);
+ }
+ SmallPtrSet<const GlobalVariable*, 2>& getGlobalsPromotedToConstantPool() {
+ return PromotedGlobals;
+ }
+ int getPromotedConstpoolIncrease() const {
+ return PromotedGlobalsIncrease;
+ }
+ void setPromotedConstpoolIncrease(int Sz) {
+ PromotedGlobalsIncrease = Sz;
+ }
};
} // End llvm namespace
diff --git a/contrib/llvm/lib/Target/ARM/ARMOptimizeBarriersPass.cpp b/contrib/llvm/lib/Target/ARM/ARMOptimizeBarriersPass.cpp
index 73dcb9641b61..581d5fe159fd 100644
--- a/contrib/llvm/lib/Target/ARM/ARMOptimizeBarriersPass.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMOptimizeBarriersPass.cpp
@@ -29,12 +29,10 @@ public:
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
- const char *getPassName() const override {
- return "optimise barriers pass";
- }
+ StringRef getPassName() const override { return "optimise barriers pass"; }
};
char ARMOptimizeBarriersPass::ID = 0;
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp
new file mode 100644
index 000000000000..9bd036a1eace
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.cpp
@@ -0,0 +1,127 @@
+//===- ARMRegisterBankInfo.cpp -----------------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the targeting of the RegisterBankInfo class for ARM.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#include "ARMRegisterBankInfo.h"
+#include "ARMInstrInfo.h" // For the register classes
+#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+#ifndef LLVM_BUILD_GLOBAL_ISEL
+#error "You shouldn't build this"
+#endif
+
+// FIXME: TableGen this.
+// If it grows too much and TableGen still isn't ready to do the job, extract it
+// into an ARMGenRegisterBankInfo.def (similar to AArch64).
+namespace llvm {
+namespace ARM {
+RegisterBank GPRRegBank;
+RegisterBank *RegBanks[] = {&GPRRegBank};
+
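+// PartialMapping fields are (StartIdx, Length, RegBank): the mapping below
+// says that bits [0, 32), i.e. the whole 32-bit value, live in the GPR bank.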
+RegisterBankInfo::PartialMapping GPRPartialMapping{0, 32, GPRRegBank};
+
+RegisterBankInfo::ValueMapping ValueMappings[] = {
+ {&GPRPartialMapping, 1}, {&GPRPartialMapping, 1}, {&GPRPartialMapping, 1}};
+} // end namespace ARM
+} // end namespace llvm
+
+ARMRegisterBankInfo::ARMRegisterBankInfo(const TargetRegisterInfo &TRI)
+ : RegisterBankInfo(ARM::RegBanks, ARM::NumRegisterBanks) {
+ static bool AlreadyInit = false;
+ // We have only one set of register banks, whatever the subtarget
+ // is. Therefore, the initialization of the RegBanks table should be
+ // done only once. Indeed the table of all register banks
+ // (ARM::RegBanks) is unique in the compiler. At some point, it
+ // will get tablegen'ed and the whole constructor becomes empty.
+ if (AlreadyInit)
+ return;
+ AlreadyInit = true;
+
+ // Initialize the GPR bank.
+ createRegisterBank(ARM::GPRRegBankID, "GPRB");
+
+ addRegBankCoverage(ARM::GPRRegBankID, ARM::GPRRegClassID, TRI);
+ addRegBankCoverage(ARM::GPRRegBankID, ARM::GPRwithAPSRRegClassID, TRI);
+ const RegisterBank &RBGPR = getRegBank(ARM::GPRRegBankID);
+ (void)RBGPR;
+ assert(&ARM::GPRRegBank == &RBGPR && "The order in RegBanks is messed up");
+ assert(RBGPR.covers(*TRI.getRegClass(ARM::GPRRegClassID)) &&
+ "Subclass not added?");
+ assert(RBGPR.covers(*TRI.getRegClass(ARM::GPRwithAPSRRegClassID)) &&
+ "Subclass not added?");
+ assert(RBGPR.covers(*TRI.getRegClass(ARM::GPRnopcRegClassID)) &&
+ "Subclass not added?");
+ assert(RBGPR.covers(*TRI.getRegClass(ARM::rGPRRegClassID)) &&
+ "Subclass not added?");
+ assert(RBGPR.covers(*TRI.getRegClass(ARM::tGPRRegClassID)) &&
+ "Subclass not added?");
+ assert(RBGPR.covers(*TRI.getRegClass(ARM::tcGPRRegClassID)) &&
+ "Subclass not added?");
+ assert(RBGPR.covers(*TRI.getRegClass(ARM::tGPR_and_tcGPRRegClassID)) &&
+ "Subclass not added?");
+ assert(RBGPR.getSize() == 32 && "GPRs should hold up to 32-bit");
+}
+
+const RegisterBank &ARMRegisterBankInfo::getRegBankFromRegClass(
+ const TargetRegisterClass &RC) const {
+ using namespace ARM;
+
+ switch (RC.getID()) {
+ case GPRRegClassID:
+ case tGPR_and_tcGPRRegClassID:
+ return getRegBank(ARM::GPRRegBankID);
+ default:
+ llvm_unreachable("Unsupported register kind");
+ }
+
+ llvm_unreachable("Switch should handle all register classes");
+}
+
+RegisterBankInfo::InstructionMapping
+ARMRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
+ auto Opc = MI.getOpcode();
+
+ // Try the default logic for non-generic instructions that are either copies
+ // or already have some operands assigned to banks.
+ if (!isPreISelGenericOpcode(Opc)) {
+ InstructionMapping Mapping = getInstrMappingImpl(MI);
+ if (Mapping.isValid())
+ return Mapping;
+ }
+
+ using namespace TargetOpcode;
+
+ unsigned NumOperands = MI.getNumOperands();
+ const ValueMapping *OperandsMapping = &ARM::ValueMappings[0];
+
+ switch (Opc) {
+ case G_ADD:
+ case G_LOAD:
+ // FIXME: We're abusing the fact that everything lives in a GPR for now; in
+ // the real world we would use different mappings.
+ OperandsMapping = &ARM::ValueMappings[0];
+ break;
+ case G_FRAME_INDEX:
+ OperandsMapping = getOperandsMapping({&ARM::ValueMappings[0], nullptr});
+ break;
+ default:
+ return InstructionMapping{};
+ }
+
+ return InstructionMapping{DefaultMappingID, /*Cost=*/1, OperandsMapping,
+ NumOperands};
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.h b/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.h
new file mode 100644
index 000000000000..773920ee57a7
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMRegisterBankInfo.h
@@ -0,0 +1,41 @@
+//===- ARMRegisterBankInfo ---------------------------------------*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file declares the targeting of the RegisterBankInfo class for ARM.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_ARM_ARMREGISTERBANKINFO_H
+#define LLVM_LIB_TARGET_ARM_ARMREGISTERBANKINFO_H
+
+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+
+namespace llvm {
+
+class TargetRegisterInfo;
+
+namespace ARM {
+enum {
+ GPRRegBankID = 0, // General purpose registers
+ NumRegisterBanks,
+};
+} // end namespace ARM
+
+/// This class provides the information for the target register banks.
+class ARMRegisterBankInfo final : public RegisterBankInfo {
+public:
+ ARMRegisterBankInfo(const TargetRegisterInfo &TRI);
+
+ const RegisterBank &
+ getRegBankFromRegClass(const TargetRegisterClass &RC) const override;
+
+ InstructionMapping getInstrMapping(const MachineInstr &MI) const override;
+};
+} // End llvm namespace.
+#endif
diff --git a/contrib/llvm/lib/Target/ARM/ARMSchedule.td b/contrib/llvm/lib/Target/ARM/ARMSchedule.td
index 47a99313025c..b7d2d34614df 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSchedule.td
+++ b/contrib/llvm/lib/Target/ARM/ARMSchedule.td
@@ -364,3 +364,4 @@ include "ARMScheduleV6.td"
include "ARMScheduleA8.td"
include "ARMScheduleA9.td"
include "ARMScheduleSwift.td"
+include "ARMScheduleR52.td"
diff --git a/contrib/llvm/lib/Target/ARM/ARMScheduleR52.td b/contrib/llvm/lib/Target/ARM/ARMScheduleR52.td
new file mode 100644
index 000000000000..1b40742a093b
--- /dev/null
+++ b/contrib/llvm/lib/Target/ARM/ARMScheduleR52.td
@@ -0,0 +1,983 @@
+//==- ARMScheduleR52.td - Cortex-R52 Scheduling Definitions -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the SchedRead/Write data for the ARM Cortex-R52 processor.
+//
+//===----------------------------------------------------------------------===//
+
+// ===---------------------------------------------------------------------===//
+// The Cortex-R52 is an in-order pipelined superscalar microprocessor with
+// an 8-stage pipeline. It can issue at most two instructions per cycle.
+// There are two ALUs, one LDST, one MUL and a non-pipelined integer DIV.
+// A number of forwarding paths enable results of computations to be input
+// to subsequent operations before they are written to registers.
+// This scheduler is a MachineScheduler. See TargetSchedule.td for details.
+
+def CortexR52Model : SchedMachineModel {
+ let MicroOpBufferSize = 0; // R52 is an in-order processor
+ let IssueWidth = 2; // 2 micro-ops dispatched per cycle
+ let LoadLatency = 1; // Optimistic, assuming no misses
+ let MispredictPenalty = 8; // A branch direction mispredict, including PFU
+ let PostRAScheduler = 1; // Enable PostRA scheduler pass.
+ let CompleteModel = 0; // Covers instructions applicable to cortex-r52.
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available.
+
+// Model each pipeline as a ProcResource with BufferSize = 0, since
+// Cortex-R52 is an in-order processor.
+
+def R52UnitALU : ProcResource<2> { let BufferSize = 0; } // Int ALU
+def R52UnitMAC : ProcResource<1> { let BufferSize = 0; } // Int MAC
+def R52UnitDiv : ProcResource<1> { let BufferSize = 0; } // Int Division
+def R52UnitLd : ProcResource<1> { let BufferSize = 0; } // Load/Store
+def R52UnitB : ProcResource<1> { let BufferSize = 0; } // Branch
+def R52UnitFPALU : ProcResource<2> { let BufferSize = 0; } // FP ALU
+def R52UnitFPMUL : ProcResource<2> { let BufferSize = 0; } // FP MUL
+def R52UnitFPDIV : ProcResource<1> { let BufferSize = 0; } // FP DIV
+
+// Cortex-R52 specific SchedReads
+def R52Read_ISS : SchedRead;
+def R52Read_EX1 : SchedRead;
+def R52Read_EX2 : SchedRead;
+def R52Read_WRI : SchedRead;
+def R52Read_F0 : SchedRead; // F0 maps to ISS stage of integer pipe
+def R52Read_F1 : SchedRead;
+def R52Read_F2 : SchedRead;
+
+
+//===----------------------------------------------------------------------===//
+// Subtarget-specific SchedWrite types which map ProcResources and set latency.
+
+let SchedModel = CortexR52Model in {
+
+// ALU - Write occurs in Late EX2 (independent of whether shift was required)
+def : WriteRes<WriteALU, [R52UnitALU]> { let Latency = 3; }
+def : WriteRes<WriteALUsi, [R52UnitALU]> { let Latency = 3; }
+def : WriteRes<WriteALUsr, [R52UnitALU]> { let Latency = 3; }
+def : WriteRes<WriteALUSsr, [R52UnitALU]> { let Latency = 3; }
+
+// Compares
+def : WriteRes<WriteCMP, [R52UnitALU]> { let Latency = 0; }
+def : WriteRes<WriteCMPsi, [R52UnitALU]> { let Latency = 0; }
+def : WriteRes<WriteCMPsr, [R52UnitALU]> { let Latency = 0; }
+
+// Div - may stall 0-9 cycles depending on input (i.e. WRI+(0-9)/2)
+def : WriteRes<WriteDiv, [R52UnitDiv]> {
+ let Latency = 8; let ResourceCycles = [8]; // not pipelined
+}
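+// ResourceCycles = [8] keeps the single divider busy for the whole operation,
+// so a second divide cannot start until the previous one completes.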
+
+// Loads
+def : WriteRes<WriteLd, [R52UnitLd]> { let Latency = 4; }
+def : WriteRes<WritePreLd, [R52UnitLd]> { let Latency = 4; }
+
+// Branches - LR written in Late EX2
+def : WriteRes<WriteBr, [R52UnitB]> { let Latency = 0; }
+def : WriteRes<WriteBrL, [R52UnitB]> { let Latency = 0; }
+def : WriteRes<WriteBrTbl, [R52UnitALU]> { let Latency = 0; }
+
+// Misc
+def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }
+def : WriteRes<WriteCvtFP, [R52UnitALU]> { let Latency = 3; }
+
+def : ReadAdvance<ReadALU, 1>; // Operand needed in EX1 stage
+def : ReadAdvance<ReadALUsr, 0>; // Shift operands needed in ISS
+
+
+//===----------------------------------------------------------------------===//
+// Subtarget-specific SchedReadWrites.
+
+// Forwarding information - based on when an operand is read
+def : ReadAdvance<R52Read_ISS, 0>;
+def : ReadAdvance<R52Read_EX1, 1>;
+def : ReadAdvance<R52Read_EX2, 2>;
+def : ReadAdvance<R52Read_F0, 0>;
+def : ReadAdvance<R52Read_F1, 1>;
+def : ReadAdvance<R52Read_F2, 2>;
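+// Worked example (assuming the usual SchedModel semantics, where the
+// ReadAdvance is subtracted from the producer's latency): a result written
+// with Latency = 3 that is consumed through R52Read_EX2 (advance 2) is seen
+// after an effective 3 - 2 = 1 cycle, modelling the forwarding paths above.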
+
+
+// Cortex-R52 specific SchedWrites for use with InstRW
+def R52WriteMAC : SchedWriteRes<[R52UnitMAC]> { let Latency = 4; }
+def R52WriteDIV : SchedWriteRes<[R52UnitDiv]> {
+ let Latency = 8; let ResourceCycles = [8]; // not pipelined
+}
+def R52WriteLd : SchedWriteRes<[R52UnitLd]> { let Latency = 4; }
+def R52WriteST : SchedWriteRes<[R52UnitLd]> { let Latency = 4; }
+def R52WriteAdr : SchedWriteRes<[]> { let Latency = 0; }
+def R52WriteCC : SchedWriteRes<[]> { let Latency = 0; }
+def R52WriteALU_EX1 : SchedWriteRes<[R52UnitALU]> { let Latency = 2; }
+def R52WriteALU_EX2 : SchedWriteRes<[R52UnitALU]> { let Latency = 3; }
+def R52WriteALU_WRI : SchedWriteRes<[R52UnitALU]> { let Latency = 4; }
+
+def R52WriteNoRSRC_EX2 : SchedWriteRes<[]> { let Latency = 3; }
+def R52WriteNoRSRC_WRI : SchedWriteRes<[]> { let Latency = 4; }
+
+def R52WriteFPALU_F3 : SchedWriteRes<[R52UnitFPALU]> { let Latency = 4; }
+def R52Write2FPALU_F3 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> {
+ let Latency = 4;
+}
+def R52WriteFPALU_F4 : SchedWriteRes<[R52UnitFPALU]> { let Latency = 5; }
+def R52Write2FPALU_F4 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> {
+ let Latency = 5;
+}
+def R52WriteFPALU_F5 : SchedWriteRes<[R52UnitFPALU]> { let Latency = 6; }
+def R52Write2FPALU_F5 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> {
+ let Latency = 6;
+}
+def R52WriteFPMUL_F5 : SchedWriteRes<[R52UnitFPMUL]> { let Latency = 6; }
+def R52Write2FPMUL_F5 : SchedWriteRes<[R52UnitFPMUL, R52UnitFPMUL]> {
+ let Latency = 6;
+}
+def R52WriteFPMAC_F5 : SchedWriteRes<[R52UnitFPMUL, R52UnitFPALU]> {
+ let Latency = 11; // as it is internally two insns (MUL then ADD)
+}
+def R52Write2FPMAC_F5 : SchedWriteRes<[R52UnitFPMUL, R52UnitFPMUL,
+ R52UnitFPALU, R52UnitFPALU]> {
+ let Latency = 11;
+}
+
+def R52WriteFPLd_F4 : SchedWriteRes<[R52UnitLd]> { let Latency = 5; }
+def R52WriteFPST_F4 : SchedWriteRes<[R52UnitLd]> { let Latency = 5; }
+
+def R52WriteFPDIV_SP : SchedWriteRes<[R52UnitFPDIV]> {
+ let Latency = 7; // FP div takes fixed #cycles
+ let ResourceCycles = [7]; // is not pipelined
+ }
+def R52WriteFPDIV_DP : SchedWriteRes<[R52UnitFPDIV]> {
+ let Latency = 17;
+ let ResourceCycles = [17];
+}
+
+
+//===----------------------------------------------------------------------===//
+// Subtarget-specific - map operands to SchedReadWrites
+
+def : InstRW<[WriteALU], (instrs COPY)>;
+
+def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS],
+ (instregex "SXTB", "SXTH", "SXTB16", "UXTB", "UXTH", "UXTB16",
+ "t2SXTB", "t2SXTH", "t2SXTB16", "t2UXTB", "t2UXTH", "t2UXTB16")>;
+
+def : InstRW<[R52WriteALU_EX1, R52Read_ISS],
+ (instregex "MOVCCi32imm", "MOVi32imm", "MOV_ga_dyn", "t2MOVCCi",
+ "t2MOVi", "t2MOV_ga_dyn")>;
+def : InstRW<[R52WriteALU_EX2, R52Read_EX1],
+ (instregex "MOV_ga_pcrel", "t2MOV_ga_pcrel")>;
+def : InstRW<[R52WriteLd,R52Read_ISS],
+ (instregex "MOV_ga_pcrel_ldr", "t2MOV_ga_pcrel_ldr")>;
+
+def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1], (instregex "SEL", "t2SEL")>;
+
+def : InstRW< [R52WriteALU_EX2, R52Read_ISS, R52Read_ISS],
+ (instregex "BFC", "BFI", "UBFX", "SBFX", "(t|t2)BFC", "(t|t2)BFI",
+ "(t|t2)UBFX", "(t|t2)SBFX")>;
+
+// Saturating arithmetic
+def : InstRW< [R52WriteALU_WRI, R52Read_EX1, R52Read_EX1],
+ (instregex "QADD", "QSUB", "QDADD", "QDSUB", "SSAT", "SSAT16", "USAT",
+ "QADD8", "QADD16", "QSUB8", "QSUB16", "QASX", "QSAX",
+ "UQADD8", "UQADD16","UQSUB8","UQSUB16","UQASX","UQSAX", "t2QADD",
+ "t2QSUB", "t2QDADD", "t2QDSUB", "t2SSAT", "t2SSAT16", "t2USAT",
+ "t2QADD8", "t2QADD16", "t2QSUB8", "t2QSUB16", "t2QASX", "t2QSAX",
+ "t2UQADD8", "t2UQADD16","t2UQSUB8","t2UQSUB16","t2UQASX","t2UQSAX","t2ABS")>;
+
+// Parallel arithmetic
+def : InstRW< [R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
+ (instregex "SADD8", "SADD16", "SSUB8", "SSUB16", "SASX", "SSAX",
+ "UADD8", "UADD16", "USUB8", "USUB16", "UASX", "USAX", "t2SADD8",
+ "t2SADD16", "t2SSUB8", "t2SSUB16", "t2SASX", "t2SSAX", "t2UADD8",
+ "t2UADD16", "t2USUB8", "t2USUB16", "t2UASX", "t2USAX")>;
+
+// Flag setting.
+def : InstRW< [R52WriteALU_EX2, R52Read_EX1, R52Read_EX1],
+ (instregex "SHADD8", "SHADD16", "SHSUB8", "SHSUB16", "SHASX", "SHSAX",
+ "SXTAB", "SXTAB16", "SXTAH", "UHADD8", "UHADD16", "UHSUB8", "UHSUB16",
+ "UHASX", "UHSAX", "UXTAB", "UXTAB16", "UXTAH", "t2SHADD8", "t2SHADD16",
+ "t2SHSUB8", "t2SHSUB16", "t2SHASX", "t2SHSAX", "t2SXTAB", "t2SXTAB16",
+ "t2SXTAH", "t2UHADD8", "t2UHADD16", "t2UHSUB8", "t2UHSUB16", "t2UHASX",
+ "t2UHSAX", "t2UXTAB", "t2UXTAB16", "t2UXTAH")>;
+
+// Sum of Absolute Difference
+def : InstRW< [R52WriteALU_WRI, R52Read_ISS, R52Read_ISS, R52Read_ISS],
+ (instregex "USAD8", "t2USAD8", "tUSAD8","USADA8", "t2USADA8", "tUSADA8") >;
+
+// Integer Multiply
+def : InstRW<[R52WriteMAC, R52Read_ISS, R52Read_ISS],
+ (instregex "MULS", "MUL", "SMMUL", "SMMULR", "SMULBB", "SMULBT",
+ "SMULTB", "SMULTT", "SMULWB", "SMULWT", "SMUSD", "SMUSDXi", "t2MUL",
+ "t2SMMUL", "t2SMMULR", "t2SMULBB", "t2SMULBT", "t2SMULTB", "t2SMULTT",
+ "t2SMULWB", "t2SMULWT", "t2SMUSD")>;
+
+// Multiply Accumulate
+// Even for 64-bit accumulation (or Long), the single MAC is used (not ALUs).
+// The store pipeline is used partly for 64-bit operations.
+def : InstRW<[R52WriteMAC, R52Read_ISS, R52Read_ISS, R52Read_ISS],
+ (instregex "MLAS", "MLA", "MLS", "SMMLA", "SMMLAR", "SMMLS", "SMMLSR",
+ "t2MLA", "t2MLS", "t2MLAS", "t2SMMLA", "t2SMMLAR", "t2SMMLS", "t2SMMLSR",
+ "SMUAD", "SMUADX", "t2SMUAD", "t2SMUADX",
+ "SMLABB", "SMLABT", "SMLATB", "SMLATT", "SMLSD", "SMLSDX",
+ "SMLAWB", "SMLAWT", "t2SMLABB", "t2SMLABT", "t2SMLATB", "t2SMLATT",
+ "t2SMLSD", "t2SMLSDX", "t2SMLAWB", "t2SMLAWT",
+ "SMLAD", "SMLADX", "t2SMLAD", "t2SMLADX",
+ "SMULL$", "UMULL$", "t2SMULL$", "t2UMULL$",
+ "SMLALS", "UMLALS", "SMLAL", "UMLAL", "MLALBB", "SMLALBT",
+ "SMLALTB", "SMLALTT", "SMLALD", "SMLALDX", "SMLSLD", "SMLSLDX",
+ "UMAAL", "t2SMLALS", "t2UMLALS", "t2SMLAL", "t2UMLAL", "t2MLALBB",
+ "t2SMLALBT", "t2SMLALTB", "t2SMLALTT", "t2SMLALD", "t2SMLALDX",
+ "t2SMLSLD", "t2SMLSLDX", "t2UMAAL")>;
+
+def : InstRW <[R52WriteDIV, R52Read_ISS, R52Read_ISS],
+ (instregex "SDIV", "UDIV", "t2SDIV", "t2UDIV")>;
+
+// Loads (except POST) with SHL > 2, or ror, require 2 extra cycles.
+// However, that's non-trivial to specify, so we keep it uniform.
+def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_ISS],
+ (instregex "LDR(i12|rs)$", "LDRB(i12|rs)$", "t2LDR(i8|i12|s|pci)",
+ "t2LDR(H|B)(i8|i12|s|pci)", "LDREX", "t2LDREX",
+ "tLDR[BH](r|i|spi|pci|pciASM)", "tLDR(r|i|spi|pci|pciASM)",
+ "LDRH$", "PICLDR$", "PICLDR(H|B)$", "LDRcp$",
+ "PICLDRS(H|B)$", "t2LDRS(H|B)(i|r|p|s)", "LDRS(H|B)$",
+ "t2LDRpci_pic", "tLDRS(B|H)", "t2LDRDi8", "LDRD$", "LDA", "t2LDA")>;
+def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_ISS],
+ (instregex "LD(RB|R)(_|T_)(POST|PRE)_(IMM|REG)", "LDRH(_PRE|_POST)",
+ "LDRBT_POST$", "LDR(T|BT)_POST_(REG|IMM)", "LDRHT(i|r)",
+ "t2LD(R|RB|RH)_(PRE|POST)", "t2LD(R|RB|RH)T",
+ "LDR(SH|SB)(_POST|_PRE)", "t2LDR(SH|SB)(_POST|_PRE)",
+ "LDRS(B|H)T(i|r)", "t2LDRS(B|H)T(i|r)", "t2LDRS(B|H)T",
+ "LDRD_(POST|PRE)", "t2LDRD_(POST|PRE)")>;
+
+def : InstRW<[R52WriteALU_EX2, R52Read_EX1], (instregex "MOVS?sr", "t2MOVS?sr")>;
+def : InstRW<[R52WriteALU_WRI, R52Read_EX2], (instregex "MOVT", "t2MOVT")>;
+
+def : InstRW<[R52WriteALU_EX2, R52Read_EX1], (instregex "AD(C|D)S?ri","ANDS?ri",
+ "BICS?ri", "CLZ", "EORri", "MVNS?r", "ORRri", "RSBS?ri", "RSCri", "SBCri",
+ "t2AD(C|D)S?ri", "t2ANDS?ri", "t2BICS?ri","t2CLZ", "t2EORri", "t2MVN",
+ "t2ORRri", "t2RSBS?ri", "t2SBCri")>;
+
+def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1], (instregex "AD(C|D)S?rr",
+ "ANDS?rr", "BICS?rr", "CRC*", "EORrr", "ORRrr", "RSBrr", "RSCrr", "SBCrr",
+ "t2AD(C|D)S?rr", "t2ANDS?rr", "t2BICS?rr", "t2CRC", "t2EORrr", "t2SBCrr")>;
+
+def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS], (instregex "AD(C|D)S?rsi",
+ "ANDS?rsi", "BICS?rsi", "EORrsi", "ORRrsi", "RSBrsi", "RSCrsi", "SBCrsi",
+ "t2AD(|D)S?rsi", "t2ANDS?rsi", "t2BICS?rsi", "t2EORrsi", "t2ORRrsi", "t2RSBrsi", "t2SBCrsi")>;
+
+def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS, R52Read_ISS],
+ (instregex "AD(C|D)S?rsr", "ANDS?rsr", "BICS?rsr", "EORrsr", "MVNS?sr",
+ "ORRrsrr", "RSBrsr", "RSCrsr", "SBCrsr")>;
+
+def : InstRW<[R52WriteALU_EX1],
+ (instregex "ADR", "MOVSi", "MOVSsi", "MOVST?i16*", "MVNS?s?i", "t2MOVS?si")>;
+
+def : InstRW<[R52WriteALU_EX1, R52Read_ISS], (instregex "ASRi", "RORS?i")>;
+def : InstRW<[R52WriteALU_EX1, R52Read_ISS, R52Read_ISS],
+ (instregex "ASRr", "RORS?r", "LSR", "LSL")>;
+
+def : InstRW<[R52WriteCC, R52Read_EX1], (instregex "CMPri", "CMNri")>;
+def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_EX1], (instregex "CMPrr", "CMNzrr")>;
+def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_ISS], (instregex "CMPrsi", "CMNzrsi")>;
+def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_ISS, R52Read_ISS], (instregex "CMPrsr", "CMNzrsr")>;
+
+def : InstRW<[R52WriteALU_EX2, R52Read_ISS],
+ (instregex "t2LDC", "RBIT", "REV", "REV16", "REVSH", "RRX")>;
+
+def : InstRW<[R52WriteCC, R52Read_ISS], (instregex "TST")>;
+
+def : InstRW<[R52WriteLd], (instregex "MRS", "MRSbanked")>;
+def : InstRW<[R52WriteLd, R52Read_EX1], (instregex "MSR", "MSRbanked")>;
+
+//def : InstRW<[R52WriteLd, R52Read_ISS], (instregex "^LDRB?(_PRE_IMM|_POST_IMM)", "LDRrs")>;
+//def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_ISS], (instregex "^LDRB?_PRE_REG", "LDRB?rr")>;
+//def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_ISS], (instregex "^LDRB?_POST_REG")>;
+
+//def : InstRW<[R52WriteST, R52Read_ISS], (instregex "STRi12", "PICSTR")>;
+//def : InstRW<[R52WriteST, R52WriteAdr, R52Read_ISS, R52Read_EX2], (instregex "t2STRB?_PRE_REG", "STRB?_PRE_REG")>;
+//def : InstRW<[R52WriteST, R52WriteAdr, R52Read_ISS, R52Read_EX2], (instregex "t2STRB?_POST_REG", "STRB?_POST_REG")>;
+
+
+// Integer Load, Multiple.
+foreach Lat = 3-25 in {
+ def R52WriteILDM#Lat#Cy : SchedWriteRes<[R52UnitLd]> {
+ let Latency = Lat;
+ }
+ def R52WriteILDM#Lat#CyNo : SchedWriteRes<[]> {
+ let Latency = Lat;
+ let NumMicroOps = 0;
+ }
+}
+foreach NAddr = 1-16 in {
+ def R52ILDMAddr#NAddr#Pred : SchedPredicate<"TII->getNumLDMAddresses(*MI) == "#NAddr>;
+}
+def R52WriteILDMAddrNoWB : SchedWriteRes<[R52UnitLd]> { let Latency = 0; }
+def R52WriteILDMAddrWB : SchedWriteRes<[R52UnitLd]>;
+def R52WriteILDM : SchedWriteVariant<[
+ SchedVar<R52ILDMAddr2Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy]>,
+
+ SchedVar<R52ILDMAddr3Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
+ R52WriteILDM6Cy]>,
+ SchedVar<R52ILDMAddr4Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
+ R52WriteILDM6Cy, R52WriteILDM7Cy]>,
+
+ SchedVar<R52ILDMAddr5Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
+ R52WriteILDM6Cy, R52WriteILDM7Cy,
+ R52WriteILDM8Cy]>,
+ SchedVar<R52ILDMAddr6Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
+ R52WriteILDM6Cy, R52WriteILDM7Cy,
+ R52WriteILDM8Cy, R52WriteILDM9Cy]>,
+
+ SchedVar<R52ILDMAddr7Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
+ R52WriteILDM6Cy, R52WriteILDM7Cy,
+ R52WriteILDM8Cy, R52WriteILDM9Cy,
+ R52WriteILDM10Cy]>,
+ SchedVar<R52ILDMAddr8Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
+ R52WriteILDM6Cy, R52WriteILDM7Cy,
+ R52WriteILDM8Cy, R52WriteILDM9Cy,
+ R52WriteILDM10Cy, R52WriteILDM11Cy]>,
+
+ SchedVar<R52ILDMAddr9Pred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
+ R52WriteILDM6Cy, R52WriteILDM7Cy,
+ R52WriteILDM8Cy, R52WriteILDM9Cy,
+ R52WriteILDM10Cy, R52WriteILDM11Cy,
+ R52WriteILDM12Cy]>,
+ SchedVar<R52ILDMAddr10Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
+ R52WriteILDM6Cy, R52WriteILDM7Cy,
+ R52WriteILDM8Cy, R52WriteILDM9Cy,
+ R52WriteILDM10Cy, R52WriteILDM11Cy,
+ R52WriteILDM12Cy, R52WriteILDM13Cy]>,
+
+ SchedVar<R52ILDMAddr11Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
+ R52WriteILDM6Cy, R52WriteILDM7Cy,
+ R52WriteILDM8Cy, R52WriteILDM9Cy,
+ R52WriteILDM10Cy, R52WriteILDM11Cy,
+ R52WriteILDM12Cy, R52WriteILDM13Cy,
+ R52WriteILDM14Cy]>,
+ SchedVar<R52ILDMAddr12Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
+ R52WriteILDM6Cy, R52WriteILDM7Cy,
+ R52WriteILDM8Cy, R52WriteILDM9Cy,
+ R52WriteILDM10Cy, R52WriteILDM11Cy,
+ R52WriteILDM12Cy, R52WriteILDM13Cy,
+ R52WriteILDM14Cy, R52WriteILDM15Cy]>,
+
+ SchedVar<R52ILDMAddr13Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
+ R52WriteILDM6Cy, R52WriteILDM7Cy,
+ R52WriteILDM8Cy, R52WriteILDM9Cy,
+ R52WriteILDM10Cy, R52WriteILDM11Cy,
+ R52WriteILDM12Cy, R52WriteILDM13Cy,
+ R52WriteILDM14Cy, R52WriteILDM15Cy,
+ R52WriteILDM16Cy]>,
+ SchedVar<R52ILDMAddr14Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
+ R52WriteILDM6Cy, R52WriteILDM7Cy,
+ R52WriteILDM8Cy, R52WriteILDM9Cy,
+ R52WriteILDM10Cy, R52WriteILDM11Cy,
+ R52WriteILDM12Cy, R52WriteILDM13Cy,
+ R52WriteILDM14Cy, R52WriteILDM15Cy,
+ R52WriteILDM16Cy, R52WriteILDM17Cy]>,
+
+ SchedVar<R52ILDMAddr15Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
+ R52WriteILDM6Cy, R52WriteILDM7Cy,
+ R52WriteILDM8Cy, R52WriteILDM9Cy,
+ R52WriteILDM10Cy, R52WriteILDM11Cy,
+ R52WriteILDM12Cy, R52WriteILDM13Cy,
+ R52WriteILDM14Cy, R52WriteILDM15Cy,
+ R52WriteILDM16Cy, R52WriteILDM17Cy,
+ R52WriteILDM18Cy]>,
+ SchedVar<R52ILDMAddr15Pred,[R52WriteILDM4Cy, R52WriteILDM5Cy,
+ R52WriteILDM6Cy, R52WriteILDM7Cy,
+ R52WriteILDM8Cy, R52WriteILDM9Cy,
+ R52WriteILDM10Cy, R52WriteILDM11Cy,
+ R52WriteILDM12Cy, R52WriteILDM13Cy,
+ R52WriteILDM14Cy, R52WriteILDM15Cy,
+ R52WriteILDM16Cy, R52WriteILDM17Cy,
+ R52WriteILDM18Cy, R52WriteILDM19Cy]>,
+
+// Unknown number of registers, just use resources for two registers.
+ SchedVar<NoSchedPred, [R52WriteILDM4Cy, R52WriteILDM5Cy,
+ R52WriteILDM6CyNo, R52WriteILDM7CyNo,
+ R52WriteILDM8CyNo, R52WriteILDM9CyNo,
+ R52WriteILDM10CyNo, R52WriteILDM11CyNo,
+ R52WriteILDM12CyNo, R52WriteILDM13CyNo,
+ R52WriteILDM14CyNo, R52WriteILDM15CyNo,
+ R52WriteILDM16CyNo, R52WriteILDM17CyNo,
+ R52WriteILDM18Cy, R52WriteILDM19Cy]>
+]> { let Variadic=1; }
+
+// Integer Store, Multiple
+def R52WriteIStIncAddr : SchedWriteRes<[R52UnitLd]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+foreach NumAddr = 1-16 in {
+ def R52WriteISTM#NumAddr : WriteSequence<[R52WriteIStIncAddr], NumAddr>;
+}
+def R52WriteISTM : SchedWriteVariant<[
+ SchedVar<R52ILDMAddr2Pred, [R52WriteISTM2]>,
+ SchedVar<R52ILDMAddr3Pred, [R52WriteISTM3]>,
+ SchedVar<R52ILDMAddr4Pred, [R52WriteISTM4]>,
+ SchedVar<R52ILDMAddr5Pred, [R52WriteISTM5]>,
+ SchedVar<R52ILDMAddr6Pred, [R52WriteISTM6]>,
+ SchedVar<R52ILDMAddr7Pred, [R52WriteISTM7]>,
+ SchedVar<R52ILDMAddr8Pred, [R52WriteISTM8]>,
+ SchedVar<R52ILDMAddr9Pred, [R52WriteISTM9]>,
+ SchedVar<R52ILDMAddr10Pred,[R52WriteISTM10]>,
+ SchedVar<R52ILDMAddr11Pred,[R52WriteISTM11]>,
+ SchedVar<R52ILDMAddr12Pred,[R52WriteISTM12]>,
+ SchedVar<R52ILDMAddr13Pred,[R52WriteISTM13]>,
+ SchedVar<R52ILDMAddr14Pred,[R52WriteISTM14]>,
+ SchedVar<R52ILDMAddr15Pred,[R52WriteISTM15]>,
+ SchedVar<R52ILDMAddr16Pred,[R52WriteISTM16]>,
+ // Unknown number of registers, just use resources for two registers.
+ SchedVar<NoSchedPred, [R52WriteISTM2]>
+]>;
+
+def : InstRW<[R52WriteILDM, R52Read_ISS],
+ (instregex "LDM(IA|DA|DB|IB)$", "t2LDM(IA|DA|DB|IB)$",
+ "(t|sys)LDM(IA|DA|DB|IB)$")>;
+def : InstRW<[R52WriteILDM, R52WriteAdr, R52Read_ISS],
+ (instregex "LDM(IA|DA|DB|IB)_UPD", "(t2|sys|t)LDM(IA|DA|DB|IB)_UPD")>;
+def : InstRW<[R52WriteILDM, R52WriteAdr, R52Read_ISS],
+ (instregex "LDMIA_RET", "(t|t2)LDMIA_RET", "POP", "tPOP")>;
+
+// Integer Store, Single Element
+def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_EX2],
+ (instregex "PICSTR", "STR(i12|rs)", "STRB(i12|rs)", "STRH$", "STREX", "SRS", "t2SRS",
+ "t2SRSDB", "t2STREX", "t2STREXB", "t2STREXD", "t2STREXH", "t2STR(i12|i8|s)$",
+ "RFE", "t2RFE", "t2STR[BH](i12|i8|s)$", "tSTR[BH](i|r)", "tSTR(i|r)", "tSTRspi")>;
+
+def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_EX2],
+ (instregex "STR(B_|_|BT_|T_)(PRE_IMM|PRE_REG|POST_REG|POST_IMM)",
+ "STR(i|r)_preidx", "STRB(i|r)_preidx", "STRH_preidx", "STR(H_|HT_)(PRE|POST)",
+ "STR(BT|HT|T)", "t2STR_(PRE|POST)", "t2STR[BH]_(PRE|POST)",
+ "t2STR_preidx", "t2STR[BH]_preidx", "t2ST(RB|RH|R)T")>;
+
+// Integer Store, Dual
+def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_EX2],
+ (instregex "STRD$", "t2STRDi8", "STL", "t2STRD$", "t2STL")>;
+def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_EX2],
+ (instregex "(t2|t)STRD_(POST|PRE)", "STRD_(POST|PRE)")>;
+
+def : InstRW<[R52WriteISTM, R52Read_ISS, R52Read_EX2],
+ (instregex "STM(IB|IA|DB|DA)$", "(t2|sys|t)STM(IB|IA|DB|DA)$")>;
+def : InstRW<[R52WriteISTM, R52WriteAdr, R52Read_ISS, R52Read_EX2],
+ (instregex "STM(IB|IA|DB|DA)_UPD", "(t2|sys|t)STM(IB|IA|DB|DA)_UPD",
+ "PUSH", "tPUSH")>;
+
+// LDRLIT pseudo instructions, they expand to LDR + PICADD
+def : InstRW<[R52WriteLd],
+ (instregex "t?LDRLIT_ga_abs", "t?LDRLIT_ga_pcrel")>;
+// LDRLIT_ga_pcrel_ldr expands to LDR + PICLDR
+def : InstRW<[R52WriteLd], (instregex "LDRLIT_ga_pcrel_ldr")>;
+
+
+
+//===----------------------------------------------------------------------===//
+// VFP, Floating Point Support
+def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1], (instregex "VABD(fd|hd)")>;
+def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1], (instregex "VABD(fq|hq)")>;
+
+def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VABS(D|S|H)")>;
+def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VABS(fd|hd)")>;
+def : InstRW<[R52Write2FPALU_F5, R52Read_F1], (instregex "VABS(fq|hq)")>;
+
+def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "(VACGE|VACGT)(fd|hd)")>;
+def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F1], (instregex "(VACGE|VACGT)(fq|hq)")>;
+
+def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1], (instregex "(VADD|VSUB)(D|S|H|fd|hd)")>;
+def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1], (instregex "(VADD|VSUB)(fq|hq)")>;
+
+def : InstRW<[R52WriteFPDIV_SP, R52Read_F0, R52Read_F0], (instregex "VDIV(S|H)")>;
+def : InstRW<[R52WriteFPDIV_DP, R52Read_F0, R52Read_F0], (instregex "VDIVD")>;
+
+def : InstRW<[R52WriteFPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1],
+ (instregex "(VFMA|VFMS|VFNMA|VFNMS)(D|H|S)")>;
+
+def : InstRW<[R52WriteFPLd_F4, R52Read_ISS, R52Read_F1], (instregex "VLDR")>;
+def : InstRW<[R52WriteFPST_F4, R52Read_ISS, R52Read_F1], (instregex "VSTR")>;
+
+
+//===----------------------------------------------------------------------===//
+// Neon Support
+
+// vector multiple load stores
+foreach NumAddr = 1-16 in {
+ def R52LMAddrPred#NumAddr :
+ SchedPredicate<"MI->getNumOperands() == "#NumAddr>;
+}
+foreach Lat = 1-32 in {
+ def R52WriteLM#Lat#Cy : SchedWriteRes<[]> {
+ let Latency = Lat;
+ }
+}
+foreach Num = 1-32 in { // reserve LdSt resource, no dual-issue
+ def R52ReserveLd#Num#Cy : SchedWriteRes<[R52UnitLd]> {
+ let Latency = 0;
+ let NumMicroOps = Num;
+ let ResourceCycles = [Num];
+ }
+}
+def R52WriteVLDM : SchedWriteVariant<[
+ // 1 D reg
+ SchedVar<R52LMAddrPred1, [R52WriteLM5Cy,
+ R52ReserveLd5Cy]>,
+ SchedVar<R52LMAddrPred2, [R52WriteLM5Cy,
+ R52ReserveLd5Cy]>,
+
+ // 2 D reg
+ SchedVar<R52LMAddrPred3, [R52WriteLM5Cy, R52WriteLM6Cy,
+ R52ReserveLd6Cy]>,
+ SchedVar<R52LMAddrPred4, [R52WriteLM5Cy, R52WriteLM6Cy,
+ R52ReserveLd6Cy]>,
+
+ // 3 D reg
+ SchedVar<R52LMAddrPred5, [R52WriteLM5Cy, R52WriteLM6Cy,
+ R52WriteLM7Cy,
+ R52ReserveLd4Cy]>,
+ SchedVar<R52LMAddrPred6, [R52WriteLM5Cy, R52WriteLM6Cy,
+ R52WriteLM7Cy,
+ R52ReserveLd7Cy]>,
+
+ // 4 D reg
+ SchedVar<R52LMAddrPred7, [R52WriteLM5Cy, R52WriteLM6Cy,
+ R52WriteLM7Cy, R52WriteLM8Cy,
+ R52ReserveLd8Cy]>,
+ SchedVar<R52LMAddrPred8, [R52WriteLM5Cy, R52WriteLM6Cy,
+ R52WriteLM7Cy, R52WriteLM8Cy,
+ R52ReserveLd8Cy]>,
+
+ // 5 D reg
+ SchedVar<R52LMAddrPred9, [R52WriteLM5Cy, R52WriteLM6Cy,
+ R52WriteLM7Cy, R52WriteLM8Cy,
+ R52WriteLM9Cy,
+ R52ReserveLd9Cy]>,
+ SchedVar<R52LMAddrPred10, [R52WriteLM5Cy, R52WriteLM6Cy,
+ R52WriteLM7Cy, R52WriteLM8Cy,
+ R52WriteLM9Cy,
+ R52ReserveLd9Cy]>,
+
+ // 6 D reg
+ SchedVar<R52LMAddrPred11, [R52WriteLM5Cy, R52WriteLM6Cy,
+ R52WriteLM7Cy, R52WriteLM8Cy,
+ R52WriteLM9Cy, R52WriteLM10Cy,
+ R52ReserveLd10Cy]>,
+ SchedVar<R52LMAddrPred12, [R52WriteLM5Cy, R52WriteLM6Cy,
+ R52WriteLM7Cy, R52WriteLM8Cy,
+ R52WriteLM9Cy, R52WriteLM10Cy,
+ R52ReserveLd10Cy]>,
+
+ // 7 D reg
+ SchedVar<R52LMAddrPred13, [R52WriteLM5Cy, R52WriteLM6Cy,
+ R52WriteLM7Cy, R52WriteLM8Cy,
+ R52WriteLM9Cy, R52WriteLM10Cy,
+ R52WriteLM11Cy,
+ R52ReserveLd11Cy]>,
+ SchedVar<R52LMAddrPred14, [R52WriteLM5Cy, R52WriteLM6Cy,
+ R52WriteLM7Cy, R52WriteLM8Cy,
+ R52WriteLM9Cy, R52WriteLM10Cy,
+ R52WriteLM11Cy,
+ R52ReserveLd11Cy]>,
+
+ // 8 D reg
+ SchedVar<R52LMAddrPred14, [R52WriteLM5Cy, R52WriteLM6Cy,
+ R52WriteLM7Cy, R52WriteLM8Cy,
+ R52WriteLM9Cy, R52WriteLM10Cy,
+ R52WriteLM11Cy, R52WriteLM12Cy,
+ R52ReserveLd12Cy]>,
+ SchedVar<R52LMAddrPred15, [R52WriteLM5Cy, R52WriteLM6Cy,
+ R52WriteLM7Cy, R52WriteLM8Cy,
+ R52WriteLM9Cy, R52WriteLM10Cy,
+ R52WriteLM11Cy, R52WriteLM12Cy,
+ R52ReserveLd12Cy]>,
+ // unknown number of registers.
+ SchedVar<NoSchedPred, [R52WriteLM5Cy, R52WriteLM6Cy,
+ R52WriteLM7Cy, R52WriteLM8Cy,
+ R52WriteLM9Cy, R52WriteLM10Cy,
+ R52WriteLM11Cy, R52WriteLM12Cy,
+ R52ReserveLd5Cy]>
+]> { let Variadic=1;}
+
+// variable stores. Cannot dual-issue
+def R52WriteSTM5 : SchedWriteRes<[R52UnitLd]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+ let ResourceCycles = [1];
+}
+def R52WriteSTM6 : SchedWriteRes<[R52UnitLd]> {
+ let Latency = 6;
+ let NumMicroOps = 4;
+ let ResourceCycles = [2];
+}
+def R52WriteSTM7 : SchedWriteRes<[R52UnitLd]> {
+ let Latency = 7;
+ let NumMicroOps = 6;
+ let ResourceCycles = [3];
+}
+def R52WriteSTM8 : SchedWriteRes<[R52UnitLd]> {
+ let Latency = 8;
+ let NumMicroOps = 8;
+ let ResourceCycles = [4];
+}
+def R52WriteSTM9 : SchedWriteRes<[R52UnitLd]> {
+ let Latency = 9;
+ let NumMicroOps = 10;
+ let ResourceCycles = [5];
+}
+def R52WriteSTM10 : SchedWriteRes<[R52UnitLd]> {
+ let Latency = 10;
+ let NumMicroOps = 12;
+ let ResourceCycles = [6];
+}
+def R52WriteSTM11 : SchedWriteRes<[R52UnitLd]> {
+ let Latency = 11;
+ let NumMicroOps = 14;
+ let ResourceCycles = [7];
+}
+def R52WriteSTM12 : SchedWriteRes<[R52UnitLd]> {
+ let Latency = 12;
+ let NumMicroOps = 16;
+ let ResourceCycles = [8];
+}
+def R52WriteSTM13 : SchedWriteRes<[R52UnitLd]> {
+ let Latency = 13;
+ let NumMicroOps = 18;
+ let ResourceCycles = [9];
+}
+def R52WriteSTM14 : SchedWriteRes<[R52UnitLd]> {
+ let Latency = 14;
+ let NumMicroOps = 20;
+ let ResourceCycles = [10];
+}
+def R52WriteSTM15 : SchedWriteRes<[R52UnitLd]> {
+ let Latency = 15;
+ let NumMicroOps = 22;
+ let ResourceCycles = [11];
+}
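+// The definitions above follow a simple pattern (an observation, not a
+// separate constraint): storing N registers takes Latency = 4 + N, issues
+// 2 * N micro-ops and reserves the LdSt unit for N cycles, so multi-register
+// stores cannot dual-issue.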
+
+def R52WriteSTM : SchedWriteVariant<[
+ SchedVar<R52LMAddrPred1, [R52WriteSTM5]>,
+ SchedVar<R52LMAddrPred2, [R52WriteSTM5]>,
+ SchedVar<R52LMAddrPred3, [R52WriteSTM6]>,
+ SchedVar<R52LMAddrPred4, [R52WriteSTM6]>,
+ SchedVar<R52LMAddrPred5, [R52WriteSTM7]>,
+ SchedVar<R52LMAddrPred6, [R52WriteSTM7]>,
+ SchedVar<R52LMAddrPred7, [R52WriteSTM8]>,
+ SchedVar<R52LMAddrPred8, [R52WriteSTM8]>,
+ SchedVar<R52LMAddrPred9, [R52WriteSTM9]>,
+ SchedVar<R52LMAddrPred10, [R52WriteSTM9]>,
+ SchedVar<R52LMAddrPred11, [R52WriteSTM10]>,
+ SchedVar<R52LMAddrPred12, [R52WriteSTM10]>,
+ SchedVar<R52LMAddrPred13, [R52WriteSTM11]>,
+ SchedVar<R52LMAddrPred14, [R52WriteSTM11]>,
+ SchedVar<R52LMAddrPred15, [R52WriteSTM12]>,
+ SchedVar<R52LMAddrPred16, [R52WriteSTM12]>,
+ // unknown number of registers, just use resources for two
+ SchedVar<NoSchedPred, [R52WriteSTM6]>
+]>;
+
+// Vector Load/Stores. Can issue only in slot-0. Can dual-issue with
+// another instruction in slot-1, but only in the last issue.
+def R52WriteVLD1Mem : SchedWriteRes<[R52UnitLd]> { let Latency = 5;}
+def R52WriteVLD2Mem : SchedWriteRes<[R52UnitLd]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2];
+}
+def R52WriteVLD3Mem : SchedWriteRes<[R52UnitLd]> {
+ let Latency = 7;
+ let NumMicroOps = 5;
+ let ResourceCycles = [3];
+}
+def R52WriteVLD4Mem : SchedWriteRes<[R52UnitLd]> {
+ let Latency = 8;
+ let NumMicroOps = 7;
+ let ResourceCycles = [4];
+}
+def R52WriteVST1Mem : SchedWriteRes<[R52UnitLd]> {
+ let Latency = 5;
+ let NumMicroOps = 1;
+ let ResourceCycles = [1];
+}
+def R52WriteVST2Mem : SchedWriteRes<[R52UnitLd]> {
+ let Latency = 6;
+ let NumMicroOps = 3;
+ let ResourceCycles = [2];
+}
+def R52WriteVST3Mem : SchedWriteRes<[R52UnitLd]> {
+ let Latency = 7;
+ let NumMicroOps = 5;
+ let ResourceCycles = [3];
+}
+def R52WriteVST4Mem : SchedWriteRes<[R52UnitLd]> {
+ let Latency = 8;
+ let NumMicroOps = 7;
+ let ResourceCycles = [4];
+}
+def R52WriteVST5Mem : SchedWriteRes<[R52UnitLd]> {
+ let Latency = 9;
+ let NumMicroOps = 9;
+ let ResourceCycles = [5];
+}
+
+
+def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VABA(u|s)(v8i8|v4i16|v2i32)")>;
+def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VABA(u|s)(v16i8|v8i16|v4i32)")>;
+def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VABAL(u|s)(v8i16|v4i32|v2i64)")>;
+
+def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VABD(u|s)(v8i8|v4i16|v2i32)")>;
+def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], (instregex "VABD(u|s)(v16i8|v8i16|v4i32)")>;
+def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], (instregex "VABDL(u|s)(v16i8|v8i16|v4i32)")>;
+
+def : InstRW<[R52Write2FPALU_F4, R52Read_F1], (instregex "VABS(v16i8|v8i16|v4i32)")>;
+
+def : InstRW<[R52WriteFPALU_F4, R52Read_F2, R52Read_F2],
+ (instregex "(VADD|VSUB)(v8i8|v4i16|v2i32|v1i64)")>;
+def : InstRW<[R52Write2FPALU_F4, R52Read_F2, R52Read_F2],
+ (instregex "(VADD|VSUB)(v16i8|v8i16|v4i32|v2i64)")>;
+def : InstRW<[R52Write2FPALU_F5, R52Read_F2, R52Read_F2],
+ (instregex "(VADDHN|VRADDHN|VSUBHN|VRSUBHN)(v8i8|v4i16|v2i32)")>;
+
+def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1],
+ (instregex "VADDL", "VADDW", "VSUBL", "VSUBW")>;
+
+def : InstRW<[R52WriteFPALU_F3, R52Read_F2, R52Read_F2], (instregex "(VAND|VBIC|VEOR)d")>;
+def : InstRW<[R52Write2FPALU_F3, R52Read_F2, R52Read_F2], (instregex "(VAND|VBIC|VEOR)q")>;
+
+def : InstRW<[R52WriteFPALU_F3, R52Read_F2], (instregex "VBICi(v4i16|v2i32)")>;
+def : InstRW<[R52Write2FPALU_F3, R52Read_F2], (instregex "VBICi(v8i16|v4i32)")>;
+
+def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL)d")>;
+def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL)q")>;
+
+def : InstRW<[R52Write2FPALU_F3, R52Read_F2], (instregex "VBICi(v8i16|v4i32)")>;
+
+def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
+ (instregex "(VCEQ|VCGE|VCGT|VCLE|VCLT|VCLZ|VCMP|VCMPE|VCNT)")>;
+def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
+ (instregex "VCVT", "VSITO", "VUITO", "VTO")>;
+
+def : InstRW<[R52WriteFPALU_F3, R52Read_ISS], (instregex "VDUP(8|16|32)d")>;
+def : InstRW<[R52Write2FPALU_F3, R52Read_ISS], (instregex "VDUP(8|16|32)q")>;
+def : InstRW<[R52WriteFPALU_F3, R52Read_F1], (instregex "VDUPLN(8|16|32)d")>;
+def : InstRW<[R52Write2FPALU_F3, R52Read_F1], (instregex "VDUPLN(8|16|32)q")>;
+
+def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VEXTd(8|16|32)", "VSEL")>;
+def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F1], (instregex "VEXTq(8|16|32|64)")>;
+
+def : InstRW<[R52WriteFPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "(VFMA|VFMS)(f|h)d")>;
+def : InstRW<[R52Write2FPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "(VFMA|VFMS)(f|h)q")>;
+
+def : InstRW<[R52WriteFPALU_F4, R52Read_F2, R52Read_F2], (instregex "(VHADD|VHSUB)(u|s)(v8i8|v4i16|v2i32)")>;
+def : InstRW<[R52Write2FPALU_F4, R52Read_F2, R52Read_F2], (instregex "(VHADD|VHSUB)(u|s)(v16i8|v8i16|v4i32)")>;
+
+def : InstRW<[R52WriteVLDM], (instregex "VLDM[SD](IA|DB)$")>;
+def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VMAX", "VMIN", "VPMAX", "VPMIN")>;
+def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VMOV", "VORR", "VORN", "VREV")>;
+def : InstRW<[R52WriteNoRSRC_WRI], (instregex "VMRS")>;
+def : InstRW<[R52WriteFPMUL_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VMUL", "VNMUL", "VMLA")>;
+def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VNEG")>;
+def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VPADDi")>;
+def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1], (instregex "VPADAL", "VPADDL")>;
+def : InstRW<[R52WriteFPALU_F5, R52Read_F1], (instregex "VQABS(v8i8|v4i16|v2i32|v1i64)")>;
+def : InstRW<[R52Write2FPALU_F5, R52Read_F1], (instregex "VQABS(v16i8|v8i16|v4i32|v2i64)")>;
+def : InstRW<[R52WriteFPALU_F5, R52Read_F2, R52Read_F2],
+ (instregex "(VQADD|VQSUB)(u|s)(v8i8|v4i16|v2i32|v1i64)")>;
+def : InstRW<[R52Write2FPALU_F5, R52Read_F2, R52Read_F2],
+ (instregex "(VQADD|VQSUB)(u|s)(v16i8|v8i16|v4i32|v2i64)")>;
+def : InstRW<[R52Write2FPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VQDMLAL", "VQDMLSL")>;
+def : InstRW<[R52WriteFPMUL_F5, R52Read_F1, R52Read_F1, R52Read_F1], (instregex "VQDMUL","VQRDMUL")>;
+def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
+ (instregex "VQMOVN", "VQNEG", "VQSHL", "VQSHRN")>;
+def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1], (instregex "VRSHL", "VRSHR", "VRSHRN", "VTB")>;
+def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1], (instregex "VSWP", "VTRN", "VUZP", "VZIP")>;
+
+//---
+// VLDx. Vector Loads
+//---
+// 1-element structure load
+def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD1d(8|16|32|64)$")>;
+def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD1q(8|16|32|64)$")>;
+def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD1d(8|16|32|64)T$")>;
+def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD1d(8|16|32|64)Q$")>;
+def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD1d64TPseudo$")>;
+def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD1d64QPseudo$")>;
+
+def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD1(LN|DUP)d(8|16|32)$")>;
+def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD1LNdAsm_(8|16|32)")>;
+def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD1(LN|DUP)q(8|16|32)Pseudo$")>;
+
+def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d(8|16|32|64)wb")>;
+def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1q(8|16|32|64)wb")>;
+def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d(8|16|32|64)Twb")>;
+def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d(8|16|32|64)Qwb")>;
+def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d64TPseudoWB")>;
+def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1d64QPseudoWB")>;
+
+def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1LNd(8|16|32)_UPD")>;
+def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1LNdWB_(fixed|register)_Asm_(8|16|32)")>;
+def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1DUP(d|q)(8|16|32)wb")>;
+def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD1(LN|DUP)q(8|16|32)Pseudo_UPD")>;
+
+// 2-element structure load
+def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD2(d|b)(8|16|32)$")>;
+def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD2q(8|16|32)$")>;
+def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2(d|b)(8|16|32)wb")>;
+def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2q(8|16|32)wb")>;
+def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD2q(8|16|32)Pseudo$")>;
+def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2q(8|16|32)PseudoWB")>;
+
+def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNd(8|16|32)$")>;
+def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNdAsm_(8|16|32)$")>;
+def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNq(16|32)$")>;
+def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNqAsm_(16|32)$")>;
+def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)$")>;
+def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)x2$")>;
+def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNd(8|16|32)Pseudo")>;
+def : InstRW<[R52WriteVLD1Mem, R52Read_ISS], (instregex "VLD2LNq(16|32)Pseudo")>;
+
+def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNd(8|16|32)_UPD")>;
+def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNdWB_(fixed|register)_Asm_(8|16|32)")>;
+
+def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNq(16|32)_UPD")>;
+def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNqWB_(fixed|register)_Asm_(16|32)")>;
+
+def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)wb")>;
+def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2DUPd(8|16|32)x2wb")>;
+def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNd(8|16|32)Pseudo_UPD")>;
+def : InstRW<[R52WriteVLD1Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD2LNq(16|32)Pseudo_UPD")>;
+
+// 3-element structure load
+def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)$")>;
+def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD3(d|q)Asm_(8|16|32)$")>;
+def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)_UPD")>;
+def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
+def : InstRW<[R52WriteVLD3Mem, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo")>;
+def : InstRW<[R52WriteVLD3Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo_UPD")>;
+
+def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)$")>;
+def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)Asm_(8|16|32)$")>;
+def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo$")>;
+
+def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)_UPD")>;
+def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
+def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
+def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD3(LN|DUP)(d|q)(8|16|32)Pseudo_UPD")>;
+
+// 4-element structure load
+def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)$")>;
+def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD4(d|q)Asm_(8|16|32)$")>;
+def : InstRW<[R52WriteVLD4Mem, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo")>;
+def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)_UPD")>;
+def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
+def : InstRW<[R52WriteVLD4Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo_UPD")>;
+
+
+def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)(8|16|32)$")>;
+def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)Asm_(8|16|32)$")>;
+def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4LN(d|q)(8|16|32)Pseudo$")>;
+def : InstRW<[R52WriteVLD2Mem, R52Read_ISS], (instregex "VLD4DUPd(8|16|32)Pseudo$")>;
+def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)(8|16|32)_UPD")>;
+def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
+def : InstRW<[R52WriteVLD2Mem, R52WriteAdr, R52Read_ISS], (instregex "VLD4(LN|DUP)(d|q)(8|16|32)Pseudo_UPD")>;
+
+//---
+// VSTx. Vector Stores
+//---
+// 1-element structure store
+def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)$")>;
+def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST1q(8|16|32|64)$")>;
+def : InstRW<[R52WriteVST3Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)T$")>;
+def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)Q$")>;
+def : InstRW<[R52WriteVST3Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d64TPseudo$")>;
+def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST1d64QPseudo$")>;
+
+def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST1LNd(8|16|32)$")>;
+def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST1LNdAsm_(8|16|32)$")>;
+def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST1LNq(8|16|32)Pseudo$")>;
+
+def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)wb")>;
+def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1q(8|16|32|64)wb")>;
+def : InstRW<[R52WriteVST3Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)Twb")>;
+def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d(8|16|32|64)Qwb")>;
+def : InstRW<[R52WriteVST3Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d64TPseudoWB")>;
+def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1d64QPseudoWB")>;
+
+def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1LNd(8|16|32)_UPD")>;
+def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1LNdWB_(fixed|register)_Asm_(8|16|32)")>;
+def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST1LNq(8|16|32)Pseudo_UPD")>;
+
+// 2-element structure store
+def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST2(d|b)(8|16|32)$")>;
+def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST2q(8|16|32)$")>;
+def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST2q(8|16|32)Pseudo$")>;
+
+def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNd(8|16|32)$")>;
+def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNdAsm_(8|16|32)$")>;
+def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNd(8|16|32)Pseudo$")>;
+def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNq(16|32)$")>;
+def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNqAsm_(16|32)$")>;
+def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2], (instregex "VST2LNq(16|32)Pseudo$")>;
+
+def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2(d|b)(8|16|32)wb")>;
+def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2q(8|16|32)wb")>;
+def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2q(8|16|32)PseudoWB")>;
+
+def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNd(8|16|32)_UPD")>;
+def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNdWB_(fixed|register)_Asm_(8|16|32)")>;
+def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNd(8|16|32)Pseudo_UPD")>;
+def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNq(16|32)_UPD")>;
+def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNqWB_(fixed|register)_Asm_(16|32)")>;
+def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST2LNq(16|32)Pseudo_UPD")>;
+
+// 3-element structure store
+def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)(8|16|32)$")>;
+def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)Asm_(8|16|32)$")>;
+def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2], (instregex "VST3d(8|16|32)(oddP|P)seudo$")>;
+
+def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)$")>;
+def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNdAsm_(8|16|32)$")>;
+def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)Pseudo$")>;
+def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)$")>;
+def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNqAsm_(16|32)$")>;
+def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)Pseudo$")>;
+
+def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)(8|16|32)_UPD$")>;
+def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)WB_(fixed|register)_Asm_(8|16|32)$")>;
+def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;
+
+def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)_UPD$")>;
+def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNdWB_(fixed|register)_Asm_(8|16|32)")>;
+def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNd(8|16|32)Pseudo_UPD$")>;
+def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)_UPD$")>;
+def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNqWB_(fixed|register)_Asm_(16|32)$")>;
+def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST3LNq(16|32)Pseudo_UPD$")>;
+
+// 4-element structure store
+def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)(8|16|32)$")>;
+def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)Asm_(8|16|32)$")>;
+def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2], (instregex "VST4d(8|16|32)Pseudo$")>;
+
+def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)$")>;
+def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNdAsm_(8|16|32)$")>;
+def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)Pseudo$")>;
+def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)$")>;
+def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNqAsm_(16|32)$")>;
+def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)Pseudo$")>;
+
+def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)(8|16|32)_UPD")>;
+def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
+def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4(d|q)(8|16|32)(oddP|P)seudo_UPD")>;
+
+def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)_UPD")>;
+def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNdWB_(fixed|register)_Asm_(8|16|32)")>;
+def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNd(8|16|32)Pseudo_UPD")>;
+def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)_UPD")>;
+def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNqWB_(fixed|register)_Asm_(16|32)")>;
+def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2], (instregex "VST4LNq(16|32)Pseudo_UPD")>;
+
+} // R52 SchedModel
diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
index 1d7eef9ddcfd..e2df0bddd0d1 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp
@@ -31,6 +31,7 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/TargetParser.h"
using namespace llvm;
@@ -58,8 +59,7 @@ IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT),
clEnumValN(RestrictedIT, "arm-restrict-it",
"Disallow deprecated IT based on ARMv8"),
clEnumValN(NoRestrictedIT, "arm-no-restrict-it",
- "Allow IT blocks based on ARMv7"),
- clEnumValEnd));
+ "Allow IT blocks based on ARMv7")));
/// ForceFastISel - Use the fast-isel, even for subtargets where it is not
/// currently supported (for testing only).
@@ -76,6 +76,11 @@ ARMSubtarget &ARMSubtarget::initializeSubtargetDependencies(StringRef CPU,
return *this;
}
+/// EnableExecuteOnly - Enables the generation of execute-only code on supported
+/// targets
+static cl::opt<bool>
+EnableExecuteOnly("arm-execute-only");
+
ARMFrameLowering *ARMSubtarget::initializeFrameLowering(StringRef CPU,
StringRef FS) {
ARMSubtarget &STI = initializeSubtargetDependencies(CPU, FS);
@@ -89,8 +94,9 @@ ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU,
const std::string &FS,
const ARMBaseTargetMachine &TM, bool IsLittle)
: ARMGenSubtargetInfo(TT, CPU, FS), UseMulOps(UseFusedMulOps),
- CPUString(CPU), IsLittle(IsLittle), TargetTriple(TT), Options(TM.Options),
- TM(TM), FrameLowering(initializeFrameLowering(CPU, FS)),
+ GenExecuteOnly(EnableExecuteOnly), CPUString(CPU), IsLittle(IsLittle),
+ TargetTriple(TT), Options(TM.Options), TM(TM),
+ FrameLowering(initializeFrameLowering(CPU, FS)),
// At this point initializeSubtargetDependencies has been called so
// we can query directly.
InstrInfo(isThumb1Only()
@@ -98,7 +104,32 @@ ARMSubtarget::ARMSubtarget(const Triple &TT, const std::string &CPU,
: !isThumb()
? (ARMBaseInstrInfo *)new ARMInstrInfo(*this)
: (ARMBaseInstrInfo *)new Thumb2InstrInfo(*this)),
- TLInfo(TM, *this) {}
+ TLInfo(TM, *this), GISel() {}
+
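+// These accessors simply forward to the GISelAccessor object. When the
+// GlobalISel library is not built in, the target machine installs a plain
+// GISelAccessor whose getters all return null.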
+const CallLowering *ARMSubtarget::getCallLowering() const {
+ assert(GISel && "Access to GlobalISel APIs not set");
+ return GISel->getCallLowering();
+}
+
+const InstructionSelector *ARMSubtarget::getInstructionSelector() const {
+ assert(GISel && "Access to GlobalISel APIs not set");
+ return GISel->getInstructionSelector();
+}
+
+const LegalizerInfo *ARMSubtarget::getLegalizerInfo() const {
+ assert(GISel && "Access to GlobalISel APIs not set");
+ return GISel->getLegalizerInfo();
+}
+
+const RegisterBankInfo *ARMSubtarget::getRegBankInfo() const {
+ assert(GISel && "Access to GlobalISel APIs not set");
+ return GISel->getRegBankInfo();
+}
+
+bool ARMSubtarget::isXRaySupported() const {
+  // We don't currently support Thumb, but Windows requires Thumb.
+ return hasV6Ops() && hasARMOps() && !isTargetWindows();
+}
void ARMSubtarget::initializeEnvironment() {
// MCAsmInfo isn't always present (e.g. in opt) so we can't initialize this
@@ -117,10 +148,11 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
if (isTargetDarwin()) {
StringRef ArchName = TargetTriple.getArchName();
- if (ArchName.endswith("v7s"))
+ unsigned ArchKind = llvm::ARM::parseArch(ArchName);
+ if (ArchKind == llvm::ARM::AK_ARMV7S)
// Default to the Swift CPU when targeting armv7s/thumbv7s.
CPUString = "swift";
- else if (ArchName.endswith("v7k"))
+ else if (ArchKind == llvm::ARM::AK_ARMV7K)
// Default to the Cortex-a7 CPU when targeting armv7k/thumbv7k.
// ARMv7k does not use SjLj exception handling.
CPUString = "cortex-a7";
@@ -143,6 +175,10 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
// Assert this for now to make the change obvious.
assert(hasV6T2Ops() || !hasThumb2());
+ // Execute only support requires movt support
+ if (genExecuteOnly())
+ assert(hasV8MBaselineOps() && !NoMovt && "Cannot generate execute-only code for this target");
+
// Keep a pointer to static instruction cost data for the specified CPU.
SchedModel = getSchedModelForCPU(CPUString);
@@ -199,6 +235,9 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
(Options.UnsafeFPMath || isTargetDarwin()))
UseNEONForSinglePrecisionFP = true;
+ if (isRWPI())
+ ReserveR9 = true;
+
// FIXME: Teach TableGen to deal with these instead of doing it manually here.
switch (ARMProcFamily) {
case Others:
@@ -234,6 +273,7 @@ void ARMSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
case CortexR7:
case CortexM3:
case ExynosM1:
+ case CortexR52:
break;
case Krait:
PreISelOperandLatencyAdjustment = 1;
@@ -261,6 +301,15 @@ bool ARMSubtarget::isAAPCS16_ABI() const {
return TM.TargetABI == ARMBaseTargetMachine::ARM_ABI_AAPCS16;
}
+bool ARMSubtarget::isROPI() const {
+ return TM.getRelocationModel() == Reloc::ROPI ||
+ TM.getRelocationModel() == Reloc::ROPI_RWPI;
+}
+bool ARMSubtarget::isRWPI() const {
+ return TM.getRelocationModel() == Reloc::RWPI ||
+ TM.getRelocationModel() == Reloc::ROPI_RWPI;
+}
+
bool ARMSubtarget::isGVIndirectSymbol(const GlobalValue *GV) const {
if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
return true;
@@ -268,7 +317,7 @@ bool ARMSubtarget::isGVIndirectSymbol(const GlobalValue *GV) const {
// 32 bit macho has no relocation for a-b if a is undefined, even if b is in
  // the section that is being relocated. This means we have to use a load even
// for GVs that are known to be local to the dso.
- if (isTargetDarwin() && TM.isPositionIndependent() &&
+ if (isTargetMachO() && TM.isPositionIndependent() &&
(GV->isDeclarationForLinker() || GV->hasCommonLinkage()))
return true;
@@ -300,9 +349,7 @@ bool ARMSubtarget::enablePostRAScheduler() const {
return (!isThumb() || hasThumb2());
}
-bool ARMSubtarget::enableAtomicExpand() const {
- return hasAnyDataBarrier() && (!isThumb() || hasV8MBaselineOps());
-}
+bool ARMSubtarget::enableAtomicExpand() const { return hasAnyDataBarrier(); }
bool ARMSubtarget::useStride4VFPs(const MachineFunction &MF) const {
// For general targets, the prologue can grow when VFPs are allocated with
@@ -316,7 +363,7 @@ bool ARMSubtarget::useMovt(const MachineFunction &MF) const {
// immediates as it is inherently position independent, and may be out of
// range otherwise.
return !NoMovt && hasV8MBaselineOps() &&
- (isTargetWindows() || !MF.getFunction()->optForMinSize());
+ (isTargetWindows() || !MF.getFunction()->optForMinSize() || genExecuteOnly());
}
bool ARMSubtarget::useFastISel() const {
diff --git a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
index 910de0e1e72d..8c8218d0f432 100644
--- a/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
+++ b/contrib/llvm/lib/Target/ARM/ARMSubtarget.h
@@ -25,6 +25,7 @@
#include "Thumb1InstrInfo.h"
#include "Thumb2InstrInfo.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -43,7 +44,7 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
protected:
enum ARMProcFamilyEnum {
Others, CortexA5, CortexA7, CortexA8, CortexA9, CortexA12, CortexA15,
- CortexA17, CortexR4, CortexR4F, CortexR5, CortexR7, CortexM3,
+ CortexA17, CortexR4, CortexR4F, CortexR5, CortexR7, CortexR52, CortexM3,
CortexA32, CortexA35, CortexA53, CortexA57, CortexA72, CortexA73,
Krait, Swift, ExynosM1
};
@@ -53,7 +54,8 @@ protected:
enum ARMArchEnum {
ARMv2, ARMv2a, ARMv3, ARMv3m, ARMv4, ARMv4t, ARMv5, ARMv5t, ARMv5te,
ARMv5tej, ARMv6, ARMv6k, ARMv6kz, ARMv6t2, ARMv6m, ARMv6sm, ARMv7a, ARMv7r,
- ARMv7m, ARMv7em, ARMv8a, ARMv81a, ARMv82a, ARMv8mMainline, ARMv8mBaseline
+ ARMv7m, ARMv7em, ARMv8a, ARMv81a, ARMv82a, ARMv8mMainline, ARMv8mBaseline,
+ ARMv8r
};
public:
@@ -234,6 +236,9 @@ protected:
/// particularly effective at zeroing a VFP register.
bool HasZeroCycleZeroing = false;
+  /// HasFPAO - if true, the processor does positive address offset computation faster
+ bool HasFPAO = false;
+
/// If true, if conversion may decide to leave some instructions unpredicated.
bool IsProfitableToUnpredicate = false;
@@ -296,6 +301,9 @@ protected:
/// Generate calls via indirect call instructions.
bool GenLongCalls = false;
+ /// Generate code that does not contain data access to code sections.
+ bool GenExecuteOnly = false;
+
/// Target machine allowed unsafe FP math (such as use of NEON fp)
bool UnsafeFPMath = false;
@@ -346,6 +354,9 @@ public:
ARMSubtarget(const Triple &TT, const std::string &CPU, const std::string &FS,
const ARMBaseTargetMachine &TM, bool IsLittle);
+  /// This object will take ownership of \p GISelAccessor.
+ void setGISelAccessor(GISelAccessor &GISel) { this->GISel.reset(&GISel); }
+
/// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
/// that still makes it profitable to inline the call.
unsigned getMaxInlineSizeThreshold() const {
@@ -375,6 +386,11 @@ public:
return &InstrInfo->getRegisterInfo();
}
+ const CallLowering *getCallLowering() const override;
+ const InstructionSelector *getInstructionSelector() const override;
+ const LegalizerInfo *getLegalizerInfo() const override;
+ const RegisterBankInfo *getRegBankInfo() const override;
+
private:
ARMSelectionDAGInfo TSInfo;
// Either Thumb1FrameLowering or ARMFrameLowering.
@@ -383,6 +399,11 @@ private:
std::unique_ptr<ARMBaseInstrInfo> InstrInfo;
ARMTargetLowering TLInfo;
+ /// Gather the accessor points to GlobalISel-related APIs.
+ /// This is used to avoid ifndefs spreading around while GISel is
+ /// an optional library.
+ std::unique_ptr<GISelAccessor> GISel;
+
void initializeEnvironment();
void initSubtargetFeatures(StringRef CPU, StringRef FS);
ARMFrameLowering *initializeFrameLowering(StringRef CPU, StringRef FS);
@@ -452,6 +473,7 @@ public:
bool hasTrustZone() const { return HasTrustZone; }
bool has8MSecExt() const { return Has8MSecExt; }
bool hasZeroCycleZeroing() const { return HasZeroCycleZeroing; }
+ bool hasFPAO() const { return HasFPAO; }
bool isProfitableToUnpredicate() const { return IsProfitableToUnpredicate; }
bool hasSlowVGETLNi32() const { return HasSlowVGETLNi32; }
bool hasSlowVDUP32() const { return HasSlowVDUP32; }
@@ -475,6 +497,7 @@ public:
bool useNaClTrap() const { return UseNaClTrap; }
bool useSjLjEH() const { return UseSjLjEH; }
bool genLongCalls() const { return GenLongCalls; }
+ bool genExecuteOnly() const { return GenExecuteOnly; }
bool hasFP16() const { return HasFP16; }
bool hasD16() const { return HasD16; }
@@ -540,10 +563,15 @@ public:
}
bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
+ virtual bool isXRaySupported() const override;
+
bool isAPCS_ABI() const;
bool isAAPCS_ABI() const;
bool isAAPCS16_ABI() const;
+ bool isROPI() const;
+ bool isRWPI() const;
+
bool useSoftFloat() const { return UseSoftFloat; }
bool isThumb() const { return InThumbMode; }
bool isThumb1Only() const { return InThumbMode && !HasThumb2; }
@@ -557,11 +585,17 @@ public:
return isTargetMachO() ? (ReserveR9 || !HasV6Ops) : ReserveR9;
}
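+  /// Returns true when r7 (rather than r11) should be used as the frame
+  /// pointer, as is the convention on Darwin and in Thumb code.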
+ bool useR7AsFramePointer() const {
+ return isTargetDarwin() || (!isTargetWindows() && isThumb());
+ }
/// Returns true if the frame setup is split into two separate pushes (first
/// r0-r7,lr then r8-r11), principally so that the frame pointer is adjacent
- /// to lr.
- bool splitFramePushPop() const {
- return isTargetMachO();
+ /// to lr. This is always required on Thumb1-only targets, as the push and
+ /// pop instructions can't access the high registers.
+ bool splitFramePushPop(const MachineFunction &MF) const {
+ return (useR7AsFramePointer() &&
+ MF.getTarget().Options.DisableFramePointerElim(MF)) ||
+ isThumb1Only();
}
bool useStride4VFPs(const MachineFunction &MF) const;
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
index dc730a675bef..70c9567d99f8 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -10,11 +10,19 @@
//
//===----------------------------------------------------------------------===//
+#include "ARMTargetMachine.h"
#include "ARM.h"
+#include "ARMCallLowering.h"
#include "ARMFrameLowering.h"
-#include "ARMTargetMachine.h"
+#include "ARMInstructionSelector.h"
+#include "ARMLegalizerInfo.h"
+#include "ARMRegisterBankInfo.h"
#include "ARMTargetObjectFile.h"
#include "ARMTargetTransformInfo.h"
+#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
+#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
+#include "llvm/CodeGen/GlobalISel/Legalizer.h"
+#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Function.h"
@@ -22,6 +30,7 @@
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Scalar.h"
@@ -50,12 +59,13 @@ EnableGlobalMerge("arm-global-merge", cl::Hidden,
extern "C" void LLVMInitializeARMTarget() {
// Register the target.
- RegisterTargetMachine<ARMLETargetMachine> X(TheARMLETarget);
- RegisterTargetMachine<ARMBETargetMachine> Y(TheARMBETarget);
- RegisterTargetMachine<ThumbLETargetMachine> A(TheThumbLETarget);
- RegisterTargetMachine<ThumbBETargetMachine> B(TheThumbBETarget);
+ RegisterTargetMachine<ARMLETargetMachine> X(getTheARMLETarget());
+ RegisterTargetMachine<ARMBETargetMachine> Y(getTheARMBETarget());
+ RegisterTargetMachine<ThumbLETargetMachine> A(getTheThumbLETarget());
+ RegisterTargetMachine<ThumbBETargetMachine> B(getTheThumbBETarget());
PassRegistry &Registry = *PassRegistry::getPassRegistry();
+ initializeGlobalISel(Registry);
initializeARMLoadStoreOptPass(Registry);
initializeARMPreAllocLoadStoreOptPass(Registry);
}
@@ -84,11 +94,13 @@ computeTargetABI(const Triple &TT, StringRef CPU,
ARMBaseTargetMachine::ARMABI TargetABI =
ARMBaseTargetMachine::ARM_ABI_UNKNOWN;
+ unsigned ArchKind = llvm::ARM::parseCPUArch(CPU);
+ StringRef ArchName = llvm::ARM::getArchName(ArchKind);
// FIXME: This is duplicated code from the front end and should be unified.
if (TT.isOSBinFormatMachO()) {
if (TT.getEnvironment() == llvm::Triple::EABI ||
(TT.getOS() == llvm::Triple::UnknownOS && TT.isOSBinFormatMachO()) ||
- CPU.startswith("cortex-m")) {
+ llvm::ARM::parseArchProfile(ArchName) == llvm::ARM::PK_M) {
TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS;
} else if (TT.isWatchABI()) {
TargetABI = ARMBaseTargetMachine::ARM_ABI_AAPCS16;
@@ -184,6 +196,10 @@ static Reloc::Model getEffectiveRelocModel(const Triple &TT,
// Default relocation model on Darwin is PIC.
return TT.isOSBinFormatMachO() ? Reloc::PIC_ : Reloc::Static;
+ if (*RM == Reloc::ROPI || *RM == Reloc::RWPI || *RM == Reloc::ROPI_RWPI)
+ assert(TT.isOSBinFormatELF() &&
+ "ROPI/RWPI currently only supported for ELF");
+
// DynamicNoPIC is only used on darwin.
if (*RM == Reloc::DynamicNoPIC && !TT.isOSDarwin())
return Reloc::Static;
@@ -224,6 +240,29 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, const Triple &TT,
ARMBaseTargetMachine::~ARMBaseTargetMachine() {}
+#ifdef LLVM_BUILD_GLOBAL_ISEL
+namespace {
+struct ARMGISelActualAccessor : public GISelAccessor {
+ std::unique_ptr<CallLowering> CallLoweringInfo;
+ std::unique_ptr<InstructionSelector> InstSelector;
+ std::unique_ptr<LegalizerInfo> Legalizer;
+ std::unique_ptr<RegisterBankInfo> RegBankInfo;
+ const CallLowering *getCallLowering() const override {
+ return CallLoweringInfo.get();
+ }
+ const InstructionSelector *getInstructionSelector() const override {
+ return InstSelector.get();
+ }
+ const LegalizerInfo *getLegalizerInfo() const override {
+ return Legalizer.get();
+ }
+ const RegisterBankInfo *getRegBankInfo() const override {
+ return RegBankInfo.get();
+ }
+};
+} // End anonymous namespace.
+#endif
+
const ARMSubtarget *
ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const {
Attribute CPUAttr = F.getFnAttribute("target-cpu");
@@ -255,6 +294,24 @@ ARMBaseTargetMachine::getSubtargetImpl(const Function &F) const {
// function that reside in TargetOptions.
resetTargetOptions(F);
I = llvm::make_unique<ARMSubtarget>(TargetTriple, CPU, FS, *this, isLittle);
+
+#ifndef LLVM_BUILD_GLOBAL_ISEL
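+    // GlobalISel was not built; install a default accessor that returns
+    // null for all of the GlobalISel objects.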
+ GISelAccessor *GISel = new GISelAccessor();
+#else
+ ARMGISelActualAccessor *GISel = new ARMGISelActualAccessor();
+ GISel->CallLoweringInfo.reset(new ARMCallLowering(*I->getTargetLowering()));
+ GISel->Legalizer.reset(new ARMLegalizerInfo());
+
+ auto *RBI = new ARMRegisterBankInfo(*I->getRegisterInfo());
+
+ // FIXME: At this point, we can't rely on Subtarget having RBI.
+ // It's awkward to mix passing RBI and the Subtarget; should we pass
+ // TII/TRI as well?
+ GISel->InstSelector.reset(new ARMInstructionSelector(*I, *RBI));
+
+ GISel->RegBankInfo.reset(RBI);
+#endif
+ I->setGISelAccessor(*GISel);
}
return I.get();
}
@@ -346,6 +403,12 @@ public:
void addIRPasses() override;
bool addPreISel() override;
bool addInstSelector() override;
+#ifdef LLVM_BUILD_GLOBAL_ISEL
+ bool addIRTranslator() override;
+ bool addLegalizeMachineIR() override;
+ bool addRegBankSelect() override;
+ bool addGlobalInstructionSelect() override;
+#endif
void addPreRegAlloc() override;
void addPreSched2() override;
void addPreEmitPass() override;
@@ -406,6 +469,28 @@ bool ARMPassConfig::addInstSelector() {
return false;
}
+#ifdef LLVM_BUILD_GLOBAL_ISEL
+bool ARMPassConfig::addIRTranslator() {
+ addPass(new IRTranslator());
+ return false;
+}
+
+bool ARMPassConfig::addLegalizeMachineIR() {
+ addPass(new Legalizer());
+ return false;
+}
+
+bool ARMPassConfig::addRegBankSelect() {
+ addPass(new RegBankSelect());
+ return false;
+}
+
+bool ARMPassConfig::addGlobalInstructionSelect() {
+ addPass(new InstructionSelect());
+ return false;
+}
+#endif
+
void ARMPassConfig::addPreRegAlloc() {
if (getOptLevel() != CodeGenOpt::None) {
addPass(createMLxExpansionPass());
@@ -436,8 +521,8 @@ void ARMPassConfig::addPreSched2() {
return this->TM->getSubtarget<ARMSubtarget>(F).restrictIT();
}));
- addPass(createIfConverter([this](const Function &F) {
- return !this->TM->getSubtarget<ARMSubtarget>(F).isThumb1Only();
+ addPass(createIfConverter([](const MachineFunction &MF) {
+ return !MF.getSubtarget<ARMSubtarget>().isThumb1Only();
}));
}
addPass(createThumb2ITBlockPass());
@@ -447,8 +532,8 @@ void ARMPassConfig::addPreEmitPass() {
addPass(createThumb2SizeReductionPass());
// Constant island pass work on unbundled instructions.
- addPass(createUnpackMachineBundles([this](const Function &F) {
- return this->TM->getSubtarget<ARMSubtarget>(F).isThumb2();
+ addPass(createUnpackMachineBundles([](const MachineFunction &MF) {
+ return MF.getSubtarget<ARMSubtarget>().isThumb2();
}));
// Don't optimize barriers at -O0.
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp
index eaed5cc68750..625c4280e1a6 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.cpp
@@ -27,8 +27,10 @@ using namespace dwarf;
void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
const TargetMachine &TM) {
- bool isAAPCS_ABI = static_cast<const ARMTargetMachine &>(TM).TargetABI ==
- ARMTargetMachine::ARMABI::ARM_ABI_AAPCS;
+ const ARMTargetMachine &ARM_TM = static_cast<const ARMTargetMachine &>(TM);
+ bool isAAPCS_ABI = ARM_TM.TargetABI == ARMTargetMachine::ARMABI::ARM_ABI_AAPCS;
+ genExecuteOnly = ARM_TM.getSubtargetImpl()->genExecuteOnly();
+
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
InitializeELF(isAAPCS_ABI);
@@ -38,19 +40,28 @@ void ARMElfTargetObjectFile::Initialize(MCContext &Ctx,
AttributesSection =
getContext().getELFSection(".ARM.attributes", ELF::SHT_ARM_ATTRIBUTES, 0);
+
+ // Make code section unreadable when in execute-only mode
+ if (genExecuteOnly) {
+ unsigned Type = ELF::SHT_PROGBITS;
+ unsigned Flags = ELF::SHF_EXECINSTR | ELF::SHF_ALLOC | ELF::SHF_ARM_PURECODE;
+ // Since we cannot modify flags for an existing section, we create a new
+ // section with the right flags, and use 0 as the unique ID for
+ // execute-only text
+ TextSection = Ctx.getELFSection(".text", Type, Flags, 0, "", 0U);
+ }
}
const MCExpr *ARMElfTargetObjectFile::getTTypeGlobalReference(
- const GlobalValue *GV, unsigned Encoding, Mangler &Mang,
- const TargetMachine &TM, MachineModuleInfo *MMI,
- MCStreamer &Streamer) const {
+ const GlobalValue *GV, unsigned Encoding, const TargetMachine &TM,
+ MachineModuleInfo *MMI, MCStreamer &Streamer) const {
if (TM.getMCAsmInfo()->getExceptionHandlingType() != ExceptionHandling::ARM)
return TargetLoweringObjectFileELF::getTTypeGlobalReference(
- GV, Encoding, Mang, TM, MMI, Streamer);
+ GV, Encoding, TM, MMI, Streamer);
assert(Encoding == DW_EH_PE_absptr && "Can handle absptr encoding only");
- return MCSymbolRefExpr::create(TM.getSymbol(GV, Mang),
+ return MCSymbolRefExpr::create(TM.getSymbol(GV),
MCSymbolRefExpr::VK_ARM_TARGET2, getContext());
}
@@ -59,3 +70,23 @@ getDebugThreadLocalSymbol(const MCSymbol *Sym) const {
return MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_ARM_TLSLDO,
getContext());
}
+
+MCSection *
+ARMElfTargetObjectFile::getExplicitSectionGlobal(const GlobalObject *GO,
+ SectionKind SK, const TargetMachine &TM) const {
+ // Set execute-only access for the explicit section
+ if (genExecuteOnly && SK.isText())
+ SK = SectionKind::getExecuteOnly();
+
+ return TargetLoweringObjectFileELF::getExplicitSectionGlobal(GO, SK, TM);
+}
+
+MCSection *
+ARMElfTargetObjectFile::SelectSectionForGlobal(const GlobalObject *GO,
+ SectionKind SK, const TargetMachine &TM) const {
+ // Place the global in the execute-only text section
+ if (genExecuteOnly && SK.isText())
+ SK = SectionKind::getExecuteOnly();
+
+ return TargetLoweringObjectFileELF::SelectSectionForGlobal(GO, SK, TM);
+}
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h
index b1db201cb30d..24e755ddac27 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetObjectFile.h
@@ -18,6 +18,7 @@ class MCContext;
class TargetMachine;
class ARMElfTargetObjectFile : public TargetLoweringObjectFileELF {
+ mutable bool genExecuteOnly = false;
protected:
const MCSection *AttributesSection;
public:
@@ -28,14 +29,20 @@ public:
void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
- const MCExpr *
- getTTypeGlobalReference(const GlobalValue *GV, unsigned Encoding,
- Mangler &Mang, const TargetMachine &TM,
- MachineModuleInfo *MMI,
- MCStreamer &Streamer) const override;
+ const MCExpr *getTTypeGlobalReference(const GlobalValue *GV,
+ unsigned Encoding,
+ const TargetMachine &TM,
+ MachineModuleInfo *MMI,
+ MCStreamer &Streamer) const override;
/// \brief Describe a TLS variable address within debug info.
const MCExpr *getDebugThreadLocalSymbol(const MCSymbol *Sym) const override;
+
+ MCSection *getExplicitSectionGlobal(const GlobalObject *GO, SectionKind Kind,
+ const TargetMachine &TM) const override;
+
+ MCSection *SelectSectionForGlobal(const GlobalObject *GO, SectionKind Kind,
+ const TargetMachine &TM) const override;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 13c5dc61acd9..10e6297ef1ed 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -41,7 +41,7 @@ int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
// Thumb1.
if (SImmVal >= 0 && SImmVal < 256)
return 1;
- if ((~ZImmVal < 256) || ARM_AM::isThumbImmShiftedVal(ZImmVal))
+ if ((~SImmVal < 256) || ARM_AM::isThumbImmShiftedVal(ZImmVal))
return 2;
// Load from constantpool.
return 3;
@@ -69,6 +69,25 @@ int ARMTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
Idx == 1)
return 0;
+ if (Opcode == Instruction::And)
+ // Conversion to BIC is free, and means we can use ~Imm instead.
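+    // (For example, "and r0, r0, #0xffffff00" is not encodable as a modified
+    // immediate, but "bic r0, r0, #0xff" is.)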
+ return std::min(getIntImmCost(Imm, Ty), getIntImmCost(~Imm, Ty));
+
+ if (Opcode == Instruction::Add)
+ // Conversion to SUB is free, and means we can use -Imm instead.
+ return std::min(getIntImmCost(Imm, Ty), getIntImmCost(-Imm, Ty));
+
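+  // A negative compare immediate can often be folded away entirely: for
+  // example "icmp eq i32 %x, -42" can be selected as "cmn r0, #42" on
+  // Thumb2, or as an "adds" on Thumb1 when the negated value fits in 8 bits.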
+ if (Opcode == Instruction::ICmp && Imm.isNegative() &&
+ Ty->getIntegerBitWidth() == 32) {
+ int64_t NegImm = -Imm.getSExtValue();
+ if (ST->isThumb2() && NegImm < 1<<12)
+ // icmp X, #-C -> cmn X, #C
+ return 0;
+ if (ST->isThumb() && NegImm < 1<<8)
+ // icmp X, #-C -> adds X, #C
+ return 0;
+ }
+
return getIntImmCost(Imm, Ty);
}
diff --git a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index a0ca9e648002..d83228afb0ab 100644
--- a/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -45,13 +45,6 @@ public:
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}
- // Provide value semantics. MSVC requires that we spell all of these out.
- ARMTTIImpl(const ARMTTIImpl &Arg)
- : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {}
- ARMTTIImpl(ARMTTIImpl &&Arg)
- : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
- TLI(std::move(Arg.TLI)) {}
-
bool enableInterleavedAccessVectorization() { return true; }
/// Floating-point computation using ARMv8 AArch32 Advanced
@@ -128,6 +121,16 @@ public:
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
ArrayRef<unsigned> Indices, unsigned Alignment,
unsigned AddressSpace);
+
+ bool shouldBuildLookupTablesForConstant(Constant *C) const {
+ // In the ROPI and RWPI relocation models we can't have pointers to global
+ // variables or functions in constant data, so don't convert switches to
+ // lookup tables if any of the values would need relocation.
+ if (ST->isROPI() || ST->isRWPI())
+ return !C->needsRelocation();
+
+ return true;
+ }
/// @}
};
diff --git a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
index f5de8a3cd25e..c243a2d35979 100644
--- a/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
+++ b/contrib/llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp
@@ -40,6 +40,7 @@
#include "llvm/Support/ARMBuildAttributes.h"
#include "llvm/Support/ARMEHABI.h"
#include "llvm/Support/COFF.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/MathExtras.h"
@@ -52,6 +53,20 @@ using namespace llvm;
namespace {
+enum class ImplicitItModeTy { Always, Never, ARMOnly, ThumbOnly };
+
+static cl::opt<ImplicitItModeTy> ImplicitItMode(
+ "arm-implicit-it", cl::init(ImplicitItModeTy::ARMOnly),
+ cl::desc("Allow conditional instructions outdside of an IT block"),
+ cl::values(clEnumValN(ImplicitItModeTy::Always, "always",
+ "Accept in both ISAs, emit implicit ITs in Thumb"),
+ clEnumValN(ImplicitItModeTy::Never, "never",
+ "Warn in ARM, reject in Thumb"),
+ clEnumValN(ImplicitItModeTy::ARMOnly, "arm",
+ "Accept in ARM, reject in Thumb"),
+ clEnumValN(ImplicitItModeTy::ThumbOnly, "thumb",
+ "Warn in ARM, emit implicit ITs in Thumb")));
+
class ARMOperand;
enum VectorLaneTy { NoLanes, AllLanes, IndexedLane };
@@ -145,6 +160,16 @@ class ARMAsmParser : public MCTargetAsmParser {
bool NextSymbolIsThumb;
+ bool useImplicitITThumb() const {
+ return ImplicitItMode == ImplicitItModeTy::Always ||
+ ImplicitItMode == ImplicitItModeTy::ThumbOnly;
+ }
+
+ bool useImplicitITARM() const {
+ return ImplicitItMode == ImplicitItModeTy::Always ||
+ ImplicitItMode == ImplicitItModeTy::ARMOnly;
+ }
+
struct {
ARMCC::CondCodes Cond; // Condition for IT block.
unsigned Mask:4; // Condition mask for instructions.
@@ -153,40 +178,174 @@ class ARMAsmParser : public MCTargetAsmParser {
// '0' inverse of condition (else).
// Count of instructions in IT block is
// 4 - trailingzeroes(mask)
-
- bool FirstCond; // Explicit flag for when we're parsing the
- // First instruction in the IT block. It's
- // implied in the mask, so needs special
- // handling.
+ // Note that this does not have the same encoding
+ // as in the IT instruction, which also depends
+ // on the low bit of the condition code.
unsigned CurPosition; // Current position in parsing of IT
- // block. In range [0,3]. Initialized
- // according to count of instructions in block.
- // ~0U if no active IT block.
+ // block. In range [0,4], with 0 being the IT
+ // instruction itself. Initialized according to
+ // count of instructions in block. ~0U if no
+ // active IT block.
+
+ bool IsExplicit; // true - The IT instruction was present in the
+ // input, we should not modify it.
+ // false - The IT instruction was added
+ // implicitly, we can extend it if that
+ // would be legal.
} ITState;
+
+ llvm::SmallVector<MCInst, 4> PendingConditionalInsts;
+
+ void flushPendingInstructions(MCStreamer &Out) override {
+ if (!inImplicitITBlock()) {
+ assert(PendingConditionalInsts.size() == 0);
+ return;
+ }
+
+ // Emit the IT instruction
+ unsigned Mask = getITMaskEncoding();
+ MCInst ITInst;
+ ITInst.setOpcode(ARM::t2IT);
+ ITInst.addOperand(MCOperand::createImm(ITState.Cond));
+ ITInst.addOperand(MCOperand::createImm(Mask));
+ Out.EmitInstruction(ITInst, getSTI());
+
+    // Emit the conditional instructions
+ assert(PendingConditionalInsts.size() <= 4);
+ for (const MCInst &Inst : PendingConditionalInsts) {
+ Out.EmitInstruction(Inst, getSTI());
+ }
+ PendingConditionalInsts.clear();
+
+ // Clear the IT state
+ ITState.Mask = 0;
+ ITState.CurPosition = ~0U;
+ }
+
bool inITBlock() { return ITState.CurPosition != ~0U; }
+ bool inExplicitITBlock() { return inITBlock() && ITState.IsExplicit; }
+ bool inImplicitITBlock() { return inITBlock() && !ITState.IsExplicit; }
bool lastInITBlock() {
return ITState.CurPosition == 4 - countTrailingZeros(ITState.Mask);
}
void forwardITPosition() {
if (!inITBlock()) return;
// Move to the next instruction in the IT block, if there is one. If not,
- // mark the block as done.
+ // mark the block as done, except for implicit IT blocks, which we leave
+ // open until we find an instruction that can't be added to it.
unsigned TZ = countTrailingZeros(ITState.Mask);
- if (++ITState.CurPosition == 5 - TZ)
+ if (++ITState.CurPosition == 5 - TZ && ITState.IsExplicit)
ITState.CurPosition = ~0U; // Done with the IT block after this.
}
- void Note(SMLoc L, const Twine &Msg, ArrayRef<SMRange> Ranges = None) {
- return getParser().Note(L, Msg, Ranges);
+ // Rewind the state of the current IT block, removing the last slot from it.
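+  // For example, rewinding a two-slot mask of 0b1100 or 0b0100 restores the
+  // one-slot mask 0b1000.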
+ void rewindImplicitITPosition() {
+ assert(inImplicitITBlock());
+ assert(ITState.CurPosition > 1);
+ ITState.CurPosition--;
+ unsigned TZ = countTrailingZeros(ITState.Mask);
+ unsigned NewMask = 0;
+ NewMask |= ITState.Mask & (0xC << TZ);
+ NewMask |= 0x2 << TZ;
+ ITState.Mask = NewMask;
+ }
+
+  // Discard the current implicit IT block, which must still be at its first
+  // slot (nothing has been added to it yet); this simply closes the block.
+ void discardImplicitITBlock() {
+ assert(inImplicitITBlock());
+ assert(ITState.CurPosition == 1);
+ ITState.CurPosition = ~0U;
+ return;
}
- bool Warning(SMLoc L, const Twine &Msg,
- ArrayRef<SMRange> Ranges = None) {
- return getParser().Warning(L, Msg, Ranges);
+
+ // Get the encoding of the IT mask, as it will appear in an IT instruction.
+ unsigned getITMaskEncoding() {
+ assert(inITBlock());
+ unsigned Mask = ITState.Mask;
+ unsigned TZ = countTrailingZeros(Mask);
+ if ((ITState.Cond & 1) == 0) {
+ assert(Mask && TZ <= 3 && "illegal IT mask value!");
+ Mask ^= (0xE << TZ) & 0xF;
+ }
+ return Mask;
}
- bool Error(SMLoc L, const Twine &Msg,
- ArrayRef<SMRange> Ranges = None) {
- return getParser().Error(L, Msg, Ranges);
+
+ // Get the condition code corresponding to the current IT block slot.
+ ARMCC::CondCodes currentITCond() {
+ unsigned MaskBit;
+ if (ITState.CurPosition == 1)
+ MaskBit = 1;
+ else
+ MaskBit = (ITState.Mask >> (5 - ITState.CurPosition)) & 1;
+
+ return MaskBit ? ITState.Cond : ARMCC::getOppositeCondition(ITState.Cond);
+ }
+
+ // Invert the condition of the current IT block slot without changing any
+ // other slots in the same block.
+ void invertCurrentITCondition() {
+ if (ITState.CurPosition == 1) {
+ ITState.Cond = ARMCC::getOppositeCondition(ITState.Cond);
+ } else {
+ ITState.Mask ^= 1 << (5 - ITState.CurPosition);
+ }
+ }
+
+ // Returns true if the current IT block is full (all 4 slots used).
+ bool isITBlockFull() {
+ return inITBlock() && (ITState.Mask & 1);
+ }
+
+ // Extend the current implicit IT block to have one more slot with the given
+ // condition code.
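+  // For example, a freshly started implicit block has Mask == 0b1000;
+  // extending it with the same condition gives 0b1100, and with the
+  // opposite condition gives 0b0100.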
+ void extendImplicitITBlock(ARMCC::CondCodes Cond) {
+ assert(inImplicitITBlock());
+ assert(!isITBlockFull());
+ assert(Cond == ITState.Cond ||
+ Cond == ARMCC::getOppositeCondition(ITState.Cond));
+ unsigned TZ = countTrailingZeros(ITState.Mask);
+ unsigned NewMask = 0;
+ // Keep any existing condition bits.
+ NewMask |= ITState.Mask & (0xE << TZ);
+ // Insert the new condition bit.
+ NewMask |= (Cond == ITState.Cond) << TZ;
+ // Move the trailing 1 down one bit.
+ NewMask |= 1 << (TZ - 1);
+ ITState.Mask = NewMask;
+ }
+
+ // Create a new implicit IT block with a dummy condition code.
+ void startImplicitITBlock() {
+ assert(!inITBlock());
+ ITState.Cond = ARMCC::AL;
+ ITState.Mask = 8;
+ ITState.CurPosition = 1;
+ ITState.IsExplicit = false;
+ return;
+ }
+
+ // Create a new explicit IT block with the given condition and mask. The mask
+ // should be in the parsed format, with a 1 implying 't', regardless of the
+ // low bit of the condition.
+ void startExplicitITBlock(ARMCC::CondCodes Cond, unsigned Mask) {
+ assert(!inITBlock());
+ ITState.Cond = Cond;
+ ITState.Mask = Mask;
+ ITState.CurPosition = 0;
+ ITState.IsExplicit = true;
+ return;
+ }
+
+ void Note(SMLoc L, const Twine &Msg, SMRange Range = None) {
+ return getParser().Note(L, Msg, Range);
+ }
+ bool Warning(SMLoc L, const Twine &Msg, SMRange Range = None) {
+ return getParser().Warning(L, Msg, Range);
+ }
+ bool Error(SMLoc L, const Twine &Msg, SMRange Range = None) {
+ return getParser().Error(L, Msg, Range);
}
bool validatetLDMRegList(const MCInst &Inst, const OperandVector &Operands,
@@ -355,6 +514,7 @@ class ARMAsmParser : public MCTargetAsmParser {
bool processInstruction(MCInst &Inst, const OperandVector &Ops, MCStreamer &Out);
bool shouldOmitCCOutOperand(StringRef Mnemonic, OperandVector &Operands);
bool shouldOmitPredicateOperand(StringRef Mnemonic, OperandVector &Operands);
+ bool isITBlockTerminator(MCInst &Inst) const;
public:
enum ARMMatchResultTy {
@@ -363,6 +523,7 @@ public:
Match_RequiresV6,
Match_RequiresThumb2,
Match_RequiresV8,
+ Match_RequiresFlagSetting,
#define GET_OPERAND_DIAGNOSTIC_TYPES
#include "ARMGenAsmMatcher.inc"
@@ -399,6 +560,9 @@ public:
OperandVector &Operands, MCStreamer &Out,
uint64_t &ErrorInfo,
bool MatchingInlineAsm) override;
+ unsigned MatchInstruction(OperandVector &Operands, MCInst &Inst,
+ uint64_t &ErrorInfo, bool MatchingInlineAsm,
+ bool &EmitInITBlock, MCStreamer &Out);
void onLabelParsed(MCSymbol *Symbol) override;
};
} // end anonymous namespace
@@ -3286,7 +3450,7 @@ static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) {
}
/// parseITCondCode - Try to parse a condition code for an IT instruction.
-ARMAsmParser::OperandMatchResultTy
+OperandMatchResultTy
ARMAsmParser::parseITCondCode(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
@@ -3324,7 +3488,7 @@ ARMAsmParser::parseITCondCode(OperandVector &Operands) {
/// parseCoprocNumOperand - Try to parse an coprocessor number operand. The
/// token must be an Identifier when called, and if it is a coprocessor
/// number, the token is eaten and the operand is added to the operand list.
-ARMAsmParser::OperandMatchResultTy
+OperandMatchResultTy
ARMAsmParser::parseCoprocNumOperand(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
@@ -3347,7 +3511,7 @@ ARMAsmParser::parseCoprocNumOperand(OperandVector &Operands) {
/// parseCoprocRegOperand - Try to parse an coprocessor register operand. The
/// token must be an Identifier when called, and if it is a coprocessor
/// number, the token is eaten and the operand is added to the operand list.
-ARMAsmParser::OperandMatchResultTy
+OperandMatchResultTy
ARMAsmParser::parseCoprocRegOperand(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
@@ -3366,7 +3530,7 @@ ARMAsmParser::parseCoprocRegOperand(OperandVector &Operands) {
/// parseCoprocOptionOperand - Try to parse an coprocessor option operand.
/// coproc_option : '{' imm0_255 '}'
-ARMAsmParser::OperandMatchResultTy
+OperandMatchResultTy
ARMAsmParser::parseCoprocOptionOperand(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
@@ -3447,8 +3611,8 @@ static unsigned getDRegFromQReg(unsigned QReg) {
/// Parse a register list.
bool ARMAsmParser::parseRegisterList(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
- assert(Parser.getTok().is(AsmToken::LCurly) &&
- "Token is not a Left Curly Brace");
+ if (Parser.getTok().isNot(AsmToken::LCurly))
+ return TokError("Token is not a Left Curly Brace");
SMLoc S = Parser.getTok().getLoc();
Parser.Lex(); // Eat '{' token.
SMLoc RegLoc = Parser.getTok().getLoc();
@@ -3576,7 +3740,7 @@ bool ARMAsmParser::parseRegisterList(OperandVector &Operands) {
}
// Helper function to parse the lane index for vector lists.
-ARMAsmParser::OperandMatchResultTy ARMAsmParser::
+OperandMatchResultTy ARMAsmParser::
parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index, SMLoc &EndLoc) {
MCAsmParser &Parser = getParser();
Index = 0; // Always return a defined index value.
@@ -3628,7 +3792,7 @@ parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index, SMLoc &EndLoc) {
}
// parse a vector register list
-ARMAsmParser::OperandMatchResultTy
+OperandMatchResultTy
ARMAsmParser::parseVectorList(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
VectorLaneTy LaneKind;
@@ -3880,7 +4044,7 @@ ARMAsmParser::parseVectorList(OperandVector &Operands) {
}
/// parseMemBarrierOptOperand - Try to parse DSB/DMB data barrier options.
-ARMAsmParser::OperandMatchResultTy
+OperandMatchResultTy
ARMAsmParser::parseMemBarrierOptOperand(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
@@ -3952,7 +4116,7 @@ ARMAsmParser::parseMemBarrierOptOperand(OperandVector &Operands) {
}
/// parseInstSyncBarrierOptOperand - Try to parse ISB inst sync barrier options.
-ARMAsmParser::OperandMatchResultTy
+OperandMatchResultTy
ARMAsmParser::parseInstSyncBarrierOptOperand(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
@@ -4004,7 +4168,7 @@ ARMAsmParser::parseInstSyncBarrierOptOperand(OperandVector &Operands) {
/// parseProcIFlagsOperand - Try to parse iflags from CPS instruction.
-ARMAsmParser::OperandMatchResultTy
+OperandMatchResultTy
ARMAsmParser::parseProcIFlagsOperand(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
@@ -4039,7 +4203,7 @@ ARMAsmParser::parseProcIFlagsOperand(OperandVector &Operands) {
}
/// parseMSRMaskOperand - Try to parse mask flags from MSR instruction.
-ARMAsmParser::OperandMatchResultTy
+OperandMatchResultTy
ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
@@ -4192,7 +4356,7 @@ ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) {
/// parseBankedRegOperand - Try to parse a banked register (e.g. "lr_irq") for
/// use in the MRS/MSR instructions added to support virtualization.
-ARMAsmParser::OperandMatchResultTy
+OperandMatchResultTy
ARMAsmParser::parseBankedRegOperand(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
@@ -4247,7 +4411,7 @@ ARMAsmParser::parseBankedRegOperand(OperandVector &Operands) {
return MatchOperand_Success;
}
-ARMAsmParser::OperandMatchResultTy
+OperandMatchResultTy
ARMAsmParser::parsePKHImm(OperandVector &Operands, StringRef Op, int Low,
int High) {
MCAsmParser &Parser = getParser();
@@ -4296,7 +4460,7 @@ ARMAsmParser::parsePKHImm(OperandVector &Operands, StringRef Op, int Low,
return MatchOperand_Success;
}
-ARMAsmParser::OperandMatchResultTy
+OperandMatchResultTy
ARMAsmParser::parseSetEndImm(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
@@ -4326,7 +4490,7 @@ ARMAsmParser::parseSetEndImm(OperandVector &Operands) {
/// lsl #n 'n' in [0,31]
/// asr #n 'n' in [1,32]
/// n == 32 encoded as n == 0.
-ARMAsmParser::OperandMatchResultTy
+OperandMatchResultTy
ARMAsmParser::parseShifterImm(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
@@ -4397,7 +4561,7 @@ ARMAsmParser::parseShifterImm(OperandVector &Operands) {
/// parseRotImm - Parse the shifter immediate operand for SXTB/UXTB family
/// of instructions. Legal values are:
/// ror #n 'n' in {0, 8, 16, 24}
-ARMAsmParser::OperandMatchResultTy
+OperandMatchResultTy
ARMAsmParser::parseRotImm(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
@@ -4444,7 +4608,7 @@ ARMAsmParser::parseRotImm(OperandVector &Operands) {
return MatchOperand_Success;
}
-ARMAsmParser::OperandMatchResultTy
+OperandMatchResultTy
ARMAsmParser::parseModImm(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
MCAsmLexer &Lexer = getLexer();
@@ -4561,7 +4725,7 @@ ARMAsmParser::parseModImm(OperandVector &Operands) {
}
}
-ARMAsmParser::OperandMatchResultTy
+OperandMatchResultTy
ARMAsmParser::parseBitfield(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
SMLoc S = Parser.getTok().getLoc();
@@ -4630,7 +4794,7 @@ ARMAsmParser::parseBitfield(OperandVector &Operands) {
return MatchOperand_Success;
}
-ARMAsmParser::OperandMatchResultTy
+OperandMatchResultTy
ARMAsmParser::parsePostIdxReg(OperandVector &Operands) {
// Check for a post-index addressing register operand. Specifically:
// postidx_reg := '+' register {, shift}
@@ -4680,7 +4844,7 @@ ARMAsmParser::parsePostIdxReg(OperandVector &Operands) {
return MatchOperand_Success;
}
-ARMAsmParser::OperandMatchResultTy
+OperandMatchResultTy
ARMAsmParser::parseAM3Offset(OperandVector &Operands) {
// Check for a post-index addressing register operand. Specifically:
// am3offset := '+' register
@@ -4833,8 +4997,8 @@ void ARMAsmParser::cvtThumbBranches(MCInst &Inst,
bool ARMAsmParser::parseMemory(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
SMLoc S, E;
- assert(Parser.getTok().is(AsmToken::LBrac) &&
- "Token is not a Left Bracket");
+ if (Parser.getTok().isNot(AsmToken::LBrac))
+ return TokError("Token is not a Left Bracket");
S = Parser.getTok().getLoc();
Parser.Lex(); // Eat left bracket token.
@@ -5082,7 +5246,7 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St,
}
/// parseFPImm - A floating point immediate expression operand.
-ARMAsmParser::OperandMatchResultTy
+OperandMatchResultTy
ARMAsmParser::parseFPImm(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
// Anything that can accept a floating point constant as an operand
@@ -5131,7 +5295,7 @@ ARMAsmParser::parseFPImm(OperandVector &Operands) {
const AsmToken &Tok = Parser.getTok();
SMLoc Loc = Tok.getLoc();
if (Tok.is(AsmToken::Real) && isVmovf) {
- APFloat RealVal(APFloat::IEEEsingle, Tok.getString());
+ APFloat RealVal(APFloat::IEEEsingle(), Tok.getString());
uint64_t IntVal = RealVal.bitcastToAPInt().getZExtValue();
// If we had a '-' in front, toggle the sign bit.
IntVal ^= (uint64_t)isNegative << 31;
@@ -5259,7 +5423,7 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
return false;
}
// w/ a ':' after the '#', it's just like a plain ':'.
- // FALLTHROUGH
+ LLVM_FALLTHROUGH;
}
case AsmToken::Colon: {
S = Parser.getTok().getLoc();
@@ -5289,6 +5453,9 @@ bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
if (getParser().parseExpression(SubExprVal))
return true;
E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+
+ // execute-only: we assume that assembly programmers know what they are
+ // doing and allow literal pool creation here
Operands.push_back(ARMOperand::CreateConstantPoolImm(SubExprVal, S, E));
return false;
}
@@ -5842,7 +6009,6 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// In Thumb1, only the branch (B) instruction can be predicated.
if (isThumbOne() && PredicationCode != ARMCC::AL && Mnemonic != "b") {
- Parser.eatToEndOfStatement();
return Error(NameLoc, "conditional execution not supported in Thumb1");
}
@@ -5856,14 +6022,12 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
if (Mnemonic == "it") {
SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + 2);
if (ITMask.size() > 3) {
- Parser.eatToEndOfStatement();
return Error(Loc, "too many conditions on IT instruction");
}
unsigned Mask = 8;
for (unsigned i = ITMask.size(); i != 0; --i) {
char pos = ITMask[i - 1];
if (pos != 't' && pos != 'e') {
- Parser.eatToEndOfStatement();
return Error(Loc, "illegal IT block condition mask '" + ITMask + "'");
}
Mask >>= 1;
@@ -5889,14 +6053,12 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// If we had a carry-set on an instruction that can't do that, issue an
// error.
if (!CanAcceptCarrySet && CarrySetting) {
- Parser.eatToEndOfStatement();
return Error(NameLoc, "instruction '" + Mnemonic +
"' can not set flags, but 's' suffix specified");
}
// If we had a predication code on an instruction that can't do that, issue an
// error.
if (!CanAcceptPredicationCode && PredicationCode != ARMCC::AL) {
- Parser.eatToEndOfStatement();
return Error(NameLoc, "instruction '" + Mnemonic +
"' is not predicable, but condition code specified");
}
@@ -5940,7 +6102,6 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// For ARM mode generate an error if the .n qualifier is used.
if (ExtraToken == ".n" && !isThumb()) {
SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + Start);
- Parser.eatToEndOfStatement();
return Error(Loc, "instruction with .n (narrow) qualifier not allowed in "
"arm mode");
}
@@ -5958,28 +6119,19 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
if (getLexer().isNot(AsmToken::EndOfStatement)) {
// Read the first operand.
if (parseOperand(Operands, Mnemonic)) {
- Parser.eatToEndOfStatement();
return true;
}
- while (getLexer().is(AsmToken::Comma)) {
- Parser.Lex(); // Eat the comma.
-
+ while (parseOptionalToken(AsmToken::Comma)) {
// Parse and remember the operand.
if (parseOperand(Operands, Mnemonic)) {
- Parser.eatToEndOfStatement();
return true;
}
}
}
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- SMLoc Loc = getLexer().getLoc();
- Parser.eatToEndOfStatement();
- return Error(Loc, "unexpected token in argument list");
- }
-
- Parser.Lex(); // Consume the EndOfStatement
+ if (parseToken(AsmToken::EndOfStatement, "unexpected token in argument list"))
+ return true;
if (RequireVFPRegisterListCheck) {
ARMOperand &Op = static_cast<ARMOperand &>(*Operands.back());
@@ -6043,10 +6195,9 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// Rt2 must be Rt + 1 and Rt must be even.
if (Rt + 1 != Rt2 || (Rt & 1)) {
- Error(Op2.getStartLoc(), isLoad
- ? "destination operands must be sequential"
- : "source operands must be sequential");
- return true;
+ return Error(Op2.getStartLoc(),
+ isLoad ? "destination operands must be sequential"
+ : "source operands must be sequential");
}
unsigned NewReg = MRI->getMatchingSuperReg(Reg1, ARM::gsub_0,
&(MRI->getRegClass(ARM::GPRPairRegClassID)));
@@ -6188,18 +6339,11 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
// NOTE: BKPT and HLT instructions have the interesting property of being
// allowed in IT blocks, but not being predicable. They just always execute.
if (inITBlock() && !instIsBreakpoint(Inst)) {
- unsigned Bit = 1;
- if (ITState.FirstCond)
- ITState.FirstCond = false;
- else
- Bit = (ITState.Mask >> (5 - ITState.CurPosition)) & 1;
// The instruction must be predicable.
if (!MCID.isPredicable())
return Error(Loc, "instructions in IT block must be predicable");
unsigned Cond = Inst.getOperand(MCID.findFirstPredOperandIdx()).getImm();
- unsigned ITCond = Bit ? ITState.Cond :
- ARMCC::getOppositeCondition(ITState.Cond);
- if (Cond != ITCond) {
+ if (Cond != currentITCond()) {
// Find the condition code Operand to get its SMLoc information.
SMLoc CondLoc;
for (unsigned I = 1; I < Operands.size(); ++I)
@@ -6208,14 +6352,19 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
return Error(CondLoc, "incorrect condition in IT block; got '" +
StringRef(ARMCondCodeToString(ARMCC::CondCodes(Cond))) +
"', but expected '" +
- ARMCondCodeToString(ARMCC::CondCodes(ITCond)) + "'");
+ ARMCondCodeToString(ARMCC::CondCodes(currentITCond())) + "'");
}
// Check for non-'al' condition codes outside of the IT block.
} else if (isThumbTwo() && MCID.isPredicable() &&
Inst.getOperand(MCID.findFirstPredOperandIdx()).getImm() !=
ARMCC::AL && Inst.getOpcode() != ARM::tBcc &&
- Inst.getOpcode() != ARM::t2Bcc)
+ Inst.getOpcode() != ARM::t2Bcc) {
return Error(Loc, "predicated instructions must be in IT block");
+ } else if (!isThumb() && !useImplicitITARM() && MCID.isPredicable() &&
+ Inst.getOperand(MCID.findFirstPredOperandIdx()).getImm() !=
+ ARMCC::AL) {
+ return Warning(Loc, "predicated instructions should be in IT block");
+ }
const unsigned Opcode = Inst.getOpcode();
switch (Opcode) {
@@ -6520,6 +6669,12 @@ bool ARMAsmParser::validateInstruction(MCInst &Inst,
return Error(Operands[Op]->getStartLoc(), "branch target out of range");
break;
}
+ case ARM::tCBZ:
+ case ARM::tCBNZ: {
+ if (!static_cast<ARMOperand &>(*Operands[2]).isUnsignedOffset<6, 1>())
+ return Error(Operands[2]->getStartLoc(), "branch target out of range");
+ break;
+ }
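The new tCBZ/tCBNZ case rejects branch targets that cannot be encoded. As a point of reference (not the upstream helper), isUnsignedOffset<6, 1> is assumed to accept only non-negative, halfword-aligned offsets that fit a 6-bit field scaled by two, i.e. 0 to 126 bytes:

  #include <cstdint>

  // Hedged sketch of the range check assumed behind isUnsignedOffset<6, 1>():
  // CBZ/CBNZ encode the target as a 6-bit halfword count, so the byte offset
  // must be even and within [0, 126].
  static bool fitsCbzOffset(int64_t Val) {
    const int64_t Align = 1 << 1;                // scale: halfword units
    const int64_t Max = Align * ((1 << 6) - 1);  // 2 * 63 = 126 bytes
    return Val >= 0 && Val <= Max && (Val % Align) == 0;
  }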
case ARM::MOVi16:
case ARM::t2MOVi16:
case ARM::t2MOVTi16:
@@ -8639,27 +8794,15 @@ bool ARMAsmParser::processInstruction(MCInst &Inst,
}
case ARM::ITasm:
case ARM::t2IT: {
- // The mask bits for all but the first condition are represented as
- // the low bit of the condition code value implies 't'. We currently
- // always have 1 implies 't', so XOR toggle the bits if the low bit
- // of the condition code is zero.
MCOperand &MO = Inst.getOperand(1);
unsigned Mask = MO.getImm();
- unsigned OrigMask = Mask;
- unsigned TZ = countTrailingZeros(Mask);
- if ((Inst.getOperand(0).getImm() & 1) == 0) {
- assert(Mask && TZ <= 3 && "illegal IT mask value!");
- Mask ^= (0xE << TZ) & 0xF;
- }
- MO.setImm(Mask);
+ ARMCC::CondCodes Cond = ARMCC::CondCodes(Inst.getOperand(0).getImm());
// Set up the IT block state according to the IT instruction we just
// matched.
assert(!inITBlock() && "nested IT blocks?!");
- ITState.Cond = ARMCC::CondCodes(Inst.getOperand(0).getImm());
- ITState.Mask = OrigMask; // Use the original mask, not the updated one.
- ITState.CurPosition = 0;
- ITState.FirstCond = true;
+ startExplicitITBlock(Cond, Mask);
+ MO.setImm(getITMaskEncoding());
break;
}
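The deleted block above spelled out the IT mask convention: internally the low bit of the condition implies 't', so when the condition code's low bit is 0 the trailing mask bits are XOR-toggled. A minimal sketch of that computation, which getITMaskEncoding() is assumed to reproduce from the recorded IT state:

  #include <cassert>

  // Reconstructs the encoding the removed inline code performed: toggle the
  // mask bits above the terminating 1 bit when the condition's low bit is 0,
  // keeping the stop bit itself intact.
  static unsigned encodeITMask(unsigned Cond, unsigned Mask) {
    assert(Mask != 0 && "illegal IT mask value!");
    unsigned TZ = 0;
    while (((Mask >> TZ) & 1) == 0)  // countTrailingZeros(Mask)
      ++TZ;
    assert(TZ <= 3 && "illegal IT mask value!");
    if ((Cond & 1) == 0)
      Mask ^= (0xE << TZ) & 0xF;     // flip the 't'/'e' bits above the stop bit
    return Mask;
  }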
case ARM::t2LSLrr:
@@ -8766,7 +8909,7 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
;
// If we're parsing Thumb1, reject it completely.
if (isThumbOne() && Inst.getOperand(OpNo).getReg() != ARM::CPSR)
- return Match_MnemonicFail;
+ return Match_RequiresFlagSetting;
// If we're parsing Thumb2, which form is legal depends on whether we're
// in an IT block.
if (isThumbTwo() && Inst.getOperand(OpNo).getReg() != ARM::CPSR &&
@@ -8807,6 +8950,132 @@ template <> inline bool IsCPSRDead<MCInst>(MCInst *Instr) {
}
}
+// Returns true if Inst is unpredictable when it appears in an IT block but is
+// not the last instruction in the block.
+bool ARMAsmParser::isITBlockTerminator(MCInst &Inst) const {
+ const MCInstrDesc &MCID = MII.get(Inst.getOpcode());
+
+ // All branch & call instructions terminate IT blocks.
+ if (MCID.isTerminator() || MCID.isCall() || MCID.isReturn() ||
+ MCID.isBranch() || MCID.isIndirectBranch())
+ return true;
+
+ // Any arithmetic instruction which writes to the PC also terminates the IT
+ // block.
+ for (unsigned OpIdx = 0; OpIdx < MCID.getNumDefs(); ++OpIdx) {
+ MCOperand &Op = Inst.getOperand(OpIdx);
+ if (Op.isReg() && Op.getReg() == ARM::PC)
+ return true;
+ }
+
+ if (MCID.hasImplicitDefOfPhysReg(ARM::PC, MRI))
+ return true;
+
+ // Instructions with variable operand lists can also write to the PC through
+ // one of those operands. We only care about Thumb instructions here, as ARM
+ // instructions obviously can't be in an IT block.
+ switch (Inst.getOpcode()) {
+ case ARM::t2LDMIA:
+ case ARM::t2LDMIA_UPD:
+ case ARM::t2LDMDB:
+ case ARM::t2LDMDB_UPD:
+ if (listContainsReg(Inst, 3, ARM::PC))
+ return true;
+ break;
+ case ARM::tPOP:
+ if (listContainsReg(Inst, 2, ARM::PC))
+ return true;
+ break;
+ }
+
+ return false;
+}
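listContainsReg() is referenced here but defined outside this hunk; the shape assumed for it is a simple scan of the trailing register-list operands:

  #include "llvm/MC/MCInst.h"

  // Hypothetical sketch of the helper used above: report whether the register
  // list starting at operand OpIdx mentions Reg (e.g. PC in a POP/LDM list).
  static bool listContainsRegSketch(const llvm::MCInst &Inst, unsigned OpIdx,
                                    unsigned Reg) {
    for (unsigned I = OpIdx, E = Inst.getNumOperands(); I < E; ++I) {
      const llvm::MCOperand &Op = Inst.getOperand(I);
      if (Op.isReg() && Op.getReg() == Reg)
        return true;
    }
    return false;
  }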
+
+unsigned ARMAsmParser::MatchInstruction(OperandVector &Operands, MCInst &Inst,
+ uint64_t &ErrorInfo,
+ bool MatchingInlineAsm,
+ bool &EmitInITBlock,
+ MCStreamer &Out) {
+ // If we can't use an implicit IT block here, just match as normal.
+ if (inExplicitITBlock() || !isThumbTwo() || !useImplicitITThumb())
+ return MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm);
+
+ // Try to match the instruction in an extension of the current IT block (if
+ // there is one).
+ if (inImplicitITBlock()) {
+ extendImplicitITBlock(ITState.Cond);
+ if (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm) ==
+ Match_Success) {
+ // The match succeeded, but we still have to check that the instruction is
+ // valid in this implicit IT block.
+ const MCInstrDesc &MCID = MII.get(Inst.getOpcode());
+ if (MCID.isPredicable()) {
+ ARMCC::CondCodes InstCond =
+ (ARMCC::CondCodes)Inst.getOperand(MCID.findFirstPredOperandIdx())
+ .getImm();
+ ARMCC::CondCodes ITCond = currentITCond();
+ if (InstCond == ITCond) {
+ EmitInITBlock = true;
+ return Match_Success;
+ } else if (InstCond == ARMCC::getOppositeCondition(ITCond)) {
+ invertCurrentITCondition();
+ EmitInITBlock = true;
+ return Match_Success;
+ }
+ }
+ }
+ rewindImplicitITPosition();
+ }
+
+ // Finish the current IT block, and try to match outside any IT block.
+ flushPendingInstructions(Out);
+ unsigned PlainMatchResult =
+ MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm);
+ if (PlainMatchResult == Match_Success) {
+ const MCInstrDesc &MCID = MII.get(Inst.getOpcode());
+ if (MCID.isPredicable()) {
+ ARMCC::CondCodes InstCond =
+ (ARMCC::CondCodes)Inst.getOperand(MCID.findFirstPredOperandIdx())
+ .getImm();
+ // Some forms of the branch instruction have their own condition code
+ // fields, so can be conditionally executed without an IT block.
+ if (Inst.getOpcode() == ARM::tBcc || Inst.getOpcode() == ARM::t2Bcc) {
+ EmitInITBlock = false;
+ return Match_Success;
+ }
+ if (InstCond == ARMCC::AL) {
+ EmitInITBlock = false;
+ return Match_Success;
+ }
+ } else {
+ EmitInITBlock = false;
+ return Match_Success;
+ }
+ }
+
+ // Try to match in a new IT block. The matcher doesn't check the actual
+ // condition, so we create an IT block with a dummy condition, and fix it up
+ // once we know the actual condition.
+ startImplicitITBlock();
+ if (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm) ==
+ Match_Success) {
+ const MCInstrDesc &MCID = MII.get(Inst.getOpcode());
+ if (MCID.isPredicable()) {
+ ITState.Cond =
+ (ARMCC::CondCodes)Inst.getOperand(MCID.findFirstPredOperandIdx())
+ .getImm();
+ EmitInITBlock = true;
+ return Match_Success;
+ }
+ }
+ discardImplicitITBlock();
+
+ // If none of these succeed, return the error we got when trying to match
+ // outside any IT blocks.
+ EmitInITBlock = false;
+ return PlainMatchResult;
+}
+
static const char *getSubtargetFeatureName(uint64_t Val);
bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
@@ -8814,9 +9083,11 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
bool MatchingInlineAsm) {
MCInst Inst;
unsigned MatchResult;
+ bool PendConditionalInstruction = false;
+
+ MatchResult = MatchInstruction(Operands, Inst, ErrorInfo, MatchingInlineAsm,
+ PendConditionalInstruction, Out);
- MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo,
- MatchingInlineAsm);
switch (MatchResult) {
case Match_Success:
// Context sensitive operand constraints aren't handled by the matcher,
@@ -8856,7 +9127,13 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return false;
Inst.setLoc(IDLoc);
- Out.EmitInstruction(Inst, getSTI());
+ if (PendConditionalInstruction) {
+ PendingConditionalInsts.push_back(Inst);
+ if (isITBlockFull() || isITBlockTerminator(Inst))
+ flushPendingInstructions(Out);
+ } else {
+ Out.EmitInstruction(Inst, getSTI());
+ }
return false;
case Match_MissingFeature: {
assert(ErrorInfo && "Unknown missing feature!");
@@ -8898,6 +9175,8 @@ bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return Error(IDLoc, "instruction variant requires Thumb2");
case Match_RequiresV8:
return Error(IDLoc, "instruction variant requires ARMv8 or later");
+ case Match_RequiresFlagSetting:
+ return Error(IDLoc, "no flag-preserving variant of this instruction available");
case Match_ImmRange0_15: {
SMLoc ErrorLoc = ((ARMOperand &)*Operands[ErrorInfo]).getStartLoc();
if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc;
@@ -8958,78 +9237,79 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getIdentifier();
if (IDVal == ".word")
- return parseLiteralValues(4, DirectiveID.getLoc());
+ parseLiteralValues(4, DirectiveID.getLoc());
else if (IDVal == ".short" || IDVal == ".hword")
- return parseLiteralValues(2, DirectiveID.getLoc());
+ parseLiteralValues(2, DirectiveID.getLoc());
else if (IDVal == ".thumb")
- return parseDirectiveThumb(DirectiveID.getLoc());
+ parseDirectiveThumb(DirectiveID.getLoc());
else if (IDVal == ".arm")
- return parseDirectiveARM(DirectiveID.getLoc());
+ parseDirectiveARM(DirectiveID.getLoc());
else if (IDVal == ".thumb_func")
- return parseDirectiveThumbFunc(DirectiveID.getLoc());
+ parseDirectiveThumbFunc(DirectiveID.getLoc());
else if (IDVal == ".code")
- return parseDirectiveCode(DirectiveID.getLoc());
+ parseDirectiveCode(DirectiveID.getLoc());
else if (IDVal == ".syntax")
- return parseDirectiveSyntax(DirectiveID.getLoc());
+ parseDirectiveSyntax(DirectiveID.getLoc());
else if (IDVal == ".unreq")
- return parseDirectiveUnreq(DirectiveID.getLoc());
+ parseDirectiveUnreq(DirectiveID.getLoc());
else if (IDVal == ".fnend")
- return parseDirectiveFnEnd(DirectiveID.getLoc());
+ parseDirectiveFnEnd(DirectiveID.getLoc());
else if (IDVal == ".cantunwind")
- return parseDirectiveCantUnwind(DirectiveID.getLoc());
+ parseDirectiveCantUnwind(DirectiveID.getLoc());
else if (IDVal == ".personality")
- return parseDirectivePersonality(DirectiveID.getLoc());
+ parseDirectivePersonality(DirectiveID.getLoc());
else if (IDVal == ".handlerdata")
- return parseDirectiveHandlerData(DirectiveID.getLoc());
+ parseDirectiveHandlerData(DirectiveID.getLoc());
else if (IDVal == ".setfp")
- return parseDirectiveSetFP(DirectiveID.getLoc());
+ parseDirectiveSetFP(DirectiveID.getLoc());
else if (IDVal == ".pad")
- return parseDirectivePad(DirectiveID.getLoc());
+ parseDirectivePad(DirectiveID.getLoc());
else if (IDVal == ".save")
- return parseDirectiveRegSave(DirectiveID.getLoc(), false);
+ parseDirectiveRegSave(DirectiveID.getLoc(), false);
else if (IDVal == ".vsave")
- return parseDirectiveRegSave(DirectiveID.getLoc(), true);
+ parseDirectiveRegSave(DirectiveID.getLoc(), true);
else if (IDVal == ".ltorg" || IDVal == ".pool")
- return parseDirectiveLtorg(DirectiveID.getLoc());
+ parseDirectiveLtorg(DirectiveID.getLoc());
else if (IDVal == ".even")
- return parseDirectiveEven(DirectiveID.getLoc());
+ parseDirectiveEven(DirectiveID.getLoc());
else if (IDVal == ".personalityindex")
- return parseDirectivePersonalityIndex(DirectiveID.getLoc());
+ parseDirectivePersonalityIndex(DirectiveID.getLoc());
else if (IDVal == ".unwind_raw")
- return parseDirectiveUnwindRaw(DirectiveID.getLoc());
+ parseDirectiveUnwindRaw(DirectiveID.getLoc());
else if (IDVal == ".movsp")
- return parseDirectiveMovSP(DirectiveID.getLoc());
+ parseDirectiveMovSP(DirectiveID.getLoc());
else if (IDVal == ".arch_extension")
- return parseDirectiveArchExtension(DirectiveID.getLoc());
+ parseDirectiveArchExtension(DirectiveID.getLoc());
else if (IDVal == ".align")
- return parseDirectiveAlign(DirectiveID.getLoc());
+ return parseDirectiveAlign(DirectiveID.getLoc()); // Use Generic on failure.
else if (IDVal == ".thumb_set")
- return parseDirectiveThumbSet(DirectiveID.getLoc());
-
- if (!IsMachO && !IsCOFF) {
+ parseDirectiveThumbSet(DirectiveID.getLoc());
+ else if (!IsMachO && !IsCOFF) {
if (IDVal == ".arch")
- return parseDirectiveArch(DirectiveID.getLoc());
+ parseDirectiveArch(DirectiveID.getLoc());
else if (IDVal == ".cpu")
- return parseDirectiveCPU(DirectiveID.getLoc());
+ parseDirectiveCPU(DirectiveID.getLoc());
else if (IDVal == ".eabi_attribute")
- return parseDirectiveEabiAttr(DirectiveID.getLoc());
+ parseDirectiveEabiAttr(DirectiveID.getLoc());
else if (IDVal == ".fpu")
- return parseDirectiveFPU(DirectiveID.getLoc());
+ parseDirectiveFPU(DirectiveID.getLoc());
else if (IDVal == ".fnstart")
- return parseDirectiveFnStart(DirectiveID.getLoc());
+ parseDirectiveFnStart(DirectiveID.getLoc());
else if (IDVal == ".inst")
- return parseDirectiveInst(DirectiveID.getLoc());
+ parseDirectiveInst(DirectiveID.getLoc());
else if (IDVal == ".inst.n")
- return parseDirectiveInst(DirectiveID.getLoc(), 'n');
+ parseDirectiveInst(DirectiveID.getLoc(), 'n');
else if (IDVal == ".inst.w")
- return parseDirectiveInst(DirectiveID.getLoc(), 'w');
+ parseDirectiveInst(DirectiveID.getLoc(), 'w');
else if (IDVal == ".object_arch")
- return parseDirectiveObjectArch(DirectiveID.getLoc());
+ parseDirectiveObjectArch(DirectiveID.getLoc());
else if (IDVal == ".tlsdescseq")
- return parseDirectiveTLSDescSeq(DirectiveID.getLoc());
- }
-
- return true;
+ parseDirectiveTLSDescSeq(DirectiveID.getLoc());
+ else
+ return true;
+ } else
+ return true;
+ return false;
}
/// parseLiteralValues
@@ -9037,47 +9317,22 @@ bool ARMAsmParser::ParseDirective(AsmToken DirectiveID) {
/// ::= .short expression [, expression]*
/// ::= .word expression [, expression]*
bool ARMAsmParser::parseLiteralValues(unsigned Size, SMLoc L) {
- MCAsmParser &Parser = getParser();
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- for (;;) {
- const MCExpr *Value;
- if (getParser().parseExpression(Value)) {
- Parser.eatToEndOfStatement();
- return false;
- }
-
- getParser().getStreamer().EmitValue(Value, Size, L);
-
- if (getLexer().is(AsmToken::EndOfStatement))
- break;
-
- // FIXME: Improve diagnostic.
- if (getLexer().isNot(AsmToken::Comma)) {
- Error(L, "unexpected token in directive");
- return false;
- }
- Parser.Lex();
- }
- }
-
- Parser.Lex();
- return false;
+ auto parseOne = [&]() -> bool {
+ const MCExpr *Value;
+ if (getParser().parseExpression(Value))
+ return true;
+ getParser().getStreamer().EmitValue(Value, Size, L);
+ return false;
+ };
+ return (parseMany(parseOne));
}
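The rewritten directive parsers above lean on the generic MCAsmParser helpers (check, parseToken, parseOptionalToken, parseMany) instead of hand-rolled loops ending in eatToEndOfStatement. A hedged, free-standing sketch of the loop that parseMany() is assumed to run when it drives the parseOne lambda:

  #include "llvm/MC/MCParser/MCAsmLexer.h"
  #include "llvm/MC/MCParser/MCAsmParser.h"

  // Assumed behaviour of MCAsmParser::parseMany(), shown as a sketch: invoke
  // parseOne for each comma-separated item until the end of statement, and
  // report failure by returning true as soon as any step fails.
  static bool parseManySketch(llvm::MCAsmParser &P,
                              llvm::function_ref<bool()> parseOne,
                              bool hasComma = true) {
    if (P.parseOptionalToken(llvm::AsmToken::EndOfStatement))
      return false;                 // an empty list is accepted
    while (true) {
      if (parseOne())
        return true;                // element parser already emitted an error
      if (P.parseOptionalToken(llvm::AsmToken::EndOfStatement))
        return false;               // clean end of the list
      if (hasComma && P.parseToken(llvm::AsmToken::Comma, "expected comma"))
        return true;                // anything other than ',' or EOS is an error
    }
  }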
/// parseDirectiveThumb
/// ::= .thumb
bool ARMAsmParser::parseDirectiveThumb(SMLoc L) {
- MCAsmParser &Parser = getParser();
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- Error(L, "unexpected token in directive");
- return false;
- }
- Parser.Lex();
-
- if (!hasThumb()) {
- Error(L, "target does not support Thumb mode");
- return false;
- }
+ if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive") ||
+ check(!hasThumb(), L, "target does not support Thumb mode"))
+ return true;
if (!isThumb())
SwitchMode();
@@ -9089,26 +9344,20 @@ bool ARMAsmParser::parseDirectiveThumb(SMLoc L) {
/// parseDirectiveARM
/// ::= .arm
bool ARMAsmParser::parseDirectiveARM(SMLoc L) {
- MCAsmParser &Parser = getParser();
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- Error(L, "unexpected token in directive");
- return false;
- }
- Parser.Lex();
-
- if (!hasARM()) {
- Error(L, "target does not support ARM mode");
- return false;
- }
+ if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive") ||
+ check(!hasARM(), L, "target does not support ARM mode"))
+ return true;
if (isThumb())
SwitchMode();
-
getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
return false;
}
void ARMAsmParser::onLabelParsed(MCSymbol *Symbol) {
+ // We need to flush the current implicit IT block on a label, because it is
+ // not legal to branch into an IT block.
+ flushPendingInstructions(getStreamer());
if (NextSymbolIsThumb) {
getParser().getStreamer().EmitThumbFunc(Symbol);
NextSymbolIsThumb = false;
@@ -9124,27 +9373,24 @@ bool ARMAsmParser::parseDirectiveThumbFunc(SMLoc L) {
// Darwin asm has (optionally) function name after .thumb_func directive
// ELF doesn't
- if (IsMachO) {
- const AsmToken &Tok = Parser.getTok();
- if (Tok.isNot(AsmToken::EndOfStatement)) {
- if (Tok.isNot(AsmToken::Identifier) && Tok.isNot(AsmToken::String)) {
- Error(L, "unexpected token in .thumb_func directive");
- return false;
- }
- MCSymbol *Func =
- getParser().getContext().getOrCreateSymbol(Tok.getIdentifier());
+ if (IsMachO) {
+ if (Parser.getTok().is(AsmToken::Identifier) ||
+ Parser.getTok().is(AsmToken::String)) {
+ MCSymbol *Func = getParser().getContext().getOrCreateSymbol(
+ Parser.getTok().getIdentifier());
getParser().getStreamer().EmitThumbFunc(Func);
- Parser.Lex(); // Consume the identifier token.
+ Parser.Lex();
+ if (parseToken(AsmToken::EndOfStatement,
+ "unexpected token in '.thumb_func' directive"))
+ return true;
return false;
}
}
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- Error(Parser.getTok().getLoc(), "unexpected token in directive");
- Parser.eatToEndOfStatement();
- return false;
- }
+ if (parseToken(AsmToken::EndOfStatement,
+ "unexpected token in '.thumb_func' directive"))
+ return true;
NextSymbolIsThumb = true;
return false;
@@ -9161,21 +9407,13 @@ bool ARMAsmParser::parseDirectiveSyntax(SMLoc L) {
}
StringRef Mode = Tok.getString();
- if (Mode == "unified" || Mode == "UNIFIED") {
- Parser.Lex();
- } else if (Mode == "divided" || Mode == "DIVIDED") {
- Error(L, "'.syntax divided' arm asssembly not supported");
- return false;
- } else {
- Error(L, "unrecognized syntax mode in .syntax directive");
- return false;
- }
-
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- Error(Parser.getTok().getLoc(), "unexpected token in directive");
- return false;
- }
Parser.Lex();
+ if (check(Mode == "divided" || Mode == "DIVIDED", L,
+ "'.syntax divided' arm assembly not supported") ||
+ check(Mode != "unified" && Mode != "UNIFIED", L,
+ "unrecognized syntax mode in .syntax directive") ||
+ parseToken(AsmToken::EndOfStatement, "unexpected token in directive"))
+ return true;
// TODO tell the MC streamer the mode
// getParser().getStreamer().Emit???();
@@ -9187,10 +9425,8 @@ bool ARMAsmParser::parseDirectiveSyntax(SMLoc L) {
bool ARMAsmParser::parseDirectiveCode(SMLoc L) {
MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
- if (Tok.isNot(AsmToken::Integer)) {
- Error(L, "unexpected token in .code directive");
- return false;
- }
+ if (Tok.isNot(AsmToken::Integer))
+ return Error(L, "unexpected token in .code directive");
int64_t Val = Parser.getTok().getIntVal();
if (Val != 16 && Val != 32) {
Error(L, "invalid operand to .code directive");
@@ -9198,26 +9434,19 @@ bool ARMAsmParser::parseDirectiveCode(SMLoc L) {
}
Parser.Lex();
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- Error(Parser.getTok().getLoc(), "unexpected token in directive");
- return false;
- }
- Parser.Lex();
+ if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive"))
+ return true;
if (Val == 16) {
- if (!hasThumb()) {
- Error(L, "target does not support Thumb mode");
- return false;
- }
+ if (!hasThumb())
+ return Error(L, "target does not support Thumb mode");
if (!isThumb())
SwitchMode();
getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
} else {
- if (!hasARM()) {
- Error(L, "target does not support ARM mode");
- return false;
- }
+ if (!hasARM())
+ return Error(L, "target does not support ARM mode");
if (isThumb())
SwitchMode();
@@ -9234,25 +9463,15 @@ bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
Parser.Lex(); // Eat the '.req' token.
unsigned Reg;
SMLoc SRegLoc, ERegLoc;
- if (ParseRegister(Reg, SRegLoc, ERegLoc)) {
- Parser.eatToEndOfStatement();
- Error(SRegLoc, "register name expected");
- return false;
- }
-
- // Shouldn't be anything else.
- if (Parser.getTok().isNot(AsmToken::EndOfStatement)) {
- Parser.eatToEndOfStatement();
- Error(Parser.getTok().getLoc(), "unexpected input in .req directive.");
- return false;
- }
-
- Parser.Lex(); // Consume the EndOfStatement
+ if (check(ParseRegister(Reg, SRegLoc, ERegLoc), SRegLoc,
+ "register name expected") ||
+ parseToken(AsmToken::EndOfStatement,
+ "unexpected input in .req directive."))
+ return true;
- if (RegisterReqs.insert(std::make_pair(Name, Reg)).first->second != Reg) {
- Error(SRegLoc, "redefinition of '" + Name + "' does not match original.");
- return false;
- }
+ if (RegisterReqs.insert(std::make_pair(Name, Reg)).first->second != Reg)
+ return Error(SRegLoc,
+ "redefinition of '" + Name + "' does not match original.");
return false;
}
@@ -9261,13 +9480,13 @@ bool ARMAsmParser::parseDirectiveReq(StringRef Name, SMLoc L) {
/// ::= .unreq registername
bool ARMAsmParser::parseDirectiveUnreq(SMLoc L) {
MCAsmParser &Parser = getParser();
- if (Parser.getTok().isNot(AsmToken::Identifier)) {
- Parser.eatToEndOfStatement();
- Error(L, "unexpected input in .unreq directive.");
- return false;
- }
+ if (Parser.getTok().isNot(AsmToken::Identifier))
+ return Error(L, "unexpected input in .unreq directive.");
RegisterReqs.erase(Parser.getTok().getIdentifier().lower());
Parser.Lex(); // Eat the identifier.
+ if (parseToken(AsmToken::EndOfStatement,
+ "unexpected input in '.unreq' directive"))
+ return true;
return false;
}
@@ -9300,13 +9519,10 @@ void ARMAsmParser::FixModeAfterArchChange(bool WasThumb, SMLoc Loc) {
/// ::= .arch token
bool ARMAsmParser::parseDirectiveArch(SMLoc L) {
StringRef Arch = getParser().parseStringToEndOfStatement().trim();
-
unsigned ID = ARM::parseArch(Arch);
- if (ID == ARM::AK_INVALID) {
- Error(L, "Unknown arch name");
- return false;
- }
+ if (ID == ARM::AK_INVALID)
+ return Error(L, "Unknown arch name");
bool WasThumb = isThumb();
Triple T;
@@ -9332,7 +9548,6 @@ bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) {
Tag = ARMBuildAttrs::AttrTypeFromString(Name);
if (Tag == -1) {
Error(TagLoc, "attribute name not recognised: " + Name);
- Parser.eatToEndOfStatement();
return false;
}
Parser.Lex();
@@ -9340,27 +9555,18 @@ bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) {
const MCExpr *AttrExpr;
TagLoc = Parser.getTok().getLoc();
- if (Parser.parseExpression(AttrExpr)) {
- Parser.eatToEndOfStatement();
- return false;
- }
+ if (Parser.parseExpression(AttrExpr))
+ return true;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(AttrExpr);
- if (!CE) {
- Error(TagLoc, "expected numeric constant");
- Parser.eatToEndOfStatement();
- return false;
- }
+ if (check(!CE, TagLoc, "expected numeric constant"))
+ return true;
Tag = CE->getValue();
}
- if (Parser.getTok().isNot(AsmToken::Comma)) {
- Error(Parser.getTok().getLoc(), "comma expected");
- Parser.eatToEndOfStatement();
- return false;
- }
- Parser.Lex(); // skip comma
+ if (Parser.parseToken(AsmToken::Comma, "comma expected"))
+ return true;
StringRef StringValue = "";
bool IsStringValue = false;
@@ -9383,44 +9589,32 @@ bool ARMAsmParser::parseDirectiveEabiAttr(SMLoc L) {
if (IsIntegerValue) {
const MCExpr *ValueExpr;
SMLoc ValueExprLoc = Parser.getTok().getLoc();
- if (Parser.parseExpression(ValueExpr)) {
- Parser.eatToEndOfStatement();
- return false;
- }
+ if (Parser.parseExpression(ValueExpr))
+ return true;
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(ValueExpr);
- if (!CE) {
- Error(ValueExprLoc, "expected numeric constant");
- Parser.eatToEndOfStatement();
- return false;
- }
-
+ if (!CE)
+ return Error(ValueExprLoc, "expected numeric constant");
IntegerValue = CE->getValue();
}
if (Tag == ARMBuildAttrs::compatibility) {
- if (Parser.getTok().isNot(AsmToken::Comma))
- IsStringValue = false;
- if (Parser.getTok().isNot(AsmToken::Comma)) {
- Error(Parser.getTok().getLoc(), "comma expected");
- Parser.eatToEndOfStatement();
- return false;
- } else {
- Parser.Lex();
- }
+ if (Parser.parseToken(AsmToken::Comma, "comma expected"))
+ return true;
}
if (IsStringValue) {
- if (Parser.getTok().isNot(AsmToken::String)) {
- Error(Parser.getTok().getLoc(), "bad string constant");
- Parser.eatToEndOfStatement();
- return false;
- }
+ if (Parser.getTok().isNot(AsmToken::String))
+ return Error(Parser.getTok().getLoc(), "bad string constant");
StringValue = Parser.getTok().getStringContents();
Parser.Lex();
}
+ if (Parser.parseToken(AsmToken::EndOfStatement,
+ "unexpected token in '.eabi_attribute' directive"))
+ return true;
+
if (IsIntegerValue && IsStringValue) {
assert(Tag == ARMBuildAttrs::compatibility);
getTargetStreamer().emitIntTextAttribute(Tag, IntegerValue, StringValue);
@@ -9439,10 +9633,8 @@ bool ARMAsmParser::parseDirectiveCPU(SMLoc L) {
// FIXME: This is using table-gen data, but should be moved to
// ARMTargetParser once that is table-gen'd.
- if (!getSTI().isCPUStringValid(CPU)) {
- Error(L, "Unknown CPU name");
- return false;
- }
+ if (!getSTI().isCPUStringValid(CPU))
+ return Error(L, "Unknown CPU name");
bool WasThumb = isThumb();
MCSubtargetInfo &STI = copySTI();
@@ -9459,11 +9651,9 @@ bool ARMAsmParser::parseDirectiveFPU(SMLoc L) {
StringRef FPU = getParser().parseStringToEndOfStatement().trim();
unsigned ID = ARM::parseFPU(FPU);
- std::vector<const char *> Features;
- if (!ARM::getFPUFeatures(ID, Features)) {
- Error(FPUNameLoc, "Unknown FPU name");
- return false;
- }
+ std::vector<StringRef> Features;
+ if (!ARM::getFPUFeatures(ID, Features))
+ return Error(FPUNameLoc, "Unknown FPU name");
MCSubtargetInfo &STI = copySTI();
for (auto Feature : Features)
@@ -9477,10 +9667,14 @@ bool ARMAsmParser::parseDirectiveFPU(SMLoc L) {
/// parseDirectiveFnStart
/// ::= .fnstart
bool ARMAsmParser::parseDirectiveFnStart(SMLoc L) {
+ if (parseToken(AsmToken::EndOfStatement,
+ "unexpected token in '.fnstart' directive"))
+ return true;
+
if (UC.hasFnStart()) {
Error(L, ".fnstart starts before the end of previous one");
UC.emitFnStartLocNotes();
- return false;
+ return true;
}
// Reset the unwind directives parser state
@@ -9495,11 +9689,12 @@ bool ARMAsmParser::parseDirectiveFnStart(SMLoc L) {
/// parseDirectiveFnEnd
/// ::= .fnend
bool ARMAsmParser::parseDirectiveFnEnd(SMLoc L) {
+ if (parseToken(AsmToken::EndOfStatement,
+ "unexpected token in '.fnend' directive"))
+ return true;
// Check the ordering of unwind directives
- if (!UC.hasFnStart()) {
- Error(L, ".fnstart must precede .fnend directive");
- return false;
- }
+ if (!UC.hasFnStart())
+ return Error(L, ".fnstart must precede .fnend directive");
// Reset the unwind directives parser state
getTargetStreamer().emitFnEnd();
@@ -9511,22 +9706,24 @@ bool ARMAsmParser::parseDirectiveFnEnd(SMLoc L) {
/// parseDirectiveCantUnwind
/// ::= .cantunwind
bool ARMAsmParser::parseDirectiveCantUnwind(SMLoc L) {
- UC.recordCantUnwind(L);
+ if (parseToken(AsmToken::EndOfStatement,
+ "unexpected token in '.cantunwind' directive"))
+ return true;
+ UC.recordCantUnwind(L);
// Check the ordering of unwind directives
- if (!UC.hasFnStart()) {
- Error(L, ".fnstart must precede .cantunwind directive");
- return false;
- }
+ if (check(!UC.hasFnStart(), L, ".fnstart must precede .cantunwind directive"))
+ return true;
+
if (UC.hasHandlerData()) {
Error(L, ".cantunwind can't be used with .handlerdata directive");
UC.emitHandlerDataLocNotes();
- return false;
+ return true;
}
if (UC.hasPersonality()) {
Error(L, ".cantunwind can't be used with .personality directive");
UC.emitPersonalityLocNotes();
- return false;
+ return true;
}
getTargetStreamer().emitCantUnwind();
@@ -9539,38 +9736,36 @@ bool ARMAsmParser::parseDirectivePersonality(SMLoc L) {
MCAsmParser &Parser = getParser();
bool HasExistingPersonality = UC.hasPersonality();
+ // Parse the name of the personality routine
+ if (Parser.getTok().isNot(AsmToken::Identifier))
+ return Error(L, "unexpected input in .personality directive.");
+ StringRef Name(Parser.getTok().getIdentifier());
+ Parser.Lex();
+
+ if (parseToken(AsmToken::EndOfStatement,
+ "unexpected token in '.personality' directive"))
+ return true;
+
UC.recordPersonality(L);
// Check the ordering of unwind directives
- if (!UC.hasFnStart()) {
- Error(L, ".fnstart must precede .personality directive");
- return false;
- }
+ if (!UC.hasFnStart())
+ return Error(L, ".fnstart must precede .personality directive");
if (UC.cantUnwind()) {
Error(L, ".personality can't be used with .cantunwind directive");
UC.emitCantUnwindLocNotes();
- return false;
+ return true;
}
if (UC.hasHandlerData()) {
Error(L, ".personality must precede .handlerdata directive");
UC.emitHandlerDataLocNotes();
- return false;
+ return true;
}
if (HasExistingPersonality) {
- Parser.eatToEndOfStatement();
Error(L, "multiple personality directives");
UC.emitPersonalityLocNotes();
- return false;
- }
-
- // Parse the name of the personality routine
- if (Parser.getTok().isNot(AsmToken::Identifier)) {
- Parser.eatToEndOfStatement();
- Error(L, "unexpected input in .personality directive.");
- return false;
+ return true;
}
- StringRef Name(Parser.getTok().getIdentifier());
- Parser.Lex();
MCSymbol *PR = getParser().getContext().getOrCreateSymbol(Name);
getTargetStreamer().emitPersonality(PR);
@@ -9580,17 +9775,18 @@ bool ARMAsmParser::parseDirectivePersonality(SMLoc L) {
/// parseDirectiveHandlerData
/// ::= .handlerdata
bool ARMAsmParser::parseDirectiveHandlerData(SMLoc L) {
- UC.recordHandlerData(L);
+ if (parseToken(AsmToken::EndOfStatement,
+ "unexpected token in '.handlerdata' directive"))
+ return true;
+ UC.recordHandlerData(L);
// Check the ordering of unwind directives
- if (!UC.hasFnStart()) {
- Error(L, ".fnstart must precede .personality directive");
- return false;
- }
+ if (!UC.hasFnStart())
+ return Error(L, ".fnstart must precede .personality directive");
if (UC.cantUnwind()) {
Error(L, ".handlerdata can't be used with .cantunwind directive");
UC.emitCantUnwindLocNotes();
- return false;
+ return true;
}
getTargetStreamer().emitHandlerData();
@@ -9602,74 +9798,52 @@ bool ARMAsmParser::parseDirectiveHandlerData(SMLoc L) {
bool ARMAsmParser::parseDirectiveSetFP(SMLoc L) {
MCAsmParser &Parser = getParser();
// Check the ordering of unwind directives
- if (!UC.hasFnStart()) {
- Error(L, ".fnstart must precede .setfp directive");
- return false;
- }
- if (UC.hasHandlerData()) {
- Error(L, ".setfp must precede .handlerdata directive");
- return false;
- }
+ if (check(!UC.hasFnStart(), L, ".fnstart must precede .setfp directive") ||
+ check(UC.hasHandlerData(), L,
+ ".setfp must precede .handlerdata directive"))
+ return true;
// Parse fpreg
SMLoc FPRegLoc = Parser.getTok().getLoc();
int FPReg = tryParseRegister();
- if (FPReg == -1) {
- Error(FPRegLoc, "frame pointer register expected");
- return false;
- }
- // Consume comma
- if (Parser.getTok().isNot(AsmToken::Comma)) {
- Error(Parser.getTok().getLoc(), "comma expected");
- return false;
- }
- Parser.Lex(); // skip comma
+ if (check(FPReg == -1, FPRegLoc, "frame pointer register expected") ||
+ Parser.parseToken(AsmToken::Comma, "comma expected"))
+ return true;
// Parse spreg
SMLoc SPRegLoc = Parser.getTok().getLoc();
int SPReg = tryParseRegister();
- if (SPReg == -1) {
- Error(SPRegLoc, "stack pointer register expected");
- return false;
- }
-
- if (SPReg != ARM::SP && SPReg != UC.getFPReg()) {
- Error(SPRegLoc, "register should be either $sp or the latest fp register");
- return false;
- }
+ if (check(SPReg == -1, SPRegLoc, "stack pointer register expected") ||
+ check(SPReg != ARM::SP && SPReg != UC.getFPReg(), SPRegLoc,
+ "register should be either $sp or the latest fp register"))
+ return true;
// Update the frame pointer register
UC.saveFPReg(FPReg);
// Parse offset
int64_t Offset = 0;
- if (Parser.getTok().is(AsmToken::Comma)) {
- Parser.Lex(); // skip comma
-
+ if (Parser.parseOptionalToken(AsmToken::Comma)) {
if (Parser.getTok().isNot(AsmToken::Hash) &&
- Parser.getTok().isNot(AsmToken::Dollar)) {
- Error(Parser.getTok().getLoc(), "'#' expected");
- return false;
- }
+ Parser.getTok().isNot(AsmToken::Dollar))
+ return Error(Parser.getTok().getLoc(), "'#' expected");
Parser.Lex(); // skip hash token.
const MCExpr *OffsetExpr;
SMLoc ExLoc = Parser.getTok().getLoc();
SMLoc EndLoc;
- if (getParser().parseExpression(OffsetExpr, EndLoc)) {
- Error(ExLoc, "malformed setfp offset");
- return false;
- }
+ if (getParser().parseExpression(OffsetExpr, EndLoc))
+ return Error(ExLoc, "malformed setfp offset");
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(OffsetExpr);
- if (!CE) {
- Error(ExLoc, "setfp offset must be an immediate");
- return false;
- }
-
+ if (check(!CE, ExLoc, "setfp offset must be an immediate"))
+ return true;
Offset = CE->getValue();
}
+ if (Parser.parseToken(AsmToken::EndOfStatement))
+ return true;
+
getTargetStreamer().emitSetFP(static_cast<unsigned>(FPReg),
static_cast<unsigned>(SPReg), Offset);
return false;
@@ -9680,35 +9854,29 @@ bool ARMAsmParser::parseDirectiveSetFP(SMLoc L) {
bool ARMAsmParser::parseDirectivePad(SMLoc L) {
MCAsmParser &Parser = getParser();
// Check the ordering of unwind directives
- if (!UC.hasFnStart()) {
- Error(L, ".fnstart must precede .pad directive");
- return false;
- }
- if (UC.hasHandlerData()) {
- Error(L, ".pad must precede .handlerdata directive");
- return false;
- }
+ if (!UC.hasFnStart())
+ return Error(L, ".fnstart must precede .pad directive");
+ if (UC.hasHandlerData())
+ return Error(L, ".pad must precede .handlerdata directive");
// Parse the offset
if (Parser.getTok().isNot(AsmToken::Hash) &&
- Parser.getTok().isNot(AsmToken::Dollar)) {
- Error(Parser.getTok().getLoc(), "'#' expected");
- return false;
- }
+ Parser.getTok().isNot(AsmToken::Dollar))
+ return Error(Parser.getTok().getLoc(), "'#' expected");
Parser.Lex(); // skip hash token.
const MCExpr *OffsetExpr;
SMLoc ExLoc = Parser.getTok().getLoc();
SMLoc EndLoc;
- if (getParser().parseExpression(OffsetExpr, EndLoc)) {
- Error(ExLoc, "malformed pad offset");
- return false;
- }
+ if (getParser().parseExpression(OffsetExpr, EndLoc))
+ return Error(ExLoc, "malformed pad offset");
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(OffsetExpr);
- if (!CE) {
- Error(ExLoc, "pad offset must be an immediate");
- return false;
- }
+ if (!CE)
+ return Error(ExLoc, "pad offset must be an immediate");
+
+ if (parseToken(AsmToken::EndOfStatement,
+ "unexpected token in '.pad' directive"))
+ return true;
getTargetStreamer().emitPad(CE->getValue());
return false;
@@ -9719,30 +9887,23 @@ bool ARMAsmParser::parseDirectivePad(SMLoc L) {
/// ::= .vsave { registers }
bool ARMAsmParser::parseDirectiveRegSave(SMLoc L, bool IsVector) {
// Check the ordering of unwind directives
- if (!UC.hasFnStart()) {
- Error(L, ".fnstart must precede .save or .vsave directives");
- return false;
- }
- if (UC.hasHandlerData()) {
- Error(L, ".save or .vsave must precede .handlerdata directive");
- return false;
- }
+ if (!UC.hasFnStart())
+ return Error(L, ".fnstart must precede .save or .vsave directives");
+ if (UC.hasHandlerData())
+ return Error(L, ".save or .vsave must precede .handlerdata directive");
// RAII object to make sure parsed operands are deleted.
SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> Operands;
// Parse the register list
- if (parseRegisterList(Operands))
- return false;
+ if (parseRegisterList(Operands) ||
+ parseToken(AsmToken::EndOfStatement, "unexpected token in directive"))
+ return true;
ARMOperand &Op = (ARMOperand &)*Operands[0];
- if (!IsVector && !Op.isRegList()) {
- Error(L, ".save expects GPR registers");
- return false;
- }
- if (IsVector && !Op.isDPRRegList()) {
- Error(L, ".vsave expects DPR registers");
- return false;
- }
+ if (!IsVector && !Op.isRegList())
+ return Error(L, ".save expects GPR registers");
+ if (IsVector && !Op.isDPRRegList())
+ return Error(L, ".vsave expects DPR registers");
getTargetStreamer().emitRegSave(Op.getRegList(), IsVector);
return false;
@@ -9753,8 +9914,7 @@ bool ARMAsmParser::parseDirectiveRegSave(SMLoc L, bool IsVector) {
/// ::= .inst.n opcode [, ...]
/// ::= .inst.w opcode [, ...]
bool ARMAsmParser::parseDirectiveInst(SMLoc Loc, char Suffix) {
- MCAsmParser &Parser = getParser();
- int Width;
+ int Width = 4;
if (isThumb()) {
switch (Suffix) {
@@ -9762,96 +9922,68 @@ bool ARMAsmParser::parseDirectiveInst(SMLoc Loc, char Suffix) {
Width = 2;
break;
case 'w':
- Width = 4;
break;
default:
- Parser.eatToEndOfStatement();
- Error(Loc, "cannot determine Thumb instruction size, "
- "use inst.n/inst.w instead");
- return false;
+ return Error(Loc, "cannot determine Thumb instruction size, "
+ "use inst.n/inst.w instead");
}
} else {
- if (Suffix) {
- Parser.eatToEndOfStatement();
- Error(Loc, "width suffixes are invalid in ARM mode");
- return false;
- }
- Width = 4;
- }
-
- if (getLexer().is(AsmToken::EndOfStatement)) {
- Parser.eatToEndOfStatement();
- Error(Loc, "expected expression following directive");
- return false;
+ if (Suffix)
+ return Error(Loc, "width suffixes are invalid in ARM mode");
}
- for (;;) {
+ auto parseOne = [&]() -> bool {
const MCExpr *Expr;
-
- if (getParser().parseExpression(Expr)) {
- Error(Loc, "expected expression");
- return false;
- }
-
+ if (getParser().parseExpression(Expr))
+ return true;
const MCConstantExpr *Value = dyn_cast_or_null<MCConstantExpr>(Expr);
if (!Value) {
- Error(Loc, "expected constant expression");
- return false;
+ return Error(Loc, "expected constant expression");
}
switch (Width) {
case 2:
- if (Value->getValue() > 0xffff) {
- Error(Loc, "inst.n operand is too big, use inst.w instead");
- return false;
- }
+ if (Value->getValue() > 0xffff)
+ return Error(Loc, "inst.n operand is too big, use inst.w instead");
break;
case 4:
- if (Value->getValue() > 0xffffffff) {
- Error(Loc,
- StringRef(Suffix ? "inst.w" : "inst") + " operand is too big");
- return false;
- }
+ if (Value->getValue() > 0xffffffff)
+ return Error(Loc, StringRef(Suffix ? "inst.w" : "inst") +
+ " operand is too big");
break;
default:
llvm_unreachable("only supported widths are 2 and 4");
}
getTargetStreamer().emitInst(Value->getValue(), Suffix);
+ return false;
+ };
- if (getLexer().is(AsmToken::EndOfStatement))
- break;
-
- if (getLexer().isNot(AsmToken::Comma)) {
- Error(Loc, "unexpected token in directive");
- return false;
- }
-
- Parser.Lex();
- }
-
- Parser.Lex();
+ if (parseOptionalToken(AsmToken::EndOfStatement))
+ return Error(Loc, "expected expression following directive");
+ if (parseMany(parseOne))
+ return true;
return false;
}
/// parseDirectiveLtorg
/// ::= .ltorg | .pool
bool ARMAsmParser::parseDirectiveLtorg(SMLoc L) {
+ if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive"))
+ return true;
getTargetStreamer().emitCurrentConstantPool();
return false;
}
bool ARMAsmParser::parseDirectiveEven(SMLoc L) {
- const MCSection *Section = getStreamer().getCurrentSection().first;
+ const MCSection *Section = getStreamer().getCurrentSectionOnly();
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- TokError("unexpected token in directive");
- return false;
- }
+ if (parseToken(AsmToken::EndOfStatement, "unexpected token in directive"))
+ return true;
if (!Section) {
getStreamer().InitSections(false);
- Section = getStreamer().getCurrentSection().first;
+ Section = getStreamer().getCurrentSectionOnly();
}
assert(Section && "must have section to emit alignment");
@@ -9869,51 +10001,41 @@ bool ARMAsmParser::parseDirectivePersonalityIndex(SMLoc L) {
MCAsmParser &Parser = getParser();
bool HasExistingPersonality = UC.hasPersonality();
+ const MCExpr *IndexExpression;
+ SMLoc IndexLoc = Parser.getTok().getLoc();
+ if (Parser.parseExpression(IndexExpression) ||
+ parseToken(AsmToken::EndOfStatement,
+ "unexpected token in '.personalityindex' directive")) {
+ return true;
+ }
+
UC.recordPersonalityIndex(L);
if (!UC.hasFnStart()) {
- Parser.eatToEndOfStatement();
- Error(L, ".fnstart must precede .personalityindex directive");
- return false;
+ return Error(L, ".fnstart must precede .personalityindex directive");
}
if (UC.cantUnwind()) {
- Parser.eatToEndOfStatement();
Error(L, ".personalityindex cannot be used with .cantunwind");
UC.emitCantUnwindLocNotes();
- return false;
+ return true;
}
if (UC.hasHandlerData()) {
- Parser.eatToEndOfStatement();
Error(L, ".personalityindex must precede .handlerdata directive");
UC.emitHandlerDataLocNotes();
- return false;
+ return true;
}
if (HasExistingPersonality) {
- Parser.eatToEndOfStatement();
Error(L, "multiple personality directives");
UC.emitPersonalityLocNotes();
- return false;
- }
-
- const MCExpr *IndexExpression;
- SMLoc IndexLoc = Parser.getTok().getLoc();
- if (Parser.parseExpression(IndexExpression)) {
- Parser.eatToEndOfStatement();
- return false;
+ return true;
}
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(IndexExpression);
- if (!CE) {
- Parser.eatToEndOfStatement();
- Error(IndexLoc, "index must be a constant number");
- return false;
- }
- if (CE->getValue() < 0 ||
- CE->getValue() >= ARM::EHABI::NUM_PERSONALITY_INDEX) {
- Parser.eatToEndOfStatement();
- Error(IndexLoc, "personality routine index should be in range [0-3]");
- return false;
- }
+ if (!CE)
+ return Error(IndexLoc, "index must be a constant number");
+ if (CE->getValue() < 0 || CE->getValue() >= ARM::EHABI::NUM_PERSONALITY_INDEX)
+ return Error(IndexLoc,
+ "personality routine index should be in range [0-3]");
getTargetStreamer().emitPersonalityIndex(CE->getValue());
return false;
@@ -9923,81 +10045,51 @@ bool ARMAsmParser::parseDirectivePersonalityIndex(SMLoc L) {
/// ::= .unwind_raw offset, opcode [, opcode...]
bool ARMAsmParser::parseDirectiveUnwindRaw(SMLoc L) {
MCAsmParser &Parser = getParser();
- if (!UC.hasFnStart()) {
- Parser.eatToEndOfStatement();
- Error(L, ".fnstart must precede .unwind_raw directives");
- return false;
- }
-
int64_t StackOffset;
-
const MCExpr *OffsetExpr;
SMLoc OffsetLoc = getLexer().getLoc();
- if (getLexer().is(AsmToken::EndOfStatement) ||
- getParser().parseExpression(OffsetExpr)) {
- Error(OffsetLoc, "expected expression");
- Parser.eatToEndOfStatement();
- return false;
- }
+
+ if (!UC.hasFnStart())
+ return Error(L, ".fnstart must precede .unwind_raw directives");
+ if (getParser().parseExpression(OffsetExpr))
+ return Error(OffsetLoc, "expected expression");
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(OffsetExpr);
- if (!CE) {
- Error(OffsetLoc, "offset must be a constant");
- Parser.eatToEndOfStatement();
- return false;
- }
+ if (!CE)
+ return Error(OffsetLoc, "offset must be a constant");
StackOffset = CE->getValue();
- if (getLexer().isNot(AsmToken::Comma)) {
- Error(getLexer().getLoc(), "expected comma");
- Parser.eatToEndOfStatement();
- return false;
- }
- Parser.Lex();
+ if (Parser.parseToken(AsmToken::Comma, "expected comma"))
+ return true;
SmallVector<uint8_t, 16> Opcodes;
- for (;;) {
- const MCExpr *OE;
+ auto parseOne = [&]() -> bool {
+ const MCExpr *OE;
SMLoc OpcodeLoc = getLexer().getLoc();
- if (getLexer().is(AsmToken::EndOfStatement) || Parser.parseExpression(OE)) {
- Error(OpcodeLoc, "expected opcode expression");
- Parser.eatToEndOfStatement();
- return false;
- }
-
+ if (check(getLexer().is(AsmToken::EndOfStatement) ||
+ Parser.parseExpression(OE),
+ OpcodeLoc, "expected opcode expression"))
+ return true;
const MCConstantExpr *OC = dyn_cast<MCConstantExpr>(OE);
- if (!OC) {
- Error(OpcodeLoc, "opcode value must be a constant");
- Parser.eatToEndOfStatement();
- return false;
- }
-
+ if (!OC)
+ return Error(OpcodeLoc, "opcode value must be a constant");
const int64_t Opcode = OC->getValue();
- if (Opcode & ~0xff) {
- Error(OpcodeLoc, "invalid opcode");
- Parser.eatToEndOfStatement();
- return false;
- }
-
+ if (Opcode & ~0xff)
+ return Error(OpcodeLoc, "invalid opcode");
Opcodes.push_back(uint8_t(Opcode));
+ return false;
+ };
- if (getLexer().is(AsmToken::EndOfStatement))
- break;
-
- if (getLexer().isNot(AsmToken::Comma)) {
- Error(getLexer().getLoc(), "unexpected token in directive");
- Parser.eatToEndOfStatement();
- return false;
- }
-
- Parser.Lex();
- }
+ // Must have at least 1 element
+ SMLoc OpcodeLoc = getLexer().getLoc();
+ if (parseOptionalToken(AsmToken::EndOfStatement))
+ return Error(OpcodeLoc, "expected opcode expression");
+ if (parseMany(parseOne))
+ return true;
getTargetStreamer().emitUnwindRaw(StackOffset, Opcodes);
-
- Parser.Lex();
return false;
}
@@ -10006,22 +10098,17 @@ bool ARMAsmParser::parseDirectiveUnwindRaw(SMLoc L) {
bool ARMAsmParser::parseDirectiveTLSDescSeq(SMLoc L) {
MCAsmParser &Parser = getParser();
- if (getLexer().isNot(AsmToken::Identifier)) {
- TokError("expected variable after '.tlsdescseq' directive");
- Parser.eatToEndOfStatement();
- return false;
- }
+ if (getLexer().isNot(AsmToken::Identifier))
+ return TokError("expected variable after '.tlsdescseq' directive");
const MCSymbolRefExpr *SRE =
MCSymbolRefExpr::create(Parser.getTok().getIdentifier(),
MCSymbolRefExpr::VK_ARM_TLSDESCSEQ, getContext());
Lex();
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- Error(Parser.getTok().getLoc(), "unexpected token");
- Parser.eatToEndOfStatement();
- return false;
- }
+ if (parseToken(AsmToken::EndOfStatement,
+ "unexpected token in '.tlsdescseq' directive"))
+ return true;
getTargetStreamer().AnnotateTLSDescriptorSequence(SRE);
return false;
@@ -10031,60 +10118,40 @@ bool ARMAsmParser::parseDirectiveTLSDescSeq(SMLoc L) {
/// ::= .movsp reg [, #offset]
bool ARMAsmParser::parseDirectiveMovSP(SMLoc L) {
MCAsmParser &Parser = getParser();
- if (!UC.hasFnStart()) {
- Parser.eatToEndOfStatement();
- Error(L, ".fnstart must precede .movsp directives");
- return false;
- }
- if (UC.getFPReg() != ARM::SP) {
- Parser.eatToEndOfStatement();
- Error(L, "unexpected .movsp directive");
- return false;
- }
+ if (!UC.hasFnStart())
+ return Error(L, ".fnstart must precede .movsp directives");
+ if (UC.getFPReg() != ARM::SP)
+ return Error(L, "unexpected .movsp directive");
SMLoc SPRegLoc = Parser.getTok().getLoc();
int SPReg = tryParseRegister();
- if (SPReg == -1) {
- Parser.eatToEndOfStatement();
- Error(SPRegLoc, "register expected");
- return false;
- }
-
- if (SPReg == ARM::SP || SPReg == ARM::PC) {
- Parser.eatToEndOfStatement();
- Error(SPRegLoc, "sp and pc are not permitted in .movsp directive");
- return false;
- }
+ if (SPReg == -1)
+ return Error(SPRegLoc, "register expected");
+ if (SPReg == ARM::SP || SPReg == ARM::PC)
+ return Error(SPRegLoc, "sp and pc are not permitted in .movsp directive");
int64_t Offset = 0;
- if (Parser.getTok().is(AsmToken::Comma)) {
- Parser.Lex();
-
- if (Parser.getTok().isNot(AsmToken::Hash)) {
- Error(Parser.getTok().getLoc(), "expected #constant");
- Parser.eatToEndOfStatement();
- return false;
- }
- Parser.Lex();
+ if (Parser.parseOptionalToken(AsmToken::Comma)) {
+ if (Parser.parseToken(AsmToken::Hash, "expected #constant"))
+ return true;
const MCExpr *OffsetExpr;
SMLoc OffsetLoc = Parser.getTok().getLoc();
- if (Parser.parseExpression(OffsetExpr)) {
- Parser.eatToEndOfStatement();
- Error(OffsetLoc, "malformed offset expression");
- return false;
- }
+
+ if (Parser.parseExpression(OffsetExpr))
+ return Error(OffsetLoc, "malformed offset expression");
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(OffsetExpr);
- if (!CE) {
- Parser.eatToEndOfStatement();
- Error(OffsetLoc, "offset must be an immediate constant");
- return false;
- }
+ if (!CE)
+ return Error(OffsetLoc, "offset must be an immediate constant");
Offset = CE->getValue();
}
+ if (parseToken(AsmToken::EndOfStatement,
+ "unexpected token in '.movsp' directive"))
+ return true;
+
getTargetStreamer().emitMovSP(SPReg, Offset);
UC.saveFPReg(SPReg);
@@ -10095,11 +10162,8 @@ bool ARMAsmParser::parseDirectiveMovSP(SMLoc L) {
/// ::= .object_arch name
bool ARMAsmParser::parseDirectiveObjectArch(SMLoc L) {
MCAsmParser &Parser = getParser();
- if (getLexer().isNot(AsmToken::Identifier)) {
- Error(getLexer().getLoc(), "unexpected token");
- Parser.eatToEndOfStatement();
- return false;
- }
+ if (getLexer().isNot(AsmToken::Identifier))
+ return Error(getLexer().getLoc(), "unexpected token");
StringRef Arch = Parser.getTok().getString();
SMLoc ArchLoc = Parser.getTok().getLoc();
@@ -10107,19 +10171,12 @@ bool ARMAsmParser::parseDirectiveObjectArch(SMLoc L) {
unsigned ID = ARM::parseArch(Arch);
- if (ID == ARM::AK_INVALID) {
- Error(ArchLoc, "unknown architecture '" + Arch + "'");
- Parser.eatToEndOfStatement();
- return false;
- }
+ if (ID == ARM::AK_INVALID)
+ return Error(ArchLoc, "unknown architecture '" + Arch + "'");
+ if (parseToken(AsmToken::EndOfStatement))
+ return true;
getTargetStreamer().emitObjectArch(ID);
-
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- Error(getLexer().getLoc(), "unexpected token");
- Parser.eatToEndOfStatement();
- }
-
return false;
}
@@ -10128,18 +10185,17 @@ bool ARMAsmParser::parseDirectiveObjectArch(SMLoc L) {
bool ARMAsmParser::parseDirectiveAlign(SMLoc L) {
// NOTE: if this is not the end of the statement, fall back to the target
// agnostic handling for this directive which will correctly handle this.
- if (getLexer().isNot(AsmToken::EndOfStatement))
- return true;
-
- // '.align' is target specifically handled to mean 2**2 byte alignment.
- const MCSection *Section = getStreamer().getCurrentSection().first;
- assert(Section && "must have section to emit alignment");
- if (Section->UseCodeAlign())
- getStreamer().EmitCodeAlignment(4, 0);
- else
- getStreamer().EmitValueToAlignment(4, 0, 1, 0);
-
- return false;
+ if (parseOptionalToken(AsmToken::EndOfStatement)) {
+ // '.align' is target specifically handled to mean 2**2 byte alignment.
+ const MCSection *Section = getStreamer().getCurrentSectionOnly();
+ assert(Section && "must have section to emit alignment");
+ if (Section->UseCodeAlign())
+ getStreamer().EmitCodeAlignment(4, 0);
+ else
+ getStreamer().EmitValueToAlignment(4, 0, 1, 0);
+ return false;
+ }
+ return true;
}
/// parseDirectiveThumbSet
@@ -10148,18 +10204,10 @@ bool ARMAsmParser::parseDirectiveThumbSet(SMLoc L) {
MCAsmParser &Parser = getParser();
StringRef Name;
- if (Parser.parseIdentifier(Name)) {
- TokError("expected identifier after '.thumb_set'");
- Parser.eatToEndOfStatement();
- return false;
- }
-
- if (getLexer().isNot(AsmToken::Comma)) {
- TokError("expected comma after name '" + Name + "'");
- Parser.eatToEndOfStatement();
- return false;
- }
- Lex();
+ if (check(Parser.parseIdentifier(Name),
+ "expected identifier after '.thumb_set'") ||
+ parseToken(AsmToken::Comma, "expected comma after name '" + Name + "'"))
+ return true;
MCSymbol *Sym;
const MCExpr *Value;
@@ -10173,10 +10221,10 @@ bool ARMAsmParser::parseDirectiveThumbSet(SMLoc L) {
/// Force static initialization.
extern "C" void LLVMInitializeARMAsmParser() {
- RegisterMCAsmParser<ARMAsmParser> X(TheARMLETarget);
- RegisterMCAsmParser<ARMAsmParser> Y(TheARMBETarget);
- RegisterMCAsmParser<ARMAsmParser> A(TheThumbLETarget);
- RegisterMCAsmParser<ARMAsmParser> B(TheThumbBETarget);
+ RegisterMCAsmParser<ARMAsmParser> X(getTheARMLETarget());
+ RegisterMCAsmParser<ARMAsmParser> Y(getTheARMBETarget());
+ RegisterMCAsmParser<ARMAsmParser> A(getTheThumbLETarget());
+ RegisterMCAsmParser<ARMAsmParser> B(getTheThumbBETarget());
}
#define GET_REGISTER_MATCHER
@@ -10218,16 +10266,17 @@ static const struct {
bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) {
MCAsmParser &Parser = getParser();
- if (getLexer().isNot(AsmToken::Identifier)) {
- Error(getLexer().getLoc(), "unexpected token");
- Parser.eatToEndOfStatement();
- return false;
- }
+ if (getLexer().isNot(AsmToken::Identifier))
+ return Error(getLexer().getLoc(), "expected architecture extension name");
StringRef Name = Parser.getTok().getString();
SMLoc ExtLoc = Parser.getTok().getLoc();
Lex();
+ if (parseToken(AsmToken::EndOfStatement,
+ "unexpected token in '.arch_extension' directive"))
+ return true;
+
bool EnableFeature = true;
if (Name.startswith_lower("no")) {
EnableFeature = false;
@@ -10235,20 +10284,19 @@ bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) {
}
unsigned FeatureKind = ARM::parseArchExt(Name);
if (FeatureKind == ARM::AEK_INVALID)
- Error(ExtLoc, "unknown architectural extension: " + Name);
+ return Error(ExtLoc, "unknown architectural extension: " + Name);
for (const auto &Extension : Extensions) {
if (Extension.Kind != FeatureKind)
continue;
if (Extension.Features.none())
- report_fatal_error("unsupported architectural extension: " + Name);
+ return Error(ExtLoc, "unsupported architectural extension: " + Name);
- if ((getAvailableFeatures() & Extension.ArchCheck) != Extension.ArchCheck) {
- Error(ExtLoc, "architectural extension '" + Name + "' is not "
- "allowed for the current base architecture");
- return false;
- }
+ if ((getAvailableFeatures() & Extension.ArchCheck) != Extension.ArchCheck)
+ return Error(ExtLoc, "architectural extension '" + Name +
+ "' is not "
+ "allowed for the current base architecture");
MCSubtargetInfo &STI = copySTI();
FeatureBitset ToggleFeatures = EnableFeature
@@ -10261,9 +10309,7 @@ bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) {
return false;
}
- Error(ExtLoc, "unknown architectural extension: " + Name);
- Parser.eatToEndOfStatement();
- return false;
+ return Error(ExtLoc, "unknown architectural extension: " + Name);
}
// Define this matcher function after the auto-generated include so we
diff --git a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
index 3196a57ccc3e..ac3d8c780af2 100644
--- a/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ b/contrib/llvm/lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -861,13 +861,13 @@ DecodeStatus ThumbDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
extern "C" void LLVMInitializeARMDisassembler() {
- TargetRegistry::RegisterMCDisassembler(TheARMLETarget,
+ TargetRegistry::RegisterMCDisassembler(getTheARMLETarget(),
createARMDisassembler);
- TargetRegistry::RegisterMCDisassembler(TheARMBETarget,
+ TargetRegistry::RegisterMCDisassembler(getTheARMBETarget(),
createARMDisassembler);
- TargetRegistry::RegisterMCDisassembler(TheThumbLETarget,
+ TargetRegistry::RegisterMCDisassembler(getTheThumbLETarget(),
createThumbDisassembler);
- TargetRegistry::RegisterMCDisassembler(TheThumbBETarget,
+ TargetRegistry::RegisterMCDisassembler(getTheThumbBETarget(),
createThumbDisassembler);
}
@@ -1432,7 +1432,7 @@ static DecodeStatus DecodeCopMemInstruction(MCInst &Inst, unsigned Insn,
case ARM::STC_POST:
case ARM::STCL_POST:
imm |= U << 8;
- // fall through.
+ LLVM_FALLTHROUGH;
default:
// The 'option' variant doesn't encode 'U' in the immediate since
// the immediate is unsigned [0,255].
@@ -2555,6 +2555,7 @@ static DecodeStatus DecodeVLDInstruction(MCInst &Inst, unsigned Insn,
break;
}
// Fall through to handle the register offset variant.
+ LLVM_FALLTHROUGH;
case ARM::VLD1d8wb_fixed:
case ARM::VLD1d16wb_fixed:
case ARM::VLD1d32wb_fixed:
@@ -4157,7 +4158,7 @@ static DecodeStatus DecodeMSRMask(MCInst &Inst, unsigned Val,
case 0x93: // faultmask_ns
if (!(FeatureBits[ARM::HasV8MMainlineOps]))
return MCDisassembler::Fail;
- // fall through
+ LLVM_FALLTHROUGH;
case 10: // msplim
case 11: // psplim
case 0x88: // msp_ns
@@ -5310,4 +5311,3 @@ static DecodeStatus DecoderForMRRC2AndMCRR2(llvm::MCInst &Inst, unsigned Val,
return S;
}
-
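The disassembler hunks replace informal "// fall through" comments with the LLVM_FALLTHROUGH macro, which expands to the standard [[fallthrough]] attribute on compilers that support it. A small self-contained sketch of the attribute itself (plain C++17, no LLVM headers; the decode logic is invented for illustration):

// Standard C++17 equivalent of what LLVM_FALLTHROUGH expands to on modern
// compilers: [[fallthrough]] documents the intent and silences
// -Wimplicit-fallthrough.
#include <cstdio>

static unsigned decodeOption(unsigned Kind, unsigned Imm) {
  switch (Kind) {
  case 0:                 // "post-indexed" style case
    Imm |= 1u << 8;       // set the U bit, then deliberately fall through
    [[fallthrough]];
  default:
    return Imm & 0x1FF;   // common masking path
  }
}

int main() { std::printf("%u\n", decodeOption(0, 0x12)); }   // prints 274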
diff --git a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
index e81bb77dbdfc..3667952d44c0 100644
--- a/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp
@@ -726,6 +726,12 @@ void ARMInstPrinter::printPKHASRShiftImm(const MCInst *MI, unsigned OpNum,
void ARMInstPrinter::printRegisterList(const MCInst *MI, unsigned OpNum,
const MCSubtargetInfo &STI,
raw_ostream &O) {
+ assert(std::is_sorted(MI->begin() + OpNum, MI->end(),
+ [&](const MCOperand &LHS, const MCOperand &RHS) {
+ return MRI.getEncodingValue(LHS.getReg()) <
+ MRI.getEncodingValue(RHS.getReg());
+ }));
+
O << "{";
for (unsigned i = OpNum, e = MI->getNumOperands(); i != e; ++i) {
if (i != OpNum)
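The new assert checks that the register-list operands are already sorted by their encoding values before printing. A standalone sketch of the std::is_sorted-with-lambda pattern it relies on; the encoding function below is a stand-in, not the MCRegisterInfo API:

// Standalone sketch of the std::is_sorted + comparator pattern used in the
// new assert; the "encoding" lambda here is made up for illustration.
#include <algorithm>
#include <cassert>
#include <vector>

int main() {
  std::vector<int> Regs = {4, 5, 6, 7};                 // pretend register ids
  auto Encoding = [](int Reg) { return Reg * 2 + 1; };  // hypothetical encoding
  assert(std::is_sorted(Regs.begin(), Regs.end(),
                        [&](int LHS, int RHS) {
                          return Encoding(LHS) < Encoding(RHS);
                        }));
  return 0;
}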
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
index 0fc758201d47..a58d5b34131b 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp
@@ -375,7 +375,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
case ARM::fixup_arm_movt_hi16:
if (!IsPCRel)
Value >>= 16;
- // Fallthrough
+ LLVM_FALLTHROUGH;
case ARM::fixup_arm_movw_lo16: {
unsigned Hi4 = (Value & 0xF000) >> 12;
unsigned Lo12 = Value & 0x0FFF;
@@ -387,7 +387,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
case ARM::fixup_t2_movt_hi16:
if (!IsPCRel)
Value >>= 16;
- // Fallthrough
+ LLVM_FALLTHROUGH;
case ARM::fixup_t2_movw_lo16: {
unsigned Hi4 = (Value & 0xF000) >> 12;
unsigned i = (Value & 0x800) >> 11;
@@ -403,7 +403,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
case ARM::fixup_arm_ldst_pcrel_12:
// ARM PC-relative values are offset by 8.
Value -= 4;
- // FALLTHROUGH
+ LLVM_FALLTHROUGH;
case ARM::fixup_t2_ldst_pcrel_12: {
// Offset by 4, adjusted by two due to the half-word ordering of thumb.
Value -= 4;
@@ -541,7 +541,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
//
// Note that the halfwords are stored high first, low second; so we need
// to transpose the fixup value here to map properly.
- if (Ctx && Value % 4 != 0) {
+ if (Ctx && Value % 4 != 0) {
Ctx->reportError(Fixup.getLoc(), "misaligned ARM call destination");
return 0;
}
@@ -578,6 +578,13 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
// Offset by 4, and don't encode the low two bits.
return ((Value - 4) >> 2) & 0xff;
case ARM::fixup_arm_thumb_cb: {
+ // CB instructions can only branch to offsets in [4, 126] in multiples of 2
+ // so ensure that the raw value LSB is zero and it lies in [2, 130].
+ // An offset of 2 will be relaxed to a NOP.
+ if (Ctx && ((int64_t)Value < 2 || Value > 0x82 || Value & 1)) {
+ Ctx->reportError(Fixup.getLoc(), "out of range pc-relative fixup value");
+ return 0;
+ }
// Offset by 4 and don't encode the lower bit, which is always 0.
// FIXME: diagnose if no Thumb2
uint32_t Binary = (Value - 4) >> 1;
@@ -623,7 +630,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
case ARM::fixup_arm_pcrel_10:
Value = Value - 4; // ARM fixups offset by an additional word and don't
// need to adjust for the half-word ordering.
- // Fall through.
+ LLVM_FALLTHROUGH;
case ARM::fixup_t2_pcrel_10: {
// Offset by 4, adjusted by two due to the half-word ordering of thumb.
Value = Value - 4;
@@ -650,7 +657,7 @@ unsigned ARMAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
case ARM::fixup_arm_pcrel_9:
Value = Value - 4; // ARM fixups offset by an additional word and don't
// need to adjust for the half-word ordering.
- // Fall through.
+ LLVM_FALLTHROUGH;
case ARM::fixup_t2_pcrel_9: {
// Offset by 4, adjusted by two due to the half-word ordering of thumb.
Value = Value - 4;
@@ -696,14 +703,16 @@ void ARMAsmBackend::processFixupValue(const MCAssembler &Asm,
bool &IsResolved) {
const MCSymbolRefExpr *A = Target.getSymA();
const MCSymbol *Sym = A ? &A->getSymbol() : nullptr;
- // Some fixups to thumb function symbols need the low bit (thumb bit)
- // twiddled.
- if ((unsigned)Fixup.getKind() != ARM::fixup_arm_ldst_pcrel_12 &&
- (unsigned)Fixup.getKind() != ARM::fixup_t2_ldst_pcrel_12 &&
- (unsigned)Fixup.getKind() != ARM::fixup_arm_adr_pcrel_12 &&
- (unsigned)Fixup.getKind() != ARM::fixup_thumb_adr_pcrel_10 &&
- (unsigned)Fixup.getKind() != ARM::fixup_t2_adr_pcrel_12 &&
- (unsigned)Fixup.getKind() != ARM::fixup_arm_thumb_cp) {
+ // MachO (the only user of "Value") tries to make .o files that look vaguely
+ // pre-linked, so for MOVW/MOVT and .word relocations they put the Thumb bit
+ // into the addend if possible. Other relocation types don't want this bit
+ // though (branches couldn't encode it if it *was* present, and no other
+ // relocations exist) and it can interfere with checking valid expressions.
+ if ((unsigned)Fixup.getKind() == FK_Data_4 ||
+ (unsigned)Fixup.getKind() == ARM::fixup_arm_movw_lo16 ||
+ (unsigned)Fixup.getKind() == ARM::fixup_arm_movt_hi16 ||
+ (unsigned)Fixup.getKind() == ARM::fixup_t2_movw_lo16 ||
+ (unsigned)Fixup.getKind() == ARM::fixup_t2_movt_hi16) {
if (Sym) {
if (Asm.isThumbFunc(Sym))
Value |= 1;
@@ -1111,6 +1120,7 @@ static MachO::CPUSubTypeARM getMachOSubTypeFromArch(StringRef Arch) {
MCAsmBackend *llvm::createARMAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
const Triple &TheTriple, StringRef CPU,
+ const MCTargetOptions &Options,
bool isLittle) {
switch (TheTriple.getObjectFormat()) {
default:
@@ -1131,24 +1141,28 @@ MCAsmBackend *llvm::createARMAsmBackend(const Target &T,
MCAsmBackend *llvm::createARMLEAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU) {
- return createARMAsmBackend(T, MRI, TT, CPU, true);
+ const Triple &TT, StringRef CPU,
+ const MCTargetOptions &Options) {
+ return createARMAsmBackend(T, MRI, TT, CPU, Options, true);
}
MCAsmBackend *llvm::createARMBEAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU) {
- return createARMAsmBackend(T, MRI, TT, CPU, false);
+ const Triple &TT, StringRef CPU,
+ const MCTargetOptions &Options) {
+ return createARMAsmBackend(T, MRI, TT, CPU, Options, false);
}
MCAsmBackend *llvm::createThumbLEAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU) {
- return createARMAsmBackend(T, MRI, TT, CPU, true);
+ const Triple &TT, StringRef CPU,
+ const MCTargetOptions &Options) {
+ return createARMAsmBackend(T, MRI, TT, CPU, Options, true);
}
MCAsmBackend *llvm::createThumbBEAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU) {
- return createARMAsmBackend(T, MRI, TT, CPU, false);
+ const Triple &TT, StringRef CPU,
+ const MCTargetOptions &Options) {
+ return createARMAsmBackend(T, MRI, TT, CPU, Options, false);
}
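The new fixup_arm_thumb_cb guard in the asm-backend hunk rejects raw values that are odd, below 2, or above 0x82 before encoding them as (Value - 4) >> 1. A standalone sketch of that check and encoding, assuming the same limits as the hunk above; the field packing is simplified and not the full instruction encoder:

// Standalone sketch of the new CBZ/CBNZ range check: the raw fixup value must
// be even and within [2, 0x82]; the encoded field is then (Value - 4) >> 1.
#include <cstdint>
#include <cstdio>

static bool encodeThumbCB(uint64_t Value, uint32_t &Field) {
  if ((int64_t)Value < 2 || Value > 0x82 || (Value & 1))
    return false;                       // out of range pc-relative fixup value
  Field = (uint32_t)((Value - 4) >> 1); // low bit is implicit and always 0
  return true;
}

int main() {
  uint32_t F;
  std::printf("%d %d\n", encodeThumbCB(0x20, F), encodeThumbCB(0x84, F));
  // prints "1 0": 0x20 is encodable, 0x84 is past the limit
}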
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
index 4118fe8e8cdb..6f19754b899e 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp
@@ -140,6 +140,12 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
case ARM::fixup_t2_movw_lo16:
Type = ELF::R_ARM_THM_MOVW_PREL_NC;
break;
+ case ARM::fixup_arm_thumb_br:
+ Type = ELF::R_ARM_THM_JUMP11;
+ break;
+ case ARM::fixup_arm_thumb_bcc:
+ Type = ELF::R_ARM_THM_JUMP8;
+ break;
case ARM::fixup_arm_thumb_bl:
case ARM::fixup_arm_thumb_blx:
switch (Modifier) {
@@ -221,6 +227,9 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
case MCSymbolRefExpr::VK_TLSDESC:
Type = ELF::R_ARM_TLS_GOTDESC;
break;
+ case MCSymbolRefExpr::VK_TLSLDM:
+ Type = ELF::R_ARM_TLS_LDM32;
+ break;
case MCSymbolRefExpr::VK_ARM_TLSDESCSEQ:
Type = ELF::R_ARM_TLS_DESCSEQ;
break;
@@ -239,10 +248,26 @@ unsigned ARMELFObjectWriter::GetRelocTypeInner(const MCValue &Target,
Type = ELF::R_ARM_JUMP24;
break;
case ARM::fixup_arm_movt_hi16:
- Type = ELF::R_ARM_MOVT_ABS;
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_ARM_MOVT_ABS;
+ break;
+ case MCSymbolRefExpr::VK_ARM_SBREL:
+        Type = ELF::R_ARM_MOVT_BREL;
+ break;
+ }
break;
case ARM::fixup_arm_movw_lo16:
- Type = ELF::R_ARM_MOVW_ABS_NC;
+ switch (Modifier) {
+ default: llvm_unreachable("Unsupported Modifier");
+ case MCSymbolRefExpr::VK_None:
+ Type = ELF::R_ARM_MOVW_ABS_NC;
+ break;
+ case MCSymbolRefExpr::VK_ARM_SBREL:
+        Type = ELF::R_ARM_MOVW_BREL_NC;
+ break;
+ }
break;
case ARM::fixup_t2_movt_hi16:
Type = ELF::R_ARM_THM_MOVT_ABS;
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 36cb74765f3b..f6bb35d2326b 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -591,7 +591,7 @@ private:
void FlushPendingOffset();
void FlushUnwindOpcodes(bool NoHandlerData);
- void SwitchToEHSection(const char *Prefix, unsigned Type, unsigned Flags,
+ void SwitchToEHSection(StringRef Prefix, unsigned Type, unsigned Flags,
SectionKind Kind, const MCSymbol &Fn);
void SwitchToExTabSection(const MCSymbol &FnStart);
void SwitchToExIdxSection(const MCSymbol &FnStart);
@@ -1074,7 +1074,7 @@ void ARMELFStreamer::reset() {
getAssembler().setELFHeaderEFlags(ELF::EF_ARM_EABI_VER5);
}
-inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix,
+inline void ARMELFStreamer::SwitchToEHSection(StringRef Prefix,
unsigned Type,
unsigned Flags,
SectionKind Kind,
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
index 53cd29a6061e..1e062ad45af5 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp
@@ -90,6 +90,7 @@ ARMCOFFMCAsmInfoMicrosoft::ARMCOFFMCAsmInfoMicrosoft() {
PrivateGlobalPrefix = "$M";
PrivateLabelPrefix = "$M";
+ CommentString = ";";
}
void ARMCOFFMCAsmInfoGNU::anchor() { }
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index 9fca13eeea93..559a4f8de75f 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -1493,7 +1493,7 @@ getT2SORegOpValue(const MCInst &MI, unsigned OpIdx,
case ARM_AM::lsl: SBits = 0x0; break;
case ARM_AM::lsr: SBits = 0x2; break;
case ARM_AM::asr: SBits = 0x4; break;
- case ARM_AM::rrx: // FALLTHROUGH
+ case ARM_AM::rrx: LLVM_FALLTHROUGH;
case ARM_AM::ror: SBits = 0x6; break;
}
@@ -1545,8 +1545,15 @@ getRegisterListOpValue(const MCInst &MI, unsigned Op,
else
Binary |= NumRegs * 2;
} else {
+ const MCRegisterInfo &MRI = *CTX.getRegisterInfo();
+ assert(std::is_sorted(MI.begin() + Op, MI.end(),
+ [&](const MCOperand &LHS, const MCOperand &RHS) {
+ return MRI.getEncodingValue(LHS.getReg()) <
+ MRI.getEncodingValue(RHS.getReg());
+ }));
+
for (unsigned I = Op, E = MI.getNumOperands(); I < E; ++I) {
- unsigned RegNo = CTX.getRegisterInfo()->getEncodingValue(MI.getOperand(I).getReg());
+ unsigned RegNo = MRI.getEncodingValue(MI.getOperand(I).getReg());
Binary |= 1 << RegNo;
}
}
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
index afb089ab0286..9e4d202321e6 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp
@@ -204,7 +204,8 @@ static MCStreamer *createELFStreamer(const Triple &T, MCContext &Ctx,
MCAsmBackend &MAB, raw_pwrite_stream &OS,
MCCodeEmitter *Emitter, bool RelaxAll) {
return createARMELFStreamer(Ctx, MAB, OS, Emitter, false,
- T.getArch() == Triple::thumb);
+ (T.getArch() == Triple::thumb ||
+ T.getArch() == Triple::thumbeb));
}
static MCStreamer *createARMMachOStreamer(MCContext &Ctx, MCAsmBackend &MAB,
@@ -273,8 +274,8 @@ static MCInstrAnalysis *createARMMCInstrAnalysis(const MCInstrInfo *Info) {
// Force static initialization.
extern "C" void LLVMInitializeARMTargetMC() {
- for (Target *T : {&TheARMLETarget, &TheARMBETarget, &TheThumbLETarget,
- &TheThumbBETarget}) {
+ for (Target *T : {&getTheARMLETarget(), &getTheARMBETarget(),
+ &getTheThumbLETarget(), &getTheThumbBETarget()}) {
// Register the MC asm info.
RegisterMCAsmInfoFn X(*T, createARMMCAsmInfo);
@@ -313,16 +314,18 @@ extern "C" void LLVMInitializeARMTargetMC() {
}
// Register the MC Code Emitter
- for (Target *T : {&TheARMLETarget, &TheThumbLETarget})
+ for (Target *T : {&getTheARMLETarget(), &getTheThumbLETarget()})
TargetRegistry::RegisterMCCodeEmitter(*T, createARMLEMCCodeEmitter);
- for (Target *T : {&TheARMBETarget, &TheThumbBETarget})
+ for (Target *T : {&getTheARMBETarget(), &getTheThumbBETarget()})
TargetRegistry::RegisterMCCodeEmitter(*T, createARMBEMCCodeEmitter);
// Register the asm backend.
- TargetRegistry::RegisterMCAsmBackend(TheARMLETarget, createARMLEAsmBackend);
- TargetRegistry::RegisterMCAsmBackend(TheARMBETarget, createARMBEAsmBackend);
- TargetRegistry::RegisterMCAsmBackend(TheThumbLETarget,
+ TargetRegistry::RegisterMCAsmBackend(getTheARMLETarget(),
+ createARMLEAsmBackend);
+ TargetRegistry::RegisterMCAsmBackend(getTheARMBETarget(),
+ createARMBEAsmBackend);
+ TargetRegistry::RegisterMCAsmBackend(getTheThumbLETarget(),
createThumbLEAsmBackend);
- TargetRegistry::RegisterMCAsmBackend(TheThumbBETarget,
+ TargetRegistry::RegisterMCAsmBackend(getTheThumbBETarget(),
createThumbBEAsmBackend);
}
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
index c2bbc8e828c4..ba834201e585 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h
@@ -28,6 +28,7 @@ class MCObjectWriter;
class MCRegisterInfo;
class MCSubtargetInfo;
class MCStreamer;
+class MCTargetOptions;
class MCRelocationInfo;
class MCTargetStreamer;
class StringRef;
@@ -36,8 +37,10 @@ class Triple;
class raw_ostream;
class raw_pwrite_stream;
-extern Target TheARMLETarget, TheThumbLETarget;
-extern Target TheARMBETarget, TheThumbBETarget;
+Target &getTheARMLETarget();
+Target &getTheThumbLETarget();
+Target &getTheARMBETarget();
+Target &getTheThumbBETarget();
namespace ARM_MC {
std::string ParseARMTriple(const Triple &TT, StringRef CPU);
@@ -66,21 +69,26 @@ MCCodeEmitter *createARMBEMCCodeEmitter(const MCInstrInfo &MCII,
MCAsmBackend *createARMAsmBackend(const Target &T, const MCRegisterInfo &MRI,
const Triple &TT, StringRef CPU,
+ const MCTargetOptions &Options,
bool IsLittleEndian);
MCAsmBackend *createARMLEAsmBackend(const Target &T, const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU);
+ const Triple &TT, StringRef CPU,
+ const MCTargetOptions &Options);
MCAsmBackend *createARMBEAsmBackend(const Target &T, const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU);
+ const Triple &TT, StringRef CPU,
+ const MCTargetOptions &Options);
MCAsmBackend *createThumbLEAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU);
+ const Triple &TT, StringRef CPU,
+ const MCTargetOptions &Options);
MCAsmBackend *createThumbBEAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU);
+ const Triple &TT, StringRef CPU,
+ const MCTargetOptions &Options);
// Construct a PE/COFF machine code streamer which will generate a PE/COFF
// object file.
diff --git a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
index cfa6ce7da65e..b77181f29b2d 100644
--- a/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp
@@ -208,7 +208,7 @@ RecordARMScatteredHalfRelocation(MachObjectWriter *Writer,
if (Asm.isThumbFunc(A))
FixedValue &= 0xfffffffe;
MovtBit = 1;
- // Fallthrough
+ LLVM_FALLTHROUGH;
case ARM::fixup_t2_movw_lo16:
ThumbBit = 1;
break;
diff --git a/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp b/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp
index 7f2124033982..744761bcddb8 100644
--- a/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp
+++ b/contrib/llvm/lib/Target/ARM/MLxExpansionPass.cpp
@@ -43,7 +43,7 @@ namespace {
bool runOnMachineFunction(MachineFunction &Fn) override;
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "ARM MLA / MLS expansion pass";
}
@@ -334,18 +334,15 @@ bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) {
unsigned Skip = 0;
MachineBasicBlock::reverse_iterator MII = MBB.rbegin(), E = MBB.rend();
while (MII != E) {
- MachineInstr *MI = &*MII;
+ MachineInstr *MI = &*MII++;
- if (MI->isPosition() || MI->isImplicitDef() || MI->isCopy()) {
- ++MII;
+ if (MI->isPosition() || MI->isImplicitDef() || MI->isCopy())
continue;
- }
const MCInstrDesc &MCID = MI->getDesc();
if (MI->isBarrier()) {
clearStack();
Skip = 0;
- ++MII;
continue;
}
@@ -365,13 +362,9 @@ bool MLxExpansion::ExpandFPMLxInstructions(MachineBasicBlock &MBB) {
pushStack(MI);
else {
ExpandFPMLxInstruction(MBB, MI, MulOpc, AddSubOpc, NegAcc, HasLane);
- E = MBB.rend(); // May have changed if MI was the 1st instruction.
Changed = true;
- continue;
}
}
-
- ++MII;
}
return Changed;
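The MLxExpansionPass rewrite grabs the current instruction and advances the reverse iterator in a single expression (MI = &*MII++), so expanding MI later cannot leave the loop holding a stale position. A standalone sketch of the same advance-before-use idiom, using std::list in place of a machine basic block:

// Standalone sketch of the "grab current element, advance iterator first"
// idiom; plain std::list stands in for the instruction list.
#include <cstdio>
#include <list>

int main() {
  std::list<int> L = {1, 2, 3, 4, 5};
  for (auto It = L.begin(), E = L.end(); It != E;) {
    auto Cur = It++;           // advance first, as in MI = &*MII++ above
    if (*Cur % 2 == 0)
      L.erase(Cur);            // erasing Cur no longer disturbs the loop
  }
  for (int V : L)
    std::printf("%d ", V);     // prints "1 3 5"
  std::printf("\n");
}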
diff --git a/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp b/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
index 3f88eb818062..caa69f8d71b7 100644
--- a/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp
@@ -11,17 +11,31 @@
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
-Target llvm::TheARMLETarget, llvm::TheARMBETarget;
-Target llvm::TheThumbLETarget, llvm::TheThumbBETarget;
+Target &llvm::getTheARMLETarget() {
+ static Target TheARMLETarget;
+ return TheARMLETarget;
+}
+Target &llvm::getTheARMBETarget() {
+ static Target TheARMBETarget;
+ return TheARMBETarget;
+}
+Target &llvm::getTheThumbLETarget() {
+ static Target TheThumbLETarget;
+ return TheThumbLETarget;
+}
+Target &llvm::getTheThumbBETarget() {
+ static Target TheThumbBETarget;
+ return TheThumbBETarget;
+}
extern "C" void LLVMInitializeARMTargetInfo() {
- RegisterTarget<Triple::arm, /*HasJIT=*/true>
- X(TheARMLETarget, "arm", "ARM");
- RegisterTarget<Triple::armeb, /*HasJIT=*/true>
- Y(TheARMBETarget, "armeb", "ARM (big endian)");
+ RegisterTarget<Triple::arm, /*HasJIT=*/true> X(getTheARMLETarget(), "arm",
+ "ARM");
+ RegisterTarget<Triple::armeb, /*HasJIT=*/true> Y(getTheARMBETarget(), "armeb",
+ "ARM (big endian)");
- RegisterTarget<Triple::thumb, /*HasJIT=*/true>
- A(TheThumbLETarget, "thumb", "Thumb");
- RegisterTarget<Triple::thumbeb, /*HasJIT=*/true>
- B(TheThumbBETarget, "thumbeb", "Thumb (big endian)");
+ RegisterTarget<Triple::thumb, /*HasJIT=*/true> A(getTheThumbLETarget(),
+ "thumb", "Thumb");
+ RegisterTarget<Triple::thumbeb, /*HasJIT=*/true> B(
+ getTheThumbBETarget(), "thumbeb", "Thumb (big endian)");
}
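The TargetInfo change replaces mutable global Target objects with accessor functions holding a function-local static, so each Target is constructed on first use and global-constructor ordering stops being a concern. A minimal sketch of that accessor pattern with a stand-in Target type (not the llvm::Target class):

// Standalone sketch of the accessor pattern introduced above: a function-local
// static is constructed lazily, exactly once, on first call.
#include <cstdio>

struct Target {                       // stand-in for llvm::Target
  const char *Name = "unregistered";
};

static Target &getTheDemoTarget() {
  static Target TheDemoTarget;        // construct-on-first-use
  return TheDemoTarget;
}

int main() {
  getTheDemoTarget().Name = "demo";               // "register" the target once
  std::printf("%s\n", getTheDemoTarget().Name);   // prints "demo"
}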
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
index c0732e4b750a..9953c61cd89c 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
@@ -26,8 +26,8 @@ Thumb1FrameLowering::Thumb1FrameLowering(const ARMSubtarget &sti)
: ARMFrameLowering(sti) {}
bool Thumb1FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const{
- const MachineFrameInfo *FFI = MF.getFrameInfo();
- unsigned CFSize = FFI->getMaxCallFrameSize();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ unsigned CFSize = MFI.getMaxCallFrameSize();
// It's not always a good idea to include the call frame as part of the
// stack frame. ARM (especially Thumb) has small immediate offset to
// address the stack frame. So a large call frame can cause poor codegen
@@ -35,7 +35,7 @@ bool Thumb1FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const{
if (CFSize >= ((1 << 8) - 1) * 4 / 2) // Half of imm8 * 4
return false;
- return !MF.getFrameInfo()->hasVarSizedObjects();
+ return !MFI.hasVarSizedObjects();
}
static void emitSPUpdate(MachineBasicBlock &MBB,
@@ -85,7 +85,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
MachineModuleInfo &MMI = MF.getMMI();
const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
@@ -95,10 +95,10 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
*static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo());
unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
- unsigned NumBytes = MFI->getStackSize();
+ unsigned NumBytes = MFI.getStackSize();
assert(NumBytes >= ArgRegsSaveSize &&
"ArgRegsSaveSize is included in NumBytes");
- const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
// Debug location must be unknown since the first debug location is used
// to determine the end of the prologue.
@@ -110,7 +110,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
// Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4.
NumBytes = (NumBytes + 3) & ~3;
- MFI->setStackSize(NumBytes);
+ MFI.setStackSize(NumBytes);
// Determine the sizes of each callee-save spill areas and record which frame
// belongs to which callee-save spill areas.
@@ -121,7 +121,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -ArgRegsSaveSize,
MachineInstr::FrameSetup);
CFAOffset -= ArgRegsSaveSize;
- unsigned CFIIndex = MMI.addFrameInst(
+ unsigned CFIIndex = MF.addFrameInst(
MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
@@ -133,7 +133,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -(NumBytes - ArgRegsSaveSize),
MachineInstr::FrameSetup);
CFAOffset -= NumBytes - ArgRegsSaveSize;
- unsigned CFIIndex = MMI.addFrameInst(
+ unsigned CFIIndex = MF.addFrameInst(
MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
@@ -150,11 +150,11 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
case ARM::R9:
case ARM::R10:
case ARM::R11:
- if (STI.splitFramePushPop()) {
+ if (STI.splitFramePushPop(MF)) {
GPRCS2Size += 4;
break;
}
- // fallthrough
+ LLVM_FALLTHROUGH;
case ARM::R4:
case ARM::R5:
case ARM::R6:
@@ -179,7 +179,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size;
bool HasFP = hasFP(MF);
if (HasFP)
- AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) +
+ AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
NumBytes);
AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
@@ -188,7 +188,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
int FramePtrOffsetInBlock = 0;
unsigned adjustedGPRCS1Size = GPRCS1Size;
- if (tryFoldSPUpdateIntoPushPop(STI, MF, &*std::prev(MBBI), NumBytes)) {
+ if (GPRCS1Size > 0 && GPRCS2Size == 0 &&
+ tryFoldSPUpdateIntoPushPop(STI, MF, &*std::prev(MBBI), NumBytes)) {
FramePtrOffsetInBlock = NumBytes;
adjustedGPRCS1Size += NumBytes;
NumBytes = 0;
@@ -196,7 +197,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
if (adjustedGPRCS1Size) {
CFAOffset -= adjustedGPRCS1Size;
- unsigned CFIIndex = MMI.addFrameInst(
+ unsigned CFIIndex = MF.addFrameInst(
MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
@@ -212,7 +213,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
case ARM::R10:
case ARM::R11:
case ARM::R12:
- if (STI.splitFramePushPop())
+ if (STI.splitFramePushPop(MF))
break;
      // fallthrough
case ARM::R0:
@@ -224,8 +225,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
case ARM::R6:
case ARM::R7:
case ARM::LR:
- unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
- nullptr, MRI->getDwarfRegNum(Reg, true), MFI->getObjectOffset(FI)));
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
+ nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
@@ -236,20 +237,20 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
  // Adjust FP so it points to the stack slot that contains the previous FP.
if (HasFP) {
FramePtrOffsetInBlock +=
- MFI->getObjectOffset(FramePtrSpillFI) + GPRCS1Size + ArgRegsSaveSize;
+ MFI.getObjectOffset(FramePtrSpillFI) + GPRCS1Size + ArgRegsSaveSize;
AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDrSPi), FramePtr)
.addReg(ARM::SP).addImm(FramePtrOffsetInBlock / 4)
.setMIFlags(MachineInstr::FrameSetup));
if(FramePtrOffsetInBlock) {
CFAOffset += FramePtrOffsetInBlock;
- unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfa(
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfa(
nullptr, MRI->getDwarfRegNum(FramePtr, true), CFAOffset));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
} else {
unsigned CFIIndex =
- MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister(
+ MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(
nullptr, MRI->getDwarfRegNum(FramePtr, true)));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
@@ -261,13 +262,55 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
AFI->setShouldRestoreSPFromFP(true);
}
+ // Skip past the spilling of r8-r11, which could consist of multiple tPUSH
+ // and tMOVr instructions. We don't need to add any call frame information
+ // in-between these instructions, because they do not modify the high
+ // registers.
+ while (true) {
+ MachineBasicBlock::iterator OldMBBI = MBBI;
+ // Skip a run of tMOVr instructions
+ while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tMOVr)
+ MBBI++;
+ if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) {
+ MBBI++;
+ } else {
+ // We have reached an instruction which is not a push, so the previous
+ // run of tMOVr instructions (which may have been empty) was not part of
+ // the prologue. Reset MBBI back to the last PUSH of the prologue.
+ MBBI = OldMBBI;
+ break;
+ }
+ }
+
+ // Emit call frame information for the callee-saved high registers.
+ for (auto &I : CSI) {
+ unsigned Reg = I.getReg();
+ int FI = I.getFrameIdx();
+ switch (Reg) {
+ case ARM::R8:
+ case ARM::R9:
+ case ARM::R10:
+ case ARM::R11:
+ case ARM::R12: {
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
+ nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
+ BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
+ .addCFIIndex(CFIIndex)
+ .setMIFlags(MachineInstr::FrameSetup);
+ break;
+ }
+ default:
+ break;
+ }
+ }
+
if (NumBytes) {
// Insert it after all the callee-save spills.
emitSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes,
MachineInstr::FrameSetup);
if (!HasFP) {
CFAOffset -= NumBytes;
- unsigned CFIIndex = MMI.addFrameInst(
+ unsigned CFIIndex = MF.addFrameInst(
MCCFIInstruction::createDefCfaOffset(nullptr, CFAOffset));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
@@ -276,8 +319,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
}
if (STI.isTargetELF() && HasFP)
- MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() -
- AFI->getFramePtrSpillOffset());
+ MFI.setOffsetAdjustment(MFI.getOffsetAdjustment() -
+ AFI->getFramePtrSpillOffset());
AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
@@ -299,7 +342,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
// If the frame has variable sized objects then the epilogue must restore
// the sp from fp. We can assume there's an FP here since hasFP already
// checks for hasVarSizedObjects.
- if (MFI->hasVarSizedObjects())
+ if (MFI.hasVarSizedObjects())
AFI->setShouldRestoreSPFromFP(true);
}
@@ -308,12 +351,12 @@ static bool isCSRestore(MachineInstr &MI, const MCPhysReg *CSRegs) {
isCalleeSavedRegister(MI.getOperand(0).getReg(), CSRegs))
return true;
else if (MI.getOpcode() == ARM::tPOP) {
- // The first two operands are predicates. The last two are
- // imp-def and imp-use of SP. Check everything in between.
- for (int i = 2, e = MI.getNumOperands() - 2; i != e; ++i)
- if (!isCalleeSavedRegister(MI.getOperand(i).getReg(), CSRegs))
- return false;
return true;
+ } else if (MI.getOpcode() == ARM::tMOVr) {
+ unsigned Dst = MI.getOperand(0).getReg();
+ unsigned Src = MI.getOperand(1).getReg();
+ return ((ARM::tGPRRegClass.contains(Src) || Src == ARM::LR) &&
+ ARM::hGPRRegClass.contains(Dst));
}
return false;
}
@@ -322,7 +365,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
const ThumbRegisterInfo *RegInfo =
static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo());
@@ -330,7 +373,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
*static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo());
unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
- int NumBytes = (int)MFI->getStackSize();
+ int NumBytes = (int)MFI.getStackSize();
assert((unsigned)NumBytes >= ArgRegsSaveSize &&
"ArgRegsSaveSize is included in NumBytes");
const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
@@ -361,7 +404,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF,
// frame pointer stack slot, the target is ELF and the function has FP, or
// the target uses var sized objects.
if (NumBytes) {
- assert(!MFI->getPristineRegs(MF).test(ARM::R4) &&
+ assert(!MFI.getPristineRegs(MF).test(ARM::R4) &&
"No scratch register to restore SP from FP!");
emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
TII, *RegInfo);
@@ -405,7 +448,7 @@ bool Thumb1FrameLowering::needPopSpecialFixUp(const MachineFunction &MF) const {
return true;
// LR cannot be encoded with Thumb1, i.e., it requires a special fix-up.
- for (const CalleeSavedInfo &CSI : MF.getFrameInfo()->getCalleeSavedInfo())
+ for (const CalleeSavedInfo &CSI : MF.getFrameInfo().getCalleeSavedInfo())
if (CSI.getReg() == ARM::LR)
return true;
@@ -568,6 +611,19 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB,
return true;
}
+// Return the first register iterator, starting at CurrentReg, that is present
+// in EnabledRegs, or OrderEnd if no further registers are in that set. The
+// iterator is not advanced first, so CurrentReg is returned if it is enabled.
+template <unsigned SetSize>
+static const unsigned *
+findNextOrderedReg(const unsigned *CurrentReg,
+ SmallSet<unsigned, SetSize> &EnabledRegs,
+ const unsigned *OrderEnd) {
+ while (CurrentReg != OrderEnd && !EnabledRegs.count(*CurrentReg))
+ ++CurrentReg;
+ return CurrentReg;
+}
+
bool Thumb1FrameLowering::
spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
@@ -578,29 +634,114 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
DebugLoc DL;
const TargetInstrInfo &TII = *STI.getInstrInfo();
+ MachineFunction &MF = *MBB.getParent();
+ const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo());
+
+ SmallSet<unsigned, 9> LoRegsToSave; // r0-r7, lr
+ SmallSet<unsigned, 4> HiRegsToSave; // r8-r11
+ SmallSet<unsigned, 9> CopyRegs; // Registers which can be used after pushing
+ // LoRegs for saving HiRegs.
- MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH));
- AddDefaultPred(MIB);
for (unsigned i = CSI.size(); i != 0; --i) {
unsigned Reg = CSI[i-1].getReg();
- bool isKill = true;
- // Add the callee-saved register as live-in unless it's LR and
- // @llvm.returnaddress is called. If LR is returned for @llvm.returnaddress
- // then it's already added to the function and entry block live-in sets.
- if (Reg == ARM::LR) {
- MachineFunction &MF = *MBB.getParent();
- if (MF.getFrameInfo()->isReturnAddressTaken() &&
- MF.getRegInfo().isLiveIn(Reg))
- isKill = false;
+ if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) {
+ LoRegsToSave.insert(Reg);
+ } else if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::LR) {
+ HiRegsToSave.insert(Reg);
+ } else {
+ llvm_unreachable("callee-saved register of unexpected class");
+ }
+
+ if ((ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) &&
+ !MF.getRegInfo().isLiveIn(Reg) &&
+ !(hasFP(MF) && Reg == RegInfo->getFrameRegister(MF)))
+ CopyRegs.insert(Reg);
+ }
+
+ // Unused argument registers can be used for the high register saving.
+ for (unsigned ArgReg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3})
+ if (!MF.getRegInfo().isLiveIn(ArgReg))
+ CopyRegs.insert(ArgReg);
+
+ // Push the low registers and lr
+ if (!LoRegsToSave.empty()) {
+ MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH));
+ AddDefaultPred(MIB);
+ for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6, ARM::R7, ARM::LR}) {
+ if (LoRegsToSave.count(Reg)) {
+ bool isKill = !MF.getRegInfo().isLiveIn(Reg);
+ if (isKill)
+ MBB.addLiveIn(Reg);
+
+ MIB.addReg(Reg, getKillRegState(isKill));
+ }
+ }
+ MIB.setMIFlags(MachineInstr::FrameSetup);
+ }
+
+ // Push the high registers. There are no store instructions that can access
+ // these registers directly, so we have to move them to low registers, and
+ // push them. This might take multiple pushes, as it is possible for there to
+ // be fewer low registers available than high registers which need saving.
+
+ // These are in reverse order so that in the case where we need to use
+ // multiple PUSH instructions, the order of the registers on the stack still
+  // matches the unwind info. They need to be switched back to ascending order
+ // before adding to the PUSH instruction.
+ static const unsigned AllCopyRegs[] = {ARM::LR, ARM::R7, ARM::R6,
+ ARM::R5, ARM::R4, ARM::R3,
+ ARM::R2, ARM::R1, ARM::R0};
+ static const unsigned AllHighRegs[] = {ARM::R11, ARM::R10, ARM::R9, ARM::R8};
+
+ const unsigned *AllCopyRegsEnd = std::end(AllCopyRegs);
+ const unsigned *AllHighRegsEnd = std::end(AllHighRegs);
+
+ // Find the first register to save.
+ const unsigned *HiRegToSave = findNextOrderedReg(
+ std::begin(AllHighRegs), HiRegsToSave, AllHighRegsEnd);
+
+ while (HiRegToSave != AllHighRegsEnd) {
+ // Find the first low register to use.
+ const unsigned *CopyReg =
+ findNextOrderedReg(std::begin(AllCopyRegs), CopyRegs, AllCopyRegsEnd);
+
+ // Create the PUSH, but don't insert it yet (the MOVs need to come first).
+ MachineInstrBuilder PushMIB = BuildMI(MF, DL, TII.get(ARM::tPUSH));
+ AddDefaultPred(PushMIB);
+
+ SmallVector<unsigned, 4> RegsToPush;
+ while (HiRegToSave != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) {
+ if (HiRegsToSave.count(*HiRegToSave)) {
+ bool isKill = !MF.getRegInfo().isLiveIn(*HiRegToSave);
+ if (isKill)
+ MBB.addLiveIn(*HiRegToSave);
+
+ // Emit a MOV from the high reg to the low reg.
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr));
+ MIB.addReg(*CopyReg, RegState::Define);
+ MIB.addReg(*HiRegToSave, getKillRegState(isKill));
+ AddDefaultPred(MIB);
+
+ // Record the register that must be added to the PUSH.
+ RegsToPush.push_back(*CopyReg);
+
+ CopyReg = findNextOrderedReg(++CopyReg, CopyRegs, AllCopyRegsEnd);
+ HiRegToSave =
+ findNextOrderedReg(++HiRegToSave, HiRegsToSave, AllHighRegsEnd);
+ }
}
- if (isKill)
- MBB.addLiveIn(Reg);
+ // Add the low registers to the PUSH, in ascending order.
+ for (unsigned Reg : reverse(RegsToPush))
+ PushMIB.addReg(Reg, RegState::Kill);
- MIB.addReg(Reg, getKillRegState(isKill));
+ // Insert the PUSH instruction after the MOVs.
+ MBB.insert(MI, PushMIB);
}
- MIB.setMIFlags(MachineInstr::FrameSetup);
+
return true;
}
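The Thumb1 prologue/epilogue changes save and restore r8-r11 by shuttling them through low registers, walking a fixed register order with the new findNextOrderedReg helper. A standalone sketch of that ordered scan, using std::set in place of SmallSet and made-up register numbers:

// Standalone sketch of the findNextOrderedReg scan added above: walk a fixed
// priority order and stop at the first entry that is in the enabled set.
#include <cstdio>
#include <iterator>
#include <set>

static const unsigned *findNextOrderedReg(const unsigned *Cur,
                                          const std::set<unsigned> &Enabled,
                                          const unsigned *End) {
  while (Cur != End && !Enabled.count(*Cur))
    ++Cur;
  return Cur;            // may be Cur itself if already enabled, or End
}

int main() {
  static const unsigned Order[] = {11, 10, 9, 8};   // r11..r8, as in the spill path
  std::set<unsigned> ToSave = {9, 8};
  const unsigned *End = std::end(Order);
  for (const unsigned *R = findNextOrderedReg(std::begin(Order), ToSave, End);
       R != End; R = findNextOrderedReg(R + 1, ToSave, End))
    std::printf("save r%u\n", *R);   // prints r9 then r8
}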
@@ -615,15 +756,101 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineFunction &MF = *MBB.getParent();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
const TargetInstrInfo &TII = *STI.getInstrInfo();
+ const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
+ MF.getSubtarget().getRegisterInfo());
bool isVarArg = AFI->getArgRegsSaveSize() > 0;
DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
+
+ SmallSet<unsigned, 9> LoRegsToRestore;
+ SmallSet<unsigned, 4> HiRegsToRestore;
+ // Low registers (r0-r7) which can be used to restore the high registers.
+ SmallSet<unsigned, 9> CopyRegs;
+
+ for (CalleeSavedInfo I : CSI) {
+ unsigned Reg = I.getReg();
+
+ if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) {
+ LoRegsToRestore.insert(Reg);
+ } else if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::LR) {
+ HiRegsToRestore.insert(Reg);
+ } else {
+ llvm_unreachable("callee-saved register of unexpected class");
+ }
+
+ // If this is a low register not used as the frame pointer, we may want to
+ // use it for restoring the high registers.
+ if ((ARM::tGPRRegClass.contains(Reg)) &&
+ !(hasFP(MF) && Reg == RegInfo->getFrameRegister(MF)))
+ CopyRegs.insert(Reg);
+ }
+
+ // If this is a return block, we may be able to use some unused return value
+ // registers for restoring the high regs.
+ auto Terminator = MBB.getFirstTerminator();
+ if (Terminator != MBB.end() && Terminator->getOpcode() == ARM::tBX_RET) {
+ CopyRegs.insert(ARM::R0);
+ CopyRegs.insert(ARM::R1);
+ CopyRegs.insert(ARM::R2);
+ CopyRegs.insert(ARM::R3);
+ for (auto Op : Terminator->implicit_operands()) {
+ if (Op.isReg())
+ CopyRegs.erase(Op.getReg());
+ }
+ }
+
+ static const unsigned AllCopyRegs[] = {ARM::R0, ARM::R1, ARM::R2, ARM::R3,
+ ARM::R4, ARM::R5, ARM::R6, ARM::R7};
+ static const unsigned AllHighRegs[] = {ARM::R8, ARM::R9, ARM::R10, ARM::R11};
+
+ const unsigned *AllCopyRegsEnd = std::end(AllCopyRegs);
+ const unsigned *AllHighRegsEnd = std::end(AllHighRegs);
+
+ // Find the first register to restore.
+ auto HiRegToRestore = findNextOrderedReg(std::begin(AllHighRegs),
+ HiRegsToRestore, AllHighRegsEnd);
+
+ while (HiRegToRestore != AllHighRegsEnd) {
+ assert(!CopyRegs.empty());
+ // Find the first low register to use.
+ auto CopyReg =
+ findNextOrderedReg(std::begin(AllCopyRegs), CopyRegs, AllCopyRegsEnd);
+
+ // Create the POP instruction.
+ MachineInstrBuilder PopMIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPOP));
+ AddDefaultPred(PopMIB);
+
+ while (HiRegToRestore != AllHighRegsEnd && CopyReg != AllCopyRegsEnd) {
+ // Add the low register to the POP.
+ PopMIB.addReg(*CopyReg, RegState::Define);
+
+ // Create the MOV from low to high register.
+ MachineInstrBuilder MIB =
+ BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr));
+ MIB.addReg(*HiRegToRestore, RegState::Define);
+ MIB.addReg(*CopyReg, RegState::Kill);
+ AddDefaultPred(MIB);
+
+ CopyReg = findNextOrderedReg(++CopyReg, CopyRegs, AllCopyRegsEnd);
+ HiRegToRestore =
+ findNextOrderedReg(++HiRegToRestore, HiRegsToRestore, AllHighRegsEnd);
+ }
+ }
+
+
+
+
MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP));
AddDefaultPred(MIB);
bool NeedsPop = false;
for (unsigned i = CSI.size(); i != 0; --i) {
unsigned Reg = CSI[i-1].getReg();
+
+ // High registers (excluding lr) have already been dealt with
+ if (!(ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR))
+ continue;
+
if (Reg == ARM::LR) {
if (MBB.succ_empty()) {
// Special epilogue for vararg functions. See emitEpilogue
diff --git a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
index 159731d8fc72..4b4fbaab28d9 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -83,7 +83,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (I != MBB.end()) DL = I->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
@@ -109,7 +109,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (I != MBB.end()) DL = I->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp b/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
index 0c7055551632..d01fc8c40ddf 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -38,10 +38,10 @@ namespace {
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "Thumb IT blocks insertion pass";
}
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
index e2e6dafd218a..1c731d669eda 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -130,7 +130,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (I != MBB.end()) DL = I->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
@@ -170,7 +170,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
diff --git a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
index c4fdb9b3147d..8208e7e24770 100644
--- a/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/contrib/llvm/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -148,10 +148,10 @@ namespace {
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "Thumb2 instruction size reduction pass";
}
@@ -430,6 +430,10 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI,
if (!MBB.getParent()->getFunction()->optForMinSize())
return false;
+ if (!MI->hasOneMemOperand() ||
+ (*MI->memoperands_begin())->getAlignment() < 4)
+ return false;
+
// We're creating a completely different type of load/store - LDM from LDR.
// For this reason we can't reuse the logic at the end of this function; we
// have to implement the MI building here.
@@ -651,7 +655,7 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI,
case ARM::t2ADDSri: {
if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, IsSelfLoop))
return true;
- // fallthrough
+ LLVM_FALLTHROUGH;
}
case ARM::t2ADDSrr:
return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, IsSelfLoop);
diff --git a/contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
index 6c26c8843865..2efd63b84a2c 100644
--- a/contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/ARM/ThumbRegisterInfo.cpp
@@ -126,6 +126,7 @@ static void emitThumbRegPlusImmInReg(
bool CanChangeCC, const TargetInstrInfo &TII,
const ARMBaseRegisterInfo &MRI, unsigned MIFlags = MachineInstr::NoFlags) {
MachineFunction &MF = *MBB.getParent();
+ const ARMSubtarget &ST = MF.getSubtarget<ARMSubtarget>();
bool isHigh = !isARMLowRegister(DestReg) ||
(BaseReg != 0 && !isARMLowRegister(BaseReg));
bool isSub = false;
@@ -154,6 +155,9 @@ static void emitThumbRegPlusImmInReg(
AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tRSB), LdReg))
.addReg(LdReg, RegState::Kill)
.setMIFlags(MIFlags);
+ } else if (ST.genExecuteOnly()) {
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), LdReg)
+ .addImm(NumBytes).setMIFlags(MIFlags);
} else
MRI.emitLoadConstPool(MBB, MBBI, dl, LdReg, 0, NumBytes, ARMCC::AL, 0,
MIFlags);
@@ -511,10 +515,10 @@ void ThumbRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
unsigned FrameReg = ARM::SP;
int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
- int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
- MF.getFrameInfo()->getStackSize() + SPAdj;
+ int Offset = MF.getFrameInfo().getObjectOffset(FrameIndex) +
+ MF.getFrameInfo().getStackSize() + SPAdj;
- if (MF.getFrameInfo()->hasVarSizedObjects()) {
+ if (MF.getFrameInfo().hasVarSizedObjects()) {
assert(SPAdj == 0 && STI.getFrameLowering()->hasFP(MF) && "Unexpected");
// There are alloca()'s in this function, must reference off the frame
// pointer or base pointer instead.
@@ -534,7 +538,7 @@ void ThumbRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
assert(STI.getFrameLowering()->hasReservedCallFrame(MF) &&
"Cannot use SP to access the emergency spill slot in "
"functions without a reserved call frame");
- assert(!MF.getFrameInfo()->hasVarSizedObjects() &&
+ assert(!MF.getFrameInfo().hasVarSizedObjects() &&
"Cannot use SP to access the emergency spill slot in "
"functions with variable sized frame objects");
}
@@ -570,7 +574,7 @@ void ThumbRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
unsigned TmpReg = MI.getOperand(0).getReg();
bool UseRR = false;
if (Opcode == ARM::tLDRspi) {
- if (FrameReg == ARM::SP)
+ if (FrameReg == ARM::SP || STI.genExecuteOnly())
emitThumbRegPlusImmInReg(MBB, II, dl, TmpReg, FrameReg,
Offset, false, TII, *this);
else {
@@ -594,7 +598,7 @@ void ThumbRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
bool UseRR = false;
if (Opcode == ARM::tSTRspi) {
- if (FrameReg == ARM::SP)
+ if (FrameReg == ARM::SP || STI.genExecuteOnly())
emitThumbRegPlusImmInReg(MBB, II, dl, VReg, FrameReg,
Offset, false, TII, *this);
else {
diff --git a/contrib/llvm/lib/Target/AVR/AVR.h b/contrib/llvm/lib/Target/AVR/AVR.h
index 041c77cfcb9d..8e5cc5360ad4 100644
--- a/contrib/llvm/lib/Target/AVR/AVR.h
+++ b/contrib/llvm/lib/Target/AVR/AVR.h
@@ -27,9 +27,15 @@ FunctionPass *createAVRISelDag(AVRTargetMachine &TM,
CodeGenOpt::Level OptLevel);
FunctionPass *createAVRExpandPseudoPass();
FunctionPass *createAVRFrameAnalyzerPass();
+FunctionPass *createAVRInstrumentFunctionsPass();
+FunctionPass *createAVRRelaxMemPass();
FunctionPass *createAVRDynAllocaSRPass();
FunctionPass *createAVRBranchSelectionPass();
+void initializeAVRExpandPseudoPass(PassRegistry&);
+void initializeAVRInstrumentFunctionsPass(PassRegistry&);
+void initializeAVRRelaxMemPass(PassRegistry&);
+
/// Contains the AVR backend.
namespace AVR {
diff --git a/contrib/llvm/lib/Target/AVR/AVR.td b/contrib/llvm/lib/Target/AVR/AVR.td
index 27cf212704f0..d03b983aa70b 100644
--- a/contrib/llvm/lib/Target/AVR/AVR.td
+++ b/contrib/llvm/lib/Target/AVR/AVR.td
@@ -16,493 +16,10 @@
include "llvm/Target/Target.td"
//===---------------------------------------------------------------------===//
-// AVR Subtarget Features.
+// AVR Device Definitions
//===---------------------------------------------------------------------===//
-// :TODO: Implement the skip errata, see `gcc/config/avr/avr-arch.h` for details
-// :TODO: We define all devices with SRAM to have all variants of LD/ST/LDD/STD.
-// In reality, avr1 (no SRAM) has one variant each of `LD` and `ST`.
-// avr2 (with SRAM) adds the rest of the variants.
-// :TODO: s/AVRTiny/Tiny
-
-
-// A feature set aggregates features, grouping them. We don't want to create a
-// new member in AVRSubtarget (to store a value) for each set because we do not
-// care if the set is supported, only the subfeatures inside the set. We fix
-// this by simply setting the same dummy member for all feature sets, which is
-// then ignored.
-class FeatureSet<string name, string desc, list<SubtargetFeature> i>
- : SubtargetFeature<name, "m_FeatureSetDummy", "true", desc, i>;
-
-// A family of microcontrollers, defining a set of supported features.
-class Family<string name, list<SubtargetFeature> i>
- : FeatureSet<name, !strconcat("The device is a part of the ",
- name, " family"), i>;
-
-// The device has SRAM, and supports the bare minimum of
-// SRAM-relevant instructions.
-//
-// These are:
-// LD - all 9 variants
-// ST - all 9 variants
-// LDD - two variants for Y and Z
-// STD - two variants for Y and Z
-// `LDS Rd, K`
-// `STS k, Rr`
-// `PUSH`/`POP`
-def FeatureSRAM : SubtargetFeature<"sram", "m_hasSRAM", "true",
- "The device has random access memory">;
-
-// The device supports the `JMP k` and `CALL k` instructions.
-def FeatureJMPCALL : SubtargetFeature<"jmpcall", "m_hasJMPCALL", "true",
- "The device supports the `JMP` and "
- "`CALL` instructions">;
-
-
-// The device supports the indirect branches `IJMP` and `ICALL`.
-def FeatureIJMPCALL : SubtargetFeature<"ijmpcall", "m_hasIJMPCALL",
- "true",
- "The device supports `IJMP`/`ICALL`"
- "instructions">;
-
-// The device supports the extended indirect branches `EIJMP` and `EICALL`.
-def FeatureEIJMPCALL : SubtargetFeature<"eijmpcall", "m_hasEIJMPCALL",
- "true", "The device supports the "
- "`EIJMP`/`EICALL` instructions">;
-
-// The device supports `ADDI Rd, K`, `SUBI Rd, K`.
-def FeatureADDSUBIW : SubtargetFeature<"addsubiw", "m_hasADDSUBIW",
- "true", "Enable 16-bit register-immediate "
- "addition and subtraction instructions">;
-
-// The device has an 8-bit stack pointer (SP) register.
-def FeatureSmallStack : SubtargetFeature<"smallstack", "m_hasSmallStack",
- "true", "The device has an 8-bit "
- "stack pointer">;
-
-// The device supports the 16-bit GPR pair MOVW instruction.
-def FeatureMOVW : SubtargetFeature<"movw", "m_hasMOVW", "true",
- "The device supports the 16-bit MOVW "
- "instruction">;
-
-// The device supports the `LPM` instruction, with implied destination being r0.
-def FeatureLPM : SubtargetFeature<"lpm", "m_hasLPM", "true",
- "The device supports the `LPM` instruction">;
-
-// The device supports the `LPM Rd, Z[+] instruction.
-def FeatureLPMX : SubtargetFeature<"lpmx", "m_hasLPMX", "true",
- "The device supports the `LPM Rd, Z[+]` "
- "instruction">;
-
-// The device supports the `ELPM` instruction.
-def FeatureELPM : SubtargetFeature<"elpm", "m_hasELPM", "true",
- "The device supports the ELPM instruction">;
-
-// The device supports the `ELPM Rd, Z[+]` instructions.
-def FeatureELPMX : SubtargetFeature<"elpmx", "m_hasELPMX", "true",
- "The device supports the `ELPM Rd, Z[+]` "
- "instructions">;
-
-// The device supports the `SPM` instruction.
-def FeatureSPM : SubtargetFeature<"spm", "m_hasSPM", "true",
- "The device supports the `SPM` instruction">;
-
-// The device supports the `SPM Z+` instruction.
-def FeatureSPMX : SubtargetFeature<"spmx", "m_hasSPMX", "true",
- "The device supports the `SPM Z+` "
- "instruction">;
-
-// The device supports the `DES k` instruction.
-def FeatureDES : SubtargetFeature<"des", "m_hasDES", "true",
- "The device supports the `DES k` encryption "
- "instruction">;
-
-// The device supports the Read-Write-Modify instructions
-// XCH, LAS, LAC, and LAT.
-def FeatureRMW : SubtargetFeature<"rmw", "m_supportsRMW", "true",
- "The device supports the read-write-modify "
- "instructions: XCH, LAS, LAC, LAT">;
-
-// The device supports the `[F]MUL[S][U]` family of instructions.
-def FeatureMultiplication : SubtargetFeature<"mul", "m_supportsMultiplication",
- "true", "The device supports the "
- "multiplication instructions">;
-
-// The device supports the `BREAK` instruction.
-def FeatureBREAK : SubtargetFeature<"break", "m_hasBREAK", "true",
- "The device supports the `BREAK` debugging "
- "instruction">;
-
-// The device has instruction encodings specific to the Tiny core.
-def FeatureTinyEncoding : SubtargetFeature<"tinyencoding",
- "m_hasTinyEncoding", "true",
- "The device has Tiny core specific "
- "instruction encodings">;
-
-class ELFArch<string name> : SubtargetFeature<"", "ELFArch",
- !strconcat("ELF::",name), "">;
-
-// ELF e_flags architecture values
-def ELFArchAVR1 : ELFArch<"EF_AVR_ARCH_AVR1">;
-def ELFArchAVR2 : ELFArch<"EF_AVR_ARCH_AVR2">;
-def ELFArchAVR25 : ELFArch<"EF_AVR_ARCH_AVR25">;
-def ELFArchAVR3 : ELFArch<"EF_AVR_ARCH_AVR3">;
-def ELFArchAVR31 : ELFArch<"EF_AVR_ARCH_AVR31">;
-def ELFArchAVR35 : ELFArch<"EF_AVR_ARCH_AVR35">;
-def ELFArchAVR4 : ELFArch<"EF_AVR_ARCH_AVR4">;
-def ELFArchAVR5 : ELFArch<"EF_AVR_ARCH_AVR5">;
-def ELFArchAVR51 : ELFArch<"EF_AVR_ARCH_AVR51">;
-def ELFArchAVR6 : ELFArch<"EF_AVR_ARCH_AVR6">;
-def ELFArchAVRTiny : ELFArch<"EF_AVR_ARCH_AVRTINY">;
-def ELFArchXMEGA1 : ELFArch<"EF_AVR_ARCH_XMEGA1">;
-def ELFArchXMEGA2 : ELFArch<"EF_AVR_ARCH_XMEGA2">;
-def ELFArchXMEGA3 : ELFArch<"EF_AVR_ARCH_XMEGA3">;
-def ELFArchXMEGA4 : ELFArch<"EF_AVR_ARCH_XMEGA4">;
-def ELFArchXMEGA5 : ELFArch<"EF_AVR_ARCH_XMEGA5">;
-def ELFArchXMEGA6 : ELFArch<"EF_AVR_ARCH_XMEGA6">;
-def ELFArchXMEGA7 : ELFArch<"EF_AVR_ARCH_XMEGA7">;
-
-//===---------------------------------------------------------------------===//
-// AVR Families
-//===---------------------------------------------------------------------===//
-
-// The device has at least the bare minimum that **every** single AVR
-// device should have.
-def FamilyAVR0 : Family<"avr0", []>;
-
-def FamilyAVR1 : Family<"avr1", [FamilyAVR0, FeatureLPM]>;
-
-def FamilyAVR2 : Family<"avr2",
- [FamilyAVR1, FeatureIJMPCALL, FeatureADDSUBIW,
- FeatureSRAM]>;
-
-def FamilyAVR25 : Family<"avr25",
- [FamilyAVR2, FeatureMOVW, FeatureLPMX,
- FeatureSPM, FeatureBREAK]>;
-
-def FamilyAVR3 : Family<"avr3",
- [FamilyAVR2, FeatureJMPCALL]>;
-
-def FamilyAVR31 : Family<"avr31",
- [FamilyAVR3, FeatureELPM]>;
-
-def FamilyAVR35 : Family<"avr35",
- [FamilyAVR3, FeatureMOVW, FeatureLPMX,
- FeatureSPM, FeatureBREAK]>;
-
-def FamilyAVR4 : Family<"avr4",
- [FamilyAVR2, FeatureMultiplication,
- FeatureMOVW, FeatureLPMX, FeatureSPM,
- FeatureBREAK]>;
-
-def FamilyAVR5 : Family<"avr5",
- [FamilyAVR3, FeatureMultiplication,
- FeatureMOVW, FeatureLPMX, FeatureSPM,
- FeatureBREAK]>;
-
-def FamilyAVR51 : Family<"avr51",
- [FamilyAVR5, FeatureELPM, FeatureELPMX]>;
-
-def FamilyAVR6 : Family<"avr6",
- [FamilyAVR51]>;
-
-def FamilyAVRTiny : Family<"avrtiny",
- [FamilyAVR0, FeatureBREAK, FeatureSRAM,
- FeatureTinyEncoding]>;
-
-def FamilyXMEGA : Family<"xmega",
- [FamilyAVR51, FeatureEIJMPCALL, FeatureSPMX,
- FeatureDES]>;
-
-def FamilyXMEGAU : Family<"xmegau",
- [FamilyXMEGA, FeatureRMW]>;
-
-def FeatureSetSpecial : FeatureSet<"special",
- "Enable use of the entire instruction "
- "set - used for debugging",
- [FeatureSRAM, FeatureJMPCALL,
- FeatureIJMPCALL, FeatureEIJMPCALL,
- FeatureADDSUBIW, FeatureMOVW,
- FeatureLPM, FeatureLPMX, FeatureELPM,
- FeatureELPMX, FeatureSPM, FeatureSPMX,
- FeatureDES, FeatureRMW,
- FeatureMultiplication, FeatureBREAK]>;
-
-//===---------------------------------------------------------------------===//
-// AVR microcontrollers supported.
-//===---------------------------------------------------------------------===//
-
-class Device<string Name, Family Fam, ELFArch Arch,
- list<SubtargetFeature> ExtraFeatures = []>
- : Processor<Name, NoItineraries, !listconcat([Fam,Arch],ExtraFeatures)>;
-
-// Generic MCUs
-// Note that several versions of GCC have strange ELF architecture
-// settings for backwards compatibility - see `gas/config/tc-avr.c`
-// in AVR binutils. We do not replicate this.
-def : Device<"avr1", FamilyAVR1, ELFArchAVR1>;
-def : Device<"avr2", FamilyAVR2, ELFArchAVR2>;
-def : Device<"avr25", FamilyAVR25, ELFArchAVR25>;
-def : Device<"avr3", FamilyAVR3, ELFArchAVR3>;
-def : Device<"avr31", FamilyAVR31, ELFArchAVR31>;
-def : Device<"avr35", FamilyAVR35, ELFArchAVR35>;
-def : Device<"avr4", FamilyAVR4, ELFArchAVR4>;
-def : Device<"avr5", FamilyAVR5, ELFArchAVR5>;
-def : Device<"avr51", FamilyAVR51, ELFArchAVR51>;
-def : Device<"avr6", FamilyAVR6, ELFArchAVR6>;
-def : Device<"avrxmega1", FamilyXMEGA, ELFArchXMEGA1>;
-def : Device<"avrxmega2", FamilyXMEGA, ELFArchXMEGA2>;
-def : Device<"avrxmega3", FamilyXMEGA, ELFArchXMEGA3>;
-def : Device<"avrxmega4", FamilyXMEGA, ELFArchXMEGA4>;
-def : Device<"avrxmega5", FamilyXMEGA, ELFArchXMEGA5>;
-def : Device<"avrxmega6", FamilyXMEGA, ELFArchXMEGA6>;
-def : Device<"avrxmega7", FamilyXMEGA, ELFArchXMEGA7>;
-def : Device<"avrtiny", FamilyAVRTiny, ELFArchAVRTiny>;
-
-// Specific MCUs
-def : Device<"at90s1200", FamilyAVR0, ELFArchAVR1>;
-def : Device<"attiny11", FamilyAVR1, ELFArchAVR1>;
-def : Device<"attiny12", FamilyAVR1, ELFArchAVR1>;
-def : Device<"attiny15", FamilyAVR1, ELFArchAVR1>;
-def : Device<"attiny28", FamilyAVR1, ELFArchAVR1>;
-def : Device<"at90s2313", FamilyAVR2, ELFArchAVR2>;
-def : Device<"at90s2323", FamilyAVR2, ELFArchAVR2>;
-def : Device<"at90s2333", FamilyAVR2, ELFArchAVR2>;
-def : Device<"at90s2343", FamilyAVR2, ELFArchAVR2>;
-def : Device<"attiny22", FamilyAVR2, ELFArchAVR2>;
-def : Device<"attiny26", FamilyAVR2, ELFArchAVR2, [FeatureLPMX]>;
-def : Device<"at86rf401", FamilyAVR2, ELFArchAVR25,
- [FeatureMOVW, FeatureLPMX]>;
-def : Device<"at90s4414", FamilyAVR2, ELFArchAVR2>;
-def : Device<"at90s4433", FamilyAVR2, ELFArchAVR2>;
-def : Device<"at90s4434", FamilyAVR2, ELFArchAVR2>;
-def : Device<"at90s8515", FamilyAVR2, ELFArchAVR2>;
-def : Device<"at90c8534", FamilyAVR2, ELFArchAVR2>;
-def : Device<"at90s8535", FamilyAVR2, ELFArchAVR2>;
-def : Device<"ata5272", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny13", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny13a", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny2313", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny2313a", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny24", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny24a", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny4313", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny44", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny44a", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny84", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny84a", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny25", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny45", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny85", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny261", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny261a", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny461", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny461a", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny861", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny861a", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny87", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny43u", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny48", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny88", FamilyAVR25, ELFArchAVR25>;
-def : Device<"attiny828", FamilyAVR25, ELFArchAVR25>;
-def : Device<"at43usb355", FamilyAVR3, ELFArchAVR3>;
-def : Device<"at76c711", FamilyAVR3, ELFArchAVR3>;
-def : Device<"atmega103", FamilyAVR31, ELFArchAVR31>;
-def : Device<"at43usb320", FamilyAVR31, ELFArchAVR31>;
-def : Device<"attiny167", FamilyAVR35, ELFArchAVR35>;
-def : Device<"at90usb82", FamilyAVR35, ELFArchAVR35>;
-def : Device<"at90usb162", FamilyAVR35, ELFArchAVR35>;
-def : Device<"ata5505", FamilyAVR35, ELFArchAVR35>;
-def : Device<"atmega8u2", FamilyAVR35, ELFArchAVR35>;
-def : Device<"atmega16u2", FamilyAVR35, ELFArchAVR35>;
-def : Device<"atmega32u2", FamilyAVR35, ELFArchAVR35>;
-def : Device<"attiny1634", FamilyAVR35, ELFArchAVR35>;
-def : Device<"atmega8", FamilyAVR4, ELFArchAVR4>; // FIXME: family may be wrong
-def : Device<"ata6289", FamilyAVR4, ELFArchAVR4>;
-def : Device<"atmega8a", FamilyAVR4, ELFArchAVR4>;
-def : Device<"ata6285", FamilyAVR4, ELFArchAVR4>;
-def : Device<"ata6286", FamilyAVR4, ELFArchAVR4>;
-def : Device<"atmega48", FamilyAVR4, ELFArchAVR4>;
-def : Device<"atmega48a", FamilyAVR4, ELFArchAVR4>;
-def : Device<"atmega48pa", FamilyAVR4, ELFArchAVR4>;
-def : Device<"atmega48p", FamilyAVR4, ELFArchAVR4>;
-def : Device<"atmega88", FamilyAVR4, ELFArchAVR4>;
-def : Device<"atmega88a", FamilyAVR4, ELFArchAVR4>;
-def : Device<"atmega88p", FamilyAVR4, ELFArchAVR4>;
-def : Device<"atmega88pa", FamilyAVR4, ELFArchAVR4>;
-def : Device<"atmega8515", FamilyAVR2, ELFArchAVR4,
- [FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM]>;
-def : Device<"atmega8535", FamilyAVR2, ELFArchAVR4,
- [FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM]>;
-def : Device<"atmega8hva", FamilyAVR4, ELFArchAVR4>;
-def : Device<"at90pwm1", FamilyAVR4, ELFArchAVR4>;
-def : Device<"at90pwm2", FamilyAVR4, ELFArchAVR4>;
-def : Device<"at90pwm2b", FamilyAVR4, ELFArchAVR4>;
-def : Device<"at90pwm3", FamilyAVR4, ELFArchAVR4>;
-def : Device<"at90pwm3b", FamilyAVR4, ELFArchAVR4>;
-def : Device<"at90pwm81", FamilyAVR4, ELFArchAVR4>;
-def : Device<"ata5790", FamilyAVR5, ELFArchAVR5>;
-def : Device<"ata5795", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega16", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega16a", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega161", FamilyAVR3, ELFArchAVR5,
- [FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM]>;
-def : Device<"atmega162", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega163", FamilyAVR3, ELFArchAVR5,
- [FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM]>;
-def : Device<"atmega164a", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega164p", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega164pa", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega165", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega165a", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega165p", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega165pa", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega168", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega168a", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega168p", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega168pa", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega169", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega169a", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega169p", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega169pa", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega32", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega32a", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega323", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega324a", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega324p", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega324pa", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega325", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega325a", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega325p", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega325pa", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega3250", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega3250a", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega3250p", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega3250pa", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega328", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega328p", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega329", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega329a", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega329p", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega329pa", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega3290", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega3290a", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega3290p", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega3290pa", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega406", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega64", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega64a", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega640", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega644", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega644a", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega644p", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega644pa", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega645", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega645a", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega645p", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega649", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega649a", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega649p", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega6450", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega6450a", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega6450p", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega6490", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega6490a", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega6490p", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega64rfr2", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega644rfr2", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega16hva", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega16hva2", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega16hvb", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega16hvbrevb", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega32hvb", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega32hvbrevb", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega64hve", FamilyAVR5, ELFArchAVR5>;
-def : Device<"at90can32", FamilyAVR5, ELFArchAVR5>;
-def : Device<"at90can64", FamilyAVR5, ELFArchAVR5>;
-def : Device<"at90pwm161", FamilyAVR5, ELFArchAVR5>;
-def : Device<"at90pwm216", FamilyAVR5, ELFArchAVR5>;
-def : Device<"at90pwm316", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega32c1", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega64c1", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega16m1", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega32m1", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega64m1", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega16u4", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega32u4", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega32u6", FamilyAVR5, ELFArchAVR5>;
-def : Device<"at90usb646", FamilyAVR5, ELFArchAVR5>;
-def : Device<"at90usb647", FamilyAVR5, ELFArchAVR5>;
-def : Device<"at90scr100", FamilyAVR5, ELFArchAVR5>;
-def : Device<"at94k", FamilyAVR3, ELFArchAVR5,
- [FeatureMultiplication, FeatureMOVW, FeatureLPMX]>;
-def : Device<"m3000", FamilyAVR5, ELFArchAVR5>;
-def : Device<"atmega128", FamilyAVR51, ELFArchAVR51>;
-def : Device<"atmega128a", FamilyAVR51, ELFArchAVR51>;
-def : Device<"atmega1280", FamilyAVR51, ELFArchAVR51>;
-def : Device<"atmega1281", FamilyAVR51, ELFArchAVR51>;
-def : Device<"atmega1284", FamilyAVR51, ELFArchAVR51>;
-def : Device<"atmega1284p", FamilyAVR51, ELFArchAVR51>;
-def : Device<"atmega128rfa1", FamilyAVR51, ELFArchAVR51>;
-def : Device<"atmega128rfr2", FamilyAVR51, ELFArchAVR51>;
-def : Device<"atmega1284rfr2", FamilyAVR51, ELFArchAVR51>;
-def : Device<"at90can128", FamilyAVR51, ELFArchAVR51>;
-def : Device<"at90usb1286", FamilyAVR51, ELFArchAVR51>;
-def : Device<"at90usb1287", FamilyAVR51, ELFArchAVR51>;
-def : Device<"atmega2560", FamilyAVR6, ELFArchAVR6>;
-def : Device<"atmega2561", FamilyAVR6, ELFArchAVR6>;
-def : Device<"atmega256rfr2", FamilyAVR6, ELFArchAVR6>;
-def : Device<"atmega2564rfr2", FamilyAVR6, ELFArchAVR6>;
-def : Device<"atxmega16a4", FamilyXMEGA, ELFArchXMEGA2>;
-def : Device<"atxmega16a4u", FamilyXMEGAU, ELFArchXMEGA2>;
-def : Device<"atxmega16c4", FamilyXMEGAU, ELFArchXMEGA2>;
-def : Device<"atxmega16d4", FamilyXMEGA, ELFArchXMEGA2>;
-def : Device<"atxmega32a4", FamilyXMEGA, ELFArchXMEGA2>;
-def : Device<"atxmega32a4u", FamilyXMEGAU, ELFArchXMEGA2>;
-def : Device<"atxmega32c4", FamilyXMEGAU, ELFArchXMEGA2>;
-def : Device<"atxmega32d4", FamilyXMEGA, ELFArchXMEGA2>;
-def : Device<"atxmega32e5", FamilyXMEGA, ELFArchXMEGA2>;
-def : Device<"atxmega16e5", FamilyXMEGA, ELFArchXMEGA2>;
-def : Device<"atxmega8e5", FamilyXMEGA, ELFArchXMEGA2>;
-def : Device<"atxmega32x1", FamilyXMEGA, ELFArchXMEGA2>;
-def : Device<"atxmega64a3", FamilyXMEGA, ELFArchXMEGA4>;
-def : Device<"atxmega64a3u", FamilyXMEGAU, ELFArchXMEGA4>;
-def : Device<"atxmega64a4u", FamilyXMEGAU, ELFArchXMEGA4>;
-def : Device<"atxmega64b1", FamilyXMEGAU, ELFArchXMEGA4>;
-def : Device<"atxmega64b3", FamilyXMEGAU, ELFArchXMEGA4>;
-def : Device<"atxmega64c3", FamilyXMEGAU, ELFArchXMEGA4>;
-def : Device<"atxmega64d3", FamilyXMEGA, ELFArchXMEGA4>;
-def : Device<"atxmega64d4", FamilyXMEGA, ELFArchXMEGA4>;
-def : Device<"atxmega64a1", FamilyXMEGA, ELFArchXMEGA5>;
-def : Device<"atxmega64a1u", FamilyXMEGAU, ELFArchXMEGA5>;
-def : Device<"atxmega128a3", FamilyXMEGA, ELFArchXMEGA6>;
-def : Device<"atxmega128a3u", FamilyXMEGAU, ELFArchXMEGA6>;
-def : Device<"atxmega128b1", FamilyXMEGAU, ELFArchXMEGA6>;
-def : Device<"atxmega128b3", FamilyXMEGAU, ELFArchXMEGA6>;
-def : Device<"atxmega128c3", FamilyXMEGAU, ELFArchXMEGA6>;
-def : Device<"atxmega128d3", FamilyXMEGA, ELFArchXMEGA6>;
-def : Device<"atxmega128d4", FamilyXMEGA, ELFArchXMEGA6>;
-def : Device<"atxmega192a3", FamilyXMEGA, ELFArchXMEGA6>;
-def : Device<"atxmega192a3u", FamilyXMEGAU, ELFArchXMEGA6>;
-def : Device<"atxmega192c3", FamilyXMEGAU, ELFArchXMEGA6>;
-def : Device<"atxmega192d3", FamilyXMEGA, ELFArchXMEGA6>;
-def : Device<"atxmega256a3", FamilyXMEGA, ELFArchXMEGA6>;
-def : Device<"atxmega256a3u", FamilyXMEGAU, ELFArchXMEGA6>;
-def : Device<"atxmega256a3b", FamilyXMEGA, ELFArchXMEGA6>;
-def : Device<"atxmega256a3bu", FamilyXMEGAU, ELFArchXMEGA6>;
-def : Device<"atxmega256c3", FamilyXMEGAU, ELFArchXMEGA6>;
-def : Device<"atxmega256d3", FamilyXMEGA, ELFArchXMEGA6>;
-def : Device<"atxmega384c3", FamilyXMEGAU, ELFArchXMEGA6>;
-def : Device<"atxmega384d3", FamilyXMEGA, ELFArchXMEGA6>;
-def : Device<"atxmega128a1", FamilyXMEGA, ELFArchXMEGA7>;
-def : Device<"atxmega128a1u", FamilyXMEGAU, ELFArchXMEGA7>;
-def : Device<"atxmega128a4u", FamilyXMEGAU, ELFArchXMEGA7>;
-def : Device<"attiny4", FamilyAVRTiny, ELFArchAVRTiny>;
-def : Device<"attiny5", FamilyAVRTiny, ELFArchAVRTiny>;
-def : Device<"attiny9", FamilyAVRTiny, ELFArchAVRTiny>;
-def : Device<"attiny10", FamilyAVRTiny, ELFArchAVRTiny>;
-def : Device<"attiny20", FamilyAVRTiny, ELFArchAVRTiny>;
-def : Device<"attiny40", FamilyAVRTiny, ELFArchAVRTiny>;
+include "AVRDevices.td"
//===---------------------------------------------------------------------===//
// Register File Description
@@ -528,36 +45,37 @@ include "AVRCallingConv.td"
// Assembly Printers
//===---------------------------------------------------------------------===//
-// def AVRAsmWriter : AsmWriter {
-// string AsmWriterClassName = "InstPrinter";
-// bit isMCAsmWriter = 1;
-// }
+def AVRAsmWriter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+ bit isMCAsmWriter = 1;
+}
//===---------------------------------------------------------------------===//
// Assembly Parsers
//===---------------------------------------------------------------------===//
-// def AVRAsmParser : AsmParser {
-// let ShouldEmitMatchRegisterName = 1;
-// let ShouldEmitMatchRegisterAltName = 1;
-// }
+def AVRAsmParser : AsmParser {
+ let ShouldEmitMatchRegisterName = 1;
+ let ShouldEmitMatchRegisterAltName = 1;
+}
-// def AVRAsmParserVariant : AsmParserVariant {
-// int Variant = 0;
-//
-// // Recognize hard coded registers.
-// string RegisterPrefix = "$";
-// }
+def AVRAsmParserVariant : AsmParserVariant {
+ int Variant = 0;
+
+ // Recognize hard coded registers.
+ string RegisterPrefix = "$";
+ string TokenizingCharacters = "+";
+}
//===---------------------------------------------------------------------===//
// Target Declaration
//===---------------------------------------------------------------------===//
def AVR : Target {
- let InstructionSet = AVRInstrInfo;
-// let AssemblyWriters = [AVRAsmWriter];
-//
-// let AssemblyParsers = [AVRAsmParser];
-// let AssemblyParserVariants = [AVRAsmParserVariant];
+ let InstructionSet = AVRInstrInfo;
+ let AssemblyWriters = [AVRAsmWriter];
+
+ let AssemblyParsers = [AVRAsmParser];
+ let AssemblyParserVariants = [AVRAsmParserVariant];
}
diff --git a/contrib/llvm/lib/Target/AVR/AVRAsmPrinter.cpp b/contrib/llvm/lib/Target/AVR/AVRAsmPrinter.cpp
new file mode 100644
index 000000000000..4afdd3a0ec08
--- /dev/null
+++ b/contrib/llvm/lib/Target/AVR/AVRAsmPrinter.cpp
@@ -0,0 +1,184 @@
+//===-- AVRAsmPrinter.cpp - AVR LLVM assembly writer ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format AVR assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AVR.h"
+#include "AVRMCInstLower.h"
+#include "AVRSubtarget.h"
+#include "InstPrinter/AVRInstPrinter.h"
+
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Target/TargetSubtargetInfo.h"
+
+#define DEBUG_TYPE "avr-asm-printer"
+
+namespace llvm {
+
+/// An AVR assembly code printer.
+class AVRAsmPrinter : public AsmPrinter {
+public:
+ AVRAsmPrinter(TargetMachine &TM,
+ std::unique_ptr<MCStreamer> Streamer)
+ : AsmPrinter(TM, std::move(Streamer)), MRI(*TM.getMCRegisterInfo()) { }
+
+ StringRef getPassName() const override { return "AVR Assembly Printer"; }
+
+ void printOperand(const MachineInstr *MI, unsigned OpNo, raw_ostream &O,
+ const char *Modifier = 0);
+
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O) override;
+
+ bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O) override;
+
+ void EmitInstruction(const MachineInstr *MI) override;
+
+private:
+ const MCRegisterInfo &MRI;
+};
+
+void AVRAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
+ raw_ostream &O, const char *Modifier) {
+ const MachineOperand &MO = MI->getOperand(OpNo);
+
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ O << AVRInstPrinter::getPrettyRegisterName(MO.getReg(), MRI);
+ break;
+ case MachineOperand::MO_Immediate:
+ O << MO.getImm();
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ O << getSymbol(MO.getGlobal());
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ O << *GetExternalSymbolSymbol(MO.getSymbolName());
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol();
+ break;
+ default:
+ llvm_unreachable("Not implemented yet!");
+ }
+}
+
+bool AVRAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
+ unsigned AsmVariant, const char *ExtraCode,
+ raw_ostream &O) {
+ // Default asm printer can only deal with some extra codes,
+ // so try it first.
+ bool Error = AsmPrinter::PrintAsmOperand(MI, OpNum, AsmVariant, ExtraCode, O);
+
+ if (Error && ExtraCode && ExtraCode[0]) {
+ if (ExtraCode[1] != 0)
+ return true; // Unknown modifier.
+
+ if (ExtraCode[0] >= 'A' && ExtraCode[0] <= 'Z') {
+ const MachineOperand &RegOp = MI->getOperand(OpNum);
+
+ assert(RegOp.isReg() && "Operand must be a register when you're "
+ "using 'A'..'Z' operand extracodes.");
+ unsigned Reg = RegOp.getReg();
+
+ unsigned ByteNumber = ExtraCode[0] - 'A';
+
+ unsigned OpFlags = MI->getOperand(OpNum - 1).getImm();
+ unsigned NumOpRegs = InlineAsm::getNumOperandRegisters(OpFlags);
+ (void)NumOpRegs;
+
+ const AVRSubtarget &STI = MF->getSubtarget<AVRSubtarget>();
+ const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
+
+ unsigned BytesPerReg = TRI.getMinimalPhysRegClass(Reg)->getSize();
+ assert(BytesPerReg <= 2 && "Only 8 and 16 bit regs are supported.");
+
+ unsigned RegIdx = ByteNumber / BytesPerReg;
+ assert(RegIdx < NumOpRegs && "Multibyte index out of range.");
+
+ Reg = MI->getOperand(OpNum + RegIdx).getReg();
+
+ if (BytesPerReg == 2) {
+ Reg = TRI.getSubReg(Reg, ByteNumber % BytesPerReg ? AVR::sub_hi
+ : AVR::sub_lo);
+ }
+
+ O << AVRInstPrinter::getPrettyRegisterName(Reg, MRI);
+ return false;
+ }
+ }
+
+ printOperand(MI, OpNum, O);
+
+ return false;
+}
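
The modifier handling above turns a GCC-style operand code ('A' selects byte 0, 'B' byte 1, and so on) into a register index plus a byte position inside that register. A standalone sketch of that arithmetic, using a hypothetical helper name (mapModifierToByte is illustrative, not part of the patch):

#include <cassert>

// Hypothetical helper mirroring the arithmetic in PrintAsmOperand: the
// modifier letter names a byte of the operand; the byte number is split into
// which operand register holds it and where inside that register it sits
// (0 = sub_lo, 1 = sub_hi).
struct BytePos {
  unsigned RegIdx;
  unsigned ByteInReg;
};

static BytePos mapModifierToByte(char Modifier, unsigned BytesPerReg) {
  assert(Modifier >= 'A' && Modifier <= 'Z' && "modifier must be 'A'..'Z'");
  assert((BytesPerReg == 1 || BytesPerReg == 2) &&
         "only 8 and 16 bit regs are supported");
  unsigned ByteNumber = Modifier - 'A';
  return {ByteNumber / BytesPerReg, ByteNumber % BytesPerReg};
}

// With 16-bit register pairs (BytesPerReg == 2), 'C' gives {RegIdx = 1,
// ByteInReg = 0}: the low byte of the operand's second register.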
+
+bool AVRAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
+ unsigned OpNum, unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
+ if (ExtraCode && ExtraCode[0]) {
+ llvm_unreachable("This branch is not implemented yet");
+ }
+
+ const MachineOperand &MO = MI->getOperand(OpNum);
+ (void)MO;
+ assert(MO.isReg() && "Unexpected inline asm memory operand");
+
+ // TODO: We can look up the alternative name for the register if it's given.
+ if (MI->getOperand(OpNum).getReg() == AVR::R31R30) {
+ O << "Z";
+ } else {
+ assert(MI->getOperand(OpNum).getReg() == AVR::R29R28 &&
+ "Wrong register class for memory operand.");
+ O << "Y";
+ }
+
+ // If NumOpRegs == 2, then we assume it is the product of a FrameIndex
+ // expansion and the second operand is an Imm.
+ unsigned OpFlags = MI->getOperand(OpNum - 1).getImm();
+ unsigned NumOpRegs = InlineAsm::getNumOperandRegisters(OpFlags);
+
+ if (NumOpRegs == 2) {
+ O << '+' << MI->getOperand(OpNum + 1).getImm();
+ }
+
+ return false;
+}
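
For memory operands the hook prints the pointer register as `Z` or `Y` and, when a frame-index expansion has left a trailing immediate, appends `+<offset>`. A small illustrative formatter showing the strings that result (formatMemOperand is a hypothetical name, not part of the patch):

#include <string>

// Illustrative formatter for the text the hook above emits: the pointer
// register by name, plus "+<offset>" when a frame-index expansion supplied a
// trailing immediate operand.
static std::string formatMemOperand(bool UsesZ, bool HasOffset, int Offset) {
  std::string Out(1, UsesZ ? 'Z' : 'Y');
  if (HasOffset)
    Out += '+' + std::to_string(Offset);
  return Out; // e.g. "Z" for a plain pointer, "Y+3" for a stack slot
}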
+
+void AVRAsmPrinter::EmitInstruction(const MachineInstr *MI) {
+ AVRMCInstLower MCInstLowering(OutContext, *this);
+
+ MCInst I;
+ MCInstLowering.lowerInstruction(*MI, I);
+ EmitToStreamer(*OutStreamer, I);
+}
+
+} // end of namespace llvm
+
+extern "C" void LLVMInitializeAVRAsmPrinter() {
+ llvm::RegisterAsmPrinter<llvm::AVRAsmPrinter> X(llvm::getTheAVRTarget());
+}
+
diff --git a/contrib/llvm/lib/Target/AVR/AVRCallingConv.td b/contrib/llvm/lib/Target/AVR/AVRCallingConv.td
index d8cb3fe84022..68dbce02706f 100644
--- a/contrib/llvm/lib/Target/AVR/AVRCallingConv.td
+++ b/contrib/llvm/lib/Target/AVR/AVRCallingConv.td
@@ -23,7 +23,7 @@ def RetCC_AVR : CallingConv
]>;
// Special return value calling convention for runtime functions.
-def RetCC_AVR_RT : CallingConv
+def RetCC_AVR_BUILTIN : CallingConv
<[
CCIfType<[i8], CCAssignToReg<[R24,R25]>>,
CCIfType<[i16], CCAssignToReg<[R23R22, R25R24]>>
@@ -43,15 +43,8 @@ def ArgCC_AVR_Vararg : CallingConv
]>;
// Special argument calling convention for
-// multiplication runtime functions.
-def ArgCC_AVR_RT_MUL : CallingConv
-<[
- CCIfType<[i16], CCAssignToReg<[R27R26,R19R18]>>
-]>;
-
-// Special argument calling convention for
// division runtime functions.
-def ArgCC_AVR_RT_DIV : CallingConv
+def ArgCC_AVR_BUILTIN_DIV : CallingConv
<[
CCIfType<[i8], CCAssignToReg<[R24,R22]>>,
CCIfType<[i16], CCAssignToReg<[R25R24, R23R22]>>
diff --git a/contrib/llvm/lib/Target/AVR/AVRDevices.td b/contrib/llvm/lib/Target/AVR/AVRDevices.td
new file mode 100644
index 000000000000..9224af613d14
--- /dev/null
+++ b/contrib/llvm/lib/Target/AVR/AVRDevices.td
@@ -0,0 +1,491 @@
+//===---------------------------------------------------------------------===//
+// AVR Device Definitions
+//===---------------------------------------------------------------------===//
+
+// :TODO: Implement the skip errata, see `gcc/config/avr/avr-arch.h` for details
+// :TODO: We define all devices with SRAM to have all variants of LD/ST/LDD/STD.
+// In reality, avr1 (no SRAM) has one variant each of `LD` and `ST`.
+// avr2 (with SRAM) adds the rest of the variants.
+// :TODO: s/AVRTiny/Tiny
+
+
+// A feature set aggregates features, grouping them. We don't want to create a
+// new member in AVRSubtarget (to store a value) for each set because we do not
+// care if the set is supported, only the subfeatures inside the set. We fix
+// this by simply setting the same dummy member for all feature sets, which is
+// then ignored.
+class FeatureSet<string name, string desc, list<SubtargetFeature> i>
+ : SubtargetFeature<name, "m_FeatureSetDummy", "true", desc, i>;
+
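
The `m_FeatureSetDummy` trick above only makes sense next to the C++ side it refers to. A minimal sketch of that arrangement, assuming a simplified subtarget class (AVRSubtargetSketch and its getters are illustrative; only the member names quoted in the TableGen strings here are taken from the patch):

// Simplified view of how the TableGen strings land in C++: every
// FeatureSet/Family writes the same throwaway flag, while the per-feature
// flags are what the backend actually queries.
class AVRSubtargetSketch {
public:
  bool hasSRAM() const { return m_hasSRAM; }
  bool hasJMPCALL() const { return m_hasJMPCALL; }

private:
  // One member per real SubtargetFeature ("m_hasSRAM", "m_hasJMPCALL", ...).
  bool m_hasSRAM = false;
  bool m_hasJMPCALL = false;
  // Shared dummy named by every FeatureSet/Family; set but never read.
  bool m_FeatureSetDummy = false;
};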
+// A family of microcontrollers, defining a set of supported features.
+class Family<string name, list<SubtargetFeature> i>
+ : FeatureSet<name, !strconcat("The device is a part of the ",
+ name, " family"), i>;
+
+// The device has SRAM, and supports the bare minimum of
+// SRAM-relevant instructions.
+//
+// These are:
+// LD - all 9 variants
+// ST - all 9 variants
+// LDD - two variants for Y and Z
+// STD - two variants for Y and Z
+// `LDS Rd, K`
+// `STS k, Rr`
+// `PUSH`/`POP`
+def FeatureSRAM : SubtargetFeature<"sram", "m_hasSRAM", "true",
+ "The device has random access memory">;
+
+// The device supports the `JMP k` and `CALL k` instructions.
+def FeatureJMPCALL : SubtargetFeature<"jmpcall", "m_hasJMPCALL", "true",
+ "The device supports the `JMP` and "
+ "`CALL` instructions">;
+
+
+// The device supports the indirect branches `IJMP` and `ICALL`.
+def FeatureIJMPCALL : SubtargetFeature<"ijmpcall", "m_hasIJMPCALL",
+ "true",
+ "The device supports `IJMP`/`ICALL`"
+ "instructions">;
+
+// The device supports the extended indirect branches `EIJMP` and `EICALL`.
+def FeatureEIJMPCALL : SubtargetFeature<"eijmpcall", "m_hasEIJMPCALL",
+ "true", "The device supports the "
+ "`EIJMP`/`EICALL` instructions">;
+
+// The device supports the `ADIW Rd, K` and `SBIW Rd, K` instructions.
+def FeatureADDSUBIW : SubtargetFeature<"addsubiw", "m_hasADDSUBIW",
+ "true", "Enable 16-bit register-immediate "
+ "addition and subtraction instructions">;
+
+// The device has an 8-bit stack pointer (SP) register.
+def FeatureSmallStack : SubtargetFeature<"smallstack", "m_hasSmallStack",
+ "true", "The device has an 8-bit "
+ "stack pointer">;
+
+// The device supports the 16-bit GPR pair MOVW instruction.
+def FeatureMOVW : SubtargetFeature<"movw", "m_hasMOVW", "true",
+ "The device supports the 16-bit MOVW "
+ "instruction">;
+
+// The device supports the `LPM` instruction, with implied destination being r0.
+def FeatureLPM : SubtargetFeature<"lpm", "m_hasLPM", "true",
+ "The device supports the `LPM` instruction">;
+
+// The device supports the `LPM Rd, Z[+]` instruction.
+def FeatureLPMX : SubtargetFeature<"lpmx", "m_hasLPMX", "true",
+ "The device supports the `LPM Rd, Z[+]` "
+ "instruction">;
+
+// The device supports the `ELPM` instruction.
+def FeatureELPM : SubtargetFeature<"elpm", "m_hasELPM", "true",
+ "The device supports the ELPM instruction">;
+
+// The device supports the `ELPM Rd, Z[+]` instructions.
+def FeatureELPMX : SubtargetFeature<"elpmx", "m_hasELPMX", "true",
+ "The device supports the `ELPM Rd, Z[+]` "
+ "instructions">;
+
+// The device supports the `SPM` instruction.
+def FeatureSPM : SubtargetFeature<"spm", "m_hasSPM", "true",
+ "The device supports the `SPM` instruction">;
+
+// The device supports the `SPM Z+` instruction.
+def FeatureSPMX : SubtargetFeature<"spmx", "m_hasSPMX", "true",
+ "The device supports the `SPM Z+` "
+ "instruction">;
+
+// The device supports the `DES k` instruction.
+def FeatureDES : SubtargetFeature<"des", "m_hasDES", "true",
+ "The device supports the `DES k` encryption "
+ "instruction">;
+
+// The device supports the Read-Write-Modify instructions
+// XCH, LAS, LAC, and LAT.
+def FeatureRMW : SubtargetFeature<"rmw", "m_supportsRMW", "true",
+ "The device supports the read-write-modify "
+ "instructions: XCH, LAS, LAC, LAT">;
+
+// The device supports the `[F]MUL[S][U]` family of instructions.
+def FeatureMultiplication : SubtargetFeature<"mul", "m_supportsMultiplication",
+ "true", "The device supports the "
+ "multiplication instructions">;
+
+// The device supports the `BREAK` instruction.
+def FeatureBREAK : SubtargetFeature<"break", "m_hasBREAK", "true",
+ "The device supports the `BREAK` debugging "
+ "instruction">;
+
+// The device has instruction encodings specific to the Tiny core.
+def FeatureTinyEncoding : SubtargetFeature<"tinyencoding",
+ "m_hasTinyEncoding", "true",
+ "The device has Tiny core specific "
+ "instruction encodings">;
+
+class ELFArch<string name> : SubtargetFeature<"", "ELFArch",
+ !strconcat("ELF::",name), "">;
+
+// ELF e_flags architecture values
+def ELFArchAVR1 : ELFArch<"EF_AVR_ARCH_AVR1">;
+def ELFArchAVR2 : ELFArch<"EF_AVR_ARCH_AVR2">;
+def ELFArchAVR25 : ELFArch<"EF_AVR_ARCH_AVR25">;
+def ELFArchAVR3 : ELFArch<"EF_AVR_ARCH_AVR3">;
+def ELFArchAVR31 : ELFArch<"EF_AVR_ARCH_AVR31">;
+def ELFArchAVR35 : ELFArch<"EF_AVR_ARCH_AVR35">;
+def ELFArchAVR4 : ELFArch<"EF_AVR_ARCH_AVR4">;
+def ELFArchAVR5 : ELFArch<"EF_AVR_ARCH_AVR5">;
+def ELFArchAVR51 : ELFArch<"EF_AVR_ARCH_AVR51">;
+def ELFArchAVR6 : ELFArch<"EF_AVR_ARCH_AVR6">;
+def ELFArchAVRTiny : ELFArch<"EF_AVR_ARCH_AVRTINY">;
+def ELFArchXMEGA1 : ELFArch<"EF_AVR_ARCH_XMEGA1">;
+def ELFArchXMEGA2 : ELFArch<"EF_AVR_ARCH_XMEGA2">;
+def ELFArchXMEGA3 : ELFArch<"EF_AVR_ARCH_XMEGA3">;
+def ELFArchXMEGA4 : ELFArch<"EF_AVR_ARCH_XMEGA4">;
+def ELFArchXMEGA5 : ELFArch<"EF_AVR_ARCH_XMEGA5">;
+def ELFArchXMEGA6 : ELFArch<"EF_AVR_ARCH_XMEGA6">;
+def ELFArchXMEGA7 : ELFArch<"EF_AVR_ARCH_XMEGA7">;
+
+//===---------------------------------------------------------------------===//
+// AVR Families
+//===---------------------------------------------------------------------===//
+
+// The device has at least the bare minimum that **every** single AVR
+// device should have.
+def FamilyAVR0 : Family<"avr0", []>;
+
+def FamilyAVR1 : Family<"avr1", [FamilyAVR0, FeatureLPM]>;
+
+def FamilyAVR2 : Family<"avr2",
+ [FamilyAVR1, FeatureIJMPCALL, FeatureADDSUBIW,
+ FeatureSRAM]>;
+
+def FamilyAVR25 : Family<"avr25",
+ [FamilyAVR2, FeatureMOVW, FeatureLPMX,
+ FeatureSPM, FeatureBREAK]>;
+
+def FamilyAVR3 : Family<"avr3",
+ [FamilyAVR2, FeatureJMPCALL]>;
+
+def FamilyAVR31 : Family<"avr31",
+ [FamilyAVR3, FeatureELPM]>;
+
+def FamilyAVR35 : Family<"avr35",
+ [FamilyAVR3, FeatureMOVW, FeatureLPMX,
+ FeatureSPM, FeatureBREAK]>;
+
+def FamilyAVR4 : Family<"avr4",
+ [FamilyAVR2, FeatureMultiplication,
+ FeatureMOVW, FeatureLPMX, FeatureSPM,
+ FeatureBREAK]>;
+
+def FamilyAVR5 : Family<"avr5",
+ [FamilyAVR3, FeatureMultiplication,
+ FeatureMOVW, FeatureLPMX, FeatureSPM,
+ FeatureBREAK]>;
+
+def FamilyAVR51 : Family<"avr51",
+ [FamilyAVR5, FeatureELPM, FeatureELPMX]>;
+
+def FamilyAVR6 : Family<"avr6",
+ [FamilyAVR51]>;
+
+def FamilyAVRTiny : Family<"avrtiny",
+ [FamilyAVR0, FeatureBREAK, FeatureSRAM,
+ FeatureTinyEncoding]>;
+
+def FamilyXMEGA : Family<"xmega",
+ [FamilyAVR51, FeatureEIJMPCALL, FeatureSPMX,
+ FeatureDES]>;
+
+def FamilyXMEGAU : Family<"xmegau",
+ [FamilyXMEGA, FeatureRMW]>;
+
+def FeatureSetSpecial : FeatureSet<"special",
+ "Enable use of the entire instruction "
+ "set - used for debugging",
+ [FeatureSRAM, FeatureJMPCALL,
+ FeatureIJMPCALL, FeatureEIJMPCALL,
+ FeatureADDSUBIW, FeatureMOVW,
+ FeatureLPM, FeatureLPMX, FeatureELPM,
+ FeatureELPMX, FeatureSPM, FeatureSPMX,
+ FeatureDES, FeatureRMW,
+ FeatureMultiplication, FeatureBREAK]>;
+
+//===---------------------------------------------------------------------===//
+// AVR microcontrollers supported.
+//===---------------------------------------------------------------------===//
+
+class Device<string Name, Family Fam, ELFArch Arch,
+ list<SubtargetFeature> ExtraFeatures = []>
+ : Processor<Name, NoItineraries, !listconcat([Fam,Arch],ExtraFeatures)>;
+
+// Generic MCUs
+// Note that several versions of GCC have strange ELF architecture
+// settings for backwards compatibility - see `gas/config/tc-avr.c`
+// in AVR binutils. We do not replicate this.
+def : Device<"avr1", FamilyAVR1, ELFArchAVR1>;
+def : Device<"avr2", FamilyAVR2, ELFArchAVR2>;
+def : Device<"avr25", FamilyAVR25, ELFArchAVR25>;
+def : Device<"avr3", FamilyAVR3, ELFArchAVR3>;
+def : Device<"avr31", FamilyAVR31, ELFArchAVR31>;
+def : Device<"avr35", FamilyAVR35, ELFArchAVR35>;
+def : Device<"avr4", FamilyAVR4, ELFArchAVR4>;
+def : Device<"avr5", FamilyAVR5, ELFArchAVR5>;
+def : Device<"avr51", FamilyAVR51, ELFArchAVR51>;
+def : Device<"avr6", FamilyAVR6, ELFArchAVR6>;
+def : Device<"avrxmega1", FamilyXMEGA, ELFArchXMEGA1>;
+def : Device<"avrxmega2", FamilyXMEGA, ELFArchXMEGA2>;
+def : Device<"avrxmega3", FamilyXMEGA, ELFArchXMEGA3>;
+def : Device<"avrxmega4", FamilyXMEGA, ELFArchXMEGA4>;
+def : Device<"avrxmega5", FamilyXMEGA, ELFArchXMEGA5>;
+def : Device<"avrxmega6", FamilyXMEGA, ELFArchXMEGA6>;
+def : Device<"avrxmega7", FamilyXMEGA, ELFArchXMEGA7>;
+def : Device<"avrtiny", FamilyAVRTiny, ELFArchAVRTiny>;
+
+// Specific MCUs
+def : Device<"at90s1200", FamilyAVR0, ELFArchAVR1>;
+def : Device<"attiny11", FamilyAVR1, ELFArchAVR1>;
+def : Device<"attiny12", FamilyAVR1, ELFArchAVR1>;
+def : Device<"attiny15", FamilyAVR1, ELFArchAVR1>;
+def : Device<"attiny28", FamilyAVR1, ELFArchAVR1>;
+def : Device<"at90s2313", FamilyAVR2, ELFArchAVR2>;
+def : Device<"at90s2323", FamilyAVR2, ELFArchAVR2>;
+def : Device<"at90s2333", FamilyAVR2, ELFArchAVR2>;
+def : Device<"at90s2343", FamilyAVR2, ELFArchAVR2>;
+def : Device<"attiny22", FamilyAVR2, ELFArchAVR2>;
+def : Device<"attiny26", FamilyAVR2, ELFArchAVR2, [FeatureLPMX]>;
+def : Device<"at86rf401", FamilyAVR2, ELFArchAVR25,
+ [FeatureMOVW, FeatureLPMX]>;
+def : Device<"at90s4414", FamilyAVR2, ELFArchAVR2>;
+def : Device<"at90s4433", FamilyAVR2, ELFArchAVR2>;
+def : Device<"at90s4434", FamilyAVR2, ELFArchAVR2>;
+def : Device<"at90s8515", FamilyAVR2, ELFArchAVR2>;
+def : Device<"at90c8534", FamilyAVR2, ELFArchAVR2>;
+def : Device<"at90s8535", FamilyAVR2, ELFArchAVR2>;
+def : Device<"ata5272", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny13", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny13a", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny2313", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny2313a", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny24", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny24a", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny4313", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny44", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny44a", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny84", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny84a", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny25", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny45", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny85", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny261", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny261a", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny461", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny461a", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny861", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny861a", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny87", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny43u", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny48", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny88", FamilyAVR25, ELFArchAVR25>;
+def : Device<"attiny828", FamilyAVR25, ELFArchAVR25>;
+def : Device<"at43usb355", FamilyAVR3, ELFArchAVR3>;
+def : Device<"at76c711", FamilyAVR3, ELFArchAVR3>;
+def : Device<"atmega103", FamilyAVR31, ELFArchAVR31>;
+def : Device<"at43usb320", FamilyAVR31, ELFArchAVR31>;
+def : Device<"attiny167", FamilyAVR35, ELFArchAVR35>;
+def : Device<"at90usb82", FamilyAVR35, ELFArchAVR35>;
+def : Device<"at90usb162", FamilyAVR35, ELFArchAVR35>;
+def : Device<"ata5505", FamilyAVR35, ELFArchAVR35>;
+def : Device<"atmega8u2", FamilyAVR35, ELFArchAVR35>;
+def : Device<"atmega16u2", FamilyAVR35, ELFArchAVR35>;
+def : Device<"atmega32u2", FamilyAVR35, ELFArchAVR35>;
+def : Device<"attiny1634", FamilyAVR35, ELFArchAVR35>;
+def : Device<"atmega8", FamilyAVR4, ELFArchAVR4>; // FIXME: family may be wrong
+def : Device<"ata6289", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega8a", FamilyAVR4, ELFArchAVR4>;
+def : Device<"ata6285", FamilyAVR4, ELFArchAVR4>;
+def : Device<"ata6286", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega48", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega48a", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega48pa", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega48p", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega88", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega88a", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega88p", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega88pa", FamilyAVR4, ELFArchAVR4>;
+def : Device<"atmega8515", FamilyAVR2, ELFArchAVR4,
+ [FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM]>;
+def : Device<"atmega8535", FamilyAVR2, ELFArchAVR4,
+ [FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM]>;
+def : Device<"atmega8hva", FamilyAVR4, ELFArchAVR4>;
+def : Device<"at90pwm1", FamilyAVR4, ELFArchAVR4>;
+def : Device<"at90pwm2", FamilyAVR4, ELFArchAVR4>;
+def : Device<"at90pwm2b", FamilyAVR4, ELFArchAVR4>;
+def : Device<"at90pwm3", FamilyAVR4, ELFArchAVR4>;
+def : Device<"at90pwm3b", FamilyAVR4, ELFArchAVR4>;
+def : Device<"at90pwm81", FamilyAVR4, ELFArchAVR4>;
+def : Device<"ata5790", FamilyAVR5, ELFArchAVR5>;
+def : Device<"ata5795", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega16", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega16a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega161", FamilyAVR3, ELFArchAVR5,
+ [FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM]>;
+def : Device<"atmega162", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega163", FamilyAVR3, ELFArchAVR5,
+ [FeatureMultiplication, FeatureMOVW, FeatureLPMX, FeatureSPM]>;
+def : Device<"atmega164a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega164p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega164pa", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega165", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega165a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega165p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega165pa", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega168", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega168a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega168p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega168pa", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega169", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega169a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega169p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega169pa", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega32", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega32a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega323", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega324a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega324p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega324pa", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega325", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega325a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega325p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega325pa", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega3250", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega3250a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega3250p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega3250pa", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega328", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega328p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega329", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega329a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega329p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega329pa", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega3290", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega3290a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega3290p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega3290pa", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega406", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega64", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega64a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega640", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega644", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega644a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega644p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega644pa", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega645", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega645a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega645p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega649", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega649a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega649p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega6450", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega6450a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega6450p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega6490", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega6490a", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega6490p", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega64rfr2", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega644rfr2", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega16hva", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega16hva2", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega16hvb", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega16hvbrevb", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega32hvb", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega32hvbrevb", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega64hve", FamilyAVR5, ELFArchAVR5>;
+def : Device<"at90can32", FamilyAVR5, ELFArchAVR5>;
+def : Device<"at90can64", FamilyAVR5, ELFArchAVR5>;
+def : Device<"at90pwm161", FamilyAVR5, ELFArchAVR5>;
+def : Device<"at90pwm216", FamilyAVR5, ELFArchAVR5>;
+def : Device<"at90pwm316", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega32c1", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega64c1", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega16m1", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega32m1", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega64m1", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega16u4", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega32u4", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega32u6", FamilyAVR5, ELFArchAVR5>;
+def : Device<"at90usb646", FamilyAVR5, ELFArchAVR5>;
+def : Device<"at90usb647", FamilyAVR5, ELFArchAVR5>;
+def : Device<"at90scr100", FamilyAVR5, ELFArchAVR5>;
+def : Device<"at94k", FamilyAVR3, ELFArchAVR5,
+ [FeatureMultiplication, FeatureMOVW, FeatureLPMX]>;
+def : Device<"m3000", FamilyAVR5, ELFArchAVR5>;
+def : Device<"atmega128", FamilyAVR51, ELFArchAVR51>;
+def : Device<"atmega128a", FamilyAVR51, ELFArchAVR51>;
+def : Device<"atmega1280", FamilyAVR51, ELFArchAVR51>;
+def : Device<"atmega1281", FamilyAVR51, ELFArchAVR51>;
+def : Device<"atmega1284", FamilyAVR51, ELFArchAVR51>;
+def : Device<"atmega1284p", FamilyAVR51, ELFArchAVR51>;
+def : Device<"atmega128rfa1", FamilyAVR51, ELFArchAVR51>;
+def : Device<"atmega128rfr2", FamilyAVR51, ELFArchAVR51>;
+def : Device<"atmega1284rfr2", FamilyAVR51, ELFArchAVR51>;
+def : Device<"at90can128", FamilyAVR51, ELFArchAVR51>;
+def : Device<"at90usb1286", FamilyAVR51, ELFArchAVR51>;
+def : Device<"at90usb1287", FamilyAVR51, ELFArchAVR51>;
+def : Device<"atmega2560", FamilyAVR6, ELFArchAVR6>;
+def : Device<"atmega2561", FamilyAVR6, ELFArchAVR6>;
+def : Device<"atmega256rfr2", FamilyAVR6, ELFArchAVR6>;
+def : Device<"atmega2564rfr2", FamilyAVR6, ELFArchAVR6>;
+def : Device<"atxmega16a4", FamilyXMEGA, ELFArchXMEGA2>;
+def : Device<"atxmega16a4u", FamilyXMEGAU, ELFArchXMEGA2>;
+def : Device<"atxmega16c4", FamilyXMEGAU, ELFArchXMEGA2>;
+def : Device<"atxmega16d4", FamilyXMEGA, ELFArchXMEGA2>;
+def : Device<"atxmega32a4", FamilyXMEGA, ELFArchXMEGA2>;
+def : Device<"atxmega32a4u", FamilyXMEGAU, ELFArchXMEGA2>;
+def : Device<"atxmega32c4", FamilyXMEGAU, ELFArchXMEGA2>;
+def : Device<"atxmega32d4", FamilyXMEGA, ELFArchXMEGA2>;
+def : Device<"atxmega32e5", FamilyXMEGA, ELFArchXMEGA2>;
+def : Device<"atxmega16e5", FamilyXMEGA, ELFArchXMEGA2>;
+def : Device<"atxmega8e5", FamilyXMEGA, ELFArchXMEGA2>;
+def : Device<"atxmega32x1", FamilyXMEGA, ELFArchXMEGA2>;
+def : Device<"atxmega64a3", FamilyXMEGA, ELFArchXMEGA4>;
+def : Device<"atxmega64a3u", FamilyXMEGAU, ELFArchXMEGA4>;
+def : Device<"atxmega64a4u", FamilyXMEGAU, ELFArchXMEGA4>;
+def : Device<"atxmega64b1", FamilyXMEGAU, ELFArchXMEGA4>;
+def : Device<"atxmega64b3", FamilyXMEGAU, ELFArchXMEGA4>;
+def : Device<"atxmega64c3", FamilyXMEGAU, ELFArchXMEGA4>;
+def : Device<"atxmega64d3", FamilyXMEGA, ELFArchXMEGA4>;
+def : Device<"atxmega64d4", FamilyXMEGA, ELFArchXMEGA4>;
+def : Device<"atxmega64a1", FamilyXMEGA, ELFArchXMEGA5>;
+def : Device<"atxmega64a1u", FamilyXMEGAU, ELFArchXMEGA5>;
+def : Device<"atxmega128a3", FamilyXMEGA, ELFArchXMEGA6>;
+def : Device<"atxmega128a3u", FamilyXMEGAU, ELFArchXMEGA6>;
+def : Device<"atxmega128b1", FamilyXMEGAU, ELFArchXMEGA6>;
+def : Device<"atxmega128b3", FamilyXMEGAU, ELFArchXMEGA6>;
+def : Device<"atxmega128c3", FamilyXMEGAU, ELFArchXMEGA6>;
+def : Device<"atxmega128d3", FamilyXMEGA, ELFArchXMEGA6>;
+def : Device<"atxmega128d4", FamilyXMEGA, ELFArchXMEGA6>;
+def : Device<"atxmega192a3", FamilyXMEGA, ELFArchXMEGA6>;
+def : Device<"atxmega192a3u", FamilyXMEGAU, ELFArchXMEGA6>;
+def : Device<"atxmega192c3", FamilyXMEGAU, ELFArchXMEGA6>;
+def : Device<"atxmega192d3", FamilyXMEGA, ELFArchXMEGA6>;
+def : Device<"atxmega256a3", FamilyXMEGA, ELFArchXMEGA6>;
+def : Device<"atxmega256a3u", FamilyXMEGAU, ELFArchXMEGA6>;
+def : Device<"atxmega256a3b", FamilyXMEGA, ELFArchXMEGA6>;
+def : Device<"atxmega256a3bu", FamilyXMEGAU, ELFArchXMEGA6>;
+def : Device<"atxmega256c3", FamilyXMEGAU, ELFArchXMEGA6>;
+def : Device<"atxmega256d3", FamilyXMEGA, ELFArchXMEGA6>;
+def : Device<"atxmega384c3", FamilyXMEGAU, ELFArchXMEGA6>;
+def : Device<"atxmega384d3", FamilyXMEGA, ELFArchXMEGA6>;
+def : Device<"atxmega128a1", FamilyXMEGA, ELFArchXMEGA7>;
+def : Device<"atxmega128a1u", FamilyXMEGAU, ELFArchXMEGA7>;
+def : Device<"atxmega128a4u", FamilyXMEGAU, ELFArchXMEGA7>;
+def : Device<"attiny4", FamilyAVRTiny, ELFArchAVRTiny>;
+def : Device<"attiny5", FamilyAVRTiny, ELFArchAVRTiny>;
+def : Device<"attiny9", FamilyAVRTiny, ELFArchAVRTiny>;
+def : Device<"attiny10", FamilyAVRTiny, ELFArchAVRTiny>;
+def : Device<"attiny20", FamilyAVRTiny, ELFArchAVRTiny>;
+def : Device<"attiny40", FamilyAVRTiny, ELFArchAVRTiny>;
+def : Device<"attiny102", FamilyAVRTiny, ELFArchAVRTiny>;
+def : Device<"attiny104", FamilyAVRTiny, ELFArchAVRTiny>;
+
diff --git a/contrib/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
new file mode 100644
index 000000000000..1b2f2cec0bca
--- /dev/null
+++ b/contrib/llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp
@@ -0,0 +1,1515 @@
+//===-- AVRExpandPseudoInsts.cpp - Expand pseudo instructions -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that expands pseudo instructions into target
+// instructions. This pass should be run after register allocation but before
+// the post-regalloc scheduling pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AVR.h"
+#include "AVRInstrInfo.h"
+#include "AVRTargetMachine.h"
+#include "MCTargetDesc/AVRMCTargetDesc.h"
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+#define AVR_EXPAND_PSEUDO_NAME "AVR pseudo instruction expansion pass"
+
+namespace {
+
+/// Expands "placeholder" instructions marked as pseudo into
+/// actual AVR instructions.
+class AVRExpandPseudo : public MachineFunctionPass {
+public:
+ static char ID;
+
+ AVRExpandPseudo() : MachineFunctionPass(ID) {
+ initializeAVRExpandPseudoPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ StringRef getPassName() const override { return AVR_EXPAND_PSEUDO_NAME; }
+
+private:
+ typedef MachineBasicBlock Block;
+ typedef Block::iterator BlockIt;
+
+ const AVRRegisterInfo *TRI;
+ const TargetInstrInfo *TII;
+
+ /// The register to be used for temporary storage.
+ const unsigned SCRATCH_REGISTER = AVR::R0;
+ /// The IO address of the status register.
+ const unsigned SREG_ADDR = 0x3f;
+
+ bool expandMBB(Block &MBB);
+ bool expandMI(Block &MBB, BlockIt MBBI);
+ template <unsigned OP> bool expand(Block &MBB, BlockIt MBBI);
+
+ MachineInstrBuilder buildMI(Block &MBB, BlockIt MBBI, unsigned Opcode) {
+ return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(Opcode));
+ }
+
+ MachineInstrBuilder buildMI(Block &MBB, BlockIt MBBI, unsigned Opcode,
+ unsigned DstReg) {
+ return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(Opcode), DstReg);
+ }
+
+ MachineRegisterInfo &getRegInfo(Block &MBB) { return MBB.getParent()->getRegInfo(); }
+
+ bool expandArith(unsigned OpLo, unsigned OpHi, Block &MBB, BlockIt MBBI);
+ bool expandLogic(unsigned Op, Block &MBB, BlockIt MBBI);
+ bool expandLogicImm(unsigned Op, Block &MBB, BlockIt MBBI);
+ bool isLogicImmOpRedundant(unsigned Op, unsigned ImmVal) const;
+
+ template<typename Func>
+ bool expandAtomic(Block &MBB, BlockIt MBBI, Func f);
+
+ template<typename Func>
+ bool expandAtomicBinaryOp(unsigned Opcode, Block &MBB, BlockIt MBBI, Func f);
+
+ bool expandAtomicBinaryOp(unsigned Opcode, Block &MBB, BlockIt MBBI);
+
+ bool expandAtomicArithmeticOp(unsigned MemOpcode,
+ unsigned ArithOpcode,
+ Block &MBB,
+ BlockIt MBBI);
+};
+
+char AVRExpandPseudo::ID = 0;
+
+bool AVRExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ BlockIt MBBI = MBB.begin(), E = MBB.end();
+ while (MBBI != E) {
+ BlockIt NMBBI = std::next(MBBI);
+ Modified |= expandMI(MBB, MBBI);
+ MBBI = NMBBI;
+ }
+
+ return Modified;
+}
+
+bool AVRExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
+ bool Modified = false;
+
+ const AVRSubtarget &STI = MF.getSubtarget<AVRSubtarget>();
+ TRI = STI.getRegisterInfo();
+ TII = STI.getInstrInfo();
+
+ // We need to track liveness in order to use register scavenging.
+ MF.getProperties().set(MachineFunctionProperties::Property::TracksLiveness);
+
+ for (Block &MBB : MF) {
+ bool ContinueExpanding = true;
+ unsigned ExpandCount = 0;
+
+ // Continue expanding the block until all pseudos are expanded.
+ do {
+ assert(ExpandCount < 10 && "pseudo expand limit reached");
+
+ bool BlockModified = expandMBB(MBB);
+ Modified |= BlockModified;
+ ExpandCount++;
+
+ ContinueExpanding = BlockModified;
+ } while (ContinueExpanding);
+ }
+
+ return Modified;
+}
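
Because one expansion can emit instructions that other expansions then handle, the pass keeps re-running expandMBB on a block until a pass makes no change; the count assertion only guards against a non-converging expansion. A generic sketch of that fixed-point shape (runToFixedPoint is hypothetical, not part of the pass):

// Generic fixed-point driver of the same shape as the loop above: apply a
// rewrite step until it reports no change, with a limit standing in for the
// expand-count assert.
template <typename StepFn>
static bool runToFixedPoint(StepFn Step, unsigned Limit = 10) {
  bool Changed = false;
  for (unsigned I = 0; I != Limit; ++I) {
    if (!Step())
      return Changed; // converged: the last pass changed nothing
    Changed = true;
  }
  return Changed;     // gave up at the limit, mirroring the assert's intent
}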
+
+bool AVRExpandPseudo::
+expandArith(unsigned OpLo, unsigned OpHi, Block &MBB, BlockIt MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned SrcLoReg, SrcHiReg, DstLoReg, DstHiReg;
+ unsigned DstReg = MI.getOperand(0).getReg();
+ unsigned SrcReg = MI.getOperand(2).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ bool DstIsKill = MI.getOperand(1).isKill();
+ bool SrcIsKill = MI.getOperand(2).isKill();
+ bool ImpIsDead = MI.getOperand(3).isDead();
+ TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg);
+ TRI->splitReg(DstReg, DstLoReg, DstHiReg);
+
+ buildMI(MBB, MBBI, OpLo)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstLoReg, getKillRegState(DstIsKill))
+ .addReg(SrcLoReg, getKillRegState(SrcIsKill));
+
+ auto MIBHI = buildMI(MBB, MBBI, OpHi)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstHiReg, getKillRegState(DstIsKill))
+ .addReg(SrcHiReg, getKillRegState(SrcIsKill));
+
+ if (ImpIsDead)
+ MIBHI->getOperand(3).setIsDead();
+
+ // SREG is always implicitly killed
+ MIBHI->getOperand(4).setIsKill();
+
+ MI.eraseFromParent();
+ return true;
+}
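
expandArith turns a 16-bit pseudo into an 8-bit operation on the low registers followed by a carry-consuming 8-bit operation on the high registers. A plain-C++ sketch of the value such an ADDWRdRr expansion computes (add16ViaBytes is illustrative only):

#include <cstdint>

// ADD on the low bytes, then ADC on the high bytes, with the carry out of
// the low half feeding the high half.
static uint16_t add16ViaBytes(uint16_t A, uint16_t B) {
  unsigned LoSum = unsigned(uint8_t(A)) + unsigned(uint8_t(B));
  uint8_t Lo = uint8_t(LoSum);                        // ADD DstLo, SrcLo
  unsigned Carry = LoSum >> 8;                        // carry flag in SREG
  uint8_t Hi = uint8_t((A >> 8) + (B >> 8) + Carry);  // ADC DstHi, SrcHi
  return uint16_t((unsigned(Hi) << 8) | Lo);
}

// add16ViaBytes(0x00ff, 0x0001) == 0x0100: the low-byte carry reaches the
// high byte exactly as the emitted ADC does.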
+
+bool AVRExpandPseudo::
+expandLogic(unsigned Op, Block &MBB, BlockIt MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned SrcLoReg, SrcHiReg, DstLoReg, DstHiReg;
+ unsigned DstReg = MI.getOperand(0).getReg();
+ unsigned SrcReg = MI.getOperand(2).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ bool DstIsKill = MI.getOperand(1).isKill();
+ bool SrcIsKill = MI.getOperand(2).isKill();
+ bool ImpIsDead = MI.getOperand(3).isDead();
+ TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg);
+ TRI->splitReg(DstReg, DstLoReg, DstHiReg);
+
+ auto MIBLO = buildMI(MBB, MBBI, Op)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstLoReg, getKillRegState(DstIsKill))
+ .addReg(SrcLoReg, getKillRegState(SrcIsKill));
+
+ // SREG is always implicitly dead
+ MIBLO->getOperand(3).setIsDead();
+
+ auto MIBHI = buildMI(MBB, MBBI, Op)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstHiReg, getKillRegState(DstIsKill))
+ .addReg(SrcHiReg, getKillRegState(SrcIsKill));
+
+ if (ImpIsDead)
+ MIBHI->getOperand(3).setIsDead();
+
+ MI.eraseFromParent();
+ return true;
+}
+
+bool AVRExpandPseudo::
+ isLogicImmOpRedundant(unsigned Op, unsigned ImmVal) const {
+
+ // ANDI Rd, 0xff is redundant.
+ if (Op == AVR::ANDIRdK && ImmVal == 0xff)
+ return true;
+
+ // ORI Rd, 0x0 is redundant.
+ if (Op == AVR::ORIRdK && ImmVal == 0x0)
+ return true;
+
+ return false;
+}
+
+bool AVRExpandPseudo::
+expandLogicImm(unsigned Op, Block &MBB, BlockIt MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned DstLoReg, DstHiReg;
+ unsigned DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ bool SrcIsKill = MI.getOperand(1).isKill();
+ bool ImpIsDead = MI.getOperand(3).isDead();
+ unsigned Imm = MI.getOperand(2).getImm();
+ unsigned Lo8 = Imm & 0xff;
+ unsigned Hi8 = (Imm >> 8) & 0xff;
+ TRI->splitReg(DstReg, DstLoReg, DstHiReg);
+
+ if (!isLogicImmOpRedundant(Op, Lo8)) {
+ auto MIBLO = buildMI(MBB, MBBI, Op)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstLoReg, getKillRegState(SrcIsKill))
+ .addImm(Lo8);
+
+ // SREG is always implicitly dead
+ MIBLO->getOperand(3).setIsDead();
+ }
+
+ if (!isLogicImmOpRedundant(Op, Hi8)) {
+ auto MIBHI = buildMI(MBB, MBBI, Op)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstHiReg, getKillRegState(SrcIsKill))
+ .addImm(Hi8);
+
+ if (ImpIsDead)
+ MIBHI->getOperand(3).setIsDead();
+ }
+
+ MI.eraseFromParent();
+ return true;
+}
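
expandLogicImm splits a 16-bit immediate into its two bytes and, via isLogicImmOpRedundant, drops any half that cannot change bits. A small sketch of that decision for the AND case (planAndiw is a hypothetical name; the ORI case is symmetric with 0x00):

#include <cstdint>

// Split the immediate into bytes and emit an ANDI only for bytes that can
// actually clear something, since ANDI with 0xff is the identity.
struct EmittedOps {
  bool EmitLo;
  bool EmitHi;
};

static EmittedOps planAndiw(uint16_t Imm) {
  uint8_t Lo8 = Imm & 0xff;
  uint8_t Hi8 = (Imm >> 8) & 0xff;
  return {Lo8 != 0xff, Hi8 != 0xff};
}

// planAndiw(0xff00) -> {true, false}: the high byte is ANDed with 0xff,
// which changes nothing, so only `ANDI DstLo, 0x00` is emitted.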
+
+template <>
+bool AVRExpandPseudo::expand<AVR::ADDWRdRr>(Block &MBB, BlockIt MBBI) {
+ return expandArith(AVR::ADDRdRr, AVR::ADCRdRr, MBB, MBBI);
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::ADCWRdRr>(Block &MBB, BlockIt MBBI) {
+ return expandArith(AVR::ADCRdRr, AVR::ADCRdRr, MBB, MBBI);
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::SUBWRdRr>(Block &MBB, BlockIt MBBI) {
+ return expandArith(AVR::SUBRdRr, AVR::SBCRdRr, MBB, MBBI);
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::SUBIWRdK>(Block &MBB, BlockIt MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned DstLoReg, DstHiReg;
+ unsigned DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ bool SrcIsKill = MI.getOperand(1).isKill();
+ bool ImpIsDead = MI.getOperand(3).isDead();
+ TRI->splitReg(DstReg, DstLoReg, DstHiReg);
+
+ auto MIBLO = buildMI(MBB, MBBI, AVR::SUBIRdK)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstLoReg, getKillRegState(SrcIsKill));
+
+ auto MIBHI = buildMI(MBB, MBBI, AVR::SBCIRdK)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstHiReg, getKillRegState(SrcIsKill));
+
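+ // Attach the low/high parts of the operand: immediates are split into bytes,
+ // while global addresses additionally carry the MO_NEG target flag.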
+ switch (MI.getOperand(2).getType()) {
+ case MachineOperand::MO_GlobalAddress: {
+ const GlobalValue *GV = MI.getOperand(2).getGlobal();
+ int64_t Offs = MI.getOperand(2).getOffset();
+ unsigned TF = MI.getOperand(2).getTargetFlags();
+ MIBLO.addGlobalAddress(GV, Offs, TF | AVRII::MO_NEG | AVRII::MO_LO);
+ MIBHI.addGlobalAddress(GV, Offs, TF | AVRII::MO_NEG | AVRII::MO_HI);
+ break;
+ }
+ case MachineOperand::MO_Immediate: {
+ unsigned Imm = MI.getOperand(2).getImm();
+ MIBLO.addImm(Imm & 0xff);
+ MIBHI.addImm((Imm >> 8) & 0xff);
+ break;
+ }
+ default:
+ llvm_unreachable("Unknown operand type!");
+ }
+
+ if (ImpIsDead)
+ MIBHI->getOperand(3).setIsDead();
+
+ // SREG is always implicitly killed
+ MIBHI->getOperand(4).setIsKill();
+
+ MI.eraseFromParent();
+ return true;
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::SBCWRdRr>(Block &MBB, BlockIt MBBI) {
+ return expandArith(AVR::SBCRdRr, AVR::SBCRdRr, MBB, MBBI);
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::SBCIWRdK>(Block &MBB, BlockIt MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned OpLo, OpHi, DstLoReg, DstHiReg;
+ unsigned DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ bool SrcIsKill = MI.getOperand(1).isKill();
+ bool ImpIsDead = MI.getOperand(3).isDead();
+ unsigned Imm = MI.getOperand(2).getImm();
+ unsigned Lo8 = Imm & 0xff;
+ unsigned Hi8 = (Imm >> 8) & 0xff;
+ OpLo = AVR::SBCIRdK;
+ OpHi = AVR::SBCIRdK;
+ TRI->splitReg(DstReg, DstLoReg, DstHiReg);
+
+ auto MIBLO = buildMI(MBB, MBBI, OpLo)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstLoReg, getKillRegState(SrcIsKill))
+ .addImm(Lo8);
+
+ // SREG is always implicitly killed
+ MIBLO->getOperand(4).setIsKill();
+
+ auto MIBHI = buildMI(MBB, MBBI, OpHi)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstHiReg, getKillRegState(SrcIsKill))
+ .addImm(Hi8);
+
+ if (ImpIsDead)
+ MIBHI->getOperand(3).setIsDead();
+
+ // SREG is always implicitly killed
+ MIBHI->getOperand(4).setIsKill();
+
+ MI.eraseFromParent();
+ return true;
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::ANDWRdRr>(Block &MBB, BlockIt MBBI) {
+ return expandLogic(AVR::ANDRdRr, MBB, MBBI);
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::ANDIWRdK>(Block &MBB, BlockIt MBBI) {
+ return expandLogicImm(AVR::ANDIRdK, MBB, MBBI);
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::ORWRdRr>(Block &MBB, BlockIt MBBI) {
+ return expandLogic(AVR::ORRdRr, MBB, MBBI);
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::ORIWRdK>(Block &MBB, BlockIt MBBI) {
+ return expandLogicImm(AVR::ORIRdK, MBB, MBBI);
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::EORWRdRr>(Block &MBB, BlockIt MBBI) {
+ return expandLogic(AVR::EORRdRr, MBB, MBBI);
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::COMWRd>(Block &MBB, BlockIt MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned OpLo, OpHi, DstLoReg, DstHiReg;
+ unsigned DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ bool DstIsKill = MI.getOperand(1).isKill();
+ bool ImpIsDead = MI.getOperand(2).isDead();
+ OpLo = AVR::COMRd;
+ OpHi = AVR::COMRd;
+ TRI->splitReg(DstReg, DstLoReg, DstHiReg);
+
+ auto MIBLO = buildMI(MBB, MBBI, OpLo)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstLoReg, getKillRegState(DstIsKill));
+
+ // SREG is always implicitly dead
+ MIBLO->getOperand(2).setIsDead();
+
+ auto MIBHI = buildMI(MBB, MBBI, OpHi)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstHiReg, getKillRegState(DstIsKill));
+
+ if (ImpIsDead)
+ MIBHI->getOperand(2).setIsDead();
+
+ MI.eraseFromParent();
+ return true;
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::CPWRdRr>(Block &MBB, BlockIt MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned OpLo, OpHi, SrcLoReg, SrcHiReg, DstLoReg, DstHiReg;
+ unsigned DstReg = MI.getOperand(0).getReg();
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ bool DstIsKill = MI.getOperand(0).isKill();
+ bool SrcIsKill = MI.getOperand(1).isKill();
+ bool ImpIsDead = MI.getOperand(2).isDead();
+ OpLo = AVR::CPRdRr;
+ OpHi = AVR::CPCRdRr;
+ TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg);
+ TRI->splitReg(DstReg, DstLoReg, DstHiReg);
+
+ // Low part
+ buildMI(MBB, MBBI, OpLo)
+ .addReg(DstLoReg, getKillRegState(DstIsKill))
+ .addReg(SrcLoReg, getKillRegState(SrcIsKill));
+
+ auto MIBHI = buildMI(MBB, MBBI, OpHi)
+ .addReg(DstHiReg, getKillRegState(DstIsKill))
+ .addReg(SrcHiReg, getKillRegState(SrcIsKill));
+
+ if (ImpIsDead)
+ MIBHI->getOperand(2).setIsDead();
+
+ // SREG is always implicitly killed
+ MIBHI->getOperand(3).setIsKill();
+
+ MI.eraseFromParent();
+ return true;
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::CPCWRdRr>(Block &MBB, BlockIt MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned OpLo, OpHi, SrcLoReg, SrcHiReg, DstLoReg, DstHiReg;
+ unsigned DstReg = MI.getOperand(0).getReg();
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ bool DstIsKill = MI.getOperand(0).isKill();
+ bool SrcIsKill = MI.getOperand(1).isKill();
+ bool ImpIsDead = MI.getOperand(2).isDead();
+ OpLo = AVR::CPCRdRr;
+ OpHi = AVR::CPCRdRr;
+ TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg);
+ TRI->splitReg(DstReg, DstLoReg, DstHiReg);
+
+ auto MIBLO = buildMI(MBB, MBBI, OpLo)
+ .addReg(DstLoReg, getKillRegState(DstIsKill))
+ .addReg(SrcLoReg, getKillRegState(SrcIsKill));
+
+ // SREG is always implicitly killed
+ MIBLO->getOperand(3).setIsKill();
+
+ auto MIBHI = buildMI(MBB, MBBI, OpHi)
+ .addReg(DstHiReg, getKillRegState(DstIsKill))
+ .addReg(SrcHiReg, getKillRegState(SrcIsKill));
+
+ if (ImpIsDead)
+ MIBHI->getOperand(2).setIsDead();
+
+ // SREG is always implicitly killed
+ MIBHI->getOperand(3).setIsKill();
+
+ MI.eraseFromParent();
+ return true;
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::LDIWRdK>(Block &MBB, BlockIt MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned OpLo, OpHi, DstLoReg, DstHiReg;
+ unsigned DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ OpLo = AVR::LDIRdK;
+ OpHi = AVR::LDIRdK;
+ TRI->splitReg(DstReg, DstLoReg, DstHiReg);
+
+ auto MIBLO = buildMI(MBB, MBBI, OpLo)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead));
+
+ auto MIBHI = buildMI(MBB, MBBI, OpHi)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead));
+
+ switch (MI.getOperand(1).getType()) {
+ case MachineOperand::MO_GlobalAddress: {
+ const GlobalValue *GV = MI.getOperand(1).getGlobal();
+ int64_t Offs = MI.getOperand(1).getOffset();
+ unsigned TF = MI.getOperand(1).getTargetFlags();
+
+ MIBLO.addGlobalAddress(GV, Offs, TF | AVRII::MO_LO);
+ MIBHI.addGlobalAddress(GV, Offs, TF | AVRII::MO_HI);
+ break;
+ }
+ case MachineOperand::MO_BlockAddress: {
+ const BlockAddress *BA = MI.getOperand(1).getBlockAddress();
+ unsigned TF = MI.getOperand(1).getTargetFlags();
+
+ MIBLO.addOperand(MachineOperand::CreateBA(BA, TF | AVRII::MO_LO));
+ MIBHI.addOperand(MachineOperand::CreateBA(BA, TF | AVRII::MO_HI));
+ break;
+ }
+ case MachineOperand::MO_Immediate: {
+ unsigned Imm = MI.getOperand(1).getImm();
+
+ MIBLO.addImm(Imm & 0xff);
+ MIBHI.addImm((Imm >> 8) & 0xff);
+ break;
+ }
+ default:
+ llvm_unreachable("Unknown operand type!");
+ }
+
+ MI.eraseFromParent();
+ return true;
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::LDSWRdK>(Block &MBB, BlockIt MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned OpLo, OpHi, DstLoReg, DstHiReg;
+ unsigned DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ OpLo = AVR::LDSRdK;
+ OpHi = AVR::LDSRdK;
+ TRI->splitReg(DstReg, DstLoReg, DstHiReg);
+
+ auto MIBLO = buildMI(MBB, MBBI, OpLo)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead));
+
+ auto MIBHI = buildMI(MBB, MBBI, OpHi)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead));
+
+ switch (MI.getOperand(1).getType()) {
+ case MachineOperand::MO_GlobalAddress: {
+ const GlobalValue *GV = MI.getOperand(1).getGlobal();
+ int64_t Offs = MI.getOperand(1).getOffset();
+ unsigned TF = MI.getOperand(1).getTargetFlags();
+
+ MIBLO.addGlobalAddress(GV, Offs, TF);
+ MIBHI.addGlobalAddress(GV, Offs + 1, TF);
+ break;
+ }
+ case MachineOperand::MO_Immediate: {
+ unsigned Imm = MI.getOperand(1).getImm();
+
+ MIBLO.addImm(Imm);
+ MIBHI.addImm(Imm + 1);
+ break;
+ }
+ default:
+ llvm_unreachable("Unknown operand type!");
+ }
+
+ MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+
+ MI.eraseFromParent();
+ return true;
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::LDWRdPtr>(Block &MBB, BlockIt MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned OpLo, OpHi, DstLoReg, DstHiReg;
+ unsigned DstReg = MI.getOperand(0).getReg();
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ bool SrcIsKill = MI.getOperand(1).isKill();
+ OpLo = AVR::LDRdPtr;
+ OpHi = AVR::LDDRdPtrQ;
+ TRI->splitReg(DstReg, DstLoReg, DstHiReg);
+
+ assert(DstReg != SrcReg && "SrcReg and DstReg cannot be the same");
+
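+ // Load the low byte with LD and the high byte with a displaced LDD (Ptr+1)
+ // so the pointer register itself is left unchanged.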
+ auto MIBLO = buildMI(MBB, MBBI, OpLo)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(SrcReg);
+
+ auto MIBHI = buildMI(MBB, MBBI, OpHi)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(SrcReg, getKillRegState(SrcIsKill))
+ .addImm(1);
+
+ MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+
+ MI.eraseFromParent();
+ return true;
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::LDWRdPtrPi>(Block &MBB, BlockIt MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned OpLo, OpHi, DstLoReg, DstHiReg;
+ unsigned DstReg = MI.getOperand(0).getReg();
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ bool SrcIsDead = MI.getOperand(1).isKill();
+ OpLo = AVR::LDRdPtrPi;
+ OpHi = AVR::LDRdPtrPi;
+ TRI->splitReg(DstReg, DstLoReg, DstHiReg);
+
+ assert(DstReg != SrcReg && "SrcReg and DstReg cannot be the same");
+
+ auto MIBLO = buildMI(MBB, MBBI, OpLo)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(SrcReg, RegState::Define)
+ .addReg(SrcReg, RegState::Kill);
+
+ auto MIBHI = buildMI(MBB, MBBI, OpHi)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(SrcReg, RegState::Define | getDeadRegState(SrcIsDead))
+ .addReg(SrcReg, RegState::Kill);
+
+ MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+
+ MI.eraseFromParent();
+ return true;
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::LDWRdPtrPd>(Block &MBB, BlockIt MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned OpLo, OpHi, DstLoReg, DstHiReg;
+ unsigned DstReg = MI.getOperand(0).getReg();
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ bool SrcIsDead = MI.getOperand(1).isKill();
+ OpLo = AVR::LDRdPtrPd;
+ OpHi = AVR::LDRdPtrPd;
+ TRI->splitReg(DstReg, DstLoReg, DstHiReg);
+
+ assert(DstReg != SrcReg && "SrcReg and DstReg cannot be the same");
+
+ auto MIBHI = buildMI(MBB, MBBI, OpHi)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(SrcReg, RegState::Define)
+ .addReg(SrcReg, RegState::Kill);
+
+ auto MIBLO = buildMI(MBB, MBBI, OpLo)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(SrcReg, RegState::Define | getDeadRegState(SrcIsDead))
+ .addReg(SrcReg, RegState::Kill);
+
+ MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+
+ MI.eraseFromParent();
+ return true;
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::LDDWRdPtrQ>(Block &MBB, BlockIt MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned OpLo, OpHi, DstLoReg, DstHiReg;
+ unsigned DstReg = MI.getOperand(0).getReg();
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ unsigned Imm = MI.getOperand(2).getImm();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ bool SrcIsKill = MI.getOperand(1).isKill();
+ OpLo = AVR::LDDRdPtrQ;
+ OpHi = AVR::LDDRdPtrQ;
+ TRI->splitReg(DstReg, DstLoReg, DstHiReg);
+
+ assert(Imm <= 63 && "Offset is out of range");
+
+ MachineInstr *MIBLO, *MIBHI;
+
+ // HACK: We shouldn't have instances of this instruction
+ // where src==dest because the instruction itself is
+ // marked earlyclobber. We do however get this instruction when
+ // loading from stack slots where the earlyclobber isn't useful.
+ //
+ // In this case, just use a temporary register.
+ if (DstReg == SrcReg) {
+ RegScavenger RS;
+
+ RS.enterBasicBlock(MBB);
+ RS.forward(MBBI);
+
+ BitVector Candidates =
+ TRI->getAllocatableSet
+ (*MBB.getParent(), &AVR::GPR8RegClass);
+
+ // Exclude all the registers being used by the instruction.
+ for (MachineOperand &MO : MI.operands()) {
+ if (MO.isReg() && MO.getReg() != 0 && !MO.isDef() &&
+ !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ Candidates.reset(MO.getReg());
+ }
+
+ BitVector Available = RS.getRegsAvailable(&AVR::GPR8RegClass);
+ Available &= Candidates;
+
+ signed TmpReg = Available.find_first();
+ assert(TmpReg != -1 && "ran out of registers");
+
+ MIBLO = buildMI(MBB, MBBI, OpLo)
+ .addReg(TmpReg, RegState::Define)
+ .addReg(SrcReg)
+ .addImm(Imm);
+
+ buildMI(MBB, MBBI, AVR::MOVRdRr).addReg(DstLoReg).addReg(TmpReg);
+
+ MIBHI = buildMI(MBB, MBBI, OpHi)
+ .addReg(TmpReg, RegState::Define)
+ .addReg(SrcReg, getKillRegState(SrcIsKill))
+ .addImm(Imm + 1);
+
+ buildMI(MBB, MBBI, AVR::MOVRdRr).addReg(DstHiReg).addReg(TmpReg);
+ } else {
+ MIBLO = buildMI(MBB, MBBI, OpLo)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(SrcReg)
+ .addImm(Imm);
+
+ MIBHI = buildMI(MBB, MBBI, OpHi)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(SrcReg, getKillRegState(SrcIsKill))
+ .addImm(Imm + 1);
+ }
+
+ MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+
+ MI.eraseFromParent();
+ return true;
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::LPMWRdZ>(Block &MBB, BlockIt MBBI) {
+ llvm_unreachable("wide LPM is unimplemented");
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::LPMWRdZPi>(Block &MBB, BlockIt MBBI) {
+ llvm_unreachable("wide LPMPi is unimplemented");
+}
+
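+/// Emit the code produced by \p f wrapped in a sequence that saves SREG,
+/// disables interrupts, and restores SREG afterwards, making it atomic.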
+template<typename Func>
+bool AVRExpandPseudo::expandAtomic(Block &MBB, BlockIt MBBI, Func f) {
+ // Keep a reference to the pseudo instruction; it is erased at the end.
+ MachineInstr &MI = *MBBI;
+
+ // Store the SREG.
+ buildMI(MBB, MBBI, AVR::INRdA)
+ .addReg(SCRATCH_REGISTER, RegState::Define)
+ .addImm(SREG_ADDR);
+
+ // Disable interrupts.
+ buildMI(MBB, MBBI, AVR::BCLRs).addImm(7); // CLI
+
+ f(MI);
+
+ // Restore the status reg.
+ buildMI(MBB, MBBI, AVR::OUTARr)
+ .addImm(SREG_ADDR)
+ .addReg(SCRATCH_REGISTER);
+
+ MI.eraseFromParent();
+ return true;
+}
+
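+/// Expand an atomic pseudo into the plain \p Opcode instruction on the same
+/// operands, guarded by the interrupt-disabling sequence from expandAtomic;
+/// \p f can post-process the new instruction.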
+template<typename Func>
+bool AVRExpandPseudo::expandAtomicBinaryOp(unsigned Opcode,
+ Block &MBB,
+ BlockIt MBBI,
+ Func f) {
+ return expandAtomic(MBB, MBBI, [&](MachineInstr &MI) {
+ auto Op1 = MI.getOperand(0);
+ auto Op2 = MI.getOperand(1);
+
+ MachineInstr &NewInst = *buildMI(MBB, MBBI, Opcode)
+ .addOperand(Op1).addOperand(Op2)
+ .getInstr();
+ f(NewInst);
+ });
+}
+
+bool AVRExpandPseudo::expandAtomicBinaryOp(unsigned Opcode,
+ Block &MBB,
+ BlockIt MBBI) {
+ return expandAtomicBinaryOp(Opcode, MBB, MBBI, [](MachineInstr &MI) {});
+}
+
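+/// Expand an atomic read-modify-write pseudo of the given bit \p Width into a
+/// load, the \p ArithOpcode operation, and a store, all with interrupts
+/// disabled.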
+bool AVRExpandPseudo::expandAtomicArithmeticOp(unsigned Width,
+ unsigned ArithOpcode,
+ Block &MBB,
+ BlockIt MBBI) {
+ return expandAtomic(MBB, MBBI, [&](MachineInstr &MI) {
+ auto Op1 = MI.getOperand(0);
+ auto Op2 = MI.getOperand(1);
+
+ unsigned LoadOpcode = (Width == 8) ? AVR::LDRdPtr : AVR::LDWRdPtr;
+ unsigned StoreOpcode = (Width == 8) ? AVR::STPtrRr : AVR::STWPtrRr;
+
+ // Create the load
+ buildMI(MBB, MBBI, LoadOpcode).addOperand(Op1).addOperand(Op2);
+
+ // Create the arithmetic op
+ buildMI(MBB, MBBI, ArithOpcode)
+ .addOperand(Op1).addOperand(Op1)
+ .addOperand(Op2);
+
+ // Create the store
+ buildMI(MBB, MBBI, StoreOpcode).addOperand(Op2).addOperand(Op1);
+ });
+}
+
+template<>
+bool AVRExpandPseudo::expand<AVR::AtomicLoad8>(Block &MBB, BlockIt MBBI) {
+ return expandAtomicBinaryOp(AVR::LDRdPtr, MBB, MBBI);
+}
+
+template<>
+bool AVRExpandPseudo::expand<AVR::AtomicLoad16>(Block &MBB, BlockIt MBBI) {
+ return expandAtomicBinaryOp(AVR::LDWRdPtr, MBB, MBBI);
+}
+
+template<>
+bool AVRExpandPseudo::expand<AVR::AtomicStore8>(Block &MBB, BlockIt MBBI) {
+ return expandAtomicBinaryOp(AVR::STPtrRr, MBB, MBBI);
+}
+
+template<>
+bool AVRExpandPseudo::expand<AVR::AtomicStore16>(Block &MBB, BlockIt MBBI) {
+ return expandAtomicBinaryOp(AVR::STWPtrRr, MBB, MBBI);
+}
+
+template<>
+bool AVRExpandPseudo::expand<AVR::AtomicLoadAdd8>(Block &MBB, BlockIt MBBI) {
+ return expandAtomicArithmeticOp(8, AVR::ADDRdRr, MBB, MBBI);
+}
+
+template<>
+bool AVRExpandPseudo::expand<AVR::AtomicLoadAdd16>(Block &MBB, BlockIt MBBI) {
+ return expandAtomicArithmeticOp(16, AVR::ADDWRdRr, MBB, MBBI);
+}
+
+template<>
+bool AVRExpandPseudo::expand<AVR::AtomicLoadSub8>(Block &MBB, BlockIt MBBI) {
+ return expandAtomicArithmeticOp(8, AVR::SUBRdRr, MBB, MBBI);
+}
+
+template<>
+bool AVRExpandPseudo::expand<AVR::AtomicLoadSub16>(Block &MBB, BlockIt MBBI) {
+ return expandAtomicArithmeticOp(16, AVR::SUBWRdRr, MBB, MBBI);
+}
+
+template<>
+bool AVRExpandPseudo::expand<AVR::AtomicLoadAnd8>(Block &MBB, BlockIt MBBI) {
+ return expandAtomicArithmeticOp(8, AVR::ANDRdRr, MBB, MBBI);
+}
+
+template<>
+bool AVRExpandPseudo::expand<AVR::AtomicLoadAnd16>(Block &MBB, BlockIt MBBI) {
+ return expandAtomicArithmeticOp(16, AVR::ANDWRdRr, MBB, MBBI);
+}
+
+template<>
+bool AVRExpandPseudo::expand<AVR::AtomicLoadOr8>(Block &MBB, BlockIt MBBI) {
+ return expandAtomicArithmeticOp(8, AVR::ORRdRr, MBB, MBBI);
+}
+
+template<>
+bool AVRExpandPseudo::expand<AVR::AtomicLoadOr16>(Block &MBB, BlockIt MBBI) {
+ return expandAtomicArithmeticOp(16, AVR::ORWRdRr, MBB, MBBI);
+}
+
+template<>
+bool AVRExpandPseudo::expand<AVR::AtomicLoadXor8>(Block &MBB, BlockIt MBBI) {
+ return expandAtomicArithmeticOp(8, AVR::EORRdRr, MBB, MBBI);
+}
+
+template<>
+bool AVRExpandPseudo::expand<AVR::AtomicLoadXor16>(Block &MBB, BlockIt MBBI) {
+ return expandAtomicArithmeticOp(16, AVR::EORWRdRr, MBB, MBBI);
+}
+
+template<>
+bool AVRExpandPseudo::expand<AVR::AtomicFence>(Block &MBB, BlockIt MBBI) {
+ // On AVR, there is only one core and so atomic fences do nothing.
+ MBBI->eraseFromParent();
+ return true;
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::STSWKRr>(Block &MBB, BlockIt MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned OpLo, OpHi, SrcLoReg, SrcHiReg;
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ bool SrcIsKill = MI.getOperand(1).isKill();
+ OpLo = AVR::STSKRr;
+ OpHi = AVR::STSKRr;
+ TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg);
+
+ // Write the high byte first in case the destination is a 16-bit I/O
+ // register that latches the high byte through a temporary register.
+ auto MIBHI = buildMI(MBB, MBBI, OpHi);
+ auto MIBLO = buildMI(MBB, MBBI, OpLo);
+
+ switch (MI.getOperand(0).getType()) {
+ case MachineOperand::MO_GlobalAddress: {
+ const GlobalValue *GV = MI.getOperand(0).getGlobal();
+ int64_t Offs = MI.getOperand(0).getOffset();
+ unsigned TF = MI.getOperand(0).getTargetFlags();
+
+ MIBLO.addGlobalAddress(GV, Offs, TF);
+ MIBHI.addGlobalAddress(GV, Offs + 1, TF);
+ break;
+ }
+ case MachineOperand::MO_Immediate: {
+ unsigned Imm = MI.getOperand(0).getImm();
+
+ MIBLO.addImm(Imm);
+ MIBHI.addImm(Imm + 1);
+ break;
+ }
+ default:
+ llvm_unreachable("Unknown operand type!");
+ }
+
+ MIBLO.addReg(SrcLoReg, getKillRegState(SrcIsKill));
+ MIBHI.addReg(SrcHiReg, getKillRegState(SrcIsKill));
+
+ MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+
+ MI.eraseFromParent();
+ return true;
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::STWPtrRr>(Block &MBB, BlockIt MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned OpLo, OpHi, SrcLoReg, SrcHiReg;
+ unsigned DstReg = MI.getOperand(0).getReg();
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ bool DstIsKill = MI.getOperand(0).isKill();
+ bool SrcIsKill = MI.getOperand(1).isKill();
+ OpLo = AVR::STPtrRr;
+ OpHi = AVR::STDPtrQRr;
+ TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg);
+
+ //:TODO: Do we need to reverse this order like INW and STSW?
+ auto MIBLO = buildMI(MBB, MBBI, OpLo)
+ .addReg(DstReg)
+ .addReg(SrcLoReg, getKillRegState(SrcIsKill));
+
+ auto MIBHI = buildMI(MBB, MBBI, OpHi)
+ .addReg(DstReg, getKillRegState(DstIsKill))
+ .addImm(1)
+ .addReg(SrcHiReg, getKillRegState(SrcIsKill));
+
+ MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+
+ MI.eraseFromParent();
+ return true;
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::STWPtrPiRr>(Block &MBB, BlockIt MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned OpLo, OpHi, SrcLoReg, SrcHiReg;
+ unsigned DstReg = MI.getOperand(0).getReg();
+ unsigned SrcReg = MI.getOperand(2).getReg();
+ unsigned Imm = MI.getOperand(3).getImm();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ bool SrcIsKill = MI.getOperand(2).isKill();
+ OpLo = AVR::STPtrPiRr;
+ OpHi = AVR::STPtrPiRr;
+ TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg);
+
+ assert(DstReg != SrcReg && "SrcReg and DstReg cannot be the same");
+
+ auto MIBLO = buildMI(MBB, MBBI, OpLo)
+ .addReg(DstReg, RegState::Define)
+ .addReg(DstReg, RegState::Kill)
+ .addReg(SrcLoReg, getKillRegState(SrcIsKill))
+ .addImm(Imm);
+
+ auto MIBHI = buildMI(MBB, MBBI, OpHi)
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg, RegState::Kill)
+ .addReg(SrcHiReg, getKillRegState(SrcIsKill))
+ .addImm(Imm);
+
+ MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+
+ MI.eraseFromParent();
+ return true;
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::STWPtrPdRr>(Block &MBB, BlockIt MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned OpLo, OpHi, SrcLoReg, SrcHiReg;
+ unsigned DstReg = MI.getOperand(0).getReg();
+ unsigned SrcReg = MI.getOperand(2).getReg();
+ unsigned Imm = MI.getOperand(3).getImm();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ bool SrcIsKill = MI.getOperand(2).isKill();
+ OpLo = AVR::STPtrPdRr;
+ OpHi = AVR::STPtrPdRr;
+ TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg);
+
+ assert(DstReg != SrcReg && "SrcReg and DstReg cannot be the same");
+
+ auto MIBHI = buildMI(MBB, MBBI, OpHi)
+ .addReg(DstReg, RegState::Define)
+ .addReg(DstReg, RegState::Kill)
+ .addReg(SrcHiReg, getKillRegState(SrcIsKill))
+ .addImm(Imm);
+
+ auto MIBLO = buildMI(MBB, MBBI, OpLo)
+ .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstReg, RegState::Kill)
+ .addReg(SrcLoReg, getKillRegState(SrcIsKill))
+ .addImm(Imm);
+
+ MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+
+ MI.eraseFromParent();
+ return true;
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::STDWPtrQRr>(Block &MBB, BlockIt MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned OpLo, OpHi, SrcLoReg, SrcHiReg;
+ unsigned DstReg = MI.getOperand(0).getReg();
+ unsigned SrcReg = MI.getOperand(2).getReg();
+ unsigned Imm = MI.getOperand(1).getImm();
+ bool DstIsKill = MI.getOperand(0).isKill();
+ bool SrcIsKill = MI.getOperand(2).isKill();
+ OpLo = AVR::STDPtrQRr;
+ OpHi = AVR::STDPtrQRr;
+ TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg);
+
+ assert(Imm <= 63 && "Offset is out of range");
+
+ auto MIBLO = buildMI(MBB, MBBI, OpLo)
+ .addReg(DstReg)
+ .addImm(Imm)
+ .addReg(SrcLoReg, getKillRegState(SrcIsKill));
+
+ auto MIBHI = buildMI(MBB, MBBI, OpHi)
+ .addReg(DstReg, getKillRegState(DstIsKill))
+ .addImm(Imm + 1)
+ .addReg(SrcHiReg, getKillRegState(SrcIsKill));
+
+ MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+
+ MI.eraseFromParent();
+ return true;
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::INWRdA>(Block &MBB, BlockIt MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned OpLo, OpHi, DstLoReg, DstHiReg;
+ unsigned Imm = MI.getOperand(1).getImm();
+ unsigned DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ OpLo = AVR::INRdA;
+ OpHi = AVR::INRdA;
+ TRI->splitReg(DstReg, DstLoReg, DstHiReg);
+
+ assert(Imm <= 63 && "Address is out of range");
+
+ auto MIBLO = buildMI(MBB, MBBI, OpLo)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addImm(Imm);
+
+ auto MIBHI = buildMI(MBB, MBBI, OpHi)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addImm(Imm + 1);
+
+ MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+
+ MI.eraseFromParent();
+ return true;
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::OUTWARr>(Block &MBB, BlockIt MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned OpLo, OpHi, SrcLoReg, SrcHiReg;
+ unsigned Imm = MI.getOperand(0).getImm();
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ bool SrcIsKill = MI.getOperand(1).isKill();
+ OpLo = AVR::OUTARr;
+ OpHi = AVR::OUTARr;
+ TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg);
+
+ assert(Imm <= 63 && "Address is out of range");
+
+ // 16-bit I/O writes need the high byte written first
+ auto MIBHI = buildMI(MBB, MBBI, OpHi)
+ .addImm(Imm + 1)
+ .addReg(SrcHiReg, getKillRegState(SrcIsKill));
+
+ auto MIBLO = buildMI(MBB, MBBI, OpLo)
+ .addImm(Imm)
+ .addReg(SrcLoReg, getKillRegState(SrcIsKill));
+
+ MIBLO->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ MIBHI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+
+ MI.eraseFromParent();
+ return true;
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::PUSHWRr>(Block &MBB, BlockIt MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned OpLo, OpHi, SrcLoReg, SrcHiReg;
+ unsigned SrcReg = MI.getOperand(0).getReg();
+ bool SrcIsKill = MI.getOperand(0).isKill();
+ unsigned Flags = MI.getFlags();
+ OpLo = AVR::PUSHRr;
+ OpHi = AVR::PUSHRr;
+ TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg);
+
+ // Low part
+ buildMI(MBB, MBBI, OpLo)
+ .addReg(SrcLoReg, getKillRegState(SrcIsKill))
+ .setMIFlags(Flags);
+
+ // High part
+ buildMI(MBB, MBBI, OpHi)
+ .addReg(SrcHiReg, getKillRegState(SrcIsKill))
+ .setMIFlags(Flags);
+
+ MI.eraseFromParent();
+ return true;
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::POPWRd>(Block &MBB, BlockIt MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned OpLo, OpHi, DstLoReg, DstHiReg;
+ unsigned DstReg = MI.getOperand(0).getReg();
+ unsigned Flags = MI.getFlags();
+ OpLo = AVR::POPRd;
+ OpHi = AVR::POPRd;
+ TRI->splitReg(DstReg, DstLoReg, DstHiReg);
+
+ buildMI(MBB, MBBI, OpHi, DstHiReg).setMIFlags(Flags); // High
+ buildMI(MBB, MBBI, OpLo, DstLoReg).setMIFlags(Flags); // Low
+
+ MI.eraseFromParent();
+ return true;
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::LSLWRd>(Block &MBB, BlockIt MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned OpLo, OpHi, DstLoReg, DstHiReg;
+ unsigned DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ bool DstIsKill = MI.getOperand(1).isKill();
+ bool ImpIsDead = MI.getOperand(2).isDead();
+ OpLo = AVR::LSLRd;
+ OpHi = AVR::ROLRd;
+ TRI->splitReg(DstReg, DstLoReg, DstHiReg);
+
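+ // LSL shifts the low byte left and sets the carry; ROL then rotates the
+ // carry into the high byte, forming a 16-bit shift left.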
+ // Low part
+ buildMI(MBB, MBBI, OpLo)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstLoReg, getKillRegState(DstIsKill));
+
+ auto MIBHI = buildMI(MBB, MBBI, OpHi)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstHiReg, getKillRegState(DstIsKill));
+
+ if (ImpIsDead)
+ MIBHI->getOperand(2).setIsDead();
+
+ // SREG is always implicitly killed
+ MIBHI->getOperand(3).setIsKill();
+
+ MI.eraseFromParent();
+ return true;
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::LSRWRd>(Block &MBB, BlockIt MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned OpLo, OpHi, DstLoReg, DstHiReg;
+ unsigned DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ bool DstIsKill = MI.getOperand(1).isKill();
+ bool ImpIsDead = MI.getOperand(2).isDead();
+ OpLo = AVR::RORRd;
+ OpHi = AVR::LSRRd;
+ TRI->splitReg(DstReg, DstLoReg, DstHiReg);
+
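+ // LSR shifts the high byte right and moves its low bit into the carry; ROR
+ // then rotates the carry into the low byte, forming a 16-bit logical shift
+ // right.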
+ // High part
+ buildMI(MBB, MBBI, OpHi)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstHiReg, getKillRegState(DstIsKill));
+
+ auto MIBLO = buildMI(MBB, MBBI, OpLo)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstLoReg, getKillRegState(DstIsKill));
+
+ if (ImpIsDead)
+ MIBLO->getOperand(2).setIsDead();
+
+ // SREG is always implicitly killed
+ MIBLO->getOperand(3).setIsKill();
+
+ MI.eraseFromParent();
+ return true;
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::RORWRd>(Block &MBB, BlockIt MBBI) {
+ llvm_unreachable("RORW unimplemented");
+ return false;
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::ROLWRd>(Block &MBB, BlockIt MBBI) {
+ llvm_unreachable("ROLW unimplemented");
+ return false;
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::ASRWRd>(Block &MBB, BlockIt MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned OpLo, OpHi, DstLoReg, DstHiReg;
+ unsigned DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ bool DstIsKill = MI.getOperand(1).isKill();
+ bool ImpIsDead = MI.getOperand(2).isDead();
+ OpLo = AVR::RORRd;
+ OpHi = AVR::ASRRd;
+ TRI->splitReg(DstReg, DstLoReg, DstHiReg);
+
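+ // ASR shifts the high byte right while preserving the sign bit; ROR then
+ // rotates the carry into the low byte, forming a 16-bit arithmetic shift
+ // right.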
+ // High part
+ buildMI(MBB, MBBI, OpHi)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstHiReg, getKillRegState(DstIsKill));
+
+ auto MIBLO = buildMI(MBB, MBBI, OpLo)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstLoReg, getKillRegState(DstIsKill));
+
+ if (ImpIsDead)
+ MIBLO->getOperand(2).setIsDead();
+
+ // SREG is always implicitly killed
+ MIBLO->getOperand(3).setIsKill();
+
+ MI.eraseFromParent();
+ return true;
+}
+
+template <> bool AVRExpandPseudo::expand<AVR::SEXT>(Block &MBB, BlockIt MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned DstLoReg, DstHiReg;
+ // sext R17:R16, R17
+ // mov r16, r17
+ // lsl r17
+ // sbc r17, r17
+ // sext R17:R16, R13
+ // mov r16, r13
+ // mov r17, r13
+ // lsl r17
+ // sbc r17, r17
+ // sext R17:R16, R16
+ // mov r17, r16
+ // lsl r17
+ // sbc r17, r17
+ unsigned DstReg = MI.getOperand(0).getReg();
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ bool SrcIsKill = MI.getOperand(1).isKill();
+ bool ImpIsDead = MI.getOperand(2).isDead();
+ TRI->splitReg(DstReg, DstLoReg, DstHiReg);
+
+ if (SrcReg != DstLoReg) {
+ auto MOV = buildMI(MBB, MBBI, AVR::MOVRdRr)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(SrcReg);
+
+ if (SrcReg == DstHiReg) {
+ MOV->getOperand(1).setIsKill();
+ }
+ }
+
+ if (SrcReg != DstHiReg) {
+ buildMI(MBB, MBBI, AVR::MOVRdRr)
+ .addReg(DstHiReg, RegState::Define)
+ .addReg(SrcReg, getKillRegState(SrcIsKill));
+ }
+
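+ // LSL moves the sign bit of the high byte into the carry flag; SBC of the
+ // register with itself then yields 0x00 or 0xff, replicating the sign bit.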
+ buildMI(MBB, MBBI, AVR::LSLRd)
+ .addReg(DstHiReg, RegState::Define)
+ .addReg(DstHiReg, RegState::Kill);
+
+ auto SBC = buildMI(MBB, MBBI, AVR::SBCRdRr)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstHiReg, RegState::Kill)
+ .addReg(DstHiReg, RegState::Kill);
+
+ if (ImpIsDead)
+ SBC->getOperand(3).setIsDead();
+
+ // SREG is always implicitly killed
+ SBC->getOperand(4).setIsKill();
+
+ MI.eraseFromParent();
+ return true;
+}
+
+template <> bool AVRExpandPseudo::expand<AVR::ZEXT>(Block &MBB, BlockIt MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned DstLoReg, DstHiReg;
+ // zext R25:R24, R20
+ // mov R24, R20
+ // eor R25, R25
+ // zext R25:R24, R24
+ // eor R25, R25
+ // zext R25:R24, R25
+ // mov R24, R25
+ // eor R25, R25
+ unsigned DstReg = MI.getOperand(0).getReg();
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ bool SrcIsKill = MI.getOperand(1).isKill();
+ bool ImpIsDead = MI.getOperand(2).isDead();
+ TRI->splitReg(DstReg, DstLoReg, DstHiReg);
+
+ if (SrcReg != DstLoReg) {
+ buildMI(MBB, MBBI, AVR::MOVRdRr)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(SrcReg, getKillRegState(SrcIsKill));
+ }
+
+ auto EOR = buildMI(MBB, MBBI, AVR::EORRdRr)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addReg(DstHiReg, RegState::Kill)
+ .addReg(DstHiReg, RegState::Kill);
+
+ if (ImpIsDead)
+ EOR->getOperand(3).setIsDead();
+
+ MI.eraseFromParent();
+ return true;
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::SPREAD>(Block &MBB, BlockIt MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned OpLo, OpHi, DstLoReg, DstHiReg;
+ unsigned DstReg = MI.getOperand(0).getReg();
+ bool DstIsDead = MI.getOperand(0).isDead();
+ unsigned Flags = MI.getFlags();
+ OpLo = AVR::INRdA;
+ OpHi = AVR::INRdA;
+ TRI->splitReg(DstReg, DstLoReg, DstHiReg);
+
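+ // The stack pointer lives in I/O space: SPL at 0x3d and SPH at 0x3e.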
+ // Low part
+ buildMI(MBB, MBBI, OpLo)
+ .addReg(DstLoReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addImm(0x3d)
+ .setMIFlags(Flags);
+
+ // High part
+ buildMI(MBB, MBBI, OpHi)
+ .addReg(DstHiReg, RegState::Define | getDeadRegState(DstIsDead))
+ .addImm(0x3e)
+ .setMIFlags(Flags);
+
+ MI.eraseFromParent();
+ return true;
+}
+
+template <>
+bool AVRExpandPseudo::expand<AVR::SPWRITE>(Block &MBB, BlockIt MBBI) {
+ MachineInstr &MI = *MBBI;
+ unsigned SrcLoReg, SrcHiReg;
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ bool SrcIsKill = MI.getOperand(1).isKill();
+ unsigned Flags = MI.getFlags();
+ TRI->splitReg(SrcReg, SrcLoReg, SrcHiReg);
+
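+ // Save SREG, disable interrupts, write SPH (0x3e), restore SREG, and finally
+ // write SPL (0x3d).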
+ buildMI(MBB, MBBI, AVR::INRdA)
+ .addReg(AVR::R0, RegState::Define)
+ .addImm(SREG_ADDR)
+ .setMIFlags(Flags);
+
+ buildMI(MBB, MBBI, AVR::BCLRs).addImm(0x07).setMIFlags(Flags);
+
+ buildMI(MBB, MBBI, AVR::OUTARr)
+ .addImm(0x3e)
+ .addReg(SrcHiReg, getKillRegState(SrcIsKill))
+ .setMIFlags(Flags);
+
+ buildMI(MBB, MBBI, AVR::OUTARr)
+ .addImm(SREG_ADDR)
+ .addReg(AVR::R0, RegState::Kill)
+ .setMIFlags(Flags);
+
+ buildMI(MBB, MBBI, AVR::OUTARr)
+ .addImm(0x3d)
+ .addReg(SrcLoReg, getKillRegState(SrcIsKill))
+ .setMIFlags(Flags);
+
+ MI.eraseFromParent();
+ return true;
+}
+
+bool AVRExpandPseudo::expandMI(Block &MBB, BlockIt MBBI) {
+ MachineInstr &MI = *MBBI;
+ int Opcode = MBBI->getOpcode();
+
+#define EXPAND(Op) \
+ case Op: \
+ return expand<Op>(MBB, MI)
+
+ switch (Opcode) {
+ EXPAND(AVR::ADDWRdRr);
+ EXPAND(AVR::ADCWRdRr);
+ EXPAND(AVR::SUBWRdRr);
+ EXPAND(AVR::SUBIWRdK);
+ EXPAND(AVR::SBCWRdRr);
+ EXPAND(AVR::SBCIWRdK);
+ EXPAND(AVR::ANDWRdRr);
+ EXPAND(AVR::ANDIWRdK);
+ EXPAND(AVR::ORWRdRr);
+ EXPAND(AVR::ORIWRdK);
+ EXPAND(AVR::EORWRdRr);
+ EXPAND(AVR::COMWRd);
+ EXPAND(AVR::CPWRdRr);
+ EXPAND(AVR::CPCWRdRr);
+ EXPAND(AVR::LDIWRdK);
+ EXPAND(AVR::LDSWRdK);
+ EXPAND(AVR::LDWRdPtr);
+ EXPAND(AVR::LDWRdPtrPi);
+ EXPAND(AVR::LDWRdPtrPd);
+ case AVR::LDDWRdYQ: //:FIXME: remove this once PR13375 gets fixed
+ EXPAND(AVR::LDDWRdPtrQ);
+ EXPAND(AVR::LPMWRdZ);
+ EXPAND(AVR::LPMWRdZPi);
+ EXPAND(AVR::AtomicLoad8);
+ EXPAND(AVR::AtomicLoad16);
+ EXPAND(AVR::AtomicStore8);
+ EXPAND(AVR::AtomicStore16);
+ EXPAND(AVR::AtomicLoadAdd8);
+ EXPAND(AVR::AtomicLoadAdd16);
+ EXPAND(AVR::AtomicLoadSub8);
+ EXPAND(AVR::AtomicLoadSub16);
+ EXPAND(AVR::AtomicLoadAnd8);
+ EXPAND(AVR::AtomicLoadAnd16);
+ EXPAND(AVR::AtomicLoadOr8);
+ EXPAND(AVR::AtomicLoadOr16);
+ EXPAND(AVR::AtomicLoadXor8);
+ EXPAND(AVR::AtomicLoadXor16);
+ EXPAND(AVR::AtomicFence);
+ EXPAND(AVR::STSWKRr);
+ EXPAND(AVR::STWPtrRr);
+ EXPAND(AVR::STWPtrPiRr);
+ EXPAND(AVR::STWPtrPdRr);
+ EXPAND(AVR::STDWPtrQRr);
+ EXPAND(AVR::INWRdA);
+ EXPAND(AVR::OUTWARr);
+ EXPAND(AVR::PUSHWRr);
+ EXPAND(AVR::POPWRd);
+ EXPAND(AVR::LSLWRd);
+ EXPAND(AVR::LSRWRd);
+ EXPAND(AVR::RORWRd);
+ EXPAND(AVR::ROLWRd);
+ EXPAND(AVR::ASRWRd);
+ EXPAND(AVR::SEXT);
+ EXPAND(AVR::ZEXT);
+ EXPAND(AVR::SPREAD);
+ EXPAND(AVR::SPWRITE);
+ }
+#undef EXPAND
+ return false;
+}
+
+} // end of anonymous namespace
+
+INITIALIZE_PASS(AVRExpandPseudo, "avr-expand-pseudo",
+ AVR_EXPAND_PSEUDO_NAME, false, false)
+namespace llvm {
+
+FunctionPass *createAVRExpandPseudoPass() { return new AVRExpandPseudo(); }
+
+} // end of namespace llvm
diff --git a/contrib/llvm/lib/Target/AVR/AVRFrameLowering.cpp b/contrib/llvm/lib/Target/AVR/AVRFrameLowering.cpp
new file mode 100644
index 000000000000..b8cb2215ddb4
--- /dev/null
+++ b/contrib/llvm/lib/Target/AVR/AVRFrameLowering.cpp
@@ -0,0 +1,538 @@
+//===-- AVRFrameLowering.cpp - AVR Frame Information ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the AVR implementation of TargetFrameLowering class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AVRFrameLowering.h"
+
+#include "AVR.h"
+#include "AVRInstrInfo.h"
+#include "AVRMachineFunctionInfo.h"
+#include "AVRTargetMachine.h"
+#include "MCTargetDesc/AVRMCTargetDesc.h"
+
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Function.h"
+
+#include <vector>
+
+namespace llvm {
+
+AVRFrameLowering::AVRFrameLowering()
+ : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 1, -2) {}
+
+bool AVRFrameLowering::canSimplifyCallFramePseudos(
+ const MachineFunction &MF) const {
+ // Always simplify call frame pseudo instructions, even when
+ // hasReservedCallFrame is false.
+ return true;
+}
+
+bool AVRFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
+ // Reserve call frame memory in function prologue under the following
+ // conditions:
+ // - Y pointer is reserved to be the frame pointer.
+ // - The function does not contain variable sized objects.
+
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ return hasFP(MF) && !MFI.hasVarSizedObjects();
+}
+
+void AVRFrameLowering::emitPrologue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ CallingConv::ID CallConv = MF.getFunction()->getCallingConv();
+ DebugLoc DL = (MBBI != MBB.end()) ? MBBI->getDebugLoc() : DebugLoc();
+ const AVRSubtarget &STI = MF.getSubtarget<AVRSubtarget>();
+ const AVRInstrInfo &TII = *STI.getInstrInfo();
+
+ // Interrupt handlers re-enable interrupts in function entry.
+ if (CallConv == CallingConv::AVR_INTR) {
+ BuildMI(MBB, MBBI, DL, TII.get(AVR::BSETs))
+ .addImm(0x07)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
+ // Emit special prologue code to save R1, R0 and SREG in interrupt/signal
+ // handlers before saving any other registers.
+ if (CallConv == CallingConv::AVR_INTR ||
+ CallConv == CallingConv::AVR_SIGNAL) {
+ BuildMI(MBB, MBBI, DL, TII.get(AVR::PUSHWRr))
+ .addReg(AVR::R1R0, RegState::Kill)
+ .setMIFlag(MachineInstr::FrameSetup);
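+ // Save SREG (I/O address 0x3f) through R0, push it, then clear R0.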
+ BuildMI(MBB, MBBI, DL, TII.get(AVR::INRdA), AVR::R0)
+ .addImm(0x3f)
+ .setMIFlag(MachineInstr::FrameSetup);
+ BuildMI(MBB, MBBI, DL, TII.get(AVR::PUSHRr))
+ .addReg(AVR::R0, RegState::Kill)
+ .setMIFlag(MachineInstr::FrameSetup);
+ BuildMI(MBB, MBBI, DL, TII.get(AVR::EORRdRr))
+ .addReg(AVR::R0, RegState::Define)
+ .addReg(AVR::R0, RegState::Kill)
+ .addReg(AVR::R0, RegState::Kill)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
+ // Early exit if the frame pointer is not needed in this function.
+ if (!hasFP(MF)) {
+ return;
+ }
+
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ const AVRMachineFunctionInfo *AFI = MF.getInfo<AVRMachineFunctionInfo>();
+ unsigned FrameSize = MFI.getStackSize() - AFI->getCalleeSavedFrameSize();
+
+ // Skip the callee-saved push instructions.
+ while (
+ (MBBI != MBB.end()) && MBBI->getFlag(MachineInstr::FrameSetup) &&
+ (MBBI->getOpcode() == AVR::PUSHRr || MBBI->getOpcode() == AVR::PUSHWRr)) {
+ ++MBBI;
+ }
+
+ // Update Y with the new base value.
+ BuildMI(MBB, MBBI, DL, TII.get(AVR::SPREAD), AVR::R29R28)
+ .addReg(AVR::SP)
+ .setMIFlag(MachineInstr::FrameSetup);
+
+ // Mark the FramePtr as live-in in every block except the entry.
+ for (MachineFunction::iterator I = std::next(MF.begin()), E = MF.end();
+ I != E; ++I) {
+ I->addLiveIn(AVR::R29R28);
+ }
+
+ if (!FrameSize) {
+ return;
+ }
+
+ // Reserve the necessary frame memory by doing FP -= <size>.
+ unsigned Opcode = (isUInt<6>(FrameSize)) ? AVR::SBIWRdK : AVR::SUBIWRdK;
+
+ MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opcode), AVR::R29R28)
+ .addReg(AVR::R29R28, RegState::Kill)
+ .addImm(FrameSize)
+ .setMIFlag(MachineInstr::FrameSetup);
+ // The SREG implicit def is dead.
+ MI->getOperand(3).setIsDead();
+
+ // Write back R29R28 to SP and temporarily disable interrupts.
+ BuildMI(MBB, MBBI, DL, TII.get(AVR::SPWRITE), AVR::SP)
+ .addReg(AVR::R29R28)
+ .setMIFlag(MachineInstr::FrameSetup);
+}
+
+void AVRFrameLowering::emitEpilogue(MachineFunction &MF,
+ MachineBasicBlock &MBB) const {
+ CallingConv::ID CallConv = MF.getFunction()->getCallingConv();
+ bool isHandler = (CallConv == CallingConv::AVR_INTR ||
+ CallConv == CallingConv::AVR_SIGNAL);
+
+ // Early exit if the frame pointer is not needed in this function except for
+ // signal/interrupt handlers where special code generation is required.
+ if (!hasFP(MF) && !isHandler) {
+ return;
+ }
+
+ MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
+ assert(MBBI->getDesc().isReturn() &&
+ "Can only insert epilog into returning blocks");
+
+ DebugLoc DL = MBBI->getDebugLoc();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ const AVRMachineFunctionInfo *AFI = MF.getInfo<AVRMachineFunctionInfo>();
+ unsigned FrameSize = MFI.getStackSize() - AFI->getCalleeSavedFrameSize();
+ const AVRSubtarget &STI = MF.getSubtarget<AVRSubtarget>();
+ const AVRInstrInfo &TII = *STI.getInstrInfo();
+
+ // Emit special epilogue code to restore R1, R0 and SREG in interrupt/signal
+ // handlers at the very end of the function, just before reti.
+ if (isHandler) {
+ BuildMI(MBB, MBBI, DL, TII.get(AVR::POPRd), AVR::R0);
+ BuildMI(MBB, MBBI, DL, TII.get(AVR::OUTARr))
+ .addImm(0x3f)
+ .addReg(AVR::R0, RegState::Kill);
+ BuildMI(MBB, MBBI, DL, TII.get(AVR::POPWRd), AVR::R1R0);
+ }
+
+ // Early exit if there is no need to restore the frame pointer.
+ if (!FrameSize) {
+ return;
+ }
+
+ // Skip the callee-saved pop instructions.
+ while (MBBI != MBB.begin()) {
+ MachineBasicBlock::iterator PI = std::prev(MBBI);
+ int Opc = PI->getOpcode();
+
+ if (Opc != AVR::POPRd && Opc != AVR::POPWRd && !PI->isTerminator()) {
+ break;
+ }
+
+ --MBBI;
+ }
+
+ unsigned Opcode;
+
+ // Select the optimal opcode depending on the frame size.
+ if (isUInt<6>(FrameSize)) {
+ Opcode = AVR::ADIWRdK;
+ } else {
+ Opcode = AVR::SUBIWRdK;
+ FrameSize = -FrameSize;
+ }
+
+ // Restore the frame pointer by doing FP += <size>.
+ MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opcode), AVR::R29R28)
+ .addReg(AVR::R29R28, RegState::Kill)
+ .addImm(FrameSize);
+ // The SREG implicit def is dead.
+ MI->getOperand(3).setIsDead();
+
+ // Write back R29R28 to SP and temporarily disable interrupts.
+ BuildMI(MBB, MBBI, DL, TII.get(AVR::SPWRITE), AVR::SP)
+ .addReg(AVR::R29R28, RegState::Kill);
+}
+
+// Return true if the specified function should have a dedicated frame
+// pointer register. This is true if the function meets any of the following
+// conditions:
+// - a register has been spilled
+// - it contains allocas
+// - input arguments are passed through the stack
+//
+// Note that strictly speaking this is not a real frame pointer: it holds the
+// value of SP after frame allocation instead of the original SP at function
+// entry.
+bool AVRFrameLowering::hasFP(const MachineFunction &MF) const {
+ const AVRMachineFunctionInfo *FuncInfo = MF.getInfo<AVRMachineFunctionInfo>();
+
+ return (FuncInfo->getHasSpills() || FuncInfo->getHasAllocas() ||
+ FuncInfo->getHasStackArgs());
+}
+
+bool AVRFrameLowering::spillCalleeSavedRegisters(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty()) {
+ return false;
+ }
+
+ unsigned CalleeFrameSize = 0;
+ DebugLoc DL = MBB.findDebugLoc(MI);
+ MachineFunction &MF = *MBB.getParent();
+ const AVRSubtarget &STI = MF.getSubtarget<AVRSubtarget>();
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
+ AVRMachineFunctionInfo *AVRFI = MF.getInfo<AVRMachineFunctionInfo>();
+
+ for (unsigned i = CSI.size(); i != 0; --i) {
+ unsigned Reg = CSI[i - 1].getReg();
+ bool IsNotLiveIn = !MBB.isLiveIn(Reg);
+
+ assert(TRI->getMinimalPhysRegClass(Reg)->getSize() == 1 &&
+ "Invalid register size");
+
+ // Add the callee-saved register as live-in only if it is not already a
+ // live-in register; this usually happens with arguments that are passed
+ // through callee-saved registers.
+ if (IsNotLiveIn) {
+ MBB.addLiveIn(Reg);
+ }
+
+ // Do not kill the register when it is an input argument.
+ BuildMI(MBB, MI, DL, TII.get(AVR::PUSHRr))
+ .addReg(Reg, getKillRegState(IsNotLiveIn))
+ .setMIFlag(MachineInstr::FrameSetup);
+ ++CalleeFrameSize;
+ }
+
+ AVRFI->setCalleeSavedFrameSize(CalleeFrameSize);
+
+ return true;
+}
+
+bool AVRFrameLowering::restoreCalleeSavedRegisters(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo *TRI) const {
+ if (CSI.empty()) {
+ return false;
+ }
+
+ DebugLoc DL = MBB.findDebugLoc(MI);
+ const MachineFunction &MF = *MBB.getParent();
+ const AVRSubtarget &STI = MF.getSubtarget<AVRSubtarget>();
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
+
+ for (const CalleeSavedInfo &CCSI : CSI) {
+ unsigned Reg = CCSI.getReg();
+
+ assert(TRI->getMinimalPhysRegClass(Reg)->getSize() == 1 &&
+ "Invalid register size");
+
+ BuildMI(MBB, MI, DL, TII.get(AVR::POPRd), Reg);
+ }
+
+ return true;
+}
+
+/// Replace pseudo store instructions that pass arguments through the stack with
+/// real instructions. If insertPushes is true then all instructions are
+/// replaced with push instructions, otherwise regular std instructions are
+/// inserted.
+static void fixStackStores(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ const TargetInstrInfo &TII, bool insertPushes) {
+ const AVRSubtarget &STI = MBB.getParent()->getSubtarget<AVRSubtarget>();
+ const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
+
+ // Iterate through the BB until we hit a call instruction or we reach the end.
+ for (auto I = MI, E = MBB.end(); I != E && !I->isCall();) {
+ MachineBasicBlock::iterator NextMI = std::next(I);
+ MachineInstr &MI = *I;
+ unsigned Opcode = I->getOpcode();
+
+ // Only handle pseudo store instructions where SP is the base pointer.
+ if (Opcode != AVR::STDSPQRr && Opcode != AVR::STDWSPQRr) {
+ I = NextMI;
+ continue;
+ }
+
+ assert(MI.getOperand(0).getReg() == AVR::SP &&
+ "Invalid register, should be SP!");
+ if (insertPushes) {
+ // Replace this instruction with a push.
+ unsigned SrcReg = MI.getOperand(2).getReg();
+ bool SrcIsKill = MI.getOperand(2).isKill();
+
+ // We can't use PUSHWRr here because when it is expanded the order of the
+ // new instructions is reversed from what we need. Perform the expansion now.
+ if (Opcode == AVR::STDWSPQRr) {
+ BuildMI(MBB, I, MI.getDebugLoc(), TII.get(AVR::PUSHRr))
+ .addReg(TRI.getSubReg(SrcReg, AVR::sub_hi),
+ getKillRegState(SrcIsKill));
+ BuildMI(MBB, I, MI.getDebugLoc(), TII.get(AVR::PUSHRr))
+ .addReg(TRI.getSubReg(SrcReg, AVR::sub_lo),
+ getKillRegState(SrcIsKill));
+ } else {
+ BuildMI(MBB, I, MI.getDebugLoc(), TII.get(AVR::PUSHRr))
+ .addReg(SrcReg, getKillRegState(SrcIsKill));
+ }
+
+ MI.eraseFromParent();
+ I = NextMI;
+ continue;
+ }
+
+ // Replace this instruction with a regular store. Use Y as the base
+ // pointer since it is guaranteed to contain a copy of SP.
+ unsigned STOpc =
+ (Opcode == AVR::STDWSPQRr) ? AVR::STDWPtrQRr : AVR::STDPtrQRr;
+
+ MI.setDesc(TII.get(STOpc));
+ MI.getOperand(0).setReg(AVR::R29R28);
+
+ I = NextMI;
+ }
+}
+
+MachineBasicBlock::iterator AVRFrameLowering::eliminateCallFramePseudoInstr(
+ MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const {
+ const AVRSubtarget &STI = MF.getSubtarget<AVRSubtarget>();
+ const TargetFrameLowering &TFI = *STI.getFrameLowering();
+ const AVRInstrInfo &TII = *STI.getInstrInfo();
+
+ // There is nothing to insert when the call frame memory is allocated during
+ // function entry. Delete the call frame pseudo and replace all pseudo stores
+ // with real store instructions.
+ if (TFI.hasReservedCallFrame(MF)) {
+ fixStackStores(MBB, MI, TII, false);
+ return MBB.erase(MI);
+ }
+
+ DebugLoc DL = MI->getDebugLoc();
+ unsigned int Opcode = MI->getOpcode();
+ int Amount = MI->getOperand(0).getImm();
+
+ // The call frame setup pseudo does not need to allocate stack space for the
+ // call; instead we insert push instructions that allocate the necessary
+ // stack. The call frame destroy pseudo is converted into an
+ // 'adiw reg, <amt>' sequence that handles the read and write of SP in I/O
+ // space.
+ if (Amount != 0) {
+ assert(TFI.getStackAlignment() == 1 && "Unsupported stack alignment");
+
+ if (Opcode == TII.getCallFrameSetupOpcode()) {
+ fixStackStores(MBB, MI, TII, true);
+ } else {
+ assert(Opcode == TII.getCallFrameDestroyOpcode());
+
+ // Select the best opcode to adjust SP based on the offset size.
+ unsigned addOpcode;
+ if (isUInt<6>(Amount)) {
+ addOpcode = AVR::ADIWRdK;
+ } else {
+ addOpcode = AVR::SUBIWRdK;
+ Amount = -Amount;
+ }
+
+ // Build the instruction sequence.
+ BuildMI(MBB, MI, DL, TII.get(AVR::SPREAD), AVR::R31R30).addReg(AVR::SP);
+
+ MachineInstr *New = BuildMI(MBB, MI, DL, TII.get(addOpcode), AVR::R31R30)
+ .addReg(AVR::R31R30, RegState::Kill)
+ .addImm(Amount);
+ New->getOperand(3).setIsDead();
+
+ BuildMI(MBB, MI, DL, TII.get(AVR::SPWRITE), AVR::SP)
+ .addReg(AVR::R31R30, RegState::Kill);
+ }
+ }
+
+ return MBB.erase(MI);
+}
+
+void AVRFrameLowering::determineCalleeSaves(MachineFunction &MF,
+ BitVector &SavedRegs,
+ RegScavenger *RS) const {
+ TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
+
+ // Spill register Y when it is used as the frame pointer.
+ if (hasFP(MF)) {
+ SavedRegs.set(AVR::R29R28);
+ SavedRegs.set(AVR::R29);
+ SavedRegs.set(AVR::R28);
+ }
+}
+
+/// The frame analyzer pass.
+///
+/// Scans the function for allocas and used arguments
+/// that are passed through the stack.
+struct AVRFrameAnalyzer : public MachineFunctionPass {
+ static char ID;
+ AVRFrameAnalyzer() : MachineFunctionPass(ID) {}
+
+ bool runOnMachineFunction(MachineFunction &MF) {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ AVRMachineFunctionInfo *FuncInfo = MF.getInfo<AVRMachineFunctionInfo>();
+
+ // If the number of frame objects differs from the number of fixed objects
+ // at this stage, the function contains allocas.
+ if (MFI.getNumObjects() != MFI.getNumFixedObjects()) {
+ // Check for the type of allocas present in the function. We only care
+ // about fixed size allocas so do not give false positives if only
+ // variable sized allocas are present.
+ for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
+ // Variable sized objects have size 0.
+ if (MFI.getObjectSize(i)) {
+ FuncInfo->setHasAllocas(true);
+ break;
+ }
+ }
+ }
+
+ // If there are fixed frame indexes present, scan the function to see if
+ // they are really being used.
+ if (MFI.getNumFixedObjects() == 0) {
+ return false;
+ }
+
+ // Fixed frame indexes are present; scan the function to see whether they
+ // are actually used, otherwise we can ignore them.
+ for (const MachineBasicBlock &BB : MF) {
+ for (const MachineInstr &MI : BB) {
+ int Opcode = MI.getOpcode();
+
+ if ((Opcode != AVR::LDDRdPtrQ) && (Opcode != AVR::LDDWRdPtrQ) &&
+ (Opcode != AVR::STDPtrQRr) && (Opcode != AVR::STDWPtrQRr)) {
+ continue;
+ }
+
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isFI()) {
+ continue;
+ }
+
+ if (MFI.isFixedObjectIndex(MO.getIndex())) {
+ FuncInfo->setHasStackArgs(true);
+ return false;
+ }
+ }
+ }
+ }
+
+ return false;
+ }
+
+ StringRef getPassName() const { return "AVR Frame Analyzer"; }
+};
+
+char AVRFrameAnalyzer::ID = 0;
+
+/// Creates instance of the frame analyzer pass.
+FunctionPass *createAVRFrameAnalyzerPass() { return new AVRFrameAnalyzer(); }
+
+/// Create the Dynalloca Stack Pointer Save/Restore pass.
+/// Insert a copy of SP before allocating dynamic stack memory and restore it
+/// on function exit so the original SP state is recovered. This avoids the
+/// need to reserve a register pair for a frame pointer.
+struct AVRDynAllocaSR : public MachineFunctionPass {
+ static char ID;
+ AVRDynAllocaSR() : MachineFunctionPass(ID) {}
+
+ bool runOnMachineFunction(MachineFunction &MF) {
+ // Early exit when there are no variable sized objects in the function.
+ if (!MF.getFrameInfo().hasVarSizedObjects()) {
+ return false;
+ }
+
+ const AVRSubtarget &STI = MF.getSubtarget<AVRSubtarget>();
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
+ MachineBasicBlock &EntryMBB = MF.front();
+ MachineBasicBlock::iterator MBBI = EntryMBB.begin();
+ DebugLoc DL = EntryMBB.findDebugLoc(MBBI);
+
+ unsigned SPCopy =
+ MF.getRegInfo().createVirtualRegister(&AVR::DREGSRegClass);
+
+ // Create a copy of SP in function entry before any dynallocas are
+ // inserted.
+ BuildMI(EntryMBB, MBBI, DL, TII.get(AVR::COPY), SPCopy).addReg(AVR::SP);
+
+ // Restore SP in all exit basic blocks.
+ for (MachineBasicBlock &MBB : MF) {
+ // If last instruction is a return instruction, add a restore copy.
+ if (!MBB.empty() && MBB.back().isReturn()) {
+ MBBI = MBB.getLastNonDebugInstr();
+ DL = MBBI->getDebugLoc();
+ BuildMI(MBB, MBBI, DL, TII.get(AVR::COPY), AVR::SP)
+ .addReg(SPCopy, RegState::Kill);
+ }
+ }
+
+ return true;
+ }
+
+ StringRef getPassName() const {
+ return "AVR dynalloca stack pointer save/restore";
+ }
+};
+
+char AVRDynAllocaSR::ID = 0;
+
+/// createAVRDynAllocaSRPass - returns an instance of the dynalloca stack
+/// pointer save/restore pass.
+FunctionPass *createAVRDynAllocaSRPass() { return new AVRDynAllocaSR(); }
+
+} // end of namespace llvm
+
diff --git a/contrib/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp b/contrib/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
new file mode 100644
index 000000000000..156a21dfecfe
--- /dev/null
+++ b/contrib/llvm/lib/Target/AVR/AVRISelDAGToDAG.cpp
@@ -0,0 +1,565 @@
+//===-- AVRISelDAGToDAG.cpp - A dag to dag inst selector for AVR ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the AVR target.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AVR.h"
+#include "AVRTargetMachine.h"
+#include "MCTargetDesc/AVRMCTargetDesc.h"
+
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "avr-isel"
+
+namespace llvm {
+
+/// Lowers LLVM IR (in DAG form) to AVR MC instructions (in DAG form).
+class AVRDAGToDAGISel : public SelectionDAGISel {
+public:
+ AVRDAGToDAGISel(AVRTargetMachine &TM, CodeGenOpt::Level OptLevel)
+ : SelectionDAGISel(TM, OptLevel), Subtarget(nullptr) {}
+
+ StringRef getPassName() const override {
+ return "AVR DAG->DAG Instruction Selection";
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ bool SelectAddr(SDNode *Op, SDValue N, SDValue &Base, SDValue &Disp);
+
+ bool selectIndexedLoad(SDNode *N);
+ unsigned selectIndexedProgMemLoad(const LoadSDNode *LD, MVT VT);
+
+ bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintCode,
+ std::vector<SDValue> &OutOps) override;
+
+// Include the pieces autogenerated from the target description.
+#include "AVRGenDAGISel.inc"
+
+private:
+ void Select(SDNode *N) override;
+ bool trySelect(SDNode *N);
+
+ template <unsigned NodeType> bool select(SDNode *N);
+ bool selectMultiplication(SDNode *N);
+
+ const AVRSubtarget *Subtarget;
+};
+
+bool AVRDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
+ Subtarget = &MF.getSubtarget<AVRSubtarget>();
+ return SelectionDAGISel::runOnMachineFunction(MF);
+}
+
+bool AVRDAGToDAGISel::SelectAddr(SDNode *Op, SDValue N, SDValue &Base,
+ SDValue &Disp) {
+ SDLoc dl(Op);
+ auto DL = CurDAG->getDataLayout();
+ MVT PtrVT = getTargetLowering()->getPointerTy(DL);
+
+  // If the address is a frame index, get the TargetFrameIndex.
+ if (const FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(N)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), PtrVT);
+ Disp = CurDAG->getTargetConstant(0, dl, MVT::i8);
+
+ return true;
+ }
+
+ // Match simple Reg + uimm6 operands.
+ if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
+ !CurDAG->isBaseWithConstantOffset(N)) {
+ return false;
+ }
+
+ if (const ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ int RHSC = (int)RHS->getZExtValue();
+
+    // Convert negative offsets into positive ones.
+ if (N.getOpcode() == ISD::SUB) {
+ RHSC = -RHSC;
+ }
+
+ // <#Frame index + const>
+ // Allow folding offsets bigger than 63 so the frame pointer can be used
+ // directly instead of copying it around by adjusting and restoring it for
+ // each access.
+ if (N.getOperand(0).getOpcode() == ISD::FrameIndex) {
+ int FI = cast<FrameIndexSDNode>(N.getOperand(0))->getIndex();
+
+ Base = CurDAG->getTargetFrameIndex(FI, PtrVT);
+ Disp = CurDAG->getTargetConstant(RHSC, dl, MVT::i16);
+
+ return true;
+ }
+
+    // The value type of the memory instruction determines the maximum offset
+    // allowed.
+ MVT VT = cast<MemSDNode>(Op)->getMemoryVT().getSimpleVT();
+
+ // We only accept offsets that fit in 6 bits (unsigned).
+ if (isUInt<6>(RHSC) && (VT == MVT::i8 || VT == MVT::i16)) {
+ Base = N.getOperand(0);
+ Disp = CurDAG->getTargetConstant(RHSC, dl, MVT::i8);
+
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool AVRDAGToDAGISel::selectIndexedLoad(SDNode *N) {
+ const LoadSDNode *LD = cast<LoadSDNode>(N);
+ ISD::MemIndexedMode AM = LD->getAddressingMode();
+ MVT VT = LD->getMemoryVT().getSimpleVT();
+ auto PtrVT = getTargetLowering()->getPointerTy(CurDAG->getDataLayout());
+
+ // We only care if this load uses a POSTINC or PREDEC mode.
+ if ((LD->getExtensionType() != ISD::NON_EXTLOAD) ||
+ (AM != ISD::POST_INC && AM != ISD::PRE_DEC)) {
+
+ return false;
+ }
+
+ unsigned Opcode = 0;
+ bool isPre = (AM == ISD::PRE_DEC);
+ int Offs = cast<ConstantSDNode>(LD->getOffset())->getSExtValue();
+
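+  // The load can only be folded when the pointer is adjusted by exactly the
+  // access width: +1/+2 for post-increment, -1/-2 for pre-decrement.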
+ switch (VT.SimpleTy) {
+ case MVT::i8: {
+ if ((!isPre && Offs != 1) || (isPre && Offs != -1)) {
+ return false;
+ }
+
+ Opcode = (isPre) ? AVR::LDRdPtrPd : AVR::LDRdPtrPi;
+ break;
+ }
+ case MVT::i16: {
+ if ((!isPre && Offs != 2) || (isPre && Offs != -2)) {
+ return false;
+ }
+
+ Opcode = (isPre) ? AVR::LDWRdPtrPd : AVR::LDWRdPtrPi;
+ break;
+ }
+ default:
+ return false;
+ }
+
+ SDNode *ResNode = CurDAG->getMachineNode(Opcode, SDLoc(N), VT,
+ PtrVT, MVT::Other,
+ LD->getBasePtr(), LD->getChain());
+ ReplaceUses(N, ResNode);
+ CurDAG->RemoveDeadNode(N);
+
+ return true;
+}
+
+unsigned AVRDAGToDAGISel::selectIndexedProgMemLoad(const LoadSDNode *LD,
+ MVT VT) {
+ ISD::MemIndexedMode AM = LD->getAddressingMode();
+
+ // Progmem indexed loads only work in POSTINC mode.
+ if (LD->getExtensionType() != ISD::NON_EXTLOAD || AM != ISD::POST_INC) {
+ return 0;
+ }
+
+ unsigned Opcode = 0;
+ int Offs = cast<ConstantSDNode>(LD->getOffset())->getSExtValue();
+
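+  // LPM only supports post-incrementing the Z pointer by the access width
+  // (one byte for i8, two bytes for i16).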
+ switch (VT.SimpleTy) {
+ case MVT::i8: {
+ if (Offs != 1) {
+ return 0;
+ }
+ Opcode = AVR::LPMRdZPi;
+ break;
+ }
+ case MVT::i16: {
+ if (Offs != 2) {
+ return 0;
+ }
+ Opcode = AVR::LPMWRdZPi;
+ break;
+ }
+ default:
+ return 0;
+ }
+
+ return Opcode;
+}
+
+bool AVRDAGToDAGISel::SelectInlineAsmMemoryOperand(const SDValue &Op,
+ unsigned ConstraintCode,
+ std::vector<SDValue> &OutOps) {
+  assert((ConstraintCode == InlineAsm::Constraint_m ||
+          ConstraintCode == InlineAsm::Constraint_Q) &&
+         "Unexpected asm memory constraint");
+
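+  // Note that, per the SelectInlineAsmMemoryOperand contract, returning false
+  // means the operand was successfully lowered into OutOps, while returning
+  // true signals failure.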
+ MachineRegisterInfo &RI = MF->getRegInfo();
+ const AVRSubtarget &STI = MF->getSubtarget<AVRSubtarget>();
+ const TargetLowering &TL = *STI.getTargetLowering();
+ SDLoc dl(Op);
+ auto DL = CurDAG->getDataLayout();
+
+ const RegisterSDNode *RegNode = dyn_cast<RegisterSDNode>(Op);
+
+  // If the address operand is already in the PTRDISPREGS class, it can be
+  // used directly.
+ if (RegNode &&
+ RI.getRegClass(RegNode->getReg()) == &AVR::PTRDISPREGSRegClass) {
+ OutOps.push_back(Op);
+ return false;
+ }
+
+ if (Op->getOpcode() == ISD::FrameIndex) {
+ SDValue Base, Disp;
+
+ if (SelectAddr(Op.getNode(), Op, Base, Disp)) {
+ OutOps.push_back(Base);
+ OutOps.push_back(Disp);
+
+ return false;
+ }
+
+ return true;
+ }
+
+  // Handle the case where Op is 'add/sub register, immediate' and the register
+  // is either a virtual register or a member of PTRDISPREGSRegClass.
+ if (Op->getOpcode() == ISD::ADD || Op->getOpcode() == ISD::SUB) {
+ SDValue CopyFromRegOp = Op->getOperand(0);
+ SDValue ImmOp = Op->getOperand(1);
+ ConstantSDNode *ImmNode = dyn_cast<ConstantSDNode>(ImmOp);
+
+    unsigned Reg;
+
+    // The immediate must exist and fit in the 6-bit displacement field;
+    // checking ImmNode first avoids dereferencing a null pointer.
+    bool CanHandleRegImmOpt =
+        ImmNode && ImmNode->getAPIntValue().getZExtValue() < 64;
+
+ if (CopyFromRegOp->getOpcode() == ISD::CopyFromReg) {
+ RegisterSDNode *RegNode =
+ cast<RegisterSDNode>(CopyFromRegOp->getOperand(1));
+ Reg = RegNode->getReg();
+ CanHandleRegImmOpt &= (TargetRegisterInfo::isVirtualRegister(Reg) ||
+ AVR::PTRDISPREGSRegClass.contains(Reg));
+ } else {
+ CanHandleRegImmOpt = false;
+ }
+
+    // If this is a case we can handle, correct the virtual register class if
+    // needed and move on to the next inline asm operand.
+ if (CanHandleRegImmOpt) {
+ SDValue Base, Disp;
+
+ if (RI.getRegClass(Reg) != &AVR::PTRDISPREGSRegClass) {
+ SDLoc dl(CopyFromRegOp);
+
+ unsigned VReg = RI.createVirtualRegister(&AVR::PTRDISPREGSRegClass);
+
+ SDValue CopyToReg =
+ CurDAG->getCopyToReg(CopyFromRegOp, dl, VReg, CopyFromRegOp);
+
+ SDValue NewCopyFromRegOp =
+ CurDAG->getCopyFromReg(CopyToReg, dl, VReg, TL.getPointerTy(DL));
+
+ Base = NewCopyFromRegOp;
+ } else {
+ Base = CopyFromRegOp;
+ }
+
+ if (ImmNode->getValueType(0) != MVT::i8) {
+ Disp = CurDAG->getTargetConstant(ImmNode->getAPIntValue().getZExtValue(), dl, MVT::i8);
+ } else {
+ Disp = ImmOp;
+ }
+
+ OutOps.push_back(Base);
+ OutOps.push_back(Disp);
+
+ return false;
+ }
+ }
+
+  // More generic case: create a chain that copies Op into a pointer register
+  // and return that register.
+ unsigned VReg = RI.createVirtualRegister(&AVR::PTRDISPREGSRegClass);
+
+ SDValue CopyToReg = CurDAG->getCopyToReg(Op, dl, VReg, Op);
+ SDValue CopyFromReg =
+ CurDAG->getCopyFromReg(CopyToReg, dl, VReg, TL.getPointerTy(DL));
+
+ OutOps.push_back(CopyFromReg);
+
+ return false;
+}
+
+template <> bool AVRDAGToDAGISel::select<ISD::FrameIndex>(SDNode *N) {
+ auto DL = CurDAG->getDataLayout();
+
+ // Convert the frameindex into a temp instruction that will hold the
+ // effective address of the final stack slot.
+ int FI = cast<FrameIndexSDNode>(N)->getIndex();
+ SDValue TFI =
+ CurDAG->getTargetFrameIndex(FI, getTargetLowering()->getPointerTy(DL));
+
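+  // FRMIDX is a pseudo instruction; it is rewritten into a concrete address
+  // computation once frame indices are eliminated.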
+ CurDAG->SelectNodeTo(N, AVR::FRMIDX,
+ getTargetLowering()->getPointerTy(DL), TFI,
+ CurDAG->getTargetConstant(0, SDLoc(N), MVT::i16));
+ return true;
+}
+
+template <> bool AVRDAGToDAGISel::select<ISD::STORE>(SDNode *N) {
+  // Use the STD{W}SPQRr pseudo instructions when passing arguments through the
+  // stack on function calls; they are expanded later, during the PEI phase.
+ const StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDValue BasePtr = ST->getBasePtr();
+
+ // Early exit when the base pointer is a frame index node or a constant.
+ if (isa<FrameIndexSDNode>(BasePtr) || isa<ConstantSDNode>(BasePtr) ||
+ BasePtr.isUndef()) {
+ return false;
+ }
+
+ const RegisterSDNode *RN = dyn_cast<RegisterSDNode>(BasePtr.getOperand(0));
+ // Only stores where SP is the base pointer are valid.
+ if (!RN || (RN->getReg() != AVR::SP)) {
+ return false;
+ }
+
+ int CST = (int)cast<ConstantSDNode>(BasePtr.getOperand(1))->getZExtValue();
+ SDValue Chain = ST->getChain();
+ EVT VT = ST->getValue().getValueType();
+ SDLoc DL(N);
+ SDValue Offset = CurDAG->getTargetConstant(CST, DL, MVT::i16);
+ SDValue Ops[] = {BasePtr.getOperand(0), Offset, ST->getValue(), Chain};
+ unsigned Opc = (VT == MVT::i16) ? AVR::STDWSPQRr : AVR::STDSPQRr;
+
+ SDNode *ResNode = CurDAG->getMachineNode(Opc, DL, MVT::Other, Ops);
+
+ // Transfer memory operands.
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = ST->getMemOperand();
+ cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1);
+
+ ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0));
+ CurDAG->RemoveDeadNode(N);
+
+ return true;
+}
+
+template <> bool AVRDAGToDAGISel::select<ISD::LOAD>(SDNode *N) {
+ const LoadSDNode *LD = cast<LoadSDNode>(N);
+ if (!AVR::isProgramMemoryAccess(LD)) {
+ // Check if the opcode can be converted into an indexed load.
+ return selectIndexedLoad(N);
+ }
+
+ assert(Subtarget->hasLPM() && "cannot load from program memory on this mcu");
+
+ // This is a flash memory load, move the pointer into R31R30 and emit
+ // the lpm instruction.
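+  // (LPM can only address program memory through the Z register pair.)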
+ MVT VT = LD->getMemoryVT().getSimpleVT();
+ SDValue Chain = LD->getChain();
+ SDValue Ptr = LD->getBasePtr();
+ SDNode *ResNode;
+ SDLoc DL(N);
+
+ Chain = CurDAG->getCopyToReg(Chain, DL, AVR::R31R30, Ptr, SDValue());
+ Ptr = CurDAG->getCopyFromReg(Chain, DL, AVR::R31R30, MVT::i16,
+ Chain.getValue(1));
+
+ SDValue RegZ = CurDAG->getRegister(AVR::R31R30, MVT::i16);
+
+ // Check if the opcode can be converted into an indexed load.
+ if (unsigned LPMOpc = selectIndexedProgMemLoad(LD, VT)) {
+ // It is legal to fold the load into an indexed load.
+ ResNode = CurDAG->getMachineNode(LPMOpc, DL, VT, MVT::i16, MVT::Other, Ptr,
+ RegZ);
+ ReplaceUses(SDValue(N, 1), SDValue(ResNode, 1));
+ } else {
+ // Selecting an indexed load is not legal, fallback to a normal load.
+ switch (VT.SimpleTy) {
+ case MVT::i8:
+ ResNode = CurDAG->getMachineNode(AVR::LPMRdZ, DL, MVT::i8, MVT::Other,
+ Ptr, RegZ);
+ break;
+ case MVT::i16:
+ ResNode = CurDAG->getMachineNode(AVR::LPMWRdZ, DL, MVT::i16,
+ MVT::Other, Ptr, RegZ);
+ ReplaceUses(SDValue(N, 1), SDValue(ResNode, 1));
+ break;
+ default:
+ llvm_unreachable("Unsupported VT!");
+ }
+ }
+
+ // Transfer memory operands.
+ MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+ MemOp[0] = LD->getMemOperand();
+ cast<MachineSDNode>(ResNode)->setMemRefs(MemOp, MemOp + 1);
+
+ ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0));
+ ReplaceUses(SDValue(N, 1), SDValue(ResNode, 1));
+ CurDAG->RemoveDeadNode(N);
+
+ return true;
+}
+
+template <> bool AVRDAGToDAGISel::select<AVRISD::CALL>(SDNode *N) {
+ SDValue InFlag;
+ SDValue Chain = N->getOperand(0);
+ SDValue Callee = N->getOperand(1);
+ unsigned LastOpNum = N->getNumOperands() - 1;
+
+ // Direct calls are autogenerated.
+ unsigned Op = Callee.getOpcode();
+ if (Op == ISD::TargetGlobalAddress || Op == ISD::TargetExternalSymbol) {
+ return false;
+ }
+
+ // Skip the incoming flag if present
+ if (N->getOperand(LastOpNum).getValueType() == MVT::Glue) {
+ --LastOpNum;
+ }
+
+ SDLoc DL(N);
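+  // Indirect calls are lowered to ICALL, which expects the callee address in
+  // the Z register pair (R31:R30).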
+ Chain = CurDAG->getCopyToReg(Chain, DL, AVR::R31R30, Callee, InFlag);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(CurDAG->getRegister(AVR::R31R30, MVT::i16));
+
+ // Map all operands into the new node.
+ for (unsigned i = 2, e = LastOpNum + 1; i != e; ++i) {
+ Ops.push_back(N->getOperand(i));
+ }
+
+ Ops.push_back(Chain);
+ Ops.push_back(Chain.getValue(1));
+
+ SDNode *ResNode =
+ CurDAG->getMachineNode(AVR::ICALL, DL, MVT::Other, MVT::Glue, Ops);
+
+ ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0));
+ ReplaceUses(SDValue(N, 1), SDValue(ResNode, 1));
+ CurDAG->RemoveDeadNode(N);
+
+ return true;
+}
+
+template <> bool AVRDAGToDAGISel::select<ISD::BRIND>(SDNode *N) {
+ SDValue Chain = N->getOperand(0);
+ SDValue JmpAddr = N->getOperand(1);
+
+ SDLoc DL(N);
+ // Move the destination address of the indirect branch into R31R30.
+ Chain = CurDAG->getCopyToReg(Chain, DL, AVR::R31R30, JmpAddr);
+ SDNode *ResNode = CurDAG->getMachineNode(AVR::IJMP, DL, MVT::Other, Chain);
+
+ ReplaceUses(SDValue(N, 0), SDValue(ResNode, 0));
+ CurDAG->RemoveDeadNode(N);
+
+ return true;
+}
+
+bool AVRDAGToDAGISel::selectMultiplication(llvm::SDNode *N) {
+ SDLoc DL(N);
+ MVT Type = N->getSimpleValueType(0);
+
+ assert(Type == MVT::i8 && "unexpected value type");
+
+ bool isSigned = N->getOpcode() == ISD::SMUL_LOHI;
+ unsigned MachineOp = isSigned ? AVR::MULSRdRr : AVR::MULRdRr;
+
+ SDValue Lhs = N->getOperand(0);
+ SDValue Rhs = N->getOperand(1);
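+  // The hardware MUL/MULS instructions always place the 16-bit product in
+  // R1:R0; the halves are copied out of those fixed registers below.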
+ SDNode *Mul = CurDAG->getMachineNode(MachineOp, DL, MVT::Glue, Lhs, Rhs);
+ SDValue InChain = CurDAG->getEntryNode();
+ SDValue InGlue = SDValue(Mul, 0);
+
+ // Copy the low half of the result, if it is needed.
+ if (N->hasAnyUseOfValue(0)) {
+ SDValue CopyFromLo =
+ CurDAG->getCopyFromReg(InChain, DL, AVR::R0, Type, InGlue);
+
+ ReplaceUses(SDValue(N, 0), CopyFromLo);
+
+ InChain = CopyFromLo.getValue(1);
+ InGlue = CopyFromLo.getValue(2);
+ }
+
+ // Copy the high half of the result, if it is needed.
+ if (N->hasAnyUseOfValue(1)) {
+ SDValue CopyFromHi =
+ CurDAG->getCopyFromReg(InChain, DL, AVR::R1, Type, InGlue);
+
+ ReplaceUses(SDValue(N, 1), CopyFromHi);
+
+ InChain = CopyFromHi.getValue(1);
+ InGlue = CopyFromHi.getValue(2);
+ }
+
+ CurDAG->RemoveDeadNode(N);
+
+ // We need to clear R1. This is currently done (dirtily)
+ // using a custom inserter.
+
+ return true;
+}
+
+void AVRDAGToDAGISel::Select(SDNode *N) {
+ // Dump information about the Node being selected
+ DEBUG(errs() << "Selecting: "; N->dump(CurDAG); errs() << "\n");
+
+ // If we have a custom node, we already have selected!
+ if (N->isMachineOpcode()) {
+ DEBUG(errs() << "== "; N->dump(CurDAG); errs() << "\n");
+ N->setNodeId(-1);
+ return;
+ }
+
+ // See if subclasses can handle this node.
+ if (trySelect(N))
+ return;
+
+ // Select the default instruction
+ SelectCode(N);
+}
+
+bool AVRDAGToDAGISel::trySelect(SDNode *N) {
+ unsigned Opcode = N->getOpcode();
+ SDLoc DL(N);
+
+ switch (Opcode) {
+ // Nodes we fully handle.
+ case ISD::FrameIndex: return select<ISD::FrameIndex>(N);
+ case ISD::BRIND: return select<ISD::BRIND>(N);
+ case ISD::UMUL_LOHI:
+ case ISD::SMUL_LOHI: return selectMultiplication(N);
+
+ // Nodes we handle partially. Other cases are autogenerated
+ case ISD::STORE: return select<ISD::STORE>(N);
+ case ISD::LOAD: return select<ISD::LOAD>(N);
+ case AVRISD::CALL: return select<AVRISD::CALL>(N);
+ default: return false;
+ }
+}
+
+FunctionPass *createAVRISelDag(AVRTargetMachine &TM,
+ CodeGenOpt::Level OptLevel) {
+ return new AVRDAGToDAGISel(TM, OptLevel);
+}
+
+} // end of namespace llvm
+
diff --git a/contrib/llvm/lib/Target/AVR/AVRISelLowering.cpp b/contrib/llvm/lib/Target/AVR/AVRISelLowering.cpp
new file mode 100644
index 000000000000..53668f05b59b
--- /dev/null
+++ b/contrib/llvm/lib/Target/AVR/AVRISelLowering.cpp
@@ -0,0 +1,1937 @@
+//===-- AVRISelLowering.cpp - AVR DAG Lowering Implementation -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that AVR uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AVRISelLowering.h"
+
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/ErrorHandling.h"
+
+#include "AVR.h"
+#include "AVRMachineFunctionInfo.h"
+#include "AVRTargetMachine.h"
+#include "MCTargetDesc/AVRMCTargetDesc.h"
+
+namespace llvm {
+
+AVRTargetLowering::AVRTargetLowering(AVRTargetMachine &tm)
+ : TargetLowering(tm) {
+ // Set up the register classes.
+ addRegisterClass(MVT::i8, &AVR::GPR8RegClass);
+ addRegisterClass(MVT::i16, &AVR::DREGSRegClass);
+
+ // Compute derived properties from the register classes.
+ computeRegisterProperties(tm.getSubtargetImpl()->getRegisterInfo());
+
+ setBooleanContents(ZeroOrOneBooleanContent);
+ setBooleanVectorContents(ZeroOrOneBooleanContent);
+ setSchedulingPreference(Sched::RegPressure);
+ setStackPointerRegisterToSaveRestore(AVR::SP);
+
+ setOperationAction(ISD::GlobalAddress, MVT::i16, Custom);
+ setOperationAction(ISD::BlockAddress, MVT::i16, Custom);
+
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i8, Expand);
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i16, Expand);
+
+ for (MVT VT : MVT::integer_valuetypes()) {
+ for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) {
+ setLoadExtAction(N, VT, MVT::i1, Promote);
+ setLoadExtAction(N, VT, MVT::i8, Expand);
+ }
+ }
+
+ setTruncStoreAction(MVT::i16, MVT::i8, Expand);
+
+  // sub (x, imm) gets canonicalized to add (x, -imm), so for illegal types
+  // turn it back into a sub since there is no add-with-immediate instruction.
+ setOperationAction(ISD::ADD, MVT::i32, Custom);
+ setOperationAction(ISD::ADD, MVT::i64, Custom);
+
+  // Our shift instructions can only shift one bit at a time, so handle this in
+  // a custom way.
+ setOperationAction(ISD::SRA, MVT::i8, Custom);
+ setOperationAction(ISD::SHL, MVT::i8, Custom);
+ setOperationAction(ISD::SRL, MVT::i8, Custom);
+ setOperationAction(ISD::SRA, MVT::i16, Custom);
+ setOperationAction(ISD::SHL, MVT::i16, Custom);
+ setOperationAction(ISD::SRL, MVT::i16, Custom);
+ setOperationAction(ISD::SHL_PARTS, MVT::i16, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i16, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i16, Expand);
+
+ setOperationAction(ISD::BR_CC, MVT::i8, Custom);
+ setOperationAction(ISD::BR_CC, MVT::i16, Custom);
+ setOperationAction(ISD::BR_CC, MVT::i32, Custom);
+ setOperationAction(ISD::BR_CC, MVT::i64, Custom);
+ setOperationAction(ISD::BRCOND, MVT::Other, Expand);
+
+ setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i16, Custom);
+ setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
+ setOperationAction(ISD::SETCC, MVT::i8, Custom);
+ setOperationAction(ISD::SETCC, MVT::i16, Custom);
+ setOperationAction(ISD::SETCC, MVT::i32, Custom);
+ setOperationAction(ISD::SETCC, MVT::i64, Custom);
+ setOperationAction(ISD::SELECT, MVT::i8, Expand);
+ setOperationAction(ISD::SELECT, MVT::i16, Expand);
+
+ setOperationAction(ISD::BSWAP, MVT::i16, Expand);
+
+ // Add support for postincrement and predecrement load/stores.
+ setIndexedLoadAction(ISD::POST_INC, MVT::i8, Legal);
+ setIndexedLoadAction(ISD::POST_INC, MVT::i16, Legal);
+ setIndexedLoadAction(ISD::PRE_DEC, MVT::i8, Legal);
+ setIndexedLoadAction(ISD::PRE_DEC, MVT::i16, Legal);
+ setIndexedStoreAction(ISD::POST_INC, MVT::i8, Legal);
+ setIndexedStoreAction(ISD::POST_INC, MVT::i16, Legal);
+ setIndexedStoreAction(ISD::PRE_DEC, MVT::i8, Legal);
+ setIndexedStoreAction(ISD::PRE_DEC, MVT::i16, Legal);
+
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+
+ setOperationAction(ISD::VASTART, MVT::Other, Custom);
+ setOperationAction(ISD::VAEND, MVT::Other, Expand);
+ setOperationAction(ISD::VAARG, MVT::Other, Expand);
+ setOperationAction(ISD::VACOPY, MVT::Other, Expand);
+
+ // Atomic operations which must be lowered to rtlib calls
+ for (MVT VT : MVT::integer_valuetypes()) {
+ setOperationAction(ISD::ATOMIC_SWAP, VT, Expand);
+ setOperationAction(ISD::ATOMIC_CMP_SWAP, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_NAND, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MAX, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_MIN, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMAX, VT, Expand);
+ setOperationAction(ISD::ATOMIC_LOAD_UMIN, VT, Expand);
+ }
+
+ // Division/remainder
+ setOperationAction(ISD::UDIV, MVT::i8, Expand);
+ setOperationAction(ISD::UDIV, MVT::i16, Expand);
+ setOperationAction(ISD::UREM, MVT::i8, Expand);
+ setOperationAction(ISD::UREM, MVT::i16, Expand);
+ setOperationAction(ISD::SDIV, MVT::i8, Expand);
+ setOperationAction(ISD::SDIV, MVT::i16, Expand);
+ setOperationAction(ISD::SREM, MVT::i8, Expand);
+ setOperationAction(ISD::SREM, MVT::i16, Expand);
+
+ // Make division and modulus custom
+ for (MVT VT : MVT::integer_valuetypes()) {
+ setOperationAction(ISD::UDIVREM, VT, Custom);
+ setOperationAction(ISD::SDIVREM, VT, Custom);
+ }
+
+ // Do not use MUL. The AVR instructions are closer to SMUL_LOHI &co.
+ setOperationAction(ISD::MUL, MVT::i8, Expand);
+ setOperationAction(ISD::MUL, MVT::i16, Expand);
+
+ // Expand 16 bit multiplications.
+ setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
+
+ for (MVT VT : MVT::integer_valuetypes()) {
+ setOperationAction(ISD::MULHS, VT, Expand);
+ setOperationAction(ISD::MULHU, VT, Expand);
+ }
+
+ for (MVT VT : MVT::integer_valuetypes()) {
+ setOperationAction(ISD::CTPOP, VT, Expand);
+ setOperationAction(ISD::CTLZ, VT, Expand);
+ setOperationAction(ISD::CTTZ, VT, Expand);
+ }
+
+ for (MVT VT : MVT::integer_valuetypes()) {
+ setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
+ // TODO: The generated code is pretty poor. Investigate using the
+ // same "shift and subtract with carry" trick that we do for
+ // extending 8-bit to 16-bit. This may require infrastructure
+ // improvements in how we treat 16-bit "registers" to be feasible.
+ }
+
+ // Division rtlib functions (not supported)
+ setLibcallName(RTLIB::SDIV_I8, nullptr);
+ setLibcallName(RTLIB::SDIV_I16, nullptr);
+ setLibcallName(RTLIB::SDIV_I32, nullptr);
+ setLibcallName(RTLIB::SDIV_I64, nullptr);
+ setLibcallName(RTLIB::SDIV_I128, nullptr);
+ setLibcallName(RTLIB::UDIV_I8, nullptr);
+ setLibcallName(RTLIB::UDIV_I16, nullptr);
+ setLibcallName(RTLIB::UDIV_I32, nullptr);
+ setLibcallName(RTLIB::UDIV_I64, nullptr);
+ setLibcallName(RTLIB::UDIV_I128, nullptr);
+
+ // Modulus rtlib functions (not supported)
+ setLibcallName(RTLIB::SREM_I8, nullptr);
+ setLibcallName(RTLIB::SREM_I16, nullptr);
+ setLibcallName(RTLIB::SREM_I32, nullptr);
+ setLibcallName(RTLIB::SREM_I64, nullptr);
+ setLibcallName(RTLIB::SREM_I128, nullptr);
+ setLibcallName(RTLIB::UREM_I8, nullptr);
+ setLibcallName(RTLIB::UREM_I16, nullptr);
+ setLibcallName(RTLIB::UREM_I32, nullptr);
+ setLibcallName(RTLIB::UREM_I64, nullptr);
+ setLibcallName(RTLIB::UREM_I128, nullptr);
+
+ // Division and modulus rtlib functions
+ setLibcallName(RTLIB::SDIVREM_I8, "__divmodqi4");
+ setLibcallName(RTLIB::SDIVREM_I16, "__divmodhi4");
+ setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4");
+ setLibcallName(RTLIB::SDIVREM_I64, "__divmoddi4");
+ setLibcallName(RTLIB::SDIVREM_I128, "__divmodti4");
+ setLibcallName(RTLIB::UDIVREM_I8, "__udivmodqi4");
+ setLibcallName(RTLIB::UDIVREM_I16, "__udivmodhi4");
+ setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4");
+ setLibcallName(RTLIB::UDIVREM_I64, "__udivmoddi4");
+ setLibcallName(RTLIB::UDIVREM_I128, "__udivmodti4");
+
+ // Several of the runtime library functions use a special calling conv
+ setLibcallCallingConv(RTLIB::SDIVREM_I8, CallingConv::AVR_BUILTIN);
+ setLibcallCallingConv(RTLIB::SDIVREM_I16, CallingConv::AVR_BUILTIN);
+ setLibcallCallingConv(RTLIB::UDIVREM_I8, CallingConv::AVR_BUILTIN);
+ setLibcallCallingConv(RTLIB::UDIVREM_I16, CallingConv::AVR_BUILTIN);
+
+ // Trigonometric rtlib functions
+ setLibcallName(RTLIB::SIN_F32, "sin");
+ setLibcallName(RTLIB::COS_F32, "cos");
+
+ setMinFunctionAlignment(1);
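+  // Effectively disable jump tables by requiring an unreachable number of
+  // entries.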
+ setMinimumJumpTableEntries(INT_MAX);
+}
+
+const char *AVRTargetLowering::getTargetNodeName(unsigned Opcode) const {
+#define NODE(name) \
+ case AVRISD::name: \
+ return #name
+
+ switch (Opcode) {
+ default:
+ return nullptr;
+ NODE(RET_FLAG);
+ NODE(RETI_FLAG);
+ NODE(CALL);
+ NODE(WRAPPER);
+ NODE(LSL);
+ NODE(LSR);
+ NODE(ROL);
+ NODE(ROR);
+ NODE(ASR);
+ NODE(LSLLOOP);
+ NODE(LSRLOOP);
+ NODE(ASRLOOP);
+ NODE(BRCOND);
+ NODE(CMP);
+ NODE(CMPC);
+ NODE(TST);
+ NODE(SELECT_CC);
+#undef NODE
+ }
+}
+
+EVT AVRTargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
+ EVT VT) const {
+ assert(!VT.isVector() && "No AVR SetCC type for vectors!");
+ return MVT::i8;
+}
+
+SDValue AVRTargetLowering::LowerShifts(SDValue Op, SelectionDAG &DAG) const {
+  //:TODO: This function has to be completely rewritten to produce optimal
+  // code; for now it produces very long but correct code.
+ unsigned Opc8;
+ const SDNode *N = Op.getNode();
+ EVT VT = Op.getValueType();
+ SDLoc dl(N);
+
+ // Expand non-constant shifts to loops.
+ if (!isa<ConstantSDNode>(N->getOperand(1))) {
+ switch (Op.getOpcode()) {
+ default:
+ llvm_unreachable("Invalid shift opcode!");
+ case ISD::SHL:
+ return DAG.getNode(AVRISD::LSLLOOP, dl, VT, N->getOperand(0),
+ N->getOperand(1));
+ case ISD::SRL:
+ return DAG.getNode(AVRISD::LSRLOOP, dl, VT, N->getOperand(0),
+ N->getOperand(1));
+ case ISD::SRA:
+ return DAG.getNode(AVRISD::ASRLOOP, dl, VT, N->getOperand(0),
+ N->getOperand(1));
+ }
+ }
+
+ uint64_t ShiftAmount = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
+ SDValue Victim = N->getOperand(0);
+
+ switch (Op.getOpcode()) {
+ case ISD::SRA:
+ Opc8 = AVRISD::ASR;
+ break;
+ case ISD::ROTL:
+ Opc8 = AVRISD::ROL;
+ break;
+ case ISD::ROTR:
+ Opc8 = AVRISD::ROR;
+ break;
+ case ISD::SRL:
+ Opc8 = AVRISD::LSR;
+ break;
+ case ISD::SHL:
+ Opc8 = AVRISD::LSL;
+ break;
+ default:
+ llvm_unreachable("Invalid shift opcode");
+ }
+
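+  // AVR has no barrel shifter, so a constant shift is expanded into a chain of
+  // single-bit shift nodes.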
+ while (ShiftAmount--) {
+ Victim = DAG.getNode(Opc8, dl, VT, Victim);
+ }
+
+ return Victim;
+}
+
+SDValue AVRTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const {
+ unsigned Opcode = Op->getOpcode();
+ assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) &&
+ "Invalid opcode for Div/Rem lowering");
+ bool isSigned = (Opcode == ISD::SDIVREM);
+ EVT VT = Op->getValueType(0);
+ Type *Ty = VT.getTypeForEVT(*DAG.getContext());
+
+ RTLIB::Libcall LC;
+ switch (VT.getSimpleVT().SimpleTy) {
+ default:
+ llvm_unreachable("Unexpected request for libcall!");
+ case MVT::i8:
+ LC = isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8;
+ break;
+ case MVT::i16:
+ LC = isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16;
+ break;
+ case MVT::i32:
+ LC = isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32;
+ break;
+ case MVT::i64:
+ LC = isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64;
+ break;
+ }
+
+ SDValue InChain = DAG.getEntryNode();
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (SDValue const &Value : Op->op_values()) {
+ Entry.Node = Value;
+ Entry.Ty = Value.getValueType().getTypeForEVT(*DAG.getContext());
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+
+ SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC),
+ getPointerTy(DAG.getDataLayout()));
+
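+  // The divmod libcalls return both the quotient and the remainder, modeled
+  // here as a two-element struct of the operand type.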
+ Type *RetTy = (Type *)StructType::get(Ty, Ty, nullptr);
+
+ SDLoc dl(Op);
+ TargetLowering::CallLoweringInfo CLI(DAG);
+ CLI.setDebugLoc(dl)
+ .setChain(InChain)
+ .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
+ .setInRegister()
+ .setSExtResult(isSigned)
+ .setZExtResult(!isSigned);
+
+ std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
+ return CallInfo.first;
+}
+
+SDValue AVRTargetLowering::LowerGlobalAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ auto DL = DAG.getDataLayout();
+
+ const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
+
+ // Create the TargetGlobalAddress node, folding in the constant offset.
+ SDValue Result =
+ DAG.getTargetGlobalAddress(GV, SDLoc(Op), getPointerTy(DL), Offset);
+ return DAG.getNode(AVRISD::WRAPPER, SDLoc(Op), getPointerTy(DL), Result);
+}
+
+SDValue AVRTargetLowering::LowerBlockAddress(SDValue Op,
+ SelectionDAG &DAG) const {
+ auto DL = DAG.getDataLayout();
+ const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
+
+ SDValue Result = DAG.getTargetBlockAddress(BA, getPointerTy(DL));
+
+ return DAG.getNode(AVRISD::WRAPPER, SDLoc(Op), getPointerTy(DL), Result);
+}
+
+/// IntCCToAVRCC - Convert a DAG integer condition code to an AVR CC.
+static AVRCC::CondCodes intCCToAVRCC(ISD::CondCode CC) {
+ switch (CC) {
+ default:
+ llvm_unreachable("Unknown condition code!");
+ case ISD::SETEQ:
+ return AVRCC::COND_EQ;
+ case ISD::SETNE:
+ return AVRCC::COND_NE;
+ case ISD::SETGE:
+ return AVRCC::COND_GE;
+ case ISD::SETLT:
+ return AVRCC::COND_LT;
+ case ISD::SETUGE:
+ return AVRCC::COND_SH;
+ case ISD::SETULT:
+ return AVRCC::COND_LO;
+ }
+}
+
+/// Returns appropriate AVR CMP/CMPC nodes and corresponding condition code for
+/// the given operands.
+SDValue AVRTargetLowering::getAVRCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
+ SDValue &AVRcc, SelectionDAG &DAG,
+ SDLoc DL) const {
+ SDValue Cmp;
+ EVT VT = LHS.getValueType();
+ bool UseTest = false;
+
+ switch (CC) {
+ default:
+ break;
+ case ISD::SETLE: {
+ // Swap operands and reverse the branching condition.
+ std::swap(LHS, RHS);
+ CC = ISD::SETGE;
+ break;
+ }
+ case ISD::SETGT: {
+ if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
+ switch (C->getSExtValue()) {
+ case -1: {
+ // When doing lhs > -1 use a tst instruction on the top part of lhs
+ // and use brpl instead of using a chain of cp/cpc.
+ UseTest = true;
+ AVRcc = DAG.getConstant(AVRCC::COND_PL, DL, MVT::i8);
+ break;
+ }
+ case 0: {
+ // Turn lhs > 0 into 0 < lhs since 0 can be materialized with
+ // __zero_reg__ in lhs.
+ RHS = LHS;
+ LHS = DAG.getConstant(0, DL, VT);
+ CC = ISD::SETLT;
+ break;
+ }
+ default: {
+        // Turn lhs > rhs with rhs constant into lhs >= rhs+1; this allows us
+        // to fold the constant into the cmp instruction.
+ RHS = DAG.getConstant(C->getSExtValue() + 1, DL, VT);
+ CC = ISD::SETGE;
+ break;
+ }
+ }
+ break;
+ }
+ // Swap operands and reverse the branching condition.
+ std::swap(LHS, RHS);
+ CC = ISD::SETLT;
+ break;
+ }
+ case ISD::SETLT: {
+ if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
+ switch (C->getSExtValue()) {
+ case 1: {
+ // Turn lhs < 1 into 0 >= lhs since 0 can be materialized with
+ // __zero_reg__ in lhs.
+ RHS = LHS;
+ LHS = DAG.getConstant(0, DL, VT);
+ CC = ISD::SETGE;
+ break;
+ }
+ case 0: {
+ // When doing lhs < 0 use a tst instruction on the top part of lhs
+ // and use brmi instead of using a chain of cp/cpc.
+ UseTest = true;
+ AVRcc = DAG.getConstant(AVRCC::COND_MI, DL, MVT::i8);
+ break;
+ }
+ }
+ }
+ break;
+ }
+ case ISD::SETULE: {
+ // Swap operands and reverse the branching condition.
+ std::swap(LHS, RHS);
+ CC = ISD::SETUGE;
+ break;
+ }
+ case ISD::SETUGT: {
+    // Turn lhs > rhs with rhs constant into lhs >= rhs+1 (unsigned); this
+    // allows us to fold the constant into the cmp instruction.
+ if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
+ RHS = DAG.getConstant(C->getSExtValue() + 1, DL, VT);
+ CC = ISD::SETUGE;
+ break;
+ }
+ // Swap operands and reverse the branching condition.
+ std::swap(LHS, RHS);
+ CC = ISD::SETULT;
+ break;
+ }
+ }
+
+ // Expand 32 and 64 bit comparisons with custom CMP and CMPC nodes instead of
+ // using the default and/or/xor expansion code which is much longer.
+ if (VT == MVT::i32) {
+ SDValue LHSlo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS,
+ DAG.getIntPtrConstant(0, DL));
+ SDValue LHShi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS,
+ DAG.getIntPtrConstant(1, DL));
+ SDValue RHSlo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS,
+ DAG.getIntPtrConstant(0, DL));
+ SDValue RHShi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS,
+ DAG.getIntPtrConstant(1, DL));
+
+ if (UseTest) {
+ // When using tst we only care about the highest part.
+ SDValue Top = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHShi,
+ DAG.getIntPtrConstant(1, DL));
+ Cmp = DAG.getNode(AVRISD::TST, DL, MVT::Glue, Top);
+ } else {
+ Cmp = DAG.getNode(AVRISD::CMP, DL, MVT::Glue, LHSlo, RHSlo);
+ Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHShi, RHShi, Cmp);
+ }
+ } else if (VT == MVT::i64) {
+ SDValue LHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, LHS,
+ DAG.getIntPtrConstant(0, DL));
+ SDValue LHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, LHS,
+ DAG.getIntPtrConstant(1, DL));
+
+ SDValue LHS0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS_0,
+ DAG.getIntPtrConstant(0, DL));
+ SDValue LHS1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS_0,
+ DAG.getIntPtrConstant(1, DL));
+ SDValue LHS2 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS_1,
+ DAG.getIntPtrConstant(0, DL));
+ SDValue LHS3 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, LHS_1,
+ DAG.getIntPtrConstant(1, DL));
+
+ SDValue RHS_0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, RHS,
+ DAG.getIntPtrConstant(0, DL));
+ SDValue RHS_1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, RHS,
+ DAG.getIntPtrConstant(1, DL));
+
+ SDValue RHS0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS_0,
+ DAG.getIntPtrConstant(0, DL));
+ SDValue RHS1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS_0,
+ DAG.getIntPtrConstant(1, DL));
+ SDValue RHS2 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS_1,
+ DAG.getIntPtrConstant(0, DL));
+ SDValue RHS3 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i16, RHS_1,
+ DAG.getIntPtrConstant(1, DL));
+
+ if (UseTest) {
+ // When using tst we only care about the highest part.
+ SDValue Top = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8, LHS3,
+ DAG.getIntPtrConstant(1, DL));
+ Cmp = DAG.getNode(AVRISD::TST, DL, MVT::Glue, Top);
+ } else {
+ Cmp = DAG.getNode(AVRISD::CMP, DL, MVT::Glue, LHS0, RHS0);
+ Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHS1, RHS1, Cmp);
+ Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHS2, RHS2, Cmp);
+ Cmp = DAG.getNode(AVRISD::CMPC, DL, MVT::Glue, LHS3, RHS3, Cmp);
+ }
+ } else if (VT == MVT::i8 || VT == MVT::i16) {
+ if (UseTest) {
+ // When using tst we only care about the highest part.
+ Cmp = DAG.getNode(AVRISD::TST, DL, MVT::Glue,
+ (VT == MVT::i8)
+ ? LHS
+ : DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i8,
+ LHS, DAG.getIntPtrConstant(1, DL)));
+ } else {
+ Cmp = DAG.getNode(AVRISD::CMP, DL, MVT::Glue, LHS, RHS);
+ }
+ } else {
+ llvm_unreachable("Invalid comparison size");
+ }
+
+ // When using a test instruction AVRcc is already set.
+ if (!UseTest) {
+ AVRcc = DAG.getConstant(intCCToAVRCC(CC), DL, MVT::i8);
+ }
+
+ return Cmp;
+}
+
+SDValue AVRTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Chain = Op.getOperand(0);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+ SDValue LHS = Op.getOperand(2);
+ SDValue RHS = Op.getOperand(3);
+ SDValue Dest = Op.getOperand(4);
+ SDLoc dl(Op);
+
+ SDValue TargetCC;
+ SDValue Cmp = getAVRCmp(LHS, RHS, CC, TargetCC, DAG, dl);
+
+ return DAG.getNode(AVRISD::BRCOND, dl, MVT::Other, Chain, Dest, TargetCC,
+ Cmp);
+}
+
+SDValue AVRTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue TrueV = Op.getOperand(2);
+ SDValue FalseV = Op.getOperand(3);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
+ SDLoc dl(Op);
+
+ SDValue TargetCC;
+ SDValue Cmp = getAVRCmp(LHS, RHS, CC, TargetCC, DAG, dl);
+
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
+ SDValue Ops[] = {TrueV, FalseV, TargetCC, Cmp};
+
+ return DAG.getNode(AVRISD::SELECT_CC, dl, VTs, Ops);
+}
+
+SDValue AVRTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ SDLoc DL(Op);
+
+ SDValue TargetCC;
+ SDValue Cmp = getAVRCmp(LHS, RHS, CC, TargetCC, DAG, DL);
+
+ SDValue TrueV = DAG.getConstant(1, DL, Op.getValueType());
+ SDValue FalseV = DAG.getConstant(0, DL, Op.getValueType());
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue);
+ SDValue Ops[] = {TrueV, FalseV, TargetCC, Cmp};
+
+ return DAG.getNode(AVRISD::SELECT_CC, DL, VTs, Ops);
+}
+
+SDValue AVRTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
+ const MachineFunction &MF = DAG.getMachineFunction();
+ const AVRMachineFunctionInfo *AFI = MF.getInfo<AVRMachineFunctionInfo>();
+ const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
+ auto DL = DAG.getDataLayout();
+ SDLoc dl(Op);
+
+ // Vastart just stores the address of the VarArgsFrameIndex slot into the
+ // memory location argument.
+ SDValue FI = DAG.getFrameIndex(AFI->getVarArgsFrameIndex(), getPointerTy(DL));
+
+ return DAG.getStore(Op.getOperand(0), dl, FI, Op.getOperand(1),
+ MachinePointerInfo(SV), 0);
+}
+
+SDValue AVRTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
+ switch (Op.getOpcode()) {
+ default:
+ llvm_unreachable("Don't know how to custom lower this!");
+ case ISD::SHL:
+ case ISD::SRA:
+ case ISD::SRL:
+ case ISD::ROTL:
+ case ISD::ROTR:
+ return LowerShifts(Op, DAG);
+ case ISD::GlobalAddress:
+ return LowerGlobalAddress(Op, DAG);
+ case ISD::BlockAddress:
+ return LowerBlockAddress(Op, DAG);
+ case ISD::BR_CC:
+ return LowerBR_CC(Op, DAG);
+ case ISD::SELECT_CC:
+ return LowerSELECT_CC(Op, DAG);
+ case ISD::SETCC:
+ return LowerSETCC(Op, DAG);
+ case ISD::VASTART:
+ return LowerVASTART(Op, DAG);
+ case ISD::SDIVREM:
+ case ISD::UDIVREM:
+ return LowerDivRem(Op, DAG);
+ }
+
+ return SDValue();
+}
+
+/// Replace a node with an illegal result type
+/// with a new node built out of custom code.
+void AVRTargetLowering::ReplaceNodeResults(SDNode *N,
+ SmallVectorImpl<SDValue> &Results,
+ SelectionDAG &DAG) const {
+ SDLoc DL(N);
+
+ switch (N->getOpcode()) {
+ case ISD::ADD: {
+ // Convert add (x, imm) into sub (x, -imm).
+ if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
+ SDValue Sub = DAG.getNode(
+ ISD::SUB, DL, N->getValueType(0), N->getOperand(0),
+ DAG.getConstant(-C->getAPIntValue(), DL, C->getValueType(0)));
+ Results.push_back(Sub);
+ }
+ break;
+ }
+ default: {
+ SDValue Res = LowerOperation(SDValue(N, 0), DAG);
+
+ for (unsigned I = 0, E = Res->getNumValues(); I != E; ++I)
+ Results.push_back(Res.getValue(I));
+
+ break;
+ }
+ }
+}
+
+/// Return true if the addressing mode represented
+/// by AM is legal for this target, for a load/store of the specified type.
+bool AVRTargetLowering::isLegalAddressingMode(const DataLayout &DL,
+ const AddrMode &AM, Type *Ty,
+ unsigned AS) const {
+ int64_t Offs = AM.BaseOffs;
+
+ // Allow absolute addresses.
+ if (AM.BaseGV && !AM.HasBaseReg && AM.Scale == 0 && Offs == 0) {
+ return true;
+ }
+
+ // Flash memory instructions only allow zero offsets.
+ if (isa<PointerType>(Ty) && AS == AVR::ProgramMemory) {
+ return false;
+ }
+
+ // Allow reg+<6bit> offset.
+ if (Offs < 0)
+ Offs = -Offs;
+ if (AM.BaseGV == 0 && AM.HasBaseReg && AM.Scale == 0 && isUInt<6>(Offs)) {
+ return true;
+ }
+
+ return false;
+}
+
+/// Returns true by value, base pointer and
+/// offset pointer and addressing mode by reference if the node's address
+/// can be legally represented as pre-indexed load / store address.
+bool AVRTargetLowering::getPreIndexedAddressParts(SDNode *N, SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const {
+ EVT VT;
+ const SDNode *Op;
+ SDLoc DL(N);
+
+ if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ VT = LD->getMemoryVT();
+ Op = LD->getBasePtr().getNode();
+ if (LD->getExtensionType() != ISD::NON_EXTLOAD)
+ return false;
+ if (AVR::isProgramMemoryAccess(LD)) {
+ return false;
+ }
+ } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ VT = ST->getMemoryVT();
+ Op = ST->getBasePtr().getNode();
+ if (AVR::isProgramMemoryAccess(ST)) {
+ return false;
+ }
+ } else {
+ return false;
+ }
+
+ if (VT != MVT::i8 && VT != MVT::i16) {
+ return false;
+ }
+
+ if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB) {
+ return false;
+ }
+
+ if (const ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
+ int RHSC = RHS->getSExtValue();
+ if (Op->getOpcode() == ISD::SUB)
+ RHSC = -RHSC;
+
+ if ((VT == MVT::i16 && RHSC != -2) || (VT == MVT::i8 && RHSC != -1)) {
+ return false;
+ }
+
+ Base = Op->getOperand(0);
+ Offset = DAG.getConstant(RHSC, DL, MVT::i8);
+ AM = ISD::PRE_DEC;
+
+ return true;
+ }
+
+ return false;
+}
+
+/// Returns true by value, base pointer and
+/// offset pointer and addressing mode by reference if this node can be
+/// combined with a load / store to form a post-indexed load / store.
+bool AVRTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+ SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const {
+ EVT VT;
+ SDLoc DL(N);
+
+ if (const LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
+ VT = LD->getMemoryVT();
+ if (LD->getExtensionType() != ISD::NON_EXTLOAD)
+ return false;
+ } else if (const StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
+ VT = ST->getMemoryVT();
+ if (AVR::isProgramMemoryAccess(ST)) {
+ return false;
+ }
+ } else {
+ return false;
+ }
+
+ if (VT != MVT::i8 && VT != MVT::i16) {
+ return false;
+ }
+
+ if (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB) {
+ return false;
+ }
+
+ if (const ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
+ int RHSC = RHS->getSExtValue();
+ if (Op->getOpcode() == ISD::SUB)
+ RHSC = -RHSC;
+ if ((VT == MVT::i16 && RHSC != 2) || (VT == MVT::i8 && RHSC != 1)) {
+ return false;
+ }
+
+ Base = Op->getOperand(0);
+ Offset = DAG.getConstant(RHSC, DL, MVT::i8);
+ AM = ISD::POST_INC;
+
+ return true;
+ }
+
+ return false;
+}
+
+bool AVRTargetLowering::isOffsetFoldingLegal(
+ const GlobalAddressSDNode *GA) const {
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// Formal Arguments Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+#include "AVRGenCallingConv.inc"
+
+/// For each argument in a function store the number of pieces it is composed
+/// of.
+static void parseFunctionArgs(const Function *F, const DataLayout *TD,
+ SmallVectorImpl<unsigned> &Out) {
+ for (Argument const &Arg : F->args()) {
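+    // Each argument is passed as a series of 2-byte pieces; record how many
+    // pieces it occupies.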
+ unsigned Bytes = (TD->getTypeSizeInBits(Arg.getType()) + 7) / 8;
+ Out.push_back((Bytes + 1) / 2);
+ }
+}
+
+/// For external symbols there is no function prototype information so we
+/// have to rely directly on argument sizes.
+static void parseExternFuncCallArgs(const SmallVectorImpl<ISD::OutputArg> &In,
+ SmallVectorImpl<unsigned> &Out) {
+ for (unsigned i = 0, e = In.size(); i != e;) {
+ unsigned Size = 0;
+ unsigned Offset = 0;
+ while ((i != e) && (In[i].PartOffset == Offset)) {
+ Offset += In[i].VT.getStoreSize();
+ ++i;
+ ++Size;
+ }
+ Out.push_back(Size);
+ }
+}
+
+static StringRef getFunctionName(TargetLowering::CallLoweringInfo &CLI) {
+ SDValue Callee = CLI.Callee;
+
+ if (const ExternalSymbolSDNode *G = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ return G->getSymbol();
+ }
+
+ if (const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ return G->getGlobal()->getName();
+ }
+
+ llvm_unreachable("don't know how to get the name for this callee");
+}
+
+/// Analyze incoming and outgoing function arguments. We need custom C++ code
+/// to handle special constraints in the ABI like reversing the order of the
+/// pieces of split arguments. In addition, all pieces of a certain argument
+/// have to be passed either using registers or the stack but never mixing both.
+static void analyzeStandardArguments(TargetLowering::CallLoweringInfo *CLI,
+ const Function *F, const DataLayout *TD,
+ const SmallVectorImpl<ISD::OutputArg> *Outs,
+ const SmallVectorImpl<ISD::InputArg> *Ins,
+ CallingConv::ID CallConv,
+ SmallVectorImpl<CCValAssign> &ArgLocs,
+ CCState &CCInfo, bool IsCall, bool IsVarArg) {
+ static const MCPhysReg RegList8[] = {AVR::R24, AVR::R22, AVR::R20,
+ AVR::R18, AVR::R16, AVR::R14,
+ AVR::R12, AVR::R10, AVR::R8};
+ static const MCPhysReg RegList16[] = {AVR::R25R24, AVR::R23R22, AVR::R21R20,
+ AVR::R19R18, AVR::R17R16, AVR::R15R14,
+ AVR::R13R12, AVR::R11R10, AVR::R9R8};
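+  // Argument registers are allocated downwards, in the order given by the
+  // lists above.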
+ if (IsVarArg) {
+    // Variadic functions do not need all the analysis below.
+ if (IsCall) {
+ CCInfo.AnalyzeCallOperands(*Outs, ArgCC_AVR_Vararg);
+ } else {
+ CCInfo.AnalyzeFormalArguments(*Ins, ArgCC_AVR_Vararg);
+ }
+ return;
+ }
+
+ // Fill in the Args array which will contain original argument sizes.
+ SmallVector<unsigned, 8> Args;
+ if (IsCall) {
+ parseExternFuncCallArgs(*Outs, Args);
+ } else {
+ assert(F != nullptr && "function should not be null");
+ parseFunctionArgs(F, TD, Args);
+ }
+
+ unsigned RegsLeft = array_lengthof(RegList8), ValNo = 0;
+  // Once an argument is passed on the stack, all remaining arguments are also
+  // passed on the stack.
+ bool UsesStack = false;
+ for (unsigned i = 0, pos = 0, e = Args.size(); i != e; ++i) {
+ unsigned Size = Args[i];
+ MVT LocVT = (IsCall) ? (*Outs)[pos].VT : (*Ins)[pos].VT;
+
+    // If there are enough registers left to pass the whole argument, do so.
+ if (!UsesStack && (Size <= RegsLeft)) {
+ const MCPhysReg *RegList = (LocVT == MVT::i16) ? RegList16 : RegList8;
+
+ for (unsigned j = 0; j != Size; ++j) {
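+        // Both register lists have the same number of entries, so using
+        // RegList8's length here is safe for either list.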
+ unsigned Reg = CCInfo.AllocateReg(
+ ArrayRef<MCPhysReg>(RegList, array_lengthof(RegList8)));
+ CCInfo.addLoc(
+ CCValAssign::getReg(ValNo++, LocVT, Reg, LocVT, CCValAssign::Full));
+ --RegsLeft;
+ }
+
+ // Reverse the order of the pieces to agree with the "big endian" format
+ // required in the calling convention ABI.
+ std::reverse(ArgLocs.begin() + pos, ArgLocs.begin() + pos + Size);
+ } else {
+ // Pass the rest of arguments using the stack.
+ UsesStack = true;
+ for (unsigned j = 0; j != Size; ++j) {
+ unsigned Offset = CCInfo.AllocateStack(
+ TD->getTypeAllocSize(EVT(LocVT).getTypeForEVT(CCInfo.getContext())),
+ TD->getABITypeAlignment(
+ EVT(LocVT).getTypeForEVT(CCInfo.getContext())));
+ CCInfo.addLoc(CCValAssign::getMem(ValNo++, LocVT, Offset, LocVT,
+ CCValAssign::Full));
+ }
+ }
+ pos += Size;
+ }
+}
+
+static void analyzeBuiltinArguments(TargetLowering::CallLoweringInfo &CLI,
+ const Function *F, const DataLayout *TD,
+ const SmallVectorImpl<ISD::OutputArg> *Outs,
+ const SmallVectorImpl<ISD::InputArg> *Ins,
+ CallingConv::ID CallConv,
+ SmallVectorImpl<CCValAssign> &ArgLocs,
+ CCState &CCInfo, bool IsCall, bool IsVarArg) {
+ StringRef FuncName = getFunctionName(CLI);
+
+ if (FuncName.startswith("__udivmod") || FuncName.startswith("__divmod")) {
+ CCInfo.AnalyzeCallOperands(*Outs, ArgCC_AVR_BUILTIN_DIV);
+ } else {
+ analyzeStandardArguments(&CLI, F, TD, Outs, Ins,
+ CallConv, ArgLocs, CCInfo,
+ IsCall, IsVarArg);
+ }
+}
+
+static void analyzeArguments(TargetLowering::CallLoweringInfo *CLI,
+ const Function *F, const DataLayout *TD,
+ const SmallVectorImpl<ISD::OutputArg> *Outs,
+ const SmallVectorImpl<ISD::InputArg> *Ins,
+ CallingConv::ID CallConv,
+ SmallVectorImpl<CCValAssign> &ArgLocs,
+ CCState &CCInfo, bool IsCall, bool IsVarArg) {
+ switch (CallConv) {
+ case CallingConv::AVR_BUILTIN: {
+ analyzeBuiltinArguments(*CLI, F, TD, Outs, Ins,
+ CallConv, ArgLocs, CCInfo,
+ IsCall, IsVarArg);
+ return;
+ }
+ default: {
+ analyzeStandardArguments(CLI, F, TD, Outs, Ins,
+ CallConv, ArgLocs, CCInfo,
+ IsCall, IsVarArg);
+ return;
+ }
+ }
+}
+
+SDValue AVRTargetLowering::LowerFormalArguments(
+ SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ auto DL = DAG.getDataLayout();
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
+
+  analyzeArguments(nullptr, MF.getFunction(), &DL, nullptr, &Ins, CallConv,
+                   ArgLocs, CCInfo, false, isVarArg);
+
+ SDValue ArgValue;
+ for (CCValAssign &VA : ArgLocs) {
+
+ // Arguments stored on registers.
+ if (VA.isRegLoc()) {
+ EVT RegVT = VA.getLocVT();
+ const TargetRegisterClass *RC;
+ if (RegVT == MVT::i8) {
+ RC = &AVR::GPR8RegClass;
+ } else if (RegVT == MVT::i16) {
+ RC = &AVR::DREGSRegClass;
+ } else {
+ llvm_unreachable("Unknown argument type!");
+ }
+
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
+
+ // :NOTE: Clang should not promote any i8 into i16 but for safety the
+ // following code will handle zexts or sexts generated by other
+ // front ends. Otherwise:
+ // If this is an 8 bit value, it is really passed promoted
+ // to 16 bits. Insert an assert[sz]ext to capture this, then
+ // truncate to the right size.
+ switch (VA.getLocInfo()) {
+ default:
+ llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full:
+ break;
+ case CCValAssign::BCvt:
+ ArgValue = DAG.getNode(ISD::BITCAST, dl, VA.getValVT(), ArgValue);
+ break;
+ case CCValAssign::SExt:
+ ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+ break;
+ case CCValAssign::ZExt:
+ ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+ break;
+ }
+
+ InVals.push_back(ArgValue);
+ } else {
+ // Sanity check.
+ assert(VA.isMemLoc());
+
+ EVT LocVT = VA.getLocVT();
+
+ // Create the frame index object for this incoming parameter.
+ int FI = MFI.CreateFixedObject(LocVT.getSizeInBits() / 8,
+ VA.getLocMemOffset(), true);
+
+ // Create the SelectionDAG nodes corresponding to a load
+ // from this parameter.
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DL));
+ InVals.push_back(DAG.getLoad(LocVT, dl, Chain, FIN,
+ MachinePointerInfo::getFixedStack(MF, FI),
+ 0));
+ }
+ }
+
+ // If the function takes variable number of arguments, make a frame index for
+ // the start of the first vararg value... for expansion of llvm.va_start.
+ if (isVarArg) {
+ unsigned StackSize = CCInfo.getNextStackOffset();
+ AVRMachineFunctionInfo *AFI = MF.getInfo<AVRMachineFunctionInfo>();
+
+ AFI->setVarArgsFrameIndex(MFI.CreateFixedObject(2, StackSize, true));
+ }
+
+ return Chain;
+}
+
+//===----------------------------------------------------------------------===//
+// Call Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+SDValue AVRTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG = CLI.DAG;
+ SDLoc &DL = CLI.DL;
+ SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
+ SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
+ SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
+ SDValue Chain = CLI.Chain;
+ SDValue Callee = CLI.Callee;
+ bool &isTailCall = CLI.IsTailCall;
+ CallingConv::ID CallConv = CLI.CallConv;
+ bool isVarArg = CLI.IsVarArg;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+
+ // AVR does not yet support tail call optimization.
+ isTailCall = false;
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
+
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+ // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
+ // node so that legalize doesn't hack it.
+ const Function *F = nullptr;
+ if (const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ const GlobalValue *GV = G->getGlobal();
+
+ F = cast<Function>(GV);
+ Callee =
+ DAG.getTargetGlobalAddress(GV, DL, getPointerTy(DAG.getDataLayout()));
+ } else if (const ExternalSymbolSDNode *ES =
+ dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ Callee = DAG.getTargetExternalSymbol(ES->getSymbol(),
+ getPointerTy(DAG.getDataLayout()));
+ }
+
+  analyzeArguments(&CLI, F, &DAG.getDataLayout(), &Outs, nullptr, CallConv,
+                   ArgLocs, CCInfo, true, isVarArg);
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
+ DL);
+
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+
+ // First, walk the register assignments, inserting copies.
+ unsigned AI, AE;
+ bool HasStackArgs = false;
+ for (AI = 0, AE = ArgLocs.size(); AI != AE; ++AI) {
+ CCValAssign &VA = ArgLocs[AI];
+ EVT RegVT = VA.getLocVT();
+ SDValue Arg = OutVals[AI];
+
+ // Promote the value if needed. With Clang this should not happen.
+ switch (VA.getLocInfo()) {
+ default:
+ llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full:
+ break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, RegVT, Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, RegVT, Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, DL, RegVT, Arg);
+ break;
+ case CCValAssign::BCvt:
+ Arg = DAG.getNode(ISD::BITCAST, DL, RegVT, Arg);
+ break;
+ }
+
+    // Stop when we encounter a stack argument; stack arguments are processed
+    // in reverse order in the loop below.
+ if (VA.isMemLoc()) {
+ HasStackArgs = true;
+ break;
+ }
+
+ // Arguments that can be passed on registers must be kept in the RegsToPass
+ // vector.
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ }
+
+  // Second, stack arguments have to be walked in reverse order by inserting
+  // chained stores; this ensures their order is not changed by the scheduler
+  // and that the generated push instruction sequence is correct. Otherwise
+  // they could be freely intermixed.
+ if (HasStackArgs) {
+ for (AE = AI, AI = ArgLocs.size(); AI != AE; --AI) {
+ unsigned Loc = AI - 1;
+ CCValAssign &VA = ArgLocs[Loc];
+ SDValue Arg = OutVals[Loc];
+
+ assert(VA.isMemLoc());
+
+ // SP points to one stack slot further so add one to adjust it.
+ SDValue PtrOff = DAG.getNode(
+ ISD::ADD, DL, getPointerTy(DAG.getDataLayout()),
+ DAG.getRegister(AVR::SP, getPointerTy(DAG.getDataLayout())),
+ DAG.getIntPtrConstant(VA.getLocMemOffset() + 1, DL));
+
+ Chain =
+ DAG.getStore(Chain, DL, Arg, PtrOff,
+ MachinePointerInfo::getStack(MF, VA.getLocMemOffset()),
+ 0);
+ }
+ }
+
+ // Build a sequence of copy-to-reg nodes chained together with token chain and
+  // flag operands which copy the outgoing args into registers. The InFlag is
+  // necessary since all emitted instructions must be stuck together.
+ SDValue InFlag;
+ for (auto Reg : RegsToPass) {
+ Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // Returns a chain & a flag for retval copy to use.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are known live
+ // into the call.
+ for (auto Reg : RegsToPass) {
+ Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
+ }
+
+ // Add a register mask operand representing the call-preserved registers.
+ const AVRTargetMachine &TM = (const AVRTargetMachine &)getTargetMachine();
+ const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo();
+ const uint32_t *Mask =
+ TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
+
+ if (InFlag.getNode()) {
+ Ops.push_back(InFlag);
+ }
+
+ Chain = DAG.getNode(AVRISD::CALL, DL, NodeTys, Ops);
+ InFlag = Chain.getValue(1);
+
+ // Create the CALLSEQ_END node.
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, DL, true),
+ DAG.getIntPtrConstant(0, DL, true), InFlag, DL);
+
+ if (!Ins.empty()) {
+ InFlag = Chain.getValue(1);
+ }
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, DL, DAG,
+ InVals);
+}
+
+/// Lower the result values of a call into the
+/// appropriate copies out of appropriate physical registers.
+///
+SDValue AVRTargetLowering::LowerCallResult(
+ SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) const {
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
+
+ // Handle runtime calling convs.
+ auto CCFunction = CCAssignFnForReturn(CallConv);
+ CCInfo.AnalyzeCallResult(Ins, CCFunction);
+
+ if (CallConv != CallingConv::AVR_BUILTIN && RVLocs.size() > 1) {
+    // Reverse split return values to get the "big endian" format required
+ // to agree with the calling convention ABI.
+ std::reverse(RVLocs.begin(), RVLocs.end());
+ }
+
+ // Copy all of the result registers out of their specified physreg.
+ for (CCValAssign const &RVLoc : RVLocs) {
+ Chain = DAG.getCopyFromReg(Chain, dl, RVLoc.getLocReg(), RVLoc.getValVT(),
+ InFlag)
+ .getValue(1);
+ InFlag = Chain.getValue(2);
+ InVals.push_back(Chain.getValue(0));
+ }
+
+ return Chain;
+}
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+CCAssignFn *AVRTargetLowering::CCAssignFnForReturn(CallingConv::ID CC) const {
+ switch (CC) {
+ case CallingConv::AVR_BUILTIN:
+ return RetCC_AVR_BUILTIN;
+ default:
+ return RetCC_AVR;
+ }
+}
+
+bool
+AVRTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
+ MachineFunction &MF, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const
+{
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
+
+ auto CCFunction = CCAssignFnForReturn(CallConv);
+ return CCInfo.CheckReturn(Outs, CCFunction);
+}
+
+SDValue
+AVRTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ const SDLoc &dl, SelectionDAG &DAG) const {
+ // CCValAssign - represent the assignment of the return value to locations.
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ // CCState - Info about the registers and stack slot.
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
+
+ // Analyze return values.
+ auto CCFunction = CCAssignFnForReturn(CallConv);
+ CCInfo.AnalyzeReturn(Outs, CCFunction);
+
+ // If this is the first return lowered for this function, add the regs to
+ // the liveout set for the function.
+ MachineFunction &MF = DAG.getMachineFunction();
+ unsigned e = RVLocs.size();
+
+ // Reverse the split return values to get the "big endian" format required
+ // to agree with the calling convention ABI.
+ if (e > 1) {
+ std::reverse(RVLocs.begin(), RVLocs.end());
+ }
+
+ SDValue Flag;
+ SmallVector<SDValue, 4> RetOps(1, Chain);
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != e; ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), OutVals[i], Flag);
+
+ // Guarantee that all emitted copies are stuck together with flags.
+ Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+ }
+
+ // Don't emit the ret/reti instruction when the naked attribute is present in
+ // the function being compiled.
+ if (MF.getFunction()->getAttributes().hasAttribute(
+ AttributeSet::FunctionIndex, Attribute::Naked)) {
+ return Chain;
+ }
+
+ unsigned RetOpc =
+ (CallConv == CallingConv::AVR_INTR || CallConv == CallingConv::AVR_SIGNAL)
+ ? AVRISD::RETI_FLAG
+ : AVRISD::RET_FLAG;
+
+ RetOps[0] = Chain; // Update chain.
+
+ if (Flag.getNode()) {
+ RetOps.push_back(Flag);
+ }
+
+ return DAG.getNode(RetOpc, dl, MVT::Other, RetOps);
+}
+
+//===----------------------------------------------------------------------===//
+// Custom Inserters
+//===----------------------------------------------------------------------===//
+
+MachineBasicBlock *AVRTargetLowering::insertShift(MachineInstr &MI,
+ MachineBasicBlock *BB) const {
+ unsigned Opc;
+ const TargetRegisterClass *RC;
+ MachineFunction *F = BB->getParent();
+ MachineRegisterInfo &RI = F->getRegInfo();
+ const AVRTargetMachine &TM = (const AVRTargetMachine &)getTargetMachine();
+ const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo();
+ DebugLoc dl = MI.getDebugLoc();
+
+ switch (MI.getOpcode()) {
+ default:
+ llvm_unreachable("Invalid shift opcode!");
+ case AVR::Lsl8:
+ Opc = AVR::LSLRd;
+ RC = &AVR::GPR8RegClass;
+ break;
+ case AVR::Lsl16:
+ Opc = AVR::LSLWRd;
+ RC = &AVR::DREGSRegClass;
+ break;
+ case AVR::Asr8:
+ Opc = AVR::ASRRd;
+ RC = &AVR::GPR8RegClass;
+ break;
+ case AVR::Asr16:
+ Opc = AVR::ASRWRd;
+ RC = &AVR::DREGSRegClass;
+ break;
+ case AVR::Lsr8:
+ Opc = AVR::LSRRd;
+ RC = &AVR::GPR8RegClass;
+ break;
+ case AVR::Lsr16:
+ Opc = AVR::LSRWRd;
+ RC = &AVR::DREGSRegClass;
+ break;
+ }
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator I = BB->getParent()->begin();
+ ++I;
+
+ // Create loop block.
+ MachineBasicBlock *LoopBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *RemBB = F->CreateMachineBasicBlock(LLVM_BB);
+
+ F->insert(I, LoopBB);
+ F->insert(I, RemBB);
+
+ // Update machine-CFG edges by transferring all successors of the current
+ // block to the block containing the instructions after the shift.
+ RemBB->splice(RemBB->begin(), BB, std::next(MachineBasicBlock::iterator(MI)),
+ BB->end());
+ RemBB->transferSuccessorsAndUpdatePHIs(BB);
+
+ // Add edges BB => LoopBB, BB => RemBB, LoopBB => RemBB, LoopBB => LoopBB.
+ BB->addSuccessor(LoopBB);
+ BB->addSuccessor(RemBB);
+ LoopBB->addSuccessor(RemBB);
+ LoopBB->addSuccessor(LoopBB);
+
+ unsigned ShiftAmtReg = RI.createVirtualRegister(&AVR::LD8RegClass);
+ unsigned ShiftAmtReg2 = RI.createVirtualRegister(&AVR::LD8RegClass);
+ unsigned ShiftReg = RI.createVirtualRegister(RC);
+ unsigned ShiftReg2 = RI.createVirtualRegister(RC);
+ unsigned ShiftAmtSrcReg = MI.getOperand(2).getReg();
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ unsigned DstReg = MI.getOperand(0).getReg();
+
+ // BB:
+ // cp N, r0
+ // breq RemBB
+ BuildMI(BB, dl, TII.get(AVR::CPRdRr)).addReg(ShiftAmtSrcReg).addReg(AVR::R0);
+ BuildMI(BB, dl, TII.get(AVR::BREQk)).addMBB(RemBB);
+
+ // LoopBB:
+ // ShiftReg = phi [%SrcReg, BB], [%ShiftReg2, LoopBB]
+ // ShiftAmt = phi [%N, BB], [%ShiftAmt2, LoopBB]
+ // ShiftReg2 = shift ShiftReg
+ // ShiftAmt2 = ShiftAmt - 1;
+ BuildMI(LoopBB, dl, TII.get(AVR::PHI), ShiftReg)
+ .addReg(SrcReg)
+ .addMBB(BB)
+ .addReg(ShiftReg2)
+ .addMBB(LoopBB);
+ BuildMI(LoopBB, dl, TII.get(AVR::PHI), ShiftAmtReg)
+ .addReg(ShiftAmtSrcReg)
+ .addMBB(BB)
+ .addReg(ShiftAmtReg2)
+ .addMBB(LoopBB);
+ BuildMI(LoopBB, dl, TII.get(Opc), ShiftReg2).addReg(ShiftReg);
+ BuildMI(LoopBB, dl, TII.get(AVR::SUBIRdK), ShiftAmtReg2)
+ .addReg(ShiftAmtReg)
+ .addImm(1);
+ BuildMI(LoopBB, dl, TII.get(AVR::BRNEk)).addMBB(LoopBB);
+
+ // RemBB:
+ // DestReg = phi [%SrcReg, BB], [%ShiftReg2, LoopBB]
+ BuildMI(*RemBB, RemBB->begin(), dl, TII.get(AVR::PHI), DstReg)
+ .addReg(SrcReg)
+ .addMBB(BB)
+ .addReg(ShiftReg2)
+ .addMBB(LoopBB);
+
+ MI.eraseFromParent(); // The pseudo instruction is gone now.
+ return RemBB;
+}
+
+static bool isCopyMulResult(MachineBasicBlock::iterator const &I) {
+ if (I->getOpcode() == AVR::COPY) {
+ unsigned SrcReg = I->getOperand(1).getReg();
+ return (SrcReg == AVR::R0 || SrcReg == AVR::R1);
+ }
+
+ return false;
+}
+
+// The mul instructions wreak havoc on our zero_reg R1. We need to clear it
+// after the result has been evacuated. This is probably not the best way to do
+// it, but it works for now.
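+// (On AVR, `mul` places its 16-bit product in R1:R0, while the calling
+// convention treats R1 as the zero register, hence the `eor r1, r1` emitted
+// below.)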
+MachineBasicBlock *AVRTargetLowering::insertMul(MachineInstr &MI,
+ MachineBasicBlock *BB) const {
+ const AVRTargetMachine &TM = (const AVRTargetMachine &)getTargetMachine();
+ const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo();
+ MachineBasicBlock::iterator I(MI);
+ ++I; // in any case insert *after* the mul instruction
+ if (isCopyMulResult(I))
+ ++I;
+ if (isCopyMulResult(I))
+ ++I;
+ BuildMI(*BB, I, MI.getDebugLoc(), TII.get(AVR::EORRdRr), AVR::R1)
+ .addReg(AVR::R1)
+ .addReg(AVR::R1);
+ return BB;
+}
+
+MachineBasicBlock *
+AVRTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
+ MachineBasicBlock *MBB) const {
+ int Opc = MI.getOpcode();
+
+ // Pseudo shift instructions with a non-constant shift amount are expanded
+ // into a loop.
+ switch (Opc) {
+ case AVR::Lsl8:
+ case AVR::Lsl16:
+ case AVR::Lsr8:
+ case AVR::Lsr16:
+ case AVR::Asr8:
+ case AVR::Asr16:
+ return insertShift(MI, MBB);
+ case AVR::MULRdRr:
+ case AVR::MULSRdRr:
+ return insertMul(MI, MBB);
+ }
+
+ assert((Opc == AVR::Select16 || Opc == AVR::Select8) &&
+ "Unexpected instr type to insert");
+
+ const AVRInstrInfo &TII = (const AVRInstrInfo &)*MI.getParent()
+ ->getParent()
+ ->getSubtarget()
+ .getInstrInfo();
+ DebugLoc dl = MI.getDebugLoc();
+
+ // To "insert" a SELECT instruction, we insert the diamond
+ // control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch
+ // on, the true/false values to select between, and a branch opcode
+ // to use.
+
+ MachineFunction *MF = MBB->getParent();
+ const BasicBlock *LLVM_BB = MBB->getBasicBlock();
+ MachineBasicBlock *trueMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *falseMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+
+ MachineFunction::iterator I = MBB->getParent()->begin();
+ ++I;
+ MF->insert(I, trueMBB);
+ MF->insert(I, falseMBB);
+
+ // Transfer remaining instructions and all successors of the current
+ // block to the block which will contain the Phi node for the
+ // select.
+ trueMBB->splice(trueMBB->begin(), MBB,
+ std::next(MachineBasicBlock::iterator(MI)), MBB->end());
+ trueMBB->transferSuccessorsAndUpdatePHIs(MBB);
+
+ AVRCC::CondCodes CC = (AVRCC::CondCodes)MI.getOperand(3).getImm();
+ BuildMI(MBB, dl, TII.getBrCond(CC)).addMBB(trueMBB);
+ BuildMI(MBB, dl, TII.get(AVR::RJMPk)).addMBB(falseMBB);
+ MBB->addSuccessor(falseMBB);
+ MBB->addSuccessor(trueMBB);
+
+ // Unconditionally flow back to the true block
+ BuildMI(falseMBB, dl, TII.get(AVR::RJMPk)).addMBB(trueMBB);
+ falseMBB->addSuccessor(trueMBB);
+
+ // Set up the Phi node to determine where we came from
+ BuildMI(*trueMBB, trueMBB->begin(), dl, TII.get(AVR::PHI), MI.getOperand(0).getReg())
+ .addReg(MI.getOperand(1).getReg())
+ .addMBB(MBB)
+ .addReg(MI.getOperand(2).getReg())
+ .addMBB(falseMBB);
+
+ MI.eraseFromParent(); // The pseudo instruction is gone now.
+ return trueMBB;
+}
+
+//===----------------------------------------------------------------------===//
+// Inline Asm Support
+//===----------------------------------------------------------------------===//
+
+AVRTargetLowering::ConstraintType
+AVRTargetLowering::getConstraintType(StringRef Constraint) const {
+ if (Constraint.size() == 1) {
+ // See http://www.nongnu.org/avr-libc/user-manual/inline_asm.html
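+ // As an illustrative example (not taken from this file), the 'd' constraint
+ // limits an operand to the upper registers r16..r31, which is what `ldi`
+ // requires:
+ //   asm("ldi %0, 0x2A" : "=d"(value));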
+ switch (Constraint[0]) {
+ case 'a': // Simple upper registers
+ case 'b': // Base pointer registers pairs
+ case 'd': // Upper register
+ case 'l': // Lower registers
+ case 'e': // Pointer register pairs
+ case 'q': // Stack pointer register
+ case 'r': // Any register
+ case 'w': // Special upper register pairs
+ return C_RegisterClass;
+ case 't': // Temporary register
+ case 'x': case 'X': // Pointer register pair X
+ case 'y': case 'Y': // Pointer register pair Y
+ case 'z': case 'Z': // Pointer register pair Z
+ return C_Register;
+ case 'Q': // A memory address based on Y or Z pointer with displacement.
+ return C_Memory;
+ case 'G': // Floating point constant
+ case 'I': // 6-bit positive integer constant
+ case 'J': // 6-bit negative integer constant
+ case 'K': // Integer constant (Range: 2)
+ case 'L': // Integer constant (Range: 0)
+ case 'M': // 8-bit integer constant
+ case 'N': // Integer constant (Range: -1)
+ case 'O': // Integer constant (Range: 8, 16, 24)
+ case 'P': // Integer constant (Range: 1)
+ case 'R': // Integer constant (Range: -6 to 5)
+ return C_Other;
+ default:
+ break;
+ }
+ }
+
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+unsigned
+AVRTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
+ // Not sure if this is actually the right thing to do, but we have to do
+ // *something* [agnat]
+ switch (ConstraintCode[0]) {
+ case 'Q':
+ return InlineAsm::Constraint_Q;
+ }
+ return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
+}
+
+AVRTargetLowering::ConstraintWeight
+AVRTargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ // (this behaviour has been copied from the ARM backend)
+ if (!CallOperandVal) {
+ return CW_Default;
+ }
+
+ // Look at the constraint type.
+ switch (*constraint) {
+ default:
+ weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
+ break;
+ case 'd':
+ case 'r':
+ case 'l':
+ weight = CW_Register;
+ break;
+ case 'a':
+ case 'b':
+ case 'e':
+ case 'q':
+ case 't':
+ case 'w':
+ case 'x': case 'X':
+ case 'y': case 'Y':
+ case 'z': case 'Z':
+ weight = CW_SpecificReg;
+ break;
+ case 'G':
+ if (const ConstantFP *C = dyn_cast<ConstantFP>(CallOperandVal)) {
+ if (C->isZero()) {
+ weight = CW_Constant;
+ }
+ }
+ break;
+ case 'I':
+ if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if (isUInt<6>(C->getZExtValue())) {
+ weight = CW_Constant;
+ }
+ }
+ break;
+ case 'J':
+ if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if ((C->getSExtValue() >= -63) && (C->getSExtValue() <= 0)) {
+ weight = CW_Constant;
+ }
+ }
+ break;
+ case 'K':
+ if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if (C->getZExtValue() == 2) {
+ weight = CW_Constant;
+ }
+ }
+ break;
+ case 'L':
+ if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if (C->getZExtValue() == 0) {
+ weight = CW_Constant;
+ }
+ }
+ break;
+ case 'M':
+ if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if (isUInt<8>(C->getZExtValue())) {
+ weight = CW_Constant;
+ }
+ }
+ break;
+ case 'N':
+ if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if (C->getSExtValue() == -1) {
+ weight = CW_Constant;
+ }
+ }
+ break;
+ case 'O':
+ if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if ((C->getZExtValue() == 8) || (C->getZExtValue() == 16) ||
+ (C->getZExtValue() == 24)) {
+ weight = CW_Constant;
+ }
+ }
+ break;
+ case 'P':
+ if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if (C->getZExtValue() == 1) {
+ weight = CW_Constant;
+ }
+ }
+ break;
+ case 'R':
+ if (const ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
+ if ((C->getSExtValue() >= -6) && (C->getSExtValue() <= 5)) {
+ weight = CW_Constant;
+ }
+ }
+ break;
+ case 'Q':
+ weight = CW_Memory;
+ break;
+ }
+
+ return weight;
+}
+
+std::pair<unsigned, const TargetRegisterClass *>
+AVRTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
+ StringRef Constraint,
+ MVT VT) const {
+ auto STI = static_cast<const AVRTargetMachine &>(this->getTargetMachine())
+ .getSubtargetImpl();
+
+ // We only support i8 and i16.
+ //
+ //:FIXME: the assert below is disabled for now since it sometimes fires
+ // assert((VT == MVT::i16 || VT == MVT::i8) && "Wrong operand type.");
+
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'a': // Simple upper registers r16..r23.
+ return std::make_pair(0U, &AVR::LD8loRegClass);
+ case 'b': // Base pointer registers: y, z.
+ return std::make_pair(0U, &AVR::PTRDISPREGSRegClass);
+ case 'd': // Upper registers r16..r31.
+ return std::make_pair(0U, &AVR::LD8RegClass);
+ case 'l': // Lower registers r0..r15.
+ return std::make_pair(0U, &AVR::GPR8loRegClass);
+ case 'e': // Pointer register pairs: x, y, z.
+ return std::make_pair(0U, &AVR::PTRREGSRegClass);
+ case 'q': // Stack pointer register: SPH:SPL.
+ return std::make_pair(0U, &AVR::GPRSPRegClass);
+ case 'r': // Any register: r0..r31.
+ if (VT == MVT::i8)
+ return std::make_pair(0U, &AVR::GPR8RegClass);
+
+ assert(VT == MVT::i16 && "inline asm constraint too large");
+ return std::make_pair(0U, &AVR::DREGSRegClass);
+ case 't': // Temporary register: r0.
+ return std::make_pair(unsigned(AVR::R0), &AVR::GPR8RegClass);
+ case 'w': // Special upper register pairs: r24, r26, r28, r30.
+ return std::make_pair(0U, &AVR::IWREGSRegClass);
+ case 'x': // Pointer register pair X: r27:r26.
+ case 'X':
+ return std::make_pair(unsigned(AVR::R27R26), &AVR::PTRREGSRegClass);
+ case 'y': // Pointer register pair Y: r29:r28.
+ case 'Y':
+ return std::make_pair(unsigned(AVR::R29R28), &AVR::PTRREGSRegClass);
+ case 'z': // Pointer register pair Z: r31:r30.
+ case 'Z':
+ return std::make_pair(unsigned(AVR::R31R30), &AVR::PTRREGSRegClass);
+ default:
+ break;
+ }
+ }
+
+ return TargetLowering::getRegForInlineAsmConstraint(STI->getRegisterInfo(),
+ Constraint, VT);
+}
+
+void AVRTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
+ std::string &Constraint,
+ std::vector<SDValue> &Ops,
+ SelectionDAG &DAG) const {
+ SDValue Result(0, 0);
+ SDLoc DL(Op);
+ EVT Ty = Op.getValueType();
+
+ // Currently only support length 1 constraints.
+ if (Constraint.length() != 1) {
+ return;
+ }
+
+ char ConstraintLetter = Constraint[0];
+ switch (ConstraintLetter) {
+ default:
+ break;
+ // Deal with integers first:
+ case 'I':
+ case 'J':
+ case 'K':
+ case 'L':
+ case 'M':
+ case 'N':
+ case 'O':
+ case 'P':
+ case 'R': {
+ const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
+ if (!C) {
+ return;
+ }
+
+ int64_t CVal64 = C->getSExtValue();
+ uint64_t CUVal64 = C->getZExtValue();
+ switch (ConstraintLetter) {
+ case 'I': // 0..63
+ if (!isUInt<6>(CUVal64))
+ return;
+ Result = DAG.getTargetConstant(CUVal64, DL, Ty);
+ break;
+ case 'J': // -63..0
+ if (CVal64 < -63 || CVal64 > 0)
+ return;
+ Result = DAG.getTargetConstant(CVal64, DL, Ty);
+ break;
+ case 'K': // 2
+ if (CUVal64 != 2)
+ return;
+ Result = DAG.getTargetConstant(CUVal64, DL, Ty);
+ break;
+ case 'L': // 0
+ if (CUVal64 != 0)
+ return;
+ Result = DAG.getTargetConstant(CUVal64, DL, Ty);
+ break;
+ case 'M': // 0..255
+ if (!isUInt<8>(CUVal64))
+ return;
+ // i8 type may be printed as a negative number,
+ // e.g. 254 would be printed as -2,
+ // so we force it to i16 at least.
+ if (Ty.getSimpleVT() == MVT::i8) {
+ Ty = MVT::i16;
+ }
+ Result = DAG.getTargetConstant(CUVal64, DL, Ty);
+ break;
+ case 'N': // -1
+ if (CVal64 != -1)
+ return;
+ Result = DAG.getTargetConstant(CVal64, DL, Ty);
+ break;
+ case 'O': // 8, 16, 24
+ if (CUVal64 != 8 && CUVal64 != 16 && CUVal64 != 24)
+ return;
+ Result = DAG.getTargetConstant(CUVal64, DL, Ty);
+ break;
+ case 'P': // 1
+ if (CUVal64 != 1)
+ return;
+ Result = DAG.getTargetConstant(CUVal64, DL, Ty);
+ break;
+ case 'R': // -6..5
+ if (CVal64 < -6 || CVal64 > 5)
+ return;
+ Result = DAG.getTargetConstant(CVal64, DL, Ty);
+ break;
+ }
+
+ break;
+ }
+ case 'G':
+ const ConstantFPSDNode *FC = dyn_cast<ConstantFPSDNode>(Op);
+ if (!FC || !FC->isZero())
+ return;
+ // Soften float to i8 0
+ Result = DAG.getTargetConstant(0, DL, MVT::i8);
+ break;
+ }
+
+ if (Result.getNode()) {
+ Ops.push_back(Result);
+ return;
+ }
+
+ return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
+}
+
+} // end of namespace llvm
+
diff --git a/contrib/llvm/lib/Target/AVR/AVRISelLowering.h b/contrib/llvm/lib/Target/AVR/AVRISelLowering.h
index 2c8c9c88b6dd..17074e1b1eee 100644
--- a/contrib/llvm/lib/Target/AVR/AVRISelLowering.h
+++ b/contrib/llvm/lib/Target/AVR/AVRISelLowering.h
@@ -15,6 +15,7 @@
#ifndef LLVM_AVR_ISEL_LOWERING_H
#define LLVM_AVR_ISEL_LOWERING_H
+#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/Target/TargetLowering.h"
namespace llvm {
@@ -92,6 +93,9 @@ public:
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;
+ EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
+ EVT VT) const override;
+
MachineBasicBlock *
EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *MBB) const override;
@@ -125,6 +129,13 @@ private:
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
+ CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC) const;
+
+ bool CanLowerReturn(CallingConv::ID CallConv,
+ MachineFunction &MF, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ LLVMContext &Context) const override;
+
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals, const SDLoc &dl,
@@ -143,8 +154,8 @@ private:
SmallVectorImpl<SDValue> &InVals) const;
private:
- MachineBasicBlock *insertShift(MachineInstr *MI, MachineBasicBlock *BB) const;
- MachineBasicBlock *insertMul(MachineInstr *MI, MachineBasicBlock *BB) const;
+ MachineBasicBlock *insertShift(MachineInstr &MI, MachineBasicBlock *BB) const;
+ MachineBasicBlock *insertMul(MachineInstr &MI, MachineBasicBlock *BB) const;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/AVR/AVRInstrFormats.td b/contrib/llvm/lib/Target/AVR/AVRInstrFormats.td
index c10023dd2c0a..ce5e606f9787 100644
--- a/contrib/llvm/lib/Target/AVR/AVRInstrFormats.td
+++ b/contrib/llvm/lib/Target/AVR/AVRInstrFormats.td
@@ -20,6 +20,8 @@ class AVRInst<dag outs, dag ins, string asmstr, list<dag> pattern> : Instruction
dag InOperandList = ins;
let AsmString = asmstr;
let Pattern = pattern;
+
+ field bits<32> SoftFail = 0;
}
/// A 16-bit AVR instruction.
diff --git a/contrib/llvm/lib/Target/AVR/AVRInstrInfo.cpp b/contrib/llvm/lib/Target/AVR/AVRInstrInfo.cpp
index 0327c015cbbb..88f889260cce 100644
--- a/contrib/llvm/lib/Target/AVR/AVRInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/AVR/AVRInstrInfo.cpp
@@ -27,6 +27,7 @@
#include "AVR.h"
#include "AVRMachineFunctionInfo.h"
+#include "AVRRegisterInfo.h"
#include "AVRTargetMachine.h"
#include "MCTargetDesc/AVRMCTargetDesc.h"
@@ -42,22 +43,41 @@ void AVRInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const DebugLoc &DL, unsigned DestReg,
unsigned SrcReg, bool KillSrc) const {
+ const AVRSubtarget &STI = MBB.getParent()->getSubtarget<AVRSubtarget>();
+ const AVRRegisterInfo &TRI = *STI.getRegisterInfo();
unsigned Opc;
- if (AVR::GPR8RegClass.contains(DestReg, SrcReg)) {
- Opc = AVR::MOVRdRr;
- } else if (AVR::DREGSRegClass.contains(DestReg, SrcReg)) {
- Opc = AVR::MOVWRdRr;
- } else if (SrcReg == AVR::SP && AVR::DREGSRegClass.contains(DestReg)) {
- Opc = AVR::SPREAD;
- } else if (DestReg == AVR::SP && AVR::DREGSRegClass.contains(SrcReg)) {
- Opc = AVR::SPWRITE;
+ // Not all AVR devices support the 16-bit `MOVW` instruction.
+ if (AVR::DREGSRegClass.contains(DestReg, SrcReg)) {
+ if (STI.hasMOVW()) {
+ BuildMI(MBB, MI, DL, get(AVR::MOVWRdRr), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ } else {
+ unsigned DestLo, DestHi, SrcLo, SrcHi;
+
+ TRI.splitReg(DestReg, DestLo, DestHi);
+ TRI.splitReg(SrcReg, SrcLo, SrcHi);
+
+ // Copy each individual register with the `MOV` instruction.
+ BuildMI(MBB, MI, DL, get(AVR::MOVRdRr), DestLo)
+ .addReg(SrcLo, getKillRegState(KillSrc));
+ BuildMI(MBB, MI, DL, get(AVR::MOVRdRr), DestHi)
+ .addReg(SrcHi, getKillRegState(KillSrc));
+ }
} else {
- llvm_unreachable("Impossible reg-to-reg copy");
- }
+ if (AVR::GPR8RegClass.contains(DestReg, SrcReg)) {
+ Opc = AVR::MOVRdRr;
+ } else if (SrcReg == AVR::SP && AVR::DREGSRegClass.contains(DestReg)) {
+ Opc = AVR::SPREAD;
+ } else if (DestReg == AVR::SP && AVR::DREGSRegClass.contains(SrcReg)) {
+ Opc = AVR::SPWRITE;
+ } else {
+ llvm_unreachable("Impossible reg-to-reg copy");
+ }
- BuildMI(MBB, MI, DL, get(Opc), DestReg)
- .addReg(SrcReg, getKillRegState(KillSrc));
+ BuildMI(MBB, MI, DL, get(Opc), DestReg)
+ .addReg(SrcReg, getKillRegState(KillSrc));
+ }
}
unsigned AVRInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
@@ -105,13 +125,16 @@ void AVRInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
MachineFunction &MF = *MBB.getParent();
+ AVRMachineFunctionInfo *AFI = MF.getInfo<AVRMachineFunctionInfo>();
+
+ AFI->setHasSpills(true);
DebugLoc DL;
if (MI != MBB.end()) {
DL = MI->getDebugLoc();
}
- const MachineFrameInfo &MFI = *MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FrameIndex),
@@ -145,7 +168,7 @@ void AVRInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
}
MachineFunction &MF = *MBB.getParent();
- const MachineFrameInfo &MFI = *MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FrameIndex),
@@ -373,13 +396,16 @@ bool AVRInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
return false;
}
-unsigned AVRInstrInfo::InsertBranch(MachineBasicBlock &MBB,
+unsigned AVRInstrInfo::insertBranch(MachineBasicBlock &MBB,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
ArrayRef<MachineOperand> Cond,
- const DebugLoc &DL) const {
+ const DebugLoc &DL,
+ int *BytesAdded) const {
+ assert(!BytesAdded && "code size not handled");
+
// Shouldn't be a fall through.
- assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert(TBB && "insertBranch must not be told to insert a fallthrough");
assert((Cond.size() == 1 || Cond.size() == 0) &&
"AVR branch conditions have one component!");
@@ -404,7 +430,10 @@ unsigned AVRInstrInfo::InsertBranch(MachineBasicBlock &MBB,
return Count;
}
-unsigned AVRInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+unsigned AVRInstrInfo::removeBranch(MachineBasicBlock &MBB,
+ int *BytesRemoved) const {
+ assert(!BytesRemoved && "code size not handled");
+
MachineBasicBlock::iterator I = MBB.end();
unsigned Count = 0;
@@ -429,7 +458,7 @@ unsigned AVRInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
return Count;
}
-bool AVRInstrInfo::ReverseBranchCondition(
+bool AVRInstrInfo::reverseBranchCondition(
SmallVectorImpl<MachineOperand> &Cond) const {
assert(Cond.size() == 1 && "Invalid AVR branch condition!");
@@ -439,8 +468,8 @@ bool AVRInstrInfo::ReverseBranchCondition(
return false;
}
-unsigned AVRInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
- unsigned Opcode = MI->getOpcode();
+unsigned AVRInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
+ unsigned Opcode = MI.getOpcode();
switch (Opcode) {
// A regular instruction
@@ -454,13 +483,16 @@ unsigned AVRInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
case TargetOpcode::DBG_VALUE:
return 0;
case TargetOpcode::INLINEASM: {
- const MachineFunction *MF = MI->getParent()->getParent();
- const AVRTargetMachine &TM = static_cast<const AVRTargetMachine&>(MF->getTarget());
- const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo();
- return TII.getInlineAsmLength(MI->getOperand(0).getSymbolName(),
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ const AVRTargetMachine &TM = static_cast<const AVRTargetMachine&>(MF.getTarget());
+ const AVRSubtarget &STI = MF.getSubtarget<AVRSubtarget>();
+ const TargetInstrInfo &TII = *STI.getInstrInfo();
+
+ return TII.getInlineAsmLength(MI.getOperand(0).getSymbolName(),
*TM.getMCAsmInfo());
}
}
}
} // end of namespace llvm
+
diff --git a/contrib/llvm/lib/Target/AVR/AVRInstrInfo.h b/contrib/llvm/lib/Target/AVR/AVRInstrInfo.h
index fc8945d82432..c5105dafe5eb 100644
--- a/contrib/llvm/lib/Target/AVR/AVRInstrInfo.h
+++ b/contrib/llvm/lib/Target/AVR/AVRInstrInfo.h
@@ -70,7 +70,7 @@ public:
const MCInstrDesc &getBrCond(AVRCC::CondCodes CC) const;
AVRCC::CondCodes getCondFromBranchOpc(unsigned Opc) const;
AVRCC::CondCodes getOppositeCondition(AVRCC::CondCodes CC) const;
- unsigned GetInstSizeInBytes(const MachineInstr *MI) const;
+ unsigned getInstSizeInBytes(const MachineInstr &MI) const override;
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
@@ -94,12 +94,14 @@ public:
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify = false) const override;
- unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
- const DebugLoc &DL) const override;
- unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
+ const DebugLoc &DL,
+ int *BytesAdded = nullptr) const override;
+ unsigned removeBranch(MachineBasicBlock &MBB,
+ int *BytesRemoved = nullptr) const override;
bool
- ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
+ reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
private:
const AVRRegisterInfo RI;
diff --git a/contrib/llvm/lib/Target/AVR/AVRInstrInfo.td b/contrib/llvm/lib/Target/AVR/AVRInstrInfo.td
index e75683680150..bc66379ab708 100644
--- a/contrib/llvm/lib/Target/AVR/AVRInstrInfo.td
+++ b/contrib/llvm/lib/Target/AVR/AVRInstrInfo.td
@@ -155,7 +155,7 @@ def memspi : Operand<iPTR>
let MIOperandInfo = (ops GPRSP, i16imm);
}
-def i8imm_com : Operand<i8>
+def imm_com8 : Operand<i8>
{
let EncoderMethod = "encodeComplement";
@@ -180,6 +180,38 @@ def call_target : Operand<iPTR>
let EncoderMethod = "encodeCallTarget";
}
+// A 16-bit address (which can lead to an R_AVR_16 relocation).
+def imm16 : Operand<i16>
+{
+ let EncoderMethod = "encodeImm<AVR::fixup_16>";
+}
+
+/// A 6-bit immediate used in the ADIW/SBIW instructions.
+def imm_arith6 : Operand<i16>
+{
+ let EncoderMethod = "encodeImm<AVR::fixup_6_adiw>";
+}
+
+/// An 8-bit immediate inside an instruction with the same format
+/// as the `LDI` instruction (the `FRdK` format).
+def imm_ldi8 : Operand<i8>
+{
+ let EncoderMethod = "encodeImm<AVR::fixup_ldi>";
+}
+
+/// A 5-bit port number used in SBIC and friends (the `FIOBIT` format).
+def imm_port5 : Operand<i8>
+{
+ let EncoderMethod = "encodeImm<AVR::fixup_port5>";
+}
+
+/// A 6-bit port number used in the `IN` instruction and friends (the
+/// `FIORdA` format).
+def imm_port6 : Operand<i8>
+{
+ let EncoderMethod = "encodeImm<AVR::fixup_port6>";
+}
+
// Addressing mode pattern reg+imm6
def addr : ComplexPattern<iPTR, 2, "SelectAddr", [], [SDNPWantRoot]>;
@@ -372,7 +404,7 @@ Defs = [SREG] in
// Adds an immediate 6-bit value K to Rd, placing the result in Rd.
def ADIWRdK : FWRdK<0b0,
(outs IWREGS:$rd),
- (ins IWREGS:$src, i16imm:$k),
+ (ins IWREGS:$src, imm_arith6:$k),
"adiw\t$rd, $k",
[(set i16:$rd, (add i16:$src, uimm6:$k)),
(implicit SREG)]>,
@@ -409,7 +441,7 @@ Defs = [SREG] in
def SUBIRdK : FRdK<0b0101,
(outs LD8:$rd),
- (ins LD8:$src, i8imm:$k),
+ (ins LD8:$src, imm_ldi8:$k),
"subi\t$rd, $k",
[(set i8:$rd, (sub i8:$src, imm:$k)),
(implicit SREG)]>;
@@ -427,7 +459,7 @@ Defs = [SREG] in
def SBIWRdK : FWRdK<0b1,
(outs IWREGS:$rd),
- (ins IWREGS:$src, i16imm:$k),
+ (ins IWREGS:$src, imm_arith6:$k),
"sbiw\t$rd, $k",
[(set i16:$rd, (sub i16:$src, uimm6:$k)),
(implicit SREG)]>,
@@ -457,7 +489,7 @@ Defs = [SREG] in
def SBCIRdK : FRdK<0b0100,
(outs LD8:$rd),
- (ins LD8:$src, i8imm:$k),
+ (ins LD8:$src, imm_ldi8:$k),
"sbci\t$rd, $k",
[(set i8:$rd, (sube i8:$src, imm:$k)),
(implicit SREG)]>;
@@ -626,7 +658,7 @@ Defs = [SREG] in
def ANDIRdK : FRdK<0b0111,
(outs LD8:$rd),
- (ins LD8:$src, i8imm:$k),
+ (ins LD8:$src, imm_ldi8:$k),
"andi\t$rd, $k",
[(set i8:$rd, (and i8:$src, imm:$k)),
(implicit SREG)]>;
@@ -644,7 +676,7 @@ Defs = [SREG] in
def ORIRdK : FRdK<0b0110,
(outs LD8:$rd),
- (ins LD8:$src, i8imm:$k),
+ (ins LD8:$src, imm_ldi8:$k),
"ori\t$rd, $k",
[(set i8:$rd, (or i8:$src, imm:$k)),
(implicit SREG)]>;
@@ -871,7 +903,7 @@ let Defs = [SREG] in
let Uses = [SREG] in
def CPIRdK : FRdK<0b0011,
(outs),
- (ins GPR8:$rd, i8imm:$k),
+ (ins GPR8:$rd, imm_ldi8:$k),
"cpi\t$rd, $k",
[(AVRcmp i8:$rd, imm:$k), (implicit SREG)]>;
}
@@ -900,13 +932,13 @@ isTerminator = 1 in
def SBICAb : FIOBIT<0b01,
(outs),
- (ins i16imm:$a, i8imm:$b),
+ (ins imm_port5:$a, i8imm:$b),
"sbic\t$a, $b",
[]>;
def SBISAb : FIOBIT<0b11,
(outs),
- (ins i16imm:$a, i8imm:$b),
+ (ins imm_port5:$a, i8imm:$b),
"sbis\t$a, $b",
[]>;
}
@@ -1065,7 +1097,7 @@ let isReMaterializable = 1 in
{
def LDIRdK : FRdK<0b1110,
(outs LD8:$rd),
- (ins i8imm:$k),
+ (ins imm_ldi8:$k),
"ldi\t$rd, $k",
[(set i8:$rd, imm:$k)]>;
@@ -1086,7 +1118,7 @@ isReMaterializable = 1 in
{
def LDSRdK : F32DM<0b0,
(outs GPR8:$rd),
- (ins i16imm:$k),
+ (ins imm16:$k),
"lds\t$rd, $k",
[(set i8:$rd, (load imm:$k))]>,
Requires<[HasSRAM]>;
@@ -1175,6 +1207,7 @@ Constraints = "$ptrreg = $base_wb,@earlyclobber $reg,@earlyclobber $base_wb" in
let canFoldAsLoad = 1,
isReMaterializable = 1 in
{
+ let Constraints = "@earlyclobber $reg" in
def LDDRdPtrQ : FSTDLDD<0,
(outs GPR8:$reg),
(ins memri:$memri),
@@ -1194,10 +1227,9 @@ isReMaterializable = 1 in
[(set i16:$dst, (load addr:$memri))]>,
Requires<[HasSRAM]>;
- //:FIXME: remove this once PR13375 gets fixed
- // Bug report: https://llvm.org/bugs/show_bug.cgi?id=13375
let mayLoad = 1,
- hasSideEffects = 0 in
+ hasSideEffects = 0,
+ Constraints = "@earlyclobber $dst" in
def LDDWRdYQ : Pseudo<(outs DREGS:$dst),
(ins memri:$memri),
"lddw\t$dst, $memri",
@@ -1205,10 +1237,42 @@ isReMaterializable = 1 in
Requires<[HasSRAM]>;
}
+class AtomicLoad<PatFrag Op, RegisterClass DRC> :
+ Pseudo<(outs DRC:$rd), (ins PTRREGS:$rr), "atomic_op",
+ [(set DRC:$rd, (Op i16:$rr))]>;
+
+class AtomicStore<PatFrag Op, RegisterClass DRC> :
+ Pseudo<(outs), (ins PTRDISPREGS:$rd, DRC:$rr), "atomic_op",
+ [(Op i16:$rd, DRC:$rr)]>;
+
+class AtomicLoadOp<PatFrag Op, RegisterClass DRC> :
+ Pseudo<(outs DRC:$rd), (ins PTRREGS:$rr, DRC:$operand),
+ "atomic_op",
+ [(set DRC:$rd, (Op i16:$rr, DRC:$operand))]>;
+
+def AtomicLoad8 : AtomicLoad<atomic_load_8, GPR8>;
+def AtomicLoad16 : AtomicLoad<atomic_load_16, DREGS>;
+
+def AtomicStore8 : AtomicStore<atomic_store_8, GPR8>;
+def AtomicStore16 : AtomicStore<atomic_store_16, DREGS>;
+
+def AtomicLoadAdd8 : AtomicLoadOp<atomic_load_add_8, GPR8>;
+def AtomicLoadAdd16 : AtomicLoadOp<atomic_load_add_16, DREGS>;
+def AtomicLoadSub8 : AtomicLoadOp<atomic_load_sub_8, GPR8>;
+def AtomicLoadSub16 : AtomicLoadOp<atomic_load_sub_16, DREGS>;
+def AtomicLoadAnd8 : AtomicLoadOp<atomic_load_and_8, GPR8>;
+def AtomicLoadAnd16 : AtomicLoadOp<atomic_load_and_16, DREGS>;
+def AtomicLoadOr8 : AtomicLoadOp<atomic_load_or_8, GPR8>;
+def AtomicLoadOr16 : AtomicLoadOp<atomic_load_or_16, DREGS>;
+def AtomicLoadXor8 : AtomicLoadOp<atomic_load_xor_8, GPR8>;
+def AtomicLoadXor16 : AtomicLoadOp<atomic_load_xor_16, DREGS>;
+def AtomicFence : Pseudo<(outs), (ins), "atomic_fence",
+ [(atomic_fence imm, imm)]>;
+
// Indirect store from register to data space.
def STSKRr : F32DM<0b1,
(outs),
- (ins i16imm:$k, GPR8:$rd),
+ (ins imm16:$k, GPR8:$rd),
"sts\t$k, $rd",
[(store i8:$rd, imm:$k)]>,
Requires<[HasSRAM]>;
@@ -1433,24 +1497,24 @@ let canFoldAsLoad = 1,
isReMaterializable = 1 in
{
def INRdA : FIORdA<(outs GPR8:$dst),
- (ins i16imm:$src),
+ (ins imm_port6:$src),
"in\t$dst, $src",
[(set i8:$dst, (load ioaddr8:$src))]>;
def INWRdA : Pseudo<(outs DREGS:$dst),
- (ins i16imm:$src),
+ (ins imm_port6:$src),
"inw\t$dst, $src",
[(set i16:$dst, (load ioaddr16:$src))]>;
}
// Write data to IO location operations.
def OUTARr : FIOARr<(outs),
- (ins i16imm:$dst, GPR8:$src),
+ (ins imm_port6:$dst, GPR8:$src),
"out\t$dst, $src",
[(store i8:$src, ioaddr8:$dst)]>;
def OUTWARr : Pseudo<(outs),
- (ins i16imm:$dst, DREGS:$src),
+ (ins imm_port6:$dst, DREGS:$src),
"outw\t$dst, $src",
[(store i16:$src, ioaddr16:$dst)]>;
@@ -1613,14 +1677,14 @@ def SWAPRd : FRd<0b1001,
// instead of in+ori+out which requires one more instr.
def SBIAb : FIOBIT<0b10,
(outs),
- (ins i16imm:$addr, i8imm:$bit),
+ (ins imm_port5:$addr, i8imm:$bit),
"sbi\t$addr, $bit",
[(store (or (i8 (load lowioaddr8:$addr)), iobitpos8:$bit),
lowioaddr8:$addr)]>;
def CBIAb : FIOBIT<0b00,
(outs),
- (ins i16imm:$addr, i8imm:$bit),
+ (ins imm_port5:$addr, i8imm:$bit),
"cbi\t$addr, $bit",
[(store (and (i8 (load lowioaddr8:$addr)), iobitposn8:$bit),
lowioaddr8:$addr)]>;
@@ -1648,16 +1712,18 @@ Defs = [SREG] in
// Alias for ORI Rd, K
def SBRRdK : FRdK<0b0110,
(outs LD8:$rd),
- (ins LD8:$src, i8imm:$k),
+ (ins LD8:$src, imm_ldi8:$k),
"sbr\t$rd, $k",
[(set i8:$rd, (or i8:$src, imm:$k)),
(implicit SREG)]>;
// CBR Rd, K
// Alias for `ANDI Rd, COM(K)` where COM(K) is the complement of K.
+ // FIXME: This currently uses only the 'complement' encoder; it should also use
+ // the imm_ldi8 encoder, otherwise no fixups are created for this instruction.
def CBRRdK : FRdK<0b0111,
(outs LD8:$rd),
- (ins LD8:$src, i8imm_com:$k),
+ (ins LD8:$src, imm_com8:$k),
"cbr\t$rd, $k",
[]>;
}
diff --git a/contrib/llvm/lib/Target/AVR/AVRInstrumentFunctions.cpp b/contrib/llvm/lib/Target/AVR/AVRInstrumentFunctions.cpp
new file mode 100644
index 000000000000..5553dc2da31b
--- /dev/null
+++ b/contrib/llvm/lib/Target/AVR/AVRInstrumentFunctions.cpp
@@ -0,0 +1,222 @@
+//===-- AVRInstrumentFunctions.cpp - Insert instrumentation for testing ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass takes a function and inserts calls to hook functions which are
+// told the name, arguments, and results of function calls.
+//
+// The hooks can do anything with the information given. It is possible to
+// send the data through a serial connection in order to run tests on
+// bare metal.
+//
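+// As an illustration (derived from the hook-building code below, not a fixed
+// ABI), the hook invoked for an i16 argument would have a C signature roughly
+// like:
+//   void avr_instrumentation_argument_i16(const char *name, uint8_t index,
+//                                         int16_t value);
+//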
+//===----------------------------------------------------------------------===//
+
+#include "AVR.h"
+
+#include <llvm/IR/Function.h>
+#include <llvm/IR/Module.h>
+
+using namespace llvm;
+
+#define AVR_INSTRUMENT_FUNCTIONS_NAME "AVR function instrumentation pass"
+
+namespace {
+
+// External symbols that we emit calls to.
+namespace symbols {
+
+#define SYMBOL_PREFIX "avr_instrumentation"
+
+ const StringRef PREFIX = SYMBOL_PREFIX;
+
+ // void(const char *name, i16 argCount);
+ const StringRef BEGIN_FUNCTION_SIGNATURE = SYMBOL_PREFIX "_begin_signature";
+ // void(const char *name, i16 argCount);
+ const StringRef END_FUNCTION_SIGNATURE = SYMBOL_PREFIX "_end_signature";
+
+#undef SYMBOL_PREFIX
+}
+
+class AVRInstrumentFunctions : public FunctionPass {
+public:
+ static char ID;
+
+ AVRInstrumentFunctions() : FunctionPass(ID) {
+ initializeAVRInstrumentFunctionsPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override;
+
+ StringRef getPassName() const override { return AVR_INSTRUMENT_FUNCTIONS_NAME; }
+};
+
+char AVRInstrumentFunctions::ID = 0;
+
+/// Creates a pointer to a string.
+static Value *CreateStringPtr(BasicBlock &BB, StringRef Str) {
+ LLVMContext &Ctx = BB.getContext();
+ IntegerType *I8 = Type::getInt8Ty(Ctx);
+
+ Constant *ConstantStr = ConstantDataArray::getString(Ctx, Str);
+ GlobalVariable *GlobalStr = new GlobalVariable(*BB.getParent()->getParent(),
+ ConstantStr->getType(),
+ true, /* is a constant */
+ GlobalValue::PrivateLinkage,
+ ConstantStr);
+ return GetElementPtrInst::CreateInBounds(GlobalStr,
+ {ConstantInt::get(I8, 0), ConstantInt::get(I8, 0)}, "", &BB);
+}
+
+static std::string GetTypeName(Type &Ty) {
+ if (auto *IntTy = dyn_cast<IntegerType>(&Ty)) {
+ return std::string("i") + std::to_string(IntTy->getBitWidth());
+ }
+
+ if (Ty.isFloatingPointTy()) {
+ return std::string("f") + std::to_string(Ty.getPrimitiveSizeInBits());
+ }
+
+ llvm_unreachable("unknown return type");
+}
+
+/// Builds a call to one of the signature begin/end hooks.
+static void BuildSignatureCall(StringRef SymName, BasicBlock &BB, Function &F) {
+ LLVMContext &Ctx = F.getContext();
+ IntegerType *I16 = Type::getInt16Ty(Ctx);
+
+ FunctionType *FnType = FunctionType::get(Type::getVoidTy(Ctx),
+ {Type::getInt8PtrTy(Ctx), I16}, false);
+
+ Constant *Fn = F.getParent()->getOrInsertFunction(SymName, FnType);
+ Value *FunctionName = CreateStringPtr(BB, F.getName());
+
+ Value *Args[] = {FunctionName,
+ ConstantInt::get(I16, F.getArgumentList().size())};
+ CallInst::Create(Fn, Args, "", &BB);
+}
+
+/// Builds instructions to call into an external function to
+/// notify about a function signature beginning.
+static void BuildBeginSignature(BasicBlock &BB, Function &F) {
+ return BuildSignatureCall(symbols::BEGIN_FUNCTION_SIGNATURE, BB, F);
+}
+
+/// Builds instructions to call into an external function to
+/// notify about a function signature ending.
+static void BuildEndSignature(BasicBlock &BB, Function &F) {
+ return BuildSignatureCall(symbols::END_FUNCTION_SIGNATURE, BB, F);
+}
+
+/// Get the name of the external symbol that we need to call
+/// to notify about this argument.
+static std::string GetArgumentSymbolName(Argument &Arg) {
+ return (symbols::PREFIX + "_argument_" + GetTypeName(*Arg.getType())).str();
+}
+
+/// Builds a call to one of the argument hooks.
+static void BuildArgument(BasicBlock &BB, Argument &Arg) {
+ Function &F = *Arg.getParent();
+ LLVMContext &Ctx = F.getContext();
+
+ Type *I8 = Type::getInt8Ty(Ctx);
+
+ FunctionType *FnType = FunctionType::get(Type::getVoidTy(Ctx),
+ {Type::getInt8PtrTy(Ctx), I8, Arg.getType()}, false);
+
+ Constant *Fn = F.getParent()->getOrInsertFunction(
+ GetArgumentSymbolName(Arg), FnType);
+ Value *ArgName = CreateStringPtr(BB, Arg.getName());
+
+ Value *Args[] = {ArgName, ConstantInt::get(I8, Arg.getArgNo()), &Arg};
+ CallInst::Create(Fn, Args, "", &BB);
+}
+
+/// Builds a call to all of the function signature hooks.
+static void BuildSignature(BasicBlock &BB, Function &F) {
+ BuildBeginSignature(BB, F);
+ for (Argument &Arg : F.args()) { BuildArgument(BB, Arg); }
+ BuildEndSignature(BB, F);
+}
+
+/// Builds the instrumentation entry block.
+static void BuildEntryBlock(Function &F) {
+ BasicBlock &EntryBlock = F.getEntryBlock();
+
+ // Create a new basic block at the start of the existing entry block.
+ BasicBlock *BB = BasicBlock::Create(F.getContext(),
+ "instrumentation_entry",
+ &F, &EntryBlock);
+
+ BuildSignature(*BB, F);
+
+ // Jump to the actual entry block.
+ BranchInst::Create(&EntryBlock, BB);
+}
+
+static std::string GetReturnSymbolName(Value &Val) {
+ return (symbols::PREFIX + "_result_" + GetTypeName(*Val.getType())).str();
+}
+
+static void BuildExitHook(Instruction &I) {
+ Function &F = *I.getParent()->getParent();
+ LLVMContext &Ctx = F.getContext();
+
+ if (auto *Ret = dyn_cast<ReturnInst>(&I)) {
+ Value *RetVal = Ret->getReturnValue();
+ assert(RetVal && "should only be instrumenting functions with return values");
+
+ FunctionType *FnType = FunctionType::get(Type::getVoidTy(Ctx),
+ {RetVal->getType()}, false);
+
+ Constant *Fn = F.getParent()->getOrInsertFunction(
+ GetReturnSymbolName(*RetVal), FnType);
+
+ // Call the result hook just before the return.
+ CallInst::Create(Fn, {RetVal}, "", &I);
+ }
+}
+
+/// Runs return hooks before all returns in a function.
+static void BuildExitHooks(Function &F) {
+ for (BasicBlock &BB : F) {
+ auto BBI = BB.begin(), E = BB.end();
+ while (BBI != E) {
+ auto NBBI = std::next(BBI);
+
+ BuildExitHook(*BBI);
+
+ BBI = NBBI;
+ }
+ }
+}
+
+static bool ShouldInstrument(Function &F) {
+ // No point reporting results if there are none.
+ return !F.getReturnType()->isVoidTy();
+}
+
+bool AVRInstrumentFunctions::runOnFunction(Function &F) {
+ if (ShouldInstrument(F)) {
+ BuildEntryBlock(F);
+ BuildExitHooks(F);
+ }
+
+ return true;
+}
+
+} // end of anonymous namespace
+
+INITIALIZE_PASS(AVRInstrumentFunctions, "avr-instrument-functions",
+ AVR_INSTRUMENT_FUNCTIONS_NAME, false, false)
+
+namespace llvm {
+
+FunctionPass *createAVRInstrumentFunctionsPass() { return new AVRInstrumentFunctions(); }
+
+} // end of namespace llvm
diff --git a/contrib/llvm/lib/Target/AVR/AVRMCInstLower.cpp b/contrib/llvm/lib/Target/AVR/AVRMCInstLower.cpp
new file mode 100644
index 000000000000..342fe558813a
--- /dev/null
+++ b/contrib/llvm/lib/Target/AVR/AVRMCInstLower.cpp
@@ -0,0 +1,100 @@
+//===-- AVRMCInstLower.cpp - Convert AVR MachineInstr to an MCInst --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to lower AVR MachineInstrs to their corresponding
+// MCInst records.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AVRMCInstLower.h"
+
+#include "AVRInstrInfo.h"
+#include "MCTargetDesc/AVRMCExpr.h"
+
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace llvm {
+
+MCOperand AVRMCInstLower::lowerSymbolOperand(const MachineOperand &MO,
+ MCSymbol *Sym) const {
+ unsigned char TF = MO.getTargetFlags();
+ const MCExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx);
+
+ bool IsNegated = false;
+ if (TF & AVRII::MO_NEG) { IsNegated = true; }
+
+ if (!MO.isJTI() && MO.getOffset()) {
+ Expr = MCBinaryExpr::createAdd(
+ Expr, MCConstantExpr::create(MO.getOffset(), Ctx), Ctx);
+ }
+
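+ // Wrap the symbol in `lo8()`/`hi8()` when the target flag asks for the low
+ // or high byte of the address (e.g. `ldi r24, lo8(foo)`).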
+ if (TF & AVRII::MO_LO) {
+ Expr = AVRMCExpr::create(AVRMCExpr::VK_AVR_LO8, Expr, IsNegated, Ctx);
+ } else if (TF & AVRII::MO_HI) {
+ Expr = AVRMCExpr::create(AVRMCExpr::VK_AVR_HI8, Expr, IsNegated, Ctx);
+ } else if (TF != 0) {
+ llvm_unreachable("Unknown target flag on symbol operand");
+ }
+
+ return MCOperand::createExpr(Expr);
+}
+
+void AVRMCInstLower::lowerInstruction(const MachineInstr &MI, MCInst &OutMI) const {
+ OutMI.setOpcode(MI.getOpcode());
+
+ for (MachineOperand const &MO : MI.operands()) {
+ MCOperand MCOp;
+
+ switch (MO.getType()) {
+ default:
+ MI.dump();
+ llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_Register:
+ // Ignore all implicit register operands.
+ if (MO.isImplicit())
+ continue;
+ MCOp = MCOperand::createReg(MO.getReg());
+ break;
+ case MachineOperand::MO_Immediate:
+ MCOp = MCOperand::createImm(MO.getImm());
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ MCOp = lowerSymbolOperand(MO, Printer.getSymbol(MO.getGlobal()));
+ break;
+ case MachineOperand::MO_ExternalSymbol:
+ MCOp = lowerSymbolOperand(
+ MO, Printer.GetExternalSymbolSymbol(MO.getSymbolName()));
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ MCOp = MCOperand::createExpr(
+ MCSymbolRefExpr::create(MO.getMBB()->getSymbol(), Ctx));
+ break;
+ case MachineOperand::MO_RegisterMask:
+ continue;
+ case MachineOperand::MO_BlockAddress:
+ MCOp = lowerSymbolOperand(
+ MO, Printer.GetBlockAddressSymbol(MO.getBlockAddress()));
+ break;
+ case MachineOperand::MO_JumpTableIndex:
+ MCOp = lowerSymbolOperand(MO, Printer.GetJTISymbol(MO.getIndex()));
+ break;
+ case MachineOperand::MO_ConstantPoolIndex:
+ MCOp = lowerSymbolOperand(MO, Printer.GetCPISymbol(MO.getIndex()));
+ break;
+ }
+
+ OutMI.addOperand(MCOp);
+ }
+}
+
+} // end of namespace llvm
+
diff --git a/contrib/llvm/lib/Target/AVR/AVRMCInstLower.h b/contrib/llvm/lib/Target/AVR/AVRMCInstLower.h
new file mode 100644
index 000000000000..2e2d1014485e
--- /dev/null
+++ b/contrib/llvm/lib/Target/AVR/AVRMCInstLower.h
@@ -0,0 +1,43 @@
+//===-- AVRMCInstLower.h - Lower MachineInstr to MCInst ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AVR_MCINST_LOWER_H
+#define LLVM_AVR_MCINST_LOWER_H
+
+#include "llvm/Support/Compiler.h"
+
+namespace llvm {
+
+class AsmPrinter;
+class MachineInstr;
+class MachineOperand;
+class MCContext;
+class MCInst;
+class MCOperand;
+class MCSymbol;
+
+/// Lowers `MachineInstr` objects into `MCInst` objects.
+class AVRMCInstLower {
+public:
+ AVRMCInstLower(MCContext &Ctx, AsmPrinter &Printer)
+ : Ctx(Ctx), Printer(Printer) {}
+
+ /// Lowers a `MachineInstr` into a `MCInst`.
+ void lowerInstruction(const MachineInstr &MI, MCInst &OutMI) const;
+ MCOperand lowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
+
+private:
+ MCContext &Ctx;
+ AsmPrinter &Printer;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_AVR_MCINST_LOWER_H
+
diff --git a/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.cpp b/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.cpp
index 5786f74d1c4f..48798bd4a1da 100644
--- a/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.cpp
@@ -129,13 +129,13 @@ void AVRRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
const MachineFunction &MF = *MBB.getParent();
const AVRTargetMachine &TM = (const AVRTargetMachine &)MF.getTarget();
const TargetInstrInfo &TII = *TM.getSubtargetImpl()->getInstrInfo();
- const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetFrameLowering *TFI = TM.getSubtargetImpl()->getFrameLowering();
int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
- int Offset = MFI->getObjectOffset(FrameIndex);
+ int Offset = MFI.getObjectOffset(FrameIndex);
// Add one to the offset because SP points to an empty slot.
- Offset += MFI->getStackSize() - TFI->getOffsetOfLocalArea() + 1;
+ Offset += MFI.getStackSize() - TFI->getOffsetOfLocalArea() + 1;
// Fold incoming offset.
Offset += MI.getOperand(FIOperandNum + 1).getImm();
@@ -172,7 +172,7 @@ void AVRRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
Opcode = AVR::ADIWRdK;
break;
}
- // Fallthrough
+ LLVM_FALLTHROUGH;
}
default: {
// This opcode will get expanded into a pair of subi/sbci.
@@ -193,7 +193,7 @@ void AVRRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// If the offset is too big we have to adjust and restore the frame pointer
// to materialize a valid load/store with displacement.
//:TODO: consider using only one adiw/sbiw chain for more than one frame index
- if (Offset >= 63) {
+ if (Offset > 63) {
unsigned AddOpc = AVR::ADIWRdK, SubOpc = AVR::SBIWRdK;
int AddOffset = Offset - 63 + 1;
@@ -253,4 +253,14 @@ AVRRegisterInfo::getPointerRegClass(const MachineFunction &MF,
return &AVR::PTRDISPREGSRegClass;
}
+void AVRRegisterInfo::splitReg(unsigned Reg,
+ unsigned &LoReg,
+ unsigned &HiReg) const {
+ assert(AVR::DREGSRegClass.contains(Reg) && "can only split 16-bit registers");
+
+ LoReg = getSubReg(Reg, AVR::sub_lo);
+ HiReg = getSubReg(Reg, AVR::sub_hi);
+}
+
} // end of namespace llvm
+
diff --git a/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.h b/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.h
index 59c0849d209b..b97e32ea203f 100644
--- a/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.h
+++ b/contrib/llvm/lib/Target/AVR/AVRRegisterInfo.h
@@ -42,13 +42,15 @@ public:
unsigned FIOperandNum,
RegScavenger *RS = NULL) const override;
- /// Debug information queries.
unsigned getFrameRegister(const MachineFunction &MF) const override;
- /// Returns a TargetRegisterClass used for pointer values.
const TargetRegisterClass *
getPointerRegClass(const MachineFunction &MF,
unsigned Kind = 0) const override;
+
+ /// Splits a 16-bit `DREGS` register into the lo/hi register pair.
+ /// \param Reg A 16-bit register to split.
+ void splitReg(unsigned Reg, unsigned &LoReg, unsigned &HiReg) const;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/AVR/AVRRelaxMemOperations.cpp b/contrib/llvm/lib/Target/AVR/AVRRelaxMemOperations.cpp
new file mode 100644
index 000000000000..26dbcf77b452
--- /dev/null
+++ b/contrib/llvm/lib/Target/AVR/AVRRelaxMemOperations.cpp
@@ -0,0 +1,149 @@
+//===-- AVRRelaxMemOperations.cpp - Relax out of range loads/stores -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass which relaxes out-of-range memory operations into
+// equivalent operations that can handle larger addresses.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AVR.h"
+#include "AVRInstrInfo.h"
+#include "AVRTargetMachine.h"
+#include "MCTargetDesc/AVRMCTargetDesc.h"
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+#define AVR_RELAX_MEM_OPS_NAME "AVR memory operation relaxation pass"
+
+namespace {
+
+class AVRRelaxMem : public MachineFunctionPass {
+public:
+ static char ID;
+
+ AVRRelaxMem() : MachineFunctionPass(ID) {
+ initializeAVRRelaxMemPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ StringRef getPassName() const override { return AVR_RELAX_MEM_OPS_NAME; }
+
+private:
+ typedef MachineBasicBlock Block;
+ typedef Block::iterator BlockIt;
+
+ const TargetInstrInfo *TII;
+
+ template <unsigned OP> bool relax(Block &MBB, BlockIt MBBI);
+
+ bool runOnBasicBlock(Block &MBB);
+ bool runOnInstruction(Block &MBB, BlockIt MBBI);
+
+ MachineInstrBuilder buildMI(Block &MBB, BlockIt MBBI, unsigned Opcode) {
+ return BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(Opcode));
+ }
+};
+
+char AVRRelaxMem::ID = 0;
+
+bool AVRRelaxMem::runOnMachineFunction(MachineFunction &MF) {
+ bool Modified = false;
+
+ const AVRSubtarget &STI = MF.getSubtarget<AVRSubtarget>();
+ TII = STI.getInstrInfo();
+
+ for (Block &MBB : MF) {
+ bool BlockModified = runOnBasicBlock(MBB);
+ Modified |= BlockModified;
+ }
+
+ return Modified;
+}
+
+bool AVRRelaxMem::runOnBasicBlock(Block &MBB) {
+ bool Modified = false;
+
+ BlockIt MBBI = MBB.begin(), E = MBB.end();
+ while (MBBI != E) {
+ BlockIt NMBBI = std::next(MBBI);
+ Modified |= runOnInstruction(MBB, MBBI);
+ MBBI = NMBBI;
+ }
+
+ return Modified;
+}
+
+template <>
+bool AVRRelaxMem::relax<AVR::STDWPtrQRr>(Block &MBB, BlockIt MBBI) {
+ MachineInstr &MI = *MBBI;
+
+ MachineOperand &Ptr = MI.getOperand(0);
+ MachineOperand &Src = MI.getOperand(2);
+ int64_t Imm = MI.getOperand(1).getImm();
+
+ // We can definitely optimise this better.
+ if (Imm > 63) {
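+ // `std` can only encode a 6-bit displacement (0..63), so for larger offsets
+ // the address is materialized in the pointer register instead
+ // (push/adjust/store/pop below).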
+ // Push the previous state of the pointer register.
+ // This instruction must preserve the value.
+ buildMI(MBB, MBBI, AVR::PUSHWRr)
+ .addReg(Ptr.getReg());
+
+ // Add the immediate to the pointer register (done by subtracting the
+ // negated immediate).
+ buildMI(MBB, MBBI, AVR::SBCIWRdK)
+ .addReg(Ptr.getReg(), RegState::Define)
+ .addReg(Ptr.getReg())
+ .addImm(-Imm);
+
+ // Store the value in the source register to the address
+ // pointed to by the pointer register.
+ buildMI(MBB, MBBI, AVR::STWPtrRr)
+ .addReg(Ptr.getReg())
+ .addReg(Src.getReg(), getKillRegState(Src.isKill()));
+
+ // Pop the original state of the pointer register.
+ buildMI(MBB, MBBI, AVR::POPWRd)
+ .addReg(Ptr.getReg(), getKillRegState(Ptr.isKill()));
+
+ MI.removeFromParent();
+ }
+
+ return false;
+}
+
+bool AVRRelaxMem::runOnInstruction(Block &MBB, BlockIt MBBI) {
+ MachineInstr &MI = *MBBI;
+ int Opcode = MBBI->getOpcode();
+
+#define RELAX(Op) \
+ case Op: \
+ return relax<Op>(MBB, MI)
+
+ switch (Opcode) {
+ RELAX(AVR::STDWPtrQRr);
+ }
+#undef RELAX
+ return false;
+}
+
+} // end of anonymous namespace
+
+INITIALIZE_PASS(AVRRelaxMem, "avr-relax-mem",
+ AVR_RELAX_MEM_OPS_NAME, false, false)
+
+namespace llvm {
+
+FunctionPass *createAVRRelaxMemPass() { return new AVRRelaxMem(); }
+
+} // end of namespace llvm
diff --git a/contrib/llvm/lib/Target/AVR/AVRTargetMachine.cpp b/contrib/llvm/lib/Target/AVR/AVRTargetMachine.cpp
index 508723e91c60..fb3262916b4f 100644
--- a/contrib/llvm/lib/Target/AVR/AVRTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/AVR/AVRTargetMachine.cpp
@@ -25,6 +25,8 @@
namespace llvm {
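+// The LLVM data layout string for AVR: little-endian ('e'), 16-bit pointers
+// ('p:16:16:16'), and a native integer width of 8 bits ('n8').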
+static const char *AVRDataLayout = "e-p:16:16:16-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-n8";
+
/// Processes a CPU name.
static StringRef getCPU(StringRef CPU) {
if (CPU.empty() || CPU == "generic") {
@@ -44,7 +46,7 @@ AVRTargetMachine::AVRTargetMachine(const Target &T, const Triple &TT,
Optional<Reloc::Model> RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
: LLVMTargetMachine(
- T, "e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8-i64:8:8-f32:8:8-f64:8:8-n8", TT,
+ T, AVRDataLayout, TT,
getCPU(CPU), FS, Options, getEffectiveRelocModel(RM), CM, OL),
SubTarget(TT, getCPU(CPU), FS, *this) {
this->TLOF = make_unique<AVRTargetObjectFile>();
@@ -65,7 +67,6 @@ public:
bool addInstSelector() override;
void addPreSched2() override;
void addPreRegAlloc() override;
- void addPreEmitPass() override;
};
} // namespace
@@ -75,7 +76,12 @@ TargetPassConfig *AVRTargetMachine::createPassConfig(PassManagerBase &PM) {
extern "C" void LLVMInitializeAVRTarget() {
// Register the target.
- RegisterTargetMachine<AVRTargetMachine> X(TheAVRTarget);
+ RegisterTargetMachine<AVRTargetMachine> X(getTheAVRTarget());
+
+ auto &PR = *PassRegistry::getPassRegistry();
+ initializeAVRExpandPseudoPass(PR);
+ initializeAVRInstrumentFunctionsPass(PR);
+ initializeAVRRelaxMemPass(PR);
}
const AVRSubtarget *AVRTargetMachine::getSubtargetImpl() const {
@@ -91,15 +97,22 @@ const AVRSubtarget *AVRTargetMachine::getSubtargetImpl(const Function &) const {
//===----------------------------------------------------------------------===//
bool AVRPassConfig::addInstSelector() {
+ // Install an instruction selector.
+ addPass(createAVRISelDag(getAVRTargetMachine(), getOptLevel()));
+ // Create the frame analyzer pass used by the PEI pass.
+ addPass(createAVRFrameAnalyzerPass());
+
return false;
}
void AVRPassConfig::addPreRegAlloc() {
+ // Create the dynalloc SP save/restore pass to handle variable sized allocas.
+ addPass(createAVRDynAllocaSRPass());
}
-void AVRPassConfig::addPreSched2() { }
-
-void AVRPassConfig::addPreEmitPass() {
+void AVRPassConfig::addPreSched2() {
+ addPass(createAVRRelaxMemPass());
+ addPass(createAVRExpandPseudoPass());
}
} // end of namespace llvm
diff --git a/contrib/llvm/lib/Target/AVR/AVRTargetObjectFile.cpp b/contrib/llvm/lib/Target/AVR/AVRTargetObjectFile.cpp
index 85f03e818e83..af14d9292f27 100644
--- a/contrib/llvm/lib/Target/AVR/AVRTargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/AVR/AVRTargetObjectFile.cpp
@@ -26,15 +26,16 @@ void AVRTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM) {
}
MCSection *
-AVRTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
- SectionKind Kind, Mangler &Mang,
+AVRTargetObjectFile::SelectSectionForGlobal(const GlobalObject *GO,
+ SectionKind Kind,
const TargetMachine &TM) const {
// Global values in flash memory are placed in the progmem.data section
// unless they already have a user assigned section.
- if (AVR::isProgramMemoryAddress(GV) && !GV->hasSection())
+ if (AVR::isProgramMemoryAddress(GO) && !GO->hasSection())
return ProgmemDataSection;
// Otherwise, we work the same way as ELF.
- return Base::SelectSectionForGlobal(GV, Kind, Mang, TM);
+ return Base::SelectSectionForGlobal(GO, Kind, TM);
}
} // end of namespace llvm
+
diff --git a/contrib/llvm/lib/Target/AVR/AVRTargetObjectFile.h b/contrib/llvm/lib/Target/AVR/AVRTargetObjectFile.h
index 587612584314..ba91036fd64c 100644
--- a/contrib/llvm/lib/Target/AVR/AVRTargetObjectFile.h
+++ b/contrib/llvm/lib/Target/AVR/AVRTargetObjectFile.h
@@ -21,8 +21,7 @@ class AVRTargetObjectFile : public TargetLoweringObjectFileELF {
public:
void Initialize(MCContext &ctx, const TargetMachine &TM) override;
- MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
- Mangler &Mang,
+ MCSection *SelectSectionForGlobal(const GlobalObject *GO, SectionKind Kind,
const TargetMachine &TM) const override;
private:
diff --git a/contrib/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp b/contrib/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
new file mode 100644
index 000000000000..5b0398c0ca34
--- /dev/null
+++ b/contrib/llvm/lib/Target/AVR/AsmParser/AVRAsmParser.cpp
@@ -0,0 +1,631 @@
+//===---- AVRAsmParser.cpp - Parse AVR assembly to MCInst instructions ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AVR.h"
+#include "AVRRegisterInfo.h"
+#include "MCTargetDesc/AVRMCExpr.h"
+#include "MCTargetDesc/AVRMCTargetDesc.h"
+
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCParser/MCAsmLexer.h"
+#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
+#include "llvm/MC/MCParser/MCTargetAsmParser.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#include <sstream>
+
+#define DEBUG_TYPE "avr-asm-parser"
+
+namespace llvm {
+
+/// Parses AVR assembly from a stream.
+class AVRAsmParser : public MCTargetAsmParser {
+ const MCSubtargetInfo &STI;
+ MCAsmParser &Parser;
+ const MCRegisterInfo *MRI;
+
+#define GET_ASSEMBLER_HEADER
+#include "AVRGenAsmMatcher.inc"
+
+ bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
+ OperandVector &Operands, MCStreamer &Out,
+ uint64_t &ErrorInfo,
+ bool MatchingInlineAsm) override;
+
+ bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
+
+ bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc, OperandVector &Operands) override;
+
+ bool ParseDirective(AsmToken directiveID) override;
+
+ OperandMatchResultTy parseMemriOperand(OperandVector &Operands);
+
+ bool parseOperand(OperandVector &Operands);
+ int parseRegisterName(unsigned (*matchFn)(StringRef));
+ int parseRegisterName();
+ int parseRegister();
+ bool tryParseRegisterOperand(OperandVector &Operands);
+ bool tryParseExpression(OperandVector &Operands);
+ bool tryParseRelocExpression(OperandVector &Operands);
+ void eatComma();
+
+ unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
+ unsigned Kind) override;
+
+ unsigned toDREG(unsigned Reg, unsigned From = AVR::sub_lo) {
+ MCRegisterClass const *Class = &AVRMCRegisterClasses[AVR::DREGSRegClassID];
+ return MRI->getMatchingSuperReg(Reg, From, Class);
+ }
+
+ bool emit(MCInst &Instruction, SMLoc const &Loc, MCStreamer &Out) const;
+ bool invalidOperand(SMLoc const &Loc, OperandVector const &Operands,
+ uint64_t const &ErrorInfo);
+ bool missingFeature(SMLoc const &Loc, uint64_t const &ErrorInfo);
+
+public:
+ AVRAsmParser(const MCSubtargetInfo &STI, MCAsmParser &Parser,
+ const MCInstrInfo &MII, const MCTargetOptions &Options)
+ : MCTargetAsmParser(Options, STI), STI(STI), Parser(Parser) {
+ MCAsmParserExtension::Initialize(Parser);
+ MRI = getContext().getRegisterInfo();
+
+ setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits()));
+ }
+
+ MCAsmParser &getParser() const { return Parser; }
+ MCAsmLexer &getLexer() const { return Parser.getLexer(); }
+};
+
+/// A parsed AVR assembly operand.
+class AVROperand : public MCParsedAsmOperand {
+ typedef MCParsedAsmOperand Base;
+ enum KindTy { k_Immediate, k_Register, k_Token, k_Memri } Kind;
+
+public:
+ AVROperand(StringRef Tok, SMLoc const &S)
+ : Base(), Kind(k_Token), Tok(Tok), Start(S), End(S) {}
+ AVROperand(unsigned Reg, SMLoc const &S, SMLoc const &E)
+ : Base(), Kind(k_Register), RegImm({Reg, nullptr}), Start(S), End(E) {}
+ AVROperand(MCExpr const *Imm, SMLoc const &S, SMLoc const &E)
+ : Base(), Kind(k_Immediate), RegImm({0, Imm}), Start(S), End(E) {}
+ AVROperand(unsigned Reg, MCExpr const *Imm, SMLoc const &S, SMLoc const &E)
+ : Base(), Kind(k_Memri), RegImm({Reg, Imm}), Start(S), End(E) {}
+
+ struct RegisterImmediate {
+ unsigned Reg;
+ MCExpr const *Imm;
+ };
+ union {
+ StringRef Tok;
+ RegisterImmediate RegImm;
+ };
+
+ SMLoc Start, End;
+
+public:
+ void addRegOperands(MCInst &Inst, unsigned N) const {
+ assert(Kind == k_Register && "Unexpected operand kind");
+ assert(N == 1 && "Invalid number of operands!");
+
+ Inst.addOperand(MCOperand::createReg(getReg()));
+ }
+
+ void addExpr(MCInst &Inst, const MCExpr *Expr) const {
+ // Add as immediate when possible
+ if (!Expr)
+ Inst.addOperand(MCOperand::createImm(0));
+ else if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Expr))
+ Inst.addOperand(MCOperand::createImm(CE->getValue()));
+ else
+ Inst.addOperand(MCOperand::createExpr(Expr));
+ }
+
+ void addImmOperands(MCInst &Inst, unsigned N) const {
+ assert(Kind == k_Immediate && "Unexpected operand kind");
+ assert(N == 1 && "Invalid number of operands!");
+
+ const MCExpr *Expr = getImm();
+ addExpr(Inst, Expr);
+ }
+
+ /// Adds the contained reg+imm operand to an instruction.
+ void addMemriOperands(MCInst &Inst, unsigned N) const {
+ assert(Kind == k_Memri && "Unexpected operand kind");
+ assert(N == 2 && "Invalid number of operands");
+
+ Inst.addOperand(MCOperand::createReg(getReg()));
+ addExpr(Inst, getImm());
+ }
+
+ bool isReg() const { return Kind == k_Register; }
+ bool isImm() const { return Kind == k_Immediate; }
+ bool isToken() const { return Kind == k_Token; }
+ bool isMem() const { return Kind == k_Memri; }
+ bool isMemri() const { return Kind == k_Memri; }
+
+ StringRef getToken() const {
+ assert(Kind == k_Token && "Invalid access!");
+ return Tok;
+ }
+
+ unsigned getReg() const {
+ assert((Kind == k_Register || Kind == k_Memri) && "Invalid access!");
+
+ return RegImm.Reg;
+ }
+
+ const MCExpr *getImm() const {
+ assert((Kind == k_Immediate || Kind == k_Memri) && "Invalid access!");
+ return RegImm.Imm;
+ }
+
+ static std::unique_ptr<AVROperand> CreateToken(StringRef Str, SMLoc S) {
+ return make_unique<AVROperand>(Str, S);
+ }
+
+ static std::unique_ptr<AVROperand> CreateReg(unsigned RegNum, SMLoc S,
+ SMLoc E) {
+ return make_unique<AVROperand>(RegNum, S, E);
+ }
+
+ static std::unique_ptr<AVROperand> CreateImm(const MCExpr *Val, SMLoc S,
+ SMLoc E) {
+ return make_unique<AVROperand>(Val, S, E);
+ }
+
+ static std::unique_ptr<AVROperand>
+ CreateMemri(unsigned RegNum, const MCExpr *Val, SMLoc S, SMLoc E) {
+ return make_unique<AVROperand>(RegNum, Val, S, E);
+ }
+
+ void makeToken(StringRef Token) {
+ Kind = k_Token;
+ Tok = Token;
+ }
+
+ void makeReg(unsigned RegNo) {
+ Kind = k_Register;
+ RegImm = {RegNo, nullptr};
+ }
+
+ void makeImm(MCExpr const *Ex) {
+ Kind = k_Immediate;
+ RegImm = {0, Ex};
+ }
+
+ void makeMemri(unsigned RegNo, MCExpr const *Imm) {
+ Kind = k_Memri;
+ RegImm = {RegNo, Imm};
+ }
+
+ SMLoc getStartLoc() const { return Start; }
+ SMLoc getEndLoc() const { return End; }
+
+ virtual void print(raw_ostream &O) const {
+ switch (Kind) {
+ case k_Token:
+ O << "Token: \"" << getToken() << "\"";
+ break;
+ case k_Register:
+ O << "Register: " << getReg();
+ break;
+ case k_Immediate:
+ O << "Immediate: \"" << *getImm() << "\"";
+ break;
+ case k_Memri: {
+ // Only manually print the sign for non-negative values,
+ // as negative values print their sign automatically.
+ O << "Memri: \"" << getReg() << '+' << *getImm() << "\"";
+ break;
+ }
+ }
+ O << "\n";
+ }
+};
+
+// Auto-generated Match Functions
+
+/// Maps from the set of all register names to a register number.
+/// \note Generated by TableGen.
+static unsigned MatchRegisterName(StringRef Name);
+
+/// Maps from the set of all alternative register names to a register number.
+/// \note Generated by TableGen.
+static unsigned MatchRegisterAltName(StringRef Name);
+
+bool AVRAsmParser::invalidOperand(SMLoc const &Loc,
+ OperandVector const &Operands,
+ uint64_t const &ErrorInfo) {
+ SMLoc ErrorLoc = Loc;
+ char const *Diag = 0;
+
+ if (ErrorInfo != ~0U) {
+ if (ErrorInfo >= Operands.size()) {
+ Diag = "too few operands for instruction.";
+ } else {
+ AVROperand const &Op = (AVROperand const &)*Operands[ErrorInfo];
+
+ // TODO: See if we can do a better error than just "invalid ...".
+ if (Op.getStartLoc() != SMLoc()) {
+ ErrorLoc = Op.getStartLoc();
+ }
+ }
+ }
+
+ if (!Diag) {
+ Diag = "invalid operand for instruction";
+ }
+
+ return Error(ErrorLoc, Diag);
+}
+
+bool AVRAsmParser::missingFeature(llvm::SMLoc const &Loc,
+ uint64_t const &ErrorInfo) {
+ return Error(Loc, "instruction requires a CPU feature not currently enabled");
+}
+
+bool AVRAsmParser::emit(MCInst &Inst, SMLoc const &Loc, MCStreamer &Out) const {
+ Inst.setLoc(Loc);
+ Out.EmitInstruction(Inst, STI);
+
+ return false;
+}
+
+bool AVRAsmParser::MatchAndEmitInstruction(SMLoc Loc, unsigned &Opcode,
+ OperandVector &Operands,
+ MCStreamer &Out, uint64_t &ErrorInfo,
+ bool MatchingInlineAsm) {
+ MCInst Inst;
+ unsigned MatchResult =
+ MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm);
+
+ switch (MatchResult) {
+ case Match_Success: return emit(Inst, Loc, Out);
+ case Match_MissingFeature: return missingFeature(Loc, ErrorInfo);
+ case Match_InvalidOperand: return invalidOperand(Loc, Operands, ErrorInfo);
+ case Match_MnemonicFail: return Error(Loc, "invalid instruction");
+ default: return true;
+ }
+}
+
+/// Parses a register name using a given matching function.
+/// Checks for lowercase or uppercase if necessary.
+int AVRAsmParser::parseRegisterName(unsigned (*matchFn)(StringRef)) {
+ StringRef Name = Parser.getTok().getString();
+
+ int RegNum = matchFn(Name);
+
+ // GCC supports case insensitive register names. Some of the AVR registers
+ // are all lower case, some are all upper case, but none are mixed. We prefer
+ // to use the original names in the register definitions. That is why we
+ // have to test both upper and lower case here.
+ if (RegNum == AVR::NoRegister) {
+ RegNum = matchFn(Name.lower());
+ }
+ if (RegNum == AVR::NoRegister) {
+ RegNum = matchFn(Name.upper());
+ }
+
+ return RegNum;
+}
+
+int AVRAsmParser::parseRegisterName() {
+ int RegNum = parseRegisterName(&MatchRegisterName);
+
+ if (RegNum == AVR::NoRegister)
+ RegNum = parseRegisterName(&MatchRegisterAltName);
+
+ return RegNum;
+}
+
+int AVRAsmParser::parseRegister() {
+ int RegNum = AVR::NoRegister;
+
+ if (Parser.getTok().is(AsmToken::Identifier)) {
+ // Check for register pair syntax
+ if (Parser.getLexer().peekTok().is(AsmToken::Colon)) {
+ Parser.Lex();
+ Parser.Lex(); // Eat high (odd) register and colon
+
+ if (Parser.getTok().is(AsmToken::Identifier)) {
+ // Convert lower (even) register to DREG
+ RegNum = toDREG(parseRegisterName());
+ }
+ } else {
+ RegNum = parseRegisterName();
+ }
+ }
+ return RegNum;
+}
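+
+// A small usage sketch (operand text hypothetical): for an operand written as
+// `r25:r24`, the colon is peeked, the high (odd) register and the colon are
+// consumed, and the remaining lower register `r24` is widened to the R25R24
+// pair register via toDREG().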
+
+bool AVRAsmParser::tryParseRegisterOperand(OperandVector &Operands) {
+ int RegNo = parseRegister();
+
+ if (RegNo == AVR::NoRegister)
+ return true;
+
+ AsmToken const &T = Parser.getTok();
+ Operands.push_back(AVROperand::CreateReg(RegNo, T.getLoc(), T.getEndLoc()));
+ Parser.Lex(); // Eat register token.
+
+ return false;
+}
+
+bool AVRAsmParser::tryParseExpression(OperandVector &Operands) {
+ SMLoc S = Parser.getTok().getLoc();
+
+ if (!tryParseRelocExpression(Operands))
+ return false;
+
+ if ((Parser.getTok().getKind() == AsmToken::Plus ||
+ Parser.getTok().getKind() == AsmToken::Minus) &&
+ Parser.getLexer().peekTok().getKind() == AsmToken::Identifier) {
+ // Don't handle this case - it should be split into two
+ // separate tokens.
+ return true;
+ }
+
+ // Parse (potentially inner) expression
+ MCExpr const *Expression;
+ if (getParser().parseExpression(Expression))
+ return true;
+
+ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ Operands.push_back(AVROperand::CreateImm(Expression, S, E));
+ return false;
+}
+
+bool AVRAsmParser::tryParseRelocExpression(OperandVector &Operands) {
+ bool isNegated = false;
+ AVRMCExpr::VariantKind ModifierKind = AVRMCExpr::VK_AVR_None;
+
+ SMLoc S = Parser.getTok().getLoc();
+
+ // Check for sign
+ AsmToken tokens[2];
+ size_t ReadCount = Parser.getLexer().peekTokens(tokens);
+
+ if (ReadCount == 2) {
+ if (tokens[0].getKind() == AsmToken::Identifier &&
+ tokens[1].getKind() == AsmToken::LParen) {
+
+ AsmToken::TokenKind CurTok = Parser.getLexer().getKind();
+ if (CurTok == AsmToken::Minus) {
+ isNegated = true;
+ } else {
+ assert(CurTok == AsmToken::Plus);
+ isNegated = false;
+ }
+
+ // Eat the sign
+ Parser.Lex();
+ }
+ }
+
+ // Check if we have a target specific modifier (lo8, hi8, &c)
+ if (Parser.getTok().getKind() != AsmToken::Identifier ||
+ Parser.getLexer().peekTok().getKind() != AsmToken::LParen) {
+ // Not a reloc expr
+ return true;
+ }
+ StringRef ModifierName = Parser.getTok().getString();
+ ModifierKind = AVRMCExpr::getKindByName(ModifierName.str().c_str());
+
+ if (ModifierKind != AVRMCExpr::VK_AVR_None) {
+ Parser.Lex();
+ Parser.Lex(); // Eat modifier name and parenthesis
+ } else {
+ return Error(Parser.getTok().getLoc(), "unknown modifier");
+ }
+
+ MCExpr const *InnerExpression;
+ if (getParser().parseExpression(InnerExpression))
+ return true;
+
+ // If we have a modifier wrap the inner expression
+ assert(Parser.getTok().getKind() == AsmToken::RParen);
+ Parser.Lex(); // Eat closing parenthesis
+
+ MCExpr const *Expression = AVRMCExpr::create(ModifierKind, InnerExpression,
+ isNegated, getContext());
+
+ SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ Operands.push_back(AVROperand::CreateImm(Expression, S, E));
+
+ return false;
+}
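+
+// For illustration (symbol name hypothetical), this accepts operands such as
+// `lo8(foo)`, `hi8(foo)` and the negated form `-lo8(foo)`: the optional sign
+// is consumed first, the modifier is resolved via AVRMCExpr::getKindByName(),
+// and the inner expression is wrapped in an AVRMCExpr that records both the
+// modifier kind and the negation flag.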
+
+bool AVRAsmParser::parseOperand(OperandVector &Operands) {
+ DEBUG(dbgs() << "parseOperand\n");
+
+ switch (getLexer().getKind()) {
+ default:
+ return Error(Parser.getTok().getLoc(), "unexpected token in operand");
+
+ case AsmToken::Identifier:
+ // Try to parse a register, if it fails,
+ // fall through to the next case.
+ if (!tryParseRegisterOperand(Operands)) {
+ return false;
+ }
+ case AsmToken::LParen:
+ case AsmToken::Integer:
+ case AsmToken::Dot:
+ return tryParseExpression(Operands);
+ case AsmToken::Plus:
+ case AsmToken::Minus: {
+ // If the sign precedes a number, parse the number,
+ // otherwise treat the sign as an independent token.
+ switch (getLexer().peekTok().getKind()) {
+ case AsmToken::Integer:
+ case AsmToken::BigNum:
+ case AsmToken::Identifier:
+ case AsmToken::Real:
+ if (!tryParseExpression(Operands))
+ return false;
+ default:
+ break;
+ }
+ // Treat the token as an independent token.
+ Operands.push_back(AVROperand::CreateToken(Parser.getTok().getString(),
+ Parser.getTok().getLoc()));
+ Parser.Lex(); // Eat the token.
+ return false;
+ }
+ }
+
+ // Could not parse operand
+ return true;
+}
+
+OperandMatchResultTy
+AVRAsmParser::parseMemriOperand(OperandVector &Operands) {
+ DEBUG(dbgs() << "parseMemriOperand()\n");
+
+ SMLoc E, S;
+ MCExpr const *Expression;
+ int RegNo;
+
+ // Parse register.
+ {
+ RegNo = parseRegister();
+
+ if (RegNo == AVR::NoRegister)
+ return MatchOperand_ParseFail;
+
+ S = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ Parser.Lex(); // Eat register token.
+ }
+
+ // Parse the immediate.
+ {
+ if (getParser().parseExpression(Expression))
+ return MatchOperand_ParseFail;
+
+ E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+ }
+
+ Operands.push_back(AVROperand::CreateMemri(RegNo, Expression, S, E));
+
+ return MatchOperand_Success;
+}
+
+bool AVRAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
+ SMLoc &EndLoc) {
+ StartLoc = Parser.getTok().getLoc();
+ RegNo = parseRegister();
+ EndLoc = Parser.getTok().getLoc();
+
+ return (RegNo == AVR::NoRegister);
+}
+
+void AVRAsmParser::eatComma() {
+ if (getLexer().is(AsmToken::Comma)) {
+ Parser.Lex();
+ } else {
+ // GCC allows commas to be omitted.
+ }
+}
+
+bool AVRAsmParser::ParseInstruction(ParseInstructionInfo &Info,
+ StringRef Mnemonic, SMLoc NameLoc,
+ OperandVector &Operands) {
+ Operands.push_back(AVROperand::CreateToken(Mnemonic, NameLoc));
+
+ bool first = true;
+ while (getLexer().isNot(AsmToken::EndOfStatement)) {
+ if (!first) eatComma();
+
+ first = false;
+
+ auto MatchResult = MatchOperandParserImpl(Operands, Mnemonic);
+
+ if (MatchResult == MatchOperand_Success) {
+ continue;
+ }
+
+ if (MatchResult == MatchOperand_ParseFail) {
+ SMLoc Loc = getLexer().getLoc();
+ Parser.eatToEndOfStatement();
+
+ return Error(Loc, "failed to parse register and immediate pair");
+ }
+
+ if (parseOperand(Operands)) {
+ SMLoc Loc = getLexer().getLoc();
+ Parser.eatToEndOfStatement();
+ return Error(Loc, "unexpected token in argument list");
+ }
+ }
+ Parser.Lex(); // Consume the EndOfStatement
+ return false;
+}
+
+bool AVRAsmParser::ParseDirective(llvm::AsmToken DirectiveID) { return true; }
+
+extern "C" void LLVMInitializeAVRAsmParser() {
+ RegisterMCAsmParser<AVRAsmParser> X(getTheAVRTarget());
+}
+
+#define GET_REGISTER_MATCHER
+#define GET_MATCHER_IMPLEMENTATION
+#include "AVRGenAsmMatcher.inc"
+
+// Uses enums defined in AVRGenAsmMatcher.inc
+unsigned AVRAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
+ unsigned ExpectedKind) {
+ AVROperand &Op = static_cast<AVROperand &>(AsmOp);
+ MatchClassKind Expected = static_cast<MatchClassKind>(ExpectedKind);
+
+ // If need be, GCC converts bare numbers to register names.
+ // It's ugly, but GCC supports it.
+ if (Op.isImm()) {
+ if (MCConstantExpr const *Const = dyn_cast<MCConstantExpr>(Op.getImm())) {
+ int64_t RegNum = Const->getValue();
+ std::ostringstream RegName;
+ RegName << "r" << RegNum;
+ RegNum = MatchRegisterName(RegName.str().c_str());
+ if (RegNum != AVR::NoRegister) {
+ Op.makeReg(RegNum);
+ if (validateOperandClass(Op, Expected) == Match_Success) {
+ return Match_Success;
+ }
+ }
+ // Let the other quirks try their magic.
+ }
+ }
+
+ if (Op.isReg()) {
+ // If the instruction uses a register pair but we got a single, lower
+ // register we perform a "class cast".
+ if (isSubclass(Expected, MCK_DREGS)) {
+ unsigned correspondingDREG = toDREG(Op.getReg());
+
+ if (correspondingDREG != AVR::NoRegister) {
+ Op.makeReg(correspondingDREG);
+ return validateOperandClass(Op, Expected);
+ }
+ }
+ }
+ return Match_InvalidOperand;
+}
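+
+// Two illustrative cases (assembly text hypothetical): in `ldi 24, 5` the bare
+// number 24 is rewritten to the register r24 before matching, and in
+// `adiw r24, 1`, where a register pair is expected, r24 is "class cast" to the
+// R25R24 pair via toDREG() so the match can still succeed.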
+
+} // end of namespace llvm
diff --git a/contrib/llvm/lib/Target/AVR/Disassembler/AVRDisassembler.cpp b/contrib/llvm/lib/Target/AVR/Disassembler/AVRDisassembler.cpp
new file mode 100644
index 000000000000..d2a21fb64635
--- /dev/null
+++ b/contrib/llvm/lib/Target/AVR/Disassembler/AVRDisassembler.cpp
@@ -0,0 +1,156 @@
+//===- AVRDisassembler.cpp - Disassembler for AVR ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is part of the AVR Disassembler.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AVR.h"
+#include "AVRRegisterInfo.h"
+#include "AVRSubtarget.h"
+#include "MCTargetDesc/AVRMCTargetDesc.h"
+
+#include "llvm/MC/MCDisassembler/MCDisassembler.h"
+#include "llvm/MC/MCFixedLenDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "avr-disassembler"
+
+typedef MCDisassembler::DecodeStatus DecodeStatus;
+
+namespace {
+
+/// A disassembler class for AVR.
+class AVRDisassembler : public MCDisassembler {
+public:
+ AVRDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
+ : MCDisassembler(STI, Ctx) {}
+ virtual ~AVRDisassembler() {}
+
+ DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const override;
+};
+}
+
+static MCDisassembler *createAVRDisassembler(const Target &T,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new AVRDisassembler(STI, Ctx);
+}
+
+
+extern "C" void LLVMInitializeAVRDisassembler() {
+ // Register the disassembler.
+ TargetRegistry::RegisterMCDisassembler(getTheAVRTarget(),
+ createAVRDisassembler);
+}
+
+static DecodeStatus DecodeGPR8RegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeLD8RegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodePTRREGSRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address, const void *Decoder) {
+ return MCDisassembler::Success;
+}
+
+#include "AVRGenDisassemblerTables.inc"
+
+static DecodeStatus readInstruction16(ArrayRef<uint8_t> Bytes, uint64_t Address,
+ uint64_t &Size, uint32_t &Insn) {
+ if (Bytes.size() < 2) {
+ Size = 0;
+ return MCDisassembler::Fail;
+ }
+
+ Size = 2;
+ Insn = (Bytes[0] << 0) | (Bytes[1] << 8);
+
+ return MCDisassembler::Success;
+}
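+
+// Worked example: AVR instruction words are stored little-endian, so the byte
+// pair { 0x0F, 0xEF } decodes to Insn = 0x0F | (0xEF << 8) = 0xEF0F, which is
+// the 16-bit encoding of `ldi r16, 0xFF`.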
+
+static DecodeStatus readInstruction32(ArrayRef<uint8_t> Bytes, uint64_t Address,
+ uint64_t &Size, uint32_t &Insn) {
+
+ if (Bytes.size() < 4) {
+ Size = 0;
+ return MCDisassembler::Fail;
+ }
+
+ Size = 4;
+ Insn = (Bytes[0] << 0) | (Bytes[1] << 8) | (Bytes[2] << 16) | (Bytes[3] << 24);
+
+ return MCDisassembler::Success;
+}
+
+static const uint8_t *getDecoderTable(uint64_t Size) {
+
+ switch (Size) {
+ case 2: return DecoderTable16;
+ case 4: return DecoderTable32;
+ default: llvm_unreachable("instructions must be 16 or 32-bits");
+ }
+}
+
+DecodeStatus AVRDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const {
+ uint32_t Insn;
+
+ DecodeStatus Result;
+
+ // Try to decode a 16-bit instruction.
+ {
+ Result = readInstruction16(Bytes, Address, Size, Insn);
+
+ if (Result == MCDisassembler::Fail) return MCDisassembler::Fail;
+
+ // Try to auto-decode a 16-bit instruction.
+ Result = decodeInstruction(getDecoderTable(Size), Instr,
+ Insn, Address, this, STI);
+
+ if (Result != MCDisassembler::Fail)
+ return Result;
+ }
+
+ // Try to decode a 32-bit instruction.
+ {
+ Result = readInstruction32(Bytes, Address, Size, Insn);
+
+ if (Result == MCDisassembler::Fail) return MCDisassembler::Fail;
+
+ Result = decodeInstruction(getDecoderTable(Size), Instr, Insn,
+ Address, this, STI);
+
+ if (Result != MCDisassembler::Fail) {
+ return Result;
+ }
+
+ return MCDisassembler::Fail;
+ }
+}
+
+typedef DecodeStatus (*DecodeFunc)(MCInst &MI, unsigned insn, uint64_t Address,
+ const void *Decoder);
+
diff --git a/contrib/llvm/lib/Target/AVR/InstPrinter/AVRInstPrinter.cpp b/contrib/llvm/lib/Target/AVR/InstPrinter/AVRInstPrinter.cpp
new file mode 100644
index 000000000000..316b7836df0d
--- /dev/null
+++ b/contrib/llvm/lib/Target/AVR/InstPrinter/AVRInstPrinter.cpp
@@ -0,0 +1,171 @@
+//===-- AVRInstPrinter.cpp - Convert AVR MCInst to assembly syntax --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an AVR MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AVRInstPrinter.h"
+
+#include "MCTargetDesc/AVRMCTargetDesc.h"
+
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+
+#include <cstring>
+
+#define DEBUG_TYPE "asm-printer"
+
+namespace llvm {
+
+// Include the auto-generated portion of the assembly writer.
+#define PRINT_ALIAS_INSTR
+#include "AVRGenAsmWriter.inc"
+
+void AVRInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
+ StringRef Annot, const MCSubtargetInfo &STI) {
+ unsigned Opcode = MI->getOpcode();
+
+ // First handle load and store instructions with postinc or predec
+ // of the form "ld reg, X+".
+ // TODO: We should be able to rewrite this using TableGen data.
+ switch (Opcode) {
+ case AVR::LDRdPtr:
+ case AVR::LDRdPtrPi:
+ case AVR::LDRdPtrPd:
+ O << "\tld\t";
+ printOperand(MI, 0, O);
+ O << ", ";
+
+ if (Opcode == AVR::LDRdPtrPd)
+ O << '-';
+
+ printOperand(MI, 1, O);
+
+ if (Opcode == AVR::LDRdPtrPi)
+ O << '+';
+ break;
+ case AVR::STPtrRr:
+ O << "\tst\t";
+ printOperand(MI, 0, O);
+ O << ", ";
+ printOperand(MI, 1, O);
+ break;
+ case AVR::STPtrPiRr:
+ case AVR::STPtrPdRr:
+ O << "\tst\t";
+
+ if (Opcode == AVR::STPtrPdRr)
+ O << '-';
+
+ printOperand(MI, 1, O);
+
+ if (Opcode == AVR::STPtrPiRr)
+ O << '+';
+
+ O << ", ";
+ printOperand(MI, 2, O);
+ break;
+ default:
+ if (!printAliasInstr(MI, O))
+ printInstruction(MI, O);
+
+ printAnnotation(O, Annot);
+ break;
+ }
+}
+
+const char *AVRInstPrinter::getPrettyRegisterName(unsigned RegNum,
+ MCRegisterInfo const &MRI) {
+ // GCC prints register pairs by just printing the lower register.
+ // If the register contains a subregister, print it instead.
+ if (MRI.getNumSubRegIndices() > 0) {
+ unsigned RegLoNum = MRI.getSubReg(RegNum, AVR::sub_lo);
+ RegNum = (RegLoNum != AVR::NoRegister) ? RegLoNum : RegNum;
+ }
+
+ return getRegisterName(RegNum);
+}
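+
+// For example, the pair register R25R24 has R24 as its sub_lo subregister, so
+// it is printed as "r24", matching how avr-gcc names register pairs.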
+
+void AVRInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+ const MCOperandInfo &MOI = this->MII.get(MI->getOpcode()).OpInfo[OpNo];
+
+ if (Op.isReg()) {
+ bool isPtrReg = (MOI.RegClass == AVR::PTRREGSRegClassID) ||
+ (MOI.RegClass == AVR::PTRDISPREGSRegClassID) ||
+ (MOI.RegClass == AVR::ZREGSRegClassID);
+
+ if (isPtrReg) {
+ O << getRegisterName(Op.getReg(), AVR::ptr);
+ } else {
+ O << getPrettyRegisterName(Op.getReg(), MRI);
+ }
+ } else if (Op.isImm()) {
+ O << Op.getImm();
+ } else {
+ assert(Op.isExpr() && "Unknown operand kind in printOperand");
+ O << *Op.getExpr();
+ }
+}
+
+/// This is used to print an immediate value that ends up
+/// being encoded as a pc-relative value.
+void AVRInstPrinter::printPCRelImm(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ const MCOperand &Op = MI->getOperand(OpNo);
+
+ if (Op.isImm()) {
+ int64_t Imm = Op.getImm();
+ O << '.';
+
+ // Print a positive sign if needed.
+ // Negative values have their sign printed automatically.
+ if (Imm >= 0)
+ O << '+';
+
+ O << Imm;
+ } else {
+ assert(Op.isExpr() && "Unknown pcrel immediate operand");
+ O << *Op.getExpr();
+ }
+}
+
+void AVRInstPrinter::printMemri(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ assert(MI->getOperand(OpNo).isReg() && "Expected a register for the first operand");
+
+ const MCOperand &OffsetOp = MI->getOperand(OpNo + 1);
+
+ // Print the register.
+ printOperand(MI, OpNo, O);
+
+ // Print the {+,-}offset.
+ if (OffsetOp.isImm()) {
+ int64_t Offset = OffsetOp.getImm();
+
+ if (Offset >= 0)
+ O << '+';
+
+ O << Offset;
+ } else if (OffsetOp.isExpr()) {
+ O << *OffsetOp.getExpr();
+ } else {
+ llvm_unreachable("unknown type for offset");
+ }
+}
+
+} // end of namespace llvm
+
diff --git a/contrib/llvm/lib/Target/AVR/InstPrinter/AVRInstPrinter.h b/contrib/llvm/lib/Target/AVR/InstPrinter/AVRInstPrinter.h
new file mode 100644
index 000000000000..c9f65b922745
--- /dev/null
+++ b/contrib/llvm/lib/Target/AVR/InstPrinter/AVRInstPrinter.h
@@ -0,0 +1,54 @@
+//===- AVRInstPrinter.h - Convert AVR MCInst to assembly syntax -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This class prints an AVR MCInst to a .s file.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AVR_INST_PRINTER_H
+#define LLVM_AVR_INST_PRINTER_H
+
+#include "llvm/MC/MCInstPrinter.h"
+
+#include "MCTargetDesc/AVRMCTargetDesc.h"
+
+namespace llvm {
+
+/// Prints AVR instructions to a textual stream.
+class AVRInstPrinter : public MCInstPrinter {
+public:
+ AVRInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI)
+ : MCInstPrinter(MAI, MII, MRI) {}
+
+ static const char *getPrettyRegisterName(unsigned RegNo,
+ MCRegisterInfo const &MRI);
+
+ void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot,
+ const MCSubtargetInfo &STI) override;
+
+private:
+ static const char *getRegisterName(unsigned RegNo,
+ unsigned AltIdx = AVR::NoRegAltName);
+
+ void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printPCRelImm(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+ void printMemri(const MCInst *MI, unsigned OpNo, raw_ostream &O);
+
+ // Autogenerated by TableGen.
+ void printInstruction(const MCInst *MI, raw_ostream &O);
+ bool printAliasInstr(const MCInst *MI, raw_ostream &O);
+ void printCustomAliasOperand(const MCInst *MI, unsigned OpIdx,
+ unsigned PrintMethodIdx, raw_ostream &O);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_AVR_INST_PRINTER_H
+
diff --git a/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp b/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
new file mode 100644
index 000000000000..081d8b5740ef
--- /dev/null
+++ b/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.cpp
@@ -0,0 +1,473 @@
+//===-- AVRAsmBackend.cpp - AVR Asm Backend ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AVRAsmBackend class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/AVRAsmBackend.h"
+#include "MCTargetDesc/AVRFixupKinds.h"
+#include "MCTargetDesc/AVRMCTargetDesc.h"
+
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDirectives.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+
+// FIXME: we should be doing checks to make sure asm operands
+// are not out of bounds.
+
+namespace adjust {
+
+using namespace llvm;
+
+void signed_width(unsigned Width, uint64_t Value, std::string Description,
+ const MCFixup &Fixup, MCContext *Ctx = nullptr) {
+ if (!isIntN(Width, Value)) {
+ std::string Diagnostic = "out of range " + Description;
+
+ int64_t Min = minIntN(Width);
+ int64_t Max = maxIntN(Width);
+
+ Diagnostic += " (expected an integer in the range " + std::to_string(Min) +
+ " to " + std::to_string(Max) + ")";
+
+ if (Ctx) {
+ Ctx->reportFatalError(Fixup.getLoc(), Diagnostic);
+ } else {
+ llvm_unreachable(Diagnostic.c_str());
+ }
+ }
+}
+
+void unsigned_width(unsigned Width, uint64_t Value, std::string Description,
+ const MCFixup &Fixup, MCContext *Ctx = nullptr) {
+ if (!isUIntN(Width, Value)) {
+ std::string Diagnostic = "out of range " + Description;
+
+ int64_t Max = maxUIntN(Width);
+
+ Diagnostic += " (expected an integer in the range 0 to " +
+ std::to_string(Max) + ")";
+
+ if (Ctx) {
+ Ctx->reportFatalError(Fixup.getLoc(), Diagnostic);
+ } else {
+ llvm_unreachable(Diagnostic.c_str());
+ }
+ }
+}
+
+/// Adjusts the value of a branch target before fixup application.
+void adjustBranch(unsigned Size, const MCFixup &Fixup, uint64_t &Value,
+ MCContext *Ctx = nullptr) {
+ // We have one extra bit of precision because the value is rightshifted by
+ // one.
+ unsigned_width(Size + 1, Value, std::string("branch target"), Fixup, Ctx);
+
+ // Rightshifts the value by one.
+ AVR::fixups::adjustBranchTarget(Value);
+}
+
+/// Adjusts the value of a relative branch target before fixup application.
+void adjustRelativeBranch(unsigned Size, const MCFixup &Fixup, uint64_t &Value,
+ MCContext *Ctx = nullptr) {
+ // We have one extra bit of precision because the value is rightshifted by
+ // one.
+ signed_width(Size + 1, Value, std::string("branch target"), Fixup, Ctx);
+
+ Value -= 2;
+
+ // Rightshifts the value by one.
+ AVR::fixups::adjustBranchTarget(Value);
+}
+
+/// 22-bit absolute fixup.
+///
+/// Resolves to:
+/// 1001 kkkk 010k kkkk kkkk kkkk 111k kkkk
+///
+/// Offset of 0 (so the result is left shifted by 3 bits before application).
+void fixup_call(unsigned Size, const MCFixup &Fixup, uint64_t &Value,
+ MCContext *Ctx = nullptr) {
+ adjustBranch(Size, Fixup, Value, Ctx);
+
+ auto top = Value & (0xf00000 << 6); // the top four bits
+ auto middle = Value & (0x1ffff << 5); // the middle 13 bits
+ auto bottom = Value & 0x1f; // the bottom 5 bits
+
+ Value = (top << 6) | (middle << 3) | (bottom << 0);
+}
+
+/// 7-bit PC-relative fixup.
+///
+/// Resolves to:
+/// 0000 00kk kkkk k000
+/// Offset of 0 (so the result is left shifted by 3 bits before application).
+void fixup_7_pcrel(unsigned Size, const MCFixup &Fixup, uint64_t &Value,
+ MCContext *Ctx = nullptr) {
+ adjustRelativeBranch(Size, Fixup, Value, Ctx);
+
+ // Because the value may be negative, we must mask out the sign bits
+ Value &= 0x7f;
+}
+
+/// 12-bit PC-relative fixup.
+/// Yes, the fixup is 12 bits even though the name says otherwise.
+///
+/// Resolves to:
+/// 0000 kkkk kkkk kkkk
+/// Offset of 0 (so the result isn't left-shifted before application).
+void fixup_13_pcrel(unsigned Size, const MCFixup &Fixup, uint64_t &Value,
+ MCContext *Ctx = nullptr) {
+ adjustRelativeBranch(Size, Fixup, Value, Ctx);
+
+ // Because the value may be negative, we must mask out the sign bits
+ Value &= 0xfff;
+}
+
+/// 6-bit fixup for the immediate operand of the ADIW family of
+/// instructions.
+///
+/// Resolves to:
+/// 0000 0000 kk00 kkkk
+void fixup_6_adiw(const MCFixup &Fixup, uint64_t &Value,
+ MCContext *Ctx = nullptr) {
+ unsigned_width(6, Value, std::string("immediate"), Fixup, Ctx);
+
+ Value = ((Value & 0x30) << 2) | (Value & 0x0f);
+}
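+
+// Worked example: the maximum ADIW immediate 63 (0b111111) becomes
+// ((0x30) << 2) | 0x0F = 0xCF = 0b11001111, matching the
+// `0000 0000 kk00 kkkk` pattern above.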
+
+/// 5-bit port number fixup on the SBIC family of instructions.
+///
+/// Resolves to:
+/// 0000 0000 AAAA A000
+void fixup_port5(const MCFixup &Fixup, uint64_t &Value,
+ MCContext *Ctx = nullptr) {
+ unsigned_width(5, Value, std::string("port number"), Fixup, Ctx);
+
+ Value &= 0x1f;
+
+ Value <<= 3;
+}
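+
+// Worked example: port number 0x1F is masked and shifted left by 3, giving
+// 0xF8 = 0b11111000, i.e. the `AAAA A` field of `0000 0000 AAAA A000`.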
+
+/// 6-bit port number fixup on the `IN` family of instructions.
+///
+/// Resolves to:
+/// 1011 0AAd dddd AAAA
+void fixup_port6(const MCFixup &Fixup, uint64_t &Value,
+ MCContext *Ctx = nullptr) {
+ unsigned_width(6, Value, std::string("port number"), Fixup, Ctx);
+
+ Value = ((Value & 0x30) << 5) | (Value & 0x0f);
+}
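+
+// Worked example: port number 0x3F becomes ((0x30) << 5) | 0x0F = 0x60F,
+// placing the upper two A bits at bit positions 9-10 and the lower four A bits
+// at positions 0-3, as the `1011 0AAd dddd AAAA` pattern requires.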
+
+/// Adjusts a program memory address.
+/// This is a simple right-shift.
+void pm(uint64_t &Value) {
+ Value >>= 1;
+}
+
+/// Fixups relating to the LDI instruction.
+namespace ldi {
+
+/// Adjusts a value to fix up the immediate of an `LDI Rd, K` instruction.
+///
+/// Resolves to:
+/// 0000 KKKK 0000 KKKK
+/// Offset of 0 (so the result isn't left-shifted before application).
+void fixup(unsigned Size, const MCFixup &Fixup, uint64_t &Value,
+ MCContext *Ctx = nullptr) {
+ uint64_t upper = Value & 0xf0;
+ uint64_t lower = Value & 0x0f;
+
+ Value = (upper << 4) | lower;
+}
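+
+// Worked example: an immediate of 0xAB splits into upper = 0xA0 and
+// lower = 0x0B, giving Value = (0xA0 << 4) | 0x0B = 0x0A0B, so the two nibbles
+// land in the K fields of the `0000 KKKK 0000 KKKK` pattern above.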
+
+void neg(uint64_t &Value) { Value *= -1; }
+
+void lo8(unsigned Size, const MCFixup &Fixup, uint64_t &Value,
+ MCContext *Ctx = nullptr) {
+ Value &= 0xff;
+ ldi::fixup(Size, Fixup, Value, Ctx);
+}
+
+void hi8(unsigned Size, const MCFixup &Fixup, uint64_t &Value,
+ MCContext *Ctx = nullptr) {
+ Value = (Value & 0xff00) >> 8;
+ ldi::fixup(Size, Fixup, Value, Ctx);
+}
+
+void hh8(unsigned Size, const MCFixup &Fixup, uint64_t &Value,
+ MCContext *Ctx = nullptr) {
+ Value = (Value & 0xff0000) >> 16;
+ ldi::fixup(Size, Fixup, Value, Ctx);
+}
+
+void ms8(unsigned Size, const MCFixup &Fixup, uint64_t &Value,
+ MCContext *Ctx = nullptr) {
+ Value = (Value & 0xff000000) >> 24;
+ ldi::fixup(Size, Fixup, Value, Ctx);
+}
+
+} // end of ldi namespace
+} // end of adjust namespace
+
+namespace llvm {
+
+// Prepares the fixup value for the target's encoding space.
+void AVRAsmBackend::adjustFixupValue(const MCFixup &Fixup, uint64_t &Value,
+ MCContext *Ctx) const {
+ // The size of the fixup in bits.
+ uint64_t Size = AVRAsmBackend::getFixupKindInfo(Fixup.getKind()).TargetSize;
+
+ unsigned Kind = Fixup.getKind();
+
+ switch (Kind) {
+ default:
+ llvm_unreachable("unhandled fixup");
+ case AVR::fixup_7_pcrel:
+ adjust::fixup_7_pcrel(Size, Fixup, Value, Ctx);
+ break;
+ case AVR::fixup_13_pcrel:
+ adjust::fixup_13_pcrel(Size, Fixup, Value, Ctx);
+ break;
+ case AVR::fixup_call:
+ adjust::fixup_call(Size, Fixup, Value, Ctx);
+ break;
+ case AVR::fixup_ldi:
+ adjust::ldi::fixup(Size, Fixup, Value, Ctx);
+ break;
+ case AVR::fixup_lo8_ldi:
+ case AVR::fixup_lo8_ldi_pm:
+ if (Kind == AVR::fixup_lo8_ldi_pm) adjust::pm(Value);
+
+ adjust::ldi::lo8(Size, Fixup, Value, Ctx);
+ break;
+ case AVR::fixup_hi8_ldi:
+ case AVR::fixup_hi8_ldi_pm:
+ if (Kind == AVR::fixup_hi8_ldi_pm) adjust::pm(Value);
+
+ adjust::ldi::hi8(Size, Fixup, Value, Ctx);
+ break;
+ case AVR::fixup_hh8_ldi:
+ case AVR::fixup_hh8_ldi_pm:
+ if (Kind == AVR::fixup_hh8_ldi_pm) adjust::pm(Value);
+
+ adjust::ldi::hh8(Size, Fixup, Value, Ctx);
+ break;
+ case AVR::fixup_ms8_ldi:
+ adjust::ldi::ms8(Size, Fixup, Value, Ctx);
+ break;
+
+ case AVR::fixup_lo8_ldi_neg:
+ case AVR::fixup_lo8_ldi_pm_neg:
+ if (Kind == AVR::fixup_lo8_ldi_pm_neg) adjust::pm(Value);
+
+ adjust::ldi::neg(Value);
+ adjust::ldi::lo8(Size, Fixup, Value, Ctx);
+ break;
+ case AVR::fixup_hi8_ldi_neg:
+ case AVR::fixup_hi8_ldi_pm_neg:
+ if (Kind == AVR::fixup_hi8_ldi_pm_neg) adjust::pm(Value);
+
+ adjust::ldi::neg(Value);
+ adjust::ldi::hi8(Size, Fixup, Value, Ctx);
+ break;
+ case AVR::fixup_hh8_ldi_neg:
+ case AVR::fixup_hh8_ldi_pm_neg:
+ if (Kind == AVR::fixup_hh8_ldi_pm_neg) adjust::pm(Value);
+
+ adjust::ldi::neg(Value);
+ adjust::ldi::hh8(Size, Fixup, Value, Ctx);
+ break;
+ case AVR::fixup_ms8_ldi_neg:
+ adjust::ldi::neg(Value);
+ adjust::ldi::ms8(Size, Fixup, Value, Ctx);
+ break;
+ case AVR::fixup_16:
+ adjust::unsigned_width(16, Value, std::string("port number"), Fixup, Ctx);
+
+ Value &= 0xffff;
+ break;
+ case AVR::fixup_6_adiw:
+ adjust::fixup_6_adiw(Fixup, Value, Ctx);
+ break;
+
+ case AVR::fixup_port5:
+ adjust::fixup_port5(Fixup, Value, Ctx);
+ break;
+
+ case AVR::fixup_port6:
+ adjust::fixup_port6(Fixup, Value, Ctx);
+ break;
+
+ // Fixups which do not require adjustments.
+ case FK_Data_2:
+ case FK_Data_4:
+ case FK_Data_8:
+ break;
+
+ case FK_GPRel_4:
+ llvm_unreachable("don't know how to adjust this fixup");
+ break;
+ }
+}
+
+MCObjectWriter *AVRAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const {
+ return createAVRELFObjectWriter(OS,
+ MCELFObjectTargetWriter::getOSABI(OSType));
+}
+
+void AVRAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
+ unsigned DataSize, uint64_t Value,
+ bool IsPCRel) const {
+ if (Value == 0)
+ return; // Doesn't change encoding.
+
+ MCFixupKindInfo Info = getFixupKindInfo(Fixup.getKind());
+
+ // The number of bits in the fixup mask
+ auto NumBits = Info.TargetSize + Info.TargetOffset;
+ auto NumBytes = (NumBits / 8) + ((NumBits % 8) == 0 ? 0 : 1);
+
+ // Shift the value into position.
+ Value <<= Info.TargetOffset;
+
+ unsigned Offset = Fixup.getOffset();
+ assert(Offset + NumBytes <= DataSize && "Invalid fixup offset!");
+
+ // For each byte of the fragment that the fixup touches, mask in the
+ // bits from the fixup value.
+ for (unsigned i = 0; i < NumBytes; ++i) {
+ uint8_t mask = (((Value >> (i * 8)) & 0xff));
+ Data[Offset + i] |= mask;
+ }
+}
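+
+// Worked example (value hypothetical): fixup_7_pcrel is described below with
+// TargetOffset = 3 and TargetSize = 7, so a fixup value of 0x2A is shifted to
+// 0x150, NumBits = 10, NumBytes = 2, and the bytes 0x50 and 0x01 are OR'd into
+// Data[Offset] and Data[Offset + 1] respectively.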
+
+MCFixupKindInfo const &AVRAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
+ // NOTE: Many AVR fixups work on sets of non-contiguous bits. We work around
+ // this by saying that the fixup is the size of the entire instruction.
+ const static MCFixupKindInfo Infos[AVR::NumTargetFixupKinds] = {
+ // This table *must* be in the same order as the fixup_* kinds in
+ // AVRFixupKinds.h.
+ //
+ // name offset bits flags
+ {"fixup_32", 0, 32, 0},
+
+ {"fixup_7_pcrel", 3, 7, MCFixupKindInfo::FKF_IsPCRel},
+ {"fixup_13_pcrel", 0, 12, MCFixupKindInfo::FKF_IsPCRel},
+
+ {"fixup_16", 0, 16, 0},
+ {"fixup_16_pm", 0, 16, 0},
+
+ {"fixup_ldi", 0, 8, 0},
+
+ {"fixup_lo8_ldi", 0, 8, 0},
+ {"fixup_hi8_ldi", 0, 8, 0},
+ {"fixup_hh8_ldi", 0, 8, 0},
+ {"fixup_ms8_ldi", 0, 8, 0},
+
+ {"fixup_lo8_ldi_neg", 0, 8, 0},
+ {"fixup_hi8_ldi_neg", 0, 8, 0},
+ {"fixup_hh8_ldi_neg", 0, 8, 0},
+ {"fixup_ms8_ldi_neg", 0, 8, 0},
+
+ {"fixup_lo8_ldi_pm", 0, 8, 0},
+ {"fixup_hi8_ldi_pm", 0, 8, 0},
+ {"fixup_hh8_ldi_pm", 0, 8, 0},
+
+ {"fixup_lo8_ldi_pm_neg", 0, 8, 0},
+ {"fixup_hi8_ldi_pm_neg", 0, 8, 0},
+ {"fixup_hh8_ldi_pm_neg", 0, 8, 0},
+
+ {"fixup_call", 0, 22, 0},
+
+ {"fixup_6", 0, 16, 0}, // non-contiguous
+ {"fixup_6_adiw", 0, 6, 0},
+
+ {"fixup_lo8_ldi_gs", 0, 8, 0},
+ {"fixup_hi8_ldi_gs", 0, 8, 0},
+
+ {"fixup_8", 0, 8, 0},
+ {"fixup_8_lo8", 0, 8, 0},
+ {"fixup_8_hi8", 0, 8, 0},
+ {"fixup_8_hlo8", 0, 8, 0},
+
+ {"fixup_sym_diff", 0, 32, 0},
+ {"fixup_16_ldst", 0, 16, 0},
+
+ {"fixup_lds_sts_16", 0, 16, 0},
+
+ {"fixup_port6", 0, 16, 0}, // non-contiguous
+ {"fixup_port5", 3, 5, 0},
+ };
+
+ if (Kind < FirstTargetFixupKind)
+ return MCAsmBackend::getFixupKindInfo(Kind);
+
+ assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
+ "Invalid kind!");
+
+ return Infos[Kind - FirstTargetFixupKind];
+}
+
+bool AVRAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
+ // If the count is not 2-byte aligned, we must be writing data into the text
+ // section (otherwise we have unaligned instructions, and thus have far
+ // bigger problems), so just write zeros instead.
+ assert((Count % 2) == 0 && "NOP instructions must be 2 bytes");
+
+ OW->WriteZeros(Count);
+ return true;
+}
+
+void AVRAsmBackend::processFixupValue(const MCAssembler &Asm,
+ const MCAsmLayout &Layout,
+ const MCFixup &Fixup,
+ const MCFragment *DF,
+ const MCValue &Target, uint64_t &Value,
+ bool &IsResolved) {
+ switch ((unsigned) Fixup.getKind()) {
+ // Fixups which should always be recorded as relocations.
+ case AVR::fixup_7_pcrel:
+ case AVR::fixup_13_pcrel:
+ case AVR::fixup_call:
+ IsResolved = false;
+ break;
+ default:
+ // Parsed LLVM-generated temporary labels are already
+ // adjusted for instruction size, but normal labels aren't.
+ //
+ // To handle both cases, we simply un-adjust the temporary label
+ // case so it acts like all other labels.
+ if (Target.getSymA()->getSymbol().isTemporary())
+ Value += 2;
+
+ adjustFixupValue(Fixup, Value, &Asm.getContext());
+ break;
+ }
+}
+
+MCAsmBackend *createAVRAsmBackend(const Target &T, const MCRegisterInfo &MRI,
+ const Triple &TT, StringRef CPU,
+ const llvm::MCTargetOptions &TO) {
+ return new AVRAsmBackend(TT.getOS());
+}
+
+} // end of namespace llvm
+
diff --git a/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h b/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h
new file mode 100644
index 000000000000..7ff4b8f350f6
--- /dev/null
+++ b/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRAsmBackend.h
@@ -0,0 +1,78 @@
+//===-- AVRAsmBackend.h - AVR Asm Backend --------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// \file The AVR assembly backend implementation.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#ifndef LLVM_AVR_ASM_BACKEND_H
+#define LLVM_AVR_ASM_BACKEND_H
+
+#include "MCTargetDesc/AVRFixupKinds.h"
+
+#include "llvm/ADT/Triple.h"
+#include "llvm/MC/MCAsmBackend.h"
+
+namespace llvm {
+
+class MCAssembler;
+class MCObjectWriter;
+class Target;
+
+struct MCFixupKindInfo;
+
+/// Utilities for manipulating generated AVR machine code.
+class AVRAsmBackend : public MCAsmBackend {
+public:
+
+ AVRAsmBackend(Triple::OSType OSType)
+ : MCAsmBackend(), OSType(OSType) {}
+
+ void adjustFixupValue(const MCFixup &Fixup, uint64_t &Value,
+ MCContext *Ctx = nullptr) const;
+
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override;
+
+ void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value, bool IsPCRel) const override;
+
+ const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;
+
+ unsigned getNumFixupKinds() const override {
+ return AVR::NumTargetFixupKinds;
+ }
+
+ bool mayNeedRelaxation(const MCInst &Inst) const override { return false; }
+
+ bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const override {
+ llvm_unreachable("RelaxInstruction() unimplemented");
+ return false;
+ }
+
+ void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
+ MCInst &Res) const override {}
+
+ bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override;
+
+ void processFixupValue(const MCAssembler &Asm, const MCAsmLayout &Layout,
+ const MCFixup &Fixup, const MCFragment *DF,
+ const MCValue &Target, uint64_t &Value,
+ bool &IsResolved) override;
+
+private:
+ Triple::OSType OSType;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_AVR_ASM_BACKEND_H
+
diff --git a/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp b/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp
new file mode 100644
index 000000000000..161f305fd014
--- /dev/null
+++ b/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRELFObjectWriter.cpp
@@ -0,0 +1,127 @@
+//===-- AVRELFObjectWriter.cpp - AVR ELF Writer ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/AVRFixupKinds.h"
+#include "MCTargetDesc/AVRMCTargetDesc.h"
+
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/Support/ErrorHandling.h"
+
+namespace llvm {
+
+/// Writes AVR machine code into an ELF32 object file.
+class AVRELFObjectWriter : public MCELFObjectTargetWriter {
+public:
+ AVRELFObjectWriter(uint8_t OSABI);
+
+ virtual ~AVRELFObjectWriter() {}
+
+ unsigned getRelocType(MCContext &Ctx,
+ const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel) const override;
+};
+
+AVRELFObjectWriter::AVRELFObjectWriter(uint8_t OSABI)
+ : MCELFObjectTargetWriter(false, OSABI, ELF::EM_AVR, true, false) {}
+
+unsigned AVRELFObjectWriter::getRelocType(MCContext &Ctx,
+ const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ switch ((unsigned) Fixup.getKind()) {
+ case FK_Data_1:
+ case FK_Data_4:
+ llvm_unreachable("unsupported relocation type");
+ case FK_Data_2:
+ return ELF::R_AVR_16_PM;
+ case AVR::fixup_32:
+ return ELF::R_AVR_32;
+ case AVR::fixup_7_pcrel:
+ return ELF::R_AVR_7_PCREL;
+ case AVR::fixup_13_pcrel:
+ return ELF::R_AVR_13_PCREL;
+ case AVR::fixup_16:
+ return ELF::R_AVR_16;
+ case AVR::fixup_16_pm:
+ return ELF::R_AVR_16_PM;
+ case AVR::fixup_lo8_ldi:
+ return ELF::R_AVR_LO8_LDI;
+ case AVR::fixup_hi8_ldi:
+ return ELF::R_AVR_HI8_LDI;
+ case AVR::fixup_hh8_ldi:
+ return ELF::R_AVR_HH8_LDI;
+ case AVR::fixup_lo8_ldi_neg:
+ return ELF::R_AVR_LO8_LDI_NEG;
+ case AVR::fixup_hi8_ldi_neg:
+ return ELF::R_AVR_HI8_LDI_NEG;
+ case AVR::fixup_hh8_ldi_neg:
+ return ELF::R_AVR_HH8_LDI_NEG;
+ case AVR::fixup_lo8_ldi_pm:
+ return ELF::R_AVR_LO8_LDI_PM;
+ case AVR::fixup_hi8_ldi_pm:
+ return ELF::R_AVR_HI8_LDI_PM;
+ case AVR::fixup_hh8_ldi_pm:
+ return ELF::R_AVR_HH8_LDI_PM;
+ case AVR::fixup_lo8_ldi_pm_neg:
+ return ELF::R_AVR_LO8_LDI_PM_NEG;
+ case AVR::fixup_hi8_ldi_pm_neg:
+ return ELF::R_AVR_HI8_LDI_PM_NEG;
+ case AVR::fixup_hh8_ldi_pm_neg:
+ return ELF::R_AVR_HH8_LDI_PM_NEG;
+ case AVR::fixup_call:
+ return ELF::R_AVR_CALL;
+ case AVR::fixup_ldi:
+ return ELF::R_AVR_LDI;
+ case AVR::fixup_6:
+ return ELF::R_AVR_6;
+ case AVR::fixup_6_adiw:
+ return ELF::R_AVR_6_ADIW;
+ case AVR::fixup_ms8_ldi:
+ return ELF::R_AVR_MS8_LDI;
+ case AVR::fixup_ms8_ldi_neg:
+ return ELF::R_AVR_MS8_LDI_NEG;
+ case AVR::fixup_lo8_ldi_gs:
+ return ELF::R_AVR_LO8_LDI_GS;
+ case AVR::fixup_hi8_ldi_gs:
+ return ELF::R_AVR_HI8_LDI_GS;
+ case AVR::fixup_8:
+ return ELF::R_AVR_8;
+ case AVR::fixup_8_lo8:
+ return ELF::R_AVR_8_LO8;
+ case AVR::fixup_8_hi8:
+ return ELF::R_AVR_8_HI8;
+ case AVR::fixup_8_hlo8:
+ return ELF::R_AVR_8_HLO8;
+ case AVR::fixup_sym_diff:
+ return ELF::R_AVR_SYM_DIFF;
+ case AVR::fixup_16_ldst:
+ return ELF::R_AVR_16_LDST;
+ case AVR::fixup_lds_sts_16:
+ return ELF::R_AVR_LDS_STS_16;
+ case AVR::fixup_port6:
+ return ELF::R_AVR_PORT6;
+ case AVR::fixup_port5:
+ return ELF::R_AVR_PORT5;
+ default:
+ llvm_unreachable("invalid fixup kind!");
+ }
+}
+
+MCObjectWriter *createAVRELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI) {
+ MCELFObjectTargetWriter *MOTW = new AVRELFObjectWriter(OSABI);
+ return createELFObjectWriter(MOTW, OS, true);
+}
+
+} // end of namespace llvm
+
diff --git a/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRFixupKinds.h b/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRFixupKinds.h
new file mode 100644
index 000000000000..d3bd52d343fc
--- /dev/null
+++ b/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRFixupKinds.h
@@ -0,0 +1,149 @@
+//===-- AVRFixupKinds.h - AVR Specific Fixup Entries ------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AVR_FIXUP_KINDS_H
+#define LLVM_AVR_FIXUP_KINDS_H
+
+#include "llvm/MC/MCFixup.h"
+
+namespace llvm {
+namespace AVR {
+
+/// The set of supported fixups.
+///
+/// Although most of the current fixup types reflect a unique relocation
+/// one can have multiple fixup types for a given relocation and thus need
+/// to be uniquely named.
+///
+/// \note This table *must* be in the same order of
+/// MCFixupKindInfo Infos[AVR::NumTargetFixupKinds]
+/// in `AVRAsmBackend.cpp`.
+enum Fixups {
+ /// A 32-bit AVR fixup.
+ fixup_32 = FirstTargetFixupKind,
+
+ /// A 7-bit PC-relative fixup for the family of conditional
+ /// branches which take 7-bit targets (BRNE,BRGT,etc).
+ fixup_7_pcrel,
+ /// A 12-bit PC-relative fixup for the family of branches
+ /// which take 12-bit targets (RJMP,RCALL,etc).
+ /// \note Although the fixup is labelled as 13 bits, it
+ /// is actually only encoded in 12. The reason for
+ ///       the nomenclature is that AVR branch targets are
+ /// rightshifted by 1, because instructions are always
+ /// aligned to 2 bytes, so the 0'th bit is always 0.
+ ///       This way there are 13 bits of precision.
+ fixup_13_pcrel,
+
+ /// A 16-bit address.
+ fixup_16,
+ /// A 16-bit program memory address.
+ fixup_16_pm,
+
+ /// Replaces the 8-bit immediate with another value.
+ fixup_ldi,
+
+ /// Replaces the immediate operand of a 16-bit `Rd, K` instruction
+ /// with the lower 8 bits of a 16-bit value (bits 0-7).
+ fixup_lo8_ldi,
+ /// Replaces the immediate operand of a 16-bit `Rd, K` instruction
+ /// with the upper 8 bits of a 16-bit value (bits 8-15).
+ fixup_hi8_ldi,
+ /// Replaces the immediate operand of a 16-bit `Rd, K` instruction
+ /// with the upper 8 bits of a 24-bit value (bits 16-23).
+ fixup_hh8_ldi,
+ /// Replaces the immediate operand of a 16-bit `Rd, K` instruction
+ /// with the upper 8 bits of a 32-bit value (bits 24-31).
+ fixup_ms8_ldi,
+
+ /// Replaces the immediate operand of a 16-bit `Rd, K` instruction
+ /// with the lower 8 bits of a negated 16-bit value (bits 0-7).
+ fixup_lo8_ldi_neg,
+ /// Replaces the immediate operand of a 16-bit `Rd, K` instruction
+ /// with the upper 8 bits of a negated 16-bit value (bits 8-15).
+ fixup_hi8_ldi_neg,
+ /// Replaces the immediate operand of a 16-bit `Rd, K` instruction
+ /// with the upper 8 bits of a negated 24-bit value (bits 16-23).
+ fixup_hh8_ldi_neg,
+ /// Replaces the immediate operand of a 16-bit `Rd, K` instruction
+ /// with the upper 8 bits of a negated 32-bit value (bits 24-31).
+ fixup_ms8_ldi_neg,
+
+ /// Replaces the immediate operand of a 16-bit `Rd, K` instruction
+ /// with the lower 8 bits of a 16-bit program memory address value (bits 0-7).
+ fixup_lo8_ldi_pm,
+ /// Replaces the immediate operand of a 16-bit `Rd, K` instruction
+ /// with the upper 8 bits of a 16-bit program memory address value (bits
+ /// 8-15).
+ fixup_hi8_ldi_pm,
+ /// Replaces the immediate operand of a 16-bit `Rd, K` instruction
+ /// with the upper 8 bits of a 24-bit program memory address value (bits
+ /// 16-23).
+ fixup_hh8_ldi_pm,
+
+ /// Replaces the immediate operand of a 16-bit `Rd, K` instruction
+ /// with the lower 8 bits of a negated 16-bit program memory address value
+ /// (bits 0-7).
+ fixup_lo8_ldi_pm_neg,
+ /// Replaces the immediate operand of a 16-bit `Rd, K` instruction
+ /// with the upper 8 bits of a negated 16-bit program memory address value
+ /// (bits 8-15).
+ fixup_hi8_ldi_pm_neg,
+ /// Replaces the immediate operand of a 16-bit `Rd, K` instruction
+ /// with the upper 8 bits of a negated 24-bit program memory address value
+ /// (bits 16-23).
+ fixup_hh8_ldi_pm_neg,
+
+ /// A 22-bit fixup for the target of a `CALL k` or `JMP k` instruction.
+ fixup_call,
+
+ /// A symbol+addr fixup for the `LDD <x>+<n>, <r>` family of instructions.
+ fixup_6,
+ /// A 6-bit fixup for the immediate operand of the ADIW family of
+ /// instructions.
+ fixup_6_adiw,
+
+ fixup_lo8_ldi_gs,
+ fixup_hi8_ldi_gs,
+
+ fixup_8,
+ fixup_8_lo8,
+ fixup_8_hi8,
+ fixup_8_hlo8,
+
+ /// Fixup to calculate the difference between two symbols.
+ /// This is the only stateful fixup. We do not support it yet.
+ fixup_sym_diff,
+ fixup_16_ldst,
+
+ fixup_lds_sts_16,
+
+ /// A 6-bit port address.
+ fixup_port6,
+ /// A 5-bit port address.
+ fixup_port5,
+
+ // Marker
+ LastTargetFixupKind,
+ NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
+};
+
+namespace fixups {
+
+/// Adjusts the value of a branch target.
+/// All branch targets in AVR are right-shifted by 1 to take advantage
+/// of the fact that all instructions are aligned to 2-byte boundaries,
+/// so bit 0 of an address is always 0. This gives us another bit
+/// of precision.
+/// \param[in,out] val The target to adjust.
+template <typename T> inline void adjustBranchTarget(T &val) { val >>= 1; }
+
+} // end of namespace fixups
+} // end of namespace AVR
+} // end of namespace llvm
+
+#endif // LLVM_AVR_FIXUP_KINDS_H
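
The convention underlying `fixup_7_pcrel`, `fixup_13_pcrel` and `fixup_call` is that AVR branch and call targets are stored as word offsets: instructions are 2-byte aligned, so bit 0 of a byte offset is always zero and is dropped before encoding, which is exactly what `adjustBranchTarget` does. A minimal sketch of that adjustment, using an illustrative byte offset that is not taken from the source:

    #include <cassert>
    #include <cstdint>

    // Same shape as AVR::fixups::adjustBranchTarget in the header above.
    template <typename T> void adjustBranchTarget(T &Val) { Val >>= 1; }

    int main() {
      // A 0x100-byte forward branch is stored as 0x80 words; the 12 encoded
      // bits of fixup_13_pcrel therefore cover a 13-bit byte range.
      int64_t Target = 0x100; // byte offset, always even
      adjustBranchTarget(Target);
      assert(Target == 0x80);
      return 0;
    }
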
diff --git a/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp b/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp
new file mode 100644
index 000000000000..e6dc8868c705
--- /dev/null
+++ b/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp
@@ -0,0 +1,304 @@
+//===-- AVRMCCodeEmitter.cpp - Convert AVR Code to Machine Code -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the AVRMCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AVRMCCodeEmitter.h"
+
+#include "MCTargetDesc/AVRMCExpr.h"
+#include "MCTargetDesc/AVRMCTargetDesc.h"
+
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "mccodeemitter"
+
+#define GET_INSTRMAP_INFO
+#include "AVRGenInstrInfo.inc"
+#undef GET_INSTRMAP_INFO
+
+namespace llvm {
+
+/// Performs a post-encoding step on an `LD` or `ST` instruction.
+///
+/// The encoding of the LD/ST family of instructions is inconsistent w.r.t.
+/// the pointer register and the addressing mode.
+///
+/// The permutations of the format are as follows:
+/// ld Rd, X `1001 000d dddd 1100`
+/// ld Rd, X+ `1001 000d dddd 1101`
+/// ld Rd, -X `1001 000d dddd 1110`
+///
+/// ld Rd, Y `1000 000d dddd 1000`
+/// ld Rd, Y+ `1001 000d dddd 1001`
+/// ld Rd, -Y `1001 000d dddd 1010`
+///
+/// ld Rd, Z `1000 000d dddd 0000`
+/// ld Rd, Z+ `1001 000d dddd 0001`
+/// ld Rd, -Z `1001 000d dddd 0010`
+/// ^
+/// |
+/// Note this one inconsistent bit - it is 1 sometimes and 0 at other times.
+/// There is no logical pattern. Looking at a truth table, the following
+/// formula can be derived to fit the pattern:
+///
+/// ```
+/// inconsistent_bit = is_predec OR is_postinc OR is_reg_x
+/// ```
+///
+/// We manually set this bit in this post-encoder method.
+unsigned
+AVRMCCodeEmitter::loadStorePostEncoder(const MCInst &MI, unsigned EncodedValue,
+ const MCSubtargetInfo &STI) const {
+
+ assert(MI.getOperand(0).isReg() && MI.getOperand(1).isReg() &&
+ "the load/store operands must be registers");
+
+ unsigned Opcode = MI.getOpcode();
+
+ // Check whether either of the registers is the X pointer register.
+ bool IsRegX = MI.getOperand(0).getReg() == AVR::R27R26 ||
+ MI.getOperand(1).getReg() == AVR::R27R26;
+
+ bool IsPredec = Opcode == AVR::LDRdPtrPd || Opcode == AVR::STPtrPdRr;
+ bool IsPostinc = Opcode == AVR::LDRdPtrPi || Opcode == AVR::STPtrPiRr;
+
+ // Check if we need to set the inconsistent bit
+ if (IsRegX || IsPredec || IsPostinc) {
+ EncodedValue |= (1 << 12);
+ }
+
+ return EncodedValue;
+}
+
+template <AVR::Fixups Fixup>
+unsigned
+AVRMCCodeEmitter::encodeRelCondBrTarget(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCOperand &MO = MI.getOperand(OpNo);
+
+ if (MO.isExpr()) {
+ Fixups.push_back(MCFixup::create(0, MO.getExpr(),
+ MCFixupKind(Fixup), MI.getLoc()));
+ return 0;
+ }
+
+ assert(MO.isImm());
+
+ // Take the size of the current instruction away.
+ // With labels, this is implicitly done.
+ auto target = MO.getImm();
+ AVR::fixups::adjustBranchTarget(target);
+ return target;
+}
+
+unsigned AVRMCCodeEmitter::encodeLDSTPtrReg(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ auto MO = MI.getOperand(OpNo);
+
+ // The operand should be a pointer register.
+ assert(MO.isReg());
+
+ switch (MO.getReg()) {
+ case AVR::R27R26: return 0x03; // X: 0b11
+ case AVR::R29R28: return 0x02; // Y: 0b10
+ case AVR::R31R30: return 0x00; // Z: 0b00
+ default:
+ llvm_unreachable("invalid pointer register");
+ }
+}
+
+/// Encodes a `memri` operand.
+/// The operand is 7 bits wide:
+/// * The lower 6 bits are the immediate
+/// * The upper bit is the pointer register bit (Z=0, Y=1)
+unsigned AVRMCCodeEmitter::encodeMemri(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ auto RegOp = MI.getOperand(OpNo);
+ auto OffsetOp = MI.getOperand(OpNo + 1);
+
+ assert(RegOp.isReg() && "Expected register operand");
+
+ uint8_t RegBit = 0;
+
+ switch (RegOp.getReg()) {
+ default:
+ llvm_unreachable("Expected either Y or Z register");
+ case AVR::R31R30:
+ RegBit = 0;
+ break; // Z register
+ case AVR::R29R28:
+ RegBit = 1;
+ break; // Y register
+ }
+
+ int8_t OffsetBits;
+
+ if (OffsetOp.isImm()) {
+ OffsetBits = OffsetOp.getImm();
+ } else if (OffsetOp.isExpr()) {
+ OffsetBits = 0;
+ Fixups.push_back(MCFixup::create(0, OffsetOp.getExpr(),
+ MCFixupKind(AVR::fixup_6), MI.getLoc()));
+ } else {
+ llvm_unreachable("invalid value for offset");
+ }
+
+ return (RegBit << 6) | OffsetBits;
+}
+
+unsigned AVRMCCodeEmitter::encodeComplement(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // The operand should be an immediate.
+ assert(MI.getOperand(OpNo).isImm());
+
+ auto Imm = MI.getOperand(OpNo).getImm();
+ return (~0) - Imm;
+}
+
+template <AVR::Fixups Fixup>
+unsigned AVRMCCodeEmitter::encodeImm(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ auto MO = MI.getOperand(OpNo);
+
+ if (MO.isExpr()) {
+ if (isa<AVRMCExpr>(MO.getExpr())) {
+ // If the expression is already an AVRMCExpr (i.e. a lo8(symbol)),
+ // we shouldn't perform any more fixups. Without this check, we would
+ // instead create a fixup to the symbol named 'lo8(symbol)' which
+ // is not correct.
+ return getExprOpValue(MO.getExpr(), Fixups, STI);
+ }
+
+ MCFixupKind FixupKind = static_cast<MCFixupKind>(Fixup);
+ Fixups.push_back(MCFixup::create(0, MO.getExpr(), FixupKind, MI.getLoc()));
+
+ return 0;
+ }
+
+ assert(MO.isImm());
+ return MO.getImm();
+}
+
+unsigned AVRMCCodeEmitter::encodeCallTarget(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ auto MO = MI.getOperand(OpNo);
+
+ if (MO.isExpr()) {
+ MCFixupKind FixupKind = static_cast<MCFixupKind>(AVR::fixup_call);
+ Fixups.push_back(MCFixup::create(0, MO.getExpr(), FixupKind, MI.getLoc()));
+ return 0;
+ }
+
+ assert(MO.isImm());
+
+ auto Target = MO.getImm();
+ AVR::fixups::adjustBranchTarget(Target);
+ return Target;
+}
+
+unsigned AVRMCCodeEmitter::getExprOpValue(const MCExpr *Expr,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+
+ MCExpr::ExprKind Kind = Expr->getKind();
+
+ if (Kind == MCExpr::Binary) {
+ Expr = static_cast<const MCBinaryExpr *>(Expr)->getLHS();
+ Kind = Expr->getKind();
+ }
+
+ if (Kind == MCExpr::Target) {
+ AVRMCExpr const *AVRExpr = cast<AVRMCExpr>(Expr);
+ int64_t Result;
+ if (AVRExpr->evaluateAsConstant(Result)) {
+ return Result;
+ }
+
+ MCFixupKind FixupKind = static_cast<MCFixupKind>(AVRExpr->getFixupKind());
+ Fixups.push_back(MCFixup::create(0, AVRExpr, FixupKind));
+ return 0;
+ }
+
+ assert(Kind == MCExpr::SymbolRef);
+ return 0;
+}
+
+unsigned AVRMCCodeEmitter::getMachineOpValue(const MCInst &MI,
+ const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ if (MO.isReg()) return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg());
+ if (MO.isImm()) return static_cast<unsigned>(MO.getImm());
+
+ if (MO.isFPImm())
+ return static_cast<unsigned>(APFloat(MO.getFPImm())
+ .bitcastToAPInt()
+ .getHiBits(32)
+ .getLimitedValue());
+
+ // MO must be an Expr.
+ assert(MO.isExpr());
+
+ return getExprOpValue(MO.getExpr(), Fixups, STI);
+}
+
+void AVRMCCodeEmitter::emitInstruction(uint64_t Val, unsigned Size,
+ const MCSubtargetInfo &STI,
+ raw_ostream &OS) const {
+ const uint16_t *Words = reinterpret_cast<uint16_t const *>(&Val);
+ size_t WordCount = Size / 2;
+
+ for (int64_t i = WordCount - 1; i >= 0; --i) {
+ uint16_t Word = Words[i];
+
+ OS << (uint8_t) ((Word & 0x00ff) >> 0);
+ OS << (uint8_t) ((Word & 0xff00) >> 8);
+ }
+}
+
+void AVRMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
+
+ // Get byte count of instruction
+ unsigned Size = Desc.getSize();
+
+ assert(Size > 0 && "Instruction size cannot be zero");
+
+ uint64_t BinaryOpCode = getBinaryCodeForInstr(MI, Fixups, STI);
+ emitInstruction(BinaryOpCode, Size, STI, OS);
+}
+
+MCCodeEmitter *createAVRMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx) {
+ return new AVRMCCodeEmitter(MCII, Ctx);
+}
+
+#include "AVRGenMCCodeEmitter.inc"
+
+} // end of namespace llvm
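
Two of the encodings above are easy to get wrong and worth exercising in isolation: the LD/ST "inconsistent bit" formula (`is_predec OR is_postinc OR is_reg_x`, placed at bit 12) and the 7-bit `memri` operand (`(RegBit << 6) | Offset`). A minimal sketch under the assumption that plain integers stand in for the MC operand values; the helper names are hypothetical, not part of the backend:

    #include <cassert>
    #include <cstdint>

    // inconsistent_bit = is_predec OR is_postinc OR is_reg_x, stored at bit 12.
    uint32_t setLoadStoreBit(uint32_t Encoded, bool IsRegX, bool IsPredec,
                             bool IsPostinc) {
      if (IsRegX || IsPredec || IsPostinc)
        Encoded |= (1u << 12);
      return Encoded;
    }

    // memri: bit 6 selects the pointer register (Z=0, Y=1), bits 0-5 the offset.
    unsigned encodeMemriValue(unsigned RegBit, unsigned Offset) {
      assert(Offset < 64 && "offset must fit in 6 bits");
      return (RegBit << 6) | Offset;
    }

    int main() {
      assert(setLoadStoreBit(0x8000, /*IsRegX=*/true, false, false) & (1u << 12));
      assert(encodeMemriValue(/*Y=*/1, 10) == 0x4a);
      return 0;
    }
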
diff --git a/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.h b/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.h
new file mode 100644
index 000000000000..5fa425c296a5
--- /dev/null
+++ b/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.h
@@ -0,0 +1,115 @@
+//===-- AVRMCCodeEmitter.h - Convert AVR Code to Machine Code -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the AVRMCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+//
+
+#ifndef LLVM_AVR_CODE_EMITTER_H
+#define LLVM_AVR_CODE_EMITTER_H
+
+#include "AVRFixupKinds.h"
+
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/Support/DataTypes.h"
+
+#define GET_INSTRINFO_OPERAND_TYPES_ENUM
+#include "AVRGenInstrInfo.inc"
+
+namespace llvm {
+
+class MCContext;
+class MCExpr;
+class MCFixup;
+class MCInst;
+class MCInstrInfo;
+class MCOperand;
+class MCSubtargetInfo;
+class raw_ostream;
+
+/// Writes AVR machine code to a stream.
+class AVRMCCodeEmitter : public MCCodeEmitter {
+public:
+ AVRMCCodeEmitter(const MCInstrInfo &MCII, MCContext &Ctx)
+ : MCII(MCII), Ctx(Ctx) {}
+
+private:
+ /// Finishes up encoding an LD/ST instruction.
+ /// The purpose of this function is to set a bit in the instruction
+ /// which follows no logical pattern. See the implementation for details.
+ unsigned loadStorePostEncoder(const MCInst &MI, unsigned EncodedValue,
+ const MCSubtargetInfo &STI) const;
+
+ /// Gets the encoding for a conditional branch target.
+ template <AVR::Fixups Fixup>
+ unsigned encodeRelCondBrTarget(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// Encodes the `PTRREGS` operand to a load or store instruction.
+ unsigned encodeLDSTPtrReg(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// Encodes a `register+immediate` operand for `LDD`/`STD`.
+ unsigned encodeMemri(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// Takes the complement of a number (~0 - val).
+ unsigned encodeComplement(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// Encodes an immediate value with a given fixup.
+ template <AVR::Fixups Fixup>
+ unsigned encodeImm(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// Gets the encoding of the target for the `CALL k` instruction.
+ unsigned encodeCallTarget(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// TableGen'ed function to get the binary encoding for an instruction.
+ uint64_t getBinaryCodeForInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ unsigned getExprOpValue(const MCExpr *Expr, SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// Returns the binary encoding of an operand.
+ ///
+ /// If the machine operand requires relocation, the relocation is recorded
+ /// and zero is returned.
+ unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ void emitInstruction(uint64_t Val, unsigned Size, const MCSubtargetInfo &STI,
+ raw_ostream &OS) const;
+
+ void encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const override;
+
+ AVRMCCodeEmitter(const AVRMCCodeEmitter &) = delete;
+ void operator=(const AVRMCCodeEmitter &) = delete;
+
+ const MCInstrInfo &MCII;
+ MCContext &Ctx;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_AVR_CODE_EMITTER_H
+
diff --git a/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp b/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp
new file mode 100644
index 000000000000..400296b8409b
--- /dev/null
+++ b/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp
@@ -0,0 +1,189 @@
+//===-- AVRMCExpr.cpp - AVR specific MC expression classes ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AVRMCExpr.h"
+
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCValue.h"
+#include "llvm/MC/MCAsmLayout.h"
+
+namespace llvm {
+
+namespace {
+
+const struct ModifierEntry {
+ const char * const Spelling;
+ AVRMCExpr::VariantKind VariantKind;
+} ModifierNames[] = {
+ {"lo8", AVRMCExpr::VK_AVR_LO8}, {"hi8", AVRMCExpr::VK_AVR_HI8},
+ {"hh8", AVRMCExpr::VK_AVR_HH8}, // synonym with hlo8
+ {"hlo8", AVRMCExpr::VK_AVR_HH8}, {"hhi8", AVRMCExpr::VK_AVR_HHI8},
+
+ {"pm_lo8", AVRMCExpr::VK_AVR_PM_LO8}, {"pm_hi8", AVRMCExpr::VK_AVR_PM_HI8},
+ {"pm_hh8", AVRMCExpr::VK_AVR_PM_HH8},
+};
+
+} // end of anonymous namespace
+
+const AVRMCExpr *AVRMCExpr::create(VariantKind Kind, const MCExpr *Expr,
+ bool Negated, MCContext &Ctx) {
+ return new (Ctx) AVRMCExpr(Kind, Expr, Negated);
+}
+
+void AVRMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
+ assert(Kind != VK_AVR_None);
+
+ if (isNegated())
+ OS << '-';
+
+ OS << getName() << '(';
+ getSubExpr()->print(OS, MAI);
+ OS << ')';
+}
+
+bool AVRMCExpr::evaluateAsConstant(int64_t &Result) const {
+ MCValue Value;
+
+ bool isRelocatable =
+ getSubExpr()->evaluateAsRelocatable(Value, nullptr, nullptr);
+
+ if (!isRelocatable)
+ return false;
+
+ if (Value.isAbsolute()) {
+ Result = evaluateAsInt64(Value.getConstant());
+ return true;
+ }
+
+ return false;
+}
+
+bool AVRMCExpr::evaluateAsRelocatableImpl(MCValue &Result,
+ const MCAsmLayout *Layout,
+ const MCFixup *Fixup) const {
+ MCValue Value;
+ bool isRelocatable = SubExpr->evaluateAsRelocatable(Value, Layout, Fixup);
+
+ if (!isRelocatable)
+ return false;
+
+ if (Value.isAbsolute()) {
+ Result = MCValue::get(evaluateAsInt64(Value.getConstant()));
+ } else {
+ if (!Layout) return false;
+
+ MCContext &Context = Layout->getAssembler().getContext();
+ const MCSymbolRefExpr *Sym = Value.getSymA();
+ MCSymbolRefExpr::VariantKind Modifier = Sym->getKind();
+ if (Modifier != MCSymbolRefExpr::VK_None)
+ return false;
+
+ Sym = MCSymbolRefExpr::create(&Sym->getSymbol(), Modifier, Context);
+ Result = MCValue::get(Sym, Value.getSymB(), Value.getConstant());
+ }
+
+ return true;
+}
+
+int64_t AVRMCExpr::evaluateAsInt64(int64_t Value) const {
+ if (Negated)
+ Value *= -1;
+
+ switch (Kind) {
+ case AVRMCExpr::VK_AVR_LO8:
+ break;
+ case AVRMCExpr::VK_AVR_HI8:
+ Value >>= 8;
+ break;
+ case AVRMCExpr::VK_AVR_HH8:
+ Value >>= 16;
+ break;
+ case AVRMCExpr::VK_AVR_HHI8:
+ Value >>= 24;
+ break;
+ case AVRMCExpr::VK_AVR_PM_LO8:
+ Value >>= 1;
+ break;
+ case AVRMCExpr::VK_AVR_PM_HI8:
+ Value >>= 9;
+ break;
+ case AVRMCExpr::VK_AVR_PM_HH8:
+ Value >>= 17;
+ break;
+
+ case AVRMCExpr::VK_AVR_None:
+ llvm_unreachable("Uninitialized expression.");
+ }
+ return static_cast<uint64_t>(Value) & 0xff;
+}
+
+AVR::Fixups AVRMCExpr::getFixupKind() const {
+ AVR::Fixups Kind = AVR::Fixups::LastTargetFixupKind;
+
+ switch (getKind()) {
+ case VK_AVR_LO8:
+ Kind = isNegated() ? AVR::fixup_lo8_ldi_neg : AVR::fixup_lo8_ldi;
+ break;
+ case VK_AVR_HI8:
+ Kind = isNegated() ? AVR::fixup_hi8_ldi_neg : AVR::fixup_hi8_ldi;
+ break;
+ case VK_AVR_HH8:
+ Kind = isNegated() ? AVR::fixup_hh8_ldi_neg : AVR::fixup_hh8_ldi;
+ break;
+ case VK_AVR_HHI8:
+ Kind = isNegated() ? AVR::fixup_ms8_ldi_neg : AVR::fixup_ms8_ldi;
+ break;
+
+ case VK_AVR_PM_LO8:
+ Kind = isNegated() ? AVR::fixup_lo8_ldi_pm_neg : AVR::fixup_lo8_ldi_pm;
+ break;
+ case VK_AVR_PM_HI8:
+ Kind = isNegated() ? AVR::fixup_hi8_ldi_pm_neg : AVR::fixup_hi8_ldi_pm;
+ break;
+ case VK_AVR_PM_HH8:
+ Kind = isNegated() ? AVR::fixup_hh8_ldi_pm_neg : AVR::fixup_hh8_ldi_pm;
+ break;
+
+ case VK_AVR_None:
+ llvm_unreachable("Uninitialized expression");
+ }
+
+ return Kind;
+}
+
+void AVRMCExpr::visitUsedExpr(MCStreamer &Streamer) const {
+ Streamer.visitUsedExpr(*getSubExpr());
+}
+
+const char *AVRMCExpr::getName() const {
+ const auto &Modifier = std::find_if(
+ std::begin(ModifierNames), std::end(ModifierNames),
+ [this](ModifierEntry const &Mod) { return Mod.VariantKind == Kind; });
+
+ if (Modifier != std::end(ModifierNames)) {
+ return Modifier->Spelling;
+ }
+ return nullptr;
+}
+
+AVRMCExpr::VariantKind AVRMCExpr::getKindByName(StringRef Name) {
+ const auto &Modifier = std::find_if(
+ std::begin(ModifierNames), std::end(ModifierNames),
+ [&Name](ModifierEntry const &Mod) { return Mod.Spelling == Name; });
+
+ if (Modifier != std::end(ModifierNames)) {
+ return Modifier->VariantKind;
+ }
+ return VK_AVR_None;
+}
+
+} // end of namespace llvm
+
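
The shift amounts in `evaluateAsInt64` follow the usual avr-gcc relocation modifiers: `lo8`, `hi8` and `hh8` select successive bytes of a (possibly negated) value, and the `pm_*` variants shift by one extra bit because program-memory addresses are word addresses. A small worked sketch with hypothetical free functions rather than the backend's API:

    #include <cassert>
    #include <cstdint>

    // Mirrors the shift-and-mask logic of AVRMCExpr::evaluateAsInt64 above.
    uint8_t lo8(int64_t V)    { return V & 0xff; }
    uint8_t hi8(int64_t V)    { return (V >> 8) & 0xff; }
    uint8_t hh8(int64_t V)    { return (V >> 16) & 0xff; }
    uint8_t pm_lo8(int64_t V) { return (V >> 1) & 0xff; } // word address

    int main() {
      int64_t Addr = 0x012345;
      assert(lo8(Addr) == 0x45);
      assert(hi8(Addr) == 0x23);
      assert(hh8(Addr) == 0x01);
      // pm_lo8 of byte address 0x0100 is the low byte of word address 0x80.
      assert(pm_lo8(0x0100) == 0x80);
      return 0;
    }
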
diff --git a/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRMCExpr.h b/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRMCExpr.h
new file mode 100644
index 000000000000..be565a8be340
--- /dev/null
+++ b/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRMCExpr.h
@@ -0,0 +1,88 @@
+//===-- AVRMCExpr.h - AVR specific MC expression classes --------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AVR_MCEXPR_H
+#define LLVM_AVR_MCEXPR_H
+
+#include "llvm/MC/MCExpr.h"
+
+#include "MCTargetDesc/AVRFixupKinds.h"
+
+namespace llvm {
+
+/// An expression in AVR machine code.
+class AVRMCExpr : public MCTargetExpr {
+public:
+ /// Specifies the type of an expression.
+ enum VariantKind {
+ VK_AVR_None,
+
+ VK_AVR_HI8, ///< Corresponds to `hi8()`.
+ VK_AVR_LO8, ///< Corresponds to `lo8()`.
+ VK_AVR_HH8, ///< Corresponds to `hlo8()` and `hh8()`.
+ VK_AVR_HHI8, ///< Corresponds to `hhi8()`.
+
+ VK_AVR_PM_LO8, ///< Corresponds to `pm_lo8()`.
+ VK_AVR_PM_HI8, ///< Corresponds to `pm_hi8()`.
+ VK_AVR_PM_HH8 ///< Corresponds to `pm_hh8()`.
+ };
+
+public:
+ /// Creates an AVR machine code expression.
+ static const AVRMCExpr *create(VariantKind Kind, const MCExpr *Expr,
+ bool isNegated, MCContext &Ctx);
+
+ /// Gets the type of the expression.
+ VariantKind getKind() const { return Kind; }
+ /// Gets the name of the expression.
+ const char *getName() const;
+ const MCExpr *getSubExpr() const { return SubExpr; }
+ /// Gets the fixup which corresponds to the expression.
+ AVR::Fixups getFixupKind() const;
+ /// Evaluates the fixup as a constant value.
+ bool evaluateAsConstant(int64_t &Result) const;
+
+ bool isNegated() const { return Negated; }
+ void setNegated(bool negated = true) { Negated = negated; }
+
+ void printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const override;
+ bool evaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout,
+ const MCFixup *Fixup) const override;
+
+ void visitUsedExpr(MCStreamer &streamer) const override;
+
+ MCFragment *findAssociatedFragment() const override {
+ return getSubExpr()->findAssociatedFragment();
+ }
+
+ void fixELFSymbolsInTLSFixups(MCAssembler &Asm) const override {}
+
+ static bool classof(const MCExpr *E) {
+ return E->getKind() == MCExpr::Target;
+ }
+
+public:
+ static VariantKind getKindByName(StringRef Name);
+
+private:
+ int64_t evaluateAsInt64(int64_t Value) const;
+
+ const VariantKind Kind;
+ const MCExpr *SubExpr;
+ bool Negated;
+
+private:
+ explicit AVRMCExpr(VariantKind Kind, const MCExpr *Expr, bool Negated)
+ : Kind(Kind), SubExpr(Expr), Negated(Negated) {}
+ ~AVRMCExpr() {}
+};
+
+} // end namespace llvm
+
+#endif // LLVM_AVR_MCEXPR_H
diff --git a/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp b/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp
new file mode 100644
index 000000000000..a4fa5c0a9310
--- /dev/null
+++ b/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp
@@ -0,0 +1,121 @@
+//===-- AVRMCTargetDesc.cpp - AVR Target Descriptions ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides AVR specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AVRELFStreamer.h"
+#include "AVRMCAsmInfo.h"
+#include "AVRMCTargetDesc.h"
+#include "AVRTargetStreamer.h"
+#include "InstPrinter/AVRInstPrinter.h"
+
+#include "llvm/MC/MCELFStreamer.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "AVRGenInstrInfo.inc"
+
+#define GET_SUBTARGETINFO_MC_DESC
+#include "AVRGenSubtargetInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "AVRGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createAVRMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitAVRMCInstrInfo(X);
+
+ return X;
+}
+
+static MCRegisterInfo *createAVRMCRegisterInfo(const Triple &TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitAVRMCRegisterInfo(X, 0);
+
+ return X;
+}
+
+static MCSubtargetInfo *createAVRMCSubtargetInfo(const Triple &TT,
+ StringRef CPU, StringRef FS) {
+ return createAVRMCSubtargetInfoImpl(TT, CPU, FS);
+}
+
+static MCInstPrinter *createAVRMCInstPrinter(const Triple &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI) {
+ if (SyntaxVariant == 0) {
+ return new AVRInstPrinter(MAI, MII, MRI);
+ }
+
+ return nullptr;
+}
+
+static MCStreamer *createMCStreamer(const Triple &T, MCContext &Context,
+ MCAsmBackend &MAB, raw_pwrite_stream &OS,
+ MCCodeEmitter *Emitter, bool RelaxAll) {
+ return createELFStreamer(Context, MAB, OS, Emitter, RelaxAll);
+}
+
+static MCTargetStreamer *
+createAVRObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo &STI) {
+ return new AVRELFStreamer(S, STI);
+}
+
+static MCTargetStreamer *createMCAsmTargetStreamer(MCStreamer &S,
+ formatted_raw_ostream &OS,
+ MCInstPrinter *InstPrint,
+ bool isVerboseAsm) {
+ return new AVRTargetAsmStreamer(S);
+}
+
+extern "C" void LLVMInitializeAVRTargetMC() {
+ // Register the MC asm info.
+ RegisterMCAsmInfo<AVRMCAsmInfo> X(getTheAVRTarget());
+
+ // Register the MC instruction info.
+ TargetRegistry::RegisterMCInstrInfo(getTheAVRTarget(), createAVRMCInstrInfo);
+
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(getTheAVRTarget(), createAVRMCRegisterInfo);
+
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(getTheAVRTarget(),
+ createAVRMCSubtargetInfo);
+
+ // Register the MCInstPrinter.
+ TargetRegistry::RegisterMCInstPrinter(getTheAVRTarget(),
+ createAVRMCInstPrinter);
+
+ // Register the MC Code Emitter
+ TargetRegistry::RegisterMCCodeEmitter(getTheAVRTarget(), createAVRMCCodeEmitter);
+
+ // Register the ELF streamer
+ TargetRegistry::RegisterELFStreamer(getTheAVRTarget(), createMCStreamer);
+
+ // Register the obj target streamer.
+ TargetRegistry::RegisterObjectTargetStreamer(getTheAVRTarget(),
+ createAVRObjectTargetStreamer);
+
+ // Register the asm target streamer.
+ TargetRegistry::RegisterAsmTargetStreamer(getTheAVRTarget(),
+ createMCAsmTargetStreamer);
+
+ // Register the asm backend (as little endian).
+ TargetRegistry::RegisterMCAsmBackend(getTheAVRTarget(), createAVRAsmBackend);
+}
+
diff --git a/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.h b/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.h
index b72793d0fab4..41a574767910 100644
--- a/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.h
@@ -24,12 +24,13 @@ class MCContext;
class MCInstrInfo;
class MCObjectWriter;
class MCRegisterInfo;
+class MCTargetOptions;
class StringRef;
class Target;
class Triple;
class raw_pwrite_stream;
-extern Target TheAVRTarget;
+Target &getTheAVRTarget();
/// Creates a machine code emitter for AVR.
MCCodeEmitter *createAVRMCCodeEmitter(const MCInstrInfo &MCII,
@@ -38,7 +39,8 @@ MCCodeEmitter *createAVRMCCodeEmitter(const MCInstrInfo &MCII,
/// Creates an assembly backend for AVR.
MCAsmBackend *createAVRAsmBackend(const Target &T, const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU);
+ const Triple &TT, StringRef CPU,
+ const llvm::MCTargetOptions &TO);
/// Creates an ELF object writer for AVR.
MCObjectWriter *createAVRELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI);
diff --git a/contrib/llvm/lib/Target/AVR/README.md b/contrib/llvm/lib/Target/AVR/README.md
new file mode 100644
index 000000000000..bd8b453aa81e
--- /dev/null
+++ b/contrib/llvm/lib/Target/AVR/README.md
@@ -0,0 +1,8 @@
+# AVR backend
+
+This experimental backend is for the 8-bit Atmel [AVR](https://en.wikipedia.org/wiki/Atmel_AVR) microcontroller.
+
+## Useful links
+
+* [Unresolved bugs](https://llvm.org/bugs/buglist.cgi?product=libraries&component=Backend%3A%20AVR&resolution=---&list_id=109466)
+* [Architecture notes](https://github.com/avr-llvm/architecture)
diff --git a/contrib/llvm/lib/Target/AVR/TargetInfo/AVRTargetInfo.cpp b/contrib/llvm/lib/Target/AVR/TargetInfo/AVRTargetInfo.cpp
index c0e0d20029c2..36cecaa7ac7a 100644
--- a/contrib/llvm/lib/Target/AVR/TargetInfo/AVRTargetInfo.cpp
+++ b/contrib/llvm/lib/Target/AVR/TargetInfo/AVRTargetInfo.cpp
@@ -9,17 +9,15 @@
#include "llvm/IR/Module.h"
#include "llvm/Support/TargetRegistry.h"
-
namespace llvm {
-Target TheAVRTarget;
+Target &getTheAVRTarget() {
+ static Target TheAVRTarget;
+ return TheAVRTarget;
+}
}
extern "C" void LLVMInitializeAVRTargetInfo() {
- llvm::RegisterTarget<llvm::Triple::avr> X(
- llvm::TheAVRTarget, "avr", "Atmel AVR Microcontroller");
+ llvm::RegisterTarget<llvm::Triple::avr> X(llvm::getTheAVRTarget(), "avr",
+ "Atmel AVR Microcontroller");
}
-// FIXME: Temporary stub - this function must be defined for linking
-// to succeed. Remove once this function is properly implemented.
-extern "C" void LLVMInitializeAVRTargetMC() {
-}
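
The change from a global `Target` object to `getTheAVRTarget()` uses a function-local static, so the object is constructed on first use instead of during static initialization, avoiding initialization-order problems between translation units. A minimal sketch of the pattern with a placeholder type (not the real `llvm::Target`):

    struct Target {}; // placeholder for illustration only

    Target &getTheExampleTarget() {
      static Target TheTarget; // constructed lazily, on first call
      return TheTarget;
    }
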
diff --git a/contrib/llvm/lib/Target/BPF/BPF.td b/contrib/llvm/lib/Target/BPF/BPF.td
index 8493b0fd1e4a..11abe520c506 100644
--- a/contrib/llvm/lib/Target/BPF/BPF.td
+++ b/contrib/llvm/lib/Target/BPF/BPF.td
@@ -25,14 +25,20 @@ def BPFInstPrinter : AsmWriter {
bit isMCAsmWriter = 1;
}
+def BPFAsmParser : AsmParser {
+ bit HasMnemonicFirst = 0;
+}
+
def BPFAsmParserVariant : AsmParserVariant {
int Variant = 0;
string Name = "BPF";
string BreakCharacters = ".";
+ string TokenizingCharacters = "#()[]=:.<>!+*";
}
def BPF : Target {
let InstructionSet = BPFInstrInfo;
let AssemblyWriters = [BPFInstPrinter];
+ let AssemblyParsers = [BPFAsmParser];
let AssemblyParserVariants = [BPFAsmParserVariant];
}
diff --git a/contrib/llvm/lib/Target/BPF/BPFAsmPrinter.cpp b/contrib/llvm/lib/Target/BPF/BPFAsmPrinter.cpp
index 1078b0652189..c5201465e074 100644
--- a/contrib/llvm/lib/Target/BPF/BPFAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/BPF/BPFAsmPrinter.cpp
@@ -38,7 +38,7 @@ public:
explicit BPFAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
: AsmPrinter(TM, std::move(Streamer)) {}
- const char *getPassName() const override { return "BPF Assembly Printer"; }
+ StringRef getPassName() const override { return "BPF Assembly Printer"; }
void EmitInstruction(const MachineInstr *MI) override;
};
@@ -55,7 +55,7 @@ void BPFAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// Force static initialization.
extern "C" void LLVMInitializeBPFAsmPrinter() {
- RegisterAsmPrinter<BPFAsmPrinter> X(TheBPFleTarget);
- RegisterAsmPrinter<BPFAsmPrinter> Y(TheBPFbeTarget);
- RegisterAsmPrinter<BPFAsmPrinter> Z(TheBPFTarget);
+ RegisterAsmPrinter<BPFAsmPrinter> X(getTheBPFleTarget());
+ RegisterAsmPrinter<BPFAsmPrinter> Y(getTheBPFbeTarget());
+ RegisterAsmPrinter<BPFAsmPrinter> Z(getTheBPFTarget());
}
diff --git a/contrib/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp b/contrib/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
index ac2af036b6f8..12091449cc11 100644
--- a/contrib/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/BPF/BPFISelDAGToDAG.cpp
@@ -38,7 +38,7 @@ class BPFDAGToDAGISel : public SelectionDAGISel {
public:
explicit BPFDAGToDAGISel(BPFTargetMachine &TM) : SelectionDAGISel(TM) {}
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "BPF DAG->DAG Pattern Instruction Selection";
}
diff --git a/contrib/llvm/lib/Target/BPF/BPFInstrInfo.cpp b/contrib/llvm/lib/Target/BPF/BPFInstrInfo.cpp
index 7aea0512ac78..cbe4466164f9 100644
--- a/contrib/llvm/lib/Target/BPF/BPFInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/BPF/BPFInstrInfo.cpp
@@ -130,13 +130,16 @@ bool BPFInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
return false;
}
-unsigned BPFInstrInfo::InsertBranch(MachineBasicBlock &MBB,
+unsigned BPFInstrInfo::insertBranch(MachineBasicBlock &MBB,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
ArrayRef<MachineOperand> Cond,
- const DebugLoc &DL) const {
+ const DebugLoc &DL,
+ int *BytesAdded) const {
+ assert(!BytesAdded && "code size not handled");
+
// Shouldn't be a fall through.
- assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert(TBB && "insertBranch must not be told to insert a fallthrough");
if (Cond.empty()) {
// Unconditional branch
@@ -148,7 +151,10 @@ unsigned BPFInstrInfo::InsertBranch(MachineBasicBlock &MBB,
llvm_unreachable("Unexpected conditional branch");
}
-unsigned BPFInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+unsigned BPFInstrInfo::removeBranch(MachineBasicBlock &MBB,
+ int *BytesRemoved) const {
+ assert(!BytesRemoved && "code size not handled");
+
MachineBasicBlock::iterator I = MBB.end();
unsigned Count = 0;
diff --git a/contrib/llvm/lib/Target/BPF/BPFInstrInfo.h b/contrib/llvm/lib/Target/BPF/BPFInstrInfo.h
index cc2e41e4c603..c7048ab979b7 100644
--- a/contrib/llvm/lib/Target/BPF/BPFInstrInfo.h
+++ b/contrib/llvm/lib/Target/BPF/BPFInstrInfo.h
@@ -49,10 +49,12 @@ public:
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const override;
- unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
- unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ unsigned removeBranch(MachineBasicBlock &MBB,
+ int *BytesRemoved = nullptr) const override;
+ unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
- const DebugLoc &DL) const override;
+ const DebugLoc &DL,
+ int *BytesAdded = nullptr) const override;
};
}
diff --git a/contrib/llvm/lib/Target/BPF/BPFInstrInfo.td b/contrib/llvm/lib/Target/BPF/BPFInstrInfo.td
index 6b73db87fa26..a7910dea98de 100644
--- a/contrib/llvm/lib/Target/BPF/BPFInstrInfo.td
+++ b/contrib/llvm/lib/Target/BPF/BPFInstrInfo.td
@@ -61,6 +61,7 @@ def FIri : ComplexPattern<i64, 2, "SelectFIAddr", [add, or], []>;
def MEMri : Operand<i64> {
let PrintMethod = "printMemOperand";
let EncoderMethod = "getMemoryOpValue";
+ let DecoderMethod = "decodeMemoryOpValue";
let MIOperandInfo = (ops GPR, i16imm);
}
@@ -81,7 +82,7 @@ def BPF_CC_GEU : PatLeaf<(imm),
// jump instructions
class JMP_RR<bits<4> Opc, string OpcodeStr, PatLeaf Cond>
: InstBPF<(outs), (ins GPR:$dst, GPR:$src, brtarget:$BrDst),
- !strconcat(OpcodeStr, "\t$dst, $src goto $BrDst"),
+ "if $dst "#OpcodeStr#" $src goto $BrDst",
[(BPFbrcc i64:$dst, i64:$src, Cond, bb:$BrDst)]> {
bits<4> op;
bits<1> BPFSrc;
@@ -102,7 +103,7 @@ class JMP_RR<bits<4> Opc, string OpcodeStr, PatLeaf Cond>
class JMP_RI<bits<4> Opc, string OpcodeStr, PatLeaf Cond>
: InstBPF<(outs), (ins GPR:$dst, i64imm:$imm, brtarget:$BrDst),
- !strconcat(OpcodeStr, "i\t$dst, $imm goto $BrDst"),
+ "if $dst "#OpcodeStr#" $imm goto $BrDst",
[(BPFbrcc i64:$dst, i64immSExt32:$imm, Cond, bb:$BrDst)]> {
bits<4> op;
bits<1> BPFSrc;
@@ -128,18 +129,18 @@ multiclass J<bits<4> Opc, string OpcodeStr, PatLeaf Cond> {
let isBranch = 1, isTerminator = 1, hasDelaySlot=0 in {
// cmp+goto instructions
-defm JEQ : J<0x1, "jeq", BPF_CC_EQ>;
-defm JUGT : J<0x2, "jgt", BPF_CC_GTU>;
-defm JUGE : J<0x3, "jge", BPF_CC_GEU>;
-defm JNE : J<0x5, "jne", BPF_CC_NE>;
-defm JSGT : J<0x6, "jsgt", BPF_CC_GT>;
-defm JSGE : J<0x7, "jsge", BPF_CC_GE>;
+defm JEQ : J<0x1, "==", BPF_CC_EQ>;
+defm JUGT : J<0x2, ">", BPF_CC_GTU>;
+defm JUGE : J<0x3, ">=", BPF_CC_GEU>;
+defm JNE : J<0x5, "!=", BPF_CC_NE>;
+defm JSGT : J<0x6, "s>", BPF_CC_GT>;
+defm JSGE : J<0x7, "s>=", BPF_CC_GE>;
}
// ALU instructions
class ALU_RI<bits<4> Opc, string OpcodeStr, SDNode OpNode>
: InstBPF<(outs GPR:$dst), (ins GPR:$src2, i64imm:$imm),
- !strconcat(OpcodeStr, "i\t$dst, $imm"),
+ "$dst "#OpcodeStr#" $imm",
[(set GPR:$dst, (OpNode GPR:$src2, i64immSExt32:$imm))]> {
bits<4> op;
bits<1> BPFSrc;
@@ -158,7 +159,7 @@ class ALU_RI<bits<4> Opc, string OpcodeStr, SDNode OpNode>
class ALU_RR<bits<4> Opc, string OpcodeStr, SDNode OpNode>
: InstBPF<(outs GPR:$dst), (ins GPR:$src2, GPR:$src),
- !strconcat(OpcodeStr, "\t$dst, $src"),
+ "$dst "#OpcodeStr#" $src",
[(set GPR:$dst, (OpNode i64:$src2, i64:$src))]> {
bits<4> op;
bits<1> BPFSrc;
@@ -182,22 +183,22 @@ multiclass ALU<bits<4> Opc, string OpcodeStr, SDNode OpNode> {
let Constraints = "$dst = $src2" in {
let isAsCheapAsAMove = 1 in {
- defm ADD : ALU<0x0, "add", add>;
- defm SUB : ALU<0x1, "sub", sub>;
- defm OR : ALU<0x4, "or", or>;
- defm AND : ALU<0x5, "and", and>;
- defm SLL : ALU<0x6, "sll", shl>;
- defm SRL : ALU<0x7, "srl", srl>;
- defm XOR : ALU<0xa, "xor", xor>;
- defm SRA : ALU<0xc, "sra", sra>;
+ defm ADD : ALU<0x0, "+=", add>;
+ defm SUB : ALU<0x1, "-=", sub>;
+ defm OR : ALU<0x4, "|=", or>;
+ defm AND : ALU<0x5, "&=", and>;
+ defm SLL : ALU<0x6, "<<=", shl>;
+ defm SRL : ALU<0x7, ">>=", srl>;
+ defm XOR : ALU<0xa, "^=", xor>;
+ defm SRA : ALU<0xc, "s>>=", sra>;
}
- defm MUL : ALU<0x2, "mul", mul>;
- defm DIV : ALU<0x3, "div", udiv>;
+ defm MUL : ALU<0x2, "*=", mul>;
+ defm DIV : ALU<0x3, "/=", udiv>;
}
class MOV_RR<string OpcodeStr>
: InstBPF<(outs GPR:$dst), (ins GPR:$src),
- !strconcat(OpcodeStr, "\t$dst, $src"),
+ "$dst "#OpcodeStr#" $src",
[]> {
bits<4> op;
bits<1> BPFSrc;
@@ -216,7 +217,7 @@ class MOV_RR<string OpcodeStr>
class MOV_RI<string OpcodeStr>
: InstBPF<(outs GPR:$dst), (ins i64imm:$imm),
- !strconcat(OpcodeStr, "\t$dst, $imm"),
+ "$dst "#OpcodeStr#" $imm",
[(set GPR:$dst, (i64 i64immSExt32:$imm))]> {
bits<4> op;
bits<1> BPFSrc;
@@ -235,7 +236,7 @@ class MOV_RI<string OpcodeStr>
class LD_IMM64<bits<4> Pseudo, string OpcodeStr>
: InstBPF<(outs GPR:$dst), (ins u64imm:$imm),
- !strconcat(OpcodeStr, "\t$dst, $imm"),
+ "$dst "#OpcodeStr#" ${imm}ll",
[(set GPR:$dst, (i64 imm:$imm))]> {
bits<3> mode;
@@ -256,9 +257,9 @@ class LD_IMM64<bits<4> Pseudo, string OpcodeStr>
}
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
-def LD_imm64 : LD_IMM64<0, "ld_64">;
-def MOV_rr : MOV_RR<"mov">;
-def MOV_ri : MOV_RI<"mov">;
+def LD_imm64 : LD_IMM64<0, "=">;
+def MOV_rr : MOV_RR<"=">;
+def MOV_ri : MOV_RI<"=">;
}
def FI_ri
@@ -267,6 +268,13 @@ def FI_ri
[(set i64:$dst, FIri:$addr)]> {
// This is a tentative instruction, and will be replaced
// with MOV_rr and ADD_ri in PEI phase
+ let Inst{63-61} = 0;
+ let Inst{60-59} = 3;
+ let Inst{51-48} = 0;
+ let Inst{55-52} = 2;
+ let Inst{47-32} = 0;
+ let Inst{31-0} = 0;
+ let BPFClass = 0;
}
@@ -296,7 +304,7 @@ def LD_pseudo
// STORE instructions
class STORE<bits<2> SizeOp, string OpcodeStr, list<dag> Pattern>
: InstBPF<(outs), (ins GPR:$src, MEMri:$addr),
- !strconcat(OpcodeStr, "\t$addr, $src"), Pattern> {
+ "*("#OpcodeStr#" *)($addr) = $src", Pattern> {
bits<3> mode;
bits<2> size;
bits<4> src;
@@ -316,15 +324,15 @@ class STORE<bits<2> SizeOp, string OpcodeStr, list<dag> Pattern>
class STOREi64<bits<2> Opc, string OpcodeStr, PatFrag OpNode>
: STORE<Opc, OpcodeStr, [(OpNode i64:$src, ADDRri:$addr)]>;
-def STW : STOREi64<0x0, "stw", truncstorei32>;
-def STH : STOREi64<0x1, "sth", truncstorei16>;
-def STB : STOREi64<0x2, "stb", truncstorei8>;
-def STD : STOREi64<0x3, "std", store>;
+def STW : STOREi64<0x0, "u32", truncstorei32>;
+def STH : STOREi64<0x1, "u16", truncstorei16>;
+def STB : STOREi64<0x2, "u8", truncstorei8>;
+def STD : STOREi64<0x3, "u64", store>;
// LOAD instructions
class LOAD<bits<2> SizeOp, string OpcodeStr, list<dag> Pattern>
: InstBPF<(outs GPR:$dst), (ins MEMri:$addr),
- !strconcat(OpcodeStr, "\t$dst, $addr"), Pattern> {
+ "$dst = *("#OpcodeStr#" *)($addr)", Pattern> {
bits<3> mode;
bits<2> size;
bits<4> dst;
@@ -344,14 +352,14 @@ class LOAD<bits<2> SizeOp, string OpcodeStr, list<dag> Pattern>
class LOADi64<bits<2> SizeOp, string OpcodeStr, PatFrag OpNode>
: LOAD<SizeOp, OpcodeStr, [(set i64:$dst, (OpNode ADDRri:$addr))]>;
-def LDW : LOADi64<0x0, "ldw", zextloadi32>;
-def LDH : LOADi64<0x1, "ldh", zextloadi16>;
-def LDB : LOADi64<0x2, "ldb", zextloadi8>;
-def LDD : LOADi64<0x3, "ldd", load>;
+def LDW : LOADi64<0x0, "u32", zextloadi32>;
+def LDH : LOADi64<0x1, "u16", zextloadi16>;
+def LDB : LOADi64<0x2, "u8", zextloadi8>;
+def LDD : LOADi64<0x3, "u64", load>;
class BRANCH<bits<4> Opc, string OpcodeStr, list<dag> Pattern>
: InstBPF<(outs), (ins brtarget:$BrDst),
- !strconcat(OpcodeStr, "\t$BrDst"), Pattern> {
+ !strconcat(OpcodeStr, " $BrDst"), Pattern> {
bits<4> op;
bits<16> BrDst;
bits<1> BPFSrc;
@@ -367,7 +375,7 @@ class BRANCH<bits<4> Opc, string OpcodeStr, list<dag> Pattern>
class CALL<string OpcodeStr>
: InstBPF<(outs), (ins calltarget:$BrDst),
- !strconcat(OpcodeStr, "\t$BrDst"), []> {
+ !strconcat(OpcodeStr, " $BrDst"), []> {
bits<4> op;
bits<32> BrDst;
bits<1> BPFSrc;
@@ -383,7 +391,7 @@ class CALL<string OpcodeStr>
// Jump always
let isBranch = 1, isTerminator = 1, hasDelaySlot=0, isBarrier = 1 in {
- def JMP : BRANCH<0x0, "jmp", [(br bb:$BrDst)]>;
+ def JMP : BRANCH<0x0, "goto", [(br bb:$BrDst)]>;
}
// Jump and link
@@ -432,7 +440,7 @@ class RET<string OpcodeStr>
let isReturn = 1, isTerminator = 1, hasDelaySlot=0, isBarrier = 1,
isNotDuplicable = 1 in {
- def RET : RET<"ret">;
+ def RET : RET<"exit">;
}
// ADJCALLSTACKDOWN/UP pseudo insns
@@ -472,17 +480,17 @@ def : Pat<(extloadi32 ADDRri:$src), (i64 (LDW ADDRri:$src))>;
// Atomics
class XADD<bits<2> SizeOp, string OpcodeStr, PatFrag OpNode>
: InstBPF<(outs GPR:$dst), (ins MEMri:$addr, GPR:$val),
- !strconcat(OpcodeStr, "\t$dst, $addr, $val"),
+ "lock *("#OpcodeStr#" *)($addr) += $val",
[(set GPR:$dst, (OpNode ADDRri:$addr, GPR:$val))]> {
bits<3> mode;
bits<2> size;
- bits<4> src;
+ bits<4> dst;
bits<20> addr;
let Inst{63-61} = mode;
let Inst{60-59} = size;
let Inst{51-48} = addr{19-16}; // base reg
- let Inst{55-52} = src;
+ let Inst{55-52} = dst;
let Inst{47-32} = addr{15-0}; // offset
let mode = 6; // BPF_XADD
@@ -491,8 +499,8 @@ class XADD<bits<2> SizeOp, string OpcodeStr, PatFrag OpNode>
}
let Constraints = "$dst = $val" in {
-def XADD32 : XADD<0, "xadd32", atomic_load_add_32>;
-def XADD64 : XADD<3, "xadd64", atomic_load_add_64>;
+def XADD32 : XADD<0, "u32", atomic_load_add_32>;
+def XADD64 : XADD<3, "u64", atomic_load_add_64>;
// undefined def XADD16 : XADD<1, "xadd16", atomic_load_add_16>;
// undefined def XADD8 : XADD<2, "xadd8", atomic_load_add_8>;
}
@@ -528,7 +536,7 @@ let Defs = [R0, R1, R2, R3, R4, R5], Uses = [R6], hasSideEffects = 1,
hasExtraDefRegAllocReq = 1, hasExtraSrcRegAllocReq = 1, mayLoad = 1 in {
class LOAD_ABS<bits<2> SizeOp, string OpcodeStr, Intrinsic OpNode>
: InstBPF<(outs), (ins GPR:$skb, i64imm:$imm),
- !strconcat(OpcodeStr, "\tr0, $skb.data + $imm"),
+ "r0 = *("#OpcodeStr#" *)skb[$imm]",
[(set R0, (OpNode GPR:$skb, i64immSExt32:$imm))]> {
bits<3> mode;
bits<2> size;
@@ -545,7 +553,7 @@ class LOAD_ABS<bits<2> SizeOp, string OpcodeStr, Intrinsic OpNode>
class LOAD_IND<bits<2> SizeOp, string OpcodeStr, Intrinsic OpNode>
: InstBPF<(outs), (ins GPR:$skb, GPR:$val),
- !strconcat(OpcodeStr, "\tr0, $skb.data + $val"),
+ "r0 = *("#OpcodeStr#" *)skb[$val]",
[(set R0, (OpNode GPR:$skb, GPR:$val))]> {
bits<3> mode;
bits<2> size;
@@ -561,10 +569,10 @@ class LOAD_IND<bits<2> SizeOp, string OpcodeStr, Intrinsic OpNode>
}
}
-def LD_ABS_B : LOAD_ABS<2, "ldabs_b", int_bpf_load_byte>;
-def LD_ABS_H : LOAD_ABS<1, "ldabs_h", int_bpf_load_half>;
-def LD_ABS_W : LOAD_ABS<0, "ldabs_w", int_bpf_load_word>;
+def LD_ABS_B : LOAD_ABS<2, "u8", int_bpf_load_byte>;
+def LD_ABS_H : LOAD_ABS<1, "u16", int_bpf_load_half>;
+def LD_ABS_W : LOAD_ABS<0, "u32", int_bpf_load_word>;
-def LD_IND_B : LOAD_IND<2, "ldind_b", int_bpf_load_byte>;
-def LD_IND_H : LOAD_IND<1, "ldind_h", int_bpf_load_half>;
-def LD_IND_W : LOAD_IND<0, "ldind_w", int_bpf_load_word>;
+def LD_IND_B : LOAD_IND<2, "u8", int_bpf_load_byte>;
+def LD_IND_H : LOAD_IND<1, "u16", int_bpf_load_half>;
+def LD_IND_W : LOAD_IND<0, "u32", int_bpf_load_word>;
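
To illustrate the format-string changes above: with the new strings, an `ADD_ri` prints roughly as `r1 += 42` rather than `addi r1, 42`, `LDW` prints as `r0 = *(u32 *)(r1 + 8)` rather than `ldw r0, 8(r1)` (memory operands are printed as `reg + offset` by the updated BPFInstPrinter later in this diff), and a conditional branch prints as `if r1 == r2 goto <label>` rather than `jeq r1, r2 goto <label>`. The register numbers and labels here are illustrative; they depend on the operands of the instruction being printed.
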
diff --git a/contrib/llvm/lib/Target/BPF/BPFRegisterInfo.cpp b/contrib/llvm/lib/Target/BPF/BPFRegisterInfo.cpp
index 952615bd1c2b..71846e3e92c9 100644
--- a/contrib/llvm/lib/Target/BPF/BPFRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/BPF/BPFRegisterInfo.cpp
@@ -62,7 +62,7 @@ void BPFRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineBasicBlock &MBB = *MI.getParent();
if (MI.getOpcode() == BPF::MOV_rr) {
- int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
+ int Offset = MF.getFrameInfo().getObjectOffset(FrameIndex);
MI.getOperand(i).ChangeToRegister(FrameReg, false);
unsigned reg = MI.getOperand(i - 1).getReg();
@@ -72,7 +72,7 @@ void BPFRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
return;
}
- int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
+ int Offset = MF.getFrameInfo().getObjectOffset(FrameIndex) +
MI.getOperand(i + 1).getImm();
if (!isInt<32>(Offset))
diff --git a/contrib/llvm/lib/Target/BPF/BPFTargetMachine.cpp b/contrib/llvm/lib/Target/BPF/BPFTargetMachine.cpp
index 5fc6f2f0ce55..897695633e46 100644
--- a/contrib/llvm/lib/Target/BPF/BPFTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/BPF/BPFTargetMachine.cpp
@@ -24,9 +24,9 @@ using namespace llvm;
extern "C" void LLVMInitializeBPFTarget() {
// Register the target.
- RegisterTargetMachine<BPFTargetMachine> X(TheBPFleTarget);
- RegisterTargetMachine<BPFTargetMachine> Y(TheBPFbeTarget);
- RegisterTargetMachine<BPFTargetMachine> Z(TheBPFTarget);
+ RegisterTargetMachine<BPFTargetMachine> X(getTheBPFleTarget());
+ RegisterTargetMachine<BPFTargetMachine> Y(getTheBPFbeTarget());
+ RegisterTargetMachine<BPFTargetMachine> Z(getTheBPFTarget());
}
// DataLayout: little or big endian
diff --git a/contrib/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp b/contrib/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
new file mode 100644
index 000000000000..b0037fbc16ac
--- /dev/null
+++ b/contrib/llvm/lib/Target/BPF/Disassembler/BPFDisassembler.cpp
@@ -0,0 +1,154 @@
+//===- BPFDisassembler.cpp - Disassembler for BPF ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is part of the BPF Disassembler.
+//
+//===----------------------------------------------------------------------===//
+
+#include "BPF.h"
+#include "BPFRegisterInfo.h"
+#include "BPFSubtarget.h"
+#include "MCTargetDesc/BPFMCTargetDesc.h"
+
+#include "llvm/MC/MCDisassembler/MCDisassembler.h"
+#include "llvm/MC/MCFixedLenDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/TargetRegistry.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "bpf-disassembler"
+
+typedef MCDisassembler::DecodeStatus DecodeStatus;
+
+namespace {
+
+/// A disassembler class for BPF.
+class BPFDisassembler : public MCDisassembler {
+public:
+ BPFDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
+ : MCDisassembler(STI, Ctx) {}
+ virtual ~BPFDisassembler() {}
+
+ DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const override;
+};
+}
+
+static MCDisassembler *createBPFDisassembler(const Target &T,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ return new BPFDisassembler(STI, Ctx);
+}
+
+
+extern "C" void LLVMInitializeBPFDisassembler() {
+ // Register the disassembler.
+ TargetRegistry::RegisterMCDisassembler(getTheBPFTarget(),
+ createBPFDisassembler);
+ TargetRegistry::RegisterMCDisassembler(getTheBPFleTarget(),
+ createBPFDisassembler);
+ TargetRegistry::RegisterMCDisassembler(getTheBPFbeTarget(),
+ createBPFDisassembler);
+}
+
+static const unsigned GPRDecoderTable[] = {
+ BPF::R0, BPF::R1, BPF::R2, BPF::R3, BPF::R4, BPF::R5,
+ BPF::R6, BPF::R7, BPF::R8, BPF::R9, BPF::R10, BPF::R11};
+
+static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t /*Address*/,
+ const void * /*Decoder*/) {
+ if (RegNo > 11)
+ return MCDisassembler::Fail;
+
+ unsigned Reg = GPRDecoderTable[RegNo];
+ Inst.addOperand(MCOperand::createReg(Reg));
+ return MCDisassembler::Success;
+}
+
+static DecodeStatus decodeMemoryOpValue(MCInst &Inst, unsigned Insn,
+ uint64_t Address, const void *Decoder) {
+ unsigned Register = (Insn >> 16) & 0xf;
+ Inst.addOperand(MCOperand::createReg(GPRDecoderTable[Register]));
+ unsigned Offset = (Insn & 0xffff);
+ Inst.addOperand(MCOperand::createImm(SignExtend32<16>(Offset)));
+
+ return MCDisassembler::Success;
+}
+
+#include "BPFGenDisassemblerTables.inc"
+
+static DecodeStatus readInstruction64(ArrayRef<uint8_t> Bytes, uint64_t Address,
+ uint64_t &Size, uint64_t &Insn) {
+ uint64_t Lo, Hi;
+
+ if (Bytes.size() < 8) {
+ Size = 0;
+ return MCDisassembler::Fail;
+ }
+
+ Size = 8;
+ Hi = (Bytes[0] << 24) | (Bytes[1] << 16) | (Bytes[2] << 0) | (Bytes[3] << 8);
+ Lo = (Bytes[4] << 0) | (Bytes[5] << 8) | (Bytes[6] << 16) | (Bytes[7] << 24);
+ Insn = Make_64(Hi, Lo);
+
+ return MCDisassembler::Success;
+}
+
+DecodeStatus BPFDisassembler::getInstruction(MCInst &Instr, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const {
+ uint64_t Insn;
+ DecodeStatus Result;
+
+ Result = readInstruction64(Bytes, Address, Size, Insn);
+ if (Result == MCDisassembler::Fail) return MCDisassembler::Fail;
+
+ Result = decodeInstruction(DecoderTableBPF64, Instr, Insn,
+ Address, this, STI);
+ if (Result == MCDisassembler::Fail) return MCDisassembler::Fail;
+
+ switch (Instr.getOpcode()) {
+ case BPF::LD_imm64: {
+ if (Bytes.size() < 16) {
+ Size = 0;
+ return MCDisassembler::Fail;
+ }
+ Size = 16;
+ uint64_t Hi = (Bytes[12] << 0) | (Bytes[13] << 8) | (Bytes[14] << 16) | (Bytes[15] << 24);
+ auto& Op = Instr.getOperand(1);
+ Op.setImm(Make_64(Hi, Op.getImm()));
+ break;
+ }
+ case BPF::LD_ABS_B:
+ case BPF::LD_ABS_H:
+ case BPF::LD_ABS_W:
+ case BPF::LD_IND_B:
+ case BPF::LD_IND_H:
+ case BPF::LD_IND_W: {
+ auto Op = Instr.getOperand(0);
+ Instr.clear();
+ Instr.addOperand(MCOperand::createReg(BPF::R6));
+ Instr.addOperand(Op);
+ break;
+ }
+ }
+
+ return Result;
+}
+
+typedef DecodeStatus (*DecodeFunc)(MCInst &MI, unsigned insn, uint64_t Address,
+ const void *Decoder);
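
`decodeMemoryOpValue` above splits the low 32 bits of the instruction word into a 4-bit base-register index (bits 16-19) and a 16-bit signed offset (bits 0-15). A small sketch of that field extraction, independent of the MC layer; the types and names are illustrative:

    #include <cassert>
    #include <cstdint>

    struct MemOperand {
      unsigned Reg;   // 0..15, index into the GPR decoder table
      int32_t Offset; // sign-extended 16-bit offset
    };

    MemOperand decodeMemOperand(uint32_t Insn) {
      MemOperand Op;
      Op.Reg = (Insn >> 16) & 0xf;
      Op.Offset = static_cast<int16_t>(Insn & 0xffff); // sign-extend
      return Op;
    }

    int main() {
      MemOperand Op = decodeMemOperand(0x0001fffc);
      assert(Op.Reg == 1);     // base register r1
      assert(Op.Offset == -4); // offset -4
      return 0;
    }
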
diff --git a/contrib/llvm/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp b/contrib/llvm/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp
index 552288b819d1..ffd29f3ea991 100644
--- a/contrib/llvm/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/BPF/InstPrinter/BPFInstPrinter.cpp
@@ -67,15 +67,21 @@ void BPFInstPrinter::printMemOperand(const MCInst *MI, int OpNo, raw_ostream &O,
const char *Modifier) {
const MCOperand &RegOp = MI->getOperand(OpNo);
const MCOperand &OffsetOp = MI->getOperand(OpNo + 1);
- // offset
- if (OffsetOp.isImm())
- O << formatDec(OffsetOp.getImm());
- else
- assert(0 && "Expected an immediate");
// register
assert(RegOp.isReg() && "Register operand not a register");
- O << '(' << getRegisterName(RegOp.getReg()) << ')';
+ O << getRegisterName(RegOp.getReg());
+
+ // offset
+ if (OffsetOp.isImm()) {
+ auto Imm = OffsetOp.getImm();
+ if (Imm >= 0)
+ O << " + " << formatDec(Imm);
+ else
+ O << " - " << formatDec(-Imm);
+ } else {
+ assert(0 && "Expected an immediate");
+ }
}
void BPFInstPrinter::printImm64Operand(const MCInst *MI, unsigned OpNo,
diff --git a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
index 2de40aab3a74..a6cd2002c12c 100644
--- a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFAsmBackend.cpp
@@ -96,12 +96,14 @@ MCObjectWriter *BPFAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const {
MCAsmBackend *llvm::createBPFAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU) {
+ const Triple &TT, StringRef CPU,
+ const MCTargetOptions&) {
return new BPFAsmBackend(/*IsLittleEndian=*/true);
}
MCAsmBackend *llvm::createBPFbeAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU) {
+ const Triple &TT, StringRef CPU,
+ const MCTargetOptions&) {
return new BPFAsmBackend(/*IsLittleEndian=*/false);
}
diff --git a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
index 4b92e3eb019b..3d1c0eb55afa 100644
--- a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFELFObjectWriter.cpp
@@ -41,13 +41,13 @@ unsigned BPFELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target,
default:
llvm_unreachable("invalid fixup kind!");
case FK_SecRel_8:
- return ELF::R_X86_64_64;
+ return ELF::R_BPF_64_64;
case FK_SecRel_4:
- return ELF::R_X86_64_PC32;
+ return ELF::R_BPF_64_32;
case FK_Data_8:
- return IsPCRel ? ELF::R_X86_64_PC64 : ELF::R_X86_64_64;
+ return ELF::R_BPF_64_64;
case FK_Data_4:
- return IsPCRel ? ELF::R_X86_64_PC32 : ELF::R_X86_64_32;
+ return ELF::R_BPF_64_32;
}
}
diff --git a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
index 9a2e223bcbd6..559ac291a79e 100644
--- a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
+++ b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCAsmInfo.h
@@ -34,6 +34,15 @@ public:
HasDotTypeDotSizeDirective = false;
SupportsDebugInformation = true;
+ ExceptionsType = ExceptionHandling::DwarfCFI;
+ MinInstAlignment = 8;
+
+ // The default is 4 and it only affects DWARF ELF output,
+ // so if it is not set correctly, the DWARF data will be
+ // off by 4 bytes in random places. The .debug_line
+ // section will still be parsable, but with odd offsets and
+ // line numbers, etc.
+ PointerSize = 8;
}
};
}
diff --git a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
index c6561ddda26e..47f16512a397 100644
--- a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCCodeEmitter.cpp
@@ -30,12 +30,14 @@ namespace {
class BPFMCCodeEmitter : public MCCodeEmitter {
BPFMCCodeEmitter(const BPFMCCodeEmitter &) = delete;
void operator=(const BPFMCCodeEmitter &) = delete;
+ const MCInstrInfo &MCII;
const MCRegisterInfo &MRI;
bool IsLittleEndian;
public:
- BPFMCCodeEmitter(const MCRegisterInfo &mri, bool IsLittleEndian)
- : MRI(mri), IsLittleEndian(IsLittleEndian) {}
+ BPFMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri,
+ bool IsLittleEndian)
+ : MCII(mcii), MRI(mri), IsLittleEndian(IsLittleEndian) {}
~BPFMCCodeEmitter() {}
@@ -58,19 +60,24 @@ public:
void encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
+
+private:
+ uint64_t computeAvailableFeatures(const FeatureBitset &FB) const;
+ void verifyInstructionPredicates(const MCInst &MI,
+ uint64_t AvailableFeatures) const;
};
}
MCCodeEmitter *llvm::createBPFMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx) {
- return new BPFMCCodeEmitter(MRI, true);
+ return new BPFMCCodeEmitter(MCII, MRI, true);
}
MCCodeEmitter *llvm::createBPFbeMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx) {
- return new BPFMCCodeEmitter(MRI, false);
+ return new BPFMCCodeEmitter(MCII, MRI, false);
}
unsigned BPFMCCodeEmitter::getMachineOpValue(const MCInst &MI,
@@ -108,6 +115,9 @@ static uint8_t SwapBits(uint8_t Val)
void BPFMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
+ verifyInstructionPredicates(MI,
+ computeAvailableFeatures(STI.getFeatureBits()));
+
unsigned Opcode = MI.getOpcode();
support::endian::Writer<support::little> LE(OS);
support::endian::Writer<support::big> BE(OS);
@@ -165,4 +175,5 @@ uint64_t BPFMCCodeEmitter::getMemoryOpValue(const MCInst &MI, unsigned Op,
return Encoding;
}
+#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "BPFGenMCCodeEmitter.inc"
diff --git a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
index 03d6b193fe27..55415f97396b 100644
--- a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.cpp
@@ -68,7 +68,8 @@ static MCInstPrinter *createBPFMCInstPrinter(const Triple &T,
}
extern "C" void LLVMInitializeBPFTargetMC() {
- for (Target *T : {&TheBPFleTarget, &TheBPFbeTarget, &TheBPFTarget}) {
+ for (Target *T :
+ {&getTheBPFleTarget(), &getTheBPFbeTarget(), &getTheBPFTarget()}) {
// Register the MC asm info.
RegisterMCAsmInfo<BPFMCAsmInfo> X(*T);
@@ -90,18 +91,26 @@ extern "C" void LLVMInitializeBPFTargetMC() {
}
// Register the MC code emitter
- TargetRegistry::RegisterMCCodeEmitter(TheBPFleTarget, createBPFMCCodeEmitter);
- TargetRegistry::RegisterMCCodeEmitter(TheBPFbeTarget, createBPFbeMCCodeEmitter);
+ TargetRegistry::RegisterMCCodeEmitter(getTheBPFleTarget(),
+ createBPFMCCodeEmitter);
+ TargetRegistry::RegisterMCCodeEmitter(getTheBPFbeTarget(),
+ createBPFbeMCCodeEmitter);
// Register the ASM Backend
- TargetRegistry::RegisterMCAsmBackend(TheBPFleTarget, createBPFAsmBackend);
- TargetRegistry::RegisterMCAsmBackend(TheBPFbeTarget, createBPFbeAsmBackend);
+ TargetRegistry::RegisterMCAsmBackend(getTheBPFleTarget(),
+ createBPFAsmBackend);
+ TargetRegistry::RegisterMCAsmBackend(getTheBPFbeTarget(),
+ createBPFbeAsmBackend);
if (sys::IsLittleEndianHost) {
- TargetRegistry::RegisterMCCodeEmitter(TheBPFTarget, createBPFMCCodeEmitter);
- TargetRegistry::RegisterMCAsmBackend(TheBPFTarget, createBPFAsmBackend);
+ TargetRegistry::RegisterMCCodeEmitter(getTheBPFTarget(),
+ createBPFMCCodeEmitter);
+ TargetRegistry::RegisterMCAsmBackend(getTheBPFTarget(),
+ createBPFAsmBackend);
} else {
- TargetRegistry::RegisterMCCodeEmitter(TheBPFTarget, createBPFbeMCCodeEmitter);
- TargetRegistry::RegisterMCAsmBackend(TheBPFTarget, createBPFbeAsmBackend);
+ TargetRegistry::RegisterMCCodeEmitter(getTheBPFTarget(),
+ createBPFbeMCCodeEmitter);
+ TargetRegistry::RegisterMCAsmBackend(getTheBPFTarget(),
+ createBPFbeAsmBackend);
}
}
diff --git a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
index e2ae6526edc6..3df673eaeb4b 100644
--- a/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/BPF/MCTargetDesc/BPFMCTargetDesc.h
@@ -25,15 +25,16 @@ class MCInstrInfo;
class MCObjectWriter;
class MCRegisterInfo;
class MCSubtargetInfo;
+class MCTargetOptions;
class StringRef;
class Target;
class Triple;
class raw_ostream;
class raw_pwrite_stream;
-extern Target TheBPFleTarget;
-extern Target TheBPFbeTarget;
-extern Target TheBPFTarget;
+Target &getTheBPFleTarget();
+Target &getTheBPFbeTarget();
+Target &getTheBPFTarget();
MCCodeEmitter *createBPFMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
@@ -43,9 +44,11 @@ MCCodeEmitter *createBPFbeMCCodeEmitter(const MCInstrInfo &MCII,
MCContext &Ctx);
MCAsmBackend *createBPFAsmBackend(const Target &T, const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU);
+ const Triple &TT, StringRef CPU,
+ const MCTargetOptions &Options);
MCAsmBackend *createBPFbeAsmBackend(const Target &T, const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU);
+ const Triple &TT, StringRef CPU,
+ const MCTargetOptions &Options);
MCObjectWriter *createBPFELFObjectWriter(raw_pwrite_stream &OS,
uint8_t OSABI, bool IsLittleEndian);
diff --git a/contrib/llvm/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp b/contrib/llvm/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp
index a16dbae867b2..265180b99876 100644
--- a/contrib/llvm/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp
+++ b/contrib/llvm/lib/Target/BPF/TargetInfo/BPFTargetInfo.cpp
@@ -12,17 +12,25 @@
using namespace llvm;
namespace llvm {
-Target TheBPFleTarget;
-Target TheBPFbeTarget;
-Target TheBPFTarget;
+Target &getTheBPFleTarget() {
+ static Target TheBPFleTarget;
+ return TheBPFleTarget;
}
+Target &getTheBPFbeTarget() {
+ static Target TheBPFbeTarget;
+ return TheBPFbeTarget;
+}
+Target &getTheBPFTarget() {
+ static Target TheBPFTarget;
+ return TheBPFTarget;
+}
+} // namespace llvm
extern "C" void LLVMInitializeBPFTargetInfo() {
- TargetRegistry::RegisterTarget(TheBPFTarget, "bpf",
- "BPF (host endian)",
+ TargetRegistry::RegisterTarget(getTheBPFTarget(), "bpf", "BPF (host endian)",
[](Triple::ArchType) { return false; }, true);
- RegisterTarget<Triple::bpfel, /*HasJIT=*/true> X(
- TheBPFleTarget, "bpfel", "BPF (little endian)");
- RegisterTarget<Triple::bpfeb, /*HasJIT=*/true> Y(
- TheBPFbeTarget, "bpfeb", "BPF (big endian)");
+ RegisterTarget<Triple::bpfel, /*HasJIT=*/true> X(getTheBPFleTarget(), "bpfel",
+ "BPF (little endian)");
+ RegisterTarget<Triple::bpfeb, /*HasJIT=*/true> Y(getTheBPFbeTarget(), "bpfeb",
+ "BPF (big endian)");
}
diff --git a/contrib/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/contrib/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
index 496efbf7374b..becc086c81b0 100644
--- a/contrib/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp
@@ -10,39 +10,53 @@
#define DEBUG_TYPE "mcasmparser"
#include "Hexagon.h"
-#include "HexagonRegisterInfo.h"
#include "HexagonTargetStreamer.h"
-#include "MCTargetDesc/HexagonBaseInfo.h"
-#include "MCTargetDesc/HexagonMCAsmInfo.h"
#include "MCTargetDesc/HexagonMCChecker.h"
#include "MCTargetDesc/HexagonMCELFStreamer.h"
#include "MCTargetDesc/HexagonMCExpr.h"
-#include "MCTargetDesc/HexagonMCShuffler.h"
+#include "MCTargetDesc/HexagonMCInstrInfo.h"
#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "MCTargetDesc/HexagonShuffler.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDirectives.h"
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCValue.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
-#include "llvm/Support/MemoryBuffer.h"
-#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/SMLoc.h"
+#include "llvm/Support/TargetRegistry.h"
+#include <algorithm>
+#include <cassert>
+#include <cctype>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
+#include <string>
+#include <utility>
using namespace llvm;
@@ -65,8 +79,8 @@ static cl::opt<bool> ErrorNoncontigiousRegister("merror-noncontigious-register",
cl::desc("Error for register names that aren't contigious"),
cl::init(false));
-
namespace {
+
struct HexagonOperand;
class HexagonAsmParser : public MCTargetAsmParser {
@@ -93,9 +107,7 @@ class HexagonAsmParser : public MCTargetAsmParser {
bool Error(SMLoc L, const Twine &Msg) { return Parser.Error(L, Msg); }
bool ParseDirectiveFalign(unsigned Size, SMLoc L);
- virtual bool ParseRegister(unsigned &RegNo,
- SMLoc &StartLoc,
- SMLoc &EndLoc) override;
+ bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
bool ParseDirectiveSubsection(SMLoc L);
bool ParseDirectiveValue(unsigned Size, SMLoc L);
bool ParseDirectiveComm(bool IsLocal, SMLoc L);
@@ -114,7 +126,7 @@ class HexagonAsmParser : public MCTargetAsmParser {
uint64_t &ErrorInfo, bool MatchingInlineAsm) override;
unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, unsigned Kind) override;
- void OutOfRange(SMLoc IDLoc, long long Val, long long Max);
+ bool OutOfRange(SMLoc IDLoc, long long Val, long long Max);
int processInstruction(MCInst &Inst, OperandVector const &Operands,
SMLoc IDLoc);
@@ -141,14 +153,14 @@ public:
MCII (MII), MCB(HexagonMCInstrInfo::createBundle()), InBrackets(false) {
setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
- MCAsmParserExtension::Initialize(_Parser);
+ MCAsmParserExtension::Initialize(_Parser);
- Assembler = nullptr;
- // FIXME: need better way to detect AsmStreamer (upstream removed getKind())
- if (!Parser.getStreamer().hasRawTextSupport()) {
- MCELFStreamer *MES = static_cast<MCELFStreamer *>(&Parser.getStreamer());
- Assembler = &MES->getAssembler();
- }
+ Assembler = nullptr;
+ // FIXME: need better way to detect AsmStreamer (upstream removed getKind())
+ if (!Parser.getStreamer().hasRawTextSupport()) {
+ MCELFStreamer *MES = static_cast<MCELFStreamer *>(&Parser.getStreamer());
+ Assembler = &MES->getAssembler();
+ }
}
bool splitIdentifier(OperandVector &Operands);
@@ -157,15 +169,17 @@ public:
bool implicitExpressionLocation(OperandVector &Operands);
bool parseExpressionOrOperand(OperandVector &Operands);
bool parseExpression(MCExpr const *& Expr);
- virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
- SMLoc NameLoc, OperandVector &Operands) override
+
+ bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
+ SMLoc NameLoc, OperandVector &Operands) override
{
llvm_unreachable("Unimplemented");
}
- virtual bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
- AsmToken ID, OperandVector &Operands) override;
- virtual bool ParseDirective(AsmToken DirectiveID) override;
+ bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, AsmToken ID,
+ OperandVector &Operands) override;
+
+ bool ParseDirective(AsmToken DirectiveID) override;
};
/// HexagonOperand - Instances of this class represent a parsed Hexagon machine
@@ -219,12 +233,12 @@ public:
}
/// getStartLoc - Get the location of the first token of this operand.
- SMLoc getStartLoc() const { return StartLoc; }
+ SMLoc getStartLoc() const override { return StartLoc; }
/// getEndLoc - Get the location of the last token of this operand.
- SMLoc getEndLoc() const { return EndLoc; }
+ SMLoc getEndLoc() const override { return EndLoc; }
- unsigned getReg() const {
+ unsigned getReg() const override {
assert(Kind == Register && "Invalid access!");
return Reg.RegNum;
}
@@ -234,10 +248,10 @@ public:
return Imm.Val;
}
- bool isToken() const { return Kind == Token; }
- bool isImm() const { return Kind == Immediate; }
- bool isMem() const { llvm_unreachable("No isMem"); }
- bool isReg() const { return Kind == Register; }
+ bool isToken() const override { return Kind == Token; }
+ bool isImm() const override { return Kind == Immediate; }
+ bool isMem() const override { llvm_unreachable("No isMem"); }
+ bool isReg() const override { return Kind == Register; }
bool CheckImmRange(int immBits, int zeroBits, bool isSigned,
bool isRelocatable, bool Extendable) const {
@@ -259,11 +273,11 @@ public:
if (bits == 64)
return true;
if (Res >= 0)
- return ((uint64_t)Res < (uint64_t)(1ULL << bits)) ? true : false;
+ return ((uint64_t)Res < (uint64_t)(1ULL << bits));
else {
const int64_t high_bit_set = 1ULL << 63;
const uint64_t mask = (high_bit_set >> (63 - bits));
- return (((uint64_t)Res & mask) == mask) ? true : false;
+ return (((uint64_t)Res & mask) == mask);
}
}
} else if (myMCExpr->getKind() == MCExpr::SymbolRef && isRelocatable)
@@ -276,55 +290,60 @@ public:
}
bool isf32Ext() const { return false; }
- bool iss32Imm() const { return CheckImmRange(32, 0, true, true, false); }
+ bool iss32_0Imm() const { return CheckImmRange(32, 0, true, true, false); }
bool iss23_2Imm() const { return CheckImmRange(23, 2, true, true, false); }
- bool iss8Imm() const { return CheckImmRange(8, 0, true, false, false); }
- bool iss8Imm64() const { return CheckImmRange(8, 0, true, true, false); }
- bool iss7Imm() const { return CheckImmRange(7, 0, true, false, false); }
- bool iss6Imm() const { return CheckImmRange(6, 0, true, false, false); }
- bool iss4Imm() const { return CheckImmRange(4, 0, true, false, false); }
+ bool iss8_0Imm() const { return CheckImmRange(8, 0, true, false, false); }
+ bool iss8_0Imm64() const { return CheckImmRange(8, 0, true, true, false); }
+ bool iss7_0Imm() const { return CheckImmRange(7, 0, true, false, false); }
+ bool iss6_0Imm() const { return CheckImmRange(6, 0, true, false, false); }
bool iss4_0Imm() const { return CheckImmRange(4, 0, true, false, false); }
bool iss4_1Imm() const { return CheckImmRange(4, 1, true, false, false); }
bool iss4_2Imm() const { return CheckImmRange(4, 2, true, false, false); }
bool iss4_3Imm() const { return CheckImmRange(4, 3, true, false, false); }
bool iss4_6Imm() const { return CheckImmRange(4, 0, true, false, false); }
bool iss3_6Imm() const { return CheckImmRange(3, 0, true, false, false); }
- bool iss3Imm() const { return CheckImmRange(3, 0, true, false, false); }
+ bool iss3_0Imm() const { return CheckImmRange(3, 0, true, false, false); }
- bool isu64Imm() const { return CheckImmRange(64, 0, false, true, true); }
- bool isu32Imm() const { return CheckImmRange(32, 0, false, true, false); }
+ bool isu64_0Imm() const { return CheckImmRange(64, 0, false, true, true); }
+ bool isu32_0Imm() const { return CheckImmRange(32, 0, false, true, false); }
bool isu26_6Imm() const { return CheckImmRange(26, 6, false, true, false); }
- bool isu16Imm() const { return CheckImmRange(16, 0, false, true, false); }
bool isu16_0Imm() const { return CheckImmRange(16, 0, false, true, false); }
bool isu16_1Imm() const { return CheckImmRange(16, 1, false, true, false); }
bool isu16_2Imm() const { return CheckImmRange(16, 2, false, true, false); }
bool isu16_3Imm() const { return CheckImmRange(16, 3, false, true, false); }
bool isu11_3Imm() const { return CheckImmRange(11, 3, false, false, false); }
- bool isu6_0Imm() const { return CheckImmRange(6, 0, false, false, false); }
bool isu6_1Imm() const { return CheckImmRange(6, 1, false, false, false); }
bool isu6_2Imm() const { return CheckImmRange(6, 2, false, false, false); }
bool isu6_3Imm() const { return CheckImmRange(6, 3, false, false, false); }
- bool isu10Imm() const { return CheckImmRange(10, 0, false, false, false); }
- bool isu9Imm() const { return CheckImmRange(9, 0, false, false, false); }
- bool isu8Imm() const { return CheckImmRange(8, 0, false, false, false); }
- bool isu7Imm() const { return CheckImmRange(7, 0, false, false, false); }
- bool isu6Imm() const { return CheckImmRange(6, 0, false, false, false); }
- bool isu5Imm() const { return CheckImmRange(5, 0, false, false, false); }
- bool isu4Imm() const { return CheckImmRange(4, 0, false, false, false); }
- bool isu3Imm() const { return CheckImmRange(3, 0, false, false, false); }
- bool isu2Imm() const { return CheckImmRange(2, 0, false, false, false); }
- bool isu1Imm() const { return CheckImmRange(1, 0, false, false, false); }
-
- bool ism6Imm() const { return CheckImmRange(6, 0, false, false, false); }
- bool isn8Imm() const { return CheckImmRange(8, 0, false, false, false); }
-
- bool iss16Ext() const { return CheckImmRange(16 + 26, 0, true, true, true); }
- bool iss12Ext() const { return CheckImmRange(12 + 26, 0, true, true, true); }
- bool iss10Ext() const { return CheckImmRange(10 + 26, 0, true, true, true); }
- bool iss9Ext() const { return CheckImmRange(9 + 26, 0, true, true, true); }
- bool iss8Ext() const { return CheckImmRange(8 + 26, 0, true, true, true); }
- bool iss7Ext() const { return CheckImmRange(7 + 26, 0, true, true, true); }
- bool iss6Ext() const { return CheckImmRange(6 + 26, 0, true, true, true); }
+ bool isu10_0Imm() const { return CheckImmRange(10, 0, false, false, false); }
+ bool isu9_0Imm() const { return CheckImmRange(9, 0, false, false, false); }
+ bool isu8_0Imm() const { return CheckImmRange(8, 0, false, false, false); }
+ bool isu7_0Imm() const { return CheckImmRange(7, 0, false, false, false); }
+ bool isu6_0Imm() const { return CheckImmRange(6, 0, false, false, false); }
+ bool isu5_0Imm() const { return CheckImmRange(5, 0, false, false, false); }
+ bool isu4_0Imm() const { return CheckImmRange(4, 0, false, false, false); }
+ bool isu3_0Imm() const { return CheckImmRange(3, 0, false, false, false); }
+ bool isu2_0Imm() const { return CheckImmRange(2, 0, false, false, false); }
+ bool isu1_0Imm() const { return CheckImmRange(1, 0, false, false, false); }
+
+ bool ism6_0Imm() const { return CheckImmRange(6, 0, false, false, false); }
+ bool isn8_0Imm() const { return CheckImmRange(8, 0, false, false, false); }
+ bool isn1Const() const {
+ if (!isImm())
+ return false;
+ int64_t Value;
+ if (!getImm()->evaluateAsAbsolute(Value))
+ return false;
+ return Value == -1;
+ }
+
+ bool iss16_0Ext() const { return CheckImmRange(16 + 26, 0, true, true, true); }
+ bool iss12_0Ext() const { return CheckImmRange(12 + 26, 0, true, true, true); }
+ bool iss10_0Ext() const { return CheckImmRange(10 + 26, 0, true, true, true); }
+ bool iss9_0Ext() const { return CheckImmRange(9 + 26, 0, true, true, true); }
+ bool iss8_0Ext() const { return CheckImmRange(8 + 26, 0, true, true, true); }
+ bool iss7_0Ext() const { return CheckImmRange(7 + 26, 0, true, true, true); }
+ bool iss6_0Ext() const { return CheckImmRange(6 + 26, 0, true, true, true); }
bool iss11_0Ext() const {
return CheckImmRange(11 + 26, 0, true, true, true);
}
@@ -338,16 +357,15 @@ public:
return CheckImmRange(11 + 26, 3, true, true, true);
}
- bool isu6Ext() const { return CheckImmRange(6 + 26, 0, false, true, true); }
- bool isu7Ext() const { return CheckImmRange(7 + 26, 0, false, true, true); }
- bool isu8Ext() const { return CheckImmRange(8 + 26, 0, false, true, true); }
- bool isu9Ext() const { return CheckImmRange(9 + 26, 0, false, true, true); }
- bool isu10Ext() const { return CheckImmRange(10 + 26, 0, false, true, true); }
+ bool isu7_0Ext() const { return CheckImmRange(7 + 26, 0, false, true, true); }
+ bool isu8_0Ext() const { return CheckImmRange(8 + 26, 0, false, true, true); }
+ bool isu9_0Ext() const { return CheckImmRange(9 + 26, 0, false, true, true); }
+ bool isu10_0Ext() const { return CheckImmRange(10 + 26, 0, false, true, true); }
bool isu6_0Ext() const { return CheckImmRange(6 + 26, 0, false, true, true); }
bool isu6_1Ext() const { return CheckImmRange(6 + 26, 1, false, true, true); }
bool isu6_2Ext() const { return CheckImmRange(6 + 26, 2, false, true, true); }
bool isu6_3Ext() const { return CheckImmRange(6 + 26, 3, false, true, true); }
- bool isu32MustExt() const { return isImm(); }
+ bool isu32_0MustExt() const { return isImm(); }
void addRegOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
@@ -378,22 +396,19 @@ public:
addImmOperands(Inst, N);
}
- void adds32ImmOperands(MCInst &Inst, unsigned N) const {
+ void adds32_0ImmOperands(MCInst &Inst, unsigned N) const {
addSignedImmOperands(Inst, N);
}
void adds23_2ImmOperands(MCInst &Inst, unsigned N) const {
addSignedImmOperands(Inst, N);
}
- void adds8ImmOperands(MCInst &Inst, unsigned N) const {
- addSignedImmOperands(Inst, N);
- }
- void adds8Imm64Operands(MCInst &Inst, unsigned N) const {
+ void adds8_0ImmOperands(MCInst &Inst, unsigned N) const {
addSignedImmOperands(Inst, N);
}
- void adds6ImmOperands(MCInst &Inst, unsigned N) const {
+ void adds8_0Imm64Operands(MCInst &Inst, unsigned N) const {
addSignedImmOperands(Inst, N);
}
- void adds4ImmOperands(MCInst &Inst, unsigned N) const {
+ void adds6_0ImmOperands(MCInst &Inst, unsigned N) const {
addSignedImmOperands(Inst, N);
}
void adds4_0ImmOperands(MCInst &Inst, unsigned N) const {
@@ -408,22 +423,19 @@ public:
void adds4_3ImmOperands(MCInst &Inst, unsigned N) const {
addSignedImmOperands(Inst, N);
}
- void adds3ImmOperands(MCInst &Inst, unsigned N) const {
+ void adds3_0ImmOperands(MCInst &Inst, unsigned N) const {
addSignedImmOperands(Inst, N);
}
- void addu64ImmOperands(MCInst &Inst, unsigned N) const {
+ void addu64_0ImmOperands(MCInst &Inst, unsigned N) const {
addImmOperands(Inst, N);
}
- void addu32ImmOperands(MCInst &Inst, unsigned N) const {
+ void addu32_0ImmOperands(MCInst &Inst, unsigned N) const {
addImmOperands(Inst, N);
}
void addu26_6ImmOperands(MCInst &Inst, unsigned N) const {
addImmOperands(Inst, N);
}
- void addu16ImmOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
void addu16_0ImmOperands(MCInst &Inst, unsigned N) const {
addImmOperands(Inst, N);
}
@@ -439,19 +451,16 @@ public:
void addu11_3ImmOperands(MCInst &Inst, unsigned N) const {
addImmOperands(Inst, N);
}
- void addu10ImmOperands(MCInst &Inst, unsigned N) const {
+ void addu10_0ImmOperands(MCInst &Inst, unsigned N) const {
addImmOperands(Inst, N);
}
- void addu9ImmOperands(MCInst &Inst, unsigned N) const {
+ void addu9_0ImmOperands(MCInst &Inst, unsigned N) const {
addImmOperands(Inst, N);
}
- void addu8ImmOperands(MCInst &Inst, unsigned N) const {
+ void addu8_0ImmOperands(MCInst &Inst, unsigned N) const {
addImmOperands(Inst, N);
}
- void addu7ImmOperands(MCInst &Inst, unsigned N) const {
- addImmOperands(Inst, N);
- }
- void addu6ImmOperands(MCInst &Inst, unsigned N) const {
+ void addu7_0ImmOperands(MCInst &Inst, unsigned N) const {
addImmOperands(Inst, N);
}
void addu6_0ImmOperands(MCInst &Inst, unsigned N) const {
@@ -466,45 +475,45 @@ public:
void addu6_3ImmOperands(MCInst &Inst, unsigned N) const {
addImmOperands(Inst, N);
}
- void addu5ImmOperands(MCInst &Inst, unsigned N) const {
+ void addu5_0ImmOperands(MCInst &Inst, unsigned N) const {
addImmOperands(Inst, N);
}
- void addu4ImmOperands(MCInst &Inst, unsigned N) const {
+ void addu4_0ImmOperands(MCInst &Inst, unsigned N) const {
addImmOperands(Inst, N);
}
- void addu3ImmOperands(MCInst &Inst, unsigned N) const {
+ void addu3_0ImmOperands(MCInst &Inst, unsigned N) const {
addImmOperands(Inst, N);
}
- void addu2ImmOperands(MCInst &Inst, unsigned N) const {
+ void addu2_0ImmOperands(MCInst &Inst, unsigned N) const {
addImmOperands(Inst, N);
}
- void addu1ImmOperands(MCInst &Inst, unsigned N) const {
+ void addu1_0ImmOperands(MCInst &Inst, unsigned N) const {
addImmOperands(Inst, N);
}
- void addm6ImmOperands(MCInst &Inst, unsigned N) const {
+ void addm6_0ImmOperands(MCInst &Inst, unsigned N) const {
addImmOperands(Inst, N);
}
- void addn8ImmOperands(MCInst &Inst, unsigned N) const {
+ void addn8_0ImmOperands(MCInst &Inst, unsigned N) const {
addImmOperands(Inst, N);
}
- void adds16ExtOperands(MCInst &Inst, unsigned N) const {
+ void adds16_0ExtOperands(MCInst &Inst, unsigned N) const {
addSignedImmOperands(Inst, N);
}
- void adds12ExtOperands(MCInst &Inst, unsigned N) const {
+ void adds12_0ExtOperands(MCInst &Inst, unsigned N) const {
addSignedImmOperands(Inst, N);
}
- void adds10ExtOperands(MCInst &Inst, unsigned N) const {
+ void adds10_0ExtOperands(MCInst &Inst, unsigned N) const {
addSignedImmOperands(Inst, N);
}
- void adds9ExtOperands(MCInst &Inst, unsigned N) const {
+ void adds9_0ExtOperands(MCInst &Inst, unsigned N) const {
addSignedImmOperands(Inst, N);
}
- void adds8ExtOperands(MCInst &Inst, unsigned N) const {
+ void adds8_0ExtOperands(MCInst &Inst, unsigned N) const {
addSignedImmOperands(Inst, N);
}
- void adds6ExtOperands(MCInst &Inst, unsigned N) const {
+ void adds6_0ExtOperands(MCInst &Inst, unsigned N) const {
addSignedImmOperands(Inst, N);
}
void adds11_0ExtOperands(MCInst &Inst, unsigned N) const {
@@ -519,20 +528,20 @@ public:
void adds11_3ExtOperands(MCInst &Inst, unsigned N) const {
addSignedImmOperands(Inst, N);
}
-
- void addu6ExtOperands(MCInst &Inst, unsigned N) const {
+ void addn1ConstOperands(MCInst &Inst, unsigned N) const {
addImmOperands(Inst, N);
}
- void addu7ExtOperands(MCInst &Inst, unsigned N) const {
+
+ void addu7_0ExtOperands(MCInst &Inst, unsigned N) const {
addImmOperands(Inst, N);
}
- void addu8ExtOperands(MCInst &Inst, unsigned N) const {
+ void addu8_0ExtOperands(MCInst &Inst, unsigned N) const {
addImmOperands(Inst, N);
}
- void addu9ExtOperands(MCInst &Inst, unsigned N) const {
+ void addu9_0ExtOperands(MCInst &Inst, unsigned N) const {
addImmOperands(Inst, N);
}
- void addu10ExtOperands(MCInst &Inst, unsigned N) const {
+ void addu10_0ExtOperands(MCInst &Inst, unsigned N) const {
addImmOperands(Inst, N);
}
void addu6_0ExtOperands(MCInst &Inst, unsigned N) const {
@@ -547,7 +556,7 @@ public:
void addu6_3ExtOperands(MCInst &Inst, unsigned N) const {
addImmOperands(Inst, N);
}
- void addu32MustExtOperands(MCInst &Inst, unsigned N) const {
+ void addu32_0MustExtOperands(MCInst &Inst, unsigned N) const {
addImmOperands(Inst, N);
}
@@ -570,7 +579,7 @@ public:
return StringRef(Tok.Data, Tok.Length);
}
- virtual void print(raw_ostream &OS) const;
+ void print(raw_ostream &OS) const override;
static std::unique_ptr<HexagonOperand> CreateToken(StringRef Str, SMLoc S) {
HexagonOperand *Op = new HexagonOperand(Token);
@@ -600,7 +609,7 @@ public:
}
};
-} // end anonymous namespace.
+} // end anonymous namespace
void HexagonOperand::print(raw_ostream &OS) const {
switch (Kind) {
@@ -630,67 +639,70 @@ bool HexagonAsmParser::finishBundle(SMLoc IDLoc, MCStreamer &Out) {
getContext(), MCB,
&Check);
- while (Check.getNextErrInfo() == true) {
+ while (Check.getNextErrInfo()) {
unsigned Reg = Check.getErrRegister();
Twine R(RI->getName(Reg));
uint64_t Err = Check.getError();
if (Err != HexagonMCErrInfo::CHECK_SUCCESS) {
if (HexagonMCErrInfo::CHECK_ERROR_BRANCHES & Err)
- Error(IDLoc,
- "unconditional branch cannot precede another branch in packet");
+ return Error(
+ IDLoc,
+ "unconditional branch cannot precede another branch in packet");
if (HexagonMCErrInfo::CHECK_ERROR_NEWP & Err ||
HexagonMCErrInfo::CHECK_ERROR_NEWV & Err)
- Error(IDLoc, "register `" + R +
- "' used with `.new' "
- "but not validly modified in the same packet");
+ return Error(IDLoc, "register `" + R +
+ "' used with `.new' "
+ "but not validly modified in the same packet");
if (HexagonMCErrInfo::CHECK_ERROR_REGISTERS & Err)
- Error(IDLoc, "register `" + R + "' modified more than once");
+ return Error(IDLoc, "register `" + R + "' modified more than once");
if (HexagonMCErrInfo::CHECK_ERROR_READONLY & Err)
- Error(IDLoc, "cannot write to read-only register `" + R + "'");
+ return Error(IDLoc, "cannot write to read-only register `" + R + "'");
if (HexagonMCErrInfo::CHECK_ERROR_LOOP & Err)
- Error(IDLoc, "loop-setup and some branch instructions "
- "cannot be in the same packet");
+ return Error(IDLoc, "loop-setup and some branch instructions "
+ "cannot be in the same packet");
if (HexagonMCErrInfo::CHECK_ERROR_ENDLOOP & Err) {
Twine N(HexagonMCInstrInfo::isInnerLoop(MCB) ? '0' : '1');
- Error(IDLoc, "packet marked with `:endloop" + N + "' " +
+ return Error(IDLoc,
+ "packet marked with `:endloop" + N + "' " +
"cannot contain instructions that modify register " +
"`" + R + "'");
}
if (HexagonMCErrInfo::CHECK_ERROR_SOLO & Err)
- Error(IDLoc,
- "instruction cannot appear in packet with other instructions");
+ return Error(
+ IDLoc,
+ "instruction cannot appear in packet with other instructions");
if (HexagonMCErrInfo::CHECK_ERROR_NOSLOTS & Err)
- Error(IDLoc, "too many slots used in packet");
+ return Error(IDLoc, "too many slots used in packet");
if (Err & HexagonMCErrInfo::CHECK_ERROR_SHUFFLE) {
uint64_t Erm = Check.getShuffleError();
if (HexagonShuffler::SHUFFLE_ERROR_INVALID == Erm)
- Error(IDLoc, "invalid instruction packet");
+ return Error(IDLoc, "invalid instruction packet");
else if (HexagonShuffler::SHUFFLE_ERROR_STORES == Erm)
- Error(IDLoc, "invalid instruction packet: too many stores");
+ return Error(IDLoc, "invalid instruction packet: too many stores");
else if (HexagonShuffler::SHUFFLE_ERROR_LOADS == Erm)
- Error(IDLoc, "invalid instruction packet: too many loads");
+ return Error(IDLoc, "invalid instruction packet: too many loads");
else if (HexagonShuffler::SHUFFLE_ERROR_BRANCHES == Erm)
- Error(IDLoc, "too many branches in packet");
+ return Error(IDLoc, "too many branches in packet");
else if (HexagonShuffler::SHUFFLE_ERROR_NOSLOTS == Erm)
- Error(IDLoc, "invalid instruction packet: out of slots");
+ return Error(IDLoc, "invalid instruction packet: out of slots");
else if (HexagonShuffler::SHUFFLE_ERROR_SLOTS == Erm)
- Error(IDLoc, "invalid instruction packet: slot error");
+ return Error(IDLoc, "invalid instruction packet: slot error");
else if (HexagonShuffler::SHUFFLE_ERROR_ERRATA2 == Erm)
- Error(IDLoc, "v60 packet violation");
+ return Error(IDLoc, "v60 packet violation");
else if (HexagonShuffler::SHUFFLE_ERROR_STORE_LOAD_CONFLICT == Erm)
- Error(IDLoc, "slot 0 instruction does not allow slot 1 store");
+ return Error(IDLoc, "slot 0 instruction does not allow slot 1 store");
else
- Error(IDLoc, "unknown error in instruction packet");
+ return Error(IDLoc, "unknown error in instruction packet");
}
}
@@ -878,7 +890,7 @@ bool HexagonAsmParser::ParseDirective(AsmToken DirectiveID) {
return true;
}
bool HexagonAsmParser::ParseDirectiveSubsection(SMLoc L) {
- const MCExpr *Subsection = 0;
+ const MCExpr *Subsection = nullptr;
int64_t Res;
assert((getLexer().isNot(AsmToken::EndOfStatement)) &&
@@ -908,13 +920,13 @@ bool HexagonAsmParser::ParseDirectiveFalign(unsigned Size, SMLoc L) {
int64_t MaxBytesToFill = 15;
- // if there is an arguement
+ // if there is an argument
if (getLexer().isNot(AsmToken::EndOfStatement)) {
const MCExpr *Value;
SMLoc ExprLoc = L;
// Make sure we have a number (false is returned if expression is a number)
- if (getParser().parseExpression(Value) == false) {
+ if (!getParser().parseExpression(Value)) {
// Make sure this is a number that is in range
const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(Value);
uint64_t IntValue = MCE->getValue();
@@ -936,8 +948,7 @@ bool HexagonAsmParser::ParseDirectiveFalign(unsigned Size, SMLoc L) {
/// ::= .word [ expression (, expression)* ]
bool HexagonAsmParser::ParseDirectiveValue(unsigned Size, SMLoc L) {
if (getLexer().isNot(AsmToken::EndOfStatement)) {
-
- for (;;) {
+ while (true) {
const MCExpr *Value;
SMLoc ExprLoc = L;
if (getParser().parseExpression(Value))
@@ -1062,15 +1073,15 @@ bool HexagonAsmParser::RegisterMatchesArch(unsigned MatchNum) const {
/// Force static initialization.
extern "C" void LLVMInitializeHexagonAsmParser() {
- RegisterMCAsmParser<HexagonAsmParser> X(TheHexagonTarget);
+ RegisterMCAsmParser<HexagonAsmParser> X(getTheHexagonTarget());
}
#define GET_MATCHER_IMPLEMENTATION
#define GET_REGISTER_MATCHER
#include "HexagonGenAsmMatcher.inc"
-namespace {
-bool previousEqual(OperandVector &Operands, size_t Index, StringRef String) {
+static bool previousEqual(OperandVector &Operands, size_t Index,
+ StringRef String) {
if (Index >= Operands.size())
return false;
MCParsedAsmOperand &Operand = *Operands[Operands.size() - Index - 1];
@@ -1078,14 +1089,14 @@ bool previousEqual(OperandVector &Operands, size_t Index, StringRef String) {
return false;
return static_cast<HexagonOperand &>(Operand).getToken().equals_lower(String);
}
-bool previousIsLoop(OperandVector &Operands, size_t Index) {
+
+static bool previousIsLoop(OperandVector &Operands, size_t Index) {
return previousEqual(Operands, Index, "loop0") ||
previousEqual(Operands, Index, "loop1") ||
previousEqual(Operands, Index, "sp1loop0") ||
previousEqual(Operands, Index, "sp2loop0") ||
previousEqual(Operands, Index, "sp3loop0");
}
-}
bool HexagonAsmParser::splitIdentifier(OperandVector &Operands) {
AsmToken const &Token = getParser().getTok();
@@ -1174,8 +1185,7 @@ bool HexagonAsmParser::isLabel(AsmToken &Token) {
StringRef Raw (String.data(), Third.getString().data() - String.data() +
Third.getString().size());
std::string Collapsed = Raw;
- Collapsed.erase(std::remove_if(Collapsed.begin(), Collapsed.end(), isspace),
- Collapsed.end());
+ Collapsed.erase(llvm::remove_if(Collapsed, isspace), Collapsed.end());
StringRef Whole = Collapsed;
std::pair<StringRef, StringRef> DotSplit = Whole.split('.');
if (!matchRegister(DotSplit.first.lower()))
@@ -1219,8 +1229,7 @@ bool HexagonAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &En
NeededWorkaround = NeededWorkaround || (Again && !(Contigious && Type));
}
std::string Collapsed = RawString;
- Collapsed.erase(std::remove_if(Collapsed.begin(), Collapsed.end(), isspace),
- Collapsed.end());
+ Collapsed.erase(llvm::remove_if(Collapsed, isspace), Collapsed.end());
StringRef FullString = Collapsed;
std::pair<StringRef, StringRef> DotSplit = FullString.split('.');
unsigned DotReg = matchRegister(DotSplit.first.lower());
@@ -1277,7 +1286,7 @@ bool HexagonAsmParser::implicitExpressionLocation(OperandVector &Operands) {
}
bool HexagonAsmParser::parseExpression(MCExpr const *& Expr) {
- llvm::SmallVector<AsmToken, 4> Tokens;
+ SmallVector<AsmToken, 4> Tokens;
MCAsmLexer &Lexer = getLexer();
bool Done = false;
static char const * Comma = ",";
@@ -1456,9 +1465,8 @@ bool HexagonAsmParser::ParseInstruction(ParseInstructionInfo &Info,
return parseInstruction(Operands);
}
-namespace {
-MCInst makeCombineInst(int opCode, MCOperand &Rdd,
- MCOperand &MO1, MCOperand &MO2) {
+static MCInst makeCombineInst(int opCode, MCOperand &Rdd,
+ MCOperand &MO1, MCOperand &MO2) {
MCInst TmpInst;
TmpInst.setOpcode(opCode);
TmpInst.addOperand(Rdd);
@@ -1467,7 +1475,6 @@ MCInst makeCombineInst(int opCode, MCOperand &Rdd,
return TmpInst;
}
-}
// Define this matcher function after the auto-generated include so we
// have the match class enum definitions.
@@ -1488,12 +1495,6 @@ unsigned HexagonAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
? Match_Success
: Match_InvalidOperand;
}
- case MCK__MINUS_1: {
- int64_t Value;
- return Op->isImm() && Op->Imm.Val->evaluateAsAbsolute(Value) && Value == -1
- ? Match_Success
- : Match_InvalidOperand;
- }
}
if (Op->Kind == HexagonOperand::Token && Kind != InvalidMatchClass) {
StringRef myStringRef = StringRef(Op->Tok.Data, Op->Tok.Length);
@@ -1510,7 +1511,8 @@ unsigned HexagonAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp,
return Match_InvalidOperand;
}
-void HexagonAsmParser::OutOfRange(SMLoc IDLoc, long long Val, long long Max) {
+// FIXME: Calls to OutOfRange should propagate failure up to parseStatement.
+bool HexagonAsmParser::OutOfRange(SMLoc IDLoc, long long Val, long long Max) {
std::string errStr;
raw_string_ostream ES(errStr);
ES << "value " << Val << "(" << format_hex(Val, 0) << ") out of range: ";
@@ -1518,7 +1520,7 @@ void HexagonAsmParser::OutOfRange(SMLoc IDLoc, long long Val, long long Max) {
ES << "0-" << Max;
else
ES << Max << "-" << (-Max - 1);
- Error(IDLoc, ES.str().c_str());
+ return Parser.printError(IDLoc, ES.str());
}
int HexagonAsmParser::processInstruction(MCInst &Inst,
@@ -1599,11 +1601,11 @@ int HexagonAsmParser::processInstruction(MCInst &Inst,
case Hexagon::A2_tfrp: {
MCOperand &MO = Inst.getOperand(1);
unsigned int RegPairNum = RI->getEncodingValue(MO.getReg());
- std::string R1 = r + llvm::utostr(RegPairNum + 1);
+ std::string R1 = r + utostr(RegPairNum + 1);
StringRef Reg1(R1);
MO.setReg(matchRegister(Reg1));
// Add a new operand for the second register in the pair.
- std::string R2 = r + llvm::utostr(RegPairNum);
+ std::string R2 = r + utostr(RegPairNum);
StringRef Reg2(R2);
Inst.addOperand(MCOperand::createReg(matchRegister(Reg2)));
Inst.setOpcode(Hexagon::A2_combinew);
@@ -1614,11 +1616,11 @@ int HexagonAsmParser::processInstruction(MCInst &Inst,
case Hexagon::A2_tfrpf: {
MCOperand &MO = Inst.getOperand(2);
unsigned int RegPairNum = RI->getEncodingValue(MO.getReg());
- std::string R1 = r + llvm::utostr(RegPairNum + 1);
+ std::string R1 = r + utostr(RegPairNum + 1);
StringRef Reg1(R1);
MO.setReg(matchRegister(Reg1));
// Add a new operand for the second register in the pair.
- std::string R2 = r + llvm::utostr(RegPairNum);
+ std::string R2 = r + utostr(RegPairNum);
StringRef Reg2(R2);
Inst.addOperand(MCOperand::createReg(matchRegister(Reg2)));
Inst.setOpcode((Inst.getOpcode() == Hexagon::A2_tfrpt)
@@ -1630,11 +1632,11 @@ int HexagonAsmParser::processInstruction(MCInst &Inst,
case Hexagon::A2_tfrpfnew: {
MCOperand &MO = Inst.getOperand(2);
unsigned int RegPairNum = RI->getEncodingValue(MO.getReg());
- std::string R1 = r + llvm::utostr(RegPairNum + 1);
+ std::string R1 = r + utostr(RegPairNum + 1);
StringRef Reg1(R1);
MO.setReg(matchRegister(Reg1));
// Add a new operand for the second register in the pair.
- std::string R2 = r + llvm::utostr(RegPairNum);
+ std::string R2 = r + utostr(RegPairNum);
StringRef Reg2(R2);
Inst.addOperand(MCOperand::createReg(matchRegister(Reg2)));
Inst.setOpcode((Inst.getOpcode() == Hexagon::A2_tfrptnew)
@@ -1644,13 +1646,13 @@ int HexagonAsmParser::processInstruction(MCInst &Inst,
}
// Translate a "$Vdd = $Vss" to "$Vdd = vcombine($Vs, $Vt)"
- case Hexagon::HEXAGON_V6_vassignpair: {
+ case Hexagon::V6_vassignp: {
MCOperand &MO = Inst.getOperand(1);
unsigned int RegPairNum = RI->getEncodingValue(MO.getReg());
- std::string R1 = v + llvm::utostr(RegPairNum + 1);
+ std::string R1 = v + utostr(RegPairNum + 1);
MO.setReg(MatchRegisterName(R1));
// Add a new operand for the second register in the pair.
- std::string R2 = v + llvm::utostr(RegPairNum);
+ std::string R2 = v + utostr(RegPairNum);
Inst.addOperand(MCOperand::createReg(MatchRegisterName(R2)));
Inst.setOpcode(Hexagon::V6_vcombine);
break;
@@ -1658,14 +1660,9 @@ int HexagonAsmParser::processInstruction(MCInst &Inst,
// Translate a "$Rx = CONST32(#imm)" to "$Rx = memw(gp+#LABEL) "
case Hexagon::CONST32:
- case Hexagon::CONST32_Float_Real:
- case Hexagon::CONST32_Int_Real:
- case Hexagon::FCONST32_nsdata:
is32bit = true;
// Translate a "$Rx:y = CONST64(#imm)" to "$Rx:y = memd(gp+#LABEL) "
- case Hexagon::CONST64_Float_Real:
- case Hexagon::CONST64_Int_Real:
-
+ case Hexagon::CONST64:
// FIXME: need better way to detect AsmStreamer (upstream removed getKind())
if (!Parser.getStreamer().hasRawTextSupport()) {
MCELFStreamer *MES = static_cast<MCELFStreamer *>(&Parser.getStreamer());
@@ -1725,8 +1722,8 @@ int HexagonAsmParser::processInstruction(MCInst &Inst,
getStreamer().EmitIntValue(Value, byteSize);
}
} else if (MO_1.isExpr()) {
- const char *StringStart = 0;
- const char *StringEnd = 0;
+ const char *StringStart = nullptr;
+ const char *StringEnd = nullptr;
if (*Operands[4]->getStartLoc().getPointer() == '#') {
StringStart = Operands[5]->getStartLoc().getPointer();
StringEnd = Operands[6]->getStartLoc().getPointer();
@@ -1832,10 +1829,9 @@ int HexagonAsmParser::processInstruction(MCInst &Inst,
break;
}
- case Hexagon::S2_tableidxb_goodsyntax: {
+ case Hexagon::S2_tableidxb_goodsyntax:
Inst.setOpcode(Hexagon::S2_tableidxb);
break;
- }
case Hexagon::S2_tableidxh_goodsyntax: {
MCInst TmpInst;
@@ -1894,10 +1890,9 @@ int HexagonAsmParser::processInstruction(MCInst &Inst,
break;
}
- case Hexagon::M2_mpyui: {
+ case Hexagon::M2_mpyui:
Inst.setOpcode(Hexagon::M2_mpyi);
break;
- }
case Hexagon::M2_mpysmi: {
MCInst TmpInst;
MCOperand &Rd = Inst.getOperand(0);
@@ -1970,11 +1965,11 @@ int HexagonAsmParser::processInstruction(MCInst &Inst,
if (Value == 0) { // convert to $Rdd = combine ($Rs[0], $Rs[1])
MCInst TmpInst;
unsigned int RegPairNum = RI->getEncodingValue(Rss.getReg());
- std::string R1 = r + llvm::utostr(RegPairNum + 1);
+ std::string R1 = r + utostr(RegPairNum + 1);
StringRef Reg1(R1);
Rss.setReg(matchRegister(Reg1));
// Add a new operand for the second register in the pair.
- std::string R2 = r + llvm::utostr(RegPairNum);
+ std::string R2 = r + utostr(RegPairNum);
StringRef Reg2(R2);
TmpInst.setOpcode(Hexagon::A2_combinew);
TmpInst.addOperand(Rdd);
@@ -1996,14 +1991,12 @@ int HexagonAsmParser::processInstruction(MCInst &Inst,
unsigned int RegNum = RI->getEncodingValue(Rs.getReg());
if (RegNum & 1) { // Odd mapped to raw:hi, regpair is rodd:odd-1, like r3:2
Inst.setOpcode(Hexagon::A4_boundscheck_hi);
- std::string Name =
- r + llvm::utostr(RegNum) + Colon + llvm::utostr(RegNum - 1);
+ std::string Name = r + utostr(RegNum) + Colon + utostr(RegNum - 1);
StringRef RegPair = Name;
Rs.setReg(matchRegister(RegPair));
} else { // raw:lo
Inst.setOpcode(Hexagon::A4_boundscheck_lo);
- std::string Name =
- r + llvm::utostr(RegNum + 1) + Colon + llvm::utostr(RegNum);
+ std::string Name = r + utostr(RegNum + 1) + Colon + utostr(RegNum);
StringRef RegPair = Name;
Rs.setReg(matchRegister(RegPair));
}
@@ -2015,14 +2008,12 @@ int HexagonAsmParser::processInstruction(MCInst &Inst,
unsigned int RegNum = RI->getEncodingValue(Rs.getReg());
if (RegNum & 1) { // Odd mapped to raw:hi
Inst.setOpcode(Hexagon::A2_addsph);
- std::string Name =
- r + llvm::utostr(RegNum) + Colon + llvm::utostr(RegNum - 1);
+ std::string Name = r + utostr(RegNum) + Colon + utostr(RegNum - 1);
StringRef RegPair = Name;
Rs.setReg(matchRegister(RegPair));
} else { // Even mapped raw:lo
Inst.setOpcode(Hexagon::A2_addspl);
- std::string Name =
- r + llvm::utostr(RegNum + 1) + Colon + llvm::utostr(RegNum);
+ std::string Name = r + utostr(RegNum + 1) + Colon + utostr(RegNum);
StringRef RegPair = Name;
Rs.setReg(matchRegister(RegPair));
}
@@ -2034,14 +2025,12 @@ int HexagonAsmParser::processInstruction(MCInst &Inst,
unsigned int RegNum = RI->getEncodingValue(Rt.getReg());
if (RegNum & 1) { // Odd mapped to sat:raw:hi
Inst.setOpcode(Hexagon::M2_vrcmpys_s1_h);
- std::string Name =
- r + llvm::utostr(RegNum) + Colon + llvm::utostr(RegNum - 1);
+ std::string Name = r + utostr(RegNum) + Colon + utostr(RegNum - 1);
StringRef RegPair = Name;
Rt.setReg(matchRegister(RegPair));
} else { // Even mapped sat:raw:lo
Inst.setOpcode(Hexagon::M2_vrcmpys_s1_l);
- std::string Name =
- r + llvm::utostr(RegNum + 1) + Colon + llvm::utostr(RegNum);
+ std::string Name = r + utostr(RegNum + 1) + Colon + utostr(RegNum);
StringRef RegPair = Name;
Rt.setReg(matchRegister(RegPair));
}
@@ -2056,14 +2045,12 @@ int HexagonAsmParser::processInstruction(MCInst &Inst,
unsigned int RegNum = RI->getEncodingValue(Rt.getReg());
if (RegNum & 1) { // Odd mapped to sat:raw:hi
TmpInst.setOpcode(Hexagon::M2_vrcmpys_acc_s1_h);
- std::string Name =
- r + llvm::utostr(RegNum) + Colon + llvm::utostr(RegNum - 1);
+ std::string Name = r + utostr(RegNum) + Colon + utostr(RegNum - 1);
StringRef RegPair = Name;
Rt.setReg(matchRegister(RegPair));
} else { // Even mapped sat:raw:lo
TmpInst.setOpcode(Hexagon::M2_vrcmpys_acc_s1_l);
- std::string Name =
- r + llvm::utostr(RegNum + 1) + Colon + llvm::utostr(RegNum);
+ std::string Name = r + utostr(RegNum + 1) + Colon + utostr(RegNum);
StringRef RegPair = Name;
Rt.setReg(matchRegister(RegPair));
}
@@ -2081,14 +2068,12 @@ int HexagonAsmParser::processInstruction(MCInst &Inst,
unsigned int RegNum = RI->getEncodingValue(Rt.getReg());
if (RegNum & 1) { // Odd mapped to rnd:sat:raw:hi
Inst.setOpcode(Hexagon::M2_vrcmpys_s1rp_h);
- std::string Name =
- r + llvm::utostr(RegNum) + Colon + llvm::utostr(RegNum - 1);
+ std::string Name = r + utostr(RegNum) + Colon + utostr(RegNum - 1);
StringRef RegPair = Name;
Rt.setReg(matchRegister(RegPair));
} else { // Even mapped rnd:sat:raw:lo
Inst.setOpcode(Hexagon::M2_vrcmpys_s1rp_l);
- std::string Name =
- r + llvm::utostr(RegNum + 1) + Colon + llvm::utostr(RegNum);
+ std::string Name = r + utostr(RegNum + 1) + Colon + utostr(RegNum);
StringRef RegPair = Name;
Rt.setReg(matchRegister(RegPair));
}
@@ -2124,11 +2109,11 @@ int HexagonAsmParser::processInstruction(MCInst &Inst,
if (Value == 0) {
MCInst TmpInst;
unsigned int RegPairNum = RI->getEncodingValue(Rss.getReg());
- std::string R1 = r + llvm::utostr(RegPairNum + 1);
+ std::string R1 = r + utostr(RegPairNum + 1);
StringRef Reg1(R1);
Rss.setReg(matchRegister(Reg1));
// Add a new operand for the second register in the pair.
- std::string R2 = r + llvm::utostr(RegPairNum);
+ std::string R2 = r + utostr(RegPairNum);
StringRef Reg2(R2);
TmpInst.setOpcode(Hexagon::A2_combinew);
TmpInst.addOperand(Rdd);
@@ -2162,7 +2147,6 @@ int HexagonAsmParser::processInstruction(MCInst &Inst,
return Match_Success;
}
-
unsigned HexagonAsmParser::matchRegister(StringRef Name) {
if (unsigned Reg = MatchRegisterName(Name))
return Reg;
diff --git a/contrib/llvm/lib/Target/Hexagon/BitTracker.cpp b/contrib/llvm/lib/Target/Hexagon/BitTracker.cpp
index d052a835fbd8..c0591c332dea 100644
--- a/contrib/llvm/lib/Target/Hexagon/BitTracker.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/BitTracker.cpp
@@ -169,6 +169,12 @@ namespace llvm {
}
}
+void BitTracker::print_cells(raw_ostream &OS) const {
+ for (CellMapType::iterator I = Map.begin(), E = Map.end(); I != E; ++I)
+ dbgs() << PrintReg(I->first, &ME.TRI) << " -> " << I->second << "\n";
+}
+
+
BitTracker::BitTracker(const MachineEvaluator &E, MachineFunction &F)
: Trace(false), ME(E), MF(F), MRI(F.getRegInfo()), Map(*new CellMapType) {}
@@ -1042,6 +1048,20 @@ bool BT::reached(const MachineBasicBlock *B) const {
}
+// Visit an individual instruction. This could be a newly added instruction,
+// or one that has been modified by an optimization.
+void BT::visit(const MachineInstr &MI) {
+ assert(!MI.isBranch() && "Only non-branches are allowed");
+ InstrExec.insert(&MI);
+ visitNonBranch(MI);
+ // The call to visitNonBranch could propagate the changes until a branch
+ // is actually visited. This could result in adding CFG edges to the flow
+ // queue. Since the queue won't be processed, clear it.
+ while (!FlowQ.empty())
+ FlowQ.pop();
+}
+
+
void BT::reset() {
EdgeExec.clear();
InstrExec.clear();
@@ -1118,10 +1138,7 @@ void BT::run() {
}
} // while (!FlowQ->empty())
- if (Trace) {
- dbgs() << "Cells after propagation:\n";
- for (CellMapType::iterator I = Map.begin(), E = Map.end(); I != E; ++I)
- dbgs() << PrintReg(I->first, &ME.TRI) << " -> " << I->second << "\n";
- }
+ if (Trace)
+ print_cells(dbgs() << "Cells after propagation:\n");
}
diff --git a/contrib/llvm/lib/Target/Hexagon/BitTracker.h b/contrib/llvm/lib/Target/Hexagon/BitTracker.h
index 5b925fe696f8..74cafcd00b60 100644
--- a/contrib/llvm/lib/Target/Hexagon/BitTracker.h
+++ b/contrib/llvm/lib/Target/Hexagon/BitTracker.h
@@ -49,6 +49,9 @@ struct BitTracker {
void put(RegisterRef RR, const RegisterCell &RC);
void subst(RegisterRef OldRR, RegisterRef NewRR);
bool reached(const MachineBasicBlock *B) const;
+ void visit(const MachineInstr &MI);
+
+ void print_cells(raw_ostream &OS) const;
private:
void visitPHI(const MachineInstr &PI);
diff --git a/contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
index 7bc08ecfcab6..c05fbc1d7756 100644
--- a/contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp
@@ -14,22 +14,23 @@
#include "MCTargetDesc/HexagonMCChecker.h"
#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "MCTargetDesc/HexagonMCInstrInfo.h"
-#include "MCTargetDesc/HexagonInstPrinter.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
#include "llvm/MC/MCInst.h"
-#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/LEB128.h"
-#include "llvm/Support/MemoryObject.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/TargetRegistry.h"
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <memory>
using namespace llvm;
using namespace Hexagon;
@@ -37,11 +38,13 @@ using namespace Hexagon;
typedef MCDisassembler::DecodeStatus DecodeStatus;
namespace {
+
/// \brief Hexagon disassembler for all Hexagon platforms.
class HexagonDisassembler : public MCDisassembler {
public:
std::unique_ptr<MCInstrInfo const> const MCII;
std::unique_ptr<MCInst *> CurrentBundle;
+
HexagonDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
MCInstrInfo const *MCII)
: MCDisassembler(STI, Ctx), MCII(MCII), CurrentBundle(new MCInst *) {}
@@ -58,7 +61,8 @@ public:
void adjustExtendedInstructions(MCInst &MCI, MCInst const &MCB) const;
void addSubinstOperands(MCInst *MI, unsigned opcode, unsigned inst) const;
};
-}
+
+} // end anonymous namespace
// Forward declare these because the auto-generated code will reference them.
// Definitions are further down.
@@ -105,9 +109,9 @@ static unsigned getRegFromSubinstEncoding(unsigned encoded_reg);
static DecodeStatus unsignedImmDecoder(MCInst &MI, unsigned tmp,
uint64_t Address, const void *Decoder);
-static DecodeStatus s16ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
+static DecodeStatus s16_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
const void *Decoder);
-static DecodeStatus s12ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
+static DecodeStatus s12_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
const void *Decoder);
static DecodeStatus s11_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
const void *Decoder);
@@ -117,9 +121,9 @@ static DecodeStatus s11_2ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
const void *Decoder);
static DecodeStatus s11_3ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
const void *Decoder);
-static DecodeStatus s10ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
+static DecodeStatus s10_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
const void *Decoder);
-static DecodeStatus s8ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
+static DecodeStatus s8_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
const void *Decoder);
static DecodeStatus s6_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t Address,
const void *Decoder);
@@ -147,7 +151,7 @@ static MCDisassembler *createHexagonDisassembler(const Target &T,
}
extern "C" void LLVMInitializeHexagonDisassembler() {
- TargetRegistry::RegisterMCDisassembler(TheHexagonTarget,
+ TargetRegistry::RegisterMCDisassembler(getTheHexagonTarget(),
createHexagonDisassembler);
}
@@ -162,7 +166,7 @@ DecodeStatus HexagonDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
*CurrentBundle = &MI;
MI = HexagonMCInstrInfo::createBundle();
- while (Result == Success && Complete == false) {
+ while (Result == Success && !Complete) {
if (Bytes.size() < HEXAGON_INSTR_SIZE)
return MCDisassembler::Fail;
MCInst *Inst = new (getContext()) MCInst;
@@ -179,14 +183,13 @@ DecodeStatus HexagonDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
return MCDisassembler::Success;
}
-namespace {
-HexagonDisassembler const &disassembler(void const *Decoder) {
+static HexagonDisassembler const &disassembler(void const *Decoder) {
return *static_cast<HexagonDisassembler const *>(Decoder);
}
-MCContext &contextFromDecoder(void const *Decoder) {
+
+static MCContext &contextFromDecoder(void const *Decoder) {
return disassembler(Decoder).getContext();
}
-}
DecodeStatus HexagonDisassembler::getSingleInstruction(
MCInst &MI, MCInst &MCB, ArrayRef<uint8_t> Bytes, uint64_t Address,
@@ -329,8 +332,7 @@ DecodeStatus HexagonDisassembler::getSingleInstruction(
// follow the duplex model, so the register values in the MCInst are
// incorrect. If the instruction is a compound, loop through the
// operands and change registers appropriately.
- if (llvm::HexagonMCInstrInfo::getType(*MCII, MI) ==
- HexagonII::TypeCOMPOUND) {
+ if (HexagonMCInstrInfo::getType(*MCII, MI) == HexagonII::TypeCOMPOUND) {
for (MCInst::iterator i = MI.begin(), last = MI.end(); i < last; ++i) {
if (i->isReg()) {
unsigned reg = i->getReg() - Hexagon::R0;
@@ -341,6 +343,37 @@ DecodeStatus HexagonDisassembler::getSingleInstruction(
}
}
+ switch(MI.getOpcode()) {
+ case Hexagon::J4_cmpeqn1_f_jumpnv_nt:
+ case Hexagon::J4_cmpeqn1_f_jumpnv_t:
+ case Hexagon::J4_cmpeqn1_fp0_jump_nt:
+ case Hexagon::J4_cmpeqn1_fp0_jump_t:
+ case Hexagon::J4_cmpeqn1_fp1_jump_nt:
+ case Hexagon::J4_cmpeqn1_fp1_jump_t:
+ case Hexagon::J4_cmpeqn1_t_jumpnv_nt:
+ case Hexagon::J4_cmpeqn1_t_jumpnv_t:
+ case Hexagon::J4_cmpeqn1_tp0_jump_nt:
+ case Hexagon::J4_cmpeqn1_tp0_jump_t:
+ case Hexagon::J4_cmpeqn1_tp1_jump_nt:
+ case Hexagon::J4_cmpeqn1_tp1_jump_t:
+ case Hexagon::J4_cmpgtn1_f_jumpnv_nt:
+ case Hexagon::J4_cmpgtn1_f_jumpnv_t:
+ case Hexagon::J4_cmpgtn1_fp0_jump_nt:
+ case Hexagon::J4_cmpgtn1_fp0_jump_t:
+ case Hexagon::J4_cmpgtn1_fp1_jump_nt:
+ case Hexagon::J4_cmpgtn1_fp1_jump_t:
+ case Hexagon::J4_cmpgtn1_t_jumpnv_nt:
+ case Hexagon::J4_cmpgtn1_t_jumpnv_t:
+ case Hexagon::J4_cmpgtn1_tp0_jump_nt:
+ case Hexagon::J4_cmpgtn1_tp0_jump_t:
+ case Hexagon::J4_cmpgtn1_tp1_jump_nt:
+ case Hexagon::J4_cmpgtn1_tp1_jump_t:
+ MI.insert(MI.begin() + 1, MCOperand::createExpr(MCConstantExpr::create(-1, getContext())));
+ break;
+ default:
+ break;
+ }
+
if (HexagonMCInstrInfo::isNewValue(*MCII, MI)) {
unsigned OpIndex = HexagonMCInstrInfo::getNewValueOp(*MCII, MI);
MCOperand &MCO = MI.getOperand(OpIndex);
@@ -417,46 +450,46 @@ void HexagonDisassembler::adjustExtendedInstructions(MCInst &MCI,
// GP relative instruction in the absence of the corresponding immediate
// extender.
switch (MCI.getOpcode()) {
- case Hexagon::S2_storerbabs:
+ case Hexagon::PS_storerbabs:
opcode = Hexagon::S2_storerbgp;
break;
- case Hexagon::S2_storerhabs:
+ case Hexagon::PS_storerhabs:
opcode = Hexagon::S2_storerhgp;
break;
- case Hexagon::S2_storerfabs:
+ case Hexagon::PS_storerfabs:
opcode = Hexagon::S2_storerfgp;
break;
- case Hexagon::S2_storeriabs:
+ case Hexagon::PS_storeriabs:
opcode = Hexagon::S2_storerigp;
break;
- case Hexagon::S2_storerbnewabs:
+ case Hexagon::PS_storerbnewabs:
opcode = Hexagon::S2_storerbnewgp;
break;
- case Hexagon::S2_storerhnewabs:
+ case Hexagon::PS_storerhnewabs:
opcode = Hexagon::S2_storerhnewgp;
break;
- case Hexagon::S2_storerinewabs:
+ case Hexagon::PS_storerinewabs:
opcode = Hexagon::S2_storerinewgp;
break;
- case Hexagon::S2_storerdabs:
+ case Hexagon::PS_storerdabs:
opcode = Hexagon::S2_storerdgp;
break;
- case Hexagon::L4_loadrb_abs:
+ case Hexagon::PS_loadrbabs:
opcode = Hexagon::L2_loadrbgp;
break;
- case Hexagon::L4_loadrub_abs:
+ case Hexagon::PS_loadrubabs:
opcode = Hexagon::L2_loadrubgp;
break;
- case Hexagon::L4_loadrh_abs:
+ case Hexagon::PS_loadrhabs:
opcode = Hexagon::L2_loadrhgp;
break;
- case Hexagon::L4_loadruh_abs:
+ case Hexagon::PS_loadruhabs:
opcode = Hexagon::L2_loadruhgp;
break;
- case Hexagon::L4_loadri_abs:
+ case Hexagon::PS_loadriabs:
opcode = Hexagon::L2_loadrigp;
break;
- case Hexagon::L4_loadrd_abs:
+ case Hexagon::PS_loadrdabs:
opcode = Hexagon::L2_loadrdgp;
break;
default:
@@ -466,10 +499,6 @@ void HexagonDisassembler::adjustExtendedInstructions(MCInst &MCI,
}
}
-namespace llvm {
-extern const MCInstrDesc HexagonInsts[];
-}
-
static DecodeStatus DecodeRegisterClass(MCInst &Inst, unsigned RegNo,
ArrayRef<MCPhysReg> Table) {
if (RegNo < Table.size()) {
@@ -621,11 +650,8 @@ static DecodeStatus DecodeModRegsRegisterClass(MCInst &Inst, unsigned RegNo,
return MCDisassembler::Success;
}
-namespace {
-uint32_t fullValue(MCInstrInfo const &MCII,
- MCInst &MCB,
- MCInst &MI,
- int64_t Value) {
+static uint32_t fullValue(MCInstrInfo const &MCII, MCInst &MCB, MCInst &MI,
+ int64_t Value) {
MCInst const *Extender = HexagonMCInstrInfo::extenderForIndex(
MCB, HexagonMCInstrInfo::bundleSize(MCB));
if(!Extender || MI.size() != HexagonMCInstrInfo::getExtendableOp(MCII, MI))
@@ -639,8 +665,9 @@ uint32_t fullValue(MCInstrInfo const &MCII,
uint32_t Operand = Upper26 | Lower6;
return Operand;
}
+
template <size_t T>
-void signedDecoder(MCInst &MI, unsigned tmp, const void *Decoder) {
+static void signedDecoder(MCInst &MI, unsigned tmp, const void *Decoder) {
HexagonDisassembler const &Disassembler = disassembler(Decoder);
int64_t FullValue = fullValue(*Disassembler.MCII,
**Disassembler.CurrentBundle,
@@ -649,7 +676,6 @@ void signedDecoder(MCInst &MI, unsigned tmp, const void *Decoder) {
HexagonMCInstrInfo::addConstant(MI, Extended,
Disassembler.getContext());
}
-}
static DecodeStatus unsignedImmDecoder(MCInst &MI, unsigned tmp,
uint64_t /*Address*/,
@@ -663,13 +689,13 @@ static DecodeStatus unsignedImmDecoder(MCInst &MI, unsigned tmp,
return MCDisassembler::Success;
}
-static DecodeStatus s16ImmDecoder(MCInst &MI, unsigned tmp,
+static DecodeStatus s16_0ImmDecoder(MCInst &MI, unsigned tmp,
uint64_t /*Address*/, const void *Decoder) {
signedDecoder<16>(MI, tmp, Decoder);
return MCDisassembler::Success;
}
-static DecodeStatus s12ImmDecoder(MCInst &MI, unsigned tmp,
+static DecodeStatus s12_0ImmDecoder(MCInst &MI, unsigned tmp,
uint64_t /*Address*/, const void *Decoder) {
signedDecoder<12>(MI, tmp, Decoder);
return MCDisassembler::Success;
@@ -699,13 +725,13 @@ static DecodeStatus s11_3ImmDecoder(MCInst &MI, unsigned tmp,
return MCDisassembler::Success;
}
-static DecodeStatus s10ImmDecoder(MCInst &MI, unsigned tmp,
+static DecodeStatus s10_0ImmDecoder(MCInst &MI, unsigned tmp,
uint64_t /*Address*/, const void *Decoder) {
signedDecoder<10>(MI, tmp, Decoder);
return MCDisassembler::Success;
}
-static DecodeStatus s8ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/,
+static DecodeStatus s8_0ImmDecoder(MCInst &MI, unsigned tmp, uint64_t /*Address*/,
const void *Decoder) {
signedDecoder<8>(MI, tmp, Decoder);
return MCDisassembler::Success;
@@ -811,25 +837,24 @@ static const unsigned int StoreConditionalOpcodeData[][2] = {
// HexagonII::INST_ICLASS_LD
// HexagonII::INST_ICLASS_LD_ST_2
-static unsigned int LoadStoreOpcodeData[][2] = {{L4_loadrd_abs, 0x49c00000},
- {L4_loadri_abs, 0x49800000},
- {L4_loadruh_abs, 0x49600000},
- {L4_loadrh_abs, 0x49400000},
- {L4_loadrub_abs, 0x49200000},
- {L4_loadrb_abs, 0x49000000},
- {S2_storerdabs, 0x48c00000},
- {S2_storerinewabs, 0x48a01000},
- {S2_storerhnewabs, 0x48a00800},
- {S2_storerbnewabs, 0x48a00000},
- {S2_storeriabs, 0x48800000},
- {S2_storerfabs, 0x48600000},
- {S2_storerhabs, 0x48400000},
- {S2_storerbabs, 0x48000000}};
+static unsigned int LoadStoreOpcodeData[][2] = {{PS_loadrdabs, 0x49c00000},
+ {PS_loadriabs, 0x49800000},
+ {PS_loadruhabs, 0x49600000},
+ {PS_loadrhabs, 0x49400000},
+ {PS_loadrubabs, 0x49200000},
+ {PS_loadrbabs, 0x49000000},
+ {PS_storerdabs, 0x48c00000},
+ {PS_storerinewabs, 0x48a01000},
+ {PS_storerhnewabs, 0x48a00800},
+ {PS_storerbnewabs, 0x48a00000},
+ {PS_storeriabs, 0x48800000},
+ {PS_storerfabs, 0x48600000},
+ {PS_storerhabs, 0x48400000},
+ {PS_storerbabs, 0x48000000}};
static const size_t NumCondS = array_lengthof(StoreConditionalOpcodeData);
static const size_t NumLS = array_lengthof(LoadStoreOpcodeData);
static DecodeStatus decodeSpecial(MCInst &MI, uint32_t insn) {
-
unsigned MachineOpcode = 0;
unsigned LLVMOpcode = 0;
@@ -868,19 +893,18 @@ static DecodeStatus decodeSpecial(MCInst &MI, uint32_t insn) {
case Hexagon::S4_pstorerdf_abs:
case Hexagon::S4_pstorerdt_abs:
case Hexagon::S4_pstorerdfnew_abs:
- case Hexagon::S4_pstorerdtnew_abs: {
+ case Hexagon::S4_pstorerdtnew_abs:
// op: Pv
Value = insn & UINT64_C(3);
- DecodePredRegsRegisterClass(MI, Value, 0, 0);
+ DecodePredRegsRegisterClass(MI, Value, 0, nullptr);
// op: u6
Value = (insn >> 12) & UINT64_C(48);
Value |= (insn >> 3) & UINT64_C(15);
MI.addOperand(MCOperand::createImm(Value));
// op: Rtt
Value = (insn >> 8) & UINT64_C(31);
- DecodeDoubleRegsRegisterClass(MI, Value, 0, 0);
+ DecodeDoubleRegsRegisterClass(MI, Value, 0, nullptr);
break;
- }
case Hexagon::S4_pstorerbnewf_abs:
case Hexagon::S4_pstorerbnewt_abs:
@@ -893,19 +917,18 @@ static DecodeStatus decodeSpecial(MCInst &MI, uint32_t insn) {
case Hexagon::S4_pstorerinewf_abs:
case Hexagon::S4_pstorerinewt_abs:
case Hexagon::S4_pstorerinewfnew_abs:
- case Hexagon::S4_pstorerinewtnew_abs: {
+ case Hexagon::S4_pstorerinewtnew_abs:
// op: Pv
Value = insn & UINT64_C(3);
- DecodePredRegsRegisterClass(MI, Value, 0, 0);
+ DecodePredRegsRegisterClass(MI, Value, 0, nullptr);
// op: u6
Value = (insn >> 12) & UINT64_C(48);
Value |= (insn >> 3) & UINT64_C(15);
MI.addOperand(MCOperand::createImm(Value));
// op: Nt
Value = (insn >> 8) & UINT64_C(7);
- DecodeIntRegsRegisterClass(MI, Value, 0, 0);
+ DecodeIntRegsRegisterClass(MI, Value, 0, nullptr);
break;
- }
case Hexagon::S4_pstorerbf_abs:
case Hexagon::S4_pstorerbt_abs:
@@ -918,36 +941,34 @@ static DecodeStatus decodeSpecial(MCInst &MI, uint32_t insn) {
case Hexagon::S4_pstorerif_abs:
case Hexagon::S4_pstorerit_abs:
case Hexagon::S4_pstorerifnew_abs:
- case Hexagon::S4_pstoreritnew_abs: {
+ case Hexagon::S4_pstoreritnew_abs:
// op: Pv
Value = insn & UINT64_C(3);
- DecodePredRegsRegisterClass(MI, Value, 0, 0);
+ DecodePredRegsRegisterClass(MI, Value, 0, nullptr);
// op: u6
Value = (insn >> 12) & UINT64_C(48);
Value |= (insn >> 3) & UINT64_C(15);
MI.addOperand(MCOperand::createImm(Value));
// op: Rt
Value = (insn >> 8) & UINT64_C(31);
- DecodeIntRegsRegisterClass(MI, Value, 0, 0);
+ DecodeIntRegsRegisterClass(MI, Value, 0, nullptr);
break;
- }
case Hexagon::L4_ploadrdf_abs:
case Hexagon::L4_ploadrdt_abs:
case Hexagon::L4_ploadrdfnew_abs:
- case Hexagon::L4_ploadrdtnew_abs: {
+ case Hexagon::L4_ploadrdtnew_abs:
// op: Rdd
Value = insn & UINT64_C(31);
- DecodeDoubleRegsRegisterClass(MI, Value, 0, 0);
+ DecodeDoubleRegsRegisterClass(MI, Value, 0, nullptr);
// op: Pt
Value = ((insn >> 9) & UINT64_C(3));
- DecodePredRegsRegisterClass(MI, Value, 0, 0);
+ DecodePredRegsRegisterClass(MI, Value, 0, nullptr);
// op: u6
Value = ((insn >> 15) & UINT64_C(62));
Value |= ((insn >> 8) & UINT64_C(1));
MI.addOperand(MCOperand::createImm(Value));
break;
- }
case Hexagon::L4_ploadrbf_abs:
case Hexagon::L4_ploadrbt_abs:
@@ -971,10 +992,10 @@ static DecodeStatus decodeSpecial(MCInst &MI, uint32_t insn) {
case Hexagon::L4_ploadritnew_abs:
// op: Rd
Value = insn & UINT64_C(31);
- DecodeIntRegsRegisterClass(MI, Value, 0, 0);
+ DecodeIntRegsRegisterClass(MI, Value, 0, nullptr);
// op: Pt
Value = (insn >> 9) & UINT64_C(3);
- DecodePredRegsRegisterClass(MI, Value, 0, 0);
+ DecodePredRegsRegisterClass(MI, Value, 0, nullptr);
// op: u6
Value = (insn >> 15) & UINT64_C(62);
Value |= (insn >> 8) & UINT64_C(1);
@@ -982,36 +1003,34 @@ static DecodeStatus decodeSpecial(MCInst &MI, uint32_t insn) {
break;
// op: g16_2
- case (Hexagon::L4_loadri_abs):
+ case (Hexagon::PS_loadriabs):
++shift;
// op: g16_1
- case Hexagon::L4_loadrh_abs:
- case Hexagon::L4_loadruh_abs:
+ case Hexagon::PS_loadrhabs:
+ case Hexagon::PS_loadruhabs:
++shift;
// op: g16_0
- case Hexagon::L4_loadrb_abs:
- case Hexagon::L4_loadrub_abs: {
+ case Hexagon::PS_loadrbabs:
+ case Hexagon::PS_loadrubabs:
// op: Rd
Value |= insn & UINT64_C(31);
- DecodeIntRegsRegisterClass(MI, Value, 0, 0);
+ DecodeIntRegsRegisterClass(MI, Value, 0, nullptr);
Value = (insn >> 11) & UINT64_C(49152);
Value |= (insn >> 7) & UINT64_C(15872);
Value |= (insn >> 5) & UINT64_C(511);
MI.addOperand(MCOperand::createImm(Value << shift));
break;
- }
- case Hexagon::L4_loadrd_abs: {
+ case Hexagon::PS_loadrdabs:
Value = insn & UINT64_C(31);
- DecodeDoubleRegsRegisterClass(MI, Value, 0, 0);
+ DecodeDoubleRegsRegisterClass(MI, Value, 0, nullptr);
Value = (insn >> 11) & UINT64_C(49152);
Value |= (insn >> 7) & UINT64_C(15872);
Value |= (insn >> 5) & UINT64_C(511);
MI.addOperand(MCOperand::createImm(Value << 3));
break;
- }
- case Hexagon::S2_storerdabs: {
+ case Hexagon::PS_storerdabs:
// op: g16_3
Value = (insn >> 11) & UINT64_C(49152);
Value |= (insn >> 7) & UINT64_C(15872);
@@ -1020,18 +1039,17 @@ static DecodeStatus decodeSpecial(MCInst &MI, uint32_t insn) {
MI.addOperand(MCOperand::createImm(Value << 3));
// op: Rtt
Value = (insn >> 8) & UINT64_C(31);
- DecodeDoubleRegsRegisterClass(MI, Value, 0, 0);
+ DecodeDoubleRegsRegisterClass(MI, Value, 0, nullptr);
break;
- }
// op: g16_2
- case Hexagon::S2_storerinewabs:
+ case Hexagon::PS_storerinewabs:
++shift;
// op: g16_1
- case Hexagon::S2_storerhnewabs:
+ case Hexagon::PS_storerhnewabs:
++shift;
// op: g16_0
- case Hexagon::S2_storerbnewabs: {
+ case Hexagon::PS_storerbnewabs:
Value = (insn >> 11) & UINT64_C(49152);
Value |= (insn >> 7) & UINT64_C(15872);
Value |= (insn >> 5) & UINT64_C(256);
@@ -1039,19 +1057,18 @@ static DecodeStatus decodeSpecial(MCInst &MI, uint32_t insn) {
MI.addOperand(MCOperand::createImm(Value << shift));
// op: Nt
Value = (insn >> 8) & UINT64_C(7);
- DecodeIntRegsRegisterClass(MI, Value, 0, 0);
+ DecodeIntRegsRegisterClass(MI, Value, 0, nullptr);
break;
- }
// op: g16_2
- case Hexagon::S2_storeriabs:
+ case Hexagon::PS_storeriabs:
++shift;
// op: g16_1
- case Hexagon::S2_storerhabs:
- case Hexagon::S2_storerfabs:
+ case Hexagon::PS_storerhabs:
+ case Hexagon::PS_storerfabs:
++shift;
// op: g16_0
- case Hexagon::S2_storerbabs: {
+ case Hexagon::PS_storerbabs:
Value = (insn >> 11) & UINT64_C(49152);
Value |= (insn >> 7) & UINT64_C(15872);
Value |= (insn >> 5) & UINT64_C(256);
@@ -1059,10 +1076,9 @@ static DecodeStatus decodeSpecial(MCInst &MI, uint32_t insn) {
MI.addOperand(MCOperand::createImm(Value << shift));
// op: Rt
Value = (insn >> 8) & UINT64_C(31);
- DecodeIntRegsRegisterClass(MI, Value, 0, 0);
+ DecodeIntRegsRegisterClass(MI, Value, 0, nullptr);
break;
}
- }
return MCDisassembler::Success;
}
return MCDisassembler::Fail;
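
In the absolute-addressing cases above, the g16 immediate is reassembled from three disjoint bit ranges of the instruction word before being scaled by the access size (the `++shift` fall-throughs). The decimal masks hide the layout; rewriting the load variant in hex makes it visible:

    #include <cstdint>
    #include <cstdio>

    // The g16 immediate of the PS_loadr*abs cases above is gathered with three
    // shift/mask steps. In hex the field layout is:
    //   49152 = 0xC000  -> g16 bits 15:14 come from insn bits 26:25
    //   15872 = 0x3E00  -> g16 bits 13:9  come from insn bits 20:16
    //     511 = 0x1FF   -> g16 bits  8:0  come from insn bits 13:5
    static uint32_t gatherG16(uint32_t insn) {
      uint32_t v = (insn >> 11) & 0xC000;
      v |= (insn >> 7) & 0x3E00;
      v |= (insn >> 5) & 0x1FF;
      return v;
    }

    int main() {
      // All three source fields set to all-ones -> full 16-bit immediate.
      uint32_t insn = (0x3u << 25) | (0x1Fu << 16) | (0x1FFu << 5);
      std::printf("0x%04x\n", gatherG16(insn));   // prints 0xffff
      return 0;
    }
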
@@ -1070,7 +1086,6 @@ static DecodeStatus decodeSpecial(MCInst &MI, uint32_t insn) {
static DecodeStatus decodeImmext(MCInst &MI, uint32_t insn,
void const *Decoder) {
-
// Instruction class for a constant extender: bits 31:28 = 0000
if ((~insn & 0xf0000000) == 0xf0000000) {
unsigned Value;
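
The guard above, `(~insn & 0xf0000000) == 0xf0000000`, is an indirect way of requiring the four instruction-class bits 31:28 to be zero, which marks an immediate-extender word. A quick standalone check of that equivalence:

    #include <cassert>
    #include <cstdint>
    #include <cstdio>

    // The disassembler's test for an immext word, and the more direct form:
    // both hold exactly when instruction-class bits 31:28 are zero.
    static bool isImmextOriginal(uint32_t insn) {
      return (~insn & 0xf0000000) == 0xf0000000;
    }
    static bool isImmextDirect(uint32_t insn) {
      return (insn >> 28) == 0;
    }

    int main() {
      for (uint32_t hi = 0; hi < 16; ++hi) {
        uint32_t insn = (hi << 28) | 0x00c0dead;   // arbitrary low bits
        assert(isImmextOriginal(insn) == isImmextDirect(insn));
      }
      std::printf("equivalent for all 16 class values\n");
      return 0;
    }
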
@@ -1087,244 +1102,244 @@ static DecodeStatus decodeImmext(MCInst &MI, uint32_t insn,
// These values are from HexagonGenMCCodeEmitter.inc and HexagonIsetDx.td
enum subInstBinaryValues {
- V4_SA1_addi_BITS = 0x0000,
- V4_SA1_addi_MASK = 0x1800,
- V4_SA1_addrx_BITS = 0x1800,
- V4_SA1_addrx_MASK = 0x1f00,
- V4_SA1_addsp_BITS = 0x0c00,
- V4_SA1_addsp_MASK = 0x1c00,
- V4_SA1_and1_BITS = 0x1200,
- V4_SA1_and1_MASK = 0x1f00,
- V4_SA1_clrf_BITS = 0x1a70,
- V4_SA1_clrf_MASK = 0x1e70,
- V4_SA1_clrfnew_BITS = 0x1a50,
- V4_SA1_clrfnew_MASK = 0x1e70,
- V4_SA1_clrt_BITS = 0x1a60,
- V4_SA1_clrt_MASK = 0x1e70,
- V4_SA1_clrtnew_BITS = 0x1a40,
- V4_SA1_clrtnew_MASK = 0x1e70,
- V4_SA1_cmpeqi_BITS = 0x1900,
- V4_SA1_cmpeqi_MASK = 0x1f00,
- V4_SA1_combine0i_BITS = 0x1c00,
- V4_SA1_combine0i_MASK = 0x1d18,
- V4_SA1_combine1i_BITS = 0x1c08,
- V4_SA1_combine1i_MASK = 0x1d18,
- V4_SA1_combine2i_BITS = 0x1c10,
- V4_SA1_combine2i_MASK = 0x1d18,
- V4_SA1_combine3i_BITS = 0x1c18,
- V4_SA1_combine3i_MASK = 0x1d18,
- V4_SA1_combinerz_BITS = 0x1d08,
- V4_SA1_combinerz_MASK = 0x1d08,
- V4_SA1_combinezr_BITS = 0x1d00,
- V4_SA1_combinezr_MASK = 0x1d08,
- V4_SA1_dec_BITS = 0x1300,
- V4_SA1_dec_MASK = 0x1f00,
- V4_SA1_inc_BITS = 0x1100,
- V4_SA1_inc_MASK = 0x1f00,
- V4_SA1_seti_BITS = 0x0800,
- V4_SA1_seti_MASK = 0x1c00,
- V4_SA1_setin1_BITS = 0x1a00,
- V4_SA1_setin1_MASK = 0x1e40,
- V4_SA1_sxtb_BITS = 0x1500,
- V4_SA1_sxtb_MASK = 0x1f00,
- V4_SA1_sxth_BITS = 0x1400,
- V4_SA1_sxth_MASK = 0x1f00,
- V4_SA1_tfr_BITS = 0x1000,
- V4_SA1_tfr_MASK = 0x1f00,
- V4_SA1_zxtb_BITS = 0x1700,
- V4_SA1_zxtb_MASK = 0x1f00,
- V4_SA1_zxth_BITS = 0x1600,
- V4_SA1_zxth_MASK = 0x1f00,
- V4_SL1_loadri_io_BITS = 0x0000,
- V4_SL1_loadri_io_MASK = 0x1000,
- V4_SL1_loadrub_io_BITS = 0x1000,
- V4_SL1_loadrub_io_MASK = 0x1000,
- V4_SL2_deallocframe_BITS = 0x1f00,
- V4_SL2_deallocframe_MASK = 0x1fc0,
- V4_SL2_jumpr31_BITS = 0x1fc0,
- V4_SL2_jumpr31_MASK = 0x1fc4,
- V4_SL2_jumpr31_f_BITS = 0x1fc5,
- V4_SL2_jumpr31_f_MASK = 0x1fc7,
- V4_SL2_jumpr31_fnew_BITS = 0x1fc7,
- V4_SL2_jumpr31_fnew_MASK = 0x1fc7,
- V4_SL2_jumpr31_t_BITS = 0x1fc4,
- V4_SL2_jumpr31_t_MASK = 0x1fc7,
- V4_SL2_jumpr31_tnew_BITS = 0x1fc6,
- V4_SL2_jumpr31_tnew_MASK = 0x1fc7,
- V4_SL2_loadrb_io_BITS = 0x1000,
- V4_SL2_loadrb_io_MASK = 0x1800,
- V4_SL2_loadrd_sp_BITS = 0x1e00,
- V4_SL2_loadrd_sp_MASK = 0x1f00,
- V4_SL2_loadrh_io_BITS = 0x0000,
- V4_SL2_loadrh_io_MASK = 0x1800,
- V4_SL2_loadri_sp_BITS = 0x1c00,
- V4_SL2_loadri_sp_MASK = 0x1e00,
- V4_SL2_loadruh_io_BITS = 0x0800,
- V4_SL2_loadruh_io_MASK = 0x1800,
- V4_SL2_return_BITS = 0x1f40,
- V4_SL2_return_MASK = 0x1fc4,
- V4_SL2_return_f_BITS = 0x1f45,
- V4_SL2_return_f_MASK = 0x1fc7,
- V4_SL2_return_fnew_BITS = 0x1f47,
- V4_SL2_return_fnew_MASK = 0x1fc7,
- V4_SL2_return_t_BITS = 0x1f44,
- V4_SL2_return_t_MASK = 0x1fc7,
- V4_SL2_return_tnew_BITS = 0x1f46,
- V4_SL2_return_tnew_MASK = 0x1fc7,
- V4_SS1_storeb_io_BITS = 0x1000,
- V4_SS1_storeb_io_MASK = 0x1000,
- V4_SS1_storew_io_BITS = 0x0000,
- V4_SS1_storew_io_MASK = 0x1000,
- V4_SS2_allocframe_BITS = 0x1c00,
- V4_SS2_allocframe_MASK = 0x1e00,
- V4_SS2_storebi0_BITS = 0x1200,
- V4_SS2_storebi0_MASK = 0x1f00,
- V4_SS2_storebi1_BITS = 0x1300,
- V4_SS2_storebi1_MASK = 0x1f00,
- V4_SS2_stored_sp_BITS = 0x0a00,
- V4_SS2_stored_sp_MASK = 0x1e00,
- V4_SS2_storeh_io_BITS = 0x0000,
- V4_SS2_storeh_io_MASK = 0x1800,
- V4_SS2_storew_sp_BITS = 0x0800,
- V4_SS2_storew_sp_MASK = 0x1e00,
- V4_SS2_storewi0_BITS = 0x1000,
- V4_SS2_storewi0_MASK = 0x1f00,
- V4_SS2_storewi1_BITS = 0x1100,
- V4_SS2_storewi1_MASK = 0x1f00
+ SA1_addi_BITS = 0x0000,
+ SA1_addi_MASK = 0x1800,
+ SA1_addrx_BITS = 0x1800,
+ SA1_addrx_MASK = 0x1f00,
+ SA1_addsp_BITS = 0x0c00,
+ SA1_addsp_MASK = 0x1c00,
+ SA1_and1_BITS = 0x1200,
+ SA1_and1_MASK = 0x1f00,
+ SA1_clrf_BITS = 0x1a70,
+ SA1_clrf_MASK = 0x1e70,
+ SA1_clrfnew_BITS = 0x1a50,
+ SA1_clrfnew_MASK = 0x1e70,
+ SA1_clrt_BITS = 0x1a60,
+ SA1_clrt_MASK = 0x1e70,
+ SA1_clrtnew_BITS = 0x1a40,
+ SA1_clrtnew_MASK = 0x1e70,
+ SA1_cmpeqi_BITS = 0x1900,
+ SA1_cmpeqi_MASK = 0x1f00,
+ SA1_combine0i_BITS = 0x1c00,
+ SA1_combine0i_MASK = 0x1d18,
+ SA1_combine1i_BITS = 0x1c08,
+ SA1_combine1i_MASK = 0x1d18,
+ SA1_combine2i_BITS = 0x1c10,
+ SA1_combine2i_MASK = 0x1d18,
+ SA1_combine3i_BITS = 0x1c18,
+ SA1_combine3i_MASK = 0x1d18,
+ SA1_combinerz_BITS = 0x1d08,
+ SA1_combinerz_MASK = 0x1d08,
+ SA1_combinezr_BITS = 0x1d00,
+ SA1_combinezr_MASK = 0x1d08,
+ SA1_dec_BITS = 0x1300,
+ SA1_dec_MASK = 0x1f00,
+ SA1_inc_BITS = 0x1100,
+ SA1_inc_MASK = 0x1f00,
+ SA1_seti_BITS = 0x0800,
+ SA1_seti_MASK = 0x1c00,
+ SA1_setin1_BITS = 0x1a00,
+ SA1_setin1_MASK = 0x1e40,
+ SA1_sxtb_BITS = 0x1500,
+ SA1_sxtb_MASK = 0x1f00,
+ SA1_sxth_BITS = 0x1400,
+ SA1_sxth_MASK = 0x1f00,
+ SA1_tfr_BITS = 0x1000,
+ SA1_tfr_MASK = 0x1f00,
+ SA1_zxtb_BITS = 0x1700,
+ SA1_zxtb_MASK = 0x1f00,
+ SA1_zxth_BITS = 0x1600,
+ SA1_zxth_MASK = 0x1f00,
+ SL1_loadri_io_BITS = 0x0000,
+ SL1_loadri_io_MASK = 0x1000,
+ SL1_loadrub_io_BITS = 0x1000,
+ SL1_loadrub_io_MASK = 0x1000,
+ SL2_deallocframe_BITS = 0x1f00,
+ SL2_deallocframe_MASK = 0x1fc0,
+ SL2_jumpr31_BITS = 0x1fc0,
+ SL2_jumpr31_MASK = 0x1fc4,
+ SL2_jumpr31_f_BITS = 0x1fc5,
+ SL2_jumpr31_f_MASK = 0x1fc7,
+ SL2_jumpr31_fnew_BITS = 0x1fc7,
+ SL2_jumpr31_fnew_MASK = 0x1fc7,
+ SL2_jumpr31_t_BITS = 0x1fc4,
+ SL2_jumpr31_t_MASK = 0x1fc7,
+ SL2_jumpr31_tnew_BITS = 0x1fc6,
+ SL2_jumpr31_tnew_MASK = 0x1fc7,
+ SL2_loadrb_io_BITS = 0x1000,
+ SL2_loadrb_io_MASK = 0x1800,
+ SL2_loadrd_sp_BITS = 0x1e00,
+ SL2_loadrd_sp_MASK = 0x1f00,
+ SL2_loadrh_io_BITS = 0x0000,
+ SL2_loadrh_io_MASK = 0x1800,
+ SL2_loadri_sp_BITS = 0x1c00,
+ SL2_loadri_sp_MASK = 0x1e00,
+ SL2_loadruh_io_BITS = 0x0800,
+ SL2_loadruh_io_MASK = 0x1800,
+ SL2_return_BITS = 0x1f40,
+ SL2_return_MASK = 0x1fc4,
+ SL2_return_f_BITS = 0x1f45,
+ SL2_return_f_MASK = 0x1fc7,
+ SL2_return_fnew_BITS = 0x1f47,
+ SL2_return_fnew_MASK = 0x1fc7,
+ SL2_return_t_BITS = 0x1f44,
+ SL2_return_t_MASK = 0x1fc7,
+ SL2_return_tnew_BITS = 0x1f46,
+ SL2_return_tnew_MASK = 0x1fc7,
+ SS1_storeb_io_BITS = 0x1000,
+ SS1_storeb_io_MASK = 0x1000,
+ SS1_storew_io_BITS = 0x0000,
+ SS1_storew_io_MASK = 0x1000,
+ SS2_allocframe_BITS = 0x1c00,
+ SS2_allocframe_MASK = 0x1e00,
+ SS2_storebi0_BITS = 0x1200,
+ SS2_storebi0_MASK = 0x1f00,
+ SS2_storebi1_BITS = 0x1300,
+ SS2_storebi1_MASK = 0x1f00,
+ SS2_stored_sp_BITS = 0x0a00,
+ SS2_stored_sp_MASK = 0x1e00,
+ SS2_storeh_io_BITS = 0x0000,
+ SS2_storeh_io_MASK = 0x1800,
+ SS2_storew_sp_BITS = 0x0800,
+ SS2_storew_sp_MASK = 0x1e00,
+ SS2_storewi0_BITS = 0x1000,
+ SS2_storewi0_MASK = 0x1f00,
+ SS2_storewi1_BITS = 0x1100,
+ SS2_storewi1_MASK = 0x1f00
};
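
Each entry of the enum records the fixed opcode bits of a 13-bit sub-instruction (*_BITS) together with a mask of which bit positions are fixed (*_MASK); GetSubinstOpcode() below selects the first encoding whose masked bits match. A standalone sketch using two values copied from the enum (the sample word is made up):

    #include <cstdint>
    #include <cstdio>

    // Same matching rule GetSubinstOpcode() applies: a sub-instruction word
    // belongs to an encoding iff its fixed bits (selected by MASK) equal BITS.
    // Values copied from the subInstBinaryValues enum above.
    static const uint16_t SA1_seti_BITS   = 0x0800, SA1_seti_MASK   = 0x1c00;
    static const uint16_t SA1_cmpeqi_BITS = 0x1900, SA1_cmpeqi_MASK = 0x1f00;

    static bool matches(uint16_t inst, uint16_t bits, uint16_t mask) {
      return (inst & mask) == bits;
    }

    int main() {
      uint16_t inst = 0x0830;   // hypothetical HSIG_A sub-instruction word
      std::printf("seti:   %d\n", matches(inst, SA1_seti_BITS,   SA1_seti_MASK));   // 1
      std::printf("cmpeqi: %d\n", matches(inst, SA1_cmpeqi_BITS, SA1_cmpeqi_MASK)); // 0
      return 0;
    }
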
static unsigned GetSubinstOpcode(unsigned IClass, unsigned inst, unsigned &op,
raw_ostream &os) {
switch (IClass) {
case HexagonII::HSIG_L1:
- if ((inst & V4_SL1_loadri_io_MASK) == V4_SL1_loadri_io_BITS)
- op = Hexagon::V4_SL1_loadri_io;
- else if ((inst & V4_SL1_loadrub_io_MASK) == V4_SL1_loadrub_io_BITS)
- op = Hexagon::V4_SL1_loadrub_io;
+ if ((inst & SL1_loadri_io_MASK) == SL1_loadri_io_BITS)
+ op = Hexagon::SL1_loadri_io;
+ else if ((inst & SL1_loadrub_io_MASK) == SL1_loadrub_io_BITS)
+ op = Hexagon::SL1_loadrub_io;
else {
os << "<unknown subinstruction>";
return MCDisassembler::Fail;
}
break;
case HexagonII::HSIG_L2:
- if ((inst & V4_SL2_deallocframe_MASK) == V4_SL2_deallocframe_BITS)
- op = Hexagon::V4_SL2_deallocframe;
- else if ((inst & V4_SL2_jumpr31_MASK) == V4_SL2_jumpr31_BITS)
- op = Hexagon::V4_SL2_jumpr31;
- else if ((inst & V4_SL2_jumpr31_f_MASK) == V4_SL2_jumpr31_f_BITS)
- op = Hexagon::V4_SL2_jumpr31_f;
- else if ((inst & V4_SL2_jumpr31_fnew_MASK) == V4_SL2_jumpr31_fnew_BITS)
- op = Hexagon::V4_SL2_jumpr31_fnew;
- else if ((inst & V4_SL2_jumpr31_t_MASK) == V4_SL2_jumpr31_t_BITS)
- op = Hexagon::V4_SL2_jumpr31_t;
- else if ((inst & V4_SL2_jumpr31_tnew_MASK) == V4_SL2_jumpr31_tnew_BITS)
- op = Hexagon::V4_SL2_jumpr31_tnew;
- else if ((inst & V4_SL2_loadrb_io_MASK) == V4_SL2_loadrb_io_BITS)
- op = Hexagon::V4_SL2_loadrb_io;
- else if ((inst & V4_SL2_loadrd_sp_MASK) == V4_SL2_loadrd_sp_BITS)
- op = Hexagon::V4_SL2_loadrd_sp;
- else if ((inst & V4_SL2_loadrh_io_MASK) == V4_SL2_loadrh_io_BITS)
- op = Hexagon::V4_SL2_loadrh_io;
- else if ((inst & V4_SL2_loadri_sp_MASK) == V4_SL2_loadri_sp_BITS)
- op = Hexagon::V4_SL2_loadri_sp;
- else if ((inst & V4_SL2_loadruh_io_MASK) == V4_SL2_loadruh_io_BITS)
- op = Hexagon::V4_SL2_loadruh_io;
- else if ((inst & V4_SL2_return_MASK) == V4_SL2_return_BITS)
- op = Hexagon::V4_SL2_return;
- else if ((inst & V4_SL2_return_f_MASK) == V4_SL2_return_f_BITS)
- op = Hexagon::V4_SL2_return_f;
- else if ((inst & V4_SL2_return_fnew_MASK) == V4_SL2_return_fnew_BITS)
- op = Hexagon::V4_SL2_return_fnew;
- else if ((inst & V4_SL2_return_t_MASK) == V4_SL2_return_t_BITS)
- op = Hexagon::V4_SL2_return_t;
- else if ((inst & V4_SL2_return_tnew_MASK) == V4_SL2_return_tnew_BITS)
- op = Hexagon::V4_SL2_return_tnew;
+ if ((inst & SL2_deallocframe_MASK) == SL2_deallocframe_BITS)
+ op = Hexagon::SL2_deallocframe;
+ else if ((inst & SL2_jumpr31_MASK) == SL2_jumpr31_BITS)
+ op = Hexagon::SL2_jumpr31;
+ else if ((inst & SL2_jumpr31_f_MASK) == SL2_jumpr31_f_BITS)
+ op = Hexagon::SL2_jumpr31_f;
+ else if ((inst & SL2_jumpr31_fnew_MASK) == SL2_jumpr31_fnew_BITS)
+ op = Hexagon::SL2_jumpr31_fnew;
+ else if ((inst & SL2_jumpr31_t_MASK) == SL2_jumpr31_t_BITS)
+ op = Hexagon::SL2_jumpr31_t;
+ else if ((inst & SL2_jumpr31_tnew_MASK) == SL2_jumpr31_tnew_BITS)
+ op = Hexagon::SL2_jumpr31_tnew;
+ else if ((inst & SL2_loadrb_io_MASK) == SL2_loadrb_io_BITS)
+ op = Hexagon::SL2_loadrb_io;
+ else if ((inst & SL2_loadrd_sp_MASK) == SL2_loadrd_sp_BITS)
+ op = Hexagon::SL2_loadrd_sp;
+ else if ((inst & SL2_loadrh_io_MASK) == SL2_loadrh_io_BITS)
+ op = Hexagon::SL2_loadrh_io;
+ else if ((inst & SL2_loadri_sp_MASK) == SL2_loadri_sp_BITS)
+ op = Hexagon::SL2_loadri_sp;
+ else if ((inst & SL2_loadruh_io_MASK) == SL2_loadruh_io_BITS)
+ op = Hexagon::SL2_loadruh_io;
+ else if ((inst & SL2_return_MASK) == SL2_return_BITS)
+ op = Hexagon::SL2_return;
+ else if ((inst & SL2_return_f_MASK) == SL2_return_f_BITS)
+ op = Hexagon::SL2_return_f;
+ else if ((inst & SL2_return_fnew_MASK) == SL2_return_fnew_BITS)
+ op = Hexagon::SL2_return_fnew;
+ else if ((inst & SL2_return_t_MASK) == SL2_return_t_BITS)
+ op = Hexagon::SL2_return_t;
+ else if ((inst & SL2_return_tnew_MASK) == SL2_return_tnew_BITS)
+ op = Hexagon::SL2_return_tnew;
else {
os << "<unknown subinstruction>";
return MCDisassembler::Fail;
}
break;
case HexagonII::HSIG_A:
- if ((inst & V4_SA1_addi_MASK) == V4_SA1_addi_BITS)
- op = Hexagon::V4_SA1_addi;
- else if ((inst & V4_SA1_addrx_MASK) == V4_SA1_addrx_BITS)
- op = Hexagon::V4_SA1_addrx;
- else if ((inst & V4_SA1_addsp_MASK) == V4_SA1_addsp_BITS)
- op = Hexagon::V4_SA1_addsp;
- else if ((inst & V4_SA1_and1_MASK) == V4_SA1_and1_BITS)
- op = Hexagon::V4_SA1_and1;
- else if ((inst & V4_SA1_clrf_MASK) == V4_SA1_clrf_BITS)
- op = Hexagon::V4_SA1_clrf;
- else if ((inst & V4_SA1_clrfnew_MASK) == V4_SA1_clrfnew_BITS)
- op = Hexagon::V4_SA1_clrfnew;
- else if ((inst & V4_SA1_clrt_MASK) == V4_SA1_clrt_BITS)
- op = Hexagon::V4_SA1_clrt;
- else if ((inst & V4_SA1_clrtnew_MASK) == V4_SA1_clrtnew_BITS)
- op = Hexagon::V4_SA1_clrtnew;
- else if ((inst & V4_SA1_cmpeqi_MASK) == V4_SA1_cmpeqi_BITS)
- op = Hexagon::V4_SA1_cmpeqi;
- else if ((inst & V4_SA1_combine0i_MASK) == V4_SA1_combine0i_BITS)
- op = Hexagon::V4_SA1_combine0i;
- else if ((inst & V4_SA1_combine1i_MASK) == V4_SA1_combine1i_BITS)
- op = Hexagon::V4_SA1_combine1i;
- else if ((inst & V4_SA1_combine2i_MASK) == V4_SA1_combine2i_BITS)
- op = Hexagon::V4_SA1_combine2i;
- else if ((inst & V4_SA1_combine3i_MASK) == V4_SA1_combine3i_BITS)
- op = Hexagon::V4_SA1_combine3i;
- else if ((inst & V4_SA1_combinerz_MASK) == V4_SA1_combinerz_BITS)
- op = Hexagon::V4_SA1_combinerz;
- else if ((inst & V4_SA1_combinezr_MASK) == V4_SA1_combinezr_BITS)
- op = Hexagon::V4_SA1_combinezr;
- else if ((inst & V4_SA1_dec_MASK) == V4_SA1_dec_BITS)
- op = Hexagon::V4_SA1_dec;
- else if ((inst & V4_SA1_inc_MASK) == V4_SA1_inc_BITS)
- op = Hexagon::V4_SA1_inc;
- else if ((inst & V4_SA1_seti_MASK) == V4_SA1_seti_BITS)
- op = Hexagon::V4_SA1_seti;
- else if ((inst & V4_SA1_setin1_MASK) == V4_SA1_setin1_BITS)
- op = Hexagon::V4_SA1_setin1;
- else if ((inst & V4_SA1_sxtb_MASK) == V4_SA1_sxtb_BITS)
- op = Hexagon::V4_SA1_sxtb;
- else if ((inst & V4_SA1_sxth_MASK) == V4_SA1_sxth_BITS)
- op = Hexagon::V4_SA1_sxth;
- else if ((inst & V4_SA1_tfr_MASK) == V4_SA1_tfr_BITS)
- op = Hexagon::V4_SA1_tfr;
- else if ((inst & V4_SA1_zxtb_MASK) == V4_SA1_zxtb_BITS)
- op = Hexagon::V4_SA1_zxtb;
- else if ((inst & V4_SA1_zxth_MASK) == V4_SA1_zxth_BITS)
- op = Hexagon::V4_SA1_zxth;
+ if ((inst & SA1_addi_MASK) == SA1_addi_BITS)
+ op = Hexagon::SA1_addi;
+ else if ((inst & SA1_addrx_MASK) == SA1_addrx_BITS)
+ op = Hexagon::SA1_addrx;
+ else if ((inst & SA1_addsp_MASK) == SA1_addsp_BITS)
+ op = Hexagon::SA1_addsp;
+ else if ((inst & SA1_and1_MASK) == SA1_and1_BITS)
+ op = Hexagon::SA1_and1;
+ else if ((inst & SA1_clrf_MASK) == SA1_clrf_BITS)
+ op = Hexagon::SA1_clrf;
+ else if ((inst & SA1_clrfnew_MASK) == SA1_clrfnew_BITS)
+ op = Hexagon::SA1_clrfnew;
+ else if ((inst & SA1_clrt_MASK) == SA1_clrt_BITS)
+ op = Hexagon::SA1_clrt;
+ else if ((inst & SA1_clrtnew_MASK) == SA1_clrtnew_BITS)
+ op = Hexagon::SA1_clrtnew;
+ else if ((inst & SA1_cmpeqi_MASK) == SA1_cmpeqi_BITS)
+ op = Hexagon::SA1_cmpeqi;
+ else if ((inst & SA1_combine0i_MASK) == SA1_combine0i_BITS)
+ op = Hexagon::SA1_combine0i;
+ else if ((inst & SA1_combine1i_MASK) == SA1_combine1i_BITS)
+ op = Hexagon::SA1_combine1i;
+ else if ((inst & SA1_combine2i_MASK) == SA1_combine2i_BITS)
+ op = Hexagon::SA1_combine2i;
+ else if ((inst & SA1_combine3i_MASK) == SA1_combine3i_BITS)
+ op = Hexagon::SA1_combine3i;
+ else if ((inst & SA1_combinerz_MASK) == SA1_combinerz_BITS)
+ op = Hexagon::SA1_combinerz;
+ else if ((inst & SA1_combinezr_MASK) == SA1_combinezr_BITS)
+ op = Hexagon::SA1_combinezr;
+ else if ((inst & SA1_dec_MASK) == SA1_dec_BITS)
+ op = Hexagon::SA1_dec;
+ else if ((inst & SA1_inc_MASK) == SA1_inc_BITS)
+ op = Hexagon::SA1_inc;
+ else if ((inst & SA1_seti_MASK) == SA1_seti_BITS)
+ op = Hexagon::SA1_seti;
+ else if ((inst & SA1_setin1_MASK) == SA1_setin1_BITS)
+ op = Hexagon::SA1_setin1;
+ else if ((inst & SA1_sxtb_MASK) == SA1_sxtb_BITS)
+ op = Hexagon::SA1_sxtb;
+ else if ((inst & SA1_sxth_MASK) == SA1_sxth_BITS)
+ op = Hexagon::SA1_sxth;
+ else if ((inst & SA1_tfr_MASK) == SA1_tfr_BITS)
+ op = Hexagon::SA1_tfr;
+ else if ((inst & SA1_zxtb_MASK) == SA1_zxtb_BITS)
+ op = Hexagon::SA1_zxtb;
+ else if ((inst & SA1_zxth_MASK) == SA1_zxth_BITS)
+ op = Hexagon::SA1_zxth;
else {
os << "<unknown subinstruction>";
return MCDisassembler::Fail;
}
break;
case HexagonII::HSIG_S1:
- if ((inst & V4_SS1_storeb_io_MASK) == V4_SS1_storeb_io_BITS)
- op = Hexagon::V4_SS1_storeb_io;
- else if ((inst & V4_SS1_storew_io_MASK) == V4_SS1_storew_io_BITS)
- op = Hexagon::V4_SS1_storew_io;
+ if ((inst & SS1_storeb_io_MASK) == SS1_storeb_io_BITS)
+ op = Hexagon::SS1_storeb_io;
+ else if ((inst & SS1_storew_io_MASK) == SS1_storew_io_BITS)
+ op = Hexagon::SS1_storew_io;
else {
os << "<unknown subinstruction>";
return MCDisassembler::Fail;
}
break;
case HexagonII::HSIG_S2:
- if ((inst & V4_SS2_allocframe_MASK) == V4_SS2_allocframe_BITS)
- op = Hexagon::V4_SS2_allocframe;
- else if ((inst & V4_SS2_storebi0_MASK) == V4_SS2_storebi0_BITS)
- op = Hexagon::V4_SS2_storebi0;
- else if ((inst & V4_SS2_storebi1_MASK) == V4_SS2_storebi1_BITS)
- op = Hexagon::V4_SS2_storebi1;
- else if ((inst & V4_SS2_stored_sp_MASK) == V4_SS2_stored_sp_BITS)
- op = Hexagon::V4_SS2_stored_sp;
- else if ((inst & V4_SS2_storeh_io_MASK) == V4_SS2_storeh_io_BITS)
- op = Hexagon::V4_SS2_storeh_io;
- else if ((inst & V4_SS2_storew_sp_MASK) == V4_SS2_storew_sp_BITS)
- op = Hexagon::V4_SS2_storew_sp;
- else if ((inst & V4_SS2_storewi0_MASK) == V4_SS2_storewi0_BITS)
- op = Hexagon::V4_SS2_storewi0;
- else if ((inst & V4_SS2_storewi1_MASK) == V4_SS2_storewi1_BITS)
- op = Hexagon::V4_SS2_storewi1;
+ if ((inst & SS2_allocframe_MASK) == SS2_allocframe_BITS)
+ op = Hexagon::SS2_allocframe;
+ else if ((inst & SS2_storebi0_MASK) == SS2_storebi0_BITS)
+ op = Hexagon::SS2_storebi0;
+ else if ((inst & SS2_storebi1_MASK) == SS2_storebi1_BITS)
+ op = Hexagon::SS2_storebi1;
+ else if ((inst & SS2_stored_sp_MASK) == SS2_stored_sp_BITS)
+ op = Hexagon::SS2_stored_sp;
+ else if ((inst & SS2_storeh_io_MASK) == SS2_storeh_io_BITS)
+ op = Hexagon::SS2_storeh_io;
+ else if ((inst & SS2_storew_sp_MASK) == SS2_storew_sp_BITS)
+ op = Hexagon::SS2_storew_sp;
+ else if ((inst & SS2_storewi0_MASK) == SS2_storewi0_BITS)
+ op = Hexagon::SS2_storewi0;
+ else if ((inst & SS2_storewi1_MASK) == SS2_storewi1_BITS)
+ op = Hexagon::SS2_storewi1;
else {
os << "<unknown subinstruction>";
return MCDisassembler::Fail;
@@ -1362,25 +1377,25 @@ void HexagonDisassembler::addSubinstOperands(MCInst *MI, unsigned opcode,
int64_t operand;
MCOperand Op;
switch (opcode) {
- case Hexagon::V4_SL2_deallocframe:
- case Hexagon::V4_SL2_jumpr31:
- case Hexagon::V4_SL2_jumpr31_f:
- case Hexagon::V4_SL2_jumpr31_fnew:
- case Hexagon::V4_SL2_jumpr31_t:
- case Hexagon::V4_SL2_jumpr31_tnew:
- case Hexagon::V4_SL2_return:
- case Hexagon::V4_SL2_return_f:
- case Hexagon::V4_SL2_return_fnew:
- case Hexagon::V4_SL2_return_t:
- case Hexagon::V4_SL2_return_tnew:
+ case Hexagon::SL2_deallocframe:
+ case Hexagon::SL2_jumpr31:
+ case Hexagon::SL2_jumpr31_f:
+ case Hexagon::SL2_jumpr31_fnew:
+ case Hexagon::SL2_jumpr31_t:
+ case Hexagon::SL2_jumpr31_tnew:
+ case Hexagon::SL2_return:
+ case Hexagon::SL2_return_f:
+ case Hexagon::SL2_return_fnew:
+ case Hexagon::SL2_return_t:
+ case Hexagon::SL2_return_tnew:
// no operands for these instructions
break;
- case Hexagon::V4_SS2_allocframe:
+ case Hexagon::SS2_allocframe:
// u 8-4{5_3}
operand = ((inst & 0x1f0) >> 4) << 3;
HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
break;
- case Hexagon::V4_SL1_loadri_io:
+ case Hexagon::SL1_loadri_io:
// Rd 3-0, Rs 7-4, u 11-8{4_2}
operand = getRegFromSubinstEncoding(inst & 0xf);
Op = MCOperand::createReg(operand);
@@ -1391,7 +1406,7 @@ void HexagonDisassembler::addSubinstOperands(MCInst *MI, unsigned opcode,
operand = (inst & 0xf00) >> 6;
HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
break;
- case Hexagon::V4_SL1_loadrub_io:
+ case Hexagon::SL1_loadrub_io:
// Rd 3-0, Rs 7-4, u 11-8
operand = getRegFromSubinstEncoding(inst & 0xf);
Op = MCOperand::createReg(operand);
@@ -1402,7 +1417,7 @@ void HexagonDisassembler::addSubinstOperands(MCInst *MI, unsigned opcode,
operand = (inst & 0xf00) >> 8;
HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
break;
- case Hexagon::V4_SL2_loadrb_io:
+ case Hexagon::SL2_loadrb_io:
// Rd 3-0, Rs 7-4, u 10-8
operand = getRegFromSubinstEncoding(inst & 0xf);
Op = MCOperand::createReg(operand);
@@ -1413,8 +1428,8 @@ void HexagonDisassembler::addSubinstOperands(MCInst *MI, unsigned opcode,
operand = (inst & 0x700) >> 8;
HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
break;
- case Hexagon::V4_SL2_loadrh_io:
- case Hexagon::V4_SL2_loadruh_io:
+ case Hexagon::SL2_loadrh_io:
+ case Hexagon::SL2_loadruh_io:
// Rd 3-0, Rs 7-4, u 10-8{3_1}
operand = getRegFromSubinstEncoding(inst & 0xf);
Op = MCOperand::createReg(operand);
@@ -1425,7 +1440,7 @@ void HexagonDisassembler::addSubinstOperands(MCInst *MI, unsigned opcode,
operand = ((inst & 0x700) >> 8) << 1;
HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
break;
- case Hexagon::V4_SL2_loadrd_sp:
+ case Hexagon::SL2_loadrd_sp:
// Rdd 2-0, u 7-3{5_3}
operand = getDRegFromSubinstEncoding(inst & 0x7);
Op = MCOperand::createReg(operand);
@@ -1433,7 +1448,7 @@ void HexagonDisassembler::addSubinstOperands(MCInst *MI, unsigned opcode,
operand = ((inst & 0x0f8) >> 3) << 3;
HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
break;
- case Hexagon::V4_SL2_loadri_sp:
+ case Hexagon::SL2_loadri_sp:
// Rd 3-0, u 8-4{5_2}
operand = getRegFromSubinstEncoding(inst & 0xf);
Op = MCOperand::createReg(operand);
@@ -1441,7 +1456,7 @@ void HexagonDisassembler::addSubinstOperands(MCInst *MI, unsigned opcode,
operand = ((inst & 0x1f0) >> 4) << 2;
HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
break;
- case Hexagon::V4_SA1_addi:
+ case Hexagon::SA1_addi:
// Rx 3-0 (x2), s7 10-4
operand = getRegFromSubinstEncoding(inst & 0xf);
Op = MCOperand::createReg(operand);
@@ -1450,7 +1465,7 @@ void HexagonDisassembler::addSubinstOperands(MCInst *MI, unsigned opcode,
operand = SignExtend64<7>((inst & 0x7f0) >> 4);
HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
break;
- case Hexagon::V4_SA1_addrx:
+ case Hexagon::SA1_addrx:
// Rx 3-0 (x2), Rs 7-4
operand = getRegFromSubinstEncoding(inst & 0xf);
Op = MCOperand::createReg(operand);
@@ -1460,14 +1475,14 @@ void HexagonDisassembler::addSubinstOperands(MCInst *MI, unsigned opcode,
Op = MCOperand::createReg(operand);
MI->addOperand(Op);
break;
- case Hexagon::V4_SA1_and1:
- case Hexagon::V4_SA1_dec:
- case Hexagon::V4_SA1_inc:
- case Hexagon::V4_SA1_sxtb:
- case Hexagon::V4_SA1_sxth:
- case Hexagon::V4_SA1_tfr:
- case Hexagon::V4_SA1_zxtb:
- case Hexagon::V4_SA1_zxth:
+ case Hexagon::SA1_and1:
+ case Hexagon::SA1_dec:
+ case Hexagon::SA1_inc:
+ case Hexagon::SA1_sxtb:
+ case Hexagon::SA1_sxth:
+ case Hexagon::SA1_tfr:
+ case Hexagon::SA1_zxtb:
+ case Hexagon::SA1_zxth:
// Rd 3-0, Rs 7-4
operand = getRegFromSubinstEncoding(inst & 0xf);
Op = MCOperand::createReg(operand);
@@ -1476,7 +1491,7 @@ void HexagonDisassembler::addSubinstOperands(MCInst *MI, unsigned opcode,
Op = MCOperand::createReg(operand);
MI->addOperand(Op);
break;
- case Hexagon::V4_SA1_addsp:
+ case Hexagon::SA1_addsp:
// Rd 3-0, u 9-4{6_2}
operand = getRegFromSubinstEncoding(inst & 0xf);
Op = MCOperand::createReg(operand);
@@ -1484,7 +1499,7 @@ void HexagonDisassembler::addSubinstOperands(MCInst *MI, unsigned opcode,
operand = ((inst & 0x3f0) >> 4) << 2;
HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
break;
- case Hexagon::V4_SA1_seti:
+ case Hexagon::SA1_seti:
// Rd 3-0, u 9-4
operand = getRegFromSubinstEncoding(inst & 0xf);
Op = MCOperand::createReg(operand);
@@ -1492,17 +1507,20 @@ void HexagonDisassembler::addSubinstOperands(MCInst *MI, unsigned opcode,
operand = (inst & 0x3f0) >> 4;
HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
break;
- case Hexagon::V4_SA1_clrf:
- case Hexagon::V4_SA1_clrfnew:
- case Hexagon::V4_SA1_clrt:
- case Hexagon::V4_SA1_clrtnew:
- case Hexagon::V4_SA1_setin1:
+ case Hexagon::SA1_clrf:
+ case Hexagon::SA1_clrfnew:
+ case Hexagon::SA1_clrt:
+ case Hexagon::SA1_clrtnew:
+ case Hexagon::SA1_setin1:
// Rd 3-0
operand = getRegFromSubinstEncoding(inst & 0xf);
Op = MCOperand::createReg(operand);
MI->addOperand(Op);
+ if (opcode == Hexagon::SA1_setin1)
+ break;
+ MI->addOperand(MCOperand::createReg(Hexagon::P0));
break;
- case Hexagon::V4_SA1_cmpeqi:
+ case Hexagon::SA1_cmpeqi:
// Rs 7-4, u 1-0
operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
Op = MCOperand::createReg(operand);
@@ -1510,10 +1528,10 @@ void HexagonDisassembler::addSubinstOperands(MCInst *MI, unsigned opcode,
operand = inst & 0x3;
HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
break;
- case Hexagon::V4_SA1_combine0i:
- case Hexagon::V4_SA1_combine1i:
- case Hexagon::V4_SA1_combine2i:
- case Hexagon::V4_SA1_combine3i:
+ case Hexagon::SA1_combine0i:
+ case Hexagon::SA1_combine1i:
+ case Hexagon::SA1_combine2i:
+ case Hexagon::SA1_combine3i:
// Rdd 2-0, u 6-5
operand = getDRegFromSubinstEncoding(inst & 0x7);
Op = MCOperand::createReg(operand);
@@ -1521,8 +1539,8 @@ void HexagonDisassembler::addSubinstOperands(MCInst *MI, unsigned opcode,
operand = (inst & 0x060) >> 5;
HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
break;
- case Hexagon::V4_SA1_combinerz:
- case Hexagon::V4_SA1_combinezr:
+ case Hexagon::SA1_combinerz:
+ case Hexagon::SA1_combinezr:
// Rdd 2-0, Rs 7-4
operand = getDRegFromSubinstEncoding(inst & 0x7);
Op = MCOperand::createReg(operand);
@@ -1531,7 +1549,7 @@ void HexagonDisassembler::addSubinstOperands(MCInst *MI, unsigned opcode,
Op = MCOperand::createReg(operand);
MI->addOperand(Op);
break;
- case Hexagon::V4_SS1_storeb_io:
+ case Hexagon::SS1_storeb_io:
// Rs 7-4, u 11-8, Rt 3-0
operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
Op = MCOperand::createReg(operand);
@@ -1542,7 +1560,7 @@ void HexagonDisassembler::addSubinstOperands(MCInst *MI, unsigned opcode,
Op = MCOperand::createReg(operand);
MI->addOperand(Op);
break;
- case Hexagon::V4_SS1_storew_io:
+ case Hexagon::SS1_storew_io:
// Rs 7-4, u 11-8{4_2}, Rt 3-0
operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
Op = MCOperand::createReg(operand);
@@ -1553,8 +1571,8 @@ void HexagonDisassembler::addSubinstOperands(MCInst *MI, unsigned opcode,
Op = MCOperand::createReg(operand);
MI->addOperand(Op);
break;
- case Hexagon::V4_SS2_storebi0:
- case Hexagon::V4_SS2_storebi1:
+ case Hexagon::SS2_storebi0:
+ case Hexagon::SS2_storebi1:
// Rs 7-4, u 3-0
operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
Op = MCOperand::createReg(operand);
@@ -1562,8 +1580,8 @@ void HexagonDisassembler::addSubinstOperands(MCInst *MI, unsigned opcode,
operand = inst & 0xf;
HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
break;
- case Hexagon::V4_SS2_storewi0:
- case Hexagon::V4_SS2_storewi1:
+ case Hexagon::SS2_storewi0:
+ case Hexagon::SS2_storewi1:
// Rs 7-4, u 3-0{4_2}
operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
Op = MCOperand::createReg(operand);
@@ -1571,7 +1589,7 @@ void HexagonDisassembler::addSubinstOperands(MCInst *MI, unsigned opcode,
operand = (inst & 0xf) << 2;
HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
break;
- case Hexagon::V4_SS2_stored_sp:
+ case Hexagon::SS2_stored_sp:
// s 8-3{6_3}, Rtt 2-0
operand = SignExtend64<9>(((inst & 0x1f8) >> 3) << 3);
HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
@@ -1579,7 +1597,7 @@ void HexagonDisassembler::addSubinstOperands(MCInst *MI, unsigned opcode,
Op = MCOperand::createReg(operand);
MI->addOperand(Op);
break;
- case Hexagon::V4_SS2_storeh_io:
+ case Hexagon::SS2_storeh_io:
// Rs 7-4, u 10-8{3_1}, Rt 3-0
operand = getRegFromSubinstEncoding((inst & 0xf0) >> 4);
Op = MCOperand::createReg(operand);
@@ -1590,7 +1608,7 @@ void HexagonDisassembler::addSubinstOperands(MCInst *MI, unsigned opcode,
Op = MCOperand::createReg(operand);
MI->addOperand(Op);
break;
- case Hexagon::V4_SS2_storew_sp:
+ case Hexagon::SS2_storew_sp:
// u 8-4{5_2}, Rd 3-0
operand = ((inst & 0x1f0) >> 4) << 2;
HexagonMCInstrInfo::addConstant(*MI, operand, getContext());
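
The field comments in addSubinstOperands() use a compact notation: "u 8-4{5_3}" means an unsigned field stored in bits 8..4 (5 bits) and scaled by 8 (shifted left 3). Two of the extractions above, restated standalone with sample inputs:

    #include <cstdint>
    #include <cstdio>

    // Worked examples of the field extractions used by addSubinstOperands().

    // SS2_allocframe / SL2_loadrd_sp style: u 8-4{5_3}
    static uint32_t allocframeImm(uint32_t inst) {
      return ((inst & 0x1f0) >> 4) << 3;
    }

    // SA1_addi style: s7 in bits 10..4, sign-extended
    static int64_t addiImm(uint32_t inst) {
      uint32_t f = (inst & 0x7f0) >> 4;    // 7-bit field
      return (int64_t)((f ^ 0x40) - 0x40); // sign-extend 7 bits
    }

    int main() {
      std::printf("%u\n", allocframeImm(0x1f0));         // field 0x1f -> 31*8 = 248
      std::printf("%lld\n", (long long)addiImm(0x7f0));  // field 0x7f -> -1
      return 0;
    }
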
diff --git a/contrib/llvm/lib/Target/Hexagon/Hexagon.td b/contrib/llvm/lib/Target/Hexagon/Hexagon.td
index aaa0f3e9b3d3..0b2b46387b6a 100644
--- a/contrib/llvm/lib/Target/Hexagon/Hexagon.td
+++ b/contrib/llvm/lib/Target/Hexagon/Hexagon.td
@@ -27,11 +27,12 @@ def ArchV5: SubtargetFeature<"v5", "HexagonArchVersion", "V5", "Hexagon V5">;
def ArchV55: SubtargetFeature<"v55", "HexagonArchVersion", "V55", "Hexagon V55">;
def ArchV60: SubtargetFeature<"v60", "HexagonArchVersion", "V60", "Hexagon V60">;
-// Hexagon ISA Extensions
-def ExtensionHVX: SubtargetFeature<"hvx", "UseHVXOps",
- "true", "Hexagon HVX instructions">;
-def ExtensionHVXDbl: SubtargetFeature<"hvx-double", "UseHVXDblOps",
- "true", "Hexagon HVX Double instructions">;
+def FeatureHVX: SubtargetFeature<"hvx", "UseHVXOps", "true",
+ "Hexagon HVX instructions">;
+def FeatureHVXDbl: SubtargetFeature<"hvx-double", "UseHVXDblOps", "true",
+ "Hexagon HVX Double instructions">;
+def FeatureLongCalls: SubtargetFeature<"long-calls", "UseLongCalls", "true",
+ "Use constant-extended calls">;
//===----------------------------------------------------------------------===//
// Hexagon Instruction Predicate Definitions.
@@ -45,10 +46,10 @@ def HasV60T : Predicate<"HST->hasV60TOps()">,
def UseMEMOP : Predicate<"HST->useMemOps()">;
def IEEERndNearV5T : Predicate<"HST->modeIEEERndNear()">;
def UseHVXDbl : Predicate<"HST->useHVXDblOps()">,
- AssemblerPredicate<"ExtensionHVXDbl">;
+ AssemblerPredicate<"FeatureHVXDbl">;
def UseHVXSgl : Predicate<"HST->useHVXSglOps()">;
def UseHVX : Predicate<"HST->useHVXSglOps() ||HST->useHVXDblOps()">,
- AssemblerPredicate<"ExtensionHVX">;
+ AssemblerPredicate<"FeatureHVX">;
//===----------------------------------------------------------------------===//
// Classes used for relation maps.
@@ -249,6 +250,7 @@ include "HexagonSchedule.td"
include "HexagonRegisterInfo.td"
include "HexagonCallingConv.td"
include "HexagonInstrInfo.td"
+include "HexagonPatterns.td"
include "HexagonIntrinsics.td"
include "HexagonIntrinsicsDerived.td"
@@ -269,7 +271,7 @@ def : Proc<"hexagonv5", HexagonModelV4,
def : Proc<"hexagonv55", HexagonModelV55,
[ArchV4, ArchV5, ArchV55]>;
def : Proc<"hexagonv60", HexagonModelV60,
- [ArchV4, ArchV5, ArchV55, ArchV60, ExtensionHVX]>;
+ [ArchV4, ArchV5, ArchV55, ArchV60, FeatureHVX]>;
//===----------------------------------------------------------------------===//
// Declare the target which we are implementing
@@ -282,7 +284,7 @@ def HexagonAsmParser : AsmParser {
def HexagonAsmParserVariant : AsmParserVariant {
int Variant = 0;
- string TokenizingCharacters = "#()=:.<>!+*";
+ string TokenizingCharacters = "#()=:.<>!+*-|^&";
}
def Hexagon : Target {
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp
index cd954a146104..54db5ad4374b 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -81,7 +81,7 @@ HexagonAsmPrinter::HexagonAsmPrinter(TargetMachine &TM,
: AsmPrinter(TM, std::move(Streamer)), Subtarget(nullptr) {}
void HexagonAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
- raw_ostream &O) {
+ raw_ostream &O) {
const MachineOperand &MO = MI->getOperand(OpNo);
switch (MO.getType()) {
@@ -141,14 +141,22 @@ bool HexagonAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
// Hexagon never has a prefix.
printOperand(MI, OpNo, OS);
return false;
- case 'L': // Write second word of DImode reference.
- // Verify that this operand has two consecutive registers.
- if (!MI->getOperand(OpNo).isReg() ||
- OpNo+1 == MI->getNumOperands() ||
- !MI->getOperand(OpNo+1).isReg())
+ case 'L':
+ case 'H': { // The highest-numbered register of a pair.
+ const MachineOperand &MO = MI->getOperand(OpNo);
+ const MachineFunction &MF = *MI->getParent()->getParent();
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ if (!MO.isReg())
return true;
- ++OpNo; // Return the high-part.
- break;
+ unsigned RegNumber = MO.getReg();
+ // This should be an assert in the frontend.
+ if (Hexagon::DoubleRegsRegClass.contains(RegNumber))
+ RegNumber = TRI->getSubReg(RegNumber, ExtraCode[0] == 'L' ?
+ Hexagon::isub_lo :
+ Hexagon::isub_hi);
+ OS << HexagonInstPrinter::getRegisterName(RegNumber);
+ return false;
+ }
case 'I':
// Write 'i' if an integer constant, otherwise nothing. Used to print
// addi vs add, etc.
@@ -163,9 +171,9 @@ bool HexagonAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
}
bool HexagonAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
- unsigned OpNo, unsigned AsmVariant,
- const char *ExtraCode,
- raw_ostream &O) {
+ unsigned OpNo, unsigned AsmVariant,
+ const char *ExtraCode,
+ raw_ostream &O) {
if (ExtraCode && ExtraCode[0])
return true; // Unknown modifier.
@@ -275,8 +283,7 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst,
}
// "$dst = CONST64(#$src1)",
- case Hexagon::CONST64_Float_Real:
- case Hexagon::CONST64_Int_Real:
+ case Hexagon::CONST64:
if (!OutStreamer->hasRawTextSupport()) {
const MCOperand &Imm = MappedInst.getOperand(1);
MCSectionSubPair Current = OutStreamer->getCurrentSection();
@@ -295,9 +302,6 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst,
}
break;
case Hexagon::CONST32:
- case Hexagon::CONST32_Float_Real:
- case Hexagon::CONST32_Int_Real:
- case Hexagon::FCONST32_nsdata:
if (!OutStreamer->hasRawTextSupport()) {
MCOperand &Imm = MappedInst.getOperand(1);
MCSectionSubPair Current = OutStreamer->getCurrentSection();
@@ -410,8 +414,8 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst,
TmpInst.setOpcode(Hexagon::A2_combinew);
TmpInst.addOperand(MappedInst.getOperand(0));
MCOperand &MO1 = MappedInst.getOperand(1);
- unsigned High = RI->getSubReg(MO1.getReg(), Hexagon::subreg_hireg);
- unsigned Low = RI->getSubReg(MO1.getReg(), Hexagon::subreg_loreg);
+ unsigned High = RI->getSubReg(MO1.getReg(), Hexagon::isub_hi);
+ unsigned Low = RI->getSubReg(MO1.getReg(), Hexagon::isub_lo);
// Add a new operand for the second register in the pair.
TmpInst.addOperand(MCOperand::createReg(High));
TmpInst.addOperand(MCOperand::createReg(Low));
@@ -458,21 +462,6 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst,
MappedInst = TmpInst;
return;
}
- case Hexagon::TFRI_f:
- MappedInst.setOpcode(Hexagon::A2_tfrsi);
- return;
- case Hexagon::TFRI_cPt_f:
- MappedInst.setOpcode(Hexagon::C2_cmoveit);
- return;
- case Hexagon::TFRI_cNotPt_f:
- MappedInst.setOpcode(Hexagon::C2_cmoveif);
- return;
- case Hexagon::MUX_ri_f:
- MappedInst.setOpcode(Hexagon::C2_muxri);
- return;
- case Hexagon::MUX_ir_f:
- MappedInst.setOpcode(Hexagon::C2_muxir);
- return;
// Translate a "$Rdd = #imm" to "$Rdd = combine(#[-1,0], #imm)"
case Hexagon::A2_tfrpi: {
@@ -498,8 +487,8 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst,
// Translate a "$Rdd = $Rss" to "$Rdd = combine($Rs, $Rt)"
case Hexagon::A2_tfrp: {
MCOperand &MO = MappedInst.getOperand(1);
- unsigned High = RI->getSubReg(MO.getReg(), Hexagon::subreg_hireg);
- unsigned Low = RI->getSubReg(MO.getReg(), Hexagon::subreg_loreg);
+ unsigned High = RI->getSubReg(MO.getReg(), Hexagon::isub_hi);
+ unsigned Low = RI->getSubReg(MO.getReg(), Hexagon::isub_lo);
MO.setReg(High);
// Add a new operand for the second register in the pair.
MappedInst.addOperand(MCOperand::createReg(Low));
@@ -510,8 +499,8 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst,
case Hexagon::A2_tfrpt:
case Hexagon::A2_tfrpf: {
MCOperand &MO = MappedInst.getOperand(2);
- unsigned High = RI->getSubReg(MO.getReg(), Hexagon::subreg_hireg);
- unsigned Low = RI->getSubReg(MO.getReg(), Hexagon::subreg_loreg);
+ unsigned High = RI->getSubReg(MO.getReg(), Hexagon::isub_hi);
+ unsigned Low = RI->getSubReg(MO.getReg(), Hexagon::isub_lo);
MO.setReg(High);
// Add a new operand for the second register in the pair.
MappedInst.addOperand(MCOperand::createReg(Low));
@@ -523,8 +512,8 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst,
case Hexagon::A2_tfrptnew:
case Hexagon::A2_tfrpfnew: {
MCOperand &MO = MappedInst.getOperand(2);
- unsigned High = RI->getSubReg(MO.getReg(), Hexagon::subreg_hireg);
- unsigned Low = RI->getSubReg(MO.getReg(), Hexagon::subreg_loreg);
+ unsigned High = RI->getSubReg(MO.getReg(), Hexagon::isub_hi);
+ unsigned Low = RI->getSubReg(MO.getReg(), Hexagon::isub_lo);
MO.setReg(High);
// Add a new operand for the second register in the pair.
MappedInst.addOperand(MCOperand::createReg(Low));
@@ -561,8 +550,8 @@ void HexagonAsmPrinter::HexagonProcessInstruction(MCInst &Inst,
Rt.setReg(getHexagonRegisterPair(Rt.getReg(), RI));
return;
}
- case Hexagon::HEXAGON_V6_vd0_pseudo:
- case Hexagon::HEXAGON_V6_vd0_pseudo_128B: {
+ case Hexagon::V6_vd0:
+ case Hexagon::V6_vd0_128B: {
MCInst TmpInst;
assert (Inst.getOperand(0).isReg() &&
"Expected register and none was found");
@@ -611,5 +600,5 @@ void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
extern "C" void LLVMInitializeHexagonAsmPrinter() {
- RegisterAsmPrinter<HexagonAsmPrinter> X(TheHexagonTarget);
+ RegisterAsmPrinter<HexagonAsmPrinter> X(getTheHexagonTarget());
}
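
The new 'L'/'H' handling in PrintAsmOperand() prints the low or high 32-bit sub-register of a 64-bit register pair. Numerically the pair holds what "combine(hi, lo)" builds, the same relationship the A2_tfrp lowering above relies on; an illustrative restatement, not part of the patch:

    #include <cstdint>
    #include <cstdio>

    // The 'L'/'H' asm-operand modifiers select the low/high 32-bit half of a
    // 64-bit register pair, i.e. the halves that "Rdd = combine(hi, lo)"
    // would reassemble.
    static uint64_t combine(uint32_t hi, uint32_t lo) {
      return ((uint64_t)hi << 32) | lo;
    }
    static uint32_t loHalf(uint64_t pair) { return (uint32_t)pair; }         // %L
    static uint32_t hiHalf(uint64_t pair) { return (uint32_t)(pair >> 32); } // %H

    int main() {
      uint64_t pair = combine(0xdeadbeef, 0x12345678);
      std::printf("H=0x%08x L=0x%08x\n", hiHalf(pair), loHalf(pair));
      return 0;
    }
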
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h
index a78d97e28427..775da03e0f8c 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonAsmPrinter.h
@@ -33,7 +33,7 @@ namespace llvm {
return AsmPrinter::runOnMachineFunction(Fn);
}
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "Hexagon Assembly Printer";
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
index c8b4a4cf9382..fe7278fde1b1 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonBitSimplify.cpp
@@ -11,29 +11,56 @@
#include "HexagonBitTracker.h"
#include "HexagonTargetMachine.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <limits>
+#include <utility>
+#include <vector>
using namespace llvm;
+static cl::opt<bool> PreserveTiedOps("hexbit-keep-tied", cl::Hidden,
+ cl::init(true), cl::desc("Preserve subregisters in tied operands"));
+
namespace llvm {
+
void initializeHexagonBitSimplifyPass(PassRegistry& Registry);
FunctionPass *createHexagonBitSimplify();
-}
+
+} // end namespace llvm
namespace {
+
// Set of virtual registers, based on BitVector.
struct RegisterSet : private BitVector {
- RegisterSet() : BitVector() {}
+ RegisterSet() = default;
explicit RegisterSet(unsigned s, bool t = false) : BitVector(s, t) {}
- RegisterSet(const RegisterSet &RS) : BitVector(RS) {}
+ RegisterSet(const RegisterSet &RS) = default;
using BitVector::clear;
using BitVector::count;
@@ -104,20 +131,23 @@ namespace {
if (size() <= Idx)
resize(std::max(Idx+1, 32U));
}
+
static inline unsigned v2x(unsigned v) {
return TargetRegisterInfo::virtReg2Index(v);
}
+
static inline unsigned x2v(unsigned x) {
return TargetRegisterInfo::index2VirtReg(x);
}
};
-
struct PrintRegSet {
PrintRegSet(const RegisterSet &S, const TargetRegisterInfo *RI)
: RS(S), TRI(RI) {}
+
friend raw_ostream &operator<< (raw_ostream &OS,
const PrintRegSet &P);
+
private:
const RegisterSet &RS;
const TargetRegisterInfo *TRI;
@@ -132,27 +162,28 @@ namespace {
OS << " }";
return OS;
}
-}
-
-namespace {
class Transformation;
class HexagonBitSimplify : public MachineFunctionPass {
public:
static char ID;
- HexagonBitSimplify() : MachineFunctionPass(ID), MDT(0) {
+
+ HexagonBitSimplify() : MachineFunctionPass(ID), MDT(nullptr) {
initializeHexagonBitSimplifyPass(*PassRegistry::getPassRegistry());
}
- virtual const char *getPassName() const {
+
+ StringRef getPassName() const override {
return "Hexagon bit simplification";
}
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
MachineFunctionPass::getAnalysisUsage(AU);
}
- virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
static void getInstrDefs(const MachineInstr &MI, RegisterSet &Defs);
static void getInstrUses(const MachineInstr &MI, RegisterSet &Uses);
@@ -171,7 +202,8 @@ namespace {
static bool replaceSubWithSub(unsigned OldR, unsigned OldSR,
unsigned NewR, unsigned NewSR, MachineRegisterInfo &MRI);
static bool parseRegSequence(const MachineInstr &I,
- BitTracker::RegisterRef &SL, BitTracker::RegisterRef &SH);
+ BitTracker::RegisterRef &SL, BitTracker::RegisterRef &SH,
+ const MachineRegisterInfo &MRI);
static bool getUsedBitsInStore(unsigned Opc, BitVector &Bits,
uint16_t Begin);
@@ -187,23 +219,27 @@ namespace {
MachineDominatorTree *MDT;
bool visitBlock(MachineBasicBlock &B, Transformation &T, RegisterSet &AVs);
+ static bool hasTiedUse(unsigned Reg, MachineRegisterInfo &MRI,
+ unsigned NewSub = Hexagon::NoSubRegister);
};
char HexagonBitSimplify::ID = 0;
typedef HexagonBitSimplify HBS;
-
// The purpose of this class is to provide a common facility to traverse
// the function top-down or bottom-up via the dominator tree, and keep
// track of the available registers.
class Transformation {
public:
bool TopDown;
+
Transformation(bool TD) : TopDown(TD) {}
+ virtual ~Transformation() = default;
+
virtual bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) = 0;
- virtual ~Transformation() {}
};
-}
+
+} // end anonymous namespace
INITIALIZE_PASS_BEGIN(HexagonBitSimplify, "hexbit",
"Hexagon bit simplification", false, false)
@@ -211,7 +247,6 @@ INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(HexagonBitSimplify, "hexbit",
"Hexagon bit simplification", false, false)
-
bool HexagonBitSimplify::visitBlock(MachineBasicBlock &B, Transformation &T,
RegisterSet &AVs) {
MachineDomTreeNode *N = MDT->getNode(&B);
@@ -290,7 +325,6 @@ bool HexagonBitSimplify::isZero(const BitTracker::RegisterCell &RC,
return true;
}
-
bool HexagonBitSimplify::getConst(const BitTracker::RegisterCell &RC,
uint16_t B, uint16_t W, uint64_t &U) {
assert(B < RC.width() && B+W <= RC.width());
@@ -307,7 +341,6 @@ bool HexagonBitSimplify::getConst(const BitTracker::RegisterCell &RC,
return true;
}
-
bool HexagonBitSimplify::replaceReg(unsigned OldR, unsigned NewR,
MachineRegisterInfo &MRI) {
if (!TargetRegisterInfo::isVirtualRegister(OldR) ||
@@ -322,12 +355,13 @@ bool HexagonBitSimplify::replaceReg(unsigned OldR, unsigned NewR,
return Begin != End;
}
-
bool HexagonBitSimplify::replaceRegWithSub(unsigned OldR, unsigned NewR,
unsigned NewSR, MachineRegisterInfo &MRI) {
if (!TargetRegisterInfo::isVirtualRegister(OldR) ||
!TargetRegisterInfo::isVirtualRegister(NewR))
return false;
+ if (hasTiedUse(OldR, MRI, NewSR))
+ return false;
auto Begin = MRI.use_begin(OldR), End = MRI.use_end();
decltype(End) NextI;
for (auto I = Begin; I != End; I = NextI) {
@@ -338,12 +372,13 @@ bool HexagonBitSimplify::replaceRegWithSub(unsigned OldR, unsigned NewR,
return Begin != End;
}
-
bool HexagonBitSimplify::replaceSubWithSub(unsigned OldR, unsigned OldSR,
unsigned NewR, unsigned NewSR, MachineRegisterInfo &MRI) {
if (!TargetRegisterInfo::isVirtualRegister(OldR) ||
!TargetRegisterInfo::isVirtualRegister(NewR))
return false;
+ if (OldSR != NewSR && hasTiedUse(OldR, MRI, NewSR))
+ return false;
auto Begin = MRI.use_begin(OldR), End = MRI.use_end();
decltype(End) NextI;
for (auto I = Begin; I != End; I = NextI) {
@@ -356,47 +391,54 @@ bool HexagonBitSimplify::replaceSubWithSub(unsigned OldR, unsigned OldSR,
return Begin != End;
}
-
// For a register ref (pair Reg:Sub), set Begin to the position of the LSB
// of Sub in Reg, and set Width to the size of Sub in bits. Return true,
// if this succeeded, otherwise return false.
bool HexagonBitSimplify::getSubregMask(const BitTracker::RegisterRef &RR,
unsigned &Begin, unsigned &Width, MachineRegisterInfo &MRI) {
const TargetRegisterClass *RC = MRI.getRegClass(RR.Reg);
- if (RC == &Hexagon::IntRegsRegClass) {
- assert(RR.Sub == 0);
+ if (RR.Sub == 0) {
Begin = 0;
- Width = 32;
+ Width = RC->getSize()*8;
return true;
}
- if (RC == &Hexagon::DoubleRegsRegClass) {
- if (RR.Sub == 0) {
- Begin = 0;
- Width = 64;
- return true;
- }
- assert(RR.Sub == Hexagon::subreg_loreg || RR.Sub == Hexagon::subreg_hireg);
- Width = 32;
- Begin = (RR.Sub == Hexagon::subreg_loreg ? 0 : 32);
- return true;
+
+ Begin = 0;
+
+ switch (RC->getID()) {
+ case Hexagon::DoubleRegsRegClassID:
+ case Hexagon::VecDblRegsRegClassID:
+ case Hexagon::VecDblRegs128BRegClassID:
+ Width = RC->getSize()*8 / 2;
+ if (RR.Sub == Hexagon::isub_hi || RR.Sub == Hexagon::vsub_hi)
+ Begin = Width;
+ break;
+ default:
+ return false;
}
- return false;
+ return true;
}
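
The rewritten getSubregMask() reports where a subregister's bits live inside the full register: Begin is the LSB position and Width the size in bits, with the high half starting at Width for the pair classes. A plain restatement of that arithmetic for the common 64-bit DoubleRegs case:

    #include <cstdio>
    #include <utility>

    // Mirrors the getSubregMask() logic above for a 64-bit register pair:
    // a subregister reference selects either the low or high half, so Begin
    // is the bit position of that half's least-significant bit.
    enum Sub { Whole, Lo, Hi };

    static std::pair<unsigned, unsigned> subregMask(unsigned RegBits, Sub S) {
      if (S == Whole)
        return {0u, RegBits};      // whole register: starts at 0, full width
      unsigned Half = RegBits / 2;
      return {S == Hi ? Half : 0u, Half};
    }

    int main() {
      auto W = subregMask(64, Whole), L = subregMask(64, Lo), H = subregMask(64, Hi);
      std::printf("whole=(%u,%u) lo=(%u,%u) hi=(%u,%u)\n",
                  W.first, W.second, L.first, L.second, H.first, H.second);
      return 0;   // prints whole=(0,64) lo=(0,32) hi=(32,32)
    }
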
// For a REG_SEQUENCE, set SL to the low subregister and SH to the high
// subregister.
bool HexagonBitSimplify::parseRegSequence(const MachineInstr &I,
- BitTracker::RegisterRef &SL, BitTracker::RegisterRef &SH) {
+ BitTracker::RegisterRef &SL, BitTracker::RegisterRef &SH,
+ const MachineRegisterInfo &MRI) {
assert(I.getOpcode() == TargetOpcode::REG_SEQUENCE);
unsigned Sub1 = I.getOperand(2).getImm(), Sub2 = I.getOperand(4).getImm();
- assert(Sub1 != Sub2);
- if (Sub1 == Hexagon::subreg_loreg && Sub2 == Hexagon::subreg_hireg) {
+ auto *DstRC = MRI.getRegClass(I.getOperand(0).getReg());
+ auto &HRI = static_cast<const HexagonRegisterInfo&>(
+ *MRI.getTargetRegisterInfo());
+ unsigned SubLo = HRI.getHexagonSubRegIndex(DstRC, Hexagon::ps_sub_lo);
+ unsigned SubHi = HRI.getHexagonSubRegIndex(DstRC, Hexagon::ps_sub_hi);
+ assert((Sub1 == SubLo && Sub2 == SubHi) || (Sub1 == SubHi && Sub2 == SubLo));
+ if (Sub1 == SubLo && Sub2 == SubHi) {
SL = I.getOperand(1);
SH = I.getOperand(3);
return true;
}
- if (Sub1 == Hexagon::subreg_hireg && Sub2 == Hexagon::subreg_loreg) {
+ if (Sub1 == SubHi && Sub2 == SubLo) {
SH = I.getOperand(1);
SL = I.getOperand(3);
return true;
@@ -404,7 +446,6 @@ bool HexagonBitSimplify::parseRegSequence(const MachineInstr &I,
return false;
}
-
// All stores (except 64-bit stores) take a 32-bit register as the source
// of the value to be stored. If the instruction stores into a location
// that is shorter than 32 bits, some bits of the source register are not
@@ -562,7 +603,6 @@ bool HexagonBitSimplify::getUsedBitsInStore(unsigned Opc, BitVector &Bits,
return false;
}
-
// For an instruction with opcode Opc, calculate the set of bits that it
// uses in a register in operand OpN. This only calculates the set of used
// bits for cases where it does not depend on any operands (as is the case
@@ -842,9 +882,8 @@ bool HexagonBitSimplify::getUsedBits(unsigned Opc, unsigned OpN,
return false;
}
-
// Calculate the register class that matches Reg:Sub. For example, if
-// vreg1 is a double register, then vreg1:subreg_hireg would match "int"
+// vreg1 is a double register, then vreg1:isub_hi would match the "int"
// register class.
const TargetRegisterClass *HexagonBitSimplify::getFinalVRegClass(
const BitTracker::RegisterRef &RR, MachineRegisterInfo &MRI) {
@@ -853,26 +892,28 @@ const TargetRegisterClass *HexagonBitSimplify::getFinalVRegClass(
auto *RC = MRI.getRegClass(RR.Reg);
if (RR.Sub == 0)
return RC;
+ auto &HRI = static_cast<const HexagonRegisterInfo&>(
+ *MRI.getTargetRegisterInfo());
- auto VerifySR = [] (unsigned Sub) -> void {
- assert(Sub == Hexagon::subreg_hireg || Sub == Hexagon::subreg_loreg);
+ auto VerifySR = [&HRI] (const TargetRegisterClass *RC, unsigned Sub) -> void {
+ assert(Sub == HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_lo) ||
+ Sub == HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_hi));
};
switch (RC->getID()) {
case Hexagon::DoubleRegsRegClassID:
- VerifySR(RR.Sub);
+ VerifySR(RC, RR.Sub);
return &Hexagon::IntRegsRegClass;
case Hexagon::VecDblRegsRegClassID:
- VerifySR(RR.Sub);
+ VerifySR(RC, RR.Sub);
return &Hexagon::VectorRegsRegClass;
case Hexagon::VecDblRegs128BRegClassID:
- VerifySR(RR.Sub);
+ VerifySR(RC, RR.Sub);
return &Hexagon::VectorRegs128BRegClass;
}
return nullptr;
}
-
// Check if RD could be replaced with RS at any possible use of RD.
// For example a predicate register cannot be replaced with a integer
// register, but a 64-bit register with a subregister can be replaced
@@ -890,11 +931,18 @@ bool HexagonBitSimplify::isTransparentCopy(const BitTracker::RegisterRef &RD,
return DRC == getFinalVRegClass(RS, MRI);
}
+bool HexagonBitSimplify::hasTiedUse(unsigned Reg, MachineRegisterInfo &MRI,
+ unsigned NewSub) {
+ if (!PreserveTiedOps)
+ return false;
+ return llvm::any_of(MRI.use_operands(Reg),
+ [NewSub] (const MachineOperand &Op) -> bool {
+ return Op.getSubReg() != NewSub && Op.isTied();
+ });
+}
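
hasTiedUse() makes replaceRegWithSub/replaceSubWithSub back off when some use of the register is a tied operand that would need a different subregister, gated by the new -hexbit-keep-tied option. The same predicate shape, standalone with std::any_of:

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    // Stand-in for MachineOperand: only the two fields the predicate inspects.
    struct UseOp {
      unsigned SubReg;
      bool Tied;
    };

    // Mirrors hasTiedUse(): true if some use is tied and carries a different
    // subregister than the one we want to rewrite to.
    static bool hasTiedUse(const std::vector<UseOp> &Uses, unsigned NewSub) {
      return std::any_of(Uses.begin(), Uses.end(), [NewSub](const UseOp &Op) {
        return Op.SubReg != NewSub && Op.Tied;
      });
    }

    int main() {
      std::vector<UseOp> Uses = {{0, false}, {1, true}};
      std::printf("%d %d\n", hasTiedUse(Uses, 1), hasTiedUse(Uses, 2)); // 0 1
      return 0;
    }
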
-//
-// Dead code elimination
-//
namespace {
+
class DeadCodeElimination {
public:
DeadCodeElimination(MachineFunction &mf, MachineDominatorTree &mdt)
@@ -914,8 +962,8 @@ namespace {
MachineDominatorTree &MDT;
MachineRegisterInfo &MRI;
};
-}
+} // end anonymous namespace
bool DeadCodeElimination::isDead(unsigned R) const {
for (auto I = MRI.use_begin(R), E = MRI.use_end(); I != E; ++I) {
@@ -933,7 +981,6 @@ bool DeadCodeElimination::isDead(unsigned R) const {
return true;
}
-
bool DeadCodeElimination::runOnNode(MachineDomTreeNode *N) {
bool Changed = false;
typedef GraphTraits<MachineDomTreeNode*> GTN;
@@ -983,8 +1030,8 @@ bool DeadCodeElimination::runOnNode(MachineDomTreeNode *N) {
return Changed;
}
+namespace {
-//
// Eliminate redundant instructions
//
// This transformation will identify instructions where the output register
@@ -995,13 +1042,14 @@ bool DeadCodeElimination::runOnNode(MachineDomTreeNode *N) {
// registers.
// If the output matches an input, the instruction is replaced with COPY.
// The copies will be removed by another transformation.
-namespace {
class RedundantInstrElimination : public Transformation {
public:
RedundantInstrElimination(BitTracker &bt, const HexagonInstrInfo &hii,
MachineRegisterInfo &mri)
: Transformation(true), HII(hii), MRI(mri), BT(bt) {}
+
bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
+
private:
bool isLossyShiftLeft(const MachineInstr &MI, unsigned OpN,
unsigned &LostB, unsigned &LostE);
@@ -1016,8 +1064,8 @@ namespace {
MachineRegisterInfo &MRI;
BitTracker &BT;
};
-}
+} // end anonymous namespace
// Check if the instruction is a lossy shift left, where the input being
// shifted is the operand OpN of MI. If true, [LostB, LostE) is the range
@@ -1025,6 +1073,7 @@ namespace {
bool RedundantInstrElimination::isLossyShiftLeft(const MachineInstr &MI,
unsigned OpN, unsigned &LostB, unsigned &LostE) {
using namespace Hexagon;
+
unsigned Opc = MI.getOpcode();
unsigned ImN, RegN, Width;
switch (Opc) {
@@ -1078,13 +1127,13 @@ bool RedundantInstrElimination::isLossyShiftLeft(const MachineInstr &MI,
return true;
}
-
// Check if the instruction is a lossy shift right, where the input being
// shifted is the operand OpN of MI. If true, [LostB, LostE) is the range
// of bit indices that are lost.
bool RedundantInstrElimination::isLossyShiftRight(const MachineInstr &MI,
unsigned OpN, unsigned &LostB, unsigned &LostE) {
using namespace Hexagon;
+
unsigned Opc = MI.getOpcode();
unsigned ImN, RegN;
switch (Opc) {
@@ -1141,7 +1190,6 @@ bool RedundantInstrElimination::isLossyShiftRight(const MachineInstr &MI,
return true;
}
-
// Calculate the bit vector that corresponds to the used bits of register Reg.
// The vector Bits has the same size as Reg in bits. If the cal-
// culation fails (i.e. the used bits are unknown), it returns false. Other-
@@ -1178,7 +1226,6 @@ bool RedundantInstrElimination::computeUsedBits(unsigned Reg, BitVector &Bits) {
return true;
}
-
// Calculate the bits used by instruction MI in a register in operand OpN.
// Return true/false if the calculation succeeds/fails. If it succeeds, set
// used bits in Bits. This function does not reset any bits in Bits, so
@@ -1188,11 +1235,11 @@ bool RedundantInstrElimination::computeUsedBits(unsigned Reg, BitVector &Bits) {
// holds the bits for the entire register. To keep track of that, the
// argument Begin indicates where in Bits is the lowest-significant bit
// of the register used in operand OpN. For example, in instruction:
-// vreg1 = S2_lsr_i_r vreg2:subreg_hireg, 10
+// vreg1 = S2_lsr_i_r vreg2:isub_hi, 10
// the operand 1 is a 32-bit register, which happens to be a subregister
// of the 64-bit register vreg2, and that subregister starts at position 32.
// In this case Begin=32, since Bits[32] would be the lowest-significant bit
-// of vreg2:subreg_hireg.
+// of vreg2:isub_hi.
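// [Editorial illustration, not part of the patch] Assuming the layout in the
// comment above: the operand is the 32-bit isub_hi half of a 64-bit register,
// so Begin = 32, and a right shift by 10 uses bits [10..32) of that half,
// i.e. bits [42..64) of the full register.
#include <bitset>

int main() {
  std::bitset<64> Bits;       // used bits of the full 64-bit register
  const unsigned Begin = 32;  // low bit of the isub_hi operand within it
  for (unsigned i = 10; i < 32; ++i)
    Bits.set(Begin + i);      // bits read by the shift-right-by-10
  return Bits.count() == 22 ? 0 : 1;
}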
bool RedundantInstrElimination::computeUsedBits(const MachineInstr &MI,
unsigned OpN, BitVector &Bits, uint16_t Begin) {
unsigned Opc = MI.getOpcode();
@@ -1219,7 +1266,6 @@ bool RedundantInstrElimination::computeUsedBits(const MachineInstr &MI,
return GotBits;
}
-
// Calculates the used bits in RD ("defined register"), and checks if these
// bits in RS ("used register") and RD are identical.
bool RedundantInstrElimination::usedBitsEqual(BitTracker::RegisterRef RD,
@@ -1246,9 +1292,10 @@ bool RedundantInstrElimination::usedBitsEqual(BitTracker::RegisterRef RD,
return true;
}
-
bool RedundantInstrElimination::processBlock(MachineBasicBlock &B,
const RegisterSet&) {
+ if (!BT.reached(&B))
+ return false;
bool Changed = false;
for (auto I = B.begin(), E = B.end(), NextI = I; I != E; ++I) {
@@ -1292,10 +1339,20 @@ bool RedundantInstrElimination::processBlock(MachineBasicBlock &B,
const DebugLoc &DL = MI->getDebugLoc();
const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
unsigned NewR = MRI.createVirtualRegister(FRC);
- BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR)
- .addReg(RS.Reg, 0, RS.Sub);
+ MachineInstr *CopyI =
+ BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR)
+ .addReg(RS.Reg, 0, RS.Sub);
HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
- BT.put(BitTracker::RegisterRef(NewR), SC);
+ // This pass can create copies between registers that don't have the
+ // exact same values. Updating the tracker has to involve updating
+ // all dependent cells. Example:
+ // vreg1 = inst vreg2 ; vreg1 != vreg2, but used bits are equal
+ //
+ // vreg3 = copy vreg2 ; <- inserted
+ // ... = vreg3 ; <- replaced from vreg2
+ // Indirectly, we can create a "copy" between vreg1 and vreg2 even
+ // though their exact values do not match.
+ BT.visit(*CopyI);
Changed = true;
break;
}
@@ -1304,22 +1361,20 @@ bool RedundantInstrElimination::processBlock(MachineBasicBlock &B,
return Changed;
}
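// [Editorial illustration, not part of the patch] What "used bits are equal"
// in the comment above means in practice: if consumers only read the low
// half of the result, an instruction that leaves the low half of its input
// unchanged can be bypassed with a copy of the input, even though the full
// 32-bit values differ. The helpers below are illustrative only.
#include <cassert>
#include <cstdint>

static uint32_t inst(uint32_t V) { return V | 0xFFFF0000u; } // keeps low 16 bits
static uint16_t consumer(uint32_t V) { return static_cast<uint16_t>(V); }

int main() {
  uint32_t Vreg2 = 0x1234u;
  uint32_t Vreg1 = inst(Vreg2);               // Vreg1 != Vreg2
  assert(consumer(Vreg1) == consumer(Vreg2)); // but the used bits agree
  return 0;
}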
+namespace {
-//
-// Const generation
-//
// Recognize instructions that produce constant values known at compile-time.
// Replace them with register definitions that load these constants directly.
-namespace {
class ConstGeneration : public Transformation {
public:
ConstGeneration(BitTracker &bt, const HexagonInstrInfo &hii,
MachineRegisterInfo &mri)
: Transformation(true), HII(hii), MRI(mri), BT(bt) {}
+
bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
+ static bool isTfrConst(const MachineInstr &MI);
+
private:
- bool isTfrConst(const MachineInstr &MI) const;
- bool isConst(unsigned R, int64_t &V) const;
unsigned genTfrConst(const TargetRegisterClass *RC, int64_t C,
MachineBasicBlock &B, MachineBasicBlock::iterator At, DebugLoc &DL);
@@ -1327,42 +1382,25 @@ namespace {
MachineRegisterInfo &MRI;
BitTracker &BT;
};
-}
-bool ConstGeneration::isConst(unsigned R, int64_t &C) const {
- if (!BT.has(R))
- return false;
- const BitTracker::RegisterCell &RC = BT.lookup(R);
- int64_t T = 0;
- for (unsigned i = RC.width(); i > 0; --i) {
- const BitTracker::BitValue &V = RC[i-1];
- T <<= 1;
- if (V.is(1))
- T |= 1;
- else if (!V.is(0))
- return false;
- }
- C = T;
- return true;
-}
+} // end anonymous namespace
-bool ConstGeneration::isTfrConst(const MachineInstr &MI) const {
+bool ConstGeneration::isTfrConst(const MachineInstr &MI) {
unsigned Opc = MI.getOpcode();
switch (Opc) {
case Hexagon::A2_combineii:
case Hexagon::A4_combineii:
case Hexagon::A2_tfrsi:
case Hexagon::A2_tfrpi:
- case Hexagon::TFR_PdTrue:
- case Hexagon::TFR_PdFalse:
- case Hexagon::CONST32_Int_Real:
- case Hexagon::CONST64_Int_Real:
+ case Hexagon::PS_true:
+ case Hexagon::PS_false:
+ case Hexagon::CONST32:
+ case Hexagon::CONST64:
return true;
}
return false;
}
-
// Generate a transfer-immediate instruction that is appropriate for the
// register class and the actual value being transferred.
unsigned ConstGeneration::genTfrConst(const TargetRegisterClass *RC, int64_t C,
@@ -1391,7 +1429,7 @@ unsigned ConstGeneration::genTfrConst(const TargetRegisterClass *RC, int64_t C,
return Reg;
}
- BuildMI(B, At, DL, HII.get(Hexagon::CONST64_Int_Real), Reg)
+ BuildMI(B, At, DL, HII.get(Hexagon::CONST64), Reg)
.addImm(C);
return Reg;
}
@@ -1399,9 +1437,9 @@ unsigned ConstGeneration::genTfrConst(const TargetRegisterClass *RC, int64_t C,
if (RC == &Hexagon::PredRegsRegClass) {
unsigned Opc;
if (C == 0)
- Opc = Hexagon::TFR_PdFalse;
+ Opc = Hexagon::PS_false;
else if ((C & 0xFF) == 0xFF)
- Opc = Hexagon::TFR_PdTrue;
+ Opc = Hexagon::PS_true;
else
return 0;
BuildMI(B, At, DL, HII.get(Opc), Reg);
@@ -1411,8 +1449,9 @@ unsigned ConstGeneration::genTfrConst(const TargetRegisterClass *RC, int64_t C,
return 0;
}
-
bool ConstGeneration::processBlock(MachineBasicBlock &B, const RegisterSet&) {
+ if (!BT.reached(&B))
+ return false;
bool Changed = false;
RegisterSet Defs;
@@ -1426,14 +1465,16 @@ bool ConstGeneration::processBlock(MachineBasicBlock &B, const RegisterSet&) {
unsigned DR = Defs.find_first();
if (!TargetRegisterInfo::isVirtualRegister(DR))
continue;
- int64_t C;
- if (isConst(DR, C)) {
+ uint64_t U;
+ const BitTracker::RegisterCell &DRC = BT.lookup(DR);
+ if (HBS::getConst(DRC, 0, DRC.width(), U)) {
+ int64_t C = U;
DebugLoc DL = I->getDebugLoc();
auto At = I->isPHI() ? B.getFirstNonPHI() : I;
unsigned ImmReg = genTfrConst(MRI.getRegClass(DR), C, B, At, DL);
if (ImmReg) {
HBS::replaceReg(DR, ImmReg, MRI);
- BT.put(ImmReg, BT.lookup(DR));
+ BT.put(ImmReg, DRC);
Changed = true;
}
}
@@ -1441,48 +1482,49 @@ bool ConstGeneration::processBlock(MachineBasicBlock &B, const RegisterSet&) {
return Changed;
}
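// [Editorial sketch, not part of the patch] The constant-extraction logic
// that the removed ConstGeneration::isConst implemented and that the code
// above now obtains through HBS::getConst: walk the cell from the most
// significant bit down and give up on any bit that is not a known 0 or 1.
// The sketch namespace and Bit type are illustrative only.
#include <cstddef>
#include <cstdint>
#include <optional>
#include <vector>

namespace sketch {

enum class Bit { Zero, One, Unknown };

std::optional<int64_t> cellToConstant(const std::vector<Bit> &Cell) {
  int64_t T = 0;
  for (std::size_t i = Cell.size(); i > 0; --i) {
    T <<= 1;
    if (Cell[i - 1] == Bit::One)
      T |= 1;
    else if (Cell[i - 1] != Bit::Zero)
      return std::nullopt; // unknown bit: not a compile-time constant
  }
  return T;
}

} // namespace sketch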
+namespace {
-//
-// Copy generation
-//
// Identify pairs of available registers which hold identical values.
// In such cases, only one of them needs to be calculated; the other one
// will be defined as a copy of the first.
-//
-// Copy propagation
-//
-// Eliminate register copies RD = RS, by replacing the uses of RD with
-// with uses of RS.
-namespace {
class CopyGeneration : public Transformation {
public:
CopyGeneration(BitTracker &bt, const HexagonInstrInfo &hii,
- MachineRegisterInfo &mri)
- : Transformation(true), HII(hii), MRI(mri), BT(bt) {}
+ const HexagonRegisterInfo &hri, MachineRegisterInfo &mri)
+ : Transformation(true), HII(hii), HRI(hri), MRI(mri), BT(bt) {}
+
bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
+
private:
bool findMatch(const BitTracker::RegisterRef &Inp,
BitTracker::RegisterRef &Out, const RegisterSet &AVs);
const HexagonInstrInfo &HII;
+ const HexagonRegisterInfo &HRI;
MachineRegisterInfo &MRI;
BitTracker &BT;
+ RegisterSet Forbidden;
};
+// Eliminate register copies RD = RS, by replacing the uses of RD with
+// uses of RS.
class CopyPropagation : public Transformation {
public:
CopyPropagation(const HexagonRegisterInfo &hri, MachineRegisterInfo &mri)
- : Transformation(false), MRI(mri) {}
+ : Transformation(false), HRI(hri), MRI(mri) {}
+
bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
- static bool isCopyReg(unsigned Opc);
+
+ static bool isCopyReg(unsigned Opc, bool NoConv);
+
private:
bool propagateRegCopy(MachineInstr &MI);
+ const HexagonRegisterInfo &HRI;
MachineRegisterInfo &MRI;
};
-}
-
+} // end anonymous namespace
/// Check if there is a register in AVs that is identical to Inp. If so,
/// set Out to the found register. The output may be a pair Reg:Sub.
@@ -1491,17 +1533,20 @@ bool CopyGeneration::findMatch(const BitTracker::RegisterRef &Inp,
if (!BT.has(Inp.Reg))
return false;
const BitTracker::RegisterCell &InpRC = BT.lookup(Inp.Reg);
+ auto *FRC = HBS::getFinalVRegClass(Inp, MRI);
unsigned B, W;
if (!HBS::getSubregMask(Inp, B, W, MRI))
return false;
for (unsigned R = AVs.find_first(); R; R = AVs.find_next(R)) {
- if (!BT.has(R) || !HBS::isTransparentCopy(R, Inp, MRI))
+ if (!BT.has(R) || Forbidden[R])
continue;
const BitTracker::RegisterCell &RC = BT.lookup(R);
unsigned RW = RC.width();
if (W == RW) {
- if (MRI.getRegClass(Inp.Reg) != MRI.getRegClass(R))
+ if (FRC != MRI.getRegClass(R))
+ continue;
+ if (!HBS::isTransparentCopy(R, Inp, MRI))
continue;
if (!HBS::isEqual(InpRC, B, RC, 0, W))
continue;
@@ -1518,20 +1563,22 @@ bool CopyGeneration::findMatch(const BitTracker::RegisterRef &Inp,
continue;
if (HBS::isEqual(InpRC, B, RC, 0, W))
- Out.Sub = Hexagon::subreg_loreg;
+ Out.Sub = Hexagon::isub_lo;
else if (HBS::isEqual(InpRC, B, RC, W, W))
- Out.Sub = Hexagon::subreg_hireg;
+ Out.Sub = Hexagon::isub_hi;
else
continue;
Out.Reg = R;
- return true;
+ if (HBS::isTransparentCopy(Out, Inp, MRI))
+ return true;
}
return false;
}
-
bool CopyGeneration::processBlock(MachineBasicBlock &B,
const RegisterSet &AVs) {
+ if (!BT.reached(&B))
+ return false;
RegisterSet AVB(AVs);
bool Changed = false;
RegisterSet Defs;
@@ -1543,44 +1590,74 @@ bool CopyGeneration::processBlock(MachineBasicBlock &B,
HBS::getInstrDefs(*I, Defs);
unsigned Opc = I->getOpcode();
- if (CopyPropagation::isCopyReg(Opc))
+ if (CopyPropagation::isCopyReg(Opc, false) ||
+ ConstGeneration::isTfrConst(*I))
continue;
+ DebugLoc DL = I->getDebugLoc();
+ auto At = I->isPHI() ? B.getFirstNonPHI() : I;
+
for (unsigned R = Defs.find_first(); R; R = Defs.find_next(R)) {
BitTracker::RegisterRef MR;
- if (!findMatch(R, MR, AVB))
+ auto *FRC = HBS::getFinalVRegClass(R, MRI);
+
+ if (findMatch(R, MR, AVB)) {
+ unsigned NewR = MRI.createVirtualRegister(FRC);
+ BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR)
+ .addReg(MR.Reg, 0, MR.Sub);
+ BT.put(BitTracker::RegisterRef(NewR), BT.get(MR));
+ HBS::replaceReg(R, NewR, MRI);
+ Forbidden.insert(R);
continue;
- DebugLoc DL = I->getDebugLoc();
- auto *FRC = HBS::getFinalVRegClass(MR, MRI);
- unsigned NewR = MRI.createVirtualRegister(FRC);
- auto At = I->isPHI() ? B.getFirstNonPHI() : I;
- BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR)
- .addReg(MR.Reg, 0, MR.Sub);
- BT.put(BitTracker::RegisterRef(NewR), BT.get(MR));
+ }
+
+ if (FRC == &Hexagon::DoubleRegsRegClass ||
+ FRC == &Hexagon::VecDblRegsRegClass ||
+ FRC == &Hexagon::VecDblRegs128BRegClass) {
+ // Try to generate REG_SEQUENCE.
+ unsigned SubLo = HRI.getHexagonSubRegIndex(FRC, Hexagon::ps_sub_lo);
+ unsigned SubHi = HRI.getHexagonSubRegIndex(FRC, Hexagon::ps_sub_hi);
+ BitTracker::RegisterRef TL = { R, SubLo };
+ BitTracker::RegisterRef TH = { R, SubHi };
+ BitTracker::RegisterRef ML, MH;
+ if (findMatch(TL, ML, AVB) && findMatch(TH, MH, AVB)) {
+ auto *FRC = HBS::getFinalVRegClass(R, MRI);
+ unsigned NewR = MRI.createVirtualRegister(FRC);
+ BuildMI(B, At, DL, HII.get(TargetOpcode::REG_SEQUENCE), NewR)
+ .addReg(ML.Reg, 0, ML.Sub)
+ .addImm(SubLo)
+ .addReg(MH.Reg, 0, MH.Sub)
+ .addImm(SubHi);
+ BT.put(BitTracker::RegisterRef(NewR), BT.get(R));
+ HBS::replaceReg(R, NewR, MRI);
+ Forbidden.insert(R);
+ }
+ }
}
}
return Changed;
}
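// [Editorial model, not part of the patch] The value a REG_SEQUENCE of
// isub_lo/isub_hi denotes, assuming 32-bit halves of a 64-bit register:
// the high half sits above the low half. Illustrative helper only.
#include <cstdint>

namespace sketch {

uint64_t regSequenceLoHi(uint32_t Lo, uint32_t Hi) {
  return (static_cast<uint64_t>(Hi) << 32) | Lo;
}

} // namespace sketch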
-
-bool CopyPropagation::isCopyReg(unsigned Opc) {
+bool CopyPropagation::isCopyReg(unsigned Opc, bool NoConv) {
switch (Opc) {
case TargetOpcode::COPY:
case TargetOpcode::REG_SEQUENCE:
- case Hexagon::A2_tfr:
- case Hexagon::A2_tfrp:
- case Hexagon::A2_combinew:
case Hexagon::A4_combineir:
case Hexagon::A4_combineri:
return true;
+ case Hexagon::A2_tfr:
+ case Hexagon::A2_tfrp:
+ case Hexagon::A2_combinew:
+ case Hexagon::V6_vcombine:
+ case Hexagon::V6_vcombine_128B:
+ return NoConv;
default:
break;
}
return false;
}
-
bool CopyPropagation::propagateRegCopy(MachineInstr &MI) {
bool Changed = false;
unsigned Opc = MI.getOpcode();
@@ -1602,27 +1679,31 @@ bool CopyPropagation::propagateRegCopy(MachineInstr &MI) {
}
case TargetOpcode::REG_SEQUENCE: {
BitTracker::RegisterRef SL, SH;
- if (HBS::parseRegSequence(MI, SL, SH)) {
- Changed = HBS::replaceSubWithSub(RD.Reg, Hexagon::subreg_loreg,
- SL.Reg, SL.Sub, MRI);
- Changed |= HBS::replaceSubWithSub(RD.Reg, Hexagon::subreg_hireg,
- SH.Reg, SH.Sub, MRI);
+ if (HBS::parseRegSequence(MI, SL, SH, MRI)) {
+ const TargetRegisterClass *RC = MRI.getRegClass(RD.Reg);
+ unsigned SubLo = HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_lo);
+ unsigned SubHi = HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_hi);
+ Changed = HBS::replaceSubWithSub(RD.Reg, SubLo, SL.Reg, SL.Sub, MRI);
+ Changed |= HBS::replaceSubWithSub(RD.Reg, SubHi, SH.Reg, SH.Sub, MRI);
}
break;
}
- case Hexagon::A2_combinew: {
+ case Hexagon::A2_combinew:
+ case Hexagon::V6_vcombine:
+ case Hexagon::V6_vcombine_128B: {
+ const TargetRegisterClass *RC = MRI.getRegClass(RD.Reg);
+ unsigned SubLo = HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_lo);
+ unsigned SubHi = HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_hi);
BitTracker::RegisterRef RH = MI.getOperand(1), RL = MI.getOperand(2);
- Changed = HBS::replaceSubWithSub(RD.Reg, Hexagon::subreg_loreg,
- RL.Reg, RL.Sub, MRI);
- Changed |= HBS::replaceSubWithSub(RD.Reg, Hexagon::subreg_hireg,
- RH.Reg, RH.Sub, MRI);
+ Changed = HBS::replaceSubWithSub(RD.Reg, SubLo, RL.Reg, RL.Sub, MRI);
+ Changed |= HBS::replaceSubWithSub(RD.Reg, SubHi, RH.Reg, RH.Sub, MRI);
break;
}
case Hexagon::A4_combineir:
case Hexagon::A4_combineri: {
unsigned SrcX = (Opc == Hexagon::A4_combineir) ? 2 : 1;
- unsigned Sub = (Opc == Hexagon::A4_combineir) ? Hexagon::subreg_loreg
- : Hexagon::subreg_hireg;
+ unsigned Sub = (Opc == Hexagon::A4_combineir) ? Hexagon::isub_lo
+ : Hexagon::isub_hi;
BitTracker::RegisterRef RS = MI.getOperand(SrcX);
Changed = HBS::replaceSubWithSub(RD.Reg, Sub, RS.Reg, RS.Sub, MRI);
break;
@@ -1631,7 +1712,6 @@ bool CopyPropagation::propagateRegCopy(MachineInstr &MI) {
return Changed;
}
-
bool CopyPropagation::processBlock(MachineBasicBlock &B, const RegisterSet&) {
std::vector<MachineInstr*> Instrs;
for (auto I = B.rbegin(), E = B.rend(); I != E; ++I)
@@ -1640,7 +1720,7 @@ bool CopyPropagation::processBlock(MachineBasicBlock &B, const RegisterSet&) {
bool Changed = false;
for (auto I : Instrs) {
unsigned Opc = I->getOpcode();
- if (!CopyPropagation::isCopyReg(Opc))
+ if (!CopyPropagation::isCopyReg(Opc, true))
continue;
Changed |= propagateRegCopy(*I);
}
@@ -1648,20 +1728,20 @@ bool CopyPropagation::processBlock(MachineBasicBlock &B, const RegisterSet&) {
return Changed;
}
+namespace {
-//
-// Bit simplification
-//
// Recognize patterns that can be simplified and replace them with the
// simpler forms.
// This is by no means complete.
-namespace {
class BitSimplification : public Transformation {
public:
BitSimplification(BitTracker &bt, const HexagonInstrInfo &hii,
- MachineRegisterInfo &mri)
- : Transformation(true), HII(hii), MRI(mri), BT(bt) {}
+ const HexagonRegisterInfo &hri, MachineRegisterInfo &mri,
+ MachineFunction &mf)
+ : Transformation(true), HII(hii), HRI(hri), MRI(mri), MF(mf), BT(bt) {}
+
bool processBlock(MachineBasicBlock &B, const RegisterSet &AVs) override;
+
private:
struct RegHalf : public BitTracker::RegisterRef {
bool Low; // Low/High halfword.
@@ -1669,6 +1749,7 @@ namespace {
bool matchHalf(unsigned SelfR, const BitTracker::RegisterCell &RC,
unsigned B, RegHalf &RH);
+ bool validateReg(BitTracker::RegisterRef R, unsigned Opc, unsigned OpNum);
bool matchPackhl(unsigned SelfR, const BitTracker::RegisterCell &RC,
BitTracker::RegisterRef &Rs, BitTracker::RegisterRef &Rt);
@@ -1688,11 +1769,13 @@ namespace {
const BitTracker::RegisterCell &RC);
const HexagonInstrInfo &HII;
+ const HexagonRegisterInfo &HRI;
MachineRegisterInfo &MRI;
+ MachineFunction &MF;
BitTracker &BT;
};
-}
+} // end anonymous namespace
// Check if the bits [B..B+16) in register cell RC form a valid halfword,
// i.e. [0..16), [16..32), etc. of some register. If so, return true and
@@ -1746,19 +1829,19 @@ bool BitSimplification::matchHalf(unsigned SelfR,
unsigned Sub = 0;
switch (Pos) {
case 0:
- Sub = Hexagon::subreg_loreg;
+ Sub = Hexagon::isub_lo;
Low = true;
break;
case 16:
- Sub = Hexagon::subreg_loreg;
+ Sub = Hexagon::isub_lo;
Low = false;
break;
case 32:
- Sub = Hexagon::subreg_hireg;
+ Sub = Hexagon::isub_hi;
Low = true;
break;
case 48:
- Sub = Hexagon::subreg_hireg;
+ Sub = Hexagon::isub_hi;
Low = false;
break;
default:
@@ -1775,6 +1858,12 @@ bool BitSimplification::matchHalf(unsigned SelfR,
return true;
}
+bool BitSimplification::validateReg(BitTracker::RegisterRef R, unsigned Opc,
+ unsigned OpNum) {
+ auto *OpRC = HII.getRegClass(HII.get(Opc), OpNum, &HRI, MF);
+ auto *RRC = HBS::getFinalVRegClass(R, MRI);
+ return OpRC->hasSubClassEq(RRC);
+}
// Check if RC matches the pattern of a S2_packhl. If so, return true and
// set the inputs Rs and Rt.
@@ -1799,7 +1888,6 @@ bool BitSimplification::matchPackhl(unsigned SelfR,
return true;
}
-
unsigned BitSimplification::getCombineOpcode(bool HLow, bool LLow) {
return HLow ? LLow ? Hexagon::A2_combine_ll
: Hexagon::A2_combine_lh
@@ -1807,7 +1895,6 @@ unsigned BitSimplification::getCombineOpcode(bool HLow, bool LLow) {
: Hexagon::A2_combine_hh;
}
-
// If MI stores the upper halfword of a register (potentially obtained via
// shifts or extracts), replace it with a storerf instruction. This could
// cause the "extraction" code to become dead.
@@ -1832,7 +1919,6 @@ bool BitSimplification::genStoreUpperHalf(MachineInstr *MI) {
return true;
}
-
// If MI stores a value known at compile-time, and the value is within a range
// that avoids using constant-extenders, replace it with a store-immediate.
bool BitSimplification::genStoreImmediate(MachineInstr *MI) {
@@ -1901,7 +1987,6 @@ bool BitSimplification::genStoreImmediate(MachineInstr *MI) {
return true;
}
-
// If MI is equivalent to S2_packhl, generate the S2_packhl. MI could be the
// last instruction in a sequence that results in something equivalent to
// the pack-halfwords. The intent is to cause the entire sequence to become
@@ -1914,6 +1999,9 @@ bool BitSimplification::genPackhl(MachineInstr *MI,
BitTracker::RegisterRef Rs, Rt;
if (!matchPackhl(RD.Reg, RC, Rs, Rt))
return false;
+ if (!validateReg(Rs, Hexagon::S2_packhl, 1) ||
+ !validateReg(Rt, Hexagon::S2_packhl, 2))
+ return false;
MachineBasicBlock &B = *MI->getParent();
unsigned NewR = MRI.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
@@ -1928,7 +2016,6 @@ bool BitSimplification::genPackhl(MachineInstr *MI,
return true;
}
-
// If MI produces a halfword of the input in the low half of the output,
// replace it with zero-extend or extractu.
bool BitSimplification::genExtractHalf(MachineInstr *MI,
@@ -1948,14 +2035,18 @@ bool BitSimplification::genExtractHalf(MachineInstr *MI,
auto At = MI->isPHI() ? B.getFirstNonPHI()
: MachineBasicBlock::iterator(MI);
if (L.Low && Opc != Hexagon::A2_zxth) {
- NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
- BuildMI(B, At, DL, HII.get(Hexagon::A2_zxth), NewR)
- .addReg(L.Reg, 0, L.Sub);
+ if (validateReg(L, Hexagon::A2_zxth, 1)) {
+ NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
+ BuildMI(B, At, DL, HII.get(Hexagon::A2_zxth), NewR)
+ .addReg(L.Reg, 0, L.Sub);
+ }
} else if (!L.Low && Opc != Hexagon::S2_lsr_i_r) {
- NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
- BuildMI(B, MI, DL, HII.get(Hexagon::S2_lsr_i_r), NewR)
- .addReg(L.Reg, 0, L.Sub)
- .addImm(16);
+ if (validateReg(L, Hexagon::S2_lsr_i_r, 1)) {
+ NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
+ BuildMI(B, MI, DL, HII.get(Hexagon::S2_lsr_i_r), NewR)
+ .addReg(L.Reg, 0, L.Sub)
+ .addImm(16);
+ }
}
if (NewR == 0)
return false;
@@ -1964,7 +2055,6 @@ bool BitSimplification::genExtractHalf(MachineInstr *MI,
return true;
}
-
// If MI is equivalent to a combine(.L/.H, .L/.H), replace it with the
// combine.
bool BitSimplification::genCombineHalf(MachineInstr *MI,
@@ -1981,6 +2071,8 @@ bool BitSimplification::genCombineHalf(MachineInstr *MI,
unsigned COpc = getCombineOpcode(H.Low, L.Low);
if (COpc == Opc)
return false;
+ if (!validateReg(H, COpc, 1) || !validateReg(L, COpc, 2))
+ return false;
MachineBasicBlock &B = *MI->getParent();
DebugLoc DL = MI->getDebugLoc();
@@ -1995,7 +2087,6 @@ bool BitSimplification::genCombineHalf(MachineInstr *MI,
return true;
}
-
// If MI resets high bits of a register and keeps the lower ones, replace it
// with zero-extend byte/half, and-immediate, or extractu, as appropriate.
bool BitSimplification::genExtractLow(MachineInstr *MI,
@@ -2039,6 +2130,8 @@ bool BitSimplification::genExtractLow(MachineInstr *MI,
continue;
if (BW < W || !HBS::isEqual(RC, 0, SC, BN, W))
continue;
+ if (!validateReg(RS, NewOpc, 1))
+ continue;
unsigned NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
auto At = MI->isPHI() ? B.getFirstNonPHI()
@@ -2056,7 +2149,6 @@ bool BitSimplification::genExtractLow(MachineInstr *MI,
return false;
}
-
// Check for tstbit simplification opportunity, where the bit being checked
// can be tracked back to another register. For example:
// vreg2 = S2_lsr_i_r vreg1, 5
@@ -2086,19 +2178,19 @@ bool BitSimplification::simplifyTstbit(MachineInstr *MI,
// Need to map V.RefI.Reg to a 32-bit register, i.e. if it is
// a double register, need to use a subregister and adjust bit
// number.
- unsigned P = UINT_MAX;
+ unsigned P = std::numeric_limits<unsigned>::max();
BitTracker::RegisterRef RR(V.RefI.Reg, 0);
if (TC == &Hexagon::DoubleRegsRegClass) {
P = V.RefI.Pos;
- RR.Sub = Hexagon::subreg_loreg;
+ RR.Sub = Hexagon::isub_lo;
if (P >= 32) {
P -= 32;
- RR.Sub = Hexagon::subreg_hireg;
+ RR.Sub = Hexagon::isub_hi;
}
} else if (TC == &Hexagon::IntRegsRegClass) {
P = V.RefI.Pos;
}
- if (P != UINT_MAX) {
+ if (P != std::numeric_limits<unsigned>::max()) {
unsigned NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
BuildMI(B, At, DL, HII.get(Hexagon::S2_tstbit_i), NewR)
.addReg(RR.Reg, 0, RR.Sub)
@@ -2109,7 +2201,7 @@ bool BitSimplification::simplifyTstbit(MachineInstr *MI,
}
} else if (V.is(0) || V.is(1)) {
unsigned NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
- unsigned NewOpc = V.is(0) ? Hexagon::TFR_PdFalse : Hexagon::TFR_PdTrue;
+ unsigned NewOpc = V.is(0) ? Hexagon::PS_false : Hexagon::PS_true;
BuildMI(B, At, DL, HII.get(NewOpc), NewR);
HBS::replaceReg(RD.Reg, NewR, MRI);
return true;
@@ -2118,9 +2210,10 @@ bool BitSimplification::simplifyTstbit(MachineInstr *MI,
return false;
}
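// [Editorial sketch, not part of the patch] The equivalence the tstbit
// simplification above relies on: testing bit 0 of a value shifted right
// by N equals testing bit N of the original value, so the shift can be
// bypassed and the bit number adjusted instead.
#include <cassert>
#include <cstdint>

static bool tstbit(uint32_t V, unsigned B) { return (V >> B) & 1u; }

int main() {
  const uint32_t Tests[] = {0u, 1u << 5, 0xFFFFFFFFu, 0xDEADBEEFu};
  for (uint32_t X : Tests)
    assert(tstbit(X >> 5, 0) == tstbit(X, 5));
  return 0;
}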
-
bool BitSimplification::processBlock(MachineBasicBlock &B,
const RegisterSet &AVs) {
+ if (!BT.reached(&B))
+ return false;
bool Changed = false;
RegisterSet AVB = AVs;
RegisterSet Defs;
@@ -2175,7 +2268,6 @@ bool BitSimplification::processBlock(MachineBasicBlock &B,
return Changed;
}
-
bool HexagonBitSimplify::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(*MF.getFunction()))
return false;
@@ -2203,10 +2295,14 @@ bool HexagonBitSimplify::runOnMachineFunction(MachineFunction &MF) {
RegisterSet ARE; // Available registers for RIE.
RedundantInstrElimination RIE(BT, HII, MRI);
- Changed |= visitBlock(Entry, RIE, ARE);
+ bool Ried = visitBlock(Entry, RIE, ARE);
+ if (Ried) {
+ Changed = true;
+ BT.run();
+ }
RegisterSet ACG; // Available registers for CG.
- CopyGeneration CopyG(BT, HII, MRI);
+ CopyGeneration CopyG(BT, HII, HRI, MRI);
Changed |= visitBlock(Entry, CopyG, ACG);
RegisterSet ACP; // Available registers for CP.
@@ -2217,7 +2313,7 @@ bool HexagonBitSimplify::runOnMachineFunction(MachineFunction &MF) {
BT.run();
RegisterSet ABS; // Available registers for BS.
- BitSimplification BitS(BT, HII, MRI);
+ BitSimplification BitS(BT, HII, HRI, MRI, MF);
Changed |= visitBlock(Entry, BitS, ABS);
Changed = DeadCodeElimination(MF, *MDT).run() || Changed;
@@ -2231,7 +2327,6 @@ bool HexagonBitSimplify::runOnMachineFunction(MachineFunction &MF) {
return Changed;
}
-
// Recognize loops where the code at the end of the loop matches the code
// before the entry of the loop, and the matching code is such that it can
// be simplified. This pass relies on the bit simplification above and only
@@ -2295,16 +2390,20 @@ bool HexagonBitSimplify::runOnMachineFunction(MachineFunction &MF) {
// }:endloop0
namespace llvm {
+
FunctionPass *createHexagonLoopRescheduling();
void initializeHexagonLoopReschedulingPass(PassRegistry&);
-}
+
+} // end namespace llvm
namespace {
+
class HexagonLoopRescheduling : public MachineFunctionPass {
public:
static char ID;
+
HexagonLoopRescheduling() : MachineFunctionPass(ID),
- HII(0), HRI(0), MRI(0), BTP(0) {
+ HII(nullptr), HRI(nullptr), MRI(nullptr), BTP(nullptr) {
initializeHexagonLoopReschedulingPass(*PassRegistry::getPassRegistry());
}
@@ -2329,8 +2428,8 @@ namespace {
struct PhiInfo {
PhiInfo(MachineInstr &P, MachineBasicBlock &B);
unsigned DefR;
- BitTracker::RegisterRef LR, PR;
- MachineBasicBlock *LB, *PB;
+ BitTracker::RegisterRef LR, PR; // Loop Register, Preheader Register
+ MachineBasicBlock *LB, *PB; // Loop Block, Preheader Block
};
static unsigned getDefReg(const MachineInstr *MI);
@@ -2344,14 +2443,14 @@ namespace {
MachineBasicBlock::iterator At, unsigned OldPhiR, unsigned NewPredR);
bool processLoop(LoopCand &C);
};
-}
+
+} // end anonymous namespace
char HexagonLoopRescheduling::ID = 0;
INITIALIZE_PASS(HexagonLoopRescheduling, "hexagon-loop-resched",
"Hexagon Loop Rescheduling", false, false)
-
HexagonLoopRescheduling::PhiInfo::PhiInfo(MachineInstr &P,
MachineBasicBlock &B) {
DefR = HexagonLoopRescheduling::getDefReg(&P);
@@ -2368,7 +2467,6 @@ HexagonLoopRescheduling::PhiInfo::PhiInfo(MachineInstr &P,
}
}
-
unsigned HexagonLoopRescheduling::getDefReg(const MachineInstr *MI) {
RegisterSet Defs;
HBS::getInstrDefs(*MI, Defs);
@@ -2377,7 +2475,6 @@ unsigned HexagonLoopRescheduling::getDefReg(const MachineInstr *MI) {
return Defs.find_first();
}
-
bool HexagonLoopRescheduling::isConst(unsigned Reg) const {
if (!BTP->has(Reg))
return false;
@@ -2390,7 +2487,6 @@ bool HexagonLoopRescheduling::isConst(unsigned Reg) const {
return true;
}
-
bool HexagonLoopRescheduling::isBitShuffle(const MachineInstr *MI,
unsigned DefR) const {
unsigned Opc = MI->getOpcode();
@@ -2421,7 +2517,6 @@ bool HexagonLoopRescheduling::isBitShuffle(const MachineInstr *MI,
return false;
}
-
bool HexagonLoopRescheduling::isStoreInput(const MachineInstr *MI,
unsigned InpR) const {
for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) {
@@ -2434,7 +2529,6 @@ bool HexagonLoopRescheduling::isStoreInput(const MachineInstr *MI,
return false;
}
-
bool HexagonLoopRescheduling::isShuffleOf(unsigned OutR, unsigned InpR) const {
if (!BTP->has(OutR) || !BTP->has(InpR))
return false;
@@ -2449,7 +2543,6 @@ bool HexagonLoopRescheduling::isShuffleOf(unsigned OutR, unsigned InpR) const {
return true;
}
-
bool HexagonLoopRescheduling::isSameShuffle(unsigned OutR1, unsigned InpR1,
unsigned OutR2, unsigned &InpR2) const {
if (!BTP->has(OutR1) || !BTP->has(InpR1) || !BTP->has(OutR2))
@@ -2481,7 +2574,6 @@ bool HexagonLoopRescheduling::isSameShuffle(unsigned OutR1, unsigned InpR1,
return true;
}
-
void HexagonLoopRescheduling::moveGroup(InstrGroup &G, MachineBasicBlock &LB,
MachineBasicBlock &PB, MachineBasicBlock::iterator At, unsigned OldPhiR,
unsigned NewPredR) {
@@ -2521,7 +2613,6 @@ void HexagonLoopRescheduling::moveGroup(InstrGroup &G, MachineBasicBlock &LB,
HBS::replaceReg(OldPhiR, RegMap[G.Out.Reg], *MRI);
}
-
bool HexagonLoopRescheduling::processLoop(LoopCand &C) {
DEBUG(dbgs() << "Processing loop in BB#" << C.LB->getNumber() << "\n");
std::vector<PhiInfo> Phis;
@@ -2595,7 +2686,7 @@ bool HexagonLoopRescheduling::processLoop(LoopCand &C) {
if (UseI->getOperand(Idx+1).getMBB() != C.LB)
BadUse = true;
} else {
- auto F = std::find(ShufIns.begin(), ShufIns.end(), UseI);
+ auto F = find(ShufIns, UseI);
if (F == ShufIns.end())
BadUse = true;
}
@@ -2661,7 +2752,7 @@ bool HexagonLoopRescheduling::processLoop(LoopCand &C) {
auto LoopInpEq = [G] (const PhiInfo &P) -> bool {
return G.Out.Reg == P.LR.Reg;
};
- if (std::find_if(Phis.begin(), Phis.end(), LoopInpEq) == Phis.end())
+ if (llvm::find_if(Phis, LoopInpEq) == Phis.end())
continue;
G.Inp.Reg = Inputs.find_first();
@@ -2686,41 +2777,46 @@ bool HexagonLoopRescheduling::processLoop(LoopCand &C) {
auto LoopInpEq = [G] (const PhiInfo &P) -> bool {
return G.Out.Reg == P.LR.Reg;
};
- auto F = std::find_if(Phis.begin(), Phis.end(), LoopInpEq);
+ auto F = llvm::find_if(Phis, LoopInpEq);
if (F == Phis.end())
continue;
- unsigned PredR = 0;
- if (!isSameShuffle(G.Out.Reg, G.Inp.Reg, F->PR.Reg, PredR)) {
- const MachineInstr *DefPredR = MRI->getVRegDef(F->PR.Reg);
- unsigned Opc = DefPredR->getOpcode();
+ unsigned PrehR = 0;
+ if (!isSameShuffle(G.Out.Reg, G.Inp.Reg, F->PR.Reg, PrehR)) {
+ const MachineInstr *DefPrehR = MRI->getVRegDef(F->PR.Reg);
+ unsigned Opc = DefPrehR->getOpcode();
if (Opc != Hexagon::A2_tfrsi && Opc != Hexagon::A2_tfrpi)
continue;
- if (!DefPredR->getOperand(1).isImm())
+ if (!DefPrehR->getOperand(1).isImm())
continue;
- if (DefPredR->getOperand(1).getImm() != 0)
+ if (DefPrehR->getOperand(1).getImm() != 0)
continue;
const TargetRegisterClass *RC = MRI->getRegClass(G.Inp.Reg);
if (RC != MRI->getRegClass(F->PR.Reg)) {
- PredR = MRI->createVirtualRegister(RC);
+ PrehR = MRI->createVirtualRegister(RC);
unsigned TfrI = (RC == &Hexagon::IntRegsRegClass) ? Hexagon::A2_tfrsi
: Hexagon::A2_tfrpi;
auto T = C.PB->getFirstTerminator();
DebugLoc DL = (T != C.PB->end()) ? T->getDebugLoc() : DebugLoc();
- BuildMI(*C.PB, T, DL, HII->get(TfrI), PredR)
+ BuildMI(*C.PB, T, DL, HII->get(TfrI), PrehR)
.addImm(0);
} else {
- PredR = F->PR.Reg;
+ PrehR = F->PR.Reg;
}
}
- assert(MRI->getRegClass(PredR) == MRI->getRegClass(G.Inp.Reg));
- moveGroup(G, *F->LB, *F->PB, F->LB->getFirstNonPHI(), F->DefR, PredR);
+ // isSameShuffle could match with PrehR being of a wider class than
+ // G.Inp.Reg, for example if G shuffles the low 32 bits of its input,
+ // it would match for the input being a 32-bit register, and PrehR
+ // being a 64-bit register (where the low 32 bits match). This could
+ // be handled, but for now skip these cases.
+ if (MRI->getRegClass(PrehR) != MRI->getRegClass(G.Inp.Reg))
+ continue;
+ moveGroup(G, *F->LB, *F->PB, F->LB->getFirstNonPHI(), F->DefR, PrehR);
Changed = true;
}
return Changed;
}
-
bool HexagonLoopRescheduling::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(*MF.getFunction()))
return false;
@@ -2783,4 +2879,3 @@ FunctionPass *llvm::createHexagonLoopRescheduling() {
FunctionPass *llvm::createHexagonBitSimplify() {
return new HexagonBitSimplify();
}
-
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp
index 78b57d27ad50..b78c4126e0b1 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonBitTracker.cpp
@@ -26,7 +26,7 @@ HexagonEvaluator::HexagonEvaluator(const HexagonRegisterInfo &tri,
MachineRegisterInfo &mri,
const HexagonInstrInfo &tii,
MachineFunction &mf)
- : MachineEvaluator(tri, mri), MF(mf), MFI(*mf.getFrameInfo()), TII(tii) {
+ : MachineEvaluator(tri, mri), MF(mf), MFI(mf.getFrameInfo()), TII(tii) {
// Populate the VRX map (VR to extension-type).
// Go over all the formal parameters of the function. If a given parameter
// P is sign- or zero-extended, locate the virtual register holding that
@@ -60,13 +60,15 @@ HexagonEvaluator::HexagonEvaluator(const HexagonRegisterInfo &tri,
// Module::AnyPointerSize.
if (Width == 0 || Width > 64)
break;
+ AttributeSet Attrs = F.getAttributes();
+ if (Attrs.hasAttribute(AttrIdx, Attribute::ByVal))
+ continue;
InPhysReg = getNextPhysReg(InPhysReg, Width);
if (!InPhysReg)
break;
InVirtReg = getVirtRegFor(InPhysReg);
if (!InVirtReg)
continue;
- AttributeSet Attrs = F.getAttributes();
if (Attrs.hasAttribute(AttrIdx, Attribute::SExt))
VRX.insert(std::make_pair(InVirtReg, ExtType(ExtType::SExt, Width)));
else if (Attrs.hasAttribute(AttrIdx, Attribute::ZExt))
@@ -82,12 +84,14 @@ BT::BitMask HexagonEvaluator::mask(unsigned Reg, unsigned Sub) const {
const TargetRegisterClass *RC = MRI.getRegClass(Reg);
unsigned ID = RC->getID();
uint16_t RW = getRegBitWidth(RegisterRef(Reg, Sub));
+ auto &HRI = static_cast<const HexagonRegisterInfo&>(TRI);
+ bool IsSubLo = (Sub == HRI.getHexagonSubRegIndex(RC, Hexagon::ps_sub_lo));
switch (ID) {
case DoubleRegsRegClassID:
case VecDblRegsRegClassID:
case VecDblRegs128BRegClassID:
- return (Sub == subreg_loreg) ? BT::BitMask(0, RW-1)
- : BT::BitMask(RW, 2*RW-1);
+ return IsSubLo ? BT::BitMask(0, RW-1)
+ : BT::BitMask(RW, 2*RW-1);
default:
break;
}
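// [Editorial sketch, not part of the patch] The bit ranges returned above,
// assuming RW is the width of one half of a double register: the low
// sub-register maps to positions [0 .. RW-1] and the high sub-register to
// [RW .. 2*RW-1] of the full value. Illustrative helper only.
#include <cstdint>
#include <utility>

namespace sketch {

// first = lowest bit position, second = highest bit position (inclusive)
std::pair<uint16_t, uint16_t> subRegBitRange(bool IsSubLo, uint16_t RW) {
  return IsSubLo ? std::pair<uint16_t, uint16_t>(0, RW - 1)
                 : std::pair<uint16_t, uint16_t>(RW, 2 * RW - 1);
}

} // namespace sketch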
@@ -138,8 +142,20 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI,
if (NumDefs == 0)
return false;
- if (MI.mayLoad())
- return evaluateLoad(MI, Inputs, Outputs);
+ using namespace Hexagon;
+ unsigned Opc = MI.getOpcode();
+
+ if (MI.mayLoad()) {
+ switch (Opc) {
+ // These instructions may be marked as mayLoad, but they are generating
+ // immediate values, so skip them.
+ case CONST32:
+ case CONST64:
+ break;
+ default:
+ return evaluateLoad(MI, Inputs, Outputs);
+ }
+ }
// Check COPY instructions that copy formal parameters into virtual
// registers. Such parameters can be sign- or zero-extended at the
@@ -174,8 +190,6 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI,
}
RegisterRefs Reg(MI);
- unsigned Opc = MI.getOpcode();
- using namespace Hexagon;
#define op(i) MI.getOperand(i)
#define rc(i) RegisterCell::ref(getCell(Reg[i], Inputs))
#define im(i) MI.getOperand(i).getImm()
@@ -246,16 +260,13 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI,
case A2_tfrsi:
case A2_tfrpi:
case CONST32:
- case CONST32_Float_Real:
- case CONST32_Int_Real:
- case CONST64_Float_Real:
- case CONST64_Int_Real:
+ case CONST64:
return rr0(eIMM(im(1), W0), Outputs);
- case TFR_PdFalse:
+ case PS_false:
return rr0(RegisterCell(W0).fill(0, W0, BT::BitValue::Zero), Outputs);
- case TFR_PdTrue:
+ case PS_true:
return rr0(RegisterCell(W0).fill(0, W0, BT::BitValue::One), Outputs);
- case TFR_FI: {
+ case PS_fi: {
int FI = op(1).getIndex();
int Off = op(2).getImm();
unsigned A = MFI.getObjectAlignment(FI) + std::abs(Off);
@@ -670,6 +681,8 @@ bool HexagonEvaluator::evaluate(const MachineInstr &MI,
case A4_combineir:
case A4_combineri:
case A2_combinew:
+ case V6_vcombine:
+ case V6_vcombine_128B:
assert(W0 % 2 == 0);
return rr0(cop(2, W0/2).cat(cop(1, W0/2)), Outputs);
case A2_combine_ll:
@@ -884,17 +897,19 @@ bool HexagonEvaluator::evaluate(const MachineInstr &BI,
const CellMapType &Inputs,
BranchTargetList &Targets,
bool &FallsThru) const {
- // We need to evaluate one branch at a time. TII::AnalyzeBranch checks
+ // We need to evaluate one branch at a time. TII::analyzeBranch checks
// all the branches in a basic block at once, so we cannot use it.
unsigned Opc = BI.getOpcode();
bool SimpleBranch = false;
bool Negated = false;
switch (Opc) {
case Hexagon::J2_jumpf:
+ case Hexagon::J2_jumpfpt:
case Hexagon::J2_jumpfnew:
case Hexagon::J2_jumpfnewpt:
Negated = true;
case Hexagon::J2_jumpt:
+ case Hexagon::J2_jumptpt:
case Hexagon::J2_jumptnew:
case Hexagon::J2_jumptnewpt:
// Simple branch: if([!]Pn) jump ...
@@ -986,7 +1001,7 @@ bool HexagonEvaluator::evaluateLoad(const MachineInstr &MI,
case L2_loadrb_pci:
case L2_loadrb_pcr:
case L2_loadrb_pi:
- case L4_loadrb_abs:
+ case PS_loadrbabs:
case L4_loadrb_ap:
case L4_loadrb_rr:
case L4_loadrb_ur:
@@ -1000,7 +1015,7 @@ bool HexagonEvaluator::evaluateLoad(const MachineInstr &MI,
case L2_loadrub_pci:
case L2_loadrub_pcr:
case L2_loadrub_pi:
- case L4_loadrub_abs:
+ case PS_loadrubabs:
case L4_loadrub_ap:
case L4_loadrub_rr:
case L4_loadrub_ur:
@@ -1014,7 +1029,7 @@ bool HexagonEvaluator::evaluateLoad(const MachineInstr &MI,
case L2_loadrh_pci:
case L2_loadrh_pcr:
case L2_loadrh_pi:
- case L4_loadrh_abs:
+ case PS_loadrhabs:
case L4_loadrh_ap:
case L4_loadrh_rr:
case L4_loadrh_ur:
@@ -1029,7 +1044,7 @@ bool HexagonEvaluator::evaluateLoad(const MachineInstr &MI,
case L2_loadruh_pcr:
case L2_loadruh_pi:
case L4_loadruh_rr:
- case L4_loadruh_abs:
+ case PS_loadruhabs:
case L4_loadruh_ap:
case L4_loadruh_ur:
BitNum = 16;
@@ -1043,7 +1058,7 @@ bool HexagonEvaluator::evaluateLoad(const MachineInstr &MI,
case L2_loadri_pcr:
case L2_loadri_pi:
case L2_loadw_locked:
- case L4_loadri_abs:
+ case PS_loadriabs:
case L4_loadri_ap:
case L4_loadri_rr:
case L4_loadri_ur:
@@ -1059,7 +1074,7 @@ bool HexagonEvaluator::evaluateLoad(const MachineInstr &MI,
case L2_loadrd_pcr:
case L2_loadrd_pi:
case L4_loadd_locked:
- case L4_loadrd_abs:
+ case PS_loadrdabs:
case L4_loadrd_ap:
case L4_loadrd_rr:
case L4_loadrd_ur:
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonBlockRanges.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonBlockRanges.cpp
index 5c44029dc6e7..adc213c3d438 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonBlockRanges.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonBlockRanges.cpp
@@ -12,17 +12,19 @@
#include "HexagonBlockRanges.h"
#include "HexagonInstrInfo.h"
#include "HexagonSubtarget.h"
-
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Support/Compiler.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
-
+#include <algorithm>
+#include <cassert>
+#include <iterator>
#include <map>
using namespace llvm;
@@ -40,7 +42,6 @@ bool HexagonBlockRanges::IndexRange::overlaps(const IndexRange &A) const {
return false;
}
-
bool HexagonBlockRanges::IndexRange::contains(const IndexRange &A) const {
if (start() <= A.start()) {
// Treat "None" in the range end as equal to the range start.
@@ -52,7 +53,6 @@ bool HexagonBlockRanges::IndexRange::contains(const IndexRange &A) const {
return false;
}
-
void HexagonBlockRanges::IndexRange::merge(const IndexRange &A) {
// Allow merging adjacent ranges.
assert(end() == A.start() || overlaps(A));
@@ -70,14 +70,12 @@ void HexagonBlockRanges::IndexRange::merge(const IndexRange &A) {
Fixed = true;
}
-
void HexagonBlockRanges::RangeList::include(const RangeList &RL) {
for (auto &R : RL)
- if (std::find(begin(), end(), R) == end())
+ if (!is_contained(*this, R))
push_back(R);
}
-
// Merge all overlapping ranges in the list, so that all that remains
// is a list of disjoint ranges.
void HexagonBlockRanges::RangeList::unionize(bool MergeAdjacent) {
@@ -101,7 +99,6 @@ void HexagonBlockRanges::RangeList::unionize(bool MergeAdjacent) {
}
}
-
// Compute a range A-B and add it to the list.
void HexagonBlockRanges::RangeList::addsub(const IndexRange &A,
const IndexRange &B) {
@@ -138,7 +135,6 @@ void HexagonBlockRanges::RangeList::addsub(const IndexRange &A,
}
}
-
// Subtract a given range from each element in the list.
void HexagonBlockRanges::RangeList::subtract(const IndexRange &Range) {
// Cannot assume that the list is unionized (i.e. contains only non-
@@ -156,7 +152,6 @@ void HexagonBlockRanges::RangeList::subtract(const IndexRange &Range) {
include(T);
}
-
HexagonBlockRanges::InstrIndexMap::InstrIndexMap(MachineBasicBlock &B)
: Block(B) {
IndexType Idx = IndexType::First;
@@ -171,13 +166,11 @@ HexagonBlockRanges::InstrIndexMap::InstrIndexMap(MachineBasicBlock &B)
Last = B.empty() ? IndexType::None : unsigned(Idx)-1;
}
-
MachineInstr *HexagonBlockRanges::InstrIndexMap::getInstr(IndexType Idx) const {
auto F = Map.find(Idx);
- return (F != Map.end()) ? F->second : 0;
+ return (F != Map.end()) ? F->second : nullptr;
}
-
HexagonBlockRanges::IndexType HexagonBlockRanges::InstrIndexMap::getIndex(
MachineInstr *MI) const {
for (auto &I : Map)
@@ -186,7 +179,6 @@ HexagonBlockRanges::IndexType HexagonBlockRanges::InstrIndexMap::getIndex(
return IndexType::None;
}
-
HexagonBlockRanges::IndexType HexagonBlockRanges::InstrIndexMap::getPrevIndex(
IndexType Idx) const {
assert (Idx != IndexType::None);
@@ -199,7 +191,6 @@ HexagonBlockRanges::IndexType HexagonBlockRanges::InstrIndexMap::getPrevIndex(
return unsigned(Idx)-1;
}
-
HexagonBlockRanges::IndexType HexagonBlockRanges::InstrIndexMap::getNextIndex(
IndexType Idx) const {
assert (Idx != IndexType::None);
@@ -210,7 +201,6 @@ HexagonBlockRanges::IndexType HexagonBlockRanges::InstrIndexMap::getNextIndex(
return unsigned(Idx)+1;
}
-
void HexagonBlockRanges::InstrIndexMap::replaceInstr(MachineInstr *OldMI,
MachineInstr *NewMI) {
for (auto &I : Map) {
@@ -224,7 +214,6 @@ void HexagonBlockRanges::InstrIndexMap::replaceInstr(MachineInstr *OldMI,
}
}
-
HexagonBlockRanges::HexagonBlockRanges(MachineFunction &mf)
: MF(mf), HST(mf.getSubtarget<HexagonSubtarget>()),
TII(*HST.getInstrInfo()), TRI(*HST.getRegisterInfo()),
@@ -239,17 +228,33 @@ HexagonBlockRanges::HexagonBlockRanges(MachineFunction &mf)
}
}
-
HexagonBlockRanges::RegisterSet HexagonBlockRanges::getLiveIns(
- const MachineBasicBlock &B) {
+ const MachineBasicBlock &B, const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo &TRI) {
RegisterSet LiveIns;
- for (auto I : B.liveins())
- if (!Reserved[I.PhysReg])
- LiveIns.insert({I.PhysReg, 0});
+ RegisterSet Tmp;
+ for (auto I : B.liveins()) {
+ if (I.LaneMask.all()) {
+ Tmp.insert({I.PhysReg,0});
+ continue;
+ }
+ for (MCSubRegIndexIterator S(I.PhysReg, &TRI); S.isValid(); ++S) {
+ LaneBitmask M = TRI.getSubRegIndexLaneMask(S.getSubRegIndex());
+ if ((M & I.LaneMask).any())
+ Tmp.insert({S.getSubReg(), 0});
+ }
+ }
+
+ for (auto R : Tmp) {
+ if (!Reserved[R.Reg])
+ LiveIns.insert(R);
+ for (auto S : expandToSubRegs(R, MRI, TRI))
+ if (!Reserved[S.Reg])
+ LiveIns.insert(S);
+ }
return LiveIns;
}
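// [Editorial sketch, not part of the patch] A simplified model of the
// lane-mask filtering added above: a live-in with a partial lane mask
// contributes only those sub-registers whose own lane mask overlaps it,
// which is what the MCSubRegIndexIterator loop computes. The sketch
// namespace and SubReg type are illustrative only.
#include <cstdint>
#include <vector>

namespace sketch {

struct SubReg {
  unsigned Reg;
  uint64_t LaneMask;
};

std::vector<unsigned> liveSubRegs(uint64_t LiveInMask,
                                  const std::vector<SubReg> &Subs) {
  std::vector<unsigned> Live;
  for (const SubReg &S : Subs)
    if ((S.LaneMask & LiveInMask) != 0) // any common lane => live
      Live.push_back(S.Reg);
  return Live;
}

} // namespace sketch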
-
HexagonBlockRanges::RegisterSet HexagonBlockRanges::expandToSubRegs(
RegisterRef R, const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI) {
@@ -279,7 +284,6 @@ HexagonBlockRanges::RegisterSet HexagonBlockRanges::expandToSubRegs(
return SRs;
}
-
void HexagonBlockRanges::computeInitialLiveRanges(InstrIndexMap &IndexMap,
RegToRangeMap &LiveMap) {
std::map<RegisterRef,IndexType> LastDef, LastUse;
@@ -287,9 +291,8 @@ void HexagonBlockRanges::computeInitialLiveRanges(InstrIndexMap &IndexMap,
MachineBasicBlock &B = IndexMap.getBlock();
MachineRegisterInfo &MRI = B.getParent()->getRegInfo();
- for (auto R : getLiveIns(B))
- for (auto S : expandToSubRegs(R, MRI, TRI))
- LiveOnEntry.insert(S);
+ for (auto R : getLiveIns(B, MRI, TRI))
+ LiveOnEntry.insert(R);
for (auto R : LiveOnEntry)
LastDef[R] = IndexType::Entry;
@@ -340,9 +343,8 @@ void HexagonBlockRanges::computeInitialLiveRanges(InstrIndexMap &IndexMap,
// Collect live-on-exit.
RegisterSet LiveOnExit;
for (auto *SB : B.successors())
- for (auto R : getLiveIns(*SB))
- for (auto S : expandToSubRegs(R, MRI, TRI))
- LiveOnExit.insert(S);
+ for (auto R : getLiveIns(*SB, MRI, TRI))
+ LiveOnExit.insert(R);
for (auto R : LiveOnExit)
LastUse[R] = IndexType::Exit;
@@ -363,18 +365,16 @@ void HexagonBlockRanges::computeInitialLiveRanges(InstrIndexMap &IndexMap,
P.second.unionize();
}
-
HexagonBlockRanges::RegToRangeMap HexagonBlockRanges::computeLiveMap(
InstrIndexMap &IndexMap) {
RegToRangeMap LiveMap;
- DEBUG(dbgs() << LLVM_FUNCTION_NAME << ": index map\n" << IndexMap << '\n');
+ DEBUG(dbgs() << __func__ << ": index map\n" << IndexMap << '\n');
computeInitialLiveRanges(IndexMap, LiveMap);
- DEBUG(dbgs() << LLVM_FUNCTION_NAME << ": live map\n"
+ DEBUG(dbgs() << __func__ << ": live map\n"
<< PrintRangeMap(LiveMap, TRI) << '\n');
return LiveMap;
}
-
HexagonBlockRanges::RegToRangeMap HexagonBlockRanges::computeDeadMap(
InstrIndexMap &IndexMap, RegToRangeMap &LiveMap) {
RegToRangeMap DeadMap;
@@ -432,7 +432,7 @@ HexagonBlockRanges::RegToRangeMap HexagonBlockRanges::computeDeadMap(
if (TargetRegisterInfo::isVirtualRegister(P.first.Reg))
addDeadRanges(P.first);
- DEBUG(dbgs() << LLVM_FUNCTION_NAME << ": dead map\n"
+ DEBUG(dbgs() << __func__ << ": dead map\n"
<< PrintRangeMap(DeadMap, TRI) << '\n');
return DeadMap;
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonBlockRanges.h b/contrib/llvm/lib/Target/Hexagon/HexagonBlockRanges.h
index 9c3f938f99eb..717480314d16 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonBlockRanges.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonBlockRanges.h
@@ -1,4 +1,4 @@
-//===--- HexagonBlockRanges.h ---------------------------------------------===//
+//===--- HexagonBlockRanges.h -----------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,23 +11,21 @@
#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/MC/MCRegisterInfo.h" // For MCPhysReg.
+#include <cassert>
#include <map>
#include <set>
#include <vector>
+#include <utility>
namespace llvm {
- class Function;
- class HexagonSubtarget;
- class MachineBasicBlock;
- class MachineFunction;
- class MachineInstr;
- class MCInstrDesc;
- class raw_ostream;
- class TargetInstrInfo;
- class TargetRegisterClass;
- class TargetRegisterInfo;
- class Type;
+
+class HexagonSubtarget;
+class MachineBasicBlock;
+class MachineFunction;
+class MachineInstr;
+class raw_ostream;
+class TargetInstrInfo;
+class TargetRegisterInfo;
struct HexagonBlockRanges {
HexagonBlockRanges(MachineFunction &MF);
@@ -50,10 +48,12 @@ struct HexagonBlockRanges {
Exit = 2,
First = 11 // 10th + 1st
};
- static bool isInstr(IndexType X) { return X.Index >= First; }
IndexType() : Index(None) {}
IndexType(unsigned Idx) : Index(Idx) {}
+
+ static bool isInstr(IndexType X) { return X.Index >= First; }
+
operator unsigned() const;
bool operator== (unsigned x) const;
bool operator== (IndexType Idx) const;
@@ -76,21 +76,23 @@ struct HexagonBlockRanges {
// register is dead.
class IndexRange : public std::pair<IndexType,IndexType> {
public:
- IndexRange() : Fixed(false), TiedEnd(false) {}
+ IndexRange() = default;
IndexRange(IndexType Start, IndexType End, bool F = false, bool T = false)
: std::pair<IndexType,IndexType>(Start, End), Fixed(F), TiedEnd(T) {}
+
IndexType start() const { return first; }
IndexType end() const { return second; }
bool operator< (const IndexRange &A) const {
return start() < A.start();
}
+
bool overlaps(const IndexRange &A) const;
bool contains(const IndexRange &A) const;
void merge(const IndexRange &A);
- bool Fixed; // Can be renamed? "Fixed" means "no".
- bool TiedEnd; // The end is not a use, but a dead def tied to a use.
+ bool Fixed = false; // Can be renamed? "Fixed" means "no".
+ bool TiedEnd = false; // The end is not a use, but a dead def tied to a use.
private:
void setStart(const IndexType &S) { first = S; }
@@ -107,6 +109,7 @@ struct HexagonBlockRanges {
void add(const IndexRange &Range) {
push_back(Range);
}
+
void include(const RangeList &RL);
void unionize(bool MergeAdjacent = false);
void subtract(const IndexRange &Range);
@@ -118,6 +121,7 @@ struct HexagonBlockRanges {
class InstrIndexMap {
public:
InstrIndexMap(MachineBasicBlock &B);
+
MachineInstr *getInstr(IndexType Idx) const;
IndexType getIndex(MachineInstr *MI) const;
MachineBasicBlock &getBlock() const { return Block; }
@@ -126,6 +130,7 @@ struct HexagonBlockRanges {
void replaceInstr(MachineInstr *OldMI, MachineInstr *NewMI);
friend raw_ostream &operator<< (raw_ostream &OS, const InstrIndexMap &Map);
+
IndexType First, Last;
private:
@@ -144,13 +149,15 @@ struct HexagonBlockRanges {
: Map(M), TRI(I) {}
friend raw_ostream &operator<< (raw_ostream &OS, const PrintRangeMap &P);
+
private:
const RegToRangeMap &Map;
const TargetRegisterInfo &TRI;
};
private:
- RegisterSet getLiveIns(const MachineBasicBlock &B);
+ RegisterSet getLiveIns(const MachineBasicBlock &B,
+ const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI);
void computeInitialLiveRanges(InstrIndexMap &IndexMap,
RegToRangeMap &LiveMap);
@@ -162,7 +169,6 @@ private:
BitVector Reserved;
};
-
inline HexagonBlockRanges::IndexType::operator unsigned() const {
assert(Index >= First);
return Index;
@@ -223,7 +229,6 @@ inline bool HexagonBlockRanges::IndexType::operator<= (IndexType Idx) const {
return operator==(Idx) || operator<(Idx);
}
-
raw_ostream &operator<< (raw_ostream &OS, HexagonBlockRanges::IndexType Idx);
raw_ostream &operator<< (raw_ostream &OS,
const HexagonBlockRanges::IndexRange &IR);
@@ -234,6 +239,6 @@ raw_ostream &operator<< (raw_ostream &OS,
raw_ostream &operator<< (raw_ostream &OS,
const HexagonBlockRanges::PrintRangeMap &P);
-} // namespace llvm
+} // end namespace llvm
-#endif
+#endif // HEXAGON_BLOCK_RANGES_H
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonBranchRelaxation.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonBranchRelaxation.cpp
index f042baf1ef05..84af4b14b9f7 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonBranchRelaxation.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonBranchRelaxation.cpp
@@ -12,15 +12,23 @@
#include "Hexagon.h"
#include "HexagonInstrInfo.h"
#include "HexagonSubtarget.h"
-#include "HexagonTargetMachine.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/PassSupport.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdint>
+#include <cstdlib>
+#include <iterator>
using namespace llvm;
@@ -30,21 +38,25 @@ static cl::opt<uint32_t> BranchRelaxSafetyBuffer("branch-relax-safety-buffer",
cl::init(200), cl::Hidden, cl::ZeroOrMore, cl::desc("safety buffer size"));
namespace llvm {
+
FunctionPass *createHexagonBranchRelaxation();
void initializeHexagonBranchRelaxationPass(PassRegistry&);
-}
+
+} // end namespace llvm
namespace {
+
struct HexagonBranchRelaxation : public MachineFunctionPass {
public:
static char ID;
+
HexagonBranchRelaxation() : MachineFunctionPass(ID) {
initializeHexagonBranchRelaxationPass(*PassRegistry::getPassRegistry());
}
bool runOnMachineFunction(MachineFunction &MF) override;
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "Hexagon Branch Relaxation";
}
@@ -67,6 +79,7 @@ namespace {
};
char HexagonBranchRelaxation::ID = 0;
+
} // end anonymous namespace
INITIALIZE_PASS(HexagonBranchRelaxation, "hexagon-brelax",
@@ -76,7 +89,6 @@ FunctionPass *llvm::createHexagonBranchRelaxation() {
return new HexagonBranchRelaxation();
}
-
bool HexagonBranchRelaxation::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "****** Hexagon Branch Relaxation ******\n");
@@ -89,7 +101,6 @@ bool HexagonBranchRelaxation::runOnMachineFunction(MachineFunction &MF) {
return Changed;
}
-
void HexagonBranchRelaxation::computeOffset(MachineFunction &MF,
DenseMap<MachineBasicBlock*, unsigned> &OffsetMap) {
// offset of the current instruction from the start.
@@ -104,11 +115,10 @@ void HexagonBranchRelaxation::computeOffset(MachineFunction &MF,
}
OffsetMap[&B] = InstOffset;
for (auto &MI : B.instrs())
- InstOffset += HII->getSize(&MI);
+ InstOffset += HII->getSize(MI);
}
}
-
/// relaxBranches - For Hexagon, if the jump target/loop label is too far from
/// the jump/loop instruction then, we need to make sure that we have constant
/// extenders set for jumps and loops.
@@ -124,7 +134,6 @@ bool HexagonBranchRelaxation::relaxBranches(MachineFunction &MF) {
return reGenerateBranch(MF, BlockToInstOffset);
}
-
/// Check if a given instruction is:
/// - a jump to a distant target
/// - that exceeds its immediate range
@@ -144,7 +153,7 @@ bool HexagonBranchRelaxation::isJumpOutOfRange(MachineInstr &MI,
// Number of instructions times typical instruction size.
InstOffset += HII->nonDbgBBSize(&B) * HEXAGON_INSTR_SIZE;
- MachineBasicBlock *TBB = NULL, *FBB = NULL;
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 4> Cond;
// Try to analyze this branch.
@@ -152,13 +161,13 @@ bool HexagonBranchRelaxation::isJumpOutOfRange(MachineInstr &MI,
// Could not analyze it. See if this is something we can recognize.
// If it is a NVJ, it should always have its target in
// a fixed location.
- if (HII->isNewValueJump(&*FirstTerm))
- TBB = FirstTerm->getOperand(HII->getCExtOpNum(&*FirstTerm)).getMBB();
+ if (HII->isNewValueJump(*FirstTerm))
+ TBB = FirstTerm->getOperand(HII->getCExtOpNum(*FirstTerm)).getMBB();
}
if (TBB && &MI == &*FirstTerm) {
Distance = std::abs((long long)InstOffset - BlockToInstOffset[TBB])
+ BranchRelaxSafetyBuffer;
- return !HII->isJumpWithinBranchRange(&*FirstTerm, Distance);
+ return !HII->isJumpWithinBranchRange(*FirstTerm, Distance);
}
if (FBB) {
// Look for second terminator.
@@ -171,12 +180,11 @@ bool HexagonBranchRelaxation::isJumpOutOfRange(MachineInstr &MI,
// Analyze the second branch in the BB.
Distance = std::abs((long long)InstOffset - BlockToInstOffset[FBB])
+ BranchRelaxSafetyBuffer;
- return !HII->isJumpWithinBranchRange(&*SecondTerm, Distance);
+ return !HII->isJumpWithinBranchRange(*SecondTerm, Distance);
}
return false;
}
-
bool HexagonBranchRelaxation::reGenerateBranch(MachineFunction &MF,
DenseMap<MachineBasicBlock*, unsigned> &BlockToInstOffset) {
bool Changed = false;
@@ -186,16 +194,16 @@ bool HexagonBranchRelaxation::reGenerateBranch(MachineFunction &MF,
if (!MI.isBranch() || !isJumpOutOfRange(MI, BlockToInstOffset))
continue;
DEBUG(dbgs() << "Long distance jump. isExtendable("
- << HII->isExtendable(&MI) << ") isConstExtended("
- << HII->isConstExtended(&MI) << ") " << MI);
+ << HII->isExtendable(MI) << ") isConstExtended("
+ << HII->isConstExtended(MI) << ") " << MI);
// Since we have not merged HW loops relaxation into
// this code (yet), soften our approach for the moment.
- if (!HII->isExtendable(&MI) && !HII->isExtended(&MI)) {
+ if (!HII->isExtendable(MI) && !HII->isExtended(MI)) {
DEBUG(dbgs() << "\tUnderimplemented relax branch instruction.\n");
} else {
// Find which operand is expandable.
- int ExtOpNum = HII->getCExtOpNum(&MI);
+ int ExtOpNum = HII->getCExtOpNum(MI);
MachineOperand &MO = MI.getOperand(ExtOpNum);
// This need to be something we understand. So far we assume all
// branches have only MBB address as expandable field.
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
index 559bdfb16a6f..2f8fe6e087f5 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonCFGOptimizer.cpp
@@ -45,13 +45,11 @@ public:
initializeHexagonCFGOptimizerPass(*PassRegistry::getPassRegistry());
}
- const char *getPassName() const override {
- return "Hexagon CFG Optimizer";
- }
+ StringRef getPassName() const override { return "Hexagon CFG Optimizer"; }
bool runOnMachineFunction(MachineFunction &Fn) override;
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
};
@@ -59,8 +57,18 @@ public:
char HexagonCFGOptimizer::ID = 0;
static bool IsConditionalBranch(int Opc) {
- return (Opc == Hexagon::J2_jumpt) || (Opc == Hexagon::J2_jumpf)
- || (Opc == Hexagon::J2_jumptnewpt) || (Opc == Hexagon::J2_jumpfnewpt);
+ switch (Opc) {
+ case Hexagon::J2_jumpt:
+ case Hexagon::J2_jumptpt:
+ case Hexagon::J2_jumpf:
+ case Hexagon::J2_jumpfpt:
+ case Hexagon::J2_jumptnew:
+ case Hexagon::J2_jumpfnew:
+ case Hexagon::J2_jumptnewpt:
+ case Hexagon::J2_jumpfnewpt:
+ return true;
+ }
+ return false;
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
index b612b11aed50..489da6be923d 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonCommonGEP.cpp
@@ -9,30 +9,43 @@
#define DEBUG_TYPE "commgep"
-#include "llvm/Pass.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/FoldingSet.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/PostDominators.h"
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
+#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
-
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
#include <map>
#include <set>
+#include <utility>
#include <vector>
-#include "HexagonTargetMachine.h"
-
using namespace llvm;
static cl::opt<bool> OptSpeculate("commgep-speculate", cl::init(true),
@@ -45,10 +58,13 @@ static cl::opt<bool> OptEnableConst("commgep-const", cl::init(true),
cl::Hidden, cl::ZeroOrMore);
namespace llvm {
+
void initializeHexagonCommonGEPPass(PassRegistry&);
-}
+
+} // end namespace llvm
namespace {
+
struct GepNode;
typedef std::set<GepNode*> NodeSet;
typedef std::map<GepNode*,Value*> NodeToValueMap;
@@ -60,7 +76,7 @@ namespace {
// Numbering map for gep nodes. Used to keep track of ordering for
// gep nodes.
struct NodeOrdering {
- NodeOrdering() : LastNum(0) {}
+ NodeOrdering() = default;
void insert(const GepNode *N) { Map.insert(std::make_pair(N, ++LastNum)); }
void clear() { Map.clear(); }
@@ -73,21 +89,21 @@ namespace {
private:
std::map<const GepNode *, unsigned> Map;
- unsigned LastNum;
+ unsigned LastNum = 0;
};
class HexagonCommonGEP : public FunctionPass {
public:
static char ID;
+
HexagonCommonGEP() : FunctionPass(ID) {
initializeHexagonCommonGEPPass(*PassRegistry::getPassRegistry());
}
- virtual bool runOnFunction(Function &F);
- virtual const char *getPassName() const {
- return "Hexagon Common GEP";
- }
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ bool runOnFunction(Function &F) override;
+ StringRef getPassName() const override { return "Hexagon Common GEP"; }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<PostDominatorTreeWrapperPass>();
@@ -140,8 +156,8 @@ namespace {
PostDominatorTree *PDT;
Function *Fn;
};
-}
+} // end anonymous namespace
char HexagonCommonGEP::ID = 0;
INITIALIZE_PASS_BEGIN(HexagonCommonGEP, "hcommgep", "Hexagon Common GEP",
@@ -153,6 +169,7 @@ INITIALIZE_PASS_END(HexagonCommonGEP, "hcommgep", "Hexagon Common GEP",
false, false)
namespace {
+
struct GepNode {
enum {
None = 0,
@@ -169,18 +186,20 @@ namespace {
Value *Idx;
Type *PTy; // Type of the pointer operand.
- GepNode() : Flags(0), Parent(0), Idx(0), PTy(0) {}
+ GepNode() : Flags(0), Parent(nullptr), Idx(nullptr), PTy(nullptr) {}
GepNode(const GepNode *N) : Flags(N->Flags), Idx(N->Idx), PTy(N->PTy) {
if (Flags & Root)
BaseVal = N->BaseVal;
else
Parent = N->Parent;
}
+
friend raw_ostream &operator<< (raw_ostream &OS, const GepNode &GN);
};
-
Type *next_type(Type *Ty, Value *Idx) {
+ if (auto *PTy = dyn_cast<PointerType>(Ty))
+ return PTy->getElementType();
// Advance the type.
if (!Ty->isStructTy()) {
Type *NexTy = cast<SequentialType>(Ty)->getElementType();
@@ -194,7 +213,6 @@ namespace {
return NextTy;
}
-
raw_ostream &operator<< (raw_ostream &OS, const GepNode &GN) {
OS << "{ {";
bool Comma = false;
@@ -241,7 +259,6 @@ namespace {
return OS;
}
-
template <typename NodeContainer>
void dump_node_container(raw_ostream &OS, const NodeContainer &S) {
typedef typename NodeContainer::const_iterator const_iterator;
@@ -256,7 +273,6 @@ namespace {
return OS;
}
-
raw_ostream &operator<< (raw_ostream &OS,
const NodeToUsesMap &M) LLVM_ATTRIBUTE_UNUSED;
raw_ostream &operator<< (raw_ostream &OS, const NodeToUsesMap &M){
@@ -276,23 +292,22 @@ namespace {
return OS;
}
-
struct in_set {
in_set(const NodeSet &S) : NS(S) {}
bool operator() (GepNode *N) const {
return NS.find(N) != NS.end();
}
+
private:
const NodeSet &NS;
};
-}
+} // end anonymous namespace
inline void *operator new(size_t, SpecificBumpPtrAllocator<GepNode> &A) {
return A.Allocate();
}
-
void HexagonCommonGEP::getBlockTraversalOrder(BasicBlock *Root,
ValueVect &Order) {
// Compute block ordering for a typical DT-based traversal of the flow
@@ -307,7 +322,6 @@ void HexagonCommonGEP::getBlockTraversalOrder(BasicBlock *Root,
getBlockTraversalOrder((*I)->getBlock(), Order);
}
-
bool HexagonCommonGEP::isHandledGepForm(GetElementPtrInst *GepI) {
// No vector GEPs.
if (!GepI->getType()->isPointerTy())
@@ -318,7 +332,6 @@ bool HexagonCommonGEP::isHandledGepForm(GetElementPtrInst *GepI) {
return true;
}
-
void HexagonCommonGEP::processGepInst(GetElementPtrInst *GepI,
ValueToNodeMap &NM) {
DEBUG(dbgs() << "Visiting GEP: " << *GepI << '\n');
@@ -384,7 +397,6 @@ void HexagonCommonGEP::processGepInst(GetElementPtrInst *GepI,
NM.insert(std::make_pair(GepI, PN));
}
-
void HexagonCommonGEP::collect() {
// Establish depth-first traversal order of the dominator tree.
ValueVect BO;
@@ -408,10 +420,8 @@ void HexagonCommonGEP::collect() {
DEBUG(dbgs() << "Gep nodes after initial collection:\n" << Nodes);
}
-
-namespace {
- void invert_find_roots(const NodeVect &Nodes, NodeChildrenMap &NCM,
- NodeVect &Roots) {
+static void invert_find_roots(const NodeVect &Nodes, NodeChildrenMap &NCM,
+ NodeVect &Roots) {
typedef NodeVect::const_iterator const_iterator;
for (const_iterator I = Nodes.begin(), E = Nodes.end(); I != E; ++I) {
GepNode *N = *I;
@@ -422,9 +432,10 @@ namespace {
GepNode *PN = N->Parent;
NCM[PN].push_back(N);
}
- }
+}
- void nodes_for_root(GepNode *Root, NodeChildrenMap &NCM, NodeSet &Nodes) {
+static void nodes_for_root(GepNode *Root, NodeChildrenMap &NCM,
+ NodeSet &Nodes) {
NodeVect Work;
Work.push_back(Root);
Nodes.insert(Root);
@@ -439,41 +450,43 @@ namespace {
Nodes.insert(CF->second.begin(), CF->second.end());
}
}
- }
}
-
namespace {
+
typedef std::set<NodeSet> NodeSymRel;
typedef std::pair<GepNode*,GepNode*> NodePair;
typedef std::set<NodePair> NodePairSet;
- const NodeSet *node_class(GepNode *N, NodeSymRel &Rel) {
+} // end anonymous namespace
+
+static const NodeSet *node_class(GepNode *N, NodeSymRel &Rel) {
for (NodeSymRel::iterator I = Rel.begin(), E = Rel.end(); I != E; ++I)
if (I->count(N))
return &*I;
- return 0;
- }
+ return nullptr;
+}
// Create an ordered pair of GepNode pointers. The pair will be used in
// determining equality. The only purpose of the ordering is to eliminate
// duplication due to the commutativity of equality/non-equality.
- NodePair node_pair(GepNode *N1, GepNode *N2) {
+static NodePair node_pair(GepNode *N1, GepNode *N2) {
uintptr_t P1 = uintptr_t(N1), P2 = uintptr_t(N2);
if (P1 <= P2)
return std::make_pair(N1, N2);
return std::make_pair(N2, N1);
- }
+}
- unsigned node_hash(GepNode *N) {
+static unsigned node_hash(GepNode *N) {
// Include everything except flags and parent.
FoldingSetNodeID ID;
ID.AddPointer(N->Idx);
ID.AddPointer(N->PTy);
return ID.ComputeHash();
- }
+}
- bool node_eq(GepNode *N1, GepNode *N2, NodePairSet &Eq, NodePairSet &Ne) {
+static bool node_eq(GepNode *N1, GepNode *N2, NodePairSet &Eq,
+ NodePairSet &Ne) {
// Don't cache the result for nodes with different hashes. The hash
// comparison is fast enough.
if (node_hash(N1) != node_hash(N2))
@@ -505,10 +518,8 @@ namespace {
return true;
}
return false;
- }
}
-
void HexagonCommonGEP::common() {
// The essence of this commoning is finding gep nodes that are equal.
// To do this we need to compare all pairs of nodes. To save time,
@@ -572,7 +583,6 @@ void HexagonCommonGEP::common() {
}
});
-
// Create a projection from a NodeSet to the minimal element in it.
typedef std::map<const NodeSet*,GepNode*> ProjMap;
ProjMap PM;
@@ -639,17 +649,14 @@ void HexagonCommonGEP::common() {
// Node for removal.
Erase.insert(*I);
}
- NodeVect::iterator NewE = std::remove_if(Nodes.begin(), Nodes.end(),
- in_set(Erase));
+ NodeVect::iterator NewE = remove_if(Nodes, in_set(Erase));
Nodes.resize(std::distance(Nodes.begin(), NewE));
DEBUG(dbgs() << "Gep nodes after post-commoning cleanup:\n" << Nodes);
}
-
-namespace {
- template <typename T>
- BasicBlock *nearest_common_dominator(DominatorTree *DT, T &Blocks) {
+template <typename T>
+static BasicBlock *nearest_common_dominator(DominatorTree *DT, T &Blocks) {
DEBUG({
dbgs() << "NCD of {";
for (typename T::iterator I = Blocks.begin(), E = Blocks.end();
@@ -662,23 +669,23 @@ namespace {
dbgs() << " }\n";
});
- // Allow null basic blocks in Blocks. In such cases, return 0.
+ // Allow null basic blocks in Blocks. In such cases, return nullptr.
typename T::iterator I = Blocks.begin(), E = Blocks.end();
if (I == E || !*I)
- return 0;
+ return nullptr;
BasicBlock *Dom = cast<BasicBlock>(*I);
while (++I != E) {
BasicBlock *B = cast_or_null<BasicBlock>(*I);
- Dom = B ? DT->findNearestCommonDominator(Dom, B) : 0;
+ Dom = B ? DT->findNearestCommonDominator(Dom, B) : nullptr;
if (!Dom)
- return 0;
+ return nullptr;
}
DEBUG(dbgs() << "computed:" << Dom->getName() << '\n');
return Dom;
- }
+}
- template <typename T>
- BasicBlock *nearest_common_dominatee(DominatorTree *DT, T &Blocks) {
+template <typename T>
+static BasicBlock *nearest_common_dominatee(DominatorTree *DT, T &Blocks) {
// If two blocks, A and B, dominate a block C, then A dominates B,
// or B dominates A.
typename T::iterator I = Blocks.begin(), E = Blocks.end();
@@ -695,16 +702,16 @@ namespace {
if (DT->dominates(B, DomB))
continue;
if (!DT->dominates(DomB, B))
- return 0;
+ return nullptr;
DomB = B;
}
return DomB;
- }
+}
- // Find the first use in B of any value from Values. If no such use,
- // return B->end().
- template <typename T>
- BasicBlock::iterator first_use_of_in_block(T &Values, BasicBlock *B) {
+// Find the first use in B of any value from Values. If no such use,
+// return B->end().
+template <typename T>
+static BasicBlock::iterator first_use_of_in_block(T &Values, BasicBlock *B) {
BasicBlock::iterator FirstUse = B->end(), BEnd = B->end();
typedef typename T::iterator iterator;
for (iterator I = Values.begin(), E = Values.end(); I != E; ++I) {
@@ -726,20 +733,18 @@ namespace {
FirstUse = It;
}
return FirstUse;
- }
+}
- bool is_empty(const BasicBlock *B) {
+static bool is_empty(const BasicBlock *B) {
return B->empty() || (&*B->begin() == B->getTerminator());
- }
}
-
BasicBlock *HexagonCommonGEP::recalculatePlacement(GepNode *Node,
NodeChildrenMap &NCM, NodeToValueMap &Loc) {
DEBUG(dbgs() << "Loc for node:" << Node << '\n');
// Recalculate the placement for Node, assuming that the locations of
// its children in Loc are valid.
- // Return 0 if there is no valid placement for Node (for example, it
+ // Return nullptr if there is no valid placement for Node (for example, it
// uses an index value that is not available at the location required
// to dominate all children, etc.).
@@ -782,11 +787,11 @@ BasicBlock *HexagonCommonGEP::recalculatePlacement(GepNode *Node,
BasicBlock *DomB = nearest_common_dominator(DT, Bs);
if (!DomB)
- return 0;
+ return nullptr;
// Check if the index used by Node dominates the computed dominator.
Instruction *IdxI = dyn_cast<Instruction>(Node->Idx);
if (IdxI && !DT->dominates(IdxI->getParent(), DomB))
- return 0;
+ return nullptr;
// Avoid putting nodes into empty blocks.
while (is_empty(DomB)) {
@@ -801,7 +806,6 @@ BasicBlock *HexagonCommonGEP::recalculatePlacement(GepNode *Node,
return DomB;
}
-
BasicBlock *HexagonCommonGEP::recalculatePlacementRec(GepNode *Node,
NodeChildrenMap &NCM, NodeToValueMap &Loc) {
DEBUG(dbgs() << "LocRec begin for node:" << Node << '\n');
@@ -818,7 +822,6 @@ BasicBlock *HexagonCommonGEP::recalculatePlacementRec(GepNode *Node,
return LB;
}
-
bool HexagonCommonGEP::isInvariantIn(Value *Val, Loop *L) {
if (isa<Constant>(Val) || isa<Argument>(Val))
return true;
@@ -829,7 +832,6 @@ bool HexagonCommonGEP::isInvariantIn(Value *Val, Loop *L) {
return DT->properlyDominates(DefB, HdrB);
}
-
bool HexagonCommonGEP::isInvariantIn(GepNode *Node, Loop *L) {
if (Node->Flags & GepNode::Root)
if (!isInvariantIn(Node->BaseVal, L))
@@ -837,7 +839,6 @@ bool HexagonCommonGEP::isInvariantIn(GepNode *Node, Loop *L) {
return isInvariantIn(Node->Idx, L);
}
-
bool HexagonCommonGEP::isInMainPath(BasicBlock *B, Loop *L) {
BasicBlock *HB = L->getHeader();
BasicBlock *LB = L->getLoopLatch();
@@ -849,21 +850,17 @@ bool HexagonCommonGEP::isInMainPath(BasicBlock *B, Loop *L) {
return false;
}
-
-namespace {
- BasicBlock *preheader(DominatorTree *DT, Loop *L) {
- if (BasicBlock *PH = L->getLoopPreheader())
- return PH;
- if (!OptSpeculate)
- return 0;
- DomTreeNode *DN = DT->getNode(L->getHeader());
- if (!DN)
- return 0;
- return DN->getIDom()->getBlock();
- }
+static BasicBlock *preheader(DominatorTree *DT, Loop *L) {
+ if (BasicBlock *PH = L->getLoopPreheader())
+ return PH;
+ if (!OptSpeculate)
+ return nullptr;
+ DomTreeNode *DN = DT->getNode(L->getHeader());
+ if (!DN)
+ return nullptr;
+ return DN->getIDom()->getBlock();
}
-
BasicBlock *HexagonCommonGEP::adjustForInvariance(GepNode *Node,
NodeChildrenMap &NCM, NodeToValueMap &Loc) {
// Find the "topmost" location for Node: it must be dominated by both,
@@ -913,10 +910,11 @@ BasicBlock *HexagonCommonGEP::adjustForInvariance(GepNode *Node,
return LocB;
}
-
namespace {
+
struct LocationAsBlock {
LocationAsBlock(const NodeToValueMap &L) : Map(L) {}
+
const NodeToValueMap &Map;
};
@@ -936,8 +934,8 @@ namespace {
inline bool is_constant(GepNode *N) {
return isa<ConstantInt>(N->Idx);
}
-}
+} // end anonymous namespace
void HexagonCommonGEP::separateChainForNode(GepNode *Node, Use *U,
NodeToValueMap &Loc) {
@@ -947,7 +945,7 @@ void HexagonCommonGEP::separateChainForNode(GepNode *Node, Use *U,
BasicBlock *PB = cast<Instruction>(R)->getParent();
GepNode *N = Node;
- GepNode *C = 0, *NewNode = 0;
+ GepNode *C = nullptr, *NewNode = nullptr;
while (is_constant(N) && !(N->Flags & GepNode::Root)) {
// XXX if (single-use) dont-replicate;
GepNode *NewN = new (*Mem) GepNode(N);
@@ -991,7 +989,6 @@ void HexagonCommonGEP::separateChainForNode(GepNode *Node, Use *U,
Uses[NewNode] = NewUs;
}
-
void HexagonCommonGEP::separateConstantChains(GepNode *Node,
NodeChildrenMap &NCM, NodeToValueMap &Loc) {
// First approximation: extract all chains.
@@ -1045,7 +1042,6 @@ void HexagonCommonGEP::separateConstantChains(GepNode *Node,
}
}
-
void HexagonCommonGEP::computeNodePlacement(NodeToValueMap &Loc) {
// Compute the inverse of the Node.Parent links. Also, collect the set
// of root nodes.
@@ -1080,7 +1076,6 @@ void HexagonCommonGEP::computeNodePlacement(NodeToValueMap &Loc) {
DEBUG(dbgs() << "Final node placement:\n" << LocationAsBlock(Loc));
}
-
Value *HexagonCommonGEP::fabricateGEP(NodeVect &NA, BasicBlock::iterator At,
BasicBlock *LocB) {
DEBUG(dbgs() << "Fabricating GEP in " << LocB->getName()
@@ -1089,7 +1084,7 @@ Value *HexagonCommonGEP::fabricateGEP(NodeVect &NA, BasicBlock::iterator At,
GepNode *RN = NA[0];
assert((RN->Flags & GepNode::Root) && "Creating GEP for non-root");
- Value *NewInst = 0;
+ Value *NewInst = nullptr;
Value *Input = RN->BaseVal;
Value **IdxList = new Value*[Num+1];
unsigned nax = 0;
@@ -1128,7 +1123,6 @@ Value *HexagonCommonGEP::fabricateGEP(NodeVect &NA, BasicBlock::iterator At,
return NewInst;
}
-
void HexagonCommonGEP::getAllUsersForNode(GepNode *Node, ValueVect &Values,
NodeChildrenMap &NCM) {
NodeVect Work;
@@ -1153,7 +1147,6 @@ void HexagonCommonGEP::getAllUsersForNode(GepNode *Node, ValueVect &Values,
}
}
-
void HexagonCommonGEP::materialize(NodeToValueMap &Loc) {
DEBUG(dbgs() << "Nodes before materialization:\n" << Nodes << '\n');
NodeChildrenMap NCM;
@@ -1192,7 +1185,7 @@ void HexagonCommonGEP::materialize(NodeToValueMap &Loc) {
break;
GepNode *Child = CF->second.front();
BasicBlock *ChildB = cast_or_null<BasicBlock>(Loc[Child]);
- if (ChildB != 0 && LastB != ChildB)
+ if (ChildB != nullptr && LastB != ChildB)
break;
Last = Child;
} while (true);
@@ -1236,7 +1229,6 @@ void HexagonCommonGEP::materialize(NodeToValueMap &Loc) {
}
}
-
void HexagonCommonGEP::removeDeadCode() {
ValueVect BO;
BO.push_back(&Fn->front());
@@ -1265,7 +1257,6 @@ void HexagonCommonGEP::removeDeadCode() {
}
}
-
bool HexagonCommonGEP::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
@@ -1304,9 +1295,10 @@ bool HexagonCommonGEP::runOnFunction(Function &F) {
return true;
}
-
namespace llvm {
+
FunctionPass *createHexagonCommonGEP() {
return new HexagonCommonGEP();
}
-}
+
+} // end namespace llvm
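
Editorial aside, not part of the patch: node_pair/node_hash/node_eq above follow a common pattern for commoning under a symmetric relation: canonicalize each unordered pair so cached equality answers are found regardless of argument order, and compare cheap hashes first so obviously different nodes never reach the expensive comparison. A standalone sketch of that pattern with illustrative names (not the pass's own types):

    #include <cstddef>
    #include <cstdint>
    #include <functional>
    #include <iostream>
    #include <set>
    #include <string>
    #include <utility>

    struct Node {
      std::string Key; // stands in for the (Idx, PTy) pair hashed by node_hash
    };

    // Ordering by address removes the duplication that would otherwise come
    // from the commutativity of equality (eq(A,B) vs eq(B,A)).
    static std::pair<const Node*, const Node*> orderedPair(const Node *A,
                                                           const Node *B) {
      return (uintptr_t(A) <= uintptr_t(B)) ? std::make_pair(A, B)
                                            : std::make_pair(B, A);
    }

    static std::size_t nodeHash(const Node *N) {
      return std::hash<std::string>{}(N->Key);
    }

    // Equality with memoized positive answers; pairs with different hashes
    // are rejected before touching the cache, as in node_eq.
    static bool nodeEq(const Node *A, const Node *B,
                       std::set<std::pair<const Node*, const Node*>> &Eq) {
      if (nodeHash(A) != nodeHash(B))
        return false;
      auto P = orderedPair(A, B);
      if (Eq.count(P))
        return true;
      bool Same = A->Key == B->Key;
      if (Same)
        Eq.insert(P);
      return Same;
    }

    int main() {
      Node N1{"i32*+4"}, N2{"i32*+4"}, N3{"i8*+1"};
      std::set<std::pair<const Node*, const Node*>> Eq;
      std::cout << nodeEq(&N1, &N2, Eq) << nodeEq(&N2, &N1, Eq)
                << nodeEq(&N1, &N3, Eq) << '\n'; // prints 110
    }
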
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp
new file mode 100644
index 000000000000..783b916e04b0
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonConstPropagation.cpp
@@ -0,0 +1,3149 @@
+//===--- HexagonConstPropagation.cpp --------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hcp"
+
+#include "HexagonInstrInfo.h"
+#include "HexagonRegisterInfo.h"
+#include "HexagonSubtarget.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+#include <iterator>
+#include <map>
+#include <queue>
+#include <set>
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+
+namespace {
+
+ // Properties of a value that are tracked by the propagation.
+  // A property that is marked as present (i.e. bit is set) denotes that the
+ // value is known (proven) to have this property. Not all combinations
+ // of bits make sense, for example Zero and NonZero are mutually exclusive,
+ // but on the other hand, Zero implies Finite. In this case, whenever
+ // the Zero property is present, Finite should also be present.
+ class ConstantProperties {
+ public:
+ enum {
+ Unknown = 0x0000,
+ Zero = 0x0001,
+ NonZero = 0x0002,
+ Finite = 0x0004,
+ Infinity = 0x0008,
+ NaN = 0x0010,
+ SignedZero = 0x0020,
+ NumericProperties = (Zero|NonZero|Finite|Infinity|NaN|SignedZero),
+ PosOrZero = 0x0100,
+ NegOrZero = 0x0200,
+ SignProperties = (PosOrZero|NegOrZero),
+ Everything = (NumericProperties|SignProperties)
+ };
+
+ // For a given constant, deduce the set of trackable properties that this
+ // constant has.
+ static uint32_t deduce(const Constant *C);
+ };
+
+ // A representation of a register as it can appear in a MachineOperand,
+ // i.e. a pair register:subregister.
+ struct Register {
+ unsigned Reg, SubReg;
+
+ explicit Register(unsigned R, unsigned SR = 0) : Reg(R), SubReg(SR) {}
+ explicit Register(const MachineOperand &MO)
+ : Reg(MO.getReg()), SubReg(MO.getSubReg()) {}
+
+ void print(const TargetRegisterInfo *TRI = nullptr) const {
+ dbgs() << PrintReg(Reg, TRI, SubReg);
+ }
+
+ bool operator== (const Register &R) const {
+ return (Reg == R.Reg) && (SubReg == R.SubReg);
+ }
+ };
+
+  // Lattice cell, based on the one described in the W-Z paper on constant
+  // propagation.
+  // A lattice cell will be allowed to hold multiple constant values. While
+ // multiple values would normally indicate "bottom", we can still derive
+ // some useful information from them. For example, comparison X > 0
+ // could be folded if all the values in the cell associated with X are
+ // positive.
+ class LatticeCell {
+ private:
+ enum { Normal, Top, Bottom };
+
+ static const unsigned MaxCellSize = 4;
+
+ unsigned Kind:2;
+ unsigned Size:3;
+ unsigned IsSpecial:1;
+ unsigned :0;
+
+ public:
+ union {
+ uint32_t Properties;
+ const Constant *Value;
+ const Constant *Values[MaxCellSize];
+ };
+
+ LatticeCell() : Kind(Top), Size(0), IsSpecial(false) {
+ for (unsigned i = 0; i < MaxCellSize; ++i)
+ Values[i] = nullptr;
+ }
+
+ bool meet(const LatticeCell &L);
+ bool add(const Constant *C);
+ bool add(uint32_t Property);
+ uint32_t properties() const;
+ unsigned size() const { return Size; }
+
+ LatticeCell &operator= (const LatticeCell &L) {
+ if (this != &L) {
+ // This memcpy also copies Properties (when L.Size == 0).
+ uint32_t N = L.IsSpecial ? sizeof L.Properties
+ : L.Size*sizeof(const Constant*);
+ memcpy(Values, L.Values, N);
+ Kind = L.Kind;
+ Size = L.Size;
+ IsSpecial = L.IsSpecial;
+ }
+ return *this;
+ }
+
+ bool isSingle() const { return size() == 1; }
+ bool isProperty() const { return IsSpecial; }
+ bool isTop() const { return Kind == Top; }
+ bool isBottom() const { return Kind == Bottom; }
+
+ bool setBottom() {
+ bool Changed = (Kind != Bottom);
+ Kind = Bottom;
+ Size = 0;
+ IsSpecial = false;
+ return Changed;
+ }
+
+ void print(raw_ostream &os) const;
+
+ private:
+ void setProperty() {
+ IsSpecial = true;
+ Size = 0;
+ Kind = Normal;
+ }
+
+ bool convertToProperty();
+ };
+
+ raw_ostream &operator<< (raw_ostream &os, const LatticeCell &L) {
+ L.print(os);
+ return os;
+ }
+
+ class MachineConstEvaluator;
+
+ class MachineConstPropagator {
+ public:
+ MachineConstPropagator(MachineConstEvaluator &E) : MCE(E) {
+ Bottom.setBottom();
+ }
+
+ // Mapping: vreg -> cell
+ // The keys are registers _without_ subregisters. This won't allow
+ // definitions in the form of "vreg:subreg<def> = ...". Such definitions
+ // would be questionable from the point of view of SSA, since the "vreg"
+ // could not be initialized in its entirety (specifically, an instruction
+ // defining the "other part" of "vreg" would also count as a definition
+ // of "vreg", which would violate the SSA).
+ // If a value of a pair vreg:subreg needs to be obtained, the cell for
+ // "vreg" needs to be looked up, and then the value of subregister "subreg"
+ // needs to be evaluated.
+ class CellMap {
+ public:
+ CellMap() {
+ assert(Top.isTop());
+ Bottom.setBottom();
+ }
+
+ void clear() { Map.clear(); }
+
+ bool has(unsigned R) const {
+ // All non-virtual registers are considered "bottom".
+ if (!TargetRegisterInfo::isVirtualRegister(R))
+ return true;
+ MapType::const_iterator F = Map.find(R);
+ return F != Map.end();
+ }
+
+ const LatticeCell &get(unsigned R) const {
+ if (!TargetRegisterInfo::isVirtualRegister(R))
+ return Bottom;
+ MapType::const_iterator F = Map.find(R);
+ if (F != Map.end())
+ return F->second;
+ return Top;
+ }
+
+ // Invalidates any const references.
+ void update(unsigned R, const LatticeCell &L) {
+ Map[R] = L;
+ }
+
+ void print(raw_ostream &os, const TargetRegisterInfo &TRI) const;
+
+ private:
+ typedef std::map<unsigned,LatticeCell> MapType;
+ MapType Map;
+ // To avoid creating "top" entries, return a const reference to
+ // this cell in "get". Also, have a "Bottom" cell to return from
+ // get when a value of a physical register is requested.
+ LatticeCell Top, Bottom;
+
+ public:
+ typedef MapType::const_iterator const_iterator;
+ const_iterator begin() const { return Map.begin(); }
+ const_iterator end() const { return Map.end(); }
+ };
+
+ bool run(MachineFunction &MF);
+
+ private:
+ void visitPHI(const MachineInstr &PN);
+ void visitNonBranch(const MachineInstr &MI);
+ void visitBranchesFrom(const MachineInstr &BrI);
+ void visitUsesOf(unsigned R);
+ bool computeBlockSuccessors(const MachineBasicBlock *MB,
+ SetVector<const MachineBasicBlock*> &Targets);
+ void removeCFGEdge(MachineBasicBlock *From, MachineBasicBlock *To);
+
+ void propagate(MachineFunction &MF);
+ bool rewrite(MachineFunction &MF);
+
+ MachineRegisterInfo *MRI;
+ MachineConstEvaluator &MCE;
+
+ typedef std::pair<unsigned,unsigned> CFGEdge;
+ typedef std::set<CFGEdge> SetOfCFGEdge;
+ typedef std::set<const MachineInstr*> SetOfInstr;
+ typedef std::queue<CFGEdge> QueueOfCFGEdge;
+
+ LatticeCell Bottom;
+ CellMap Cells;
+ SetOfCFGEdge EdgeExec;
+ SetOfInstr InstrExec;
+ QueueOfCFGEdge FlowQ;
+ };
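
Editorial aside, not part of the patch: the comment inside CellMap explains the design choice of handing out const references to shared Top and Bottom cells so that a lookup of an unseen virtual register never creates a map entry. A standalone sketch of that lookup discipline with a toy cell type:

    #include <cstddef>
    #include <iostream>
    #include <map>
    #include <string>

    struct Cell { std::string Tag; };

    class CellMap {
      std::map<unsigned, Cell> Map;
      Cell Top;    // shared "no entry yet" cell
      Cell Bottom; // returned for untracked (e.g. physical) registers
    public:
      CellMap() : Top{"top"}, Bottom{"bottom"} {}

      // get() never inserts; it returns a reference to a sentinel instead.
      // As the patch notes, update() invalidates such references.
      const Cell &get(unsigned R, bool IsVirtual) const {
        if (!IsVirtual)
          return Bottom;
        auto F = Map.find(R);
        return F != Map.end() ? F->second : Top;
      }
      void update(unsigned R, const Cell &C) { Map[R] = C; }
      std::size_t size() const { return Map.size(); }
    };

    int main() {
      CellMap CM;
      std::cout << CM.get(5, true).Tag << ' ' << CM.size() << '\n'; // top 0
      CM.update(5, Cell{"{ #42 }"});
      std::cout << CM.get(5, true).Tag << ' ' << CM.size() << '\n'; // { #42 } 1
      std::cout << CM.get(0, false).Tag << '\n';                    // bottom
    }
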
+
+ // The "evaluator/rewriter" of machine instructions. This is an abstract
+ // base class that provides the interface that the propagator will use,
+ // as well as some helper functions that are target-independent.
+ class MachineConstEvaluator {
+ public:
+ MachineConstEvaluator(MachineFunction &Fn)
+ : TRI(*Fn.getSubtarget().getRegisterInfo()),
+ MF(Fn), CX(Fn.getFunction()->getContext()) {}
+ virtual ~MachineConstEvaluator() = default;
+
+ // The required interface:
+ // - A set of three "evaluate" functions. Each returns "true" if the
+ // computation succeeded, "false" otherwise.
+ // (1) Given an instruction MI, and the map with input values "Inputs",
+ // compute the set of output values "Outputs". An example of when
+ // the computation can "fail" is if MI is not an instruction that
+ // is recognized by the evaluator.
+ // (2) Given a register R (as reg:subreg), compute the cell that
+ // corresponds to the "subreg" part of the given register.
+ // (3) Given a branch instruction BrI, compute the set of target blocks.
+ // If the branch can fall-through, add null (0) to the list of
+ // possible targets.
+ // - A function "rewrite", that given the cell map after propagation,
+ // could rewrite instruction MI in a more beneficial form. Return
+ // "true" if a change has been made, "false" otherwise.
+ typedef MachineConstPropagator::CellMap CellMap;
+ virtual bool evaluate(const MachineInstr &MI, const CellMap &Inputs,
+ CellMap &Outputs) = 0;
+ virtual bool evaluate(const Register &R, const LatticeCell &SrcC,
+ LatticeCell &Result) = 0;
+ virtual bool evaluate(const MachineInstr &BrI, const CellMap &Inputs,
+ SetVector<const MachineBasicBlock*> &Targets,
+ bool &CanFallThru) = 0;
+ virtual bool rewrite(MachineInstr &MI, const CellMap &Inputs) = 0;
+
+ const TargetRegisterInfo &TRI;
+
+ protected:
+ MachineFunction &MF;
+ LLVMContext &CX;
+
+ struct Comparison {
+ enum {
+ Unk = 0x00,
+ EQ = 0x01,
+ NE = 0x02,
+ L = 0x04, // Less-than property.
+ G = 0x08, // Greater-than property.
+ U = 0x40, // Unsigned property.
+ LTs = L,
+ LEs = L | EQ,
+ GTs = G,
+ GEs = G | EQ,
+ LTu = L | U,
+ LEu = L | EQ | U,
+ GTu = G | U,
+ GEu = G | EQ | U
+ };
+
+ static uint32_t negate(uint32_t Cmp) {
+ if (Cmp == EQ)
+ return NE;
+ if (Cmp == NE)
+ return EQ;
+ assert((Cmp & (L|G)) != (L|G));
+ return Cmp ^ (L|G);
+ }
+ };
+
+ // Helper functions.
+
+ bool getCell(const Register &R, const CellMap &Inputs, LatticeCell &RC);
+ bool constToInt(const Constant *C, APInt &Val) const;
+ bool constToFloat(const Constant *C, APFloat &Val) const;
+ const ConstantInt *intToConst(const APInt &Val) const;
+
+ // Compares.
+ bool evaluateCMPrr(uint32_t Cmp, const Register &R1, const Register &R2,
+ const CellMap &Inputs, bool &Result);
+ bool evaluateCMPri(uint32_t Cmp, const Register &R1, const APInt &A2,
+ const CellMap &Inputs, bool &Result);
+ bool evaluateCMPrp(uint32_t Cmp, const Register &R1, uint64_t Props2,
+ const CellMap &Inputs, bool &Result);
+ bool evaluateCMPii(uint32_t Cmp, const APInt &A1, const APInt &A2,
+ bool &Result);
+ bool evaluateCMPpi(uint32_t Cmp, uint32_t Props, const APInt &A2,
+ bool &Result);
+ bool evaluateCMPpp(uint32_t Cmp, uint32_t Props1, uint32_t Props2,
+ bool &Result);
+
+ bool evaluateCOPY(const Register &R1, const CellMap &Inputs,
+ LatticeCell &Result);
+
+ // Logical operations.
+ bool evaluateANDrr(const Register &R1, const Register &R2,
+ const CellMap &Inputs, LatticeCell &Result);
+ bool evaluateANDri(const Register &R1, const APInt &A2,
+ const CellMap &Inputs, LatticeCell &Result);
+ bool evaluateANDii(const APInt &A1, const APInt &A2, APInt &Result);
+ bool evaluateORrr(const Register &R1, const Register &R2,
+ const CellMap &Inputs, LatticeCell &Result);
+ bool evaluateORri(const Register &R1, const APInt &A2,
+ const CellMap &Inputs, LatticeCell &Result);
+ bool evaluateORii(const APInt &A1, const APInt &A2, APInt &Result);
+ bool evaluateXORrr(const Register &R1, const Register &R2,
+ const CellMap &Inputs, LatticeCell &Result);
+ bool evaluateXORri(const Register &R1, const APInt &A2,
+ const CellMap &Inputs, LatticeCell &Result);
+ bool evaluateXORii(const APInt &A1, const APInt &A2, APInt &Result);
+
+ // Extensions.
+ bool evaluateZEXTr(const Register &R1, unsigned Width, unsigned Bits,
+ const CellMap &Inputs, LatticeCell &Result);
+ bool evaluateZEXTi(const APInt &A1, unsigned Width, unsigned Bits,
+ APInt &Result);
+ bool evaluateSEXTr(const Register &R1, unsigned Width, unsigned Bits,
+ const CellMap &Inputs, LatticeCell &Result);
+ bool evaluateSEXTi(const APInt &A1, unsigned Width, unsigned Bits,
+ APInt &Result);
+
+ // Leading/trailing bits.
+ bool evaluateCLBr(const Register &R1, bool Zeros, bool Ones,
+ const CellMap &Inputs, LatticeCell &Result);
+ bool evaluateCLBi(const APInt &A1, bool Zeros, bool Ones, APInt &Result);
+ bool evaluateCTBr(const Register &R1, bool Zeros, bool Ones,
+ const CellMap &Inputs, LatticeCell &Result);
+ bool evaluateCTBi(const APInt &A1, bool Zeros, bool Ones, APInt &Result);
+
+ // Bitfield extract.
+ bool evaluateEXTRACTr(const Register &R1, unsigned Width, unsigned Bits,
+ unsigned Offset, bool Signed, const CellMap &Inputs,
+ LatticeCell &Result);
+ bool evaluateEXTRACTi(const APInt &A1, unsigned Bits, unsigned Offset,
+ bool Signed, APInt &Result);
+ // Vector operations.
+ bool evaluateSplatr(const Register &R1, unsigned Bits, unsigned Count,
+ const CellMap &Inputs, LatticeCell &Result);
+ bool evaluateSplati(const APInt &A1, unsigned Bits, unsigned Count,
+ APInt &Result);
+ };
+
+} // end anonymous namespace
+
+uint32_t ConstantProperties::deduce(const Constant *C) {
+ if (isa<ConstantInt>(C)) {
+ const ConstantInt *CI = cast<ConstantInt>(C);
+ if (CI->isZero())
+ return Zero | PosOrZero | NegOrZero | Finite;
+ uint32_t Props = (NonZero | Finite);
+ if (CI->isNegative())
+ return Props | NegOrZero;
+ return Props | PosOrZero;
+ }
+
+ if (isa<ConstantFP>(C)) {
+ const ConstantFP *CF = cast<ConstantFP>(C);
+ uint32_t Props = CF->isNegative() ? (NegOrZero|NonZero)
+ : PosOrZero;
+ if (CF->isZero())
+ return (Props & ~NumericProperties) | (Zero|Finite);
+ Props = (Props & ~NumericProperties) | NonZero;
+ if (CF->isNaN())
+ return (Props & ~NumericProperties) | NaN;
+ const APFloat &Val = CF->getValueAPF();
+ if (Val.isInfinity())
+ return (Props & ~NumericProperties) | Infinity;
+ Props |= Finite;
+ return Props;
+ }
+
+ return Unknown;
+}
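
Editorial aside, not part of the patch: deduce() packs facts such as "Zero implies Finite" into a single bitmask, and the properties shared by a set of constants are simply the bitwise AND of the per-constant masks (which is how a full cell later degrades into a property cell). A standalone sketch of the same idea with a simplified mask over plain 64-bit integers:

    #include <cstdint>
    #include <initializer_list>
    #include <iostream>

    // Toy property bits; same idea as ConstantProperties, not the same values.
    enum : uint32_t {
      Zero      = 1u << 0,
      NonZero   = 1u << 1,
      Finite    = 1u << 2,
      PosOrZero = 1u << 3,
      NegOrZero = 1u << 4,
    };

    // Properties of a single integer (every integer is finite; zero is both
    // "positive or zero" and "negative or zero", as in the patch).
    uint32_t deduce(int64_t V) {
      if (V == 0)
        return Zero | PosOrZero | NegOrZero | Finite;
      return (NonZero | Finite) | (V < 0 ? NegOrZero : PosOrZero);
    }

    // A set of values keeps only the properties shared by all of them.
    uint32_t deduceAll(std::initializer_list<int64_t> Vs) {
      uint32_t Ps = ~uint32_t(0);
      for (int64_t V : Vs)
        Ps &= deduce(V);
      return Ps;
    }

    int main() {
      std::cout << std::hex
                << deduceAll({1, 2, 3}) << ' ' // NonZero|Finite|PosOrZero
                << deduceAll({-1, 4}) << ' '   // NonZero|Finite
                << deduceAll({0, 5}) << '\n';  // Finite|PosOrZero
    }
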
+
+// Convert a cell from a set of specific values to a cell that tracks
+// properties.
+bool LatticeCell::convertToProperty() {
+ if (isProperty())
+ return false;
+ // Corner case: converting a fresh (top) cell to "special".
+ // This can happen, when adding a property to a top cell.
+ uint32_t Everything = ConstantProperties::Everything;
+ uint32_t Ps = !isTop() ? properties()
+ : Everything;
+ if (Ps != ConstantProperties::Unknown) {
+ Properties = Ps;
+ setProperty();
+ } else {
+ setBottom();
+ }
+ return true;
+}
+
+void LatticeCell::print(raw_ostream &os) const {
+ if (isProperty()) {
+ os << "{ ";
+ uint32_t Ps = properties();
+ if (Ps & ConstantProperties::Zero)
+ os << "zero ";
+ if (Ps & ConstantProperties::NonZero)
+ os << "nonzero ";
+ if (Ps & ConstantProperties::Finite)
+ os << "finite ";
+ if (Ps & ConstantProperties::Infinity)
+ os << "infinity ";
+ if (Ps & ConstantProperties::NaN)
+ os << "nan ";
+ if (Ps & ConstantProperties::PosOrZero)
+ os << "poz ";
+ if (Ps & ConstantProperties::NegOrZero)
+ os << "nez ";
+ os << '}';
+ return;
+ }
+
+ os << "{ ";
+ if (isBottom()) {
+ os << "bottom";
+ } else if (isTop()) {
+ os << "top";
+ } else {
+ for (unsigned i = 0; i < size(); ++i) {
+ const Constant *C = Values[i];
+ if (i != 0)
+ os << ", ";
+ C->print(os);
+ }
+ }
+ os << " }";
+}
+
+// "Meet" operation on two cells. This is the key of the propagation
+// algorithm.
+bool LatticeCell::meet(const LatticeCell &L) {
+ bool Changed = false;
+ if (L.isBottom())
+ Changed = setBottom();
+ if (isBottom() || L.isTop())
+ return Changed;
+ if (isTop()) {
+ *this = L;
+ // L can be neither Top nor Bottom, so *this must have changed.
+ return true;
+ }
+
+ // Top/bottom cases covered. Need to integrate L's set into ours.
+ if (L.isProperty())
+ return add(L.properties());
+ for (unsigned i = 0; i < L.size(); ++i) {
+ const Constant *LC = L.Values[i];
+ Changed |= add(LC);
+ }
+ return Changed;
+}
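
Editorial aside, not part of the patch: meet() above is the core lattice operation: Top is the identity, Bottom absorbs everything, and otherwise the incoming values are folded in one at a time until the size cap forces a degradation. A much-reduced standalone sketch of the same shape (it drops straight to Bottom where the real cell would switch to property tracking):

    #include <iostream>
    #include <set>

    struct Cell {
      enum Kind { Top, Normal, Bottom } K = Top;
      std::set<long> Vals;
      static constexpr unsigned MaxSize = 4;

      bool add(long V) {
        if (K == Bottom) return false;
        if (K == Normal && Vals.count(V)) return false;
        if (Vals.size() == MaxSize) { K = Bottom; Vals.clear(); return true; }
        K = Normal; Vals.insert(V); return true;
      }

      // Returns true if *this changed; that return value is what keeps the
      // propagation work-list going in the real pass.
      bool meet(const Cell &L) {
        bool Changed = false;
        if (L.K == Bottom && K != Bottom) { K = Bottom; Vals.clear(); Changed = true; }
        if (K == Bottom || L.K == Top) return Changed;
        if (K == Top) { *this = L; return true; }
        for (long V : L.Vals) Changed |= add(V);
        return Changed;
      }
    };

    int main() {
      Cell A, B;            // both start as Top
      B.add(3); B.add(7);
      std::cout << A.meet(B) << ' ' << A.Vals.size() << '\n'; // 1 2
      std::cout << A.meet(B) << '\n';                         // 0 (no change)
    }
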
+
+// Add a new constant to the cell. This is actually where the cell update
+// happens. If a cell has room for more constants, the new constant is added.
+// Otherwise, the cell is converted to a "property" cell (i.e. a cell that
+// will track properties of the associated values, and not the values
+// themselves). Care is taken to handle special cases, like "bottom", etc.
+bool LatticeCell::add(const Constant *LC) {
+ assert(LC);
+ if (isBottom())
+ return false;
+
+ if (!isProperty()) {
+ // Cell is not special. Try to add the constant here first,
+ // if there is room.
+ unsigned Index = 0;
+ while (Index < Size) {
+ const Constant *C = Values[Index];
+ // If the constant is already here, no change is needed.
+ if (C == LC)
+ return false;
+ Index++;
+ }
+ if (Index < MaxCellSize) {
+ Values[Index] = LC;
+ Kind = Normal;
+ Size++;
+ return true;
+ }
+ }
+
+ bool Changed = false;
+
+ // This cell is special, or is not special, but is full. After this
+ // it will be special.
+ Changed = convertToProperty();
+ uint32_t Ps = properties();
+ uint32_t NewPs = Ps & ConstantProperties::deduce(LC);
+ if (NewPs == ConstantProperties::Unknown) {
+ setBottom();
+ return true;
+ }
+ if (Ps != NewPs) {
+ Properties = NewPs;
+ Changed = true;
+ }
+ return Changed;
+}
+
+// Add a property to the cell. This will force the cell to become a property-
+// tracking cell.
+bool LatticeCell::add(uint32_t Property) {
+ bool Changed = convertToProperty();
+ uint32_t Ps = properties();
+ if (Ps == (Ps & Property))
+ return Changed;
+ Properties = Property & Ps;
+ return true;
+}
+
+// Return the properties of the values in the cell. This is valid for any
+// cell, and does not alter the cell itself.
+uint32_t LatticeCell::properties() const {
+ if (isProperty())
+ return Properties;
+ assert(!isTop() && "Should not call this for a top cell");
+ if (isBottom())
+ return ConstantProperties::Unknown;
+
+ assert(size() > 0 && "Empty cell");
+ uint32_t Ps = ConstantProperties::deduce(Values[0]);
+ for (unsigned i = 1; i < size(); ++i) {
+ if (Ps == ConstantProperties::Unknown)
+ break;
+ Ps &= ConstantProperties::deduce(Values[i]);
+ }
+ return Ps;
+}
+
+void MachineConstPropagator::CellMap::print(raw_ostream &os,
+ const TargetRegisterInfo &TRI) const {
+ for (auto &I : Map)
+ dbgs() << " " << PrintReg(I.first, &TRI) << " -> " << I.second << '\n';
+}
+
+void MachineConstPropagator::visitPHI(const MachineInstr &PN) {
+ const MachineBasicBlock *MB = PN.getParent();
+ unsigned MBN = MB->getNumber();
+ DEBUG(dbgs() << "Visiting FI(BB#" << MBN << "): " << PN);
+
+ const MachineOperand &MD = PN.getOperand(0);
+ Register DefR(MD);
+ assert(TargetRegisterInfo::isVirtualRegister(DefR.Reg));
+
+ bool Changed = false;
+
+ // If the def has a sub-register, set the corresponding cell to "bottom".
+ if (DefR.SubReg) {
+Bottomize:
+ const LatticeCell &T = Cells.get(DefR.Reg);
+ Changed = !T.isBottom();
+ Cells.update(DefR.Reg, Bottom);
+ if (Changed)
+ visitUsesOf(DefR.Reg);
+ return;
+ }
+
+ LatticeCell DefC = Cells.get(DefR.Reg);
+
+ for (unsigned i = 1, n = PN.getNumOperands(); i < n; i += 2) {
+ const MachineBasicBlock *PB = PN.getOperand(i+1).getMBB();
+ unsigned PBN = PB->getNumber();
+ if (!EdgeExec.count(CFGEdge(PBN, MBN))) {
+ DEBUG(dbgs() << " edge BB#" << PBN << "->BB#" << MBN
+ << " not executable\n");
+ continue;
+ }
+ const MachineOperand &SO = PN.getOperand(i);
+ Register UseR(SO);
+ // If the input is not a virtual register, we don't really know what
+ // value it holds.
+ if (!TargetRegisterInfo::isVirtualRegister(UseR.Reg))
+ goto Bottomize;
+ // If there is no cell for an input register, it means top.
+ if (!Cells.has(UseR.Reg))
+ continue;
+
+ LatticeCell SrcC;
+ bool Eval = MCE.evaluate(UseR, Cells.get(UseR.Reg), SrcC);
+ DEBUG(dbgs() << " edge from BB#" << PBN << ": "
+ << PrintReg(UseR.Reg, &MCE.TRI, UseR.SubReg)
+ << SrcC << '\n');
+ Changed |= Eval ? DefC.meet(SrcC)
+ : DefC.setBottom();
+ Cells.update(DefR.Reg, DefC);
+ if (DefC.isBottom())
+ break;
+ }
+ if (Changed)
+ visitUsesOf(DefR.Reg);
+}
+
+void MachineConstPropagator::visitNonBranch(const MachineInstr &MI) {
+ DEBUG(dbgs() << "Visiting MI(BB#" << MI.getParent()->getNumber()
+ << "): " << MI);
+ CellMap Outputs;
+ bool Eval = MCE.evaluate(MI, Cells, Outputs);
+ DEBUG({
+ if (Eval) {
+ dbgs() << " outputs:";
+ for (auto &I : Outputs)
+ dbgs() << ' ' << I.second;
+ dbgs() << '\n';
+ }
+ });
+
+ // Update outputs. If the value was not computed, set all the
+ // def cells to bottom.
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ Register DefR(MO);
+ // Only track virtual registers.
+ if (!TargetRegisterInfo::isVirtualRegister(DefR.Reg))
+ continue;
+ bool Changed = false;
+ // If the evaluation failed, set cells for all output registers to bottom.
+ if (!Eval) {
+ const LatticeCell &T = Cells.get(DefR.Reg);
+ Changed = !T.isBottom();
+ Cells.update(DefR.Reg, Bottom);
+ } else {
+ // Find the corresponding cell in the computed outputs.
+ // If it's not there, go on to the next def.
+ if (!Outputs.has(DefR.Reg))
+ continue;
+ LatticeCell RC = Cells.get(DefR.Reg);
+ Changed = RC.meet(Outputs.get(DefR.Reg));
+ Cells.update(DefR.Reg, RC);
+ }
+ if (Changed)
+ visitUsesOf(DefR.Reg);
+ }
+}
+
+// \brief Starting at a given branch, visit remaining branches in the block.
+// Traverse over the subsequent branches for as long as the preceding one
+// can fall through. Add all the possible targets to the flow work queue,
+// including the potential fall-through to the layout-successor block.
+void MachineConstPropagator::visitBranchesFrom(const MachineInstr &BrI) {
+ const MachineBasicBlock &B = *BrI.getParent();
+ unsigned MBN = B.getNumber();
+ MachineBasicBlock::const_iterator It = BrI.getIterator();
+ MachineBasicBlock::const_iterator End = B.end();
+
+ SetVector<const MachineBasicBlock*> Targets;
+ bool EvalOk = true, FallsThru = true;
+ while (It != End) {
+ const MachineInstr &MI = *It;
+ InstrExec.insert(&MI);
+ DEBUG(dbgs() << "Visiting " << (EvalOk ? "BR" : "br") << "(BB#"
+ << MBN << "): " << MI);
+ // Do not evaluate subsequent branches if the evaluation of any of the
+ // previous branches failed. Keep iterating over the branches only
+ // to mark them as executable.
+ EvalOk = EvalOk && MCE.evaluate(MI, Cells, Targets, FallsThru);
+ if (!EvalOk)
+ FallsThru = true;
+ if (!FallsThru)
+ break;
+ ++It;
+ }
+
+ if (EvalOk) {
+ // Need to add all CFG successors that lead to EH landing pads.
+ // There won't be explicit branches to these blocks, but they must
+ // be processed.
+ for (const MachineBasicBlock *SB : B.successors()) {
+ if (SB->isEHPad())
+ Targets.insert(SB);
+ }
+ if (FallsThru) {
+ const MachineFunction &MF = *B.getParent();
+ MachineFunction::const_iterator BI = B.getIterator();
+ MachineFunction::const_iterator Next = std::next(BI);
+ if (Next != MF.end())
+ Targets.insert(&*Next);
+ }
+ } else {
+    // If the evaluation of the branches failed, make "Targets" the set of
+    // all successors of the block from the CFG.
+    // If the evaluation succeeded for all visited branches, and the last
+    // one set "FallsThru", then an edge to the layout successor is added
+    // to the targets.
+ Targets.clear();
+ DEBUG(dbgs() << " failed to evaluate a branch...adding all CFG "
+ "successors\n");
+ for (const MachineBasicBlock *SB : B.successors())
+ Targets.insert(SB);
+ }
+
+ for (const MachineBasicBlock *TB : Targets) {
+ unsigned TBN = TB->getNumber();
+ DEBUG(dbgs() << " pushing edge BB#" << MBN << " -> BB#" << TBN << "\n");
+ FlowQ.push(CFGEdge(MBN, TBN));
+ }
+}
+
+void MachineConstPropagator::visitUsesOf(unsigned Reg) {
+ DEBUG(dbgs() << "Visiting uses of " << PrintReg(Reg, &MCE.TRI)
+ << Cells.get(Reg) << '\n');
+ for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
+    // Do not process non-executable instructions. They can become executable
+    // later (via a flow-edge in the work queue). In such a case, the
+    // instruction will be visited at that time.
+ if (!InstrExec.count(&MI))
+ continue;
+ if (MI.isPHI())
+ visitPHI(MI);
+ else if (!MI.isBranch())
+ visitNonBranch(MI);
+ else
+ visitBranchesFrom(MI);
+ }
+}
+
+bool MachineConstPropagator::computeBlockSuccessors(const MachineBasicBlock *MB,
+ SetVector<const MachineBasicBlock*> &Targets) {
+ MachineBasicBlock::const_iterator FirstBr = MB->end();
+ for (const MachineInstr &MI : *MB) {
+ if (MI.isDebugValue())
+ continue;
+ if (MI.isBranch()) {
+ FirstBr = MI.getIterator();
+ break;
+ }
+ }
+
+ Targets.clear();
+ MachineBasicBlock::const_iterator End = MB->end();
+
+ bool DoNext = true;
+ for (MachineBasicBlock::const_iterator I = FirstBr; I != End; ++I) {
+ const MachineInstr &MI = *I;
+ // Can there be debug instructions between branches?
+ if (MI.isDebugValue())
+ continue;
+ if (!InstrExec.count(&MI))
+ continue;
+ bool Eval = MCE.evaluate(MI, Cells, Targets, DoNext);
+ if (!Eval)
+ return false;
+ if (!DoNext)
+ break;
+ }
+ // If the last branch could fall-through, add block's layout successor.
+ if (DoNext) {
+ MachineFunction::const_iterator BI = MB->getIterator();
+ MachineFunction::const_iterator NextI = std::next(BI);
+ if (NextI != MB->getParent()->end())
+ Targets.insert(&*NextI);
+ }
+
+ // Add all the EH landing pads.
+ for (const MachineBasicBlock *SB : MB->successors())
+ if (SB->isEHPad())
+ Targets.insert(SB);
+
+ return true;
+}
+
+void MachineConstPropagator::removeCFGEdge(MachineBasicBlock *From,
+ MachineBasicBlock *To) {
+ // First, remove the CFG successor/predecessor information.
+ From->removeSuccessor(To);
+ // Remove all corresponding PHI operands in the To block.
+ for (auto I = To->begin(), E = To->getFirstNonPHI(); I != E; ++I) {
+ MachineInstr *PN = &*I;
+ // reg0 = PHI reg1, bb2, reg3, bb4, ...
+ int N = PN->getNumOperands()-2;
+ while (N > 0) {
+ if (PN->getOperand(N+1).getMBB() == From) {
+ PN->RemoveOperand(N+1);
+ PN->RemoveOperand(N);
+ }
+ N -= 2;
+ }
+ }
+}
+
+void MachineConstPropagator::propagate(MachineFunction &MF) {
+ MachineBasicBlock *Entry = GraphTraits<MachineFunction*>::getEntryNode(&MF);
+ unsigned EntryNum = Entry->getNumber();
+
+ // Start with a fake edge, just to process the entry node.
+ FlowQ.push(CFGEdge(EntryNum, EntryNum));
+
+ while (!FlowQ.empty()) {
+ CFGEdge Edge = FlowQ.front();
+ FlowQ.pop();
+
+ DEBUG(dbgs() << "Picked edge BB#" << Edge.first << "->BB#"
+ << Edge.second << '\n');
+ if (Edge.first != EntryNum)
+ if (EdgeExec.count(Edge))
+ continue;
+ EdgeExec.insert(Edge);
+ MachineBasicBlock *SB = MF.getBlockNumbered(Edge.second);
+
+ // Process the block in three stages:
+ // - visit all PHI nodes,
+ // - visit all non-branch instructions,
+ // - visit block branches.
+ MachineBasicBlock::const_iterator It = SB->begin(), End = SB->end();
+
+ // Visit PHI nodes in the successor block.
+ while (It != End && It->isPHI()) {
+ InstrExec.insert(&*It);
+ visitPHI(*It);
+ ++It;
+ }
+
+ // If the successor block just became executable, visit all instructions.
+ // To see if this is the first time we're visiting it, check the first
+ // non-debug instruction to see if it is executable.
+ while (It != End && It->isDebugValue())
+ ++It;
+ assert(It == End || !It->isPHI());
+ // If this block has been visited, go on to the next one.
+ if (It != End && InstrExec.count(&*It))
+ continue;
+ // For now, scan all non-branch instructions. Branches require different
+ // processing.
+ while (It != End && !It->isBranch()) {
+ if (!It->isDebugValue()) {
+ InstrExec.insert(&*It);
+ visitNonBranch(*It);
+ }
+ ++It;
+ }
+
+ // Time to process the end of the block. This is different from
+ // processing regular (non-branch) instructions, because there can
+ // be multiple branches in a block, and they can cause the block to
+ // terminate early.
+ if (It != End) {
+ visitBranchesFrom(*It);
+ } else {
+ // If the block didn't have a branch, add all successor edges to the
+ // work queue. (There should really be only one successor in such case.)
+ unsigned SBN = SB->getNumber();
+ for (const MachineBasicBlock *SSB : SB->successors())
+ FlowQ.push(CFGEdge(SBN, SSB->getNumber()));
+ }
+ } // while (FlowQ)
+
+ DEBUG({
+ dbgs() << "Cells after propagation:\n";
+ Cells.print(dbgs(), MCE.TRI);
+ dbgs() << "Dead CFG edges:\n";
+ for (const MachineBasicBlock &B : MF) {
+ unsigned BN = B.getNumber();
+ for (const MachineBasicBlock *SB : B.successors()) {
+ unsigned SN = SB->getNumber();
+ if (!EdgeExec.count(CFGEdge(BN, SN)))
+ dbgs() << " BB#" << BN << " -> BB#" << SN << '\n';
+ }
+ }
+ });
+}
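
Editorial aside, not part of the patch: propagate() above drives a work-list of CFG edges rather than blocks: a fake entry edge seeds the queue, each popped edge is marked executable, the target block is visited, and only the successor edges the branch evaluation considers reachable are pushed back. A standalone sketch of that loop over a hypothetical four-block CFG in which the branch in block 1 has been constant-folded, so block 2 never becomes executable:

    #include <iostream>
    #include <map>
    #include <queue>
    #include <set>
    #include <utility>
    #include <vector>

    using Edge = std::pair<int, int>;

    int main() {
      // Block -> successors that the (hypothetical) branch evaluator says
      // are actually reachable.
      std::map<int, std::vector<int>> Succs = {
          {0, {1}}, {1, {3}}, {2, {3}}, {3, {}}};

      std::set<Edge> EdgeExec;
      std::set<int> BlockVisited;
      std::queue<Edge> FlowQ;
      FlowQ.push({0, 0}); // fake edge to seed the entry block, as in the pass

      while (!FlowQ.empty()) {
        Edge E = FlowQ.front(); FlowQ.pop();
        if (E.first != 0 && EdgeExec.count(E))
          continue;               // this edge was already processed
        EdgeExec.insert(E);
        int B = E.second;
        BlockVisited.insert(B);   // stands in for visiting PHIs/instructions
        for (int S : Succs[B])
          FlowQ.push({B, S});
      }

      for (int B : BlockVisited)
        std::cout << B << ' ';    // prints: 0 1 3  (block 2 stays dead)
      std::cout << '\n';
    }
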
+
+bool MachineConstPropagator::rewrite(MachineFunction &MF) {
+ bool Changed = false;
+ // Rewrite all instructions based on the collected cell information.
+ //
+ // Traverse the instructions in a post-order, so that rewriting an
+ // instruction can make changes "downstream" in terms of control-flow
+ // without affecting the rewriting process. (We should not change
+ // instructions that have not yet been visited by the rewriter.)
+ // The reason for this is that the rewriter can introduce new vregs,
+ // and replace uses of old vregs (which had corresponding cells
+ // computed during propagation) with these new vregs (which at this
+ // point would not have any cells, and would appear to be "top").
+ // If an attempt was made to evaluate an instruction with a fresh
+ // "top" vreg, it would cause an error (abend) in the evaluator.
+
+ // Collect the post-order-traversal block ordering. The subsequent
+ // traversal/rewrite will update block successors, so it's safer
+  // if the visiting order is computed ahead of time.
+ std::vector<MachineBasicBlock*> POT;
+ for (MachineBasicBlock *B : post_order(&MF))
+ if (!B->empty())
+ POT.push_back(B);
+
+ for (MachineBasicBlock *B : POT) {
+    // Walk the block backwards (which usually begins with the branches).
+ // If any branch is rewritten, we may need to update the successor
+ // information for this block. Unless the block's successors can be
+ // precisely determined (which may not be the case for indirect
+ // branches), we cannot modify any branch.
+
+ // Compute the successor information.
+ SetVector<const MachineBasicBlock*> Targets;
+ bool HaveTargets = computeBlockSuccessors(B, Targets);
+ // Rewrite the executable instructions. Skip branches if we don't
+ // have block successor information.
+ for (auto I = B->rbegin(), E = B->rend(); I != E; ++I) {
+ MachineInstr &MI = *I;
+ if (InstrExec.count(&MI)) {
+ if (MI.isBranch() && !HaveTargets)
+ continue;
+ Changed |= MCE.rewrite(MI, Cells);
+ }
+ }
+ // The rewriting could rewrite PHI nodes to non-PHI nodes, causing
+ // regular instructions to appear in between PHI nodes. Bring all
+ // the PHI nodes to the beginning of the block.
+ for (auto I = B->begin(), E = B->end(); I != E; ++I) {
+ if (I->isPHI())
+ continue;
+ // I is not PHI. Find the next PHI node P.
+ auto P = I;
+ while (++P != E)
+ if (P->isPHI())
+ break;
+ // Not found.
+ if (P == E)
+ break;
+ // Splice P right before I.
+ B->splice(I, B, P);
+ // Reset I to point at the just spliced PHI node.
+ --I;
+ }
+ // Update the block successor information: remove unnecessary successors.
+ if (HaveTargets) {
+ SmallVector<MachineBasicBlock*,2> ToRemove;
+ for (MachineBasicBlock *SB : B->successors()) {
+ if (!Targets.count(SB))
+ ToRemove.push_back(const_cast<MachineBasicBlock*>(SB));
+ Targets.remove(SB);
+ }
+ for (unsigned i = 0, n = ToRemove.size(); i < n; ++i)
+ removeCFGEdge(B, ToRemove[i]);
+ // If there are any blocks left in the computed targets, it means that
+ // we think that the block could go somewhere, but the CFG does not.
+ // This could legitimately happen in blocks that have non-returning
+ // calls---we would think that the execution can continue, but the
+ // CFG will not have a successor edge.
+ }
+ }
+ // Need to do some final post-processing.
+ // If a branch was not executable, it will not get rewritten, but should
+ // be removed (or replaced with something equivalent to a A2_nop). We can't
+ // erase instructions during rewriting, so this needs to be delayed until
+ // now.
+ for (MachineBasicBlock &B : MF) {
+ MachineBasicBlock::iterator I = B.begin(), E = B.end();
+ while (I != E) {
+ auto Next = std::next(I);
+ if (I->isBranch() && !InstrExec.count(&*I))
+ B.erase(I);
+ I = Next;
+ }
+ }
+ return Changed;
+}
+
+// This is the constant propagation algorithm as described by Wegman-Zadeck.
+// Most of the terminology comes from there.
+bool MachineConstPropagator::run(MachineFunction &MF) {
+ DEBUG(MF.print(dbgs() << "Starting MachineConstPropagator\n", 0));
+
+ MRI = &MF.getRegInfo();
+
+ Cells.clear();
+ EdgeExec.clear();
+ InstrExec.clear();
+ assert(FlowQ.empty());
+
+ propagate(MF);
+ bool Changed = rewrite(MF);
+
+ DEBUG({
+ dbgs() << "End of MachineConstPropagator (Changed=" << Changed << ")\n";
+ if (Changed)
+ MF.print(dbgs(), 0);
+ });
+ return Changed;
+}
+
+// --------------------------------------------------------------------
+// Machine const evaluator.
+
+bool MachineConstEvaluator::getCell(const Register &R, const CellMap &Inputs,
+ LatticeCell &RC) {
+ if (!TargetRegisterInfo::isVirtualRegister(R.Reg))
+ return false;
+ const LatticeCell &L = Inputs.get(R.Reg);
+ if (!R.SubReg) {
+ RC = L;
+ return !RC.isBottom();
+ }
+ bool Eval = evaluate(R, L, RC);
+ return Eval && !RC.isBottom();
+}
+
+bool MachineConstEvaluator::constToInt(const Constant *C,
+ APInt &Val) const {
+ const ConstantInt *CI = dyn_cast<ConstantInt>(C);
+ if (!CI)
+ return false;
+ Val = CI->getValue();
+ return true;
+}
+
+const ConstantInt *MachineConstEvaluator::intToConst(const APInt &Val) const {
+ return ConstantInt::get(CX, Val);
+}
+
+bool MachineConstEvaluator::evaluateCMPrr(uint32_t Cmp, const Register &R1,
+ const Register &R2, const CellMap &Inputs, bool &Result) {
+ assert(Inputs.has(R1.Reg) && Inputs.has(R2.Reg));
+ LatticeCell LS1, LS2;
+ if (!getCell(R1, Inputs, LS1) || !getCell(R2, Inputs, LS2))
+ return false;
+
+ bool IsProp1 = LS1.isProperty();
+ bool IsProp2 = LS2.isProperty();
+ if (IsProp1) {
+ uint32_t Prop1 = LS1.properties();
+ if (IsProp2)
+ return evaluateCMPpp(Cmp, Prop1, LS2.properties(), Result);
+ uint32_t NegCmp = Comparison::negate(Cmp);
+ return evaluateCMPrp(NegCmp, R2, Prop1, Inputs, Result);
+ }
+ if (IsProp2) {
+ uint32_t Prop2 = LS2.properties();
+ return evaluateCMPrp(Cmp, R1, Prop2, Inputs, Result);
+ }
+
+ APInt A;
+ bool IsTrue = true, IsFalse = true;
+ for (unsigned i = 0; i < LS2.size(); ++i) {
+ bool Res;
+ bool Computed = constToInt(LS2.Values[i], A) &&
+ evaluateCMPri(Cmp, R1, A, Inputs, Res);
+ if (!Computed)
+ return false;
+ IsTrue &= Res;
+ IsFalse &= !Res;
+ }
+ assert(!IsTrue || !IsFalse);
+  // The actual logical value of the comparison is the same as IsTrue.
+ Result = IsTrue;
+ // Return true if the result was proven to be true or proven to be false.
+ return IsTrue || IsFalse;
+}
+
+bool MachineConstEvaluator::evaluateCMPri(uint32_t Cmp, const Register &R1,
+ const APInt &A2, const CellMap &Inputs, bool &Result) {
+ assert(Inputs.has(R1.Reg));
+ LatticeCell LS;
+ if (!getCell(R1, Inputs, LS))
+ return false;
+ if (LS.isProperty())
+ return evaluateCMPpi(Cmp, LS.properties(), A2, Result);
+
+ APInt A;
+ bool IsTrue = true, IsFalse = true;
+ for (unsigned i = 0; i < LS.size(); ++i) {
+ bool Res;
+ bool Computed = constToInt(LS.Values[i], A) &&
+ evaluateCMPii(Cmp, A, A2, Res);
+ if (!Computed)
+ return false;
+ IsTrue &= Res;
+ IsFalse &= !Res;
+ }
+ assert(!IsTrue || !IsFalse);
+  // The actual logical value of the comparison is the same as IsTrue.
+ Result = IsTrue;
+ // Return true if the result was proven to be true or proven to be false.
+ return IsTrue || IsFalse;
+}
+
+bool MachineConstEvaluator::evaluateCMPrp(uint32_t Cmp, const Register &R1,
+ uint64_t Props2, const CellMap &Inputs, bool &Result) {
+ assert(Inputs.has(R1.Reg));
+ LatticeCell LS;
+ if (!getCell(R1, Inputs, LS))
+ return false;
+ if (LS.isProperty())
+ return evaluateCMPpp(Cmp, LS.properties(), Props2, Result);
+
+ APInt A;
+ uint32_t NegCmp = Comparison::negate(Cmp);
+ bool IsTrue = true, IsFalse = true;
+ for (unsigned i = 0; i < LS.size(); ++i) {
+ bool Res;
+ bool Computed = constToInt(LS.Values[i], A) &&
+ evaluateCMPpi(NegCmp, Props2, A, Res);
+ if (!Computed)
+ return false;
+ IsTrue &= Res;
+ IsFalse &= !Res;
+ }
+ assert(!IsTrue || !IsFalse);
+ Result = IsTrue;
+ return IsTrue || IsFalse;
+}
+
+bool MachineConstEvaluator::evaluateCMPii(uint32_t Cmp, const APInt &A1,
+ const APInt &A2, bool &Result) {
+ // NE is a special kind of comparison (not composed of smaller properties).
+ if (Cmp == Comparison::NE) {
+ Result = !APInt::isSameValue(A1, A2);
+ return true;
+ }
+ if (Cmp == Comparison::EQ) {
+ Result = APInt::isSameValue(A1, A2);
+ return true;
+ }
+ if (Cmp & Comparison::EQ) {
+ if (APInt::isSameValue(A1, A2))
+ return (Result = true);
+ }
+ assert((Cmp & (Comparison::L | Comparison::G)) && "Malformed comparison");
+ Result = false;
+
+ unsigned W1 = A1.getBitWidth();
+ unsigned W2 = A2.getBitWidth();
+ unsigned MaxW = (W1 >= W2) ? W1 : W2;
+ if (Cmp & Comparison::U) {
+ const APInt Zx1 = A1.zextOrSelf(MaxW);
+ const APInt Zx2 = A2.zextOrSelf(MaxW);
+ if (Cmp & Comparison::L)
+ Result = Zx1.ult(Zx2);
+ else if (Cmp & Comparison::G)
+ Result = Zx2.ult(Zx1);
+ return true;
+ }
+
+ // Signed comparison.
+ const APInt Sx1 = A1.sextOrSelf(MaxW);
+ const APInt Sx2 = A2.sextOrSelf(MaxW);
+ if (Cmp & Comparison::L)
+ Result = Sx1.slt(Sx2);
+ else if (Cmp & Comparison::G)
+ Result = Sx2.slt(Sx1);
+ return true;
+}
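
Editorial aside, not part of the patch: evaluateCMPii() widens both operands to the larger bit width before comparing, zero-extending for unsigned predicates and sign-extending for signed ones. A standalone example of why that distinction matters for the same 8-bit pattern:

    #include <cstdint>
    #include <iostream>

    int main() {
      int8_t A1 = -1;  // 8-bit pattern 0xFF
      int16_t A2 = 1;  // 16-bit value 1

      // Unsigned "less-than": zero-extend to 16 bits, 0x00FF < 0x0001 is false.
      bool ULT = uint16_t(uint8_t(A1)) < uint16_t(A2);
      // Signed "less-than": sign-extend to 16 bits, -1 < 1 is true.
      bool SLT = int16_t(A1) < int16_t(A2);

      std::cout << ULT << ' ' << SLT << '\n'; // prints: 0 1
    }
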
+
+bool MachineConstEvaluator::evaluateCMPpi(uint32_t Cmp, uint32_t Props,
+ const APInt &A2, bool &Result) {
+ if (Props == ConstantProperties::Unknown)
+ return false;
+
+ // Should never see NaN here, but check for it for completeness.
+ if (Props & ConstantProperties::NaN)
+ return false;
+ // Infinity could theoretically be compared to a number, but the
+ // presence of infinity here would be very suspicious. If we don't
+ // know for sure that the number is finite, bail out.
+ if (!(Props & ConstantProperties::Finite))
+ return false;
+
+ // Let X be a number that has properties Props.
+
+ if (Cmp & Comparison::U) {
+ // In case of unsigned comparisons, we can only compare against 0.
+ if (A2 == 0) {
+ // Any x!=0 will be considered >0 in an unsigned comparison.
+ if (Props & ConstantProperties::Zero)
+ Result = (Cmp & Comparison::EQ);
+ else if (Props & ConstantProperties::NonZero)
+ Result = (Cmp & Comparison::G) || (Cmp == Comparison::NE);
+ else
+ return false;
+ return true;
+ }
+ // A2 is not zero. The only handled case is if X = 0.
+ if (Props & ConstantProperties::Zero) {
+ Result = (Cmp & Comparison::L) || (Cmp == Comparison::NE);
+ return true;
+ }
+ return false;
+ }
+
+ // Signed comparisons are different.
+ if (Props & ConstantProperties::Zero) {
+ if (A2 == 0)
+ Result = (Cmp & Comparison::EQ);
+ else
+ Result = (Cmp == Comparison::NE) ||
+ ((Cmp & Comparison::L) && !A2.isNegative()) ||
+ ((Cmp & Comparison::G) && A2.isNegative());
+ return true;
+ }
+ if (Props & ConstantProperties::PosOrZero) {
+ // X >= 0 and !(A2 < 0) => cannot compare
+ if (!A2.isNegative())
+ return false;
+ // X >= 0 and A2 < 0
+ Result = (Cmp & Comparison::G) || (Cmp == Comparison::NE);
+ return true;
+ }
+ if (Props & ConstantProperties::NegOrZero) {
+ // X <= 0 and A2 <= 0 => cannot compare
+ if (A2 == 0 || A2.isNegative())
+ return false;
+ // X <= 0 and A2 > 0
+ Result = (Cmp & Comparison::L) || (Cmp == Comparison::NE);
+ return true;
+ }
+
+ return false;
+}
+
+bool MachineConstEvaluator::evaluateCMPpp(uint32_t Cmp, uint32_t Props1,
+ uint32_t Props2, bool &Result) {
+ typedef ConstantProperties P;
+ if ((Props1 & P::NaN) && (Props2 & P::NaN))
+ return false;
+ if (!(Props1 & P::Finite) || !(Props2 & P::Finite))
+ return false;
+
+ bool Zero1 = (Props1 & P::Zero), Zero2 = (Props2 & P::Zero);
+ bool NonZero1 = (Props1 & P::NonZero), NonZero2 = (Props2 & P::NonZero);
+ if (Zero1 && Zero2) {
+ Result = (Cmp & Comparison::EQ);
+ return true;
+ }
+ if (Cmp == Comparison::NE) {
+ if ((Zero1 && NonZero2) || (NonZero1 && Zero2))
+ return (Result = true);
+ return false;
+ }
+
+ if (Cmp & Comparison::U) {
+ // In unsigned comparisons, we can only compare against a known zero,
+ // or a known non-zero.
+ if (Zero1 && NonZero2) {
+ Result = (Cmp & Comparison::L);
+ return true;
+ }
+ if (NonZero1 && Zero2) {
+ Result = (Cmp & Comparison::G);
+ return true;
+ }
+ return false;
+ }
+
+ // Signed comparison. The comparison is not NE.
+ bool Poz1 = (Props1 & P::PosOrZero), Poz2 = (Props2 & P::PosOrZero);
+ bool Nez1 = (Props1 & P::NegOrZero), Nez2 = (Props2 & P::NegOrZero);
+ if (Nez1 && Poz2) {
+ if (NonZero1 || NonZero2) {
+ Result = (Cmp & Comparison::L);
+ return true;
+ }
+ // Either (or both) could be zero. Can only say that X <= Y.
+ if ((Cmp & Comparison::EQ) && (Cmp & Comparison::L))
+ return (Result = true);
+ }
+ if (Poz1 && Nez2) {
+ if (NonZero1 || NonZero2) {
+ Result = (Cmp & Comparison::G);
+ return true;
+ }
+ // Either (or both) could be zero. Can only say that X >= Y.
+ if ((Cmp & Comparison::EQ) && (Cmp & Comparison::G))
+ return (Result = true);
+ }
+
+ return false;
+}
+
+bool MachineConstEvaluator::evaluateCOPY(const Register &R1,
+ const CellMap &Inputs, LatticeCell &Result) {
+ return getCell(R1, Inputs, Result);
+}
+
+bool MachineConstEvaluator::evaluateANDrr(const Register &R1,
+ const Register &R2, const CellMap &Inputs, LatticeCell &Result) {
+ assert(Inputs.has(R1.Reg) && Inputs.has(R2.Reg));
+ const LatticeCell &L1 = Inputs.get(R1.Reg);
+ const LatticeCell &L2 = Inputs.get(R2.Reg);
+ // If both sources are bottom, exit. Otherwise try to evaluate ANDri
+ // with the non-bottom argument passed as the immediate. This is to
+ // catch cases of ANDing with 0.
+ if (L2.isBottom()) {
+ if (L1.isBottom())
+ return false;
+ return evaluateANDrr(R2, R1, Inputs, Result);
+ }
+ LatticeCell LS2;
+ if (!evaluate(R2, L2, LS2))
+ return false;
+ if (LS2.isBottom() || LS2.isProperty())
+ return false;
+
+ APInt A;
+ for (unsigned i = 0; i < LS2.size(); ++i) {
+ LatticeCell RC;
+ bool Eval = constToInt(LS2.Values[i], A) &&
+ evaluateANDri(R1, A, Inputs, RC);
+ if (!Eval)
+ return false;
+ Result.meet(RC);
+ }
+ return !Result.isBottom();
+}
+
+bool MachineConstEvaluator::evaluateANDri(const Register &R1,
+ const APInt &A2, const CellMap &Inputs, LatticeCell &Result) {
+ assert(Inputs.has(R1.Reg));
+ if (A2 == -1)
+ return getCell(R1, Inputs, Result);
+ if (A2 == 0) {
+ LatticeCell RC;
+ RC.add(intToConst(A2));
+ // Overwrite Result.
+ Result = RC;
+ return true;
+ }
+ LatticeCell LS1;
+ if (!getCell(R1, Inputs, LS1))
+ return false;
+ if (LS1.isBottom() || LS1.isProperty())
+ return false;
+
+ APInt A, ResA;
+ for (unsigned i = 0; i < LS1.size(); ++i) {
+ bool Eval = constToInt(LS1.Values[i], A) &&
+ evaluateANDii(A, A2, ResA);
+ if (!Eval)
+ return false;
+ const Constant *C = intToConst(ResA);
+ Result.add(C);
+ }
+ return !Result.isBottom();
+}
+
+bool MachineConstEvaluator::evaluateANDii(const APInt &A1,
+ const APInt &A2, APInt &Result) {
+ Result = A1 & A2;
+ return true;
+}
+
+bool MachineConstEvaluator::evaluateORrr(const Register &R1,
+ const Register &R2, const CellMap &Inputs, LatticeCell &Result) {
+ assert(Inputs.has(R1.Reg) && Inputs.has(R2.Reg));
+ const LatticeCell &L1 = Inputs.get(R1.Reg);
+ const LatticeCell &L2 = Inputs.get(R2.Reg);
+ // If both sources are bottom, exit. Otherwise try to evaluate ORri
+ // with the non-bottom argument passed as the immediate. This is to
+ // catch cases of ORing with -1.
+ if (L2.isBottom()) {
+ if (L1.isBottom())
+ return false;
+ return evaluateORrr(R2, R1, Inputs, Result);
+ }
+ LatticeCell LS2;
+ if (!evaluate(R2, L2, LS2))
+ return false;
+ if (LS2.isBottom() || LS2.isProperty())
+ return false;
+
+ APInt A;
+ for (unsigned i = 0; i < LS2.size(); ++i) {
+ LatticeCell RC;
+ bool Eval = constToInt(LS2.Values[i], A) &&
+ evaluateORri(R1, A, Inputs, RC);
+ if (!Eval)
+ return false;
+ Result.meet(RC);
+ }
+ return !Result.isBottom();
+}
+
+bool MachineConstEvaluator::evaluateORri(const Register &R1,
+ const APInt &A2, const CellMap &Inputs, LatticeCell &Result) {
+ assert(Inputs.has(R1.Reg));
+ if (A2 == 0)
+ return getCell(R1, Inputs, Result);
+ if (A2 == -1) {
+ LatticeCell RC;
+ RC.add(intToConst(A2));
+ // Overwrite Result.
+ Result = RC;
+ return true;
+ }
+ LatticeCell LS1;
+ if (!getCell(R1, Inputs, LS1))
+ return false;
+ if (LS1.isBottom() || LS1.isProperty())
+ return false;
+
+ APInt A, ResA;
+ for (unsigned i = 0; i < LS1.size(); ++i) {
+ bool Eval = constToInt(LS1.Values[i], A) &&
+ evaluateORii(A, A2, ResA);
+ if (!Eval)
+ return false;
+ const Constant *C = intToConst(ResA);
+ Result.add(C);
+ }
+ return !Result.isBottom();
+}
+
+bool MachineConstEvaluator::evaluateORii(const APInt &A1,
+ const APInt &A2, APInt &Result) {
+ Result = A1 | A2;
+ return true;
+}
+
+bool MachineConstEvaluator::evaluateXORrr(const Register &R1,
+ const Register &R2, const CellMap &Inputs, LatticeCell &Result) {
+ assert(Inputs.has(R1.Reg) && Inputs.has(R2.Reg));
+ LatticeCell LS1, LS2;
+ if (!getCell(R1, Inputs, LS1) || !getCell(R2, Inputs, LS2))
+ return false;
+ if (LS1.isProperty()) {
+ if (LS1.properties() & ConstantProperties::Zero)
+ return !(Result = LS2).isBottom();
+ return false;
+ }
+ if (LS2.isProperty()) {
+ if (LS2.properties() & ConstantProperties::Zero)
+ return !(Result = LS1).isBottom();
+ return false;
+ }
+
+ APInt A;
+ for (unsigned i = 0; i < LS2.size(); ++i) {
+ LatticeCell RC;
+ bool Eval = constToInt(LS2.Values[i], A) &&
+ evaluateXORri(R1, A, Inputs, RC);
+ if (!Eval)
+ return false;
+ Result.meet(RC);
+ }
+ return !Result.isBottom();
+}
+
+bool MachineConstEvaluator::evaluateXORri(const Register &R1,
+ const APInt &A2, const CellMap &Inputs, LatticeCell &Result) {
+ assert(Inputs.has(R1.Reg));
+ LatticeCell LS1;
+ if (!getCell(R1, Inputs, LS1))
+ return false;
+ if (LS1.isProperty()) {
+ if (LS1.properties() & ConstantProperties::Zero) {
+ const Constant *C = intToConst(A2);
+ Result.add(C);
+ return !Result.isBottom();
+ }
+ return false;
+ }
+
+ APInt A, XA;
+ for (unsigned i = 0; i < LS1.size(); ++i) {
+ bool Eval = constToInt(LS1.Values[i], A) &&
+ evaluateXORii(A, A2, XA);
+ if (!Eval)
+ return false;
+ const Constant *C = intToConst(XA);
+ Result.add(C);
+ }
+ return !Result.isBottom();
+}
+
+bool MachineConstEvaluator::evaluateXORii(const APInt &A1,
+ const APInt &A2, APInt &Result) {
+ Result = A1 ^ A2;
+ return true;
+}
+
+bool MachineConstEvaluator::evaluateZEXTr(const Register &R1, unsigned Width,
+ unsigned Bits, const CellMap &Inputs, LatticeCell &Result) {
+ assert(Inputs.has(R1.Reg));
+ LatticeCell LS1;
+ if (!getCell(R1, Inputs, LS1))
+ return false;
+ if (LS1.isProperty())
+ return false;
+
+ APInt A, XA;
+ for (unsigned i = 0; i < LS1.size(); ++i) {
+ bool Eval = constToInt(LS1.Values[i], A) &&
+ evaluateZEXTi(A, Width, Bits, XA);
+ if (!Eval)
+ return false;
+ const Constant *C = intToConst(XA);
+ Result.add(C);
+ }
+ return true;
+}
+
+bool MachineConstEvaluator::evaluateZEXTi(const APInt &A1, unsigned Width,
+ unsigned Bits, APInt &Result) {
+ unsigned BW = A1.getBitWidth();
+ (void)BW;
+ assert(Width >= Bits && BW >= Bits);
+ APInt Mask = APInt::getLowBitsSet(Width, Bits);
+ Result = A1.zextOrTrunc(Width) & Mask;
+ return true;
+}
+
+bool MachineConstEvaluator::evaluateSEXTr(const Register &R1, unsigned Width,
+ unsigned Bits, const CellMap &Inputs, LatticeCell &Result) {
+ assert(Inputs.has(R1.Reg));
+ LatticeCell LS1;
+ if (!getCell(R1, Inputs, LS1))
+ return false;
+ if (LS1.isBottom() || LS1.isProperty())
+ return false;
+
+ APInt A, XA;
+ for (unsigned i = 0; i < LS1.size(); ++i) {
+ bool Eval = constToInt(LS1.Values[i], A) &&
+ evaluateSEXTi(A, Width, Bits, XA);
+ if (!Eval)
+ return false;
+ const Constant *C = intToConst(XA);
+ Result.add(C);
+ }
+ return true;
+}
+
+bool MachineConstEvaluator::evaluateSEXTi(const APInt &A1, unsigned Width,
+ unsigned Bits, APInt &Result) {
+ unsigned BW = A1.getBitWidth();
+ assert(Width >= Bits && BW >= Bits);
+ // Special case to make things faster for smaller source widths.
+ // Sign extension of 0 bits generates 0 as a result. This is consistent
+ // with what the HW does.
+ if (Bits == 0) {
+ Result = APInt(Width, 0);
+ return true;
+ }
+ // In C, shifts by 64 invoke undefined behavior: handle that case in APInt.
+ if (BW <= 64 && Bits != 0) {
+ int64_t V = A1.getSExtValue();
+ switch (Bits) {
+ case 8:
+ V = static_cast<int8_t>(V);
+ break;
+ case 16:
+ V = static_cast<int16_t>(V);
+ break;
+ case 32:
+ V = static_cast<int32_t>(V);
+ break;
+ default:
+ // Shift left to lose all bits except lower "Bits" bits, then shift
+ // the value back, replicating what was a sign bit after the first
+ // shift.
+ V = (V << (64-Bits)) >> (64-Bits);
+ break;
+ }
+ // V is a 64-bit sign-extended value. Convert it to APInt of desired
+ // width.
+ Result = APInt(Width, V, true);
+ return true;
+ }
+ // Slow case: the value doesn't fit in int64_t.
+ if (Bits < BW)
+ Result = A1.trunc(Bits).sext(Width);
+ else // Bits == BW
+ Result = A1.sext(Width);
+ return true;
+}
+
+bool MachineConstEvaluator::evaluateCLBr(const Register &R1, bool Zeros,
+ bool Ones, const CellMap &Inputs, LatticeCell &Result) {
+ assert(Inputs.has(R1.Reg));
+ LatticeCell LS1;
+ if (!getCell(R1, Inputs, LS1))
+ return false;
+ if (LS1.isBottom() || LS1.isProperty())
+ return false;
+
+ APInt A, CA;
+ for (unsigned i = 0; i < LS1.size(); ++i) {
+ bool Eval = constToInt(LS1.Values[i], A) &&
+ evaluateCLBi(A, Zeros, Ones, CA);
+ if (!Eval)
+ return false;
+ const Constant *C = intToConst(CA);
+ Result.add(C);
+ }
+ return true;
+}
+
+bool MachineConstEvaluator::evaluateCLBi(const APInt &A1, bool Zeros,
+ bool Ones, APInt &Result) {
+ unsigned BW = A1.getBitWidth();
+ if (!Zeros && !Ones)
+ return false;
+ unsigned Count = 0;
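+ // When both Zeros and Ones are requested (generic count-leading-bits),
+ // leading zeros are counted first; leading ones are counted only when the
+ // value has no leading zeros (i.e. its top bit is set).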
+ if (Zeros && (Count == 0))
+ Count = A1.countLeadingZeros();
+ if (Ones && (Count == 0))
+ Count = A1.countLeadingOnes();
+ Result = APInt(BW, static_cast<uint64_t>(Count), false);
+ return true;
+}
+
+bool MachineConstEvaluator::evaluateCTBr(const Register &R1, bool Zeros,
+ bool Ones, const CellMap &Inputs, LatticeCell &Result) {
+ assert(Inputs.has(R1.Reg));
+ LatticeCell LS1;
+ if (!getCell(R1, Inputs, LS1))
+ return false;
+ if (LS1.isBottom() || LS1.isProperty())
+ return false;
+
+ APInt A, CA;
+ for (unsigned i = 0; i < LS1.size(); ++i) {
+ bool Eval = constToInt(LS1.Values[i], A) &&
+ evaluateCTBi(A, Zeros, Ones, CA);
+ if (!Eval)
+ return false;
+ const Constant *C = intToConst(CA);
+ Result.add(C);
+ }
+ return true;
+}
+
+bool MachineConstEvaluator::evaluateCTBi(const APInt &A1, bool Zeros,
+ bool Ones, APInt &Result) {
+ unsigned BW = A1.getBitWidth();
+ if (!Zeros && !Ones)
+ return false;
+ unsigned Count = 0;
+ if (Zeros && (Count == 0))
+ Count = A1.countTrailingZeros();
+ if (Ones && (Count == 0))
+ Count = A1.countTrailingOnes();
+ Result = APInt(BW, static_cast<uint64_t>(Count), false);
+ return true;
+}
+
+bool MachineConstEvaluator::evaluateEXTRACTr(const Register &R1,
+ unsigned Width, unsigned Bits, unsigned Offset, bool Signed,
+ const CellMap &Inputs, LatticeCell &Result) {
+ assert(Inputs.has(R1.Reg));
+ assert(Bits+Offset <= Width);
+ LatticeCell LS1;
+ if (!getCell(R1, Inputs, LS1))
+ return false;
+ if (LS1.isBottom())
+ return false;
+ if (LS1.isProperty()) {
+ uint32_t Ps = LS1.properties();
+ if (Ps & ConstantProperties::Zero) {
+ const Constant *C = intToConst(APInt(Width, 0, false));
+ Result.add(C);
+ return true;
+ }
+ return false;
+ }
+
+ APInt A, CA;
+ for (unsigned i = 0; i < LS1.size(); ++i) {
+ bool Eval = constToInt(LS1.Values[i], A) &&
+ evaluateEXTRACTi(A, Bits, Offset, Signed, CA);
+ if (!Eval)
+ return false;
+ const Constant *C = intToConst(CA);
+ Result.add(C);
+ }
+ return true;
+}
+
+bool MachineConstEvaluator::evaluateEXTRACTi(const APInt &A1, unsigned Bits,
+ unsigned Offset, bool Signed, APInt &Result) {
+ unsigned BW = A1.getBitWidth();
+ assert(Bits+Offset <= BW);
+ // Extracting 0 bits generates 0 as a result (as indicated by the HW people).
+ if (Bits == 0) {
+ Result = APInt(BW, 0);
+ return true;
+ }
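+ // Fast path for values that fit in 64 bits: shift the field up to the top,
+ // then shift back down arithmetically (signed) or logically (unsigned).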
+ if (BW <= 64) {
+ int64_t V = A1.getZExtValue();
+ V <<= (64-Bits-Offset);
+ if (Signed)
+ V >>= (64-Bits);
+ else
+ V = static_cast<uint64_t>(V) >> (64-Bits);
+ Result = APInt(BW, V, Signed);
+ return true;
+ }
+ if (Signed)
+ Result = A1.shl(BW-Bits-Offset).ashr(BW-Bits);
+ else
+ Result = A1.shl(BW-Bits-Offset).lshr(BW-Bits);
+ return true;
+}
+
+bool MachineConstEvaluator::evaluateSplatr(const Register &R1,
+ unsigned Bits, unsigned Count, const CellMap &Inputs,
+ LatticeCell &Result) {
+ assert(Inputs.has(R1.Reg));
+ LatticeCell LS1;
+ if (!getCell(R1, Inputs, LS1))
+ return false;
+ if (LS1.isBottom() || LS1.isProperty())
+ return false;
+
+ APInt A, SA;
+ for (unsigned i = 0; i < LS1.size(); ++i) {
+ bool Eval = constToInt(LS1.Values[i], A) &&
+ evaluateSplati(A, Bits, Count, SA);
+ if (!Eval)
+ return false;
+ const Constant *C = intToConst(SA);
+ Result.add(C);
+ }
+ return true;
+}
+
+bool MachineConstEvaluator::evaluateSplati(const APInt &A1, unsigned Bits,
+ unsigned Count, APInt &Result) {
+ assert(Count > 0);
+ unsigned BW = A1.getBitWidth(), SW = Count*Bits;
+ APInt LoBits = (Bits < BW) ? A1.trunc(Bits) : A1.zextOrSelf(Bits);
+ if (Count > 1)
+ LoBits = LoBits.zext(SW);
+
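+ // For illustration: splatting A1 = 0xAB with Bits = 8 and Count = 4
+ // produces 0xABABABAB.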
+ APInt Res(SW, 0, false);
+ for (unsigned i = 0; i < Count; ++i) {
+ Res <<= Bits;
+ Res |= LoBits;
+ }
+ Result = Res;
+ return true;
+}
+
+// ----------------------------------------------------------------------
+// Hexagon-specific code.
+
+namespace llvm {
+
+ FunctionPass *createHexagonConstPropagationPass();
+ void initializeHexagonConstPropagationPass(PassRegistry &Registry);
+
+} // end namespace llvm
+
+namespace {
+
+ class HexagonConstEvaluator : public MachineConstEvaluator {
+ public:
+ HexagonConstEvaluator(MachineFunction &Fn);
+
+ bool evaluate(const MachineInstr &MI, const CellMap &Inputs,
+ CellMap &Outputs) override;
+ bool evaluate(const Register &R, const LatticeCell &SrcC,
+ LatticeCell &Result) override;
+ bool evaluate(const MachineInstr &BrI, const CellMap &Inputs,
+ SetVector<const MachineBasicBlock*> &Targets, bool &FallsThru)
+ override;
+ bool rewrite(MachineInstr &MI, const CellMap &Inputs) override;
+
+ private:
+ unsigned getRegBitWidth(unsigned Reg) const;
+
+ static uint32_t getCmp(unsigned Opc);
+ static APInt getCmpImm(unsigned Opc, unsigned OpX,
+ const MachineOperand &MO);
+ void replaceWithNop(MachineInstr &MI);
+
+ bool evaluateHexRSEQ32(Register RL, Register RH, const CellMap &Inputs,
+ LatticeCell &Result);
+ bool evaluateHexCompare(const MachineInstr &MI, const CellMap &Inputs,
+ CellMap &Outputs);
+ // This is suitable to be called for compare-and-jump instructions.
+ bool evaluateHexCompare2(uint32_t Cmp, const MachineOperand &Src1,
+ const MachineOperand &Src2, const CellMap &Inputs, bool &Result);
+ bool evaluateHexLogical(const MachineInstr &MI, const CellMap &Inputs,
+ CellMap &Outputs);
+ bool evaluateHexCondMove(const MachineInstr &MI, const CellMap &Inputs,
+ CellMap &Outputs);
+ bool evaluateHexExt(const MachineInstr &MI, const CellMap &Inputs,
+ CellMap &Outputs);
+ bool evaluateHexVector1(const MachineInstr &MI, const CellMap &Inputs,
+ CellMap &Outputs);
+ bool evaluateHexVector2(const MachineInstr &MI, const CellMap &Inputs,
+ CellMap &Outputs);
+
+ void replaceAllRegUsesWith(unsigned FromReg, unsigned ToReg);
+ bool rewriteHexBranch(MachineInstr &BrI, const CellMap &Inputs);
+ bool rewriteHexConstDefs(MachineInstr &MI, const CellMap &Inputs,
+ bool &AllDefs);
+ bool rewriteHexConstUses(MachineInstr &MI, const CellMap &Inputs);
+
+ MachineRegisterInfo *MRI;
+ const HexagonInstrInfo &HII;
+ const HexagonRegisterInfo &HRI;
+ };
+
+ class HexagonConstPropagation : public MachineFunctionPass {
+ public:
+ static char ID;
+
+ HexagonConstPropagation() : MachineFunctionPass(ID) {
+ PassRegistry &Registry = *PassRegistry::getPassRegistry();
+ initializeHexagonConstPropagationPass(Registry);
+ }
+
+ StringRef getPassName() const override {
+ return "Hexagon Constant Propagation";
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ const Function *F = MF.getFunction();
+ if (!F)
+ return false;
+ if (skipFunction(*F))
+ return false;
+
+ HexagonConstEvaluator HCE(MF);
+ return MachineConstPropagator(HCE).run(MF);
+ }
+ };
+
+ char HexagonConstPropagation::ID = 0;
+
+} // end anonymous namespace
+
+INITIALIZE_PASS(HexagonConstPropagation, "hcp", "Hexagon Constant Propagation",
+ false, false)
+
+HexagonConstEvaluator::HexagonConstEvaluator(MachineFunction &Fn)
+ : MachineConstEvaluator(Fn),
+ HII(*Fn.getSubtarget<HexagonSubtarget>().getInstrInfo()),
+ HRI(*Fn.getSubtarget<HexagonSubtarget>().getRegisterInfo()) {
+ MRI = &Fn.getRegInfo();
+}
+
+bool HexagonConstEvaluator::evaluate(const MachineInstr &MI,
+ const CellMap &Inputs, CellMap &Outputs) {
+ if (MI.isCall())
+ return false;
+ if (MI.getNumOperands() == 0 || !MI.getOperand(0).isReg())
+ return false;
+ const MachineOperand &MD = MI.getOperand(0);
+ if (!MD.isDef())
+ return false;
+
+ unsigned Opc = MI.getOpcode();
+ Register DefR(MD);
+ assert(!DefR.SubReg);
+ if (!TargetRegisterInfo::isVirtualRegister(DefR.Reg))
+ return false;
+
+ if (MI.isCopy()) {
+ LatticeCell RC;
+ Register SrcR(MI.getOperand(1));
+ bool Eval = evaluateCOPY(SrcR, Inputs, RC);
+ if (!Eval)
+ return false;
+ Outputs.update(DefR.Reg, RC);
+ return true;
+ }
+ if (MI.isRegSequence()) {
+ unsigned Sub1 = MI.getOperand(2).getImm();
+ unsigned Sub2 = MI.getOperand(4).getImm();
+ const TargetRegisterClass *DefRC = MRI->getRegClass(DefR.Reg);
+ unsigned SubLo = HRI.getHexagonSubRegIndex(DefRC, Hexagon::ps_sub_lo);
+ unsigned SubHi = HRI.getHexagonSubRegIndex(DefRC, Hexagon::ps_sub_hi);
+ if (Sub1 != SubLo && Sub1 != SubHi)
+ return false;
+ if (Sub2 != SubLo && Sub2 != SubHi)
+ return false;
+ assert(Sub1 != Sub2);
+ bool LoIs1 = (Sub1 == SubLo);
+ const MachineOperand &OpLo = LoIs1 ? MI.getOperand(1) : MI.getOperand(3);
+ const MachineOperand &OpHi = LoIs1 ? MI.getOperand(3) : MI.getOperand(1);
+ LatticeCell RC;
+ Register SrcRL(OpLo), SrcRH(OpHi);
+ bool Eval = evaluateHexRSEQ32(SrcRL, SrcRH, Inputs, RC);
+ if (!Eval)
+ return false;
+ Outputs.update(DefR.Reg, RC);
+ return true;
+ }
+ if (MI.isCompare()) {
+ bool Eval = evaluateHexCompare(MI, Inputs, Outputs);
+ return Eval;
+ }
+
+ switch (Opc) {
+ default:
+ return false;
+ case Hexagon::A2_tfrsi:
+ case Hexagon::A2_tfrpi:
+ case Hexagon::CONST32:
+ case Hexagon::CONST64:
+ {
+ const MachineOperand &VO = MI.getOperand(1);
+ // The operand of CONST32 can be a blockaddress, e.g.
+ // %vreg0<def> = CONST32 <blockaddress(@eat, %L)>
+ // Do this check for all instructions for safety.
+ if (!VO.isImm())
+ return false;
+ int64_t V = MI.getOperand(1).getImm();
+ unsigned W = getRegBitWidth(DefR.Reg);
+ if (W != 32 && W != 64)
+ return false;
+ IntegerType *Ty = (W == 32) ? Type::getInt32Ty(CX)
+ : Type::getInt64Ty(CX);
+ const ConstantInt *CI = ConstantInt::get(Ty, V, true);
+ LatticeCell RC = Outputs.get(DefR.Reg);
+ RC.add(CI);
+ Outputs.update(DefR.Reg, RC);
+ break;
+ }
+
+ case Hexagon::PS_true:
+ case Hexagon::PS_false:
+ {
+ LatticeCell RC = Outputs.get(DefR.Reg);
+ bool NonZero = (Opc == Hexagon::PS_true);
+ uint32_t P = NonZero ? ConstantProperties::NonZero
+ : ConstantProperties::Zero;
+ RC.add(P);
+ Outputs.update(DefR.Reg, RC);
+ break;
+ }
+
+ case Hexagon::A2_and:
+ case Hexagon::A2_andir:
+ case Hexagon::A2_andp:
+ case Hexagon::A2_or:
+ case Hexagon::A2_orir:
+ case Hexagon::A2_orp:
+ case Hexagon::A2_xor:
+ case Hexagon::A2_xorp:
+ {
+ bool Eval = evaluateHexLogical(MI, Inputs, Outputs);
+ if (!Eval)
+ return false;
+ break;
+ }
+
+ case Hexagon::A2_combineii: // combine(#s8Ext, #s8)
+ case Hexagon::A4_combineii: // combine(#s8, #u6Ext)
+ {
+ uint64_t Hi = MI.getOperand(1).getImm();
+ uint64_t Lo = MI.getOperand(2).getImm();
+ uint64_t Res = (Hi << 32) | (Lo & 0xFFFFFFFF);
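+ // For illustration: combine(#1, #2) produces the 64-bit value
+ // 0x0000000100000002.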
+ IntegerType *Ty = Type::getInt64Ty(CX);
+ const ConstantInt *CI = ConstantInt::get(Ty, Res, false);
+ LatticeCell RC = Outputs.get(DefR.Reg);
+ RC.add(CI);
+ Outputs.update(DefR.Reg, RC);
+ break;
+ }
+
+ case Hexagon::S2_setbit_i:
+ {
+ int64_t B = MI.getOperand(2).getImm();
+ assert(B >= 0 && B < 32);
+ APInt A(32, (1ull << B), false);
+ Register R(MI.getOperand(1));
+ LatticeCell RC = Outputs.get(DefR.Reg);
+ bool Eval = evaluateORri(R, A, Inputs, RC);
+ if (!Eval)
+ return false;
+ Outputs.update(DefR.Reg, RC);
+ break;
+ }
+
+ case Hexagon::C2_mux:
+ case Hexagon::C2_muxir:
+ case Hexagon::C2_muxri:
+ case Hexagon::C2_muxii:
+ {
+ bool Eval = evaluateHexCondMove(MI, Inputs, Outputs);
+ if (!Eval)
+ return false;
+ break;
+ }
+
+ case Hexagon::A2_sxtb:
+ case Hexagon::A2_sxth:
+ case Hexagon::A2_sxtw:
+ case Hexagon::A2_zxtb:
+ case Hexagon::A2_zxth:
+ {
+ bool Eval = evaluateHexExt(MI, Inputs, Outputs);
+ if (!Eval)
+ return false;
+ break;
+ }
+
+ case Hexagon::S2_ct0:
+ case Hexagon::S2_ct0p:
+ case Hexagon::S2_ct1:
+ case Hexagon::S2_ct1p:
+ {
+ using namespace Hexagon;
+
+ bool Ones = (Opc == S2_ct1) || (Opc == S2_ct1p);
+ Register R1(MI.getOperand(1));
+ assert(Inputs.has(R1.Reg));
+ LatticeCell T;
+ bool Eval = evaluateCTBr(R1, !Ones, Ones, Inputs, T);
+ if (!Eval)
+ return false;
+ // All of these instructions return a 32-bit value. The evaluation
+ // produces a result of the same width as the operand, so truncate
+ // the result to 32 bits if necessary.
+ APInt C;
+ LatticeCell RC = Outputs.get(DefR.Reg);
+ for (unsigned i = 0; i < T.size(); ++i) {
+ const Constant *CI = T.Values[i];
+ if (constToInt(CI, C) && C.getBitWidth() > 32)
+ CI = intToConst(C.trunc(32));
+ RC.add(CI);
+ }
+ Outputs.update(DefR.Reg, RC);
+ break;
+ }
+
+ case Hexagon::S2_cl0:
+ case Hexagon::S2_cl0p:
+ case Hexagon::S2_cl1:
+ case Hexagon::S2_cl1p:
+ case Hexagon::S2_clb:
+ case Hexagon::S2_clbp:
+ {
+ using namespace Hexagon;
+
+ bool OnlyZeros = (Opc == S2_cl0) || (Opc == S2_cl0p);
+ bool OnlyOnes = (Opc == S2_cl1) || (Opc == S2_cl1p);
+ Register R1(MI.getOperand(1));
+ assert(Inputs.has(R1.Reg));
+ LatticeCell T;
+ bool Eval = evaluateCLBr(R1, !OnlyOnes, !OnlyZeros, Inputs, T);
+ if (!Eval)
+ return false;
+ // All of these instructions return a 32-bit value. The evaluation
+ // produces a result of the same width as the operand, so truncate
+ // the result to 32 bits if necessary.
+ APInt C;
+ LatticeCell RC = Outputs.get(DefR.Reg);
+ for (unsigned i = 0; i < T.size(); ++i) {
+ const Constant *CI = T.Values[i];
+ if (constToInt(CI, C) && C.getBitWidth() > 32)
+ CI = intToConst(C.trunc(32));
+ RC.add(CI);
+ }
+ Outputs.update(DefR.Reg, RC);
+ break;
+ }
+
+ case Hexagon::S4_extract:
+ case Hexagon::S4_extractp:
+ case Hexagon::S2_extractu:
+ case Hexagon::S2_extractup:
+ {
+ bool Signed = (Opc == Hexagon::S4_extract) ||
+ (Opc == Hexagon::S4_extractp);
+ Register R1(MI.getOperand(1));
+ unsigned BW = getRegBitWidth(R1.Reg);
+ unsigned Bits = MI.getOperand(2).getImm();
+ unsigned Offset = MI.getOperand(3).getImm();
+ LatticeCell RC = Outputs.get(DefR.Reg);
+ if (Offset >= BW) {
+ APInt Zero(BW, 0, false);
+ RC.add(intToConst(Zero));
+ break;
+ }
+ if (Offset+Bits > BW) {
+ // If the requested bitfield extends beyond the most significant bit,
+ // the extra bits are treated as 0s. To emulate this behavior, reduce
+ // the number of requested bits, and make the extract unsigned.
+ Bits = BW-Offset;
+ Signed = false;
+ }
+ bool Eval = evaluateEXTRACTr(R1, BW, Bits, Offset, Signed, Inputs, RC);
+ if (!Eval)
+ return false;
+ Outputs.update(DefR.Reg, RC);
+ break;
+ }
+
+ case Hexagon::S2_vsplatrb:
+ case Hexagon::S2_vsplatrh:
+ // vabsh, vabsh:sat
+ // vabsw, vabsw:sat
+ // vconj:sat
+ // vrndwh, vrndwh:sat
+ // vsathb, vsathub, vsatwuh
+ // vsxtbh, vsxthw
+ // vtrunehb, vtrunohb
+ // vzxtbh, vzxthw
+ {
+ bool Eval = evaluateHexVector1(MI, Inputs, Outputs);
+ if (!Eval)
+ return false;
+ break;
+ }
+
+ // TODO:
+ // A2_vaddh
+ // A2_vaddhs
+ // A2_vaddw
+ // A2_vaddws
+ }
+
+ return true;
+}
+
+bool HexagonConstEvaluator::evaluate(const Register &R,
+ const LatticeCell &Input, LatticeCell &Result) {
+ if (!R.SubReg) {
+ Result = Input;
+ return true;
+ }
+ const TargetRegisterClass *RC = MRI->getRegClass(R.Reg);
+ if (RC != &Hexagon::DoubleRegsRegClass)
+ return false;
+ if (R.SubReg != Hexagon::isub_lo && R.SubReg != Hexagon::isub_hi)
+ return false;
+
+ assert(!Input.isTop());
+ if (Input.isBottom())
+ return false;
+
+ typedef ConstantProperties P;
+ if (Input.isProperty()) {
+ uint32_t Ps = Input.properties();
+ if (Ps & (P::Zero|P::NaN)) {
+ uint32_t Ns = (Ps & (P::Zero|P::NaN|P::SignProperties));
+ Result.add(Ns);
+ return true;
+ }
+ if (R.SubReg == Hexagon::isub_hi) {
+ uint32_t Ns = (Ps & P::SignProperties);
+ Result.add(Ns);
+ return true;
+ }
+ return false;
+ }
+
+ // The Input cell contains some known values. Pick the word corresponding
+ // to the subregister.
+ APInt A;
+ for (unsigned i = 0; i < Input.size(); ++i) {
+ const Constant *C = Input.Values[i];
+ if (!constToInt(C, A))
+ return false;
+ if (!A.isIntN(64))
+ return false;
+ uint64_t U = A.getZExtValue();
+ if (R.SubReg == Hexagon::isub_hi)
+ U >>= 32;
+ U &= 0xFFFFFFFFULL;
+ uint32_t U32 = Lo_32(U);
+ int32_t V32;
+ memcpy(&V32, &U32, sizeof V32);
+ IntegerType *Ty = Type::getInt32Ty(CX);
+ const ConstantInt *C32 = ConstantInt::get(Ty, static_cast<int64_t>(V32));
+ Result.add(C32);
+ }
+ return true;
+}
+
+bool HexagonConstEvaluator::evaluate(const MachineInstr &BrI,
+ const CellMap &Inputs, SetVector<const MachineBasicBlock*> &Targets,
+ bool &FallsThru) {
+ // We need to evaluate one branch at a time. TII::analyzeBranch checks
+ // all the branches in a basic block at once, so we cannot use it.
+ unsigned Opc = BrI.getOpcode();
+ bool SimpleBranch = false;
+ bool Negated = false;
+ switch (Opc) {
+ case Hexagon::J2_jumpf:
+ case Hexagon::J2_jumpfnew:
+ case Hexagon::J2_jumpfnewpt:
+ Negated = true;
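+ // Intentional fall-through: the taken-sense jumps below share the same
+ // handling; only the branch polarity (Negated) differs.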
+ case Hexagon::J2_jumpt:
+ case Hexagon::J2_jumptnew:
+ case Hexagon::J2_jumptnewpt:
+ // Simple branch: if([!]Pn) jump ...
+ // i.e. Op0 = predicate, Op1 = branch target.
+ SimpleBranch = true;
+ break;
+ case Hexagon::J2_jump:
+ Targets.insert(BrI.getOperand(0).getMBB());
+ FallsThru = false;
+ return true;
+ default:
+Undetermined:
+ // If the branch is of unknown type, assume that all successors are
+ // executable.
+ FallsThru = !BrI.isUnconditionalBranch();
+ return false;
+ }
+
+ if (SimpleBranch) {
+ const MachineOperand &MD = BrI.getOperand(0);
+ Register PR(MD);
+ // If the condition operand has a subregister, this is not something
+ // we currently recognize.
+ if (PR.SubReg)
+ goto Undetermined;
+ assert(Inputs.has(PR.Reg));
+ const LatticeCell &PredC = Inputs.get(PR.Reg);
+ if (PredC.isBottom())
+ goto Undetermined;
+
+ uint32_t Props = PredC.properties();
+ bool CTrue = false, CFalse = false;
+ if (Props & ConstantProperties::Zero)
+ CFalse = true;
+ else if (Props & ConstantProperties::NonZero)
+ CTrue = true;
+ // If the condition is not known to be either, bail out.
+ if (!CTrue && !CFalse)
+ goto Undetermined;
+
+ const MachineBasicBlock *BranchTarget = BrI.getOperand(1).getMBB();
+
+ FallsThru = false;
+ if ((!Negated && CTrue) || (Negated && CFalse))
+ Targets.insert(BranchTarget);
+ else if ((!Negated && CFalse) || (Negated && CTrue))
+ FallsThru = true;
+ else
+ goto Undetermined;
+ }
+
+ return true;
+}
+
+bool HexagonConstEvaluator::rewrite(MachineInstr &MI, const CellMap &Inputs) {
+ if (MI.isBranch())
+ return rewriteHexBranch(MI, Inputs);
+
+ unsigned Opc = MI.getOpcode();
+ switch (Opc) {
+ default:
+ break;
+ case Hexagon::A2_tfrsi:
+ case Hexagon::A2_tfrpi:
+ case Hexagon::CONST32:
+ case Hexagon::CONST64:
+ case Hexagon::PS_true:
+ case Hexagon::PS_false:
+ return false;
+ }
+
+ unsigned NumOp = MI.getNumOperands();
+ if (NumOp == 0)
+ return false;
+
+ bool AllDefs, Changed;
+ Changed = rewriteHexConstDefs(MI, Inputs, AllDefs);
+ // If not all defs have been rewritten (i.e. the instruction defines
+ // a register that is not compile-time constant), then try to rewrite
+ // register operands that are known to be constant with immediates.
+ if (!AllDefs)
+ Changed |= rewriteHexConstUses(MI, Inputs);
+
+ return Changed;
+}
+
+unsigned HexagonConstEvaluator::getRegBitWidth(unsigned Reg) const {
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ if (Hexagon::IntRegsRegClass.hasSubClassEq(RC))
+ return 32;
+ if (Hexagon::DoubleRegsRegClass.hasSubClassEq(RC))
+ return 64;
+ if (Hexagon::PredRegsRegClass.hasSubClassEq(RC))
+ return 8;
+ llvm_unreachable("Invalid register");
+ return 0;
+}
+
+uint32_t HexagonConstEvaluator::getCmp(unsigned Opc) {
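+ // The *_f_* (false-sense) compare-and-jump opcodes map to the negation of
+ // the comparison performed by their *_t_* counterparts, e.g. a false-sense
+ // cmpeq yields NE and a false-sense cmpgt yields LEs.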
+ switch (Opc) {
+ case Hexagon::C2_cmpeq:
+ case Hexagon::C2_cmpeqp:
+ case Hexagon::A4_cmpbeq:
+ case Hexagon::A4_cmpheq:
+ case Hexagon::A4_cmpbeqi:
+ case Hexagon::A4_cmpheqi:
+ case Hexagon::C2_cmpeqi:
+ case Hexagon::J4_cmpeqn1_t_jumpnv_nt:
+ case Hexagon::J4_cmpeqn1_t_jumpnv_t:
+ case Hexagon::J4_cmpeqi_t_jumpnv_nt:
+ case Hexagon::J4_cmpeqi_t_jumpnv_t:
+ case Hexagon::J4_cmpeq_t_jumpnv_nt:
+ case Hexagon::J4_cmpeq_t_jumpnv_t:
+ return Comparison::EQ;
+
+ case Hexagon::C4_cmpneq:
+ case Hexagon::C4_cmpneqi:
+ case Hexagon::J4_cmpeqn1_f_jumpnv_nt:
+ case Hexagon::J4_cmpeqn1_f_jumpnv_t:
+ case Hexagon::J4_cmpeqi_f_jumpnv_nt:
+ case Hexagon::J4_cmpeqi_f_jumpnv_t:
+ case Hexagon::J4_cmpeq_f_jumpnv_nt:
+ case Hexagon::J4_cmpeq_f_jumpnv_t:
+ return Comparison::NE;
+
+ case Hexagon::C2_cmpgt:
+ case Hexagon::C2_cmpgtp:
+ case Hexagon::A4_cmpbgt:
+ case Hexagon::A4_cmphgt:
+ case Hexagon::A4_cmpbgti:
+ case Hexagon::A4_cmphgti:
+ case Hexagon::C2_cmpgti:
+ case Hexagon::J4_cmpgtn1_t_jumpnv_nt:
+ case Hexagon::J4_cmpgtn1_t_jumpnv_t:
+ case Hexagon::J4_cmpgti_t_jumpnv_nt:
+ case Hexagon::J4_cmpgti_t_jumpnv_t:
+ case Hexagon::J4_cmpgt_t_jumpnv_nt:
+ case Hexagon::J4_cmpgt_t_jumpnv_t:
+ return Comparison::GTs;
+
+ case Hexagon::C4_cmplte:
+ case Hexagon::C4_cmpltei:
+ case Hexagon::J4_cmpgtn1_f_jumpnv_nt:
+ case Hexagon::J4_cmpgtn1_f_jumpnv_t:
+ case Hexagon::J4_cmpgti_f_jumpnv_nt:
+ case Hexagon::J4_cmpgti_f_jumpnv_t:
+ case Hexagon::J4_cmpgt_f_jumpnv_nt:
+ case Hexagon::J4_cmpgt_f_jumpnv_t:
+ return Comparison::LEs;
+
+ case Hexagon::C2_cmpgtu:
+ case Hexagon::C2_cmpgtup:
+ case Hexagon::A4_cmpbgtu:
+ case Hexagon::A4_cmpbgtui:
+ case Hexagon::A4_cmphgtu:
+ case Hexagon::A4_cmphgtui:
+ case Hexagon::C2_cmpgtui:
+ case Hexagon::J4_cmpgtui_t_jumpnv_nt:
+ case Hexagon::J4_cmpgtui_t_jumpnv_t:
+ case Hexagon::J4_cmpgtu_t_jumpnv_nt:
+ case Hexagon::J4_cmpgtu_t_jumpnv_t:
+ return Comparison::GTu;
+
+ case Hexagon::J4_cmpltu_f_jumpnv_nt:
+ case Hexagon::J4_cmpltu_f_jumpnv_t:
+ return Comparison::GEu;
+
+ case Hexagon::J4_cmpltu_t_jumpnv_nt:
+ case Hexagon::J4_cmpltu_t_jumpnv_t:
+ return Comparison::LTu;
+
+ case Hexagon::J4_cmplt_f_jumpnv_nt:
+ case Hexagon::J4_cmplt_f_jumpnv_t:
+ return Comparison::GEs;
+
+ case Hexagon::C4_cmplteu:
+ case Hexagon::C4_cmplteui:
+ case Hexagon::J4_cmpgtui_f_jumpnv_nt:
+ case Hexagon::J4_cmpgtui_f_jumpnv_t:
+ case Hexagon::J4_cmpgtu_f_jumpnv_nt:
+ case Hexagon::J4_cmpgtu_f_jumpnv_t:
+ return Comparison::LEu;
+
+ case Hexagon::J4_cmplt_t_jumpnv_nt:
+ case Hexagon::J4_cmplt_t_jumpnv_t:
+ return Comparison::LTs;
+
+ default:
+ break;
+ }
+ return Comparison::Unk;
+}
+
+APInt HexagonConstEvaluator::getCmpImm(unsigned Opc, unsigned OpX,
+ const MachineOperand &MO) {
+ bool Signed = false;
+ switch (Opc) {
+ case Hexagon::A4_cmpbgtui: // u7
+ case Hexagon::A4_cmphgtui: // u7
+ break;
+ case Hexagon::A4_cmpheqi: // s8
+ case Hexagon::C4_cmpneqi: // s8
+ Signed = true;
+ case Hexagon::A4_cmpbeqi: // u8
+ break;
+ case Hexagon::C2_cmpgtui: // u9
+ case Hexagon::C4_cmplteui: // u9
+ break;
+ case Hexagon::C2_cmpeqi: // s10
+ case Hexagon::C2_cmpgti: // s10
+ case Hexagon::C4_cmpltei: // s10
+ Signed = true;
+ break;
+ case Hexagon::J4_cmpeqi_f_jumpnv_nt: // u5
+ case Hexagon::J4_cmpeqi_f_jumpnv_t: // u5
+ case Hexagon::J4_cmpeqi_t_jumpnv_nt: // u5
+ case Hexagon::J4_cmpeqi_t_jumpnv_t: // u5
+ case Hexagon::J4_cmpgti_f_jumpnv_nt: // u5
+ case Hexagon::J4_cmpgti_f_jumpnv_t: // u5
+ case Hexagon::J4_cmpgti_t_jumpnv_nt: // u5
+ case Hexagon::J4_cmpgti_t_jumpnv_t: // u5
+ case Hexagon::J4_cmpgtui_f_jumpnv_nt: // u5
+ case Hexagon::J4_cmpgtui_f_jumpnv_t: // u5
+ case Hexagon::J4_cmpgtui_t_jumpnv_nt: // u5
+ case Hexagon::J4_cmpgtui_t_jumpnv_t: // u5
+ break;
+ default:
+ llvm_unreachable("Unhandled instruction");
+ break;
+ }
+
+ uint64_t Val = MO.getImm();
+ return APInt(32, Val, Signed);
+}
+
+void HexagonConstEvaluator::replaceWithNop(MachineInstr &MI) {
+ MI.setDesc(HII.get(Hexagon::A2_nop));
+ while (MI.getNumOperands() > 0)
+ MI.RemoveOperand(0);
+}
+
+bool HexagonConstEvaluator::evaluateHexRSEQ32(Register RL, Register RH,
+ const CellMap &Inputs, LatticeCell &Result) {
+ assert(Inputs.has(RL.Reg) && Inputs.has(RH.Reg));
+ LatticeCell LSL, LSH;
+ if (!getCell(RL, Inputs, LSL) || !getCell(RH, Inputs, LSH))
+ return false;
+ if (LSL.isProperty() || LSH.isProperty())
+ return false;
+
+ unsigned LN = LSL.size(), HN = LSH.size();
+ SmallVector<APInt,4> LoVs(LN), HiVs(HN);
+ for (unsigned i = 0; i < LN; ++i) {
+ bool Eval = constToInt(LSL.Values[i], LoVs[i]);
+ if (!Eval)
+ return false;
+ assert(LoVs[i].getBitWidth() == 32);
+ }
+ for (unsigned i = 0; i < HN; ++i) {
+ bool Eval = constToInt(LSH.Values[i], HiVs[i]);
+ if (!Eval)
+ return false;
+ assert(HiVs[i].getBitWidth() == 32);
+ }
+
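+ // Form every candidate 64-bit value (high:low) from the cross product of
+ // the known high-word and low-word constants.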
+ for (unsigned i = 0; i < HiVs.size(); ++i) {
+ APInt HV = HiVs[i].zextOrSelf(64) << 32;
+ for (unsigned j = 0; j < LoVs.size(); ++j) {
+ APInt LV = LoVs[j].zextOrSelf(64);
+ const Constant *C = intToConst(HV | LV);
+ Result.add(C);
+ if (Result.isBottom())
+ return false;
+ }
+ }
+ return !Result.isBottom();
+}
+
+bool HexagonConstEvaluator::evaluateHexCompare(const MachineInstr &MI,
+ const CellMap &Inputs, CellMap &Outputs) {
+ unsigned Opc = MI.getOpcode();
+ bool Classic = false;
+ switch (Opc) {
+ case Hexagon::C2_cmpeq:
+ case Hexagon::C2_cmpeqp:
+ case Hexagon::C2_cmpgt:
+ case Hexagon::C2_cmpgtp:
+ case Hexagon::C2_cmpgtu:
+ case Hexagon::C2_cmpgtup:
+ case Hexagon::C2_cmpeqi:
+ case Hexagon::C2_cmpgti:
+ case Hexagon::C2_cmpgtui:
+ // Classic compare: Dst0 = CMP Src1, Src2
+ Classic = true;
+ break;
+ default:
+ // Not handling other compare instructions now.
+ return false;
+ }
+
+ if (Classic) {
+ const MachineOperand &Src1 = MI.getOperand(1);
+ const MachineOperand &Src2 = MI.getOperand(2);
+
+ bool Result;
+ unsigned Opc = MI.getOpcode();
+ bool Computed = evaluateHexCompare2(Opc, Src1, Src2, Inputs, Result);
+ if (Computed) {
+ // Only create a zero/non-zero cell. At this time there isn't really
+ // much need for specific values.
+ Register DefR(MI.getOperand(0));
+ LatticeCell L = Outputs.get(DefR.Reg);
+ uint32_t P = Result ? ConstantProperties::NonZero
+ : ConstantProperties::Zero;
+ L.add(P);
+ Outputs.update(DefR.Reg, L);
+ return true;
+ }
+ }
+
+ return false;
+}
+
+bool HexagonConstEvaluator::evaluateHexCompare2(unsigned Opc,
+ const MachineOperand &Src1, const MachineOperand &Src2,
+ const CellMap &Inputs, bool &Result) {
+ uint32_t Cmp = getCmp(Opc);
+ bool Reg1 = Src1.isReg(), Reg2 = Src2.isReg();
+ bool Imm1 = Src1.isImm(), Imm2 = Src2.isImm();
+ if (Reg1) {
+ Register R1(Src1);
+ if (Reg2) {
+ Register R2(Src2);
+ return evaluateCMPrr(Cmp, R1, R2, Inputs, Result);
+ } else if (Imm2) {
+ APInt A2 = getCmpImm(Opc, 2, Src2);
+ return evaluateCMPri(Cmp, R1, A2, Inputs, Result);
+ }
+ } else if (Imm1) {
+ APInt A1 = getCmpImm(Opc, 1, Src1);
+ if (Reg2) {
+ Register R2(Src2);
+ uint32_t NegCmp = Comparison::negate(Cmp);
+ return evaluateCMPri(NegCmp, R2, A1, Inputs, Result);
+ } else if (Imm2) {
+ APInt A2 = getCmpImm(Opc, 2, Src2);
+ return evaluateCMPii(Cmp, A1, A2, Result);
+ }
+ }
+ // Unknown kind of comparison.
+ return false;
+}
+
+bool HexagonConstEvaluator::evaluateHexLogical(const MachineInstr &MI,
+ const CellMap &Inputs, CellMap &Outputs) {
+ unsigned Opc = MI.getOpcode();
+ if (MI.getNumOperands() != 3)
+ return false;
+ const MachineOperand &Src1 = MI.getOperand(1);
+ const MachineOperand &Src2 = MI.getOperand(2);
+ Register R1(Src1);
+ bool Eval = false;
+ LatticeCell RC;
+ switch (Opc) {
+ default:
+ return false;
+ case Hexagon::A2_and:
+ case Hexagon::A2_andp:
+ Eval = evaluateANDrr(R1, Register(Src2), Inputs, RC);
+ break;
+ case Hexagon::A2_andir: {
+ APInt A(32, Src2.getImm(), true);
+ Eval = evaluateANDri(R1, A, Inputs, RC);
+ break;
+ }
+ case Hexagon::A2_or:
+ case Hexagon::A2_orp:
+ Eval = evaluateORrr(R1, Register(Src2), Inputs, RC);
+ break;
+ case Hexagon::A2_orir: {
+ APInt A(32, Src2.getImm(), true);
+ Eval = evaluateORri(R1, A, Inputs, RC);
+ break;
+ }
+ case Hexagon::A2_xor:
+ case Hexagon::A2_xorp:
+ Eval = evaluateXORrr(R1, Register(Src2), Inputs, RC);
+ break;
+ }
+ if (Eval) {
+ Register DefR(MI.getOperand(0));
+ Outputs.update(DefR.Reg, RC);
+ }
+ return Eval;
+}
+
+bool HexagonConstEvaluator::evaluateHexCondMove(const MachineInstr &MI,
+ const CellMap &Inputs, CellMap &Outputs) {
+ // Dst0 = Cond1 ? Src2 : Src3
+ Register CR(MI.getOperand(1));
+ assert(Inputs.has(CR.Reg));
+ LatticeCell LS;
+ if (!getCell(CR, Inputs, LS))
+ return false;
+ uint32_t Ps = LS.properties();
+ unsigned TakeOp;
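+ // A condition known to be zero selects the "false" value (operand 3); a
+ // condition known to be non-zero selects the "true" value (operand 2).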
+ if (Ps & ConstantProperties::Zero)
+ TakeOp = 3;
+ else if (Ps & ConstantProperties::NonZero)
+ TakeOp = 2;
+ else
+ return false;
+
+ const MachineOperand &ValOp = MI.getOperand(TakeOp);
+ Register DefR(MI.getOperand(0));
+ LatticeCell RC = Outputs.get(DefR.Reg);
+
+ if (ValOp.isImm()) {
+ int64_t V = ValOp.getImm();
+ unsigned W = getRegBitWidth(DefR.Reg);
+ APInt A(W, V, true);
+ const Constant *C = intToConst(A);
+ RC.add(C);
+ Outputs.update(DefR.Reg, RC);
+ return true;
+ }
+ if (ValOp.isReg()) {
+ Register R(ValOp);
+ const LatticeCell &LR = Inputs.get(R.Reg);
+ LatticeCell LSR;
+ if (!evaluate(R, LR, LSR))
+ return false;
+ RC.meet(LSR);
+ Outputs.update(DefR.Reg, RC);
+ return true;
+ }
+ return false;
+}
+
+bool HexagonConstEvaluator::evaluateHexExt(const MachineInstr &MI,
+ const CellMap &Inputs, CellMap &Outputs) {
+ // Dst0 = ext R1
+ Register R1(MI.getOperand(1));
+ assert(Inputs.has(R1.Reg));
+
+ unsigned Opc = MI.getOpcode();
+ unsigned Bits;
+ switch (Opc) {
+ case Hexagon::A2_sxtb:
+ case Hexagon::A2_zxtb:
+ Bits = 8;
+ break;
+ case Hexagon::A2_sxth:
+ case Hexagon::A2_zxth:
+ Bits = 16;
+ break;
+ case Hexagon::A2_sxtw:
+ Bits = 32;
+ break;
+ }
+
+ bool Signed = false;
+ switch (Opc) {
+ case Hexagon::A2_sxtb:
+ case Hexagon::A2_sxth:
+ case Hexagon::A2_sxtw:
+ Signed = true;
+ break;
+ }
+
+ Register DefR(MI.getOperand(0));
+ unsigned BW = getRegBitWidth(DefR.Reg);
+ LatticeCell RC = Outputs.get(DefR.Reg);
+ bool Eval = Signed ? evaluateSEXTr(R1, BW, Bits, Inputs, RC)
+ : evaluateZEXTr(R1, BW, Bits, Inputs, RC);
+ if (!Eval)
+ return false;
+ Outputs.update(DefR.Reg, RC);
+ return true;
+}
+
+bool HexagonConstEvaluator::evaluateHexVector1(const MachineInstr &MI,
+ const CellMap &Inputs, CellMap &Outputs) {
+ // DefR = op R1
+ Register DefR(MI.getOperand(0));
+ Register R1(MI.getOperand(1));
+ assert(Inputs.has(R1.Reg));
+ LatticeCell RC = Outputs.get(DefR.Reg);
+ bool Eval;
+
+ unsigned Opc = MI.getOpcode();
+ switch (Opc) {
+ case Hexagon::S2_vsplatrb:
+ // Rd = 4 times Rs:0..7
+ Eval = evaluateSplatr(R1, 8, 4, Inputs, RC);
+ break;
+ case Hexagon::S2_vsplatrh:
+ // Rdd = 4 times Rs:0..15
+ Eval = evaluateSplatr(R1, 16, 4, Inputs, RC);
+ break;
+ default:
+ return false;
+ }
+
+ if (!Eval)
+ return false;
+ Outputs.update(DefR.Reg, RC);
+ return true;
+}
+
+bool HexagonConstEvaluator::rewriteHexConstDefs(MachineInstr &MI,
+ const CellMap &Inputs, bool &AllDefs) {
+ AllDefs = false;
+
+ // Some diagnostics.
+ // DEBUG({...}) gets confused with all this code as an argument.
+#ifndef NDEBUG
+ bool Debugging = DebugFlag && isCurrentDebugType(DEBUG_TYPE);
+ if (Debugging) {
+ bool Const = true, HasUse = false;
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg() || !MO.isUse() || MO.isImplicit())
+ continue;
+ Register R(MO);
+ if (!TargetRegisterInfo::isVirtualRegister(R.Reg))
+ continue;
+ HasUse = true;
+ // PHIs can legitimately have "top" cells after propagation.
+ if (!MI.isPHI() && !Inputs.has(R.Reg)) {
+ dbgs() << "Top " << PrintReg(R.Reg, &HRI, R.SubReg)
+ << " in MI: " << MI;
+ continue;
+ }
+ const LatticeCell &L = Inputs.get(R.Reg);
+ Const &= L.isSingle();
+ if (!Const)
+ break;
+ }
+ if (HasUse && Const) {
+ if (!MI.isCopy()) {
+ dbgs() << "CONST: " << MI;
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg() || !MO.isUse() || MO.isImplicit())
+ continue;
+ unsigned R = MO.getReg();
+ dbgs() << PrintReg(R, &TRI) << ": " << Inputs.get(R) << "\n";
+ }
+ }
+ }
+ }
+#endif
+
+ // Avoid generating TFRIs for register transfers; this preserves
+ // coalescing opportunities.
+ if (MI.isCopy())
+ return false;
+
+ // Collect all virtual register-def operands.
+ SmallVector<unsigned,2> DefRegs;
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned R = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(R))
+ continue;
+ assert(!MO.getSubReg());
+ assert(Inputs.has(R));
+ DefRegs.push_back(R);
+ }
+
+ MachineBasicBlock &B = *MI.getParent();
+ const DebugLoc &DL = MI.getDebugLoc();
+ unsigned ChangedNum = 0;
+#ifndef NDEBUG
+ SmallVector<const MachineInstr*,4> NewInstrs;
+#endif
+
+ // For each defined register, if it is a constant, create an instruction
+ // NewR = const
+ // and replace all uses of the defined register with NewR.
+ for (unsigned i = 0, n = DefRegs.size(); i < n; ++i) {
+ unsigned R = DefRegs[i];
+ const LatticeCell &L = Inputs.get(R);
+ if (L.isBottom())
+ continue;
+ const TargetRegisterClass *RC = MRI->getRegClass(R);
+ MachineBasicBlock::iterator At = MI.getIterator();
+
+ if (!L.isSingle()) {
+ // If this a zero/non-zero cell, we can fold a definition
+ // of a predicate register.
+ typedef ConstantProperties P;
+ uint64_t Ps = L.properties();
+ if (!(Ps & (P::Zero|P::NonZero)))
+ continue;
+ const TargetRegisterClass *PredRC = &Hexagon::PredRegsRegClass;
+ if (RC != PredRC)
+ continue;
+ const MCInstrDesc *NewD = (Ps & P::Zero) ?
+ &HII.get(Hexagon::PS_false) :
+ &HII.get(Hexagon::PS_true);
+ unsigned NewR = MRI->createVirtualRegister(PredRC);
+ const MachineInstrBuilder &MIB = BuildMI(B, At, DL, *NewD, NewR);
+ (void)MIB;
+#ifndef NDEBUG
+ NewInstrs.push_back(&*MIB);
+#endif
+ replaceAllRegUsesWith(R, NewR);
+ } else {
+ // This cell has a single value.
+ APInt A;
+ if (!constToInt(L.Value, A) || !A.isSignedIntN(64))
+ continue;
+ const TargetRegisterClass *NewRC;
+ const MCInstrDesc *NewD;
+
+ unsigned W = getRegBitWidth(R);
+ int64_t V = A.getSExtValue();
+ assert(W == 32 || W == 64);
+ if (W == 32)
+ NewRC = &Hexagon::IntRegsRegClass;
+ else
+ NewRC = &Hexagon::DoubleRegsRegClass;
+ unsigned NewR = MRI->createVirtualRegister(NewRC);
+ const MachineInstr *NewMI;
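+ // Pick the cheapest materialization: a single transfer-immediate when the
+ // value fits, a combine of two small halves for 64-bit values, and CONST64
+ // as the general fallback.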
+
+ if (W == 32) {
+ NewD = &HII.get(Hexagon::A2_tfrsi);
+ NewMI = BuildMI(B, At, DL, *NewD, NewR)
+ .addImm(V);
+ } else {
+ if (A.isSignedIntN(8)) {
+ NewD = &HII.get(Hexagon::A2_tfrpi);
+ NewMI = BuildMI(B, At, DL, *NewD, NewR)
+ .addImm(V);
+ } else {
+ int32_t Hi = V >> 32;
+ int32_t Lo = V & 0xFFFFFFFFLL;
+ if (isInt<8>(Hi) && isInt<8>(Lo)) {
+ NewD = &HII.get(Hexagon::A2_combineii);
+ NewMI = BuildMI(B, At, DL, *NewD, NewR)
+ .addImm(Hi)
+ .addImm(Lo);
+ } else {
+ NewD = &HII.get(Hexagon::CONST64);
+ NewMI = BuildMI(B, At, DL, *NewD, NewR)
+ .addImm(V);
+ }
+ }
+ }
+ (void)NewMI;
+#ifndef NDEBUG
+ NewInstrs.push_back(NewMI);
+#endif
+ replaceAllRegUsesWith(R, NewR);
+ }
+ ChangedNum++;
+ }
+
+ DEBUG({
+ if (!NewInstrs.empty()) {
+ MachineFunction &MF = *MI.getParent()->getParent();
+ dbgs() << "In function: " << MF.getFunction()->getName() << "\n";
+ dbgs() << "Rewrite: for " << MI << " created " << *NewInstrs[0];
+ for (unsigned i = 1; i < NewInstrs.size(); ++i)
+ dbgs() << " " << *NewInstrs[i];
+ }
+ });
+
+ AllDefs = (ChangedNum == DefRegs.size());
+ return ChangedNum > 0;
+}
+
+bool HexagonConstEvaluator::rewriteHexConstUses(MachineInstr &MI,
+ const CellMap &Inputs) {
+ bool Changed = false;
+ unsigned Opc = MI.getOpcode();
+ MachineBasicBlock &B = *MI.getParent();
+ const DebugLoc &DL = MI.getDebugLoc();
+ MachineBasicBlock::iterator At = MI.getIterator();
+ MachineInstr *NewMI = nullptr;
+
+ switch (Opc) {
+ case Hexagon::M2_maci:
+ // Convert DefR += mpyi(R2, R3)
+ // to DefR += mpyi(R, #imm),
+ // or DefR -= mpyi(R, #imm).
+ {
+ Register DefR(MI.getOperand(0));
+ assert(!DefR.SubReg);
+ Register R2(MI.getOperand(2));
+ Register R3(MI.getOperand(3));
+ assert(Inputs.has(R2.Reg) && Inputs.has(R3.Reg));
+ LatticeCell LS2, LS3;
+ // It is enough to get one of the input cells, since we will only try
+ // to replace one argument---whichever happens to be a single constant.
+ bool HasC2 = getCell(R2, Inputs, LS2), HasC3 = getCell(R3, Inputs, LS3);
+ if (!HasC2 && !HasC3)
+ return false;
+ bool Zero = ((HasC2 && (LS2.properties() & ConstantProperties::Zero)) ||
+ (HasC3 && (LS3.properties() & ConstantProperties::Zero)));
+ // If one of the operands is zero, eliminate the multiplication.
+ if (Zero) {
+ // DefR == R1 (tied operands).
+ MachineOperand &Acc = MI.getOperand(1);
+ Register R1(Acc);
+ unsigned NewR = R1.Reg;
+ if (R1.SubReg) {
+ // Generate COPY. FIXME: Replace with the register:subregister.
+ const TargetRegisterClass *RC = MRI->getRegClass(DefR.Reg);
+ NewR = MRI->createVirtualRegister(RC);
+ NewMI = BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR)
+ .addReg(R1.Reg, getRegState(Acc), R1.SubReg);
+ }
+ replaceAllRegUsesWith(DefR.Reg, NewR);
+ MRI->clearKillFlags(NewR);
+ Changed = true;
+ break;
+ }
+
+ bool Swap = false;
+ if (!LS3.isSingle()) {
+ if (!LS2.isSingle())
+ return false;
+ Swap = true;
+ }
+ const LatticeCell &LI = Swap ? LS2 : LS3;
+ const MachineOperand &OpR2 = Swap ? MI.getOperand(3)
+ : MI.getOperand(2);
+ // LI is single here.
+ APInt A;
+ if (!constToInt(LI.Value, A) || !A.isSignedIntN(8))
+ return false;
+ int64_t V = A.getSExtValue();
+ const MCInstrDesc &D = (V >= 0) ? HII.get(Hexagon::M2_macsip)
+ : HII.get(Hexagon::M2_macsin);
+ if (V < 0)
+ V = -V;
+ const TargetRegisterClass *RC = MRI->getRegClass(DefR.Reg);
+ unsigned NewR = MRI->createVirtualRegister(RC);
+ const MachineOperand &Src1 = MI.getOperand(1);
+ NewMI = BuildMI(B, At, DL, D, NewR)
+ .addReg(Src1.getReg(), getRegState(Src1), Src1.getSubReg())
+ .addReg(OpR2.getReg(), getRegState(OpR2), OpR2.getSubReg())
+ .addImm(V);
+ replaceAllRegUsesWith(DefR.Reg, NewR);
+ Changed = true;
+ break;
+ }
+
+ case Hexagon::A2_and:
+ {
+ Register R1(MI.getOperand(1));
+ Register R2(MI.getOperand(2));
+ assert(Inputs.has(R1.Reg) && Inputs.has(R2.Reg));
+ LatticeCell LS1, LS2;
+ unsigned CopyOf = 0;
+ // Check if any of the operands is -1 (i.e. all bits set).
+ if (getCell(R1, Inputs, LS1) && LS1.isSingle()) {
+ APInt M1;
+ if (constToInt(LS1.Value, M1) && !~M1)
+ CopyOf = 2;
+ }
+ else if (getCell(R2, Inputs, LS2) && LS2.isSingle()) {
+ APInt M1;
+ if (constToInt(LS2.Value, M1) && !~M1)
+ CopyOf = 1;
+ }
+ if (!CopyOf)
+ return false;
+ MachineOperand &SO = MI.getOperand(CopyOf);
+ Register SR(SO);
+ Register DefR(MI.getOperand(0));
+ unsigned NewR = SR.Reg;
+ if (SR.SubReg) {
+ const TargetRegisterClass *RC = MRI->getRegClass(DefR.Reg);
+ NewR = MRI->createVirtualRegister(RC);
+ NewMI = BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR)
+ .addReg(SR.Reg, getRegState(SO), SR.SubReg);
+ }
+ replaceAllRegUsesWith(DefR.Reg, NewR);
+ MRI->clearKillFlags(NewR);
+ Changed = true;
+ }
+ break;
+
+ case Hexagon::A2_or:
+ {
+ Register R1(MI.getOperand(1));
+ Register R2(MI.getOperand(2));
+ assert(Inputs.has(R1.Reg) && Inputs.has(R2.Reg));
+ LatticeCell LS1, LS2;
+ unsigned CopyOf = 0;
+ typedef ConstantProperties P;
+ if (getCell(R1, Inputs, LS1) && (LS1.properties() & P::Zero))
+ CopyOf = 2;
+ else if (getCell(R2, Inputs, LS2) && (LS2.properties() & P::Zero))
+ CopyOf = 1;
+ if (!CopyOf)
+ return false;
+ MachineOperand &SO = MI.getOperand(CopyOf);
+ Register SR(SO);
+ Register DefR(MI.getOperand(0));
+ unsigned NewR = SR.Reg;
+ if (SR.SubReg) {
+ const TargetRegisterClass *RC = MRI->getRegClass(DefR.Reg);
+ NewR = MRI->createVirtualRegister(RC);
+ NewMI = BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR)
+ .addReg(SR.Reg, getRegState(SO), SR.SubReg);
+ }
+ replaceAllRegUsesWith(DefR.Reg, NewR);
+ MRI->clearKillFlags(NewR);
+ Changed = true;
+ }
+ break;
+ }
+
+ if (NewMI) {
+ // clear all the kill flags of this new instruction.
+ for (MachineOperand &MO : NewMI->operands())
+ if (MO.isReg() && MO.isUse())
+ MO.setIsKill(false);
+ }
+
+ DEBUG({
+ if (NewMI) {
+ dbgs() << "Rewrite: for " << MI;
+ if (NewMI != &MI)
+ dbgs() << " created " << *NewMI;
+ else
+ dbgs() << " modified the instruction itself and created:" << *NewMI;
+ }
+ });
+
+ return Changed;
+}
+
+void HexagonConstEvaluator::replaceAllRegUsesWith(unsigned FromReg,
+ unsigned ToReg) {
+ assert(TargetRegisterInfo::isVirtualRegister(FromReg));
+ assert(TargetRegisterInfo::isVirtualRegister(ToReg));
+ for (auto I = MRI->use_begin(FromReg), E = MRI->use_end(); I != E;) {
+ MachineOperand &O = *I;
+ ++I;
+ O.setReg(ToReg);
+ }
+}
+
+bool HexagonConstEvaluator::rewriteHexBranch(MachineInstr &BrI,
+ const CellMap &Inputs) {
+ MachineBasicBlock &B = *BrI.getParent();
+ unsigned NumOp = BrI.getNumOperands();
+ if (!NumOp)
+ return false;
+
+ bool FallsThru;
+ SetVector<const MachineBasicBlock*> Targets;
+ bool Eval = evaluate(BrI, Inputs, Targets, FallsThru);
+ unsigned NumTargets = Targets.size();
+ if (!Eval || NumTargets > 1 || (NumTargets == 1 && FallsThru))
+ return false;
+ if (BrI.getOpcode() == Hexagon::J2_jump)
+ return false;
+
+ DEBUG(dbgs() << "Rewrite(BB#" << B.getNumber() << "):" << BrI);
+ bool Rewritten = false;
+ if (NumTargets > 0) {
+ assert(!FallsThru && "This should have been checked before");
+ // MIB.addMBB needs non-const pointer.
+ MachineBasicBlock *TargetB = const_cast<MachineBasicBlock*>(Targets[0]);
+ bool Moot = B.isLayoutSuccessor(TargetB);
+ if (!Moot) {
+ // If we build a branch here, we must make sure that it won't be
+ // erased as "non-executable". We can't mark any new instructions
+ // as executable here, so we need to overwrite the BrI, which we
+ // know is executable.
+ const MCInstrDesc &JD = HII.get(Hexagon::J2_jump);
+ auto NI = BuildMI(B, BrI.getIterator(), BrI.getDebugLoc(), JD)
+ .addMBB(TargetB);
+ BrI.setDesc(JD);
+ while (BrI.getNumOperands() > 0)
+ BrI.RemoveOperand(0);
+ // This ensures that all implicit operands (e.g. %R31<imp-def>, etc)
+ // are present in the rewritten branch.
+ for (auto &Op : NI->operands())
+ BrI.addOperand(Op);
+ NI->eraseFromParent();
+ Rewritten = true;
+ }
+ }
+
+ // Do not erase instructions. A newly created instruction could get
+ // the same address as an instruction marked as executable during the
+ // propagation.
+ if (!Rewritten)
+ replaceWithNop(BrI);
+ return true;
+}
+
+FunctionPass *llvm::createHexagonConstPropagationPass() {
+ return new HexagonConstPropagation();
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp
index face0f3f64b4..36080997ec6b 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp
@@ -11,13 +11,9 @@
// to move them together. If we can move them next to each other we do so and
// replace them with a combine instruction.
//===----------------------------------------------------------------------===//
-#include "llvm/PassSupport.h"
-#include "Hexagon.h"
#include "HexagonInstrInfo.h"
-#include "HexagonMachineFunctionInfo.h"
-#include "HexagonRegisterInfo.h"
#include "HexagonSubtarget.h"
-#include "HexagonTargetMachine.h"
+#include "llvm/PassSupport.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -64,6 +60,7 @@ namespace {
class HexagonCopyToCombine : public MachineFunctionPass {
const HexagonInstrInfo *TII;
const TargetRegisterInfo *TRI;
+ const HexagonSubtarget *ST;
bool ShouldCombineAggressively;
DenseSet<MachineInstr *> PotentiallyNewifiableTFR;
@@ -80,7 +77,7 @@ public:
MachineFunctionPass::getAnalysisUsage(AU);
}
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "Hexagon Copy-To-Combine Pass";
}
@@ -88,7 +85,7 @@ public:
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
private:
@@ -163,6 +160,10 @@ static bool isCombinableInstType(MachineInstr &MI, const HexagonInstrInfo *TII,
(ShouldCombineAggressively || NotExt);
}
+ case Hexagon::V6_vassign:
+ case Hexagon::V6_vassign_128B:
+ return true;
+
default:
break;
}
@@ -186,11 +187,22 @@ static bool areCombinableOperations(const TargetRegisterInfo *TRI,
MachineInstr &LowRegInst, bool AllowC64) {
unsigned HiOpc = HighRegInst.getOpcode();
unsigned LoOpc = LowRegInst.getOpcode();
- (void)HiOpc; // Fix compiler warning
- (void)LoOpc; // Fix compiler warning
- assert((HiOpc == Hexagon::A2_tfr || HiOpc == Hexagon::A2_tfrsi) &&
- (LoOpc == Hexagon::A2_tfr || LoOpc == Hexagon::A2_tfrsi) &&
- "Assume individual instructions are of a combinable type");
+
+ auto verifyOpc = [](unsigned Opc) -> void {
+ switch (Opc) {
+ case Hexagon::A2_tfr:
+ case Hexagon::A2_tfrsi:
+ case Hexagon::V6_vassign:
+ break;
+ default:
+ llvm_unreachable("Unexpected opcode");
+ }
+ };
+ verifyOpc(HiOpc);
+ verifyOpc(LoOpc);
+
+ if (HiOpc == Hexagon::V6_vassign || LoOpc == Hexagon::V6_vassign)
+ return HiOpc == LoOpc;
if (!AllowC64) {
// There is no combine of two constant extended values.
@@ -216,9 +228,13 @@ static bool areCombinableOperations(const TargetRegisterInfo *TRI,
}
static bool isEvenReg(unsigned Reg) {
- assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
- Hexagon::IntRegsRegClass.contains(Reg));
- return (Reg - Hexagon::R0) % 2 == 0;
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+ if (Hexagon::IntRegsRegClass.contains(Reg))
+ return (Reg - Hexagon::R0) % 2 == 0;
+ if (Hexagon::VectorRegsRegClass.contains(Reg) ||
+ Hexagon::VectorRegs128BRegClass.contains(Reg))
+ return (Reg - Hexagon::V0) % 2 == 0;
+ llvm_unreachable("Invalid register");
}
static void removeKillInfo(MachineInstr &MI, unsigned RegNotKilled) {
@@ -385,7 +401,7 @@ HexagonCopyToCombine::findPotentialNewifiableTFRs(MachineBasicBlock &BB) {
continue;
// Mark TFRs that feed a potential new value store as such.
- if (TII->mayBeNewStore(&MI)) {
+ if (TII->mayBeNewStore(MI)) {
// Look for uses of TFR instructions.
for (unsigned OpdIdx = 0, OpdE = MI.getNumOperands(); OpdIdx != OpdE;
++OpdIdx) {
@@ -446,8 +462,9 @@ bool HexagonCopyToCombine::runOnMachineFunction(MachineFunction &MF) {
bool HasChanged = false;
// Get target info.
- TRI = MF.getSubtarget().getRegisterInfo();
- TII = MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
+ ST = &MF.getSubtarget<HexagonSubtarget>();
+ TRI = ST->getRegisterInfo();
+ TII = ST->getInstrInfo();
const Function *F = MF.getFunction();
bool OptForSize = F->hasFnAttribute(Attribute::OptimizeForSize);
@@ -504,8 +521,7 @@ MachineInstr *HexagonCopyToCombine::findPairable(MachineInstr &I1,
bool &DoInsertAtI1,
bool AllowC64) {
MachineBasicBlock::iterator I2 = std::next(MachineBasicBlock::iterator(I1));
-
- while (I2->isDebugValue())
+ while (I2 != I1.getParent()->end() && I2->isDebugValue())
++I2;
unsigned I1DestReg = I1.getOperand(0).getReg();
@@ -564,14 +580,26 @@ void HexagonCopyToCombine::combine(MachineInstr &I1, MachineInstr &I2,
unsigned I2DestReg = I2.getOperand(0).getReg();
bool IsI1Loreg = (I2DestReg - I1DestReg) == 1;
unsigned LoRegDef = IsI1Loreg ? I1DestReg : I2DestReg;
+ unsigned SubLo;
+
+ const TargetRegisterClass *SuperRC = nullptr;
+ if (Hexagon::IntRegsRegClass.contains(LoRegDef)) {
+ SuperRC = &Hexagon::DoubleRegsRegClass;
+ SubLo = Hexagon::isub_lo;
+ } else if (Hexagon::VectorRegsRegClass.contains(LoRegDef)) {
+ assert(ST->useHVXOps());
+ if (ST->useHVXSglOps())
+ SuperRC = &Hexagon::VecDblRegsRegClass;
+ else
+ SuperRC = &Hexagon::VecDblRegs128BRegClass;
+ SubLo = Hexagon::vsub_lo;
+ } else
+ llvm_unreachable("Unexpected register class");
// Get the double word register.
- unsigned DoubleRegDest =
- TRI->getMatchingSuperReg(LoRegDef, Hexagon::subreg_loreg,
- &Hexagon::DoubleRegsRegClass);
+ unsigned DoubleRegDest = TRI->getMatchingSuperReg(LoRegDef, SubLo, SuperRC);
assert(DoubleRegDest != 0 && "Expect a valid register");
-
// Setup source operands.
MachineOperand &LoOperand = IsI1Loreg ? I1.getOperand(1) : I2.getOperand(1);
MachineOperand &HiOperand = IsI1Loreg ? I2.getOperand(1) : I1.getOperand(1);
@@ -605,7 +633,7 @@ void HexagonCopyToCombine::combine(MachineInstr &I1, MachineInstr &I2,
for (auto NewMI : DbgMItoMove) {
// If iterator MI is pointing to DEBUG_VAL, make sure
// MI now points to next relevant instruction.
- if (NewMI == (MachineInstr*)MI)
+ if (NewMI == MI)
++MI;
BB->splice(InsertPt, BB, NewMI);
}
@@ -628,8 +656,7 @@ void HexagonCopyToCombine::emitConst64(MachineBasicBlock::iterator &InsertPt,
int64_t V = HiOperand.getImm();
V = (V << 32) | (0x0ffffffffLL & LoOperand.getImm());
- BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::CONST64_Int_Real),
- DoubleDestReg)
+ BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::CONST64), DoubleDestReg)
.addImm(V);
}
@@ -838,7 +865,19 @@ void HexagonCopyToCombine::emitCombineRR(MachineBasicBlock::iterator &InsertPt,
// Insert new combine instruction.
// DoubleRegDest = combine HiReg, LoReg
- BuildMI(*BB, InsertPt, DL, TII->get(Hexagon::A2_combinew), DoubleDestReg)
+ unsigned NewOpc;
+ if (Hexagon::DoubleRegsRegClass.contains(DoubleDestReg)) {
+ NewOpc = Hexagon::A2_combinew;
+ } else if (Hexagon::VecDblRegsRegClass.contains(DoubleDestReg)) {
+ assert(ST->useHVXOps());
+ if (ST->useHVXSglOps())
+ NewOpc = Hexagon::V6_vcombine;
+ else
+ NewOpc = Hexagon::V6_vcombine_128B;
+ } else
+ llvm_unreachable("Unexpected register");
+
+ BuildMI(*BB, InsertPt, DL, TII->get(NewOpc), DoubleDestReg)
.addReg(HiReg, HiRegKillFlag)
.addReg(LoReg, LoRegKillFlag);
}
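A sketch of the rewrite generalized by this hunk; register numbers are illustrative and not taken from the commit:

  //   %R0<def> = A2_tfr %R2
  //   %R1<def> = A2_tfr %R3
  // combine into
  //   %D0<def> = A2_combinew %R3, %R2        ; HiReg, LoReg
  // and, for HVX, a pair of V6_vassign transfers combines into
  //   %W0<def> = V6_vcombine %V3, %V2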
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
index 2665acd19fb1..a5351cd08da5 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonEarlyIfConv.cpp
@@ -52,7 +52,7 @@
// %vreg41<def> = S2_tstbit_i %vreg40<kill>, 0
// spec-> %vreg11<def> = A2_addp %vreg6, %vreg10
// pred-> S2_pstorerdf_io %vreg41, %vreg32, 16, %vreg11
-// %vreg46<def> = MUX64_rr %vreg41, %vreg6, %vreg11
+// %vreg46<def> = PS_pselect %vreg41, %vreg6, %vreg11
// %vreg13<def> = A2_addp %vreg7, %vreg46
// %vreg42<def> = C2_cmpeqi %vreg9, 10
// J2_jumpf %vreg42<kill>, <BB#3>, %PC<imp-def,dead>
@@ -61,32 +61,46 @@
#define DEBUG_TYPE "hexagon-eif"
+#include "Hexagon.h"
+#include "HexagonInstrInfo.h"
+#include "HexagonSubtarget.h"
#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "HexagonTargetMachine.h"
-
-#include <functional>
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <cassert>
+#include <iterator>
using namespace llvm;
namespace llvm {
+
FunctionPass *createHexagonEarlyIfConversion();
void initializeHexagonEarlyIfConversionPass(PassRegistry& Registry);
-}
+
+} // end namespace llvm
namespace {
+
cl::opt<bool> EnableHexagonBP("enable-hexagon-br-prob", cl::Hidden,
cl::init(false), cl::desc("Enable branch probability info"));
cl::opt<unsigned> SizeLimit("eif-limit", cl::init(6), cl::Hidden,
@@ -103,18 +117,22 @@ namespace {
}
struct FlowPattern {
- FlowPattern() : SplitB(0), TrueB(0), FalseB(0), JoinB(0), PredR(0) {}
+ FlowPattern() = default;
FlowPattern(MachineBasicBlock *B, unsigned PR, MachineBasicBlock *TB,
MachineBasicBlock *FB, MachineBasicBlock *JB)
: SplitB(B), TrueB(TB), FalseB(FB), JoinB(JB), PredR(PR) {}
- MachineBasicBlock *SplitB;
- MachineBasicBlock *TrueB, *FalseB, *JoinB;
- unsigned PredR;
+ MachineBasicBlock *SplitB = nullptr;
+ MachineBasicBlock *TrueB = nullptr;
+ MachineBasicBlock *FalseB = nullptr;
+ MachineBasicBlock *JoinB = nullptr;
+ unsigned PredR = 0;
};
+
struct PrintFP {
PrintFP(const FlowPattern &P, const TargetRegisterInfo &T)
: FP(P), TRI(T) {}
+
const FlowPattern &FP;
const TargetRegisterInfo &TRI;
friend raw_ostream &operator<< (raw_ostream &OS, const PrintFP &P);
@@ -133,13 +151,17 @@ namespace {
class HexagonEarlyIfConversion : public MachineFunctionPass {
public:
static char ID;
+
HexagonEarlyIfConversion() : MachineFunctionPass(ID),
- TII(0), TRI(0), MFN(0), MRI(0), MDT(0), MLI(0) {
+ HII(nullptr), TRI(nullptr), MFN(nullptr), MRI(nullptr), MDT(nullptr),
+ MLI(nullptr) {
initializeHexagonEarlyIfConversionPass(*PassRegistry::getPassRegistry());
}
- const char *getPassName() const override {
+
+ StringRef getPassName() const override {
return "Hexagon early if conversion";
}
+
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineBranchProbabilityInfo>();
AU.addRequired<MachineDominatorTree>();
@@ -147,6 +169,7 @@ namespace {
AU.addRequired<MachineLoopInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
+
bool runOnMachineFunction(MachineFunction &MF) override;
private:
@@ -185,7 +208,7 @@ namespace {
void mergeBlocks(MachineBasicBlock *PredB, MachineBasicBlock *SuccB);
void simplifyFlowGraph(const FlowPattern &FP);
- const TargetInstrInfo *TII;
+ const HexagonInstrInfo *HII;
const TargetRegisterInfo *TRI;
MachineFunction *MFN;
MachineRegisterInfo *MRI;
@@ -196,7 +219,8 @@ namespace {
};
char HexagonEarlyIfConversion::ID = 0;
-}
+
+} // end anonymous namespace
INITIALIZE_PASS(HexagonEarlyIfConversion, "hexagon-eif",
"Hexagon early if conversion", false, false)
@@ -209,7 +233,6 @@ bool HexagonEarlyIfConversion::isPreheader(const MachineBasicBlock *B) const {
return L && SB == L->getHeader();
}
-
bool HexagonEarlyIfConversion::matchFlowPattern(MachineBasicBlock *B,
MachineLoop *L, FlowPattern &FP) {
DEBUG(dbgs() << "Checking flow pattern at BB#" << B->getNumber() << "\n");
@@ -217,7 +240,7 @@ bool HexagonEarlyIfConversion::matchFlowPattern(MachineBasicBlock *B,
// Interested only in conditional branches, no .new, no new-value, etc.
// Check the terminators directly, it's easier than handling all responses
// from AnalyzeBranch.
- MachineBasicBlock *TB = 0, *FB = 0;
+ MachineBasicBlock *TB = nullptr, *FB = nullptr;
MachineBasicBlock::const_iterator T1I = B->getFirstTerminator();
if (T1I == B->end())
return false;
@@ -228,7 +251,7 @@ bool HexagonEarlyIfConversion::matchFlowPattern(MachineBasicBlock *B,
// Get the layout successor, or 0 if B does not have one.
MachineFunction::iterator NextBI = std::next(MachineFunction::iterator(B));
- MachineBasicBlock *NextB = (NextBI != MFN->end()) ? &*NextBI : 0;
+ MachineBasicBlock *NextB = (NextBI != MFN->end()) ? &*NextBI : nullptr;
MachineBasicBlock *T1B = T1I->getOperand(1).getMBB();
MachineBasicBlock::const_iterator T2I = std::next(T1I);
@@ -273,9 +296,9 @@ bool HexagonEarlyIfConversion::matchFlowPattern(MachineBasicBlock *B,
if (!TOk && !FOk)
return false;
- MachineBasicBlock *TSB = (TNS > 0) ? *TB->succ_begin() : 0;
- MachineBasicBlock *FSB = (FNS > 0) ? *FB->succ_begin() : 0;
- MachineBasicBlock *JB = 0;
+ MachineBasicBlock *TSB = (TNS > 0) ? *TB->succ_begin() : nullptr;
+ MachineBasicBlock *FSB = (FNS > 0) ? *FB->succ_begin() : nullptr;
+ MachineBasicBlock *JB = nullptr;
if (TOk) {
if (FOk) {
@@ -286,14 +309,14 @@ bool HexagonEarlyIfConversion::matchFlowPattern(MachineBasicBlock *B,
// TOk && !FOk
if (TSB == FB) {
JB = FB;
- FB = 0;
+ FB = nullptr;
}
}
} else {
// !TOk && FOk (at least one must be true by now).
if (FSB == TB) {
JB = TB;
- TB = 0;
+ TB = nullptr;
}
}
// Don't try to predicate loop preheaders.
@@ -308,7 +331,6 @@ bool HexagonEarlyIfConversion::matchFlowPattern(MachineBasicBlock *B,
return true;
}
-
// KLUDGE: HexagonInstrInfo::AnalyzeBranch won't work on a block that
// contains EH_LABEL.
bool HexagonEarlyIfConversion::hasEHLabel(const MachineBasicBlock *B) const {
@@ -318,7 +340,6 @@ bool HexagonEarlyIfConversion::hasEHLabel(const MachineBasicBlock *B) const {
return false;
}
-
// KLUDGE: HexagonInstrInfo::AnalyzeBranch may be unable to recognize
// that a block can never fall-through.
bool HexagonEarlyIfConversion::hasUncondBranch(const MachineBasicBlock *B)
@@ -332,7 +353,6 @@ bool HexagonEarlyIfConversion::hasUncondBranch(const MachineBasicBlock *B)
return false;
}
-
bool HexagonEarlyIfConversion::isValidCandidate(const MachineBasicBlock *B)
const {
if (!B)
@@ -357,10 +377,10 @@ bool HexagonEarlyIfConversion::isValidCandidate(const MachineBasicBlock *B)
// update the use of it after predication). PHI uses will be updated
// to use a result of a MUX, and a MUX cannot be created for predicate
// registers.
- for (ConstMIOperands MO(MI); MO.isValid(); ++MO) {
- if (!MO->isReg() || !MO->isDef())
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg() || !MO.isDef())
continue;
- unsigned R = MO->getReg();
+ unsigned R = MO.getReg();
if (!TargetRegisterInfo::isVirtualRegister(R))
continue;
if (MRI->getRegClass(R) != &Hexagon::PredRegsRegClass)
@@ -373,12 +393,11 @@ bool HexagonEarlyIfConversion::isValidCandidate(const MachineBasicBlock *B)
return true;
}
-
bool HexagonEarlyIfConversion::usesUndefVReg(const MachineInstr *MI) const {
- for (ConstMIOperands MO(*MI); MO.isValid(); ++MO) {
- if (!MO->isReg() || !MO->isUse())
+ for (const MachineOperand &MO : MI->operands()) {
+ if (!MO.isReg() || !MO.isUse())
continue;
- unsigned R = MO->getReg();
+ unsigned R = MO.getReg();
if (!TargetRegisterInfo::isVirtualRegister(R))
continue;
const MachineInstr *DefI = MRI->getVRegDef(R);
@@ -390,7 +409,6 @@ bool HexagonEarlyIfConversion::usesUndefVReg(const MachineInstr *MI) const {
return false;
}
-
bool HexagonEarlyIfConversion::isValid(const FlowPattern &FP) const {
if (hasEHLabel(FP.SplitB)) // KLUDGE: see function definition
return false;
@@ -424,7 +442,6 @@ bool HexagonEarlyIfConversion::isValid(const FlowPattern &FP) const {
return true;
}
-
unsigned HexagonEarlyIfConversion::computePhiCost(MachineBasicBlock *B) const {
assert(B->pred_size() <= 2);
if (B->pred_size() < 2)
@@ -443,21 +460,20 @@ unsigned HexagonEarlyIfConversion::computePhiCost(MachineBasicBlock *B) const {
}
MachineInstr *Def1 = MRI->getVRegDef(RO1.getReg());
MachineInstr *Def3 = MRI->getVRegDef(RO3.getReg());
- if (!TII->isPredicable(*Def1) || !TII->isPredicable(*Def3))
+ if (!HII->isPredicable(*Def1) || !HII->isPredicable(*Def3))
Cost++;
}
return Cost;
}
-
unsigned HexagonEarlyIfConversion::countPredicateDefs(
const MachineBasicBlock *B) const {
unsigned PredDefs = 0;
for (auto &MI : *B) {
- for (ConstMIOperands MO(MI); MO.isValid(); ++MO) {
- if (!MO->isReg() || !MO->isDef())
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg() || !MO.isDef())
continue;
- unsigned R = MO->getReg();
+ unsigned R = MO.getReg();
if (!TargetRegisterInfo::isVirtualRegister(R))
continue;
if (MRI->getRegClass(R) == &Hexagon::PredRegsRegClass)
@@ -467,7 +483,6 @@ unsigned HexagonEarlyIfConversion::countPredicateDefs(
return PredDefs;
}
-
bool HexagonEarlyIfConversion::isProfitable(const FlowPattern &FP) const {
if (FP.TrueB && FP.FalseB) {
@@ -547,7 +562,6 @@ bool HexagonEarlyIfConversion::isProfitable(const FlowPattern &FP) const {
return true;
}
-
bool HexagonEarlyIfConversion::visitBlock(MachineBasicBlock *B,
MachineLoop *L) {
bool Changed = false;
@@ -593,9 +607,8 @@ bool HexagonEarlyIfConversion::visitBlock(MachineBasicBlock *B,
return true;
}
-
bool HexagonEarlyIfConversion::visitLoop(MachineLoop *L) {
- MachineBasicBlock *HB = L ? L->getHeader() : 0;
+ MachineBasicBlock *HB = L ? L->getHeader() : nullptr;
DEBUG((L ? dbgs() << "Visiting loop H:" << PrintMB(HB)
: dbgs() << "Visiting function") << "\n");
bool Changed = false;
@@ -609,34 +622,29 @@ bool HexagonEarlyIfConversion::visitLoop(MachineLoop *L) {
return Changed;
}
-
bool HexagonEarlyIfConversion::isPredicableStore(const MachineInstr *MI)
const {
- // Exclude post-increment stores. Those return a value, so we cannot
- // predicate them.
+ // HexagonInstrInfo::isPredicable will consider these stores non-predicable
+ // if the offset would become constant-extended after predication.
unsigned Opc = MI->getOpcode();
- using namespace Hexagon;
switch (Opc) {
- // Store byte:
- case S2_storerb_io: case S4_storerb_rr:
- case S2_storerbabs: case S4_storeirb_io: case S2_storerbgp:
- // Store halfword:
- case S2_storerh_io: case S4_storerh_rr:
- case S2_storerhabs: case S4_storeirh_io: case S2_storerhgp:
- // Store upper halfword:
- case S2_storerf_io: case S4_storerf_rr:
- case S2_storerfabs: case S2_storerfgp:
- // Store word:
- case S2_storeri_io: case S4_storeri_rr:
- case S2_storeriabs: case S4_storeiri_io: case S2_storerigp:
- // Store doubleword:
- case S2_storerd_io: case S4_storerd_rr:
- case S2_storerdabs: case S2_storerdgp:
+ case Hexagon::S2_storerb_io:
+ case Hexagon::S2_storerbnew_io:
+ case Hexagon::S2_storerh_io:
+ case Hexagon::S2_storerhnew_io:
+ case Hexagon::S2_storeri_io:
+ case Hexagon::S2_storerinew_io:
+ case Hexagon::S2_storerd_io:
+ case Hexagon::S4_storeirb_io:
+ case Hexagon::S4_storeirh_io:
+ case Hexagon::S4_storeiri_io:
return true;
}
- return false;
-}
+ // TargetInstrInfo::isPredicable takes a non-const reference.
+ return MI->mayStore() && HII->isPredicable(const_cast<MachineInstr&>(*MI));
+}
bool HexagonEarlyIfConversion::isSafeToSpeculate(const MachineInstr *MI)
const {
@@ -650,59 +658,11 @@ bool HexagonEarlyIfConversion::isSafeToSpeculate(const MachineInstr *MI)
return true;
}
-
unsigned HexagonEarlyIfConversion::getCondStoreOpcode(unsigned Opc,
bool IfTrue) const {
- // Exclude post-increment stores.
- using namespace Hexagon;
- switch (Opc) {
- case S2_storerb_io:
- return IfTrue ? S2_pstorerbt_io : S2_pstorerbf_io;
- case S4_storerb_rr:
- return IfTrue ? S4_pstorerbt_rr : S4_pstorerbf_rr;
- case S2_storerbabs:
- case S2_storerbgp:
- return IfTrue ? S4_pstorerbt_abs : S4_pstorerbf_abs;
- case S4_storeirb_io:
- return IfTrue ? S4_storeirbt_io : S4_storeirbf_io;
- case S2_storerh_io:
- return IfTrue ? S2_pstorerht_io : S2_pstorerhf_io;
- case S4_storerh_rr:
- return IfTrue ? S4_pstorerht_rr : S4_pstorerhf_rr;
- case S2_storerhabs:
- case S2_storerhgp:
- return IfTrue ? S4_pstorerht_abs : S4_pstorerhf_abs;
- case S2_storerf_io:
- return IfTrue ? S2_pstorerft_io : S2_pstorerff_io;
- case S4_storerf_rr:
- return IfTrue ? S4_pstorerft_rr : S4_pstorerff_rr;
- case S2_storerfabs:
- case S2_storerfgp:
- return IfTrue ? S4_pstorerft_abs : S4_pstorerff_abs;
- case S4_storeirh_io:
- return IfTrue ? S4_storeirht_io : S4_storeirhf_io;
- case S2_storeri_io:
- return IfTrue ? S2_pstorerit_io : S2_pstorerif_io;
- case S4_storeri_rr:
- return IfTrue ? S4_pstorerit_rr : S4_pstorerif_rr;
- case S2_storeriabs:
- case S2_storerigp:
- return IfTrue ? S4_pstorerit_abs : S4_pstorerif_abs;
- case S4_storeiri_io:
- return IfTrue ? S4_storeirit_io : S4_storeirif_io;
- case S2_storerd_io:
- return IfTrue ? S2_pstorerdt_io : S2_pstorerdf_io;
- case S4_storerd_rr:
- return IfTrue ? S4_pstorerdt_rr : S4_pstorerdf_rr;
- case S2_storerdabs:
- case S2_storerdgp:
- return IfTrue ? S4_pstorerdt_abs : S4_pstorerdf_abs;
- }
- llvm_unreachable("Unexpected opcode");
- return 0;
+ return HII->getCondOpcode(Opc, !IfTrue);
}
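The delegation to HexagonInstrInfo::getCondOpcode above is expected to reproduce the mapping of the removed switch; for instance, using opcodes taken from the deleted cases:

  // getCondStoreOpcode(Hexagon::S2_storerb_io, /*IfTrue=*/true)
  //   -> Hexagon::S2_pstorerbt_io
  // getCondStoreOpcode(Hexagon::S2_storerb_io, /*IfTrue=*/false)
  //   -> Hexagon::S2_pstorerbf_io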
-
void HexagonEarlyIfConversion::predicateInstr(MachineBasicBlock *ToB,
MachineBasicBlock::iterator At, MachineInstr *MI,
unsigned PredR, bool IfTrue) {
@@ -717,10 +677,15 @@ void HexagonEarlyIfConversion::predicateInstr(MachineBasicBlock *ToB,
if (isPredicableStore(MI)) {
unsigned COpc = getCondStoreOpcode(Opc, IfTrue);
assert(COpc);
- MachineInstrBuilder MIB = BuildMI(*ToB, At, DL, TII->get(COpc))
- .addReg(PredR);
- for (MIOperands MO(*MI); MO.isValid(); ++MO)
- MIB.addOperand(*MO);
+ MachineInstrBuilder MIB = BuildMI(*ToB, At, DL, HII->get(COpc));
+ MachineInstr::mop_iterator MOI = MI->operands_begin();
+ if (HII->isPostIncrement(*MI)) {
+ MIB.addOperand(*MOI);
+ ++MOI;
+ }
+ MIB.addReg(PredR);
+ for (const MachineOperand &MO : make_range(MOI, MI->operands_end()))
+ MIB.addOperand(MO);
// Set memory references.
MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin();
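For a post-increment store, the address-update def stays ahead of the inserted predicate operand; an illustrative sketch (opcode and register numbers assumed, not taken from the commit):

  //   %R2<def> = S2_storerb_pi %R2, 1, %R3
  // is predicated as
  //   %R2<def> = S2_pstorerbt_pi %P0, %R2, 1, %R3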
@@ -733,7 +698,7 @@ void HexagonEarlyIfConversion::predicateInstr(MachineBasicBlock *ToB,
if (Opc == Hexagon::J2_jump) {
MachineBasicBlock *TB = MI->getOperand(0).getMBB();
- const MCInstrDesc &D = TII->get(IfTrue ? Hexagon::J2_jumpt
+ const MCInstrDesc &D = HII->get(IfTrue ? Hexagon::J2_jumpt
: Hexagon::J2_jumpf);
BuildMI(*ToB, At, DL, D)
.addReg(PredR)
@@ -748,7 +713,6 @@ void HexagonEarlyIfConversion::predicateInstr(MachineBasicBlock *ToB,
llvm_unreachable("Unexpected instruction");
}
-
// Predicate/speculate non-branch instructions from FromB into block ToB.
// Leave the branches alone, they will be handled later. Btw, at this point
// FromB should have at most one branch, and it should be unconditional.
@@ -769,7 +733,6 @@ void HexagonEarlyIfConversion::predicateBlockNB(MachineBasicBlock *ToB,
}
}
-
void HexagonEarlyIfConversion::updatePhiNodes(MachineBasicBlock *WhereB,
const FlowPattern &FP) {
// Visit all PHI nodes in the WhereB block and generate MUX instructions
@@ -799,10 +762,25 @@ void HexagonEarlyIfConversion::updatePhiNodes(MachineBasicBlock *WhereB,
assert(TR && FR);
using namespace Hexagon;
+
unsigned DR = PN->getOperand(0).getReg();
const TargetRegisterClass *RC = MRI->getRegClass(DR);
- const MCInstrDesc &D = RC == &IntRegsRegClass ? TII->get(C2_mux)
- : TII->get(MUX64_rr);
+ unsigned Opc = 0;
+ if (RC == &IntRegsRegClass)
+ Opc = C2_mux;
+ else if (RC == &DoubleRegsRegClass)
+ Opc = PS_pselect;
+ else if (RC == &VectorRegsRegClass)
+ Opc = PS_vselect;
+ else if (RC == &VecDblRegsRegClass)
+ Opc = PS_wselect;
+ else if (RC == &VectorRegs128BRegClass)
+ Opc = PS_vselect_128B;
+ else if (RC == &VecDblRegs128BRegClass)
+ Opc = PS_wselect_128B;
+ else
+ llvm_unreachable("unexpected register type");
+ const MCInstrDesc &D = HII->get(Opc);
MachineBasicBlock::iterator MuxAt = FP.SplitB->getFirstTerminator();
DebugLoc DL;
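To illustrate the opcode selection above, assuming virtual register numbers in the style of the example from the file header:

  //   %vreg9<def> = PHI %vreg6, <BB#1>, %vreg7, <BB#2>
  // is replaced in the split block by a select, e.g. for a 64-bit value
  //   %vreg9<def> = PS_pselect %vreg41, %vreg6, %vreg7
  // where %vreg41 holds the branch predicate.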
@@ -819,9 +797,8 @@ void HexagonEarlyIfConversion::updatePhiNodes(MachineBasicBlock *WhereB,
}
}
-
void HexagonEarlyIfConversion::convert(const FlowPattern &FP) {
- MachineBasicBlock *TSB = 0, *FSB = 0;
+ MachineBasicBlock *TSB = nullptr, *FSB = nullptr;
MachineBasicBlock::iterator OldTI = FP.SplitB->getFirstTerminator();
assert(OldTI != FP.SplitB->end());
DebugLoc DL = OldTI->getDebugLoc();
@@ -839,7 +816,7 @@ void HexagonEarlyIfConversion::convert(const FlowPattern &FP) {
// Regenerate new terminators in the split block and update the successors.
// First, remember any information that may be needed later and remove the
// existing terminators/successors from the split block.
- MachineBasicBlock *SSB = 0;
+ MachineBasicBlock *SSB = nullptr;
FP.SplitB->erase(OldTI, FP.SplitB->end());
while (FP.SplitB->succ_size() > 0) {
MachineBasicBlock *T = *FP.SplitB->succ_begin();
@@ -870,21 +847,21 @@ void HexagonEarlyIfConversion::convert(const FlowPattern &FP) {
// generated.
if (FP.JoinB) {
assert(!SSB || SSB == FP.JoinB);
- BuildMI(*FP.SplitB, FP.SplitB->end(), DL, TII->get(Hexagon::J2_jump))
+ BuildMI(*FP.SplitB, FP.SplitB->end(), DL, HII->get(Hexagon::J2_jump))
.addMBB(FP.JoinB);
FP.SplitB->addSuccessor(FP.JoinB);
} else {
bool HasBranch = false;
if (TSB) {
- BuildMI(*FP.SplitB, FP.SplitB->end(), DL, TII->get(Hexagon::J2_jumpt))
+ BuildMI(*FP.SplitB, FP.SplitB->end(), DL, HII->get(Hexagon::J2_jumpt))
.addReg(FP.PredR)
.addMBB(TSB);
FP.SplitB->addSuccessor(TSB);
HasBranch = true;
}
if (FSB) {
- const MCInstrDesc &D = HasBranch ? TII->get(Hexagon::J2_jump)
- : TII->get(Hexagon::J2_jumpf);
+ const MCInstrDesc &D = HasBranch ? HII->get(Hexagon::J2_jump)
+ : HII->get(Hexagon::J2_jumpf);
MachineInstrBuilder MIB = BuildMI(*FP.SplitB, FP.SplitB->end(), DL, D);
if (!HasBranch)
MIB.addReg(FP.PredR);
@@ -896,7 +873,7 @@ void HexagonEarlyIfConversion::convert(const FlowPattern &FP) {
// successor blocks of the TrueB and FalseB (or null if the TrueB
// or FalseB block is null). SSB is the potential successor block
// of the SplitB that is neither TrueB nor FalseB.
- BuildMI(*FP.SplitB, FP.SplitB->end(), DL, TII->get(Hexagon::J2_jump))
+ BuildMI(*FP.SplitB, FP.SplitB->end(), DL, HII->get(Hexagon::J2_jump))
.addMBB(SSB);
FP.SplitB->addSuccessor(SSB);
}
@@ -915,7 +892,6 @@ void HexagonEarlyIfConversion::convert(const FlowPattern &FP) {
}
}
-
void HexagonEarlyIfConversion::removeBlock(MachineBasicBlock *B) {
DEBUG(dbgs() << "Removing block " << PrintMB(B) << "\n");
@@ -944,7 +920,6 @@ void HexagonEarlyIfConversion::removeBlock(MachineBasicBlock *B) {
MFN->erase(B->getIterator());
}
-
void HexagonEarlyIfConversion::eliminatePhis(MachineBasicBlock *B) {
DEBUG(dbgs() << "Removing phi nodes from block " << PrintMB(B) << "\n");
MachineBasicBlock::iterator I, NextI, NonPHI = B->getFirstNonPHI();
@@ -963,7 +938,7 @@ void HexagonEarlyIfConversion::eliminatePhis(MachineBasicBlock *B) {
const DebugLoc &DL = PN->getDebugLoc();
const TargetRegisterClass *RC = MRI->getRegClass(DefR);
NewR = MRI->createVirtualRegister(RC);
- NonPHI = BuildMI(*B, NonPHI, DL, TII->get(TargetOpcode::COPY), NewR)
+ NonPHI = BuildMI(*B, NonPHI, DL, HII->get(TargetOpcode::COPY), NewR)
.addReg(UseR, 0, UseSR);
}
MRI->replaceRegWith(DefR, NewR);
@@ -971,7 +946,6 @@ void HexagonEarlyIfConversion::eliminatePhis(MachineBasicBlock *B) {
}
}
-
void HexagonEarlyIfConversion::replacePhiEdges(MachineBasicBlock *OldB,
MachineBasicBlock *NewB) {
for (auto I = OldB->succ_begin(), E = OldB->succ_end(); I != E; ++I) {
@@ -979,21 +953,20 @@ void HexagonEarlyIfConversion::replacePhiEdges(MachineBasicBlock *OldB,
MachineBasicBlock::iterator P, N = SB->getFirstNonPHI();
for (P = SB->begin(); P != N; ++P) {
MachineInstr &PN = *P;
- for (MIOperands MO(PN); MO.isValid(); ++MO)
- if (MO->isMBB() && MO->getMBB() == OldB)
- MO->setMBB(NewB);
+ for (MachineOperand &MO : PN.operands())
+ if (MO.isMBB() && MO.getMBB() == OldB)
+ MO.setMBB(NewB);
}
}
}
-
void HexagonEarlyIfConversion::mergeBlocks(MachineBasicBlock *PredB,
MachineBasicBlock *SuccB) {
DEBUG(dbgs() << "Merging blocks " << PrintMB(PredB) << " and "
<< PrintMB(SuccB) << "\n");
bool TermOk = hasUncondBranch(SuccB);
eliminatePhis(SuccB);
- TII->RemoveBranch(*PredB);
+ HII->removeBranch(*PredB);
PredB->removeSuccessor(SuccB);
PredB->splice(PredB->end(), SuccB, SuccB->begin(), SuccB->end());
MachineBasicBlock::succ_iterator I, E = SuccB->succ_end();
@@ -1006,7 +979,6 @@ void HexagonEarlyIfConversion::mergeBlocks(MachineBasicBlock *PredB,
PredB->updateTerminator();
}
-
void HexagonEarlyIfConversion::simplifyFlowGraph(const FlowPattern &FP) {
if (FP.TrueB)
removeBlock(FP.TrueB);
@@ -1030,13 +1002,12 @@ void HexagonEarlyIfConversion::simplifyFlowGraph(const FlowPattern &FP) {
mergeBlocks(FP.SplitB, SB);
}
-
bool HexagonEarlyIfConversion::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(*MF.getFunction()))
return false;
- auto &ST = MF.getSubtarget();
- TII = ST.getInstrInfo();
+ auto &ST = MF.getSubtarget<HexagonSubtarget>();
+ HII = ST.getInstrInfo();
TRI = ST.getRegisterInfo();
MFN = &MF;
MRI = &MF.getRegInfo();
@@ -1050,7 +1021,7 @@ bool HexagonEarlyIfConversion::runOnMachineFunction(MachineFunction &MF) {
for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); I != E; ++I)
Changed |= visitLoop(*I);
- Changed |= visitLoop(0);
+ Changed |= visitLoop(nullptr);
return Changed;
}
@@ -1061,4 +1032,3 @@ bool HexagonEarlyIfConversion::runOnMachineFunction(MachineFunction &MF) {
FunctionPass *llvm::createHexagonEarlyIfConversion() {
return new HexagonEarlyIfConversion();
}
-
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp
index bd5bb9cbc235..8f070d842b8c 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonExpandCondsets.cpp
@@ -85,78 +85,33 @@
// implicit uses will be added later, after predication. The extra price,
// however, is that finding the locations where the implicit uses need
// to be added, and updating the live ranges will be more involved.
-//
-// An additional problem appears when subregister liveness tracking is
-// enabled. In such a scenario, the live interval for the super-register
-// will have live ranges for each subregister (i.e. subranges). This sub-
-// range contains all liveness information about the subregister, except
-// for one case: a "read-undef" flag from another subregister will not
-// be reflected: given
-// vreg1:subreg_hireg<def,read-undef> = ... ; "undefines" subreg_loreg
-// the subrange for subreg_loreg will not have any indication that it is
-// undefined at this point. Calculating subregister liveness based only
-// on the information from the subrange may create a segment which spans
-// over such a "read-undef" flag. This would create inconsistencies in
-// the liveness data, resulting in assertions or incorrect code.
-// Example:
-// vreg1:subreg_loreg<def> = ...
-// vreg1:subreg_hireg<def, read-undef> = ... ; "undefines" subreg_loreg
-// ...
-// vreg1:subreg_loreg<def> = A2_tfrt ... ; may end up with imp-use
-// ; of subreg_loreg
-// The remedy takes advantage of the fact, that at this point we have
-// an unconditional definition of the subregister. What this means is
-// that any preceding value in this subregister will be overwritten,
-// or in other words, the last use before this def is a kill. This also
-// implies that the first of the predicated transfers at this location
-// should not have any implicit uses.
-// Assume for a moment that no part of the corresponding super-register
-// is used as a source. In such case, the entire super-register can be
-// considered undefined immediately before this instruction. Because of
-// that, we can insert an IMPLICIT_DEF of the super-register at this
-// location, which will cause it to be reflected in all the associated
-// subranges. What is important here is that if an IMPLICIT_DEF of
-// subreg_loreg was used, we would lose the indication that subreg_hireg
-// is also considered undefined. This could lead to having implicit uses
-// incorrectly added.
-//
-// What is left is the two cases when the super-register is used as a
-// source.
-// * Case 1: the used part is the same as the one that is defined:
-// vreg1<def> = ...
-// ...
-// vreg1:subreg_loreg<def,read-undef> = C2_mux ..., vreg1:subreg_loreg
-// In the end, the subreg_loreg should be marked as live at the point of
-// the splitting:
-// vreg1:subreg_loreg<def,read-undef> = A2_tfrt ; should have imp-use
-// vreg1:subreg_loreg<def,read-undef> = A2_tfrf ; should have imp-use
-// Hence, an IMPLICIT_DEF of only vreg1:subreg_hireg would be sufficient.
-// * Case 2: the used part does not overlap the part being defined:
-// vreg1<def> = ...
-// ...
-// vreg1:subreg_loreg<def,read-undef> = C2_mux ..., vreg1:subreg_hireg
-// For this case, we insert an IMPLICIT_DEF of vreg1:subreg_hireg after
-// the C2_mux.
#define DEBUG_TYPE "expand-condsets"
-#include "HexagonTargetMachine.h"
+#include "HexagonInstrInfo.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SetVector.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-
-#include <algorithm>
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <cassert>
#include <iterator>
#include <set>
#include <utility>
@@ -169,17 +124,21 @@ static cl::opt<unsigned> OptCoaLimit("expand-condsets-coa-limit",
cl::init(~0U), cl::Hidden, cl::desc("Max number of segment coalescings"));
namespace llvm {
+
void initializeHexagonExpandCondsetsPass(PassRegistry&);
FunctionPass *createHexagonExpandCondsets();
-}
+
+} // end namespace llvm
namespace {
+
class HexagonExpandCondsets : public MachineFunctionPass {
public:
static char ID;
+
HexagonExpandCondsets() :
- MachineFunctionPass(ID), HII(0), TRI(0), MRI(0),
- LIS(0), CoaLimitActive(false),
+ MachineFunctionPass(ID), HII(nullptr), TRI(nullptr), MRI(nullptr),
+ LIS(nullptr), CoaLimitActive(false),
TfrLimitActive(false), CoaCounter(0), TfrCounter(0) {
if (OptCoaLimit.getPosition())
CoaLimitActive = true, CoaLimit = OptCoaLimit;
@@ -188,9 +147,8 @@ namespace {
initializeHexagonExpandCondsetsPass(*PassRegistry::getPassRegistry());
}
- const char *getPassName() const override {
- return "Hexagon Expand Condsets";
- }
+ StringRef getPassName() const override { return "Hexagon Expand Condsets"; }
+
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<LiveIntervals>();
AU.addPreserved<LiveIntervals>();
@@ -199,6 +157,7 @@ namespace {
AU.addPreserved<MachineDominatorTree>();
MachineFunctionPass::getAnalysisUsage(AU);
}
+
bool runOnMachineFunction(MachineFunction &MF) override;
private:
@@ -207,7 +166,6 @@ namespace {
MachineDominatorTree *MDT;
MachineRegisterInfo *MRI;
LiveIntervals *LIS;
- std::set<MachineInstr*> LocalImpDefs;
bool CoaLimitActive, TfrLimitActive;
unsigned CoaLimit, TfrLimit, CoaCounter, TfrCounter;
@@ -216,6 +174,7 @@ namespace {
RegisterRef(const MachineOperand &Op) : Reg(Op.getReg()),
Sub(Op.getSubReg()) {}
RegisterRef(unsigned R = 0, unsigned S = 0) : Reg(R), Sub(S) {}
+
bool operator== (RegisterRef RR) const {
return Reg == RR.Reg && Sub == RR.Sub;
}
@@ -223,6 +182,7 @@ namespace {
bool operator< (RegisterRef RR) const {
return Reg < RR.Reg || (Reg == RR.Reg && Sub < RR.Sub);
}
+
unsigned Reg, Sub;
};
@@ -236,7 +196,6 @@ namespace {
void addRefToMap(RegisterRef RR, ReferenceMap &Map, unsigned Exec);
bool isRefInMap(RegisterRef, ReferenceMap &Map, unsigned Exec);
- void removeImpDefSegments(LiveRange &Range);
void updateDeadsInRange(unsigned Reg, LaneBitmask LM, LiveRange &Range);
void updateKillFlags(unsigned Reg);
void updateDeadFlags(unsigned Reg);
@@ -251,7 +210,6 @@ namespace {
unsigned DstSR, const MachineOperand &PredOp, bool PredSense,
bool ReadUndef, bool ImpUse);
bool split(MachineInstr &MI, std::set<unsigned> &UpdRegs);
- bool splitInBlock(MachineBasicBlock &B, std::set<unsigned> &UpdRegs);
bool isPredicable(MachineInstr *MI);
MachineInstr *getReachingDefForPred(RegisterRef RD,
@@ -272,12 +230,20 @@ namespace {
bool isIntReg(RegisterRef RR, unsigned &BW);
bool isIntraBlocks(LiveInterval &LI);
bool coalesceRegisters(RegisterRef R1, RegisterRef R2);
- bool coalesceSegments(MachineFunction &MF);
+ bool coalesceSegments(const SmallVectorImpl<MachineInstr*> &Condsets,
+ std::set<unsigned> &UpdRegs);
};
-}
+
+} // end anonymous namespace
char HexagonExpandCondsets::ID = 0;
+namespace llvm {
+
+ char &HexagonExpandCondsetsID = HexagonExpandCondsets::ID;
+
+} // end namespace llvm
+
INITIALIZE_PASS_BEGIN(HexagonExpandCondsets, "expand-condsets",
"Hexagon Expand Condsets", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
@@ -288,9 +254,11 @@ INITIALIZE_PASS_END(HexagonExpandCondsets, "expand-condsets",
unsigned HexagonExpandCondsets::getMaskForSub(unsigned Sub) {
switch (Sub) {
- case Hexagon::subreg_loreg:
+ case Hexagon::isub_lo:
+ case Hexagon::vsub_lo:
return Sub_Low;
- case Hexagon::subreg_hireg:
+ case Hexagon::isub_hi:
+ case Hexagon::vsub_hi:
return Sub_High;
case Hexagon::NoSubRegister:
return Sub_None;
@@ -305,21 +273,19 @@ bool HexagonExpandCondsets::isCondset(const MachineInstr &MI) {
case Hexagon::C2_muxii:
case Hexagon::C2_muxir:
case Hexagon::C2_muxri:
- case Hexagon::MUX64_rr:
+ case Hexagon::PS_pselect:
return true;
break;
}
return false;
}
-
LaneBitmask HexagonExpandCondsets::getLaneMask(unsigned Reg, unsigned Sub) {
assert(TargetRegisterInfo::isVirtualRegister(Reg));
return Sub != 0 ? TRI->getSubRegIndexLaneMask(Sub)
: MRI->getMaxLaneMaskForVReg(Reg);
}
-
void HexagonExpandCondsets::addRefToMap(RegisterRef RR, ReferenceMap &Map,
unsigned Exec) {
unsigned Mask = getMaskForSub(RR.Sub) | Exec;
@@ -330,7 +296,6 @@ void HexagonExpandCondsets::addRefToMap(RegisterRef RR, ReferenceMap &Map,
F->second |= Mask;
}
-
bool HexagonExpandCondsets::isRefInMap(RegisterRef RR, ReferenceMap &Map,
unsigned Exec) {
ReferenceMap::iterator F = Map.find(RR.Reg);
@@ -342,7 +307,6 @@ bool HexagonExpandCondsets::isRefInMap(RegisterRef RR, ReferenceMap &Map,
return false;
}
-
void HexagonExpandCondsets::updateKillFlags(unsigned Reg) {
auto KillAt = [this,Reg] (SlotIndex K, LaneBitmask LM) -> void {
// Set the <kill> flag on a use of Reg whose lane mask is contained in LM.
@@ -392,16 +356,6 @@ void HexagonExpandCondsets::updateKillFlags(unsigned Reg) {
}
}
-
-void HexagonExpandCondsets::removeImpDefSegments(LiveRange &Range) {
- auto StartImpDef = [this] (LiveRange::Segment &S) -> bool {
- return S.start.isRegister() &&
- LocalImpDefs.count(LIS->getInstructionFromIndex(S.start));
- };
- Range.segments.erase(std::remove_if(Range.begin(), Range.end(), StartImpDef),
- Range.end());
-}
-
void HexagonExpandCondsets::updateDeadsInRange(unsigned Reg, LaneBitmask LM,
LiveRange &Range) {
assert(TargetRegisterInfo::isVirtualRegister(Reg));
@@ -415,7 +369,7 @@ void HexagonExpandCondsets::updateDeadsInRange(unsigned Reg, LaneBitmask LM,
if (!TargetRegisterInfo::isVirtualRegister(DR) || DR != Reg)
return false;
LaneBitmask SLM = getLaneMask(DR, DSR);
- return (SLM & LM) != 0;
+ return (SLM & LM).any();
};
// The splitting step will create pairs of predicated definitions without
@@ -425,7 +379,6 @@ void HexagonExpandCondsets::updateDeadsInRange(unsigned Reg, LaneBitmask LM,
// We need to identify predicated defs that need implicit uses, and
// dead defs that are not really dead, and correct both problems.
- SetVector<MachineBasicBlock*> Defs;
auto Dominate = [this] (SetVector<MachineBasicBlock*> &Defs,
MachineBasicBlock *Dest) -> bool {
for (MachineBasicBlock *D : Defs)
@@ -449,20 +402,25 @@ void HexagonExpandCondsets::updateDeadsInRange(unsigned Reg, LaneBitmask LM,
// First, try to extend live range within individual basic blocks. This
// will leave us only with dead defs that do not reach any predicated
// defs in the same block.
+ SetVector<MachineBasicBlock*> Defs;
SmallVector<SlotIndex,4> PredDefs;
for (auto &Seg : Range) {
if (!Seg.start.isRegister())
continue;
MachineInstr *DefI = LIS->getInstructionFromIndex(Seg.start);
- if (LocalImpDefs.count(DefI))
- continue;
Defs.insert(DefI->getParent());
if (HII->isPredicated(*DefI))
PredDefs.push_back(Seg.start);
}
+
+ SmallVector<SlotIndex,8> Undefs;
+ LiveInterval &LI = LIS->getInterval(Reg);
+ LI.computeSubRangeUndefs(Undefs, LM, *MRI, *LIS->getSlotIndexes());
+
for (auto &SI : PredDefs) {
MachineBasicBlock *BB = LIS->getMBBFromIndex(SI);
- if (Range.extendInBlock(LIS->getMBBStartIdx(BB), SI))
+ auto P = Range.extendInBlock(Undefs, LIS->getMBBStartIdx(BB), SI);
+ if (P.first != nullptr || P.second)
SI = SlotIndex();
}
@@ -476,10 +434,21 @@ void HexagonExpandCondsets::updateDeadsInRange(unsigned Reg, LaneBitmask LM,
if (BB->pred_empty())
continue;
// If the defs from this range reach SI via all predecessors, it is live.
+ // It can happen that SI is reached by the defs through some paths, but
+ // not all. In the IR coming into this optimization, SI would not be
+ // considered live, since the defs would then not jointly dominate SI.
+ // That means that SI is an overwriting def, and no implicit use is
+ // needed at this point. Do not add SI to the extension points, since
+ // extendToIndices will abort if there is no joint dominance.
+ // If the abort was avoided by adding extra undefs to Undefs,
+ // extendToIndices could actually indicate that SI is live, contrary
+ // to the original IR.
if (Dominate(Defs, BB))
ExtTo.push_back(SI);
}
- LIS->extendToIndices(Range, ExtTo);
+
+ if (!ExtTo.empty())
+ LIS->extendToIndices(Range, ExtTo, Undefs);
// Remove <dead> flags from all defs that are not dead after live range
// extension, and collect all def operands. They will be used to generate
@@ -489,8 +458,6 @@ void HexagonExpandCondsets::updateDeadsInRange(unsigned Reg, LaneBitmask LM,
if (!Seg.start.isRegister())
continue;
MachineInstr *DefI = LIS->getInstructionFromIndex(Seg.start);
- if (LocalImpDefs.count(DefI))
- continue;
for (auto &Op : DefI->operands()) {
if (Seg.start.isDead() || !IsRegDef(Op))
continue;
@@ -499,40 +466,34 @@ void HexagonExpandCondsets::updateDeadsInRange(unsigned Reg, LaneBitmask LM,
}
}
-
// Finally, add implicit uses to each predicated def that is reached
- // by other defs. Remove segments started by implicit-defs first, since
- // they do not define registers.
- removeImpDefSegments(Range);
-
+ // by other defs.
for (auto &Seg : Range) {
if (!Seg.start.isRegister() || !Range.liveAt(Seg.start.getPrevSlot()))
continue;
MachineInstr *DefI = LIS->getInstructionFromIndex(Seg.start);
if (!HII->isPredicated(*DefI))
continue;
- MachineFunction &MF = *DefI->getParent()->getParent();
// Construct the set of all necessary implicit uses, based on the def
// operands in the instruction.
std::set<RegisterRef> ImpUses;
for (auto &Op : DefI->operands())
if (Op.isReg() && Op.isDef() && DefRegs.count(Op))
ImpUses.insert(Op);
+ if (ImpUses.empty())
+ continue;
+ MachineFunction &MF = *DefI->getParent()->getParent();
for (RegisterRef R : ImpUses)
MachineInstrBuilder(MF, DefI).addReg(R.Reg, RegState::Implicit, R.Sub);
}
}
-
void HexagonExpandCondsets::updateDeadFlags(unsigned Reg) {
LiveInterval &LI = LIS->getInterval(Reg);
if (LI.hasSubRanges()) {
for (LiveInterval::SubRange &S : LI.subranges()) {
updateDeadsInRange(Reg, S.LaneMask, S);
LIS->shrinkToUses(S, Reg);
- // LI::shrinkToUses will add segments started by implicit-defs.
- // Remove them again.
- removeImpDefSegments(S);
}
LI.clear();
LIS->constructMainRangeFromSubranges(LI);
@@ -541,7 +502,6 @@ void HexagonExpandCondsets::updateDeadFlags(unsigned Reg) {
}
}
-
void HexagonExpandCondsets::recalculateLiveInterval(unsigned Reg) {
LIS->removeInterval(Reg);
LIS->createAndComputeVirtRegInterval(Reg);
@@ -552,7 +512,6 @@ void HexagonExpandCondsets::removeInstr(MachineInstr &MI) {
MI.eraseFromParent();
}
-
void HexagonExpandCondsets::updateLiveness(std::set<unsigned> &RegSet,
bool Recalc, bool UpdateKills, bool UpdateDeads) {
UpdateKills |= UpdateDeads;
@@ -571,12 +530,12 @@ void HexagonExpandCondsets::updateLiveness(std::set<unsigned> &RegSet,
}
}
-
/// Get the opcode for a conditional transfer of the value in SO (source
/// operand). The condition (true/false) is given in Cond.
unsigned HexagonExpandCondsets::getCondTfrOpcode(const MachineOperand &SO,
bool IfTrue) {
using namespace Hexagon;
+
if (SO.isReg()) {
unsigned PhysR;
RegisterRef RS = SO;
@@ -603,7 +562,6 @@ unsigned HexagonExpandCondsets::getCondTfrOpcode(const MachineOperand &SO,
llvm_unreachable("Unexpected source operand");
}
-
/// Generate a conditional transfer, copying the value SrcOp to the
/// destination register DstR:DstSR, and using the predicate register from
/// PredOp. The Cond argument specifies whether the predicate is to be
@@ -623,19 +581,29 @@ MachineInstr *HexagonExpandCondsets::genCondTfrFor(MachineOperand &SrcOp,
/// predicate.
unsigned Opc = getCondTfrOpcode(SrcOp, PredSense);
- unsigned State = RegState::Define | (ReadUndef ? RegState::Undef : 0);
- MachineInstrBuilder MIB = BuildMI(B, At, DL, HII->get(Opc))
- .addReg(DstR, State, DstSR)
- .addOperand(PredOp)
- .addOperand(SrcOp);
-
- // We don't want any kills yet.
- MIB->clearKillInfo();
+ unsigned DstState = RegState::Define | (ReadUndef ? RegState::Undef : 0);
+ unsigned PredState = getRegState(PredOp) & ~RegState::Kill;
+ MachineInstrBuilder MIB;
+
+ if (SrcOp.isReg()) {
+ unsigned SrcState = getRegState(SrcOp);
+ if (RegisterRef(SrcOp) == RegisterRef(DstR, DstSR))
+ SrcState &= ~RegState::Kill;
+ MIB = BuildMI(B, At, DL, HII->get(Opc))
+ .addReg(DstR, DstState, DstSR)
+ .addReg(PredOp.getReg(), PredState, PredOp.getSubReg())
+ .addReg(SrcOp.getReg(), SrcState, SrcOp.getSubReg());
+ } else {
+ MIB = BuildMI(B, At, DL, HII->get(Opc))
+ .addReg(DstR, DstState, DstSR)
+ .addReg(PredOp.getReg(), PredState, PredOp.getSubReg())
+ .addOperand(SrcOp);
+ }
+
DEBUG(dbgs() << "created an initial copy: " << *MIB);
return &*MIB;
}
-
/// Replace a MUX instruction MI with a pair A2_tfrt/A2_tfrf. This function
/// performs all necessary changes to complete the replacement.
bool HexagonExpandCondsets::split(MachineInstr &MI,
@@ -649,44 +617,36 @@ bool HexagonExpandCondsets::split(MachineInstr &MI,
<< MI);
MachineOperand &MD = MI.getOperand(0); // Definition
MachineOperand &MP = MI.getOperand(1); // Predicate register
- MachineOperand &MS1 = MI.getOperand(2); // Source value #1
- MachineOperand &MS2 = MI.getOperand(3); // Source value #2
assert(MD.isDef());
unsigned DR = MD.getReg(), DSR = MD.getSubReg();
bool ReadUndef = MD.isUndef();
MachineBasicBlock::iterator At = MI;
- if (ReadUndef && DSR != 0 && MRI->shouldTrackSubRegLiveness(DR)) {
- unsigned NewSR = 0;
- MachineBasicBlock::iterator DefAt = At;
- bool SameReg = (MS1.isReg() && DR == MS1.getReg()) ||
- (MS2.isReg() && DR == MS2.getReg());
- if (SameReg) {
- NewSR = (DSR == Hexagon::subreg_loreg) ? Hexagon::subreg_hireg
- : Hexagon::subreg_loreg;
- // Advance the insertion point if the subregisters differ between
- // the source and the target (with the same super-register).
- // Note: this case has never occured during tests.
- if ((MS1.isReg() && NewSR == MS1.getSubReg()) ||
- (MS2.isReg() && NewSR == MS2.getSubReg()))
- ++DefAt;
+ // If this is a mux of the same register, just replace it with COPY.
+ // Ideally, this would happen earlier, so that register coalescing would
+ // see it.
+ MachineOperand &ST = MI.getOperand(2);
+ MachineOperand &SF = MI.getOperand(3);
+ if (ST.isReg() && SF.isReg()) {
+ RegisterRef RT(ST);
+ if (RT == RegisterRef(SF)) {
+ MI.setDesc(HII->get(TargetOpcode::COPY));
+ unsigned S = getRegState(ST);
+ while (MI.getNumOperands() > 1)
+ MI.RemoveOperand(MI.getNumOperands()-1);
+ MachineFunction &MF = *MI.getParent()->getParent();
+ MachineInstrBuilder(MF, MI).addReg(RT.Reg, S, RT.Sub);
+ return true;
}
- // Use "At", since "DefAt" may be end().
- MachineBasicBlock &B = *At->getParent();
- DebugLoc DL = At->getDebugLoc();
- auto ImpD = BuildMI(B, DefAt, DL, HII->get(TargetOpcode::IMPLICIT_DEF))
- .addReg(DR, RegState::Define, NewSR);
- LIS->InsertMachineInstrInMaps(*ImpD);
- LocalImpDefs.insert(&*ImpD);
}
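An illustrative instance of the degenerate-mux case handled above (virtual register numbers assumed):

  //   %vreg5<def> = C2_mux %vreg1, %vreg7, %vreg7
  // is rewritten in place as
  //   %vreg5<def> = COPY %vreg7
  // which later coalescing can then eliminate.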
// First, create the two individual conditional transfers, and add each
// of them to the live intervals information. Do that first and then remove
// the old instruction from live intervals.
MachineInstr *TfrT =
- genCondTfrFor(MI.getOperand(2), At, DR, DSR, MP, true, ReadUndef, false);
+ genCondTfrFor(ST, At, DR, DSR, MP, true, ReadUndef, false);
MachineInstr *TfrF =
- genCondTfrFor(MI.getOperand(3), At, DR, DSR, MP, false, ReadUndef, true);
+ genCondTfrFor(SF, At, DR, DSR, MP, false, ReadUndef, true);
LIS->InsertMachineInstrInMaps(*TfrT);
LIS->InsertMachineInstrInMaps(*TfrF);
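The net effect of split(), matching the pass description above (virtual register numbers assumed):

  //   %vreg5<def> = C2_mux %vreg1, %vreg7, %vreg8
  // becomes the pair
  //   %vreg5<def> = A2_tfrt %vreg1, %vreg7
  //   %vreg5<def> = A2_tfrf %vreg1, %vreg8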
@@ -699,22 +659,6 @@ bool HexagonExpandCondsets::split(MachineInstr &MI,
return true;
}
-
-/// Split all MUX instructions in the given block into pairs of conditional
-/// transfers.
-bool HexagonExpandCondsets::splitInBlock(MachineBasicBlock &B,
- std::set<unsigned> &UpdRegs) {
- bool Changed = false;
- MachineBasicBlock::iterator I, E, NextI;
- for (I = B.begin(), E = B.end(); I != E; I = NextI) {
- NextI = std::next(I);
- if (isCondset(*I))
- Changed |= split(*I, UpdRegs);
- }
- return Changed;
-}
-
-
bool HexagonExpandCondsets::isPredicable(MachineInstr *MI) {
if (HII->isPredicated(*MI) || !HII->isPredicable(*MI))
return false;
@@ -735,7 +679,6 @@ bool HexagonExpandCondsets::isPredicable(MachineInstr *MI) {
return true;
}
-
/// Find the reaching definition for a predicated use of RD. The RD is used
/// under the conditions given by PredR and Cond, and this function will ignore
/// definitions that set RD under the opposite conditions.
@@ -744,7 +687,7 @@ MachineInstr *HexagonExpandCondsets::getReachingDefForPred(RegisterRef RD,
MachineBasicBlock &B = *UseIt->getParent();
MachineBasicBlock::iterator I = UseIt, S = B.begin();
if (I == S)
- return 0;
+ return nullptr;
bool PredValid = true;
do {
@@ -775,15 +718,14 @@ MachineInstr *HexagonExpandCondsets::getReachingDefForPred(RegisterRef RD,
if (RR.Sub == RD.Sub)
return MI;
if (RR.Sub == 0 || RD.Sub == 0)
- return 0;
+ return nullptr;
// We have different subregisters, so we can continue looking.
}
} while (I != S);
- return 0;
+ return nullptr;
}
-
/// Check if the instruction MI can be safely moved over a set of instructions
/// whose side-effects (in terms of register defs and uses) are expressed in
/// the maps Defs and Uses. These maps reflect the conditional defs and uses
@@ -813,7 +755,6 @@ bool HexagonExpandCondsets::canMoveOver(MachineInstr &MI, ReferenceMap &Defs,
return true;
}
-
/// Check if the instruction accessing memory (TheI) can be moved to the
/// location ToI.
bool HexagonExpandCondsets::canMoveMemTo(MachineInstr &TheI, MachineInstr &ToI,
@@ -848,7 +789,6 @@ bool HexagonExpandCondsets::canMoveMemTo(MachineInstr &TheI, MachineInstr &ToI,
return true;
}
-
/// Generate a predicated version of MI (where the condition is given via
/// PredR and Cond) at the point indicated by Where.
void HexagonExpandCondsets::predicateAt(const MachineOperand &DefOp,
@@ -909,7 +849,6 @@ void HexagonExpandCondsets::predicateAt(const MachineOperand &DefOp,
UpdRegs.insert(Op.getReg());
}
-
/// In the range [First, Last], rename all references to the "old" register RO
/// to the "new" register RN, but only in instructions predicated on the given
/// condition.
@@ -937,7 +876,6 @@ void HexagonExpandCondsets::renameInRange(RegisterRef RO, RegisterRef RN,
}
}
-
/// For a given conditional copy, predicate the definition of the source of
/// the copy under the given condition (using the same predicate register as
/// the copy).
@@ -982,7 +920,7 @@ bool HexagonExpandCondsets::predicate(MachineInstr &TfrI, bool Cond,
// conditions when collecting def and use information.
bool PredValid = true;
for (MachineBasicBlock::iterator I = std::next(DefIt); I != TfrIt; ++I) {
- if (!I->modifiesRegister(PredR, 0))
+ if (!I->modifiesRegister(PredR, nullptr))
continue;
PredValid = false;
break;
@@ -1013,6 +951,13 @@ bool HexagonExpandCondsets::predicate(MachineInstr &TfrI, bool Cond,
return false;
ReferenceMap &Map = Op.isDef() ? Defs : Uses;
+ if (Op.isDef() && Op.isUndef()) {
+ assert(RR.Sub && "Expecting a subregister on <def,read-undef>");
+ // If this is a <def,read-undef>, then it invalidates the non-written
+ // part of the register. For the purpose of checking the validity of
+ // the move, assume that it modifies the whole register.
+ RR.Sub = 0;
+ }
addRefToMap(RR, Map, Exec);
}
}
@@ -1067,7 +1012,6 @@ bool HexagonExpandCondsets::predicate(MachineInstr &TfrI, bool Cond,
return true;
}
-
/// Predicate all cases of conditional copies in the specified block.
bool HexagonExpandCondsets::predicateInBlock(MachineBasicBlock &B,
std::set<unsigned> &UpdRegs) {
@@ -1094,7 +1038,6 @@ bool HexagonExpandCondsets::predicateInBlock(MachineBasicBlock &B,
return Changed;
}
-
bool HexagonExpandCondsets::isIntReg(RegisterRef RR, unsigned &BW) {
if (!TargetRegisterInfo::isVirtualRegister(RR.Reg))
return false;
@@ -1110,7 +1053,6 @@ bool HexagonExpandCondsets::isIntReg(RegisterRef RR, unsigned &BW) {
return false;
}
-
bool HexagonExpandCondsets::isIntraBlocks(LiveInterval &LI) {
for (LiveInterval::iterator I = LI.begin(), E = LI.end(); I != E; ++I) {
LiveRange::Segment &LR = *I;
@@ -1124,7 +1066,6 @@ bool HexagonExpandCondsets::isIntraBlocks(LiveInterval &LI) {
return true;
}
-
bool HexagonExpandCondsets::coalesceRegisters(RegisterRef R1, RegisterRef R2) {
if (CoaLimitActive) {
if (CoaCounter >= CoaLimit)
@@ -1141,6 +1082,10 @@ bool HexagonExpandCondsets::coalesceRegisters(RegisterRef R1, RegisterRef R2) {
LiveInterval &L1 = LIS->getInterval(R1.Reg);
LiveInterval &L2 = LIS->getInterval(R2.Reg);
+ if (L2.empty())
+ return false;
+ if (L1.hasSubRanges() || L2.hasSubRanges())
+ return false;
bool Overlap = L1.overlaps(L2);
DEBUG(dbgs() << "compatible registers: ("
@@ -1176,6 +1121,7 @@ bool HexagonExpandCondsets::coalesceRegisters(RegisterRef R1, RegisterRef R2) {
}
while (L2.begin() != L2.end())
L2.removeSegment(*L2.begin());
+ LIS->removeInterval(R2.Reg);
updateKillFlags(R1.Reg);
DEBUG(dbgs() << "coalesced: " << L1 << "\n");
@@ -1184,28 +1130,22 @@ bool HexagonExpandCondsets::coalesceRegisters(RegisterRef R1, RegisterRef R2) {
return true;
}
-
-/// Attempt to coalesce one of the source registers to a MUX intruction with
+/// Attempt to coalesce one of the source registers to a MUX instruction with
/// the destination register. This could lead to having only one predicated
/// instruction in the end instead of two.
-bool HexagonExpandCondsets::coalesceSegments(MachineFunction &MF) {
- SmallVector<MachineInstr*,16> Condsets;
- for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
- MachineBasicBlock &B = *I;
- for (MachineBasicBlock::iterator J = B.begin(), F = B.end(); J != F; ++J) {
- MachineInstr *MI = &*J;
- if (!isCondset(*MI))
- continue;
- MachineOperand &S1 = MI->getOperand(2), &S2 = MI->getOperand(3);
- if (!S1.isReg() && !S2.isReg())
- continue;
- Condsets.push_back(MI);
- }
+bool HexagonExpandCondsets::coalesceSegments(
+ const SmallVectorImpl<MachineInstr*> &Condsets,
+ std::set<unsigned> &UpdRegs) {
+ SmallVector<MachineInstr*,16> TwoRegs;
+ for (MachineInstr *MI : Condsets) {
+ MachineOperand &S1 = MI->getOperand(2), &S2 = MI->getOperand(3);
+ if (!S1.isReg() && !S2.isReg())
+ continue;
+ TwoRegs.push_back(MI);
}
bool Changed = false;
- for (unsigned i = 0, n = Condsets.size(); i < n; ++i) {
- MachineInstr *CI = Condsets[i];
+ for (MachineInstr *CI : TwoRegs) {
RegisterRef RD = CI->getOperand(0);
RegisterRef RP = CI->getOperand(1);
MachineOperand &S1 = CI->getOperand(2), &S2 = CI->getOperand(3);
@@ -1231,21 +1171,30 @@ bool HexagonExpandCondsets::coalesceSegments(MachineFunction &MF) {
if (S1.isReg()) {
RegisterRef RS = S1;
MachineInstr *RDef = getReachingDefForPred(RS, CI, RP.Reg, true);
- if (!RDef || !HII->isPredicable(*RDef))
+ if (!RDef || !HII->isPredicable(*RDef)) {
Done = coalesceRegisters(RD, RegisterRef(S1));
+ if (Done) {
+ UpdRegs.insert(RD.Reg);
+ UpdRegs.insert(S1.getReg());
+ }
+ }
}
if (!Done && S2.isReg()) {
RegisterRef RS = S2;
MachineInstr *RDef = getReachingDefForPred(RS, CI, RP.Reg, false);
- if (!RDef || !HII->isPredicable(*RDef))
+ if (!RDef || !HII->isPredicable(*RDef)) {
Done = coalesceRegisters(RD, RegisterRef(S2));
+ if (Done) {
+ UpdRegs.insert(RD.Reg);
+ UpdRegs.insert(S2.getReg());
+ }
+ }
}
Changed |= Done;
}
return Changed;
}
-
bool HexagonExpandCondsets::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(*MF.getFunction()))
return false;
@@ -1255,25 +1204,54 @@ bool HexagonExpandCondsets::runOnMachineFunction(MachineFunction &MF) {
MDT = &getAnalysis<MachineDominatorTree>();
LIS = &getAnalysis<LiveIntervals>();
MRI = &MF.getRegInfo();
- LocalImpDefs.clear();
DEBUG(LIS->print(dbgs() << "Before expand-condsets\n",
MF.getFunction()->getParent()));
bool Changed = false;
- std::set<unsigned> SplitUpd, PredUpd;
+ std::set<unsigned> CoalUpd, PredUpd;
+
+ SmallVector<MachineInstr*,16> Condsets;
+ for (auto &B : MF)
+ for (auto &I : B)
+ if (isCondset(I))
+ Condsets.push_back(&I);
// Try to coalesce the target of a mux with one of its sources.
// This could eliminate a register copy in some circumstances.
- Changed |= coalesceSegments(MF);
+ Changed |= coalesceSegments(Condsets, CoalUpd);
+
+ // Update kill flags on all source operands. This is done here because
+ // at this moment (when expand-condsets runs), there are no kill flags
+ // in the IR (they have been removed by live range analysis).
+ // Updating them right before we split is easiest, because splitting
+ // adds definitions which would interfere with updating kills afterwards.
+ std::set<unsigned> KillUpd;
+ for (MachineInstr *MI : Condsets)
+ for (MachineOperand &Op : MI->operands())
+ if (Op.isReg() && Op.isUse())
+ if (!CoalUpd.count(Op.getReg()))
+ KillUpd.insert(Op.getReg());
+ updateLiveness(KillUpd, false, true, false);
+ DEBUG(LIS->print(dbgs() << "After coalescing\n",
+ MF.getFunction()->getParent()));
// First, simply split all muxes into a pair of conditional transfers
// and update the live intervals to reflect the new arrangement. The
// goal is to update the kill flags, since predication will rely on
// them.
- for (auto &B : MF)
- Changed |= splitInBlock(B, SplitUpd);
- updateLiveness(SplitUpd, true, true, false);
+ for (MachineInstr *MI : Condsets)
+ Changed |= split(*MI, PredUpd);
+ Condsets.clear(); // The contents of Condsets are invalid here anyway.
+
+ // Do not update live ranges after splitting. Recalculation of live
+ // intervals removes kill flags, which were preserved by splitting on
+ // the source operands of condsets. These kill flags are needed by
+ // predication, and after splitting they are difficult to recalculate
+ // (because of predicated defs), so make sure they are left untouched.
+ // Predication does not use live intervals.
+ DEBUG(LIS->print(dbgs() << "After splitting\n",
+ MF.getFunction()->getParent()));
// Traverse all blocks and collapse predicable instructions feeding
// conditional transfers into predicated instructions.
@@ -1281,18 +1259,11 @@ bool HexagonExpandCondsets::runOnMachineFunction(MachineFunction &MF) {
// cases that were not created in the previous step.
for (auto &B : MF)
Changed |= predicateInBlock(B, PredUpd);
+ DEBUG(LIS->print(dbgs() << "After predicating\n",
+ MF.getFunction()->getParent()));
+ PredUpd.insert(CoalUpd.begin(), CoalUpd.end());
updateLiveness(PredUpd, true, true, true);
- // Remove from SplitUpd all registers contained in PredUpd to avoid
- // unnecessary liveness recalculation.
- std::set<unsigned> Diff;
- std::set_difference(SplitUpd.begin(), SplitUpd.end(),
- PredUpd.begin(), PredUpd.end(),
- std::inserter(Diff, Diff.begin()));
- updateLiveness(Diff, false, false, true);
-
- for (auto *ImpD : LocalImpDefs)
- removeInstr(*ImpD);
DEBUG({
if (Changed)
@@ -1303,7 +1274,6 @@ bool HexagonExpandCondsets::runOnMachineFunction(MachineFunction &MF) {
return Changed;
}
-
//===----------------------------------------------------------------------===//
// Public Constructor Functions
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonFixupHwLoops.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
index 3de817cc8fb6..dfd1f1d4f886 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonFixupHwLoops.cpp
@@ -47,10 +47,10 @@ namespace {
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "Hexagon Hardware Loop Fixup";
}
@@ -125,7 +125,7 @@ bool HexagonFixupHwLoops::fixupLoopInstrs(MachineFunction &MF) {
BlockToInstOffset[&MBB] = InstOffset;
for (const MachineInstr &MI : MBB)
- InstOffset += HII->getSize(&MI);
+ InstOffset += HII->getSize(MI);
}
// Second pass - check each loop instruction to see if it needs to be
@@ -138,7 +138,7 @@ bool HexagonFixupHwLoops::fixupLoopInstrs(MachineFunction &MF) {
MachineBasicBlock::iterator MII = MBB.begin();
MachineBasicBlock::iterator MIE = MBB.end();
while (MII != MIE) {
- InstOffset += HII->getSize(&*MII);
+ InstOffset += HII->getSize(*MII);
if (MII->isDebugValue()) {
++MII;
continue;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
index 25402147bf53..a3f6273f9f67 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
@@ -17,25 +17,51 @@
#include "HexagonRegisterInfo.h"
#include "HexagonSubtarget.h"
#include "HexagonTargetMachine.h"
+#include "MCTargetDesc/HexagonBaseInfo.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/None.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachinePostDominators.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/Type.h"
+#include "llvm/MC/MCDwarf.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <limits>
+#include <map>
+#include <new>
+#include <utility>
+#include <vector>
// Hexagon stack frame layout as defined by the ABI:
//
@@ -99,27 +125,26 @@
// cated (reserved) register, it needs to be kept live throughout the function
// to be available as the base register for local object accesses.
// Normally, the address of a stack object is obtained by a pseudo-instruction
-// TFR_FI. To access local objects with the AP register present, a different
-// pseudo-instruction needs to be used: TFR_FIA. The TFR_FIA takes one extra
-// argument compared to TFR_FI: the first input register is the AP register.
+// PS_fi. To access local objects with the AP register present, a different
+// pseudo-instruction needs to be used: PS_fia. The PS_fia takes one extra
+// argument compared to PS_fi: the first input register is the AP register.
// This keeps the register live between its definition and its uses.
-// The AP register is originally set up using pseudo-instruction ALIGNA:
-// AP = ALIGNA A
+// The AP register is originally set up using pseudo-instruction PS_aligna:
+// AP = PS_aligna A
// where
// A - required stack alignment
// The alignment value must be the maximum of all alignments required by
// any stack object.
-// The dynamic allocation uses a pseudo-instruction ALLOCA:
-// Rd = ALLOCA Rs, A
+// The dynamic allocation uses a pseudo-instruction PS_alloca:
+// Rd = PS_alloca Rs, A
// where
// Rd - address of the allocated space
// Rs - minimum size (the actual allocated size can be larger to accommodate
// alignment)
// A - required alignment
-
using namespace llvm;
static cl::opt<bool> DisableDeallocRet("disable-hexagon-dealloc-ret",
@@ -145,9 +170,13 @@ static cl::opt<bool> EnableShrinkWrapping("hexagon-shrink-frame",
cl::init(true), cl::Hidden, cl::ZeroOrMore,
cl::desc("Enable stack frame shrink wrapping"));
-static cl::opt<unsigned> ShrinkLimit("shrink-frame-limit", cl::init(UINT_MAX),
- cl::Hidden, cl::ZeroOrMore, cl::desc("Max count of stack frame "
- "shrink-wraps"));
+static cl::opt<unsigned> ShrinkLimit("shrink-frame-limit",
+ cl::init(std::numeric_limits<unsigned>::max()), cl::Hidden, cl::ZeroOrMore,
+ cl::desc("Max count of stack frame shrink-wraps"));
+
+static cl::opt<bool> EnableSaveRestoreLong("enable-save-restore-long",
+ cl::Hidden, cl::desc("Enable long calls for save-restore stubs."),
+ cl::init(false), cl::ZeroOrMore);
static cl::opt<bool> UseAllocframe("use-allocframe", cl::init(true),
cl::Hidden, cl::desc("Use allocframe more conservatively"));
@@ -155,29 +184,41 @@ static cl::opt<bool> UseAllocframe("use-allocframe", cl::init(true),
static cl::opt<bool> OptimizeSpillSlots("hexagon-opt-spill", cl::Hidden,
cl::init(true), cl::desc("Optimize spill slots"));
+#ifndef NDEBUG
+static cl::opt<unsigned> SpillOptMax("spill-opt-max", cl::Hidden,
+ cl::init(std::numeric_limits<unsigned>::max()));
+static unsigned SpillOptCount = 0;
+#endif
namespace llvm {
+
void initializeHexagonCallFrameInformationPass(PassRegistry&);
FunctionPass *createHexagonCallFrameInformation();
-}
+
+} // end namespace llvm
namespace {
+
class HexagonCallFrameInformation : public MachineFunctionPass {
public:
static char ID;
+
HexagonCallFrameInformation() : MachineFunctionPass(ID) {
PassRegistry &PR = *PassRegistry::getPassRegistry();
initializeHexagonCallFrameInformationPass(PR);
}
+
bool runOnMachineFunction(MachineFunction &MF) override;
+
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
};
char HexagonCallFrameInformation::ID = 0;
-}
+
+} // end anonymous namespace
bool HexagonCallFrameInformation::runOnMachineFunction(MachineFunction &MF) {
auto &HFI = *MF.getSubtarget<HexagonSubtarget>().getFrameLowering();
@@ -197,12 +238,11 @@ FunctionPass *llvm::createHexagonCallFrameInformation() {
return new HexagonCallFrameInformation();
}
-
-namespace {
- /// Map a register pair Reg to the subregister that has the greater "number",
- /// i.e. D3 (aka R7:6) will be mapped to R7, etc.
- unsigned getMax32BitSubRegister(unsigned Reg, const TargetRegisterInfo &TRI,
- bool hireg = true) {
+/// Map a register pair Reg to the subregister that has the greater "number",
+/// i.e. D3 (aka R7:6) will be mapped to R7, etc.
+static unsigned getMax32BitSubRegister(unsigned Reg,
+ const TargetRegisterInfo &TRI,
+ bool hireg = true) {
if (Reg < Hexagon::D0 || Reg > Hexagon::D15)
return Reg;
@@ -217,11 +257,11 @@ namespace {
}
}
return RegNo;
- }
+}
- /// Returns the callee saved register with the largest id in the vector.
- unsigned getMaxCalleeSavedReg(const std::vector<CalleeSavedInfo> &CSI,
- const TargetRegisterInfo &TRI) {
+/// Returns the callee saved register with the largest id in the vector.
+static unsigned getMaxCalleeSavedReg(const std::vector<CalleeSavedInfo> &CSI,
+ const TargetRegisterInfo &TRI) {
static_assert(Hexagon::R1 > 0,
"Assume physical registers are encoded as positive integers");
if (CSI.empty())
@@ -234,20 +274,20 @@ namespace {
Max = Reg;
}
return Max;
- }
+}
- /// Checks if the basic block contains any instruction that needs a stack
- /// frame to be already in place.
- bool needsStackFrame(const MachineBasicBlock &MBB, const BitVector &CSR,
- const HexagonRegisterInfo &HRI) {
+/// Checks if the basic block contains any instruction that needs a stack
+/// frame to be already in place.
+static bool needsStackFrame(const MachineBasicBlock &MBB, const BitVector &CSR,
+ const HexagonRegisterInfo &HRI) {
for (auto &I : MBB) {
const MachineInstr *MI = &I;
if (MI->isCall())
return true;
unsigned Opc = MI->getOpcode();
switch (Opc) {
- case Hexagon::ALLOCA:
- case Hexagon::ALIGNA:
+ case Hexagon::PS_alloca:
+ case Hexagon::PS_aligna:
return true;
default:
break;
@@ -274,60 +314,62 @@ namespace {
}
}
return false;
- }
+}
/// Returns true if MBB has a machine instruction that indicates a tail call
/// in the block.
- bool hasTailCall(const MachineBasicBlock &MBB) {
+static bool hasTailCall(const MachineBasicBlock &MBB) {
MachineBasicBlock::const_iterator I = MBB.getLastNonDebugInstr();
unsigned RetOpc = I->getOpcode();
- return RetOpc == Hexagon::TCRETURNi || RetOpc == Hexagon::TCRETURNr;
- }
+ return RetOpc == Hexagon::PS_tailcall_i || RetOpc == Hexagon::PS_tailcall_r;
+}
- /// Returns true if MBB contains an instruction that returns.
- bool hasReturn(const MachineBasicBlock &MBB) {
+/// Returns true if MBB contains an instruction that returns.
+static bool hasReturn(const MachineBasicBlock &MBB) {
for (auto I = MBB.getFirstTerminator(), E = MBB.end(); I != E; ++I)
if (I->isReturn())
return true;
return false;
- }
+}
- /// Returns the "return" instruction from this block, or nullptr if there
- /// isn't any.
- MachineInstr *getReturn(MachineBasicBlock &MBB) {
+/// Returns the "return" instruction from this block, or nullptr if there
+/// isn't any.
+static MachineInstr *getReturn(MachineBasicBlock &MBB) {
for (auto &I : MBB)
if (I.isReturn())
return &I;
return nullptr;
- }
+}
- bool isRestoreCall(unsigned Opc) {
+static bool isRestoreCall(unsigned Opc) {
switch (Opc) {
case Hexagon::RESTORE_DEALLOC_RET_JMP_V4:
case Hexagon::RESTORE_DEALLOC_RET_JMP_V4_PIC:
+ case Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT:
+ case Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT_PIC:
+ case Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT:
+ case Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT_PIC:
case Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4:
case Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC:
return true;
}
return false;
- }
+}
- inline bool isOptNone(const MachineFunction &MF) {
+static inline bool isOptNone(const MachineFunction &MF) {
return MF.getFunction()->hasFnAttribute(Attribute::OptimizeNone) ||
MF.getTarget().getOptLevel() == CodeGenOpt::None;
- }
+}
- inline bool isOptSize(const MachineFunction &MF) {
+static inline bool isOptSize(const MachineFunction &MF) {
const Function &F = *MF.getFunction();
return F.optForSize() && !F.optForMinSize();
- }
+}
- inline bool isMinSize(const MachineFunction &MF) {
+static inline bool isMinSize(const MachineFunction &MF) {
return MF.getFunction()->optForMinSize();
- }
}
-
/// Implements shrink-wrapping of the stack frame. By default, the stack frame
/// is created in the function entry block, and is cleaned up in every block
/// that returns. This function finds alternate blocks: one for the frame
@@ -342,7 +384,7 @@ void HexagonFrameLowering::findShrunkPrologEpilog(MachineFunction &MF,
ShrinkCounter++;
}
- auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget());
+ auto &HST = MF.getSubtarget<HexagonSubtarget>();
auto &HRI = *HST.getRegisterInfo();
MachineDominatorTree MDT;
@@ -432,7 +474,6 @@ void HexagonFrameLowering::findShrunkPrologEpilog(MachineFunction &MF,
EpilogB = PDomB;
}
-
/// Perform most of the PEI work here:
/// - saving/restoring of the callee-saved registers,
/// - stack frame creation and destruction.
@@ -440,11 +481,11 @@ void HexagonFrameLowering::findShrunkPrologEpilog(MachineFunction &MF,
/// in one place allows shrink-wrapping of the stack frame.
void HexagonFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
- auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget());
+ auto &HST = MF.getSubtarget<HexagonSubtarget>();
auto &HRI = *HST.getRegisterInfo();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
MachineBasicBlock *PrologB = &MF.front(), *EpilogB = nullptr;
if (EnableShrinkWrapping)
@@ -453,6 +494,7 @@ void HexagonFrameLowering::emitPrologue(MachineFunction &MF,
bool PrologueStubs = false;
insertCSRSpillsInBlock(*PrologB, CSI, HRI, PrologueStubs);
insertPrologueInBlock(*PrologB, PrologueStubs);
+ updateEntryPaths(MF, *PrologB);
if (EpilogB) {
insertCSRRestoresInBlock(*EpilogB, CSI, HRI);
@@ -481,50 +523,49 @@ void HexagonFrameLowering::emitPrologue(MachineFunction &MF,
// If there is an epilog block, it may not have a return instruction.
// In such case, we need to add the callee-saved registers as live-ins
// in all blocks on all paths from the epilog to any return block.
- unsigned MaxBN = 0;
- for (auto &B : MF)
- if (B.getNumber() >= 0)
- MaxBN = std::max(MaxBN, unsigned(B.getNumber()));
+ unsigned MaxBN = MF.getNumBlockIDs();
BitVector DoneT(MaxBN+1), DoneF(MaxBN+1), Path(MaxBN+1);
- updateExitPaths(*EpilogB, EpilogB, DoneT, DoneF, Path);
+ updateExitPaths(*EpilogB, *EpilogB, DoneT, DoneF, Path);
}
}
-
void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB,
bool PrologueStubs) const {
MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
auto &HST = MF.getSubtarget<HexagonSubtarget>();
auto &HII = *HST.getInstrInfo();
auto &HRI = *HST.getRegisterInfo();
DebugLoc dl;
- unsigned MaxAlign = std::max(MFI->getMaxAlignment(), getStackAlignment());
+ unsigned MaxAlign = std::max(MFI.getMaxAlignment(), getStackAlignment());
// Calculate the total stack frame size.
// Get the number of bytes to allocate from the FrameInfo.
- unsigned FrameSize = MFI->getStackSize();
+ unsigned FrameSize = MFI.getStackSize();
// Round up the max call frame size to the max alignment on the stack.
- unsigned MaxCFA = alignTo(MFI->getMaxCallFrameSize(), MaxAlign);
- MFI->setMaxCallFrameSize(MaxCFA);
+ unsigned MaxCFA = alignTo(MFI.getMaxCallFrameSize(), MaxAlign);
+ MFI.setMaxCallFrameSize(MaxCFA);
FrameSize = MaxCFA + alignTo(FrameSize, MaxAlign);
- MFI->setStackSize(FrameSize);
+ MFI.setStackSize(FrameSize);
bool AlignStack = (MaxAlign > getStackAlignment());
// Get the number of bytes to allocate from the FrameInfo.
- unsigned NumBytes = MFI->getStackSize();
+ unsigned NumBytes = MFI.getStackSize();
unsigned SP = HRI.getStackRegister();
- unsigned MaxCF = MFI->getMaxCallFrameSize();
+ unsigned MaxCF = MFI.getMaxCallFrameSize();
MachineBasicBlock::iterator InsertPt = MBB.begin();
- auto *FuncInfo = MF.getInfo<HexagonMachineFunctionInfo>();
- auto &AdjustRegs = FuncInfo->getAllocaAdjustInsts();
+ SmallVector<MachineInstr *, 4> AdjustRegs;
+ for (auto &MBB : MF)
+ for (auto &MI : MBB)
+ if (MI.getOpcode() == Hexagon::PS_alloca)
+ AdjustRegs.push_back(&MI);
for (auto MI : AdjustRegs) {
- assert((MI->getOpcode() == Hexagon::ALLOCA) && "Expected alloca");
+ assert((MI->getOpcode() == Hexagon::PS_alloca) && "Expected alloca");
expandAlloca(MI, HII, SP, MaxCF);
MI->eraseFromParent();
}
@@ -551,7 +592,7 @@ void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB,
// Subtract offset from frame pointer.
// We use a caller-saved non-parameter register for that.
unsigned CallerSavedReg = HRI.getFirstCallerSavedNonParamReg();
- BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::CONST32_Int_Real),
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::CONST32),
CallerSavedReg).addImm(NumBytes);
BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_sub), SP)
.addReg(SP)
@@ -572,7 +613,7 @@ void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB,
// registers inline (i.e. did not use a spill function), then call
// the stack checker directly.
if (EnableStackOVFSanitizer && !PrologueStubs)
- BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::CALLstk))
+ BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::PS_call_stk))
.addExternalSymbol("__runtime_stack_check");
}
@@ -581,7 +622,7 @@ void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const {
if (!hasFP(MF))
return;
- auto &HST = static_cast<const HexagonSubtarget&>(MF.getSubtarget());
+ auto &HST = MF.getSubtarget<HexagonSubtarget>();
auto &HII = *HST.getInstrInfo();
auto &HRI = *HST.getRegisterInfo();
unsigned SP = HRI.getStackRegister();
@@ -608,7 +649,9 @@ void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const {
// Check for RESTORE_DEALLOC_RET* tail call. Don't emit an extra dealloc-
// frame instruction if we encounter it.
if (RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4 ||
- RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4_PIC) {
+ RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4_PIC ||
+ RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT ||
+ RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT_PIC) {
MachineBasicBlock::iterator It = RetI;
++It;
// Delete all instructions after the RESTORE (except labels).
@@ -629,16 +672,19 @@ void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator PrevIt = std::prev(InsertPt);
unsigned COpc = PrevIt->getOpcode();
if (COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4 ||
- COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC)
+ COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC ||
+ COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT ||
+ COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT_PIC ||
+ COpc == Hexagon::PS_call_nr || COpc == Hexagon::PS_callr_nr)
NeedsDeallocframe = false;
}
if (!NeedsDeallocframe)
return;
- // If the returning instruction is JMPret, replace it with dealloc_return,
+ // If the returning instruction is PS_jmpret, replace it with dealloc_return,
// otherwise just add deallocframe. The function could be returning via a
// tail call.
- if (RetOpc != Hexagon::JMPret || DisableDeallocRet) {
+ if (RetOpc != Hexagon::PS_jmpret || DisableDeallocRet) {
BuildMI(MBB, InsertPt, DL, HII.get(Hexagon::L2_deallocframe));
return;
}
@@ -649,9 +695,30 @@ void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const {
MBB.erase(RetI);
}
+void HexagonFrameLowering::updateEntryPaths(MachineFunction &MF,
+ MachineBasicBlock &SaveB) const {
+ SetVector<unsigned> Worklist;
+
+ MachineBasicBlock &EntryB = MF.front();
+ Worklist.insert(EntryB.getNumber());
+
+ unsigned SaveN = SaveB.getNumber();
+ auto &CSI = MF.getFrameInfo().getCalleeSavedInfo();
+
+ for (unsigned i = 0; i < Worklist.size(); ++i) {
+ unsigned BN = Worklist[i];
+ MachineBasicBlock &MBB = *MF.getBlockNumbered(BN);
+ for (auto &R : CSI)
+ if (!MBB.isLiveIn(R.getReg()))
+ MBB.addLiveIn(R.getReg());
+ if (BN != SaveN)
+ for (auto &SB : MBB.successors())
+ Worklist.insert(SB->getNumber());
+ }
+}
bool HexagonFrameLowering::updateExitPaths(MachineBasicBlock &MBB,
- MachineBasicBlock *RestoreB, BitVector &DoneT, BitVector &DoneF,
+ MachineBasicBlock &RestoreB, BitVector &DoneT, BitVector &DoneF,
BitVector &Path) const {
assert(MBB.getNumber() >= 0);
unsigned BN = MBB.getNumber();
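
// Editor's note: the new updateEntryPaths above walks every block reachable
// from the entry up to (and including) the save block, adding the callee-saved
// registers as live-ins along the way. The standalone sketch below mimics that
// SetVector-style worklist traversal on a made-up CFG; the block numbers and
// successor lists are hypothetical, only the traversal pattern mirrors the code.

#include <cstdio>
#include <set>
#include <vector>

int main() {
  // Successor lists of a small hypothetical CFG: block 0 is the entry,
  // block 3 plays the role of the save block.
  std::vector<std::vector<int>> Succs = {{1, 2}, {3}, {3}, {4}, {}};
  const int SaveBlock = 3;

  std::vector<int> Worklist = {0};
  std::set<int> InList = {0};
  for (size_t i = 0; i < Worklist.size(); ++i) {
    int BN = Worklist[i];
    std::printf("adding callee-saved live-ins to block %d\n", BN);
    if (BN == SaveBlock)
      continue;                      // do not walk past the save block
    for (int SB : Succs[BN])
      if (InList.insert(SB).second)  // SetVector semantics: insert once
        Worklist.push_back(SB);
  }
}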
@@ -660,7 +727,7 @@ bool HexagonFrameLowering::updateExitPaths(MachineBasicBlock &MBB,
if (DoneT[BN])
return true;
- auto &CSI = MBB.getParent()->getFrameInfo()->getCalleeSavedInfo();
+ auto &CSI = MBB.getParent()->getFrameInfo().getCalleeSavedInfo();
Path[BN] = true;
bool ReachedExit = false;
@@ -681,7 +748,7 @@ bool HexagonFrameLowering::updateExitPaths(MachineBasicBlock &MBB,
// We don't want to add unnecessary live-ins to the restore block: since
// the callee-saved registers are being defined in it, the entry of the
// restore block cannot be on the path from the definitions to any exit.
- if (ReachedExit && &MBB != RestoreB) {
+ if (ReachedExit && &MBB != &RestoreB) {
for (auto &R : CSI)
if (!MBB.isLiveIn(R.getReg()))
MBB.addLiveIn(R.getReg());
@@ -694,42 +761,49 @@ bool HexagonFrameLowering::updateExitPaths(MachineBasicBlock &MBB,
return ReachedExit;
}
-
-namespace {
- bool IsAllocFrame(MachineBasicBlock::const_iterator It) {
- if (!It->isBundle())
- return It->getOpcode() == Hexagon::S2_allocframe;
- auto End = It->getParent()->instr_end();
- MachineBasicBlock::const_instr_iterator I = It.getInstrIterator();
- while (++I != End && I->isBundled())
- if (I->getOpcode() == Hexagon::S2_allocframe)
- return true;
- return false;
- }
-
- MachineBasicBlock::iterator FindAllocFrame(MachineBasicBlock &B) {
- for (auto &I : B)
- if (IsAllocFrame(I))
- return I;
- return B.end();
- }
+static Optional<MachineBasicBlock::iterator>
+findCFILocation(MachineBasicBlock &B) {
+ // The CFI instructions need to be inserted right after allocframe.
+ // An exception to this is a situation where allocframe is bundled
+ // with a call: then the CFI instructions need to be inserted before
+ // the packet with the allocframe+call (in case the call throws an
+ // exception).
+ auto End = B.instr_end();
+
+ for (MachineInstr &I : B) {
+ MachineBasicBlock::iterator It = I.getIterator();
+ if (!I.isBundle()) {
+ if (I.getOpcode() == Hexagon::S2_allocframe)
+ return std::next(It);
+ continue;
+ }
+ // I is a bundle.
+ bool HasCall = false, HasAllocFrame = false;
+ auto T = It.getInstrIterator();
+ while (++T != End && T->isBundled()) {
+ if (T->getOpcode() == Hexagon::S2_allocframe)
+ HasAllocFrame = true;
+ else if (T->isCall())
+ HasCall = true;
+ }
+ if (HasAllocFrame)
+ return HasCall ? It : std::next(It);
+ }
+ return None;
}
-
void HexagonFrameLowering::insertCFIInstructions(MachineFunction &MF) const {
for (auto &B : MF) {
- auto AF = FindAllocFrame(B);
- if (AF == B.end())
- continue;
- insertCFIInstructionsAt(B, ++AF);
+ auto At = findCFILocation(B);
+ if (At.hasValue())
+ insertCFIInstructionsAt(B, At.getValue());
}
}
-
void HexagonFrameLowering::insertCFIInstructionsAt(MachineBasicBlock &MBB,
MachineBasicBlock::iterator At) const {
MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
MachineModuleInfo &MMI = MF.getMMI();
auto &HST = MF.getSubtarget<HexagonSubtarget>();
auto &HII = *HST.getInstrInfo();
@@ -761,15 +835,15 @@ void HexagonFrameLowering::insertCFIInstructionsAt(MachineBasicBlock &MBB,
// MCCFIInstruction::createOffset takes the offset without sign change.
auto DefCfa = MCCFIInstruction::createDefCfa(FrameLabel, DwFPReg, -8);
BuildMI(MBB, At, DL, CFID)
- .addCFIIndex(MMI.addFrameInst(DefCfa));
+ .addCFIIndex(MF.addFrameInst(DefCfa));
// R31 (return addr) = CFA - 4
auto OffR31 = MCCFIInstruction::createOffset(FrameLabel, DwRAReg, -4);
BuildMI(MBB, At, DL, CFID)
- .addCFIIndex(MMI.addFrameInst(OffR31));
+ .addCFIIndex(MF.addFrameInst(OffR31));
// R30 (frame ptr) = CFA - 8
auto OffR30 = MCCFIInstruction::createOffset(FrameLabel, DwFPReg, -8);
BuildMI(MBB, At, DL, CFID)
- .addCFIIndex(MMI.addFrameInst(OffR30));
+ .addCFIIndex(MF.addFrameInst(OffR30));
}
static unsigned int RegsToMove[] = {
@@ -789,7 +863,7 @@ void HexagonFrameLowering::insertCFIInstructionsAt(MachineBasicBlock &MBB,
auto IfR = [Reg] (const CalleeSavedInfo &C) -> bool {
return C.getReg() == Reg;
};
- auto F = std::find_if(CSI.begin(), CSI.end(), IfR);
+ auto F = find_if(CSI, IfR);
if (F == CSI.end())
continue;
@@ -815,7 +889,7 @@ void HexagonFrameLowering::insertCFIInstructionsAt(MachineBasicBlock &MBB,
auto OffReg = MCCFIInstruction::createOffset(FrameLabel, DwarfReg,
Offset);
BuildMI(MBB, At, DL, CFID)
- .addCFIIndex(MMI.addFrameInst(OffReg));
+ .addCFIIndex(MF.addFrameInst(OffReg));
} else {
// Split the double regs into subregs, and generate appropriate
// cfi_offsets.
@@ -823,25 +897,24 @@ void HexagonFrameLowering::insertCFIInstructionsAt(MachineBasicBlock &MBB,
// understand paired registers for cfi_offset.
// Eg .cfi_offset r1:0, -64
- unsigned HiReg = HRI.getSubReg(Reg, Hexagon::subreg_hireg);
- unsigned LoReg = HRI.getSubReg(Reg, Hexagon::subreg_loreg);
+ unsigned HiReg = HRI.getSubReg(Reg, Hexagon::isub_hi);
+ unsigned LoReg = HRI.getSubReg(Reg, Hexagon::isub_lo);
unsigned HiDwarfReg = HRI.getDwarfRegNum(HiReg, true);
unsigned LoDwarfReg = HRI.getDwarfRegNum(LoReg, true);
auto OffHi = MCCFIInstruction::createOffset(FrameLabel, HiDwarfReg,
Offset+4);
BuildMI(MBB, At, DL, CFID)
- .addCFIIndex(MMI.addFrameInst(OffHi));
+ .addCFIIndex(MF.addFrameInst(OffHi));
auto OffLo = MCCFIInstruction::createOffset(FrameLabel, LoDwarfReg,
Offset);
BuildMI(MBB, At, DL, CFID)
- .addCFIIndex(MMI.addFrameInst(OffLo));
+ .addCFIIndex(MF.addFrameInst(OffLo));
}
}
}
-
bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const {
- auto &MFI = *MF.getFrameInfo();
+ auto &MFI = MF.getFrameInfo();
auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
bool HasFixed = MFI.getNumFixedObjects();
@@ -877,7 +950,6 @@ bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const {
return false;
}
-
enum SpillKind {
SK_ToMem,
SK_FromMem,
@@ -952,13 +1024,12 @@ static const char *getSpillFunctionFor(unsigned MaxReg, SpillKind SpillType,
default:
llvm_unreachable("Unhandled maximum callee save register");
}
- return 0;
+ return nullptr;
}
-
int HexagonFrameLowering::getFrameIndexReference(const MachineFunction &MF,
int FI, unsigned &FrameReg) const {
- auto &MFI = *MF.getFrameInfo();
+ auto &MFI = MF.getFrameInfo();
auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
int Offset = MFI.getObjectOffset(FI);
@@ -1039,7 +1110,6 @@ int HexagonFrameLowering::getFrameIndexReference(const MachineFunction &MF,
return RealOffset;
}
-
bool HexagonFrameLowering::insertCSRSpillsInBlock(MachineBasicBlock &MBB,
const CSIVect &CSI, const HexagonRegisterInfo &HRI,
bool &PrologueStubs) const {
@@ -1049,7 +1119,8 @@ bool HexagonFrameLowering::insertCSRSpillsInBlock(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI = MBB.begin();
PrologueStubs = false;
MachineFunction &MF = *MBB.getParent();
- auto &HII = *MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
+ auto &HST = MF.getSubtarget<HexagonSubtarget>();
+ auto &HII = *HST.getInstrInfo();
if (useSpillFunction(MF, CSI)) {
PrologueStubs = true;
@@ -1059,20 +1130,31 @@ bool HexagonFrameLowering::insertCSRSpillsInBlock(MachineBasicBlock &MBB,
StkOvrFlowEnabled);
auto &HTM = static_cast<const HexagonTargetMachine&>(MF.getTarget());
bool IsPIC = HTM.isPositionIndependent();
+ bool LongCalls = HST.useLongCalls() || EnableSaveRestoreLong;
// Call spill function.
DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
unsigned SpillOpc;
- if (StkOvrFlowEnabled)
- SpillOpc = IsPIC ? Hexagon::SAVE_REGISTERS_CALL_V4STK_PIC
- : Hexagon::SAVE_REGISTERS_CALL_V4STK;
- else
- SpillOpc = IsPIC ? Hexagon::SAVE_REGISTERS_CALL_V4_PIC
- : Hexagon::SAVE_REGISTERS_CALL_V4;
+ if (StkOvrFlowEnabled) {
+ if (LongCalls)
+ SpillOpc = IsPIC ? Hexagon::SAVE_REGISTERS_CALL_V4STK_EXT_PIC
+ : Hexagon::SAVE_REGISTERS_CALL_V4STK_EXT;
+ else
+ SpillOpc = IsPIC ? Hexagon::SAVE_REGISTERS_CALL_V4STK_PIC
+ : Hexagon::SAVE_REGISTERS_CALL_V4STK;
+ } else {
+ if (LongCalls)
+ SpillOpc = IsPIC ? Hexagon::SAVE_REGISTERS_CALL_V4_EXT_PIC
+ : Hexagon::SAVE_REGISTERS_CALL_V4_EXT;
+ else
+ SpillOpc = IsPIC ? Hexagon::SAVE_REGISTERS_CALL_V4_PIC
+ : Hexagon::SAVE_REGISTERS_CALL_V4;
+ }
MachineInstr *SaveRegsCall =
BuildMI(MBB, MI, DL, HII.get(SpillOpc))
.addExternalSymbol(SpillFun);
+
// Add callee-saved registers as use.
addCalleeSaveRegistersAsImpOperand(SaveRegsCall, CSI, false, true);
// Add live in registers.
@@ -1096,7 +1178,6 @@ bool HexagonFrameLowering::insertCSRSpillsInBlock(MachineBasicBlock &MBB,
return true;
}
-
bool HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB,
const CSIVect &CSI, const HexagonRegisterInfo &HRI) const {
if (CSI.empty())
@@ -1104,7 +1185,8 @@ bool HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI = MBB.getFirstTerminator();
MachineFunction &MF = *MBB.getParent();
- auto &HII = *MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
+ auto &HST = MF.getSubtarget<HexagonSubtarget>();
+ auto &HII = *HST.getInstrInfo();
if (useRestoreFunction(MF, CSI)) {
bool HasTC = hasTailCall(MBB) || !hasReturn(MBB);
@@ -1113,6 +1195,7 @@ bool HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB,
const char *RestoreFn = getSpillFunctionFor(MaxR, Kind);
auto &HTM = static_cast<const HexagonTargetMachine&>(MF.getTarget());
bool IsPIC = HTM.isPositionIndependent();
+ bool LongCalls = HST.useLongCalls() || EnableSaveRestoreLong;
// Call spill function.
DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc()
@@ -1120,17 +1203,27 @@ bool HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB,
MachineInstr *DeallocCall = nullptr;
if (HasTC) {
- unsigned ROpc = IsPIC ? Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC
- : Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4;
- DeallocCall = BuildMI(MBB, MI, DL, HII.get(ROpc))
+ unsigned RetOpc;
+ if (LongCalls)
+ RetOpc = IsPIC ? Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT_PIC
+ : Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT;
+ else
+ RetOpc = IsPIC ? Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC
+ : Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4;
+ DeallocCall = BuildMI(MBB, MI, DL, HII.get(RetOpc))
.addExternalSymbol(RestoreFn);
} else {
// The block has a return.
MachineBasicBlock::iterator It = MBB.getFirstTerminator();
assert(It->isReturn() && std::next(It) == MBB.end());
- unsigned ROpc = IsPIC ? Hexagon::RESTORE_DEALLOC_RET_JMP_V4_PIC
- : Hexagon::RESTORE_DEALLOC_RET_JMP_V4;
- DeallocCall = BuildMI(MBB, It, DL, HII.get(ROpc))
+ unsigned RetOpc;
+ if (LongCalls)
+ RetOpc = IsPIC ? Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT_PIC
+ : Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT;
+ else
+ RetOpc = IsPIC ? Hexagon::RESTORE_DEALLOC_RET_JMP_V4_PIC
+ : Hexagon::RESTORE_DEALLOC_RET_JMP_V4;
+ DeallocCall = BuildMI(MBB, It, DL, HII.get(RetOpc))
.addExternalSymbol(RestoreFn);
// Transfer the function live-out registers.
DeallocCall->copyImplicitOps(MF, *It);
@@ -1160,39 +1253,38 @@ MachineBasicBlock::iterator HexagonFrameLowering::eliminateCallFramePseudoInstr(
return MBB.erase(I);
}
-
void HexagonFrameLowering::processFunctionBeforeFrameFinalized(
MachineFunction &MF, RegScavenger *RS) const {
// If this function uses an aligned stack and also has variable sized stack
// objects, then we need to map all spill slots to fixed positions, so that
// they can be accessed through FP. Otherwise they would have to be accessed
// via AP, which may not be available at the particular place in the program.
- MachineFrameInfo *MFI = MF.getFrameInfo();
- bool HasAlloca = MFI->hasVarSizedObjects();
- bool NeedsAlign = (MFI->getMaxAlignment() > getStackAlignment());
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ bool HasAlloca = MFI.hasVarSizedObjects();
+ bool NeedsAlign = (MFI.getMaxAlignment() > getStackAlignment());
if (!HasAlloca || !NeedsAlign)
return;
- unsigned LFS = MFI->getLocalFrameSize();
- for (int i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
- if (!MFI->isSpillSlotObjectIndex(i) || MFI->isDeadObjectIndex(i))
+ unsigned LFS = MFI.getLocalFrameSize();
+ for (int i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
+ if (!MFI.isSpillSlotObjectIndex(i) || MFI.isDeadObjectIndex(i))
continue;
- unsigned S = MFI->getObjectSize(i);
+ unsigned S = MFI.getObjectSize(i);
// Reduce the alignment to at most 8. This will require unaligned vector
// stores if they happen here.
- unsigned A = std::max(MFI->getObjectAlignment(i), 8U);
- MFI->setObjectAlignment(i, 8);
+ unsigned A = std::max(MFI.getObjectAlignment(i), 8U);
+ MFI.setObjectAlignment(i, 8);
LFS = alignTo(LFS+S, A);
- MFI->mapLocalFrameObject(i, -LFS);
+ MFI.mapLocalFrameObject(i, -LFS);
}
- MFI->setLocalFrameSize(LFS);
- unsigned A = MFI->getLocalFrameMaxAlign();
+ MFI.setLocalFrameSize(LFS);
+ unsigned A = MFI.getLocalFrameMaxAlign();
assert(A <= 8 && "Unexpected local frame alignment");
if (A == 0)
- MFI->setLocalFrameMaxAlign(8);
- MFI->setUseLocalStackAllocationBlock(true);
+ MFI.setLocalFrameMaxAlign(8);
+ MFI.setUseLocalStackAllocationBlock(true);
// Set the physical aligned-stack base address register.
unsigned AP = 0;
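
// Editor's aside on the spill-slot remapping in processFunctionBeforeFrameFinalized
// above: each spill slot is appended to the local frame block and addressed at a
// negative offset from the frame base. Below is a minimal standalone sketch of
// that offset assignment with hypothetical slot sizes; alignTo is again a
// simplified stand-in for llvm::alignTo.

#include <algorithm>
#include <cstdint>
#include <cstdio>

static uint64_t alignTo(uint64_t Value, uint64_t Align) {
  return (Value + Align - 1) & ~(Align - 1);
}

int main() {
  // Hypothetical spill slots: {size, original alignment} in bytes.
  struct Slot { unsigned Size, Align; } Slots[] = {{4, 4}, {8, 8}, {64, 64}};
  uint64_t LFS = 0;   // running local frame size
  for (const Slot &S : Slots) {
    // Offsets stay aligned to the original alignment (at least 8), even
    // though the recorded object alignment is reduced to 8.
    unsigned A = std::max(S.Align, 8u);
    LFS = alignTo(LFS + S.Size, A);
    std::printf("slot of %u bytes mapped at offset -%llu\n", S.Size,
                (unsigned long long)LFS);
  }
  std::printf("local frame size: %llu bytes\n", (unsigned long long)LFS);
}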
@@ -1224,7 +1316,6 @@ static bool needToReserveScavengingSpillSlots(MachineFunction &MF,
return true;
}
-
#ifndef NDEBUG
static void dump_registers(BitVector &Regs, const TargetRegisterInfo &TRI) {
dbgs() << '{';
@@ -1236,12 +1327,11 @@ static void dump_registers(BitVector &Regs, const TargetRegisterInfo &TRI) {
}
#endif
-
bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF,
const TargetRegisterInfo *TRI, std::vector<CalleeSavedInfo> &CSI) const {
- DEBUG(dbgs() << LLVM_FUNCTION_NAME << " on "
+ DEBUG(dbgs() << __func__ << " on "
<< MF.getFunction()->getName() << '\n');
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
BitVector SRegs(Hexagon::NUM_TARGET_REGS);
// Generate a set of unique, callee-saved registers (SRegs), where each
@@ -1321,7 +1411,7 @@ bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF,
if (!SRegs[S->Reg])
continue;
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(S->Reg);
- int FI = MFI->CreateFixedSpillStackObject(RC->getSize(), S->Offset);
+ int FI = MFI.CreateFixedSpillStackObject(RC->getSize(), S->Offset);
MinOffset = std::min(MinOffset, S->Offset);
CSI.push_back(CalleeSavedInfo(S->Reg, FI));
SRegs[S->Reg] = false;
@@ -1337,7 +1427,7 @@ bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF,
unsigned Align = std::min(RC->getAlignment(), getStackAlignment());
assert(isPowerOf2_32(Align));
Off &= -Align;
- int FI = MFI->CreateFixedSpillStackObject(RC->getSize(), Off);
+ int FI = MFI.CreateFixedSpillStackObject(RC->getSize(), Off);
MinOffset = std::min(MinOffset, Off);
CSI.push_back(CalleeSavedInfo(R, FI));
SRegs[R] = false;
@@ -1347,7 +1437,7 @@ bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF,
dbgs() << "CS information: {";
for (unsigned i = 0, n = CSI.size(); i < n; ++i) {
int FI = CSI[i].getFrameIdx();
- int Off = MFI->getObjectOffset(FI);
+ int Off = MFI.getObjectOffset(FI);
dbgs() << ' ' << PrintReg(CSI[i].getReg(), TRI) << ":fi#" << FI << ":sp";
if (Off >= 0)
dbgs() << '+';
@@ -1371,7 +1461,6 @@ bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF,
return true;
}
-
bool HexagonFrameLowering::expandCopy(MachineBasicBlock &B,
MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
@@ -1398,12 +1487,13 @@ bool HexagonFrameLowering::expandStoreInt(MachineBasicBlock &B,
MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
MachineInstr *MI = &*It;
+ if (!MI->getOperand(0).isFI())
+ return false;
+
DebugLoc DL = MI->getDebugLoc();
unsigned Opc = MI->getOpcode();
unsigned SrcR = MI->getOperand(2).getReg();
bool IsKill = MI->getOperand(2).isKill();
-
- assert(MI->getOperand(0).isFI() && "Expect a frame index");
int FI = MI->getOperand(0).getIndex();
// TmpR = C2_tfrpr SrcR if SrcR is a predicate register
@@ -1430,11 +1520,12 @@ bool HexagonFrameLowering::expandLoadInt(MachineBasicBlock &B,
MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
MachineInstr *MI = &*It;
+ if (!MI->getOperand(1).isFI())
+ return false;
+
DebugLoc DL = MI->getDebugLoc();
unsigned Opc = MI->getOpcode();
unsigned DstR = MI->getOperand(0).getReg();
-
- assert(MI->getOperand(1).isFI() && "Expect a frame index");
int FI = MI->getOperand(1).getIndex();
// TmpR = L2_loadri_io FI, 0
@@ -1456,17 +1547,17 @@ bool HexagonFrameLowering::expandLoadInt(MachineBasicBlock &B,
return true;
}
-
bool HexagonFrameLowering::expandStoreVecPred(MachineBasicBlock &B,
MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
auto &HST = B.getParent()->getSubtarget<HexagonSubtarget>();
MachineInstr *MI = &*It;
+ if (!MI->getOperand(0).isFI())
+ return false;
+
DebugLoc DL = MI->getDebugLoc();
unsigned SrcR = MI->getOperand(2).getReg();
bool IsKill = MI->getOperand(2).isKill();
-
- assert(MI->getOperand(0).isFI() && "Expect a frame index");
int FI = MI->getOperand(0).getIndex();
bool Is128B = HST.useHVXDblOps();
@@ -1503,10 +1594,11 @@ bool HexagonFrameLowering::expandLoadVecPred(MachineBasicBlock &B,
const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
auto &HST = B.getParent()->getSubtarget<HexagonSubtarget>();
MachineInstr *MI = &*It;
+ if (!MI->getOperand(1).isFI())
+ return false;
+
DebugLoc DL = MI->getDebugLoc();
unsigned DstR = MI->getOperand(0).getReg();
-
- assert(MI->getOperand(1).isFI() && "Expect a frame index");
int FI = MI->getOperand(1).getIndex();
bool Is128B = HST.useHVXDblOps();
@@ -1541,17 +1633,27 @@ bool HexagonFrameLowering::expandStoreVec2(MachineBasicBlock &B,
const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
MachineFunction &MF = *B.getParent();
auto &HST = MF.getSubtarget<HexagonSubtarget>();
- auto &MFI = *MF.getFrameInfo();
+ auto &MFI = MF.getFrameInfo();
auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
MachineInstr *MI = &*It;
- DebugLoc DL = MI->getDebugLoc();
+ if (!MI->getOperand(0).isFI())
+ return false;
+
+ // It is possible that the double vector being stored is only partially
+ // defined. From the point of view of the liveness tracking, it is ok to
+ // store it as a whole, but if we break it up we may end up storing a
+ // register that is entirely undefined.
+ LivePhysRegs LPR(&HRI);
+ LPR.addLiveIns(B);
+ SmallVector<std::pair<unsigned, const MachineOperand*>,2> Clobbers;
+ for (auto R = B.begin(); R != It; ++R)
+ LPR.stepForward(*R, Clobbers);
+ DebugLoc DL = MI->getDebugLoc();
unsigned SrcR = MI->getOperand(2).getReg();
- unsigned SrcLo = HRI.getSubReg(SrcR, Hexagon::subreg_loreg);
- unsigned SrcHi = HRI.getSubReg(SrcR, Hexagon::subreg_hireg);
+ unsigned SrcLo = HRI.getSubReg(SrcR, Hexagon::vsub_lo);
+ unsigned SrcHi = HRI.getSubReg(SrcR, Hexagon::vsub_hi);
bool IsKill = MI->getOperand(2).isKill();
-
- assert(MI->getOperand(0).isFI() && "Expect a frame index");
int FI = MI->getOperand(0).getIndex();
bool Is128B = HST.useHVXDblOps();
@@ -1563,28 +1665,32 @@ bool HexagonFrameLowering::expandStoreVec2(MachineBasicBlock &B,
unsigned StoreOpc;
// Store low part.
- if (NeedAlign <= HasAlign)
- StoreOpc = !Is128B ? Hexagon::V6_vS32b_ai : Hexagon::V6_vS32b_ai_128B;
- else
- StoreOpc = !Is128B ? Hexagon::V6_vS32Ub_ai : Hexagon::V6_vS32Ub_ai_128B;
+ if (LPR.contains(SrcLo)) {
+ if (NeedAlign <= HasAlign)
+ StoreOpc = !Is128B ? Hexagon::V6_vS32b_ai : Hexagon::V6_vS32b_ai_128B;
+ else
+ StoreOpc = !Is128B ? Hexagon::V6_vS32Ub_ai : Hexagon::V6_vS32Ub_ai_128B;
- BuildMI(B, It, DL, HII.get(StoreOpc))
- .addFrameIndex(FI)
- .addImm(0)
- .addReg(SrcLo, getKillRegState(IsKill))
- .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ BuildMI(B, It, DL, HII.get(StoreOpc))
+ .addFrameIndex(FI)
+ .addImm(0)
+ .addReg(SrcLo, getKillRegState(IsKill))
+ .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ }
- // Load high part.
- if (NeedAlign <= MinAlign(HasAlign, Size))
- StoreOpc = !Is128B ? Hexagon::V6_vS32b_ai : Hexagon::V6_vS32b_ai_128B;
- else
- StoreOpc = !Is128B ? Hexagon::V6_vS32Ub_ai : Hexagon::V6_vS32Ub_ai_128B;
+ // Store high part.
+ if (LPR.contains(SrcHi)) {
+ if (NeedAlign <= MinAlign(HasAlign, Size))
+ StoreOpc = !Is128B ? Hexagon::V6_vS32b_ai : Hexagon::V6_vS32b_ai_128B;
+ else
+ StoreOpc = !Is128B ? Hexagon::V6_vS32Ub_ai : Hexagon::V6_vS32Ub_ai_128B;
- BuildMI(B, It, DL, HII.get(StoreOpc))
- .addFrameIndex(FI)
- .addImm(Size)
- .addReg(SrcHi, getKillRegState(IsKill))
- .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ BuildMI(B, It, DL, HII.get(StoreOpc))
+ .addFrameIndex(FI)
+ .addImm(Size)
+ .addReg(SrcHi, getKillRegState(IsKill))
+ .setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
+ }
B.erase(It);
return true;
@@ -1595,16 +1701,16 @@ bool HexagonFrameLowering::expandLoadVec2(MachineBasicBlock &B,
const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
MachineFunction &MF = *B.getParent();
auto &HST = MF.getSubtarget<HexagonSubtarget>();
- auto &MFI = *MF.getFrameInfo();
+ auto &MFI = MF.getFrameInfo();
auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
MachineInstr *MI = &*It;
- DebugLoc DL = MI->getDebugLoc();
+ if (!MI->getOperand(1).isFI())
+ return false;
+ DebugLoc DL = MI->getDebugLoc();
unsigned DstR = MI->getOperand(0).getReg();
- unsigned DstHi = HRI.getSubReg(DstR, Hexagon::subreg_hireg);
- unsigned DstLo = HRI.getSubReg(DstR, Hexagon::subreg_loreg);
-
- assert(MI->getOperand(1).isFI() && "Expect a frame index");
+ unsigned DstHi = HRI.getSubReg(DstR, Hexagon::vsub_hi);
+ unsigned DstLo = HRI.getSubReg(DstR, Hexagon::vsub_lo);
int FI = MI->getOperand(1).getIndex();
bool Is128B = HST.useHVXDblOps();
@@ -1646,14 +1752,14 @@ bool HexagonFrameLowering::expandStoreVec(MachineBasicBlock &B,
const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
MachineFunction &MF = *B.getParent();
auto &HST = MF.getSubtarget<HexagonSubtarget>();
- auto &MFI = *MF.getFrameInfo();
+ auto &MFI = MF.getFrameInfo();
MachineInstr *MI = &*It;
- DebugLoc DL = MI->getDebugLoc();
+ if (!MI->getOperand(0).isFI())
+ return false;
+ DebugLoc DL = MI->getDebugLoc();
unsigned SrcR = MI->getOperand(2).getReg();
bool IsKill = MI->getOperand(2).isKill();
-
- assert(MI->getOperand(0).isFI() && "Expect a frame index");
int FI = MI->getOperand(0).getIndex();
bool Is128B = HST.useHVXDblOps();
@@ -1684,13 +1790,13 @@ bool HexagonFrameLowering::expandLoadVec(MachineBasicBlock &B,
const HexagonInstrInfo &HII, SmallVectorImpl<unsigned> &NewRegs) const {
MachineFunction &MF = *B.getParent();
auto &HST = MF.getSubtarget<HexagonSubtarget>();
- auto &MFI = *MF.getFrameInfo();
+ auto &MFI = MF.getFrameInfo();
MachineInstr *MI = &*It;
- DebugLoc DL = MI->getDebugLoc();
+ if (!MI->getOperand(1).isFI())
+ return false;
+ DebugLoc DL = MI->getDebugLoc();
unsigned DstR = MI->getOperand(0).getReg();
-
- assert(MI->getOperand(1).isFI() && "Expect a frame index");
int FI = MI->getOperand(1).getIndex();
bool Is128B = HST.useHVXDblOps();
@@ -1715,7 +1821,6 @@ bool HexagonFrameLowering::expandLoadVec(MachineBasicBlock &B,
return true;
}
-
bool HexagonFrameLowering::expandSpillMacros(MachineFunction &MF,
SmallVectorImpl<unsigned> &NewRegs) const {
auto &HST = MF.getSubtarget<HexagonSubtarget>();
@@ -1743,30 +1848,26 @@ bool HexagonFrameLowering::expandSpillMacros(MachineFunction &MF,
case Hexagon::LDriw_mod:
Changed |= expandLoadInt(B, I, MRI, HII, NewRegs);
break;
- case Hexagon::STriq_pred_V6:
- case Hexagon::STriq_pred_V6_128B:
+ case Hexagon::PS_vstorerq_ai:
+ case Hexagon::PS_vstorerq_ai_128B:
Changed |= expandStoreVecPred(B, I, MRI, HII, NewRegs);
break;
- case Hexagon::LDriq_pred_V6:
- case Hexagon::LDriq_pred_V6_128B:
+ case Hexagon::PS_vloadrq_ai:
+ case Hexagon::PS_vloadrq_ai_128B:
Changed |= expandLoadVecPred(B, I, MRI, HII, NewRegs);
break;
- case Hexagon::LDrivv_pseudo_V6:
- case Hexagon::LDrivv_pseudo_V6_128B:
+ case Hexagon::PS_vloadrw_ai:
+ case Hexagon::PS_vloadrwu_ai:
+ case Hexagon::PS_vloadrw_ai_128B:
+ case Hexagon::PS_vloadrwu_ai_128B:
Changed |= expandLoadVec2(B, I, MRI, HII, NewRegs);
break;
- case Hexagon::STrivv_pseudo_V6:
- case Hexagon::STrivv_pseudo_V6_128B:
+ case Hexagon::PS_vstorerw_ai:
+ case Hexagon::PS_vstorerwu_ai:
+ case Hexagon::PS_vstorerw_ai_128B:
+ case Hexagon::PS_vstorerwu_ai_128B:
Changed |= expandStoreVec2(B, I, MRI, HII, NewRegs);
break;
- case Hexagon::STriv_pseudo_V6:
- case Hexagon::STriv_pseudo_V6_128B:
- Changed |= expandStoreVec(B, I, MRI, HII, NewRegs);
- break;
- case Hexagon::LDriv_pseudo_V6:
- case Hexagon::LDriv_pseudo_V6_128B:
- Changed |= expandLoadVec(B, I, MRI, HII, NewRegs);
- break;
}
}
}
@@ -1774,7 +1875,6 @@ bool HexagonFrameLowering::expandSpillMacros(MachineFunction &MF,
return Changed;
}
-
void HexagonFrameLowering::determineCalleeSaves(MachineFunction &MF,
BitVector &SavedRegs,
RegScavenger *RS) const {
@@ -1797,8 +1897,8 @@ void HexagonFrameLowering::determineCalleeSaves(MachineFunction &MF,
// We need to reserve a spill slot if scavenging could potentially require
// spilling a scavenged register.
- if (!NewRegs.empty()) {
- MachineFrameInfo &MFI = *MF.getFrameInfo();
+ if (!NewRegs.empty() || mayOverflowFrameOffset(MF)) {
+ MachineFrameInfo &MFI = MF.getFrameInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
SetVector<const TargetRegisterClass*> SpillRCs;
// Reserve an int register in any case, because it could be used to hold
@@ -1823,7 +1923,6 @@ void HexagonFrameLowering::determineCalleeSaves(MachineFunction &MF,
TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
}
-
unsigned HexagonFrameLowering::findPhysReg(MachineFunction &MF,
HexagonBlockRanges::IndexRange &FIR,
HexagonBlockRanges::InstrIndexMap &IndexMap,
@@ -1872,29 +1971,16 @@ void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF,
struct SlotInfo {
BlockRangeMap Map;
- unsigned Size;
- const TargetRegisterClass *RC;
+ unsigned Size = 0;
+ const TargetRegisterClass *RC = nullptr;
- SlotInfo() : Map(), Size(0), RC(nullptr) {}
+ SlotInfo() = default;
};
BlockIndexMap BlockIndexes;
SmallSet<int,4> BadFIs;
std::map<int,SlotInfo> FIRangeMap;
- auto getRegClass = [&MRI,&HRI] (HexagonBlockRanges::RegisterRef R)
- -> const TargetRegisterClass* {
- if (TargetRegisterInfo::isPhysicalRegister(R.Reg))
- assert(R.Sub == 0);
- if (TargetRegisterInfo::isVirtualRegister(R.Reg)) {
- auto *RCR = MRI.getRegClass(R.Reg);
- if (R.Sub == 0)
- return RCR;
- unsigned PR = *RCR->begin();
- R.Reg = HRI.getSubReg(PR, R.Sub);
- }
- return HRI.getMinimalPhysRegClass(R.Reg);
- };
// Accumulate register classes: get a common class for a pre-existing
// class HaveRC and a new class NewRC. Return nullptr if a common class
// cannot be found, otherwise return the resulting class. If HaveRC is
@@ -1942,19 +2028,13 @@ void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF,
// this restriction.
if (Load || Store) {
int TFI = Load ? LFI : SFI;
- unsigned AM = HII.getAddrMode(&In);
+ unsigned AM = HII.getAddrMode(In);
SlotInfo &SI = FIRangeMap[TFI];
bool Bad = (AM != HexagonII::BaseImmOffset);
if (!Bad) {
// If the addressing mode is ok, check the register class.
- const TargetRegisterClass *RC = nullptr;
- if (Load) {
- MachineOperand &DataOp = In.getOperand(0);
- RC = getRegClass({DataOp.getReg(), DataOp.getSubReg()});
- } else {
- MachineOperand &DataOp = In.getOperand(2);
- RC = getRegClass({DataOp.getReg(), DataOp.getSubReg()});
- }
+ unsigned OpNum = Load ? 0 : 2;
+ auto *RC = HII.getRegClass(In.getDesc(), OpNum, &HRI, MF);
RC = getCommonRC(SI.RC, RC);
if (RC == nullptr)
Bad = true;
@@ -1963,12 +2043,20 @@ void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF,
}
if (!Bad) {
// Check sizes.
- unsigned S = (1U << (HII.getMemAccessSize(&In) - 1));
+ unsigned S = (1U << (HII.getMemAccessSize(In) - 1));
if (SI.Size != 0 && SI.Size != S)
Bad = true;
else
SI.Size = S;
}
+ if (!Bad) {
+ for (auto *Mo : In.memoperands()) {
+ if (!Mo->isVolatile())
+ continue;
+ Bad = true;
+ break;
+ }
+ }
if (Bad)
BadFIs.insert(TFI);
}
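
// Editor's note on the hunk above: the byte size of an access is recovered as
// 1U << (getMemAccessSize(In) - 1), which assumes the encoded sizes run
// 1 = byte, 2 = halfword, 3 = word, 4 = doubleword (the HexagonII::MemAccessSize
// convention; stated here as an assumption, not something the diff itself shows).
// A trivial standalone check of that decoding:

#include <cstdio>

// Assumed encoding: 1 = byte, 2 = halfword, 3 = word, 4 = doubleword.
static unsigned accessSizeInBytes(unsigned Encoded) {
  return 1U << (Encoded - 1);
}

int main() {
  for (unsigned E = 1; E <= 4; ++E)
    std::printf("encoding %u -> %u byte(s)\n", E, accessSizeInBytes(E));
}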
@@ -2081,6 +2169,10 @@ void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF,
}
});
+#ifndef NDEBUG
+ bool HasOptLimit = SpillOptMax.getPosition();
+#endif
+
// eliminate loads, when all loads eliminated, eliminate all stores.
for (auto &B : MF) {
auto F = BlockIndexes.find(&B);
@@ -2101,26 +2193,33 @@ void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF,
if (!IndexType::isInstr(Range.start()) ||
!IndexType::isInstr(Range.end()))
continue;
- MachineInstr *SI = IM.getInstr(Range.start());
- MachineInstr *EI = IM.getInstr(Range.end());
- assert(SI->mayStore() && "Unexpected start instruction");
- assert(EI->mayLoad() && "Unexpected end instruction");
- MachineOperand &SrcOp = SI->getOperand(2);
+ MachineInstr &SI = *IM.getInstr(Range.start());
+ MachineInstr &EI = *IM.getInstr(Range.end());
+ assert(SI.mayStore() && "Unexpected start instruction");
+ assert(EI.mayLoad() && "Unexpected end instruction");
+ MachineOperand &SrcOp = SI.getOperand(2);
HexagonBlockRanges::RegisterRef SrcRR = { SrcOp.getReg(),
SrcOp.getSubReg() };
- auto *RC = getRegClass({SrcOp.getReg(), SrcOp.getSubReg()});
+ auto *RC = HII.getRegClass(SI.getDesc(), 2, &HRI, MF);
// The this-> is needed to unconfuse MSVC.
unsigned FoundR = this->findPhysReg(MF, Range, IM, DM, RC);
DEBUG(dbgs() << "Replacement reg:" << PrintReg(FoundR, &HRI) << '\n');
if (FoundR == 0)
continue;
+#ifndef NDEBUG
+ if (HasOptLimit) {
+ if (SpillOptCount >= SpillOptMax)
+ return;
+ SpillOptCount++;
+ }
+#endif
// Generate the copy-in: "FoundR = COPY SrcR" at the store location.
- MachineBasicBlock::iterator StartIt = SI, NextIt;
+ MachineBasicBlock::iterator StartIt = SI.getIterator(), NextIt;
MachineInstr *CopyIn = nullptr;
if (SrcRR.Reg != FoundR || SrcRR.Sub != 0) {
- const DebugLoc &DL = SI->getDebugLoc();
+ const DebugLoc &DL = SI.getDebugLoc();
CopyIn = BuildMI(B, StartIt, DL, HII.get(TargetOpcode::COPY), FoundR)
.addOperand(SrcOp);
}
@@ -2137,33 +2236,33 @@ void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF,
// We are keeping this register live.
SrcOp.setIsKill(false);
} else {
- B.erase(SI);
- IM.replaceInstr(SI, CopyIn);
+ B.erase(&SI);
+ IM.replaceInstr(&SI, CopyIn);
}
- auto EndIt = std::next(MachineBasicBlock::iterator(EI));
+ auto EndIt = std::next(EI.getIterator());
for (auto It = StartIt; It != EndIt; It = NextIt) {
- MachineInstr *MI = &*It;
+ MachineInstr &MI = *It;
NextIt = std::next(It);
int TFI;
- if (!HII.isLoadFromStackSlot(*MI, TFI) || TFI != FI)
+ if (!HII.isLoadFromStackSlot(MI, TFI) || TFI != FI)
continue;
- unsigned DstR = MI->getOperand(0).getReg();
- assert(MI->getOperand(0).getSubReg() == 0);
+ unsigned DstR = MI.getOperand(0).getReg();
+ assert(MI.getOperand(0).getSubReg() == 0);
MachineInstr *CopyOut = nullptr;
if (DstR != FoundR) {
- DebugLoc DL = MI->getDebugLoc();
+ DebugLoc DL = MI.getDebugLoc();
unsigned MemSize = (1U << (HII.getMemAccessSize(MI) - 1));
assert(HII.getAddrMode(MI) == HexagonII::BaseImmOffset);
unsigned CopyOpc = TargetOpcode::COPY;
- if (HII.isSignExtendingLoad(*MI))
+ if (HII.isSignExtendingLoad(MI))
CopyOpc = (MemSize == 1) ? Hexagon::A2_sxtb : Hexagon::A2_sxth;
- else if (HII.isZeroExtendingLoad(*MI))
+ else if (HII.isZeroExtendingLoad(MI))
CopyOpc = (MemSize == 1) ? Hexagon::A2_zxtb : Hexagon::A2_zxth;
CopyOut = BuildMI(B, It, DL, HII.get(CopyOpc), DstR)
- .addReg(FoundR, getKillRegState(MI == EI));
+ .addReg(FoundR, getKillRegState(&MI == &EI));
}
- IM.replaceInstr(MI, CopyOut);
+ IM.replaceInstr(&MI, CopyOut);
B.erase(It);
}
@@ -2176,7 +2275,6 @@ void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF,
}
}
-
void HexagonFrameLowering::expandAlloca(MachineInstr *AI,
const HexagonInstrInfo &HII, unsigned SP, unsigned CF) const {
MachineBasicBlock &MB = *AI->getParent();
@@ -2235,28 +2333,25 @@ void HexagonFrameLowering::expandAlloca(MachineInstr *AI,
}
}
-
bool HexagonFrameLowering::needsAligna(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- if (!MFI->hasVarSizedObjects())
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (!MFI.hasVarSizedObjects())
return false;
- unsigned MaxA = MFI->getMaxAlignment();
+ unsigned MaxA = MFI.getMaxAlignment();
if (MaxA <= getStackAlignment())
return false;
return true;
}
-
const MachineInstr *HexagonFrameLowering::getAlignaInstr(
const MachineFunction &MF) const {
for (auto &B : MF)
for (auto &I : B)
- if (I.getOpcode() == Hexagon::ALIGNA)
+ if (I.getOpcode() == Hexagon::PS_aligna)
return &I;
return nullptr;
}
-
/// Adds all callee-saved registers as implicit uses or defs to the
/// instruction.
void HexagonFrameLowering::addCalleeSaveRegistersAsImpOperand(MachineInstr *MI,
@@ -2266,7 +2361,6 @@ void HexagonFrameLowering::addCalleeSaveRegistersAsImpOperand(MachineInstr *MI,
MI->addOperand(MachineOperand::CreateReg(R.getReg(), IsDef, true, IsKill));
}
-
/// Determine whether the callee-saved register saves and restores should
/// be generated via inline code. If this function returns "true", inline
/// code will be generated. If this function returns "false", additional
@@ -2301,7 +2395,6 @@ bool HexagonFrameLowering::shouldInlineCSR(MachineFunction &MF,
return false;
}
-
bool HexagonFrameLowering::useSpillFunction(MachineFunction &MF,
const CSIVect &CSI) const {
if (shouldInlineCSR(MF, CSI))
@@ -2315,7 +2408,6 @@ bool HexagonFrameLowering::useSpillFunction(MachineFunction &MF,
return Threshold < NumCSI;
}
-
bool HexagonFrameLowering::useRestoreFunction(MachineFunction &MF,
const CSIVect &CSI) const {
if (shouldInlineCSR(MF, CSI))
@@ -2336,3 +2428,14 @@ bool HexagonFrameLowering::useRestoreFunction(MachineFunction &MF,
: SpillFuncThreshold;
return Threshold < NumCSI;
}
+
+bool HexagonFrameLowering::mayOverflowFrameOffset(MachineFunction &MF) const {
+ unsigned StackSize = MF.getFrameInfo().estimateStackSize(MF);
+ auto &HST = MF.getSubtarget<HexagonSubtarget>();
+ // A fairly simplistic guess as to whether a potential load/store to a
+ // stack location could require an extra register. It does not account
+ // for store-immediate instructions.
+ if (HST.useHVXOps())
+ return StackSize > 256;
+ return false;
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
index 3e76214559b7..529a61d4a5b5 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonFrameLowering.h
@@ -12,7 +12,11 @@
#include "Hexagon.h"
#include "HexagonBlockRanges.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/Target/TargetFrameLowering.h"
+#include <vector>
namespace llvm {
@@ -31,11 +35,13 @@ public:
override;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
override {}
+
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI) const override {
return true;
}
+
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI,
const TargetRegisterInfo *TRI) const override {
@@ -53,6 +59,7 @@ public:
bool targetHandlesStackFrameRounding() const override {
return true;
}
+
int getFrameIndexReference(const MachineFunction &MF, int FI,
unsigned &FrameReg) const override;
bool hasFP(const MachineFunction &MF) const override;
@@ -91,7 +98,8 @@ private:
const HexagonRegisterInfo &HRI, bool &PrologueStubs) const;
bool insertCSRRestoresInBlock(MachineBasicBlock &MBB, const CSIVect &CSI,
const HexagonRegisterInfo &HRI) const;
- bool updateExitPaths(MachineBasicBlock &MBB, MachineBasicBlock *RestoreB,
+ void updateEntryPaths(MachineFunction &MF, MachineBasicBlock &SaveB) const;
+ bool updateExitPaths(MachineBasicBlock &MBB, MachineBasicBlock &RestoreB,
BitVector &DoneT, BitVector &DoneF, BitVector &Path) const;
void insertCFIInstructionsAt(MachineBasicBlock &MBB,
MachineBasicBlock::iterator At) const;
@@ -140,11 +148,12 @@ private:
void addCalleeSaveRegistersAsImpOperand(MachineInstr *MI, const CSIVect &CSI,
bool IsDef, bool IsKill) const;
- bool shouldInlineCSR(llvm::MachineFunction &MF, const CSIVect &CSI) const;
+ bool shouldInlineCSR(MachineFunction &MF, const CSIVect &CSI) const;
bool useSpillFunction(MachineFunction &MF, const CSIVect &CSI) const;
bool useRestoreFunction(MachineFunction &MF, const CSIVect &CSI) const;
+ bool mayOverflowFrameOffset(MachineFunction &MF) const;
};
-} // End llvm namespace
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONFRAMELOWERING_H
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp
index f46b6d2a82e3..bb5e379ce014 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp
@@ -7,20 +7,25 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <cstdint>
+#include <iterator>
using namespace llvm;
@@ -41,28 +46,34 @@ static cl::opt<bool> NeedAnd("extract-needand", cl::init(true), cl::Hidden,
cl::desc("Require & in extract patterns"));
namespace llvm {
+
void initializeHexagonGenExtractPass(PassRegistry&);
FunctionPass *createHexagonGenExtract();
-}
+} // end namespace llvm
namespace {
+
class HexagonGenExtract : public FunctionPass {
public:
static char ID;
+
HexagonGenExtract() : FunctionPass(ID), ExtractCount(0) {
initializeHexagonGenExtractPass(*PassRegistry::getPassRegistry());
}
- virtual const char *getPassName() const override {
+
+ StringRef getPassName() const override {
return "Hexagon generate \"extract\" instructions";
}
- virtual bool runOnFunction(Function &F) override;
- virtual void getAnalysisUsage(AnalysisUsage &AU) const override {
+
+ bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<MachineFunctionAnalysis>();
FunctionPass::getAnalysisUsage(AU);
}
+
private:
bool visitBlock(BasicBlock *B);
bool convert(Instruction *In);
@@ -72,7 +83,8 @@ namespace {
};
char HexagonGenExtract::ID = 0;
-}
+
+} // end anonymous namespace
INITIALIZE_PASS_BEGIN(HexagonGenExtract, "hextract", "Hexagon generate "
"\"extract\" instructions", false, false)
@@ -80,11 +92,11 @@ INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(HexagonGenExtract, "hextract", "Hexagon generate "
"\"extract\" instructions", false, false)
-
bool HexagonGenExtract::convert(Instruction *In) {
using namespace PatternMatch;
- Value *BF = 0;
- ConstantInt *CSL = 0, *CSR = 0, *CM = 0;
+
+ Value *BF = nullptr;
+ ConstantInt *CSL = nullptr, *CSR = nullptr, *CM = nullptr;
BasicBlock *BB = In->getParent();
LLVMContext &Ctx = BB->getContext();
bool LogicalSR;
@@ -126,14 +138,14 @@ bool HexagonGenExtract::convert(Instruction *In) {
m_ConstantInt(CM)));
}
if (!Match) {
- CM = 0;
+ CM = nullptr;
// (shl (lshr x, #sr), #sl)
LogicalSR = true;
Match = match(In, m_Shl(m_LShr(m_Value(BF), m_ConstantInt(CSR)),
m_ConstantInt(CSL)));
}
if (!Match) {
- CM = 0;
+ CM = nullptr;
// (shl (ashr x, #sr), #sl)
LogicalSR = false;
Match = match(In, m_Shl(m_AShr(m_Value(BF), m_ConstantInt(CSR)),
@@ -207,7 +219,6 @@ bool HexagonGenExtract::convert(Instruction *In) {
return true;
}
-
bool HexagonGenExtract::visitBlock(BasicBlock *B) {
// Depth-first, bottom-up traversal.
DomTreeNode *DTN = DT->getNode(B);
@@ -240,7 +251,6 @@ bool HexagonGenExtract::visitBlock(BasicBlock *B) {
return Changed;
}
-
bool HexagonGenExtract::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
@@ -256,7 +266,6 @@ bool HexagonGenExtract::runOnFunction(Function &F) {
return Changed;
}
-
FunctionPass *llvm::createHexagonGenExtract() {
return new HexagonGenExtract();
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
index 71d079193d79..5a8e392d1275 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonGenInsert.cpp
@@ -9,29 +9,39 @@
#define DEBUG_TYPE "hexinsert"
+#include "BitTracker.h"
+#include "HexagonBitTracker.h"
+#include "HexagonInstrInfo.h"
+#include "HexagonRegisterInfo.h"
+#include "HexagonSubtarget.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/Pass.h"
-#include "llvm/PassRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/Timer.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Timer.h"
#include "llvm/Target/TargetRegisterInfo.h"
-
-#include "Hexagon.h"
-#include "HexagonRegisterInfo.h"
-#include "HexagonTargetMachine.h"
-#include "HexagonBitTracker.h"
-
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <utility>
#include <vector>
using namespace llvm;
@@ -59,20 +69,18 @@ static cl::opt<bool> OptSelectHas0("insert-has0", cl::init(false), cl::Hidden,
static cl::opt<bool> OptConst("insert-const", cl::init(false), cl::Hidden,
cl::ZeroOrMore);
-namespace {
- // The preprocessor gets confused when the DEBUG macro is passed larger
- // chunks of code. Use this function to detect debugging.
- inline bool isDebug() {
+// The preprocessor gets confused when the DEBUG macro is passed larger
+// chunks of code. Use this function to detect debugging.
+inline static bool isDebug() {
#ifndef NDEBUG
- return ::llvm::DebugFlag && ::llvm::isCurrentDebugType(DEBUG_TYPE);
+ return DebugFlag && isCurrentDebugType(DEBUG_TYPE);
#else
- return false;
+ return false;
#endif
- }
}
-
namespace {
+
// Set of virtual registers, based on BitVector.
struct RegisterSet : private BitVector {
RegisterSet() = default;
@@ -146,20 +154,23 @@ namespace {
if (size() <= Idx)
resize(std::max(Idx+1, 32U));
}
+
static inline unsigned v2x(unsigned v) {
return TargetRegisterInfo::virtReg2Index(v);
}
+
static inline unsigned x2v(unsigned x) {
return TargetRegisterInfo::index2VirtReg(x);
}
};
-
struct PrintRegSet {
PrintRegSet(const RegisterSet &S, const TargetRegisterInfo *RI)
: RS(S), TRI(RI) {}
+
friend raw_ostream &operator<< (raw_ostream &OS,
const PrintRegSet &P);
+
private:
const RegisterSet &RS;
const TargetRegisterInfo *TRI;
@@ -172,14 +183,12 @@ namespace {
OS << " }";
return OS;
}
-}
-
-namespace {
// A convenience class to associate unsigned numbers (such as virtual
// registers) with unsigned numbers.
struct UnsignedMap : public DenseMap<unsigned,unsigned> {
- UnsignedMap() : BaseType() {}
+ UnsignedMap() = default;
+
private:
typedef DenseMap<unsigned,unsigned> BaseType;
};
@@ -190,22 +199,21 @@ namespace {
// by a potentially expensive comparison function, or obtained by a proce-
// dure that should not be repeated each time two registers are compared.
struct RegisterOrdering : public UnsignedMap {
- RegisterOrdering() : UnsignedMap() {}
+ RegisterOrdering() = default;
+
unsigned operator[](unsigned VR) const {
const_iterator F = find(VR);
assert(F != end());
return F->second;
}
+
// Add operator(), so that objects of this class can be used as
// comparators in std::sort et al.
bool operator() (unsigned VR1, unsigned VR2) const {
return operator[](VR1) < operator[](VR2);
}
};
-}
-
-namespace {
// Ordering of bit values. This class does not have operator[], but
// it supplies a comparison operator() for use in std:: algorithms.
// The order is as follows:
@@ -214,12 +222,14 @@ namespace {
// or ord(ref1.Reg) == ord(ref2.Reg), and ref1.Pos < ref2.Pos.
struct BitValueOrdering {
BitValueOrdering(const RegisterOrdering &RB) : BaseOrd(RB) {}
+
bool operator() (const BitTracker::BitValue &V1,
const BitTracker::BitValue &V2) const;
+
const RegisterOrdering &BaseOrd;
};
-}
+} // end anonymous namespace
bool BitValueOrdering::operator() (const BitTracker::BitValue &V1,
const BitTracker::BitValue &V2) const {
@@ -241,20 +251,21 @@ bool BitValueOrdering::operator() (const BitTracker::BitValue &V1,
return V1.RefI.Pos < V2.RefI.Pos;
}
-
namespace {
+
// Cache for the BitTracker's cell map. Map lookup has a logarithmic
// complexity, this class will memoize the lookup results to reduce
// the access time for repeated lookups of the same cell.
struct CellMapShadow {
CellMapShadow(const BitTracker &T) : BT(T) {}
+
const BitTracker::RegisterCell &lookup(unsigned VR) {
unsigned RInd = TargetRegisterInfo::virtReg2Index(VR);
// Grow the vector to at least 32 elements.
if (RInd >= CVect.size())
- CVect.resize(std::max(RInd+16, 32U), 0);
+ CVect.resize(std::max(RInd+16, 32U), nullptr);
const BitTracker::RegisterCell *CP = CVect[RInd];
- if (CP == 0)
+ if (CP == nullptr)
CP = CVect[RInd] = &BT.lookup(VR);
return *CP;
}
@@ -265,16 +276,15 @@ namespace {
typedef std::vector<const BitTracker::RegisterCell*> CellVectType;
CellVectType CVect;
};
-}
-
-namespace {
// Comparator class for lexicographic ordering of virtual registers
// according to the corresponding BitTracker::RegisterCell objects.
struct RegisterCellLexCompare {
RegisterCellLexCompare(const BitValueOrdering &BO, CellMapShadow &M)
: BitOrd(BO), CM(M) {}
+
bool operator() (unsigned VR1, unsigned VR2) const;
+
private:
const BitValueOrdering &BitOrd;
CellMapShadow &CM;
@@ -290,15 +300,17 @@ namespace {
RegisterCellBitCompareSel(unsigned R, unsigned B, unsigned N,
const BitValueOrdering &BO, CellMapShadow &M)
: SelR(R), SelB(B), BitN(N), BitOrd(BO), CM(M) {}
+
bool operator() (unsigned VR1, unsigned VR2) const;
+
private:
const unsigned SelR, SelB;
const unsigned BitN;
const BitValueOrdering &BitOrd;
CellMapShadow &CM;
};
-}
+} // end anonymous namespace
bool RegisterCellLexCompare::operator() (unsigned VR1, unsigned VR2) const {
// Ordering of registers, made up from two given orderings:
@@ -327,7 +339,6 @@ bool RegisterCellLexCompare::operator() (unsigned VR1, unsigned VR2) const {
return BitOrd.BaseOrd[VR1] < BitOrd.BaseOrd[VR2];
}
-
bool RegisterCellBitCompareSel::operator() (unsigned VR1, unsigned VR2) const {
if (VR1 == VR2)
return false;
@@ -353,18 +364,22 @@ bool RegisterCellBitCompareSel::operator() (unsigned VR1, unsigned VR2) const {
return false;
}
-
namespace {
+
class OrderedRegisterList {
typedef std::vector<unsigned> ListType;
+
public:
OrderedRegisterList(const RegisterOrdering &RO) : Ord(RO) {}
+
void insert(unsigned VR);
void remove(unsigned VR);
+
unsigned operator[](unsigned Idx) const {
assert(Idx < Seq.size());
return Seq[Idx];
}
+
unsigned size() const {
return Seq.size();
}
@@ -378,16 +393,18 @@ namespace {
// Convenience function to convert an iterator to the corresponding index.
unsigned idx(iterator It) const { return It-begin(); }
+
private:
ListType Seq;
const RegisterOrdering &Ord;
};
-
struct PrintORL {
PrintORL(const OrderedRegisterList &L, const TargetRegisterInfo *RI)
: RL(L), TRI(RI) {}
+
friend raw_ostream &operator<< (raw_ostream &OS, const PrintORL &P);
+
private:
const OrderedRegisterList &RL;
const TargetRegisterInfo *TRI;
@@ -404,8 +421,8 @@ namespace {
OS << ')';
return OS;
}
-}
+} // end anonymous namespace
void OrderedRegisterList::insert(unsigned VR) {
iterator L = std::lower_bound(Seq.begin(), Seq.end(), VR, Ord);
@@ -415,21 +432,21 @@ void OrderedRegisterList::insert(unsigned VR) {
Seq.insert(L, VR);
}
-
void OrderedRegisterList::remove(unsigned VR) {
iterator L = std::lower_bound(Seq.begin(), Seq.end(), VR, Ord);
assert(L != Seq.end());
Seq.erase(L);
}
-
namespace {
+
// A record of the insert form. The fields correspond to the operands
// of the "insert" instruction:
// ... = insert(SrcR, InsR, #Wdh, #Off)
struct IFRecord {
IFRecord(unsigned SR = 0, unsigned IR = 0, uint16_t W = 0, uint16_t O = 0)
: SrcR(SR), InsR(IR), Wdh(W), Off(O) {}
+
unsigned SrcR, InsR;
uint16_t Wdh, Off;
};
@@ -437,10 +454,12 @@ namespace {
struct PrintIFR {
PrintIFR(const IFRecord &R, const TargetRegisterInfo *RI)
: IFR(R), TRI(RI) {}
+
private:
+ friend raw_ostream &operator<< (raw_ostream &OS, const PrintIFR &P);
+
const IFRecord &IFR;
const TargetRegisterInfo *TRI;
- friend raw_ostream &operator<< (raw_ostream &OS, const PrintIFR &P);
};
raw_ostream &operator<< (raw_ostream &OS, const PrintIFR &P) {
@@ -451,31 +470,37 @@ namespace {
}
typedef std::pair<IFRecord,RegisterSet> IFRecordWithRegSet;
-}
+} // end anonymous namespace
namespace llvm {
+
void initializeHexagonGenInsertPass(PassRegistry&);
FunctionPass *createHexagonGenInsert();
-}
+} // end namespace llvm
namespace {
+
class HexagonGenInsert : public MachineFunctionPass {
public:
static char ID;
- HexagonGenInsert() : MachineFunctionPass(ID), HII(0), HRI(0) {
+
+ HexagonGenInsert() : MachineFunctionPass(ID), HII(nullptr), HRI(nullptr) {
initializeHexagonGenInsertPass(*PassRegistry::getPassRegistry());
}
- virtual const char *getPassName() const {
+
+ StringRef getPassName() const override {
return "Hexagon generate \"insert\" instructions";
}
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
MachineFunctionPass::getAnalysisUsage(AU);
}
- virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
private:
typedef DenseMap<std::pair<unsigned,unsigned>,unsigned> PairMapType;
@@ -533,8 +558,8 @@ namespace {
};
char HexagonGenInsert::ID = 0;
-}
+} // end anonymous namespace
void HexagonGenInsert::dump_map() const {
typedef IFMapType::const_iterator iterator;
@@ -547,7 +572,6 @@ void HexagonGenInsert::dump_map() const {
}
}
-
void HexagonGenInsert::buildOrderingMF(RegisterOrdering &RO) const {
unsigned Index = 0;
typedef MachineFunction::const_iterator mf_iterator;
@@ -574,7 +598,6 @@ void HexagonGenInsert::buildOrderingMF(RegisterOrdering &RO) const {
// in the map.
}
-
void HexagonGenInsert::buildOrderingBT(RegisterOrdering &RB,
RegisterOrdering &RO) const {
// Create a vector of all virtual registers (collect them from the base
@@ -591,12 +614,10 @@ void HexagonGenInsert::buildOrderingBT(RegisterOrdering &RB,
RO.insert(std::make_pair(VRs[i], i));
}
-
inline bool HexagonGenInsert::isIntClass(const TargetRegisterClass *RC) const {
return RC == &Hexagon::IntRegsRegClass || RC == &Hexagon::DoubleRegsRegClass;
}
-
bool HexagonGenInsert::isConstant(unsigned VR) const {
const BitTracker::RegisterCell &RC = CMS->lookup(VR);
uint16_t W = RC.width();
@@ -609,7 +630,6 @@ bool HexagonGenInsert::isConstant(unsigned VR) const {
return true;
}
-
bool HexagonGenInsert::isSmallConstant(unsigned VR) const {
const BitTracker::RegisterCell &RC = CMS->lookup(VR);
uint16_t W = RC.width();
@@ -633,7 +653,6 @@ bool HexagonGenInsert::isSmallConstant(unsigned VR) const {
return isInt<8>(Lo_32(V)) && isInt<8>(Hi_32(V));
}
-
bool HexagonGenInsert::isValidInsertForm(unsigned DstR, unsigned SrcR,
unsigned InsR, uint16_t L, uint16_t S) const {
const TargetRegisterClass *DstRC = MRI->getRegClass(DstR);
@@ -656,7 +675,6 @@ bool HexagonGenInsert::isValidInsertForm(unsigned DstR, unsigned SrcR,
return true;
}
-
bool HexagonGenInsert::findSelfReference(unsigned VR) const {
const BitTracker::RegisterCell &RC = CMS->lookup(VR);
for (uint16_t i = 0, w = RC.width(); i < w; ++i) {
@@ -667,7 +685,6 @@ bool HexagonGenInsert::findSelfReference(unsigned VR) const {
return false;
}
-
bool HexagonGenInsert::findNonSelfReference(unsigned VR) const {
BitTracker::RegisterCell RC = CMS->lookup(VR);
for (uint16_t i = 0, w = RC.width(); i < w; ++i) {
@@ -678,7 +695,6 @@ bool HexagonGenInsert::findNonSelfReference(unsigned VR) const {
return false;
}
-
void HexagonGenInsert::getInstrDefs(const MachineInstr *MI,
RegisterSet &Defs) const {
for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) {
@@ -692,7 +708,6 @@ void HexagonGenInsert::getInstrDefs(const MachineInstr *MI,
}
}
-
void HexagonGenInsert::getInstrUses(const MachineInstr *MI,
RegisterSet &Uses) const {
for (unsigned i = 0, n = MI->getNumOperands(); i < n; ++i) {
@@ -706,7 +721,6 @@ void HexagonGenInsert::getInstrUses(const MachineInstr *MI,
}
}
-
unsigned HexagonGenInsert::distance(const MachineBasicBlock *FromB,
const MachineBasicBlock *ToB, const UnsignedMap &RPO,
PairMapType &M) const {
@@ -740,7 +754,6 @@ unsigned HexagonGenInsert::distance(const MachineBasicBlock *FromB,
return MaxD;
}
-
unsigned HexagonGenInsert::distance(MachineBasicBlock::const_iterator FromI,
MachineBasicBlock::const_iterator ToI, const UnsignedMap &RPO,
PairMapType &M) const {
@@ -753,11 +766,10 @@ unsigned HexagonGenInsert::distance(MachineBasicBlock::const_iterator FromI,
return D1+D2+D3;
}
-
bool HexagonGenInsert::findRecordInsertForms(unsigned VR,
OrderedRegisterList &AVs) {
if (isDebug()) {
- dbgs() << LLVM_FUNCTION_NAME << ": " << PrintReg(VR, HRI)
+ dbgs() << __func__ << ": " << PrintReg(VR, HRI)
<< " AVs: " << PrintORL(AVs, HRI) << "\n";
}
if (AVs.size() == 0)
@@ -832,7 +844,6 @@ bool HexagonGenInsert::findRecordInsertForms(unsigned VR,
}
}
-
bool Recorded = false;
for (iterator I = AVs.begin(), E = AVs.end(); I != E; ++I) {
@@ -888,7 +899,6 @@ bool HexagonGenInsert::findRecordInsertForms(unsigned VR,
return Recorded;
}
-
void HexagonGenInsert::collectInBlock(MachineBasicBlock *B,
OrderedRegisterList &AVs) {
if (isDebug())
@@ -949,7 +959,6 @@ void HexagonGenInsert::collectInBlock(MachineBasicBlock *B,
AVs.remove(VR);
}
-
void HexagonGenInsert::findRemovableRegisters(unsigned VR, IFRecord IF,
RegisterSet &RMs) const {
// For a given register VR and an insert form, find the registers that are
@@ -1001,7 +1010,6 @@ void HexagonGenInsert::findRemovableRegisters(unsigned VR, IFRecord IF,
RMs.remove(VR);
}
-
void HexagonGenInsert::computeRemovableRegisters() {
for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) {
IFListType &LL = I->second;
@@ -1010,21 +1018,19 @@ void HexagonGenInsert::computeRemovableRegisters() {
}
}
-
void HexagonGenInsert::pruneEmptyLists() {
// Remove all entries from the map, where the register has no insert forms
// associated with it.
typedef SmallVector<IFMapType::iterator,16> IterListType;
IterListType Prune;
for (IFMapType::iterator I = IFMap.begin(), E = IFMap.end(); I != E; ++I) {
- if (I->second.size() == 0)
+ if (I->second.empty())
Prune.push_back(I);
}
for (unsigned i = 0, n = Prune.size(); i < n; ++i)
IFMap.erase(Prune[i]);
}
-
void HexagonGenInsert::pruneCoveredSets(unsigned VR) {
IFMapType::iterator F = IFMap.find(VR);
assert(F != IFMap.end());
@@ -1038,7 +1044,7 @@ void HexagonGenInsert::pruneCoveredSets(unsigned VR) {
// If there exists a candidate with a non-empty set, the ones with empty
// sets will not be used and can be removed.
MachineInstr *DefVR = MRI->getVRegDef(VR);
- bool DefEx = HII->isConstExtended(DefVR);
+ bool DefEx = HII->isConstExtended(*DefVR);
bool HasNE = false;
for (unsigned i = 0, n = LL.size(); i < n; ++i) {
if (LL[i].second.empty())
@@ -1052,7 +1058,7 @@ void HexagonGenInsert::pruneCoveredSets(unsigned VR) {
auto IsEmpty = [] (const IFRecordWithRegSet &IR) -> bool {
return IR.second.empty();
};
- auto End = std::remove_if(LL.begin(), LL.end(), IsEmpty);
+ auto End = llvm::remove_if(LL, IsEmpty);
if (End != LL.end())
LL.erase(End, LL.end());
} else {
@@ -1112,7 +1118,6 @@ void HexagonGenInsert::pruneCoveredSets(unsigned VR) {
}
}
-
void HexagonGenInsert::pruneUsesTooFar(unsigned VR, const UnsignedMap &RPO,
PairMapType &M) {
IFMapType::iterator F = IFMap.find(VR);
@@ -1135,7 +1140,6 @@ void HexagonGenInsert::pruneUsesTooFar(unsigned VR, const UnsignedMap &RPO,
}
}
-
void HexagonGenInsert::pruneRegCopies(unsigned VR) {
IFMapType::iterator F = IFMap.find(VR);
assert(F != IFMap.end());
@@ -1144,12 +1148,11 @@ void HexagonGenInsert::pruneRegCopies(unsigned VR) {
auto IsCopy = [] (const IFRecordWithRegSet &IR) -> bool {
return IR.first.Wdh == 32 && (IR.first.Off == 0 || IR.first.Off == 32);
};
- auto End = std::remove_if(LL.begin(), LL.end(), IsCopy);
+ auto End = llvm::remove_if(LL, IsCopy);
if (End != LL.end())
LL.erase(End, LL.end());
}
-
void HexagonGenInsert::pruneCandidates() {
// Remove candidates that are not beneficial, regardless of the final
// selection method.
@@ -1176,8 +1179,8 @@ void HexagonGenInsert::pruneCandidates() {
pruneRegCopies(I->first);
}
-
namespace {
+
// Class for comparing IF candidates for registers that have multiple of
// them. The smaller the candidate, according to this ordering, the better.
// First, compare the number of zeros in the associated potentially remova-
@@ -1189,16 +1192,19 @@ namespace {
struct IFOrdering {
IFOrdering(const UnsignedMap &UC, const RegisterOrdering &BO)
: UseC(UC), BaseOrd(BO) {}
+
bool operator() (const IFRecordWithRegSet &A,
const IFRecordWithRegSet &B) const;
+
private:
void stats(const RegisterSet &Rs, unsigned &Size, unsigned &Zero,
unsigned &Sum) const;
+
const UnsignedMap &UseC;
const RegisterOrdering &BaseOrd;
};
-}
+} // end anonymous namespace
bool IFOrdering::operator() (const IFRecordWithRegSet &A,
const IFRecordWithRegSet &B) const {
@@ -1228,7 +1234,6 @@ bool IFOrdering::operator() (const IFRecordWithRegSet &A,
return A.first.Off < B.first.Off;
}
-
void IFOrdering::stats(const RegisterSet &Rs, unsigned &Size, unsigned &Zero,
unsigned &Sum) const {
for (unsigned R = Rs.find_first(); R; R = Rs.find_next(R)) {
@@ -1242,7 +1247,6 @@ void IFOrdering::stats(const RegisterSet &Rs, unsigned &Size, unsigned &Zero,
}
}
-
void HexagonGenInsert::selectCandidates() {
// Some registers may have multiple valid candidates. Pick the best one
// (or decide not to use any).
@@ -1280,7 +1284,6 @@ void HexagonGenInsert::selectCandidates() {
UseC[R] = (C > D) ? C-D : 0; // doz
}
-
bool SelectAll0 = OptSelectAll0, SelectHas0 = OptSelectHas0;
if (!SelectAll0 && !SelectHas0)
SelectAll0 = true;
@@ -1345,12 +1348,12 @@ void HexagonGenInsert::selectCandidates() {
AllRMs.clear();
for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) {
const IFListType &LL = I->second;
- if (LL.size() > 0)
+ if (!LL.empty())
AllRMs.insert(LL[0].second);
}
for (IFMapType::iterator I = IFMap.begin(); I != End; ++I) {
IFListType &LL = I->second;
- if (LL.size() == 0)
+ if (LL.empty())
continue;
unsigned SR = LL[0].first.SrcR, IR = LL[0].first.InsR;
if (AllRMs[SR] || AllRMs[IR])
@@ -1360,7 +1363,6 @@ void HexagonGenInsert::selectCandidates() {
pruneEmptyLists();
}
-
bool HexagonGenInsert::generateInserts() {
// Create a new register for each one from IFMap, and store them in the
// map.
@@ -1387,9 +1389,9 @@ bool HexagonGenInsert::generateInserts() {
unsigned Wdh = IF.Wdh, Off = IF.Off;
unsigned InsS = 0;
if (R32 && MRI->getRegClass(IF.InsR) == &Hexagon::DoubleRegsRegClass) {
- InsS = Hexagon::subreg_loreg;
+ InsS = Hexagon::isub_lo;
if (Off >= 32) {
- InsS = Hexagon::subreg_hireg;
+ InsS = Hexagon::isub_hi;
Off -= 32;
}
}
@@ -1418,7 +1420,6 @@ bool HexagonGenInsert::generateInserts() {
return true;
}
-
bool HexagonGenInsert::removeDeadCode(MachineDomTreeNode *N) {
bool Changed = false;
typedef GraphTraits<MachineDomTreeNode*> GTN;
@@ -1444,10 +1445,10 @@ bool HexagonGenInsert::removeDeadCode(MachineDomTreeNode *N) {
bool AllDead = true;
SmallVector<unsigned,2> Regs;
- for (ConstMIOperands Op(*MI); Op.isValid(); ++Op) {
- if (!Op->isReg() || !Op->isDef())
+ for (const MachineOperand &MO : MI->operands()) {
+ if (!MO.isReg() || !MO.isDef())
continue;
- unsigned R = Op->getReg();
+ unsigned R = MO.getReg();
if (!TargetRegisterInfo::isVirtualRegister(R) ||
!MRI->use_nodbg_empty(R)) {
AllDead = false;
@@ -1467,15 +1468,12 @@ bool HexagonGenInsert::removeDeadCode(MachineDomTreeNode *N) {
return Changed;
}
-
bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(*MF.getFunction()))
return false;
bool Timing = OptTiming, TimingDetail = Timing && OptTimingDetail;
bool Changed = false;
- TimerGroup __G("hexinsert");
- NamedRegionTimer __T("hexinsert", Timing && !TimingDetail);
// Sanity check: one, but not both.
assert(!OptSelectAll0 || !OptSelectHas0);
@@ -1521,8 +1519,12 @@ bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) {
MachineBasicBlock *RootB = MDT->getRoot();
OrderedRegisterList AvailR(CellOrd);
+ const char *const TGName = "hexinsert";
+ const char *const TGDesc = "Generate Insert Instructions";
+
{
- NamedRegionTimer _T("collection", "hexinsert", TimingDetail);
+ NamedRegionTimer _T("collection", "collection", TGName, TGDesc,
+ TimingDetail);
collectInBlock(RootB, AvailR);
// Complete the information gathered in IFMap.
computeRemovableRegisters();
@@ -1537,7 +1539,7 @@ bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) {
return Changed;
{
- NamedRegionTimer _T("pruning", "hexinsert", TimingDetail);
+ NamedRegionTimer _T("pruning", "pruning", TGName, TGDesc, TimingDetail);
pruneCandidates();
}
@@ -1550,7 +1552,7 @@ bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) {
return Changed;
{
- NamedRegionTimer _T("selection", "hexinsert", TimingDetail);
+ NamedRegionTimer _T("selection", "selection", TGName, TGDesc, TimingDetail);
selectCandidates();
}
@@ -1576,19 +1578,18 @@ bool HexagonGenInsert::runOnMachineFunction(MachineFunction &MF) {
return Changed;
{
- NamedRegionTimer _T("generation", "hexinsert", TimingDetail);
+ NamedRegionTimer _T("generation", "generation", TGName, TGDesc,
+ TimingDetail);
generateInserts();
}
return true;
}
-
FunctionPass *llvm::createHexagonGenInsert() {
return new HexagonGenInsert();
}
-
//===----------------------------------------------------------------------===//
// Public Constructor Functions
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonGenMux.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonGenMux.cpp
index bb9256db4b48..a718df9c70ab 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonGenMux.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonGenMux.cpp
@@ -22,36 +22,60 @@
#define DEBUG_TYPE "hexmux"
-#include "llvm/CodeGen/Passes.h"
+#include "HexagonInstrInfo.h"
+#include "HexagonRegisterInfo.h"
+#include "HexagonSubtarget.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "HexagonTargetMachine.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/MathExtras.h"
+#include <algorithm>
+#include <limits>
+#include <iterator>
+#include <utility>
using namespace llvm;
namespace llvm {
+
FunctionPass *createHexagonGenMux();
void initializeHexagonGenMuxPass(PassRegistry& Registry);
-}
+
+} // end namespace llvm
namespace {
+
class HexagonGenMux : public MachineFunctionPass {
public:
static char ID;
- HexagonGenMux() : MachineFunctionPass(ID), HII(0), HRI(0) {
+
+ HexagonGenMux() : MachineFunctionPass(ID), HII(nullptr), HRI(nullptr) {
initializeHexagonGenMuxPass(*PassRegistry::getPassRegistry());
}
- const char *getPassName() const override {
+
+ StringRef getPassName() const override {
return "Hexagon generate mux instructions";
}
+
void getAnalysisUsage(AnalysisUsage &AU) const override {
MachineFunctionPass::getAnalysisUsage(AU);
}
+
bool runOnMachineFunction(MachineFunction &MF) override;
+
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
private:
@@ -59,26 +83,33 @@ namespace {
const HexagonRegisterInfo *HRI;
struct CondsetInfo {
- unsigned PredR;
- unsigned TrueX, FalseX;
- CondsetInfo() : PredR(0), TrueX(UINT_MAX), FalseX(UINT_MAX) {}
+ unsigned PredR = 0;
+ unsigned TrueX = std::numeric_limits<unsigned>::max();
+ unsigned FalseX = std::numeric_limits<unsigned>::max();
+
+ CondsetInfo() = default;
};
+
struct DefUseInfo {
BitVector Defs, Uses;
- DefUseInfo() : Defs(), Uses() {}
+
+ DefUseInfo() = default;
DefUseInfo(const BitVector &D, const BitVector &U) : Defs(D), Uses(U) {}
};
+
struct MuxInfo {
MachineBasicBlock::iterator At;
unsigned DefR, PredR;
MachineOperand *SrcT, *SrcF;
MachineInstr *Def1, *Def2;
+
MuxInfo(MachineBasicBlock::iterator It, unsigned DR, unsigned PR,
MachineOperand *TOp, MachineOperand *FOp, MachineInstr &D1,
MachineInstr &D2)
: At(It), DefR(DR), PredR(PR), SrcT(TOp), SrcF(FOp), Def1(&D1),
Def2(&D2) {}
};
+
typedef DenseMap<MachineInstr*,unsigned> InstrIndexMap;
typedef DenseMap<unsigned,DefUseInfo> DefUseInfoMap;
typedef SmallVector<MuxInfo,4> MuxInfoList;
@@ -86,6 +117,7 @@ namespace {
bool isRegPair(unsigned Reg) const {
return Hexagon::DoubleRegsRegClass.contains(Reg);
}
+
void getSubRegs(unsigned Reg, BitVector &SRs) const;
void expandReg(unsigned Reg, BitVector &Set) const;
void getDefsUses(const MachineInstr *MI, BitVector &Defs,
@@ -99,18 +131,17 @@ namespace {
};
char HexagonGenMux::ID = 0;
-}
+
+} // end anonymous namespace
INITIALIZE_PASS(HexagonGenMux, "hexagon-mux",
"Hexagon generate mux instructions", false, false)
-
void HexagonGenMux::getSubRegs(unsigned Reg, BitVector &SRs) const {
for (MCSubRegIterator I(Reg, HRI); I.isValid(); ++I)
SRs[*I] = true;
}
-
void HexagonGenMux::expandReg(unsigned Reg, BitVector &Set) const {
if (isRegPair(Reg))
getSubRegs(Reg, Set);
@@ -118,7 +149,6 @@ void HexagonGenMux::expandReg(unsigned Reg, BitVector &Set) const {
Set[Reg] = true;
}
-
void HexagonGenMux::getDefsUses(const MachineInstr *MI, BitVector &Defs,
BitVector &Uses) const {
// First, get the implicit defs and uses for this instruction.
@@ -132,16 +162,15 @@ void HexagonGenMux::getDefsUses(const MachineInstr *MI, BitVector &Defs,
expandReg(*R++, Uses);
// Look over all operands, and collect explicit defs and uses.
- for (ConstMIOperands Mo(*MI); Mo.isValid(); ++Mo) {
- if (!Mo->isReg() || Mo->isImplicit())
+ for (const MachineOperand &MO : MI->operands()) {
+ if (!MO.isReg() || MO.isImplicit())
continue;
- unsigned R = Mo->getReg();
- BitVector &Set = Mo->isDef() ? Defs : Uses;
+ unsigned R = MO.getReg();
+ BitVector &Set = MO.isDef() ? Defs : Uses;
expandReg(R, Set);
}
}
-
void HexagonGenMux::buildMaps(MachineBasicBlock &B, InstrIndexMap &I2X,
DefUseInfoMap &DUM) {
unsigned Index = 0;
@@ -159,7 +188,6 @@ void HexagonGenMux::buildMaps(MachineBasicBlock &B, InstrIndexMap &I2X,
}
}
-
bool HexagonGenMux::isCondTransfer(unsigned Opc) const {
switch (Opc) {
case Hexagon::A2_tfrt:
@@ -171,7 +199,6 @@ bool HexagonGenMux::isCondTransfer(unsigned Opc) const {
return false;
}
-
unsigned HexagonGenMux::getMuxOpcode(const MachineOperand &Src1,
const MachineOperand &Src2) const {
bool IsReg1 = Src1.isReg(), IsReg2 = Src2.isReg();
@@ -188,7 +215,6 @@ unsigned HexagonGenMux::getMuxOpcode(const MachineOperand &Src1,
return 0;
}
-
bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) {
bool Changed = false;
InstrIndexMap I2X;
@@ -231,7 +257,8 @@ bool HexagonGenMux::genMuxInBlock(MachineBasicBlock &B) {
CI.TrueX = Idx;
else
CI.FalseX = Idx;
- if (CI.TrueX == UINT_MAX || CI.FalseX == UINT_MAX)
+ if (CI.TrueX == std::numeric_limits<unsigned>::max() ||
+ CI.FalseX == std::numeric_limits<unsigned>::max())
continue;
// There is now a complete definition of DR, i.e. we have the predicate
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp
index dcfd3e8317a9..f14c733dcf51 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonGenPredicate.cpp
@@ -9,49 +9,68 @@
#define DEBUG_TYPE "gen-pred"
-#include "HexagonTargetMachine.h"
+#include "HexagonInstrInfo.h"
+#include "HexagonSubtarget.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-
-#include <functional>
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <cassert>
+#include <iterator>
+#include <map>
#include <queue>
#include <set>
+#include <utility>
using namespace llvm;
namespace llvm {
+
void initializeHexagonGenPredicatePass(PassRegistry& Registry);
FunctionPass *createHexagonGenPredicate();
-}
+
+} // end namespace llvm
namespace {
+
struct Register {
unsigned R, S;
+
Register(unsigned r = 0, unsigned s = 0) : R(r), S(s) {}
Register(const MachineOperand &MO) : R(MO.getReg()), S(MO.getSubReg()) {}
+
bool operator== (const Register &Reg) const {
return R == Reg.R && S == Reg.S;
}
+
bool operator< (const Register &Reg) const {
return R < Reg.R || (R == Reg.R && S < Reg.S);
}
};
+
struct PrintRegister {
- PrintRegister(Register R, const TargetRegisterInfo &I) : Reg(R), TRI(I) {}
friend raw_ostream &operator<< (raw_ostream &OS, const PrintRegister &PR);
+
+ PrintRegister(Register R, const TargetRegisterInfo &I) : Reg(R), TRI(I) {}
+
private:
Register Reg;
const TargetRegisterInfo &TRI;
};
+
raw_ostream &operator<< (raw_ostream &OS, const PrintRegister &PR)
LLVM_ATTRIBUTE_UNUSED;
raw_ostream &operator<< (raw_ostream &OS, const PrintRegister &PR) {
@@ -61,18 +80,23 @@ namespace {
class HexagonGenPredicate : public MachineFunctionPass {
public:
static char ID;
- HexagonGenPredicate() : MachineFunctionPass(ID), TII(0), TRI(0), MRI(0) {
+
+ HexagonGenPredicate() : MachineFunctionPass(ID), TII(nullptr), TRI(nullptr),
+ MRI(nullptr) {
initializeHexagonGenPredicatePass(*PassRegistry::getPassRegistry());
}
- virtual const char *getPassName() const {
+
+ StringRef getPassName() const override {
return "Hexagon generate predicate operations";
}
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
MachineFunctionPass::getAnalysisUsage(AU);
}
- virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
private:
typedef SetVector<MachineInstr*> VectOfInst;
@@ -99,7 +123,8 @@ namespace {
};
char HexagonGenPredicate::ID = 0;
-}
+
+} // end anonymous namespace
INITIALIZE_PASS_BEGIN(HexagonGenPredicate, "hexagon-gen-pred",
"Hexagon generate predicate operations", false, false)
@@ -114,7 +139,6 @@ bool HexagonGenPredicate::isPredReg(unsigned R) {
return RC == &Hexagon::PredRegsRegClass;
}
-
unsigned HexagonGenPredicate::getPredForm(unsigned Opc) {
using namespace Hexagon;
@@ -159,7 +183,6 @@ unsigned HexagonGenPredicate::getPredForm(unsigned Opc) {
return 0;
}
-
bool HexagonGenPredicate::isConvertibleToPredForm(const MachineInstr *MI) {
unsigned Opc = MI->getOpcode();
if (getPredForm(Opc) != 0)
@@ -179,7 +202,6 @@ bool HexagonGenPredicate::isConvertibleToPredForm(const MachineInstr *MI) {
return false;
}
-
void HexagonGenPredicate::collectPredicateGPR(MachineFunction &MF) {
for (MachineFunction::iterator A = MF.begin(), Z = MF.end(); A != Z; ++A) {
MachineBasicBlock &B = *A;
@@ -200,9 +222,8 @@ void HexagonGenPredicate::collectPredicateGPR(MachineFunction &MF) {
}
}
-
void HexagonGenPredicate::processPredicateGPR(const Register &Reg) {
- DEBUG(dbgs() << LLVM_FUNCTION_NAME << ": "
+ DEBUG(dbgs() << __func__ << ": "
<< PrintReg(Reg.R, TRI, Reg.S) << "\n");
typedef MachineRegisterInfo::use_iterator use_iterator;
use_iterator I = MRI->use_begin(Reg.R), E = MRI->use_end();
@@ -220,7 +241,6 @@ void HexagonGenPredicate::processPredicateGPR(const Register &Reg) {
}
}
-
Register HexagonGenPredicate::getPredRegFor(const Register &Reg) {
// Create a predicate register for a given Reg. The newly created register
// will have its value copied from Reg, so that it can be later used as
@@ -230,7 +250,7 @@ Register HexagonGenPredicate::getPredRegFor(const Register &Reg) {
if (F != G2P.end())
return F->second;
- DEBUG(dbgs() << LLVM_FUNCTION_NAME << ": " << PrintRegister(Reg, *TRI));
+ DEBUG(dbgs() << __func__ << ": " << PrintRegister(Reg, *TRI));
MachineInstr *DefI = MRI->getVRegDef(Reg.R);
assert(DefI);
unsigned Opc = DefI->getOpcode();
@@ -261,7 +281,6 @@ Register HexagonGenPredicate::getPredRegFor(const Register &Reg) {
llvm_unreachable("Invalid argument");
}
-
bool HexagonGenPredicate::isScalarCmp(unsigned Opc) {
switch (Opc) {
case Hexagon::C2_cmpeq:
@@ -298,7 +317,6 @@ bool HexagonGenPredicate::isScalarCmp(unsigned Opc) {
return false;
}
-
bool HexagonGenPredicate::isScalarPred(Register PredReg) {
std::queue<Register> WorkQ;
WorkQ.push(PredReg);
@@ -330,9 +348,9 @@ bool HexagonGenPredicate::isScalarPred(Register PredReg) {
case Hexagon::C4_or_orn:
case Hexagon::C2_xor:
// Add operands to the queue.
- for (ConstMIOperands Mo(*DefI); Mo.isValid(); ++Mo)
- if (Mo->isReg() && Mo->isUse())
- WorkQ.push(Register(Mo->getReg()));
+ for (const MachineOperand &MO : DefI->operands())
+ if (MO.isReg() && MO.isUse())
+ WorkQ.push(Register(MO.getReg()));
break;
// All non-vector compares are ok, everything else is bad.
@@ -344,9 +362,8 @@ bool HexagonGenPredicate::isScalarPred(Register PredReg) {
return true;
}
-
bool HexagonGenPredicate::convertToPredForm(MachineInstr *MI) {
- DEBUG(dbgs() << LLVM_FUNCTION_NAME << ": " << MI << " " << *MI);
+ DEBUG(dbgs() << __func__ << ": " << MI << " " << *MI);
unsigned Opc = MI->getOpcode();
assert(isConvertibleToPredForm(MI));
@@ -356,7 +373,7 @@ bool HexagonGenPredicate::convertToPredForm(MachineInstr *MI) {
if (!MO.isReg() || !MO.isUse())
continue;
Register Reg(MO);
- if (Reg.S && Reg.S != Hexagon::subreg_loreg)
+ if (Reg.S && Reg.S != Hexagon::isub_lo)
return false;
if (!PredGPRs.count(Reg))
return false;
@@ -430,9 +447,8 @@ bool HexagonGenPredicate::convertToPredForm(MachineInstr *MI) {
return true;
}
-
bool HexagonGenPredicate::eliminatePredCopies(MachineFunction &MF) {
- DEBUG(dbgs() << LLVM_FUNCTION_NAME << "\n");
+ DEBUG(dbgs() << __func__ << "\n");
const TargetRegisterClass *PredRC = &Hexagon::PredRegsRegClass;
bool Changed = false;
VectOfInst Erase;
@@ -474,7 +490,6 @@ bool HexagonGenPredicate::eliminatePredCopies(MachineFunction &MF) {
return Changed;
}
-
bool HexagonGenPredicate::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(*MF.getFunction()))
return false;
@@ -518,8 +533,6 @@ bool HexagonGenPredicate::runOnMachineFunction(MachineFunction &MF) {
return Changed;
}
-
FunctionPass *llvm::createHexagonGenPredicate() {
return new HexagonGenPredicate();
}
-
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
index cc154c4be012..e477dcc0f64a 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonHardwareLoops.cpp
@@ -25,22 +25,37 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/SmallSet.h"
-#include "Hexagon.h"
+#include "HexagonInstrInfo.h"
#include "HexagonSubtarget.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/PassSupport.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include <algorithm>
+#include "llvm/Target/TargetRegisterInfo.h"
+#include <cassert>
+#include <cstdint>
+#include <cstdlib>
+#include <iterator>
+#include <map>
+#include <set>
+#include <utility>
#include <vector>
using namespace llvm;
@@ -60,15 +75,26 @@ static cl::opt<bool> HWCreatePreheader("hexagon-hwloop-preheader",
cl::Hidden, cl::init(true),
cl::desc("Add a preheader to a hardware loop if one doesn't exist"));
+// Turn it off by default. If a preheader block is not created here, the
+// software pipeliner may be unable to find a block suitable to serve as
+// a preheader. In that case SWP will not run.
+static cl::opt<bool> SpecPreheader("hwloop-spec-preheader", cl::init(false),
+ cl::Hidden, cl::ZeroOrMore, cl::desc("Allow speculation of preheader "
+ "instructions"));
+
STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
namespace llvm {
+
FunctionPass *createHexagonHardwareLoops();
void initializeHexagonHardwareLoopsPass(PassRegistry&);
-}
+
+} // end namespace llvm
namespace {
+
class CountValue;
+
struct HexagonHardwareLoops : public MachineFunctionPass {
MachineLoopInfo *MLI;
MachineRegisterInfo *MRI;
@@ -87,7 +113,7 @@ namespace {
bool runOnMachineFunction(MachineFunction &MF) override;
- const char *getPassName() const override { return "Hexagon Hardware Loops"; }
+ StringRef getPassName() const override { return "Hexagon Hardware Loops"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineDominatorTree>();
@@ -138,7 +164,6 @@ namespace {
static bool isUnsigned(Kind Cmp) {
return (Cmp & U);
}
-
};
/// \brief Find the register that contains the loop controlling
@@ -289,6 +314,7 @@ namespace {
CV_Register,
CV_Immediate
};
+
private:
CountValueType Kind;
union Values {
@@ -309,6 +335,7 @@ namespace {
Contents.ImmVal = v;
}
}
+
bool isReg() const { return Kind == CV_Register; }
bool isImm() const { return Kind == CV_Immediate; }
@@ -330,8 +357,8 @@ namespace {
if (isImm()) { OS << Contents.ImmVal; }
}
};
-} // end anonymous namespace
+} // end anonymous namespace
INITIALIZE_PASS_BEGIN(HexagonHardwareLoops, "hwloops",
"Hexagon Hardware Loops", false, false)
@@ -366,28 +393,15 @@ bool HexagonHardwareLoops::runOnMachineFunction(MachineFunction &MF) {
return Changed;
}
-/// \brief Return the latch block if it's one of the exiting blocks. Otherwise,
-/// return the exiting block. Return 'null' when multiple exiting blocks are
-/// present.
-static MachineBasicBlock* getExitingBlock(MachineLoop *L) {
- if (MachineBasicBlock *Latch = L->getLoopLatch()) {
- if (L->isLoopExiting(Latch))
- return Latch;
- else
- return L->getExitingBlock();
- }
- return nullptr;
-}
-
bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,
unsigned &Reg,
int64_t &IVBump,
MachineInstr *&IVOp
) const {
MachineBasicBlock *Header = L->getHeader();
- MachineBasicBlock *Preheader = L->getLoopPreheader();
+ MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader);
MachineBasicBlock *Latch = L->getLoopLatch();
- MachineBasicBlock *ExitingBlock = getExitingBlock(L);
+ MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
if (!Header || !Preheader || !Latch || !ExitingBlock)
return false;
@@ -417,10 +431,8 @@ bool HexagonHardwareLoops::findInductionRegister(MachineLoop *L,
unsigned PhiOpReg = Phi->getOperand(i).getReg();
MachineInstr *DI = MRI->getVRegDef(PhiOpReg);
- unsigned UpdOpc = DI->getOpcode();
- bool isAdd = (UpdOpc == Hexagon::A2_addi || UpdOpc == Hexagon::A2_addp);
- if (isAdd) {
+ if (DI->getDesc().isAdd()) {
// If the register operand to the add is the PHI we're looking at, this
// meets the induction pattern.
unsigned IndReg = DI->getOperand(1).getReg();
@@ -555,7 +567,7 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
// Look for the cmp instruction to determine if we can get a useful trip
// count. The trip count can be either a register or an immediate. The
// location of the value depends upon the type (reg or imm).
- MachineBasicBlock *ExitingBlock = getExitingBlock(L);
+ MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
if (!ExitingBlock)
return nullptr;
@@ -566,7 +578,7 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
if (!FoundIV)
return nullptr;
- MachineBasicBlock *Preheader = L->getLoopPreheader();
+ MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader);
MachineOperand *InitialValue = nullptr;
MachineInstr *IV_Phi = MRI->getVRegDef(IVReg);
@@ -593,7 +605,7 @@ CountValue *HexagonHardwareLoops::getLoopTripCount(MachineLoop *L,
// the fall through can go to the header.
assert (TB && "Exit block without a branch?");
if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) {
- MachineBasicBlock *LTB = 0, *LFB = 0;
+ MachineBasicBlock *LTB = nullptr, *LFB = nullptr;
SmallVector<MachineOperand,2> LCond;
bool NotAnalyzed = TII->analyzeBranch(*Latch, LTB, LFB, LCond, false);
if (NotAnalyzed)
@@ -787,7 +799,7 @@ CountValue *HexagonHardwareLoops::computeCount(MachineLoop *Loop,
if (!isPowerOf2_64(std::abs(IVBump)))
return nullptr;
- MachineBasicBlock *PH = Loop->getLoopPreheader();
+ MachineBasicBlock *PH = MLI->findLoopPreheader(Loop, SpecPreheader);
assert (PH && "Should have a preheader by now");
MachineBasicBlock::iterator InsertPos = PH->getFirstTerminator();
DebugLoc DL;
@@ -951,8 +963,8 @@ bool HexagonHardwareLoops::isInvalidLoopOperation(const MachineInstr *MI,
// Call is not allowed because the callee may use a hardware loop except for
// the case when the call never returns.
- if (MI->getDesc().isCall() && MI->getOpcode() != Hexagon::CALLv3nr)
- return true;
+ if (MI->getDesc().isCall())
+ return !TII->doesNotReturn(*MI);
// Check if the instruction defines a hardware loop register.
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
@@ -1138,7 +1150,7 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L,
if (containsInvalidInstruction(L, IsInnerHWLoop))
return false;
- MachineBasicBlock *LastMBB = getExitingBlock(L);
+ MachineBasicBlock *LastMBB = L->findLoopControlBlock();
// Don't generate hw loop if the loop has more than one exit.
if (!LastMBB)
return false;
@@ -1153,7 +1165,7 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L,
// Ensure the loop has a preheader: the loop instruction will be
// placed there.
- MachineBasicBlock *Preheader = L->getLoopPreheader();
+ MachineBasicBlock *Preheader = MLI->findLoopPreheader(L, SpecPreheader);
if (!Preheader) {
Preheader = createPreheaderForLoop(L);
if (!Preheader)
@@ -1180,10 +1192,10 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L,
// Determine the loop start.
MachineBasicBlock *TopBlock = L->getTopBlock();
- MachineBasicBlock *ExitingBlock = getExitingBlock(L);
- MachineBasicBlock *LoopStart = 0;
+ MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
+ MachineBasicBlock *LoopStart = nullptr;
if (ExitingBlock != L->getLoopLatch()) {
- MachineBasicBlock *TB = 0, *FB = 0;
+ MachineBasicBlock *TB = nullptr, *FB = nullptr;
SmallVector<MachineOperand, 2> Cond;
if (TII->analyzeBranch(*ExitingBlock, TB, FB, Cond, false))
@@ -1254,7 +1266,7 @@ bool HexagonHardwareLoops::convertToHardwareLoop(MachineLoop *L,
if (LastI != LastMBB->end())
LastI = LastMBB->erase(LastI);
SmallVector<MachineOperand, 0> Cond;
- TII->InsertBranch(*LastMBB, BranchTarget, nullptr, Cond, LastIDL);
+ TII->insertBranch(*LastMBB, BranchTarget, nullptr, Cond, LastIDL);
}
} else {
// Conditional branch to loop start; just delete it.
@@ -1423,12 +1435,13 @@ bool HexagonHardwareLoops::loopCountMayWrapOrUnderFlow(
if (!TII->analyzeCompare(*MI, CmpReg1, CmpReg2, CmpMask, CmpValue))
continue;
- MachineBasicBlock *TBB = 0, *FBB = 0;
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
SmallVector<MachineOperand, 2> Cond;
if (TII->analyzeBranch(*MI->getParent(), TBB, FBB, Cond, false))
continue;
- Comparison::Kind Cmp = getComparisonKind(MI->getOpcode(), 0, 0, 0);
+ Comparison::Kind Cmp =
+ getComparisonKind(MI->getOpcode(), nullptr, nullptr, 0);
if (Cmp == 0)
continue;
if (TII->predOpcodeHasNot(Cond) ^ (TBB != MBB))
@@ -1479,8 +1492,8 @@ bool HexagonHardwareLoops::checkForImmediate(const MachineOperand &MO,
case TargetOpcode::COPY:
case Hexagon::A2_tfrsi:
case Hexagon::A2_tfrpi:
- case Hexagon::CONST32_Int_Real:
- case Hexagon::CONST64_Int_Real: {
+ case Hexagon::CONST32:
+ case Hexagon::CONST64: {
// Call recursively to avoid an extra check whether operand(1) is
// indeed an immediate (it could be a global address, for example),
// plus we can handle COPY at the same time.
@@ -1509,9 +1522,9 @@ bool HexagonHardwareLoops::checkForImmediate(const MachineOperand &MO,
return false;
unsigned Sub2 = DI->getOperand(2).getImm();
unsigned Sub4 = DI->getOperand(4).getImm();
- if (Sub2 == Hexagon::subreg_loreg && Sub4 == Hexagon::subreg_hireg)
+ if (Sub2 == Hexagon::isub_lo && Sub4 == Hexagon::isub_hi)
TV = V1 | (V3 << 32);
- else if (Sub2 == Hexagon::subreg_hireg && Sub4 == Hexagon::subreg_loreg)
+ else if (Sub2 == Hexagon::isub_hi && Sub4 == Hexagon::isub_lo)
TV = V3 | (V1 << 32);
else
llvm_unreachable("Unexpected form of REG_SEQUENCE");
@@ -1522,13 +1535,13 @@ bool HexagonHardwareLoops::checkForImmediate(const MachineOperand &MO,
return false;
}
- // By now, we should have successfuly obtained the immediate value defining
+ // By now, we should have successfully obtained the immediate value defining
// the register referenced in MO. Handle a potential use of a subregister.
switch (MO.getSubReg()) {
- case Hexagon::subreg_loreg:
+ case Hexagon::isub_lo:
Val = TV & 0xFFFFFFFFULL;
break;
- case Hexagon::subreg_hireg:
+ case Hexagon::isub_hi:
Val = (TV >> 32) & 0xFFFFFFFFULL;
break;
default:
@@ -1569,7 +1582,7 @@ static bool isImmValidForOpcode(unsigned CmpOpc, int64_t Imm) {
bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
MachineBasicBlock *Header = L->getHeader();
MachineBasicBlock *Latch = L->getLoopLatch();
- MachineBasicBlock *ExitingBlock = getExitingBlock(L);
+ MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
if (!(Header && Latch && ExitingBlock))
return false;
@@ -1598,10 +1611,8 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
unsigned PhiReg = Phi->getOperand(i).getReg();
MachineInstr *DI = MRI->getVRegDef(PhiReg);
- unsigned UpdOpc = DI->getOpcode();
- bool isAdd = (UpdOpc == Hexagon::A2_addi || UpdOpc == Hexagon::A2_addp);
- if (isAdd) {
+ if (DI->getDesc().isAdd()) {
// If the register operand to the add/sub is the PHI we are looking
// at, this meets the induction pattern.
unsigned IndReg = DI->getOperand(1).getReg();
@@ -1626,7 +1637,7 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
return false;
if (ExitingBlock != Latch && (TB == Latch || FB == Latch)) {
- MachineBasicBlock *LTB = 0, *LFB = 0;
+ MachineBasicBlock *LTB = nullptr, *LFB = nullptr;
SmallVector<MachineOperand,2> LCond;
bool NotAnalyzed = TII->analyzeBranch(*Latch, LTB, LFB, LCond, false);
if (NotAnalyzed)
@@ -1764,7 +1775,8 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
// It is not valid to do this transformation on an unsigned comparison
// because it may underflow.
- Comparison::Kind Cmp = getComparisonKind(PredDef->getOpcode(), 0, 0, 0);
+ Comparison::Kind Cmp =
+ getComparisonKind(PredDef->getOpcode(), nullptr, nullptr, 0);
if (!Cmp || Comparison::isUnsigned(Cmp))
return false;
@@ -1807,18 +1819,17 @@ bool HexagonHardwareLoops::fixupInductionVariable(MachineLoop *L) {
return false;
}
-/// \brief Create a preheader for a given loop.
+/// createPreheaderForLoop - Create a preheader for a given loop.
MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
MachineLoop *L) {
- if (MachineBasicBlock *TmpPH = L->getLoopPreheader())
+ if (MachineBasicBlock *TmpPH = MLI->findLoopPreheader(L, SpecPreheader))
return TmpPH;
-
if (!HWCreatePreheader)
return nullptr;
MachineBasicBlock *Header = L->getHeader();
MachineBasicBlock *Latch = L->getLoopLatch();
- MachineBasicBlock *ExitingBlock = getExitingBlock(L);
+ MachineBasicBlock *ExitingBlock = L->findLoopControlBlock();
MachineFunction *MF = Header->getParent();
DebugLoc DL;
@@ -1898,7 +1909,6 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
PN->addOperand(MachineOperand::CreateReg(NewPR, false));
PN->addOperand(MachineOperand::CreateMBB(NewPH));
}
-
} else {
assert(Header->pred_size() == 2);
@@ -1934,7 +1944,7 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
(void)NotAnalyzed; // suppress compiler warning
assert (!NotAnalyzed && "Should be analyzable!");
if (TB != Header && (Tmp2.empty() || FB != Header))
- TII->InsertBranch(*PB, NewPH, nullptr, EmptyCond, DL);
+ TII->insertBranch(*PB, NewPH, nullptr, EmptyCond, DL);
PB->ReplaceUsesOfBlockWith(Header, NewPH);
}
}
@@ -1946,10 +1956,10 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
(void)LatchNotAnalyzed; // suppress compiler warning
assert (!LatchNotAnalyzed && "Should be analyzable!");
if (!TB && !FB)
- TII->InsertBranch(*Latch, Header, nullptr, EmptyCond, DL);
+ TII->insertBranch(*Latch, Header, nullptr, EmptyCond, DL);
// Finally, the branch from the preheader to the header.
- TII->InsertBranch(*NewPH, Header, nullptr, EmptyCond, DL);
+ TII->insertBranch(*NewPH, Header, nullptr, EmptyCond, DL);
NewPH->addSuccessor(Header);
MachineLoop *ParentLoop = L->getParentLoop();
@@ -1958,9 +1968,12 @@ MachineBasicBlock *HexagonHardwareLoops::createPreheaderForLoop(
// Update the dominator information with the new preheader.
if (MDT) {
- MachineDomTreeNode *HDom = MDT->getNode(Header);
- MDT->addNewBlock(NewPH, HDom->getIDom()->getBlock());
- MDT->changeImmediateDominator(Header, NewPH);
+ if (MachineDomTreeNode *HN = MDT->getNode(Header)) {
+ if (MachineDomTreeNode *DHN = HN->getIDom()) {
+ MDT->addNewBlock(NewPH, DHN->getBlock());
+ MDT->changeImmediateDominator(Header, NewPH);
+ }
+ }
}
return NewPH;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonHazardRecognizer.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonHazardRecognizer.cpp
new file mode 100644
index 000000000000..036b18678709
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonHazardRecognizer.cpp
@@ -0,0 +1,140 @@
+//===-- HexagonHazardRecognizer.cpp - Hexagon Post RA Hazard Recognizer ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the hazard recognizer for scheduling on Hexagon.
+// Use a DFA based hazard recognizer.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonHazardRecognizer.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "post-RA-sched"
+
+void HexagonHazardRecognizer::Reset() {
+ DEBUG(dbgs() << "Reset hazard recognizer\n");
+ Resources->clearResources();
+ PacketNum = 0;
+ UsesDotCur = nullptr;
+ DotCurPNum = -1;
+ RegDefs.clear();
+}
+
+ScheduleHazardRecognizer::HazardType
+HexagonHazardRecognizer::getHazardType(SUnit *SU, int stalls) {
+ MachineInstr *MI = SU->getInstr();
+ if (!MI || TII->isZeroCost(MI->getOpcode()))
+ return NoHazard;
+
+ if (!Resources->canReserveResources(*MI)) {
+ DEBUG(dbgs() << "*** Hazard in cycle " << PacketNum << ", " << *MI);
+ HazardType RetVal = Hazard;
+ if (TII->mayBeNewStore(*MI)) {
+ // Make sure the register to be stored is defined by an instruction in the
+ // packet.
+ MachineOperand &MO = MI->getOperand(MI->getNumOperands() - 1);
+ if (!MO.isReg() || RegDefs.count(MO.getReg()) == 0)
+ return Hazard;
+ // The .new store version uses different resources so check if it
+ // causes a hazard.
+ MachineFunction *MF = MI->getParent()->getParent();
+ MachineInstr *NewMI =
+ MF->CreateMachineInstr(TII->get(TII->getDotNewOp(*MI)),
+ MI->getDebugLoc());
+ if (Resources->canReserveResources(*NewMI))
+ RetVal = NoHazard;
+ DEBUG(dbgs() << "*** Try .new version? " << (RetVal == NoHazard) << "\n");
+ MF->DeleteMachineInstr(NewMI);
+ }
+ return RetVal;
+ }
+
+ if (SU == UsesDotCur && DotCurPNum != (int)PacketNum) {
+ DEBUG(dbgs() << "*** .cur Hazard in cycle " << PacketNum << ", " << *MI);
+ return Hazard;
+ }
+
+ return NoHazard;
+}
+
+void HexagonHazardRecognizer::AdvanceCycle() {
+ DEBUG(dbgs() << "Advance cycle, clear state\n");
+ Resources->clearResources();
+ if (DotCurPNum != -1 && DotCurPNum != (int)PacketNum) {
+ UsesDotCur = nullptr;
+ DotCurPNum = -1;
+ }
+ PacketNum++;
+ RegDefs.clear();
+}
+
+/// If a packet contains a dot cur instruction, then we may prefer the
+/// instruction that can use the dot cur result. Or, if the use
+/// isn't scheduled in the same packet, then prefer other instructions
+/// in the subsequent packet.
+bool HexagonHazardRecognizer::ShouldPreferAnother(SUnit *SU) {
+ return UsesDotCur && ((SU == UsesDotCur) ^ (DotCurPNum == (int)PacketNum));
+}
+
+void HexagonHazardRecognizer::EmitInstruction(SUnit *SU) {
+ MachineInstr *MI = SU->getInstr();
+ if (!MI)
+ return;
+
+ // Keep the set of definitions for each packet, which is used to determine
+ // if a .new can be used.
+ for (const MachineOperand &MO : MI->operands())
+ if (MO.isReg() && MO.isDef() && !MO.isImplicit())
+ RegDefs.insert(MO.getReg());
+
+ if (TII->isZeroCost(MI->getOpcode()))
+ return;
+
+ if (!Resources->canReserveResources(*MI)) {
+ // It must be a .new store since other instructions must be able to be
+ // reserved at this point.
+ assert(TII->mayBeNewStore(*MI) && "Expecting .new store");
+ MachineFunction *MF = MI->getParent()->getParent();
+ MachineInstr *NewMI =
+ MF->CreateMachineInstr(TII->get(TII->getDotNewOp(*MI)),
+ MI->getDebugLoc());
+ assert(Resources->canReserveResources(*NewMI));
+ Resources->reserveResources(*NewMI);
+ MF->DeleteMachineInstr(NewMI);
+ }
+ else
+ Resources->reserveResources(*MI);
+ DEBUG(dbgs() << " Add instruction " << *MI);
+
+ // When scheduling a dot cur instruction, check if there is an instruction
+ // that can use the dot cur in the same packet. If so, we'll attempt to
+ // schedule it before other instructions. We only do this if the use has
+ // the same height as the dot cur. Otherwise, we may miss scheduling an
+ // instruction with a greater height, which is more important.
+ if (TII->mayBeCurLoad(*MI))
+ for (auto &S : SU->Succs)
+ if (S.isAssignedRegDep() && S.getLatency() == 0 &&
+ SU->getHeight() == S.getSUnit()->getHeight()) {
+ UsesDotCur = S.getSUnit();
+ DotCurPNum = PacketNum;
+ break;
+ }
+ if (SU == UsesDotCur) {
+ UsesDotCur = nullptr;
+ DotCurPNum = -1;
+ }
+}
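
The recognizer above follows the callback protocol its header documents: the scheduler asks getHazardType() before issuing an instruction, calls EmitInstruction() once it commits to it, and AdvanceCycle() when the current packet closes. A rough, self-contained sketch of that protocol follows; ToyPacket, its two-slot limit, and the driver loop are invented stand-ins for the DFAPacketizer resource model and the post-RA scheduler, not part of the Hexagon sources.

    // Illustration only: ToyPacket and the two-slot limit are made up; the
    // real recognizer delegates to DFAPacketizer::canReserveResources().
    #include <cstdio>

    enum HazardType { NoHazard, Hazard };

    // Stand-in for the packet resource model: at most two slots per cycle.
    struct ToyPacket {
      int UsedSlots = 0;
      bool canReserve() const { return UsedSlots < 2; }
      void reserve() { ++UsedSlots; }
      void clear() { UsedSlots = 0; }
    };

    struct ToyRecognizer {
      ToyPacket Packet;
      unsigned PacketNum = 0;

      HazardType getHazardType() const {
        return Packet.canReserve() ? NoHazard : Hazard;
      }
      void emitInstruction() { Packet.reserve(); }
      void advanceCycle() {             // close the packet, start a new one
        Packet.clear();
        ++PacketNum;
      }
    };

    int main() {
      ToyRecognizer HR;
      for (int I = 0; I < 5; ++I) {     // five ready instructions, issued top-down
        if (HR.getHazardType() == Hazard)
          HR.advanceCycle();            // packet is full; try the next cycle
        HR.emitInstruction();
        std::printf("instr %d -> packet %u\n", I, HR.PacketNum);
      }
      return 0;                         // packs 0,1 | 2,3 | 4 into packets 0,1,2
    }
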
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonHazardRecognizer.h b/contrib/llvm/lib/Target/Hexagon/HexagonHazardRecognizer.h
new file mode 100644
index 000000000000..70efcb7a9f76
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonHazardRecognizer.h
@@ -0,0 +1,78 @@
+//===--- HexagonHazardRecognizer.h - Hexagon Post RA Hazard Recognizer ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This file defines the hazard recognizer for scheduling on Hexagon.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONPROFITRECOGNIZER_H
+#define LLVM_LIB_TARGET_HEXAGON_HEXAGONPROFITRECOGNIZER_H
+
+#include "HexagonInstrInfo.h"
+#include "HexagonSubtarget.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/DFAPacketizer.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+
+namespace llvm {
+
+class HexagonHazardRecognizer : public ScheduleHazardRecognizer {
+ DFAPacketizer *Resources;
+ const HexagonInstrInfo *TII;
+ unsigned PacketNum;
+  // The potential user of a dot cur result in the current packet, if any.
+  // This is used by the scheduling priority function.
+ SUnit *UsesDotCur;
+  // The packet number when a dot cur is emitted. If its use is not generated
+ // in the same packet, then try to wait another cycle before emitting.
+ int DotCurPNum;
+ // The set of registers defined by instructions in the current packet.
+ SmallSet<unsigned, 8> RegDefs;
+
+public:
+ HexagonHazardRecognizer(const InstrItineraryData *II,
+ const HexagonInstrInfo *HII,
+ const HexagonSubtarget &ST)
+ : Resources(ST.createDFAPacketizer(II)), TII(HII), PacketNum(0),
+ UsesDotCur(nullptr), DotCurPNum(-1) { }
+
+ ~HexagonHazardRecognizer() override {
+ if (Resources)
+ delete Resources;
+ }
+
+ /// This callback is invoked when a new block of instructions is about to be
+ /// scheduled. The hazard state is set to an initialized state.
+ void Reset() override;
+
+  /// Return the hazard type of emitting this node. There are two
+  /// possible results. Either:
+ /// * NoHazard: it is legal to issue this instruction on this cycle.
+ /// * Hazard: issuing this instruction would stall the machine. If some
+ /// other instruction is available, issue it first.
+ HazardType getHazardType(SUnit *SU, int stalls) override;
+
+ /// This callback is invoked when an instruction is emitted to be scheduled,
+ /// to advance the hazard state.
+ void EmitInstruction(SUnit *) override;
+
+ /// This callback may be invoked if getHazardType returns NoHazard. If, even
+ /// though there is no hazard, it would be better to schedule another
+ /// available instruction, this callback should return true.
+ bool ShouldPreferAnother(SUnit *) override;
+
+ /// This callback is invoked whenever the next top-down instruction to be
+ /// scheduled cannot issue in the current cycle, either because of latency
+ /// or resource conflicts. This should increment the internal state of the
+ /// hazard recognizer so that previously "Hazard" instructions will now not
+ /// be hazards.
+ void AdvanceCycle() override;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONPROFITRECOGNIZER_H
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
index 22247aa39b61..f6012d29d422 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp
@@ -26,11 +26,22 @@ using namespace llvm;
#define DEBUG_TYPE "hexagon-isel"
static
-cl::opt<unsigned>
-MaxNumOfUsesForConstExtenders("ga-max-num-uses-for-constant-extenders",
- cl::Hidden, cl::init(2),
- cl::desc("Maximum number of uses of a global address such that we still us a"
- "constant extended instruction"));
+cl::opt<bool>
+EnableAddressRebalancing("isel-rebalance-addr", cl::Hidden, cl::init(true),
+ cl::desc("Rebalance address calculation trees to improve "
+ "instruction selection"));
+
+// Rebalance only if this allows e.g. combining a GA with an offset or
+// factoring out a shift.
+static
+cl::opt<bool>
+RebalanceOnlyForOptimizations("rebalance-only-opt", cl::Hidden, cl::init(false),
+ cl::desc("Rebalance address tree only if this allows optimizations"));
+
+static
+cl::opt<bool>
+RebalanceOnlyImbalancedTrees("rebalance-only-imbal", cl::Hidden,
+ cl::init(false), cl::desc("Rebalance address tree only if it is imbalanced"));
//===----------------------------------------------------------------------===//
// Instruction Selector Implementation
@@ -42,14 +53,13 @@ MaxNumOfUsesForConstExtenders("ga-max-num-uses-for-constant-extenders",
///
namespace {
class HexagonDAGToDAGISel : public SelectionDAGISel {
- const HexagonTargetMachine &HTM;
const HexagonSubtarget *HST;
const HexagonInstrInfo *HII;
const HexagonRegisterInfo *HRI;
public:
explicit HexagonDAGToDAGISel(HexagonTargetMachine &tm,
CodeGenOpt::Level OptLevel)
- : SelectionDAGISel(tm, OptLevel), HTM(tm), HST(nullptr), HII(nullptr),
+ : SelectionDAGISel(tm, OptLevel), HST(nullptr), HII(nullptr),
HRI(nullptr) {}
bool runOnMachineFunction(MachineFunction &MF) override {
@@ -61,8 +71,8 @@ public:
return true;
}
- virtual void PreprocessISelDAG() override;
- virtual void EmitFunctionEntryCode() override;
+ void PreprocessISelDAG() override;
+ void EmitFunctionEntryCode() override;
void Select(SDNode *N) override;
@@ -72,7 +82,7 @@ public:
bool SelectGlobalAddress(SDValue &N, SDValue &R, bool UseGP);
bool SelectAddrFI(SDValue &N, SDValue &R);
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "Hexagon DAG->DAG Pattern Instruction Selection";
}
@@ -92,7 +102,6 @@ public:
std::vector<SDValue> &OutOps) override;
bool tryLoadOfLoadIntrinsic(LoadSDNode *N);
void SelectLoad(SDNode *N);
- void SelectBaseOffsetLoad(LoadSDNode *LD, SDLoc dl);
void SelectIndexedLoad(LoadSDNode *LD, const SDLoc &dl);
void SelectIndexedStore(StoreSDNode *ST, const SDLoc &dl);
void SelectStore(SDNode *N);
@@ -103,82 +112,27 @@ public:
void SelectIntrinsicWOChain(SDNode *N);
void SelectConstant(SDNode *N);
void SelectConstantFP(SDNode *N);
- void SelectAdd(SDNode *N);
void SelectBitcast(SDNode *N);
- void SelectBitOp(SDNode *N);
-
- // XformMskToBitPosU5Imm - Returns the bit position which
- // the single bit 32 bit mask represents.
- // Used in Clr and Set bit immediate memops.
- SDValue XformMskToBitPosU5Imm(uint32_t Imm, const SDLoc &DL) {
- int32_t bitPos;
- bitPos = Log2_32(Imm);
- assert(bitPos >= 0 && bitPos < 32 &&
- "Constant out of range for 32 BitPos Memops");
- return CurDAG->getTargetConstant(bitPos, DL, MVT::i32);
- }
-
- // XformMskToBitPosU4Imm - Returns the bit position which the single-bit
- // 16 bit mask represents. Used in Clr and Set bit immediate memops.
- SDValue XformMskToBitPosU4Imm(uint16_t Imm, const SDLoc &DL) {
- return XformMskToBitPosU5Imm(Imm, DL);
- }
-
- // XformMskToBitPosU3Imm - Returns the bit position which the single-bit
- // 8 bit mask represents. Used in Clr and Set bit immediate memops.
- SDValue XformMskToBitPosU3Imm(uint8_t Imm, const SDLoc &DL) {
- return XformMskToBitPosU5Imm(Imm, DL);
- }
-
- // Return true if there is exactly one bit set in V, i.e., if V is one of the
- // following integers: 2^0, 2^1, ..., 2^31.
- bool ImmIsSingleBit(uint32_t v) const {
- return isPowerOf2_32(v);
- }
-
- // XformM5ToU5Imm - Return a target constant with the specified value, of
- // type i32 where the negative literal is transformed into a positive literal
- // for use in -= memops.
- inline SDValue XformM5ToU5Imm(signed Imm, const SDLoc &DL) {
- assert((Imm >= -31 && Imm <= -1) && "Constant out of range for Memops");
- return CurDAG->getTargetConstant(-Imm, DL, MVT::i32);
- }
-
- // XformU7ToU7M1Imm - Return a target constant decremented by 1, in range
- // [1..128], used in cmpb.gtu instructions.
- inline SDValue XformU7ToU7M1Imm(signed Imm, const SDLoc &DL) {
- assert((Imm >= 1 && Imm <= 128) && "Constant out of range for cmpb op");
- return CurDAG->getTargetConstant(Imm - 1, DL, MVT::i8);
- }
-
- // XformS8ToS8M1Imm - Return a target constant decremented by 1.
- inline SDValue XformSToSM1Imm(signed Imm, const SDLoc &DL) {
- return CurDAG->getTargetConstant(Imm - 1, DL, MVT::i32);
- }
-
- // XformU8ToU8M1Imm - Return a target constant decremented by 1.
- inline SDValue XformUToUM1Imm(unsigned Imm, const SDLoc &DL) {
- assert((Imm >= 1) && "Cannot decrement unsigned int less than 1");
- return CurDAG->getTargetConstant(Imm - 1, DL, MVT::i32);
- }
-
- // XformSToSM2Imm - Return a target constant decremented by 2.
- inline SDValue XformSToSM2Imm(unsigned Imm, const SDLoc &DL) {
- return CurDAG->getTargetConstant(Imm - 2, DL, MVT::i32);
- }
-
- // XformSToSM3Imm - Return a target constant decremented by 3.
- inline SDValue XformSToSM3Imm(unsigned Imm, const SDLoc &DL) {
- return CurDAG->getTargetConstant(Imm - 3, DL, MVT::i32);
- }
// Include the pieces autogenerated from the target description.
#include "HexagonGenDAGISel.inc"
private:
bool isValueExtension(const SDValue &Val, unsigned FromBits, SDValue &Src);
- bool orIsAdd(const SDNode *N) const;
+ bool isOrEquivalentToAdd(const SDNode *N) const;
bool isAlignedMemNode(const MemSDNode *N) const;
+ bool isPositiveHalfWord(const SDNode *N) const;
+
+ SmallDenseMap<SDNode *,int> RootWeights;
+ SmallDenseMap<SDNode *,int> RootHeights;
+ SmallDenseMap<const Value *,int> GAUsesInFunction;
+ int getWeight(SDNode *N);
+ int getHeight(SDNode *N);
+ SDValue getMultiplierForSHL(SDNode *N);
+ SDValue factorOutPowerOf2(SDValue V, unsigned Power);
+ unsigned getUsesInFunction(const Value *V);
+ SDValue balanceSubTree(SDNode *N, bool Factorize = false);
+ void rebalanceAddressTrees();
}; // end HexagonDAGToDAGISel
} // end anonymous namespace
@@ -588,7 +542,7 @@ void HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, const SDLoc &dl) {
if (ST->isTruncatingStore() && ValueVT.getSizeInBits() == 64) {
assert(StoredVT.getSizeInBits() < 64 && "Not a truncating store");
- Value = CurDAG->getTargetExtractSubreg(Hexagon::subreg_loreg,
+ Value = CurDAG->getTargetExtractSubreg(Hexagon::isub_lo,
dl, MVT::i32, Value);
}
@@ -640,7 +594,6 @@ void HexagonDAGToDAGISel::SelectStore(SDNode *N) {
void HexagonDAGToDAGISel::SelectMul(SDNode *N) {
SDLoc dl(N);
- //
// %conv.i = sext i32 %tmp1 to i64
// %conv2.i = sext i32 %add to i64
// %mul.i = mul nsw i64 %conv2.i, %conv.i
@@ -665,7 +618,6 @@ void HexagonDAGToDAGISel::SelectMul(SDNode *N) {
SelectCode(N);
return;
}
-
OP0 = Sext0;
} else if (MulOp0.getOpcode() == ISD::LOAD) {
LoadSDNode *LD = cast<LoadSDNode>(MulOp0.getNode());
@@ -675,7 +627,6 @@ void HexagonDAGToDAGISel::SelectMul(SDNode *N) {
SelectCode(N);
return;
}
-
SDValue Chain = LD->getChain();
SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32);
OP0 = SDValue(CurDAG->getMachineNode(Hexagon::L2_loadri_io, dl, MVT::i32,
@@ -694,7 +645,6 @@ void HexagonDAGToDAGISel::SelectMul(SDNode *N) {
SelectCode(N);
return;
}
-
OP1 = Sext1;
} else if (MulOp1.getOpcode() == ISD::LOAD) {
LoadSDNode *LD = cast<LoadSDNode>(MulOp1.getNode());
@@ -704,7 +654,6 @@ void HexagonDAGToDAGISel::SelectMul(SDNode *N) {
SelectCode(N);
return;
}
-
SDValue Chain = LD->getChain();
SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32);
OP1 = SDValue(CurDAG->getMachineNode(Hexagon::L2_loadri_io, dl, MVT::i32,
@@ -717,8 +666,8 @@ void HexagonDAGToDAGISel::SelectMul(SDNode *N) {
}
// Generate a mpy instruction.
- SDNode *Result = CurDAG->getMachineNode(Hexagon::M2_dpmpyss_s0, dl, MVT::i64,
- OP0, OP1);
+ SDNode *Result = CurDAG->getMachineNode(Hexagon::M2_dpmpyss_s0, dl,
+ MVT::i64, OP0, OP1);
ReplaceNode(N, Result);
return;
}
@@ -728,68 +677,56 @@ void HexagonDAGToDAGISel::SelectMul(SDNode *N) {
void HexagonDAGToDAGISel::SelectSHL(SDNode *N) {
SDLoc dl(N);
- if (N->getValueType(0) == MVT::i32) {
- SDValue Shl_0 = N->getOperand(0);
- SDValue Shl_1 = N->getOperand(1);
- // RHS is const.
- if (Shl_1.getOpcode() == ISD::Constant) {
- if (Shl_0.getOpcode() == ISD::MUL) {
- SDValue Mul_0 = Shl_0.getOperand(0); // Val
- SDValue Mul_1 = Shl_0.getOperand(1); // Const
- // RHS of mul is const.
- if (Mul_1.getOpcode() == ISD::Constant) {
- int32_t ShlConst =
- cast<ConstantSDNode>(Shl_1.getNode())->getSExtValue();
- int32_t MulConst =
- cast<ConstantSDNode>(Mul_1.getNode())->getSExtValue();
- int32_t ValConst = MulConst << ShlConst;
- SDValue Val = CurDAG->getTargetConstant(ValConst, dl,
- MVT::i32);
- if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Val.getNode()))
- if (isInt<9>(CN->getSExtValue())) {
- SDNode* Result =
- CurDAG->getMachineNode(Hexagon::M2_mpysmi, dl,
- MVT::i32, Mul_0, Val);
- ReplaceNode(N, Result);
- return;
- }
+ SDValue Shl_0 = N->getOperand(0);
+ SDValue Shl_1 = N->getOperand(1);
+
+ auto Default = [this,N] () -> void { SelectCode(N); };
+
+ if (N->getValueType(0) != MVT::i32 || Shl_1.getOpcode() != ISD::Constant)
+ return Default();
+
+ // RHS is const.
+ int32_t ShlConst = cast<ConstantSDNode>(Shl_1)->getSExtValue();
+
+ if (Shl_0.getOpcode() == ISD::MUL) {
+ SDValue Mul_0 = Shl_0.getOperand(0); // Val
+ SDValue Mul_1 = Shl_0.getOperand(1); // Const
+ // RHS of mul is const.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Mul_1)) {
+ int32_t ValConst = C->getSExtValue() << ShlConst;
+ if (isInt<9>(ValConst)) {
+ SDValue Val = CurDAG->getTargetConstant(ValConst, dl, MVT::i32);
+ SDNode *Result = CurDAG->getMachineNode(Hexagon::M2_mpysmi, dl,
+ MVT::i32, Mul_0, Val);
+ ReplaceNode(N, Result);
+ return;
+ }
+ }
+ return Default();
+ }
- }
- } else if (Shl_0.getOpcode() == ISD::SUB) {
- SDValue Sub_0 = Shl_0.getOperand(0); // Const 0
- SDValue Sub_1 = Shl_0.getOperand(1); // Val
- if (Sub_0.getOpcode() == ISD::Constant) {
- int32_t SubConst =
- cast<ConstantSDNode>(Sub_0.getNode())->getSExtValue();
- if (SubConst == 0) {
- if (Sub_1.getOpcode() == ISD::SHL) {
- SDValue Shl2_0 = Sub_1.getOperand(0); // Val
- SDValue Shl2_1 = Sub_1.getOperand(1); // Const
- if (Shl2_1.getOpcode() == ISD::Constant) {
- int32_t ShlConst =
- cast<ConstantSDNode>(Shl_1.getNode())->getSExtValue();
- int32_t Shl2Const =
- cast<ConstantSDNode>(Shl2_1.getNode())->getSExtValue();
- int32_t ValConst = 1 << (ShlConst+Shl2Const);
- SDValue Val = CurDAG->getTargetConstant(-ValConst, dl,
- MVT::i32);
- if (ConstantSDNode *CN =
- dyn_cast<ConstantSDNode>(Val.getNode()))
- if (isInt<9>(CN->getSExtValue())) {
- SDNode* Result =
- CurDAG->getMachineNode(Hexagon::M2_mpysmi, dl, MVT::i32,
- Shl2_0, Val);
- ReplaceNode(N, Result);
- return;
- }
- }
- }
- }
+ if (Shl_0.getOpcode() == ISD::SUB) {
+ SDValue Sub_0 = Shl_0.getOperand(0); // Const 0
+ SDValue Sub_1 = Shl_0.getOperand(1); // Val
+ if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Sub_0)) {
+ if (C1->getSExtValue() != 0 || Sub_1.getOpcode() != ISD::SHL)
+ return Default();
+ SDValue Shl2_0 = Sub_1.getOperand(0); // Val
+ SDValue Shl2_1 = Sub_1.getOperand(1); // Const
+ if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(Shl2_1)) {
+ int32_t ValConst = 1 << (ShlConst + C2->getSExtValue());
+ if (isInt<9>(-ValConst)) {
+ SDValue Val = CurDAG->getTargetConstant(-ValConst, dl, MVT::i32);
+ SDNode *Result = CurDAG->getMachineNode(Hexagon::M2_mpysmi, dl,
+ MVT::i32, Shl2_0, Val);
+ ReplaceNode(N, Result);
+ return;
}
}
}
}
- SelectCode(N);
+
+ return Default();
}
@@ -815,20 +752,19 @@ void HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) {
SDNode *Mask = CurDAG->getMachineNode(Hexagon::C2_mask, dl, MVT::i64, Op0);
unsigned NE = OpVT.getVectorNumElements();
EVT ExVT = N->getValueType(0);
- unsigned ES = ExVT.getVectorElementType().getSizeInBits();
+ unsigned ES = ExVT.getScalarSizeInBits();
uint64_t MV = 0, Bit = 1;
for (unsigned i = 0; i < NE; ++i) {
MV |= Bit;
Bit <<= ES;
}
SDValue Ones = CurDAG->getTargetConstant(MV, dl, MVT::i64);
- SDNode *OnesReg = CurDAG->getMachineNode(Hexagon::CONST64_Int_Real, dl,
+ SDNode *OnesReg = CurDAG->getMachineNode(Hexagon::CONST64, dl,
MVT::i64, Ones);
if (ExVT.getSizeInBits() == 32) {
SDNode *And = CurDAG->getMachineNode(Hexagon::A2_andp, dl, MVT::i64,
SDValue(Mask,0), SDValue(OnesReg,0));
- SDValue SubR = CurDAG->getTargetConstant(Hexagon::subreg_loreg, dl,
- MVT::i32);
+ SDValue SubR = CurDAG->getTargetConstant(Hexagon::isub_lo, dl, MVT::i32);
ReplaceNode(N, CurDAG->getMachineNode(Hexagon::EXTRACT_SUBREG, dl, ExVT,
SDValue(And, 0), SubR));
return;
@@ -839,21 +775,18 @@ void HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) {
return;
}
- SDNode *IsIntrinsic = N->getOperand(0).getNode();
- if ((IsIntrinsic->getOpcode() == ISD::INTRINSIC_WO_CHAIN)) {
- unsigned ID =
- cast<ConstantSDNode>(IsIntrinsic->getOperand(0))->getZExtValue();
+ SDNode *Int = N->getOperand(0).getNode();
+ if ((Int->getOpcode() == ISD::INTRINSIC_WO_CHAIN)) {
+ unsigned ID = cast<ConstantSDNode>(Int->getOperand(0))->getZExtValue();
if (doesIntrinsicReturnPredicate(ID)) {
// Now we need to differentiate target data types.
if (N->getValueType(0) == MVT::i64) {
// Convert the zero_extend to Rs = Pd followed by A2_combinew(0,Rs).
SDValue TargetConst0 = CurDAG->getTargetConstant(0, dl, MVT::i32);
SDNode *Result_1 = CurDAG->getMachineNode(Hexagon::C2_tfrpr, dl,
- MVT::i32,
- SDValue(IsIntrinsic, 0));
+ MVT::i32, SDValue(Int, 0));
SDNode *Result_2 = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl,
- MVT::i32,
- TargetConst0);
+ MVT::i32, TargetConst0);
SDNode *Result_3 = CurDAG->getMachineNode(Hexagon::A2_combinew, dl,
MVT::i64, MVT::Other,
SDValue(Result_2, 0),
@@ -864,8 +797,7 @@ void HexagonDAGToDAGISel::SelectZeroExtend(SDNode *N) {
if (N->getValueType(0) == MVT::i32) {
// Convert the zero_extend to Rs = Pd
SDNode* RsPd = CurDAG->getMachineNode(Hexagon::C2_tfrpr, dl,
- MVT::i32,
- SDValue(IsIntrinsic, 0));
+ MVT::i32, SDValue(Int, 0));
ReplaceNode(N, RsPd);
return;
}
@@ -921,19 +853,15 @@ void HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) {
void HexagonDAGToDAGISel::SelectConstantFP(SDNode *N) {
SDLoc dl(N);
ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N);
- const APFloat &APF = CN->getValueAPF();
+ APInt A = CN->getValueAPF().bitcastToAPInt();
if (N->getValueType(0) == MVT::f32) {
- ReplaceNode(
- N, CurDAG->getMachineNode(Hexagon::TFRI_f, dl, MVT::f32,
- CurDAG->getTargetConstantFP(
- APF.convertToFloat(), dl, MVT::f32)));
+ SDValue V = CurDAG->getTargetConstant(A.getZExtValue(), dl, MVT::i32);
+ ReplaceNode(N, CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::f32, V));
return;
}
- else if (N->getValueType(0) == MVT::f64) {
- ReplaceNode(
- N, CurDAG->getMachineNode(Hexagon::CONST64_Float_Real, dl, MVT::f64,
- CurDAG->getTargetConstantFP(
- APF.convertToDouble(), dl, MVT::f64)));
+ if (N->getValueType(0) == MVT::f64) {
+ SDValue V = CurDAG->getTargetConstant(A.getZExtValue(), dl, MVT::i64);
+ ReplaceNode(N, CurDAG->getMachineNode(Hexagon::CONST64, dl, MVT::f64, V));
return;
}
@@ -941,226 +869,46 @@ void HexagonDAGToDAGISel::SelectConstantFP(SDNode *N) {
}
//
-// Map predicate true (encoded as -1 in LLVM) to a XOR.
+// Map boolean values.
//
void HexagonDAGToDAGISel::SelectConstant(SDNode *N) {
- SDLoc dl(N);
if (N->getValueType(0) == MVT::i1) {
- SDNode* Result = 0;
- int32_t Val = cast<ConstantSDNode>(N)->getSExtValue();
- if (Val == -1) {
- Result = CurDAG->getMachineNode(Hexagon::TFR_PdTrue, dl, MVT::i1);
- } else if (Val == 0) {
- Result = CurDAG->getMachineNode(Hexagon::TFR_PdFalse, dl, MVT::i1);
- }
- if (Result) {
- ReplaceNode(N, Result);
- return;
- }
- }
-
- SelectCode(N);
-}
-
-
-//
-// Map add followed by a asr -> asr +=.
-//
-void HexagonDAGToDAGISel::SelectAdd(SDNode *N) {
- SDLoc dl(N);
- if (N->getValueType(0) != MVT::i32) {
- SelectCode(N);
- return;
- }
- // Identify nodes of the form: add(asr(...)).
- SDNode* Src1 = N->getOperand(0).getNode();
- if (Src1->getOpcode() != ISD::SRA || !Src1->hasOneUse()
- || Src1->getValueType(0) != MVT::i32) {
- SelectCode(N);
- return;
- }
-
- // Build Rd = Rd' + asr(Rs, Rt). The machine constraints will ensure that
- // Rd and Rd' are assigned to the same register
- SDNode* Result = CurDAG->getMachineNode(Hexagon::S2_asr_r_r_acc, dl, MVT::i32,
- N->getOperand(1),
- Src1->getOperand(0),
- Src1->getOperand(1));
- ReplaceNode(N, Result);
-}
-
-//
-// Map the following, where possible.
-// AND/FABS -> clrbit
-// OR -> setbit
-// XOR/FNEG ->toggle_bit.
-//
-void HexagonDAGToDAGISel::SelectBitOp(SDNode *N) {
- SDLoc dl(N);
- EVT ValueVT = N->getValueType(0);
-
- // We handle only 32 and 64-bit bit ops.
- if (!(ValueVT == MVT::i32 || ValueVT == MVT::i64 ||
- ValueVT == MVT::f32 || ValueVT == MVT::f64)) {
- SelectCode(N);
+ assert(!(cast<ConstantSDNode>(N)->getZExtValue() >> 1));
+ unsigned Opc = (cast<ConstantSDNode>(N)->getSExtValue() != 0)
+ ? Hexagon::PS_true
+ : Hexagon::PS_false;
+ ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), MVT::i1));
return;
}
- // We handly only fabs and fneg for V5.
- unsigned Opc = N->getOpcode();
- if ((Opc == ISD::FABS || Opc == ISD::FNEG) && !HST->hasV5TOps()) {
- SelectCode(N);
- return;
- }
-
- int64_t Val = 0;
- if (Opc != ISD::FABS && Opc != ISD::FNEG) {
- if (N->getOperand(1).getOpcode() == ISD::Constant)
- Val = cast<ConstantSDNode>((N)->getOperand(1))->getSExtValue();
- else {
- SelectCode(N);
- return;
- }
- }
-
- if (Opc == ISD::AND) {
- // Check if this is a bit-clearing AND, if not select code the usual way.
- if ((ValueVT == MVT::i32 && isPowerOf2_32(~Val)) ||
- (ValueVT == MVT::i64 && isPowerOf2_64(~Val)))
- Val = ~Val;
- else {
- SelectCode(N);
- return;
- }
- }
-
- // If OR or AND is being fed by shl, srl and, sra don't do this change,
- // because Hexagon provide |= &= on shl, srl, and sra.
- // Traverse the DAG to see if there is shl, srl and sra.
- if (Opc == ISD::OR || Opc == ISD::AND) {
- switch (N->getOperand(0)->getOpcode()) {
- default:
- break;
- case ISD::SRA:
- case ISD::SRL:
- case ISD::SHL:
- SelectCode(N);
- return;
- }
- }
-
- // Make sure it's power of 2.
- unsigned BitPos = 0;
- if (Opc != ISD::FABS && Opc != ISD::FNEG) {
- if ((ValueVT == MVT::i32 && !isPowerOf2_32(Val)) ||
- (ValueVT == MVT::i64 && !isPowerOf2_64(Val))) {
- SelectCode(N);
- return;
- }
-
- // Get the bit position.
- BitPos = countTrailingZeros(uint64_t(Val));
- } else {
- // For fabs and fneg, it's always the 31st bit.
- BitPos = 31;
- }
-
- unsigned BitOpc = 0;
- // Set the right opcode for bitwise operations.
- switch (Opc) {
- default:
- llvm_unreachable("Only bit-wise/abs/neg operations are allowed.");
- case ISD::AND:
- case ISD::FABS:
- BitOpc = Hexagon::S2_clrbit_i;
- break;
- case ISD::OR:
- BitOpc = Hexagon::S2_setbit_i;
- break;
- case ISD::XOR:
- case ISD::FNEG:
- BitOpc = Hexagon::S2_togglebit_i;
- break;
- }
-
- SDNode *Result;
- // Get the right SDVal for the opcode.
- SDValue SDVal = CurDAG->getTargetConstant(BitPos, dl, MVT::i32);
-
- if (ValueVT == MVT::i32 || ValueVT == MVT::f32) {
- Result = CurDAG->getMachineNode(BitOpc, dl, ValueVT,
- N->getOperand(0), SDVal);
- } else {
- // 64-bit gymnastic to use REG_SEQUENCE. But it's worth it.
- EVT SubValueVT;
- if (ValueVT == MVT::i64)
- SubValueVT = MVT::i32;
- else
- SubValueVT = MVT::f32;
-
- SDNode *Reg = N->getOperand(0).getNode();
- SDValue RegClass = CurDAG->getTargetConstant(Hexagon::DoubleRegsRegClassID,
- dl, MVT::i64);
-
- SDValue SubregHiIdx = CurDAG->getTargetConstant(Hexagon::subreg_hireg, dl,
- MVT::i32);
- SDValue SubregLoIdx = CurDAG->getTargetConstant(Hexagon::subreg_loreg, dl,
- MVT::i32);
-
- SDValue SubregHI = CurDAG->getTargetExtractSubreg(Hexagon::subreg_hireg, dl,
- MVT::i32, SDValue(Reg, 0));
-
- SDValue SubregLO = CurDAG->getTargetExtractSubreg(Hexagon::subreg_loreg, dl,
- MVT::i32, SDValue(Reg, 0));
-
- // Clear/set/toggle hi or lo registers depending on the bit position.
- if (SubValueVT != MVT::f32 && BitPos < 32) {
- SDNode *Result0 = CurDAG->getMachineNode(BitOpc, dl, SubValueVT,
- SubregLO, SDVal);
- const SDValue Ops[] = { RegClass, SubregHI, SubregHiIdx,
- SDValue(Result0, 0), SubregLoIdx };
- Result = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
- dl, ValueVT, Ops);
- } else {
- if (Opc != ISD::FABS && Opc != ISD::FNEG)
- SDVal = CurDAG->getTargetConstant(BitPos-32, dl, MVT::i32);
- SDNode *Result0 = CurDAG->getMachineNode(BitOpc, dl, SubValueVT,
- SubregHI, SDVal);
- const SDValue Ops[] = { RegClass, SDValue(Result0, 0), SubregHiIdx,
- SubregLO, SubregLoIdx };
- Result = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE,
- dl, ValueVT, Ops);
- }
- }
-
- ReplaceNode(N, Result);
+ SelectCode(N);
}
void HexagonDAGToDAGISel::SelectFrameIndex(SDNode *N) {
- MachineFrameInfo *MFI = MF->getFrameInfo();
+ MachineFrameInfo &MFI = MF->getFrameInfo();
const HexagonFrameLowering *HFI = HST->getFrameLowering();
int FX = cast<FrameIndexSDNode>(N)->getIndex();
unsigned StkA = HFI->getStackAlignment();
- unsigned MaxA = MFI->getMaxAlignment();
+ unsigned MaxA = MFI.getMaxAlignment();
SDValue FI = CurDAG->getTargetFrameIndex(FX, MVT::i32);
SDLoc DL(N);
SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
- SDNode *R = 0;
+ SDNode *R = nullptr;
- // Use TFR_FI when:
+ // Use PS_fi when:
// - the object is fixed, or
// - there are no objects with higher-than-default alignment, or
// - there are no dynamically allocated objects.
- // Otherwise, use TFR_FIA.
- if (FX < 0 || MaxA <= StkA || !MFI->hasVarSizedObjects()) {
- R = CurDAG->getMachineNode(Hexagon::TFR_FI, DL, MVT::i32, FI, Zero);
+ // Otherwise, use PS_fia.
+ if (FX < 0 || MaxA <= StkA || !MFI.hasVarSizedObjects()) {
+ R = CurDAG->getMachineNode(Hexagon::PS_fi, DL, MVT::i32, FI, Zero);
} else {
auto &HMFI = *MF->getInfo<HexagonMachineFunctionInfo>();
unsigned AR = HMFI.getStackAlignBaseVReg();
SDValue CH = CurDAG->getEntryNode();
SDValue Ops[] = { CurDAG->getCopyFromReg(CH, DL, AR, MVT::i32), FI, Zero };
- R = CurDAG->getMachineNode(Hexagon::TFR_FIA, DL, MVT::i32, Ops);
+ R = CurDAG->getMachineNode(Hexagon::PS_fia, DL, MVT::i32, Ops);
}
ReplaceNode(N, R);
@@ -1202,10 +950,6 @@ void HexagonDAGToDAGISel::Select(SDNode *N) {
SelectFrameIndex(N);
return;
- case ISD::ADD:
- SelectAdd(N);
- return;
-
case ISD::BITCAST:
SelectBitcast(N);
return;
@@ -1226,14 +970,6 @@ void HexagonDAGToDAGISel::Select(SDNode *N) {
SelectMul(N);
return;
- case ISD::AND:
- case ISD::OR:
- case ISD::XOR:
- case ISD::FABS:
- case ISD::FNEG:
- SelectBitOp(N);
- return;
-
case ISD::ZERO_EXTEND:
SelectZeroExtend(N);
return;
@@ -1373,6 +1109,16 @@ void HexagonDAGToDAGISel::PreprocessISelDAG() {
SDValue NewShl = DAG.getNode(ISD::SHL, DL, VT, NewAdd, C);
ReplaceNode(T0.getNode(), NewShl.getNode());
}
+
+ if (EnableAddressRebalancing) {
+ rebalanceAddressTrees();
+
+ DEBUG(
+ dbgs() << "************* SelectionDAG after preprocessing: ***********\n";
+ CurDAG->dump();
+ dbgs() << "************* End SelectionDAG after preprocessing ********\n";
+ );
+ }
}
void HexagonDAGToDAGISel::EmitFunctionEntryCode() {
@@ -1381,11 +1127,11 @@ void HexagonDAGToDAGISel::EmitFunctionEntryCode() {
if (!HFI.needsAligna(*MF))
return;
- MachineFrameInfo *MFI = MF->getFrameInfo();
+ MachineFrameInfo &MFI = MF->getFrameInfo();
MachineBasicBlock *EntryBB = &MF->front();
unsigned AR = FuncInfo->CreateReg(MVT::i32);
- unsigned MaxA = MFI->getMaxAlignment();
- BuildMI(EntryBB, DebugLoc(), HII->get(Hexagon::ALIGNA), AR)
+ unsigned MaxA = MFI.getMaxAlignment();
+ BuildMI(EntryBB, DebugLoc(), HII->get(Hexagon::PS_aligna), AR)
.addImm(MaxA);
MF->getInfo<HexagonMachineFunctionInfo>()->setStackAlignBaseVReg(AR);
}
@@ -1395,9 +1141,9 @@ bool HexagonDAGToDAGISel::SelectAddrFI(SDValue& N, SDValue &R) {
if (N.getOpcode() != ISD::FrameIndex)
return false;
auto &HFI = *HST->getFrameLowering();
- MachineFrameInfo *MFI = MF->getFrameInfo();
+ MachineFrameInfo &MFI = MF->getFrameInfo();
int FX = cast<FrameIndexSDNode>(N)->getIndex();
- if (!MFI->isFixedObjectIndex(FX) && HFI.needsAligna(*MF))
+ if (!MFI.isFixedObjectIndex(FX) && HFI.needsAligna(*MF))
return false;
R = CurDAG->getTargetFrameIndex(FX, MVT::i32);
return true;
@@ -1519,15 +1265,15 @@ bool HexagonDAGToDAGISel::isValueExtension(const SDValue &Val,
}
-bool HexagonDAGToDAGISel::orIsAdd(const SDNode *N) const {
+bool HexagonDAGToDAGISel::isOrEquivalentToAdd(const SDNode *N) const {
assert(N->getOpcode() == ISD::OR);
auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
assert(C);
// Detect when "or" is used to add an offset to a stack object.
if (auto *FN = dyn_cast<FrameIndexSDNode>(N->getOperand(0))) {
- MachineFrameInfo *MFI = MF->getFrameInfo();
- unsigned A = MFI->getObjectAlignment(FN->getIndex());
+ MachineFrameInfo &MFI = MF->getFrameInfo();
+ unsigned A = MFI.getObjectAlignment(FN->getIndex());
assert(isPowerOf2_32(A));
int32_t Off = C->getSExtValue();
// If the alleged offset fits in the zero bits guaranteed by
@@ -1540,3 +1286,717 @@ bool HexagonDAGToDAGISel::orIsAdd(const SDNode *N) const {
bool HexagonDAGToDAGISel::isAlignedMemNode(const MemSDNode *N) const {
return N->getAlignment() >= N->getMemoryVT().getStoreSize();
}
+
+// Return true when the given node fits in a positive half word.
+bool HexagonDAGToDAGISel::isPositiveHalfWord(const SDNode *N) const {
+ if (const ConstantSDNode *CN = dyn_cast<const ConstantSDNode>(N)) {
+ int64_t V = CN->getSExtValue();
+ return V > 0 && isInt<16>(V);
+ }
+ if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
+ const VTSDNode *VN = dyn_cast<const VTSDNode>(N->getOperand(1));
+ return VN->getVT().getSizeInBits() <= 16;
+ }
+ return false;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+// Rebalancing of address calculation trees
+
+static bool isOpcodeHandled(const SDNode *N) {
+ switch (N->getOpcode()) {
+ case ISD::ADD:
+ case ISD::MUL:
+ return true;
+ case ISD::SHL:
+ // We only handle constant shifts because these can be easily flattened
+ // into multiplications by 2^Op1.
+ return isa<ConstantSDNode>(N->getOperand(1).getNode());
+ default:
+ return false;
+ }
+}
+
+/// \brief Return the weight of an SDNode
+int HexagonDAGToDAGISel::getWeight(SDNode *N) {
+ if (!isOpcodeHandled(N))
+ return 1;
+ assert(RootWeights.count(N) && "Cannot get weight of unseen root!");
+ assert(RootWeights[N] != -1 && "Cannot get weight of unvisited root!");
+  assert(RootWeights[N] != -2 && "Cannot get weight of RAUW'd root!");
+ return RootWeights[N];
+}
+
+int HexagonDAGToDAGISel::getHeight(SDNode *N) {
+ if (!isOpcodeHandled(N))
+ return 0;
+ assert(RootWeights.count(N) && RootWeights[N] >= 0 &&
+ "Cannot query height of unvisited/RAUW'd node!");
+ return RootHeights[N];
+}
+
+namespace {
+struct WeightedLeaf {
+ SDValue Value;
+ int Weight;
+ int InsertionOrder;
+
+ WeightedLeaf() : Value(SDValue()) { }
+
+ WeightedLeaf(SDValue Value, int Weight, int InsertionOrder) :
+ Value(Value), Weight(Weight), InsertionOrder(InsertionOrder) {
+ assert(Weight >= 0 && "Weight must be >= 0");
+ }
+
+ static bool Compare(const WeightedLeaf &A, const WeightedLeaf &B) {
+ assert(A.Value.getNode() && B.Value.getNode());
+ return A.Weight == B.Weight ?
+ (A.InsertionOrder > B.InsertionOrder) :
+ (A.Weight > B.Weight);
+ }
+};
+
+/// A specialized priority queue for WeightedLeaves. It automatically folds
+/// constants and allows removal of non-top elements while maintaining the
+/// priority order.
+class LeafPrioQueue {
+ SmallVector<WeightedLeaf, 8> Q;
+ bool HaveConst;
+ WeightedLeaf ConstElt;
+ unsigned Opcode;
+
+public:
+ bool empty() {
+ return (!HaveConst && Q.empty());
+ }
+
+ size_t size() {
+ return Q.size() + HaveConst;
+ }
+
+ bool hasConst() {
+ return HaveConst;
+ }
+
+ const WeightedLeaf &top() {
+ if (HaveConst)
+ return ConstElt;
+ return Q.front();
+ }
+
+ WeightedLeaf pop() {
+ if (HaveConst) {
+ HaveConst = false;
+ return ConstElt;
+ }
+ std::pop_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
+ return Q.pop_back_val();
+ }
+
+ void push(WeightedLeaf L, bool SeparateConst=true) {
+ if (!HaveConst && SeparateConst && isa<ConstantSDNode>(L.Value)) {
+ if (Opcode == ISD::MUL &&
+ cast<ConstantSDNode>(L.Value)->getSExtValue() == 1)
+ return;
+ if (Opcode == ISD::ADD &&
+ cast<ConstantSDNode>(L.Value)->getSExtValue() == 0)
+ return;
+
+ HaveConst = true;
+ ConstElt = L;
+ } else {
+ Q.push_back(L);
+ std::push_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
+ }
+ }
+
+ /// Push L to the bottom of the queue regardless of its weight. If L is
+ /// constant, it will not be folded with other constants in the queue.
+ void pushToBottom(WeightedLeaf L) {
+ L.Weight = 1000;
+ push(L, false);
+ }
+
+ /// Search for a SHL(x, [<=MaxAmount]) subtree in the queue, return the one of
+ /// lowest weight and remove it from the queue.
+ WeightedLeaf findSHL(uint64_t MaxAmount);
+
+ WeightedLeaf findMULbyConst();
+
+ LeafPrioQueue(unsigned Opcode) :
+ HaveConst(false), Opcode(Opcode) { }
+};
+} // end anonymous namespace
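
One subtlety in LeafPrioQueue is the direction of WeightedLeaf::Compare: it orders heavier leaves (and, on ties, later insertions) as "less", so the std::push_heap/std::pop_heap calls behave as a min-heap and pop() hands back the lightest, earliest-inserted leaf first, which is what the Huffman-style rebuild in balanceSubTree() relies on. A small standalone sketch of that inverted-comparator trick; Leaf, the weights, and main() are made up for illustration.

    // Illustration only: the point is that a "greater" comparator turns
    // std::push_heap/std::pop_heap into a min-heap.
    #include <algorithm>
    #include <cstdio>
    #include <vector>

    struct Leaf {
      int Weight;
      int Order;   // insertion order, used to break ties
    };

    static bool Compare(const Leaf &A, const Leaf &B) {
      return A.Weight == B.Weight ? A.Order > B.Order : A.Weight > B.Weight;
    }

    int main() {
      std::vector<Leaf> Q;
      const int Weights[] = {5, 1, 3, 1};
      int Order = 0;
      for (int W : Weights) {
        Q.push_back({W, Order++});
        std::push_heap(Q.begin(), Q.end(), Compare);
      }
      while (!Q.empty()) {
        std::pop_heap(Q.begin(), Q.end(), Compare);
        Leaf L = Q.back();
        Q.pop_back();
        std::printf("weight=%d order=%d\n", L.Weight, L.Order);
      }
      // Pops weight 1 (order 1), weight 1 (order 3), weight 3, then weight 5.
      return 0;
    }
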
+
+WeightedLeaf LeafPrioQueue::findSHL(uint64_t MaxAmount) {
+ int ResultPos;
+ WeightedLeaf Result;
+
+ for (int Pos = 0, End = Q.size(); Pos != End; ++Pos) {
+ const WeightedLeaf &L = Q[Pos];
+ const SDValue &Val = L.Value;
+ if (Val.getOpcode() != ISD::SHL ||
+ !isa<ConstantSDNode>(Val.getOperand(1)) ||
+ Val.getConstantOperandVal(1) > MaxAmount)
+ continue;
+ if (!Result.Value.getNode() || Result.Weight > L.Weight ||
+ (Result.Weight == L.Weight && Result.InsertionOrder > L.InsertionOrder))
+ {
+ Result = L;
+ ResultPos = Pos;
+ }
+ }
+
+ if (Result.Value.getNode()) {
+ Q.erase(&Q[ResultPos]);
+ std::make_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
+ }
+
+ return Result;
+}
+
+WeightedLeaf LeafPrioQueue::findMULbyConst() {
+ int ResultPos;
+ WeightedLeaf Result;
+
+ for (int Pos = 0, End = Q.size(); Pos != End; ++Pos) {
+ const WeightedLeaf &L = Q[Pos];
+ const SDValue &Val = L.Value;
+ if (Val.getOpcode() != ISD::MUL ||
+ !isa<ConstantSDNode>(Val.getOperand(1)) ||
+ Val.getConstantOperandVal(1) > 127)
+ continue;
+ if (!Result.Value.getNode() || Result.Weight > L.Weight ||
+ (Result.Weight == L.Weight && Result.InsertionOrder > L.InsertionOrder))
+ {
+ Result = L;
+ ResultPos = Pos;
+ }
+ }
+
+ if (Result.Value.getNode()) {
+ Q.erase(&Q[ResultPos]);
+ std::make_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
+ }
+
+ return Result;
+}
+
+SDValue HexagonDAGToDAGISel::getMultiplierForSHL(SDNode *N) {
+ uint64_t MulFactor = 1ull << N->getConstantOperandVal(1);
+ return CurDAG->getConstant(MulFactor, SDLoc(N),
+ N->getOperand(1).getValueType());
+}
+
+/// @returns the value x for which 2^x is a factor of Val
+static unsigned getPowerOf2Factor(SDValue Val) {
+ if (Val.getOpcode() == ISD::MUL) {
+ unsigned MaxFactor = 0;
+ for (int i = 0; i < 2; ++i) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(i));
+ if (!C)
+ continue;
+ const APInt &CInt = C->getAPIntValue();
+ if (CInt.getBoolValue())
+ MaxFactor = CInt.countTrailingZeros();
+ }
+ return MaxFactor;
+ }
+ if (Val.getOpcode() == ISD::SHL) {
+ if (!isa<ConstantSDNode>(Val.getOperand(1).getNode()))
+ return 0;
+ return (unsigned) Val.getConstantOperandVal(1);
+ }
+
+ return 0;
+}
+
+/// @returns true if V>>Amount will eliminate V's operation on its child
+static bool willShiftRightEliminate(SDValue V, unsigned Amount) {
+ if (V.getOpcode() == ISD::MUL) {
+ SDValue Ops[] = { V.getOperand(0), V.getOperand(1) };
+ for (int i = 0; i < 2; ++i)
+ if (isa<ConstantSDNode>(Ops[i].getNode()) &&
+ V.getConstantOperandVal(i) % (1ULL << Amount) == 0) {
+ uint64_t NewConst = V.getConstantOperandVal(i) >> Amount;
+ return (NewConst == 1);
+ }
+ } else if (V.getOpcode() == ISD::SHL) {
+ return (Amount == V.getConstantOperandVal(1));
+ }
+
+ return false;
+}
+
+SDValue HexagonDAGToDAGISel::factorOutPowerOf2(SDValue V, unsigned Power) {
+ SDValue Ops[] = { V.getOperand(0), V.getOperand(1) };
+ if (V.getOpcode() == ISD::MUL) {
+ for (int i=0; i < 2; ++i) {
+ if (isa<ConstantSDNode>(Ops[i].getNode()) &&
+ V.getConstantOperandVal(i) % ((uint64_t)1 << Power) == 0) {
+ uint64_t NewConst = V.getConstantOperandVal(i) >> Power;
+ if (NewConst == 1)
+ return Ops[!i];
+ Ops[i] = CurDAG->getConstant(NewConst,
+ SDLoc(V), V.getValueType());
+ break;
+ }
+ }
+ } else if (V.getOpcode() == ISD::SHL) {
+ uint64_t ShiftAmount = V.getConstantOperandVal(1);
+ if (ShiftAmount == Power)
+ return Ops[0];
+ Ops[1] = CurDAG->getConstant(ShiftAmount - Power,
+ SDLoc(V), V.getValueType());
+ }
+
+ return CurDAG->getNode(V.getOpcode(), SDLoc(V), V.getValueType(), Ops);
+}
+
+static bool isTargetConstant(const SDValue &V) {
+ return V.getOpcode() == HexagonISD::CONST32 ||
+ V.getOpcode() == HexagonISD::CONST32_GP;
+}
+
+unsigned HexagonDAGToDAGISel::getUsesInFunction(const Value *V) {
+ if (GAUsesInFunction.count(V))
+ return GAUsesInFunction[V];
+
+ unsigned Result = 0;
+ const Function *CurF = CurDAG->getMachineFunction().getFunction();
+ for (const User *U : V->users()) {
+ if (isa<Instruction>(U) &&
+ cast<Instruction>(U)->getParent()->getParent() == CurF)
+ ++Result;
+ }
+
+ GAUsesInFunction[V] = Result;
+
+ return Result;
+}
+
+/// Note - After calling this, N may be dead. It may have been replaced by a
+/// new node, so always use the returned value in place of N.
+///
+/// @returns The SDValue taking the place of N (which could be N if it is
+/// unchanged)
+SDValue HexagonDAGToDAGISel::balanceSubTree(SDNode *N, bool TopLevel) {
+ assert(RootWeights.count(N) && "Cannot balance non-root node.");
+ assert(RootWeights[N] != -2 && "This node was RAUW'd!");
+ assert(!TopLevel || N->getOpcode() == ISD::ADD);
+
+ // Return early if this node was already visited
+ if (RootWeights[N] != -1)
+ return SDValue(N, 0);
+
+ assert(isOpcodeHandled(N));
+
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+
+ // Return early if the operands will remain unchanged or are all roots
+ if ((!isOpcodeHandled(Op0.getNode()) || RootWeights.count(Op0.getNode())) &&
+ (!isOpcodeHandled(Op1.getNode()) || RootWeights.count(Op1.getNode()))) {
+ SDNode *Op0N = Op0.getNode();
+ int Weight;
+ if (isOpcodeHandled(Op0N) && RootWeights[Op0N] == -1) {
+ Weight = getWeight(balanceSubTree(Op0N).getNode());
+ // Weight = calculateWeight(Op0N);
+ } else
+ Weight = getWeight(Op0N);
+
+ SDNode *Op1N = N->getOperand(1).getNode(); // Op1 may have been RAUWd
+ if (isOpcodeHandled(Op1N) && RootWeights[Op1N] == -1) {
+ Weight += getWeight(balanceSubTree(Op1N).getNode());
+ // Weight += calculateWeight(Op1N);
+ } else
+ Weight += getWeight(Op1N);
+
+ RootWeights[N] = Weight;
+ RootHeights[N] = std::max(getHeight(N->getOperand(0).getNode()),
+ getHeight(N->getOperand(1).getNode())) + 1;
+
+ DEBUG(dbgs() << "--> No need to balance root (Weight=" << Weight
+ << " Height=" << RootHeights[N] << "): ");
+ DEBUG(N->dump());
+
+ return SDValue(N, 0);
+ }
+
+ DEBUG(dbgs() << "** Balancing root node: ");
+ DEBUG(N->dump());
+
+ unsigned NOpcode = N->getOpcode();
+
+ LeafPrioQueue Leaves(NOpcode);
+ SmallVector<SDValue, 4> Worklist;
+ Worklist.push_back(SDValue(N, 0));
+
+ // SHL nodes will be converted to MUL nodes
+ if (NOpcode == ISD::SHL)
+ NOpcode = ISD::MUL;
+
+ bool CanFactorize = false;
+ WeightedLeaf Mul1, Mul2;
+ unsigned MaxPowerOf2 = 0;
+ WeightedLeaf GA;
+
+ // Do not try to factor out a shift if there is already a shift at the tip of
+ // the tree.
+ bool HaveTopLevelShift = false;
+ if (TopLevel &&
+ ((isOpcodeHandled(Op0.getNode()) && Op0.getOpcode() == ISD::SHL &&
+ Op0.getConstantOperandVal(1) < 4) ||
+ (isOpcodeHandled(Op1.getNode()) && Op1.getOpcode() == ISD::SHL &&
+ Op1.getConstantOperandVal(1) < 4)))
+ HaveTopLevelShift = true;
+
+ // Flatten the subtree into an ordered list of leaves; at the same time
+ // determine whether the tree is already balanced.
+ int InsertionOrder = 0;
+ SmallDenseMap<SDValue, int> NodeHeights;
+ bool Imbalanced = false;
+ int CurrentWeight = 0;
+ while (!Worklist.empty()) {
+ SDValue Child = Worklist.pop_back_val();
+
+ if (Child.getNode() != N && RootWeights.count(Child.getNode())) {
+      // CASE 1: Child is a root node
+
+ int Weight = RootWeights[Child.getNode()];
+ if (Weight == -1) {
+ Child = balanceSubTree(Child.getNode());
+ // calculateWeight(Child.getNode());
+ Weight = getWeight(Child.getNode());
+ } else if (Weight == -2) {
+ // Whoops, this node was RAUWd by one of the balanceSubTree calls we
+ // made. Our worklist isn't up to date anymore.
+ // Restart the whole process.
+ DEBUG(dbgs() << "--> Subtree was RAUWd. Restarting...\n");
+ return balanceSubTree(N, TopLevel);
+ }
+
+ NodeHeights[Child] = 1;
+ CurrentWeight += Weight;
+
+ unsigned PowerOf2;
+ if (TopLevel && !CanFactorize && !HaveTopLevelShift &&
+ (Child.getOpcode() == ISD::MUL || Child.getOpcode() == ISD::SHL) &&
+ Child.hasOneUse() && (PowerOf2 = getPowerOf2Factor(Child))) {
+ // Try to identify two factorizable MUL/SHL children greedily. Leave
+ // them out of the priority queue for now so we can deal with them
+ // after.
+ if (!Mul1.Value.getNode()) {
+ Mul1 = WeightedLeaf(Child, Weight, InsertionOrder++);
+ MaxPowerOf2 = PowerOf2;
+ } else {
+ Mul2 = WeightedLeaf(Child, Weight, InsertionOrder++);
+ MaxPowerOf2 = std::min(MaxPowerOf2, PowerOf2);
+
+ // Our addressing modes can only shift by a maximum of 3
+ if (MaxPowerOf2 > 3)
+ MaxPowerOf2 = 3;
+
+ CanFactorize = true;
+ }
+ } else
+ Leaves.push(WeightedLeaf(Child, Weight, InsertionOrder++));
+ } else if (!isOpcodeHandled(Child.getNode())) {
+ // CASE 2: Child is an unhandled kind of node (e.g. constant)
+ int Weight = getWeight(Child.getNode());
+
+ NodeHeights[Child] = getHeight(Child.getNode());
+ CurrentWeight += Weight;
+
+ if (isTargetConstant(Child) && !GA.Value.getNode())
+ GA = WeightedLeaf(Child, Weight, InsertionOrder++);
+ else
+ Leaves.push(WeightedLeaf(Child, Weight, InsertionOrder++));
+ } else {
+ // CASE 3: Child is a subtree of same opcode
+ // Visit children first, then flatten.
+ unsigned ChildOpcode = Child.getOpcode();
+ assert(ChildOpcode == NOpcode ||
+ (NOpcode == ISD::MUL && ChildOpcode == ISD::SHL));
+
+ // Convert SHL to MUL
+ SDValue Op1;
+ if (ChildOpcode == ISD::SHL)
+ Op1 = getMultiplierForSHL(Child.getNode());
+ else
+ Op1 = Child->getOperand(1);
+
+ if (!NodeHeights.count(Op1) || !NodeHeights.count(Child->getOperand(0))) {
+ assert(!NodeHeights.count(Child) && "Parent visited before children?");
+ // Visit children first, then re-visit this node
+ Worklist.push_back(Child);
+ Worklist.push_back(Op1);
+ Worklist.push_back(Child->getOperand(0));
+ } else {
+ // Back at this node after visiting the children
+ if (std::abs(NodeHeights[Op1] - NodeHeights[Child->getOperand(0)]) > 1)
+ Imbalanced = true;
+
+ NodeHeights[Child] = std::max(NodeHeights[Op1],
+ NodeHeights[Child->getOperand(0)]) + 1;
+ }
+ }
+ }
+
+ DEBUG(dbgs() << "--> Current height=" << NodeHeights[SDValue(N, 0)]
+ << " weight=" << CurrentWeight << " imbalanced="
+ << Imbalanced << "\n");
+
+ // Transform MUL(x, C * 2^Y) + SHL(z, Y) -> SHL(ADD(MUL(x, C), z), Y)
+ // This factors out a shift in order to match memw(a<<Y+b).
+ if (CanFactorize && (willShiftRightEliminate(Mul1.Value, MaxPowerOf2) ||
+ willShiftRightEliminate(Mul2.Value, MaxPowerOf2))) {
+ DEBUG(dbgs() << "--> Found common factor for two MUL children!\n");
+ int Weight = Mul1.Weight + Mul2.Weight;
+ int Height = std::max(NodeHeights[Mul1.Value], NodeHeights[Mul2.Value]) + 1;
+ SDValue Mul1Factored = factorOutPowerOf2(Mul1.Value, MaxPowerOf2);
+ SDValue Mul2Factored = factorOutPowerOf2(Mul2.Value, MaxPowerOf2);
+ SDValue Sum = CurDAG->getNode(ISD::ADD, SDLoc(N), Mul1.Value.getValueType(),
+ Mul1Factored, Mul2Factored);
+ SDValue Const = CurDAG->getConstant(MaxPowerOf2, SDLoc(N),
+ Mul1.Value.getValueType());
+ SDValue New = CurDAG->getNode(ISD::SHL, SDLoc(N), Mul1.Value.getValueType(),
+ Sum, Const);
+ NodeHeights[New] = Height;
+ Leaves.push(WeightedLeaf(New, Weight, Mul1.InsertionOrder));
+ } else if (Mul1.Value.getNode()) {
+ // We failed to factorize two MULs, so now the Muls are left outside the
+ // queue... add them back.
+ Leaves.push(Mul1);
+ if (Mul2.Value.getNode())
+ Leaves.push(Mul2);
+ CanFactorize = false;
+ }
+
+ // Combine GA + Constant -> GA+Offset, but only if GA is not used elsewhere
+ // and the root node itself is not used more than twice. This reduces the
+ // amount of additional constant extenders introduced by this optimization.
+ bool CombinedGA = false;
+ if (NOpcode == ISD::ADD && GA.Value.getNode() && Leaves.hasConst() &&
+ GA.Value.hasOneUse() && N->use_size() < 3) {
+ GlobalAddressSDNode *GANode =
+ cast<GlobalAddressSDNode>(GA.Value.getOperand(0));
+ ConstantSDNode *Offset = cast<ConstantSDNode>(Leaves.top().Value);
+
+ if (getUsesInFunction(GANode->getGlobal()) == 1 && Offset->hasOneUse() &&
+ getTargetLowering()->isOffsetFoldingLegal(GANode)) {
+ DEBUG(dbgs() << "--> Combining GA and offset (" << Offset->getSExtValue()
+ << "): ");
+ DEBUG(GANode->dump());
+
+ SDValue NewTGA =
+ CurDAG->getTargetGlobalAddress(GANode->getGlobal(), SDLoc(GA.Value),
+ GANode->getValueType(0),
+ GANode->getOffset() + (uint64_t)Offset->getSExtValue());
+ GA.Value = CurDAG->getNode(GA.Value.getOpcode(), SDLoc(GA.Value),
+ GA.Value.getValueType(), NewTGA);
+ GA.Weight += Leaves.top().Weight;
+
+ NodeHeights[GA.Value] = getHeight(GA.Value.getNode());
+ CombinedGA = true;
+
+ Leaves.pop(); // Remove the offset constant from the queue
+ }
+ }
+
+ if ((RebalanceOnlyForOptimizations && !CanFactorize && !CombinedGA) ||
+ (RebalanceOnlyImbalancedTrees && !Imbalanced)) {
+ RootWeights[N] = CurrentWeight;
+ RootHeights[N] = NodeHeights[SDValue(N, 0)];
+
+ return SDValue(N, 0);
+ }
+
+ // Combine GA + SHL(x, C<=31) so we will match Rx=add(#u8,asl(Rx,#U5))
+ if (NOpcode == ISD::ADD && GA.Value.getNode()) {
+ WeightedLeaf SHL = Leaves.findSHL(31);
+ if (SHL.Value.getNode()) {
+ int Height = std::max(NodeHeights[GA.Value], NodeHeights[SHL.Value]) + 1;
+ GA.Value = CurDAG->getNode(ISD::ADD, SDLoc(GA.Value),
+ GA.Value.getValueType(),
+ GA.Value, SHL.Value);
+ GA.Weight = SHL.Weight; // Specifically ignore the GA weight here
+ NodeHeights[GA.Value] = Height;
+ }
+ }
+
+ if (GA.Value.getNode())
+ Leaves.push(GA);
+
+ // If this is the top level and we haven't factored out a shift, we should try
+ // to move a constant to the bottom to match addressing modes like memw(rX+C)
+ if (TopLevel && !CanFactorize && Leaves.hasConst()) {
+ DEBUG(dbgs() << "--> Pushing constant to tip of tree.");
+ Leaves.pushToBottom(Leaves.pop());
+ }
+
+ const DataLayout &DL = CurDAG->getDataLayout();
+ const TargetLowering &TLI = *getTargetLowering();
+
+ // Rebuild the tree using Huffman's algorithm
+ while (Leaves.size() > 1) {
+ WeightedLeaf L0 = Leaves.pop();
+
+ // See whether we can grab a MUL to form an add(Rx,mpyi(Ry,#u6)),
+ // otherwise just get the next leaf
+ WeightedLeaf L1 = Leaves.findMULbyConst();
+ if (!L1.Value.getNode())
+ L1 = Leaves.pop();
+
+ assert(L0.Weight <= L1.Weight && "Priority queue is broken!");
+
+ SDValue V0 = L0.Value;
+ int V0Weight = L0.Weight;
+ SDValue V1 = L1.Value;
+ int V1Weight = L1.Weight;
+
+ // Make sure that none of these nodes have been RAUW'd
+ if ((RootWeights.count(V0.getNode()) && RootWeights[V0.getNode()] == -2) ||
+ (RootWeights.count(V1.getNode()) && RootWeights[V1.getNode()] == -2)) {
+ DEBUG(dbgs() << "--> Subtree was RAUWd. Restarting...\n");
+ return balanceSubTree(N, TopLevel);
+ }
+
+ ConstantSDNode *V0C = dyn_cast<ConstantSDNode>(V0);
+ ConstantSDNode *V1C = dyn_cast<ConstantSDNode>(V1);
+ EVT VT = N->getValueType(0);
+ SDValue NewNode;
+
+ if (V0C && !V1C) {
+ std::swap(V0, V1);
+ std::swap(V0C, V1C);
+ }
+
+ // Calculate height of this node
+ assert(NodeHeights.count(V0) && NodeHeights.count(V1) &&
+ "Children must have been visited before re-combining them!");
+ int Height = std::max(NodeHeights[V0], NodeHeights[V1]) + 1;
+
+ // Rebuild this node (and restore SHL from MUL if needed)
+ if (V1C && NOpcode == ISD::MUL && V1C->getAPIntValue().isPowerOf2())
+ NewNode = CurDAG->getNode(
+ ISD::SHL, SDLoc(V0), VT, V0,
+ CurDAG->getConstant(
+ V1C->getAPIntValue().logBase2(), SDLoc(N),
+ TLI.getScalarShiftAmountTy(DL, V0.getValueType())));
+ else
+ NewNode = CurDAG->getNode(NOpcode, SDLoc(N), VT, V0, V1);
+
+ NodeHeights[NewNode] = Height;
+
+ int Weight = V0Weight + V1Weight;
+ Leaves.push(WeightedLeaf(NewNode, Weight, L0.InsertionOrder));
+
+ DEBUG(dbgs() << "--> Built new node (Weight=" << Weight << ",Height="
+ << Height << "):\n");
+ DEBUG(NewNode.dump());
+ }
+
+ assert(Leaves.size() == 1);
+ SDValue NewRoot = Leaves.top().Value;
+
+ assert(NodeHeights.count(NewRoot));
+ int Height = NodeHeights[NewRoot];
+
+ // Restore SHL if we earlier converted it to a MUL
+ if (NewRoot.getOpcode() == ISD::MUL) {
+ ConstantSDNode *V1C = dyn_cast<ConstantSDNode>(NewRoot.getOperand(1));
+ if (V1C && V1C->getAPIntValue().isPowerOf2()) {
+ EVT VT = NewRoot.getValueType();
+ SDValue V0 = NewRoot.getOperand(0);
+ NewRoot = CurDAG->getNode(
+ ISD::SHL, SDLoc(NewRoot), VT, V0,
+ CurDAG->getConstant(
+ V1C->getAPIntValue().logBase2(), SDLoc(NewRoot),
+ TLI.getScalarShiftAmountTy(DL, V0.getValueType())));
+ }
+ }
+
+ if (N != NewRoot.getNode()) {
+ DEBUG(dbgs() << "--> Root is now: ");
+ DEBUG(NewRoot.dump());
+
+ // Replace all uses of old root by new root
+ CurDAG->ReplaceAllUsesWith(N, NewRoot.getNode());
+ // Mark that we have RAUW'd N
+ RootWeights[N] = -2;
+ } else {
+ DEBUG(dbgs() << "--> Root unchanged.\n");
+ }
+
+ RootWeights[NewRoot.getNode()] = Leaves.top().Weight;
+ RootHeights[NewRoot.getNode()] = Height;
+
+ return NewRoot;
+}
+
+void HexagonDAGToDAGISel::rebalanceAddressTrees() {
+ for (auto I = CurDAG->allnodes_begin(), E = CurDAG->allnodes_end(); I != E;) {
+ SDNode *N = &*I++;
+ if (N->getOpcode() != ISD::LOAD && N->getOpcode() != ISD::STORE)
+ continue;
+
+ SDValue BasePtr = cast<MemSDNode>(N)->getBasePtr();
+ if (BasePtr.getOpcode() != ISD::ADD)
+ continue;
+
+ // We've already processed this node
+ if (RootWeights.count(BasePtr.getNode()))
+ continue;
+
+ DEBUG(dbgs() << "** Rebalancing address calculation in node: ");
+ DEBUG(N->dump());
+
+ // FindRoots
+ SmallVector<SDNode *, 4> Worklist;
+
+ Worklist.push_back(BasePtr.getOperand(0).getNode());
+ Worklist.push_back(BasePtr.getOperand(1).getNode());
+
+ while (!Worklist.empty()) {
+ SDNode *N = Worklist.pop_back_val();
+ unsigned Opcode = N->getOpcode();
+
+ if (!isOpcodeHandled(N))
+ continue;
+
+ Worklist.push_back(N->getOperand(0).getNode());
+ Worklist.push_back(N->getOperand(1).getNode());
+
+ // Not a root if it has only one use and same opcode as its parent
+ if (N->hasOneUse() && Opcode == N->use_begin()->getOpcode())
+ continue;
+
+ // This root node has already been processed
+ if (RootWeights.count(N))
+ continue;
+
+ RootWeights[N] = -1;
+ }
+
+ // Balance node itself
+ RootWeights[BasePtr.getNode()] = -1;
+ SDValue NewBasePtr = balanceSubTree(BasePtr.getNode(), /*TopLevel=*/ true);
+
+ if (N->getOpcode() == ISD::LOAD)
+ N = CurDAG->UpdateNodeOperands(N, N->getOperand(0),
+ NewBasePtr, N->getOperand(2));
+ else
+ N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1),
+ NewBasePtr, N->getOperand(3));
+
+ DEBUG(dbgs() << "--> Final node: ");
+ DEBUG(N->dump());
+ }
+
+ CurDAG->RemoveDeadNodes();
+ GAUsesInFunction.clear();
+ RootHeights.clear();
+ RootWeights.clear();
+}
+
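The rebalancing added above works in two phases: rebalanceAddressTrees() walks load/store base pointers and marks the roots of the ADD/MUL/SHL trees feeding them, and balanceSubTree() then flattens each tree into weighted leaves and rebuilds it Huffman-style, repeatedly combining the two lightest leaves so that heavier subtrees (those with more work below them) end up closer to the new root. A rough standalone sketch of that rebuild step on plain strings and integers follows; the leaf names and weights are invented, and the real code combines SDValues and also handles the GA-plus-offset and shift-factoring special cases shown above.

    // Illustration only: Node, the leaf names, and the weights are made up;
    // the real rebuild in balanceSubTree() combines SDValues via
    // CurDAG->getNode().
    #include <cstdio>
    #include <queue>
    #include <string>
    #include <vector>

    struct Node {
      int Weight;
      std::string Expr;
    };

    struct ByWeight {
      bool operator()(const Node &A, const Node &B) const {
        return A.Weight > B.Weight;   // invert so the priority_queue pops the min
      }
    };

    int main() {
      std::priority_queue<Node, std::vector<Node>, ByWeight> Q;
      // Leaves of a flattened address computation a + b + c + d.
      Q.push({1, "a"});
      Q.push({2, "b"});
      Q.push({4, "c"});
      Q.push({7, "d"});

      while (Q.size() > 1) {
        Node L0 = Q.top(); Q.pop();   // lightest remaining leaf
        Node L1 = Q.top(); Q.pop();   // second lightest
        Node N{L0.Weight + L1.Weight, "(" + L0.Expr + " + " + L1.Expr + ")"};
        std::printf("combine: %s (weight %d)\n", N.Expr.c_str(), N.Weight);
        Q.push(N);
      }
      // The heaviest leaf (d) is combined last, so it sits next to the root.
      std::printf("root: %s\n", Q.top().Expr.c_str());
      return 0;
    }
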
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
index cdd4c2f8617d..e87e1e6a7e0f 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -12,30 +12,52 @@
//
//===----------------------------------------------------------------------===//
+#include "Hexagon.h"
#include "HexagonISelLowering.h"
#include "HexagonMachineFunctionInfo.h"
+#include "HexagonRegisterInfo.h"
#include "HexagonSubtarget.h"
#include "HexagonTargetMachine.h"
#include "HexagonTargetObjectFile.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalAlias.h"
-#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetCallingConv.h"
+#include "llvm/Target/TargetMachine.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <limits>
+#include <utility>
using namespace llvm;
@@ -83,23 +105,31 @@ static cl::opt<int> MaxStoresPerMemsetOptSizeCL("max-store-memset-Os",
namespace {
-class HexagonCCState : public CCState {
- unsigned NumNamedVarArgParams;
-public:
- HexagonCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
- SmallVectorImpl<CCValAssign> &locs, LLVMContext &C,
- int NumNamedVarArgParams)
- : CCState(CC, isVarArg, MF, locs, C),
- NumNamedVarArgParams(NumNamedVarArgParams) {}
+ class HexagonCCState : public CCState {
+ unsigned NumNamedVarArgParams;
- unsigned getNumNamedVarArgParams() const { return NumNamedVarArgParams; }
-};
-}
+ public:
+ HexagonCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
+ SmallVectorImpl<CCValAssign> &locs, LLVMContext &C,
+ int NumNamedVarArgParams)
+ : CCState(CC, isVarArg, MF, locs, C),
+ NumNamedVarArgParams(NumNamedVarArgParams) {}
+
+ unsigned getNumNamedVarArgParams() const { return NumNamedVarArgParams; }
+ };
+
+ enum StridedLoadKind {
+ Even = 0,
+ Odd,
+ NoPattern
+ };
+
+} // end anonymous namespace
// Implement calling convention for Hexagon.
-static bool IsHvxVectorType(MVT ty);
+static bool isHvxVectorType(MVT ty);
static bool
CC_Hexagon(unsigned ValNo, MVT ValVT,
@@ -153,13 +183,13 @@ CC_Hexagon_VarArg (unsigned ValNo, MVT ValVT,
}
// Deal with un-named arguments.
- unsigned ofst;
+ unsigned Offset;
if (ArgFlags.isByVal()) {
// If pass-by-value, the size allocated on stack is decided
// by ArgFlags.getByValSize(), not by the size of LocVT.
- ofst = State.AllocateStack(ArgFlags.getByValSize(),
- ArgFlags.getByValAlign());
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
+ Offset = State.AllocateStack(ArgFlags.getByValSize(),
+ ArgFlags.getByValAlign());
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
return false;
}
if (LocVT == MVT::i1 || LocVT == MVT::i8 || LocVT == MVT::i16) {
@@ -173,50 +203,49 @@ CC_Hexagon_VarArg (unsigned ValNo, MVT ValVT,
LocInfo = CCValAssign::AExt;
}
if (LocVT == MVT::i32 || LocVT == MVT::f32) {
- ofst = State.AllocateStack(4, 4);
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
+ Offset = State.AllocateStack(4, 4);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
return false;
}
if (LocVT == MVT::i64 || LocVT == MVT::f64) {
- ofst = State.AllocateStack(8, 8);
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
+ Offset = State.AllocateStack(8, 8);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
return false;
}
if (LocVT == MVT::v2i64 || LocVT == MVT::v4i32 || LocVT == MVT::v8i16 ||
LocVT == MVT::v16i8) {
- ofst = State.AllocateStack(16, 16);
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
+ Offset = State.AllocateStack(16, 16);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
return false;
}
if (LocVT == MVT::v4i64 || LocVT == MVT::v8i32 || LocVT == MVT::v16i16 ||
LocVT == MVT::v32i8) {
- ofst = State.AllocateStack(32, 32);
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
+ Offset = State.AllocateStack(32, 32);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
return false;
}
if (LocVT == MVT::v8i64 || LocVT == MVT::v16i32 || LocVT == MVT::v32i16 ||
LocVT == MVT::v64i8 || LocVT == MVT::v512i1) {
- ofst = State.AllocateStack(64, 64);
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
+ Offset = State.AllocateStack(64, 64);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
return false;
}
if (LocVT == MVT::v16i64 || LocVT == MVT::v32i32 || LocVT == MVT::v64i16 ||
LocVT == MVT::v128i8 || LocVT == MVT::v1024i1) {
- ofst = State.AllocateStack(128, 128);
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
+ Offset = State.AllocateStack(128, 128);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
return false;
}
if (LocVT == MVT::v32i64 || LocVT == MVT::v64i32 || LocVT == MVT::v128i16 ||
LocVT == MVT::v256i8) {
- ofst = State.AllocateStack(256, 256);
- State.addLoc(CCValAssign::getMem(ValNo, ValVT, ofst, LocVT, LocInfo));
+ Offset = State.AllocateStack(256, 256);
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
return false;
}
llvm_unreachable(nullptr);
}
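The vector cases above all follow one pattern: the stack slot's size and its alignment both equal the vector's width in bytes. A hypothetical helper capturing that pattern, assuming the CCState and MVT interfaces already used in this function:

// Sketch only: every vector argument passed in memory gets a slot whose size
// and alignment equal its byte width (16, 32, 64, 128 or 256 for the types
// listed above). Returns the offset of the new slot.
static unsigned allocateVectorArgSlot(MVT LocVT, CCState &State) {
  unsigned SlotBytes = LocVT.getSizeInBits() / 8;   // e.g. MVT::v16i32 -> 64
  return State.AllocateStack(SlotBytes, SlotBytes);
}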
-
static bool CC_Hexagon (unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) {
if (ArgFlags.isByVal()) {
@@ -260,7 +289,7 @@ static bool CC_Hexagon (unsigned ValNo, MVT ValVT, MVT LocVT,
return false;
}
- if (IsHvxVectorType(LocVT)) {
+ if (isHvxVectorType(LocVT)) {
if (!CC_HexagonVector(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
return false;
}
@@ -272,7 +301,6 @@ static bool CC_Hexagon (unsigned ValNo, MVT ValVT, MVT LocVT,
static bool CC_Hexagon32(unsigned ValNo, MVT ValVT,
MVT LocVT, CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State) {
-
static const MCPhysReg RegList[] = {
Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4,
Hexagon::R5
@@ -290,7 +318,6 @@ static bool CC_Hexagon32(unsigned ValNo, MVT ValVT,
static bool CC_Hexagon64(unsigned ValNo, MVT ValVT,
MVT LocVT, CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State) {
-
if (unsigned Reg = State.AllocateReg(Hexagon::D0)) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
return false;
@@ -315,19 +342,16 @@ static bool CC_Hexagon64(unsigned ValNo, MVT ValVT,
static bool CC_HexagonVector(unsigned ValNo, MVT ValVT,
MVT LocVT, CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State) {
-
- static const MCPhysReg VecLstS[] = { Hexagon::V0, Hexagon::V1,
- Hexagon::V2, Hexagon::V3,
- Hexagon::V4, Hexagon::V5,
- Hexagon::V6, Hexagon::V7,
- Hexagon::V8, Hexagon::V9,
- Hexagon::V10, Hexagon::V11,
- Hexagon::V12, Hexagon::V13,
- Hexagon::V14, Hexagon::V15};
- static const MCPhysReg VecLstD[] = { Hexagon::W0, Hexagon::W1,
- Hexagon::W2, Hexagon::W3,
- Hexagon::W4, Hexagon::W5,
- Hexagon::W6, Hexagon::W7};
+ static const MCPhysReg VecLstS[] = {
+ Hexagon::V0, Hexagon::V1, Hexagon::V2, Hexagon::V3, Hexagon::V4,
+ Hexagon::V5, Hexagon::V6, Hexagon::V7, Hexagon::V8, Hexagon::V9,
+ Hexagon::V10, Hexagon::V11, Hexagon::V12, Hexagon::V13, Hexagon::V14,
+ Hexagon::V15
+ };
+ static const MCPhysReg VecLstD[] = {
+ Hexagon::W0, Hexagon::W1, Hexagon::W2, Hexagon::W3, Hexagon::W4,
+ Hexagon::W5, Hexagon::W6, Hexagon::W7
+ };
auto &MF = State.getMachineFunction();
auto &HST = MF.getSubtarget<HexagonSubtarget>();
bool UseHVX = HST.useHVXOps();
@@ -429,16 +453,16 @@ static bool RetCC_Hexagon(unsigned ValNo, MVT ValVT,
}
if (LocVT == MVT::i32 || LocVT == MVT::f32) {
if (!RetCC_Hexagon32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
- return false;
+ return false;
}
if (LocVT == MVT::i64 || LocVT == MVT::f64) {
if (!RetCC_Hexagon64(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
- return false;
+ return false;
}
if (LocVT == MVT::v16i32 || LocVT == MVT::v32i32 || LocVT == MVT::v64i32) {
if (!RetCC_HexagonVector(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State))
- return false;
+ return false;
}
return true; // CC didn't match.
}
@@ -452,7 +476,7 @@ static bool RetCC_Hexagon32(unsigned ValNo, MVT ValVT,
// return structs using these additional registers.
static const uint16_t RegList[] = { Hexagon::R0, Hexagon::R1,
Hexagon::R2, Hexagon::R3,
- Hexagon::R4, Hexagon::R5};
+ Hexagon::R4, Hexagon::R5 };
if (unsigned Reg = State.AllocateReg(RegList)) {
State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
return false;
@@ -525,7 +549,7 @@ void HexagonTargetLowering::promoteLdStType(MVT VT, MVT PromotedLdStVT) {
SDValue
HexagonTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG)
-const {
+ const {
return SDValue();
}
@@ -537,7 +561,6 @@ const {
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
SDValue Chain, ISD::ArgFlagsTy Flags,
SelectionDAG &DAG, const SDLoc &dl) {
-
SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(),
/*isVolatile=*/false, /*AlwaysInline=*/false,
@@ -545,14 +568,26 @@ static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
MachinePointerInfo(), MachinePointerInfo());
}
-static bool IsHvxVectorType(MVT ty) {
- return (ty == MVT::v8i64 || ty == MVT::v16i32 || ty == MVT::v32i16 ||
- ty == MVT::v64i8 ||
- ty == MVT::v16i64 || ty == MVT::v32i32 || ty == MVT::v64i16 ||
- ty == MVT::v128i8 ||
- ty == MVT::v32i64 || ty == MVT::v64i32 || ty == MVT::v128i16 ||
- ty == MVT::v256i8 ||
- ty == MVT::v512i1 || ty == MVT::v1024i1);
+static bool isHvxVectorType(MVT Ty) {
+ switch (Ty.SimpleTy) {
+ case MVT::v8i64:
+ case MVT::v16i32:
+ case MVT::v32i16:
+ case MVT::v64i8:
+ case MVT::v16i64:
+ case MVT::v32i32:
+ case MVT::v64i16:
+ case MVT::v128i8:
+ case MVT::v32i64:
+ case MVT::v64i32:
+ case MVT::v128i16:
+ case MVT::v256i8:
+ case MVT::v512i1:
+ case MVT::v1024i1:
+ return true;
+ default:
+ return false;
+ }
}
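The switch enumerates exactly the 512-, 1024- and 2048-bit vector types, plus the 512/1024-bit predicate vectors. An equivalent size-based sketch, under the assumption that no other vector types of those widths reach this query:

// Sketch: classify HVX types by total width instead of listing each MVT.
static bool isHvxSizedVector(MVT Ty) {
  if (!Ty.isVector())
    return false;
  unsigned Bits = Ty.getSizeInBits();
  return Bits == 512 || Bits == 1024 || Bits == 2048;
}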
// LowerReturn - Lower ISD::RET. If a struct is larger than 8 bytes and is
@@ -564,7 +599,6 @@ HexagonTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SDLoc &dl, SelectionDAG &DAG) const {
-
// CCValAssign - represent the assignment of the return value to locations.
SmallVector<CCValAssign, 16> RVLocs;
@@ -669,17 +703,17 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
SDValue Chain = CLI.Chain;
SDValue Callee = CLI.Callee;
- bool &isTailCall = CLI.IsTailCall;
+ bool &IsTailCall = CLI.IsTailCall;
CallingConv::ID CallConv = CLI.CallConv;
- bool isVarArg = CLI.IsVarArg;
- bool doesNotReturn = CLI.DoesNotReturn;
+ bool IsVarArg = CLI.IsVarArg;
+ bool DoesNotReturn = CLI.DoesNotReturn;
bool IsStructRet = (Outs.empty()) ? false : Outs[0].Flags.isSRet();
MachineFunction &MF = DAG.getMachineFunction();
auto PtrVT = getPointerTy(MF.getDataLayout());
// Check for varargs.
- int NumNamedVarArgParams = -1;
+ unsigned NumNamedVarArgParams = -1U;
if (GlobalAddressSDNode *GAN = dyn_cast<GlobalAddressSDNode>(Callee)) {
const GlobalValue *GV = GAN->getGlobal();
Callee = DAG.getTargetGlobalAddress(GV, dl, MVT::i32);
@@ -694,32 +728,32 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
- HexagonCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
+ HexagonCCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
*DAG.getContext(), NumNamedVarArgParams);
- if (isVarArg)
+ if (IsVarArg)
CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon_VarArg);
else
CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon);
auto Attr = MF.getFunction()->getFnAttribute("disable-tail-calls");
if (Attr.getValueAsString() == "true")
- isTailCall = false;
+ IsTailCall = false;
- if (isTailCall) {
+ if (IsTailCall) {
bool StructAttrFlag = MF.getFunction()->hasStructRetAttr();
- isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
- isVarArg, IsStructRet,
+ IsTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
+ IsVarArg, IsStructRet,
StructAttrFlag,
Outs, OutVals, Ins, DAG);
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
if (VA.isMemLoc()) {
- isTailCall = false;
+ IsTailCall = false;
break;
}
}
- DEBUG(dbgs() << (isTailCall ? "Eligible for Tail Call\n"
+ DEBUG(dbgs() << (IsTailCall ? "Eligible for Tail Call\n"
: "Argument must be passed on stack. "
"Not eligible for Tail Call\n"));
}
@@ -740,7 +774,7 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SDValue Arg = OutVals[i];
ISD::ArgFlagsTy Flags = Outs[i].Flags;
// Record if we need > 8 byte alignment on an argument.
- bool ArgAlign = IsHvxVectorType(VA.getValVT());
+ bool ArgAlign = isHvxVectorType(VA.getValVT());
NeedsArgAlign |= ArgAlign;
// Promote the value if needed.
@@ -792,35 +826,35 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (NeedsArgAlign && Subtarget.hasV60TOps()) {
DEBUG(dbgs() << "Function needs byte stack align due to call args\n");
- MachineFrameInfo* MFI = DAG.getMachineFunction().getFrameInfo();
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
// V6 vectors passed by value have 64 or 128 byte alignment depending
// on whether we are 64 byte vector mode or 128 byte.
bool UseHVXDbl = Subtarget.useHVXDblOps();
assert(Subtarget.useHVXOps());
const unsigned ObjAlign = UseHVXDbl ? 128 : 64;
LargestAlignSeen = std::max(LargestAlignSeen, ObjAlign);
- MFI->ensureMaxAlignment(LargestAlignSeen);
+ MFI.ensureMaxAlignment(LargestAlignSeen);
}
// Transform all store nodes into one single node because all store
// nodes are independent of each other.
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
- if (!isTailCall) {
+ if (!IsTailCall) {
SDValue C = DAG.getConstant(NumBytes, dl, PtrVT, true);
Chain = DAG.getCALLSEQ_START(Chain, C, dl);
}
// Build a sequence of copy-to-reg nodes chained together with token
// chain and flag operands which copy the outgoing args into registers.
- // The InFlag in necessary since all emitted instructions must be
+ // The Glue is necessary since all emitted instructions must be
// stuck together.
- SDValue InFlag;
- if (!isTailCall) {
+ SDValue Glue;
+ if (!IsTailCall) {
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
- RegsToPass[i].second, InFlag);
- InFlag = Chain.getValue(1);
+ RegsToPass[i].second, Glue);
+ Glue = Chain.getValue(1);
}
} else {
// For tail calls lower the arguments to the 'real' stack slot.
@@ -833,23 +867,26 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// on every argument instead of just those arguments it would clobber.
//
// Do not flag preceding copytoreg stuff together with the following stuff.
- InFlag = SDValue();
+ Glue = SDValue();
for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
- RegsToPass[i].second, InFlag);
- InFlag = Chain.getValue(1);
+ RegsToPass[i].second, Glue);
+ Glue = Chain.getValue(1);
}
- InFlag = SDValue();
+ Glue = SDValue();
}
+ bool LongCalls = MF.getSubtarget<HexagonSubtarget>().useLongCalls();
+ unsigned Flags = LongCalls ? HexagonII::HMOTF_ConstExtended : 0;
+
// If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
// direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
// node so that legalize doesn't hack it.
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
- Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, PtrVT);
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, PtrVT, 0, Flags);
} else if (ExternalSymbolSDNode *S =
dyn_cast<ExternalSymbolSDNode>(Callee)) {
- Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT);
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, Flags);
}
// Returns a chain & a flag for retval copy to use.
@@ -865,33 +902,32 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
RegsToPass[i].second.getValueType()));
}
- if (InFlag.getNode())
- Ops.push_back(InFlag);
+ if (Glue.getNode())
+ Ops.push_back(Glue);
- if (isTailCall) {
- MF.getFrameInfo()->setHasTailCall();
+ if (IsTailCall) {
+ MF.getFrameInfo().setHasTailCall();
return DAG.getNode(HexagonISD::TC_RETURN, dl, NodeTys, Ops);
}
- int OpCode = doesNotReturn ? HexagonISD::CALLv3nr : HexagonISD::CALLv3;
+ unsigned OpCode = DoesNotReturn ? HexagonISD::CALLnr : HexagonISD::CALL;
Chain = DAG.getNode(OpCode, dl, NodeTys, Ops);
- InFlag = Chain.getValue(1);
+ Glue = Chain.getValue(1);
// Create the CALLSEQ_END node.
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
- DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
- InFlag = Chain.getValue(1);
+ DAG.getIntPtrConstant(0, dl, true), Glue, dl);
+ Glue = Chain.getValue(1);
// Handle result values, copying them out of physregs into vregs that we
// return.
- return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
+ return LowerCallResult(Chain, Glue, CallConv, IsVarArg, Ins, dl, DAG,
InVals, OutVals, Callee);
}
static bool getIndexedAddressParts(SDNode *Ptr, EVT VT,
- bool isSEXTLoad, SDValue &Base,
- SDValue &Offset, bool &isInc,
- SelectionDAG &DAG) {
+ SDValue &Base, SDValue &Offset,
+ bool &IsInc, SelectionDAG &DAG) {
if (Ptr->getOpcode() != ISD::ADD)
return false;
@@ -908,11 +944,11 @@ static bool getIndexedAddressParts(SDNode *Ptr, EVT VT,
if (ValidHVXDblType || ValidHVXType ||
VT == MVT::i64 || VT == MVT::i32 || VT == MVT::i16 || VT == MVT::i8) {
- isInc = (Ptr->getOpcode() == ISD::ADD);
+ IsInc = (Ptr->getOpcode() == ISD::ADD);
Base = Ptr->getOperand(0);
Offset = Ptr->getOperand(1);
// Ensure that Offset is a constant.
- return (isa<ConstantSDNode>(Offset));
+ return isa<ConstantSDNode>(Offset);
}
return false;
@@ -929,28 +965,24 @@ bool HexagonTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
{
EVT VT;
SDValue Ptr;
- bool isSEXTLoad = false;
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
VT = LD->getMemoryVT();
- isSEXTLoad = LD->getExtensionType() == ISD::SEXTLOAD;
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
VT = ST->getMemoryVT();
- if (ST->getValue().getValueType() == MVT::i64 && ST->isTruncatingStore()) {
+ if (ST->getValue().getValueType() == MVT::i64 && ST->isTruncatingStore())
return false;
- }
} else {
return false;
}
- bool isInc = false;
- bool isLegal = getIndexedAddressParts(Op, VT, isSEXTLoad, Base, Offset,
- isInc, DAG);
+ bool IsInc = false;
+ bool isLegal = getIndexedAddressParts(Op, VT, Base, Offset, IsInc, DAG);
if (isLegal) {
auto &HII = *Subtarget.getInstrInfo();
int32_t OffsetVal = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
if (HII.isValidAutoIncImm(VT, OffsetVal)) {
- AM = isInc ? ISD::POST_INC : ISD::POST_DEC;
+ AM = IsInc ? ISD::POST_INC : ISD::POST_DEC;
return true;
}
}
@@ -1054,7 +1086,7 @@ HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
A = HFI.getStackAlignment();
DEBUG({
- dbgs () << LLVM_FUNCTION_NAME << " Align: " << A << " Size: ";
+ dbgs () << __func__ << " Align: " << A << " Size: ";
Size.getNode()->dump(&DAG);
dbgs() << "\n";
});
@@ -1071,9 +1103,8 @@ SDValue HexagonTargetLowering::LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
-
MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
auto &FuncInfo = *MF.getInfo<HexagonMachineFunctionInfo>();
@@ -1173,7 +1204,7 @@ SDValue HexagonTargetLowering::LowerFormalArguments(
StackLocation = HEXAGON_LRFP_SIZE + VA.getLocMemOffset();
// Create the frame index object for this incoming parameter...
- FI = MFI->CreateFixedObject(ObjSize, StackLocation, true);
+ FI = MFI.CreateFixedObject(ObjSize, StackLocation, true);
      // Create the SelectionDAG nodes corresponding to a load
// from this parameter.
@@ -1196,10 +1227,10 @@ SDValue HexagonTargetLowering::LowerFormalArguments(
if (isVarArg) {
// This will point to the next argument passed via stack.
- int FrameIndex = MFI->CreateFixedObject(Hexagon_PointerSize,
- HEXAGON_LRFP_SIZE +
- CCInfo.getNextStackOffset(),
- true);
+ int FrameIndex = MFI.CreateFixedObject(Hexagon_PointerSize,
+ HEXAGON_LRFP_SIZE +
+ CCInfo.getNextStackOffset(),
+ true);
FuncInfo.setVarArgsFrameIndex(FrameIndex);
}
@@ -1392,7 +1423,6 @@ SDValue HexagonTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
return DAG.getMergeValues(Ops, DL);
}
-
SDValue
HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
EVT ValTy = Op.getValueType();
@@ -1401,11 +1431,18 @@ HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
bool IsPositionIndependent = isPositionIndependent();
unsigned char TF = IsPositionIndependent ? HexagonII::MO_PCREL : 0;
+ unsigned Offset = 0;
SDValue T;
if (CPN->isMachineConstantPoolEntry())
- T = DAG.getTargetConstantPool(CPN->getMachineCPVal(), ValTy, Align, TF);
+ T = DAG.getTargetConstantPool(CPN->getMachineCPVal(), ValTy, Align, Offset,
+ TF);
else
- T = DAG.getTargetConstantPool(CPN->getConstVal(), ValTy, Align, TF);
+ T = DAG.getTargetConstantPool(CPN->getConstVal(), ValTy, Align, Offset,
+ TF);
+
+ assert(cast<ConstantPoolSDNode>(T)->getTargetFlags() == TF &&
+ "Inconsistent target flag encountered");
+
if (IsPositionIndependent)
return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), ValTy, T);
return DAG.getNode(HexagonISD::CP, SDLoc(Op), ValTy, T);
@@ -1428,7 +1465,7 @@ SDValue
HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const {
const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
MFI.setReturnAddressIsTaken(true);
if (verifyReturnAddressArgumentIsConstant(Op, DAG))
@@ -1453,7 +1490,7 @@ HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const {
SDValue
HexagonTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
- MachineFrameInfo &MFI = *DAG.getMachineFunction().getFrameInfo();
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
MFI.setFrameAddressIsTaken(true);
EVT VT = Op.getValueType();
@@ -1473,7 +1510,6 @@ HexagonTargetLowering::LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const {
return DAG.getNode(HexagonISD::BARRIER, dl, MVT::Other, Op.getOperand(0));
}
-
SDValue
HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const {
SDLoc dl(Op);
@@ -1487,7 +1523,8 @@ HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const {
if (RM == Reloc::Static) {
SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset);
- if (HLOF.isGlobalInSmallSection(GV, HTM))
+ const GlobalObject *GO = GV->getBaseObject();
+ if (GO && HLOF.isGlobalInSmallSection(GO, HTM))
return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, GA);
return DAG.getNode(HexagonISD::CONST32, dl, PtrVT, GA);
}
@@ -1536,7 +1573,7 @@ SDValue
HexagonTargetLowering::GetDynamicTLSAddr(SelectionDAG &DAG, SDValue Chain,
GlobalAddressSDNode *GA, SDValue *InFlag, EVT PtrVT, unsigned ReturnReg,
unsigned char OperandFlags) const {
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
SDLoc dl(GA);
SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
@@ -1554,14 +1591,14 @@ HexagonTargetLowering::GetDynamicTLSAddr(SelectionDAG &DAG, SDValue Chain,
if (InFlag) {
SDValue Ops[] = { Chain, TGA,
DAG.getRegister(Hexagon::R0, PtrVT), *InFlag };
- Chain = DAG.getNode(HexagonISD::CALLv3, dl, NodeTys, Ops);
+ Chain = DAG.getNode(HexagonISD::CALL, dl, NodeTys, Ops);
} else {
SDValue Ops[] = { Chain, TGA, DAG.getRegister(Hexagon::R0, PtrVT)};
- Chain = DAG.getNode(HexagonISD::CALLv3, dl, NodeTys, Ops);
+ Chain = DAG.getNode(HexagonISD::CALL, dl, NodeTys, Ops);
}
// Inform MFI that function has calls.
- MFI->setAdjustsStack(true);
+ MFI.setAdjustsStack(true);
SDValue Flag = Chain.getValue(1);
return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Flag);
@@ -1761,7 +1798,6 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::v32i64, &Hexagon::VecDblRegs128BRegClass);
addRegisterClass(MVT::v1024i1, &Hexagon::VecPredRegs128BRegClass);
}
-
}
//
@@ -1812,7 +1848,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
if (EmitJumpTables)
setMinimumJumpTableEntries(MinimumJumpTables);
else
- setMinimumJumpTableEntries(INT_MAX);
+ setMinimumJumpTableEntries(std::numeric_limits<int>::max());
setOperationAction(ISD::BR_JT, MVT::Other, Expand);
// Hexagon has instructions for add/sub with carry. The problem with
@@ -1861,7 +1897,6 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
// operation. There is a pattern that will match i64 mul and transform it
// to a series of instructions.
setOperationAction(ISD::MUL, MVT::i64, Expand);
- setOperationAction(ISD::MULHS, MVT::i64, Expand);
for (unsigned IntExpOp :
{ ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM,
@@ -1887,7 +1922,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
}
// Turn FP truncstore into trunc + store.
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
- // Turn FP extload into load/fextend.
+ // Turn FP extload into load/fpextend.
for (MVT VT : MVT::fp_valuetypes())
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
@@ -1937,7 +1972,7 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
ISD::FRINT, ISD::FNEARBYINT, ISD::FROUND, ISD::FFLOOR,
ISD::FMINNUM, ISD::FMAXNUM, ISD::FSINCOS,
// Misc:
- ISD::SELECT, ISD::ConstantPool,
+ ISD::BR_CC, ISD::SELECT_CC, ISD::ConstantPool,
// Vector:
ISD::BUILD_VECTOR, ISD::SCALAR_TO_VECTOR,
ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT,
@@ -1949,12 +1984,22 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
for (unsigned VectExpOp : VectExpOps)
setOperationAction(VectExpOp, VT, Expand);
- // Expand all extended loads and truncating stores:
+ // Expand all extending loads and truncating stores:
for (MVT TargetVT : MVT::vector_valuetypes()) {
+ if (TargetVT == VT)
+ continue;
setLoadExtAction(ISD::EXTLOAD, TargetVT, VT, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, TargetVT, VT, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, TargetVT, VT, Expand);
setTruncStoreAction(VT, TargetVT, Expand);
}
+ // Normalize all inputs to SELECT to be vectors of i32.
+ if (VT.getVectorElementType() != MVT::i32) {
+ MVT VT32 = MVT::getVectorVT(MVT::i32, VT.getSizeInBits()/32);
+ setOperationAction(ISD::SELECT, VT, Promote);
+ AddPromotedToType(ISD::SELECT, VT, VT32);
+ }
setOperationAction(ISD::SRA, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRL, VT, Custom);
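The SELECT promotion above keeps the vector's total width and switches the lane type to i32. A one-line sketch of the lane arithmetic it relies on:

// e.g. v4i16 (64 bits) -> 2 x i32, v8i8 (64 bits) -> 2 x i32.
static unsigned promotedI32Lanes(unsigned VectorBits) {
  return VectorBits / 32;
}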
@@ -1983,17 +2028,33 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VSELECT, MVT::v2i16, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom);
+
if (UseHVX) {
if (UseHVXSgl) {
setOperationAction(ISD::CONCAT_VECTORS, MVT::v128i8, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i16, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i32, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i64, Custom);
+ // We try to generate the vpack{e/o} instructions. If we fail
+ // we fall back upon ExpandOp.
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i8, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v32i16, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v64i8, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v32i16, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v16i32, Custom);
} else if (UseHVXDbl) {
setOperationAction(ISD::CONCAT_VECTORS, MVT::v256i8, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v128i16, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i32, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i64, Custom);
+ // We try to generate the vpack{e/o} instructions. If we fail
+ // we fall back upon ExpandOp.
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v128i8, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v64i16, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v4i32, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v128i8, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v64i16, Custom);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, MVT::v32i32, Custom);
} else {
llvm_unreachable("Unrecognized HVX mode");
}
@@ -2006,6 +2067,9 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FSUB, MVT::f64, Expand);
setOperationAction(ISD::FMUL, MVT::f64, Expand);
+ setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
+
setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote);
setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
@@ -2018,7 +2082,6 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote);
setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
-
} else { // V4
setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand);
setOperationAction(ISD::SINT_TO_FP, MVT::i64, Expand);
@@ -2052,13 +2115,20 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
// Handling of indexed loads/stores: default is "expand".
//
- for (MVT LSXTy : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
- setIndexedLoadAction(ISD::POST_INC, LSXTy, Legal);
- setIndexedStoreAction(ISD::POST_INC, LSXTy, Legal);
+ for (MVT VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
+ setIndexedLoadAction(ISD::POST_INC, VT, Legal);
+ setIndexedStoreAction(ISD::POST_INC, VT, Legal);
}
- if (UseHVXDbl) {
- for (MVT VT : {MVT::v128i8, MVT::v64i16, MVT::v32i32, MVT::v16i64}) {
+ if (UseHVXSgl) {
+ for (MVT VT : {MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64,
+ MVT::v128i8, MVT::v64i16, MVT::v32i32, MVT::v16i64}) {
+ setIndexedLoadAction(ISD::POST_INC, VT, Legal);
+ setIndexedStoreAction(ISD::POST_INC, VT, Legal);
+ }
+ } else if (UseHVXDbl) {
+ for (MVT VT : {MVT::v128i8, MVT::v64i16, MVT::v32i32, MVT::v16i64,
+ MVT::v256i8, MVT::v128i16, MVT::v64i32, MVT::v32i64}) {
setIndexedLoadAction(ISD::POST_INC, VT, Legal);
setIndexedStoreAction(ISD::POST_INC, VT, Legal);
}
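Marking POST_INC legal for these types lets the pointer bump of a strided loop fold into the memory access itself. Code of roughly this shape is the intended beneficiary (whether the fold actually fires depends on the rest of lowering):

// The "*p++" can conceptually become a post-incremented load such as
// r1 = memw(r0++#4) on Hexagon.
static int sumInts(const int *p, int n) {
  int s = 0;
  for (int i = 0; i < n; ++i)
    s += *p++;
  return s;
}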
@@ -2177,17 +2247,15 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
setLibcallName(RTLIB::SRA_I128, nullptr);
}
-
const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
switch ((HexagonISD::NodeType)Opcode) {
case HexagonISD::ALLOCA: return "HexagonISD::ALLOCA";
- case HexagonISD::ARGEXTEND: return "HexagonISD::ARGEXTEND";
case HexagonISD::AT_GOT: return "HexagonISD::AT_GOT";
case HexagonISD::AT_PCREL: return "HexagonISD::AT_PCREL";
case HexagonISD::BARRIER: return "HexagonISD::BARRIER";
+ case HexagonISD::CALL: return "HexagonISD::CALL";
+ case HexagonISD::CALLnr: return "HexagonISD::CALLnr";
case HexagonISD::CALLR: return "HexagonISD::CALLR";
- case HexagonISD::CALLv3nr: return "HexagonISD::CALLv3nr";
- case HexagonISD::CALLv3: return "HexagonISD::CALLv3";
case HexagonISD::COMBINE: return "HexagonISD::COMBINE";
case HexagonISD::CONST32_GP: return "HexagonISD::CONST32_GP";
case HexagonISD::CONST32: return "HexagonISD::CONST32";
@@ -2196,7 +2264,6 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
case HexagonISD::EH_RETURN: return "HexagonISD::EH_RETURN";
case HexagonISD::EXTRACTU: return "HexagonISD::EXTRACTU";
case HexagonISD::EXTRACTURP: return "HexagonISD::EXTRACTURP";
- case HexagonISD::FCONST32: return "HexagonISD::FCONST32";
case HexagonISD::INSERT: return "HexagonISD::INSERT";
case HexagonISD::INSERTRP: return "HexagonISD::INSERTRP";
case HexagonISD::JT: return "HexagonISD::JT";
@@ -2218,6 +2285,7 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
case HexagonISD::VCMPWGT: return "HexagonISD::VCMPWGT";
case HexagonISD::VCMPWGTU: return "HexagonISD::VCMPWGTU";
case HexagonISD::VCOMBINE: return "HexagonISD::VCOMBINE";
+ case HexagonISD::VPACK: return "HexagonISD::VPACK";
case HexagonISD::VSHLH: return "HexagonISD::VSHLH";
case HexagonISD::VSHLW: return "HexagonISD::VSHLW";
case HexagonISD::VSPLATB: return "HexagonISD::VSPLTB";
@@ -2247,12 +2315,13 @@ bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
return (VT1.getSimpleVT() == MVT::i64) && (VT2.getSimpleVT() == MVT::i32);
}
-// shouldExpandBuildVectorWithShuffles
-// Should we expand the build vector with shuffles?
-bool
-HexagonTargetLowering::shouldExpandBuildVectorWithShuffles(EVT VT,
- unsigned DefinedValues) const {
+bool HexagonTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+ return isOperationLegalOrCustom(ISD::FMA, VT);
+}
+// Should we expand the build vector with shuffles?
+bool HexagonTargetLowering::shouldExpandBuildVectorWithShuffles(EVT VT,
+ unsigned DefinedValues) const {
// Hexagon vector shuffle operates on element sizes of bytes or halfwords
EVT EltVT = VT.getVectorElementType();
int EltBits = EltVT.getSizeInBits();
@@ -2262,14 +2331,48 @@ HexagonTargetLowering::shouldExpandBuildVectorWithShuffles(EVT VT,
return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
}
-// LowerVECTOR_SHUFFLE - Lower a vector shuffle (V1, V2, V3). V1 and
-// V2 are the two vectors to select data from, V3 is the permutation.
-static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
+static StridedLoadKind isStridedLoad(const ArrayRef<int> &Mask) {
+ int even_start = -2;
+ int odd_start = -1;
+ size_t mask_len = Mask.size();
+ for (auto idx : Mask) {
+ if ((idx - even_start) == 2)
+ even_start = idx;
+ else
+ break;
+ }
+ if (even_start == (int)(mask_len * 2) - 2)
+ return StridedLoadKind::Even;
+ for (auto idx : Mask) {
+ if ((idx - odd_start) == 2)
+ odd_start = idx;
+ else
+ break;
+ }
+ if (odd_start == (int)(mask_len * 2) - 1)
+ return StridedLoadKind::Odd;
+
+ return StridedLoadKind::NoPattern;
+}
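A self-contained restatement of the mask test above, convenient for checking masks by hand (not a byte-for-byte port of isStridedLoad):

#include <cstddef>
#include <vector>

enum class Stride { Even, Odd, NoPattern };

// An "even" mask selects elements 0,2,4,... of the two concatenated inputs;
// an "odd" mask selects 1,3,5,...; anything else is NoPattern.
static Stride classifyMask(const std::vector<int> &Mask) {
  bool Even = true, Odd = true;
  for (std::size_t i = 0; i < Mask.size(); ++i) {
    Even = Even && Mask[i] == static_cast<int>(2 * i);
    Odd = Odd && Mask[i] == static_cast<int>(2 * i + 1);
  }
  if (Even) return Stride::Even;
  if (Odd) return Stride::Odd;
  return Stride::NoPattern;
}

// classifyMask({0, 2, 4, 6}) -> Even, classifyMask({1, 3, 5, 7}) -> Odd.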
+
+bool HexagonTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &Mask,
+ EVT VT) const {
+ if (Subtarget.useHVXOps())
+ return isStridedLoad(Mask) != StridedLoadKind::NoPattern;
+ return true;
+}
+
+// Lower a vector shuffle (V1, V2, V3). V1 and V2 are the two vectors
+// to select data from, V3 is the permutation.
+SDValue
+HexagonTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG)
+ const {
const ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op);
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
SDLoc dl(Op);
EVT VT = Op.getValueType();
+ bool UseHVX = Subtarget.useHVXOps();
if (V2.isUndef())
V2 = V1;
@@ -2288,17 +2391,42 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR &&
!isa<ConstantSDNode>(V1.getOperand(0))) {
bool IsScalarToVector = true;
- for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i)
+ for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i) {
if (!V1.getOperand(i).isUndef()) {
IsScalarToVector = false;
break;
}
+ }
if (IsScalarToVector)
return createSplat(DAG, dl, VT, V1.getOperand(0));
}
return createSplat(DAG, dl, VT, DAG.getConstant(Lane, dl, MVT::i32));
}
+ if (UseHVX) {
+ ArrayRef<int> Mask = SVN->getMask();
+ size_t MaskLen = Mask.size();
+ int ElemSizeInBits = VT.getScalarSizeInBits();
+ if ((Subtarget.useHVXSglOps() && (ElemSizeInBits * MaskLen) == 64 * 8) ||
+ (Subtarget.useHVXDblOps() && (ElemSizeInBits * MaskLen) == 128 * 8)) {
+      // isStridedLoad returns Even (0), Odd (1), or NoPattern (2).
+ StridedLoadKind Pattern = isStridedLoad(Mask);
+
+ if (Pattern == StridedLoadKind::NoPattern)
+ return SDValue();
+
+ SDValue Vec0 = Op.getOperand(0);
+ SDValue Vec1 = Op.getOperand(1);
+ SDValue StridePattern = DAG.getConstant(Pattern, dl, MVT::i32);
+ SDValue Ops[] = { Vec1, Vec0, StridePattern };
+ return DAG.getNode(HexagonISD::VPACK, dl, VT, Ops);
+ }
+    // We used to assert in the "else" part here, but that is bad for Halide.
+ // Halide creates intermediate double registers by interleaving two
+ // concatenated vector registers. The interleaving requires vector_shuffle
+ // nodes and we shouldn't barf on a double register result of a
+ // vector_shuffle because it is most likely an intermediate result.
+ }
// FIXME: We need to support more general vector shuffles. See
// below the comment from the ARM backend that deals in the general
// case with the vector shuffles. For now, let expand handle these.
@@ -2321,11 +2449,12 @@ static bool isCommonSplatElement(BuildVectorSDNode *BVN) {
return true;
}
-// LowerVECTOR_SHIFT - Lower a vector shift. Try to convert
+// Lower a vector shift. Try to convert
// <VT> = SHL/SRA/SRL <VT> by <VT> to Hexagon specific
// <VT> = SHL/SRA/SRL <VT> by <IT/i32>.
-static SDValue LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) {
- BuildVectorSDNode *BVN = 0;
+SDValue
+HexagonTargetLowering::LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const {
+ BuildVectorSDNode *BVN = nullptr;
SDValue V1 = Op.getOperand(0);
SDValue V2 = Op.getOperand(1);
SDValue V3;
@@ -2442,7 +2571,7 @@ HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
SDValue pack = DAG.getNode(HexagonISD::PACKHL, dl, MVT::v4i16,
BVN->getOperand(1), BVN->getOperand(0));
- return DAG.getTargetExtractSubreg(Hexagon::subreg_loreg, dl, MVT::v2i16,
+ return DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::v2i16,
pack);
}
}
@@ -2474,6 +2603,9 @@ HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
Res = (Res << EltSize) | Val;
}
+ if (Size > 64)
+ return SDValue();
+
if (Size == 64)
ConstVal = DAG.getConstant(Res, dl, MVT::i64);
else
@@ -2497,7 +2629,7 @@ HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
continue;
if (VT.getSizeInBits() == 64 &&
- Operand.getValueType().getSizeInBits() == 32) {
+ Operand.getValueSizeInBits() == 32) {
SDValue C = DAG.getConstant(0, dl, MVT::i32);
Operand = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, Operand);
}
@@ -2562,7 +2694,7 @@ HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
unsigned N = NElts-i-1;
SDValue OpN = Op.getOperand(N);
- if (VT.getSizeInBits() == 64 && OpN.getValueType().getSizeInBits() == 32) {
+ if (VT.getSizeInBits() == 64 && OpN.getValueSizeInBits() == 32) {
SDValue C = DAG.getConstant(0, dl, MVT::i32);
OpN = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, OpN);
}
@@ -2571,16 +2703,66 @@ HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, S, Offset);
if (VT.getSizeInBits() == 32)
V = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i32, {V, OpN, Or});
- else
+ else if (VT.getSizeInBits() == 64)
V = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i64, {V, OpN, Or});
+ else
+ return SDValue();
}
return DAG.getNode(ISD::BITCAST, dl, VT, V);
}
SDValue
+HexagonTargetLowering::LowerEXTRACT_SUBVECTOR_HVX(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getOperand(0).getValueType();
+ SDLoc dl(Op);
+ bool UseHVX = Subtarget.useHVXOps();
+ bool UseHVXSgl = Subtarget.useHVXSglOps();
+ // Just in case...
+
+ if (!VT.isVector() || !UseHVX)
+ return SDValue();
+
+ EVT ResVT = Op.getValueType();
+ unsigned ResSize = ResVT.getSizeInBits();
+ unsigned VectorSizeInBits = UseHVXSgl ? (64 * 8) : (128 * 8);
+ unsigned OpSize = VT.getSizeInBits();
+
+ // We deal only with cases where the result is the vector size
+ // and the vector operand is a double register.
+ if (!(ResVT.isByteSized() && ResSize == VectorSizeInBits) ||
+ !(VT.isByteSized() && OpSize == 2 * VectorSizeInBits))
+ return SDValue();
+
+ ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ if (!Cst)
+ return SDValue();
+ unsigned Val = Cst->getZExtValue();
+
+ // These two will get lowered to an appropriate EXTRACT_SUBREG in ISel.
+ if (Val == 0) {
+ SDValue Vec = Op.getOperand(0);
+ return DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, ResVT, Vec);
+ }
+
+ if (ResVT.getVectorNumElements() == Val) {
+ SDValue Vec = Op.getOperand(0);
+ return DAG.getTargetExtractSubreg(Hexagon::vsub_hi, dl, ResVT, Vec);
+ }
+
+ return SDValue();
+}
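The index check reduces to a two-way split of a vector pair: a start index of 0 selects the low half (vsub_lo) and a start index equal to the result's element count selects the high half (vsub_hi). A small sketch of that mapping:

enum class HvxHalf { Lo, Hi, Neither };

// For a pair holding 2*N elements split into two N-element halves.
static HvxHalf classifySubvectorExtract(unsigned StartIdx, unsigned ResultElts) {
  if (StartIdx == 0)
    return HvxHalf::Lo;        // becomes an extract of vsub_lo
  if (StartIdx == ResultElts)
    return HvxHalf::Hi;        // becomes an extract of vsub_hi
  return HvxHalf::Neither;     // left to the generic path
}

// For v64i32 split into v32i32 halves: start 0 -> Lo, start 32 -> Hi.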
+
+SDValue
HexagonTargetLowering::LowerEXTRACT_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
+ // If we are dealing with EXTRACT_SUBVECTOR on a HVX type, we may
+ // be able to simplify it to an EXTRACT_SUBREG.
+ if (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR && Subtarget.useHVXOps() &&
+ isHvxVectorType(Op.getValueType().getSimpleVT()))
+ return LowerEXTRACT_SUBVECTOR_HVX(Op, DAG);
+
EVT VT = Op.getValueType();
int VTN = VT.isVector() ? VT.getVectorNumElements() : 1;
SDLoc dl(Op);
@@ -2607,27 +2789,28 @@ HexagonTargetLowering::LowerEXTRACT_VECTOR(SDValue Op,
if (W == 32) {
// Translate this node into EXTRACT_SUBREG.
- unsigned Subreg = (X == 0) ? Hexagon::subreg_loreg : 0;
+ unsigned Subreg = (X == 0) ? Hexagon::isub_lo : 0;
if (X == 0)
- Subreg = Hexagon::subreg_loreg;
+ Subreg = Hexagon::isub_lo;
else if (SVT == MVT::v2i32 && X == 1)
- Subreg = Hexagon::subreg_hireg;
+ Subreg = Hexagon::isub_hi;
else if (SVT == MVT::v4i16 && X == 2)
- Subreg = Hexagon::subreg_hireg;
+ Subreg = Hexagon::isub_hi;
else if (SVT == MVT::v8i8 && X == 4)
- Subreg = Hexagon::subreg_hireg;
+ Subreg = Hexagon::isub_hi;
else
llvm_unreachable("Bad offset");
N = DAG.getTargetExtractSubreg(Subreg, dl, MVT::i32, Vec);
- } else if (VecVT.getSizeInBits() == 32) {
+ } else if (SVT.getSizeInBits() == 32) {
N = DAG.getNode(HexagonISD::EXTRACTU, dl, MVT::i32, Ops);
- } else {
+ } else if (SVT.getSizeInBits() == 64) {
N = DAG.getNode(HexagonISD::EXTRACTU, dl, MVT::i64, Ops);
if (VT.getSizeInBits() == 32)
- N = DAG.getTargetExtractSubreg(Hexagon::subreg_loreg, dl, MVT::i32, N);
- }
+ N = DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, N);
+ } else
+ return SDValue();
return DAG.getNode(ISD::BITCAST, dl, VT, N);
}
@@ -2647,7 +2830,7 @@ HexagonTargetLowering::LowerEXTRACT_VECTOR(SDValue Op,
} else {
N = DAG.getNode(HexagonISD::EXTRACTURP, dl, MVT::i64, Ops);
if (VT.getSizeInBits() == 32)
- N = DAG.getTargetExtractSubreg(Hexagon::subreg_loreg, dl, MVT::i32, N);
+ N = DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, N);
}
return DAG.getNode(ISD::BITCAST, dl, VT, N);
}
@@ -2674,8 +2857,10 @@ HexagonTargetLowering::LowerINSERT_VECTOR(SDValue Op,
SDValue N;
if (VT.getSizeInBits() == 32)
N = DAG.getNode(HexagonISD::INSERT, dl, MVT::i32, Ops);
- else
+ else if (VT.getSizeInBits() == 64)
N = DAG.getNode(HexagonISD::INSERT, dl, MVT::i64, Ops);
+ else
+ return SDValue();
return DAG.getNode(ISD::BITCAST, dl, VT, N);
}
@@ -2687,8 +2872,7 @@ HexagonTargetLowering::LowerINSERT_VECTOR(SDValue Op,
DAG.getConstant(32, dl, MVT::i64));
SDValue Combined = DAG.getNode(ISD::OR, dl, MVT::i64, Shifted, Offset);
- if (VT.getSizeInBits() == 64 &&
- Val.getValueType().getSizeInBits() == 32) {
+ if (VT.getSizeInBits() == 64 && Val.getValueSizeInBits() == 32) {
SDValue C = DAG.getConstant(0, dl, MVT::i32);
Val = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, Val);
}
@@ -2698,8 +2882,10 @@ HexagonTargetLowering::LowerINSERT_VECTOR(SDValue Op,
SDValue N;
if (VT.getSizeInBits() == 32)
N = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i32, Ops);
- else
+ else if (VT.getSizeInBits() == 64)
N = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i64, Ops);
+ else
+ return SDValue();
return DAG.getNode(ISD::BITCAST, dl, VT, N);
}
@@ -2800,20 +2986,6 @@ HexagonTargetLowering::getPICJumpTableRelocBase(SDValue Table,
return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Table), VT, T);
}
-MachineBasicBlock *HexagonTargetLowering::EmitInstrWithCustomInserter(
- MachineInstr &MI, MachineBasicBlock *BB) const {
- switch (MI.getOpcode()) {
- case Hexagon::ALLOCA: {
- MachineFunction *MF = BB->getParent();
- auto *FuncInfo = MF->getInfo<HexagonMachineFunctionInfo>();
- FuncInfo->addAllocaAdjustInst(&MI);
- return BB;
- }
- default:
- llvm_unreachable("Unexpected instr type to insert");
- } // switch
-}
-
//===----------------------------------------------------------------------===//
// Inline Assembly Support
//===----------------------------------------------------------------------===//
@@ -2832,7 +3004,7 @@ HexagonTargetLowering::getConstraintType(StringRef Constraint) const {
return TargetLowering::getConstraintType(Constraint);
}
-std::pair<unsigned, const TargetRegisterClass *>
+std::pair<unsigned, const TargetRegisterClass*>
HexagonTargetLowering::getRegForInlineAsmConstraint(
const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
bool UseHVX = Subtarget.useHVXOps(), UseHVXDbl = Subtarget.useHVXDblOps();
@@ -2840,53 +3012,53 @@ HexagonTargetLowering::getRegForInlineAsmConstraint(
if (Constraint.size() == 1) {
switch (Constraint[0]) {
case 'r': // R0-R31
- switch (VT.SimpleTy) {
- default:
- llvm_unreachable("getRegForInlineAsmConstraint Unhandled data type");
- case MVT::i32:
- case MVT::i16:
- case MVT::i8:
- case MVT::f32:
- return std::make_pair(0U, &Hexagon::IntRegsRegClass);
- case MVT::i64:
- case MVT::f64:
- return std::make_pair(0U, &Hexagon::DoubleRegsRegClass);
+ switch (VT.SimpleTy) {
+ default:
+ llvm_unreachable("getRegForInlineAsmConstraint Unhandled data type");
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ case MVT::i32:
+ case MVT::f32:
+ return std::make_pair(0U, &Hexagon::IntRegsRegClass);
+ case MVT::i64:
+ case MVT::f64:
+ return std::make_pair(0U, &Hexagon::DoubleRegsRegClass);
}
case 'q': // q0-q3
- switch (VT.SimpleTy) {
- default:
- llvm_unreachable("getRegForInlineAsmConstraint Unhandled data type");
- case MVT::v1024i1:
- case MVT::v512i1:
- case MVT::v32i16:
- case MVT::v16i32:
- case MVT::v64i8:
- case MVT::v8i64:
- return std::make_pair(0U, &Hexagon::VecPredRegsRegClass);
- }
+ switch (VT.SimpleTy) {
+ default:
+ llvm_unreachable("getRegForInlineAsmConstraint Unhandled data type");
+ case MVT::v1024i1:
+ case MVT::v512i1:
+ case MVT::v32i16:
+ case MVT::v16i32:
+ case MVT::v64i8:
+ case MVT::v8i64:
+ return std::make_pair(0U, &Hexagon::VecPredRegsRegClass);
+ }
case 'v': // V0-V31
- switch (VT.SimpleTy) {
- default:
- llvm_unreachable("getRegForInlineAsmConstraint Unhandled data type");
- case MVT::v16i32:
- case MVT::v32i16:
- case MVT::v64i8:
- case MVT::v8i64:
- return std::make_pair(0U, &Hexagon::VectorRegsRegClass);
- case MVT::v32i32:
- case MVT::v64i16:
- case MVT::v16i64:
- case MVT::v128i8:
- if (Subtarget.hasV60TOps() && UseHVX && UseHVXDbl)
- return std::make_pair(0U, &Hexagon::VectorRegs128BRegClass);
- else
- return std::make_pair(0U, &Hexagon::VecDblRegsRegClass);
- case MVT::v256i8:
- case MVT::v128i16:
- case MVT::v64i32:
- case MVT::v32i64:
- return std::make_pair(0U, &Hexagon::VecDblRegs128BRegClass);
- }
+ switch (VT.SimpleTy) {
+ default:
+ llvm_unreachable("getRegForInlineAsmConstraint Unhandled data type");
+ case MVT::v16i32:
+ case MVT::v32i16:
+ case MVT::v64i8:
+ case MVT::v8i64:
+ return std::make_pair(0U, &Hexagon::VectorRegsRegClass);
+ case MVT::v32i32:
+ case MVT::v64i16:
+ case MVT::v16i64:
+ case MVT::v128i8:
+ if (Subtarget.hasV60TOps() && UseHVX && UseHVXDbl)
+ return std::make_pair(0U, &Hexagon::VectorRegs128BRegClass);
+ return std::make_pair(0U, &Hexagon::VecDblRegsRegClass);
+ case MVT::v256i8:
+ case MVT::v128i16:
+ case MVT::v64i32:
+ case MVT::v32i64:
+ return std::make_pair(0U, &Hexagon::VecDblRegs128BRegClass);
+ }
default:
llvm_unreachable("Unknown asm register class");
@@ -2908,16 +3080,30 @@ bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
bool HexagonTargetLowering::isLegalAddressingMode(const DataLayout &DL,
const AddrMode &AM, Type *Ty,
unsigned AS) const {
- // Allows a signed-extended 11-bit immediate field.
- if (AM.BaseOffs <= -(1LL << 13) || AM.BaseOffs >= (1LL << 13)-1)
- return false;
+ if (Ty->isSized()) {
+ // When LSR detects uses of the same base address to access different
+ // types (e.g. unions), it will assume a conservative type for these
+ // uses:
+ // LSR Use: Kind=Address of void in addrspace(4294967295), ...
+ // The type Ty passed here would then be "void". Skip the alignment
+ // checks, but do not return false right away, since that confuses
+ // LSR into crashing.
+ unsigned A = DL.getABITypeAlignment(Ty);
+ // The base offset must be a multiple of the alignment.
+ if ((AM.BaseOffs % A) != 0)
+ return false;
+ // The shifted offset must fit in 11 bits.
+ if (!isInt<11>(AM.BaseOffs >> Log2_32(A)))
+ return false;
+ }
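A standalone sketch of the offset test above, assuming the power-of-two alignment that getABITypeAlignment returns: the offset must be a multiple of the access alignment and, once scaled by it, must fit in a signed 11-bit field:

#include <cstdint>

static bool fitsScaledOffset(int64_t Off, unsigned Align) {
  if (Off % Align != 0)
    return false;
  int64_t Scaled = Off / static_cast<int64_t>(Align);  // exact, since Off is a multiple of Align
  return Scaled >= -1024 && Scaled <= 1023;             // i.e. isInt<11>(Scaled)
}

// fitsScaledOffset(4092, 4) -> true; fitsScaledOffset(4096, 4) -> false.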
// No global is ever allowed as a base.
if (AM.BaseGV)
return false;
int Scale = AM.Scale;
- if (Scale < 0) Scale = -Scale;
+ if (Scale < 0)
+ Scale = -Scale;
switch (Scale) {
case 0: // No scale reg, "r+i", "r", or just "i".
break;
@@ -2934,7 +3120,6 @@ bool HexagonTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA)
return HTM.getRelocationModel() == Reloc::Static;
}
-
/// isLegalICmpImmediate - Return true if the specified immediate is legal
/// icmp immediate, that is the target has icmp instructions which can compare
/// a register against the immediate without having to materialize the
@@ -2966,14 +3151,20 @@ bool HexagonTargetLowering::IsEligibleForTailCallOptimization(
// ***************************************************************************
// If this is a tail call via a function pointer, then don't do it!
- if (!(isa<GlobalAddressSDNode>(Callee)) &&
- !(isa<ExternalSymbolSDNode>(Callee))) {
+ if (!isa<GlobalAddressSDNode>(Callee) &&
+ !isa<ExternalSymbolSDNode>(Callee)) {
return false;
}
- // Do not optimize if the calling conventions do not match.
- if (!CCMatch)
- return false;
+ // Do not optimize if the calling conventions do not match and the conventions
+ // used are not C or Fast.
+ if (!CCMatch) {
+ bool R = (CallerCC == CallingConv::C || CallerCC == CallingConv::Fast);
+ bool E = (CalleeCC == CallingConv::C || CalleeCC == CallingConv::Fast);
+ // If R & E, then ok.
+ if (!R || !E)
+ return false;
+ }
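A sketch mirroring the relaxed compatibility rule above, assuming only the two calling conventions matter:

#include "llvm/IR/CallingConv.h"

// Differing conventions are tolerated only when both caller and callee use
// C or Fast.
static bool ccCompatibleForTailCall(llvm::CallingConv::ID CallerCC,
                                    llvm::CallingConv::ID CalleeCC) {
  if (CallerCC == CalleeCC)
    return true;
  auto IsCOrFast = [](llvm::CallingConv::ID CC) {
    return CC == llvm::CallingConv::C || CC == llvm::CallingConv::Fast;
  };
  return IsCOrFast(CallerCC) && IsCOrFast(CalleeCC);
}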
// Do not tail call optimize vararg calls.
if (isVarArg)
@@ -2991,18 +3182,33 @@ bool HexagonTargetLowering::IsEligibleForTailCallOptimization(
return true;
}
-// Return true when the given node fits in a positive half word.
-bool llvm::isPositiveHalfWord(SDNode *N) {
- ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N);
- if (CN && CN->getSExtValue() > 0 && isInt<16>(CN->getSExtValue()))
- return true;
+/// Returns the target specific optimal type for load and store operations as
+/// a result of memset, memcpy, and memmove lowering.
+///
+/// If DstAlign is zero, that means the destination alignment can satisfy any
+/// constraint. Similarly, if SrcAlign is zero it means there isn't
+/// a need to check it against alignment requirement, probably because the
+/// source does not need to be loaded. If 'IsMemset' is true, that means it's
+/// expanding a memset. If 'ZeroMemset' is true, that means it's a memset of
+/// zero. 'MemcpyStrSrc' indicates whether the memcpy source is constant so it
+/// does not need to be loaded. It returns EVT::Other if the type should be
+/// determined using generic target-independent logic.
+EVT HexagonTargetLowering::getOptimalMemOpType(uint64_t Size,
+ unsigned DstAlign, unsigned SrcAlign, bool IsMemset, bool ZeroMemset,
+ bool MemcpyStrSrc, MachineFunction &MF) const {
+
+ auto Aligned = [](unsigned GivenA, unsigned MinA) -> bool {
+ return (GivenA % MinA) == 0;
+ };
- switch (N->getOpcode()) {
- default:
- return false;
- case ISD::SIGN_EXTEND_INREG:
- return true;
- }
+ if (Size >= 8 && Aligned(DstAlign, 8) && (IsMemset || Aligned(SrcAlign, 8)))
+ return MVT::i64;
+ if (Size >= 4 && Aligned(DstAlign, 4) && (IsMemset || Aligned(SrcAlign, 4)))
+ return MVT::i32;
+ if (Size >= 2 && Aligned(DstAlign, 2) && (IsMemset || Aligned(SrcAlign, 2)))
+ return MVT::i16;
+
+ return MVT::Other;
}
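As a concrete consequence, assuming the i64 branch is taken: a 32-byte memset with an 8-byte aligned destination can be expanded as four 64-bit stores rather than thirty-two byte stores:

#include <cstdint>

// P is assumed to be 8-byte aligned; this is what "return MVT::i64" buys.
static void zero32(uint64_t *P) {
  P[0] = 0;
  P[1] = 0;
  P[2] = 0;
  P[3] = 0;
}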
bool HexagonTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
@@ -3030,7 +3236,6 @@ bool HexagonTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
return false;
}
-
std::pair<const TargetRegisterClass*, uint8_t>
HexagonTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
MVT VT) const {
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h
index 71f67349befe..a8ed29e585d4 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -16,30 +16,33 @@
#define LLVM_LIB_TARGET_HEXAGON_HEXAGONISELLOWERING_H
#include "Hexagon.h"
-#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/InlineAsm.h"
#include "llvm/Target/TargetLowering.h"
+#include <cstdint>
+#include <utility>
namespace llvm {
-// Return true when the given node fits in a positive half word.
-bool isPositiveHalfWord(SDNode *N);
+namespace HexagonISD {
- namespace HexagonISD {
enum NodeType : unsigned {
OP_BEGIN = ISD::BUILTIN_OP_END,
CONST32 = OP_BEGIN,
CONST32_GP, // For marking data present in GP.
- FCONST32,
ALLOCA,
- ARGEXTEND,
AT_GOT, // Index in GOT.
AT_PCREL, // Offset relative to PC.
- CALLv3, // A V3+ call instruction.
- CALLv3nr, // A V3+ call instruction that doesn't return.
+ CALL, // Function call.
+ CALLnr, // Function call that does not return.
CALLR,
RET_FLAG, // Return with a flag operand.
@@ -79,24 +82,26 @@ bool isPositiveHalfWord(SDNode *N);
EXTRACTU,
EXTRACTURP,
VCOMBINE,
+ VPACK,
TC_RETURN,
EH_RETURN,
DCFETCH,
OP_END
};
- }
+
+} // end namespace HexagonISD
class HexagonSubtarget;
class HexagonTargetLowering : public TargetLowering {
int VarArgsFrameOffset; // Frame offset to start of varargs area.
+ const HexagonTargetMachine &HTM;
+ const HexagonSubtarget &Subtarget;
bool CanReturnSmallStruct(const Function* CalleeFn, unsigned& RetSize)
const;
void promoteLdStType(MVT VT, MVT PromotedLdStVT);
- const HexagonTargetMachine &HTM;
- const HexagonSubtarget &Subtarget;
public:
explicit HexagonTargetLowering(const TargetMachine &TM,
@@ -116,15 +121,27 @@ bool isPositiveHalfWord(SDNode *N);
bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
+ /// Return true if an FMA operation is faster than a pair of mul and add
+ /// instructions. fmuladd intrinsics will be expanded to FMAs when this
+ /// method returns true (and FMAs are legal), otherwise fmuladd is
+ /// expanded to mul + add.
+ bool isFMAFasterThanFMulAndFAdd(EVT) const override;
+
// Should we expand the build vector with shuffles?
bool shouldExpandBuildVectorWithShuffles(EVT VT,
unsigned DefinedValues) const override;
+ bool isShuffleMaskLegal(const SmallVectorImpl<int> &Mask, EVT VT)
+ const override;
+
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
const char *getTargetNodeName(unsigned Opcode) const override;
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEXTRACT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerEXTRACT_SUBVECTOR_HVX(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINSERT_VECTOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const;
@@ -174,9 +191,6 @@ bool isPositiveHalfWord(SDNode *N);
const SDLoc &dl, SelectionDAG &DAG) const override;
bool mayBeEmittedAsTailCall(CallInst *CI) const override;
- MachineBasicBlock *
- EmitInstrWithCustomInserter(MachineInstr &MI,
- MachineBasicBlock *BB) const override;
/// If a physical register, this returns the register that receives the
/// exception address on entry to an EH pad.
@@ -195,6 +209,7 @@ bool isPositiveHalfWord(SDNode *N);
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
+
EVT getSetCCResultType(const DataLayout &, LLVMContext &C,
EVT VT) const override {
if (!VT.isVector())
@@ -243,6 +258,10 @@ bool isPositiveHalfWord(SDNode *N);
/// the immediate into a register.
bool isLegalICmpImmediate(int64_t Imm) const override;
+ EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign,
+ unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
+ MachineFunction &MF) const override;
+
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace,
unsigned Align, bool *Fast) const override;
@@ -269,6 +288,7 @@ bool isPositiveHalfWord(SDNode *N);
findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT)
const override;
};
+
} // end namespace llvm
-#endif // Hexagon_ISELLOWERING_H
+#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrAlias.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrAlias.td
index 9cbeae7c67c8..7283d94ee759 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrAlias.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrAlias.td
@@ -63,34 +63,34 @@ def : InstAlias<"memw($Rs) = $Rt.new",
(S2_storerinew_io IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
def : InstAlias<"memb($Rs) = #$S8",
- (S4_storeirb_io IntRegs:$Rs, 0, s8Ext:$S8), 0>;
+ (S4_storeirb_io IntRegs:$Rs, 0, s8_0Ext:$S8), 0>;
def : InstAlias<"memh($Rs) = #$S8",
- (S4_storeirh_io IntRegs:$Rs, 0, s8Ext:$S8), 0>;
+ (S4_storeirh_io IntRegs:$Rs, 0, s8_0Ext:$S8), 0>;
def : InstAlias<"memw($Rs) = #$S8",
- (S4_storeiri_io IntRegs:$Rs, 0, s8Ext:$S8), 0>;
+ (S4_storeiri_io IntRegs:$Rs, 0, s8_0Ext:$S8), 0>;
def : InstAlias<"memd($Rs) = $Rtt",
(S2_storerd_io IntRegs:$Rs, 0, DoubleRegs:$Rtt), 0>;
def : InstAlias<"memb($Rs) = setbit(#$U5)",
- (L4_ior_memopb_io IntRegs:$Rs, 0, u5Imm:$U5), 0>;
+ (L4_ior_memopb_io IntRegs:$Rs, 0, u5_0Imm:$U5), 0>;
def : InstAlias<"memh($Rs) = setbit(#$U5)",
- (L4_ior_memoph_io IntRegs:$Rs, 0, u5Imm:$U5), 0>;
+ (L4_ior_memoph_io IntRegs:$Rs, 0, u5_0Imm:$U5), 0>;
def : InstAlias<"memw($Rs) = setbit(#$U5)",
- (L4_ior_memopw_io IntRegs:$Rs, 0, u5Imm:$U5), 0>;
+ (L4_ior_memopw_io IntRegs:$Rs, 0, u5_0Imm:$U5), 0>;
def : InstAlias<"memb($Rs) = clrbit(#$U5)",
- (L4_iand_memopb_io IntRegs:$Rs, 0, u5Imm:$U5), 0>;
+ (L4_iand_memopb_io IntRegs:$Rs, 0, u5_0Imm:$U5), 0>;
def : InstAlias<"memh($Rs) = clrbit(#$U5)",
- (L4_iand_memoph_io IntRegs:$Rs, 0, u5Imm:$U5), 0>;
+ (L4_iand_memoph_io IntRegs:$Rs, 0, u5_0Imm:$U5), 0>;
def : InstAlias<"memw($Rs) = clrbit(#$U5)",
- (L4_iand_memopw_io IntRegs:$Rs, 0, u5Imm:$U5), 0>;
+ (L4_iand_memopw_io IntRegs:$Rs, 0, u5_0Imm:$U5), 0>;
// Alias of: $Rd = memXX($Rs+#XX) to $Rd = memXX($Rs)
def : InstAlias<"$Rd = memb($Rs)",
@@ -241,40 +241,40 @@ def : InstAlias<"if (!$Pt.new) memw($Rs) = $Rt.new",
(S4_pstorerinewfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, IntRegs:$Rt), 0>;
def : InstAlias<"if ($Pt) memb($Rs) = #$S6",
- (S4_storeirbt_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>;
+ (S4_storeirbt_io PredRegs:$Pt, IntRegs:$Rs, 0, s6_0Ext:$S6), 0>;
def : InstAlias<"if ($Pt) memh($Rs) = #$S6",
- (S4_storeirht_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>;
+ (S4_storeirht_io PredRegs:$Pt, IntRegs:$Rs, 0, s6_0Ext:$S6), 0>;
def : InstAlias<"if ($Pt) memw($Rs) = #$S6",
- (S4_storeirit_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>;
+ (S4_storeirit_io PredRegs:$Pt, IntRegs:$Rs, 0, s6_0Ext:$S6), 0>;
def : InstAlias<"if ($Pt.new) memb($Rs) = #$S6",
- (S4_storeirbtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>;
+ (S4_storeirbtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6_0Ext:$S6), 0>;
def : InstAlias<"if ($Pt.new) memh($Rs) = #$S6",
- (S4_storeirhtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>;
+ (S4_storeirhtnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6_0Ext:$S6), 0>;
def : InstAlias<"if ($Pt.new) memw($Rs) = #$S6",
- (S4_storeiritnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>;
+ (S4_storeiritnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6_0Ext:$S6), 0>;
def : InstAlias<"if (!$Pt) memb($Rs) = #$S6",
- (S4_storeirbf_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>;
+ (S4_storeirbf_io PredRegs:$Pt, IntRegs:$Rs, 0, s6_0Ext:$S6), 0>;
def : InstAlias<"if (!$Pt) memh($Rs) = #$S6",
- (S4_storeirhf_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>;
+ (S4_storeirhf_io PredRegs:$Pt, IntRegs:$Rs, 0, s6_0Ext:$S6), 0>;
def : InstAlias<"if (!$Pt) memw($Rs) = #$S6",
- (S4_storeirif_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>;
+ (S4_storeirif_io PredRegs:$Pt, IntRegs:$Rs, 0, s6_0Ext:$S6), 0>;
def : InstAlias<"if (!$Pt.new) memb($Rs) = #$S6",
- (S4_storeirbfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>;
+ (S4_storeirbfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6_0Ext:$S6), 0>;
def : InstAlias<"if (!$Pt.new) memh($Rs) = #$S6",
- (S4_storeirhfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>;
+ (S4_storeirhfnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6_0Ext:$S6), 0>;
def : InstAlias<"if (!$Pt.new) memw($Rs) = #$S6",
- (S4_storeirifnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6Ext:$S6), 0>;
+ (S4_storeirifnew_io PredRegs:$Pt, IntRegs:$Rs, 0, s6_0Ext:$S6), 0>;
// Alias of: memXX($Rs + $u6_X) |= $Rt, also &=, +=, -=
// to: memXX($Rs) |= $Rt
@@ -295,11 +295,11 @@ def : InstAlias<"memb($Rs) -= $Rt",
Requires<[UseMEMOP]>;
def : InstAlias<"memb($Rs) += #$U5",
- (L4_iadd_memopb_io IntRegs:$Rs, 0, u5Imm:$U5), 0>,
+ (L4_iadd_memopb_io IntRegs:$Rs, 0, u5_0Imm:$U5), 0>,
Requires<[UseMEMOP]>;
def : InstAlias<"memb($Rs) -= #$U5",
- (L4_isub_memopb_io IntRegs:$Rs, 0, u5Imm:$U5), 0>,
+ (L4_isub_memopb_io IntRegs:$Rs, 0, u5_0Imm:$U5), 0>,
Requires<[UseMEMOP]>;
def : InstAlias<"memh($Rs) &= $Rt",
@@ -319,11 +319,11 @@ def : InstAlias<"memh($Rs) -= $Rt",
Requires<[UseMEMOP]>;
def : InstAlias<"memh($Rs) += #$U5",
- (L4_iadd_memoph_io IntRegs:$Rs, 0, u5Imm:$U5), 0>,
+ (L4_iadd_memoph_io IntRegs:$Rs, 0, u5_0Imm:$U5), 0>,
Requires<[UseMEMOP]>;
def : InstAlias<"memh($Rs) -= #$U5",
- (L4_isub_memoph_io IntRegs:$Rs, 0, u5Imm:$U5), 0>,
+ (L4_isub_memoph_io IntRegs:$Rs, 0, u5_0Imm:$U5), 0>,
Requires<[UseMEMOP]>;
def : InstAlias<"memw($Rs) &= $Rt",
@@ -343,11 +343,11 @@ def : InstAlias<"memw($Rs) -= $Rt",
Requires<[UseMEMOP]>;
def : InstAlias<"memw($Rs) += #$U5",
- (L4_iadd_memopw_io IntRegs:$Rs, 0, u5Imm:$U5), 0>,
+ (L4_iadd_memopw_io IntRegs:$Rs, 0, u5_0Imm:$U5), 0>,
Requires<[UseMEMOP]>;
def : InstAlias<"memw($Rs) -= #$U5",
- (L4_isub_memopw_io IntRegs:$Rs, 0, u5Imm:$U5), 0>,
+ (L4_isub_memopw_io IntRegs:$Rs, 0, u5_0Imm:$U5), 0>,
Requires<[UseMEMOP]>;
//
@@ -492,12 +492,10 @@ def : InstAlias<"if ($src1) jumpr $src2",
def : InstAlias<"if (!$src1) jumpr $src2",
(J2_jumprf PredRegs:$src1, IntRegs:$src2), 0>;
-// V6_vassignp: Vector assign mapping.
-let hasNewValue = 1, opNewValue = 0, isAsmParserOnly = 1 in
-def HEXAGON_V6_vassignpair: CVI_VA_DV_Resource <
- (outs VecDblRegs:$Vdd),
- (ins VecDblRegs:$Vss),
- "$Vdd = $Vss">;
+// maps Vdd = Vss to Vdd = V6_vassignp(Vss)
+def : InstAlias<"$Vdd = $Vss",
+ (V6_vassignp VecDblRegs:$Vdd, VecDblRegs:$Vss)>,
+ Requires<[HasV60T]>;
// maps Vd = #0 to Vd = vxor(Vd, Vd)
def : InstAlias<"$Vd = #0",
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td
index 0bfb04447f2f..fa3cccbd0879 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormats.td
@@ -54,7 +54,7 @@ class MemAccessSize<bits<4> value> {
bits<4> Value = value;
}
-def NoMemAccess : MemAccessSize<0>;// Not a memory acces instruction.
+def NoMemAccess : MemAccessSize<0>;// Not a memory access instruction.
def ByteAccess : MemAccessSize<1>;// Byte access instruction (memb).
def HalfWordAccess : MemAccessSize<2>;// Half word access instruction (memh).
def WordAccess : MemAccessSize<3>;// Word access instruction (memw).
@@ -179,6 +179,9 @@ class InstHexagon<dag outs, dag ins, string asmstr, list<dag> pattern,
bits<1> isAccumulator = 0;
let TSFlags{54} = isAccumulator;
+ bit cofMax1 = 0;
+ let TSFlags{60} = cofMax1;
+
// Fields used for relation models.
bit isNonTemporal = 0;
string isNT = ""; // set to "true" for non-temporal vector stores.
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td
index e17f71fe4e6a..493d04703da9 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV4.td
@@ -17,7 +17,7 @@
// *** Must match BaseInfo.h ***
//----------------------------------------------------------------------------//
-def TypeMEMOP : IType<9>;
+def TypeV4LDST : IType<9>;
def TypeNV : IType<10>;
def TypeDUPLEX : IType<11>;
def TypeCOMPOUND : IType<12>;
@@ -132,7 +132,7 @@ class NCJInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
let mayLoad = 1, mayStore = 1 in
class MEMInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
string cstr = "", InstrItinClass itin = V4LDST_tc_st_SLOT0>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeMEMOP>,
+ : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, TypeV4LDST>,
OpcodeHexagon;
class MEMInst_V4<dag outs, dag ins, string asmstr, list<dag> pattern = [],
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV60.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV60.td
index f3d43dec733e..b9f4373a0b79 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV60.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrFormatsV60.td
@@ -12,7 +12,7 @@
//===----------------------------------------------------------------------===//
//----------------------------------------------------------------------------//
-// Hexagon Intruction Flags +
+// Hexagon Instruction Flags +
//
// *** Must match BaseInfo.h ***
//----------------------------------------------------------------------------//
@@ -34,7 +34,7 @@ def TypeCVI_VM_NEW_ST : IType<26>;
def TypeCVI_VM_STU : IType<27>;
def TypeCVI_HIST : IType<28>;
//----------------------------------------------------------------------------//
-// Intruction Classes Definitions +
+// Instruction Classes Definitions +
//----------------------------------------------------------------------------//
let validSubTargets = HasV60SubT in
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
index fe9f97d1d5e7..34ce3e652995 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -11,18 +11,20 @@
//
//===----------------------------------------------------------------------===//
+#include "HexagonHazardRecognizer.h"
#include "HexagonInstrInfo.h"
-#include "Hexagon.h"
#include "HexagonRegisterInfo.h"
#include "HexagonSubtarget.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/DFAPacketizer.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -39,8 +41,6 @@ using namespace llvm;
#include "HexagonGenInstrInfo.inc"
#include "HexagonGenDFAPacketizer.inc"
-using namespace llvm;
-
cl::opt<bool> ScheduleInlineAsm("hexagon-sched-inline-asm", cl::Hidden,
cl::init(false), cl::desc("Do not consider inline-asm a scheduling/"
"packetization boundary."));
@@ -67,6 +67,10 @@ static cl::opt<bool> EnableACCForwarding(
static cl::opt<bool> BranchRelaxAsmLarge("branch-relax-asm-large",
cl::init(true), cl::Hidden, cl::ZeroOrMore, cl::desc("branch relax asm"));
+static cl::opt<bool> UseDFAHazardRec("dfa-hazard-rec",
+ cl::init(true), cl::Hidden, cl::ZeroOrMore,
+ cl::desc("Use the DFA based hazard recognizer."));
+
///
/// Constants for Hexagon instructions.
///
@@ -112,8 +116,8 @@ static bool isIntRegForSubInst(unsigned Reg) {
static bool isDblRegForSubInst(unsigned Reg, const HexagonRegisterInfo &HRI) {
- return isIntRegForSubInst(HRI.getSubReg(Reg, Hexagon::subreg_loreg)) &&
- isIntRegForSubInst(HRI.getSubReg(Reg, Hexagon::subreg_hireg));
+ return isIntRegForSubInst(HRI.getSubReg(Reg, Hexagon::isub_lo)) &&
+ isIntRegForSubInst(HRI.getSubReg(Reg, Hexagon::isub_hi));
}
@@ -174,13 +178,13 @@ static MachineInstr *findLoopInstr(MachineBasicBlock *BB, int EndLoopOp,
/// Gather register def/uses from MI.
/// This treats possible (predicated) defs as actually happening ones
/// (conservatively).
-static inline void parseOperands(const MachineInstr *MI,
+static inline void parseOperands(const MachineInstr &MI,
SmallVector<unsigned, 4> &Defs, SmallVector<unsigned, 8> &Uses) {
Defs.clear();
Uses.clear();
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg())
continue;
@@ -236,10 +240,6 @@ unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
switch (MI.getOpcode()) {
default:
break;
- case Hexagon::L2_loadrb_io:
- case Hexagon::L2_loadrub_io:
- case Hexagon::L2_loadrh_io:
- case Hexagon::L2_loadruh_io:
case Hexagon::L2_loadri_io:
case Hexagon::L2_loadrd_io:
case Hexagon::V6_vL32b_ai:
@@ -248,14 +248,10 @@ unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
case Hexagon::V6_vL32Ub_ai_128B:
case Hexagon::LDriw_pred:
case Hexagon::LDriw_mod:
- case Hexagon::LDriq_pred_V6:
- case Hexagon::LDriq_pred_vec_V6:
- case Hexagon::LDriv_pseudo_V6:
- case Hexagon::LDrivv_pseudo_V6:
- case Hexagon::LDriq_pred_V6_128B:
- case Hexagon::LDriq_pred_vec_V6_128B:
- case Hexagon::LDriv_pseudo_V6_128B:
- case Hexagon::LDrivv_pseudo_V6_128B: {
+ case Hexagon::PS_vloadrq_ai:
+ case Hexagon::PS_vloadrw_ai:
+ case Hexagon::PS_vloadrq_ai_128B:
+ case Hexagon::PS_vloadrw_ai_128B: {
const MachineOperand OpFI = MI.getOperand(1);
if (!OpFI.isFI())
return 0;
@@ -266,14 +262,6 @@ unsigned HexagonInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
return MI.getOperand(0).getReg();
}
- case Hexagon::L2_ploadrbt_io:
- case Hexagon::L2_ploadrbf_io:
- case Hexagon::L2_ploadrubt_io:
- case Hexagon::L2_ploadrubf_io:
- case Hexagon::L2_ploadrht_io:
- case Hexagon::L2_ploadrhf_io:
- case Hexagon::L2_ploadruht_io:
- case Hexagon::L2_ploadruhf_io:
case Hexagon::L2_ploadrit_io:
case Hexagon::L2_ploadrif_io:
case Hexagon::L2_ploadrdt_io:
@@ -313,14 +301,10 @@ unsigned HexagonInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
case Hexagon::V6_vS32Ub_ai_128B:
case Hexagon::STriw_pred:
case Hexagon::STriw_mod:
- case Hexagon::STriq_pred_V6:
- case Hexagon::STriq_pred_vec_V6:
- case Hexagon::STriv_pseudo_V6:
- case Hexagon::STrivv_pseudo_V6:
- case Hexagon::STriq_pred_V6_128B:
- case Hexagon::STriq_pred_vec_V6_128B:
- case Hexagon::STriv_pseudo_V6_128B:
- case Hexagon::STrivv_pseudo_V6_128B: {
+ case Hexagon::PS_vstorerq_ai:
+ case Hexagon::PS_vstorerw_ai:
+ case Hexagon::PS_vstorerq_ai_128B:
+ case Hexagon::PS_vstorerw_ai_128B: {
const MachineOperand &OpFI = MI.getOperand(0);
if (!OpFI.isFI())
return 0;
@@ -455,7 +439,7 @@ bool HexagonInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
return true;
bool LastOpcodeHasJMP_c = PredOpcodeHasJMP_c(LastOpcode);
- bool LastOpcodeHasNVJump = isNewValueJump(LastInst);
+ bool LastOpcodeHasNVJump = isNewValueJump(*LastInst);
if (LastOpcodeHasJMP_c && !LastInst->getOperand(1).isMBB())
return true;
@@ -493,7 +477,7 @@ bool HexagonInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
}
bool SecLastOpcodeHasJMP_c = PredOpcodeHasJMP_c(SecLastOpcode);
- bool SecLastOpcodeHasNVJump = isNewValueJump(SecondLastInst);
+ bool SecLastOpcodeHasNVJump = isNewValueJump(*SecondLastInst);
if (SecLastOpcodeHasJMP_c && (LastOpcode == Hexagon::J2_jump)) {
if (!SecondLastInst->getOperand(1).isMBB())
return true;
@@ -541,7 +525,10 @@ bool HexagonInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
}
-unsigned HexagonInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+unsigned HexagonInstrInfo::removeBranch(MachineBasicBlock &MBB,
+ int *BytesRemoved) const {
+ assert(!BytesRemoved && "code size not handled");
+
DEBUG(dbgs() << "\nRemoving branches out of BB#" << MBB.getNumber());
MachineBasicBlock::iterator I = MBB.end();
unsigned Count = 0;
@@ -561,17 +548,19 @@ unsigned HexagonInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
return Count;
}
-unsigned HexagonInstrInfo::InsertBranch(MachineBasicBlock &MBB,
+unsigned HexagonInstrInfo::insertBranch(MachineBasicBlock &MBB,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
ArrayRef<MachineOperand> Cond,
- const DebugLoc &DL) const {
+ const DebugLoc &DL,
+ int *BytesAdded) const {
unsigned BOpc = Hexagon::J2_jump;
unsigned BccOpc = Hexagon::J2_jumpt;
assert(validateBranchCond(Cond) && "Invalid branching condition");
- assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert(TBB && "insertBranch must not be told to insert a fallthrough");
+ assert(!BytesAdded && "code size not handled");
- // Check if ReverseBranchCondition has asked to reverse this branch
+ // Check if reverseBranchCondition has asked to reverse this branch
// If we want to reverse the branch an odd number of times, we want
// J2_jumpf.
if (!Cond.empty() && Cond[0].isImm())
@@ -587,13 +576,11 @@ unsigned HexagonInstrInfo::InsertBranch(MachineBasicBlock &MBB,
SmallVector<MachineOperand, 4> Cond;
auto Term = MBB.getFirstTerminator();
if (Term != MBB.end() && isPredicated(*Term) &&
- !analyzeBranch(MBB, NewTBB, NewFBB, Cond, false)) {
- MachineBasicBlock *NextBB = &*++MBB.getIterator();
- if (NewTBB == NextBB) {
- ReverseBranchCondition(Cond);
- RemoveBranch(MBB);
- return InsertBranch(MBB, TBB, nullptr, Cond, DL);
- }
+ !analyzeBranch(MBB, NewTBB, NewFBB, Cond, false) &&
+ MachineFunction::iterator(NewTBB) == ++MBB.getIterator()) {
+ reverseBranchCondition(Cond);
+ removeBranch(MBB);
+ return insertBranch(MBB, TBB, nullptr, Cond, DL);
}
BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
} else if (isEndLoopN(Cond[0].getImm())) {
@@ -657,6 +644,85 @@ unsigned HexagonInstrInfo::InsertBranch(MachineBasicBlock &MBB,
return 2;
}
+/// Analyze the loop code to find the loop induction variable and compare used
+/// to compute the number of iterations. Currently, we analyze loops that are
+/// controlled using hardware loops. In this case, the induction variable
+/// instruction is null. For all other cases, this function returns true, which
+/// means we're unable to analyze it.
+bool HexagonInstrInfo::analyzeLoop(MachineLoop &L,
+ MachineInstr *&IndVarInst,
+ MachineInstr *&CmpInst) const {
+
+ MachineBasicBlock *LoopEnd = L.getBottomBlock();
+ MachineBasicBlock::iterator I = LoopEnd->getFirstTerminator();
+ // We really "analyze" only hardware loops right now.
+ if (I != LoopEnd->end() && isEndLoopN(I->getOpcode())) {
+ IndVarInst = nullptr;
+ CmpInst = &*I;
+ return false;
+ }
+ return true;
+}
+
+/// Generate code to reduce the loop iteration by one and check if the loop is
+/// finished. Return the value/register of the new loop count. This function
+/// assumes the nth iteration is peeled first.
+unsigned HexagonInstrInfo::reduceLoopCount(MachineBasicBlock &MBB,
+ MachineInstr *IndVar, MachineInstr &Cmp,
+ SmallVectorImpl<MachineOperand> &Cond,
+ SmallVectorImpl<MachineInstr *> &PrevInsts,
+ unsigned Iter, unsigned MaxIter) const {
+ // We expect a hardware loop currently. This means that IndVar is set
+ // to null, and the compare is the ENDLOOP instruction.
+ assert((!IndVar) && isEndLoopN(Cmp.getOpcode())
+ && "Expecting a hardware loop");
+ MachineFunction *MF = MBB.getParent();
+ DebugLoc DL = Cmp.getDebugLoc();
+ SmallPtrSet<MachineBasicBlock *, 8> VisitedBBs;
+ MachineInstr *Loop = findLoopInstr(&MBB, Cmp.getOpcode(), VisitedBBs);
+ if (!Loop)
+ return 0;
+ // If the loop trip count is a compile-time value, then just change the
+ // value.
+ if (Loop->getOpcode() == Hexagon::J2_loop0i ||
+ Loop->getOpcode() == Hexagon::J2_loop1i) {
+ int64_t Offset = Loop->getOperand(1).getImm();
+ if (Offset <= 1)
+ Loop->eraseFromParent();
+ else
+ Loop->getOperand(1).setImm(Offset - 1);
+ return Offset - 1;
+ }
+ // The loop trip count is a run-time value. We generate code to subtract
+ // one from the trip count, and update the loop instruction.
+ assert(Loop->getOpcode() == Hexagon::J2_loop0r && "Unexpected instruction");
+ unsigned LoopCount = Loop->getOperand(1).getReg();
+ // Check if we're done with the loop.
+ unsigned LoopEnd = createVR(MF, MVT::i1);
+ MachineInstr *NewCmp = BuildMI(&MBB, DL, get(Hexagon::C2_cmpgtui), LoopEnd).
+ addReg(LoopCount).addImm(1);
+ unsigned NewLoopCount = createVR(MF, MVT::i32);
+ MachineInstr *NewAdd = BuildMI(&MBB, DL, get(Hexagon::A2_addi), NewLoopCount).
+ addReg(LoopCount).addImm(-1);
+ // Update the previously generated instructions with the new loop counter.
+ for (SmallVectorImpl<MachineInstr *>::iterator I = PrevInsts.begin(),
+ E = PrevInsts.end(); I != E; ++I)
+ (*I)->substituteRegister(LoopCount, NewLoopCount, 0, getRegisterInfo());
+ PrevInsts.clear();
+ PrevInsts.push_back(NewCmp);
+ PrevInsts.push_back(NewAdd);
+ // Insert the new loop instruction if this is the last time the loop is
+ // decremented.
+ if (Iter == MaxIter)
+ BuildMI(&MBB, DL, get(Hexagon::J2_loop0r)).
+ addMBB(Loop->getOperand(0).getMBB()).addReg(NewLoopCount);
+ // Delete the old loop instruction.
+ if (Iter == 0)
+ Loop->eraseFromParent();
+ Cond.push_back(MachineOperand::CreateImm(Hexagon::J2_jumpf));
+ Cond.push_back(NewCmp->getOperand(0));
+ return NewLoopCount;
+}
bool HexagonInstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
unsigned NumCycles, unsigned ExtraPredCycles,
@@ -743,9 +809,11 @@ void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
if (Hexagon::VecDblRegsRegClass.contains(SrcReg, DestReg)) {
+ unsigned LoSrc = HRI.getSubReg(SrcReg, Hexagon::vsub_lo);
+ unsigned HiSrc = HRI.getSubReg(SrcReg, Hexagon::vsub_hi);
BuildMI(MBB, I, DL, get(Hexagon::V6_vcombine), DestReg)
- .addReg(HRI.getSubReg(SrcReg, Hexagon::subreg_hireg), KillFlag)
- .addReg(HRI.getSubReg(SrcReg, Hexagon::subreg_loreg), KillFlag);
+ .addReg(HiSrc, KillFlag)
+ .addReg(LoSrc, KillFlag);
return;
}
if (Hexagon::VecPredRegsRegClass.contains(SrcReg, DestReg)) {
@@ -765,12 +833,14 @@ void HexagonInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
if (Hexagon::VecPredRegs128BRegClass.contains(SrcReg, DestReg)) {
- unsigned DstHi = HRI.getSubReg(DestReg, Hexagon::subreg_hireg);
- BuildMI(MBB, I, DL, get(Hexagon::V6_pred_and), DstHi)
- .addReg(HRI.getSubReg(SrcReg, Hexagon::subreg_hireg), KillFlag);
- unsigned DstLo = HRI.getSubReg(DestReg, Hexagon::subreg_loreg);
- BuildMI(MBB, I, DL, get(Hexagon::V6_pred_and), DstLo)
- .addReg(HRI.getSubReg(SrcReg, Hexagon::subreg_loreg), KillFlag);
+ unsigned HiDst = HRI.getSubReg(DestReg, Hexagon::vsub_hi);
+ unsigned LoDst = HRI.getSubReg(DestReg, Hexagon::vsub_lo);
+ unsigned HiSrc = HRI.getSubReg(SrcReg, Hexagon::vsub_hi);
+ unsigned LoSrc = HRI.getSubReg(SrcReg, Hexagon::vsub_lo);
+ BuildMI(MBB, I, DL, get(Hexagon::V6_pred_and), HiDst)
+ .addReg(HiSrc, KillFlag);
+ BuildMI(MBB, I, DL, get(Hexagon::V6_pred_and), LoDst)
+ .addReg(LoSrc, KillFlag);
return;
}
@@ -789,7 +859,7 @@ void HexagonInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const {
DebugLoc DL = MBB.findDebugLoc(I);
MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
unsigned KillFlag = getKillRegState(isKill);
@@ -814,31 +884,35 @@ void HexagonInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
.addFrameIndex(FI).addImm(0)
.addReg(SrcReg, KillFlag).addMemOperand(MMO);
} else if (Hexagon::VecPredRegs128BRegClass.hasSubClassEq(RC)) {
- BuildMI(MBB, I, DL, get(Hexagon::STriq_pred_V6_128B))
+ BuildMI(MBB, I, DL, get(Hexagon::PS_vstorerq_ai_128B))
.addFrameIndex(FI).addImm(0)
.addReg(SrcReg, KillFlag).addMemOperand(MMO);
} else if (Hexagon::VecPredRegsRegClass.hasSubClassEq(RC)) {
- BuildMI(MBB, I, DL, get(Hexagon::STriq_pred_V6))
+ BuildMI(MBB, I, DL, get(Hexagon::PS_vstorerq_ai))
.addFrameIndex(FI).addImm(0)
.addReg(SrcReg, KillFlag).addMemOperand(MMO);
} else if (Hexagon::VectorRegs128BRegClass.hasSubClassEq(RC)) {
- DEBUG(dbgs() << "++Generating 128B vector spill");
- BuildMI(MBB, I, DL, get(Hexagon::STriv_pseudo_V6_128B))
+ unsigned Opc = Align < 128 ? Hexagon::V6_vS32Ub_ai_128B
+ : Hexagon::V6_vS32b_ai_128B;
+ BuildMI(MBB, I, DL, get(Opc))
.addFrameIndex(FI).addImm(0)
.addReg(SrcReg, KillFlag).addMemOperand(MMO);
} else if (Hexagon::VectorRegsRegClass.hasSubClassEq(RC)) {
- DEBUG(dbgs() << "++Generating vector spill");
- BuildMI(MBB, I, DL, get(Hexagon::STriv_pseudo_V6))
+ unsigned Opc = Align < 64 ? Hexagon::V6_vS32Ub_ai
+ : Hexagon::V6_vS32b_ai;
+ BuildMI(MBB, I, DL, get(Opc))
.addFrameIndex(FI).addImm(0)
.addReg(SrcReg, KillFlag).addMemOperand(MMO);
} else if (Hexagon::VecDblRegsRegClass.hasSubClassEq(RC)) {
- DEBUG(dbgs() << "++Generating double vector spill");
- BuildMI(MBB, I, DL, get(Hexagon::STrivv_pseudo_V6))
+ unsigned Opc = Align < 64 ? Hexagon::PS_vstorerwu_ai
+ : Hexagon::PS_vstorerw_ai;
+ BuildMI(MBB, I, DL, get(Opc))
.addFrameIndex(FI).addImm(0)
.addReg(SrcReg, KillFlag).addMemOperand(MMO);
} else if (Hexagon::VecDblRegs128BRegClass.hasSubClassEq(RC)) {
- DEBUG(dbgs() << "++Generating 128B double vector spill");
- BuildMI(MBB, I, DL, get(Hexagon::STrivv_pseudo_V6_128B))
+ unsigned Opc = Align < 128 ? Hexagon::PS_vstorerwu_ai_128B
+ : Hexagon::PS_vstorerw_ai_128B;
+ BuildMI(MBB, I, DL, get(Opc))
.addFrameIndex(FI).addImm(0)
.addReg(SrcReg, KillFlag).addMemOperand(MMO);
} else {
@@ -852,7 +926,7 @@ void HexagonInstrInfo::loadRegFromStackSlot(
const TargetRegisterInfo *TRI) const {
DebugLoc DL = MBB.findDebugLoc(I);
MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
MachineMemOperand *MMO = MF.getMachineMemOperand(
@@ -872,26 +946,30 @@ void HexagonInstrInfo::loadRegFromStackSlot(
BuildMI(MBB, I, DL, get(Hexagon::LDriw_mod), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO);
} else if (Hexagon::VecPredRegs128BRegClass.hasSubClassEq(RC)) {
- BuildMI(MBB, I, DL, get(Hexagon::LDriq_pred_V6_128B), DestReg)
+ BuildMI(MBB, I, DL, get(Hexagon::PS_vloadrq_ai_128B), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO);
} else if (Hexagon::VecPredRegsRegClass.hasSubClassEq(RC)) {
- BuildMI(MBB, I, DL, get(Hexagon::LDriq_pred_V6), DestReg)
+ BuildMI(MBB, I, DL, get(Hexagon::PS_vloadrq_ai), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO);
} else if (Hexagon::VecDblRegs128BRegClass.hasSubClassEq(RC)) {
- DEBUG(dbgs() << "++Generating 128B double vector restore");
- BuildMI(MBB, I, DL, get(Hexagon::LDrivv_pseudo_V6_128B), DestReg)
+ unsigned Opc = Align < 128 ? Hexagon::PS_vloadrwu_ai_128B
+ : Hexagon::PS_vloadrw_ai_128B;
+ BuildMI(MBB, I, DL, get(Opc), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO);
} else if (Hexagon::VectorRegs128BRegClass.hasSubClassEq(RC)) {
- DEBUG(dbgs() << "++Generating 128B vector restore");
- BuildMI(MBB, I, DL, get(Hexagon::LDriv_pseudo_V6_128B), DestReg)
+ unsigned Opc = Align < 128 ? Hexagon::V6_vL32Ub_ai_128B
+ : Hexagon::V6_vL32b_ai_128B;
+ BuildMI(MBB, I, DL, get(Opc), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO);
} else if (Hexagon::VectorRegsRegClass.hasSubClassEq(RC)) {
- DEBUG(dbgs() << "++Generating vector restore");
- BuildMI(MBB, I, DL, get(Hexagon::LDriv_pseudo_V6), DestReg)
+ unsigned Opc = Align < 64 ? Hexagon::V6_vL32Ub_ai
+ : Hexagon::V6_vL32b_ai;
+ BuildMI(MBB, I, DL, get(Opc), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO);
} else if (Hexagon::VecDblRegsRegClass.hasSubClassEq(RC)) {
- DEBUG(dbgs() << "++Generating double vector restore");
- BuildMI(MBB, I, DL, get(Hexagon::LDrivv_pseudo_V6), DestReg)
+ unsigned Opc = Align < 64 ? Hexagon::PS_vloadrwu_ai
+ : Hexagon::PS_vloadrw_ai;
+ BuildMI(MBB, I, DL, get(Opc), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO);
} else {
llvm_unreachable("Can't store this register to stack slot");
@@ -899,6 +977,14 @@ void HexagonInstrInfo::loadRegFromStackSlot(
}
+static void getLiveRegsAt(LivePhysRegs &Regs, const MachineInstr &MI) {
+ const MachineBasicBlock &B = *MI.getParent();
+ Regs.addLiveOuts(B);
+ auto E = ++MachineBasicBlock::const_iterator(MI.getIterator()).getReverse();
+ for (auto I = B.rbegin(); I != E; ++I)
+ Regs.stepBackward(*I);
+}
+
/// expandPostRAPseudo - This function is called for all pseudo instructions
/// that remain after register allocation. Many pseudo instructions are
/// created to help register allocation. This is the place to convert them
@@ -912,7 +998,6 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
DebugLoc DL = MI.getDebugLoc();
unsigned Opc = MI.getOpcode();
const unsigned VecOffset = 1;
- bool Is128B = false;
switch (Opc) {
case TargetOpcode::COPY: {
@@ -926,58 +1011,71 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MBB.erase(MBBI);
return true;
}
- case Hexagon::ALIGNA:
+ case Hexagon::PS_aligna:
BuildMI(MBB, MI, DL, get(Hexagon::A2_andir), MI.getOperand(0).getReg())
.addReg(HRI.getFrameRegister())
.addImm(-MI.getOperand(1).getImm());
MBB.erase(MI);
return true;
- case Hexagon::HEXAGON_V6_vassignp_128B:
- case Hexagon::HEXAGON_V6_vassignp: {
+ case Hexagon::V6_vassignp_128B:
+ case Hexagon::V6_vassignp: {
unsigned SrcReg = MI.getOperand(1).getReg();
unsigned DstReg = MI.getOperand(0).getReg();
- if (SrcReg != DstReg)
- copyPhysReg(MBB, MI, DL, DstReg, SrcReg, MI.getOperand(1).isKill());
+ unsigned Kill = getKillRegState(MI.getOperand(1).isKill());
+ BuildMI(MBB, MI, DL, get(Hexagon::V6_vcombine), DstReg)
+ .addReg(HRI.getSubReg(SrcReg, Hexagon::vsub_hi), Kill)
+ .addReg(HRI.getSubReg(SrcReg, Hexagon::vsub_lo), Kill);
MBB.erase(MI);
return true;
}
- case Hexagon::HEXAGON_V6_lo_128B:
- case Hexagon::HEXAGON_V6_lo: {
+ case Hexagon::V6_lo_128B:
+ case Hexagon::V6_lo: {
unsigned SrcReg = MI.getOperand(1).getReg();
unsigned DstReg = MI.getOperand(0).getReg();
- unsigned SrcSubLo = HRI.getSubReg(SrcReg, Hexagon::subreg_loreg);
+ unsigned SrcSubLo = HRI.getSubReg(SrcReg, Hexagon::vsub_lo);
copyPhysReg(MBB, MI, DL, DstReg, SrcSubLo, MI.getOperand(1).isKill());
MBB.erase(MI);
MRI.clearKillFlags(SrcSubLo);
return true;
}
- case Hexagon::HEXAGON_V6_hi_128B:
- case Hexagon::HEXAGON_V6_hi: {
+ case Hexagon::V6_hi_128B:
+ case Hexagon::V6_hi: {
unsigned SrcReg = MI.getOperand(1).getReg();
unsigned DstReg = MI.getOperand(0).getReg();
- unsigned SrcSubHi = HRI.getSubReg(SrcReg, Hexagon::subreg_hireg);
+ unsigned SrcSubHi = HRI.getSubReg(SrcReg, Hexagon::vsub_hi);
copyPhysReg(MBB, MI, DL, DstReg, SrcSubHi, MI.getOperand(1).isKill());
MBB.erase(MI);
MRI.clearKillFlags(SrcSubHi);
return true;
}
- case Hexagon::STrivv_indexed_128B:
- Is128B = true;
- case Hexagon::STrivv_indexed: {
+ case Hexagon::PS_vstorerw_ai:
+ case Hexagon::PS_vstorerwu_ai:
+ case Hexagon::PS_vstorerw_ai_128B:
+ case Hexagon::PS_vstorerwu_ai_128B: {
+ bool Is128B = (Opc == Hexagon::PS_vstorerw_ai_128B ||
+ Opc == Hexagon::PS_vstorerwu_ai_128B);
+ bool Aligned = (Opc == Hexagon::PS_vstorerw_ai ||
+ Opc == Hexagon::PS_vstorerw_ai_128B);
unsigned SrcReg = MI.getOperand(2).getReg();
- unsigned SrcSubHi = HRI.getSubReg(SrcReg, Hexagon::subreg_hireg);
- unsigned SrcSubLo = HRI.getSubReg(SrcReg, Hexagon::subreg_loreg);
- unsigned NewOpcd = Is128B ? Hexagon::V6_vS32b_ai_128B
- : Hexagon::V6_vS32b_ai;
+ unsigned SrcSubHi = HRI.getSubReg(SrcReg, Hexagon::vsub_hi);
+ unsigned SrcSubLo = HRI.getSubReg(SrcReg, Hexagon::vsub_lo);
+ unsigned NewOpc;
+ if (Aligned)
+ NewOpc = Is128B ? Hexagon::V6_vS32b_ai_128B
+ : Hexagon::V6_vS32b_ai;
+ else
+ NewOpc = Is128B ? Hexagon::V6_vS32Ub_ai_128B
+ : Hexagon::V6_vS32Ub_ai;
+
unsigned Offset = Is128B ? VecOffset << 7 : VecOffset << 6;
MachineInstr *MI1New =
- BuildMI(MBB, MI, DL, get(NewOpcd))
+ BuildMI(MBB, MI, DL, get(NewOpc))
.addOperand(MI.getOperand(0))
.addImm(MI.getOperand(1).getImm())
.addReg(SrcSubLo)
.setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
MI1New->getOperand(0).setIsKill(false);
- BuildMI(MBB, MI, DL, get(NewOpcd))
+ BuildMI(MBB, MI, DL, get(NewOpc))
.addOperand(MI.getOperand(0))
// The Vectors are indexed in multiples of vector size.
.addImm(MI.getOperand(1).getImm() + Offset)
@@ -986,23 +1084,32 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MBB.erase(MI);
return true;
}
- case Hexagon::LDrivv_pseudo_V6_128B:
- case Hexagon::LDrivv_indexed_128B:
- Is128B = true;
- case Hexagon::LDrivv_pseudo_V6:
- case Hexagon::LDrivv_indexed: {
- unsigned NewOpcd = Is128B ? Hexagon::V6_vL32b_ai_128B
- : Hexagon::V6_vL32b_ai;
+ case Hexagon::PS_vloadrw_ai:
+ case Hexagon::PS_vloadrwu_ai:
+ case Hexagon::PS_vloadrw_ai_128B:
+ case Hexagon::PS_vloadrwu_ai_128B: {
+ bool Is128B = (Opc == Hexagon::PS_vloadrw_ai_128B ||
+ Opc == Hexagon::PS_vloadrwu_ai_128B);
+ bool Aligned = (Opc == Hexagon::PS_vloadrw_ai ||
+ Opc == Hexagon::PS_vloadrw_ai_128B);
+ unsigned NewOpc;
+ if (Aligned)
+ NewOpc = Is128B ? Hexagon::V6_vL32b_ai_128B
+ : Hexagon::V6_vL32b_ai;
+ else
+ NewOpc = Is128B ? Hexagon::V6_vL32Ub_ai_128B
+ : Hexagon::V6_vL32Ub_ai;
+
unsigned DstReg = MI.getOperand(0).getReg();
unsigned Offset = Is128B ? VecOffset << 7 : VecOffset << 6;
MachineInstr *MI1New =
- BuildMI(MBB, MI, DL, get(NewOpcd),
- HRI.getSubReg(DstReg, Hexagon::subreg_loreg))
+ BuildMI(MBB, MI, DL, get(NewOpc),
+ HRI.getSubReg(DstReg, Hexagon::vsub_lo))
.addOperand(MI.getOperand(1))
.addImm(MI.getOperand(2).getImm());
MI1New->getOperand(1).setIsKill(false);
- BuildMI(MBB, MI, DL, get(NewOpcd),
- HRI.getSubReg(DstReg, Hexagon::subreg_hireg))
+ BuildMI(MBB, MI, DL, get(NewOpc),
+ HRI.getSubReg(DstReg, Hexagon::vsub_hi))
.addOperand(MI.getOperand(1))
// The Vectors are indexed in multiples of vector size.
.addImm(MI.getOperand(2).getImm() + Offset)
@@ -1010,35 +1117,7 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MBB.erase(MI);
return true;
}
- case Hexagon::LDriv_pseudo_V6_128B:
- Is128B = true;
- case Hexagon::LDriv_pseudo_V6: {
- unsigned DstReg = MI.getOperand(0).getReg();
- unsigned NewOpc = Is128B ? Hexagon::V6_vL32b_ai_128B
- : Hexagon::V6_vL32b_ai;
- int32_t Off = MI.getOperand(2).getImm();
- BuildMI(MBB, MI, DL, get(NewOpc), DstReg)
- .addOperand(MI.getOperand(1))
- .addImm(Off)
- .setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- MBB.erase(MI);
- return true;
- }
- case Hexagon::STriv_pseudo_V6_128B:
- Is128B = true;
- case Hexagon::STriv_pseudo_V6: {
- unsigned NewOpc = Is128B ? Hexagon::V6_vS32b_ai_128B
- : Hexagon::V6_vS32b_ai;
- int32_t Off = MI.getOperand(1).getImm();
- BuildMI(MBB, MI, DL, get(NewOpc))
- .addOperand(MI.getOperand(0))
- .addImm(Off)
- .addOperand(MI.getOperand(2))
- .setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
- MBB.erase(MI);
- return true;
- }
- case Hexagon::TFR_PdTrue: {
+ case Hexagon::PS_true: {
unsigned Reg = MI.getOperand(0).getReg();
BuildMI(MBB, MI, DL, get(Hexagon::C2_orn), Reg)
.addReg(Reg, RegState::Undef)
@@ -1046,7 +1125,7 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MBB.erase(MI);
return true;
}
- case Hexagon::TFR_PdFalse: {
+ case Hexagon::PS_false: {
unsigned Reg = MI.getOperand(0).getReg();
BuildMI(MBB, MI, DL, get(Hexagon::C2_andn), Reg)
.addReg(Reg, RegState::Undef)
@@ -1054,21 +1133,21 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MBB.erase(MI);
return true;
}
- case Hexagon::VMULW: {
+ case Hexagon::PS_vmulw: {
// Expand a 64-bit vector multiply into 2 32-bit scalar multiplies.
unsigned DstReg = MI.getOperand(0).getReg();
unsigned Src1Reg = MI.getOperand(1).getReg();
unsigned Src2Reg = MI.getOperand(2).getReg();
- unsigned Src1SubHi = HRI.getSubReg(Src1Reg, Hexagon::subreg_hireg);
- unsigned Src1SubLo = HRI.getSubReg(Src1Reg, Hexagon::subreg_loreg);
- unsigned Src2SubHi = HRI.getSubReg(Src2Reg, Hexagon::subreg_hireg);
- unsigned Src2SubLo = HRI.getSubReg(Src2Reg, Hexagon::subreg_loreg);
+ unsigned Src1SubHi = HRI.getSubReg(Src1Reg, Hexagon::isub_hi);
+ unsigned Src1SubLo = HRI.getSubReg(Src1Reg, Hexagon::isub_lo);
+ unsigned Src2SubHi = HRI.getSubReg(Src2Reg, Hexagon::isub_hi);
+ unsigned Src2SubLo = HRI.getSubReg(Src2Reg, Hexagon::isub_lo);
BuildMI(MBB, MI, MI.getDebugLoc(), get(Hexagon::M2_mpyi),
- HRI.getSubReg(DstReg, Hexagon::subreg_hireg))
+ HRI.getSubReg(DstReg, Hexagon::isub_hi))
.addReg(Src1SubHi)
.addReg(Src2SubHi);
BuildMI(MBB, MI, MI.getDebugLoc(), get(Hexagon::M2_mpyi),
- HRI.getSubReg(DstReg, Hexagon::subreg_loreg))
+ HRI.getSubReg(DstReg, Hexagon::isub_lo))
.addReg(Src1SubLo)
.addReg(Src2SubLo);
MBB.erase(MI);
@@ -1078,25 +1157,25 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MRI.clearKillFlags(Src2SubLo);
return true;
}
- case Hexagon::VMULW_ACC: {
+ case Hexagon::PS_vmulw_acc: {
// Expand 64-bit vector multiply with addition into 2 scalar multiplies.
unsigned DstReg = MI.getOperand(0).getReg();
unsigned Src1Reg = MI.getOperand(1).getReg();
unsigned Src2Reg = MI.getOperand(2).getReg();
unsigned Src3Reg = MI.getOperand(3).getReg();
- unsigned Src1SubHi = HRI.getSubReg(Src1Reg, Hexagon::subreg_hireg);
- unsigned Src1SubLo = HRI.getSubReg(Src1Reg, Hexagon::subreg_loreg);
- unsigned Src2SubHi = HRI.getSubReg(Src2Reg, Hexagon::subreg_hireg);
- unsigned Src2SubLo = HRI.getSubReg(Src2Reg, Hexagon::subreg_loreg);
- unsigned Src3SubHi = HRI.getSubReg(Src3Reg, Hexagon::subreg_hireg);
- unsigned Src3SubLo = HRI.getSubReg(Src3Reg, Hexagon::subreg_loreg);
+ unsigned Src1SubHi = HRI.getSubReg(Src1Reg, Hexagon::isub_hi);
+ unsigned Src1SubLo = HRI.getSubReg(Src1Reg, Hexagon::isub_lo);
+ unsigned Src2SubHi = HRI.getSubReg(Src2Reg, Hexagon::isub_hi);
+ unsigned Src2SubLo = HRI.getSubReg(Src2Reg, Hexagon::isub_lo);
+ unsigned Src3SubHi = HRI.getSubReg(Src3Reg, Hexagon::isub_hi);
+ unsigned Src3SubLo = HRI.getSubReg(Src3Reg, Hexagon::isub_lo);
BuildMI(MBB, MI, MI.getDebugLoc(), get(Hexagon::M2_maci),
- HRI.getSubReg(DstReg, Hexagon::subreg_hireg))
+ HRI.getSubReg(DstReg, Hexagon::isub_hi))
.addReg(Src1SubHi)
.addReg(Src2SubHi)
.addReg(Src3SubHi);
BuildMI(MBB, MI, MI.getDebugLoc(), get(Hexagon::M2_maci),
- HRI.getSubReg(DstReg, Hexagon::subreg_loreg))
+ HRI.getSubReg(DstReg, Hexagon::isub_lo))
.addReg(Src1SubLo)
.addReg(Src2SubLo)
.addReg(Src3SubLo);
@@ -1109,49 +1188,7 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MRI.clearKillFlags(Src3SubLo);
return true;
}
- case Hexagon::Insert4: {
- unsigned DstReg = MI.getOperand(0).getReg();
- unsigned Src1Reg = MI.getOperand(1).getReg();
- unsigned Src2Reg = MI.getOperand(2).getReg();
- unsigned Src3Reg = MI.getOperand(3).getReg();
- unsigned Src4Reg = MI.getOperand(4).getReg();
- unsigned Src1RegIsKill = getKillRegState(MI.getOperand(1).isKill());
- unsigned Src2RegIsKill = getKillRegState(MI.getOperand(2).isKill());
- unsigned Src3RegIsKill = getKillRegState(MI.getOperand(3).isKill());
- unsigned Src4RegIsKill = getKillRegState(MI.getOperand(4).isKill());
- unsigned DstSubHi = HRI.getSubReg(DstReg, Hexagon::subreg_hireg);
- unsigned DstSubLo = HRI.getSubReg(DstReg, Hexagon::subreg_loreg);
- BuildMI(MBB, MI, MI.getDebugLoc(), get(Hexagon::S2_insert),
- HRI.getSubReg(DstReg, Hexagon::subreg_loreg))
- .addReg(DstSubLo)
- .addReg(Src1Reg, Src1RegIsKill)
- .addImm(16)
- .addImm(0);
- BuildMI(MBB, MI, MI.getDebugLoc(), get(Hexagon::S2_insert),
- HRI.getSubReg(DstReg, Hexagon::subreg_loreg))
- .addReg(DstSubLo)
- .addReg(Src2Reg, Src2RegIsKill)
- .addImm(16)
- .addImm(16);
- BuildMI(MBB, MI, MI.getDebugLoc(), get(Hexagon::S2_insert),
- HRI.getSubReg(DstReg, Hexagon::subreg_hireg))
- .addReg(DstSubHi)
- .addReg(Src3Reg, Src3RegIsKill)
- .addImm(16)
- .addImm(0);
- BuildMI(MBB, MI, MI.getDebugLoc(), get(Hexagon::S2_insert),
- HRI.getSubReg(DstReg, Hexagon::subreg_hireg))
- .addReg(DstSubHi)
- .addReg(Src4Reg, Src4RegIsKill)
- .addImm(16)
- .addImm(16);
- MBB.erase(MI);
- MRI.clearKillFlags(DstReg);
- MRI.clearKillFlags(DstSubHi);
- MRI.clearKillFlags(DstSubLo);
- return true;
- }
- case Hexagon::MUX64_rr: {
+ case Hexagon::PS_pselect: {
const MachineOperand &Op0 = MI.getOperand(0);
const MachineOperand &Op1 = MI.getOperand(1);
const MachineOperand &Op2 = MI.getOperand(2);
@@ -1175,64 +1212,96 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
MBB.erase(MI);
return true;
}
- case Hexagon::VSelectPseudo_V6: {
+ case Hexagon::PS_vselect:
+ case Hexagon::PS_vselect_128B: {
const MachineOperand &Op0 = MI.getOperand(0);
const MachineOperand &Op1 = MI.getOperand(1);
const MachineOperand &Op2 = MI.getOperand(2);
const MachineOperand &Op3 = MI.getOperand(3);
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vcmov))
- .addOperand(Op0)
- .addOperand(Op1)
- .addOperand(Op2);
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vncmov))
- .addOperand(Op0)
- .addOperand(Op1)
- .addOperand(Op3);
+ LivePhysRegs LiveAtMI(&HRI);
+ getLiveRegsAt(LiveAtMI, MI);
+ bool IsDestLive = !LiveAtMI.available(MRI, Op0.getReg());
+ if (Op0.getReg() != Op2.getReg()) {
+ auto T = BuildMI(MBB, MI, DL, get(Hexagon::V6_vcmov))
+ .addOperand(Op0)
+ .addOperand(Op1)
+ .addOperand(Op2);
+ if (IsDestLive)
+ T.addReg(Op0.getReg(), RegState::Implicit);
+ IsDestLive = true;
+ }
+ if (Op0.getReg() != Op3.getReg()) {
+ auto T = BuildMI(MBB, MI, DL, get(Hexagon::V6_vncmov))
+ .addOperand(Op0)
+ .addOperand(Op1)
+ .addOperand(Op3);
+ if (IsDestLive)
+ T.addReg(Op0.getReg(), RegState::Implicit);
+ }
MBB.erase(MI);
return true;
}
- case Hexagon::VSelectDblPseudo_V6: {
+ case Hexagon::PS_wselect:
+ case Hexagon::PS_wselect_128B: {
MachineOperand &Op0 = MI.getOperand(0);
MachineOperand &Op1 = MI.getOperand(1);
MachineOperand &Op2 = MI.getOperand(2);
MachineOperand &Op3 = MI.getOperand(3);
- unsigned SrcLo = HRI.getSubReg(Op2.getReg(), Hexagon::subreg_loreg);
- unsigned SrcHi = HRI.getSubReg(Op2.getReg(), Hexagon::subreg_hireg);
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vccombine))
- .addOperand(Op0)
- .addOperand(Op1)
- .addReg(SrcHi)
- .addReg(SrcLo);
- SrcLo = HRI.getSubReg(Op3.getReg(), Hexagon::subreg_loreg);
- SrcHi = HRI.getSubReg(Op3.getReg(), Hexagon::subreg_hireg);
- BuildMI(MBB, MI, DL, get(Hexagon::V6_vnccombine))
- .addOperand(Op0)
- .addOperand(Op1)
- .addReg(SrcHi)
- .addReg(SrcLo);
+ LivePhysRegs LiveAtMI(&HRI);
+ getLiveRegsAt(LiveAtMI, MI);
+ bool IsDestLive = !LiveAtMI.available(MRI, Op0.getReg());
+
+ if (Op0.getReg() != Op2.getReg()) {
+ unsigned SrcLo = HRI.getSubReg(Op2.getReg(), Hexagon::vsub_lo);
+ unsigned SrcHi = HRI.getSubReg(Op2.getReg(), Hexagon::vsub_hi);
+ auto T = BuildMI(MBB, MI, DL, get(Hexagon::V6_vccombine))
+ .addOperand(Op0)
+ .addOperand(Op1)
+ .addReg(SrcHi)
+ .addReg(SrcLo);
+ if (IsDestLive)
+ T.addReg(Op0.getReg(), RegState::Implicit);
+ IsDestLive = true;
+ }
+ if (Op0.getReg() != Op3.getReg()) {
+ unsigned SrcLo = HRI.getSubReg(Op3.getReg(), Hexagon::vsub_lo);
+ unsigned SrcHi = HRI.getSubReg(Op3.getReg(), Hexagon::vsub_hi);
+ auto T = BuildMI(MBB, MI, DL, get(Hexagon::V6_vnccombine))
+ .addOperand(Op0)
+ .addOperand(Op1)
+ .addReg(SrcHi)
+ .addReg(SrcLo);
+ if (IsDestLive)
+ T.addReg(Op0.getReg(), RegState::Implicit);
+ }
MBB.erase(MI);
return true;
}
- case Hexagon::TCRETURNi:
+ case Hexagon::PS_tailcall_i:
MI.setDesc(get(Hexagon::J2_jump));
return true;
- case Hexagon::TCRETURNr:
+ case Hexagon::PS_tailcall_r:
+ case Hexagon::PS_jmpret:
MI.setDesc(get(Hexagon::J2_jumpr));
return true;
- case Hexagon::TFRI_f:
- case Hexagon::TFRI_cPt_f:
- case Hexagon::TFRI_cNotPt_f: {
- unsigned Opx = (Opc == Hexagon::TFRI_f) ? 1 : 2;
- APFloat FVal = MI.getOperand(Opx).getFPImm()->getValueAPF();
- APInt IVal = FVal.bitcastToAPInt();
- MI.RemoveOperand(Opx);
- unsigned NewOpc = (Opc == Hexagon::TFRI_f) ? Hexagon::A2_tfrsi :
- (Opc == Hexagon::TFRI_cPt_f) ? Hexagon::C2_cmoveit :
- Hexagon::C2_cmoveif;
- MI.setDesc(get(NewOpc));
- MI.addOperand(MachineOperand::CreateImm(IVal.getZExtValue()));
+ case Hexagon::PS_jmprett:
+ MI.setDesc(get(Hexagon::J2_jumprt));
+ return true;
+ case Hexagon::PS_jmpretf:
+ MI.setDesc(get(Hexagon::J2_jumprf));
+ return true;
+ case Hexagon::PS_jmprettnewpt:
+ MI.setDesc(get(Hexagon::J2_jumprtnewpt));
+ return true;
+ case Hexagon::PS_jmpretfnewpt:
+ MI.setDesc(get(Hexagon::J2_jumprfnewpt));
+ return true;
+ case Hexagon::PS_jmprettnew:
+ MI.setDesc(get(Hexagon::J2_jumprtnew));
+ return true;
+ case Hexagon::PS_jmpretfnew:
+ MI.setDesc(get(Hexagon::J2_jumprfnew));
return true;
- }
}
return false;
@@ -1241,7 +1310,7 @@ bool HexagonInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
// We indicate that we want to reverse the branch by
// inserting the reversed branching opcode.
-bool HexagonInstrInfo::ReverseBranchCondition(
+bool HexagonInstrInfo::reverseBranchCondition(
SmallVectorImpl<MachineOperand> &Cond) const {
if (Cond.empty())
return true;
@@ -1264,6 +1333,11 @@ void HexagonInstrInfo::insertNoop(MachineBasicBlock &MBB,
}
+bool HexagonInstrInfo::isPostIncrement(const MachineInstr &MI) const {
+ return getAddrMode(MI) == HexagonII::PostInc;
+}
+
+
// Returns true if an instruction is predicated irrespective of the predicate
// sense. For example, all of the following will return true.
// if (p0) R1 = add(R2, R3)
@@ -1372,6 +1446,9 @@ bool HexagonInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
// Throwing call is a boundary.
if (MI.isCall()) {
+ // Don't mess around with no return calls.
+ if (doesNotReturn(MI))
+ return true;
// If any of the block's successors is a landing pad, this could be a
// throwing call.
for (auto I : MBB->successors())
@@ -1379,10 +1456,6 @@ bool HexagonInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
return true;
}
- // Don't mess around with no return calls.
- if (MI.getOpcode() == Hexagon::CALLv3nr)
- return true;
-
// Terminators and labels can't be scheduled around.
if (MI.getDesc().isTerminator() || MI.isPosition())
return true;
@@ -1418,8 +1491,8 @@ unsigned HexagonInstrInfo::getInlineAsmLength(const char *Str,
Length += MAI.getMaxInstLength();
atInsnStart = false;
}
- if (atInsnStart && strncmp(Str, MAI.getCommentString(),
- strlen(MAI.getCommentString())) == 0)
+ if (atInsnStart && strncmp(Str, MAI.getCommentString().data(),
+ MAI.getCommentString().size()) == 0)
atInsnStart = false;
}
@@ -1433,6 +1506,10 @@ unsigned HexagonInstrInfo::getInlineAsmLength(const char *Str,
ScheduleHazardRecognizer*
HexagonInstrInfo::CreateTargetPostRAHazardRecognizer(
const InstrItineraryData *II, const ScheduleDAG *DAG) const {
+ if (UseDFAHazardRec) {
+ auto &HST = DAG->MF.getSubtarget<HexagonSubtarget>();
+ return new HexagonHazardRecognizer(II, this, HST);
+ }
return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
}
@@ -1529,7 +1606,7 @@ bool HexagonInstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
unsigned HexagonInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
const MachineInstr &MI,
unsigned *PredCost) const {
- return getInstrTimingClassLatency(ItinData, &MI);
+ return getInstrTimingClassLatency(ItinData, MI);
}
@@ -1555,16 +1632,16 @@ bool HexagonInstrInfo::areMemAccessesTriviallyDisjoint(
// Instructions that are pure loads, not loads and stores like memops are not
// dependent.
- if (MIa.mayLoad() && !isMemOp(&MIa) && MIb.mayLoad() && !isMemOp(&MIb))
+ if (MIa.mayLoad() && !isMemOp(MIa) && MIb.mayLoad() && !isMemOp(MIb))
return true;
// Get base, offset, and access size in MIa.
- unsigned BaseRegA = getBaseAndOffset(&MIa, OffsetA, SizeA);
+ unsigned BaseRegA = getBaseAndOffset(MIa, OffsetA, SizeA);
if (!BaseRegA || !SizeA)
return false;
// Get base, offset, and access size in MIb.
- unsigned BaseRegB = getBaseAndOffset(&MIb, OffsetB, SizeB);
+ unsigned BaseRegB = getBaseAndOffset(MIb, OffsetB, SizeB);
if (!BaseRegB || !SizeB)
return false;
@@ -1585,7 +1662,23 @@ bool HexagonInstrInfo::areMemAccessesTriviallyDisjoint(
}
-unsigned HexagonInstrInfo::createVR(MachineFunction* MF, MVT VT) const {
+/// If the instruction is an increment of a constant value, return the amount.
+bool HexagonInstrInfo::getIncrementValue(const MachineInstr &MI,
+ int &Value) const {
+ if (isPostIncrement(MI)) {
+ unsigned AccessSize;
+ return getBaseAndOffset(MI, Value, AccessSize);
+ }
+ if (MI.getOpcode() == Hexagon::A2_addi) {
+ Value = MI.getOperand(2).getImm();
+ return true;
+ }
+
+ return false;
+}
+
+
+unsigned HexagonInstrInfo::createVR(MachineFunction *MF, MVT VT) const {
MachineRegisterInfo &MRI = MF->getRegInfo();
const TargetRegisterClass *TRC;
if (VT == MVT::i1) {
@@ -1603,32 +1696,32 @@ unsigned HexagonInstrInfo::createVR(MachineFunction* MF, MVT VT) const {
}
-bool HexagonInstrInfo::isAbsoluteSet(const MachineInstr* MI) const {
+bool HexagonInstrInfo::isAbsoluteSet(const MachineInstr &MI) const {
return (getAddrMode(MI) == HexagonII::AbsoluteSet);
}
-bool HexagonInstrInfo::isAccumulator(const MachineInstr *MI) const {
- const uint64_t F = MI->getDesc().TSFlags;
+bool HexagonInstrInfo::isAccumulator(const MachineInstr &MI) const {
+ const uint64_t F = MI.getDesc().TSFlags;
return((F >> HexagonII::AccumulatorPos) & HexagonII::AccumulatorMask);
}
-bool HexagonInstrInfo::isComplex(const MachineInstr *MI) const {
- const MachineFunction *MF = MI->getParent()->getParent();
+bool HexagonInstrInfo::isComplex(const MachineInstr &MI) const {
+ const MachineFunction *MF = MI.getParent()->getParent();
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
const HexagonInstrInfo *QII = (const HexagonInstrInfo *) TII;
if (!(isTC1(MI))
&& !(QII->isTC2Early(MI))
- && !(MI->getDesc().mayLoad())
- && !(MI->getDesc().mayStore())
- && (MI->getDesc().getOpcode() != Hexagon::S2_allocframe)
- && (MI->getDesc().getOpcode() != Hexagon::L2_deallocframe)
+ && !(MI.getDesc().mayLoad())
+ && !(MI.getDesc().mayStore())
+ && (MI.getDesc().getOpcode() != Hexagon::S2_allocframe)
+ && (MI.getDesc().getOpcode() != Hexagon::L2_deallocframe)
&& !(QII->isMemOp(MI))
- && !(MI->isBranch())
- && !(MI->isReturn())
- && !MI->isCall())
+ && !(MI.isBranch())
+ && !(MI.isReturn())
+ && !MI.isCall())
return true;
return false;
@@ -1636,24 +1729,24 @@ bool HexagonInstrInfo::isComplex(const MachineInstr *MI) const {
// Return true if the instruction is a compound branch instruction.
-bool HexagonInstrInfo::isCompoundBranchInstr(const MachineInstr *MI) const {
- return (getType(MI) == HexagonII::TypeCOMPOUND && MI->isBranch());
+bool HexagonInstrInfo::isCompoundBranchInstr(const MachineInstr &MI) const {
+ return (getType(MI) == HexagonII::TypeCOMPOUND && MI.isBranch());
}
-bool HexagonInstrInfo::isCondInst(const MachineInstr *MI) const {
- return (MI->isBranch() && isPredicated(*MI)) ||
+bool HexagonInstrInfo::isCondInst(const MachineInstr &MI) const {
+ return (MI.isBranch() && isPredicated(MI)) ||
isConditionalTransfer(MI) ||
isConditionalALU32(MI) ||
isConditionalLoad(MI) ||
// Predicated stores which don't have a .new on any operands.
- (MI->mayStore() && isPredicated(*MI) && !isNewValueStore(MI) &&
- !isPredicatedNew(*MI));
+ (MI.mayStore() && isPredicated(MI) && !isNewValueStore(MI) &&
+ !isPredicatedNew(MI));
}
-bool HexagonInstrInfo::isConditionalALU32(const MachineInstr* MI) const {
- switch (MI->getOpcode()) {
+bool HexagonInstrInfo::isConditionalALU32(const MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
case Hexagon::A2_paddf:
case Hexagon::A2_paddfnew:
case Hexagon::A2_paddif:
@@ -1712,11 +1805,11 @@ bool HexagonInstrInfo::isConditionalALU32(const MachineInstr* MI) const {
// FIXME - Function name and its functionality don't match.
// It should be renamed to hasPredNewOpcode()
-bool HexagonInstrInfo::isConditionalLoad(const MachineInstr* MI) const {
- if (!MI->getDesc().mayLoad() || !isPredicated(*MI))
+bool HexagonInstrInfo::isConditionalLoad(const MachineInstr &MI) const {
+ if (!MI.getDesc().mayLoad() || !isPredicated(MI))
return false;
- int PNewOpcode = Hexagon::getPredNewOpcode(MI->getOpcode());
+ int PNewOpcode = Hexagon::getPredNewOpcode(MI.getOpcode());
// Instruction with valid predicated-new opcode can be promoted to .new.
return PNewOpcode >= 0;
}
@@ -1726,8 +1819,8 @@ bool HexagonInstrInfo::isConditionalLoad(const MachineInstr* MI) const {
//
// Note: It doesn't include conditional new-value stores as they can't be
// converted to .new predicate.
-bool HexagonInstrInfo::isConditionalStore(const MachineInstr* MI) const {
- switch (MI->getOpcode()) {
+bool HexagonInstrInfo::isConditionalStore(const MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
default: return false;
case Hexagon::S4_storeirbt_io:
case Hexagon::S4_storeirbf_io:
@@ -1780,8 +1873,8 @@ bool HexagonInstrInfo::isConditionalStore(const MachineInstr* MI) const {
}
-bool HexagonInstrInfo::isConditionalTransfer(const MachineInstr *MI) const {
- switch (MI->getOpcode()) {
+bool HexagonInstrInfo::isConditionalTransfer(const MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
case Hexagon::A2_tfrt:
case Hexagon::A2_tfrf:
case Hexagon::C2_cmoveit:
@@ -1803,8 +1896,8 @@ bool HexagonInstrInfo::isConditionalTransfer(const MachineInstr *MI) const {
// TODO: In order to have isExtendable for fpimm/f32Ext, we need to handle
// isFPImm and later getFPImm as well.
-bool HexagonInstrInfo::isConstExtended(const MachineInstr *MI) const {
- const uint64_t F = MI->getDesc().TSFlags;
+bool HexagonInstrInfo::isConstExtended(const MachineInstr &MI) const {
+ const uint64_t F = MI.getDesc().TSFlags;
unsigned isExtended = (F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask;
if (isExtended) // Instruction must be extended.
return true;
@@ -1814,11 +1907,11 @@ bool HexagonInstrInfo::isConstExtended(const MachineInstr *MI) const {
if (!isExtendable)
return false;
- if (MI->isCall())
+ if (MI.isCall())
return false;
short ExtOpNum = getCExtOpNum(MI);
- const MachineOperand &MO = MI->getOperand(ExtOpNum);
+ const MachineOperand &MO = MI.getOperand(ExtOpNum);
// Use MO operand flags to determine if MO
// has the HMOTF_ConstExtended flag set.
if (MO.getTargetFlags() && HexagonII::HMOTF_ConstExtended)
@@ -1835,7 +1928,7 @@ bool HexagonInstrInfo::isConstExtended(const MachineInstr *MI) const {
// object we are going to end up with here for now.
// In the future we probably should add isSymbol(), etc.
if (MO.isGlobal() || MO.isSymbol() || MO.isBlockAddress() ||
- MO.isJTI() || MO.isCPI())
+ MO.isJTI() || MO.isCPI() || MO.isFPImm())
return true;
// If the extendable operand is not 'Immediate' type, the instruction should
@@ -1850,8 +1943,8 @@ bool HexagonInstrInfo::isConstExtended(const MachineInstr *MI) const {
}
-bool HexagonInstrInfo::isDeallocRet(const MachineInstr *MI) const {
- switch (MI->getOpcode()) {
+bool HexagonInstrInfo::isDeallocRet(const MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
case Hexagon::L4_return :
case Hexagon::L4_return_t :
case Hexagon::L4_return_f :
@@ -1866,10 +1959,9 @@ bool HexagonInstrInfo::isDeallocRet(const MachineInstr *MI) const {
// Return true when ConsMI uses a register defined by ProdMI.
-bool HexagonInstrInfo::isDependent(const MachineInstr *ProdMI,
- const MachineInstr *ConsMI) const {
- const MCInstrDesc &ProdMCID = ProdMI->getDesc();
- if (!ProdMCID.getNumDefs())
+bool HexagonInstrInfo::isDependent(const MachineInstr &ProdMI,
+ const MachineInstr &ConsMI) const {
+ if (!ProdMI.getDesc().getNumDefs())
return false;
auto &HRI = getRegisterInfo();
@@ -1904,8 +1996,8 @@ bool HexagonInstrInfo::isDependent(const MachineInstr *ProdMI,
// Returns true if the instruction is already a .cur.
-bool HexagonInstrInfo::isDotCurInst(const MachineInstr* MI) const {
- switch (MI->getOpcode()) {
+bool HexagonInstrInfo::isDotCurInst(const MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
case Hexagon::V6_vL32b_cur_pi:
case Hexagon::V6_vL32b_cur_ai:
case Hexagon::V6_vL32b_cur_pi_128B:
@@ -1918,8 +2010,8 @@ bool HexagonInstrInfo::isDotCurInst(const MachineInstr* MI) const {
// Returns true if any one of the operands is a dot new
// insn, whether it is predicated dot new or register dot new.
-bool HexagonInstrInfo::isDotNewInst(const MachineInstr* MI) const {
- if (isNewValueInst(MI) || (isPredicated(*MI) && isPredicatedNew(*MI)))
+bool HexagonInstrInfo::isDotNewInst(const MachineInstr &MI) const {
+ if (isNewValueInst(MI) || (isPredicated(MI) && isPredicatedNew(MI)))
return true;
return false;
@@ -1927,23 +2019,20 @@ bool HexagonInstrInfo::isDotNewInst(const MachineInstr* MI) const {
/// Symmetrical. See if these two instructions are fit for duplex pair.
-bool HexagonInstrInfo::isDuplexPair(const MachineInstr *MIa,
- const MachineInstr *MIb) const {
+bool HexagonInstrInfo::isDuplexPair(const MachineInstr &MIa,
+ const MachineInstr &MIb) const {
HexagonII::SubInstructionGroup MIaG = getDuplexCandidateGroup(MIa);
HexagonII::SubInstructionGroup MIbG = getDuplexCandidateGroup(MIb);
return (isDuplexPairMatch(MIaG, MIbG) || isDuplexPairMatch(MIbG, MIaG));
}
-bool HexagonInstrInfo::isEarlySourceInstr(const MachineInstr *MI) const {
- if (!MI)
- return false;
-
- if (MI->mayLoad() || MI->mayStore() || MI->isCompare())
+bool HexagonInstrInfo::isEarlySourceInstr(const MachineInstr &MI) const {
+ if (MI.mayLoad() || MI.mayStore() || MI.isCompare())
return true;
// Multiply
- unsigned SchedClass = MI->getDesc().getSchedClass();
+ unsigned SchedClass = MI.getDesc().getSchedClass();
if (SchedClass == Hexagon::Sched::M_tc_3or4x_SLOT23)
return true;
return false;
@@ -1971,17 +2060,18 @@ bool HexagonInstrInfo::isExpr(unsigned OpType) const {
}
-bool HexagonInstrInfo::isExtendable(const MachineInstr *MI) const {
- const MCInstrDesc &MID = MI->getDesc();
+bool HexagonInstrInfo::isExtendable(const MachineInstr &MI) const {
+ const MCInstrDesc &MID = MI.getDesc();
const uint64_t F = MID.TSFlags;
if ((F >> HexagonII::ExtendablePos) & HexagonII::ExtendableMask)
return true;
// TODO: This is largely obsolete now. Will need to be removed
// in consecutive patches.
- switch(MI->getOpcode()) {
- // TFR_FI Remains a special case.
- case Hexagon::TFR_FI:
+ switch (MI.getOpcode()) {
+ // PS_fi and PS_fia remain special cases.
+ case Hexagon::PS_fi:
+ case Hexagon::PS_fia:
return true;
default:
return false;
@@ -1993,15 +2083,15 @@ bool HexagonInstrInfo::isExtendable(const MachineInstr *MI) const {
// This returns true in two cases:
// - The OP code itself indicates that this is an extended instruction.
// - One of MOs has been marked with HMOTF_ConstExtended flag.
-bool HexagonInstrInfo::isExtended(const MachineInstr *MI) const {
+bool HexagonInstrInfo::isExtended(const MachineInstr &MI) const {
// First check if this is permanently extended op code.
- const uint64_t F = MI->getDesc().TSFlags;
+ const uint64_t F = MI.getDesc().TSFlags;
if ((F >> HexagonII::ExtendedPos) & HexagonII::ExtendedMask)
return true;
// Use MO operand flags to determine if one of MI's operands
// has HMOTF_ConstExtended flag set.
- for (MachineInstr::const_mop_iterator I = MI->operands_begin(),
- E = MI->operands_end(); I != E; ++I) {
+ for (MachineInstr::const_mop_iterator I = MI.operands_begin(),
+ E = MI.operands_end(); I != E; ++I) {
    if (I->getTargetFlags() & HexagonII::HMOTF_ConstExtended)
return true;
}
@@ -2009,37 +2099,38 @@ bool HexagonInstrInfo::isExtended(const MachineInstr *MI) const {
}
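// A minimal standalone sketch of the TSFlags pattern used throughout these
// predicates: each per-opcode property occupies a bit-field of the 64-bit
// TSFlags word and is decoded as (F >> Pos) & Mask. The position and mask
// below are made-up placeholders, not the real HexagonII values.
#include <cstdint>
namespace tsflags_sketch {
constexpr unsigned ExtendedPos = 7;     // hypothetical bit position
constexpr uint64_t ExtendedMask = 0x1;  // hypothetical one-bit mask
inline bool isExtendedFlag(uint64_t F) {
  return ((F >> ExtendedPos) & ExtendedMask) != 0;
}
} // namespace tsflags_sketch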
-bool HexagonInstrInfo::isFloat(const MachineInstr *MI) const {
- unsigned Opcode = MI->getOpcode();
+bool HexagonInstrInfo::isFloat(const MachineInstr &MI) const {
+ unsigned Opcode = MI.getOpcode();
const uint64_t F = get(Opcode).TSFlags;
return (F >> HexagonII::FPPos) & HexagonII::FPMask;
}
// No V60 HVX VMEM with A_INDIRECT.
-bool HexagonInstrInfo::isHVXMemWithAIndirect(const MachineInstr *I,
- const MachineInstr *J) const {
+bool HexagonInstrInfo::isHVXMemWithAIndirect(const MachineInstr &I,
+ const MachineInstr &J) const {
if (!isV60VectorInstruction(I))
return false;
- if (!I->mayLoad() && !I->mayStore())
+ if (!I.mayLoad() && !I.mayStore())
return false;
- return J->isIndirectBranch() || isIndirectCall(J) || isIndirectL4Return(J);
+ return J.isIndirectBranch() || isIndirectCall(J) || isIndirectL4Return(J);
}
-bool HexagonInstrInfo::isIndirectCall(const MachineInstr *MI) const {
- switch (MI->getOpcode()) {
+bool HexagonInstrInfo::isIndirectCall(const MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
case Hexagon::J2_callr :
case Hexagon::J2_callrf :
case Hexagon::J2_callrt :
+ case Hexagon::PS_call_nr :
return true;
}
return false;
}
-bool HexagonInstrInfo::isIndirectL4Return(const MachineInstr *MI) const {
- switch (MI->getOpcode()) {
+bool HexagonInstrInfo::isIndirectL4Return(const MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
case Hexagon::L4_return :
case Hexagon::L4_return_t :
case Hexagon::L4_return_f :
@@ -2053,8 +2144,8 @@ bool HexagonInstrInfo::isIndirectL4Return(const MachineInstr *MI) const {
}
-bool HexagonInstrInfo::isJumpR(const MachineInstr *MI) const {
- switch (MI->getOpcode()) {
+bool HexagonInstrInfo::isJumpR(const MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
case Hexagon::J2_jumpr :
case Hexagon::J2_jumprt :
case Hexagon::J2_jumprf :
@@ -2068,24 +2159,24 @@ bool HexagonInstrInfo::isJumpR(const MachineInstr *MI) const {
}
-// Return true if a given MI can accomodate given offset.
+// Return true if a given MI can accommodate given offset.
// Use an absolute estimate as opposed to the exact number.
// TODO: This will need to be changed to use MC level
// definition of instruction extendable field size.
-bool HexagonInstrInfo::isJumpWithinBranchRange(const MachineInstr *MI,
+bool HexagonInstrInfo::isJumpWithinBranchRange(const MachineInstr &MI,
unsigned offset) const {
// This selection of jump instructions matches what
// AnalyzeBranch can parse, plus NVJ.
if (isNewValueJump(MI)) // r9:2
return isInt<11>(offset);
- switch (MI->getOpcode()) {
+ switch (MI.getOpcode()) {
// Still missing Jump to address condition on register value.
default:
return false;
case Hexagon::J2_jump: // bits<24> dst; // r22:2
case Hexagon::J2_call:
- case Hexagon::CALLv3nr:
+ case Hexagon::PS_call_nr:
return isInt<24>(offset);
case Hexagon::J2_jumpt: //bits<17> dst; // r15:2
case Hexagon::J2_jumpf:
@@ -2113,18 +2204,15 @@ bool HexagonInstrInfo::isJumpWithinBranchRange(const MachineInstr *MI,
}
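// A minimal standalone equivalent of the isInt<N>(offset) checks above: a
// branch with an N-bit signed displacement can reach byte offsets in
// [-2^(N-1), 2^(N-1) - 1]. This mirrors llvm::isInt<N> from
// llvm/Support/MathExtras.h; the helper name below is a placeholder.
#include <cstdint>
template <unsigned N>
constexpr bool fitsSignedBits(int64_t x) {
  return x >= -(int64_t(1) << (N - 1)) && x < (int64_t(1) << (N - 1));
}
static_assert(fitsSignedBits<11>(1023), "within an 11-bit signed range (r9:2)");
static_assert(!fitsSignedBits<11>(1024), "outside an 11-bit signed range");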
-bool HexagonInstrInfo::isLateInstrFeedsEarlyInstr(const MachineInstr *LRMI,
- const MachineInstr *ESMI) const {
- if (!LRMI || !ESMI)
- return false;
-
+bool HexagonInstrInfo::isLateInstrFeedsEarlyInstr(const MachineInstr &LRMI,
+ const MachineInstr &ESMI) const {
bool isLate = isLateResultInstr(LRMI);
bool isEarly = isEarlySourceInstr(ESMI);
DEBUG(dbgs() << "V60" << (isLate ? "-LR " : " -- "));
- DEBUG(LRMI->dump());
+ DEBUG(LRMI.dump());
DEBUG(dbgs() << "V60" << (isEarly ? "-ES " : " -- "));
- DEBUG(ESMI->dump());
+ DEBUG(ESMI.dump());
if (isLate && isEarly) {
DEBUG(dbgs() << "++Is Late Result feeding Early Source\n");
@@ -2135,11 +2223,8 @@ bool HexagonInstrInfo::isLateInstrFeedsEarlyInstr(const MachineInstr *LRMI,
}
-bool HexagonInstrInfo::isLateResultInstr(const MachineInstr *MI) const {
- if (!MI)
- return false;
-
- switch (MI->getOpcode()) {
+bool HexagonInstrInfo::isLateResultInstr(const MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
case TargetOpcode::EXTRACT_SUBREG:
case TargetOpcode::INSERT_SUBREG:
case TargetOpcode::SUBREG_TO_REG:
@@ -2153,7 +2238,7 @@ bool HexagonInstrInfo::isLateResultInstr(const MachineInstr *MI) const {
break;
}
- unsigned SchedClass = MI->getDesc().getSchedClass();
+ unsigned SchedClass = MI.getDesc().getSchedClass();
switch (SchedClass) {
case Hexagon::Sched::ALU32_2op_tc_1_SLOT0123:
@@ -2175,18 +2260,15 @@ bool HexagonInstrInfo::isLateResultInstr(const MachineInstr *MI) const {
}
-bool HexagonInstrInfo::isLateSourceInstr(const MachineInstr *MI) const {
- if (!MI)
- return false;
-
+bool HexagonInstrInfo::isLateSourceInstr(const MachineInstr &MI) const {
// Instructions with iclass A_CVI_VX and attribute A_CVI_LATE use a multiply
// resource, but all operands can be received late like an ALU instruction.
- return MI->getDesc().getSchedClass() == Hexagon::Sched::CVI_VX_LATE;
+ return MI.getDesc().getSchedClass() == Hexagon::Sched::CVI_VX_LATE;
}
-bool HexagonInstrInfo::isLoopN(const MachineInstr *MI) const {
- unsigned Opcode = MI->getOpcode();
+bool HexagonInstrInfo::isLoopN(const MachineInstr &MI) const {
+ unsigned Opcode = MI.getOpcode();
return Opcode == Hexagon::J2_loop0i ||
Opcode == Hexagon::J2_loop0r ||
Opcode == Hexagon::J2_loop0iext ||
@@ -2198,8 +2280,8 @@ bool HexagonInstrInfo::isLoopN(const MachineInstr *MI) const {
}
-bool HexagonInstrInfo::isMemOp(const MachineInstr *MI) const {
- switch (MI->getOpcode()) {
+bool HexagonInstrInfo::isMemOp(const MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
default: return false;
case Hexagon::L4_iadd_memopw_io :
case Hexagon::L4_isub_memopw_io :
@@ -2231,8 +2313,8 @@ bool HexagonInstrInfo::isMemOp(const MachineInstr *MI) const {
}
-bool HexagonInstrInfo::isNewValue(const MachineInstr* MI) const {
- const uint64_t F = MI->getDesc().TSFlags;
+bool HexagonInstrInfo::isNewValue(const MachineInstr &MI) const {
+ const uint64_t F = MI.getDesc().TSFlags;
return (F >> HexagonII::NewValuePos) & HexagonII::NewValueMask;
}
@@ -2243,13 +2325,13 @@ bool HexagonInstrInfo::isNewValue(unsigned Opcode) const {
}
-bool HexagonInstrInfo::isNewValueInst(const MachineInstr *MI) const {
+bool HexagonInstrInfo::isNewValueInst(const MachineInstr &MI) const {
return isNewValueJump(MI) || isNewValueStore(MI);
}
-bool HexagonInstrInfo::isNewValueJump(const MachineInstr *MI) const {
- return isNewValue(MI) && MI->isBranch();
+bool HexagonInstrInfo::isNewValueJump(const MachineInstr &MI) const {
+ return isNewValue(MI) && MI.isBranch();
}
@@ -2258,8 +2340,8 @@ bool HexagonInstrInfo::isNewValueJump(unsigned Opcode) const {
}
-bool HexagonInstrInfo::isNewValueStore(const MachineInstr *MI) const {
- const uint64_t F = MI->getDesc().TSFlags;
+bool HexagonInstrInfo::isNewValueStore(const MachineInstr &MI) const {
+ const uint64_t F = MI.getDesc().TSFlags;
return (F >> HexagonII::NVStorePos) & HexagonII::NVStoreMask;
}
@@ -2271,19 +2353,14 @@ bool HexagonInstrInfo::isNewValueStore(unsigned Opcode) const {
// Returns true if a particular operand is extendable for an instruction.
-bool HexagonInstrInfo::isOperandExtended(const MachineInstr *MI,
+bool HexagonInstrInfo::isOperandExtended(const MachineInstr &MI,
unsigned OperandNum) const {
- const uint64_t F = MI->getDesc().TSFlags;
+ const uint64_t F = MI.getDesc().TSFlags;
return ((F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask)
== OperandNum;
}
-bool HexagonInstrInfo::isPostIncrement(const MachineInstr* MI) const {
- return getAddrMode(MI) == HexagonII::PostInc;
-}
-
-
bool HexagonInstrInfo::isPredicatedNew(const MachineInstr &MI) const {
const uint64_t F = MI.getDesc().TSFlags;
assert(isPredicated(MI));
@@ -2334,11 +2411,11 @@ bool HexagonInstrInfo::isPredictedTaken(unsigned Opcode) const {
}
-bool HexagonInstrInfo::isSaveCalleeSavedRegsCall(const MachineInstr *MI) const {
- return MI->getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4 ||
- MI->getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4_EXT ||
- MI->getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4_PIC ||
- MI->getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4_EXT_PIC;
+bool HexagonInstrInfo::isSaveCalleeSavedRegsCall(const MachineInstr &MI) const {
+ return MI.getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4 ||
+ MI.getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4_EXT ||
+ MI.getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4_PIC ||
+ MI.getOpcode() == Hexagon::SAVE_REGISTERS_CALL_V4_EXT_PIC;
}
bool HexagonInstrInfo::isSignExtendingLoad(const MachineInstr &MI) const {
@@ -2420,14 +2497,14 @@ bool HexagonInstrInfo::isSignExtendingLoad(const MachineInstr &MI) const {
}
-bool HexagonInstrInfo::isSolo(const MachineInstr* MI) const {
- const uint64_t F = MI->getDesc().TSFlags;
+bool HexagonInstrInfo::isSolo(const MachineInstr &MI) const {
+ const uint64_t F = MI.getDesc().TSFlags;
return (F >> HexagonII::SoloPos) & HexagonII::SoloMask;
}
-bool HexagonInstrInfo::isSpillPredRegOp(const MachineInstr *MI) const {
- switch (MI->getOpcode()) {
+bool HexagonInstrInfo::isSpillPredRegOp(const MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
case Hexagon::STriw_pred :
case Hexagon::LDriw_pred :
return true;
@@ -2437,11 +2514,11 @@ bool HexagonInstrInfo::isSpillPredRegOp(const MachineInstr *MI) const {
}
-bool HexagonInstrInfo::isTailCall(const MachineInstr *MI) const {
- if (!MI->isBranch())
+bool HexagonInstrInfo::isTailCall(const MachineInstr &MI) const {
+ if (!MI.isBranch())
return false;
- for (auto &Op : MI->operands())
+ for (auto &Op : MI.operands())
if (Op.isGlobal() || Op.isSymbol())
return true;
return false;
@@ -2449,8 +2526,8 @@ bool HexagonInstrInfo::isTailCall(const MachineInstr *MI) const {
// Returns true when the instruction has timing class TC1.
-bool HexagonInstrInfo::isTC1(const MachineInstr *MI) const {
- unsigned SchedClass = MI->getDesc().getSchedClass();
+bool HexagonInstrInfo::isTC1(const MachineInstr &MI) const {
+ unsigned SchedClass = MI.getDesc().getSchedClass();
switch (SchedClass) {
case Hexagon::Sched::ALU32_2op_tc_1_SLOT0123:
case Hexagon::Sched::ALU32_3op_tc_1_SLOT0123:
@@ -2468,8 +2545,8 @@ bool HexagonInstrInfo::isTC1(const MachineInstr *MI) const {
}
-bool HexagonInstrInfo::isTC2(const MachineInstr *MI) const {
- unsigned SchedClass = MI->getDesc().getSchedClass();
+bool HexagonInstrInfo::isTC2(const MachineInstr &MI) const {
+ unsigned SchedClass = MI.getDesc().getSchedClass();
switch (SchedClass) {
case Hexagon::Sched::ALU32_3op_tc_2_SLOT0123:
case Hexagon::Sched::ALU64_tc_2_SLOT23:
@@ -2485,8 +2562,8 @@ bool HexagonInstrInfo::isTC2(const MachineInstr *MI) const {
}
-bool HexagonInstrInfo::isTC2Early(const MachineInstr *MI) const {
- unsigned SchedClass = MI->getDesc().getSchedClass();
+bool HexagonInstrInfo::isTC2Early(const MachineInstr &MI) const {
+ unsigned SchedClass = MI.getDesc().getSchedClass();
switch (SchedClass) {
case Hexagon::Sched::ALU32_2op_tc_2early_SLOT0123:
case Hexagon::Sched::ALU32_3op_tc_2early_SLOT0123:
@@ -2506,41 +2583,33 @@ bool HexagonInstrInfo::isTC2Early(const MachineInstr *MI) const {
}
-bool HexagonInstrInfo::isTC4x(const MachineInstr *MI) const {
- if (!MI)
- return false;
-
- unsigned SchedClass = MI->getDesc().getSchedClass();
+bool HexagonInstrInfo::isTC4x(const MachineInstr &MI) const {
+ unsigned SchedClass = MI.getDesc().getSchedClass();
return SchedClass == Hexagon::Sched::M_tc_3or4x_SLOT23;
}
// Schedule this ASAP.
-bool HexagonInstrInfo::isToBeScheduledASAP(const MachineInstr *MI1,
- const MachineInstr *MI2) const {
- if (!MI1 || !MI2)
- return false;
+bool HexagonInstrInfo::isToBeScheduledASAP(const MachineInstr &MI1,
+ const MachineInstr &MI2) const {
if (mayBeCurLoad(MI1)) {
// if (result of SU is used in Next) return true;
- unsigned DstReg = MI1->getOperand(0).getReg();
- int N = MI2->getNumOperands();
+ unsigned DstReg = MI1.getOperand(0).getReg();
+ int N = MI2.getNumOperands();
for (int I = 0; I < N; I++)
- if (MI2->getOperand(I).isReg() && DstReg == MI2->getOperand(I).getReg())
+ if (MI2.getOperand(I).isReg() && DstReg == MI2.getOperand(I).getReg())
return true;
}
if (mayBeNewStore(MI2))
- if (MI2->getOpcode() == Hexagon::V6_vS32b_pi)
- if (MI1->getOperand(0).isReg() && MI2->getOperand(3).isReg() &&
- MI1->getOperand(0).getReg() == MI2->getOperand(3).getReg())
+ if (MI2.getOpcode() == Hexagon::V6_vS32b_pi)
+ if (MI1.getOperand(0).isReg() && MI2.getOperand(3).isReg() &&
+ MI1.getOperand(0).getReg() == MI2.getOperand(3).getReg())
return true;
return false;
}
-bool HexagonInstrInfo::isV60VectorInstruction(const MachineInstr *MI) const {
- if (!MI)
- return false;
-
+bool HexagonInstrInfo::isV60VectorInstruction(const MachineInstr &MI) const {
const uint64_t V = getType(MI);
return HexagonII::TypeCVI_FIRST <= V && V <= HexagonII::TypeCVI_LAST;
}
@@ -2597,16 +2666,10 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset,
// misaligns with respect to load size.
switch (Opcode) {
- case Hexagon::STriq_pred_V6:
- case Hexagon::STriq_pred_vec_V6:
- case Hexagon::STriv_pseudo_V6:
- case Hexagon::STrivv_pseudo_V6:
- case Hexagon::LDriq_pred_V6:
- case Hexagon::LDriq_pred_vec_V6:
- case Hexagon::LDriv_pseudo_V6:
- case Hexagon::LDrivv_pseudo_V6:
- case Hexagon::LDrivv_indexed:
- case Hexagon::STrivv_indexed:
+ case Hexagon::PS_vstorerq_ai:
+ case Hexagon::PS_vstorerw_ai:
+ case Hexagon::PS_vloadrq_ai:
+ case Hexagon::PS_vloadrw_ai:
case Hexagon::V6_vL32b_ai:
case Hexagon::V6_vS32b_ai:
case Hexagon::V6_vL32Ub_ai:
@@ -2614,16 +2677,10 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset,
return (Offset >= Hexagon_MEMV_OFFSET_MIN) &&
(Offset <= Hexagon_MEMV_OFFSET_MAX);
- case Hexagon::STriq_pred_V6_128B:
- case Hexagon::STriq_pred_vec_V6_128B:
- case Hexagon::STriv_pseudo_V6_128B:
- case Hexagon::STrivv_pseudo_V6_128B:
- case Hexagon::LDriq_pred_V6_128B:
- case Hexagon::LDriq_pred_vec_V6_128B:
- case Hexagon::LDriv_pseudo_V6_128B:
- case Hexagon::LDrivv_pseudo_V6_128B:
- case Hexagon::LDrivv_indexed_128B:
- case Hexagon::STrivv_indexed_128B:
+ case Hexagon::PS_vstorerq_ai_128B:
+ case Hexagon::PS_vstorerw_ai_128B:
+ case Hexagon::PS_vloadrq_ai_128B:
+ case Hexagon::PS_vloadrw_ai_128B:
case Hexagon::V6_vL32b_ai_128B:
case Hexagon::V6_vS32b_ai_128B:
case Hexagon::V6_vL32Ub_ai_128B:
@@ -2713,8 +2770,8 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset,
case Hexagon::LDriw_mod:
return true;
- case Hexagon::TFR_FI:
- case Hexagon::TFR_FIA:
+ case Hexagon::PS_fi:
+ case Hexagon::PS_fia:
case Hexagon::INLINEASM:
return true;
@@ -2752,15 +2809,13 @@ bool HexagonInstrInfo::isValidOffset(unsigned Opcode, int Offset,
}
-bool HexagonInstrInfo::isVecAcc(const MachineInstr *MI) const {
- return MI && isV60VectorInstruction(MI) && isAccumulator(MI);
+bool HexagonInstrInfo::isVecAcc(const MachineInstr &MI) const {
+ return isV60VectorInstruction(MI) && isAccumulator(MI);
}
-bool HexagonInstrInfo::isVecALU(const MachineInstr *MI) const {
- if (!MI)
- return false;
- const uint64_t F = get(MI->getOpcode()).TSFlags;
+bool HexagonInstrInfo::isVecALU(const MachineInstr &MI) const {
+ const uint64_t F = get(MI.getOpcode()).TSFlags;
const uint64_t V = ((F >> HexagonII::TypePos) & HexagonII::TypeMask);
return
V == HexagonII::TypeCVI_VA ||
@@ -2768,8 +2823,8 @@ bool HexagonInstrInfo::isVecALU(const MachineInstr *MI) const {
}
-bool HexagonInstrInfo::isVecUsableNextPacket(const MachineInstr *ProdMI,
- const MachineInstr *ConsMI) const {
+bool HexagonInstrInfo::isVecUsableNextPacket(const MachineInstr &ProdMI,
+ const MachineInstr &ConsMI) const {
if (EnableACCForwarding && isVecAcc(ProdMI) && isVecAcc(ConsMI))
return true;
@@ -2862,8 +2917,8 @@ bool HexagonInstrInfo::isZeroExtendingLoad(const MachineInstr &MI) const {
// Add latency to instruction.
-bool HexagonInstrInfo::addLatencyToSchedule(const MachineInstr *MI1,
- const MachineInstr *MI2) const {
+bool HexagonInstrInfo::addLatencyToSchedule(const MachineInstr &MI1,
+ const MachineInstr &MI2) const {
if (isV60VectorInstruction(MI1) && isV60VectorInstruction(MI2))
if (!isVecUsableNextPacket(MI1, MI2))
return true;
@@ -2871,20 +2926,32 @@ bool HexagonInstrInfo::addLatencyToSchedule(const MachineInstr *MI1,
}
+/// \brief Get the base register and byte offset of a load/store instr.
+bool HexagonInstrInfo::getMemOpBaseRegImmOfs(MachineInstr &LdSt,
+ unsigned &BaseReg, int64_t &Offset, const TargetRegisterInfo *TRI)
+ const {
+ unsigned AccessSize = 0;
+ int OffsetVal = 0;
+ BaseReg = getBaseAndOffset(LdSt, OffsetVal, AccessSize);
+ Offset = OffsetVal;
+ return BaseReg != 0;
+}
+
+
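// A minimal standalone sketch of the convention used by the new
// getMemOpBaseRegImmOfs wrapper above: success is reported through the
// return value, register number 0 serves as the "no simple base+offset
// form" sentinel, and the int offset from the target-level query is
// widened to int64_t. The names below are placeholders, not LLVM types.
#include <cstdint>
inline bool memOpBaseRegImmOfsSketch(unsigned FoundBaseReg, int FoundOffset,
                                     unsigned &BaseReg, int64_t &Offset) {
  BaseReg = FoundBaseReg;  // 0 when the access has no recognizable base
  Offset = FoundOffset;    // widened from int to int64_t
  return BaseReg != 0;
}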
/// \brief Can these instructions execute at the same time in a bundle.
-bool HexagonInstrInfo::canExecuteInBundle(const MachineInstr *First,
- const MachineInstr *Second) const {
+bool HexagonInstrInfo::canExecuteInBundle(const MachineInstr &First,
+ const MachineInstr &Second) const {
if (DisableNVSchedule)
return false;
if (mayBeNewStore(Second)) {
// Make sure the definition of the first instruction is the value being
// stored.
const MachineOperand &Stored =
- Second->getOperand(Second->getNumOperands() - 1);
+ Second.getOperand(Second.getNumOperands() - 1);
if (!Stored.isReg())
return false;
- for (unsigned i = 0, e = First->getNumOperands(); i < e; ++i) {
- const MachineOperand &Op = First->getOperand(i);
+ for (unsigned i = 0, e = First.getNumOperands(); i < e; ++i) {
+ const MachineOperand &Op = First.getOperand(i);
if (Op.isReg() && Op.isDef() && Op.getReg() == Stored.getReg())
return true;
}
@@ -2893,6 +2960,12 @@ bool HexagonInstrInfo::canExecuteInBundle(const MachineInstr *First,
}
+bool HexagonInstrInfo::doesNotReturn(const MachineInstr &CallMI) const {
+ unsigned Opc = CallMI.getOpcode();
+ return Opc == Hexagon::PS_call_nr || Opc == Hexagon::PS_callr_nr;
+}
+
+
bool HexagonInstrInfo::hasEHLabel(const MachineBasicBlock *B) const {
for (auto &I : *B)
if (I.isEHLabel())
@@ -2903,30 +2976,30 @@ bool HexagonInstrInfo::hasEHLabel(const MachineBasicBlock *B) const {
// Returns true if an instruction can be converted into a non-extended
// equivalent instruction.
-bool HexagonInstrInfo::hasNonExtEquivalent(const MachineInstr *MI) const {
+bool HexagonInstrInfo::hasNonExtEquivalent(const MachineInstr &MI) const {
short NonExtOpcode;
// Check if the instruction has a register form that uses register in place
// of the extended operand, if so return that as the non-extended form.
- if (Hexagon::getRegForm(MI->getOpcode()) >= 0)
+ if (Hexagon::getRegForm(MI.getOpcode()) >= 0)
return true;
- if (MI->getDesc().mayLoad() || MI->getDesc().mayStore()) {
+ if (MI.getDesc().mayLoad() || MI.getDesc().mayStore()) {
// Check addressing mode and retrieve non-ext equivalent instruction.
switch (getAddrMode(MI)) {
case HexagonII::Absolute :
// Load/store with absolute addressing mode can be converted into
// base+offset mode.
- NonExtOpcode = Hexagon::getBaseWithImmOffset(MI->getOpcode());
+ NonExtOpcode = Hexagon::getBaseWithImmOffset(MI.getOpcode());
break;
case HexagonII::BaseImmOffset :
// Load/store with base+offset addressing mode can be converted into
// base+register offset addressing mode. However, the left shift operand
// should be set to 0.
- NonExtOpcode = Hexagon::getBaseWithRegOffset(MI->getOpcode());
+ NonExtOpcode = Hexagon::getBaseWithRegOffset(MI.getOpcode());
break;
case HexagonII::BaseLongOffset:
- NonExtOpcode = Hexagon::getRegShlForm(MI->getOpcode());
+ NonExtOpcode = Hexagon::getRegShlForm(MI.getOpcode());
break;
default:
return false;
@@ -2939,8 +3012,8 @@ bool HexagonInstrInfo::hasNonExtEquivalent(const MachineInstr *MI) const {
}
-bool HexagonInstrInfo::hasPseudoInstrPair(const MachineInstr *MI) const {
- return Hexagon::getRealHWInstr(MI->getOpcode(),
+bool HexagonInstrInfo::hasPseudoInstrPair(const MachineInstr &MI) const {
+ return Hexagon::getRealHWInstr(MI.getOpcode(),
Hexagon::InstrType_Pseudo) >= 0;
}
@@ -2958,23 +3031,23 @@ bool HexagonInstrInfo::hasUncondBranch(const MachineBasicBlock *B)
// Returns true if an LD insn can be promoted to a .cur load.
-bool HexagonInstrInfo::mayBeCurLoad(const MachineInstr *MI) const {
- auto &HST = MI->getParent()->getParent()->getSubtarget<HexagonSubtarget>();
- const uint64_t F = MI->getDesc().TSFlags;
+bool HexagonInstrInfo::mayBeCurLoad(const MachineInstr &MI) const {
+ auto &HST = MI.getParent()->getParent()->getSubtarget<HexagonSubtarget>();
+ const uint64_t F = MI.getDesc().TSFlags;
return ((F >> HexagonII::mayCVLoadPos) & HexagonII::mayCVLoadMask) &&
HST.hasV60TOps();
}
// Returns true if an ST insn can be promoted to a new-value store.
-bool HexagonInstrInfo::mayBeNewStore(const MachineInstr *MI) const {
- const uint64_t F = MI->getDesc().TSFlags;
+bool HexagonInstrInfo::mayBeNewStore(const MachineInstr &MI) const {
+ const uint64_t F = MI.getDesc().TSFlags;
return (F >> HexagonII::mayNVStorePos) & HexagonII::mayNVStoreMask;
}
-bool HexagonInstrInfo::producesStall(const MachineInstr *ProdMI,
- const MachineInstr *ConsMI) const {
+bool HexagonInstrInfo::producesStall(const MachineInstr &ProdMI,
+ const MachineInstr &ConsMI) const {
// There is no stall when ProdMI is not a V60 vector.
if (!isV60VectorInstruction(ProdMI))
return false;
@@ -2992,7 +3065,7 @@ bool HexagonInstrInfo::producesStall(const MachineInstr *ProdMI,
}
-bool HexagonInstrInfo::producesStall(const MachineInstr *MI,
+bool HexagonInstrInfo::producesStall(const MachineInstr &MI,
MachineBasicBlock::const_instr_iterator BII) const {
// There is no stall when I is not a V60 vector.
if (!isV60VectorInstruction(MI))
@@ -3001,8 +3074,8 @@ bool HexagonInstrInfo::producesStall(const MachineInstr *MI,
MachineBasicBlock::const_instr_iterator MII = BII;
MachineBasicBlock::const_instr_iterator MIE = MII->getParent()->instr_end();
- if (!(*MII).isBundle()) {
- const MachineInstr *J = &*MII;
+ if (!MII->isBundle()) {
+ const MachineInstr &J = *MII;
if (!isV60VectorInstruction(J))
return false;
else if (isVecUsableNextPacket(J, MI))
@@ -3011,7 +3084,7 @@ bool HexagonInstrInfo::producesStall(const MachineInstr *MI,
}
for (++MII; MII != MIE && MII->isInsideBundle(); ++MII) {
- const MachineInstr *J = &*MII;
+ const MachineInstr &J = *MII;
if (producesStall(J, MI))
return true;
}
@@ -3019,10 +3092,10 @@ bool HexagonInstrInfo::producesStall(const MachineInstr *MI,
}
-bool HexagonInstrInfo::predCanBeUsedAsDotNew(const MachineInstr *MI,
+bool HexagonInstrInfo::predCanBeUsedAsDotNew(const MachineInstr &MI,
unsigned PredReg) const {
- for (unsigned opNum = 0; opNum < MI->getNumOperands(); opNum++) {
- const MachineOperand &MO = MI->getOperand(opNum);
+ for (unsigned opNum = 0; opNum < MI.getNumOperands(); opNum++) {
+ const MachineOperand &MO = MI.getOperand(opNum);
if (MO.isReg() && MO.isDef() && MO.isImplicit() && (MO.getReg() == PredReg))
return false; // Predicate register must be explicitly defined.
}
@@ -3030,7 +3103,7 @@ bool HexagonInstrInfo::predCanBeUsedAsDotNew(const MachineInstr *MI,
// The Hexagon Programmer's Reference says that decbin, memw_locked, and
// memd_locked cannot be used as .new either,
// but we don't seem to have these instructions defined.
- return MI->getOpcode() != Hexagon::A4_tlbmatch;
+ return MI.getOpcode() != Hexagon::A4_tlbmatch;
}
@@ -3051,20 +3124,20 @@ bool HexagonInstrInfo::predOpcodeHasNot(ArrayRef<MachineOperand> Cond) const {
}
-short HexagonInstrInfo::getAbsoluteForm(const MachineInstr *MI) const {
- return Hexagon::getAbsoluteForm(MI->getOpcode());
+short HexagonInstrInfo::getAbsoluteForm(const MachineInstr &MI) const {
+ return Hexagon::getAbsoluteForm(MI.getOpcode());
}
-unsigned HexagonInstrInfo::getAddrMode(const MachineInstr* MI) const {
- const uint64_t F = MI->getDesc().TSFlags;
+unsigned HexagonInstrInfo::getAddrMode(const MachineInstr &MI) const {
+ const uint64_t F = MI.getDesc().TSFlags;
return (F >> HexagonII::AddrModePos) & HexagonII::AddrModeMask;
}
// Returns the base register in a memory access (load/store). The offset is
// returned in Offset and the access size is returned in AccessSize.
-unsigned HexagonInstrInfo::getBaseAndOffset(const MachineInstr *MI,
+unsigned HexagonInstrInfo::getBaseAndOffset(const MachineInstr &MI,
int &Offset, unsigned &AccessSize) const {
// Return if it is not a base+offset type instruction or a MemOp.
if (getAddrMode(MI) != HexagonII::BaseImmOffset &&
@@ -3092,30 +3165,30 @@ unsigned HexagonInstrInfo::getBaseAndOffset(const MachineInstr *MI,
if (isPostIncrement(MI))
Offset = 0;
else {
- Offset = MI->getOperand(offsetPos).getImm();
+ Offset = MI.getOperand(offsetPos).getImm();
}
- return MI->getOperand(basePos).getReg();
+ return MI.getOperand(basePos).getReg();
}
/// Return the position of the base and offset operands for this instruction.
-bool HexagonInstrInfo::getBaseAndOffsetPosition(const MachineInstr *MI,
+bool HexagonInstrInfo::getBaseAndOffsetPosition(const MachineInstr &MI,
unsigned &BasePos, unsigned &OffsetPos) const {
// Deal with memops first.
if (isMemOp(MI)) {
BasePos = 0;
OffsetPos = 1;
- } else if (MI->mayStore()) {
+ } else if (MI.mayStore()) {
BasePos = 0;
OffsetPos = 1;
- } else if (MI->mayLoad()) {
+ } else if (MI.mayLoad()) {
BasePos = 1;
OffsetPos = 2;
} else
return false;
- if (isPredicated(*MI)) {
+ if (isPredicated(MI)) {
BasePos++;
OffsetPos++;
}
@@ -3124,14 +3197,14 @@ bool HexagonInstrInfo::getBaseAndOffsetPosition(const MachineInstr *MI,
OffsetPos++;
}
- if (!MI->getOperand(BasePos).isReg() || !MI->getOperand(OffsetPos).isImm())
+ if (!MI.getOperand(BasePos).isReg() || !MI.getOperand(OffsetPos).isImm())
return false;
return true;
}
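// A minimal standalone sketch of the operand-position bookkeeping above:
// stores and memops start with {base, offset} at positions {0, 1}, loads at
// {1, 2}, and each additional leading operand (e.g. the predicate) shifts
// both positions by one. This is a simplified model, not the real
// MachineInstr operand layout.
#include <utility>
inline std::pair<unsigned, unsigned>
baseAndOffsetPosSketch(bool IsLoad, unsigned ExtraLeadingOps) {
  unsigned BasePos = IsLoad ? 1 : 0;
  unsigned OffsetPos = IsLoad ? 2 : 1;
  return {BasePos + ExtraLeadingOps, OffsetPos + ExtraLeadingOps};
}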
-// Inserts branching instructions in reverse order of their occurence.
+// Inserts branching instructions in reverse order of their occurrence.
// e.g. jump_t t1 (i1)
// jump t2 (i2)
// Jumpers = {i2, i1}
@@ -3200,29 +3273,29 @@ short HexagonInstrInfo::getBaseWithLongOffset(short Opcode) const {
}
-short HexagonInstrInfo::getBaseWithLongOffset(const MachineInstr *MI) const {
- return Hexagon::getBaseWithLongOffset(MI->getOpcode());
+short HexagonInstrInfo::getBaseWithLongOffset(const MachineInstr &MI) const {
+ return Hexagon::getBaseWithLongOffset(MI.getOpcode());
}
-short HexagonInstrInfo::getBaseWithRegOffset(const MachineInstr *MI) const {
- return Hexagon::getBaseWithRegOffset(MI->getOpcode());
+short HexagonInstrInfo::getBaseWithRegOffset(const MachineInstr &MI) const {
+ return Hexagon::getBaseWithRegOffset(MI.getOpcode());
}
// Returns Operand Index for the constant extended instruction.
-unsigned HexagonInstrInfo::getCExtOpNum(const MachineInstr *MI) const {
- const uint64_t F = MI->getDesc().TSFlags;
+unsigned HexagonInstrInfo::getCExtOpNum(const MachineInstr &MI) const {
+ const uint64_t F = MI.getDesc().TSFlags;
return (F >> HexagonII::ExtendableOpPos) & HexagonII::ExtendableOpMask;
}
// See if instruction could potentially be a duplex candidate.
// If so, return its group. Zero otherwise.
HexagonII::CompoundGroup HexagonInstrInfo::getCompoundCandidateGroup(
- const MachineInstr *MI) const {
+ const MachineInstr &MI) const {
unsigned DstReg, SrcReg, Src1Reg, Src2Reg;
- switch (MI->getOpcode()) {
+ switch (MI.getOpcode()) {
default:
return HexagonII::HCG_None;
//
@@ -3234,9 +3307,9 @@ HexagonII::CompoundGroup HexagonInstrInfo::getCompoundCandidateGroup(
case Hexagon::C2_cmpeq:
case Hexagon::C2_cmpgt:
case Hexagon::C2_cmpgtu:
- DstReg = MI->getOperand(0).getReg();
- Src1Reg = MI->getOperand(1).getReg();
- Src2Reg = MI->getOperand(2).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ Src1Reg = MI.getOperand(1).getReg();
+ Src2Reg = MI.getOperand(2).getReg();
if (Hexagon::PredRegsRegClass.contains(DstReg) &&
(Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) &&
isIntRegForSubInst(Src1Reg) && isIntRegForSubInst(Src2Reg))
@@ -3246,19 +3319,19 @@ HexagonII::CompoundGroup HexagonInstrInfo::getCompoundCandidateGroup(
case Hexagon::C2_cmpgti:
case Hexagon::C2_cmpgtui:
// P0 = cmp.eq(Rs,#u2)
- DstReg = MI->getOperand(0).getReg();
- SrcReg = MI->getOperand(1).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
if (Hexagon::PredRegsRegClass.contains(DstReg) &&
(Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) &&
- isIntRegForSubInst(SrcReg) && MI->getOperand(2).isImm() &&
- ((isUInt<5>(MI->getOperand(2).getImm())) ||
- (MI->getOperand(2).getImm() == -1)))
+ isIntRegForSubInst(SrcReg) && MI.getOperand(2).isImm() &&
+ ((isUInt<5>(MI.getOperand(2).getImm())) ||
+ (MI.getOperand(2).getImm() == -1)))
return HexagonII::HCG_A;
break;
case Hexagon::A2_tfr:
// Rd = Rs
- DstReg = MI->getOperand(0).getReg();
- SrcReg = MI->getOperand(1).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg))
return HexagonII::HCG_A;
break;
@@ -3266,17 +3339,17 @@ HexagonII::CompoundGroup HexagonInstrInfo::getCompoundCandidateGroup(
// Rd = #u6
// Do not test for #u6 size since the const is getting extended
// regardless and compound could be formed.
- DstReg = MI->getOperand(0).getReg();
+ DstReg = MI.getOperand(0).getReg();
if (isIntRegForSubInst(DstReg))
return HexagonII::HCG_A;
break;
case Hexagon::S2_tstbit_i:
- DstReg = MI->getOperand(0).getReg();
- Src1Reg = MI->getOperand(1).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ Src1Reg = MI.getOperand(1).getReg();
if (Hexagon::PredRegsRegClass.contains(DstReg) &&
(Hexagon::P0 == DstReg || Hexagon::P1 == DstReg) &&
- MI->getOperand(2).isImm() &&
- isIntRegForSubInst(Src1Reg) && (MI->getOperand(2).getImm() == 0))
+ MI.getOperand(2).isImm() &&
+ isIntRegForSubInst(Src1Reg) && (MI.getOperand(2).getImm() == 0))
return HexagonII::HCG_A;
break;
// The fact that .new form is used pretty much guarantees
@@ -3287,7 +3360,7 @@ HexagonII::CompoundGroup HexagonInstrInfo::getCompoundCandidateGroup(
case Hexagon::J2_jumpfnew:
case Hexagon::J2_jumptnewpt:
case Hexagon::J2_jumpfnewpt:
- Src1Reg = MI->getOperand(0).getReg();
+ Src1Reg = MI.getOperand(0).getReg();
if (Hexagon::PredRegsRegClass.contains(Src1Reg) &&
(Hexagon::P0 == Src1Reg || Hexagon::P1 == Src1Reg))
return HexagonII::HCG_B;
@@ -3298,6 +3371,7 @@ HexagonII::CompoundGroup HexagonInstrInfo::getCompoundCandidateGroup(
// Do not test for jump range here.
case Hexagon::J2_jump:
case Hexagon::RESTORE_DEALLOC_RET_JMP_V4:
+ case Hexagon::RESTORE_DEALLOC_RET_JMP_V4_PIC:
return HexagonII::HCG_C;
break;
}
@@ -3307,15 +3381,15 @@ HexagonII::CompoundGroup HexagonInstrInfo::getCompoundCandidateGroup(
// Returns -1 when there is no opcode found.
-unsigned HexagonInstrInfo::getCompoundOpcode(const MachineInstr *GA,
- const MachineInstr *GB) const {
+unsigned HexagonInstrInfo::getCompoundOpcode(const MachineInstr &GA,
+ const MachineInstr &GB) const {
assert(getCompoundCandidateGroup(GA) == HexagonII::HCG_A);
assert(getCompoundCandidateGroup(GB) == HexagonII::HCG_B);
- if ((GA->getOpcode() != Hexagon::C2_cmpeqi) ||
- (GB->getOpcode() != Hexagon::J2_jumptnew))
+ if ((GA.getOpcode() != Hexagon::C2_cmpeqi) ||
+ (GB.getOpcode() != Hexagon::J2_jumptnew))
return -1;
- unsigned DestReg = GA->getOperand(0).getReg();
- if (!GB->readsRegister(DestReg))
+ unsigned DestReg = GA.getOperand(0).getReg();
+ if (!GB.readsRegister(DestReg))
return -1;
if (DestReg == Hexagon::P0)
return Hexagon::J4_cmpeqi_tp0_jump_nt;
@@ -3333,21 +3407,13 @@ int HexagonInstrInfo::getCondOpcode(int Opc, bool invertPredicate) const {
if (CondOpcode >= 0) // Valid Conditional opcode/instruction
return CondOpcode;
- // This switch case will be removed once all the instructions have been
- // modified to use relation maps.
- switch(Opc) {
- case Hexagon::TFRI_f:
- return !invertPredicate ? Hexagon::TFRI_cPt_f :
- Hexagon::TFRI_cNotPt_f;
- }
-
llvm_unreachable("Unexpected predicable instruction");
}
// Return the .cur form of a given load instruction.
-int HexagonInstrInfo::getDotCurOp(const MachineInstr* MI) const {
- switch (MI->getOpcode()) {
+int HexagonInstrInfo::getDotCurOp(const MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
default: llvm_unreachable("Unknown .cur type");
case Hexagon::V6_vL32b_pi:
return Hexagon::V6_vL32b_cur_pi;
@@ -3445,12 +3511,12 @@ int HexagonInstrInfo::getDotCurOp(const MachineInstr* MI) const {
// Return the new value instruction for a given store.
-int HexagonInstrInfo::getDotNewOp(const MachineInstr* MI) const {
- int NVOpcode = Hexagon::getNewValueOpcode(MI->getOpcode());
+int HexagonInstrInfo::getDotNewOp(const MachineInstr &MI) const {
+ int NVOpcode = Hexagon::getNewValueOpcode(MI.getOpcode());
if (NVOpcode >= 0) // Valid new-value store instruction.
return NVOpcode;
- switch (MI->getOpcode()) {
+ switch (MI.getOpcode()) {
default: llvm_unreachable("Unknown .new type");
case Hexagon::S4_storerb_ur:
return Hexagon::S4_storerbnew_ur;
@@ -3490,19 +3556,19 @@ int HexagonInstrInfo::getDotNewOp(const MachineInstr* MI) const {
// Returns the opcode to use when converting MI, which is a conditional jump,
// into a conditional instruction which uses the .new value of the predicate.
// We also use branch probabilities to add a hint to the jump.
-int HexagonInstrInfo::getDotNewPredJumpOp(const MachineInstr *MI,
+int HexagonInstrInfo::getDotNewPredJumpOp(const MachineInstr &MI,
const MachineBranchProbabilityInfo *MBPI) const {
// We assume that block can have at most two successors.
bool taken = false;
- const MachineBasicBlock *Src = MI->getParent();
- const MachineOperand *BrTarget = &MI->getOperand(1);
- const MachineBasicBlock *Dst = BrTarget->getMBB();
+ const MachineBasicBlock *Src = MI.getParent();
+ const MachineOperand &BrTarget = MI.getOperand(1);
+ const MachineBasicBlock *Dst = BrTarget.getMBB();
const BranchProbability Prediction = MBPI->getEdgeProbability(Src, Dst);
if (Prediction >= BranchProbability(1,2))
taken = true;
- switch (MI->getOpcode()) {
+ switch (MI.getOpcode()) {
case Hexagon::J2_jumpt:
return taken ? Hexagon::J2_jumptnewpt : Hexagon::J2_jumptnew;
case Hexagon::J2_jumpf:
@@ -3515,13 +3581,13 @@ int HexagonInstrInfo::getDotNewPredJumpOp(const MachineInstr *MI,
// Return .new predicate version for an instruction.
-int HexagonInstrInfo::getDotNewPredOp(const MachineInstr *MI,
+int HexagonInstrInfo::getDotNewPredOp(const MachineInstr &MI,
const MachineBranchProbabilityInfo *MBPI) const {
- int NewOpcode = Hexagon::getPredNewOpcode(MI->getOpcode());
+ int NewOpcode = Hexagon::getPredNewOpcode(MI.getOpcode());
if (NewOpcode >= 0) // Valid predicate new instruction
return NewOpcode;
- switch (MI->getOpcode()) {
+ switch (MI.getOpcode()) {
// Conditional jumps
case Hexagon::J2_jumpt:
case Hexagon::J2_jumpf:
@@ -3553,11 +3619,11 @@ int HexagonInstrInfo::getDotOldOp(const int opc) const {
// See if instruction could potentially be a duplex candidate.
// If so, return its group. Zero otherwise.
HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup(
- const MachineInstr *MI) const {
+ const MachineInstr &MI) const {
unsigned DstReg, SrcReg, Src1Reg, Src2Reg;
auto &HRI = getRegisterInfo();
- switch (MI->getOpcode()) {
+ switch (MI.getOpcode()) {
default:
return HexagonII::HSIG_None;
//
@@ -3566,29 +3632,29 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup(
// Rd = memw(Rs+#u4:2)
// Rd = memub(Rs+#u4:0)
case Hexagon::L2_loadri_io:
- DstReg = MI->getOperand(0).getReg();
- SrcReg = MI->getOperand(1).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
// Special case this one from Group L2.
// Rd = memw(r29+#u5:2)
if (isIntRegForSubInst(DstReg)) {
if (Hexagon::IntRegsRegClass.contains(SrcReg) &&
HRI.getStackRegister() == SrcReg &&
- MI->getOperand(2).isImm() &&
- isShiftedUInt<5,2>(MI->getOperand(2).getImm()))
+ MI.getOperand(2).isImm() &&
+ isShiftedUInt<5,2>(MI.getOperand(2).getImm()))
return HexagonII::HSIG_L2;
// Rd = memw(Rs+#u4:2)
if (isIntRegForSubInst(SrcReg) &&
- (MI->getOperand(2).isImm() &&
- isShiftedUInt<4,2>(MI->getOperand(2).getImm())))
+ (MI.getOperand(2).isImm() &&
+ isShiftedUInt<4,2>(MI.getOperand(2).getImm())))
return HexagonII::HSIG_L1;
}
break;
case Hexagon::L2_loadrub_io:
// Rd = memub(Rs+#u4:0)
- DstReg = MI->getOperand(0).getReg();
- SrcReg = MI->getOperand(1).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg) &&
- MI->getOperand(2).isImm() && isUInt<4>(MI->getOperand(2).getImm()))
+ MI.getOperand(2).isImm() && isUInt<4>(MI.getOperand(2).getImm()))
return HexagonII::HSIG_L1;
break;
//
@@ -3604,61 +3670,62 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup(
case Hexagon::L2_loadrh_io:
case Hexagon::L2_loadruh_io:
// Rd = memh/memuh(Rs+#u3:1)
- DstReg = MI->getOperand(0).getReg();
- SrcReg = MI->getOperand(1).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg) &&
- MI->getOperand(2).isImm() &&
- isShiftedUInt<3,1>(MI->getOperand(2).getImm()))
+ MI.getOperand(2).isImm() &&
+ isShiftedUInt<3,1>(MI.getOperand(2).getImm()))
return HexagonII::HSIG_L2;
break;
case Hexagon::L2_loadrb_io:
// Rd = memb(Rs+#u3:0)
- DstReg = MI->getOperand(0).getReg();
- SrcReg = MI->getOperand(1).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg) &&
- MI->getOperand(2).isImm() &&
- isUInt<3>(MI->getOperand(2).getImm()))
+ MI.getOperand(2).isImm() &&
+ isUInt<3>(MI.getOperand(2).getImm()))
return HexagonII::HSIG_L2;
break;
case Hexagon::L2_loadrd_io:
// Rdd = memd(r29+#u5:3)
- DstReg = MI->getOperand(0).getReg();
- SrcReg = MI->getOperand(1).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
if (isDblRegForSubInst(DstReg, HRI) &&
Hexagon::IntRegsRegClass.contains(SrcReg) &&
HRI.getStackRegister() == SrcReg &&
- MI->getOperand(2).isImm() &&
- isShiftedUInt<5,3>(MI->getOperand(2).getImm()))
+ MI.getOperand(2).isImm() &&
+ isShiftedUInt<5,3>(MI.getOperand(2).getImm()))
return HexagonII::HSIG_L2;
break;
// dealloc_return is not documented in Hexagon Manual, but marked
// with A_SUBINSN attribute in iset_v4classic.py.
case Hexagon::RESTORE_DEALLOC_RET_JMP_V4:
+ case Hexagon::RESTORE_DEALLOC_RET_JMP_V4_PIC:
case Hexagon::L4_return:
case Hexagon::L2_deallocframe:
return HexagonII::HSIG_L2;
case Hexagon::EH_RETURN_JMPR:
- case Hexagon::JMPret :
+ case Hexagon::PS_jmpret:
// jumpr r31
// Actual form JMPR %PC<imp-def>, %R31<imp-use>, %R0<imp-use,internal>.
- DstReg = MI->getOperand(0).getReg();
+ DstReg = MI.getOperand(0).getReg();
if (Hexagon::IntRegsRegClass.contains(DstReg) && (Hexagon::R31 == DstReg))
return HexagonII::HSIG_L2;
break;
- case Hexagon::JMPrett:
- case Hexagon::JMPretf:
- case Hexagon::JMPrettnewpt:
- case Hexagon::JMPretfnewpt :
- case Hexagon::JMPrettnew :
- case Hexagon::JMPretfnew :
- DstReg = MI->getOperand(1).getReg();
- SrcReg = MI->getOperand(0).getReg();
+ case Hexagon::PS_jmprett:
+ case Hexagon::PS_jmpretf:
+ case Hexagon::PS_jmprettnewpt:
+ case Hexagon::PS_jmpretfnewpt:
+ case Hexagon::PS_jmprettnew:
+ case Hexagon::PS_jmpretfnew:
+ DstReg = MI.getOperand(1).getReg();
+ SrcReg = MI.getOperand(0).getReg();
// [if ([!]p0[.new])] jumpr r31
if ((Hexagon::PredRegsRegClass.contains(SrcReg) &&
(Hexagon::P0 == SrcReg)) &&
(Hexagon::IntRegsRegClass.contains(DstReg) && (Hexagon::R31 == DstReg)))
return HexagonII::HSIG_L2;
- break;
+ break;
case Hexagon::L4_return_t :
case Hexagon::L4_return_f :
case Hexagon::L4_return_tnew_pnt :
@@ -3666,7 +3733,7 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup(
case Hexagon::L4_return_tnew_pt :
case Hexagon::L4_return_fnew_pt :
// [if ([!]p0[.new])] dealloc_return
- SrcReg = MI->getOperand(0).getReg();
+ SrcReg = MI.getOperand(0).getReg();
if (Hexagon::PredRegsRegClass.contains(SrcReg) && (Hexagon::P0 == SrcReg))
return HexagonII::HSIG_L2;
break;
@@ -3678,25 +3745,25 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup(
case Hexagon::S2_storeri_io:
// Special case this one from Group S2.
// memw(r29+#u5:2) = Rt
- Src1Reg = MI->getOperand(0).getReg();
- Src2Reg = MI->getOperand(2).getReg();
+ Src1Reg = MI.getOperand(0).getReg();
+ Src2Reg = MI.getOperand(2).getReg();
if (Hexagon::IntRegsRegClass.contains(Src1Reg) &&
isIntRegForSubInst(Src2Reg) &&
- HRI.getStackRegister() == Src1Reg && MI->getOperand(1).isImm() &&
- isShiftedUInt<5,2>(MI->getOperand(1).getImm()))
+ HRI.getStackRegister() == Src1Reg && MI.getOperand(1).isImm() &&
+ isShiftedUInt<5,2>(MI.getOperand(1).getImm()))
return HexagonII::HSIG_S2;
// memw(Rs+#u4:2) = Rt
if (isIntRegForSubInst(Src1Reg) && isIntRegForSubInst(Src2Reg) &&
- MI->getOperand(1).isImm() &&
- isShiftedUInt<4,2>(MI->getOperand(1).getImm()))
+ MI.getOperand(1).isImm() &&
+ isShiftedUInt<4,2>(MI.getOperand(1).getImm()))
return HexagonII::HSIG_S1;
break;
case Hexagon::S2_storerb_io:
// memb(Rs+#u4:0) = Rt
- Src1Reg = MI->getOperand(0).getReg();
- Src2Reg = MI->getOperand(2).getReg();
+ Src1Reg = MI.getOperand(0).getReg();
+ Src2Reg = MI.getOperand(2).getReg();
if (isIntRegForSubInst(Src1Reg) && isIntRegForSubInst(Src2Reg) &&
- MI->getOperand(1).isImm() && isUInt<4>(MI->getOperand(1).getImm()))
+ MI.getOperand(1).isImm() && isUInt<4>(MI.getOperand(1).getImm()))
return HexagonII::HSIG_S1;
break;
//
@@ -3710,42 +3777,42 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup(
// allocframe(#u5:3)
case Hexagon::S2_storerh_io:
// memh(Rs+#u3:1) = Rt
- Src1Reg = MI->getOperand(0).getReg();
- Src2Reg = MI->getOperand(2).getReg();
+ Src1Reg = MI.getOperand(0).getReg();
+ Src2Reg = MI.getOperand(2).getReg();
if (isIntRegForSubInst(Src1Reg) && isIntRegForSubInst(Src2Reg) &&
- MI->getOperand(1).isImm() &&
- isShiftedUInt<3,1>(MI->getOperand(1).getImm()))
+ MI.getOperand(1).isImm() &&
+ isShiftedUInt<3,1>(MI.getOperand(1).getImm()))
return HexagonII::HSIG_S1;
break;
case Hexagon::S2_storerd_io:
// memd(r29+#s6:3) = Rtt
- Src1Reg = MI->getOperand(0).getReg();
- Src2Reg = MI->getOperand(2).getReg();
+ Src1Reg = MI.getOperand(0).getReg();
+ Src2Reg = MI.getOperand(2).getReg();
if (isDblRegForSubInst(Src2Reg, HRI) &&
Hexagon::IntRegsRegClass.contains(Src1Reg) &&
- HRI.getStackRegister() == Src1Reg && MI->getOperand(1).isImm() &&
- isShiftedInt<6,3>(MI->getOperand(1).getImm()))
+ HRI.getStackRegister() == Src1Reg && MI.getOperand(1).isImm() &&
+ isShiftedInt<6,3>(MI.getOperand(1).getImm()))
return HexagonII::HSIG_S2;
break;
case Hexagon::S4_storeiri_io:
// memw(Rs+#u4:2) = #U1
- Src1Reg = MI->getOperand(0).getReg();
- if (isIntRegForSubInst(Src1Reg) && MI->getOperand(1).isImm() &&
- isShiftedUInt<4,2>(MI->getOperand(1).getImm()) &&
- MI->getOperand(2).isImm() && isUInt<1>(MI->getOperand(2).getImm()))
+ Src1Reg = MI.getOperand(0).getReg();
+ if (isIntRegForSubInst(Src1Reg) && MI.getOperand(1).isImm() &&
+ isShiftedUInt<4,2>(MI.getOperand(1).getImm()) &&
+ MI.getOperand(2).isImm() && isUInt<1>(MI.getOperand(2).getImm()))
return HexagonII::HSIG_S2;
break;
case Hexagon::S4_storeirb_io:
// memb(Rs+#u4) = #U1
- Src1Reg = MI->getOperand(0).getReg();
+ Src1Reg = MI.getOperand(0).getReg();
if (isIntRegForSubInst(Src1Reg) &&
- MI->getOperand(1).isImm() && isUInt<4>(MI->getOperand(1).getImm()) &&
- MI->getOperand(2).isImm() && isUInt<1>(MI->getOperand(2).getImm()))
+ MI.getOperand(1).isImm() && isUInt<4>(MI.getOperand(1).getImm()) &&
+ MI.getOperand(2).isImm() && isUInt<1>(MI.getOperand(2).getImm()))
return HexagonII::HSIG_S2;
break;
case Hexagon::S2_allocframe:
- if (MI->getOperand(0).isImm() &&
- isShiftedUInt<5,3>(MI->getOperand(0).getImm()))
+ if (MI.getOperand(0).isImm() &&
+ isShiftedUInt<5,3>(MI.getOperand(0).getImm()))
return HexagonII::HSIG_S1;
break;
//
@@ -3767,31 +3834,31 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup(
// Rd = sxth/sxtb/zxtb/zxth(Rs)
// Rd = and(Rs,#1)
case Hexagon::A2_addi:
- DstReg = MI->getOperand(0).getReg();
- SrcReg = MI->getOperand(1).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
if (isIntRegForSubInst(DstReg)) {
// Rd = add(r29,#u6:2)
if (Hexagon::IntRegsRegClass.contains(SrcReg) &&
- HRI.getStackRegister() == SrcReg && MI->getOperand(2).isImm() &&
- isShiftedUInt<6,2>(MI->getOperand(2).getImm()))
+ HRI.getStackRegister() == SrcReg && MI.getOperand(2).isImm() &&
+ isShiftedUInt<6,2>(MI.getOperand(2).getImm()))
return HexagonII::HSIG_A;
// Rx = add(Rx,#s7)
- if ((DstReg == SrcReg) && MI->getOperand(2).isImm() &&
- isInt<7>(MI->getOperand(2).getImm()))
+ if ((DstReg == SrcReg) && MI.getOperand(2).isImm() &&
+ isInt<7>(MI.getOperand(2).getImm()))
return HexagonII::HSIG_A;
// Rd = add(Rs,#1)
// Rd = add(Rs,#-1)
- if (isIntRegForSubInst(SrcReg) && MI->getOperand(2).isImm() &&
- ((MI->getOperand(2).getImm() == 1) ||
- (MI->getOperand(2).getImm() == -1)))
+ if (isIntRegForSubInst(SrcReg) && MI.getOperand(2).isImm() &&
+ ((MI.getOperand(2).getImm() == 1) ||
+ (MI.getOperand(2).getImm() == -1)))
return HexagonII::HSIG_A;
}
break;
case Hexagon::A2_add:
// Rx = add(Rx,Rs)
- DstReg = MI->getOperand(0).getReg();
- Src1Reg = MI->getOperand(1).getReg();
- Src2Reg = MI->getOperand(2).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ Src1Reg = MI.getOperand(1).getReg();
+ Src2Reg = MI.getOperand(2).getReg();
if (isIntRegForSubInst(DstReg) && (DstReg == Src1Reg) &&
isIntRegForSubInst(Src2Reg))
return HexagonII::HSIG_A;
@@ -3800,18 +3867,18 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup(
// Same as zxtb.
// Rd16=and(Rs16,#255)
// Rd16=and(Rs16,#1)
- DstReg = MI->getOperand(0).getReg();
- SrcReg = MI->getOperand(1).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg) &&
- MI->getOperand(2).isImm() &&
- ((MI->getOperand(2).getImm() == 1) ||
- (MI->getOperand(2).getImm() == 255)))
+ MI.getOperand(2).isImm() &&
+ ((MI.getOperand(2).getImm() == 1) ||
+ (MI.getOperand(2).getImm() == 255)))
return HexagonII::HSIG_A;
break;
case Hexagon::A2_tfr:
// Rd = Rs
- DstReg = MI->getOperand(0).getReg();
- SrcReg = MI->getOperand(1).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg))
return HexagonII::HSIG_A;
break;
@@ -3820,7 +3887,7 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup(
// Do not test for #u6 size since the const is getting extended
// regardless and compound could be formed.
// Rd = #-1
- DstReg = MI->getOperand(0).getReg();
+ DstReg = MI.getOperand(0).getReg();
if (isIntRegForSubInst(DstReg))
return HexagonII::HSIG_A;
break;
@@ -3831,51 +3898,51 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup(
// if ([!]P0[.new]) Rd = #0
// Actual form:
// %R16<def> = C2_cmovenewit %P0<internal>, 0, %R16<imp-use,undef>;
- DstReg = MI->getOperand(0).getReg();
- SrcReg = MI->getOperand(1).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
if (isIntRegForSubInst(DstReg) &&
Hexagon::PredRegsRegClass.contains(SrcReg) && Hexagon::P0 == SrcReg &&
- MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0)
+ MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0)
return HexagonII::HSIG_A;
break;
case Hexagon::C2_cmpeqi:
// P0 = cmp.eq(Rs,#u2)
- DstReg = MI->getOperand(0).getReg();
- SrcReg = MI->getOperand(1).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
if (Hexagon::PredRegsRegClass.contains(DstReg) &&
Hexagon::P0 == DstReg && isIntRegForSubInst(SrcReg) &&
- MI->getOperand(2).isImm() && isUInt<2>(MI->getOperand(2).getImm()))
+ MI.getOperand(2).isImm() && isUInt<2>(MI.getOperand(2).getImm()))
return HexagonII::HSIG_A;
break;
case Hexagon::A2_combineii:
case Hexagon::A4_combineii:
// Rdd = combine(#u2,#U2)
- DstReg = MI->getOperand(0).getReg();
+ DstReg = MI.getOperand(0).getReg();
if (isDblRegForSubInst(DstReg, HRI) &&
- ((MI->getOperand(1).isImm() && isUInt<2>(MI->getOperand(1).getImm())) ||
- (MI->getOperand(1).isGlobal() &&
- isUInt<2>(MI->getOperand(1).getOffset()))) &&
- ((MI->getOperand(2).isImm() && isUInt<2>(MI->getOperand(2).getImm())) ||
- (MI->getOperand(2).isGlobal() &&
- isUInt<2>(MI->getOperand(2).getOffset()))))
+ ((MI.getOperand(1).isImm() && isUInt<2>(MI.getOperand(1).getImm())) ||
+ (MI.getOperand(1).isGlobal() &&
+ isUInt<2>(MI.getOperand(1).getOffset()))) &&
+ ((MI.getOperand(2).isImm() && isUInt<2>(MI.getOperand(2).getImm())) ||
+ (MI.getOperand(2).isGlobal() &&
+ isUInt<2>(MI.getOperand(2).getOffset()))))
return HexagonII::HSIG_A;
break;
case Hexagon::A4_combineri:
// Rdd = combine(Rs,#0)
- DstReg = MI->getOperand(0).getReg();
- SrcReg = MI->getOperand(1).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
if (isDblRegForSubInst(DstReg, HRI) && isIntRegForSubInst(SrcReg) &&
- ((MI->getOperand(2).isImm() && MI->getOperand(2).getImm() == 0) ||
- (MI->getOperand(2).isGlobal() && MI->getOperand(2).getOffset() == 0)))
+ ((MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) ||
+ (MI.getOperand(2).isGlobal() && MI.getOperand(2).getOffset() == 0)))
return HexagonII::HSIG_A;
break;
case Hexagon::A4_combineir:
// Rdd = combine(#0,Rs)
- DstReg = MI->getOperand(0).getReg();
- SrcReg = MI->getOperand(2).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(2).getReg();
if (isDblRegForSubInst(DstReg, HRI) && isIntRegForSubInst(SrcReg) &&
- ((MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0) ||
- (MI->getOperand(1).isGlobal() && MI->getOperand(1).getOffset() == 0)))
+ ((MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) ||
+ (MI.getOperand(1).isGlobal() && MI.getOperand(1).getOffset() == 0)))
return HexagonII::HSIG_A;
break;
case Hexagon::A2_sxtb:
@@ -3883,8 +3950,8 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup(
case Hexagon::A2_zxtb:
case Hexagon::A2_zxth:
// Rd = sxth/sxtb/zxtb/zxth(Rs)
- DstReg = MI->getOperand(0).getReg();
- SrcReg = MI->getOperand(1).getReg();
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
if (isIntRegForSubInst(DstReg) && isIntRegForSubInst(SrcReg))
return HexagonII::HSIG_A;
break;
@@ -3894,8 +3961,8 @@ HexagonII::SubInstructionGroup HexagonInstrInfo::getDuplexCandidateGroup(
}
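// A minimal standalone equivalent of the isUInt / isShiftedUInt checks that
// gate the sub-instruction (duplex) forms above: a #uN:S immediate must be a
// multiple of 2^S and fit in N bits once that scaling is removed. This
// mirrors llvm::isShiftedUInt<N, S> from llvm/Support/MathExtras.h; the
// helper name below is a placeholder.
#include <cstdint>
template <unsigned N, unsigned S>
constexpr bool fitsShiftedUInt(uint64_t x) {
  return (x % (uint64_t(1) << S)) == 0 && (x >> S) < (uint64_t(1) << N);
}
static_assert(fitsShiftedUInt<5, 2>(124), "#u5:2 reaches up to 124");
static_assert(!fitsShiftedUInt<5, 2>(126), "not a multiple of 4");
static_assert(!fitsShiftedUInt<5, 2>(128), "needs more than 5 bits after scaling");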
-short HexagonInstrInfo::getEquivalentHWInstr(const MachineInstr *MI) const {
- return Hexagon::getRealHWInstr(MI->getOpcode(), Hexagon::InstrType_Real);
+short HexagonInstrInfo::getEquivalentHWInstr(const MachineInstr &MI) const {
+ return Hexagon::getRealHWInstr(MI.getOpcode(), Hexagon::InstrType_Real);
}
@@ -3903,30 +3970,30 @@ short HexagonInstrInfo::getEquivalentHWInstr(const MachineInstr *MI) const {
MachineInstr *HexagonInstrInfo::getFirstNonDbgInst(MachineBasicBlock *BB)
const {
for (auto MII = BB->instr_begin(), End = BB->instr_end(); MII != End; MII++) {
- MachineInstr *MI = &*MII;
- if (MI->isDebugValue())
+ MachineInstr &MI = *MII;
+ if (MI.isDebugValue())
continue;
- return MI;
+ return &MI;
}
return nullptr;
}
unsigned HexagonInstrInfo::getInstrTimingClassLatency(
- const InstrItineraryData *ItinData, const MachineInstr *MI) const {
+ const InstrItineraryData *ItinData, const MachineInstr &MI) const {
// Default to one cycle for no itinerary. However, an "empty" itinerary may
// still have a MinLatency property, which getStageLatency checks.
if (!ItinData)
- return getInstrLatency(ItinData, *MI);
+ return getInstrLatency(ItinData, MI);
// Get the latency embedded in the itinerary. If we're not using timing class
// latencies or if we are using BSB scheduling, then restrict the maximum latency
// to 1 (that is, either 0 or 1).
- if (MI->isTransient())
+ if (MI.isTransient())
return 0;
- unsigned Latency = ItinData->getStageLatency(MI->getDesc().getSchedClass());
+ unsigned Latency = ItinData->getStageLatency(MI.getDesc().getSchedClass());
if (!EnableTimingClassLatency ||
- MI->getParent()->getParent()->getSubtarget<HexagonSubtarget>().
+ MI.getParent()->getParent()->getSubtarget<HexagonSubtarget>().
useBSBScheduling())
if (Latency > 1)
Latency = 1;
@@ -3959,8 +4026,8 @@ unsigned HexagonInstrInfo::getInvertedPredicatedOpcode(const int Opc) const {
// Returns the max value that doesn't need to be extended.
-int HexagonInstrInfo::getMaxValue(const MachineInstr *MI) const {
- const uint64_t F = MI->getDesc().TSFlags;
+int HexagonInstrInfo::getMaxValue(const MachineInstr &MI) const {
+ const uint64_t F = MI.getDesc().TSFlags;
unsigned isSigned = (F >> HexagonII::ExtentSignedPos)
& HexagonII::ExtentSignedMask;
unsigned bits = (F >> HexagonII::ExtentBitsPos)
@@ -3973,15 +4040,15 @@ int HexagonInstrInfo::getMaxValue(const MachineInstr *MI) const {
}
-unsigned HexagonInstrInfo::getMemAccessSize(const MachineInstr* MI) const {
- const uint64_t F = MI->getDesc().TSFlags;
+unsigned HexagonInstrInfo::getMemAccessSize(const MachineInstr &MI) const {
+ const uint64_t F = MI.getDesc().TSFlags;
return (F >> HexagonII::MemAccessSizePos) & HexagonII::MemAccesSizeMask;
}
// Returns the min value that doesn't need to be extended.
-int HexagonInstrInfo::getMinValue(const MachineInstr *MI) const {
- const uint64_t F = MI->getDesc().TSFlags;
+int HexagonInstrInfo::getMinValue(const MachineInstr &MI) const {
+ const uint64_t F = MI.getDesc().TSFlags;
unsigned isSigned = (F >> HexagonII::ExtentSignedPos)
& HexagonII::ExtentSignedMask;
unsigned bits = (F >> HexagonII::ExtentBitsPos)
@@ -3995,22 +4062,22 @@ int HexagonInstrInfo::getMinValue(const MachineInstr *MI) const {
// Returns opcode of the non-extended equivalent instruction.
-short HexagonInstrInfo::getNonExtOpcode(const MachineInstr *MI) const {
+short HexagonInstrInfo::getNonExtOpcode(const MachineInstr &MI) const {
// Check if the instruction has a register form that uses register in place
// of the extended operand, if so return that as the non-extended form.
- short NonExtOpcode = Hexagon::getRegForm(MI->getOpcode());
+ short NonExtOpcode = Hexagon::getRegForm(MI.getOpcode());
if (NonExtOpcode >= 0)
return NonExtOpcode;
- if (MI->getDesc().mayLoad() || MI->getDesc().mayStore()) {
+ if (MI.getDesc().mayLoad() || MI.getDesc().mayStore()) {
// Check addressing mode and retrieve non-ext equivalent instruction.
switch (getAddrMode(MI)) {
case HexagonII::Absolute :
- return Hexagon::getBaseWithImmOffset(MI->getOpcode());
+ return Hexagon::getBaseWithImmOffset(MI.getOpcode());
case HexagonII::BaseImmOffset :
- return Hexagon::getBaseWithRegOffset(MI->getOpcode());
+ return Hexagon::getBaseWithRegOffset(MI.getOpcode());
case HexagonII::BaseLongOffset:
- return Hexagon::getRegShlForm(MI->getOpcode());
+ return Hexagon::getRegShlForm(MI.getOpcode());
default:
return -1;
@@ -4026,8 +4093,8 @@ bool HexagonInstrInfo::getPredReg(ArrayRef<MachineOperand> Cond,
return false;
assert(Cond.size() == 2);
if (isNewValueJump(Cond[0].getImm()) || Cond[1].isMBB()) {
- DEBUG(dbgs() << "No predregs for new-value jumps/endloop");
- return false;
+ DEBUG(dbgs() << "No predregs for new-value jumps/endloop");
+ return false;
}
PredReg = Cond[1].getReg();
PredRegPos = 1;
@@ -4041,13 +4108,13 @@ bool HexagonInstrInfo::getPredReg(ArrayRef<MachineOperand> Cond,
}
-short HexagonInstrInfo::getPseudoInstrPair(const MachineInstr *MI) const {
- return Hexagon::getRealHWInstr(MI->getOpcode(), Hexagon::InstrType_Pseudo);
+short HexagonInstrInfo::getPseudoInstrPair(const MachineInstr &MI) const {
+ return Hexagon::getRealHWInstr(MI.getOpcode(), Hexagon::InstrType_Pseudo);
}
-short HexagonInstrInfo::getRegForm(const MachineInstr *MI) const {
- return Hexagon::getRegForm(MI->getOpcode());
+short HexagonInstrInfo::getRegForm(const MachineInstr &MI) const {
+ return Hexagon::getRegForm(MI.getOpcode());
}
@@ -4055,11 +4122,11 @@ short HexagonInstrInfo::getRegForm(const MachineInstr *MI) const {
// Hexagon instructions are fixed length, 4 bytes, unless they
// use a constant extender, which requires another 4 bytes.
// For debug instructions and prolog labels, return 0.
-unsigned HexagonInstrInfo::getSize(const MachineInstr *MI) const {
- if (MI->isDebugValue() || MI->isPosition())
+unsigned HexagonInstrInfo::getSize(const MachineInstr &MI) const {
+ if (MI.isDebugValue() || MI.isPosition())
return 0;
- unsigned Size = MI->getDesc().getSize();
+ unsigned Size = MI.getDesc().getSize();
if (!Size)
// Assume the default insn size in case it cannot be determined
// for whatever reason.
@@ -4069,20 +4136,20 @@ unsigned HexagonInstrInfo::getSize(const MachineInstr *MI) const {
Size += HEXAGON_INSTR_SIZE;
// Try and compute number of instructions in asm.
- if (BranchRelaxAsmLarge && MI->getOpcode() == Hexagon::INLINEASM) {
- const MachineBasicBlock &MBB = *MI->getParent();
+ if (BranchRelaxAsmLarge && MI.getOpcode() == Hexagon::INLINEASM) {
+ const MachineBasicBlock &MBB = *MI.getParent();
const MachineFunction *MF = MBB.getParent();
const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
// Count the number of register definitions to find the asm string.
unsigned NumDefs = 0;
- for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef();
+ for (; MI.getOperand(NumDefs).isReg() && MI.getOperand(NumDefs).isDef();
++NumDefs)
- assert(NumDefs != MI->getNumOperands()-2 && "No asm string?");
+ assert(NumDefs != MI.getNumOperands()-2 && "No asm string?");
- assert(MI->getOperand(NumDefs).isSymbol() && "No asm string?");
+ assert(MI.getOperand(NumDefs).isSymbol() && "No asm string?");
// Disassemble the AsmStr and approximate number of instructions.
- const char *AsmStr = MI->getOperand(NumDefs).getSymbolName();
+ const char *AsmStr = MI.getOperand(NumDefs).getSymbolName();
Size = getInlineAsmLength(AsmStr, *MAI);
}
@@ -4090,16 +4157,16 @@ unsigned HexagonInstrInfo::getSize(const MachineInstr *MI) const {
}
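(Editorial aside, not part of the patch.) The sizing rule stated in the comment above getSize() stands on its own: 4 bytes per instruction, another 4 when a constant extender is used, and 0 for debug values and position labels (the inline-asm estimation path is omitted here). A minimal self-contained sketch of that rule, with illustrative names — kInstrBytes stands in for HEXAGON_INSTR_SIZE:

#include <cassert>

// Illustrative only; mirrors the basic rule getSize() implements above.
constexpr unsigned kInstrBytes = 4;

unsigned approxSize(bool IsDebugOrLabel, bool IsConstExtended) {
  if (IsDebugOrLabel)
    return 0;                    // debug values / prolog labels take no space
  unsigned Size = kInstrBytes;   // base 4-byte encoding
  if (IsConstExtended)
    Size += kInstrBytes;         // immediate extender occupies another word
  return Size;
}

int main() {
  assert(approxSize(false, true) == 8 && approxSize(true, false) == 0);
  return 0;
}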
-uint64_t HexagonInstrInfo::getType(const MachineInstr* MI) const {
- const uint64_t F = MI->getDesc().TSFlags;
+uint64_t HexagonInstrInfo::getType(const MachineInstr &MI) const {
+ const uint64_t F = MI.getDesc().TSFlags;
return (F >> HexagonII::TypePos) & HexagonII::TypeMask;
}
-unsigned HexagonInstrInfo::getUnits(const MachineInstr* MI) const {
- const TargetSubtargetInfo &ST = MI->getParent()->getParent()->getSubtarget();
+unsigned HexagonInstrInfo::getUnits(const MachineInstr &MI) const {
+ const TargetSubtargetInfo &ST = MI.getParent()->getParent()->getSubtarget();
const InstrItineraryData &II = *ST.getInstrItineraryData();
- const InstrStage &IS = *II.beginStage(MI->getDesc().getSchedClass());
+ const InstrStage &IS = *II.beginStage(MI.getDesc().getSchedClass());
return IS.getUnits();
}
@@ -4122,18 +4189,18 @@ unsigned HexagonInstrInfo::nonDbgBundleSize(
assert(BundleHead->isBundle() && "Not a bundle header");
auto MII = BundleHead.getInstrIterator();
// Skip the bundle header.
- return nonDbgMICount(++MII, getBundleEnd(*BundleHead));
+ return nonDbgMICount(++MII, getBundleEnd(BundleHead.getInstrIterator()));
}
/// immediateExtend - Changes the instruction in place to one using an immediate
/// extender.
-void HexagonInstrInfo::immediateExtend(MachineInstr *MI) const {
+void HexagonInstrInfo::immediateExtend(MachineInstr &MI) const {
assert((isExtendable(MI)||isConstExtended(MI)) &&
"Instruction must be extendable");
// Find which operand is extendable.
short ExtOpNum = getCExtOpNum(MI);
- MachineOperand &MO = MI->getOperand(ExtOpNum);
+ MachineOperand &MO = MI.getOperand(ExtOpNum);
// This needs to be something we understand.
assert((MO.isMBB() || MO.isImm()) &&
"Branch with unknown extendable field type");
@@ -4143,22 +4210,22 @@ void HexagonInstrInfo::immediateExtend(MachineInstr *MI) const {
bool HexagonInstrInfo::invertAndChangeJumpTarget(
- MachineInstr* MI, MachineBasicBlock* NewTarget) const {
+ MachineInstr &MI, MachineBasicBlock *NewTarget) const {
DEBUG(dbgs() << "\n[invertAndChangeJumpTarget] to BB#"
- << NewTarget->getNumber(); MI->dump(););
- assert(MI->isBranch());
- unsigned NewOpcode = getInvertedPredicatedOpcode(MI->getOpcode());
- int TargetPos = MI->getNumOperands() - 1;
+ << NewTarget->getNumber(); MI.dump(););
+ assert(MI.isBranch());
+ unsigned NewOpcode = getInvertedPredicatedOpcode(MI.getOpcode());
+ int TargetPos = MI.getNumOperands() - 1;
// In general branch target is the last operand,
// but some implicit defs added at the end might change it.
- while ((TargetPos > -1) && !MI->getOperand(TargetPos).isMBB())
+ while ((TargetPos > -1) && !MI.getOperand(TargetPos).isMBB())
--TargetPos;
- assert((TargetPos >= 0) && MI->getOperand(TargetPos).isMBB());
- MI->getOperand(TargetPos).setMBB(NewTarget);
- if (EnableBranchPrediction && isPredicatedNew(*MI)) {
+ assert((TargetPos >= 0) && MI.getOperand(TargetPos).isMBB());
+ MI.getOperand(TargetPos).setMBB(NewTarget);
+ if (EnableBranchPrediction && isPredicatedNew(MI)) {
NewOpcode = reversePrediction(NewOpcode);
}
- MI->setDesc(get(NewOpcode));
+ MI.setDesc(get(NewOpcode));
return true;
}
@@ -4168,13 +4235,12 @@ void HexagonInstrInfo::genAllInsnTimingClasses(MachineFunction &MF) const {
MachineFunction::iterator A = MF.begin();
MachineBasicBlock &B = *A;
MachineBasicBlock::iterator I = B.begin();
- MachineInstr *MI = &*I;
- DebugLoc DL = MI->getDebugLoc();
+ DebugLoc DL = I->getDebugLoc();
MachineInstr *NewMI;
for (unsigned insn = TargetOpcode::GENERIC_OP_END+1;
insn < Hexagon::INSTRUCTION_LIST_END; ++insn) {
- NewMI = BuildMI(B, MI, DL, get(insn));
+ NewMI = BuildMI(B, I, DL, get(insn));
DEBUG(dbgs() << "\n" << getName(NewMI->getOpcode()) <<
" Class: " << NewMI->getDesc().getSchedClass());
NewMI->eraseFromParent();
@@ -4186,9 +4252,9 @@ void HexagonInstrInfo::genAllInsnTimingClasses(MachineFunction &MF) const {
// inverts the predication logic.
// p -> NotP
// NotP -> P
-bool HexagonInstrInfo::reversePredSense(MachineInstr* MI) const {
- DEBUG(dbgs() << "\nTrying to reverse pred. sense of:"; MI->dump());
- MI->setDesc(get(getInvertedPredicatedOpcode(MI->getOpcode())));
+bool HexagonInstrInfo::reversePredSense(MachineInstr &MI) const {
+ DEBUG(dbgs() << "\nTrying to reverse pred. sense of:"; MI.dump());
+ MI.setDesc(get(getInvertedPredicatedOpcode(MI.getOpcode())));
return true;
}
@@ -4212,6 +4278,6 @@ bool HexagonInstrInfo::validateBranchCond(const ArrayRef<MachineOperand> &Cond)
}
-short HexagonInstrInfo::xformRegToImmOffset(const MachineInstr *MI) const {
- return Hexagon::xformRegToImmOffset(MI->getOpcode());
+short HexagonInstrInfo::xformRegToImmOffset(const MachineInstr &MI) const {
+ return Hexagon::xformRegToImmOffset(MI.getOpcode());
}
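(Editorial aside, not part of the patch.) Most of the small accessors touched above — getMemAccessSize(), getMinValue(), getType(), and many of the is* predicates — decode fixed-width fields from the instruction descriptor's TSFlags word with the same shift-and-mask idiom. A self-contained sketch with hypothetical bit positions and masks (the real values live in the HexagonII namespace):

#include <cstdint>

// Hypothetical field layout; not the real HexagonII positions/masks.
enum : unsigned { kTypePos = 0, kTypeMask = 0x3f,
                  kMemAccessSizePos = 6, kMemAccessSizeMask = 0xf };

// Same "(F >> Pos) & Mask" decoding the accessors above use.
unsigned decodeField(uint64_t TSFlags, unsigned Pos, uint64_t Mask) {
  return (TSFlags >> Pos) & Mask;
}

int main() {
  uint64_t Flags = (0x3u << kMemAccessSizePos) | 0x11u;   // two packed fields
  bool OK = decodeField(Flags, kTypePos, kTypeMask) == 0x11 &&
            decodeField(Flags, kMemAccessSizePos, kMemAccessSizeMask) == 0x3;
  return OK ? 0 : 1;
}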
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
index 66b6883c955b..2d184d1484e9 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -73,7 +73,7 @@ public:
/// condition. These operands can be passed to other TargetInstrInfo
/// methods to create new branches.
///
- /// Note that RemoveBranch and InsertBranch must be implemented to support
+ /// Note that removeBranch and insertBranch must be implemented to support
/// cases where this method returns success.
///
/// If AllowModify is true, then this routine is allowed to modify the basic
@@ -87,7 +87,8 @@ public:
/// Remove the branching code at the end of the specific MBB.
/// This is only invoked in cases where AnalyzeBranch returns success. It
/// returns the number of instructions that were removed.
- unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
+ unsigned removeBranch(MachineBasicBlock &MBB,
+ int *BytesRemoved = nullptr) const override;
/// Insert branch code into the end of the specified MachineBasicBlock.
/// The operands to this method are the same as those
@@ -99,9 +100,26 @@ public:
/// cases where AnalyzeBranch doesn't apply because there was no original
/// branch to analyze. At least this much must be implemented, else tail
/// merging needs to be disabled.
- unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
- const DebugLoc &DL) const override;
+ const DebugLoc &DL,
+ int *BytesAdded = nullptr) const override;
+
+ /// Analyze the loop code, return true if it cannot be understood. Upon
+ /// success, this function returns false and returns information about the
+ /// induction variable and compare instruction used at the end.
+ bool analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst,
+ MachineInstr *&CmpInst) const override;
+
+ /// Generate code to reduce the loop iteration by one and check if the loop is
+ /// finished. Return the value/register of the new loop count. We need
+ /// this function when peeling off one or more iterations of a loop. This
+ /// function assumes the nth iteration is peeled first.
+ unsigned reduceLoopCount(MachineBasicBlock &MBB,
+ MachineInstr *IndVar, MachineInstr &Cmp,
+ SmallVectorImpl<MachineOperand> &Cond,
+ SmallVectorImpl<MachineInstr *> &PrevInsts,
+ unsigned Iter, unsigned MaxIter) const override;
/// Return true if it's profitable to predicate
/// instructions with accumulated instruction latency of "NumCycles"
@@ -172,9 +190,14 @@ public:
/// anything was changed.
bool expandPostRAPseudo(MachineInstr &MI) const override;
+ /// \brief Get the base register and byte offset of a load/store instr.
+ bool getMemOpBaseRegImmOfs(MachineInstr &LdSt, unsigned &BaseReg,
+ int64_t &Offset,
+ const TargetRegisterInfo *TRI) const override;
+
/// Reverses the branch condition of the specified condition list,
/// returning false on success and true if it cannot be reversed.
- bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
+ bool reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
const override;
/// Insert a noop into the instruction stream at the specified point.
@@ -184,6 +207,9 @@ public:
/// Returns true if the instruction is already predicated.
bool isPredicated(const MachineInstr &MI) const override;
+ /// Return true for post-incremented instructions.
+ bool isPostIncrement(const MachineInstr &MI) const override;
+
/// Convert the instruction into a predicated instruction.
/// It returns true if the operation was successful.
bool PredicateInstruction(MachineInstr &MI,
@@ -248,6 +274,16 @@ public:
areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb,
AliasAnalysis *AA = nullptr) const override;
+ /// For instructions with a base and offset, return the position of the
+ /// base register and offset operands.
+ bool getBaseAndOffsetPosition(const MachineInstr &MI, unsigned &BasePos,
+ unsigned &OffsetPos) const override;
+
+ /// If the instruction is an increment of a constant value, return the amount.
+ bool getIncrementValue(const MachineInstr &MI, int &Value) const override;
+
+ bool isTailCall(const MachineInstr &MI) const override;
+
/// HexagonInstrInfo specifics.
///
@@ -255,49 +291,48 @@ public:
unsigned createVR(MachineFunction* MF, MVT VT) const;
- bool isAbsoluteSet(const MachineInstr* MI) const;
- bool isAccumulator(const MachineInstr *MI) const;
- bool isComplex(const MachineInstr *MI) const;
- bool isCompoundBranchInstr(const MachineInstr *MI) const;
- bool isCondInst(const MachineInstr *MI) const;
- bool isConditionalALU32 (const MachineInstr* MI) const;
- bool isConditionalLoad(const MachineInstr* MI) const;
- bool isConditionalStore(const MachineInstr* MI) const;
- bool isConditionalTransfer(const MachineInstr* MI) const;
- bool isConstExtended(const MachineInstr *MI) const;
- bool isDeallocRet(const MachineInstr *MI) const;
- bool isDependent(const MachineInstr *ProdMI,
- const MachineInstr *ConsMI) const;
- bool isDotCurInst(const MachineInstr* MI) const;
- bool isDotNewInst(const MachineInstr* MI) const;
- bool isDuplexPair(const MachineInstr *MIa, const MachineInstr *MIb) const;
- bool isEarlySourceInstr(const MachineInstr *MI) const;
+ bool isAbsoluteSet(const MachineInstr &MI) const;
+ bool isAccumulator(const MachineInstr &MI) const;
+ bool isComplex(const MachineInstr &MI) const;
+ bool isCompoundBranchInstr(const MachineInstr &MI) const;
+ bool isCondInst(const MachineInstr &MI) const;
+ bool isConditionalALU32 (const MachineInstr &MI) const;
+ bool isConditionalLoad(const MachineInstr &MI) const;
+ bool isConditionalStore(const MachineInstr &MI) const;
+ bool isConditionalTransfer(const MachineInstr &MI) const;
+ bool isConstExtended(const MachineInstr &MI) const;
+ bool isDeallocRet(const MachineInstr &MI) const;
+ bool isDependent(const MachineInstr &ProdMI,
+ const MachineInstr &ConsMI) const;
+ bool isDotCurInst(const MachineInstr &MI) const;
+ bool isDotNewInst(const MachineInstr &MI) const;
+ bool isDuplexPair(const MachineInstr &MIa, const MachineInstr &MIb) const;
+ bool isEarlySourceInstr(const MachineInstr &MI) const;
bool isEndLoopN(unsigned Opcode) const;
bool isExpr(unsigned OpType) const;
- bool isExtendable(const MachineInstr* MI) const;
- bool isExtended(const MachineInstr* MI) const;
- bool isFloat(const MachineInstr *MI) const;
- bool isHVXMemWithAIndirect(const MachineInstr *I,
- const MachineInstr *J) const;
- bool isIndirectCall(const MachineInstr *MI) const;
- bool isIndirectL4Return(const MachineInstr *MI) const;
- bool isJumpR(const MachineInstr *MI) const;
- bool isJumpWithinBranchRange(const MachineInstr *MI, unsigned offset) const;
- bool isLateInstrFeedsEarlyInstr(const MachineInstr *LRMI,
- const MachineInstr *ESMI) const;
- bool isLateResultInstr(const MachineInstr *MI) const;
- bool isLateSourceInstr(const MachineInstr *MI) const;
- bool isLoopN(const MachineInstr *MI) const;
- bool isMemOp(const MachineInstr *MI) const;
- bool isNewValue(const MachineInstr* MI) const;
+ bool isExtendable(const MachineInstr &MI) const;
+ bool isExtended(const MachineInstr &MI) const;
+ bool isFloat(const MachineInstr &MI) const;
+ bool isHVXMemWithAIndirect(const MachineInstr &I,
+ const MachineInstr &J) const;
+ bool isIndirectCall(const MachineInstr &MI) const;
+ bool isIndirectL4Return(const MachineInstr &MI) const;
+ bool isJumpR(const MachineInstr &MI) const;
+ bool isJumpWithinBranchRange(const MachineInstr &MI, unsigned offset) const;
+ bool isLateInstrFeedsEarlyInstr(const MachineInstr &LRMI,
+ const MachineInstr &ESMI) const;
+ bool isLateResultInstr(const MachineInstr &MI) const;
+ bool isLateSourceInstr(const MachineInstr &MI) const;
+ bool isLoopN(const MachineInstr &MI) const;
+ bool isMemOp(const MachineInstr &MI) const;
+ bool isNewValue(const MachineInstr &MI) const;
bool isNewValue(unsigned Opcode) const;
- bool isNewValueInst(const MachineInstr* MI) const;
- bool isNewValueJump(const MachineInstr* MI) const;
+ bool isNewValueInst(const MachineInstr &MI) const;
+ bool isNewValueJump(const MachineInstr &MI) const;
bool isNewValueJump(unsigned Opcode) const;
- bool isNewValueStore(const MachineInstr* MI) const;
+ bool isNewValueStore(const MachineInstr &MI) const;
bool isNewValueStore(unsigned Opcode) const;
- bool isOperandExtended(const MachineInstr *MI, unsigned OperandNum) const;
- bool isPostIncrement(const MachineInstr* MI) const;
+ bool isOperandExtended(const MachineInstr &MI, unsigned OperandNum) const;
bool isPredicatedNew(const MachineInstr &MI) const;
bool isPredicatedNew(unsigned Opcode) const;
bool isPredicatedTrue(const MachineInstr &MI) const;
@@ -305,87 +340,85 @@ public:
bool isPredicated(unsigned Opcode) const;
bool isPredicateLate(unsigned Opcode) const;
bool isPredictedTaken(unsigned Opcode) const;
- bool isSaveCalleeSavedRegsCall(const MachineInstr *MI) const;
+ bool isSaveCalleeSavedRegsCall(const MachineInstr &MI) const;
bool isSignExtendingLoad(const MachineInstr &MI) const;
- bool isSolo(const MachineInstr* MI) const;
- bool isSpillPredRegOp(const MachineInstr *MI) const;
- bool isTailCall(const MachineInstr *MI) const;
- bool isTC1(const MachineInstr *MI) const;
- bool isTC2(const MachineInstr *MI) const;
- bool isTC2Early(const MachineInstr *MI) const;
- bool isTC4x(const MachineInstr *MI) const;
- bool isToBeScheduledASAP(const MachineInstr *MI1,
- const MachineInstr *MI2) const;
- bool isV60VectorInstruction(const MachineInstr *MI) const;
+ bool isSolo(const MachineInstr &MI) const;
+ bool isSpillPredRegOp(const MachineInstr &MI) const;
+ bool isTC1(const MachineInstr &MI) const;
+ bool isTC2(const MachineInstr &MI) const;
+ bool isTC2Early(const MachineInstr &MI) const;
+ bool isTC4x(const MachineInstr &MI) const;
+ bool isToBeScheduledASAP(const MachineInstr &MI1,
+ const MachineInstr &MI2) const;
+ bool isV60VectorInstruction(const MachineInstr &MI) const;
bool isValidAutoIncImm(const EVT VT, const int Offset) const;
bool isValidOffset(unsigned Opcode, int Offset, bool Extend = true) const;
- bool isVecAcc(const MachineInstr *MI) const;
- bool isVecALU(const MachineInstr *MI) const;
- bool isVecUsableNextPacket(const MachineInstr *ProdMI,
- const MachineInstr *ConsMI) const;
+ bool isVecAcc(const MachineInstr &MI) const;
+ bool isVecALU(const MachineInstr &MI) const;
+ bool isVecUsableNextPacket(const MachineInstr &ProdMI,
+ const MachineInstr &ConsMI) const;
bool isZeroExtendingLoad(const MachineInstr &MI) const;
- bool addLatencyToSchedule(const MachineInstr *MI1,
- const MachineInstr *MI2) const;
- bool canExecuteInBundle(const MachineInstr *First,
- const MachineInstr *Second) const;
+ bool addLatencyToSchedule(const MachineInstr &MI1,
+ const MachineInstr &MI2) const;
+ bool canExecuteInBundle(const MachineInstr &First,
+ const MachineInstr &Second) const;
+ bool doesNotReturn(const MachineInstr &CallMI) const;
bool hasEHLabel(const MachineBasicBlock *B) const;
- bool hasNonExtEquivalent(const MachineInstr *MI) const;
- bool hasPseudoInstrPair(const MachineInstr *MI) const;
+ bool hasNonExtEquivalent(const MachineInstr &MI) const;
+ bool hasPseudoInstrPair(const MachineInstr &MI) const;
bool hasUncondBranch(const MachineBasicBlock *B) const;
- bool mayBeCurLoad(const MachineInstr* MI) const;
- bool mayBeNewStore(const MachineInstr* MI) const;
- bool producesStall(const MachineInstr *ProdMI,
- const MachineInstr *ConsMI) const;
- bool producesStall(const MachineInstr *MI,
+ bool mayBeCurLoad(const MachineInstr &MI) const;
+ bool mayBeNewStore(const MachineInstr &MI) const;
+ bool producesStall(const MachineInstr &ProdMI,
+ const MachineInstr &ConsMI) const;
+ bool producesStall(const MachineInstr &MI,
MachineBasicBlock::const_instr_iterator MII) const;
- bool predCanBeUsedAsDotNew(const MachineInstr *MI, unsigned PredReg) const;
+ bool predCanBeUsedAsDotNew(const MachineInstr &MI, unsigned PredReg) const;
bool PredOpcodeHasJMP_c(unsigned Opcode) const;
bool predOpcodeHasNot(ArrayRef<MachineOperand> Cond) const;
- short getAbsoluteForm(const MachineInstr *MI) const;
- unsigned getAddrMode(const MachineInstr* MI) const;
- unsigned getBaseAndOffset(const MachineInstr *MI, int &Offset,
+ short getAbsoluteForm(const MachineInstr &MI) const;
+ unsigned getAddrMode(const MachineInstr &MI) const;
+ unsigned getBaseAndOffset(const MachineInstr &MI, int &Offset,
unsigned &AccessSize) const;
- bool getBaseAndOffsetPosition(const MachineInstr *MI, unsigned &BasePos,
- unsigned &OffsetPos) const;
short getBaseWithLongOffset(short Opcode) const;
- short getBaseWithLongOffset(const MachineInstr *MI) const;
- short getBaseWithRegOffset(const MachineInstr *MI) const;
+ short getBaseWithLongOffset(const MachineInstr &MI) const;
+ short getBaseWithRegOffset(const MachineInstr &MI) const;
SmallVector<MachineInstr*,2> getBranchingInstrs(MachineBasicBlock& MBB) const;
- unsigned getCExtOpNum(const MachineInstr *MI) const;
+ unsigned getCExtOpNum(const MachineInstr &MI) const;
HexagonII::CompoundGroup
- getCompoundCandidateGroup(const MachineInstr *MI) const;
- unsigned getCompoundOpcode(const MachineInstr *GA,
- const MachineInstr *GB) const;
+ getCompoundCandidateGroup(const MachineInstr &MI) const;
+ unsigned getCompoundOpcode(const MachineInstr &GA,
+ const MachineInstr &GB) const;
int getCondOpcode(int Opc, bool sense) const;
- int getDotCurOp(const MachineInstr* MI) const;
- int getDotNewOp(const MachineInstr* MI) const;
- int getDotNewPredJumpOp(const MachineInstr *MI,
+ int getDotCurOp(const MachineInstr &MI) const;
+ int getDotNewOp(const MachineInstr &MI) const;
+ int getDotNewPredJumpOp(const MachineInstr &MI,
const MachineBranchProbabilityInfo *MBPI) const;
- int getDotNewPredOp(const MachineInstr *MI,
+ int getDotNewPredOp(const MachineInstr &MI,
const MachineBranchProbabilityInfo *MBPI) const;
int getDotOldOp(const int opc) const;
- HexagonII::SubInstructionGroup getDuplexCandidateGroup(const MachineInstr *MI)
+ HexagonII::SubInstructionGroup getDuplexCandidateGroup(const MachineInstr &MI)
const;
- short getEquivalentHWInstr(const MachineInstr *MI) const;
+ short getEquivalentHWInstr(const MachineInstr &MI) const;
MachineInstr *getFirstNonDbgInst(MachineBasicBlock *BB) const;
unsigned getInstrTimingClassLatency(const InstrItineraryData *ItinData,
- const MachineInstr *MI) const;
+ const MachineInstr &MI) const;
bool getInvertedPredSense(SmallVectorImpl<MachineOperand> &Cond) const;
unsigned getInvertedPredicatedOpcode(const int Opc) const;
- int getMaxValue(const MachineInstr *MI) const;
- unsigned getMemAccessSize(const MachineInstr* MI) const;
- int getMinValue(const MachineInstr *MI) const;
- short getNonExtOpcode(const MachineInstr *MI) const;
+ int getMaxValue(const MachineInstr &MI) const;
+ unsigned getMemAccessSize(const MachineInstr &MI) const;
+ int getMinValue(const MachineInstr &MI) const;
+ short getNonExtOpcode(const MachineInstr &MI) const;
bool getPredReg(ArrayRef<MachineOperand> Cond, unsigned &PredReg,
unsigned &PredRegPos, unsigned &PredRegFlags) const;
- short getPseudoInstrPair(const MachineInstr *MI) const;
- short getRegForm(const MachineInstr *MI) const;
- unsigned getSize(const MachineInstr *MI) const;
- uint64_t getType(const MachineInstr* MI) const;
- unsigned getUnits(const MachineInstr* MI) const;
+ short getPseudoInstrPair(const MachineInstr &MI) const;
+ short getRegForm(const MachineInstr &MI) const;
+ unsigned getSize(const MachineInstr &MI) const;
+ uint64_t getType(const MachineInstr &MI) const;
+ unsigned getUnits(const MachineInstr &MI) const;
unsigned getValidSubTargets(const unsigned Opcode) const;
@@ -395,14 +428,14 @@ public:
unsigned nonDbgBundleSize(MachineBasicBlock::const_iterator BundleHead) const;
- void immediateExtend(MachineInstr *MI) const;
- bool invertAndChangeJumpTarget(MachineInstr* MI,
+ void immediateExtend(MachineInstr &MI) const;
+ bool invertAndChangeJumpTarget(MachineInstr &MI,
MachineBasicBlock* NewTarget) const;
void genAllInsnTimingClasses(MachineFunction &MF) const;
- bool reversePredSense(MachineInstr* MI) const;
+ bool reversePredSense(MachineInstr &MI) const;
unsigned reversePrediction(unsigned Opcode) const;
bool validateBranchCond(const ArrayRef<MachineOperand> &Cond) const;
- short xformRegToImmOffset(const MachineInstr *MI) const;
+ short xformRegToImmOffset(const MachineInstr &MI) const;
};
}
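(Editorial aside, not part of the patch.) The bulk of this header hunk is the mechanical MachineInstr* to MachineInstr& migration. A stand-alone sketch of what the reference signature buys, using stand-in types rather than LLVM's: the contract "a valid instruction is required" moves into the type, so null handling collapses to a single dereference at the call boundary instead of a check (or assert) inside every query.

#include <cassert>
#include <cstdint>

struct Instr { uint64_t TSFlags = 0; };          // stand-in for MachineInstr

// Pointer-taking style: each query defends against null itself.
unsigned typeOld(const Instr *MI) {
  assert(MI && "expected an instruction");
  return MI->TSFlags & 0x3f;
}

// Reference-taking style (what the header above adopts): unconditional query.
unsigned typeNew(const Instr &MI) {
  return MI.TSFlags & 0x3f;
}

int main() {
  Instr I;
  I.TSFlags = 0x2a;
  return typeOld(&I) == typeNew(I) ? 0 : 1;      // both read the same field
}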
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td
index 74dc5ac9a3ad..c5719ad5b6d8 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.td
@@ -14,54 +14,6 @@
include "HexagonInstrFormats.td"
include "HexagonOperands.td"
include "HexagonInstrEnc.td"
-// Pattern fragment that combines the value type and the register class
-// into a single parameter.
-// The pat frags in the definitions below need to have a named register,
-// otherwise i32 will be assumed regardless of the register class. The
-// name of the register does not matter.
-def I1 : PatLeaf<(i1 PredRegs:$R)>;
-def I32 : PatLeaf<(i32 IntRegs:$R)>;
-def I64 : PatLeaf<(i64 DoubleRegs:$R)>;
-def F32 : PatLeaf<(f32 IntRegs:$R)>;
-def F64 : PatLeaf<(f64 DoubleRegs:$R)>;
-
-// Pattern fragments to extract the low and high subregisters from a
-// 64-bit value.
-def LoReg: OutPatFrag<(ops node:$Rs),
- (EXTRACT_SUBREG (i64 $Rs), subreg_loreg)>;
-def HiReg: OutPatFrag<(ops node:$Rs),
- (EXTRACT_SUBREG (i64 $Rs), subreg_hireg)>;
-
-def orisadd: PatFrag<(ops node:$Addr, node:$off),
- (or node:$Addr, node:$off), [{ return orIsAdd(N); }]>;
-
-// SDNode for converting immediate C to C-1.
-def DEC_CONST_SIGNED : SDNodeXForm<imm, [{
- // Return the byte immediate const-1 as an SDNode.
- int32_t imm = N->getSExtValue();
- return XformSToSM1Imm(imm, SDLoc(N));
-}]>;
-
-// SDNode for converting immediate C to C-2.
-def DEC2_CONST_SIGNED : SDNodeXForm<imm, [{
- // Return the byte immediate const-2 as an SDNode.
- int32_t imm = N->getSExtValue();
- return XformSToSM2Imm(imm, SDLoc(N));
-}]>;
-
-// SDNode for converting immediate C to C-3.
-def DEC3_CONST_SIGNED : SDNodeXForm<imm, [{
- // Return the byte immediate const-3 as an SDNode.
- int32_t imm = N->getSExtValue();
- return XformSToSM3Imm(imm, SDLoc(N));
-}]>;
-
-// SDNode for converting immediate C to C-1.
-def DEC_CONST_UNSIGNED : SDNodeXForm<imm, [{
- // Return the byte immediate const-1 as an SDNode.
- uint32_t imm = N->getZExtValue();
- return XformUToUM1Imm(imm, SDLoc(N));
-}]>;
//===----------------------------------------------------------------------===//
// Compare
@@ -92,32 +44,15 @@ class T_CMP <string mnemonic, bits<2> MajOp, bit isNot, Operand ImmOp>
let Inst{1-0} = dst;
}
-def C2_cmpeqi : T_CMP <"cmp.eq", 0b00, 0, s10Ext>;
-def C2_cmpgti : T_CMP <"cmp.gt", 0b01, 0, s10Ext>;
-def C2_cmpgtui : T_CMP <"cmp.gtu", 0b10, 0, u9Ext>;
-
-class T_CMP_pat <InstHexagon MI, PatFrag OpNode, PatLeaf ImmPred>
- : Pat<(i1 (OpNode (i32 IntRegs:$src1), ImmPred:$src2)),
- (MI IntRegs:$src1, ImmPred:$src2)>;
-
-def : T_CMP_pat <C2_cmpeqi, seteq, s10ImmPred>;
-def : T_CMP_pat <C2_cmpgti, setgt, s10ImmPred>;
-def : T_CMP_pat <C2_cmpgtui, setugt, u9ImmPred>;
+def C2_cmpeqi : T_CMP <"cmp.eq", 0b00, 0, s10_0Ext>;
+def C2_cmpgti : T_CMP <"cmp.gt", 0b01, 0, s10_0Ext>;
+def C2_cmpgtui : T_CMP <"cmp.gtu", 0b10, 0, u9_0Ext>;
//===----------------------------------------------------------------------===//
// ALU32/ALU +
//===----------------------------------------------------------------------===//
// Add.
-def SDT_Int32Leaf : SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>;
-def SDT_Int32Unary : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
-
-def SDTHexagonI64I32I32 : SDTypeProfile<1, 2,
- [SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>;
-
-def HexagonCOMBINE : SDNode<"HexagonISD::COMBINE", SDTHexagonI64I32I32>;
-def HexagonPACKHL : SDNode<"HexagonISD::PACKHL", SDTHexagonI64I32I32>;
-
let hasSideEffects = 0, hasNewValue = 1, InputType = "reg" in
class T_ALU32_3op<string mnemonic, bits<3> MajOp, bits<3> MinOp, bit OpsRev,
bit IsComm>
@@ -227,17 +162,6 @@ defm or : T_ALU32_3op_A2<"or", 0b001, 0b001, 0, 1>;
defm sub : T_ALU32_3op_A2<"sub", 0b011, 0b001, 1, 0>;
defm xor : T_ALU32_3op_A2<"xor", 0b001, 0b011, 0, 1>;
-// Pats for instruction selection.
-class BinOp32_pat<SDNode Op, InstHexagon MI, ValueType ResT>
- : Pat<(ResT (Op (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))),
- (ResT (MI IntRegs:$Rs, IntRegs:$Rt))>;
-
-def: BinOp32_pat<add, A2_add, i32>;
-def: BinOp32_pat<and, A2_and, i32>;
-def: BinOp32_pat<or, A2_or, i32>;
-def: BinOp32_pat<sub, A2_sub, i32>;
-def: BinOp32_pat<xor, A2_xor, i32>;
-
// A few special cases producing register pairs:
let OutOperandList = (outs DoubleRegs:$Rd), hasNewValue = 0 in {
def S2_packhl : T_ALU32_3op <"packhl", 0b101, 0b100, 0, 0>;
@@ -252,9 +176,6 @@ let OutOperandList = (outs DoubleRegs:$Rd), hasNewValue = 0 in {
def C2_ccombinewnewf : T_ALU32_3op_pred<"combine", 0b101, 0b000, 0, 1, 1>;
}
-def: BinOp32_pat<HexagonCOMBINE, A2_combinew, i64>;
-def: BinOp32_pat<HexagonPACKHL, S2_packhl, i64>;
-
let hasSideEffects = 0, hasNewValue = 1, isCompare = 1, InputType = "reg" in
class T_ALU32_3op_cmp<string mnemonic, bits<2> MinOp, bit IsNeg, bit IsComm>
: ALU32_rr<(outs PredRegs:$Pd), (ins IntRegs:$Rs, IntRegs:$Rt),
@@ -282,23 +203,6 @@ let Itinerary = ALU32_3op_tc_2early_SLOT0123 in {
def C2_cmpgtu : T_ALU32_3op_cmp< "cmp.gtu", 0b11, 0, 0>;
}
-// Patfrag to convert the usual comparison patfrags (e.g. setlt) to ones
-// that reverse the order of the operands.
-class RevCmp<PatFrag F> : PatFrag<(ops node:$rhs, node:$lhs), F.Fragment>;
-
-// Pats for compares. They use PatFrags as operands, not SDNodes,
-// since seteq/setgt/etc. are defined as PatFrags.
-class T_cmp32_rr_pat<InstHexagon MI, PatFrag Op, ValueType VT>
- : Pat<(VT (Op (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))),
- (VT (MI IntRegs:$Rs, IntRegs:$Rt))>;
-
-def: T_cmp32_rr_pat<C2_cmpeq, seteq, i1>;
-def: T_cmp32_rr_pat<C2_cmpgt, setgt, i1>;
-def: T_cmp32_rr_pat<C2_cmpgtu, setugt, i1>;
-
-def: T_cmp32_rr_pat<C2_cmpgt, RevCmp<setlt>, i1>;
-def: T_cmp32_rr_pat<C2_cmpgtu, RevCmp<setult>, i1>;
-
let CextOpcode = "MUX", InputType = "reg", hasNewValue = 1 in
def C2_mux: ALU32_rr<(outs IntRegs:$Rd),
(ins PredRegs:$Pu, IntRegs:$Rs, IntRegs:$Rt),
@@ -320,9 +224,6 @@ def C2_mux: ALU32_rr<(outs IntRegs:$Rd),
let Inst{4-0} = Rd;
}
-def: Pat<(i32 (select (i1 PredRegs:$Pu), (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))),
- (C2_mux PredRegs:$Pu, IntRegs:$Rs, IntRegs:$Rt)>;
-
// Combines the two immediates into a double register.
// Increase complexity to make it greater than any complexity of a combine
// that involves a register.
@@ -330,10 +231,9 @@ def: Pat<(i32 (select (i1 PredRegs:$Pu), (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))),
let isReMaterializable = 1, isMoveImm = 1, isAsCheapAsAMove = 1,
isExtentSigned = 1, isExtendable = 1, opExtentBits = 8, opExtendable = 1,
AddedComplexity = 75 in
-def A2_combineii: ALU32Inst <(outs DoubleRegs:$Rdd), (ins s8Ext:$s8, s8Imm:$S8),
+def A2_combineii: ALU32Inst <(outs DoubleRegs:$Rdd), (ins s8_0Ext:$s8, s8_0Imm:$S8),
"$Rdd = combine(#$s8, #$S8)",
- [(set (i64 DoubleRegs:$Rdd),
- (i64 (HexagonCOMBINE(i32 s32ImmPred:$s8), (i32 s8ImmPred:$S8))))]> {
+ []> {
bits<5> Rdd;
bits<8> s8;
bits<8> S8;
@@ -352,7 +252,7 @@ def A2_combineii: ALU32Inst <(outs DoubleRegs:$Rdd), (ins s8Ext:$s8, s8Imm:$S8),
let hasNewValue = 1, hasSideEffects = 0 in
class T_Addri_Pred <bit PredNot, bit PredNew>
: ALU32_ri <(outs IntRegs:$Rd),
- (ins PredRegs:$Pu, IntRegs:$Rs, s8Ext:$s8),
+ (ins PredRegs:$Pu, IntRegs:$Rs, s8_0Ext:$s8),
!if(PredNot, "if (!$Pu", "if ($Pu")#!if(PredNew,".new) $Rd = ",
") $Rd = ")#"add($Rs, #$s8)"> {
bits<5> Rd;
@@ -406,8 +306,8 @@ multiclass Addri_Pred<string mnemonic, bit PredNot> {
let isExtendable = 1, isExtentSigned = 1, InputType = "imm" in
multiclass Addri_base<string mnemonic, SDNode OpNode> {
let CextOpcode = mnemonic, BaseOpcode = mnemonic#_ri in {
- let opExtendable = 2, opExtentBits = 16, isPredicable = 1 in
- def A2_#NAME : T_Addri<s16Ext>;
+ let opExtendable = 2, opExtentBits = 16, isPredicable = 1, isAdd = 1 in
+ def A2_#NAME : T_Addri<s16_0Ext>;
let opExtendable = 3, opExtentBits = 8, isPredicated = 1 in {
defm A2_p#NAME#t : Addri_Pred<mnemonic, 0>;
@@ -418,9 +318,6 @@ multiclass Addri_base<string mnemonic, SDNode OpNode> {
defm addi : Addri_base<"add", add>, ImmRegRel, PredNewRel;
-def: Pat<(i32 (add I32:$Rs, s32ImmPred:$s16)),
- (i32 (A2_addi I32:$Rs, imm:$s16))>;
-
let hasNewValue = 1, hasSideEffects = 0, isPseudo = 1 in
def A2_iconst
: ALU32_ri <(outs IntRegs:$Rd),
@@ -436,9 +333,9 @@ let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 10,
InputType = "imm", hasNewValue = 1 in
class T_ALU32ri_logical <string mnemonic, SDNode OpNode, bits<2> MinOp>
: ALU32_ri <(outs IntRegs:$Rd),
- (ins IntRegs:$Rs, s10Ext:$s10),
+ (ins IntRegs:$Rs, s10_0Ext:$s10),
"$Rd = "#mnemonic#"($Rs, #$s10)" ,
- [(set (i32 IntRegs:$Rd), (OpNode (i32 IntRegs:$Rs), s32ImmPred:$s10))]> {
+ []> {
bits<5> Rd;
bits<5> Rs;
bits<10> s10;
@@ -461,7 +358,7 @@ def A2_andir : T_ALU32ri_logical<"and", and, 0b00>, ImmRegRel;
// Rd32=sub(#s10,Rs32)
let isExtendable = 1, CextOpcode = "sub", opExtendable = 1, isExtentSigned = 1,
opExtentBits = 10, InputType = "imm", hasNewValue = 1, hasSideEffects = 0 in
-def A2_subri: ALU32_ri <(outs IntRegs:$Rd), (ins s10Ext:$s10, IntRegs:$Rs),
+def A2_subri: ALU32_ri <(outs IntRegs:$Rd), (ins s10_0Ext:$s10, IntRegs:$Rs),
"$Rd = sub(#$s10, $Rs)", []>, ImmRegRel {
bits<5> Rd;
bits<10> s10;
@@ -483,16 +380,9 @@ def A2_nop: ALU32Inst <(outs), (ins), "nop" > {
let Inst{27-24} = 0b1111;
}
-def: Pat<(sub s32ImmPred:$s10, IntRegs:$Rs),
- (A2_subri imm:$s10, IntRegs:$Rs)>;
-
-// Rd = not(Rs) gets mapped to Rd=sub(#-1, Rs).
-def: Pat<(not (i32 IntRegs:$src1)),
- (A2_subri -1, IntRegs:$src1)>;
-
let hasSideEffects = 0, hasNewValue = 1 in
class T_tfr16<bit isHi>
- : ALU32Inst <(outs IntRegs:$Rx), (ins IntRegs:$src1, u16Imm:$u16),
+ : ALU32Inst <(outs IntRegs:$Rx), (ins IntRegs:$src1, u16_0Imm:$u16),
"$Rx"#!if(isHi, ".h", ".l")#" = #$u16",
[], "$src1 = $Rx" > {
bits<5> Rx;
@@ -601,7 +491,7 @@ let InputType = "imm", isExtendable = 1, isExtentSigned = 1, opExtentBits = 12,
isMoveImm = 1, opExtendable = 2, BaseOpcode = "TFRI", CextOpcode = "TFR",
hasSideEffects = 0, isPredicated = 1, hasNewValue = 1 in
class T_TFRI_Pred<bit PredNot, bit PredNew>
- : ALU32_ri<(outs IntRegs:$Rd), (ins PredRegs:$Pu, s12Ext:$s12),
+ : ALU32_ri<(outs IntRegs:$Rd), (ins PredRegs:$Pu, s12_0Ext:$s12),
"if ("#!if(PredNot,"!","")#"$Pu"#!if(PredNew,".new","")#") $Rd = #$s12",
[], "", ALU32_2op_tc_1_SLOT0123>, ImmRegRel, PredNewRel {
let isPredicatedFalse = PredNot;
@@ -630,8 +520,8 @@ let InputType = "imm", isExtendable = 1, isExtentSigned = 1,
CextOpcode = "TFR", BaseOpcode = "TFRI", hasNewValue = 1, opNewValue = 0,
isAsCheapAsAMove = 1 , opExtendable = 1, opExtentBits = 16, isMoveImm = 1,
isPredicated = 0, isPredicable = 1, isReMaterializable = 1 in
-def A2_tfrsi : ALU32Inst<(outs IntRegs:$Rd), (ins s16Ext:$s16), "$Rd = #$s16",
- [(set (i32 IntRegs:$Rd), s32ImmPred:$s16)], "", ALU32_2op_tc_1_SLOT0123>,
+def A2_tfrsi : ALU32Inst<(outs IntRegs:$Rd), (ins s16_0Ext:$s16), "$Rd = #$s16",
+ [], "", ALU32_2op_tc_1_SLOT0123>,
ImmRegRel, PredRel {
bits<5> Rd;
bits<16> s16;
@@ -649,17 +539,17 @@ defm A2_tfrp : TFR64_base<"TFR64">, PredNewRel;
// Assembler mapped
let isReMaterializable = 1, isMoveImm = 1, isAsCheapAsAMove = 1,
isAsmParserOnly = 1 in
-def A2_tfrpi : ALU64_rr<(outs DoubleRegs:$dst), (ins s8Imm64:$src1),
+def A2_tfrpi : ALU64_rr<(outs DoubleRegs:$dst), (ins s8_0Imm64:$src1),
"$dst = #$src1",
- [(set (i64 DoubleRegs:$dst), s8Imm64Pred:$src1)]>;
+ []>;
// TODO: see if this instruction can be deleted..
let isExtendable = 1, opExtendable = 1, opExtentBits = 6,
isAsmParserOnly = 1 in {
-def TFRI64_V4 : ALU64_rr<(outs DoubleRegs:$dst), (ins u64Imm:$src1),
+def TFRI64_V4 : ALU64_rr<(outs DoubleRegs:$dst), (ins u64_0Imm:$src1),
"$dst = #$src1">;
def TFRI64_V2_ext : ALU64_rr<(outs DoubleRegs:$dst),
- (ins s8Ext:$src1, s8Imm:$src2),
+ (ins s8_0Ext:$src1, s8_0Imm:$src2),
"$dst = combine(##$src1, #$src2)">;
}
@@ -692,27 +582,20 @@ class T_MUX1 <bit MajOp, dag ins, string AsmStr>
}
let opExtendable = 2 in
-def C2_muxri : T_MUX1<0b1, (ins PredRegs:$Pu, s8Ext:$s8, IntRegs:$Rs),
+def C2_muxri : T_MUX1<0b1, (ins PredRegs:$Pu, s8_0Ext:$s8, IntRegs:$Rs),
"$Rd = mux($Pu, #$s8, $Rs)">;
let opExtendable = 3 in
-def C2_muxir : T_MUX1<0b0, (ins PredRegs:$Pu, IntRegs:$Rs, s8Ext:$s8),
+def C2_muxir : T_MUX1<0b0, (ins PredRegs:$Pu, IntRegs:$Rs, s8_0Ext:$s8),
"$Rd = mux($Pu, $Rs, #$s8)">;
-def : Pat<(i32 (select I1:$Pu, s32ImmPred:$s8, I32:$Rs)),
- (C2_muxri I1:$Pu, s32ImmPred:$s8, I32:$Rs)>;
-
-def : Pat<(i32 (select I1:$Pu, I32:$Rs, s32ImmPred:$s8)),
- (C2_muxir I1:$Pu, I32:$Rs, s32ImmPred:$s8)>;
-
// C2_muxii: Scalar mux immediates.
let isExtentSigned = 1, hasNewValue = 1, isExtendable = 1,
opExtentBits = 8, opExtendable = 2 in
def C2_muxii: ALU32Inst <(outs IntRegs:$Rd),
- (ins PredRegs:$Pu, s8Ext:$s8, s8Imm:$S8),
+ (ins PredRegs:$Pu, s8_0Ext:$s8, s8_0Imm:$S8),
"$Rd = mux($Pu, #$s8, #$S8)" ,
- [(set (i32 IntRegs:$Rd),
- (i32 (select I1:$Pu, s32ImmPred:$s8, s8ImmPred:$S8)))] > {
+ []> {
bits<5> Rd;
bits<2> Pu;
bits<8> s8;
@@ -729,9 +612,9 @@ def C2_muxii: ALU32Inst <(outs IntRegs:$Rd),
}
let isCodeGenOnly = 1, isPseudo = 1 in
-def MUX64_rr : ALU64_rr<(outs DoubleRegs:$Rd),
- (ins PredRegs:$Pu, DoubleRegs:$Rs, DoubleRegs:$Rt),
- ".error \"should not emit\" ", []>;
+def PS_pselect : ALU64_rr<(outs DoubleRegs:$Rd),
+ (ins PredRegs:$Pu, DoubleRegs:$Rs, DoubleRegs:$Rt),
+ ".error \"should not emit\" ", []>;
//===----------------------------------------------------------------------===//
@@ -809,7 +692,7 @@ defm sxth : ALU32_2op_base<"sxth", 0b111>, PredNewRel;
defm zxth : ALU32_2op_base<"zxth", 0b110>, PredNewRel;
// Rd=zxtb(Rs): assembler mapped to Rd=and(Rs,#255).
-// Compiler would want to generate 'zxtb' instead of 'and' becuase 'zxtb' has
+// Compiler would want to generate 'zxtb' instead of 'and' because 'zxtb' has
// predicated forms while 'and' doesn't. Since integrated assembler can't
// handle 'mapped' instructions, we need to encode 'zxtb' same as 'and' where
// immediate operand is set to '255'.
@@ -845,11 +728,6 @@ multiclass ZXTB_base <string mnemonic, bits<3> minOp> {
defm zxtb : ZXTB_base<"zxtb",0b100>, PredNewRel;
-def: Pat<(shl I32:$src1, (i32 16)), (A2_aslh I32:$src1)>;
-def: Pat<(sra I32:$src1, (i32 16)), (A2_asrh I32:$src1)>;
-def: Pat<(sext_inreg I32:$src1, i8), (A2_sxtb I32:$src1)>;
-def: Pat<(sext_inreg I32:$src1, i16), (A2_sxth I32:$src1)>;
-
//===----------------------------------------------------------------------===//
// Template class for vector add and avg
//===----------------------------------------------------------------------===//
@@ -980,10 +858,6 @@ class T_vcmp <string Str, bits<4> minOp>
let Inst{12-8} = Rtt;
}
-class T_vcmp_pat<InstHexagon MI, PatFrag Op, ValueType T>
- : Pat<(i1 (Op (T DoubleRegs:$Rss), (T DoubleRegs:$Rtt))),
- (i1 (MI DoubleRegs:$Rss, DoubleRegs:$Rtt))>;
-
// Vector compare bytes
def A2_vcmpbeq : T_vcmp <"vcmpb.eq", 0b0110>;
def A2_vcmpbgtu : T_vcmp <"vcmpb.gtu", 0b0111>;
@@ -998,15 +872,6 @@ def A2_vcmpweq : T_vcmp <"vcmpw.eq", 0b0000>;
def A2_vcmpwgt : T_vcmp <"vcmpw.gt", 0b0001>;
def A2_vcmpwgtu : T_vcmp <"vcmpw.gtu", 0b0010>;
-def: T_vcmp_pat<A2_vcmpbeq, seteq, v8i8>;
-def: T_vcmp_pat<A2_vcmpbgtu, setugt, v8i8>;
-def: T_vcmp_pat<A2_vcmpheq, seteq, v4i16>;
-def: T_vcmp_pat<A2_vcmphgt, setgt, v4i16>;
-def: T_vcmp_pat<A2_vcmphgtu, setugt, v4i16>;
-def: T_vcmp_pat<A2_vcmpweq, seteq, v2i32>;
-def: T_vcmp_pat<A2_vcmpwgt, setgt, v2i32>;
-def: T_vcmp_pat<A2_vcmpwgtu, setugt, v2i32>;
-
//===----------------------------------------------------------------------===//
// ALU32/PERM -
//===----------------------------------------------------------------------===//
@@ -1019,10 +884,10 @@ def: T_vcmp_pat<A2_vcmpwgtu, setugt, v2i32>;
// transform it to cmp.gt subtracting 1 from the immediate.
let isPseudo = 1 in {
def C2_cmpgei: ALU32Inst <
- (outs PredRegs:$Pd), (ins IntRegs:$Rs, s8Ext:$s8),
+ (outs PredRegs:$Pd), (ins IntRegs:$Rs, s8_0Ext:$s8),
"$Pd = cmp.ge($Rs, #$s8)">;
def C2_cmpgeui: ALU32Inst <
- (outs PredRegs:$Pd), (ins IntRegs:$Rs, u8Ext:$s8),
+ (outs PredRegs:$Pd), (ins IntRegs:$Rs, u8_0Ext:$s8),
"$Pd = cmp.geu($Rs, #$s8)">;
}
@@ -1112,23 +977,6 @@ let Itinerary = ALU64_tc_2_SLOT23, Defs = [USR_OVF] in {
def A2_addh_h16_sat_hh : T_XTYPE_ADD_SUB <0b11, 1, 1, 0>;
}
-// Add halfword.
-def: Pat<(sext_inreg (add I32:$src1, I32:$src2), i16),
- (A2_addh_l16_ll I32:$src1, I32:$src2)>;
-
-def: Pat<(sra (add (shl I32:$src1, (i32 16)), I32:$src2), (i32 16)),
- (A2_addh_l16_hl I32:$src1, I32:$src2)>;
-
-def: Pat<(shl (add I32:$src1, I32:$src2), (i32 16)),
- (A2_addh_h16_ll I32:$src1, I32:$src2)>;
-
-// Subtract halfword.
-def: Pat<(sext_inreg (sub I32:$src1, I32:$src2), i16),
- (A2_subh_l16_ll I32:$src1, I32:$src2)>;
-
-def: Pat<(shl (sub I32:$src1, I32:$src2), (i32 16)),
- (A2_subh_h16_ll I32:$src1, I32:$src2)>;
-
let hasSideEffects = 0, hasNewValue = 1 in
def S2_parityp: ALU64Inst<(outs IntRegs:$Rd),
(ins DoubleRegs:$Rs, DoubleRegs:$Rt),
@@ -1168,52 +1016,6 @@ def A2_minu : T_XTYPE_MIN_MAX < 0, 1 >;
def A2_max : T_XTYPE_MIN_MAX < 1, 0 >;
def A2_maxu : T_XTYPE_MIN_MAX < 1, 1 >;
-// Here, depending on the operand being selected, we'll either generate a
-// min or max instruction.
-// Ex:
-// (a>b)?a:b --> max(a,b) => Here check performed is '>' and the value selected
-// is the larger of two. So, the corresponding HexagonInst is passed in 'Inst'.
-// (a>b)?b:a --> min(a,b) => Here check performed is '>' but the smaller value
-// is selected and the corresponding HexagonInst is passed in 'SwapInst'.
-
-multiclass T_MinMax_pats <PatFrag Op, RegisterClass RC, ValueType VT,
- InstHexagon Inst, InstHexagon SwapInst> {
- def: Pat<(select (i1 (Op (VT RC:$src1), (VT RC:$src2))),
- (VT RC:$src1), (VT RC:$src2)),
- (Inst RC:$src1, RC:$src2)>;
- def: Pat<(select (i1 (Op (VT RC:$src1), (VT RC:$src2))),
- (VT RC:$src2), (VT RC:$src1)),
- (SwapInst RC:$src1, RC:$src2)>;
-}
-
-
-multiclass MinMax_pats <PatFrag Op, InstHexagon Inst, InstHexagon SwapInst> {
- defm: T_MinMax_pats<Op, IntRegs, i32, Inst, SwapInst>;
-
- def: Pat<(sext_inreg (i32 (select (i1 (Op (i32 PositiveHalfWord:$src1),
- (i32 PositiveHalfWord:$src2))),
- (i32 PositiveHalfWord:$src1),
- (i32 PositiveHalfWord:$src2))), i16),
- (Inst IntRegs:$src1, IntRegs:$src2)>;
-
- def: Pat<(sext_inreg (i32 (select (i1 (Op (i32 PositiveHalfWord:$src1),
- (i32 PositiveHalfWord:$src2))),
- (i32 PositiveHalfWord:$src2),
- (i32 PositiveHalfWord:$src1))), i16),
- (SwapInst IntRegs:$src1, IntRegs:$src2)>;
-}
-
-let AddedComplexity = 200 in {
- defm: MinMax_pats<setge, A2_max, A2_min>;
- defm: MinMax_pats<setgt, A2_max, A2_min>;
- defm: MinMax_pats<setle, A2_min, A2_max>;
- defm: MinMax_pats<setlt, A2_min, A2_max>;
- defm: MinMax_pats<setuge, A2_maxu, A2_minu>;
- defm: MinMax_pats<setugt, A2_maxu, A2_minu>;
- defm: MinMax_pats<setule, A2_minu, A2_maxu>;
- defm: MinMax_pats<setult, A2_minu, A2_maxu>;
-}
-
class T_cmp64_rr<string mnemonic, bits<3> MinOp, bit IsComm>
: ALU64_rr<(outs PredRegs:$Pd), (ins DoubleRegs:$Rs, DoubleRegs:$Rt),
"$Pd = "#mnemonic#"($Rs, $Rt)", [], "", ALU64_tc_2early_SLOT23> {
@@ -1237,16 +1039,6 @@ def C2_cmpeqp : T_cmp64_rr<"cmp.eq", 0b000, 1>;
def C2_cmpgtp : T_cmp64_rr<"cmp.gt", 0b010, 0>;
def C2_cmpgtup : T_cmp64_rr<"cmp.gtu", 0b100, 0>;
-class T_cmp64_rr_pat<InstHexagon MI, PatFrag CmpOp>
- : Pat<(i1 (CmpOp (i64 DoubleRegs:$Rs), (i64 DoubleRegs:$Rt))),
- (i1 (MI DoubleRegs:$Rs, DoubleRegs:$Rt))>;
-
-def: T_cmp64_rr_pat<C2_cmpeqp, seteq>;
-def: T_cmp64_rr_pat<C2_cmpgtp, setgt>;
-def: T_cmp64_rr_pat<C2_cmpgtup, setugt>;
-def: T_cmp64_rr_pat<C2_cmpgtp, RevCmp<setlt>>;
-def: T_cmp64_rr_pat<C2_cmpgtup, RevCmp<setult>>;
-
def C2_vmux : ALU64_rr<(outs DoubleRegs:$Rd),
(ins PredRegs:$Pu, DoubleRegs:$Rs, DoubleRegs:$Rt),
"$Rd = vmux($Pu, $Rs, $Rt)", [], "", ALU64_tc_1_SLOT23> {
@@ -1292,12 +1084,10 @@ class T_ALU64_arith<string mnemonic, bits<3> MajOp, bits<3> MinOp, bit IsSat,
: T_ALU64_rr<mnemonic, !if(IsSat,":sat",""), 0b0011, MajOp, MinOp, OpsRev,
IsComm, "">;
+let isAdd = 1 in
def A2_addp : T_ALU64_arith<"add", 0b000, 0b111, 0, 0, 1>;
def A2_subp : T_ALU64_arith<"sub", 0b001, 0b111, 0, 1, 0>;
-def: Pat<(i64 (add I64:$Rs, I64:$Rt)), (A2_addp I64:$Rs, I64:$Rt)>;
-def: Pat<(i64 (sub I64:$Rs, I64:$Rt)), (A2_subp I64:$Rs, I64:$Rt)>;
-
class T_ALU64_logical<string mnemonic, bits<3> MinOp, bit OpsRev, bit IsComm,
bit IsNeg>
: T_ALU64_rr<mnemonic, "", 0b0011, 0b111, MinOp, OpsRev, IsComm,
@@ -1307,10 +1097,6 @@ def A2_andp : T_ALU64_logical<"and", 0b000, 0, 1, 0>;
def A2_orp : T_ALU64_logical<"or", 0b010, 0, 1, 0>;
def A2_xorp : T_ALU64_logical<"xor", 0b100, 0, 1, 0>;
-def: Pat<(i64 (and I64:$Rs, I64:$Rt)), (A2_andp I64:$Rs, I64:$Rt)>;
-def: Pat<(i64 (or I64:$Rs, I64:$Rt)), (A2_orp I64:$Rs, I64:$Rt)>;
-def: Pat<(i64 (xor I64:$Rs, I64:$Rt)), (A2_xorp I64:$Rs, I64:$Rt)>;
-
//===----------------------------------------------------------------------===//
// ALU64/ALU -
//===----------------------------------------------------------------------===//
@@ -1361,9 +1147,6 @@ def C2_any8 : T_LOGICAL_1OP<"any8", 0b00>;
def C2_all8 : T_LOGICAL_1OP<"all8", 0b01>;
def C2_not : T_LOGICAL_1OP<"not", 0b10>;
-def: Pat<(i1 (not (i1 PredRegs:$Ps))),
- (C2_not PredRegs:$Ps)>;
-
let hasSideEffects = 0 in
class T_LOGICAL_2OP<string MnOp, bits<3> OpBits, bit IsNeg, bit Rev>
: CRInst<(outs PredRegs:$Pd), (ins PredRegs:$Ps, PredRegs:$Pt),
@@ -1389,12 +1172,6 @@ def C2_xor : T_LOGICAL_2OP<"xor", 0b010, 0, 0>;
def C2_andn : T_LOGICAL_2OP<"and", 0b011, 1, 1>;
def C2_orn : T_LOGICAL_2OP<"or", 0b111, 1, 1>;
-def: Pat<(i1 (and I1:$Ps, I1:$Pt)), (C2_and I1:$Ps, I1:$Pt)>;
-def: Pat<(i1 (or I1:$Ps, I1:$Pt)), (C2_or I1:$Ps, I1:$Pt)>;
-def: Pat<(i1 (xor I1:$Ps, I1:$Pt)), (C2_xor I1:$Ps, I1:$Pt)>;
-def: Pat<(i1 (and I1:$Ps, (not I1:$Pt))), (C2_andn I1:$Ps, I1:$Pt)>;
-def: Pat<(i1 (or I1:$Ps, (not I1:$Pt))), (C2_orn I1:$Ps, I1:$Pt)>;
-
let hasSideEffects = 0, hasNewValue = 1 in
def C2_vitpack : SInst<(outs IntRegs:$Rd), (ins PredRegs:$Ps, PredRegs:$Pt),
"$Rd = vitpack($Ps, $Pt)", [], "", S_2op_tc_1_SLOT23> {
@@ -1431,10 +1208,6 @@ def C2_mask : SInst<(outs DoubleRegs:$Rd), (ins PredRegs:$Pt),
// JR +
//===----------------------------------------------------------------------===//
-def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone,
- [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
-def eh_return: SDNode<"HexagonISD::EH_RETURN", SDTNone, [SDNPHasChain]>;
-
class CondStr<string CReg, bit True, bit New> {
string S = "if (" # !if(True,"","!") # CReg # !if(New,".new","") # ") ";
}
@@ -1587,8 +1360,8 @@ let isTerminator = 1, hasSideEffects = 0 in {
defm J2_jumpr : JMPR_base<"JMPr">, PredNewRel;
- let isReturn = 1, isCodeGenOnly = 1 in
- defm JMPret : JMPR_base<"JMPret">, PredNewRel;
+ let isReturn = 1, isPseudo = 1, isCodeGenOnly = 1 in
+ defm PS_jmpret : JMPR_base<"JMPret">, PredNewRel;
}
let validSubTargets = HasV60SubT in
@@ -1610,23 +1383,11 @@ multiclass JMPRpt_base<string BaseOp> {
defm J2_jumpr : JMPRpt_base<"JMPr">;
defm J2_jump : JMPpt_base<"JMP">;
-def: Pat<(br bb:$dst),
- (J2_jump brtarget:$dst)>;
-def: Pat<(retflag),
- (JMPret (i32 R31))>;
-def: Pat<(brcond (i1 PredRegs:$src1), bb:$offset),
- (J2_jumpt PredRegs:$src1, bb:$offset)>;
-
// A return through builtin_eh_return.
let isReturn = 1, isTerminator = 1, isBarrier = 1, hasSideEffects = 0,
isCodeGenOnly = 1, Defs = [PC], Uses = [R28], isPredicable = 0 in
def EH_RETURN_JMPR : T_JMPr;
-def: Pat<(eh_return),
- (EH_RETURN_JMPR (i32 R31))>;
-def: Pat<(brind (i32 IntRegs:$dst)),
- (J2_jumpr IntRegs:$dst)>;
-
//===----------------------------------------------------------------------===//
// JR -
//===----------------------------------------------------------------------===//
@@ -1784,45 +1545,6 @@ def L2_loadalignh_io: T_loadalign_io <"memh_fifo", 0b0010, s11_1Ext>;
let accessSize = ByteAccess, opExtentBits = 11 in
def L2_loadalignb_io: T_loadalign_io <"memb_fifo", 0b0100, s11_0Ext>;
-// Patterns to select load-indexed (i.e. load from base+offset).
-multiclass Loadx_pat<PatFrag Load, ValueType VT, PatLeaf ImmPred,
- InstHexagon MI> {
- def: Pat<(VT (Load AddrFI:$fi)), (VT (MI AddrFI:$fi, 0))>;
- def: Pat<(VT (Load (add (i32 AddrFI:$fi), ImmPred:$Off))),
- (VT (MI AddrFI:$fi, imm:$Off))>;
- def: Pat<(VT (Load (orisadd (i32 AddrFI:$fi), ImmPred:$Off))),
- (VT (MI AddrFI:$fi, imm:$Off))>;
- def: Pat<(VT (Load (add (i32 IntRegs:$Rs), ImmPred:$Off))),
- (VT (MI IntRegs:$Rs, imm:$Off))>;
- def: Pat<(VT (Load (i32 IntRegs:$Rs))), (VT (MI IntRegs:$Rs, 0))>;
-}
-
-let AddedComplexity = 20 in {
- defm: Loadx_pat<load, i32, s30_2ImmPred, L2_loadri_io>;
- defm: Loadx_pat<load, i64, s29_3ImmPred, L2_loadrd_io>;
- defm: Loadx_pat<atomic_load_8 , i32, s32_0ImmPred, L2_loadrub_io>;
- defm: Loadx_pat<atomic_load_16, i32, s31_1ImmPred, L2_loadruh_io>;
- defm: Loadx_pat<atomic_load_32, i32, s30_2ImmPred, L2_loadri_io>;
- defm: Loadx_pat<atomic_load_64, i64, s29_3ImmPred, L2_loadrd_io>;
-
- defm: Loadx_pat<extloadi1, i32, s32_0ImmPred, L2_loadrub_io>;
- defm: Loadx_pat<extloadi8, i32, s32_0ImmPred, L2_loadrub_io>;
- defm: Loadx_pat<extloadi16, i32, s31_1ImmPred, L2_loadruh_io>;
- defm: Loadx_pat<sextloadi8, i32, s32_0ImmPred, L2_loadrb_io>;
- defm: Loadx_pat<sextloadi16, i32, s31_1ImmPred, L2_loadrh_io>;
- defm: Loadx_pat<zextloadi1, i32, s32_0ImmPred, L2_loadrub_io>;
- defm: Loadx_pat<zextloadi8, i32, s32_0ImmPred, L2_loadrub_io>;
- defm: Loadx_pat<zextloadi16, i32, s31_1ImmPred, L2_loadruh_io>;
- // No sextloadi1.
-}
-
-// Sign-extending loads of i1 need to replicate the lowest bit throughout
-// the 32-bit value. Since the loaded value can only be 0 or 1, 0-v should
-// do the trick.
-let AddedComplexity = 20 in
-def: Pat<(i32 (sextloadi1 (i32 IntRegs:$Rs))),
- (A2_subri 0, (L2_loadrub_io IntRegs:$Rs, 0))>;
-
//===----------------------------------------------------------------------===//
// Post increment load
//===----------------------------------------------------------------------===//
@@ -2696,10 +2418,6 @@ def M2_mpy_up_s1_sat : T_MType_rr1 <"mpy", 0b111, 0b000, 1>;
def M2_hmmpyh_s1 : T_MType_rr2 <"mpy", 0b101, 0b000, 1, 0, ".h">;
def M2_hmmpyl_s1 : T_MType_rr2 <"mpy", 0b101, 0b001, 1, 0, ".l">;
-def: Pat<(i32 (mul I32:$src1, I32:$src2)), (M2_mpyi I32:$src1, I32:$src2)>;
-def: Pat<(i32 (mulhs I32:$src1, I32:$src2)), (M2_mpy_up I32:$src1, I32:$src2)>;
-def: Pat<(i32 (mulhu I32:$src1, I32:$src2)), (M2_mpyu_up I32:$src1, I32:$src2)>;
-
let hasNewValue = 1, opNewValue = 0 in
class T_MType_mpy_ri <bit isNeg, Operand ImmOp, list<dag> pattern>
: MInst < (outs IntRegs:$Rd), (ins IntRegs:$Rs, ImmOp:$u8),
@@ -2720,12 +2438,9 @@ class T_MType_mpy_ri <bit isNeg, Operand ImmOp, list<dag> pattern>
}
let isExtendable = 1, opExtentBits = 8, opExtendable = 2 in
-def M2_mpysip : T_MType_mpy_ri <0, u8Ext,
- [(set (i32 IntRegs:$Rd), (mul IntRegs:$Rs, u32ImmPred:$u8))]>;
+def M2_mpysip : T_MType_mpy_ri <0, u8_0Ext, []>;
-def M2_mpysin : T_MType_mpy_ri <1, u8Imm,
- [(set (i32 IntRegs:$Rd), (ineg (mul IntRegs:$Rs,
- u8ImmPred:$u8)))]>;
+def M2_mpysin : T_MType_mpy_ri <1, u8_0Imm, []>;
// Assembler mapped to M2_mpyi
let isAsmParserOnly = 1 in
@@ -2740,10 +2455,8 @@ def M2_mpyui : MInst<(outs IntRegs:$dst),
let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 9,
CextOpcode = "mpyi", InputType = "imm", hasNewValue = 1,
isAsmParserOnly = 1 in
-def M2_mpysmi : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s9Ext:$src2),
- "$dst = mpyi($src1, #$src2)",
- [(set (i32 IntRegs:$dst), (mul (i32 IntRegs:$src1),
- s32ImmPred:$src2))]>, ImmRegRel;
+def M2_mpysmi : MInst<(outs IntRegs:$dst), (ins IntRegs:$src1, s9_0Ext:$src2),
+ "$dst = mpyi($src1, #$src2)", []>, ImmRegRel;
let hasNewValue = 1, isExtendable = 1, opExtentBits = 8, opExtendable = 3,
InputType = "imm" in
@@ -2792,58 +2505,31 @@ class T_MType_acc_rr <string mnemonic, bits<3> MajOp, bits<3> MinOp,
}
let CextOpcode = "MPYI_acc", Itinerary = M_tc_3x_SLOT23 in {
- def M2_macsip : T_MType_acc_ri <"+= mpyi", 0b010, u8Ext,
- [(set (i32 IntRegs:$dst),
- (add (mul IntRegs:$src2, u32ImmPred:$src3),
- IntRegs:$src1))]>, ImmRegRel;
-
- def M2_maci : T_MType_acc_rr <"+= mpyi", 0b000, 0b000, 0,
- [(set (i32 IntRegs:$dst),
- (add (mul IntRegs:$src2, IntRegs:$src3),
- IntRegs:$src1))]>, ImmRegRel;
+ def M2_macsip : T_MType_acc_ri <"+= mpyi", 0b010, u8_0Ext, []>, ImmRegRel;
+
+ def M2_maci : T_MType_acc_rr <"+= mpyi", 0b000, 0b000, 0, []>, ImmRegRel;
}
let CextOpcode = "ADD_acc" in {
let isExtentSigned = 1 in
- def M2_accii : T_MType_acc_ri <"+= add", 0b100, s8Ext,
- [(set (i32 IntRegs:$dst),
- (add (add (i32 IntRegs:$src2), s32ImmPred:$src3),
- (i32 IntRegs:$src1)))]>, ImmRegRel;
-
- def M2_acci : T_MType_acc_rr <"+= add", 0b000, 0b001, 0,
- [(set (i32 IntRegs:$dst),
- (add (add (i32 IntRegs:$src2), (i32 IntRegs:$src3)),
- (i32 IntRegs:$src1)))]>, ImmRegRel;
+ def M2_accii : T_MType_acc_ri <"+= add", 0b100, s8_0Ext, []>, ImmRegRel;
+
+ def M2_acci : T_MType_acc_rr <"+= add", 0b000, 0b001, 0, []>, ImmRegRel;
}
let CextOpcode = "SUB_acc" in {
let isExtentSigned = 1 in
- def M2_naccii : T_MType_acc_ri <"-= add", 0b101, s8Ext>, ImmRegRel;
+ def M2_naccii : T_MType_acc_ri <"-= add", 0b101, s8_0Ext>, ImmRegRel;
def M2_nacci : T_MType_acc_rr <"-= add", 0b100, 0b001, 0>, ImmRegRel;
}
let Itinerary = M_tc_3x_SLOT23 in
-def M2_macsin : T_MType_acc_ri <"-= mpyi", 0b011, u8Ext>;
+def M2_macsin : T_MType_acc_ri <"-= mpyi", 0b011, u8_0Ext>;
def M2_xor_xacc : T_MType_acc_rr < "^= xor", 0b100, 0b011, 0>;
def M2_subacc : T_MType_acc_rr <"+= sub", 0b000, 0b011, 1>;
-class T_MType_acc_pat1 <InstHexagon MI, SDNode firstOp, SDNode secOp,
- PatLeaf ImmPred>
- : Pat <(secOp IntRegs:$src1, (firstOp IntRegs:$src2, ImmPred:$src3)),
- (MI IntRegs:$src1, IntRegs:$src2, ImmPred:$src3)>;
-
-class T_MType_acc_pat2 <InstHexagon MI, SDNode firstOp, SDNode secOp>
- : Pat <(i32 (secOp IntRegs:$src1, (firstOp IntRegs:$src2, IntRegs:$src3))),
- (MI IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
-
-def : T_MType_acc_pat2 <M2_xor_xacc, xor, xor>;
-def : T_MType_acc_pat1 <M2_macsin, mul, sub, u32ImmPred>;
-
-def : T_MType_acc_pat1 <M2_naccii, add, sub, s32ImmPred>;
-def : T_MType_acc_pat2 <M2_nacci, add, sub>;
-
//===----------------------------------------------------------------------===//
// Template Class -- XType Vector Instructions
//===----------------------------------------------------------------------===//
@@ -3189,51 +2875,6 @@ def M2_vmac2 : T_XTYPE_mpy64_acc < "vmpyh", "+", 0b001, 0b001, 0, 0, 0>;
def M2_vmac2s_s1 : T_XTYPE_mpy64_acc < "vmpyh", "+", 0b100, 0b101, 1, 1, 0>;
def M2_vmac2s_s0 : T_XTYPE_mpy64_acc < "vmpyh", "+", 0b000, 0b101, 1, 0, 0>;
-def: Pat<(i64 (mul (i64 (anyext (i32 IntRegs:$src1))),
- (i64 (anyext (i32 IntRegs:$src2))))),
- (M2_dpmpyuu_s0 IntRegs:$src1, IntRegs:$src2)>;
-
-def: Pat<(i64 (mul (i64 (sext (i32 IntRegs:$src1))),
- (i64 (sext (i32 IntRegs:$src2))))),
- (M2_dpmpyss_s0 IntRegs:$src1, IntRegs:$src2)>;
-
-def: Pat<(i64 (mul (is_sext_i32:$src1),
- (is_sext_i32:$src2))),
- (M2_dpmpyss_s0 (LoReg DoubleRegs:$src1), (LoReg DoubleRegs:$src2))>;
-
-// Multiply and accumulate, use full result.
-// Rxx[+-]=mpy(Rs,Rt)
-
-def: Pat<(i64 (add (i64 DoubleRegs:$src1),
- (mul (i64 (sext (i32 IntRegs:$src2))),
- (i64 (sext (i32 IntRegs:$src3)))))),
- (M2_dpmpyss_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
-
-def: Pat<(i64 (sub (i64 DoubleRegs:$src1),
- (mul (i64 (sext (i32 IntRegs:$src2))),
- (i64 (sext (i32 IntRegs:$src3)))))),
- (M2_dpmpyss_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
-
-def: Pat<(i64 (add (i64 DoubleRegs:$src1),
- (mul (i64 (anyext (i32 IntRegs:$src2))),
- (i64 (anyext (i32 IntRegs:$src3)))))),
- (M2_dpmpyuu_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
-
-def: Pat<(i64 (add (i64 DoubleRegs:$src1),
- (mul (i64 (zext (i32 IntRegs:$src2))),
- (i64 (zext (i32 IntRegs:$src3)))))),
- (M2_dpmpyuu_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
-
-def: Pat<(i64 (sub (i64 DoubleRegs:$src1),
- (mul (i64 (anyext (i32 IntRegs:$src2))),
- (i64 (anyext (i32 IntRegs:$src3)))))),
- (M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
-
-def: Pat<(i64 (sub (i64 DoubleRegs:$src1),
- (mul (i64 (zext (i32 IntRegs:$src2))),
- (i64 (zext (i32 IntRegs:$src3)))))),
- (M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
-
//===----------------------------------------------------------------------===//
// MTYPE/MPYH -
//===----------------------------------------------------------------------===//
@@ -3375,16 +3016,6 @@ defm storerd: ST_PostInc <"memd", "STrid", DoubleRegs, s4_3Imm, 0b1110>;
let accessSize = HalfWordAccess, isNVStorable = 0 in
defm storerf: ST_PostInc <"memh", "STrih_H", IntRegs, s4_1Imm, 0b1011, 1>;
-class Storepi_pat<PatFrag Store, PatFrag Value, PatFrag Offset,
- InstHexagon MI>
- : Pat<(Store Value:$src1, I32:$src2, Offset:$offset),
- (MI I32:$src2, imm:$offset, Value:$src1)>;
-
-def: Storepi_pat<post_truncsti8, I32, s4_0ImmPred, S2_storerb_pi>;
-def: Storepi_pat<post_truncsti16, I32, s4_1ImmPred, S2_storerh_pi>;
-def: Storepi_pat<post_store, I32, s4_2ImmPred, S2_storeri_pi>;
-def: Storepi_pat<post_store, I64, s4_3ImmPred, S2_storerd_pi>;
-
//===----------------------------------------------------------------------===//
// Template class for post increment stores with register offset.
//===----------------------------------------------------------------------===//
@@ -3535,116 +3166,6 @@ let addrMode = BaseImmOffset, InputType = "imm" in {
u6_1Ext, 0b011, 1>;
}
-// Patterns for generating stores, where the address takes different forms:
-// - frameindex,
-// - frameindex + offset,
-// - base + offset,
-// - simple (base address without offset).
-// These would usually be used together (via Storex_pat defined below), but
-// in some cases one may want to apply different properties (such as
-// AddedComplexity) to the individual patterns.
-class Storex_fi_pat<PatFrag Store, PatFrag Value, InstHexagon MI>
- : Pat<(Store Value:$Rs, AddrFI:$fi), (MI AddrFI:$fi, 0, Value:$Rs)>;
-multiclass Storex_fi_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
- InstHexagon MI> {
- def: Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)),
- (MI AddrFI:$fi, imm:$Off, Value:$Rs)>;
- def: Pat<(Store Value:$Rs, (orisadd (i32 AddrFI:$fi), ImmPred:$Off)),
- (MI AddrFI:$fi, imm:$Off, Value:$Rs)>;
-}
-multiclass Storex_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
- InstHexagon MI> {
- def: Pat<(Store Value:$Rt, (add (i32 IntRegs:$Rs), ImmPred:$Off)),
- (MI IntRegs:$Rs, imm:$Off, Value:$Rt)>;
- def: Pat<(Store Value:$Rt, (orisadd (i32 IntRegs:$Rs), ImmPred:$Off)),
- (MI IntRegs:$Rs, imm:$Off, Value:$Rt)>;
-}
-class Storex_simple_pat<PatFrag Store, PatFrag Value, InstHexagon MI>
- : Pat<(Store Value:$Rt, (i32 IntRegs:$Rs)),
- (MI IntRegs:$Rs, 0, Value:$Rt)>;
-
-// Patterns for generating stores, where the address takes different forms,
-// and where the value being stored is transformed through the value modifier
-// ValueMod. The address forms are the same as above.
-class Storexm_fi_pat<PatFrag Store, PatFrag Value, PatFrag ValueMod,
- InstHexagon MI>
- : Pat<(Store Value:$Rs, AddrFI:$fi),
- (MI AddrFI:$fi, 0, (ValueMod Value:$Rs))>;
-multiclass Storexm_fi_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
- PatFrag ValueMod, InstHexagon MI> {
- def: Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)),
- (MI AddrFI:$fi, imm:$Off, (ValueMod Value:$Rs))>;
- def: Pat<(Store Value:$Rs, (orisadd (i32 AddrFI:$fi), ImmPred:$Off)),
- (MI AddrFI:$fi, imm:$Off, (ValueMod Value:$Rs))>;
-}
-multiclass Storexm_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
- PatFrag ValueMod, InstHexagon MI> {
- def: Pat<(Store Value:$Rt, (add (i32 IntRegs:$Rs), ImmPred:$Off)),
- (MI IntRegs:$Rs, imm:$Off, (ValueMod Value:$Rt))>;
- def: Pat<(Store Value:$Rt, (orisadd (i32 IntRegs:$Rs), ImmPred:$Off)),
- (MI IntRegs:$Rs, imm:$Off, (ValueMod Value:$Rt))>;
-}
-class Storexm_simple_pat<PatFrag Store, PatFrag Value, PatFrag ValueMod,
- InstHexagon MI>
- : Pat<(Store Value:$Rt, (i32 IntRegs:$Rs)),
- (MI IntRegs:$Rs, 0, (ValueMod Value:$Rt))>;
-
-multiclass Storex_pat<PatFrag Store, PatFrag Value, PatLeaf ImmPred,
- InstHexagon MI> {
- def: Storex_fi_pat <Store, Value, MI>;
- defm: Storex_fi_add_pat <Store, Value, ImmPred, MI>;
- defm: Storex_add_pat <Store, Value, ImmPred, MI>;
-}
-
-multiclass Storexm_pat<PatFrag Store, PatFrag Value, PatLeaf ImmPred,
- PatFrag ValueMod, InstHexagon MI> {
- def: Storexm_fi_pat <Store, Value, ValueMod, MI>;
- defm: Storexm_fi_add_pat <Store, Value, ImmPred, ValueMod, MI>;
- defm: Storexm_add_pat <Store, Value, ImmPred, ValueMod, MI>;
-}
-
-// Regular stores in the DAG have two operands: value and address.
-// Atomic stores also have two, but they are reversed: address, value.
-// To use atomic stores with the patterns, they need to have their operands
-// swapped. This relies on the knowledge that the F.Fragment uses names
-// "ptr" and "val".
-class SwapSt<PatFrag F>
- : PatFrag<(ops node:$val, node:$ptr), F.Fragment, F.PredicateCode,
- F.OperandTransform>;
-
-let AddedComplexity = 20 in {
- defm: Storex_pat<truncstorei8, I32, s32_0ImmPred, S2_storerb_io>;
- defm: Storex_pat<truncstorei16, I32, s31_1ImmPred, S2_storerh_io>;
- defm: Storex_pat<store, I32, s30_2ImmPred, S2_storeri_io>;
- defm: Storex_pat<store, I64, s29_3ImmPred, S2_storerd_io>;
-
- defm: Storex_pat<SwapSt<atomic_store_8>, I32, s32_0ImmPred, S2_storerb_io>;
- defm: Storex_pat<SwapSt<atomic_store_16>, I32, s31_1ImmPred, S2_storerh_io>;
- defm: Storex_pat<SwapSt<atomic_store_32>, I32, s30_2ImmPred, S2_storeri_io>;
- defm: Storex_pat<SwapSt<atomic_store_64>, I64, s29_3ImmPred, S2_storerd_io>;
-}
-
-// Simple patterns should be tried with the lowest priority.
-def: Storex_simple_pat<truncstorei8, I32, S2_storerb_io>;
-def: Storex_simple_pat<truncstorei16, I32, S2_storerh_io>;
-def: Storex_simple_pat<store, I32, S2_storeri_io>;
-def: Storex_simple_pat<store, I64, S2_storerd_io>;
-
-def: Storex_simple_pat<SwapSt<atomic_store_8>, I32, S2_storerb_io>;
-def: Storex_simple_pat<SwapSt<atomic_store_16>, I32, S2_storerh_io>;
-def: Storex_simple_pat<SwapSt<atomic_store_32>, I32, S2_storeri_io>;
-def: Storex_simple_pat<SwapSt<atomic_store_64>, I64, S2_storerd_io>;
-
-let AddedComplexity = 20 in {
- defm: Storexm_pat<truncstorei8, I64, s32_0ImmPred, LoReg, S2_storerb_io>;
- defm: Storexm_pat<truncstorei16, I64, s31_1ImmPred, LoReg, S2_storerh_io>;
- defm: Storexm_pat<truncstorei32, I64, s30_2ImmPred, LoReg, S2_storeri_io>;
-}
-
-def: Storexm_simple_pat<truncstorei8, I64, LoReg, S2_storerb_io>;
-def: Storexm_simple_pat<truncstorei16, I64, LoReg, S2_storerh_io>;
-def: Storexm_simple_pat<truncstorei32, I64, LoReg, S2_storeri_io>;
-
// Store predicate.
let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13,
isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in
@@ -3951,8 +3472,6 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
// Sign extend word to doubleword
def A2_sxtw : T_S2op_1_di <"sxtw", 0b01, 0b000>;
-def: Pat <(i64 (sext I32:$src)), (A2_sxtw I32:$src)>;
-
// Vector saturate and pack
let Defs = [USR_OVF] in {
def S2_svsathb : T_S2op_1_ii <"vsathb", 0b10, 0b000>;
@@ -4001,22 +3520,11 @@ let Itinerary = S_2op_tc_2_SLOT23 in {
def A2_negsat : T_S2op_1_ii <"neg", 0b10, 0b110, 1>;
}
-def: Pat<(i32 (select (i1 (setlt (i32 IntRegs:$src), 0)),
- (i32 (sub 0, (i32 IntRegs:$src))),
- (i32 IntRegs:$src))),
- (A2_abs IntRegs:$src)>;
-
-let AddedComplexity = 50 in
-def: Pat<(i32 (xor (add (sra (i32 IntRegs:$src), (i32 31)),
- (i32 IntRegs:$src)),
- (sra (i32 IntRegs:$src), (i32 31)))),
- (A2_abs IntRegs:$src)>;
-
class T_S2op_2 <string mnemonic, bits<4> RegTyBits, RegisterClass RCOut,
RegisterClass RCIn, bits<3> MajOp, bits<3> MinOp,
bit isSat, bit isRnd, list<dag> pattern = []>
: SInst <(outs RCOut:$dst),
- (ins RCIn:$src, u5Imm:$u5),
+ (ins RCIn:$src, u5_0Imm:$u5),
"$dst = "#mnemonic#"($src, #$u5)"#!if(isSat, ":sat", "")
#!if(isRnd, ":rnd", ""),
pattern, "", S_2op_tc_2_SLOT23> {
@@ -4049,9 +3557,7 @@ class T_S2op_2_ii <string mnemonic, bits<3> MajOp, bits<3> MinOp,
isSat, isRnd, pattern>;
class T_S2op_shift <string mnemonic, bits<3> MajOp, bits<3> MinOp, SDNode OpNd>
- : T_S2op_2_ii <mnemonic, MajOp, MinOp, 0, 0,
- [(set (i32 IntRegs:$dst), (OpNd (i32 IntRegs:$src),
- (u5ImmPred:$u5)))]>;
+ : T_S2op_2_ii <mnemonic, MajOp, MinOp, 0, 0, []>;
// Vector arithmetic shift right by immediate with truncate and pack
def S2_asr_i_svw_trun : T_S2op_2_id <"vasrw", 0b110, 0b010>;
@@ -4072,7 +3578,7 @@ def S2_asr_i_r_rnd : T_S2op_2_ii <"asr", 0b010, 0b000, 0, 1>;
let isAsmParserOnly = 1 in
def S2_asr_i_r_rnd_goodsyntax
- : SInst <(outs IntRegs:$dst), (ins IntRegs:$src, u5Imm:$u5),
+ : SInst <(outs IntRegs:$dst), (ins IntRegs:$src, u5_0Imm:$u5),
"$dst = asrrnd($src, #$u5)",
[], "", S_2op_tc_1_SLOT23>;
@@ -4080,11 +3586,6 @@ let isAsmParserOnly = 1 in
def A2_not: ALU32_rr<(outs IntRegs:$dst),(ins IntRegs:$src),
"$dst = not($src)">;
-def: Pat<(i32 (sra (i32 (add (i32 (sra I32:$src1, u5ImmPred:$src2)),
- (i32 1))),
- (i32 1))),
- (S2_asr_i_r_rnd IntRegs:$src1, u5ImmPred:$src2)>;
-
class T_S2op_3<string opc, bits<2>MajOp, bits<3>minOp, bits<1> sat = 0>
: SInst<(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss),
"$Rdd = "#opc#"($Rss)"#!if(!eq(sat, 1),":sat","")> {
@@ -4124,9 +3625,6 @@ def A2_vabshsat : T_S2op_3 <"vabsh", 0b01, 0b101, 1>;
def A2_vabsw : T_S2op_3 <"vabsw", 0b01, 0b110>;
def A2_vabswsat : T_S2op_3 <"vabsw", 0b01, 0b111, 1>;
-def : Pat<(not (i64 DoubleRegs:$src1)),
- (A2_notp DoubleRegs:$src1)>;
-
//===----------------------------------------------------------------------===//
// STYPE/BIT +
//===----------------------------------------------------------------------===//
@@ -4166,27 +3664,13 @@ def S2_clb : T_COUNT_LEADING_32<"clb", 0b000, 0b100>;
def S2_clbp : T_COUNT_LEADING_64<"clb", 0b010, 0b000>;
def S2_clbnorm : T_COUNT_LEADING_32<"normamt", 0b000, 0b111>;
-// Count leading zeros.
-def: Pat<(i32 (ctlz I32:$Rs)), (S2_cl0 I32:$Rs)>;
-def: Pat<(i32 (trunc (ctlz I64:$Rss))), (S2_cl0p I64:$Rss)>;
-
-// Count trailing zeros: 32-bit.
-def: Pat<(i32 (cttz I32:$Rs)), (S2_ct0 I32:$Rs)>;
-
-// Count leading ones.
-def: Pat<(i32 (ctlz (not I32:$Rs))), (S2_cl1 I32:$Rs)>;
-def: Pat<(i32 (trunc (ctlz (not I64:$Rss)))), (S2_cl1p I64:$Rss)>;
-
-// Count trailing ones: 32-bit.
-def: Pat<(i32 (cttz (not I32:$Rs))), (S2_ct1 I32:$Rs)>;
-
// The 64-bit counts leading/trailing are defined in HexagonInstrInfoV4.td.
// Bit set/clear/toggle
let hasSideEffects = 0, hasNewValue = 1 in
class T_SCT_BIT_IMM<string MnOp, bits<3> MinOp>
- : SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, u5Imm:$u5),
+ : SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, u5_0Imm:$u5),
"$Rd = "#MnOp#"($Rs, #$u5)", [], "", S_2op_tc_1_SLOT23> {
bits<5> Rd;
bits<5> Rs;
@@ -4222,24 +3706,11 @@ def S2_clrbit_r : T_SCT_BIT_REG<"clrbit", 0b01>;
def S2_setbit_r : T_SCT_BIT_REG<"setbit", 0b00>;
def S2_togglebit_r : T_SCT_BIT_REG<"togglebit", 0b10>;
-def: Pat<(i32 (and (i32 IntRegs:$Rs), (not (shl 1, u5ImmPred:$u5)))),
- (S2_clrbit_i IntRegs:$Rs, u5ImmPred:$u5)>;
-def: Pat<(i32 (or (i32 IntRegs:$Rs), (shl 1, u5ImmPred:$u5))),
- (S2_setbit_i IntRegs:$Rs, u5ImmPred:$u5)>;
-def: Pat<(i32 (xor (i32 IntRegs:$Rs), (shl 1, u5ImmPred:$u5))),
- (S2_togglebit_i IntRegs:$Rs, u5ImmPred:$u5)>;
-def: Pat<(i32 (and (i32 IntRegs:$Rs), (not (shl 1, (i32 IntRegs:$Rt))))),
- (S2_clrbit_r IntRegs:$Rs, IntRegs:$Rt)>;
-def: Pat<(i32 (or (i32 IntRegs:$Rs), (shl 1, (i32 IntRegs:$Rt)))),
- (S2_setbit_r IntRegs:$Rs, IntRegs:$Rt)>;
-def: Pat<(i32 (xor (i32 IntRegs:$Rs), (shl 1, (i32 IntRegs:$Rt)))),
- (S2_togglebit_r IntRegs:$Rs, IntRegs:$Rt)>;
-
// Bit test
let hasSideEffects = 0 in
class T_TEST_BIT_IMM<string MnOp, bits<3> MajOp>
- : SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, u5Imm:$u5),
+ : SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, u5_0Imm:$u5),
"$Pd = "#MnOp#"($Rs, #$u5)",
[], "", S_2op_tc_2early_SLOT23> {
bits<2> Pd;
@@ -4273,20 +3744,9 @@ class T_TEST_BIT_REG<string MnOp, bit IsNeg>
def S2_tstbit_i : T_TEST_BIT_IMM<"tstbit", 0b000>;
def S2_tstbit_r : T_TEST_BIT_REG<"tstbit", 0>;
-let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm.
- def: Pat<(i1 (setne (and (shl 1, u5ImmPred:$u5), (i32 IntRegs:$Rs)), 0)),
- (S2_tstbit_i IntRegs:$Rs, u5ImmPred:$u5)>;
- def: Pat<(i1 (setne (and (shl 1, (i32 IntRegs:$Rt)), (i32 IntRegs:$Rs)), 0)),
- (S2_tstbit_r IntRegs:$Rs, IntRegs:$Rt)>;
- def: Pat<(i1 (trunc (i32 IntRegs:$Rs))),
- (S2_tstbit_i IntRegs:$Rs, 0)>;
- def: Pat<(i1 (trunc (i64 DoubleRegs:$Rs))),
- (S2_tstbit_i (LoReg DoubleRegs:$Rs), 0)>;
-}
-
let hasSideEffects = 0 in
class T_TEST_BITS_IMM<string MnOp, bits<2> MajOp, bit IsNeg>
- : SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, u6Imm:$u6),
+ : SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, u6_0Imm:$u6),
"$Pd = "#MnOp#"($Rs, #$u6)",
[], "", S_2op_tc_2early_SLOT23> {
bits<2> Pd;
@@ -4322,17 +3782,6 @@ def C2_bitsclri : T_TEST_BITS_IMM<"bitsclr", 0b10, 0>;
def C2_bitsclr : T_TEST_BITS_REG<"bitsclr", 0b10, 0>;
def C2_bitsset : T_TEST_BITS_REG<"bitsset", 0b01, 0>;
-let AddedComplexity = 20 in { // Complexity greater than compare reg-imm.
- def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), u6ImmPred:$u6), 0)),
- (C2_bitsclri IntRegs:$Rs, u6ImmPred:$u6)>;
- def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), 0)),
- (C2_bitsclr IntRegs:$Rs, IntRegs:$Rt)>;
-}
-
-let AddedComplexity = 10 in // Complexity greater than compare reg-reg.
-def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), IntRegs:$Rt)),
- (C2_bitsset IntRegs:$Rs, IntRegs:$Rt)>;
-
//===----------------------------------------------------------------------===//
// STYPE/BIT -
//===----------------------------------------------------------------------===//
@@ -4348,14 +3797,6 @@ def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), IntRegs:$Rt)),
// XTYPE/PERM +
//===----------------------------------------------------------------------===//
-def: Pat<(or (or (shl (or (shl (i32 (extloadi8 (add (i32 IntRegs:$b), 3))),
- (i32 8)),
- (i32 (zextloadi8 (add (i32 IntRegs:$b), 2)))),
- (i32 16)),
- (shl (i32 (zextloadi8 (add (i32 IntRegs:$b), 1))), (i32 8))),
- (zextloadi8 (i32 IntRegs:$b))),
- (A2_swiz (L2_loadri_io IntRegs:$b, 0))>;
-
//===----------------------------------------------------------------------===//
// XTYPE/PERM -
//===----------------------------------------------------------------------===//
@@ -4395,24 +3836,6 @@ let hasSideEffects = 0, isCodeGenOnly = 1 in
def C2_pxfer_map: SInst<(outs PredRegs:$dst), (ins PredRegs:$src),
"$dst = $src">;
-
-// Patterns for loads of i1:
-def: Pat<(i1 (load AddrFI:$fi)),
- (C2_tfrrp (L2_loadrub_io AddrFI:$fi, 0))>;
-def: Pat<(i1 (load (add (i32 IntRegs:$Rs), s32ImmPred:$Off))),
- (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, imm:$Off))>;
-def: Pat<(i1 (load (i32 IntRegs:$Rs))),
- (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, 0))>;
-
-def I1toI32: OutPatFrag<(ops node:$Rs),
- (C2_muxii (i1 $Rs), 1, 0)>;
-
-def I32toI1: OutPatFrag<(ops node:$Rs),
- (i1 (C2_tfrrp (i32 $Rs)))>;
-
-defm: Storexm_pat<store, I1, s32ImmPred, I1toI32, S2_storerb_io>;
-def: Storexm_simple_pat<store, I1, I1toI32, S2_storerb_io>;
-
//===----------------------------------------------------------------------===//
// STYPE/PRED -
//===----------------------------------------------------------------------===//
@@ -4436,9 +3859,7 @@ class S_2OpInstImm<string Mnemonic, bits<3>MajOp, bits<3>MinOp,
}
class S_2OpInstImmI6<string Mnemonic, SDNode OpNode, bits<3>MinOp>
- : S_2OpInstImm<Mnemonic, 0b000, MinOp, u6Imm,
- [(set (i64 DoubleRegs:$dst), (OpNode (i64 DoubleRegs:$src1),
- u6ImmPred:$src2))]> {
+ : S_2OpInstImm<Mnemonic, 0b000, MinOp, u6_0Imm, []> {
bits<6> src2;
let Inst{13-8} = src2;
}
@@ -4451,10 +3872,8 @@ def S2_lsr_i_p : S_2OpInstImmI6<"lsr", srl, 0b001>;
// Shift left by small amount and add.
let AddedComplexity = 100, hasNewValue = 1, hasSideEffects = 0 in
def S2_addasl_rrri: SInst <(outs IntRegs:$Rd),
- (ins IntRegs:$Rt, IntRegs:$Rs, u3Imm:$u3),
- "$Rd = addasl($Rt, $Rs, #$u3)" ,
- [(set (i32 IntRegs:$Rd), (add (i32 IntRegs:$Rt),
- (shl (i32 IntRegs:$Rs), u3ImmPred:$u3)))],
+ (ins IntRegs:$Rt, IntRegs:$Rs, u3_0Imm:$u3),
+ "$Rd = addasl($Rt, $Rs, #$u3)" , [],
"", S_3op_tc_2_SLOT23> {
bits<5> Rd;
bits<5> Rt;
@@ -4496,12 +3915,8 @@ def S2_addasl_rrri: SInst <(outs IntRegs:$Rd),
//===----------------------------------------------------------------------===//
// SYSTEM/USER +
//===----------------------------------------------------------------------===//
-def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDTNone, [SDNPHasChain]>;
-
let hasSideEffects = 1, isSoloAX = 1 in
-def Y2_barrier : SYSInst<(outs), (ins),
- "barrier",
- [(HexagonBARRIER)],"",ST_tc_st_SLOT0> {
+def Y2_barrier : SYSInst<(outs), (ins), "barrier", [],"",ST_tc_st_SLOT0> {
let Inst{31-28} = 0b1010;
let Inst{27-21} = 0b1000000;
}
@@ -4517,15 +3932,12 @@ def Y2_barrier : SYSInst<(outs), (ins),
//
let isMoveImm = 1, isAsCheapAsAMove = 1, isReMaterializable = 1,
isPseudo = 1, isCodeGenOnly = 1, hasSideEffects = 0 in {
- def TFR_FI : ALU32_ri<(outs IntRegs:$Rd),
- (ins IntRegs:$fi, s32Imm:$off), "">;
- def TFR_FIA : ALU32_ri<(outs IntRegs:$Rd),
- (ins IntRegs:$Rs, IntRegs:$fi, s32Imm:$off), "">;
+ def PS_fi : ALU32_ri<(outs IntRegs:$Rd),
+ (ins IntRegs:$fi, s32_0Imm:$off), "">;
+ def PS_fia : ALU32_ri<(outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, IntRegs:$fi, s32_0Imm:$off), "">;
}
-def: Pat<(i32 (orisadd (i32 AddrFI:$Rs), s32ImmPred:$off)),
- (i32 (TFR_FI (i32 AddrFI:$Rs), s32ImmPred:$off))>;
-
//===----------------------------------------------------------------------===//
// CRUSER - Type.
//===----------------------------------------------------------------------===//
@@ -4533,7 +3945,7 @@ def: Pat<(i32 (orisadd (i32 AddrFI:$Rs), s32ImmPred:$off)),
let isExtendable = 1, isExtentSigned = 1, opExtentBits = 9, opExtentAlign = 2,
opExtendable = 0, hasSideEffects = 0 in
class LOOP_iBase<string mnemonic, Operand brOp, bit mustExtend = 0>
- : CRInst<(outs), (ins brOp:$offset, u10Imm:$src2),
+ : CRInst<(outs), (ins brOp:$offset, u10_0Imm:$src2),
#mnemonic#"($offset, #$src2)",
[], "" , CR_tc_3x_SLOT3> {
bits<9> offset;
@@ -4605,7 +4017,7 @@ let Defs = [LC0, SA0, P3, USR], hasSideEffects = 0,
isExtentSigned = 1, isExtendable = 1, opExtentBits = 9, opExtentAlign = 2,
opExtendable = 0, isPredicateLate = 1 in
class SPLOOP_iBase<string SP, bits<2> op>
- : CRInst <(outs), (ins brtarget:$r7_2, u10Imm:$U10),
+ : CRInst <(outs), (ins brtarget:$r7_2, u10_0Imm:$U10),
"p3 = sp"#SP#"loop0($r7_2, #$U10)" > {
bits<9> r7_2;
bits<10> U10;
@@ -4733,20 +4145,12 @@ def Y4_trace: CRInst <(outs), (ins IntRegs:$Rs),
let Inst{20-16} = Rs;
}
-// Support for generating global address.
-// Taken from X86InstrInfo.td.
-def SDTHexagonCONST32 : SDTypeProfile<1, 1, [SDTCisVT<0, i32>,
- SDTCisVT<1, i32>,
- SDTCisPtrTy<0>]>;
-def HexagonCONST32 : SDNode<"HexagonISD::CONST32", SDTHexagonCONST32>;
-def HexagonCONST32_GP : SDNode<"HexagonISD::CONST32_GP", SDTHexagonCONST32>;
-
// HI/LO Instructions
let isReMaterializable = 1, isMoveImm = 1, hasSideEffects = 0,
hasNewValue = 1, opNewValue = 0 in
class REG_IMMED<string RegHalf, bit Rs, bits<3> MajOp, bit MinOp>
: ALU32_ri<(outs IntRegs:$dst),
- (ins u16Imm:$imm_value),
+ (ins u16_0Imm:$imm_value),
"$dst"#RegHalf#" = $imm_value", []> {
bits<5> dst;
bits<32> imm_value;
@@ -4765,100 +4169,28 @@ let isAsmParserOnly = 1 in {
def HI : REG_IMMED<".h", 0b0, 0b010, 0b1>;
}
-let isMoveImm = 1, isCodeGenOnly = 1 in
-def LO_PIC : ALU32_ri<(outs IntRegs:$dst), (ins bblabel:$label),
- "$dst.l = #LO($label@GOTREL)",
- []>;
-
-let isMoveImm = 1, isCodeGenOnly = 1 in
-def HI_PIC : ALU32_ri<(outs IntRegs:$dst), (ins bblabel:$label),
- "$dst.h = #HI($label@GOTREL)",
- []>;
-
-let isReMaterializable = 1, isMoveImm = 1,
- isCodeGenOnly = 1, hasSideEffects = 0 in
-def HI_GOT : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global),
- "$dst.h = #HI($global@GOT)",
- []>;
-
-let isReMaterializable = 1, isMoveImm = 1,
- isCodeGenOnly = 1, hasSideEffects = 0 in
-def LO_GOT : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global),
- "$dst.l = #LO($global@GOT)",
- []>;
-
-let isReMaterializable = 1, isMoveImm = 1,
- isCodeGenOnly = 1, hasSideEffects = 0 in
-def HI_GOTREL : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global),
- "$dst.h = #HI($global@GOTREL)",
- []>;
-
-let isReMaterializable = 1, isMoveImm = 1,
- isCodeGenOnly = 1, hasSideEffects = 0 in
-def LO_GOTREL : ALU32_ri<(outs IntRegs:$dst), (ins globaladdress:$global),
- "$dst.l = #LO($global@GOTREL)",
- []>;
-
-// This pattern is incorrect. When we add small data, we should change
-// this pattern to use memw(#foo).
-// This is for sdata.
-let isMoveImm = 1, isAsmParserOnly = 1 in
-def CONST32 : CONSTLDInst<(outs IntRegs:$dst), (ins globaladdress:$global),
- "$dst = CONST32(#$global)",
- [(set (i32 IntRegs:$dst),
- (load (HexagonCONST32 tglobaltlsaddr:$global)))]>;
-
-let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in
-def CONST32_Int_Real : CONSTLDInst<(outs IntRegs:$dst), (ins i32imm:$global),
- "$dst = CONST32(#$global)",
- [(set (i32 IntRegs:$dst), imm:$global) ]>;
-
-// Map TLS addresses to a CONST32 instruction
-def: Pat<(HexagonCONST32 tglobaltlsaddr:$addr), (A2_tfrsi s16Ext:$addr)>;
-def: Pat<(HexagonCONST32 bbl:$label), (A2_tfrsi s16Ext:$label)>;
-
-let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in
-def CONST64_Int_Real : CONSTLDInst<(outs DoubleRegs:$dst), (ins i64imm:$global),
- "$dst = CONST64(#$global)",
- [(set (i64 DoubleRegs:$dst), imm:$global)]>;
+let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in {
+ def CONST32 : CONSTLDInst<(outs IntRegs:$Rd), (ins i32imm:$v),
+ "$Rd = CONST32(#$v)", []>;
+ def CONST64 : CONSTLDInst<(outs DoubleRegs:$Rd), (ins i64imm:$v),
+ "$Rd = CONST64(#$v)", []>;
+}
let hasSideEffects = 0, isReMaterializable = 1, isPseudo = 1,
isCodeGenOnly = 1 in
-def TFR_PdTrue : SInst<(outs PredRegs:$dst), (ins), "",
- [(set (i1 PredRegs:$dst), 1)]>;
+def PS_true : SInst<(outs PredRegs:$dst), (ins), "", []>;
let hasSideEffects = 0, isReMaterializable = 1, isPseudo = 1,
isCodeGenOnly = 1 in
-def TFR_PdFalse : SInst<(outs PredRegs:$dst), (ins), "",
- [(set (i1 PredRegs:$dst), 0)]>;
-
-// Pseudo instructions.
-def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
-def SDT_SPCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
- SDTCisVT<1, i32> ]>;
-
-def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPCallSeqStart,
- [SDNPHasChain, SDNPOutGlue]>;
-def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPCallSeqEnd,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
-
-def SDT_SPCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
-
-// For tail calls, a HexagonTCRet SDNode has three SDNode properties: a chain,
-// an optional flag and variable arguments.
-// Its single operand has pointer type.
-def HexagonTCRet : SDNode<"HexagonISD::TC_RETURN", SDT_SPCall,
- [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+def PS_false : SInst<(outs PredRegs:$dst), (ins), "", []>;
let Defs = [R29, R30], Uses = [R31, R30, R29], isPseudo = 1 in
def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt),
- ".error \"should not emit\" ",
- [(callseq_start timm:$amt)]>;
+ ".error \"should not emit\" ", []>;
let Defs = [R29, R30, R31], Uses = [R29], isPseudo = 1 in
def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
- ".error \"should not emit\" ",
- [(callseq_end timm:$amt1, timm:$amt2)]>;
+ ".error \"should not emit\" ", []>;
// Call subroutine indirectly.
let Defs = VolatileV3.Regs in
@@ -4867,260 +4199,21 @@ def J2_callr : JUMPR_MISC_CALLR<0, 1>;
// Indirect tail-call.
let isPseudo = 1, isCall = 1, isReturn = 1, isBarrier = 1, isPredicable = 0,
isTerminator = 1, isCodeGenOnly = 1 in
-def TCRETURNr : T_JMPr;
+def PS_tailcall_r : T_JMPr;
// Direct tail-calls.
let isPseudo = 1, isCall = 1, isReturn = 1, isBarrier = 1, isPredicable = 0,
isTerminator = 1, isCodeGenOnly = 1 in
-def TCRETURNi : JInst<(outs), (ins calltarget:$dst), "", []>;
-
-//Tail calls.
-def: Pat<(HexagonTCRet tglobaladdr:$dst),
- (TCRETURNi tglobaladdr:$dst)>;
-def: Pat<(HexagonTCRet texternalsym:$dst),
- (TCRETURNi texternalsym:$dst)>;
-def: Pat<(HexagonTCRet (i32 IntRegs:$dst)),
- (TCRETURNr IntRegs:$dst)>;
-
-// Map from r0 = and(r1, 65535) to r0 = zxth(r1)
-def: Pat<(and (i32 IntRegs:$src1), 65535),
- (A2_zxth IntRegs:$src1)>;
-
-// Map from r0 = and(r1, 255) to r0 = zxtb(r1).
-def: Pat<(and (i32 IntRegs:$src1), 255),
- (A2_zxtb IntRegs:$src1)>;
-
-// Map Add(p1, true) to p1 = not(p1).
-// Add(p1, false) should never be produced;
-// if it is, it must be mapped to a no-op.
-def: Pat<(add (i1 PredRegs:$src1), -1),
- (C2_not PredRegs:$src1)>;
-
-// Map from p0 = pnot(p0); r0 = mux(p0, #i, #j) => r0 = mux(p0, #j, #i).
-def: Pat<(select (not (i1 PredRegs:$src1)), s8ImmPred:$src2, s32ImmPred:$src3),
- (C2_muxii PredRegs:$src1, s32ImmPred:$src3, s8ImmPred:$src2)>;
-
-// Map from p0 = pnot(p0); r0 = select(p0, #i, r1)
-// => r0 = C2_muxir(p0, r1, #i)
-def: Pat<(select (not (i1 PredRegs:$src1)), s32ImmPred:$src2,
- (i32 IntRegs:$src3)),
- (C2_muxir PredRegs:$src1, IntRegs:$src3, s32ImmPred:$src2)>;
-
-// Map from p0 = pnot(p0); r0 = mux(p0, r1, #i)
-// => r0 = C2_muxri (p0, #i, r1)
-def: Pat<(select (not (i1 PredRegs:$src1)), IntRegs:$src2, s32ImmPred:$src3),
- (C2_muxri PredRegs:$src1, s32ImmPred:$src3, IntRegs:$src2)>;
-
-// Map from p0 = pnot(p0); if (p0) jump => if (!p0) jump.
-def: Pat<(brcond (not (i1 PredRegs:$src1)), bb:$offset),
- (J2_jumpf PredRegs:$src1, bb:$offset)>;
-
-// Map from Rdd = sign_extend_inreg(Rss, i32) -> Rdd = A2_sxtw(Rss.lo).
-def: Pat<(i64 (sext_inreg (i64 DoubleRegs:$src1), i32)),
- (A2_sxtw (LoReg DoubleRegs:$src1))>;
-
-// Map from Rdd = sign_extend_inreg(Rss, i16) -> Rdd = A2_sxtw(A2_sxth(Rss.lo)).
-def: Pat<(i64 (sext_inreg (i64 DoubleRegs:$src1), i16)),
- (A2_sxtw (A2_sxth (LoReg DoubleRegs:$src1)))>;
-
-// Map from Rdd = sign_extend_inreg(Rss, i8) -> Rdd = A2_sxtw(A2_sxtb(Rss.lo)).
-def: Pat<(i64 (sext_inreg (i64 DoubleRegs:$src1), i8)),
- (A2_sxtw (A2_sxtb (LoReg DoubleRegs:$src1)))>;
-
-// We want to prevent emitting pnot's as much as possible.
-// Map brcond with an unsupported setcc to a J2_jumpf.
-def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
- bb:$offset),
- (J2_jumpf (C2_cmpeq (i32 IntRegs:$src1), (i32 IntRegs:$src2)),
- bb:$offset)>;
-
-def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), s10ImmPred:$src2)),
- bb:$offset),
- (J2_jumpf (C2_cmpeqi (i32 IntRegs:$src1), s10ImmPred:$src2), bb:$offset)>;
-
-def: Pat<(brcond (i1 (setne (i1 PredRegs:$src1), (i1 -1))), bb:$offset),
- (J2_jumpf PredRegs:$src1, bb:$offset)>;
-
-def: Pat<(brcond (i1 (setne (i1 PredRegs:$src1), (i1 0))), bb:$offset),
- (J2_jumpt PredRegs:$src1, bb:$offset)>;
-
-// cmp.lt(Rs, Imm) -> !cmp.ge(Rs, Imm) -> !cmp.gt(Rs, Imm-1)
-def: Pat<(brcond (i1 (setlt (i32 IntRegs:$src1), s8ImmPred:$src2)), bb:$offset),
- (J2_jumpf (C2_cmpgti IntRegs:$src1, (DEC_CONST_SIGNED s8ImmPred:$src2)),
- bb:$offset)>;
-
-// Map from a 64-bit select to an emulated 64-bit mux.
-// Hexagon does not support 64-bit MUXes; so emulate with combines.
-def: Pat<(select (i1 PredRegs:$src1), (i64 DoubleRegs:$src2),
- (i64 DoubleRegs:$src3)),
- (A2_combinew (C2_mux PredRegs:$src1, (HiReg DoubleRegs:$src2),
- (HiReg DoubleRegs:$src3)),
- (C2_mux PredRegs:$src1, (LoReg DoubleRegs:$src2),
- (LoReg DoubleRegs:$src3)))>;
-
-// Map from a 1-bit select to logical ops.
-// From LegalizeDAG.cpp: (B1 ? B2 : B3) <=> (B1 & B2)|(!B1&B3).
-def: Pat<(select (i1 PredRegs:$src1), (i1 PredRegs:$src2), (i1 PredRegs:$src3)),
- (C2_or (C2_and PredRegs:$src1, PredRegs:$src2),
- (C2_and (C2_not PredRegs:$src1), PredRegs:$src3))>;
-
-// Map for truncating from i64 immediates to i32 immediates.
-def: Pat<(i32 (trunc (i64 DoubleRegs:$src))),
- (LoReg DoubleRegs:$src)>;
-
-// Map for truncating from i64 immediates to i1 bit immediates.
-def: Pat<(i1 (trunc (i64 DoubleRegs:$src))),
- (C2_tfrrp (LoReg DoubleRegs:$src))>;
-
-// rs <= rt -> !(rs > rt).
-let AddedComplexity = 30 in
-def: Pat<(i1 (setle (i32 IntRegs:$src1), s32ImmPred:$src2)),
- (C2_not (C2_cmpgti IntRegs:$src1, s32ImmPred:$src2))>;
-
-// rs <= rt -> !(rs > rt).
-def : Pat<(i1 (setle (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
- (i1 (C2_not (C2_cmpgt (i32 IntRegs:$src1), (i32 IntRegs:$src2))))>;
-
-// Rss <= Rtt -> !(Rss > Rtt).
-def: Pat<(i1 (setle (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
- (C2_not (C2_cmpgtp DoubleRegs:$src1, DoubleRegs:$src2))>;
-
-// Map cmpne -> cmpeq.
-// Hexagon_TODO: We should improve on this.
-// rs != rt -> !(rs == rt).
-let AddedComplexity = 30 in
-def: Pat<(i1 (setne (i32 IntRegs:$src1), s32ImmPred:$src2)),
- (C2_not (C2_cmpeqi IntRegs:$src1, s32ImmPred:$src2))>;
-
-// Convert setne back to xor for hexagon since we compute w/ pred registers.
-def: Pat<(i1 (setne (i1 PredRegs:$src1), (i1 PredRegs:$src2))),
- (C2_xor PredRegs:$src1, PredRegs:$src2)>;
-
-// Map cmpne(Rss) -> !cmpeq(Rss).
-// rs != rt -> !(rs == rt).
-def: Pat<(i1 (setne (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
- (C2_not (C2_cmpeqp DoubleRegs:$src1, DoubleRegs:$src2))>;
-
-// Map cmpge(Rs, Rt) -> !cmpgt(Rs, Rt).
-// rs >= rt -> !(rt > rs).
-def : Pat <(i1 (setge (i32 IntRegs:$src1), (i32 IntRegs:$src2))),
- (i1 (C2_not (i1 (C2_cmpgt (i32 IntRegs:$src2), (i32 IntRegs:$src1)))))>;
-
-// cmpge(Rs, Imm) -> cmpgt(Rs, Imm-1)
-let AddedComplexity = 30 in
-def: Pat<(i1 (setge (i32 IntRegs:$src1), s32ImmPred:$src2)),
- (C2_cmpgti IntRegs:$src1, (DEC_CONST_SIGNED s32ImmPred:$src2))>;
-
-// Map cmpge(Rss, Rtt) -> !cmpgt(Rtt, Rss).
-// rss >= rtt -> !(rtt > rss).
-def: Pat<(i1 (setge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
- (C2_not (C2_cmpgtp DoubleRegs:$src2, DoubleRegs:$src1))>;
-
-// Map cmplt(Rs, Imm) -> !cmpge(Rs, Imm).
-// !cmpge(Rs, Imm) -> !cmpgt(Rs, Imm-1).
-// rs < rt -> !(rs >= rt).
-let AddedComplexity = 30 in
-def: Pat<(i1 (setlt (i32 IntRegs:$src1), s32ImmPred:$src2)),
- (C2_not (C2_cmpgti IntRegs:$src1,
- (DEC_CONST_SIGNED s32ImmPred:$src2)))>;
-
-// Generate cmpgeu(Rs, #0) -> cmpeq(Rs, Rs)
-def: Pat<(i1 (setuge (i32 IntRegs:$src1), 0)),
- (C2_cmpeq IntRegs:$src1, IntRegs:$src1)>;
-
-// Generate cmpgeu(Rs, #u8) -> cmpgtu(Rs, #u8 -1)
-def: Pat<(i1 (setuge (i32 IntRegs:$src1), u32ImmPred:$src2)),
- (C2_cmpgtui IntRegs:$src1, (DEC_CONST_UNSIGNED u32ImmPred:$src2))>;
-
-// Generate cmpgtu(Rs, #u9)
-def: Pat<(i1 (setugt (i32 IntRegs:$src1), u32ImmPred:$src2)),
- (C2_cmpgtui IntRegs:$src1, u32ImmPred:$src2)>;
-
-// Map from Rs >= Rt -> !(Rt > Rs).
-// rs >= rt -> !(rt > rs).
-def: Pat<(i1 (setuge (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
- (C2_not (C2_cmpgtup DoubleRegs:$src2, DoubleRegs:$src1))>;
-
-// Map from cmpleu(Rss, Rtt) -> !cmpgtu(Rss, Rtt-1).
-// Map from (Rs <= Rt) -> !(Rs > Rt).
-def: Pat<(i1 (setule (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2))),
- (C2_not (C2_cmpgtup DoubleRegs:$src1, DoubleRegs:$src2))>;
-
-// Sign extends.
-// i1 -> i32
-def: Pat<(i32 (sext (i1 PredRegs:$src1))),
- (C2_muxii PredRegs:$src1, -1, 0)>;
-
-// i1 -> i64
-def: Pat<(i64 (sext (i1 PredRegs:$src1))),
- (A2_combinew (A2_tfrsi -1), (C2_muxii PredRegs:$src1, -1, 0))>;
-
-// Zero extends.
-// i1 -> i32
-def: Pat<(i32 (zext (i1 PredRegs:$src1))),
- (C2_muxii PredRegs:$src1, 1, 0)>;
-
-// Map from Rs = Pd to Pd = mux(Pd, #1, #0)
-def: Pat<(i32 (anyext (i1 PredRegs:$src1))),
- (C2_muxii PredRegs:$src1, 1, 0)>;
-
-// Map from Rss = Pd to Rdd = sxtw (mux(Pd, #1, #0))
-def: Pat<(i64 (anyext (i1 PredRegs:$src1))),
- (A2_sxtw (C2_muxii PredRegs:$src1, 1, 0))>;
-
-// Multiply 64-bit unsigned and use upper result.
-def : Pat <(mulhu (i64 DoubleRegs:$src1), (i64 DoubleRegs:$src2)),
- (A2_addp
- (M2_dpmpyuu_acc_s0
- (S2_lsr_i_p
- (A2_addp
- (M2_dpmpyuu_acc_s0
- (S2_lsr_i_p (M2_dpmpyuu_s0 (LoReg $src1), (LoReg $src2)), 32),
- (HiReg $src1),
- (LoReg $src2)),
- (A2_combinew (A2_tfrsi 0),
- (LoReg (M2_dpmpyuu_s0 (LoReg $src1), (HiReg $src2))))),
- 32),
- (HiReg $src1),
- (HiReg $src2)),
- (S2_lsr_i_p (M2_dpmpyuu_s0 (LoReg $src1), (HiReg $src2)), 32)
-)>;
-
-// Hexagon specific ISD nodes.
-def SDTHexagonALLOCA : SDTypeProfile<1, 2,
- [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
-def HexagonALLOCA : SDNode<"HexagonISD::ALLOCA", SDTHexagonALLOCA,
- [SDNPHasChain]>;
+def PS_tailcall_i : JInst<(outs), (ins calltarget:$dst), "", []>;
// The reason for the custom inserter is to record all ALLOCA instructions
// in MachineFunctionInfo.
-let Defs = [R29], isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 1,
- usesCustomInserter = 1 in
-def ALLOCA: ALU32Inst<(outs IntRegs:$Rd),
- (ins IntRegs:$Rs, u32Imm:$A), "",
- [(set (i32 IntRegs:$Rd),
- (HexagonALLOCA (i32 IntRegs:$Rs), (i32 imm:$A)))]>;
+let Defs = [R29], isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 1 in
+def PS_alloca: ALU32Inst<(outs IntRegs:$Rd),
+ (ins IntRegs:$Rs, u32_0Imm:$A), "", []>;
let isCodeGenOnly = 1, isPseudo = 1, Uses = [R30], hasSideEffects = 0 in
-def ALIGNA : ALU32Inst<(outs IntRegs:$Rd), (ins u32Imm:$A), "", []>;
-
-def SDTHexagonARGEXTEND : SDTypeProfile<1, 1, [SDTCisVT<0, i32>]>;
-def Hexagon_ARGEXTEND : SDNode<"HexagonISD::ARGEXTEND", SDTHexagonARGEXTEND>;
-let isCodeGenOnly = 1 in
-def ARGEXTEND : ALU32_rr <(outs IntRegs:$dst), (ins IntRegs:$src1),
- "$dst = $src1",
- [(set (i32 IntRegs:$dst),
- (Hexagon_ARGEXTEND (i32 IntRegs:$src1)))]>;
-
-let AddedComplexity = 100 in
-def: Pat<(i32 (sext_inreg (Hexagon_ARGEXTEND (i32 IntRegs:$src1)), i16)),
- (i32 IntRegs:$src1)>;
-
-def HexagonJT: SDNode<"HexagonISD::JT", SDTIntUnaryOp>;
-def HexagonCP: SDNode<"HexagonISD::CP", SDTIntUnaryOp>;
-
-def: Pat<(HexagonJT tjumptable:$dst), (A2_tfrsi s16Ext:$dst)>;
-def: Pat<(HexagonCP tconstpool:$dst), (A2_tfrsi s16Ext:$dst)>;
+def PS_aligna : ALU32Inst<(outs IntRegs:$Rd), (ins u32_0Imm:$A), "", []>;
// XTYPE/SHIFT
//
@@ -5137,11 +4230,8 @@ let hasNewValue = 1, opNewValue = 0 in
class T_shift_imm_acc_r <string opc1, string opc2, SDNode OpNode1,
SDNode OpNode2, bits<3> majOp, bits<2> minOp>
: SInst_acc<(outs IntRegs:$Rx),
- (ins IntRegs:$src1, IntRegs:$Rs, u5Imm:$u5),
- "$Rx "#opc2#opc1#"($Rs, #$u5)",
- [(set (i32 IntRegs:$Rx),
- (OpNode2 (i32 IntRegs:$src1),
- (OpNode1 (i32 IntRegs:$Rs), u5ImmPred:$u5)))],
+ (ins IntRegs:$src1, IntRegs:$Rs, u5_0Imm:$u5),
+ "$Rx "#opc2#opc1#"($Rs, #$u5)", [],
"$src1 = $Rx", S_2op_tc_2_SLOT23> {
bits<5> Rx;
bits<5> Rs;
@@ -5168,10 +4258,7 @@ class T_shift_reg_acc_r <string opc1, string opc2, SDNode OpNode1,
SDNode OpNode2, bits<2> majOp, bits<2> minOp>
: SInst_acc<(outs IntRegs:$Rx),
(ins IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt),
- "$Rx "#opc2#opc1#"($Rs, $Rt)",
- [(set (i32 IntRegs:$Rx),
- (OpNode2 (i32 IntRegs:$src1),
- (OpNode1 (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))))],
+ "$Rx "#opc2#opc1#"($Rs, $Rt)", [],
"$src1 = $Rx", S_3op_tc_2_SLOT23 > {
bits<5> Rx;
bits<5> Rs;
@@ -5194,11 +4281,8 @@ class T_shift_reg_acc_r <string opc1, string opc2, SDNode OpNode1,
class T_shift_imm_acc_p <string opc1, string opc2, SDNode OpNode1,
SDNode OpNode2, bits<3> majOp, bits<2> minOp>
: SInst_acc<(outs DoubleRegs:$Rxx),
- (ins DoubleRegs:$src1, DoubleRegs:$Rss, u6Imm:$u6),
- "$Rxx "#opc2#opc1#"($Rss, #$u6)",
- [(set (i64 DoubleRegs:$Rxx),
- (OpNode2 (i64 DoubleRegs:$src1),
- (OpNode1 (i64 DoubleRegs:$Rss), u6ImmPred:$u6)))],
+ (ins DoubleRegs:$src1, DoubleRegs:$Rss, u6_0Imm:$u6),
+ "$Rxx "#opc2#opc1#"($Rss, #$u6)", [],
"$src1 = $Rxx", S_2op_tc_2_SLOT23> {
bits<5> Rxx;
bits<5> Rss;
@@ -5225,10 +4309,7 @@ class T_shift_reg_acc_p <string opc1, string opc2, SDNode OpNode1,
SDNode OpNode2, bits<3> majOp, bits<2> minOp>
: SInst_acc<(outs DoubleRegs:$Rxx),
(ins DoubleRegs:$src1, DoubleRegs:$Rss, IntRegs:$Rt),
- "$Rxx "#opc2#opc1#"($Rss, $Rt)",
- [(set (i64 DoubleRegs:$Rxx),
- (OpNode2 (i64 DoubleRegs:$src1),
- (OpNode1 (i64 DoubleRegs:$Rss), (i32 IntRegs:$Rt))))],
+ "$Rxx "#opc2#opc1#"($Rss, $Rt)", [],
"$src1 = $Rxx", S_3op_tc_2_SLOT23> {
bits<5> Rxx;
bits<5> Rss;
@@ -5400,9 +4481,7 @@ class T_S3op_3 <string mnemonic, RegisterClass RC, bits<2> MajOp,
let hasNewValue = 1 in
class T_S3op_shift32 <string mnemonic, SDNode OpNode, bits<2> MinOp>
- : T_S3op_3 <mnemonic, IntRegs, 0b01, MinOp, 0,
- [(set (i32 IntRegs:$dst), (OpNode (i32 IntRegs:$src1),
- (i32 IntRegs:$src2)))]>;
+ : T_S3op_3 <mnemonic, IntRegs, 0b01, MinOp, 0, []>;
let hasNewValue = 1, Itinerary = S_3op_tc_2_SLOT23 in
class T_S3op_shift32_Sat <string mnemonic, bits<2> MinOp>
@@ -5410,9 +4489,7 @@ class T_S3op_shift32_Sat <string mnemonic, bits<2> MinOp>
class T_S3op_shift64 <string mnemonic, SDNode OpNode, bits<2> MinOp>
- : T_S3op_3 <mnemonic, DoubleRegs, 0b10, MinOp, 0,
- [(set (i64 DoubleRegs:$dst), (OpNode (i64 DoubleRegs:$src1),
- (i32 IntRegs:$src2)))]>;
+ : T_S3op_3 <mnemonic, DoubleRegs, 0b10, MinOp, 0, []>;
class T_S3op_shiftVect <string mnemonic, bits<2> MajOp, bits<2> MinOp>
@@ -5473,7 +4550,7 @@ def S2_vcrotate : T_S3op_shiftVect < "vcrotate", 0b11, 0b00>;
let hasSideEffects = 0 in
class T_S3op_7 <string mnemonic, bit MajOp >
: SInst <(outs DoubleRegs:$Rdd),
- (ins DoubleRegs:$Rss, DoubleRegs:$Rtt, u3Imm:$u3),
+ (ins DoubleRegs:$Rss, DoubleRegs:$Rtt, u3_0Imm:$u3),
"$Rdd = "#mnemonic#"($Rss, $Rtt, #$u3)" ,
[], "", S_3op_tc_1_SLOT23 > {
bits<5> Rdd;
@@ -5530,8 +4607,8 @@ class T_S2op_insert <bits<4> RegTyBits, RegisterClass RC, Operand ImmOp>
bit bit13;
string ImmOpStr = !cast<string>(ImmOp);
- let bit23 = !if (!eq(ImmOpStr, "u6Imm"), src3{5}, 0);
- let bit13 = !if (!eq(ImmOpStr, "u6Imm"), src2{5}, 0);
+ let bit23 = !if (!eq(ImmOpStr, "u6_0Imm"), src3{5}, 0);
+ let bit13 = !if (!eq(ImmOpStr, "u6_0Imm"), src2{5}, 0);
let IClass = 0b1000;
@@ -5549,42 +4626,13 @@ class T_S2op_insert <bits<4> RegTyBits, RegisterClass RC, Operand ImmOp>
// Rx=insert(Rs,#u5,#U5)
let hasNewValue = 1 in {
def S2_insert_rp : T_S3op_insert <"insert", IntRegs>;
- def S2_insert : T_S2op_insert <0b1111, IntRegs, u5Imm>;
+ def S2_insert : T_S2op_insert <0b1111, IntRegs, u5_0Imm>;
}
// Rxx=insert(Rss,Rtt)
// Rxx=insert(Rss,#u6,#U6)
def S2_insertp_rp : T_S3op_insert<"insert", DoubleRegs>;
-def S2_insertp : T_S2op_insert <0b0011, DoubleRegs, u6Imm>;
-
-
-def SDTHexagonINSERT:
- SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
- SDTCisInt<0>, SDTCisVT<3, i32>, SDTCisVT<4, i32>]>;
-def SDTHexagonINSERTRP:
- SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
- SDTCisInt<0>, SDTCisVT<3, i64>]>;
-
-def HexagonINSERT : SDNode<"HexagonISD::INSERT", SDTHexagonINSERT>;
-def HexagonINSERTRP : SDNode<"HexagonISD::INSERTRP", SDTHexagonINSERTRP>;
-
-def: Pat<(HexagonINSERT I32:$Rs, I32:$Rt, u5ImmPred:$u1, u5ImmPred:$u2),
- (S2_insert I32:$Rs, I32:$Rt, u5ImmPred:$u1, u5ImmPred:$u2)>;
-def: Pat<(HexagonINSERT I64:$Rs, I64:$Rt, u6ImmPred:$u1, u6ImmPred:$u2),
- (S2_insertp I64:$Rs, I64:$Rt, u6ImmPred:$u1, u6ImmPred:$u2)>;
-def: Pat<(HexagonINSERTRP I32:$Rs, I32:$Rt, I64:$Ru),
- (S2_insert_rp I32:$Rs, I32:$Rt, I64:$Ru)>;
-def: Pat<(HexagonINSERTRP I64:$Rs, I64:$Rt, I64:$Ru),
- (S2_insertp_rp I64:$Rs, I64:$Rt, I64:$Ru)>;
-
-let AddedComplexity = 100 in
-def: Pat<(or (or (shl (HexagonINSERT (i32 (zextloadi8 (add I32:$b, 2))),
- (i32 (extloadi8 (add I32:$b, 3))),
- 24, 8),
- (i32 16)),
- (shl (i32 (zextloadi8 (add I32:$b, 1))), (i32 8))),
- (zextloadi8 I32:$b)),
- (A2_swiz (L2_loadri_io I32:$b, 0))>;
+def S2_insertp : T_S2op_insert <0b0011, DoubleRegs, u6_0Imm>;
//===----------------------------------------------------------------------===//
@@ -5622,10 +4670,10 @@ class T_S2op_extract <string mnemonic, bits<4> RegTyBits,
bit bit13;
string ImmOpStr = !cast<string>(ImmOp);
- let bit23 = !if (!eq(ImmOpStr, "u6Imm"), src3{5},
+ let bit23 = !if (!eq(ImmOpStr, "u6_0Imm"), src3{5},
!if (!eq(mnemonic, "extractu"), 0, 1));
- let bit13 = !if (!eq(ImmOpStr, "u6Imm"), src2{5}, 0);
+ let bit13 = !if (!eq(ImmOpStr, "u6_0Imm"), src2{5}, 0);
let IClass = 0b1000;
@@ -5644,38 +4692,15 @@ class T_S2op_extract <string mnemonic, bits<4> RegTyBits,
// Rdd=extractu(Rss,Rtt)
// Rdd=extractu(Rss,#u6,#U6)
def S2_extractup_rp : T_S3op_64 < "extractu", 0b00, 0b000, 0>;
-def S2_extractup : T_S2op_extract <"extractu", 0b0001, DoubleRegs, u6Imm>;
+def S2_extractup : T_S2op_extract <"extractu", 0b0001, DoubleRegs, u6_0Imm>;
// Rd=extractu(Rs,Rtt)
// Rd=extractu(Rs,#u5,#U5)
let hasNewValue = 1 in {
def S2_extractu_rp : T_S3op_extract<"extractu", 0b00>;
- def S2_extractu : T_S2op_extract <"extractu", 0b1101, IntRegs, u5Imm>;
+ def S2_extractu : T_S2op_extract <"extractu", 0b1101, IntRegs, u5_0Imm>;
}
-def SDTHexagonEXTRACTU:
- SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<1>,
- SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
-def SDTHexagonEXTRACTURP:
- SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<1>,
- SDTCisVT<2, i64>]>;
-
-def HexagonEXTRACTU : SDNode<"HexagonISD::EXTRACTU", SDTHexagonEXTRACTU>;
-def HexagonEXTRACTURP : SDNode<"HexagonISD::EXTRACTURP", SDTHexagonEXTRACTURP>;
-
-def: Pat<(HexagonEXTRACTU I32:$src1, u5ImmPred:$src2, u5ImmPred:$src3),
- (S2_extractu I32:$src1, u5ImmPred:$src2, u5ImmPred:$src3)>;
-def: Pat<(HexagonEXTRACTU I64:$src1, u6ImmPred:$src2, u6ImmPred:$src3),
- (S2_extractup I64:$src1, u6ImmPred:$src2, u6ImmPred:$src3)>;
-def: Pat<(HexagonEXTRACTURP I32:$src1, I64:$src2),
- (S2_extractu_rp I32:$src1, I64:$src2)>;
-def: Pat<(HexagonEXTRACTURP I64:$src1, I64:$src2),
- (S2_extractup_rp I64:$src1, I64:$src2)>;
-
-// Change the sign of the immediate for Rd=-mpyi(Rs,#u8)
-def: Pat<(mul (i32 IntRegs:$src1), (ineg n8ImmPred:$src2)),
- (M2_mpysin IntRegs:$src1, u8ImmPred:$src2)>;
-
//===----------------------------------------------------------------------===//
// :raw form of tableidx[bdhw] insns
//===----------------------------------------------------------------------===//
@@ -5683,7 +4708,7 @@ def: Pat<(mul (i32 IntRegs:$src1), (ineg n8ImmPred:$src2)),
let hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
class tableidxRaw<string OpStr, bits<2>MinOp>
: SInst <(outs IntRegs:$Rx),
- (ins IntRegs:$_dst_, IntRegs:$Rs, u4Imm:$u4, s6Imm:$S6),
+ (ins IntRegs:$_dst_, IntRegs:$Rs, u4_0Imm:$u4, s6_0Imm:$S6),
"$Rx = "#OpStr#"($Rs, #$u4, #$S6):raw",
[], "$Rx = $_dst_" > {
bits<5> Rx;
@@ -5714,7 +4739,7 @@ def S2_tableidxd : tableidxRaw<"tableidxd", 0b11>;
let isPseudo = 1 in
class tableidx_goodsyntax <string mnemonic>
: SInst <(outs IntRegs:$Rx),
- (ins IntRegs:$_dst_, IntRegs:$Rs, u4Imm:$u4, u5Imm:$u5),
+ (ins IntRegs:$_dst_, IntRegs:$Rs, u4_0Imm:$u4, u5_0Imm:$u5),
"$Rx = "#mnemonic#"($Rs, #$u4, #$u5)",
[], "$Rx = $_dst_" >;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV3.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV3.td
index 9024a43aa7eb..225f94405076 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV3.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV3.td
@@ -11,12 +11,6 @@
//
//===----------------------------------------------------------------------===//
-def callv3 : SDNode<"HexagonISD::CALLv3", SDT_SPCall,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>;
-
-def callv3nr : SDNode<"HexagonISD::CALLv3nr", SDT_SPCall,
- [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>;
-
//===----------------------------------------------------------------------===//
// J +
//===----------------------------------------------------------------------===//
@@ -66,11 +60,13 @@ multiclass T_Calls<bit CSR, string ExtStr> {
defm J2_call: T_Calls<1, "">, PredRel;
-let isCodeGenOnly = 1, isCall = 1, hasSideEffects = 1, Defs = VolatileV3.Regs in
-def CALLv3nr : T_Call<1, "">, PredRel;
+let isCodeGenOnly = 1, isCall = 1, hasSideEffects = 1,
+ Defs = VolatileV3.Regs in
+def PS_call_nr : T_Call<1, "">, PredRel;
-let isCodeGenOnly = 1, isCall = 1, hasSideEffects = 1, Defs = [PC, R31, R6, R7, P0] in
-def CALLstk : T_Call<0, "">, PredRel;
+let isCodeGenOnly = 1, isCall = 1, hasSideEffects = 1,
+ Defs = [PC, R31, R6, R7, P0] in
+def PS_call_stk : T_Call<0, "">, PredRel;
//===----------------------------------------------------------------------===//
// J -
@@ -83,7 +79,7 @@ def CALLstk : T_Call<0, "">, PredRel;
// Call subroutine from register.
let isCodeGenOnly = 1, Defs = VolatileV3.Regs in {
- def CALLRv3nr : JUMPR_MISC_CALLR<0, 1>; // Call, no return.
+ def PS_callr_nr : JUMPR_MISC_CALLR<0, 1>; // Call, no return.
}
//===----------------------------------------------------------------------===//
@@ -105,9 +101,7 @@ def A2_addsph : T_ALU64_addsp_hl<":raw:hi", 0b111>;
let hasSideEffects = 0, isAsmParserOnly = 1 in
def A2_addsp : ALU64_rr<(outs DoubleRegs:$Rd),
- (ins IntRegs:$Rs, DoubleRegs:$Rt), "$Rd = add($Rs, $Rt)",
- [(set (i64 DoubleRegs:$Rd), (i64 (add (i64 (sext (i32 IntRegs:$Rs))),
- (i64 DoubleRegs:$Rt))))],
+ (ins IntRegs:$Rs, DoubleRegs:$Rt), "$Rd = add($Rs, $Rt)", [],
"", ALU64_tc_1_SLOT23>;
@@ -137,60 +131,10 @@ def A2_minup : T_XTYPE_MIN_MAX_P<0, 1>;
def A2_maxp : T_XTYPE_MIN_MAX_P<1, 0>;
def A2_maxup : T_XTYPE_MIN_MAX_P<1, 1>;
-multiclass MinMax_pats_p<PatFrag Op, InstHexagon Inst, InstHexagon SwapInst> {
- defm: T_MinMax_pats<Op, DoubleRegs, i64, Inst, SwapInst>;
-}
-
-let AddedComplexity = 200 in {
- defm: MinMax_pats_p<setge, A2_maxp, A2_minp>;
- defm: MinMax_pats_p<setgt, A2_maxp, A2_minp>;
- defm: MinMax_pats_p<setle, A2_minp, A2_maxp>;
- defm: MinMax_pats_p<setlt, A2_minp, A2_maxp>;
- defm: MinMax_pats_p<setuge, A2_maxup, A2_minup>;
- defm: MinMax_pats_p<setugt, A2_maxup, A2_minup>;
- defm: MinMax_pats_p<setule, A2_minup, A2_maxup>;
- defm: MinMax_pats_p<setult, A2_minup, A2_maxup>;
-}
-
//===----------------------------------------------------------------------===//
// ALU64/ALU -
//===----------------------------------------------------------------------===//
-
-
-
-//def : Pat <(brcond (i1 (seteq (i32 IntRegs:$src1), 0)), bb:$offset),
-// (JMP_RegEzt (i32 IntRegs:$src1), bb:$offset)>;
-
-//def : Pat <(brcond (i1 (setne (i32 IntRegs:$src1), 0)), bb:$offset),
-// (JMP_RegNzt (i32 IntRegs:$src1), bb:$offset)>;
-
-//def : Pat <(brcond (i1 (setle (i32 IntRegs:$src1), 0)), bb:$offset),
-// (JMP_RegLezt (i32 IntRegs:$src1), bb:$offset)>;
-
-//def : Pat <(brcond (i1 (setge (i32 IntRegs:$src1), 0)), bb:$offset),
-// (JMP_RegGezt (i32 IntRegs:$src1), bb:$offset)>;
-
-//def : Pat <(brcond (i1 (setgt (i32 IntRegs:$src1), -1)), bb:$offset),
-// (JMP_RegGezt (i32 IntRegs:$src1), bb:$offset)>;
-
-// Map call instruction
-def : Pat<(callv3 (i32 IntRegs:$dst)),
- (J2_callr (i32 IntRegs:$dst))>;
-def : Pat<(callv3 tglobaladdr:$dst),
- (J2_call tglobaladdr:$dst)>;
-def : Pat<(callv3 texternalsym:$dst),
- (J2_call texternalsym:$dst)>;
-def : Pat<(callv3 tglobaltlsaddr:$dst),
- (J2_call tglobaltlsaddr:$dst)>;
-
-def : Pat<(callv3nr (i32 IntRegs:$dst)),
- (CALLRv3nr (i32 IntRegs:$dst))>;
-def : Pat<(callv3nr tglobaladdr:$dst),
- (CALLv3nr tglobaladdr:$dst)>;
-def : Pat<(callv3nr texternalsym:$dst),
- (CALLv3nr texternalsym:$dst)>;
-
//===----------------------------------------------------------------------===//
// :raw form of vrcmpys:hi/lo insns
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td
index 398d2d3bc716..18943a082d28 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td
@@ -30,9 +30,6 @@ def DuplexIClassD: InstDuplex < 0xD >;
def DuplexIClassE: InstDuplex < 0xE >;
def DuplexIClassF: InstDuplex < 0xF >;
-def addrga: PatLeaf<(i32 AddrGA:$Addr)>;
-def addrgp: PatLeaf<(i32 AddrGP:$Addr)>;
-
let hasSideEffects = 0 in
class T_Immext<Operand ImmType>
: EXTENDERInst<(outs), (ins ImmType:$imm),
@@ -53,14 +50,6 @@ let isCodeGenOnly = 1 in {
def A4_ext_g : T_Immext<globaladdress>;
}
-def BITPOS32 : SDNodeXForm<imm, [{
- // Return the bit position we will set [0-31].
- // As an SDNode.
- int32_t imm = N->getSExtValue();
- return XformMskToBitPosU5Imm(imm, SDLoc(N));
-}]>;
-
-
// Hexagon V4 Architecture spec defines 8 instruction classes:
// LD ST ALU32 XTYPE J JR MEMOP NV CR SYSTEM(system is not implemented in the
// compiler)
@@ -145,22 +134,6 @@ def C4_cmpneq : T_ALU32_3op_cmp<"!cmp.eq", 0b00, 1, 1>;
def C4_cmplte : T_ALU32_3op_cmp<"!cmp.gt", 0b10, 1, 0>;
def C4_cmplteu : T_ALU32_3op_cmp<"!cmp.gtu", 0b11, 1, 0>;
-// Pats for instruction selection.
-
-// A class to embed the usual comparison patfrags within a zext to i32.
-// The seteq/setne frags use "lhs" and "rhs" as operands, so use the same
-// names, or else the frag's "body" won't match the operands.
-class CmpInReg<PatFrag Op>
- : PatFrag<(ops node:$lhs, node:$rhs),(i32 (zext (i1 Op.Fragment)))>;
-
-def: T_cmp32_rr_pat<A4_rcmpeq, CmpInReg<seteq>, i32>;
-def: T_cmp32_rr_pat<A4_rcmpneq, CmpInReg<setne>, i32>;
-
-def: T_cmp32_rr_pat<C4_cmpneq, setne, i1>;
-def: T_cmp32_rr_pat<C4_cmplteu, setule, i1>;
-
-def: T_cmp32_rr_pat<C4_cmplteu, RevCmp<setuge>, i1>;
-
class T_CMP_rrbh<string mnemonic, bits<3> MinOp, bit IsComm>
: SInst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, IntRegs:$Rt),
"$Pd = "#mnemonic#"($Rs, $Rt)", [], "", S_3op_tc_2early_SLOT23>,
@@ -190,21 +163,6 @@ def A4_cmpheq : T_CMP_rrbh<"cmph.eq", 0b011, 1>;
def A4_cmphgt : T_CMP_rrbh<"cmph.gt", 0b100, 0>;
def A4_cmphgtu : T_CMP_rrbh<"cmph.gtu", 0b101, 0>;
-let AddedComplexity = 100 in {
- def: Pat<(i1 (seteq (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)),
- 255), 0)),
- (A4_cmpbeq IntRegs:$Rs, IntRegs:$Rt)>;
- def: Pat<(i1 (setne (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)),
- 255), 0)),
- (C2_not (A4_cmpbeq IntRegs:$Rs, IntRegs:$Rt))>;
- def: Pat<(i1 (seteq (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)),
- 65535), 0)),
- (A4_cmpheq IntRegs:$Rs, IntRegs:$Rt)>;
- def: Pat<(i1 (setne (and (xor (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)),
- 65535), 0)),
- (C2_not (A4_cmpheq IntRegs:$Rs, IntRegs:$Rt))>;
-}
-
class T_CMP_ribh<string mnemonic, bits<2> MajOp, bit IsHalf, bit IsComm,
Operand ImmType, bit IsImmExt, bit IsImmSigned, int ImmBits>
: ALU64Inst<(outs PredRegs:$Pd), (ins IntRegs:$Rs, ImmType:$Imm),
@@ -234,15 +192,15 @@ class T_CMP_ribh<string mnemonic, bits<2> MajOp, bit IsHalf, bit IsComm,
let Inst{1-0} = Pd;
}
-def A4_cmpbeqi : T_CMP_ribh<"cmpb.eq", 0b00, 0, 1, u8Imm, 0, 0, 8>;
-def A4_cmpbgti : T_CMP_ribh<"cmpb.gt", 0b01, 0, 0, s8Imm, 0, 1, 8>;
-def A4_cmpbgtui : T_CMP_ribh<"cmpb.gtu", 0b10, 0, 0, u7Ext, 1, 0, 7>;
-def A4_cmpheqi : T_CMP_ribh<"cmph.eq", 0b00, 1, 1, s8Ext, 1, 1, 8>;
-def A4_cmphgti : T_CMP_ribh<"cmph.gt", 0b01, 1, 0, s8Ext, 1, 1, 8>;
-def A4_cmphgtui : T_CMP_ribh<"cmph.gtu", 0b10, 1, 0, u7Ext, 1, 0, 7>;
+def A4_cmpbeqi : T_CMP_ribh<"cmpb.eq", 0b00, 0, 1, u8_0Imm, 0, 0, 8>;
+def A4_cmpbgti : T_CMP_ribh<"cmpb.gt", 0b01, 0, 0, s8_0Imm, 0, 1, 8>;
+def A4_cmpbgtui : T_CMP_ribh<"cmpb.gtu", 0b10, 0, 0, u7_0Ext, 1, 0, 7>;
+def A4_cmpheqi : T_CMP_ribh<"cmph.eq", 0b00, 1, 1, s8_0Ext, 1, 1, 8>;
+def A4_cmphgti : T_CMP_ribh<"cmph.gt", 0b01, 1, 0, s8_0Ext, 1, 1, 8>;
+def A4_cmphgtui : T_CMP_ribh<"cmph.gtu", 0b10, 1, 0, u7_0Ext, 1, 0, 7>;
class T_RCMP_EQ_ri<string mnemonic, bit IsNeg>
- : ALU32_ri<(outs IntRegs:$Rd), (ins IntRegs:$Rs, s8Ext:$s8),
+ : ALU32_ri<(outs IntRegs:$Rd), (ins IntRegs:$Rs, s8_0Ext:$s8),
"$Rd = "#mnemonic#"($Rs, #$s8)", [], "", ALU32_2op_tc_1_SLOT0123>,
ImmRegRel {
let InputType = "imm";
@@ -270,16 +228,6 @@ class T_RCMP_EQ_ri<string mnemonic, bit IsNeg>
def A4_rcmpeqi : T_RCMP_EQ_ri<"cmp.eq", 0>;
def A4_rcmpneqi : T_RCMP_EQ_ri<"!cmp.eq", 1>;
-def: Pat<(i32 (zext (i1 (seteq (i32 IntRegs:$Rs), s32ImmPred:$s8)))),
- (A4_rcmpeqi IntRegs:$Rs, s32ImmPred:$s8)>;
-def: Pat<(i32 (zext (i1 (setne (i32 IntRegs:$Rs), s32ImmPred:$s8)))),
- (A4_rcmpneqi IntRegs:$Rs, s32ImmPred:$s8)>;
-
-// Preserve the S2_tstbit_r generation
-def: Pat<(i32 (zext (i1 (setne (i32 (and (i32 (shl 1, (i32 IntRegs:$src2))),
- (i32 IntRegs:$src1))), 0)))),
- (C2_muxii (S2_tstbit_r IntRegs:$src1, IntRegs:$src2), 1, 0)>;
-
//===----------------------------------------------------------------------===//
// ALU32 -
//===----------------------------------------------------------------------===//
@@ -308,26 +256,16 @@ class T_Combine1 <bits<2> MajOp, dag ins, string AsmStr>
}
let opExtendable = 2 in
-def A4_combineri : T_Combine1<0b00, (ins IntRegs:$Rs, s8Ext:$s8),
+def A4_combineri : T_Combine1<0b00, (ins IntRegs:$Rs, s8_0Ext:$s8),
"$Rdd = combine($Rs, #$s8)">;
let opExtendable = 1 in
-def A4_combineir : T_Combine1<0b01, (ins s8Ext:$s8, IntRegs:$Rs),
+def A4_combineir : T_Combine1<0b01, (ins s8_0Ext:$s8, IntRegs:$Rs),
"$Rdd = combine(#$s8, $Rs)">;
-// The complexity of the combines involving immediates should be greater
-// than the complexity of the combine with two registers.
-let AddedComplexity = 50 in {
-def: Pat<(HexagonCOMBINE IntRegs:$r, s32ImmPred:$i),
- (A4_combineri IntRegs:$r, s32ImmPred:$i)>;
-
-def: Pat<(HexagonCOMBINE s32ImmPred:$i, IntRegs:$r),
- (A4_combineir s32ImmPred:$i, IntRegs:$r)>;
-}
-
// A4_combineii: Set two small immediates.
let hasSideEffects = 0, isExtendable = 1, opExtentBits = 6, opExtendable = 2 in
-def A4_combineii: ALU32Inst<(outs DoubleRegs:$Rdd), (ins s8Imm:$s8, u6Ext:$U6),
+def A4_combineii: ALU32Inst<(outs DoubleRegs:$Rdd), (ins s8_0Imm:$s8, u6_0Ext:$U6),
"$Rdd = combine(#$s8, #$U6)"> {
bits<5> Rdd;
bits<8> s8;
@@ -341,12 +279,6 @@ def A4_combineii: ALU32Inst<(outs DoubleRegs:$Rdd), (ins s8Imm:$s8, u6Ext:$U6),
let Inst{4-0} = Rdd;
}
-// The complexity of the combine with two immediates should be greater than
-// the complexity of a combine involving a register.
-let AddedComplexity = 75 in
-def: Pat<(HexagonCOMBINE s8ImmPred:$s8, u32ImmPred:$u6),
- (A4_combineii imm:$s8, imm:$u6)>;
-
//===----------------------------------------------------------------------===//
// ALU32/PERM -
//===----------------------------------------------------------------------===//
@@ -355,39 +287,6 @@ def: Pat<(HexagonCOMBINE s8ImmPred:$s8, u32ImmPred:$u6),
// LD +
//===----------------------------------------------------------------------===//
-def Zext64: OutPatFrag<(ops node:$Rs),
- (i64 (A4_combineir 0, (i32 $Rs)))>;
-def Sext64: OutPatFrag<(ops node:$Rs),
- (i64 (A2_sxtw (i32 $Rs)))>;
-
-// Patterns to generate indexed loads with different forms of the address:
-// - frameindex,
-// - base + offset,
-// - base (without offset).
-multiclass Loadxm_pat<PatFrag Load, ValueType VT, PatFrag ValueMod,
- PatLeaf ImmPred, InstHexagon MI> {
- def: Pat<(VT (Load AddrFI:$fi)),
- (VT (ValueMod (MI AddrFI:$fi, 0)))>;
- def: Pat<(VT (Load (add AddrFI:$fi, ImmPred:$Off))),
- (VT (ValueMod (MI AddrFI:$fi, imm:$Off)))>;
- def: Pat<(VT (Load (add IntRegs:$Rs, ImmPred:$Off))),
- (VT (ValueMod (MI IntRegs:$Rs, imm:$Off)))>;
- def: Pat<(VT (Load (i32 IntRegs:$Rs))),
- (VT (ValueMod (MI IntRegs:$Rs, 0)))>;
-}
-
-defm: Loadxm_pat<extloadi1, i64, Zext64, s32_0ImmPred, L2_loadrub_io>;
-defm: Loadxm_pat<extloadi8, i64, Zext64, s32_0ImmPred, L2_loadrub_io>;
-defm: Loadxm_pat<extloadi16, i64, Zext64, s31_1ImmPred, L2_loadruh_io>;
-defm: Loadxm_pat<zextloadi1, i64, Zext64, s32_0ImmPred, L2_loadrub_io>;
-defm: Loadxm_pat<zextloadi8, i64, Zext64, s32_0ImmPred, L2_loadrub_io>;
-defm: Loadxm_pat<zextloadi16, i64, Zext64, s31_1ImmPred, L2_loadruh_io>;
-defm: Loadxm_pat<sextloadi8, i64, Sext64, s32_0ImmPred, L2_loadrb_io>;
-defm: Loadxm_pat<sextloadi16, i64, Sext64, s31_1ImmPred, L2_loadrh_io>;
-
-// Map Rdd = anyext(Rs) -> Rdd = combine(#0, Rs).
-def: Pat<(i64 (anyext (i32 IntRegs:$src1))), (Zext64 IntRegs:$src1)>;
-
//===----------------------------------------------------------------------===//
// Template class for load instructions with Absolute set addressing mode.
//===----------------------------------------------------------------------===//
@@ -395,7 +294,7 @@ let isExtended = 1, opExtendable = 2, opExtentBits = 6, addrMode = AbsoluteSet,
hasSideEffects = 0 in
class T_LD_abs_set<string mnemonic, RegisterClass RC, bits<4>MajOp>:
LDInst<(outs RC:$dst1, IntRegs:$dst2),
- (ins u6Ext:$addr),
+ (ins u6_0Ext:$addr),
"$dst1 = "#mnemonic#"($dst2 = #$addr)",
[]> {
bits<7> name;
@@ -447,7 +346,7 @@ let InputType = "imm", addrMode = BaseLongOffset, isExtended = 1,
opExtentBits = 6, opExtendable = 3 in
class T_LoadAbsReg <string mnemonic, string CextOp, RegisterClass RC,
bits<4> MajOp>
- : LDInst <(outs RC:$dst), (ins IntRegs:$src1, u2Imm:$src2, u6Ext:$src3),
+ : LDInst <(outs RC:$dst), (ins IntRegs:$src1, u2_0Imm:$src2, u6_0Ext:$src3),
"$dst = "#mnemonic#"($src1<<#$src2 + #$src3)",
[] >, ImmRegShl {
bits<5> dst;
@@ -495,48 +394,12 @@ let accessSize = DoubleWordAccess in
def L4_loadrd_ur : T_LoadAbsReg<"memd", "LDrid", DoubleRegs, 0b1110>;
-multiclass T_LoadAbsReg_Pat <PatFrag ldOp, InstHexagon MI, ValueType VT = i32> {
- def : Pat <(VT (ldOp (add (shl IntRegs:$src1, u2ImmPred:$src2),
- (HexagonCONST32 tglobaladdr:$src3)))),
- (MI IntRegs:$src1, u2ImmPred:$src2, tglobaladdr:$src3)>;
- def : Pat <(VT (ldOp (add IntRegs:$src1,
- (HexagonCONST32 tglobaladdr:$src2)))),
- (MI IntRegs:$src1, 0, tglobaladdr:$src2)>;
-
- def : Pat <(VT (ldOp (add (shl IntRegs:$src1, u2ImmPred:$src2),
- (HexagonCONST32 tconstpool:$src3)))),
- (MI IntRegs:$src1, u2ImmPred:$src2, tconstpool:$src3)>;
- def : Pat <(VT (ldOp (add IntRegs:$src1,
- (HexagonCONST32 tconstpool:$src2)))),
- (MI IntRegs:$src1, 0, tconstpool:$src2)>;
-
- def : Pat <(VT (ldOp (add (shl IntRegs:$src1, u2ImmPred:$src2),
- (HexagonCONST32 tjumptable:$src3)))),
- (MI IntRegs:$src1, u2ImmPred:$src2, tjumptable:$src3)>;
- def : Pat <(VT (ldOp (add IntRegs:$src1,
- (HexagonCONST32 tjumptable:$src2)))),
- (MI IntRegs:$src1, 0, tjumptable:$src2)>;
-}
-
-let AddedComplexity = 60 in {
-defm : T_LoadAbsReg_Pat <sextloadi8, L4_loadrb_ur>;
-defm : T_LoadAbsReg_Pat <zextloadi8, L4_loadrub_ur>;
-defm : T_LoadAbsReg_Pat <extloadi8, L4_loadrub_ur>;
-
-defm : T_LoadAbsReg_Pat <sextloadi16, L4_loadrh_ur>;
-defm : T_LoadAbsReg_Pat <zextloadi16, L4_loadruh_ur>;
-defm : T_LoadAbsReg_Pat <extloadi16, L4_loadruh_ur>;
-
-defm : T_LoadAbsReg_Pat <load, L4_loadri_ur>;
-defm : T_LoadAbsReg_Pat <load, L4_loadrd_ur, i64>;
-}
-
//===----------------------------------------------------------------------===//
// Template classes for the non-predicated load instructions with
// base + register offset addressing mode
//===----------------------------------------------------------------------===//
class T_load_rr <string mnemonic, RegisterClass RC, bits<3> MajOp>:
- LDInst<(outs RC:$dst), (ins IntRegs:$src1, IntRegs:$src2, u2Imm:$u2),
+ LDInst<(outs RC:$dst), (ins IntRegs:$src1, IntRegs:$src2, u2_0Imm:$u2),
"$dst = "#mnemonic#"($src1 + $src2<<#$u2)",
[], "", V4LDST_tc_ld_SLOT01>, ImmRegShl, AddrModeRel {
bits<5> dst;
@@ -563,7 +426,7 @@ let isPredicated = 1 in
class T_pload_rr <string mnemonic, RegisterClass RC, bits<3> MajOp,
bit isNot, bit isPredNew>:
LDInst <(outs RC:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2Imm:$u2),
+ (ins PredRegs:$src1, IntRegs:$src2, IntRegs:$src3, u2_0Imm:$u2),
!if(isNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
") ")#"$dst = "#mnemonic#"($src2+$src3<<#$u2)",
[], "", V4LDST_tc_ld_SLOT01>, AddrModeRel {
@@ -628,50 +491,6 @@ defm loadri : ld_idxd_shl<"memw", "LDriw", IntRegs, 0b100>;
let accessSize = DoubleWordAccess in
defm loadrd : ld_idxd_shl<"memd", "LDrid", DoubleRegs, 0b110>;
-// 'def pats' for load instructions with base + register offset and non-zero
-// immediate value. Immediate value is used to left-shift the second
-// register operand.
-class Loadxs_pat<PatFrag Load, ValueType VT, InstHexagon MI>
- : Pat<(VT (Load (add (i32 IntRegs:$Rs),
- (i32 (shl (i32 IntRegs:$Rt), u2ImmPred:$u2))))),
- (VT (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2))>;
-
-let AddedComplexity = 40 in {
- def: Loadxs_pat<extloadi8, i32, L4_loadrub_rr>;
- def: Loadxs_pat<zextloadi8, i32, L4_loadrub_rr>;
- def: Loadxs_pat<sextloadi8, i32, L4_loadrb_rr>;
- def: Loadxs_pat<extloadi16, i32, L4_loadruh_rr>;
- def: Loadxs_pat<zextloadi16, i32, L4_loadruh_rr>;
- def: Loadxs_pat<sextloadi16, i32, L4_loadrh_rr>;
- def: Loadxs_pat<load, i32, L4_loadri_rr>;
- def: Loadxs_pat<load, i64, L4_loadrd_rr>;
-}
-
-// 'def pats' for load instructions with base + register offset and a
-// zero immediate value.
-class Loadxs_simple_pat<PatFrag Load, ValueType VT, InstHexagon MI>
- : Pat<(VT (Load (add (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)))),
- (VT (MI IntRegs:$Rs, IntRegs:$Rt, 0))>;
-
-let AddedComplexity = 20 in {
- def: Loadxs_simple_pat<extloadi8, i32, L4_loadrub_rr>;
- def: Loadxs_simple_pat<zextloadi8, i32, L4_loadrub_rr>;
- def: Loadxs_simple_pat<sextloadi8, i32, L4_loadrb_rr>;
- def: Loadxs_simple_pat<extloadi16, i32, L4_loadruh_rr>;
- def: Loadxs_simple_pat<zextloadi16, i32, L4_loadruh_rr>;
- def: Loadxs_simple_pat<sextloadi16, i32, L4_loadrh_rr>;
- def: Loadxs_simple_pat<load, i32, L4_loadri_rr>;
- def: Loadxs_simple_pat<load, i64, L4_loadrd_rr>;
-}
-
-// zext i1->i64
-def: Pat<(i64 (zext (i1 PredRegs:$src1))),
- (Zext64 (C2_muxii PredRegs:$src1, 1, 0))>;
-
-// zext i32->i64
-def: Pat<(i64 (zext (i32 IntRegs:$src1))),
- (Zext64 IntRegs:$src1)>;
-
//===----------------------------------------------------------------------===//
// LD -
//===----------------------------------------------------------------------===//
@@ -688,7 +507,7 @@ let isExtended = 1, opExtendable = 1, opExtentBits = 6,
class T_ST_absset <string mnemonic, string BaseOp, RegisterClass RC,
bits<3> MajOp, MemAccessSize AccessSz, bit isHalf = 0>
: STInst<(outs IntRegs:$dst),
- (ins u6Ext:$addr, RC:$src),
+ (ins u6_0Ext:$addr, RC:$src),
mnemonic#"($dst = #$addr) = $src"#!if(isHalf, ".h","")>, NewValueRel {
bits<5> dst;
bits<6> addr;
@@ -727,7 +546,7 @@ isExtended = 1, opExtentBits= 6 in
class T_ST_absset_nv <string mnemonic, string BaseOp, bits<2> MajOp,
MemAccessSize AccessSz >
: NVInst <(outs IntRegs:$dst),
- (ins u6Ext:$addr, IntRegs:$src),
+ (ins u6_0Ext:$addr, IntRegs:$src),
mnemonic#"($dst = #$addr) = $src.new">, NewValueRel {
bits<5> dst;
bits<6> addr;
@@ -757,7 +576,7 @@ let isExtended = 1, opExtendable = 2, opExtentBits = 6, InputType = "imm",
class T_StoreAbsReg <string mnemonic, string CextOp, RegisterClass RC,
bits<3> MajOp, MemAccessSize AccessSz, bit isHalf = 0>
: STInst<(outs),
- (ins IntRegs:$src1, u2Imm:$src2, u6Ext:$src3, RC:$src4),
+ (ins IntRegs:$src1, u2_0Imm:$src2, u6_0Ext:$src3, RC:$src4),
mnemonic#"($src1<<#$src2 + #$src3) = $src4"#!if(isHalf, ".h",""),
[]>, ImmRegShl, NewValueRel {
@@ -794,35 +613,12 @@ def S4_storeri_ur : T_StoreAbsReg <"memw", "STriw", IntRegs, 0b100, WordAccess>;
def S4_storerd_ur : T_StoreAbsReg <"memd", "STrid", DoubleRegs, 0b110,
DoubleWordAccess>;
-let AddedComplexity = 40 in
-multiclass T_StoreAbsReg_Pats <InstHexagon MI, RegisterClass RC, ValueType VT,
- PatFrag stOp> {
- def : Pat<(stOp (VT RC:$src4),
- (add (shl (i32 IntRegs:$src1), u2ImmPred:$src2),
- u32ImmPred:$src3)),
- (MI IntRegs:$src1, u2ImmPred:$src2, u32ImmPred:$src3, RC:$src4)>;
-
- def : Pat<(stOp (VT RC:$src4),
- (add (shl IntRegs:$src1, u2ImmPred:$src2),
- (HexagonCONST32 tglobaladdr:$src3))),
- (MI IntRegs:$src1, u2ImmPred:$src2, tglobaladdr:$src3, RC:$src4)>;
-
- def : Pat<(stOp (VT RC:$src4),
- (add IntRegs:$src1, (HexagonCONST32 tglobaladdr:$src3))),
- (MI IntRegs:$src1, 0, tglobaladdr:$src3, RC:$src4)>;
-}
-
-defm : T_StoreAbsReg_Pats <S4_storerd_ur, DoubleRegs, i64, store>;
-defm : T_StoreAbsReg_Pats <S4_storeri_ur, IntRegs, i32, store>;
-defm : T_StoreAbsReg_Pats <S4_storerb_ur, IntRegs, i32, truncstorei8>;
-defm : T_StoreAbsReg_Pats <S4_storerh_ur, IntRegs, i32, truncstorei16>;
-
let mayStore = 1, isNVStore = 1, isExtended = 1, addrMode = BaseLongOffset,
opExtentBits = 6, isNewValue = 1, opNewValue = 3, opExtendable = 2 in
class T_StoreAbsRegNV <string mnemonic, string CextOp, bits<2> MajOp,
MemAccessSize AccessSz>
: NVInst <(outs ),
- (ins IntRegs:$src1, u2Imm:$src2, u6Ext:$src3, IntRegs:$src4),
+ (ins IntRegs:$src1, u2_0Imm:$src2, u6_0Ext:$src3, IntRegs:$src4),
mnemonic#"($src1<<#$src2 + #$src3) = $src4.new">, NewValueRel {
bits<5> src1;
bits<2> src2;
@@ -854,7 +650,7 @@ def S4_storerinew_ur : T_StoreAbsRegNV <"memw", "STriw", 0b10, WordAccess>;
//===----------------------------------------------------------------------===//
let isPredicable = 1 in
class T_store_rr <string mnemonic, RegisterClass RC, bits<3> MajOp, bit isH>
- : STInst < (outs ), (ins IntRegs:$Rs, IntRegs:$Ru, u2Imm:$u2, RC:$Rt),
+ : STInst < (outs ), (ins IntRegs:$Rs, IntRegs:$Ru, u2_0Imm:$u2, RC:$Rt),
mnemonic#"($Rs + $Ru<<#$u2) = $Rt"#!if(isH, ".h",""),
[],"",V4LDST_tc_st_SLOT01>, ImmRegShl, AddrModeRel {
@@ -885,7 +681,7 @@ let isPredicated = 1 in
class T_pstore_rr <string mnemonic, RegisterClass RC, bits<3> MajOp,
bit isNot, bit isPredNew, bit isH>
: STInst <(outs),
- (ins PredRegs:$Pv, IntRegs:$Rs, IntRegs:$Ru, u2Imm:$u2, RC:$Rt),
+ (ins PredRegs:$Pv, IntRegs:$Rs, IntRegs:$Ru, u2_0Imm:$u2, RC:$Rt),
!if(isNot, "if (!$Pv", "if ($Pv")#!if(isPredNew, ".new) ",
") ")#mnemonic#"($Rs+$Ru<<#$u2) = $Rt"#!if(isH, ".h",""),
@@ -921,7 +717,7 @@ class T_pstore_rr <string mnemonic, RegisterClass RC, bits<3> MajOp,
//===----------------------------------------------------------------------===//
let isPredicable = 1, isNewValue = 1, opNewValue = 3 in
class T_store_new_rr <string mnemonic, bits<2> MajOp> :
- NVInst < (outs ), (ins IntRegs:$Rs, IntRegs:$Ru, u2Imm:$u2, IntRegs:$Nt),
+ NVInst < (outs ), (ins IntRegs:$Rs, IntRegs:$Ru, u2_0Imm:$u2, IntRegs:$Nt),
mnemonic#"($Rs + $Ru<<#$u2) = $Nt.new",
[],"",V4LDST_tc_st_SLOT0>, ImmRegShl, AddrModeRel {
@@ -948,7 +744,7 @@ class T_store_new_rr <string mnemonic, bits<2> MajOp> :
let isPredicated = 1, isNewValue = 1, opNewValue = 4 in
class T_pstore_new_rr <string mnemonic, bits<2> MajOp, bit isNot, bit isPredNew>
: NVInst<(outs),
- (ins PredRegs:$Pv, IntRegs:$Rs, IntRegs:$Ru, u2Imm:$u2, IntRegs:$Nt),
+ (ins PredRegs:$Pv, IntRegs:$Rs, IntRegs:$Ru, u2_0Imm:$u2, IntRegs:$Nt),
!if(isNot, "if (!$Pv", "if ($Pv")#!if(isPredNew, ".new) ",
") ")#mnemonic#"($Rs+$Ru<<#$u2) = $Nt.new",
[], "", V4LDST_tc_st_SLOT0>, AddrModeRel {
@@ -1035,48 +831,13 @@ let addrMode = BaseRegOffset, InputType = "reg", hasSideEffects = 0 in {
defm storerf: ST_Idxd_shl<"memh", "STrif", IntRegs, 0b011, 1>;
}
-class Storexs_pat<PatFrag Store, PatFrag Value, InstHexagon MI>
- : Pat<(Store Value:$Ru, (add (i32 IntRegs:$Rs),
- (i32 (shl (i32 IntRegs:$Rt), u2ImmPred:$u2)))),
- (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2, Value:$Ru)>;
-
-let AddedComplexity = 40 in {
- def: Storexs_pat<truncstorei8, I32, S4_storerb_rr>;
- def: Storexs_pat<truncstorei16, I32, S4_storerh_rr>;
- def: Storexs_pat<store, I32, S4_storeri_rr>;
- def: Storexs_pat<store, I64, S4_storerd_rr>;
-}
-
-class Store_rr_pat<PatFrag Store, PatFrag Value, InstHexagon MI>
- : Pat<(Store Value:$Ru, (add I32:$Rs, I32:$Rt)),
- (MI IntRegs:$Rs, IntRegs:$Rt, 0, Value:$Ru)>;
-
-let AddedComplexity = 20 in {
- def: Store_rr_pat<truncstorei8, I32, S4_storerb_rr>;
- def: Store_rr_pat<truncstorei16, I32, S4_storerh_rr>;
- def: Store_rr_pat<store, I32, S4_storeri_rr>;
- def: Store_rr_pat<store, I64, S4_storerd_rr>;
-}
-
-
-// memd(Rx++#s4:3)=Rtt
-// memd(Rx++#s4:3:circ(Mu))=Rtt
-// memd(Rx++I:circ(Mu))=Rtt
-// memd(Rx++Mu)=Rtt
-// memd(Rx++Mu:brev)=Rtt
-// memd(gp+#u16:3)=Rtt
-
-// Store doubleword conditionally.
-// if ([!]Pv[.new]) memd(#u6)=Rtt
-// TODO: needs to be implemented.
-
//===----------------------------------------------------------------------===//
// Template class
//===----------------------------------------------------------------------===//
let isPredicable = 1, isExtendable = 1, isExtentSigned = 1, opExtentBits = 8,
opExtendable = 2 in
class T_StoreImm <string mnemonic, Operand OffsetOp, bits<2> MajOp >
- : STInst <(outs ), (ins IntRegs:$Rs, OffsetOp:$offset, s8Ext:$S8),
+ : STInst <(outs ), (ins IntRegs:$Rs, OffsetOp:$offset, s8_0Ext:$S8),
mnemonic#"($Rs+#$offset)=#$S8",
[], "", V4LDST_tc_st_SLOT01>,
ImmRegRel, PredNewRel {
@@ -1105,7 +866,7 @@ let isPredicated = 1, isExtendable = 1, isExtentSigned = 1, opExtentBits = 6,
class T_StoreImm_pred <string mnemonic, Operand OffsetOp, bits<2> MajOp,
bit isPredNot, bit isPredNew >
: STInst <(outs ),
- (ins PredRegs:$Pv, IntRegs:$Rs, OffsetOp:$offset, s6Ext:$S6),
+ (ins PredRegs:$Pv, IntRegs:$Rs, OffsetOp:$offset, s6_0Ext:$S6),
!if(isPredNot, "if (!$Pv", "if ($Pv")#!if(isPredNew, ".new) ",
") ")#mnemonic#"($Rs+#$offset)=#$S6",
[], "", V4LDST_tc_st_SLOT01>,
@@ -1173,126 +934,6 @@ let hasSideEffects = 0, addrMode = BaseImmOffset,
defm S4_storeiri : ST_Imm<"memw", "STriw", u6_2Imm, 0b10>;
}
-def IMM_BYTE : SDNodeXForm<imm, [{
- // -1 etc is represented as 255 etc
- // assigning to a byte restores our desired signed value.
- int8_t imm = N->getSExtValue();
- return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32);
-}]>;
-
-def IMM_HALF : SDNodeXForm<imm, [{
- // -1 etc is represented as 65535 etc
- // assigning to a short restores our desired signed value.
- int16_t imm = N->getSExtValue();
- return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32);
-}]>;
-
-def IMM_WORD : SDNodeXForm<imm, [{
- // -1 etc can be represented as 4294967295 etc
- // Currently, it's not doing this. But some optimization
- // might convert -1 to a large +ve number.
- // assigning to a word restores our desired signed value.
- int32_t imm = N->getSExtValue();
- return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32);
-}]>;
-
-def ToImmByte : OutPatFrag<(ops node:$R), (IMM_BYTE $R)>;
-def ToImmHalf : OutPatFrag<(ops node:$R), (IMM_HALF $R)>;
-def ToImmWord : OutPatFrag<(ops node:$R), (IMM_WORD $R)>;
-
-// Emit store-immediate, but only when the stored value will not be constant-
-// extended. The reason for that is that there is no pass that can optimize
-// constant extenders in store-immediate instructions. In some cases we can
-// end up with a number of such stores, all of which store the same extended
-// value (e.g. after unrolling a loop that initializes a floating point array).
-
-// Predicates to determine if the 16-bit immediate is expressible as a sign-
-// extended 8-bit immediate. Store-immediate-halfword will ignore any bits
-// beyond 0..15, so we don't care what is in there.
-
-def i16in8ImmPred: PatLeaf<(i32 imm), [{
- int64_t v = (int16_t)N->getSExtValue();
- return v == (int64_t)(int8_t)v;
-}]>;
-
-// Predicates to determine if the 32-bit immediate is expressible as a sign-
-// extended 8-bit immediate.
-def i32in8ImmPred: PatLeaf<(i32 imm), [{
- int64_t v = (int32_t)N->getSExtValue();
- return v == (int64_t)(int8_t)v;
-}]>;
-
-
-let AddedComplexity = 40 in {
- // Even though the offset is not extendable in the store-immediate, we
- // can still generate the fi# in the base address. If the final offset
- // is not valid for the instruction, we will replace it with a scratch
- // register.
-// def: Storexm_fi_pat <truncstorei8, s32ImmPred, ToImmByte, S4_storeirb_io>;
-// def: Storexm_fi_pat <truncstorei16, i16in8ImmPred, ToImmHalf,
-// S4_storeirh_io>;
-// def: Storexm_fi_pat <store, i32in8ImmPred, ToImmWord, S4_storeiri_io>;
-
-// defm: Storexm_fi_add_pat <truncstorei8, s32ImmPred, u6_0ImmPred, ToImmByte,
-// S4_storeirb_io>;
-// defm: Storexm_fi_add_pat <truncstorei16, i16in8ImmPred, u6_1ImmPred,
-// ToImmHalf, S4_storeirh_io>;
-// defm: Storexm_fi_add_pat <store, i32in8ImmPred, u6_2ImmPred, ToImmWord,
-// S4_storeiri_io>;
-
- defm: Storexm_add_pat<truncstorei8, s32ImmPred, u6_0ImmPred, ToImmByte,
- S4_storeirb_io>;
- defm: Storexm_add_pat<truncstorei16, i16in8ImmPred, u6_1ImmPred, ToImmHalf,
- S4_storeirh_io>;
- defm: Storexm_add_pat<store, i32in8ImmPred, u6_2ImmPred, ToImmWord,
- S4_storeiri_io>;
-}
-
-def: Storexm_simple_pat<truncstorei8, s32ImmPred, ToImmByte, S4_storeirb_io>;
-def: Storexm_simple_pat<truncstorei16, s32ImmPred, ToImmHalf, S4_storeirh_io>;
-def: Storexm_simple_pat<store, s32ImmPred, ToImmWord, S4_storeiri_io>;
-
-// memb(Rx++#s4:0:circ(Mu))=Rt
-// memb(Rx++I:circ(Mu))=Rt
-// memb(Rx++Mu)=Rt
-// memb(Rx++Mu:brev)=Rt
-// memb(gp+#u16:0)=Rt
-
-// Store halfword.
-// TODO: needs to be implemented
-// memh(Re=#U6)=Rt.H
-// memh(Rs+#s11:1)=Rt.H
-// memh(Rs+Ru<<#u2)=Rt.H
-// TODO: needs to be implemented.
-
-// memh(Ru<<#u2+#U6)=Rt.H
-// memh(Rx++#s4:1:circ(Mu))=Rt.H
-// memh(Rx++#s4:1:circ(Mu))=Rt
-// memh(Rx++I:circ(Mu))=Rt.H
-// memh(Rx++I:circ(Mu))=Rt
-// memh(Rx++Mu)=Rt.H
-// memh(Rx++Mu)=Rt
-// memh(Rx++Mu:brev)=Rt.H
-// memh(Rx++Mu:brev)=Rt
-// memh(gp+#u16:1)=Rt
-// if ([!]Pv[.new]) memh(#u6)=Rt.H
-// if ([!]Pv[.new]) memh(#u6)=Rt
-
-// if ([!]Pv[.new]) memh(Rs+#u6:1)=Rt.H
-// TODO: needs to be implemented.
-
-// if ([!]Pv[.new]) memh(Rx++#s4:1)=Rt.H
-// TODO: Needs to be implemented.
-
-// Store word.
-// memw(Re=#U6)=Rt
-// TODO: Needs to be implemented.
-// memw(Rx++#s4:2)=Rt
-// memw(Rx++#s4:2:circ(Mu))=Rt
-// memw(Rx++I:circ(Mu))=Rt
-// memw(Rx++Mu)=Rt
-// memw(Rx++Mu:brev)=Rt
-
//===----------------------------------------------------------------------===
// ST -
//===----------------------------------------------------------------------===
@@ -1685,7 +1326,7 @@ let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 11,
class NVJri_template<string mnemonic, bits<3> majOp, bit isNegCond,
bit isTak>
: NVInst_V4<(outs),
- (ins IntRegs:$src1, u5Imm:$src2, brtarget:$offset),
+ (ins IntRegs:$src1, u5_0Imm:$src2, brtarget:$offset),
"if ("#!if(isNegCond, "!","")#mnemonic#"($src1.new, #$src2)) jump:"
#!if(isTak, "t","nt")#" $offset", []> {
@@ -1738,19 +1379,22 @@ let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator = 1,
// with a register and a hardcoded 0/-1 immediate value.
//===----------------------------------------------------------------------===//
-let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 11,
+let isExtendable = 1, isExtentSigned = 1, opExtentBits = 11,
opExtentAlign = 2 in
class NVJ_ConstImm_template<string mnemonic, bits<3> majOp, string ImmVal,
bit isNegCond, bit isTak>
: NVInst_V4<(outs),
- (ins IntRegs:$src1, brtarget:$offset),
+ !if(!eq(ImmVal, "{-1}"),
+ (ins IntRegs:$src1, n1Const:$n1, brtarget:$offset),
+ (ins IntRegs:$src1, brtarget:$offset)),
"if ("#!if(isNegCond, "!","")#mnemonic
- #"($src1.new, #" # ImmVal # ")) jump:"
+ #"($src1.new, #" # !if(!eq(ImmVal, "{-1}"), "$n1", ImmVal) # ")) jump:"
#!if(isTak, "t","nt")#" $offset", []> {
let isTaken = isTak;
let isPredicatedFalse = isNegCond;
let isTaken = isTak;
+ let opExtendable = !if(!eq(ImmVal, "{-1}"), 2, 1);
bits<3> src1;
bits<11> offset;
@@ -1787,8 +1431,8 @@ multiclass NVJ_ConstImm_base<string mnemonic, string BaseOp, bits<3> majOp,
let isPredicated = 1, isBranch = 1, isNewValue = 1, isTerminator=1,
Defs = [PC], hasSideEffects = 0 in {
defm J4_tstbit0 : NVJ_ConstImm_base<"tstbit", "TSTBIT", 0b011, "0">, PredRel;
- defm J4_cmpeqn1 : NVJ_ConstImm_base<"cmp.eq", "CMPEQ", 0b100, "-1">, PredRel;
- defm J4_cmpgtn1 : NVJ_ConstImm_base<"cmp.gt", "CMPGT", 0b101, "-1">, PredRel;
+ defm J4_cmpeqn1 : NVJ_ConstImm_base<"cmp.eq", "CMPEQ", 0b100, "{-1}">, PredRel;
+ defm J4_cmpgtn1 : NVJ_ConstImm_base<"cmp.gt", "CMPGT", 0b101, "{-1}">, PredRel;
}
// J4_hintjumpr: Hint indirect conditional jump.
@@ -1814,7 +1458,7 @@ def J4_hintjumpr: JRInst <
// PC-relative add
let hasNewValue = 1, isExtendable = 1, opExtendable = 1,
isExtentSigned = 0, opExtentBits = 6, hasSideEffects = 0, Uses = [PC] in
-def C4_addipc : CRInst <(outs IntRegs:$Rd), (ins u6Ext:$u6),
+def C4_addipc : CRInst <(outs IntRegs:$Rd), (ins u6_0Ext:$u6),
"$Rd = add(pc, #$u6)", [], "", CR_tc_2_SLOT3 > {
bits<5> Rd;
bits<6> u6;
@@ -1860,48 +1504,6 @@ def C4_and_orn : T_LOGICAL_3OP<"and", "or", 0b01, 1>;
def C4_or_andn : T_LOGICAL_3OP<"or", "and", 0b10, 1>;
def C4_or_orn : T_LOGICAL_3OP<"or", "or", 0b11, 1>;
-// op(Ps, op(Pt, Pu))
-class LogLog_pat<SDNode Op1, SDNode Op2, InstHexagon MI>
- : Pat<(i1 (Op1 I1:$Ps, (Op2 I1:$Pt, I1:$Pu))),
- (MI I1:$Ps, I1:$Pt, I1:$Pu)>;
-
-// op(Ps, op(Pt, ~Pu))
-class LogLogNot_pat<SDNode Op1, SDNode Op2, InstHexagon MI>
- : Pat<(i1 (Op1 I1:$Ps, (Op2 I1:$Pt, (not I1:$Pu)))),
- (MI I1:$Ps, I1:$Pt, I1:$Pu)>;
-
-def: LogLog_pat<and, and, C4_and_and>;
-def: LogLog_pat<and, or, C4_and_or>;
-def: LogLog_pat<or, and, C4_or_and>;
-def: LogLog_pat<or, or, C4_or_or>;
-
-def: LogLogNot_pat<and, and, C4_and_andn>;
-def: LogLogNot_pat<and, or, C4_and_orn>;
-def: LogLogNot_pat<or, and, C4_or_andn>;
-def: LogLogNot_pat<or, or, C4_or_orn>;
-
-//===----------------------------------------------------------------------===//
-// PIC: Support for PIC compilations. The patterns and SD nodes defined
-// below are needed to support code generation for PIC
-//===----------------------------------------------------------------------===//
-
-def SDT_HexagonAtGot
- : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>;
-def SDT_HexagonAtPcrel
- : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
-
-// AT_GOT address-of-GOT, address-of-global, offset-in-global
-def HexagonAtGot : SDNode<"HexagonISD::AT_GOT", SDT_HexagonAtGot>;
-// AT_PCREL address-of-global
-def HexagonAtPcrel : SDNode<"HexagonISD::AT_PCREL", SDT_HexagonAtPcrel>;
-
-def: Pat<(HexagonAtGot I32:$got, I32:$addr, (i32 0)),
- (L2_loadri_io I32:$got, imm:$addr)>;
-def: Pat<(HexagonAtGot I32:$got, I32:$addr, s30_2ImmPred:$off),
- (A2_addi (L2_loadri_io I32:$got, imm:$addr), imm:$off)>;
-def: Pat<(HexagonAtPcrel I32:$addr),
- (C4_addipc imm:$addr)>;
-
//===----------------------------------------------------------------------===//
// CR -
//===----------------------------------------------------------------------===//
@@ -1914,11 +1516,6 @@ def: Pat<(HexagonAtPcrel I32:$addr),
def A4_andnp : T_ALU64_logical<"and", 0b001, 1, 0, 1>;
def A4_ornp : T_ALU64_logical<"or", 0b011, 1, 0, 1>;
-def: Pat<(i64 (and (i64 DoubleRegs:$Rs), (i64 (not (i64 DoubleRegs:$Rt))))),
- (A4_andnp DoubleRegs:$Rs, DoubleRegs:$Rt)>;
-def: Pat<(i64 (or (i64 DoubleRegs:$Rs), (i64 (not (i64 DoubleRegs:$Rt))))),
- (A4_ornp DoubleRegs:$Rs, DoubleRegs:$Rt)>;
-
let hasNewValue = 1, hasSideEffects = 0 in
def S4_parity: ALU64Inst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt),
"$Rd = parity($Rs, $Rt)", [], "", ALU64_tc_2_SLOT23> {
@@ -1938,10 +1535,8 @@ def S4_parity: ALU64Inst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, IntRegs:$Rt),
let isExtentSigned = 1, hasNewValue = 1, isExtendable = 1, opExtentBits = 6,
opExtendable = 3 in
def S4_addaddi : ALU64Inst <(outs IntRegs:$Rd),
- (ins IntRegs:$Rs, IntRegs:$Ru, s6Ext:$s6),
- "$Rd = add($Rs, add($Ru, #$s6))" ,
- [(set (i32 IntRegs:$Rd), (add (i32 IntRegs:$Rs),
- (add (i32 IntRegs:$Ru), s32ImmPred:$s6)))],
+ (ins IntRegs:$Rs, IntRegs:$Ru, s6_0Ext:$s6),
+ "$Rd = add($Rs, add($Ru, #$s6))" , [],
"", ALU64_tc_2_SLOT23> {
bits<5> Rd;
bits<5> Rs;
@@ -1962,7 +1557,7 @@ def S4_addaddi : ALU64Inst <(outs IntRegs:$Rd),
let isExtentSigned = 1, hasSideEffects = 0, hasNewValue = 1, isExtendable = 1,
opExtentBits = 6, opExtendable = 2 in
def S4_subaddi: ALU64Inst <(outs IntRegs:$Rd),
- (ins IntRegs:$Rs, s6Ext:$s6, IntRegs:$Ru),
+ (ins IntRegs:$Rs, s6_0Ext:$s6, IntRegs:$Ru),
"$Rd = add($Rs, sub(#$s6, $Ru))",
[], "", ALU64_tc_2_SLOT23> {
bits<5> Rd;
@@ -1981,40 +1576,12 @@ def S4_subaddi: ALU64Inst <(outs IntRegs:$Rd),
let Inst{4-0} = Ru;
}
-// Rd=add(Rs,sub(#s6,Ru))
-def: Pat<(add (i32 IntRegs:$src1), (sub s32ImmPred:$src2,
- (i32 IntRegs:$src3))),
- (S4_subaddi IntRegs:$src1, s32ImmPred:$src2, IntRegs:$src3)>;
-
-// Rd=sub(add(Rs,#s6),Ru)
-def: Pat<(sub (add (i32 IntRegs:$src1), s32ImmPred:$src2),
- (i32 IntRegs:$src3)),
- (S4_subaddi IntRegs:$src1, s32ImmPred:$src2, IntRegs:$src3)>;
-
-// Rd=add(sub(Rs,Ru),#s6)
-def: Pat<(add (sub (i32 IntRegs:$src1), (i32 IntRegs:$src3)),
- (s32ImmPred:$src2)),
- (S4_subaddi IntRegs:$src1, s32ImmPred:$src2, IntRegs:$src3)>;
-
-
-// Add or subtract doublewords with carry.
-//TODO:
-// Rdd=add(Rss,Rtt,Px):carry
-//TODO:
-// Rdd=sub(Rss,Rtt,Px):carry
-
-// Extract bitfield
-// Rdd=extract(Rss,#u6,#U6)
-// Rdd=extract(Rss,Rtt)
-// Rd=extract(Rs,Rtt)
-// Rd=extract(Rs,#u5,#U5)
-
def S4_extractp_rp : T_S3op_64 < "extract", 0b11, 0b100, 0>;
-def S4_extractp : T_S2op_extract <"extract", 0b1010, DoubleRegs, u6Imm>;
+def S4_extractp : T_S2op_extract <"extract", 0b1010, DoubleRegs, u6_0Imm>;
let hasNewValue = 1 in {
def S4_extract_rp : T_S3op_extract<"extract", 0b01>;
- def S4_extract : T_S2op_extract <"extract", 0b1101, IntRegs, u5Imm>;
+ def S4_extract : T_S2op_extract <"extract", 0b1101, IntRegs, u5_0Imm>;
}
// Complex add/sub halfwords/words
@@ -2041,10 +1608,7 @@ let hasSideEffects = 0 in
def M4_xor_xacc
: SInst <(outs DoubleRegs:$Rxx),
(ins DoubleRegs:$dst2, DoubleRegs:$Rss, DoubleRegs:$Rtt),
- "$Rxx ^= xor($Rss, $Rtt)",
- [(set (i64 DoubleRegs:$Rxx),
- (xor (i64 DoubleRegs:$dst2), (xor (i64 DoubleRegs:$Rss),
- (i64 DoubleRegs:$Rtt))))],
+ "$Rxx ^= xor($Rss, $Rtt)", [],
"$dst2 = $Rxx", S_3op_tc_1_SLOT23> {
bits<5> Rxx;
bits<5> Rss;
@@ -2064,7 +1628,7 @@ def M4_xor_xacc
let hasSideEffects = 0 in
def S4_vrcrotate
: SInst <(outs DoubleRegs:$Rdd),
- (ins DoubleRegs:$Rss, IntRegs:$Rt, u2Imm:$u2),
+ (ins DoubleRegs:$Rss, IntRegs:$Rt, u2_0Imm:$u2),
"$Rdd = vrcrotate($Rss, $Rt, #$u2)",
[], "", S_3op_tc_3x_SLOT23> {
bits<5> Rdd;
@@ -2088,7 +1652,7 @@ def S4_vrcrotate
let hasSideEffects = 0 in
def S4_vrcrotate_acc
: SInst <(outs DoubleRegs:$Rxx),
- (ins DoubleRegs:$dst2, DoubleRegs:$Rss, IntRegs:$Rt, u2Imm:$u2),
+ (ins DoubleRegs:$dst2, DoubleRegs:$Rss, IntRegs:$Rt, u2_0Imm:$u2),
"$Rxx += vrcrotate($Rss, $Rt, #$u2)", [],
"$dst2 = $Rxx", S_3op_tc_3x_SLOT23> {
bits<5> Rxx;
@@ -2144,10 +1708,8 @@ let isExtentSigned = 1, hasNewValue = 1, isExtendable = 1, opExtentBits = 10,
opExtendable = 3 in
def S4_or_andix:
ALU64Inst<(outs IntRegs:$Rx),
- (ins IntRegs:$Ru, IntRegs:$_src_, s10Ext:$s10),
- "$Rx = or($Ru, and($_src_, #$s10))" ,
- [(set (i32 IntRegs:$Rx),
- (or (i32 IntRegs:$Ru), (and (i32 IntRegs:$_src_), s32ImmPred:$s10)))] ,
+ (ins IntRegs:$Ru, IntRegs:$_src_, s10_0Ext:$s10),
+ "$Rx = or($Ru, and($_src_, #$s10))" , [] ,
"$_src_ = $Rx", ALU64_tc_2_SLOT23> {
bits<5> Rx;
bits<5> Ru;
@@ -2266,33 +1828,13 @@ def M4_xor_andn : T_MType_acc_rr < "^= and", 0b001, 0b010, 0, [], 1>;
def M4_or_andn : T_MType_acc_rr < "|= and", 0b001, 0b000, 0, [], 1>;
def M4_and_andn : T_MType_acc_rr < "&= and", 0b001, 0b001, 0, [], 1>;
-def: T_MType_acc_pat2 <M4_or_xor, xor, or>;
-def: T_MType_acc_pat2 <M4_and_xor, xor, and>;
-def: T_MType_acc_pat2 <M4_or_and, and, or>;
-def: T_MType_acc_pat2 <M4_and_and, and, and>;
-def: T_MType_acc_pat2 <M4_xor_and, and, xor>;
-def: T_MType_acc_pat2 <M4_or_or, or, or>;
-def: T_MType_acc_pat2 <M4_and_or, or, and>;
-def: T_MType_acc_pat2 <M4_xor_or, or, xor>;
-
-class T_MType_acc_pat3 <InstHexagon MI, SDNode firstOp, SDNode secOp>
- : Pat <(i32 (secOp IntRegs:$src1, (firstOp IntRegs:$src2,
- (not IntRegs:$src3)))),
- (i32 (MI IntRegs:$src1, IntRegs:$src2, IntRegs:$src3))>;
-
-def: T_MType_acc_pat3 <M4_or_andn, and, or>;
-def: T_MType_acc_pat3 <M4_and_andn, and, and>;
-def: T_MType_acc_pat3 <M4_xor_andn, and, xor>;
-
// Compound or-or and or-and
let isExtentSigned = 1, InputType = "imm", hasNewValue = 1, isExtendable = 1,
opExtentBits = 10, opExtendable = 3 in
class T_CompOR <string mnemonic, bits<2> MajOp, SDNode OpNode>
: MInst_acc <(outs IntRegs:$Rx),
- (ins IntRegs:$src1, IntRegs:$Rs, s10Ext:$s10),
- "$Rx |= "#mnemonic#"($Rs, #$s10)",
- [(set (i32 IntRegs:$Rx), (or (i32 IntRegs:$src1),
- (OpNode (i32 IntRegs:$Rs), s32ImmPred:$s10)))],
+ (ins IntRegs:$src1, IntRegs:$Rs, s10_0Ext:$s10),
+ "$Rx |= "#mnemonic#"($Rs, #$s10)", [],
"$src1 = $Rx", ALU64_tc_2_SLOT23>, ImmRegRel {
bits<5> Rx;
bits<5> Rs;
@@ -2363,21 +1905,8 @@ def S2_ct0p : T_COUNT_LEADING_64<"ct0", 0b111, 0b010>;
def S2_ct1p : T_COUNT_LEADING_64<"ct1", 0b111, 0b100>;
def S4_clbpnorm : T_COUNT_LEADING_64<"normamt", 0b011, 0b000>;
-// Count trailing zeros: 64-bit.
-def: Pat<(i32 (trunc (cttz I64:$Rss))), (S2_ct0p I64:$Rss)>;
-
-// Count trailing ones: 64-bit.
-def: Pat<(i32 (trunc (cttz (not I64:$Rss)))), (S2_ct1p I64:$Rss)>;
-
-// Define leading/trailing patterns that require zero-extensions to 64 bits.
-def: Pat<(i64 (ctlz I64:$Rss)), (Zext64 (S2_cl0p I64:$Rss))>;
-def: Pat<(i64 (cttz I64:$Rss)), (Zext64 (S2_ct0p I64:$Rss))>;
-def: Pat<(i64 (ctlz (not I64:$Rss))), (Zext64 (S2_cl1p I64:$Rss))>;
-def: Pat<(i64 (cttz (not I64:$Rss))), (Zext64 (S2_ct1p I64:$Rss))>;
-
-
let hasSideEffects = 0, hasNewValue = 1 in
-def S4_clbaddi : SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, s6Imm:$s6),
+def S4_clbaddi : SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, s6_0Imm:$s6),
"$Rd = add(clb($Rs), #$s6)", [], "", S_2op_tc_2_SLOT23> {
bits<5> Rs;
bits<5> Rd;
@@ -2392,7 +1921,7 @@ def S4_clbaddi : SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs, s6Imm:$s6),
}
let hasSideEffects = 0, hasNewValue = 1 in
-def S4_clbpaddi : SInst<(outs IntRegs:$Rd), (ins DoubleRegs:$Rs, s6Imm:$s6),
+def S4_clbpaddi : SInst<(outs IntRegs:$Rd), (ins DoubleRegs:$Rs, s6_0Imm:$s6),
"$Rd = add(clb($Rs), #$s6)", [], "", S_2op_tc_2_SLOT23> {
bits<5> Rs;
bits<5> Rd;
@@ -2411,41 +1940,10 @@ def S4_clbpaddi : SInst<(outs IntRegs:$Rd), (ins DoubleRegs:$Rs, s6Imm:$s6),
def S4_ntstbit_i : T_TEST_BIT_IMM<"!tstbit", 0b001>;
def S4_ntstbit_r : T_TEST_BIT_REG<"!tstbit", 1>;
-let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm.
- def: Pat<(i1 (seteq (and (shl 1, u5ImmPred:$u5), (i32 IntRegs:$Rs)), 0)),
- (S4_ntstbit_i (i32 IntRegs:$Rs), u5ImmPred:$u5)>;
- def: Pat<(i1 (seteq (and (shl 1, (i32 IntRegs:$Rt)), (i32 IntRegs:$Rs)), 0)),
- (S4_ntstbit_r (i32 IntRegs:$Rs), (i32 IntRegs:$Rt))>;
-}
-
-// Add extra complexity to prefer these instructions over bitsset/bitsclr.
-// The reason is that tstbit/ntstbit can be folded into a compound instruction:
-// if ([!]tstbit(...)) jump ...
-let AddedComplexity = 100 in
-def: Pat<(i1 (setne (and (i32 IntRegs:$Rs), (i32 Set5ImmPred:$u5)), (i32 0))),
- (S2_tstbit_i (i32 IntRegs:$Rs), (BITPOS32 Set5ImmPred:$u5))>;
-
-let AddedComplexity = 100 in
-def: Pat<(i1 (seteq (and (i32 IntRegs:$Rs), (i32 Set5ImmPred:$u5)), (i32 0))),
- (S4_ntstbit_i (i32 IntRegs:$Rs), (BITPOS32 Set5ImmPred:$u5))>;
-
def C4_nbitsset : T_TEST_BITS_REG<"!bitsset", 0b01, 1>;
def C4_nbitsclr : T_TEST_BITS_REG<"!bitsclr", 0b10, 1>;
def C4_nbitsclri : T_TEST_BITS_IMM<"!bitsclr", 0b10, 1>;
-// Do not increase complexity of these patterns. In the DAG, "cmp i8" may be
-// represented as a compare against "value & 0xFF", which is an exact match
-// for cmpb (same for cmph). The patterns below do not contain any additional
-// complexity that would make them preferable, and if they were actually used
-// instead of cmpb/cmph, they would result in a compare against a register that
-// is loaded with the byte/half mask (i.e. 0xFF or 0xFFFF).
-def: Pat<(i1 (setne (and I32:$Rs, u6ImmPred:$u6), 0)),
- (C4_nbitsclri I32:$Rs, u6ImmPred:$u6)>;
-def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), 0)),
- (C4_nbitsclr I32:$Rs, I32:$Rt)>;
-def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), I32:$Rt)),
- (C4_nbitsset I32:$Rs, I32:$Rt)>;
-
//===----------------------------------------------------------------------===//
// XTYPE/BIT -
//===----------------------------------------------------------------------===//
@@ -2458,11 +1956,8 @@ def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), I32:$Rt)),
let hasNewValue = 1, isExtendable = 1, opExtentBits = 6, opExtendable = 1 in
def M4_mpyri_addi : MInst<(outs IntRegs:$Rd),
- (ins u6Ext:$u6, IntRegs:$Rs, u6Imm:$U6),
- "$Rd = add(#$u6, mpyi($Rs, #$U6))" ,
- [(set (i32 IntRegs:$Rd),
- (add (mul (i32 IntRegs:$Rs), u6ImmPred:$U6),
- u32ImmPred:$u6))] ,"",ALU64_tc_3x_SLOT23> {
+ (ins u6_0Ext:$u6, IntRegs:$Rs, u6_0Imm:$U6),
+ "$Rd = add(#$u6, mpyi($Rs, #$U6))" , [],"",ALU64_tc_3x_SLOT23> {
bits<5> Rd;
bits<6> u6;
bits<5> Rs;
@@ -2484,11 +1979,8 @@ def M4_mpyri_addi : MInst<(outs IntRegs:$Rd),
let CextOpcode = "ADD_MPY", InputType = "imm", hasNewValue = 1,
isExtendable = 1, opExtentBits = 6, opExtendable = 1 in
def M4_mpyrr_addi : MInst <(outs IntRegs:$Rd),
- (ins u6Ext:$u6, IntRegs:$Rs, IntRegs:$Rt),
- "$Rd = add(#$u6, mpyi($Rs, $Rt))" ,
- [(set (i32 IntRegs:$Rd),
- (add (mul (i32 IntRegs:$Rs), (i32 IntRegs:$Rt)), u32ImmPred:$u6))],
- "", ALU64_tc_3x_SLOT23>, ImmRegRel {
+ (ins u6_0Ext:$u6, IntRegs:$Rs, IntRegs:$Rt),
+ "$Rd = add(#$u6, mpyi($Rs, $Rt))" , [], "", ALU64_tc_3x_SLOT23>, ImmRegRel {
bits<5> Rd;
bits<6> u6;
bits<5> Rs;
@@ -2509,9 +2001,7 @@ let hasNewValue = 1 in
class T_AddMpy <bit MajOp, PatLeaf ImmPred, dag ins>
: ALU64Inst <(outs IntRegs:$dst), ins,
"$dst = add($src1, mpyi("#!if(MajOp,"$src3, #$src2))",
- "#$src2, $src3))"),
- [(set (i32 IntRegs:$dst),
- (add (i32 IntRegs:$src1), (mul (i32 IntRegs:$src3), ImmPred:$src2)))],
+ "#$src2, $src3))"), [],
"", ALU64_tc_3x_SLOT23> {
bits<5> dst;
bits<5> src1;
@@ -2537,16 +2027,14 @@ def M4_mpyri_addr_u2 : T_AddMpy<0b0, u6_2ImmPred,
let isExtendable = 1, opExtentBits = 6, opExtendable = 3,
CextOpcode = "ADD_MPY", InputType = "imm" in
-def M4_mpyri_addr : T_AddMpy<0b1, u32ImmPred,
- (ins IntRegs:$src1, IntRegs:$src3, u6Ext:$src2)>, ImmRegRel;
+def M4_mpyri_addr : T_AddMpy<0b1, u32_0ImmPred,
+ (ins IntRegs:$src1, IntRegs:$src3, u6_0Ext:$src2)>, ImmRegRel;
// Rx=add(Ru,mpyi(Rx,Rs))
let CextOpcode = "ADD_MPY", InputType = "reg", hasNewValue = 1 in
def M4_mpyrr_addr: MInst_acc <(outs IntRegs:$Rx),
(ins IntRegs:$Ru, IntRegs:$_src_, IntRegs:$Rs),
- "$Rx = add($Ru, mpyi($_src_, $Rs))",
- [(set (i32 IntRegs:$Rx), (add (i32 IntRegs:$Ru),
- (mul (i32 IntRegs:$_src_), (i32 IntRegs:$Rs))))],
+ "$Rx = add($Ru, mpyi($_src_, $Rs))", [],
"$_src_ = $Rx", M_tc_3x_SLOT23>, ImmRegRel {
bits<5> Rx;
bits<5> Ru;
@@ -2637,24 +2125,23 @@ class T_vcmpImm <string Str, bits<2> cmpOp, bits<2> minOp, Operand ImmOprnd>
// Vector compare bytes
def A4_vcmpbgt : T_vcmp <"vcmpb.gt", 0b1010>;
-def: T_vcmp_pat<A4_vcmpbgt, setgt, v8i8>;
let AsmString = "$Pd = any8(vcmpb.eq($Rss, $Rtt))" in
def A4_vcmpbeq_any : T_vcmp <"any8(vcmpb.gt", 0b1000>;
-def A4_vcmpbeqi : T_vcmpImm <"vcmpb.eq", 0b00, 0b00, u8Imm>;
-def A4_vcmpbgti : T_vcmpImm <"vcmpb.gt", 0b01, 0b00, s8Imm>;
-def A4_vcmpbgtui : T_vcmpImm <"vcmpb.gtu", 0b10, 0b00, u7Imm>;
+def A4_vcmpbeqi : T_vcmpImm <"vcmpb.eq", 0b00, 0b00, u8_0Imm>;
+def A4_vcmpbgti : T_vcmpImm <"vcmpb.gt", 0b01, 0b00, s8_0Imm>;
+def A4_vcmpbgtui : T_vcmpImm <"vcmpb.gtu", 0b10, 0b00, u7_0Imm>;
// Vector compare halfwords
-def A4_vcmpheqi : T_vcmpImm <"vcmph.eq", 0b00, 0b01, s8Imm>;
-def A4_vcmphgti : T_vcmpImm <"vcmph.gt", 0b01, 0b01, s8Imm>;
-def A4_vcmphgtui : T_vcmpImm <"vcmph.gtu", 0b10, 0b01, u7Imm>;
+def A4_vcmpheqi : T_vcmpImm <"vcmph.eq", 0b00, 0b01, s8_0Imm>;
+def A4_vcmphgti : T_vcmpImm <"vcmph.gt", 0b01, 0b01, s8_0Imm>;
+def A4_vcmphgtui : T_vcmpImm <"vcmph.gtu", 0b10, 0b01, u7_0Imm>;
// Vector compare words
-def A4_vcmpweqi : T_vcmpImm <"vcmpw.eq", 0b00, 0b10, s8Imm>;
-def A4_vcmpwgti : T_vcmpImm <"vcmpw.gt", 0b01, 0b10, s8Imm>;
-def A4_vcmpwgtui : T_vcmpImm <"vcmpw.gtu", 0b10, 0b10, u7Imm>;
+def A4_vcmpweqi : T_vcmpImm <"vcmpw.eq", 0b00, 0b10, s8_0Imm>;
+def A4_vcmpwgti : T_vcmpImm <"vcmpw.gt", 0b01, 0b10, s8_0Imm>;
+def A4_vcmpwgtui : T_vcmpImm <"vcmpw.gtu", 0b10, 0b10, u7_0Imm>;
//===----------------------------------------------------------------------===//
// XTYPE/SHIFT +
@@ -2666,13 +2153,11 @@ def A4_vcmpwgtui : T_vcmpImm <"vcmpw.gtu", 0b10, 0b10, u7Imm>;
// Rx=or(#u8,asl(Rx,#U5)) Rx=or(#u8,lsr(Rx,#U5))
let isExtendable = 1, opExtendable = 1, isExtentSigned = 0, opExtentBits = 8,
hasNewValue = 1, opNewValue = 0 in
-class T_S4_ShiftOperate<string MnOp, string MnSh, SDNode Op, SDNode Sh,
- bit asl_lsr, bits<2> MajOp, InstrItinClass Itin>
- : MInst_acc<(outs IntRegs:$Rd), (ins u8Ext:$u8, IntRegs:$Rx, u5Imm:$U5),
+class T_S4_ShiftOperate<string MnOp, string MnSh, bit asl_lsr,
+ bits<2> MajOp, InstrItinClass Itin>
+ : MInst_acc<(outs IntRegs:$Rd), (ins u8_0Ext:$u8, IntRegs:$Rx, u5_0Imm:$U5),
"$Rd = "#MnOp#"(#$u8, "#MnSh#"($Rx, #$U5))",
- [(set (i32 IntRegs:$Rd),
- (Op (Sh I32:$Rx, u5ImmPred:$U5), u32ImmPred:$u8))],
- "$Rd = $Rx", Itin> {
+ [], "$Rd = $Rx", Itin> {
bits<5> Rd;
bits<8> u8;
@@ -2691,32 +2176,15 @@ class T_S4_ShiftOperate<string MnOp, string MnSh, SDNode Op, SDNode Sh,
let Inst{2-1} = MajOp;
}
-multiclass T_ShiftOperate<string mnemonic, SDNode Op, bits<2> MajOp,
- InstrItinClass Itin> {
- def _asl_ri : T_S4_ShiftOperate<mnemonic, "asl", Op, shl, 0, MajOp, Itin>;
- def _lsr_ri : T_S4_ShiftOperate<mnemonic, "lsr", Op, srl, 1, MajOp, Itin>;
-}
-
-let AddedComplexity = 200 in {
- defm S4_addi : T_ShiftOperate<"add", add, 0b10, ALU64_tc_2_SLOT23>;
- defm S4_andi : T_ShiftOperate<"and", and, 0b00, ALU64_tc_2_SLOT23>;
+multiclass T_ShiftOperate<string mnemonic, bits<2> MajOp, InstrItinClass Itin> {
+ def _asl_ri : T_S4_ShiftOperate<mnemonic, "asl", 0, MajOp, Itin>;
+ def _lsr_ri : T_S4_ShiftOperate<mnemonic, "lsr", 1, MajOp, Itin>;
}
-let AddedComplexity = 30 in
-defm S4_ori : T_ShiftOperate<"or", or, 0b01, ALU64_tc_1_SLOT23>;
-
-defm S4_subi : T_ShiftOperate<"sub", sub, 0b11, ALU64_tc_1_SLOT23>;
-
-let AddedComplexity = 200 in {
- def: Pat<(add addrga:$addr, (shl I32:$src2, u5ImmPred:$src3)),
- (S4_addi_asl_ri addrga:$addr, IntRegs:$src2, u5ImmPred:$src3)>;
- def: Pat<(add addrga:$addr, (srl I32:$src2, u5ImmPred:$src3)),
- (S4_addi_lsr_ri addrga:$addr, IntRegs:$src2, u5ImmPred:$src3)>;
- def: Pat<(sub addrga:$addr, (shl I32:$src2, u5ImmPred:$src3)),
- (S4_subi_asl_ri addrga:$addr, IntRegs:$src2, u5ImmPred:$src3)>;
- def: Pat<(sub addrga:$addr, (srl I32:$src2, u5ImmPred:$src3)),
- (S4_subi_lsr_ri addrga:$addr, IntRegs:$src2, u5ImmPred:$src3)>;
-}
+defm S4_addi : T_ShiftOperate<"add", 0b10, ALU64_tc_2_SLOT23>;
+defm S4_andi : T_ShiftOperate<"and", 0b00, ALU64_tc_2_SLOT23>;
+defm S4_ori : T_ShiftOperate<"or", 0b01, ALU64_tc_1_SLOT23>;
+defm S4_subi : T_ShiftOperate<"sub", 0b11, ALU64_tc_1_SLOT23>;
// Vector conditional negate
// Rdd=vcnegh(Rss,Rt)
@@ -2806,11 +2274,8 @@ def A4_vrminuw : T_S3op_6 < "vrminuw", 0b110, 1>;
// Shift an immediate left by register amount.
let hasNewValue = 1, hasSideEffects = 0 in
-def S4_lsli: SInst <(outs IntRegs:$Rd), (ins s6Imm:$s6, IntRegs:$Rt),
- "$Rd = lsl(#$s6, $Rt)" ,
- [(set (i32 IntRegs:$Rd), (shl s6ImmPred:$s6,
- (i32 IntRegs:$Rt)))],
- "", S_3op_tc_1_SLOT23> {
+def S4_lsli: SInst <(outs IntRegs:$Rd), (ins s6_0Imm:$s6, IntRegs:$Rt),
+ "$Rd = lsl(#$s6, $Rt)" , [], "", S_3op_tc_1_SLOT23> {
bits<5> Rd;
bits<6> s6;
bits<5> Rt;
@@ -2833,71 +2298,6 @@ def S4_lsli: SInst <(outs IntRegs:$Rd), (ins s6Imm:$s6, IntRegs:$Rt),
// MEMOP
//===----------------------------------------------------------------------===//
-def m5Imm8Pred : PatLeaf<(i32 imm), [{
- int8_t v = (int8_t)N->getSExtValue();
- return v > -32 && v <= -1;
-}]>;
-
-def m5Imm16Pred : PatLeaf<(i32 imm), [{
- int16_t v = (int16_t)N->getSExtValue();
- return v > -32 && v <= -1;
-}]>;
-
-def Clr5Imm8Pred : PatLeaf<(i32 imm), [{
- uint32_t v = (uint8_t)~N->getZExtValue();
- return ImmIsSingleBit(v);
-}]>;
-
-def Clr5Imm16Pred : PatLeaf<(i32 imm), [{
- uint32_t v = (uint16_t)~N->getZExtValue();
- return ImmIsSingleBit(v);
-}]>;
-
-def Set5Imm8 : SDNodeXForm<imm, [{
- uint32_t imm = (uint8_t)N->getZExtValue();
- return XformMskToBitPosU5Imm(imm, SDLoc(N));
-}]>;
-
-def Set5Imm16 : SDNodeXForm<imm, [{
- uint32_t imm = (uint16_t)N->getZExtValue();
- return XformMskToBitPosU5Imm(imm, SDLoc(N));
-}]>;
-
-def Set5Imm32 : SDNodeXForm<imm, [{
- uint32_t imm = (uint32_t)N->getZExtValue();
- return XformMskToBitPosU5Imm(imm, SDLoc(N));
-}]>;
-
-def Clr5Imm8 : SDNodeXForm<imm, [{
- uint32_t imm = (uint8_t)~N->getZExtValue();
- return XformMskToBitPosU5Imm(imm, SDLoc(N));
-}]>;
-
-def Clr5Imm16 : SDNodeXForm<imm, [{
- uint32_t imm = (uint16_t)~N->getZExtValue();
- return XformMskToBitPosU5Imm(imm, SDLoc(N));
-}]>;
-
-def Clr5Imm32 : SDNodeXForm<imm, [{
- int32_t imm = (int32_t)~N->getZExtValue();
- return XformMskToBitPosU5Imm(imm, SDLoc(N));
-}]>;
-
-def NegImm8 : SDNodeXForm<imm, [{
- int8_t V = N->getSExtValue();
- return CurDAG->getTargetConstant(-V, SDLoc(N), MVT::i32);
-}]>;
-
-def NegImm16 : SDNodeXForm<imm, [{
- int16_t V = N->getSExtValue();
- return CurDAG->getTargetConstant(-V, SDLoc(N), MVT::i32);
-}]>;
-
-def NegImm32 : SDNodeXForm<imm, [{
- return CurDAG->getTargetConstant(-N->getSExtValue(), SDLoc(N), MVT::i32);
-}]>;
-
-def IdImm : SDNodeXForm<imm, [{ return SDValue(N, 0); }]>;
//===----------------------------------------------------------------------===//
// Template class for MemOp instructions with the register value.
@@ -2936,7 +2336,7 @@ class MemOp_rr_base <string opc, bits<2> opcBits, Operand ImmOp,
class MemOp_ri_base <string opc, bits<2> opcBits, Operand ImmOp,
string memOp, bits<2> memOpBits> :
MEMInst_V4 <(outs),
- (ins IntRegs:$base, ImmOp:$offset, u5Imm:$delta),
+ (ins IntRegs:$base, ImmOp:$offset, u5_0Imm:$delta),
opc#"($base+#$offset)"#memOp#"#$delta"
#!if(memOpBits{1},")", ""), // clrbit, setbit - include ')'
[]>,
@@ -2996,235 +2396,6 @@ let isExtendable = 1, opExtendable = 1, isExtentSigned = 0 in {
}
-multiclass Memopxr_simple_pat<PatFrag Load, PatFrag Store, SDNode Oper,
- InstHexagon MI> {
- // Addr: i32
- def: Pat<(Store (Oper (Load I32:$Rs), I32:$A), I32:$Rs),
- (MI I32:$Rs, 0, I32:$A)>;
- // Addr: fi
- def: Pat<(Store (Oper (Load AddrFI:$Rs), I32:$A), AddrFI:$Rs),
- (MI AddrFI:$Rs, 0, I32:$A)>;
-}
-
-multiclass Memopxr_add_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred,
- SDNode Oper, InstHexagon MI> {
- // Addr: i32
- def: Pat<(Store (Oper (Load (add I32:$Rs, ImmPred:$Off)), I32:$A),
- (add I32:$Rs, ImmPred:$Off)),
- (MI I32:$Rs, imm:$Off, I32:$A)>;
- def: Pat<(Store (Oper (Load (orisadd I32:$Rs, ImmPred:$Off)), I32:$A),
- (orisadd I32:$Rs, ImmPred:$Off)),
- (MI I32:$Rs, imm:$Off, I32:$A)>;
- // Addr: fi
- def: Pat<(Store (Oper (Load (add AddrFI:$Rs, ImmPred:$Off)), I32:$A),
- (add AddrFI:$Rs, ImmPred:$Off)),
- (MI AddrFI:$Rs, imm:$Off, I32:$A)>;
- def: Pat<(Store (Oper (Load (orisadd AddrFI:$Rs, ImmPred:$Off)), I32:$A),
- (orisadd AddrFI:$Rs, ImmPred:$Off)),
- (MI AddrFI:$Rs, imm:$Off, I32:$A)>;
-}
-
-multiclass Memopxr_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred,
- SDNode Oper, InstHexagon MI> {
- defm: Memopxr_simple_pat <Load, Store, Oper, MI>;
- defm: Memopxr_add_pat <Load, Store, ImmPred, Oper, MI>;
-}
-
-let AddedComplexity = 180 in {
- // add reg
- defm: Memopxr_pat<extloadi8, truncstorei8, u6_0ImmPred, add,
- /*anyext*/ L4_add_memopb_io>;
- defm: Memopxr_pat<sextloadi8, truncstorei8, u6_0ImmPred, add,
- /*sext*/ L4_add_memopb_io>;
- defm: Memopxr_pat<zextloadi8, truncstorei8, u6_0ImmPred, add,
- /*zext*/ L4_add_memopb_io>;
- defm: Memopxr_pat<extloadi16, truncstorei16, u6_1ImmPred, add,
- /*anyext*/ L4_add_memoph_io>;
- defm: Memopxr_pat<sextloadi16, truncstorei16, u6_1ImmPred, add,
- /*sext*/ L4_add_memoph_io>;
- defm: Memopxr_pat<zextloadi16, truncstorei16, u6_1ImmPred, add,
- /*zext*/ L4_add_memoph_io>;
- defm: Memopxr_pat<load, store, u6_2ImmPred, add, L4_add_memopw_io>;
-
- // sub reg
- defm: Memopxr_pat<extloadi8, truncstorei8, u6_0ImmPred, sub,
- /*anyext*/ L4_sub_memopb_io>;
- defm: Memopxr_pat<sextloadi8, truncstorei8, u6_0ImmPred, sub,
- /*sext*/ L4_sub_memopb_io>;
- defm: Memopxr_pat<zextloadi8, truncstorei8, u6_0ImmPred, sub,
- /*zext*/ L4_sub_memopb_io>;
- defm: Memopxr_pat<extloadi16, truncstorei16, u6_1ImmPred, sub,
- /*anyext*/ L4_sub_memoph_io>;
- defm: Memopxr_pat<sextloadi16, truncstorei16, u6_1ImmPred, sub,
- /*sext*/ L4_sub_memoph_io>;
- defm: Memopxr_pat<zextloadi16, truncstorei16, u6_1ImmPred, sub,
- /*zext*/ L4_sub_memoph_io>;
- defm: Memopxr_pat<load, store, u6_2ImmPred, sub, L4_sub_memopw_io>;
-
- // and reg
- defm: Memopxr_pat<extloadi8, truncstorei8, u6_0ImmPred, and,
- /*anyext*/ L4_and_memopb_io>;
- defm: Memopxr_pat<sextloadi8, truncstorei8, u6_0ImmPred, and,
- /*sext*/ L4_and_memopb_io>;
- defm: Memopxr_pat<zextloadi8, truncstorei8, u6_0ImmPred, and,
- /*zext*/ L4_and_memopb_io>;
- defm: Memopxr_pat<extloadi16, truncstorei16, u6_1ImmPred, and,
- /*anyext*/ L4_and_memoph_io>;
- defm: Memopxr_pat<sextloadi16, truncstorei16, u6_1ImmPred, and,
- /*sext*/ L4_and_memoph_io>;
- defm: Memopxr_pat<zextloadi16, truncstorei16, u6_1ImmPred, and,
- /*zext*/ L4_and_memoph_io>;
- defm: Memopxr_pat<load, store, u6_2ImmPred, and, L4_and_memopw_io>;
-
- // or reg
- defm: Memopxr_pat<extloadi8, truncstorei8, u6_0ImmPred, or,
- /*anyext*/ L4_or_memopb_io>;
- defm: Memopxr_pat<sextloadi8, truncstorei8, u6_0ImmPred, or,
- /*sext*/ L4_or_memopb_io>;
- defm: Memopxr_pat<zextloadi8, truncstorei8, u6_0ImmPred, or,
- /*zext*/ L4_or_memopb_io>;
- defm: Memopxr_pat<extloadi16, truncstorei16, u6_1ImmPred, or,
- /*anyext*/ L4_or_memoph_io>;
- defm: Memopxr_pat<sextloadi16, truncstorei16, u6_1ImmPred, or,
- /*sext*/ L4_or_memoph_io>;
- defm: Memopxr_pat<zextloadi16, truncstorei16, u6_1ImmPred, or,
- /*zext*/ L4_or_memoph_io>;
- defm: Memopxr_pat<load, store, u6_2ImmPred, or, L4_or_memopw_io>;
-}
-
-
-multiclass Memopxi_simple_pat<PatFrag Load, PatFrag Store, SDNode Oper,
- PatFrag Arg, SDNodeXForm ArgMod,
- InstHexagon MI> {
- // Addr: i32
- def: Pat<(Store (Oper (Load I32:$Rs), Arg:$A), I32:$Rs),
- (MI I32:$Rs, 0, (ArgMod Arg:$A))>;
- // Addr: fi
- def: Pat<(Store (Oper (Load AddrFI:$Rs), Arg:$A), AddrFI:$Rs),
- (MI AddrFI:$Rs, 0, (ArgMod Arg:$A))>;
-}
-
-multiclass Memopxi_add_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred,
- SDNode Oper, PatFrag Arg, SDNodeXForm ArgMod,
- InstHexagon MI> {
- // Addr: i32
- def: Pat<(Store (Oper (Load (add I32:$Rs, ImmPred:$Off)), Arg:$A),
- (add I32:$Rs, ImmPred:$Off)),
- (MI I32:$Rs, imm:$Off, (ArgMod Arg:$A))>;
- def: Pat<(Store (Oper (Load (orisadd I32:$Rs, ImmPred:$Off)), Arg:$A),
- (orisadd I32:$Rs, ImmPred:$Off)),
- (MI I32:$Rs, imm:$Off, (ArgMod Arg:$A))>;
- // Addr: fi
- def: Pat<(Store (Oper (Load (add AddrFI:$Rs, ImmPred:$Off)), Arg:$A),
- (add AddrFI:$Rs, ImmPred:$Off)),
- (MI AddrFI:$Rs, imm:$Off, (ArgMod Arg:$A))>;
- def: Pat<(Store (Oper (Load (orisadd AddrFI:$Rs, ImmPred:$Off)), Arg:$A),
- (orisadd AddrFI:$Rs, ImmPred:$Off)),
- (MI AddrFI:$Rs, imm:$Off, (ArgMod Arg:$A))>;
-}
-
-multiclass Memopxi_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred,
- SDNode Oper, PatFrag Arg, SDNodeXForm ArgMod,
- InstHexagon MI> {
- defm: Memopxi_simple_pat <Load, Store, Oper, Arg, ArgMod, MI>;
- defm: Memopxi_add_pat <Load, Store, ImmPred, Oper, Arg, ArgMod, MI>;
-}
-
-
-let AddedComplexity = 200 in {
- // add imm
- defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, add, u5ImmPred,
- /*anyext*/ IdImm, L4_iadd_memopb_io>;
- defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, add, u5ImmPred,
- /*sext*/ IdImm, L4_iadd_memopb_io>;
- defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, add, u5ImmPred,
- /*zext*/ IdImm, L4_iadd_memopb_io>;
- defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, add, u5ImmPred,
- /*anyext*/ IdImm, L4_iadd_memoph_io>;
- defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, add, u5ImmPred,
- /*sext*/ IdImm, L4_iadd_memoph_io>;
- defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, add, u5ImmPred,
- /*zext*/ IdImm, L4_iadd_memoph_io>;
- defm: Memopxi_pat<load, store, u6_2ImmPred, add, u5ImmPred, IdImm,
- L4_iadd_memopw_io>;
- defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, sub, m5Imm8Pred,
- /*anyext*/ NegImm8, L4_iadd_memopb_io>;
- defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, sub, m5Imm8Pred,
- /*sext*/ NegImm8, L4_iadd_memopb_io>;
- defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, sub, m5Imm8Pred,
- /*zext*/ NegImm8, L4_iadd_memopb_io>;
- defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, sub, m5Imm16Pred,
- /*anyext*/ NegImm16, L4_iadd_memoph_io>;
- defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, sub, m5Imm16Pred,
- /*sext*/ NegImm16, L4_iadd_memoph_io>;
- defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, sub, m5Imm16Pred,
- /*zext*/ NegImm16, L4_iadd_memoph_io>;
- defm: Memopxi_pat<load, store, u6_2ImmPred, sub, m5ImmPred, NegImm32,
- L4_iadd_memopw_io>;
-
- // sub imm
- defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, sub, u5ImmPred,
- /*anyext*/ IdImm, L4_isub_memopb_io>;
- defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, sub, u5ImmPred,
- /*sext*/ IdImm, L4_isub_memopb_io>;
- defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, sub, u5ImmPred,
- /*zext*/ IdImm, L4_isub_memopb_io>;
- defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, sub, u5ImmPred,
- /*anyext*/ IdImm, L4_isub_memoph_io>;
- defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, sub, u5ImmPred,
- /*sext*/ IdImm, L4_isub_memoph_io>;
- defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, sub, u5ImmPred,
- /*zext*/ IdImm, L4_isub_memoph_io>;
- defm: Memopxi_pat<load, store, u6_2ImmPred, sub, u5ImmPred, IdImm,
- L4_isub_memopw_io>;
- defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, add, m5Imm8Pred,
- /*anyext*/ NegImm8, L4_isub_memopb_io>;
- defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, add, m5Imm8Pred,
- /*sext*/ NegImm8, L4_isub_memopb_io>;
- defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, add, m5Imm8Pred,
- /*zext*/ NegImm8, L4_isub_memopb_io>;
- defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, add, m5Imm16Pred,
- /*anyext*/ NegImm16, L4_isub_memoph_io>;
- defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, add, m5Imm16Pred,
- /*sext*/ NegImm16, L4_isub_memoph_io>;
- defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, add, m5Imm16Pred,
- /*zext*/ NegImm16, L4_isub_memoph_io>;
- defm: Memopxi_pat<load, store, u6_2ImmPred, add, m5ImmPred, NegImm32,
- L4_isub_memopw_io>;
-
- // clrbit imm
- defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, and, Clr5Imm8Pred,
- /*anyext*/ Clr5Imm8, L4_iand_memopb_io>;
- defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, and, Clr5Imm8Pred,
- /*sext*/ Clr5Imm8, L4_iand_memopb_io>;
- defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, and, Clr5Imm8Pred,
- /*zext*/ Clr5Imm8, L4_iand_memopb_io>;
- defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, and, Clr5Imm16Pred,
- /*anyext*/ Clr5Imm16, L4_iand_memoph_io>;
- defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, and, Clr5Imm16Pred,
- /*sext*/ Clr5Imm16, L4_iand_memoph_io>;
- defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, and, Clr5Imm16Pred,
- /*zext*/ Clr5Imm16, L4_iand_memoph_io>;
- defm: Memopxi_pat<load, store, u6_2ImmPred, and, Clr5ImmPred, Clr5Imm32,
- L4_iand_memopw_io>;
-
- // setbit imm
- defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, or, Set5ImmPred,
- /*anyext*/ Set5Imm8, L4_ior_memopb_io>;
- defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, or, Set5ImmPred,
- /*sext*/ Set5Imm8, L4_ior_memopb_io>;
- defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, or, Set5ImmPred,
- /*zext*/ Set5Imm8, L4_ior_memopb_io>;
- defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, or, Set5ImmPred,
- /*anyext*/ Set5Imm16, L4_ior_memoph_io>;
- defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, or, Set5ImmPred,
- /*sext*/ Set5Imm16, L4_ior_memoph_io>;
- defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, or, Set5ImmPred,
- /*zext*/ Set5Imm16, L4_ior_memoph_io>;
- defm: Memopxi_pat<load, store, u6_2ImmPred, or, Set5ImmPred, Set5Imm32,
- L4_ior_memopw_io>;
-}
-
//===----------------------------------------------------------------------===//
// XTYPE/PRED +
//===----------------------------------------------------------------------===//
@@ -3241,57 +2412,9 @@ let AddedComplexity = 200 in {
// Pd=cmpb.eq(Rs,#u8)
// p=!cmp.eq(r1,#s10)
-def C4_cmpneqi : T_CMP <"cmp.eq", 0b00, 1, s10Ext>;
-def C4_cmpltei : T_CMP <"cmp.gt", 0b01, 1, s10Ext>;
-def C4_cmplteui : T_CMP <"cmp.gtu", 0b10, 1, u9Ext>;
-
-def : T_CMP_pat <C4_cmpneqi, setne, s32ImmPred>;
-def : T_CMP_pat <C4_cmpltei, setle, s32ImmPred>;
-def : T_CMP_pat <C4_cmplteui, setule, u9ImmPred>;
-
-// rs <= rt -> !(rs > rt).
-/*
-def: Pat<(i1 (setle (i32 IntRegs:$src1), s32ImmPred:$src2)),
- (C2_not (C2_cmpgti IntRegs:$src1, s32ImmPred:$src2))>;
-// (C4_cmpltei IntRegs:$src1, s32ImmPred:$src2)>;
-*/
-// Map cmplt(Rs, Imm) -> !cmpgt(Rs, Imm-1).
-def: Pat<(i1 (setlt (i32 IntRegs:$src1), s32ImmPred:$src2)),
- (C4_cmpltei IntRegs:$src1, (DEC_CONST_SIGNED s32ImmPred:$src2))>;
-
-// rs != rt -> !(rs == rt).
-def: Pat<(i1 (setne (i32 IntRegs:$src1), s32ImmPred:$src2)),
- (C4_cmpneqi IntRegs:$src1, s32ImmPred:$src2)>;
-
-// SDNode for converting immediate C to C-1.
-def DEC_CONST_BYTE : SDNodeXForm<imm, [{
- // Return the byte immediate const-1 as an SDNode.
- int32_t imm = N->getSExtValue();
- return XformU7ToU7M1Imm(imm, SDLoc(N));
-}]>;
-
-// For the sequence
-// zext( setult ( and(Rs, 255), u8))
-// Use the isdigit transformation below
-
-// Generate code of the form 'C2_muxii(cmpbgtui(Rdd, C-1),0,1)'
-// for C code of the form r = ((c>='0') & (c<='9')) ? 1 : 0;.
-// The isdigit transformation relies on two 'clever' aspects:
-// 1) The data type is unsigned, which allows us to eliminate a zero test after
-//    biasing the expression by 48. We depend on the representation and
-//    semantics of the unsigned types.
-// 2) The front end has converted <= 9 into < 10 on entry to LLVM
-//
-// For the C code:
-// retval = ((c>='0') & (c<='9')) ? 1 : 0;
-// The code is transformed upstream of llvm into
-// retval = (c-48) < 10 ? 1 : 0;
-let AddedComplexity = 139 in
-def: Pat<(i32 (zext (i1 (setult (i32 (and (i32 IntRegs:$src1), 255)),
- u7StrictPosImmPred:$src2)))),
- (C2_muxii (A4_cmpbgtui IntRegs:$src1,
- (DEC_CONST_BYTE u7StrictPosImmPred:$src2)),
- 0, 1)>;
+def C4_cmpneqi : T_CMP <"cmp.eq", 0b00, 1, s10_0Ext>;
+def C4_cmpltei : T_CMP <"cmp.gt", 0b01, 1, s10_0Ext>;
+def C4_cmplteui : T_CMP <"cmp.gtu", 0b10, 1, u9_0Ext>;
//===----------------------------------------------------------------------===//
// XTYPE/PRED -
@@ -3450,7 +2573,7 @@ class T_StoreAbsGP <string mnemonic, RegisterClass RC, Operand ImmOp,
let hasSideEffects = 0, isPredicated = 1, opExtentBits = 6, opExtendable = 1 in
class T_StoreAbs_Pred <string mnemonic, RegisterClass RC, bits<2> MajOp,
bit isHalf, bit isNot, bit isNew>
- : STInst<(outs), (ins PredRegs:$src1, u32MustExt:$absaddr, RC: $src2),
+ : STInst<(outs), (ins PredRegs:$src1, u32_0MustExt:$absaddr, RC: $src2),
!if(isNot, "if (!$src1", "if ($src1")#!if(isNew, ".new) ",
") ")#mnemonic#"(#$absaddr) = $src2"#!if(isHalf, ".h",""),
[], "", ST_tc_st_SLOT01>, AddrModeRel {
@@ -3482,7 +2605,7 @@ class T_StoreAbs_Pred <string mnemonic, RegisterClass RC, bits<2> MajOp,
//===----------------------------------------------------------------------===//
class T_StoreAbs <string mnemonic, RegisterClass RC, Operand ImmOp,
bits<2> MajOp, bit isHalf>
- : T_StoreAbsGP <mnemonic, RC, u32MustExt, MajOp, 1, isHalf>,
+ : T_StoreAbsGP <mnemonic, RC, u32_0MustExt, MajOp, 1, isHalf>,
AddrModeRel {
string ImmOpStr = !cast<string>(ImmOp);
let opExtentBits = !if (!eq(ImmOpStr, "u16_3Imm"), 19,
@@ -3504,7 +2627,7 @@ multiclass ST_Abs<string mnemonic, string CextOp, RegisterClass RC,
Operand ImmOp, bits<2> MajOp, bit isHalf = 0> {
let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in {
let opExtendable = 0, isPredicable = 1 in
- def S2_#NAME#abs : T_StoreAbs <mnemonic, RC, ImmOp, MajOp, isHalf>;
+ def PS_#NAME#abs : T_StoreAbs <mnemonic, RC, ImmOp, MajOp, isHalf>;
// Predicated
def S4_p#NAME#t_abs : T_StoreAbs_Pred<mnemonic, RC, MajOp, isHalf, 0, 0>;
@@ -3554,7 +2677,7 @@ class T_StoreAbsGP_NV <string mnemonic, Operand ImmOp, bits<2>MajOp>
let hasSideEffects = 0, isPredicated = 1, mayStore = 1, isNVStore = 1,
isNewValue = 1, opNewValue = 2, opExtentBits = 6, opExtendable = 1 in
class T_StoreAbs_NV_Pred <string mnemonic, bits<2> MajOp, bit isNot, bit isNew>
- : NVInst_V4<(outs), (ins PredRegs:$src1, u32MustExt:$absaddr, IntRegs:$src2),
+ : NVInst_V4<(outs), (ins PredRegs:$src1, u32_0MustExt:$absaddr, IntRegs:$src2),
!if(isNot, "if (!$src1", "if ($src1")#!if(isNew, ".new) ",
") ")#mnemonic#"(#$absaddr) = $src2.new",
[], "", ST_tc_st_SLOT0>, AddrModeRel {
@@ -3584,7 +2707,7 @@ class T_StoreAbs_NV_Pred <string mnemonic, bits<2> MajOp, bit isNot, bit isNew>
// absolute addressing.
//===----------------------------------------------------------------------===//
class T_StoreAbs_NV <string mnemonic, Operand ImmOp, bits<2> MajOp>
- : T_StoreAbsGP_NV <mnemonic, u32MustExt, MajOp>, AddrModeRel {
+ : T_StoreAbsGP_NV <mnemonic, u32_0MustExt, MajOp>, AddrModeRel {
string ImmOpStr = !cast<string>(ImmOp);
let opExtentBits = !if (!eq(ImmOpStr, "u16_3Imm"), 19,
@@ -3606,7 +2729,7 @@ multiclass ST_Abs_NV <string mnemonic, string CextOp, Operand ImmOp,
bits<2> MajOp> {
let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in {
let opExtendable = 0, isPredicable = 1 in
- def S2_#NAME#newabs : T_StoreAbs_NV <mnemonic, ImmOp, MajOp>;
+ def PS_#NAME#newabs : T_StoreAbs_NV <mnemonic, ImmOp, MajOp>;
// Predicated
def S4_p#NAME#newt_abs : T_StoreAbs_NV_Pred <mnemonic, MajOp, 0, 0>;
@@ -3687,50 +2810,6 @@ let isNVStorable = 0, accessSize = HalfWordAccess in
def S2_storerfgp : T_StoreGP <"memh", "STrif", IntRegs,
u16_1Imm, 0b01, 1>, PredNewRel;
-class Loada_pat<PatFrag Load, ValueType VT, PatFrag Addr, InstHexagon MI>
- : Pat<(VT (Load Addr:$addr)), (MI Addr:$addr)>;
-
-class Loadam_pat<PatFrag Load, ValueType VT, PatFrag Addr, PatFrag ValueMod,
- InstHexagon MI>
- : Pat<(VT (Load Addr:$addr)), (ValueMod (MI Addr:$addr))>;
-
-class Storea_pat<PatFrag Store, PatFrag Value, PatFrag Addr, InstHexagon MI>
- : Pat<(Store Value:$val, Addr:$addr), (MI Addr:$addr, Value:$val)>;
-
-class Stoream_pat<PatFrag Store, PatFrag Value, PatFrag Addr, PatFrag ValueMod,
- InstHexagon MI>
- : Pat<(Store Value:$val, Addr:$addr),
- (MI Addr:$addr, (ValueMod Value:$val))>;
-
-let AddedComplexity = 30 in {
- def: Storea_pat<truncstorei8, I32, addrga, S2_storerbabs>;
- def: Storea_pat<truncstorei16, I32, addrga, S2_storerhabs>;
- def: Storea_pat<store, I32, addrga, S2_storeriabs>;
- def: Storea_pat<store, I64, addrga, S2_storerdabs>;
-
- def: Stoream_pat<truncstorei8, I64, addrga, LoReg, S2_storerbabs>;
- def: Stoream_pat<truncstorei16, I64, addrga, LoReg, S2_storerhabs>;
- def: Stoream_pat<truncstorei32, I64, addrga, LoReg, S2_storeriabs>;
-}
-
-def: Storea_pat<SwapSt<atomic_store_8>, I32, addrgp, S2_storerbgp>;
-def: Storea_pat<SwapSt<atomic_store_16>, I32, addrgp, S2_storerhgp>;
-def: Storea_pat<SwapSt<atomic_store_32>, I32, addrgp, S2_storerigp>;
-def: Storea_pat<SwapSt<atomic_store_64>, I64, addrgp, S2_storerdgp>;
-
-let AddedComplexity = 100 in {
- def: Storea_pat<truncstorei8, I32, addrgp, S2_storerbgp>;
- def: Storea_pat<truncstorei16, I32, addrgp, S2_storerhgp>;
- def: Storea_pat<store, I32, addrgp, S2_storerigp>;
- def: Storea_pat<store, I64, addrgp, S2_storerdgp>;
-
- // Map from "i1 = constant<-1>; memw(CONST32(#foo)) = i1"
- // to "r0 = 1; memw(#foo) = r0"
- let AddedComplexity = 100 in
- def: Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)),
- (S2_storerbgp tglobaladdr:$global, (A2_tfrsi 1))>;
-}
-
//===----------------------------------------------------------------------===//
// Template class for non predicated load instructions with
// absolute addressing mode.
@@ -3764,7 +2843,7 @@ class T_LoadAbsGP <string mnemonic, RegisterClass RC, Operand ImmOp,
class T_LoadAbs <string mnemonic, RegisterClass RC, Operand ImmOp,
bits<3> MajOp>
- : T_LoadAbsGP <mnemonic, RC, u32MustExt, MajOp>, AddrModeRel {
+ : T_LoadAbsGP <mnemonic, RC, u32_0MustExt, MajOp>, AddrModeRel {
string ImmOpStr = !cast<string>(ImmOp);
let opExtentBits = !if (!eq(ImmOpStr, "u16_3Imm"), 19,
@@ -3786,7 +2865,7 @@ let isPredicated = 1, hasSideEffects = 0, hasNewValue = 1, opExtentBits = 6,
opExtendable = 2 in
class T_LoadAbs_Pred <string mnemonic, RegisterClass RC, bits<3> MajOp,
bit isPredNot, bit isPredNew>
- : LDInst <(outs RC:$dst), (ins PredRegs:$src1, u32MustExt:$absaddr),
+ : LDInst <(outs RC:$dst), (ins PredRegs:$src1, u32_0MustExt:$absaddr),
!if(isPredNot, "if (!$src1", "if ($src1")#!if(isPredNew, ".new) ",
") ")#"$dst = "#mnemonic#"(#$absaddr)">, AddrModeRel {
bits<5> dst;
@@ -3826,7 +2905,7 @@ multiclass LD_Abs<string mnemonic, string CextOp, RegisterClass RC,
Operand ImmOp, bits<3> MajOp> {
let CextOpcode = CextOp, BaseOpcode = CextOp#_abs in {
let opExtendable = 1, isPredicable = 1 in
- def L4_#NAME#_abs: T_LoadAbs <mnemonic, RC, ImmOp, MajOp>;
+ def PS_#NAME#abs: T_LoadAbs <mnemonic, RC, ImmOp, MajOp>;
// Predicated
defm L4_p#NAME#t : LD_Abs_Pred<mnemonic, RC, MajOp, 0>;
@@ -3850,26 +2929,6 @@ defm loadri : LD_Abs<"memw", "LDriw", IntRegs, u16_2Imm, 0b100>;
let accessSize = DoubleWordAccess in
defm loadrd : LD_Abs<"memd", "LDrid", DoubleRegs, u16_3Imm, 0b110>;
-class LoadAbs_pats <PatFrag ldOp, InstHexagon MI, ValueType VT = i32>
- : Pat <(VT (ldOp (HexagonCONST32 tglobaladdr:$absaddr))),
- (VT (MI tglobaladdr:$absaddr))>;
-
-let AddedComplexity = 30 in {
- def: LoadAbs_pats <load, L4_loadri_abs>;
- def: LoadAbs_pats <zextloadi1, L4_loadrub_abs>;
- def: LoadAbs_pats <sextloadi8, L4_loadrb_abs>;
- def: LoadAbs_pats <extloadi8, L4_loadrub_abs>;
- def: LoadAbs_pats <zextloadi8, L4_loadrub_abs>;
- def: LoadAbs_pats <sextloadi16, L4_loadrh_abs>;
- def: LoadAbs_pats <extloadi16, L4_loadruh_abs>;
- def: LoadAbs_pats <zextloadi16, L4_loadruh_abs>;
- def: LoadAbs_pats <load, L4_loadrd_abs, i64>;
-}
-
-let AddedComplexity = 30 in
-def: Pat<(i64 (zextloadi1 (HexagonCONST32 tglobaladdr:$absaddr))),
- (Zext64 (L4_loadrub_abs tglobaladdr:$absaddr))>;
-
//===----------------------------------------------------------------------===//
// multiclass for load instructions with GP-relative addressing mode.
// Rx=mem[bhwd](##global)
@@ -3900,149 +2959,6 @@ def L2_loadrigp : T_LoadGP<"memw", "LDriw", IntRegs, u16_2Imm, 0b100>;
let accessSize = DoubleWordAccess in
def L2_loadrdgp : T_LoadGP<"memd", "LDrid", DoubleRegs, u16_3Imm, 0b110>;
-def: Loada_pat<atomic_load_8, i32, addrgp, L2_loadrubgp>;
-def: Loada_pat<atomic_load_16, i32, addrgp, L2_loadruhgp>;
-def: Loada_pat<atomic_load_32, i32, addrgp, L2_loadrigp>;
-def: Loada_pat<atomic_load_64, i64, addrgp, L2_loadrdgp>;
-
-// Map from Pd = load(globaladdress) -> Rd = memb(globaladdress), Pd = Rd
-def: Loadam_pat<load, i1, addrga, I32toI1, L4_loadrub_abs>;
-def: Loadam_pat<load, i1, addrgp, I32toI1, L2_loadrubgp>;
-
-def: Stoream_pat<store, I1, addrga, I1toI32, S2_storerbabs>;
-def: Stoream_pat<store, I1, addrgp, I1toI32, S2_storerbgp>;
-
-// Map from load(globaladdress) -> mem[u][bhwd](#foo)
-class LoadGP_pats <PatFrag ldOp, InstHexagon MI, ValueType VT = i32>
- : Pat <(VT (ldOp (HexagonCONST32_GP tglobaladdr:$global))),
- (VT (MI tglobaladdr:$global))>;
-
-let AddedComplexity = 100 in {
- def: LoadGP_pats <extloadi8, L2_loadrubgp>;
- def: LoadGP_pats <sextloadi8, L2_loadrbgp>;
- def: LoadGP_pats <zextloadi8, L2_loadrubgp>;
- def: LoadGP_pats <extloadi16, L2_loadruhgp>;
- def: LoadGP_pats <sextloadi16, L2_loadrhgp>;
- def: LoadGP_pats <zextloadi16, L2_loadruhgp>;
- def: LoadGP_pats <load, L2_loadrigp>;
- def: LoadGP_pats <load, L2_loadrdgp, i64>;
-}
-
-// When the Interprocedural Global Variable optimizer realizes that a certain
-// global variable takes only two constant values, it shrinks the global to
-// a boolean. Catch those loads here in the following 3 patterns.
-let AddedComplexity = 100 in {
- def: LoadGP_pats <extloadi1, L2_loadrubgp>;
- def: LoadGP_pats <zextloadi1, L2_loadrubgp>;
-}
-
-// Transfer global address into a register
-def: Pat<(HexagonCONST32 tglobaladdr:$Rs), (A2_tfrsi s16Ext:$Rs)>;
-def: Pat<(HexagonCONST32_GP tblockaddress:$Rs), (A2_tfrsi s16Ext:$Rs)>;
-def: Pat<(HexagonCONST32_GP tglobaladdr:$Rs), (A2_tfrsi s16Ext:$Rs)>;
-
-let AddedComplexity = 30 in {
- def: Storea_pat<truncstorei8, I32, u32ImmPred, S2_storerbabs>;
- def: Storea_pat<truncstorei16, I32, u32ImmPred, S2_storerhabs>;
- def: Storea_pat<store, I32, u32ImmPred, S2_storeriabs>;
-}
-
-let AddedComplexity = 30 in {
- def: Loada_pat<load, i32, u32ImmPred, L4_loadri_abs>;
- def: Loada_pat<sextloadi8, i32, u32ImmPred, L4_loadrb_abs>;
- def: Loada_pat<zextloadi8, i32, u32ImmPred, L4_loadrub_abs>;
- def: Loada_pat<sextloadi16, i32, u32ImmPred, L4_loadrh_abs>;
- def: Loada_pat<zextloadi16, i32, u32ImmPred, L4_loadruh_abs>;
-}
-
-// Indexed store word - global address.
-// memw(Rs+#u6:2)=#S8
-let AddedComplexity = 100 in
-defm: Storex_add_pat<store, addrga, u6_2ImmPred, S4_storeiri_io>;
-
-// Load from a global address that has only one use in the current basic block.
-let AddedComplexity = 100 in {
- def: Loada_pat<extloadi8, i32, addrga, L4_loadrub_abs>;
- def: Loada_pat<sextloadi8, i32, addrga, L4_loadrb_abs>;
- def: Loada_pat<zextloadi8, i32, addrga, L4_loadrub_abs>;
-
- def: Loada_pat<extloadi16, i32, addrga, L4_loadruh_abs>;
- def: Loada_pat<sextloadi16, i32, addrga, L4_loadrh_abs>;
- def: Loada_pat<zextloadi16, i32, addrga, L4_loadruh_abs>;
-
- def: Loada_pat<load, i32, addrga, L4_loadri_abs>;
- def: Loada_pat<load, i64, addrga, L4_loadrd_abs>;
-}
-
-// Store to a global address that has only one use in the current basic block.
-let AddedComplexity = 100 in {
- def: Storea_pat<truncstorei8, I32, addrga, S2_storerbabs>;
- def: Storea_pat<truncstorei16, I32, addrga, S2_storerhabs>;
- def: Storea_pat<store, I32, addrga, S2_storeriabs>;
- def: Storea_pat<store, I64, addrga, S2_storerdabs>;
-
- def: Stoream_pat<truncstorei32, I64, addrga, LoReg, S2_storeriabs>;
-}
-
-// i8/i16/i32 -> i64 loads
-// We need a complexity of 120 here to override preceding handling of
-// zextload.
-let AddedComplexity = 120 in {
- def: Loadam_pat<extloadi8, i64, addrga, Zext64, L4_loadrub_abs>;
- def: Loadam_pat<sextloadi8, i64, addrga, Sext64, L4_loadrb_abs>;
- def: Loadam_pat<zextloadi8, i64, addrga, Zext64, L4_loadrub_abs>;
-
- def: Loadam_pat<extloadi16, i64, addrga, Zext64, L4_loadruh_abs>;
- def: Loadam_pat<sextloadi16, i64, addrga, Sext64, L4_loadrh_abs>;
- def: Loadam_pat<zextloadi16, i64, addrga, Zext64, L4_loadruh_abs>;
-
- def: Loadam_pat<extloadi32, i64, addrga, Zext64, L4_loadri_abs>;
- def: Loadam_pat<sextloadi32, i64, addrga, Sext64, L4_loadri_abs>;
- def: Loadam_pat<zextloadi32, i64, addrga, Zext64, L4_loadri_abs>;
-}
-
-let AddedComplexity = 100 in {
- def: Loada_pat<extloadi8, i32, addrgp, L4_loadrub_abs>;
- def: Loada_pat<sextloadi8, i32, addrgp, L4_loadrb_abs>;
- def: Loada_pat<zextloadi8, i32, addrgp, L4_loadrub_abs>;
-
- def: Loada_pat<extloadi16, i32, addrgp, L4_loadruh_abs>;
- def: Loada_pat<sextloadi16, i32, addrgp, L4_loadrh_abs>;
- def: Loada_pat<zextloadi16, i32, addrgp, L4_loadruh_abs>;
-
- def: Loada_pat<load, i32, addrgp, L4_loadri_abs>;
- def: Loada_pat<load, i64, addrgp, L4_loadrd_abs>;
-}
-
-let AddedComplexity = 100 in {
- def: Storea_pat<truncstorei8, I32, addrgp, S2_storerbabs>;
- def: Storea_pat<truncstorei16, I32, addrgp, S2_storerhabs>;
- def: Storea_pat<store, I32, addrgp, S2_storeriabs>;
- def: Storea_pat<store, I64, addrgp, S2_storerdabs>;
-}
-
-def: Loada_pat<atomic_load_8, i32, addrgp, L4_loadrub_abs>;
-def: Loada_pat<atomic_load_16, i32, addrgp, L4_loadruh_abs>;
-def: Loada_pat<atomic_load_32, i32, addrgp, L4_loadri_abs>;
-def: Loada_pat<atomic_load_64, i64, addrgp, L4_loadrd_abs>;
-
-def: Storea_pat<SwapSt<atomic_store_8>, I32, addrgp, S2_storerbabs>;
-def: Storea_pat<SwapSt<atomic_store_16>, I32, addrgp, S2_storerhabs>;
-def: Storea_pat<SwapSt<atomic_store_32>, I32, addrgp, S2_storeriabs>;
-def: Storea_pat<SwapSt<atomic_store_64>, I64, addrgp, S2_storerdabs>;
-
-let Constraints = "@earlyclobber $dst" in
-def Insert4 : PseudoM<(outs DoubleRegs:$dst), (ins IntRegs:$a, IntRegs:$b,
- IntRegs:$c, IntRegs:$d),
- ".error \"Should never try to emit Insert4\"",
- [(set (i64 DoubleRegs:$dst),
- (or (or (or (shl (i64 (zext (i32 (and (i32 IntRegs:$b), (i32 65535))))),
- (i32 16)),
- (i64 (zext (i32 (and (i32 IntRegs:$a), (i32 65535)))))),
- (shl (i64 (anyext (i32 (and (i32 IntRegs:$c), (i32 65535))))),
- (i32 32))),
- (shl (i64 (anyext (i32 IntRegs:$d))), (i32 48))))]>;
-
//===----------------------------------------------------------------------===//
// :raw form of boundscheck:hi:lo insns
//===----------------------------------------------------------------------===//
@@ -4111,20 +3027,12 @@ def A4_tlbmatch : ALU64Inst<(outs PredRegs:$Pd),
let Inst{1-0} = Pd;
}
-// We need custom lowering of ISD::PREFETCH into HexagonISD::DCFETCH
-// because the SDNode ISD::PREFETCH has properties MayLoad and MayStore.
-// We don't really want either one here.
-def SDTHexagonDCFETCH : SDTypeProfile<0, 2, [SDTCisPtrTy<0>,SDTCisInt<1>]>;
-def HexagonDCFETCH : SDNode<"HexagonISD::DCFETCH", SDTHexagonDCFETCH,
- [SDNPHasChain]>;
-
// Use LD0Inst for dcfetch, but set "mayLoad" to 0 because this doesn't
// really do a load.
let hasSideEffects = 1, mayLoad = 0 in
def Y2_dcfetchbo : LD0Inst<(outs), (ins IntRegs:$Rs, u11_3Imm:$u11_3),
"dcfetch($Rs + #$u11_3)",
- [(HexagonDCFETCH IntRegs:$Rs, u11_3ImmPred:$u11_3)],
- "", LD_tc_ld_SLOT0> {
+ [], "", LD_tc_ld_SLOT0> {
bits<5> Rs;
bits<14> u11_3;
@@ -4136,9 +3044,6 @@ def Y2_dcfetchbo : LD0Inst<(outs), (ins IntRegs:$Rs, u11_3Imm:$u11_3),
}
-def: Pat<(HexagonDCFETCH (i32 (add IntRegs:$Rs, u11_3ImmPred:$u11_3)), (i32 0)),
- (Y2_dcfetchbo IntRegs:$Rs, u11_3ImmPred:$u11_3)>;
-
//===----------------------------------------------------------------------===//
// Compound instructions
//===----------------------------------------------------------------------===//
@@ -4248,7 +3153,7 @@ let isBranch = 1, hasSideEffects = 0, isExtentSigned = 1,
isPredicated = 1, isPredicatedNew = 1, isExtendable = 1, opExtentBits = 11,
opExtentAlign = 2, opExtendable = 2, isTerminator = 1 in
class CJInst_RU5<string px, string op, bit np, string tnt>
- : InstHexagon<(outs), (ins IntRegs:$Rs, u5Imm:$U5, brtarget:$r9_2),
+ : InstHexagon<(outs), (ins IntRegs:$Rs, u5_0Imm:$U5, brtarget:$r9_2),
""#px#" = cmp."#op#"($Rs, #$U5); if ("
#!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2",
[], "", COMPOUND_CJ_ARCHDEPSLOT, TypeCOMPOUND>, OpcodeHexagon {
@@ -4300,11 +3205,11 @@ defm gtu : T_pnp_CJInst_RU5<"gtu">;
let isBranch = 1, hasSideEffects = 0, isExtentSigned = 1,
isPredicated = 1, isPredicatedFalse = 1, isPredicatedNew = 1,
- isExtendable = 1, opExtentBits = 11, opExtentAlign = 2, opExtendable = 1,
+ isExtendable = 1, opExtentBits = 11, opExtentAlign = 2, opExtendable = 2,
isTerminator = 1 in
class CJInst_Rn1<string px, string op, bit np, string tnt>
- : InstHexagon<(outs), (ins IntRegs:$Rs, brtarget:$r9_2),
- ""#px#" = cmp."#op#"($Rs,#-1); if ("
+ : InstHexagon<(outs), (ins IntRegs:$Rs, n1Const:$n1, brtarget:$r9_2),
+ ""#px#" = cmp."#op#"($Rs,#$n1); if ("
#!if(np, "!","")#""#px#".new) jump:"#tnt#" $r9_2",
[], "", COMPOUND_CJ_ARCHDEPSLOT, TypeCOMPOUND>, OpcodeHexagon {
bits<4> Rs;
@@ -4357,7 +3262,7 @@ let Defs = [PC], isBranch = 1, hasSideEffects = 0, hasNewValue = 1,
opExtentAlign = 2, opExtendable = 2 in
def J4_jumpseti: CJInst_JMPSET <
(outs IntRegs:$Rd),
- (ins u6Imm:$U6, brtarget:$r9_2),
+ (ins u6_0Imm:$U6, brtarget:$r9_2),
"$Rd = #$U6 ; jump $r9_2"> {
bits<4> Rd;
bits<6> U6;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV5.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV5.td
index 823961fb6e6f..cd19b6916f21 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV5.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV5.td
@@ -43,10 +43,7 @@ let Predicates = [HasV5T] in {
def A5_vaddhubs: T_S3op_1 <"vaddhub", IntRegs, 0b01, 0b001, 0, 1>;
}
-def S2_asr_i_p_rnd : S_2OpInstImm<"asr", 0b110, 0b111, u6Imm,
- [(set I64:$dst,
- (sra (i64 (add (i64 (sra I64:$src1, u6ImmPred:$src2)), 1)),
- (i32 1)))], 1>,
+def S2_asr_i_p_rnd : S_2OpInstImm<"asr", 0b110, 0b111, u6_0Imm, [], 1>,
Requires<[HasV5T]> {
bits<6> src2;
let Inst{13-8} = src2;
@@ -54,7 +51,7 @@ def S2_asr_i_p_rnd : S_2OpInstImm<"asr", 0b110, 0b111, u6Imm,
let isAsmParserOnly = 1 in
def S2_asr_i_p_rnd_goodsyntax
- : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6Imm:$src2),
+ : MInst<(outs DoubleRegs:$dst), (ins DoubleRegs:$src1, u6_0Imm:$src2),
"$dst = asrrnd($src1, #$src2)">;
def C4_fastcorner9 : T_LOGICAL_2OP<"fastcorner9", 0b000, 0, 0>,
@@ -67,66 +64,9 @@ def C4_fastcorner9_not : T_LOGICAL_2OP<"!fastcorner9", 0b000, 0, 0>,
let Inst{20,13,7,4} = 0b1111;
}
-def SDTHexagonFCONST32 : SDTypeProfile<1, 1, [SDTCisVT<0, f32>,
- SDTCisPtrTy<1>]>;
-def HexagonFCONST32 : SDNode<"HexagonISD::FCONST32", SDTHexagonFCONST32>;
-
-let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in
-def FCONST32_nsdata : LDInst<(outs IntRegs:$dst), (ins globaladdress:$global),
- "$dst = CONST32(#$global)",
- [(set F32:$dst,
- (HexagonFCONST32 tglobaladdr:$global))]>,
- Requires<[HasV5T]>;
-
-let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in
-def CONST64_Float_Real : LDInst<(outs DoubleRegs:$dst), (ins f64imm:$src1),
- "$dst = CONST64(#$src1)",
- [(set F64:$dst, fpimm:$src1)]>,
- Requires<[HasV5T]>;
-
-let isReMaterializable = 1, isMoveImm = 1, isAsmParserOnly = 1 in
-def CONST32_Float_Real : LDInst<(outs IntRegs:$dst), (ins f32imm:$src1),
- "$dst = CONST32(#$src1)",
- [(set F32:$dst, fpimm:$src1)]>,
- Requires<[HasV5T]>;
-
-// Transfer immediate float.
-// Only works with single precision fp value.
-// For double precision, use CONST64_float_real, as 64bit transfer
-// can only hold 40-bit values - 32 from const ext + 8 bit immediate.
-// Make sure that complexity is more than the CONST32 pattern in
-// HexagonInstrInfo.td patterns.
-let isExtended = 1, opExtendable = 1, isMoveImm = 1, isReMaterializable = 1,
- isPredicable = 1, AddedComplexity = 30, validSubTargets = HasV5SubT,
- isCodeGenOnly = 1, isPseudo = 1 in
-def TFRI_f : ALU32_ri<(outs IntRegs:$dst), (ins f32Ext:$src1),
- "$dst = #$src1",
- [(set F32:$dst, fpimm:$src1)]>,
- Requires<[HasV5T]>;
-
-let isExtended = 1, opExtendable = 2, isPredicated = 1, hasSideEffects = 0,
- validSubTargets = HasV5SubT, isCodeGenOnly = 1, isPseudo = 1 in
-def TFRI_cPt_f : ALU32_ri<(outs IntRegs:$dst),
- (ins PredRegs:$src1, f32Ext:$src2),
- "if ($src1) $dst = #$src2", []>,
- Requires<[HasV5T]>;
-
-let isExtended = 1, opExtendable = 2, isPredicated = 1, isPredicatedFalse = 1,
- hasSideEffects = 0, validSubTargets = HasV5SubT, isPseudo = 1 in
-def TFRI_cNotPt_f : ALU32_ri<(outs IntRegs:$dst),
- (ins PredRegs:$src1, f32Ext:$src2),
- "if (!$src1) $dst = #$src2", []>,
- Requires<[HasV5T]>;
-
-def SDTHexagonI32I64: SDTypeProfile<1, 1, [SDTCisVT<0, i32>,
- SDTCisVT<1, i64>]>;
-
-def HexagonPOPCOUNT: SDNode<"HexagonISD::POPCOUNT", SDTHexagonI32I64>;
-
let hasNewValue = 1, validSubTargets = HasV5SubT in
def S5_popcountp : ALU64_rr<(outs IntRegs:$Rd), (ins DoubleRegs:$Rss),
- "$Rd = popcount($Rss)",
- [(set I32:$Rd, (HexagonPOPCOUNT I64:$Rss))], "", S_2op_tc_2_SLOT23>,
+ "$Rd = popcount($Rss)", [], "", S_2op_tc_2_SLOT23>,
Requires<[HasV5T]> {
bits<5> Rd;
bits<5> Rss;
@@ -139,14 +79,6 @@ def S5_popcountp : ALU64_rr<(outs IntRegs:$Rd), (ins DoubleRegs:$Rss),
let Inst{20-16} = Rss;
}
-defm: Loadx_pat<load, f32, s30_2ImmPred, L2_loadri_io>;
-defm: Loadx_pat<load, f64, s29_3ImmPred, L2_loadrd_io>;
-
-defm: Storex_pat<store, F32, s30_2ImmPred, S2_storeri_io>;
-defm: Storex_pat<store, F64, s29_3ImmPred, S2_storerd_io>;
-def: Storex_simple_pat<store, F32, S2_storeri_io>;
-def: Storex_simple_pat<store, F64, S2_storerd_io>;
-
let isFP = 1, hasNewValue = 1, opNewValue = 0 in
class T_MInstFloat <string mnemonic, bits<3> MajOp, bits<3> MinOp>
: MInst<(outs IntRegs:$Rd),
@@ -176,44 +108,19 @@ let isCommutable = 1 in {
def F2_sfsub : T_MInstFloat < "sfsub", 0b000, 0b001>;
-def: Pat<(f32 (fadd F32:$src1, F32:$src2)),
- (F2_sfadd F32:$src1, F32:$src2)>;
-
-def: Pat<(f32 (fsub F32:$src1, F32:$src2)),
- (F2_sfsub F32:$src1, F32:$src2)>;
-
-def: Pat<(f32 (fmul F32:$src1, F32:$src2)),
- (F2_sfmpy F32:$src1, F32:$src2)>;
-
let Itinerary = M_tc_3x_SLOT23 in {
def F2_sfmax : T_MInstFloat < "sfmax", 0b100, 0b000>;
def F2_sfmin : T_MInstFloat < "sfmin", 0b100, 0b001>;
}
-let AddedComplexity = 100, Predicates = [HasV5T] in {
- def: Pat<(f32 (select (i1 (setolt F32:$src1, F32:$src2)),
- F32:$src1, F32:$src2)),
- (F2_sfmin F32:$src1, F32:$src2)>;
-
- def: Pat<(f32 (select (i1 (setogt F32:$src1, F32:$src2)),
- F32:$src2, F32:$src1)),
- (F2_sfmin F32:$src1, F32:$src2)>;
-
- def: Pat<(f32 (select (i1 (setogt F32:$src1, F32:$src2)),
- F32:$src1, F32:$src2)),
- (F2_sfmax F32:$src1, F32:$src2)>;
-
- def: Pat<(f32 (select (i1 (setolt F32:$src1, F32:$src2)),
- F32:$src2, F32:$src1)),
- (F2_sfmax F32:$src1, F32:$src2)>;
-}
-
+let Itinerary = M_tc_3or4x_SLOT23 in {
def F2_sffixupn : T_MInstFloat < "sffixupn", 0b110, 0b000>;
def F2_sffixupd : T_MInstFloat < "sffixupd", 0b110, 0b001>;
+}
// F2_sfrecipa: Reciprocal approximation for division.
-let isPredicateLate = 1, isFP = 1,
-hasSideEffects = 0, hasNewValue = 1 in
+let Uses = [USR], isPredicateLate = 1, isFP = 1,
+ hasSideEffects = 0, hasNewValue = 1, Itinerary = M_tc_3or4x_SLOT23 in
def F2_sfrecipa: MInst <
(outs IntRegs:$Rd, PredRegs:$Pe),
(ins IntRegs:$Rs, IntRegs:$Rt),
@@ -235,7 +142,7 @@ def F2_sfrecipa: MInst <
}
// F2_dfcmpeq: Floating point compare for equal.
-let isCompare = 1, isFP = 1 in
+let Uses = [USR], isCompare = 1, isFP = 1 in
class T_fcmp <string mnemonic, RegisterClass RC, bits<3> MinOp,
list<dag> pattern = [] >
: ALU64Inst <(outs PredRegs:$dst), (ins RC:$src1, RC:$src2),
@@ -256,15 +163,13 @@ class T_fcmp <string mnemonic, RegisterClass RC, bits<3> MinOp,
}
class T_fcmp64 <string mnemonic, PatFrag OpNode, bits<3> MinOp>
- : T_fcmp <mnemonic, DoubleRegs, MinOp,
- [(set I1:$dst, (OpNode F64:$src1, F64:$src2))]> {
+ : T_fcmp <mnemonic, DoubleRegs, MinOp, []> {
let IClass = 0b1101;
let Inst{27-21} = 0b0010111;
}
class T_fcmp32 <string mnemonic, PatFrag OpNode, bits<3> MinOp>
- : T_fcmp <mnemonic, IntRegs, MinOp,
- [(set I1:$dst, (OpNode F32:$src1, F32:$src2))]> {
+ : T_fcmp <mnemonic, IntRegs, MinOp, []> {
let IClass = 0b1100;
let Inst{27-21} = 0b0111111;
}
@@ -279,259 +184,12 @@ def F2_sfcmpuo : T_fcmp32<"sfcmp.uo", setuo, 0b001>;
def F2_sfcmpeq : T_fcmp32<"sfcmp.eq", setoeq, 0b011>;
def F2_sfcmpgt : T_fcmp32<"sfcmp.gt", setogt, 0b100>;
-//===----------------------------------------------------------------------===//
-// Multiclass to define 'Def Pats' for ordered gt, ge, eq operations.
-//===----------------------------------------------------------------------===//
-
-let Predicates = [HasV5T] in
-multiclass T_fcmp_pats<PatFrag cmpOp, InstHexagon IntMI, InstHexagon DoubleMI> {
- // IntRegs
- def: Pat<(i1 (cmpOp F32:$src1, F32:$src2)),
- (IntMI F32:$src1, F32:$src2)>;
- // DoubleRegs
- def: Pat<(i1 (cmpOp F64:$src1, F64:$src2)),
- (DoubleMI F64:$src1, F64:$src2)>;
-}
-
-defm : T_fcmp_pats <seteq, F2_sfcmpeq, F2_dfcmpeq>;
-defm : T_fcmp_pats <setgt, F2_sfcmpgt, F2_dfcmpgt>;
-defm : T_fcmp_pats <setge, F2_sfcmpge, F2_dfcmpge>;
-
-//===----------------------------------------------------------------------===//
-// Multiclass to define 'Def Pats' for unordered gt, ge, eq operations.
-//===----------------------------------------------------------------------===//
-let Predicates = [HasV5T] in
-multiclass unord_Pats <PatFrag cmpOp, InstHexagon IntMI, InstHexagon DoubleMI> {
- // IntRegs
- def: Pat<(i1 (cmpOp F32:$src1, F32:$src2)),
- (C2_or (F2_sfcmpuo F32:$src1, F32:$src2),
- (IntMI F32:$src1, F32:$src2))>;
-
- // DoubleRegs
- def: Pat<(i1 (cmpOp F64:$src1, F64:$src2)),
- (C2_or (F2_dfcmpuo F64:$src1, F64:$src2),
- (DoubleMI F64:$src1, F64:$src2))>;
-}
-
-defm : unord_Pats <setuge, F2_sfcmpge, F2_dfcmpge>;
-defm : unord_Pats <setugt, F2_sfcmpgt, F2_dfcmpgt>;
-defm : unord_Pats <setueq, F2_sfcmpeq, F2_dfcmpeq>;
-
-//===----------------------------------------------------------------------===//
-// Multiclass to define 'Def Pats' for the following dags:
-// seteq(setoeq(op1, op2), 0) -> not(setoeq(op1, op2))
-// seteq(setoeq(op1, op2), 1) -> setoeq(op1, op2)
-// setne(setoeq(op1, op2), 0) -> setoeq(op1, op2)
-// setne(setoeq(op1, op2), 1) -> not(setoeq(op1, op2))
-//===----------------------------------------------------------------------===//
-let Predicates = [HasV5T] in
-multiclass eq_ordgePats <PatFrag cmpOp, InstHexagon IntMI,
- InstHexagon DoubleMI> {
- // IntRegs
- def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 0)),
- (C2_not (IntMI F32:$src1, F32:$src2))>;
- def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 1)),
- (IntMI F32:$src1, F32:$src2)>;
- def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 0)),
- (IntMI F32:$src1, F32:$src2)>;
- def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 1)),
- (C2_not (IntMI F32:$src1, F32:$src2))>;
-
- // DoubleRegs
- def : Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 0)),
- (C2_not (DoubleMI F64:$src1, F64:$src2))>;
- def : Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 1)),
- (DoubleMI F64:$src1, F64:$src2)>;
- def : Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 0)),
- (DoubleMI F64:$src1, F64:$src2)>;
- def : Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 1)),
- (C2_not (DoubleMI F64:$src1, F64:$src2))>;
-}
-
-defm : eq_ordgePats<setoeq, F2_sfcmpeq, F2_dfcmpeq>;
-defm : eq_ordgePats<setoge, F2_sfcmpge, F2_dfcmpge>;
-defm : eq_ordgePats<setogt, F2_sfcmpgt, F2_dfcmpgt>;
-
-//===----------------------------------------------------------------------===//
-// Multiclass to define 'Def Pats' for the following dags:
-// seteq(setolt(op1, op2), 0) -> not(setogt(op2, op1))
-// seteq(setolt(op1, op2), 1) -> setogt(op2, op1)
-// setne(setolt(op1, op2), 0) -> setogt(op2, op1)
-// setne(setolt(op1, op2), 1) -> not(setogt(op2, op1))
-//===----------------------------------------------------------------------===//
-let Predicates = [HasV5T] in
-multiclass eq_ordltPats <PatFrag cmpOp, InstHexagon IntMI,
- InstHexagon DoubleMI> {
- // IntRegs
- def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 0)),
- (C2_not (IntMI F32:$src2, F32:$src1))>;
- def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 1)),
- (IntMI F32:$src2, F32:$src1)>;
- def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 0)),
- (IntMI F32:$src2, F32:$src1)>;
- def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 1)),
- (C2_not (IntMI F32:$src2, F32:$src1))>;
-
- // DoubleRegs
- def: Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 0)),
- (C2_not (DoubleMI F64:$src2, F64:$src1))>;
- def: Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 1)),
- (DoubleMI F64:$src2, F64:$src1)>;
- def: Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 0)),
- (DoubleMI F64:$src2, F64:$src1)>;
- def: Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 0)),
- (C2_not (DoubleMI F64:$src2, F64:$src1))>;
-}
-
-defm : eq_ordltPats<setole, F2_sfcmpge, F2_dfcmpge>;
-defm : eq_ordltPats<setolt, F2_sfcmpgt, F2_dfcmpgt>;
-
-
-// o. seto inverse of setuo. http://llvm.org/docs/LangRef.html#i_fcmp
-let Predicates = [HasV5T] in {
- def: Pat<(i1 (seto F32:$src1, F32:$src2)),
- (C2_not (F2_sfcmpuo F32:$src2, F32:$src1))>;
- def: Pat<(i1 (seto F32:$src1, fpimm:$src2)),
- (C2_not (F2_sfcmpuo (TFRI_f fpimm:$src2), F32:$src1))>;
- def: Pat<(i1 (seto F64:$src1, F64:$src2)),
- (C2_not (F2_dfcmpuo F64:$src2, F64:$src1))>;
- def: Pat<(i1 (seto F64:$src1, fpimm:$src2)),
- (C2_not (F2_dfcmpuo (CONST64_Float_Real fpimm:$src2), F64:$src1))>;
-}
-
-// Ordered lt.
-let Predicates = [HasV5T] in {
- def: Pat<(i1 (setolt F32:$src1, F32:$src2)),
- (F2_sfcmpgt F32:$src2, F32:$src1)>;
- def: Pat<(i1 (setolt F32:$src1, fpimm:$src2)),
- (F2_sfcmpgt (f32 (TFRI_f fpimm:$src2)), F32:$src1)>;
- def: Pat<(i1 (setolt F64:$src1, F64:$src2)),
- (F2_dfcmpgt F64:$src2, F64:$src1)>;
- def: Pat<(i1 (setolt F64:$src1, fpimm:$src2)),
- (F2_dfcmpgt (CONST64_Float_Real fpimm:$src2), F64:$src1)>;
-}
-
-// Unordered lt.
-let Predicates = [HasV5T] in {
- def: Pat<(i1 (setult F32:$src1, F32:$src2)),
- (C2_or (F2_sfcmpuo F32:$src1, F32:$src2),
- (F2_sfcmpgt F32:$src2, F32:$src1))>;
- def: Pat<(i1 (setult F32:$src1, fpimm:$src2)),
- (C2_or (F2_sfcmpuo F32:$src1, (TFRI_f fpimm:$src2)),
- (F2_sfcmpgt (TFRI_f fpimm:$src2), F32:$src1))>;
- def: Pat<(i1 (setult F64:$src1, F64:$src2)),
- (C2_or (F2_dfcmpuo F64:$src1, F64:$src2),
- (F2_dfcmpgt F64:$src2, F64:$src1))>;
- def: Pat<(i1 (setult F64:$src1, fpimm:$src2)),
- (C2_or (F2_dfcmpuo F64:$src1, (CONST64_Float_Real fpimm:$src2)),
- (F2_dfcmpgt (CONST64_Float_Real fpimm:$src2), F64:$src1))>;
-}
-
-// Ordered le.
-let Predicates = [HasV5T] in {
- // rs <= rt -> rt >= rs.
- def: Pat<(i1 (setole F32:$src1, F32:$src2)),
- (F2_sfcmpge F32:$src2, F32:$src1)>;
- def: Pat<(i1 (setole F32:$src1, fpimm:$src2)),
- (F2_sfcmpge (TFRI_f fpimm:$src2), F32:$src1)>;
-
- // Rss <= Rtt -> Rtt >= Rss.
- def: Pat<(i1 (setole F64:$src1, F64:$src2)),
- (F2_dfcmpge F64:$src2, F64:$src1)>;
- def: Pat<(i1 (setole F64:$src1, fpimm:$src2)),
- (F2_dfcmpge (CONST64_Float_Real fpimm:$src2), F64:$src1)>;
-}
-
-// Unordered le.
-let Predicates = [HasV5T] in {
-// rs <= rt -> rt >= rs.
- def: Pat<(i1 (setule F32:$src1, F32:$src2)),
- (C2_or (F2_sfcmpuo F32:$src1, F32:$src2),
- (F2_sfcmpge F32:$src2, F32:$src1))>;
- def: Pat<(i1 (setule F32:$src1, fpimm:$src2)),
- (C2_or (F2_sfcmpuo F32:$src1, (TFRI_f fpimm:$src2)),
- (F2_sfcmpge (TFRI_f fpimm:$src2), F32:$src1))>;
- def: Pat<(i1 (setule F64:$src1, F64:$src2)),
- (C2_or (F2_dfcmpuo F64:$src1, F64:$src2),
- (F2_dfcmpge F64:$src2, F64:$src1))>;
- def: Pat<(i1 (setule F64:$src1, fpimm:$src2)),
- (C2_or (F2_dfcmpuo F64:$src1, (CONST64_Float_Real fpimm:$src2)),
- (F2_dfcmpge (CONST64_Float_Real fpimm:$src2), F64:$src1))>;
-}
-
-// Ordered ne.
-let Predicates = [HasV5T] in {
- def: Pat<(i1 (setone F32:$src1, F32:$src2)),
- (C2_not (F2_sfcmpeq F32:$src1, F32:$src2))>;
- def: Pat<(i1 (setone F64:$src1, F64:$src2)),
- (C2_not (F2_dfcmpeq F64:$src1, F64:$src2))>;
- def: Pat<(i1 (setone F32:$src1, fpimm:$src2)),
- (C2_not (F2_sfcmpeq F32:$src1, (TFRI_f fpimm:$src2)))>;
- def: Pat<(i1 (setone F64:$src1, fpimm:$src2)),
- (C2_not (F2_dfcmpeq F64:$src1, (CONST64_Float_Real fpimm:$src2)))>;
-}
-
-// Unordered ne.
-let Predicates = [HasV5T] in {
- def: Pat<(i1 (setune F32:$src1, F32:$src2)),
- (C2_or (F2_sfcmpuo F32:$src1, F32:$src2),
- (C2_not (F2_sfcmpeq F32:$src1, F32:$src2)))>;
- def: Pat<(i1 (setune F64:$src1, F64:$src2)),
- (C2_or (F2_dfcmpuo F64:$src1, F64:$src2),
- (C2_not (F2_dfcmpeq F64:$src1, F64:$src2)))>;
- def: Pat<(i1 (setune F32:$src1, fpimm:$src2)),
- (C2_or (F2_sfcmpuo F32:$src1, (TFRI_f fpimm:$src2)),
- (C2_not (F2_sfcmpeq F32:$src1, (TFRI_f fpimm:$src2))))>;
- def: Pat<(i1 (setune F64:$src1, fpimm:$src2)),
- (C2_or (F2_dfcmpuo F64:$src1, (CONST64_Float_Real fpimm:$src2)),
- (C2_not (F2_dfcmpeq F64:$src1,
- (CONST64_Float_Real fpimm:$src2))))>;
-}
-
-// Besides set[o|u][comparisons], we also need set[comparisons].
-let Predicates = [HasV5T] in {
- // lt.
- def: Pat<(i1 (setlt F32:$src1, F32:$src2)),
- (F2_sfcmpgt F32:$src2, F32:$src1)>;
- def: Pat<(i1 (setlt F32:$src1, fpimm:$src2)),
- (F2_sfcmpgt (TFRI_f fpimm:$src2), F32:$src1)>;
- def: Pat<(i1 (setlt F64:$src1, F64:$src2)),
- (F2_dfcmpgt F64:$src2, F64:$src1)>;
- def: Pat<(i1 (setlt F64:$src1, fpimm:$src2)),
- (F2_dfcmpgt (CONST64_Float_Real fpimm:$src2), F64:$src1)>;
-
- // le.
- // rs <= rt -> rt >= rs.
- def: Pat<(i1 (setle F32:$src1, F32:$src2)),
- (F2_sfcmpge F32:$src2, F32:$src1)>;
- def: Pat<(i1 (setle F32:$src1, fpimm:$src2)),
- (F2_sfcmpge (TFRI_f fpimm:$src2), F32:$src1)>;
-
- // Rss <= Rtt -> Rtt >= Rss.
- def: Pat<(i1 (setle F64:$src1, F64:$src2)),
- (F2_dfcmpge F64:$src2, F64:$src1)>;
- def: Pat<(i1 (setle F64:$src1, fpimm:$src2)),
- (F2_dfcmpge (CONST64_Float_Real fpimm:$src2), F64:$src1)>;
-
- // ne.
- def: Pat<(i1 (setne F32:$src1, F32:$src2)),
- (C2_not (F2_sfcmpeq F32:$src1, F32:$src2))>;
- def: Pat<(i1 (setne F64:$src1, F64:$src2)),
- (C2_not (F2_dfcmpeq F64:$src1, F64:$src2))>;
- def: Pat<(i1 (setne F32:$src1, fpimm:$src2)),
- (C2_not (F2_sfcmpeq F32:$src1, (TFRI_f fpimm:$src2)))>;
- def: Pat<(i1 (setne F64:$src1, fpimm:$src2)),
- (C2_not (F2_dfcmpeq F64:$src1, (CONST64_Float_Real fpimm:$src2)))>;
-}
-
// F2 convert template classes:
-let isFP = 1 in
+let Uses = [USR], isFP = 1 in
class F2_RDD_RSS_CONVERT<string mnemonic, bits<3> MinOp,
- SDNode Op, PatLeaf RCOut, PatLeaf RCIn,
string chop ="">
: SInst <(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss),
- "$Rdd = "#mnemonic#"($Rss)"#chop,
- [(set RCOut:$Rdd, (Op RCIn:$Rss))], "",
+ "$Rdd = "#mnemonic#"($Rss)"#chop, [], "",
S_2op_tc_3or4x_SLOT23> {
bits<5> Rdd;
bits<5> Rss;
@@ -544,13 +202,11 @@ class F2_RDD_RSS_CONVERT<string mnemonic, bits<3> MinOp,
let Inst{4-0} = Rdd;
}
-let isFP = 1 in
+let Uses = [USR], isFP = 1 in
class F2_RDD_RS_CONVERT<string mnemonic, bits<3> MinOp,
- SDNode Op, PatLeaf RCOut, PatLeaf RCIn,
string chop ="">
: SInst <(outs DoubleRegs:$Rdd), (ins IntRegs:$Rs),
- "$Rdd = "#mnemonic#"($Rs)"#chop,
- [(set RCOut:$Rdd, (Op RCIn:$Rs))], "",
+ "$Rdd = "#mnemonic#"($Rs)"#chop, [], "",
S_2op_tc_3or4x_SLOT23> {
bits<5> Rdd;
bits<5> Rs;
@@ -563,13 +219,11 @@ class F2_RDD_RS_CONVERT<string mnemonic, bits<3> MinOp,
let Inst{4-0} = Rdd;
}
-let isFP = 1, hasNewValue = 1 in
+let Uses = [USR], isFP = 1, hasNewValue = 1 in
class F2_RD_RSS_CONVERT<string mnemonic, bits<3> MinOp,
- SDNode Op, PatLeaf RCOut, PatLeaf RCIn,
string chop ="">
: SInst <(outs IntRegs:$Rd), (ins DoubleRegs:$Rss),
- "$Rd = "#mnemonic#"($Rss)"#chop,
- [(set RCOut:$Rd, (Op RCIn:$Rss))], "",
+ "$Rd = "#mnemonic#"($Rss)"#chop, [], "",
S_2op_tc_3or4x_SLOT23> {
bits<5> Rd;
bits<5> Rss;
@@ -583,13 +237,11 @@ class F2_RD_RSS_CONVERT<string mnemonic, bits<3> MinOp,
let Inst{4-0} = Rd;
}
-let isFP = 1, hasNewValue = 1 in
+let Uses = [USR], isFP = 1, hasNewValue = 1 in
class F2_RD_RS_CONVERT<string mnemonic, bits<3> MajOp, bits<3> MinOp,
- SDNode Op, PatLeaf RCOut, PatLeaf RCIn,
string chop ="">
: SInst <(outs IntRegs:$Rd), (ins IntRegs:$Rs),
- "$Rd = "#mnemonic#"($Rs)"#chop,
- [(set RCOut:$Rd, (Op RCIn:$Rs))], "",
+ "$Rd = "#mnemonic#"($Rs)"#chop, [], "",
S_2op_tc_3or4x_SLOT23> {
bits<5> Rd;
bits<5> Rs;
@@ -604,70 +256,45 @@ class F2_RD_RS_CONVERT<string mnemonic, bits<3> MajOp, bits<3> MinOp,
}
// Convert single precision to double precision and vice-versa.
-def F2_conv_sf2df : F2_RDD_RS_CONVERT <"convert_sf2df", 0b000,
- fextend, F64, F32>;
-
-def F2_conv_df2sf : F2_RD_RSS_CONVERT <"convert_df2sf", 0b000,
- fround, F32, F64>;
+def F2_conv_sf2df : F2_RDD_RS_CONVERT <"convert_sf2df", 0b000>;
+def F2_conv_df2sf : F2_RD_RSS_CONVERT <"convert_df2sf", 0b000>;
// Convert Integer to Floating Point.
-def F2_conv_d2sf : F2_RD_RSS_CONVERT <"convert_d2sf", 0b010,
- sint_to_fp, F32, I64>;
-def F2_conv_ud2sf : F2_RD_RSS_CONVERT <"convert_ud2sf", 0b001,
- uint_to_fp, F32, I64>;
-def F2_conv_uw2sf : F2_RD_RS_CONVERT <"convert_uw2sf", 0b001, 0b000,
- uint_to_fp, F32, I32>;
-def F2_conv_w2sf : F2_RD_RS_CONVERT <"convert_w2sf", 0b010, 0b000,
- sint_to_fp, F32, I32>;
-def F2_conv_d2df : F2_RDD_RSS_CONVERT <"convert_d2df", 0b011,
- sint_to_fp, F64, I64>;
-def F2_conv_ud2df : F2_RDD_RSS_CONVERT <"convert_ud2df", 0b010,
- uint_to_fp, F64, I64>;
-def F2_conv_uw2df : F2_RDD_RS_CONVERT <"convert_uw2df", 0b001,
- uint_to_fp, F64, I32>;
-def F2_conv_w2df : F2_RDD_RS_CONVERT <"convert_w2df", 0b010,
- sint_to_fp, F64, I32>;
-
-// Convert Floating Point to Integer - default.
-def F2_conv_df2uw_chop : F2_RD_RSS_CONVERT <"convert_df2uw", 0b101,
- fp_to_uint, I32, F64, ":chop">;
-def F2_conv_df2w_chop : F2_RD_RSS_CONVERT <"convert_df2w", 0b111,
- fp_to_sint, I32, F64, ":chop">;
+def F2_conv_d2sf : F2_RD_RSS_CONVERT <"convert_d2sf", 0b010>;
+def F2_conv_ud2sf : F2_RD_RSS_CONVERT <"convert_ud2sf", 0b001>;
+def F2_conv_uw2sf : F2_RD_RS_CONVERT <"convert_uw2sf", 0b001, 0b000>;
+def F2_conv_w2sf : F2_RD_RS_CONVERT <"convert_w2sf", 0b010, 0b000>;
+def F2_conv_d2df : F2_RDD_RSS_CONVERT <"convert_d2df", 0b011>;
+def F2_conv_ud2df : F2_RDD_RSS_CONVERT <"convert_ud2df", 0b010>;
+def F2_conv_uw2df : F2_RDD_RS_CONVERT <"convert_uw2df", 0b001>;
+def F2_conv_w2df : F2_RDD_RS_CONVERT <"convert_w2df", 0b010>;
+
+// Convert Floating Point to Integer.
+def F2_conv_df2uw_chop : F2_RD_RSS_CONVERT <"convert_df2uw", 0b101, ":chop">;
+def F2_conv_df2w_chop : F2_RD_RSS_CONVERT <"convert_df2w", 0b111, ":chop">;
def F2_conv_sf2uw_chop : F2_RD_RS_CONVERT <"convert_sf2uw", 0b011, 0b001,
- fp_to_uint, I32, F32, ":chop">;
+ ":chop">;
def F2_conv_sf2w_chop : F2_RD_RS_CONVERT <"convert_sf2w", 0b100, 0b001,
- fp_to_sint, I32, F32, ":chop">;
-def F2_conv_df2d_chop : F2_RDD_RSS_CONVERT <"convert_df2d", 0b110,
- fp_to_sint, I64, F64, ":chop">;
-def F2_conv_df2ud_chop : F2_RDD_RSS_CONVERT <"convert_df2ud", 0b111,
- fp_to_uint, I64, F64, ":chop">;
-def F2_conv_sf2d_chop : F2_RDD_RS_CONVERT <"convert_sf2d", 0b110,
- fp_to_sint, I64, F32, ":chop">;
-def F2_conv_sf2ud_chop : F2_RDD_RS_CONVERT <"convert_sf2ud", 0b101,
- fp_to_uint, I64, F32, ":chop">;
+ ":chop">;
+def F2_conv_df2d_chop : F2_RDD_RSS_CONVERT <"convert_df2d", 0b110, ":chop">;
+def F2_conv_df2ud_chop : F2_RDD_RSS_CONVERT <"convert_df2ud", 0b111, ":chop">;
+def F2_conv_sf2d_chop : F2_RDD_RS_CONVERT <"convert_sf2d", 0b110, ":chop">;
+def F2_conv_sf2ud_chop : F2_RDD_RS_CONVERT <"convert_sf2ud", 0b101, ":chop">;
// Convert Floating Point to Integer: non-chopped.
-let AddedComplexity = 20, Predicates = [HasV5T, IEEERndNearV5T] in {
- def F2_conv_df2d : F2_RDD_RSS_CONVERT <"convert_df2d", 0b000,
- fp_to_sint, I64, F64>;
- def F2_conv_df2ud : F2_RDD_RSS_CONVERT <"convert_df2ud", 0b001,
- fp_to_uint, I64, F64>;
- def F2_conv_sf2ud : F2_RDD_RS_CONVERT <"convert_sf2ud", 0b011,
- fp_to_uint, I64, F32>;
- def F2_conv_sf2d : F2_RDD_RS_CONVERT <"convert_sf2d", 0b100,
- fp_to_sint, I64, F32>;
- def F2_conv_df2uw : F2_RD_RSS_CONVERT <"convert_df2uw", 0b011,
- fp_to_uint, I32, F64>;
- def F2_conv_df2w : F2_RD_RSS_CONVERT <"convert_df2w", 0b100,
- fp_to_sint, I32, F64>;
- def F2_conv_sf2uw : F2_RD_RS_CONVERT <"convert_sf2uw", 0b011, 0b000,
- fp_to_uint, I32, F32>;
- def F2_conv_sf2w : F2_RD_RS_CONVERT <"convert_sf2w", 0b100, 0b000,
- fp_to_sint, I32, F32>;
+let AddedComplexity = 20, Predicates = [HasV5T] in {
+ def F2_conv_df2d : F2_RDD_RSS_CONVERT <"convert_df2d", 0b000>;
+ def F2_conv_df2ud : F2_RDD_RSS_CONVERT <"convert_df2ud", 0b001>;
+ def F2_conv_sf2ud : F2_RDD_RS_CONVERT <"convert_sf2ud", 0b011>;
+ def F2_conv_sf2d : F2_RDD_RS_CONVERT <"convert_sf2d", 0b100>;
+ def F2_conv_df2uw : F2_RD_RSS_CONVERT <"convert_df2uw", 0b011>;
+ def F2_conv_df2w : F2_RD_RSS_CONVERT <"convert_df2w", 0b100>;
+ def F2_conv_sf2uw : F2_RD_RS_CONVERT <"convert_sf2uw", 0b011, 0b000>;
+ def F2_conv_sf2w : F2_RD_RS_CONVERT <"convert_sf2w", 0b100, 0b000>;
}
// Fix up radicand.
-let isFP = 1, hasNewValue = 1 in
+let Uses = [USR], isFP = 1, hasNewValue = 1 in
def F2_sffixupr: SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs),
"$Rd = sffixupr($Rs)",
[], "" , S_2op_tc_3or4x_SLOT23>, Requires<[HasV5T]> {
@@ -682,21 +309,13 @@ def F2_sffixupr: SInst<(outs IntRegs:$Rd), (ins IntRegs:$Rs),
let Inst{4-0} = Rd;
}
-// Bitcast is different than [fp|sint|uint]_to_[sint|uint|fp].
-let Predicates = [HasV5T] in {
- def: Pat <(i32 (bitconvert F32:$src)), (I32:$src)>;
- def: Pat <(f32 (bitconvert I32:$src)), (F32:$src)>;
- def: Pat <(i64 (bitconvert F64:$src)), (I64:$src)>;
- def: Pat <(f64 (bitconvert I64:$src)), (F64:$src)>;
-}
-
// F2_sffma: Floating-point fused multiply add.
-let isFP = 1, hasNewValue = 1 in
+let Uses = [USR], isFP = 1, hasNewValue = 1 in
class T_sfmpy_acc <bit isSub, bit isLib>
: MInst<(outs IntRegs:$Rx),
(ins IntRegs:$dst2, IntRegs:$Rs, IntRegs:$Rt),
"$Rx "#!if(isSub, "-=","+=")#" sfmpy($Rs, $Rt)"#!if(isLib, ":lib",""),
- [], "$dst2 = $Rx" , M_tc_3_SLOT23 > ,
+ [], "$dst2 = $Rx" , M_tc_3or4x_SLOT23 > ,
Requires<[HasV5T]> {
bits<5> Rx;
bits<5> Rs;
@@ -719,16 +338,13 @@ def F2_sffms: T_sfmpy_acc <1, 0>;
def F2_sffma_lib: T_sfmpy_acc <0, 1>;
def F2_sffms_lib: T_sfmpy_acc <1, 1>;
-def : Pat <(f32 (fma F32:$src2, F32:$src3, F32:$src1)),
- (F2_sffma F32:$src1, F32:$src2, F32:$src3)>;
-
// Floating-point fused multiply add w/ additional scaling (2**pu).
-let isFP = 1, hasNewValue = 1 in
+let Uses = [USR], isFP = 1, hasNewValue = 1 in
def F2_sffma_sc: MInst <
(outs IntRegs:$Rx),
(ins IntRegs:$dst2, IntRegs:$Rs, IntRegs:$Rt, PredRegs:$Pu),
"$Rx += sfmpy($Rs, $Rt, $Pu):scale" ,
- [], "$dst2 = $Rx" , M_tc_3_SLOT23 > ,
+ [], "$dst2 = $Rx" , M_tc_3or4x_SLOT23 > ,
Requires<[HasV5T]> {
bits<5> Rx;
bits<5> Rs;
@@ -746,54 +362,6 @@ def F2_sffma_sc: MInst <
let Inst{4-0} = Rx;
}
-let isExtended = 1, isExtentSigned = 1, opExtentBits = 8, opExtendable = 3,
- isPseudo = 1, InputType = "imm" in
-def MUX_ir_f : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, IntRegs:$src2, f32Ext:$src3),
- "$dst = mux($src1, $src2, #$src3)",
- [(set F32:$dst, (f32 (select I1:$src1, F32:$src2, fpimm:$src3)))]>,
- Requires<[HasV5T]>;
-
-let isExtended = 1, isExtentSigned = 1, opExtentBits = 8, opExtendable = 2,
- isPseudo = 1, InputType = "imm" in
-def MUX_ri_f : ALU32_rr<(outs IntRegs:$dst),
- (ins PredRegs:$src1, f32Ext:$src2, IntRegs:$src3),
- "$dst = mux($src1, #$src2, $src3)",
- [(set F32:$dst, (f32 (select I1:$src1, fpimm:$src2, F32:$src3)))]>,
- Requires<[HasV5T]>;
-
-def: Pat<(select I1:$src1, F32:$src2, F32:$src3),
- (C2_mux I1:$src1, F32:$src2, F32:$src3)>,
- Requires<[HasV5T]>;
-
-def: Pat<(select (i1 (setult F32:$src1, F32:$src2)), F32:$src3, F32:$src4),
- (C2_mux (F2_sfcmpgt F32:$src2, F32:$src1), F32:$src4, F32:$src3)>,
- Requires<[HasV5T]>;
-
-def: Pat<(select I1:$src1, F64:$src2, F64:$src3),
- (C2_vmux I1:$src1, F64:$src2, F64:$src3)>,
- Requires<[HasV5T]>;
-
-def: Pat<(select (i1 (setult F64:$src1, F64:$src2)), F64:$src3, F64:$src4),
- (C2_vmux (F2_dfcmpgt F64:$src2, F64:$src1), F64:$src3, F64:$src4)>,
- Requires<[HasV5T]>;
-
-// Map from p0 = pnot(p0); r0 = select(p0, #i, r1)
-// => r0 = MUX_ir_f(p0, #i, r1)
-def: Pat<(select (not I1:$src1), fpimm:$src2, F32:$src3),
- (MUX_ir_f I1:$src1, F32:$src3, fpimm:$src2)>,
- Requires<[HasV5T]>;
-
-// Map from p0 = pnot(p0); r0 = mux(p0, r1, #i)
-// => r0 = MUX_ri_f(p0, r1, #i)
-def: Pat<(select (not I1:$src1), F32:$src2, fpimm:$src3),
- (MUX_ri_f I1:$src1, fpimm:$src3, F32:$src2)>,
- Requires<[HasV5T]>;
-
-def: Pat<(i32 (fp_to_sint F64:$src1)),
- (LoReg (F2_conv_df2d_chop F64:$src1))>,
- Requires<[HasV5T]>;
-
//===----------------------------------------------------------------------===//
// :natural forms of vasrh and vasrhub insns
//===----------------------------------------------------------------------===//
@@ -802,7 +370,7 @@ def: Pat<(i32 (fp_to_sint F64:$src1)),
let Defs = [USR_OVF], hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
class T_ASRHUB<bit isSat>
: SInst <(outs IntRegs:$Rd),
- (ins DoubleRegs:$Rss, u4Imm:$u4),
+ (ins DoubleRegs:$Rss, u4_0Imm:$u4),
"$Rd = vasrhub($Rss, #$u4):"#!if(isSat, "sat", "raw"),
[], "", S_2op_tc_2_SLOT23>,
Requires<[HasV5T]> {
@@ -826,13 +394,13 @@ def S5_asrhub_sat : T_ASRHUB <1>;
let isAsmParserOnly = 1 in
def S5_asrhub_rnd_sat_goodsyntax
- : SInst <(outs IntRegs:$Rd), (ins DoubleRegs:$Rss, u4Imm:$u4),
+ : SInst <(outs IntRegs:$Rd), (ins DoubleRegs:$Rss, u4_0Imm:$u4),
"$Rd = vasrhub($Rss, #$u4):rnd:sat">, Requires<[HasV5T]>;
// S5_vasrhrnd: Vector arithmetic shift right by immediate with round.
let hasSideEffects = 0 in
def S5_vasrhrnd : SInst <(outs DoubleRegs:$Rdd),
- (ins DoubleRegs:$Rss, u4Imm:$u4),
+ (ins DoubleRegs:$Rss, u4_0Imm:$u4),
"$Rdd = vasrh($Rss, #$u4):raw">,
Requires<[HasV5T]> {
bits<5> Rdd;
@@ -851,7 +419,7 @@ def S5_vasrhrnd : SInst <(outs DoubleRegs:$Rdd),
let isAsmParserOnly = 1 in
def S5_vasrhrnd_goodsyntax
- : SInst <(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss, u4Imm:$u4),
+ : SInst <(outs DoubleRegs:$Rdd), (ins DoubleRegs:$Rss, u4_0Imm:$u4),
"$Rdd = vasrh($Rss,#$u4):rnd">, Requires<[HasV5T]>;
// Floating point reciprocal square root approximation
@@ -883,11 +451,11 @@ let Defs = [USR_OVF], Itinerary = S_3op_tc_3x_SLOT23 in {
}
// Classify floating-point value
-let isFP = 1 in
- def F2_sfclass : T_TEST_BIT_IMM<"sfclass", 0b111>;
+let Uses = [USR], isFP = 1 in
+def F2_sfclass : T_TEST_BIT_IMM<"sfclass", 0b111>, Requires<[HasV5T]>;
-let isFP = 1 in
-def F2_dfclass: ALU64Inst<(outs PredRegs:$Pd), (ins DoubleRegs:$Rss, u5Imm:$u5),
+let Uses = [USR], isFP = 1 in
+def F2_dfclass: ALU64Inst<(outs PredRegs:$Pd), (ins DoubleRegs:$Rss, u5_0Imm:$u5),
"$Pd = dfclass($Rss, #$u5)",
[], "" , ALU64_tc_2early_SLOT23 > , Requires<[HasV5T]> {
bits<2> Pd;
@@ -905,9 +473,9 @@ def F2_dfclass: ALU64Inst<(outs PredRegs:$Pd), (ins DoubleRegs:$Rss, u5Imm:$u5),
// Instructions to create floating point constant
class T_fimm <string mnemonic, RegisterClass RC, bits<4> RegType, bit isNeg>
- : ALU64Inst<(outs RC:$dst), (ins u10Imm:$src),
+ : ALU64Inst<(outs RC:$dst), (ins u10_0Imm:$src),
"$dst = "#mnemonic#"(#$src)"#!if(isNeg, ":neg", ":pos"),
- [], "", ALU64_tc_3x_SLOT23>, Requires<[HasV5T]> {
+ [], "", ALU64_tc_2_SLOT23>, Requires<[HasV5T]> {
bits<5> dst;
bits<10> src;
@@ -921,17 +489,9 @@ class T_fimm <string mnemonic, RegisterClass RC, bits<4> RegType, bit isNeg>
}
let hasNewValue = 1, opNewValue = 0 in {
-def F2_sfimm_p : T_fimm <"sfmake", IntRegs, 0b0110, 0>;
-def F2_sfimm_n : T_fimm <"sfmake", IntRegs, 0b0110, 1>;
+ def F2_sfimm_p : T_fimm <"sfmake", IntRegs, 0b0110, 0>;
+ def F2_sfimm_n : T_fimm <"sfmake", IntRegs, 0b0110, 1>;
}
def F2_dfimm_p : T_fimm <"dfmake", DoubleRegs, 0b1001, 0>;
def F2_dfimm_n : T_fimm <"dfmake", DoubleRegs, 0b1001, 1>;
-
-def : Pat <(fabs (f32 IntRegs:$src1)),
- (S2_clrbit_i (f32 IntRegs:$src1), 31)>,
- Requires<[HasV5T]>;
-
-def : Pat <(fneg (f32 IntRegs:$src1)),
- (S2_togglebit_i (f32 IntRegs:$src1), 31)>,
- Requires<[HasV5T]>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV60.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV60.td
index c3f09b69ce85..c50141b18ead 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV60.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoV60.td
@@ -10,33 +10,6 @@
// This file describes the Hexagon V60 instructions in TableGen format.
//
//===----------------------------------------------------------------------===//
-def alignedload : PatFrag<(ops node:$addr), (load $addr), [{
- return isAlignedMemNode(dyn_cast<MemSDNode>(N));
-}]>;
-
-def unalignedload : PatFrag<(ops node:$addr), (load $addr), [{
- return !isAlignedMemNode(dyn_cast<MemSDNode>(N));
-}]>;
-
-def alignedstore : PatFrag<(ops node:$val, node:$addr), (store $val, $addr), [{
- return isAlignedMemNode(dyn_cast<MemSDNode>(N));
-}]>;
-
-def unalignedstore : PatFrag<(ops node:$val, node:$addr), (store $val, $addr), [{
- return !isAlignedMemNode(dyn_cast<MemSDNode>(N));
-}]>;
-
-
-// Vector store
-let mayStore = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in
-{
- class VSTInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
- string cstr = "", InstrItinClass itin = CVI_VM_ST,
- IType type = TypeCVI_VM_ST>
- : InstHexagon<outs, ins, asmstr, pattern, cstr, itin, type>, OpcodeHexagon;
-
-}
-
// Vector load
let Predicates = [HasV60T, UseHVX] in
let mayLoad = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in
@@ -45,6 +18,7 @@ let mayLoad = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in
IType type = TypeCVI_VM_LD>
: InstHexagon<outs, ins, asmstr, pattern, cstr, itin, type>;
+// Vector store
let Predicates = [HasV60T, UseHVX] in
let mayStore = 1, validSubTargets = HasV60SubT, hasSideEffects = 0 in
class V6_STInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
@@ -219,6 +193,8 @@ let isNVStorable = 1 in {
def V6_vS32b_npred_ai_128B : T_vstore_pred_ai_128B <"vmem", "vS32b_ai", 1>,
V6_vS32b_npred_ai_128B_enc;
}
+
+
let isNVStorable = 1, isNonTemporal = 1 in {
def V6_vS32b_nt_pred_ai : T_vstore_pred_ai_64B <"vmem", "vS32b_ai", 0, 1>,
V6_vS32b_nt_pred_ai_enc;
@@ -774,256 +750,60 @@ def V6_vS32b_nt_new_npred_ppu : T_vstore_new_pred_ppu<1, 1>,
V6_vS32b_nt_new_npred_ppu_enc;
}
-let isPseudo = 1, validSubTargets = HasV60SubT in
-class STrivv_template<string mnemonic, Operand ImmOp, RegisterClass RC>:
- VSTInst<(outs), (ins IntRegs:$addr, ImmOp:$off, RC:$src),
- #mnemonic#"($addr+#$off) = $src", []>;
-
-def STrivv_indexed: STrivv_template<"vvmem", s4_6Imm, VecDblRegs>,
- Requires<[HasV60T, UseHVXSgl]>;
-def STrivv_indexed_128B: STrivv_template<"vvmem", s4_7Imm, VecDblRegs128B>,
- Requires<[HasV60T, UseHVXDbl]>;
-
-multiclass STrivv_pats <ValueType VTSgl, ValueType VTDbl> {
- def : Pat<(store (VTSgl VecDblRegs:$src1), IntRegs:$addr),
- (STrivv_indexed IntRegs:$addr, #0, (VTSgl VecDblRegs:$src1))>,
- Requires<[UseHVXSgl]>;
-
- def : Pat<(store (VTDbl VecDblRegs128B:$src1), IntRegs:$addr),
- (STrivv_indexed_128B IntRegs:$addr, #0,
- (VTDbl VecDblRegs128B:$src1))>,
- Requires<[UseHVXDbl]>;
-}
-
-defm : STrivv_pats <v128i8, v256i8>;
-defm : STrivv_pats <v64i16, v128i16>;
-defm : STrivv_pats <v32i32, v64i32>;
-defm : STrivv_pats <v16i64, v32i64>;
-
-
-multiclass vS32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
- // Aligned stores
- def : Pat<(alignedstore (VTSgl VectorRegs:$src1), IntRegs:$addr),
- (V6_vS32b_ai IntRegs:$addr, #0, (VTSgl VectorRegs:$src1))>,
- Requires<[UseHVXSgl]>;
- def : Pat<(unalignedstore (VTSgl VectorRegs:$src1), IntRegs:$addr),
- (V6_vS32Ub_ai IntRegs:$addr, #0, (VTSgl VectorRegs:$src1))>,
- Requires<[UseHVXSgl]>;
-
- // 128B Aligned stores
- def : Pat<(alignedstore (VTDbl VectorRegs128B:$src1), IntRegs:$addr),
- (V6_vS32b_ai_128B IntRegs:$addr, #0, (VTDbl VectorRegs128B:$src1))>,
- Requires<[UseHVXDbl]>;
- def : Pat<(unalignedstore (VTDbl VectorRegs128B:$src1), IntRegs:$addr),
- (V6_vS32Ub_ai_128B IntRegs:$addr, #0, (VTDbl VectorRegs128B:$src1))>,
- Requires<[UseHVXDbl]>;
-
- // Fold Add R+IFF into vector store.
- let AddedComplexity = 10 in {
- def : Pat<(alignedstore (VTSgl VectorRegs:$src1),
- (add IntRegs:$src2, s4_6ImmPred:$offset)),
- (V6_vS32b_ai IntRegs:$src2, s4_6ImmPred:$offset,
- (VTSgl VectorRegs:$src1))>,
- Requires<[UseHVXSgl]>;
- def : Pat<(unalignedstore (VTSgl VectorRegs:$src1),
- (add IntRegs:$src2, s4_6ImmPred:$offset)),
- (V6_vS32Ub_ai IntRegs:$src2, s4_6ImmPred:$offset,
- (VTSgl VectorRegs:$src1))>,
- Requires<[UseHVXSgl]>;
-
- // Fold Add R+IFF into vector store 128B.
- def : Pat<(alignedstore (VTDbl VectorRegs128B:$src1),
- (add IntRegs:$src2, s4_7ImmPred:$offset)),
- (V6_vS32b_ai_128B IntRegs:$src2, s4_7ImmPred:$offset,
- (VTDbl VectorRegs128B:$src1))>,
- Requires<[UseHVXDbl]>;
- def : Pat<(unalignedstore (VTDbl VectorRegs128B:$src1),
- (add IntRegs:$src2, s4_7ImmPred:$offset)),
- (V6_vS32Ub_ai_128B IntRegs:$src2, s4_7ImmPred:$offset,
- (VTDbl VectorRegs128B:$src1))>,
- Requires<[UseHVXDbl]>;
- }
-}
-
-defm : vS32b_ai_pats <v64i8, v128i8>;
-defm : vS32b_ai_pats <v32i16, v64i16>;
-defm : vS32b_ai_pats <v16i32, v32i32>;
-defm : vS32b_ai_pats <v8i64, v16i64>;
-
-let isPseudo = 1, validSubTargets = HasV60SubT in
-class LDrivv_template<string mnemonic, Operand ImmOp, RegisterClass RC>
- : V6_LDInst <(outs RC:$dst), (ins IntRegs:$addr, ImmOp:$off),
- "$dst="#mnemonic#"($addr+#$off)",
- []>,
- Requires<[HasV60T,UseHVXSgl]>;
-
-def LDrivv_indexed: LDrivv_template<"vvmem", s4_6Imm, VecDblRegs>;
-def LDrivv_indexed_128B: LDrivv_template<"vvmem", s4_7Imm, VecDblRegs128B>;
-
-multiclass LDrivv_pats <ValueType VTSgl, ValueType VTDbl> {
- def : Pat < (VTSgl (load IntRegs:$addr)),
- (LDrivv_indexed IntRegs:$addr, #0) >,
- Requires<[UseHVXSgl]>;
-
- def : Pat < (VTDbl (load IntRegs:$addr)),
- (LDrivv_indexed_128B IntRegs:$addr, #0) >,
- Requires<[UseHVXDbl]>;
-}
-
-defm : LDrivv_pats <v128i8, v256i8>;
-defm : LDrivv_pats <v64i16, v128i16>;
-defm : LDrivv_pats <v32i32, v64i32>;
-defm : LDrivv_pats <v16i64, v32i64>;
-
-multiclass vL32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
- // Aligned loads
- def : Pat < (VTSgl (alignedload IntRegs:$addr)),
- (V6_vL32b_ai IntRegs:$addr, #0) >,
- Requires<[UseHVXSgl]>;
- def : Pat < (VTSgl (unalignedload IntRegs:$addr)),
- (V6_vL32Ub_ai IntRegs:$addr, #0) >,
- Requires<[UseHVXSgl]>;
-
- // 128B Load
- def : Pat < (VTDbl (alignedload IntRegs:$addr)),
- (V6_vL32b_ai_128B IntRegs:$addr, #0) >,
- Requires<[UseHVXDbl]>;
- def : Pat < (VTDbl (unalignedload IntRegs:$addr)),
- (V6_vL32Ub_ai_128B IntRegs:$addr, #0) >,
- Requires<[UseHVXDbl]>;
-
- // Fold Add R+IFF into vector load.
- let AddedComplexity = 10 in {
- def : Pat<(VTDbl (alignedload (add IntRegs:$src2, s4_7ImmPred:$offset))),
- (V6_vL32b_ai_128B IntRegs:$src2, s4_7ImmPred:$offset)>,
- Requires<[UseHVXDbl]>;
- def : Pat<(VTDbl (unalignedload (add IntRegs:$src2, s4_7ImmPred:$offset))),
- (V6_vL32Ub_ai_128B IntRegs:$src2, s4_7ImmPred:$offset)>,
- Requires<[UseHVXDbl]>;
-
- def : Pat<(VTSgl (alignedload (add IntRegs:$src2, s4_6ImmPred:$offset))),
- (V6_vL32b_ai IntRegs:$src2, s4_6ImmPred:$offset)>,
- Requires<[UseHVXSgl]>;
- def : Pat<(VTSgl (unalignedload (add IntRegs:$src2, s4_6ImmPred:$offset))),
- (V6_vL32Ub_ai IntRegs:$src2, s4_6ImmPred:$offset)>,
- Requires<[UseHVXSgl]>;
- }
-}
-
-defm : vL32b_ai_pats <v64i8, v128i8>;
-defm : vL32b_ai_pats <v32i16, v64i16>;
-defm : vL32b_ai_pats <v16i32, v32i32>;
-defm : vL32b_ai_pats <v8i64, v16i64>;
-// Store vector predicate pseudo.
-let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13,
- isCodeGenOnly = 1, isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
-def STriq_pred_V6 : STInst<(outs),
- (ins IntRegs:$base, s32Imm:$offset, VecPredRegs:$src1),
- ".error \"should not emit\" ",
- []>,
- Requires<[HasV60T,UseHVXSgl]>;
-
-def STriq_pred_vec_V6 : STInst<(outs),
- (ins IntRegs:$base, s32Imm:$offset, VectorRegs:$src1),
- ".error \"should not emit\" ",
- []>,
- Requires<[HasV60T,UseHVXSgl]>;
-
-def STriq_pred_V6_128B : STInst<(outs),
- (ins IntRegs:$base, s32Imm:$offset, VecPredRegs128B:$src1),
- ".error \"should not emit\" ",
- []>,
- Requires<[HasV60T,UseHVXDbl]>;
-
-def STriq_pred_vec_V6_128B : STInst<(outs),
- (ins IntRegs:$base, s32Imm:$offset, VectorRegs128B:$src1),
- ".error \"should not emit\" ",
- []>,
- Requires<[HasV60T,UseHVXDbl]>;
-}
+// Vector load/store pseudos
-// Load vector predicate pseudo.
-let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13,
- opExtentAlign = 2, isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in {
-def LDriq_pred_V6 : LDInst<(outs VecPredRegs:$dst),
- (ins IntRegs:$base, s32Imm:$offset),
- ".error \"should not emit\" ",
- []>,
- Requires<[HasV60T,UseHVXSgl]>;
-def LDriq_pred_vec_V6 : LDInst<(outs VectorRegs:$dst),
- (ins IntRegs:$base, s32Imm:$offset),
- ".error \"should not emit\" ",
- []>,
- Requires<[HasV60T,UseHVXSgl]>;
-def LDriq_pred_V6_128B : LDInst<(outs VecPredRegs128B:$dst),
- (ins IntRegs:$base, s32Imm:$offset),
- ".error \"should not emit\" ",
- []>,
- Requires<[HasV60T,UseHVXDbl]>;
-def LDriq_pred_vec_V6_128B : LDInst<(outs VectorRegs128B:$dst),
- (ins IntRegs:$base, s32Imm:$offset),
- ".error \"should not emit\" ",
- []>,
- Requires<[HasV60T,UseHVXDbl]>;
-}
-
-// Store vector pseudo.
-let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13,
- isCodeGenOnly = 1, isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
-def STriv_pseudo_V6 : STInst<(outs),
- (ins IntRegs:$base, s32Imm:$offset, VectorRegs:$src1),
- ".error \"should not emit\" ",
- []>,
- Requires<[HasV60T,UseHVXSgl]>;
-def STriv_pseudo_V6_128B : STInst<(outs),
- (ins IntRegs:$base, s32Imm:$offset, VectorRegs128B:$src1),
- ".error \"should not emit\" ",
- []>,
- Requires<[HasV60T,UseHVXDbl]>;
-}
+let isPseudo = 1, isCodeGenOnly = 1, validSubTargets = HasV60SubT in
+class STrivv_template<RegisterClass RC>
+ : V6_STInst<(outs), (ins IntRegs:$addr, s32_0Imm:$off, RC:$src), "", []>;
+
+def PS_vstorerw_ai: STrivv_template<VecDblRegs>,
+ Requires<[HasV60T,UseHVXSgl]>;
+def PS_vstorerwu_ai: STrivv_template<VecDblRegs>,
+ Requires<[HasV60T,UseHVXSgl]>;
+def PS_vstorerw_ai_128B: STrivv_template<VecDblRegs128B>,
+ Requires<[HasV60T,UseHVXDbl]>;
+def PS_vstorerwu_ai_128B: STrivv_template<VecDblRegs128B>,
+ Requires<[HasV60T,UseHVXDbl]>;
+
+
+let isPseudo = 1, isCodeGenOnly = 1, validSubTargets = HasV60SubT in
+class LDrivv_template<RegisterClass RC>
+ : V6_LDInst<(outs RC:$dst), (ins IntRegs:$addr, s32_0Imm:$off), "", []>;
+def PS_vloadrw_ai: LDrivv_template<VecDblRegs>,
+ Requires<[HasV60T,UseHVXSgl]>;
+def PS_vloadrwu_ai: LDrivv_template<VecDblRegs>,
+ Requires<[HasV60T,UseHVXSgl]>;
+def PS_vloadrw_ai_128B: LDrivv_template<VecDblRegs128B>,
+ Requires<[HasV60T,UseHVXDbl]>;
+def PS_vloadrwu_ai_128B: LDrivv_template<VecDblRegs128B>,
+ Requires<[HasV60T,UseHVXDbl]>;
+
+// Store vector predicate pseudo.
let isExtendable = 1, opExtendable = 1, isExtentSigned = 1, opExtentBits = 13,
isCodeGenOnly = 1, isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
-def STrivv_pseudo_V6 : STInst<(outs),
- (ins IntRegs:$base, s32Imm:$offset, VecDblRegs:$src1),
- ".error \"should not emit\" ",
- []>,
- Requires<[HasV60T,UseHVXSgl]>;
-def STrivv_pseudo_V6_128B : STInst<(outs),
- (ins IntRegs:$base, s32Imm:$offset, VecDblRegs128B:$src1),
- ".error \"should not emit\" ",
- []>,
- Requires<[HasV60T,UseHVXDbl]>;
-}
-
-// Load vector pseudo.
-let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13,
- opExtentAlign = 2, isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in {
-def LDriv_pseudo_V6 : LDInst<(outs VectorRegs:$dst),
- (ins IntRegs:$base, s32Imm:$offset),
- ".error \"should not emit\" ",
- []>,
- Requires<[HasV60T,UseHVXSgl]>;
-def LDriv_pseudo_V6_128B : LDInst<(outs VectorRegs128B:$dst),
- (ins IntRegs:$base, s32Imm:$offset),
- ".error \"should not emit\" ",
- []>,
- Requires<[HasV60T,UseHVXDbl]>;
+ def PS_vstorerq_ai : STInst<(outs),
+ (ins IntRegs:$base, s32_0Imm:$offset, VecPredRegs:$src1),
+ ".error \"should not emit\"", []>,
+ Requires<[HasV60T,UseHVXSgl]>;
+ def PS_vstorerq_ai_128B : STInst<(outs),
+ (ins IntRegs:$base, s32_0Imm:$offset, VecPredRegs128B:$src1),
+ ".error \"should not emit\"", []>,
+ Requires<[HasV60T,UseHVXDbl]>;
}
+// Load vector predicate pseudo.
let isExtendable = 1, opExtendable = 2, isExtentSigned = 1, opExtentBits = 13,
opExtentAlign = 2, isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in {
-def LDrivv_pseudo_V6 : LDInst<(outs VecDblRegs:$dst),
- (ins IntRegs:$base, s32Imm:$offset),
- ".error \"should not emit\" ",
- []>,
- Requires<[HasV60T,UseHVXSgl]>;
-def LDrivv_pseudo_V6_128B : LDInst<(outs VecDblRegs128B:$dst),
- (ins IntRegs:$base, s32Imm:$offset),
- ".error \"should not emit\" ",
- []>,
- Requires<[HasV60T,UseHVXDbl]>;
+ def PS_vloadrq_ai : LDInst<(outs VecPredRegs:$dst),
+ (ins IntRegs:$base, s32_0Imm:$offset),
+ ".error \"should not emit\"", []>,
+ Requires<[HasV60T,UseHVXSgl]>;
+ def PS_vloadrq_ai_128B : LDInst<(outs VecPredRegs128B:$dst),
+ (ins IntRegs:$base, s32_0Imm:$offset),
+ ".error \"should not emit\"", []>,
+ Requires<[HasV60T,UseHVXDbl]>;
}
class VSELInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
@@ -1032,26 +812,19 @@ class VSELInst<dag outs, dag ins, string asmstr, list<dag> pattern = [],
: InstHexagon<outs, ins, asmstr, pattern, cstr, itin, type>;
let isCodeGenOnly = 1, isPseudo = 1, hasSideEffects = 0 in {
-def VSelectPseudo_V6 : VSELInst<(outs VectorRegs:$dst),
- (ins PredRegs:$src1, VectorRegs:$src2, VectorRegs:$src3),
- ".error \"should not emit\" ",
- []>,
- Requires<[HasV60T,UseHVXSgl]>;
-def VSelectDblPseudo_V6 : VSELInst<(outs VecDblRegs:$dst),
- (ins PredRegs:$src1, VecDblRegs:$src2, VecDblRegs:$src3),
- ".error \"should not emit\" ",
- []>,
- Requires<[HasV60T,UseHVXSgl]>;
-}
-
-def : Pat <(v16i32 (selectcc (i32 IntRegs:$lhs), (i32 IntRegs:$rhs),
- (v16i32 VectorRegs:$tval),
- (v16i32 VectorRegs:$fval), SETEQ)),
- (v16i32 (VSelectPseudo_V6 (i32 (C2_cmpeq (i32 IntRegs:$lhs),
- (i32 IntRegs:$rhs))),
- (v16i32 VectorRegs:$tval),
- (v16i32 VectorRegs:$fval)))>;
-
+ def PS_vselect: VSELInst<(outs VectorRegs:$dst),
+ (ins PredRegs:$src1, VectorRegs:$src2, VectorRegs:$src3), "", []>,
+ Requires<[HasV60T,UseHVXSgl]>;
+ def PS_vselect_128B: VSELInst<(outs VectorRegs128B:$dst),
+ (ins PredRegs:$src1, VectorRegs128B:$src2, VectorRegs128B:$src3),
+ "", []>, Requires<[HasV60T,UseHVXDbl]>;
+ def PS_wselect: VSELInst<(outs VecDblRegs:$dst),
+ (ins PredRegs:$src1, VecDblRegs:$src2, VecDblRegs:$src3), "", []>,
+ Requires<[HasV60T,UseHVXSgl]>;
+ def PS_wselect_128B: VSELInst<(outs VecDblRegs128B:$dst),
+ (ins PredRegs:$src1, VecDblRegs128B:$src2, VecDblRegs128B:$src3),
+ "", []>, Requires<[HasV60T,UseHVXDbl]>;
+}
let hasNewValue = 1 in
class T_vmpy <string asmString, RegisterClass RCout, RegisterClass RCin>
@@ -1581,20 +1354,6 @@ let isRegSequence = 1, Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV in
defm V6_vcombine :
T_HVX_alu_WV <"$dst = vcombine($src1,$src2)">, V6_vcombine_enc;
-def SDTHexagonVCOMBINE: SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>,
- SDTCisSubVecOfVec<1, 0>]>;
-
-def HexagonVCOMBINE: SDNode<"HexagonISD::VCOMBINE", SDTHexagonVCOMBINE>;
-
-def: Pat<(v32i32 (HexagonVCOMBINE (v16i32 VectorRegs:$Vs),
- (v16i32 VectorRegs:$Vt))),
- (V6_vcombine VectorRegs:$Vs, VectorRegs:$Vt)>,
- Requires<[UseHVXSgl]>;
-def: Pat<(v64i32 (HexagonVCOMBINE (v32i32 VecDblRegs:$Vs),
- (v32i32 VecDblRegs:$Vt))),
- (V6_vcombine_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>,
- Requires<[UseHVXDbl]>;
-
let Itinerary = CVI_VINLANESAT, Type = TypeCVI_VINLANESAT in {
defm V6_vsathub :
T_HVX_alu_VV <"$dst.ub = vsat($src1.h,$src2.h)">, V6_vsathub_enc;
@@ -1782,7 +1541,7 @@ let isAccumulator = 1, hasNewValue = 1, Itinerary = CVI_VX_DV_LONG,
Type = TypeCVI_VX_DV in
class T_HVX_vmpyacc2 <string asmString, RegisterClass RC>
: CVI_VA_Resource1 <(outs RC:$dst),
- (ins RC:$_src_, RC:$src1, IntRegs:$src2, u1Imm:$src3),
+ (ins RC:$_src_, RC:$src1, IntRegs:$src2, u1_0Imm:$src3),
asmString, [], "$dst = $_src_" > ;
@@ -1806,7 +1565,7 @@ defm V6_vrmpyubi_acc :
let Itinerary = CVI_VX_DV_LONG, Type = TypeCVI_VX_DV, hasNewValue = 1 in
class T_HVX_vmpy2 <string asmString, RegisterClass RC>
- : CVI_VA_Resource1<(outs RC:$dst), (ins RC:$src1, IntRegs:$src2, u1Imm:$src3),
+ : CVI_VA_Resource1<(outs RC:$dst), (ins RC:$src1, IntRegs:$src2, u1_0Imm:$src3),
asmString>;
@@ -1958,7 +1717,7 @@ defm V6_vunpackoh : T_HVX_unpack <"$dst.w |= vunpacko($src1.h)">, V6_vunpackoh_e
let Itinerary = CVI_VP_LONG, Type = TypeCVI_VP, hasNewValue = 1,
hasSideEffects = 0 in
class T_HVX_valign <string asmString, RegisterClass RC>
- : CVI_VA_Resource1<(outs RC:$dst), (ins RC:$src1, RC:$src2, u3Imm:$src3),
+ : CVI_VA_Resource1<(outs RC:$dst), (ins RC:$src1, RC:$src2, u3_0Imm:$src3),
asmString>;
multiclass T_HVX_valign <string asmString> {
@@ -2095,9 +1854,9 @@ class T_HVX_rol <string asmString, RegisterClass RC, Operand ImmOp >
: SInst2 <(outs RC:$dst), (ins RC:$src1, ImmOp:$src2), asmString>;
class T_HVX_rol_R <string asmString>
- : T_HVX_rol <asmString, IntRegs, u5Imm>;
+ : T_HVX_rol <asmString, IntRegs, u5_0Imm>;
class T_HVX_rol_P <string asmString>
- : T_HVX_rol <asmString, DoubleRegs, u6Imm>;
+ : T_HVX_rol <asmString, DoubleRegs, u6_0Imm>;
def S6_rol_i_p : T_HVX_rol_P <"$dst = rol($src1,#$src2)">, S6_rol_i_p_enc;
let hasNewValue = 1, opNewValue = 0 in
@@ -2109,10 +1868,10 @@ class T_HVX_rol_acc <string asmString, RegisterClass RC, Operand ImmOp>
asmString, [], "$dst = $_src_" >;
class T_HVX_rol_acc_P <string asmString>
- : T_HVX_rol_acc <asmString, DoubleRegs, u6Imm>;
+ : T_HVX_rol_acc <asmString, DoubleRegs, u6_0Imm>;
class T_HVX_rol_acc_R <string asmString>
- : T_HVX_rol_acc <asmString, IntRegs, u5Imm>;
+ : T_HVX_rol_acc <asmString, IntRegs, u5_0Imm>;
def S6_rol_i_p_nac :
T_HVX_rol_acc_P <"$dst -= rol($src1,#$src2)">, S6_rol_i_p_nac_enc;
@@ -2285,3 +2044,25 @@ def V6_vhistq
def V6_vhist
: CVI_HIST_Resource1 <(outs), (ins),
"vhist" >, V6_vhist_enc;
+
+
+let isPseudo = 1, isCodeGenOnly = 1, hasSideEffects = 0 in {
+ def V6_vd0: CVI_VA_Resource<(outs VectorRegs:$dst), (ins), "$dst = #0", []>;
+ def V6_vd0_128B: CVI_VA_Resource<(outs VectorRegs128B:$dst), (ins),
+ "$dst = #0", []>;
+
+ def V6_vassignp: CVI_VA_Resource<(outs VecDblRegs:$dst),
+ (ins VecDblRegs:$src), "", []>;
+ def V6_vassignp_128B : CVI_VA_Resource<(outs VecDblRegs128B:$dst),
+ (ins VecDblRegs128B:$src), "", []>;
+
+ def V6_lo: CVI_VA_Resource<(outs VectorRegs:$dst), (ins VecDblRegs:$src1),
+ "", []>;
+ def V6_lo_128B: CVI_VA_Resource<(outs VectorRegs128B:$dst),
+ (ins VecDblRegs128B:$src1), "", []>;
+
+ def V6_hi: CVI_VA_Resource<(outs VectorRegs:$dst), (ins VecDblRegs:$src1),
+ "", []>;
+ def V6_hi_128B: CVI_VA_Resource<(outs VectorRegs128B:$dst),
+ (ins VecDblRegs128B:$src1), "", []>;
+}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoVector.td b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoVector.td
index 0277d5e3c28c..e3520bd6e515 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoVector.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonInstrInfoVector.td
@@ -11,37 +11,6 @@
//
//===----------------------------------------------------------------------===//
-def V2I1: PatLeaf<(v2i1 PredRegs:$R)>;
-def V4I1: PatLeaf<(v4i1 PredRegs:$R)>;
-def V8I1: PatLeaf<(v8i1 PredRegs:$R)>;
-def V4I8: PatLeaf<(v4i8 IntRegs:$R)>;
-def V2I16: PatLeaf<(v2i16 IntRegs:$R)>;
-def V8I8: PatLeaf<(v8i8 DoubleRegs:$R)>;
-def V4I16: PatLeaf<(v4i16 DoubleRegs:$R)>;
-def V2I32: PatLeaf<(v2i32 DoubleRegs:$R)>;
-
-
-multiclass bitconvert_32<ValueType a, ValueType b> {
- def : Pat <(b (bitconvert (a IntRegs:$src))),
- (b IntRegs:$src)>;
- def : Pat <(a (bitconvert (b IntRegs:$src))),
- (a IntRegs:$src)>;
-}
-
-multiclass bitconvert_64<ValueType a, ValueType b> {
- def : Pat <(b (bitconvert (a DoubleRegs:$src))),
- (b DoubleRegs:$src)>;
- def : Pat <(a (bitconvert (b DoubleRegs:$src))),
- (a DoubleRegs:$src)>;
-}
-
-// Bit convert vector types to integers.
-defm : bitconvert_32<v4i8, i32>;
-defm : bitconvert_32<v2i16, i32>;
-defm : bitconvert_64<v8i8, i64>;
-defm : bitconvert_64<v4i16, i64>;
-defm : bitconvert_64<v2i32, i64>;
-
// Vector shift support. Vector shifting in Hexagon is rather different
// from LLVM's internal representation.
// LLVM assumes all shifts (in vector case) will have the form
@@ -51,27 +20,17 @@ defm : bitconvert_64<v2i32, i64>;
// As a result, special care is needed to guarantee correctness and
// performance.
class vshift_v4i16<SDNode Op, string Str, bits<3>MajOp, bits<3>MinOp>
- : S_2OpInstImm<Str, MajOp, MinOp, u4Imm,
- [(set (v4i16 DoubleRegs:$dst),
- (Op (v4i16 DoubleRegs:$src1), u4ImmPred:$src2))]> {
+ : S_2OpInstImm<Str, MajOp, MinOp, u4_0Imm, []> {
bits<4> src2;
let Inst{11-8} = src2;
}
class vshift_v2i32<SDNode Op, string Str, bits<3>MajOp, bits<3>MinOp>
- : S_2OpInstImm<Str, MajOp, MinOp, u5Imm,
- [(set (v2i32 DoubleRegs:$dst),
- (Op (v2i32 DoubleRegs:$src1), u5ImmPred:$src2))]> {
+ : S_2OpInstImm<Str, MajOp, MinOp, u5_0Imm, []> {
bits<5> src2;
let Inst{12-8} = src2;
}
-def : Pat<(v2i16 (add (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))),
- (A2_svaddh IntRegs:$src1, IntRegs:$src2)>;
-
-def : Pat<(v2i16 (sub (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))),
- (A2_svsubh IntRegs:$src1, IntRegs:$src2)>;
-
def S2_asr_i_vw : vshift_v2i32<sra, "vasrw", 0b010, 0b000>;
def S2_lsr_i_vw : vshift_v2i32<srl, "vlsrw", 0b010, 0b001>;
def S2_asl_i_vw : vshift_v2i32<shl, "vaslw", 0b010, 0b010>;
@@ -80,87 +39,6 @@ def S2_asr_i_vh : vshift_v4i16<sra, "vasrh", 0b100, 0b000>;
def S2_lsr_i_vh : vshift_v4i16<srl, "vlsrh", 0b100, 0b001>;
def S2_asl_i_vh : vshift_v4i16<shl, "vaslh", 0b100, 0b010>;
-
-def HexagonVSPLATB: SDNode<"HexagonISD::VSPLATB", SDTUnaryOp>;
-def HexagonVSPLATH: SDNode<"HexagonISD::VSPLATH", SDTUnaryOp>;
-
-// Replicate the low 8-bits from 32-bits input register into each of the
-// four bytes of 32-bits destination register.
-def: Pat<(v4i8 (HexagonVSPLATB I32:$Rs)), (S2_vsplatrb I32:$Rs)>;
-
-// Replicate the low 16-bits from 32-bits input register into each of the
-// four halfwords of 64-bits destination register.
-def: Pat<(v4i16 (HexagonVSPLATH I32:$Rs)), (S2_vsplatrh I32:$Rs)>;
-
-
-class VArith_pat <InstHexagon MI, SDNode Op, PatFrag Type>
- : Pat <(Op Type:$Rss, Type:$Rtt),
- (MI Type:$Rss, Type:$Rtt)>;
-
-def: VArith_pat <A2_vaddub, add, V8I8>;
-def: VArith_pat <A2_vaddh, add, V4I16>;
-def: VArith_pat <A2_vaddw, add, V2I32>;
-def: VArith_pat <A2_vsubub, sub, V8I8>;
-def: VArith_pat <A2_vsubh, sub, V4I16>;
-def: VArith_pat <A2_vsubw, sub, V2I32>;
-
-def: VArith_pat <A2_and, and, V2I16>;
-def: VArith_pat <A2_xor, xor, V2I16>;
-def: VArith_pat <A2_or, or, V2I16>;
-
-def: VArith_pat <A2_andp, and, V8I8>;
-def: VArith_pat <A2_andp, and, V4I16>;
-def: VArith_pat <A2_andp, and, V2I32>;
-def: VArith_pat <A2_orp, or, V8I8>;
-def: VArith_pat <A2_orp, or, V4I16>;
-def: VArith_pat <A2_orp, or, V2I32>;
-def: VArith_pat <A2_xorp, xor, V8I8>;
-def: VArith_pat <A2_xorp, xor, V4I16>;
-def: VArith_pat <A2_xorp, xor, V2I32>;
-
-def: Pat<(v2i32 (sra V2I32:$b, (i64 (HexagonCOMBINE (i32 u5ImmPred:$c),
- (i32 u5ImmPred:$c))))),
- (S2_asr_i_vw V2I32:$b, imm:$c)>;
-def: Pat<(v2i32 (srl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5ImmPred:$c),
- (i32 u5ImmPred:$c))))),
- (S2_lsr_i_vw V2I32:$b, imm:$c)>;
-def: Pat<(v2i32 (shl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5ImmPred:$c),
- (i32 u5ImmPred:$c))))),
- (S2_asl_i_vw V2I32:$b, imm:$c)>;
-
-def: Pat<(v4i16 (sra V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4ImmPred:$c)))))),
- (S2_asr_i_vh V4I16:$b, imm:$c)>;
-def: Pat<(v4i16 (srl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4ImmPred:$c)))))),
- (S2_lsr_i_vh V4I16:$b, imm:$c)>;
-def: Pat<(v4i16 (shl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4ImmPred:$c)))))),
- (S2_asl_i_vh V4I16:$b, imm:$c)>;
-
-
-def SDTHexagon_v2i32_v2i32_i32 : SDTypeProfile<1, 2,
- [SDTCisSameAs<0, 1>, SDTCisVT<0, v2i32>, SDTCisInt<2>]>;
-def SDTHexagon_v4i16_v4i16_i32 : SDTypeProfile<1, 2,
- [SDTCisSameAs<0, 1>, SDTCisVT<0, v4i16>, SDTCisInt<2>]>;
-
-def HexagonVSRAW: SDNode<"HexagonISD::VSRAW", SDTHexagon_v2i32_v2i32_i32>;
-def HexagonVSRAH: SDNode<"HexagonISD::VSRAH", SDTHexagon_v4i16_v4i16_i32>;
-def HexagonVSRLW: SDNode<"HexagonISD::VSRLW", SDTHexagon_v2i32_v2i32_i32>;
-def HexagonVSRLH: SDNode<"HexagonISD::VSRLH", SDTHexagon_v4i16_v4i16_i32>;
-def HexagonVSHLW: SDNode<"HexagonISD::VSHLW", SDTHexagon_v2i32_v2i32_i32>;
-def HexagonVSHLH: SDNode<"HexagonISD::VSHLH", SDTHexagon_v4i16_v4i16_i32>;
-
-def: Pat<(v2i32 (HexagonVSRAW V2I32:$Rs, u5ImmPred:$u5)),
- (S2_asr_i_vw V2I32:$Rs, imm:$u5)>;
-def: Pat<(v4i16 (HexagonVSRAH V4I16:$Rs, u4ImmPred:$u4)),
- (S2_asr_i_vh V4I16:$Rs, imm:$u4)>;
-def: Pat<(v2i32 (HexagonVSRLW V2I32:$Rs, u5ImmPred:$u5)),
- (S2_lsr_i_vw V2I32:$Rs, imm:$u5)>;
-def: Pat<(v4i16 (HexagonVSRLH V4I16:$Rs, u4ImmPred:$u4)),
- (S2_lsr_i_vh V4I16:$Rs, imm:$u4)>;
-def: Pat<(v2i32 (HexagonVSHLW V2I32:$Rs, u5ImmPred:$u5)),
- (S2_asl_i_vw V2I32:$Rs, imm:$u5)>;
-def: Pat<(v4i16 (HexagonVSHLH V4I16:$Rs, u4ImmPred:$u4)),
- (S2_asl_i_vh V4I16:$Rs, imm:$u4)>;
-
// Vector shift words by register
def S2_asr_r_vw : T_S3op_shiftVect < "vasrw", 0b00, 0b00>;
def S2_lsr_r_vw : T_S3op_shiftVect < "vlsrw", 0b00, 0b01>;
@@ -173,305 +51,19 @@ def S2_lsr_r_vh : T_S3op_shiftVect < "vlsrh", 0b01, 0b01>;
def S2_asl_r_vh : T_S3op_shiftVect < "vaslh", 0b01, 0b10>;
def S2_lsl_r_vh : T_S3op_shiftVect < "vlslh", 0b01, 0b11>;
-class vshift_rr_pat<InstHexagon MI, SDNode Op, PatFrag Value>
- : Pat <(Op Value:$Rs, I32:$Rt),
- (MI Value:$Rs, I32:$Rt)>;
-
-def: vshift_rr_pat <S2_asr_r_vw, HexagonVSRAW, V2I32>;
-def: vshift_rr_pat <S2_asr_r_vh, HexagonVSRAH, V4I16>;
-def: vshift_rr_pat <S2_lsr_r_vw, HexagonVSRLW, V2I32>;
-def: vshift_rr_pat <S2_lsr_r_vh, HexagonVSRLH, V4I16>;
-def: vshift_rr_pat <S2_asl_r_vw, HexagonVSHLW, V2I32>;
-def: vshift_rr_pat <S2_asl_r_vh, HexagonVSHLH, V4I16>;
-
-
-def SDTHexagonVecCompare_v8i8 : SDTypeProfile<1, 2,
- [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v8i8>]>;
-def SDTHexagonVecCompare_v4i16 : SDTypeProfile<1, 2,
- [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v4i16>]>;
-def SDTHexagonVecCompare_v2i32 : SDTypeProfile<1, 2,
- [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v2i32>]>;
-
-def HexagonVCMPBEQ: SDNode<"HexagonISD::VCMPBEQ", SDTHexagonVecCompare_v8i8>;
-def HexagonVCMPBGT: SDNode<"HexagonISD::VCMPBGT", SDTHexagonVecCompare_v8i8>;
-def HexagonVCMPBGTU: SDNode<"HexagonISD::VCMPBGTU", SDTHexagonVecCompare_v8i8>;
-def HexagonVCMPHEQ: SDNode<"HexagonISD::VCMPHEQ", SDTHexagonVecCompare_v4i16>;
-def HexagonVCMPHGT: SDNode<"HexagonISD::VCMPHGT", SDTHexagonVecCompare_v4i16>;
-def HexagonVCMPHGTU: SDNode<"HexagonISD::VCMPHGTU", SDTHexagonVecCompare_v4i16>;
-def HexagonVCMPWEQ: SDNode<"HexagonISD::VCMPWEQ", SDTHexagonVecCompare_v2i32>;
-def HexagonVCMPWGT: SDNode<"HexagonISD::VCMPWGT", SDTHexagonVecCompare_v2i32>;
-def HexagonVCMPWGTU: SDNode<"HexagonISD::VCMPWGTU", SDTHexagonVecCompare_v2i32>;
-
-
-class vcmp_i1_pat<InstHexagon MI, SDNode Op, PatFrag Value>
- : Pat <(i1 (Op Value:$Rs, Value:$Rt)),
- (MI Value:$Rs, Value:$Rt)>;
-
-def: vcmp_i1_pat<A2_vcmpbeq, HexagonVCMPBEQ, V8I8>;
-def: vcmp_i1_pat<A4_vcmpbgt, HexagonVCMPBGT, V8I8>;
-def: vcmp_i1_pat<A2_vcmpbgtu, HexagonVCMPBGTU, V8I8>;
-
-def: vcmp_i1_pat<A2_vcmpheq, HexagonVCMPHEQ, V4I16>;
-def: vcmp_i1_pat<A2_vcmphgt, HexagonVCMPHGT, V4I16>;
-def: vcmp_i1_pat<A2_vcmphgtu, HexagonVCMPHGTU, V4I16>;
-
-def: vcmp_i1_pat<A2_vcmpweq, HexagonVCMPWEQ, V2I32>;
-def: vcmp_i1_pat<A2_vcmpwgt, HexagonVCMPWGT, V2I32>;
-def: vcmp_i1_pat<A2_vcmpwgtu, HexagonVCMPWGTU, V2I32>;
-
-
-class vcmp_vi1_pat<InstHexagon MI, PatFrag Op, PatFrag InVal, ValueType OutTy>
- : Pat <(OutTy (Op InVal:$Rs, InVal:$Rt)),
- (MI InVal:$Rs, InVal:$Rt)>;
-
-def: vcmp_vi1_pat<A2_vcmpweq, seteq, V2I32, v2i1>;
-def: vcmp_vi1_pat<A2_vcmpwgt, setgt, V2I32, v2i1>;
-def: vcmp_vi1_pat<A2_vcmpwgtu, setugt, V2I32, v2i1>;
-
-def: vcmp_vi1_pat<A2_vcmpheq, seteq, V4I16, v4i1>;
-def: vcmp_vi1_pat<A2_vcmphgt, setgt, V4I16, v4i1>;
-def: vcmp_vi1_pat<A2_vcmphgtu, setugt, V4I16, v4i1>;
-
// Hexagon doesn't have a vector multiply with C semantics.
// Instead, generate a pseudo instruction that gets expanded into two
// scalar MPYI instructions.
// This is expanded by ExpandPostRAPseudos.
let isPseudo = 1 in
-def VMULW : PseudoM<(outs DoubleRegs:$Rd),
- (ins DoubleRegs:$Rs, DoubleRegs:$Rt),
- ".error \"Should never try to emit VMULW\"",
- [(set V2I32:$Rd, (mul V2I32:$Rs, V2I32:$Rt))]>;
+def PS_vmulw : PseudoM<(outs DoubleRegs:$Rd),
+ (ins DoubleRegs:$Rs, DoubleRegs:$Rt), "", []>;
let isPseudo = 1 in
-def VMULW_ACC : PseudoM<(outs DoubleRegs:$Rd),
- (ins DoubleRegs:$Rx, DoubleRegs:$Rs, DoubleRegs:$Rt),
- ".error \"Should never try to emit VMULW_ACC\"",
- [(set V2I32:$Rd, (add V2I32:$Rx, (mul V2I32:$Rs, V2I32:$Rt)))],
+def PS_vmulw_acc : PseudoM<(outs DoubleRegs:$Rd),
+ (ins DoubleRegs:$Rx, DoubleRegs:$Rs, DoubleRegs:$Rt), "", [],
"$Rd = $Rx">;
-// Adds two v4i8: Hexagon does not have an insn for this one, so we
-// use the double add v8i8, and use only the low part of the result.
-def: Pat<(v4i8 (add (v4i8 IntRegs:$Rs), (v4i8 IntRegs:$Rt))),
- (LoReg (A2_vaddub (Zext64 $Rs), (Zext64 $Rt)))>;
-
-// Subtract two v4i8: Hexagon does not have an insn for this one, so we
-// use the double sub v8i8, and use only the low part of the result.
-def: Pat<(v4i8 (sub (v4i8 IntRegs:$Rs), (v4i8 IntRegs:$Rt))),
- (LoReg (A2_vsubub (Zext64 $Rs), (Zext64 $Rt)))>;
-
-//
-// No 32 bit vector mux.
-//
-def: Pat<(v4i8 (select I1:$Pu, V4I8:$Rs, V4I8:$Rt)),
- (LoReg (C2_vmux I1:$Pu, (Zext64 $Rs), (Zext64 $Rt)))>;
-def: Pat<(v2i16 (select I1:$Pu, V2I16:$Rs, V2I16:$Rt)),
- (LoReg (C2_vmux I1:$Pu, (Zext64 $Rs), (Zext64 $Rt)))>;
-
-//
-// 64-bit vector mux.
-//
-def: Pat<(v8i8 (vselect V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)),
- (C2_vmux V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)>;
-def: Pat<(v4i16 (vselect V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)),
- (C2_vmux V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)>;
-def: Pat<(v2i32 (vselect V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)),
- (C2_vmux V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)>;
-
-//
-// No 32 bit vector compare.
-//
-def: Pat<(i1 (seteq V4I8:$Rs, V4I8:$Rt)),
- (A2_vcmpbeq (Zext64 $Rs), (Zext64 $Rt))>;
-def: Pat<(i1 (setgt V4I8:$Rs, V4I8:$Rt)),
- (A4_vcmpbgt (Zext64 $Rs), (Zext64 $Rt))>;
-def: Pat<(i1 (setugt V4I8:$Rs, V4I8:$Rt)),
- (A2_vcmpbgtu (Zext64 $Rs), (Zext64 $Rt))>;
-
-def: Pat<(i1 (seteq V2I16:$Rs, V2I16:$Rt)),
- (A2_vcmpheq (Zext64 $Rs), (Zext64 $Rt))>;
-def: Pat<(i1 (setgt V2I16:$Rs, V2I16:$Rt)),
- (A2_vcmphgt (Zext64 $Rs), (Zext64 $Rt))>;
-def: Pat<(i1 (setugt V2I16:$Rs, V2I16:$Rt)),
- (A2_vcmphgtu (Zext64 $Rs), (Zext64 $Rt))>;
-
-
-class InvertCmp_pat<InstHexagon InvMI, PatFrag CmpOp, PatFrag Value,
- ValueType CmpTy>
- : Pat<(CmpTy (CmpOp Value:$Rs, Value:$Rt)),
- (InvMI Value:$Rt, Value:$Rs)>;
-
-// Map from a compare operation to the corresponding instruction with the
-// order of operands reversed, e.g. x > y --> cmp.lt(y,x).
-def: InvertCmp_pat<A4_vcmpbgt, setlt, V8I8, i1>;
-def: InvertCmp_pat<A4_vcmpbgt, setlt, V8I8, v8i1>;
-def: InvertCmp_pat<A2_vcmphgt, setlt, V4I16, i1>;
-def: InvertCmp_pat<A2_vcmphgt, setlt, V4I16, v4i1>;
-def: InvertCmp_pat<A2_vcmpwgt, setlt, V2I32, i1>;
-def: InvertCmp_pat<A2_vcmpwgt, setlt, V2I32, v2i1>;
-
-def: InvertCmp_pat<A2_vcmpbgtu, setult, V8I8, i1>;
-def: InvertCmp_pat<A2_vcmpbgtu, setult, V8I8, v8i1>;
-def: InvertCmp_pat<A2_vcmphgtu, setult, V4I16, i1>;
-def: InvertCmp_pat<A2_vcmphgtu, setult, V4I16, v4i1>;
-def: InvertCmp_pat<A2_vcmpwgtu, setult, V2I32, i1>;
-def: InvertCmp_pat<A2_vcmpwgtu, setult, V2I32, v2i1>;
-
-// Map from vcmpne(Rss) -> !vcmpew(Rss).
-// rs != rt -> !(rs == rt).
-def: Pat<(v2i1 (setne V2I32:$Rs, V2I32:$Rt)),
- (C2_not (v2i1 (A2_vcmpbeq V2I32:$Rs, V2I32:$Rt)))>;
-
-
-// Truncate: from vector B copy all 'E'ven 'B'yte elements:
-// A[0] = B[0]; A[1] = B[2]; A[2] = B[4]; A[3] = B[6];
-def: Pat<(v4i8 (trunc V4I16:$Rs)),
- (S2_vtrunehb V4I16:$Rs)>;
-
-// Truncate: from vector B copy all 'O'dd 'B'yte elements:
-// A[0] = B[1]; A[1] = B[3]; A[2] = B[5]; A[3] = B[7];
-// S2_vtrunohb
-
-// Truncate: from vectors B and C copy all 'E'ven 'H'alf-word elements:
-// A[0] = B[0]; A[1] = B[2]; A[2] = C[0]; A[3] = C[2];
-// S2_vtruneh
-
-def: Pat<(v2i16 (trunc V2I32:$Rs)),
- (LoReg (S2_packhl (HiReg $Rs), (LoReg $Rs)))>;
-
-
-def HexagonVSXTBH : SDNode<"HexagonISD::VSXTBH", SDTUnaryOp>;
-def HexagonVSXTBW : SDNode<"HexagonISD::VSXTBW", SDTUnaryOp>;
-
-def: Pat<(i64 (HexagonVSXTBH I32:$Rs)), (S2_vsxtbh I32:$Rs)>;
-def: Pat<(i64 (HexagonVSXTBW I32:$Rs)), (S2_vsxthw I32:$Rs)>;
-
-def: Pat<(v4i16 (zext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>;
-def: Pat<(v2i32 (zext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
-def: Pat<(v4i16 (anyext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>;
-def: Pat<(v2i32 (anyext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
-def: Pat<(v4i16 (sext V4I8:$Rs)), (S2_vsxtbh V4I8:$Rs)>;
-def: Pat<(v2i32 (sext V2I16:$Rs)), (S2_vsxthw V2I16:$Rs)>;
-
-// Sign extends a v2i8 into a v2i32.
-def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i8)),
- (A2_combinew (A2_sxtb (HiReg $Rs)), (A2_sxtb (LoReg $Rs)))>;
-
-// Sign extends a v2i16 into a v2i32.
-def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i16)),
- (A2_combinew (A2_sxth (HiReg $Rs)), (A2_sxth (LoReg $Rs)))>;
-
-
-// Multiplies two v2i16 and returns a v2i32. We are using here the
-// saturating multiply, as hexagon does not provide a non saturating
-// vector multiply, and saturation does not impact the result that is
-// in double precision of the operands.
-
-// Multiplies two v2i16 vectors: as Hexagon does not have a multiply
-// with the C semantics for this one, this pattern uses the half word
-// multiply vmpyh that takes two v2i16 and returns a v2i32. This is
-// then truncated to fit this back into a v2i16 and to simulate the
-// wrap around semantics for unsigned in C.
-def vmpyh: OutPatFrag<(ops node:$Rs, node:$Rt),
- (M2_vmpy2s_s0 (i32 $Rs), (i32 $Rt))>;
-
-def: Pat<(v2i16 (mul V2I16:$Rs, V2I16:$Rt)),
- (LoReg (S2_vtrunewh (v2i32 (A2_combineii 0, 0)),
- (v2i32 (vmpyh V2I16:$Rs, V2I16:$Rt))))>;
-
-// Multiplies two v4i16 vectors.
-def: Pat<(v4i16 (mul V4I16:$Rs, V4I16:$Rt)),
- (S2_vtrunewh (vmpyh (HiReg $Rs), (HiReg $Rt)),
- (vmpyh (LoReg $Rs), (LoReg $Rt)))>;
-
-def VMPYB_no_V5: OutPatFrag<(ops node:$Rs, node:$Rt),
- (S2_vtrunewh (vmpyh (HiReg (S2_vsxtbh $Rs)), (HiReg (S2_vsxtbh $Rt))),
- (vmpyh (LoReg (S2_vsxtbh $Rs)), (LoReg (S2_vsxtbh $Rt))))>;
-
-// Multiplies two v4i8 vectors.
-def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)),
- (S2_vtrunehb (M5_vmpybsu V4I8:$Rs, V4I8:$Rt))>,
- Requires<[HasV5T]>;
-
-def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)),
- (S2_vtrunehb (VMPYB_no_V5 V4I8:$Rs, V4I8:$Rt))>;
-
-// Multiplies two v8i8 vectors.
-def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)),
- (A2_combinew (S2_vtrunehb (M5_vmpybsu (HiReg $Rs), (HiReg $Rt))),
- (S2_vtrunehb (M5_vmpybsu (LoReg $Rs), (LoReg $Rt))))>,
- Requires<[HasV5T]>;
-
-def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)),
- (A2_combinew (S2_vtrunehb (VMPYB_no_V5 (HiReg $Rs), (HiReg $Rt))),
- (S2_vtrunehb (VMPYB_no_V5 (LoReg $Rs), (LoReg $Rt))))>;
-
-
-class shuffler<SDNode Op, string Str>
- : SInst<(outs DoubleRegs:$a), (ins DoubleRegs:$b, DoubleRegs:$c),
- "$a = " # Str # "($b, $c)",
- [(set (i64 DoubleRegs:$a),
- (i64 (Op (i64 DoubleRegs:$b), (i64 DoubleRegs:$c))))],
- "", S_3op_tc_1_SLOT23>;
-
-def SDTHexagonBinOp64 : SDTypeProfile<1, 2,
- [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<0, i64>]>;
-
-def HexagonSHUFFEB: SDNode<"HexagonISD::SHUFFEB", SDTHexagonBinOp64>;
-def HexagonSHUFFEH: SDNode<"HexagonISD::SHUFFEH", SDTHexagonBinOp64>;
-def HexagonSHUFFOB: SDNode<"HexagonISD::SHUFFOB", SDTHexagonBinOp64>;
-def HexagonSHUFFOH: SDNode<"HexagonISD::SHUFFOH", SDTHexagonBinOp64>;
-
-class ShufflePat<InstHexagon MI, SDNode Op>
- : Pat<(i64 (Op DoubleRegs:$src1, DoubleRegs:$src2)),
- (i64 (MI DoubleRegs:$src1, DoubleRegs:$src2))>;
-
-// Shuffles even bytes for i=0..3: A[2*i].b = C[2*i].b; A[2*i+1].b = B[2*i].b
-def: ShufflePat<S2_shuffeb, HexagonSHUFFEB>;
-
-// Shuffles odd bytes for i=0..3: A[2*i].b = C[2*i+1].b; A[2*i+1].b = B[2*i+1].b
-def: ShufflePat<S2_shuffob, HexagonSHUFFOB>;
-
-// Shuffles even half for i=0,1: A[2*i].h = C[2*i].h; A[2*i+1].h = B[2*i].h
-def: ShufflePat<S2_shuffeh, HexagonSHUFFEH>;
-
-// Shuffles odd half for i=0,1: A[2*i].h = C[2*i+1].h; A[2*i+1].h = B[2*i+1].h
-def: ShufflePat<S2_shuffoh, HexagonSHUFFOH>;
-
-
-// Truncated store from v4i16 to v4i8.
-def truncstorev4i8: PatFrag<(ops node:$val, node:$ptr),
- (truncstore node:$val, node:$ptr),
- [{ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4i8; }]>;
-
-// Truncated store from v2i32 to v2i16.
-def truncstorev2i16: PatFrag<(ops node:$val, node:$ptr),
- (truncstore node:$val, node:$ptr),
- [{ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v2i16; }]>;
-
-def: Pat<(truncstorev2i16 V2I32:$Rs, I32:$Rt),
- (S2_storeri_io I32:$Rt, 0, (LoReg (S2_packhl (HiReg $Rs),
- (LoReg $Rs))))>;
-
-def: Pat<(truncstorev4i8 V4I16:$Rs, I32:$Rt),
- (S2_storeri_io I32:$Rt, 0, (S2_vtrunehb V4I16:$Rs))>;
-
-
-// Zero and sign extended load from v2i8 into v2i16.
-def zextloadv2i8: PatFrag<(ops node:$ptr), (zextload node:$ptr),
- [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8; }]>;
-
-def sextloadv2i8: PatFrag<(ops node:$ptr), (sextload node:$ptr),
- [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8; }]>;
-
-def: Pat<(v2i16 (zextloadv2i8 I32:$Rs)),
- (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Rs, 0))))>;
-
-def: Pat<(v2i16 (sextloadv2i8 I32:$Rs)),
- (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0))))>;
-def: Pat<(v2i32 (zextloadv2i8 I32:$Rs)),
- (S2_vzxthw (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Rs, 0)))))>;
-def: Pat<(v2i32 (sextloadv2i8 I32:$Rs)),
- (S2_vsxthw (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0)))))>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
index a319dd4f9789..d4f303bf6ff0 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsics.td
@@ -774,13 +774,13 @@ def: T_RR_pat<A2_combine_hl, int_hexagon_A2_combine_hl>;
def: T_RR_pat<A2_combine_lh, int_hexagon_A2_combine_lh>;
def: T_RR_pat<A2_combine_ll, int_hexagon_A2_combine_ll>;
-def: T_II_pat<A2_combineii, int_hexagon_A2_combineii, s32ImmPred, s8ImmPred>;
+def: T_II_pat<A2_combineii, int_hexagon_A2_combineii, s32_0ImmPred, s8_0ImmPred>;
// Mux
def : T_QRR_pat<C2_mux, int_hexagon_C2_mux>;
-def : T_QRI_pat<C2_muxir, int_hexagon_C2_muxir, s32ImmPred>;
-def : T_QIR_pat<C2_muxri, int_hexagon_C2_muxri, s32ImmPred>;
-def : T_QII_pat<C2_muxii, int_hexagon_C2_muxii, s32ImmPred, s8ImmPred>;
+def : T_QRI_pat<C2_muxir, int_hexagon_C2_muxir, s32_0ImmPred>;
+def : T_QIR_pat<C2_muxri, int_hexagon_C2_muxri, s32_0ImmPred>;
+def : T_QII_pat<C2_muxii, int_hexagon_C2_muxii, s32_0ImmPred, s8_0ImmPred>;
// Shift halfword
def : T_R_pat<A2_aslh, int_hexagon_A2_aslh>;
@@ -801,17 +801,15 @@ def : T_Q_RR_pat<C2_cmpeq, int_hexagon_C2_cmpeq>;
def : T_Q_RR_pat<C2_cmpgt, int_hexagon_C2_cmpgt>;
def : T_Q_RR_pat<C2_cmpgtu, int_hexagon_C2_cmpgtu>;
-def : T_Q_RI_pat<C2_cmpeqi, int_hexagon_C2_cmpeqi, s32ImmPred>;
-def : T_Q_RI_pat<C2_cmpgti, int_hexagon_C2_cmpgti, s32ImmPred>;
-def : T_Q_RI_pat<C2_cmpgtui, int_hexagon_C2_cmpgtui, u32ImmPred>;
+def : T_Q_RI_pat<C2_cmpeqi, int_hexagon_C2_cmpeqi, s32_0ImmPred>;
+def : T_Q_RI_pat<C2_cmpgti, int_hexagon_C2_cmpgti, s32_0ImmPred>;
+def : T_Q_RI_pat<C2_cmpgtui, int_hexagon_C2_cmpgtui, u32_0ImmPred>;
-def : Pat <(int_hexagon_C2_cmpgei I32:$src1, s32ImmPred:$src2),
- (C2_tfrpr (C2_cmpgti I32:$src1,
- (DEC_CONST_SIGNED s32ImmPred:$src2)))>;
+def : Pat <(int_hexagon_C2_cmpgei I32:$src1, s32_0ImmPred:$src2),
+ (C2_tfrpr (C2_cmpgti I32:$src1, (SDEC1 s32_0ImmPred:$src2)))>;
-def : Pat <(int_hexagon_C2_cmpgeui I32:$src1, u32ImmPred:$src2),
- (C2_tfrpr (C2_cmpgtui I32:$src1,
- (DEC_CONST_UNSIGNED u32ImmPred:$src2)))>;
+def : Pat <(int_hexagon_C2_cmpgeui I32:$src1, u32_0ImmPred:$src2),
+ (C2_tfrpr (C2_cmpgtui I32:$src1, (UDEC1 u32_0ImmPred:$src2)))>;
def : Pat <(int_hexagon_C2_cmpgeui I32:$src, 0),
(C2_tfrpr (C2_cmpeq I32:$src, I32:$src))>;
@@ -1104,14 +1102,14 @@ def : Pat<(i64 (int_hexagon_S2_insertp_rp I64:$src1, I64:$src2, I64:$src3)),
(i64 (S2_insertp_rp I64:$src1, I64:$src2, I64:$src3))>;
def : Pat<(int_hexagon_S2_insert I32:$src1, I32:$src2,
- u5ImmPred:$src3, u5ImmPred:$src4),
+ u5_0ImmPred:$src3, u5_0ImmPred:$src4),
(S2_insert I32:$src1, I32:$src2,
- u5ImmPred:$src3, u5ImmPred:$src4)>;
+ u5_0ImmPred:$src3, u5_0ImmPred:$src4)>;
def : Pat<(i64 (int_hexagon_S2_insertp I64:$src1, I64:$src2,
- u6ImmPred:$src3, u6ImmPred:$src4)),
+ u6_0ImmPred:$src3, u6_0ImmPred:$src4)),
(i64 (S2_insertp I64:$src1, I64:$src2,
- u6ImmPred:$src3, u6ImmPred:$src4))>;
+ u6_0ImmPred:$src3, u6_0ImmPred:$src4))>;
// Interleave/deinterleave
def : T_P_pat <S2_interleave, int_hexagon_S2_interleave>;
@@ -1239,10 +1237,19 @@ def : T_RI_pat <S2_asl_i_r_sat, int_hexagon_S2_asl_i_r_sat>;
//===----------------------------------------------------------------------===//
class S2op_tableidx_pat <Intrinsic IntID, InstHexagon OutputInst,
SDNodeXForm XformImm>
- : Pat <(IntID I32:$src1, I32:$src2, u4ImmPred:$src3, u5ImmPred:$src4),
- (OutputInst I32:$src1, I32:$src2, u4ImmPred:$src3,
- (XformImm u5ImmPred:$src4))>;
+ : Pat <(IntID I32:$src1, I32:$src2, u4_0ImmPred:$src3, u5_0ImmPred:$src4),
+ (OutputInst I32:$src1, I32:$src2, u4_0ImmPred:$src3,
+ (XformImm u5_0ImmPred:$src4))>;
+def SDEC2 : SDNodeXForm<imm, [{
+ int32_t V = N->getSExtValue();
+ return CurDAG->getTargetConstant(V-2, SDLoc(N), MVT::i32);
+}]>;
+
+def SDEC3 : SDNodeXForm<imm, [{
+ int32_t V = N->getSExtValue();
+ return CurDAG->getTargetConstant(V-3, SDLoc(N), MVT::i32);
+}]>;
// Table Index : Extract and insert bits.
// Map to the real hardware instructions after subtracting appropriate
@@ -1250,16 +1257,16 @@ class S2op_tableidx_pat <Intrinsic IntID, InstHexagon OutputInst,
// needed for int_hexagon_S2_tableidxb_goodsyntax.
def : Pat <(int_hexagon_S2_tableidxb_goodsyntax I32:$src1, I32:$src2,
- u4ImmPred:$src3, u5ImmPred:$src4),
+ u4_0ImmPred:$src3, u5_0ImmPred:$src4),
(S2_tableidxb I32:$src1, I32:$src2,
- u4ImmPred:$src3, u5ImmPred:$src4)>;
+ u4_0ImmPred:$src3, u5_0ImmPred:$src4)>;
def : S2op_tableidx_pat <int_hexagon_S2_tableidxh_goodsyntax, S2_tableidxh,
- DEC_CONST_SIGNED>;
+ SDEC1>;
def : S2op_tableidx_pat <int_hexagon_S2_tableidxw_goodsyntax, S2_tableidxw,
- DEC2_CONST_SIGNED>;
+ SDEC2>;
def : S2op_tableidx_pat <int_hexagon_S2_tableidxd_goodsyntax, S2_tableidxd,
- DEC3_CONST_SIGNED>;
+ SDEC3>;
//*******************************************************************
// STYPE/VH
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsDerived.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsDerived.td
index 4c28b28337f4..400c17333f73 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsDerived.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsDerived.td
@@ -20,21 +20,21 @@ def : Pat <(mul DoubleRegs:$src1, DoubleRegs:$src2),
(EXTRACT_SUBREG
(i64
(M2_dpmpyuu_s0 (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1),
- subreg_loreg)),
+ isub_lo)),
(i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2),
- subreg_loreg)))),
- subreg_hireg)),
- (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_loreg)),
- (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_hireg))),
- (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), subreg_loreg)),
- (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_hireg))),
+ isub_lo)))),
+ isub_hi)),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), isub_lo)),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), isub_hi))),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2), isub_lo)),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), isub_hi))),
(i32
(EXTRACT_SUBREG
(i64
(M2_dpmpyuu_s0
- (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), subreg_loreg)),
+ (i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src1), isub_lo)),
(i32 (EXTRACT_SUBREG (i64 DoubleRegs:$src2),
- subreg_loreg)))), subreg_loreg))))>;
+ isub_lo)))), isub_lo))))>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV4.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV4.td
index 578973db1933..2affe531515d 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV4.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV4.td
@@ -167,15 +167,15 @@ def : T_PPR_pat <A4_vrminuw, int_hexagon_A4_vrminuw>;
// Rotate and reduce bytes
def : Pat <(int_hexagon_S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2,
- u2ImmPred:$src3),
- (S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2, u2ImmPred:$src3)>;
+ u2_0ImmPred:$src3),
+ (S4_vrcrotate DoubleRegs:$src1, IntRegs:$src2, u2_0ImmPred:$src3)>;
// Rotate and reduce bytes with accumulation
// Rxx+=vrcrotate(Rss,Rt,#u2)
def : Pat <(int_hexagon_S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2,
- IntRegs:$src3, u2ImmPred:$src4),
+ IntRegs:$src3, u2_0ImmPred:$src4),
(S4_vrcrotate_acc DoubleRegs:$src1, DoubleRegs:$src2,
- IntRegs:$src3, u2ImmPred:$src4)>;
+ IntRegs:$src3, u2_0ImmPred:$src4)>;
// Vector conditional negate
def : T_PPR_pat<S2_vrcnegh, int_hexagon_S2_vrcnegh>;
@@ -223,17 +223,17 @@ def: T_RR_pat<A4_orn, int_hexagon_A4_orn>;
//*******************************************************************
// Combine Words Into Doublewords.
-def: T_RI_pat<A4_combineri, int_hexagon_A4_combineri, s32ImmPred>;
-def: T_IR_pat<A4_combineir, int_hexagon_A4_combineir, s32ImmPred>;
+def: T_RI_pat<A4_combineri, int_hexagon_A4_combineri, s32_0ImmPred>;
+def: T_IR_pat<A4_combineir, int_hexagon_A4_combineir, s32_0ImmPred>;
//*******************************************************************
// ALU32/PRED
//*******************************************************************
// Compare
-def : T_Q_RI_pat<C4_cmpneqi, int_hexagon_C4_cmpneqi, s32ImmPred>;
-def : T_Q_RI_pat<C4_cmpltei, int_hexagon_C4_cmpltei, s32ImmPred>;
-def : T_Q_RI_pat<C4_cmplteui, int_hexagon_C4_cmplteui, u32ImmPred>;
+def : T_Q_RI_pat<C4_cmpneqi, int_hexagon_C4_cmpneqi, s32_0ImmPred>;
+def : T_Q_RI_pat<C4_cmpltei, int_hexagon_C4_cmpltei, s32_0ImmPred>;
+def : T_Q_RI_pat<C4_cmplteui, int_hexagon_C4_cmplteui, u32_0ImmPred>;
// Compare To General Register.
def: T_Q_RR_pat<C4_cmpneq, int_hexagon_C4_cmpneq>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV60.td b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV60.td
index 82bc91bb3021..a45e1c9d7be4 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV60.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonIntrinsicsV60.td
@@ -12,72 +12,21 @@
//===----------------------------------------------------------------------===//
-let isCodeGenOnly = 1 in {
-def HEXAGON_V6_vd0_pseudo : CVI_VA_Resource<(outs VectorRegs:$dst),
- (ins ),
- "$dst=#0",
- [(set VectorRegs:$dst, (int_hexagon_V6_vd0 ))]>;
-
-def HEXAGON_V6_vd0_pseudo_128B : CVI_VA_Resource<(outs VectorRegs128B:$dst),
- (ins ),
- "$dst=#0",
- [(set VectorRegs128B:$dst, (int_hexagon_V6_vd0_128B ))]>;
-}
-
-let isPseudo = 1 in
-def HEXAGON_V6_vassignp : CVI_VA_Resource<(outs VecDblRegs:$dst),
- (ins VecDblRegs:$src1),
- "$dst=vassignp_W($src1)",
- [(set VecDblRegs:$dst, (int_hexagon_V6_vassignp VecDblRegs:$src1))]>;
-
-let isPseudo = 1 in
-def HEXAGON_V6_vassignp_128B : CVI_VA_Resource<(outs VecDblRegs128B:$dst),
- (ins VecDblRegs128B:$src1),
- "$dst=vassignp_W_128B($src1)",
- [(set VecDblRegs128B:$dst, (int_hexagon_V6_vassignp_128B
- VecDblRegs128B:$src1))]>;
-
-let isPseudo = 1 in
-def HEXAGON_V6_lo : CVI_VA_Resource<(outs VectorRegs:$dst),
- (ins VecDblRegs:$src1),
- "$dst=lo_W($src1)",
- [(set VectorRegs:$dst, (int_hexagon_V6_lo VecDblRegs:$src1))]>;
-
-let isPseudo = 1 in
-def HEXAGON_V6_hi : CVI_VA_Resource<(outs VectorRegs:$dst),
- (ins VecDblRegs:$src1),
- "$dst=hi_W($src1)",
- [(set VectorRegs:$dst, (int_hexagon_V6_hi VecDblRegs:$src1))]>;
-
-let isPseudo = 1 in
-def HEXAGON_V6_lo_128B : CVI_VA_Resource<(outs VectorRegs128B:$dst),
- (ins VecDblRegs128B:$src1),
- "$dst=lo_W($src1)",
- [(set VectorRegs128B:$dst, (int_hexagon_V6_lo_128B VecDblRegs128B:$src1))]>;
-
-let isPseudo = 1 in
-def HEXAGON_V6_hi_128B : CVI_VA_Resource<(outs VectorRegs128B:$dst),
- (ins VecDblRegs128B:$src1),
- "$dst=hi_W($src1)",
- [(set VectorRegs128B:$dst, (int_hexagon_V6_hi_128B VecDblRegs128B:$src1))]>;
-
let AddedComplexity = 100 in {
def : Pat < (v16i32 (int_hexagon_V6_lo (v32i32 VecDblRegs:$src1))),
- (v16i32 (EXTRACT_SUBREG (v32i32 VecDblRegs:$src1), subreg_loreg)) >,
+ (v16i32 (EXTRACT_SUBREG (v32i32 VecDblRegs:$src1), vsub_lo)) >,
Requires<[UseHVXSgl]>;
def : Pat < (v16i32 (int_hexagon_V6_hi (v32i32 VecDblRegs:$src1))),
- (v16i32 (EXTRACT_SUBREG (v32i32 VecDblRegs:$src1), subreg_hireg)) >,
+ (v16i32 (EXTRACT_SUBREG (v32i32 VecDblRegs:$src1), vsub_hi)) >,
Requires<[UseHVXSgl]>;
def : Pat < (v32i32 (int_hexagon_V6_lo_128B (v64i32 VecDblRegs128B:$src1))),
- (v32i32 (EXTRACT_SUBREG (v64i32 VecDblRegs128B:$src1),
- subreg_loreg)) >,
+ (v32i32 (EXTRACT_SUBREG (v64i32 VecDblRegs128B:$src1), vsub_lo)) >,
Requires<[UseHVXDbl]>;
def : Pat < (v32i32 (int_hexagon_V6_hi_128B (v64i32 VecDblRegs128B:$src1))),
- (v32i32 (EXTRACT_SUBREG (v64i32 VecDblRegs128B:$src1),
- subreg_hireg)) >,
+ (v32i32 (EXTRACT_SUBREG (v64i32 VecDblRegs128B:$src1), vsub_hi)) >,
Requires<[UseHVXDbl]>;
}
@@ -204,6 +153,16 @@ multiclass T_V_pat <InstHexagon MI, Intrinsic IntID> {
Requires<[UseHVXDbl]>;
}
+multiclass T_W_pat <InstHexagon MI, Intrinsic IntID> {
+ def: Pat<(IntID VecDblRegs:$src1),
+ (MI VecDblRegs:$src1)>,
+ Requires<[UseHVXSgl]>;
+
+ def: Pat<(!cast<Intrinsic>(IntID#"_128B") VecDblRegs128B:$src1),
+ (!cast<InstHexagon>(MI#"_128B") VecDblRegs128B:$src1)>,
+ Requires<[UseHVXDbl]>;
+}
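// Illustrative note (not part of the change): per this multiclass, the later
// `defm : T_W_pat <V6_lo, int_hexagon_V6_lo>;` expands to roughly
//   def: Pat<(int_hexagon_V6_lo VecDblRegs:$src1),
//            (V6_lo VecDblRegs:$src1)>, Requires<[UseHVXSgl]>;
//   def: Pat<(int_hexagon_V6_lo_128B VecDblRegs128B:$src1),
//            (V6_lo_128B VecDblRegs128B:$src1)>, Requires<[UseHVXDbl]>;
// i.e. the !cast string concatenation derives the _128B intrinsic and
// instruction by name, so one defm covers both HVX vector lengths.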
+
multiclass T_Q_pat <InstHexagon MI, Intrinsic IntID> {
def: Pat<(IntID VecPredRegs:$src1),
(MI VecPredRegs:$src1)>,
@@ -495,7 +454,7 @@ multiclass T_WVVR_pat <InstHexagon MI, Intrinsic IntID> {
Requires<[UseHVXDbl]>;
}
-defm : T_WR_pat<V6_vtmpyb, int_hexagon_V6_vtmpyb>;
+defm : T_WR_pat <V6_vtmpyb, int_hexagon_V6_vtmpyb>;
defm : T_WR_pat <V6_vtmpybus, int_hexagon_V6_vtmpybus>;
defm : T_VR_pat <V6_vdmpyhb, int_hexagon_V6_vdmpyhb>;
defm : T_VR_pat <V6_vrmpyub, int_hexagon_V6_vrmpyub>;
@@ -751,6 +710,10 @@ defm : T_V_pat <V6_vcl0h, int_hexagon_V6_vcl0h>;
defm : T_V_pat <V6_vnormamtw, int_hexagon_V6_vnormamtw>;
defm : T_V_pat <V6_vnormamth, int_hexagon_V6_vnormamth>;
+defm : T_W_pat <V6_lo, int_hexagon_V6_lo>;
+defm : T_W_pat <V6_hi, int_hexagon_V6_hi>;
+defm : T_W_pat <V6_vassignp, int_hexagon_V6_vassignp>;
+
defm : T_WRI_pat <V6_vrmpybusi, int_hexagon_V6_vrmpybusi>;
defm : T_WRI_pat <V6_vrsadubi, int_hexagon_V6_vrsadubi>;
defm : T_WRI_pat <V6_vrmpyubi, int_hexagon_V6_vrmpyubi>;
@@ -831,8 +794,10 @@ def : T_PPQ_pat <S2_cabacencbin, int_hexagon_S2_cabacencbin>;
def: Pat<(v64i16 (trunc v64i32:$Vdd)),
(v64i16 (V6_vpackwh_sat_128B
- (v32i32 (HEXAGON_V6_hi_128B VecDblRegs128B:$Vdd)),
- (v32i32 (HEXAGON_V6_lo_128B VecDblRegs128B:$Vdd))))>,
+ (v32i32 (V6_hi_128B VecDblRegs128B:$Vdd)),
+ (v32i32 (V6_lo_128B VecDblRegs128B:$Vdd))))>,
Requires<[UseHVXDbl]>;
+def: Pat<(int_hexagon_V6_vd0), (V6_vd0)>;
+def: Pat<(int_hexagon_V6_vd0_128B), (V6_vd0_128B)>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonIsetDx.td b/contrib/llvm/lib/Target/Hexagon/HexagonIsetDx.td
index 0ca95e999859..ebedf2cbaf17 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonIsetDx.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonIsetDx.td
@@ -13,9 +13,9 @@
// SA1_combine1i: Combines.
let isCodeGenOnly = 1, hasSideEffects = 0 in
-def V4_SA1_combine1i: SUBInst <
+def SA1_combine1i: SUBInst <
(outs DoubleRegs:$Rdd),
- (ins u2Imm:$u2),
+ (ins u2_0Imm:$u2),
"$Rdd = combine(#1, #$u2)"> {
bits<3> Rdd;
bits<2> u2;
@@ -30,7 +30,7 @@ def V4_SA1_combine1i: SUBInst <
// SL2_jumpr31_f: Indirect conditional jump if false.
// SL2_jumpr31_f -> SL2_jumpr31_fnew
let Defs = [PC], Uses = [P0, R31], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in
-def V4_SL2_jumpr31_f: SUBInst <
+def SL2_jumpr31_f: SUBInst <
(outs ),
(ins ),
"if (!p0) jumpr r31"> {
@@ -40,7 +40,7 @@ def V4_SL2_jumpr31_f: SUBInst <
// SL2_deallocframe: Deallocate stack frame.
let Defs = [R31, R29, R30], Uses = [R30], isCodeGenOnly = 1, mayLoad = 1, accessSize = DoubleWordAccess in
-def V4_SL2_deallocframe: SUBInst <
+def SL2_deallocframe: SUBInst <
(outs ),
(ins ),
"deallocframe"> {
@@ -51,7 +51,7 @@ def V4_SL2_deallocframe: SUBInst <
// SL2_return_f: Deallocate stack frame and return.
// SL2_return_f -> SL2_return_fnew
let Defs = [PC, R31, R29, R30], Uses = [R30, P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, mayLoad = 1, accessSize = DoubleWordAccess, isBranch = 1, isIndirectBranch = 1 in
-def V4_SL2_return_f: SUBInst <
+def SL2_return_f: SUBInst <
(outs ),
(ins ),
"if (!p0) dealloc_return"> {
@@ -61,9 +61,9 @@ def V4_SL2_return_f: SUBInst <
// SA1_combine3i: Combines.
let isCodeGenOnly = 1, hasSideEffects = 0 in
-def V4_SA1_combine3i: SUBInst <
+def SA1_combine3i: SUBInst <
(outs DoubleRegs:$Rdd),
- (ins u2Imm:$u2),
+ (ins u2_0Imm:$u2),
"$Rdd = combine(#3, #$u2)"> {
bits<3> Rdd;
bits<2> u2;
@@ -77,7 +77,7 @@ def V4_SA1_combine3i: SUBInst <
// SS2_storebi0: Store byte.
let isCodeGenOnly = 1, mayStore = 1, accessSize = ByteAccess in
-def V4_SS2_storebi0: SUBInst <
+def SS2_storebi0: SUBInst <
(outs ),
(ins IntRegs:$Rs, u4_0Imm:$u4_0),
"memb($Rs + #$u4_0)=#0"> {
@@ -91,10 +91,10 @@ def V4_SS2_storebi0: SUBInst <
// SA1_clrtnew: Clear if true.
let Uses = [P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedNew = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-def V4_SA1_clrtnew: SUBInst <
+def SA1_clrtnew: SUBInst <
(outs IntRegs:$Rd),
- (ins ),
- "if (p0.new) $Rd = #0"> {
+ (ins PredRegs:$Pu),
+ "if ($Pu.new) $Rd = #0"> {
bits<4> Rd;
let Inst{12-9} = 0b1101;
@@ -104,7 +104,7 @@ def V4_SA1_clrtnew: SUBInst <
// SL2_loadruh_io: Load half.
let isCodeGenOnly = 1, mayLoad = 1, accessSize = HalfWordAccess, hasNewValue = 1, opNewValue = 0 in
-def V4_SL2_loadruh_io: SUBInst <
+def SL2_loadruh_io: SUBInst <
(outs IntRegs:$Rd),
(ins IntRegs:$Rs, u3_1Imm:$u3_1),
"$Rd = memuh($Rs + #$u3_1)"> {
@@ -120,7 +120,7 @@ def V4_SL2_loadruh_io: SUBInst <
// SL2_jumpr31_tnew: Indirect conditional jump if true.
let Defs = [PC], Uses = [P0, R31], isCodeGenOnly = 1, isPredicated = 1, isPredicatedNew = 1, isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in
-def V4_SL2_jumpr31_tnew: SUBInst <
+def SL2_jumpr31_tnew: SUBInst <
(outs ),
(ins ),
"if (p0.new) jumpr:nt r31"> {
@@ -130,9 +130,9 @@ def V4_SL2_jumpr31_tnew: SUBInst <
// SA1_addi: Add.
let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0, isExtendable = 1, isExtentSigned = 1, opExtentBits = 7, opExtendable = 2 in
-def V4_SA1_addi: SUBInst <
+def SA1_addi: SUBInst <
(outs IntRegs:$Rx),
- (ins IntRegs:$_src_, s7Ext:$s7),
+ (ins IntRegs:$_src_, s7_0Ext:$s7),
"$Rx = add($_src_, #$s7)" ,
[] ,
"$_src_ = $Rx"> {
@@ -146,7 +146,7 @@ def V4_SA1_addi: SUBInst <
// SL1_loadrub_io: Load byte.
let isCodeGenOnly = 1, mayLoad = 1, accessSize = ByteAccess, hasNewValue = 1, opNewValue = 0 in
-def V4_SL1_loadrub_io: SUBInst <
+def SL1_loadrub_io: SUBInst <
(outs IntRegs:$Rd),
(ins IntRegs:$Rs, u4_0Imm:$u4_0),
"$Rd = memub($Rs + #$u4_0)"> {
@@ -162,7 +162,7 @@ def V4_SL1_loadrub_io: SUBInst <
// SL1_loadri_io: Load word.
let isCodeGenOnly = 1, mayLoad = 1, accessSize = WordAccess, hasNewValue = 1, opNewValue = 0 in
-def V4_SL1_loadri_io: SUBInst <
+def SL1_loadri_io: SUBInst <
(outs IntRegs:$Rd),
(ins IntRegs:$Rs, u4_2Imm:$u4_2),
"$Rd = memw($Rs + #$u4_2)"> {
@@ -178,9 +178,9 @@ def V4_SL1_loadri_io: SUBInst <
// SA1_cmpeqi: Compareimmed.
let Defs = [P0], isCodeGenOnly = 1, hasSideEffects = 0 in
-def V4_SA1_cmpeqi: SUBInst <
+def SA1_cmpeqi: SUBInst <
(outs ),
- (ins IntRegs:$Rs, u2Imm:$u2),
+ (ins IntRegs:$Rs, u2_0Imm:$u2),
"p0 = cmp.eq($Rs, #$u2)"> {
bits<4> Rs;
bits<2> u2;
@@ -192,7 +192,7 @@ def V4_SA1_cmpeqi: SUBInst <
// SA1_combinerz: Combines.
let isCodeGenOnly = 1, hasSideEffects = 0 in
-def V4_SA1_combinerz: SUBInst <
+def SA1_combinerz: SUBInst <
(outs DoubleRegs:$Rdd),
(ins IntRegs:$Rs),
"$Rdd = combine($Rs, #0)"> {
@@ -209,7 +209,7 @@ def V4_SA1_combinerz: SUBInst <
// SL2_return_t: Deallocate stack frame and return.
// SL2_return_t -> SL2_return_tnew
let Defs = [PC, R31, R29, R30], Uses = [R30, P0], isCodeGenOnly = 1, isPredicated = 1, mayLoad = 1, accessSize = DoubleWordAccess, isBranch = 1, isIndirectBranch = 1 in
-def V4_SL2_return_t: SUBInst <
+def SL2_return_t: SUBInst <
(outs ),
(ins ),
"if (p0) dealloc_return"> {
@@ -219,7 +219,7 @@ def V4_SL2_return_t: SUBInst <
// SS2_allocframe: Allocate stack frame.
let Defs = [R29, R30], Uses = [R30, R31, R29], isCodeGenOnly = 1, mayStore = 1, accessSize = DoubleWordAccess in
-def V4_SS2_allocframe: SUBInst <
+def SS2_allocframe: SUBInst <
(outs ),
(ins u5_3Imm:$u5_3),
"allocframe(#$u5_3)"> {
@@ -231,7 +231,7 @@ def V4_SS2_allocframe: SUBInst <
// SS2_storeh_io: Store half.
let isCodeGenOnly = 1, mayStore = 1, accessSize = HalfWordAccess in
-def V4_SS2_storeh_io: SUBInst <
+def SS2_storeh_io: SUBInst <
(outs ),
(ins IntRegs:$Rs, u3_1Imm:$u3_1, IntRegs:$Rt),
"memh($Rs + #$u3_1) = $Rt"> {
@@ -247,7 +247,7 @@ def V4_SS2_storeh_io: SUBInst <
// SS2_storewi0: Store word.
let isCodeGenOnly = 1, mayStore = 1, accessSize = WordAccess in
-def V4_SS2_storewi0: SUBInst <
+def SS2_storewi0: SUBInst <
(outs ),
(ins IntRegs:$Rs, u4_2Imm:$u4_2),
"memw($Rs + #$u4_2)=#0"> {
@@ -261,7 +261,7 @@ def V4_SS2_storewi0: SUBInst <
// SS2_storewi1: Store word.
let isCodeGenOnly = 1, mayStore = 1, accessSize = WordAccess in
-def V4_SS2_storewi1: SUBInst <
+def SS2_storewi1: SUBInst <
(outs ),
(ins IntRegs:$Rs, u4_2Imm:$u4_2),
"memw($Rs + #$u4_2)=#1"> {
@@ -275,7 +275,7 @@ def V4_SS2_storewi1: SUBInst <
// SL2_jumpr31: Indirect conditional jump if true.
let Defs = [PC], Uses = [R31], isCodeGenOnly = 1, isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in
-def V4_SL2_jumpr31: SUBInst <
+def SL2_jumpr31: SUBInst <
(outs ),
(ins ),
"jumpr r31"> {
@@ -285,7 +285,7 @@ def V4_SL2_jumpr31: SUBInst <
// SA1_combinezr: Combines.
let isCodeGenOnly = 1, hasSideEffects = 0 in
-def V4_SA1_combinezr: SUBInst <
+def SA1_combinezr: SUBInst <
(outs DoubleRegs:$Rdd),
(ins IntRegs:$Rs),
"$Rdd = combine(#0, $Rs)"> {
@@ -301,7 +301,7 @@ def V4_SA1_combinezr: SUBInst <
// SL2_loadrh_io: Load half.
let isCodeGenOnly = 1, mayLoad = 1, accessSize = HalfWordAccess, hasNewValue = 1, opNewValue = 0 in
-def V4_SL2_loadrh_io: SUBInst <
+def SL2_loadrh_io: SUBInst <
(outs IntRegs:$Rd),
(ins IntRegs:$Rs, u3_1Imm:$u3_1),
"$Rd = memh($Rs + #$u3_1)"> {
@@ -317,7 +317,7 @@ def V4_SL2_loadrh_io: SUBInst <
// SA1_addrx: Add.
let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-def V4_SA1_addrx: SUBInst <
+def SA1_addrx: SUBInst <
(outs IntRegs:$Rx),
(ins IntRegs:$_src_, IntRegs:$Rs),
"$Rx = add($_src_, $Rs)" ,
@@ -333,10 +333,10 @@ def V4_SA1_addrx: SUBInst <
// SA1_setin1: Set to -1.
let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-def V4_SA1_setin1: SUBInst <
+def SA1_setin1: SUBInst <
(outs IntRegs:$Rd),
(ins ),
- "$Rd = #-1"> {
+ "$Rd = #{-1}"> {
bits<4> Rd;
let Inst{12-9} = 0b1101;
@@ -346,7 +346,7 @@ def V4_SA1_setin1: SUBInst <
// SA1_sxth: Sxth.
let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-def V4_SA1_sxth: SUBInst <
+def SA1_sxth: SUBInst <
(outs IntRegs:$Rd),
(ins IntRegs:$Rs),
"$Rd = sxth($Rs)"> {
@@ -360,9 +360,9 @@ def V4_SA1_sxth: SUBInst <
// SA1_combine0i: Combines.
let isCodeGenOnly = 1, hasSideEffects = 0 in
-def V4_SA1_combine0i: SUBInst <
+def SA1_combine0i: SUBInst <
(outs DoubleRegs:$Rdd),
- (ins u2Imm:$u2),
+ (ins u2_0Imm:$u2),
"$Rdd = combine(#0, #$u2)"> {
bits<3> Rdd;
bits<2> u2;
@@ -376,9 +376,9 @@ def V4_SA1_combine0i: SUBInst <
// SA1_combine2i: Combines.
let isCodeGenOnly = 1, hasSideEffects = 0 in
-def V4_SA1_combine2i: SUBInst <
+def SA1_combine2i: SUBInst <
(outs DoubleRegs:$Rdd),
- (ins u2Imm:$u2),
+ (ins u2_0Imm:$u2),
"$Rdd = combine(#2, #$u2)"> {
bits<3> Rdd;
bits<2> u2;
@@ -392,7 +392,7 @@ def V4_SA1_combine2i: SUBInst <
// SA1_sxtb: Sxtb.
let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-def V4_SA1_sxtb: SUBInst <
+def SA1_sxtb: SUBInst <
(outs IntRegs:$Rd),
(ins IntRegs:$Rs),
"$Rd = sxtb($Rs)"> {
@@ -407,10 +407,10 @@ def V4_SA1_sxtb: SUBInst <
// SA1_clrf: Clear if false.
// SA1_clrf -> SA1_clrfnew
let Uses = [P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-def V4_SA1_clrf: SUBInst <
+def SA1_clrf: SUBInst <
(outs IntRegs:$Rd),
- (ins ),
- "if (!p0) $Rd = #0"> {
+ (ins PredRegs:$Pu),
+ "if (!$Pu) $Rd = #0"> {
bits<4> Rd;
let Inst{12-9} = 0b1101;
@@ -420,7 +420,7 @@ def V4_SA1_clrf: SUBInst <
// SL2_loadrb_io: Load byte.
let isCodeGenOnly = 1, mayLoad = 1, accessSize = ByteAccess, hasNewValue = 1, opNewValue = 0 in
-def V4_SL2_loadrb_io: SUBInst <
+def SL2_loadrb_io: SUBInst <
(outs IntRegs:$Rd),
(ins IntRegs:$Rs, u3_0Imm:$u3_0),
"$Rd = memb($Rs + #$u3_0)"> {
@@ -436,7 +436,7 @@ def V4_SL2_loadrb_io: SUBInst <
// SA1_tfr: Tfr.
let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-def V4_SA1_tfr: SUBInst <
+def SA1_tfr: SUBInst <
(outs IntRegs:$Rd),
(ins IntRegs:$Rs),
"$Rd = $Rs"> {
@@ -450,7 +450,7 @@ def V4_SA1_tfr: SUBInst <
// SL2_loadrd_sp: Load dword.
let Uses = [R29], isCodeGenOnly = 1, mayLoad = 1, accessSize = DoubleWordAccess in
-def V4_SL2_loadrd_sp: SUBInst <
+def SL2_loadrd_sp: SUBInst <
(outs DoubleRegs:$Rdd),
(ins u5_3Imm:$u5_3),
"$Rdd = memd(r29 + #$u5_3)"> {
@@ -464,7 +464,7 @@ def V4_SL2_loadrd_sp: SUBInst <
// SA1_and1: And #1.
let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-def V4_SA1_and1: SUBInst <
+def SA1_and1: SUBInst <
(outs IntRegs:$Rd),
(ins IntRegs:$Rs),
"$Rd = and($Rs, #1)"> {
@@ -478,7 +478,7 @@ def V4_SA1_and1: SUBInst <
// SS2_storebi1: Store byte.
let isCodeGenOnly = 1, mayStore = 1, accessSize = ByteAccess in
-def V4_SS2_storebi1: SUBInst <
+def SS2_storebi1: SUBInst <
(outs ),
(ins IntRegs:$Rs, u4_0Imm:$u4_0),
"memb($Rs + #$u4_0)=#1"> {
@@ -492,7 +492,7 @@ def V4_SS2_storebi1: SUBInst <
// SA1_inc: Inc.
let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-def V4_SA1_inc: SUBInst <
+def SA1_inc: SUBInst <
(outs IntRegs:$Rd),
(ins IntRegs:$Rs),
"$Rd = add($Rs, #1)"> {
@@ -506,7 +506,7 @@ def V4_SA1_inc: SUBInst <
// SS2_stored_sp: Store dword.
let Uses = [R29], isCodeGenOnly = 1, mayStore = 1, accessSize = DoubleWordAccess in
-def V4_SS2_stored_sp: SUBInst <
+def SS2_stored_sp: SUBInst <
(outs ),
(ins s6_3Imm:$s6_3, DoubleRegs:$Rtt),
"memd(r29 + #$s6_3) = $Rtt"> {
@@ -520,7 +520,7 @@ def V4_SS2_stored_sp: SUBInst <
// SS2_storew_sp: Store word.
let Uses = [R29], isCodeGenOnly = 1, mayStore = 1, accessSize = WordAccess in
-def V4_SS2_storew_sp: SUBInst <
+def SS2_storew_sp: SUBInst <
(outs ),
(ins u5_2Imm:$u5_2, IntRegs:$Rt),
"memw(r29 + #$u5_2) = $Rt"> {
@@ -534,7 +534,7 @@ def V4_SS2_storew_sp: SUBInst <
// SL2_jumpr31_fnew: Indirect conditional jump if false.
let Defs = [PC], Uses = [P0, R31], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, isPredicatedNew = 1, isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in
-def V4_SL2_jumpr31_fnew: SUBInst <
+def SL2_jumpr31_fnew: SUBInst <
(outs ),
(ins ),
"if (!p0.new) jumpr:nt r31"> {
@@ -545,10 +545,10 @@ def V4_SL2_jumpr31_fnew: SUBInst <
// SA1_clrt: Clear if true.
// SA1_clrt -> SA1_clrtnew
let Uses = [P0], isCodeGenOnly = 1, isPredicated = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-def V4_SA1_clrt: SUBInst <
+def SA1_clrt: SUBInst <
(outs IntRegs:$Rd),
- (ins ),
- "if (p0) $Rd = #0"> {
+ (ins PredRegs:$Pu),
+ "if ($Pu) $Rd = #0"> {
bits<4> Rd;
let Inst{12-9} = 0b1101;
@@ -558,7 +558,7 @@ def V4_SA1_clrt: SUBInst <
// SL2_return: Deallocate stack frame and return.
let Defs = [PC, R31, R29, R30], Uses = [R30], isCodeGenOnly = 1, mayLoad = 1, accessSize = DoubleWordAccess, isBranch = 1, isIndirectBranch = 1 in
-def V4_SL2_return: SUBInst <
+def SL2_return: SUBInst <
(outs ),
(ins ),
"dealloc_return"> {
@@ -568,10 +568,10 @@ def V4_SL2_return: SUBInst <
// SA1_dec: Dec.
let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-def V4_SA1_dec: SUBInst <
+def SA1_dec: SUBInst <
(outs IntRegs:$Rd),
(ins IntRegs:$Rs),
- "$Rd = add($Rs,#-1)"> {
+ "$Rd = add($Rs,#{-1})"> {
bits<4> Rd;
bits<4> Rs;
@@ -582,9 +582,9 @@ def V4_SA1_dec: SUBInst <
// SA1_seti: Set immed.
let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0, isExtendable = 1, isExtentSigned = 0, opExtentBits = 6, opExtendable = 1 in
-def V4_SA1_seti: SUBInst <
+def SA1_seti: SUBInst <
(outs IntRegs:$Rd),
- (ins u6Ext:$u6),
+ (ins u6_0Ext:$u6),
"$Rd = #$u6"> {
bits<4> Rd;
bits<6> u6;
@@ -597,7 +597,7 @@ def V4_SA1_seti: SUBInst <
// SL2_jumpr31_t: Indirect conditional jump if true.
// SL2_jumpr31_t -> SL2_jumpr31_tnew
let Defs = [PC], Uses = [P0, R31], isCodeGenOnly = 1, isPredicated = 1, isBranch = 1, isIndirectBranch = 1, hasSideEffects = 0 in
-def V4_SL2_jumpr31_t: SUBInst <
+def SL2_jumpr31_t: SUBInst <
(outs ),
(ins ),
"if (p0) jumpr r31"> {
@@ -607,10 +607,10 @@ def V4_SL2_jumpr31_t: SUBInst <
// SA1_clrfnew: Clear if false.
let Uses = [P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, isPredicatedNew = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-def V4_SA1_clrfnew: SUBInst <
+def SA1_clrfnew: SUBInst <
(outs IntRegs:$Rd),
- (ins ),
- "if (!p0.new) $Rd = #0"> {
+ (ins PredRegs:$Pu),
+ "if (!$Pu.new) $Rd = #0"> {
bits<4> Rd;
let Inst{12-9} = 0b1101;
@@ -620,7 +620,7 @@ def V4_SA1_clrfnew: SUBInst <
// SS1_storew_io: Store word.
let isCodeGenOnly = 1, mayStore = 1, accessSize = WordAccess in
-def V4_SS1_storew_io: SUBInst <
+def SS1_storew_io: SUBInst <
(outs ),
(ins IntRegs:$Rs, u4_2Imm:$u4_2, IntRegs:$Rt),
"memw($Rs + #$u4_2) = $Rt"> {
@@ -636,7 +636,7 @@ def V4_SS1_storew_io: SUBInst <
// SA1_zxtb: Zxtb.
let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-def V4_SA1_zxtb: SUBInst <
+def SA1_zxtb: SUBInst <
(outs IntRegs:$Rd),
(ins IntRegs:$Rs),
"$Rd = and($Rs, #255)"> {
@@ -650,7 +650,7 @@ def V4_SA1_zxtb: SUBInst <
// SA1_addsp: Add.
let Uses = [R29], isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-def V4_SA1_addsp: SUBInst <
+def SA1_addsp: SUBInst <
(outs IntRegs:$Rd),
(ins u6_2Imm:$u6_2),
"$Rd = add(r29, #$u6_2)"> {
@@ -664,7 +664,7 @@ def V4_SA1_addsp: SUBInst <
// SL2_loadri_sp: Load word.
let Uses = [R29], isCodeGenOnly = 1, mayLoad = 1, accessSize = WordAccess, hasNewValue = 1, opNewValue = 0 in
-def V4_SL2_loadri_sp: SUBInst <
+def SL2_loadri_sp: SUBInst <
(outs IntRegs:$Rd),
(ins u5_2Imm:$u5_2),
"$Rd = memw(r29 + #$u5_2)"> {
@@ -678,7 +678,7 @@ def V4_SL2_loadri_sp: SUBInst <
// SS1_storeb_io: Store byte.
let isCodeGenOnly = 1, mayStore = 1, accessSize = ByteAccess in
-def V4_SS1_storeb_io: SUBInst <
+def SS1_storeb_io: SUBInst <
(outs ),
(ins IntRegs:$Rs, u4_0Imm:$u4_0, IntRegs:$Rt),
"memb($Rs + #$u4_0) = $Rt"> {
@@ -694,7 +694,7 @@ def V4_SS1_storeb_io: SUBInst <
// SL2_return_tnew: Deallocate stack frame and return.
let Defs = [PC, R31, R29, R30], Uses = [R30, P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedNew = 1, mayLoad = 1, accessSize = DoubleWordAccess, isBranch = 1, isIndirectBranch = 1 in
-def V4_SL2_return_tnew: SUBInst <
+def SL2_return_tnew: SUBInst <
(outs ),
(ins ),
"if (p0.new) dealloc_return:nt"> {
@@ -704,7 +704,7 @@ def V4_SL2_return_tnew: SUBInst <
// SL2_return_fnew: Deallocate stack frame and return.
let Defs = [PC, R31, R29, R30], Uses = [R30, P0], isCodeGenOnly = 1, isPredicated = 1, isPredicatedFalse = 1, isPredicatedNew = 1, mayLoad = 1, accessSize = DoubleWordAccess, isBranch = 1, isIndirectBranch = 1 in
-def V4_SL2_return_fnew: SUBInst <
+def SL2_return_fnew: SUBInst <
(outs ),
(ins ),
"if (!p0.new) dealloc_return:nt"> {
@@ -714,7 +714,7 @@ def V4_SL2_return_fnew: SUBInst <
// SA1_zxth: Zxth.
let isCodeGenOnly = 1, hasSideEffects = 0, hasNewValue = 1, opNewValue = 0 in
-def V4_SA1_zxth: SUBInst <
+def SA1_zxth: SUBInst <
(outs IntRegs:$Rd),
(ins IntRegs:$Rs),
"$Rd = zxth($Rs)"> {
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
index 26c5b63fec6c..371b52108b9b 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.h
@@ -29,7 +29,6 @@ class HexagonMachineFunctionInfo : public MachineFunctionInfo {
unsigned SRetReturnReg;
unsigned StackAlignBaseVReg; // Aligned-stack base register (virtual)
unsigned StackAlignBasePhysReg; // (physical)
- std::vector<MachineInstr*> AllocaAdjustInsts;
int VarArgsFrameIndex;
bool HasClobberLR;
bool HasEHReturn;
@@ -47,13 +46,6 @@ public:
unsigned getSRetReturnReg() const { return SRetReturnReg; }
void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
- void addAllocaAdjustInst(MachineInstr* MI) {
- AllocaAdjustInsts.push_back(MI);
- }
- const std::vector<MachineInstr*>& getAllocaAdjustInsts() {
- return AllocaAdjustInsts;
- }
-
void setVarArgsFrameIndex(int v) { VarArgsFrameIndex = v; }
int getVarArgsFrameIndex() { return VarArgsFrameIndex; }
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp
index 6dcac0dc7ee2..9ff9d93ea0c3 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.cpp
@@ -51,6 +51,7 @@ using namespace llvm;
#define DEBUG_TYPE "misched"
+namespace {
class HexagonCallMutation : public ScheduleDAGMutation {
public:
void apply(ScheduleDAGInstrs *DAG) override;
@@ -58,6 +59,7 @@ private:
bool shouldTFRICallBind(const HexagonInstrInfo &HII,
const SUnit &Inst1, const SUnit &Inst2) const;
};
+} // end anonymous namespace
// Check if a call and subsequent A2_tfrpi instructions should maintain
// scheduling affinity. We are looking for the TFRI to be consumed in
@@ -72,7 +74,7 @@ bool HexagonCallMutation::shouldTFRICallBind(const HexagonInstrInfo &HII,
return false;
// TypeXTYPE instructions are 64-bit operations.
- if (HII.getType(Inst2.getInstr()) == HexagonII::TypeXTYPE)
+ if (HII.getType(*Inst2.getInstr()) == HexagonII::TypeXTYPE)
return true;
return false;
}
@@ -168,7 +170,7 @@ bool VLIWResourceModel::isResourceAvailable(SUnit *SU) {
continue;
// Enable .cur formation.
- if (QII.mayBeCurLoad(Packet[i]->getInstr()))
+ if (QII.mayBeCurLoad(*Packet[i]->getInstr()))
continue;
for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(),
@@ -616,7 +618,7 @@ int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
if (!SU || SU->isScheduled)
return ResCount;
- MachineInstr *Instr = SU->getInstr();
+ MachineInstr &Instr = *SU->getInstr();
DEBUG(if (verbose) dbgs() << ((Q.getID() == TopQID) ? "(top|" : "(bot|"));
// Forced priority is high.
@@ -705,7 +707,7 @@ int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
// available for it.
auto &QST = DAG->MF.getSubtarget<HexagonSubtarget>();
auto &QII = *QST.getInstrInfo();
- if (SU->isInstr() && QII.mayBeCurLoad(SU->getInstr())) {
+ if (SU->isInstr() && QII.mayBeCurLoad(*SU->getInstr())) {
if (Q.getID() == TopQID && Top.ResourceModel->isResourceAvailable(SU)) {
ResCount += PriorityTwo;
DEBUG(if (verbose) dbgs() << "C|");
@@ -744,11 +746,11 @@ int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
// Check for stalls in the previous packet.
if (Q.getID() == TopQID) {
for (auto J : Top.ResourceModel->OldPacket)
- if (QII.producesStall(J->getInstr(), Instr))
+ if (QII.producesStall(*J->getInstr(), Instr))
ResCount -= PriorityOne;
} else {
for (auto J : Bot.ResourceModel->OldPacket)
- if (QII.producesStall(Instr, J->getInstr()))
+ if (QII.producesStall(Instr, *J->getInstr()))
ResCount -= PriorityOne;
}
}
@@ -841,8 +843,8 @@ pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker,
const MachineInstr *CandI = Candidate.SU->getInstr();
const InstrItineraryData *InstrItins = QST.getInstrItineraryData();
- unsigned InstrLatency = QII.getInstrTimingClassLatency(InstrItins, MI);
- unsigned CandLatency = QII.getInstrTimingClassLatency(InstrItins, CandI);
+ unsigned InstrLatency = QII.getInstrTimingClassLatency(InstrItins, *MI);
+ unsigned CandLatency = QII.getInstrTimingClassLatency(InstrItins, *CandI);
DEBUG(dbgs() << "TC Tie Breaker Cand: "
<< CandLatency << " Instr:" << InstrLatency << "\n"
<< *MI << *CandI << "\n");
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h b/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h
index 51c84a4cee31..dc10028c0424 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonMachineScheduler.h
@@ -94,9 +94,7 @@ public:
void savePacket();
unsigned getTotalPackets() const { return TotalPackets; }
- bool isInPacket(SUnit *SU) const {
- return std::find(Packet.begin(), Packet.end(), SU) != Packet.end();
- }
+ bool isInPacket(SUnit *SU) const { return is_contained(Packet, SU); }
};
/// Extend the standard ScheduleDAGMI to provide more context and override the
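The isInPacket change above swaps a hand-rolled std::find test for llvm::is_contained from ADT/STLExtras.h, which performs the same membership check. A minimal stand-alone sketch of that equivalence (the helper name here is illustrative, not LLVM's):

#include <algorithm>
#include <iterator>
#include <vector>

// Illustrative re-implementation of the membership test: true when Element
// occurs anywhere in Range, exactly what the removed std::find spelled out.
template <typename Range, typename Elem>
bool isContainedSketch(const Range &R, const Elem &Element) {
  return std::find(std::begin(R), std::end(R), Element) != std::end(R);
}

int main() {
  std::vector<int> Packet{3, 7, 11};
  return (isContainedSketch(Packet, 7) && !isContainedSketch(Packet, 4)) ? 0 : 1;
}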
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp
index 3ffb9cffc6a6..72d8011277e6 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonNewValueJump.cpp
@@ -29,7 +29,6 @@
#include "HexagonTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveVariables.h"
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -79,14 +78,12 @@ namespace {
MachineFunctionPass::getAnalysisUsage(AU);
}
- const char *getPassName() const override {
- return "Hexagon NewValueJump";
- }
+ StringRef getPassName() const override { return "Hexagon NewValueJump"; }
bool runOnMachineFunction(MachineFunction &Fn) override;
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
private:
@@ -180,7 +177,7 @@ static bool commonChecksToProhibitNewValueJump(bool afterRA,
return false;
// if call in path, bail out.
- if (MII->getOpcode() == Hexagon::J2_call)
+ if (MII->isCall())
return false;
// if NVJ is running prior to RA, do the following checks.
@@ -189,9 +186,9 @@ static bool commonChecksToProhibitNewValueJump(bool afterRA,
// to new value jump. If they are in the path, bail out.
// KILL sets kill flag on the opcode. It also sets up a
// single register, out of pair.
- // %D0<def> = Hexagon_S2_lsr_r_p %D0<kill>, %R2<kill>
+ // %D0<def> = S2_lsr_r_p %D0<kill>, %R2<kill>
// %R0<def> = KILL %R0, %D0<imp-use,kill>
- // %P0<def> = CMPEQri %R0<kill>, 0
+ // %P0<def> = C2_cmpeqi %R0<kill>, 0
// PHI can be anything after RA.
// COPY can rematerialize things in between feeder, compare and nvj.
if (MII->getOpcode() == TargetOpcode::KILL ||
@@ -203,7 +200,7 @@ static bool commonChecksToProhibitNewValueJump(bool afterRA,
// of registers by individual passes in the backend. At this time,
// we don't know the scope of usage and definitions of these
// instructions.
- if (MII->getOpcode() == Hexagon::LDriw_pred ||
+ if (MII->getOpcode() == Hexagon::LDriw_pred ||
MII->getOpcode() == Hexagon::STriw_pred)
return false;
}
@@ -226,10 +223,23 @@ static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII,
// range specified by the arch.
if (!secondReg) {
int64_t v = MI.getOperand(2).getImm();
+ bool Valid = false;
- if (!(isUInt<5>(v) || ((MI.getOpcode() == Hexagon::C2_cmpeqi ||
- MI.getOpcode() == Hexagon::C2_cmpgti) &&
- (v == -1))))
+ switch (MI.getOpcode()) {
+ case Hexagon::C2_cmpeqi:
+ case Hexagon::C2_cmpgti:
+ Valid = (isUInt<5>(v) || v == -1);
+ break;
+ case Hexagon::C2_cmpgtui:
+ Valid = isUInt<5>(v);
+ break;
+ case Hexagon::S2_tstbit_i:
+ case Hexagon::S4_ntstbit_i:
+ Valid = (v == 0);
+ break;
+ }
+
+ if (!Valid)
return false;
}
@@ -239,6 +249,11 @@ static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII,
if (secondReg) {
cmpOp2 = MI.getOperand(2).getReg();
+ // If the same register appears as both operands, we cannot generate a new
+ // value compare. Only one operand may use the .new suffix.
+ if (cmpReg1 == cmpOp2)
+ return false;
+
// Make sure that the second register is not from COPY
// At machine code level, we don't need this, but if we decide
// to move new value jump prior to RA, we would need this.
@@ -255,6 +270,8 @@ static bool canCompareBeNewValueJump(const HexagonInstrInfo *QII,
++II ;
for (MachineBasicBlock::iterator localII = II; localII != end;
++localII) {
+ if (localII->isDebugValue())
+ continue;
// Check 1.
// If "common" checks fail, bail out.
@@ -449,7 +466,9 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "Instr: "; MI.dump(); dbgs() << "\n");
if (!foundJump && (MI.getOpcode() == Hexagon::J2_jumpt ||
+ MI.getOpcode() == Hexagon::J2_jumptpt ||
MI.getOpcode() == Hexagon::J2_jumpf ||
+ MI.getOpcode() == Hexagon::J2_jumpfpt ||
MI.getOpcode() == Hexagon::J2_jumptnewpt ||
MI.getOpcode() == Hexagon::J2_jumptnew ||
MI.getOpcode() == Hexagon::J2_jumpfnewpt ||
@@ -472,7 +491,7 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
//if(LVs.isLiveOut(predReg, *MBB)) break;
// Get all the successors of this block - which will always
- // be 2. Check if the predicate register is live in in those
+ // be 2. Check if the predicate register is live-in in those
// successors. If yes, we cannot delete the predicate -
// I am doing this only because LLVM does not provide LiveOut
// at the BB level.
@@ -580,8 +599,9 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
if (isSecondOpReg) {
// In case of CMPLT, or CMPLTU, or EQ with the second register
// to newify, swap the operands.
- if (cmpInstr->getOpcode() == Hexagon::C2_cmpeq &&
- feederReg == (unsigned) cmpOp2) {
+ unsigned COp = cmpInstr->getOpcode();
+ if ((COp == Hexagon::C2_cmpeq || COp == Hexagon::C4_cmpneq) &&
+ (feederReg == (unsigned) cmpOp2)) {
unsigned tmp = cmpReg1;
bool tmpIsKill = MO1IsKill;
cmpReg1 = cmpOp2;
@@ -647,16 +667,6 @@ bool HexagonNewValueJump::runOnMachineFunction(MachineFunction &MF) {
.addReg(cmpOp2, getKillRegState(MO2IsKill))
.addMBB(jmpTarget);
- else if ((cmpInstr->getOpcode() == Hexagon::C2_cmpeqi ||
- cmpInstr->getOpcode() == Hexagon::C2_cmpgti) &&
- cmpOp2 == -1 )
- // Corresponding new-value compare jump instructions don't have the
- // operand for -1 immediate value.
- NewMI = BuildMI(*MBB, jmpPos, dl,
- QII->get(opc))
- .addReg(cmpReg1, getKillRegState(MO1IsKill))
- .addMBB(jmpTarget);
-
else
NewMI = BuildMI(*MBB, jmpPos, dl,
QII->get(opc))
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonOperands.td b/contrib/llvm/lib/Target/Hexagon/HexagonOperands.td
index 11092d2b92fe..983310571563 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonOperands.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonOperands.td
@@ -7,58 +7,53 @@
//
//===----------------------------------------------------------------------===//
-def s32ImmOperand : AsmOperandClass { let Name = "s32Imm"; }
+def s32_0ImmOperand : AsmOperandClass { let Name = "s32_0Imm"; }
def s23_2ImmOperand : AsmOperandClass { let Name = "s23_2Imm"; }
-def s8ImmOperand : AsmOperandClass { let Name = "s8Imm"; }
-def s8Imm64Operand : AsmOperandClass { let Name = "s8Imm64"; }
-def s6ImmOperand : AsmOperandClass { let Name = "s6Imm"; }
-def s4ImmOperand : AsmOperandClass { let Name = "s4Imm"; }
+def s8_0ImmOperand : AsmOperandClass { let Name = "s8_0Imm"; }
+def s8_0Imm64Operand : AsmOperandClass { let Name = "s8_0Imm64"; }
+def s6_0ImmOperand : AsmOperandClass { let Name = "s6_0Imm"; }
def s4_0ImmOperand : AsmOperandClass { let Name = "s4_0Imm"; }
def s4_1ImmOperand : AsmOperandClass { let Name = "s4_1Imm"; }
def s4_2ImmOperand : AsmOperandClass { let Name = "s4_2Imm"; }
def s4_3ImmOperand : AsmOperandClass { let Name = "s4_3Imm"; }
def s4_6ImmOperand : AsmOperandClass { let Name = "s4_6Imm"; }
def s3_6ImmOperand : AsmOperandClass { let Name = "s3_6Imm"; }
-def u64ImmOperand : AsmOperandClass { let Name = "u64Imm"; }
-def u32ImmOperand : AsmOperandClass { let Name = "u32Imm"; }
+def u64_0ImmOperand : AsmOperandClass { let Name = "u64_0Imm"; }
+def u32_0ImmOperand : AsmOperandClass { let Name = "u32_0Imm"; }
def u26_6ImmOperand : AsmOperandClass { let Name = "u26_6Imm"; }
-def u16ImmOperand : AsmOperandClass { let Name = "u16Imm"; }
def u16_0ImmOperand : AsmOperandClass { let Name = "u16_0Imm"; }
def u16_1ImmOperand : AsmOperandClass { let Name = "u16_1Imm"; }
def u16_2ImmOperand : AsmOperandClass { let Name = "u16_2Imm"; }
def u16_3ImmOperand : AsmOperandClass { let Name = "u16_3Imm"; }
def u11_3ImmOperand : AsmOperandClass { let Name = "u11_3Imm"; }
-def u10ImmOperand : AsmOperandClass { let Name = "u10Imm"; }
-def u9ImmOperand : AsmOperandClass { let Name = "u9Imm"; }
-def u8ImmOperand : AsmOperandClass { let Name = "u8Imm"; }
-def u7ImmOperand : AsmOperandClass { let Name = "u7Imm"; }
-def u6ImmOperand : AsmOperandClass { let Name = "u6Imm"; }
+def u10_0ImmOperand : AsmOperandClass { let Name = "u10_0Imm"; }
+def u9_0ImmOperand : AsmOperandClass { let Name = "u9_0Imm"; }
+def u8_0ImmOperand : AsmOperandClass { let Name = "u8_0Imm"; }
+def u7_0ImmOperand : AsmOperandClass { let Name = "u7_0Imm"; }
def u6_0ImmOperand : AsmOperandClass { let Name = "u6_0Imm"; }
def u6_1ImmOperand : AsmOperandClass { let Name = "u6_1Imm"; }
def u6_2ImmOperand : AsmOperandClass { let Name = "u6_2Imm"; }
def u6_3ImmOperand : AsmOperandClass { let Name = "u6_3Imm"; }
-def u5ImmOperand : AsmOperandClass { let Name = "u5Imm"; }
-def u4ImmOperand : AsmOperandClass { let Name = "u4Imm"; }
-def u3ImmOperand : AsmOperandClass { let Name = "u3Imm"; }
-def u2ImmOperand : AsmOperandClass { let Name = "u2Imm"; }
-def u1ImmOperand : AsmOperandClass { let Name = "u1Imm"; }
-def n8ImmOperand : AsmOperandClass { let Name = "n8Imm"; }
+def u5_0ImmOperand : AsmOperandClass { let Name = "u5_0Imm"; }
+def u4_0ImmOperand : AsmOperandClass { let Name = "u4_0Imm"; }
+def u3_0ImmOperand : AsmOperandClass { let Name = "u3_0Imm"; }
+def u2_0ImmOperand : AsmOperandClass { let Name = "u2_0Imm"; }
+def u1_0ImmOperand : AsmOperandClass { let Name = "u1_0Imm"; }
+def n8_0ImmOperand : AsmOperandClass { let Name = "n8_0Imm"; }
// Immediate operands.
let OperandType = "OPERAND_IMMEDIATE",
DecoderMethod = "unsignedImmDecoder" in {
- def s32Imm : Operand<i32> { let ParserMatchClass = s32ImmOperand;
- let DecoderMethod = "s32ImmDecoder"; }
+ def s32_0Imm : Operand<i32> { let ParserMatchClass = s32_0ImmOperand;
+ let DecoderMethod = "s32_0ImmDecoder"; }
def s23_2Imm : Operand<i32> { let ParserMatchClass = s23_2ImmOperand; }
- def s8Imm : Operand<i32> { let ParserMatchClass = s8ImmOperand;
- let DecoderMethod = "s8ImmDecoder"; }
- def s8Imm64 : Operand<i64> { let ParserMatchClass = s8Imm64Operand;
- let DecoderMethod = "s8ImmDecoder"; }
- def s6Imm : Operand<i32> { let ParserMatchClass = s6ImmOperand;
+ def s8_0Imm : Operand<i32> { let ParserMatchClass = s8_0ImmOperand;
+ let DecoderMethod = "s8_0ImmDecoder"; }
+ def s8_0Imm64 : Operand<i64> { let ParserMatchClass = s8_0Imm64Operand;
+ let DecoderMethod = "s8_0ImmDecoder"; }
+ def s6_0Imm : Operand<i32> { let ParserMatchClass = s6_0ImmOperand;
let DecoderMethod = "s6_0ImmDecoder"; }
def s6_3Imm : Operand<i32>;
- def s4Imm : Operand<i32> { let ParserMatchClass = s4ImmOperand;
- let DecoderMethod = "s4_0ImmDecoder"; }
def s4_0Imm : Operand<i32> { let ParserMatchClass = s4_0ImmOperand;
let DecoderMethod = "s4_0ImmDecoder"; }
def s4_1Imm : Operand<i32> { let ParserMatchClass = s4_1ImmOperand;
@@ -67,42 +62,37 @@ let OperandType = "OPERAND_IMMEDIATE",
let DecoderMethod = "s4_2ImmDecoder"; }
def s4_3Imm : Operand<i32> { let ParserMatchClass = s4_3ImmOperand;
let DecoderMethod = "s4_3ImmDecoder"; }
- def u64Imm : Operand<i64> { let ParserMatchClass = u64ImmOperand; }
- def u32Imm : Operand<i32> { let ParserMatchClass = u32ImmOperand; }
+ def u64_0Imm : Operand<i64> { let ParserMatchClass = u64_0ImmOperand; }
+ def u32_0Imm : Operand<i32> { let ParserMatchClass = u32_0ImmOperand; }
def u26_6Imm : Operand<i32> { let ParserMatchClass = u26_6ImmOperand; }
- def u16Imm : Operand<i32> { let ParserMatchClass = u16ImmOperand; }
def u16_0Imm : Operand<i32> { let ParserMatchClass = u16_0ImmOperand; }
def u16_1Imm : Operand<i32> { let ParserMatchClass = u16_1ImmOperand; }
def u16_2Imm : Operand<i32> { let ParserMatchClass = u16_2ImmOperand; }
def u16_3Imm : Operand<i32> { let ParserMatchClass = u16_3ImmOperand; }
def u11_3Imm : Operand<i32> { let ParserMatchClass = u11_3ImmOperand; }
- def u10Imm : Operand<i32> { let ParserMatchClass = u10ImmOperand; }
- def u9Imm : Operand<i32> { let ParserMatchClass = u9ImmOperand; }
- def u8Imm : Operand<i32> { let ParserMatchClass = u8ImmOperand; }
- def u7Imm : Operand<i32> { let ParserMatchClass = u7ImmOperand; }
- def u6Imm : Operand<i32> { let ParserMatchClass = u6ImmOperand; }
+ def u10_0Imm : Operand<i32> { let ParserMatchClass = u10_0ImmOperand; }
+ def u9_0Imm : Operand<i32> { let ParserMatchClass = u9_0ImmOperand; }
+ def u8_0Imm : Operand<i32> { let ParserMatchClass = u8_0ImmOperand; }
+ def u7_0Imm : Operand<i32> { let ParserMatchClass = u7_0ImmOperand; }
def u6_0Imm : Operand<i32> { let ParserMatchClass = u6_0ImmOperand; }
def u6_1Imm : Operand<i32> { let ParserMatchClass = u6_1ImmOperand; }
def u6_2Imm : Operand<i32> { let ParserMatchClass = u6_2ImmOperand; }
def u6_3Imm : Operand<i32> { let ParserMatchClass = u6_3ImmOperand; }
- def u5Imm : Operand<i32> { let ParserMatchClass = u5ImmOperand; }
- def u5_0Imm : Operand<i32>;
+ def u5_0Imm : Operand<i32> { let ParserMatchClass = u5_0ImmOperand; }
def u5_1Imm : Operand<i32>;
def u5_2Imm : Operand<i32>;
def u5_3Imm : Operand<i32>;
- def u4Imm : Operand<i32> { let ParserMatchClass = u4ImmOperand; }
- def u4_0Imm : Operand<i32>;
+ def u4_0Imm : Operand<i32> { let ParserMatchClass = u4_0ImmOperand; }
def u4_1Imm : Operand<i32>;
def u4_2Imm : Operand<i32>;
def u4_3Imm : Operand<i32>;
- def u3Imm : Operand<i32> { let ParserMatchClass = u3ImmOperand; }
- def u3_0Imm : Operand<i32>;
+ def u3_0Imm : Operand<i32> { let ParserMatchClass = u3_0ImmOperand; }
def u3_1Imm : Operand<i32>;
def u3_2Imm : Operand<i32>;
def u3_3Imm : Operand<i32>;
- def u2Imm : Operand<i32> { let ParserMatchClass = u2ImmOperand; }
- def u1Imm : Operand<i32> { let ParserMatchClass = u1ImmOperand; }
- def n8Imm : Operand<i32> { let ParserMatchClass = n8ImmOperand; }
+ def u2_0Imm : Operand<i32> { let ParserMatchClass = u2_0ImmOperand; }
+ def u1_0Imm : Operand<i32> { let ParserMatchClass = u1_0ImmOperand; }
+ def n8_0Imm : Operand<i32> { let ParserMatchClass = n8_0ImmOperand; }
}
let OperandType = "OPERAND_IMMEDIATE" in {
@@ -117,15 +107,12 @@ let OperandType = "OPERAND_IMMEDIATE" in {
def s3_7Imm : Operand<i32> { let PrintMethod = "prints3_7ImmOperand";
let DecoderMethod = "s3_6ImmDecoder";}
}
+def n1ConstOperand : AsmOperandClass { let Name = "n1Const"; }
+def n1Const : Operand<i32> { let ParserMatchClass = n1ConstOperand; }
//
// Immediate predicates
//
-def s32ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isInt<32>(v);
-}]>;
-
def s32_0ImmPred : PatLeaf<(i32 imm), [{
int64_t v = (int64_t)N->getSExtValue();
return isInt<32>(v);
@@ -146,47 +133,22 @@ def s29_3ImmPred : PatLeaf<(i32 imm), [{
return isShiftedInt<29,3>(v);
}]>;
-def s16ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isInt<16>(v);
-}]>;
-
-def s11_0ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isInt<11>(v);
-}]>;
-
-def s11_1ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<11,1>(v);
-}]>;
-
-def s11_2ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<11,2>(v);
-}]>;
-
-def s11_3ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedInt<11,3>(v);
-}]>;
-
-def s10ImmPred : PatLeaf<(i32 imm), [{
+def s10_0ImmPred : PatLeaf<(i32 imm), [{
int64_t v = (int64_t)N->getSExtValue();
return isInt<10>(v);
}]>;
-def s8ImmPred : PatLeaf<(i32 imm), [{
+def s8_0ImmPred : PatLeaf<(i32 imm), [{
int64_t v = (int64_t)N->getSExtValue();
return isInt<8>(v);
}]>;
-def s8Imm64Pred : PatLeaf<(i64 imm), [{
+def s8_0Imm64Pred : PatLeaf<(i64 imm), [{
int64_t v = (int64_t)N->getSExtValue();
return isInt<8>(v);
}]>;
-def s6ImmPred : PatLeaf<(i32 imm), [{
+def s6_0ImmPred : PatLeaf<(i32 imm), [{
int64_t v = (int64_t)N->getSExtValue();
return isInt<6>(v);
}]>;
@@ -211,92 +173,31 @@ def s4_3ImmPred : PatLeaf<(i32 imm), [{
return isShiftedInt<4,3>(v);
}]>;
-def u64ImmPred : PatLeaf<(i64 imm), [{
- // Adding "N ||" to suppress gcc unused warning.
- return (N || true);
-}]>;
-
-def u32ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isUInt<32>(v);
-}]>;
-
def u32_0ImmPred : PatLeaf<(i32 imm), [{
int64_t v = (int64_t)N->getSExtValue();
return isUInt<32>(v);
}]>;
-def u31_1ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedUInt<31,1>(v);
-}]>;
-
-def u30_2ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedUInt<30,2>(v);
-}]>;
-
-def u29_3ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedUInt<29,3>(v);
-}]>;
-
-def u26_6ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedUInt<26,6>(v);
-}]>;
-
def u16_0ImmPred : PatLeaf<(i32 imm), [{
int64_t v = (int64_t)N->getSExtValue();
return isUInt<16>(v);
}]>;
-def u16_1ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedUInt<16,1>(v);
-}]>;
-
-def u16_2ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedUInt<16,2>(v);
-}]>;
-
def u11_3ImmPred : PatLeaf<(i32 imm), [{
int64_t v = (int64_t)N->getSExtValue();
return isShiftedUInt<11,3>(v);
}]>;
-def u10ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isUInt<10>(v);
-}]>;
-
-def u9ImmPred : PatLeaf<(i32 imm), [{
+def u9_0ImmPred : PatLeaf<(i32 imm), [{
int64_t v = (int64_t)N->getSExtValue();
return isUInt<9>(v);
}]>;
-def u8ImmPred : PatLeaf<(i32 imm), [{
+def u8_0ImmPred : PatLeaf<(i32 imm), [{
int64_t v = (int64_t)N->getSExtValue();
return isUInt<8>(v);
}]>;
-def u7StrictPosImmPred : ImmLeaf<i32, [{
- // u7StrictPosImmPred predicate - True if the immediate fits in an 7-bit
- // unsigned field and is strictly greater than 0.
- return isUInt<7>(Imm) && Imm > 0;
-}]>;
-
-def u7ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isUInt<7>(v);
-}]>;
-
-def u6ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isUInt<6>(v);
-}]>;
-
def u6_0ImmPred : PatLeaf<(i32 imm), [{
int64_t v = (int64_t)N->getSExtValue();
return isUInt<6>(v);
@@ -312,182 +213,87 @@ def u6_2ImmPred : PatLeaf<(i32 imm), [{
return isShiftedUInt<6,2>(v);
}]>;
-def u6_3ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isShiftedUInt<6,3>(v);
-}]>;
-
-def u5ImmPred : PatLeaf<(i32 imm), [{
+def u5_0ImmPred : PatLeaf<(i32 imm), [{
int64_t v = (int64_t)N->getSExtValue();
return isUInt<5>(v);
}]>;
-def u4ImmPred : PatLeaf<(i32 imm), [{
+def u4_0ImmPred : PatLeaf<(i32 imm), [{
int64_t v = (int64_t)N->getSExtValue();
return isUInt<4>(v);
}]>;
-def u3ImmPred : PatLeaf<(i32 imm), [{
+def u3_0ImmPred : PatLeaf<(i32 imm), [{
int64_t v = (int64_t)N->getSExtValue();
return isUInt<3>(v);
}]>;
-def u2ImmPred : PatLeaf<(i32 imm), [{
+def u2_0ImmPred : PatLeaf<(i32 imm), [{
int64_t v = (int64_t)N->getSExtValue();
return isUInt<2>(v);
}]>;
-def u1ImmPred : PatLeaf<(i1 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isUInt<1>(v);
-}]>;
-
-def u1ImmPred32 : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- return isUInt<1>(v);
-}]>;
-
-def m5ImmPred : PatLeaf<(i32 imm), [{
- // m5ImmPred predicate - True if the number is in range -1 .. -31
- // and will fit in a 5 bit field when made positive, for use in memops.
- int64_t v = (int64_t)N->getSExtValue();
- return (-31 <= v && v <= -1);
-}]>;
-
-//InN means negative integers in [-(2^N - 1), 0]
-def n8ImmPred : PatLeaf<(i32 imm), [{
- // n8ImmPred predicate - True if the immediate fits in a 8-bit signed
- // field.
- int64_t v = (int64_t)N->getSExtValue();
- return (-255 <= v && v <= 0);
-}]>;
-
-def nOneImmPred : PatLeaf<(i32 imm), [{
- // nOneImmPred predicate - True if the immediate is -1.
- int64_t v = (int64_t)N->getSExtValue();
- return (-1 == v);
-}]>;
-
-def Set5ImmPred : PatLeaf<(i32 imm), [{
- // Set5ImmPred predicate - True if the number is in the series of values.
- // [ 2^0, 2^1, ... 2^31 ]
- // For use in setbit immediate.
- uint32_t v = (int32_t)N->getSExtValue();
- // Constrain to 32 bits, and then check for single bit.
- return ImmIsSingleBit(v);
-}]>;
-
-def Clr5ImmPred : PatLeaf<(i32 imm), [{
- // Clr5ImmPred predicate - True if the number is in the series of
- // bit negated values.
- // [ 2^0, 2^1, ... 2^31 ]
- // For use in clrbit immediate.
- // Note: we are bit NOTing the value.
- uint32_t v = ~ (int32_t)N->getSExtValue();
- // Constrain to 32 bits, and then check for single bit.
- return ImmIsSingleBit(v);
-}]>;
-
// Extendable immediate operands.
def f32ExtOperand : AsmOperandClass { let Name = "f32Ext"; }
-def s16ExtOperand : AsmOperandClass { let Name = "s16Ext"; }
-def s12ExtOperand : AsmOperandClass { let Name = "s12Ext"; }
-def s10ExtOperand : AsmOperandClass { let Name = "s10Ext"; }
-def s9ExtOperand : AsmOperandClass { let Name = "s9Ext"; }
-def s8ExtOperand : AsmOperandClass { let Name = "s8Ext"; }
-def s7ExtOperand : AsmOperandClass { let Name = "s7Ext"; }
-def s6ExtOperand : AsmOperandClass { let Name = "s6Ext"; }
+def s16_0ExtOperand : AsmOperandClass { let Name = "s16_0Ext"; }
+def s12_0ExtOperand : AsmOperandClass { let Name = "s12_0Ext"; }
+def s10_0ExtOperand : AsmOperandClass { let Name = "s10_0Ext"; }
+def s9_0ExtOperand : AsmOperandClass { let Name = "s9_0Ext"; }
+def s8_0ExtOperand : AsmOperandClass { let Name = "s8_0Ext"; }
+def s7_0ExtOperand : AsmOperandClass { let Name = "s7_0Ext"; }
+def s6_0ExtOperand : AsmOperandClass { let Name = "s6_0Ext"; }
def s11_0ExtOperand : AsmOperandClass { let Name = "s11_0Ext"; }
def s11_1ExtOperand : AsmOperandClass { let Name = "s11_1Ext"; }
def s11_2ExtOperand : AsmOperandClass { let Name = "s11_2Ext"; }
def s11_3ExtOperand : AsmOperandClass { let Name = "s11_3Ext"; }
-def u6ExtOperand : AsmOperandClass { let Name = "u6Ext"; }
-def u7ExtOperand : AsmOperandClass { let Name = "u7Ext"; }
-def u8ExtOperand : AsmOperandClass { let Name = "u8Ext"; }
-def u9ExtOperand : AsmOperandClass { let Name = "u9Ext"; }
-def u10ExtOperand : AsmOperandClass { let Name = "u10Ext"; }
def u6_0ExtOperand : AsmOperandClass { let Name = "u6_0Ext"; }
+def u7_0ExtOperand : AsmOperandClass { let Name = "u7_0Ext"; }
+def u8_0ExtOperand : AsmOperandClass { let Name = "u8_0Ext"; }
+def u9_0ExtOperand : AsmOperandClass { let Name = "u9_0Ext"; }
+def u10_0ExtOperand : AsmOperandClass { let Name = "u10_0Ext"; }
def u6_1ExtOperand : AsmOperandClass { let Name = "u6_1Ext"; }
def u6_2ExtOperand : AsmOperandClass { let Name = "u6_2Ext"; }
def u6_3ExtOperand : AsmOperandClass { let Name = "u6_3Ext"; }
-def u32MustExtOperand : AsmOperandClass { let Name = "u32MustExt"; }
+def u32_0MustExtOperand : AsmOperandClass { let Name = "u32_0MustExt"; }
let OperandType = "OPERAND_IMMEDIATE", PrintMethod = "printExtOperand",
DecoderMethod = "unsignedImmDecoder" in {
def f32Ext : Operand<f32> { let ParserMatchClass = f32ExtOperand; }
- def s16Ext : Operand<i32> { let ParserMatchClass = s16ExtOperand;
- let DecoderMethod = "s16ImmDecoder"; }
- def s12Ext : Operand<i32> { let ParserMatchClass = s12ExtOperand;
- let DecoderMethod = "s12ImmDecoder"; }
+ def s16_0Ext : Operand<i32> { let ParserMatchClass = s16_0ExtOperand;
+ let DecoderMethod = "s16_0ImmDecoder"; }
+ def s12_0Ext : Operand<i32> { let ParserMatchClass = s12_0ExtOperand;
+ let DecoderMethod = "s12_0ImmDecoder"; }
def s11_0Ext : Operand<i32> { let ParserMatchClass = s11_0ExtOperand;
- let DecoderMethod = "s11_0ImmDecoder"; }
+ let DecoderMethod = "s11_0ImmDecoder"; }
def s11_1Ext : Operand<i32> { let ParserMatchClass = s11_1ExtOperand;
- let DecoderMethod = "s11_1ImmDecoder"; }
+ let DecoderMethod = "s11_1ImmDecoder"; }
def s11_2Ext : Operand<i32> { let ParserMatchClass = s11_2ExtOperand;
- let DecoderMethod = "s11_2ImmDecoder"; }
+ let DecoderMethod = "s11_2ImmDecoder"; }
def s11_3Ext : Operand<i32> { let ParserMatchClass = s11_3ExtOperand;
- let DecoderMethod = "s11_3ImmDecoder"; }
- def s10Ext : Operand<i32> { let ParserMatchClass = s10ExtOperand;
- let DecoderMethod = "s10ImmDecoder"; }
- def s9Ext : Operand<i32> { let ParserMatchClass = s9ExtOperand;
- let DecoderMethod = "s90ImmDecoder"; }
- def s8Ext : Operand<i32> { let ParserMatchClass = s8ExtOperand;
- let DecoderMethod = "s8ImmDecoder"; }
- def s7Ext : Operand<i32> { let ParserMatchClass = s7ExtOperand; }
- def s6Ext : Operand<i32> { let ParserMatchClass = s6ExtOperand;
- let DecoderMethod = "s6_0ImmDecoder"; }
- def u6Ext : Operand<i32> { let ParserMatchClass = u6ExtOperand; }
- def u7Ext : Operand<i32> { let ParserMatchClass = u7ExtOperand; }
- def u8Ext : Operand<i32> { let ParserMatchClass = u8ExtOperand; }
- def u9Ext : Operand<i32> { let ParserMatchClass = u9ExtOperand; }
- def u10Ext : Operand<i32> { let ParserMatchClass = u10ExtOperand; }
+ let DecoderMethod = "s11_3ImmDecoder"; }
+ def s10_0Ext : Operand<i32> { let ParserMatchClass = s10_0ExtOperand;
+ let DecoderMethod = "s10_0ImmDecoder"; }
+ def s9_0Ext : Operand<i32> { let ParserMatchClass = s9_0ExtOperand;
+ let DecoderMethod = "s9_0ImmDecoder"; }
+ def s8_0Ext : Operand<i32> { let ParserMatchClass = s8_0ExtOperand;
+ let DecoderMethod = "s8_0ImmDecoder"; }
+ def s7_0Ext : Operand<i32> { let ParserMatchClass = s7_0ExtOperand; }
+ def s6_0Ext : Operand<i32> { let ParserMatchClass = s6_0ExtOperand;
+ let DecoderMethod = "s6_0ImmDecoder"; }
+ def u7_0Ext : Operand<i32> { let ParserMatchClass = u7_0ExtOperand; }
+ def u8_0Ext : Operand<i32> { let ParserMatchClass = u8_0ExtOperand; }
+ def u9_0Ext : Operand<i32> { let ParserMatchClass = u9_0ExtOperand; }
+ def u10_0Ext : Operand<i32> { let ParserMatchClass = u10_0ExtOperand; }
def u6_0Ext : Operand<i32> { let ParserMatchClass = u6_0ExtOperand; }
def u6_1Ext : Operand<i32> { let ParserMatchClass = u6_1ExtOperand; }
def u6_2Ext : Operand<i32> { let ParserMatchClass = u6_2ExtOperand; }
def u6_3Ext : Operand<i32> { let ParserMatchClass = u6_3ExtOperand; }
- def u32MustExt : Operand<i32> { let ParserMatchClass = u32MustExtOperand; }
+ def u32_0MustExt : Operand<i32> { let ParserMatchClass = u32_0MustExtOperand; }
}
-def s4_7ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (HST->hasV60TOps())
- // Return true if the immediate can fit in a 10-bit sign extended field and
- // is 128-byte aligned.
- return isShiftedInt<4,7>(v);
- return false;
-}]>;
-
-def s3_7ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (HST->hasV60TOps())
- // Return true if the immediate can fit in a 9-bit sign extended field and
- // is 128-byte aligned.
- return isShiftedInt<3,7>(v);
- return false;
-}]>;
-
-def s4_6ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (HST->hasV60TOps())
- // Return true if the immediate can fit in a 10-bit sign extended field and
- // is 64-byte aligned.
- return isShiftedInt<4,6>(v);
- return false;
-}]>;
-
-def s3_6ImmPred : PatLeaf<(i32 imm), [{
- int64_t v = (int64_t)N->getSExtValue();
- if (HST->hasV60TOps())
- // Return true if the immediate can fit in a 9-bit sign extended field and
- // is 64-byte aligned.
- return isShiftedInt<3,6>(v);
- return false;
-}]>;
-
-
// This complex pattern exists only to create a machine instruction operand
// of type "frame index". There doesn't seem to be a way to do that directly
// in the patterns.
@@ -524,12 +330,3 @@ def calltarget : Operand<i32> {
def bblabel : Operand<i32>;
def bbl : SDNode<"ISD::BasicBlock", SDTPtrLeaf, [], "BasicBlockSDNode">;
-
-// Return true if for a 32 to 64-bit sign-extended load.
-def is_sext_i32 : PatLeaf<(i64 DoubleRegs:$src1), [{
- LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
- if (!LD)
- return false;
- return LD->getExtensionType() == ISD::SEXTLOAD &&
- LD->getMemoryVT().getScalarType() == MVT::i32;
-}]>;
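The operand renaming above (s8Imm -> s8_0Imm, u6Imm -> u6_0Imm, and so on) makes every immediate follow the convention already used by the shifted variants: an sA_BImm or uA_BImm value has A significant bits and B implicit low zero bits, which is what the isShiftedInt<A,B> / isShiftedUInt<A,B> predicates test. A rough stand-alone approximation of the signed check, for illustration only:

#include <cassert>
#include <cstdint>

// Rough equivalent of isShiftedInt<A, B>: the low B bits must be zero, and
// the value shifted right by B must fit in A signed bits.
template <unsigned A, unsigned B>
bool isShiftedIntSketch(int64_t x) {
  if (x & ((1LL << B) - 1))
    return false;
  int64_t v = x >> B;
  return v >= -(1LL << (A - 1)) && v < (1LL << (A - 1));
}

int main() {
  assert(isShiftedIntSketch<4, 2>(28));   // s4_2: 28 = 7 << 2, in range
  assert(!isShiftedIntSketch<4, 2>(30));  // low bits not zero
  assert(!isShiftedIntSketch<4, 2>(64));  // 16 << 2 is out of the s4 range
  return 0;
}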
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
index 4dff0dbc2b71..89db46799cb3 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonOptAddrMode.cpp
@@ -12,24 +12,30 @@
#define DEBUG_TYPE "opt-addr-mode"
-#include "HexagonTargetMachine.h"
+#include "HexagonInstrInfo.h"
+#include "HexagonSubtarget.h"
+#include "MCTargetDesc/HexagonBaseInfo.h"
#include "RDFGraph.h"
#include "RDFLiveness.h"
-
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominanceFrontier.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include <cassert>
+#include <cstdint>
+#include <map>
static cl::opt<int> CodeGrowthLimit("hexagon-amode-growth-limit",
cl::Hidden, cl::init(0), cl::desc("Code growth limit for address mode "
@@ -39,28 +45,36 @@ using namespace llvm;
using namespace rdf;
namespace llvm {
+
FunctionPass *createHexagonOptAddrMode();
void initializeHexagonOptAddrModePass(PassRegistry &);
-}
+
+} // end namespace llvm
namespace {
+
class HexagonOptAddrMode : public MachineFunctionPass {
public:
static char ID;
+
HexagonOptAddrMode()
- : MachineFunctionPass(ID), HII(0), MDT(0), DFG(0), LV(0) {
+ : MachineFunctionPass(ID), HII(nullptr), MDT(nullptr), DFG(nullptr),
+ LV(nullptr) {
PassRegistry &R = *PassRegistry::getPassRegistry();
initializeHexagonOptAddrModePass(R);
}
- const char *getPassName() const override {
+
+ StringRef getPassName() const override {
return "Optimize addressing mode of load/store";
}
+
void getAnalysisUsage(AnalysisUsage &AU) const override {
MachineFunctionPass::getAnalysisUsage(AU);
AU.addRequired<MachineDominatorTree>();
AU.addRequired<MachineDominanceFrontier>();
AU.setPreservesAll();
}
+
bool runOnMachineFunction(MachineFunction &MF) override;
private:
@@ -79,12 +93,12 @@ private:
NodeAddr<UseNode *> UseN, unsigned UseMOnum);
bool analyzeUses(unsigned DefR, const NodeList &UNodeList,
InstrEvalMap &InstrEvalResult, short &SizeInc);
- bool hasRepForm(MachineInstr *MI, unsigned TfrDefR);
- bool canRemoveAddasl(NodeAddr<StmtNode *> AddAslSN, MachineInstr *MI,
+ bool hasRepForm(MachineInstr &MI, unsigned TfrDefR);
+ bool canRemoveAddasl(NodeAddr<StmtNode *> AddAslSN, MachineInstr &MI,
const NodeList &UNodeList);
void getAllRealUses(NodeAddr<StmtNode *> SN, NodeList &UNodeList);
bool allValidCandidates(NodeAddr<StmtNode *> SA, NodeList &UNodeList);
- short getBaseWithLongOffset(const MachineInstr *MI) const;
+ short getBaseWithLongOffset(const MachineInstr &MI) const;
void updateMap(NodeAddr<InstrNode *> IA);
bool constructDefMap(MachineBasicBlock *B);
bool changeStore(MachineInstr *OldMI, MachineOperand ImmOp,
@@ -93,7 +107,8 @@ private:
bool changeAddAsl(NodeAddr<UseNode *> AddAslUN, MachineInstr *AddAslMI,
const MachineOperand &ImmOp, unsigned ImmOpNum);
};
-}
+
+} // end anonymous namespace
char HexagonOptAddrMode::ID = 0;
@@ -104,14 +119,14 @@ INITIALIZE_PASS_DEPENDENCY(MachineDominanceFrontier)
INITIALIZE_PASS_END(HexagonOptAddrMode, "opt-amode", "Optimize addressing mode",
false, false)
-bool HexagonOptAddrMode::hasRepForm(MachineInstr *MI, unsigned TfrDefR) {
- const MCInstrDesc &MID = MI->getDesc();
+bool HexagonOptAddrMode::hasRepForm(MachineInstr &MI, unsigned TfrDefR) {
+ const MCInstrDesc &MID = MI.getDesc();
- if ((!MID.mayStore() && !MID.mayLoad()) || HII->isPredicated(*MI))
+ if ((!MID.mayStore() && !MID.mayLoad()) || HII->isPredicated(MI))
return false;
if (MID.mayStore()) {
- MachineOperand StOp = MI->getOperand(MI->getNumOperands() - 1);
+ MachineOperand StOp = MI.getOperand(MI.getNumOperands() - 1);
if (StOp.isReg() && StOp.getReg() == TfrDefR)
return false;
}
@@ -137,18 +152,18 @@ bool HexagonOptAddrMode::hasRepForm(MachineInstr *MI, unsigned TfrDefR) {
// Above three instructions can be replaced with Rd = memw(Rt<<#2 + ##foo+28)
bool HexagonOptAddrMode::canRemoveAddasl(NodeAddr<StmtNode *> AddAslSN,
- MachineInstr *MI,
+ MachineInstr &MI,
const NodeList &UNodeList) {
// Check offset size in addasl. If 'offset > 3', return false.
- const MachineOperand &OffsetOp = MI->getOperand(3);
+ const MachineOperand &OffsetOp = MI.getOperand(3);
if (!OffsetOp.isImm() || OffsetOp.getImm() > 3)
return false;
- unsigned OffsetReg = MI->getOperand(2).getReg();
+ unsigned OffsetReg = MI.getOperand(2).getReg();
RegisterRef OffsetRR;
NodeId OffsetRegRD = 0;
for (NodeAddr<UseNode *> UA : AddAslSN.Addr->members_if(DFG->IsUse, *DFG)) {
- RegisterRef RR = UA.Addr->getRegRef();
+ RegisterRef RR = UA.Addr->getRegRef(*DFG);
if (OffsetReg == RR.Reg) {
OffsetRR = RR;
OffsetRegRD = UA.Addr->getReachingDef();
@@ -162,25 +177,25 @@ bool HexagonOptAddrMode::canRemoveAddasl(NodeAddr<StmtNode *> AddAslSN,
RDefMap[OffsetRR][IA.Id] != OffsetRegRD)
return false;
- MachineInstr *UseMI = NodeAddr<StmtNode *>(IA).Addr->getCode();
+ MachineInstr &UseMI = *NodeAddr<StmtNode *>(IA).Addr->getCode();
NodeAddr<DefNode *> OffsetRegDN = DFG->addr<DefNode *>(OffsetRegRD);
// Reaching Def to an offset register can't be a phi.
if ((OffsetRegDN.Addr->getFlags() & NodeAttrs::PhiRef) &&
- MI->getParent() != UseMI->getParent())
+ MI.getParent() != UseMI.getParent())
return false;
- const MCInstrDesc &UseMID = UseMI->getDesc();
+ const MCInstrDesc &UseMID = UseMI.getDesc();
if ((!UseMID.mayLoad() && !UseMID.mayStore()) ||
HII->getAddrMode(UseMI) != HexagonII::BaseImmOffset ||
getBaseWithLongOffset(UseMI) < 0)
return false;
// Addasl output can't be a store value.
- if (UseMID.mayStore() && UseMI->getOperand(2).isReg() &&
- UseMI->getOperand(2).getReg() == MI->getOperand(0).getReg())
+ if (UseMID.mayStore() && UseMI.getOperand(2).isReg() &&
+ UseMI.getOperand(2).getReg() == MI.getOperand(0).getReg())
return false;
- for (auto &Mo : UseMI->operands())
+ for (auto &Mo : UseMI.operands())
if (Mo.isFI())
return false;
}
@@ -191,7 +206,7 @@ bool HexagonOptAddrMode::allValidCandidates(NodeAddr<StmtNode *> SA,
NodeList &UNodeList) {
for (auto I = UNodeList.rbegin(), E = UNodeList.rend(); I != E; ++I) {
NodeAddr<UseNode *> UN = *I;
- RegisterRef UR = UN.Addr->getRegRef();
+ RegisterRef UR = UN.Addr->getRegRef(*DFG);
NodeSet Visited, Defs;
const auto &ReachingDefs = LV->getAllReachingDefsRec(UR, UN, Visited, Defs);
if (ReachingDefs.size() > 1) {
@@ -215,7 +230,8 @@ void HexagonOptAddrMode::getAllRealUses(NodeAddr<StmtNode *> SA,
for (NodeAddr<DefNode *> DA : SA.Addr->members_if(DFG->IsDef, *DFG)) {
DEBUG(dbgs() << "\t\t[DefNode]: " << Print<NodeAddr<DefNode *>>(DA, *DFG)
<< "\n");
- RegisterRef DR = DA.Addr->getRegRef();
+ RegisterRef DR = DFG->normalizeRef(DA.Addr->getRegRef(*DFG));
+
auto UseSet = LV->getAllReachedUses(DR, DA);
for (auto UI : UseSet) {
@@ -232,13 +248,13 @@ void HexagonOptAddrMode::getAllRealUses(NodeAddr<StmtNode *> SA,
const Liveness::RefMap &phiUse = LV->getRealUses(id);
DEBUG(dbgs() << "\t\t\t\tphi real Uses"
<< Print<Liveness::RefMap>(phiUse, *DFG) << "\n");
- if (phiUse.size() > 0) {
+ if (!phiUse.empty()) {
for (auto I : phiUse) {
- if (DR != I.first)
+ if (DR.Reg != I.first)
continue;
auto phiUseSet = I.second;
for (auto phiUI : phiUseSet) {
- NodeAddr<UseNode *> phiUA = DFG->addr<UseNode *>(phiUI);
+ NodeAddr<UseNode *> phiUA = DFG->addr<UseNode *>(phiUI.first);
UNodeList.push_back(phiUA);
}
}
@@ -261,8 +277,8 @@ bool HexagonOptAddrMode::analyzeUses(unsigned tfrDefR,
bool CanBeReplaced = false;
NodeAddr<UseNode *> UN = *I;
NodeAddr<StmtNode *> SN = UN.Addr->getOwner(*DFG);
- MachineInstr *MI = SN.Addr->getCode();
- const MCInstrDesc &MID = MI->getDesc();
+ MachineInstr &MI = *SN.Addr->getCode();
+ const MCInstrDesc &MID = MI.getDesc();
if ((MID.mayLoad() || MID.mayStore())) {
if (!hasRepForm(MI, tfrDefR)) {
KeepTfr = true;
@@ -270,10 +286,10 @@ bool HexagonOptAddrMode::analyzeUses(unsigned tfrDefR,
}
SizeInc++;
CanBeReplaced = true;
- } else if (MI->getOpcode() == Hexagon::S2_addasl_rrri) {
+ } else if (MI.getOpcode() == Hexagon::S2_addasl_rrri) {
NodeList AddaslUseList;
- DEBUG(dbgs() << "\nGetting ReachedUses for === " << *MI << "\n");
+ DEBUG(dbgs() << "\nGetting ReachedUses for === " << MI << "\n");
getAllRealUses(SN, AddaslUseList);
// Process phi nodes.
if (allValidCandidates(SN, AddaslUseList) &&
@@ -290,7 +306,7 @@ bool HexagonOptAddrMode::analyzeUses(unsigned tfrDefR,
// M4_mpyrr_addr -> M4_mpyrr_addi
KeepTfr = true;
- InstrEvalResult[MI] = CanBeReplaced;
+ InstrEvalResult[&MI] = CanBeReplaced;
HasRepInstr |= CanBeReplaced;
}
@@ -313,8 +329,8 @@ bool HexagonOptAddrMode::changeLoad(MachineInstr *OldMI, MachineOperand ImmOp,
MachineInstrBuilder MIB;
if (ImmOpNum == 1) {
- if (HII->getAddrMode(OldMI) == HexagonII::BaseRegOffset) {
- short NewOpCode = HII->getBaseWithLongOffset(OldMI);
+ if (HII->getAddrMode(*OldMI) == HexagonII::BaseRegOffset) {
+ short NewOpCode = HII->getBaseWithLongOffset(*OldMI);
assert(NewOpCode >= 0 && "Invalid New opcode\n");
MIB = BuildMI(*BB, InsertPt, OldMI->getDebugLoc(), HII->get(NewOpCode));
MIB.addOperand(OldMI->getOperand(0));
@@ -323,8 +339,8 @@ bool HexagonOptAddrMode::changeLoad(MachineInstr *OldMI, MachineOperand ImmOp,
MIB.addOperand(ImmOp);
OpStart = 4;
Changed = true;
- } else if (HII->getAddrMode(OldMI) == HexagonII::BaseImmOffset) {
- short NewOpCode = HII->getAbsoluteForm(OldMI);
+ } else if (HII->getAddrMode(*OldMI) == HexagonII::BaseImmOffset) {
+ short NewOpCode = HII->getAbsoluteForm(*OldMI);
assert(NewOpCode >= 0 && "Invalid New opcode\n");
MIB = BuildMI(*BB, InsertPt, OldMI->getDebugLoc(), HII->get(NewOpCode))
.addOperand(OldMI->getOperand(0));
@@ -340,7 +356,7 @@ bool HexagonOptAddrMode::changeLoad(MachineInstr *OldMI, MachineOperand ImmOp,
DEBUG(dbgs() << "[Changing]: " << *OldMI << "\n");
DEBUG(dbgs() << "[TO]: " << MIB << "\n");
} else if (ImmOpNum == 2 && OldMI->getOperand(3).getImm() == 0) {
- short NewOpCode = HII->xformRegToImmOffset(OldMI);
+ short NewOpCode = HII->xformRegToImmOffset(*OldMI);
assert(NewOpCode >= 0 && "Invalid New opcode\n");
MIB = BuildMI(*BB, InsertPt, OldMI->getDebugLoc(), HII->get(NewOpCode));
MIB.addOperand(OldMI->getOperand(0));
@@ -370,8 +386,8 @@ bool HexagonOptAddrMode::changeStore(MachineInstr *OldMI, MachineOperand ImmOp,
++InsertPt;
MachineInstrBuilder MIB;
if (ImmOpNum == 0) {
- if (HII->getAddrMode(OldMI) == HexagonII::BaseRegOffset) {
- short NewOpCode = HII->getBaseWithLongOffset(OldMI);
+ if (HII->getAddrMode(*OldMI) == HexagonII::BaseRegOffset) {
+ short NewOpCode = HII->getBaseWithLongOffset(*OldMI);
assert(NewOpCode >= 0 && "Invalid New opcode\n");
MIB = BuildMI(*BB, InsertPt, OldMI->getDebugLoc(), HII->get(NewOpCode));
MIB.addOperand(OldMI->getOperand(1));
@@ -379,8 +395,8 @@ bool HexagonOptAddrMode::changeStore(MachineInstr *OldMI, MachineOperand ImmOp,
MIB.addOperand(ImmOp);
MIB.addOperand(OldMI->getOperand(3));
OpStart = 4;
- } else if (HII->getAddrMode(OldMI) == HexagonII::BaseImmOffset) {
- short NewOpCode = HII->getAbsoluteForm(OldMI);
+ } else if (HII->getAddrMode(*OldMI) == HexagonII::BaseImmOffset) {
+ short NewOpCode = HII->getAbsoluteForm(*OldMI);
assert(NewOpCode >= 0 && "Invalid New opcode\n");
MIB = BuildMI(*BB, InsertPt, OldMI->getDebugLoc(), HII->get(NewOpCode));
const GlobalValue *GV = ImmOp.getGlobal();
@@ -393,7 +409,7 @@ bool HexagonOptAddrMode::changeStore(MachineInstr *OldMI, MachineOperand ImmOp,
DEBUG(dbgs() << "[Changing]: " << *OldMI << "\n");
DEBUG(dbgs() << "[TO]: " << MIB << "\n");
} else if (ImmOpNum == 1 && OldMI->getOperand(2).getImm() == 0) {
- short NewOpCode = HII->xformRegToImmOffset(OldMI);
+ short NewOpCode = HII->xformRegToImmOffset(*OldMI);
assert(NewOpCode >= 0 && "Invalid New opcode\n");
MIB = BuildMI(*BB, InsertPt, OldMI->getDebugLoc(), HII->get(NewOpCode));
MIB.addOperand(OldMI->getOperand(0));
@@ -411,7 +427,7 @@ bool HexagonOptAddrMode::changeStore(MachineInstr *OldMI, MachineOperand ImmOp,
return Changed;
}
-short HexagonOptAddrMode::getBaseWithLongOffset(const MachineInstr *MI) const {
+short HexagonOptAddrMode::getBaseWithLongOffset(const MachineInstr &MI) const {
if (HII->getAddrMode(MI) == HexagonII::BaseImmOffset) {
short TempOpCode = HII->getBaseWithRegOffset(MI);
return HII->getBaseWithLongOffset(TempOpCode);
@@ -442,11 +458,11 @@ bool HexagonOptAddrMode::changeAddAsl(NodeAddr<UseNode *> AddAslUN,
DEBUG(dbgs() << "[MI <BB#" << UseMI->getParent()->getNumber()
<< ">]: " << *UseMI << "\n");
const MCInstrDesc &UseMID = UseMI->getDesc();
- assert(HII->getAddrMode(UseMI) == HexagonII::BaseImmOffset);
+ assert(HII->getAddrMode(*UseMI) == HexagonII::BaseImmOffset);
auto UsePos = MachineBasicBlock::iterator(UseMI);
MachineBasicBlock::instr_iterator InsertPt = UsePos.getInstrIterator();
- short NewOpCode = getBaseWithLongOffset(UseMI);
+ short NewOpCode = getBaseWithLongOffset(*UseMI);
assert(NewOpCode >= 0 && "Invalid New opcode\n");
unsigned OpStart;
@@ -575,7 +591,7 @@ bool HexagonOptAddrMode::processBlock(NodeAddr<BlockNode *> BA) {
void HexagonOptAddrMode::updateMap(NodeAddr<InstrNode *> IA) {
RegisterSet RRs;
for (NodeAddr<RefNode *> RA : IA.Addr->members(*DFG))
- RRs.insert(RA.Addr->getRegRef());
+ RRs.insert(RA.Addr->getRegRef(*DFG));
bool Common = false;
for (auto &R : RDefMap) {
if (!RRs.count(R.first))
@@ -587,7 +603,7 @@ void HexagonOptAddrMode::updateMap(NodeAddr<InstrNode *> IA) {
return;
for (auto &R : RDefMap) {
- auto F = DefM.find(R.first);
+ auto F = DefM.find(R.first.Reg);
if (F == DefM.end() || F->second.empty())
continue;
R.second[IA.Id] = F->second.top()->Id;
@@ -622,8 +638,7 @@ bool HexagonOptAddrMode::runOnMachineFunction(MachineFunction &MF) {
const auto &TRI = *MF.getSubtarget().getRegisterInfo();
const TargetOperandInfo TOI(*HII);
- RegisterAliasInfo RAI(TRI);
- DataFlowGraph G(MF, *HII, TRI, *MDT, MDF, RAI, TOI);
+ DataFlowGraph G(MF, *HII, TRI, *MDT, MDF, TOI);
G.build();
DFG = &G;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp
index 7937a7908b06..101de3d8fbee 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonOptimizeSZextends.cpp
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/StackProtector.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
@@ -38,13 +37,9 @@ namespace {
}
bool runOnFunction(Function &F) override;
- const char *getPassName() const override {
- return "Remove sign extends";
- }
+ StringRef getPassName() const override { return "Remove sign extends"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<MachineFunctionAnalysis>();
- AU.addPreserved<MachineFunctionAnalysis>();
AU.addPreserved<StackProtector>();
FunctionPass::getAnalysisUsage(AU);
}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonPatterns.td b/contrib/llvm/lib/Target/Hexagon/HexagonPatterns.td
new file mode 100644
index 000000000000..ad81287007e6
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonPatterns.td
@@ -0,0 +1,3347 @@
+// Pattern fragment that combines the value type and the register class
+// into a single parameter.
+// The pat frags in the definitions below need to have a named register,
+// otherwise i32 will be assumed regardless of the register class. The
+// name of the register does not matter.
+def I1 : PatLeaf<(i1 PredRegs:$R)>;
+def I32 : PatLeaf<(i32 IntRegs:$R)>;
+def I64 : PatLeaf<(i64 DoubleRegs:$R)>;
+def F32 : PatLeaf<(f32 IntRegs:$R)>;
+def F64 : PatLeaf<(f64 DoubleRegs:$R)>;
+
+// Pattern fragments to extract the low and high subregisters from a
+// 64-bit value.
+def LoReg: OutPatFrag<(ops node:$Rs), (EXTRACT_SUBREG (i64 $Rs), isub_lo)>;
+def HiReg: OutPatFrag<(ops node:$Rs), (EXTRACT_SUBREG (i64 $Rs), isub_hi)>;
+
+def IsOrAdd: PatFrag<(ops node:$Addr, node:$off),
+ (or node:$Addr, node:$off), [{ return isOrEquivalentToAdd(N); }]>;
+
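IsOrAdd accepts an 'or' node only when it can be treated as an 'add'; that is the case whenever the two operands have no set bits in common, which is presumably what the isOrEquivalentToAdd helper (defined elsewhere in the backend) verifies. A small self-contained check of the underlying arithmetic:

#include <cassert>
#include <cstdint>

int main() {
  // If two values share no set bits, OR and ADD give identical results,
  // e.g. an aligned base address OR'ed with a small offset.
  uint32_t Base = 0xFFFFFF00u, Off = 0x2Cu;
  assert((Base & Off) == 0);          // operands are bit-disjoint
  assert((Base | Off) == Base + Off); // so the OR behaves like an ADD
  return 0;
}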
+def IsPow2_32 : PatLeaf<(i32 imm), [{
+ uint32_t V = N->getZExtValue();
+ return isPowerOf2_32(V);
+}]>;
+
+def IsPow2_64 : PatLeaf<(i64 imm), [{
+ uint64_t V = N->getZExtValue();
+ return isPowerOf2_64(V);
+}]>;
+
+def IsNPow2_32 : PatLeaf<(i32 imm), [{
+ uint32_t NV = ~N->getZExtValue();
+ return isPowerOf2_32(NV);
+}]>;
+
+def IsPow2_64L : PatLeaf<(i64 imm), [{
+ uint64_t V = N->getZExtValue();
+ return isPowerOf2_64(V) && Log2_64(V) < 32;
+}]>;
+
+def IsPow2_64H : PatLeaf<(i64 imm), [{
+ uint64_t V = N->getZExtValue();
+ return isPowerOf2_64(V) && Log2_64(V) >= 32;
+}]>;
+
+def IsNPow2_64L : PatLeaf<(i64 imm), [{
+ uint64_t NV = ~N->getZExtValue();
+ return isPowerOf2_64(NV) && Log2_64(NV) < 32;
+}]>;
+
+def IsNPow2_64H : PatLeaf<(i64 imm), [{
+ uint64_t NV = ~N->getZExtValue();
+ return isPowerOf2_64(NV) && Log2_64(NV) >= 32;
+}]>;
+
+def SDEC1 : SDNodeXForm<imm, [{
+ int32_t V = N->getSExtValue();
+ return CurDAG->getTargetConstant(V-1, SDLoc(N), MVT::i32);
+}]>;
+
+def UDEC1 : SDNodeXForm<imm, [{
+ uint32_t V = N->getZExtValue();
+ assert(V >= 1);
+ return CurDAG->getTargetConstant(V-1, SDLoc(N), MVT::i32);
+}]>;
+
+def UDEC32 : SDNodeXForm<imm, [{
+ uint32_t V = N->getZExtValue();
+ assert(V >= 32);
+ return CurDAG->getTargetConstant(V-32, SDLoc(N), MVT::i32);
+}]>;
+
+def Log2_32 : SDNodeXForm<imm, [{
+ uint32_t V = N->getZExtValue();
+ return CurDAG->getTargetConstant(Log2_32(V), SDLoc(N), MVT::i32);
+}]>;
+
+def Log2_64 : SDNodeXForm<imm, [{
+ uint64_t V = N->getZExtValue();
+ return CurDAG->getTargetConstant(Log2_64(V), SDLoc(N), MVT::i32);
+}]>;
+
+def LogN2_32 : SDNodeXForm<imm, [{
+ uint32_t NV = ~N->getZExtValue();
+ return CurDAG->getTargetConstant(Log2_32(NV), SDLoc(N), MVT::i32);
+}]>;
+
+def LogN2_64 : SDNodeXForm<imm, [{
+ uint64_t NV = ~N->getZExtValue();
+ return CurDAG->getTargetConstant(Log2_64(NV), SDLoc(N), MVT::i32);
+}]>;
+
+
+class T_CMP_pat <InstHexagon MI, PatFrag OpNode, PatLeaf ImmPred>
+ : Pat<(i1 (OpNode I32:$src1, ImmPred:$src2)),
+ (MI IntRegs:$src1, ImmPred:$src2)>;
+
+def : T_CMP_pat <C2_cmpeqi, seteq, s10_0ImmPred>;
+def : T_CMP_pat <C2_cmpgti, setgt, s10_0ImmPred>;
+def : T_CMP_pat <C2_cmpgtui, setugt, u9_0ImmPred>;
+
+def SDTHexagonI64I32I32 : SDTypeProfile<1, 2,
+ [SDTCisVT<0, i64>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>;
+
+def HexagonCOMBINE : SDNode<"HexagonISD::COMBINE", SDTHexagonI64I32I32>;
+def HexagonPACKHL : SDNode<"HexagonISD::PACKHL", SDTHexagonI64I32I32>;
+
+// Pats for instruction selection.
+class BinOp32_pat<SDNode Op, InstHexagon MI, ValueType ResT>
+ : Pat<(ResT (Op I32:$Rs, I32:$Rt)),
+ (ResT (MI IntRegs:$Rs, IntRegs:$Rt))>;
+
+def: BinOp32_pat<add, A2_add, i32>;
+def: BinOp32_pat<and, A2_and, i32>;
+def: BinOp32_pat<or, A2_or, i32>;
+def: BinOp32_pat<sub, A2_sub, i32>;
+def: BinOp32_pat<xor, A2_xor, i32>;
+
+def: BinOp32_pat<HexagonCOMBINE, A2_combinew, i64>;
+def: BinOp32_pat<HexagonPACKHL, S2_packhl, i64>;
+
+// Patfrag to convert the usual comparison patfrags (e.g. setlt) to ones
+// that reverse the order of the operands.
+class RevCmp<PatFrag F> : PatFrag<(ops node:$rhs, node:$lhs), F.Fragment>;
+
+// Pats for compares. They use PatFrags as operands, not SDNodes,
+// since seteq/setgt/etc. are defined as PatFrags.
+class T_cmp32_rr_pat<InstHexagon MI, PatFrag Op, ValueType VT>
+ : Pat<(VT (Op I32:$Rs, I32:$Rt)),
+ (MI IntRegs:$Rs, IntRegs:$Rt)>;
+
+def: T_cmp32_rr_pat<C2_cmpeq, seteq, i1>;
+def: T_cmp32_rr_pat<C2_cmpgt, setgt, i1>;
+def: T_cmp32_rr_pat<C2_cmpgtu, setugt, i1>;
+
+def: T_cmp32_rr_pat<C2_cmpgt, RevCmp<setlt>, i1>;
+def: T_cmp32_rr_pat<C2_cmpgtu, RevCmp<setult>, i1>;
+
+def: Pat<(select I1:$Pu, I32:$Rs, I32:$Rt),
+ (C2_mux PredRegs:$Pu, IntRegs:$Rs, IntRegs:$Rt)>;
+
+def: Pat<(add I32:$Rs, s32_0ImmPred:$s16),
+ (A2_addi I32:$Rs, imm:$s16)>;
+
+def: Pat<(or I32:$Rs, s32_0ImmPred:$s10),
+ (A2_orir IntRegs:$Rs, imm:$s10)>;
+def: Pat<(and I32:$Rs, s32_0ImmPred:$s10),
+ (A2_andir IntRegs:$Rs, imm:$s10)>;
+
+def: Pat<(sub s32_0ImmPred:$s10, IntRegs:$Rs),
+ (A2_subri imm:$s10, IntRegs:$Rs)>;
+
+// Rd = not(Rs) gets mapped to Rd=sub(#-1, Rs).
+def: Pat<(not I32:$src1),
+ (A2_subri -1, IntRegs:$src1)>;
+
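The not-to-subtract mapping above rests on the two's-complement identity ~x == -1 - x; a tiny stand-alone check, nothing Hexagon-specific:

#include <cassert>
#include <cstdint>

int main() {
  // In two's complement, bitwise NOT is the same as subtracting from -1,
  // which is why "not(Rs)" can be lowered to "sub(#-1, Rs)".
  const int32_t vals[] = {0, 1, -1, 42, INT32_MIN, INT32_MAX};
  for (int32_t x : vals)
    assert(~x == -1 - x);
  return 0;
}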
+def: Pat<(s32_0ImmPred:$s16), (A2_tfrsi imm:$s16)>;
+def: Pat<(s8_0Imm64Pred:$s8), (A2_tfrpi imm:$s8)>;
+
+def : Pat<(select I1:$Pu, s32_0ImmPred:$s8, I32:$Rs),
+ (C2_muxri I1:$Pu, imm:$s8, I32:$Rs)>;
+
+def : Pat<(select I1:$Pu, I32:$Rs, s32_0ImmPred:$s8),
+ (C2_muxir I1:$Pu, I32:$Rs, imm:$s8)>;
+
+def : Pat<(select I1:$Pu, s32_0ImmPred:$s8, s8_0ImmPred:$S8),
+ (C2_muxii I1:$Pu, imm:$s8, imm:$S8)>;
+
+def: Pat<(shl I32:$src1, (i32 16)), (A2_aslh I32:$src1)>;
+def: Pat<(sra I32:$src1, (i32 16)), (A2_asrh I32:$src1)>;
+def: Pat<(sext_inreg I32:$src1, i8), (A2_sxtb I32:$src1)>;
+def: Pat<(sext_inreg I32:$src1, i16), (A2_sxth I32:$src1)>;
+
+class T_vcmp_pat<InstHexagon MI, PatFrag Op, ValueType T>
+ : Pat<(i1 (Op (T DoubleRegs:$Rss), (T DoubleRegs:$Rtt))),
+ (i1 (MI DoubleRegs:$Rss, DoubleRegs:$Rtt))>;
+
+def: T_vcmp_pat<A2_vcmpbeq, seteq, v8i8>;
+def: T_vcmp_pat<A2_vcmpbgtu, setugt, v8i8>;
+def: T_vcmp_pat<A2_vcmpheq, seteq, v4i16>;
+def: T_vcmp_pat<A2_vcmphgt, setgt, v4i16>;
+def: T_vcmp_pat<A2_vcmphgtu, setugt, v4i16>;
+def: T_vcmp_pat<A2_vcmpweq, seteq, v2i32>;
+def: T_vcmp_pat<A2_vcmpwgt, setgt, v2i32>;
+def: T_vcmp_pat<A2_vcmpwgtu, setugt, v2i32>;
+
+// Add halfword.
+def: Pat<(sext_inreg (add I32:$src1, I32:$src2), i16),
+ (A2_addh_l16_ll I32:$src1, I32:$src2)>;
+
+def: Pat<(sra (add (shl I32:$src1, (i32 16)), I32:$src2), (i32 16)),
+ (A2_addh_l16_hl I32:$src1, I32:$src2)>;
+
+def: Pat<(shl (add I32:$src1, I32:$src2), (i32 16)),
+ (A2_addh_h16_ll I32:$src1, I32:$src2)>;
+
+// Subtract halfword.
+def: Pat<(sext_inreg (sub I32:$src1, I32:$src2), i16),
+ (A2_subh_l16_ll I32:$src1, I32:$src2)>;
+
+def: Pat<(shl (sub I32:$src1, I32:$src2), (i32 16)),
+ (A2_subh_h16_ll I32:$src1, I32:$src2)>;
+
+// Here, depending on the operand being selected, we'll either generate a
+// min or max instruction.
+// Ex:
+// (a>b)?a:b --> max(a,b) => Here the check performed is '>' and the value selected
+// is the larger of the two. So, the corresponding HexagonInst is passed in 'Inst'.
+// (a>b)?b:a --> min(a,b) => Here the check performed is '>' but the smaller value
+// is selected, and the corresponding HexagonInst is passed in 'SwapInst'.
+
+multiclass T_MinMax_pats <PatFrag Op, PatLeaf Val,
+ InstHexagon Inst, InstHexagon SwapInst> {
+ def: Pat<(select (i1 (Op Val:$src1, Val:$src2)), Val:$src1, Val:$src2),
+ (Inst Val:$src1, Val:$src2)>;
+ def: Pat<(select (i1 (Op Val:$src1, Val:$src2)), Val:$src2, Val:$src1),
+ (SwapInst Val:$src1, Val:$src2)>;
+}
+
+def IsPosHalf : PatLeaf<(i32 IntRegs:$a), [{
+ return isPositiveHalfWord(N);
+}]>;
+
+multiclass MinMax_pats <PatFrag Op, InstHexagon Inst, InstHexagon SwapInst> {
+ defm: T_MinMax_pats<Op, I32, Inst, SwapInst>;
+
+ def: Pat<(sext_inreg (select (i1 (Op IsPosHalf:$src1, IsPosHalf:$src2)),
+ IsPosHalf:$src1, IsPosHalf:$src2),
+ i16),
+ (Inst IntRegs:$src1, IntRegs:$src2)>;
+
+ def: Pat<(sext_inreg (select (i1 (Op IsPosHalf:$src1, IsPosHalf:$src2)),
+ IsPosHalf:$src2, IsPosHalf:$src1),
+ i16),
+ (SwapInst IntRegs:$src1, IntRegs:$src2)>;
+}
+
+let AddedComplexity = 200 in {
+ defm: MinMax_pats<setge, A2_max, A2_min>;
+ defm: MinMax_pats<setgt, A2_max, A2_min>;
+ defm: MinMax_pats<setle, A2_min, A2_max>;
+ defm: MinMax_pats<setlt, A2_min, A2_max>;
+ defm: MinMax_pats<setuge, A2_maxu, A2_minu>;
+ defm: MinMax_pats<setugt, A2_maxu, A2_minu>;
+ defm: MinMax_pats<setule, A2_minu, A2_maxu>;
+ defm: MinMax_pats<setult, A2_minu, A2_maxu>;
+}
+
+class T_cmp64_rr_pat<InstHexagon MI, PatFrag CmpOp>
+ : Pat<(i1 (CmpOp I64:$Rs, I64:$Rt)),
+ (i1 (MI DoubleRegs:$Rs, DoubleRegs:$Rt))>;
+
+def: T_cmp64_rr_pat<C2_cmpeqp, seteq>;
+def: T_cmp64_rr_pat<C2_cmpgtp, setgt>;
+def: T_cmp64_rr_pat<C2_cmpgtup, setugt>;
+def: T_cmp64_rr_pat<C2_cmpgtp, RevCmp<setlt>>;
+def: T_cmp64_rr_pat<C2_cmpgtup, RevCmp<setult>>;
+
+def: Pat<(i64 (add I64:$Rs, I64:$Rt)), (A2_addp I64:$Rs, I64:$Rt)>;
+def: Pat<(i64 (sub I64:$Rs, I64:$Rt)), (A2_subp I64:$Rs, I64:$Rt)>;
+
+def: Pat<(i64 (and I64:$Rs, I64:$Rt)), (A2_andp I64:$Rs, I64:$Rt)>;
+def: Pat<(i64 (or I64:$Rs, I64:$Rt)), (A2_orp I64:$Rs, I64:$Rt)>;
+def: Pat<(i64 (xor I64:$Rs, I64:$Rt)), (A2_xorp I64:$Rs, I64:$Rt)>;
+
+def: Pat<(i1 (not I1:$Ps)), (C2_not PredRegs:$Ps)>;
+
+def: Pat<(i1 (and I1:$Ps, I1:$Pt)), (C2_and I1:$Ps, I1:$Pt)>;
+def: Pat<(i1 (or I1:$Ps, I1:$Pt)), (C2_or I1:$Ps, I1:$Pt)>;
+def: Pat<(i1 (xor I1:$Ps, I1:$Pt)), (C2_xor I1:$Ps, I1:$Pt)>;
+def: Pat<(i1 (and I1:$Ps, (not I1:$Pt))), (C2_andn I1:$Ps, I1:$Pt)>;
+def: Pat<(i1 (or I1:$Ps, (not I1:$Pt))), (C2_orn I1:$Ps, I1:$Pt)>;
+
+def retflag : SDNode<"HexagonISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+def eh_return: SDNode<"HexagonISD::EH_RETURN", SDTNone, [SDNPHasChain]>;
+
+def: Pat<(br bb:$dst), (J2_jump brtarget:$dst)>;
+def: Pat<(brcond I1:$src1, bb:$block), (J2_jumpt PredRegs:$src1, bb:$block)>;
+def: Pat<(brind I32:$dst), (J2_jumpr IntRegs:$dst)>;
+
+def: Pat<(retflag), (PS_jmpret (i32 R31))>;
+def: Pat<(eh_return), (EH_RETURN_JMPR (i32 R31))>;
+
+// Patterns to select load-indexed (i.e. load from base+offset).
+multiclass Loadx_pat<PatFrag Load, ValueType VT, PatLeaf ImmPred,
+ InstHexagon MI> {
+ def: Pat<(VT (Load AddrFI:$fi)), (VT (MI AddrFI:$fi, 0))>;
+ def: Pat<(VT (Load (add (i32 AddrFI:$fi), ImmPred:$Off))),
+ (VT (MI AddrFI:$fi, imm:$Off))>;
+ def: Pat<(VT (Load (IsOrAdd (i32 AddrFI:$fi), ImmPred:$Off))),
+ (VT (MI AddrFI:$fi, imm:$Off))>;
+ def: Pat<(VT (Load (add I32:$Rs, ImmPred:$Off))),
+ (VT (MI IntRegs:$Rs, imm:$Off))>;
+ def: Pat<(VT (Load I32:$Rs)), (VT (MI IntRegs:$Rs, 0))>;
+}
+
+let AddedComplexity = 20 in {
+ defm: Loadx_pat<load, i32, s30_2ImmPred, L2_loadri_io>;
+ defm: Loadx_pat<load, i64, s29_3ImmPred, L2_loadrd_io>;
+ defm: Loadx_pat<atomic_load_8 , i32, s32_0ImmPred, L2_loadrub_io>;
+ defm: Loadx_pat<atomic_load_16, i32, s31_1ImmPred, L2_loadruh_io>;
+ defm: Loadx_pat<atomic_load_32, i32, s30_2ImmPred, L2_loadri_io>;
+ defm: Loadx_pat<atomic_load_64, i64, s29_3ImmPred, L2_loadrd_io>;
+
+ defm: Loadx_pat<extloadi1, i32, s32_0ImmPred, L2_loadrub_io>;
+ defm: Loadx_pat<extloadi8, i32, s32_0ImmPred, L2_loadrub_io>;
+ defm: Loadx_pat<extloadi16, i32, s31_1ImmPred, L2_loadruh_io>;
+ defm: Loadx_pat<sextloadi8, i32, s32_0ImmPred, L2_loadrb_io>;
+ defm: Loadx_pat<sextloadi16, i32, s31_1ImmPred, L2_loadrh_io>;
+ defm: Loadx_pat<zextloadi1, i32, s32_0ImmPred, L2_loadrub_io>;
+ defm: Loadx_pat<zextloadi8, i32, s32_0ImmPred, L2_loadrub_io>;
+ defm: Loadx_pat<zextloadi16, i32, s31_1ImmPred, L2_loadruh_io>;
+ // No sextloadi1.
+}
+
+// Sign-extending loads of i1 need to replicate the lowest bit throughout
+// the 32-bit value. Since the loaded value can only be 0 or 1, 0-v should
+// do the trick.
+let AddedComplexity = 20 in
+def: Pat<(i32 (sextloadi1 I32:$Rs)),
+ (A2_subri 0, (L2_loadrub_io IntRegs:$Rs, 0))>;
+
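The 0-v trick in the comment above is easy to sanity-check: negating a value known to be 0 or 1 yields either all-zero or all-one bits, which is exactly what a sign-extending i1 load must produce. An illustrative check in plain C++:

#include <cassert>
#include <cstdint>

int main() {
  // A loaded i1 is 0 or 1; computing 0 - v replicates that single bit
  // across the whole 32-bit register.
  const uint32_t loaded[] = {0u, 1u};
  for (uint32_t v : loaded) {
    uint32_t sext = 0u - v;
    assert(sext == (v ? 0xFFFFFFFFu : 0u));
  }
  return 0;
}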
+def: Pat<(i32 (mul I32:$src1, I32:$src2)), (M2_mpyi I32:$src1, I32:$src2)>;
+def: Pat<(i32 (mulhs I32:$src1, I32:$src2)), (M2_mpy_up I32:$src1, I32:$src2)>;
+def: Pat<(i32 (mulhu I32:$src1, I32:$src2)), (M2_mpyu_up I32:$src1, I32:$src2)>;
+
+def: Pat<(mul IntRegs:$Rs, u32_0ImmPred:$u8),
+ (M2_mpysip IntRegs:$Rs, imm:$u8)>;
+def: Pat<(ineg (mul IntRegs:$Rs, u8_0ImmPred:$u8)),
+ (M2_mpysin IntRegs:$Rs, imm:$u8)>;
+def: Pat<(mul IntRegs:$src1, s32_0ImmPred:$src2),
+ (M2_mpysmi IntRegs:$src1, imm:$src2)>;
+def: Pat<(add (mul IntRegs:$src2, u32_0ImmPred:$src3), IntRegs:$src1),
+ (M2_macsip IntRegs:$src1, IntRegs:$src2, imm:$src3)>;
+def: Pat<(add (mul I32:$src2, I32:$src3), I32:$src1),
+ (M2_maci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
+def: Pat<(add (add IntRegs:$src2, u32_0ImmPred:$src3), IntRegs:$src1),
+ (M2_accii IntRegs:$src1, IntRegs:$src2, imm:$src3)>;
+def: Pat<(add (add I32:$src2, I32:$src3), I32:$src1),
+ (M2_acci IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
+
+class T_MType_acc_pat1 <InstHexagon MI, SDNode firstOp, SDNode secOp,
+ PatLeaf ImmPred>
+ : Pat <(secOp IntRegs:$src1, (firstOp IntRegs:$src2, ImmPred:$src3)),
+ (MI IntRegs:$src1, IntRegs:$src2, ImmPred:$src3)>;
+
+class T_MType_acc_pat2 <InstHexagon MI, SDNode firstOp, SDNode secOp>
+ : Pat <(i32 (secOp IntRegs:$src1, (firstOp IntRegs:$src2, IntRegs:$src3))),
+ (MI IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
+
+def : T_MType_acc_pat2 <M2_xor_xacc, xor, xor>;
+def : T_MType_acc_pat1 <M2_macsin, mul, sub, u32_0ImmPred>;
+
+def : T_MType_acc_pat1 <M2_naccii, add, sub, s32_0ImmPred>;
+def : T_MType_acc_pat2 <M2_nacci, add, sub>;
+
+def: T_MType_acc_pat2 <M4_or_xor, xor, or>;
+def: T_MType_acc_pat2 <M4_and_xor, xor, and>;
+def: T_MType_acc_pat2 <M4_or_and, and, or>;
+def: T_MType_acc_pat2 <M4_and_and, and, and>;
+def: T_MType_acc_pat2 <M4_xor_and, and, xor>;
+def: T_MType_acc_pat2 <M4_or_or, or, or>;
+def: T_MType_acc_pat2 <M4_and_or, or, and>;
+def: T_MType_acc_pat2 <M4_xor_or, or, xor>;
+
+class T_MType_acc_pat3 <InstHexagon MI, SDNode firstOp, SDNode secOp>
+ : Pat <(secOp I32:$src1, (firstOp I32:$src2, (not I32:$src3))),
+ (MI IntRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
+
+def: T_MType_acc_pat3 <M4_or_andn, and, or>;
+def: T_MType_acc_pat3 <M4_and_andn, and, and>;
+def: T_MType_acc_pat3 <M4_xor_andn, and, xor>;
+
+def Aext64: PatFrag<(ops node:$Rs), (i64 (anyext node:$Rs))>;
+def Sext64: PatFrag<(ops node:$Rs), (i64 (sext node:$Rs))>;
+def Zext64: PatFrag<(ops node:$Rs), (i64 (zext node:$Rs))>;
+
+// Return true for a load sign-extended from i32 to i64.
+def Sext64Ld : PatLeaf<(i64 DoubleRegs:$src1), [{
+ LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
+ if (!LD)
+ return false;
+ return LD->getExtensionType() == ISD::SEXTLOAD &&
+ LD->getMemoryVT().getScalarType() == MVT::i32;
+}]>;
+
+def: Pat<(mul (Aext64 I32:$src1), (Aext64 I32:$src2)),
+ (M2_dpmpyuu_s0 IntRegs:$src1, IntRegs:$src2)>;
+
+def: Pat<(mul (Sext64 I32:$src1), (Sext64 I32:$src2)),
+ (M2_dpmpyss_s0 IntRegs:$src1, IntRegs:$src2)>;
+
+def: Pat<(mul Sext64Ld:$src1, Sext64Ld:$src2),
+ (M2_dpmpyss_s0 (LoReg DoubleRegs:$src1), (LoReg DoubleRegs:$src2))>;
+
+// Multiply and accumulate, use full result.
+// Rxx[+-]=mpy(Rs,Rt)
+
+def: Pat<(add I64:$src1, (mul (Sext64 I32:$src2), (Sext64 I32:$src3))),
+ (M2_dpmpyss_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
+
+def: Pat<(sub I64:$src1, (mul (Sext64 I32:$src2), (Sext64 I32:$src3))),
+ (M2_dpmpyss_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
+
+def: Pat<(add I64:$src1, (mul (Aext64 I32:$src2), (Aext64 I32:$src3))),
+ (M2_dpmpyuu_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
+
+def: Pat<(add I64:$src1, (mul (Zext64 I32:$src2), (Zext64 I32:$src3))),
+ (M2_dpmpyuu_acc_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
+
+def: Pat<(sub I64:$src1, (mul (Aext64 I32:$src2), (Aext64 I32:$src3))),
+ (M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
+
+def: Pat<(sub I64:$src1, (mul (Zext64 I32:$src2), (Zext64 I32:$src3))),
+ (M2_dpmpyuu_nac_s0 DoubleRegs:$src1, IntRegs:$src2, IntRegs:$src3)>;
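+
+// For illustration, the kind of C code the widening multiply/accumulate
+// patterns above correspond to (the Hexagon mnemonics are indicative only):
+//   int64_t  m(int32_t a, int32_t b)   { return (int64_t)a * b; }   // Rdd = mpy(Rs,Rt)
+//   uint64_t mu(uint32_t a, uint32_t b){ return (uint64_t)a * b; }  // Rdd = mpyu(Rs,Rt)
+//   int64_t  mac(int64_t acc, int32_t a, int32_t b)
+//                                      { return acc + (int64_t)a * b; } // Rxx += mpy(Rs,Rt)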
+
+class Storepi_pat<PatFrag Store, PatFrag Value, PatFrag Offset,
+ InstHexagon MI>
+ : Pat<(Store Value:$src1, I32:$src2, Offset:$offset),
+ (MI I32:$src2, imm:$offset, Value:$src1)>;
+
+def: Storepi_pat<post_truncsti8, I32, s4_0ImmPred, S2_storerb_pi>;
+def: Storepi_pat<post_truncsti16, I32, s4_1ImmPred, S2_storerh_pi>;
+def: Storepi_pat<post_store, I32, s4_2ImmPred, S2_storeri_pi>;
+def: Storepi_pat<post_store, I64, s4_3ImmPred, S2_storerd_pi>;
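+
+// For illustration, the post-increment store patterns above correspond to C
+// code such as (the mnemonic shown is indicative only):
+//   void put(int **p, int v) { *(*p)++ = v; }   // memw(Rx++#4) = Rt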
+
+// Patterns for generating stores, where the address takes different forms:
+// - frameindex,
+// - frameindex + offset,
+// - base + offset,
+// - simple (base address without offset).
+// These would usually be used together (via Storex_pat defined below), but
+// in some cases one may want to apply different properties (such as
+// AddedComplexity) to the individual patterns.
+class Storex_fi_pat<PatFrag Store, PatFrag Value, InstHexagon MI>
+ : Pat<(Store Value:$Rs, AddrFI:$fi), (MI AddrFI:$fi, 0, Value:$Rs)>;
+multiclass Storex_fi_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
+ InstHexagon MI> {
+ def: Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)),
+ (MI AddrFI:$fi, imm:$Off, Value:$Rs)>;
+ def: Pat<(Store Value:$Rs, (IsOrAdd (i32 AddrFI:$fi), ImmPred:$Off)),
+ (MI AddrFI:$fi, imm:$Off, Value:$Rs)>;
+}
+multiclass Storex_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
+ InstHexagon MI> {
+ def: Pat<(Store Value:$Rt, (add I32:$Rs, ImmPred:$Off)),
+ (MI IntRegs:$Rs, imm:$Off, Value:$Rt)>;
+ def: Pat<(Store Value:$Rt, (IsOrAdd I32:$Rs, ImmPred:$Off)),
+ (MI IntRegs:$Rs, imm:$Off, Value:$Rt)>;
+}
+class Storex_simple_pat<PatFrag Store, PatFrag Value, InstHexagon MI>
+ : Pat<(Store Value:$Rt, I32:$Rs),
+ (MI IntRegs:$Rs, 0, Value:$Rt)>;
+
+// Patterns for generating stores, where the address takes different forms,
+// and where the value being stored is transformed through the value modifier
+// ValueMod. The address forms are the same as above.
+class Storexm_fi_pat<PatFrag Store, PatFrag Value, PatFrag ValueMod,
+ InstHexagon MI>
+ : Pat<(Store Value:$Rs, AddrFI:$fi),
+ (MI AddrFI:$fi, 0, (ValueMod Value:$Rs))>;
+multiclass Storexm_fi_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
+ PatFrag ValueMod, InstHexagon MI> {
+ def: Pat<(Store Value:$Rs, (add (i32 AddrFI:$fi), ImmPred:$Off)),
+ (MI AddrFI:$fi, imm:$Off, (ValueMod Value:$Rs))>;
+ def: Pat<(Store Value:$Rs, (IsOrAdd (i32 AddrFI:$fi), ImmPred:$Off)),
+ (MI AddrFI:$fi, imm:$Off, (ValueMod Value:$Rs))>;
+}
+multiclass Storexm_add_pat<PatFrag Store, PatFrag Value, PatFrag ImmPred,
+ PatFrag ValueMod, InstHexagon MI> {
+ def: Pat<(Store Value:$Rt, (add I32:$Rs, ImmPred:$Off)),
+ (MI IntRegs:$Rs, imm:$Off, (ValueMod Value:$Rt))>;
+ def: Pat<(Store Value:$Rt, (IsOrAdd I32:$Rs, ImmPred:$Off)),
+ (MI IntRegs:$Rs, imm:$Off, (ValueMod Value:$Rt))>;
+}
+class Storexm_simple_pat<PatFrag Store, PatFrag Value, PatFrag ValueMod,
+ InstHexagon MI>
+ : Pat<(Store Value:$Rt, I32:$Rs),
+ (MI IntRegs:$Rs, 0, (ValueMod Value:$Rt))>;
+
+multiclass Storex_pat<PatFrag Store, PatFrag Value, PatLeaf ImmPred,
+ InstHexagon MI> {
+ def: Storex_fi_pat <Store, Value, MI>;
+ defm: Storex_fi_add_pat <Store, Value, ImmPred, MI>;
+ defm: Storex_add_pat <Store, Value, ImmPred, MI>;
+}
+
+multiclass Storexm_pat<PatFrag Store, PatFrag Value, PatLeaf ImmPred,
+ PatFrag ValueMod, InstHexagon MI> {
+ def: Storexm_fi_pat <Store, Value, ValueMod, MI>;
+ defm: Storexm_fi_add_pat <Store, Value, ImmPred, ValueMod, MI>;
+ defm: Storexm_add_pat <Store, Value, ImmPred, ValueMod, MI>;
+}
+
+// Regular stores in the DAG have two operands: value and address.
+// Atomic stores also have two, but they are reversed: address, value.
+// To use atomic stores with the patterns, they need to have their operands
+// swapped. This relies on the knowledge that the F.Fragment uses names
+// "ptr" and "val".
+class SwapSt<PatFrag F>
+ : PatFrag<(ops node:$val, node:$ptr), F.Fragment, F.PredicateCode,
+ F.OperandTransform>;
+
+let AddedComplexity = 20 in {
+ defm: Storex_pat<truncstorei8, I32, s32_0ImmPred, S2_storerb_io>;
+ defm: Storex_pat<truncstorei16, I32, s31_1ImmPred, S2_storerh_io>;
+ defm: Storex_pat<store, I32, s30_2ImmPred, S2_storeri_io>;
+ defm: Storex_pat<store, I64, s29_3ImmPred, S2_storerd_io>;
+
+ defm: Storex_pat<SwapSt<atomic_store_8>, I32, s32_0ImmPred, S2_storerb_io>;
+ defm: Storex_pat<SwapSt<atomic_store_16>, I32, s31_1ImmPred, S2_storerh_io>;
+ defm: Storex_pat<SwapSt<atomic_store_32>, I32, s30_2ImmPred, S2_storeri_io>;
+ defm: Storex_pat<SwapSt<atomic_store_64>, I64, s29_3ImmPred, S2_storerd_io>;
+}
+
+// Simple patterns should be tried with the lowest priority.
+def: Storex_simple_pat<truncstorei8, I32, S2_storerb_io>;
+def: Storex_simple_pat<truncstorei16, I32, S2_storerh_io>;
+def: Storex_simple_pat<store, I32, S2_storeri_io>;
+def: Storex_simple_pat<store, I64, S2_storerd_io>;
+
+def: Storex_simple_pat<SwapSt<atomic_store_8>, I32, S2_storerb_io>;
+def: Storex_simple_pat<SwapSt<atomic_store_16>, I32, S2_storerh_io>;
+def: Storex_simple_pat<SwapSt<atomic_store_32>, I32, S2_storeri_io>;
+def: Storex_simple_pat<SwapSt<atomic_store_64>, I64, S2_storerd_io>;
+
+let AddedComplexity = 20 in {
+ defm: Storexm_pat<truncstorei8, I64, s32_0ImmPred, LoReg, S2_storerb_io>;
+ defm: Storexm_pat<truncstorei16, I64, s31_1ImmPred, LoReg, S2_storerh_io>;
+ defm: Storexm_pat<truncstorei32, I64, s30_2ImmPred, LoReg, S2_storeri_io>;
+}
+
+def: Storexm_simple_pat<truncstorei8, I64, LoReg, S2_storerb_io>;
+def: Storexm_simple_pat<truncstorei16, I64, LoReg, S2_storerh_io>;
+def: Storexm_simple_pat<truncstorei32, I64, LoReg, S2_storeri_io>;
+
+def: Pat <(Sext64 I32:$src), (A2_sxtw I32:$src)>;
+
+def: Pat<(select (i1 (setlt I32:$src, 0)), (sub 0, I32:$src), I32:$src),
+ (A2_abs IntRegs:$src)>;
+
+let AddedComplexity = 50 in
+def: Pat<(xor (add (sra I32:$src, (i32 31)),
+ I32:$src),
+ (sra I32:$src, (i32 31))),
+ (A2_abs IntRegs:$src)>;
+
+def: Pat<(sra I32:$src, u5_0ImmPred:$u5),
+ (S2_asr_i_r IntRegs:$src, imm:$u5)>;
+def: Pat<(srl I32:$src, u5_0ImmPred:$u5),
+ (S2_lsr_i_r IntRegs:$src, imm:$u5)>;
+def: Pat<(shl I32:$src, u5_0ImmPred:$u5),
+ (S2_asl_i_r IntRegs:$src, imm:$u5)>;
+
+def: Pat<(sra (add (sra I32:$src1, u5_0ImmPred:$src2), 1), (i32 1)),
+ (S2_asr_i_r_rnd IntRegs:$src1, u5_0ImmPred:$src2)>;
+
+def : Pat<(not I64:$src1),
+ (A2_notp DoubleRegs:$src1)>;
+
+// Count leading zeros.
+def: Pat<(ctlz I32:$Rs), (S2_cl0 I32:$Rs)>;
+def: Pat<(i32 (trunc (ctlz I64:$Rss))), (S2_cl0p I64:$Rss)>;
+
+// Count trailing zeros: 32-bit.
+def: Pat<(cttz I32:$Rs), (S2_ct0 I32:$Rs)>;
+
+// Count leading ones.
+def: Pat<(ctlz (not I32:$Rs)), (S2_cl1 I32:$Rs)>;
+def: Pat<(i32 (trunc (ctlz (not I64:$Rss)))), (S2_cl1p I64:$Rss)>;
+
+// Count trailing ones: 32-bit.
+def: Pat<(cttz (not I32:$Rs)), (S2_ct1 I32:$Rs)>;
+
+let AddedComplexity = 20 in { // Complexity greater than and/or/xor
+ def: Pat<(and I32:$Rs, IsNPow2_32:$V),
+ (S2_clrbit_i IntRegs:$Rs, (LogN2_32 $V))>;
+ def: Pat<(or I32:$Rs, IsPow2_32:$V),
+ (S2_setbit_i IntRegs:$Rs, (Log2_32 $V))>;
+ def: Pat<(xor I32:$Rs, IsPow2_32:$V),
+ (S2_togglebit_i IntRegs:$Rs, (Log2_32 $V))>;
+
+ def: Pat<(and I32:$Rs, (not (shl 1, I32:$Rt))),
+ (S2_clrbit_r IntRegs:$Rs, IntRegs:$Rt)>;
+ def: Pat<(or I32:$Rs, (shl 1, I32:$Rt)),
+ (S2_setbit_r IntRegs:$Rs, IntRegs:$Rt)>;
+ def: Pat<(xor I32:$Rs, (shl 1, I32:$Rt)),
+ (S2_togglebit_r IntRegs:$Rs, IntRegs:$Rt)>;
+}
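+
+// For example (an illustrative mapping; the constants are arbitrary):
+//   x & 0xFFFFFFEF  ->  Rd = clrbit(Rs,#4)
+//   x | 0x00000010  ->  Rd = setbit(Rs,#4)
+//   x ^ 0x00000010  ->  Rd = togglebit(Rs,#4)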
+
+// Clr/set/toggle bit for 64-bit values with immediate bit index.
+let AddedComplexity = 20 in { // Complexity greater than and/or/xor
+ def: Pat<(and I64:$Rss, IsNPow2_64L:$V),
+ (REG_SEQUENCE DoubleRegs,
+ (i32 (HiReg $Rss)), isub_hi,
+ (S2_clrbit_i (LoReg $Rss), (LogN2_64 $V)), isub_lo)>;
+ def: Pat<(and I64:$Rss, IsNPow2_64H:$V),
+ (REG_SEQUENCE DoubleRegs,
+ (S2_clrbit_i (HiReg $Rss), (UDEC32 (i32 (LogN2_64 $V)))),
+ isub_hi,
+ (i32 (LoReg $Rss)), isub_lo)>;
+
+ def: Pat<(or I64:$Rss, IsPow2_64L:$V),
+ (REG_SEQUENCE DoubleRegs,
+ (i32 (HiReg $Rss)), isub_hi,
+ (S2_setbit_i (LoReg $Rss), (Log2_64 $V)), isub_lo)>;
+ def: Pat<(or I64:$Rss, IsPow2_64H:$V),
+ (REG_SEQUENCE DoubleRegs,
+ (S2_setbit_i (HiReg $Rss), (UDEC32 (i32 (Log2_64 $V)))),
+ isub_hi,
+ (i32 (LoReg $Rss)), isub_lo)>;
+
+ def: Pat<(xor I64:$Rss, IsPow2_64L:$V),
+ (REG_SEQUENCE DoubleRegs,
+ (i32 (HiReg $Rss)), isub_hi,
+ (S2_togglebit_i (LoReg $Rss), (Log2_64 $V)), isub_lo)>;
+ def: Pat<(xor I64:$Rss, IsPow2_64H:$V),
+ (REG_SEQUENCE DoubleRegs,
+ (S2_togglebit_i (HiReg $Rss), (UDEC32 (i32 (Log2_64 $V)))),
+ isub_hi,
+ (i32 (LoReg $Rss)), isub_lo)>;
+}
+
+let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm.
+ def: Pat<(i1 (setne (and (shl 1, u5_0ImmPred:$u5), I32:$Rs), 0)),
+ (S2_tstbit_i IntRegs:$Rs, u5_0ImmPred:$u5)>;
+ def: Pat<(i1 (setne (and (shl 1, I32:$Rt), I32:$Rs), 0)),
+ (S2_tstbit_r IntRegs:$Rs, IntRegs:$Rt)>;
+ def: Pat<(i1 (trunc I32:$Rs)),
+ (S2_tstbit_i IntRegs:$Rs, 0)>;
+ def: Pat<(i1 (trunc I64:$Rs)),
+ (S2_tstbit_i (LoReg DoubleRegs:$Rs), 0)>;
+}
+
+let AddedComplexity = 20 in { // Complexity greater than compare reg-imm.
+ def: Pat<(i1 (seteq (and I32:$Rs, u6_0ImmPred:$u6), 0)),
+ (C2_bitsclri IntRegs:$Rs, u6_0ImmPred:$u6)>;
+ def: Pat<(i1 (seteq (and I32:$Rs, I32:$Rt), 0)),
+ (C2_bitsclr IntRegs:$Rs, IntRegs:$Rt)>;
+}
+
+let AddedComplexity = 10 in // Complexity greater than compare reg-reg.
+def: Pat<(i1 (seteq (and I32:$Rs, I32:$Rt), IntRegs:$Rt)),
+ (C2_bitsset IntRegs:$Rs, IntRegs:$Rt)>;
+
+def: Pat<(or (or (shl (or (shl (i32 (extloadi8 (add I32:$b, 3))),
+ (i32 8)),
+ (i32 (zextloadi8 (add I32:$b, 2)))),
+ (i32 16)),
+ (shl (i32 (zextloadi8 (add I32:$b, 1))), (i32 8))),
+ (zextloadi8 I32:$b)),
+ (A2_swiz (L2_loadri_io IntRegs:$b, 0))>;
+
+// Patterns for loads of i1:
+def: Pat<(i1 (load AddrFI:$fi)),
+ (C2_tfrrp (L2_loadrub_io AddrFI:$fi, 0))>;
+def: Pat<(i1 (load (add I32:$Rs, s32_0ImmPred:$Off))),
+ (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, imm:$Off))>;
+def: Pat<(i1 (load I32:$Rs)),
+ (C2_tfrrp (L2_loadrub_io IntRegs:$Rs, 0))>;
+
+def I1toI32: OutPatFrag<(ops node:$Rs),
+ (C2_muxii (i1 $Rs), 1, 0)>;
+
+def I32toI1: OutPatFrag<(ops node:$Rs),
+ (i1 (C2_tfrrp (i32 $Rs)))>;
+
+defm: Storexm_pat<store, I1, s32_0ImmPred, I1toI32, S2_storerb_io>;
+def: Storexm_simple_pat<store, I1, I1toI32, S2_storerb_io>;
+
+def: Pat<(sra I64:$src, u6_0ImmPred:$u6),
+ (S2_asr_i_p DoubleRegs:$src, imm:$u6)>;
+def: Pat<(srl I64:$src, u6_0ImmPred:$u6),
+ (S2_lsr_i_p DoubleRegs:$src, imm:$u6)>;
+def: Pat<(shl I64:$src, u6_0ImmPred:$u6),
+ (S2_asl_i_p DoubleRegs:$src, imm:$u6)>;
+
+let AddedComplexity = 100 in
+def: Pat<(add I32:$Rt, (shl I32:$Rs, u3_0ImmPred:$u3)),
+ (S2_addasl_rrri IntRegs:$Rt, IntRegs:$Rs, imm:$u3)>;
+
+def HexagonBARRIER: SDNode<"HexagonISD::BARRIER", SDTNone, [SDNPHasChain]>;
+def: Pat<(HexagonBARRIER), (Y2_barrier)>;
+
+def: Pat<(IsOrAdd (i32 AddrFI:$Rs), s32_0ImmPred:$off),
+ (PS_fi (i32 AddrFI:$Rs), s32_0ImmPred:$off)>;
+
+
+// Support for generating global addresses.
+// Taken from X86InstrInfo.td.
+def SDTHexagonCONST32 : SDTypeProfile<1, 1, [SDTCisVT<0, i32>,
+ SDTCisVT<1, i32>,
+ SDTCisPtrTy<0>]>;
+def HexagonCONST32 : SDNode<"HexagonISD::CONST32", SDTHexagonCONST32>;
+def HexagonCONST32_GP : SDNode<"HexagonISD::CONST32_GP", SDTHexagonCONST32>;
+
+// Map TLS addresses to A2_tfrsi.
+def: Pat<(HexagonCONST32 tglobaltlsaddr:$addr), (A2_tfrsi s16_0Ext:$addr)>;
+def: Pat<(HexagonCONST32 bbl:$label), (A2_tfrsi s16_0Ext:$label)>;
+
+def: Pat<(i64 imm:$v), (CONST64 imm:$v)>;
+def: Pat<(i1 0), (PS_false)>;
+def: Pat<(i1 1), (PS_true)>;
+
+// Pseudo instructions.
+def SDT_SPCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
+def SDT_SPCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
+ SDTCisVT<1, i32> ]>;
+
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPCallSeqStart,
+ [SDNPHasChain, SDNPOutGlue]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPCallSeqEnd,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
+
+def SDT_SPCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+
+// For tail calls, the HexagonTCRet SDNode has three SDNode properties:
+// a chain, an optional glue operand, and variable arguments.
+// Its single operand has pointer type.
+def HexagonTCRet : SDNode<"HexagonISD::TC_RETURN", SDT_SPCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+
+
+def: Pat<(callseq_start timm:$amt),
+ (ADJCALLSTACKDOWN imm:$amt)>;
+def: Pat<(callseq_end timm:$amt1, timm:$amt2),
+ (ADJCALLSTACKUP imm:$amt1, imm:$amt2)>;
+
+// Tail calls.
+def: Pat<(HexagonTCRet tglobaladdr:$dst),
+ (PS_tailcall_i tglobaladdr:$dst)>;
+def: Pat<(HexagonTCRet texternalsym:$dst),
+ (PS_tailcall_i texternalsym:$dst)>;
+def: Pat<(HexagonTCRet I32:$dst),
+ (PS_tailcall_r I32:$dst)>;
+
+// Map from r0 = and(r1, 65535) to r0 = zxth(r1)
+def: Pat<(and I32:$src1, 65535),
+ (A2_zxth IntRegs:$src1)>;
+
+// Map from r0 = and(r1, 255) to r0 = zxtb(r1).
+def: Pat<(and I32:$src1, 255),
+ (A2_zxtb IntRegs:$src1)>;
+
+// Map Add(p1, true) to p1 = not(p1).
+// Add(p1, false) should never be produced; if it is, it must be mapped
+// to a NOP.
+def: Pat<(add I1:$src1, -1),
+ (C2_not PredRegs:$src1)>;
+
+// Map from p0 = pnot(p0); r0 = mux(p0, #i, #j) => r0 = mux(p0, #j, #i).
+def: Pat<(select (not I1:$src1), s8_0ImmPred:$src2, s32_0ImmPred:$src3),
+ (C2_muxii PredRegs:$src1, s32_0ImmPred:$src3, s8_0ImmPred:$src2)>;
+
+// Map from p0 = pnot(p0); r0 = select(p0, #i, r1)
+// => r0 = C2_muxir(p0, r1, #i)
+def: Pat<(select (not I1:$src1), s32_0ImmPred:$src2,
+ I32:$src3),
+ (C2_muxir PredRegs:$src1, IntRegs:$src3, s32_0ImmPred:$src2)>;
+
+// Map from p0 = pnot(p0); r0 = mux(p0, r1, #i)
+// => r0 = C2_muxri (p0, #i, r1)
+def: Pat<(select (not I1:$src1), IntRegs:$src2, s32_0ImmPred:$src3),
+ (C2_muxri PredRegs:$src1, s32_0ImmPred:$src3, IntRegs:$src2)>;
+
+// Map from p0 = pnot(p0); if (p0) jump => if (!p0) jump.
+def: Pat<(brcond (not I1:$src1), bb:$offset),
+ (J2_jumpf PredRegs:$src1, bb:$offset)>;
+
+// Map from Rdd = sign_extend_inreg(Rss, i32) -> Rdd = A2_sxtw(Rss.lo).
+def: Pat<(i64 (sext_inreg I64:$src1, i32)),
+ (A2_sxtw (LoReg DoubleRegs:$src1))>;
+
+// Map from Rdd = sign_extend_inreg(Rss, i16) -> Rdd = A2_sxtw(A2_sxth(Rss.lo)).
+def: Pat<(i64 (sext_inreg I64:$src1, i16)),
+ (A2_sxtw (A2_sxth (LoReg DoubleRegs:$src1)))>;
+
+// Map from Rdd = sign_extend_inreg(Rss, i8) -> Rdd = A2_sxtw(A2_sxtb(Rss.lo)).
+def: Pat<(i64 (sext_inreg I64:$src1, i8)),
+ (A2_sxtw (A2_sxtb (LoReg DoubleRegs:$src1)))>;
+
+// We want to avoid emitting pnot as much as possible.
+// Map brcond with an unsupported setcc to a J2_jumpf.
+def : Pat <(brcond (i1 (setne I32:$src1, I32:$src2)),
+ bb:$offset),
+ (J2_jumpf (C2_cmpeq I32:$src1, I32:$src2),
+ bb:$offset)>;
+
+def : Pat <(brcond (i1 (setne I32:$src1, s10_0ImmPred:$src2)),
+ bb:$offset),
+ (J2_jumpf (C2_cmpeqi I32:$src1, s10_0ImmPred:$src2), bb:$offset)>;
+
+def: Pat<(brcond (i1 (setne I1:$src1, (i1 -1))), bb:$offset),
+ (J2_jumpf PredRegs:$src1, bb:$offset)>;
+
+def: Pat<(brcond (i1 (setne I1:$src1, (i1 0))), bb:$offset),
+ (J2_jumpt PredRegs:$src1, bb:$offset)>;
+
+// cmp.lt(Rs, Imm) -> !cmp.ge(Rs, Imm) -> !cmp.gt(Rs, Imm-1)
+def: Pat<(brcond (i1 (setlt I32:$src1, s8_0ImmPred:$src2)), bb:$offset),
+ (J2_jumpf (C2_cmpgti IntRegs:$src1, (SDEC1 s8_0ImmPred:$src2)),
+ bb:$offset)>;
+
+// Map from a 64-bit select to an emulated 64-bit mux.
+// Hexagon does not support 64-bit MUXes; so emulate with combines.
+def: Pat<(select I1:$src1, I64:$src2,
+ I64:$src3),
+ (A2_combinew (C2_mux PredRegs:$src1, (HiReg DoubleRegs:$src2),
+ (HiReg DoubleRegs:$src3)),
+ (C2_mux PredRegs:$src1, (LoReg DoubleRegs:$src2),
+ (LoReg DoubleRegs:$src3)))>;
+
+// Map from a 1-bit select to logical ops.
+// From LegalizeDAG.cpp: (B1 ? B2 : B3) <=> (B1 & B2)|(!B1&B3).
+def: Pat<(select I1:$src1, I1:$src2, I1:$src3),
+ (C2_or (C2_and PredRegs:$src1, PredRegs:$src2),
+ (C2_and (C2_not PredRegs:$src1), PredRegs:$src3))>;
+
+// Map for truncating i64 values to i32.
+def: Pat<(i32 (trunc I64:$src)),
+ (LoReg DoubleRegs:$src)>;
+
+// Map for truncating i64 values to i1.
+def: Pat<(i1 (trunc I64:$src)),
+ (C2_tfrrp (LoReg DoubleRegs:$src))>;
+
+// rs <= imm -> !(rs > imm).
+let AddedComplexity = 30 in
+def: Pat<(i1 (setle I32:$src1, s32_0ImmPred:$src2)),
+ (C2_not (C2_cmpgti IntRegs:$src1, s32_0ImmPred:$src2))>;
+
+// rs <= rt -> !(rs > rt).
+def : Pat<(i1 (setle I32:$src1, I32:$src2)),
+ (i1 (C2_not (C2_cmpgt I32:$src1, I32:$src2)))>;
+
+// Rss <= Rtt -> !(Rss > Rtt).
+def: Pat<(i1 (setle I64:$src1, I64:$src2)),
+ (C2_not (C2_cmpgtp DoubleRegs:$src1, DoubleRegs:$src2))>;
+
+// Map cmpne -> cmpeq.
+// Hexagon_TODO: We should improve on this.
+// rs != rt -> !(rs == rt).
+let AddedComplexity = 30 in
+def: Pat<(i1 (setne I32:$src1, s32_0ImmPred:$src2)),
+ (C2_not (C2_cmpeqi IntRegs:$src1, s32_0ImmPred:$src2))>;
+
+// Convert setne back to xor for hexagon since we compute w/ pred registers.
+def: Pat<(i1 (setne I1:$src1, I1:$src2)),
+ (C2_xor PredRegs:$src1, PredRegs:$src2)>;
+
+// Map cmpne(Rss) -> !cmpeq(Rss).
+// rs != rt -> !(rs == rt).
+def: Pat<(i1 (setne I64:$src1, I64:$src2)),
+ (C2_not (C2_cmpeqp DoubleRegs:$src1, DoubleRegs:$src2))>;
+
+// Map cmpge(Rs, Rt) -> !cmpgt(Rs, Rt).
+// rs >= rt -> !(rt > rs).
+def : Pat <(i1 (setge I32:$src1, I32:$src2)),
+ (i1 (C2_not (i1 (C2_cmpgt I32:$src2, I32:$src1))))>;
+
+// cmpge(Rs, Imm) -> cmpgt(Rs, Imm-1)
+let AddedComplexity = 30 in
+def: Pat<(i1 (setge I32:$src1, s32_0ImmPred:$src2)),
+ (C2_cmpgti IntRegs:$src1, (SDEC1 s32_0ImmPred:$src2))>;
+
+// Map cmpge(Rss, Rtt) -> !cmpgt(Rtt, Rss).
+// rss >= rtt -> !(rtt > rss).
+def: Pat<(i1 (setge I64:$src1, I64:$src2)),
+ (C2_not (C2_cmpgtp DoubleRegs:$src2, DoubleRegs:$src1))>;
+
+// Map cmplt(Rs, Imm) -> !cmpge(Rs, Imm).
+// !cmpge(Rs, Imm) -> !cmpgt(Rs, Imm-1).
+// rs < rt -> !(rs >= rt).
+let AddedComplexity = 30 in
+def: Pat<(i1 (setlt I32:$src1, s32_0ImmPred:$src2)),
+ (C2_not (C2_cmpgti IntRegs:$src1, (SDEC1 s32_0ImmPred:$src2)))>;
+
+// Generate cmpgeu(Rs, #0) -> cmpeq(Rs, Rs)
+def: Pat<(i1 (setuge I32:$src1, 0)),
+ (C2_cmpeq IntRegs:$src1, IntRegs:$src1)>;
+
+// Generate cmpgeu(Rs, #u8) -> cmpgtu(Rs, #u8 -1)
+def: Pat<(i1 (setuge I32:$src1, u32_0ImmPred:$src2)),
+ (C2_cmpgtui IntRegs:$src1, (UDEC1 u32_0ImmPred:$src2))>;
+
+// Generate cmpgtu(Rs, #u9)
+def: Pat<(i1 (setugt I32:$src1, u32_0ImmPred:$src2)),
+ (C2_cmpgtui IntRegs:$src1, u32_0ImmPred:$src2)>;
+
+// Map from Rss >= Rtt -> !(Rtt > Rss) (unsigned).
+// rss >= rtt -> !(rtt > rss).
+def: Pat<(i1 (setuge I64:$src1, I64:$src2)),
+ (C2_not (C2_cmpgtup DoubleRegs:$src2, DoubleRegs:$src1))>;
+
+// Map from cmpleu(Rss, Rtt) -> !cmpgtu(Rss, Rtt).
+// (Rss <= Rtt) -> !(Rss > Rtt).
+def: Pat<(i1 (setule I64:$src1, I64:$src2)),
+ (C2_not (C2_cmpgtup DoubleRegs:$src1, DoubleRegs:$src2))>;
+
+// Sign extends.
+// i1 -> i32
+def: Pat<(i32 (sext I1:$src1)),
+ (C2_muxii PredRegs:$src1, -1, 0)>;
+
+// i1 -> i64
+def: Pat<(i64 (sext I1:$src1)),
+ (A2_combinew (A2_tfrsi -1), (C2_muxii PredRegs:$src1, -1, 0))>;
+
+// Zero extends.
+// i1 -> i32
+def: Pat<(i32 (zext I1:$src1)),
+ (C2_muxii PredRegs:$src1, 1, 0)>;
+
+// Map from Rd = Pd to Rd = mux(Pd, #1, #0).
+def: Pat<(i32 (anyext I1:$src1)),
+ (C2_muxii PredRegs:$src1, 1, 0)>;
+
+// Map from Rdd = Pd to Rdd = sxtw(mux(Pd, #1, #0)).
+def: Pat<(i64 (anyext I1:$src1)),
+ (A2_sxtw (C2_muxii PredRegs:$src1, 1, 0))>;
+
+// Clear the sign bit in a 64-bit register.
+def ClearSign : OutPatFrag<(ops node:$Rss),
+ (A2_combinew (S2_clrbit_i (HiReg $Rss), 31), (LoReg $Rss))>;
+
+def MulHU : OutPatFrag<(ops node:$Rss, node:$Rtt),
+ (A2_addp
+ (M2_dpmpyuu_acc_s0
+ (S2_lsr_i_p
+ (A2_addp
+ (M2_dpmpyuu_acc_s0
+ (S2_lsr_i_p (M2_dpmpyuu_s0 (LoReg $Rss), (LoReg $Rtt)), 32),
+ (HiReg $Rss),
+ (LoReg $Rtt)),
+ (A2_combinew (A2_tfrsi 0),
+ (LoReg (M2_dpmpyuu_s0 (LoReg $Rss), (HiReg $Rtt))))),
+ 32),
+ (HiReg $Rss),
+ (HiReg $Rtt)),
+ (S2_lsr_i_p (M2_dpmpyuu_s0 (LoReg $Rss), (HiReg $Rtt)), 32))>;
+
+// Multiply 64-bit unsigned and use upper result.
+def : Pat <(mulhu I64:$Rss, I64:$Rtt), (MulHU $Rss, $Rtt)>;
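+
+// A C++ sketch of the decomposition MulHU performs (illustrative only; the
+// helper name mulhu64 is made up):
+//   uint64_t mulhu64(uint64_t a, uint64_t b) {
+//     uint64_t aL = (uint32_t)a, aH = a >> 32;
+//     uint64_t bL = (uint32_t)b, bH = b >> 32;
+//     uint64_t c  = aL*bH;                    // one cross product, kept whole
+//     uint64_t t  = ((aL*bL) >> 32) + aH*bL;  // other cross product + low carry
+//     t = (t + (uint32_t)c) >> 32;            // fold in low half of c, keep carry
+//     return t + aH*bH + (c >> 32);           // upper 64 bits of the product
+//   }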
+
+// Multiply 64-bit signed and use upper result.
+//
+// For two signed 64-bit integers A and B, let A' and B' denote A and B
+// with the sign bit cleared. Then A = -2^63*s(A) + A', where s(A) is the
+// sign bit of A (and identically for B). With this notation, the signed
+// product A*B can be written as:
+// AB = (-2^63 s(A) + A') * (-2^63 s(B) + B')
+// = 2^126 s(A)s(B) - 2^63 [s(A)B'+s(B)A'] + A'B'
+// = 2^126 s(A)s(B) + 2^63 [s(A)B'+s(B)A'] + A'B' - 2*2^63 [s(A)B'+s(B)A']
+// = (unsigned product AB) - 2^64 [s(A)B'+s(B)A']
+
+def : Pat <(mulhs I64:$Rss, I64:$Rtt),
+ (A2_subp
+ (MulHU $Rss, $Rtt),
+ (A2_addp
+ (A2_andp (S2_asr_i_p $Rss, 63), (ClearSign $Rtt)),
+ (A2_andp (S2_asr_i_p $Rtt, 63), (ClearSign $Rss))))>;
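+
+// A C++ sketch of the same identity (illustrative only; mulhu64 is the sketch
+// above and mulhs64 is a made-up name):
+//   int64_t mulhs64(int64_t a, int64_t b) {
+//     uint64_t aP = (uint64_t)a & ~(1ULL << 63);  // A': sign bit cleared
+//     uint64_t bP = (uint64_t)b & ~(1ULL << 63);  // B'
+//     uint64_t sA = (uint64_t)(a >> 63);          // 0 or all ones: s(A) replicated
+//     uint64_t sB = (uint64_t)(b >> 63);
+//     return (int64_t)(mulhu64(a, b) - ((sA & bP) + (sB & aP)));
+//   }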
+
+// Hexagon specific ISD nodes.
+def SDTHexagonALLOCA : SDTypeProfile<1, 2,
+ [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+def HexagonALLOCA : SDNode<"HexagonISD::ALLOCA", SDTHexagonALLOCA,
+ [SDNPHasChain]>;
+
+
+def: Pat<(HexagonALLOCA I32:$Rs, (i32 imm:$A)),
+ (PS_alloca IntRegs:$Rs, imm:$A)>;
+
+def HexagonJT: SDNode<"HexagonISD::JT", SDTIntUnaryOp>;
+def HexagonCP: SDNode<"HexagonISD::CP", SDTIntUnaryOp>;
+
+def: Pat<(HexagonJT tjumptable:$dst), (A2_tfrsi imm:$dst)>;
+def: Pat<(HexagonCP tconstpool:$dst), (A2_tfrsi imm:$dst)>;
+
+let AddedComplexity = 100 in
+def: Pat<(add I32:$src1, (sra I32:$Rs, u5_0ImmPred:$u5)), (S2_asr_i_r_acc IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
+def: Pat<(sub I32:$src1, (sra I32:$Rs, u5_0ImmPred:$u5)), (S2_asr_i_r_nac IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
+def: Pat<(and I32:$src1, (sra I32:$Rs, u5_0ImmPred:$u5)), (S2_asr_i_r_and IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
+def: Pat<(or I32:$src1, (sra I32:$Rs, u5_0ImmPred:$u5)), (S2_asr_i_r_or IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
+
+let AddedComplexity = 100 in
+def: Pat<(add I64:$src1, (sra I64:$Rs, u6_0ImmPred:$u5)), (S2_asr_i_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
+def: Pat<(sub I64:$src1, (sra I64:$Rs, u6_0ImmPred:$u5)), (S2_asr_i_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
+def: Pat<(and I64:$src1, (sra I64:$Rs, u6_0ImmPred:$u5)), (S2_asr_i_p_and DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
+def: Pat<(or I64:$src1, (sra I64:$Rs, u6_0ImmPred:$u5)), (S2_asr_i_p_or DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
+
+let AddedComplexity = 100 in
+def: Pat<(add I32:$src1, (srl I32:$Rs, u5_0ImmPred:$u5)), (S2_lsr_i_r_acc IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
+def: Pat<(sub I32:$src1, (srl I32:$Rs, u5_0ImmPred:$u5)), (S2_lsr_i_r_nac IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
+def: Pat<(and I32:$src1, (srl I32:$Rs, u5_0ImmPred:$u5)), (S2_lsr_i_r_and IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
+def: Pat<(or I32:$src1, (srl I32:$Rs, u5_0ImmPred:$u5)), (S2_lsr_i_r_or IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
+let AddedComplexity = 100 in
+def: Pat<(xor I32:$src1, (srl I32:$Rs, u5_0ImmPred:$u5)), (S2_lsr_i_r_xacc IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
+
+let AddedComplexity = 100 in
+def: Pat<(add I64:$src1, (srl I64:$Rs, u6_0ImmPred:$u5)), (S2_lsr_i_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
+def: Pat<(sub I64:$src1, (srl I64:$Rs, u6_0ImmPred:$u5)), (S2_lsr_i_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
+def: Pat<(and I64:$src1, (srl I64:$Rs, u6_0ImmPred:$u5)), (S2_lsr_i_p_and DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
+def: Pat<(or I64:$src1, (srl I64:$Rs, u6_0ImmPred:$u5)), (S2_lsr_i_p_or DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
+let AddedComplexity = 100 in
+def: Pat<(xor I64:$src1, (srl I64:$Rs, u6_0ImmPred:$u5)), (S2_lsr_i_p_xacc DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
+
+let AddedComplexity = 100 in
+def: Pat<(add I32:$src1, (shl I32:$Rs, u5_0ImmPred:$u5)), (S2_asl_i_r_acc IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
+def: Pat<(sub I32:$src1, (shl I32:$Rs, u5_0ImmPred:$u5)), (S2_asl_i_r_nac IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
+def: Pat<(and I32:$src1, (shl I32:$Rs, u5_0ImmPred:$u5)), (S2_asl_i_r_and IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
+def: Pat<(or I32:$src1, (shl I32:$Rs, u5_0ImmPred:$u5)), (S2_asl_i_r_or IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
+let AddedComplexity = 100 in
+def: Pat<(xor I32:$src1, (shl I32:$Rs, u5_0ImmPred:$u5)), (S2_asl_i_r_xacc IntRegs:$src1, IntRegs:$Rs, u5_0ImmPred:$u5)>;
+
+let AddedComplexity = 100 in
+def: Pat<(add I64:$src1, (shl I64:$Rs, u6_0ImmPred:$u5)), (S2_asl_i_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
+def: Pat<(sub I64:$src1, (shl I64:$Rs, u6_0ImmPred:$u5)), (S2_asl_i_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
+def: Pat<(and I64:$src1, (shl I64:$Rs, u6_0ImmPred:$u5)), (S2_asl_i_p_and DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
+def: Pat<(or I64:$src1, (shl I64:$Rs, u6_0ImmPred:$u5)), (S2_asl_i_p_or DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
+let AddedComplexity = 100 in
+def: Pat<(xor I64:$src1, (shl I64:$Rs, u6_0ImmPred:$u5)), (S2_asl_i_p_xacc DoubleRegs:$src1, DoubleRegs:$Rs, u6_0ImmPred:$u5)>;
+
+let AddedComplexity = 100 in
+def: Pat<(add I32:$src1, (shl I32:$Rs, I32:$Rt)), (S2_asl_r_r_acc IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(sub I32:$src1, (shl I32:$Rs, I32:$Rt)), (S2_asl_r_r_nac IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(and I32:$src1, (shl I32:$Rs, I32:$Rt)), (S2_asl_r_r_and IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(or I32:$src1, (shl I32:$Rs, I32:$Rt)), (S2_asl_r_r_or IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
+let AddedComplexity = 100 in
+def: Pat<(add I64:$src1, (shl I64:$Rs, I32:$Rt)), (S2_asl_r_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(sub I64:$src1, (shl I64:$Rs, I32:$Rt)), (S2_asl_r_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(and I64:$src1, (shl I64:$Rs, I32:$Rt)), (S2_asl_r_p_and DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(or I64:$src1, (shl I64:$Rs, I32:$Rt)), (S2_asl_r_p_or DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(xor I64:$src1, (shl I64:$Rs, I32:$Rt)), (S2_asl_r_p_xor DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
+
+let AddedComplexity = 100 in
+def: Pat<(add I32:$src1, (sra I32:$Rs, I32:$Rt)), (S2_asr_r_r_acc IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(sub I32:$src1, (sra I32:$Rs, I32:$Rt)), (S2_asr_r_r_nac IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(and I32:$src1, (sra I32:$Rs, I32:$Rt)), (S2_asr_r_r_and IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(or I32:$src1, (sra I32:$Rs, I32:$Rt)), (S2_asr_r_r_or IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
+let AddedComplexity = 100 in
+def: Pat<(add I64:$src1, (sra I64:$Rs, I32:$Rt)), (S2_asr_r_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(sub I64:$src1, (sra I64:$Rs, I32:$Rt)), (S2_asr_r_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(and I64:$src1, (sra I64:$Rs, I32:$Rt)), (S2_asr_r_p_and DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(or I64:$src1, (sra I64:$Rs, I32:$Rt)), (S2_asr_r_p_or DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(xor I64:$src1, (sra I64:$Rs, I32:$Rt)), (S2_asr_r_p_xor DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
+
+let AddedComplexity = 100 in
+def: Pat<(add I32:$src1, (srl I32:$Rs, I32:$Rt)), (S2_lsr_r_r_acc IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(sub I32:$src1, (srl I32:$Rs, I32:$Rt)), (S2_lsr_r_r_nac IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(and I32:$src1, (srl I32:$Rs, I32:$Rt)), (S2_lsr_r_r_and IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(or I32:$src1, (srl I32:$Rs, I32:$Rt)), (S2_lsr_r_r_or IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
+let AddedComplexity = 100 in
+def: Pat<(add I64:$src1, (srl I64:$Rs, I32:$Rt)), (S2_lsr_r_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(sub I64:$src1, (srl I64:$Rs, I32:$Rt)), (S2_lsr_r_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(and I64:$src1, (srl I64:$Rs, I32:$Rt)), (S2_lsr_r_p_and DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(or I64:$src1, (srl I64:$Rs, I32:$Rt)), (S2_lsr_r_p_or DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(xor I64:$src1, (srl I64:$Rs, I32:$Rt)), (S2_lsr_r_p_xor DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
+
+let AddedComplexity = 100 in
+def: Pat<(add I32:$src1, (shl I32:$Rs, I32:$Rt)), (S2_lsl_r_r_acc IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(sub I32:$src1, (shl I32:$Rs, I32:$Rt)), (S2_lsl_r_r_nac IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(and I32:$src1, (shl I32:$Rs, I32:$Rt)), (S2_lsl_r_r_and IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(or I32:$src1, (shl I32:$Rs, I32:$Rt)), (S2_lsl_r_r_or IntRegs:$src1, IntRegs:$Rs, IntRegs:$Rt)>;
+let AddedComplexity = 100 in
+def: Pat<(add I64:$src1, (shl I64:$Rs, I32:$Rt)), (S2_lsl_r_p_acc DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(sub I64:$src1, (shl I64:$Rs, I32:$Rt)), (S2_lsl_r_p_nac DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(and I64:$src1, (shl I64:$Rs, I32:$Rt)), (S2_lsl_r_p_and DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(or I64:$src1, (shl I64:$Rs, I32:$Rt)), (S2_lsl_r_p_or DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
+def: Pat<(xor I64:$src1, (shl I64:$Rs, I32:$Rt)), (S2_lsl_r_p_xor DoubleRegs:$src1, DoubleRegs:$Rs, IntRegs:$Rt)>;
+
+def: Pat<(sra I64:$src1, I32:$src2), (S2_asr_r_p DoubleRegs:$src1, IntRegs:$src2)>;
+def: Pat<(srl I64:$src1, I32:$src2), (S2_lsr_r_p DoubleRegs:$src1, IntRegs:$src2)>;
+def: Pat<(shl I64:$src1, I32:$src2), (S2_asl_r_p DoubleRegs:$src1, IntRegs:$src2)>;
+def: Pat<(shl I64:$src1, I32:$src2), (S2_lsl_r_p DoubleRegs:$src1, IntRegs:$src2)>;
+
+def: Pat<(sra I32:$src1, I32:$src2), (S2_asr_r_r IntRegs:$src1, IntRegs:$src2)>;
+def: Pat<(srl I32:$src1, I32:$src2), (S2_lsr_r_r IntRegs:$src1, IntRegs:$src2)>;
+def: Pat<(shl I32:$src1, I32:$src2), (S2_asl_r_r IntRegs:$src1, IntRegs:$src2)>;
+def: Pat<(shl I32:$src1, I32:$src2), (S2_lsl_r_r IntRegs:$src1, IntRegs:$src2)>;
+
+def SDTHexagonINSERT:
+ SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
+ SDTCisInt<0>, SDTCisVT<3, i32>, SDTCisVT<4, i32>]>;
+def SDTHexagonINSERTRP:
+ SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
+ SDTCisInt<0>, SDTCisVT<3, i64>]>;
+
+def HexagonINSERT : SDNode<"HexagonISD::INSERT", SDTHexagonINSERT>;
+def HexagonINSERTRP : SDNode<"HexagonISD::INSERTRP", SDTHexagonINSERTRP>;
+
+def: Pat<(HexagonINSERT I32:$Rs, I32:$Rt, u5_0ImmPred:$u1, u5_0ImmPred:$u2),
+ (S2_insert I32:$Rs, I32:$Rt, u5_0ImmPred:$u1, u5_0ImmPred:$u2)>;
+def: Pat<(HexagonINSERT I64:$Rs, I64:$Rt, u6_0ImmPred:$u1, u6_0ImmPred:$u2),
+ (S2_insertp I64:$Rs, I64:$Rt, u6_0ImmPred:$u1, u6_0ImmPred:$u2)>;
+def: Pat<(HexagonINSERTRP I32:$Rs, I32:$Rt, I64:$Ru),
+ (S2_insert_rp I32:$Rs, I32:$Rt, I64:$Ru)>;
+def: Pat<(HexagonINSERTRP I64:$Rs, I64:$Rt, I64:$Ru),
+ (S2_insertp_rp I64:$Rs, I64:$Rt, I64:$Ru)>;
+
+let AddedComplexity = 100 in
+def: Pat<(or (or (shl (HexagonINSERT (i32 (zextloadi8 (add I32:$b, 2))),
+ (i32 (extloadi8 (add I32:$b, 3))),
+ 24, 8),
+ (i32 16)),
+ (shl (i32 (zextloadi8 (add I32:$b, 1))), (i32 8))),
+ (zextloadi8 I32:$b)),
+ (A2_swiz (L2_loadri_io I32:$b, 0))>;
+
+def SDTHexagonEXTRACTU:
+ SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
+def SDTHexagonEXTRACTURP:
+ SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<1>,
+ SDTCisVT<2, i64>]>;
+
+def HexagonEXTRACTU : SDNode<"HexagonISD::EXTRACTU", SDTHexagonEXTRACTU>;
+def HexagonEXTRACTURP : SDNode<"HexagonISD::EXTRACTURP", SDTHexagonEXTRACTURP>;
+
+def: Pat<(HexagonEXTRACTU I32:$src1, u5_0ImmPred:$src2, u5_0ImmPred:$src3),
+ (S2_extractu I32:$src1, u5_0ImmPred:$src2, u5_0ImmPred:$src3)>;
+def: Pat<(HexagonEXTRACTU I64:$src1, u6_0ImmPred:$src2, u6_0ImmPred:$src3),
+ (S2_extractup I64:$src1, u6_0ImmPred:$src2, u6_0ImmPred:$src3)>;
+def: Pat<(HexagonEXTRACTURP I32:$src1, I64:$src2),
+ (S2_extractu_rp I32:$src1, I64:$src2)>;
+def: Pat<(HexagonEXTRACTURP I64:$src1, I64:$src2),
+ (S2_extractup_rp I64:$src1, I64:$src2)>;
+
+def n8_0ImmPred: PatLeaf<(i32 imm), [{
+ int64_t V = N->getSExtValue();
+ return -255 <= V && V <= 0;
+}]>;
+
+// Change the sign of the immediate for Rd=-mpyi(Rs,#u8)
+def: Pat<(mul I32:$src1, (ineg n8_0ImmPred:$src2)),
+ (M2_mpysin IntRegs:$src1, u8_0ImmPred:$src2)>;
+
+multiclass MinMax_pats_p<PatFrag Op, InstHexagon Inst, InstHexagon SwapInst> {
+ defm: T_MinMax_pats<Op, I64, Inst, SwapInst>;
+}
+
+def: Pat<(add (Sext64 I32:$Rs), I64:$Rt),
+ (A2_addsp IntRegs:$Rs, DoubleRegs:$Rt)>;
+
+let AddedComplexity = 200 in {
+ defm: MinMax_pats_p<setge, A2_maxp, A2_minp>;
+ defm: MinMax_pats_p<setgt, A2_maxp, A2_minp>;
+ defm: MinMax_pats_p<setle, A2_minp, A2_maxp>;
+ defm: MinMax_pats_p<setlt, A2_minp, A2_maxp>;
+ defm: MinMax_pats_p<setuge, A2_maxup, A2_minup>;
+ defm: MinMax_pats_p<setugt, A2_maxup, A2_minup>;
+ defm: MinMax_pats_p<setule, A2_minup, A2_maxup>;
+ defm: MinMax_pats_p<setult, A2_minup, A2_maxup>;
+}
+
+def callv3 : SDNode<"HexagonISD::CALL", SDT_SPCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>;
+
+def callv3nr : SDNode<"HexagonISD::CALLnr", SDT_SPCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>;
+
+
+// Map call instruction
+def : Pat<(callv3 I32:$dst),
+ (J2_callr I32:$dst)>;
+def : Pat<(callv3 tglobaladdr:$dst),
+ (J2_call tglobaladdr:$dst)>;
+def : Pat<(callv3 texternalsym:$dst),
+ (J2_call texternalsym:$dst)>;
+def : Pat<(callv3 tglobaltlsaddr:$dst),
+ (J2_call tglobaltlsaddr:$dst)>;
+
+def : Pat<(callv3nr I32:$dst),
+ (PS_callr_nr I32:$dst)>;
+def : Pat<(callv3nr tglobaladdr:$dst),
+ (PS_call_nr tglobaladdr:$dst)>;
+def : Pat<(callv3nr texternalsym:$dst),
+ (PS_call_nr texternalsym:$dst)>;
+
+
+def addrga: PatLeaf<(i32 AddrGA:$Addr)>;
+def addrgp: PatLeaf<(i32 AddrGP:$Addr)>;
+
+
+// Pats for instruction selection.
+
+// A class to embed the usual comparison patfrags within a zext to i32.
+// The seteq/setne frags use "lhs" and "rhs" as operands, so use the same
+// names, or else the frag's "body" won't match the operands.
+class CmpInReg<PatFrag Op>
+ : PatFrag<(ops node:$lhs, node:$rhs),(i32 (zext (i1 Op.Fragment)))>;
+
+def: T_cmp32_rr_pat<A4_rcmpeq, CmpInReg<seteq>, i32>;
+def: T_cmp32_rr_pat<A4_rcmpneq, CmpInReg<setne>, i32>;
+
+def: T_cmp32_rr_pat<C4_cmpneq, setne, i1>;
+def: T_cmp32_rr_pat<C4_cmplte, setle, i1>;
+def: T_cmp32_rr_pat<C4_cmplteu, setule, i1>;
+
+def: T_cmp32_rr_pat<C4_cmplte, RevCmp<setge>, i1>;
+def: T_cmp32_rr_pat<C4_cmplteu, RevCmp<setuge>, i1>;
+
+let AddedComplexity = 100 in {
+ def: Pat<(i1 (seteq (and (xor I32:$Rs, I32:$Rt),
+ 255), 0)),
+ (A4_cmpbeq IntRegs:$Rs, IntRegs:$Rt)>;
+ def: Pat<(i1 (setne (and (xor I32:$Rs, I32:$Rt),
+ 255), 0)),
+ (C2_not (A4_cmpbeq IntRegs:$Rs, IntRegs:$Rt))>;
+ def: Pat<(i1 (seteq (and (xor I32:$Rs, I32:$Rt),
+ 65535), 0)),
+ (A4_cmpheq IntRegs:$Rs, IntRegs:$Rt)>;
+ def: Pat<(i1 (setne (and (xor I32:$Rs, I32:$Rt),
+ 65535), 0)),
+ (C2_not (A4_cmpheq IntRegs:$Rs, IntRegs:$Rt))>;
+}
+
+def: Pat<(i32 (zext (i1 (seteq I32:$Rs, s32_0ImmPred:$s8)))),
+ (A4_rcmpeqi IntRegs:$Rs, s32_0ImmPred:$s8)>;
+def: Pat<(i32 (zext (i1 (setne I32:$Rs, s32_0ImmPred:$s8)))),
+ (A4_rcmpneqi IntRegs:$Rs, s32_0ImmPred:$s8)>;
+
+// Preserve the S2_tstbit_r generation
+def: Pat<(i32 (zext (i1 (setne (i32 (and (i32 (shl 1, I32:$src2)),
+ I32:$src1)), 0)))),
+ (C2_muxii (S2_tstbit_r IntRegs:$src1, IntRegs:$src2), 1, 0)>;
+
+// The complexity of the combines involving immediates should be greater
+// than the complexity of the combine with two registers.
+let AddedComplexity = 50 in {
+def: Pat<(HexagonCOMBINE IntRegs:$r, s32_0ImmPred:$i),
+ (A4_combineri IntRegs:$r, s32_0ImmPred:$i)>;
+
+def: Pat<(HexagonCOMBINE s32_0ImmPred:$i, IntRegs:$r),
+ (A4_combineir s32_0ImmPred:$i, IntRegs:$r)>;
+}
+
+// The complexity of the combine with two immediates should be greater than
+// the complexity of a combine involving a register.
+let AddedComplexity = 75 in {
+def: Pat<(HexagonCOMBINE s8_0ImmPred:$s8, u32_0ImmPred:$u6),
+ (A4_combineii imm:$s8, imm:$u6)>;
+def: Pat<(HexagonCOMBINE s32_0ImmPred:$s8, s8_0ImmPred:$S8),
+ (A2_combineii imm:$s8, imm:$S8)>;
+}
+
+
+def ToZext64: OutPatFrag<(ops node:$Rs),
+ (i64 (A4_combineir 0, (i32 $Rs)))>;
+def ToSext64: OutPatFrag<(ops node:$Rs),
+ (i64 (A2_sxtw (i32 $Rs)))>;
+
+// Patterns to generate indexed loads with different forms of the address:
+// - frameindex,
+// - base + offset,
+// - base (without offset).
+multiclass Loadxm_pat<PatFrag Load, ValueType VT, PatFrag ValueMod,
+ PatLeaf ImmPred, InstHexagon MI> {
+ def: Pat<(VT (Load AddrFI:$fi)),
+ (VT (ValueMod (MI AddrFI:$fi, 0)))>;
+ def: Pat<(VT (Load (add AddrFI:$fi, ImmPred:$Off))),
+ (VT (ValueMod (MI AddrFI:$fi, imm:$Off)))>;
+ def: Pat<(VT (Load (add IntRegs:$Rs, ImmPred:$Off))),
+ (VT (ValueMod (MI IntRegs:$Rs, imm:$Off)))>;
+ def: Pat<(VT (Load I32:$Rs)),
+ (VT (ValueMod (MI IntRegs:$Rs, 0)))>;
+}
+
+defm: Loadxm_pat<extloadi1, i64, ToZext64, s32_0ImmPred, L2_loadrub_io>;
+defm: Loadxm_pat<extloadi8, i64, ToZext64, s32_0ImmPred, L2_loadrub_io>;
+defm: Loadxm_pat<extloadi16, i64, ToZext64, s31_1ImmPred, L2_loadruh_io>;
+defm: Loadxm_pat<zextloadi1, i64, ToZext64, s32_0ImmPred, L2_loadrub_io>;
+defm: Loadxm_pat<zextloadi8, i64, ToZext64, s32_0ImmPred, L2_loadrub_io>;
+defm: Loadxm_pat<zextloadi16, i64, ToZext64, s31_1ImmPred, L2_loadruh_io>;
+defm: Loadxm_pat<sextloadi8, i64, ToSext64, s32_0ImmPred, L2_loadrb_io>;
+defm: Loadxm_pat<sextloadi16, i64, ToSext64, s31_1ImmPred, L2_loadrh_io>;
+
+// Map Rdd = anyext(Rs) -> Rdd = combine(#0, Rs).
+def: Pat<(Aext64 I32:$src1), (ToZext64 IntRegs:$src1)>;
+
+multiclass T_LoadAbsReg_Pat <PatFrag ldOp, InstHexagon MI, ValueType VT = i32> {
+ def : Pat <(VT (ldOp (add (shl IntRegs:$src1, u2_0ImmPred:$src2),
+ (HexagonCONST32 tglobaladdr:$src3)))),
+ (MI IntRegs:$src1, u2_0ImmPred:$src2, tglobaladdr:$src3)>;
+ def : Pat <(VT (ldOp (add IntRegs:$src1,
+ (HexagonCONST32 tglobaladdr:$src2)))),
+ (MI IntRegs:$src1, 0, tglobaladdr:$src2)>;
+
+ def : Pat <(VT (ldOp (add (shl IntRegs:$src1, u2_0ImmPred:$src2),
+ (HexagonCONST32 tconstpool:$src3)))),
+ (MI IntRegs:$src1, u2_0ImmPred:$src2, tconstpool:$src3)>;
+ def : Pat <(VT (ldOp (add IntRegs:$src1,
+ (HexagonCONST32 tconstpool:$src2)))),
+ (MI IntRegs:$src1, 0, tconstpool:$src2)>;
+
+ def : Pat <(VT (ldOp (add (shl IntRegs:$src1, u2_0ImmPred:$src2),
+ (HexagonCONST32 tjumptable:$src3)))),
+ (MI IntRegs:$src1, u2_0ImmPred:$src2, tjumptable:$src3)>;
+ def : Pat <(VT (ldOp (add IntRegs:$src1,
+ (HexagonCONST32 tjumptable:$src2)))),
+ (MI IntRegs:$src1, 0, tjumptable:$src2)>;
+}
+
+let AddedComplexity = 60 in {
+defm : T_LoadAbsReg_Pat <sextloadi8, L4_loadrb_ur>;
+defm : T_LoadAbsReg_Pat <zextloadi8, L4_loadrub_ur>;
+defm : T_LoadAbsReg_Pat <extloadi8, L4_loadrub_ur>;
+
+defm : T_LoadAbsReg_Pat <sextloadi16, L4_loadrh_ur>;
+defm : T_LoadAbsReg_Pat <zextloadi16, L4_loadruh_ur>;
+defm : T_LoadAbsReg_Pat <extloadi16, L4_loadruh_ur>;
+
+defm : T_LoadAbsReg_Pat <load, L4_loadri_ur>;
+defm : T_LoadAbsReg_Pat <load, L4_loadrd_ur, i64>;
+}
+
+// 'def pats' for load instructions with a base + register-offset address and a
+// non-zero immediate value. The immediate value is used to left-shift the
+// second (offset) register operand.
+class Loadxs_pat<PatFrag Load, ValueType VT, InstHexagon MI>
+ : Pat<(VT (Load (add I32:$Rs,
+ (i32 (shl I32:$Rt, u2_0ImmPred:$u2))))),
+ (VT (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2))>;
+
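+// For illustration, the scaled-index loads selected below correspond to C code
+// such as (the mnemonic shown is indicative only):
+//   int get(int *a, int i) { return a[i]; }   // Rd = memw(Rs + Rt<<#2)
+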
+let AddedComplexity = 40 in {
+ def: Loadxs_pat<extloadi8, i32, L4_loadrub_rr>;
+ def: Loadxs_pat<zextloadi8, i32, L4_loadrub_rr>;
+ def: Loadxs_pat<sextloadi8, i32, L4_loadrb_rr>;
+ def: Loadxs_pat<extloadi16, i32, L4_loadruh_rr>;
+ def: Loadxs_pat<zextloadi16, i32, L4_loadruh_rr>;
+ def: Loadxs_pat<sextloadi16, i32, L4_loadrh_rr>;
+ def: Loadxs_pat<load, i32, L4_loadri_rr>;
+ def: Loadxs_pat<load, i64, L4_loadrd_rr>;
+}
+
+// 'def pats' for load instructions with a base + register-offset address and a
+// zero immediate value (no shift).
+class Loadxs_simple_pat<PatFrag Load, ValueType VT, InstHexagon MI>
+ : Pat<(VT (Load (add I32:$Rs, I32:$Rt))),
+ (VT (MI IntRegs:$Rs, IntRegs:$Rt, 0))>;
+
+let AddedComplexity = 20 in {
+ def: Loadxs_simple_pat<extloadi8, i32, L4_loadrub_rr>;
+ def: Loadxs_simple_pat<zextloadi8, i32, L4_loadrub_rr>;
+ def: Loadxs_simple_pat<sextloadi8, i32, L4_loadrb_rr>;
+ def: Loadxs_simple_pat<extloadi16, i32, L4_loadruh_rr>;
+ def: Loadxs_simple_pat<zextloadi16, i32, L4_loadruh_rr>;
+ def: Loadxs_simple_pat<sextloadi16, i32, L4_loadrh_rr>;
+ def: Loadxs_simple_pat<load, i32, L4_loadri_rr>;
+ def: Loadxs_simple_pat<load, i64, L4_loadrd_rr>;
+}
+
+// zext i1->i64
+def: Pat<(i64 (zext I1:$src1)),
+ (ToZext64 (C2_muxii PredRegs:$src1, 1, 0))>;
+
+// zext i32->i64
+def: Pat<(Zext64 I32:$src1),
+ (ToZext64 IntRegs:$src1)>;
+
+let AddedComplexity = 40 in
+multiclass T_StoreAbsReg_Pats <InstHexagon MI, RegisterClass RC, ValueType VT,
+ PatFrag stOp> {
+ def : Pat<(stOp (VT RC:$src4),
+ (add (shl I32:$src1, u2_0ImmPred:$src2),
+ u32_0ImmPred:$src3)),
+ (MI IntRegs:$src1, u2_0ImmPred:$src2, u32_0ImmPred:$src3, RC:$src4)>;
+
+ def : Pat<(stOp (VT RC:$src4),
+ (add (shl IntRegs:$src1, u2_0ImmPred:$src2),
+ (HexagonCONST32 tglobaladdr:$src3))),
+ (MI IntRegs:$src1, u2_0ImmPred:$src2, tglobaladdr:$src3, RC:$src4)>;
+
+ def : Pat<(stOp (VT RC:$src4),
+ (add IntRegs:$src1, (HexagonCONST32 tglobaladdr:$src3))),
+ (MI IntRegs:$src1, 0, tglobaladdr:$src3, RC:$src4)>;
+}
+
+defm : T_StoreAbsReg_Pats <S4_storerd_ur, DoubleRegs, i64, store>;
+defm : T_StoreAbsReg_Pats <S4_storeri_ur, IntRegs, i32, store>;
+defm : T_StoreAbsReg_Pats <S4_storerb_ur, IntRegs, i32, truncstorei8>;
+defm : T_StoreAbsReg_Pats <S4_storerh_ur, IntRegs, i32, truncstorei16>;
+
+class Storexs_pat<PatFrag Store, PatFrag Value, InstHexagon MI>
+ : Pat<(Store Value:$Ru, (add I32:$Rs,
+ (i32 (shl I32:$Rt, u2_0ImmPred:$u2)))),
+ (MI IntRegs:$Rs, IntRegs:$Rt, imm:$u2, Value:$Ru)>;
+
+let AddedComplexity = 40 in {
+ def: Storexs_pat<truncstorei8, I32, S4_storerb_rr>;
+ def: Storexs_pat<truncstorei16, I32, S4_storerh_rr>;
+ def: Storexs_pat<store, I32, S4_storeri_rr>;
+ def: Storexs_pat<store, I64, S4_storerd_rr>;
+}
+
+def s30_2ProperPred : PatLeaf<(i32 imm), [{
+ int64_t v = (int64_t)N->getSExtValue();
+ return isShiftedInt<30,2>(v) && !isShiftedInt<29,3>(v);
+}]>;
+def RoundTo8 : SDNodeXForm<imm, [{
+ int32_t Imm = N->getSExtValue();
+ return CurDAG->getTargetConstant(Imm & -8, SDLoc(N), MVT::i32);
+}]>;
+
+let AddedComplexity = 40 in
+def: Pat<(store I64:$Ru, (add I32:$Rs, s30_2ProperPred:$Off)),
+ (S2_storerd_io (A2_addi I32:$Rs, 4), (RoundTo8 $Off), I64:$Ru)>;
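+
+// The arithmetic behind the rewrite above: when the offset is a multiple of 4
+// but not of 8 (Off % 8 == 4), then (Rs + 4) + (Off & -8) == Rs + Off, so the
+// double-word store still targets the original address while using an
+// 8-byte-aligned offset. For example, Off = 12: (Rs + 4) + 8 == Rs + 12.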
+
+class Store_rr_pat<PatFrag Store, PatFrag Value, InstHexagon MI>
+ : Pat<(Store Value:$Ru, (add I32:$Rs, I32:$Rt)),
+ (MI IntRegs:$Rs, IntRegs:$Rt, 0, Value:$Ru)>;
+
+let AddedComplexity = 20 in {
+ def: Store_rr_pat<truncstorei8, I32, S4_storerb_rr>;
+ def: Store_rr_pat<truncstorei16, I32, S4_storerh_rr>;
+ def: Store_rr_pat<store, I32, S4_storeri_rr>;
+ def: Store_rr_pat<store, I64, S4_storerd_rr>;
+}
+
+
+def IMM_BYTE : SDNodeXForm<imm, [{
+  // -1 etc. is represented as 255 etc.
+  // Assigning to a byte restores the desired signed value.
+ int8_t imm = N->getSExtValue();
+ return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32);
+}]>;
+
+def IMM_HALF : SDNodeXForm<imm, [{
+  // -1 etc. is represented as 65535 etc.
+  // Assigning to a short restores the desired signed value.
+ int16_t imm = N->getSExtValue();
+ return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32);
+}]>;
+
+def IMM_WORD : SDNodeXForm<imm, [{
+  // -1 etc. can be represented as 4294967295 etc.
+  // Currently that does not happen, but some optimization might convert
+  // -1 to a large positive number; assigning to a word restores the
+  // desired signed value.
+ int32_t imm = N->getSExtValue();
+ return CurDAG->getTargetConstant(imm, SDLoc(N), MVT::i32);
+}]>;
+
+def ToImmByte : OutPatFrag<(ops node:$R), (IMM_BYTE $R)>;
+def ToImmHalf : OutPatFrag<(ops node:$R), (IMM_HALF $R)>;
+def ToImmWord : OutPatFrag<(ops node:$R), (IMM_WORD $R)>;
+
+// Emit store-immediate, but only when the stored value will not be constant-
+// extended. The reason for that is that there is no pass that can optimize
+// constant extenders in store-immediate instructions. In some cases we can
+// end up with a number of such stores, all of which store the same extended
+// value (e.g. after unrolling a loop that initializes a floating-point array).
+
+// Predicates to determine if the 16-bit immediate is expressible as a sign-
+// extended 8-bit immediate. Store-immediate-halfword will ignore any bits
+// beyond 0..15, so we don't care what is in there.
+
+def i16in8ImmPred: PatLeaf<(i32 imm), [{
+ int64_t v = (int16_t)N->getSExtValue();
+ return v == (int64_t)(int8_t)v;
+}]>;
+
+// Predicates to determine if the 32-bit immediate is expressible as a sign-
+// extended 8-bit immediate.
+def i32in8ImmPred: PatLeaf<(i32 imm), [{
+ int64_t v = (int32_t)N->getSExtValue();
+ return v == (int64_t)(int8_t)v;
+}]>;
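+
+// The round-trip test used above is the usual "fits in a signed byte" check.
+// A minimal C++ illustration (fitsInS8 is a made-up name):
+//   bool fitsInS8(int32_t v) { return v == (int32_t)(int8_t)v; }
+//   // fitsInS8(-123) == true, fitsInS8(133) == false, fitsInS8(-200) == false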
+
+
+let AddedComplexity = 40 in {
+ // Even though the offset is not extendable in the store-immediate, we
+ // can still generate the fi# in the base address. If the final offset
+ // is not valid for the instruction, we will replace it with a scratch
+ // register.
+// def: Storexm_fi_pat <truncstorei8, s32_0ImmPred, ToImmByte, S4_storeirb_io>;
+// def: Storexm_fi_pat <truncstorei16, i16in8ImmPred, ToImmHalf,
+// S4_storeirh_io>;
+// def: Storexm_fi_pat <store, i32in8ImmPred, ToImmWord, S4_storeiri_io>;
+
+// defm: Storexm_fi_add_pat <truncstorei8, s32_0ImmPred, u6_0ImmPred, ToImmByte,
+// S4_storeirb_io>;
+// defm: Storexm_fi_add_pat <truncstorei16, i16in8ImmPred, u6_1ImmPred,
+// ToImmHalf, S4_storeirh_io>;
+// defm: Storexm_fi_add_pat <store, i32in8ImmPred, u6_2ImmPred, ToImmWord,
+// S4_storeiri_io>;
+
+ defm: Storexm_add_pat<truncstorei8, s32_0ImmPred, u6_0ImmPred, ToImmByte,
+ S4_storeirb_io>;
+ defm: Storexm_add_pat<truncstorei16, i16in8ImmPred, u6_1ImmPred, ToImmHalf,
+ S4_storeirh_io>;
+ defm: Storexm_add_pat<store, i32in8ImmPred, u6_2ImmPred, ToImmWord,
+ S4_storeiri_io>;
+}
+
+def: Storexm_simple_pat<truncstorei8, s32_0ImmPred, ToImmByte, S4_storeirb_io>;
+def: Storexm_simple_pat<truncstorei16, s32_0ImmPred, ToImmHalf, S4_storeirh_io>;
+def: Storexm_simple_pat<store, s32_0ImmPred, ToImmWord, S4_storeiri_io>;
+
+// op(Ps, op(Pt, Pu))
+class LogLog_pat<SDNode Op1, SDNode Op2, InstHexagon MI>
+ : Pat<(i1 (Op1 I1:$Ps, (Op2 I1:$Pt, I1:$Pu))),
+ (MI I1:$Ps, I1:$Pt, I1:$Pu)>;
+
+// op(Ps, op(Pt, ~Pu))
+class LogLogNot_pat<SDNode Op1, SDNode Op2, InstHexagon MI>
+ : Pat<(i1 (Op1 I1:$Ps, (Op2 I1:$Pt, (not I1:$Pu)))),
+ (MI I1:$Ps, I1:$Pt, I1:$Pu)>;
+
+def: LogLog_pat<and, and, C4_and_and>;
+def: LogLog_pat<and, or, C4_and_or>;
+def: LogLog_pat<or, and, C4_or_and>;
+def: LogLog_pat<or, or, C4_or_or>;
+
+def: LogLogNot_pat<and, and, C4_and_andn>;
+def: LogLogNot_pat<and, or, C4_and_orn>;
+def: LogLogNot_pat<or, and, C4_or_andn>;
+def: LogLogNot_pat<or, or, C4_or_orn>;
+
+//===----------------------------------------------------------------------===//
+// PIC: Support for PIC compilations. The patterns and SD nodes defined
+// below are needed to support code generation for PIC.
+//===----------------------------------------------------------------------===//
+
+def SDT_HexagonAtGot
+ : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>]>;
+def SDT_HexagonAtPcrel
+ : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+
+// AT_GOT address-of-GOT, address-of-global, offset-in-global
+def HexagonAtGot : SDNode<"HexagonISD::AT_GOT", SDT_HexagonAtGot>;
+// AT_PCREL address-of-global
+def HexagonAtPcrel : SDNode<"HexagonISD::AT_PCREL", SDT_HexagonAtPcrel>;
+
+def: Pat<(HexagonAtGot I32:$got, I32:$addr, (i32 0)),
+ (L2_loadri_io I32:$got, imm:$addr)>;
+def: Pat<(HexagonAtGot I32:$got, I32:$addr, s30_2ImmPred:$off),
+ (A2_addi (L2_loadri_io I32:$got, imm:$addr), imm:$off)>;
+def: Pat<(HexagonAtPcrel I32:$addr),
+ (C4_addipc imm:$addr)>;
+
+def: Pat<(i64 (and I64:$Rs, (i64 (not I64:$Rt)))),
+ (A4_andnp DoubleRegs:$Rs, DoubleRegs:$Rt)>;
+def: Pat<(i64 (or I64:$Rs, (i64 (not I64:$Rt)))),
+ (A4_ornp DoubleRegs:$Rs, DoubleRegs:$Rt)>;
+
+def: Pat<(add I32:$Rs, (add I32:$Ru, s32_0ImmPred:$s6)),
+ (S4_addaddi IntRegs:$Rs, IntRegs:$Ru, imm:$s6)>;
+
+// Rd=add(Rs,sub(#s6,Ru))
+def: Pat<(add I32:$src1, (sub s32_0ImmPred:$src2,
+ I32:$src3)),
+ (S4_subaddi IntRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3)>;
+
+// Rd=sub(add(Rs,#s6),Ru)
+def: Pat<(sub (add I32:$src1, s32_0ImmPred:$src2),
+ I32:$src3),
+ (S4_subaddi IntRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3)>;
+
+// Rd=add(sub(Rs,Ru),#s6)
+def: Pat<(add (sub I32:$src1, I32:$src3),
+ (s32_0ImmPred:$src2)),
+ (S4_subaddi IntRegs:$src1, s32_0ImmPred:$src2, IntRegs:$src3)>;
+
+def: Pat<(xor I64:$dst2,
+ (xor I64:$Rss, I64:$Rtt)),
+ (M4_xor_xacc DoubleRegs:$dst2, DoubleRegs:$Rss, DoubleRegs:$Rtt)>;
+def: Pat<(or I32:$Ru, (and (i32 IntRegs:$_src_), s32_0ImmPred:$s10)),
+ (S4_or_andix IntRegs:$Ru, IntRegs:$_src_, imm:$s10)>;
+
+def: Pat<(or I32:$src1, (and I32:$Rs, s32_0ImmPred:$s10)),
+ (S4_or_andi IntRegs:$src1, IntRegs:$Rs, imm:$s10)>;
+
+def: Pat<(or I32:$src1, (or I32:$Rs, s32_0ImmPred:$s10)),
+ (S4_or_ori IntRegs:$src1, IntRegs:$Rs, imm:$s10)>;
+
+
+
+// Count trailing zeros: 64-bit.
+def: Pat<(i32 (trunc (cttz I64:$Rss))), (S2_ct0p I64:$Rss)>;
+
+// Count trailing ones: 64-bit.
+def: Pat<(i32 (trunc (cttz (not I64:$Rss)))), (S2_ct1p I64:$Rss)>;
+
+// Define ctlz/cttz patterns whose results require zero-extension to 64 bits.
+def: Pat<(i64 (ctlz I64:$Rss)), (ToZext64 (S2_cl0p I64:$Rss))>;
+def: Pat<(i64 (cttz I64:$Rss)), (ToZext64 (S2_ct0p I64:$Rss))>;
+def: Pat<(i64 (ctlz (not I64:$Rss))), (ToZext64 (S2_cl1p I64:$Rss))>;
+def: Pat<(i64 (cttz (not I64:$Rss))), (ToZext64 (S2_ct1p I64:$Rss))>;
+
+
+let AddedComplexity = 20 in { // Complexity greater than cmp reg-imm.
+ def: Pat<(i1 (seteq (and (shl 1, u5_0ImmPred:$u5), I32:$Rs), 0)),
+ (S4_ntstbit_i I32:$Rs, u5_0ImmPred:$u5)>;
+ def: Pat<(i1 (seteq (and (shl 1, I32:$Rt), I32:$Rs), 0)),
+ (S4_ntstbit_r I32:$Rs, I32:$Rt)>;
+}
+
+// Add extra complexity to prefer these instructions over bitsset/bitsclr.
+// The reason is that tstbit/ntstbit can be folded into a compound instruction:
+// if ([!]tstbit(...)) jump ...
+let AddedComplexity = 100 in
+def: Pat<(i1 (setne (and I32:$Rs, (i32 IsPow2_32:$u5)), (i32 0))),
+ (S2_tstbit_i I32:$Rs, (Log2_32 imm:$u5))>;
+
+let AddedComplexity = 100 in
+def: Pat<(i1 (seteq (and I32:$Rs, (i32 IsPow2_32:$u5)), (i32 0))),
+ (S4_ntstbit_i I32:$Rs, (Log2_32 imm:$u5))>;
+
+// Do not increase complexity of these patterns. In the DAG, "cmp i8" may be
+// represented as a compare against "value & 0xFF", which is an exact match
+// for cmpb (same for cmph). The patterns below do not contain any additional
+// complexity that would make them preferable, and if they were actually used
+// instead of cmpb/cmph, they would result in a compare against register that
+// is loaded with the byte/half mask (i.e. 0xFF or 0xFFFF).
+def: Pat<(i1 (setne (and I32:$Rs, u6_0ImmPred:$u6), 0)),
+ (C4_nbitsclri I32:$Rs, u6_0ImmPred:$u6)>;
+def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), 0)),
+ (C4_nbitsclr I32:$Rs, I32:$Rt)>;
+def: Pat<(i1 (setne (and I32:$Rs, I32:$Rt), I32:$Rt)),
+ (C4_nbitsset I32:$Rs, I32:$Rt)>;
+
+
+def: Pat<(add (mul I32:$Rs, u6_0ImmPred:$U6), u32_0ImmPred:$u6),
+ (M4_mpyri_addi imm:$u6, IntRegs:$Rs, imm:$U6)>;
+def: Pat<(add (mul I32:$Rs, I32:$Rt), u32_0ImmPred:$u6),
+ (M4_mpyrr_addi imm:$u6, IntRegs:$Rs, IntRegs:$Rt)>;
+
+def: Pat<(add I32:$src1, (mul I32:$src3, u6_2ImmPred:$src2)),
+ (M4_mpyri_addr_u2 IntRegs:$src1, imm:$src2, IntRegs:$src3)>;
+def: Pat<(add I32:$src1, (mul I32:$src3, u32_0ImmPred:$src2)),
+ (M4_mpyri_addr IntRegs:$src1, IntRegs:$src3, imm:$src2)>;
+
+def: Pat<(add I32:$Ru, (mul (i32 IntRegs:$_src_), I32:$Rs)),
+ (M4_mpyrr_addr IntRegs:$Ru, IntRegs:$_src_, IntRegs:$Rs)>;
+
+def: T_vcmp_pat<A4_vcmpbgt, setgt, v8i8>;
+
+class T_Shift_CommOp_pat<InstHexagon MI, SDNode Op, SDNode ShOp>
+ : Pat<(Op (ShOp IntRegs:$Rx, u5_0ImmPred:$U5), u32_0ImmPred:$u8),
+ (MI u32_0ImmPred:$u8, IntRegs:$Rx, u5_0ImmPred:$U5)>;
+
+let AddedComplexity = 200 in {
+ def : T_Shift_CommOp_pat <S4_addi_asl_ri, add, shl>;
+ def : T_Shift_CommOp_pat <S4_addi_lsr_ri, add, srl>;
+ def : T_Shift_CommOp_pat <S4_andi_asl_ri, and, shl>;
+ def : T_Shift_CommOp_pat <S4_andi_lsr_ri, and, srl>;
+}
+
+let AddedComplexity = 30 in {
+ def : T_Shift_CommOp_pat <S4_ori_asl_ri, or, shl>;
+ def : T_Shift_CommOp_pat <S4_ori_lsr_ri, or, srl>;
+}
+
+class T_Shift_Op_pat<InstHexagon MI, SDNode Op, SDNode ShOp>
+ : Pat<(Op u32_0ImmPred:$u8, (ShOp IntRegs:$Rx, u5_0ImmPred:$U5)),
+ (MI u32_0ImmPred:$u8, IntRegs:$Rx, u5_0ImmPred:$U5)>;
+
+def : T_Shift_Op_pat <S4_subi_asl_ri, sub, shl>;
+def : T_Shift_Op_pat <S4_subi_lsr_ri, sub, srl>;
+
+let AddedComplexity = 200 in {
+ def: Pat<(add addrga:$addr, (shl I32:$src2, u5_0ImmPred:$src3)),
+ (S4_addi_asl_ri addrga:$addr, IntRegs:$src2, u5_0ImmPred:$src3)>;
+ def: Pat<(add addrga:$addr, (srl I32:$src2, u5_0ImmPred:$src3)),
+ (S4_addi_lsr_ri addrga:$addr, IntRegs:$src2, u5_0ImmPred:$src3)>;
+ def: Pat<(sub addrga:$addr, (shl I32:$src2, u5_0ImmPred:$src3)),
+ (S4_subi_asl_ri addrga:$addr, IntRegs:$src2, u5_0ImmPred:$src3)>;
+ def: Pat<(sub addrga:$addr, (srl I32:$src2, u5_0ImmPred:$src3)),
+ (S4_subi_lsr_ri addrga:$addr, IntRegs:$src2, u5_0ImmPred:$src3)>;
+}
+
+def: Pat<(shl s6_0ImmPred:$s6, I32:$Rt),
+ (S4_lsli imm:$s6, IntRegs:$Rt)>;
+
+
+//===----------------------------------------------------------------------===//
+// MEMOP
+//===----------------------------------------------------------------------===//
+
+def m5_0Imm8Pred : PatLeaf<(i32 imm), [{
+ int8_t V = N->getSExtValue();
+ return -32 < V && V <= -1;
+}]>;
+
+def m5_0Imm16Pred : PatLeaf<(i32 imm), [{
+ int16_t V = N->getSExtValue();
+ return -32 < V && V <= -1;
+}]>;
+
+def m5_0ImmPred : PatLeaf<(i32 imm), [{
+ int64_t V = N->getSExtValue();
+ return -31 <= V && V <= -1;
+}]>;
+
+def IsNPow2_8 : PatLeaf<(i32 imm), [{
+ uint8_t NV = ~N->getZExtValue();
+ return isPowerOf2_32(NV);
+}]>;
+
+def IsNPow2_16 : PatLeaf<(i32 imm), [{
+ uint16_t NV = ~N->getZExtValue();
+ return isPowerOf2_32(NV);
+}]>;
+
+def Log2_8 : SDNodeXForm<imm, [{
+ uint8_t V = N->getZExtValue();
+ return CurDAG->getTargetConstant(Log2_32(V), SDLoc(N), MVT::i32);
+}]>;
+
+def Log2_16 : SDNodeXForm<imm, [{
+ uint16_t V = N->getZExtValue();
+ return CurDAG->getTargetConstant(Log2_32(V), SDLoc(N), MVT::i32);
+}]>;
+
+def LogN2_8 : SDNodeXForm<imm, [{
+ uint8_t NV = ~N->getZExtValue();
+ return CurDAG->getTargetConstant(Log2_32(NV), SDLoc(N), MVT::i32);
+}]>;
+
+def LogN2_16 : SDNodeXForm<imm, [{
+ uint16_t NV = ~N->getZExtValue();
+ return CurDAG->getTargetConstant(Log2_32(NV), SDLoc(N), MVT::i32);
+}]>;
+
+def NegImm8 : SDNodeXForm<imm, [{
+ int8_t NV = -N->getSExtValue();
+ return CurDAG->getTargetConstant(NV, SDLoc(N), MVT::i32);
+}]>;
+
+def NegImm16 : SDNodeXForm<imm, [{
+ int16_t NV = -N->getSExtValue();
+ return CurDAG->getTargetConstant(NV, SDLoc(N), MVT::i32);
+}]>;
+
+def NegImm32 : SDNodeXForm<imm, [{
+ int32_t NV = -N->getSExtValue();
+ return CurDAG->getTargetConstant(NV, SDLoc(N), MVT::i32);
+}]>;
+
+def IdImm : SDNodeXForm<imm, [{ return SDValue(N, 0); }]>;
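+
+// Editorial note: the leaves and transforms above feed the Memopxi patterns
+// further down. As a hedged example of the intent, "*p &= 0xFB" masks off a
+// single bit; IsNPow2_8 accepts the mask (~0xFB == 0x04 is a power of two)
+// and LogN2_8 rewrites it to the bit number 2, so the store can be selected
+// as the memop form "memb(Rs+#0) = clrbit(#2)" (assuming the usual
+// L4_iand_memopb_io mnemonic). NegImm8/16/32 play the analogous role for
+// folding small negative addends into the add/sub memops.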
+
+multiclass Memopxr_simple_pat<PatFrag Load, PatFrag Store, SDNode Oper,
+ InstHexagon MI> {
+ // Addr: i32
+ def: Pat<(Store (Oper (Load I32:$Rs), I32:$A), I32:$Rs),
+ (MI I32:$Rs, 0, I32:$A)>;
+ // Addr: fi
+ def: Pat<(Store (Oper (Load AddrFI:$Rs), I32:$A), AddrFI:$Rs),
+ (MI AddrFI:$Rs, 0, I32:$A)>;
+}
+
+multiclass Memopxr_add_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred,
+ SDNode Oper, InstHexagon MI> {
+ // Addr: i32
+ def: Pat<(Store (Oper (Load (add I32:$Rs, ImmPred:$Off)), I32:$A),
+ (add I32:$Rs, ImmPred:$Off)),
+ (MI I32:$Rs, imm:$Off, I32:$A)>;
+ def: Pat<(Store (Oper (Load (IsOrAdd I32:$Rs, ImmPred:$Off)), I32:$A),
+ (IsOrAdd I32:$Rs, ImmPred:$Off)),
+ (MI I32:$Rs, imm:$Off, I32:$A)>;
+ // Addr: fi
+ def: Pat<(Store (Oper (Load (add AddrFI:$Rs, ImmPred:$Off)), I32:$A),
+ (add AddrFI:$Rs, ImmPred:$Off)),
+ (MI AddrFI:$Rs, imm:$Off, I32:$A)>;
+ def: Pat<(Store (Oper (Load (IsOrAdd AddrFI:$Rs, ImmPred:$Off)), I32:$A),
+ (IsOrAdd AddrFI:$Rs, ImmPred:$Off)),
+ (MI AddrFI:$Rs, imm:$Off, I32:$A)>;
+}
+
+multiclass Memopxr_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred,
+ SDNode Oper, InstHexagon MI> {
+ defm: Memopxr_simple_pat <Load, Store, Oper, MI>;
+ defm: Memopxr_add_pat <Load, Store, ImmPred, Oper, MI>;
+}
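+
+// A hedged usage sketch (editorial, not from the original source): with the
+// instantiations below, "*p += r" for an int32_t *p in r0 and r in r1 is
+// matched by Memopxr_pat<load, store, u6_2ImmPred, add, L4_add_memopw_io>
+// and selects the single load-modify-store memop
+//   memw(r0+#0) += r1
+// The _add_ variants additionally fold a small immediate offset from the
+// address computation into the #u6 field.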
+
+let AddedComplexity = 180 in {
+ // add reg
+ defm: Memopxr_pat<extloadi8, truncstorei8, u6_0ImmPred, add,
+ /*anyext*/ L4_add_memopb_io>;
+ defm: Memopxr_pat<sextloadi8, truncstorei8, u6_0ImmPred, add,
+ /*sext*/ L4_add_memopb_io>;
+ defm: Memopxr_pat<zextloadi8, truncstorei8, u6_0ImmPred, add,
+ /*zext*/ L4_add_memopb_io>;
+ defm: Memopxr_pat<extloadi16, truncstorei16, u6_1ImmPred, add,
+ /*anyext*/ L4_add_memoph_io>;
+ defm: Memopxr_pat<sextloadi16, truncstorei16, u6_1ImmPred, add,
+ /*sext*/ L4_add_memoph_io>;
+ defm: Memopxr_pat<zextloadi16, truncstorei16, u6_1ImmPred, add,
+ /*zext*/ L4_add_memoph_io>;
+ defm: Memopxr_pat<load, store, u6_2ImmPred, add, L4_add_memopw_io>;
+
+ // sub reg
+ defm: Memopxr_pat<extloadi8, truncstorei8, u6_0ImmPred, sub,
+ /*anyext*/ L4_sub_memopb_io>;
+ defm: Memopxr_pat<sextloadi8, truncstorei8, u6_0ImmPred, sub,
+ /*sext*/ L4_sub_memopb_io>;
+ defm: Memopxr_pat<zextloadi8, truncstorei8, u6_0ImmPred, sub,
+ /*zext*/ L4_sub_memopb_io>;
+ defm: Memopxr_pat<extloadi16, truncstorei16, u6_1ImmPred, sub,
+ /*anyext*/ L4_sub_memoph_io>;
+ defm: Memopxr_pat<sextloadi16, truncstorei16, u6_1ImmPred, sub,
+ /*sext*/ L4_sub_memoph_io>;
+ defm: Memopxr_pat<zextloadi16, truncstorei16, u6_1ImmPred, sub,
+ /*zext*/ L4_sub_memoph_io>;
+ defm: Memopxr_pat<load, store, u6_2ImmPred, sub, L4_sub_memopw_io>;
+
+ // and reg
+ defm: Memopxr_pat<extloadi8, truncstorei8, u6_0ImmPred, and,
+ /*anyext*/ L4_and_memopb_io>;
+ defm: Memopxr_pat<sextloadi8, truncstorei8, u6_0ImmPred, and,
+ /*sext*/ L4_and_memopb_io>;
+ defm: Memopxr_pat<zextloadi8, truncstorei8, u6_0ImmPred, and,
+ /*zext*/ L4_and_memopb_io>;
+ defm: Memopxr_pat<extloadi16, truncstorei16, u6_1ImmPred, and,
+ /*anyext*/ L4_and_memoph_io>;
+ defm: Memopxr_pat<sextloadi16, truncstorei16, u6_1ImmPred, and,
+ /*sext*/ L4_and_memoph_io>;
+ defm: Memopxr_pat<zextloadi16, truncstorei16, u6_1ImmPred, and,
+ /*zext*/ L4_and_memoph_io>;
+ defm: Memopxr_pat<load, store, u6_2ImmPred, and, L4_and_memopw_io>;
+
+ // or reg
+ defm: Memopxr_pat<extloadi8, truncstorei8, u6_0ImmPred, or,
+ /*anyext*/ L4_or_memopb_io>;
+ defm: Memopxr_pat<sextloadi8, truncstorei8, u6_0ImmPred, or,
+ /*sext*/ L4_or_memopb_io>;
+ defm: Memopxr_pat<zextloadi8, truncstorei8, u6_0ImmPred, or,
+ /*zext*/ L4_or_memopb_io>;
+ defm: Memopxr_pat<extloadi16, truncstorei16, u6_1ImmPred, or,
+ /*anyext*/ L4_or_memoph_io>;
+ defm: Memopxr_pat<sextloadi16, truncstorei16, u6_1ImmPred, or,
+ /*sext*/ L4_or_memoph_io>;
+ defm: Memopxr_pat<zextloadi16, truncstorei16, u6_1ImmPred, or,
+ /*zext*/ L4_or_memoph_io>;
+ defm: Memopxr_pat<load, store, u6_2ImmPred, or, L4_or_memopw_io>;
+}
+
+
+multiclass Memopxi_simple_pat<PatFrag Load, PatFrag Store, SDNode Oper,
+ PatFrag Arg, SDNodeXForm ArgMod,
+ InstHexagon MI> {
+ // Addr: i32
+ def: Pat<(Store (Oper (Load I32:$Rs), Arg:$A), I32:$Rs),
+ (MI I32:$Rs, 0, (ArgMod Arg:$A))>;
+ // Addr: fi
+ def: Pat<(Store (Oper (Load AddrFI:$Rs), Arg:$A), AddrFI:$Rs),
+ (MI AddrFI:$Rs, 0, (ArgMod Arg:$A))>;
+}
+
+multiclass Memopxi_add_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred,
+ SDNode Oper, PatFrag Arg, SDNodeXForm ArgMod,
+ InstHexagon MI> {
+ // Addr: i32
+ def: Pat<(Store (Oper (Load (add I32:$Rs, ImmPred:$Off)), Arg:$A),
+ (add I32:$Rs, ImmPred:$Off)),
+ (MI I32:$Rs, imm:$Off, (ArgMod Arg:$A))>;
+ def: Pat<(Store (Oper (Load (IsOrAdd I32:$Rs, ImmPred:$Off)), Arg:$A),
+ (IsOrAdd I32:$Rs, ImmPred:$Off)),
+ (MI I32:$Rs, imm:$Off, (ArgMod Arg:$A))>;
+ // Addr: fi
+ def: Pat<(Store (Oper (Load (add AddrFI:$Rs, ImmPred:$Off)), Arg:$A),
+ (add AddrFI:$Rs, ImmPred:$Off)),
+ (MI AddrFI:$Rs, imm:$Off, (ArgMod Arg:$A))>;
+ def: Pat<(Store (Oper (Load (IsOrAdd AddrFI:$Rs, ImmPred:$Off)), Arg:$A),
+ (IsOrAdd AddrFI:$Rs, ImmPred:$Off)),
+ (MI AddrFI:$Rs, imm:$Off, (ArgMod Arg:$A))>;
+}
+
+multiclass Memopxi_pat<PatFrag Load, PatFrag Store, PatFrag ImmPred,
+ SDNode Oper, PatFrag Arg, SDNodeXForm ArgMod,
+ InstHexagon MI> {
+ defm: Memopxi_simple_pat <Load, Store, Oper, Arg, ArgMod, MI>;
+ defm: Memopxi_add_pat <Load, Store, ImmPred, Oper, Arg, ArgMod, MI>;
+}
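+
+// Similarly, as an editorial sketch for the immediate forms: "*p += 3" is
+// expected to select "memw(r0+#0) += #3" via L4_iadd_memopw_io, while
+// "*p += -3" matches the (add, m5_0ImmPred) entries below, is rewritten by
+// NegImm32 to +3, and selects "memw(r0+#0) -= #3" via L4_isub_memopw_io
+// (assuming the usual memop mnemonics).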
+
+
+let AddedComplexity = 200 in {
+ // add imm
+ defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, add, u5_0ImmPred,
+ /*anyext*/ IdImm, L4_iadd_memopb_io>;
+ defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, add, u5_0ImmPred,
+ /*sext*/ IdImm, L4_iadd_memopb_io>;
+ defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, add, u5_0ImmPred,
+ /*zext*/ IdImm, L4_iadd_memopb_io>;
+ defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, add, u5_0ImmPred,
+ /*anyext*/ IdImm, L4_iadd_memoph_io>;
+ defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, add, u5_0ImmPred,
+ /*sext*/ IdImm, L4_iadd_memoph_io>;
+ defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, add, u5_0ImmPred,
+ /*zext*/ IdImm, L4_iadd_memoph_io>;
+ defm: Memopxi_pat<load, store, u6_2ImmPred, add, u5_0ImmPred, IdImm,
+ L4_iadd_memopw_io>;
+ defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, sub, m5_0Imm8Pred,
+ /*anyext*/ NegImm8, L4_iadd_memopb_io>;
+ defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, sub, m5_0Imm8Pred,
+ /*sext*/ NegImm8, L4_iadd_memopb_io>;
+ defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, sub, m5_0Imm8Pred,
+ /*zext*/ NegImm8, L4_iadd_memopb_io>;
+ defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, sub, m5_0Imm16Pred,
+ /*anyext*/ NegImm16, L4_iadd_memoph_io>;
+ defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, sub, m5_0Imm16Pred,
+ /*sext*/ NegImm16, L4_iadd_memoph_io>;
+ defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, sub, m5_0Imm16Pred,
+ /*zext*/ NegImm16, L4_iadd_memoph_io>;
+ defm: Memopxi_pat<load, store, u6_2ImmPred, sub, m5_0ImmPred, NegImm32,
+ L4_iadd_memopw_io>;
+
+ // sub imm
+ defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, sub, u5_0ImmPred,
+ /*anyext*/ IdImm, L4_isub_memopb_io>;
+ defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, sub, u5_0ImmPred,
+ /*sext*/ IdImm, L4_isub_memopb_io>;
+ defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, sub, u5_0ImmPred,
+ /*zext*/ IdImm, L4_isub_memopb_io>;
+ defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, sub, u5_0ImmPred,
+ /*anyext*/ IdImm, L4_isub_memoph_io>;
+ defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, sub, u5_0ImmPred,
+ /*sext*/ IdImm, L4_isub_memoph_io>;
+ defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, sub, u5_0ImmPred,
+ /*zext*/ IdImm, L4_isub_memoph_io>;
+ defm: Memopxi_pat<load, store, u6_2ImmPred, sub, u5_0ImmPred, IdImm,
+ L4_isub_memopw_io>;
+ defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, add, m5_0Imm8Pred,
+ /*anyext*/ NegImm8, L4_isub_memopb_io>;
+ defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, add, m5_0Imm8Pred,
+ /*sext*/ NegImm8, L4_isub_memopb_io>;
+ defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, add, m5_0Imm8Pred,
+ /*zext*/ NegImm8, L4_isub_memopb_io>;
+ defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, add, m5_0Imm16Pred,
+ /*anyext*/ NegImm16, L4_isub_memoph_io>;
+ defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, add, m5_0Imm16Pred,
+ /*sext*/ NegImm16, L4_isub_memoph_io>;
+ defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, add, m5_0Imm16Pred,
+ /*zext*/ NegImm16, L4_isub_memoph_io>;
+ defm: Memopxi_pat<load, store, u6_2ImmPred, add, m5_0ImmPred, NegImm32,
+ L4_isub_memopw_io>;
+
+ // clrbit imm
+ defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, and, IsNPow2_8,
+ /*anyext*/ LogN2_8, L4_iand_memopb_io>;
+ defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, and, IsNPow2_8,
+ /*sext*/ LogN2_8, L4_iand_memopb_io>;
+ defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, and, IsNPow2_8,
+ /*zext*/ LogN2_8, L4_iand_memopb_io>;
+ defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, and, IsNPow2_16,
+ /*anyext*/ LogN2_16, L4_iand_memoph_io>;
+ defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, and, IsNPow2_16,
+ /*sext*/ LogN2_16, L4_iand_memoph_io>;
+ defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, and, IsNPow2_16,
+ /*zext*/ LogN2_16, L4_iand_memoph_io>;
+ defm: Memopxi_pat<load, store, u6_2ImmPred, and, IsNPow2_32,
+ LogN2_32, L4_iand_memopw_io>;
+
+ // setbit imm
+ defm: Memopxi_pat<extloadi8, truncstorei8, u6_0ImmPred, or, IsPow2_32,
+ /*anyext*/ Log2_8, L4_ior_memopb_io>;
+ defm: Memopxi_pat<sextloadi8, truncstorei8, u6_0ImmPred, or, IsPow2_32,
+ /*sext*/ Log2_8, L4_ior_memopb_io>;
+ defm: Memopxi_pat<zextloadi8, truncstorei8, u6_0ImmPred, or, IsPow2_32,
+ /*zext*/ Log2_8, L4_ior_memopb_io>;
+ defm: Memopxi_pat<extloadi16, truncstorei16, u6_1ImmPred, or, IsPow2_32,
+ /*anyext*/ Log2_16, L4_ior_memoph_io>;
+ defm: Memopxi_pat<sextloadi16, truncstorei16, u6_1ImmPred, or, IsPow2_32,
+ /*sext*/ Log2_16, L4_ior_memoph_io>;
+ defm: Memopxi_pat<zextloadi16, truncstorei16, u6_1ImmPred, or, IsPow2_32,
+ /*zext*/ Log2_16, L4_ior_memoph_io>;
+ defm: Memopxi_pat<load, store, u6_2ImmPred, or, IsPow2_32,
+ Log2_32, L4_ior_memopw_io>;
+}
+
+def : T_CMP_pat <C4_cmpneqi, setne, s32_0ImmPred>;
+def : T_CMP_pat <C4_cmpltei, setle, s32_0ImmPred>;
+def : T_CMP_pat <C4_cmplteui, setule, u9_0ImmPred>;
+
+// Map cmplt(Rs, Imm) -> !cmpgt(Rs, Imm-1).
+def: Pat<(i1 (setlt I32:$src1, s32_0ImmPred:$src2)),
+ (C4_cmpltei IntRegs:$src1, (SDEC1 s32_0ImmPred:$src2))>;
+
+// Rs != Imm -> !(Rs == Imm).
+def: Pat<(i1 (setne I32:$src1, s32_0ImmPred:$src2)),
+ (C4_cmpneqi IntRegs:$src1, s32_0ImmPred:$src2)>;
+
+// For the sequence
+//   zext(setult(and(Rs, 255), u8))
+// use the isdigit transformation below.
+
+
+def u7_0PosImmPred : ImmLeaf<i32, [{
+ // True if the immediate fits in a 7-bit unsigned field and
+ // is strictly greater than 0.
+ return Imm > 0 && isUInt<7>(Imm);
+}]>;
+
+
+// Generate code of the form 'C2_muxii(cmpbgtui(Rdd, C-1),0,1)'
+// for C code of the form r = ((c>='0') & (c<='9')) ? 1 : 0;.
+// The isdigit transformation relies on two 'clever' aspects:
+// 1) The data type is unsigned, which allows us to eliminate a zero test
+// after biasing the expression by 48. We depend on the representation and
+// semantics of the unsigned type.
+// 2) The front end has converted <= 9 into < 10 on entry to LLVM
+//
+// For the C code:
+// retval = ((c>='0') & (c<='9')) ? 1 : 0;
+// The code is transformed upstream of LLVM into
+// retval = (c-48) < 10 ? 1 : 0;
+
+let AddedComplexity = 139 in
+def: Pat<(i32 (zext (i1 (setult (and I32:$src1, 255), u7_0PosImmPred:$src2)))),
+ (C2_muxii (A4_cmpbgtui IntRegs:$src1, (UDEC1 imm:$src2)), 0, 1)>;
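+
+// Hedged illustration (editorial; register numbers are an assumption): for
+// the biased comparison "r = ((c & 255) < 10);" with c in r0, the pattern
+// above selects roughly
+//   p0 = cmpb.gtu(r0, #9)         // A4_cmpbgtui after UDEC1 on #10
+//   r0 = mux(p0, #0, #1)          // C2_muxii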
+
+class Loada_pat<PatFrag Load, ValueType VT, PatFrag Addr, InstHexagon MI>
+ : Pat<(VT (Load Addr:$addr)), (MI Addr:$addr)>;
+
+class Loadam_pat<PatFrag Load, ValueType VT, PatFrag Addr, PatFrag ValueMod,
+ InstHexagon MI>
+ : Pat<(VT (Load Addr:$addr)), (ValueMod (MI Addr:$addr))>;
+
+class Storea_pat<PatFrag Store, PatFrag Value, PatFrag Addr, InstHexagon MI>
+ : Pat<(Store Value:$val, Addr:$addr), (MI Addr:$addr, Value:$val)>;
+
+class Stoream_pat<PatFrag Store, PatFrag Value, PatFrag Addr, PatFrag ValueMod,
+ InstHexagon MI>
+ : Pat<(Store Value:$val, Addr:$addr),
+ (MI Addr:$addr, (ValueMod Value:$val))>;
+
+let AddedComplexity = 30 in {
+ def: Storea_pat<truncstorei8, I32, addrga, PS_storerbabs>;
+ def: Storea_pat<truncstorei16, I32, addrga, PS_storerhabs>;
+ def: Storea_pat<store, I32, addrga, PS_storeriabs>;
+ def: Storea_pat<store, I64, addrga, PS_storerdabs>;
+
+ def: Stoream_pat<truncstorei8, I64, addrga, LoReg, PS_storerbabs>;
+ def: Stoream_pat<truncstorei16, I64, addrga, LoReg, PS_storerhabs>;
+ def: Stoream_pat<truncstorei32, I64, addrga, LoReg, PS_storeriabs>;
+}
+
+def: Storea_pat<SwapSt<atomic_store_8>, I32, addrgp, S2_storerbgp>;
+def: Storea_pat<SwapSt<atomic_store_16>, I32, addrgp, S2_storerhgp>;
+def: Storea_pat<SwapSt<atomic_store_32>, I32, addrgp, S2_storerigp>;
+def: Storea_pat<SwapSt<atomic_store_64>, I64, addrgp, S2_storerdgp>;
+
+let AddedComplexity = 100 in {
+ def: Storea_pat<truncstorei8, I32, addrgp, S2_storerbgp>;
+ def: Storea_pat<truncstorei16, I32, addrgp, S2_storerhgp>;
+ def: Storea_pat<store, I32, addrgp, S2_storerigp>;
+ def: Storea_pat<store, I64, addrgp, S2_storerdgp>;
+
+ // Map from "i1 = constant<-1>; memb(CONST32(#foo)) = i1"
+ // to "r0 = 1; memb(#foo) = r0".
+ let AddedComplexity = 100 in
+ def: Pat<(store (i1 -1), (HexagonCONST32_GP tglobaladdr:$global)),
+ (S2_storerbgp tglobaladdr:$global, (A2_tfrsi 1))>;
+}
+
+class LoadAbs_pats <PatFrag ldOp, InstHexagon MI, ValueType VT = i32>
+ : Pat <(VT (ldOp (HexagonCONST32 tglobaladdr:$absaddr))),
+ (VT (MI tglobaladdr:$absaddr))>;
+
+let AddedComplexity = 30 in {
+ def: LoadAbs_pats <load, PS_loadriabs>;
+ def: LoadAbs_pats <zextloadi1, PS_loadrubabs>;
+ def: LoadAbs_pats <sextloadi8, PS_loadrbabs>;
+ def: LoadAbs_pats <extloadi8, PS_loadrubabs>;
+ def: LoadAbs_pats <zextloadi8, PS_loadrubabs>;
+ def: LoadAbs_pats <sextloadi16, PS_loadrhabs>;
+ def: LoadAbs_pats <extloadi16, PS_loadruhabs>;
+ def: LoadAbs_pats <zextloadi16, PS_loadruhabs>;
+ def: LoadAbs_pats <load, PS_loadrdabs, i64>;
+}
+
+let AddedComplexity = 30 in
+def: Pat<(i64 (zextloadi1 (HexagonCONST32 tglobaladdr:$absaddr))),
+ (ToZext64 (PS_loadrubabs tglobaladdr:$absaddr))>;
+
+def: Loada_pat<atomic_load_8, i32, addrgp, L2_loadrubgp>;
+def: Loada_pat<atomic_load_16, i32, addrgp, L2_loadruhgp>;
+def: Loada_pat<atomic_load_32, i32, addrgp, L2_loadrigp>;
+def: Loada_pat<atomic_load_64, i64, addrgp, L2_loadrdgp>;
+
+def: Loadam_pat<load, i1, addrga, I32toI1, PS_loadrubabs>;
+def: Loadam_pat<load, i1, addrgp, I32toI1, L2_loadrubgp>;
+
+def: Stoream_pat<store, I1, addrga, I1toI32, PS_storerbabs>;
+def: Stoream_pat<store, I1, addrgp, I1toI32, S2_storerbgp>;
+
+// Map from load(globaladdress) -> mem[u][bhwd](#foo)
+class LoadGP_pats <PatFrag ldOp, InstHexagon MI, ValueType VT = i32>
+ : Pat <(VT (ldOp (HexagonCONST32_GP tglobaladdr:$global))),
+ (VT (MI tglobaladdr:$global))>;
+
+let AddedComplexity = 100 in {
+ def: LoadGP_pats <extloadi8, L2_loadrubgp>;
+ def: LoadGP_pats <sextloadi8, L2_loadrbgp>;
+ def: LoadGP_pats <zextloadi8, L2_loadrubgp>;
+ def: LoadGP_pats <extloadi16, L2_loadruhgp>;
+ def: LoadGP_pats <sextloadi16, L2_loadrhgp>;
+ def: LoadGP_pats <zextloadi16, L2_loadruhgp>;
+ def: LoadGP_pats <load, L2_loadrigp>;
+ def: LoadGP_pats <load, L2_loadrdgp, i64>;
+}
+
+// When the Interprocedural Global Variable optimizer realizes that a certain
+// global variable takes only two constant values, it shrinks the global to
+// a boolean. Catch those loads here in the following 3 patterns.
+let AddedComplexity = 100 in {
+ def: LoadGP_pats <extloadi1, L2_loadrubgp>;
+ def: LoadGP_pats <zextloadi1, L2_loadrubgp>;
+}
+
+// Transfer global address into a register
+def: Pat<(HexagonCONST32 tglobaladdr:$Rs), (A2_tfrsi imm:$Rs)>;
+def: Pat<(HexagonCONST32_GP tblockaddress:$Rs), (A2_tfrsi imm:$Rs)>;
+def: Pat<(HexagonCONST32_GP tglobaladdr:$Rs), (A2_tfrsi imm:$Rs)>;
+
+let AddedComplexity = 30 in {
+ def: Storea_pat<truncstorei8, I32, u32_0ImmPred, PS_storerbabs>;
+ def: Storea_pat<truncstorei16, I32, u32_0ImmPred, PS_storerhabs>;
+ def: Storea_pat<store, I32, u32_0ImmPred, PS_storeriabs>;
+}
+
+let AddedComplexity = 30 in {
+ def: Loada_pat<load, i32, u32_0ImmPred, PS_loadriabs>;
+ def: Loada_pat<sextloadi8, i32, u32_0ImmPred, PS_loadrbabs>;
+ def: Loada_pat<zextloadi8, i32, u32_0ImmPred, PS_loadrubabs>;
+ def: Loada_pat<sextloadi16, i32, u32_0ImmPred, PS_loadrhabs>;
+ def: Loada_pat<zextloadi16, i32, u32_0ImmPred, PS_loadruhabs>;
+}
+
+// Indexed store word - global address.
+// memw(Rs+#u6:2)=#S8
+let AddedComplexity = 100 in
+defm: Storex_add_pat<store, addrga, u6_2ImmPred, S4_storeiri_io>;
+
+// Load from a global address that has only one use in the current basic block.
+let AddedComplexity = 100 in {
+ def: Loada_pat<extloadi8, i32, addrga, PS_loadrubabs>;
+ def: Loada_pat<sextloadi8, i32, addrga, PS_loadrbabs>;
+ def: Loada_pat<zextloadi8, i32, addrga, PS_loadrubabs>;
+
+ def: Loada_pat<extloadi16, i32, addrga, PS_loadruhabs>;
+ def: Loada_pat<sextloadi16, i32, addrga, PS_loadrhabs>;
+ def: Loada_pat<zextloadi16, i32, addrga, PS_loadruhabs>;
+
+ def: Loada_pat<load, i32, addrga, PS_loadriabs>;
+ def: Loada_pat<load, i64, addrga, PS_loadrdabs>;
+}
+
+// Store to a global address that has only one use in the current basic block.
+let AddedComplexity = 100 in {
+ def: Storea_pat<truncstorei8, I32, addrga, PS_storerbabs>;
+ def: Storea_pat<truncstorei16, I32, addrga, PS_storerhabs>;
+ def: Storea_pat<store, I32, addrga, PS_storeriabs>;
+ def: Storea_pat<store, I64, addrga, PS_storerdabs>;
+
+ def: Stoream_pat<truncstorei32, I64, addrga, LoReg, PS_storeriabs>;
+}
+
+// i8/i16/i32 -> i64 loads
+// We need a complexity of 120 here to override preceding handling of
+// zextload.
+let AddedComplexity = 120 in {
+ def: Loadam_pat<extloadi8, i64, addrga, ToZext64, PS_loadrubabs>;
+ def: Loadam_pat<sextloadi8, i64, addrga, ToSext64, PS_loadrbabs>;
+ def: Loadam_pat<zextloadi8, i64, addrga, ToZext64, PS_loadrubabs>;
+
+ def: Loadam_pat<extloadi16, i64, addrga, ToZext64, PS_loadruhabs>;
+ def: Loadam_pat<sextloadi16, i64, addrga, ToSext64, PS_loadrhabs>;
+ def: Loadam_pat<zextloadi16, i64, addrga, ToZext64, PS_loadruhabs>;
+
+ def: Loadam_pat<extloadi32, i64, addrga, ToZext64, PS_loadriabs>;
+ def: Loadam_pat<sextloadi32, i64, addrga, ToSext64, PS_loadriabs>;
+ def: Loadam_pat<zextloadi32, i64, addrga, ToZext64, PS_loadriabs>;
+}
+
+let AddedComplexity = 100 in {
+ def: Loada_pat<extloadi8, i32, addrgp, PS_loadrubabs>;
+ def: Loada_pat<sextloadi8, i32, addrgp, PS_loadrbabs>;
+ def: Loada_pat<zextloadi8, i32, addrgp, PS_loadrubabs>;
+
+ def: Loada_pat<extloadi16, i32, addrgp, PS_loadruhabs>;
+ def: Loada_pat<sextloadi16, i32, addrgp, PS_loadrhabs>;
+ def: Loada_pat<zextloadi16, i32, addrgp, PS_loadruhabs>;
+
+ def: Loada_pat<load, i32, addrgp, PS_loadriabs>;
+ def: Loada_pat<load, i64, addrgp, PS_loadrdabs>;
+}
+
+let AddedComplexity = 100 in {
+ def: Storea_pat<truncstorei8, I32, addrgp, PS_storerbabs>;
+ def: Storea_pat<truncstorei16, I32, addrgp, PS_storerhabs>;
+ def: Storea_pat<store, I32, addrgp, PS_storeriabs>;
+ def: Storea_pat<store, I64, addrgp, PS_storerdabs>;
+}
+
+def: Loada_pat<atomic_load_8, i32, addrgp, PS_loadrubabs>;
+def: Loada_pat<atomic_load_16, i32, addrgp, PS_loadruhabs>;
+def: Loada_pat<atomic_load_32, i32, addrgp, PS_loadriabs>;
+def: Loada_pat<atomic_load_64, i64, addrgp, PS_loadrdabs>;
+
+def: Storea_pat<SwapSt<atomic_store_8>, I32, addrgp, PS_storerbabs>;
+def: Storea_pat<SwapSt<atomic_store_16>, I32, addrgp, PS_storerhabs>;
+def: Storea_pat<SwapSt<atomic_store_32>, I32, addrgp, PS_storeriabs>;
+def: Storea_pat<SwapSt<atomic_store_64>, I64, addrgp, PS_storerdabs>;
+
+def: Pat<(or (or (or (shl (i64 (zext (and I32:$b, (i32 65535)))), (i32 16)),
+ (i64 (zext (i32 (and I32:$a, (i32 65535)))))),
+ (shl (i64 (anyext (and I32:$c, (i32 65535)))), (i32 32))),
+ (shl (Aext64 I32:$d), (i32 48))),
+ (A2_combinew (A2_combine_ll I32:$d, I32:$c),
+ (A2_combine_ll I32:$b, I32:$a))>;
+
+// We need custom lowering of ISD::PREFETCH into HexagonISD::DCFETCH
+// because the SDNode ISD::PREFETCH has properties MayLoad and MayStore.
+// We don't really want either one here.
+def SDTHexagonDCFETCH : SDTypeProfile<0, 2, [SDTCisPtrTy<0>,SDTCisInt<1>]>;
+def HexagonDCFETCH : SDNode<"HexagonISD::DCFETCH", SDTHexagonDCFETCH,
+ [SDNPHasChain]>;
+
+def: Pat<(HexagonDCFETCH IntRegs:$Rs, u11_3ImmPred:$u11_3),
+ (Y2_dcfetchbo IntRegs:$Rs, imm:$u11_3)>;
+def: Pat<(HexagonDCFETCH (i32 (add IntRegs:$Rs, u11_3ImmPred:$u11_3)), (i32 0)),
+ (Y2_dcfetchbo IntRegs:$Rs, imm:$u11_3)>;
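+
+// Editorial example (hedged): a call such as __builtin_prefetch(p) with p in
+// r0, lowered to HexagonISD::DCFETCH as described above, is expected to
+// select
+//   dcfetch(r0 + #0)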
+
+def f32ImmPred : PatLeaf<(f32 fpimm:$F)>;
+def f64ImmPred : PatLeaf<(f64 fpimm:$F)>;
+
+def ftoi : SDNodeXForm<fpimm, [{
+ APInt I = N->getValueAPF().bitcastToAPInt();
+ return CurDAG->getTargetConstant(I.getZExtValue(), SDLoc(N),
+ MVT::getIntegerVT(I.getBitWidth()));
+}]>;
+
+
+def: Pat<(sra (i64 (add (sra I64:$src1, u6_0ImmPred:$src2), 1)), (i32 1)),
+ (S2_asr_i_p_rnd I64:$src1, imm:$src2)>;
+
+def SDTHexagonI32I64: SDTypeProfile<1, 1, [SDTCisVT<0, i32>,
+ SDTCisVT<1, i64>]>;
+def HexagonPOPCOUNT: SDNode<"HexagonISD::POPCOUNT", SDTHexagonI32I64>;
+
+def: Pat<(HexagonPOPCOUNT I64:$Rss), (S5_popcountp I64:$Rss)>;
+
+let AddedComplexity = 20 in {
+ defm: Loadx_pat<load, f32, s30_2ImmPred, L2_loadri_io>;
+ defm: Loadx_pat<load, f64, s29_3ImmPred, L2_loadrd_io>;
+}
+
+let AddedComplexity = 60 in {
+ defm : T_LoadAbsReg_Pat <load, L4_loadri_ur, f32>;
+ defm : T_LoadAbsReg_Pat <load, L4_loadrd_ur, f64>;
+}
+
+let AddedComplexity = 40 in {
+ def: Loadxs_pat<load, f32, L4_loadri_rr>;
+ def: Loadxs_pat<load, f64, L4_loadrd_rr>;
+}
+
+let AddedComplexity = 20 in {
+ def: Loadxs_simple_pat<load, f32, L4_loadri_rr>;
+ def: Loadxs_simple_pat<load, f64, L4_loadrd_rr>;
+}
+
+let AddedComplexity = 80 in {
+ def: Loada_pat<load, f32, u32_0ImmPred, PS_loadriabs>;
+ def: Loada_pat<load, f32, addrga, PS_loadriabs>;
+ def: Loada_pat<load, f64, addrga, PS_loadrdabs>;
+}
+
+let AddedComplexity = 100 in {
+ def: LoadGP_pats <load, L2_loadrigp, f32>;
+ def: LoadGP_pats <load, L2_loadrdgp, f64>;
+}
+
+let AddedComplexity = 20 in {
+ defm: Storex_pat<store, F32, s30_2ImmPred, S2_storeri_io>;
+ defm: Storex_pat<store, F64, s29_3ImmPred, S2_storerd_io>;
+}
+
+// Simple patterns should be tried with the lowest priority.
+def: Storex_simple_pat<store, F32, S2_storeri_io>;
+def: Storex_simple_pat<store, F64, S2_storerd_io>;
+
+let AddedComplexity = 60 in {
+ defm : T_StoreAbsReg_Pats <S4_storeri_ur, IntRegs, f32, store>;
+ defm : T_StoreAbsReg_Pats <S4_storerd_ur, DoubleRegs, f64, store>;
+}
+
+let AddedComplexity = 40 in {
+ def: Storexs_pat<store, F32, S4_storeri_rr>;
+ def: Storexs_pat<store, F64, S4_storerd_rr>;
+}
+
+let AddedComplexity = 20 in {
+ def: Store_rr_pat<store, F32, S4_storeri_rr>;
+ def: Store_rr_pat<store, F64, S4_storerd_rr>;
+}
+
+let AddedComplexity = 80 in {
+ def: Storea_pat<store, F32, addrga, PS_storeriabs>;
+ def: Storea_pat<store, F64, addrga, PS_storerdabs>;
+}
+
+let AddedComplexity = 100 in {
+ def: Storea_pat<store, F32, addrgp, S2_storerigp>;
+ def: Storea_pat<store, F64, addrgp, S2_storerdgp>;
+}
+
+defm: Storex_pat<store, F32, s30_2ImmPred, S2_storeri_io>;
+defm: Storex_pat<store, F64, s29_3ImmPred, S2_storerd_io>;
+def: Storex_simple_pat<store, F32, S2_storeri_io>;
+def: Storex_simple_pat<store, F64, S2_storerd_io>;
+
+def: Pat<(fadd F32:$src1, F32:$src2),
+ (F2_sfadd F32:$src1, F32:$src2)>;
+
+def: Pat<(fsub F32:$src1, F32:$src2),
+ (F2_sfsub F32:$src1, F32:$src2)>;
+
+def: Pat<(fmul F32:$src1, F32:$src2),
+ (F2_sfmpy F32:$src1, F32:$src2)>;
+
+let Predicates = [HasV5T] in {
+ def: Pat<(f32 (fminnum F32:$Rs, F32:$Rt)), (F2_sfmin F32:$Rs, F32:$Rt)>;
+ def: Pat<(f32 (fmaxnum F32:$Rs, F32:$Rt)), (F2_sfmax F32:$Rs, F32:$Rt)>;
+}
+
+let AddedComplexity = 100, Predicates = [HasV5T] in {
+ class SfSel12<PatFrag Cmp, InstHexagon MI>
+ : Pat<(select (i1 (Cmp F32:$Rs, F32:$Rt)), F32:$Rs, F32:$Rt),
+ (MI F32:$Rs, F32:$Rt)>;
+ class SfSel21<PatFrag Cmp, InstHexagon MI>
+ : Pat<(select (i1 (Cmp F32:$Rs, F32:$Rt)), F32:$Rt, F32:$Rs),
+ (MI F32:$Rs, F32:$Rt)>;
+
+ def: SfSel12<setolt, F2_sfmin>;
+ def: SfSel12<setole, F2_sfmin>;
+ def: SfSel12<setogt, F2_sfmax>;
+ def: SfSel12<setoge, F2_sfmax>;
+ def: SfSel21<setolt, F2_sfmax>;
+ def: SfSel21<setole, F2_sfmax>;
+ def: SfSel21<setogt, F2_sfmin>;
+ def: SfSel21<setoge, F2_sfmin>;
+}
+
+class T_fcmp32_pat<PatFrag OpNode, InstHexagon MI>
+ : Pat<(i1 (OpNode F32:$src1, F32:$src2)),
+ (MI F32:$src1, F32:$src2)>;
+class T_fcmp64_pat<PatFrag OpNode, InstHexagon MI>
+ : Pat<(i1 (OpNode F64:$src1, F64:$src2)),
+ (MI F64:$src1, F64:$src2)>;
+
+def: T_fcmp32_pat<setoge, F2_sfcmpge>;
+def: T_fcmp32_pat<setuo, F2_sfcmpuo>;
+def: T_fcmp32_pat<setoeq, F2_sfcmpeq>;
+def: T_fcmp32_pat<setogt, F2_sfcmpgt>;
+
+def: T_fcmp64_pat<setoge, F2_dfcmpge>;
+def: T_fcmp64_pat<setuo, F2_dfcmpuo>;
+def: T_fcmp64_pat<setoeq, F2_dfcmpeq>;
+def: T_fcmp64_pat<setogt, F2_dfcmpgt>;
+
+let Predicates = [HasV5T] in
+multiclass T_fcmp_pats<PatFrag cmpOp, InstHexagon IntMI, InstHexagon DoubleMI> {
+ // IntRegs
+ def: Pat<(i1 (cmpOp F32:$src1, F32:$src2)),
+ (IntMI F32:$src1, F32:$src2)>;
+ // DoubleRegs
+ def: Pat<(i1 (cmpOp F64:$src1, F64:$src2)),
+ (DoubleMI F64:$src1, F64:$src2)>;
+}
+
+defm : T_fcmp_pats <seteq, F2_sfcmpeq, F2_dfcmpeq>;
+defm : T_fcmp_pats <setgt, F2_sfcmpgt, F2_dfcmpgt>;
+defm : T_fcmp_pats <setge, F2_sfcmpge, F2_dfcmpge>;
+
+//===----------------------------------------------------------------------===//
+// Multiclass to define 'Def Pats' for unordered gt, ge, eq operations.
+//===----------------------------------------------------------------------===//
+let Predicates = [HasV5T] in
+multiclass unord_Pats <PatFrag cmpOp, InstHexagon IntMI, InstHexagon DoubleMI> {
+ // IntRegs
+ def: Pat<(i1 (cmpOp F32:$src1, F32:$src2)),
+ (C2_or (F2_sfcmpuo F32:$src1, F32:$src2),
+ (IntMI F32:$src1, F32:$src2))>;
+
+ // DoubleRegs
+ def: Pat<(i1 (cmpOp F64:$src1, F64:$src2)),
+ (C2_or (F2_dfcmpuo F64:$src1, F64:$src2),
+ (DoubleMI F64:$src1, F64:$src2))>;
+}
+
+defm : unord_Pats <setuge, F2_sfcmpge, F2_dfcmpge>;
+defm : unord_Pats <setugt, F2_sfcmpgt, F2_dfcmpgt>;
+defm : unord_Pats <setueq, F2_sfcmpeq, F2_dfcmpeq>;
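+
+// As a hedged sketch of the expansion above: an unordered compare such as
+// setugt(a, b) on f32 becomes "uo(a, b) || gt(a, b)", i.e. roughly
+//   p0 = sfcmp.uo(r0, r1)
+//   p1 = sfcmp.gt(r0, r1)
+//   p0 = or(p0, p1)
+// (register and predicate numbers are illustrative only).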
+
+//===----------------------------------------------------------------------===//
+// Multiclass to define 'Def Pats' for the following dags:
+// seteq(setoeq(op1, op2), 0) -> not(setoeq(op1, op2))
+// seteq(setoeq(op1, op2), 1) -> setoeq(op1, op2)
+// setne(setoeq(op1, op2), 0) -> setoeq(op1, op2)
+// setne(setoeq(op1, op2), 1) -> not(setoeq(op1, op2))
+//===----------------------------------------------------------------------===//
+let Predicates = [HasV5T] in
+multiclass eq_ordgePats <PatFrag cmpOp, InstHexagon IntMI,
+ InstHexagon DoubleMI> {
+ // IntRegs
+ def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 0)),
+ (C2_not (IntMI F32:$src1, F32:$src2))>;
+ def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 1)),
+ (IntMI F32:$src1, F32:$src2)>;
+ def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 0)),
+ (IntMI F32:$src1, F32:$src2)>;
+ def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 1)),
+ (C2_not (IntMI F32:$src1, F32:$src2))>;
+
+ // DoubleRegs
+ def : Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 0)),
+ (C2_not (DoubleMI F64:$src1, F64:$src2))>;
+ def : Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 1)),
+ (DoubleMI F64:$src1, F64:$src2)>;
+ def : Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 0)),
+ (DoubleMI F64:$src1, F64:$src2)>;
+ def : Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 1)),
+ (C2_not (DoubleMI F64:$src1, F64:$src2))>;
+}
+
+defm : eq_ordgePats<setoeq, F2_sfcmpeq, F2_dfcmpeq>;
+defm : eq_ordgePats<setoge, F2_sfcmpge, F2_dfcmpge>;
+defm : eq_ordgePats<setogt, F2_sfcmpgt, F2_dfcmpgt>;
+
+//===----------------------------------------------------------------------===//
+// Multiclass to define 'Def Pats' for the following dags:
+// seteq(setolt(op1, op2), 0) -> not(setogt(op2, op1))
+// seteq(setolt(op1, op2), 1) -> setogt(op2, op1)
+// setne(setolt(op1, op2), 0) -> setogt(op2, op1)
+// setne(setolt(op1, op2), 1) -> not(setogt(op2, op1))
+//===----------------------------------------------------------------------===//
+let Predicates = [HasV5T] in
+multiclass eq_ordltPats <PatFrag cmpOp, InstHexagon IntMI,
+ InstHexagon DoubleMI> {
+ // IntRegs
+ def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 0)),
+ (C2_not (IntMI F32:$src2, F32:$src1))>;
+ def: Pat<(i1 (seteq (i1 (cmpOp F32:$src1, F32:$src2)), 1)),
+ (IntMI F32:$src2, F32:$src1)>;
+ def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 0)),
+ (IntMI F32:$src2, F32:$src1)>;
+ def: Pat<(i1 (setne (i1 (cmpOp F32:$src1, F32:$src2)), 1)),
+ (C2_not (IntMI F32:$src2, F32:$src1))>;
+
+ // DoubleRegs
+ def: Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 0)),
+ (C2_not (DoubleMI F64:$src2, F64:$src1))>;
+ def: Pat<(i1 (seteq (i1 (cmpOp F64:$src1, F64:$src2)), 1)),
+ (DoubleMI F64:$src2, F64:$src1)>;
+ def: Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 0)),
+ (DoubleMI F64:$src2, F64:$src1)>;
+ def: Pat<(i1 (setne (i1 (cmpOp F64:$src1, F64:$src2)), 1)),
+ (C2_not (DoubleMI F64:$src2, F64:$src1))>;
+}
+
+defm : eq_ordltPats<setole, F2_sfcmpge, F2_dfcmpge>;
+defm : eq_ordltPats<setolt, F2_sfcmpgt, F2_dfcmpgt>;
+
+
+// Ordered (seto) is the inverse of setuo; see
+// http://llvm.org/docs/LangRef.html#i_fcmp.
+let Predicates = [HasV5T] in {
+ def: Pat<(i1 (seto F32:$src1, F32:$src2)),
+ (C2_not (F2_sfcmpuo F32:$src2, F32:$src1))>;
+ def: Pat<(i1 (seto F32:$src1, f32ImmPred:$src2)),
+ (C2_not (F2_sfcmpuo (f32 (A2_tfrsi (ftoi $src2))), F32:$src1))>;
+ def: Pat<(i1 (seto F64:$src1, F64:$src2)),
+ (C2_not (F2_dfcmpuo F64:$src2, F64:$src1))>;
+ def: Pat<(i1 (seto F64:$src1, f64ImmPred:$src2)),
+ (C2_not (F2_dfcmpuo (CONST64 (ftoi $src2)), F64:$src1))>;
+}
+
+// Ordered lt.
+let Predicates = [HasV5T] in {
+ def: Pat<(i1 (setolt F32:$src1, F32:$src2)),
+ (F2_sfcmpgt F32:$src2, F32:$src1)>;
+ def: Pat<(i1 (setolt F32:$src1, f32ImmPred:$src2)),
+ (F2_sfcmpgt (f32 (A2_tfrsi (ftoi $src2))), F32:$src1)>;
+ def: Pat<(i1 (setolt F64:$src1, F64:$src2)),
+ (F2_dfcmpgt F64:$src2, F64:$src1)>;
+ def: Pat<(i1 (setolt F64:$src1, f64ImmPred:$src2)),
+ (F2_dfcmpgt (CONST64 (ftoi $src2)), F64:$src1)>;
+}
+
+// Unordered lt.
+let Predicates = [HasV5T] in {
+ def: Pat<(i1 (setult F32:$src1, F32:$src2)),
+ (C2_or (F2_sfcmpuo F32:$src1, F32:$src2),
+ (F2_sfcmpgt F32:$src2, F32:$src1))>;
+ def: Pat<(i1 (setult F32:$src1, f32ImmPred:$src2)),
+ (C2_or (F2_sfcmpuo F32:$src1, (f32 (A2_tfrsi (ftoi $src2)))),
+ (F2_sfcmpgt (f32 (A2_tfrsi (ftoi $src2))), F32:$src1))>;
+ def: Pat<(i1 (setult F64:$src1, F64:$src2)),
+ (C2_or (F2_dfcmpuo F64:$src1, F64:$src2),
+ (F2_dfcmpgt F64:$src2, F64:$src1))>;
+ def: Pat<(i1 (setult F64:$src1, f64ImmPred:$src2)),
+ (C2_or (F2_dfcmpuo F64:$src1, (CONST64 (ftoi $src2))),
+ (F2_dfcmpgt (CONST64 (ftoi $src2)), F64:$src1))>;
+}
+
+// Ordered le.
+let Predicates = [HasV5T] in {
+ // rs <= rt -> rt >= rs.
+ def: Pat<(i1 (setole F32:$src1, F32:$src2)),
+ (F2_sfcmpge F32:$src2, F32:$src1)>;
+ def: Pat<(i1 (setole F32:$src1, f32ImmPred:$src2)),
+ (F2_sfcmpge (f32 (A2_tfrsi (ftoi $src2))), F32:$src1)>;
+
+ // Rss <= Rtt -> Rtt >= Rss.
+ def: Pat<(i1 (setole F64:$src1, F64:$src2)),
+ (F2_dfcmpge F64:$src2, F64:$src1)>;
+ def: Pat<(i1 (setole F64:$src1, f64ImmPred:$src2)),
+ (F2_dfcmpge (CONST64 (ftoi $src2)), F64:$src1)>;
+}
+
+// Unordered le.
+let Predicates = [HasV5T] in {
+// rs <= rt -> rt >= rs.
+ def: Pat<(i1 (setule F32:$src1, F32:$src2)),
+ (C2_or (F2_sfcmpuo F32:$src1, F32:$src2),
+ (F2_sfcmpge F32:$src2, F32:$src1))>;
+ def: Pat<(i1 (setule F32:$src1, f32ImmPred:$src2)),
+ (C2_or (F2_sfcmpuo F32:$src1, (f32 (A2_tfrsi (ftoi $src2)))),
+ (F2_sfcmpge (f32 (A2_tfrsi (ftoi $src2))), F32:$src1))>;
+ def: Pat<(i1 (setule F64:$src1, F64:$src2)),
+ (C2_or (F2_dfcmpuo F64:$src1, F64:$src2),
+ (F2_dfcmpge F64:$src2, F64:$src1))>;
+ def: Pat<(i1 (setule F64:$src1, f64ImmPred:$src2)),
+ (C2_or (F2_dfcmpuo F64:$src1, (CONST64 (ftoi $src2))),
+ (F2_dfcmpge (CONST64 (ftoi $src2)), F64:$src1))>;
+}
+
+// Ordered ne.
+let Predicates = [HasV5T] in {
+ def: Pat<(i1 (setone F32:$src1, F32:$src2)),
+ (C2_not (F2_sfcmpeq F32:$src1, F32:$src2))>;
+ def: Pat<(i1 (setone F64:$src1, F64:$src2)),
+ (C2_not (F2_dfcmpeq F64:$src1, F64:$src2))>;
+ def: Pat<(i1 (setone F32:$src1, f32ImmPred:$src2)),
+ (C2_not (F2_sfcmpeq F32:$src1, (f32 (A2_tfrsi (ftoi $src2)))))>;
+ def: Pat<(i1 (setone F64:$src1, f64ImmPred:$src2)),
+ (C2_not (F2_dfcmpeq F64:$src1, (CONST64 (ftoi $src2))))>;
+}
+
+// Unordered ne.
+let Predicates = [HasV5T] in {
+ def: Pat<(i1 (setune F32:$src1, F32:$src2)),
+ (C2_or (F2_sfcmpuo F32:$src1, F32:$src2),
+ (C2_not (F2_sfcmpeq F32:$src1, F32:$src2)))>;
+ def: Pat<(i1 (setune F64:$src1, F64:$src2)),
+ (C2_or (F2_dfcmpuo F64:$src1, F64:$src2),
+ (C2_not (F2_dfcmpeq F64:$src1, F64:$src2)))>;
+ def: Pat<(i1 (setune F32:$src1, f32ImmPred:$src2)),
+ (C2_or (F2_sfcmpuo F32:$src1, (f32 (A2_tfrsi (ftoi $src2)))),
+ (C2_not (F2_sfcmpeq F32:$src1,
+ (f32 (A2_tfrsi (ftoi $src2))))))>;
+ def: Pat<(i1 (setune F64:$src1, f64ImmPred:$src2)),
+ (C2_or (F2_dfcmpuo F64:$src1, (CONST64 (ftoi $src2))),
+ (C2_not (F2_dfcmpeq F64:$src1,
+ (CONST64 (ftoi $src2)))))>;
+}
+
+// Besides set[o|u][comparisons], we also need set[comparisons].
+let Predicates = [HasV5T] in {
+ // lt.
+ def: Pat<(i1 (setlt F32:$src1, F32:$src2)),
+ (F2_sfcmpgt F32:$src2, F32:$src1)>;
+ def: Pat<(i1 (setlt F32:$src1, f32ImmPred:$src2)),
+ (F2_sfcmpgt (f32 (A2_tfrsi (ftoi $src2))), F32:$src1)>;
+ def: Pat<(i1 (setlt F64:$src1, F64:$src2)),
+ (F2_dfcmpgt F64:$src2, F64:$src1)>;
+ def: Pat<(i1 (setlt F64:$src1, f64ImmPred:$src2)),
+ (F2_dfcmpgt (CONST64 (ftoi $src2)), F64:$src1)>;
+
+ // le.
+ // rs <= rt -> rt >= rs.
+ def: Pat<(i1 (setle F32:$src1, F32:$src2)),
+ (F2_sfcmpge F32:$src2, F32:$src1)>;
+ def: Pat<(i1 (setle F32:$src1, f32ImmPred:$src2)),
+ (F2_sfcmpge (f32 (A2_tfrsi (ftoi $src2))), F32:$src1)>;
+
+ // Rss <= Rtt -> Rtt >= Rss.
+ def: Pat<(i1 (setle F64:$src1, F64:$src2)),
+ (F2_dfcmpge F64:$src2, F64:$src1)>;
+ def: Pat<(i1 (setle F64:$src1, f64ImmPred:$src2)),
+ (F2_dfcmpge (CONST64 (ftoi $src2)), F64:$src1)>;
+
+ // ne.
+ def: Pat<(i1 (setne F32:$src1, F32:$src2)),
+ (C2_not (F2_sfcmpeq F32:$src1, F32:$src2))>;
+ def: Pat<(i1 (setne F64:$src1, F64:$src2)),
+ (C2_not (F2_dfcmpeq F64:$src1, F64:$src2))>;
+ def: Pat<(i1 (setne F32:$src1, f32ImmPred:$src2)),
+ (C2_not (F2_sfcmpeq F32:$src1, (f32 (A2_tfrsi (ftoi $src2)))))>;
+ def: Pat<(i1 (setne F64:$src1, f64ImmPred:$src2)),
+ (C2_not (F2_dfcmpeq F64:$src1, (CONST64 (ftoi $src2))))>;
+}
+
+
+def: Pat<(f64 (fpextend F32:$Rs)), (F2_conv_sf2df F32:$Rs)>;
+def: Pat<(f32 (fpround F64:$Rs)), (F2_conv_df2sf F64:$Rs)>;
+
+def: Pat<(f32 (sint_to_fp I32:$Rs)), (F2_conv_w2sf I32:$Rs)>;
+def: Pat<(f32 (sint_to_fp I64:$Rs)), (F2_conv_d2sf I64:$Rs)>;
+def: Pat<(f64 (sint_to_fp I32:$Rs)), (F2_conv_w2df I32:$Rs)>;
+def: Pat<(f64 (sint_to_fp I64:$Rs)), (F2_conv_d2df I64:$Rs)>;
+
+def: Pat<(f32 (uint_to_fp I32:$Rs)), (F2_conv_uw2sf I32:$Rs)>;
+def: Pat<(f32 (uint_to_fp I64:$Rs)), (F2_conv_ud2sf I64:$Rs)>;
+def: Pat<(f64 (uint_to_fp I32:$Rs)), (F2_conv_uw2df I32:$Rs)>;
+def: Pat<(f64 (uint_to_fp I64:$Rs)), (F2_conv_ud2df I64:$Rs)>;
+
+def: Pat<(i32 (fp_to_sint F32:$Rs)), (F2_conv_sf2w_chop F32:$Rs)>;
+def: Pat<(i32 (fp_to_sint F64:$Rs)), (F2_conv_df2w_chop F64:$Rs)>;
+def: Pat<(i64 (fp_to_sint F32:$Rs)), (F2_conv_sf2d_chop F32:$Rs)>;
+def: Pat<(i64 (fp_to_sint F64:$Rs)), (F2_conv_df2d_chop F64:$Rs)>;
+
+def: Pat<(i32 (fp_to_uint F32:$Rs)), (F2_conv_sf2uw_chop F32:$Rs)>;
+def: Pat<(i32 (fp_to_uint F64:$Rs)), (F2_conv_df2uw_chop F64:$Rs)>;
+def: Pat<(i64 (fp_to_uint F32:$Rs)), (F2_conv_sf2ud_chop F32:$Rs)>;
+def: Pat<(i64 (fp_to_uint F64:$Rs)), (F2_conv_df2ud_chop F64:$Rs)>;
+
+// Bitcast is different than [fp|sint|uint]_to_[sint|uint|fp].
+let Predicates = [HasV5T] in {
+ def: Pat <(i32 (bitconvert F32:$src)), (I32:$src)>;
+ def: Pat <(f32 (bitconvert I32:$src)), (F32:$src)>;
+ def: Pat <(i64 (bitconvert F64:$src)), (I64:$src)>;
+ def: Pat <(f64 (bitconvert I64:$src)), (F64:$src)>;
+}
+
+def : Pat <(fma F32:$src2, F32:$src3, F32:$src1),
+ (F2_sffma F32:$src1, F32:$src2, F32:$src3)>;
+
+def : Pat <(fma (fneg F32:$src2), F32:$src3, F32:$src1),
+ (F2_sffms F32:$src1, F32:$src2, F32:$src3)>;
+
+def : Pat <(fma F32:$src2, (fneg F32:$src3), F32:$src1),
+ (F2_sffms F32:$src1, F32:$src2, F32:$src3)>;
+
+def: Pat<(select I1:$Pu, F32:$Rs, f32ImmPred:$imm),
+ (C2_muxir I1:$Pu, F32:$Rs, (ftoi $imm))>,
+ Requires<[HasV5T]>;
+
+def: Pat<(select I1:$Pu, f32ImmPred:$imm, F32:$Rt),
+ (C2_muxri I1:$Pu, (ftoi $imm), F32:$Rt)>,
+ Requires<[HasV5T]>;
+
+def: Pat<(select I1:$src1, F32:$src2, F32:$src3),
+ (C2_mux I1:$src1, F32:$src2, F32:$src3)>,
+ Requires<[HasV5T]>;
+
+def: Pat<(select (i1 (setult F32:$src1, F32:$src2)), F32:$src3, F32:$src4),
+ (C2_mux (F2_sfcmpgt F32:$src2, F32:$src1), F32:$src4, F32:$src3)>,
+ Requires<[HasV5T]>;
+
+def: Pat<(select I1:$src1, F64:$src2, F64:$src3),
+ (C2_vmux I1:$src1, F64:$src2, F64:$src3)>,
+ Requires<[HasV5T]>;
+
+def: Pat<(select (i1 (setult F64:$src1, F64:$src2)), F64:$src3, F64:$src4),
+ (C2_vmux (F2_dfcmpgt F64:$src2, F64:$src1), F64:$src3, F64:$src4)>,
+ Requires<[HasV5T]>;
+
+// Map from p0 = pnot(p0); r0 = select(p0, #i, r1)
+// => r0 = mux(p0, #i, r1)
+def: Pat<(select (not I1:$src1), f32ImmPred:$src2, F32:$src3),
+ (C2_muxir I1:$src1, F32:$src3, (ftoi $src2))>,
+ Requires<[HasV5T]>;
+
+// Map from p0 = pnot(p0); r0 = mux(p0, r1, #i)
+// => r0 = mux(p0, r1, #i)
+def: Pat<(select (not I1:$src1), F32:$src2, f32ImmPred:$src3),
+ (C2_muxri I1:$src1, (ftoi $src3), F32:$src2)>,
+ Requires<[HasV5T]>;
+
+def: Pat<(i32 (fp_to_sint F64:$src1)),
+ (LoReg (F2_conv_df2d_chop F64:$src1))>,
+ Requires<[HasV5T]>;
+
+def : Pat <(fabs F32:$src1),
+ (S2_clrbit_i F32:$src1, 31)>,
+ Requires<[HasV5T]>;
+
+def : Pat <(fneg F32:$src1),
+ (S2_togglebit_i F32:$src1, 31)>,
+ Requires<[HasV5T]>;
+
+def: Pat<(fabs F64:$Rs),
+ (REG_SEQUENCE DoubleRegs,
+ (S2_clrbit_i (HiReg $Rs), 31), isub_hi,
+ (i32 (LoReg $Rs)), isub_lo)>;
+
+def: Pat<(fneg F64:$Rs),
+ (REG_SEQUENCE DoubleRegs,
+ (S2_togglebit_i (HiReg $Rs), 31), isub_hi,
+ (i32 (LoReg $Rs)), isub_lo)>;
+
+def alignedload : PatFrag<(ops node:$addr), (load $addr), [{
+ return isAlignedMemNode(dyn_cast<MemSDNode>(N));
+}]>;
+
+def unalignedload : PatFrag<(ops node:$addr), (load $addr), [{
+ return !isAlignedMemNode(dyn_cast<MemSDNode>(N));
+}]>;
+
+def alignedstore : PatFrag<(ops node:$val, node:$addr), (store $val, $addr), [{
+ return isAlignedMemNode(dyn_cast<MemSDNode>(N));
+}]>;
+
+def unalignedstore : PatFrag<(ops node:$val, node:$addr), (store $val, $addr), [{
+ return !isAlignedMemNode(dyn_cast<MemSDNode>(N));
+}]>;
+
+
+def s4_6ImmPred: PatLeaf<(i32 imm), [{
+ int64_t V = N->getSExtValue();
+ return isShiftedInt<4,6>(V);
+}]>;
+
+def s4_7ImmPred: PatLeaf<(i32 imm), [{
+ int64_t V = N->getSExtValue();
+ return isShiftedInt<4,7>(V);
+}]>;
+
+
+multiclass vS32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
+ // Aligned stores
+ def : Pat<(alignedstore (VTSgl VectorRegs:$src1), IntRegs:$addr),
+ (V6_vS32b_ai IntRegs:$addr, 0, (VTSgl VectorRegs:$src1))>,
+ Requires<[UseHVXSgl]>;
+ def : Pat<(unalignedstore (VTSgl VectorRegs:$src1), IntRegs:$addr),
+ (V6_vS32Ub_ai IntRegs:$addr, 0, (VTSgl VectorRegs:$src1))>,
+ Requires<[UseHVXSgl]>;
+
+ // 128B Aligned stores
+ def : Pat<(alignedstore (VTDbl VectorRegs128B:$src1), IntRegs:$addr),
+ (V6_vS32b_ai_128B IntRegs:$addr, 0, (VTDbl VectorRegs128B:$src1))>,
+ Requires<[UseHVXDbl]>;
+ def : Pat<(unalignedstore (VTDbl VectorRegs128B:$src1), IntRegs:$addr),
+ (V6_vS32Ub_ai_128B IntRegs:$addr, 0, (VTDbl VectorRegs128B:$src1))>,
+ Requires<[UseHVXDbl]>;
+
+ // Fold Add R+OFF into vector store.
+ let AddedComplexity = 10 in {
+ def : Pat<(alignedstore (VTSgl VectorRegs:$src1),
+ (add IntRegs:$src2, s4_6ImmPred:$offset)),
+ (V6_vS32b_ai IntRegs:$src2, s4_6ImmPred:$offset,
+ (VTSgl VectorRegs:$src1))>,
+ Requires<[UseHVXSgl]>;
+ def : Pat<(unalignedstore (VTSgl VectorRegs:$src1),
+ (add IntRegs:$src2, s4_6ImmPred:$offset)),
+ (V6_vS32Ub_ai IntRegs:$src2, s4_6ImmPred:$offset,
+ (VTSgl VectorRegs:$src1))>,
+ Requires<[UseHVXSgl]>;
+
+ // Fold Add R+OFF into vector store 128B.
+ def : Pat<(alignedstore (VTDbl VectorRegs128B:$src1),
+ (add IntRegs:$src2, s4_7ImmPred:$offset)),
+ (V6_vS32b_ai_128B IntRegs:$src2, s4_7ImmPred:$offset,
+ (VTDbl VectorRegs128B:$src1))>,
+ Requires<[UseHVXDbl]>;
+ def : Pat<(unalignedstore (VTDbl VectorRegs128B:$src1),
+ (add IntRegs:$src2, s4_7ImmPred:$offset)),
+ (V6_vS32Ub_ai_128B IntRegs:$src2, s4_7ImmPred:$offset,
+ (VTDbl VectorRegs128B:$src1))>,
+ Requires<[UseHVXDbl]>;
+ }
+}
+
+defm : vS32b_ai_pats <v64i8, v128i8>;
+defm : vS32b_ai_pats <v32i16, v64i16>;
+defm : vS32b_ai_pats <v16i32, v32i32>;
+defm : vS32b_ai_pats <v8i64, v16i64>;
+
+
+multiclass vL32b_ai_pats <ValueType VTSgl, ValueType VTDbl> {
+ // Aligned loads
+ def : Pat < (VTSgl (alignedload IntRegs:$addr)),
+ (V6_vL32b_ai IntRegs:$addr, 0) >,
+ Requires<[UseHVXSgl]>;
+ def : Pat < (VTSgl (unalignedload IntRegs:$addr)),
+ (V6_vL32Ub_ai IntRegs:$addr, 0) >,
+ Requires<[UseHVXSgl]>;
+
+ // 128B Load
+ def : Pat < (VTDbl (alignedload IntRegs:$addr)),
+ (V6_vL32b_ai_128B IntRegs:$addr, 0) >,
+ Requires<[UseHVXDbl]>;
+ def : Pat < (VTDbl (unalignedload IntRegs:$addr)),
+ (V6_vL32Ub_ai_128B IntRegs:$addr, 0) >,
+ Requires<[UseHVXDbl]>;
+
+ // Fold Add R+OFF into vector load.
+ let AddedComplexity = 10 in {
+ def : Pat<(VTDbl (alignedload (add IntRegs:$src2, s4_7ImmPred:$offset))),
+ (V6_vL32b_ai_128B IntRegs:$src2, s4_7ImmPred:$offset)>,
+ Requires<[UseHVXDbl]>;
+ def : Pat<(VTDbl (unalignedload (add IntRegs:$src2, s4_7ImmPred:$offset))),
+ (V6_vL32Ub_ai_128B IntRegs:$src2, s4_7ImmPred:$offset)>,
+ Requires<[UseHVXDbl]>;
+
+ def : Pat<(VTSgl (alignedload (add IntRegs:$src2, s4_6ImmPred:$offset))),
+ (V6_vL32b_ai IntRegs:$src2, s4_6ImmPred:$offset)>,
+ Requires<[UseHVXSgl]>;
+ def : Pat<(VTSgl (unalignedload (add IntRegs:$src2, s4_6ImmPred:$offset))),
+ (V6_vL32Ub_ai IntRegs:$src2, s4_6ImmPred:$offset)>,
+ Requires<[UseHVXSgl]>;
+ }
+}
+
+defm : vL32b_ai_pats <v64i8, v128i8>;
+defm : vL32b_ai_pats <v32i16, v64i16>;
+defm : vL32b_ai_pats <v16i32, v32i32>;
+defm : vL32b_ai_pats <v8i64, v16i64>;
+
+multiclass STrivv_pats <ValueType VTSgl, ValueType VTDbl> {
+ def : Pat<(alignedstore (VTSgl VecDblRegs:$src1), IntRegs:$addr),
+ (PS_vstorerw_ai IntRegs:$addr, 0, (VTSgl VecDblRegs:$src1))>,
+ Requires<[UseHVXSgl]>;
+ def : Pat<(unalignedstore (VTSgl VecDblRegs:$src1), IntRegs:$addr),
+ (PS_vstorerwu_ai IntRegs:$addr, 0, (VTSgl VecDblRegs:$src1))>,
+ Requires<[UseHVXSgl]>;
+
+ def : Pat<(alignedstore (VTDbl VecDblRegs128B:$src1), IntRegs:$addr),
+ (PS_vstorerw_ai_128B IntRegs:$addr, 0,
+ (VTDbl VecDblRegs128B:$src1))>,
+ Requires<[UseHVXDbl]>;
+ def : Pat<(unalignedstore (VTDbl VecDblRegs128B:$src1), IntRegs:$addr),
+ (PS_vstorerwu_ai_128B IntRegs:$addr, 0,
+ (VTDbl VecDblRegs128B:$src1))>,
+ Requires<[UseHVXDbl]>;
+}
+
+defm : STrivv_pats <v128i8, v256i8>;
+defm : STrivv_pats <v64i16, v128i16>;
+defm : STrivv_pats <v32i32, v64i32>;
+defm : STrivv_pats <v16i64, v32i64>;
+
+multiclass LDrivv_pats <ValueType VTSgl, ValueType VTDbl> {
+ def : Pat<(VTSgl (alignedload I32:$addr)),
+ (PS_vloadrw_ai I32:$addr, 0)>,
+ Requires<[UseHVXSgl]>;
+ def : Pat<(VTSgl (unalignedload I32:$addr)),
+ (PS_vloadrwu_ai I32:$addr, 0)>,
+ Requires<[UseHVXSgl]>;
+
+ def : Pat<(VTDbl (alignedload I32:$addr)),
+ (PS_vloadrw_ai_128B I32:$addr, 0)>,
+ Requires<[UseHVXDbl]>;
+ def : Pat<(VTDbl (unalignedload I32:$addr)),
+ (PS_vloadrwu_ai_128B I32:$addr, 0)>,
+ Requires<[UseHVXDbl]>;
+}
+
+defm : LDrivv_pats <v128i8, v256i8>;
+defm : LDrivv_pats <v64i16, v128i16>;
+defm : LDrivv_pats <v32i32, v64i32>;
+defm : LDrivv_pats <v16i64, v32i64>;
+
+let Predicates = [HasV60T,UseHVXSgl] in {
+ def: Pat<(select I1:$Pu, (v16i32 VectorRegs:$Vs), VectorRegs:$Vt),
+ (PS_vselect I1:$Pu, VectorRegs:$Vs, VectorRegs:$Vt)>;
+ def: Pat<(select I1:$Pu, (v32i32 VecDblRegs:$Vs), VecDblRegs:$Vt),
+ (PS_wselect I1:$Pu, VecDblRegs:$Vs, VecDblRegs:$Vt)>;
+}
+let Predicates = [HasV60T,UseHVXDbl] in {
+ def: Pat<(select I1:$Pu, (v32i32 VectorRegs128B:$Vs), VectorRegs128B:$Vt),
+ (PS_vselect_128B I1:$Pu, VectorRegs128B:$Vs, VectorRegs128B:$Vt)>;
+ def: Pat<(select I1:$Pu, (v64i32 VecDblRegs128B:$Vs), VecDblRegs128B:$Vt),
+ (PS_wselect_128B I1:$Pu, VecDblRegs128B:$Vs, VecDblRegs128B:$Vt)>;
+}
+
+
+def SDTHexagonVCOMBINE: SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>,
+ SDTCisSubVecOfVec<1, 0>]>;
+
+def HexagonVCOMBINE: SDNode<"HexagonISD::VCOMBINE", SDTHexagonVCOMBINE>;
+
+def: Pat<(v32i32 (HexagonVCOMBINE (v16i32 VectorRegs:$Vs),
+ (v16i32 VectorRegs:$Vt))),
+ (V6_vcombine VectorRegs:$Vs, VectorRegs:$Vt)>,
+ Requires<[UseHVXSgl]>;
+def: Pat<(v64i32 (HexagonVCOMBINE (v32i32 VecDblRegs:$Vs),
+ (v32i32 VecDblRegs:$Vt))),
+ (V6_vcombine_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>,
+ Requires<[UseHVXDbl]>;
+
+def SDTHexagonVPACK: SDTypeProfile<1, 3, [SDTCisSameAs<1, 2>,
+ SDTCisInt<3>]>;
+
+def HexagonVPACK: SDNode<"HexagonISD::VPACK", SDTHexagonVPACK>;
+
+// 0 as the last argument denotes vpacke; 1 denotes vpacko.
+def: Pat<(v64i8 (HexagonVPACK (v64i8 VectorRegs:$Vs),
+ (v64i8 VectorRegs:$Vt), (i32 0))),
+ (V6_vpackeb VectorRegs:$Vs, VectorRegs:$Vt)>,
+ Requires<[UseHVXSgl]>;
+def: Pat<(v64i8 (HexagonVPACK (v64i8 VectorRegs:$Vs),
+ (v64i8 VectorRegs:$Vt), (i32 1))),
+ (V6_vpackob VectorRegs:$Vs, VectorRegs:$Vt)>,
+ Requires<[UseHVXSgl]>;
+def: Pat<(v32i16 (HexagonVPACK (v32i16 VectorRegs:$Vs),
+ (v32i16 VectorRegs:$Vt), (i32 0))),
+ (V6_vpackeh VectorRegs:$Vs, VectorRegs:$Vt)>,
+ Requires<[UseHVXSgl]>;
+def: Pat<(v32i16 (HexagonVPACK (v32i16 VectorRegs:$Vs),
+ (v32i16 VectorRegs:$Vt), (i32 1))),
+ (V6_vpackoh VectorRegs:$Vs, VectorRegs:$Vt)>,
+ Requires<[UseHVXSgl]>;
+
+def: Pat<(v128i8 (HexagonVPACK (v128i8 VecDblRegs:$Vs),
+ (v128i8 VecDblRegs:$Vt), (i32 0))),
+ (V6_vpackeb_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>,
+ Requires<[UseHVXDbl]>;
+def: Pat<(v128i8 (HexagonVPACK (v128i8 VecDblRegs:$Vs),
+ (v128i8 VecDblRegs:$Vt), (i32 1))),
+ (V6_vpackob_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>,
+ Requires<[UseHVXDbl]>;
+def: Pat<(v64i16 (HexagonVPACK (v64i16 VecDblRegs:$Vs),
+ (v64i16 VecDblRegs:$Vt), (i32 0))),
+ (V6_vpackeh_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>,
+ Requires<[UseHVXDbl]>;
+def: Pat<(v64i16 (HexagonVPACK (v64i16 VecDblRegs:$Vs),
+ (v64i16 VecDblRegs:$Vt), (i32 1))),
+ (V6_vpackoh_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>,
+ Requires<[UseHVXDbl]>;
+
+def V2I1: PatLeaf<(v2i1 PredRegs:$R)>;
+def V4I1: PatLeaf<(v4i1 PredRegs:$R)>;
+def V8I1: PatLeaf<(v8i1 PredRegs:$R)>;
+def V4I8: PatLeaf<(v4i8 IntRegs:$R)>;
+def V2I16: PatLeaf<(v2i16 IntRegs:$R)>;
+def V8I8: PatLeaf<(v8i8 DoubleRegs:$R)>;
+def V4I16: PatLeaf<(v4i16 DoubleRegs:$R)>;
+def V2I32: PatLeaf<(v2i32 DoubleRegs:$R)>;
+
+
+multiclass bitconvert_32<ValueType a, ValueType b> {
+ def : Pat <(b (bitconvert (a IntRegs:$src))),
+ (b IntRegs:$src)>;
+ def : Pat <(a (bitconvert (b IntRegs:$src))),
+ (a IntRegs:$src)>;
+}
+
+multiclass bitconvert_64<ValueType a, ValueType b> {
+ def : Pat <(b (bitconvert (a DoubleRegs:$src))),
+ (b DoubleRegs:$src)>;
+ def : Pat <(a (bitconvert (b DoubleRegs:$src))),
+ (a DoubleRegs:$src)>;
+}
+
+// Bit convert vector types to integers.
+defm : bitconvert_32<v4i8, i32>;
+defm : bitconvert_32<v2i16, i32>;
+defm : bitconvert_64<v8i8, i64>;
+defm : bitconvert_64<v4i16, i64>;
+defm : bitconvert_64<v2i32, i64>;
+
+def: Pat<(sra (v4i16 DoubleRegs:$src1), u4_0ImmPred:$src2),
+ (S2_asr_i_vh DoubleRegs:$src1, imm:$src2)>;
+def: Pat<(srl (v4i16 DoubleRegs:$src1), u4_0ImmPred:$src2),
+ (S2_lsr_i_vh DoubleRegs:$src1, imm:$src2)>;
+def: Pat<(shl (v4i16 DoubleRegs:$src1), u4_0ImmPred:$src2),
+ (S2_asl_i_vh DoubleRegs:$src1, imm:$src2)>;
+
+def: Pat<(sra (v2i32 DoubleRegs:$src1), u5_0ImmPred:$src2),
+ (S2_asr_i_vw DoubleRegs:$src1, imm:$src2)>;
+def: Pat<(srl (v2i32 DoubleRegs:$src1), u5_0ImmPred:$src2),
+ (S2_lsr_i_vw DoubleRegs:$src1, imm:$src2)>;
+def: Pat<(shl (v2i32 DoubleRegs:$src1), u5_0ImmPred:$src2),
+ (S2_asl_i_vw DoubleRegs:$src1, imm:$src2)>;
+
+def : Pat<(v2i16 (add (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))),
+ (A2_svaddh IntRegs:$src1, IntRegs:$src2)>;
+
+def : Pat<(v2i16 (sub (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))),
+ (A2_svsubh IntRegs:$src1, IntRegs:$src2)>;
+
+def HexagonVSPLATB: SDNode<"HexagonISD::VSPLATB", SDTUnaryOp>;
+def HexagonVSPLATH: SDNode<"HexagonISD::VSPLATH", SDTUnaryOp>;
+
+// Replicate the low 8 bits of the 32-bit input register into each of the
+// four bytes of the 32-bit destination register.
+def: Pat<(v4i8 (HexagonVSPLATB I32:$Rs)), (S2_vsplatrb I32:$Rs)>;
+
+// Replicate the low 16 bits of the 32-bit input register into each of the
+// four halfwords of the 64-bit destination register.
+def: Pat<(v4i16 (HexagonVSPLATH I32:$Rs)), (S2_vsplatrh I32:$Rs)>;
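+
+// Concrete illustration (editorial): with r0 = 0x000000AB, vsplatb gives
+// r1 = 0xABABABAB, and vsplath gives the 64-bit pair
+// r3:2 = 0x00AB00AB00AB00AB; each lane holds the replicated low
+// byte/halfword.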
+
+
+class VArith_pat <InstHexagon MI, SDNode Op, PatFrag Type>
+ : Pat <(Op Type:$Rss, Type:$Rtt),
+ (MI Type:$Rss, Type:$Rtt)>;
+
+def: VArith_pat <A2_vaddub, add, V8I8>;
+def: VArith_pat <A2_vaddh, add, V4I16>;
+def: VArith_pat <A2_vaddw, add, V2I32>;
+def: VArith_pat <A2_vsubub, sub, V8I8>;
+def: VArith_pat <A2_vsubh, sub, V4I16>;
+def: VArith_pat <A2_vsubw, sub, V2I32>;
+
+def: VArith_pat <A2_and, and, V2I16>;
+def: VArith_pat <A2_xor, xor, V2I16>;
+def: VArith_pat <A2_or, or, V2I16>;
+
+def: VArith_pat <A2_andp, and, V8I8>;
+def: VArith_pat <A2_andp, and, V4I16>;
+def: VArith_pat <A2_andp, and, V2I32>;
+def: VArith_pat <A2_orp, or, V8I8>;
+def: VArith_pat <A2_orp, or, V4I16>;
+def: VArith_pat <A2_orp, or, V2I32>;
+def: VArith_pat <A2_xorp, xor, V8I8>;
+def: VArith_pat <A2_xorp, xor, V4I16>;
+def: VArith_pat <A2_xorp, xor, V2I32>;
+
+def: Pat<(v2i32 (sra V2I32:$b, (i64 (HexagonCOMBINE (i32 u5_0ImmPred:$c),
+ (i32 u5_0ImmPred:$c))))),
+ (S2_asr_i_vw V2I32:$b, imm:$c)>;
+def: Pat<(v2i32 (srl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5_0ImmPred:$c),
+ (i32 u5_0ImmPred:$c))))),
+ (S2_lsr_i_vw V2I32:$b, imm:$c)>;
+def: Pat<(v2i32 (shl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5_0ImmPred:$c),
+ (i32 u5_0ImmPred:$c))))),
+ (S2_asl_i_vw V2I32:$b, imm:$c)>;
+
+def: Pat<(v4i16 (sra V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4_0ImmPred:$c)))))),
+ (S2_asr_i_vh V4I16:$b, imm:$c)>;
+def: Pat<(v4i16 (srl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4_0ImmPred:$c)))))),
+ (S2_lsr_i_vh V4I16:$b, imm:$c)>;
+def: Pat<(v4i16 (shl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4_0ImmPred:$c)))))),
+ (S2_asl_i_vh V4I16:$b, imm:$c)>;
+
+
+def SDTHexagon_v2i32_v2i32_i32 : SDTypeProfile<1, 2,
+ [SDTCisSameAs<0, 1>, SDTCisVT<0, v2i32>, SDTCisInt<2>]>;
+def SDTHexagon_v4i16_v4i16_i32 : SDTypeProfile<1, 2,
+ [SDTCisSameAs<0, 1>, SDTCisVT<0, v4i16>, SDTCisInt<2>]>;
+
+def HexagonVSRAW: SDNode<"HexagonISD::VSRAW", SDTHexagon_v2i32_v2i32_i32>;
+def HexagonVSRAH: SDNode<"HexagonISD::VSRAH", SDTHexagon_v4i16_v4i16_i32>;
+def HexagonVSRLW: SDNode<"HexagonISD::VSRLW", SDTHexagon_v2i32_v2i32_i32>;
+def HexagonVSRLH: SDNode<"HexagonISD::VSRLH", SDTHexagon_v4i16_v4i16_i32>;
+def HexagonVSHLW: SDNode<"HexagonISD::VSHLW", SDTHexagon_v2i32_v2i32_i32>;
+def HexagonVSHLH: SDNode<"HexagonISD::VSHLH", SDTHexagon_v4i16_v4i16_i32>;
+
+def: Pat<(v2i32 (HexagonVSRAW V2I32:$Rs, u5_0ImmPred:$u5)),
+ (S2_asr_i_vw V2I32:$Rs, imm:$u5)>;
+def: Pat<(v4i16 (HexagonVSRAH V4I16:$Rs, u4_0ImmPred:$u4)),
+ (S2_asr_i_vh V4I16:$Rs, imm:$u4)>;
+def: Pat<(v2i32 (HexagonVSRLW V2I32:$Rs, u5_0ImmPred:$u5)),
+ (S2_lsr_i_vw V2I32:$Rs, imm:$u5)>;
+def: Pat<(v4i16 (HexagonVSRLH V4I16:$Rs, u4_0ImmPred:$u4)),
+ (S2_lsr_i_vh V4I16:$Rs, imm:$u4)>;
+def: Pat<(v2i32 (HexagonVSHLW V2I32:$Rs, u5_0ImmPred:$u5)),
+ (S2_asl_i_vw V2I32:$Rs, imm:$u5)>;
+def: Pat<(v4i16 (HexagonVSHLH V4I16:$Rs, u4_0ImmPred:$u4)),
+ (S2_asl_i_vh V4I16:$Rs, imm:$u4)>;
+
+class vshift_rr_pat<InstHexagon MI, SDNode Op, PatFrag Value>
+ : Pat <(Op Value:$Rs, I32:$Rt),
+ (MI Value:$Rs, I32:$Rt)>;
+
+def: vshift_rr_pat <S2_asr_r_vw, HexagonVSRAW, V2I32>;
+def: vshift_rr_pat <S2_asr_r_vh, HexagonVSRAH, V4I16>;
+def: vshift_rr_pat <S2_lsr_r_vw, HexagonVSRLW, V2I32>;
+def: vshift_rr_pat <S2_lsr_r_vh, HexagonVSRLH, V4I16>;
+def: vshift_rr_pat <S2_asl_r_vw, HexagonVSHLW, V2I32>;
+def: vshift_rr_pat <S2_asl_r_vh, HexagonVSHLH, V4I16>;
+
+
+def SDTHexagonVecCompare_v8i8 : SDTypeProfile<1, 2,
+ [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v8i8>]>;
+def SDTHexagonVecCompare_v4i16 : SDTypeProfile<1, 2,
+ [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v4i16>]>;
+def SDTHexagonVecCompare_v2i32 : SDTypeProfile<1, 2,
+ [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v2i32>]>;
+
+def HexagonVCMPBEQ: SDNode<"HexagonISD::VCMPBEQ", SDTHexagonVecCompare_v8i8>;
+def HexagonVCMPBGT: SDNode<"HexagonISD::VCMPBGT", SDTHexagonVecCompare_v8i8>;
+def HexagonVCMPBGTU: SDNode<"HexagonISD::VCMPBGTU", SDTHexagonVecCompare_v8i8>;
+def HexagonVCMPHEQ: SDNode<"HexagonISD::VCMPHEQ", SDTHexagonVecCompare_v4i16>;
+def HexagonVCMPHGT: SDNode<"HexagonISD::VCMPHGT", SDTHexagonVecCompare_v4i16>;
+def HexagonVCMPHGTU: SDNode<"HexagonISD::VCMPHGTU", SDTHexagonVecCompare_v4i16>;
+def HexagonVCMPWEQ: SDNode<"HexagonISD::VCMPWEQ", SDTHexagonVecCompare_v2i32>;
+def HexagonVCMPWGT: SDNode<"HexagonISD::VCMPWGT", SDTHexagonVecCompare_v2i32>;
+def HexagonVCMPWGTU: SDNode<"HexagonISD::VCMPWGTU", SDTHexagonVecCompare_v2i32>;
+
+
+class vcmp_i1_pat<InstHexagon MI, SDNode Op, PatFrag Value>
+ : Pat <(i1 (Op Value:$Rs, Value:$Rt)),
+ (MI Value:$Rs, Value:$Rt)>;
+
+def: vcmp_i1_pat<A2_vcmpbeq, HexagonVCMPBEQ, V8I8>;
+def: vcmp_i1_pat<A4_vcmpbgt, HexagonVCMPBGT, V8I8>;
+def: vcmp_i1_pat<A2_vcmpbgtu, HexagonVCMPBGTU, V8I8>;
+
+def: vcmp_i1_pat<A2_vcmpheq, HexagonVCMPHEQ, V4I16>;
+def: vcmp_i1_pat<A2_vcmphgt, HexagonVCMPHGT, V4I16>;
+def: vcmp_i1_pat<A2_vcmphgtu, HexagonVCMPHGTU, V4I16>;
+
+def: vcmp_i1_pat<A2_vcmpweq, HexagonVCMPWEQ, V2I32>;
+def: vcmp_i1_pat<A2_vcmpwgt, HexagonVCMPWGT, V2I32>;
+def: vcmp_i1_pat<A2_vcmpwgtu, HexagonVCMPWGTU, V2I32>;
+
+
+class vcmp_vi1_pat<InstHexagon MI, PatFrag Op, PatFrag InVal, ValueType OutTy>
+ : Pat <(OutTy (Op InVal:$Rs, InVal:$Rt)),
+ (MI InVal:$Rs, InVal:$Rt)>;
+
+def: vcmp_vi1_pat<A2_vcmpweq, seteq, V2I32, v2i1>;
+def: vcmp_vi1_pat<A2_vcmpwgt, setgt, V2I32, v2i1>;
+def: vcmp_vi1_pat<A2_vcmpwgtu, setugt, V2I32, v2i1>;
+
+def: vcmp_vi1_pat<A2_vcmpheq, seteq, V4I16, v4i1>;
+def: vcmp_vi1_pat<A2_vcmphgt, setgt, V4I16, v4i1>;
+def: vcmp_vi1_pat<A2_vcmphgtu, setugt, V4I16, v4i1>;
+
+def: Pat<(mul V2I32:$Rs, V2I32:$Rt),
+ (PS_vmulw DoubleRegs:$Rs, DoubleRegs:$Rt)>;
+def: Pat<(add V2I32:$Rx, (mul V2I32:$Rs, V2I32:$Rt)),
+ (PS_vmulw_acc DoubleRegs:$Rx, DoubleRegs:$Rs, DoubleRegs:$Rt)>;
+
+
+// Add two v4i8 vectors: Hexagon has no instruction for this, so we
+// use the 64-bit v8i8 add and keep only the low half of the result.
+def: Pat<(v4i8 (add (v4i8 IntRegs:$Rs), (v4i8 IntRegs:$Rt))),
+ (LoReg (A2_vaddub (ToZext64 $Rs), (ToZext64 $Rt)))>;
+
+// Subtract two v4i8 vectors: Hexagon has no instruction for this, so we
+// use the 64-bit v8i8 subtract and keep only the low half of the result.
+def: Pat<(v4i8 (sub (v4i8 IntRegs:$Rs), (v4i8 IntRegs:$Rt))),
+ (LoReg (A2_vsubub (ToZext64 $Rs), (ToZext64 $Rt)))>;
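+
+// Hedged example of the widening trick above: to add {1,2,3,4} and
+// {10,20,30,40} as v4i8, both operands are zero-extended to 64 bits, added
+// byte-wise with A2_vaddub, and only the low 32-bit half (holding
+// {11,22,33,44}) is kept via LoReg.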
+
+//
+// No 32 bit vector mux.
+//
+def: Pat<(v4i8 (select I1:$Pu, V4I8:$Rs, V4I8:$Rt)),
+ (LoReg (C2_vmux I1:$Pu, (ToZext64 $Rs), (ToZext64 $Rt)))>;
+def: Pat<(v2i16 (select I1:$Pu, V2I16:$Rs, V2I16:$Rt)),
+ (LoReg (C2_vmux I1:$Pu, (ToZext64 $Rs), (ToZext64 $Rt)))>;
+
+//
+// 64-bit vector mux.
+//
+def: Pat<(v8i8 (vselect V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)),
+ (C2_vmux V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)>;
+def: Pat<(v4i16 (vselect V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)),
+ (C2_vmux V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)>;
+def: Pat<(v2i32 (vselect V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)),
+ (C2_vmux V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)>;
+
+//
+// No 32 bit vector compare.
+//
+def: Pat<(i1 (seteq V4I8:$Rs, V4I8:$Rt)),
+ (A2_vcmpbeq (ToZext64 $Rs), (ToZext64 $Rt))>;
+def: Pat<(i1 (setgt V4I8:$Rs, V4I8:$Rt)),
+ (A4_vcmpbgt (ToZext64 $Rs), (ToZext64 $Rt))>;
+def: Pat<(i1 (setugt V4I8:$Rs, V4I8:$Rt)),
+ (A2_vcmpbgtu (ToZext64 $Rs), (ToZext64 $Rt))>;
+
+def: Pat<(i1 (seteq V2I16:$Rs, V2I16:$Rt)),
+ (A2_vcmpheq (ToZext64 $Rs), (ToZext64 $Rt))>;
+def: Pat<(i1 (setgt V2I16:$Rs, V2I16:$Rt)),
+ (A2_vcmphgt (ToZext64 $Rs), (ToZext64 $Rt))>;
+def: Pat<(i1 (setugt V2I16:$Rs, V2I16:$Rt)),
+ (A2_vcmphgtu (ToZext64 $Rs), (ToZext64 $Rt))>;
+
+
+class InvertCmp_pat<InstHexagon InvMI, PatFrag CmpOp, PatFrag Value,
+ ValueType CmpTy>
+ : Pat<(CmpTy (CmpOp Value:$Rs, Value:$Rt)),
+ (InvMI Value:$Rt, Value:$Rs)>;
+
+// Map from a compare operation to the corresponding instruction with the
+// order of operands reversed, e.g. x < y --> cmp.gt(y,x).
+def: InvertCmp_pat<A4_vcmpbgt, setlt, V8I8, i1>;
+def: InvertCmp_pat<A4_vcmpbgt, setlt, V8I8, v8i1>;
+def: InvertCmp_pat<A2_vcmphgt, setlt, V4I16, i1>;
+def: InvertCmp_pat<A2_vcmphgt, setlt, V4I16, v4i1>;
+def: InvertCmp_pat<A2_vcmpwgt, setlt, V2I32, i1>;
+def: InvertCmp_pat<A2_vcmpwgt, setlt, V2I32, v2i1>;
+
+def: InvertCmp_pat<A2_vcmpbgtu, setult, V8I8, i1>;
+def: InvertCmp_pat<A2_vcmpbgtu, setult, V8I8, v8i1>;
+def: InvertCmp_pat<A2_vcmphgtu, setult, V4I16, i1>;
+def: InvertCmp_pat<A2_vcmphgtu, setult, V4I16, v4i1>;
+def: InvertCmp_pat<A2_vcmpwgtu, setult, V2I32, i1>;
+def: InvertCmp_pat<A2_vcmpwgtu, setult, V2I32, v2i1>;
+
+// Map from vcmpne(Rss) -> !vcmpew(Rss).
+// rs != rt -> !(rs == rt).
+def: Pat<(v2i1 (setne V2I32:$Rs, V2I32:$Rt)),
+ (C2_not (v2i1 (A2_vcmpbeq V2I32:$Rs, V2I32:$Rt)))>;
+
+
+// Truncate: from vector B copy all 'E'ven 'B'yte elements:
+// A[0] = B[0]; A[1] = B[2]; A[2] = B[4]; A[3] = B[6];
+def: Pat<(v4i8 (trunc V4I16:$Rs)),
+ (S2_vtrunehb V4I16:$Rs)>;
+
+// Truncate: from vector B copy all 'O'dd 'B'yte elements:
+// A[0] = B[1]; A[1] = B[3]; A[2] = B[5]; A[3] = B[7];
+// S2_vtrunohb
+
+// Truncate: from vectors B and C copy all 'E'ven 'H'alf-word elements:
+// A[0] = B[0]; A[1] = B[2]; A[2] = C[0]; A[3] = C[2];
+// S2_vtruneh
+
+def: Pat<(v2i16 (trunc V2I32:$Rs)),
+ (LoReg (S2_packhl (HiReg $Rs), (LoReg $Rs)))>;
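For reference, the S2_packhl-based pattern above has to produce plain per-lane truncation; a minimal C++ sketch of that result (assumes lane 0 sits in the low 32 bits, little-endian; the function name is mine and this is not part of the patch):

#include <cstdint>

// v2i32 -> v2i16: keep the low 16 bits of each 32-bit lane.
uint32_t trunc_v2i32_to_v2i16(uint64_t v) {
  uint32_t lane0 = uint32_t(v) & 0xFFFFu;        // low lane
  uint32_t lane1 = uint32_t(v >> 32) & 0xFFFFu;  // high lane
  return (lane1 << 16) | lane0;
}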
+
+
+def HexagonVSXTBH : SDNode<"HexagonISD::VSXTBH", SDTUnaryOp>;
+def HexagonVSXTBW : SDNode<"HexagonISD::VSXTBW", SDTUnaryOp>;
+
+def: Pat<(i64 (HexagonVSXTBH I32:$Rs)), (S2_vsxtbh I32:$Rs)>;
+def: Pat<(i64 (HexagonVSXTBW I32:$Rs)), (S2_vsxthw I32:$Rs)>;
+
+def: Pat<(v4i16 (zext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>;
+def: Pat<(v2i32 (zext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
+def: Pat<(v4i16 (anyext V4I8:$Rs)), (S2_vzxtbh V4I8:$Rs)>;
+def: Pat<(v2i32 (anyext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
+def: Pat<(v4i16 (sext V4I8:$Rs)), (S2_vsxtbh V4I8:$Rs)>;
+def: Pat<(v2i32 (sext V2I16:$Rs)), (S2_vsxthw V2I16:$Rs)>;
+
+// Sign extends a v2i8 into a v2i32.
+def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i8)),
+ (A2_combinew (A2_sxtb (HiReg $Rs)), (A2_sxtb (LoReg $Rs)))>;
+
+// Sign extends a v2i16 into a v2i32.
+def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i16)),
+ (A2_combinew (A2_sxth (HiReg $Rs)), (A2_sxth (LoReg $Rs)))>;
+
+
+// Multiplying two v2i16 yields a v2i32. The saturating multiply is used
+// here because Hexagon does not provide a non-saturating vector multiply,
+// and saturation cannot affect a result kept at twice the precision of
+// the operands.
+
+// Multiplies two v2i16 vectors: Hexagon has no multiply with C semantics
+// for this type, so this pattern uses the half-word multiply vmpyh, which
+// takes two v2i16 and returns a v2i32. The result is then truncated back
+// to v2i16 to simulate C's unsigned wrap-around semantics.
+def vmpyh: OutPatFrag<(ops node:$Rs, node:$Rt),
+ (M2_vmpy2s_s0 (i32 $Rs), (i32 $Rt))>;
+
+def: Pat<(v2i16 (mul V2I16:$Rs, V2I16:$Rt)),
+ (LoReg (S2_vtrunewh (v2i32 (A2_combineii 0, 0)),
+ (v2i32 (vmpyh V2I16:$Rs, V2I16:$Rt))))>;
+
+// Multiplies two v4i16 vectors.
+def: Pat<(v4i16 (mul V4I16:$Rs, V4I16:$Rt)),
+ (S2_vtrunewh (vmpyh (HiReg $Rs), (HiReg $Rt)),
+ (vmpyh (LoReg $Rs), (LoReg $Rt)))>;
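A scalar statement of the per-lane semantics the two multiply patterns above reconstruct (a sketch only, not part of the patch; the function name is mine):

#include <cstdint>

// One 16-bit lane: vmpyh produces the full 32-bit product, and vtrunewh
// keeps its low 16 bits, which matches C's unsigned wrap-around multiply.
uint16_t mul_lane_i16(uint16_t a, uint16_t b) {
  int32_t wide = int32_t(int16_t(a)) * int32_t(int16_t(b));
  return uint16_t(wide);
}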
+
+def VMPYB_no_V5: OutPatFrag<(ops node:$Rs, node:$Rt),
+ (S2_vtrunewh (vmpyh (HiReg (S2_vsxtbh $Rs)), (HiReg (S2_vsxtbh $Rt))),
+ (vmpyh (LoReg (S2_vsxtbh $Rs)), (LoReg (S2_vsxtbh $Rt))))>;
+
+// Multiplies two v4i8 vectors.
+def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)),
+ (S2_vtrunehb (M5_vmpybsu V4I8:$Rs, V4I8:$Rt))>,
+ Requires<[HasV5T]>;
+
+def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)),
+ (S2_vtrunehb (VMPYB_no_V5 V4I8:$Rs, V4I8:$Rt))>;
+
+// Multiplies two v8i8 vectors.
+def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)),
+ (A2_combinew (S2_vtrunehb (M5_vmpybsu (HiReg $Rs), (HiReg $Rt))),
+ (S2_vtrunehb (M5_vmpybsu (LoReg $Rs), (LoReg $Rt))))>,
+ Requires<[HasV5T]>;
+
+def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)),
+ (A2_combinew (S2_vtrunehb (VMPYB_no_V5 (HiReg $Rs), (HiReg $Rt))),
+ (S2_vtrunehb (VMPYB_no_V5 (LoReg $Rs), (LoReg $Rt))))>;
+
+def SDTHexagonBinOp64 : SDTypeProfile<1, 2,
+ [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<0, i64>]>;
+
+def HexagonSHUFFEB: SDNode<"HexagonISD::SHUFFEB", SDTHexagonBinOp64>;
+def HexagonSHUFFEH: SDNode<"HexagonISD::SHUFFEH", SDTHexagonBinOp64>;
+def HexagonSHUFFOB: SDNode<"HexagonISD::SHUFFOB", SDTHexagonBinOp64>;
+def HexagonSHUFFOH: SDNode<"HexagonISD::SHUFFOH", SDTHexagonBinOp64>;
+
+class ShufflePat<InstHexagon MI, SDNode Op>
+ : Pat<(i64 (Op DoubleRegs:$src1, DoubleRegs:$src2)),
+ (i64 (MI DoubleRegs:$src1, DoubleRegs:$src2))>;
+
+// Shuffles even bytes for i=0..3: A[2*i].b = C[2*i].b; A[2*i+1].b = B[2*i].b
+def: ShufflePat<S2_shuffeb, HexagonSHUFFEB>;
+
+// Shuffles odd bytes for i=0..3: A[2*i].b = C[2*i+1].b; A[2*i+1].b = B[2*i+1].b
+def: ShufflePat<S2_shuffob, HexagonSHUFFOB>;
+
+// Shuffles even half for i=0,1: A[2*i].h = C[2*i].h; A[2*i+1].h = B[2*i].h
+def: ShufflePat<S2_shuffeh, HexagonSHUFFEH>;
+
+// Shuffles odd half for i=0,1: A[2*i].h = C[2*i+1].h; A[2*i+1].h = B[2*i+1].h
+def: ShufflePat<S2_shuffoh, HexagonSHUFFOH>;
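A sketch of the first of these, shuffeb, following the element formulas in the comments above (B and C are the two 64-bit inputs, byte 0 is the least significant byte; illustrative only, not part of the patch):

#include <cstdint>

// A[2*i].b = C[2*i].b;  A[2*i+1].b = B[2*i].b   for i = 0..3
uint64_t shuffeb(uint64_t B, uint64_t C) {
  uint64_t A = 0;
  for (int i = 0; i < 4; ++i) {
    uint64_t cb = (C >> (16 * i)) & 0xFF;   // C[2*i].b
    uint64_t bb = (B >> (16 * i)) & 0xFF;   // B[2*i].b
    A |= cb << (16 * i);                    // A[2*i].b
    A |= bb << (16 * i + 8);                // A[2*i+1].b
  }
  return A;
}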
+
+
+// Truncated store from v4i16 to v4i8.
+def truncstorev4i8: PatFrag<(ops node:$val, node:$ptr),
+ (truncstore node:$val, node:$ptr),
+ [{ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4i8; }]>;
+
+// Truncated store from v2i32 to v2i16.
+def truncstorev2i16: PatFrag<(ops node:$val, node:$ptr),
+ (truncstore node:$val, node:$ptr),
+ [{ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v2i16; }]>;
+
+def: Pat<(truncstorev2i16 V2I32:$Rs, I32:$Rt),
+ (S2_storeri_io I32:$Rt, 0, (LoReg (S2_packhl (HiReg $Rs),
+ (LoReg $Rs))))>;
+
+def: Pat<(truncstorev4i8 V4I16:$Rs, I32:$Rt),
+ (S2_storeri_io I32:$Rt, 0, (S2_vtrunehb V4I16:$Rs))>;
+
+
+// Zero and sign extended load from v2i8 into v2i16.
+def zextloadv2i8: PatFrag<(ops node:$ptr), (zextload node:$ptr),
+ [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8; }]>;
+
+def sextloadv2i8: PatFrag<(ops node:$ptr), (sextload node:$ptr),
+ [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8; }]>;
+
+def: Pat<(v2i16 (zextloadv2i8 I32:$Rs)),
+ (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Rs, 0))))>;
+
+def: Pat<(v2i16 (sextloadv2i8 I32:$Rs)),
+ (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0))))>;
+
+def: Pat<(v2i32 (zextloadv2i8 I32:$Rs)),
+ (S2_vzxthw (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Rs, 0)))))>;
+
+def: Pat<(v2i32 (sextloadv2i8 I32:$Rs)),
+ (S2_vsxthw (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0)))))>;
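A sketch of what the v2i32 forms of these load patterns compute: load two bytes, then zero- or sign-extend each into its own 32-bit lane (assumes little-endian byte order, as on Hexagon; the function names are mine and this is not part of the patch):

#include <cstdint>
#include <cstring>

// zextloadv2i8 -> v2i32: one 16-bit load, then per-byte zero extension.
void zextload_v2i8_to_v2i32(const uint8_t *p, uint32_t out[2]) {
  uint16_t raw;
  std::memcpy(&raw, p, sizeof raw);   // the L2_loadruh_io part
  out[0] = raw & 0xFFu;               // lane 0 <- byte at p[0]
  out[1] = (raw >> 8) & 0xFFu;        // lane 1 <- byte at p[1]
}

// sextloadv2i8 -> v2i32: same load, per-byte sign extension instead.
void sextload_v2i8_to_v2i32(const uint8_t *p, int32_t out[2]) {
  out[0] = int8_t(p[0]);
  out[1] = int8_t(p[1]);
}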
+
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp
index b064decc5c76..ee3209354688 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonPeephole.cpp
@@ -10,7 +10,7 @@
// Transform the following pattern
// %vreg170<def> = SXTW %vreg166
// ...
-// %vreg176<def> = COPY %vreg170:subreg_loreg
+// %vreg176<def> = COPY %vreg170:isub_lo
//
// Into
// %vreg176<def> = COPY vreg166
@@ -93,7 +93,7 @@ namespace {
bool runOnMachineFunction(MachineFunction &MF) override;
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "Hexagon optimize redundant zero and size extends";
}
@@ -167,9 +167,9 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
// Look for this sequence below
// %vregDoubleReg1 = LSRd_ri %vregDoubleReg0, 32
- // %vregIntReg = COPY %vregDoubleReg1:subreg_loreg.
+ // %vregIntReg = COPY %vregDoubleReg1:isub_lo.
// and convert into
- // %vregIntReg = COPY %vregDoubleReg0:subreg_hireg.
+ // %vregIntReg = COPY %vregDoubleReg0:isub_hi.
if (MI.getOpcode() == Hexagon::S2_lsr_i_p) {
assert(MI.getNumOperands() == 3);
MachineOperand &Dst = MI.getOperand(0);
@@ -180,7 +180,7 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
unsigned DstReg = Dst.getReg();
unsigned SrcReg = Src1.getReg();
PeepholeDoubleRegsMap[DstReg] =
- std::make_pair(*&SrcReg, Hexagon::subreg_hireg);
+ std::make_pair(*&SrcReg, Hexagon::isub_hi);
}
// Look for P=NOT(P).
@@ -201,14 +201,14 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
}
// Look for copy:
- // %vreg176<def> = COPY %vreg170:subreg_loreg
+ // %vreg176<def> = COPY %vreg170:isub_lo
if (!DisableOptSZExt && MI.isCopy()) {
assert(MI.getNumOperands() == 2);
MachineOperand &Dst = MI.getOperand(0);
MachineOperand &Src = MI.getOperand(1);
// Make sure we are copying the lower 32 bits.
- if (Src.getSubReg() != Hexagon::subreg_loreg)
+ if (Src.getSubReg() != Hexagon::isub_lo)
continue;
unsigned DstReg = Dst.getReg();
@@ -250,6 +250,7 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
if (unsigned PeepholeSrc = PeepholeMap.lookup(Reg0)) {
// Change the 1st operand and, flip the opcode.
MI.getOperand(0).setReg(PeepholeSrc);
+ MRI->clearKillFlags(PeepholeSrc);
int NewOp = QII->getInvertedPredicatedOpcode(MI.getOpcode());
MI.setDesc(QII->get(NewOp));
Done = true;
@@ -280,6 +281,7 @@ bool HexagonPeephole::runOnMachineFunction(MachineFunction &MF) {
unsigned PSrc = MI.getOperand(PR).getReg();
if (unsigned POrig = PeepholeMap.lookup(PSrc)) {
MI.getOperand(PR).setReg(POrig);
+ MRI->clearKillFlags(POrig);
MI.setDesc(QII->get(NewOp));
// Swap operands S1 and S2.
MachineOperand Op1 = MI.getOperand(S1);
@@ -304,6 +306,7 @@ void HexagonPeephole::ChangeOpInto(MachineOperand &Dst, MachineOperand &Src) {
if (Src.isReg()) {
Dst.setReg(Src.getReg());
Dst.setSubReg(Src.getSubReg());
+ MRI->clearKillFlags(Src.getReg());
} else if (Src.isImm()) {
Dst.ChangeToImmediate(Src.getImm());
} else {
@@ -316,7 +319,7 @@ void HexagonPeephole::ChangeOpInto(MachineOperand &Dst, MachineOperand &Src) {
Dst.setImm(Src.getImm());
} else if (Src.isReg()) {
Dst.ChangeToRegister(Src.getReg(), Src.isDef(), Src.isImplicit(),
- Src.isKill(), Src.isDead(), Src.isUndef(),
+ false, Src.isDead(), Src.isUndef(),
Src.isDebug());
Dst.setSubReg(Src.getSubReg());
} else {
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRDF.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonRDF.cpp
deleted file mode 100644
index 06719cddf4b6..000000000000
--- a/contrib/llvm/lib/Target/Hexagon/HexagonRDF.cpp
+++ /dev/null
@@ -1,60 +0,0 @@
-//===--- HexagonRDF.cpp ---------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#include "HexagonRDF.h"
-#include "HexagonInstrInfo.h"
-#include "HexagonRegisterInfo.h"
-
-#include "llvm/CodeGen/MachineInstr.h"
-
-using namespace llvm;
-using namespace rdf;
-
-bool HexagonRegisterAliasInfo::covers(RegisterRef RA, RegisterRef RB) const {
- if (RA == RB)
- return true;
-
- if (TargetRegisterInfo::isVirtualRegister(RA.Reg) &&
- TargetRegisterInfo::isVirtualRegister(RB.Reg)) {
- // Hexagon-specific cases.
- if (RA.Reg == RB.Reg) {
- if (RA.Sub == 0)
- return true;
- if (RB.Sub == 0)
- return false;
- }
- }
-
- return RegisterAliasInfo::covers(RA, RB);
-}
-
-bool HexagonRegisterAliasInfo::covers(const RegisterSet &RRs, RegisterRef RR)
- const {
- if (RRs.count(RR))
- return true;
-
- if (!TargetRegisterInfo::isPhysicalRegister(RR.Reg)) {
- assert(TargetRegisterInfo::isVirtualRegister(RR.Reg));
- // Check if both covering subregisters are present.
- bool HasLo = RRs.count({RR.Reg, Hexagon::subreg_loreg});
- bool HasHi = RRs.count({RR.Reg, Hexagon::subreg_hireg});
- if (HasLo && HasHi)
- return true;
- }
-
- if (RR.Sub == 0) {
- // Check if both covering subregisters are present.
- unsigned Lo = TRI.getSubReg(RR.Reg, Hexagon::subreg_loreg);
- unsigned Hi = TRI.getSubReg(RR.Reg, Hexagon::subreg_hireg);
- if (RRs.count({Lo, 0}) && RRs.count({Hi, 0}))
- return true;
- }
-
- return RegisterAliasInfo::covers(RRs, RR);
-}
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRDF.h b/contrib/llvm/lib/Target/Hexagon/HexagonRDF.h
deleted file mode 100644
index 9a63150c377d..000000000000
--- a/contrib/llvm/lib/Target/Hexagon/HexagonRDF.h
+++ /dev/null
@@ -1,28 +0,0 @@
-//===--- HexagonRDF.h -----------------------------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef HEXAGON_RDF_H
-#define HEXAGON_RDF_H
-#include "RDFGraph.h"
-
-namespace llvm {
- class TargetRegisterInfo;
-
-namespace rdf {
- struct HexagonRegisterAliasInfo : public RegisterAliasInfo {
- HexagonRegisterAliasInfo(const TargetRegisterInfo &TRI)
- : RegisterAliasInfo(TRI) {}
- bool covers(RegisterRef RA, RegisterRef RR) const override;
- bool covers(const RegisterSet &RRs, RegisterRef RR) const override;
- };
-} // namespace rdf
-} // namespace llvm
-
-#endif
-
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp
index 642a8785def9..30640e19ebac 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonRDFOpt.cpp
@@ -8,7 +8,6 @@
//===----------------------------------------------------------------------===//
#include "HexagonInstrInfo.h"
-#include "HexagonRDF.h"
#include "HexagonSubtarget.h"
#include "RDFCopy.h"
#include "RDFDeadCode.h"
@@ -50,14 +49,14 @@ namespace {
AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
}
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "Hexagon RDF optimizations";
}
bool runOnMachineFunction(MachineFunction &MF) override;
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
static char ID;
@@ -99,6 +98,7 @@ bool HexagonCP::interpretAsCopy(const MachineInstr *MI, EqualityMap &EM) {
EM.insert(std::make_pair(DstR, SrcR));
};
+ DataFlowGraph &DFG = getDFG();
unsigned Opc = MI->getOpcode();
switch (Opc) {
case Hexagon::A2_combinew: {
@@ -106,23 +106,23 @@ bool HexagonCP::interpretAsCopy(const MachineInstr *MI, EqualityMap &EM) {
const MachineOperand &HiOp = MI->getOperand(1);
const MachineOperand &LoOp = MI->getOperand(2);
assert(DstOp.getSubReg() == 0 && "Unexpected subregister");
- mapRegs({ DstOp.getReg(), Hexagon::subreg_hireg },
- { HiOp.getReg(), HiOp.getSubReg() });
- mapRegs({ DstOp.getReg(), Hexagon::subreg_loreg },
- { LoOp.getReg(), LoOp.getSubReg() });
+ mapRegs(DFG.makeRegRef(DstOp.getReg(), Hexagon::isub_hi),
+ DFG.makeRegRef(HiOp.getReg(), HiOp.getSubReg()));
+ mapRegs(DFG.makeRegRef(DstOp.getReg(), Hexagon::isub_lo),
+ DFG.makeRegRef(LoOp.getReg(), LoOp.getSubReg()));
return true;
}
case Hexagon::A2_addi: {
const MachineOperand &A = MI->getOperand(2);
if (!A.isImm() || A.getImm() != 0)
return false;
+ LLVM_FALLTHROUGH;
}
- // Fall through.
case Hexagon::A2_tfr: {
const MachineOperand &DstOp = MI->getOperand(0);
const MachineOperand &SrcOp = MI->getOperand(1);
- mapRegs({ DstOp.getReg(), DstOp.getSubReg() },
- { SrcOp.getReg(), SrcOp.getSubReg() });
+ mapRegs(DFG.makeRegRef(DstOp.getReg(), DstOp.getSubReg()),
+ DFG.makeRegRef(SrcOp.getReg(), SrcOp.getSubReg()));
return true;
}
}
@@ -182,7 +182,8 @@ void HexagonDCE::removeOperand(NodeAddr<InstrNode*> IA, unsigned OpNum) {
llvm_unreachable("Invalid operand");
};
DenseMap<NodeId,unsigned> OpMap;
- NodeList Refs = IA.Addr->members(getDFG());
+ DataFlowGraph &DFG = getDFG();
+ NodeList Refs = IA.Addr->members(DFG);
for (NodeAddr<RefNode*> RA : Refs)
OpMap.insert(std::make_pair(RA.Id, getOpNum(RA.Addr->getOp())));
@@ -191,9 +192,9 @@ void HexagonDCE::removeOperand(NodeAddr<InstrNode*> IA, unsigned OpNum) {
for (NodeAddr<RefNode*> RA : Refs) {
unsigned N = OpMap[RA.Id];
if (N < OpNum)
- RA.Addr->setRegRef(&MI->getOperand(N));
+ RA.Addr->setRegRef(&MI->getOperand(N), DFG);
else if (N > OpNum)
- RA.Addr->setRegRef(&MI->getOperand(N-1));
+ RA.Addr->setRegRef(&MI->getOperand(N-1), DFG);
}
}
@@ -202,11 +203,11 @@ bool HexagonDCE::rewrite(NodeAddr<InstrNode*> IA, SetVector<NodeId> &Remove) {
if (!getDFG().IsCode<NodeAttrs::Stmt>(IA))
return false;
DataFlowGraph &DFG = getDFG();
- MachineInstr *MI = NodeAddr<StmtNode*>(IA).Addr->getCode();
+ MachineInstr &MI = *NodeAddr<StmtNode*>(IA).Addr->getCode();
auto &HII = static_cast<const HexagonInstrInfo&>(DFG.getTII());
if (HII.getAddrMode(MI) != HexagonII::PostInc)
return false;
- unsigned Opc = MI->getOpcode();
+ unsigned Opc = MI.getOpcode();
unsigned OpNum, NewOpc;
switch (Opc) {
case Hexagon::L2_loadri_pi:
@@ -240,12 +241,12 @@ bool HexagonDCE::rewrite(NodeAddr<InstrNode*> IA, SetVector<NodeId> &Remove) {
return getDeadNodes().count(DA.Id);
};
NodeList Defs;
- MachineOperand &Op = MI->getOperand(OpNum);
+ MachineOperand &Op = MI.getOperand(OpNum);
for (NodeAddr<DefNode*> DA : IA.Addr->members_if(DFG.IsDef, DFG)) {
if (&DA.Addr->getOp() != &Op)
continue;
Defs = DFG.getRelatedRefs(IA, DA);
- if (!std::all_of(Defs.begin(), Defs.end(), IsDead))
+ if (!all_of(Defs, IsDead))
return false;
break;
}
@@ -255,12 +256,12 @@ bool HexagonDCE::rewrite(NodeAddr<InstrNode*> IA, SetVector<NodeId> &Remove) {
Remove.insert(D.Id);
if (trace())
- dbgs() << "Rewriting: " << *MI;
- MI->setDesc(HII.get(NewOpc));
- MI->getOperand(OpNum+2).setImm(0);
+ dbgs() << "Rewriting: " << MI;
+ MI.setDesc(HII.get(NewOpc));
+ MI.getOperand(OpNum+2).setImm(0);
removeOperand(IA, OpNum);
if (trace())
- dbgs() << " to: " << *MI;
+ dbgs() << " to: " << MI;
return true;
}
@@ -286,9 +287,8 @@ bool HexagonRDFOpt::runOnMachineFunction(MachineFunction &MF) {
if (RDFDump)
MF.print(dbgs() << "Before " << getPassName() << "\n", nullptr);
- HexagonRegisterAliasInfo HAI(HRI);
TargetOperandInfo TOI(HII);
- DataFlowGraph G(MF, HII, HRI, *MDT, MDF, HAI, TOI);
+ DataFlowGraph G(MF, HII, HRI, *MDT, MDF, TOI);
// Dead phi nodes are necessary for copy propagation: we can add a use
// of a register in a block where it would need a phi node, but which
// was dead (and removed) during the graph build time.
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
index 23ebfd484be9..d3f230d3f8a6 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -151,6 +151,7 @@ BitVector HexagonRegisterInfo::getReservedRegs(const MachineFunction &MF)
Reserved.set(Hexagon::CS0);
Reserved.set(Hexagon::CS1);
Reserved.set(Hexagon::CS);
+ Reserved.set(Hexagon::USR);
return Reserved;
}
@@ -180,12 +181,12 @@ void HexagonRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
unsigned Opc = MI.getOpcode();
switch (Opc) {
- case Hexagon::TFR_FIA:
+ case Hexagon::PS_fia:
MI.setDesc(HII.get(Hexagon::A2_addi));
MI.getOperand(FIOp).ChangeToImmediate(RealOffset);
MI.RemoveOperand(FIOp+1);
return;
- case Hexagon::TFR_FI:
+ case Hexagon::PS_fi:
// Set up the instruction for updating below.
MI.setDesc(HII.get(Hexagon::A2_addi));
break;
@@ -234,6 +235,28 @@ unsigned HexagonRegisterInfo::getStackRegister() const {
}
+unsigned HexagonRegisterInfo::getHexagonSubRegIndex(
+ const TargetRegisterClass *RC, unsigned GenIdx) const {
+ assert(GenIdx == Hexagon::ps_sub_lo || GenIdx == Hexagon::ps_sub_hi);
+
+ static const unsigned ISub[] = { Hexagon::isub_lo, Hexagon::isub_hi };
+ static const unsigned VSub[] = { Hexagon::vsub_lo, Hexagon::vsub_hi };
+
+ switch (RC->getID()) {
+ case Hexagon::CtrRegs64RegClassID:
+ case Hexagon::DoubleRegsRegClassID:
+ return ISub[GenIdx];
+ case Hexagon::VecDblRegsRegClassID:
+ case Hexagon::VecDblRegs128BRegClassID:
+ return VSub[GenIdx];
+ }
+
+ if (const TargetRegisterClass *SuperRC = *RC->getSuperClasses())
+ return getHexagonSubRegIndex(SuperRC, GenIdx);
+
+ llvm_unreachable("Invalid register class");
+}
+
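A standalone sketch of the mapping the new getHexagonSubRegIndex helper above implements: one generic lo/hi index is resolved per register class, to either the integer or the vector subregister index. The enum and function names below are stand-ins, not the LLVM ones:

#include <cassert>

enum GenericSub   { PS_SUB_LO = 0, PS_SUB_HI = 1 };
enum SubIdx       { ISUB_LO, ISUB_HI, VSUB_LO, VSUB_HI };
enum RegClassKind { IntPairClass, VecPairClass };

// Resolve a generic lo/hi index against a register class kind.
SubIdx getSubRegIndex(RegClassKind RC, GenericSub G) {
  if (RC == IntPairClass)                   // DoubleRegs, CtrRegs64, ...
    return G == PS_SUB_LO ? ISUB_LO : ISUB_HI;
  assert(RC == VecPairClass);               // VecDblRegs, VecDblRegs128B
  return G == PS_SUB_LO ? VSUB_LO : VSUB_HI;
}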
bool HexagonRegisterInfo::useFPForScavengingIndex(const MachineFunction &MF)
const {
return MF.getSubtarget<HexagonSubtarget>().getFrameLowering()->hasFP(MF);
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h
index fc70679bc930..1fb295b5bd8c 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.h
@@ -22,6 +22,12 @@
#include "HexagonGenRegisterInfo.inc"
namespace llvm {
+
+namespace Hexagon {
+ // Generic (pseudo) subreg indices for use with getHexagonSubRegIndex.
+ enum { ps_sub_lo = 0, ps_sub_hi = 1 };
+}
+
class HexagonRegisterInfo : public HexagonGenRegisterInfo {
public:
HexagonRegisterInfo();
@@ -61,6 +67,9 @@ public:
unsigned getFrameRegister() const;
unsigned getStackRegister() const;
+ unsigned getHexagonSubRegIndex(const TargetRegisterClass *RC,
+ unsigned GenIdx) const;
+
const MCPhysReg *getCallerSavedRegs(const MachineFunction *MF,
const TargetRegisterClass *RC) const;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
index 4d0d411d73da..a75f3514dbd2 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonRegisterInfo.td
@@ -81,8 +81,10 @@ let Namespace = "Hexagon" in {
let Num = !cast<bits<5>>(num);
}
- def subreg_loreg : SubRegIndex<32>;
- def subreg_hireg : SubRegIndex<32, 32>;
+ def isub_lo : SubRegIndex<32>;
+ def isub_hi : SubRegIndex<32, 32>;
+ def vsub_lo : SubRegIndex<512>;
+ def vsub_hi : SubRegIndex<512, 512>;
def subreg_overflow : SubRegIndex<1, 0>;
// Integer registers.
@@ -95,7 +97,7 @@ let Namespace = "Hexagon" in {
def R31 : Ri<31, "r31", ["lr"]>, DwarfRegNum<[31]>;
// Aliases of the R* registers used to hold 64-bit int values (doubles).
- let SubRegIndices = [subreg_loreg, subreg_hireg], CoveredBySubRegs = 1 in {
+ let SubRegIndices = [isub_lo, isub_hi], CoveredBySubRegs = 1 in {
def D0 : Rd< 0, "r1:0", [R0, R1]>, DwarfRegNum<[32]>;
def D1 : Rd< 2, "r3:2", [R2, R3]>, DwarfRegNum<[34]>;
def D2 : Rd< 4, "r5:4", [R4, R5]>, DwarfRegNum<[36]>;
@@ -150,12 +152,12 @@ let Namespace = "Hexagon" in {
// Define C8 separately and make it aliased with USR.
// The problem is that USR has subregisters (e.g. overflow). If USR was
// specified as a subregister of C9_8, it would imply that subreg_overflow
- // and subreg_loreg can be composed, which leads to all kinds of issues
+ // and isub_lo can be composed, which leads to all kinds of issues
// with lane masks.
def C8 : Rc<8, "c8", [], [USR]>, DwarfRegNum<[75]>;
def PC : Rc<9, "pc">, DwarfRegNum<[76]>;
def UGP : Rc<10, "ugp", ["c10"]>, DwarfRegNum<[77]>;
- def GP : Rc<11, "gp">, DwarfRegNum<[78]>;
+ def GP : Rc<11, "gp", ["c11"]>, DwarfRegNum<[78]>;
def CS0 : Rc<12, "cs0", ["c12"]>, DwarfRegNum<[79]>;
def CS1 : Rc<13, "cs1", ["c13"]>, DwarfRegNum<[80]>;
def UPCL : Rc<14, "upcyclelo", ["c14"]>, DwarfRegNum<[81]>;
@@ -163,9 +165,10 @@ let Namespace = "Hexagon" in {
}
// Control registers pairs.
- let SubRegIndices = [subreg_loreg, subreg_hireg], CoveredBySubRegs = 1 in {
+ let SubRegIndices = [isub_lo, isub_hi], CoveredBySubRegs = 1 in {
def C1_0 : Rcc<0, "c1:0", [SA0, LC0], ["lc0:sa0"]>, DwarfRegNum<[67]>;
def C3_2 : Rcc<2, "c3:2", [SA1, LC1], ["lc1:sa1"]>, DwarfRegNum<[69]>;
+ def C5_4 : Rcc<4, "c5:4", [P3_0, C5]>, DwarfRegNum<[71]>;
def C7_6 : Rcc<6, "c7:6", [C6, C7], ["m1:0"]>, DwarfRegNum<[72]>;
// Use C8 instead of USR as a subregister of C9_8.
def C9_8 : Rcc<8, "c9:8", [C8, PC]>, DwarfRegNum<[74]>;
@@ -179,7 +182,7 @@ let Namespace = "Hexagon" in {
}
// Aliases of the V* registers used to hold double vec values.
- let SubRegIndices = [subreg_loreg, subreg_hireg], CoveredBySubRegs = 1 in {
+ let SubRegIndices = [vsub_lo, vsub_hi], CoveredBySubRegs = 1 in {
def W0 : Rd< 0, "v1:0", [V0, V1]>, DwarfRegNum<[99]>;
def W1 : Rd< 2, "v3:2", [V2, V3]>, DwarfRegNum<[101]>;
def W2 : Rd< 4, "v5:4", [V4, V5]>, DwarfRegNum<[103]>;
@@ -256,10 +259,13 @@ def ModRegs : RegisterClass<"Hexagon", [i32], 32, (add M0, M1)>;
let Size = 32, isAllocatable = 0 in
def CtrRegs : RegisterClass<"Hexagon", [i32], 32,
- (add LC0, SA0, LC1, SA1,
- P3_0,
- M0, M1, C6, C7, CS0, CS1, UPCL, UPCH,
- USR, USR_OVF, UGP, GP, PC)>;
+ (add LC0, SA0, LC1, SA1,
+ P3_0, C5,
+ M0, M1, C6, C7, C8, CS0, CS1, UPCL, UPCH,
+ USR, UGP, GP, PC)>;
+
+let isAllocatable = 0 in
+def UsrBits : RegisterClass<"Hexagon", [i1], 0, (add USR_OVF)>;
let Size = 64, isAllocatable = 0 in
def CtrRegs64 : RegisterClass<"Hexagon", [i64], 64,
@@ -278,8 +284,3 @@ def VolatileV3 {
W12, W13, W14, W15,
Q0, Q1, Q2, Q3];
}
-
-def PositiveHalfWord : PatLeaf<(i32 IntRegs:$a),
-[{
- return isPositiveHalfWord(N);
-}]>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSelectCCInfo.td b/contrib/llvm/lib/Target/Hexagon/HexagonSelectCCInfo.td
deleted file mode 100644
index d8feb89c0ab5..000000000000
--- a/contrib/llvm/lib/Target/Hexagon/HexagonSelectCCInfo.td
+++ /dev/null
@@ -1,121 +0,0 @@
-//===-- HexagoSelectCCInfo.td - Selectcc mappings ----------*- tablegen -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-
-//
-// selectcc mappings.
-//
-def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
- IntRegs:$fval, SETEQ)),
- (i32 (MUX_rr (i1 (CMPEQrr IntRegs:$lhs, IntRegs:$rhs)),
- IntRegs:$tval, IntRegs:$fval))>;
-
-def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
- IntRegs:$fval, SETNE)),
- (i32 (MUX_rr (i1 (NOT_p (CMPEQrr IntRegs:$lhs, IntRegs:$rhs))),
- IntRegs:$tval, IntRegs:$fval))>;
-
-def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
- IntRegs:$fval, SETGT)),
- (i32 (MUX_rr (i1 (CMPGTrr IntRegs:$lhs, IntRegs:$rhs)),
- IntRegs:$tval, IntRegs:$fval))>;
-
-def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
- IntRegs:$fval, SETUGT)),
- (i32 (MUX_rr (i1 (CMPGTUrr IntRegs:$lhs, IntRegs:$rhs)),
- IntRegs:$tval, IntRegs:$fval))>;
-
-
-
-def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
- IntRegs:$fval, SETULT)),
- (i32 (MUX_rr (i1 (NOT_p (CMPGTUrr IntRegs:$lhs,
- (ADD_ri IntRegs:$rhs, -1)))),
- IntRegs:$tval, IntRegs:$fval))>;
-
-def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
- IntRegs:$fval, SETLT)),
- (i32 (MUX_rr (i1 (NOT_p (CMPGTrr IntRegs:$lhs,
- (ADD_ri IntRegs:$rhs, -1)))),
- IntRegs:$tval, IntRegs:$fval))>;
-
-def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
- IntRegs:$fval, SETLE)),
- (i32 (MUX_rr (i1 (NOT_p (CMPGTrr IntRegs:$lhs, IntRegs:$rhs))),
- IntRegs:$tval, IntRegs:$fval))>;
-
-def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
- IntRegs:$fval, SETULE)),
- (i32 (MUX_rr (i1 (NOT_p (CMPGTUrr IntRegs:$lhs, IntRegs:$rhs))),
- IntRegs:$tval, IntRegs:$fval))>;
-
-
-//
-// selectcc mappings for greater-equal-to Rs => greater-than Rs-1.
-//
-def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
- IntRegs:$fval, SETGE)),
- (i32 (MUX_rr (i1 (CMPGTrr IntRegs:$lhs, (ADD_ri IntRegs:$rhs, -1))),
- IntRegs:$tval, IntRegs:$fval))>;
-
-def : Pat <(i32 (selectcc IntRegs:$lhs, IntRegs:$rhs, IntRegs:$tval,
- IntRegs:$fval, SETUGE)),
- (i32 (MUX_rr (i1 (CMPGTUrr IntRegs:$lhs, (ADD_ri IntRegs:$rhs, -1))),
- IntRegs:$tval, IntRegs:$fval))>;
-
-
-
-//
-// selectcc mappings for predicate comparisons.
-//
-// Convert Rd = selectcc(p0, p1, true_val, false_val, SETEQ) into:
-// pt = not(p1 xor p2)
-// Rd = mux(pt, true_val, false_val)
-// and similarly for SETNE
-//
-def : Pat <(i32 (selectcc PredRegs:$lhs, PredRegs:$rhs, IntRegs:$tval,
- IntRegs:$fval, SETNE)),
- (i32 (MUX_rr (i1 (XOR_pp PredRegs:$lhs, PredRegs:$rhs)), IntRegs:$tval,
- IntRegs:$fval))>;
-
-def : Pat <(i32 (selectcc PredRegs:$lhs, PredRegs:$rhs, IntRegs:$tval,
- IntRegs:$fval, SETEQ)),
- (i32 (MUX_rr (i1 (NOT_p (XOR_pp PredRegs:$lhs, PredRegs:$rhs))),
- IntRegs:$tval, IntRegs:$fval))>;
-
-
-//
-// selectcc mappings for 64-bit operands are messy. Hexagon does not have a
-// MUX64 o, use this:
-// selectcc(Rss, Rdd, tval, fval, cond) ->
-// combine(mux(cmp_cond(Rss, Rdd), tval.hi, fval.hi),
-// mux(cmp_cond(Rss, Rdd), tval.lo, fval.lo))
-
-// setgt-64.
-def : Pat<(i64 (selectcc DoubleRegs:$lhs, DoubleRegs:$rhs, DoubleRegs:$tval,
- DoubleRegs:$fval, SETGT)),
- (COMBINE_rr (MUX_rr (CMPGT64rr DoubleRegs:$lhs, DoubleRegs:$rhs),
- (EXTRACT_SUBREG DoubleRegs:$tval, subreg_hireg),
- (EXTRACT_SUBREG DoubleRegs:$fval, subreg_hireg)),
- (MUX_rr (CMPGT64rr DoubleRegs:$lhs, DoubleRegs:$rhs),
- (EXTRACT_SUBREG DoubleRegs:$tval, subreg_loreg),
- (EXTRACT_SUBREG DoubleRegs:$fval, subreg_loreg)))>;
-
-
-// setlt-64 -> setgt-64.
-def : Pat<(i64 (selectcc DoubleRegs:$lhs, DoubleRegs:$rhs, DoubleRegs:$tval,
- DoubleRegs:$fval, SETLT)),
- (COMBINE_rr (MUX_rr (CMPGT64rr DoubleRegs:$lhs,
- (ADD64_rr DoubleRegs:$rhs, (TFRI64 -1))),
- (EXTRACT_SUBREG DoubleRegs:$tval, subreg_hireg),
- (EXTRACT_SUBREG DoubleRegs:$fval, subreg_hireg)),
- (MUX_rr (CMPGT64rr DoubleRegs:$lhs,
- (ADD64_rr DoubleRegs:$rhs, (TFRI64 -1))),
- (EXTRACT_SUBREG DoubleRegs:$tval, subreg_loreg),
- (EXTRACT_SUBREG DoubleRegs:$fval, subreg_loreg)))>;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
index 00dfed754995..10730536080e 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp
@@ -44,14 +44,17 @@ SDValue HexagonSelectionDAGInfo::EmitTargetCodeForMemcpy(
const char *SpecialMemcpyName =
"__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes";
+ const MachineFunction &MF = DAG.getMachineFunction();
+ bool LongCalls = MF.getSubtarget<HexagonSubtarget>().useLongCalls();
+ unsigned Flags = LongCalls ? HexagonII::HMOTF_ConstExtended : 0;
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl)
.setChain(Chain)
.setCallee(TLI.getLibcallCallingConv(RTLIB::MEMCPY),
Type::getVoidTy(*DAG.getContext()),
- DAG.getTargetExternalSymbol(
- SpecialMemcpyName, TLI.getPointerTy(DAG.getDataLayout())),
+ DAG.getTargetExternalSymbol(SpecialMemcpyName,
+ TLI.getPointerTy(DAG.getDataLayout()), Flags),
std::move(Args))
.setDiscardResult();
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
index 6f2a42ce97f6..a83a8efb7588 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSelectionDAGInfo.h
@@ -20,6 +20,8 @@ namespace llvm {
class HexagonSelectionDAGInfo : public SelectionDAGTargetInfo {
public:
+ explicit HexagonSelectionDAGInfo() = default;
+
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Dst, SDValue Src,
SDValue Size, unsigned Align, bool isVolatile,
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
index 5a94cce4ce57..68484344fded 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
//
// When the compiler is invoked with no small data, for instance, with the -G0
-// command line option, then all CONST32_* opcodes should be broken down into
+// command line option, then all CONST* opcodes should be broken down into
// appropriate LO and HI instructions. This splitting is done by this pass.
// The only reason this is not done in the DAG lowering itself is that there
// is no simple way of getting the register allocator to allot the same hard
@@ -17,24 +17,13 @@
//
//===----------------------------------------------------------------------===//
-#include "HexagonMachineFunctionInfo.h"
#include "HexagonSubtarget.h"
#include "HexagonTargetMachine.h"
#include "HexagonTargetObjectFile.h"
-#include "llvm/CodeGen/LatencyPriorityQueue.h"
-#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/ScheduleDAGInstrs.h"
-#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
-#include "llvm/CodeGen/SchedulerRegistry.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
@@ -47,28 +36,30 @@ namespace llvm {
}
namespace {
-
-class HexagonSplitConst32AndConst64 : public MachineFunctionPass {
- public:
+ class HexagonSplitConst32AndConst64 : public MachineFunctionPass {
+ public:
static char ID;
- HexagonSplitConst32AndConst64() : MachineFunctionPass(ID) {}
-
- const char *getPassName() const override {
+ HexagonSplitConst32AndConst64() : MachineFunctionPass(ID) {
+ PassRegistry &R = *PassRegistry::getPassRegistry();
+ initializeHexagonSplitConst32AndConst64Pass(R);
+ }
+ StringRef getPassName() const override {
return "Hexagon Split Const32s and Const64s";
}
bool runOnMachineFunction(MachineFunction &Fn) override;
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
-};
-
+ };
+}
char HexagonSplitConst32AndConst64::ID = 0;
+INITIALIZE_PASS(HexagonSplitConst32AndConst64, "split-const-for-sdata",
+ "Hexagon Split Const32s and Const64s", false, false)
bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) {
-
const HexagonTargetObjectFile &TLOF =
*static_cast<const HexagonTargetObjectFile *>(
Fn.getTarget().getObjFileLowering());
@@ -79,93 +70,46 @@ bool HexagonSplitConst32AndConst64::runOnMachineFunction(MachineFunction &Fn) {
const TargetRegisterInfo *TRI = Fn.getSubtarget().getRegisterInfo();
// Loop over all of the basic blocks
- for (MachineFunction::iterator MBBb = Fn.begin(), MBBe = Fn.end();
- MBBb != MBBe; ++MBBb) {
- MachineBasicBlock *MBB = &*MBBb;
- // Traverse the basic block
- MachineBasicBlock::iterator MII = MBB->begin();
- MachineBasicBlock::iterator MIE = MBB->end ();
- while (MII != MIE) {
- MachineInstr &MI = *MII;
- int Opc = MI.getOpcode();
- if (Opc == Hexagon::CONST32_Int_Real &&
- MI.getOperand(1).isBlockAddress()) {
- int DestReg = MI.getOperand(0).getReg();
- MachineOperand &Symbol = MI.getOperand(1);
-
- BuildMI(*MBB, MII, MI.getDebugLoc(), TII->get(Hexagon::LO), DestReg)
- .addOperand(Symbol);
- BuildMI(*MBB, MII, MI.getDebugLoc(), TII->get(Hexagon::HI), DestReg)
- .addOperand(Symbol);
- // MBB->erase returns the iterator to the next instruction, which is the
- // one we want to process next
- MII = MBB->erase(&MI);
- continue;
- }
-
- else if (Opc == Hexagon::CONST32_Int_Real ||
- Opc == Hexagon::CONST32_Float_Real) {
- int DestReg = MI.getOperand(0).getReg();
-
- // We have to convert an FP immediate into its corresponding integer
- // representation
- int64_t ImmValue;
- if (Opc == Hexagon::CONST32_Float_Real) {
- APFloat Val = MI.getOperand(1).getFPImm()->getValueAPF();
- ImmValue = *Val.bitcastToAPInt().getRawData();
- }
- else
- ImmValue = MI.getOperand(1).getImm();
-
- BuildMI(*MBB, MII, MI.getDebugLoc(), TII->get(Hexagon::A2_tfrsi),
- DestReg)
+ for (MachineBasicBlock &B : Fn) {
+ for (auto I = B.begin(), E = B.end(); I != E; ) {
+ MachineInstr &MI = *I;
+ ++I;
+ unsigned Opc = MI.getOpcode();
+
+ if (Opc == Hexagon::CONST32) {
+ unsigned DestReg = MI.getOperand(0).getReg();
+ uint64_t ImmValue = MI.getOperand(1).getImm();
+ const DebugLoc &DL = MI.getDebugLoc();
+ BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), DestReg)
.addImm(ImmValue);
- MII = MBB->erase(&MI);
- continue;
- }
- else if (Opc == Hexagon::CONST64_Int_Real ||
- Opc == Hexagon::CONST64_Float_Real) {
- int DestReg = MI.getOperand(0).getReg();
-
- // We have to convert an FP immediate into its corresponding integer
- // representation
- int64_t ImmValue;
- if (Opc == Hexagon::CONST64_Float_Real) {
- APFloat Val = MI.getOperand(1).getFPImm()->getValueAPF();
- ImmValue = *Val.bitcastToAPInt().getRawData();
- }
- else
- ImmValue = MI.getOperand(1).getImm();
-
- unsigned DestLo = TRI->getSubReg(DestReg, Hexagon::subreg_loreg);
- unsigned DestHi = TRI->getSubReg(DestReg, Hexagon::subreg_hireg);
+ B.erase(&MI);
+ } else if (Opc == Hexagon::CONST64) {
+ unsigned DestReg = MI.getOperand(0).getReg();
+ int64_t ImmValue = MI.getOperand(1).getImm();
+ const DebugLoc &DL = MI.getDebugLoc();
+ unsigned DestLo = TRI->getSubReg(DestReg, Hexagon::isub_lo);
+ unsigned DestHi = TRI->getSubReg(DestReg, Hexagon::isub_hi);
int32_t LowWord = (ImmValue & 0xFFFFFFFF);
int32_t HighWord = (ImmValue >> 32) & 0xFFFFFFFF;
- BuildMI(*MBB, MII, MI.getDebugLoc(), TII->get(Hexagon::A2_tfrsi),
- DestLo)
+ BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), DestLo)
.addImm(LowWord);
- BuildMI(*MBB, MII, MI.getDebugLoc(), TII->get(Hexagon::A2_tfrsi),
- DestHi)
+ BuildMI(B, MI, DL, TII->get(Hexagon::A2_tfrsi), DestHi)
.addImm(HighWord);
- MII = MBB->erase(&MI);
- continue;
+ B.erase(&MI);
}
- ++MII;
}
}
return true;
}
-}
//===----------------------------------------------------------------------===//
// Public Constructor Functions
//===----------------------------------------------------------------------===//
-FunctionPass *
-llvm::createHexagonSplitConst32AndConst64() {
+FunctionPass *llvm::createHexagonSplitConst32AndConst64() {
return new HexagonSplitConst32AndConst64();
}
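A worked scalar version of the CONST64 split in the hunk above: the 64-bit immediate is broken into the low and high 32-bit words that feed the two A2_tfrsi instructions (illustrative sketch, not part of the patch):

#include <cstdint>
#include <cstdio>

void splitConst64(int64_t Imm, int32_t &LowWord, int32_t &HighWord) {
  LowWord  = int32_t(Imm & 0xFFFFFFFF);          // goes to the isub_lo half
  HighWord = int32_t((Imm >> 32) & 0xFFFFFFFF);  // goes to the isub_hi half
}

int main() {
  int32_t Lo, Hi;
  splitConst64(0x0000000180000002LL, Lo, Hi);
  std::printf("lo=0x%08x hi=0x%08x\n", uint32_t(Lo), uint32_t(Hi));
  // prints: lo=0x80000002 hi=0x00000001
}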
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp
index 25b2affa2f0b..2c937216d463 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSplitDouble.cpp
@@ -9,32 +9,50 @@
#define DEBUG_TYPE "hsdr"
+#include "HexagonInstrInfo.h"
#include "HexagonRegisterInfo.h"
-#include "HexagonTargetMachine.h"
-
+#include "HexagonSubtarget.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
-
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <limits>
#include <map>
#include <set>
+#include <utility>
#include <vector>
using namespace llvm;
namespace llvm {
+
FunctionPass *createHexagonSplitDoubleRegs();
void initializeHexagonSplitDoubleRegsPass(PassRegistry&);
-}
+
+} // end namespace llvm
namespace {
+
static cl::opt<int> MaxHSDR("max-hsdr", cl::Hidden, cl::init(-1),
cl::desc("Maximum number of split partitions"));
static cl::opt<bool> MemRefsFixed("hsdr-no-mem", cl::Hidden, cl::init(true),
@@ -43,18 +61,22 @@ namespace {
class HexagonSplitDoubleRegs : public MachineFunctionPass {
public:
static char ID;
+
HexagonSplitDoubleRegs() : MachineFunctionPass(ID), TRI(nullptr),
TII(nullptr) {
initializeHexagonSplitDoubleRegsPass(*PassRegistry::getPassRegistry());
}
- const char *getPassName() const override {
+
+ StringRef getPassName() const override {
return "Hexagon Split Double Registers";
}
+
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineLoopInfo>();
AU.addPreserved<MachineLoopInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
+
bool runOnMachineFunction(MachineFunction &MF) override;
private:
@@ -98,16 +120,17 @@ namespace {
static void dump_partition(raw_ostream&, const USet&,
const TargetRegisterInfo&);
};
+
char HexagonSplitDoubleRegs::ID;
int HexagonSplitDoubleRegs::Counter = 0;
const TargetRegisterClass *const HexagonSplitDoubleRegs::DoubleRC
= &Hexagon::DoubleRegsRegClass;
-}
+
+} // end anonymous namespace
INITIALIZE_PASS(HexagonSplitDoubleRegs, "hexagon-split-double",
"Hexagon Split Double Registers", false, false)
-
void HexagonSplitDoubleRegs::dump_partition(raw_ostream &os,
const USet &Part, const TargetRegisterInfo &TRI) {
dbgs() << '{';
@@ -116,7 +139,6 @@ void HexagonSplitDoubleRegs::dump_partition(raw_ostream &os,
dbgs() << " }";
}
-
bool HexagonSplitDoubleRegs::isInduction(unsigned Reg, LoopRegMap &IRM) const {
for (auto I : IRM) {
const USet &Rs = I.second;
@@ -126,7 +148,6 @@ bool HexagonSplitDoubleRegs::isInduction(unsigned Reg, LoopRegMap &IRM) const {
return false;
}
-
bool HexagonSplitDoubleRegs::isVolatileInstr(const MachineInstr *MI) const {
for (auto &I : MI->memoperands())
if (I->isVolatile())
@@ -134,7 +155,6 @@ bool HexagonSplitDoubleRegs::isVolatileInstr(const MachineInstr *MI) const {
return false;
}
-
bool HexagonSplitDoubleRegs::isFixedInstr(const MachineInstr *MI) const {
if (MI->mayLoad() || MI->mayStore())
if (MemRefsFixed || isVolatileInstr(MI))
@@ -170,7 +190,7 @@ bool HexagonSplitDoubleRegs::isFixedInstr(const MachineInstr *MI) const {
case Hexagon::A4_combineii:
case Hexagon::A4_combineri:
case Hexagon::A2_combinew:
- case Hexagon::CONST64_Int_Real:
+ case Hexagon::CONST64:
case Hexagon::A2_sxtw:
@@ -194,7 +214,6 @@ bool HexagonSplitDoubleRegs::isFixedInstr(const MachineInstr *MI) const {
return false;
}
-
void HexagonSplitDoubleRegs::partitionRegisters(UUSetMap &P2Rs) {
typedef std::map<unsigned,unsigned> UUMap;
typedef std::vector<unsigned> UVect;
@@ -283,7 +302,6 @@ void HexagonSplitDoubleRegs::partitionRegisters(UUSetMap &P2Rs) {
P2Rs[I.second].insert(I.first);
}
-
static inline int32_t profitImm(unsigned Lo, unsigned Hi) {
int32_t P = 0;
bool LoZ1 = false, HiZ1 = false;
@@ -296,7 +314,6 @@ static inline int32_t profitImm(unsigned Lo, unsigned Hi) {
return P;
}
-
int32_t HexagonSplitDoubleRegs::profit(const MachineInstr *MI) const {
unsigned ImmX = 0;
unsigned Opc = MI->getOpcode();
@@ -319,7 +336,7 @@ int32_t HexagonSplitDoubleRegs::profit(const MachineInstr *MI) const {
return 2;
case Hexagon::A2_tfrpi:
- case Hexagon::CONST64_Int_Real: {
+ case Hexagon::CONST64: {
uint64_t D = MI->getOperand(1).getImm();
unsigned Lo = D & 0xFFFFFFFFULL;
unsigned Hi = D >> 32;
@@ -337,6 +354,7 @@ int32_t HexagonSplitDoubleRegs::profit(const MachineInstr *MI) const {
if (V == 0 || V == -1)
return 10;
// Fall through into A2_combinew.
+ LLVM_FALLTHROUGH;
}
case Hexagon::A2_combinew:
return 2;
@@ -371,7 +389,6 @@ int32_t HexagonSplitDoubleRegs::profit(const MachineInstr *MI) const {
return 0;
}
-
bool HexagonSplitDoubleRegs::isProfitable(const USet &Part, LoopRegMap &IRM)
const {
unsigned FixedNum = 0, SplitNum = 0, LoopPhiNum = 0;
@@ -380,7 +397,7 @@ bool HexagonSplitDoubleRegs::isProfitable(const USet &Part, LoopRegMap &IRM)
for (unsigned DR : Part) {
MachineInstr *DefI = MRI->getVRegDef(DR);
int32_t P = profit(DefI);
- if (P == INT_MIN)
+ if (P == std::numeric_limits<int>::min())
return false;
TotalP += P;
// Reduce the profitability of splitting induction registers.
@@ -413,7 +430,7 @@ bool HexagonSplitDoubleRegs::isProfitable(const USet &Part, LoopRegMap &IRM)
// Splittable instruction.
SplitNum++;
int32_t P = profit(UseI);
- if (P == INT_MIN)
+ if (P == std::numeric_limits<int>::min())
return false;
TotalP += P;
}
@@ -426,7 +443,6 @@ bool HexagonSplitDoubleRegs::isProfitable(const USet &Part, LoopRegMap &IRM)
return TotalP > 0;
}
-
void HexagonSplitDoubleRegs::collectIndRegsForLoop(const MachineLoop *L,
USet &Rs) {
const MachineBasicBlock *HB = L->getHeader();
@@ -436,11 +452,11 @@ void HexagonSplitDoubleRegs::collectIndRegsForLoop(const MachineLoop *L,
// Examine the latch branch. Expect it to be a conditional branch to
// the header (either "br-cond header" or "br-cond exit; br header").
- MachineBasicBlock *TB = 0, *FB = 0;
+ MachineBasicBlock *TB = nullptr, *FB = nullptr;
MachineBasicBlock *TmpLB = const_cast<MachineBasicBlock*>(LB);
SmallVector<MachineOperand,2> Cond;
bool BadLB = TII->analyzeBranch(*TmpLB, TB, FB, Cond, false);
- // Only analyzable conditional branches. HII::AnalyzeBranch will put
+ // Only analyzable conditional branches. HII::analyzeBranch will put
// the branch opcode as the first element of Cond, and the predicate
// operand as the second.
if (BadLB || Cond.size() != 2)
@@ -451,7 +467,7 @@ void HexagonSplitDoubleRegs::collectIndRegsForLoop(const MachineLoop *L,
// Must go to the header.
if (TB != HB && FB != HB)
return;
- assert(Cond[1].isReg() && "Unexpected Cond vector from AnalyzeBranch");
+ assert(Cond[1].isReg() && "Unexpected Cond vector from analyzeBranch");
// Expect a predicate register.
unsigned PR = Cond[1].getReg();
assert(MRI->getRegClass(PR) == &Hexagon::PredRegsRegClass);
@@ -510,7 +526,7 @@ void HexagonSplitDoubleRegs::collectIndRegsForLoop(const MachineLoop *L,
}
return true;
};
- UVect::iterator End = std::remove_if(DP.begin(), DP.end(), NoIndOp);
+ UVect::iterator End = llvm::remove_if(DP, NoIndOp);
Rs.insert(DP.begin(), End);
Rs.insert(CmpR1);
Rs.insert(CmpR2);
@@ -522,7 +538,6 @@ void HexagonSplitDoubleRegs::collectIndRegsForLoop(const MachineLoop *L,
});
}
-
void HexagonSplitDoubleRegs::collectIndRegs(LoopRegMap &IRM) {
typedef std::vector<MachineLoop*> LoopVector;
LoopVector WorkQ;
@@ -544,7 +559,6 @@ void HexagonSplitDoubleRegs::collectIndRegs(LoopRegMap &IRM) {
}
}
-
void HexagonSplitDoubleRegs::createHalfInstr(unsigned Opc, MachineInstr *MI,
const UUPairMap &PairMap, unsigned SubR) {
MachineBasicBlock &B = *MI->getParent();
@@ -568,7 +582,7 @@ void HexagonSplitDoubleRegs::createHalfInstr(unsigned Opc, MachineInstr *MI,
SR = SubR;
} else {
const UUPair &P = F->second;
- R = (SubR == Hexagon::subreg_loreg) ? P.first : P.second;
+ R = (SubR == Hexagon::isub_lo) ? P.first : P.second;
SR = 0;
}
}
@@ -579,7 +593,6 @@ void HexagonSplitDoubleRegs::createHalfInstr(unsigned Opc, MachineInstr *MI,
}
}
-
void HexagonSplitDoubleRegs::splitMemRef(MachineInstr *MI,
const UUPairMap &PairMap) {
bool Load = MI->mayLoad();
@@ -652,7 +665,6 @@ void HexagonSplitDoubleRegs::splitMemRef(MachineInstr *MI,
}
}
-
void HexagonSplitDoubleRegs::splitImmediate(MachineInstr *MI,
const UUPairMap &PairMap) {
MachineOperand &Op0 = MI->getOperand(0);
@@ -680,7 +692,6 @@ void HexagonSplitDoubleRegs::splitImmediate(MachineInstr *MI,
.addImm(int32_t(V >> 32));
}
-
void HexagonSplitDoubleRegs::splitCombine(MachineInstr *MI,
const UUPairMap &PairMap) {
MachineOperand &Op0 = MI->getOperand(0);
@@ -713,7 +724,6 @@ void HexagonSplitDoubleRegs::splitCombine(MachineInstr *MI,
llvm_unreachable("Unexpected operand");
}
-
void HexagonSplitDoubleRegs::splitExt(MachineInstr *MI,
const UUPairMap &PairMap) {
MachineOperand &Op0 = MI->getOperand(0);
@@ -734,9 +744,10 @@ void HexagonSplitDoubleRegs::splitExt(MachineInstr *MI,
.addImm(31);
}
-
void HexagonSplitDoubleRegs::splitShift(MachineInstr *MI,
const UUPairMap &PairMap) {
+ using namespace Hexagon;
+
MachineOperand &Op0 = MI->getOperand(0);
MachineOperand &Op1 = MI->getOperand(1);
MachineOperand &Op2 = MI->getOperand(2);
@@ -750,7 +761,6 @@ void HexagonSplitDoubleRegs::splitShift(MachineInstr *MI,
const UUPair &P = F->second;
unsigned LoR = P.first;
unsigned HiR = P.second;
- using namespace Hexagon;
unsigned Opc = MI->getOpcode();
bool Right = (Opc == S2_lsr_i_p || Opc == S2_asr_i_p);
@@ -762,8 +772,8 @@ void HexagonSplitDoubleRegs::splitShift(MachineInstr *MI,
unsigned RS = getRegState(Op1);
unsigned ShiftOpc = Left ? S2_asl_i_r
: (Signed ? S2_asr_i_r : S2_lsr_i_r);
- unsigned LoSR = subreg_loreg;
- unsigned HiSR = subreg_hireg;
+ unsigned LoSR = isub_lo;
+ unsigned HiSR = isub_hi;
if (S == 0) {
// No shift, subregister copy.
@@ -858,9 +868,10 @@ void HexagonSplitDoubleRegs::splitShift(MachineInstr *MI,
}
}
-
void HexagonSplitDoubleRegs::splitAslOr(MachineInstr *MI,
const UUPairMap &PairMap) {
+ using namespace Hexagon;
+
MachineOperand &Op0 = MI->getOperand(0);
MachineOperand &Op1 = MI->getOperand(1);
MachineOperand &Op2 = MI->getOperand(2);
@@ -875,7 +886,6 @@ void HexagonSplitDoubleRegs::splitAslOr(MachineInstr *MI,
const UUPair &P = F->second;
unsigned LoR = P.first;
unsigned HiR = P.second;
- using namespace Hexagon;
MachineBasicBlock &B = *MI->getParent();
DebugLoc DL = MI->getDebugLoc();
@@ -883,8 +893,8 @@ void HexagonSplitDoubleRegs::splitAslOr(MachineInstr *MI,
unsigned RS2 = getRegState(Op2);
const TargetRegisterClass *IntRC = &IntRegsRegClass;
- unsigned LoSR = subreg_loreg;
- unsigned HiSR = subreg_hireg;
+ unsigned LoSR = isub_lo;
+ unsigned HiSR = isub_hi;
// Op0 = S2_asl_i_p_or Op1, Op2, Op3
// means: Op0 = or (Op1, asl(Op2, Op3))
@@ -951,38 +961,38 @@ void HexagonSplitDoubleRegs::splitAslOr(MachineInstr *MI,
}
}
-
bool HexagonSplitDoubleRegs::splitInstr(MachineInstr *MI,
const UUPairMap &PairMap) {
+ using namespace Hexagon;
+
DEBUG(dbgs() << "Splitting: " << *MI);
bool Split = false;
unsigned Opc = MI->getOpcode();
- using namespace Hexagon;
switch (Opc) {
case TargetOpcode::PHI:
case TargetOpcode::COPY: {
unsigned DstR = MI->getOperand(0).getReg();
if (MRI->getRegClass(DstR) == DoubleRC) {
- createHalfInstr(Opc, MI, PairMap, subreg_loreg);
- createHalfInstr(Opc, MI, PairMap, subreg_hireg);
+ createHalfInstr(Opc, MI, PairMap, isub_lo);
+ createHalfInstr(Opc, MI, PairMap, isub_hi);
Split = true;
}
break;
}
case A2_andp:
- createHalfInstr(A2_and, MI, PairMap, subreg_loreg);
- createHalfInstr(A2_and, MI, PairMap, subreg_hireg);
+ createHalfInstr(A2_and, MI, PairMap, isub_lo);
+ createHalfInstr(A2_and, MI, PairMap, isub_hi);
Split = true;
break;
case A2_orp:
- createHalfInstr(A2_or, MI, PairMap, subreg_loreg);
- createHalfInstr(A2_or, MI, PairMap, subreg_hireg);
+ createHalfInstr(A2_or, MI, PairMap, isub_lo);
+ createHalfInstr(A2_or, MI, PairMap, isub_hi);
Split = true;
break;
case A2_xorp:
- createHalfInstr(A2_xor, MI, PairMap, subreg_loreg);
- createHalfInstr(A2_xor, MI, PairMap, subreg_hireg);
+ createHalfInstr(A2_xor, MI, PairMap, isub_lo);
+ createHalfInstr(A2_xor, MI, PairMap, isub_hi);
Split = true;
break;
@@ -995,7 +1005,7 @@ bool HexagonSplitDoubleRegs::splitInstr(MachineInstr *MI,
break;
case A2_tfrpi:
- case CONST64_Int_Real:
+ case CONST64:
splitImmediate(MI, PairMap);
Split = true;
break;
@@ -1034,7 +1044,6 @@ bool HexagonSplitDoubleRegs::splitInstr(MachineInstr *MI,
return Split;
}
-
void HexagonSplitDoubleRegs::replaceSubregUses(MachineInstr *MI,
const UUPairMap &PairMap) {
for (auto &Op : MI->operands()) {
@@ -1046,10 +1055,10 @@ void HexagonSplitDoubleRegs::replaceSubregUses(MachineInstr *MI,
continue;
const UUPair &P = F->second;
switch (Op.getSubReg()) {
- case Hexagon::subreg_loreg:
+ case Hexagon::isub_lo:
Op.setReg(P.first);
break;
- case Hexagon::subreg_hireg:
+ case Hexagon::isub_hi:
Op.setReg(P.second);
break;
}
@@ -1057,7 +1066,6 @@ void HexagonSplitDoubleRegs::replaceSubregUses(MachineInstr *MI,
}
}
-
void HexagonSplitDoubleRegs::collapseRegPairs(MachineInstr *MI,
const UUPairMap &PairMap) {
MachineBasicBlock &B = *MI->getParent();
@@ -1078,14 +1086,13 @@ void HexagonSplitDoubleRegs::collapseRegPairs(MachineInstr *MI,
unsigned NewDR = MRI->createVirtualRegister(DoubleRC);
BuildMI(B, MI, DL, TII->get(TargetOpcode::REG_SEQUENCE), NewDR)
.addReg(Pr.first)
- .addImm(Hexagon::subreg_loreg)
+ .addImm(Hexagon::isub_lo)
.addReg(Pr.second)
- .addImm(Hexagon::subreg_hireg);
+ .addImm(Hexagon::isub_hi);
Op.setReg(NewDR);
}
}
-
bool HexagonSplitDoubleRegs::splitPartition(const USet &Part) {
const TargetRegisterClass *IntRC = &Hexagon::IntRegsRegClass;
typedef std::set<MachineInstr*> MISet;
@@ -1146,7 +1153,6 @@ bool HexagonSplitDoubleRegs::splitPartition(const USet &Part) {
return Changed;
}
-
bool HexagonSplitDoubleRegs::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "Splitting double registers in function: "
<< MF.getName() << '\n');
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp
index 54bc3cf6f6ff..af1bf48b6320 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonStoreWidening.cpp
@@ -23,33 +23,45 @@
#define DEBUG_TYPE "hexagon-widen-stores"
-#include "HexagonTargetMachine.h"
-
-#include "llvm/PassSupport.h"
+#include "HexagonInstrInfo.h"
+#include "HexagonRegisterInfo.h"
+#include "HexagonSubtarget.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/CodeGen/Passes.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetInstrInfo.h"
-
#include <algorithm>
-
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <vector>
using namespace llvm;
namespace llvm {
+
FunctionPass *createHexagonStoreWidening();
void initializeHexagonStoreWideningPass(PassRegistry&);
-}
+
+} // end namespace llvm
namespace {
+
struct HexagonStoreWidening : public MachineFunctionPass {
const HexagonInstrInfo *TII;
const HexagonRegisterInfo *TRI;
@@ -59,15 +71,14 @@ namespace {
public:
static char ID;
+
HexagonStoreWidening() : MachineFunctionPass(ID) {
initializeHexagonStoreWideningPass(*PassRegistry::getPassRegistry());
}
bool runOnMachineFunction(MachineFunction &MF) override;
- const char *getPassName() const override {
- return "Hexagon Store Widening";
- }
+ StringRef getPassName() const override { return "Hexagon Store Widening"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AAResultsWrapperPass>();
@@ -98,19 +109,18 @@ namespace {
bool storesAreAdjacent(const MachineInstr *S1, const MachineInstr *S2);
};
-} // namespace
-
+char HexagonStoreWidening::ID = 0;
-namespace {
+} // end anonymous namespace
// Some local helper functions...
-unsigned getBaseAddressRegister(const MachineInstr *MI) {
+static unsigned getBaseAddressRegister(const MachineInstr *MI) {
const MachineOperand &MO = MI->getOperand(0);
assert(MO.isReg() && "Expecting register operand");
return MO.getReg();
}
-int64_t getStoreOffset(const MachineInstr *MI) {
+static int64_t getStoreOffset(const MachineInstr *MI) {
unsigned OpC = MI->getOpcode();
assert(HexagonStoreWidening::handledStoreType(MI) && "Unhandled opcode");
@@ -128,23 +138,17 @@ int64_t getStoreOffset(const MachineInstr *MI) {
return 0;
}
-const MachineMemOperand &getStoreTarget(const MachineInstr *MI) {
+static const MachineMemOperand &getStoreTarget(const MachineInstr *MI) {
assert(!MI->memoperands_empty() && "Expecting memory operands");
return **MI->memoperands_begin();
}
-} // namespace
-
-
-char HexagonStoreWidening::ID = 0;
-
INITIALIZE_PASS_BEGIN(HexagonStoreWidening, "hexagon-widen-stores",
"Hexason Store Widening", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(HexagonStoreWidening, "hexagon-widen-stores",
"Hexagon Store Widening", false, false)
-
// Filtering function: any stores whose opcodes are not "approved" of by
// this function will not be subjected to widening.
inline bool HexagonStoreWidening::handledStoreType(const MachineInstr *MI) {
@@ -162,7 +166,6 @@ inline bool HexagonStoreWidening::handledStoreType(const MachineInstr *MI) {
}
}
-
// Check if the machine memory operand MMO is aliased with any of the
// stores in the store group Stores.
bool HexagonStoreWidening::instrAliased(InstrGroup &Stores,
@@ -185,7 +188,6 @@ bool HexagonStoreWidening::instrAliased(InstrGroup &Stores,
return false;
}
-
// Check if the machine instruction MI accesses any storage aliased with
// any store in the group Stores.
bool HexagonStoreWidening::instrAliased(InstrGroup &Stores,
@@ -196,7 +198,6 @@ bool HexagonStoreWidening::instrAliased(InstrGroup &Stores,
return false;
}
-
// Inspect a machine basic block, and generate store groups out of stores
// encountered in the block.
//
@@ -233,7 +234,6 @@ void HexagonStoreWidening::createStoreGroups(MachineBasicBlock &MBB,
}
}
-
// Create a single store group. The stores need to be independent between
// themselves, and also there cannot be other instructions between them
// that could read or modify storage being stored into.
@@ -263,7 +263,7 @@ void HexagonStoreWidening::createStoreGroup(MachineInstr *BaseStore,
unsigned BR = getBaseAddressRegister(MI);
if (BR == BaseReg) {
Group.push_back(MI);
- *I = 0;
+ *I = nullptr;
continue;
}
}
@@ -280,7 +280,6 @@ void HexagonStoreWidening::createStoreGroup(MachineInstr *BaseStore,
} // for
}
-
// Check if store instructions S1 and S2 are adjacent. More precisely,
// S2 has to access memory immediately following that accessed by S1.
bool HexagonStoreWidening::storesAreAdjacent(const MachineInstr *S1,
@@ -298,7 +297,6 @@ bool HexagonStoreWidening::storesAreAdjacent(const MachineInstr *S1,
: int(Off1+S1MO.getSize()) == Off2;
}
-
/// Given a sequence of adjacent stores, and a maximum size of a single wide
/// store, pick a group of stores that can be replaced by a single store
/// of size not exceeding MaxSize. The selected sequence will be recorded
@@ -390,7 +388,6 @@ bool HexagonStoreWidening::selectStores(InstrGroup::iterator Begin,
return true;
}
-
/// Given an "old group" OG of stores, create a "new group" NG of instructions
/// to replace them. Ideally, NG would only have a single instruction in it,
/// but that may only be possible for store-immediate.
@@ -419,7 +416,6 @@ bool HexagonStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG,
Shift += NBits;
}
-
MachineInstr *FirstSt = OG.front();
DebugLoc DL = OG.back()->getDebugLoc();
const MachineMemOperand &OldM = getStoreTarget(FirstSt);
@@ -471,7 +467,6 @@ bool HexagonStoreWidening::createWideStores(InstrGroup &OG, InstrGroup &NG,
return true;
}
-
// Replace instructions from the old group OG with instructions from the
// new group NG. Conceptually, remove all instructions in OG, and then
// insert all instructions in NG, starting at where the first instruction
@@ -536,7 +531,6 @@ bool HexagonStoreWidening::replaceStores(InstrGroup &OG, InstrGroup &NG) {
return true;
}
-
// Break up the group into smaller groups, each of which can be replaced by
// a single wide store. Widen each such smaller group and replace the old
// instructions with the widened ones.
@@ -566,7 +560,6 @@ bool HexagonStoreWidening::processStoreGroup(InstrGroup &Group) {
return Changed;
}
-
// Process a single basic block: create the store groups, and replace them
// with the widened stores, if possible. Processing of each basic block
// is independent from processing of any other basic block. This transfor-
@@ -592,7 +585,6 @@ bool HexagonStoreWidening::processBasicBlock(MachineBasicBlock &MBB) {
return Changed;
}
-
bool HexagonStoreWidening::runOnMachineFunction(MachineFunction &MFn) {
if (skipFunction(*MFn.getFunction()))
return false;
@@ -612,8 +604,6 @@ bool HexagonStoreWidening::runOnMachineFunction(MachineFunction &MFn) {
return Changed;
}
-
FunctionPass *llvm::createHexagonStoreWidening() {
return new HexagonStoreWidening();
}
-
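The store-widening rewrite above keeps the pass's core arithmetic intact: two handled stores are adjacent when they use the same base register and the second one begins exactly where the first one ends (Off1 + Size1 == Off2), and a run of adjacent stores is only collapsed while the combined size stays a power of two no larger than the target width. A minimal standalone sketch of that arithmetic, independent of the MachineInstr plumbing (StoreDesc and pickWidenableRun are illustrative names, not part of the pass):

#include <cstdint>
#include <vector>

// Illustrative stand-in for one handled store: base register, byte offset,
// and access size (1, 2 or 4 bytes for the opcodes the pass accepts).
struct StoreDesc {
  unsigned BaseReg;
  int64_t Offset;
  unsigned Size;
};

// Mirrors storesAreAdjacent: S2 must start exactly where S1 ends.
static bool areAdjacent(const StoreDesc &S1, const StoreDesc &S2) {
  return S1.BaseReg == S2.BaseReg &&
         S1.Offset + int64_t(S1.Size) == S2.Offset;
}

// Pick the longest prefix of adjacent stores whose combined size is a power
// of two no larger than MaxSize and whose start offset is aligned to it;
// returns how many stores from the front of Run could become one wide store.
static unsigned pickWidenableRun(const std::vector<StoreDesc> &Run,
                                 unsigned MaxSize) {
  unsigned Best = 0, Total = 0;
  for (unsigned I = 0, E = Run.size(); I != E; ++I) {
    if (I != 0 && !areAdjacent(Run[I - 1], Run[I]))
      break;
    Total += Run[I].Size;
    if (Total > MaxSize)
      break;
    bool PowerOfTwo = (Total & (Total - 1)) == 0;
    bool Aligned = Run[0].Offset % Total == 0;
    if (PowerOfTwo && Aligned)
      Best = I + 1;
  }
  return Best;
}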
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
index fb315a730f39..8c23a2465dd6 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp
@@ -66,9 +66,13 @@ static cl::opt<bool> DisableHexagonMISched("disable-hexagon-misched",
cl::desc("Disable Hexagon MI Scheduling"));
static cl::opt<bool> EnableSubregLiveness("hexagon-subreg-liveness",
- cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::Hidden, cl::ZeroOrMore, cl::init(true),
cl::desc("Enable subregister liveness tracking for Hexagon"));
+static cl::opt<bool> OverrideLongCalls("hexagon-long-calls",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("If present, forces/disables the use of long calls"));
+
void HexagonSubtarget::initializeEnvironment() {
UseMemOps = false;
ModeIEEERndNear = false;
@@ -77,7 +81,7 @@ void HexagonSubtarget::initializeEnvironment() {
HexagonSubtarget &
HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
- CPUString = HEXAGON_MC::selectHexagonCPU(getTargetTriple(), CPU);
+ CPUString = Hexagon_MC::selectHexagonCPU(getTargetTriple(), CPU);
static std::map<StringRef, HexagonArchEnum> CpuTable {
{ "hexagonv4", V4 },
@@ -94,12 +98,15 @@ HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
UseHVXOps = false;
UseHVXDblOps = false;
+ UseLongCalls = false;
ParseSubtargetFeatures(CPUString, FS);
if (EnableHexagonHVX.getPosition())
UseHVXOps = EnableHexagonHVX;
if (EnableHexagonHVXDouble.getPosition())
UseHVXDblOps = EnableHexagonHVXDouble;
+ if (OverrideLongCalls.getPosition())
+ UseLongCalls = OverrideLongCalls;
return *this;
}
@@ -148,19 +155,19 @@ void HexagonSubtarget::HexagonDAGMutation::apply(ScheduleDAGInstrs *DAG) {
// Update the latency of chain edges between v60 vector load or store
// instructions to be 1. These instructions cannot be scheduled in the
// same packet.
- MachineInstr *MI1 = SU.getInstr();
+ MachineInstr &MI1 = *SU.getInstr();
auto *QII = static_cast<const HexagonInstrInfo*>(DAG->TII);
- bool IsStoreMI1 = MI1->mayStore();
- bool IsLoadMI1 = MI1->mayLoad();
+ bool IsStoreMI1 = MI1.mayStore();
+ bool IsLoadMI1 = MI1.mayLoad();
if (!QII->isV60VectorInstruction(MI1) || !(IsStoreMI1 || IsLoadMI1))
continue;
for (auto &SI : SU.Succs) {
if (SI.getKind() != SDep::Order || SI.getLatency() != 0)
continue;
- MachineInstr *MI2 = SI.getSUnit()->getInstr();
+ MachineInstr &MI2 = *SI.getSUnit()->getInstr();
if (!QII->isV60VectorInstruction(MI2))
continue;
- if ((IsStoreMI1 && MI2->mayStore()) || (IsLoadMI1 && MI2->mayLoad())) {
+ if ((IsStoreMI1 && MI2.mayStore()) || (IsLoadMI1 && MI2.mayLoad())) {
SI.setLatency(1);
SU.setHeightDirty();
// Change the dependence in the opposite direction too.
@@ -181,6 +188,11 @@ void HexagonSubtarget::getPostRAMutations(
Mutations.push_back(make_unique<HexagonSubtarget::HexagonDAGMutation>());
}
+void HexagonSubtarget::getSMSMutations(
+ std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations) const {
+ Mutations.push_back(make_unique<HexagonSubtarget::HexagonDAGMutation>());
+}
+
// Pin the vtable to this file.
void HexagonSubtarget::anchor() {}
@@ -196,8 +208,8 @@ bool HexagonSubtarget::enableSubRegLiveness() const {
}
// This helper function is responsible for increasing the latency only.
-void HexagonSubtarget::updateLatency(MachineInstr *SrcInst,
- MachineInstr *DstInst, SDep &Dep) const {
+void HexagonSubtarget::updateLatency(MachineInstr &SrcInst,
+ MachineInstr &DstInst, SDep &Dep) const {
if (!hasV60TOps())
return;
@@ -231,19 +243,19 @@ static SUnit *getZeroLatency(SUnit *N, SmallVector<SDep, 4> &Deps) {
/// Change the latency between the two SUnits.
void HexagonSubtarget::changeLatency(SUnit *Src, SmallVector<SDep, 4> &Deps,
SUnit *Dst, unsigned Lat) const {
- MachineInstr *SrcI = Src->getInstr();
+ MachineInstr &SrcI = *Src->getInstr();
for (auto &I : Deps) {
if (I.getSUnit() != Dst)
continue;
I.setLatency(Lat);
SUnit *UpdateDst = I.getSUnit();
- updateLatency(SrcI, UpdateDst->getInstr(), I);
+ updateLatency(SrcI, *UpdateDst->getInstr(), I);
// Update the latency of opposite edge too.
for (auto &PI : UpdateDst->Preds) {
if (PI.getSUnit() != Src || !PI.isAssignedRegDep())
continue;
PI.setLatency(Lat);
- updateLatency(SrcI, UpdateDst->getInstr(), PI);
+ updateLatency(SrcI, *UpdateDst->getInstr(), PI);
}
}
}
@@ -254,10 +266,14 @@ void HexagonSubtarget::changeLatency(SUnit *Src, SmallVector<SDep, 4> &Deps,
// the others, if needed.
bool HexagonSubtarget::isBestZeroLatency(SUnit *Src, SUnit *Dst,
const HexagonInstrInfo *TII) const {
- MachineInstr *SrcInst = Src->getInstr();
- MachineInstr *DstInst = Dst->getInstr();
+ MachineInstr &SrcInst = *Src->getInstr();
+ MachineInstr &DstInst = *Dst->getInstr();
+
+ // Ignore Boundary SU nodes as these have null instructions.
+ if (Dst->isBoundaryNode())
+ return false;
- if (SrcInst->isPHI() || DstInst->isPHI())
+ if (SrcInst.isPHI() || DstInst.isPHI())
return false;
// Check if the Dst instruction is the best candidate first.
@@ -294,9 +310,9 @@ bool HexagonSubtarget::isBestZeroLatency(SUnit *Src, SUnit *Dst,
// Update the latency of a Phi when the Phi bridges two instructions that
// require a multi-cycle latency.
-void HexagonSubtarget::changePhiLatency(MachineInstr *SrcInst, SUnit *Dst,
+void HexagonSubtarget::changePhiLatency(MachineInstr &SrcInst, SUnit *Dst,
SDep &Dep) const {
- if (!SrcInst->isPHI() || Dst->NumPreds == 0 || Dep.getLatency() != 0)
+ if (!SrcInst.isPHI() || Dst->NumPreds == 0 || Dep.getLatency() != 0)
return;
for (const SDep &PI : Dst->Preds) {
@@ -319,7 +335,7 @@ void HexagonSubtarget::adjustSchedDependency(SUnit *Src, SUnit *Dst,
const HexagonInstrInfo *QII = static_cast<const HexagonInstrInfo *>(getInstrInfo());
// Instructions with .new operands have zero latency.
- if (QII->canExecuteInBundle(SrcInst, DstInst) &&
+ if (QII->canExecuteInBundle(*SrcInst, *DstInst) &&
isBestZeroLatency(Src, Dst, QII)) {
Dep.setLatency(0);
return;
@@ -329,17 +345,17 @@ void HexagonSubtarget::adjustSchedDependency(SUnit *Src, SUnit *Dst,
return;
// Don't adjust the latency of post-increment part of the instruction.
- if (QII->isPostIncrement(SrcInst) && Dep.isAssignedRegDep()) {
+ if (QII->isPostIncrement(*SrcInst) && Dep.isAssignedRegDep()) {
if (SrcInst->mayStore())
return;
if (Dep.getReg() != SrcInst->getOperand(0).getReg())
return;
- } else if (QII->isPostIncrement(DstInst) && Dep.getKind() == SDep::Anti) {
+ } else if (QII->isPostIncrement(*DstInst) && Dep.getKind() == SDep::Anti) {
if (DstInst->mayStore())
return;
if (Dep.getReg() != DstInst->getOperand(0).getReg())
return;
- } else if (QII->isPostIncrement(DstInst) && DstInst->mayStore() &&
+ } else if (QII->isPostIncrement(*DstInst) && DstInst->mayStore() &&
Dep.isAssignedRegDep()) {
MachineOperand &Op = DstInst->getOperand(DstInst->getNumOperands() - 1);
if (Op.isReg() && Dep.getReg() != Op.getReg())
@@ -348,7 +364,7 @@ void HexagonSubtarget::adjustSchedDependency(SUnit *Src, SUnit *Dst,
// Check if we need to change any of the latency values when Phis are added.
if (useBSBScheduling() && SrcInst->isPHI()) {
- changePhiLatency(SrcInst, Dst, Dep);
+ changePhiLatency(*SrcInst, Dst, Dep);
return;
}
@@ -358,12 +374,20 @@ void HexagonSubtarget::adjustSchedDependency(SUnit *Src, SUnit *Dst,
DstInst = Dst->Succs[0].getSUnit()->getInstr();
// Try to schedule uses near definitions to generate .cur.
- if (EnableDotCurSched && QII->isToBeScheduledASAP(SrcInst, DstInst) &&
+ if (EnableDotCurSched && QII->isToBeScheduledASAP(*SrcInst, *DstInst) &&
isBestZeroLatency(Src, Dst, QII)) {
Dep.setLatency(0);
return;
}
- updateLatency(SrcInst, DstInst, Dep);
+ updateLatency(*SrcInst, *DstInst, Dep);
+}
+
+unsigned HexagonSubtarget::getL1CacheLineSize() const {
+ return 32;
+}
+
+unsigned HexagonSubtarget::getL1PrefetchDistance() const {
+ return 32;
}
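Both HVX flags and the new -hexagon-long-calls flag use the same idiom: the parsed subtarget features set the default, and the command-line option overrides it only if it was actually given, which is what the getPosition() check distinguishes (an absent flag reports position 0, so an explicit =false still wins over the feature string while an omitted flag changes nothing). A small hedged sketch of the pattern, with a placeholder option name:

#include "llvm/Support/CommandLine.h"

using namespace llvm;

// Placeholder option; mirrors the shape of OverrideLongCalls above.
static cl::opt<bool> OverrideFeature("override-feature", cl::Hidden,
    cl::ZeroOrMore, cl::init(false),
    cl::desc("If present, forces/disables the feature"));

// Called after ParseSubtargetFeatures has computed the default value.
static bool resolveFeature(bool ParsedDefault) {
  // getPosition() is non-zero only when the flag appeared on the command
  // line, so the default survives unless the user said something explicit.
  if (OverrideFeature.getPosition())
    return OverrideFeature;
  return ParsedDefault;
}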
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h
index 9b40c130e622..f2b9cdaad1ae 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h
@@ -34,6 +34,7 @@ class HexagonSubtarget : public HexagonGenSubtargetInfo {
virtual void anchor();
bool UseMemOps, UseHVXOps, UseHVXDblOps;
+ bool UseLongCalls;
bool ModeIEEERndNear;
public:
@@ -101,6 +102,7 @@ public:
bool useHVXOps() const { return UseHVXOps; }
bool useHVXDblOps() const { return UseHVXOps && UseHVXDblOps; }
bool useHVXSglOps() const { return UseHVXOps && !UseHVXDblOps; }
+ bool useLongCalls() const { return UseLongCalls; }
bool useBSBScheduling() const { return UseBSBScheduling; }
bool enableMachineScheduler() const override;
@@ -128,19 +130,26 @@ public:
std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
const override;
+ void getSMSMutations(
+ std::vector<std::unique_ptr<ScheduleDAGMutation>> &Mutations)
+ const override;
+
/// \brief Perform target specific adjustments to the latency of a schedule
/// dependency.
void adjustSchedDependency(SUnit *def, SUnit *use, SDep& dep) const override;
+ unsigned getL1CacheLineSize() const;
+ unsigned getL1PrefetchDistance() const;
+
private:
// Helper function responsible for increasing the latency only.
- void updateLatency(MachineInstr *SrcInst, MachineInstr *DstInst, SDep &Dep)
+ void updateLatency(MachineInstr &SrcInst, MachineInstr &DstInst, SDep &Dep)
const;
void changeLatency(SUnit *Src, SmallVector<SDep, 4> &Deps, SUnit *Dst,
unsigned Lat) const;
bool isBestZeroLatency(SUnit *Src, SUnit *Dst, const HexagonInstrInfo *TII)
const;
- void changePhiLatency(MachineInstr *SrcInst, SUnit *Dst, SDep &Dep) const;
+ void changePhiLatency(MachineInstr &SrcInst, SUnit *Dst, SDep &Dep) const;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonSystemInst.td b/contrib/llvm/lib/Target/Hexagon/HexagonSystemInst.td
index 771498a40b99..629a98749ee9 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonSystemInst.td
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonSystemInst.td
@@ -118,7 +118,7 @@ def Y2_isync: JRInst <(outs), (ins),
let hasSideEffects = 0, isSolo = 1 in
class J2_MISC_TRAP_PAUSE<string mnemonic, bits<2> MajOp>
: JRInst
- <(outs), (ins u8Imm:$u8),
+ <(outs), (ins u8_0Imm:$u8),
#mnemonic#"(#$u8)"> {
bits<8> u8;
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
index f964a6612f43..132d12a66d46 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -27,7 +27,6 @@
using namespace llvm;
-
static cl::opt<bool> EnableRDFOpt("rdf-opt", cl::Hidden, cl::ZeroOrMore,
cl::init(true), cl::desc("Enable RDF-based optimizations"));
@@ -42,6 +41,9 @@ static cl::opt<bool> DisableHexagonCFGOpt("disable-hexagon-cfgopt",
cl::Hidden, cl::ZeroOrMore, cl::init(false),
cl::desc("Disable Hexagon CFG Optimization"));
+static cl::opt<bool> DisableHCP("disable-hcp", cl::init(false), cl::Hidden,
+ cl::ZeroOrMore, cl::desc("Disable Hexagon constant propagation"));
+
static cl::opt<bool> DisableStoreWidening("disable-store-widen",
cl::Hidden, cl::init(false), cl::desc("Disable store widening"));
@@ -68,6 +70,10 @@ static cl::opt<bool> EnableGenPred("hexagon-gen-pred", cl::init(true),
cl::Hidden, cl::desc("Enable conversion of arithmetic operations to "
"predicate instructions"));
+static cl::opt<bool> EnableLoopPrefetch("hexagon-loop-prefetch",
+ cl::init(false), cl::Hidden, cl::ZeroOrMore,
+ cl::desc("Enable loop data prefetch on Hexagon"));
+
static cl::opt<bool> DisableHSDR("disable-hsdr", cl::init(false), cl::Hidden,
cl::desc("Disable splitting double registers"));
@@ -80,6 +86,10 @@ static cl::opt<bool> EnableLoopResched("hexagon-loop-resched", cl::init(true),
static cl::opt<bool> HexagonNoOpt("hexagon-noopt", cl::init(false),
cl::Hidden, cl::desc("Disable backend optimizations"));
+static cl::opt<bool> EnableVectorPrint("enable-hexagon-vector-print",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Enable Hexagon Vector print instr pass"));
+
/// HexagonTargetMachineModule - Note that this is used on hosts that
/// cannot link in a library unless there are references into the
/// library. In particular, it seems that it is not possible to get
@@ -90,7 +100,7 @@ int HexagonTargetMachineModule = 0;
extern "C" void LLVMInitializeHexagonTarget() {
// Register the target.
- RegisterTargetMachine<HexagonTargetMachine> X(TheHexagonTarget);
+ RegisterTargetMachine<HexagonTargetMachine> X(getTheHexagonTarget());
}
static ScheduleDAGInstrs *createVLIWMachineSched(MachineSchedContext *C) {
@@ -102,14 +112,17 @@ SchedCustomRegistry("hexagon", "Run Hexagon's custom scheduler",
createVLIWMachineSched);
namespace llvm {
+ extern char &HexagonExpandCondsetsID;
+ void initializeHexagonExpandCondsetsPass(PassRegistry&);
+
FunctionPass *createHexagonBitSimplify();
FunctionPass *createHexagonBranchRelaxation();
FunctionPass *createHexagonCallFrameInformation();
FunctionPass *createHexagonCFGOptimizer();
FunctionPass *createHexagonCommonGEP();
+ FunctionPass *createHexagonConstPropagationPass();
FunctionPass *createHexagonCopyToCombine();
FunctionPass *createHexagonEarlyIfConversion();
- FunctionPass *createHexagonExpandCondsets();
FunctionPass *createHexagonFixupHwLoops();
FunctionPass *createHexagonGenExtract();
FunctionPass *createHexagonGenInsert();
@@ -128,6 +141,7 @@ namespace llvm {
FunctionPass *createHexagonSplitConst32AndConst64();
FunctionPass *createHexagonSplitDoubleRegs();
FunctionPass *createHexagonStoreWidening();
+ FunctionPass *createHexagonVectorPrint();
} // end namespace llvm;
static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
@@ -152,6 +166,7 @@ HexagonTargetMachine::HexagonTargetMachine(const Target &T, const Triple &TT,
TT, CPU, FS, Options, getEffectiveRelocModel(RM), CM,
(HexagonNoOpt ? CodeGenOpt::None : OL)),
TLOF(make_unique<HexagonTargetObjectFile>()) {
+ initializeHexagonExpandCondsetsPass(*PassRegistry::getPassRegistry());
initAsmInfo();
}
@@ -225,6 +240,8 @@ void HexagonPassConfig::addIRPasses() {
addPass(createAtomicExpandPass(TM));
if (!NoOpt) {
+ if (EnableLoopPrefetch)
+ addPass(createLoopDataPrefetchPass());
if (EnableCommGEP)
addPass(createHexagonCommonGEP());
// Replace certain combinations of shifts and ands with extracts.
@@ -257,6 +274,11 @@ bool HexagonPassConfig::addInstSelector() {
addPass(createHexagonBitSimplify(), false);
addPass(createHexagonPeephole());
printAndVerify("After hexagon peephole pass");
+ // Constant propagation.
+ if (!DisableHCP) {
+ addPass(createHexagonConstPropagationPass(), false);
+ addPass(&UnreachableMachineBlockElimID, false);
+ }
if (EnableGenInsert)
addPass(createHexagonGenInsert(), false);
if (EnableEarlyIf)
@@ -268,15 +290,15 @@ bool HexagonPassConfig::addInstSelector() {
void HexagonPassConfig::addPreRegAlloc() {
if (getOptLevel() != CodeGenOpt::None) {
- if (EnableExpandCondsets) {
- Pass *Exp = createHexagonExpandCondsets();
- insertPass(&RegisterCoalescerID, IdentifyingPassPtr(Exp));
- }
+ if (EnableExpandCondsets)
+ insertPass(&RegisterCoalescerID, &HexagonExpandCondsetsID);
if (!DisableStoreWidening)
addPass(createHexagonStoreWidening(), false);
if (!DisableHardwareLoops)
addPass(createHexagonHardwareLoops(), false);
}
+ if (TM->getOptLevel() >= CodeGenOpt::Default)
+ addPass(&MachinePipelinerID);
}
void HexagonPassConfig::addPostRegAlloc() {
@@ -315,6 +337,8 @@ void HexagonPassConfig::addPreEmitPass() {
addPass(createHexagonPacketizer(), false);
}
+ if (EnableVectorPrint)
+ addPass(createHexagonVectorPrint(), false);
// Add CFI instructions if necessary.
addPass(createHexagonCallFrameInformation(), false);
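Each pass introduced in this file is gated the same way: a cl::opt checked while the pipeline is built (DisableHCP, EnableLoopPrefetch, EnableVectorPrint), plus an optimization-level check for the machine pipeliner; CodeGenOpt::Level is an ordered enum (None < Less < Default < Aggressive), so the ">= CodeGenOpt::Default" test roughly means -O2 and above. A toy sketch of that gating shape, deliberately outside the real TargetPassConfig API and with made-up pass names:

#include <string>
#include <vector>

// Ordered like CodeGenOpt::Level; comparisons express "at least -O2".
enum class OptLevel { None, Less, Default, Aggressive };

struct PipelineModel {
  std::vector<std::string> Passes;
  void addPass(const std::string &Name, bool Enabled = true) {
    if (Enabled)
      Passes.push_back(Name);
  }
};

static PipelineModel buildPipeline(OptLevel OL, bool DisableHCP,
                                   bool EnableLoopPrefetch) {
  PipelineModel P;
  if (OL != OptLevel::None)
    P.addPass("loop-data-prefetch", EnableLoopPrefetch); // IR-level, pre-isel
  P.addPass("instruction-selection");
  P.addPass("constant-propagation", !DisableHCP);        // post-isel cleanup
  P.addPass("machine-pipeliner", OL >= OptLevel::Default);
  return P;
}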
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
index 82b437eb6a0c..e902f600e881 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.cpp
@@ -100,25 +100,23 @@ void HexagonTargetObjectFile::Initialize(MCContext &Ctx,
ELF::SHF_HEX_GPREL);
}
-
MCSection *HexagonTargetObjectFile::SelectSectionForGlobal(
- const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
- const TargetMachine &TM) const {
- TRACE("[SelectSectionForGlobal] GV(" << GV->getName() << ") ");
- TRACE("input section(" << GV->getSection() << ") ");
-
- TRACE((GV->hasPrivateLinkage() ? "private_linkage " : "")
- << (GV->hasLocalLinkage() ? "local_linkage " : "")
- << (GV->hasInternalLinkage() ? "internal " : "")
- << (GV->hasExternalLinkage() ? "external " : "")
- << (GV->hasCommonLinkage() ? "common_linkage " : "")
- << (GV->hasCommonLinkage() ? "common " : "" )
+ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
+ TRACE("[SelectSectionForGlobal] GO(" << GO->getName() << ") ");
+ TRACE("input section(" << GO->getSection() << ") ");
+
+ TRACE((GO->hasPrivateLinkage() ? "private_linkage " : "")
+ << (GO->hasLocalLinkage() ? "local_linkage " : "")
+ << (GO->hasInternalLinkage() ? "internal " : "")
+ << (GO->hasExternalLinkage() ? "external " : "")
+ << (GO->hasCommonLinkage() ? "common_linkage " : "")
+ << (GO->hasCommonLinkage() ? "common " : "" )
<< (Kind.isCommon() ? "kind_common " : "" )
<< (Kind.isBSS() ? "kind_bss " : "" )
<< (Kind.isBSSLocal() ? "kind_bss_local " : "" ));
- if (isGlobalInSmallSection(GV, TM))
- return selectSmallSectionForGlobal(GV, Kind, Mang, TM);
+ if (isGlobalInSmallSection(GO, TM))
+ return selectSmallSectionForGlobal(GO, Kind, TM);
if (Kind.isCommon()) {
// This is purely for LTO+Linker Script because commons don't really have a
@@ -130,54 +128,50 @@ MCSection *HexagonTargetObjectFile::SelectSectionForGlobal(
TRACE("default_ELF_section\n");
// Otherwise, we work the same as ELF.
- return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind,
- Mang, TM);
+ return TargetLoweringObjectFileELF::SelectSectionForGlobal(GO, Kind, TM);
}
-
MCSection *HexagonTargetObjectFile::getExplicitSectionGlobal(
- const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
- const TargetMachine &TM) const {
- TRACE("[getExplicitSectionGlobal] GV(" << GV->getName() << ") from("
- << GV->getSection() << ") ");
- TRACE((GV->hasPrivateLinkage() ? "private_linkage " : "")
- << (GV->hasLocalLinkage() ? "local_linkage " : "")
- << (GV->hasInternalLinkage() ? "internal " : "")
- << (GV->hasExternalLinkage() ? "external " : "")
- << (GV->hasCommonLinkage() ? "common_linkage " : "")
- << (GV->hasCommonLinkage() ? "common " : "" )
+ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
+ TRACE("[getExplicitSectionGlobal] GO(" << GO->getName() << ") from("
+ << GO->getSection() << ") ");
+ TRACE((GO->hasPrivateLinkage() ? "private_linkage " : "")
+ << (GO->hasLocalLinkage() ? "local_linkage " : "")
+ << (GO->hasInternalLinkage() ? "internal " : "")
+ << (GO->hasExternalLinkage() ? "external " : "")
+ << (GO->hasCommonLinkage() ? "common_linkage " : "")
+ << (GO->hasCommonLinkage() ? "common " : "" )
<< (Kind.isCommon() ? "kind_common " : "" )
<< (Kind.isBSS() ? "kind_bss " : "" )
<< (Kind.isBSSLocal() ? "kind_bss_local " : "" ));
- if (GV->hasSection()) {
- StringRef Section = GV->getSection();
+ if (GO->hasSection()) {
+ StringRef Section = GO->getSection();
if (Section.find(".access.text.group") != StringRef::npos)
- return getContext().getELFSection(GV->getSection(), ELF::SHT_PROGBITS,
+ return getContext().getELFSection(GO->getSection(), ELF::SHT_PROGBITS,
ELF::SHF_ALLOC | ELF::SHF_EXECINSTR);
if (Section.find(".access.data.group") != StringRef::npos)
- return getContext().getELFSection(GV->getSection(), ELF::SHT_PROGBITS,
+ return getContext().getELFSection(GO->getSection(), ELF::SHT_PROGBITS,
ELF::SHF_WRITE | ELF::SHF_ALLOC);
}
- if (isGlobalInSmallSection(GV, TM))
- return selectSmallSectionForGlobal(GV, Kind, Mang, TM);
+ if (isGlobalInSmallSection(GO, TM))
+ return selectSmallSectionForGlobal(GO, Kind, TM);
// Otherwise, we work the same as ELF.
TRACE("default_ELF_section\n");
- return TargetLoweringObjectFileELF::getExplicitSectionGlobal(GV, Kind,
- Mang, TM);
+ return TargetLoweringObjectFileELF::getExplicitSectionGlobal(GO, Kind, TM);
}
/// Return true if this global value should be placed into small data/bss
/// section.
-bool HexagonTargetObjectFile::isGlobalInSmallSection(const GlobalValue *GV,
+bool HexagonTargetObjectFile::isGlobalInSmallSection(const GlobalObject *GO,
const TargetMachine &TM) const {
// Only global variables, not functions.
DEBUG(dbgs() << "Checking if value is in small-data, -G"
- << SmallDataThreshold << ": \"" << GV->getName() << "\": ");
- const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+ << SmallDataThreshold << ": \"" << GO->getName() << "\": ");
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GO);
if (!GVar) {
DEBUG(dbgs() << "no, not a global variable\n");
return false;
@@ -302,12 +296,10 @@ unsigned HexagonTargetObjectFile::getSmallestAddressableSize(const Type *Ty,
return 0;
}
-
MCSection *HexagonTargetObjectFile::selectSmallSectionForGlobal(
- const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
- const TargetMachine &TM) const {
- const Type *GTy = GV->getType()->getElementType();
- unsigned Size = getSmallestAddressableSize(GTy, GV, TM);
+ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
+ const Type *GTy = GO->getType()->getElementType();
+ unsigned Size = getSmallestAddressableSize(GTy, GO, TM);
// If we have -ffunction-section or -fdata-section then we should emit the
// global value to a unique section specifically for it... even for sdata.
@@ -333,7 +325,7 @@ MCSection *HexagonTargetObjectFile::selectSmallSectionForGlobal(
if (EmitUniquedSection) {
Name.append(".");
- Name.append(GV->getName());
+ Name.append(GO->getName());
}
TRACE(" unique sbss(" << Name << ")\n");
return getContext().getELFSection(Name.str(), ELF::SHT_NOBITS,
@@ -360,7 +352,7 @@ MCSection *HexagonTargetObjectFile::selectSmallSectionForGlobal(
// case the Kind could be wrong for it.
if (Kind.isMergeableConst()) {
TRACE(" const_object_as_data ");
- const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+ const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GO);
if (GVar->hasSection() && isSmallDataSection(GVar->getSection()))
Kind = SectionKind::getData();
}
@@ -377,7 +369,7 @@ MCSection *HexagonTargetObjectFile::selectSmallSectionForGlobal(
if (EmitUniquedSection) {
Name.append(".");
- Name.append(GV->getName());
+ Name.append(GO->getName());
}
TRACE(" unique sdata(" << Name << ")\n");
return getContext().getELFSection(Name.str(), ELF::SHT_PROGBITS,
@@ -386,6 +378,5 @@ MCSection *HexagonTargetObjectFile::selectSmallSectionForGlobal(
TRACE("default ELF section\n");
// Otherwise, we work the same as ELF.
- return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind,
- Mang, TM);
+ return TargetLoweringObjectFileELF::SelectSectionForGlobal(GO, Kind, TM);
}
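The GlobalValue-to-GlobalObject migration above does not change how small-data sections are named: the section kind picks an .sbss* or .sdata* prefix, and when unique sections are requested the global's own name is appended, which is what the "unique sbss"/"unique sdata" TRACE lines print. A minimal sketch of the unique-name step only (any size-based suffixes the pass may also add are omitted here; parameter names are illustrative):

#include <string>

// Reconstruction of the section-name shape visible in the TRACE output:
// ".sbss" for BSS-like globals, ".sdata" for initialized data, optionally
// suffixed with "." plus the global's name for unique sections.
static std::string smallSectionName(bool IsBSSLike, bool EmitUniquedSection,
                                    const std::string &GlobalName) {
  std::string Name = IsBSSLike ? ".sbss" : ".sdata";
  if (EmitUniquedSection) {
    Name += '.';
    Name += GlobalName;
  }
  return Name;
}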
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h
index cbc00da88c58..58dff2b95e19 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetObjectFile.h
@@ -19,14 +19,15 @@ namespace llvm {
public:
void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
- MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
- Mangler &Mang, const TargetMachine &TM) const override;
+ MCSection *SelectSectionForGlobal(const GlobalObject *GO, SectionKind Kind,
+ const TargetMachine &TM) const override;
- MCSection *getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
- Mangler &Mang, const TargetMachine &TM) const override;
+ MCSection *getExplicitSectionGlobal(const GlobalObject *GO,
+ SectionKind Kind,
+ const TargetMachine &TM) const override;
- bool isGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM)
- const;
+ bool isGlobalInSmallSection(const GlobalObject *GO,
+ const TargetMachine &TM) const;
bool isSmallDataEnabled() const;
@@ -39,8 +40,9 @@ namespace llvm {
unsigned getSmallestAddressableSize(const Type *Ty, const GlobalValue *GV,
const TargetMachine &TM) const;
- MCSection *selectSmallSectionForGlobal(const GlobalValue *GV,
- SectionKind Kind, Mangler &Mang, const TargetMachine &TM) const;
+ MCSection *selectSmallSectionForGlobal(const GlobalObject *GO,
+ SectionKind Kind,
+ const TargetMachine &TM) const;
};
} // namespace llvm
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
index a05443eb83b8..d578bfab3658 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.cpp
@@ -14,6 +14,7 @@
//===----------------------------------------------------------------------===//
#include "HexagonTargetTransformInfo.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
@@ -36,3 +37,35 @@ void HexagonTTIImpl::getUnrollingPreferences(Loop *L,
unsigned HexagonTTIImpl::getNumberOfRegisters(bool vector) const {
return vector ? 0 : 32;
}
+
+unsigned HexagonTTIImpl::getPrefetchDistance() const {
+ return getST()->getL1PrefetchDistance();
+}
+
+unsigned HexagonTTIImpl::getCacheLineSize() const {
+ return getST()->getL1CacheLineSize();
+}
+
+int HexagonTTIImpl::getUserCost(const User *U) {
+ auto isCastFoldedIntoLoad = [] (const CastInst *CI) -> bool {
+ if (!CI->isIntegerCast())
+ return false;
+ const LoadInst *LI = dyn_cast<const LoadInst>(CI->getOperand(0));
+ // Technically, this code could allow multiple uses of the load, and
+ // check if all the uses are the same extension operation, but this
+ // should be sufficient for most cases.
+ if (!LI || !LI->hasOneUse())
+ return false;
+
+ // Only extensions from an integer type shorter than 32-bit to i32
+ // can be folded into the load.
+ unsigned SBW = CI->getSrcTy()->getIntegerBitWidth();
+ unsigned DBW = CI->getDestTy()->getIntegerBitWidth();
+ return DBW == 32 && (SBW < DBW);
+ };
+
+ if (const CastInst *CI = dyn_cast<const CastInst>(U))
+ if (isCastFoldedIntoLoad(CI))
+ return TargetTransformInfo::TCC_Free;
+ return BaseT::getUserCost(U);
+}
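The two hooks added here expose the subtarget's L1 numbers (both 32) to the generic loop-data-prefetch pass enabled earlier via -hexagon-loop-prefetch. Conceptually that pass issues prefetches far enough ahead that a line arrives before it is used: roughly the prefetch distance divided by an estimate of the loop's per-iteration size, with a floor of one iteration. A hedged sketch of that arithmetic only, not the pass's actual code:

#include <algorithm>

// Iterations-ahead heuristic: prefetch distance divided by a per-iteration
// size estimate, never less than one iteration ahead.
static unsigned iterationsAhead(unsigned PrefetchDistance,
                                unsigned LoopSizeEstimate) {
  if (LoopSizeEstimate == 0)
    return 1;
  return std::max(1u, PrefetchDistance / LoopSizeEstimate);
}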
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h b/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
index 71ae17a19e5f..8414bfc4e197 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonTargetTransformInfo.h
@@ -40,13 +40,6 @@ public:
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}
- // Provide value semantics. MSVC requires that we spell all of these out.
- HexagonTTIImpl(const HexagonTTIImpl &Arg)
- : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {}
- HexagonTTIImpl(HexagonTTIImpl &&Arg)
- : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
- TLI(std::move(Arg.TLI)) {}
-
/// \name Scalar TTI Implementations
/// @{
@@ -55,6 +48,10 @@ public:
// The Hexagon target can unroll loops with run-time trip counts.
void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);
+ // L1 cache prefetch.
+ unsigned getPrefetchDistance() const;
+ unsigned getCacheLineSize() const;
+
/// @}
/// \name Vector TTI Implementations
@@ -63,6 +60,8 @@ public:
unsigned getNumberOfRegisters(bool vector) const;
/// @}
+
+ int getUserCost(const User *U);
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
index d326b9471315..7b1247d815a5 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.cpp
@@ -22,7 +22,6 @@
#include "HexagonVLIWPacketizer.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -75,13 +74,11 @@ namespace {
AU.addPreserved<MachineLoopInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
- const char *getPassName() const override {
- return "Hexagon Packetizer";
- }
+ StringRef getPassName() const override { return "Hexagon Packetizer"; }
bool runOnMachineFunction(MachineFunction &Fn) override;
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
private:
@@ -101,7 +98,6 @@ INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(HexagonPacketizer, "packets", "Hexagon Packetizer",
false, false)
-
HexagonPacketizerList::HexagonPacketizerList(MachineFunction &MF,
MachineLoopInfo &MLI, AliasAnalysis *AA,
const MachineBranchProbabilityInfo *MBPI)
@@ -127,7 +123,7 @@ static bool hasWriteToReadDep(const MachineInstr &FirstI,
}
-static MachineBasicBlock::iterator moveInstrOut(MachineInstr *MI,
+static MachineBasicBlock::iterator moveInstrOut(MachineInstr &MI,
MachineBasicBlock::iterator BundleIt, bool Before) {
MachineBasicBlock::instr_iterator InsertPt;
if (Before)
@@ -135,20 +131,20 @@ static MachineBasicBlock::iterator moveInstrOut(MachineInstr *MI,
else
InsertPt = std::next(BundleIt).getInstrIterator();
- MachineBasicBlock &B = *MI->getParent();
+ MachineBasicBlock &B = *MI.getParent();
// The instruction should at least be bundled with the preceding instruction
// (there will always be one, i.e. BUNDLE, if nothing else).
- assert(MI->isBundledWithPred());
- if (MI->isBundledWithSucc()) {
- MI->clearFlag(MachineInstr::BundledSucc);
- MI->clearFlag(MachineInstr::BundledPred);
+ assert(MI.isBundledWithPred());
+ if (MI.isBundledWithSucc()) {
+ MI.clearFlag(MachineInstr::BundledSucc);
+ MI.clearFlag(MachineInstr::BundledPred);
} else {
// If it's not bundled with the successor (i.e. it is the last one
// in the bundle), then we can simply unbundle it from the predecessor,
// which will take care of updating the predecessor's flag.
- MI->unbundleFromPred();
+ MI.unbundleFromPred();
}
- B.splice(InsertPt, &B, MI);
+ B.splice(InsertPt, &B, MI.getIterator());
// Get the size of the bundle without asserting.
MachineBasicBlock::const_instr_iterator I = BundleIt.getInstrIterator();
@@ -164,9 +160,9 @@ static MachineBasicBlock::iterator moveInstrOut(MachineInstr *MI,
// Otherwise, extract the single instruction out and delete the bundle.
MachineBasicBlock::iterator NextIt = std::next(BundleIt);
- MachineInstr *SingleI = BundleIt->getNextNode();
- SingleI->unbundleFromPred();
- assert(!SingleI->isBundledWithSucc());
+ MachineInstr &SingleI = *BundleIt->getNextNode();
+ SingleI.unbundleFromPred();
+ assert(!SingleI.isBundledWithSucc());
BundleIt->eraseFromParent();
return NextIt;
}
@@ -267,7 +263,7 @@ bool HexagonPacketizerList::tryAllocateResourcesForConstExt(bool Reserve) {
}
-bool HexagonPacketizerList::isCallDependent(const MachineInstr* MI,
+bool HexagonPacketizerList::isCallDependent(const MachineInstr &MI,
SDep::Kind DepType, unsigned DepReg) {
// Check for LR dependence.
if (DepReg == HRI->getRARegister())
@@ -284,11 +280,18 @@ bool HexagonPacketizerList::isCallDependent(const MachineInstr* MI,
// Assumes that the first operand of the CALLr is the function address.
if (HII->isIndirectCall(MI) && (DepType == SDep::Data)) {
- MachineOperand MO = MI->getOperand(0);
+ const MachineOperand MO = MI.getOperand(0);
if (MO.isReg() && MO.isUse() && (MO.getReg() == DepReg))
return true;
}
+ if (HII->isJumpR(MI)) {
+ const MachineOperand &MO = HII->isPredicated(MI) ? MI.getOperand(1)
+ : MI.getOperand(0);
+ assert(MO.isReg() && MO.isUse());
+ if (MO.getReg() == DepReg)
+ return true;
+ }
return false;
}
@@ -297,54 +300,60 @@ static bool isRegDependence(const SDep::Kind DepType) {
DepType == SDep::Output;
}
-static bool isDirectJump(const MachineInstr* MI) {
- return MI->getOpcode() == Hexagon::J2_jump;
+static bool isDirectJump(const MachineInstr &MI) {
+ return MI.getOpcode() == Hexagon::J2_jump;
}
-static bool isSchedBarrier(const MachineInstr* MI) {
- switch (MI->getOpcode()) {
+static bool isSchedBarrier(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
case Hexagon::Y2_barrier:
return true;
}
return false;
}
-static bool isControlFlow(const MachineInstr* MI) {
- return (MI->getDesc().isTerminator() || MI->getDesc().isCall());
+static bool isControlFlow(const MachineInstr &MI) {
+ return MI.getDesc().isTerminator() || MI.getDesc().isCall();
}
/// Returns true if the instruction modifies a callee-saved register.
-static bool doesModifyCalleeSavedReg(const MachineInstr *MI,
+static bool doesModifyCalleeSavedReg(const MachineInstr &MI,
const TargetRegisterInfo *TRI) {
- const MachineFunction &MF = *MI->getParent()->getParent();
+ const MachineFunction &MF = *MI.getParent()->getParent();
for (auto *CSR = TRI->getCalleeSavedRegs(&MF); CSR && *CSR; ++CSR)
- if (MI->modifiesRegister(*CSR, TRI))
+ if (MI.modifiesRegister(*CSR, TRI))
return true;
return false;
}
-// TODO: MI->isIndirectBranch() and IsRegisterJump(MI)
// Returns true if an instruction can be promoted to .new predicate or
// new-value store.
-bool HexagonPacketizerList::isNewifiable(const MachineInstr* MI) {
- return HII->isCondInst(MI) || MI->isReturn() || HII->mayBeNewStore(MI);
+bool HexagonPacketizerList::isNewifiable(const MachineInstr &MI,
+ const TargetRegisterClass *NewRC) {
+ // Vector stores can be predicated, and can be new-value stores, but
+ // they cannot be predicated on a .new predicate value.
+ if (NewRC == &Hexagon::PredRegsRegClass)
+ if (HII->isV60VectorInstruction(MI) && MI.mayStore())
+ return false;
+ return HII->isCondInst(MI) || HII->isJumpR(MI) || MI.isReturn() ||
+ HII->mayBeNewStore(MI);
}
// Promote an instruction to its .cur form.
// At this time, we have already made a call to canPromoteToDotCur and made
// sure that it can *indeed* be promoted.
-bool HexagonPacketizerList::promoteToDotCur(MachineInstr* MI,
+bool HexagonPacketizerList::promoteToDotCur(MachineInstr &MI,
SDep::Kind DepType, MachineBasicBlock::iterator &MII,
const TargetRegisterClass* RC) {
assert(DepType == SDep::Data);
int CurOpcode = HII->getDotCurOp(MI);
- MI->setDesc(HII->get(CurOpcode));
+ MI.setDesc(HII->get(CurOpcode));
return true;
}
void HexagonPacketizerList::cleanUpDotCur() {
- MachineInstr *MI = NULL;
+ MachineInstr *MI = nullptr;
for (auto BI : CurrentPacketMIs) {
DEBUG(dbgs() << "Cleanup packet has "; BI->dump(););
if (BI->getOpcode() == Hexagon::V6_vL32b_cur_ai) {
@@ -365,12 +374,12 @@ void HexagonPacketizerList::cleanUpDotCur() {
}
// Check to see if an instruction can be dot cur.
-bool HexagonPacketizerList::canPromoteToDotCur(const MachineInstr *MI,
+bool HexagonPacketizerList::canPromoteToDotCur(const MachineInstr &MI,
const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII,
const TargetRegisterClass *RC) {
if (!HII->isV60VectorInstruction(MI))
return false;
- if (!HII->isV60VectorInstruction(&*MII))
+ if (!HII->isV60VectorInstruction(*MII))
return false;
// Already a dot new instruction.
@@ -386,14 +395,14 @@ bool HexagonPacketizerList::canPromoteToDotCur(const MachineInstr *MI,
// Make sure candidate instruction uses cur.
DEBUG(dbgs() << "Can we DOT Cur Vector MI\n";
- MI->dump();
+ MI.dump();
dbgs() << "in packet\n";);
MachineInstr &MJ = *MII;
DEBUG({
dbgs() << "Checking CUR against ";
MJ.dump();
});
- unsigned DestReg = MI->getOperand(0).getReg();
+ unsigned DestReg = MI.getOperand(0).getReg();
bool FoundMatch = false;
for (auto &MO : MJ.operands())
if (MO.isReg() && MO.getReg() == DestReg)
@@ -409,7 +418,7 @@ bool HexagonPacketizerList::canPromoteToDotCur(const MachineInstr *MI,
return false;
}
- DEBUG(dbgs() << "Can Dot CUR MI\n"; MI->dump(););
+ DEBUG(dbgs() << "Can Dot CUR MI\n"; MI.dump(););
// We can convert the opcode into a .cur.
return true;
}
@@ -417,7 +426,7 @@ bool HexagonPacketizerList::canPromoteToDotCur(const MachineInstr *MI,
// Promote an instruction to its .new form. At this time, we have already
// made a call to canPromoteToDotNew and made sure that it can *indeed* be
// promoted.
-bool HexagonPacketizerList::promoteToDotNew(MachineInstr* MI,
+bool HexagonPacketizerList::promoteToDotNew(MachineInstr &MI,
SDep::Kind DepType, MachineBasicBlock::iterator &MII,
const TargetRegisterClass* RC) {
assert (DepType == SDep::Data);
@@ -426,16 +435,53 @@ bool HexagonPacketizerList::promoteToDotNew(MachineInstr* MI,
NewOpcode = HII->getDotNewPredOp(MI, MBPI);
else
NewOpcode = HII->getDotNewOp(MI);
- MI->setDesc(HII->get(NewOpcode));
+ MI.setDesc(HII->get(NewOpcode));
return true;
}
-bool HexagonPacketizerList::demoteToDotOld(MachineInstr* MI) {
- int NewOpcode = HII->getDotOldOp(MI->getOpcode());
- MI->setDesc(HII->get(NewOpcode));
+bool HexagonPacketizerList::demoteToDotOld(MachineInstr &MI) {
+ int NewOpcode = HII->getDotOldOp(MI.getOpcode());
+ MI.setDesc(HII->get(NewOpcode));
return true;
}
+bool HexagonPacketizerList::useCallersSP(MachineInstr &MI) {
+ unsigned Opc = MI.getOpcode();
+ switch (Opc) {
+ case Hexagon::S2_storerd_io:
+ case Hexagon::S2_storeri_io:
+ case Hexagon::S2_storerh_io:
+ case Hexagon::S2_storerb_io:
+ break;
+ default:
+ llvm_unreachable("Unexpected instruction");
+ }
+ unsigned FrameSize = MF.getFrameInfo().getStackSize();
+ MachineOperand &Off = MI.getOperand(1);
+ int64_t NewOff = Off.getImm() - (FrameSize + HEXAGON_LRFP_SIZE);
+ if (HII->isValidOffset(Opc, NewOff)) {
+ Off.setImm(NewOff);
+ return true;
+ }
+ return false;
+}
+
+void HexagonPacketizerList::useCalleesSP(MachineInstr &MI) {
+ unsigned Opc = MI.getOpcode();
+ switch (Opc) {
+ case Hexagon::S2_storerd_io:
+ case Hexagon::S2_storeri_io:
+ case Hexagon::S2_storerh_io:
+ case Hexagon::S2_storerb_io:
+ break;
+ default:
+ llvm_unreachable("Unexpected instruction");
+ }
+ unsigned FrameSize = MF.getFrameInfo().getStackSize();
+ MachineOperand &Off = MI.getOperand(1);
+ Off.setImm(Off.getImm() + FrameSize + HEXAGON_LRFP_SIZE);
+}
+
enum PredicateKind {
PK_False,
PK_True,
@@ -453,7 +499,7 @@ static PredicateKind getPredicateSense(const MachineInstr &MI,
return PK_False;
}
-static const MachineOperand &getPostIncrementOperand(const MachineInstr *MI,
+static const MachineOperand &getPostIncrementOperand(const MachineInstr &MI,
const HexagonInstrInfo *HII) {
assert(HII->isPostIncrement(MI) && "Not a post increment operation.");
#ifndef NDEBUG
@@ -461,22 +507,22 @@ static const MachineOperand &getPostIncrementOperand(const MachineInstr *MI,
// list. Caution: Densemap initializes with the minimum of 64 buckets,
// whereas there are at most 5 operands in the post increment.
DenseSet<unsigned> DefRegsSet;
- for (auto &MO : MI->operands())
+ for (auto &MO : MI.operands())
if (MO.isReg() && MO.isDef())
DefRegsSet.insert(MO.getReg());
- for (auto &MO : MI->operands())
+ for (auto &MO : MI.operands())
if (MO.isReg() && MO.isUse() && DefRegsSet.count(MO.getReg()))
return MO;
#else
- if (MI->mayLoad()) {
- const MachineOperand &Op1 = MI->getOperand(1);
+ if (MI.mayLoad()) {
+ const MachineOperand &Op1 = MI.getOperand(1);
// The 2nd operand is always the post increment operand in load.
assert(Op1.isReg() && "Post increment operand has to be a register.");
return Op1;
}
- if (MI->getDesc().mayStore()) {
- const MachineOperand &Op0 = MI->getOperand(0);
+ if (MI.getDesc().mayStore()) {
+ const MachineOperand &Op0 = MI.getOperand(0);
// The 1st operand is always the post increment operand in store.
assert(Op0.isReg() && "Post increment operand has to be a register.");
return Op0;
@@ -487,13 +533,13 @@ static const MachineOperand &getPostIncrementOperand(const MachineInstr *MI,
}
// Get the value being stored.
-static const MachineOperand& getStoreValueOperand(const MachineInstr *MI) {
+static const MachineOperand& getStoreValueOperand(const MachineInstr &MI) {
// value being stored is always the last operand.
- return MI->getOperand(MI->getNumOperands()-1);
+ return MI.getOperand(MI.getNumOperands()-1);
}
-static bool isLoadAbsSet(const MachineInstr *MI) {
- unsigned Opc = MI->getOpcode();
+static bool isLoadAbsSet(const MachineInstr &MI) {
+ unsigned Opc = MI.getOpcode();
switch (Opc) {
case Hexagon::L4_loadrd_ap:
case Hexagon::L4_loadrb_ap:
@@ -506,9 +552,9 @@ static bool isLoadAbsSet(const MachineInstr *MI) {
return false;
}
-static const MachineOperand &getAbsSetOperand(const MachineInstr *MI) {
+static const MachineOperand &getAbsSetOperand(const MachineInstr &MI) {
assert(isLoadAbsSet(MI));
- return MI->getOperand(1);
+ return MI.getOperand(1);
}
@@ -529,8 +575,8 @@ static const MachineOperand &getAbsSetOperand(const MachineInstr *MI) {
// if there is a new value store in the packet. Corollary: if there is
// already a store in a packet, there can not be a new value store.
// Arch Spec: 3.4.4.2
-bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr *MI,
- const MachineInstr *PacketMI, unsigned DepReg) {
+bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr &MI,
+ const MachineInstr &PacketMI, unsigned DepReg) {
// Make sure we are looking at the store, that can be promoted.
if (!HII->mayBeNewStore(MI))
return false;
@@ -540,7 +586,7 @@ bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr *MI,
if (Val.isReg() && Val.getReg() != DepReg)
return false;
- const MCInstrDesc& MCID = PacketMI->getDesc();
+ const MCInstrDesc& MCID = PacketMI.getDesc();
// First operand is always the result.
const TargetRegisterClass *PacketRC = HII->getRegClass(MCID, 0, HRI, MF);
@@ -563,7 +609,7 @@ bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr *MI,
return false;
}
- if (HII->isPostIncrement(PacketMI) && PacketMI->mayLoad() &&
+ if (HII->isPostIncrement(PacketMI) && PacketMI.mayLoad() &&
getPostIncrementOperand(PacketMI, HII).getReg() == DepReg) {
// If source is post_inc, or absolute-set addressing, it can not feed
// into new value store
@@ -578,8 +624,8 @@ bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr *MI,
// If the source that feeds the store is predicated, new value store must
// also be predicated.
- if (HII->isPredicated(*PacketMI)) {
- if (!HII->isPredicated(*MI))
+ if (HII->isPredicated(PacketMI)) {
+ if (!HII->isPredicated(MI))
return false;
// Check to make sure that they both will have their predicates
@@ -589,7 +635,7 @@ bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr *MI,
const TargetRegisterClass* predRegClass = nullptr;
// Get predicate register used in the source instruction.
- for (auto &MO : PacketMI->operands()) {
+ for (auto &MO : PacketMI.operands()) {
if (!MO.isReg())
continue;
predRegNumSrc = MO.getReg();
@@ -601,7 +647,7 @@ bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr *MI,
"predicate register not found in a predicated PacketMI instruction");
// Get predicate register used in new-value store instruction.
- for (auto &MO : MI->operands()) {
+ for (auto &MO : MI.operands()) {
if (!MO.isReg())
continue;
predRegNumDst = MO.getReg();
@@ -622,7 +668,7 @@ bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr *MI,
// sense, i.e, either both should be negated or both should be non-negated.
if (predRegNumDst != predRegNumSrc ||
HII->isDotNewInst(PacketMI) != HII->isDotNewInst(MI) ||
- getPredicateSense(*MI, HII) != getPredicateSense(*PacketMI, HII))
+ getPredicateSense(MI, HII) != getPredicateSense(PacketMI, HII))
return false;
}
@@ -638,19 +684,19 @@ bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr *MI,
for (auto I : CurrentPacketMIs) {
SUnit *TempSU = MIToSUnit.find(I)->second;
- MachineInstr* TempMI = TempSU->getInstr();
+ MachineInstr &TempMI = *TempSU->getInstr();
// Following condition is true for all the instructions until PacketMI is
// reached (StartCheck is set to 0 before the for loop).
// StartCheck flag is 1 for all the instructions after PacketMI.
- if (TempMI != PacketMI && !StartCheck) // Start processing only after
- continue; // encountering PacketMI.
+ if (&TempMI != &PacketMI && !StartCheck) // Start processing only after
+ continue; // encountering PacketMI.
StartCheck = 1;
- if (TempMI == PacketMI) // We don't want to check PacketMI for dependence.
+ if (&TempMI == &PacketMI) // We don't want to check PacketMI for dependence.
continue;
- for (auto &MO : MI->operands())
+ for (auto &MO : MI.operands())
if (MO.isReg() && TempSU->getInstr()->modifiesRegister(MO.getReg(), HRI))
return false;
}
@@ -662,8 +708,8 @@ bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr *MI,
// Eg. r0 = add(r0, #3)
// memw(r1+r0<<#2) = r0
if (!HII->isPostIncrement(MI)) {
- for (unsigned opNum = 0; opNum < MI->getNumOperands()-1; opNum++) {
- const MachineOperand &MO = MI->getOperand(opNum);
+ for (unsigned opNum = 0; opNum < MI.getNumOperands()-1; opNum++) {
+ const MachineOperand &MO = MI.getOperand(opNum);
if (MO.isReg() && MO.getReg() == DepReg)
return false;
}
@@ -673,7 +719,7 @@ bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr *MI,
// do not newify the store. Eg.
// %R9<def> = ZXTH %R12, %D6<imp-use>, %R12<imp-def>
// S2_storerh_io %R8, 2, %R12<kill>; mem:ST2[%scevgep343]
- for (auto &MO : PacketMI->operands()) {
+ for (auto &MO : PacketMI.operands()) {
if (!MO.isReg() || !MO.isDef() || !MO.isImplicit())
continue;
unsigned R = MO.getReg();
@@ -686,7 +732,7 @@ bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr *MI,
// just-in-case. For example, we cannot newify R2 in the following case:
// %R3<def> = A2_tfrsi 0;
// S2_storeri_io %R0<kill>, 0, %R2<kill>, %D1<imp-use,kill>;
- for (auto &MO : MI->operands()) {
+ for (auto &MO : MI.operands()) {
if (MO.isReg() && MO.isUse() && MO.isImplicit() && MO.getReg() == DepReg)
return false;
}
@@ -696,14 +742,14 @@ bool HexagonPacketizerList::canPromoteToNewValueStore(const MachineInstr *MI,
}
// Can this MI to promoted to either new value store or new value jump.
-bool HexagonPacketizerList::canPromoteToNewValue(const MachineInstr *MI,
+bool HexagonPacketizerList::canPromoteToNewValue(const MachineInstr &MI,
const SUnit *PacketSU, unsigned DepReg,
MachineBasicBlock::iterator &MII) {
if (!HII->mayBeNewStore(MI))
return false;
// Check to see the store can be new value'ed.
- MachineInstr *PacketMI = PacketSU->getInstr();
+ MachineInstr &PacketMI = *PacketSU->getInstr();
if (canPromoteToNewValueStore(MI, PacketMI, DepReg))
return true;
@@ -712,8 +758,8 @@ bool HexagonPacketizerList::canPromoteToNewValue(const MachineInstr *MI,
return false;
}
-static bool isImplicitDependency(const MachineInstr *I, unsigned DepReg) {
- for (auto &MO : I->operands())
+static bool isImplicitDependency(const MachineInstr &I, unsigned DepReg) {
+ for (auto &MO : I.operands())
if (MO.isReg() && MO.isDef() && (MO.getReg() == DepReg) && MO.isImplicit())
return true;
return false;
@@ -724,25 +770,25 @@ static bool isImplicitDependency(const MachineInstr *I, unsigned DepReg) {
// 1. dot new on predicate - V2/V3/V4
// 2. dot new on stores NV/ST - V4
// 3. dot new on jump NV/J - V4 -- This is generated in a pass.
-bool HexagonPacketizerList::canPromoteToDotNew(const MachineInstr *MI,
+bool HexagonPacketizerList::canPromoteToDotNew(const MachineInstr &MI,
const SUnit *PacketSU, unsigned DepReg, MachineBasicBlock::iterator &MII,
const TargetRegisterClass* RC) {
// Already a dot new instruction.
if (HII->isDotNewInst(MI) && !HII->mayBeNewStore(MI))
return false;
- if (!isNewifiable(MI))
+ if (!isNewifiable(MI, RC))
return false;
- const MachineInstr *PI = PacketSU->getInstr();
+ const MachineInstr &PI = *PacketSU->getInstr();
// The "new value" cannot come from inline asm.
- if (PI->isInlineAsm())
+ if (PI.isInlineAsm())
return false;
// IMPLICIT_DEFs won't materialize as real instructions, so .new makes no
// sense.
- if (PI->isImplicitDef())
+ if (PI.isImplicitDef())
return false;
// If dependency is through an implicitly defined register, we should not
@@ -750,16 +796,14 @@ bool HexagonPacketizerList::canPromoteToDotNew(const MachineInstr *MI,
if (isImplicitDependency(PI, DepReg))
return false;
- const MCInstrDesc& MCID = PI->getDesc();
+ const MCInstrDesc& MCID = PI.getDesc();
const TargetRegisterClass *VecRC = HII->getRegClass(MCID, 0, HRI, MF);
if (DisableVecDblNVStores && VecRC == &Hexagon::VecDblRegsRegClass)
return false;
// predicate .new
- // bug 5670: until that is fixed
- // TODO: MI->isIndirectBranch() and IsRegisterJump(MI)
if (RC == &Hexagon::PredRegsRegClass)
- if (HII->isCondInst(MI) || MI->isReturn())
+ if (HII->isCondInst(MI) || HII->isJumpR(MI) || MI.isReturn())
return HII->predCanBeUsedAsDotNew(PI, DepReg);
if (RC != &Hexagon::PredRegsRegClass && !HII->mayBeNewStore(MI))
@@ -795,9 +839,9 @@ bool HexagonPacketizerList::canPromoteToDotNew(const MachineInstr *MI,
// The P3 from a) and d) will be complements after
// a)'s P3 is converted to .new form
// Anti-dep between c) and b) is irrelevant for this case
-bool HexagonPacketizerList::restrictingDepExistInPacket(MachineInstr* MI,
+bool HexagonPacketizerList::restrictingDepExistInPacket(MachineInstr &MI,
unsigned DepReg) {
- SUnit *PacketSUDep = MIToSUnit.find(MI)->second;
+ SUnit *PacketSUDep = MIToSUnit.find(&MI)->second;
for (auto I : CurrentPacketMIs) {
// We only care for dependencies to predicated instructions
@@ -889,7 +933,7 @@ bool HexagonPacketizerList::arePredicatesComplements(MachineInstr &MI1,
// above example. Now I need to see if there is an anti dependency
// from c) to any other instruction in the same packet on the pred
// reg of interest.
- if (restrictingDepExistInPacket(I, Dep.getReg()))
+ if (restrictingDepExistInPacket(*I, Dep.getReg()))
return false;
}
}
@@ -906,7 +950,7 @@ bool HexagonPacketizerList::arePredicatesComplements(MachineInstr &MI1,
Hexagon::PredRegsRegClass.contains(PReg1) &&
Hexagon::PredRegsRegClass.contains(PReg2) &&
getPredicateSense(MI1, HII) != getPredicateSense(MI2, HII) &&
- HII->isDotNewInst(&MI1) == HII->isDotNewInst(&MI2);
+ HII->isDotNewInst(MI1) == HII->isDotNewInst(MI2);
}
// Initialize packetizer flags.
@@ -957,10 +1001,10 @@ bool HexagonPacketizerList::isSoloInstruction(const MachineInstr &MI) {
// From Hexagon V4 Programmer's Reference Manual 3.4.4 Grouping constraints:
// trap, pause, barrier, icinva, isync, and syncht are solo instructions.
// They must not be grouped with other instructions in a packet.
- if (isSchedBarrier(&MI))
+ if (isSchedBarrier(MI))
return true;
- if (HII->isSolo(&MI))
+ if (HII->isSolo(MI))
return true;
if (MI.getOpcode() == Hexagon::A2_nop)
@@ -977,9 +1021,9 @@ bool HexagonPacketizerList::isSoloInstruction(const MachineInstr &MI) {
// cannotCoexistAsymm(MI, MJ) || cannotCoexistAsymm(MJ, MI)
// Doing the test only one way saves the amount of code in this function,
// since every test would need to be repeated with the MI and MJ reversed.
-static bool cannotCoexistAsymm(const MachineInstr *MI, const MachineInstr *MJ,
+static bool cannotCoexistAsymm(const MachineInstr &MI, const MachineInstr &MJ,
const HexagonInstrInfo &HII) {
- const MachineFunction *MF = MI->getParent()->getParent();
+ const MachineFunction *MF = MI.getParent()->getParent();
if (MF->getSubtarget<HexagonSubtarget>().hasV60TOpsOnly() &&
HII.isHVXMemWithAIndirect(MI, MJ))
return true;
@@ -988,9 +1032,27 @@ static bool cannotCoexistAsymm(const MachineInstr *MI, const MachineInstr *MJ,
// able to remove the asm out after packetizing (i.e. if the asm must be
// moved past the bundle). Similarly, two asms cannot be together to avoid
// complications when determining their relative order outside of a bundle.
- if (MI->isInlineAsm())
- return MJ->isInlineAsm() || MJ->isBranch() || MJ->isBarrier() ||
- MJ->isCall() || MJ->isTerminator();
+ if (MI.isInlineAsm())
+ return MJ.isInlineAsm() || MJ.isBranch() || MJ.isBarrier() ||
+ MJ.isCall() || MJ.isTerminator();
+
+ switch (MI.getOpcode()) {
+ case (Hexagon::S2_storew_locked):
+ case (Hexagon::S4_stored_locked):
+ case (Hexagon::L2_loadw_locked):
+ case (Hexagon::L4_loadd_locked):
+ case (Hexagon::Y4_l2fetch): {
+ // These instructions can only be grouped with ALU32 or non-floating-point
+ // XTYPE instructions. Since there is no convenient way of identifying fp
+ // XTYPE instructions, only allow grouping with ALU32 for now.
+ unsigned TJ = HII.getType(MJ);
+ if (TJ != HexagonII::TypeALU32)
+ return true;
+ break;
+ }
+ default:
+ break;
+ }
// "False" really means that the quick check failed to determine if
// I and J cannot coexist.
@@ -999,8 +1061,8 @@ static bool cannotCoexistAsymm(const MachineInstr *MI, const MachineInstr *MJ,
// Full, symmetric check.
-bool HexagonPacketizerList::cannotCoexist(const MachineInstr *MI,
- const MachineInstr *MJ) {
+bool HexagonPacketizerList::cannotCoexist(const MachineInstr &MI,
+ const MachineInstr &MJ) {
return cannotCoexistAsymm(MI, MJ, *HII) || cannotCoexistAsymm(MJ, MI, *HII);
}
@@ -1010,10 +1072,10 @@ void HexagonPacketizerList::unpacketizeSoloInstrs(MachineFunction &MF) {
MachineBasicBlock::instr_iterator NextI;
for (auto I = B.instr_begin(), E = B.instr_end(); I != E; I = NextI) {
NextI = std::next(I);
- MachineInstr *MI = &*I;
- if (MI->isBundle())
+ MachineInstr &MI = *I;
+ if (MI.isBundle())
BundleIt = I;
- if (!MI->isInsideBundle())
+ if (!MI.isInsideBundle())
continue;
// Decide on where to insert the instruction that we are pulling out.
@@ -1023,9 +1085,9 @@ void HexagonPacketizerList::unpacketizeSoloInstrs(MachineFunction &MF) {
// other instructions in the bundle read, then we need to place it
// after the bundle (to preserve the bundle semantics).
bool InsertBeforeBundle;
- if (MI->isInlineAsm())
- InsertBeforeBundle = !hasWriteToReadDep(*MI, *BundleIt, HRI);
- else if (MI->isDebugValue())
+ if (MI.isInlineAsm())
+ InsertBeforeBundle = !hasWriteToReadDep(MI, *BundleIt, HRI);
+ else if (MI.isDebugValue())
InsertBeforeBundle = true;
else
continue;
@@ -1036,8 +1098,8 @@ void HexagonPacketizerList::unpacketizeSoloInstrs(MachineFunction &MF) {
}
// Check if a given instruction is of class "system".
-static bool isSystemInstr(const MachineInstr *MI) {
- unsigned Opc = MI->getOpcode();
+static bool isSystemInstr(const MachineInstr &MI) {
+ unsigned Opc = MI.getOpcode();
switch (Opc) {
case Hexagon::Y2_barrier:
case Hexagon::Y2_dcfetchbo:
@@ -1046,24 +1108,24 @@ static bool isSystemInstr(const MachineInstr *MI) {
return false;
}
-bool HexagonPacketizerList::hasDeadDependence(const MachineInstr *I,
- const MachineInstr *J) {
+bool HexagonPacketizerList::hasDeadDependence(const MachineInstr &I,
+ const MachineInstr &J) {
// The dependence graph may not include edges between dead definitions,
  // so without extra checks, we could end up packetizing two instructions
// defining the same (dead) register.
- if (I->isCall() || J->isCall())
+ if (I.isCall() || J.isCall())
return false;
- if (HII->isPredicated(*I) || HII->isPredicated(*J))
+ if (HII->isPredicated(I) || HII->isPredicated(J))
return false;
BitVector DeadDefs(Hexagon::NUM_TARGET_REGS);
- for (auto &MO : I->operands()) {
+ for (auto &MO : I.operands()) {
if (!MO.isReg() || !MO.isDef() || !MO.isDead())
continue;
DeadDefs[MO.getReg()] = true;
}
- for (auto &MO : J->operands()) {
+ for (auto &MO : J.operands()) {
if (!MO.isReg() || !MO.isDef() || !MO.isDead())
continue;
unsigned R = MO.getReg();
@@ -1073,8 +1135,8 @@ bool HexagonPacketizerList::hasDeadDependence(const MachineInstr *I,
return false;
}
-bool HexagonPacketizerList::hasControlDependence(const MachineInstr *I,
- const MachineInstr *J) {
+bool HexagonPacketizerList::hasControlDependence(const MachineInstr &I,
+ const MachineInstr &J) {
// A save callee-save register function call can only be in a packet
// with instructions that don't write to the callee-save registers.
if ((HII->isSaveCalleeSavedRegsCall(I) &&
@@ -1090,10 +1152,10 @@ bool HexagonPacketizerList::hasControlDependence(const MachineInstr *I,
// \ref-manual (7.3.4) A loop setup packet in loopN or spNloop0 cannot
// contain a speculative indirect jump,
// a new-value compare jump or a dealloc_return.
- auto isBadForLoopN = [this] (const MachineInstr *MI) -> bool {
- if (MI->isCall() || HII->isDeallocRet(MI) || HII->isNewValueJump(MI))
+ auto isBadForLoopN = [this] (const MachineInstr &MI) -> bool {
+ if (MI.isCall() || HII->isDeallocRet(MI) || HII->isNewValueJump(MI))
return true;
- if (HII->isPredicated(*MI) && HII->isPredicatedNew(*MI) && HII->isJumpR(MI))
+ if (HII->isPredicated(MI) && HII->isPredicatedNew(MI) && HII->isJumpR(MI))
return true;
return false;
};
@@ -1106,13 +1168,13 @@ bool HexagonPacketizerList::hasControlDependence(const MachineInstr *I,
// dealloc_return cannot appear in the same packet as a conditional or
// unconditional jump.
return HII->isDeallocRet(I) &&
- (J->isBranch() || J->isCall() || J->isBarrier());
+ (J.isBranch() || J.isCall() || J.isBarrier());
}
-bool HexagonPacketizerList::hasV4SpecificDependence(const MachineInstr *I,
- const MachineInstr *J) {
+bool HexagonPacketizerList::hasV4SpecificDependence(const MachineInstr &I,
+ const MachineInstr &J) {
bool SysI = isSystemInstr(I), SysJ = isSystemInstr(J);
- bool StoreI = I->mayStore(), StoreJ = J->mayStore();
+ bool StoreI = I.mayStore(), StoreJ = J.mayStore();
if ((SysI && StoreJ) || (SysJ && StoreI))
return true;
@@ -1135,19 +1197,18 @@ bool HexagonPacketizerList::hasV4SpecificDependence(const MachineInstr *I,
// SUJ is the current instruction inside the current packet against which that
// SUI will be packetized.
bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
- MachineInstr *I = SUI->getInstr();
- MachineInstr *J = SUJ->getInstr();
- assert(I && J && "Unable to packetize null instruction!");
+ assert(SUI->getInstr() && SUJ->getInstr());
+ MachineInstr &I = *SUI->getInstr();
+ MachineInstr &J = *SUJ->getInstr();
// Clear IgnoreDepMIs when Packet starts.
if (CurrentPacketMIs.size() == 1)
IgnoreDepMIs.clear();
- MachineBasicBlock::iterator II = I;
- const unsigned FrameSize = MF.getFrameInfo()->getStackSize();
+ MachineBasicBlock::iterator II = I.getIterator();
// Solo instructions cannot go in the packet.
- assert(!isSoloInstruction(*I) && "Unexpected solo instr!");
+ assert(!isSoloInstruction(I) && "Unexpected solo instr!");
if (cannotCoexist(I, J))
return false;
@@ -1164,23 +1225,23 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
return false;
// If an instruction feeds new value jump, glue it.
- MachineBasicBlock::iterator NextMII = I;
+ MachineBasicBlock::iterator NextMII = I.getIterator();
++NextMII;
- if (NextMII != I->getParent()->end() && HII->isNewValueJump(&*NextMII)) {
+ if (NextMII != I.getParent()->end() && HII->isNewValueJump(*NextMII)) {
MachineInstr &NextMI = *NextMII;
bool secondRegMatch = false;
const MachineOperand &NOp0 = NextMI.getOperand(0);
const MachineOperand &NOp1 = NextMI.getOperand(1);
- if (NOp1.isReg() && I->getOperand(0).getReg() == NOp1.getReg())
+ if (NOp1.isReg() && I.getOperand(0).getReg() == NOp1.getReg())
secondRegMatch = true;
- for (auto I : CurrentPacketMIs) {
- SUnit *PacketSU = MIToSUnit.find(I)->second;
- MachineInstr *PI = PacketSU->getInstr();
+ for (auto T : CurrentPacketMIs) {
+ SUnit *PacketSU = MIToSUnit.find(T)->second;
+ MachineInstr &PI = *PacketSU->getInstr();
// NVJ can not be part of the dual jump - Arch Spec: section 7.8.
- if (PI->isCall()) {
+ if (PI.isCall()) {
Dependence = true;
break;
}
@@ -1192,14 +1253,14 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
// 3. If the second operand of the nvj is newified, (which means
// first operand is also a reg), first reg is not defined in
// the same packet.
- if (PI->getOpcode() == Hexagon::S2_allocframe || PI->mayStore() ||
+ if (PI.getOpcode() == Hexagon::S2_allocframe || PI.mayStore() ||
HII->isLoopN(PI)) {
Dependence = true;
break;
}
// Check #2/#3.
const MachineOperand &OpR = secondRegMatch ? NOp0 : NOp1;
- if (OpR.isReg() && PI->modifiesRegister(OpR.getReg(), HRI)) {
+ if (OpR.isReg() && PI.modifiesRegister(OpR.getReg(), HRI)) {
Dependence = true;
break;
}
@@ -1237,12 +1298,6 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
// dealloc return unless we have dependencies on the explicit uses
// of the registers used by jumpr (like r31) or dealloc return
// (like r29 or r30).
- //
- // TODO: Currently, jumpr is handling only return of r31. So, the
- // following logic (specificaly isCallDependent) is working fine.
- // We need to enable jumpr for register other than r31 and then,
- // we need to rework the last part, where it handles indirect call
- // of that (isCallDependent) function. Bug 6216 is opened for this.
unsigned DepReg = 0;
const TargetRegisterClass *RC = nullptr;
if (DepType == SDep::Data) {
@@ -1250,7 +1305,7 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
RC = HRI->getMinimalPhysRegClass(DepReg);
}
- if (I->isCall() || I->isReturn() || HII->isTailCall(I)) {
+ if (I.isCall() || HII->isJumpR(I) || I.isReturn() || HII->isTailCall(I)) {
if (!isRegDependence(DepType))
continue;
if (!isCallDependent(I, DepType, SUJ->Succs[i].getReg()))
@@ -1283,8 +1338,8 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
// For predicated instructions, if the predicates are complements then
// there can be no dependence.
- if (HII->isPredicated(*I) && HII->isPredicated(*J) &&
- arePredicatesComplements(*I, *J)) {
+ if (HII->isPredicated(I) && HII->isPredicated(J) &&
+ arePredicatesComplements(I, J)) {
// Not always safe to do this translation.
// DAG Builder attempts to reduce dependence edges using transitive
// nature of dependencies. Here is an example:
@@ -1297,24 +1352,24 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
// However, there is no dependence edge between (1)->(3). This results
      // in all 3 instructions going in the same packet. We ignore dependence
// only once to avoid this situation.
- auto Itr = std::find(IgnoreDepMIs.begin(), IgnoreDepMIs.end(), J);
+ auto Itr = find(IgnoreDepMIs, &J);
if (Itr != IgnoreDepMIs.end()) {
Dependence = true;
return false;
}
- IgnoreDepMIs.push_back(I);
+ IgnoreDepMIs.push_back(&I);
continue;
}
// Ignore Order dependences between unconditional direct branches
// and non-control-flow instructions.
- if (isDirectJump(I) && !J->isBranch() && !J->isCall() &&
+ if (isDirectJump(I) && !J.isBranch() && !J.isCall() &&
DepType == SDep::Order)
continue;
// Ignore all dependences for jumps except for true and output
// dependences.
- if (I->isConditionalBranch() && DepType != SDep::Data &&
+ if (I.isConditionalBranch() && DepType != SDep::Data &&
DepType != SDep::Output)
continue;
@@ -1336,7 +1391,7 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
unsigned DepReg = SUJ->Succs[i].getReg();
// Check if I and J really defines DepReg.
- if (!I->definesRegister(DepReg) && !J->definesRegister(DepReg))
+ if (!I.definesRegister(DepReg) && !J.definesRegister(DepReg))
continue;
FoundSequentialDependence = true;
break;
@@ -1350,15 +1405,15 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
// 4. Load followed by any memory operation is allowed.
if (DepType == SDep::Order) {
if (!PacketizeVolatiles) {
- bool OrdRefs = I->hasOrderedMemoryRef() || J->hasOrderedMemoryRef();
+ bool OrdRefs = I.hasOrderedMemoryRef() || J.hasOrderedMemoryRef();
if (OrdRefs) {
FoundSequentialDependence = true;
break;
}
}
// J is first, I is second.
- bool LoadJ = J->mayLoad(), StoreJ = J->mayStore();
- bool LoadI = I->mayLoad(), StoreI = I->mayStore();
+ bool LoadJ = J.mayLoad(), StoreJ = J.mayStore();
+ bool LoadI = I.mayLoad(), StoreI = I.mayStore();
if (StoreJ) {
// Two stores are only allowed on V4+. Load following store is never
// allowed.
@@ -1383,25 +1438,21 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
// between ALLOCFRAME and subsequent store, allow it to be packetized
// in a same packet. This implies that the store is using the caller's
// SP. Hence, offset needs to be updated accordingly.
- if (DepType == SDep::Data && J->getOpcode() == Hexagon::S2_allocframe) {
- unsigned Opc = I->getOpcode();
+ if (DepType == SDep::Data && J.getOpcode() == Hexagon::S2_allocframe) {
+ unsigned Opc = I.getOpcode();
switch (Opc) {
case Hexagon::S2_storerd_io:
case Hexagon::S2_storeri_io:
case Hexagon::S2_storerh_io:
case Hexagon::S2_storerb_io:
- if (I->getOperand(0).getReg() == HRI->getStackRegister()) {
- int64_t Imm = I->getOperand(1).getImm();
- int64_t NewOff = Imm - (FrameSize + HEXAGON_LRFP_SIZE);
- if (HII->isValidOffset(Opc, NewOff)) {
- GlueAllocframeStore = true;
- // Since this store is to be glued with allocframe in the same
- // packet, it will use SP of the previous stack frame, i.e.
- // caller's SP. Therefore, we need to recalculate offset
- // according to this change.
- I->getOperand(1).setImm(NewOff);
+ if (I.getOperand(0).getReg() == HRI->getStackRegister()) {
+ // Since this store is to be glued with allocframe in the same
+ // packet, it will use SP of the previous stack frame, i.e.
+ // caller's SP. Therefore, we need to recalculate offset
+ // according to this change.
+ GlueAllocframeStore = useCallersSP(I);
+ if (GlueAllocframeStore)
continue;
- }
}
default:
break;
@@ -1414,12 +1465,12 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
// R0 = ... ; SUI
// Those cannot be packetized together, since the call will observe
// the effect of the assignment to R0.
- if (DepType == SDep::Anti && J->isCall()) {
+ if (DepType == SDep::Anti && J.isCall()) {
// Check if I defines any volatile register. We should also check
// registers that the call may read, but these happen to be a
// subset of the volatile register set.
- for (const MCPhysReg *P = J->getDesc().ImplicitDefs; P && *P; ++P) {
- if (!I->modifiesRegister(*P, HRI))
+ for (const MCPhysReg *P = J.getDesc().ImplicitDefs; P && *P; ++P) {
+ if (!I.modifiesRegister(*P, HRI))
continue;
FoundSequentialDependence = true;
break;
@@ -1447,9 +1498,9 @@ bool HexagonPacketizerList::isLegalToPacketizeTogether(SUnit *SUI, SUnit *SUJ) {
}
bool HexagonPacketizerList::isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) {
- MachineInstr *I = SUI->getInstr();
- MachineInstr *J = SUJ->getInstr();
- assert(I && J && "Unable to packetize null instruction!");
+ assert(SUI->getInstr() && SUJ->getInstr());
+ MachineInstr &I = *SUI->getInstr();
+ MachineInstr &J = *SUJ->getInstr();
if (cannotCoexist(I, J))
return false;
@@ -1467,16 +1518,15 @@ bool HexagonPacketizerList::isLegalToPruneDependencies(SUnit *SUI, SUnit *SUJ) {
// instruction. If so, restore its offset to its original value, i.e. use
// current SP instead of caller's SP.
if (GlueAllocframeStore) {
- unsigned FrameSize = MF.getFrameInfo()->getStackSize();
- MachineOperand &MOff = I->getOperand(1);
- MOff.setImm(MOff.getImm() + FrameSize + HEXAGON_LRFP_SIZE);
+ useCalleesSP(I);
+ GlueAllocframeStore = false;
}
return false;
}
MachineBasicBlock::iterator
HexagonPacketizerList::addToPacket(MachineInstr &MI) {
- MachineBasicBlock::iterator MII = MI;
+ MachineBasicBlock::iterator MII = MI.getIterator();
MachineBasicBlock *MBB = MI.getParent();
if (MI.isImplicitDef()) {
unsigned R = MI.getOperand(0).getReg();
@@ -1488,7 +1538,7 @@ HexagonPacketizerList::addToPacket(MachineInstr &MI) {
}
assert(ResourceTracker->canReserveResources(MI));
- bool ExtMI = HII->isExtended(&MI) || HII->isConstExtended(&MI);
+ bool ExtMI = HII->isExtended(MI) || HII->isConstExtended(MI);
bool Good = true;
if (GlueToNewValueJump) {
@@ -1501,7 +1551,7 @@ HexagonPacketizerList::addToPacket(MachineInstr &MI) {
if (ExtMI)
Good = tryAllocateResourcesForConstExt(true);
- bool ExtNvjMI = HII->isExtended(&NvjMI) || HII->isConstExtended(&NvjMI);
+ bool ExtNvjMI = HII->isExtended(NvjMI) || HII->isConstExtended(NvjMI);
if (Good) {
if (ResourceTracker->canReserveResources(NvjMI))
ResourceTracker->reserveResources(NvjMI);
@@ -1535,7 +1585,11 @@ HexagonPacketizerList::addToPacket(MachineInstr &MI) {
if (ExtMI && !tryAllocateResourcesForConstExt(true)) {
endPacket(MBB, MI);
if (PromotedToDotNew)
- demoteToDotOld(&MI);
+ demoteToDotOld(MI);
+ if (GlueAllocframeStore) {
+ useCalleesSP(MI);
+ GlueAllocframeStore = false;
+ }
ResourceTracker->reserveResources(MI);
reserveResourcesForConstExt();
}
@@ -1551,18 +1605,18 @@ void HexagonPacketizerList::endPacket(MachineBasicBlock *MBB,
}
bool HexagonPacketizerList::shouldAddToPacket(const MachineInstr &MI) {
- return !producesStall(&MI);
+ return !producesStall(MI);
}
// Return true when ConsMI uses a register defined by ProdMI.
-static bool isDependent(const MachineInstr *ProdMI,
- const MachineInstr *ConsMI) {
- if (!ProdMI->getOperand(0).isReg())
+static bool isDependent(const MachineInstr &ProdMI,
+ const MachineInstr &ConsMI) {
+ if (!ProdMI.getOperand(0).isReg())
return false;
- unsigned DstReg = ProdMI->getOperand(0).getReg();
+ unsigned DstReg = ProdMI.getOperand(0).getReg();
- for (auto &Op : ConsMI->operands())
+ for (auto &Op : ConsMI.operands())
if (Op.isReg() && Op.isUse() && Op.getReg() == DstReg)
// The MIs depend on each other.
return true;
@@ -1571,7 +1625,7 @@ static bool isDependent(const MachineInstr *ProdMI,
}
// V60 forward scheduling.
-bool HexagonPacketizerList::producesStall(const MachineInstr *I) {
+bool HexagonPacketizerList::producesStall(const MachineInstr &I) {
// Check whether the previous packet is in a different loop. If this is the
// case, there is little point in trying to avoid a stall because that would
// favor the rare case (loop entry) over the common case (loop iteration).
@@ -1581,7 +1635,7 @@ bool HexagonPacketizerList::producesStall(const MachineInstr *I) {
// backedge.
if (!OldPacketMIs.empty()) {
auto *OldBB = OldPacketMIs.front()->getParent();
- auto *ThisBB = I->getParent();
+ auto *ThisBB = I.getParent();
if (MLI->getLoopFor(OldBB) != MLI->getLoopFor(ThisBB))
return false;
}
@@ -1589,9 +1643,9 @@ bool HexagonPacketizerList::producesStall(const MachineInstr *I) {
// Check for stall between two vector instructions.
if (HII->isV60VectorInstruction(I)) {
for (auto J : OldPacketMIs) {
- if (!HII->isV60VectorInstruction(J))
+ if (!HII->isV60VectorInstruction(*J))
continue;
- if (isDependent(J, I) && !HII->isVecUsableNextPacket(J, I))
+ if (isDependent(*J, I) && !HII->isVecUsableNextPacket(*J, I))
return true;
}
return false;
@@ -1601,17 +1655,17 @@ bool HexagonPacketizerList::producesStall(const MachineInstr *I) {
// there is no definition of a use in the current packet, because it
// may be a candidate for .new.
for (auto J : CurrentPacketMIs)
- if (!HII->isV60VectorInstruction(J) && isDependent(J, I))
+ if (!HII->isV60VectorInstruction(*J) && isDependent(*J, I))
return false;
// Check for stall between I and instructions in the previous packet.
if (MF.getSubtarget<HexagonSubtarget>().useBSBScheduling()) {
for (auto J : OldPacketMIs) {
- if (HII->isV60VectorInstruction(J))
+ if (HII->isV60VectorInstruction(*J))
continue;
- if (!HII->isLateInstrFeedsEarlyInstr(J, I))
+ if (!HII->isLateInstrFeedsEarlyInstr(*J, I))
continue;
- if (isDependent(J, I) && !HII->canExecuteInBundle(J, I))
+ if (isDependent(*J, I) && !HII->canExecuteInBundle(*J, I))
return true;
}
}
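
The useCallersSP()/useCalleesSP() pair introduced above factors out the stack-offset rebasing that the removed inline code in isLegalToPacketizeTogether performed for a store glued with allocframe. A minimal standalone sketch of that arithmetic follows; it is not part of the patch, and the helper names and the LR:FP size constant are assumptions taken from the removed code (HEXAGON_LRFP_SIZE is the size of the saved LR:FP pair, 8 bytes).

// Illustrative sketch only: offset rebasing for a store packetized with allocframe.
#include <cstdint>

// Assumed constant: size of the saved LR:FP pair pushed by allocframe.
constexpr int64_t HexagonLRFPSize = 8;

// When the store shares a packet with allocframe it still executes against the
// caller's SP, so its frame offset is rebased by the new frame size plus LR:FP.
int64_t toCallersSP(int64_t Offset, int64_t FrameSize) {
  return Offset - (FrameSize + HexagonLRFPSize);
}

// If the packet is later abandoned, the callee-SP-relative offset is restored
// by adding the same amount back (what useCalleesSP undoes).
int64_t toCalleesSP(int64_t Offset, int64_t FrameSize) {
  return Offset + (FrameSize + HexagonLRFPSize);
}
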
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h
index 3f8ed5af3540..b28b926ec300 100644
--- a/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonVLIWPacketizer.h
@@ -73,42 +73,44 @@ public:
void unpacketizeSoloInstrs(MachineFunction &MF);
protected:
- bool isCallDependent(const MachineInstr* MI, SDep::Kind DepType,
+ bool isCallDependent(const MachineInstr &MI, SDep::Kind DepType,
unsigned DepReg);
- bool promoteToDotCur(MachineInstr* MI, SDep::Kind DepType,
+ bool promoteToDotCur(MachineInstr &MI, SDep::Kind DepType,
MachineBasicBlock::iterator &MII,
- const TargetRegisterClass* RC);
- bool canPromoteToDotCur(const MachineInstr* MI, const SUnit* PacketSU,
+ const TargetRegisterClass *RC);
+ bool canPromoteToDotCur(const MachineInstr &MI, const SUnit *PacketSU,
unsigned DepReg, MachineBasicBlock::iterator &MII,
- const TargetRegisterClass* RC);
+ const TargetRegisterClass *RC);
void cleanUpDotCur();
- bool promoteToDotNew(MachineInstr* MI, SDep::Kind DepType,
+ bool promoteToDotNew(MachineInstr &MI, SDep::Kind DepType,
MachineBasicBlock::iterator &MII,
- const TargetRegisterClass* RC);
- bool canPromoteToDotNew(const MachineInstr* MI, const SUnit* PacketSU,
+ const TargetRegisterClass *RC);
+ bool canPromoteToDotNew(const MachineInstr &MI, const SUnit *PacketSU,
unsigned DepReg, MachineBasicBlock::iterator &MII,
- const TargetRegisterClass* RC);
- bool canPromoteToNewValue(const MachineInstr* MI, const SUnit* PacketSU,
+ const TargetRegisterClass *RC);
+ bool canPromoteToNewValue(const MachineInstr &MI, const SUnit *PacketSU,
unsigned DepReg, MachineBasicBlock::iterator &MII);
- bool canPromoteToNewValueStore(const MachineInstr* MI,
- const MachineInstr* PacketMI, unsigned DepReg);
- bool demoteToDotOld(MachineInstr* MI);
+ bool canPromoteToNewValueStore(const MachineInstr &MI,
+ const MachineInstr &PacketMI, unsigned DepReg);
+ bool demoteToDotOld(MachineInstr &MI);
+ bool useCallersSP(MachineInstr &MI);
+ void useCalleesSP(MachineInstr &MI);
bool arePredicatesComplements(MachineInstr &MI1, MachineInstr &MI2);
- bool restrictingDepExistInPacket(MachineInstr*, unsigned);
- bool isNewifiable(const MachineInstr *MI);
- bool isCurifiable(MachineInstr* MI);
- bool cannotCoexist(const MachineInstr *MI, const MachineInstr *MJ);
+ bool restrictingDepExistInPacket(MachineInstr&, unsigned);
+ bool isNewifiable(const MachineInstr &MI, const TargetRegisterClass *NewRC);
+ bool isCurifiable(MachineInstr &MI);
+ bool cannotCoexist(const MachineInstr &MI, const MachineInstr &MJ);
inline bool isPromotedToDotNew() const {
return PromotedToDotNew;
}
bool tryAllocateResourcesForConstExt(bool Reserve);
bool canReserveResourcesForConstExt();
void reserveResourcesForConstExt();
- bool hasDeadDependence(const MachineInstr *I, const MachineInstr *J);
- bool hasControlDependence(const MachineInstr *I, const MachineInstr *J);
- bool hasV4SpecificDependence(const MachineInstr *I, const MachineInstr *J);
- bool producesStall(const MachineInstr *MI);
+ bool hasDeadDependence(const MachineInstr &I, const MachineInstr &J);
+ bool hasControlDependence(const MachineInstr &I, const MachineInstr &J);
+ bool hasV4SpecificDependence(const MachineInstr &I, const MachineInstr &J);
+ bool producesStall(const MachineInstr &MI);
};
} // namespace llvm
#endif // HEXAGONVLIWPACKETIZER_H
diff --git a/contrib/llvm/lib/Target/Hexagon/HexagonVectorPrint.cpp b/contrib/llvm/lib/Target/Hexagon/HexagonVectorPrint.cpp
new file mode 100644
index 000000000000..085d4645df06
--- /dev/null
+++ b/contrib/llvm/lib/Target/Hexagon/HexagonVectorPrint.cpp
@@ -0,0 +1,209 @@
+//===-- HexagonVectorPrint.cpp - Generate vector printing instructions -===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass adds the capability to generate pseudo vector/predicate register
+// printing instructions. These pseudo instructions should be used with the
+// simulator, NEVER on hardware.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "hexagon-vector-print"
+
+#include "HexagonInstrInfo.h"
+#include "HexagonSubtarget.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include <string>
+#include <vector>
+
+using namespace llvm;
+
+static cl::opt<bool> TraceHexVectorStoresOnly("trace-hex-vector-stores-only",
+ cl::Hidden, cl::ZeroOrMore, cl::init(false),
+ cl::desc("Enables tracing of vector stores"));
+
+namespace llvm {
+
+ FunctionPass *createHexagonVectorPrint();
+ void initializeHexagonVectorPrintPass(PassRegistry&);
+
+} // end namespace llvm
+
+namespace {
+
+class HexagonVectorPrint : public MachineFunctionPass {
+ const HexagonSubtarget *QST;
+ const HexagonInstrInfo *QII;
+ const HexagonRegisterInfo *QRI;
+
+public:
+ static char ID;
+
+ HexagonVectorPrint()
+ : MachineFunctionPass(ID), QST(nullptr), QII(nullptr), QRI(nullptr) {
+ initializeHexagonVectorPrintPass(*PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override { return "Hexagon VectorPrint pass"; }
+
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+};
+
+char HexagonVectorPrint::ID = 0;
+
+} // end anonymous namespace
+
+static bool isVecReg(unsigned Reg) {
+ return (Reg >= Hexagon::V0 && Reg <= Hexagon::V31)
+ || (Reg >= Hexagon::W0 && Reg <= Hexagon::W15)
+ || (Reg >= Hexagon::Q0 && Reg <= Hexagon::Q3);
+}
+
+static std::string getStringReg(unsigned R) {
+ if (R >= Hexagon::V0 && R <= Hexagon::V31) {
+ static const char* S[] = { "20", "21", "22", "23", "24", "25", "26", "27",
+ "28", "29", "2a", "2b", "2c", "2d", "2e", "2f",
+ "30", "31", "32", "33", "34", "35", "36", "37",
+ "38", "39", "3a", "3b", "3c", "3d", "3e", "3f"};
+ return S[R-Hexagon::V0];
+ }
+ if (R >= Hexagon::Q0 && R <= Hexagon::Q3) {
+ static const char* S[] = { "00", "01", "02", "03"};
+ return S[R-Hexagon::Q0];
+
+ }
+ llvm_unreachable("valid vreg");
+}
+
+static void addAsmInstr(MachineBasicBlock *MBB, unsigned Reg,
+ MachineBasicBlock::instr_iterator I,
+ const DebugLoc &DL, const HexagonInstrInfo *QII,
+ MachineFunction &Fn) {
+
+ std::string VDescStr = ".long 0x1dffe0" + getStringReg(Reg);
+ const char *cstr = Fn.createExternalSymbolName(VDescStr);
+ unsigned ExtraInfo = InlineAsm::Extra_HasSideEffects;
+ BuildMI(*MBB, I, DL, QII->get(TargetOpcode::INLINEASM))
+ .addExternalSymbol(cstr)
+ .addImm(ExtraInfo);
+}
+
+static bool getInstrVecReg(const MachineInstr &MI, unsigned &Reg) {
+ if (MI.getNumOperands() < 1) return false;
+ // Vec load or compute.
+ if (MI.getOperand(0).isReg() && MI.getOperand(0).isDef()) {
+ Reg = MI.getOperand(0).getReg();
+ if (isVecReg(Reg))
+ return !TraceHexVectorStoresOnly;
+ }
+ // Vec store.
+ if (MI.mayStore() && MI.getNumOperands() >= 3 && MI.getOperand(2).isReg()) {
+ Reg = MI.getOperand(2).getReg();
+ if (isVecReg(Reg))
+ return true;
+ }
+ // Vec store post increment.
+ if (MI.mayStore() && MI.getNumOperands() >= 4 && MI.getOperand(3).isReg()) {
+ Reg = MI.getOperand(3).getReg();
+ if (isVecReg(Reg))
+ return true;
+ }
+ return false;
+}
+
+bool HexagonVectorPrint::runOnMachineFunction(MachineFunction &Fn) {
+ bool Changed = false;
+ QST = &Fn.getSubtarget<HexagonSubtarget>();
+ QRI = QST->getRegisterInfo();
+ QII = QST->getInstrInfo();
+ std::vector<MachineInstr *> VecPrintList;
+ for (auto &MBB : Fn)
+ for (auto &MI : MBB) {
+ if (MI.isBundle()) {
+ MachineBasicBlock::instr_iterator MII = MI.getIterator();
+ for (++MII; MII != MBB.instr_end() && MII->isInsideBundle(); ++MII) {
+ if (MII->getNumOperands() < 1)
+ continue;
+ unsigned Reg = 0;
+ if (getInstrVecReg(*MII, Reg)) {
+ VecPrintList.push_back((&*MII));
+ DEBUG(dbgs() << "Found vector reg inside bundle \n"; MII->dump());
+ }
+ }
+ } else {
+ unsigned Reg = 0;
+ if (getInstrVecReg(MI, Reg)) {
+ VecPrintList.push_back(&MI);
+ DEBUG(dbgs() << "Found vector reg \n"; MI.dump());
+ }
+ }
+ }
+
+ Changed = !VecPrintList.empty();
+ if (!Changed)
+ return Changed;
+
+ for (auto *I : VecPrintList) {
+ DebugLoc DL = I->getDebugLoc();
+ MachineBasicBlock *MBB = I->getParent();
+ DEBUG(dbgs() << "Evaluating V MI\n"; I->dump());
+ unsigned Reg = 0;
+ if (!getInstrVecReg(*I, Reg))
+ llvm_unreachable("Need a vector reg");
+ MachineBasicBlock::instr_iterator MII = I->getIterator();
+ if (I->isInsideBundle()) {
+ DEBUG(dbgs() << "add to end of bundle\n"; I->dump());
+ while (MBB->instr_end() != MII && MII->isInsideBundle())
+ MII++;
+ } else {
+ DEBUG(dbgs() << "add after instruction\n"; I->dump());
+ MII++;
+ }
+ if (MBB->instr_end() == MII)
+ continue;
+
+ if (Reg >= Hexagon::V0 && Reg <= Hexagon::V31) {
+ DEBUG(dbgs() << "adding dump for V" << Reg-Hexagon::V0 << '\n');
+ addAsmInstr(MBB, Reg, MII, DL, QII, Fn);
+ } else if (Reg >= Hexagon::W0 && Reg <= Hexagon::W15) {
+ DEBUG(dbgs() << "adding dump for W" << Reg-Hexagon::W0 << '\n');
+ addAsmInstr(MBB, Hexagon::V0 + (Reg - Hexagon::W0) * 2 + 1,
+ MII, DL, QII, Fn);
+ addAsmInstr(MBB, Hexagon::V0 + (Reg - Hexagon::W0) * 2,
+ MII, DL, QII, Fn);
+ } else if (Reg >= Hexagon::Q0 && Reg <= Hexagon::Q3) {
+ DEBUG(dbgs() << "adding dump for Q" << Reg-Hexagon::Q0 << '\n');
+ addAsmInstr(MBB, Reg, MII, DL, QII, Fn);
+ } else
+ llvm_unreachable("Bad Vector reg");
+ }
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// Public Constructor Functions
+//===----------------------------------------------------------------------===//
+INITIALIZE_PASS(HexagonVectorPrint, "hexagon-vector-print",
+ "Hexagon VectorPrint pass", false, false)
+
+FunctionPass *llvm::createHexagonVectorPrint() {
+ return new HexagonVectorPrint();
+}
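
For context on the new pass above: each selected register is dumped by emitting an inline-asm word of the form ".long 0x1dffe0XX", where the low byte is the register field produced by getStringReg(). A rough standalone sketch of that mapping is shown below; it is not part of the patch, and the snprintf-based formatting and function name are assumptions for illustration only.

// Illustrative sketch only: building the simulator dump directive.
#include <cstdio>
#include <string>

std::string vectorDumpDirective(unsigned Index /* 0..31 for V, 0..3 for Q */,
                                bool IsPredicate) {
  // V0..V31 use fields 0x20..0x3f; Q0..Q3 use fields 0x00..0x03,
  // matching the string tables in getStringReg() above.
  unsigned Field = IsPredicate ? Index : 0x20 + Index;
  char Buf[32];
  std::snprintf(Buf, sizeof(Buf), ".long 0x1dffe0%02x", Field);
  return Buf; // e.g. V5 -> ".long 0x1dffe025", Q2 -> ".long 0x1dffe002"
}
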
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
index 2898b056a03d..c140bd1d7ee2 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp
@@ -569,8 +569,8 @@ public:
if (!Resolved) {
switch ((unsigned)Fixup.getKind()) {
case fixup_Hexagon_B22_PCREL:
- // GetFixupCount assumes B22 won't relax
- // Fallthrough
+ // GetFixupCount assumes B22 won't relax
+ LLVM_FALLTHROUGH;
default:
return false;
break;
@@ -745,7 +745,8 @@ public:
namespace llvm {
MCAsmBackend *createHexagonAsmBackend(Target const &T,
MCRegisterInfo const & /*MRI*/,
- const Triple &TT, StringRef CPU) {
+ const Triple &TT, StringRef CPU,
+ const MCTargetOptions &Options) {
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS());
return new HexagonAsmBackend(T, OSABI, CPU);
}
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
index c63f044b7128..4292f6b3faa4 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h
@@ -41,7 +41,7 @@ namespace HexagonII {
TypeST = 6,
TypeSYSTEM = 7,
TypeXTYPE = 8,
- TypeMEMOP = 9,
+ TypeV4LDST = 9,
TypeNV = 10,
TypeDUPLEX = 11,
TypeCOMPOUND = 12,
@@ -92,7 +92,7 @@ namespace HexagonII {
  // MemAccessSize is represented as 1+log2(N) where N is size in bytes.
enum class MemAccessSize {
- NoMemAccess = 0, // Not a memory acces instruction.
+ NoMemAccess = 0, // Not a memory access instruction.
ByteAccess = 1, // Byte access instruction (memb).
HalfWordAccess = 2, // Half word access instruction (memh).
WordAccess = 3, // Word access instruction (memw).
@@ -201,9 +201,12 @@ namespace HexagonII {
AccumulatorPos = 54,
AccumulatorMask = 0x1,
- // Complex XU, prevent xu competition by prefering slot3
+ // Complex XU, prevent xu competition by preferring slot3
PrefersSlot3Pos = 55,
PrefersSlot3Mask = 0x1,
+
+ CofMax1Pos = 60,
+ CofMax1Mask = 0x1
};
// *** The code above must match HexagonInstrFormat*.td *** //
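
As a reading aid for the Pos/Mask pairs above (including the newly added CofMax1 field), the sketch below shows how such a TSFlags bit is typically extracted, together with a worked example of the MemAccessSize encoding implied by the enum values. This is illustrative only, not part of the patch; the free-standing function is an assumption mirroring the accessors in HexagonInstrInfo.

// Illustrative sketch only: consuming a Pos/Mask pair from TSFlags.
#include <cstdint>

constexpr unsigned CofMax1Pos = 60;
constexpr uint64_t CofMax1Mask = 0x1;

bool isCofMax1(uint64_t TSFlags) {
  return (TSFlags >> CofMax1Pos) & CofMax1Mask;
}

// MemAccessSize is 1 + log2(access size in bytes), consistent with the enum:
//   memb (1 byte)  -> 1 + log2(1) = 1  (ByteAccess)
//   memh (2 bytes) -> 1 + log2(2) = 2  (HalfWordAccess)
//   memw (4 bytes) -> 1 + log2(4) = 3  (WordAccess)
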
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
index 9e2c28076432..c619c36164cf 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp
@@ -22,7 +22,6 @@ HexagonMCAsmInfo::HexagonMCAsmInfo(const Triple &TT) {
Data16bitsDirective = "\t.half\t";
Data32bitsDirective = "\t.word\t";
Data64bitsDirective = nullptr; // .xword is only supported by V9.
- ZeroDirective = "\t.skip\t";
CommentString = "//";
LCOMMDirectiveAlignmentType = LCOMM::ByteAlignment;
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
index 39b828d8a03a..2645a17b9bd0 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
@@ -78,6 +78,9 @@ void HexagonMCCodeEmitter::encodeInstruction(MCInst const &MI, raw_ostream &OS,
size_t Last = HexagonMCInstrInfo::bundleSize(HMB) - 1;
for (auto &I : HexagonMCInstrInfo::bundleInstructions(HMB)) {
MCInst &HMI = const_cast<MCInst &>(*I.getInst());
+ verifyInstructionPredicates(HMI,
+ computeAvailableFeatures(STI.getFeatureBits()));
+
EncodeSingleInstruction(HMI, OS, Fixups, STI,
parseBits(Instruction, Last, HMB, HMI),
Instruction);
@@ -817,4 +820,5 @@ MCCodeEmitter *llvm::createHexagonMCCodeEmitter(MCInstrInfo const &MII,
return new HexagonMCCodeEmitter(MII, MCT);
}
+#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "HexagonGenMCCodeEmitter.inc"
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h
index 2a154da26c5d..8e0667d9ac8e 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.h
@@ -63,6 +63,11 @@ public:
unsigned getMachineOpValue(MCInst const &MI, MCOperand const &MO,
SmallVectorImpl<MCFixup> &Fixups,
MCSubtargetInfo const &STI) const;
+
+private:
+ uint64_t computeAvailableFeatures(const FeatureBitset &FB) const;
+ void verifyInstructionPredicates(const MCInst &MI,
+ uint64_t AvailableFeatures) const;
}; // class HexagonMCCodeEmitter
} // namespace llvm
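
The two private declarations added above pair with the ENABLE_INSTR_PREDICATE_VERIFIER define in HexagonMCCodeEmitter.cpp: the TableGen-generated .inc supplies both helpers, and the emitter calls the verifier before encoding each bundled instruction. The sketch below only illustrates the shape of that check; the simplified types and bodies are stand-ins, not the real generated code, whose signatures take MCInst and FeatureBitset.

// Illustrative sketch only: the predicate-verification pattern, with stand-in types.
#include <cassert>
#include <cstdint>

struct FakeInst { uint64_t RequiredFeatures; };

// Stand-in for the generated helper: collapse subtarget feature bits into the
// word the verifier compares against.
uint64_t computeAvailableFeatures(uint64_t SubtargetFeatureBits) {
  return SubtargetFeatureBits;
}

// Stand-in for the generated verifier: assert, before encoding, that every
// feature the instruction requires is available on this subtarget.
void verifyInstructionPredicates(const FakeInst &MI, uint64_t AvailableFeatures) {
  assert((MI.RequiredFeatures & ~AvailableFeatures) == 0 &&
         "Attempting to emit an instruction not supported by the subtarget");
  (void)MI; (void)AvailableFeatures;
}
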
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
index d194bea3d8dc..5feaffe6efb9 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCCompound.cpp
@@ -290,8 +290,7 @@ MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) {
CompoundInsn = new (Context) MCInst;
CompoundInsn->setOpcode(compoundOpcode);
CompoundInsn->addOperand(Rs);
- if (Value != -1)
- CompoundInsn->addOperand(L.getOperand(2));
+ CompoundInsn->addOperand(L.getOperand(2));
CompoundInsn->addOperand(R.getOperand(1));
break;
@@ -309,8 +308,7 @@ MCInst *getCompoundInsn(MCContext &Context, MCInst const &L, MCInst const &R) {
CompoundInsn = new (Context) MCInst;
CompoundInsn->setOpcode(compoundOpcode);
CompoundInsn->addOperand(Rs);
- if (Value != -1)
- CompoundInsn->addOperand(L.getOperand(2));
+ CompoundInsn->addOperand(L.getOperand(2));
CompoundInsn->addOperand(R.getOperand(1));
break;
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
index 88336217cc8d..413f052aa4bd 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCDuplexInfo.cpp
@@ -27,58 +27,58 @@ using namespace Hexagon;
// pair table of subInstructions with opcodes
static const std::pair<unsigned, unsigned> opcodeData[] = {
- std::make_pair((unsigned)V4_SA1_addi, 0),
- std::make_pair((unsigned)V4_SA1_addrx, 6144),
- std::make_pair((unsigned)V4_SA1_addsp, 3072),
- std::make_pair((unsigned)V4_SA1_and1, 4608),
- std::make_pair((unsigned)V4_SA1_clrf, 6768),
- std::make_pair((unsigned)V4_SA1_clrfnew, 6736),
- std::make_pair((unsigned)V4_SA1_clrt, 6752),
- std::make_pair((unsigned)V4_SA1_clrtnew, 6720),
- std::make_pair((unsigned)V4_SA1_cmpeqi, 6400),
- std::make_pair((unsigned)V4_SA1_combine0i, 7168),
- std::make_pair((unsigned)V4_SA1_combine1i, 7176),
- std::make_pair((unsigned)V4_SA1_combine2i, 7184),
- std::make_pair((unsigned)V4_SA1_combine3i, 7192),
- std::make_pair((unsigned)V4_SA1_combinerz, 7432),
- std::make_pair((unsigned)V4_SA1_combinezr, 7424),
- std::make_pair((unsigned)V4_SA1_dec, 4864),
- std::make_pair((unsigned)V4_SA1_inc, 4352),
- std::make_pair((unsigned)V4_SA1_seti, 2048),
- std::make_pair((unsigned)V4_SA1_setin1, 6656),
- std::make_pair((unsigned)V4_SA1_sxtb, 5376),
- std::make_pair((unsigned)V4_SA1_sxth, 5120),
- std::make_pair((unsigned)V4_SA1_tfr, 4096),
- std::make_pair((unsigned)V4_SA1_zxtb, 5888),
- std::make_pair((unsigned)V4_SA1_zxth, 5632),
- std::make_pair((unsigned)V4_SL1_loadri_io, 0),
- std::make_pair((unsigned)V4_SL1_loadrub_io, 4096),
- std::make_pair((unsigned)V4_SL2_deallocframe, 7936),
- std::make_pair((unsigned)V4_SL2_jumpr31, 8128),
- std::make_pair((unsigned)V4_SL2_jumpr31_f, 8133),
- std::make_pair((unsigned)V4_SL2_jumpr31_fnew, 8135),
- std::make_pair((unsigned)V4_SL2_jumpr31_t, 8132),
- std::make_pair((unsigned)V4_SL2_jumpr31_tnew, 8134),
- std::make_pair((unsigned)V4_SL2_loadrb_io, 4096),
- std::make_pair((unsigned)V4_SL2_loadrd_sp, 7680),
- std::make_pair((unsigned)V4_SL2_loadrh_io, 0),
- std::make_pair((unsigned)V4_SL2_loadri_sp, 7168),
- std::make_pair((unsigned)V4_SL2_loadruh_io, 2048),
- std::make_pair((unsigned)V4_SL2_return, 8000),
- std::make_pair((unsigned)V4_SL2_return_f, 8005),
- std::make_pair((unsigned)V4_SL2_return_fnew, 8007),
- std::make_pair((unsigned)V4_SL2_return_t, 8004),
- std::make_pair((unsigned)V4_SL2_return_tnew, 8006),
- std::make_pair((unsigned)V4_SS1_storeb_io, 4096),
- std::make_pair((unsigned)V4_SS1_storew_io, 0),
- std::make_pair((unsigned)V4_SS2_allocframe, 7168),
- std::make_pair((unsigned)V4_SS2_storebi0, 4608),
- std::make_pair((unsigned)V4_SS2_storebi1, 4864),
- std::make_pair((unsigned)V4_SS2_stored_sp, 2560),
- std::make_pair((unsigned)V4_SS2_storeh_io, 0),
- std::make_pair((unsigned)V4_SS2_storew_sp, 2048),
- std::make_pair((unsigned)V4_SS2_storewi0, 4096),
- std::make_pair((unsigned)V4_SS2_storewi1, 4352)};
+ std::make_pair((unsigned)SA1_addi, 0),
+ std::make_pair((unsigned)SA1_addrx, 6144),
+ std::make_pair((unsigned)SA1_addsp, 3072),
+ std::make_pair((unsigned)SA1_and1, 4608),
+ std::make_pair((unsigned)SA1_clrf, 6768),
+ std::make_pair((unsigned)SA1_clrfnew, 6736),
+ std::make_pair((unsigned)SA1_clrt, 6752),
+ std::make_pair((unsigned)SA1_clrtnew, 6720),
+ std::make_pair((unsigned)SA1_cmpeqi, 6400),
+ std::make_pair((unsigned)SA1_combine0i, 7168),
+ std::make_pair((unsigned)SA1_combine1i, 7176),
+ std::make_pair((unsigned)SA1_combine2i, 7184),
+ std::make_pair((unsigned)SA1_combine3i, 7192),
+ std::make_pair((unsigned)SA1_combinerz, 7432),
+ std::make_pair((unsigned)SA1_combinezr, 7424),
+ std::make_pair((unsigned)SA1_dec, 4864),
+ std::make_pair((unsigned)SA1_inc, 4352),
+ std::make_pair((unsigned)SA1_seti, 2048),
+ std::make_pair((unsigned)SA1_setin1, 6656),
+ std::make_pair((unsigned)SA1_sxtb, 5376),
+ std::make_pair((unsigned)SA1_sxth, 5120),
+ std::make_pair((unsigned)SA1_tfr, 4096),
+ std::make_pair((unsigned)SA1_zxtb, 5888),
+ std::make_pair((unsigned)SA1_zxth, 5632),
+ std::make_pair((unsigned)SL1_loadri_io, 0),
+ std::make_pair((unsigned)SL1_loadrub_io, 4096),
+ std::make_pair((unsigned)SL2_deallocframe, 7936),
+ std::make_pair((unsigned)SL2_jumpr31, 8128),
+ std::make_pair((unsigned)SL2_jumpr31_f, 8133),
+ std::make_pair((unsigned)SL2_jumpr31_fnew, 8135),
+ std::make_pair((unsigned)SL2_jumpr31_t, 8132),
+ std::make_pair((unsigned)SL2_jumpr31_tnew, 8134),
+ std::make_pair((unsigned)SL2_loadrb_io, 4096),
+ std::make_pair((unsigned)SL2_loadrd_sp, 7680),
+ std::make_pair((unsigned)SL2_loadrh_io, 0),
+ std::make_pair((unsigned)SL2_loadri_sp, 7168),
+ std::make_pair((unsigned)SL2_loadruh_io, 2048),
+ std::make_pair((unsigned)SL2_return, 8000),
+ std::make_pair((unsigned)SL2_return_f, 8005),
+ std::make_pair((unsigned)SL2_return_fnew, 8007),
+ std::make_pair((unsigned)SL2_return_t, 8004),
+ std::make_pair((unsigned)SL2_return_tnew, 8006),
+ std::make_pair((unsigned)SS1_storeb_io, 4096),
+ std::make_pair((unsigned)SS1_storew_io, 0),
+ std::make_pair((unsigned)SS2_allocframe, 7168),
+ std::make_pair((unsigned)SS2_storebi0, 4608),
+ std::make_pair((unsigned)SS2_storebi1, 4864),
+ std::make_pair((unsigned)SS2_stored_sp, 2560),
+ std::make_pair((unsigned)SS2_storeh_io, 0),
+ std::make_pair((unsigned)SS2_storew_sp, 2048),
+ std::make_pair((unsigned)SS2_storewi0, 4096),
+ std::make_pair((unsigned)SS2_storewi1, 4352)};
bool HexagonMCInstrInfo::isDuplexPairMatch(unsigned Ga, unsigned Gb) {
switch (Ga) {
@@ -262,25 +262,19 @@ unsigned HexagonMCInstrInfo::getDuplexCandidateGroup(MCInst const &MCI) {
case Hexagon::EH_RETURN_JMPR:
case Hexagon::J2_jumpr:
- case Hexagon::JMPret:
// jumpr r31
// Actual form JMPR %PC<imp-def>, %R31<imp-use>, %R0<imp-use,internal>.
DstReg = MCI.getOperand(0).getReg();
- if (Hexagon::R31 == DstReg) {
+ if (Hexagon::R31 == DstReg)
return HexagonII::HSIG_L2;
- }
break;
case Hexagon::J2_jumprt:
case Hexagon::J2_jumprf:
case Hexagon::J2_jumprtnew:
case Hexagon::J2_jumprfnew:
- case Hexagon::JMPrett:
- case Hexagon::JMPretf:
- case Hexagon::JMPrettnew:
- case Hexagon::JMPretfnew:
- case Hexagon::JMPrettnewpt:
- case Hexagon::JMPretfnewpt:
+ case Hexagon::J2_jumprtnewpt:
+ case Hexagon::J2_jumprfnewpt:
DstReg = MCI.getOperand(1).getReg();
SrcReg = MCI.getOperand(0).getReg();
// [if ([!]p0[.new])] jumpr r31
@@ -679,6 +673,7 @@ inline static void addOps(MCInst &subInstPtr, MCInst const &Inst,
case Hexagon::D9:
case Hexagon::D10:
case Hexagon::D11:
+ case Hexagon::P0:
subInstPtr.addOperand(Inst.getOperand(opNum));
break;
}
@@ -699,54 +694,54 @@ MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) {
Absolute = Inst.getOperand(2).getExpr()->evaluateAsAbsolute(Value);
assert(Absolute);(void)Absolute;
if (Value == 1) {
- Result.setOpcode(Hexagon::V4_SA1_inc);
+ Result.setOpcode(Hexagon::SA1_inc);
addOps(Result, Inst, 0);
addOps(Result, Inst, 1);
break;
} // 1,2 SUBInst $Rd = add($Rs, #1)
else if (Value == -1) {
- Result.setOpcode(Hexagon::V4_SA1_dec);
+ Result.setOpcode(Hexagon::SA1_dec);
addOps(Result, Inst, 0);
addOps(Result, Inst, 1);
break;
} // 1,2 SUBInst $Rd = add($Rs,#-1)
else if (Inst.getOperand(1).getReg() == Hexagon::R29) {
- Result.setOpcode(Hexagon::V4_SA1_addsp);
+ Result.setOpcode(Hexagon::SA1_addsp);
addOps(Result, Inst, 0);
addOps(Result, Inst, 2);
break;
} // 1,3 SUBInst $Rd = add(r29, #$u6_2)
else {
- Result.setOpcode(Hexagon::V4_SA1_addi);
+ Result.setOpcode(Hexagon::SA1_addi);
addOps(Result, Inst, 0);
addOps(Result, Inst, 1);
addOps(Result, Inst, 2);
break;
} // 1,2,3 SUBInst $Rx = add($Rx, #$s7)
case Hexagon::A2_add:
- Result.setOpcode(Hexagon::V4_SA1_addrx);
+ Result.setOpcode(Hexagon::SA1_addrx);
addOps(Result, Inst, 0);
addOps(Result, Inst, 1);
addOps(Result, Inst, 2);
break; // 1,2,3 SUBInst $Rx = add($_src_, $Rs)
case Hexagon::S2_allocframe:
- Result.setOpcode(Hexagon::V4_SS2_allocframe);
+ Result.setOpcode(Hexagon::SS2_allocframe);
addOps(Result, Inst, 0);
break; // 1 SUBInst allocframe(#$u5_3)
case Hexagon::A2_andir:
if (minConstant(Inst, 2) == 255) {
- Result.setOpcode(Hexagon::V4_SA1_zxtb);
+ Result.setOpcode(Hexagon::SA1_zxtb);
addOps(Result, Inst, 0);
addOps(Result, Inst, 1);
break; // 1,2 $Rd = and($Rs, #255)
} else {
- Result.setOpcode(Hexagon::V4_SA1_and1);
+ Result.setOpcode(Hexagon::SA1_and1);
addOps(Result, Inst, 0);
addOps(Result, Inst, 1);
break; // 1,2 SUBInst $Rd = and($Rs, #1)
}
case Hexagon::C2_cmpeqi:
- Result.setOpcode(Hexagon::V4_SA1_cmpeqi);
+ Result.setOpcode(Hexagon::SA1_cmpeqi);
addOps(Result, Inst, 1);
addOps(Result, Inst, 2);
break; // 2,3 SUBInst p0 = cmp.eq($Rs, #$u2)
@@ -755,120 +750,115 @@ MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) {
Absolute = Inst.getOperand(1).getExpr()->evaluateAsAbsolute(Value);
assert(Absolute);(void)Absolute;
if (Value == 1) {
- Result.setOpcode(Hexagon::V4_SA1_combine1i);
+ Result.setOpcode(Hexagon::SA1_combine1i);
addOps(Result, Inst, 0);
addOps(Result, Inst, 2);
break; // 1,3 SUBInst $Rdd = combine(#1, #$u2)
}
if (Value == 3) {
- Result.setOpcode(Hexagon::V4_SA1_combine3i);
+ Result.setOpcode(Hexagon::SA1_combine3i);
addOps(Result, Inst, 0);
addOps(Result, Inst, 2);
break; // 1,3 SUBInst $Rdd = combine(#3, #$u2)
}
if (Value == 0) {
- Result.setOpcode(Hexagon::V4_SA1_combine0i);
+ Result.setOpcode(Hexagon::SA1_combine0i);
addOps(Result, Inst, 0);
addOps(Result, Inst, 2);
break; // 1,3 SUBInst $Rdd = combine(#0, #$u2)
}
if (Value == 2) {
- Result.setOpcode(Hexagon::V4_SA1_combine2i);
+ Result.setOpcode(Hexagon::SA1_combine2i);
addOps(Result, Inst, 0);
addOps(Result, Inst, 2);
break; // 1,3 SUBInst $Rdd = combine(#2, #$u2)
}
case Hexagon::A4_combineir:
- Result.setOpcode(Hexagon::V4_SA1_combinezr);
+ Result.setOpcode(Hexagon::SA1_combinezr);
addOps(Result, Inst, 0);
addOps(Result, Inst, 2);
break; // 1,3 SUBInst $Rdd = combine(#0, $Rs)
case Hexagon::A4_combineri:
- Result.setOpcode(Hexagon::V4_SA1_combinerz);
+ Result.setOpcode(Hexagon::SA1_combinerz);
addOps(Result, Inst, 0);
addOps(Result, Inst, 1);
break; // 1,2 SUBInst $Rdd = combine($Rs, #0)
case Hexagon::L4_return_tnew_pnt:
case Hexagon::L4_return_tnew_pt:
- Result.setOpcode(Hexagon::V4_SL2_return_tnew);
+ Result.setOpcode(Hexagon::SL2_return_tnew);
break; // none SUBInst if (p0.new) dealloc_return:nt
case Hexagon::L4_return_fnew_pnt:
case Hexagon::L4_return_fnew_pt:
- Result.setOpcode(Hexagon::V4_SL2_return_fnew);
+ Result.setOpcode(Hexagon::SL2_return_fnew);
break; // none SUBInst if (!p0.new) dealloc_return:nt
case Hexagon::L4_return_f:
- Result.setOpcode(Hexagon::V4_SL2_return_f);
+ Result.setOpcode(Hexagon::SL2_return_f);
break; // none SUBInst if (!p0) dealloc_return
case Hexagon::L4_return_t:
- Result.setOpcode(Hexagon::V4_SL2_return_t);
+ Result.setOpcode(Hexagon::SL2_return_t);
break; // none SUBInst if (p0) dealloc_return
case Hexagon::L4_return:
- Result.setOpcode(Hexagon::V4_SL2_return);
+ Result.setOpcode(Hexagon::SL2_return);
break; // none SUBInst dealloc_return
case Hexagon::L2_deallocframe:
- Result.setOpcode(Hexagon::V4_SL2_deallocframe);
+ Result.setOpcode(Hexagon::SL2_deallocframe);
break; // none SUBInst deallocframe
case Hexagon::EH_RETURN_JMPR:
case Hexagon::J2_jumpr:
- case Hexagon::JMPret:
- Result.setOpcode(Hexagon::V4_SL2_jumpr31);
+ Result.setOpcode(Hexagon::SL2_jumpr31);
break; // none SUBInst jumpr r31
case Hexagon::J2_jumprf:
- case Hexagon::JMPretf:
- Result.setOpcode(Hexagon::V4_SL2_jumpr31_f);
+ Result.setOpcode(Hexagon::SL2_jumpr31_f);
break; // none SUBInst if (!p0) jumpr r31
case Hexagon::J2_jumprfnew:
- case Hexagon::JMPretfnewpt:
- case Hexagon::JMPretfnew:
- Result.setOpcode(Hexagon::V4_SL2_jumpr31_fnew);
+ case Hexagon::J2_jumprfnewpt:
+ Result.setOpcode(Hexagon::SL2_jumpr31_fnew);
break; // none SUBInst if (!p0.new) jumpr:nt r31
case Hexagon::J2_jumprt:
- case Hexagon::JMPrett:
- Result.setOpcode(Hexagon::V4_SL2_jumpr31_t);
+ Result.setOpcode(Hexagon::SL2_jumpr31_t);
break; // none SUBInst if (p0) jumpr r31
case Hexagon::J2_jumprtnew:
- case Hexagon::JMPrettnewpt:
- case Hexagon::JMPrettnew:
- Result.setOpcode(Hexagon::V4_SL2_jumpr31_tnew);
+ case Hexagon::J2_jumprtnewpt:
+ Result.setOpcode(Hexagon::SL2_jumpr31_tnew);
break; // none SUBInst if (p0.new) jumpr:nt r31
case Hexagon::L2_loadrb_io:
- Result.setOpcode(Hexagon::V4_SL2_loadrb_io);
+ Result.setOpcode(Hexagon::SL2_loadrb_io);
addOps(Result, Inst, 0);
addOps(Result, Inst, 1);
addOps(Result, Inst, 2);
break; // 1,2,3 SUBInst $Rd = memb($Rs + #$u3_0)
case Hexagon::L2_loadrd_io:
- Result.setOpcode(Hexagon::V4_SL2_loadrd_sp);
+ Result.setOpcode(Hexagon::SL2_loadrd_sp);
addOps(Result, Inst, 0);
addOps(Result, Inst, 2);
break; // 1,3 SUBInst $Rdd = memd(r29 + #$u5_3)
case Hexagon::L2_loadrh_io:
- Result.setOpcode(Hexagon::V4_SL2_loadrh_io);
+ Result.setOpcode(Hexagon::SL2_loadrh_io);
addOps(Result, Inst, 0);
addOps(Result, Inst, 1);
addOps(Result, Inst, 2);
break; // 1,2,3 SUBInst $Rd = memh($Rs + #$u3_1)
case Hexagon::L2_loadrub_io:
- Result.setOpcode(Hexagon::V4_SL1_loadrub_io);
+ Result.setOpcode(Hexagon::SL1_loadrub_io);
addOps(Result, Inst, 0);
addOps(Result, Inst, 1);
addOps(Result, Inst, 2);
break; // 1,2,3 SUBInst $Rd = memub($Rs + #$u4_0)
case Hexagon::L2_loadruh_io:
- Result.setOpcode(Hexagon::V4_SL2_loadruh_io);
+ Result.setOpcode(Hexagon::SL2_loadruh_io);
addOps(Result, Inst, 0);
addOps(Result, Inst, 1);
addOps(Result, Inst, 2);
break; // 1,2,3 SUBInst $Rd = memuh($Rs + #$u3_1)
case Hexagon::L2_loadri_io:
if (Inst.getOperand(1).getReg() == Hexagon::R29) {
- Result.setOpcode(Hexagon::V4_SL2_loadri_sp);
+ Result.setOpcode(Hexagon::SL2_loadri_sp);
addOps(Result, Inst, 0);
addOps(Result, Inst, 2);
break; // 2 1,3 SUBInst $Rd = memw(r29 + #$u5_2)
} else {
- Result.setOpcode(Hexagon::V4_SL1_loadri_io);
+ Result.setOpcode(Hexagon::SL1_loadri_io);
addOps(Result, Inst, 0);
addOps(Result, Inst, 1);
addOps(Result, Inst, 2);
@@ -878,29 +868,29 @@ MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) {
Absolute = Inst.getOperand(2).getExpr()->evaluateAsAbsolute(Value);
assert(Absolute);(void)Absolute;
if (Value == 0) {
- Result.setOpcode(Hexagon::V4_SS2_storebi0);
+ Result.setOpcode(Hexagon::SS2_storebi0);
addOps(Result, Inst, 0);
addOps(Result, Inst, 1);
break; // 1,2 SUBInst memb($Rs + #$u4_0)=#0
} else if (Value == 1) {
- Result.setOpcode(Hexagon::V4_SS2_storebi1);
+ Result.setOpcode(Hexagon::SS2_storebi1);
addOps(Result, Inst, 0);
addOps(Result, Inst, 1);
break; // 2 1,2 SUBInst memb($Rs + #$u4_0)=#1
}
case Hexagon::S2_storerb_io:
- Result.setOpcode(Hexagon::V4_SS1_storeb_io);
+ Result.setOpcode(Hexagon::SS1_storeb_io);
addOps(Result, Inst, 0);
addOps(Result, Inst, 1);
addOps(Result, Inst, 2);
break; // 1,2,3 SUBInst memb($Rs + #$u4_0) = $Rt
case Hexagon::S2_storerd_io:
- Result.setOpcode(Hexagon::V4_SS2_stored_sp);
+ Result.setOpcode(Hexagon::SS2_stored_sp);
addOps(Result, Inst, 1);
addOps(Result, Inst, 2);
break; // 2,3 SUBInst memd(r29 + #$s6_3) = $Rtt
case Hexagon::S2_storerh_io:
- Result.setOpcode(Hexagon::V4_SS2_storeh_io);
+ Result.setOpcode(Hexagon::SS2_storeh_io);
addOps(Result, Inst, 0);
addOps(Result, Inst, 1);
addOps(Result, Inst, 2);
@@ -909,84 +899,88 @@ MCInst HexagonMCInstrInfo::deriveSubInst(MCInst const &Inst) {
Absolute = Inst.getOperand(2).getExpr()->evaluateAsAbsolute(Value);
assert(Absolute);(void)Absolute;
if (Value == 0) {
- Result.setOpcode(Hexagon::V4_SS2_storewi0);
+ Result.setOpcode(Hexagon::SS2_storewi0);
addOps(Result, Inst, 0);
addOps(Result, Inst, 1);
break; // 3 1,2 SUBInst memw($Rs + #$u4_2)=#0
} else if (Value == 1) {
- Result.setOpcode(Hexagon::V4_SS2_storewi1);
+ Result.setOpcode(Hexagon::SS2_storewi1);
addOps(Result, Inst, 0);
addOps(Result, Inst, 1);
break; // 3 1,2 SUBInst memw($Rs + #$u4_2)=#1
} else if (Inst.getOperand(0).getReg() == Hexagon::R29) {
- Result.setOpcode(Hexagon::V4_SS2_storew_sp);
+ Result.setOpcode(Hexagon::SS2_storew_sp);
addOps(Result, Inst, 1);
addOps(Result, Inst, 2);
break; // 1 2,3 SUBInst memw(r29 + #$u5_2) = $Rt
}
case Hexagon::S2_storeri_io:
if (Inst.getOperand(0).getReg() == Hexagon::R29) {
- Result.setOpcode(Hexagon::V4_SS2_storew_sp);
+ Result.setOpcode(Hexagon::SS2_storew_sp);
addOps(Result, Inst, 1);
addOps(Result, Inst, 2); // 1,2,3 SUBInst memw(sp + #$u5_2) = $Rt
} else {
- Result.setOpcode(Hexagon::V4_SS1_storew_io);
+ Result.setOpcode(Hexagon::SS1_storew_io);
addOps(Result, Inst, 0);
addOps(Result, Inst, 1);
addOps(Result, Inst, 2); // 1,2,3 SUBInst memw($Rs + #$u4_2) = $Rt
}
break;
case Hexagon::A2_sxtb:
- Result.setOpcode(Hexagon::V4_SA1_sxtb);
+ Result.setOpcode(Hexagon::SA1_sxtb);
addOps(Result, Inst, 0);
addOps(Result, Inst, 1);
break; // 1,2 SUBInst $Rd = sxtb($Rs)
case Hexagon::A2_sxth:
- Result.setOpcode(Hexagon::V4_SA1_sxth);
+ Result.setOpcode(Hexagon::SA1_sxth);
addOps(Result, Inst, 0);
addOps(Result, Inst, 1);
break; // 1,2 SUBInst $Rd = sxth($Rs)
case Hexagon::A2_tfr:
- Result.setOpcode(Hexagon::V4_SA1_tfr);
+ Result.setOpcode(Hexagon::SA1_tfr);
addOps(Result, Inst, 0);
addOps(Result, Inst, 1);
break; // 1,2 SUBInst $Rd = $Rs
case Hexagon::C2_cmovenewif:
- Result.setOpcode(Hexagon::V4_SA1_clrfnew);
+ Result.setOpcode(Hexagon::SA1_clrfnew);
addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
break; // 2 SUBInst if (!p0.new) $Rd = #0
case Hexagon::C2_cmovenewit:
- Result.setOpcode(Hexagon::V4_SA1_clrtnew);
+ Result.setOpcode(Hexagon::SA1_clrtnew);
addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
break; // 2 SUBInst if (p0.new) $Rd = #0
case Hexagon::C2_cmoveif:
- Result.setOpcode(Hexagon::V4_SA1_clrf);
+ Result.setOpcode(Hexagon::SA1_clrf);
addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
break; // 2 SUBInst if (!p0) $Rd = #0
case Hexagon::C2_cmoveit:
- Result.setOpcode(Hexagon::V4_SA1_clrt);
+ Result.setOpcode(Hexagon::SA1_clrt);
addOps(Result, Inst, 0);
+ addOps(Result, Inst, 1);
break; // 2 SUBInst if (p0) $Rd = #0
case Hexagon::A2_tfrsi:
Absolute = Inst.getOperand(1).getExpr()->evaluateAsAbsolute(Value);
if (Absolute && Value == -1) {
- Result.setOpcode(Hexagon::V4_SA1_setin1);
+ Result.setOpcode(Hexagon::SA1_setin1);
addOps(Result, Inst, 0);
break; // 2 1 SUBInst $Rd = #-1
} else {
- Result.setOpcode(Hexagon::V4_SA1_seti);
+ Result.setOpcode(Hexagon::SA1_seti);
addOps(Result, Inst, 0);
addOps(Result, Inst, 1);
break; // 1,2 SUBInst $Rd = #$u6
}
case Hexagon::A2_zxtb:
- Result.setOpcode(Hexagon::V4_SA1_zxtb);
+ Result.setOpcode(Hexagon::SA1_zxtb);
addOps(Result, Inst, 0);
addOps(Result, Inst, 1);
break; // 1,2 $Rd = and($Rs, #255)
case Hexagon::A2_zxth:
- Result.setOpcode(Hexagon::V4_SA1_zxth);
+ Result.setOpcode(Hexagon::SA1_zxth);
addOps(Result, Inst, 0);
addOps(Result, Inst, 1);
break; // 1,2 SUBInst $Rd = zxth($Rs)
@@ -1022,7 +1016,7 @@ HexagonMCInstrInfo::getDuplexPossibilties(MCInstrInfo const &MCII,
k = j + distance;
(j < numInstrInPacket) && (k < numInstrInPacket); ++j, ++k) {
- // Check if reversable.
+ // Check if reversible.
bool bisReversable = true;
if (isStoreInst(MCB.getOperand(j).getInst()->getOpcode()) &&
isStoreInst(MCB.getOperand(k).getInst()->getOpcode())) {
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
index 67dcb8fea739..226470cfbced 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.cpp
@@ -13,20 +13,27 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "hexagonmcelfstreamer"
-#include "Hexagon.h"
-#include "HexagonMCELFStreamer.h"
-#include "MCTargetDesc/HexagonBaseInfo.h"
+#include "MCTargetDesc/HexagonMCELFStreamer.h"
+#include "MCTargetDesc/HexagonMCInstrInfo.h"
#include "MCTargetDesc/HexagonMCShuffler.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCObjectStreamer.h"
+#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
+#include <cassert>
+#include <cstdint>
using namespace llvm;
@@ -148,8 +155,10 @@ void HexagonMCELFStreamer::HexagonMCEmitLocalCommonSymbol(
}
namespace llvm {
+
MCStreamer *createHexagonELFStreamer(MCContext &Context, MCAsmBackend &MAB,
raw_pwrite_stream &OS, MCCodeEmitter *CE) {
return new HexagonMCELFStreamer(Context, MAB, OS, CE);
}
-}
+
+} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h
index d77c0cd16b37..0ac1a68d4ef9 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCELFStreamer.h
@@ -7,14 +7,14 @@
//
//===----------------------------------------------------------------------===//
-#ifndef HEXAGONMCELFSTREAMER_H
-#define HEXAGONMCELFSTREAMER_H
+#ifndef LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCELFSTREAMER_H
+#define LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCELFSTREAMER_H
-#include "MCTargetDesc/HexagonMCCodeEmitter.h"
-#include "MCTargetDesc/HexagonMCInstrInfo.h"
#include "MCTargetDesc/HexagonMCTargetDesc.h"
#include "llvm/MC/MCELFStreamer.h"
-#include "HexagonTargetStreamer.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include <cstdint>
+#include <memory>
namespace llvm {
@@ -27,8 +27,7 @@ public:
: MCELFStreamer(Context, TAB, OS, Emitter),
MCII(createHexagonMCInstrInfo()) {}
- virtual void EmitInstruction(const MCInst &Inst,
- const MCSubtargetInfo &STI) override;
+ void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
void EmitSymbol(const MCInst &Inst);
void HexagonMCEmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment,
@@ -40,6 +39,6 @@ public:
MCStreamer *createHexagonELFStreamer(MCContext &Context, MCAsmBackend &MAB,
raw_pwrite_stream &OS, MCCodeEmitter *CE);
-} // namespace llvm
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCELFSTREAMER_H
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
index 941cbd6dc35d..e627f026c8ad 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp
@@ -306,7 +306,7 @@ int HexagonMCInstrInfo::getMinValue(MCInstrInfo const &MCII,
return 0;
}
-char const *HexagonMCInstrInfo::getName(MCInstrInfo const &MCII,
+StringRef HexagonMCInstrInfo::getName(MCInstrInfo const &MCII,
MCInst const &MCI) {
return MCII.getName(MCI.getOpcode());
}
@@ -431,6 +431,11 @@ bool HexagonMCInstrInfo::isCanon(MCInstrInfo const &MCII, MCInst const &MCI) {
HexagonMCInstrInfo::getType(MCII, MCI) != HexagonII::TypeENDLOOP);
}
+bool HexagonMCInstrInfo::isCofMax1(MCInstrInfo const &MCII, MCInst const &MCI) {
+ const uint64_t F = HexagonMCInstrInfo::getDesc(MCII, MCI).TSFlags;
+ return ((F >> HexagonII::CofMax1Pos) & HexagonII::CofMax1Mask);
+}
+
bool HexagonMCInstrInfo::isCompound(MCInstrInfo const &MCII,
MCInst const &MCI) {
return (getType(MCII, MCI) == HexagonII::TypeCOMPOUND);
@@ -602,58 +607,58 @@ bool HexagonMCInstrInfo::isSubInstruction(MCInst const &MCI) {
switch (MCI.getOpcode()) {
default:
return false;
- case Hexagon::V4_SA1_addi:
- case Hexagon::V4_SA1_addrx:
- case Hexagon::V4_SA1_addsp:
- case Hexagon::V4_SA1_and1:
- case Hexagon::V4_SA1_clrf:
- case Hexagon::V4_SA1_clrfnew:
- case Hexagon::V4_SA1_clrt:
- case Hexagon::V4_SA1_clrtnew:
- case Hexagon::V4_SA1_cmpeqi:
- case Hexagon::V4_SA1_combine0i:
- case Hexagon::V4_SA1_combine1i:
- case Hexagon::V4_SA1_combine2i:
- case Hexagon::V4_SA1_combine3i:
- case Hexagon::V4_SA1_combinerz:
- case Hexagon::V4_SA1_combinezr:
- case Hexagon::V4_SA1_dec:
- case Hexagon::V4_SA1_inc:
- case Hexagon::V4_SA1_seti:
- case Hexagon::V4_SA1_setin1:
- case Hexagon::V4_SA1_sxtb:
- case Hexagon::V4_SA1_sxth:
- case Hexagon::V4_SA1_tfr:
- case Hexagon::V4_SA1_zxtb:
- case Hexagon::V4_SA1_zxth:
- case Hexagon::V4_SL1_loadri_io:
- case Hexagon::V4_SL1_loadrub_io:
- case Hexagon::V4_SL2_deallocframe:
- case Hexagon::V4_SL2_jumpr31:
- case Hexagon::V4_SL2_jumpr31_f:
- case Hexagon::V4_SL2_jumpr31_fnew:
- case Hexagon::V4_SL2_jumpr31_t:
- case Hexagon::V4_SL2_jumpr31_tnew:
- case Hexagon::V4_SL2_loadrb_io:
- case Hexagon::V4_SL2_loadrd_sp:
- case Hexagon::V4_SL2_loadrh_io:
- case Hexagon::V4_SL2_loadri_sp:
- case Hexagon::V4_SL2_loadruh_io:
- case Hexagon::V4_SL2_return:
- case Hexagon::V4_SL2_return_f:
- case Hexagon::V4_SL2_return_fnew:
- case Hexagon::V4_SL2_return_t:
- case Hexagon::V4_SL2_return_tnew:
- case Hexagon::V4_SS1_storeb_io:
- case Hexagon::V4_SS1_storew_io:
- case Hexagon::V4_SS2_allocframe:
- case Hexagon::V4_SS2_storebi0:
- case Hexagon::V4_SS2_storebi1:
- case Hexagon::V4_SS2_stored_sp:
- case Hexagon::V4_SS2_storeh_io:
- case Hexagon::V4_SS2_storew_sp:
- case Hexagon::V4_SS2_storewi0:
- case Hexagon::V4_SS2_storewi1:
+ case Hexagon::SA1_addi:
+ case Hexagon::SA1_addrx:
+ case Hexagon::SA1_addsp:
+ case Hexagon::SA1_and1:
+ case Hexagon::SA1_clrf:
+ case Hexagon::SA1_clrfnew:
+ case Hexagon::SA1_clrt:
+ case Hexagon::SA1_clrtnew:
+ case Hexagon::SA1_cmpeqi:
+ case Hexagon::SA1_combine0i:
+ case Hexagon::SA1_combine1i:
+ case Hexagon::SA1_combine2i:
+ case Hexagon::SA1_combine3i:
+ case Hexagon::SA1_combinerz:
+ case Hexagon::SA1_combinezr:
+ case Hexagon::SA1_dec:
+ case Hexagon::SA1_inc:
+ case Hexagon::SA1_seti:
+ case Hexagon::SA1_setin1:
+ case Hexagon::SA1_sxtb:
+ case Hexagon::SA1_sxth:
+ case Hexagon::SA1_tfr:
+ case Hexagon::SA1_zxtb:
+ case Hexagon::SA1_zxth:
+ case Hexagon::SL1_loadri_io:
+ case Hexagon::SL1_loadrub_io:
+ case Hexagon::SL2_deallocframe:
+ case Hexagon::SL2_jumpr31:
+ case Hexagon::SL2_jumpr31_f:
+ case Hexagon::SL2_jumpr31_fnew:
+ case Hexagon::SL2_jumpr31_t:
+ case Hexagon::SL2_jumpr31_tnew:
+ case Hexagon::SL2_loadrb_io:
+ case Hexagon::SL2_loadrd_sp:
+ case Hexagon::SL2_loadrh_io:
+ case Hexagon::SL2_loadri_sp:
+ case Hexagon::SL2_loadruh_io:
+ case Hexagon::SL2_return:
+ case Hexagon::SL2_return_f:
+ case Hexagon::SL2_return_fnew:
+ case Hexagon::SL2_return_t:
+ case Hexagon::SL2_return_tnew:
+ case Hexagon::SS1_storeb_io:
+ case Hexagon::SS1_storew_io:
+ case Hexagon::SS2_allocframe:
+ case Hexagon::SS2_storebi0:
+ case Hexagon::SS2_storebi1:
+ case Hexagon::SS2_stored_sp:
+ case Hexagon::SS2_storeh_io:
+ case Hexagon::SS2_storew_sp:
+ case Hexagon::SS2_storewi0:
+ case Hexagon::SS2_storewi1:
return true;
}
}
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
index 58a8f68b9847..d701c3ade69e 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.h
@@ -133,7 +133,7 @@ int getMaxValue(MCInstrInfo const &MCII, MCInst const &MCI);
int getMinValue(MCInstrInfo const &MCII, MCInst const &MCI);
// Return instruction name
-char const *getName(MCInstrInfo const &MCII, MCInst const &MCI);
+StringRef getName(MCInstrInfo const &MCII, MCInst const &MCI);
// Return the operand index for the new value.
unsigned short getNewValueOp(MCInstrInfo const &MCII, MCInst const &MCI);
@@ -170,6 +170,7 @@ bool isBundle(MCInst const &MCI);
// Return whether the insn is an actual insn.
bool isCanon(MCInstrInfo const &MCII, MCInst const &MCI);
+bool isCofMax1(MCInstrInfo const &MCII, MCInst const &MCI);
bool isCompound(MCInstrInfo const &MCII, MCInst const &MCI);
// Return the duplex iclass given the two duplex classes
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
index 35a1a23a8892..694cf582f8d9 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp
@@ -11,22 +11,29 @@
//
//===----------------------------------------------------------------------===//
-#include "HexagonMCTargetDesc.h"
#include "Hexagon.h"
-#include "HexagonMCAsmInfo.h"
-#include "HexagonMCELFStreamer.h"
+#include "HexagonTargetStreamer.h"
#include "MCTargetDesc/HexagonInstPrinter.h"
+#include "MCTargetDesc/HexagonMCAsmInfo.h"
+#include "MCTargetDesc/HexagonMCELFStreamer.h"
+#include "MCTargetDesc/HexagonMCInstrInfo.h"
+#include "MCTargetDesc/HexagonMCTargetDesc.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCInstrInfo.h"
-#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
-#include "llvm/MC/MachineLocation.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/TargetRegistry.h"
+#include <cassert>
+#include <cstdint>
+#include <new>
+#include <string>
using namespace llvm;
@@ -59,7 +66,6 @@ static cl::opt<bool> HexagonV55ArchVariant("mv55", cl::Hidden, cl::init(false),
static cl::opt<bool> HexagonV60ArchVariant("mv60", cl::Hidden, cl::init(false),
cl::desc("Build for Hexagon V60"));
-
static StringRef DefaultArch = "hexagonv60";
static StringRef HexagonGetArchVariant() {
@@ -74,7 +80,7 @@ static StringRef HexagonGetArchVariant() {
return "";
}
-StringRef HEXAGON_MC::selectHexagonCPU(const Triple &TT, StringRef CPU) {
+StringRef Hexagon_MC::selectHexagonCPU(const Triple &TT, StringRef CPU) {
StringRef ArchV = HexagonGetArchVariant();
if (!ArchV.empty() && !CPU.empty()) {
if (ArchV != CPU)
@@ -103,17 +109,19 @@ static MCRegisterInfo *createHexagonMCRegisterInfo(const Triple &TT) {
static MCSubtargetInfo *
createHexagonMCSubtargetInfo(const Triple &TT, StringRef CPU, StringRef FS) {
- CPU = HEXAGON_MC::selectHexagonCPU(TT, CPU);
+ CPU = Hexagon_MC::selectHexagonCPU(TT, CPU);
return createHexagonMCSubtargetInfoImpl(TT, CPU, FS);
}
namespace {
+
class HexagonTargetAsmStreamer : public HexagonTargetStreamer {
public:
HexagonTargetAsmStreamer(MCStreamer &S,
formatted_raw_ostream &, bool,
MCInstPrinter &)
: HexagonTargetStreamer(S) {}
+
void prettyPrintAsm(MCInstPrinter &InstPrinter, raw_ostream &OS,
const MCInst &Inst, const MCSubtargetInfo &STI) override {
assert(HexagonMCInstrInfo::isBundle(Inst));
@@ -145,14 +153,9 @@ public:
OS << "\t}" << PacketBundle.second;
}
};
-}
-namespace {
class HexagonTargetELFStreamer : public HexagonTargetStreamer {
public:
- MCELFStreamer &getStreamer() {
- return static_cast<MCELFStreamer &>(Streamer);
- }
HexagonTargetELFStreamer(MCStreamer &S, MCSubtargetInfo const &STI)
: HexagonTargetStreamer(S) {
auto Bits = STI.getFeatureBits();
@@ -167,6 +170,11 @@ public:
Flags = ELF::EF_HEXAGON_MACH_V4;
getStreamer().getAssembler().setELFHeaderEFlags(Flags);
}
+
+ MCELFStreamer &getStreamer() {
+ return static_cast<MCELFStreamer &>(Streamer);
+ }
+
void EmitCommonSymbolSorted(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment,
unsigned AccessSize) override {
@@ -175,6 +183,7 @@ public:
HexagonELFStreamer.HexagonMCEmitCommonSymbol(Symbol, Size, ByteAlignment,
AccessSize);
}
+
void EmitLocalCommonSymbolSorted(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment,
unsigned AccessSize) override {
@@ -184,7 +193,8 @@ public:
Symbol, Size, ByteAlignment, AccessSize);
}
};
-}
+
+} // end anonymous namespace
static MCAsmInfo *createHexagonMCAsmInfo(const MCRegisterInfo &MRI,
const Triple &TT) {
@@ -230,39 +240,39 @@ createHexagonObjectTargetStreamer(MCStreamer &S, MCSubtargetInfo const &STI) {
// Force static initialization.
extern "C" void LLVMInitializeHexagonTargetMC() {
// Register the MC asm info.
- RegisterMCAsmInfoFn X(TheHexagonTarget, createHexagonMCAsmInfo);
+ RegisterMCAsmInfoFn X(getTheHexagonTarget(), createHexagonMCAsmInfo);
// Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(TheHexagonTarget,
+ TargetRegistry::RegisterMCInstrInfo(getTheHexagonTarget(),
createHexagonMCInstrInfo);
// Register the MC register info.
- TargetRegistry::RegisterMCRegInfo(TheHexagonTarget,
+ TargetRegistry::RegisterMCRegInfo(getTheHexagonTarget(),
createHexagonMCRegisterInfo);
// Register the MC subtarget info.
- TargetRegistry::RegisterMCSubtargetInfo(TheHexagonTarget,
+ TargetRegistry::RegisterMCSubtargetInfo(getTheHexagonTarget(),
createHexagonMCSubtargetInfo);
// Register the MC Code Emitter
- TargetRegistry::RegisterMCCodeEmitter(TheHexagonTarget,
+ TargetRegistry::RegisterMCCodeEmitter(getTheHexagonTarget(),
createHexagonMCCodeEmitter);
// Register the asm backend
- TargetRegistry::RegisterMCAsmBackend(TheHexagonTarget,
+ TargetRegistry::RegisterMCAsmBackend(getTheHexagonTarget(),
createHexagonAsmBackend);
// Register the obj streamer
- TargetRegistry::RegisterELFStreamer(TheHexagonTarget, createMCStreamer);
+ TargetRegistry::RegisterELFStreamer(getTheHexagonTarget(), createMCStreamer);
// Register the asm streamer
- TargetRegistry::RegisterAsmTargetStreamer(TheHexagonTarget,
+ TargetRegistry::RegisterAsmTargetStreamer(getTheHexagonTarget(),
createMCAsmTargetStreamer);
// Register the MC Inst Printer
- TargetRegistry::RegisterMCInstPrinter(TheHexagonTarget,
+ TargetRegistry::RegisterMCInstPrinter(getTheHexagonTarget(),
createHexagonMCInstPrinter);
TargetRegistry::RegisterObjectTargetStreamer(
- TheHexagonTarget, createHexagonObjectTargetStreamer);
+ getTheHexagonTarget(), createHexagonObjectTargetStreamer);
}
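Note on the hunk above: the registration calls switch from the global object TheHexagonTarget to the accessor getTheHexagonTarget(). A minimal sketch of how such an accessor is usually defined, assuming it lives in the target's TargetInfo translation unit (that file is not part of this diff):

#include "llvm/Support/TargetRegistry.h"

namespace llvm {
Target &getTheHexagonTarget() {
  // Function-local static: constructed on first use, which avoids the
  // global-constructor ordering concerns of the old 'extern Target' object.
  static Target TheHexagonTarget;
  return TheHexagonTarget;
}
} // end namespace llvm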
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
index a005a014416b..6e677e9d9f86 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.h
@@ -14,11 +14,11 @@
#ifndef LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCTARGETDESC_H
#define LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCTARGETDESC_H
-#include <cstdint>
-
#include "llvm/Support/CommandLine.h"
+#include <cstdint>
namespace llvm {
+
struct InstrItinerary;
struct InstrStage;
class MCAsmBackend;
@@ -28,13 +28,14 @@ class MCInstrInfo;
class MCObjectWriter;
class MCRegisterInfo;
class MCSubtargetInfo;
+class MCTargetOptions;
class Target;
class Triple;
class StringRef;
class raw_ostream;
class raw_pwrite_stream;
-extern Target TheHexagonTarget;
+Target &getTheHexagonTarget();
extern cl::opt<bool> HexagonDisableCompound;
extern cl::opt<bool> HexagonDisableDuplex;
extern const InstrStage HexagonStages[];
@@ -47,16 +48,19 @@ MCCodeEmitter *createHexagonMCCodeEmitter(const MCInstrInfo &MCII,
MCAsmBackend *createHexagonAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU);
+ const Triple &TT, StringRef CPU,
+ const MCTargetOptions &Options);
MCObjectWriter *createHexagonELFObjectWriter(raw_pwrite_stream &OS,
uint8_t OSABI, StringRef CPU);
-namespace HEXAGON_MC {
+namespace Hexagon_MC {
+
StringRef selectHexagonCPU(const Triple &TT, StringRef CPU);
-}
-} // End llvm namespace
+} // end namespace Hexagon_MC
+
+} // end namespace llvm
// Define symbolic names for Hexagon registers. This defines a mapping from
// register name to register number.
@@ -72,4 +76,4 @@ namespace HEXAGON_MC {
#define GET_SUBTARGETINFO_ENUM
#include "HexagonGenSubtargetInfo.inc"
-#endif
+#endif // LLVM_LIB_TARGET_HEXAGON_MCTARGETDESC_HEXAGONMCTARGETDESC_H
diff --git a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
index 4e1cce3bd7d1..88f37d620dcf 100644
--- a/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
@@ -171,7 +171,7 @@ bool HexagonShuffler::check() {
unsigned slotJump = slotFirstJump;
unsigned slotLoadStore = slotFirstLoadStore;
// Number of branches, solo branches, indirect branches.
- unsigned jumps = 0, jump1 = 0, jumpr = 0;
+ unsigned jumps = 0, jump1 = 0;
// Number of memory operations, loads, solo loads, stores, solo stores, single
// stores.
unsigned memory = 0, loads = 0, load0 = 0, stores = 0, store0 = 0, store1 = 0;
@@ -207,6 +207,8 @@ bool HexagonShuffler::check() {
++pSlot3Cnt;
slot3ISJ = ISJ;
}
+ if (HexagonMCInstrInfo::isCofMax1(MCII, *ID))
+ ++jump1;
switch (HexagonMCInstrInfo::getType(MCII, *ID)) {
case HexagonII::TypeXTYPE:
@@ -214,8 +216,6 @@ bool HexagonShuffler::check() {
++xtypeFloat;
break;
case HexagonII::TypeJR:
- ++jumpr;
- // Fall-through.
case HexagonII::TypeJ:
++jumps;
break;
@@ -244,7 +244,7 @@ bool HexagonShuffler::check() {
if (ISJ->Core.getUnits() == slotSingleStore)
++store0;
break;
- case HexagonII::TypeMEMOP:
+ case HexagonII::TypeV4LDST:
++loads;
++stores;
++store1;
@@ -304,7 +304,7 @@ bool HexagonShuffler::check() {
if (HexagonMCInstrInfo::getDesc(MCII, *ID).isBranch() ||
HexagonMCInstrInfo::getDesc(MCII, *ID).isCall())
if (jumps > 1) {
- if (jumpr || slotJump < slotLastJump) {
+ if (slotJump < slotLastJump) {
// Error if indirect branch with another branch or
// no more slots available for branches.
Error = SHUFFLE_ERROR_BRANCHES;
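Note on the hunk above: the shuffler now counts branch-limited instructions through the new isCofMax1 predicate instead of special-casing TypeJR, and isCofMax1 simply reads one bit packed into the instruction's TSFlags word. A toy sketch of that extraction pattern, with an illustrative bit position rather than the real value from HexagonBaseInfo.h:

#include <cstdint>

// Illustrative position/mask only; the actual constants live in HexagonII.
enum : unsigned { CofMax1Pos = 60, CofMax1Mask = 0x1 };

static bool isCofMax1(uint64_t TSFlags) {
  // Shift the packed flags word down and mask off the single property bit.
  return (TSFlags >> CofMax1Pos) & CofMax1Mask;
}

// Example: isCofMax1(uint64_t(1) << CofMax1Pos) evaluates to true.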
diff --git a/contrib/llvm/lib/Target/Hexagon/RDFCopy.cpp b/contrib/llvm/lib/Target/Hexagon/RDFCopy.cpp
index 61a83dada218..392871628d98 100644
--- a/contrib/llvm/lib/Target/Hexagon/RDFCopy.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/RDFCopy.cpp
@@ -32,43 +32,19 @@ bool CopyPropagation::interpretAsCopy(const MachineInstr *MI, EqualityMap &EM) {
case TargetOpcode::COPY: {
const MachineOperand &Dst = MI->getOperand(0);
const MachineOperand &Src = MI->getOperand(1);
- RegisterRef DstR = { Dst.getReg(), Dst.getSubReg() };
- RegisterRef SrcR = { Src.getReg(), Src.getSubReg() };
- if (TargetRegisterInfo::isVirtualRegister(DstR.Reg)) {
- if (!TargetRegisterInfo::isVirtualRegister(SrcR.Reg))
- return false;
- MachineRegisterInfo &MRI = DFG.getMF().getRegInfo();
- if (MRI.getRegClass(DstR.Reg) != MRI.getRegClass(SrcR.Reg))
- return false;
- } else if (TargetRegisterInfo::isPhysicalRegister(DstR.Reg)) {
- if (!TargetRegisterInfo::isPhysicalRegister(SrcR.Reg))
- return false;
- const TargetRegisterInfo &TRI = DFG.getTRI();
- if (TRI.getMinimalPhysRegClass(DstR.Reg) !=
- TRI.getMinimalPhysRegClass(SrcR.Reg))
- return false;
- } else {
- // Copy between some unknown objects.
+ RegisterRef DstR = DFG.makeRegRef(Dst.getReg(), Dst.getSubReg());
+ RegisterRef SrcR = DFG.makeRegRef(Src.getReg(), Src.getSubReg());
+ assert(TargetRegisterInfo::isPhysicalRegister(DstR.Reg));
+ assert(TargetRegisterInfo::isPhysicalRegister(SrcR.Reg));
+ const TargetRegisterInfo &TRI = DFG.getTRI();
+ if (TRI.getMinimalPhysRegClass(DstR.Reg) !=
+ TRI.getMinimalPhysRegClass(SrcR.Reg))
return false;
- }
EM.insert(std::make_pair(DstR, SrcR));
return true;
}
- case TargetOpcode::REG_SEQUENCE: {
- const MachineOperand &Dst = MI->getOperand(0);
- RegisterRef DefR = { Dst.getReg(), Dst.getSubReg() };
- SmallVector<TargetInstrInfo::RegSubRegPairAndIdx,2> Inputs;
- const TargetInstrInfo &TII = DFG.getTII();
- if (!TII.getRegSequenceInputs(*MI, 0, Inputs))
- return false;
- for (auto I : Inputs) {
- unsigned S = DFG.getTRI().composeSubRegIndices(DefR.Sub, I.SubIdx);
- RegisterRef DR = { DefR.Reg, S };
- RegisterRef SR = { I.Reg, I.SubReg };
- EM.insert(std::make_pair(DR, SR));
- }
- return true;
- }
+ case TargetOpcode::REG_SEQUENCE:
+ llvm_unreachable("Unexpected REG_SEQUENCE");
}
return false;
}
@@ -79,7 +55,7 @@ void CopyPropagation::recordCopy(NodeAddr<StmtNode*> SA, EqualityMap &EM) {
Copies.push_back(SA.Id);
for (auto I : EM) {
- auto FS = DefM.find(I.second);
+ auto FS = DefM.find(I.second.Reg);
if (FS == DefM.end() || FS->second.empty())
continue; // Undefined source
RDefMap[I.second][SA.Id] = FS->second.top()->Id;
@@ -92,7 +68,7 @@ void CopyPropagation::recordCopy(NodeAddr<StmtNode*> SA, EqualityMap &EM) {
void CopyPropagation::updateMap(NodeAddr<InstrNode*> IA) {
RegisterSet RRs;
for (NodeAddr<RefNode*> RA : IA.Addr->members(DFG))
- RRs.insert(RA.Addr->getRegRef());
+ RRs.insert(RA.Addr->getRegRef(DFG));
bool Common = false;
for (auto &R : RDefMap) {
if (!RRs.count(R.first))
@@ -106,7 +82,7 @@ void CopyPropagation::updateMap(NodeAddr<InstrNode*> IA) {
for (auto &R : RDefMap) {
if (!RRs.count(R.first))
continue;
- auto F = DefM.find(R.first);
+ auto F = DefM.find(R.first.Reg);
if (F == DefM.end() || F->second.empty())
continue;
R.second[IA.Id] = F->second.top()->Id;
@@ -168,6 +144,18 @@ bool CopyPropagation::run() {
bool HasLimit = CpLimit.getNumOccurrences() > 0;
#endif
+ auto MinPhysReg = [this] (RegisterRef RR) -> unsigned {
+ const TargetRegisterInfo &TRI = DFG.getTRI();
+ const TargetRegisterClass &RC = *TRI.getMinimalPhysRegClass(RR.Reg);
+ if ((RC.LaneMask & RR.Mask) == RC.LaneMask)
+ return RR.Reg;
+ for (MCSubRegIndexIterator S(RR.Reg, &TRI); S.isValid(); ++S)
+ if (RR.Mask == TRI.getSubRegIndexLaneMask(S.getSubRegIndex()))
+ return S.getSubReg();
+ llvm_unreachable("Should have found a register");
+ return 0;
+ };
+
for (auto C : Copies) {
#ifndef NDEBUG
if (HasLimit && CpCount >= CpLimit)
@@ -180,7 +168,7 @@ bool CopyPropagation::run() {
EqualityMap &EM = FS->second;
for (NodeAddr<DefNode*> DA : SA.Addr->members_if(DFG.IsDef, DFG)) {
- RegisterRef DR = DA.Addr->getRegRef();
+ RegisterRef DR = DA.Addr->getRegRef(DFG);
auto FR = EM.find(DR);
if (FR == EM.end())
continue;
@@ -197,7 +185,7 @@ bool CopyPropagation::run() {
uint16_t F = UA.Addr->getFlags();
if ((F & NodeAttrs::PhiRef) || (F & NodeAttrs::Fixed))
continue;
- if (UA.Addr->getRegRef() != DR)
+ if (UA.Addr->getRegRef(DFG) != DR)
continue;
NodeAddr<InstrNode*> IA = UA.Addr->getOwner(DFG);
@@ -214,8 +202,9 @@ bool CopyPropagation::run() {
<< *NodeAddr<StmtNode*>(IA).Addr->getCode();
}
- Op.setReg(SR.Reg);
- Op.setSubReg(SR.Sub);
+ unsigned NewReg = MinPhysReg(SR);
+ Op.setReg(NewReg);
+ Op.setSubReg(0);
DFG.unlinkUse(UA, false);
if (RDefSR_SA != 0) {
UA.Addr->linkToDef(UA.Id, DFG.addr<DefNode*>(RDefSR_SA));
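Note on the hunk above: the MinPhysReg lambda resolves a (register, lane mask) pair back to a concrete physical register before the operand is rewritten with setReg/setSubReg(0). A self-contained toy sketch of the same lookup, using plain data in place of the TargetRegisterInfo queries:

#include <cstdint>
#include <vector>

struct SubReg { unsigned Reg; uint64_t Lanes; };

// Return Super when the mask covers all of its lanes, otherwise the
// sub-register whose lane mask matches exactly; 0 means "not found".
unsigned minPhysReg(unsigned Super, uint64_t SuperLanes, uint64_t Mask,
                    const std::vector<SubReg> &Subs) {
  if ((SuperLanes & Mask) == SuperLanes)
    return Super;
  for (const SubReg &S : Subs)
    if (S.Lanes == Mask)
      return S.Reg;
  return 0;
}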
diff --git a/contrib/llvm/lib/Target/Hexagon/RDFCopy.h b/contrib/llvm/lib/Target/Hexagon/RDFCopy.h
index e8a576cf57a3..517f17cc9c64 100644
--- a/contrib/llvm/lib/Target/Hexagon/RDFCopy.h
+++ b/contrib/llvm/lib/Target/Hexagon/RDFCopy.h
@@ -28,6 +28,7 @@ namespace rdf {
bool run();
void trace(bool On) { Trace = On; }
bool trace() const { return Trace; }
+ DataFlowGraph &getDFG() { return DFG; }
typedef std::map<RegisterRef, RegisterRef> EqualityMap;
virtual bool interpretAsCopy(const MachineInstr *MI, EqualityMap &EM);
diff --git a/contrib/llvm/lib/Target/Hexagon/RDFGraph.cpp b/contrib/llvm/lib/Target/Hexagon/RDFGraph.cpp
index 273d6b7cb0c8..33c3f03790f3 100644
--- a/contrib/llvm/lib/Target/Hexagon/RDFGraph.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/RDFGraph.cpp
@@ -18,6 +18,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetRegisterInfo.h"
using namespace llvm;
@@ -28,6 +29,12 @@ using namespace rdf;
namespace llvm {
namespace rdf {
+raw_ostream &operator<< (raw_ostream &OS, const PrintLaneMaskOpt &P) {
+ if (!P.Mask.all())
+ OS << ':' << PrintLaneMask(P.Mask);
+ return OS;
+}
+
template<>
raw_ostream &operator<< (raw_ostream &OS, const Print<RegisterRef> &P) {
auto &TRI = P.G.getTRI();
@@ -35,13 +42,7 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<RegisterRef> &P) {
OS << TRI.getName(P.Obj.Reg);
else
OS << '#' << P.Obj.Reg;
- if (P.Obj.Sub > 0) {
- OS << ':';
- if (P.Obj.Sub < TRI.getNumSubRegIndices())
- OS << TRI.getSubRegIndexName(P.Obj.Sub);
- else
- OS << '#' << P.Obj.Sub;
- }
+ OS << PrintLaneMaskOpt(P.Obj.Mask);
return OS;
}
@@ -62,6 +63,10 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<NodeId> &P) {
}
break;
case NodeAttrs::Ref:
+ if (Flags & NodeAttrs::Undef)
+ OS << '/';
+ if (Flags & NodeAttrs::Dead)
+ OS << '\\';
if (Flags & NodeAttrs::Preserving)
OS << '+';
if (Flags & NodeAttrs::Clobbering)
@@ -87,7 +92,7 @@ namespace {
void printRefHeader(raw_ostream &OS, const NodeAddr<RefNode*> RA,
const DataFlowGraph &G) {
OS << Print<NodeId>(RA.Id, G) << '<'
- << Print<RegisterRef>(RA.Addr->getRegRef(), G) << '>';
+ << Print<RegisterRef>(RA.Addr->getRegRef(G), G) << '>';
if (RA.Addr->getFlags() & NodeAttrs::Fixed)
OS << '!';
}
@@ -208,9 +213,27 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<PhiNode*>> &P) {
template<>
raw_ostream &operator<< (raw_ostream &OS,
const Print<NodeAddr<StmtNode*>> &P) {
- unsigned Opc = P.Obj.Addr->getCode()->getOpcode();
- OS << Print<NodeId>(P.Obj.Id, P.G) << ": " << P.G.getTII().getName(Opc)
- << " [" << PrintListV<RefNode*>(P.Obj.Addr->members(P.G), P.G) << ']';
+ const MachineInstr &MI = *P.Obj.Addr->getCode();
+ unsigned Opc = MI.getOpcode();
+ OS << Print<NodeId>(P.Obj.Id, P.G) << ": " << P.G.getTII().getName(Opc);
+ // Print the target for calls and branches (for readability).
+ if (MI.isCall() || MI.isBranch()) {
+ MachineInstr::const_mop_iterator T =
+ find_if(MI.operands(),
+ [] (const MachineOperand &Op) -> bool {
+ return Op.isMBB() || Op.isGlobal() || Op.isSymbol();
+ });
+ if (T != MI.operands_end()) {
+ OS << ' ';
+ if (T->isMBB())
+ OS << "BB#" << T->getMBB()->getNumber();
+ else if (T->isGlobal())
+ OS << T->getGlobal()->getName();
+ else if (T->isSymbol())
+ OS << T->getSymbolName();
+ }
+ }
+ OS << " [" << PrintListV<RefNode*>(P.Obj.Addr->members(P.G), P.G) << ']';
return OS;
}
@@ -234,29 +257,29 @@ raw_ostream &operator<< (raw_ostream &OS,
template<>
raw_ostream &operator<< (raw_ostream &OS,
const Print<NodeAddr<BlockNode*>> &P) {
- auto *BB = P.Obj.Addr->getCode();
+ MachineBasicBlock *BB = P.Obj.Addr->getCode();
unsigned NP = BB->pred_size();
std::vector<int> Ns;
auto PrintBBs = [&OS,&P] (std::vector<int> Ns) -> void {
unsigned N = Ns.size();
- for (auto I : Ns) {
+ for (int I : Ns) {
OS << "BB#" << I;
if (--N)
OS << ", ";
}
};
- OS << Print<NodeId>(P.Obj.Id, P.G) << ": === BB#" << BB->getNumber()
- << " === preds(" << NP << "): ";
- for (auto I : BB->predecessors())
- Ns.push_back(I->getNumber());
+ OS << Print<NodeId>(P.Obj.Id, P.G) << ": --- BB#" << BB->getNumber()
+ << " --- preds(" << NP << "): ";
+ for (MachineBasicBlock *B : BB->predecessors())
+ Ns.push_back(B->getNumber());
PrintBBs(Ns);
unsigned NS = BB->succ_size();
OS << " succs(" << NS << "): ";
Ns.clear();
- for (auto I : BB->successors())
- Ns.push_back(I->getNumber());
+ for (MachineBasicBlock *B : BB->successors())
+ Ns.push_back(B->getNumber());
PrintBBs(Ns);
OS << '\n';
@@ -286,11 +309,17 @@ raw_ostream &operator<< (raw_ostream &OS, const Print<RegisterSet> &P) {
}
template<>
+raw_ostream &operator<< (raw_ostream &OS, const Print<RegisterAggr> &P) {
+ P.Obj.print(OS);
+ return OS;
+}
+
+template<>
raw_ostream &operator<< (raw_ostream &OS,
const Print<DataFlowGraph::DefStack> &P) {
for (auto I = P.Obj.top(), E = P.Obj.bottom(); I != E; ) {
OS << Print<NodeId>(I->Id, P.G)
- << '<' << Print<RegisterRef>(I->Addr->getRegRef(), P.G) << '>';
+ << '<' << Print<RegisterRef>(I->Addr->getRegRef(P.G), P.G) << '>';
I.down();
if (I != E)
OS << ' ';
@@ -376,27 +405,28 @@ void NodeBase::append(NodeAddr<NodeBase*> NA) {
// Fundamental node manipulator functions.
// Obtain the register reference from a reference node.
-RegisterRef RefNode::getRegRef() const {
+RegisterRef RefNode::getRegRef(const DataFlowGraph &G) const {
assert(NodeAttrs::type(Attrs) == NodeAttrs::Ref);
if (NodeAttrs::flags(Attrs) & NodeAttrs::PhiRef)
- return Ref.RR;
+ return G.unpack(Ref.PR);
assert(Ref.Op != nullptr);
- return { Ref.Op->getReg(), Ref.Op->getSubReg() };
+ return G.makeRegRef(Ref.Op->getReg(), Ref.Op->getSubReg());
}
// Set the register reference in the reference node directly (for references
// in phi nodes).
-void RefNode::setRegRef(RegisterRef RR) {
+void RefNode::setRegRef(RegisterRef RR, DataFlowGraph &G) {
assert(NodeAttrs::type(Attrs) == NodeAttrs::Ref);
assert(NodeAttrs::flags(Attrs) & NodeAttrs::PhiRef);
- Ref.RR = RR;
+ Ref.PR = G.pack(RR);
}
// Set the register reference in the reference node based on a machine
// operand (for references in statement nodes).
-void RefNode::setRegRef(MachineOperand *Op) {
+void RefNode::setRegRef(MachineOperand *Op, DataFlowGraph &G) {
assert(NodeAttrs::type(Attrs) == NodeAttrs::Ref);
assert(!(NodeAttrs::flags(Attrs) & NodeAttrs::PhiRef));
+ (void)G;
Ref.Op = Op;
}
@@ -442,7 +472,7 @@ NodeAddr<NodeBase*> CodeNode::getLastMember(const DataFlowGraph &G) const {
// Add node NA at the end of the member list of the given code node.
void CodeNode::addMember(NodeAddr<NodeBase*> NA, const DataFlowGraph &G) {
- auto ML = getLastMember(G);
+ NodeAddr<NodeBase*> ML = getLastMember(G);
if (ML.Id != 0) {
ML.Addr->append(NA);
} else {
@@ -463,7 +493,7 @@ void CodeNode::addMemberAfter(NodeAddr<NodeBase*> MA, NodeAddr<NodeBase*> NA,
// Remove member node NA from the given code node.
void CodeNode::removeMember(NodeAddr<NodeBase*> NA, const DataFlowGraph &G) {
- auto MA = getFirstMember(G);
+ NodeAddr<NodeBase*> MA = getFirstMember(G);
assert(MA.Id != 0);
// Special handling if the member to remove is the first member.
@@ -514,7 +544,7 @@ NodeAddr<NodeBase*> InstrNode::getOwner(const DataFlowGraph &G) {
// Add the phi node PA to the given block node.
void BlockNode::addPhi(NodeAddr<PhiNode*> PA, const DataFlowGraph &G) {
- auto M = getFirstMember(G);
+ NodeAddr<NodeBase*> M = getFirstMember(G);
if (M.Id == 0) {
addMember(PA, G);
return;
@@ -561,114 +591,6 @@ NodeAddr<BlockNode*> FuncNode::getEntryBlock(const DataFlowGraph &G) {
}
-// Register aliasing information.
-//
-// In theory, the lane information could be used to determine register
-// covering (and aliasing), but depending on the sub-register structure,
-// the lane mask information may be missing. The covering information
-// must be available for this framework to work, so relying solely on
-// the lane data is not sufficient.
-
-// Determine whether RA covers RB.
-bool RegisterAliasInfo::covers(RegisterRef RA, RegisterRef RB) const {
- if (RA == RB)
- return true;
- if (TargetRegisterInfo::isVirtualRegister(RA.Reg)) {
- assert(TargetRegisterInfo::isVirtualRegister(RB.Reg));
- if (RA.Reg != RB.Reg)
- return false;
- if (RA.Sub == 0)
- return true;
- return TRI.composeSubRegIndices(RA.Sub, RB.Sub) == RA.Sub;
- }
-
- assert(TargetRegisterInfo::isPhysicalRegister(RA.Reg) &&
- TargetRegisterInfo::isPhysicalRegister(RB.Reg));
- unsigned A = RA.Sub != 0 ? TRI.getSubReg(RA.Reg, RA.Sub) : RA.Reg;
- unsigned B = RB.Sub != 0 ? TRI.getSubReg(RB.Reg, RB.Sub) : RB.Reg;
- return TRI.isSubRegister(A, B);
-}
-
-// Determine whether RR is covered by the set of references RRs.
-bool RegisterAliasInfo::covers(const RegisterSet &RRs, RegisterRef RR) const {
- if (RRs.count(RR))
- return true;
-
- // For virtual registers, we cannot accurately determine covering based
- // on subregisters. If RR itself is not present in RRs, but it has a sub-
- // register reference, check for the super-register alone. Otherwise,
- // assume non-covering.
- if (TargetRegisterInfo::isVirtualRegister(RR.Reg)) {
- if (RR.Sub != 0)
- return RRs.count({RR.Reg, 0});
- return false;
- }
-
- // If any super-register of RR is present, then RR is covered.
- unsigned Reg = RR.Sub == 0 ? RR.Reg : TRI.getSubReg(RR.Reg, RR.Sub);
- for (MCSuperRegIterator SR(Reg, &TRI); SR.isValid(); ++SR)
- if (RRs.count({*SR, 0}))
- return true;
-
- return false;
-}
-
-// Get the list of references aliased to RR.
-std::vector<RegisterRef> RegisterAliasInfo::getAliasSet(RegisterRef RR) const {
- // Do not include RR in the alias set. For virtual registers return an
- // empty set.
- std::vector<RegisterRef> AS;
- if (TargetRegisterInfo::isVirtualRegister(RR.Reg))
- return AS;
- assert(TargetRegisterInfo::isPhysicalRegister(RR.Reg));
- unsigned R = RR.Reg;
- if (RR.Sub)
- R = TRI.getSubReg(RR.Reg, RR.Sub);
-
- for (MCRegAliasIterator AI(R, &TRI, false); AI.isValid(); ++AI)
- AS.push_back(RegisterRef({*AI, 0}));
- return AS;
-}
-
-// Check whether RA and RB are aliased.
-bool RegisterAliasInfo::alias(RegisterRef RA, RegisterRef RB) const {
- bool VirtA = TargetRegisterInfo::isVirtualRegister(RA.Reg);
- bool VirtB = TargetRegisterInfo::isVirtualRegister(RB.Reg);
- bool PhysA = TargetRegisterInfo::isPhysicalRegister(RA.Reg);
- bool PhysB = TargetRegisterInfo::isPhysicalRegister(RB.Reg);
-
- if (VirtA != VirtB)
- return false;
-
- if (VirtA) {
- if (RA.Reg != RB.Reg)
- return false;
- // RA and RB refer to the same register. If any of them refer to the
- // whole register, they must be aliased.
- if (RA.Sub == 0 || RB.Sub == 0)
- return true;
- unsigned SA = TRI.getSubRegIdxSize(RA.Sub);
- unsigned OA = TRI.getSubRegIdxOffset(RA.Sub);
- unsigned SB = TRI.getSubRegIdxSize(RB.Sub);
- unsigned OB = TRI.getSubRegIdxOffset(RB.Sub);
- if (OA <= OB && OA+SA > OB)
- return true;
- if (OB <= OA && OB+SB > OA)
- return true;
- return false;
- }
-
- assert(PhysA && PhysB);
- (void)PhysA, (void)PhysB;
- unsigned A = RA.Sub ? TRI.getSubReg(RA.Reg, RA.Sub) : RA.Reg;
- unsigned B = RB.Sub ? TRI.getSubReg(RB.Reg, RB.Sub) : RB.Reg;
- for (MCRegAliasIterator I(A, &TRI, true); I.isValid(); ++I)
- if (B == *I)
- return true;
- return false;
-}
-
-
// Target operand information.
//
@@ -695,7 +617,7 @@ bool TargetOperandInfo::isFixedReg(const MachineInstr &In, unsigned OpNum)
return true;
// Check for a tail call.
if (In.isBranch())
- for (auto &O : In.operands())
+ for (const MachineOperand &O : In.operands())
if (O.isGlobal() || O.isSymbol())
return true;
@@ -708,7 +630,7 @@ bool TargetOperandInfo::isFixedReg(const MachineInstr &In, unsigned OpNum)
// uses or defs, and those lists do not allow sub-registers.
if (Op.getSubReg() != 0)
return false;
- unsigned Reg = Op.getReg();
+ RegisterId Reg = Op.getReg();
const MCPhysReg *ImpR = Op.isDef() ? D.getImplicitDefs()
: D.getImplicitUses();
if (!ImpR)
@@ -720,16 +642,118 @@ bool TargetOperandInfo::isFixedReg(const MachineInstr &In, unsigned OpNum)
}
+RegisterRef RegisterAggr::normalize(RegisterRef RR) const {
+ RegisterId SuperReg = RR.Reg;
+ while (true) {
+ MCSuperRegIterator SR(SuperReg, &TRI, false);
+ if (!SR.isValid())
+ break;
+ SuperReg = *SR;
+ }
+
+ const TargetRegisterClass &RC = *TRI.getMinimalPhysRegClass(RR.Reg);
+ LaneBitmask Common = RR.Mask & RC.LaneMask;
+ uint32_t Sub = TRI.getSubRegIndex(SuperReg, RR.Reg);
+ LaneBitmask SuperMask = TRI.composeSubRegIndexLaneMask(Sub, Common);
+ return RegisterRef(SuperReg, SuperMask);
+}
+
+bool RegisterAggr::hasAliasOf(RegisterRef RR) const {
+ RegisterRef NR = normalize(RR);
+ auto F = Masks.find(NR.Reg);
+ if (F != Masks.end()) {
+ if ((F->second & NR.Mask).any())
+ return true;
+ }
+ if (CheckUnits) {
+ for (MCRegUnitIterator U(RR.Reg, &TRI); U.isValid(); ++U)
+ if (ExpAliasUnits.test(*U))
+ return true;
+ }
+ return false;
+}
+
+bool RegisterAggr::hasCoverOf(RegisterRef RR) const {
+ // Always have a cover for empty lane mask.
+ RegisterRef NR = normalize(RR);
+ if (NR.Mask.none())
+ return true;
+ auto F = Masks.find(NR.Reg);
+ if (F == Masks.end())
+ return false;
+ return (NR.Mask & F->second) == NR.Mask;
+}
+
+RegisterAggr &RegisterAggr::insert(RegisterRef RR) {
+ RegisterRef NR = normalize(RR);
+ auto F = Masks.find(NR.Reg);
+ if (F == Masks.end())
+ Masks.insert({NR.Reg, NR.Mask});
+ else
+ F->second |= NR.Mask;
+
+ // Visit all register units to see if there are any that were created
+ // by explicit aliases. Add those that were to the bit vector.
+ for (MCRegUnitIterator U(RR.Reg, &TRI); U.isValid(); ++U) {
+ MCRegUnitRootIterator R(*U, &TRI);
+ ++R;
+ if (!R.isValid())
+ continue;
+ ExpAliasUnits.set(*U);
+ CheckUnits = true;
+ }
+ return *this;
+}
+
+RegisterAggr &RegisterAggr::insert(const RegisterAggr &RG) {
+ for (std::pair<RegisterId,LaneBitmask> P : RG.Masks)
+ insert(RegisterRef(P.first, P.second));
+ return *this;
+}
+
+RegisterAggr &RegisterAggr::clear(RegisterRef RR) {
+ RegisterRef NR = normalize(RR);
+ auto F = Masks.find(NR.Reg);
+ if (F == Masks.end())
+ return *this;
+ LaneBitmask NewM = F->second & ~NR.Mask;
+ if (NewM.none())
+ Masks.erase(F);
+ else
+ F->second = NewM;
+ return *this;
+}
+
+RegisterAggr &RegisterAggr::clear(const RegisterAggr &RG) {
+ for (std::pair<RegisterId,LaneBitmask> P : RG.Masks)
+ clear(RegisterRef(P.first, P.second));
+ return *this;
+}
+
+RegisterRef RegisterAggr::clearIn(RegisterRef RR) const {
+ RegisterAggr T(TRI);
+ T.insert(RR).clear(*this);
+ if (T.empty())
+ return RegisterRef();
+ return RegisterRef(T.begin()->first, T.begin()->second);
+}
+
+void RegisterAggr::print(raw_ostream &OS) const {
+ OS << '{';
+ for (auto I : Masks)
+ OS << ' ' << PrintReg(I.first, &TRI) << PrintLaneMaskOpt(I.second);
+ OS << " }";
+}
+
+
//
// The data flow graph construction.
//
DataFlowGraph::DataFlowGraph(MachineFunction &mf, const TargetInstrInfo &tii,
const TargetRegisterInfo &tri, const MachineDominatorTree &mdt,
- const MachineDominanceFrontier &mdf, const RegisterAliasInfo &rai,
- const TargetOperandInfo &toi)
- : TimeG("rdf"), MF(mf), TII(tii), TRI(tri), MDT(mdt), MDF(mdf), RAI(rai),
- TOI(toi) {
+ const MachineDominanceFrontier &mdf, const TargetOperandInfo &toi)
+ : LMI(), MF(mf), TII(tii), TRI(tri), MDT(mdt), MDF(mdf), TOI(toi) {
}
@@ -821,6 +845,33 @@ unsigned DataFlowGraph::DefStack::nextDown(unsigned P) const {
return P;
}
+
+// Register information.
+
+// Get the list of references aliased to RR. Lane masks are ignored.
+RegisterSet DataFlowGraph::getAliasSet(RegisterId Reg) const {
+ // Do not include RR in the alias set.
+ RegisterSet AS;
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+
+ for (MCRegAliasIterator AI(Reg, &TRI, false); AI.isValid(); ++AI)
+ AS.insert(RegisterRef(*AI));
+ return AS;
+}
+
+RegisterSet DataFlowGraph::getLandingPadLiveIns() const {
+ RegisterSet LR;
+ const Function &F = *MF.getFunction();
+ const Constant *PF = F.hasPersonalityFn() ? F.getPersonalityFn()
+ : nullptr;
+ const TargetLowering &TLI = *MF.getSubtarget().getTargetLowering();
+ if (RegisterId R = TLI.getExceptionPointerRegister(PF))
+ LR.insert(RegisterRef(R));
+ if (RegisterId R = TLI.getExceptionSelectorRegister(PF))
+ LR.insert(RegisterRef(R));
+ return LR;
+}
+
// Node management functions.
// Get the pointer to the node with the id N.
@@ -870,7 +921,7 @@ NodeAddr<NodeBase*> DataFlowGraph::cloneNode(const NodeAddr<NodeBase*> B) {
NodeAddr<UseNode*> DataFlowGraph::newUse(NodeAddr<InstrNode*> Owner,
MachineOperand &Op, uint16_t Flags) {
NodeAddr<UseNode*> UA = newNode(NodeAttrs::Ref | NodeAttrs::Use | Flags);
- UA.Addr->setRegRef(&Op);
+ UA.Addr->setRegRef(&Op, *this);
return UA;
}
@@ -878,7 +929,7 @@ NodeAddr<PhiUseNode*> DataFlowGraph::newPhiUse(NodeAddr<PhiNode*> Owner,
RegisterRef RR, NodeAddr<BlockNode*> PredB, uint16_t Flags) {
NodeAddr<PhiUseNode*> PUA = newNode(NodeAttrs::Ref | NodeAttrs::Use | Flags);
assert(Flags & NodeAttrs::PhiRef);
- PUA.Addr->setRegRef(RR);
+ PUA.Addr->setRegRef(RR, *this);
PUA.Addr->setPredecessor(PredB.Id);
return PUA;
}
@@ -886,7 +937,7 @@ NodeAddr<PhiUseNode*> DataFlowGraph::newPhiUse(NodeAddr<PhiNode*> Owner,
NodeAddr<DefNode*> DataFlowGraph::newDef(NodeAddr<InstrNode*> Owner,
MachineOperand &Op, uint16_t Flags) {
NodeAddr<DefNode*> DA = newNode(NodeAttrs::Ref | NodeAttrs::Def | Flags);
- DA.Addr->setRegRef(&Op);
+ DA.Addr->setRegRef(&Op, *this);
return DA;
}
@@ -894,7 +945,7 @@ NodeAddr<DefNode*> DataFlowGraph::newDef(NodeAddr<InstrNode*> Owner,
RegisterRef RR, uint16_t Flags) {
NodeAddr<DefNode*> DA = newNode(NodeAttrs::Ref | NodeAttrs::Def | Flags);
assert(Flags & NodeAttrs::PhiRef);
- DA.Addr->setRegRef(RR);
+ DA.Addr->setRegRef(RR, *this);
return DA;
}
@@ -934,17 +985,20 @@ void DataFlowGraph::build(unsigned Options) {
if (MF.empty())
return;
- for (auto &B : MF) {
- auto BA = newBlock(Func, &B);
- for (auto &I : B) {
+ for (MachineBasicBlock &B : MF) {
+ NodeAddr<BlockNode*> BA = newBlock(Func, &B);
+ BlockNodes.insert(std::make_pair(&B, BA));
+ for (MachineInstr &I : B) {
if (I.isDebugValue())
continue;
buildStmt(BA, I);
}
}
- // Collect information about block references.
NodeAddr<BlockNode*> EA = Func.Addr->getEntryBlock(*this);
+ NodeList Blocks = Func.Addr->members(*this);
+
+ // Collect information about block references.
BlockRefsMap RefM;
buildBlockRefs(EA, RefM);
@@ -952,16 +1006,48 @@ void DataFlowGraph::build(unsigned Options) {
MachineRegisterInfo &MRI = MF.getRegInfo();
for (auto I = MRI.livein_begin(), E = MRI.livein_end(); I != E; ++I) {
NodeAddr<PhiNode*> PA = newPhi(EA);
- RegisterRef RR = { I->first, 0 };
+ RegisterRef RR = RegisterRef(I->first);
uint16_t PhiFlags = NodeAttrs::PhiRef | NodeAttrs::Preserving;
NodeAddr<DefNode*> DA = newDef(PA, RR, PhiFlags);
PA.Addr->addMember(DA, *this);
}
+ // Add phis for landing pads.
+ // Landing pads, unlike usual basic blocks, are not entered through
+ // branches in the program, or fall-throughs from other blocks. They
+ // are entered from the exception handling runtime and target's ABI
+ // may define certain registers as defined on entry to such a block.
+ RegisterSet EHRegs = getLandingPadLiveIns();
+ if (!EHRegs.empty()) {
+ for (NodeAddr<BlockNode*> BA : Blocks) {
+ const MachineBasicBlock &B = *BA.Addr->getCode();
+ if (!B.isEHPad())
+ continue;
+
+ // Prepare a list of NodeIds of the block's predecessors.
+ NodeList Preds;
+ for (MachineBasicBlock *PB : B.predecessors())
+ Preds.push_back(findBlock(PB));
+
+ // Build phi nodes for each live-in.
+ for (RegisterRef RR : EHRegs) {
+ NodeAddr<PhiNode*> PA = newPhi(BA);
+ uint16_t PhiFlags = NodeAttrs::PhiRef | NodeAttrs::Preserving;
+ // Add def:
+ NodeAddr<DefNode*> DA = newDef(PA, RR, PhiFlags);
+ PA.Addr->addMember(DA, *this);
+ // Add uses (no reaching defs for phi uses):
+ for (NodeAddr<BlockNode*> PBA : Preds) {
+ NodeAddr<PhiUseNode*> PUA = newPhiUse(PA, RR, PBA);
+ PA.Addr->addMember(PUA, *this);
+ }
+ }
+ }
+ }
+
// Build a map "PhiM" which will contain, for each block, the set
// of references that will require phi definitions in that block.
BlockRefsMap PhiM;
- auto Blocks = Func.Addr->members(*this);
for (NodeAddr<BlockNode*> BA : Blocks)
recordDefsForDF(PhiM, RefM, BA);
for (NodeAddr<BlockNode*> BA : Blocks)
@@ -976,6 +1062,47 @@ void DataFlowGraph::build(unsigned Options) {
removeUnusedPhis();
}
+RegisterRef DataFlowGraph::makeRegRef(unsigned Reg, unsigned Sub) const {
+ assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+ if (Sub != 0)
+ Reg = TRI.getSubReg(Reg, Sub);
+ return RegisterRef(Reg);
+}
+
+RegisterRef DataFlowGraph::normalizeRef(RegisterRef RR) const {
+ // FIXME copied from RegisterAggr
+ RegisterId SuperReg = RR.Reg;
+ while (true) {
+ MCSuperRegIterator SR(SuperReg, &TRI, false);
+ if (!SR.isValid())
+ break;
+ SuperReg = *SR;
+ }
+
+ uint32_t Sub = TRI.getSubRegIndex(SuperReg, RR.Reg);
+ const TargetRegisterClass &RC = *TRI.getMinimalPhysRegClass(RR.Reg);
+ LaneBitmask SuperMask = RR.Mask &
+ TRI.composeSubRegIndexLaneMask(Sub, RC.LaneMask);
+ return RegisterRef(SuperReg, SuperMask);
+}
+
+RegisterRef DataFlowGraph::restrictRef(RegisterRef AR, RegisterRef BR) const {
+ if (AR.Reg == BR.Reg) {
+ LaneBitmask M = AR.Mask & BR.Mask;
+ return M.any() ? RegisterRef(AR.Reg, M) : RegisterRef();
+ }
+#ifndef NDEBUG
+ RegisterRef NAR = normalizeRef(AR);
+ RegisterRef NBR = normalizeRef(BR);
+ assert(NAR.Reg != NBR.Reg);
+#endif
+ // This isn't strictly correct, because the overlap may happen in the
+ // part masked out.
+ if (TRI.regsOverlap(AR.Reg, BR.Reg))
+ return AR;
+ return RegisterRef();
+}
+
// For each stack in the map DefM, push the delimiter for block B on it.
void DataFlowGraph::markBlock(NodeId B, DefStackMap &DefM) {
// Push block delimiters.
@@ -1024,28 +1151,31 @@ void DataFlowGraph::pushDefs(NodeAddr<InstrNode*> IA, DefStackMap &DefM) {
for (NodeAddr<DefNode*> DA : Defs) {
if (Visited.count(DA.Id))
continue;
+
NodeList Rel = getRelatedRefs(IA, DA);
NodeAddr<DefNode*> PDA = Rel.front();
- // Push the definition on the stack for the register and all aliases.
- RegisterRef RR = PDA.Addr->getRegRef();
+ RegisterRef RR = PDA.Addr->getRegRef(*this);
#ifndef NDEBUG
// Assert if the register is defined in two or more unrelated defs.
// This could happen if there are two or more def operands defining it.
if (!Defined.insert(RR).second) {
- auto *MI = NodeAddr<StmtNode*>(IA).Addr->getCode();
+ MachineInstr *MI = NodeAddr<StmtNode*>(IA).Addr->getCode();
dbgs() << "Multiple definitions of register: "
<< Print<RegisterRef>(RR, *this) << " in\n " << *MI
<< "in BB#" << MI->getParent()->getNumber() << '\n';
llvm_unreachable(nullptr);
}
#endif
- DefM[RR].push(DA);
- for (auto A : RAI.getAliasSet(RR)) {
+ // Push the definition on the stack for the register and all aliases.
+ // The def stack traversal in linkNodeUp will check the exact aliasing.
+ DefM[RR.Reg].push(DA);
+ for (RegisterRef A : getAliasSet(RR.Reg /*FIXME? use RegisterRef*/)) {
+ // Check that we don't push the same def twice.
assert(A != RR);
- DefM[A].push(DA);
+ DefM[A.Reg].push(DA);
}
// Mark all the related defs as visited.
- for (auto T : Rel)
+ for (NodeAddr<NodeBase*> T : Rel)
Visited.insert(T.Id);
}
}
@@ -1065,10 +1195,64 @@ NodeList DataFlowGraph::getRelatedRefs(NodeAddr<InstrNode*> IA,
return Refs;
}
+// Return true if RA and RB overlap, false otherwise.
+bool DataFlowGraph::alias(RegisterRef RA, RegisterRef RB) const {
+ assert(TargetRegisterInfo::isPhysicalRegister(RA.Reg));
+ assert(TargetRegisterInfo::isPhysicalRegister(RB.Reg));
+
+ MCRegUnitMaskIterator UMA(RA.Reg, &TRI);
+ MCRegUnitMaskIterator UMB(RB.Reg, &TRI);
+ // Reg units are returned in numerical order.
+ while (UMA.isValid() && UMB.isValid()) {
+ std::pair<uint32_t,LaneBitmask> PA = *UMA;
+ std::pair<uint32_t,LaneBitmask> PB = *UMB;
+ if (PA.first == PB.first) {
+ // Lane mask of 0 (given by the iterator) should be treated as "full".
+ // This can happen when the register has only one unit, or when the
+ // unit corresponds to explicit aliasing. In such cases, the lane mask
+ // from RegisterRef should be ignored.
+ if (PA.second.none() || PB.second.none())
+ return true;
+
+ // At this point the common unit corresponds to a subregister. The lane
+ // masks correspond to the lane mask of that unit within the original
+ // register, for example assuming register quadruple q0 = r3:0, and
+ // a register pair d1 = r3:2, the lane mask of r2 in q0 may be 0b0100,
+ // while the lane mask of r2 in d1 may be 0b0001.
+ LaneBitmask LA = PA.second & RA.Mask;
+ LaneBitmask LB = PB.second & RB.Mask;
+ if (LA.any() && LB.any()) {
+ unsigned Root = *MCRegUnitRootIterator(PA.first, &TRI);
+ // If register units were guaranteed to only have 1 bit in any lane
+ // mask, the code below would not be necessary. This is because LA
+ // and LB would have at most 1 bit set each, and that bit would be
+ // guaranteed to correspond to the given register unit.
+ uint32_t SubA = TRI.getSubRegIndex(RA.Reg, Root);
+ uint32_t SubB = TRI.getSubRegIndex(RB.Reg, Root);
+ const TargetRegisterClass &RC = *TRI.getMinimalPhysRegClass(Root);
+ LaneBitmask MaskA = TRI.reverseComposeSubRegIndexLaneMask(SubA, LA);
+ LaneBitmask MaskB = TRI.reverseComposeSubRegIndexLaneMask(SubB, LB);
+ if ((MaskA & MaskB & RC.LaneMask).any())
+ return true;
+ }
+
+ ++UMA;
+ ++UMB;
+ continue;
+ }
+ if (PA.first < PB.first)
+ ++UMA;
+ else if (PB.first < PA.first)
+ ++UMB;
+ }
+ return false;
+}
+
// Clear all information in the graph.
void DataFlowGraph::reset() {
Memory.clear();
+ BlockNodes.clear();
Func = NodeAddr<FuncNode*>();
}
@@ -1083,10 +1267,10 @@ NodeAddr<RefNode*> DataFlowGraph::getNextRelated(NodeAddr<InstrNode*> IA,
NodeAddr<RefNode*> RA) const {
assert(IA.Id != 0 && RA.Id != 0);
- auto Related = [RA](NodeAddr<RefNode*> TA) -> bool {
+ auto Related = [this,RA](NodeAddr<RefNode*> TA) -> bool {
if (TA.Addr->getKind() != RA.Addr->getKind())
return false;
- if (TA.Addr->getRegRef() != RA.Addr->getRegRef())
+ if (TA.Addr->getRegRef(*this) != RA.Addr->getRegRef(*this))
return false;
return true;
};
@@ -1105,7 +1289,7 @@ NodeAddr<RefNode*> DataFlowGraph::getNextRelated(NodeAddr<InstrNode*> IA,
return TUA.Addr->getPredecessor() == RUA.Addr->getPredecessor();
};
- RegisterRef RR = RA.Addr->getRegRef();
+ RegisterRef RR = RA.Addr->getRegRef(*this);
if (IA.Addr->getKind() == NodeAttrs::Stmt)
return RA.Addr->getNextRef(RR, RelatedStmt, true, *this);
return RA.Addr->getNextRef(RR, RelatedPhi, true, *this);
@@ -1174,31 +1358,45 @@ NodeAddr<RefNode*> DataFlowGraph::getNextShadow(NodeAddr<InstrNode*> IA,
// Create a new statement node in the block node BA that corresponds to
// the machine instruction MI.
void DataFlowGraph::buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In) {
- auto SA = newStmt(BA, &In);
+ NodeAddr<StmtNode*> SA = newStmt(BA, &In);
auto isCall = [] (const MachineInstr &In) -> bool {
if (In.isCall())
return true;
// Is tail call?
if (In.isBranch())
- for (auto &Op : In.operands())
+ for (const MachineOperand &Op : In.operands())
if (Op.isGlobal() || Op.isSymbol())
return true;
return false;
};
+ auto isDefUndef = [this] (const MachineInstr &In, RegisterRef DR) -> bool {
+ // This instruction defines DR. Check if there is a use operand that
+ // would make DR live on entry to the instruction.
+ for (const MachineOperand &UseOp : In.operands()) {
+ if (!UseOp.isReg() || !UseOp.isUse() || UseOp.isUndef())
+ continue;
+ RegisterRef UR = makeRegRef(UseOp.getReg(), UseOp.getSubReg());
+ if (alias(DR, UR))
+ return false;
+ }
+ return true;
+ };
+
// Collect a set of registers that this instruction implicitly uses
// or defines. Implicit operands from an instruction will be ignored
// unless they are listed here.
RegisterSet ImpUses, ImpDefs;
if (const uint16_t *ImpD = In.getDesc().getImplicitDefs())
while (uint16_t R = *ImpD++)
- ImpDefs.insert({R, 0});
+ ImpDefs.insert(RegisterRef(R));
if (const uint16_t *ImpU = In.getDesc().getImplicitUses())
while (uint16_t R = *ImpU++)
- ImpUses.insert({R, 0});
+ ImpUses.insert(RegisterRef(R));
- bool NeedsImplicit = isCall(In) || In.isInlineAsm() || In.isReturn();
+ bool IsCall = isCall(In);
+ bool NeedsImplicit = IsCall || In.isInlineAsm() || In.isReturn();
bool IsPredicated = TII.isPredicated(In);
unsigned NumOps = In.getNumOperands();
@@ -1212,14 +1410,20 @@ void DataFlowGraph::buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In) {
MachineOperand &Op = In.getOperand(OpN);
if (!Op.isReg() || !Op.isDef() || Op.isImplicit())
continue;
- RegisterRef RR = { Op.getReg(), Op.getSubReg() };
+ RegisterRef RR = makeRegRef(Op.getReg(), Op.getSubReg());
uint16_t Flags = NodeAttrs::None;
- if (TOI.isPreserving(In, OpN))
+ if (TOI.isPreserving(In, OpN)) {
Flags |= NodeAttrs::Preserving;
+ // If the def is preserving, check if it is also undefined.
+ if (isDefUndef(In, RR))
+ Flags |= NodeAttrs::Undef;
+ }
if (TOI.isClobbering(In, OpN))
Flags |= NodeAttrs::Clobbering;
if (TOI.isFixedReg(In, OpN))
Flags |= NodeAttrs::Fixed;
+ if (IsCall && Op.isDead())
+ Flags |= NodeAttrs::Dead;
NodeAddr<DefNode*> DA = newDef(SA, Op, Flags);
SA.Addr->addMember(DA, *this);
DoneDefs.insert(RR);
@@ -1231,18 +1435,24 @@ void DataFlowGraph::buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In) {
MachineOperand &Op = In.getOperand(OpN);
if (!Op.isReg() || !Op.isDef() || !Op.isImplicit())
continue;
- RegisterRef RR = { Op.getReg(), Op.getSubReg() };
+ RegisterRef RR = makeRegRef(Op.getReg(), Op.getSubReg());
if (!NeedsImplicit && !ImpDefs.count(RR))
continue;
if (DoneDefs.count(RR))
continue;
uint16_t Flags = NodeAttrs::None;
- if (TOI.isPreserving(In, OpN))
+ if (TOI.isPreserving(In, OpN)) {
Flags |= NodeAttrs::Preserving;
+ // If the def is preserving, check if it is also undefined.
+ if (isDefUndef(In, RR))
+ Flags |= NodeAttrs::Undef;
+ }
if (TOI.isClobbering(In, OpN))
Flags |= NodeAttrs::Clobbering;
if (TOI.isFixedReg(In, OpN))
Flags |= NodeAttrs::Fixed;
+ if (IsCall && Op.isDead())
+ Flags |= NodeAttrs::Dead;
NodeAddr<DefNode*> DA = newDef(SA, Op, Flags);
SA.Addr->addMember(DA, *this);
DoneDefs.insert(RR);
@@ -1252,7 +1462,7 @@ void DataFlowGraph::buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In) {
MachineOperand &Op = In.getOperand(OpN);
if (!Op.isReg() || !Op.isUse())
continue;
- RegisterRef RR = { Op.getReg(), Op.getSubReg() };
+ RegisterRef RR = makeRegRef(Op.getReg(), Op.getSubReg());
// Add implicit uses on return and call instructions, and on predicated
// instructions regardless of whether or not they appear in the instruction
// descriptor's list.
@@ -1261,6 +1471,8 @@ void DataFlowGraph::buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In) {
if (Implicit && !TakeImplicit && !ImpUses.count(RR))
continue;
uint16_t Flags = NodeAttrs::None;
+ if (Op.isUndef())
+ Flags |= NodeAttrs::Undef;
if (TOI.isFixedReg(In, OpN))
Flags |= NodeAttrs::Fixed;
NodeAddr<UseNode*> UA = newUse(SA, Op, Flags);
@@ -1272,20 +1484,20 @@ void DataFlowGraph::buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In) {
// that block, and from all blocks dominated by it.
void DataFlowGraph::buildBlockRefs(NodeAddr<BlockNode*> BA,
BlockRefsMap &RefM) {
- auto &Refs = RefM[BA.Id];
+ RegisterSet &Refs = RefM[BA.Id];
MachineDomTreeNode *N = MDT.getNode(BA.Addr->getCode());
assert(N);
for (auto I : *N) {
MachineBasicBlock *SB = I->getBlock();
- auto SBA = Func.Addr->findBlock(SB, *this);
+ NodeAddr<BlockNode*> SBA = findBlock(SB);
buildBlockRefs(SBA, RefM);
- const auto &SRs = RefM[SBA.Id];
- Refs.insert(SRs.begin(), SRs.end());
+ const RegisterSet &RefsS = RefM[SBA.Id];
+ Refs.insert(RefsS.begin(), RefsS.end());
}
for (NodeAddr<InstrNode*> IA : BA.Addr->members(*this))
for (NodeAddr<RefNode*> RA : IA.Addr->members(*this))
- Refs.insert(RA.Addr->getRegRef());
+ Refs.insert(RA.Addr->getRegRef(*this));
}
// Scan all defs in the block node BA and record in PhiM the locations of
@@ -1307,17 +1519,11 @@ void DataFlowGraph::recordDefsForDF(BlockRefsMap &PhiM, BlockRefsMap &RefM,
// This is done to make sure that each defined reference gets only one
// phi node, even if it is defined multiple times.
RegisterSet Defs;
- for (auto I : BA.Addr->members(*this)) {
- assert(I.Addr->getType() == NodeAttrs::Code);
- assert(I.Addr->getKind() == NodeAttrs::Phi ||
- I.Addr->getKind() == NodeAttrs::Stmt);
- NodeAddr<InstrNode*> IA = I;
+ for (NodeAddr<InstrNode*> IA : BA.Addr->members(*this))
for (NodeAddr<RefNode*> RA : IA.Addr->members_if(IsDef, *this))
- Defs.insert(RA.Addr->getRegRef());
- }
+ Defs.insert(RA.Addr->getRegRef(*this));
- // Finally, add the set of defs to each block in the iterated dominance
- // frontier.
+ // Calculate the iterated dominance frontier of BB.
const MachineDominanceFrontier::DomSetType &DF = DFLoc->second;
SetVector<MachineBasicBlock*> IDF(DF.begin(), DF.end());
for (unsigned i = 0; i < IDF.size(); ++i) {
@@ -1329,13 +1535,15 @@ void DataFlowGraph::recordDefsForDF(BlockRefsMap &PhiM, BlockRefsMap &RefM,
// Get the register references that are reachable from this block.
RegisterSet &Refs = RefM[BA.Id];
for (auto DB : IDF) {
- auto DBA = Func.Addr->findBlock(DB, *this);
- const auto &Rs = RefM[DBA.Id];
- Refs.insert(Rs.begin(), Rs.end());
+ NodeAddr<BlockNode*> DBA = findBlock(DB);
+ const RegisterSet &RefsD = RefM[DBA.Id];
+ Refs.insert(RefsD.begin(), RefsD.end());
}
+ // Finally, add the set of defs to each block in the iterated dominance
+ // frontier.
for (auto DB : IDF) {
- auto DBA = Func.Addr->findBlock(DB, *this);
+ NodeAddr<BlockNode*> DBA = findBlock(DB);
PhiM[DBA.Id].insert(Defs.begin(), Defs.end());
}
}
@@ -1355,19 +1563,19 @@ void DataFlowGraph::buildPhis(BlockRefsMap &PhiM, BlockRefsMap &RefM,
// are not covered by another ref (i.e. maximal with respect to covering).
auto MaxCoverIn = [this] (RegisterRef RR, RegisterSet &RRs) -> RegisterRef {
- for (auto I : RRs)
- if (I != RR && RAI.covers(I, RR))
+ for (RegisterRef I : RRs)
+ if (I != RR && RegisterAggr::isCoverOf(I, RR, TRI))
RR = I;
return RR;
};
RegisterSet MaxDF;
- for (auto I : HasDF->second)
+ for (RegisterRef I : HasDF->second)
MaxDF.insert(MaxCoverIn(I, HasDF->second));
std::vector<RegisterRef> MaxRefs;
- auto &RefB = RefM[BA.Id];
- for (auto I : MaxDF)
+ RegisterSet &RefB = RefM[BA.Id];
+ for (RegisterRef I : MaxDF)
MaxRefs.push_back(MaxCoverIn(I, RefB));
// Now, for each R in MaxRefs, get the alias closure of R. If the closure
@@ -1382,19 +1590,17 @@ void DataFlowGraph::buildPhis(BlockRefsMap &PhiM, BlockRefsMap &RefM,
auto Aliased = [this,&MaxRefs](RegisterRef RR,
std::vector<unsigned> &Closure) -> bool {
- for (auto I : Closure)
- if (RAI.alias(RR, MaxRefs[I]))
+ for (unsigned I : Closure)
+ if (alias(RR, MaxRefs[I]))
return true;
return false;
};
// Prepare a list of NodeIds of the block's predecessors.
- std::vector<NodeId> PredList;
+ NodeList Preds;
const MachineBasicBlock *MBB = BA.Addr->getCode();
- for (auto PB : MBB->predecessors()) {
- auto B = Func.Addr->findBlock(PB, *this);
- PredList.push_back(B.Id);
- }
+ for (MachineBasicBlock *PB : MBB->predecessors())
+ Preds.push_back(findBlock(PB));
while (!MaxRefs.empty()) {
// Put the first element in the closure, and then add all subsequent
@@ -1418,8 +1624,7 @@ void DataFlowGraph::buildPhis(BlockRefsMap &PhiM, BlockRefsMap &RefM,
PA.Addr->addMember(DA, *this);
}
// Add phi uses.
- for (auto P : PredList) {
- auto PBA = addr<BlockNode*>(P);
+ for (NodeAddr<BlockNode*> PBA : Preds) {
for (unsigned X = 0; X != CS; ++X) {
RegisterRef RR = MaxRefs[ClosureIdx[X]];
NodeAddr<PhiUseNode*> PUA = newPhiUse(PA, RR, PBA);
@@ -1449,7 +1654,7 @@ void DataFlowGraph::removeUnusedPhis() {
}
static auto HasUsedDef = [](NodeList &Ms) -> bool {
- for (auto M : Ms) {
+ for (NodeAddr<NodeBase*> M : Ms) {
if (M.Addr->getKind() != NodeAttrs::Def)
continue;
NodeAddr<DefNode*> DA = M;
@@ -1493,25 +1698,25 @@ void DataFlowGraph::linkRefUp(NodeAddr<InstrNode*> IA, NodeAddr<T> TA,
DefStack &DS) {
if (DS.empty())
return;
- RegisterRef RR = TA.Addr->getRegRef();
+ RegisterRef RR = TA.Addr->getRegRef(*this);
NodeAddr<T> TAP;
// References from the def stack that have been examined so far.
- RegisterSet Defs;
+ RegisterAggr Defs(TRI);
for (auto I = DS.top(), E = DS.bottom(); I != E; I.down()) {
- RegisterRef QR = I->Addr->getRegRef();
- auto AliasQR = [QR,this] (RegisterRef RR) -> bool {
- return RAI.alias(QR, RR);
- };
- bool PrecUp = RAI.covers(QR, RR);
+ RegisterRef QR = I->Addr->getRegRef(*this);
+
// Skip all defs that are aliased to any of the defs that we have already
- // seen. If we encounter a covering def, stop the stack traversal early.
- if (std::any_of(Defs.begin(), Defs.end(), AliasQR)) {
- if (PrecUp)
+ // seen. If this completes a cover of RR, stop the stack traversal.
+ bool Alias = Defs.hasAliasOf(QR);
+ bool Cover = Defs.insert(QR).hasCoverOf(RR);
+ if (Alias) {
+ if (Cover)
break;
continue;
}
+
// The reaching def.
NodeAddr<DefNode*> RDA = *I;
@@ -1527,27 +1732,29 @@ void DataFlowGraph::linkRefUp(NodeAddr<InstrNode*> IA, NodeAddr<T> TA,
// Create the link.
TAP.Addr->linkToDef(TAP.Id, RDA);
- if (PrecUp)
+ if (Cover)
break;
- Defs.insert(QR);
}
}
// Create data-flow links for all reference nodes in the statement node SA.
void DataFlowGraph::linkStmtRefs(DefStackMap &DefM, NodeAddr<StmtNode*> SA) {
+#ifndef NDEBUG
RegisterSet Defs;
+#endif
// Link all nodes (upwards in the data-flow) with their reaching defs.
for (NodeAddr<RefNode*> RA : SA.Addr->members(*this)) {
uint16_t Kind = RA.Addr->getKind();
assert(Kind == NodeAttrs::Def || Kind == NodeAttrs::Use);
- RegisterRef RR = RA.Addr->getRegRef();
- // Do not process multiple defs of the same reference.
- if (Kind == NodeAttrs::Def && Defs.count(RR))
- continue;
+ RegisterRef RR = RA.Addr->getRegRef(*this);
+#ifndef NDEBUG
+ // Do not expect multiple defs of the same reference.
+ assert(Kind != NodeAttrs::Def || !Defs.count(RR));
Defs.insert(RR);
+#endif
- auto F = DefM.find(RR);
+ auto F = DefM.find(RR.Reg);
if (F == DefM.end())
continue;
DefStack &DS = F->second;
@@ -1584,7 +1791,7 @@ void DataFlowGraph::linkBlockRefs(DefStackMap &DefM, NodeAddr<BlockNode*> BA) {
MachineDomTreeNode *N = MDT.getNode(BA.Addr->getCode());
for (auto I : *N) {
MachineBasicBlock *SB = I->getBlock();
- auto SBA = Func.Addr->findBlock(SB, *this);
+ NodeAddr<BlockNode*> SBA = findBlock(SB);
linkBlockRefs(DefM, SBA);
}
@@ -1596,15 +1803,27 @@ void DataFlowGraph::linkBlockRefs(DefStackMap &DefM, NodeAddr<BlockNode*> BA) {
NodeAddr<PhiUseNode*> PUA = NA;
return PUA.Addr->getPredecessor() == BA.Id;
};
+
+ RegisterSet EHLiveIns = getLandingPadLiveIns();
MachineBasicBlock *MBB = BA.Addr->getCode();
- for (auto SB : MBB->successors()) {
- auto SBA = Func.Addr->findBlock(SB, *this);
+
+ for (MachineBasicBlock *SB : MBB->successors()) {
+ bool IsEHPad = SB->isEHPad();
+ NodeAddr<BlockNode*> SBA = findBlock(SB);
for (NodeAddr<InstrNode*> IA : SBA.Addr->members_if(IsPhi, *this)) {
+ // Do not link phi uses for landing pad live-ins.
+ if (IsEHPad) {
+ // Find what register this phi is for.
+ NodeAddr<RefNode*> RA = IA.Addr->getFirstMember(*this);
+ assert(RA.Id != 0);
+ if (EHLiveIns.count(RA.Addr->getRegRef(*this)))
+ continue;
+ }
// Go over each phi use associated with MBB, and link it.
for (auto U : IA.Addr->members_if(IsUseForBA, *this)) {
NodeAddr<PhiUseNode*> PUA = U;
- RegisterRef RR = PUA.Addr->getRegRef();
- linkRefUp<UseNode*>(IA, PUA, DefM[RR]);
+ RegisterRef RR = PUA.Addr->getRegRef(*this);
+ linkRefUp<UseNode*>(IA, PUA, DefM[RR.Reg]);
}
}
}
diff --git a/contrib/llvm/lib/Target/Hexagon/RDFGraph.h b/contrib/llvm/lib/Target/Hexagon/RDFGraph.h
index 49b053741263..871062ff2b05 100644
--- a/contrib/llvm/lib/Target/Hexagon/RDFGraph.h
+++ b/contrib/llvm/lib/Target/Hexagon/RDFGraph.h
@@ -175,7 +175,29 @@
// - Clobbering: applied only to defs, indicates that the value generated
// by this def is unspecified. A typical example would be volatile registers
// after function calls.
-//
+// - Fixed: the register in this def/use cannot be replaced with any other
+// register. A typical case would be a parameter register to a call, or
+// the register with the return value from a function.
+// - Undef: the register in this reference is assumed to have no
+// pre-existing value, even if it appears to be reached by some def.
+// This is typically used to prevent keeping registers artificially live
+// in cases when they are defined via predicated instructions. For example:
+// r0 = add-if-true cond, r10, r11 (1)
+// r0 = add-if-false cond, r12, r13, r0<imp-use> (2)
+// ... = r0 (3)
+// Before (1), r0 is not intended to be live, and the use of r0 in (3) is
+// not meant to be reached by any def preceding (1). However, since the
+// defs in (1) and (2) are both preserving, these properties alone would
+// imply that the use in (3) may indeed be reached by some prior def.
+// Adding Undef flag to the def in (1) prevents that. The Undef flag
+// may be applied to both defs and uses.
+// - Dead: applies only to defs. The value coming out of a "dead" def is
+// assumed to be unused, even if the def appears to be reaching other defs
+// or uses. The motivation for this flag comes from dead defs on function
+// calls: there is no way to determine if such a def is dead without
+// analyzing the target's ABI. Hence the graph should contain this info,
+// as it is unavailable otherwise. On the other hand, a def without any
+// uses on a typical instruction is not the intended target for this flag.
//
// *** Shadow references
//
@@ -202,16 +224,24 @@
#ifndef RDF_GRAPH_H
#define RDF_GRAPH_H
+#include "llvm/ADT/BitVector.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Timer.h"
+#include "llvm/Target/TargetRegisterInfo.h"
#include <functional>
#include <map>
#include <set>
+#include <unordered_map>
#include <vector>
+// RDF uses uint32_t to refer to registers. This is to ensure that the type
+// size remains specific. In other places, registers are often stored using
+// unsigned.
+static_assert(sizeof(uint32_t) == sizeof(unsigned), "Those should be equal");
+
namespace llvm {
class MachineBasicBlock;
class MachineFunction;
@@ -220,10 +250,12 @@ namespace llvm {
class MachineDominanceFrontier;
class MachineDominatorTree;
class TargetInstrInfo;
- class TargetRegisterInfo;
namespace rdf {
typedef uint32_t NodeId;
+ typedef uint32_t RegisterId;
+
+ struct DataFlowGraph;
struct NodeAttrs {
enum : uint16_t {
@@ -243,13 +275,15 @@ namespace rdf {
Block = 0x0005 << 2, // 101
Func = 0x0006 << 2, // 110
- // Flags: 5 bits for now
- FlagMask = 0x001F << 5,
- Shadow = 0x0001 << 5, // 00001, Has extra reaching defs.
- Clobbering = 0x0002 << 5, // 00010, Produces unspecified values.
- PhiRef = 0x0004 << 5, // 00100, Member of PhiNode.
- Preserving = 0x0008 << 5, // 01000, Def can keep original bits.
- Fixed = 0x0010 << 5, // 10000, Fixed register.
+ // Flags: 7 bits for now
+ FlagMask = 0x007F << 5,
+ Shadow = 0x0001 << 5, // 0000001, Has extra reaching defs.
+ Clobbering = 0x0002 << 5, // 0000010, Produces unspecified values.
+ PhiRef = 0x0004 << 5, // 0000100, Member of PhiNode.
+ Preserving = 0x0008 << 5, // 0001000, Def can keep original bits.
+ Fixed = 0x0010 << 5, // 0010000, Fixed register.
+ Undef = 0x0020 << 5, // 0100000, Has no pre-existing value.
+ Dead = 0x0040 << 5, // 1000000, Does not define a value.
};
static uint16_t type(uint16_t T) { return T & TypeMask; }
@@ -294,8 +328,6 @@ namespace rdf {
template <typename T> struct NodeAddr {
NodeAddr() : Addr(nullptr), Id(0) {}
NodeAddr(T A, NodeId I) : Addr(A), Id(I) {}
- NodeAddr(const NodeAddr&) = default;
- NodeAddr &operator= (const NodeAddr&) = default;
bool operator== (const NodeAddr<T> &NA) const {
assert((Addr == NA.Addr) == (Id == NA.Id));
@@ -367,36 +399,25 @@ namespace rdf {
};
struct RegisterRef {
- unsigned Reg, Sub;
+ RegisterId Reg;
+ LaneBitmask Mask;
- // No non-trivial constructors, since this will be a member of a union.
- RegisterRef() = default;
- RegisterRef(const RegisterRef &RR) = default;
- RegisterRef &operator= (const RegisterRef &RR) = default;
+ RegisterRef() : RegisterRef(0) {}
+ explicit RegisterRef(RegisterId R, LaneBitmask M = LaneBitmask::getAll())
+ : Reg(R), Mask(R != 0 ? M : LaneBitmask::getNone()) {}
+ operator bool() const { return Reg != 0 && Mask.any(); }
bool operator== (const RegisterRef &RR) const {
- return Reg == RR.Reg && Sub == RR.Sub;
+ return Reg == RR.Reg && Mask == RR.Mask;
}
bool operator!= (const RegisterRef &RR) const {
return !operator==(RR);
}
bool operator< (const RegisterRef &RR) const {
- return Reg < RR.Reg || (Reg == RR.Reg && Sub < RR.Sub);
+ return Reg < RR.Reg || (Reg == RR.Reg && Mask < RR.Mask);
}
};
typedef std::set<RegisterRef> RegisterSet;
- struct RegisterAliasInfo {
- RegisterAliasInfo(const TargetRegisterInfo &tri) : TRI(tri) {}
- virtual ~RegisterAliasInfo() {}
-
- virtual std::vector<RegisterRef> getAliasSet(RegisterRef RR) const;
- virtual bool alias(RegisterRef RA, RegisterRef RB) const;
- virtual bool covers(RegisterRef RA, RegisterRef RB) const;
- virtual bool covers(const RegisterSet &RRs, RegisterRef RR) const;
-
- const TargetRegisterInfo &TRI;
- };
-
struct TargetOperandInfo {
TargetOperandInfo(const TargetInstrInfo &tii) : TII(tii) {}
virtual ~TargetOperandInfo() {}
@@ -408,7 +429,107 @@ namespace rdf {
};
- struct DataFlowGraph;
+ // Packed register reference. Only used for storage.
+ struct PackedRegisterRef {
+ RegisterId Reg;
+ uint32_t MaskId;
+ };
+
+ // Template class for a map translating uint32_t into arbitrary types.
+ // The map will act like an indexed set: upon insertion of a new object,
+ // it will automatically assign a new index to it. Index of 0 is treated
+ // as invalid and is never allocated.
+ template <typename T, unsigned N = 32>
+ struct IndexedSet {
+ IndexedSet() : Map() { Map.reserve(N); }
+ T get(uint32_t Idx) const {
+ // Index Idx corresponds to Map[Idx-1].
+ assert(Idx != 0 && !Map.empty() && Idx-1 < Map.size());
+ return Map[Idx-1];
+ }
+ uint32_t insert(T Val) {
+ // Linear search.
+ auto F = llvm::find(Map, Val);
+ if (F != Map.end())
+ return F - Map.begin() + 1;
+ Map.push_back(Val);
+ return Map.size(); // Return actual_index + 1.
+ }
+ uint32_t find(T Val) const {
+ auto F = llvm::find(Map, Val);
+ assert(F != Map.end());
+ return F - Map.begin();
+ }
+ private:
+ std::vector<T> Map;
+ };
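Aside: the IndexedSet comment above describes a 1-based indexed set where index 0 is reserved as invalid and duplicates return their existing index. A rough standalone sketch of that behavior is below; the SimpleIndexedSet name and the main() driver are illustrative only and not part of this patch.

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Illustrative only: assigns 1-based indices on insertion, 0 stays invalid.
    template <typename T>
    struct SimpleIndexedSet {
      uint32_t insert(T Val) {
        for (uint32_t I = 0; I != Map.size(); ++I)
          if (Map[I] == Val)
            return I + 1;            // Existing value keeps its index.
        Map.push_back(Val);
        return Map.size();           // New value gets actual_index + 1.
      }
      T get(uint32_t Idx) const {
        assert(Idx != 0 && Idx - 1 < Map.size());
        return Map[Idx - 1];
      }
    private:
      std::vector<T> Map;
    };

    int main() {
      SimpleIndexedSet<int> S;
      assert(S.insert(7) == 1);      // First value -> index 1.
      assert(S.insert(9) == 2);      // Second value -> index 2.
      assert(S.insert(7) == 1);      // Duplicate -> same index.
      assert(S.get(2) == 9);
    }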
+
+ struct LaneMaskIndex : private IndexedSet<LaneBitmask> {
+ LaneMaskIndex() = default;
+
+ LaneBitmask getLaneMaskForIndex(uint32_t K) const {
+ return K == 0 ? LaneBitmask::getAll() : get(K);
+ }
+ uint32_t getIndexForLaneMask(LaneBitmask LM) {
+ assert(LM.any());
+ return LM.all() ? 0 : insert(LM);
+ }
+ uint32_t getIndexForLaneMask(LaneBitmask LM) const {
+ assert(LM.any());
+ return LM.all() ? 0 : find(LM);
+ }
+ PackedRegisterRef pack(RegisterRef RR) {
+ return { RR.Reg, getIndexForLaneMask(RR.Mask) };
+ }
+ PackedRegisterRef pack(RegisterRef RR) const {
+ return { RR.Reg, getIndexForLaneMask(RR.Mask) };
+ }
+ RegisterRef unpack(PackedRegisterRef PR) const {
+ return RegisterRef(PR.Reg, getLaneMaskForIndex(PR.MaskId));
+ }
+ };
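Aside: LaneMaskIndex packs a RegisterRef (register plus lane mask) into a PackedRegisterRef (register plus small mask index), mapping the all-lanes mask to index 0 so the common case needs no table entry. A minimal sketch of that scheme follows, using a plain uint64_t in place of llvm::LaneBitmask; all names here are hypothetical.

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Sketch: pack (reg, mask) into (reg, mask-index); full mask -> index 0.
    struct PackedRef { uint32_t Reg; uint32_t MaskId; };

    struct SimpleLaneMaskIndex {
      uint32_t indexOf(uint64_t Mask) {
        if (Mask == ~uint64_t(0))
          return 0;                       // Full mask needs no table entry.
        for (uint32_t I = 0; I != Masks.size(); ++I)
          if (Masks[I] == Mask)
            return I + 1;
        Masks.push_back(Mask);
        return Masks.size();
      }
      uint64_t maskOf(uint32_t Idx) const {
        return Idx == 0 ? ~uint64_t(0) : Masks[Idx - 1];
      }
      PackedRef pack(uint32_t Reg, uint64_t Mask) { return {Reg, indexOf(Mask)}; }
    private:
      std::vector<uint64_t> Masks;
    };

    int main() {
      SimpleLaneMaskIndex LMI;
      PackedRef P = LMI.pack(42, 0x3);    // Partial mask gets a table slot.
      assert(P.MaskId == 1 && LMI.maskOf(P.MaskId) == 0x3);
      PackedRef Q = LMI.pack(42, ~uint64_t(0));
      assert(Q.MaskId == 0);              // Full mask stays at index 0.
    }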
+
+ struct RegisterAggr {
+ RegisterAggr(const TargetRegisterInfo &tri)
+ : Masks(), ExpAliasUnits(tri.getNumRegUnits()), CheckUnits(false),
+ TRI(tri) {}
+ RegisterAggr(const RegisterAggr &RG)
+ : Masks(RG.Masks), ExpAliasUnits(RG.ExpAliasUnits),
+ CheckUnits(RG.CheckUnits), TRI(RG.TRI) {}
+
+ bool empty() const { return Masks.empty(); }
+ bool hasAliasOf(RegisterRef RR) const;
+ bool hasCoverOf(RegisterRef RR) const;
+ static bool isCoverOf(RegisterRef RA, RegisterRef RB,
+ const TargetRegisterInfo &TRI) {
+ return RegisterAggr(TRI).insert(RA).hasCoverOf(RB);
+ }
+
+ RegisterAggr &insert(RegisterRef RR);
+ RegisterAggr &insert(const RegisterAggr &RG);
+ RegisterAggr &clear(RegisterRef RR);
+ RegisterAggr &clear(const RegisterAggr &RG);
+
+ RegisterRef clearIn(RegisterRef RR) const;
+
+ void print(raw_ostream &OS) const;
+
+ private:
+ typedef std::unordered_map<RegisterId, LaneBitmask> MapType;
+
+ public:
+ typedef MapType::const_iterator iterator;
+ iterator begin() const { return Masks.begin(); }
+ iterator end() const { return Masks.end(); }
+ RegisterRef normalize(RegisterRef RR) const;
+
+ private:
+ MapType Masks;
+ BitVector ExpAliasUnits; // Register units for explicit aliases.
+ bool CheckUnits;
+ const TargetRegisterInfo &TRI;
+ };
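Aside: RegisterAggr accumulates register-to-lane-mask pairs; hasAliasOf asks whether any accumulated lane overlaps a reference, and hasCoverOf asks whether the accumulated lanes include all of the reference's lanes. The sketch below shows only the same-register lane accumulation; the real class additionally folds in aliases and sub-registers via TargetRegisterInfo, and the names here are hypothetical.

    #include <cassert>
    #include <cstdint>
    #include <unordered_map>

    // Simplified sketch: same-register lane accumulation only.
    struct SimpleRegAggr {
      SimpleRegAggr &insert(uint32_t Reg, uint64_t Mask) {
        Masks[Reg] |= Mask;
        return *this;
      }
      bool hasCoverOf(uint32_t Reg, uint64_t Mask) const {
        auto F = Masks.find(Reg);
        return F != Masks.end() && (Mask & ~F->second) == 0;  // All lanes present?
      }
      bool hasAliasOf(uint32_t Reg, uint64_t Mask) const {
        auto F = Masks.find(Reg);
        return F != Masks.end() && (Mask & F->second) != 0;   // Any lane overlaps?
      }
    private:
      std::unordered_map<uint32_t, uint64_t> Masks;
    };

    int main() {
      SimpleRegAggr A;
      A.insert(1, 0x1).insert(1, 0x2);   // Two partial defs of register 1.
      assert(A.hasAliasOf(1, 0x2));
      assert(A.hasCoverOf(1, 0x3));      // Together they cover both lanes.
      assert(!A.hasCoverOf(2, 0x1));     // Nothing known about register 2.
    }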
+
struct NodeBase {
public:
@@ -454,7 +575,7 @@ namespace rdf {
};
union {
MachineOperand *Op; // Non-phi refs point to a machine operand.
- RegisterRef RR; // Phi refs store register info directly.
+ PackedRegisterRef PR; // Phi refs store register info directly.
};
};
@@ -475,13 +596,13 @@ namespace rdf {
struct RefNode : public NodeBase {
RefNode() = default;
- RegisterRef getRegRef() const;
+ RegisterRef getRegRef(const DataFlowGraph &G) const;
MachineOperand &getOp() {
assert(!(getFlags() & NodeAttrs::PhiRef));
return *Ref.Op;
}
- void setRegRef(RegisterRef RR);
- void setRegRef(MachineOperand *Op);
+ void setRegRef(RegisterRef RR, DataFlowGraph &G);
+ void setRegRef(MachineOperand *Op, DataFlowGraph &G);
NodeId getReachingDef() const {
return Ref.RD;
}
@@ -596,8 +717,7 @@ namespace rdf {
struct DataFlowGraph {
DataFlowGraph(MachineFunction &mf, const TargetInstrInfo &tii,
const TargetRegisterInfo &tri, const MachineDominatorTree &mdt,
- const MachineDominanceFrontier &mdf, const RegisterAliasInfo &rai,
- const TargetOperandInfo &toi);
+ const MachineDominanceFrontier &mdf, const TargetOperandInfo &toi);
NodeBase *ptr(NodeId N) const;
template <typename T> T ptr(NodeId N) const {
@@ -609,27 +729,12 @@ namespace rdf {
return { ptr<T>(N), N };
}
- NodeAddr<FuncNode*> getFunc() const {
- return Func;
- }
- MachineFunction &getMF() const {
- return MF;
- }
- const TargetInstrInfo &getTII() const {
- return TII;
- }
- const TargetRegisterInfo &getTRI() const {
- return TRI;
- }
- const MachineDominatorTree &getDT() const {
- return MDT;
- }
- const MachineDominanceFrontier &getDF() const {
- return MDF;
- }
- const RegisterAliasInfo &getRAI() const {
- return RAI;
- }
+ NodeAddr<FuncNode*> getFunc() const { return Func; }
+ MachineFunction &getMF() const { return MF; }
+ const TargetInstrInfo &getTII() const { return TII; }
+ const TargetRegisterInfo &getTRI() const { return TRI; }
+ const MachineDominatorTree &getDT() const { return MDT; }
+ const MachineDominanceFrontier &getDF() const { return MDF; }
struct DefStack {
DefStack() = default;
@@ -679,13 +784,22 @@ namespace rdf {
StorageType Stack;
};
- typedef std::map<RegisterRef,DefStack> DefStackMap;
+ // Make this std::unordered_map for speed of accessing elements.
+ // Map: Register (physical or virtual) -> DefStack
+ typedef std::unordered_map<RegisterId,DefStack> DefStackMap;
void build(unsigned Options = BuildOptions::None);
void pushDefs(NodeAddr<InstrNode*> IA, DefStackMap &DM);
void markBlock(NodeId B, DefStackMap &DefM);
void releaseBlock(NodeId B, DefStackMap &DefM);
+ PackedRegisterRef pack(RegisterRef RR) { return LMI.pack(RR); }
+ PackedRegisterRef pack(RegisterRef RR) const { return LMI.pack(RR); }
+ RegisterRef unpack(PackedRegisterRef PR) const { return LMI.unpack(PR); }
+ RegisterRef makeRegRef(unsigned Reg, unsigned Sub) const;
+ RegisterRef normalizeRef(RegisterRef RR) const;
+ RegisterRef restrictRef(RegisterRef AR, RegisterRef BR) const;
+
NodeAddr<RefNode*> getNextRelated(NodeAddr<InstrNode*> IA,
NodeAddr<RefNode*> RA) const;
NodeAddr<RefNode*> getNextImp(NodeAddr<InstrNode*> IA,
@@ -734,10 +848,20 @@ namespace rdf {
return BA.Addr->getType() == NodeAttrs::Code &&
BA.Addr->getKind() == NodeAttrs::Phi;
}
+ static bool IsPreservingDef(const NodeAddr<DefNode*> DA) {
+ uint16_t Flags = DA.Addr->getFlags();
+ return (Flags & NodeAttrs::Preserving) && !(Flags & NodeAttrs::Undef);
+ }
+
+ // Register aliasing.
+ bool alias(RegisterRef RA, RegisterRef RB) const;
private:
void reset();
+ RegisterSet getAliasSet(RegisterId Reg) const;
+ RegisterSet getLandingPadLiveIns() const;
+
NodeAddr<NodeBase*> newNode(uint16_t Attrs);
NodeAddr<NodeBase*> cloneNode(const NodeAddr<NodeBase*> B);
NodeAddr<UseNode*> newUse(NodeAddr<InstrNode*> Owner,
@@ -783,16 +907,22 @@ namespace rdf {
IA.Addr->removeMember(RA, *this);
}
- TimerGroup TimeG;
+ NodeAddr<BlockNode*> findBlock(MachineBasicBlock *BB) {
+ return BlockNodes[BB];
+ }
+
NodeAddr<FuncNode*> Func;
NodeAllocator Memory;
+ // Local map: MachineBasicBlock -> NodeAddr<BlockNode*>
+ std::map<MachineBasicBlock*,NodeAddr<BlockNode*>> BlockNodes;
+ // Lane mask map.
+ LaneMaskIndex LMI;
MachineFunction &MF;
const TargetInstrInfo &TII;
const TargetRegisterInfo &TRI;
const MachineDominatorTree &MDT;
const MachineDominanceFrontier &MDF;
- const RegisterAliasInfo &RAI;
const TargetOperandInfo &TOI;
}; // struct DataFlowGraph
@@ -806,7 +936,7 @@ namespace rdf {
while (NA.Addr != this) {
if (NA.Addr->getType() == NodeAttrs::Ref) {
NodeAddr<RefNode*> RA = NA;
- if (RA.Addr->getRegRef() == RR && P(NA))
+ if (RA.Addr->getRegRef(G) == RR && P(NA))
return NA;
if (NextOnly)
break;
@@ -838,6 +968,13 @@ namespace rdf {
}
+ // Optionally print the lane mask, if it is not ~0.
+ struct PrintLaneMaskOpt {
+ PrintLaneMaskOpt(LaneBitmask M) : Mask(M) {}
+ LaneBitmask Mask;
+ };
+ raw_ostream &operator<< (raw_ostream &OS, const PrintLaneMaskOpt &P);
+
template <typename T> struct Print;
template <typename T>
raw_ostream &operator<< (raw_ostream &OS, const Print<T> &P);
diff --git a/contrib/llvm/lib/Target/Hexagon/RDFLiveness.cpp b/contrib/llvm/lib/Target/Hexagon/RDFLiveness.cpp
index 641f01423176..e74c4bfc1645 100644
--- a/contrib/llvm/lib/Target/Hexagon/RDFLiveness.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/RDFLiveness.cpp
@@ -41,10 +41,10 @@ namespace rdf {
template<>
raw_ostream &operator<< (raw_ostream &OS, const Print<Liveness::RefMap> &P) {
OS << '{';
- for (auto I : P.Obj) {
- OS << ' ' << Print<RegisterRef>(I.first, P.G) << '{';
+ for (auto &I : P.Obj) {
+ OS << ' ' << PrintReg(I.first, &P.G.getTRI()) << '{';
for (auto J = I.second.begin(), E = I.second.end(); J != E; ) {
- OS << Print<NodeId>(*J, P.G);
+ OS << Print<NodeId>(J->first, P.G) << PrintLaneMaskOpt(J->second);
if (++J != E)
OS << ',';
}
@@ -85,10 +85,19 @@ namespace rdf {
// the data-flow.
NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
- NodeAddr<RefNode*> RefA, bool FullChain, const RegisterSet &DefRRs) {
+ NodeAddr<RefNode*> RefA, bool FullChain, const RegisterAggr &DefRRs) {
+ NodeList RDefs; // Return value.
SetVector<NodeId> DefQ;
SetVector<NodeId> Owners;
+ // Dead defs will be treated as if they were live, since they are actually
+ // on the data-flow path. They cannot be ignored because even though they
+ // do not generate meaningful values, they still modify registers.
+
+ // If the reference is undefined, there is nothing to do.
+ if (RefA.Addr->getFlags() & NodeAttrs::Undef)
+ return RDefs;
+
// The initial queue should not have reaching defs for shadows. The
// whole point of a shadow is that it will have a reaching def that
// is not aliased to the reaching defs of the related shadows.
@@ -108,26 +117,24 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
if (TA.Addr->getFlags() & NodeAttrs::PhiRef)
continue;
// Stop at the covering/overwriting def of the initial register reference.
- RegisterRef RR = TA.Addr->getRegRef();
- if (RAI.covers(RR, RefRR)) {
- uint16_t Flags = TA.Addr->getFlags();
- if (!(Flags & NodeAttrs::Preserving))
+ RegisterRef RR = TA.Addr->getRegRef(DFG);
+ if (!DFG.IsPreservingDef(TA))
+ if (RegisterAggr::isCoverOf(RR, RefRR, TRI))
continue;
- }
// Get the next level of reaching defs. This will include multiple
// reaching defs for shadows.
for (auto S : DFG.getRelatedRefs(TA.Addr->getOwner(DFG), TA))
- if (auto RD = NodeAddr<RefNode*>(S).Addr->getReachingDef())
+ if (NodeId RD = NodeAddr<RefNode*>(S).Addr->getReachingDef())
DefQ.insert(RD);
}
// Remove all non-phi defs that are not aliased to RefRR, and collect
// the owners of the remaining defs.
SetVector<NodeId> Defs;
- for (auto N : DefQ) {
+ for (NodeId N : DefQ) {
auto TA = DFG.addr<DefNode*>(N);
bool IsPhi = TA.Addr->getFlags() & NodeAttrs::PhiRef;
- if (!IsPhi && !RAI.alias(RefRR, TA.Addr->getRegRef()))
+ if (!IsPhi && !DFG.alias(RefRR, TA.Addr->getRegRef(DFG)))
continue;
Defs.insert(TA.Id);
Owners.insert(TA.Addr->getOwner(DFG).Id);
@@ -156,8 +163,8 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
if (StmtA) {
if (!StmtB) // OB is a phi and phis dominate statements.
return true;
- auto CA = NodeAddr<StmtNode*>(OA).Addr->getCode();
- auto CB = NodeAddr<StmtNode*>(OB).Addr->getCode();
+ MachineInstr *CA = NodeAddr<StmtNode*>(OA).Addr->getCode();
+ MachineInstr *CB = NodeAddr<StmtNode*>(OB).Addr->getCode();
// The order must be linear, so tie-break such equalities.
if (CA == CB)
return A < B;
@@ -189,21 +196,20 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
// covered if we added A first, and A would be covered
// if we added B first.
- NodeList RDefs;
- RegisterSet RRs = DefRRs;
+ RegisterAggr RRs(DefRRs);
auto DefInSet = [&Defs] (NodeAddr<RefNode*> TA) -> bool {
return TA.Addr->getKind() == NodeAttrs::Def &&
Defs.count(TA.Id);
};
- for (auto T : Tmp) {
- if (!FullChain && RAI.covers(RRs, RefRR))
+ for (NodeId T : Tmp) {
+ if (!FullChain && RRs.hasCoverOf(RefRR))
break;
auto TA = DFG.addr<InstrNode*>(T);
bool IsPhi = DFG.IsCode<NodeAttrs::Phi>(TA);
NodeList Ds;
for (NodeAddr<DefNode*> DA : TA.Addr->members_if(DefInSet, DFG)) {
- auto QR = DA.Addr->getRegRef();
+ RegisterRef QR = DA.Addr->getRegRef(DFG);
// Add phi defs even if they are covered by subsequent defs. This is
// for cases where the reached use is not covered by any of the defs
// encountered so far: the phi def is needed to expose the liveness
@@ -212,7 +218,7 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
// phi d1<R3>(,d2,), ... Phi def d1 is covered by d2.
// d2<R3>(d1,,u3), ...
// ..., u3<D1>(d2) This use needs to be live on entry.
- if (FullChain || IsPhi || !RAI.covers(RRs, QR))
+ if (FullChain || IsPhi || !RRs.hasCoverOf(QR))
Ds.push_back(DA);
}
RDefs.insert(RDefs.end(), Ds.begin(), Ds.end());
@@ -221,19 +227,17 @@ NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
// defs to actually define a register.
uint16_t Flags = DA.Addr->getFlags();
if (!FullChain || !(Flags & NodeAttrs::PhiRef))
- if (!(Flags & NodeAttrs::Preserving))
- RRs.insert(DA.Addr->getRegRef());
+ if (!(Flags & NodeAttrs::Preserving)) // Don't care about Undef here.
+ RRs.insert(DA.Addr->getRegRef(DFG));
}
}
- return RDefs;
-}
-
-
-static const RegisterSet NoRegs;
+ auto DeadP = [](const NodeAddr<DefNode*> DA) -> bool {
+ return DA.Addr->getFlags() & NodeAttrs::Dead;
+ };
+ RDefs.resize(std::distance(RDefs.begin(), remove_if(RDefs, DeadP)));
-NodeList Liveness::getAllReachingDefs(NodeAddr<RefNode*> RefA) {
- return getAllReachingDefs(RefA.Addr->getRegRef(), RefA, false, NoRegs);
+ return RDefs;
}
@@ -241,20 +245,20 @@ NodeSet Liveness::getAllReachingDefsRec(RegisterRef RefRR,
NodeAddr<RefNode*> RefA, NodeSet &Visited, const NodeSet &Defs) {
// Collect all defined registers. Do not consider phis to be defining
// anything, only collect "real" definitions.
- RegisterSet DefRRs;
- for (const auto D : Defs) {
+ RegisterAggr DefRRs(TRI);
+ for (NodeId D : Defs) {
const auto DA = DFG.addr<const DefNode*>(D);
if (!(DA.Addr->getFlags() & NodeAttrs::PhiRef))
- DefRRs.insert(DA.Addr->getRegRef());
+ DefRRs.insert(DA.Addr->getRegRef(DFG));
}
- auto RDs = getAllReachingDefs(RefRR, RefA, true, DefRRs);
+ NodeList RDs = getAllReachingDefs(RefRR, RefA, true, DefRRs);
if (RDs.empty())
return Defs;
// Make a copy of the preexisting definitions and add the newly found ones.
NodeSet TmpDefs = Defs;
- for (auto R : RDs)
+ for (NodeAddr<NodeBase*> R : RDs)
TmpDefs.insert(R.Id);
NodeSet Result = Defs;
@@ -279,39 +283,43 @@ NodeSet Liveness::getAllReachingDefsRec(RegisterRef RefRR,
NodeSet Liveness::getAllReachedUses(RegisterRef RefRR,
- NodeAddr<DefNode*> DefA, const RegisterSet &DefRRs) {
+ NodeAddr<DefNode*> DefA, const RegisterAggr &DefRRs) {
NodeSet Uses;
// If the original register is already covered by all the intervening
// defs, no more uses can be reached.
- if (RAI.covers(DefRRs, RefRR))
+ if (DefRRs.hasCoverOf(RefRR))
return Uses;
// Add all directly reached uses.
- NodeId U = DefA.Addr->getReachedUse();
+ // If the def is dead, it does not provide a value for any use.
+ bool IsDead = DefA.Addr->getFlags() & NodeAttrs::Dead;
+ NodeId U = !IsDead ? DefA.Addr->getReachedUse() : 0;
while (U != 0) {
auto UA = DFG.addr<UseNode*>(U);
- auto UR = UA.Addr->getRegRef();
- if (RAI.alias(RefRR, UR) && !RAI.covers(DefRRs, UR))
- Uses.insert(U);
+ if (!(UA.Addr->getFlags() & NodeAttrs::Undef)) {
+ RegisterRef UR = UA.Addr->getRegRef(DFG);
+ if (DFG.alias(RefRR, UR) && !DefRRs.hasCoverOf(UR))
+ Uses.insert(U);
+ }
U = UA.Addr->getSibling();
}
- // Traverse all reached defs.
+ // Traverse all reached defs. This time dead defs cannot be ignored.
for (NodeId D = DefA.Addr->getReachedDef(), NextD; D != 0; D = NextD) {
auto DA = DFG.addr<DefNode*>(D);
NextD = DA.Addr->getSibling();
- auto DR = DA.Addr->getRegRef();
+ RegisterRef DR = DA.Addr->getRegRef(DFG);
// If this def is already covered, it cannot reach anything new.
// Similarly, skip it if it is not aliased to the interesting register.
- if (RAI.covers(DefRRs, DR) || !RAI.alias(RefRR, DR))
+ if (DefRRs.hasCoverOf(DR) || !DFG.alias(RefRR, DR))
continue;
NodeSet T;
- if (DA.Addr->getFlags() & NodeAttrs::Preserving) {
+ if (DFG.IsPreservingDef(DA)) {
// If it is a preserving def, do not update the set of intervening defs.
T = getAllReachedUses(RefRR, DA, DefRRs);
} else {
- RegisterSet NewDefRRs = DefRRs;
+ RegisterAggr NewDefRRs = DefRRs;
NewDefRRs.insert(DR);
T = getAllReachedUses(RefRR, DA, NewDefRRs);
}
@@ -326,42 +334,57 @@ void Liveness::computePhiInfo() {
NodeList Phis;
NodeAddr<FuncNode*> FA = DFG.getFunc();
- auto Blocks = FA.Addr->members(DFG);
+ NodeList Blocks = FA.Addr->members(DFG);
for (NodeAddr<BlockNode*> BA : Blocks) {
auto Ps = BA.Addr->members_if(DFG.IsCode<NodeAttrs::Phi>, DFG);
Phis.insert(Phis.end(), Ps.begin(), Ps.end());
}
// phi use -> (map: reaching phi -> set of registers defined in between)
- std::map<NodeId,std::map<NodeId,RegisterSet>> PhiUp;
+ std::map<NodeId,std::map<NodeId,RegisterAggr>> PhiUp;
std::vector<NodeId> PhiUQ; // Work list of phis for upward propagation.
// Go over all phis.
for (NodeAddr<PhiNode*> PhiA : Phis) {
// Go over all defs and collect the reached uses that are non-phi uses
// (i.e. the "real uses").
- auto &RealUses = RealUseMap[PhiA.Id];
- auto PhiRefs = PhiA.Addr->members(DFG);
+ RefMap &RealUses = RealUseMap[PhiA.Id];
+ NodeList PhiRefs = PhiA.Addr->members(DFG);
// Have a work queue of defs whose reached uses need to be found.
// For each def, add to the queue all reached (non-phi) defs.
SetVector<NodeId> DefQ;
NodeSet PhiDefs;
- for (auto R : PhiRefs) {
+ for (NodeAddr<RefNode*> R : PhiRefs) {
if (!DFG.IsRef<NodeAttrs::Def>(R))
continue;
DefQ.insert(R.Id);
PhiDefs.insert(R.Id);
}
+
+ // Collect the super-set of all possible reached uses. This set will
+ // contain all uses reached from this phi, either directly from the
+ // phi defs, or (recursively) via non-phi defs reached by the phi defs.
+ // This set of uses will later be trimmed to only contain these uses that
+ // are actually reached by the phi defs.
for (unsigned i = 0; i < DefQ.size(); ++i) {
NodeAddr<DefNode*> DA = DFG.addr<DefNode*>(DefQ[i]);
- NodeId UN = DA.Addr->getReachedUse();
+ // Visit all reached uses. Phi defs should not really have the "dead"
+ // flag set, but check it anyway for consistency.
+ bool IsDead = DA.Addr->getFlags() & NodeAttrs::Dead;
+ NodeId UN = !IsDead ? DA.Addr->getReachedUse() : 0;
while (UN != 0) {
NodeAddr<UseNode*> A = DFG.addr<UseNode*>(UN);
- if (!(A.Addr->getFlags() & NodeAttrs::PhiRef))
- RealUses[getRestrictedRegRef(A)].insert(A.Id);
+ uint16_t F = A.Addr->getFlags();
+ if ((F & (NodeAttrs::Undef | NodeAttrs::PhiRef)) == 0) {
+ RegisterRef R = DFG.normalizeRef(getRestrictedRegRef(A));
+ RealUses[R.Reg].insert({A.Id,R.Mask});
+ }
UN = A.Addr->getSibling();
}
+ // Visit all reached defs, and add them to the queue. These defs may
+ // override some of the uses collected here, but that will be handled
+ // later.
NodeId DN = DA.Addr->getReachedDef();
while (DN != 0) {
NodeAddr<DefNode*> A = DFG.addr<DefNode*>(DN);
@@ -388,7 +411,7 @@ void Liveness::computePhiInfo() {
// = R1:0 u6 Not reached by d1 (covered collectively
// by d3 and d5), but following reached
// defs and uses from d1 will lead here.
- auto HasDef = [&PhiDefs] (NodeAddr<DefNode*> DA) -> bool {
+ auto InPhiDefs = [&PhiDefs] (NodeAddr<DefNode*> DA) -> bool {
return PhiDefs.count(DA.Id);
};
for (auto UI = RealUses.begin(), UE = RealUses.end(); UI != UE; ) {
@@ -396,11 +419,14 @@ void Liveness::computePhiInfo() {
// uses of it. For each such use, check if it is reached by this phi,
// i.e. check if the set of its reaching uses intersects the set of
// this phi's defs.
- auto &Uses = UI->second;
+ NodeRefSet &Uses = UI->second;
for (auto I = Uses.begin(), E = Uses.end(); I != E; ) {
- auto UA = DFG.addr<UseNode*>(*I);
- NodeList RDs = getAllReachingDefs(UI->first, UA);
- if (std::any_of(RDs.begin(), RDs.end(), HasDef))
+ auto UA = DFG.addr<UseNode*>(I->first);
+ // Undef flag is checked above.
+ assert((UA.Addr->getFlags() & NodeAttrs::Undef) == 0);
+ RegisterRef R(UI->first, I->second);
+ NodeList RDs = getAllReachingDefs(R, UA);
+ if (any_of(RDs, InPhiDefs))
++I;
else
I = Uses.erase(I);
@@ -418,31 +444,50 @@ void Liveness::computePhiInfo() {
// Go over all phi uses and check if the reaching def is another phi.
// Collect the phis that are among the reaching defs of these uses.
- // While traversing the list of reaching defs for each phi use, collect
- // the set of registers defined between this phi (Phi) and the owner phi
+ // While traversing the list of reaching defs for each phi use, accumulate
+ // the set of registers defined between this phi (PhiA) and the owner phi
// of the reaching def.
+ NodeSet SeenUses;
+
for (auto I : PhiRefs) {
- if (!DFG.IsRef<NodeAttrs::Use>(I))
+ if (!DFG.IsRef<NodeAttrs::Use>(I) || SeenUses.count(I.Id))
continue;
NodeAddr<UseNode*> UA = I;
- auto &UpMap = PhiUp[UA.Id];
- RegisterSet DefRRs;
- for (NodeAddr<DefNode*> DA : getAllReachingDefs(UA)) {
- if (DA.Addr->getFlags() & NodeAttrs::PhiRef)
- UpMap[DA.Addr->getOwner(DFG).Id] = DefRRs;
- else
- DefRRs.insert(DA.Addr->getRegRef());
+
+ // Given a phi use UA, traverse all related phi uses (including UA).
+ // The related phi uses may reach different phi nodes or may reach the
+ // same phi node. If multiple uses reach the same phi P, the intervening
+ // defs must be accumulated for all such uses. To group all such uses
+ // into one set, map their node ids to the first use id that reaches P.
+ std::map<NodeId,NodeId> FirstUse; // Phi reached up -> first phi use.
+
+ for (NodeAddr<UseNode*> VA : DFG.getRelatedRefs(PhiA, UA)) {
+ SeenUses.insert(VA.Id);
+ RegisterAggr DefRRs(TRI);
+ for (NodeAddr<DefNode*> DA : getAllReachingDefs(VA)) {
+ if (DA.Addr->getFlags() & NodeAttrs::PhiRef) {
+ NodeId RP = DA.Addr->getOwner(DFG).Id;
+ NodeId FU = FirstUse.insert({RP,VA.Id}).first->second;
+ std::map<NodeId,RegisterAggr> &M = PhiUp[FU];
+ auto F = M.find(RP);
+ if (F == M.end())
+ M.insert(std::make_pair(RP, DefRRs));
+ else
+ F->second.insert(DefRRs);
+ }
+ DefRRs.insert(DA.Addr->getRegRef(DFG));
+ }
}
}
}
if (Trace) {
- dbgs() << "Phi-up-to-phi map:\n";
+ dbgs() << "Phi-up-to-phi map with intervening defs:\n";
for (auto I : PhiUp) {
dbgs() << "phi " << Print<NodeId>(I.first, DFG) << " -> {";
for (auto R : I.second)
dbgs() << ' ' << Print<NodeId>(R.first, DFG)
- << Print<RegisterSet>(R.second, DFG);
+ << Print<RegisterAggr>(R.second, DFG);
dbgs() << " }\n";
}
}
@@ -467,40 +512,50 @@ void Liveness::computePhiInfo() {
//
// When propagating uses up the phi chains, get the all reaching defs
// for a given phi use, and traverse the list until the propagated ref
- // is covered, or until or until reaching the final phi. Only assume
- // that the reference reaches the phi in the latter case.
+ // is covered, or until reaching the final phi. Only assume that the
+ // reference reaches the phi in the latter case.
for (unsigned i = 0; i < PhiUQ.size(); ++i) {
auto PA = DFG.addr<PhiNode*>(PhiUQ[i]);
- auto &RealUses = RealUseMap[PA.Id];
- for (auto U : PA.Addr->members_if(DFG.IsRef<NodeAttrs::Use>, DFG)) {
- NodeAddr<UseNode*> UA = U;
- auto &UpPhis = PhiUp[UA.Id];
- for (auto UP : UpPhis) {
+ NodeList PUs = PA.Addr->members_if(DFG.IsRef<NodeAttrs::Use>, DFG);
+ RefMap &RUM = RealUseMap[PA.Id];
+
+ for (NodeAddr<UseNode*> UA : PUs) {
+ std::map<NodeId,RegisterAggr> &PUM = PhiUp[UA.Id];
+ RegisterRef UR = DFG.normalizeRef(getRestrictedRegRef(UA));
+ for (const std::pair<NodeId,RegisterAggr> &P : PUM) {
bool Changed = false;
- auto &MidDefs = UP.second;
- // Collect the set UpReached of uses that are reached by the current
- // phi PA, and are not covered by any intervening def between PA and
- // the upward phi UP.
- RegisterSet UpReached;
- for (auto T : RealUses) {
- if (!isRestricted(PA, UA, T.first))
- continue;
- if (!RAI.covers(MidDefs, T.first))
- UpReached.insert(T.first);
- }
- if (UpReached.empty())
+ const RegisterAggr &MidDefs = P.second;
+
+ // Collect the set PropUp of uses that are reached by the current
+ // phi PA, and are not covered by any intervening def between the
+ // currently visited use UA and the upward phi P.
+
+ if (MidDefs.hasCoverOf(UR))
continue;
- // Update the set PRUs of real uses reached by the upward phi UP with
- // the actual set of uses (UpReached) that the UP phi reaches.
- auto &PRUs = RealUseMap[UP.first];
- for (auto R : UpReached) {
- unsigned Z = PRUs[R].size();
- PRUs[R].insert(RealUses[R].begin(), RealUses[R].end());
- Changed |= (PRUs[R].size() != Z);
+
+ // General algorithm:
+ // for each (R,U) : U is use node of R, U is reached by PA
+ // if MidDefs does not cover (R,U)
+ // then add (R-MidDefs,U) to RealUseMap[P]
+ //
+ for (const std::pair<RegisterId,NodeRefSet> &T : RUM) {
+ RegisterRef R = DFG.restrictRef(RegisterRef(T.first), UR);
+ if (!R)
+ continue;
+ for (std::pair<NodeId,LaneBitmask> V : T.second) {
+ RegisterRef S = DFG.restrictRef(RegisterRef(R.Reg, V.second), R);
+ if (!S)
+ continue;
+ if (RegisterRef SS = MidDefs.clearIn(S)) {
+ NodeRefSet &RS = RealUseMap[P.first][SS.Reg];
+ Changed |= RS.insert({V.first,SS.Mask}).second;
+ }
+ }
}
+
if (Changed)
- PhiUQ.push_back(UP.first);
+ PhiUQ.push_back(P.first);
}
}
}
@@ -512,7 +567,7 @@ void Liveness::computePhiInfo() {
NodeAddr<PhiNode*> PA = DFG.addr<PhiNode*>(I.first);
NodeList Ds = PA.Addr->members_if(DFG.IsRef<NodeAttrs::Def>, DFG);
if (!Ds.empty()) {
- RegisterRef RR = NodeAddr<DefNode*>(Ds[0]).Addr->getRegRef();
+ RegisterRef RR = NodeAddr<DefNode*>(Ds[0]).Addr->getRegRef(DFG);
dbgs() << '<' << Print<RegisterRef>(RR, DFG) << '>';
} else {
dbgs() << "<noreg>";
@@ -540,7 +595,7 @@ void Liveness::computeLiveIns() {
// Compute IDF first, then the inverse.
decltype(IIDF) IDF;
- for (auto &B : MF) {
+ for (MachineBasicBlock &B : MF) {
auto F1 = MDF.find(&B);
if (F1 == MDF.end())
continue;
@@ -562,20 +617,20 @@ void Liveness::computeLiveIns() {
computePhiInfo();
NodeAddr<FuncNode*> FA = DFG.getFunc();
- auto Blocks = FA.Addr->members(DFG);
+ NodeList Blocks = FA.Addr->members(DFG);
// Build the phi live-on-entry map.
for (NodeAddr<BlockNode*> BA : Blocks) {
MachineBasicBlock *MB = BA.Addr->getCode();
- auto &LON = PhiLON[MB];
+ RefMap &LON = PhiLON[MB];
for (auto P : BA.Addr->members_if(DFG.IsCode<NodeAttrs::Phi>, DFG))
- for (auto S : RealUseMap[P.Id])
+ for (const RefMap::value_type &S : RealUseMap[P.Id])
LON[S.first].insert(S.second.begin(), S.second.end());
}
if (Trace) {
dbgs() << "Phi live-on-entry map:\n";
- for (auto I : PhiLON)
+ for (auto &I : PhiLON)
dbgs() << "block #" << I.first->getNumber() << " -> "
<< Print<RefMap>(I.second, DFG) << '\n';
}
@@ -584,33 +639,35 @@ void Liveness::computeLiveIns() {
// "real" uses. Propagate this set backwards into the block predecessors
// through the reaching defs of the corresponding phi uses.
for (NodeAddr<BlockNode*> BA : Blocks) {
- auto Phis = BA.Addr->members_if(DFG.IsCode<NodeAttrs::Phi>, DFG);
+ NodeList Phis = BA.Addr->members_if(DFG.IsCode<NodeAttrs::Phi>, DFG);
for (NodeAddr<PhiNode*> PA : Phis) {
- auto &RUs = RealUseMap[PA.Id];
+ RefMap &RUs = RealUseMap[PA.Id];
if (RUs.empty())
continue;
for (auto U : PA.Addr->members_if(DFG.IsRef<NodeAttrs::Use>, DFG)) {
- NodeAddr<PhiUseNode*> UA = U;
- if (UA.Addr->getReachingDef() == 0)
+ NodeAddr<PhiUseNode*> PUA = U;
+ if (PUA.Addr->getReachingDef() == 0)
continue;
// Mark all reached "real" uses of P as live on exit in the
// predecessor.
// Remap all the RUs so that they have a correct reaching def.
- auto PrA = DFG.addr<BlockNode*>(UA.Addr->getPredecessor());
- auto &LOX = PhiLOX[PrA.Addr->getCode()];
- for (auto R : RUs) {
- RegisterRef RR = R.first;
- if (!isRestricted(PA, UA, RR))
- RR = getRestrictedRegRef(UA);
- // The restricted ref may be different from the ref that was
- // accessed in the "real use". This means that this phi use
- // is not the one that carries this reference, so skip it.
- if (!RAI.alias(R.first, RR))
+ auto PrA = DFG.addr<BlockNode*>(PUA.Addr->getPredecessor());
+ RefMap &LOX = PhiLOX[PrA.Addr->getCode()];
+
+ RegisterRef UR = DFG.normalizeRef(getRestrictedRegRef(PUA));
+ for (const std::pair<RegisterId,NodeRefSet> &T : RUs) {
+ // Check if T.first aliases UR?
+ LaneBitmask M;
+ for (std::pair<NodeId,LaneBitmask> P : T.second)
+ M |= P.second;
+
+ RegisterRef S = DFG.restrictRef(RegisterRef(T.first, M), UR);
+ if (!S)
continue;
- for (auto D : getAllReachingDefs(RR, UA))
- LOX[RR].insert(D.Id);
+ for (NodeAddr<DefNode*> D : getAllReachingDefs(S, PUA))
+ LOX[S.Reg].insert({D.Id, S.Mask});
}
} // for U : phi uses
} // for P : Phis
@@ -618,7 +675,7 @@ void Liveness::computeLiveIns() {
if (Trace) {
dbgs() << "Phi live-on-exit map:\n";
- for (auto I : PhiLOX)
+ for (auto &I : PhiLOX)
dbgs() << "block #" << I.first->getNumber() << " -> "
<< Print<RefMap>(I.second, DFG) << '\n';
}
@@ -629,19 +686,41 @@ void Liveness::computeLiveIns() {
// Add function live-ins to the live-in set of the function entry block.
auto &EntryIn = LiveMap[&MF.front()];
for (auto I = MRI.livein_begin(), E = MRI.livein_end(); I != E; ++I)
- EntryIn.insert({I->first,0});
+ EntryIn.insert(RegisterRef(I->first));
if (Trace) {
// Dump the liveness map
- for (auto &B : MF) {
- BitVector LV(TRI.getNumRegs());
+ for (MachineBasicBlock &B : MF) {
+ std::vector<RegisterRef> LV;
for (auto I = B.livein_begin(), E = B.livein_end(); I != E; ++I)
- LV.set(I->PhysReg);
+ LV.push_back(RegisterRef(I->PhysReg, I->LaneMask));
+ std::sort(LV.begin(), LV.end());
dbgs() << "BB#" << B.getNumber() << "\t rec = {";
- for (int x = LV.find_first(); x >= 0; x = LV.find_next(x))
- dbgs() << ' ' << Print<RegisterRef>({unsigned(x),0}, DFG);
+ for (auto I : LV)
+ dbgs() << ' ' << Print<RegisterRef>(I, DFG);
dbgs() << " }\n";
- dbgs() << "\tcomp = " << Print<RegisterSet>(LiveMap[&B], DFG) << '\n';
+ //dbgs() << "\tcomp = " << Print<RegisterAggr>(LiveMap[&B], DFG) << '\n';
+
+ LV.clear();
+ for (std::pair<RegisterId,LaneBitmask> P : LiveMap[&B]) {
+ MCSubRegIndexIterator S(P.first, &TRI);
+ if (!S.isValid()) {
+ LV.push_back(RegisterRef(P.first));
+ continue;
+ }
+ do {
+ LaneBitmask M = TRI.getSubRegIndexLaneMask(S.getSubRegIndex());
+ if ((M & P.second).any())
+ LV.push_back(RegisterRef(S.getSubReg()));
+ ++S;
+ } while (S.isValid());
+ }
+ std::sort(LV.begin(), LV.end());
+ dbgs() << "\tcomp = {";
+ for (auto I : LV)
+ dbgs() << ' ' << Print<RegisterRef>(I, DFG);
+ dbgs() << " }\n";
+
}
}
}
@@ -658,8 +737,7 @@ void Liveness::resetLiveIns() {
// Add the newly computed live-ins.
auto &LiveIns = LiveMap[&B];
for (auto I : LiveIns) {
- assert(I.Sub == 0);
- B.addLiveIn(I.Reg);
+ B.addLiveIn({MCPhysReg(I.first), I.second});
}
}
}
@@ -672,9 +750,20 @@ void Liveness::resetKills() {
void Liveness::resetKills(MachineBasicBlock *B) {
- auto CopyLiveIns = [] (MachineBasicBlock *B, BitVector &LV) -> void {
- for (auto I = B->livein_begin(), E = B->livein_end(); I != E; ++I)
- LV.set(I->PhysReg);
+ auto CopyLiveIns = [this] (MachineBasicBlock *B, BitVector &LV) -> void {
+ for (auto I : B->liveins()) {
+ MCSubRegIndexIterator S(I.PhysReg, &TRI);
+ if (!S.isValid()) {
+ LV.set(I.PhysReg);
+ continue;
+ }
+ do {
+ LaneBitmask M = TRI.getSubRegIndexLaneMask(S.getSubRegIndex());
+ if ((M & I.LaneMask).any())
+ LV.set(S.getSubReg());
+ ++S;
+ } while (S.isValid());
+ }
};
BitVector LiveIn(TRI.getNumRegs()), Live(TRI.getNumRegs());
@@ -724,26 +813,6 @@ void Liveness::resetKills(MachineBasicBlock *B) {
}
-// For shadows, determine if RR is aliased to a reaching def of any other
-// shadow associated with RA. If it is not, then RR is "restricted" to RA,
-// and so it can be considered a value specific to RA. This is important
-// for accurately determining values associated with phi uses.
-// For non-shadows, this function returns "true".
-bool Liveness::isRestricted(NodeAddr<InstrNode*> IA, NodeAddr<RefNode*> RA,
- RegisterRef RR) const {
- NodeId Start = RA.Id;
- for (NodeAddr<RefNode*> TA = DFG.getNextShadow(IA, RA);
- TA.Id != 0 && TA.Id != Start; TA = DFG.getNextShadow(IA, TA)) {
- NodeId RD = TA.Addr->getReachingDef();
- if (RD == 0)
- continue;
- if (RAI.alias(RR, DFG.addr<DefNode*>(RD).Addr->getRegRef()))
- return false;
- }
- return true;
-}
-
-
RegisterRef Liveness::getRestrictedRegRef(NodeAddr<RefNode*> RA) const {
assert(DFG.IsRef<NodeAttrs::Use>(RA));
if (RA.Addr->getFlags() & NodeAttrs::Shadow) {
@@ -751,14 +820,7 @@ RegisterRef Liveness::getRestrictedRegRef(NodeAddr<RefNode*> RA) const {
assert(RD);
RA = DFG.addr<DefNode*>(RD);
}
- return RA.Addr->getRegRef();
-}
-
-
-unsigned Liveness::getPhysReg(RegisterRef RR) const {
- if (!TargetRegisterInfo::isPhysicalRegister(RR.Reg))
- return 0;
- return RR.Sub ? TRI.getSubReg(RR.Reg, RR.Sub) : RR.Reg;
+ return RA.Addr->getRegRef(DFG);
}
@@ -808,77 +870,99 @@ void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) {
}
if (Trace) {
- dbgs() << LLVM_FUNCTION_NAME << " in BB#" << B->getNumber()
- << " after recursion into";
+ dbgs() << "\n-- BB#" << B->getNumber() << ": " << __func__
+ << " after recursion into: {";
for (auto I : *N)
dbgs() << ' ' << I->getBlock()->getNumber();
- dbgs() << "\n LiveIn: " << Print<RefMap>(LiveIn, DFG);
- dbgs() << "\n Local: " << Print<RegisterSet>(LiveMap[B], DFG) << '\n';
+ dbgs() << " }\n";
+ dbgs() << " LiveIn: " << Print<RefMap>(LiveIn, DFG) << '\n';
+ dbgs() << " Local: " << Print<RegisterAggr>(LiveMap[B], DFG) << '\n';
}
- // Add phi uses that are live on exit from this block.
+ // Add reaching defs of phi uses that are live on exit from this block.
RefMap &PUs = PhiLOX[B];
- for (auto S : PUs)
+ for (auto &S : PUs)
LiveIn[S.first].insert(S.second.begin(), S.second.end());
if (Trace) {
dbgs() << "after LOX\n";
dbgs() << " LiveIn: " << Print<RefMap>(LiveIn, DFG) << '\n';
- dbgs() << " Local: " << Print<RegisterSet>(LiveMap[B], DFG) << '\n';
+ dbgs() << " Local: " << Print<RegisterAggr>(LiveMap[B], DFG) << '\n';
}
- // Stop tracking all uses defined in this block: erase those records
- // where the reaching def is located in B and which cover all reached
- // uses.
- auto Copy = LiveIn;
+ // The LiveIn map at this point has all defs that are live-on-exit from B,
+ // as if they were live-on-entry to B. First, we need to filter out all
+ // defs that are present in this block. Then we will add reaching defs of
+ // all upward-exposed uses.
+
+ // To filter out the defs, first make a copy of LiveIn, and then re-populate
+ // LiveIn with the defs that should remain.
+ RefMap LiveInCopy = LiveIn;
LiveIn.clear();
- for (auto I : Copy) {
- auto &Defs = LiveIn[I.first];
- NodeSet Rest;
- for (auto R : I.second) {
- auto DA = DFG.addr<DefNode*>(R);
- RegisterRef DDR = DA.Addr->getRegRef();
+ for (const std::pair<RegisterId,NodeRefSet> &LE : LiveInCopy) {
+ RegisterRef LRef(LE.first);
+ NodeRefSet &NewDefs = LiveIn[LRef.Reg]; // To be filled.
+ const NodeRefSet &OldDefs = LE.second;
+ for (NodeRef OR : OldDefs) {
+ // R is a def node that was live-on-exit
+ auto DA = DFG.addr<DefNode*>(OR.first);
NodeAddr<InstrNode*> IA = DA.Addr->getOwner(DFG);
NodeAddr<BlockNode*> BA = IA.Addr->getOwner(DFG);
- // Defs from a different block need to be preserved. Defs from this
- // block will need to be processed further, except for phi defs, the
- // liveness of which is handled through the PhiLON/PhiLOX maps.
- if (B != BA.Addr->getCode())
- Defs.insert(R);
- else {
- bool IsPreserving = DA.Addr->getFlags() & NodeAttrs::Preserving;
- if (IA.Addr->getKind() != NodeAttrs::Phi && !IsPreserving) {
- bool Covering = RAI.covers(DDR, I.first);
- NodeId U = DA.Addr->getReachedUse();
- while (U && Covering) {
- auto DUA = DFG.addr<UseNode*>(U);
- RegisterRef Q = DUA.Addr->getRegRef();
- Covering = RAI.covers(DA.Addr->getRegRef(), Q);
- U = DUA.Addr->getSibling();
- }
- if (!Covering)
- Rest.insert(R);
- }
+ if (B != BA.Addr->getCode()) {
+ // Defs from a different block need to be preserved. Defs from this
+ // block will need to be processed further, except for phi defs, the
+ // liveness of which is handled through the PhiLON/PhiLOX maps.
+ NewDefs.insert(OR);
+ continue;
+ }
+
+ // Defs from this block need to stop the liveness from being
+ // propagated upwards. This only applies to non-preserving defs,
+ // and to the parts of the register actually covered by those defs.
+ // (Note that phi defs should always be preserving.)
+ RegisterAggr RRs(TRI);
+ LRef.Mask = OR.second;
+
+ if (!DFG.IsPreservingDef(DA)) {
+ assert(!(IA.Addr->getFlags() & NodeAttrs::Phi));
+ // DA is a non-phi def that is live-on-exit from this block, and
+ // that is also located in this block. LRef is a register ref
+ // whose use this def reaches. If DA covers LRef, then no part
+ // of LRef is exposed upwards.
+ if (RRs.insert(DA.Addr->getRegRef(DFG)).hasCoverOf(LRef))
+ continue;
}
- }
- // Non-covering defs from B.
- for (auto R : Rest) {
- auto DA = DFG.addr<DefNode*>(R);
- RegisterRef DRR = DA.Addr->getRegRef();
- RegisterSet RRs;
+ // DA itself was not sufficient to cover LRef. In general, it is
+ // the last in a chain of aliased defs before the exit from this block.
+ // There could be other defs in this block that are a part of that
+ // chain. Check that now: accumulate the registers from these defs,
+ // and if they all together cover LRef, it is not live-on-entry.
for (NodeAddr<DefNode*> TA : getAllReachingDefs(DA)) {
- NodeAddr<InstrNode*> IA = TA.Addr->getOwner(DFG);
- NodeAddr<BlockNode*> BA = IA.Addr->getOwner(DFG);
- // Preserving defs do not count towards covering.
+ // DefNode -> InstrNode -> BlockNode.
+ NodeAddr<InstrNode*> ITA = TA.Addr->getOwner(DFG);
+ NodeAddr<BlockNode*> BTA = ITA.Addr->getOwner(DFG);
+ // Reaching defs are ordered in the upward direction.
+ if (BTA.Addr->getCode() != B) {
+ // We have reached past the beginning of B, and the accumulated
+ // registers are not covering LRef. The first def from the
+ // upward chain will be live.
+ // Subtract all accumulated defs (RRs) from LRef.
+ RegisterAggr L(TRI);
+ L.insert(LRef).clear(RRs);
+ assert(!L.empty());
+ NewDefs.insert({TA.Id,L.begin()->second});
+ break;
+ }
+
+ // TA is in B. Only add this def to the accumulated cover if it is
+ // not preserving.
if (!(TA.Addr->getFlags() & NodeAttrs::Preserving))
- RRs.insert(TA.Addr->getRegRef());
- if (BA.Addr->getCode() == B)
- continue;
- if (RAI.covers(RRs, DRR))
+ RRs.insert(TA.Addr->getRegRef(DFG));
+ // If this is enough to cover LRef, then stop.
+ if (RRs.hasCoverOf(LRef))
break;
- Defs.insert(TA.Id);
}
}
}
@@ -888,7 +972,7 @@ void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) {
if (Trace) {
dbgs() << "after defs in block\n";
dbgs() << " LiveIn: " << Print<RefMap>(LiveIn, DFG) << '\n';
- dbgs() << " Local: " << Print<RegisterSet>(LiveMap[B], DFG) << '\n';
+ dbgs() << " Local: " << Print<RegisterAggr>(LiveMap[B], DFG) << '\n';
}
// Scan the block for upward-exposed uses and add them to the tracking set.
@@ -897,38 +981,44 @@ void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) {
if (IA.Addr->getKind() != NodeAttrs::Stmt)
continue;
for (NodeAddr<UseNode*> UA : IA.Addr->members_if(DFG.IsUse, DFG)) {
- RegisterRef RR = UA.Addr->getRegRef();
- for (auto D : getAllReachingDefs(UA))
+ if (UA.Addr->getFlags() & NodeAttrs::Undef)
+ continue;
+ RegisterRef RR = DFG.normalizeRef(UA.Addr->getRegRef(DFG));
+ for (NodeAddr<DefNode*> D : getAllReachingDefs(UA))
if (getBlockWithRef(D.Id) != B)
- LiveIn[RR].insert(D.Id);
+ LiveIn[RR.Reg].insert({D.Id,RR.Mask});
}
}
if (Trace) {
dbgs() << "after uses in block\n";
dbgs() << " LiveIn: " << Print<RefMap>(LiveIn, DFG) << '\n';
- dbgs() << " Local: " << Print<RegisterSet>(LiveMap[B], DFG) << '\n';
+ dbgs() << " Local: " << Print<RegisterAggr>(LiveMap[B], DFG) << '\n';
}
// Phi uses should not be propagated up the dominator tree, since they
// are not dominated by their corresponding reaching defs.
- auto &Local = LiveMap[B];
- auto &LON = PhiLON[B];
- for (auto R : LON)
- Local.insert(R.first);
+ RegisterAggr &Local = LiveMap[B];
+ RefMap &LON = PhiLON[B];
+ for (auto &R : LON) {
+ LaneBitmask M;
+ for (auto P : R.second)
+ M |= P.second;
+ Local.insert(RegisterRef(R.first,M));
+ }
if (Trace) {
dbgs() << "after phi uses in block\n";
dbgs() << " LiveIn: " << Print<RefMap>(LiveIn, DFG) << '\n';
- dbgs() << " Local: " << Print<RegisterSet>(Local, DFG) << '\n';
+ dbgs() << " Local: " << Print<RegisterAggr>(Local, DFG) << '\n';
}
for (auto C : IIDF[B]) {
- auto &LiveC = LiveMap[C];
- for (auto S : LiveIn)
+ RegisterAggr &LiveC = LiveMap[C];
+ for (const std::pair<RegisterId,NodeRefSet> &S : LiveIn)
for (auto R : S.second)
- if (MDT.properlyDominates(getBlockWithRef(R), C))
- LiveC.insert(S.first);
+ if (MDT.properlyDominates(getBlockWithRef(R.first), C))
+ LiveC.insert(RegisterRef(S.first, R.second));
}
}
diff --git a/contrib/llvm/lib/Target/Hexagon/RDFLiveness.h b/contrib/llvm/lib/Target/Hexagon/RDFLiveness.h
index 2b49c7488ce3..c88396f36bbb 100644
--- a/contrib/llvm/lib/Target/Hexagon/RDFLiveness.h
+++ b/contrib/llvm/lib/Target/Hexagon/RDFLiveness.h
@@ -30,20 +30,44 @@ namespace llvm {
namespace rdf {
struct Liveness {
public:
- typedef std::map<MachineBasicBlock*,RegisterSet> LiveMapType;
- typedef std::map<RegisterRef,NodeSet> RefMap;
+ // This is really a std::map, except that it provides a non-trivial
+ // default constructor to the element accessed via [].
+ struct LiveMapType {
+ LiveMapType(const TargetRegisterInfo &tri) : Empty(tri) {}
+
+ RegisterAggr &operator[] (MachineBasicBlock *B) {
+ return Map.emplace(B, Empty).first->second;
+ }
+ private:
+ RegisterAggr Empty;
+ std::map<MachineBasicBlock*,RegisterAggr> Map;
+ };
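Aside: LiveMapType exists because RegisterAggr has no default constructor (it needs a TargetRegisterInfo), so a plain std::map's operator[] cannot be used; the wrapper seeds missing entries from a pre-built Empty prototype via emplace. A generic sketch of that pattern, with a stand-in Aggr type and hypothetical names:

    #include <cassert>
    #include <map>
    #include <string>

    struct Aggr {                // Stand-in for RegisterAggr: no default ctor.
      explicit Aggr(int Seed) : Value(Seed) {}
      int Value;
    };

    struct DefaultedMap {
      explicit DefaultedMap(int Seed) : Empty(Seed) {}
      Aggr &operator[](const std::string &Key) {
        // emplace() copies Empty only if Key is not present yet.
        return Map.emplace(Key, Empty).first->second;
      }
    private:
      Aggr Empty;
      std::map<std::string, Aggr> Map;
    };

    int main() {
      DefaultedMap M(7);
      assert(M["bb0"].Value == 7);   // Missing key gets a copy of the prototype.
      M["bb0"].Value = 11;
      assert(M["bb0"].Value == 11);  // Existing entry is returned unchanged.
    }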
+
+ typedef std::pair<NodeId,LaneBitmask> NodeRef;
+ typedef std::set<NodeRef> NodeRefSet;
+ // RegisterId in RefMap must be normalized.
+ typedef std::map<RegisterId,NodeRefSet> RefMap;
Liveness(MachineRegisterInfo &mri, const DataFlowGraph &g)
: DFG(g), TRI(g.getTRI()), MDT(g.getDT()), MDF(g.getDF()),
- RAI(g.getRAI()), MRI(mri), Empty(), Trace(false) {}
+ MRI(mri), LiveMap(g.getTRI()), Empty(), NoRegs(g.getTRI()),
+ Trace(false) {}
NodeList getAllReachingDefs(RegisterRef RefRR, NodeAddr<RefNode*> RefA,
- bool FullChain = false, const RegisterSet &DefRRs = RegisterSet());
- NodeList getAllReachingDefs(NodeAddr<RefNode*> RefA);
+ bool FullChain, const RegisterAggr &DefRRs);
+ NodeList getAllReachingDefs(NodeAddr<RefNode*> RefA) {
+ return getAllReachingDefs(RefA.Addr->getRegRef(DFG), RefA, false, NoRegs);
+ }
+ NodeList getAllReachingDefs(RegisterRef RefRR, NodeAddr<RefNode*> RefA) {
+ return getAllReachingDefs(RefRR, RefA, false, NoRegs);
+ }
NodeSet getAllReachingDefsRec(RegisterRef RefRR, NodeAddr<RefNode*> RefA,
NodeSet &Visited, const NodeSet &Defs);
NodeSet getAllReachedUses(RegisterRef RefRR, NodeAddr<DefNode*> DefA,
- const RegisterSet &DefRRs = RegisterSet());
+ const RegisterAggr &DefRRs);
+ NodeSet getAllReachedUses(RegisterRef RefRR, NodeAddr<DefNode*> DefA) {
+ return getAllReachedUses(RefRR, DefA, NoRegs);
+ }
LiveMapType &getLiveMap() { return LiveMap; }
const LiveMapType &getLiveMap() const { return LiveMap; }
@@ -65,10 +89,10 @@ namespace rdf {
const TargetRegisterInfo &TRI;
const MachineDominatorTree &MDT;
const MachineDominanceFrontier &MDF;
- const RegisterAliasInfo &RAI;
MachineRegisterInfo &MRI;
LiveMapType LiveMap;
const RefMap Empty;
+ const RegisterAggr NoRegs;
bool Trace;
// Cache of mapping from node ids (for RefNodes) to the containing
@@ -79,7 +103,8 @@ namespace rdf {
// Phi information:
//
- // map: NodeId -> (map: RegisterRef -> NodeSet)
+ // RealUseMap
+ // map: NodeId -> (map: RegisterId -> NodeRefSet)
// phi id -> (map: register -> set of reached non-phi uses)
std::map<NodeId, RefMap> RealUseMap;
@@ -96,10 +121,9 @@ namespace rdf {
// the dominator tree), create a map: block -> set of uses live on exit.
std::map<MachineBasicBlock*,RefMap> PhiLOX;
- bool isRestricted(NodeAddr<InstrNode*> IA, NodeAddr<RefNode*> RA,
+ bool isRestrictedToRef(NodeAddr<InstrNode*> IA, NodeAddr<RefNode*> RA,
RegisterRef RR) const;
RegisterRef getRestrictedRegRef(NodeAddr<RefNode*> RA) const;
- unsigned getPhysReg(RegisterRef RR) const;
MachineBasicBlock *getBlockWithRef(NodeId RN) const;
void traverse(MachineBasicBlock *B, RefMap &LiveIn);
void emptify(RefMap &M);
diff --git a/contrib/llvm/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp b/contrib/llvm/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp
index 40f6c8d23ea8..0554646bb6be 100644
--- a/contrib/llvm/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp
+++ b/contrib/llvm/lib/Target/Hexagon/TargetInfo/HexagonTargetInfo.cpp
@@ -12,8 +12,12 @@
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
-Target llvm::TheHexagonTarget;
+Target &llvm::getTheHexagonTarget() {
+ static Target TheHexagonTarget;
+ return TheHexagonTarget;
+}
extern "C" void LLVMInitializeHexagonTargetInfo() {
- RegisterTarget<Triple::hexagon, /*HasJIT=*/false> X(TheHexagonTarget, "hexagon", "Hexagon");
+ RegisterTarget<Triple::hexagon, /*HasJIT=*/false> X(getTheHexagonTarget(),
+ "hexagon", "Hexagon");
}
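Aside: the change above replaces a global Target object with a function returning a function-local static, i.e. the construct-on-first-use idiom, which sidesteps static initialization order across translation units. A generic, self-contained sketch of the idiom (names illustrative):

    #include <cassert>

    struct Registry { int Entries = 0; };

    static Registry &getTheRegistry() {
      static Registry R;   // Constructed once, on first use (thread-safe since C++11).
      return R;
    }

    int main() {
      getTheRegistry().Entries = 3;
      assert(getTheRegistry().Entries == 3);  // Same instance on every call.
    }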
diff --git a/contrib/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp b/contrib/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
index cbb96d8b05b2..903f92a04431 100644
--- a/contrib/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
+++ b/contrib/llvm/lib/Target/Lanai/AsmParser/LanaiAsmParser.cpp
@@ -844,7 +844,7 @@ bool shouldBeSls(const LanaiOperand &Op) {
}
// Matches memory operand. Returns true if error encountered.
-LanaiAsmParser::OperandMatchResultTy
+OperandMatchResultTy
LanaiAsmParser::parseMemoryOperand(OperandVector &Operands) {
// Try to match a memory operand.
// The memory operands are of the form:
@@ -978,7 +978,7 @@ LanaiAsmParser::parseMemoryOperand(OperandVector &Operands) {
// Looks at a token type and creates the relevant operand from this
// information, adding to operands.
// If operand was parsed, returns false, else true.
-LanaiAsmParser::OperandMatchResultTy
+OperandMatchResultTy
LanaiAsmParser::parseOperand(OperandVector *Operands, StringRef Mnemonic) {
// Check if the current operand has a custom associated parser, if so, try to
// custom parse the operand, or fallback to the general approach.
@@ -1207,7 +1207,7 @@ bool LanaiAsmParser::ParseInstruction(ParseInstructionInfo & /*Info*/,
} // namespace
extern "C" void LLVMInitializeLanaiAsmParser() {
- RegisterMCAsmParser<LanaiAsmParser> x(TheLanaiTarget);
+ RegisterMCAsmParser<LanaiAsmParser> x(getTheLanaiTarget());
}
} // namespace llvm
diff --git a/contrib/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp b/contrib/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp
index 744441bc90dd..609b650e5d32 100644
--- a/contrib/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp
+++ b/contrib/llvm/lib/Target/Lanai/Disassembler/LanaiDisassembler.cpp
@@ -19,7 +19,6 @@
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/MemoryObject.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
@@ -27,7 +26,7 @@ using namespace llvm;
typedef MCDisassembler::DecodeStatus DecodeStatus;
namespace llvm {
-extern Target TheLanaiTarget;
+Target &getTheLanaiTarget();
}
static MCDisassembler *createLanaiDisassembler(const Target & /*T*/,
@@ -38,7 +37,7 @@ static MCDisassembler *createLanaiDisassembler(const Target & /*T*/,
extern "C" void LLVMInitializeLanaiDisassembler() {
// Register the disassembler
- TargetRegistry::RegisterMCDisassembler(TheLanaiTarget,
+ TargetRegistry::RegisterMCDisassembler(getTheLanaiTarget(),
createLanaiDisassembler);
}
@@ -47,8 +46,9 @@ LanaiDisassembler::LanaiDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx)
// Forward declare because the autogenerated code will reference this.
// Definition is further down.
-DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo,
- uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeGPRRegisterClass(MCInst &Inst, unsigned RegNo,
+ uint64_t Address,
+ const void *Decoder);
static DecodeStatus decodeRiMemoryValue(MCInst &Inst, unsigned Insn,
uint64_t Address, const void *Decoder);
diff --git a/contrib/llvm/lib/Target/Lanai/Lanai.h b/contrib/llvm/lib/Target/Lanai/Lanai.h
index 47bd498c579c..c1fdf793305b 100644
--- a/contrib/llvm/lib/Target/Lanai/Lanai.h
+++ b/contrib/llvm/lib/Target/Lanai/Lanai.h
@@ -45,7 +45,7 @@ FunctionPass *createLanaiMemAluCombinerPass();
// operations.
FunctionPass *createLanaiSetflagAluCombinerPass();
-extern Target TheLanaiTarget;
+Target &getTheLanaiTarget();
} // namespace llvm
#endif // LLVM_LIB_TARGET_LANAI_LANAI_H
diff --git a/contrib/llvm/lib/Target/Lanai/LanaiAluCode.h b/contrib/llvm/lib/Target/Lanai/LanaiAluCode.h
index b6ceedef6651..d5145694fe46 100644
--- a/contrib/llvm/lib/Target/Lanai/LanaiAluCode.h
+++ b/contrib/llvm/lib/Target/Lanai/LanaiAluCode.h
@@ -43,8 +43,8 @@ enum AluCode {
// Bits indicating post- and pre-operators should be tested and set using Is*
// and Make* utility functions
-constexpr int Lanai_PRE_OP = 0x40;
-constexpr int Lanai_POST_OP = 0x80;
+const int Lanai_PRE_OP = 0x40;
+const int Lanai_POST_OP = 0x80;
inline static unsigned encodeLanaiAluCode(unsigned AluOp) {
unsigned const OP_ENCODING_MASK = 0x07;
diff --git a/contrib/llvm/lib/Target/Lanai/LanaiAsmPrinter.cpp b/contrib/llvm/lib/Target/Lanai/LanaiAsmPrinter.cpp
index 9d39cef9f8ed..607b2a97b29f 100644
--- a/contrib/llvm/lib/Target/Lanai/LanaiAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/Lanai/LanaiAsmPrinter.cpp
@@ -45,7 +45,7 @@ public:
std::unique_ptr<MCStreamer> Streamer)
: AsmPrinter(TM, std::move(Streamer)) {}
- const char *getPassName() const override { return "Lanai Assembly Printer"; }
+ StringRef getPassName() const override { return "Lanai Assembly Printer"; }
void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O);
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
@@ -151,7 +151,7 @@ void LanaiAsmPrinter::emitCallInstruction(const MachineInstr *MI) {
assert((MI->getOpcode() == Lanai::CALL || MI->getOpcode() == Lanai::CALLR) &&
"Unsupported call function");
- LanaiMCInstLower MCInstLowering(OutContext, *Mang, *this);
+ LanaiMCInstLower MCInstLowering(OutContext, *this);
MCSubtargetInfo STI = getSubtargetInfo();
// Insert save rca instruction immediately before the call.
// TODO: We should generate a pc-relative mov instruction here instead
@@ -188,7 +188,7 @@ void LanaiAsmPrinter::emitCallInstruction(const MachineInstr *MI) {
}
void LanaiAsmPrinter::customEmitInstruction(const MachineInstr *MI) {
- LanaiMCInstLower MCInstLowering(OutContext, *Mang, *this);
+ LanaiMCInstLower MCInstLowering(OutContext, *this);
MCSubtargetInfo STI = getSubtargetInfo();
MCInst TmpInst;
MCInstLowering.Lower(MI, TmpInst);
@@ -239,5 +239,5 @@ bool LanaiAsmPrinter::isBlockOnlyReachableByFallthrough(
// Force static initialization.
extern "C" void LLVMInitializeLanaiAsmPrinter() {
- RegisterAsmPrinter<LanaiAsmPrinter> X(TheLanaiTarget);
+ RegisterAsmPrinter<LanaiAsmPrinter> X(getTheLanaiTarget());
}
diff --git a/contrib/llvm/lib/Target/Lanai/LanaiDelaySlotFiller.cpp b/contrib/llvm/lib/Target/Lanai/LanaiDelaySlotFiller.cpp
index 7b106547d60b..802232b05828 100644
--- a/contrib/llvm/lib/Target/Lanai/LanaiDelaySlotFiller.cpp
+++ b/contrib/llvm/lib/Target/Lanai/LanaiDelaySlotFiller.cpp
@@ -42,7 +42,7 @@ struct Filler : public MachineFunctionPass {
static char ID;
explicit Filler() : MachineFunctionPass(ID) {}
- const char *getPassName() const override { return "Lanai Delay Slot Filler"; }
+ StringRef getPassName() const override { return "Lanai Delay Slot Filler"; }
bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
@@ -60,7 +60,7 @@ struct Filler : public MachineFunctionPass {
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
void insertDefsUses(MachineBasicBlock::instr_iterator MI,
@@ -105,7 +105,7 @@ bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
// RET is generated as part of epilogue generation and hence we know
// what the two instructions preceding it are and that it is safe to
// insert RET above them.
- MachineBasicBlock::reverse_instr_iterator RI(I);
+ MachineBasicBlock::reverse_instr_iterator RI = ++I.getReverse();
assert(RI->getOpcode() == Lanai::LDW_RI && RI->getOperand(0).isReg() &&
RI->getOperand(0).getReg() == Lanai::FP &&
RI->getOperand(1).isReg() &&
@@ -117,8 +117,7 @@ bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
RI->getOperand(0).getReg() == Lanai::SP &&
RI->getOperand(1).isReg() &&
RI->getOperand(1).getReg() == Lanai::FP);
- ++RI;
- MachineBasicBlock::instr_iterator FI(RI.base());
+ MachineBasicBlock::instr_iterator FI = RI.getReverse();
MBB.splice(std::next(I), &MBB, FI, I);
FilledSlots += 2;
} else {
@@ -154,14 +153,14 @@ bool Filler::findDelayInstr(MachineBasicBlock &MBB,
bool SawLoad = false;
bool SawStore = false;
- for (MachineBasicBlock::reverse_instr_iterator I(Slot); I != MBB.instr_rend();
- ++I) {
+ for (MachineBasicBlock::reverse_instr_iterator I = ++Slot.getReverse();
+ I != MBB.instr_rend(); ++I) {
// skip debug value
if (I->isDebugValue())
continue;
// Convert to forward iterator.
- MachineBasicBlock::instr_iterator FI(std::next(I).base());
+ MachineBasicBlock::instr_iterator FI = I.getReverse();
if (I->hasUnmodeledSideEffects() || I->isInlineAsm() || I->isLabel() ||
FI == LastFiller || I->isPseudo())
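The iterator rewrites above move from implicitly constructing a reverse iterator out of a forward iterator to the getReverse() API, which points at the same node; the explicit ++ compensates for the off-by-one familiar from std::reverse_iterator, which refers to the element one before the forward iterator it is built from. A small illustrative sketch with std::vector (not Lanai code, just the convention being compensated for):

#include <cassert>
#include <vector>

int main() {
  std::vector<int> V{10, 20, 30, 40};
  auto I = V.begin() + 2;                  // forward iterator at 30
  std::vector<int>::reverse_iterator R(I); // reverse iterator at 20, one before
  assert(*R == 20);
  assert(R.base() == I); // converting back recovers the forward iterator exactly
  return 0;
}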
diff --git a/contrib/llvm/lib/Target/Lanai/LanaiFrameLowering.cpp b/contrib/llvm/lib/Target/Lanai/LanaiFrameLowering.cpp
index cb048d568df7..0723668c743e 100644
--- a/contrib/llvm/lib/Target/Lanai/LanaiFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/Lanai/LanaiFrameLowering.cpp
@@ -26,36 +26,36 @@ using namespace llvm;
// Determines the size of the frame and maximum call frame size.
void LanaiFrameLowering::determineFrameLayout(MachineFunction &MF) const {
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
const LanaiRegisterInfo *LRI = STI.getRegisterInfo();
// Get the number of bytes to allocate from the FrameInfo.
- unsigned FrameSize = MFI->getStackSize();
+ unsigned FrameSize = MFI.getStackSize();
// Get the alignment.
- unsigned StackAlign = LRI->needsStackRealignment(MF) ? MFI->getMaxAlignment()
+ unsigned StackAlign = LRI->needsStackRealignment(MF) ? MFI.getMaxAlignment()
: getStackAlignment();
// Get the maximum call frame size of all the calls.
- unsigned MaxCallFrameSize = MFI->getMaxCallFrameSize();
+ unsigned MaxCallFrameSize = MFI.getMaxCallFrameSize();
// If we have dynamic alloca then MaxCallFrameSize needs to be aligned so
// that allocations will be aligned.
- if (MFI->hasVarSizedObjects())
+ if (MFI.hasVarSizedObjects())
MaxCallFrameSize = alignTo(MaxCallFrameSize, StackAlign);
// Update maximum call frame size.
- MFI->setMaxCallFrameSize(MaxCallFrameSize);
+ MFI.setMaxCallFrameSize(MaxCallFrameSize);
// Include call frame size in total.
- if (!(hasReservedCallFrame(MF) && MFI->adjustsStack()))
+ if (!(hasReservedCallFrame(MF) && MFI.adjustsStack()))
FrameSize += MaxCallFrameSize;
// Make sure the frame is aligned.
FrameSize = alignTo(FrameSize, StackAlign);
// Update frame info.
- MFI->setStackSize(FrameSize);
+ MFI.setStackSize(FrameSize);
}
// Iterates through each basic block in a machine function and replaces
@@ -64,7 +64,7 @@ void LanaiFrameLowering::determineFrameLayout(MachineFunction &MF) const {
void LanaiFrameLowering::replaceAdjDynAllocPseudo(MachineFunction &MF) const {
const LanaiInstrInfo &LII =
*static_cast<const LanaiInstrInfo *>(STI.getInstrInfo());
- unsigned MaxCallFrameSize = MF.getFrameInfo()->getMaxCallFrameSize();
+ unsigned MaxCallFrameSize = MF.getFrameInfo().getMaxCallFrameSize();
for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); MBB != E;
++MBB) {
@@ -93,7 +93,7 @@ void LanaiFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
const LanaiInstrInfo &LII =
*static_cast<const LanaiInstrInfo *>(STI.getInstrInfo());
MachineBasicBlock::iterator MBBI = MBB.begin();
@@ -107,7 +107,7 @@ void LanaiFrameLowering::emitPrologue(MachineFunction &MF,
// FIXME: This appears to be overallocating. Needs investigation.
// Get the number of bytes to allocate from the FrameInfo.
- unsigned StackSize = MFI->getStackSize();
+ unsigned StackSize = MFI.getStackSize();
// Push old FP
// st %fp,-4[*%sp]
@@ -135,7 +135,7 @@ void LanaiFrameLowering::emitPrologue(MachineFunction &MF,
}
// Replace ADJDYNANALLOC
- if (MFI->hasVarSizedObjects())
+ if (MFI.hasVarSizedObjects())
replaceAdjDynAllocPseudo(MF);
}
@@ -200,21 +200,21 @@ void LanaiFrameLowering::determineCalleeSaves(MachineFunction &MF,
RegScavenger *RS) const {
TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
const LanaiRegisterInfo *LRI =
static_cast<const LanaiRegisterInfo *>(STI.getRegisterInfo());
int Offset = -4;
// Reserve 4 bytes for the saved RCA
- MFI->CreateFixedObject(4, Offset, true);
+ MFI.CreateFixedObject(4, Offset, true);
Offset -= 4;
// Reserve 4 bytes for the saved FP
- MFI->CreateFixedObject(4, Offset, true);
+ MFI.CreateFixedObject(4, Offset, true);
Offset -= 4;
if (LRI->hasBasePointer(MF)) {
- MFI->CreateFixedObject(4, Offset, true);
+ MFI.CreateFixedObject(4, Offset, true);
SavedRegs.reset(LRI->getBaseRegister());
}
}
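As a rough, self-contained restatement of the determineFrameLayout logic above (helper and parameter names are hypothetical; alignTo rounds up to a multiple of the alignment, as llvm::alignTo does):

#include <cstdint>

static uint64_t alignTo(uint64_t Value, uint64_t Align) {
  return (Value + Align - 1) / Align * Align;
}

// Frame size = stack objects, plus the outgoing call area when the call frame
// is not reserved, rounded up to the stack alignment; the call area itself is
// aligned first when dynamic allocas are present.
static uint64_t computeFrameSize(uint64_t StackSize, uint64_t StackAlign,
                                 uint64_t MaxCallFrameSize,
                                 bool HasVarSizedObjects,
                                 bool ReservesCallFrame) {
  if (HasVarSizedObjects)
    MaxCallFrameSize = alignTo(MaxCallFrameSize, StackAlign);
  uint64_t FrameSize = StackSize;
  if (!ReservesCallFrame)
    FrameSize += MaxCallFrameSize;
  return alignTo(FrameSize, StackAlign);
}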
diff --git a/contrib/llvm/lib/Target/Lanai/LanaiISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Lanai/LanaiISelDAGToDAG.cpp
index 29bc6e8a6c56..ed0c99a76ce4 100644
--- a/contrib/llvm/lib/Target/Lanai/LanaiISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/Lanai/LanaiISelDAGToDAG.cpp
@@ -56,7 +56,7 @@ public:
}
// Pass Name
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "Lanai DAG->DAG Pattern Instruction Selection";
}
@@ -282,9 +282,29 @@ void LanaiDAGToDAGISel::Select(SDNode *Node) {
return;
}
- // Instruction Selection not handled by the auto-generated
- // tablegen selection should be handled here.
+ // Instruction Selection not handled by the auto-generated tablegen selection
+ // should be handled here.
+ EVT VT = Node->getValueType(0);
switch (Opcode) {
+ case ISD::Constant:
+ if (VT == MVT::i32) {
+ ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
+ // Materialize zero constants as copies from R0. This allows the coalescer
+ // to propagate these into other instructions.
+ if (ConstNode->isNullValue()) {
+ SDValue New = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
+ SDLoc(Node), Lanai::R0, MVT::i32);
+ return ReplaceNode(Node, New.getNode());
+ }
+ // Materialize all ones constants as copies from R1. This allows the
+ // coalescer to propagate these into other instructions.
+ if (ConstNode->isAllOnesValue()) {
+ SDValue New = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
+ SDLoc(Node), Lanai::R1, MVT::i32);
+ return ReplaceNode(Node, New.getNode());
+ }
+ }
+ break;
case ISD::FrameIndex:
selectFrameIndex(Node);
return;
diff --git a/contrib/llvm/lib/Target/Lanai/LanaiISelLowering.cpp b/contrib/llvm/lib/Target/Lanai/LanaiISelLowering.cpp
index 66416b38e812..ae7870e07d42 100644
--- a/contrib/llvm/lib/Target/Lanai/LanaiISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Lanai/LanaiISelLowering.cpp
@@ -104,7 +104,7 @@ LanaiTargetLowering::LanaiTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::ROTR, MVT::i32, Expand);
setOperationAction(ISD::ROTL, MVT::i32, Expand);
- setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
@@ -179,6 +179,8 @@ SDValue LanaiTargetLowering::LowerOperation(SDValue Op,
return LowerSETCC(Op, DAG);
case ISD::SETCCE:
return LowerSETCCE(Op, DAG);
+ case ISD::SHL_PARTS:
+ return LowerSHL_PARTS(Op, DAG);
case ISD::SRL_PARTS:
return LowerSRL_PARTS(Op, DAG);
case ISD::VASTART:
@@ -423,7 +425,7 @@ SDValue LanaiTargetLowering::LowerCCCArguments(
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
LanaiMachineFunctionInfo *LanaiMFI = MF.getInfo<LanaiMachineFunctionInfo>();
@@ -480,7 +482,7 @@ SDValue LanaiTargetLowering::LowerCCCArguments(
<< EVT(VA.getLocVT()).getEVTString() << "\n";
}
// Create the frame index object for this incoming parameter...
- int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), true);
+ int FI = MFI.CreateFixedObject(ObjSize, VA.getLocMemOffset(), true);
// Create the SelectionDAG nodes corresponding to a load
// from this parameter
@@ -507,7 +509,7 @@ SDValue LanaiTargetLowering::LowerCCCArguments(
if (IsVarArg) {
// Record the frame index of the first variable argument
// which is a value necessary to VASTART.
- int FI = MFI->CreateFixedObject(4, CCInfo.getNextStackOffset(), true);
+ int FI = MFI.CreateFixedObject(4, CCInfo.getNextStackOffset(), true);
LanaiMFI->setVarArgsFrameIndex(FI);
}
@@ -588,7 +590,7 @@ SDValue LanaiTargetLowering::LowerCCCCallTo(
CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
*DAG.getContext());
GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
NumFixedArgs = 0;
if (IsVarArg && G) {
@@ -619,7 +621,7 @@ SDValue LanaiTargetLowering::LowerCCCCallTo(
unsigned Size = Flags.getByValSize();
unsigned Align = Flags.getByValAlign();
- int FI = MFI->CreateStackObject(Size, Align, false);
+ int FI = MFI.CreateStackObject(Size, Align, false);
SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
SDValue SizeNode = DAG.getConstant(Size, DL, MVT::i32);
@@ -1052,8 +1054,8 @@ SDValue LanaiTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
SDValue LanaiTargetLowering::LowerRETURNADDR(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- MFI->setReturnAddressIsTaken(true);
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ MFI.setReturnAddressIsTaken(true);
EVT VT = Op.getValueType();
SDLoc DL(Op);
@@ -1074,8 +1076,8 @@ SDValue LanaiTargetLowering::LowerRETURNADDR(SDValue Op,
SDValue LanaiTargetLowering::LowerFRAMEADDR(SDValue Op,
SelectionDAG &DAG) const {
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
- MFI->setFrameAddressIsTaken(true);
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI.setFrameAddressIsTaken(true);
EVT VT = Op.getValueType();
SDLoc DL(Op);
@@ -1167,8 +1169,8 @@ SDValue LanaiTargetLowering::LowerGlobalAddress(SDValue Op,
// If the code model is small or global variable will be placed in the small
// section, then assume address will fit in 21-bits.
- if (getTargetMachine().getCodeModel() == CodeModel::Small ||
- TLOF->isGlobalInSmallSection(GV, getTargetMachine())) {
+ const GlobalObject *GO = GV->getBaseObject();
+ if (TLOF->isGlobalInSmallSection(GO, getTargetMachine())) {
SDValue Small = DAG.getTargetGlobalAddress(
GV, DL, getPointerTy(DAG.getDataLayout()), Offset, LanaiII::MO_NO_FLAG);
return DAG.getNode(ISD::OR, DL, MVT::i32,
@@ -1232,6 +1234,55 @@ SDValue LanaiTargetLowering::LowerJumpTable(SDValue Op,
}
}
+SDValue LanaiTargetLowering::LowerSHL_PARTS(SDValue Op,
+ SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ unsigned VTBits = VT.getSizeInBits();
+ SDLoc dl(Op);
+ assert(Op.getNumOperands() == 3 && "Unexpected SHL!");
+ SDValue ShOpLo = Op.getOperand(0);
+ SDValue ShOpHi = Op.getOperand(1);
+ SDValue ShAmt = Op.getOperand(2);
+
+ // Performs the following for (ShOpLo + (ShOpHi << 32)) << ShAmt:
+ // LoBitsForHi = (ShAmt == 0) ? 0 : (ShOpLo >> (32-ShAmt))
+ // HiBitsForHi = ShOpHi << ShAmt
+ // Hi = (ShAmt >= 32) ? (ShOpLo << (ShAmt-32)) : (LoBitsForHi | HiBitsForHi)
+ // Lo = (ShAmt >= 32) ? 0 : (ShOpLo << ShAmt)
+ // return (Hi << 32) | Lo;
+
+ SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
+ DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
+ SDValue LoBitsForHi = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
+
+ // If ShAmt == 0, we just calculated "(SRL ShOpLo, 32)" which is "undef". We
+ // wanted 0, so select 0 directly.
+ SDValue Zero = DAG.getConstant(0, dl, MVT::i32);
+ SDValue SetCC = DAG.getSetCC(dl, MVT::i32, ShAmt, Zero, ISD::SETEQ);
+ LoBitsForHi = DAG.getSelect(dl, MVT::i32, SetCC, Zero, LoBitsForHi);
+
+ SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
+ DAG.getConstant(VTBits, dl, MVT::i32));
+ SDValue HiBitsForHi = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
+ SDValue HiForNormalShift =
+ DAG.getNode(ISD::OR, dl, VT, LoBitsForHi, HiBitsForHi);
+
+ SDValue HiForBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
+
+ SetCC = DAG.getSetCC(dl, MVT::i32, ExtraShAmt, Zero, ISD::SETGE);
+ SDValue Hi =
+ DAG.getSelect(dl, MVT::i32, SetCC, HiForBigShift, HiForNormalShift);
+
+ // Lanai shifts of larger than register sizes are wrapped rather than
+ // clamped, so we can't just emit "lo << b" if b is too big.
+ SDValue LoForNormalShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
+ SDValue Lo = DAG.getSelect(
+ dl, MVT::i32, SetCC, DAG.getConstant(0, dl, MVT::i32), LoForNormalShift);
+
+ SDValue Ops[2] = {Lo, Hi};
+ return DAG.getMergeValues(Ops, dl);
+}
+
SDValue LanaiTargetLowering::LowerSRL_PARTS(SDValue Op,
SelectionDAG &DAG) const {
MVT VT = Op.getSimpleValueType();
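The comment block inside LowerSHL_PARTS above corresponds to the following scalar sketch over 32-bit halves (hypothetical helper names; assumes the shift amount is in [0, 64), the range the PARTS expansion is expected to handle):

#include <cstdint>

static uint32_t shl64PartsLo(uint32_t Lo, uint32_t Amt) {
  return Amt >= 32 ? 0 : Lo << Amt; // big shifts leave nothing in the low half
}

static uint32_t shl64PartsHi(uint32_t Lo, uint32_t Hi, uint32_t Amt) {
  if (Amt == 0)
    return Hi;                    // nothing is shifted in from the low half
  if (Amt >= 32)
    return Lo << (Amt - 32);      // big shift: only low-half bits survive
  return (Hi << Amt) | (Lo >> (32 - Amt)); // normal shift: merge both halves
}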
diff --git a/contrib/llvm/lib/Target/Lanai/LanaiISelLowering.h b/contrib/llvm/lib/Target/Lanai/LanaiISelLowering.h
index 16ce8edd27c5..c2fba4f9d167 100644
--- a/contrib/llvm/lib/Target/Lanai/LanaiISelLowering.h
+++ b/contrib/llvm/lib/Target/Lanai/LanaiISelLowering.h
@@ -88,6 +88,7 @@ public:
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSETCCE(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
diff --git a/contrib/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp b/contrib/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
index 673d23daf886..fcd5da876b15 100644
--- a/contrib/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
@@ -558,7 +558,7 @@ LanaiInstrInfo::optimizeSelect(MachineInstr &MI,
// - FalseBlock is set to the destination if condition evaluates to false (it
// is the nullptr if the branch is unconditional);
// - condition is populated with machine operands needed to generate the branch
-// to insert in InsertBranch;
+// to insert in insertBranch;
// Returns: false if branch could successfully be analyzed.
bool LanaiInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TrueBlock,
@@ -641,10 +641,10 @@ bool LanaiInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
return false;
}
-// ReverseBranchCondition - Reverses the branch condition of the specified
+// reverseBranchCondition - Reverses the branch condition of the specified
// condition list, returning false on success and true if it cannot be
// reversed.
-bool LanaiInstrInfo::ReverseBranchCondition(
+bool LanaiInstrInfo::reverseBranchCondition(
SmallVectorImpl<llvm::MachineOperand> &Condition) const {
assert((Condition.size() == 1) &&
"Lanai branch conditions should have one component.");
@@ -658,13 +658,15 @@ bool LanaiInstrInfo::ReverseBranchCondition(
// Insert the branch with condition specified in condition and given targets
// (TrueBlock and FalseBlock). This function returns the number of machine
// instructions inserted.
-unsigned LanaiInstrInfo::InsertBranch(MachineBasicBlock &MBB,
+unsigned LanaiInstrInfo::insertBranch(MachineBasicBlock &MBB,
MachineBasicBlock *TrueBlock,
MachineBasicBlock *FalseBlock,
ArrayRef<MachineOperand> Condition,
- const DebugLoc &DL) const {
+ const DebugLoc &DL,
+ int *BytesAdded) const {
// Shouldn't be a fall through.
- assert(TrueBlock && "InsertBranch must not be told to insert a fallthrough");
+ assert(TrueBlock && "insertBranch must not be told to insert a fallthrough");
+ assert(!BytesAdded && "code size not handled");
// If condition is empty then an unconditional branch is being inserted.
if (Condition.empty()) {
@@ -688,7 +690,10 @@ unsigned LanaiInstrInfo::InsertBranch(MachineBasicBlock &MBB,
return 2;
}
-unsigned LanaiInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+unsigned LanaiInstrInfo::removeBranch(MachineBasicBlock &MBB,
+ int *BytesRemoved) const {
+ assert(!BytesRemoved && "code size not handled");
+
MachineBasicBlock::iterator Instruction = MBB.end();
unsigned Count = 0;
diff --git a/contrib/llvm/lib/Target/Lanai/LanaiInstrInfo.h b/contrib/llvm/lib/Target/Lanai/LanaiInstrInfo.h
index 51f6c6ea436d..4387fe1af3c3 100644
--- a/contrib/llvm/lib/Target/Lanai/LanaiInstrInfo.h
+++ b/contrib/llvm/lib/Target/Lanai/LanaiInstrInfo.h
@@ -86,7 +86,8 @@ public:
SmallVectorImpl<MachineOperand> &Condition,
bool AllowModify) const override;
- unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
+ unsigned removeBranch(MachineBasicBlock &MBB,
+ int *BytesRemoved = nullptr) const override;
// For a comparison instruction, return the source registers in SrcReg and
// SrcReg2 if having two register operands, and the value it compares against
@@ -129,13 +130,14 @@ public:
SmallPtrSetImpl<MachineInstr *> &SeenMIs,
bool PreferFalse) const override;
- bool ReverseBranchCondition(
+ bool reverseBranchCondition(
SmallVectorImpl<MachineOperand> &Condition) const override;
- unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TrueBlock,
+ unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TrueBlock,
MachineBasicBlock *FalseBlock,
ArrayRef<MachineOperand> Condition,
- const DebugLoc &DL) const override;
+ const DebugLoc &DL,
+ int *BytesAdded = nullptr) const override;
};
static inline bool isSPLSOpcode(unsigned Opcode) {
diff --git a/contrib/llvm/lib/Target/Lanai/LanaiInstrInfo.td b/contrib/llvm/lib/Target/Lanai/LanaiInstrInfo.td
index cd1abc1f3359..285fca11737d 100644
--- a/contrib/llvm/lib/Target/Lanai/LanaiInstrInfo.td
+++ b/contrib/llvm/lib/Target/Lanai/LanaiInstrInfo.td
@@ -115,9 +115,6 @@ def imm10 : Operand<i32>, PatLeaf<(imm), [{
let ParserMatchClass = Imm10AsmOperand;
}
-def immZExt21 : PatLeaf<(imm),
- [{return isUInt<21>(N->getZExtValue()); }], LO21>;
-
def LoImm16AsmOperand : AsmOperandClass { let Name = "LoImm16"; }
def i32lo16z : Operand<i32>, PatLeaf<(i32 imm), [{
// i32lo16 predicate - true if the 32-bit immediate has only rightmost 16
@@ -834,11 +831,6 @@ def TRAILZ : InstSpecial<0b011, (outs GPR:$Rd), (ins GPR:$Rs1),
// Non-Instruction Patterns
//===----------------------------------------------------------------------===//
-// i32 0 and R0 can be used interchangeably.
-def : Pat<(i32 0), (i32 R0)>;
-// i32 -1 and R1 can be used interchangeably.
-def : Pat<(i32 -1), (i32 R1)>;
-
// unsigned 16-bit immediate
def : Pat<(i32 i32lo16z:$imm), (OR_I_LO (i32 R0), imm:$imm)>;
diff --git a/contrib/llvm/lib/Target/Lanai/LanaiMCInstLower.cpp b/contrib/llvm/lib/Target/Lanai/LanaiMCInstLower.cpp
index 6c809b43f7ed..39c633578d43 100644
--- a/contrib/llvm/lib/Target/Lanai/LanaiMCInstLower.cpp
+++ b/contrib/llvm/lib/Target/Lanai/LanaiMCInstLower.cpp
@@ -21,7 +21,6 @@
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/Mangler.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
diff --git a/contrib/llvm/lib/Target/Lanai/LanaiMCInstLower.h b/contrib/llvm/lib/Target/Lanai/LanaiMCInstLower.h
index 41c0766e86da..6d7818d63d87 100644
--- a/contrib/llvm/lib/Target/Lanai/LanaiMCInstLower.h
+++ b/contrib/llvm/lib/Target/Lanai/LanaiMCInstLower.h
@@ -31,8 +31,7 @@ class LLVM_LIBRARY_VISIBILITY LanaiMCInstLower {
AsmPrinter &Printer;
public:
- LanaiMCInstLower(MCContext &CTX, Mangler & /*Mang*/, AsmPrinter &AP)
- : Ctx(CTX), Printer(AP) {}
+ LanaiMCInstLower(MCContext &CTX, AsmPrinter &AP) : Ctx(CTX), Printer(AP) {}
void Lower(const MachineInstr *MI, MCInst &OutMI) const;
MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
diff --git a/contrib/llvm/lib/Target/Lanai/LanaiMemAluCombiner.cpp b/contrib/llvm/lib/Target/Lanai/LanaiMemAluCombiner.cpp
index c5a46143ee56..7259c02194ca 100644
--- a/contrib/llvm/lib/Target/Lanai/LanaiMemAluCombiner.cpp
+++ b/contrib/llvm/lib/Target/Lanai/LanaiMemAluCombiner.cpp
@@ -61,7 +61,7 @@ public:
initializeLanaiMemAluCombinerPass(*PassRegistry::getPassRegistry());
}
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "Lanai load / store optimization pass";
}
@@ -69,7 +69,7 @@ public:
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
private:
@@ -339,6 +339,9 @@ MbbIterator LanaiMemAluCombiner::findClosestSuitableAluInstr(
while (First != Last) {
Decrement ? --First : ++First;
+ if (First == Last)
+ break;
+
// Skip over debug instructions
if (First->isDebugValue())
continue;
diff --git a/contrib/llvm/lib/Target/Lanai/LanaiRegisterInfo.cpp b/contrib/llvm/lib/Target/Lanai/LanaiRegisterInfo.cpp
index a4c612258e7b..12a2571c28d9 100644
--- a/contrib/llvm/lib/Target/Lanai/LanaiRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/Lanai/LanaiRegisterInfo.cpp
@@ -146,13 +146,13 @@ void LanaiRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
- int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
+ int Offset = MF.getFrameInfo().getObjectOffset(FrameIndex) +
MI.getOperand(FIOperandNum + 1).getImm();
// Addressable stack objects are addressed using neg. offsets from fp
// or pos. offsets from sp/basepointer
if (!HasFP || (needsStackRealignment(MF) && FrameIndex >= 0))
- Offset += MF.getFrameInfo()->getStackSize();
+ Offset += MF.getFrameInfo().getStackSize();
unsigned FrameReg = getFrameRegister(MF);
if (FrameIndex >= 0) {
@@ -246,10 +246,10 @@ void LanaiRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
bool LanaiRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
// When we need stack realignment and there are dynamic allocas, we can't
// reference off of the stack pointer, so we reserve a base pointer.
- if (needsStackRealignment(MF) && MFI->hasVarSizedObjects())
+ if (needsStackRealignment(MF) && MFI.hasVarSizedObjects())
return true;
return false;
diff --git a/contrib/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp b/contrib/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp
index b1f4b496eb9e..2a9bc25d7fad 100644
--- a/contrib/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/Lanai/LanaiTargetMachine.cpp
@@ -32,7 +32,8 @@ void initializeLanaiMemAluCombinerPass(PassRegistry &);
extern "C" void LLVMInitializeLanaiTarget() {
// Register the target.
- RegisterTargetMachine<LanaiTargetMachine> registered_target(TheLanaiTarget);
+ RegisterTargetMachine<LanaiTargetMachine> registered_target(
+ getTheLanaiTarget());
}
static std::string computeDataLayout() {
diff --git a/contrib/llvm/lib/Target/Lanai/LanaiTargetObjectFile.cpp b/contrib/llvm/lib/Target/Lanai/LanaiTargetObjectFile.cpp
index 4048c8535215..7475dbd68ae4 100644
--- a/contrib/llvm/lib/Target/Lanai/LanaiTargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/Lanai/LanaiTargetObjectFile.cpp
@@ -49,22 +49,25 @@ static bool isInSmallSection(uint64_t Size) {
// Return true if this global address should be placed into small data/bss
// section.
bool LanaiTargetObjectFile::isGlobalInSmallSection(
- const GlobalValue *GV, const TargetMachine &TM) const {
+ const GlobalObject *GO, const TargetMachine &TM) const {
+ if (GO == nullptr)
+ return false;
+
// We first check the case where global is a declaration, because finding
// section kind using getKindForGlobal() is only allowed for global
// definitions.
- if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage())
- return isGlobalInSmallSectionImpl(GV, TM);
+ if (GO->isDeclaration() || GO->hasAvailableExternallyLinkage())
+ return isGlobalInSmallSectionImpl(GO, TM);
- return isGlobalInSmallSection(GV, TM, getKindForGlobal(GV, TM));
+ return isGlobalInSmallSection(GO, TM, getKindForGlobal(GO, TM));
}
// Return true if this global address should be placed into small data/bss
// section.
-bool LanaiTargetObjectFile::isGlobalInSmallSection(const GlobalValue *GV,
+bool LanaiTargetObjectFile::isGlobalInSmallSection(const GlobalObject *GO,
const TargetMachine &TM,
SectionKind Kind) const {
- return (isGlobalInSmallSectionImpl(GV, TM) &&
+ return (isGlobalInSmallSectionImpl(GO, TM) &&
(Kind.isData() || Kind.isBSS() || Kind.isCommon()));
}
@@ -72,37 +75,43 @@ bool LanaiTargetObjectFile::isGlobalInSmallSection(const GlobalValue *GV,
// section. This method does all the work, except for checking the section
// kind.
bool LanaiTargetObjectFile::isGlobalInSmallSectionImpl(
- const GlobalValue *GV, const TargetMachine & /*TM*/) const {
+ const GlobalObject *GO, const TargetMachine &TM) const {
// Only global variables, not functions.
- const GlobalVariable *GVA = dyn_cast<GlobalVariable>(GV);
+ const auto *GVA = dyn_cast<GlobalVariable>(GO);
if (!GVA)
return false;
- if (GV->hasLocalLinkage())
+ // Global values placed in sections starting with .ldata do not fit in
+ // 21-bits, so always use large memory access for them. FIXME: This is a
+ // workaround for a tool limitation.
+ if (GVA->getSection().startswith(".ldata"))
+ return false;
+
+ if (TM.getCodeModel() == CodeModel::Small)
+ return true;
+
+ if (GVA->hasLocalLinkage())
return false;
- if (((GV->hasExternalLinkage() && GV->isDeclaration()) ||
- GV->hasCommonLinkage()))
+ if (((GVA->hasExternalLinkage() && GVA->isDeclaration()) ||
+ GVA->hasCommonLinkage()))
return false;
- Type *Ty = GV->getType()->getElementType();
+ Type *Ty = GVA->getValueType();
return isInSmallSection(
- GV->getParent()->getDataLayout().getTypeAllocSize(Ty));
+ GVA->getParent()->getDataLayout().getTypeAllocSize(Ty));
}
-MCSection *
-LanaiTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
- SectionKind Kind, Mangler &Mang,
- const TargetMachine &TM) const {
+MCSection *LanaiTargetObjectFile::SelectSectionForGlobal(
+ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
// Handle Small Section classification here.
- if (Kind.isBSS() && isGlobalInSmallSection(GV, TM, Kind))
+ if (Kind.isBSS() && isGlobalInSmallSection(GO, TM, Kind))
return SmallBSSSection;
- if (Kind.isData() && isGlobalInSmallSection(GV, TM, Kind))
+ if (Kind.isData() && isGlobalInSmallSection(GO, TM, Kind))
return SmallDataSection;
// Otherwise, we work the same as ELF.
- return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang,
- TM);
+ return TargetLoweringObjectFileELF::SelectSectionForGlobal(GO, Kind, TM);
}
/// Return true if this constant should be placed into small data section.
diff --git a/contrib/llvm/lib/Target/Lanai/LanaiTargetObjectFile.h b/contrib/llvm/lib/Target/Lanai/LanaiTargetObjectFile.h
index eb5195469f55..99ec1956da4b 100644
--- a/contrib/llvm/lib/Target/Lanai/LanaiTargetObjectFile.h
+++ b/contrib/llvm/lib/Target/Lanai/LanaiTargetObjectFile.h
@@ -18,20 +18,20 @@ class LanaiTargetObjectFile : public TargetLoweringObjectFileELF {
MCSection *SmallDataSection;
MCSection *SmallBSSSection;
+ bool isGlobalInSmallSection(const GlobalObject *GO, const TargetMachine &TM,
+ SectionKind Kind) const;
+ bool isGlobalInSmallSectionImpl(const GlobalObject *GO,
+ const TargetMachine &TM) const;
+
public:
void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
/// Return true if this global address should be placed into small data/bss
/// section.
- bool isGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM,
- SectionKind Kind) const;
- bool isGlobalInSmallSection(const GlobalValue *GV,
+ bool isGlobalInSmallSection(const GlobalObject *GO,
const TargetMachine &TM) const;
- bool isGlobalInSmallSectionImpl(const GlobalValue *GV,
- const TargetMachine &TM) const;
- MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
- Mangler &Mang,
+ MCSection *SelectSectionForGlobal(const GlobalObject *GO, SectionKind Kind,
const TargetMachine &TM) const override;
/// Return true if this constant should be placed into small data section.
diff --git a/contrib/llvm/lib/Target/Lanai/LanaiTargetTransformInfo.h b/contrib/llvm/lib/Target/Lanai/LanaiTargetTransformInfo.h
index 6300d2502d67..7fcb3ce45bbb 100644
--- a/contrib/llvm/lib/Target/Lanai/LanaiTargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/Lanai/LanaiTargetTransformInfo.h
@@ -41,11 +41,6 @@ public:
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}
- LanaiTTIImpl(const LanaiTTIImpl &Arg)
- : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {}
- LanaiTTIImpl(LanaiTTIImpl &&Arg)
- : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(Arg.ST), TLI(Arg.TLI) {}
-
bool shouldBuildLookupTables() const { return false; }
TargetTransformInfo::PopcntSupportKind getPopcntSupport(unsigned TyWidth) {
diff --git a/contrib/llvm/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp b/contrib/llvm/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp
index a3d8699f1317..a04fe8112fb9 100644
--- a/contrib/llvm/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/Lanai/MCTargetDesc/LanaiAsmBackend.cpp
@@ -163,10 +163,10 @@ LanaiAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
MCAsmBackend *llvm::createLanaiAsmBackend(const Target &T,
const MCRegisterInfo & /*MRI*/,
- const Triple &TheTriple,
- StringRef /*CPU*/) {
- if (!TheTriple.isOSBinFormatELF())
+ const Triple &TT, StringRef /*CPU*/,
+ const MCTargetOptions & /*Options*/) {
+ if (!TT.isOSBinFormatELF())
llvm_unreachable("OS not supported");
- return new LanaiAsmBackend(T, TheTriple.getOS());
+ return new LanaiAsmBackend(T, TT.getOS());
}
diff --git a/contrib/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp b/contrib/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp
index f14adc27dd45..ce68b7e24dba 100644
--- a/contrib/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCCodeEmitter.cpp
@@ -75,10 +75,6 @@ public:
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &SubtargetInfo) const;
- unsigned getCallTargetOpValue(const MCInst &Inst, unsigned OpNo,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &SubtargetInfo) const;
-
void encodeInstruction(const MCInst &Inst, raw_ostream &Ostream,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &SubtargetInfo) const override;
@@ -288,19 +284,6 @@ LanaiMCCodeEmitter::getSplsOpValue(const MCInst &Inst, unsigned OpNo,
return Encoding;
}
-unsigned LanaiMCCodeEmitter::getCallTargetOpValue(
- const MCInst &Inst, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &SubtargetInfo) const {
- const MCOperand &MCOp = Inst.getOperand(OpNo);
- if (MCOp.isReg() || MCOp.isImm())
- return getMachineOpValue(Inst, MCOp, Fixups, SubtargetInfo);
-
- Fixups.push_back(MCFixup::create(
- 0, MCOp.getExpr(), static_cast<MCFixupKind>(Lanai::FIXUP_LANAI_25)));
-
- return 0;
-}
-
unsigned LanaiMCCodeEmitter::getBranchTargetOpValue(
const MCInst &Inst, unsigned OpNo, SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &SubtargetInfo) const {
diff --git a/contrib/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp b/contrib/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp
index 04bedfb7fba7..c2f8c0f7ad50 100644
--- a/contrib/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.cpp
@@ -73,11 +73,12 @@ static MCInstPrinter *createLanaiMCInstPrinter(const Triple & /*T*/,
return 0;
}
-MCRelocationInfo *createLanaiElfRelocation(const Triple &TheTriple,
- MCContext &Ctx) {
+static MCRelocationInfo *createLanaiElfRelocation(const Triple &TheTriple,
+ MCContext &Ctx) {
return createMCRelocationInfo(TheTriple, Ctx);
}
+namespace {
class LanaiMCInstrAnalysis : public MCInstrAnalysis {
public:
explicit LanaiMCInstrAnalysis(const MCInstrInfo *Info)
@@ -106,6 +107,7 @@ public:
}
}
};
+} // end anonymous namespace
static MCInstrAnalysis *createLanaiInstrAnalysis(const MCInstrInfo *Info) {
return new LanaiMCInstrAnalysis(Info);
@@ -113,37 +115,40 @@ static MCInstrAnalysis *createLanaiInstrAnalysis(const MCInstrInfo *Info) {
extern "C" void LLVMInitializeLanaiTargetMC() {
// Register the MC asm info.
- RegisterMCAsmInfo<LanaiMCAsmInfo> X(TheLanaiTarget);
+ RegisterMCAsmInfo<LanaiMCAsmInfo> X(getTheLanaiTarget());
// Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(TheLanaiTarget, createLanaiMCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(getTheLanaiTarget(),
+ createLanaiMCInstrInfo);
// Register the MC register info.
- TargetRegistry::RegisterMCRegInfo(TheLanaiTarget, createLanaiMCRegisterInfo);
+ TargetRegistry::RegisterMCRegInfo(getTheLanaiTarget(),
+ createLanaiMCRegisterInfo);
// Register the MC subtarget info.
- TargetRegistry::RegisterMCSubtargetInfo(TheLanaiTarget,
+ TargetRegistry::RegisterMCSubtargetInfo(getTheLanaiTarget(),
createLanaiMCSubtargetInfo);
// Register the MC code emitter
- TargetRegistry::RegisterMCCodeEmitter(TheLanaiTarget,
+ TargetRegistry::RegisterMCCodeEmitter(getTheLanaiTarget(),
llvm::createLanaiMCCodeEmitter);
// Register the ASM Backend
- TargetRegistry::RegisterMCAsmBackend(TheLanaiTarget, createLanaiAsmBackend);
+ TargetRegistry::RegisterMCAsmBackend(getTheLanaiTarget(),
+ createLanaiAsmBackend);
// Register the MCInstPrinter.
- TargetRegistry::RegisterMCInstPrinter(TheLanaiTarget,
+ TargetRegistry::RegisterMCInstPrinter(getTheLanaiTarget(),
createLanaiMCInstPrinter);
// Register the ELF streamer.
- TargetRegistry::RegisterELFStreamer(TheLanaiTarget, createMCStreamer);
+ TargetRegistry::RegisterELFStreamer(getTheLanaiTarget(), createMCStreamer);
// Register the MC relocation info.
- TargetRegistry::RegisterMCRelocationInfo(TheLanaiTarget,
+ TargetRegistry::RegisterMCRelocationInfo(getTheLanaiTarget(),
createLanaiElfRelocation);
// Register the MC instruction analyzer.
- TargetRegistry::RegisterMCInstrAnalysis(TheLanaiTarget,
+ TargetRegistry::RegisterMCInstrAnalysis(getTheLanaiTarget(),
createLanaiInstrAnalysis);
}
diff --git a/contrib/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h b/contrib/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h
index e117ed7a500f..8adaf4cea420 100644
--- a/contrib/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h
@@ -15,6 +15,7 @@
#define LLVM_LIB_TARGET_LANAI_MCTARGETDESC_LANAIMCTARGETDESC_H
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCTargetOptions.h"
#include "llvm/Support/DataTypes.h"
namespace llvm {
@@ -31,14 +32,15 @@ class Triple;
class StringRef;
class raw_pwrite_stream;
-extern Target TheLanaiTarget;
+Target &getTheLanaiTarget();
MCCodeEmitter *createLanaiMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx);
MCAsmBackend *createLanaiAsmBackend(const Target &T, const MCRegisterInfo &MRI,
- const Triple &TheTriple, StringRef CPU);
+ const Triple &TheTriple, StringRef CPU,
+ const MCTargetOptions &Options);
MCObjectWriter *createLanaiELFObjectWriter(raw_pwrite_stream &OS,
uint8_t OSABI);
diff --git a/contrib/llvm/lib/Target/Lanai/TargetInfo/LanaiTargetInfo.cpp b/contrib/llvm/lib/Target/Lanai/TargetInfo/LanaiTargetInfo.cpp
index bd615d6ad3ac..e377db1d49da 100644
--- a/contrib/llvm/lib/Target/Lanai/TargetInfo/LanaiTargetInfo.cpp
+++ b/contrib/llvm/lib/Target/Lanai/TargetInfo/LanaiTargetInfo.cpp
@@ -13,8 +13,13 @@
using namespace llvm;
-Target llvm::TheLanaiTarget;
+namespace llvm {
+Target &getTheLanaiTarget() {
+ static Target TheLanaiTarget;
+ return TheLanaiTarget;
+}
+} // namespace llvm
extern "C" void LLVMInitializeLanaiTargetInfo() {
- RegisterTarget<Triple::lanai> X(TheLanaiTarget, "lanai", "Lanai");
+ RegisterTarget<Triple::lanai> X(getTheLanaiTarget(), "lanai", "Lanai");
}
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
index b3631caca952..8c715500f38b 100644
--- a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp
@@ -59,20 +59,21 @@ static MCInstPrinter *createMSP430MCInstPrinter(const Triple &T,
extern "C" void LLVMInitializeMSP430TargetMC() {
// Register the MC asm info.
- RegisterMCAsmInfo<MSP430MCAsmInfo> X(TheMSP430Target);
+ RegisterMCAsmInfo<MSP430MCAsmInfo> X(getTheMSP430Target());
// Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(TheMSP430Target, createMSP430MCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(getTheMSP430Target(),
+ createMSP430MCInstrInfo);
// Register the MC register info.
- TargetRegistry::RegisterMCRegInfo(TheMSP430Target,
+ TargetRegistry::RegisterMCRegInfo(getTheMSP430Target(),
createMSP430MCRegisterInfo);
// Register the MC subtarget info.
- TargetRegistry::RegisterMCSubtargetInfo(TheMSP430Target,
+ TargetRegistry::RegisterMCSubtargetInfo(getTheMSP430Target(),
createMSP430MCSubtargetInfo);
// Register the MCInstPrinter.
- TargetRegistry::RegisterMCInstPrinter(TheMSP430Target,
+ TargetRegistry::RegisterMCInstPrinter(getTheMSP430Target(),
createMSP430MCInstPrinter);
}
diff --git a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
index 241f1d6f9c0f..b901c5f09794 100644
--- a/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
+++ b/contrib/llvm/lib/Target/MSP430/MCTargetDesc/MSP430MCTargetDesc.h
@@ -19,7 +19,7 @@
namespace llvm {
class Target;
-extern Target TheMSP430Target;
+Target &getTheMSP430Target();
} // End llvm namespace
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp b/contrib/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp
index 4342c10a1bf2..abf062fe86ae 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp
@@ -42,9 +42,7 @@ namespace {
MSP430AsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
: AsmPrinter(TM, std::move(Streamer)) {}
- const char *getPassName() const override {
- return "MSP430 Assembly Printer";
- }
+ StringRef getPassName() const override { return "MSP430 Assembly Printer"; }
void printOperand(const MachineInstr *MI, int OpNum,
raw_ostream &O, const char* Modifier = nullptr);
@@ -157,5 +155,5 @@ void MSP430AsmPrinter::EmitInstruction(const MachineInstr *MI) {
// Force static initialization.
extern "C" void LLVMInitializeMSP430AsmPrinter() {
- RegisterAsmPrinter<MSP430AsmPrinter> X(TheMSP430Target);
+ RegisterAsmPrinter<MSP430AsmPrinter> X(getTheMSP430Target());
}
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430BranchSelector.cpp b/contrib/llvm/lib/Target/MSP430/MSP430BranchSelector.cpp
index 511e5bcdec0d..5fd6b6305f68 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430BranchSelector.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430BranchSelector.cpp
@@ -27,63 +27,84 @@ using namespace llvm;
#define DEBUG_TYPE "msp430-branch-select"
+static cl::opt<bool>
+ BranchSelectEnabled("msp430-branch-select", cl::Hidden, cl::init(true),
+ cl::desc("Expand out of range branches"));
+
+STATISTIC(NumSplit, "Number of machine basic blocks split");
STATISTIC(NumExpanded, "Number of branches expanded to long format");
namespace {
- struct MSP430BSel : public MachineFunctionPass {
- static char ID;
- MSP430BSel() : MachineFunctionPass(ID) {}
+class MSP430BSel : public MachineFunctionPass {
- /// BlockSizes - The sizes of the basic blocks in the function.
- std::vector<unsigned> BlockSizes;
+ typedef SmallVector<int, 16> OffsetVector;
- bool runOnMachineFunction(MachineFunction &Fn) override;
+ MachineFunction *MF;
+ const MSP430InstrInfo *TII;
- MachineFunctionProperties getRequiredProperties() const override {
- return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
- }
+ unsigned measureFunction(OffsetVector &BlockOffsets,
+ MachineBasicBlock *FromBB = nullptr);
+ bool expandBranches(OffsetVector &BlockOffsets);
- const char *getPassName() const override {
- return "MSP430 Branch Selector";
- }
- };
- char MSP430BSel::ID = 0;
+public:
+ static char ID;
+ MSP430BSel() : MachineFunctionPass(ID) {}
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+
+ StringRef getPassName() const override { return "MSP430 Branch Selector"; }
+};
+char MSP430BSel::ID = 0;
}
-/// createMSP430BranchSelectionPass - returns an instance of the Branch
-/// Selection Pass
-///
-FunctionPass *llvm::createMSP430BranchSelectionPass() {
- return new MSP430BSel();
+static bool isInRage(int DistanceInBytes) {
+ // According to CC430 Family User's Guide, Section 4.5.1.3, branch
+ // instructions have a signed 10-bit word offset field, so first we need to
+ // convert the distance from bytes to words, then check whether it fits in a
+ // 10-bit signed integer.
+ const int WordSize = 2;
+
+ assert((DistanceInBytes % WordSize == 0) &&
+ "Branch offset should be word aligned!");
+
+ int Words = DistanceInBytes / WordSize;
+ return isInt<10>(Words);
}
-bool MSP430BSel::runOnMachineFunction(MachineFunction &Fn) {
- const MSP430InstrInfo *TII =
- static_cast<const MSP430InstrInfo *>(Fn.getSubtarget().getInstrInfo());
+/// Measure each basic block, fill the BlockOffsets, and return the size of
+/// the function, starting at FromBB.
+unsigned MSP430BSel::measureFunction(OffsetVector &BlockOffsets,
+ MachineBasicBlock *FromBB) {
// Give the blocks of the function a dense, in-order, numbering.
- Fn.RenumberBlocks();
- BlockSizes.resize(Fn.getNumBlockIDs());
-
- // Measure each MBB and compute a size for the entire function.
- unsigned FuncSize = 0;
- for (MachineBasicBlock &MBB : Fn) {
- unsigned BlockSize = 0;
- for (MachineInstr &MI : MBB)
- BlockSize += TII->GetInstSizeInBytes(MI);
-
- BlockSizes[MBB.getNumber()] = BlockSize;
- FuncSize += BlockSize;
+ MF->RenumberBlocks(FromBB);
+
+ MachineFunction::iterator Begin;
+ if (FromBB == nullptr) {
+ Begin = MF->begin();
+ } else {
+ Begin = FromBB->getIterator();
}
- // If the entire function is smaller than the displacement of a branch field,
- // we know we don't need to shrink any branches in this function. This is a
- // common case.
- if (FuncSize < (1 << 9)) {
- BlockSizes.clear();
- return false;
+ BlockOffsets.resize(MF->getNumBlockIDs());
+
+ unsigned TotalSize = BlockOffsets[Begin->getNumber()];
+ for (auto &MBB : make_range(Begin, MF->end())) {
+ BlockOffsets[MBB.getNumber()] = TotalSize;
+ for (MachineInstr &MI : MBB) {
+ TotalSize += TII->getInstSizeInBytes(MI);
+ }
}
+ return TotalSize;
+}
+/// Expand branches and split basic blocks where necessary.
+/// Returns true if any change was made.
+bool MSP430BSel::expandBranches(OffsetVector &BlockOffsets) {
// For each conditional branch, if the offset to its destination is larger
// than the offset field allows, transform it into a long branch sequence
// like this:
@@ -93,91 +114,144 @@ bool MSP430BSel::runOnMachineFunction(MachineFunction &Fn) {
// b!CC $PC+6
// b MBB
//
- bool MadeChange = true;
- bool EverMadeChange = false;
- while (MadeChange) {
- // Iteratively expand branches until we reach a fixed point.
- MadeChange = false;
-
- for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
- ++MFI) {
- MachineBasicBlock &MBB = *MFI;
- unsigned MBBStartOffset = 0;
- for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
- I != E; ++I) {
- if ((I->getOpcode() != MSP430::JCC || I->getOperand(0).isImm()) &&
- I->getOpcode() != MSP430::JMP) {
- MBBStartOffset += TII->GetInstSizeInBytes(*I);
- continue;
- }
+ bool MadeChange = false;
+ for (auto MBB = MF->begin(), E = MF->end(); MBB != E; ++MBB) {
+ unsigned MBBStartOffset = 0;
+ for (auto MI = MBB->begin(), EE = MBB->end(); MI != EE; ++MI) {
+ MBBStartOffset += TII->getInstSizeInBytes(*MI);
- // Determine the offset from the current branch to the destination
- // block.
- MachineBasicBlock *Dest = I->getOperand(0).getMBB();
-
- int BranchSize;
- if (Dest->getNumber() <= MBB.getNumber()) {
- // If this is a backwards branch, the delta is the offset from the
- // start of this block to this branch, plus the sizes of all blocks
- // from this block to the dest.
- BranchSize = MBBStartOffset;
-
- for (unsigned i = Dest->getNumber(), e = MBB.getNumber(); i != e; ++i)
- BranchSize += BlockSizes[i];
- } else {
- // Otherwise, add the size of the blocks between this block and the
- // dest to the number of bytes left in this block.
- BranchSize = -MBBStartOffset;
-
- for (unsigned i = MBB.getNumber(), e = Dest->getNumber(); i != e; ++i)
- BranchSize += BlockSizes[i];
- }
+ // If this instruction is not a short branch then skip it.
+ if (MI->getOpcode() != MSP430::JCC && MI->getOpcode() != MSP430::JMP) {
+ continue;
+ }
- // If this branch is in range, ignore it.
- if (isInt<10>(BranchSize)) {
- MBBStartOffset += 2;
- continue;
- }
+ MachineBasicBlock *DestBB = MI->getOperand(0).getMBB();
+ // Determine the distance from the current branch to the destination
+ // block. MBBStartOffset already includes the size of the current branch
+ // instruction.
+ int BlockDistance =
+ BlockOffsets[DestBB->getNumber()] - BlockOffsets[MBB->getNumber()];
+ int BranchDistance = BlockDistance - MBBStartOffset;
+
+ // If this branch is in range, ignore it.
+ if (isInRage(BranchDistance)) {
+ continue;
+ }
+
+ DEBUG(dbgs() << " Found a branch that needs expanding, BB#"
+ << DestBB->getNumber() << ", Distance " << BranchDistance
+ << "\n");
+
+ // If JCC is not the last instruction we need to split the MBB.
+ if (MI->getOpcode() == MSP430::JCC && std::next(MI) != EE) {
+
+ DEBUG(dbgs() << " Found a basic block that needs to be split, BB#"
+ << MBB->getNumber() << "\n");
+
+ // Create a new basic block.
+ MachineBasicBlock *NewBB =
+ MF->CreateMachineBasicBlock(MBB->getBasicBlock());
+ MF->insert(std::next(MBB), NewBB);
- // Otherwise, we have to expand it to a long branch.
- unsigned NewSize;
- MachineInstr &OldBranch = *I;
- DebugLoc dl = OldBranch.getDebugLoc();
-
- if (I->getOpcode() == MSP430::JMP) {
- NewSize = 4;
- } else {
- // The BCC operands are:
- // 0. MSP430 branch predicate
- // 1. Target MBB
- SmallVector<MachineOperand, 1> Cond;
- Cond.push_back(I->getOperand(1));
-
- // Jump over the uncond branch inst (i.e. $+6) on opposite condition.
- TII->ReverseBranchCondition(Cond);
- BuildMI(MBB, I, dl, TII->get(MSP430::JCC))
- .addImm(4).addOperand(Cond[0]);
-
- NewSize = 6;
+ // Splice the instructions following MI over to the NewBB.
+ NewBB->splice(NewBB->end(), &*MBB, std::next(MI), MBB->end());
+
+ // Update the successor lists.
+ for (MachineBasicBlock *Succ : MBB->successors()) {
+ if (Succ == DestBB) {
+ continue;
+ }
+ MBB->replaceSuccessor(Succ, NewBB);
+ NewBB->addSuccessor(Succ);
}
- // Uncond branch to the real destination.
- I = BuildMI(MBB, I, dl, TII->get(MSP430::Bi)).addMBB(Dest);
- // Remove the old branch from the function.
- OldBranch.eraseFromParent();
+ // We introduced a new MBB so all following blocks should be numbered
+ // and measured again.
+ measureFunction(BlockOffsets, &*MBB);
- // Remember that this instruction is NewSize bytes, increase the size of the
- // block by NewSize-2, remember to iterate.
- BlockSizes[MBB.getNumber()] += NewSize-2;
- MBBStartOffset += NewSize;
+ ++NumSplit;
- ++NumExpanded;
- MadeChange = true;
+ // It may not be necessary to start all over at this point, but it's
+ // safer to do this anyway.
+ return true;
}
+
+ MachineInstr &OldBranch = *MI;
+ DebugLoc dl = OldBranch.getDebugLoc();
+ int InstrSizeDiff = -TII->getInstSizeInBytes(OldBranch);
+
+ if (MI->getOpcode() == MSP430::JCC) {
+ MachineBasicBlock *NextMBB = &*std::next(MBB);
+ assert(MBB->isSuccessor(NextMBB) &&
+ "This block must have a layout successor!");
+
+ // The BCC operands are:
+ // 0. Target MBB
+ // 1. MSP430 branch predicate
+ SmallVector<MachineOperand, 1> Cond;
+ Cond.push_back(MI->getOperand(1));
+
+ // Jump over the long branch on the opposite condition
+ TII->reverseBranchCondition(Cond);
+ MI = BuildMI(*MBB, MI, dl, TII->get(MSP430::JCC))
+ .addMBB(NextMBB)
+ .addOperand(Cond[0]);
+ InstrSizeDiff += TII->getInstSizeInBytes(*MI);
+ ++MI;
+ }
+
+ // Unconditional branch to the real destination.
+ MI = BuildMI(*MBB, MI, dl, TII->get(MSP430::Bi)).addMBB(DestBB);
+ InstrSizeDiff += TII->getInstSizeInBytes(*MI);
+
+ // Remove the old branch from the function.
+ OldBranch.eraseFromParent();
+
+ // The size of a new instruction is different from the old one, so we need
+ // to correct all block offsets.
+ for (int i = MBB->getNumber() + 1, e = BlockOffsets.size(); i < e; ++i) {
+ BlockOffsets[i] += InstrSizeDiff;
+ }
+ MBBStartOffset += InstrSizeDiff;
+
+ ++NumExpanded;
+ MadeChange = true;
}
- EverMadeChange |= MadeChange;
}
+ return MadeChange;
+}
+
+bool MSP430BSel::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ TII = static_cast<const MSP430InstrInfo *>(MF->getSubtarget().getInstrInfo());
+
+ // If the pass is disabled, just bail early.
+ if (!BranchSelectEnabled)
+ return false;
+
+ DEBUG(dbgs() << "\n********** " << getPassName() << " **********\n");
+
+ // BlockOffsets - Contains the distance from the beginning of the function to
+ // the beginning of each basic block.
+ OffsetVector BlockOffsets;
+
+ unsigned FunctionSize = measureFunction(BlockOffsets);
+ // If the entire function is smaller than the displacement of a branch field,
+ // we know we don't need to expand any branches in this
+ // function. This is a common case.
+ if (isInRage(FunctionSize)) {
+ return false;
+ }
+
+ // Iteratively expand branches until we reach a fixed point.
+ bool MadeChange = false;
+ while (expandBranches(BlockOffsets))
+ MadeChange = true;
+
+ return MadeChange;
+}
- BlockSizes.clear();
- return true;
+/// Returns an instance of the Branch Selection Pass
+FunctionPass *llvm::createMSP430BranchSelectionPass() {
+ return new MSP430BSel();
}
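A compact restatement of the range test that drives the branch selector above (hypothetical name; mirrors the isInt<10> check on word offsets):

#include <cassert>

// MSP430 conditional branches encode a signed 10-bit word offset, so a byte
// distance is reachable iff (distance / 2) lies in [-512, 511].
static bool branchDistanceFits(int DistanceInBytes) {
  assert(DistanceInBytes % 2 == 0 && "branch offsets are word aligned");
  int Words = DistanceInBytes / 2;
  return Words >= -512 && Words <= 511;
}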
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp b/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp
index 29555f99e23d..f1cb0b6c031b 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430FrameLowering.cpp
@@ -27,21 +27,21 @@
using namespace llvm;
bool MSP430FrameLowering::hasFP(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
- MF.getFrameInfo()->hasVarSizedObjects() ||
- MFI->isFrameAddressTaken());
+ MF.getFrameInfo().hasVarSizedObjects() ||
+ MFI.isFrameAddressTaken());
}
bool MSP430FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
- return !MF.getFrameInfo()->hasVarSizedObjects();
+ return !MF.getFrameInfo().hasVarSizedObjects();
}
void MSP430FrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
MSP430MachineFunctionInfo *MSP430FI = MF.getInfo<MSP430MachineFunctionInfo>();
const MSP430InstrInfo &TII =
*static_cast<const MSP430InstrInfo *>(MF.getSubtarget().getInstrInfo());
@@ -50,7 +50,7 @@ void MSP430FrameLowering::emitPrologue(MachineFunction &MF,
DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
// Get the number of bytes to allocate from the FrameInfo.
- uint64_t StackSize = MFI->getStackSize();
+ uint64_t StackSize = MFI.getStackSize();
uint64_t NumBytes = 0;
if (hasFP(MF)) {
@@ -61,7 +61,7 @@ void MSP430FrameLowering::emitPrologue(MachineFunction &MF,
// Get the offset of the stack slot for the EBP register... which is
// guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
// Update the frame offset adjustment.
- MFI->setOffsetAdjustment(-NumBytes);
+ MFI.setOffsetAdjustment(-NumBytes);
// Save FP into the appropriate stack slot...
BuildMI(MBB, MBBI, DL, TII.get(MSP430::PUSH16r))
@@ -106,7 +106,7 @@ void MSP430FrameLowering::emitPrologue(MachineFunction &MF,
void MSP430FrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
MSP430MachineFunctionInfo *MSP430FI = MF.getInfo<MSP430MachineFunctionInfo>();
const MSP430InstrInfo &TII =
*static_cast<const MSP430InstrInfo *>(MF.getSubtarget().getInstrInfo());
@@ -123,7 +123,7 @@ void MSP430FrameLowering::emitEpilogue(MachineFunction &MF,
}
// Get the number of bytes to allocate from the FrameInfo
- uint64_t StackSize = MFI->getStackSize();
+ uint64_t StackSize = MFI.getStackSize();
unsigned CSSize = MSP430FI->getCalleeSavedFrameSize();
uint64_t NumBytes = 0;
@@ -150,10 +150,10 @@ void MSP430FrameLowering::emitEpilogue(MachineFunction &MF,
// If there is an ADD16ri or SUB16ri of SP immediately before this
// instruction, merge the two instructions.
- //if (NumBytes || MFI->hasVarSizedObjects())
+ //if (NumBytes || MFI.hasVarSizedObjects())
// mergeSPUpdatesUp(MBB, MBBI, StackPtr, &NumBytes);
- if (MFI->hasVarSizedObjects()) {
+ if (MFI.hasVarSizedObjects()) {
BuildMI(MBB, MBBI, DL,
TII.get(MSP430::MOV16rr), MSP430::SP).addReg(MSP430::FP);
if (CSSize) {
@@ -293,9 +293,9 @@ MSP430FrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
RegScavenger *) const {
// Create a frame entry for the FP register that must be saved.
if (hasFP(MF)) {
- int FrameIdx = MF.getFrameInfo()->CreateFixedObject(2, -4, true);
+ int FrameIdx = MF.getFrameInfo().CreateFixedObject(2, -4, true);
(void)FrameIdx;
- assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() &&
+ assert(FrameIdx == MF.getFrameInfo().getObjectIndexBegin() &&
"Slot for FP register must be last in order to be found!");
}
}
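Most of the churn in the frame-lowering file above is mechanical: MachineFunction::getFrameInfo() now hands back a reference instead of a pointer, so locals lose a `*` and every `->` becomes `.`. A minimal sketch of the same migration on a made-up pair of classes (names are illustrative, not LLVM's):

#include <cstdio>

struct FrameInfo {
  unsigned StackSize = 32;
  unsigned getStackSize() const { return StackSize; }
};

struct Function {
  FrameInfo FI;
  // Old style: FrameInfo *getFrameInfo() { return &FI; }   // callers wrote MFI->...
  FrameInfo &getFrameInfo() { return FI; }                   // callers now write MFI.
};

int main() {
  Function MF;
  FrameInfo &MFI = MF.getFrameInfo();          // was: FrameInfo *MFI = MF.getFrameInfo();
  std::printf("stack size: %u\n", MFI.getStackSize());   // was: MFI->getStackSize()
  return 0;
}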
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
index 69c609d04b5e..6e481b68e038 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -95,7 +95,7 @@ namespace {
MSP430DAGToDAGISel(MSP430TargetMachine &TM, CodeGenOpt::Level OptLevel)
: SelectionDAGISel(TM, OptLevel) {}
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "MSP430 DAG->DAG Pattern Instruction Selection";
}
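The only change in this file is getPassName() moving from `const char *` to `StringRef`. Returning a StringRef built from a string literal stays allocation-free, since it only wraps a pointer and a length; a rough stand-in outside LLVM is std::string_view, as in this sketch:

#include <cstdio>
#include <string_view>

// std::string_view stands in for llvm::StringRef: non-owning pointer + length.
struct Pass {
  virtual std::string_view getPassName() const { return "base pass"; }
  virtual ~Pass() = default;
};

struct MyISelPass : Pass {
  std::string_view getPassName() const override {
    return "MSP430 DAG->DAG Pattern Instruction Selection";
  }
};

int main() {
  MyISelPass P;
  std::string_view N = P.getPassName();
  std::printf("%.*s\n", static_cast<int>(N.size()), N.data());
  return 0;
}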
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
index cb2c62029454..73346b9ce41d 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -54,8 +54,7 @@ HWMultMode("msp430-hwmult-mode", cl::Hidden,
clEnumValN(HWMultIntr, "interrupts",
"Assume hardware multiplier can be used inside interrupts"),
clEnumValN(HWMultNoIntr, "use",
- "Assume hardware multiplier cannot be used inside interrupts"),
- clEnumValEnd));
+ "Assume hardware multiplier cannot be used inside interrupts")));
MSP430TargetLowering::MSP430TargetLowering(const TargetMachine &TM,
const MSP430Subtarget &STI)
@@ -413,7 +412,7 @@ SDValue MSP430TargetLowering::LowerCCCArguments(
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
MSP430MachineFunctionInfo *FuncInfo = MF.getInfo<MSP430MachineFunctionInfo>();
@@ -426,7 +425,7 @@ SDValue MSP430TargetLowering::LowerCCCArguments(
// Create frame index for the start of the first vararg value
if (isVarArg) {
unsigned Offset = CCInfo.getNextStackOffset();
- FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, Offset, true));
+ FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(1, Offset, true));
}
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
@@ -471,8 +470,8 @@ SDValue MSP430TargetLowering::LowerCCCArguments(
ISD::ArgFlagsTy Flags = Ins[i].Flags;
if (Flags.isByVal()) {
- int FI = MFI->CreateFixedObject(Flags.getByValSize(),
- VA.getLocMemOffset(), true);
+ int FI = MFI.CreateFixedObject(Flags.getByValSize(),
+ VA.getLocMemOffset(), true);
InVal = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
} else {
// Load the argument to a virtual register
@@ -483,7 +482,7 @@ SDValue MSP430TargetLowering::LowerCCCArguments(
<< "\n";
}
// Create the frame index object for this incoming parameter...
- int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), true);
+ int FI = MFI.CreateFixedObject(ObjSize, VA.getLocMemOffset(), true);
// Create the SelectionDAG nodes corresponding to a load
//from this parameter
@@ -807,7 +806,8 @@ static SDValue EmitCMP(SDValue &LHS, SDValue &RHS, SDValue &TargetCC,
std::swap(LHS, RHS);
break;
case ISD::SETULE:
- std::swap(LHS, RHS); // FALLTHROUGH
+ std::swap(LHS, RHS);
+ LLVM_FALLTHROUGH;
case ISD::SETUGE:
// Turn lhs u>= rhs with lhs constant into rhs u< lhs+1, this allows us to
// fold constant into instruction.
@@ -820,7 +820,8 @@ static SDValue EmitCMP(SDValue &LHS, SDValue &RHS, SDValue &TargetCC,
TCC = MSP430CC::COND_HS; // aka COND_C
break;
case ISD::SETUGT:
- std::swap(LHS, RHS); // FALLTHROUGH
+ std::swap(LHS, RHS);
+ LLVM_FALLTHROUGH;
case ISD::SETULT:
// Turn lhs u< rhs with lhs constant into rhs u>= lhs+1, this allows us to
// fold constant into instruction.
@@ -833,7 +834,8 @@ static SDValue EmitCMP(SDValue &LHS, SDValue &RHS, SDValue &TargetCC,
TCC = MSP430CC::COND_LO; // aka COND_NC
break;
case ISD::SETLE:
- std::swap(LHS, RHS); // FALLTHROUGH
+ std::swap(LHS, RHS);
+ LLVM_FALLTHROUGH;
case ISD::SETGE:
// Turn lhs >= rhs with lhs constant into rhs < lhs+1, this allows us to
// fold constant into instruction.
@@ -846,7 +848,8 @@ static SDValue EmitCMP(SDValue &LHS, SDValue &RHS, SDValue &TargetCC,
TCC = MSP430CC::COND_GE;
break;
case ISD::SETGT:
- std::swap(LHS, RHS); // FALLTHROUGH
+ std::swap(LHS, RHS);
+ LLVM_FALLTHROUGH;
case ISD::SETLT:
// Turn lhs < rhs with lhs constant into rhs >= lhs+1, this allows us to
// fold constant into instruction.
@@ -997,7 +1000,7 @@ MSP430TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
if (ReturnAddrIndex == 0) {
// Set up a frame object for the return address.
uint64_t SlotSize = MF.getDataLayout().getPointerSize();
- ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize,
+ ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject(SlotSize, -SlotSize,
true);
FuncInfo->setRAIndex(ReturnAddrIndex);
}
@@ -1007,8 +1010,8 @@ MSP430TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op,
SelectionDAG &DAG) const {
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
- MFI->setReturnAddressIsTaken(true);
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI.setReturnAddressIsTaken(true);
if (verifyReturnAddressArgumentIsConstant(Op, DAG))
return SDValue();
@@ -1034,8 +1037,8 @@ SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op,
SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op,
SelectionDAG &DAG) const {
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
- MFI->setFrameAddressIsTaken(true);
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI.setFrameAddressIsTaken(true);
EVT VT = Op.getValueType();
SDLoc dl(Op); // FIXME probably not meaningful
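The EmitCMP hunks above swap LHS/RHS and then deliberately fall into the next case; the patch replaces the old "// FALLTHROUGH" comments with LLVM_FALLTHROUGH so implicit-fallthrough warnings stay quiet. A small sketch of the same pattern using the standard C++17 attribute, which LLVM_FALLTHROUGH expands to (or a compiler-specific equivalent); the enum and canonical forms here are made up:

#include <cstdio>
#include <utility>

enum class Cond { ULE, UGE, ULT, UGT };

// Canonicalize a comparison so only UGE/ULT remain, mirroring the
// swap-then-fall-through structure in EmitCMP above.
static Cond canonicalize(Cond C, int &LHS, int &RHS) {
  switch (C) {
  case Cond::ULE:
    std::swap(LHS, RHS);
    [[fallthrough]];             // was: // FALLTHROUGH
  case Cond::UGE:
    return Cond::UGE;
  case Cond::UGT:
    std::swap(LHS, RHS);
    [[fallthrough]];
  case Cond::ULT:
    return Cond::ULT;
  }
  return C;                      // not reachable; keeps compilers happy
}

int main() {
  int A = 1, B = 2;
  Cond C = canonicalize(Cond::ULE, A, B);
  std::printf("lhs=%d rhs=%d canonical=%d\n", A, B, static_cast<int>(C));
  return 0;
}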
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
index c834da3a11cd..6135ce080920 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -42,7 +42,7 @@ void MSP430InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
DebugLoc DL;
if (MI != MBB.end()) DL = MI->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FrameIdx),
@@ -69,7 +69,7 @@ void MSP430InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
DebugLoc DL;
if (MI != MBB.end()) DL = MI->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FrameIdx),
@@ -104,7 +104,10 @@ void MSP430InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
.addReg(SrcReg, getKillRegState(KillSrc));
}
-unsigned MSP430InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+unsigned MSP430InstrInfo::removeBranch(MachineBasicBlock &MBB,
+ int *BytesRemoved) const {
+ assert(!BytesRemoved && "code size not handled");
+
MachineBasicBlock::iterator I = MBB.end();
unsigned Count = 0;
@@ -127,7 +130,7 @@ unsigned MSP430InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
}
bool MSP430InstrInfo::
-ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
assert(Cond.size() == 1 && "Invalid Xbranch condition!");
MSP430CC::CondCodes CC = static_cast<MSP430CC::CondCodes>(Cond[0].getImm());
@@ -260,15 +263,17 @@ bool MSP430InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
return false;
}
-unsigned MSP430InstrInfo::InsertBranch(MachineBasicBlock &MBB,
+unsigned MSP430InstrInfo::insertBranch(MachineBasicBlock &MBB,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
ArrayRef<MachineOperand> Cond,
- const DebugLoc &DL) const {
+ const DebugLoc &DL,
+ int *BytesAdded) const {
// Shouldn't be a fall through.
- assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert(TBB && "insertBranch must not be told to insert a fallthrough");
assert((Cond.size() == 1 || Cond.size() == 0) &&
"MSP430 branch conditions have one component!");
+ assert(!BytesAdded && "code size not handled");
if (Cond.empty()) {
// Unconditional branch?
@@ -293,7 +298,7 @@ unsigned MSP430InstrInfo::InsertBranch(MachineBasicBlock &MBB,
/// GetInstSize - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
///
-unsigned MSP430InstrInfo::GetInstSizeInBytes(const MachineInstr &MI) const {
+unsigned MSP430InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
const MCInstrDesc &Desc = MI.getDesc();
switch (Desc.TSFlags & MSP430II::SizeMask) {
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.h b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.h
index 46d4738d89af..e3259bd6a7bc 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.h
+++ b/contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.h
@@ -68,21 +68,23 @@ public:
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
- unsigned GetInstSizeInBytes(const MachineInstr &MI) const;
+ unsigned getInstSizeInBytes(const MachineInstr &MI) const override;
// Branch folding goodness
bool
- ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
+ reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
bool isUnpredicatedTerminator(const MachineInstr &MI) const override;
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const override;
- unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
- unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ unsigned removeBranch(MachineBasicBlock &MBB,
+ int *BytesRemoved = nullptr) const override;
+ unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
- const DebugLoc &DL) const override;
+ const DebugLoc &DL,
+ int *BytesAdded = nullptr) const override;
};
}
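insertBranch/removeBranch pick up optional `int *BytesAdded` / `int *BytesRemoved` out-parameters (and lower-case names); this target asserts they are null because it never reports code size through them. A minimal sketch of that optional-out-parameter convention, with a hypothetical function name:

#include <cassert>
#include <cstdio>

// Hypothetical: append a branch, optionally reporting how many bytes it added.
// Callers that don't care pass nothing; a target that can't report size asserts
// the pointer is null rather than silently returning garbage.
static unsigned insertBranch(unsigned NumInstrs, int *BytesAdded = nullptr) {
  assert(!BytesAdded && "code size not handled");   // mirrors the MSP430 assert
  return NumInstrs + 1;
}

int main() {
  unsigned N = insertBranch(3);          // fine: no size requested
  std::printf("instructions now: %u\n", N);
  // insertBranch(3, &Bytes);            // would trip the assert on this target
  return 0;
}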
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp
index 2fb82e535e8d..81cd9d1ad3f8 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -114,13 +114,13 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
unsigned BasePtr = (TFI->hasFP(MF) ? MSP430::FP : MSP430::SP);
- int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
+ int Offset = MF.getFrameInfo().getObjectOffset(FrameIndex);
// Skip the saved PC
Offset += 2;
if (!TFI->hasFP(MF))
- Offset += MF.getFrameInfo()->getStackSize();
+ Offset += MF.getFrameInfo().getStackSize();
else
Offset += 2; // Skip the saved FP
diff --git a/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp b/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
index b2e698ca5548..bebe5fa35ad4 100644
--- a/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/contrib/llvm/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -23,7 +23,7 @@ using namespace llvm;
extern "C" void LLVMInitializeMSP430Target() {
// Register the target.
- RegisterTargetMachine<MSP430TargetMachine> X(TheMSP430Target);
+ RegisterTargetMachine<MSP430TargetMachine> X(getTheMSP430Target());
}
static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
diff --git a/contrib/llvm/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp b/contrib/llvm/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp
index 0d71d04ebe22..62f52a193674 100644
--- a/contrib/llvm/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp
+++ b/contrib/llvm/lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp
@@ -12,9 +12,12 @@
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
-Target llvm::TheMSP430Target;
+Target &llvm::getTheMSP430Target() {
+ static Target TheMSP430Target;
+ return TheMSP430Target;
+}
-extern "C" void LLVMInitializeMSP430TargetInfo() {
- RegisterTarget<Triple::msp430>
- X(TheMSP430Target, "msp430", "MSP430 [experimental]");
+extern "C" void LLVMInitializeMSP430TargetInfo() {
+ RegisterTarget<Triple::msp430> X(getTheMSP430Target(), "msp430",
+ "MSP430 [experimental]");
}
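The namespace-scope `Target llvm::TheMSP430Target` object becomes a getTheMSP430Target() accessor returning a function-local static, which sidesteps cross-translation-unit static initialization order: the object is constructed the first time any caller asks for it. A sketch of the pattern with a stand-in Target type (not the LLVM class):

#include <cstdio>
#include <string>

struct Target {                  // stand-in for llvm::Target
  std::string Name;
};

// Before: a global `Target TheMSP430Target;` whose construction order relative
// to other globals was unspecified. After: constructed on first use.
Target &getTheMSP430Target() {
  static Target TheMSP430Target; // initialized exactly once, thread-safe since C++11
  return TheMSP430Target;
}

void registerTarget() {          // stands in for RegisterTarget<Triple::msp430>
  getTheMSP430Target().Name = "msp430";
}

int main() {
  registerTarget();
  std::printf("registered: %s\n", getTheMSP430Target().Name.c_str());
  return 0;
}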
diff --git a/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
index b51d0200b0b1..97ca11ca501e 100644
--- a/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
+++ b/contrib/llvm/lib/Target/Mips/AsmParser/MipsAsmParser.cpp
@@ -13,6 +13,7 @@
#include "MipsRegisterInfo.h"
#include "MipsTargetObjectFile.h"
#include "MipsTargetStreamer.h"
+#include "MCTargetDesc/MipsBaseInfo.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/MC/MCContext.h"
@@ -129,6 +130,9 @@ class MipsAsmParser : public MCTargetAsmParser {
#define GET_ASSEMBLER_HEADER
#include "MipsGenAsmMatcher.inc"
+ unsigned
+ checkEarlyTargetMatchPredicate(MCInst &Inst,
+ const OperandVector &Operands) override;
unsigned checkTargetMatchPredicate(MCInst &Inst) override;
bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
@@ -230,7 +234,10 @@ class MipsAsmParser : public MCTargetAsmParser {
bool expandUlh(MCInst &Inst, bool Signed, SMLoc IDLoc, MCStreamer &Out,
const MCSubtargetInfo *STI);
- bool expandUlw(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+ bool expandUsh(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+ const MCSubtargetInfo *STI);
+
+ bool expandUxw(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
const MCSubtargetInfo *STI);
bool expandRotation(MCInst &Inst, SMLoc IDLoc,
@@ -245,13 +252,19 @@ class MipsAsmParser : public MCTargetAsmParser {
bool expandAbs(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
const MCSubtargetInfo *STI);
+ bool expandLoadStoreDMacro(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+ const MCSubtargetInfo *STI, bool IsLoad);
+
+ bool expandSeq(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+ const MCSubtargetInfo *STI);
+
+ bool expandSeqI(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+ const MCSubtargetInfo *STI);
+
bool reportParseError(Twine ErrorMsg);
bool reportParseError(SMLoc Loc, Twine ErrorMsg);
bool parseMemOffset(const MCExpr *&Res, bool isParenExpr);
- bool parseRelocOperand(const MCExpr *&Res);
-
- const MCExpr *evaluateRelocExpr(const MCExpr *Expr, StringRef RelocStr);
bool isEvaluated(const MCExpr *Expr);
bool parseSetMips0Directive();
@@ -292,6 +305,10 @@ class MipsAsmParser : public MCTargetAsmParser {
bool parseDataDirective(unsigned Size, SMLoc L);
bool parseDirectiveGpWord();
bool parseDirectiveGpDWord();
+ bool parseDirectiveDtpRelWord();
+ bool parseDirectiveDtpRelDWord();
+ bool parseDirectiveTpRelWord();
+ bool parseDirectiveTpRelDWord();
bool parseDirectiveModule();
bool parseDirectiveModuleFP();
bool parseFpABIValue(MipsABIFlagsSection::FpABIKind &FpABI,
@@ -395,6 +412,8 @@ public:
Match_RequiresDifferentSrcAndDst = FIRST_TARGET_MATCH_RESULT_TY,
Match_RequiresDifferentOperands,
Match_RequiresNoZeroRegister,
+ Match_RequiresSameSrcAndDst,
+ Match_NonZeroOperandForSync,
#define GET_OPERAND_DIAGNOSTIC_TYPES
#include "MipsGenAsmMatcher.inc"
#undef GET_OPERAND_DIAGNOSTIC_TYPES
@@ -548,6 +567,64 @@ public:
void warnIfNoMacro(SMLoc Loc);
bool isLittle() const { return IsLittleEndian; }
+
+ const MCExpr *createTargetUnaryExpr(const MCExpr *E,
+ AsmToken::TokenKind OperatorToken,
+ MCContext &Ctx) override {
+ switch(OperatorToken) {
+ default:
+ llvm_unreachable("Unknown token");
+ return nullptr;
+ case AsmToken::PercentCall16:
+ return MipsMCExpr::create(MipsMCExpr::MEK_GOT_CALL, E, Ctx);
+ case AsmToken::PercentCall_Hi:
+ return MipsMCExpr::create(MipsMCExpr::MEK_CALL_HI16, E, Ctx);
+ case AsmToken::PercentCall_Lo:
+ return MipsMCExpr::create(MipsMCExpr::MEK_CALL_LO16, E, Ctx);
+ case AsmToken::PercentDtprel_Hi:
+ return MipsMCExpr::create(MipsMCExpr::MEK_DTPREL_HI, E, Ctx);
+ case AsmToken::PercentDtprel_Lo:
+ return MipsMCExpr::create(MipsMCExpr::MEK_DTPREL_LO, E, Ctx);
+ case AsmToken::PercentGot:
+ return MipsMCExpr::create(MipsMCExpr::MEK_GOT, E, Ctx);
+ case AsmToken::PercentGot_Disp:
+ return MipsMCExpr::create(MipsMCExpr::MEK_GOT_DISP, E, Ctx);
+ case AsmToken::PercentGot_Hi:
+ return MipsMCExpr::create(MipsMCExpr::MEK_GOT_HI16, E, Ctx);
+ case AsmToken::PercentGot_Lo:
+ return MipsMCExpr::create(MipsMCExpr::MEK_GOT_LO16, E, Ctx);
+ case AsmToken::PercentGot_Ofst:
+ return MipsMCExpr::create(MipsMCExpr::MEK_GOT_OFST, E, Ctx);
+ case AsmToken::PercentGot_Page:
+ return MipsMCExpr::create(MipsMCExpr::MEK_GOT_PAGE, E, Ctx);
+ case AsmToken::PercentGottprel:
+ return MipsMCExpr::create(MipsMCExpr::MEK_GOTTPREL, E, Ctx);
+ case AsmToken::PercentGp_Rel:
+ return MipsMCExpr::create(MipsMCExpr::MEK_GPREL, E, Ctx);
+ case AsmToken::PercentHi:
+ return MipsMCExpr::create(MipsMCExpr::MEK_HI, E, Ctx);
+ case AsmToken::PercentHigher:
+ return MipsMCExpr::create(MipsMCExpr::MEK_HIGHER, E, Ctx);
+ case AsmToken::PercentHighest:
+ return MipsMCExpr::create(MipsMCExpr::MEK_HIGHEST, E, Ctx);
+ case AsmToken::PercentLo:
+ return MipsMCExpr::create(MipsMCExpr::MEK_LO, E, Ctx);
+ case AsmToken::PercentNeg:
+ return MipsMCExpr::create(MipsMCExpr::MEK_NEG, E, Ctx);
+ case AsmToken::PercentPcrel_Hi:
+ return MipsMCExpr::create(MipsMCExpr::MEK_PCREL_HI16, E, Ctx);
+ case AsmToken::PercentPcrel_Lo:
+ return MipsMCExpr::create(MipsMCExpr::MEK_PCREL_LO16, E, Ctx);
+ case AsmToken::PercentTlsgd:
+ return MipsMCExpr::create(MipsMCExpr::MEK_TLSGD, E, Ctx);
+ case AsmToken::PercentTlsldm:
+ return MipsMCExpr::create(MipsMCExpr::MEK_TLSLDM, E, Ctx);
+ case AsmToken::PercentTprel_Hi:
+ return MipsMCExpr::create(MipsMCExpr::MEK_TPREL_HI, E, Ctx);
+ case AsmToken::PercentTprel_Lo:
+ return MipsMCExpr::create(MipsMCExpr::MEK_TPREL_LO, E, Ctx);
+ }
+ }
};
}
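The createTargetUnaryExpr override above lets the generic asm parser hand operators like %hi, %lo or %got_page straight to the target, which wraps the already-parsed sub-expression in a MipsMCExpr of the matching kind; the hand-rolled parseRelocOperand/evaluateRelocExpr string matching removed further down becomes unnecessary. A toy version of the same dispatch, mapping an operator spelling to an expression-kind tag (the names here are invented, not the MC ones):

#include <cstdio>
#include <initializer_list>
#include <string>

enum class RelocKind { None, Hi, Lo, GotPage };

// Toy stand-in for createTargetUnaryExpr: the parser has already lexed the
// operator and parsed the inner expression; the target only picks the
// relocation kind to wrap it in.
static RelocKind wrapReloc(const std::string &Op) {
  if (Op == "%hi")       return RelocKind::Hi;
  if (Op == "%lo")       return RelocKind::Lo;
  if (Op == "%got_page") return RelocKind::GotPage;
  return RelocKind::None;        // unknown operator: let the parser report it
}

int main() {
  for (const char *Op : {"%hi", "%lo", "%got_page", "%bogus"})
    std::printf("%-10s -> kind %d\n", Op, static_cast<int>(wrapReloc(Op)));
  return 0;
}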
@@ -605,6 +682,7 @@ private:
struct RegIdxOp {
unsigned Index; /// Index into the register class
RegKind Kind; /// Bitfield of the kinds it could possibly be
+ struct Token Tok; /// The input token this operand originated from.
const MCRegisterInfo *RegInfo;
};
@@ -632,7 +710,8 @@ private:
SMLoc StartLoc, EndLoc;
/// Internal constructor for register kinds
- static std::unique_ptr<MipsOperand> CreateReg(unsigned Index, RegKind RegKind,
+ static std::unique_ptr<MipsOperand> CreateReg(unsigned Index, StringRef Str,
+ RegKind RegKind,
const MCRegisterInfo *RegInfo,
SMLoc S, SMLoc E,
MipsAsmParser &Parser) {
@@ -640,6 +719,8 @@ private:
Op->RegIdx.Index = Index;
Op->RegIdx.RegInfo = RegInfo;
Op->RegIdx.Kind = RegKind;
+ Op->RegIdx.Tok.Data = Str.data();
+ Op->RegIdx.Tok.Length = Str.size();
Op->StartLoc = S;
Op->EndLoc = E;
return Op;
@@ -856,9 +937,11 @@ public:
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::createReg(getFGR32Reg()));
// FIXME: We ought to do this for -integrated-as without -via-file-asm too.
+ // FIXME: This should propagate failure up to parseStatement.
if (!AsmParser.useOddSPReg() && RegIdx.Index & 1)
- AsmParser.Error(StartLoc, "-mno-odd-spreg prohibits the use of odd FPU "
- "registers");
+ AsmParser.getParser().printError(
+ StartLoc, "-mno-odd-spreg prohibits the use of odd FPU "
+ "registers");
}
void addFGRH32AsmRegOperands(MCInst &Inst, unsigned N) const {
@@ -925,7 +1008,7 @@ public:
void addConstantUImmOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
uint64_t Imm = getConstantImm() - Offset;
- Imm &= (1 << Bits) - 1;
+ Imm &= (1ULL << Bits) - 1;
Imm += Offset;
Imm += AdjustOffset;
Inst.addOperand(MCOperand::createImm(Imm));
@@ -1023,7 +1106,8 @@ public:
bool isRegIdx() const { return Kind == k_RegisterIndex; }
bool isImm() const override { return Kind == k_Immediate; }
bool isConstantImm() const {
- return isImm() && isa<MCConstantExpr>(getImm());
+ int64_t Res;
+ return isImm() && getImm()->evaluateAsAbsolute(Res);
}
bool isConstantImmz() const {
return isConstantImm() && getConstantImm() == 0;
@@ -1099,8 +1183,14 @@ public:
}
template <unsigned Bits, unsigned ShiftLeftAmount>
bool isScaledSImm() const {
- return isConstantImm() &&
- isShiftedInt<Bits, ShiftLeftAmount>(getConstantImm());
+ if (isConstantImm() && isShiftedInt<Bits, ShiftLeftAmount>(getConstantImm()))
+ return true;
+ // Operand can also be a symbol or symbol plus offset in case of relocations.
+ if (Kind != k_Immediate)
+ return false;
+ MCValue Res;
+ bool Success = getImm()->evaluateAsRelocatable(Res, nullptr, nullptr);
+ return Success && isShiftedInt<Bits, ShiftLeftAmount>(Res.getConstant());
}
bool isRegList16() const {
if (!isRegList())
@@ -1188,7 +1278,9 @@ public:
int64_t getConstantImm() const {
const MCExpr *Val = getImm();
- return static_cast<const MCConstantExpr *>(Val)->getValue();
+ int64_t Value = 0;
+ (void)Val->evaluateAsAbsolute(Value);
+ return Value;
}
MipsOperand *getMemBase() const {
@@ -1228,66 +1320,66 @@ public:
/// Create a numeric register (e.g. $1). The exact register remains
/// unresolved until an instruction successfully matches
static std::unique_ptr<MipsOperand>
- createNumericReg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S,
- SMLoc E, MipsAsmParser &Parser) {
+ createNumericReg(unsigned Index, StringRef Str, const MCRegisterInfo *RegInfo,
+ SMLoc S, SMLoc E, MipsAsmParser &Parser) {
DEBUG(dbgs() << "createNumericReg(" << Index << ", ...)\n");
- return CreateReg(Index, RegKind_Numeric, RegInfo, S, E, Parser);
+ return CreateReg(Index, Str, RegKind_Numeric, RegInfo, S, E, Parser);
}
/// Create a register that is definitely a GPR.
/// This is typically only used for named registers such as $gp.
static std::unique_ptr<MipsOperand>
- createGPRReg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S, SMLoc E,
- MipsAsmParser &Parser) {
- return CreateReg(Index, RegKind_GPR, RegInfo, S, E, Parser);
+ createGPRReg(unsigned Index, StringRef Str, const MCRegisterInfo *RegInfo,
+ SMLoc S, SMLoc E, MipsAsmParser &Parser) {
+ return CreateReg(Index, Str, RegKind_GPR, RegInfo, S, E, Parser);
}
/// Create a register that is definitely a FGR.
/// This is typically only used for named registers such as $f0.
static std::unique_ptr<MipsOperand>
- createFGRReg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S, SMLoc E,
- MipsAsmParser &Parser) {
- return CreateReg(Index, RegKind_FGR, RegInfo, S, E, Parser);
+ createFGRReg(unsigned Index, StringRef Str, const MCRegisterInfo *RegInfo,
+ SMLoc S, SMLoc E, MipsAsmParser &Parser) {
+ return CreateReg(Index, Str, RegKind_FGR, RegInfo, S, E, Parser);
}
/// Create a register that is definitely a HWReg.
/// This is typically only used for named registers such as $hwr_cpunum.
static std::unique_ptr<MipsOperand>
- createHWRegsReg(unsigned Index, const MCRegisterInfo *RegInfo,
+ createHWRegsReg(unsigned Index, StringRef Str, const MCRegisterInfo *RegInfo,
SMLoc S, SMLoc E, MipsAsmParser &Parser) {
- return CreateReg(Index, RegKind_HWRegs, RegInfo, S, E, Parser);
+ return CreateReg(Index, Str, RegKind_HWRegs, RegInfo, S, E, Parser);
}
/// Create a register that is definitely an FCC.
/// This is typically only used for named registers such as $fcc0.
static std::unique_ptr<MipsOperand>
- createFCCReg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S, SMLoc E,
- MipsAsmParser &Parser) {
- return CreateReg(Index, RegKind_FCC, RegInfo, S, E, Parser);
+ createFCCReg(unsigned Index, StringRef Str, const MCRegisterInfo *RegInfo,
+ SMLoc S, SMLoc E, MipsAsmParser &Parser) {
+ return CreateReg(Index, Str, RegKind_FCC, RegInfo, S, E, Parser);
}
/// Create a register that is definitely an ACC.
/// This is typically only used for named registers such as $ac0.
static std::unique_ptr<MipsOperand>
- createACCReg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S, SMLoc E,
- MipsAsmParser &Parser) {
- return CreateReg(Index, RegKind_ACC, RegInfo, S, E, Parser);
+ createACCReg(unsigned Index, StringRef Str, const MCRegisterInfo *RegInfo,
+ SMLoc S, SMLoc E, MipsAsmParser &Parser) {
+ return CreateReg(Index, Str, RegKind_ACC, RegInfo, S, E, Parser);
}
/// Create a register that is definitely an MSA128.
/// This is typically only used for named registers such as $w0.
static std::unique_ptr<MipsOperand>
- createMSA128Reg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S,
- SMLoc E, MipsAsmParser &Parser) {
- return CreateReg(Index, RegKind_MSA128, RegInfo, S, E, Parser);
+ createMSA128Reg(unsigned Index, StringRef Str, const MCRegisterInfo *RegInfo,
+ SMLoc S, SMLoc E, MipsAsmParser &Parser) {
+ return CreateReg(Index, Str, RegKind_MSA128, RegInfo, S, E, Parser);
}
/// Create a register that is definitely an MSACtrl.
/// This is typically only used for named registers such as $msaaccess.
static std::unique_ptr<MipsOperand>
- createMSACtrlReg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S,
- SMLoc E, MipsAsmParser &Parser) {
- return CreateReg(Index, RegKind_MSACtrl, RegInfo, S, E, Parser);
+ createMSACtrlReg(unsigned Index, StringRef Str, const MCRegisterInfo *RegInfo,
+ SMLoc S, SMLoc E, MipsAsmParser &Parser) {
+ return CreateReg(Index, Str, RegKind_MSACtrl, RegInfo, S, E, Parser);
}
static std::unique_ptr<MipsOperand>
@@ -1428,10 +1520,11 @@ public:
OS << ">";
break;
case k_RegisterIndex:
- OS << "RegIdx<" << RegIdx.Index << ":" << RegIdx.Kind << ">";
+ OS << "RegIdx<" << RegIdx.Index << ":" << RegIdx.Kind << ", "
+ << StringRef(RegIdx.Tok.Data, RegIdx.Tok.Length) << ">";
break;
case k_Token:
- OS << Tok.Data;
+ OS << getToken();
break;
case k_RegList:
OS << "RegList< ";
@@ -1444,6 +1537,22 @@ public:
break;
}
}
+
+ bool isValidForTie(const MipsOperand &Other) const {
+ if (Kind != Other.Kind)
+ return false;
+
+ switch (Kind) {
+ default:
+ llvm_unreachable("Unexpected kind");
+ return false;
+ case k_RegisterIndex: {
+ StringRef Token(RegIdx.Tok.Data, RegIdx.Tok.Length);
+ StringRef OtherToken(Other.RegIdx.Tok.Data, Other.RegIdx.Tok.Length);
+ return Token == OtherToken;
+ }
+ }
+ }
}; // class MipsOperand
} // namespace
@@ -1526,7 +1635,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
case Mips::BBIT1:
case Mips::BBIT132:
assert(hasCnMips() && "instruction only valid for octeon cpus");
- // Fall through
+ LLVM_FALLTHROUGH;
case Mips::BEQ:
case Mips::BNE:
@@ -1572,6 +1681,45 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
1LL << (inMicroMipsMode() ? 1 : 2)))
return Error(IDLoc, "branch to misaligned address");
break;
+ case Mips::BGEC: case Mips::BGEC_MMR6:
+ case Mips::BLTC: case Mips::BLTC_MMR6:
+ case Mips::BGEUC: case Mips::BGEUC_MMR6:
+ case Mips::BLTUC: case Mips::BLTUC_MMR6:
+ case Mips::BEQC: case Mips::BEQC_MMR6:
+ case Mips::BNEC: case Mips::BNEC_MMR6:
+ assert(MCID.getNumOperands() == 3 && "unexpected number of operands");
+ Offset = Inst.getOperand(2);
+ if (!Offset.isImm())
+ break; // We'll deal with this situation later on when applying fixups.
+ if (!isIntN(18, Offset.getImm()))
+ return Error(IDLoc, "branch target out of range");
+ if (OffsetToAlignment(Offset.getImm(), 1LL << 2))
+ return Error(IDLoc, "branch to misaligned address");
+ break;
+ case Mips::BLEZC: case Mips::BLEZC_MMR6:
+ case Mips::BGEZC: case Mips::BGEZC_MMR6:
+ case Mips::BGTZC: case Mips::BGTZC_MMR6:
+ case Mips::BLTZC: case Mips::BLTZC_MMR6:
+ assert(MCID.getNumOperands() == 2 && "unexpected number of operands");
+ Offset = Inst.getOperand(1);
+ if (!Offset.isImm())
+ break; // We'll deal with this situation later on when applying fixups.
+ if (!isIntN(18, Offset.getImm()))
+ return Error(IDLoc, "branch target out of range");
+ if (OffsetToAlignment(Offset.getImm(), 1LL << 2))
+ return Error(IDLoc, "branch to misaligned address");
+ break;
+ case Mips::BEQZC: case Mips::BEQZC_MMR6:
+ case Mips::BNEZC: case Mips::BNEZC_MMR6:
+ assert(MCID.getNumOperands() == 2 && "unexpected number of operands");
+ Offset = Inst.getOperand(1);
+ if (!Offset.isImm())
+ break; // We'll deal with this situation later on when applying fixups.
+ if (!isIntN(23, Offset.getImm()))
+ return Error(IDLoc, "branch target out of range");
+ if (OffsetToAlignment(Offset.getImm(), 1LL << 2))
+ return Error(IDLoc, "branch to misaligned address");
+ break;
case Mips::BEQZ16_MM:
case Mips::BEQZC16_MMR6:
case Mips::BNEZ16_MM:
@@ -1638,6 +1786,17 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
}
}
+ // For PIC code convert unconditional jump to unconditional branch.
+ if ((Inst.getOpcode() == Mips::J || Inst.getOpcode() == Mips::J_MM) &&
+ inPicMode()) {
+ MCInst BInst;
+ BInst.setOpcode(inMicroMipsMode() ? Mips::BEQ_MM : Mips::BEQ);
+ BInst.addOperand(MCOperand::createReg(Mips::ZERO));
+ BInst.addOperand(MCOperand::createReg(Mips::ZERO));
+ BInst.addOperand(Inst.getOperand(0));
+ Inst = BInst;
+ }
+
// This expansion is not in a function called by tryExpandInstruction()
// because the pseudo-instruction doesn't have a distinct opcode.
if ((Inst.getOpcode() == Mips::JAL || Inst.getOpcode() == Mips::JAL_MM) &&
@@ -1658,7 +1817,8 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
// FIXME: Add support for label+offset operands (currently causes an error).
// FIXME: Add support for forward-declared local symbols.
// FIXME: Add expansion for when the LargeGOT option is enabled.
- if (JalSym->isInSection() || JalSym->isTemporary()) {
+ if (JalSym->isInSection() || JalSym->isTemporary() ||
+ (JalSym->isELF() && cast<MCSymbolELF>(JalSym)->getBinding() == ELF::STB_LOCAL)) {
if (isABI_O32()) {
// If it's a local symbol and the O32 ABI is being used, we expand to:
// lw $25, 0($gp)
@@ -1716,7 +1876,8 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
ExpandedJalSym = true;
}
- if (MCID.mayLoad() || MCID.mayStore()) {
+ bool IsPCRelativeLoad = (MCID.TSFlags & MipsII::IsPCRelativeLoad) != 0;
+ if ((MCID.mayLoad() || MCID.mayStore()) && !IsPCRelativeLoad) {
// Check the offset of memory operand, if it is a symbol
// reference or immediate we may have to expand instructions.
for (unsigned i = 0; i < MCID.getNumOperands(); i++) {
@@ -1729,7 +1890,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
if (MemOffset < -32768 || MemOffset > 32767) {
// Offset can't exceed 16bit value.
expandMemInst(Inst, IDLoc, Out, STI, MCID.mayLoad(), true);
- return false;
+ return getParser().hasPendingError();
}
} else if (Op.isExpr()) {
const MCExpr *Expr = Op.getExpr();
@@ -1739,11 +1900,11 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc,
if (SR->getKind() == MCSymbolRefExpr::VK_None) {
// Expand symbol.
expandMemInst(Inst, IDLoc, Out, STI, MCID.mayLoad(), false);
- return false;
+ return getParser().hasPendingError();
}
} else if (!isEvaluated(Expr)) {
expandMemInst(Inst, IDLoc, Out, STI, MCID.mayLoad(), false);
- return false;
+ return getParser().hasPendingError();
}
}
}
@@ -2034,8 +2195,11 @@ MipsAsmParser::tryExpandInstruction(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
return expandUlh(Inst, true, IDLoc, Out, STI) ? MER_Fail : MER_Success;
case Mips::Ulhu:
return expandUlh(Inst, false, IDLoc, Out, STI) ? MER_Fail : MER_Success;
+ case Mips::Ush:
+ return expandUsh(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
case Mips::Ulw:
- return expandUlw(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
+ case Mips::Usw:
+ return expandUxw(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
case Mips::NORImm:
return expandAliasImmediate(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
case Mips::ADDi:
@@ -2077,6 +2241,16 @@ MipsAsmParser::tryExpandInstruction(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
return expandDRotationImm(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
case Mips::ABSMacro:
return expandAbs(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
+ case Mips::LDMacro:
+ case Mips::SDMacro:
+ return expandLoadStoreDMacro(Inst, IDLoc, Out, STI,
+ Inst.getOpcode() == Mips::LDMacro)
+ ? MER_Fail
+ : MER_Success;
+ case Mips::SEQMacro:
+ return expandSeq(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
+ case Mips::SEQIMacro:
+ return expandSeqI(Inst, IDLoc, Out, STI) ? MER_Fail : MER_Success;
}
}
@@ -2335,6 +2509,7 @@ bool MipsAsmParser::expandLoadAddress(unsigned DstReg, unsigned BaseReg,
Error(IDLoc, "la used to load 64-bit address");
// Continue as if we had 'dla' instead.
Is32BitAddress = false;
+ return true;
}
// dla requires 64-bit addresses.
@@ -2561,9 +2736,9 @@ bool MipsAsmParser::expandUncondBranchMMPseudo(MCInst &Inst, SMLoc IDLoc,
Inst.setOpcode(hasMips32r6() ? Mips::BC16_MMR6 : Mips::B16_MM);
} else {
if (!isInt<17>(Offset.getImm()))
- Error(IDLoc, "branch target out of range");
+ return Error(IDLoc, "branch target out of range");
if (OffsetToAlignment(Offset.getImm(), 1LL << 1))
- Error(IDLoc, "branch to misaligned address");
+ return Error(IDLoc, "branch to misaligned address");
Inst.clear();
Inst.setOpcode(Mips::BEQ_MM);
Inst.addOperand(MCOperand::createReg(Mips::ZERO));
@@ -3168,146 +3343,158 @@ bool MipsAsmParser::expandTrunc(MCInst &Inst, bool IsDouble, bool Is64FPU,
bool MipsAsmParser::expandUlh(MCInst &Inst, bool Signed, SMLoc IDLoc,
MCStreamer &Out, const MCSubtargetInfo *STI) {
- MipsTargetStreamer &TOut = getTargetStreamer();
-
if (hasMips32r6() || hasMips64r6()) {
- Error(IDLoc, "instruction not supported on mips32r6 or mips64r6");
- return false;
+ return Error(IDLoc, "instruction not supported on mips32r6 or mips64r6");
}
- warnIfNoMacro(IDLoc);
-
const MCOperand &DstRegOp = Inst.getOperand(0);
assert(DstRegOp.isReg() && "expected register operand kind");
-
const MCOperand &SrcRegOp = Inst.getOperand(1);
assert(SrcRegOp.isReg() && "expected register operand kind");
-
const MCOperand &OffsetImmOp = Inst.getOperand(2);
assert(OffsetImmOp.isImm() && "expected immediate operand kind");
+ MipsTargetStreamer &TOut = getTargetStreamer();
unsigned DstReg = DstRegOp.getReg();
unsigned SrcReg = SrcRegOp.getReg();
int64_t OffsetValue = OffsetImmOp.getImm();
// NOTE: We always need AT for ULHU, as it is always used as the source
// register for one of the LBu's.
+ warnIfNoMacro(IDLoc);
unsigned ATReg = getATReg(IDLoc);
if (!ATReg)
return true;
- // When the value of offset+1 does not fit in 16 bits, we have to load the
- // offset in AT, (D)ADDu the original source register (if there was one), and
- // then use AT as the source register for the 2 generated LBu's.
- bool LoadedOffsetInAT = false;
- if (!isInt<16>(OffsetValue + 1) || !isInt<16>(OffsetValue)) {
- LoadedOffsetInAT = true;
-
- if (loadImmediate(OffsetValue, ATReg, Mips::NoRegister, !ABI.ArePtrs64bit(),
- true, IDLoc, Out, STI))
+ bool IsLargeOffset = !(isInt<16>(OffsetValue + 1) && isInt<16>(OffsetValue));
+ if (IsLargeOffset) {
+ if (loadImmediate(OffsetValue, ATReg, SrcReg, !ABI.ArePtrs64bit(), true,
+ IDLoc, Out, STI))
return true;
-
- // NOTE: We do this (D)ADDu here instead of doing it in loadImmediate()
- // because it will make our output more similar to GAS'. For example,
- // generating an "ori $1, $zero, 32768" followed by an "addu $1, $1, $9",
- // instead of just an "ori $1, $9, 32768".
- // NOTE: If there is no source register specified in the ULHU, the parser
- // will interpret it as $0.
- if (SrcReg != Mips::ZERO && SrcReg != Mips::ZERO_64)
- TOut.emitAddu(ATReg, ATReg, SrcReg, ABI.ArePtrs64bit(), STI);
}
- unsigned FirstLbuDstReg = LoadedOffsetInAT ? DstReg : ATReg;
- unsigned SecondLbuDstReg = LoadedOffsetInAT ? ATReg : DstReg;
- unsigned LbuSrcReg = LoadedOffsetInAT ? ATReg : SrcReg;
+ int64_t FirstOffset = IsLargeOffset ? 0 : OffsetValue;
+ int64_t SecondOffset = IsLargeOffset ? 1 : (OffsetValue + 1);
+ if (isLittle())
+ std::swap(FirstOffset, SecondOffset);
- int64_t FirstLbuOffset = 0, SecondLbuOffset = 0;
- if (isLittle()) {
- FirstLbuOffset = LoadedOffsetInAT ? 1 : (OffsetValue + 1);
- SecondLbuOffset = LoadedOffsetInAT ? 0 : OffsetValue;
- } else {
- FirstLbuOffset = LoadedOffsetInAT ? 0 : OffsetValue;
- SecondLbuOffset = LoadedOffsetInAT ? 1 : (OffsetValue + 1);
- }
+ unsigned FirstLbuDstReg = IsLargeOffset ? DstReg : ATReg;
+ unsigned SecondLbuDstReg = IsLargeOffset ? ATReg : DstReg;
- unsigned SllReg = LoadedOffsetInAT ? DstReg : ATReg;
+ unsigned LbuSrcReg = IsLargeOffset ? ATReg : SrcReg;
+ unsigned SllReg = IsLargeOffset ? DstReg : ATReg;
TOut.emitRRI(Signed ? Mips::LB : Mips::LBu, FirstLbuDstReg, LbuSrcReg,
- FirstLbuOffset, IDLoc, STI);
-
- TOut.emitRRI(Mips::LBu, SecondLbuDstReg, LbuSrcReg, SecondLbuOffset, IDLoc,
- STI);
-
+ FirstOffset, IDLoc, STI);
+ TOut.emitRRI(Mips::LBu, SecondLbuDstReg, LbuSrcReg, SecondOffset, IDLoc, STI);
TOut.emitRRI(Mips::SLL, SllReg, SllReg, 8, IDLoc, STI);
-
TOut.emitRRR(Mips::OR, DstReg, DstReg, ATReg, IDLoc, STI);
return false;
}
-bool MipsAsmParser::expandUlw(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+bool MipsAsmParser::expandUsh(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
const MCSubtargetInfo *STI) {
+ if (hasMips32r6() || hasMips64r6()) {
+ return Error(IDLoc, "instruction not supported on mips32r6 or mips64r6");
+ }
+
+ const MCOperand &DstRegOp = Inst.getOperand(0);
+ assert(DstRegOp.isReg() && "expected register operand kind");
+ const MCOperand &SrcRegOp = Inst.getOperand(1);
+ assert(SrcRegOp.isReg() && "expected register operand kind");
+ const MCOperand &OffsetImmOp = Inst.getOperand(2);
+ assert(OffsetImmOp.isImm() && "expected immediate operand kind");
+
MipsTargetStreamer &TOut = getTargetStreamer();
+ unsigned DstReg = DstRegOp.getReg();
+ unsigned SrcReg = SrcRegOp.getReg();
+ int64_t OffsetValue = OffsetImmOp.getImm();
+
+ warnIfNoMacro(IDLoc);
+ unsigned ATReg = getATReg(IDLoc);
+ if (!ATReg)
+ return true;
+
+ bool IsLargeOffset = !(isInt<16>(OffsetValue + 1) && isInt<16>(OffsetValue));
+ if (IsLargeOffset) {
+ if (loadImmediate(OffsetValue, ATReg, SrcReg, !ABI.ArePtrs64bit(), true,
+ IDLoc, Out, STI))
+ return true;
+ }
+
+ int64_t FirstOffset = IsLargeOffset ? 1 : (OffsetValue + 1);
+ int64_t SecondOffset = IsLargeOffset ? 0 : OffsetValue;
+ if (isLittle())
+ std::swap(FirstOffset, SecondOffset);
+ if (IsLargeOffset) {
+ TOut.emitRRI(Mips::SB, DstReg, ATReg, FirstOffset, IDLoc, STI);
+ TOut.emitRRI(Mips::SRL, DstReg, DstReg, 8, IDLoc, STI);
+ TOut.emitRRI(Mips::SB, DstReg, ATReg, SecondOffset, IDLoc, STI);
+ TOut.emitRRI(Mips::LBu, ATReg, ATReg, 0, IDLoc, STI);
+ TOut.emitRRI(Mips::SLL, DstReg, DstReg, 8, IDLoc, STI);
+ TOut.emitRRR(Mips::OR, DstReg, DstReg, ATReg, IDLoc, STI);
+ } else {
+ TOut.emitRRI(Mips::SB, DstReg, SrcReg, FirstOffset, IDLoc, STI);
+ TOut.emitRRI(Mips::SRL, ATReg, DstReg, 8, IDLoc, STI);
+ TOut.emitRRI(Mips::SB, ATReg, SrcReg, SecondOffset, IDLoc, STI);
+ }
+
+ return false;
+}
+
+bool MipsAsmParser::expandUxw(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+ const MCSubtargetInfo *STI) {
if (hasMips32r6() || hasMips64r6()) {
- Error(IDLoc, "instruction not supported on mips32r6 or mips64r6");
- return false;
+ return Error(IDLoc, "instruction not supported on mips32r6 or mips64r6");
}
const MCOperand &DstRegOp = Inst.getOperand(0);
assert(DstRegOp.isReg() && "expected register operand kind");
-
const MCOperand &SrcRegOp = Inst.getOperand(1);
assert(SrcRegOp.isReg() && "expected register operand kind");
-
const MCOperand &OffsetImmOp = Inst.getOperand(2);
assert(OffsetImmOp.isImm() && "expected immediate operand kind");
+ MipsTargetStreamer &TOut = getTargetStreamer();
+ unsigned DstReg = DstRegOp.getReg();
unsigned SrcReg = SrcRegOp.getReg();
int64_t OffsetValue = OffsetImmOp.getImm();
- unsigned ATReg = 0;
-
- // When the value of offset+3 does not fit in 16 bits, we have to load the
- // offset in AT, (D)ADDu the original source register (if there was one), and
- // then use AT as the source register for the generated LWL and LWR.
- bool LoadedOffsetInAT = false;
- if (!isInt<16>(OffsetValue + 3) || !isInt<16>(OffsetValue)) {
- ATReg = getATReg(IDLoc);
- if (!ATReg)
- return true;
- LoadedOffsetInAT = true;
+ // Compute left/right load/store offsets.
+ bool IsLargeOffset = !(isInt<16>(OffsetValue + 3) && isInt<16>(OffsetValue));
+ int64_t LxlOffset = IsLargeOffset ? 0 : OffsetValue;
+ int64_t LxrOffset = IsLargeOffset ? 3 : (OffsetValue + 3);
+ if (isLittle())
+ std::swap(LxlOffset, LxrOffset);
+
+ bool IsLoadInst = (Inst.getOpcode() == Mips::Ulw);
+ bool DoMove = IsLoadInst && (SrcReg == DstReg) && !IsLargeOffset;
+ unsigned TmpReg = SrcReg;
+ if (IsLargeOffset || DoMove) {
warnIfNoMacro(IDLoc);
-
- if (loadImmediate(OffsetValue, ATReg, Mips::NoRegister, !ABI.ArePtrs64bit(),
- true, IDLoc, Out, STI))
+ TmpReg = getATReg(IDLoc);
+ if (!TmpReg)
return true;
+ }
- // NOTE: We do this (D)ADDu here instead of doing it in loadImmediate()
- // because it will make our output more similar to GAS'. For example,
- // generating an "ori $1, $zero, 32768" followed by an "addu $1, $1, $9",
- // instead of just an "ori $1, $9, 32768".
- // NOTE: If there is no source register specified in the ULW, the parser
- // will interpret it as $0.
- if (SrcReg != Mips::ZERO && SrcReg != Mips::ZERO_64)
- TOut.emitAddu(ATReg, ATReg, SrcReg, ABI.ArePtrs64bit(), STI);
- }
-
- unsigned FinalSrcReg = LoadedOffsetInAT ? ATReg : SrcReg;
- int64_t LeftLoadOffset = 0, RightLoadOffset = 0;
- if (isLittle()) {
- LeftLoadOffset = LoadedOffsetInAT ? 3 : (OffsetValue + 3);
- RightLoadOffset = LoadedOffsetInAT ? 0 : OffsetValue;
- } else {
- LeftLoadOffset = LoadedOffsetInAT ? 0 : OffsetValue;
- RightLoadOffset = LoadedOffsetInAT ? 3 : (OffsetValue + 3);
+ if (IsLargeOffset) {
+ if (loadImmediate(OffsetValue, TmpReg, SrcReg, !ABI.ArePtrs64bit(), true,
+ IDLoc, Out, STI))
+ return true;
}
- TOut.emitRRI(Mips::LWL, DstRegOp.getReg(), FinalSrcReg, LeftLoadOffset, IDLoc,
- STI);
+ if (DoMove)
+ std::swap(DstReg, TmpReg);
- TOut.emitRRI(Mips::LWR, DstRegOp.getReg(), FinalSrcReg, RightLoadOffset,
- IDLoc, STI);
+ unsigned XWL = IsLoadInst ? Mips::LWL : Mips::SWL;
+ unsigned XWR = IsLoadInst ? Mips::LWR : Mips::SWR;
+ TOut.emitRRI(XWL, DstReg, TmpReg, LxlOffset, IDLoc, STI);
+ TOut.emitRRI(XWR, DstReg, TmpReg, LxrOffset, IDLoc, STI);
+
+ if (DoMove)
+ TOut.emitRRR(Mips::OR, TmpReg, DstReg, Mips::ZERO, IDLoc, STI);
return false;
}
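expandUxw now handles both ulw and usw in one routine: it emits an LWL/LWR (or SWL/SWR) pair whose offsets are swapped on little-endian targets, materializes base+offset in AT only when the offset or offset+3 overflows 16 bits, and for "ulw $rt, off($rt)" assembles into AT first and copies the result over so the base register is not clobbered mid-sequence. The sketch below covers only the offset selection, following the 16-bit range test and the 0/+3 pairing in the code above; everything else is omitted:

#include <cstdint>
#include <cstdio>
#include <utility>

// Pick the offsets for the LWL/LWR (or SWL/SWR) pair of an unaligned word
// access at `off`. If off or off+3 doesn't fit a signed 16-bit immediate, the
// address is materialized in AT and the pair uses offsets 0 and 3 instead.
static std::pair<int64_t, int64_t> uxwOffsets(int64_t off, bool littleEndian) {
  bool largeOffset = !(off >= INT16_MIN && off + 3 <= INT16_MAX);
  int64_t left  = largeOffset ? 0 : off;      // LWL/SWL side
  int64_t right = largeOffset ? 3 : off + 3;  // LWR/SWR side
  if (littleEndian)
    std::swap(left, right);
  return {left, right};
}

int main() {
  auto [l1, r1] = uxwOffsets(8, /*littleEndian=*/true);       // e.g. ulw $2, 8($3), EL
  auto [l2, r2] = uxwOffsets(40000, /*littleEndian=*/false);  // offset spilled to AT
  std::printf("small offset: lwl %lld, lwr %lld\n", (long long)l1, (long long)r1);
  std::printf("large offset: lwl %lld, lwr %lld\n", (long long)l2, (long long)r2);
  return 0;
}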
@@ -3685,8 +3872,197 @@ bool MipsAsmParser::expandAbs(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
return false;
}
+static unsigned nextReg(unsigned Reg) {
+ switch (Reg) {
+ case Mips::ZERO: return Mips::AT;
+ case Mips::AT: return Mips::V0;
+ case Mips::V0: return Mips::V1;
+ case Mips::V1: return Mips::A0;
+ case Mips::A0: return Mips::A1;
+ case Mips::A1: return Mips::A2;
+ case Mips::A2: return Mips::A3;
+ case Mips::A3: return Mips::T0;
+ case Mips::T0: return Mips::T1;
+ case Mips::T1: return Mips::T2;
+ case Mips::T2: return Mips::T3;
+ case Mips::T3: return Mips::T4;
+ case Mips::T4: return Mips::T5;
+ case Mips::T5: return Mips::T6;
+ case Mips::T6: return Mips::T7;
+ case Mips::T7: return Mips::S0;
+ case Mips::S0: return Mips::S1;
+ case Mips::S1: return Mips::S2;
+ case Mips::S2: return Mips::S3;
+ case Mips::S3: return Mips::S4;
+ case Mips::S4: return Mips::S5;
+ case Mips::S5: return Mips::S6;
+ case Mips::S6: return Mips::S7;
+ case Mips::S7: return Mips::T8;
+ case Mips::T8: return Mips::T9;
+ case Mips::T9: return Mips::K0;
+ case Mips::K0: return Mips::K1;
+ case Mips::K1: return Mips::GP;
+ case Mips::GP: return Mips::SP;
+ case Mips::SP: return Mips::FP;
+ case Mips::FP: return Mips::RA;
+ case Mips::RA: return Mips::ZERO;
+ default: return 0;
+ }
+
+}
+
+// Expand 'ld $<reg> offset($reg2)' to 'lw $<reg>, offset($reg2);
+// lw $<reg+1>, offset+4($reg2)'
+// or expand 'sd $<reg> offset($reg2)' to 'sw $<reg>, offset($reg2);
+// sw $<reg+1>, offset+4($reg2)'
+// for O32.
+bool MipsAsmParser::expandLoadStoreDMacro(MCInst &Inst, SMLoc IDLoc,
+ MCStreamer &Out,
+ const MCSubtargetInfo *STI,
+ bool IsLoad) {
+ if (!isABI_O32())
+ return true;
+
+ warnIfNoMacro(IDLoc);
+
+ MipsTargetStreamer &TOut = getTargetStreamer();
+ unsigned Opcode = IsLoad ? Mips::LW : Mips::SW;
+ unsigned FirstReg = Inst.getOperand(0).getReg();
+ unsigned SecondReg = nextReg(FirstReg);
+ unsigned BaseReg = Inst.getOperand(1).getReg();
+ if (!SecondReg)
+ return true;
+
+ warnIfRegIndexIsAT(FirstReg, IDLoc);
+
+ assert(Inst.getOperand(2).isImm() &&
+ "Offset for load macro is not immediate!");
+
+ MCOperand &FirstOffset = Inst.getOperand(2);
+ signed NextOffset = FirstOffset.getImm() + 4;
+ MCOperand SecondOffset = MCOperand::createImm(NextOffset);
+
+ if (!isInt<16>(FirstOffset.getImm()) || !isInt<16>(NextOffset))
+ return true;
+
+ // For loads, clobber the base register with the second load instead of the
+ // first if the BaseReg == FirstReg.
+ if (FirstReg != BaseReg || !IsLoad) {
+ TOut.emitRRX(Opcode, FirstReg, BaseReg, FirstOffset, IDLoc, STI);
+ TOut.emitRRX(Opcode, SecondReg, BaseReg, SecondOffset, IDLoc, STI);
+ } else {
+ TOut.emitRRX(Opcode, SecondReg, BaseReg, SecondOffset, IDLoc, STI);
+ TOut.emitRRX(Opcode, FirstReg, BaseReg, FirstOffset, IDLoc, STI);
+ }
+
+ return false;
+}
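The new LDMacro/SDMacro expansion above splits a 64-bit ld/sd into two 32-bit lw/sw on O32, using the next sequential GPR for the second word; for loads whose destination equals the base register, the offset+4 load is emitted first so the base address survives until the other half is read. A worked illustration of that ordering decision only (register names here are plain strings; the +4 pairing and the clobber rule follow the code above):

#include <cstdio>
#include <string>

// Print the lw pair for "ld dst, off(base)" on O32, mirroring the clobber rule:
// if dst == base, the offset+4 load must come first so the base address is
// still intact when the remaining half is loaded.
static void expandLd(const std::string &dst, const std::string &nextDst,
                     const std::string &base, int off) {
  auto emit = [&](const std::string &r, int o) {
    std::printf("  lw %s, %d(%s)\n", r.c_str(), o, base.c_str());
  };
  if (dst != base) {
    emit(dst, off);
    emit(nextDst, off + 4);
  } else {
    emit(nextDst, off + 4);   // the base register is overwritten last
    emit(dst, off);
  }
}

int main() {
  std::printf("ld $2, 16($sp):\n");
  expandLd("$2", "$3", "$sp", 16);   // plain case: loads stay in order
  std::printf("ld $6, 0($6):\n");
  expandLd("$6", "$7", "$6", 0);     // dst == base: offset+4 half goes first
  return 0;
}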
+
+bool MipsAsmParser::expandSeq(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+ const MCSubtargetInfo *STI) {
+
+ warnIfNoMacro(IDLoc);
+ MipsTargetStreamer &TOut = getTargetStreamer();
+
+ if (Inst.getOperand(1).getReg() != Mips::ZERO &&
+ Inst.getOperand(2).getReg() != Mips::ZERO) {
+ TOut.emitRRR(Mips::XOR, Inst.getOperand(0).getReg(),
+ Inst.getOperand(1).getReg(), Inst.getOperand(2).getReg(),
+ IDLoc, STI);
+ TOut.emitRRI(Mips::SLTiu, Inst.getOperand(0).getReg(),
+ Inst.getOperand(0).getReg(), 1, IDLoc, STI);
+ return false;
+ }
+
+ unsigned Reg = 0;
+ if (Inst.getOperand(1).getReg() == Mips::ZERO) {
+ Reg = Inst.getOperand(2).getReg();
+ } else {
+ Reg = Inst.getOperand(1).getReg();
+ }
+ TOut.emitRRI(Mips::SLTiu, Inst.getOperand(0).getReg(), Reg, 1, IDLoc, STI);
+ return false;
+}
+
+bool MipsAsmParser::expandSeqI(MCInst &Inst, SMLoc IDLoc, MCStreamer &Out,
+ const MCSubtargetInfo *STI) {
+
+ warnIfNoMacro(IDLoc);
+ MipsTargetStreamer &TOut = getTargetStreamer();
+
+ unsigned Opc;
+ int64_t Imm = Inst.getOperand(2).getImm();
+ unsigned Reg = Inst.getOperand(1).getReg();
+
+ if (Imm == 0) {
+ TOut.emitRRI(Mips::SLTiu, Inst.getOperand(0).getReg(),
+ Inst.getOperand(1).getReg(), 1, IDLoc, STI);
+ return false;
+ } else {
+
+ if (Reg == Mips::ZERO) {
+ Warning(IDLoc, "comparison is always false");
+ TOut.emitRRR(isGP64bit() ? Mips::DADDu : Mips::ADDu,
+ Inst.getOperand(0).getReg(), Reg, Reg, IDLoc, STI);
+ return false;
+ }
+
+ if (Imm > -0x8000 && Imm < 0) {
+ Imm = -Imm;
+ Opc = isGP64bit() ? Mips::DADDiu : Mips::ADDiu;
+ } else {
+ Opc = Mips::XORi;
+ }
+ }
+ if (!isUInt<16>(Imm)) {
+ unsigned ATReg = getATReg(IDLoc);
+ if (!ATReg)
+ return true;
+
+ if (loadImmediate(Imm, ATReg, Mips::NoRegister, true, isGP64bit(), IDLoc,
+ Out, STI))
+ return true;
+
+ TOut.emitRRR(Mips::XOR, Inst.getOperand(0).getReg(),
+ Inst.getOperand(1).getReg(), ATReg, IDLoc, STI);
+ TOut.emitRRI(Mips::SLTiu, Inst.getOperand(0).getReg(),
+ Inst.getOperand(0).getReg(), 1, IDLoc, STI);
+ return false;
+ }
+
+ TOut.emitRRI(Opc, Inst.getOperand(0).getReg(), Inst.getOperand(1).getReg(),
+ Imm, IDLoc, STI);
+ TOut.emitRRI(Mips::SLTiu, Inst.getOperand(0).getReg(),
+ Inst.getOperand(0).getReg(), 1, IDLoc, STI);
+ return false;
+}
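expandSeq and expandSeqI lower the seq/seqi "set on equal" macros to an xor (or xori/addiu for immediate forms) followed by "sltiu rd, rd, 1", with shortcuts when one operand is $zero and a warning when comparing $zero against a non-zero immediate. The sketch below just evaluates the emitted sequence on plain integers to show why sltiu-with-1 yields the equality bit:

#include <cstdint>
#include <cstdio>

// seq rd, rs, rt  ==>  xor rd, rs, rt; sltiu rd, rd, 1
// The xor is zero exactly when rs == rt, and "unsigned < 1" holds only for 0,
// so rd ends up holding the equality result as 0 or 1.
static uint32_t seq(uint32_t rs, uint32_t rt) {
  uint32_t rd = rs ^ rt;        // xor rd, rs, rt
  rd = (rd < 1) ? 1 : 0;        // sltiu rd, rd, 1
  return rd;
}

int main() {
  std::printf("seq(7, 7) = %u\n", seq(7, 7));   // 1
  std::printf("seq(7, 9) = %u\n", seq(7, 9));   // 0
  return 0;
}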
+
+unsigned
+MipsAsmParser::checkEarlyTargetMatchPredicate(MCInst &Inst,
+ const OperandVector &Operands) {
+ switch (Inst.getOpcode()) {
+ default:
+ return Match_Success;
+ case Mips::DATI:
+ case Mips::DAHI:
+ case Mips::DATI_MM64R6:
+ case Mips::DAHI_MM64R6:
+ if (static_cast<MipsOperand &>(*Operands[1])
+ .isValidForTie(static_cast<MipsOperand &>(*Operands[2])))
+ return Match_Success;
+ return Match_RequiresSameSrcAndDst;
+ }
+}
unsigned MipsAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
switch (Inst.getOpcode()) {
+ // As described by the MIPSR6 spec, daui must not use the zero operand for
+ // its source operand.
+ case Mips::DAUI:
+ case Mips::DAUI_MM64R6:
+ if (Inst.getOperand(1).getReg() == Mips::ZERO ||
+ Inst.getOperand(1).getReg() == Mips::ZERO_64)
+ return Match_RequiresNoZeroRegister;
+ return Match_Success;
// As described by the Mips32r2 spec, the registers Rd and Rs for
// jalr.hb must be different.
// It also applies for registers Rt and Rs of microMIPSr6 jalrc.hb instruction
@@ -3702,6 +4078,10 @@ unsigned MipsAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
if (Inst.getOperand(0).getReg() == Inst.getOperand(2).getReg())
return Match_RequiresDifferentSrcAndDst;
return Match_Success;
+ case Mips::SYNC:
+ if (Inst.getOperand(0).getImm() != 0 && !hasMips32())
+ return Match_NonZeroOperandForSync;
+ return Match_Success;
// As described by the MIPSR6 spec, the compact branches that compare registers
// must:
// a) Not use the zero register.
@@ -3714,24 +4094,39 @@ unsigned MipsAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
// The compact branches that branch iff the signed addition of two registers
// would overflow must have rs >= rt. That can be handled like beqc/bnec with
// operand swapping. They do not have restriction of using the zero register.
- case Mips::BLEZC:
- case Mips::BGEZC:
- case Mips::BGTZC:
- case Mips::BLTZC:
- case Mips::BEQZC:
- case Mips::BNEZC:
- if (Inst.getOperand(0).getReg() == Mips::ZERO)
+ case Mips::BLEZC: case Mips::BLEZC_MMR6:
+ case Mips::BGEZC: case Mips::BGEZC_MMR6:
+ case Mips::BGTZC: case Mips::BGTZC_MMR6:
+ case Mips::BLTZC: case Mips::BLTZC_MMR6:
+ case Mips::BEQZC: case Mips::BEQZC_MMR6:
+ case Mips::BNEZC: case Mips::BNEZC_MMR6:
+ case Mips::BLEZC64:
+ case Mips::BGEZC64:
+ case Mips::BGTZC64:
+ case Mips::BLTZC64:
+ case Mips::BEQZC64:
+ case Mips::BNEZC64:
+ if (Inst.getOperand(0).getReg() == Mips::ZERO ||
+ Inst.getOperand(0).getReg() == Mips::ZERO_64)
return Match_RequiresNoZeroRegister;
return Match_Success;
- case Mips::BGEC:
- case Mips::BLTC:
- case Mips::BGEUC:
- case Mips::BLTUC:
- case Mips::BEQC:
- case Mips::BNEC:
- if (Inst.getOperand(0).getReg() == Mips::ZERO)
+ case Mips::BGEC: case Mips::BGEC_MMR6:
+ case Mips::BLTC: case Mips::BLTC_MMR6:
+ case Mips::BGEUC: case Mips::BGEUC_MMR6:
+ case Mips::BLTUC: case Mips::BLTUC_MMR6:
+ case Mips::BEQC: case Mips::BEQC_MMR6:
+ case Mips::BNEC: case Mips::BNEC_MMR6:
+ case Mips::BGEC64:
+ case Mips::BLTC64:
+ case Mips::BGEUC64:
+ case Mips::BLTUC64:
+ case Mips::BEQC64:
+ case Mips::BNEC64:
+ if (Inst.getOperand(0).getReg() == Mips::ZERO ||
+ Inst.getOperand(0).getReg() == Mips::ZERO_64)
return Match_RequiresNoZeroRegister;
- if (Inst.getOperand(1).getReg() == Mips::ZERO)
+ if (Inst.getOperand(1).getReg() == Mips::ZERO ||
+ Inst.getOperand(1).getReg() == Mips::ZERO_64)
return Match_RequiresNoZeroRegister;
if (Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg())
return Match_RequiresDifferentOperands;
@@ -3784,6 +4179,8 @@ bool MipsAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return Error(ErrorLoc, "invalid operand for instruction");
}
+ case Match_NonZeroOperandForSync:
+ return Error(IDLoc, "s-type must be zero or unspecified for pre-MIPS32 ISAs");
case Match_MnemonicFail:
return Error(IDLoc, "invalid instruction");
case Match_RequiresDifferentSrcAndDst:
@@ -3792,6 +4189,8 @@ bool MipsAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
return Error(IDLoc, "registers must be different");
case Match_RequiresNoZeroRegister:
return Error(IDLoc, "invalid operand ($zero) for instruction");
+ case Match_RequiresSameSrcAndDst:
+ return Error(IDLoc, "source and destination must match");
case Match_Immz:
return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), "expected '0'");
case Match_UImm1_0:
@@ -3876,6 +4275,9 @@ bool MipsAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_SImm16_Relaxed:
return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo),
"expected 16-bit signed immediate");
+ case Match_SImm19_Lsl2:
+ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo),
+ "expected both 19-bit signed immediate and multiple of 4");
case Match_UImm20_0:
return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo),
"expected 20-bit unsigned immediate");
@@ -3886,6 +4288,9 @@ bool MipsAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
case Match_SImm32_Relaxed:
return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo),
"expected 32-bit signed immediate");
+ case Match_UImm32_Coerced:
+ return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo),
+ "expected 32-bit immediate");
case Match_MemSImm9:
return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo),
"expected memory with 9-bit signed offset");
@@ -4131,9 +4536,6 @@ bool MipsAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
DEBUG(dbgs() << ".. Generic Parser\n");
switch (getLexer().getKind()) {
- default:
- Error(Parser.getTok().getLoc(), "unexpected token in operand");
- return true;
case AsmToken::Dollar: {
// Parse the register.
SMLoc S = Parser.getTok().getLoc();
@@ -4160,72 +4562,23 @@ bool MipsAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
Operands.push_back(MipsOperand::CreateImm(Res, S, E, *this));
return false;
}
- // Else drop to expression parsing.
- case AsmToken::LParen:
- case AsmToken::Minus:
- case AsmToken::Plus:
- case AsmToken::Integer:
- case AsmToken::Tilde:
- case AsmToken::String: {
- DEBUG(dbgs() << ".. generic integer\n");
- OperandMatchResultTy ResTy = parseImm(Operands);
- return ResTy != MatchOperand_Success;
- }
- case AsmToken::Percent: {
- // It is a symbol reference or constant expression.
- const MCExpr *IdVal;
+ default: {
+ DEBUG(dbgs() << ".. generic integer expression\n");
+
+ const MCExpr *Expr;
SMLoc S = Parser.getTok().getLoc(); // Start location of the operand.
- if (parseRelocOperand(IdVal))
+ if (getParser().parseExpression(Expr))
return true;
SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
- Operands.push_back(MipsOperand::CreateImm(IdVal, S, E, *this));
+ Operands.push_back(MipsOperand::CreateImm(Expr, S, E, *this));
return false;
- } // case AsmToken::Percent
+ }
} // switch(getLexer().getKind())
return true;
}
-const MCExpr *MipsAsmParser::evaluateRelocExpr(const MCExpr *Expr,
- StringRef RelocStr) {
- if (RelocStr == "hi(%neg(%gp_rel")
- return MipsMCExpr::createGpOff(MipsMCExpr::MEK_HI, Expr, getContext());
- else if (RelocStr == "lo(%neg(%gp_rel")
- return MipsMCExpr::createGpOff(MipsMCExpr::MEK_LO, Expr, getContext());
-
- MipsMCExpr::MipsExprKind Kind =
- StringSwitch<MipsMCExpr::MipsExprKind>(RelocStr)
- .Case("call16", MipsMCExpr::MEK_GOT_CALL)
- .Case("call_hi", MipsMCExpr::MEK_CALL_HI16)
- .Case("call_lo", MipsMCExpr::MEK_CALL_LO16)
- .Case("dtprel_hi", MipsMCExpr::MEK_DTPREL_HI)
- .Case("dtprel_lo", MipsMCExpr::MEK_DTPREL_LO)
- .Case("got", MipsMCExpr::MEK_GOT)
- .Case("got_disp", MipsMCExpr::MEK_GOT_DISP)
- .Case("got_hi", MipsMCExpr::MEK_GOT_HI16)
- .Case("got_lo", MipsMCExpr::MEK_GOT_LO16)
- .Case("got_ofst", MipsMCExpr::MEK_GOT_OFST)
- .Case("got_page", MipsMCExpr::MEK_GOT_PAGE)
- .Case("gottprel", MipsMCExpr::MEK_GOTTPREL)
- .Case("gp_rel", MipsMCExpr::MEK_GPREL)
- .Case("hi", MipsMCExpr::MEK_HI)
- .Case("higher", MipsMCExpr::MEK_HIGHER)
- .Case("highest", MipsMCExpr::MEK_HIGHEST)
- .Case("lo", MipsMCExpr::MEK_LO)
- .Case("neg", MipsMCExpr::MEK_NEG)
- .Case("pcrel_hi", MipsMCExpr::MEK_PCREL_HI16)
- .Case("pcrel_lo", MipsMCExpr::MEK_PCREL_LO16)
- .Case("tlsgd", MipsMCExpr::MEK_TLSGD)
- .Case("tlsldm", MipsMCExpr::MEK_TLSLDM)
- .Case("tprel_hi", MipsMCExpr::MEK_TPREL_HI)
- .Case("tprel_lo", MipsMCExpr::MEK_TPREL_LO)
- .Default(MipsMCExpr::MEK_None);
-
- assert(Kind != MipsMCExpr::MEK_None);
- return MipsMCExpr::create(Kind, Expr, getContext());
-}
-
bool MipsAsmParser::isEvaluated(const MCExpr *Expr) {
switch (Expr->getKind()) {
@@ -4247,49 +4600,6 @@ bool MipsAsmParser::isEvaluated(const MCExpr *Expr) {
return false;
}
-bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res) {
- MCAsmParser &Parser = getParser();
- Parser.Lex(); // Eat the % token.
- const AsmToken &Tok = Parser.getTok(); // Get next token, operation.
- if (Tok.isNot(AsmToken::Identifier))
- return true;
-
- std::string Str = Tok.getIdentifier();
-
- Parser.Lex(); // Eat the identifier.
- // Now make an expression from the rest of the operand.
- const MCExpr *IdVal;
- SMLoc EndLoc;
-
- if (getLexer().getKind() == AsmToken::LParen) {
- while (1) {
- Parser.Lex(); // Eat the '(' token.
- if (getLexer().getKind() == AsmToken::Percent) {
- Parser.Lex(); // Eat the % token.
- const AsmToken &nextTok = Parser.getTok();
- if (nextTok.isNot(AsmToken::Identifier))
- return true;
- Str += "(%";
- Str += nextTok.getIdentifier();
- Parser.Lex(); // Eat the identifier.
- if (getLexer().getKind() != AsmToken::LParen)
- return true;
- } else
- break;
- }
- if (getParser().parseParenExpression(IdVal, EndLoc))
- return true;
-
- while (getLexer().getKind() == AsmToken::RParen)
- Parser.Lex(); // Eat the ')' token.
-
- } else
- return true; // Parenthesis must follow the relocation operand.
-
- Res = evaluateRelocExpr(IdVal, Str);
- return false;
-}
-
bool MipsAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
SMLoc &EndLoc) {
SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> Operands;
@@ -4317,45 +4627,21 @@ bool MipsAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
}
bool MipsAsmParser::parseMemOffset(const MCExpr *&Res, bool isParenExpr) {
- MCAsmParser &Parser = getParser();
SMLoc S;
- bool Result = true;
- unsigned NumOfLParen = 0;
- while (getLexer().getKind() == AsmToken::LParen) {
- Parser.Lex();
- ++NumOfLParen;
- }
-
- switch (getLexer().getKind()) {
- default:
- return true;
- case AsmToken::Identifier:
- case AsmToken::LParen:
- case AsmToken::Integer:
- case AsmToken::Minus:
- case AsmToken::Plus:
- if (isParenExpr)
- Result = getParser().parseParenExprOfDepth(NumOfLParen, Res, S);
- else
- Result = (getParser().parseExpression(Res));
- while (getLexer().getKind() == AsmToken::RParen)
- Parser.Lex();
- break;
- case AsmToken::Percent:
- Result = parseRelocOperand(Res);
- }
- return Result;
+ if (isParenExpr)
+ return getParser().parseParenExprOfDepth(0, Res, S);
+ return getParser().parseExpression(Res);
}
-MipsAsmParser::OperandMatchResultTy
+OperandMatchResultTy
MipsAsmParser::parseMemOperand(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
DEBUG(dbgs() << "parseMemOperand\n");
const MCExpr *IdVal = nullptr;
SMLoc S;
bool isParenExpr = false;
- MipsAsmParser::OperandMatchResultTy Res = MatchOperand_NoMatch;
+ OperandMatchResultTy Res = MatchOperand_NoMatch;
// First operand is the offset.
S = Parser.getTok().getLoc();
@@ -4383,14 +4669,66 @@ MipsAsmParser::parseMemOperand(OperandVector &Operands) {
// Zero register assumed, add a memory operand with ZERO as its base.
// "Base" will be managed by k_Memory.
- auto Base = MipsOperand::createGPRReg(0, getContext().getRegisterInfo(),
- S, E, *this);
+ auto Base = MipsOperand::createGPRReg(
+ 0, "0", getContext().getRegisterInfo(), S, E, *this);
Operands.push_back(
MipsOperand::CreateMem(std::move(Base), IdVal, S, E, *this));
return MatchOperand_Success;
}
- Error(Parser.getTok().getLoc(), "'(' expected");
- return MatchOperand_ParseFail;
+ MCBinaryExpr::Opcode Opcode;
+ // GAS and LLVM treat comparison operators differently. GAS will generate -1
+ // or 0, while LLVM will generate 0 or 1. Since a comparison operator is
+ // highly unlikely to be found in a memory offset expression, we don't
+ // handle them.
+ switch (Tok.getKind()) {
+ case AsmToken::Plus:
+ Opcode = MCBinaryExpr::Add;
+ Parser.Lex();
+ break;
+ case AsmToken::Minus:
+ Opcode = MCBinaryExpr::Sub;
+ Parser.Lex();
+ break;
+ case AsmToken::Star:
+ Opcode = MCBinaryExpr::Mul;
+ Parser.Lex();
+ break;
+ case AsmToken::Pipe:
+ Opcode = MCBinaryExpr::Or;
+ Parser.Lex();
+ break;
+ case AsmToken::Amp:
+ Opcode = MCBinaryExpr::And;
+ Parser.Lex();
+ break;
+ case AsmToken::LessLess:
+ Opcode = MCBinaryExpr::Shl;
+ Parser.Lex();
+ break;
+ case AsmToken::GreaterGreater:
+ Opcode = MCBinaryExpr::LShr;
+ Parser.Lex();
+ break;
+ case AsmToken::Caret:
+ Opcode = MCBinaryExpr::Xor;
+ Parser.Lex();
+ break;
+ case AsmToken::Slash:
+ Opcode = MCBinaryExpr::Div;
+ Parser.Lex();
+ break;
+ case AsmToken::Percent:
+ Opcode = MCBinaryExpr::Mod;
+ Parser.Lex();
+ break;
+ default:
+ Error(Parser.getTok().getLoc(), "'(' or expression expected");
+ return MatchOperand_ParseFail;
+ }
+ const MCExpr *NextExpr;
+ if (getParser().parseExpression(NextExpr))
+ return MatchOperand_ParseFail;
+ IdVal = MCBinaryExpr::create(Opcode, IdVal, NextExpr, getContext());
}
Parser.Lex(); // Eat the '(' token.
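When the first expression of a memory operand is not immediately followed by '(', the code above now consumes one operator token and one further expression per iteration and folds the pair into the running offset, instead of failing with "'(' expected"; comparison operators stay unsupported for the GAS-compatibility reason noted in the comment. One folding step in isolation, with the operands and Ctx assumed to be in scope (illustrative, not from the patch):

    // Combine the offset parsed so far with the next sub-expression, exactly
    // as the IdVal = MCBinaryExpr::create(...) line above does for each
    // operator/expression pair in a memory offset.
    const llvm::MCExpr *foldOffsetStep(const llvm::MCExpr *IdVal,
                                       const llvm::MCExpr *NextExpr,
                                       llvm::MCContext &Ctx) {
      return llvm::MCBinaryExpr::create(llvm::MCBinaryExpr::Add, IdVal,
                                        NextExpr, Ctx);
    }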
@@ -4460,63 +4798,70 @@ bool MipsAsmParser::searchSymbolAlias(OperandVector &Operands) {
return false;
}
-MipsAsmParser::OperandMatchResultTy
+OperandMatchResultTy
MipsAsmParser::matchAnyRegisterNameWithoutDollar(OperandVector &Operands,
StringRef Identifier,
SMLoc S) {
int Index = matchCPURegisterName(Identifier);
if (Index != -1) {
Operands.push_back(MipsOperand::createGPRReg(
- Index, getContext().getRegisterInfo(), S, getLexer().getLoc(), *this));
+ Index, Identifier, getContext().getRegisterInfo(), S,
+ getLexer().getLoc(), *this));
return MatchOperand_Success;
}
Index = matchHWRegsRegisterName(Identifier);
if (Index != -1) {
Operands.push_back(MipsOperand::createHWRegsReg(
- Index, getContext().getRegisterInfo(), S, getLexer().getLoc(), *this));
+ Index, Identifier, getContext().getRegisterInfo(), S,
+ getLexer().getLoc(), *this));
return MatchOperand_Success;
}
Index = matchFPURegisterName(Identifier);
if (Index != -1) {
Operands.push_back(MipsOperand::createFGRReg(
- Index, getContext().getRegisterInfo(), S, getLexer().getLoc(), *this));
+ Index, Identifier, getContext().getRegisterInfo(), S,
+ getLexer().getLoc(), *this));
return MatchOperand_Success;
}
Index = matchFCCRegisterName(Identifier);
if (Index != -1) {
Operands.push_back(MipsOperand::createFCCReg(
- Index, getContext().getRegisterInfo(), S, getLexer().getLoc(), *this));
+ Index, Identifier, getContext().getRegisterInfo(), S,
+ getLexer().getLoc(), *this));
return MatchOperand_Success;
}
Index = matchACRegisterName(Identifier);
if (Index != -1) {
Operands.push_back(MipsOperand::createACCReg(
- Index, getContext().getRegisterInfo(), S, getLexer().getLoc(), *this));
+ Index, Identifier, getContext().getRegisterInfo(), S,
+ getLexer().getLoc(), *this));
return MatchOperand_Success;
}
Index = matchMSA128RegisterName(Identifier);
if (Index != -1) {
Operands.push_back(MipsOperand::createMSA128Reg(
- Index, getContext().getRegisterInfo(), S, getLexer().getLoc(), *this));
+ Index, Identifier, getContext().getRegisterInfo(), S,
+ getLexer().getLoc(), *this));
return MatchOperand_Success;
}
Index = matchMSA128CtrlRegisterName(Identifier);
if (Index != -1) {
Operands.push_back(MipsOperand::createMSACtrlReg(
- Index, getContext().getRegisterInfo(), S, getLexer().getLoc(), *this));
+ Index, Identifier, getContext().getRegisterInfo(), S,
+ getLexer().getLoc(), *this));
return MatchOperand_Success;
}
return MatchOperand_NoMatch;
}
-MipsAsmParser::OperandMatchResultTy
+OperandMatchResultTy
MipsAsmParser::matchAnyRegisterWithoutDollar(OperandVector &Operands, SMLoc S) {
MCAsmParser &Parser = getParser();
auto Token = Parser.getLexer().peekTok(false);
@@ -4530,8 +4875,8 @@ MipsAsmParser::matchAnyRegisterWithoutDollar(OperandVector &Operands, SMLoc S) {
} else if (Token.is(AsmToken::Integer)) {
DEBUG(dbgs() << ".. integer\n");
Operands.push_back(MipsOperand::createNumericReg(
- Token.getIntVal(), getContext().getRegisterInfo(), S, Token.getLoc(),
- *this));
+ Token.getIntVal(), Token.getString(), getContext().getRegisterInfo(), S,
+ Token.getLoc(), *this));
return MatchOperand_Success;
}
@@ -4540,7 +4885,7 @@ MipsAsmParser::matchAnyRegisterWithoutDollar(OperandVector &Operands, SMLoc S) {
return MatchOperand_NoMatch;
}
-MipsAsmParser::OperandMatchResultTy
+OperandMatchResultTy
MipsAsmParser::parseAnyRegister(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
DEBUG(dbgs() << "parseAnyRegister\n");
@@ -4568,48 +4913,19 @@ MipsAsmParser::parseAnyRegister(OperandVector &Operands) {
return ResTy;
}
-MipsAsmParser::OperandMatchResultTy
-MipsAsmParser::parseImm(OperandVector &Operands) {
- MCAsmParser &Parser = getParser();
- switch (getLexer().getKind()) {
- default:
- return MatchOperand_NoMatch;
- case AsmToken::LParen:
- case AsmToken::Minus:
- case AsmToken::Plus:
- case AsmToken::Integer:
- case AsmToken::Tilde:
- case AsmToken::String:
- break;
- }
-
- const MCExpr *IdVal;
- SMLoc S = Parser.getTok().getLoc();
- if (getParser().parseExpression(IdVal))
- return MatchOperand_ParseFail;
-
- SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
- Operands.push_back(MipsOperand::CreateImm(IdVal, S, E, *this));
- return MatchOperand_Success;
-}
-
-MipsAsmParser::OperandMatchResultTy
+OperandMatchResultTy
MipsAsmParser::parseJumpTarget(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
DEBUG(dbgs() << "parseJumpTarget\n");
SMLoc S = getLexer().getLoc();
- // Integers and expressions are acceptable
- OperandMatchResultTy ResTy = parseImm(Operands);
- if (ResTy != MatchOperand_NoMatch)
- return ResTy;
-
// Registers are a valid target and have priority over symbols.
- ResTy = parseAnyRegister(Operands);
+ OperandMatchResultTy ResTy = parseAnyRegister(Operands);
if (ResTy != MatchOperand_NoMatch)
return ResTy;
+ // Integers and expressions are acceptable
const MCExpr *Expr = nullptr;
if (Parser.parseExpression(Expr)) {
// We have no way of knowing if a symbol was consumed so we must ParseFail
@@ -4620,7 +4936,7 @@ MipsAsmParser::parseJumpTarget(OperandVector &Operands) {
return MatchOperand_Success;
}
-MipsAsmParser::OperandMatchResultTy
+OperandMatchResultTy
MipsAsmParser::parseInvNum(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
const MCExpr *IdVal;
@@ -4639,7 +4955,7 @@ MipsAsmParser::parseInvNum(OperandVector &Operands) {
return MatchOperand_Success;
}
-MipsAsmParser::OperandMatchResultTy
+OperandMatchResultTy
MipsAsmParser::parseRegisterList(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
SmallVector<unsigned, 10> Regs;
@@ -4725,7 +5041,7 @@ MipsAsmParser::parseRegisterList(OperandVector &Operands) {
return MatchOperand_Success;
}
-MipsAsmParser::OperandMatchResultTy
+OperandMatchResultTy
MipsAsmParser::parseRegisterPair(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
@@ -4741,7 +5057,7 @@ MipsAsmParser::parseRegisterPair(OperandVector &Operands) {
return MatchOperand_Success;
}
-MipsAsmParser::OperandMatchResultTy
+OperandMatchResultTy
MipsAsmParser::parseMovePRegPair(OperandVector &Operands) {
MCAsmParser &Parser = getParser();
SmallVector<std::unique_ptr<MCParsedAsmOperand>, 8> TmpOperands;
@@ -4793,12 +5109,10 @@ bool MipsAsmParser::parseParenSuffix(StringRef Name, OperandVector &Operands) {
Parser.Lex();
if (parseOperand(Operands, Name)) {
SMLoc Loc = getLexer().getLoc();
- Parser.eatToEndOfStatement();
return Error(Loc, "unexpected token in argument list");
}
if (Parser.getTok().isNot(AsmToken::RParen)) {
SMLoc Loc = getLexer().getLoc();
- Parser.eatToEndOfStatement();
return Error(Loc, "unexpected token, expected ')'");
}
Operands.push_back(
@@ -4823,12 +5137,10 @@ bool MipsAsmParser::parseBracketSuffix(StringRef Name,
Parser.Lex();
if (parseOperand(Operands, Name)) {
SMLoc Loc = getLexer().getLoc();
- Parser.eatToEndOfStatement();
return Error(Loc, "unexpected token in argument list");
}
if (Parser.getTok().isNot(AsmToken::RBrac)) {
SMLoc Loc = getLexer().getLoc();
- Parser.eatToEndOfStatement();
return Error(Loc, "unexpected token, expected ']'");
}
Operands.push_back(
@@ -4848,7 +5160,6 @@ bool MipsAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// Check if we have valid mnemonic
if (!mnemonicIsValid(Name, 0)) {
- Parser.eatToEndOfStatement();
return Error(NameLoc, "unknown instruction");
}
// First operand in MCInst is instruction mnemonic.
@@ -4859,7 +5170,6 @@ bool MipsAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// Read the first operand.
if (parseOperand(Operands, Name)) {
SMLoc Loc = getLexer().getLoc();
- Parser.eatToEndOfStatement();
return Error(Loc, "unexpected token in argument list");
}
if (getLexer().is(AsmToken::LBrac) && parseBracketSuffix(Name, Operands))
@@ -4871,7 +5181,6 @@ bool MipsAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// Parse and remember the operand.
if (parseOperand(Operands, Name)) {
SMLoc Loc = getLexer().getLoc();
- Parser.eatToEndOfStatement();
return Error(Loc, "unexpected token in argument list");
}
// Parse bracket and parenthesis suffixes before we iterate
@@ -4885,7 +5194,6 @@ bool MipsAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
}
if (getLexer().isNot(AsmToken::EndOfStatement)) {
SMLoc Loc = getLexer().getLoc();
- Parser.eatToEndOfStatement();
return Error(Loc, "unexpected token in argument list");
}
Parser.Lex(); // Consume the EndOfStatement.
@@ -4895,9 +5203,7 @@ bool MipsAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// FIXME: Given that these have the same name, these should both be
// consistent on affecting the Parser.
bool MipsAsmParser::reportParseError(Twine ErrorMsg) {
- MCAsmParser &Parser = getParser();
SMLoc Loc = getLexer().getLoc();
- Parser.eatToEndOfStatement();
return Error(Loc, ErrorMsg);
}
@@ -5398,7 +5704,6 @@ bool MipsAsmParser::eatComma(StringRef ErrorStr) {
MCAsmParser &Parser = getParser();
if (getLexer().isNot(AsmToken::Comma)) {
SMLoc Loc = getLexer().getLoc();
- Parser.eatToEndOfStatement();
return Error(Loc, ErrorStr);
}
@@ -5507,7 +5812,6 @@ bool MipsAsmParser::parseDirectiveCPSetup() {
MipsOperand &FuncRegOpnd = static_cast<MipsOperand &>(*TmpReg[0]);
if (!FuncRegOpnd.isGPRAsmReg()) {
reportParseError(FuncRegOpnd.getStartLoc(), "invalid register");
- Parser.eatToEndOfStatement();
return false;
}
@@ -5526,7 +5830,6 @@ bool MipsAsmParser::parseDirectiveCPSetup() {
if (Parser.parseExpression(OffsetExpr) ||
!OffsetExpr->evaluateAsAbsolute(OffsetVal)) {
reportParseError(ExprLoc, "expected save register or stack offset");
- Parser.eatToEndOfStatement();
return false;
}
@@ -5536,7 +5839,6 @@ bool MipsAsmParser::parseDirectiveCPSetup() {
MipsOperand &SaveOpnd = static_cast<MipsOperand &>(*TmpReg[0]);
if (!SaveOpnd.isGPRAsmReg()) {
reportParseError(SaveOpnd.getStartLoc(), "invalid register");
- Parser.eatToEndOfStatement();
return false;
}
Save = SaveOpnd.getGPR32Reg();
@@ -5740,7 +6042,79 @@ bool MipsAsmParser::parseDirectiveGpDWord() {
getParser().getStreamer().EmitGPRel64Value(Value);
if (getLexer().isNot(AsmToken::EndOfStatement))
- return Error(getLexer().getLoc(),
+ return Error(getLexer().getLoc(),
+ "unexpected token, expected end of statement");
+ Parser.Lex(); // Eat EndOfStatement token.
+ return false;
+}
+
+/// parseDirectiveDtpRelWord
+/// ::= .dtprelword tls_sym
+bool MipsAsmParser::parseDirectiveDtpRelWord() {
+ MCAsmParser &Parser = getParser();
+ const MCExpr *Value;
+ // EmitDTPRel32Value requires an expression, so we use the base class
+ // method to evaluate the expression.
+ if (getParser().parseExpression(Value))
+ return true;
+ getParser().getStreamer().EmitDTPRel32Value(Value);
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return Error(getLexer().getLoc(),
+ "unexpected token, expected end of statement");
+ Parser.Lex(); // Eat EndOfStatement token.
+ return false;
+}
+
+/// parseDirectiveDtpRelDWord
+/// ::= .dtpreldword tls_sym
+bool MipsAsmParser::parseDirectiveDtpRelDWord() {
+ MCAsmParser &Parser = getParser();
+ const MCExpr *Value;
+ // EmitDTPRel64Value requires an expression, so we use the base class
+ // method to evaluate the expression.
+ if (getParser().parseExpression(Value))
+ return true;
+ getParser().getStreamer().EmitDTPRel64Value(Value);
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return Error(getLexer().getLoc(),
+ "unexpected token, expected end of statement");
+ Parser.Lex(); // Eat EndOfStatement token.
+ return false;
+}
+
+/// parseDirectiveTpRelWord
+/// ::= .tprelword tls_sym
+bool MipsAsmParser::parseDirectiveTpRelWord() {
+ MCAsmParser &Parser = getParser();
+ const MCExpr *Value;
+ // EmitTPRel32Value requires an expression, so we use the base class
+ // method to evaluate the expression.
+ if (getParser().parseExpression(Value))
+ return true;
+ getParser().getStreamer().EmitTPRel32Value(Value);
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return Error(getLexer().getLoc(),
+ "unexpected token, expected end of statement");
+ Parser.Lex(); // Eat EndOfStatement token.
+ return false;
+}
+
+/// parseDirectiveTpRelDWord
+/// ::= .tpreldword tls_sym
+bool MipsAsmParser::parseDirectiveTpRelDWord() {
+ MCAsmParser &Parser = getParser();
+ const MCExpr *Value;
+ // EmitTPRel64Value requires an expression, so we use the base class
+ // method to evaluate the expression.
+ if (getParser().parseExpression(Value))
+ return true;
+ getParser().getStreamer().EmitTPRel64Value(Value);
+
+ if (getLexer().isNot(AsmToken::EndOfStatement))
+ return Error(getLexer().getLoc(),
"unexpected token, expected end of statement");
Parser.Lex(); // Eat EndOfStatement token.
return false;
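Each of the four new directives parses a single expression and hands it to the matching MCStreamer emitter; the ELF writer additions further down map the resulting FK_DTPRel_4/8 and FK_TPRel_4/8 fixups to R_MIPS_TLS_DTPREL32/64 and R_MIPS_TLS_TPREL32/64. A condensed sketch of what ".dtprelword foo" amounts to once parsed; the streamer, context and symbol name are illustrative assumptions:

    #include "llvm/MC/MCContext.h"
    #include "llvm/MC/MCExpr.h"
    #include "llvm/MC/MCStreamer.h"

    // Emit a 32-bit DTP-relative value for "foo", as .dtprelword does after
    // parsing; the backend turns the FK_DTPRel_4 fixup into R_MIPS_TLS_DTPREL32.
    void dtprelwordExample(llvm::MCStreamer &Streamer, llvm::MCContext &Ctx) {
      llvm::MCSymbol *Foo = Ctx.getOrCreateSymbol("foo");
      const llvm::MCExpr *Ref = llvm::MCSymbolRefExpr::create(Foo, Ctx);
      Streamer.EmitDTPRel32Value(Ref);
    }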
@@ -5752,9 +6126,8 @@ bool MipsAsmParser::parseDirectiveOption() {
AsmToken Tok = Parser.getTok();
// At the moment only identifiers are supported.
if (Tok.isNot(AsmToken::Identifier)) {
- Error(Parser.getTok().getLoc(), "unexpected token, expected identifier");
- Parser.eatToEndOfStatement();
- return false;
+ return Error(Parser.getTok().getLoc(),
+ "unexpected token, expected identifier");
}
StringRef Option = Tok.getIdentifier();
@@ -5766,9 +6139,8 @@ bool MipsAsmParser::parseDirectiveOption() {
getTargetStreamer().emitDirectiveOptionPic0();
Parser.Lex();
if (Parser.getTok().isNot(AsmToken::EndOfStatement)) {
- Error(Parser.getTok().getLoc(),
- "unexpected token, expected end of statement");
- Parser.eatToEndOfStatement();
+ return Error(Parser.getTok().getLoc(),
+ "unexpected token, expected end of statement");
}
return false;
}
@@ -5780,9 +6152,8 @@ bool MipsAsmParser::parseDirectiveOption() {
getTargetStreamer().emitDirectiveOptionPic2();
Parser.Lex();
if (Parser.getTok().isNot(AsmToken::EndOfStatement)) {
- Error(Parser.getTok().getLoc(),
- "unexpected token, expected end of statement");
- Parser.eatToEndOfStatement();
+ return Error(Parser.getTok().getLoc(),
+ "unexpected token, expected end of statement");
}
return false;
}
@@ -5873,8 +6244,7 @@ bool MipsAsmParser::parseDirectiveModule() {
return false; // parseDirectiveModule has finished successfully.
} else if (Option == "nooddspreg") {
if (!isABI_O32()) {
- Error(L, "'.module nooddspreg' requires the O32 ABI");
- return false;
+ return Error(L, "'.module nooddspreg' requires the O32 ABI");
}
setModuleFeatureBits(Mips::FeatureNoOddSPReg, "nooddspreg");
@@ -6295,6 +6665,26 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
return false;
}
+ if (IDVal == ".dtprelword") {
+ parseDirectiveDtpRelWord();
+ return false;
+ }
+
+ if (IDVal == ".dtpreldword") {
+ parseDirectiveDtpRelDWord();
+ return false;
+ }
+
+ if (IDVal == ".tprelword") {
+ parseDirectiveTpRelWord();
+ return false;
+ }
+
+ if (IDVal == ".tpreldword") {
+ parseDirectiveTpRelDWord();
+ return false;
+ }
+
if (IDVal == ".word") {
parseDataDirective(4, DirectiveID.getLoc());
return false;
@@ -6315,8 +6705,6 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) {
if (Parser.getTok().isNot(AsmToken::EndOfStatement)) {
Error(Parser.getTok().getLoc(),
"unexpected token, expected end of statement");
- // Clear line
- Parser.eatToEndOfStatement();
}
return false;
}
@@ -6367,10 +6755,10 @@ bool MipsAsmParser::parseInternalDirectiveReallowModule() {
}
extern "C" void LLVMInitializeMipsAsmParser() {
- RegisterMCAsmParser<MipsAsmParser> X(TheMipsTarget);
- RegisterMCAsmParser<MipsAsmParser> Y(TheMipselTarget);
- RegisterMCAsmParser<MipsAsmParser> A(TheMips64Target);
- RegisterMCAsmParser<MipsAsmParser> B(TheMips64elTarget);
+ RegisterMCAsmParser<MipsAsmParser> X(getTheMipsTarget());
+ RegisterMCAsmParser<MipsAsmParser> Y(getTheMipselTarget());
+ RegisterMCAsmParser<MipsAsmParser> A(getTheMips64Target());
+ RegisterMCAsmParser<MipsAsmParser> B(getTheMips64elTarget());
}
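Registration now goes through accessor functions rather than global Target objects, so the targets are constructed lazily on first use. The accessors are defined elsewhere in the tree; the snippet below is only a sketch of the usual shape of such a function, not a quote of the real definition:

    #include "llvm/Support/TargetRegistry.h"

    // Common pattern behind accessors like getTheMipsTarget(): a function-local
    // static replaces the old "extern Target TheMipsTarget" global.
    llvm::Target &getTheMipsTargetSketch() {
      static llvm::Target TheTarget;
      return TheTarget;
    }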
#define GET_REGISTER_MATCHER
diff --git a/contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
index aebb4ef419d1..f80efb18507b 100644
--- a/contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
+++ b/contrib/llvm/lib/Target/Mips/Disassembler/MipsDisassembler.cpp
@@ -439,6 +439,22 @@ static DecodeStatus DecodeINSVE_DF(MCInst &MI, InsnType insn, uint64_t Address,
const void *Decoder);
template <typename InsnType>
+static DecodeStatus DecodeDAHIDATIMMR6(MCInst &MI, InsnType insn, uint64_t Address,
+ const void *Decoder);
+
+template <typename InsnType>
+static DecodeStatus DecodeDAHIDATI(MCInst &MI, InsnType insn, uint64_t Address,
+ const void *Decoder);
+
+template <typename InsnType>
+static DecodeStatus DecodeDAHIDATIMMR6(MCInst &MI, InsnType insn, uint64_t Address,
+ const void *Decoder);
+
+template <typename InsnType>
+static DecodeStatus DecodeDAHIDATI(MCInst &MI, InsnType insn, uint64_t Address,
+ const void *Decoder);
+
+template <typename InsnType>
static DecodeStatus
DecodeAddiGroupBranch(MCInst &MI, InsnType insn, uint64_t Address,
const void *Decoder);
@@ -460,6 +476,16 @@ DecodePOP37GroupBranchMMR6(MCInst &MI, InsnType insn, uint64_t Address,
template <typename InsnType>
static DecodeStatus
+DecodePOP65GroupBranchMMR6(MCInst &MI, InsnType insn, uint64_t Address,
+ const void *Decoder);
+
+template <typename InsnType>
+static DecodeStatus
+DecodePOP75GroupBranchMMR6(MCInst &MI, InsnType insn, uint64_t Address,
+ const void *Decoder);
+
+template <typename InsnType>
+static DecodeStatus
DecodeBlezlGroupBranch(MCInst &MI, InsnType insn, uint64_t Address,
const void *Decoder);
@@ -501,8 +527,10 @@ static DecodeStatus DecodeMovePRegPair(MCInst &Inst, unsigned Insn,
const void *Decoder);
namespace llvm {
-extern Target TheMipselTarget, TheMipsTarget, TheMips64Target,
- TheMips64elTarget;
+Target &getTheMipselTarget();
+Target &getTheMipsTarget();
+Target &getTheMips64Target();
+Target &getTheMips64elTarget();
}
static MCDisassembler *createMipsDisassembler(
@@ -521,13 +549,13 @@ static MCDisassembler *createMipselDisassembler(
extern "C" void LLVMInitializeMipsDisassembler() {
// Register the disassembler.
- TargetRegistry::RegisterMCDisassembler(TheMipsTarget,
+ TargetRegistry::RegisterMCDisassembler(getTheMipsTarget(),
createMipsDisassembler);
- TargetRegistry::RegisterMCDisassembler(TheMipselTarget,
+ TargetRegistry::RegisterMCDisassembler(getTheMipselTarget(),
createMipselDisassembler);
- TargetRegistry::RegisterMCDisassembler(TheMips64Target,
+ TargetRegistry::RegisterMCDisassembler(getTheMips64Target(),
createMipsDisassembler);
- TargetRegistry::RegisterMCDisassembler(TheMips64elTarget,
+ TargetRegistry::RegisterMCDisassembler(getTheMips64elTarget(),
createMipselDisassembler);
}
@@ -586,6 +614,34 @@ static DecodeStatus DecodeINSVE_DF(MCInst &MI, InsnType insn, uint64_t Address,
}
template <typename InsnType>
+static DecodeStatus DecodeDAHIDATIMMR6(MCInst &MI, InsnType insn, uint64_t Address,
+ const void *Decoder) {
+ InsnType Rs = fieldFromInstruction(insn, 16, 5);
+ InsnType Imm = fieldFromInstruction(insn, 0, 16);
+ MI.addOperand(MCOperand::createReg(getReg(Decoder, Mips::GPR64RegClassID,
+ Rs)));
+ MI.addOperand(MCOperand::createReg(getReg(Decoder, Mips::GPR64RegClassID,
+ Rs)));
+ MI.addOperand(MCOperand::createImm(Imm));
+
+ return MCDisassembler::Success;
+}
+
+template <typename InsnType>
+static DecodeStatus DecodeDAHIDATI(MCInst &MI, InsnType insn, uint64_t Address,
+ const void *Decoder) {
+ InsnType Rs = fieldFromInstruction(insn, 21, 5);
+ InsnType Imm = fieldFromInstruction(insn, 0, 16);
+ MI.addOperand(MCOperand::createReg(getReg(Decoder, Mips::GPR64RegClassID,
+ Rs)));
+ MI.addOperand(MCOperand::createReg(getReg(Decoder, Mips::GPR64RegClassID,
+ Rs)));
+ MI.addOperand(MCOperand::createImm(Imm));
+
+ return MCDisassembler::Success;
+}
+
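DAHI/DATI read and modify a single GPR, so the decoder adds the one register field twice, as destination and as tied source, before the 16-bit immediate; the two instantiations differ only in where that field sits (bits [25:21] for DecodeDAHIDATI, bits [20:16] for the MMR6 variant). A bit-extraction sketch for the non-MMR6 layout, using a fabricated word purely to show the arithmetic:

    #include <cstdint>

    // Rs from bits [25:21] and the immediate from bits [15:0], as
    // DecodeDAHIDATI extracts them.
    void dahiFieldsExample() {
      uint32_t Insn = 0x04300010u;
      unsigned Rs = (Insn >> 21) & 0x1f; // 1
      unsigned Imm = Insn & 0xffff;      // 0x0010
      (void)Rs;
      (void)Imm;
    }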
+template <typename InsnType>
static DecodeStatus DecodeAddiGroupBranch(MCInst &MI, InsnType insn,
uint64_t Address,
const void *Decoder) {
@@ -630,7 +686,7 @@ static DecodeStatus DecodePOP35GroupBranchMMR6(MCInst &MI, InsnType insn,
const void *Decoder) {
InsnType Rt = fieldFromInstruction(insn, 21, 5);
InsnType Rs = fieldFromInstruction(insn, 16, 5);
- InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) * 2;
+ int64_t Imm = 0;
if (Rs >= Rt) {
MI.setOpcode(Mips::BOVC_MMR6);
@@ -638,16 +694,19 @@ static DecodeStatus DecodePOP35GroupBranchMMR6(MCInst &MI, InsnType insn,
Rt)));
MI.addOperand(MCOperand::createReg(getReg(Decoder, Mips::GPR32RegClassID,
Rs)));
+ Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) * 2 + 4;
} else if (Rs != 0 && Rs < Rt) {
MI.setOpcode(Mips::BEQC_MMR6);
MI.addOperand(MCOperand::createReg(getReg(Decoder, Mips::GPR32RegClassID,
Rs)));
MI.addOperand(MCOperand::createReg(getReg(Decoder, Mips::GPR32RegClassID,
Rt)));
+ Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) * 4 + 4;
} else {
MI.setOpcode(Mips::BEQZALC_MMR6);
MI.addOperand(MCOperand::createReg(getReg(Decoder, Mips::GPR32RegClassID,
Rt)));
+ Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) * 2 + 4;
}
MI.addOperand(MCOperand::createImm(Imm));
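The decoded immediate is now computed per opcode: BEQC_MMR6 scales the 16-bit field by 4, while BOVC_MMR6 and BEQZALC_MMR6 scale it by 2, and every variant adds 4, which expresses the operand relative to the branch instruction itself rather than to the following one. A worked example of the BEQC_MMR6 arithmetic, with a field value invented for illustration:

    #include "llvm/Support/MathExtras.h"
    #include <cassert>

    // A raw offset field of 0x0008 decodes to SignExtend64(0x0008, 16) * 4 + 4,
    // i.e. a target 36 bytes past the branch itself.
    void beqcOffsetExample() {
      int64_t Imm = llvm::SignExtend64(0x0008, 16) * 4 + 4;
      assert(Imm == 36);
      (void)Imm;
    }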
@@ -700,7 +759,7 @@ static DecodeStatus DecodePOP37GroupBranchMMR6(MCInst &MI, InsnType insn,
const void *Decoder) {
InsnType Rt = fieldFromInstruction(insn, 21, 5);
InsnType Rs = fieldFromInstruction(insn, 16, 5);
- InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) * 2;
+ int64_t Imm = 0;
if (Rs >= Rt) {
MI.setOpcode(Mips::BNVC_MMR6);
@@ -708,16 +767,19 @@ static DecodeStatus DecodePOP37GroupBranchMMR6(MCInst &MI, InsnType insn,
Rt)));
MI.addOperand(MCOperand::createReg(getReg(Decoder, Mips::GPR32RegClassID,
Rs)));
+ Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) * 2 + 4;
} else if (Rs != 0 && Rs < Rt) {
MI.setOpcode(Mips::BNEC_MMR6);
MI.addOperand(MCOperand::createReg(getReg(Decoder, Mips::GPR32RegClassID,
Rs)));
MI.addOperand(MCOperand::createReg(getReg(Decoder, Mips::GPR32RegClassID,
Rt)));
+ Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) * 4 + 4;
} else {
MI.setOpcode(Mips::BNEZALC_MMR6);
MI.addOperand(MCOperand::createReg(getReg(Decoder, Mips::GPR32RegClassID,
Rt)));
+ Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) * 2 + 4;
}
MI.addOperand(MCOperand::createImm(Imm));
@@ -726,6 +788,84 @@ static DecodeStatus DecodePOP37GroupBranchMMR6(MCInst &MI, InsnType insn,
}
template <typename InsnType>
+static DecodeStatus DecodePOP65GroupBranchMMR6(MCInst &MI, InsnType insn,
+ uint64_t Address,
+ const void *Decoder) {
+ // We have:
+ // 0b110101 ttttt sssss iiiiiiiiiiiiiiii
+ // Invalid if rt == 0
+ // BGTZC_MMR6 if rs == 0 && rt != 0
+ // BLTZC_MMR6 if rs == rt && rt != 0
+ // BLTC_MMR6 if rs != rt && rs != 0 && rt != 0
+
+ InsnType Rt = fieldFromInstruction(insn, 21, 5);
+ InsnType Rs = fieldFromInstruction(insn, 16, 5);
+ int64_t Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) * 4 + 4;
+ bool HasRs = false;
+
+ if (Rt == 0)
+ return MCDisassembler::Fail;
+ else if (Rs == 0)
+ MI.setOpcode(Mips::BGTZC_MMR6);
+ else if (Rs == Rt)
+ MI.setOpcode(Mips::BLTZC_MMR6);
+ else {
+ MI.setOpcode(Mips::BLTC_MMR6);
+ HasRs = true;
+ }
+
+ if (HasRs)
+ MI.addOperand(MCOperand::createReg(getReg(Decoder, Mips::GPR32RegClassID,
+ Rs)));
+
+ MI.addOperand(MCOperand::createReg(getReg(Decoder, Mips::GPR32RegClassID,
+ Rt)));
+
+ MI.addOperand(MCOperand::createImm(Imm));
+
+ return MCDisassembler::Success;
+}
+
+template <typename InsnType>
+static DecodeStatus DecodePOP75GroupBranchMMR6(MCInst &MI, InsnType insn,
+ uint64_t Address,
+ const void *Decoder) {
+ // We have:
+ // 0b111101 ttttt sssss iiiiiiiiiiiiiiii
+ // Invalid if rt == 0
+ // BLEZC_MMR6 if rs == 0 && rt != 0
+ // BGEZC_MMR6 if rs == rt && rt != 0
+ // BGEC_MMR6 if rs != rt && rs != 0 && rt != 0
+
+ InsnType Rt = fieldFromInstruction(insn, 21, 5);
+ InsnType Rs = fieldFromInstruction(insn, 16, 5);
+ int64_t Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) * 4 + 4;
+ bool HasRs = false;
+
+ if (Rt == 0)
+ return MCDisassembler::Fail;
+ else if (Rs == 0)
+ MI.setOpcode(Mips::BLEZC_MMR6);
+ else if (Rs == Rt)
+ MI.setOpcode(Mips::BGEZC_MMR6);
+ else {
+ HasRs = true;
+ MI.setOpcode(Mips::BGEC_MMR6);
+ }
+
+ if (HasRs)
+ MI.addOperand(MCOperand::createReg(getReg(Decoder, Mips::GPR32RegClassID,
+ Rs)));
+
+ MI.addOperand(MCOperand::createReg(getReg(Decoder, Mips::GPR32RegClassID,
+ Rt)));
+
+ MI.addOperand(MCOperand::createImm(Imm));
+
+ return MCDisassembler::Success;
+}
+
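Within each pool the register fields alone select the mnemonic: rt == 0 is invalid, rs == 0 picks the compare-with-zero form, rs == rt the other single-register form, and two distinct non-zero registers the full compare. A quick field-extraction sketch for the POP65 layout shown in the comment; the encoded word is fabricated for illustration and is not claimed to be a valid instruction:

    #include <cstdint>

    // 0b110101 | rt = 5 | rs = 5 | imm: rs == rt != 0, so the decoder above
    // would pick BLTZC_MMR6.
    void pop65FieldsExample() {
      uint32_t Insn = (0x35u << 26) | (5u << 21) | (5u << 16) | 0x0010u;
      unsigned Rt = (Insn >> 21) & 0x1f; // 5
      unsigned Rs = (Insn >> 16) & 0x1f; // 5
      (void)Rt;
      (void)Rs;
    }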
+template <typename InsnType>
static DecodeStatus DecodeBlezlGroupBranch(MCInst &MI, InsnType insn,
uint64_t Address,
const void *Decoder) {
@@ -904,7 +1044,7 @@ static DecodeStatus DecodeBlezGroupBranch(MCInst &MI, InsnType insn,
}
/// Read two bytes from the ArrayRef and return 16 bit halfword sorted
-/// according to the given endianess.
+/// according to the given endianness.
static DecodeStatus readInstruction16(ArrayRef<uint8_t> Bytes, uint64_t Address,
uint64_t &Size, uint32_t &Insn,
bool IsBigEndian) {
@@ -924,7 +1064,7 @@ static DecodeStatus readInstruction16(ArrayRef<uint8_t> Bytes, uint64_t Address,
}
/// Read four bytes from the ArrayRef and return 32 bit word sorted
-/// according to the given endianess
+/// according to the given endianness.
static DecodeStatus readInstruction32(ArrayRef<uint8_t> Bytes, uint64_t Address,
uint64_t &Size, uint32_t &Insn,
bool IsBigEndian, bool IsMicroMips) {
@@ -1662,7 +1802,7 @@ static DecodeStatus DecodeMemMMImm12(MCInst &Inst,
break;
case Mips::SC_MM:
Inst.addOperand(MCOperand::createReg(Reg));
- // fallthrough
+ LLVM_FALLTHROUGH;
default:
Inst.addOperand(MCOperand::createReg(Reg));
if (Inst.getOpcode() == Mips::LWP_MM || Inst.getOpcode() == Mips::SWP_MM ||
@@ -2008,7 +2148,7 @@ static DecodeStatus DecodeBranchTarget21MM(MCInst &Inst,
unsigned Offset,
uint64_t Address,
const void *Decoder) {
- int32_t BranchOffset = SignExtend32<21>(Offset) << 1;
+ int32_t BranchOffset = SignExtend32<21>(Offset) * 4 + 4;
Inst.addOperand(MCOperand::createImm(BranchOffset));
return MCDisassembler::Success;
@@ -2046,7 +2186,7 @@ static DecodeStatus DecodeBranchTargetMM(MCInst &Inst,
unsigned Offset,
uint64_t Address,
const void *Decoder) {
- int32_t BranchOffset = SignExtend32<16>(Offset) * 2;
+ int32_t BranchOffset = SignExtend32<16>(Offset) * 2 + 4;
Inst.addOperand(MCOperand::createImm(BranchOffset));
return MCDisassembler::Success;
}
@@ -2285,7 +2425,7 @@ static DecodeStatus DecodeBgtzGroupBranchMMR6(MCInst &MI, InsnType insn,
InsnType Rt = fieldFromInstruction(insn, 21, 5);
InsnType Rs = fieldFromInstruction(insn, 16, 5);
- InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) * 2;
+ InsnType Imm = 0;
bool HasRs = false;
bool HasRt = false;
@@ -2294,15 +2434,18 @@ static DecodeStatus DecodeBgtzGroupBranchMMR6(MCInst &MI, InsnType insn,
else if (Rs == 0) {
MI.setOpcode(Mips::BGTZALC_MMR6);
HasRt = true;
+ Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) * 2 + 4;
}
else if (Rs == Rt) {
MI.setOpcode(Mips::BLTZALC_MMR6);
HasRs = true;
+ Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) * 2 + 4;
}
else {
MI.setOpcode(Mips::BLTUC_MMR6);
HasRs = true;
HasRt = true;
+ Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) * 4 + 4;
}
if (HasRs)
@@ -2324,25 +2467,30 @@ static DecodeStatus DecodeBlezGroupBranchMMR6(MCInst &MI, InsnType insn,
const void *Decoder) {
// We have:
// 0b000110 ttttt sssss iiiiiiiiiiiiiiii
- // Invalid if rs == 0
+ // Invalid if rt == 0
// BLEZALC_MMR6 if rs == 0 && rt != 0
// BGEZALC_MMR6 if rs == rt && rt != 0
// BGEUC_MMR6 if rs != rt && rs != 0 && rt != 0
InsnType Rt = fieldFromInstruction(insn, 21, 5);
InsnType Rs = fieldFromInstruction(insn, 16, 5);
- InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) * 2;
+ InsnType Imm = 0;
bool HasRs = false;
if (Rt == 0)
return MCDisassembler::Fail;
- else if (Rs == 0)
+ else if (Rs == 0) {
MI.setOpcode(Mips::BLEZALC_MMR6);
- else if (Rs == Rt)
+ Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) * 2 + 4;
+ }
+ else if (Rs == Rt) {
MI.setOpcode(Mips::BGEZALC_MMR6);
+ Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) * 2 + 4;
+ }
else {
HasRs = true;
MI.setOpcode(Mips::BGEUC_MMR6);
+ Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) * 4 + 4;
}
if (HasRs)
diff --git a/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
index 0fd593fcfbe1..49c42fd1880c 100644
--- a/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp
@@ -236,6 +236,7 @@ bool MipsInstPrinter::printAlias(const MCInst &MI, raw_ostream &OS) {
// beq $r0, $zero, $L2 => beqz $r0, $L2
return isReg<Mips::ZERO_64>(MI, 1) && printAlias("beqz", MI, 0, 2, OS);
case Mips::BNE:
+ case Mips::BNE_MM:
// bne $r0, $zero, $L2 => bnez $r0, $L2
return isReg<Mips::ZERO>(MI, 1) && printAlias("bnez", MI, 0, 2, OS);
case Mips::BNE64:
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
index 3cf632e789de..498ea6fda4b3 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsABIInfo.cpp
@@ -51,12 +51,11 @@ MipsABIInfo MipsABIInfo::computeTargetABI(const Triple &TT, StringRef CPU,
const MCTargetOptions &Options) {
if (Options.getABIName().startswith("o32"))
return MipsABIInfo::O32();
- else if (Options.getABIName().startswith("n32"))
+ if (Options.getABIName().startswith("n32"))
return MipsABIInfo::N32();
- else if (Options.getABIName().startswith("n64"))
+ if (Options.getABIName().startswith("n64"))
return MipsABIInfo::N64();
- else if (!Options.getABIName().empty())
- llvm_unreachable("Unknown ABI option for MIPS");
+ assert(Options.getABIName().empty() && "Unknown ABI option for MIPS");
if (TT.getArch() == Triple::mips64 || TT.getArch() == Triple::mips64el)
return MipsABIInfo::N64();
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
index 8292d6b4c55a..38b11f78e36d 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp
@@ -59,9 +59,15 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value,
case Mips::fixup_MIPS_PCLO16:
Value &= 0xffff;
break;
+ case FK_DTPRel_4:
+ case FK_DTPRel_8:
+ case FK_TPRel_4:
+ case FK_TPRel_8:
case FK_GPRel_4:
case FK_Data_4:
case FK_Data_8:
+ case Mips::fixup_Mips_SUB:
+ case Mips::fixup_MICROMIPS_SUB:
break;
case Mips::fixup_Mips_PC16:
// The displacement is then divided by 4 to give us an 18 bit
@@ -361,7 +367,9 @@ getFixupKindInfo(MCFixupKind Kind) const {
{ "fixup_MICROMIPS_TLS_DTPREL_HI16", 0, 16, 0 },
{ "fixup_MICROMIPS_TLS_DTPREL_LO16", 0, 16, 0 },
{ "fixup_MICROMIPS_TLS_TPREL_HI16", 0, 16, 0 },
- { "fixup_MICROMIPS_TLS_TPREL_LO16", 0, 16, 0 }
+ { "fixup_MICROMIPS_TLS_TPREL_LO16", 0, 16, 0 },
+ { "fixup_Mips_SUB", 0, 64, 0 },
+ { "fixup_MICROMIPS_SUB", 0, 64, 0 }
};
const static MCFixupKindInfo BigEndianInfos[Mips::NumTargetFixupKinds] = {
@@ -430,7 +438,9 @@ getFixupKindInfo(MCFixupKind Kind) const {
{ "fixup_MICROMIPS_TLS_DTPREL_HI16", 16, 16, 0 },
{ "fixup_MICROMIPS_TLS_DTPREL_LO16", 16, 16, 0 },
{ "fixup_MICROMIPS_TLS_TPREL_HI16", 16, 16, 0 },
- { "fixup_MICROMIPS_TLS_TPREL_LO16", 16, 16, 0 }
+ { "fixup_MICROMIPS_TLS_TPREL_LO16", 16, 16, 0 },
+ { "fixup_Mips_SUB", 0, 64, 0 },
+ { "fixup_MICROMIPS_SUB", 0, 64, 0 }
};
if (Kind < FirstTargetFixupKind)
@@ -482,27 +492,31 @@ void MipsAsmBackend::processFixupValue(const MCAssembler &Asm,
// MCAsmBackend
MCAsmBackend *llvm::createMipsAsmBackendEL32(const Target &T,
const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU) {
+ const Triple &TT, StringRef CPU,
+ const MCTargetOptions &Options) {
return new MipsAsmBackend(T, TT.getOS(), /*IsLittle*/ true,
/*Is64Bit*/ false);
}
MCAsmBackend *llvm::createMipsAsmBackendEB32(const Target &T,
const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU) {
+ const Triple &TT, StringRef CPU,
+ const MCTargetOptions &Options) {
return new MipsAsmBackend(T, TT.getOS(), /*IsLittle*/ false,
/*Is64Bit*/ false);
}
MCAsmBackend *llvm::createMipsAsmBackendEL64(const Target &T,
const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU) {
+ const Triple &TT, StringRef CPU,
+ const MCTargetOptions &Options) {
return new MipsAsmBackend(T, TT.getOS(), /*IsLittle*/ true, /*Is64Bit*/ true);
}
MCAsmBackend *llvm::createMipsAsmBackendEB64(const Target &T,
const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU) {
+ const Triple &TT, StringRef CPU,
+ const MCTargetOptions &Options) {
return new MipsAsmBackend(T, TT.getOS(), /*IsLittle*/ false,
/*Is64Bit*/ true);
}
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
index 2bcff881788c..35de7b27bf10 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h
@@ -120,7 +120,10 @@ namespace MipsII {
/// IsCTI - Instruction is a Control Transfer Instruction.
IsCTI = 1 << 4,
/// HasForbiddenSlot - Instruction has a forbidden slot.
- HasForbiddenSlot = 1 << 5
+ HasForbiddenSlot = 1 << 5,
+ /// IsPCRelativeLoad - A Load instruction with implicit source register
+ /// ($pc) with explicit offset and destination register
+ IsPCRelativeLoad = 1 << 6
};
}
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
index 20c5f3691d23..b2efd726da53 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
@@ -270,6 +270,14 @@ unsigned MipsELFObjectWriter::getRelocType(MCContext &Ctx,
case Mips::fixup_Mips_64:
case FK_Data_8:
return ELF::R_MIPS_64;
+ case FK_DTPRel_4:
+ return ELF::R_MIPS_TLS_DTPREL32;
+ case FK_DTPRel_8:
+ return ELF::R_MIPS_TLS_DTPREL64;
+ case FK_TPRel_4:
+ return ELF::R_MIPS_TLS_TPREL32;
+ case FK_TPRel_8:
+ return ELF::R_MIPS_TLS_TPREL64;
case FK_GPRel_4:
if (isN64()) {
unsigned Type = (unsigned)ELF::R_MIPS_NONE;
@@ -329,6 +337,8 @@ unsigned MipsELFObjectWriter::getRelocType(MCContext &Ctx,
return ELF::R_MIPS_HIGHER;
case Mips::fixup_Mips_HIGHEST:
return ELF::R_MIPS_HIGHEST;
+ case Mips::fixup_Mips_SUB:
+ return ELF::R_MIPS_SUB;
case Mips::fixup_Mips_GOT_HI16:
return ELF::R_MIPS_GOT_HI16;
case Mips::fixup_Mips_GOT_LO16:
@@ -365,6 +375,8 @@ unsigned MipsELFObjectWriter::getRelocType(MCContext &Ctx,
return ELF::R_MICROMIPS_TLS_TPREL_HI16;
case Mips::fixup_MICROMIPS_TLS_TPREL_LO16:
return ELF::R_MICROMIPS_TLS_TPREL_LO16;
+ case Mips::fixup_MICROMIPS_SUB:
+ return ELF::R_MICROMIPS_SUB;
}
llvm_unreachable("invalid fixup kind!");
@@ -407,6 +419,13 @@ unsigned MipsELFObjectWriter::getRelocType(MCContext &Ctx,
/// always match using the expressions from the source.
void MipsELFObjectWriter::sortRelocs(const MCAssembler &Asm,
std::vector<ELFRelocationEntry> &Relocs) {
+
+ // We do not need to sort the relocation table for the RELA relocations
+ // that N32/N64 use, as the relocation addend already carries the value we
+ // require rather than having it split across a pair of relocations.
+ if (hasRelocationAddend())
+ return;
+
if (Relocs.size() < 2)
return;
@@ -527,7 +546,7 @@ bool MipsELFObjectWriter::needsRelocateWithSymbol(const MCSymbol &Sym,
case ELF::R_MIPS_GPREL32:
if (cast<MCSymbolELF>(Sym).getOther() & ELF::STO_MIPS_MICROMIPS)
return true;
- // fallthrough
+ LLVM_FALLTHROUGH;
case ELF::R_MIPS_26:
case ELF::R_MIPS_64:
case ELF::R_MIPS_GPREL16:
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
index b4d8e9494650..149296212eca 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
@@ -209,6 +209,10 @@ namespace Mips {
// resulting in - R_MICROMIPS_TLS_TPREL_LO16
fixup_MICROMIPS_TLS_TPREL_LO16,
+ // resulting in - R_MIPS_SUB/R_MICROMIPS_SUB
+ fixup_Mips_SUB,
+ fixup_MICROMIPS_SUB,
+
// Marker
LastTargetFixupKind,
NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
index 1ce8f07092b1..a44a35f49e5f 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
@@ -45,13 +45,22 @@ MipsMCAsmInfo::MipsMCAsmInfo(const Triple &TheTriple) {
ZeroDirective = "\t.space\t";
GPRel32Directive = "\t.gpword\t";
GPRel64Directive = "\t.gpdword\t";
+ DTPRel32Directive = "\t.dtprelword\t";
+ DTPRel64Directive = "\t.dtpreldword\t";
+ TPRel32Directive = "\t.tprelword\t";
+ TPRel64Directive = "\t.tpreldword\t";
UseAssignmentForEHBegin = true;
SupportsDebugInformation = true;
ExceptionsType = ExceptionHandling::DwarfCFI;
DwarfRegNumForCFI = true;
+ HasMipsExpressions = true;
// Enable IAS by default for O32.
if (TheTriple.getArch() == Triple::mips ||
TheTriple.getArch() == Triple::mipsel)
UseIntegratedAssembler = true;
+
+ // Enable IAS by default for Debian mips64/mips64el.
+ if (TheTriple.getEnvironment() == Triple::GNUABI64)
+ UseIntegratedAssembler = true;
}
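With the relocation-operator parsing moved out of MipsAsmParser, HasMipsExpressions tells the generic MC layer to route %-prefixed operators to the Mips expression kinds, and the four new directive strings let the streamer print the TLS words added earlier. Below is what a "%hi(foo)" operand boils down to; the wrapper, context and symbol name are illustrative assumptions, while the MipsMCExpr::create call mirrors the one visible in the removed evaluateRelocExpr:

    // Requires the target-local MCTargetDesc/MipsMCExpr.h header.
    const llvm::MCExpr *hiOperandExample(llvm::MCContext &Ctx) {
      llvm::MCSymbol *Foo = Ctx.getOrCreateSymbol("foo");
      const llvm::MCExpr *Ref = llvm::MCSymbolRefExpr::create(Foo, Ctx);
      return llvm::MipsMCExpr::create(llvm::MipsMCExpr::MEK_HI, Ref, Ctx);
    }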
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index 401c7d42c4ce..0614316d5ac7 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -129,7 +129,8 @@ void MipsMCCodeEmitter::LowerCompactBranch(MCInst& Inst) const {
unsigned Reg0 = Ctx.getRegisterInfo()->getEncodingValue(RegOp0);
unsigned Reg1 = Ctx.getRegisterInfo()->getEncodingValue(RegOp1);
- if (Inst.getOpcode() == Mips::BNEC || Inst.getOpcode() == Mips::BEQC) {
+ if (Inst.getOpcode() == Mips::BNEC || Inst.getOpcode() == Mips::BEQC ||
+ Inst.getOpcode() == Mips::BNEC64 || Inst.getOpcode() == Mips::BEQC64) {
assert(Reg0 != Reg1 && "Instruction has bad operands ($rs == $rt)!");
if (Reg0 < Reg1)
return;
@@ -141,7 +142,7 @@ void MipsMCCodeEmitter::LowerCompactBranch(MCInst& Inst) const {
if (Reg1 >= Reg0)
return;
} else
- llvm_unreachable("Cannot rewrite unknown branch!");
+ llvm_unreachable("Cannot rewrite unknown branch!");
Inst.getOperand(0).setReg(RegOp1);
Inst.getOperand(1).setReg(RegOp0);
@@ -210,6 +211,8 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
// Compact branches, enforce encoding restrictions.
case Mips::BEQC:
case Mips::BNEC:
+ case Mips::BEQC64:
+ case Mips::BNEC64:
case Mips::BOVC:
case Mips::BOVC_MMR6:
case Mips::BNVC:
@@ -332,6 +335,30 @@ getBranchTargetOpValueMMR6(const MCInst &MI, unsigned OpNo,
return 0;
}
+/// getBranchTargetOpValueLsl2MMR6 - Return binary encoding of the branch
+/// target operand. If the machine operand requires relocation,
+/// record the relocation and return zero.
+unsigned MipsMCCodeEmitter::
+getBranchTargetOpValueLsl2MMR6(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+
+ const MCOperand &MO = MI.getOperand(OpNo);
+
+ // If the destination is an immediate, divide by 4.
+ if (MO.isImm())
+ return MO.getImm() >> 2;
+
+ assert(MO.isExpr() &&
+ "getBranchTargetOpValueLsl2MMR6 expects only expressions or immediates");
+
+ const MCExpr *FixupExpression = MCBinaryExpr::createAdd(
+ MO.getExpr(), MCConstantExpr::create(-4, Ctx), Ctx);
+ Fixups.push_back(MCFixup::create(0, FixupExpression,
+ MCFixupKind(Mips::fixup_Mips_PC16)));
+ return 0;
+}
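The new encoder helper treats an immediate operand as an already-scaled byte offset and shifts it right by two; a symbolic target instead gets a fixup_Mips_PC16 recorded on target - 4, mirroring the +4 the disassembler folds into its immediates. The expression side in isolation; the wrapper and Ctx are illustrative assumptions:

    // Build the (target - 4) expression the fixup is attached to, exactly as
    // the MCBinaryExpr::createAdd call above does.
    const llvm::MCExpr *lsl2FixupExprExample(const llvm::MCExpr *Target,
                                             llvm::MCContext &Ctx) {
      return llvm::MCBinaryExpr::createAdd(
          Target, llvm::MCConstantExpr::create(-4, Ctx), Ctx);
    }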
+
/// getBranchTarget7OpValueMM - Return binary encoding of the microMIPS branch
/// target operand. If the machine operand requires relocation,
/// record the relocation and return zero.
@@ -432,8 +459,8 @@ getBranchTarget21OpValueMM(const MCInst &MI, unsigned OpNo,
const MCOperand &MO = MI.getOperand(OpNo);
- // If the destination is an immediate, divide by 2.
- if (MO.isImm()) return MO.getImm() >> 1;
+ // If the destination is an immediate, divide by 4.
+ if (MO.isImm()) return MO.getImm() >> 2;
assert(MO.isExpr() &&
"getBranchTarget21OpValueMM expects only expressions or immediates");
@@ -634,7 +661,6 @@ getExprOpValue(const MCExpr *Expr, SmallVectorImpl<MCFixup> &Fixups,
Mips::Fixups FixupKind = Mips::Fixups(0);
switch (MipsExpr->getKind()) {
- case MipsMCExpr::MEK_NEG:
case MipsMCExpr::MEK_None:
case MipsMCExpr::MEK_Special:
llvm_unreachable("Unhandled fixup kind!");
@@ -732,6 +758,10 @@ getExprOpValue(const MCExpr *Expr, SmallVectorImpl<MCFixup> &Fixups,
FixupKind = isMicroMips(STI) ? Mips::fixup_MICROMIPS_TLS_TPREL_LO16
: Mips::fixup_Mips_TPREL_LO;
break;
+ case MipsMCExpr::MEK_NEG:
+ FixupKind =
+ isMicroMips(STI) ? Mips::fixup_MICROMIPS_SUB : Mips::fixup_Mips_SUB;
+ break;
}
Fixups.push_back(MCFixup::create(0, MipsExpr, MCFixupKind(FixupKind)));
return 0;
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
index 0f4dfe1200c0..2d041dcbf040 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
@@ -116,6 +116,13 @@ public:
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ // getBranchTargetOpValueLsl2MMR6 - Return binary encoding of the branch
+ // target operand. If the machine operand requires relocation,
+ // record the relocation and return zero.
+ unsigned getBranchTargetOpValueLsl2MMR6(const MCInst &MI, unsigned OpNo,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
// getBranchTarget7OpValue - Return binary encoding of the microMIPS branch
// target operand. If the machine operand requires relocation,
// record the relocation and return zero.
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
index a05573950974..56fe18572118 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp
@@ -149,8 +149,8 @@ static MCInstrAnalysis *createMipsMCInstrAnalysis(const MCInstrInfo *Info) {
}
extern "C" void LLVMInitializeMipsTargetMC() {
- for (Target *T : {&TheMipsTarget, &TheMipselTarget, &TheMips64Target,
- &TheMips64elTarget}) {
+ for (Target *T : {&getTheMipsTarget(), &getTheMipselTarget(),
+ &getTheMips64Target(), &getTheMips64elTarget()}) {
// Register the MC asm info.
RegisterMCAsmInfoFn X(*T, createMipsMCAsmInfo);
@@ -183,20 +183,19 @@ extern "C" void LLVMInitializeMipsTargetMC() {
}
// Register the MC Code Emitter
- for (Target *T : {&TheMipsTarget, &TheMips64Target})
+ for (Target *T : {&getTheMipsTarget(), &getTheMips64Target()})
TargetRegistry::RegisterMCCodeEmitter(*T, createMipsMCCodeEmitterEB);
- for (Target *T : {&TheMipselTarget, &TheMips64elTarget})
+ for (Target *T : {&getTheMipselTarget(), &getTheMips64elTarget()})
TargetRegistry::RegisterMCCodeEmitter(*T, createMipsMCCodeEmitterEL);
// Register the asm backend.
- TargetRegistry::RegisterMCAsmBackend(TheMipsTarget,
+ TargetRegistry::RegisterMCAsmBackend(getTheMipsTarget(),
createMipsAsmBackendEB32);
- TargetRegistry::RegisterMCAsmBackend(TheMipselTarget,
+ TargetRegistry::RegisterMCAsmBackend(getTheMipselTarget(),
createMipsAsmBackendEL32);
- TargetRegistry::RegisterMCAsmBackend(TheMips64Target,
+ TargetRegistry::RegisterMCAsmBackend(getTheMips64Target(),
createMipsAsmBackendEB64);
- TargetRegistry::RegisterMCAsmBackend(TheMips64elTarget,
+ TargetRegistry::RegisterMCAsmBackend(getTheMips64elTarget(),
createMipsAsmBackendEL64);
-
}
diff --git a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
index 4069d7d184ed..b28681f42ebe 100644
--- a/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h
@@ -24,16 +24,17 @@ class MCInstrInfo;
class MCObjectWriter;
class MCRegisterInfo;
class MCSubtargetInfo;
+class MCTargetOptions;
class StringRef;
class Target;
class Triple;
class raw_ostream;
class raw_pwrite_stream;
-extern Target TheMipsTarget;
-extern Target TheMipselTarget;
-extern Target TheMips64Target;
-extern Target TheMips64elTarget;
+Target &getTheMipsTarget();
+Target &getTheMipselTarget();
+Target &getTheMips64Target();
+Target &getTheMips64elTarget();
MCCodeEmitter *createMipsMCCodeEmitterEB(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
@@ -44,16 +45,20 @@ MCCodeEmitter *createMipsMCCodeEmitterEL(const MCInstrInfo &MCII,
MCAsmBackend *createMipsAsmBackendEB32(const Target &T,
const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU);
+ const Triple &TT, StringRef CPU,
+ const MCTargetOptions &Options);
MCAsmBackend *createMipsAsmBackendEL32(const Target &T,
const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU);
+ const Triple &TT, StringRef CPU,
+ const MCTargetOptions &Options);
MCAsmBackend *createMipsAsmBackendEB64(const Target &T,
const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU);
+ const Triple &TT, StringRef CPU,
+ const MCTargetOptions &Options);
MCAsmBackend *createMipsAsmBackendEL64(const Target &T,
const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU);
+ const Triple &TT, StringRef CPU,
+ const MCTargetOptions &Options);
MCObjectWriter *createMipsELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI,
bool IsLittleEndian, bool Is64Bit);
diff --git a/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td b/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td
index 2b636cfe3bfe..fd04f80dd566 100644
--- a/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/MicroMips32r6InstrInfo.td
@@ -32,6 +32,15 @@ def brtargetr6 : Operand<OtherVT> {
let ParserMatchClass = MipsJumpTargetAsmOperand;
}
+def brtarget_lsl2_mm : Operand<OtherVT> {
+ let EncoderMethod = "getBranchTargetOpValueLsl2MMR6";
+ let OperandType = "OPERAND_PCREL";
+ // Instructions that use this operand have their decoder method
+ // set with DecodeDisambiguates
+ let DecoderMethod = "";
+ let ParserMatchClass = MipsJumpTargetAsmOperand;
+}
+
//===----------------------------------------------------------------------===//
//
// Instruction Encodings
@@ -56,16 +65,28 @@ class BITSWAP_MMR6_ENC : POOL32A_BITSWAP_FM_MMR6<0b101100>;
class BRK_MMR6_ENC : BREAK_MMR6_ENC<"break">;
class BEQZC_MMR6_ENC : CMP_BRANCH_OFF21_FM_MMR6<"beqzc", 0b100000>;
class BNEZC_MMR6_ENC : CMP_BRANCH_OFF21_FM_MMR6<"bnezc", 0b101000>;
-class BGEC_MMR6_ENC : CMP_BRANCH_2R_OFF16_FM_MMR6<"bgec", 0b111001>;
+class BGEC_MMR6_ENC : CMP_BRANCH_2R_OFF16_FM_MMR6<"bgec", 0b111101>,
+ DecodeDisambiguates<"POP75GroupBranchMMR6">;
class BGEUC_MMR6_ENC : CMP_BRANCH_2R_OFF16_FM_MMR6<"bgeuc", 0b110000>,
DecodeDisambiguates<"BlezGroupBranchMMR6">;
-class BLTC_MMR6_ENC : CMP_BRANCH_2R_OFF16_FM_MMR6<"bltc", 0b110001>;
+class BLTC_MMR6_ENC : CMP_BRANCH_2R_OFF16_FM_MMR6<"bltc", 0b110101>,
+ DecodeDisambiguates<"POP65GroupBranchMMR6">;
class BLTUC_MMR6_ENC : CMP_BRANCH_2R_OFF16_FM_MMR6<"bltuc", 0b111000>,
DecodeDisambiguates<"BgtzGroupBranchMMR6">;
class BEQC_MMR6_ENC : CMP_BRANCH_2R_OFF16_FM_MMR6<"beqc", 0b011101>;
class BNEC_MMR6_ENC : CMP_BRANCH_2R_OFF16_FM_MMR6<"bnec", 0b011111>;
-class BEQZALC_MMR6_ENC : CMP_BRANCH_1R_RT_OFF16_FM_MMR6<"beqzalc", 0b011101>;
-class BNEZALC_MMR6_ENC : CMP_BRANCH_1R_RT_OFF16_FM_MMR6<"bnezalc", 0b011111>;
+class BLTZC_MMR6_ENC : CMP_BRANCH_1R_BOTH_OFF16_FM_MMR6<"bltzc", 0b110101>,
+ DecodeDisambiguates<"POP65GroupBranchMMR6">;
+class BLEZC_MMR6_ENC : CMP_BRANCH_1R_RT_OFF16_FM_MMR6<"blezc", 0b111101>,
+ DecodeDisambiguates<"POP75GroupBranchMMR6">;
+class BGEZC_MMR6_ENC : CMP_BRANCH_1R_BOTH_OFF16_FM_MMR6<"bgezc", 0b111101>,
+ DecodeDisambiguates<"POP75GroupBranchMMR6">;
+class BGTZC_MMR6_ENC : CMP_BRANCH_1R_RT_OFF16_FM_MMR6<"bgtzc", 0b110101>,
+ DecodeDisambiguates<"POP65GroupBranchMMR6">;
+class BEQZALC_MMR6_ENC : CMP_BRANCH_1R_RT_OFF16_FM_MMR6<"beqzalc", 0b011101>,
+ DecodeDisambiguates<"POP35GroupBranchMMR6">;
+class BNEZALC_MMR6_ENC : CMP_BRANCH_1R_RT_OFF16_FM_MMR6<"bnezalc", 0b011111>,
+ DecodeDisambiguates<"POP37GroupBranchMMR6">;
class BGTZALC_MMR6_ENC : CMP_BRANCH_1R_RT_OFF16_FM_MMR6<"bgtzalc", 0b111000>,
MMDecodeDisambiguatedBy<"BgtzGroupBranchMMR6">;
class BLTZALC_MMR6_ENC : CMP_BRANCH_1R_BOTH_OFF16_FM_MMR6<"bltzalc", 0b111000>,
@@ -165,8 +186,6 @@ class TRUNC_W_S_MMR6_ENC : POOL32F_MATH_FM_MMR6<"trunc.w.s", 0, 0b10101100>;
class TRUNC_W_D_MMR6_ENC : POOL32F_MATH_FM_MMR6<"trunc.w.d", 1, 0b10101100>;
class SQRT_S_MMR6_ENC : POOL32F_MATH_FM_MMR6<"sqrt.s", 0, 0b00101000>;
class SQRT_D_MMR6_ENC : POOL32F_MATH_FM_MMR6<"sqrt.d", 1, 0b00101000>;
-class RSQRT_S_MMR6_ENC : POOL32F_MATH_FM_MMR6<"rsqrt.s", 0, 0b00001000>;
-class RSQRT_D_MMR6_ENC : POOL32F_MATH_FM_MMR6<"rsqrt.d", 1, 0b00001000>;
class SB_MMR6_ENC : SB32_SH32_STORE_FM_MMR6<0b000110>;
class SBE_MMR6_ENC : POOL32C_STORE_EVA_FM_MMR6<0b100>;
class SCE_MMR6_ENC : POOL32C_STORE_EVA_FM_MMR6<0b110>;
@@ -177,8 +196,6 @@ class LWE_MMR6_ENC : LOAD_WORD_EVA_FM_MMR6<0b111>;
class LW_MMR6_ENC : LOAD_WORD_FM_MMR6;
class LUI_MMR6_ENC : LOAD_UPPER_IMM_FM_MMR6;
class JALRC_HB_MMR6_ENC : POOL32A_JALRC_FM_MMR6<"jalrc.hb", 0b0001111100>;
-class RECIP_S_MMR6_ENC : POOL32F_RECIP_ROUND_FM_MMR6<"recip.s", 0, 0b01001000>;
-class RECIP_D_MMR6_ENC : POOL32F_RECIP_ROUND_FM_MMR6<"recip.d", 1, 0b01001000>;
class RINT_S_MMR6_ENC : POOL32F_RINT_FM_MMR6<"rint.s", 0>;
class RINT_D_MMR6_ENC : POOL32F_RINT_FM_MMR6<"rint.d", 1>;
class ROUND_L_S_MMR6_ENC : POOL32F_RECIP_ROUND_FM_MMR6<"round.l.s", 0,
@@ -230,6 +247,49 @@ class SDC2_MMR6_ENC : POOL32B_LDWC2_SDWC2_FM_MMR6<"sdc2", 0b1010>;
class LWC2_MMR6_ENC : POOL32B_LDWC2_SDWC2_FM_MMR6<"lwc2", 0b0000>;
class SWC2_MMR6_ENC : POOL32B_LDWC2_SDWC2_FM_MMR6<"swc2", 0b1000>;
+/// Floating Point Instructions
+class FADD_S_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"add.s", 0, 0b00110000>;
+class FADD_D_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"add.d", 1, 0b00110000>;
+class FSUB_S_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"sub.s", 0, 0b01110000>;
+class FSUB_D_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"sub.d", 1, 0b01110000>;
+class FMUL_S_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"mul.s", 0, 0b10110000>;
+class FMUL_D_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"mul.d", 1, 0b10110000>;
+class FDIV_S_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"div.s", 0, 0b11110000>;
+class FDIV_D_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"div.d", 1, 0b11110000>;
+class MADDF_S_MMR6_ENC : POOL32F_ARITHF_FM_MMR6<"maddf.s", 0, 0b110111000>;
+class MADDF_D_MMR6_ENC : POOL32F_ARITHF_FM_MMR6<"maddf.d", 1, 0b110111000>;
+class MSUBF_S_MMR6_ENC : POOL32F_ARITHF_FM_MMR6<"msubf.s", 0, 0b111111000>;
+class MSUBF_D_MMR6_ENC : POOL32F_ARITHF_FM_MMR6<"msubf.d", 1, 0b111111000>;
+class FMOV_S_MMR6_ENC : POOL32F_MOV_NEG_FM_MMR6<"mov.s", 0, 0b0000001>;
+class FMOV_D_MMR6_ENC : POOL32F_MOV_NEG_FM_MMR6<"mov.d", 1, 0b0000001>;
+class FNEG_S_MMR6_ENC : POOL32F_MOV_NEG_FM_MMR6<"neg.s", 0, 0b0101101>;
+class FNEG_D_MMR6_ENC : POOL32F_MOV_NEG_FM_MMR6<"neg.d", 1, 0b0101101>;
+class MAX_S_MMR6_ENC : POOL32F_MINMAX_FM<"max.s", 0, 0b000001011>;
+class MAX_D_MMR6_ENC : POOL32F_MINMAX_FM<"max.d", 1, 0b000001011>;
+class MAXA_S_MMR6_ENC : POOL32F_MINMAX_FM<"maxa.s", 0, 0b000101011>;
+class MAXA_D_MMR6_ENC : POOL32F_MINMAX_FM<"maxa.d", 1, 0b000101011>;
+class MIN_S_MMR6_ENC : POOL32F_MINMAX_FM<"min.s", 0, 0b000000011>;
+class MIN_D_MMR6_ENC : POOL32F_MINMAX_FM<"min.d", 1, 0b000000011>;
+class MINA_S_MMR6_ENC : POOL32F_MINMAX_FM<"mina.s", 0, 0b000100011>;
+class MINA_D_MMR6_ENC : POOL32F_MINMAX_FM<"mina.d", 1, 0b000100011>;
+
+class CVT_L_S_MMR6_ENC : POOL32F_CVT_LW_FM<"cvt.l.s", 0, 0b00000100>;
+class CVT_L_D_MMR6_ENC : POOL32F_CVT_LW_FM<"cvt.l.d", 1, 0b00000100>;
+class CVT_W_S_MMR6_ENC : POOL32F_CVT_LW_FM<"cvt.w.s", 0, 0b00100100>;
+class CVT_W_D_MMR6_ENC : POOL32F_CVT_LW_FM<"cvt.w.d", 1, 0b00100100>;
+class CVT_D_S_MMR6_ENC : POOL32F_CVT_DS_FM<"cvt.d.s", 0, 0b1001101>;
+class CVT_D_W_MMR6_ENC : POOL32F_CVT_DS_FM<"cvt.d.w", 1, 0b1001101>;
+class CVT_D_L_MMR6_ENC : POOL32F_CVT_DS_FM<"cvt.d.l", 2, 0b1001101>;
+class CVT_S_D_MMR6_ENC : POOL32F_CVT_DS_FM<"cvt.s.d", 0, 0b1101101>;
+class CVT_S_W_MMR6_ENC : POOL32F_CVT_DS_FM<"cvt.s.w", 1, 0b1101101>;
+class CVT_S_L_MMR6_ENC : POOL32F_CVT_DS_FM<"cvt.s.l", 2, 0b1101101>;
+
+//===----------------------------------------------------------------------===//
+//
+// Instruction Descriptions
+//
+//===----------------------------------------------------------------------===//
+
class CMP_CBR_RT_Z_MMR6_DESC_BASE<string instr_asm, DAGOperand opnd,
RegisterOperand GPROpnd>
: BRANCH_DESC_BASE {
@@ -237,6 +297,7 @@ class CMP_CBR_RT_Z_MMR6_DESC_BASE<string instr_asm, DAGOperand opnd,
dag OutOperandList = (outs);
string AsmString = !strconcat(instr_asm, "\t$rt, $offset");
list<Register> Defs = [AT];
+ InstrItinClass Itinerary = II_BCCZC;
}
class BEQZALC_MMR6_DESC : CMP_CBR_RT_Z_MMR6_DESC_BASE<"beqzalc", brtarget_mm,
@@ -269,91 +330,59 @@ class BNEZALC_MMR6_DESC : CMP_CBR_RT_Z_MMR6_DESC_BASE<"bnezalc", brtarget_mm,
list<Register> Defs = [RA];
}
+class BLTZC_MMR6_DESC : CMP_CBR_RT_Z_MMR6_DESC_BASE<"bltzc", brtarget_lsl2_mm,
+ GPR32Opnd>;
+class BLEZC_MMR6_DESC : CMP_CBR_RT_Z_MMR6_DESC_BASE<"blezc", brtarget_lsl2_mm,
+ GPR32Opnd>;
+class BGEZC_MMR6_DESC : CMP_CBR_RT_Z_MMR6_DESC_BASE<"bgezc", brtarget_lsl2_mm,
+ GPR32Opnd>;
+class BGTZC_MMR6_DESC : CMP_CBR_RT_Z_MMR6_DESC_BASE<"bgtzc", brtarget_lsl2_mm,
+ GPR32Opnd>;
+
class CMP_CBR_2R_MMR6_DESC_BASE<string instr_asm, DAGOperand opnd,
RegisterOperand GPROpnd> : BRANCH_DESC_BASE {
dag InOperandList = (ins GPROpnd:$rs, GPROpnd:$rt, opnd:$offset);
dag OutOperandList = (outs);
string AsmString = !strconcat(instr_asm, "\t$rs, $rt, $offset");
list<Register> Defs = [AT];
+ InstrItinClass Itinerary = II_BCCC;
}
-class BGEC_MMR6_DESC : CMP_CBR_2R_MMR6_DESC_BASE<"bgec", brtarget_mm,
+class BGEC_MMR6_DESC : CMP_CBR_2R_MMR6_DESC_BASE<"bgec", brtarget_lsl2_mm,
GPR32Opnd>;
-class BGEUC_MMR6_DESC : CMP_CBR_2R_MMR6_DESC_BASE<"bgeuc", brtarget_mm,
+class BGEUC_MMR6_DESC : CMP_CBR_2R_MMR6_DESC_BASE<"bgeuc", brtarget_lsl2_mm,
GPR32Opnd>;
-class BLTC_MMR6_DESC : CMP_CBR_2R_MMR6_DESC_BASE<"bltc", brtarget_mm,
+class BLTC_MMR6_DESC : CMP_CBR_2R_MMR6_DESC_BASE<"bltc", brtarget_lsl2_mm,
GPR32Opnd>;
-class BLTUC_MMR6_DESC : CMP_CBR_2R_MMR6_DESC_BASE<"bltuc", brtarget_mm,
+class BLTUC_MMR6_DESC : CMP_CBR_2R_MMR6_DESC_BASE<"bltuc", brtarget_lsl2_mm,
GPR32Opnd>;
-class BEQC_MMR6_DESC : CMP_CBR_2R_MMR6_DESC_BASE<"beqc", brtarget_mm,
+class BEQC_MMR6_DESC : CMP_CBR_2R_MMR6_DESC_BASE<"beqc", brtarget_lsl2_mm,
GPR32Opnd>;
-class BNEC_MMR6_DESC : CMP_CBR_2R_MMR6_DESC_BASE<"bnec", brtarget_mm,
+class BNEC_MMR6_DESC : CMP_CBR_2R_MMR6_DESC_BASE<"bnec", brtarget_lsl2_mm,
GPR32Opnd>;
-/// Floating Point Instructions
-class FADD_S_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"add.s", 0, 0b00110000>;
-class FADD_D_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"add.d", 1, 0b00110000>;
-class FSUB_S_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"sub.s", 0, 0b01110000>;
-class FSUB_D_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"sub.d", 1, 0b01110000>;
-class FMUL_S_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"mul.s", 0, 0b10110000>;
-class FMUL_D_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"mul.d", 1, 0b10110000>;
-class FDIV_S_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"div.s", 0, 0b11110000>;
-class FDIV_D_MMR6_ENC : POOL32F_ARITH_FM_MMR6<"div.d", 1, 0b11110000>;
-class MADDF_S_MMR6_ENC : POOL32F_ARITHF_FM_MMR6<"maddf.s", 0, 0b110111000>;
-class MADDF_D_MMR6_ENC : POOL32F_ARITHF_FM_MMR6<"maddf.d", 1, 0b110111000>;
-class MSUBF_S_MMR6_ENC : POOL32F_ARITHF_FM_MMR6<"msubf.s", 0, 0b111111000>;
-class MSUBF_D_MMR6_ENC : POOL32F_ARITHF_FM_MMR6<"msubf.d", 1, 0b111111000>;
-class FMOV_S_MMR6_ENC : POOL32F_MOV_NEG_FM_MMR6<"mov.s", 0, 0b0000001>;
-class FMOV_D_MMR6_ENC : POOL32F_MOV_NEG_FM_MMR6<"mov.d", 1, 0b0000001>;
-class FNEG_S_MMR6_ENC : POOL32F_MOV_NEG_FM_MMR6<"neg.s", 0, 0b0101101>;
-class FNEG_D_MMR6_ENC : POOL32F_MOV_NEG_FM_MMR6<"neg.d", 1, 0b0101101>;
-class MAX_S_MMR6_ENC : POOL32F_MINMAX_FM<"max.s", 0, 0b000001011>;
-class MAX_D_MMR6_ENC : POOL32F_MINMAX_FM<"max.d", 1, 0b000001011>;
-class MAXA_S_MMR6_ENC : POOL32F_MINMAX_FM<"maxa.s", 0, 0b000101011>;
-class MAXA_D_MMR6_ENC : POOL32F_MINMAX_FM<"maxa.d", 1, 0b000101011>;
-class MIN_S_MMR6_ENC : POOL32F_MINMAX_FM<"min.s", 0, 0b000000011>;
-class MIN_D_MMR6_ENC : POOL32F_MINMAX_FM<"min.d", 1, 0b000000011>;
-class MINA_S_MMR6_ENC : POOL32F_MINMAX_FM<"mina.s", 0, 0b000100011>;
-class MINA_D_MMR6_ENC : POOL32F_MINMAX_FM<"mina.d", 1, 0b000100011>;
-
-class CVT_L_S_MMR6_ENC : POOL32F_CVT_LW_FM<"cvt.l.s", 0, 0b00000100>;
-class CVT_L_D_MMR6_ENC : POOL32F_CVT_LW_FM<"cvt.l.d", 1, 0b00000100>;
-class CVT_W_S_MMR6_ENC : POOL32F_CVT_LW_FM<"cvt.w.s", 0, 0b00100100>;
-class CVT_W_D_MMR6_ENC : POOL32F_CVT_LW_FM<"cvt.w.d", 1, 0b00100100>;
-class CVT_D_S_MMR6_ENC : POOL32F_CVT_DS_FM<"cvt.d.s", 0, 0b1001101>;
-class CVT_D_W_MMR6_ENC : POOL32F_CVT_DS_FM<"cvt.d.w", 1, 0b1001101>;
-class CVT_D_L_MMR6_ENC : POOL32F_CVT_DS_FM<"cvt.d.l", 2, 0b1001101>;
-class CVT_S_D_MMR6_ENC : POOL32F_CVT_DS_FM<"cvt.s.d", 0, 0b1101101>;
-class CVT_S_W_MMR6_ENC : POOL32F_CVT_DS_FM<"cvt.s.w", 1, 0b1101101>;
-class CVT_S_L_MMR6_ENC : POOL32F_CVT_DS_FM<"cvt.s.l", 2, 0b1101101>;
-
-//===----------------------------------------------------------------------===//
-//
-// Instruction Descriptions
-//
-//===----------------------------------------------------------------------===//
-
-class ADD_MMR6_DESC : ArithLogicR<"add", GPR32Opnd>;
+class ADD_MMR6_DESC : ArithLogicR<"add", GPR32Opnd, 1, II_ADD>;
class ADDIU_MMR6_DESC : ArithLogicI<"addiu", simm16, GPR32Opnd, II_ADDIU, immSExt16, add>;
-class ADDU_MMR6_DESC : ArithLogicR<"addu", GPR32Opnd>;
+class ADDU_MMR6_DESC : ArithLogicR<"addu", GPR32Opnd, 1, II_ADDU>;
class MUL_MMR6_DESC : ArithLogicR<"mul", GPR32Opnd, 1, II_MUL, mul>;
class MUH_MMR6_DESC : ArithLogicR<"muh", GPR32Opnd, 1, II_MUH, mulhs>;
class MULU_MMR6_DESC : ArithLogicR<"mulu", GPR32Opnd, 1, II_MULU>;
class MUHU_MMR6_DESC : ArithLogicR<"muhu", GPR32Opnd, 1, II_MUHU, mulhu>;
-class BC_MMR6_DESC_BASE<string instr_asm, DAGOperand opnd>
+class BC_MMR6_DESC_BASE<string instr_asm, DAGOperand opnd, InstrItinClass Itin>
: BRANCH_DESC_BASE, MMR6Arch<instr_asm> {
dag InOperandList = (ins opnd:$offset);
dag OutOperandList = (outs);
string AsmString = !strconcat(instr_asm, "\t$offset");
bit isBarrier = 1;
+ InstrItinClass Itinerary = Itin;
}
-class BALC_MMR6_DESC : BC_MMR6_DESC_BASE<"balc", brtarget26_mm> {
+class BALC_MMR6_DESC : BC_MMR6_DESC_BASE<"balc", brtarget26_mm, II_BALC> {
bit isCall = 1;
list<Register> Defs = [RA];
}
-class BC_MMR6_DESC : BC_MMR6_DESC_BASE<"bc", brtarget26_mm>;
+class BC_MMR6_DESC : BC_MMR6_DESC_BASE<"bc", brtarget26_mm, II_BC>;
class BC16_MMR6_DESC : MicroMipsInst16<(outs), (ins brtarget10_mm:$offset),
!strconcat("bc16", "\t$offset"), [],
@@ -377,8 +406,8 @@ class BEQZC_BNEZC_MM16R6_DESC_BASE<string instr_asm>
class BEQZC16_MMR6_DESC : BEQZC_BNEZC_MM16R6_DESC_BASE<"beqzc16">;
class BNEZC16_MMR6_DESC : BEQZC_BNEZC_MM16R6_DESC_BASE<"bnezc16">;
-class SUB_MMR6_DESC : ArithLogicR<"sub", GPR32Opnd>;
-class SUBU_MMR6_DESC : ArithLogicR<"subu", GPR32Opnd>;
+class SUB_MMR6_DESC : ArithLogicR<"sub", GPR32Opnd, 0, II_SUB>;
+class SUBU_MMR6_DESC : ArithLogicR<"subu", GPR32Opnd, 0, II_SUBU>;
class BITSWAP_MMR6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd>
: MMR6Arch<instr_asm> {
@@ -386,6 +415,7 @@ class BITSWAP_MMR6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd>
dag InOperandList = (ins GPROpnd:$rt);
string AsmString = !strconcat(instr_asm, "\t$rd, $rt");
list<dag> Pattern = [];
+ InstrItinClass Itinerary = II_BITSWAP;
}
class BITSWAP_MMR6_DESC : BITSWAP_MMR6_DESC_BASE<"bitswap", GPR32Opnd>;
@@ -393,63 +423,74 @@ class BITSWAP_MMR6_DESC : BITSWAP_MMR6_DESC_BASE<"bitswap", GPR32Opnd>;
class BRK_MMR6_DESC : BRK_FT<"break">;
class CACHE_HINT_MMR6_DESC<string instr_asm, Operand MemOpnd,
- RegisterOperand GPROpnd> : MMR6Arch<instr_asm> {
+ RegisterOperand GPROpnd, InstrItinClass Itin>
+ : MMR6Arch<instr_asm> {
dag OutOperandList = (outs);
dag InOperandList = (ins MemOpnd:$addr, uimm5:$hint);
string AsmString = !strconcat(instr_asm, "\t$hint, $addr");
list<dag> Pattern = [];
string DecoderMethod = "DecodeCacheOpMM";
+ InstrItinClass Itinerary = Itin;
}
-class CACHE_MMR6_DESC : CACHE_HINT_MMR6_DESC<"cache", mem_mm_12, GPR32Opnd>;
-class PREF_MMR6_DESC : CACHE_HINT_MMR6_DESC<"pref", mem_mm_12, GPR32Opnd>;
+class CACHE_MMR6_DESC : CACHE_HINT_MMR6_DESC<"cache", mem_mm_12, GPR32Opnd,
+ II_CACHE>;
+class PREF_MMR6_DESC : CACHE_HINT_MMR6_DESC<"pref", mem_mm_12, GPR32Opnd,
+ II_PREF>;
class PREFE_CACHEE_MMR6_DESC_BASE<string instr_asm, Operand MemOpnd,
- RegisterOperand GPROpnd> :
- CACHE_HINT_MMR6_DESC<instr_asm, MemOpnd,
- GPROpnd> {
+ RegisterOperand GPROpnd, InstrItinClass Itin>
+ : CACHE_HINT_MMR6_DESC<instr_asm, MemOpnd, GPROpnd, Itin> {
string DecoderMethod = "DecodePrefeOpMM";
}
-class PREFE_MMR6_DESC : PREFE_CACHEE_MMR6_DESC_BASE<"prefe", mem_mm_9, GPR32Opnd>;
-class CACHEE_MMR6_DESC : PREFE_CACHEE_MMR6_DESC_BASE<"cachee", mem_mm_9, GPR32Opnd>;
+class PREFE_MMR6_DESC : PREFE_CACHEE_MMR6_DESC_BASE<"prefe", mem_mm_9,
+ GPR32Opnd, II_PREFE>;
+class CACHEE_MMR6_DESC : PREFE_CACHEE_MMR6_DESC_BASE<"cachee", mem_mm_9,
+ GPR32Opnd, II_CACHEE>;
class LB_LBU_MMR6_DESC_BASE<string instr_asm, Operand MemOpnd,
- RegisterOperand GPROpnd> : MMR6Arch<instr_asm> {
+ RegisterOperand GPROpnd, InstrItinClass Itin>
+ : MMR6Arch<instr_asm> {
dag OutOperandList = (outs GPROpnd:$rt);
dag InOperandList = (ins MemOpnd:$addr);
string AsmString = !strconcat(instr_asm, "\t$rt, $addr");
string DecoderMethod = "DecodeLoadByte15";
bit mayLoad = 1;
+ InstrItinClass Itinerary = Itin;
}
-class LB_MMR6_DESC : LB_LBU_MMR6_DESC_BASE<"lb", mem_mm_16, GPR32Opnd>;
-class LBU_MMR6_DESC : LB_LBU_MMR6_DESC_BASE<"lbu", mem_mm_16, GPR32Opnd>;
+class LB_MMR6_DESC : LB_LBU_MMR6_DESC_BASE<"lb", mem_mm_16, GPR32Opnd, II_LB>;
+class LBU_MMR6_DESC : LB_LBU_MMR6_DESC_BASE<"lbu", mem_mm_16, GPR32Opnd,
+ II_LBU>;
class LBE_LBUE_MMR6_DESC_BASE<string instr_asm, Operand MemOpnd,
- RegisterOperand GPROpnd>
- : LB_LBU_MMR6_DESC_BASE<instr_asm, MemOpnd, GPROpnd> {
+ RegisterOperand GPROpnd, InstrItinClass Itin>
+ : LB_LBU_MMR6_DESC_BASE<instr_asm, MemOpnd, GPROpnd, Itin> {
let DecoderMethod = "DecodeLoadByte9";
}
-class LBE_MMR6_DESC : LBE_LBUE_MMR6_DESC_BASE<"lbe", mem_mm_9, GPR32Opnd>;
-class LBUE_MMR6_DESC : LBE_LBUE_MMR6_DESC_BASE<"lbue", mem_mm_9, GPR32Opnd>;
+class LBE_MMR6_DESC : LBE_LBUE_MMR6_DESC_BASE<"lbe", mem_mm_9, GPR32Opnd,
+ II_LBE>;
+class LBUE_MMR6_DESC : LBE_LBUE_MMR6_DESC_BASE<"lbue", mem_mm_9, GPR32Opnd,
+ II_LBUE>;
-class CLO_CLZ_MMR6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd>
- : MMR6Arch<instr_asm> {
+class CLO_CLZ_MMR6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
+ InstrItinClass Itin> : MMR6Arch<instr_asm> {
dag OutOperandList = (outs GPROpnd:$rt);
dag InOperandList = (ins GPROpnd:$rs);
string AsmString = !strconcat(instr_asm, "\t$rt, $rs");
+ InstrItinClass Itinerary = Itin;
}
-class CLO_MMR6_DESC : CLO_CLZ_MMR6_DESC_BASE<"clo", GPR32Opnd>;
-class CLZ_MMR6_DESC : CLO_CLZ_MMR6_DESC_BASE<"clz", GPR32Opnd>;
+class CLO_MMR6_DESC : CLO_CLZ_MMR6_DESC_BASE<"clo", GPR32Opnd, II_CLO>;
+class CLZ_MMR6_DESC : CLO_CLZ_MMR6_DESC_BASE<"clz", GPR32Opnd, II_CLZ>;
-class EHB_MMR6_DESC : Barrier<"ehb">;
-class EI_MMR6_DESC : DEI_FT<"ei", GPR32Opnd>;
-class DI_MMR6_DESC : DEI_FT<"di", GPR32Opnd>;
+class EHB_MMR6_DESC : Barrier<"ehb", II_EHB>;
+class EI_MMR6_DESC : DEI_FT<"ei", GPR32Opnd, II_EI>;
+class DI_MMR6_DESC : DEI_FT<"di", GPR32Opnd, II_DI>;
-class ERET_MMR6_DESC : ER_FT<"eret">;
-class DERET_MMR6_DESC : ER_FT<"deret">;
-class ERETNC_MMR6_DESC : ER_FT<"eretnc">;
+class ERET_MMR6_DESC : ER_FT<"eret", II_ERET>;
+class DERET_MMR6_DESC : ER_FT<"deret", II_DERET>;
+class ERETNC_MMR6_DESC : ER_FT<"eretnc", II_ERETNC>;
class JALRC16_MMR6_DESC_BASE<string opstr, RegisterOperand RO>
: MicroMipsInst16<(outs), (ins RO:$rs), !strconcat(opstr, "\t$rs"),
@@ -462,23 +503,25 @@ class JALRC16_MMR6_DESC_BASE<string opstr, RegisterOperand RO>
class JALRC16_MMR6_DESC : JALRC16_MMR6_DESC_BASE<"jalr", GPR32Opnd>;
class JMP_MMR6_IDX_COMPACT_DESC_BASE<string opstr, DAGOperand opnd,
- RegisterOperand GPROpnd>
+ RegisterOperand GPROpnd,
+ InstrItinClass Itin>
: MMR6Arch<opstr> {
dag InOperandList = (ins GPROpnd:$rt, opnd:$offset);
string AsmString = !strconcat(opstr, "\t$rt, $offset");
list<dag> Pattern = [];
bit isTerminator = 1;
bit hasDelaySlot = 0;
+ InstrItinClass Itinerary = Itin;
}
class JIALC_MMR6_DESC : JMP_MMR6_IDX_COMPACT_DESC_BASE<"jialc", calloffset16,
- GPR32Opnd> {
+ GPR32Opnd, II_JIALC> {
bit isCall = 1;
list<Register> Defs = [RA];
}
class JIC_MMR6_DESC : JMP_MMR6_IDX_COMPACT_DESC_BASE<"jic", jmpoffset16,
- GPR32Opnd> {
+ GPR32Opnd, II_JIC> {
bit isBarrier = 1;
list<Register> Defs = [AT];
}
@@ -505,65 +548,76 @@ class JRCADDIUSP_MMR6_DESC
}
class ALIGN_MMR6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
- Operand ImmOpnd> : MMR6Arch<instr_asm> {
+ Operand ImmOpnd, InstrItinClass Itin>
+ : MMR6Arch<instr_asm> {
dag OutOperandList = (outs GPROpnd:$rd);
dag InOperandList = (ins GPROpnd:$rs, GPROpnd:$rt, ImmOpnd:$bp);
string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt, $bp");
list<dag> Pattern = [];
+ InstrItinClass Itinerary = Itin;
}
-class ALIGN_MMR6_DESC : ALIGN_MMR6_DESC_BASE<"align", GPR32Opnd, uimm2>;
+class ALIGN_MMR6_DESC : ALIGN_MMR6_DESC_BASE<"align", GPR32Opnd, uimm2,
+ II_ALIGN>;
-class AUI_MMR6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd>
- : MMR6Arch<instr_asm> {
+class AUI_MMR6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
+ InstrItinClass Itin> : MMR6Arch<instr_asm> {
dag OutOperandList = (outs GPROpnd:$rt);
- dag InOperandList = (ins GPROpnd:$rs, simm16:$imm);
+ dag InOperandList = (ins GPROpnd:$rs, uimm16:$imm);
string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $imm");
list<dag> Pattern = [];
+ InstrItinClass Itinerary = Itin;
}
-class AUI_MMR6_DESC : AUI_MMR6_DESC_BASE<"aui", GPR32Opnd>;
+class AUI_MMR6_DESC : AUI_MMR6_DESC_BASE<"aui", GPR32Opnd, II_AUI>;
class SEB_MMR6_DESC : SignExtInReg<"seb", i8, GPR32Opnd, II_SEB>;
class SEH_MMR6_DESC : SignExtInReg<"seh", i16, GPR32Opnd, II_SEH>;
-class ALUIPC_MMR6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd>
- : MMR6Arch<instr_asm> {
+class ALUIPC_MMR6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
+ InstrItinClass Itin> : MMR6Arch<instr_asm> {
dag OutOperandList = (outs GPROpnd:$rt);
dag InOperandList = (ins simm16:$imm);
string AsmString = !strconcat(instr_asm, "\t$rt, $imm");
list<dag> Pattern = [];
+ InstrItinClass Itinerary = Itin;
}
-class ALUIPC_MMR6_DESC : ALUIPC_MMR6_DESC_BASE<"aluipc", GPR32Opnd>;
-class AUIPC_MMR6_DESC : ALUIPC_MMR6_DESC_BASE<"auipc", GPR32Opnd>;
+class ALUIPC_MMR6_DESC : ALUIPC_MMR6_DESC_BASE<"aluipc", GPR32Opnd, II_ALUIPC>;
+class AUIPC_MMR6_DESC : ALUIPC_MMR6_DESC_BASE<"auipc", GPR32Opnd, II_AUIPC>;
class LSA_MMR6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
- Operand ImmOpnd> : MMR6Arch<instr_asm> {
+ Operand ImmOpnd, InstrItinClass Itin>
+ : MMR6Arch<instr_asm> {
dag OutOperandList = (outs GPROpnd:$rd);
dag InOperandList = (ins GPROpnd:$rs, GPROpnd:$rt, ImmOpnd:$imm2);
string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $rd, $imm2");
list<dag> Pattern = [];
+ InstrItinClass Itinerary = Itin;
}
-class LSA_MMR6_DESC : LSA_MMR6_DESC_BASE<"lsa", GPR32Opnd, uimm2_plus1>;
+class LSA_MMR6_DESC : LSA_MMR6_DESC_BASE<"lsa", GPR32Opnd, uimm2_plus1, II_LSA>;
class PCREL_MMR6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
- Operand ImmOpnd> : MMR6Arch<instr_asm> {
+ Operand ImmOpnd, InstrItinClass Itin>
+ : MMR6Arch<instr_asm> {
dag OutOperandList = (outs GPROpnd:$rt);
dag InOperandList = (ins ImmOpnd:$imm);
string AsmString = !strconcat(instr_asm, "\t$rt, $imm");
list<dag> Pattern = [];
+ InstrItinClass Itinerary = Itin;
}
-class ADDIUPC_MMR6_DESC : PCREL_MMR6_DESC_BASE<"addiupc", GPR32Opnd, simm19_lsl2>;
-class LWPC_MMR6_DESC: PCREL_MMR6_DESC_BASE<"lwpc", GPR32Opnd, simm19_lsl2>;
+class ADDIUPC_MMR6_DESC : PCREL_MMR6_DESC_BASE<"addiupc", GPR32Opnd,
+ simm19_lsl2, II_ADDIUPC>;
+class LWPC_MMR6_DESC: PCREL_MMR6_DESC_BASE<"lwpc", GPR32Opnd, simm19_lsl2,
+ II_LWPC>;
class LWP_MMR6_DESC : MMR6Arch<"lwp"> {
dag OutOperandList = (outs regpair:$rd);
dag InOperandList = (ins mem_simm12:$addr);
string AsmString = !strconcat("lwp", "\t$rd, $addr");
list<dag> Pattern = [];
- InstrItinClass Itin = NoItinerary;
+ InstrItinClass Itinerary = II_LWP;
ComplexPattern Addr = addr;
Format f = FrmI;
string BaseOpcode = "lwp";
@@ -576,7 +630,7 @@ class SWP_MMR6_DESC : MMR6Arch<"swp"> {
dag InOperandList = (ins regpair:$rd, mem_simm12:$addr);
string AsmString = !strconcat("swp", "\t$rd, $addr");
list<dag> Pattern = [];
- InstrItinClass Itin = NoItinerary;
+ InstrItinClass Itinerary = II_SWP;
ComplexPattern Addr = addr;
Format f = FrmI;
string BaseOpcode = "swp";
@@ -584,17 +638,20 @@ class SWP_MMR6_DESC : MMR6Arch<"swp"> {
bit mayStore = 1;
}
-class SELEQNE_Z_MMR6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd>
- : MMR6Arch<instr_asm> {
+class SELEQNE_Z_MMR6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
+ InstrItinClass Itin> : MMR6Arch<instr_asm> {
dag OutOperandList = (outs GPROpnd:$rd);
dag InOperandList = (ins GPROpnd:$rs, GPROpnd:$rt);
string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt");
list<dag> Pattern = [];
+ InstrItinClass Itinerary = Itin;
}
-class SELEQZ_MMR6_DESC : SELEQNE_Z_MMR6_DESC_BASE<"seleqz", GPR32Opnd>;
-class SELNEZ_MMR6_DESC : SELEQNE_Z_MMR6_DESC_BASE<"selnez", GPR32Opnd>;
-class PAUSE_MMR6_DESC : Barrier<"pause">;
+class SELEQZ_MMR6_DESC : SELEQNE_Z_MMR6_DESC_BASE<"seleqz", GPR32Opnd,
+ II_SELCCZ>;
+class SELNEZ_MMR6_DESC : SELEQNE_Z_MMR6_DESC_BASE<"selnez", GPR32Opnd,
+ II_SELCCZ>;
+class PAUSE_MMR6_DESC : Barrier<"pause", II_PAUSE>;
class RDHWR_MMR6_DESC : MMR6Arch<"rdhwr">, MipsR6Inst {
dag OutOperandList = (outs GPR32Opnd:$rt);
dag InOperandList = (ins HWRegsOpnd:$rs, uimm3:$sel);
@@ -605,10 +662,14 @@ class RDHWR_MMR6_DESC : MMR6Arch<"rdhwr">, MipsR6Inst {
}
class WAIT_MMR6_DESC : WaitMM<"wait">;
-class SSNOP_MMR6_DESC : Barrier<"ssnop">;
+// FIXME: ssnop should not be defined for R6. Per MD000582 microMIPS32 6.03:
+// Assemblers targeting specifically Release 6 should reject the SSNOP
+// instruction with an error.
+class SSNOP_MMR6_DESC : Barrier<"ssnop", II_SSNOP>;
class SLL_MMR6_DESC : shift_rotate_imm<"sll", uimm5, GPR32Opnd, II_SLL>;
class DIVMOD_MMR6_DESC_BASE<string opstr, RegisterOperand GPROpnd,
+ InstrItinClass Itin,
SDPatternOperator OpNode=null_frag>
: MipsR6Inst {
dag OutOperandList = (outs GPROpnd:$rd);
@@ -619,15 +680,16 @@ class DIVMOD_MMR6_DESC_BASE<string opstr, RegisterOperand GPROpnd,
Format f = FrmR;
let isCommutable = 0;
let isReMaterializable = 1;
+ InstrItinClass Itinerary = Itin;
// This instruction doesn't trap division by zero itself. We must insert
// teq instructions as well.
bit usesCustomInserter = 1;
}
-class DIV_MMR6_DESC : DIVMOD_MMR6_DESC_BASE<"div", GPR32Opnd, sdiv>;
-class DIVU_MMR6_DESC : DIVMOD_MMR6_DESC_BASE<"divu", GPR32Opnd, udiv>;
-class MOD_MMR6_DESC : DIVMOD_MMR6_DESC_BASE<"mod", GPR32Opnd, srem>;
-class MODU_MMR6_DESC : DIVMOD_MMR6_DESC_BASE<"modu", GPR32Opnd, urem>;
+class DIV_MMR6_DESC : DIVMOD_MMR6_DESC_BASE<"div", GPR32Opnd, II_DIV, sdiv>;
+class DIVU_MMR6_DESC : DIVMOD_MMR6_DESC_BASE<"divu", GPR32Opnd, II_DIVU, udiv>;
+class MOD_MMR6_DESC : DIVMOD_MMR6_DESC_BASE<"mod", GPR32Opnd, II_MOD, srem>;
+class MODU_MMR6_DESC : DIVMOD_MMR6_DESC_BASE<"modu", GPR32Opnd, II_MODU, urem>;
class AND_MMR6_DESC : ArithLogicR<"and", GPR32Opnd, 1, II_AND, and>;
class ANDI_MMR6_DESC : ArithLogicI<"andi", uimm16, GPR32Opnd, II_ANDI>;
class NOR_MMR6_DESC : LogicNOR<"nor", GPR32Opnd>;
@@ -641,19 +703,21 @@ class XORI_MMR6_DESC : ArithLogicI<"xori", uimm16, GPR32Opnd, II_XORI,
immZExt16, xor>;
class SWE_MMR6_DESC_BASE<string opstr, DAGOperand RO, DAGOperand MO,
- SDPatternOperator OpNode = null_frag,
InstrItinClass Itin = NoItinerary,
+ SDPatternOperator OpNode = null_frag,
ComplexPattern Addr = addr> :
InstSE<(outs), (ins RO:$rt, MO:$addr), !strconcat(opstr, "\t$rt, $addr"),
[(OpNode RO:$rt, Addr:$addr)], Itin, FrmI, opstr> {
let DecoderMethod = "DecodeMem";
let mayStore = 1;
}
-class SW_MMR6_DESC : Store<"sw", GPR32Opnd>;
-class SWE_MMR6_DESC : SWE_MMR6_DESC_BASE<"swe", GPR32Opnd, mem_simm9>;
+class SW_MMR6_DESC : Store<"sw", GPR32Opnd> {
+ InstrItinClass Itinerary = II_SW;
+}
+class SWE_MMR6_DESC : SWE_MMR6_DESC_BASE<"swe", GPR32Opnd, mem_simm9, II_SWE>;
-class WRPGPR_WSBH_MMR6_DESC_BASE<string instr_asm, RegisterOperand RO>
- : MMR6Arch<instr_asm> {
+class WRPGPR_WSBH_MMR6_DESC_BASE<string instr_asm, RegisterOperand RO,
+ InstrItinClass Itin> : MMR6Arch<instr_asm> {
dag InOperandList = (ins RO:$rs);
dag OutOperandList = (outs RO:$rt);
string AsmString = !strconcat(instr_asm, "\t$rt, $rs");
@@ -661,18 +725,21 @@ class WRPGPR_WSBH_MMR6_DESC_BASE<string instr_asm, RegisterOperand RO>
Format f = FrmR;
string BaseOpcode = instr_asm;
bit hasSideEffects = 0;
+ InstrItinClass Itinerary = Itin;
}
-class WRPGPR_MMR6_DESC : WRPGPR_WSBH_MMR6_DESC_BASE<"wrpgpr", GPR32Opnd>;
-class WSBH_MMR6_DESC : WRPGPR_WSBH_MMR6_DESC_BASE<"wsbh", GPR32Opnd>;
+class WRPGPR_MMR6_DESC : WRPGPR_WSBH_MMR6_DESC_BASE<"wrpgpr", GPR32Opnd,
+ II_WRPGPR>;
+class WSBH_MMR6_DESC : WRPGPR_WSBH_MMR6_DESC_BASE<"wsbh", GPR32Opnd, II_WSBH>;
class MTC0_MMR6_DESC_BASE<string opstr, RegisterOperand DstRC,
- RegisterOperand SrcRC> {
+ RegisterOperand SrcRC, InstrItinClass Itin> {
dag InOperandList = (ins SrcRC:$rt, uimm3:$sel);
dag OutOperandList = (outs DstRC:$rs);
string AsmString = !strconcat(opstr, "\t$rt, $rs, $sel");
list<dag> Pattern = [];
Format f = FrmFR;
string BaseOpcode = opstr;
+ InstrItinClass Itinerary = Itin;
}
class MTC1_MMR6_DESC_BASE<
string opstr, RegisterOperand DstRC, RegisterOperand SrcRC,
@@ -701,34 +768,42 @@ class MTC1_64_MMR6_DESC_BASE<
let Constraints = "$fs = $fs_in";
}
class MTC2_MMR6_DESC_BASE<string opstr, RegisterOperand DstRC,
- RegisterOperand SrcRC> {
+ RegisterOperand SrcRC, InstrItinClass Itin> {
dag InOperandList = (ins SrcRC:$rt);
dag OutOperandList = (outs DstRC:$impl);
string AsmString = !strconcat(opstr, "\t$rt, $impl");
list<dag> Pattern = [];
Format f = FrmFR;
string BaseOpcode = opstr;
+ InstrItinClass Itinerary = Itin;
}
-class MTC0_MMR6_DESC : MTC0_MMR6_DESC_BASE<"mtc0", COP0Opnd, GPR32Opnd>;
+class MTC0_MMR6_DESC : MTC0_MMR6_DESC_BASE<"mtc0", COP0Opnd, GPR32Opnd,
+ II_MTC0>;
class MTC1_MMR6_DESC : MTC1_MMR6_DESC_BASE<"mtc1", FGR32Opnd, GPR32Opnd,
II_MTC1, bitconvert>, HARDFLOAT;
-class MTC2_MMR6_DESC : MTC2_MMR6_DESC_BASE<"mtc2", COP2Opnd, GPR32Opnd>;
-class MTHC0_MMR6_DESC : MTC0_MMR6_DESC_BASE<"mthc0", COP0Opnd, GPR32Opnd>;
-class MTHC1_D32_MMR6_DESC : MTC1_64_MMR6_DESC_BASE<"mthc1", AFGR64Opnd, GPR32Opnd>,
+class MTC2_MMR6_DESC : MTC2_MMR6_DESC_BASE<"mtc2", COP2Opnd, GPR32Opnd,
+ II_MTC2>;
+class MTHC0_MMR6_DESC : MTC0_MMR6_DESC_BASE<"mthc0", COP0Opnd, GPR32Opnd,
+ II_MTHC0>;
+class MTHC1_D32_MMR6_DESC : MTC1_64_MMR6_DESC_BASE<"mthc1", AFGR64Opnd,
+ GPR32Opnd, II_MTC1>,
HARDFLOAT, FGR_32;
-class MTHC1_D64_MMR6_DESC : MTC1_64_MMR6_DESC_BASE<"mthc1", FGR64Opnd, GPR32Opnd>,
+class MTHC1_D64_MMR6_DESC : MTC1_64_MMR6_DESC_BASE<"mthc1", FGR64Opnd,
+ GPR32Opnd, II_MTC1>,
HARDFLOAT, FGR_64;
-class MTHC2_MMR6_DESC : MTC2_MMR6_DESC_BASE<"mthc2", COP2Opnd, GPR32Opnd>;
+class MTHC2_MMR6_DESC : MTC2_MMR6_DESC_BASE<"mthc2", COP2Opnd, GPR32Opnd,
+ II_MTC2>;
class MFC0_MMR6_DESC_BASE<string opstr, RegisterOperand DstRC,
- RegisterOperand SrcRC> {
+ RegisterOperand SrcRC, InstrItinClass Itin> {
dag InOperandList = (ins SrcRC:$rs, uimm3:$sel);
dag OutOperandList = (outs DstRC:$rt);
string AsmString = !strconcat(opstr, "\t$rt, $rs, $sel");
list<dag> Pattern = [];
Format f = FrmFR;
string BaseOpcode = opstr;
+ InstrItinClass Itinerary = Itin;
}
class MFC1_MMR6_DESC_BASE<string opstr, RegisterOperand DstRC,
RegisterOperand SrcRC,
@@ -743,24 +818,29 @@ class MFC1_MMR6_DESC_BASE<string opstr, RegisterOperand DstRC,
string BaseOpcode = opstr;
}
class MFC2_MMR6_DESC_BASE<string opstr, RegisterOperand DstRC,
- RegisterOperand SrcRC> {
+ RegisterOperand SrcRC, InstrItinClass Itin> {
dag InOperandList = (ins SrcRC:$impl);
dag OutOperandList = (outs DstRC:$rt);
string AsmString = !strconcat(opstr, "\t$rt, $impl");
list<dag> Pattern = [];
Format f = FrmFR;
string BaseOpcode = opstr;
+ InstrItinClass Itinerary = Itin;
}
-class MFC0_MMR6_DESC : MFC0_MMR6_DESC_BASE<"mfc0", GPR32Opnd, COP0Opnd>;
+class MFC0_MMR6_DESC : MFC0_MMR6_DESC_BASE<"mfc0", GPR32Opnd, COP0Opnd,
+ II_MFC0>;
class MFC1_MMR6_DESC : MFC1_MMR6_DESC_BASE<"mfc1", GPR32Opnd, FGR32Opnd,
II_MFC1, bitconvert>, HARDFLOAT;
-class MFC2_MMR6_DESC : MFC2_MMR6_DESC_BASE<"mfc2", GPR32Opnd, COP2Opnd>;
-class MFHC0_MMR6_DESC : MFC0_MMR6_DESC_BASE<"mfhc0", GPR32Opnd, COP0Opnd>;
+class MFC2_MMR6_DESC : MFC2_MMR6_DESC_BASE<"mfc2", GPR32Opnd, COP2Opnd,
+ II_MFC2>;
+class MFHC0_MMR6_DESC : MFC0_MMR6_DESC_BASE<"mfhc0", GPR32Opnd, COP0Opnd,
+ II_MFHC0>;
class MFHC1_D32_MMR6_DESC : MFC1_MMR6_DESC_BASE<"mfhc1", GPR32Opnd, AFGR64Opnd,
II_MFHC1>, HARDFLOAT, FGR_32;
class MFHC1_D64_MMR6_DESC : MFC1_MMR6_DESC_BASE<"mfhc1", GPR32Opnd, FGR64Opnd,
II_MFHC1>, HARDFLOAT, FGR_64;
-class MFHC2_MMR6_DESC : MFC2_MMR6_DESC_BASE<"mfhc2", GPR32Opnd, COP2Opnd>;
+class MFHC2_MMR6_DESC : MFC2_MMR6_DESC_BASE<"mfhc2", GPR32Opnd, COP2Opnd,
+ II_MFC2>;
class LDC1_D64_MMR6_DESC : MipsR6Inst, HARDFLOAT, FGR_64 {
dag InOperandList = (ins mem_mm_16:$addr);
@@ -786,33 +866,33 @@ class SDC1_D64_MMR6_DESC : MipsR6Inst, HARDFLOAT, FGR_64 {
let DecoderMethod = "DecodeFMemMMR2";
}
-class LDC2_LWC2_MMR6_DESC_BASE<string opstr> {
+class LDC2_LWC2_MMR6_DESC_BASE<string opstr, InstrItinClass itin> {
dag OutOperandList = (outs COP2Opnd:$rt);
dag InOperandList = (ins mem_mm_11:$addr);
string AsmString = !strconcat(opstr, "\t$rt, $addr");
list<dag> Pattern = [(set COP2Opnd:$rt, (load addrimm11:$addr))];
Format f = FrmFI;
- InstrItinClass Itinerary = NoItinerary;
+ InstrItinClass Itinerary = itin;
string BaseOpcode = opstr;
bit mayLoad = 1;
string DecoderMethod = "DecodeFMemCop2MMR6";
}
-class LDC2_MMR6_DESC : LDC2_LWC2_MMR6_DESC_BASE<"ldc2">;
-class LWC2_MMR6_DESC : LDC2_LWC2_MMR6_DESC_BASE<"lwc2">;
+class LDC2_MMR6_DESC : LDC2_LWC2_MMR6_DESC_BASE<"ldc2", II_LDC2>;
+class LWC2_MMR6_DESC : LDC2_LWC2_MMR6_DESC_BASE<"lwc2", II_LWC2>;
-class SDC2_SWC2_MMR6_DESC_BASE<string opstr> {
+class SDC2_SWC2_MMR6_DESC_BASE<string opstr, InstrItinClass itin> {
dag OutOperandList = (outs);
dag InOperandList = (ins COP2Opnd:$rt, mem_mm_11:$addr);
string AsmString = !strconcat(opstr, "\t$rt, $addr");
list<dag> Pattern = [(store COP2Opnd:$rt, addrimm11:$addr)];
Format f = FrmFI;
- InstrItinClass Itinerary = NoItinerary;
+ InstrItinClass Itinerary = itin;
string BaseOpcode = opstr;
bit mayStore = 1;
string DecoderMethod = "DecodeFMemCop2MMR6";
}
-class SDC2_MMR6_DESC : SDC2_SWC2_MMR6_DESC_BASE<"sdc2">;
-class SWC2_MMR6_DESC : SDC2_SWC2_MMR6_DESC_BASE<"swc2">;
+class SDC2_MMR6_DESC : SDC2_SWC2_MMR6_DESC_BASE<"sdc2", II_SDC2>;
+class SWC2_MMR6_DESC : SDC2_SWC2_MMR6_DESC_BASE<"swc2", II_SWC2>;
/// Floating Point Instructions
class FARITH_MMR6_DESC_BASE<string instr_asm, RegisterOperand RC,
@@ -841,10 +921,14 @@ class FDIV_S_MMR6_DESC
: FARITH_MMR6_DESC_BASE<"div.s", FGR32Opnd, II_DIV_S, 0, fdiv>;
class FDIV_D_MMR6_DESC
: FARITH_MMR6_DESC_BASE<"div.d", AFGR64Opnd, II_DIV_D, 0, fdiv>;
-class MADDF_S_MMR6_DESC : COP1_4R_DESC_BASE<"maddf.s", FGR32Opnd>, HARDFLOAT;
-class MADDF_D_MMR6_DESC : COP1_4R_DESC_BASE<"maddf.d", FGR64Opnd>, HARDFLOAT;
-class MSUBF_S_MMR6_DESC : COP1_4R_DESC_BASE<"msubf.s", FGR32Opnd>, HARDFLOAT;
-class MSUBF_D_MMR6_DESC : COP1_4R_DESC_BASE<"msubf.d", FGR64Opnd>, HARDFLOAT;
+class MADDF_S_MMR6_DESC : COP1_4R_DESC_BASE<"maddf.s", FGR32Opnd,
+ II_MADDF_S>, HARDFLOAT;
+class MADDF_D_MMR6_DESC : COP1_4R_DESC_BASE<"maddf.d", FGR64Opnd,
+ II_MADDF_D>, HARDFLOAT;
+class MSUBF_S_MMR6_DESC : COP1_4R_DESC_BASE<"msubf.s", FGR32Opnd,
+ II_MSUBF_S>, HARDFLOAT;
+class MSUBF_D_MMR6_DESC : COP1_4R_DESC_BASE<"msubf.d", FGR64Opnd,
+ II_MSUBF_D>, HARDFLOAT;
class FMOV_FNEG_MMR6_DESC_BASE<string instr_asm, RegisterOperand DstRC,
RegisterOperand SrcRC, InstrItinClass Itin,
@@ -866,15 +950,23 @@ class FNEG_S_MMR6_DESC
class FNEG_D_MMR6_DESC
: FMOV_FNEG_MMR6_DESC_BASE<"neg.d", AFGR64Opnd, AFGR64Opnd, II_NEG, fneg>;
-class MAX_S_MMR6_DESC : MAX_MIN_DESC_BASE<"max.s", FGR32Opnd>, HARDFLOAT;
-class MAX_D_MMR6_DESC : MAX_MIN_DESC_BASE<"max.d", FGR64Opnd>, HARDFLOAT;
-class MIN_S_MMR6_DESC : MAX_MIN_DESC_BASE<"min.s", FGR32Opnd>, HARDFLOAT;
-class MIN_D_MMR6_DESC : MAX_MIN_DESC_BASE<"min.d", FGR64Opnd>, HARDFLOAT;
-
-class MAXA_S_MMR6_DESC : MAX_MIN_DESC_BASE<"maxa.s", FGR32Opnd>, HARDFLOAT;
-class MAXA_D_MMR6_DESC : MAX_MIN_DESC_BASE<"maxa.d", FGR64Opnd>, HARDFLOAT;
-class MINA_S_MMR6_DESC : MAX_MIN_DESC_BASE<"mina.s", FGR32Opnd>, HARDFLOAT;
-class MINA_D_MMR6_DESC : MAX_MIN_DESC_BASE<"mina.d", FGR64Opnd>, HARDFLOAT;
+class MAX_S_MMR6_DESC : MAX_MIN_DESC_BASE<"max.s", FGR32Opnd, II_MAX_S>,
+ HARDFLOAT;
+class MAX_D_MMR6_DESC : MAX_MIN_DESC_BASE<"max.d", FGR64Opnd, II_MAX_D>,
+ HARDFLOAT;
+class MIN_S_MMR6_DESC : MAX_MIN_DESC_BASE<"min.s", FGR32Opnd, II_MIN_S>,
+ HARDFLOAT;
+class MIN_D_MMR6_DESC : MAX_MIN_DESC_BASE<"min.d", FGR64Opnd, II_MIN_D>,
+ HARDFLOAT;
+
+class MAXA_S_MMR6_DESC : MAX_MIN_DESC_BASE<"maxa.s", FGR32Opnd, II_MAXA_S>,
+ HARDFLOAT;
+class MAXA_D_MMR6_DESC : MAX_MIN_DESC_BASE<"maxa.d", FGR64Opnd, II_MAXA_D>,
+ HARDFLOAT;
+class MINA_S_MMR6_DESC : MAX_MIN_DESC_BASE<"mina.s", FGR32Opnd, II_MINA_S>,
+ HARDFLOAT;
+class MINA_D_MMR6_DESC : MAX_MIN_DESC_BASE<"mina.d", FGR64Opnd, II_MINA_D>,
+ HARDFLOAT;
class CVT_MMR6_DESC_BASE<
string instr_asm, RegisterOperand DstRC, RegisterOperand SrcRC,
@@ -910,70 +1002,70 @@ class CVT_S_L_MMR6_DESC : CVT_MMR6_DESC_BASE<"cvt.s.l", FGR64Opnd, FGR32Opnd,
II_CVT>, FGR_64;
multiclass CMP_CC_MMR6<bits<6> format, string Typestr,
- RegisterOperand FGROpnd> {
+ RegisterOperand FGROpnd, InstrItinClass Itin> {
def CMP_AF_#NAME : R6MMR6Rel, POOL32F_CMP_FM<
!strconcat("cmp.af.", Typestr), format, FIELD_CMP_COND_AF>,
- CMP_CONDN_DESC_BASE<"af", Typestr, FGROpnd>, HARDFLOAT,
+ CMP_CONDN_DESC_BASE<"af", Typestr, FGROpnd, Itin>, HARDFLOAT,
ISA_MICROMIPS32R6;
def CMP_UN_#NAME : R6MMR6Rel, POOL32F_CMP_FM<
!strconcat("cmp.un.", Typestr), format, FIELD_CMP_COND_UN>,
- CMP_CONDN_DESC_BASE<"un", Typestr, FGROpnd, setuo>, HARDFLOAT,
+ CMP_CONDN_DESC_BASE<"un", Typestr, FGROpnd, Itin, setuo>, HARDFLOAT,
ISA_MICROMIPS32R6;
def CMP_EQ_#NAME : R6MMR6Rel, POOL32F_CMP_FM<
!strconcat("cmp.eq.", Typestr), format, FIELD_CMP_COND_EQ>,
- CMP_CONDN_DESC_BASE<"eq", Typestr, FGROpnd, setoeq>, HARDFLOAT,
+ CMP_CONDN_DESC_BASE<"eq", Typestr, FGROpnd, Itin, setoeq>, HARDFLOAT,
ISA_MICROMIPS32R6;
def CMP_UEQ_#NAME : R6MMR6Rel, POOL32F_CMP_FM<
!strconcat("cmp.ueq.", Typestr), format, FIELD_CMP_COND_UEQ>,
- CMP_CONDN_DESC_BASE<"ueq", Typestr, FGROpnd, setueq>, HARDFLOAT,
+ CMP_CONDN_DESC_BASE<"ueq", Typestr, FGROpnd, Itin, setueq>, HARDFLOAT,
ISA_MICROMIPS32R6;
def CMP_LT_#NAME : R6MMR6Rel, POOL32F_CMP_FM<
!strconcat("cmp.lt.", Typestr), format, FIELD_CMP_COND_LT>,
- CMP_CONDN_DESC_BASE<"lt", Typestr, FGROpnd, setolt>, HARDFLOAT,
+ CMP_CONDN_DESC_BASE<"lt", Typestr, FGROpnd, Itin, setolt>, HARDFLOAT,
ISA_MICROMIPS32R6;
def CMP_ULT_#NAME : R6MMR6Rel, POOL32F_CMP_FM<
!strconcat("cmp.ult.", Typestr), format, FIELD_CMP_COND_ULT>,
- CMP_CONDN_DESC_BASE<"ult", Typestr, FGROpnd, setult>, HARDFLOAT,
+ CMP_CONDN_DESC_BASE<"ult", Typestr, FGROpnd, Itin, setult>, HARDFLOAT,
ISA_MICROMIPS32R6;
def CMP_LE_#NAME : R6MMR6Rel, POOL32F_CMP_FM<
!strconcat("cmp.le.", Typestr), format, FIELD_CMP_COND_LE>,
- CMP_CONDN_DESC_BASE<"le", Typestr, FGROpnd, setole>, HARDFLOAT,
+ CMP_CONDN_DESC_BASE<"le", Typestr, FGROpnd, Itin, setole>, HARDFLOAT,
ISA_MICROMIPS32R6;
def CMP_ULE_#NAME : R6MMR6Rel, POOL32F_CMP_FM<
!strconcat("cmp.ule.", Typestr), format, FIELD_CMP_COND_ULE>,
- CMP_CONDN_DESC_BASE<"ule", Typestr, FGROpnd, setule>, HARDFLOAT,
+ CMP_CONDN_DESC_BASE<"ule", Typestr, FGROpnd, Itin, setule>, HARDFLOAT,
ISA_MICROMIPS32R6;
def CMP_SAF_#NAME : R6MMR6Rel, POOL32F_CMP_FM<
!strconcat("cmp.saf.", Typestr), format, FIELD_CMP_COND_SAF>,
- CMP_CONDN_DESC_BASE<"saf", Typestr, FGROpnd>, HARDFLOAT,
+ CMP_CONDN_DESC_BASE<"saf", Typestr, FGROpnd, Itin>, HARDFLOAT,
ISA_MICROMIPS32R6;
def CMP_SUN_#NAME : R6MMR6Rel, POOL32F_CMP_FM<
!strconcat("cmp.sun.", Typestr), format, FIELD_CMP_COND_SUN>,
- CMP_CONDN_DESC_BASE<"sun", Typestr, FGROpnd>, HARDFLOAT,
+ CMP_CONDN_DESC_BASE<"sun", Typestr, FGROpnd, Itin>, HARDFLOAT,
ISA_MICROMIPS32R6;
def CMP_SEQ_#NAME : R6MMR6Rel, POOL32F_CMP_FM<
!strconcat("cmp.seq.", Typestr), format, FIELD_CMP_COND_SEQ>,
- CMP_CONDN_DESC_BASE<"seq", Typestr, FGROpnd>, HARDFLOAT,
+ CMP_CONDN_DESC_BASE<"seq", Typestr, FGROpnd, Itin>, HARDFLOAT,
ISA_MICROMIPS32R6;
def CMP_SUEQ_#NAME : R6MMR6Rel, POOL32F_CMP_FM<
!strconcat("cmp.sueq.", Typestr), format, FIELD_CMP_COND_SUEQ>,
- CMP_CONDN_DESC_BASE<"sueq", Typestr, FGROpnd>, HARDFLOAT,
+ CMP_CONDN_DESC_BASE<"sueq", Typestr, FGROpnd, Itin>, HARDFLOAT,
ISA_MICROMIPS32R6;
def CMP_SLT_#NAME : R6MMR6Rel, POOL32F_CMP_FM<
!strconcat("cmp.slt.", Typestr), format, FIELD_CMP_COND_SLT>,
- CMP_CONDN_DESC_BASE<"slt", Typestr, FGROpnd>, HARDFLOAT,
+ CMP_CONDN_DESC_BASE<"slt", Typestr, FGROpnd, Itin>, HARDFLOAT,
ISA_MICROMIPS32R6;
def CMP_SULT_#NAME : R6MMR6Rel, POOL32F_CMP_FM<
!strconcat("cmp.sult.", Typestr), format, FIELD_CMP_COND_SULT>,
- CMP_CONDN_DESC_BASE<"sult", Typestr, FGROpnd>, HARDFLOAT,
+ CMP_CONDN_DESC_BASE<"sult", Typestr, FGROpnd, Itin>, HARDFLOAT,
ISA_MICROMIPS32R6;
def CMP_SLE_#NAME : R6MMR6Rel, POOL32F_CMP_FM<
!strconcat("cmp.sle.", Typestr), format, FIELD_CMP_COND_SLE>,
- CMP_CONDN_DESC_BASE<"sle", Typestr, FGROpnd>, HARDFLOAT,
+ CMP_CONDN_DESC_BASE<"sle", Typestr, FGROpnd, Itin>, HARDFLOAT,
ISA_MICROMIPS32R6;
def CMP_SULE_#NAME : R6MMR6Rel, POOL32F_CMP_FM<
!strconcat("cmp.sule.", Typestr), format, FIELD_CMP_COND_SULE>,
- CMP_CONDN_DESC_BASE<"sule", Typestr, FGROpnd>, HARDFLOAT,
+ CMP_CONDN_DESC_BASE<"sule", Typestr, FGROpnd, Itin>, HARDFLOAT,
ISA_MICROMIPS32R6;
}
@@ -1022,14 +1114,6 @@ class SQRT_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"sqrt.s", FGR32Opnd, FGR32Opnd,
II_SQRT_S, fsqrt>;
class SQRT_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"sqrt.d", AFGR64Opnd, AFGR64Opnd,
II_SQRT_D, fsqrt>;
-class RSQRT_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"rsqrt.s", FGR32Opnd,
- FGR32Opnd, II_TRUNC>;
-class RSQRT_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"rsqrt.d", FGR32Opnd,
- AFGR64Opnd, II_TRUNC>;
-class RECIP_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"recip.s", FGR32Opnd,
- FGR32Opnd, II_ROUND>;
-class RECIP_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"recip.d", FGR32Opnd, FGR32Opnd,
- II_ROUND>;
class ROUND_L_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"round.l.s", FGR64Opnd,
FGR32Opnd, II_ROUND>;
class ROUND_L_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"round.l.d", FGR64Opnd,
@@ -1039,49 +1123,63 @@ class ROUND_W_S_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"round.w.s", FGR32Opnd,
class ROUND_W_D_MMR6_DESC : ABSS_FT_MMR6_DESC_BASE<"round.w.d", FGR64Opnd,
FGR64Opnd, II_ROUND>;
-class SEL_S_MMR6_DESC : COP1_SEL_DESC_BASE<"sel.s", FGR32Opnd>;
-class SEL_D_MMR6_DESC : COP1_SEL_DESC_BASE<"sel.d", FGR64Opnd> {
+class SEL_S_MMR6_DESC : COP1_SEL_DESC_BASE<"sel.s", FGR32Opnd, II_SEL_S>;
+class SEL_D_MMR6_DESC : COP1_SEL_DESC_BASE<"sel.d", FGR64Opnd, II_SEL_D> {
// We must insert a SUBREG_TO_REG around $fd_in
bit usesCustomInserter = 1;
}
-class SELEQZ_S_MMR6_DESC : SELEQNEZ_DESC_BASE<"seleqz.s", FGR32Opnd>;
-class SELEQZ_D_MMR6_DESC : SELEQNEZ_DESC_BASE<"seleqz.d", FGR64Opnd>;
-class SELNEZ_S_MMR6_DESC : SELEQNEZ_DESC_BASE<"selnez.s", FGR32Opnd>;
-class SELNEZ_D_MMR6_DESC : SELEQNEZ_DESC_BASE<"selnez.d", FGR64Opnd>;
-class RINT_S_MMR6_DESC : CLASS_RINT_DESC_BASE<"rint.s", FGR32Opnd>;
-class RINT_D_MMR6_DESC : CLASS_RINT_DESC_BASE<"rint.d", FGR64Opnd>;
-class CLASS_S_MMR6_DESC : CLASS_RINT_DESC_BASE<"class.s", FGR32Opnd>;
-class CLASS_D_MMR6_DESC : CLASS_RINT_DESC_BASE<"class.d", FGR64Opnd>;
-
-class STORE_MMR6_DESC_BASE<string opstr, DAGOperand RO>
+class SELEQZ_S_MMR6_DESC : SELEQNEZ_DESC_BASE<"seleqz.s", FGR32Opnd,
+ II_SELCCZ_S>;
+class SELEQZ_D_MMR6_DESC : SELEQNEZ_DESC_BASE<"seleqz.d", FGR64Opnd,
+ II_SELCCZ_D>;
+class SELNEZ_S_MMR6_DESC : SELEQNEZ_DESC_BASE<"selnez.s", FGR32Opnd,
+ II_SELCCZ_S>;
+class SELNEZ_D_MMR6_DESC : SELEQNEZ_DESC_BASE<"selnez.d", FGR64Opnd,
+ II_SELCCZ_D>;
+class RINT_S_MMR6_DESC : CLASS_RINT_DESC_BASE<"rint.s", FGR32Opnd,
+ II_RINT_S>;
+class RINT_D_MMR6_DESC : CLASS_RINT_DESC_BASE<"rint.d", FGR64Opnd,
+ II_RINT_S>;
+class CLASS_S_MMR6_DESC : CLASS_RINT_DESC_BASE<"class.s", FGR32Opnd,
+ II_CLASS_S>;
+class CLASS_D_MMR6_DESC : CLASS_RINT_DESC_BASE<"class.d", FGR64Opnd,
+ II_CLASS_S>;
+
+class STORE_MMR6_DESC_BASE<string opstr, DAGOperand RO,
+ InstrItinClass Itin>
: Store<opstr, RO>, MMR6Arch<opstr> {
let DecoderMethod = "DecodeMemMMImm16";
+ InstrItinClass Itinerary = Itin;
}
-class SB_MMR6_DESC : STORE_MMR6_DESC_BASE<"sb", GPR32Opnd>;
+class SB_MMR6_DESC : STORE_MMR6_DESC_BASE<"sb", GPR32Opnd, II_SB>;
-class STORE_EVA_MMR6_DESC_BASE<string instr_asm, RegisterOperand RO>
+class STORE_EVA_MMR6_DESC_BASE<string instr_asm, RegisterOperand RO,
+ InstrItinClass Itin>
: MMR6Arch<instr_asm>, MipsR6Inst {
dag OutOperandList = (outs);
dag InOperandList = (ins RO:$rt, mem_simm9:$addr);
string AsmString = !strconcat(instr_asm, "\t$rt, $addr");
string DecoderMethod = "DecodeStoreEvaOpMM";
bit mayStore = 1;
+ InstrItinClass Itinerary = Itin;
}
-class SBE_MMR6_DESC : STORE_EVA_MMR6_DESC_BASE<"sbe", GPR32Opnd>;
-class SCE_MMR6_DESC : STORE_EVA_MMR6_DESC_BASE<"sce", GPR32Opnd>;
-class SH_MMR6_DESC : STORE_MMR6_DESC_BASE<"sh", GPR32Opnd>;
-class SHE_MMR6_DESC : STORE_EVA_MMR6_DESC_BASE<"she", GPR32Opnd>;
-class LOAD_WORD_EVA_MMR6_DESC_BASE<string instr_asm, RegisterOperand RO> :
- MMR6Arch<instr_asm>, MipsR6Inst {
+class SBE_MMR6_DESC : STORE_EVA_MMR6_DESC_BASE<"sbe", GPR32Opnd, II_SBE>;
+class SCE_MMR6_DESC : STORE_EVA_MMR6_DESC_BASE<"sce", GPR32Opnd, II_SCE>;
+class SH_MMR6_DESC : STORE_MMR6_DESC_BASE<"sh", GPR32Opnd, II_SH>;
+class SHE_MMR6_DESC : STORE_EVA_MMR6_DESC_BASE<"she", GPR32Opnd, II_SHE>;
+class LOAD_WORD_EVA_MMR6_DESC_BASE<string instr_asm, RegisterOperand RO,
+ InstrItinClass Itin>
+ : MMR6Arch<instr_asm>, MipsR6Inst {
dag OutOperandList = (outs RO:$rt);
dag InOperandList = (ins mem_simm9:$addr);
string AsmString = !strconcat(instr_asm, "\t$rt, $addr");
string DecoderMethod = "DecodeMemMMImm9";
bit mayLoad = 1;
+ InstrItinClass Itinerary = Itin;
}
-class LLE_MMR6_DESC : LOAD_WORD_EVA_MMR6_DESC_BASE<"lle", GPR32Opnd>;
-class LWE_MMR6_DESC : LOAD_WORD_EVA_MMR6_DESC_BASE<"lwe", GPR32Opnd>;
+class LLE_MMR6_DESC : LOAD_WORD_EVA_MMR6_DESC_BASE<"lle", GPR32Opnd, II_LLE>;
+class LWE_MMR6_DESC : LOAD_WORD_EVA_MMR6_DESC_BASE<"lwe", GPR32Opnd, II_LWE>;
class ADDU16_MMR6_DESC : ArithRMM16<"addu16", GPRMM16Opnd, 1, II_ADDU, add>,
MMR6Arch<"addu16"> {
int AddedComplexity = 1;
@@ -1103,13 +1201,13 @@ class SLL16_MMR6_DESC : ShiftIMM16<"sll16", uimm3_shift, GPRMM16Opnd, II_SLL>,
MMR6Arch<"sll16">;
class SRL16_MMR6_DESC : ShiftIMM16<"srl16", uimm3_shift, GPRMM16Opnd, II_SRL>,
MMR6Arch<"srl16">;
-class BREAK16_MMR6_DESC : BrkSdbbp16MM<"break16">, MMR6Arch<"break16">,
+class BREAK16_MMR6_DESC : BrkSdbbp16MM<"break16", II_BREAK>, MMR6Arch<"break16">,
MicroMipsR6Inst16;
class LI16_MMR6_DESC : LoadImmMM16<"li16", li16_imm, GPRMM16Opnd>,
MMR6Arch<"li16">, MicroMipsR6Inst16, IsAsCheapAsAMove;
class MOVE16_MMR6_DESC : MoveMM16<"move16", GPR32Opnd>, MMR6Arch<"move16">,
MicroMipsR6Inst16;
-class SDBBP16_MMR6_DESC : BrkSdbbp16MM<"sdbbp16">, MMR6Arch<"sdbbp16">,
+class SDBBP16_MMR6_DESC : BrkSdbbp16MM<"sdbbp16", II_SDBBP>, MMR6Arch<"sdbbp16">,
MicroMipsR6Inst16;
class SUBU16_MMR6_DESC : ArithRMM16<"subu16", GPRMM16Opnd, 0, II_SUBU, sub>,
MMR6Arch<"subu16">, MicroMipsR6Inst16 {
@@ -1147,7 +1245,7 @@ class SYNC_MMR6_DESC : MMR6Arch<"sync">, MipsR6Inst {
dag InOperandList = (ins uimm5:$stype);
string AsmString = !strconcat("sync", "\t$stype");
list<dag> Pattern = [(MipsSync immZExt5:$stype)];
- InstrItinClass Itinerary = NoItinerary;
+ InstrItinClass Itinerary = II_SYNC;
bit HasSideEffects = 1;
}
@@ -1159,6 +1257,7 @@ class RDPGPR_MMR6_DESC : MMR6Arch<"rdpgpr">, MipsR6Inst {
dag OutOperandList = (outs GPR32Opnd:$rt);
dag InOperandList = (ins GPR32Opnd:$rd);
string AsmString = !strconcat("rdpgpr", "\t$rt, $rd");
+ InstrItinClass Itinerary = II_RDPGPR;
}
class SDBBP_MMR6_DESC : MipsR6Inst {
@@ -1166,27 +1265,26 @@ class SDBBP_MMR6_DESC : MipsR6Inst {
dag InOperandList = (ins uimm20:$code_);
string AsmString = !strconcat("sdbbp", "\t$code_");
list<dag> Pattern = [];
+ InstrItinClass Itinerary = II_SDBBP;
}
class LWM16_MMR6_DESC
: MicroMipsInst16<(outs reglist16:$rt), (ins mem_mm_4sp:$addr),
!strconcat("lwm16", "\t$rt, $addr"), [],
- NoItinerary, FrmI>,
+ II_LWM, FrmI>,
MMR6Arch<"lwm16">, MicroMipsR6Inst16 {
let DecoderMethod = "DecodeMemMMReglistImm4Lsl2";
let mayLoad = 1;
- InstrItinClass Itin = NoItinerary;
ComplexPattern Addr = addr;
}
class SWM16_MMR6_DESC
: MicroMipsInst16<(outs), (ins reglist16:$rt, mem_mm_4sp:$addr),
!strconcat("swm16", "\t$rt, $addr"), [],
- NoItinerary, FrmI>,
+ II_SWM, FrmI>,
MMR6Arch<"swm16">, MicroMipsR6Inst16 {
let DecoderMethod = "DecodeMemMMReglistImm4Lsl2";
let mayStore = 1;
- InstrItinClass Itin = NoItinerary;
ComplexPattern Addr = addr;
}
@@ -1219,31 +1317,34 @@ class JALRC_HB_MMR6_DESC {
dag InOperandList = (ins GPR32Opnd:$rs);
string AsmString = !strconcat("jalrc.hb", "\t$rt, $rs");
list<dag> Pattern = [];
- InstrItinClass Itinerary = NoItinerary;
+ InstrItinClass Itinerary = II_JALR_HB;
Format Form = FrmJ;
bit isIndirectBranch = 1;
bit hasDelaySlot = 0;
}
-class TLBINV_MMR6_DESC_BASE<string opstr> {
+class TLBINV_MMR6_DESC_BASE<string opstr, InstrItinClass Itin> {
dag OutOperandList = (outs);
dag InOperandList = (ins);
string AsmString = opstr;
list<dag> Pattern = [];
+ InstrItinClass Itinerary = Itin;
}
-class TLBINV_MMR6_DESC : TLBINV_MMR6_DESC_BASE<"tlbinv">;
-class TLBINVF_MMR6_DESC : TLBINV_MMR6_DESC_BASE<"tlbinvf">;
+class TLBINV_MMR6_DESC : TLBINV_MMR6_DESC_BASE<"tlbinv", II_TLBINV>;
+class TLBINVF_MMR6_DESC : TLBINV_MMR6_DESC_BASE<"tlbinvf", II_TLBINVF>;
-class DVPEVP_MMR6_DESC_BASE<string opstr> {
- dag OutOperandList = (outs);
- dag InOperandList = (ins GPR32Opnd:$rs);
+class DVPEVP_MMR6_DESC_BASE<string opstr, InstrItinClass Itin> {
+ dag OutOperandList = (outs GPR32Opnd:$rs);
+ dag InOperandList = (ins);
string AsmString = !strconcat(opstr, "\t$rs");
list<dag> Pattern = [];
+ InstrItinClass Itinerary = Itin;
+ bit hasUnModeledSideEffects = 1;
}
-class DVP_MMR6_DESC : DVPEVP_MMR6_DESC_BASE<"dvp">;
-class EVP_MMR6_DESC : DVPEVP_MMR6_DESC_BASE<"evp">;
+class DVP_MMR6_DESC : DVPEVP_MMR6_DESC_BASE<"dvp", II_DVP>;
+class EVP_MMR6_DESC : DVPEVP_MMR6_DESC_BASE<"evp", II_EVP>;
class BEQZC_MMR6_DESC
: CMP_CBR_EQNE_Z_DESC_BASE<"beqzc", brtarget21_mm, GPR32Opnd>,
@@ -1262,15 +1363,17 @@ class BRANCH_COP1_MMR6_DESC_BASE<string opstr> :
class BC1EQZC_MMR6_DESC : BRANCH_COP1_MMR6_DESC_BASE<"bc1eqzc">;
class BC1NEZC_MMR6_DESC : BRANCH_COP1_MMR6_DESC_BASE<"bc1nezc">;
-class BRANCH_COP2_MMR6_DESC_BASE<string opstr> : BRANCH_DESC_BASE {
+class BRANCH_COP2_MMR6_DESC_BASE<string opstr, InstrItinClass Itin>
+ : BRANCH_DESC_BASE {
dag InOperandList = (ins COP2Opnd:$rt, brtarget_mm:$offset);
dag OutOperandList = (outs);
string AsmString = !strconcat(opstr, "\t$rt, $offset");
list<Register> Defs = [AT];
+ InstrItinClass Itinerary = Itin;
}
-class BC2EQZC_MMR6_DESC : BRANCH_COP2_MMR6_DESC_BASE<"bc2eqzc">;
-class BC2NEZC_MMR6_DESC : BRANCH_COP2_MMR6_DESC_BASE<"bc2nezc">;
+class BC2EQZC_MMR6_DESC : BRANCH_COP2_MMR6_DESC_BASE<"bc2eqzc", II_BC2CCZ>;
+class BC2NEZC_MMR6_DESC : BRANCH_COP2_MMR6_DESC_BASE<"bc2nezc", II_BC2CCZ>;
class EXT_MMR6_DESC {
dag OutOperandList = (outs GPR32Opnd:$rt);
@@ -1314,6 +1417,7 @@ class BOVC_BNVC_MMR6_DESC_BASE<string instr_asm, Operand opnd,
dag OutOperandList = (outs);
string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $offset");
list<Register> Defs = [AT];
+ InstrItinClass Itinerary = II_BCCC;
}
class BOVC_MMR6_DESC : BOVC_BNVC_MMR6_DESC_BASE<"bovc", brtargetr6, GPR32Opnd>;
@@ -1517,8 +1621,8 @@ def CVT_S_W_MMR6 : StdMMR6Rel, CVT_S_W_MMR6_ENC, CVT_S_W_MMR6_DESC,
ISA_MICROMIPS32R6;
def CVT_S_L_MMR6 : StdMMR6Rel, CVT_S_L_MMR6_ENC, CVT_S_L_MMR6_DESC,
ISA_MICROMIPS32R6;
-defm S_MMR6 : CMP_CC_MMR6<0b000101, "s", FGR32Opnd>;
-defm D_MMR6 : CMP_CC_MMR6<0b010101, "d", FGR64Opnd>;
+defm S_MMR6 : CMP_CC_MMR6<0b000101, "s", FGR32Opnd, II_CMP_CC_S>;
+defm D_MMR6 : CMP_CC_MMR6<0b010101, "d", FGR64Opnd, II_CMP_CC_D>;
def ABS_S_MMR6 : StdMMR6Rel, ABS_S_MMR6_ENC, ABS_S_MMR6_DESC, ISA_MICROMIPS32R6;
def ABS_D_MMR6 : StdMMR6Rel, ABS_D_MMR6_ENC, ABS_D_MMR6_DESC, ISA_MICROMIPS32R6;
def FLOOR_L_S_MMR6 : StdMMR6Rel, FLOOR_L_S_MMR6_ENC, FLOOR_L_S_MMR6_DESC,
@@ -1549,10 +1653,6 @@ def SQRT_S_MMR6 : StdMMR6Rel, SQRT_S_MMR6_ENC, SQRT_S_MMR6_DESC,
ISA_MICROMIPS32R6;
def SQRT_D_MMR6 : StdMMR6Rel, SQRT_D_MMR6_ENC, SQRT_D_MMR6_DESC,
ISA_MICROMIPS32R6;
-def RSQRT_S_MMR6 : StdMMR6Rel, RSQRT_S_MMR6_ENC, RSQRT_S_MMR6_DESC,
- ISA_MICROMIPS32R6;
-def RSQRT_D_MMR6 : StdMMR6Rel, RSQRT_D_MMR6_ENC, RSQRT_D_MMR6_DESC,
- ISA_MICROMIPS32R6;
def SB_MMR6 : StdMMR6Rel, SB_MMR6_DESC, SB_MMR6_ENC, ISA_MICROMIPS32R6;
def SBE_MMR6 : StdMMR6Rel, SBE_MMR6_DESC, SBE_MMR6_ENC, ISA_MICROMIPS32R6;
def SCE_MMR6 : StdMMR6Rel, SCE_MMR6_DESC, SCE_MMR6_ENC, ISA_MICROMIPS32R6;
@@ -1593,9 +1693,6 @@ def JALRC_HB_MMR6 : R6MMR6Rel, JALRC_HB_MMR6_ENC, JALRC_HB_MMR6_DESC,
def EXT_MMR6 : StdMMR6Rel, EXT_MMR6_ENC, EXT_MMR6_DESC, ISA_MICROMIPS32R6;
def INS_MMR6 : StdMMR6Rel, INS_MMR6_ENC, INS_MMR6_DESC, ISA_MICROMIPS32R6;
def JALRC_MMR6 : R6MMR6Rel, JALRC_MMR6_ENC, JALRC_MMR6_DESC, ISA_MICROMIPS32R6;
-def RECIP_S_MMR6 : StdMMR6Rel, RECIP_S_MMR6_ENC, RECIP_S_MMR6_DESC,
- ISA_MICROMIPS32R6;
-def RECIP_D_MMR6 : StdMMR6Rel, RECIP_D_MMR6_ENC, RECIP_D_MMR6_DESC, ISA_MICROMIPS32R6;
def RINT_S_MMR6 : StdMMR6Rel, RINT_S_MMR6_ENC, RINT_S_MMR6_DESC,
ISA_MICROMIPS32R6;
def RINT_D_MMR6 : StdMMR6Rel, RINT_D_MMR6_ENC, RINT_D_MMR6_DESC, ISA_MICROMIPS32R6;
@@ -1661,6 +1758,10 @@ def BEQC_MMR6 : R6MMR6Rel, BEQC_MMR6_ENC, BEQC_MMR6_DESC, ISA_MICROMIPS32R6,
DecodeDisambiguates<"POP35GroupBranchMMR6">;
def BNEC_MMR6 : R6MMR6Rel, BNEC_MMR6_ENC, BNEC_MMR6_DESC, ISA_MICROMIPS32R6,
DecodeDisambiguates<"POP37GroupBranchMMR6">;
+def BLTZC_MMR6 : R6MMR6Rel, BLTZC_MMR6_ENC, BLTZC_MMR6_DESC, ISA_MICROMIPS32R6;
+def BLEZC_MMR6 : R6MMR6Rel, BLEZC_MMR6_ENC, BLEZC_MMR6_DESC, ISA_MICROMIPS32R6;
+def BGEZC_MMR6 : R6MMR6Rel, BGEZC_MMR6_ENC, BGEZC_MMR6_DESC, ISA_MICROMIPS32R6;
+def BGTZC_MMR6 : R6MMR6Rel, BGTZC_MMR6_ENC, BGTZC_MMR6_DESC, ISA_MICROMIPS32R6;
def BGEZALC_MMR6 : R6MMR6Rel, BGEZALC_MMR6_ENC, BGEZALC_MMR6_DESC,
ISA_MICROMIPS32R6;
def BGTZALC_MMR6 : R6MMR6Rel, BGTZALC_MMR6_ENC, BGTZALC_MMR6_DESC,
@@ -1727,6 +1828,10 @@ def : MipsInstAlias<"xor $rs, $imm",
def : MipsInstAlias<"not $rt, $rs",
(NOR_MMR6 GPR32Opnd:$rt, GPR32Opnd:$rs, ZERO), 0>,
ISA_MICROMIPS32R6;
+def : MipsInstAlias<"seh $rd", (SEH_MMR6 GPR32Opnd:$rd, GPR32Opnd:$rd), 0>,
+ ISA_MICROMIPS32R6;
+def : MipsInstAlias<"seb $rd", (SEB_MMR6 GPR32Opnd:$rd, GPR32Opnd:$rd), 0>,
+ ISA_MICROMIPS32R6;
//===----------------------------------------------------------------------===//
//
@@ -1772,3 +1877,5 @@ let AddedComplexity = 41 in {
def : LoadRegImmPat<LDC1_D64_MMR6, f64, load>, FGR_64, ISA_MICROMIPS32R6;
def : StoreRegImmPat<SDC1_D64_MMR6, f64>, FGR_64, ISA_MICROMIPS32R6;
}
+
+def TAILCALL_MMR6 : TailCall<BC_MMR6, brtarget26_mm>, ISA_MICROMIPS32R6;
diff --git a/contrib/llvm/lib/Target/Mips/MicroMips64r6InstrFormats.td b/contrib/llvm/lib/Target/Mips/MicroMips64r6InstrFormats.td
index 4add305522f7..26062bfb2b8e 100644
--- a/contrib/llvm/lib/Target/Mips/MicroMips64r6InstrFormats.td
+++ b/contrib/llvm/lib/Target/Mips/MicroMips64r6InstrFormats.td
@@ -219,3 +219,49 @@ class POOL32S_3R_FM_MMR6<string instr_asm, bits<9> funct>
let Inst{10-9} = 0b00;
let Inst{8-0} = funct;
}
+
+class POOL32S_DBITSWAP_FM_MMR6<string instr_asm> : MMR6Arch<instr_asm>,
+ MipsR6Inst {
+ bits<5> rt;
+ bits<5> rd;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b010110;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rd;
+ let Inst{15-12} = 0b0000;
+ let Inst{11-6} = 0b101100;
+ let Inst{5-0} = 0b111100;
+}
+
+class POOL32S_3RSA_FM_MMR6<string instr_asm> : MMR6Arch<instr_asm>,
+ MipsR6Inst {
+ bits<5> rt;
+ bits<5> rs;
+ bits<5> rd;
+ bits<2> sa;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b010110;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = rs;
+ let Inst{15-11} = rd;
+ let Inst{10-9} = sa;
+ let Inst{8-6} = 0b100;
+ let Inst{5-0} = 0b000100;
+}
+
+class PCREL_1ROFFSET19_FM_MMR6<string instr_asm> : MMR6Arch<instr_asm>,
+ MipsR6Inst {
+ bits<5> rt;
+ bits<19> offset;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b011110;
+ let Inst{25-21} = rt;
+ let Inst{20-19} = 0b10;
+ let Inst{18-0} = offset;
+}
diff --git a/contrib/llvm/lib/Target/Mips/MicroMips64r6InstrInfo.td b/contrib/llvm/lib/Target/Mips/MicroMips64r6InstrInfo.td
index 87c41deb85af..05aad515da46 100644
--- a/contrib/llvm/lib/Target/Mips/MicroMips64r6InstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/MicroMips64r6InstrInfo.td
@@ -67,6 +67,9 @@ class SD_MM64R6_ENC : LD_SD_32_2R_OFFSET16_FM_MMR6<"sd", 0b110110>;
class DSRL_MM64R6_ENC : POOL32S_2RSA5B0_FM_MMR6<"dsrl", 0b001000000>;
class DSRL32_MM64R6_ENC : POOL32S_2RSA5B0_FM_MMR6<"dsrl32", 0b001001000>;
class DSRLV_MM64R6_ENC : POOL32S_3R_FM_MMR6<"dsrlv", 0b001010000>;
+class DBITSWAP_MM64R6_ENC : POOL32S_DBITSWAP_FM_MMR6<"dbitswap">;
+class DLSA_MM64R6_ENC : POOL32S_3RSA_FM_MMR6<"dlsa">;
+class LWUPC_MM64R6_ENC : PCREL_1ROFFSET19_FM_MMR6<"lwupc">;
//===----------------------------------------------------------------------===//
//
@@ -74,24 +77,28 @@ class DSRLV_MM64R6_ENC : POOL32S_3R_FM_MMR6<"dsrlv", 0b001010000>;
//
//===----------------------------------------------------------------------===//
-class DAUI_MMR6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd>
+class DAUI_MMR6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
+ InstrItinClass Itin>
: MMR6Arch<instr_asm>, MipsR6Inst {
dag OutOperandList = (outs GPROpnd:$rt);
- dag InOperandList = (ins GPROpnd:$rs, simm16:$imm);
+ dag InOperandList = (ins GPROpnd:$rs, uimm16:$imm);
string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $imm");
list<dag> Pattern = [];
+ InstrItinClass Itinerary = Itin;
}
-class DAUI_MMR6_DESC : DAUI_MMR6_DESC_BASE<"daui", GPR64Opnd>;
+class DAUI_MMR6_DESC : DAUI_MMR6_DESC_BASE<"daui", GPR64Opnd, II_DAUI>;
-class DAHI_DATI_DESC_BASE<string instr_asm, RegisterOperand GPROpnd>
+class DAHI_DATI_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
+ InstrItinClass Itin>
: MMR6Arch<instr_asm>, MipsR6Inst {
dag OutOperandList = (outs GPROpnd:$rs);
- dag InOperandList = (ins GPROpnd:$rt, simm16:$imm);
- string AsmString = !strconcat(instr_asm, "\t$rt, $imm");
+ dag InOperandList = (ins GPROpnd:$rt, uimm16:$imm);
+ string AsmString = !strconcat(instr_asm, "\t$rt, $rs, $imm");
string Constraints = "$rs = $rt";
+ InstrItinClass Itinerary = Itin;
}
-class DAHI_MMR6_DESC : DAHI_DATI_DESC_BASE<"dahi", GPR64Opnd>;
-class DATI_MMR6_DESC : DAHI_DATI_DESC_BASE<"dati", GPR64Opnd>;
+class DAHI_MMR6_DESC : DAHI_DATI_DESC_BASE<"dahi", GPR64Opnd, II_DAHI>;
+class DATI_MMR6_DESC : DAHI_DATI_DESC_BASE<"dati", GPR64Opnd, II_DATI>;
class EXTBITS_DESC_BASE<string instr_asm, RegisterOperand RO, Operand PosOpnd,
Operand SizeOpnd, SDPatternOperator Op = null_frag>
@@ -115,26 +122,33 @@ class DEXTU_MMR6_DESC : EXTBITS_DESC_BASE<"dextu", GPR64Opnd, uimm5_plus32,
uimm5_plus1, MipsExt>;
class DALIGN_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
- Operand ImmOpnd> : MMR6Arch<instr_asm>, MipsR6Inst {
+ Operand ImmOpnd, InstrItinClass itin>
+ : MMR6Arch<instr_asm>, MipsR6Inst {
dag OutOperandList = (outs GPROpnd:$rd);
dag InOperandList = (ins GPROpnd:$rs, GPROpnd:$rt, ImmOpnd:$bp);
string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt, $bp");
list<dag> Pattern = [];
+ InstrItinClass Itinerary = itin;
}
-class DALIGN_MMR6_DESC : DALIGN_DESC_BASE<"dalign", GPR64Opnd, uimm3>;
+class DALIGN_MMR6_DESC : DALIGN_DESC_BASE<"dalign", GPR64Opnd, uimm3,
+ II_DALIGN>;
-class DDIV_MM64R6_DESC : DIVMOD_MMR6_DESC_BASE<"ddiv", GPR64Opnd, sdiv>;
-class DMOD_MM64R6_DESC : DIVMOD_MMR6_DESC_BASE<"dmod", GPR64Opnd, srem>;
-class DDIVU_MM64R6_DESC : DIVMOD_MMR6_DESC_BASE<"ddivu", GPR64Opnd, udiv>;
-class DMODU_MM64R6_DESC : DIVMOD_MMR6_DESC_BASE<"dmodu", GPR64Opnd, urem>;
+class DDIV_MM64R6_DESC : DIVMOD_MMR6_DESC_BASE<"ddiv", GPR64Opnd, II_DDIV,
+ sdiv>;
+class DMOD_MM64R6_DESC : DIVMOD_MMR6_DESC_BASE<"dmod", GPR64Opnd, II_DMOD,
+ srem>;
+class DDIVU_MM64R6_DESC : DIVMOD_MMR6_DESC_BASE<"ddivu", GPR64Opnd, II_DDIVU,
+ udiv>;
+class DMODU_MM64R6_DESC : DIVMOD_MMR6_DESC_BASE<"dmodu", GPR64Opnd, II_DMODU,
+ urem>;
class DCLO_MM64R6_DESC {
dag OutOperandList = (outs GPR64Opnd:$rt);
dag InOperandList = (ins GPR64Opnd:$rs);
string AsmString = !strconcat("dclo", "\t$rt, $rs");
list<dag> Pattern = [(set GPR64Opnd:$rt, (ctlz (not GPR64Opnd:$rs)))];
- InstrItinClass Itinerary = II_CLO;
+ InstrItinClass Itinerary = II_DCLO;
Format Form = FrmR;
string BaseOpcode = "dclo";
}
@@ -144,7 +158,7 @@ class DCLZ_MM64R6_DESC {
dag InOperandList = (ins GPR64Opnd:$rs);
string AsmString = !strconcat("dclz", "\t$rt, $rs");
list<dag> Pattern = [(set GPR64Opnd:$rt, (ctlz GPR64Opnd:$rs))];
- InstrItinClass Itinerary = II_CLZ;
+ InstrItinClass Itinerary = II_DCLZ;
Format Form = FrmR;
string BaseOpcode = "dclz";
}
@@ -154,16 +168,18 @@ class DINSU_MM64R6_DESC : InsBase<"dinsu", GPR64Opnd, uimm5_plus32,
class DINSM_MM64R6_DESC : InsBase<"dinsm", GPR64Opnd, uimm5, uimm_range_2_64>;
class DINS_MM64R6_DESC : InsBase<"dins", GPR64Opnd, uimm5, uimm5_inssize_plus1,
MipsIns>;
-class DMTC0_MM64R6_DESC : MTC0_MMR6_DESC_BASE<"dmtc0", COP0Opnd, GPR64Opnd>;
+class DMTC0_MM64R6_DESC : MTC0_MMR6_DESC_BASE<"dmtc0", COP0Opnd, GPR64Opnd,
+ II_DMTC0>;
class DMTC1_MM64R6_DESC : MTC1_MMR6_DESC_BASE<"dmtc1", FGR64Opnd, GPR64Opnd,
II_DMTC1, bitconvert>;
-class DMTC2_MM64R6_DESC : MTC2_MMR6_DESC_BASE<"dmtc2", COP2Opnd, GPR64Opnd>;
-
-class DMFC0_MM64R6_DESC : MFC0_MMR6_DESC_BASE<"dmfc0", GPR64Opnd, COP0Opnd>;
+class DMTC2_MM64R6_DESC : MTC2_MMR6_DESC_BASE<"dmtc2", COP2Opnd, GPR64Opnd,
+ II_DMTC2>;
+class DMFC0_MM64R6_DESC : MFC0_MMR6_DESC_BASE<"dmfc0", GPR64Opnd, COP0Opnd,
+ II_DMFC0>;
class DMFC1_MM64R6_DESC : MFC1_MMR6_DESC_BASE<"dmfc1", GPR64Opnd, FGR64Opnd,
II_DMFC1, bitconvert>;
-class DMFC2_MM64R6_DESC : MFC2_MMR6_DESC_BASE<"dmfc2", GPR64Opnd, COP2Opnd>;
-
+class DMFC2_MM64R6_DESC : MFC2_MMR6_DESC_BASE<"dmfc2", GPR64Opnd, COP2Opnd,
+ II_DMFC2>;
class DADD_MM64R6_DESC : ArithLogicR<"dadd", GPR64Opnd, 1, II_DADD>;
class DADDIU_MM64R6_DESC : ArithLogicI<"daddiu", simm16_64, GPR64Opnd,
II_DADDIU, immSExt16, add>,
@@ -188,7 +204,8 @@ class DSUB_DESC_BASE<string instr_asm, RegisterOperand RO,
class DSUB_MM64R6_DESC : DSUB_DESC_BASE<"dsub", GPR64Opnd, II_DSUB>;
class DSUBU_MM64R6_DESC : DSUB_DESC_BASE<"dsubu", GPR64Opnd, II_DSUBU, sub>;
-class LDPC_MM64R6_DESC : PCREL_MMR6_DESC_BASE<"ldpc", GPR64Opnd, simm18_lsl3>;
+class LDPC_MM64R6_DESC : PCREL_MMR6_DESC_BASE<"ldpc", GPR64Opnd, simm18_lsl3,
+ II_LDPC>;
class MUL_MM64R6_DESC_BASE<string opstr, RegisterOperand GPROpnd,
InstrItinClass Itin = NoItinerary,
@@ -207,19 +224,20 @@ class DMULU_MM64R6_DESC : MUL_MM64R6_DESC_BASE<"dmulu", GPR64Opnd, II_DMULU>;
class DMUHU_MM64R6_DESC : MUL_MM64R6_DESC_BASE<"dmuhu", GPR64Opnd, II_DMUHU,
mulhu>;
-class DSBH_DSHD_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> {
+class DSBH_DSHD_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
+ InstrItinClass Itin> {
dag OutOperandList = (outs GPROpnd:$rt);
dag InOperandList = (ins GPROpnd:$rs);
string AsmString = !strconcat(instr_asm, "\t$rt, $rs");
bit hasSideEffects = 0;
list<dag> Pattern = [];
- InstrItinClass Itinerary = NoItinerary;
+ InstrItinClass Itinerary = Itin;
Format Form = FrmR;
string BaseOpcode = instr_asm;
}
-class DSBH_MM64R6_DESC : DSBH_DSHD_DESC_BASE<"dsbh", GPR64Opnd>;
-class DSHD_MM64R6_DESC : DSBH_DSHD_DESC_BASE<"dshd", GPR64Opnd>;
+class DSBH_MM64R6_DESC : DSBH_DSHD_DESC_BASE<"dsbh", GPR64Opnd, II_DSBH>;
+class DSHD_MM64R6_DESC : DSBH_DSHD_DESC_BASE<"dshd", GPR64Opnd, II_DSHD>;
class SHIFT_ROTATE_IMM_MM64R6<string instr_asm, Operand ImmOpnd,
InstrItinClass itin,
@@ -308,6 +326,32 @@ class SD_MM64R6_DESC {
string DecoderMethod = "DecodeMemMMImm16";
}
+class DBITSWAP_MM64R6_DESC {
+ dag OutOperandList = (outs GPR64Opnd:$rd);
+ dag InOperandList = (ins GPR64Opnd:$rt);
+ string AsmString = !strconcat("dbitswap", "\t$rd, $rt");
+ list<dag> Pattern = [];
+ InstrItinClass Itinerary = II_DBITSWAP;
+}
+
+class DLSA_MM64R6_DESC {
+ dag OutOperandList = (outs GPR64Opnd:$rd);
+ dag InOperandList = (ins GPR64Opnd:$rt, GPR64Opnd:$rs, uimm2_plus1:$sa);
+ string AsmString = "dlsa\t$rt, $rs, $rd, $sa";
+ list<dag> Pattern = [];
+ InstrItinClass Itinerary = II_DLSA;
+}
+
+class LWUPC_MM64R6_DESC {
+ dag OutOperandList = (outs GPR64Opnd:$rt);
+ dag InOperandList = (ins simm19_lsl2:$offset);
+ string AsmString = "lwupc\t$rt, $offset";
+ list<dag> Pattern = [];
+ InstrItinClass Itinerary = II_LWUPC;
+ bit mayLoad = 1;
+ bit IsPCRelativeLoad = 1;
+}
+
//===----------------------------------------------------------------------===//
//
// Instruction Definitions
@@ -316,8 +360,10 @@ class SD_MM64R6_DESC {
let DecoderNamespace = "MicroMipsR6" in {
def DAUI_MM64R6 : StdMMR6Rel, DAUI_MMR6_DESC, DAUI_MMR6_ENC, ISA_MICROMIPS64R6;
- def DAHI_MM64R6 : StdMMR6Rel, DAHI_MMR6_DESC, DAHI_MMR6_ENC, ISA_MICROMIPS64R6;
- def DATI_MM64R6 : StdMMR6Rel, DATI_MMR6_DESC, DATI_MMR6_ENC, ISA_MICROMIPS64R6;
+ let DecoderMethod = "DecodeDAHIDATIMMR6" in {
+ def DAHI_MM64R6 : StdMMR6Rel, DAHI_MMR6_DESC, DAHI_MMR6_ENC, ISA_MICROMIPS64R6;
+ def DATI_MM64R6 : StdMMR6Rel, DATI_MMR6_DESC, DATI_MMR6_ENC, ISA_MICROMIPS64R6;
+ }
def DEXT_MM64R6 : StdMMR6Rel, DEXT_MMR6_DESC, DEXT_MMR6_ENC,
ISA_MICROMIPS64R6;
def DEXTM_MM64R6 : StdMMR6Rel, DEXTM_MMR6_DESC, DEXTM_MMR6_ENC,
@@ -412,8 +458,17 @@ let DecoderNamespace = "MicroMipsR6" in {
ISA_MICROMIPS64R6;
def DSRLV_MM64R6 : StdMMR6Rel, DSRLV_MM64R6_ENC, DSRLV_MM64R6_DESC,
ISA_MICROMIPS64R6;
+ def DBITSWAP_MM64R6 : R6MMR6Rel, DBITSWAP_MM64R6_ENC, DBITSWAP_MM64R6_DESC,
+ ISA_MICROMIPS64R6;
+ def DLSA_MM64R6 : R6MMR6Rel, DLSA_MM64R6_ENC, DLSA_MM64R6_DESC,
+ ISA_MICROMIPS64R6;
+ def LWUPC_MM64R6 : R6MMR6Rel, LWUPC_MM64R6_ENC, LWUPC_MM64R6_DESC,
+ ISA_MICROMIPS64R6;
}
+let AdditionalPredicates = [InMicroMips] in
+defm : MaterializeImms<i64, ZERO_64, DADDIU_MM64R6, LUi64, ORi64>;
+
//===----------------------------------------------------------------------===//
//
// Arbitrary patterns that map to one or more instructions
@@ -503,11 +558,11 @@ def : MipsInstAlias<"dneg $rt, $rs",
(DSUB_MM64R6 GPR64Opnd:$rt, ZERO_64, GPR64Opnd:$rs), 1>,
ISA_MICROMIPS64R6;
def : MipsInstAlias<"dneg $rt",
- (DSUB_MM64R6 GPR64Opnd:$rt, ZERO_64, GPR64Opnd:$rt), 0>,
+ (DSUB_MM64R6 GPR64Opnd:$rt, ZERO_64, GPR64Opnd:$rt), 1>,
ISA_MICROMIPS64R6;
def : MipsInstAlias<"dnegu $rt, $rs",
(DSUBU_MM64R6 GPR64Opnd:$rt, ZERO_64, GPR64Opnd:$rs), 1>,
ISA_MICROMIPS64R6;
def : MipsInstAlias<"dnegu $rt",
- (DSUBU_MM64R6 GPR64Opnd:$rt, ZERO_64, GPR64Opnd:$rt), 0>,
+ (DSUBU_MM64R6 GPR64Opnd:$rt, ZERO_64, GPR64Opnd:$rt), 1>,
ISA_MICROMIPS64R6;
diff --git a/contrib/llvm/lib/Target/Mips/MicroMipsInstrFPU.td b/contrib/llvm/lib/Target/Mips/MicroMipsInstrFPU.td
index 7b0e00bd1c3c..fc83761e409b 100644
--- a/contrib/llvm/lib/Target/Mips/MicroMipsInstrFPU.td
+++ b/contrib/llvm/lib/Target/Mips/MicroMipsInstrFPU.td
@@ -99,11 +99,6 @@ def MOVT_D32_MM : MMRel, CMov_F_F_FT<"movt.d", AFGR64Opnd, II_MOVT_D,
MipsCMovFP_T>, CMov_F_F_FM_MM<0x60, 1>;
def MOVF_D32_MM : MMRel, CMov_F_F_FT<"movf.d", AFGR64Opnd, II_MOVF_D,
MipsCMovFP_F>, CMov_F_F_FM_MM<0x20, 1>;
-
-def CFC1_MM : MMRel, MFC1_FT<"cfc1", GPR32Opnd, CCROpnd, II_CFC1>,
- MFC1_FM_MM<0x40>;
-def CTC1_MM : MMRel, MTC1_FT<"ctc1", CCROpnd, GPR32Opnd, II_CTC1>,
- MFC1_FM_MM<0x60>;
def MFC1_MM : MMRel, MFC1_FT<"mfc1", GPR32Opnd, FGR32Opnd,
II_MFC1, bitconvert>, MFC1_FM_MM<0x80>;
def MTC1_MM : MMRel, MTC1_FT<"mtc1", FGR32Opnd, GPR32Opnd,
@@ -141,6 +136,22 @@ let AdditionalPredicates = [InMicroMips] in {
MFC1_FM_MM<0xe0>, ISA_MIPS32R2, FGR_32;
def MFHC1_MM : MMRel, MFC1_FT<"mfhc1", GPR32Opnd, AFGR64Opnd, II_MFHC1>,
MFC1_FM_MM<0xc0>, ISA_MIPS32R2, FGR_32;
+ let DecoderNamespace = "MicroMips" in {
+ def CFC1_MM : MMRel, MFC1_FT<"cfc1", GPR32Opnd, CCROpnd, II_CFC1>,
+ MFC1_FM_MM<0x40>;
+ def CTC1_MM : MMRel, MTC1_FT<"ctc1", CCROpnd, GPR32Opnd, II_CTC1>,
+ MFC1_FM_MM<0x60>;
+ def RECIP_S_MM : MMRel, ABSS_FT<"recip.s", FGR32Opnd, FGR32Opnd,
+ II_RECIP_S>,
+ ROUND_W_FM_MM<0b0, 0b01001000>;
+ def RECIP_D_MM : MMRel, ABSS_FT<"recip.d", AFGR64Opnd, AFGR64Opnd,
+ II_RECIP_D>, ROUND_W_FM_MM<0b1, 0b01001000>;
+ def RSQRT_S_MM : MMRel, ABSS_FT<"rsqrt.s", FGR32Opnd, FGR32Opnd,
+ II_RECIP_S>,
+ ROUND_W_FM_MM<0b0, 0b00001000>;
+ def RSQRT_D_MM : MMRel, ABSS_FT<"rsqrt.d", AFGR64Opnd, AFGR64Opnd,
+ II_RECIP_D>, ROUND_W_FM_MM<0b1, 0b00001000>;
+ }
let DecoderNamespace = "MicroMips", DecoderMethod = "DecodeFMemMMR2" in {
def LDC1_MM : MMRel, LW_FT<"ldc1", AFGR64Opnd, mem_mm_16, II_LDC1, load>,
LW_FM_MM<0x2f>, FGR_32 {
diff --git a/contrib/llvm/lib/Target/Mips/MicroMipsInstrFormats.td b/contrib/llvm/lib/Target/Mips/MicroMipsInstrFormats.td
index 79ef6482180d..8b595f9e6c4c 100644
--- a/contrib/llvm/lib/Target/Mips/MicroMipsInstrFormats.td
+++ b/contrib/llvm/lib/Target/Mips/MicroMipsInstrFormats.td
@@ -599,6 +599,17 @@ class SYNC_FM_MM : MMArch {
let Inst{5-0} = 0x3c;
}
+class SYNCI_FM_MM : MMArch {
+ bits<5> rs;
+ bits<16> offset;
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b010000;
+ let Inst{25-21} = 0b10000;
+ let Inst{20-16} = rs;
+ let Inst{15-0} = offset;
+}
+
class BRK_FM_MM : MMArch {
bits<10> code_1;
bits<10> code_2;
@@ -1023,3 +1034,16 @@ class ADDIUPC_FM_MM {
let Inst{25-23} = rs;
let Inst{22-0} = imm;
}
+
+class POOL32A_CFTC2_FM_MM<bits<10> funct> : MMArch {
+ bits<5> rt;
+ bits<5> impl;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = 0b000000;
+ let Inst{25-21} = rt;
+ let Inst{20-16} = impl;
+ let Inst{15-6} = funct;
+ let Inst{5-0} = 0b111100;
+}
diff --git a/contrib/llvm/lib/Target/Mips/MicroMipsInstrInfo.td b/contrib/llvm/lib/Target/Mips/MicroMipsInstrInfo.td
index f27370f57fa0..c0de9e7390a4 100644
--- a/contrib/llvm/lib/Target/Mips/MicroMipsInstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/MicroMipsInstrInfo.td
@@ -194,20 +194,20 @@ class CompactBranchMM<string opstr, DAGOperand opnd, PatFrag cond_op,
let canFoldAsLoad = 1 in
class LoadLeftRightMM<string opstr, SDNode OpNode, RegisterOperand RO,
- Operand MemOpnd> :
+ Operand MemOpnd, InstrItinClass Itin> :
InstSE<(outs RO:$rt), (ins MemOpnd:$addr, RO:$src),
!strconcat(opstr, "\t$rt, $addr"),
[(set RO:$rt, (OpNode addrimm12:$addr, RO:$src))],
- NoItinerary, FrmI> {
+ Itin, FrmI> {
let DecoderMethod = "DecodeMemMMImm12";
string Constraints = "$src = $rt";
}
class StoreLeftRightMM<string opstr, SDNode OpNode, RegisterOperand RO,
- Operand MemOpnd>:
+ Operand MemOpnd, InstrItinClass Itin>:
InstSE<(outs), (ins RO:$rt, MemOpnd:$addr),
!strconcat(opstr, "\t$rt, $addr"),
- [(OpNode RO:$rt, addrimm12:$addr)], NoItinerary, FrmI> {
+ [(OpNode RO:$rt, addrimm12:$addr)], Itin, FrmI> {
let DecoderMethod = "DecodeMemMMImm12";
}
@@ -248,39 +248,37 @@ def regpair : Operand<i32> {
let MIOperandInfo = (ops ptr_rc, ptr_rc);
}
-class StorePairMM<string opstr, InstrItinClass Itin = NoItinerary,
- ComplexPattern Addr = addr> :
- InstSE<(outs), (ins regpair:$rt, mem_simm12:$addr),
- !strconcat(opstr, "\t$rt, $addr"), [], Itin, FrmI, opstr> {
+class StorePairMM<string opstr, ComplexPattern Addr = addr>
+ : InstSE<(outs), (ins regpair:$rt, mem_simm12:$addr),
+ !strconcat(opstr, "\t$rt, $addr"), [], II_SWP, FrmI, opstr> {
let DecoderMethod = "DecodeMemMMImm12";
let mayStore = 1;
}
-class LoadPairMM<string opstr, InstrItinClass Itin = NoItinerary,
- ComplexPattern Addr = addr> :
- InstSE<(outs regpair:$rt), (ins mem_simm12:$addr),
- !strconcat(opstr, "\t$rt, $addr"), [], Itin, FrmI, opstr> {
+class LoadPairMM<string opstr, ComplexPattern Addr = addr>
+ : InstSE<(outs regpair:$rt), (ins mem_simm12:$addr),
+ !strconcat(opstr, "\t$rt, $addr"), [], II_LWP, FrmI, opstr> {
let DecoderMethod = "DecodeMemMMImm12";
let mayLoad = 1;
}
class LLBaseMM<string opstr, RegisterOperand RO> :
InstSE<(outs RO:$rt), (ins mem_mm_12:$addr),
- !strconcat(opstr, "\t$rt, $addr"), [], NoItinerary, FrmI> {
+ !strconcat(opstr, "\t$rt, $addr"), [], II_LL, FrmI> {
let DecoderMethod = "DecodeMemMMImm12";
let mayLoad = 1;
}
class LLEBaseMM<string opstr, RegisterOperand RO> :
InstSE<(outs RO:$rt), (ins mem_simm9:$addr),
- !strconcat(opstr, "\t$rt, $addr"), [], NoItinerary, FrmI> {
+ !strconcat(opstr, "\t$rt, $addr"), [], II_LLE, FrmI> {
let DecoderMethod = "DecodeMemMMImm9";
let mayLoad = 1;
}
class SCBaseMM<string opstr, RegisterOperand RO> :
InstSE<(outs RO:$dst), (ins RO:$rt, mem_mm_12:$addr),
- !strconcat(opstr, "\t$rt, $addr"), [], NoItinerary, FrmI> {
+ !strconcat(opstr, "\t$rt, $addr"), [], II_SC, FrmI> {
let DecoderMethod = "DecodeMemMMImm12";
let mayStore = 1;
let Constraints = "$rt = $dst";
@@ -288,7 +286,7 @@ class SCBaseMM<string opstr, RegisterOperand RO> :
class SCEBaseMM<string opstr, RegisterOperand RO> :
InstSE<(outs RO:$dst), (ins RO:$rt, mem_simm9:$addr),
- !strconcat(opstr, "\t$rt, $addr"), [], NoItinerary, FrmI> {
+ !strconcat(opstr, "\t$rt, $addr"), [], II_SCE, FrmI> {
let DecoderMethod = "DecodeMemMMImm9";
let mayStore = 1;
let Constraints = "$rt = $dst";
@@ -331,7 +329,7 @@ class LogicRMM16<string opstr, RegisterOperand RO,
class NotMM16<string opstr, RegisterOperand RO> :
MicroMipsInst16<(outs RO:$rt), (ins RO:$rs),
!strconcat(opstr, "\t$rt, $rs"),
- [(set RO:$rt, (not RO:$rs))], NoItinerary, FrmR>;
+ [(set RO:$rt, (not RO:$rs))], II_NOT, FrmR>;
class ShiftIMM16<string opstr, Operand ImmOpnd, RegisterOperand RO,
InstrItinClass Itin = NoItinerary> :
@@ -385,23 +383,23 @@ class LoadGPMM16<string opstr, DAGOperand RO, InstrItinClass Itin,
class AddImmUR2<string opstr, RegisterOperand RO> :
MicroMipsInst16<(outs RO:$rd), (ins RO:$rs, simm3_lsa2:$imm),
!strconcat(opstr, "\t$rd, $rs, $imm"),
- [], NoItinerary, FrmR> {
+ [], II_ADDIU, FrmR> {
let isCommutable = 1;
}
class AddImmUS5<string opstr, RegisterOperand RO> :
MicroMipsInst16<(outs RO:$dst), (ins RO:$rd, simm4:$imm),
- !strconcat(opstr, "\t$rd, $imm"), [], NoItinerary, FrmR> {
+ !strconcat(opstr, "\t$rd, $imm"), [], II_ADDIU, FrmR> {
let Constraints = "$rd = $dst";
}
class AddImmUR1SP<string opstr, RegisterOperand RO> :
MicroMipsInst16<(outs RO:$rd), (ins uimm6_lsl2:$imm),
- !strconcat(opstr, "\t$rd, $imm"), [], NoItinerary, FrmR>;
+ !strconcat(opstr, "\t$rd, $imm"), [], II_ADDIU, FrmR>;
class AddImmUSP<string opstr> :
MicroMipsInst16<(outs), (ins simm9_addiusp:$imm),
- !strconcat(opstr, "\t$imm"), [], NoItinerary, FrmI>;
+ !strconcat(opstr, "\t$imm"), [], II_ADDIU, FrmI>;
class MoveFromHILOMM<string opstr, RegisterOperand RO, Register UseReg> :
MicroMipsInst16<(outs RO:$rd), (ins), !strconcat(opstr, "\t$rd"),
@@ -410,17 +408,15 @@ class MoveFromHILOMM<string opstr, RegisterOperand RO, Register UseReg> :
let hasSideEffects = 0;
}
-class MoveMM16<string opstr, RegisterOperand RO, bit isComm = 0,
- InstrItinClass Itin = NoItinerary> :
- MicroMipsInst16<(outs RO:$rd), (ins RO:$rs),
- !strconcat(opstr, "\t$rd, $rs"), [], Itin, FrmR> {
- let isCommutable = isComm;
+class MoveMM16<string opstr, RegisterOperand RO>
+ : MicroMipsInst16<(outs RO:$rd), (ins RO:$rs),
+ !strconcat(opstr, "\t$rd, $rs"), [], II_MOVE, FrmR> {
let isReMaterializable = 1;
}
class LoadImmMM16<string opstr, Operand Od, RegisterOperand RO> :
MicroMipsInst16<(outs RO:$rd), (ins Od:$imm),
- !strconcat(opstr, "\t$rd, $imm"), [], NoItinerary, FrmI> {
+ !strconcat(opstr, "\t$rd, $imm"), [], II_LI, FrmI> {
let isReMaterializable = 1;
}
@@ -472,10 +468,10 @@ class JumpRegCMM16<string opstr, RegisterOperand RO> :
}
// Break16 and Sdbbp16
-class BrkSdbbp16MM<string opstr> :
+class BrkSdbbp16MM<string opstr, InstrItinClass Itin> :
MicroMipsInst16<(outs), (ins uimm4:$code_),
!strconcat(opstr, "\t$code_"),
- [], NoItinerary, FrmOther>;
+ [], Itin, FrmOther>;
class CBranchZeroMM<string opstr, DAGOperand opnd, RegisterOperand RO> :
MicroMipsInst16<(outs), (ins RO:$rs, opnd:$offset),
@@ -505,18 +501,17 @@ let isCall = 1, hasDelaySlot = 1, Defs = [RA] in {
}
class LoadWordIndexedScaledMM<string opstr, RegisterOperand RO,
- InstrItinClass Itin = NoItinerary,
SDPatternOperator OpNode = null_frag> :
InstSE<(outs RO:$rd), (ins PtrRC:$base, PtrRC:$index),
- !strconcat(opstr, "\t$rd, ${index}(${base})"), [], Itin, FrmFI>;
+ !strconcat(opstr, "\t$rd, ${index}(${base})"), [], II_LWXS, FrmFI>;
class PrefetchIndexed<string opstr> :
InstSE<(outs), (ins PtrRC:$base, PtrRC:$index, uimm5:$hint),
- !strconcat(opstr, "\t$hint, ${index}(${base})"), [], NoItinerary, FrmOther>;
+ !strconcat(opstr, "\t$hint, ${index}(${base})"), [], II_PREF, FrmOther>;
class AddImmUPC<string opstr, RegisterOperand RO> :
InstSE<(outs RO:$rs), (ins simm23_lsl2:$imm),
- !strconcat(opstr, "\t$rs, $imm"), [], NoItinerary, FrmR>;
+ !strconcat(opstr, "\t$rs, $imm"), [], II_ADDIU, FrmR>;
/// A list of registers used by load/store multiple instructions.
def RegListAsmOperand : AsmOperandClass {
@@ -650,40 +645,50 @@ def BEQZ16_MM : CBranchZeroMM<"beqz16", brtarget7_mm, GPRMM16Opnd>,
def BNEZ16_MM : CBranchZeroMM<"bnez16", brtarget7_mm, GPRMM16Opnd>,
BEQNEZ_FM_MM16<0x2b>;
def B16_MM : UncondBranchMM16<"b16">, B16_FM;
-def BREAK16_MM : BrkSdbbp16MM<"break16">, BRKSDBBP16_FM_MM<0x28>,
+def BREAK16_MM : BrkSdbbp16MM<"break16", II_BREAK>, BRKSDBBP16_FM_MM<0x28>,
ISA_MICROMIPS_NOT_32R6_64R6;
-def SDBBP16_MM : BrkSdbbp16MM<"sdbbp16">, BRKSDBBP16_FM_MM<0x2C>,
+def SDBBP16_MM : BrkSdbbp16MM<"sdbbp16", II_SDBBP>, BRKSDBBP16_FM_MM<0x2C>,
ISA_MICROMIPS_NOT_32R6_64R6;
let DecoderNamespace = "MicroMips" in {
/// Load and Store Instructions - multiple
- def SWM16_MM : StoreMultMM16<"swm16">, LWM_FM_MM16<0x5>,
+ def SWM16_MM : StoreMultMM16<"swm16", II_SWM>, LWM_FM_MM16<0x5>,
ISA_MICROMIPS32_NOT_MIPS32R6;
- def LWM16_MM : LoadMultMM16<"lwm16">, LWM_FM_MM16<0x4>,
+ def LWM16_MM : LoadMultMM16<"lwm16", II_LWM>, LWM_FM_MM16<0x4>,
ISA_MICROMIPS32_NOT_MIPS32R6;
+ let AdditionalPredicates = [InMicroMips] in {
+ def CFC2_MM : InstSE<(outs GPR32Opnd:$rt), (ins COP2Opnd:$impl),
+ "cfc2\t$rt, $impl", [], II_CFC2, FrmFR, "cfc2">,
+ POOL32A_CFTC2_FM_MM<0b1100110100>;
+ def CTC2_MM : InstSE<(outs COP2Opnd:$impl), (ins GPR32Opnd:$rt),
+ "ctc2\t$rt, $impl", [], II_CTC2, FrmFR, "ctc2">,
+ POOL32A_CFTC2_FM_MM<0b1101110100>;
+ }
}
class WaitMM<string opstr> :
InstSE<(outs), (ins uimm10:$code_), !strconcat(opstr, "\t$code_"), [],
- NoItinerary, FrmOther, opstr>;
+ II_WAIT, FrmOther, opstr>;
-let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in {
+let DecoderNamespace = "MicroMips", Predicates = [InMicroMips, NotMips32r6,
+ NotMips64r6] in {
/// Compact Branch Instructions
def BEQZC_MM : CompactBranchMM<"beqzc", brtarget_mm, seteq, GPR32Opnd>,
COMPACT_BRANCH_FM_MM<0x7>;
def BNEZC_MM : CompactBranchMM<"bnezc", brtarget_mm, setne, GPR32Opnd>,
COMPACT_BRANCH_FM_MM<0x5>;
-
+}
+let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in {
/// Arithmetic Instructions (ALU Immediate)
- def ADDiu_MM : MMRel, ArithLogicI<"addiu", simm16, GPR32Opnd>,
+ def ADDiu_MM : MMRel, ArithLogicI<"addiu", simm16, GPR32Opnd, II_ADDIU>,
ADDI_FM_MM<0xc>;
- def ADDi_MM : MMRel, ArithLogicI<"addi", simm16, GPR32Opnd>,
+ def ADDi_MM : MMRel, ArithLogicI<"addi", simm16, GPR32Opnd, II_ADDI>,
ADDI_FM_MM<0x4>;
def SLTi_MM : MMRel, SetCC_I<"slti", setlt, simm16, immSExt16, GPR32Opnd>,
SLTI_FM_MM<0x24>;
def SLTiu_MM : MMRel, SetCC_I<"sltiu", setult, simm16, immSExt16, GPR32Opnd>,
SLTI_FM_MM<0x2c>;
- def ANDi_MM : MMRel, ArithLogicI<"andi", uimm16, GPR32Opnd>,
+ def ANDi_MM : MMRel, ArithLogicI<"andi", uimm16, GPR32Opnd, II_ANDI>,
ADDI_FM_MM<0x34>;
def ORi_MM : MMRel, ArithLogicI<"ori", uimm16, GPR32Opnd, II_ORI, immZExt16,
or>, ADDI_FM_MM<0x14>;
@@ -699,9 +704,12 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in {
ADD_FM_MM<0, 0x150>;
def SUBu_MM : MMRel, ArithLogicR<"subu", GPR32Opnd, 0, II_SUBU, sub>,
ADD_FM_MM<0, 0x1d0>;
- def MUL_MM : MMRel, ArithLogicR<"mul", GPR32Opnd>, ADD_FM_MM<0, 0x210>;
- def ADD_MM : MMRel, ArithLogicR<"add", GPR32Opnd>, ADD_FM_MM<0, 0x110>;
- def SUB_MM : MMRel, ArithLogicR<"sub", GPR32Opnd>, ADD_FM_MM<0, 0x190>;
+ def MUL_MM : MMRel, ArithLogicR<"mul", GPR32Opnd, 1, II_MUL>,
+ ADD_FM_MM<0, 0x210>;
+ def ADD_MM : MMRel, ArithLogicR<"add", GPR32Opnd, 1, II_ADD>,
+ ADD_FM_MM<0, 0x110>;
+ def SUB_MM : MMRel, ArithLogicR<"sub", GPR32Opnd, 0, II_SUB>,
+ ADD_FM_MM<0, 0x190>;
def SLT_MM : MMRel, SetCC_R<"slt", setlt, GPR32Opnd>, ADD_FM_MM<0, 0x350>;
def SLTu_MM : MMRel, SetCC_R<"sltu", setult, GPR32Opnd>,
ADD_FM_MM<0, 0x390>;
@@ -750,60 +758,69 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in {
/// Load and Store Instructions - aligned
let DecoderMethod = "DecodeMemMMImm16" in {
- def LB_MM : LoadMemory<"lb", GPR32Opnd, mem_mm_16>, MMRel, LW_FM_MM<0x7>;
- def LBu_MM : LoadMemory<"lbu", GPR32Opnd, mem_mm_16>, MMRel, LW_FM_MM<0x5>;
+ def LB_MM : LoadMemory<"lb", GPR32Opnd, mem_mm_16, null_frag, II_LB>,
+ MMRel, LW_FM_MM<0x7>;
+ def LBu_MM : LoadMemory<"lbu", GPR32Opnd, mem_mm_16, null_frag, II_LBU>,
+ MMRel, LW_FM_MM<0x5>;
def LH_MM : LoadMemory<"lh", GPR32Opnd, mem_simm16, sextloadi16, II_LH,
addrDefault>, MMRel, LW_FM_MM<0xf>;
def LHu_MM : LoadMemory<"lhu", GPR32Opnd, mem_simm16, zextloadi16, II_LHU>,
MMRel, LW_FM_MM<0xd>;
- def LW_MM : Load<"lw", GPR32Opnd>, MMRel, LW_FM_MM<0x3f>;
- def SB_MM : Store<"sb", GPR32Opnd>, MMRel, LW_FM_MM<0x6>;
- def SH_MM : Store<"sh", GPR32Opnd>, MMRel, LW_FM_MM<0xe>;
- def SW_MM : Store<"sw", GPR32Opnd>, MMRel, LW_FM_MM<0x3e>;
+ def LW_MM : Load<"lw", GPR32Opnd, null_frag, II_LW>, MMRel, LW_FM_MM<0x3f>;
+ def SB_MM : Store<"sb", GPR32Opnd, null_frag, II_SB>, MMRel,
+ LW_FM_MM<0x6>;
+ def SH_MM : Store<"sh", GPR32Opnd, null_frag, II_SH>, MMRel,
+ LW_FM_MM<0xe>;
+ def SW_MM : Store<"sw", GPR32Opnd, null_frag, II_SW>, MMRel,
+ LW_FM_MM<0x3e>;
}
let DecoderMethod = "DecodeMemMMImm9" in {
- def LBE_MM : Load<"lbe", GPR32Opnd>, POOL32C_LHUE_FM_MM<0x18, 0x6, 0x4>;
- def LBuE_MM : Load<"lbue", GPR32Opnd>, POOL32C_LHUE_FM_MM<0x18, 0x6, 0x0>;
- def LHE_MM : LoadMemory<"lhe", GPR32Opnd, mem_simm9>,
+ def LBE_MM : Load<"lbe", GPR32Opnd, null_frag, II_LBE>,
+ POOL32C_LHUE_FM_MM<0x18, 0x6, 0x4>;
+ def LBuE_MM : Load<"lbue", GPR32Opnd, null_frag, II_LBUE>,
+ POOL32C_LHUE_FM_MM<0x18, 0x6, 0x0>;
+ def LHE_MM : LoadMemory<"lhe", GPR32Opnd, mem_simm9, null_frag, II_LHE>,
POOL32C_LHUE_FM_MM<0x18, 0x6, 0x5>;
- def LHuE_MM : LoadMemory<"lhue", GPR32Opnd, mem_simm9>,
+ def LHuE_MM : LoadMemory<"lhue", GPR32Opnd, mem_simm9, null_frag, II_LHUE>,
POOL32C_LHUE_FM_MM<0x18, 0x6, 0x1>;
- def LWE_MM : LoadMemory<"lwe", GPR32Opnd, mem_simm9>,
+ def LWE_MM : LoadMemory<"lwe", GPR32Opnd, mem_simm9, null_frag, II_LWE>,
POOL32C_LHUE_FM_MM<0x18, 0x6, 0x7>;
- def SBE_MM : StoreMemory<"sbe", GPR32Opnd, mem_simm9>,
+ def SBE_MM : StoreMemory<"sbe", GPR32Opnd, mem_simm9, null_frag, II_SBE>,
POOL32C_LHUE_FM_MM<0x18, 0xa, 0x4>;
- def SHE_MM : StoreMemory<"she", GPR32Opnd, mem_simm9>,
+ def SHE_MM : StoreMemory<"she", GPR32Opnd, mem_simm9, null_frag, II_SHE>,
POOL32C_LHUE_FM_MM<0x18, 0xa, 0x5>;
- def SWE_MM : StoreMemory<"swe", GPR32Opnd, mem_simm9>,
+ def SWE_MM : StoreMemory<"swe", GPR32Opnd, mem_simm9, null_frag, II_SWE>,
POOL32C_LHUE_FM_MM<0x18, 0xa, 0x7>;
}
def LWXS_MM : LoadWordIndexedScaledMM<"lwxs", GPR32Opnd>, LWXS_FM_MM<0x118>;
/// Load and Store Instructions - unaligned
- def LWL_MM : LoadLeftRightMM<"lwl", MipsLWL, GPR32Opnd, mem_mm_12>,
+ def LWL_MM : LoadLeftRightMM<"lwl", MipsLWL, GPR32Opnd, mem_mm_12, II_LWL>,
LWL_FM_MM<0x0>;
- def LWR_MM : LoadLeftRightMM<"lwr", MipsLWR, GPR32Opnd, mem_mm_12>,
+ def LWR_MM : LoadLeftRightMM<"lwr", MipsLWR, GPR32Opnd, mem_mm_12, II_LWR>,
LWL_FM_MM<0x1>;
- def SWL_MM : StoreLeftRightMM<"swl", MipsSWL, GPR32Opnd, mem_mm_12>,
+ def SWL_MM : StoreLeftRightMM<"swl", MipsSWL, GPR32Opnd, mem_mm_12, II_SWL>,
LWL_FM_MM<0x8>;
- def SWR_MM : StoreLeftRightMM<"swr", MipsSWR, GPR32Opnd, mem_mm_12>,
+ def SWR_MM : StoreLeftRightMM<"swr", MipsSWR, GPR32Opnd, mem_mm_12, II_SWR>,
LWL_FM_MM<0x9>;
let DecoderMethod = "DecodeMemMMImm9" in {
- def LWLE_MM : LoadLeftRightMM<"lwle", MipsLWL, GPR32Opnd, mem_mm_9>,
- POOL32C_STEVA_LDEVA_FM_MM<0x6, 0x2>;
- def LWRE_MM : LoadLeftRightMM<"lwre", MipsLWR, GPR32Opnd, mem_mm_9>,
- POOL32C_STEVA_LDEVA_FM_MM<0x6, 0x3>;
- def SWLE_MM : StoreLeftRightMM<"swle", MipsSWL, GPR32Opnd, mem_mm_9>,
+ def LWLE_MM : LoadLeftRightMM<"lwle", MipsLWL, GPR32Opnd, mem_mm_9,
+ II_LWLE>, POOL32C_STEVA_LDEVA_FM_MM<0x6, 0x2>;
+ def LWRE_MM : LoadLeftRightMM<"lwre", MipsLWR, GPR32Opnd, mem_mm_9,
+ II_LWRE>, POOL32C_STEVA_LDEVA_FM_MM<0x6, 0x3>;
+ def SWLE_MM : StoreLeftRightMM<"swle", MipsSWL, GPR32Opnd, mem_mm_9,
+ II_SWLE>,
POOL32C_STEVA_LDEVA_FM_MM<0xa, 0x0>;
- def SWRE_MM : StoreLeftRightMM<"swre", MipsSWR, GPR32Opnd, mem_mm_9>,
+ def SWRE_MM : StoreLeftRightMM<"swre", MipsSWR, GPR32Opnd, mem_mm_9,
+ II_SWRE>,
POOL32C_STEVA_LDEVA_FM_MM<0xa, 0x1>, ISA_MIPS1_NOT_32R6_64R6;
}
/// Load and Store Instructions - multiple
- def SWM32_MM : StoreMultMM<"swm32">, LWM_FM_MM<0xd>;
- def LWM32_MM : LoadMultMM<"lwm32">, LWM_FM_MM<0x5>;
+ def SWM32_MM : StoreMultMM<"swm32", II_SWM>, LWM_FM_MM<0xd>;
+ def LWM32_MM : LoadMultMM<"lwm32", II_LWM>, LWM_FM_MM<0x5>;
/// Load and Store Pair Instructions
def SWP_MM : StorePairMM<"swp">, LWM_FM_MM<0x9>;
@@ -849,9 +866,9 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in {
def MSUBU_MM : MMRel, MArithR<"msubu", II_MSUBU>, MULT_FM_MM<0x3ec>;
/// Count Leading
- def CLZ_MM : MMRel, CountLeading0<"clz", GPR32Opnd>, CLO_FM_MM<0x16c>,
+ def CLZ_MM : MMRel, CountLeading0<"clz", GPR32Opnd, II_CLZ>, CLO_FM_MM<0x16c>,
ISA_MIPS32;
- def CLO_MM : MMRel, CountLeading1<"clo", GPR32Opnd>, CLO_FM_MM<0x12c>,
+ def CLO_MM : MMRel, CountLeading1<"clo", GPR32Opnd, II_CLO>, CLO_FM_MM<0x12c>,
ISA_MIPS32;
/// Sign Ext In Register Instructions.
@@ -910,30 +927,35 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in {
/// Control Instructions
def SYNC_MM : MMRel, SYNC_FT<"sync">, SYNC_FM_MM;
+ def SYNCI_MM : MMRel, SYNCI_FT<"synci">, SYNCI_FM_MM;
def BREAK_MM : MMRel, BRK_FT<"break">, BRK_FM_MM;
- def SYSCALL_MM : MMRel, SYS_FT<"syscall", uimm10>, SYS_FM_MM;
+ def SYSCALL_MM : MMRel, SYS_FT<"syscall", uimm10, II_SYSCALL>, SYS_FM_MM;
def WAIT_MM : WaitMM<"wait">, WAIT_FM_MM;
- def ERET_MM : MMRel, ER_FT<"eret">, ER_FM_MM<0x3cd>;
- def DERET_MM : MMRel, ER_FT<"deret">, ER_FM_MM<0x38d>;
- def EI_MM : MMRel, DEI_FT<"ei", GPR32Opnd>, EI_FM_MM<0x15d>,
+ def ERET_MM : MMRel, ER_FT<"eret", II_ERET>, ER_FM_MM<0x3cd>;
+ def DERET_MM : MMRel, ER_FT<"deret", II_DERET>, ER_FM_MM<0x38d>;
+ def EI_MM : MMRel, DEI_FT<"ei", GPR32Opnd, II_EI>, EI_FM_MM<0x15d>,
ISA_MIPS32R2;
- def DI_MM : MMRel, DEI_FT<"di", GPR32Opnd>, EI_FM_MM<0x11d>,
+ def DI_MM : MMRel, DEI_FT<"di", GPR32Opnd, II_DI>, EI_FM_MM<0x11d>,
ISA_MIPS32R2;
/// Trap Instructions
- def TEQ_MM : MMRel, TEQ_FT<"teq", GPR32Opnd, uimm4>, TEQ_FM_MM<0x0>;
- def TGE_MM : MMRel, TEQ_FT<"tge", GPR32Opnd, uimm4>, TEQ_FM_MM<0x08>;
- def TGEU_MM : MMRel, TEQ_FT<"tgeu", GPR32Opnd, uimm4>, TEQ_FM_MM<0x10>;
- def TLT_MM : MMRel, TEQ_FT<"tlt", GPR32Opnd, uimm4>, TEQ_FM_MM<0x20>;
- def TLTU_MM : MMRel, TEQ_FT<"tltu", GPR32Opnd, uimm4>, TEQ_FM_MM<0x28>;
- def TNE_MM : MMRel, TEQ_FT<"tne", GPR32Opnd, uimm4>, TEQ_FM_MM<0x30>;
-
- def TEQI_MM : MMRel, TEQI_FT<"teqi", GPR32Opnd>, TEQI_FM_MM<0x0e>;
- def TGEI_MM : MMRel, TEQI_FT<"tgei", GPR32Opnd>, TEQI_FM_MM<0x09>;
- def TGEIU_MM : MMRel, TEQI_FT<"tgeiu", GPR32Opnd>, TEQI_FM_MM<0x0b>;
- def TLTI_MM : MMRel, TEQI_FT<"tlti", GPR32Opnd>, TEQI_FM_MM<0x08>;
- def TLTIU_MM : MMRel, TEQI_FT<"tltiu", GPR32Opnd>, TEQI_FM_MM<0x0a>;
- def TNEI_MM : MMRel, TEQI_FT<"tnei", GPR32Opnd>, TEQI_FM_MM<0x0c>;
+ def TEQ_MM : MMRel, TEQ_FT<"teq", GPR32Opnd, uimm4, II_TEQ>, TEQ_FM_MM<0x0>;
+ def TGE_MM : MMRel, TEQ_FT<"tge", GPR32Opnd, uimm4, II_TGE>, TEQ_FM_MM<0x08>;
+ def TGEU_MM : MMRel, TEQ_FT<"tgeu", GPR32Opnd, uimm4, II_TGEU>,
+ TEQ_FM_MM<0x10>;
+ def TLT_MM : MMRel, TEQ_FT<"tlt", GPR32Opnd, uimm4, II_TLT>, TEQ_FM_MM<0x20>;
+ def TLTU_MM : MMRel, TEQ_FT<"tltu", GPR32Opnd, uimm4, II_TLTU>,
+ TEQ_FM_MM<0x28>;
+ def TNE_MM : MMRel, TEQ_FT<"tne", GPR32Opnd, uimm4, II_TNE>, TEQ_FM_MM<0x30>;
+
+ def TEQI_MM : MMRel, TEQI_FT<"teqi", GPR32Opnd, II_TEQI>, TEQI_FM_MM<0x0e>;
+ def TGEI_MM : MMRel, TEQI_FT<"tgei", GPR32Opnd, II_TGEI>, TEQI_FM_MM<0x09>;
+ def TGEIU_MM : MMRel, TEQI_FT<"tgeiu", GPR32Opnd, II_TGEIU>,
+ TEQI_FM_MM<0x0b>;
+ def TLTI_MM : MMRel, TEQI_FT<"tlti", GPR32Opnd, II_TLTI>, TEQI_FM_MM<0x08>;
+ def TLTIU_MM : MMRel, TEQI_FT<"tltiu", GPR32Opnd, II_TTLTIU>,
+ TEQI_FM_MM<0x0a>;
+ def TNEI_MM : MMRel, TEQI_FT<"tnei", GPR32Opnd, II_TNEI>, TEQI_FM_MM<0x0c>;
/// Load-linked, Store-conditional
def LL_MM : LLBaseMM<"ll", GPR32Opnd>, LL_FM_MM<0x3>;
@@ -943,32 +965,34 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in {
def SCE_MM : SCEBaseMM<"sce", GPR32Opnd>, LLE_FM_MM<0xA>;
let DecoderMethod = "DecodeCacheOpMM" in {
- def CACHE_MM : MMRel, CacheOp<"cache", mem_mm_12>,
+ def CACHE_MM : MMRel, CacheOp<"cache", mem_mm_12, II_CACHE>,
CACHE_PREF_FM_MM<0x08, 0x6>;
- def PREF_MM : MMRel, CacheOp<"pref", mem_mm_12>,
+ def PREF_MM : MMRel, CacheOp<"pref", mem_mm_12, II_PREF>,
CACHE_PREF_FM_MM<0x18, 0x2>;
}
let DecoderMethod = "DecodePrefeOpMM" in {
- def PREFE_MM : MMRel, CacheOp<"prefe", mem_mm_9>,
+ def PREFE_MM : MMRel, CacheOp<"prefe", mem_mm_9, II_PREFE>,
CACHE_PREFE_FM_MM<0x18, 0x2>;
- def CACHEE_MM : MMRel, CacheOp<"cachee", mem_mm_9>,
+ def CACHEE_MM : MMRel, CacheOp<"cachee", mem_mm_9, II_CACHEE>,
CACHE_PREFE_FM_MM<0x18, 0x3>;
}
- def SSNOP_MM : MMRel, Barrier<"ssnop">, BARRIER_FM_MM<0x1>;
- def EHB_MM : MMRel, Barrier<"ehb">, BARRIER_FM_MM<0x3>;
- def PAUSE_MM : MMRel, Barrier<"pause">, BARRIER_FM_MM<0x5>;
+ def SSNOP_MM : MMRel, Barrier<"ssnop", II_SSNOP>, BARRIER_FM_MM<0x1>;
+ def EHB_MM : MMRel, Barrier<"ehb", II_EHB>, BARRIER_FM_MM<0x3>;
+ def PAUSE_MM : MMRel, Barrier<"pause", II_PAUSE>, BARRIER_FM_MM<0x5>;
- def TLBP_MM : MMRel, TLB<"tlbp">, COP0_TLB_FM_MM<0x0d>;
- def TLBR_MM : MMRel, TLB<"tlbr">, COP0_TLB_FM_MM<0x4d>;
- def TLBWI_MM : MMRel, TLB<"tlbwi">, COP0_TLB_FM_MM<0x8d>;
- def TLBWR_MM : MMRel, TLB<"tlbwr">, COP0_TLB_FM_MM<0xcd>;
+ def TLBP_MM : MMRel, TLB<"tlbp", II_TLBP>, COP0_TLB_FM_MM<0x0d>;
+ def TLBR_MM : MMRel, TLB<"tlbr", II_TLBR>, COP0_TLB_FM_MM<0x4d>;
+ def TLBWI_MM : MMRel, TLB<"tlbwi", II_TLBWI>, COP0_TLB_FM_MM<0x8d>;
+ def TLBWR_MM : MMRel, TLB<"tlbwr", II_TLBWR>, COP0_TLB_FM_MM<0xcd>;
- def SDBBP_MM : MMRel, SYS_FT<"sdbbp", uimm10>, SDBBP_FM_MM;
+ def SDBBP_MM : MMRel, SYS_FT<"sdbbp", uimm10, II_SDBBP>, SDBBP_FM_MM;
def PREFX_MM : PrefetchIndexed<"prefx">, POOL32F_PREFX_FM_MM<0x15, 0x1A0>;
}
+def TAILCALL_MM : TailCall<J_MM, jmptarget_mm>, ISA_MIPS1_NOT_32R6_64R6;
+
let DecoderNamespace = "MicroMips" in {
def RDHWR_MM : MMRel, R6MMR6Rel, ReadHardware<GPR32Opnd, HWRegsOpnd>,
RDHWR_FM_MM, ISA_MICROMIPS32_NOT_MIPS32R6;
@@ -981,6 +1005,12 @@ let DecoderNamespace = "MicroMips" in {
// MicroMips arbitrary patterns that map to one or more instructions
//===----------------------------------------------------------------------===//
+def : MipsPat<(i32 immLi16:$imm),
+ (LI16_MM immLi16:$imm)>;
+
+let AdditionalPredicates = [InMicroMips] in
+defm : MaterializeImms<i32, ZERO, ADDiu_MM, LUi_MM, ORi_MM>;
+
let Predicates = [InMicroMips] in {
def : MipsPat<(i32 immLi16:$imm),
(LI16_MM immLi16:$imm)>;
@@ -1036,6 +1066,11 @@ let Predicates = [InMicroMips] in {
(LW_MM addr:$addr)>;
def : MipsPat<(subc GPR32:$lhs, GPR32:$rhs),
(SUBu_MM GPR32:$lhs, GPR32:$rhs)>;
+
+ def : MipsPat<(MipsTailCall (iPTR tglobaladdr:$dst)),
+ (TAILCALL_MM tglobaladdr:$dst)>, ISA_MIPS1_NOT_32R6_64R6;
+ def : MipsPat<(MipsTailCall (iPTR texternalsym:$dst)),
+ (TAILCALL_MM texternalsym:$dst)>, ISA_MIPS1_NOT_32R6_64R6;
}
let AddedComplexity = 40 in {
@@ -1047,6 +1082,15 @@ def : MipsPat<(atomic_load_16 addr:$a),
def : MipsPat<(i32 (extloadi16 addr:$src)),
(LHu_MM addr:$src)>;
+defm : BrcondPats<GPR32, BEQ_MM, BEQ_MM, BNE_MM, SLT_MM, SLTu_MM, SLTi_MM,
+ SLTiu_MM, ZERO>;
+
+defm : SeteqPats<GPR32, SLTiu_MM, XOR_MM, SLTu_MM, ZERO>;
+defm : SetlePats<GPR32, XORi_MM, SLT_MM, SLTu_MM>;
+defm : SetgtPats<GPR32, SLT_MM, SLTu_MM>;
+defm : SetgePats<GPR32, XORi_MM, SLT_MM, SLTu_MM>;
+defm : SetgeImmPats<GPR32, XORi_MM, SLTi_MM, SLTiu_MM>;
+
//===----------------------------------------------------------------------===//
// MicroMips instruction aliases
//===----------------------------------------------------------------------===//
@@ -1080,6 +1124,24 @@ let Predicates = [InMicroMips] in {
(TLTU_MM GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>;
def : MipsInstAlias<"tne $rs, $rt",
(TNE_MM GPR32Opnd:$rs, GPR32Opnd:$rt, 0), 1>;
+ def : MipsInstAlias<
+ "sgt $rd, $rs, $rt",
+ (SLT_MM GPR32Opnd:$rd, GPR32Opnd:$rt, GPR32Opnd:$rs), 0>;
+ def : MipsInstAlias<
+ "sgt $rs, $rt",
+ (SLT_MM GPR32Opnd:$rs, GPR32Opnd:$rt, GPR32Opnd:$rs), 0>;
+ def : MipsInstAlias<
+ "sgtu $rd, $rs, $rt",
+ (SLTu_MM GPR32Opnd:$rd, GPR32Opnd:$rt, GPR32Opnd:$rs), 0>;
+ def : MipsInstAlias<
+ "sgtu $rs, $rt",
+ (SLTu_MM GPR32Opnd:$rs, GPR32Opnd:$rt, GPR32Opnd:$rs), 0>;
+ def : MipsInstAlias<"slt $rs, $rt, $imm",
+ (SLTi_MM GPR32Opnd:$rs, GPR32Opnd:$rt,
+ simm32_relaxed:$imm), 0>;
+ def : MipsInstAlias<"sltu $rs, $rt, $imm",
+ (SLTiu_MM GPR32Opnd:$rs, GPR32Opnd:$rt,
+ simm32_relaxed:$imm), 0>;
def : MipsInstAlias<"sll $rd, $rt, $rs",
(SLLV_MM GPR32Opnd:$rd, GPR32Opnd:$rt, GPR32Opnd:$rs), 0>;
def : MipsInstAlias<"sra $rd, $rt, $rs",
@@ -1115,4 +1177,14 @@ let Predicates = [InMicroMips] in {
(XORi_MM GPR32Opnd:$rs, GPR32Opnd:$rs, uimm16:$imm), 0>;
def : MipsInstAlias<"not $rt, $rs",
(NOR_MM GPR32Opnd:$rt, GPR32Opnd:$rs, ZERO), 0>;
+ def : MipsInstAlias<"not $rt",
+ (NOR_MM GPR32Opnd:$rt, GPR32Opnd:$rt, ZERO), 0>;
+ def : MipsInstAlias<"bnez $rs,$offset",
+ (BNE_MM GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>;
+ def : MipsInstAlias<"beqz $rs,$offset",
+ (BEQ_MM GPR32Opnd:$rs, ZERO, brtarget:$offset), 0>;
+ def : MipsInstAlias<"seh $rd", (SEH_MM GPR32Opnd:$rd, GPR32Opnd:$rd), 0>,
+ ISA_MIPS32R2_NOT_32R6_64R6;
+ def : MipsInstAlias<"seb $rd", (SEB_MM GPR32Opnd:$rd, GPR32Opnd:$rd), 0>,
+ ISA_MIPS32R2_NOT_32R6_64R6;
}
diff --git a/contrib/llvm/lib/Target/Mips/Mips.td b/contrib/llvm/lib/Target/Mips/Mips.td
index ea3fa0a9578e..670272d47e95 100644
--- a/contrib/llvm/lib/Target/Mips/Mips.td
+++ b/contrib/llvm/lib/Target/Mips/Mips.td
@@ -57,6 +57,10 @@ include "MipsSchedule.td"
include "MipsInstrInfo.td"
include "MipsCallingConv.td"
+// Avoid forward declaration issues.
+include "MipsScheduleP5600.td"
+include "MipsScheduleGeneric.td"
+
def MipsInstrInfo : InstrInfo;
//===----------------------------------------------------------------------===//
@@ -188,7 +192,7 @@ def ImplP5600 : SubtargetFeature<"p5600", "ProcImpl",
"The P5600 Processor", [FeatureMips32r5]>;
class Proc<string Name, list<SubtargetFeature> Features>
- : Processor<Name, MipsGenericItineraries, Features>;
+ : ProcessorModel<Name, MipsGenericModel, Features>;
def : Proc<"mips1", [FeatureMips1]>;
def : Proc<"mips2", [FeatureMips2]>;
diff --git a/contrib/llvm/lib/Target/Mips/Mips16FrameLowering.cpp b/contrib/llvm/lib/Target/Mips/Mips16FrameLowering.cpp
index e937ffa7a7ab..e7ceca9612a9 100644
--- a/contrib/llvm/lib/Target/Mips/Mips16FrameLowering.cpp
+++ b/contrib/llvm/lib/Target/Mips/Mips16FrameLowering.cpp
@@ -34,7 +34,7 @@ Mips16FrameLowering::Mips16FrameLowering(const MipsSubtarget &STI)
void Mips16FrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
const Mips16InstrInfo &TII =
*static_cast<const Mips16InstrInfo *>(STI.getInstrInfo());
MachineBasicBlock::iterator MBBI = MBB.begin();
@@ -43,10 +43,10 @@ void Mips16FrameLowering::emitPrologue(MachineFunction &MF,
// to determine the end of the prologue.
DebugLoc dl;
- uint64_t StackSize = MFI->getStackSize();
+ uint64_t StackSize = MFI.getStackSize();
// No need to allocate space on the stack.
- if (StackSize == 0 && !MFI->adjustsStack()) return;
+ if (StackSize == 0 && !MFI.adjustsStack()) return;
MachineModuleInfo &MMI = MF.getMMI();
const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
@@ -56,22 +56,22 @@ void Mips16FrameLowering::emitPrologue(MachineFunction &MF,
TII.makeFrame(Mips::SP, StackSize, MBB, MBBI);
// emit ".cfi_def_cfa_offset StackSize"
- unsigned CFIIndex = MMI.addFrameInst(
+ unsigned CFIIndex = MF.addFrameInst(
MCCFIInstruction::createDefCfaOffset(nullptr, -StackSize));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
- const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
if (CSI.size()) {
- const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
E = CSI.end(); I != E; ++I) {
- int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
+ int64_t Offset = MFI.getObjectOffset(I->getFrameIdx());
unsigned Reg = I->getReg();
unsigned DReg = MRI->getDwarfRegNum(Reg, true);
- unsigned CFIIndex = MMI.addFrameInst(
+ unsigned CFIIndex = MF.addFrameInst(
MCCFIInstruction::createOffset(nullptr, DReg, Offset));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
@@ -86,11 +86,11 @@ void Mips16FrameLowering::emitPrologue(MachineFunction &MF,
void Mips16FrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
const Mips16InstrInfo &TII =
*static_cast<const Mips16InstrInfo *>(STI.getInstrInfo());
DebugLoc dl = MBBI->getDebugLoc();
- uint64_t StackSize = MFI->getStackSize();
+ uint64_t StackSize = MFI.getStackSize();
if (!StackSize)
return;
@@ -120,12 +120,12 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
// Add the callee-saved register as live-in. Do not add if the register is
// RA and return address is taken, because it has already been added in
- // method MipsTargetLowering::LowerRETURNADDR.
+ // method MipsTargetLowering::lowerRETURNADDR.
// It's killed at the spill, unless the register is RA and return address
// is taken.
unsigned Reg = CSI[i].getReg();
bool IsRAAndRetAddrIsTaken = (Reg == Mips::RA)
- && MF->getFrameInfo()->isReturnAddressTaken();
+ && MF->getFrameInfo().isReturnAddressTaken();
if (!IsRAAndRetAddrIsTaken)
EntryBlock->addLiveIn(Reg);
}
@@ -149,10 +149,10 @@ bool Mips16FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
bool
Mips16FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
// Reserve call frame if the size of the maximum call frame fits into 15-bit
// immediate field and there are no variable sized objects on the stack.
- return isInt<15>(MFI->getMaxCallFrameSize()) && !MFI->hasVarSizedObjects();
+ return isInt<15>(MFI.getMaxCallFrameSize()) && !MFI.hasVarSizedObjects();
}
void Mips16FrameLowering::determineCalleeSaves(MachineFunction &MF,
diff --git a/contrib/llvm/lib/Target/Mips/Mips16HardFloat.cpp b/contrib/llvm/lib/Target/Mips/Mips16HardFloat.cpp
index d2d1c65e40d9..191006d6463c 100644
--- a/contrib/llvm/lib/Target/Mips/Mips16HardFloat.cpp
+++ b/contrib/llvm/lib/Target/Mips/Mips16HardFloat.cpp
@@ -30,9 +30,7 @@ namespace {
Mips16HardFloat(MipsTargetMachine &TM_) : ModulePass(ID), TM(TM_) {}
- const char *getPassName() const override {
- return "MIPS16 Hard Float Pass";
- }
+ StringRef getPassName() const override { return "MIPS16 Hard Float Pass"; }
bool runOnModule(Module &M) override;
diff --git a/contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
index 0405291431cd..ce193b1734f3 100644
--- a/contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp
@@ -80,9 +80,10 @@ void Mips16DAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) {
V1 = RegInfo.createVirtualRegister(RC);
V2 = RegInfo.createVirtualRegister(RC);
- BuildMI(MBB, I, DL, TII.get(Mips::GotPrologue16), V0)
- .addReg(V1, RegState::Define)
- .addExternalSymbol("_gp_disp", MipsII::MO_ABS_HI)
+
+ BuildMI(MBB, I, DL, TII.get(Mips::LiRxImmX16), V0)
+ .addExternalSymbol("_gp_disp", MipsII::MO_ABS_HI);
+ BuildMI(MBB, I, DL, TII.get(Mips::AddiuRxPcImmX16), V1)
.addExternalSymbol("_gp_disp", MipsII::MO_ABS_LO);
BuildMI(MBB, I, DL, TII.get(Mips::SllX16), V2).addReg(V0).addImm(16);
diff --git a/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.cpp b/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
index daa1355ffefd..35ef31749f40 100644
--- a/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/Mips16InstrInfo.cpp
@@ -172,7 +172,7 @@ static void addSaveRestoreRegs(MachineInstrBuilder &MIB,
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
// Add the callee-saved register as live-in. Do not add if the register is
// RA and return address is taken, because it has already been added in
- // method MipsTargetLowering::LowerRETURNADDR.
+ // method MipsTargetLowering::lowerRETURNADDR.
// It's killed at the spill, unless the register is RA and return address
// is taken.
unsigned Reg = CSI[e-i-1].getReg();
@@ -196,13 +196,13 @@ void Mips16InstrInfo::makeFrame(unsigned SP, int64_t FrameSize,
MachineBasicBlock::iterator I) const {
DebugLoc DL;
MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
const BitVector Reserved = RI.getReservedRegs(MF);
bool SaveS2 = Reserved[Mips::S2];
MachineInstrBuilder MIB;
unsigned Opc = ((FrameSize <= 128) && !SaveS2)? Mips::Save16:Mips::SaveX16;
MIB = BuildMI(MBB, I, DL, get(Opc));
- const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
addSaveRestoreRegs(MIB, CSI);
if (SaveS2)
MIB.addReg(Mips::S2);
@@ -226,7 +226,7 @@ void Mips16InstrInfo::restoreFrame(unsigned SP, int64_t FrameSize,
MachineBasicBlock::iterator I) const {
DebugLoc DL = I != MBB.end() ? I->getDebugLoc() : DebugLoc();
MachineFunction *MF = MBB.getParent();
- MachineFrameInfo *MFI = MF->getFrameInfo();
+ MachineFrameInfo &MFI = MF->getFrameInfo();
const BitVector Reserved = RI.getReservedRegs(*MF);
bool SaveS2 = Reserved[Mips::S2];
MachineInstrBuilder MIB;
@@ -245,7 +245,7 @@ void Mips16InstrInfo::restoreFrame(unsigned SP, int64_t FrameSize,
adjustStackPtrBig(SP, Remainder, MBB, I, Mips::A0, Mips::A1);
}
MIB = BuildMI(MBB, I, DL, get(Opc));
- const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
addSaveRestoreRegs(MIB, CSI, RegState::Define);
if (SaveS2)
MIB.addReg(Mips::S2, RegState::Define);
@@ -510,8 +510,8 @@ unsigned Mips16InstrInfo::getInlineAsmLength(const char *Str,
Length += MAI.getMaxInstLength();
atInsnStart = false;
}
- if (atInsnStart && strncmp(Str, MAI.getCommentString(),
- strlen(MAI.getCommentString())) == 0)
+ if (atInsnStart && strncmp(Str, MAI.getCommentString().data(),
+ MAI.getCommentString().size()) == 0)
atInsnStart = false;
}
diff --git a/contrib/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp b/contrib/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp
index b034c26640e3..44771cbe8be1 100644
--- a/contrib/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/Mips16RegisterInfo.cpp
@@ -79,9 +79,9 @@ void Mips16RegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
int64_t SPOffset) const {
MachineInstr &MI = *II;
MachineFunction &MF = *MI.getParent()->getParent();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
- const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
int MinCSFI = 0;
int MaxCSFI = -1;
diff --git a/contrib/llvm/lib/Target/Mips/Mips32r6InstrFormats.td b/contrib/llvm/lib/Target/Mips/Mips32r6InstrFormats.td
index a20c683c1e5e..516caa34fbf2 100644
--- a/contrib/llvm/lib/Target/Mips/Mips32r6InstrFormats.td
+++ b/contrib/llvm/lib/Target/Mips/Mips32r6InstrFormats.td
@@ -45,6 +45,7 @@ class MipsR6Inst : MipsInst<(outs), (ins), "", [], NoItinerary, FrmOther>,
class OPGROUP<bits<6> Val> {
bits<6> Value = Val;
}
+def OPGROUP_COP0 : OPGROUP<0b010000>;
def OPGROUP_COP1 : OPGROUP<0b010001>;
def OPGROUP_COP2 : OPGROUP<0b010010>;
def OPGROUP_ADDI : OPGROUP<0b001000>;
@@ -201,6 +202,21 @@ class BAL_FM : MipsR6Inst {
let Inst{15-0} = offset;
}
+class COP0_EVP_DVP_FM<bits<1> sc> : MipsR6Inst {
+ bits<5> rt;
+
+ bits<32> Inst;
+
+ let Inst{31-26} = OPGROUP_COP0.Value;
+ let Inst{25-21} = 0b01011;
+ let Inst{20-16} = rt;
+ let Inst{15-11} = 0b00000;
+ let Inst{10-6} = 0b00000;
+ let Inst{5} = sc;
+ let Inst{4-3} = 0b00;
+ let Inst{2-0} = 0b100;
+}
+
class COP1_2R_FM<bits<6> funct, FIELD_FMT Format> : MipsR6Inst {
bits<5> fs;
bits<5> fd;
diff --git a/contrib/llvm/lib/Target/Mips/Mips32r6InstrInfo.td b/contrib/llvm/lib/Target/Mips/Mips32r6InstrInfo.td
index f552f8d37afb..1b4d73b79895 100644
--- a/contrib/llvm/lib/Target/Mips/Mips32r6InstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/Mips32r6InstrInfo.td
@@ -98,6 +98,9 @@ class BC1NEZ_ENC : COP1_BCCZ_FM<OPCODE5_BC1NEZ>;
class BC2EQZ_ENC : COP2_BCCZ_FM<OPCODE5_BC2EQZ>;
class BC2NEZ_ENC : COP2_BCCZ_FM<OPCODE5_BC2NEZ>;
+class DVP_ENC : COP0_EVP_DVP_FM<0b1>;
+class EVP_ENC : COP0_EVP_DVP_FM<0b0>;
+
class JIALC_ENC : JMP_IDX_COMPACT_FM<0b111110>;
class JIC_ENC : JMP_IDX_COMPACT_FM<0b110110>;
class JR_HB_R6_ENC : JR_HB_R6_FM<OPCODE6_JALR>;
@@ -177,90 +180,98 @@ class SDBBP_R6_ENC : SPECIAL_SDBBP_FM;
class CMP_CONDN_DESC_BASE<string CondStr, string Typestr,
RegisterOperand FGROpnd,
+ InstrItinClass Itin,
SDPatternOperator Op = null_frag> {
dag OutOperandList = (outs FGRCCOpnd:$fd);
dag InOperandList = (ins FGROpnd:$fs, FGROpnd:$ft);
string AsmString = !strconcat("cmp.", CondStr, ".", Typestr, "\t$fd, $fs, $ft");
list<dag> Pattern = [(set FGRCCOpnd:$fd, (Op FGROpnd:$fs, FGROpnd:$ft))];
bit isCTI = 1;
+ InstrItinClass Itinerary = Itin;
}
multiclass CMP_CC_M <FIELD_CMP_FORMAT Format, string Typestr,
- RegisterOperand FGROpnd>{
+ RegisterOperand FGROpnd, InstrItinClass Itin>{
let AdditionalPredicates = [NotInMicroMips] in {
def CMP_F_#NAME : R6MMR6Rel, COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_AF>,
- CMP_CONDN_DESC_BASE<"af", Typestr, FGROpnd>,
+ CMP_CONDN_DESC_BASE<"af", Typestr, FGROpnd, Itin>,
MipsR6Arch<!strconcat("cmp.af.", Typestr)>,
ISA_MIPS32R6, HARDFLOAT;
def CMP_UN_#NAME : R6MMR6Rel, COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_UN>,
- CMP_CONDN_DESC_BASE<"un", Typestr, FGROpnd, setuo>,
+ CMP_CONDN_DESC_BASE<"un", Typestr, FGROpnd, Itin, setuo>,
MipsR6Arch<!strconcat("cmp.un.", Typestr)>,
ISA_MIPS32R6, HARDFLOAT;
def CMP_EQ_#NAME : R6MMR6Rel, COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_EQ>,
- CMP_CONDN_DESC_BASE<"eq", Typestr, FGROpnd, setoeq>,
+ CMP_CONDN_DESC_BASE<"eq", Typestr, FGROpnd, Itin,
+ setoeq>,
MipsR6Arch<!strconcat("cmp.eq.", Typestr)>,
ISA_MIPS32R6, HARDFLOAT;
def CMP_UEQ_#NAME : R6MMR6Rel, COP1_CMP_CONDN_FM<Format,
FIELD_CMP_COND_UEQ>,
- CMP_CONDN_DESC_BASE<"ueq", Typestr, FGROpnd, setueq>,
+ CMP_CONDN_DESC_BASE<"ueq", Typestr, FGROpnd, Itin,
+ setueq>,
MipsR6Arch<!strconcat("cmp.ueq.", Typestr)>,
ISA_MIPS32R6, HARDFLOAT;
def CMP_LT_#NAME : R6MMR6Rel, COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_LT>,
- CMP_CONDN_DESC_BASE<"lt", Typestr, FGROpnd, setolt>,
+ CMP_CONDN_DESC_BASE<"lt", Typestr, FGROpnd, Itin,
+ setolt>,
MipsR6Arch<!strconcat("cmp.lt.", Typestr)>,
ISA_MIPS32R6, HARDFLOAT;
def CMP_ULT_#NAME : R6MMR6Rel, COP1_CMP_CONDN_FM<Format,
FIELD_CMP_COND_ULT>,
- CMP_CONDN_DESC_BASE<"ult", Typestr, FGROpnd, setult>,
+ CMP_CONDN_DESC_BASE<"ult", Typestr, FGROpnd, Itin,
+ setult>,
MipsR6Arch<!strconcat("cmp.ult.", Typestr)>,
ISA_MIPS32R6, HARDFLOAT;
def CMP_LE_#NAME : R6MMR6Rel, COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_LE>,
- CMP_CONDN_DESC_BASE<"le", Typestr, FGROpnd, setole>,
+ CMP_CONDN_DESC_BASE<"le", Typestr, FGROpnd, Itin,
+ setole>,
MipsR6Arch<!strconcat("cmp.le.", Typestr)>,
ISA_MIPS32R6, HARDFLOAT;
def CMP_ULE_#NAME : R6MMR6Rel, COP1_CMP_CONDN_FM<Format,
FIELD_CMP_COND_ULE>,
- CMP_CONDN_DESC_BASE<"ule", Typestr, FGROpnd, setule>,
+ CMP_CONDN_DESC_BASE<"ule", Typestr, FGROpnd, Itin,
+ setule>,
MipsR6Arch<!strconcat("cmp.ule.", Typestr)>,
ISA_MIPS32R6, HARDFLOAT;
def CMP_SAF_#NAME : R6MMR6Rel, COP1_CMP_CONDN_FM<Format,
FIELD_CMP_COND_SAF>,
- CMP_CONDN_DESC_BASE<"saf", Typestr, FGROpnd>,
+ CMP_CONDN_DESC_BASE<"saf", Typestr, FGROpnd, Itin>,
MipsR6Arch<!strconcat("cmp.saf.", Typestr)>,
ISA_MIPS32R6, HARDFLOAT;
def CMP_SUN_#NAME : R6MMR6Rel, COP1_CMP_CONDN_FM<Format,
FIELD_CMP_COND_SUN>,
- CMP_CONDN_DESC_BASE<"sun", Typestr, FGROpnd>,
+ CMP_CONDN_DESC_BASE<"sun", Typestr, FGROpnd, Itin>,
MipsR6Arch<!strconcat("cmp.sun.", Typestr)>,
ISA_MIPS32R6, HARDFLOAT;
def CMP_SEQ_#NAME : R6MMR6Rel, COP1_CMP_CONDN_FM<Format,
FIELD_CMP_COND_SEQ>,
- CMP_CONDN_DESC_BASE<"seq", Typestr, FGROpnd>,
+ CMP_CONDN_DESC_BASE<"seq", Typestr, FGROpnd, Itin>,
MipsR6Arch<!strconcat("cmp.seq.", Typestr)>,
ISA_MIPS32R6, HARDFLOAT;
def CMP_SUEQ_#NAME : R6MMR6Rel, COP1_CMP_CONDN_FM<Format,
FIELD_CMP_COND_SUEQ>,
- CMP_CONDN_DESC_BASE<"sueq", Typestr, FGROpnd>,
+ CMP_CONDN_DESC_BASE<"sueq", Typestr, FGROpnd, Itin>,
MipsR6Arch<!strconcat("cmp.sueq.", Typestr)>,
ISA_MIPS32R6, HARDFLOAT;
def CMP_SLT_#NAME : R6MMR6Rel, COP1_CMP_CONDN_FM<Format,
FIELD_CMP_COND_SLT>,
- CMP_CONDN_DESC_BASE<"slt", Typestr, FGROpnd>,
+ CMP_CONDN_DESC_BASE<"slt", Typestr, FGROpnd, Itin>,
MipsR6Arch<!strconcat("cmp.slt.", Typestr)>,
ISA_MIPS32R6, HARDFLOAT;
def CMP_SULT_#NAME : R6MMR6Rel, COP1_CMP_CONDN_FM<Format,
FIELD_CMP_COND_SULT>,
- CMP_CONDN_DESC_BASE<"sult", Typestr, FGROpnd>,
+ CMP_CONDN_DESC_BASE<"sult", Typestr, FGROpnd, Itin>,
MipsR6Arch<!strconcat("cmp.sult.", Typestr)>,
ISA_MIPS32R6, HARDFLOAT;
def CMP_SLE_#NAME : R6MMR6Rel, COP1_CMP_CONDN_FM<Format,
FIELD_CMP_COND_SLE>,
- CMP_CONDN_DESC_BASE<"sle", Typestr, FGROpnd>,
+ CMP_CONDN_DESC_BASE<"sle", Typestr, FGROpnd, Itin>,
MipsR6Arch<!strconcat("cmp.sle.", Typestr)>,
ISA_MIPS32R6, HARDFLOAT;
def CMP_SULE_#NAME : R6MMR6Rel, COP1_CMP_CONDN_FM<Format,
FIELD_CMP_COND_SULE>,
- CMP_CONDN_DESC_BASE<"sule", Typestr, FGROpnd>,
+ CMP_CONDN_DESC_BASE<"sule", Typestr, FGROpnd, Itin>,
MipsR6Arch<!strconcat("cmp.sule.", Typestr)>,
ISA_MIPS32R6, HARDFLOAT;
}
@@ -316,7 +327,7 @@ class AUI_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
InstrItinClass itin = NoItinerary>
: MipsR6Arch<instr_asm> {
dag OutOperandList = (outs GPROpnd:$rs);
- dag InOperandList = (ins GPROpnd:$rt, simm16:$imm);
+ dag InOperandList = (ins GPROpnd:$rt, uimm16:$imm);
string AsmString = !strconcat(instr_asm, "\t$rs, $rt, $imm");
list<dag> Pattern = [];
InstrItinClass Itinerary = itin;
@@ -426,6 +437,7 @@ class COP2_BCCZ_DESC_BASE<string instr_asm> : BRANCH_DESC_BASE {
string AsmString = instr_asm;
bit hasDelaySlot = 1;
bit isCTI = 1;
+ InstrItinClass Itinerary = II_BC2CCZ;
}
class BC2EQZ_DESC : COP2_BCCZ_DESC_BASE<"bc2eqz $ct, $offset">;
@@ -468,6 +480,7 @@ class JR_HB_R6_DESC : JR_HB_DESC_BASE<"jr.hb", GPR32Opnd> {
bit isTerminator=1;
bit isBarrier=1;
bit isCTI = 1;
+ InstrItinClass Itinerary = II_JR_HB;
}
class BITSWAP_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
@@ -496,6 +509,19 @@ class DIVMOD_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
bit usesCustomInserter = 1;
}
+class DVPEVP_DESC_BASE<string instr_asm, InstrItinClass Itin>
+ : MipsR6Arch<instr_asm> {
+ dag OutOperandList = (outs GPR32Opnd:$rt);
+ dag InOperandList = (ins);
+ string AsmString = !strconcat(instr_asm, "\t$rt");
+ list<dag> Pattern = [];
+ InstrItinClass Itinerary = Itin;
+ bit hasUnModeledSideEffects = 1;
+}
+
+class DVP_DESC : DVPEVP_DESC_BASE<"dvp", II_DVP>;
+class EVP_DESC : DVPEVP_DESC_BASE<"evp", II_EVP>;
+
class DIV_DESC : DIVMOD_DESC_BASE<"div", GPR32Opnd, II_DIV, sdiv>;
class DIVU_DESC : DIVMOD_DESC_BASE<"divu", GPR32Opnd, II_DIVU, udiv>;
class MOD_DESC : DIVMOD_DESC_BASE<"mod", GPR32Opnd, II_MOD, srem>;
@@ -540,7 +566,8 @@ class MUHU_DESC : MUL_R6_DESC_BASE<"muhu", GPR32Opnd, II_MUHU, mulhu>;
class MUL_R6_DESC : MUL_R6_DESC_BASE<"mul", GPR32Opnd, II_MUL, mul>;
class MULU_DESC : MUL_R6_DESC_BASE<"mulu", GPR32Opnd, II_MULU>;
-class COP1_SEL_DESC_BASE<string instr_asm, RegisterOperand FGROpnd> {
+class COP1_SEL_DESC_BASE<string instr_asm, RegisterOperand FGROpnd,
+ InstrItinClass itin> {
dag OutOperandList = (outs FGROpnd:$fd);
dag InOperandList = (ins FGRCCOpnd:$fd_in, FGROpnd:$fs, FGROpnd:$ft);
string AsmString = !strconcat(instr_asm, "\t$fd, $fs, $ft");
@@ -548,13 +575,16 @@ class COP1_SEL_DESC_BASE<string instr_asm, RegisterOperand FGROpnd> {
FGROpnd:$ft,
FGROpnd:$fs))];
string Constraints = "$fd_in = $fd";
+ InstrItinClass Itinerary = itin;
}
-class SEL_D_DESC : COP1_SEL_DESC_BASE<"sel.d", FGR64Opnd>, MipsR6Arch<"sel.d"> {
+class SEL_D_DESC : COP1_SEL_DESC_BASE<"sel.d", FGR64Opnd, II_SEL_D>,
+ MipsR6Arch<"sel.d"> {
// We must insert a SUBREG_TO_REG around $fd_in
bit usesCustomInserter = 1;
}
-class SEL_S_DESC : COP1_SEL_DESC_BASE<"sel.s", FGR32Opnd>, MipsR6Arch<"sel.s">;
+class SEL_S_DESC : COP1_SEL_DESC_BASE<"sel.s", FGR32Opnd, II_SEL_S>,
+ MipsR6Arch<"sel.s">;
class SELEQNE_Z_DESC_BASE<string instr_asm, RegisterOperand GPROpnd>
: MipsR6Arch<instr_asm> {
@@ -583,86 +613,98 @@ class MADDF_D_DESC : COP1_4R_DESC_BASE<"maddf.d", FGR64Opnd, II_MADDF_D>;
class MSUBF_S_DESC : COP1_4R_DESC_BASE<"msubf.s", FGR32Opnd, II_MSUBF_S>;
class MSUBF_D_DESC : COP1_4R_DESC_BASE<"msubf.d", FGR64Opnd, II_MSUBF_D>;
-class MAX_MIN_DESC_BASE<string instr_asm, RegisterOperand FGROpnd> {
+class MAX_MIN_DESC_BASE<string instr_asm, RegisterOperand FGROpnd,
+ InstrItinClass itin> {
dag OutOperandList = (outs FGROpnd:$fd);
dag InOperandList = (ins FGROpnd:$fs, FGROpnd:$ft);
string AsmString = !strconcat(instr_asm, "\t$fd, $fs, $ft");
list<dag> Pattern = [];
+ InstrItinClass Itinerary = itin;
}
-class MAX_S_DESC : MAX_MIN_DESC_BASE<"max.s", FGR32Opnd>;
-class MAX_D_DESC : MAX_MIN_DESC_BASE<"max.d", FGR64Opnd>;
-class MIN_S_DESC : MAX_MIN_DESC_BASE<"min.s", FGR32Opnd>;
-class MIN_D_DESC : MAX_MIN_DESC_BASE<"min.d", FGR64Opnd>;
+class MAX_S_DESC : MAX_MIN_DESC_BASE<"max.s", FGR32Opnd, II_MAX_S>;
+class MAX_D_DESC : MAX_MIN_DESC_BASE<"max.d", FGR64Opnd, II_MAX_D>;
+class MIN_S_DESC : MAX_MIN_DESC_BASE<"min.s", FGR32Opnd, II_MIN_S>;
+class MIN_D_DESC : MAX_MIN_DESC_BASE<"min.d", FGR64Opnd, II_MIN_D>;
-class MAXA_S_DESC : MAX_MIN_DESC_BASE<"maxa.s", FGR32Opnd>;
-class MAXA_D_DESC : MAX_MIN_DESC_BASE<"maxa.d", FGR64Opnd>;
-class MINA_S_DESC : MAX_MIN_DESC_BASE<"mina.s", FGR32Opnd>;
-class MINA_D_DESC : MAX_MIN_DESC_BASE<"mina.d", FGR64Opnd>;
+class MAXA_S_DESC : MAX_MIN_DESC_BASE<"maxa.s", FGR32Opnd, II_MAX_S>;
+class MAXA_D_DESC : MAX_MIN_DESC_BASE<"maxa.d", FGR64Opnd, II_MAX_D>;
+class MINA_S_DESC : MAX_MIN_DESC_BASE<"mina.s", FGR32Opnd, II_MIN_D>;
+class MINA_D_DESC : MAX_MIN_DESC_BASE<"mina.d", FGR64Opnd, II_MIN_S>;
-class SELEQNEZ_DESC_BASE<string instr_asm, RegisterOperand FGROpnd> {
+class SELEQNEZ_DESC_BASE<string instr_asm, RegisterOperand FGROpnd,
+ InstrItinClass itin> {
dag OutOperandList = (outs FGROpnd:$fd);
dag InOperandList = (ins FGROpnd:$fs, FGROpnd:$ft);
string AsmString = !strconcat(instr_asm, "\t$fd, $fs, $ft");
list<dag> Pattern = [];
+ InstrItinClass Itinerary = itin;
}
-class SELEQZ_S_DESC : SELEQNEZ_DESC_BASE<"seleqz.s", FGR32Opnd>,
+class SELEQZ_S_DESC : SELEQNEZ_DESC_BASE<"seleqz.s", FGR32Opnd, II_SELCCZ_S>,
MipsR6Arch<"seleqz.s">;
-class SELEQZ_D_DESC : SELEQNEZ_DESC_BASE<"seleqz.d", FGR64Opnd>,
+class SELEQZ_D_DESC : SELEQNEZ_DESC_BASE<"seleqz.d", FGR64Opnd, II_SELCCZ_D>,
MipsR6Arch<"seleqz.d">;
-class SELNEZ_S_DESC : SELEQNEZ_DESC_BASE<"selnez.s", FGR32Opnd>,
+class SELNEZ_S_DESC : SELEQNEZ_DESC_BASE<"selnez.s", FGR32Opnd, II_SELCCZ_S>,
MipsR6Arch<"selnez.s">;
-class SELNEZ_D_DESC : SELEQNEZ_DESC_BASE<"selnez.d", FGR64Opnd>,
+class SELNEZ_D_DESC : SELEQNEZ_DESC_BASE<"selnez.d", FGR64Opnd, II_SELCCZ_D>,
MipsR6Arch<"selnez.d">;
-class CLASS_RINT_DESC_BASE<string instr_asm, RegisterOperand FGROpnd> {
+class CLASS_RINT_DESC_BASE<string instr_asm, RegisterOperand FGROpnd,
+ InstrItinClass itin> {
dag OutOperandList = (outs FGROpnd:$fd);
dag InOperandList = (ins FGROpnd:$fs);
string AsmString = !strconcat(instr_asm, "\t$fd, $fs");
list<dag> Pattern = [];
+ InstrItinClass Itinerary = itin;
}
-class RINT_S_DESC : CLASS_RINT_DESC_BASE<"rint.s", FGR32Opnd>;
-class RINT_D_DESC : CLASS_RINT_DESC_BASE<"rint.d", FGR64Opnd>;
-class CLASS_S_DESC : CLASS_RINT_DESC_BASE<"class.s", FGR32Opnd>;
-class CLASS_D_DESC : CLASS_RINT_DESC_BASE<"class.d", FGR64Opnd>;
+class RINT_S_DESC : CLASS_RINT_DESC_BASE<"rint.s", FGR32Opnd, II_RINT_S>;
+class RINT_D_DESC : CLASS_RINT_DESC_BASE<"rint.d", FGR64Opnd, II_RINT_D>;
+class CLASS_S_DESC : CLASS_RINT_DESC_BASE<"class.s", FGR32Opnd, II_CLASS_S>;
+class CLASS_D_DESC : CLASS_RINT_DESC_BASE<"class.d", FGR64Opnd, II_CLASS_D>;
class CACHE_HINT_DESC<string instr_asm, Operand MemOpnd,
- RegisterOperand GPROpnd> : MipsR6Arch<instr_asm> {
+ RegisterOperand GPROpnd, InstrItinClass itin>
+ : MipsR6Arch<instr_asm> {
dag OutOperandList = (outs);
dag InOperandList = (ins MemOpnd:$addr, uimm5:$hint);
string AsmString = !strconcat(instr_asm, "\t$hint, $addr");
list<dag> Pattern = [];
string DecoderMethod = "DecodeCacheeOp_CacheOpR6";
+ InstrItinClass Itinerary = itin;
}
-class CACHE_DESC : CACHE_HINT_DESC<"cache", mem_simm9, GPR32Opnd>;
-class PREF_DESC : CACHE_HINT_DESC<"pref", mem_simm9, GPR32Opnd>;
+class CACHE_DESC : CACHE_HINT_DESC<"cache", mem_simm9, GPR32Opnd, II_CACHE>;
+class PREF_DESC : CACHE_HINT_DESC<"pref", mem_simm9, GPR32Opnd, II_PREF>;
-class COP2LD_DESC_BASE<string instr_asm, RegisterOperand COPOpnd> {
+class COP2LD_DESC_BASE<string instr_asm, RegisterOperand COPOpnd,
+ InstrItinClass itin> {
dag OutOperandList = (outs COPOpnd:$rt);
dag InOperandList = (ins mem_simm11:$addr);
string AsmString = !strconcat(instr_asm, "\t$rt, $addr");
list<dag> Pattern = [];
bit mayLoad = 1;
string DecoderMethod = "DecodeFMemCop2R6";
+ InstrItinClass Itinerary = itin;
}
-class LDC2_R6_DESC : COP2LD_DESC_BASE<"ldc2", COP2Opnd>;
-class LWC2_R6_DESC : COP2LD_DESC_BASE<"lwc2", COP2Opnd>;
+class LDC2_R6_DESC : COP2LD_DESC_BASE<"ldc2", COP2Opnd, II_LDC2>;
+class LWC2_R6_DESC : COP2LD_DESC_BASE<"lwc2", COP2Opnd, II_LWC2>;
-class COP2ST_DESC_BASE<string instr_asm, RegisterOperand COPOpnd> {
+class COP2ST_DESC_BASE<string instr_asm, RegisterOperand COPOpnd,
+ InstrItinClass itin> {
dag OutOperandList = (outs);
dag InOperandList = (ins COPOpnd:$rt, mem_simm11:$addr);
string AsmString = !strconcat(instr_asm, "\t$rt, $addr");
list<dag> Pattern = [];
bit mayStore = 1;
string DecoderMethod = "DecodeFMemCop2R6";
+ InstrItinClass Itinerary = itin;
}
-class SDC2_R6_DESC : COP2ST_DESC_BASE<"sdc2", COP2Opnd>;
-class SWC2_R6_DESC : COP2ST_DESC_BASE<"swc2", COP2Opnd>;
+class SDC2_R6_DESC : COP2ST_DESC_BASE<"sdc2", COP2Opnd, II_SDC2>;
+class SWC2_R6_DESC : COP2ST_DESC_BASE<"swc2", COP2Opnd, II_SWC2>;
class LSA_R6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd,
Operand ImmOpnd, InstrItinClass itin>
@@ -732,6 +774,7 @@ class SDBBP_R6_DESC {
string AsmString = "sdbbp\t$code_";
list<dag> Pattern = [];
bit isCTI = 1;
+ InstrItinClass Itinerary = II_SDBBP;
}
//===----------------------------------------------------------------------===//
@@ -754,26 +797,28 @@ let AdditionalPredicates = [NotInMicroMips] in {
def BC2NEZ : BC2NEZ_ENC, BC2NEZ_DESC, ISA_MIPS32R6;
}
def BC : R6MMR6Rel, BC_ENC, BC_DESC, ISA_MIPS32R6;
-def BEQC : R6MMR6Rel, BEQC_ENC, BEQC_DESC, ISA_MIPS32R6;
-def BEQZALC : R6MMR6Rel, BEQZALC_ENC, BEQZALC_DESC, ISA_MIPS32R6;
-def BEQZC : R6MMR6Rel, BEQZC_ENC, BEQZC_DESC, ISA_MIPS32R6;
-def BGEC : R6MMR6Rel, BGEC_ENC, BGEC_DESC, ISA_MIPS32R6;
-def BGEUC : R6MMR6Rel, BGEUC_ENC, BGEUC_DESC, ISA_MIPS32R6;
-def BGEZALC : R6MMR6Rel, BGEZALC_ENC, BGEZALC_DESC, ISA_MIPS32R6;
-def BGEZC : BGEZC_ENC, BGEZC_DESC, ISA_MIPS32R6;
-def BGTZALC : R6MMR6Rel, BGTZALC_ENC, BGTZALC_DESC, ISA_MIPS32R6;
-def BGTZC : BGTZC_ENC, BGTZC_DESC, ISA_MIPS32R6;
+let AdditionalPredicates = [NotInMicroMips] in {
+ def BEQC : R6MMR6Rel, BEQC_ENC, BEQC_DESC, ISA_MIPS32R6;
+ def BEQZALC : R6MMR6Rel, BEQZALC_ENC, BEQZALC_DESC, ISA_MIPS32R6;
+ def BEQZC : R6MMR6Rel, BEQZC_ENC, BEQZC_DESC, ISA_MIPS32R6;
+ def BGEC : R6MMR6Rel, BGEC_ENC, BGEC_DESC, ISA_MIPS32R6;
+ def BGEUC : R6MMR6Rel, BGEUC_ENC, BGEUC_DESC, ISA_MIPS32R6;
+ def BGEZALC : R6MMR6Rel, BGEZALC_ENC, BGEZALC_DESC, ISA_MIPS32R6;
+ def BGEZC : R6MMR6Rel, BGEZC_ENC, BGEZC_DESC, ISA_MIPS32R6;
+ def BGTZALC : R6MMR6Rel, BGTZALC_ENC, BGTZALC_DESC, ISA_MIPS32R6;
+ def BGTZC : R6MMR6Rel, BGTZC_ENC, BGTZC_DESC, ISA_MIPS32R6;
+}
def BITSWAP : R6MMR6Rel, BITSWAP_ENC, BITSWAP_DESC, ISA_MIPS32R6;
-def BLEZALC : R6MMR6Rel, BLEZALC_ENC, BLEZALC_DESC, ISA_MIPS32R6;
-def BLEZC : BLEZC_ENC, BLEZC_DESC, ISA_MIPS32R6;
-def BLTC : R6MMR6Rel, BLTC_ENC, BLTC_DESC, ISA_MIPS32R6;
-def BLTUC : R6MMR6Rel, BLTUC_ENC, BLTUC_DESC, ISA_MIPS32R6;
-def BLTZALC : R6MMR6Rel, BLTZALC_ENC, BLTZALC_DESC, ISA_MIPS32R6;
-def BLTZC : BLTZC_ENC, BLTZC_DESC, ISA_MIPS32R6;
-def BNEC : R6MMR6Rel, BNEC_ENC, BNEC_DESC, ISA_MIPS32R6;
-def BNEZALC : R6MMR6Rel, BNEZALC_ENC, BNEZALC_DESC, ISA_MIPS32R6;
-def BNEZC : R6MMR6Rel, BNEZC_ENC, BNEZC_DESC, ISA_MIPS32R6;
let AdditionalPredicates = [NotInMicroMips] in {
+ def BLEZALC : R6MMR6Rel, BLEZALC_ENC, BLEZALC_DESC, ISA_MIPS32R6;
+ def BLEZC : R6MMR6Rel, BLEZC_ENC, BLEZC_DESC, ISA_MIPS32R6;
+ def BLTC : R6MMR6Rel, BLTC_ENC, BLTC_DESC, ISA_MIPS32R6;
+ def BLTUC : R6MMR6Rel, BLTUC_ENC, BLTUC_DESC, ISA_MIPS32R6;
+ def BLTZALC : R6MMR6Rel, BLTZALC_ENC, BLTZALC_DESC, ISA_MIPS32R6;
+ def BLTZC : R6MMR6Rel, BLTZC_ENC, BLTZC_DESC, ISA_MIPS32R6;
+ def BNEC : R6MMR6Rel, BNEC_ENC, BNEC_DESC, ISA_MIPS32R6;
+ def BNEZALC : R6MMR6Rel, BNEZALC_ENC, BNEZALC_DESC, ISA_MIPS32R6;
+ def BNEZC : R6MMR6Rel, BNEZC_ENC, BNEZC_DESC, ISA_MIPS32R6;
def BNVC : R6MMR6Rel, BNVC_ENC, BNVC_DESC, ISA_MIPS32R6;
def BOVC : R6MMR6Rel, BOVC_ENC, BOVC_DESC, ISA_MIPS32R6;
}
@@ -784,12 +829,16 @@ let AdditionalPredicates = [NotInMicroMips] in {
}
def CLO_R6 : R6MMR6Rel, CLO_R6_ENC, CLO_R6_DESC, ISA_MIPS32R6;
def CLZ_R6 : R6MMR6Rel, CLZ_R6_ENC, CLZ_R6_DESC, ISA_MIPS32R6;
-defm S : CMP_CC_M<FIELD_CMP_FORMAT_S, "s", FGR32Opnd>;
-defm D : CMP_CC_M<FIELD_CMP_FORMAT_D, "d", FGR64Opnd>;
+defm S : CMP_CC_M<FIELD_CMP_FORMAT_S, "s", FGR32Opnd, II_CMP_CC_S>;
+defm D : CMP_CC_M<FIELD_CMP_FORMAT_D, "d", FGR64Opnd, II_CMP_CC_D>;
let AdditionalPredicates = [NotInMicroMips] in {
def DIV : R6MMR6Rel, DIV_ENC, DIV_DESC, ISA_MIPS32R6;
def DIVU : R6MMR6Rel, DIVU_ENC, DIVU_DESC, ISA_MIPS32R6;
}
+
+def DVP : R6MMR6Rel, DVP_ENC, DVP_DESC, ISA_MIPS32R6;
+def EVP : R6MMR6Rel, EVP_ENC, EVP_DESC, ISA_MIPS32R6;
+
def JIALC : R6MMR6Rel, JIALC_ENC, JIALC_DESC, ISA_MIPS32R6;
def JIC : R6MMR6Rel, JIC_ENC, JIC_DESC, ISA_MIPS32R6;
def JR_HB_R6 : JR_HB_R6_ENC, JR_HB_R6_DESC, ISA_MIPS32R6;
@@ -802,8 +851,8 @@ let AdditionalPredicates = [NotInMicroMips] in {
def LWC2_R6 : LWC2_R6_ENC, LWC2_R6_DESC, ISA_MIPS32R6;
}
def LWPC : R6MMR6Rel, LWPC_ENC, LWPC_DESC, ISA_MIPS32R6;
-def LWUPC : LWUPC_ENC, LWUPC_DESC, ISA_MIPS32R6;
let AdditionalPredicates = [NotInMicroMips] in {
+ def LWUPC : R6MMR6Rel, LWUPC_ENC, LWUPC_DESC, ISA_MIPS32R6;
def MADDF_S : MADDF_S_ENC, MADDF_S_DESC, ISA_MIPS32R6, HARDFLOAT;
def MADDF_D : MADDF_D_ENC, MADDF_D_DESC, ISA_MIPS32R6, HARDFLOAT;
def MAXA_D : MAXA_D_ENC, MAXA_D_DESC, ISA_MIPS32R6, HARDFLOAT;
@@ -855,6 +904,9 @@ let AdditionalPredicates = [NotInMicroMips] in {
//
//===----------------------------------------------------------------------===//
+def : MipsInstAlias<"dvp", (DVP ZERO), 0>, ISA_MIPS32R6;
+def : MipsInstAlias<"evp", (EVP ZERO), 0>, ISA_MIPS32R6;
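+// Illustrative note on the aliases above: the bare "dvp"/"evp" forms should
+// assemble as "dvp $zero"/"evp $zero"; the trailing 0 is assumed to make the
+// alias parse-only, so printing keeps the explicit register operand.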
+
let AdditionalPredicates = [NotInMicroMips] in {
def : MipsInstAlias<"sdbbp", (SDBBP_R6 0)>, ISA_MIPS32R6;
def : MipsInstAlias<"jr $rs", (JALR ZERO, GPR32Opnd:$rs), 1>, ISA_MIPS32R6, GPR_32;
diff --git a/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td b/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td
index 88cfec5bc13c..521e22fb7992 100644
--- a/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td
@@ -15,11 +15,6 @@
// Mips Operand, Complex Patterns and Transformations Definitions.
//===----------------------------------------------------------------------===//
-// Transformation Function - get Imm - 32.
-def Subtract32 : SDNodeXForm<imm, [{
- return getImm(N, (unsigned)N->getZExtValue() - 32);
-}]>;
-
// shamt must fit in 6 bits.
def immZExt6 : ImmLeaf<i32, [{return Imm == (Imm & 0x3f);}]>;
@@ -228,22 +223,24 @@ def LL64 : LLBase<"ll", GPR32Opnd>, LW_FM<0x30>, PTR_64,
ISA_MIPS2_NOT_32R6_64R6;
def SC64 : SCBase<"sc", GPR32Opnd>, LW_FM<0x38>, PTR_64,
ISA_MIPS2_NOT_32R6_64R6;
+def JR64 : IndirectBranch<"jr", GPR64Opnd>, MTLO_FM<8>, PTR_64;
}
+def JALR64 : JumpLinkReg<"jalr", GPR64Opnd>, JALR_FM;
+
/// Jump and Branch Instructions
let isCodeGenOnly = 1 in {
- def JR64 : IndirectBranch<"jr", GPR64Opnd>, MTLO_FM<8>;
def BEQ64 : CBranch<"beq", brtarget, seteq, GPR64Opnd>, BEQ_FM<4>;
def BNE64 : CBranch<"bne", brtarget, setne, GPR64Opnd>, BEQ_FM<5>;
def BGEZ64 : CBranchZero<"bgez", brtarget, setge, GPR64Opnd>, BGEZ_FM<1, 1>;
def BGTZ64 : CBranchZero<"bgtz", brtarget, setgt, GPR64Opnd>, BGEZ_FM<7, 0>;
def BLEZ64 : CBranchZero<"blez", brtarget, setle, GPR64Opnd>, BGEZ_FM<6, 0>;
def BLTZ64 : CBranchZero<"bltz", brtarget, setlt, GPR64Opnd>, BGEZ_FM<1, 0>;
- def JALR64 : JumpLinkReg<"jalr", GPR64Opnd>, JALR_FM;
def JALR64Pseudo : JumpLinkRegPseudo<GPR64Opnd, JALR, RA, GPR32Opnd>;
- def TAILCALL64_R : TailCallReg<GPR64Opnd, JR, GPR32Opnd>;
}
+def TAILCALLREG64 : TailCallReg<GPR64Opnd>;
+
def PseudoReturn64 : PseudoReturnBase<GPR64Opnd>;
def PseudoIndirectBranch64 : PseudoIndirectBranchBase<GPR64Opnd>;
@@ -293,14 +290,16 @@ def SEH64 : SignExtInReg<"seh", i16, GPR64Opnd, II_SEH>, SEB_FM<0x18, 0x20>,
/// Count Leading
let AdditionalPredicates = [NotInMicroMips] in {
- def DCLZ : StdMMR6Rel, CountLeading0<"dclz", GPR64Opnd>, CLO_FM<0x24>,
- ISA_MIPS64_NOT_64R6;
- def DCLO : StdMMR6Rel, CountLeading1<"dclo", GPR64Opnd>, CLO_FM<0x25>,
- ISA_MIPS64_NOT_64R6;
+ def DCLZ : StdMMR6Rel, CountLeading0<"dclz", GPR64Opnd, II_DCLZ>,
+ CLO_FM<0x24>, ISA_MIPS64_NOT_64R6;
+ def DCLO : StdMMR6Rel, CountLeading1<"dclo", GPR64Opnd, II_DCLO>,
+ CLO_FM<0x25>, ISA_MIPS64_NOT_64R6;
/// Double Word Swap Bytes/HalfWords
- def DSBH : SubwordSwap<"dsbh", GPR64Opnd>, SEB_FM<2, 0x24>, ISA_MIPS64R2;
- def DSHD : SubwordSwap<"dshd", GPR64Opnd>, SEB_FM<5, 0x24>, ISA_MIPS64R2;
+ def DSBH : SubwordSwap<"dsbh", GPR64Opnd, II_DSBH>, SEB_FM<2, 0x24>,
+ ISA_MIPS64R2;
+ def DSHD : SubwordSwap<"dshd", GPR64Opnd, II_DSHD>, SEB_FM<5, 0x24>,
+ ISA_MIPS64R2;
}
def LEA_ADDiu64 : EffectiveAddress<"daddiu", GPR64Opnd>, LW_FM<0x19>;
@@ -357,11 +356,12 @@ class Count1s<string opstr, RegisterOperand RO>:
let TwoOperandAliasConstraint = "$rd = $rs";
}
-class ExtsCins<string opstr, SDPatternOperator Op = null_frag>:
+class ExtsCins<string opstr, InstrItinClass itin,
+ SDPatternOperator Op = null_frag>:
InstSE<(outs GPR64Opnd:$rt), (ins GPR64Opnd:$rs, uimm5:$pos, uimm5:$lenm1),
!strconcat(opstr, " $rt, $rs, $pos, $lenm1"),
[(set GPR64Opnd:$rt, (Op GPR64Opnd:$rs, imm:$pos, imm:$lenm1))],
- NoItinerary, FrmR, opstr> {
+ itin, FrmR, opstr> {
let TwoOperandAliasConstraint = "$rt = $rs";
}
@@ -395,9 +395,9 @@ class CBranchBitNum<string opstr, DAGOperand opnd, PatFrag cond_op,
let Defs = [AT];
}
-class MFC2OP<string asmstr, RegisterOperand RO> :
+class MFC2OP<string asmstr, RegisterOperand RO, InstrItinClass itin> :
InstSE<(outs RO:$rt, uimm16:$imm16), (ins),
- !strconcat(asmstr, "\t$rt, $imm16"), [], NoItinerary, FrmFR>;
+ !strconcat(asmstr, "\t$rt, $imm16"), [], itin, FrmFR>;
// Unsigned Byte Add
def BADDu : ArithLogicR<"baddu", GPR64Opnd, 1, II_BADDU>,
@@ -425,12 +425,12 @@ def DMUL : ArithLogicR<"dmul", GPR64Opnd, 1, II_DMUL, mul>,
}
// Extract a signed bit field /+32
-def EXTS : ExtsCins<"exts">, EXTS_FM<0x3a>, ASE_CNMIPS;
-def EXTS32: ExtsCins<"exts32">, EXTS_FM<0x3b>, ASE_CNMIPS;
+def EXTS : ExtsCins<"exts", II_EXT>, EXTS_FM<0x3a>, ASE_CNMIPS;
+def EXTS32: ExtsCins<"exts32", II_EXT>, EXTS_FM<0x3b>, ASE_CNMIPS;
// Clear and insert a bit field /+32
-def CINS : ExtsCins<"cins">, EXTS_FM<0x32>, ASE_CNMIPS;
-def CINS32: ExtsCins<"cins32">, EXTS_FM<0x33>, ASE_CNMIPS;
+def CINS : ExtsCins<"cins", II_INS>, EXTS_FM<0x32>, ASE_CNMIPS;
+def CINS32: ExtsCins<"cins32", II_INS>, EXTS_FM<0x33>, ASE_CNMIPS;
// Move to multiplier/product register
def MTM0 : MoveToLOHI<"mtm0", GPR64Opnd, [MPL0, P0, P1, P2]>, MTMR_FM<0x08>,
@@ -472,8 +472,10 @@ def VMULU : ArithLogicR<"vmulu", GPR64Opnd, 0, II_DMUL>, ADD_FM<0x1c, 0x0f>,
}
// Move between CPU and coprocessor registers
-def DMFC2_OCTEON : MFC2OP<"dmfc2", GPR64Opnd>, MFC2OP_FM<0x12, 1>, ASE_CNMIPS;
-def DMTC2_OCTEON : MFC2OP<"dmtc2", GPR64Opnd>, MFC2OP_FM<0x12, 5>, ASE_CNMIPS;
+def DMFC2_OCTEON : MFC2OP<"dmfc2", GPR64Opnd, II_DMFC2>, MFC2OP_FM<0x12, 1>,
+ ASE_CNMIPS;
+def DMTC2_OCTEON : MFC2OP<"dmtc2", GPR64Opnd, II_DMTC2>, MFC2OP_FM<0x12, 5>,
+ ASE_CNMIPS;
}
}
@@ -494,6 +496,16 @@ def DMTC2 : MTC3OP<"dmtc2", COP2Opnd, GPR64Opnd, II_DMTC2>, MFC3OP_FM<0x12, 5>,
// Arbitrary patterns that map to one or more instructions
//===----------------------------------------------------------------------===//
+// Materialize i64 constants.
+defm : MaterializeImms<i64, ZERO_64, DADDiu, LUi64, ORi64>;
+
+def : MipsPat<(i64 immZExt32Low16Zero:$imm),
+ (DSLL (ORi64 ZERO_64, (HI16 imm:$imm)), 16)>;
+
+def : MipsPat<(i64 immZExt32:$imm),
+ (ORi64 (DSLL (ORi64 ZERO_64, (HI16 imm:$imm)), 16),
+ (LO16 imm:$imm))>;
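+// Illustrative expansion of the immZExt32 pattern above, assuming a value
+// such as 0x00054321 for $imm (HI16 = 0x5, LO16 = 0x4321):
+//   ori  $rd, $zero, 0x5
+//   dsll $rd, $rd, 16
+//   ori  $rd, $rd, 0x4321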
+
// extended loads
def : MipsPat<(i64 (extloadi1 addr:$src)), (LB64 addr:$src)>;
def : MipsPat<(i64 (extloadi8 addr:$src)), (LB64 addr:$src)>;
@@ -537,21 +549,21 @@ let AdditionalPredicates = [NotInMicroMips] in {
def : WrapperPat<tglobaltlsaddr, DADDiu, GPR64>;
}
-defm : BrcondPats<GPR64, BEQ64, BNE64, SLT64, SLTu64, SLTi64, SLTiu64,
+defm : BrcondPats<GPR64, BEQ64, BEQ, BNE64, SLT64, SLTu64, SLTi64, SLTiu64,
ZERO_64>;
-
def : MipsPat<(brcond (i32 (setlt i64:$lhs, 1)), bb:$dst),
(BLEZ64 i64:$lhs, bb:$dst)>;
def : MipsPat<(brcond (i32 (setgt i64:$lhs, -1)), bb:$dst),
(BGEZ64 i64:$lhs, bb:$dst)>;
// setcc patterns
-defm : SeteqPats<GPR64, SLTiu64, XOR64, SLTu64, ZERO_64>;
-defm : SetlePats<GPR64, SLT64, SLTu64>;
-defm : SetgtPats<GPR64, SLT64, SLTu64>;
-defm : SetgePats<GPR64, SLT64, SLTu64>;
-defm : SetgeImmPats<GPR64, SLTi64, SLTiu64>;
-
+let AdditionalPredicates = [NotInMicroMips] in {
+ defm : SeteqPats<GPR64, SLTiu64, XOR64, SLTu64, ZERO_64>;
+ defm : SetlePats<GPR64, XORi, SLT64, SLTu64>;
+ defm : SetgtPats<GPR64, SLT64, SLTu64>;
+ defm : SetgePats<GPR64, XORi, SLT64, SLTu64>;
+ defm : SetgeImmPats<GPR64, XORi, SLTi64, SLTiu64>;
+}
// truncate
def : MipsPat<(trunc (assertsext GPR64:$src)),
(EXTRACT_SUBREG GPR64:$src, sub_32)>;
@@ -658,11 +670,14 @@ let AdditionalPredicates = [NotInMicroMips] in {
(DSUB GPR64Opnd:$rt, ZERO_64, GPR64Opnd:$rs), 1>,
ISA_MIPS3;
def : MipsInstAlias<"dneg $rt",
- (DSUB GPR64Opnd:$rt, ZERO_64, GPR64Opnd:$rt), 0>,
+ (DSUB GPR64Opnd:$rt, ZERO_64, GPR64Opnd:$rt), 1>,
ISA_MIPS3;
def : MipsInstAlias<"dnegu $rt, $rs",
(DSUBu GPR64Opnd:$rt, ZERO_64, GPR64Opnd:$rs), 1>,
ISA_MIPS3;
+ def : MipsInstAlias<"dnegu $rt",
+ (DSUBu GPR64Opnd:$rt, ZERO_64, GPR64Opnd:$rt), 1>,
+ ISA_MIPS3;
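+ // Illustrative reading: with the alias above, "dnegu $rt" should assemble
+ // as "dsubu $rt, $zero, $rt", mirroring the single-operand "dneg" form.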
}
def : MipsInstAlias<"dsubi $rs, $rt, $imm",
(DADDi GPR64Opnd:$rs, GPR64Opnd:$rt,
diff --git a/contrib/llvm/lib/Target/Mips/Mips64r6InstrInfo.td b/contrib/llvm/lib/Target/Mips/Mips64r6InstrInfo.td
index 64effbef8a6a..dabf4e0a52e2 100644
--- a/contrib/llvm/lib/Target/Mips/Mips64r6InstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/Mips64r6InstrInfo.td
@@ -48,8 +48,8 @@ class SCD_R6_ENC : SPECIAL3_LL_SC_FM<OPCODE6_SCD>;
class AHI_ATI_DESC_BASE<string instr_asm, RegisterOperand GPROpnd, InstrItinClass itin> {
dag OutOperandList = (outs GPROpnd:$rs);
- dag InOperandList = (ins GPROpnd:$rt, simm16_relaxed:$imm);
- string AsmString = !strconcat(instr_asm, "\t$rt, $imm");
+ dag InOperandList = (ins GPROpnd:$rt, uimm16_altrelaxed:$imm);
+ string AsmString = !strconcat(instr_asm, "\t$rs, $rt, $imm");
string Constraints = "$rs = $rt";
InstrItinClass Itinerary = itin;
}
@@ -76,13 +76,27 @@ class SCD_R6_DESC : SC_R6_DESC_BASE<"scd", GPR64Opnd, II_SCD>;
class SELEQZ64_DESC : SELEQNE_Z_DESC_BASE<"seleqz", GPR64Opnd>;
class SELNEZ64_DESC : SELEQNE_Z_DESC_BASE<"selnez", GPR64Opnd>;
+class BGEC64_DESC : CMP_BC_DESC_BASE<"bgec", brtarget, GPR64Opnd>;
+class BGEUC64_DESC : CMP_BC_DESC_BASE<"bgeuc", brtarget, GPR64Opnd>;
+class BEQC64_DESC : CMP_BC_DESC_BASE<"beqc", brtarget, GPR64Opnd>;
+class BNEC64_DESC : CMP_BC_DESC_BASE<"bnec", brtarget, GPR64Opnd>;
+class BLTC64_DESC : CMP_BC_DESC_BASE<"bltc", brtarget, GPR64Opnd>;
+class BLTUC64_DESC : CMP_BC_DESC_BASE<"bltuc", brtarget, GPR64Opnd>;
+class BLTZC64_DESC : CMP_CBR_RT_Z_DESC_BASE<"bltzc", brtarget, GPR64Opnd>;
+class BGEZC64_DESC : CMP_CBR_RT_Z_DESC_BASE<"bgezc", brtarget, GPR64Opnd>;
+class BLEZC64_DESC : CMP_CBR_RT_Z_DESC_BASE<"blezc", brtarget, GPR64Opnd>;
+class BGTZC64_DESC : CMP_CBR_RT_Z_DESC_BASE<"bgtzc", brtarget, GPR64Opnd>;
+class BEQZC64_DESC : CMP_CBR_EQNE_Z_DESC_BASE<"beqzc", brtarget21, GPR64Opnd>;
+class BNEZC64_DESC : CMP_CBR_EQNE_Z_DESC_BASE<"bnezc", brtarget21, GPR64Opnd>;
+
class JIALC64_DESC : JMP_IDX_COMPACT_DESC_BASE<"jialc", calloffset16,
- GPR64Opnd> {
+ GPR64Opnd, II_JIALC> {
bit isCall = 1;
list<Register> Defs = [RA];
}
-class JIC64_DESC : JMP_IDX_COMPACT_DESC_BASE<"jic", jmpoffset16, GPR64Opnd> {
+class JIC64_DESC : JMP_IDX_COMPACT_DESC_BASE<"jic", jmpoffset16, GPR64Opnd,
+ II_JIC> {
bit isBarrier = 1;
bit isTerminator = 1;
list<Register> Defs = [AT];
@@ -97,22 +111,20 @@ class SC64_R6_DESC : SC_R6_DESC_BASE<"sc", GPR32Opnd, II_SC>;
//===----------------------------------------------------------------------===//
let AdditionalPredicates = [NotInMicroMips] in {
- def DATI : DATI_ENC, DATI_DESC, ISA_MIPS64R6;
- def DAHI : DAHI_ENC, DAHI_DESC, ISA_MIPS64R6;
+ let DecoderMethod = "DecodeDAHIDATI" in {
+ def DATI : DATI_ENC, DATI_DESC, ISA_MIPS64R6;
+ def DAHI : DAHI_ENC, DAHI_DESC, ISA_MIPS64R6;
+ }
def DAUI : DAUI_ENC, DAUI_DESC, ISA_MIPS64R6;
def DALIGN : DALIGN_ENC, DALIGN_DESC, ISA_MIPS64R6;
-}
-def DBITSWAP : DBITSWAP_ENC, DBITSWAP_DESC, ISA_MIPS64R6;
-let AdditionalPredicates = [NotInMicroMips] in {
+ def DBITSWAP : R6MMR6Rel, DBITSWAP_ENC, DBITSWAP_DESC, ISA_MIPS64R6;
def DCLO_R6 : R6MMR6Rel, DCLO_R6_ENC, DCLO_R6_DESC, ISA_MIPS64R6;
def DCLZ_R6 : R6MMR6Rel, DCLZ_R6_ENC, DCLZ_R6_DESC, ISA_MIPS64R6;
def DDIV : DDIV_ENC, DDIV_DESC, ISA_MIPS64R6;
def DDIVU : DDIVU_ENC, DDIVU_DESC, ISA_MIPS64R6;
def DMOD : DMOD_ENC, DMOD_DESC, ISA_MIPS64R6;
def DMODU : DMODU_ENC, DMODU_DESC, ISA_MIPS64R6;
-}
-def DLSA_R6 : DLSA_R6_ENC, DLSA_R6_DESC, ISA_MIPS64R6;
-let AdditionalPredicates = [NotInMicroMips] in {
+ def DLSA_R6 : R6MMR6Rel, DLSA_R6_ENC, DLSA_R6_DESC, ISA_MIPS64R6;
def DMUH: DMUH_ENC, DMUH_DESC, ISA_MIPS64R6;
def DMUHU: DMUHU_ENC, DMUHU_DESC, ISA_MIPS64R6;
def DMUL_R6: DMUL_R6_ENC, DMUL_R6_DESC, ISA_MIPS64R6;
@@ -130,10 +142,28 @@ let AdditionalPredicates = [NotInMicroMips],
def LL64_R6 : LL_R6_ENC, LL64_R6_DESC, PTR_64, ISA_MIPS64R6;
def SC64_R6 : SC_R6_ENC, SC64_R6_DESC, PTR_64, ISA_MIPS64R6;
}
-let isCodeGenOnly = 1 in {
-def JIALC64 : JIALC_ENC, JIALC64_DESC, ISA_MIPS64R6;
-def JIC64 : JIC_ENC, JIC64_DESC, ISA_MIPS64R6;
+
+let DecoderNamespace = "Mips32r6_64r6_GP64" in {
+// Jump and Branch Instructions
+def JIALC64 : JIALC_ENC, JIALC64_DESC, ISA_MIPS64R6, GPR_64;
+def JIC64 : JIC_ENC, JIC64_DESC, ISA_MIPS64R6, GPR_64;
+
+def BEQC64 : BEQC_ENC, BEQC64_DESC, ISA_MIPS64R6, GPR_64;
+def BEQZC64 : BEQZC_ENC, BEQZC64_DESC, ISA_MIPS64R6, GPR_64;
+def BGEC64 : BGEC_ENC, BGEC64_DESC, ISA_MIPS64R6, GPR_64;
+def BGEUC64 : BGEUC_ENC, BGEUC64_DESC, ISA_MIPS64R6, GPR_64;
+def BGTZC64 : BGTZC_ENC, BGTZC64_DESC, ISA_MIPS64R6, GPR_64;
+def BLEZC64 : BLEZC_ENC, BLEZC64_DESC, ISA_MIPS64R6, GPR_64;
+def BLTC64 : BLTC_ENC, BLTC64_DESC, ISA_MIPS64R6, GPR_64;
+def BLTUC64 : BLTUC_ENC, BLTUC64_DESC, ISA_MIPS64R6, GPR_64;
+def BNEC64 : BNEC_ENC, BNEC64_DESC, ISA_MIPS64R6, GPR_64;
+def BNEZC64 : BNEZC_ENC, BNEZC64_DESC, ISA_MIPS64R6, GPR_64;
}
+let DecoderNamespace = "Mips32r6_64r6_BranchZero" in {
+def BLTZC64 : BLTZC_ENC, BLTZC64_DESC, ISA_MIPS64R6, GPR_64;
+def BGEZC64 : BGEZC_ENC, BGEZC64_DESC, ISA_MIPS64R6, GPR_64;
+}
+
//===----------------------------------------------------------------------===//
//
// Instruction Aliases
diff --git a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
index 3686c2fe3ec4..179695bc6988 100644
--- a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp
@@ -60,10 +60,6 @@ MipsTargetStreamer &MipsAsmPrinter::getTargetStreamer() const {
bool MipsAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
Subtarget = &MF.getSubtarget<MipsSubtarget>();
- // Initialize TargetLoweringObjectFile.
- const_cast<TargetLoweringObjectFile &>(getObjFileLowering())
- .Initialize(OutContext, TM);
-
MipsFI = MF.getInfo<MipsFunctionInfo>();
if (Subtarget->inMips16Mode())
for (std::map<
@@ -98,6 +94,7 @@ bool MipsAsmPrinter::lowerOperand(const MachineOperand &MO, MCOperand &MCOp) {
void MipsAsmPrinter::emitPseudoIndirectBranch(MCStreamer &OutStreamer,
const MachineInstr *MI) {
bool HasLinkReg = false;
+ bool InMicroMipsMode = Subtarget->inMicroMipsMode();
MCInst TmpInst0;
if (Subtarget->hasMips64r6()) {
@@ -106,8 +103,12 @@ void MipsAsmPrinter::emitPseudoIndirectBranch(MCStreamer &OutStreamer,
HasLinkReg = true;
} else if (Subtarget->hasMips32r6()) {
// MIPS32r6 should use (JALR ZERO, $rs)
- TmpInst0.setOpcode(Mips::JALR);
- HasLinkReg = true;
+ if (InMicroMipsMode)
+ TmpInst0.setOpcode(Mips::JRC16_MMR6);
+ else {
+ TmpInst0.setOpcode(Mips::JALR);
+ HasLinkReg = true;
+ }
} else if (Subtarget->inMicroMipsMode())
// microMIPS should use (JR_MM $rs)
TmpInst0.setOpcode(Mips::JR_MM);
@@ -185,7 +186,9 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) {
if (I->getOpcode() == Mips::PseudoReturn ||
I->getOpcode() == Mips::PseudoReturn64 ||
I->getOpcode() == Mips::PseudoIndirectBranch ||
- I->getOpcode() == Mips::PseudoIndirectBranch64) {
+ I->getOpcode() == Mips::PseudoIndirectBranch64 ||
+ I->getOpcode() == Mips::TAILCALLREG ||
+ I->getOpcode() == Mips::TAILCALLREG64) {
emitPseudoIndirectBranch(*OutStreamer, &*I);
continue;
}
@@ -250,9 +253,9 @@ void MipsAsmPrinter::printSavedRegsBitmask() {
int CPUTopSavedRegOff, FPUTopSavedRegOff;
// Set the CPU and FPU Bitmasks
- const MachineFrameInfo *MFI = MF->getFrameInfo();
+ const MachineFrameInfo &MFI = MF->getFrameInfo();
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
- const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
// size of stack area to which FP callee-saved regs are saved.
unsigned CPURegSize = Mips::GPR32RegClass.getSize();
unsigned FGR32RegSize = Mips::FGR32RegClass.getSize();
@@ -302,7 +305,7 @@ void MipsAsmPrinter::emitFrameDirective() {
unsigned stackReg = RI.getFrameRegister(*MF);
unsigned returnReg = RI.getRARegister();
- unsigned stackSize = MF->getFrameInfo()->getStackSize();
+ unsigned stackSize = MF->getFrameInfo().getStackSize();
getTargetStreamer().emitFrame(stackReg, stackSize, returnReg);
}
@@ -497,7 +500,7 @@ bool MipsAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
unsigned RegOp = OpNum;
if (!Subtarget->isGP64bit()){
- // Endianess reverses which register holds the high or low value
+ // Endianness reverses which register holds the high or low value
// between M and L.
switch(ExtraCode[0]) {
case 'M':
@@ -1063,8 +1066,8 @@ bool MipsAsmPrinter::isLongBranchPseudo(int Opcode) const {
// Force static initialization.
extern "C" void LLVMInitializeMipsAsmPrinter() {
- RegisterAsmPrinter<MipsAsmPrinter> X(TheMipsTarget);
- RegisterAsmPrinter<MipsAsmPrinter> Y(TheMipselTarget);
- RegisterAsmPrinter<MipsAsmPrinter> A(TheMips64Target);
- RegisterAsmPrinter<MipsAsmPrinter> B(TheMips64elTarget);
+ RegisterAsmPrinter<MipsAsmPrinter> X(getTheMipsTarget());
+ RegisterAsmPrinter<MipsAsmPrinter> Y(getTheMipselTarget());
+ RegisterAsmPrinter<MipsAsmPrinter> A(getTheMips64Target());
+ RegisterAsmPrinter<MipsAsmPrinter> B(getTheMips64elTarget());
}
diff --git a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.h b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.h
index f30141fc918f..259e557e1283 100644
--- a/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.h
+++ b/contrib/llvm/lib/Target/Mips/MipsAsmPrinter.h
@@ -103,9 +103,7 @@ public:
: AsmPrinter(TM, std::move(Streamer)), MCP(nullptr),
InConstantPool(false), MCInstLowering(*this) {}
- const char *getPassName() const override {
- return "Mips Assembly Printer";
- }
+ StringRef getPassName() const override { return "Mips Assembly Printer"; }
bool runOnMachineFunction(MachineFunction &MF) override;
diff --git a/contrib/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp b/contrib/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp
index 1ea48e0439d4..08b8ed31ccbb 100644
--- a/contrib/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsConstantIslandPass.cpp
@@ -356,15 +356,13 @@ namespace {
: MachineFunctionPass(ID), STI(nullptr), MF(nullptr), MCP(nullptr),
PrescannedForConstants(false) {}
- const char *getPassName() const override {
- return "Mips Constant Islands";
- }
+ StringRef getPassName() const override { return "Mips Constant Islands"; }
bool runOnMachineFunction(MachineFunction &F) override;
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
void doInitialPlacement(std::vector<MachineInstr*> &CPEMIs);
@@ -801,7 +799,7 @@ void MipsConstantIslands::computeBlockSize(MachineBasicBlock *MBB) {
BBI.Size = 0;
for (const MachineInstr &MI : *MBB)
- BBI.Size += TII->GetInstSizeInBytes(MI);
+ BBI.Size += TII->getInstSizeInBytes(MI);
}
/// getOffsetOf - Return the current offset of the specified machine instruction
@@ -818,7 +816,7 @@ unsigned MipsConstantIslands::getOffsetOf(MachineInstr *MI) const {
// Sum instructions before MI in MBB.
for (MachineBasicBlock::iterator I = MBB->begin(); &*I != MI; ++I) {
assert(I != MBB->end() && "Didn't find MI in its own basic block?");
- Offset += TII->GetInstSizeInBytes(*I);
+ Offset += TII->getInstSizeInBytes(*I);
}
return Offset;
}
@@ -1297,12 +1295,11 @@ void MipsConstantIslands::createNewWater(unsigned CPUserIndex,
unsigned CPUIndex = CPUserIndex+1;
unsigned NumCPUsers = CPUsers.size();
//MachineInstr *LastIT = 0;
- for (unsigned Offset = UserOffset + TII->GetInstSizeInBytes(*UserMI);
+ for (unsigned Offset = UserOffset + TII->getInstSizeInBytes(*UserMI);
Offset < BaseInsertOffset;
- Offset += TII->GetInstSizeInBytes(*MI), MI = std::next(MI)) {
+ Offset += TII->getInstSizeInBytes(*MI), MI = std::next(MI)) {
assert(MI != UserMBB->end() && "Fell off end of block");
- if (CPUIndex < NumCPUsers &&
- CPUsers[CPUIndex].MI == static_cast<MachineInstr *>(MI)) {
+ if (CPUIndex < NumCPUsers && CPUsers[CPUIndex].MI == MI) {
CPUser &U = CPUsers[CPUIndex];
if (!isOffsetInRange(Offset, EndInsertOffset, U)) {
// Shift insertion point by one unit of alignment so it is within reach.
@@ -1374,7 +1371,7 @@ bool MipsConstantIslands::handleConstantPoolUser(unsigned CPUserIndex) {
// it. Check for this so it will be removed from the WaterList.
// Also remove any entry from NewWaterList.
MachineBasicBlock *WaterBB = &*--NewMBB->getIterator();
- IP = std::find(WaterList.begin(), WaterList.end(), WaterBB);
+ IP = find(WaterList, WaterBB);
if (IP != WaterList.end())
NewWaterList.erase(WaterBB);
@@ -1622,7 +1619,7 @@ MipsConstantIslands::fixupConditionalBr(ImmBranch &Br) {
splitBlockBeforeInstr(*MI);
// No need for the branch to the next block. We're adding an unconditional
// branch to the destination.
- int delta = TII->GetInstSizeInBytes(MBB->back());
+ int delta = TII->getInstSizeInBytes(MBB->back());
BBInfo[MBB->getNumber()].Size -= delta;
MBB->back().eraseFromParent();
// BBInfo[SplitBB].Offset is wrong temporarily, fixed below
@@ -1644,14 +1641,14 @@ MipsConstantIslands::fixupConditionalBr(ImmBranch &Br) {
.addMBB(NextBB);
}
Br.MI = &MBB->back();
- BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(MBB->back());
+ BBInfo[MBB->getNumber()].Size += TII->getInstSizeInBytes(MBB->back());
BuildMI(MBB, DebugLoc(), TII->get(Br.UncondBr)).addMBB(DestBB);
- BBInfo[MBB->getNumber()].Size += TII->GetInstSizeInBytes(MBB->back());
+ BBInfo[MBB->getNumber()].Size += TII->getInstSizeInBytes(MBB->back());
unsigned MaxDisp = getUnconditionalBrDisp(Br.UncondBr);
ImmBranches.push_back(ImmBranch(&MBB->back(), MaxDisp, false, Br.UncondBr));
// Remove the old conditional branch. It may or may not still be in MBB.
- BBInfo[MI->getParent()->getNumber()].Size -= TII->GetInstSizeInBytes(*MI);
+ BBInfo[MI->getParent()->getNumber()].Size -= TII->getInstSizeInBytes(*MI);
MI->eraseFromParent();
adjustBBOffsetsAfter(MBB);
return true;
diff --git a/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp b/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
index b5ba770df7bd..c821084f68cf 100644
--- a/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp
@@ -79,8 +79,7 @@ static cl::opt<CompactBranchPolicy> MipsCompactBranchPolicy(
cl::values(
clEnumValN(CB_Never, "never", "Do not use compact branches if possible."),
clEnumValN(CB_Optimal, "optimal", "Use compact branches where appropriate (default)."),
- clEnumValN(CB_Always, "always", "Always use compact branches if possible."),
- clEnumValEnd
+ clEnumValN(CB_Always, "always", "Always use compact branches if possible.")
)
);
@@ -192,9 +191,7 @@ namespace {
Filler(TargetMachine &tm)
: MachineFunctionPass(ID), TM(tm) { }
- const char *getPassName() const override {
- return "Mips Delay Slot Filler";
- }
+ StringRef getPassName() const override { return "Mips Delay Slot Filler"; }
bool runOnMachineFunction(MachineFunction &F) override {
bool Changed = false;
@@ -213,7 +210,7 @@ namespace {
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -242,7 +239,7 @@ namespace {
/// This function searches in the backward direction for an instruction that
/// can be moved to the delay slot. Returns true on success.
- bool searchBackward(MachineBasicBlock &MBB, Iter Slot) const;
+ bool searchBackward(MachineBasicBlock &MBB, MachineInstr &Slot) const;
/// This function searches MBB in the forward direction for an instruction
/// that can be moved to the delay slot. Returns true on success.
@@ -543,6 +540,9 @@ Iter Filler::replaceWithCompactBranch(MachineBasicBlock &MBB, Iter Branch,
// For given opcode returns opcode of corresponding instruction with short
// delay slot.
+// For the pseudo TAILCALL*_MM instructions, return the short delay slot
+// form. Unfortunately, TAILCALL<->b16 is denied as b16 has a limited range
+// that is too short to make use of for tail calls.
static int getEquivalentCallShort(int Opcode) {
switch (Opcode) {
case Mips::BGEZAL:
@@ -555,6 +555,10 @@ static int getEquivalentCallShort(int Opcode) {
return Mips::JALRS_MM;
case Mips::JALR16_MM:
return Mips::JALRS16_MM;
+ case Mips::TAILCALL_MM:
+ llvm_unreachable("Attempting to shorten the TAILCALL_MM pseudo!");
+ case Mips::TAILCALLREG:
+ return Mips::JR16_MM;
default:
llvm_unreachable("Unexpected call instruction for microMIPS.");
}
@@ -587,7 +591,7 @@ bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
if (MipsCompactBranchPolicy.getValue() != CB_Always ||
!TII->getEquivalentCompactForm(I)) {
- if (searchBackward(MBB, I)) {
+ if (searchBackward(MBB, *I)) {
Filled = true;
} else if (I->isTerminator()) {
if (searchSuccBBs(MBB, I)) {
@@ -602,10 +606,16 @@ bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
// Get instruction with delay slot.
MachineBasicBlock::instr_iterator DSI = I.getInstrIterator();
- if (InMicroMipsMode && TII->GetInstSizeInBytes(*std::next(DSI)) == 2 &&
+ if (InMicroMipsMode && TII->getInstSizeInBytes(*std::next(DSI)) == 2 &&
DSI->isCall()) {
// If instruction in delay slot is 16b change opcode to
// corresponding instruction with short delay slot.
+
+ // TODO: Implement an instruction mapping table of 16-bit opcodes to
+ // 32-bit opcodes so that an instruction can be expanded. This would
+ // save 16 bits as a TAILCALL_MM pseudo requires a full-sized nop.
+ // TODO: Permit b16 when branching backwards to the same function
+ // if it is in range.
DSI->setDesc(TII->get(getEquivalentCallShort(DSI->getOpcode())));
}
continue;
@@ -646,8 +656,6 @@ template<typename IterTy>
bool Filler::searchRange(MachineBasicBlock &MBB, IterTy Begin, IterTy End,
RegDefsUses &RegDU, InspectMemInstr& IM, Iter Slot,
IterTy &Filler) const {
- bool IsReverseIter = std::is_convertible<IterTy, ReverseIter>::value;
-
for (IterTy I = Begin; I != End;) {
IterTy CurrI = I;
++I;
@@ -664,12 +672,6 @@ bool Filler::searchRange(MachineBasicBlock &MBB, IterTy Begin, IterTy End,
if (CurrI->isKill()) {
CurrI->eraseFromParent();
-
- // This special case is needed for reverse iterators, because when we
- // erase an instruction, the iterators are updated to point to the next
- // instruction.
- if (IsReverseIter && I != End)
- I = CurrI;
continue;
}
@@ -692,9 +694,14 @@ bool Filler::searchRange(MachineBasicBlock &MBB, IterTy Begin, IterTy End,
bool InMicroMipsMode = STI.inMicroMipsMode();
const MipsInstrInfo *TII = STI.getInstrInfo();
unsigned Opcode = (*Slot).getOpcode();
- if (InMicroMipsMode && TII->GetInstSizeInBytes(*CurrI) == 2 &&
+ // This is complicated by the tail call optimization. For non-PIC code
+ // there is only a 32-bit unconditional branch which can be assumed
+ // to be able to reach the target. b16 only has a range of +/- 1 KB.
+ // It's entirely possible that the target function is reachable with b16
+ // but we don't have enough information to make that decision.
+ if (InMicroMipsMode && TII->getInstSizeInBytes(*CurrI) == 2 &&
(Opcode == Mips::JR || Opcode == Mips::PseudoIndirectBranch ||
- Opcode == Mips::PseudoReturn))
+ Opcode == Mips::PseudoReturn || Opcode == Mips::TAILCALL))
continue;
Filler = CurrI;
@@ -704,23 +711,24 @@ bool Filler::searchRange(MachineBasicBlock &MBB, IterTy Begin, IterTy End,
return false;
}
-bool Filler::searchBackward(MachineBasicBlock &MBB, Iter Slot) const {
+bool Filler::searchBackward(MachineBasicBlock &MBB, MachineInstr &Slot) const {
if (DisableBackwardSearch)
return false;
auto *Fn = MBB.getParent();
RegDefsUses RegDU(*Fn->getSubtarget().getRegisterInfo());
- MemDefsUses MemDU(Fn->getDataLayout(), Fn->getFrameInfo());
+ MemDefsUses MemDU(Fn->getDataLayout(), &Fn->getFrameInfo());
ReverseIter Filler;
- RegDU.init(*Slot);
+ RegDU.init(Slot);
- if (!searchRange(MBB, ReverseIter(Slot), MBB.rend(), RegDU, MemDU, Slot,
+ MachineBasicBlock::iterator SlotI = Slot;
+ if (!searchRange(MBB, ++SlotI.getReverse(), MBB.rend(), RegDU, MemDU, Slot,
Filler))
return false;
- MBB.splice(std::next(Slot), &MBB, std::next(Filler).base());
- MIBundleBuilder(MBB, Slot, std::next(Slot, 2));
+ MBB.splice(std::next(SlotI), &MBB, Filler.getReverse());
+ MIBundleBuilder(MBB, SlotI, std::next(SlotI, 2));
++UsefulSlots;
return true;
}
@@ -776,8 +784,8 @@ bool Filler::searchSuccBBs(MachineBasicBlock &MBB, Iter Slot) const {
if (HasMultipleSuccs) {
IM.reset(new LoadFromStackOrConst());
} else {
- const MachineFrameInfo *MFI = Fn->getFrameInfo();
- IM.reset(new MemDefsUses(Fn->getDataLayout(), MFI));
+ const MachineFrameInfo &MFI = Fn->getFrameInfo();
+ IM.reset(new MemDefsUses(Fn->getDataLayout(), &MFI));
}
if (!searchRange(MBB, SuccBB->begin(), SuccBB->end(), RegDU, *IM, Slot,
diff --git a/contrib/llvm/lib/Target/Mips/MipsEVAInstrFormats.td b/contrib/llvm/lib/Target/Mips/MipsEVAInstrFormats.td
index 11e191ad6d82..8c3024810d27 100644
--- a/contrib/llvm/lib/Target/Mips/MipsEVAInstrFormats.td
+++ b/contrib/llvm/lib/Target/Mips/MipsEVAInstrFormats.td
@@ -50,7 +50,7 @@ def OPCODE6_TLBINVF : OPCODE6<0b000100>;
def OPCODE6_CACHEE : OPCODE6<0b011011>;
def OPCODE6_PREFE : OPCODE6<0b100011>;
-def OPGROUP_COP0 : OPGROUP<0b010000>;
+def OPGROUP_COP0_TLB : OPGROUP<0b010000>;
//===----------------------------------------------------------------------===//
//
@@ -77,7 +77,7 @@ class SPECIAL3_EVA_LOAD_STORE_FM<OPCODE6 Operation> : MipsEVAInst {
class TLB_FM<OPCODE6 Operation> : MipsEVAInst {
bits<32> Inst;
- let Inst{31-26} = OPGROUP_COP0.Value;
+ let Inst{31-26} = OPGROUP_COP0_TLB.Value;
let Inst{25} = 1; // CO
let Inst{24-6} = 0;
let Inst{5-0} = Operation.Value;
diff --git a/contrib/llvm/lib/Target/Mips/MipsFastISel.cpp b/contrib/llvm/lib/Target/Mips/MipsFastISel.cpp
index 19c201d26b24..29f3e2c07e04 100644
--- a/contrib/llvm/lib/Target/Mips/MipsFastISel.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsFastISel.cpp
@@ -31,6 +31,9 @@
#include "llvm/IR/GlobalVariable.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "mips-fastisel"
using namespace llvm;
@@ -95,10 +98,10 @@ class MipsFastISel final : public FastISel {
// Convenience variables to avoid some queries.
LLVMContext *Context;
+ bool fastLowerArguments() override;
bool fastLowerCall(CallLoweringInfo &CLI) override;
bool fastLowerIntrinsicCall(const IntrinsicInst *II) override;
- bool TargetSupported;
bool UnsupportedFPMode; // To allow fast-isel to proceed and just not handle
// floating point but not reject doing fast-isel in other
// situations
@@ -195,6 +198,9 @@ private:
bool processCallArgs(CallLoweringInfo &CLI, SmallVectorImpl<MVT> &ArgVTs,
unsigned &NumBytes);
bool finishCall(CallLoweringInfo &CLI, MVT RetVT, unsigned NumBytes);
+ const MipsABIInfo &getABI() const {
+ return static_cast<const MipsTargetMachine &>(TM).getABI();
+ }
public:
// Backend specific FastISel code.
@@ -205,12 +211,7 @@ public:
TII(*Subtarget->getInstrInfo()), TLI(*Subtarget->getTargetLowering()) {
MFI = funcInfo.MF->getInfo<MipsFunctionInfo>();
Context = &funcInfo.Fn->getContext();
- bool ISASupported = !Subtarget->hasMips32r6() &&
- !Subtarget->inMicroMipsMode() && Subtarget->hasMips32();
- TargetSupported =
- ISASupported && TM.isPositionIndependent() &&
- (static_cast<const MipsTargetMachine &>(TM).getABI().IsO32());
- UnsupportedFPMode = Subtarget->isFP64bit();
+ UnsupportedFPMode = Subtarget->isFP64bit() || Subtarget->useSoftFloat();
}
unsigned fastMaterializeAlloca(const AllocaInst *AI) override;
@@ -285,9 +286,6 @@ unsigned MipsFastISel::emitLogicalOp(unsigned ISDOpc, MVT RetVT,
}
unsigned MipsFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
- if (!TargetSupported)
- return 0;
-
assert(TLI.getValueType(DL, AI->getType(), true) == MVT::i32 &&
"Alloca should always return a pointer.");
@@ -398,9 +396,6 @@ unsigned MipsFastISel::materializeExternalCallSym(MCSymbol *Sym) {
// Materialize a constant into a register, and return the register
// number (or zero if we failed to handle it).
unsigned MipsFastISel::fastMaterializeConstant(const Constant *C) {
- if (!TargetSupported)
- return 0;
-
EVT CEVT = TLI.getValueType(DL, C->getType(), true);
// Only handle simple types.
@@ -443,14 +438,14 @@ bool MipsFastISel::computeAddress(const Value *Obj, Address &Addr) {
}
case Instruction::GetElementPtr: {
Address SavedAddr = Addr;
- uint64_t TmpOffset = Addr.getOffset();
+ int64_t TmpOffset = Addr.getOffset();
// Iterate through the GEP folding the constants into offsets where
// we can.
gep_type_iterator GTI = gep_type_begin(U);
for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end(); i != e;
++i, ++GTI) {
const Value *Op = *i;
- if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ if (StructType *STy = GTI.getStructTypeOrNull()) {
const StructLayout *SL = DL.getStructLayout(STy);
unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
TmpOffset += SL->getElementOffset(Idx);
@@ -761,8 +756,8 @@ bool MipsFastISel::emitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
if (Addr.isFIBase()) {
unsigned FI = Addr.getFI();
unsigned Align = 4;
- unsigned Offset = Addr.getOffset();
- MachineFrameInfo &MFI = *MF->getFrameInfo();
+ int64_t Offset = Addr.getOffset();
+ MachineFrameInfo &MFI = MF->getFrameInfo();
MachineMemOperand *MMO = MF->getMachineMemOperand(
MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
MFI.getObjectSize(FI), Align);
@@ -812,8 +807,8 @@ bool MipsFastISel::emitStore(MVT VT, unsigned SrcReg, Address &Addr,
if (Addr.isFIBase()) {
unsigned FI = Addr.getFI();
unsigned Align = 4;
- unsigned Offset = Addr.getOffset();
- MachineFrameInfo &MFI = *MF->getFrameInfo();
+ int64_t Offset = Addr.getOffset();
+ MachineFrameInfo &MFI = MF->getFrameInfo();
MachineMemOperand *MMO = MF->getMachineMemOperand(
MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore,
MFI.getObjectSize(FI), Align);
@@ -970,9 +965,13 @@ bool MipsFastISel::selectFPExt(const Instruction *I) {
bool MipsFastISel::selectSelect(const Instruction *I) {
assert(isa<SelectInst>(I) && "Expected a select instruction.");
+ DEBUG(dbgs() << "selectSelect\n");
+
MVT VT;
- if (!isTypeSupported(I->getType(), VT))
+ if (!isTypeSupported(I->getType(), VT) || UnsupportedFPMode) {
+ DEBUG(dbgs() << ".. .. gave up (!isTypeSupported || UnsupportedFPMode)\n");
return false;
+ }
unsigned CondMovOpc;
const TargetRegisterClass *RC;
@@ -1249,10 +1248,191 @@ bool MipsFastISel::finishCall(CallLoweringInfo &CLI, MVT RetVT,
return true;
}
-bool MipsFastISel::fastLowerCall(CallLoweringInfo &CLI) {
- if (!TargetSupported)
+bool MipsFastISel::fastLowerArguments() {
+ DEBUG(dbgs() << "fastLowerArguments\n");
+
+ if (!FuncInfo.CanLowerReturn) {
+ DEBUG(dbgs() << ".. gave up (!CanLowerReturn)\n");
return false;
+ }
+
+ const Function *F = FuncInfo.Fn;
+ if (F->isVarArg()) {
+ DEBUG(dbgs() << ".. gave up (varargs)\n");
+ return false;
+ }
+
+ CallingConv::ID CC = F->getCallingConv();
+ if (CC != CallingConv::C) {
+ DEBUG(dbgs() << ".. gave up (calling convention is not C)\n");
+ return false;
+ }
+
+ const ArrayRef<MCPhysReg> GPR32ArgRegs = {Mips::A0, Mips::A1, Mips::A2,
+ Mips::A3};
+ const ArrayRef<MCPhysReg> FGR32ArgRegs = {Mips::F12, Mips::F14};
+ const ArrayRef<MCPhysReg> AFGR64ArgRegs = {Mips::D6, Mips::D7};
+ ArrayRef<MCPhysReg>::iterator NextGPR32 = GPR32ArgRegs.begin();
+ ArrayRef<MCPhysReg>::iterator NextFGR32 = FGR32ArgRegs.begin();
+ ArrayRef<MCPhysReg>::iterator NextAFGR64 = AFGR64ArgRegs.begin();
+
+ struct AllocatedReg {
+ const TargetRegisterClass *RC;
+ unsigned Reg;
+ AllocatedReg(const TargetRegisterClass *RC, unsigned Reg)
+ : RC(RC), Reg(Reg) {}
+ };
+
+ // Only handle simple cases, i.e. all arguments are directly mapped to
+ // registers of the appropriate type.
+ SmallVector<AllocatedReg, 4> Allocation;
+ unsigned Idx = 1;
+ for (const auto &FormalArg : F->args()) {
+ if (F->getAttributes().hasAttribute(Idx, Attribute::InReg) ||
+ F->getAttributes().hasAttribute(Idx, Attribute::StructRet) ||
+ F->getAttributes().hasAttribute(Idx, Attribute::ByVal)) {
+ DEBUG(dbgs() << ".. gave up (inreg, structret, byval)\n");
+ return false;
+ }
+
+ Type *ArgTy = FormalArg.getType();
+ if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy()) {
+ DEBUG(dbgs() << ".. gave up (struct, array, or vector)\n");
+ return false;
+ }
+
+ EVT ArgVT = TLI.getValueType(DL, ArgTy);
+ DEBUG(dbgs() << ".. " << (Idx - 1) << ": " << ArgVT.getEVTString() << "\n");
+ if (!ArgVT.isSimple()) {
+ DEBUG(dbgs() << ".. .. gave up (not a simple type)\n");
+ return false;
+ }
+
+ switch (ArgVT.getSimpleVT().SimpleTy) {
+ case MVT::i1:
+ case MVT::i8:
+ case MVT::i16:
+ if (!F->getAttributes().hasAttribute(Idx, Attribute::SExt) &&
+ !F->getAttributes().hasAttribute(Idx, Attribute::ZExt)) {
+ // It must be an any-extend; this shouldn't happen for clang-generated IR
+ // so just fall back on SelectionDAG.
+ DEBUG(dbgs() << ".. .. gave up (i8/i16 arg is not extended)\n");
+ return false;
+ }
+
+ if (NextGPR32 == GPR32ArgRegs.end()) {
+ DEBUG(dbgs() << ".. .. gave up (ran out of GPR32 arguments)\n");
+ return false;
+ }
+
+ DEBUG(dbgs() << ".. .. GPR32(" << *NextGPR32 << ")\n");
+ Allocation.emplace_back(&Mips::GPR32RegClass, *NextGPR32++);
+
+ // Allocating any GPR32 prohibits further use of floating point arguments.
+ NextFGR32 = FGR32ArgRegs.end();
+ NextAFGR64 = AFGR64ArgRegs.end();
+ break;
+
+ case MVT::i32:
+ if (F->getAttributes().hasAttribute(Idx, Attribute::ZExt)) {
+ // The O32 ABI does not permit a zero-extended i32.
+ DEBUG(dbgs() << ".. .. gave up (i32 arg is zero extended)\n");
+ return false;
+ }
+ if (NextGPR32 == GPR32ArgRegs.end()) {
+ DEBUG(dbgs() << ".. .. gave up (ran out of GPR32 arguments)\n");
+ return false;
+ }
+
+ DEBUG(dbgs() << ".. .. GPR32(" << *NextGPR32 << ")\n");
+ Allocation.emplace_back(&Mips::GPR32RegClass, *NextGPR32++);
+
+ // Allocating any GPR32 prohibits further use of floating point arguments.
+ NextFGR32 = FGR32ArgRegs.end();
+ NextAFGR64 = AFGR64ArgRegs.end();
+ break;
+
+ case MVT::f32:
+ if (UnsupportedFPMode) {
+ DEBUG(dbgs() << ".. .. gave up (UnsupportedFPMode)\n");
+ return false;
+ }
+ if (NextFGR32 == FGR32ArgRegs.end()) {
+ DEBUG(dbgs() << ".. .. gave up (ran out of FGR32 arguments)\n");
+ return false;
+ }
+ DEBUG(dbgs() << ".. .. FGR32(" << *NextFGR32 << ")\n");
+ Allocation.emplace_back(&Mips::FGR32RegClass, *NextFGR32++);
+ // Allocating an FGR32 also allocates the super-register AFGR64, and
+ // ABI rules require us to skip the corresponding GPR32.
+ if (NextGPR32 != GPR32ArgRegs.end())
+ NextGPR32++;
+ if (NextAFGR64 != AFGR64ArgRegs.end())
+ NextAFGR64++;
+ break;
+
+ case MVT::f64:
+ if (UnsupportedFPMode) {
+ DEBUG(dbgs() << ".. .. gave up (UnsupportedFPMode)\n");
+ return false;
+ }
+ if (NextAFGR64 == AFGR64ArgRegs.end()) {
+ DEBUG(dbgs() << ".. .. gave up (ran out of AFGR64 arguments)\n");
+ return false;
+ }
+ DEBUG(dbgs() << ".. .. AFGR64(" << *NextAFGR64 << ")\n");
+ Allocation.emplace_back(&Mips::AFGR64RegClass, *NextAFGR64++);
+ // Allocating an AFGR64 also reserves its FGR32 sub-registers, and
+ // ABI rules require us to skip the corresponding GPR32 pair.
+ if (NextGPR32 != GPR32ArgRegs.end())
+ NextGPR32++;
+ if (NextGPR32 != GPR32ArgRegs.end())
+ NextGPR32++;
+ if (NextFGR32 != FGR32ArgRegs.end())
+ NextFGR32++;
+ break;
+
+ default:
+ DEBUG(dbgs() << ".. .. gave up (unknown type)\n");
+ return false;
+ }
+
+ ++Idx;
+ }
+
+ Idx = 0;
+ for (const auto &FormalArg : F->args()) {
+ unsigned SrcReg = Allocation[Idx].Reg;
+ unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, Allocation[Idx].RC);
+ // FIXME: Unfortunately it's necessary to emit a copy from the livein copy.
+ // Without this, EmitLiveInCopies may eliminate the livein if its only
+ // use is a bitcast (which isn't turned into an instruction).
+ unsigned ResultReg = createResultReg(Allocation[Idx].RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg)
+ .addReg(DstReg, getKillRegState(true));
+ updateValueMap(&FormalArg, ResultReg);
+ ++Idx;
+ }
+
+ // Calculate the size of the incoming arguments area.
+ // We currently reject all the cases where this would be non-zero.
+ unsigned IncomingArgSizeInBytes = 0;
+
+ // Account for the reserved argument area on ABI's that have one (O32).
+ // It seems strange to do this on the caller side but it's necessary in
+ // SelectionDAG's implementation.
+ IncomingArgSizeInBytes = std::min(getABI().GetCalleeAllocdArgSizeInBytes(CC),
+ IncomingArgSizeInBytes);
+
+ MF->getInfo<MipsFunctionInfo>()->setFormalArgInfo(IncomingArgSizeInBytes,
+ false);
+
+ return true;
+}
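+
+// A sketch of what the fast path above is expected to accept under O32 (the
+// signatures below are illustrative, not taken from the surrounding code):
+//   int  f(int a, int b);    // a -> $a0, b -> $a1: handled.
+//   int  g(int a, float b);  // rejected: once a GPR32 argument is allocated,
+//                            // later FP argument registers are disallowed.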
+
+bool MipsFastISel::fastLowerCall(CallLoweringInfo &CLI) {
CallingConv::ID CC = CLI.CallConv;
bool IsTailCall = CLI.IsTailCall;
bool IsVarArg = CLI.IsVarArg;
@@ -1337,9 +1517,6 @@ bool MipsFastISel::fastLowerCall(CallLoweringInfo &CLI) {
}
bool MipsFastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
- if (!TargetSupported)
- return false;
-
switch (II->getIntrinsicID()) {
default:
return false;
@@ -1435,6 +1612,8 @@ bool MipsFastISel::selectRet(const Instruction *I) {
const Function &F = *I->getParent()->getParent();
const ReturnInst *Ret = cast<ReturnInst>(I);
+ DEBUG(dbgs() << "selectRet\n");
+
if (!FuncInfo.CanLowerReturn)
return false;
@@ -1495,6 +1674,12 @@ bool MipsFastISel::selectRet(const Instruction *I) {
if (RVVT == MVT::f128)
return false;
+ // Do not handle FGR64 returns for now.
+ if (RVVT == MVT::f64 && UnsupportedFPMode) {
+ DEBUG(dbgs() << ".. .. gave up (UnsupportedFPMode)\n");
+ return false;
+ }
+
MVT DestVT = VA.getValVT();
// Special handling for extended integers.
if (RVVT != DestVT) {
@@ -1778,8 +1963,6 @@ bool MipsFastISel::selectShift(const Instruction *I) {
}
bool MipsFastISel::fastSelectInstruction(const Instruction *I) {
- if (!TargetSupported)
- return false;
switch (I->getOpcode()) {
default:
break;
diff --git a/contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp
index fe6f332f2bdf..b2cf03976f81 100644
--- a/contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsFrameLowering.cpp
@@ -92,30 +92,30 @@ const MipsFrameLowering *MipsFrameLowering::create(const MipsSubtarget &ST) {
// if it needs dynamic stack realignment, if frame pointer elimination is
// disabled, or if the frame address is taken.
bool MipsFrameLowering::hasFP(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetRegisterInfo *TRI = STI.getRegisterInfo();
return MF.getTarget().Options.DisableFramePointerElim(MF) ||
- MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken() ||
+ MFI.hasVarSizedObjects() || MFI.isFrameAddressTaken() ||
TRI->needsStackRealignment(MF);
}
bool MipsFrameLowering::hasBP(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetRegisterInfo *TRI = STI.getRegisterInfo();
- return MFI->hasVarSizedObjects() && TRI->needsStackRealignment(MF);
+ return MFI.hasVarSizedObjects() && TRI->needsStackRealignment(MF);
}
uint64_t MipsFrameLowering::estimateStackSize(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
int64_t Offset = 0;
// Iterate over fixed sized objects.
- for (int I = MFI->getObjectIndexBegin(); I != 0; ++I)
- Offset = std::max(Offset, -MFI->getObjectOffset(I));
+ for (int I = MFI.getObjectIndexBegin(); I != 0; ++I)
+ Offset = std::max(Offset, -MFI.getObjectOffset(I));
// Conservatively assume all callee-saved registers will be saved.
for (const MCPhysReg *R = TRI.getCalleeSavedRegs(&MF); *R; ++R) {
@@ -123,19 +123,19 @@ uint64_t MipsFrameLowering::estimateStackSize(const MachineFunction &MF) const {
Offset = alignTo(Offset + Size, Size);
}
- unsigned MaxAlign = MFI->getMaxAlignment();
+ unsigned MaxAlign = MFI.getMaxAlignment();
// Check that MaxAlign is not zero if there is a stack object that is not a
// callee-saved spill.
- assert(!MFI->getObjectIndexEnd() || MaxAlign);
+ assert(!MFI.getObjectIndexEnd() || MaxAlign);
// Iterate over other objects.
- for (unsigned I = 0, E = MFI->getObjectIndexEnd(); I != E; ++I)
- Offset = alignTo(Offset + MFI->getObjectSize(I), MaxAlign);
+ for (unsigned I = 0, E = MFI.getObjectIndexEnd(); I != E; ++I)
+ Offset = alignTo(Offset + MFI.getObjectSize(I), MaxAlign);
// Call frame.
- if (MFI->adjustsStack() && hasReservedCallFrame(MF))
- Offset = alignTo(Offset + MFI->getMaxCallFrameSize(),
+ if (MFI.adjustsStack() && hasReservedCallFrame(MF))
+ Offset = alignTo(Offset + MFI.getMaxCallFrameSize(),
std::max(MaxAlign, getStackAlignment()));
return alignTo(Offset, getStackAlignment());
diff --git a/contrib/llvm/lib/Target/Mips/MipsHazardSchedule.cpp b/contrib/llvm/lib/Target/Mips/MipsHazardSchedule.cpp
index 10022ba60680..31b86124bc8d 100644
--- a/contrib/llvm/lib/Target/Mips/MipsHazardSchedule.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsHazardSchedule.cpp
@@ -7,10 +7,10 @@
//
//===----------------------------------------------------------------------===//
/// \file
-/// This pass is used to workaround certain pipeline hazards. For now, this covers
-/// compact branch hazards. In future this pass can be extended to other pipeline
-/// hazards, such as various MIPS1 hazards, processor errata that require
-/// instruction reorganization, etc.
+/// This pass is used to work around certain pipeline hazards. For now, this
+/// covers compact branch hazards. In the future this pass can be extended to other
+/// pipeline hazards, such as various MIPS1 hazards, processor errata that
+/// require instruction reorganization, etc.
///
/// This pass has to run after the delay slot filler as that pass can introduce
/// pipeline hazards, hence the existing hazard recognizer is not suitable.
@@ -18,8 +18,8 @@
/// Hazards handled: forbidden slots for MIPSR6.
///
/// A forbidden slot hazard occurs when a compact branch instruction is executed
-/// and the adjacent instruction in memory is a control transfer instruction such
-/// as a branch or jump, ERET, ERETNC, DERET, WAIT and PAUSE.
+/// and the adjacent instruction in memory is a control transfer instruction
+/// such as a branch or jump, ERET, ERETNC, DERET, WAIT and PAUSE.
///
/// For example:
///
@@ -70,13 +70,13 @@ class MipsHazardSchedule : public MachineFunctionPass {
public:
MipsHazardSchedule() : MachineFunctionPass(ID) {}
- const char *getPassName() const override { return "Mips Hazard Schedule"; }
+ StringRef getPassName() const override { return "Mips Hazard Schedule"; }
bool runOnMachineFunction(MachineFunction &F) override;
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
private:
@@ -91,20 +91,43 @@ FunctionPass *llvm::createMipsHazardSchedule() {
return new MipsHazardSchedule();
}
-// Find the next real instruction from the current position.
-static Iter getNextMachineInstr(Iter Position) {
+// Find the next real instruction from the current position in current basic
+// block.
+static Iter getNextMachineInstrInBB(Iter Position) {
Iter I = Position, E = Position->getParent()->end();
- I = std::find_if_not(I, E, [](const Iter &Insn) { return Insn->isTransient(); });
- assert(I != E);
+ I = std::find_if_not(I, E,
+ [](const Iter &Insn) { return Insn->isTransient(); });
+
return I;
}
+// Find the next real instruction from the current position, looking through
+// basic block boundaries.
+static Iter getNextMachineInstr(Iter Position, MachineBasicBlock *Parent) {
+ if (Position == Parent->end()) {
+ MachineBasicBlock *Succ = Parent->getNextNode();
+ if (Succ != nullptr && Parent->isSuccessor(Succ)) {
+ Position = Succ->begin();
+ Parent = Succ;
+ } else {
+ llvm_unreachable(
+ "Should have identified the end of the function earlier!");
+ }
+ }
+
+ Iter Instr = getNextMachineInstrInBB(Position);
+ if (Instr == Parent->end()) {
+ return getNextMachineInstr(Instr, Parent);
+ }
+ return Instr;
+}
+
bool MipsHazardSchedule::runOnMachineFunction(MachineFunction &MF) {
const MipsSubtarget *STI =
&static_cast<const MipsSubtarget &>(MF.getSubtarget());
- // Forbidden slot hazards are only defined for MIPSR6.
+ // Forbidden slot hazards are only defined for MIPSR6 but not microMIPSR6.
if (!STI->hasMips32r6() || STI->inMicroMipsMode())
return false;
@@ -118,27 +141,17 @@ bool MipsHazardSchedule::runOnMachineFunction(MachineFunction &MF) {
if (!TII->HasForbiddenSlot(*I))
continue;
- bool InsertNop = false;
- // Next instruction in the basic block.
- if (std::next(I) != FI->end() &&
- !TII->SafeInForbiddenSlot(*getNextMachineInstr(std::next(I)))) {
- InsertNop = true;
- } else {
- // Next instruction in the physical successor basic block.
- for (auto *Succ : FI->successors()) {
- if (FI->isLayoutSuccessor(Succ) &&
- getNextMachineInstr(Succ->begin()) != Succ->end() &&
- !TII->SafeInForbiddenSlot(*getNextMachineInstr(Succ->begin()))) {
- InsertNop = true;
- break;
- }
- }
+ Iter Inst;
+ bool LastInstInFunction =
+ std::next(I) == FI->end() && std::next(FI) == MF.end();
+ if (!LastInstInFunction) {
+ Inst = getNextMachineInstr(std::next(I), &*FI);
}
- if (InsertNop) {
+ if (LastInstInFunction || !TII->SafeInForbiddenSlot(*Inst)) {
Changed = true;
- MIBundleBuilder(&*I).append(
- BuildMI(MF, I->getDebugLoc(), TII->get(Mips::NOP)));
+ MIBundleBuilder(&*I)
+ .append(BuildMI(MF, I->getDebugLoc(), TII->get(Mips::NOP)));
NumInsertedNops++;
}
}
diff --git a/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp
index 83763a64ab6a..0e1173f1c617 100644
--- a/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -108,8 +108,26 @@ bool MipsDAGToDAGISel::selectIntAddrLSL2MM(SDValue Addr, SDValue &Base,
return false;
}
-bool MipsDAGToDAGISel::selectIntAddrMSA(SDValue Addr, SDValue &Base,
- SDValue &Offset) const {
+bool MipsDAGToDAGISel::selectIntAddrSImm10(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ llvm_unreachable("Unimplemented function.");
+ return false;
+}
+
+bool MipsDAGToDAGISel::selectIntAddrSImm10Lsl1(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ llvm_unreachable("Unimplemented function.");
+ return false;
+}
+
+bool MipsDAGToDAGISel::selectIntAddrSImm10Lsl2(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ llvm_unreachable("Unimplemented function.");
+ return false;
+}
+
+bool MipsDAGToDAGISel::selectIntAddrSImm10Lsl3(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
llvm_unreachable("Unimplemented function.");
return false;
}
diff --git a/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.h b/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.h
index 289832a8064e..20bdd4aa8f5f 100644
--- a/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.h
+++ b/contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.h
@@ -35,7 +35,7 @@ public:
: SelectionDAGISel(TM, OL), Subtarget(nullptr) {}
// Pass Name
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "MIPS DAG->DAG Pattern Instruction Selection";
}
@@ -78,8 +78,17 @@ private:
SDValue &Offset) const;
/// Match addr+simm10 and addr
- virtual bool selectIntAddrMSA(SDValue Addr, SDValue &Base,
- SDValue &Offset) const;
+ virtual bool selectIntAddrSImm10(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
+ virtual bool selectIntAddrSImm10Lsl1(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
+ virtual bool selectIntAddrSImm10Lsl2(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
+
+ virtual bool selectIntAddrSImm10Lsl3(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const;
virtual bool selectAddr16(SDValue Addr, SDValue &Base, SDValue &Offset);
virtual bool selectAddr16SP(SDValue Addr, SDValue &Base, SDValue &Offset);
diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp
index d63a62ade90b..9c511bd77822 100644
--- a/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp
@@ -268,7 +268,7 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM,
AddPromotedToType(ISD::SETCC, MVT::i1, MVT::i32);
// Mips Custom Operations
- setOperationAction(ISD::BR_JT, MVT::Other, Custom);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
@@ -426,6 +426,13 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM,
setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::AssertZext);
+ if (ABI.IsO32()) {
+ // These libcalls are not available in 32-bit.
+ setLibcallName(RTLIB::SHL_I128, nullptr);
+ setLibcallName(RTLIB::SRL_I128, nullptr);
+ setLibcallName(RTLIB::SRA_I128, nullptr);
+ }
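+ // Assumed effect of clearing the names above: i128 shifts should be
+ // expanded inline instead of lowered to __ashlti3/__lshrti3/__ashrti3
+ // calls, which are not available in 32-bit.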
+
setMinFunctionAlignment(Subtarget.isGP64bit() ? 3 : 2);
// The arguments on the stack are defined in terms of 4-byte slots on O32
@@ -451,9 +458,19 @@ const MipsTargetLowering *MipsTargetLowering::create(const MipsTargetMachine &TM
FastISel *
MipsTargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo) const {
- if (!funcInfo.MF->getTarget().Options.EnableFastISel)
- return TargetLowering::createFastISel(funcInfo, libInfo);
- return Mips::createFastISel(funcInfo, libInfo);
+ const MipsTargetMachine &TM =
+ static_cast<const MipsTargetMachine &>(funcInfo.MF->getTarget());
+
+ // We support only the standard encoding [MIPS32,MIPS32R5] ISAs.
+ bool UseFastISel = TM.Options.EnableFastISel && Subtarget.hasMips32() &&
+ !Subtarget.hasMips32r6() && !Subtarget.inMips16Mode() &&
+ !Subtarget.inMicroMipsMode();
+
+ // Disable if we don't generate PIC or the ABI isn't O32.
+ if (!TM.isPositionIndependent() || !TM.getABI().IsO32())
+ UseFastISel = false;
+
+ return UseFastISel ? Mips::createFastISel(funcInfo, libInfo) : nullptr;
}
EVT MipsTargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
@@ -893,7 +910,6 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const
{
switch (Op.getOpcode())
{
- case ISD::BR_JT: return lowerBR_JT(Op, DAG);
case ISD::BRCOND: return lowerBRCOND(Op, DAG);
case ISD::ConstantPool: return lowerConstantPool(Op, DAG);
case ISD::GlobalAddress: return lowerGlobalAddress(Op, DAG);
@@ -1659,40 +1675,6 @@ MachineBasicBlock *MipsTargetLowering::emitSEL_D(MachineInstr &MI,
return BB;
}
-//===----------------------------------------------------------------------===//
-// Misc Lower Operation implementation
-//===----------------------------------------------------------------------===//
-SDValue MipsTargetLowering::lowerBR_JT(SDValue Op, SelectionDAG &DAG) const {
- SDValue Chain = Op.getOperand(0);
- SDValue Table = Op.getOperand(1);
- SDValue Index = Op.getOperand(2);
- SDLoc DL(Op);
- auto &TD = DAG.getDataLayout();
- EVT PTy = getPointerTy(TD);
- unsigned EntrySize =
- DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD);
-
- Index = DAG.getNode(ISD::MUL, DL, PTy, Index,
- DAG.getConstant(EntrySize, DL, PTy));
- SDValue Addr = DAG.getNode(ISD::ADD, DL, PTy, Index, Table);
-
- EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8);
- Addr = DAG.getExtLoad(
- ISD::SEXTLOAD, DL, PTy, Chain, Addr,
- MachinePointerInfo::getJumpTable(DAG.getMachineFunction()), MemVT);
- Chain = Addr.getValue(1);
-
- if (isPositionIndependent() || ABI.IsN64()) {
- // For PIC, the sequence is:
- // BRIND(load(Jumptable + index) + RelocBase)
- // RelocBase can be JumpTable, GOT or some sort of global base.
- Addr = DAG.getNode(ISD::ADD, DL, PTy, Addr,
- getPICJumpTableRelocBase(Table, DAG));
- }
-
- return DAG.getNode(ISD::BRIND, DL, MVT::Other, Chain, Addr);
-}
-
SDValue MipsTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
// The first operand is the chain, the second is the condition, the third is
// the block to branch to if the condition is true.
@@ -1755,7 +1737,8 @@ SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op,
const MipsTargetObjectFile *TLOF =
static_cast<const MipsTargetObjectFile *>(
getTargetMachine().getObjFileLowering());
- if (TLOF->IsGlobalInSmallSection(GV, getTargetMachine()))
+ const GlobalObject *GO = GV->getBaseObject();
+ if (GO && TLOF->IsGlobalInSmallSection(GO, getTargetMachine()))
// %gp_rel relocation
return getAddrGPRel(N, SDLoc(N), Ty, DAG);
@@ -2099,8 +2082,8 @@ lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
assert((cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue() == 0) &&
"Frame address can only be determined for current frame.");
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
- MFI->setFrameAddressIsTaken(true);
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI.setFrameAddressIsTaken(true);
EVT VT = Op.getValueType();
SDLoc DL(Op);
SDValue FrameAddr = DAG.getCopyFromReg(
@@ -2118,10 +2101,10 @@ SDValue MipsTargetLowering::lowerRETURNADDR(SDValue Op,
"Return address can be determined only for current frame.");
MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
MVT VT = Op.getSimpleValueType();
unsigned RA = ABI.IsN64() ? Mips::RA_64 : Mips::RA;
- MFI->setReturnAddressIsTaken(true);
+ MFI.setReturnAddressIsTaken(true);
// Return RA, which contains the return address. Mark it an implicit live-in.
unsigned Reg = MF.addLiveIn(RA, getRegClassFor(VT));
@@ -2398,9 +2381,9 @@ SDValue MipsTargetLowering::lowerEH_DWARF_CFA(SDValue Op,
// Return a fixed StackObject with offset 0 which points to the old stack
// pointer.
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
EVT ValTy = Op->getValueType(0);
- int FI = MFI->CreateFixedObject(Op.getValueSizeInBits() / 8, 0, false);
+ int FI = MFI.CreateFixedObject(Op.getValueSizeInBits() / 8, 0, false);
return DAG.getFrameIndex(FI, ValTy);
}
@@ -2563,8 +2546,8 @@ SDValue MipsTargetLowering::passArgOnStack(SDValue StackPtr, unsigned Offset,
return DAG.getStore(Chain, DL, Arg, PtrOff, MachinePointerInfo());
}
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
- int FI = MFI->CreateFixedObject(Arg.getValueSizeInBits() / 8, Offset, false);
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ int FI = MFI.CreateFixedObject(Arg.getValueSizeInBits() / 8, Offset, false);
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
return DAG.getStore(Chain, DL, Arg, FIN, MachinePointerInfo(),
/* Alignment = */ 0, MachineMemOperand::MOVolatile);
@@ -2647,7 +2630,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
bool IsVarArg = CLI.IsVarArg;
MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetFrameLowering *TFL = Subtarget.getFrameLowering();
MipsFunctionInfo *FuncInfo = MF.getInfo<MipsFunctionInfo>();
bool IsPIC = isPositionIndependent();
@@ -2667,11 +2650,20 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// Get a count of how many bytes are to be pushed on the stack.
unsigned NextStackOffset = CCInfo.getNextStackOffset();
- // Check if it's really possible to do a tail call.
- if (IsTailCall)
+ // Check if it's really possible to do a tail call. Restrict it to functions
+ // that are part of this compilation unit.
+ bool InternalLinkage = false;
+ if (IsTailCall) {
IsTailCall = isEligibleForTailCallOptimization(
CCInfo, NextStackOffset, *MF.getInfo<MipsFunctionInfo>());
-
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ InternalLinkage = G->getGlobal()->hasInternalLinkage();
+ IsTailCall &= (InternalLinkage || G->getGlobal()->hasLocalLinkage() ||
+ G->getGlobal()->hasPrivateLinkage() ||
+ G->getGlobal()->hasHiddenVisibility() ||
+ G->getGlobal()->hasProtectedVisibility());
+ }
+ }
if (!IsTailCall && CLI.CS && CLI.CS->isMustTailCall())
report_fatal_error("failed to perform tail call elimination on a call "
"site marked musttail");
@@ -2754,19 +2746,19 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
break;
case CCValAssign::SExtUpper:
UseUpperBits = true;
- // Fallthrough
+ LLVM_FALLTHROUGH;
case CCValAssign::SExt:
Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, LocVT, Arg);
break;
case CCValAssign::ZExtUpper:
UseUpperBits = true;
- // Fallthrough
+ LLVM_FALLTHROUGH;
case CCValAssign::ZExt:
Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, LocVT, Arg);
break;
case CCValAssign::AExtUpper:
UseUpperBits = true;
- // Fallthrough
+ LLVM_FALLTHROUGH;
case CCValAssign::AExt:
Arg = DAG.getNode(ISD::ANY_EXTEND, DL, LocVT, Arg);
break;
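
LLVM_FALLTHROUGH (from llvm/Support/Compiler.h) replaces the bare "// Fallthrough" comments so -Wimplicit-fallthrough stays quiet about the deliberate fall-through. A minimal standalone C++17 equivalent of the pattern used in this switch (illustrative only; the enum and values are invented):

#include <cstdio>

enum class Ext { SExtUpper, SExt, ZExtUpper, ZExt };

int classify(Ext E) {
  bool UseUpperBits = false;
  int Kind = 0;
  switch (E) {
  case Ext::SExtUpper:
    UseUpperBits = true;
    [[fallthrough]];            // deliberate: the Upper variant shares the extend
  case Ext::SExt:
    Kind = 1;
    break;
  case Ext::ZExtUpper:
    UseUpperBits = true;
    [[fallthrough]];
  case Ext::ZExt:
    Kind = 2;
    break;
  }
  return Kind + (UseUpperBits ? 10 : 0);
}

int main() {
  std::printf("%d %d\n", classify(Ext::SExtUpper), classify(Ext::ZExt)); // 11 2
  return 0;
}
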
@@ -2806,9 +2798,9 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// node so that legalize doesn't hack it.
bool IsPICCall = (ABI.IsN64() || IsPIC); // true if calls are translated to
// jalr $25
- bool GlobalOrExternal = false, InternalLinkage = false, IsCallReloc = false;
SDValue CalleeLo;
EVT Ty = Callee.getValueType();
+ bool GlobalOrExternal = false, IsCallReloc = false;
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
if (IsPICCall) {
@@ -2859,8 +2851,10 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
getOpndList(Ops, RegsToPass, IsPICCall, GlobalOrExternal, InternalLinkage,
IsCallReloc, CLI, Callee, Chain);
- if (IsTailCall)
+ if (IsTailCall) {
+ MF.getFrameInfo().setHasTailCall();
return DAG.getNode(MipsISD::TailCall, DL, MVT::Other, Ops);
+ }
Chain = DAG.getNode(MipsISD::JmpLink, DL, NodeTys, Ops);
SDValue InFlag = Chain.getValue(1);
@@ -3006,7 +3000,7 @@ SDValue MipsTargetLowering::LowerFormalArguments(
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
MipsFI->setVarArgsFrameIndex(0);
@@ -3105,8 +3099,8 @@ SDValue MipsTargetLowering::LowerFormalArguments(
assert(VA.isMemLoc());
// The stack pointer offset is relative to the caller stack frame.
- int FI = MFI->CreateFixedObject(LocVT.getSizeInBits() / 8,
- VA.getLocMemOffset(), true);
+ int FI = MFI.CreateFixedObject(LocVT.getSizeInBits() / 8,
+ VA.getLocMemOffset(), true);
// Create load nodes to retrieve arguments from the stack
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
@@ -3224,19 +3218,19 @@ MipsTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
break;
case CCValAssign::AExtUpper:
UseUpperBits = true;
- // Fallthrough
+ LLVM_FALLTHROUGH;
case CCValAssign::AExt:
Val = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Val);
break;
case CCValAssign::ZExtUpper:
UseUpperBits = true;
- // Fallthrough
+ LLVM_FALLTHROUGH;
case CCValAssign::ZExt:
Val = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Val);
break;
case CCValAssign::SExtUpper:
UseUpperBits = true;
- // Fallthrough
+ LLVM_FALLTHROUGH;
case CCValAssign::SExt:
Val = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Val);
break;
@@ -3706,7 +3700,7 @@ void MipsTargetLowering::copyByValRegs(
unsigned FirstReg, unsigned LastReg, const CCValAssign &VA,
MipsCCState &State) const {
MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
unsigned GPRSizeInBytes = Subtarget.getGPRSizeInBytes();
unsigned NumRegs = LastReg - FirstReg;
unsigned RegAreaSize = NumRegs * GPRSizeInBytes;
@@ -3723,7 +3717,7 @@ void MipsTargetLowering::copyByValRegs(
// Create frame object.
EVT PtrTy = getPointerTy(DAG.getDataLayout());
- int FI = MFI->CreateFixedObject(FrameObjSize, FrameObjOffset, true);
+ int FI = MFI.CreateFixedObject(FrameObjSize, FrameObjOffset, true);
SDValue FIN = DAG.getFrameIndex(FI, PtrTy);
InVals.push_back(FIN);
@@ -3751,7 +3745,7 @@ void MipsTargetLowering::passByValArg(
SDValue Chain, const SDLoc &DL,
std::deque<std::pair<unsigned, SDValue>> &RegsToPass,
SmallVectorImpl<SDValue> &MemOpChains, SDValue StackPtr,
- MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg, unsigned FirstReg,
+ MachineFrameInfo &MFI, SelectionDAG &DAG, SDValue Arg, unsigned FirstReg,
unsigned LastReg, const ISD::ArgFlagsTy &Flags, bool isLittle,
const CCValAssign &VA) const {
unsigned ByValSizeInBytes = Flags.getByValSize();
@@ -3853,7 +3847,7 @@ void MipsTargetLowering::writeVarArgRegs(std::vector<SDValue> &OutChains,
MVT RegTy = MVT::getIntegerVT(RegSizeInBytes * 8);
const TargetRegisterClass *RC = getRegClassFor(RegTy);
MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
// Offset of the first variable argument from stack pointer.
@@ -3869,7 +3863,7 @@ void MipsTargetLowering::writeVarArgRegs(std::vector<SDValue> &OutChains,
// Record the frame index of the first variable argument
// which is a value necessary to VASTART.
- int FI = MFI->CreateFixedObject(RegSizeInBytes, VaArgOffset, true);
+ int FI = MFI.CreateFixedObject(RegSizeInBytes, VaArgOffset, true);
MipsFI->setVarArgsFrameIndex(FI);
// Copy the integer registers that have not been used for argument passing
@@ -3880,7 +3874,7 @@ void MipsTargetLowering::writeVarArgRegs(std::vector<SDValue> &OutChains,
++I, VaArgOffset += RegSizeInBytes) {
unsigned Reg = addLiveIn(MF, ArgRegs[I], RC);
SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, RegTy);
- FI = MFI->CreateFixedObject(RegSizeInBytes, VaArgOffset, true);
+ FI = MFI.CreateFixedObject(RegSizeInBytes, VaArgOffset, true);
SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
SDValue Store =
DAG.getStore(Chain, DL, ArgValue, PtrOff, MachinePointerInfo());
diff --git a/contrib/llvm/lib/Target/Mips/MipsISelLowering.h b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h
index 2ded118003e6..cddf0903ca6a 100644
--- a/contrib/llvm/lib/Target/Mips/MipsISelLowering.h
+++ b/contrib/llvm/lib/Target/Mips/MipsISelLowering.h
@@ -296,6 +296,10 @@ namespace llvm {
return SrcAS < 256 && DestAS < 256;
}
+ bool isJumpTableRelative() const override {
+ return getTargetMachine().isPositionIndependent() || ABI.IsN64();
+ }
+
protected:
SDValue getGlobalReg(SelectionDAG &DAG, EVT Ty) const;
@@ -426,7 +430,6 @@ namespace llvm {
TargetLowering::CallLoweringInfo &CLI) const;
// Lower Operand specifics
- SDValue lowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
@@ -471,7 +474,7 @@ namespace llvm {
void passByValArg(SDValue Chain, const SDLoc &DL,
std::deque<std::pair<unsigned, SDValue>> &RegsToPass,
SmallVectorImpl<SDValue> &MemOpChains, SDValue StackPtr,
- MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg,
+ MachineFrameInfo &MFI, SelectionDAG &DAG, SDValue Arg,
unsigned FirstReg, unsigned LastReg,
const ISD::ArgFlagsTy &Flags, bool isLittle,
const CCValAssign &VA) const;
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td b/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td
index 87b02bdfdfdb..ab7aa9dcdcae 100644
--- a/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrFPU.td
@@ -284,6 +284,16 @@ defm CEIL_W : ROUND_M<"ceil.w.d", II_CEIL>, ABSS_FM<0xe, 17>, ISA_MIPS2;
defm FLOOR_W : ROUND_M<"floor.w.d", II_FLOOR>, ABSS_FM<0xf, 17>, ISA_MIPS2;
defm CVT_W : ROUND_M<"cvt.w.d", II_CVT>, ABSS_FM<0x24, 17>;
+let AdditionalPredicates = [NotInMicroMips] in {
+ def RECIP_S : MMRel, ABSS_FT<"recip.s", FGR32Opnd, FGR32Opnd, II_RECIP_S>,
+ ABSS_FM<0b010101, 0x10>, INSN_MIPS4_32R2;
+ def RECIP_D : MMRel, ABSS_FT<"recip.d", FGR64Opnd, FGR64Opnd, II_RECIP_D>,
+ ABSS_FM<0b010101, 0x11>, INSN_MIPS4_32R2;
+ def RSQRT_S : MMRel, ABSS_FT<"rsqrt.s", FGR32Opnd, FGR32Opnd, II_RSQRT_S>,
+ ABSS_FM<0b010110, 0x10>, INSN_MIPS4_32R2;
+ def RSQRT_D : MMRel, ABSS_FT<"rsqrt.d", FGR64Opnd, FGR64Opnd, II_RSQRT_D>,
+ ABSS_FM<0b010110, 0x11>, INSN_MIPS4_32R2;
+}
let DecoderNamespace = "Mips64" in {
let AdditionalPredicates = [NotInMicroMips] in {
def ROUND_L_S : ABSS_FT<"round.l.s", FGR64Opnd, FGR32Opnd, II_ROUND>,
@@ -361,8 +371,10 @@ defm FSQRT : ABSS_M<"sqrt.d", II_SQRT_D, fsqrt>, ABSS_FM<0x4, 17>, ISA_MIPS2;
// regardless of register aliasing.
/// Move Control Registers From/To CPU Registers
-def CFC1 : MMRel, MFC1_FT<"cfc1", GPR32Opnd, CCROpnd, II_CFC1>, MFC1_FM<2>;
-def CTC1 : MMRel, MTC1_FT<"ctc1", CCROpnd, GPR32Opnd, II_CTC1>, MFC1_FM<6>;
+let AdditionalPredicates = [NotInMicroMips] in {
+ def CFC1 : MMRel, MFC1_FT<"cfc1", GPR32Opnd, CCROpnd, II_CFC1>, MFC1_FM<2>;
+ def CTC1 : MMRel, MTC1_FT<"ctc1", CCROpnd, GPR32Opnd, II_CTC1>, MFC1_FM<6>;
+}
def MFC1 : MMRel, MFC1_FT<"mfc1", GPR32Opnd, FGR32Opnd, II_MFC1,
bitconvert>, MFC1_FM<0>;
def MTC1 : MMRel, MTC1_FT<"mtc1", FGR32Opnd, GPR32Opnd, II_MTC1,
@@ -599,6 +611,25 @@ def : MipsInstAlias<"bc1f $offset", (BC1F FCC0, brtarget:$offset)>,
def : MipsInstAlias<"bc1fl $offset", (BC1FL FCC0, brtarget:$offset)>,
ISA_MIPS2_NOT_32R6_64R6, HARDFLOAT;
+def : MipsInstAlias
+ <"s.s $fd, $addr", (SWC1 FGR32Opnd:$fd, mem_simm16:$addr), 0>,
+ ISA_MIPS2, HARDFLOAT;
+def : MipsInstAlias
+ <"s.d $fd, $addr", (SDC1 AFGR64Opnd:$fd, mem_simm16:$addr), 0>,
+ FGR_32, ISA_MIPS2, HARDFLOAT;
+def : MipsInstAlias
+ <"s.d $fd, $addr", (SDC164 FGR64Opnd:$fd, mem_simm16:$addr), 0>,
+ FGR_64, ISA_MIPS2, HARDFLOAT;
+
+def : MipsInstAlias
+ <"l.s $fd, $addr", (LWC1 FGR32Opnd:$fd, mem_simm16:$addr), 0>,
+ ISA_MIPS2, HARDFLOAT;
+def : MipsInstAlias
+ <"l.d $fd, $addr", (LDC1 AFGR64Opnd:$fd, mem_simm16:$addr), 0>,
+ FGR_32, ISA_MIPS2, HARDFLOAT;
+def : MipsInstAlias
+ <"l.d $fd, $addr", (LDC164 FGR64Opnd:$fd, mem_simm16:$addr), 0>,
+ FGR_64, ISA_MIPS2, HARDFLOAT;
//===----------------------------------------------------------------------===//
// Floating Point Patterns
//===----------------------------------------------------------------------===//
@@ -614,9 +645,9 @@ def : MipsPat<(f64 (sint_to_fp GPR32Opnd:$src)),
(PseudoCVT_D32_W GPR32Opnd:$src)>, FGR_32;
def : MipsPat<(MipsTruncIntFP AFGR64Opnd:$src),
(TRUNC_W_D32 AFGR64Opnd:$src)>, FGR_32;
-def : MipsPat<(f32 (fround AFGR64Opnd:$src)),
+def : MipsPat<(f32 (fpround AFGR64Opnd:$src)),
(CVT_S_D32 AFGR64Opnd:$src)>, FGR_32;
-def : MipsPat<(f64 (fextend FGR32Opnd:$src)),
+def : MipsPat<(f64 (fpextend FGR32Opnd:$src)),
(CVT_D32_S FGR32Opnd:$src)>, FGR_32;
def : MipsPat<(f64 fpimm0), (DMTC1 ZERO_64)>, FGR_64;
@@ -636,9 +667,9 @@ def : MipsPat<(MipsTruncIntFP FGR32Opnd:$src),
def : MipsPat<(MipsTruncIntFP FGR64Opnd:$src),
(TRUNC_L_D64 FGR64Opnd:$src)>, FGR_64;
-def : MipsPat<(f32 (fround FGR64Opnd:$src)),
+def : MipsPat<(f32 (fpround FGR64Opnd:$src)),
(CVT_S_D64 FGR64Opnd:$src)>, FGR_64;
-def : MipsPat<(f64 (fextend FGR32Opnd:$src)),
+def : MipsPat<(f64 (fpextend FGR32Opnd:$src)),
(CVT_D64_S FGR32Opnd:$src)>, FGR_64;
// Patterns for loads/stores with a reg+imm operand.
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td b/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td
index 0bbb49b6b08e..1437fb75434a 100644
--- a/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrFormats.td
@@ -98,11 +98,15 @@ class MipsInst<dag outs, dag ins, string asmstr, list<dag> pattern,
bit isCTI = 0; // Any form of Control Transfer Instruction.
// Required for MIPSR6
bit hasForbiddenSlot = 0; // Instruction has a forbidden slot.
+ bit IsPCRelativeLoad = 0; // Load instruction with implicit source register
+ // ($pc) and with explicit offset and destination
+ // register
// TSFlags layout should be kept in sync with MipsInstrInfo.h.
let TSFlags{3-0} = FormBits;
let TSFlags{4} = isCTI;
let TSFlags{5} = hasForbiddenSlot;
+ let TSFlags{6} = IsPCRelativeLoad;
let DecoderNamespace = "Mips";
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp
index 800d834e0ab8..19af1914c819 100644
--- a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp
@@ -58,7 +58,7 @@ MachineMemOperand *
MipsInstrInfo::GetMemOperand(MachineBasicBlock &MBB, int FI,
MachineMemOperand::Flags Flags) const {
MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
unsigned Align = MFI.getObjectAlignment(FI);
return MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(MF, FI),
@@ -113,13 +113,15 @@ void MipsInstrInfo::BuildCondBr(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MIB.addMBB(TBB);
}
-unsigned MipsInstrInfo::InsertBranch(MachineBasicBlock &MBB,
+unsigned MipsInstrInfo::insertBranch(MachineBasicBlock &MBB,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
ArrayRef<MachineOperand> Cond,
- const DebugLoc &DL) const {
+ const DebugLoc &DL,
+ int *BytesAdded) const {
// Shouldn't be a fall through.
- assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert(TBB && "insertBranch must not be told to insert a fallthrough");
+ assert(!BytesAdded && "code size not handled");
// # of condition operands:
// Unconditional branches: 0
@@ -145,16 +147,21 @@ unsigned MipsInstrInfo::InsertBranch(MachineBasicBlock &MBB,
return 1;
}
-unsigned MipsInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+unsigned MipsInstrInfo::removeBranch(MachineBasicBlock &MBB,
+ int *BytesRemoved) const {
+ assert(!BytesRemoved && "code size not handled");
+
MachineBasicBlock::reverse_iterator I = MBB.rbegin(), REnd = MBB.rend();
- MachineBasicBlock::reverse_iterator FirstBr;
unsigned removed;
// Skip all the debug instructions.
while (I != REnd && I->isDebugValue())
++I;
- FirstBr = I;
+ if (I == REnd)
+ return 0;
+
+ MachineBasicBlock::iterator FirstBr = ++I.getReverse();
// Up to 2 branches are removed.
// Note that indirect branches are not removed.
@@ -162,14 +169,14 @@ unsigned MipsInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
if (!getAnalyzableBrOpc(I->getOpcode()))
break;
- MBB.erase(I.base(), FirstBr.base());
+ MBB.erase((--I).getReverse(), FirstBr);
return removed;
}
-/// ReverseBranchCondition - Return the inverse opcode of the
+/// reverseBranchCondition - Return the inverse opcode of the
/// specified Branch instruction.
-bool MipsInstrInfo::ReverseBranchCondition(
+bool MipsInstrInfo::reverseBranchCondition(
SmallVectorImpl<MachineOperand> &Cond) const {
assert( (Cond.size() && Cond.size() <= 3) &&
"Invalid Mips branch condition!");
@@ -269,7 +276,9 @@ unsigned MipsInstrInfo::getEquivalentCompactForm(
if (Subtarget.inMicroMipsMode()) {
switch (Opcode) {
case Mips::BNE:
+ case Mips::BNE_MM:
case Mips::BEQ:
+ case Mips::BEQ_MM:
// microMIPS has NE,EQ branches that do not have delay slots provided one
// of the operands is zero.
if (I->getOperand(1).getReg() == Subtarget.getABI().GetZeroReg())
@@ -280,6 +289,7 @@ unsigned MipsInstrInfo::getEquivalentCompactForm(
case Mips::JR:
case Mips::PseudoReturn:
case Mips::PseudoIndirectBranch:
+ case Mips::TAILCALLREG:
canUseShortMicroMipsCTI = true;
break;
}
@@ -302,12 +312,14 @@ unsigned MipsInstrInfo::getEquivalentCompactForm(
case Mips::BAL:
return Mips::BALC;
case Mips::BEQ:
+ case Mips::BEQ_MM:
if (canUseShortMicroMipsCTI)
return Mips::BEQZC_MM;
else if (I->getOperand(0).getReg() == I->getOperand(1).getReg())
return 0;
return Mips::BEQC;
case Mips::BNE:
+ case Mips::BNE_MM:
if (canUseShortMicroMipsCTI)
return Mips::BNEZC_MM;
else if (I->getOperand(0).getReg() == I->getOperand(1).getReg())
@@ -337,11 +349,28 @@ unsigned MipsInstrInfo::getEquivalentCompactForm(
return Mips::BLTUC;
case Mips::BLTZ:
return Mips::BLTZC;
+ case Mips::BEQ64:
+ if (I->getOperand(0).getReg() == I->getOperand(1).getReg())
+ return 0;
+ return Mips::BEQC64;
+ case Mips::BNE64:
+ if (I->getOperand(0).getReg() == I->getOperand(1).getReg())
+ return 0;
+ return Mips::BNEC64;
+ case Mips::BGTZ64:
+ return Mips::BGTZC64;
+ case Mips::BGEZ64:
+ return Mips::BGEZC64;
+ case Mips::BLTZ64:
+ return Mips::BLTZC64;
+ case Mips::BLEZ64:
+ return Mips::BLEZC64;
// For MIPSR6, the instruction 'jic' can be used for these cases. Some
// tools will accept 'jrc reg' as an alias for 'jic 0, $reg'.
case Mips::JR:
case Mips::PseudoReturn:
case Mips::PseudoIndirectBranch:
+ case Mips::TAILCALLREG:
if (canUseShortMicroMipsCTI)
return Mips::JRC16_MM;
return Mips::JIC;
@@ -350,6 +379,7 @@ unsigned MipsInstrInfo::getEquivalentCompactForm(
case Mips::JR64:
case Mips::PseudoReturn64:
case Mips::PseudoIndirectBranch64:
+ case Mips::TAILCALLREG64:
return Mips::JIC64;
case Mips::JALR64Pseudo:
return Mips::JIALC64;
@@ -378,7 +408,7 @@ bool MipsInstrInfo::HasForbiddenSlot(const MachineInstr &MI) const {
}
/// Return the number of bytes of code the specified instruction may be.
-unsigned MipsInstrInfo::GetInstSizeInBytes(const MachineInstr &MI) const {
+unsigned MipsInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
switch (MI.getOpcode()) {
default:
return MI.getDesc().getSize();
@@ -399,17 +429,22 @@ MipsInstrInfo::genInstrWithNewOpc(unsigned NewOpc,
MachineBasicBlock::iterator I) const {
MachineInstrBuilder MIB;
- // Certain branches have two forms: e.g beq $1, $zero, dst vs beqz $1, dest
+ // Certain branches have two forms: e.g beq $1, $zero, dest vs beqz $1, dest
// Pick the zero form of the branch for readable assembly and for greater
// branch distance in non-microMIPS mode.
+ // Additionally, MIPSR6 does not permit the use of register $zero for compact
+ // branches.
// FIXME: Certain atomic sequences on mips64 generate 32bit references to
// Mips::ZERO, which is incorrect. This test should be updated to use
// Subtarget.getABI().GetZeroReg() when those atomic sequences and others
// are fixed.
- bool BranchWithZeroOperand =
- (I->isBranch() && !I->isPseudo() && I->getOperand(1).isReg() &&
- (I->getOperand(1).getReg() == Mips::ZERO ||
- I->getOperand(1).getReg() == Mips::ZERO_64));
+ int ZeroOperandPosition = -1;
+ bool BranchWithZeroOperand = false;
+ if (I->isBranch() && !I->isPseudo()) {
+ auto TRI = I->getParent()->getParent()->getSubtarget().getRegisterInfo();
+ ZeroOperandPosition = I->findRegisterUseOperandIdx(Mips::ZERO, false, TRI);
+ BranchWithZeroOperand = ZeroOperandPosition != -1;
+ }
if (BranchWithZeroOperand) {
switch (NewOpc) {
@@ -425,6 +460,12 @@ MipsInstrInfo::genInstrWithNewOpc(unsigned NewOpc,
case Mips::BLTC:
NewOpc = Mips::BLTZC;
break;
+ case Mips::BEQC64:
+ NewOpc = Mips::BEQZC64;
+ break;
+ case Mips::BNEC64:
+ NewOpc = Mips::BNEZC64;
+ break;
}
}
@@ -446,17 +487,11 @@ MipsInstrInfo::genInstrWithNewOpc(unsigned NewOpc,
MIB.addImm(0);
- } else if (BranchWithZeroOperand) {
- // For MIPSR6 and microMIPS branches with an explicit zero operand, copy
- // everything after the zero.
- MIB.addOperand(I->getOperand(0));
-
- for (unsigned J = 2, E = I->getDesc().getNumOperands(); J < E; ++J) {
- MIB.addOperand(I->getOperand(J));
- }
} else {
- // All other cases copy all other operands.
for (unsigned J = 0, E = I->getDesc().getNumOperands(); J < E; ++J) {
+ if (BranchWithZeroOperand && (unsigned)ZeroOperandPosition == J)
+ continue;
+
MIB.addOperand(I->getOperand(J));
}
}
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.h b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.h
index 2e55012eec40..347b9187d08c 100644
--- a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.h
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.h
@@ -55,14 +55,16 @@ public:
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const override;
- unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
+ unsigned removeBranch(MachineBasicBlock &MBB,
+ int *BytesRemoved = nullptr) const override;
- unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
- const DebugLoc &DL) const override;
+ const DebugLoc &DL,
+ int *BytesAdded = nullptr) const override;
bool
- ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
+ reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
BranchType analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
@@ -92,7 +94,7 @@ public:
virtual unsigned getOppositeBranchOpc(unsigned Opc) const = 0;
/// Return the number of bytes of code the specified instruction may be.
- unsigned GetInstSizeInBytes(const MachineInstr &MI) const;
+ unsigned getInstSizeInBytes(const MachineInstr &MI) const override;
void storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
diff --git a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td
index 296f6e9b08bd..5bc48336121a 100644
--- a/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/MipsInstrInfo.td
@@ -157,6 +157,8 @@ def HasMips3_32r2 : Predicate<"Subtarget->hasMips3_32r2()">,
AssemblerPredicate<"FeatureMips3_32r2">;
def HasMips3 : Predicate<"Subtarget->hasMips3()">,
AssemblerPredicate<"FeatureMips3">;
+def NotMips3 : Predicate<"!Subtarget->hasMips3()">,
+ AssemblerPredicate<"!FeatureMips3">;
def HasMips4_32 : Predicate<"Subtarget->hasMips4_32()">,
AssemblerPredicate<"FeatureMips4_32">;
def NotMips4_32 : Predicate<"!Subtarget->hasMips4_32()">,
@@ -201,6 +203,8 @@ def InMips16Mode : Predicate<"Subtarget->inMips16Mode()">,
AssemblerPredicate<"FeatureMips16">;
def HasCnMips : Predicate<"Subtarget->hasCnMips()">,
AssemblerPredicate<"FeatureCnMips">;
+def NotCnMips : Predicate<"!Subtarget->hasCnMips()">,
+ AssemblerPredicate<"!FeatureCnMips">;
def RelocNotPIC : Predicate<"!TM.isPositionIndependent()">;
def RelocPIC : Predicate<"TM.isPositionIndependent()">;
def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">;
@@ -242,6 +246,9 @@ class PTR_64 { list<Predicate> PTRPredicates = [IsPTR64bit]; }
// subtractive predicate will hopefully keep us under the 32 predicate
// limit long enough to develop an alternative way to handle P1||P2
// predicates.
+class ISA_MIPS1_NOT_MIPS3 {
+ list<Predicate> InsnPredicates = [NotMips3];
+}
class ISA_MIPS1_NOT_4_32 {
list<Predicate> InsnPredicates = [NotMips4_32];
}
@@ -300,6 +307,9 @@ class INSN_MIPS3_32_NOT_32R6_64R6 {
// The portions of MIPS-III that were also added to MIPS32
class INSN_MIPS3_32R2 { list<Predicate> InsnPredicates = [HasMips3_32r2]; }
+// The portions of MIPS-IV that were also added to MIPS32.
+class INSN_MIPS4_32 { list <Predicate> InsnPredicates = [HasMips4_32]; }
+
// The portions of MIPS-IV that were also added to MIPS32 but were removed in
// MIPS32r6 and MIPS64r6.
class INSN_MIPS4_32_NOT_32R6_64R6 {
@@ -312,6 +322,11 @@ class INSN_MIPS4_32R2_NOT_32R6_64R6 {
list<Predicate> InsnPredicates = [HasMips4_32r2, NotMips32r6, NotMips64r6];
}
+// The portions of MIPS-IV that were also added to MIPS32r2.
+class INSN_MIPS4_32R2 {
+ list<Predicate> InsnPredicates = [HasMips4_32r2];
+}
+
// The portions of MIPS-V that were also added to MIPS32r2 but were removed in
// MIPS32r6 and MIPS64r6.
class INSN_MIPS5_32R2_NOT_32R6_64R6 {
@@ -322,6 +337,10 @@ class ASE_CNMIPS {
list<Predicate> InsnPredicates = [HasCnMips];
}
+class NOT_ASE_CNMIPS {
+ list<Predicate> InsnPredicates = [NotCnMips];
+}
+
class ASE_MIPS64_CNMIPS {
list<Predicate> InsnPredicates = [HasMips64, HasCnMips];
}
@@ -413,6 +432,15 @@ class ConstantSImmAsmOperandClass<int Bits, list<AsmOperandClass> Supers = [],
let DiagnosticType = "SImm" # Bits # "_" # Offset;
}
+class SimmLslAsmOperandClass<int Bits, list<AsmOperandClass> Supers = [],
+ int Shift = 0> : AsmOperandClass {
+ let Name = "Simm" # Bits # "_Lsl" # Shift;
+ let RenderMethod = "addImmOperands";
+ let PredicateMethod = "isScaledSImm<" # Bits # ", " # Shift # ">";
+ let SuperClasses = Supers;
+ let DiagnosticType = "SImm" # Bits # "_Lsl" # Shift;
+}
+
class ConstantUImmAsmOperandClass<int Bits, list<AsmOperandClass> Supers = [],
int Offset = 0> : AsmOperandClass {
let Name = "ConstantUImm" # Bits # "_" # Offset;
@@ -450,6 +478,16 @@ class UImmAsmOperandClass<int Bits, list<AsmOperandClass> Supers = []>
let DiagnosticType = "UImm" # Bits;
}
+// Generic case - only to support certain assembly pseudo instructions.
+class UImmAnyAsmOperandClass<int Bits, list<AsmOperandClass> Supers = []>
+ : AsmOperandClass {
+ let Name = "ImmAny";
+ let RenderMethod = "addConstantUImmOperands<32>";
+ let PredicateMethod = "isSImm<" # Bits # ">";
+ let SuperClasses = Supers;
+ let DiagnosticType = "ImmAny";
+}
+
// AsmOperandClasses require a strict ordering which is difficult to manage
// as a hierarchy. Instead, we use a linear ordering and impose an order that
// is in some places arbitrary.
@@ -473,8 +511,13 @@ class UImmAsmOperandClass<int Bits, list<AsmOperandClass> Supers = []>
// uimm5 < uimm5_64, and uimm5 < vsplat_uimm5
// This is entirely arbitrary. We need an ordering and what we pick is
// unimportant since only one is possible for a given mnemonic.
+
+def UImm32CoercedAsmOperandClass : UImmAnyAsmOperandClass<33, []> {
+ let Name = "UImm32_Coerced";
+ let DiagnosticType = "UImm32_Coerced";
+}
def SImm32RelaxedAsmOperandClass
- : SImmAsmOperandClass<32, []> {
+ : SImmAsmOperandClass<32, [UImm32CoercedAsmOperandClass]> {
let Name = "SImm32_Relaxed";
let PredicateMethod = "isAnyImm<32>";
let DiagnosticType = "SImm32_Relaxed";
@@ -485,12 +528,29 @@ def ConstantUImm26AsmOperandClass
: ConstantUImmAsmOperandClass<26, [SImm32AsmOperandClass]>;
def ConstantUImm20AsmOperandClass
: ConstantUImmAsmOperandClass<20, [ConstantUImm26AsmOperandClass]>;
+def ConstantSImm19Lsl2AsmOperandClass : AsmOperandClass {
+ let Name = "SImm19Lsl2";
+ let RenderMethod = "addImmOperands";
+ let PredicateMethod = "isScaledSImm<19, 2>";
+ let SuperClasses = [ConstantUImm20AsmOperandClass];
+ let DiagnosticType = "SImm19_Lsl2";
+}
def UImm16RelaxedAsmOperandClass
: UImmAsmOperandClass<16, [ConstantUImm20AsmOperandClass]> {
let Name = "UImm16_Relaxed";
let PredicateMethod = "isAnyImm<16>";
let DiagnosticType = "UImm16_Relaxed";
}
+// Similar to the relaxed classes which take an SImm and render it as
+// a UImm, this takes a UImm and renders it as an SImm.
+def UImm16AltRelaxedAsmOperandClass
+ : SImmAsmOperandClass<16, [UImm16RelaxedAsmOperandClass]> {
+ let Name = "UImm16_AltRelaxed";
+ let PredicateMethod = "isUImm<16>";
+ let DiagnosticType = "UImm16_AltRelaxed";
+}
+// FIXME: One of these should probably have UImm16AsmOperandClass as the
+// superclass instead of UImm16RelaxedAsmOperandClass.
def UImm16AsmOperandClass
: UImmAsmOperandClass<16, [UImm16RelaxedAsmOperandClass]>;
def SImm16RelaxedAsmOperandClass
@@ -611,6 +671,9 @@ def ConstantImmzAsmOperandClass : AsmOperandClass {
let DiagnosticType = "Immz";
}
+def Simm19Lsl2AsmOperand
+ : SimmLslAsmOperandClass<19, [], 2>;
+
def MipsJumpTargetAsmOperand : AsmOperandClass {
let Name = "JumpTarget";
let ParserMethod = "parseJumpTarget";
@@ -645,7 +708,7 @@ def imm64: Operand<i64>;
def simm19_lsl2 : Operand<i32> {
let EncoderMethod = "getSimm19Lsl2Encoding";
let DecoderMethod = "DecodeSimm19Lsl2";
- let ParserMatchClass = MipsJumpTargetAsmOperand;
+ let ParserMatchClass = Simm19Lsl2AsmOperand;
}
def simm18_lsl3 : Operand<i32> {
@@ -766,6 +829,11 @@ def uimm16_64_relaxed : Operand<i64> {
!cast<AsmOperandClass>("UImm16RelaxedAsmOperandClass");
}
+def uimm16_altrelaxed : Operand<i32> {
+ let PrintMethod = "printUImm<16>";
+ let ParserMatchClass =
+ !cast<AsmOperandClass>("UImm16AltRelaxedAsmOperandClass");
+}
// Like uimm5 but reports a less confusing error for 32-63 when
// an instruction alias permits that.
def uimm5_report_uimm6 : Operand<i32> {
@@ -845,6 +913,10 @@ def simm16_64 : Operand<i64> {
let ParserMatchClass = !cast<AsmOperandClass>("SImm16AsmOperandClass");
}
+// Like simm32 but coerces simm32 to uimm32.
+def uimm32_coerced : Operand<i32> {
+ let ParserMatchClass = !cast<AsmOperandClass>("UImm32CoercedAsmOperandClass");
+}
// Like simm32 but coerces uimm32 to simm32.
def simm32_relaxed : Operand<i32> {
let DecoderMethod = "DecodeSImmWithOffsetAndScale<32>";
@@ -1033,10 +1105,6 @@ def immSExt8 : PatLeaf<(imm), [{ return isInt<8>(N->getSExtValue()); }]>;
// e.g. addi, andi
def immSExt16 : PatLeaf<(imm), [{ return isInt<16>(N->getSExtValue()); }]>;
-// Node immediate fits as 15-bit sign extended on target immediate.
-// e.g. addi, andi
-def immSExt15 : PatLeaf<(imm), [{ return isInt<15>(N->getSExtValue()); }]>;
-
// Node immediate fits as 7-bit zero extended on target immediate.
def immZExt7 : PatLeaf<(imm), [{ return isUInt<7>(N->getZExtValue()); }]>;
@@ -1052,11 +1120,23 @@ def immZExt16 : PatLeaf<(imm), [{
}], LO16>;
// Immediate can be loaded with LUi (32-bit int with lower 16-bit cleared).
-def immLow16Zero : PatLeaf<(imm), [{
+def immSExt32Low16Zero : PatLeaf<(imm), [{
int64_t Val = N->getSExtValue();
return isInt<32>(Val) && !(Val & 0xffff);
}]>;
+// Zero-extended 32-bit unsigned int with lower 16-bit cleared.
+def immZExt32Low16Zero : PatLeaf<(imm), [{
+ uint64_t Val = N->getZExtValue();
+ return isUInt<32>(Val) && !(Val & 0xffff);
+}]>;
+
+// Node immediate fits as a 32-bit sign extended on target immediate.
+def immSExt32 : PatLeaf<(imm), [{ return isInt<32>(N->getSExtValue()); }]>;
+
+// Node immediate fits as a 32-bit zero extended on target immediate.
+def immZExt32 : PatLeaf<(imm), [{ return isUInt<32>(N->getZExtValue()); }]>;
+
// shamt field must fit in 5 bits.
def immZExt5 : ImmLeaf<i32, [{return Imm == (Imm & 0x1f);}]>;
@@ -1086,7 +1166,13 @@ def addrRegImm :
def addrDefault :
ComplexPattern<iPTR, 2, "selectAddrDefault", [frameindex]>;
-def addrimm10 : ComplexPattern<iPTR, 2, "selectIntAddrMSA", [frameindex]>;
+def addrimm10 : ComplexPattern<iPTR, 2, "selectIntAddrSImm10", [frameindex]>;
+def addrimm10lsl1 : ComplexPattern<iPTR, 2, "selectIntAddrSImm10Lsl1",
+ [frameindex]>;
+def addrimm10lsl2 : ComplexPattern<iPTR, 2, "selectIntAddrSImm10Lsl2",
+ [frameindex]>;
+def addrimm10lsl3 : ComplexPattern<iPTR, 2, "selectIntAddrSImm10Lsl3",
+ [frameindex]>;
//===----------------------------------------------------------------------===//
// Instructions specific format
@@ -1352,14 +1438,12 @@ let isCall=1, hasDelaySlot=1, isCTI=1, Defs = [RA] in {
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, hasDelaySlot = 1,
hasExtraSrcRegAllocReq = 1, isCTI = 1, Defs = [AT] in {
- class TailCall<Instruction JumpInst> :
+ class TailCall<Instruction JumpInst, DAGOperand Opnd> :
PseudoSE<(outs), (ins calltarget:$target), [], II_J>,
- PseudoInstExpansion<(JumpInst jmptarget:$target)>;
+ PseudoInstExpansion<(JumpInst Opnd:$target)>;
- class TailCallReg<RegisterOperand RO, Instruction JRInst,
- RegisterOperand ResRO = RO> :
- PseudoSE<(outs), (ins RO:$rs), [(MipsTailCall RO:$rs)], II_JR>,
- PseudoInstExpansion<(JRInst ResRO:$rs)>;
+ class TailCallReg<RegisterOperand RO> :
+ MipsPseudo<(outs), (ins RO:$rs), [(MipsTailCall RO:$rs)], II_JR>;
}
class BAL_BR_Pseudo<Instruction RealInst> :
@@ -1686,33 +1770,35 @@ let AdditionalPredicates = [NotInMicroMips] in {
}
def ADDi : MMRel, ArithLogicI<"addi", simm16_relaxed, GPR32Opnd, II_ADDI>, ADDI_FM<0x8>,
ISA_MIPS1_NOT_32R6_64R6;
-def SLTi : MMRel, SetCC_I<"slti", setlt, simm16, immSExt16, GPR32Opnd>,
- SLTI_FM<0xa>;
-def SLTiu : MMRel, SetCC_I<"sltiu", setult, simm16, immSExt16, GPR32Opnd>,
- SLTI_FM<0xb>;
+let AdditionalPredicates = [NotInMicroMips] in {
+ def SLTi : MMRel, SetCC_I<"slti", setlt, simm16, immSExt16, GPR32Opnd>,
+ SLTI_FM<0xa>;
+ def SLTiu : MMRel, SetCC_I<"sltiu", setult, simm16, immSExt16, GPR32Opnd>,
+ SLTI_FM<0xb>;
+}
def LUi : MMRel, LoadUpper<"lui", GPR32Opnd, uimm16_relaxed>, LUI_FM;
let AdditionalPredicates = [NotInMicroMips] in {
-/// Arithmetic Instructions (3-Operand, R-Type)
-def ADDu : MMRel, StdMMR6Rel, ArithLogicR<"addu", GPR32Opnd, 1, II_ADDU, add>,
- ADD_FM<0, 0x21>;
-def SUBu : MMRel, StdMMR6Rel, ArithLogicR<"subu", GPR32Opnd, 0, II_SUBU, sub>,
- ADD_FM<0, 0x23>;
+ /// Arithmetic Instructions (3-Operand, R-Type)
+ def ADDu : MMRel, StdMMR6Rel, ArithLogicR<"addu", GPR32Opnd, 1, II_ADDU, add>,
+ ADD_FM<0, 0x21>;
+ def SUBu : MMRel, StdMMR6Rel, ArithLogicR<"subu", GPR32Opnd, 0, II_SUBU, sub>,
+ ADD_FM<0, 0x23>;
}
let Defs = [HI0, LO0] in
def MUL : MMRel, ArithLogicR<"mul", GPR32Opnd, 1, II_MUL, mul>,
ADD_FM<0x1c, 2>, ISA_MIPS32_NOT_32R6_64R6;
def ADD : MMRel, StdMMR6Rel, ArithLogicR<"add", GPR32Opnd, 1, II_ADD>, ADD_FM<0, 0x20>;
def SUB : MMRel, StdMMR6Rel, ArithLogicR<"sub", GPR32Opnd, 0, II_SUB>, ADD_FM<0, 0x22>;
-def SLT : MMRel, SetCC_R<"slt", setlt, GPR32Opnd>, ADD_FM<0, 0x2a>;
-def SLTu : MMRel, SetCC_R<"sltu", setult, GPR32Opnd>, ADD_FM<0, 0x2b>;
let AdditionalPredicates = [NotInMicroMips] in {
-def AND : MMRel, StdMMR6Rel, ArithLogicR<"and", GPR32Opnd, 1, II_AND, and>,
- ADD_FM<0, 0x24>;
-def OR : MMRel, StdMMR6Rel, ArithLogicR<"or", GPR32Opnd, 1, II_OR, or>,
- ADD_FM<0, 0x25>;
-def XOR : MMRel, StdMMR6Rel, ArithLogicR<"xor", GPR32Opnd, 1, II_XOR, xor>,
- ADD_FM<0, 0x26>;
-def NOR : MMRel, StdMMR6Rel, LogicNOR<"nor", GPR32Opnd>, ADD_FM<0, 0x27>;
+ def SLT : MMRel, SetCC_R<"slt", setlt, GPR32Opnd>, ADD_FM<0, 0x2a>;
+ def SLTu : MMRel, SetCC_R<"sltu", setult, GPR32Opnd>, ADD_FM<0, 0x2b>;
+ def AND : MMRel, StdMMR6Rel, ArithLogicR<"and", GPR32Opnd, 1, II_AND, and>,
+ ADD_FM<0, 0x24>;
+ def OR : MMRel, StdMMR6Rel, ArithLogicR<"or", GPR32Opnd, 1, II_OR, or>,
+ ADD_FM<0, 0x25>;
+ def XOR : MMRel, StdMMR6Rel, ArithLogicR<"xor", GPR32Opnd, 1, II_XOR, xor>,
+ ADD_FM<0, 0x26>;
+ def NOR : MMRel, StdMMR6Rel, LogicNOR<"nor", GPR32Opnd>, ADD_FM<0, 0x27>;
}
/// Shift Instructions
@@ -1794,11 +1880,10 @@ let DecoderNamespace = "COP3_" in {
def SDC3 : SW_FT3<"sdc3", COP3Opnd, II_SDC3, store>, LW_FM<0x3f>,
ISA_MIPS2;
}
-}
-def SYNC : MMRel, StdMMR6Rel, SYNC_FT<"sync">, SYNC_FM,
- ISA_MIPS32;
-def SYNCI : MMRel, StdMMR6Rel, SYNCI_FT<"synci">, SYNCI_FM, ISA_MIPS32R2;
+ def SYNC : MMRel, StdMMR6Rel, SYNC_FT<"sync">, SYNC_FM, ISA_MIPS2;
+ def SYNCI : MMRel, StdMMR6Rel, SYNCI_FT<"synci">, SYNCI_FM, ISA_MIPS32R2;
+}
let AdditionalPredicates = [NotInMicroMips] in {
def TEQ : MMRel, TEQ_FT<"teq", GPR32Opnd, uimm10, II_TEQ>, TEQ_FM<0x34>, ISA_MIPS2;
@@ -1898,8 +1983,12 @@ def BLTZAL : MMRel, BGEZAL_FT<"bltzal", brtarget, GPR32Opnd>, BGEZAL_FM<0x10>,
def BLTZALL : MMRel, BGEZAL_FT<"bltzall", brtarget, GPR32Opnd, 0>,
BGEZAL_FM<0x12>, ISA_MIPS2_NOT_32R6_64R6;
def BAL_BR : BAL_BR_Pseudo<BGEZAL>;
-def TAILCALL : TailCall<J>;
-def TAILCALL_R : TailCallReg<GPR32Opnd, JR>;
+
+let Predicates = [NotInMicroMips] in {
+ def TAILCALL : TailCall<J, jmptarget>;
+}
+
+def TAILCALLREG : TailCallReg<GPR32Opnd>;
// Indirect branches are matched as PseudoIndirectBranch/PseudoIndirectBranch64
// then are expanded to JR, JR64, JALR, or JALR64 depending on the ISA.
@@ -2177,6 +2266,21 @@ def : MipsInstAlias<"dror $rd, $imm",
def ABSMacro : MipsAsmPseudoInst<(outs GPR32Opnd:$rd), (ins GPR32Opnd:$rs),
"abs\t$rd, $rs">;
+def SEQMacro : MipsAsmPseudoInst<(outs GPR32Opnd:$rd),
+ (ins GPR32Opnd:$rs, GPR32Opnd:$rt),
+ "seq $rd, $rs, $rt">, NOT_ASE_CNMIPS;
+
+def : MipsInstAlias<"seq $rd, $rs",
+ (SEQMacro GPR32Opnd:$rd, GPR32Opnd:$rd, GPR32Opnd:$rs), 0>,
+ NOT_ASE_CNMIPS;
+
+def SEQIMacro : MipsAsmPseudoInst<(outs GPR32Opnd:$rd),
+ (ins GPR32Opnd:$rs, simm32_relaxed:$imm),
+ "seq $rd, $rs, $imm">, NOT_ASE_CNMIPS;
+
+def : MipsInstAlias<"seq $rd, $imm",
+ (SEQIMacro GPR32Opnd:$rd, GPR32Opnd:$rd, simm32:$imm), 0>,
+ NOT_ASE_CNMIPS;
//===----------------------------------------------------------------------===//
// Instruction aliases
//===----------------------------------------------------------------------===//
@@ -2219,17 +2323,31 @@ def : MipsInstAlias<"jalr $rs", (JALR RA, GPR32Opnd:$rs), 0>;
def : MipsInstAlias<"jalr.hb $rs", (JALR_HB RA, GPR32Opnd:$rs), 1>, ISA_MIPS32;
def : MipsInstAlias<"neg $rt, $rs",
(SUB GPR32Opnd:$rt, ZERO, GPR32Opnd:$rs), 1>;
-def : MipsInstAlias<"negu $rt",
- (SUBu GPR32Opnd:$rt, ZERO, GPR32Opnd:$rt), 0>;
+def : MipsInstAlias<"neg $rt",
+ (SUB GPR32Opnd:$rt, ZERO, GPR32Opnd:$rt), 1>;
def : MipsInstAlias<"negu $rt, $rs",
(SUBu GPR32Opnd:$rt, ZERO, GPR32Opnd:$rs), 1>;
-def : MipsInstAlias<
+def : MipsInstAlias<"negu $rt",
+ (SUBu GPR32Opnd:$rt, ZERO, GPR32Opnd:$rt), 1>;
+let AdditionalPredicates = [NotInMicroMips] in {
+ def : MipsInstAlias<
+ "sgt $rd, $rs, $rt",
+ (SLT GPR32Opnd:$rd, GPR32Opnd:$rt, GPR32Opnd:$rs), 0>;
+ def : MipsInstAlias<
+ "sgt $rs, $rt",
+ (SLT GPR32Opnd:$rs, GPR32Opnd:$rt, GPR32Opnd:$rs), 0>;
+ def : MipsInstAlias<
+ "sgtu $rd, $rs, $rt",
+ (SLTu GPR32Opnd:$rd, GPR32Opnd:$rt, GPR32Opnd:$rs), 0>;
+ def : MipsInstAlias<
+ "sgtu $$rs, $rt",
+ (SLTu GPR32Opnd:$rs, GPR32Opnd:$rt, GPR32Opnd:$rs), 0>;
+ def : MipsInstAlias<
"slt $rs, $rt, $imm",
(SLTi GPR32Opnd:$rs, GPR32Opnd:$rt, simm32_relaxed:$imm), 0>;
-def : MipsInstAlias<
+ def : MipsInstAlias<
"sltu $rt, $rs, $imm",
(SLTiu GPR32Opnd:$rt, GPR32Opnd:$rs, simm32_relaxed:$imm), 0>;
-let AdditionalPredicates = [NotInMicroMips] in {
def : MipsInstAlias<
"and $rs, $rt, $imm",
(ANDi GPR32Opnd:$rs, GPR32Opnd:$rt, simm32_relaxed:$imm), 0>;
@@ -2251,6 +2369,9 @@ let AdditionalPredicates = [NotInMicroMips] in {
def : MipsInstAlias<
"not $rt, $rs",
(NOR GPR32Opnd:$rt, GPR32Opnd:$rs, ZERO), 0>;
+ def : MipsInstAlias<
+ "not $rt",
+ (NOR GPR32Opnd:$rt, GPR32Opnd:$rt, ZERO), 0>;
def : MipsInstAlias<"nop", (SLL ZERO, ZERO, 0), 1>;
}
def : MipsInstAlias<"mfc0 $rt, $rd", (MFC0 GPR32Opnd:$rt, COP0Opnd:$rd, 0), 0>;
@@ -2310,6 +2431,16 @@ let AdditionalPredicates = [NotInMicroMips] in {
(SRAV GPR32Opnd:$rd, GPR32Opnd:$rt, GPR32Opnd:$rs), 0>;
def : MipsInstAlias<"srl $rd, $rt, $rs",
(SRLV GPR32Opnd:$rd, GPR32Opnd:$rt, GPR32Opnd:$rs), 0>;
+ def : MipsInstAlias<"sll $rd, $rt",
+ (SLLV GPR32Opnd:$rd, GPR32Opnd:$rd, GPR32Opnd:$rt), 0>;
+ def : MipsInstAlias<"sra $rd, $rt",
+ (SRAV GPR32Opnd:$rd, GPR32Opnd:$rd, GPR32Opnd:$rt), 0>;
+ def : MipsInstAlias<"srl $rd, $rt",
+ (SRLV GPR32Opnd:$rd, GPR32Opnd:$rd, GPR32Opnd:$rt), 0>;
+ def : MipsInstAlias<"seh $rd", (SEH GPR32Opnd:$rd, GPR32Opnd:$rd), 0>,
+ ISA_MIPS32R2;
+ def : MipsInstAlias<"seb $rd", (SEB GPR32Opnd:$rd, GPR32Opnd:$rd), 0>,
+ ISA_MIPS32R2;
}
def : MipsInstAlias<"sdbbp", (SDBBP 0)>, ISA_MIPS32_NOT_32R6_64R6;
def : MipsInstAlias<"sync",
@@ -2318,11 +2449,12 @@ def : MipsInstAlias<"sync",
// Assembler Pseudo Instructions
//===----------------------------------------------------------------------===//
-// We use i32imm on li/la to defer range checking to the assembler.
+// We use uimm32_coerced to accept a 33-bit signed number that is rendered into
+// a 32-bit number.
class LoadImmediate32<string instr_asm, Operand Od, RegisterOperand RO> :
MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm32),
!strconcat(instr_asm, "\t$rt, $imm32")> ;
-def LoadImm32 : LoadImmediate32<"li", i32imm, GPR32Opnd>;
+def LoadImm32 : LoadImmediate32<"li", uimm32_coerced, GPR32Opnd>;
class LoadAddressFromReg32<string instr_asm, Operand MemOpnd,
RegisterOperand RO> :
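
The new operand class widens what li accepts: any literal that fits in 32 bits as either a signed or an unsigned value (a 33-bit signed range overall) is coerced to the same 32-bit pattern. A standalone sketch of that coercion, assuming range checking happens on the 64-bit parsed value (illustrative only, not the assembler's code):

#include <cstdint>
#include <cstdio>

// Accept a parsed literal if it fits i32 or u32, and coerce it to the 32-bit
// bit pattern that "li" will materialize.
bool coerceImm32(int64_t Parsed, uint32_t &Out) {
  bool FitsSigned   = Parsed >= INT32_MIN && Parsed <= INT32_MAX;
  bool FitsUnsigned = Parsed >= 0 && Parsed <= UINT32_MAX;
  if (!FitsSigned && !FitsUnsigned)
    return false;                        // needs more than 32 bits
  Out = static_cast<uint32_t>(Parsed);   // same truncation either way
  return true;
}

int main() {
  uint32_t V;
  coerceImm32(-1, V);          std::printf("0x%08x\n", V);  // 0xffffffff
  coerceImm32(4294967295, V);  std::printf("0x%08x\n", V);  // 0xffffffff
  std::printf("%d\n", coerceImm32(INT64_C(1) << 33, V));    // 0: rejected
  return 0;
}
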
@@ -2441,6 +2573,18 @@ def Ulhu : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins mem:$addr),
def Ulw : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins mem:$addr),
"ulw\t$rt, $addr">; //, ISA_MIPS1_NOT_32R6_64R6;
+def Ush : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins mem:$addr),
+ "ush\t$rt, $addr">; //, ISA_MIPS1_NOT_32R6_64R6;
+
+def Usw : MipsAsmPseudoInst<(outs GPR32Opnd:$rt), (ins mem:$addr),
+ "usw\t$rt, $addr">; //, ISA_MIPS1_NOT_32R6_64R6;
+
+def LDMacro : MipsAsmPseudoInst<(outs GPR32Opnd:$rt),
+ (ins mem_simm16:$addr), "ld $rt, $addr">,
+ ISA_MIPS1_NOT_MIPS3;
+def SDMacro : MipsAsmPseudoInst<(outs GPR32Opnd:$rt),
+ (ins mem_simm16:$addr), "sd $rt, $addr">,
+ ISA_MIPS1_NOT_MIPS3;
//===----------------------------------------------------------------------===//
// Arbitrary patterns that map to one or more instructions
//===----------------------------------------------------------------------===//
@@ -2452,19 +2596,24 @@ class LoadRegImmPat<Instruction LoadInst, ValueType ValTy, PatFrag Node> :
class StoreRegImmPat<Instruction StoreInst, ValueType ValTy> :
MipsPat<(store ValTy:$v, addrRegImm:$a), (StoreInst ValTy:$v, addrRegImm:$a)>;
+// Materialize constants.
+multiclass MaterializeImms<ValueType VT, Register ZEROReg,
+ Instruction ADDiuOp, Instruction LUiOp,
+ Instruction ORiOp> {
+
// Small immediates
-let AdditionalPredicates = [NotInMicroMips] in {
-def : MipsPat<(i32 immSExt16:$in),
- (ADDiu ZERO, imm:$in)>;
-def : MipsPat<(i32 immZExt16:$in),
- (ORi ZERO, imm:$in)>;
-}
-def : MipsPat<(i32 immLow16Zero:$in),
- (LUi (HI16 imm:$in))>;
+def : MipsPat<(VT immSExt16:$imm), (ADDiuOp ZEROReg, imm:$imm)>;
+def : MipsPat<(VT immZExt16:$imm), (ORiOp ZEROReg, imm:$imm)>;
+
+// Bits 31-16 set, lower 16 bits clear, sign/zero extended.
+def : MipsPat<(VT immSExt32Low16Zero:$imm), (LUiOp (HI16 imm:$imm))>;
// Arbitrary immediates
-def : MipsPat<(i32 imm:$imm),
- (ORi (LUi (HI16 imm:$imm)), (LO16 imm:$imm))>;
+def : MipsPat<(VT immSExt32:$imm), (ORiOp (LUiOp (HI16 imm:$imm)), (LO16 imm:$imm))>;
+}
+
+let AdditionalPredicates = [NotInMicroMips] in
+ defm : MaterializeImms<i32, ZERO, ADDiu, LUi, ORi>;
// Carry MipsPatterns
let AdditionalPredicates = [NotInMicroMips] in {
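
MaterializeImms parameterizes the existing i32 constant-materialization patterns so they can later be reused for other register widths. The selection the patterns encode, as a standalone sketch (not the actual instruction selector):

#include <cstdint>
#include <cstdio>

// Pick the shortest MIPS sequence that materializes a 32-bit immediate.
const char *materialize(int32_t Imm) {
  if (Imm >= -32768 && Imm <= 32767)
    return "addiu $d, $zero, imm";                   // immSExt16
  if (Imm >= 0 && Imm <= 0xffff)
    return "ori $d, $zero, imm";                     // immZExt16
  if ((Imm & 0xffff) == 0)
    return "lui $d, hi16(imm)";                      // immSExt32Low16Zero
  return "lui $d, hi16(imm); ori $d, $d, lo16(imm)"; // general case
}

int main() {
  std::printf("%s\n", materialize(-4));              // addiu
  std::printf("%s\n", materialize(0x8000));          // ori (does not fit simm16)
  std::printf("%s\n", materialize(0x12340000));      // lui
  std::printf("%s\n", materialize(0x12345678));      // lui + ori
  return 0;
}
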
@@ -2558,38 +2707,39 @@ let AdditionalPredicates = [NotInMicroMips] in {
def : MipsPat<(store (i32 0), addr:$dst), (SW ZERO, addr:$dst)>;
// brcond patterns
-multiclass BrcondPats<RegisterClass RC, Instruction BEQOp, Instruction BNEOp,
- Instruction SLTOp, Instruction SLTuOp, Instruction SLTiOp,
- Instruction SLTiuOp, Register ZEROReg> {
+multiclass BrcondPats<RegisterClass RC, Instruction BEQOp, Instruction BEQOp1,
+ Instruction BNEOp, Instruction SLTOp, Instruction SLTuOp,
+ Instruction SLTiOp, Instruction SLTiuOp,
+ Register ZEROReg> {
def : MipsPat<(brcond (i32 (setne RC:$lhs, 0)), bb:$dst),
(BNEOp RC:$lhs, ZEROReg, bb:$dst)>;
def : MipsPat<(brcond (i32 (seteq RC:$lhs, 0)), bb:$dst),
(BEQOp RC:$lhs, ZEROReg, bb:$dst)>;
def : MipsPat<(brcond (i32 (setge RC:$lhs, RC:$rhs)), bb:$dst),
- (BEQ (SLTOp RC:$lhs, RC:$rhs), ZERO, bb:$dst)>;
+ (BEQOp1 (SLTOp RC:$lhs, RC:$rhs), ZERO, bb:$dst)>;
def : MipsPat<(brcond (i32 (setuge RC:$lhs, RC:$rhs)), bb:$dst),
- (BEQ (SLTuOp RC:$lhs, RC:$rhs), ZERO, bb:$dst)>;
+ (BEQOp1 (SLTuOp RC:$lhs, RC:$rhs), ZERO, bb:$dst)>;
def : MipsPat<(brcond (i32 (setge RC:$lhs, immSExt16:$rhs)), bb:$dst),
- (BEQ (SLTiOp RC:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>;
+ (BEQOp1 (SLTiOp RC:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>;
def : MipsPat<(brcond (i32 (setuge RC:$lhs, immSExt16:$rhs)), bb:$dst),
- (BEQ (SLTiuOp RC:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>;
+ (BEQOp1 (SLTiuOp RC:$lhs, immSExt16:$rhs), ZERO, bb:$dst)>;
def : MipsPat<(brcond (i32 (setgt RC:$lhs, immSExt16Plus1:$rhs)), bb:$dst),
- (BEQ (SLTiOp RC:$lhs, (Plus1 imm:$rhs)), ZERO, bb:$dst)>;
+ (BEQOp1 (SLTiOp RC:$lhs, (Plus1 imm:$rhs)), ZERO, bb:$dst)>;
def : MipsPat<(brcond (i32 (setugt RC:$lhs, immSExt16Plus1:$rhs)), bb:$dst),
- (BEQ (SLTiuOp RC:$lhs, (Plus1 imm:$rhs)), ZERO, bb:$dst)>;
+ (BEQOp1 (SLTiuOp RC:$lhs, (Plus1 imm:$rhs)), ZERO, bb:$dst)>;
def : MipsPat<(brcond (i32 (setle RC:$lhs, RC:$rhs)), bb:$dst),
- (BEQ (SLTOp RC:$rhs, RC:$lhs), ZERO, bb:$dst)>;
+ (BEQOp1 (SLTOp RC:$rhs, RC:$lhs), ZERO, bb:$dst)>;
def : MipsPat<(brcond (i32 (setule RC:$lhs, RC:$rhs)), bb:$dst),
- (BEQ (SLTuOp RC:$rhs, RC:$lhs), ZERO, bb:$dst)>;
+ (BEQOp1 (SLTuOp RC:$rhs, RC:$lhs), ZERO, bb:$dst)>;
def : MipsPat<(brcond RC:$cond, bb:$dst),
(BNEOp RC:$cond, ZEROReg, bb:$dst)>;
}
-
-defm : BrcondPats<GPR32, BEQ, BNE, SLT, SLTu, SLTi, SLTiu, ZERO>;
-
+let AdditionalPredicates = [NotInMicroMips] in {
+ defm : BrcondPats<GPR32, BEQ, BEQ, BNE, SLT, SLTu, SLTi, SLTiu, ZERO>;
+}
def : MipsPat<(brcond (i32 (setlt i32:$lhs, 1)), bb:$dst),
(BLEZ i32:$lhs, bb:$dst)>;
def : MipsPat<(brcond (i32 (setgt i32:$lhs, -1)), bb:$dst),
@@ -2608,11 +2758,12 @@ multiclass SeteqPats<RegisterClass RC, Instruction SLTiuOp, Instruction XOROp,
(SLTuOp ZEROReg, (XOROp RC:$lhs, RC:$rhs))>;
}
-multiclass SetlePats<RegisterClass RC, Instruction SLTOp, Instruction SLTuOp> {
+multiclass SetlePats<RegisterClass RC, Instruction XORiOp, Instruction SLTOp,
+ Instruction SLTuOp> {
def : MipsPat<(setle RC:$lhs, RC:$rhs),
- (XORi (SLTOp RC:$rhs, RC:$lhs), 1)>;
+ (XORiOp (SLTOp RC:$rhs, RC:$lhs), 1)>;
def : MipsPat<(setule RC:$lhs, RC:$rhs),
- (XORi (SLTuOp RC:$rhs, RC:$lhs), 1)>;
+ (XORiOp (SLTuOp RC:$rhs, RC:$lhs), 1)>;
}
multiclass SetgtPats<RegisterClass RC, Instruction SLTOp, Instruction SLTuOp> {
@@ -2622,26 +2773,29 @@ multiclass SetgtPats<RegisterClass RC, Instruction SLTOp, Instruction SLTuOp> {
(SLTuOp RC:$rhs, RC:$lhs)>;
}
-multiclass SetgePats<RegisterClass RC, Instruction SLTOp, Instruction SLTuOp> {
+multiclass SetgePats<RegisterClass RC, Instruction XORiOp, Instruction SLTOp,
+ Instruction SLTuOp> {
def : MipsPat<(setge RC:$lhs, RC:$rhs),
- (XORi (SLTOp RC:$lhs, RC:$rhs), 1)>;
+ (XORiOp (SLTOp RC:$lhs, RC:$rhs), 1)>;
def : MipsPat<(setuge RC:$lhs, RC:$rhs),
- (XORi (SLTuOp RC:$lhs, RC:$rhs), 1)>;
+ (XORiOp (SLTuOp RC:$lhs, RC:$rhs), 1)>;
}
-multiclass SetgeImmPats<RegisterClass RC, Instruction SLTiOp,
- Instruction SLTiuOp> {
+multiclass SetgeImmPats<RegisterClass RC, Instruction XORiOp,
+ Instruction SLTiOp, Instruction SLTiuOp> {
def : MipsPat<(setge RC:$lhs, immSExt16:$rhs),
- (XORi (SLTiOp RC:$lhs, immSExt16:$rhs), 1)>;
+ (XORiOp (SLTiOp RC:$lhs, immSExt16:$rhs), 1)>;
def : MipsPat<(setuge RC:$lhs, immSExt16:$rhs),
- (XORi (SLTiuOp RC:$lhs, immSExt16:$rhs), 1)>;
+ (XORiOp (SLTiuOp RC:$lhs, immSExt16:$rhs), 1)>;
}
-defm : SeteqPats<GPR32, SLTiu, XOR, SLTu, ZERO>;
-defm : SetlePats<GPR32, SLT, SLTu>;
-defm : SetgtPats<GPR32, SLT, SLTu>;
-defm : SetgePats<GPR32, SLT, SLTu>;
-defm : SetgeImmPats<GPR32, SLTi, SLTiu>;
+let AdditionalPredicates = [NotInMicroMips] in {
+ defm : SeteqPats<GPR32, SLTiu, XOR, SLTu, ZERO>;
+ defm : SetlePats<GPR32, XORi, SLT, SLTu>;
+ defm : SetgtPats<GPR32, SLT, SLTu>;
+ defm : SetgePats<GPR32, XORi, SLT, SLTu>;
+ defm : SetgeImmPats<GPR32, XORi, SLTi, SLTiu>;
+}
// bswap pattern
def : MipsPat<(bswap GPR32:$rt), (ROTR (WSBH GPR32:$rt), 16)>;
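
The Seteq/Setle/Setge pattern multiclasses above expand comparisons MIPS has no single instruction for into slt/sltu/sltiu plus xor/xori. A standalone sketch of the identities those patterns rely on (plain C++ mirroring the DAG patterns, not any LLVM API):

#include <cstdint>
#include <cstdio>

// MIPS only provides slt/sltu (and immediate forms); the rest are derived.
int slt(int32_t a, int32_t b)    { return a < b ? 1 : 0; }
int sltu(uint32_t a, uint32_t b) { return a < b ? 1 : 0; }
int xori(int v, int imm)         { return v ^ imm; }

int setge(int32_t a, int32_t b) { return xori(slt(a, b), 1); }        // !(a < b)
int setle(int32_t a, int32_t b) { return xori(slt(b, a), 1); }        // !(b < a)
int setgt(int32_t a, int32_t b) { return slt(b, a); }                 //  (b < a)
int seteq(int32_t a, int32_t b) { return sltu(uint32_t(a ^ b), 1); }  // xor == 0
int setne(int32_t a, int32_t b) { return sltu(0, uint32_t(a ^ b)); }  // xor != 0

int main() {
  std::printf("%d %d %d %d %d\n", setge(3, 3), setle(2, 5), setgt(5, 2),
              seteq(7, 7), setne(7, 8));   // expected: 1 1 1 1 1
  std::printf("%d %d %d %d %d\n", setge(2, 3), setle(5, 2), setgt(2, 5),
              seteq(7, 8), setne(7, 7));   // expected: 0 0 0 0 0
  return 0;
}
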
diff --git a/contrib/llvm/lib/Target/Mips/MipsLongBranch.cpp b/contrib/llvm/lib/Target/Mips/MipsLongBranch.cpp
index e721312390d2..1087d0e0140e 100644
--- a/contrib/llvm/lib/Target/Mips/MipsLongBranch.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsLongBranch.cpp
@@ -66,15 +66,13 @@ namespace {
: MachineFunctionPass(ID), TM(tm), IsPIC(TM.isPositionIndependent()),
ABI(static_cast<const MipsTargetMachine &>(TM).getABI()) {}
- const char *getPassName() const override {
- return "Mips Long Branch";
- }
+ StringRef getPassName() const override { return "Mips Long Branch"; }
bool runOnMachineFunction(MachineFunction &F) override;
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
private:
@@ -157,7 +155,7 @@ void MipsLongBranch::splitMBB(MachineBasicBlock *MBB) {
MBB->addSuccessor(Tgt);
MF->insert(std::next(MachineFunction::iterator(MBB)), NewMBB);
- NewMBB->splice(NewMBB->end(), MBB, (++LastBr).base(), MBB->end());
+ NewMBB->splice(NewMBB->end(), MBB, LastBr.getReverse(), MBB->end());
}
// Fill MBBInfos.
@@ -179,7 +177,7 @@ void MipsLongBranch::initMBBInfo() {
// Compute size of MBB.
for (MachineBasicBlock::instr_iterator MI = MBB->instr_begin();
MI != MBB->instr_end(); ++MI)
- MBBInfos[I].Size += TII->GetInstSizeInBytes(*MI);
+ MBBInfos[I].Size += TII->getInstSizeInBytes(*MI);
// Search for MBB's branch instruction.
ReverseIter End = MBB->rend();
@@ -187,7 +185,7 @@ void MipsLongBranch::initMBBInfo() {
if ((Br != End) && !Br->isIndirectBranch() &&
(Br->isConditionalBranch() || (Br->isUnconditionalBranch() && IsPIC)))
- MBBInfos[I].Br = &*(++Br).base();
+ MBBInfos[I].Br = &*Br;
}
}
diff --git a/contrib/llvm/lib/Target/Mips/MipsMSAInstrInfo.td b/contrib/llvm/lib/Target/Mips/MipsMSAInstrInfo.td
index deb4345e2662..8b04fcb76920 100644
--- a/contrib/llvm/lib/Target/Mips/MipsMSAInstrInfo.td
+++ b/contrib/llvm/lib/Target/Mips/MipsMSAInstrInfo.td
@@ -389,10 +389,6 @@ def mulsub : PatFrag<(ops node:$wd, node:$ws, node:$wt),
def mul_fexp2 : PatFrag<(ops node:$ws, node:$wt),
(fmul node:$ws, (fexp2 node:$wt))>;
-// Immediates
-def immSExt5 : ImmLeaf<i32, [{return isInt<5>(Imm);}]>;
-def immSExt10: ImmLeaf<i32, [{return isInt<10>(Imm);}]>;
-
// Instruction encoding.
class ADD_A_B_ENC : MSA_3R_FMT<0b000, 0b00, 0b010000>;
class ADD_A_H_ENC : MSA_3R_FMT<0b000, 0b01, 0b010000>;
@@ -2308,9 +2304,12 @@ class LD_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
}
class LD_B_DESC : LD_DESC_BASE<"ld.b", load, v16i8, MSA128BOpnd, mem_simm10>;
-class LD_H_DESC : LD_DESC_BASE<"ld.h", load, v8i16, MSA128HOpnd, mem_simm10_lsl1>;
-class LD_W_DESC : LD_DESC_BASE<"ld.w", load, v4i32, MSA128WOpnd, mem_simm10_lsl2>;
-class LD_D_DESC : LD_DESC_BASE<"ld.d", load, v2i64, MSA128DOpnd, mem_simm10_lsl3>;
+class LD_H_DESC : LD_DESC_BASE<"ld.h", load, v8i16, MSA128HOpnd,
+ mem_simm10_lsl1, addrimm10lsl1>;
+class LD_W_DESC : LD_DESC_BASE<"ld.w", load, v4i32, MSA128WOpnd,
+ mem_simm10_lsl2, addrimm10lsl2>;
+class LD_D_DESC : LD_DESC_BASE<"ld.d", load, v2i64, MSA128DOpnd,
+ mem_simm10_lsl3, addrimm10lsl3>;
class LDI_B_DESC : MSA_I10_LDI_DESC_BASE<"ldi.b", MSA128BOpnd>;
class LDI_H_DESC : MSA_I10_LDI_DESC_BASE<"ldi.h", MSA128HOpnd>;
@@ -2641,9 +2640,12 @@ class ST_DESC_BASE<string instr_asm, SDPatternOperator OpNode,
}
class ST_B_DESC : ST_DESC_BASE<"st.b", store, v16i8, MSA128BOpnd, mem_simm10>;
-class ST_H_DESC : ST_DESC_BASE<"st.h", store, v8i16, MSA128HOpnd, mem_simm10_lsl1>;
-class ST_W_DESC : ST_DESC_BASE<"st.w", store, v4i32, MSA128WOpnd, mem_simm10_lsl2>;
-class ST_D_DESC : ST_DESC_BASE<"st.d", store, v2i64, MSA128DOpnd, mem_simm10_lsl3>;
+class ST_H_DESC : ST_DESC_BASE<"st.h", store, v8i16, MSA128HOpnd,
+ mem_simm10_lsl1, addrimm10lsl1>;
+class ST_W_DESC : ST_DESC_BASE<"st.w", store, v4i32, MSA128WOpnd,
+ mem_simm10_lsl2, addrimm10lsl2>;
+class ST_D_DESC : ST_DESC_BASE<"st.d", store, v2i64, MSA128DOpnd,
+ mem_simm10_lsl3, addrimm10lsl3>;
class SUBS_S_B_DESC : MSA_3R_DESC_BASE<"subs_s.b", int_mips_subs_s_b,
MSA128BOpnd>;
@@ -3523,16 +3525,16 @@ class MSAPat<dag pattern, dag result, list<Predicate> pred = [HasMSA]> :
def : MSAPat<(extractelt (v4i32 MSA128W:$ws), immZExt4:$idx),
(COPY_S_W MSA128W:$ws, immZExt4:$idx)>;
-def : MSAPat<(v8f16 (load addrimm10:$addr)), (LD_H addrimm10:$addr)>;
-def : MSAPat<(v4f32 (load addrimm10:$addr)), (LD_W addrimm10:$addr)>;
-def : MSAPat<(v2f64 (load addrimm10:$addr)), (LD_D addrimm10:$addr)>;
+def : MSAPat<(v8f16 (load addrimm10lsl1:$addr)), (LD_H addrimm10lsl1:$addr)>;
+def : MSAPat<(v4f32 (load addrimm10lsl2:$addr)), (LD_W addrimm10lsl2:$addr)>;
+def : MSAPat<(v2f64 (load addrimm10lsl3:$addr)), (LD_D addrimm10lsl3:$addr)>;
-def ST_FH : MSAPat<(store (v8f16 MSA128H:$ws), addrimm10:$addr),
- (ST_H MSA128H:$ws, addrimm10:$addr)>;
-def ST_FW : MSAPat<(store (v4f32 MSA128W:$ws), addrimm10:$addr),
- (ST_W MSA128W:$ws, addrimm10:$addr)>;
-def ST_FD : MSAPat<(store (v2f64 MSA128D:$ws), addrimm10:$addr),
- (ST_D MSA128D:$ws, addrimm10:$addr)>;
+def ST_FH : MSAPat<(store (v8f16 MSA128H:$ws), addrimm10lsl1:$addr),
+ (ST_H MSA128H:$ws, addrimm10lsl1:$addr)>;
+def ST_FW : MSAPat<(store (v4f32 MSA128W:$ws), addrimm10lsl2:$addr),
+ (ST_W MSA128W:$ws, addrimm10lsl2:$addr)>;
+def ST_FD : MSAPat<(store (v2f64 MSA128D:$ws), addrimm10lsl3:$addr),
+ (ST_D MSA128D:$ws, addrimm10lsl3:$addr)>;
class MSA_FABS_PSEUDO_DESC_BASE<RegisterOperand ROWD,
RegisterOperand ROWS = ROWD,
@@ -3729,6 +3731,56 @@ def SZ_D_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAllZero, v2i64,
def SZ_V_PSEUDO : MSA_CBRANCH_PSEUDO_DESC_BASE<MipsVAnyZero, v16i8,
MSA128B, NoItinerary>;
+// Pseudos used to implement transparent fp16 support.
+
+let Predicates = [HasMSA] in {
+ def ST_F16 : MipsPseudo<(outs), (ins MSA128F16:$ws, mem_simm10:$addr),
+ [(store (f16 MSA128F16:$ws), (addrimm10:$addr))]> {
+ let usesCustomInserter = 1;
+ }
+
+ def LD_F16 : MipsPseudo<(outs MSA128F16:$ws), (ins mem_simm10:$addr),
+ [(set MSA128F16:$ws, (f16 (load addrimm10:$addr)))]> {
+ let usesCustomInserter = 1;
+ }
+
+ def MSA_FP_EXTEND_W_PSEUDO : MipsPseudo<(outs FGR32Opnd:$fd),
+ (ins MSA128F16:$ws),
+ [(set FGR32Opnd:$fd,
+ (f32 (fpextend MSA128F16:$ws)))]> {
+ let usesCustomInserter = 1;
+ }
+
+ def MSA_FP_ROUND_W_PSEUDO : MipsPseudo<(outs MSA128F16:$wd),
+ (ins FGR32Opnd:$fs),
+ [(set MSA128F16:$wd,
+ (f16 (fpround FGR32Opnd:$fs)))]> {
+ let usesCustomInserter = 1;
+ }
+
+ def MSA_FP_EXTEND_D_PSEUDO : MipsPseudo<(outs FGR64Opnd:$fd),
+ (ins MSA128F16:$ws),
+ [(set FGR64Opnd:$fd,
+ (f64 (fpextend MSA128F16:$ws)))]> {
+ let usesCustomInserter = 1;
+ }
+
+ def MSA_FP_ROUND_D_PSEUDO : MipsPseudo<(outs MSA128F16:$wd),
+ (ins FGR64Opnd:$fs),
+ [(set MSA128F16:$wd,
+ (f16 (fpround FGR64Opnd:$fs)))]> {
+ let usesCustomInserter = 1;
+ }
+
+ def : MipsPat<(MipsTruncIntFP MSA128F16:$ws),
+ (TRUNC_W_D64 (MSA_FP_EXTEND_D_PSEUDO MSA128F16:$ws))>;
+
+ def : MipsPat<(MipsFPCmp MSA128F16:$ws, MSA128F16:$wt, imm:$cond),
+ (FCMP_S32 (MSA_FP_EXTEND_W_PSEUDO MSA128F16:$ws),
+ (MSA_FP_EXTEND_W_PSEUDO MSA128F16:$wt), imm:$cond)>,
+ ISA_MIPS1_NOT_32R6_64R6;
+}
+
// Vector extraction with fixed index.
//
// Extracting 32-bit values on MSA32 should always use COPY_S_W rather than
diff --git a/contrib/llvm/lib/Target/Mips/MipsMachineFunction.cpp b/contrib/llvm/lib/Target/Mips/MipsMachineFunction.cpp
index f81e64e06f43..d0609b15341d 100644
--- a/contrib/llvm/lib/Target/Mips/MipsMachineFunction.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsMachineFunction.cpp
@@ -60,7 +60,7 @@ void MipsFunctionInfo::createEhDataRegsFI() {
? &Mips::GPR64RegClass
: &Mips::GPR32RegClass;
- EhDataRegFI[I] = MF.getFrameInfo()->CreateStackObject(RC->getSize(),
+ EhDataRegFI[I] = MF.getFrameInfo().CreateStackObject(RC->getSize(),
RC->getAlignment(), false);
}
}
@@ -68,12 +68,12 @@ void MipsFunctionInfo::createEhDataRegsFI() {
void MipsFunctionInfo::createISRRegFI() {
// ISRs require spill slots for Status & ErrorPC Coprocessor 0 registers.
// The current implementation only supports Mips32r2+ not Mips64rX. Status
- // is always 32 bits, ErrorPC is 32 or 64 bits dependant on architecture,
+ // is always 32 bits, ErrorPC is 32 or 64 bits dependent on architecture,
// however Mips32r2+ is the supported architecture.
const TargetRegisterClass *RC = &Mips::GPR32RegClass;
for (int I = 0; I < 2; ++I)
- ISRDataRegFI[I] = MF.getFrameInfo()->CreateStackObject(
+ ISRDataRegFI[I] = MF.getFrameInfo().CreateStackObject(
RC->getSize(), RC->getAlignment(), false);
}
@@ -95,7 +95,7 @@ MachinePointerInfo MipsFunctionInfo::callPtrInfo(const GlobalValue *GV) {
int MipsFunctionInfo::getMoveF64ViaSpillFI(const TargetRegisterClass *RC) {
if (MoveF64ViaSpillFI == -1) {
- MoveF64ViaSpillFI = MF.getFrameInfo()->CreateStackObject(
+ MoveF64ViaSpillFI = MF.getFrameInfo().CreateStackObject(
RC->getSize(), RC->getAlignment(), false);
}
return MoveF64ViaSpillFI;
diff --git a/contrib/llvm/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp
index b18a673912f8..cf85eb3f2416 100644
--- a/contrib/llvm/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsModuleISelDAGToDAG.cpp
@@ -26,7 +26,7 @@ namespace {
: MachineFunctionPass(ID), TM(TM_) {}
// Pass Name
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "MIPS DAG->DAG Pattern Instruction Selection";
}
diff --git a/contrib/llvm/lib/Target/Mips/MipsOptimizePICCall.cpp b/contrib/llvm/lib/Target/Mips/MipsOptimizePICCall.cpp
index 7c940ee1578c..f33857fe628f 100644
--- a/contrib/llvm/lib/Target/Mips/MipsOptimizePICCall.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsOptimizePICCall.cpp
@@ -61,7 +61,7 @@ class OptimizePICCall : public MachineFunctionPass {
public:
OptimizePICCall(TargetMachine &tm) : MachineFunctionPass(ID) {}
- const char *getPassName() const override { return "Mips OptimizePICCall"; }
+ StringRef getPassName() const override { return "Mips OptimizePICCall"; }
bool runOnMachineFunction(MachineFunction &F) override;
diff --git a/contrib/llvm/lib/Target/Mips/MipsOs16.cpp b/contrib/llvm/lib/Target/Mips/MipsOs16.cpp
index 8136907de4d7..51ac5620f585 100644
--- a/contrib/llvm/lib/Target/Mips/MipsOs16.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsOs16.cpp
@@ -35,9 +35,7 @@ namespace {
MipsOs16() : ModulePass(ID) {}
- const char *getPassName() const override {
- return "MIPS Os16 Optimization";
- }
+ StringRef getPassName() const override { return "MIPS Os16 Optimization"; }
bool runOnModule(Module &M) override;
};
diff --git a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
index 860cf9cfd138..65be350f259d 100644
--- a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -206,7 +206,7 @@ getReservedRegs(const MachineFunction &MF) const {
// allocate variable-sized objects at runtime. This should test the
// same conditions as MipsFrameLowering::hasBP().
if (needsStackRealignment(MF) &&
- MF.getFrameInfo()->hasVarSizedObjects()) {
+ MF.getFrameInfo().hasVarSizedObjects()) {
Reserved.set(Mips::S7);
Reserved.set(Mips::S7_64);
}
@@ -281,8 +281,8 @@ eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
errs() << "<--------->\n" << MI);
int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
- uint64_t stackSize = MF.getFrameInfo()->getStackSize();
- int64_t spOffset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
+ uint64_t stackSize = MF.getFrameInfo().getStackSize();
+ int64_t spOffset = MF.getFrameInfo().getObjectOffset(FrameIndex);
DEBUG(errs() << "FrameIndex : " << FrameIndex << "\n"
<< "spOffset : " << spOffset << "\n"
diff --git a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td
index cfce7c8e6533..8c82239ebbd3 100644
--- a/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td
+++ b/contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td
@@ -400,6 +400,8 @@ def FCC : RegisterClass<"Mips", [i32], 32, (sequence "FCC%u", 0, 7)>,
// This class allows us to represent this in codegen patterns.
def FGRCC : RegisterClass<"Mips", [i32], 32, (sequence "F%u", 0, 31)>;
+def MSA128F16 : RegisterClass<"Mips", [f16], 128, (sequence "W%u", 0, 31)>;
+
def MSA128B: RegisterClass<"Mips", [v16i8], 128,
(sequence "W%u", 0, 31)>;
def MSA128H: RegisterClass<"Mips", [v8i16, v8f16], 128,
@@ -646,6 +648,10 @@ def COP3Opnd : RegisterOperand<COP3> {
let ParserMatchClass = COP3AsmOperand;
}
+def MSA128F16Opnd : RegisterOperand<MSA128F16> {
+ let ParserMatchClass = MSA128AsmOperand;
+}
+
def MSA128BOpnd : RegisterOperand<MSA128B> {
let ParserMatchClass = MSA128AsmOperand;
}
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp
index a7ddd7752736..4996d070eb29 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.cpp
@@ -374,7 +374,7 @@ MipsSEFrameLowering::MipsSEFrameLowering(const MipsSubtarget &STI)
void MipsSEFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
const MipsSEInstrInfo &TII =
@@ -396,10 +396,10 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF,
&Mips::GPR64RegClass : &Mips::GPR32RegClass;
// First, compute final stack size.
- uint64_t StackSize = MFI->getStackSize();
+ uint64_t StackSize = MFI.getStackSize();
// No need to allocate space on the stack.
- if (StackSize == 0 && !MFI->adjustsStack()) return;
+ if (StackSize == 0 && !MFI.adjustsStack()) return;
MachineModuleInfo &MMI = MF.getMMI();
const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
@@ -409,7 +409,7 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF,
TII.adjustStackPtr(SP, -StackSize, MBB, MBBI);
// emit ".cfi_def_cfa_offset StackSize"
- unsigned CFIIndex = MMI.addFrameInst(
+ unsigned CFIIndex = MF.addFrameInst(
MCCFIInstruction::createDefCfaOffset(nullptr, -StackSize));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
@@ -417,7 +417,7 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF,
if (MF.getFunction()->hasFnAttribute("interrupt"))
emitInterruptPrologueStub(MF, MBB);
- const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
if (CSI.size()) {
// Find the instruction past the last instruction that saves a callee-saved
@@ -429,7 +429,7 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF,
// directives.
for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(),
E = CSI.end(); I != E; ++I) {
- int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
+ int64_t Offset = MFI.getObjectOffset(I->getFrameIdx());
unsigned Reg = I->getReg();
// If Reg is a double precision register, emit two cfa_offsets,
@@ -443,12 +443,12 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF,
if (!STI.isLittle())
std::swap(Reg0, Reg1);
- unsigned CFIIndex = MMI.addFrameInst(
+ unsigned CFIIndex = MF.addFrameInst(
MCCFIInstruction::createOffset(nullptr, Reg0, Offset));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
- CFIIndex = MMI.addFrameInst(
+ CFIIndex = MF.addFrameInst(
MCCFIInstruction::createOffset(nullptr, Reg1, Offset + 4));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
@@ -459,18 +459,18 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF,
if (!STI.isLittle())
std::swap(Reg0, Reg1);
- unsigned CFIIndex = MMI.addFrameInst(
+ unsigned CFIIndex = MF.addFrameInst(
MCCFIInstruction::createOffset(nullptr, Reg0, Offset));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
- CFIIndex = MMI.addFrameInst(
+ CFIIndex = MF.addFrameInst(
MCCFIInstruction::createOffset(nullptr, Reg1, Offset + 4));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
} else {
// Reg is either in GPR32 or FGR32.
- unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
nullptr, MRI->getDwarfRegNum(Reg, 1), Offset));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
@@ -489,9 +489,9 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF,
// Emit .cfi_offset directives for eh data registers.
for (int I = 0; I < 4; ++I) {
- int64_t Offset = MFI->getObjectOffset(MipsFI->getEhDataRegFI(I));
+ int64_t Offset = MFI.getObjectOffset(MipsFI->getEhDataRegFI(I));
unsigned Reg = MRI->getDwarfRegNum(ABI.GetEhDataReg(I), true);
- unsigned CFIIndex = MMI.addFrameInst(
+ unsigned CFIIndex = MF.addFrameInst(
MCCFIInstruction::createOffset(nullptr, Reg, Offset));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
@@ -505,7 +505,7 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF,
.setMIFlag(MachineInstr::FrameSetup);
// emit ".cfi_def_cfa_register $fp"
- unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister(
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(
nullptr, MRI->getDwarfRegNum(FP, true)));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
@@ -514,9 +514,9 @@ void MipsSEFrameLowering::emitPrologue(MachineFunction &MF,
// addiu $Reg, $zero, -MaxAlignment
// andi $sp, $sp, $Reg
unsigned VR = MF.getRegInfo().createVirtualRegister(RC);
- assert(isInt<16>(MFI->getMaxAlignment()) &&
+ assert(isInt<16>(MFI.getMaxAlignment()) &&
"Function's alignment size requirement is not supported.");
- int MaxAlign = -(int)MFI->getMaxAlignment();
+ int MaxAlign = -(int)MFI.getMaxAlignment();
BuildMI(MBB, MBBI, dl, TII.get(ADDiu), VR).addReg(ZERO) .addImm(MaxAlign);
BuildMI(MBB, MBBI, dl, TII.get(AND), SP).addReg(SP).addReg(VR);
@@ -664,7 +664,7 @@ void MipsSEFrameLowering::emitInterruptPrologueStub(
void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
const MipsSEInstrInfo &TII =
@@ -684,7 +684,7 @@ void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF,
// Find the first instruction that restores a callee-saved register.
MachineBasicBlock::iterator I = MBBI;
- for (unsigned i = 0; i < MFI->getCalleeSavedInfo().size(); ++i)
+ for (unsigned i = 0; i < MFI.getCalleeSavedInfo().size(); ++i)
--I;
// Insert instruction "move $sp, $fp" at this location.
@@ -697,7 +697,7 @@ void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF,
// Find first instruction that restores a callee-saved register.
MachineBasicBlock::iterator I = MBBI;
- for (unsigned i = 0; i < MFI->getCalleeSavedInfo().size(); ++i)
+ for (unsigned i = 0; i < MFI.getCalleeSavedInfo().size(); ++i)
--I;
// Insert instructions that restore eh data registers.
@@ -711,7 +711,7 @@ void MipsSEFrameLowering::emitEpilogue(MachineFunction &MF,
emitInterruptEpilogueStub(MF, MBB);
// Get the number of bytes from FrameInfo
- uint64_t StackSize = MFI->getStackSize();
+ uint64_t StackSize = MFI.getStackSize();
if (!StackSize)
return;
@@ -754,16 +754,16 @@ void MipsSEFrameLowering::emitInterruptEpilogueStub(
int MipsSEFrameLowering::getFrameIndexReference(const MachineFunction &MF,
int FI,
unsigned &FrameReg) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
MipsABIInfo ABI = STI.getABI();
- if (MFI->isFixedObjectIndex(FI))
+ if (MFI.isFixedObjectIndex(FI))
FrameReg = hasFP(MF) ? ABI.GetFramePtr() : ABI.GetStackPtr();
else
FrameReg = hasBP(MF) ? ABI.GetBasePtr() : ABI.GetStackPtr();
- return MFI->getObjectOffset(FI) + MFI->getStackSize() -
- getOffsetOfLocalArea() + MFI->getOffsetAdjustment();
+ return MFI.getObjectOffset(FI) + MFI.getStackSize() -
+ getOffsetOfLocalArea() + MFI.getOffsetAdjustment();
}
bool MipsSEFrameLowering::
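As a quick worked example (not part of the diff) of the offset getFrameIndexReference computes above, with made-up numbers: an object at frame-info offset -8 in a 32-byte frame, with a zero local-area offset and no extra adjustment, is addressed 24 bytes above the selected frame register.

#include <cassert>
#include <cstdint>

int main() {
  // Hypothetical values for one frame object; none of these come from the
  // patch itself.
  int64_t ObjectOffset = -8;     // MFI.getObjectOffset(FI)
  int64_t StackSize = 32;        // MFI.getStackSize()
  int64_t LocalAreaOffset = 0;   // getOffsetOfLocalArea(), assumed 0 here
  int64_t OffsetAdjustment = 0;  // MFI.getOffsetAdjustment()

  int64_t FrameRegOffset =
      ObjectOffset + StackSize - LocalAreaOffset + OffsetAdjustment;
  assert(FrameRegOffset == 24);
  return 0;
}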
@@ -778,12 +778,12 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
// Add the callee-saved register as live-in. Do not add if the register is
// RA and return address is taken, because it has already been added in
- // method MipsTargetLowering::LowerRETURNADDR.
+ // method MipsTargetLowering::lowerRETURNADDR.
// It's killed at the spill, unless the register is RA and return address
// is taken.
unsigned Reg = CSI[i].getReg();
bool IsRAAndRetAddrIsTaken = (Reg == Mips::RA || Reg == Mips::RA_64)
- && MF->getFrameInfo()->isReturnAddressTaken();
+ && MF->getFrameInfo().isReturnAddressTaken();
if (!IsRAAndRetAddrIsTaken)
EntryBlock->addLiveIn(Reg);
@@ -819,14 +819,14 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB,
bool
MipsSEFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
// Reserve call frame if the size of the maximum call frame fits into 16-bit
// immediate field and there are no variable sized objects on the stack.
// Make sure the second register scavenger spill slot can be accessed with one
// instruction.
- return isInt<16>(MFI->getMaxCallFrameSize() + getStackAlignment()) &&
- !MFI->hasVarSizedObjects();
+ return isInt<16>(MFI.getMaxCallFrameSize() + getStackAlignment()) &&
+ !MFI.hasVarSizedObjects();
}
/// Mark \p Reg and all registers aliasing it in the bitset.
@@ -868,7 +868,7 @@ void MipsSEFrameLowering::determineCalleeSaves(MachineFunction &MF,
// mips64, it should be 64-bit, otherwise it should be 32-bit.
const TargetRegisterClass *RC = STI.hasMips64() ?
&Mips::GPR64RegClass : &Mips::GPR32RegClass;
- int FI = MF.getFrameInfo()->CreateStackObject(RC->getSize(),
+ int FI = MF.getFrameInfo().CreateStackObject(RC->getSize(),
RC->getAlignment(), false);
RS->addScavengingFrameIndex(FI);
}
@@ -882,7 +882,7 @@ void MipsSEFrameLowering::determineCalleeSaves(MachineFunction &MF,
const TargetRegisterClass *RC =
ABI.ArePtrs64bit() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
- int FI = MF.getFrameInfo()->CreateStackObject(RC->getSize(),
+ int FI = MF.getFrameInfo().CreateStackObject(RC->getSize(),
RC->getAlignment(), false);
RS->addScavengingFrameIndex(FI);
}
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
index d9528da5a96d..6f0fdddd7d55 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp
@@ -293,20 +293,25 @@ bool MipsSEDAGToDAGISel::selectAddrFrameIndex(SDValue Addr, SDValue &Base,
}
/// Match frameindex+offset and frameindex|offset
-bool MipsSEDAGToDAGISel::selectAddrFrameIndexOffset(SDValue Addr, SDValue &Base,
- SDValue &Offset,
- unsigned OffsetBits) const {
+bool MipsSEDAGToDAGISel::selectAddrFrameIndexOffset(
+ SDValue Addr, SDValue &Base, SDValue &Offset, unsigned OffsetBits,
+ unsigned ShiftAmount = 0) const {
if (CurDAG->isBaseWithConstantOffset(Addr)) {
ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
- if (isIntN(OffsetBits, CN->getSExtValue())) {
+ if (isIntN(OffsetBits + ShiftAmount, CN->getSExtValue())) {
EVT ValTy = Addr.getValueType();
// If the first operand is a FI, get the TargetFI Node
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
- (Addr.getOperand(0)))
+ if (FrameIndexSDNode *FIN =
+ dyn_cast<FrameIndexSDNode>(Addr.getOperand(0)))
Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), ValTy);
- else
+ else {
Base = Addr.getOperand(0);
+ // If the base is a FI, any additional offset calculation is done in
+ // eliminateFrameIndex; otherwise we need to check the alignment here.
+ if (OffsetToAlignment(CN->getZExtValue(), 1ull << ShiftAmount) != 0)
+ return false;
+ }
Offset = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(Addr),
ValTy);
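As a standalone illustration (not part of the diff): the new ShiftAmount parameter above widens the accepted range to OffsetBits + ShiftAmount signed bits but, for non-frame-index bases, rejects offsets that are not multiples of the scale (the OffsetToAlignment check). A sketch of that acceptance test in plain C++ without the LLVM helpers; the function name and sample values are made up for illustration. For frame-index bases the alignment check is deferred, since eliminateFrameIndex can still adjust the offset later.

#include <cassert>
#include <cstdint>

// Returns true if Offset can be encoded by a signed field of OffsetBits bits
// that is scaled (shifted left) by ShiftAmount, e.g. the MSA ld.w/st.w
// 10-bit offsets scaled by 4 bytes.
static bool fitsScaledSignedImm(int64_t Offset, unsigned OffsetBits,
                                unsigned ShiftAmount) {
  // The scaled field spans OffsetBits + ShiftAmount bits in total...
  int64_t Limit = int64_t(1) << (OffsetBits + ShiftAmount - 1);
  if (Offset < -Limit || Offset >= Limit)
    return false;
  // ...but only offsets that are multiples of the scale are representable.
  return (Offset & ((int64_t(1) << ShiftAmount) - 1)) == 0;
}

int main() {
  assert(fitsScaledSignedImm(508, 10, 2));    // 127 * 4, encodable by ld.w
  assert(!fitsScaledSignedImm(510, 10, 2));   // not a multiple of 4
  assert(!fitsScaledSignedImm(4096, 10, 2));  // out of range (max is 511 * 4)
  return 0;
}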
@@ -392,17 +397,6 @@ bool MipsSEDAGToDAGISel::selectAddrRegImm9(SDValue Addr, SDValue &Base,
return false;
}
-bool MipsSEDAGToDAGISel::selectAddrRegImm10(SDValue Addr, SDValue &Base,
- SDValue &Offset) const {
- if (selectAddrFrameIndex(Addr, Base, Offset))
- return true;
-
- if (selectAddrFrameIndexOffset(Addr, Base, Offset, 10))
- return true;
-
- return false;
-}
-
/// Used on microMIPS LWC2, LDC2, SWC2 and SDC2 instructions (11-bit offset)
bool MipsSEDAGToDAGISel::selectAddrRegImm11(SDValue Addr, SDValue &Base,
SDValue &Offset) const {
@@ -478,15 +472,49 @@ bool MipsSEDAGToDAGISel::selectIntAddrLSL2MM(SDValue Addr, SDValue &Base,
return selectAddrDefault(Addr, Base, Offset);
}
-bool MipsSEDAGToDAGISel::selectIntAddrMSA(SDValue Addr, SDValue &Base,
- SDValue &Offset) const {
- if (selectAddrRegImm10(Addr, Base, Offset))
+bool MipsSEDAGToDAGISel::selectIntAddrSImm10(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+
+ if (selectAddrFrameIndex(Addr, Base, Offset))
return true;
- if (selectAddrDefault(Addr, Base, Offset))
+ if (selectAddrFrameIndexOffset(Addr, Base, Offset, 10))
return true;
- return false;
+ return selectAddrDefault(Addr, Base, Offset);
+}
+
+bool MipsSEDAGToDAGISel::selectIntAddrSImm10Lsl1(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ if (selectAddrFrameIndex(Addr, Base, Offset))
+ return true;
+
+ if (selectAddrFrameIndexOffset(Addr, Base, Offset, 10, 1))
+ return true;
+
+ return selectAddrDefault(Addr, Base, Offset);
+}
+
+bool MipsSEDAGToDAGISel::selectIntAddrSImm10Lsl2(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ if (selectAddrFrameIndex(Addr, Base, Offset))
+ return true;
+
+ if (selectAddrFrameIndexOffset(Addr, Base, Offset, 10, 2))
+ return true;
+
+ return selectAddrDefault(Addr, Base, Offset);
+}
+
+bool MipsSEDAGToDAGISel::selectIntAddrSImm10Lsl3(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const {
+ if (selectAddrFrameIndex(Addr, Base, Offset))
+ return true;
+
+ if (selectAddrFrameIndexOffset(Addr, Base, Offset, 10, 3))
+ return true;
+
+ return selectAddrDefault(Addr, Base, Offset);
}
// Select constant vector splats.
@@ -771,13 +799,13 @@ bool MipsSEDAGToDAGISel::trySelect(SDNode *Node) {
case ISD::Constant: {
const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Node);
+ int64_t Imm = CN->getSExtValue();
unsigned Size = CN->getValueSizeInBits(0);
- if (Size == 32)
+ if (isInt<32>(Imm))
break;
MipsAnalyzeImmediate AnalyzeImm;
- int64_t Imm = CN->getSExtValue();
const MipsAnalyzeImmediate::InstSeq &Seq =
AnalyzeImm.Analyze(Imm, Size, false);
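As a standalone illustration (not part of the diff): the ISD::Constant hunk above switches from asking whether the node is 32 bits wide to asking whether its value fits a signed 32-bit immediate, so 64-bit constants with small values also take the default selection path instead of MipsAnalyzeImmediate. A sketch of that fits-in-N-signed-bits test, mirroring the spirit of isInt<N>; the helper name below is made up.

#include <cassert>
#include <cstdint>

// Does the signed value fit in an N-bit two's-complement immediate?
template <unsigned N> static bool fitsSignedBits(int64_t X) {
  static_assert(N > 0 && N < 64, "illustrative range only");
  return X >= -(int64_t(1) << (N - 1)) && X < (int64_t(1) << (N - 1));
}

int main() {
  // A 64-bit node holding -1 still fits a signed 32-bit immediate, so the
  // default selector can materialize it cheaply.
  assert(fitsSignedBits<32>(-1));
  assert(fitsSignedBits<32>(INT32_MIN));
  // 2^31 does not fit as a signed 32-bit value.
  assert(!fitsSignedBits<32>(int64_t(1) << 31));
  return 0;
}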
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h
index 0f08b72a334e..2a8e5877e848 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h
+++ b/contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h
@@ -44,7 +44,8 @@ private:
bool selectAddrFrameIndex(SDValue Addr, SDValue &Base, SDValue &Offset) const;
bool selectAddrFrameIndexOffset(SDValue Addr, SDValue &Base, SDValue &Offset,
- unsigned OffsetBits) const;
+ unsigned OffsetBits,
+ unsigned ShiftAmount) const;
bool selectAddrRegImm(SDValue Addr, SDValue &Base,
SDValue &Offset) const override;
@@ -58,9 +59,6 @@ private:
bool selectAddrRegImm9(SDValue Addr, SDValue &Base,
SDValue &Offset) const;
- bool selectAddrRegImm10(SDValue Addr, SDValue &Base,
- SDValue &Offset) const;
-
bool selectAddrRegImm11(SDValue Addr, SDValue &Base,
SDValue &Offset) const;
@@ -82,8 +80,17 @@ private:
bool selectIntAddrLSL2MM(SDValue Addr, SDValue &Base,
SDValue &Offset) const override;
- bool selectIntAddrMSA(SDValue Addr, SDValue &Base,
- SDValue &Offset) const override;
+ bool selectIntAddrSImm10(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const override;
+
+ bool selectIntAddrSImm10Lsl1(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const override;
+
+ bool selectIntAddrSImm10Lsl2(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const override;
+
+ bool selectIntAddrSImm10Lsl3(SDValue Addr, SDValue &Base,
+ SDValue &Offset) const override;
/// \brief Select constant vector splats.
bool selectVSplat(SDNode *N, APInt &Imm,
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp b/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
index 80c000d5746d..26e0f9a94368 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -27,8 +27,8 @@ using namespace llvm;
#define DEBUG_TYPE "mips-isel"
static cl::opt<bool>
-EnableMipsTailCalls("enable-mips-tail-calls", cl::Hidden,
- cl::desc("MIPS: Enable tail calls."), cl::init(false));
+UseMipsTailCalls("mips-tail-calls", cl::Hidden,
+ cl::desc("MIPS: permit tail calls."), cl::init(false));
static cl::opt<bool> NoDPLoadStore("mno-ldc1-sdc1", cl::init(false),
cl::desc("Expand double precision loads and "
@@ -92,6 +92,44 @@ MipsSETargetLowering::MipsSETargetLowering(const MipsTargetMachine &TM,
addMSAFloatType(MVT::v4f32, &Mips::MSA128WRegClass);
addMSAFloatType(MVT::v2f64, &Mips::MSA128DRegClass);
+ // f16 is a storage-only type, always promote it to f32.
+ addRegisterClass(MVT::f16, &Mips::MSA128HRegClass);
+ setOperationAction(ISD::SETCC, MVT::f16, Promote);
+ setOperationAction(ISD::BR_CC, MVT::f16, Promote);
+ setOperationAction(ISD::SELECT_CC, MVT::f16, Promote);
+ setOperationAction(ISD::SELECT, MVT::f16, Promote);
+ setOperationAction(ISD::FADD, MVT::f16, Promote);
+ setOperationAction(ISD::FSUB, MVT::f16, Promote);
+ setOperationAction(ISD::FMUL, MVT::f16, Promote);
+ setOperationAction(ISD::FDIV, MVT::f16, Promote);
+ setOperationAction(ISD::FREM, MVT::f16, Promote);
+ setOperationAction(ISD::FMA, MVT::f16, Promote);
+ setOperationAction(ISD::FNEG, MVT::f16, Promote);
+ setOperationAction(ISD::FABS, MVT::f16, Promote);
+ setOperationAction(ISD::FCEIL, MVT::f16, Promote);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f16, Promote);
+ setOperationAction(ISD::FCOS, MVT::f16, Promote);
+ setOperationAction(ISD::FP_EXTEND, MVT::f16, Promote);
+ setOperationAction(ISD::FFLOOR, MVT::f16, Promote);
+ setOperationAction(ISD::FNEARBYINT, MVT::f16, Promote);
+ setOperationAction(ISD::FPOW, MVT::f16, Promote);
+ setOperationAction(ISD::FPOWI, MVT::f16, Promote);
+ setOperationAction(ISD::FRINT, MVT::f16, Promote);
+ setOperationAction(ISD::FSIN, MVT::f16, Promote);
+ setOperationAction(ISD::FSINCOS, MVT::f16, Promote);
+ setOperationAction(ISD::FSQRT, MVT::f16, Promote);
+ setOperationAction(ISD::FEXP, MVT::f16, Promote);
+ setOperationAction(ISD::FEXP2, MVT::f16, Promote);
+ setOperationAction(ISD::FLOG, MVT::f16, Promote);
+ setOperationAction(ISD::FLOG2, MVT::f16, Promote);
+ setOperationAction(ISD::FLOG10, MVT::f16, Promote);
+ setOperationAction(ISD::FROUND, MVT::f16, Promote);
+ setOperationAction(ISD::FTRUNC, MVT::f16, Promote);
+ setOperationAction(ISD::FMINNUM, MVT::f16, Promote);
+ setOperationAction(ISD::FMAXNUM, MVT::f16, Promote);
+ setOperationAction(ISD::FMINNAN, MVT::f16, Promote);
+ setOperationAction(ISD::FMAXNAN, MVT::f16, Promote);
+
setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::OR);
setTargetDAGCombine(ISD::SRA);
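As an aside (not part of the diff): the block above treats f16 as storage-only, so values sit in registers and memory as 16-bit patterns and every operation is carried out after widening to f32, which on MSA is the job of fexupr.w (see the FPEXTEND/FPROUND pseudos later in this patch). A rough software analogue of that widening step, assuming IEEE 754 binary16 and binary32 layouts; it is purely illustrative.

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

// Widen an IEEE 754 binary16 bit pattern to a float, roughly what the MSA
// fexupr.w instruction does for each element.
static float halfToFloat(uint16_t H) {
  uint32_t Sign = uint32_t(H >> 15) << 31;
  uint32_t Exp = (H >> 10) & 0x1F;
  uint32_t Frac = H & 0x3FF;
  uint32_t Bits;
  if (Exp == 0x1F) {
    Bits = Sign | 0x7F800000 | (Frac << 13);                // Inf or NaN
  } else if (Exp != 0) {
    Bits = Sign | ((Exp - 15 + 127) << 23) | (Frac << 13);  // normal
  } else if (Frac == 0) {
    Bits = Sign;                                            // signed zero
  } else {
    // Subnormal half: renormalize into a (normal) float.
    unsigned Shift = 0;
    while ((Frac & 0x400) == 0) {
      Frac <<= 1;
      ++Shift;
    }
    // Value is 1.mantissa * 2^(-14 - Shift); bias the exponent by 127.
    Bits = Sign | ((113 - Shift) << 23) | ((Frac & 0x3FF) << 13);
  }
  float F;
  std::memcpy(&F, &Bits, sizeof F);
  return F;
}

int main() {
  assert(halfToFloat(0x3C00) == 1.0f);               // 1.0
  assert(halfToFloat(0xC000) == -2.0f);              // -2.0
  assert(halfToFloat(0x0001) == ldexpf(1.0f, -24));  // smallest subnormal
  return 0;
}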
@@ -852,7 +890,7 @@ static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty,
APInt SplatValue, SplatUndef;
unsigned SplatBitSize;
bool HasAnyUndefs;
- unsigned EltSize = Ty.getVectorElementType().getSizeInBits();
+ unsigned EltSize = Ty.getScalarSizeInBits();
BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N->getOperand(1));
if (!Subtarget.hasDSP())
@@ -1172,13 +1210,25 @@ MipsSETargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
return emitFEXP2_W_1(MI, BB);
case Mips::FEXP2_D_1_PSEUDO:
return emitFEXP2_D_1(MI, BB);
+ case Mips::ST_F16:
+ return emitST_F16_PSEUDO(MI, BB);
+ case Mips::LD_F16:
+ return emitLD_F16_PSEUDO(MI, BB);
+ case Mips::MSA_FP_EXTEND_W_PSEUDO:
+ return emitFPEXTEND_PSEUDO(MI, BB, false);
+ case Mips::MSA_FP_ROUND_W_PSEUDO:
+ return emitFPROUND_PSEUDO(MI, BB, false);
+ case Mips::MSA_FP_EXTEND_D_PSEUDO:
+ return emitFPEXTEND_PSEUDO(MI, BB, true);
+ case Mips::MSA_FP_ROUND_D_PSEUDO:
+ return emitFPROUND_PSEUDO(MI, BB, true);
}
}
bool MipsSETargetLowering::isEligibleForTailCallOptimization(
const CCState &CCInfo, unsigned NextStackOffset,
const MipsFunctionInfo &FI) const {
- if (!EnableMipsTailCalls)
+ if (!UseMipsTailCalls)
return false;
// Exception has to be cleared with eret.
@@ -1504,7 +1554,7 @@ static SDValue lowerMSABitClear(SDValue Op, SelectionDAG &DAG) {
static SDValue lowerMSABitClearImm(SDValue Op, SelectionDAG &DAG) {
SDLoc DL(Op);
EVT ResTy = Op->getValueType(0);
- APInt BitImm = APInt(ResTy.getVectorElementType().getSizeInBits(), 1)
+ APInt BitImm = APInt(ResTy.getScalarSizeInBits(), 1)
<< cast<ConstantSDNode>(Op->getOperand(2))->getAPIntValue();
SDValue BitMask = DAG.getConstant(~BitImm, DL, ResTy);
@@ -3372,6 +3422,304 @@ MipsSETargetLowering::emitFILL_FD(MachineInstr &MI,
return BB;
}
+// Emit the ST_F16_PSEUDO instruction to store an f16 value from an MSA
+// register.
+//
+// STF16 MSA128F16:$wd, mem_simm10:$addr
+// =>
+// copy_u.h $rtemp,$wd[0]
+// sh $rtemp, $addr
+//
+// Safety: We can't use st.h & co as they would overwrite the memory after
+// the destination. It would require half floats to be allocated 16 bytes(!)
+// of space.
+MachineBasicBlock *
+MipsSETargetLowering::emitST_F16_PSEUDO(MachineInstr &MI,
+ MachineBasicBlock *BB) const {
+
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
+ DebugLoc DL = MI.getDebugLoc();
+ unsigned Ws = MI.getOperand(0).getReg();
+ unsigned Rt = MI.getOperand(1).getReg();
+ const MachineMemOperand &MMO = **MI.memoperands_begin();
+ unsigned Imm = MMO.getOffset();
+
+ // Caution: A load via the GOT can expand to a GPR32 operand; a load via
+ // spill and reload can expand to a GPR64 operand. Examine the
+ // operand in detail and default to the register class implied by the ABI.
+ const TargetRegisterClass *RC =
+ MI.getOperand(1).isReg() ? RegInfo.getRegClass(MI.getOperand(1).getReg())
+ : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass
+ : &Mips::GPR64RegClass);
+ const bool UsingMips32 = RC == &Mips::GPR32RegClass;
+ unsigned Rs = RegInfo.createVirtualRegister(RC);
+
+ BuildMI(*BB, MI, DL, TII->get(Mips::COPY_U_H), Rs).addReg(Ws).addImm(0);
+ BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::SH : Mips::SH64))
+ .addReg(Rs)
+ .addReg(Rt)
+ .addImm(Imm)
+ .addMemOperand(BB->getParent()->getMachineMemOperand(
+ &MMO, MMO.getOffset(), MMO.getSize()));
+
+ MI.eraseFromParent();
+ return BB;
+}
+
+// Emit the LD_F16_PSEUDO instruction to load an f16 value into an MSA register.
+//
+// LD_F16 MSA128F16:$wd, mem_simm10:$addr
+// =>
+// lh $rtemp, $addr
+// fill.h $wd, $rtemp
+//
+// Safety: We can't use ld.h & co as they over-read from the source.
+// Additionally, if the address is not a multiple of 16, two cases can occur:
+// a) Segmentation fault, as the load instruction reads from a memory page
+// it's not supposed to.
+// b) The load crosses an implementation-specific boundary, requiring OS
+// intervention.
+//
+MachineBasicBlock *
+MipsSETargetLowering::emitLD_F16_PSEUDO(MachineInstr &MI,
+ MachineBasicBlock *BB) const {
+
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
+ DebugLoc DL = MI.getDebugLoc();
+ unsigned Wd = MI.getOperand(0).getReg();
+
+ // Caution: A load via the GOT can expand to a GPR32 operand; a load via
+ // spill and reload can expand to a GPR64 operand. Examine the
+ // operand in detail and default to the register class implied by the ABI.
+ const TargetRegisterClass *RC =
+ MI.getOperand(1).isReg() ? RegInfo.getRegClass(MI.getOperand(1).getReg())
+ : (Subtarget.isABI_O32() ? &Mips::GPR32RegClass
+ : &Mips::GPR64RegClass);
+
+ const bool UsingMips32 = RC == &Mips::GPR32RegClass;
+ unsigned Rt = RegInfo.createVirtualRegister(RC);
+
+ MachineInstrBuilder MIB =
+ BuildMI(*BB, MI, DL, TII->get(UsingMips32 ? Mips::LH : Mips::LH64), Rt);
+ for (unsigned i = 1; i < MI.getNumOperands(); i++)
+ MIB.addOperand(MI.getOperand(i));
+
+ BuildMI(*BB, MI, DL, TII->get(Mips::FILL_H), Wd).addReg(Rt);
+
+ MI.eraseFromParent();
+ return BB;
+}
+
+// Emit the FPROUND_PSEUDO instruction.
+//
+// Round an FGR64Opnd or FGR32Opnd to an f16.
+//
+// Safety: Cycle the operand through the GPRs so the result always ends up
+// in the correct MSA register.
+//
+// FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fs
+// / FGR64Opnd:$Fs and MSA128F16:$Wd to the same physical register
+// (which they can be, as the MSA registers are defined to alias the
+// FPU's 64-bit and 32-bit registers) the result can be accessed using
+// the correct register class. That requires operands to be tie-able across
+// register classes which have a sub/super register class relationship.
+//
+// For FGR32Opnd:
+//
+// FPROUND MSA128F16:$wd, FGR32Opnd:$fs
+// =>
+// mfc1 $rtemp, $fs
+// fill.w $rtemp, $wtemp
+// fexdo.w $wd, $wtemp, $wtemp
+//
+// For FGR64Opnd on mips32r2+:
+//
+// FPROUND MSA128F16:$wd, FGR64Opnd:$fs
+// =>
+// mfc1 $rtemp, $fs
+// fill.w $rtemp, $wtemp
+// mfhc1 $rtemp2, $fs
+// insert.w $wtemp[1], $rtemp2
+// insert.w $wtemp[3], $rtemp2
+// fexdo.w $wtemp2, $wtemp, $wtemp
+// fexdo.h $wd, $wtemp2, $wtemp2
+//
+// For FGR64Opnd on mips64r2+:
+//
+// FPROUND MSA128F16:$wd, FGR64Opnd:$fs
+// =>
+// dmfc1 $rtemp, $fs
+// fill.d $rtemp, $wtemp
+// fexdo.w $wtemp2, $wtemp, $wtemp
+// fexdo.h $wd, $wtemp2, $wtemp2
+//
+// Safety note: As $wtemp is UNDEF, we may provoke a spurious exception if the
+// undef bits are "just right" and the exception enable bits are
+// set. By using fill.w to replicate $fs into all elements rather than
+// insert.w for one element, we avoid that potential case. If
+// fexdo.[hw] causes an exception, the exception is valid and it
+// occurs for all elements.
+//
+MachineBasicBlock *
+MipsSETargetLowering::emitFPROUND_PSEUDO(MachineInstr &MI,
+ MachineBasicBlock *BB,
+ bool IsFGR64) const {
+
+ // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous
+ // here. It's technically doable to support MIPS32 here, but the ISA forbids
+ // it.
+ assert(Subtarget.hasMSA() && Subtarget.hasMips32r2());
+
+ bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64;
+
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ DebugLoc DL = MI.getDebugLoc();
+ unsigned Wd = MI.getOperand(0).getReg();
+ unsigned Fs = MI.getOperand(1).getReg();
+
+ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
+ unsigned Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
+ const TargetRegisterClass *GPRRC =
+ IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
+ unsigned MFC1Opc = IsFGR64onMips64 ? Mips::DMFC1 : Mips::MFC1;
+ unsigned FILLOpc = IsFGR64onMips64 ? Mips::FILL_D : Mips::FILL_W;
+
+ // Perform the register class copy as mentioned above.
+ unsigned Rtemp = RegInfo.createVirtualRegister(GPRRC);
+ BuildMI(*BB, MI, DL, TII->get(MFC1Opc), Rtemp).addReg(Fs);
+ BuildMI(*BB, MI, DL, TII->get(FILLOpc), Wtemp).addReg(Rtemp);
+ unsigned WPHI = Wtemp;
+
+ if (!Subtarget.hasMips64() && IsFGR64) {
+ unsigned Rtemp2 = RegInfo.createVirtualRegister(GPRRC);
+ BuildMI(*BB, MI, DL, TII->get(Mips::MFHC1_D64), Rtemp2).addReg(Fs);
+ unsigned Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
+ unsigned Wtemp3 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
+ BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_W), Wtemp2)
+ .addReg(Wtemp)
+ .addReg(Rtemp2)
+ .addImm(1);
+ BuildMI(*BB, MI, DL, TII->get(Mips::INSERT_W), Wtemp3)
+ .addReg(Wtemp2)
+ .addReg(Rtemp2)
+ .addImm(3);
+ WPHI = Wtemp3;
+ }
+
+ if (IsFGR64) {
+ unsigned Wtemp2 = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
+ BuildMI(*BB, MI, DL, TII->get(Mips::FEXDO_W), Wtemp2)
+ .addReg(WPHI)
+ .addReg(WPHI);
+ WPHI = Wtemp2;
+ }
+
+ BuildMI(*BB, MI, DL, TII->get(Mips::FEXDO_H), Wd).addReg(WPHI).addReg(WPHI);
+
+ MI.eraseFromParent();
+ return BB;
+}
+
+// Emit the FPEXTEND_PSEUDO instruction.
+//
+// Expand an f16 to either an FGR32Opnd or an FGR64Opnd.
+//
+// Safety: Cycle the result through the GPRs so the result always ends up
+// in the correct floating-point register.
+//
+// FIXME: This copying is strictly unnecessary. If we could tie FGR32Opnd:$Fd
+// / FGR64Opnd:$Fd and MSA128F16:$Ws to the same physical register
+// (which they can be, as the MSA registers are defined to alias the
+// FPU's 64-bit and 32-bit registers) the result can be accessed using
+// the correct register class. That requires operands to be tie-able across
+// register classes which have a sub/super register class relationship. I
+// haven't checked.
+//
+// For FGR32Opnd:
+//
+// FPEXTEND FGR32Opnd:$fd, MSA128F16:$ws
+// =>
+// fexupr.w $wtemp, $ws
+// copy_s.w $rtemp, $ws[0]
+// mtc1 $rtemp, $fd
+//
+// For FGR64Opnd on Mips64:
+//
+// FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws
+// =>
+// fexupr.w $wtemp, $ws
+// fexupr.d $wtemp2, $wtemp
+// copy_s.d $rtemp, $wtemp2[0]
+// dmtc1 $rtemp, $fd
+//
+// For FGR64Opnd on Mips32:
+//
+// FPEXTEND FGR64Opnd:$fd, MSA128F16:$ws
+// =>
+// fexupr.w $wtemp, $ws
+// fexupr.d $wtemp2, $wtemp
+// copy_s.w $rtemp, $wtemp2[0]
+// mtc1 $rtemp, $ftemp
+// copy_s.w $rtemp2, $wtemp2[1]
+// $fd = mthc1 $rtemp2, $ftemp
+//
+MachineBasicBlock *
+MipsSETargetLowering::emitFPEXTEND_PSEUDO(MachineInstr &MI,
+ MachineBasicBlock *BB,
+ bool IsFGR64) const {
+
+ // Strictly speaking, we need MIPS32R5 to support MSA. We'll be generous
+ // here. It's technically doable to support MIPS32 here, but the ISA forbids
+ // it.
+ assert(Subtarget.hasMSA() && Subtarget.hasMips32r2());
+
+ bool IsFGR64onMips64 = Subtarget.hasMips64() && IsFGR64;
+ bool IsFGR64onMips32 = !Subtarget.hasMips64() && IsFGR64;
+
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ DebugLoc DL = MI.getDebugLoc();
+ unsigned Fd = MI.getOperand(0).getReg();
+ unsigned Ws = MI.getOperand(1).getReg();
+
+ MachineRegisterInfo &RegInfo = BB->getParent()->getRegInfo();
+ const TargetRegisterClass *GPRRC =
+ IsFGR64onMips64 ? &Mips::GPR64RegClass : &Mips::GPR32RegClass;
+ unsigned MTC1Opc = IsFGR64onMips64 ? Mips::DMTC1 : Mips::MTC1;
+ unsigned COPYOpc = IsFGR64onMips64 ? Mips::COPY_S_D : Mips::COPY_S_W;
+
+ unsigned Wtemp = RegInfo.createVirtualRegister(&Mips::MSA128WRegClass);
+ unsigned WPHI = Wtemp;
+
+ BuildMI(*BB, MI, DL, TII->get(Mips::FEXUPR_W), Wtemp).addReg(Ws);
+ if (IsFGR64) {
+ WPHI = RegInfo.createVirtualRegister(&Mips::MSA128DRegClass);
+ BuildMI(*BB, MI, DL, TII->get(Mips::FEXUPR_D), WPHI).addReg(Wtemp);
+ }
+
+ // Perform the safety regclass copy mentioned above.
+ unsigned Rtemp = RegInfo.createVirtualRegister(GPRRC);
+ unsigned FPRPHI = IsFGR64onMips32
+ ? RegInfo.createVirtualRegister(&Mips::FGR64RegClass)
+ : Fd;
+ BuildMI(*BB, MI, DL, TII->get(COPYOpc), Rtemp).addReg(WPHI).addImm(0);
+ BuildMI(*BB, MI, DL, TII->get(MTC1Opc), FPRPHI).addReg(Rtemp);
+
+ if (IsFGR64onMips32) {
+ unsigned Rtemp2 = RegInfo.createVirtualRegister(GPRRC);
+ BuildMI(*BB, MI, DL, TII->get(Mips::COPY_S_W), Rtemp2)
+ .addReg(WPHI)
+ .addImm(1);
+ BuildMI(*BB, MI, DL, TII->get(Mips::MTHC1_D64), Fd)
+ .addReg(FPRPHI)
+ .addReg(Rtemp2);
+ }
+
+ MI.eraseFromParent();
+ return BB;
+}
+
// Emit the FEXP2_W_1 pseudo instructions.
//
// fexp2_w_1_pseudo $wd, $wt
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.h b/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.h
index 54154662f261..0abb9b318bda 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.h
+++ b/contrib/llvm/lib/Target/Mips/MipsSEISelLowering.h
@@ -111,6 +111,20 @@ namespace llvm {
/// \brief Emit the FEXP2_D_1 pseudo instructions.
MachineBasicBlock *emitFEXP2_D_1(MachineInstr &MI,
MachineBasicBlock *BB) const;
+ /// \brief Emit the LD_F16 pseudo instruction.
+ MachineBasicBlock *emitLD_F16_PSEUDO(MachineInstr &MI,
+ MachineBasicBlock *BB) const;
+ /// \brief Emit the ST_F16 pseudo instruction.
+ MachineBasicBlock *emitST_F16_PSEUDO(MachineInstr &MI,
+ MachineBasicBlock *BB) const;
+ /// \brief Emit the FPEXTEND pseudo instruction.
+ MachineBasicBlock *emitFPEXTEND_PSEUDO(MachineInstr &MI,
+ MachineBasicBlock *BB,
+ bool IsFGR64) const;
+ /// \brief Emit the FPROUND pseudo instruction.
+ MachineBasicBlock *emitFPROUND_PSEUDO(MachineInstr &MI,
+ MachineBasicBlock *BB,
+ bool IsFGR64) const;
};
}
diff --git a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
index 29107b2c1aa5..ea703d0edd96 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -408,7 +408,9 @@ unsigned MipsSEInstrInfo::getOppositeBranchOpc(unsigned Opc) const {
switch (Opc) {
default: llvm_unreachable("Illegal opcode!");
case Mips::BEQ: return Mips::BNE;
+ case Mips::BEQ_MM: return Mips::BNE_MM;
case Mips::BNE: return Mips::BEQ;
+ case Mips::BNE_MM: return Mips::BEQ_MM;
case Mips::BGTZ: return Mips::BLEZ;
case Mips::BGEZ: return Mips::BLTZ;
case Mips::BLTZ: return Mips::BGEZ;
@@ -431,6 +433,18 @@ unsigned MipsSEInstrInfo::getOppositeBranchOpc(unsigned Opc) const {
case Mips::BGEZC: return Mips::BLTZC;
case Mips::BLTZC: return Mips::BGEZC;
case Mips::BLEZC: return Mips::BGTZC;
+ case Mips::BEQZC64: return Mips::BNEZC64;
+ case Mips::BNEZC64: return Mips::BEQZC64;
+ case Mips::BEQC64: return Mips::BNEC64;
+ case Mips::BNEC64: return Mips::BEQC64;
+ case Mips::BGEC64: return Mips::BLTC64;
+ case Mips::BGEUC64: return Mips::BLTUC64;
+ case Mips::BLTC64: return Mips::BGEC64;
+ case Mips::BLTUC64: return Mips::BGEUC64;
+ case Mips::BGTZC64: return Mips::BLEZC64;
+ case Mips::BGEZC64: return Mips::BLTZC64;
+ case Mips::BLTZC64: return Mips::BGEZC64;
+ case Mips::BLEZC64: return Mips::BGTZC64;
}
}
@@ -506,17 +520,22 @@ unsigned MipsSEInstrInfo::loadImmediate(int64_t Imm, MachineBasicBlock &MBB,
}
unsigned MipsSEInstrInfo::getAnalyzableBrOpc(unsigned Opc) const {
- return (Opc == Mips::BEQ || Opc == Mips::BNE || Opc == Mips::BGTZ ||
- Opc == Mips::BGEZ || Opc == Mips::BLTZ || Opc == Mips::BLEZ ||
- Opc == Mips::BEQ64 || Opc == Mips::BNE64 || Opc == Mips::BGTZ64 ||
- Opc == Mips::BGEZ64 || Opc == Mips::BLTZ64 || Opc == Mips::BLEZ64 ||
- Opc == Mips::BC1T || Opc == Mips::BC1F || Opc == Mips::B ||
- Opc == Mips::J || Opc == Mips::BEQZC_MM || Opc == Mips::BNEZC_MM ||
- Opc == Mips::BEQC || Opc == Mips::BNEC || Opc == Mips::BLTC ||
- Opc == Mips::BGEC || Opc == Mips::BLTUC || Opc == Mips::BGEUC ||
- Opc == Mips::BGTZC || Opc == Mips::BLEZC || Opc == Mips::BGEZC ||
- Opc == Mips::BLTZC || Opc == Mips::BEQZC || Opc == Mips::BNEZC ||
- Opc == Mips::BC) ? Opc : 0;
+ return (Opc == Mips::BEQ || Opc == Mips::BEQ_MM || Opc == Mips::BNE ||
+ Opc == Mips::BNE_MM || Opc == Mips::BGTZ || Opc == Mips::BGEZ ||
+ Opc == Mips::BLTZ || Opc == Mips::BLEZ || Opc == Mips::BEQ64 ||
+ Opc == Mips::BNE64 || Opc == Mips::BGTZ64 || Opc == Mips::BGEZ64 ||
+ Opc == Mips::BLTZ64 || Opc == Mips::BLEZ64 || Opc == Mips::BC1T ||
+ Opc == Mips::BC1F || Opc == Mips::B || Opc == Mips::J ||
+ Opc == Mips::BEQZC_MM || Opc == Mips::BNEZC_MM || Opc == Mips::BEQC ||
+ Opc == Mips::BNEC || Opc == Mips::BLTC || Opc == Mips::BGEC ||
+ Opc == Mips::BLTUC || Opc == Mips::BGEUC || Opc == Mips::BGTZC ||
+ Opc == Mips::BLEZC || Opc == Mips::BGEZC || Opc == Mips::BLTZC ||
+ Opc == Mips::BEQZC || Opc == Mips::BNEZC || Opc == Mips::BEQZC64 ||
+ Opc == Mips::BNEZC64 || Opc == Mips::BEQC64 || Opc == Mips::BNEC64 ||
+ Opc == Mips::BGEC64 || Opc == Mips::BGEUC64 || Opc == Mips::BLTC64 ||
+ Opc == Mips::BLTUC64 || Opc == Mips::BGTZC64 ||
+ Opc == Mips::BGEZC64 || Opc == Mips::BLTZC64 ||
+ Opc == Mips::BLEZC64 || Opc == Mips::BC) ? Opc : 0;
}
void MipsSEInstrInfo::expandRetRA(MachineBasicBlock &MBB,
diff --git a/contrib/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp b/contrib/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp
index e3431cd118ab..86bd24166bb6 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsSERegisterInfo.cpp
@@ -149,7 +149,7 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
int64_t SPOffset) const {
MachineInstr &MI = *II;
MachineFunction &MF = *MI.getParent()->getParent();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
MipsABIInfo ABI =
@@ -157,7 +157,7 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
const MipsRegisterInfo *RegInfo =
static_cast<const MipsRegisterInfo *>(MF.getSubtarget().getRegisterInfo());
- const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
int MinCSFI = 0;
int MaxCSFI = -1;
@@ -182,9 +182,9 @@ void MipsSERegisterInfo::eliminateFI(MachineBasicBlock::iterator II,
IsISRRegFI)
FrameReg = ABI.GetStackPtr();
else if (RegInfo->needsStackRealignment(MF)) {
- if (MFI->hasVarSizedObjects() && !MFI->isFixedObjectIndex(FrameIndex))
+ if (MFI.hasVarSizedObjects() && !MFI.isFixedObjectIndex(FrameIndex))
FrameReg = ABI.GetBasePtr();
- else if (MFI->isFixedObjectIndex(FrameIndex))
+ else if (MFI.isFixedObjectIndex(FrameIndex))
FrameReg = getFrameRegister(MF);
else
FrameReg = ABI.GetStackPtr();
diff --git a/contrib/llvm/lib/Target/Mips/MipsSchedule.td b/contrib/llvm/lib/Target/Mips/MipsSchedule.td
index 738b6c46407a..c0de59ba15f5 100644
--- a/contrib/llvm/lib/Target/Mips/MipsSchedule.td
+++ b/contrib/llvm/lib/Target/Mips/MipsSchedule.td
@@ -44,6 +44,7 @@ def II_BC1FL : InstrItinClass;
def II_BC1T : InstrItinClass;
def II_BC1TL : InstrItinClass;
def II_BC1CCZ : InstrItinClass;
+def II_BC2CCZ : InstrItinClass;
def II_BCC : InstrItinClass; // beq and bne
def II_BCCZ : InstrItinClass; // b[gl][et]z
def II_BCCC : InstrItinClass; // b<cc>c
@@ -53,12 +54,18 @@ def II_BCCZC : InstrItinClass; // beqzc, bnezc
def II_BITSWAP : InstrItinClass;
def II_CEIL : InstrItinClass;
def II_CFC1 : InstrItinClass;
+def II_CFC2 : InstrItinClass;
def II_CLO : InstrItinClass;
def II_CLZ : InstrItinClass;
def II_CTC1 : InstrItinClass;
+def II_CTC2 : InstrItinClass;
def II_CVT : InstrItinClass;
def II_C_CC_D : InstrItinClass; // Any c.<cc>.d instruction
def II_C_CC_S : InstrItinClass; // Any c.<cc>.s instruction
+def II_CMP_CC_D : InstrItinClass; // Any cmp.<cc>.d instruction
+def II_CMP_CC_S : InstrItinClass; // Any cmp.<cc>.s instruction
+def II_CLASS_D : InstrItinClass;
+def II_CLASS_S : InstrItinClass;
def II_DADDIU : InstrItinClass;
def II_DADDU : InstrItinClass;
def II_DADDI : InstrItinClass;
@@ -172,6 +179,7 @@ def II_LHE : InstrItinClass;
def II_LHU : InstrItinClass;
def II_LHUE : InstrItinClass;
def II_LL : InstrItinClass;
+def II_LI : InstrItinClass;
def II_LLD : InstrItinClass;
def II_LUI : InstrItinClass;
def II_LUXC1 : InstrItinClass;
@@ -180,14 +188,17 @@ def II_LWE : InstrItinClass;
def II_LWC1 : InstrItinClass;
def II_LWC2 : InstrItinClass;
def II_LWC3 : InstrItinClass;
+def II_LWM : InstrItinClass;
def II_LWL : InstrItinClass;
def II_LWLE : InstrItinClass;
def II_LWPC : InstrItinClass;
+def II_LWP : InstrItinClass;
def II_LWR : InstrItinClass;
def II_LWRE : InstrItinClass;
def II_LWU : InstrItinClass;
def II_LWUPC : InstrItinClass;
def II_LWXC1 : InstrItinClass;
+def II_LWXS : InstrItinClass;
def II_LSA : InstrItinClass;
def II_DLSA : InstrItinClass;
def II_MADD : InstrItinClass;
@@ -196,13 +207,23 @@ def II_MADD_D : InstrItinClass;
def II_MADD_S : InstrItinClass;
def II_MADDF_D : InstrItinClass;
def II_MADDF_S : InstrItinClass;
+def II_MAX_D : InstrItinClass;
+def II_MAX_S : InstrItinClass;
+def II_MAXA_D : InstrItinClass;
+def II_MAXA_S : InstrItinClass;
+def II_MIN_D : InstrItinClass;
+def II_MIN_S : InstrItinClass;
+def II_MINA_D : InstrItinClass;
+def II_MINA_S : InstrItinClass;
def II_MFC0 : InstrItinClass;
+def II_MFHC0 : InstrItinClass;
def II_MFC1 : InstrItinClass;
def II_MFHC1 : InstrItinClass;
def II_MFC2 : InstrItinClass;
def II_MFHI_MFLO : InstrItinClass; // mfhi and mflo
def II_MOD : InstrItinClass;
def II_MODU : InstrItinClass;
+def II_MOVE : InstrItinClass;
def II_MOVF : InstrItinClass;
def II_MOVF_D : InstrItinClass;
def II_MOVF_S : InstrItinClass;
@@ -224,6 +245,7 @@ def II_MSUB_S : InstrItinClass;
def II_MSUBF_D : InstrItinClass;
def II_MSUBF_S : InstrItinClass;
def II_MTC0 : InstrItinClass;
+def II_MTHC0 : InstrItinClass;
def II_MTC1 : InstrItinClass;
def II_MTHC1 : InstrItinClass;
def II_MTC2 : InstrItinClass;
@@ -242,14 +264,21 @@ def II_NMADD_S : InstrItinClass;
def II_NMSUB_D : InstrItinClass;
def II_NMSUB_S : InstrItinClass;
def II_NOR : InstrItinClass;
+def II_NOT : InstrItinClass;
def II_OR : InstrItinClass;
def II_ORI : InstrItinClass;
def II_POP : InstrItinClass;
def II_RDHWR : InstrItinClass;
def II_RESTORE : InstrItinClass;
+def II_RECIP_S : InstrItinClass;
+def II_RECIP_D : InstrItinClass;
+def II_RINT_S : InstrItinClass;
+def II_RINT_D : InstrItinClass;
def II_ROTR : InstrItinClass;
def II_ROTRV : InstrItinClass;
def II_ROUND : InstrItinClass;
+def II_RSQRT_S : InstrItinClass;
+def II_RSQRT_D : InstrItinClass;
def II_SAVE : InstrItinClass;
def II_SC : InstrItinClass;
def II_SCD : InstrItinClass;
@@ -265,6 +294,8 @@ def II_SDXC1 : InstrItinClass;
def II_SEB : InstrItinClass;
def II_SEH : InstrItinClass;
def II_SELCCZ : InstrItinClass;
+def II_SELCCZ_D : InstrItinClass;
+def II_SELCCZ_S : InstrItinClass;
def II_SEQ_SNE : InstrItinClass; // seq and sne
def II_SEQI_SNEI : InstrItinClass; // seqi and snei
def II_SH : InstrItinClass;
@@ -275,6 +306,8 @@ def II_SLTI_SLTIU : InstrItinClass; // slti and sltiu
def II_SLT_SLTU : InstrItinClass; // slt and sltu
def II_SQRT_D : InstrItinClass;
def II_SQRT_S : InstrItinClass;
+def II_SEL_D : InstrItinClass;
+def II_SEL_S : InstrItinClass;
def II_SRA : InstrItinClass;
def II_SRAV : InstrItinClass;
def II_SRL : InstrItinClass;
@@ -291,6 +324,8 @@ def II_SWC2 : InstrItinClass;
def II_SWC3 : InstrItinClass;
def II_SWL : InstrItinClass;
def II_SWLE : InstrItinClass;
+def II_SWM : InstrItinClass;
+def II_SWP : InstrItinClass;
def II_SWR : InstrItinClass;
def II_SWRE : InstrItinClass;
def II_SWXC1 : InstrItinClass;
@@ -306,6 +341,10 @@ def II_LLE : InstrItinClass;
def II_SCE : InstrItinClass;
def II_TLBINV : InstrItinClass;
def II_TLBINVF : InstrItinClass;
+def II_WRPGPR : InstrItinClass;
+def II_RDPGPR : InstrItinClass;
+def II_DVP : InstrItinClass;
+def II_EVP : InstrItinClass;
//===----------------------------------------------------------------------===//
// Mips Generic instruction itineraries.
@@ -368,6 +407,7 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [
InstrItinData<II_EXT , [InstrStage<1, [ALU]>]>,
InstrItinData<II_INS , [InstrStage<1, [ALU]>]>,
InstrItinData<II_LUI , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_MOVE , [InstrStage<1, [ALU]>]>,
InstrItinData<II_MOVF , [InstrStage<1, [ALU]>]>,
InstrItinData<II_MOVN , [InstrStage<1, [ALU]>]>,
InstrItinData<II_MOVN_S , [InstrStage<1, [ALU]>]>,
@@ -375,6 +415,7 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [
InstrItinData<II_MOVT , [InstrStage<1, [ALU]>]>,
InstrItinData<II_MOVZ , [InstrStage<1, [ALU]>]>,
InstrItinData<II_NOR , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_NOT , [InstrStage<1, [ALU]>]>,
InstrItinData<II_OR , [InstrStage<1, [ALU]>]>,
InstrItinData<II_POP , [InstrStage<1, [ALU]>]>,
InstrItinData<II_RDHWR , [InstrStage<1, [ALU]>]>,
@@ -392,6 +433,8 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [
InstrItinData<II_LHU , [InstrStage<3, [ALU]>]>,
InstrItinData<II_LHUE , [InstrStage<3, [ALU]>]>,
InstrItinData<II_LW , [InstrStage<3, [ALU]>]>,
+ InstrItinData<II_LWM , [InstrStage<3, [ALU]>]>,
+ InstrItinData<II_LWP , [InstrStage<3, [ALU]>]>,
InstrItinData<II_LWPC , [InstrStage<3, [ALU]>]>,
InstrItinData<II_LWL , [InstrStage<3, [ALU]>]>,
InstrItinData<II_LWLE , [InstrStage<3, [ALU]>]>,
@@ -402,6 +445,7 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [
InstrItinData<II_LDL , [InstrStage<3, [ALU]>]>,
InstrItinData<II_LDR , [InstrStage<3, [ALU]>]>,
InstrItinData<II_LDPC , [InstrStage<3, [ALU]>]>,
+ InstrItinData<II_LI , [InstrStage<1, [ALU]>]>,
InstrItinData<II_LL , [InstrStage<3, [ALU]>]>,
InstrItinData<II_LLD , [InstrStage<3, [ALU]>]>,
InstrItinData<II_RESTORE , [InstrStage<3, [ALU]>]>,
@@ -409,15 +453,18 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [
InstrItinData<II_SH , [InstrStage<1, [ALU]>]>,
InstrItinData<II_SHE , [InstrStage<1, [ALU]>]>,
InstrItinData<II_SW , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_SWM , [InstrStage<1, [ALU]>]>,
InstrItinData<II_SWL , [InstrStage<1, [ALU]>]>,
InstrItinData<II_SWR , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_SWP , [InstrStage<1, [ALU]>]>,
InstrItinData<II_SDL , [InstrStage<1, [ALU]>]>,
InstrItinData<II_SDR , [InstrStage<1, [ALU]>]>,
InstrItinData<II_SD , [InstrStage<1, [ALU]>]>,
InstrItinData<II_SC , [InstrStage<1, [ALU]>]>,
InstrItinData<II_SCD , [InstrStage<1, [ALU]>]>,
InstrItinData<II_SAVE , [InstrStage<1, [ALU]>]>,
- InstrItinData<II_SELCCZ , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_SELCCZ_S , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_SELCCZ_D , [InstrStage<1, [ALU]>]>,
InstrItinData<II_SEQ_SNE , [InstrStage<1, [ALU]>]>,
InstrItinData<II_SEQI_SNEI , [InstrStage<1, [ALU]>]>,
InstrItinData<II_SLTI_SLTIU , [InstrStage<1, [ALU]>]>,
@@ -431,12 +478,15 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [
InstrItinData<II_BC1T , [InstrStage<1, [ALU]>]>,
InstrItinData<II_BC1TL , [InstrStage<1, [ALU]>]>,
InstrItinData<II_BC1CCZ , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_BC2CCZ , [InstrStage<1, [ALU]>]>,
InstrItinData<II_BCC , [InstrStage<1, [ALU]>]>,
InstrItinData<II_BCCC , [InstrStage<1, [ALU]>]>,
InstrItinData<II_BCCZ , [InstrStage<1, [ALU]>]>,
InstrItinData<II_BCCZAL , [InstrStage<1, [ALU]>]>,
InstrItinData<II_BCCZALS , [InstrStage<1, [ALU]>]>,
InstrItinData<II_BCCZC , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_CLASS_D , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_CLASS_S , [InstrStage<1, [ALU]>]>,
InstrItinData<II_IndirectBranchPseudo, [InstrStage<1, [ALU]>]>,
InstrItinData<II_J , [InstrStage<1, [ALU]>]>,
InstrItinData<II_JAL , [InstrStage<1, [ALU]>]>,
@@ -493,6 +543,14 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [
InstrItinData<II_MADD , [InstrStage<17, [IMULDIV]>]>,
InstrItinData<II_MADDU , [InstrStage<17, [IMULDIV]>]>,
InstrItinData<II_MFHI_MFLO , [InstrStage<1, [IMULDIV]>]>,
+ InstrItinData<II_MAX_D , [InstrStage<4, [ALU]>]>,
+ InstrItinData<II_MAX_S , [InstrStage<4, [ALU]>]>,
+ InstrItinData<II_MAXA_D , [InstrStage<4, [ALU]>]>,
+ InstrItinData<II_MAXA_S , [InstrStage<4, [ALU]>]>,
+ InstrItinData<II_MIN_S , [InstrStage<4, [ALU]>]>,
+ InstrItinData<II_MIN_D , [InstrStage<4, [ALU]>]>,
+ InstrItinData<II_MINA_S , [InstrStage<4, [ALU]>]>,
+ InstrItinData<II_MINA_D , [InstrStage<4, [ALU]>]>,
InstrItinData<II_MOD , [InstrStage<38, [IMULDIV]>]>,
InstrItinData<II_MODU , [InstrStage<38, [IMULDIV]>]>,
InstrItinData<II_MSUB , [InstrStage<17, [IMULDIV]>]>,
@@ -521,6 +579,8 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [
InstrItinData<II_MOV_S , [InstrStage<2, [ALU]>]>,
InstrItinData<II_CFC1 , [InstrStage<2, [ALU]>]>,
InstrItinData<II_CTC1 , [InstrStage<2, [ALU]>]>,
+ InstrItinData<II_CFC2 , [InstrStage<2, [ALU]>]>,
+ InstrItinData<II_CTC2 , [InstrStage<2, [ALU]>]>,
InstrItinData<II_MOVF_D , [InstrStage<2, [ALU]>]>,
InstrItinData<II_MOVF_S , [InstrStage<2, [ALU]>]>,
InstrItinData<II_MOVT_D , [InstrStage<2, [ALU]>]>,
@@ -529,6 +589,8 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [
InstrItinData<II_MOVZ_S , [InstrStage<2, [ALU]>]>,
InstrItinData<II_C_CC_S , [InstrStage<3, [ALU]>]>,
InstrItinData<II_C_CC_D , [InstrStage<3, [ALU]>]>,
+ InstrItinData<II_CMP_CC_S , [InstrStage<3, [ALU]>]>,
+ InstrItinData<II_CMP_CC_D , [InstrStage<3, [ALU]>]>,
InstrItinData<II_ADD_D , [InstrStage<4, [ALU]>]>,
InstrItinData<II_ADD_S , [InstrStage<4, [ALU]>]>,
InstrItinData<II_SUB_D , [InstrStage<4, [ALU]>]>,
@@ -549,8 +611,16 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [
InstrItinData<II_NMSUB_D , [InstrStage<8, [ALU]>]>,
InstrItinData<II_DIV_S , [InstrStage<23, [ALU]>]>,
InstrItinData<II_DIV_D , [InstrStage<36, [ALU]>]>,
+ InstrItinData<II_RECIP_D , [InstrStage<25, [ALU]>]>,
+ InstrItinData<II_RECIP_S , [InstrStage<13, [ALU]>]>,
+ InstrItinData<II_RSQRT_D , [InstrStage<29, [ALU]>]>,
+ InstrItinData<II_RSQRT_S , [InstrStage<14, [ALU]>]>,
+ InstrItinData<II_RINT_D , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_RINT_S , [InstrStage<1, [ALU]>]>,
InstrItinData<II_SQRT_S , [InstrStage<54, [ALU]>]>,
InstrItinData<II_SQRT_D , [InstrStage<12, [ALU]>]>,
+ InstrItinData<II_SEL_D , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_SEL_S , [InstrStage<1, [ALU]>]>,
InstrItinData<II_WSBH , [InstrStage<1, [ALU]>]>,
InstrItinData<II_LSA , [InstrStage<1, [ALU]>]>,
InstrItinData<II_DLSA , [InstrStage<1, [ALU]>]>,
@@ -563,6 +633,7 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [
InstrItinData<II_LDXC1 , [InstrStage<3, [ALU]>]>,
InstrItinData<II_LWXC1 , [InstrStage<3, [ALU]>]>,
InstrItinData<II_LUXC1 , [InstrStage<3, [ALU]>]>,
+ InstrItinData<II_LWXS , [InstrStage<3, [ALU]>]>,
InstrItinData<II_SDC1 , [InstrStage<1, [ALU]>]>,
InstrItinData<II_SDC2 , [InstrStage<1, [ALU]>]>,
InstrItinData<II_SDC3 , [InstrStage<1, [ALU]>]>,
@@ -579,9 +650,11 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [
InstrItinData<II_DMTC1 , [InstrStage<2, [ALU]>]>,
InstrItinData<II_DMTC2 , [InstrStage<2, [ALU]>]>,
InstrItinData<II_MFC0 , [InstrStage<2, [ALU]>]>,
+ InstrItinData<II_MFHC0 , [InstrStage<2, [ALU]>]>,
InstrItinData<II_MFC1 , [InstrStage<2, [ALU]>]>,
InstrItinData<II_MFC2 , [InstrStage<2, [ALU]>]>,
InstrItinData<II_MTC0 , [InstrStage<2, [ALU]>]>,
+ InstrItinData<II_MTHC0 , [InstrStage<2, [ALU]>]>,
InstrItinData<II_MTC1 , [InstrStage<2, [ALU]>]>,
InstrItinData<II_MTC2 , [InstrStage<2, [ALU]>]>,
InstrItinData<II_MFHC1 , [InstrStage<2, [ALU]>]>,
@@ -593,7 +666,9 @@ def MipsGenericItineraries : ProcessorItineraries<[ALU, IMULDIV], [], [
InstrItinData<II_TLBINV , [InstrStage<1, [ALU]>]>,
InstrItinData<II_TLBINVF , [InstrStage<1, [ALU]>]>,
InstrItinData<II_LLE , [InstrStage<3, [ALU]>]>,
- InstrItinData<II_SCE , [InstrStage<1, [ALU]>]>
+ InstrItinData<II_SCE , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_WRPGPR , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_RDPGPR , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_DVP , [InstrStage<1, [ALU]>]>,
+ InstrItinData<II_EVP , [InstrStage<1, [ALU]>]>
]>;
-
-include "MipsScheduleP5600.td"
diff --git a/contrib/llvm/lib/Target/Mips/MipsScheduleGeneric.td b/contrib/llvm/lib/Target/Mips/MipsScheduleGeneric.td
new file mode 100644
index 000000000000..15a0401b781e
--- /dev/null
+++ b/contrib/llvm/lib/Target/Mips/MipsScheduleGeneric.td
@@ -0,0 +1,1048 @@
+//=- MipsScheduleGeneric.td - Generic Scheduling Definitions -*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes a hypothetical in-order MIPS32R2 processor modelled on
+// the interAptiv, covering all branches of the MIPS ISAs, ASEs and ISA
+// variants. The itinerary lists are broken down into per-ISA lists, so that
+// this file can be used to rapidly develop new schedule models.
+//
+//===----------------------------------------------------------------------===//
+def MipsGenericModel : SchedMachineModel {
+ int IssueWidth = 1;
+ int MicroOpBufferSize = 0;
+
+ // These figures assume an L1 hit.
+ int LoadLatency = 2;
+ int MispredictPenalty = 4;
+
+ int HighLatency = 37;
+ list<Predicate> UnsupportedFeatures = [];
+
+ let CompleteModel = 1;
+ let PostRAScheduler = 1;
+}
+
+let SchedModel = MipsGenericModel in {
+
+// ALU Pipeline
+// ============
+
+def GenericALU : ProcResource<1> { let BufferSize = 1; }
+def GenericIssueALU : ProcResource<1> { let Super = GenericALU; }
+
+def GenericWriteALU : SchedWriteRes<[GenericIssueALU]>;
+
+// and, lui, nor, or, slti, sltiu, sub, subu, xor
+// add, addi, addiu, addu, andi, ori, rotr, se[bh], sllv?, sr[al]v?, slt, sltu,
+// xori
+def : ItinRW<[GenericWriteALU], [II_ADD, II_ADDU, II_ADDI, II_ADDIU, II_ANDI,
+ II_AND, II_ANDI, II_CLO, II_CLZ, II_EXT,
+ II_INS, II_LUI, II_MULT, II_MULTU, II_NOR,
+ II_ORI, II_OR, II_ROTR, II_ROTRV, II_SEB,
+ II_SEH, II_SLTI_SLTIU, II_SLT_SLTU, II_SLL,
+ II_SRA, II_SRL, II_SLLV, II_SRAV, II_SRLV,
+ II_SSNOP, II_SUB, II_SUBU, II_WSBH, II_XOR,
+ II_XORI]>;
+
+def : InstRW<[GenericWriteALU], (instrs COPY)>;
+
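The block above is the pattern the rest of this file repeats: declare a ProcResource for a pipeline, attach SchedWriteRes records that carry the latencies, then bind itinerary classes to them with ItinRW (or concrete opcodes with InstRW). A minimal sketch of the idiom with hypothetical names (ExampleALU, ExampleIssueALU, ExampleWriteALU), assuming it sits inside the same let SchedModel scope; it is illustrative only, since the classes it names are already mapped above:

def ExampleALU      : ProcResource<1> { let BufferSize = 1; }
def ExampleIssueALU : ProcResource<1> { let Super = ExampleALU; }
def ExampleWriteALU : SchedWriteRes<[ExampleIssueALU]> { let Latency = 1; }
// Itinerary classes map onto the write resource...
def : ItinRW<[ExampleWriteALU], [II_ADD, II_SUB]>;
// ...and individual opcodes can be bound directly.
def : InstRW<[ExampleWriteALU], (instrs COPY)>;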
+def GenericMDU : ProcResource<1> { let BufferSize = 1; }
+def GenericIssueMDU : ProcResource<1> { let Super = GenericALU; }
+def GenericIssueDIV : ProcResource<1> { let Super = GenericMDU; }
+def GenericWriteHILO : SchedWriteRes<[GenericIssueMDU]>;
+def GenericWriteALULong : SchedWriteRes<[GenericIssueALU]> { let Latency = 5; }
+def GenericWriteMove : SchedWriteRes<[GenericIssueALU]> { let Latency = 2; }
+
+def : ItinRW<[GenericWriteHILO], [II_MADD, II_MADDU, II_MSUB, II_MSUBU]>;
+
+def GenericWriteMDUtoGPR : SchedWriteRes<[GenericIssueMDU]> {
+ let Latency = 5;
+}
+
+def : ItinRW<[GenericWriteMDUtoGPR], [II_MUL]>;
+
+def GenericWriteDIV : SchedWriteRes<[GenericIssueDIV]> {
+ // Estimated worst case
+ let Latency = 33;
+ let ResourceCycles = [1, 33];
+}
+def GenericWriteDIVU : SchedWriteRes<[GenericIssueDIV]> {
+ // Estimated worst case
+ let Latency = 31;
+ let ResourceCycles = [1, 31];
+}
+
+def : ItinRW<[GenericWriteDIV], [II_DIV]>;
+
+def : ItinRW<[GenericWriteDIVU], [II_DIVU]>;
+
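In the divide writes above, Latency states when the result becomes available to consumers, while ResourceCycles states how long each listed resource stays occupied, so a following divide stalls until the unit drains. A hedged sketch with hypothetical resources, keeping one ResourceCycles entry per listed resource:

def ExampleIssueDiv : ProcResource<1>;
def ExampleDivUnit  : ProcResource<1>;
def ExampleWriteDiv : SchedWriteRes<[ExampleIssueDiv, ExampleDivUnit]> {
  let Latency = 33;             // result ready after 33 cycles
  let ResourceCycles = [1, 33]; // issue port held 1 cycle, divider held 33
}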
+// MIPS64
+// ======
+
+def : ItinRW<[GenericWriteALU], [II_DADDIU, II_DADDU, II_DADDI, II_DADD,
+ II_DCLO, II_DCLZ, II_DROTR, II_DROTR32,
+ II_DROTRV, II_DSBH, II_DSHD, II_DSLL,
+ II_DSLL32, II_DSLLV, II_DSRA, II_DSRA32,
+ II_DSRAV, II_DSRL, II_DSRL32, II_DSRLV,
+ II_DSUBU, II_DSUB]>;
+
+def : ItinRW<[GenericWriteDIV], [II_DDIV]>;
+
+def : ItinRW<[GenericWriteDIVU], [II_DDIVU]>;
+
+def : ItinRW<[GenericWriteMDUtoGPR], [II_DMUL]>;
+
+def : ItinRW<[GenericWriteHILO], [II_DMULU, II_DMULT, II_DMULTU]>;
+
+// MIPS16e
+// =======
+
+def : ItinRW<[GenericWriteALU], [IIM16Alu, IIPseudo]>;
+
+// microMIPS
+// =========
+
+def : ItinRW<[GenericWriteALU], [II_MOVE, II_LI, II_NOT]>;
+
+// MIPSR6
+// ======
+
+def GenericWriteMul : SchedWriteRes<[GenericIssueMDU]> { let Latency = 4; }
+def : ItinRW<[GenericWriteMul], [II_MUH, II_MUHU, II_MULU]>;
+
+def : ItinRW<[GenericWriteDIV], [II_MOD, II_MODU]>;
+
+def : ItinRW<[GenericWriteALU], [II_ADDIUPC, II_ALIGN, II_ALUIPC, II_AUI,
+ II_AUIPC, II_BITSWAP, II_LSA, II_SELCCZ]>;
+
+// MIPS64R6
+// ========
+
+def : ItinRW<[GenericWriteALU], [II_DALIGN, II_DAHI, II_DATI, II_DAUI,
+ II_DBITSWAP, II_DLSA]>;
+
+def : ItinRW<[GenericWriteMDUtoGPR], [II_DMUH, II_DMUHU]>;
+def : ItinRW<[GenericWriteDIV], [II_DMOD, II_DMODU]>;
+
+// mfhi, mflo
+def : ItinRW<[GenericWriteALULong], [II_MFHI_MFLO]>;
+def : ItinRW<[GenericWriteALU], [II_MOVN, II_MOVZ]>;
+def : ItinRW<[GenericWriteMove], [II_MTHI_MTLO, II_RDHWR]>;
+
+
+// CTISTD Pipeline
+// ---------------
+
+def GenericIssueCTISTD : ProcResource<1> { let Super = GenericALU; }
+
+def GenericLDST : ProcResource<1> { let BufferSize = 1; }
+def GenericIssueLDST : ProcResource<1> { let Super = GenericLDST; }
+
+def GenericWriteJump : SchedWriteRes<[GenericIssueCTISTD]>;
+def GenericWriteJumpAndLink : SchedWriteRes<[GenericIssueCTISTD]> {
+ let Latency = 2;
+}
+
+// b, beq, beql, bg[et]z, bl[et]z, bne, bnel, j, syscall, jal, bltzal, jalx,
+// jalr, jr.hb, jr, jalr.hb, jalrc, jialc
+def : ItinRW<[GenericWriteJump], [II_B, II_BCC, II_BCCZ, II_BCCZAL, II_J,
+ II_JR, II_JR_HB, II_ERET, II_ERETNC,
+ II_DERET]>;
+
+def : ItinRW<[GenericWriteJumpAndLink], [II_JAL, II_JALR, II_JALR_HB,
+ II_BC2CCZ]>;
+
+def : ItinRW<[GenericWriteJump], [II_JRC, II_JRADDIUSP]>;
+
+def : ItinRW<[GenericWriteJumpAndLink], [II_BCCZALS, II_JALS, II_JALRS]>;
+
+// MIPSR6
+// ======
+
+def : ItinRW<[GenericWriteJumpAndLink], [II_BALC, II_JALRC, II_JIALC]>;
+
+def : ItinRW<[GenericWriteJump], [II_JIC, II_BC, II_BCCC, II_BCCZC]>;
+
+
+def GenericWriteTrap : SchedWriteRes<[GenericIssueCTISTD]>;
+
+def : ItinRW<[GenericWriteTrap], [II_BREAK, II_SYSCALL, II_TEQ, II_TEQI,
+ II_TGE, II_TGEI, II_TGEIU, II_TGEU, II_TNE,
+ II_TNEI, II_TLT, II_TLTI, II_TLTU, II_TTLTIU,
+ II_TRAP, II_SDBBP]>;
+
+// COP0 Pipeline
+// =============
+
+def GenericCOP0 : ProcResource<1> { let BufferSize = 1; }
+
+def GenericIssueCOP0 : ProcResource<1> { let Super = GenericCOP0; }
+def GenericWriteCOP0TLB : SchedWriteRes<[GenericIssueCOP0]> { let Latency = 4; }
+def GenericWriteCOP0 : SchedWriteRes<[GenericIssueCOP0]> { let Latency = 3; }
+def GenericReadCOP0 : SchedWriteRes<[GenericIssueCOP0]> { let Latency = 2; }
+def GenericReadWritePGPR : SchedWriteRes<[GenericIssueCOP0]>;
+
+def : ItinRW<[GenericWriteCOP0TLB], [II_TLBP, II_TLBR, II_TLBWI, II_TLBWR]>;
+def : ItinRW<[GenericWriteCOP0TLB], [II_TLBINV, II_TLBINVF]>;
+
+def : ItinRW<[GenericReadCOP0], [II_MFC0]>;
+def : ItinRW<[GenericWriteCOP0], [II_MTC0]>;
+
+def : ItinRW<[GenericWriteCOP0], [II_EVP, II_DVP]>;
+
+// MIPSR5
+// ======
+def : ItinRW<[GenericReadCOP0], [II_MFHC0]>;
+def : ItinRW<[GenericWriteCOP0], [II_MTHC0]>;
+
+// MIPS64
+// ======
+
+def : ItinRW<[GenericReadCOP0], [II_DMFC0]>;
+def : ItinRW<[GenericWriteCOP0], [II_DMTC0]>;
+
+def : ItinRW<[GenericWriteCOP0], [II_RDPGPR, II_WRPGPR]>;
+
+def : ItinRW<[GenericWriteCOP0], [II_DI, II_EI]>;
+
+def : ItinRW<[GenericWriteCOP0], [II_EHB, II_PAUSE, II_WAIT]>;
+
+def GenericCOP2 : ProcResource<1> { let BufferSize = 1; }
+def GenericWriteCOPOther : SchedWriteRes<[GenericCOP2]>;
+
+def : ItinRW<[GenericWriteCOPOther], [II_MFC2, II_MTC2, II_DMFC2, II_DMTC2]>;
+
+// LDST Pipeline
+// -------------
+
+def GenericWriteLoad : SchedWriteRes<[GenericIssueLDST]> {
+ let Latency = 2;
+}
+
+def GenericWritePref : SchedWriteRes<[GenericIssueLDST]>;
+def GenericWriteSync : SchedWriteRes<[GenericIssueLDST]>;
+def GenericWriteCache : SchedWriteRes<[GenericIssueLDST]> { let Latency = 5; }
+
+def GenericWriteStore : SchedWriteRes<[GenericIssueLDST]>;
+def GenericWriteStoreSC : SchedWriteRes<[GenericIssueLDST]> { let Latency = 2; }
+
+def GenericWriteGPRFromBypass : SchedWriteRes<[GenericIssueLDST]> {
+ let Latency = 2;
+}
+
+def GenericWriteStoreFromOtherUnits : SchedWriteRes<[GenericIssueLDST]>;
+def GenericWriteLoadToOtherUnits : SchedWriteRes<[GenericIssueLDST]> {
+ let Latency = 0;
+}
+
+// l[bhw], l[bh]u, ll, l[dw]c[23]
+def : ItinRW<[GenericWriteLoad], [II_LB, II_LBU, II_LH, II_LHU, II_LW, II_LL,
+ II_LWC2, II_LWC3, II_LDC2, II_LDC3]>;
+
+// lw[lr]
+def : ItinRW<[GenericWriteLoad], [II_LWL, II_LWR]>;
+
+// MIPS64 loads
+def : ItinRW<[GenericWriteLoad], [II_LD, II_LLD, II_LWU]>;
+
+// ld[lr]
+def : ItinRW<[GenericWriteLoad], [II_LDL, II_LDR]>;
+
+// MIPS32 EVA
+def : ItinRW<[GenericWriteLoad], [II_LBE, II_LBUE, II_LHE, II_LHUE, II_LWE,
+ II_LLE]>;
+
+def : ItinRW<[GenericWriteLoad], [II_LWLE, II_LWRE]>;
+
+// MIPS32R6 and MIPS16e
+// ====================
+
+def : ItinRW<[GenericWriteLoad], [II_LWPC]>;
+
+// MIPS64R6
+// ====================
+
+def : ItinRW<[GenericWriteLoad], [II_LWUPC, II_LDPC]>;
+
+
+// s[bhw], sc, s[dw]c[23]
+def : ItinRW<[GenericWriteStore], [II_SB, II_SH, II_SW, II_SWC2, II_SWC3,
+ II_SDC2, II_SDC3]>;
+
+def : ItinRW<[GenericWriteStoreSC], [II_SC]>;
+
+// PreMIPSR6 sw[lr]
+def : ItinRW<[GenericWriteStore], [II_SWL, II_SWR]>;
+
+// EVA ASE stores
+def : ItinRW<[GenericWriteStore], [II_SBE, II_SHE, II_SWE, II_SCE]>;
+
+def : ItinRW<[GenericWriteStore], [II_SWLE, II_SWRE]>;
+
+// MIPS64
+// ======
+
+def : ItinRW<[GenericWriteStore], [II_SD, II_SCD]>;
+
+// PreMIPSR6 stores
+// ================
+
+def : ItinRW<[GenericWriteStore], [II_SDL, II_SDR]>;
+
+// MIPS16e
+// =======
+
+def : ItinRW<[GenericWriteLoad], [II_RESTORE]>;
+
+def : ItinRW<[GenericWriteStore], [II_SAVE]>;
+
+// microMIPS
+// =========
+
+def : ItinRW<[GenericWriteLoad], [II_LWM, II_LWP, II_LWXS]>;
+
+def : ItinRW<[GenericWriteStore], [II_SWM, II_SWP]>;
+
+// pref
+def : ItinRW<[GenericWritePref], [II_PREF]>;
+
+def : ItinRW<[GenericWritePref], [II_PREFE]>;
+
+// cache
+def : ItinRW<[GenericWriteCache], [II_CACHE]>;
+
+def : ItinRW<[GenericWriteCache], [II_CACHEE]>;
+
+// sync
+def : ItinRW<[GenericWriteSync], [II_SYNC]>;
+
+def : ItinRW<[GenericWriteSync], [II_SYNCI]>;
+
+// FPU Pipelines
+// =============
+
+def GenericFPQ : ProcResource<1> { let BufferSize = 1; }
+def GenericIssueFPUS : ProcResource<1> { let Super = GenericFPQ; }
+def GenericIssueFPUL : ProcResource<1> { let Super = GenericFPQ; }
+def GenericIssueFPULoad : ProcResource<1> { let Super = GenericFPQ; }
+def GenericIssueFPUStore : ProcResource<1> { let Super = GenericFPQ; }
+def GenericIssueFPUMove : ProcResource<1> { let Super = GenericFPQ; }
+def GenericFPUDivSqrt : ProcResource<1> { let Super = GenericFPQ; }
+
+// The floating point compare of the 24k series including interAptiv has a
+// listed latency of 1-2. Using the higher latency here.
+
+def GenericWriteFPUCmp : SchedWriteRes<[GenericIssueFPUS]> { let Latency = 2; }
+def GenericWriteFPUS : SchedWriteRes<[GenericIssueFPUS]> { let Latency = 4; }
+def GenericWriteFPUL : SchedWriteRes<[GenericIssueFPUL]> { let Latency = 5; }
+def GenericWriteFPUStore : SchedWriteRes<[GenericIssueFPUStore]> {
+  let Latency = 1;
+}
+def GenericWriteFPULoad : SchedWriteRes<[GenericIssueFPULoad]> {
+ let Latency = 2;
+}
+def GenericWriteFPUMoveFP : SchedWriteRes<[GenericIssueFPUMove]> {
+ let Latency = 4;
+}
+def GenericWriteFPUMoveGPRFPU : SchedWriteRes<[GenericIssueFPUMove]> {
+ let Latency = 2;
+}
+def GenericWriteFPUDivS : SchedWriteRes<[GenericFPUDivSqrt]> {
+ let Latency = 17;
+ let ResourceCycles = [ 14 ];
+}
+def GenericWriteFPUDivD : SchedWriteRes<[GenericFPUDivSqrt]> {
+ let Latency = 32;
+ let ResourceCycles = [ 29 ];
+}
+def GenericWriteFPURcpS : SchedWriteRes<[GenericFPUDivSqrt]> {
+ let Latency = 13;
+ let ResourceCycles = [ 10 ];
+}
+def GenericWriteFPURcpD : SchedWriteRes<[GenericFPUDivSqrt]> {
+ let Latency = 25;
+ let ResourceCycles = [ 21 ];
+}
+def GenericWriteFPURsqrtS : SchedWriteRes<[GenericFPUDivSqrt]> {
+ let Latency = 17;
+ let ResourceCycles = [ 14 ];
+}
+def GenericWriteFPURsqrtD : SchedWriteRes<[GenericFPUDivSqrt]> {
+ let Latency = 32;
+ let ResourceCycles = [ 29 ];
+}
+def GenericWriteFPUSqrtS : SchedWriteRes<[GenericFPUDivSqrt]> {
+ let Latency = 17;
+ let ResourceCycles = [ 14 ];
+}
+def GenericWriteFPUSqrtD : SchedWriteRes<[GenericFPUDivSqrt]> {
+ let Latency = 29;
+ let ResourceCycles = [ 29 ];
+}
+
+// Floating point compare and branch
+// ---------------------------------
+//
+// c.<cc>.[ds], bc1[tf], bc1[tf]l
+def : ItinRW<[GenericWriteFPUCmp], [II_C_CC_D, II_C_CC_S, II_BC1F, II_BC1T,
+ II_BC1FL, II_BC1TL]>;
+
+def : ItinRW<[GenericWriteFPUCmp], [II_CMP_CC_D, II_CMP_CC_S]>;
+
+// Short Pipe
+// ----------
+//
+// abs.[ds], abs.ps, add.[ds], neg.[ds], neg.ps, madd.s, msub.s, nmadd.s,
+// nmsub.s, sub.[ds], mul.s
+
+def : ItinRW<[GenericWriteFPUS], [II_ABS, II_ADD_D, II_ADD_S, II_MADD_S,
+ II_MSUB_S, II_MUL_S, II_NEG, II_NMADD_S,
+ II_NMSUB_S, II_SUB_S, II_SUB_D]>;
+// mov[tf].[ds]
+
+def : ItinRW<[GenericWriteFPUS], [II_MOVF_S, II_MOVF_D, II_MOVT_S, II_MOVT_D]>;
+
+// MIPSR6
+// ------
+//
+// sel(eq|ne).[ds], max.[ds], maxa.[ds], min.[ds], mina.[ds], class.[ds]
+def : ItinRW<[GenericWriteFPUS], [II_SELCCZ_S, II_SELCCZ_D, II_MAX_S,
+ II_MAX_D, II_MAXA_S, II_MAXA_D, II_MIN_S,
+ II_MIN_D, II_MINA_S, II_MINA_D, II_CLASS_S,
+ II_CLASS_D]>;
+
+// Long Pipe
+// ----------
+//
+// madd.d, msub.d, mul.d, mul.ps, nmadd.d, nmsub.d, ceil.[wl].[sd], cvt.d.[sw],
+// cvt.s.[dw], cvt.w.[sd], cvt.[sw].ps, round.[lw].[ds], floor.[lw].ds,
+// trunc.w.[ds], trunc.w.ps
+def : ItinRW<[GenericWriteFPUL], [II_MADD_D, II_MSUB_D, II_MUL_D, II_NMADD_D,
+ II_NMSUB_D, II_CEIL, II_CVT,
+ II_FLOOR, II_ROUND, II_TRUNC]>;
+
+// div.[ds], div.ps
+def : ItinRW<[GenericWriteFPUDivS], [II_DIV_S]>;
+def : ItinRW<[GenericWriteFPUDivD], [II_DIV_D]>;
+
+// sqrt.[ds], sqrt.ps
+def : ItinRW<[GenericWriteFPUSqrtS], [II_SQRT_S]>;
+def : ItinRW<[GenericWriteFPUSqrtD], [II_SQRT_D]>;
+
+// rsqrt.[ds], recip.[ds]
+def : ItinRW<[GenericWriteFPURcpS], [II_RECIP_S, II_RSQRT_S]>;
+def : ItinRW<[GenericWriteFPURcpD], [II_RECIP_D, II_RSQRT_D]>;
+
+// MIPSR6
+// ======
+//
+// rint.[ds]
+def : ItinRW<[GenericWriteFPUL], [II_RINT_S, II_RINT_D]>;
+
+// Load Pipe
+// ---------
+
+// ctc1, mtc1, mthc1, cfc1, mfc1, mfhc1
+def : ItinRW<[GenericWriteFPUMoveGPRFPU], [II_CFC1, II_CTC1, II_MFC1, II_MFHC1,
+ II_MTC1, II_MTHC1]>;
+
+// sdc1, sdxc1, suxc1, swc1, swxc1
+def : ItinRW<[GenericWriteFPUStore], [II_SDC1, II_SDXC1, II_SUXC1, II_SWC1,
+ II_SWXC1]>;
+
+// mov.[ds], movf, movt, movn.[ds], movz.[ds]
+def : ItinRW<[GenericWriteFPUMoveFP], [II_MOV_D, II_MOV_S, II_MOVF, II_MOVT,
+ II_MOVN_D, II_MOVN_S, II_MOVZ_D,
+ II_MOVZ_S]>;
+
+// l[dw]x?c1
+def : ItinRW<[GenericWriteFPULoad], [II_LDC1, II_LDXC1, II_LUXC1, II_LWC1,
+ II_LWXC1]>;
+
+// MIPS64
+// ======
+
+def : ItinRW<[GenericWriteFPUMoveGPRFPU], [II_DMFC1, II_DMTC1]>;
+
+// MIPSR6
+// ======
+
+def : ItinRW<[GenericWriteFPUS], [II_MADDF_S, II_MSUBF_S]>;
+
+def : ItinRW<[GenericWriteFPUS], [II_MADDF_D, II_MSUBF_D]>;
+
+def : ItinRW<[GenericWriteFPUCmp], [II_BC1CCZ, II_SEL_D, II_SEL_S]>;
+
+// Cavium Networks MIPS (cnMIPS) - Octeon, HasCnMips
+// =================================================
+
+def : ItinRW<[GenericWriteALU], [II_SEQ_SNE, II_SEQI_SNEI, II_POP, II_BADDU,
+ II_BBIT]>;
+
+// MIPS DSP ASE, HasDSP
+// ====================
+
+def GenericDSP : ProcResource<1> { let BufferSize = 1; }
+def GenericDSPShort : SchedWriteRes<[GenericDSP]> { let Latency = 2; }
+def GenericDSPLong : SchedWriteRes<[GenericDSP]> { let Latency = 6; }
+def GenericDSPBypass : SchedWriteRes<[GenericDSP]> { let Latency = 1; }
+def GenericDSPMTHILO : SchedWriteRes<[GenericDSP]> { let Latency = 5; }
+def GenericDSPLoad : SchedWriteRes<[GenericDSP]> { let Latency = 4; }
+def GenericDSPMTHLIP : SchedWriteRes<[GenericDSP]> { let Latency = 5; }
+
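The DSP lists below keep one opcode per InstRW, but instregex also lets a single def cover a family of variants; a hypothetical grouping (not meant to be added alongside the per-opcode defs that follow) could read:

def : InstRW<[GenericDSPShort], (instregex "^ADDQ(_S)?_PH$")>;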
+def : InstRW<[GenericDSPLong], (instregex "^EXTRV_RS_W$")>;
+def : InstRW<[GenericDSPLong], (instregex "^EXTRV_R_W$")>;
+def : InstRW<[GenericDSPLong], (instregex "^EXTRV_S_H$")>;
+def : InstRW<[GenericDSPLong], (instregex "^EXTRV_W$")>;
+def : InstRW<[GenericDSPLong], (instregex "^EXTR_RS_W$")>;
+def : InstRW<[GenericDSPLong], (instregex "^EXTR_R_W$")>;
+def : InstRW<[GenericDSPLong], (instregex "^EXTR_S_H$")>;
+def : InstRW<[GenericDSPLong], (instregex "^EXTR_W$")>;
+def : InstRW<[GenericDSPLong], (instregex "^INSV$")>;
+
+def : InstRW<[GenericDSPMTHLIP], (instregex "^MTHLIP$")>;
+def : InstRW<[GenericDSPMTHILO], (instregex "^MTHI_DSP$")>;
+def : InstRW<[GenericDSPMTHILO], (instregex "^MTLO_DSP$")>;
+
+def : InstRW<[GenericDSPShort], (instregex "^ABSQ_S_PH$")>;
+def : InstRW<[GenericDSPShort], (instregex "^ABSQ_S_W$")>;
+def : InstRW<[GenericDSPShort], (instregex "^ADDQ_PH$")>;
+def : InstRW<[GenericDSPShort], (instregex "^ADDQ_S_PH$")>;
+def : InstRW<[GenericDSPShort], (instregex "^ADDQ_S_W$")>;
+def : InstRW<[GenericDSPShort], (instregex "^ADDSC$")>;
+def : InstRW<[GenericDSPShort], (instregex "^ADDU_QB$")>;
+def : InstRW<[GenericDSPShort], (instregex "^ADDU_S_QB$")>;
+def : InstRW<[GenericDSPShort], (instregex "^ADDWC$")>;
+def : InstRW<[GenericDSPShort], (instregex "^BITREV$")>;
+def : InstRW<[GenericDSPShort], (instregex "^BPOSGE32$")>;
+def : InstRW<[GenericDSPShort], (instregex "^CMPGU_EQ_QB$")>;
+def : InstRW<[GenericDSPShort], (instregex "^CMPGU_LE_QB$")>;
+def : InstRW<[GenericDSPShort], (instregex "^CMPGU_LT_QB$")>;
+def : InstRW<[GenericDSPShort], (instregex "^CMPU_EQ_QB$")>;
+def : InstRW<[GenericDSPShort], (instregex "^CMPU_LE_QB$")>;
+def : InstRW<[GenericDSPShort], (instregex "^CMPU_LT_QB$")>;
+def : InstRW<[GenericDSPShort], (instregex "^CMP_EQ_PH$")>;
+def : InstRW<[GenericDSPShort], (instregex "^CMP_LE_PH$")>;
+def : InstRW<[GenericDSPShort], (instregex "^CMP_LT_PH$")>;
+def : InstRW<[GenericDSPShort], (instregex "^DPAQ_SA_L_W$")>;
+def : InstRW<[GenericDSPShort], (instregex "^DPAQ_S_W_PH$")>;
+def : InstRW<[GenericDSPShort], (instregex "^DPAU_H_QBL$")>;
+def : InstRW<[GenericDSPShort], (instregex "^DPAU_H_QBR$")>;
+def : InstRW<[GenericDSPShort], (instregex "^DPSQ_SA_L_W$")>;
+def : InstRW<[GenericDSPShort], (instregex "^DPSQ_S_W_PH$")>;
+def : InstRW<[GenericDSPShort], (instregex "^DPSU_H_QBL$")>;
+def : InstRW<[GenericDSPShort], (instregex "^DPSU_H_QBR$")>;
+def : InstRW<[GenericDSPShort], (instregex "^EXTPDPV$")>;
+def : InstRW<[GenericDSPShort], (instregex "^EXTPDP$")>;
+def : InstRW<[GenericDSPShort], (instregex "^EXTPV$")>;
+def : InstRW<[GenericDSPShort], (instregex "^EXTP$")>;
+def : InstRW<[GenericDSPShort], (instregex "^LBUX$")>;
+def : InstRW<[GenericDSPShort], (instregex "^LHX$")>;
+def : InstRW<[GenericDSPShort], (instregex "^LWX$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MADDU_DSP$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MADD_DSP$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MAQ_SA_W_PHL$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MAQ_SA_W_PHR$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MAQ_S_W_PHL$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MAQ_S_W_PHR$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MFHI_DSP$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MFLO_DSP$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MODSUB$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MSUBU_DSP$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MSUB_DSP$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MULEQ_S_W_PHL$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MULEQ_S_W_PHR$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MULEU_S_PH_QBL$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MULEU_S_PH_QBR$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MULQ_RS_PH$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MULSAQ_S_W_PH$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MULTU_DSP$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MULT_DSP$")>;
+def : InstRW<[GenericDSPShort], (instregex "^PACKRL_PH$")>;
+def : InstRW<[GenericDSPShort], (instregex "^PICK_PH$")>;
+def : InstRW<[GenericDSPShort], (instregex "^PICK_QB$")>;
+def : InstRW<[GenericDSPShort], (instregex "^PRECEQU_PH_QBLA$")>;
+def : InstRW<[GenericDSPShort], (instregex "^PRECEQU_PH_QBL$")>;
+def : InstRW<[GenericDSPShort], (instregex "^PRECEQU_PH_QBRA$")>;
+def : InstRW<[GenericDSPShort], (instregex "^PRECEQU_PH_QBR$")>;
+def : InstRW<[GenericDSPShort], (instregex "^PRECEQ_W_PHL$")>;
+def : InstRW<[GenericDSPShort], (instregex "^PRECEQ_W_PHR$")>;
+def : InstRW<[GenericDSPShort], (instregex "^PRECEU_PH_QBLA$")>;
+def : InstRW<[GenericDSPShort], (instregex "^PRECEU_PH_QBL$")>;
+def : InstRW<[GenericDSPShort], (instregex "^PRECEU_PH_QBRA$")>;
+def : InstRW<[GenericDSPShort], (instregex "^PRECEU_PH_QBR$")>;
+def : InstRW<[GenericDSPShort], (instregex "^PRECRQU_S_QB_PH$")>;
+def : InstRW<[GenericDSPShort], (instregex "^PRECRQ_PH_W$")>;
+def : InstRW<[GenericDSPShort], (instregex "^PRECRQ_QB_PH$")>;
+def : InstRW<[GenericDSPShort], (instregex "^PRECRQ_RS_PH_W$")>;
+def : InstRW<[GenericDSPShort], (instregex "^RADDU_W_QB$")>;
+def : InstRW<[GenericDSPShort], (instregex "^RDDSP$")>;
+def : InstRW<[GenericDSPShort], (instregex "^REPLV_PH$")>;
+def : InstRW<[GenericDSPShort], (instregex "^REPLV_QB$")>;
+def : InstRW<[GenericDSPShort], (instregex "^REPL_PH$")>;
+def : InstRW<[GenericDSPShort], (instregex "^REPL_QB$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SHILOV$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SHILO$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SHLLV_PH$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SHLLV_QB$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SHLLV_S_PH$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SHLLV_S_W$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SHLL_PH$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SHLL_QB$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SHLL_S_PH$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SHLL_S_W$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SHRAV_PH$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SHRAV_R_PH$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SHRAV_R_W$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SHRA_PH$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SHRA_R_PH$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SHRA_R_W$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SHRLV_QB$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SHRL_QB$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SUBQ_PH$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SUBQ_S_PH$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SUBQ_S_W$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SUBU_QB$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SUBU_S_QB$")>;
+def : InstRW<[GenericDSPShort], (instregex "^WRDSP$")>;
+
+// MIPS DSP R2 - hasDSP, HasDSPR2, InMicroMips
+// ===========================================
+
+def : InstRW<[GenericDSPShort], (instregex "^ABSQ_S_QB$")>;
+def : InstRW<[GenericDSPShort], (instregex "^ADDQH_PH$")>;
+def : InstRW<[GenericDSPShort], (instregex "^ADDQH_R_PH$")>;
+def : InstRW<[GenericDSPShort], (instregex "^ADDQH_R_W$")>;
+def : InstRW<[GenericDSPShort], (instregex "^ADDQH_W$")>;
+def : InstRW<[GenericDSPShort], (instregex "^ADDUH_QB$")>;
+def : InstRW<[GenericDSPShort], (instregex "^ADDUH_R_QB$")>;
+def : InstRW<[GenericDSPShort], (instregex "^ADDU_PH$")>;
+def : InstRW<[GenericDSPShort], (instregex "^ADDU_S_PH$")>;
+def : InstRW<[GenericDSPShort], (instregex "^APPEND$")>;
+def : InstRW<[GenericDSPShort], (instregex "^BALIGN$")>;
+def : InstRW<[GenericDSPShort], (instregex "^CMPGDU_EQ_QB$")>;
+def : InstRW<[GenericDSPShort], (instregex "^CMPGDU_LE_QB$")>;
+def : InstRW<[GenericDSPShort], (instregex "^CMPGDU_LT_QB$")>;
+def : InstRW<[GenericDSPShort], (instregex "^DPA_W_PH$")>;
+def : InstRW<[GenericDSPShort], (instregex "^DPAQX_SA_W_PH$")>;
+def : InstRW<[GenericDSPShort], (instregex "^DPAQX_S_W_PH$")>;
+def : InstRW<[GenericDSPShort], (instregex "^DPAX_W_PH$")>;
+def : InstRW<[GenericDSPShort], (instregex "^DPS_W_PH$")>;
+def : InstRW<[GenericDSPShort], (instregex "^DPSQX_S_W_PH$")>;
+def : InstRW<[GenericDSPShort], (instregex "^DPSQX_SA_W_PH$")>;
+def : InstRW<[GenericDSPShort], (instregex "^DPSX_W_PH$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MUL_PH$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MUL_S_PH$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MULQ_RS_W$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MULQ_S_PH$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MULQ_S_W$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MULSA_W_PH$")>;
+def : InstRW<[GenericDSPShort], (instregex "^PRECR_QB_PH$")>;
+def : InstRW<[GenericDSPShort], (instregex "^PRECR_SRA_PH_W$")>;
+def : InstRW<[GenericDSPShort], (instregex "^PRECR_SRA_R_PH_W$")>;
+def : InstRW<[GenericDSPShort], (instregex "^PREPEND$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SHRA_QB$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SHRA_R_QB$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SHRAV_QB$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SHRAV_R_QB$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SHRL_PH$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SHRLV_PH$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SUBQH_PH$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SUBQH_R_PH$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SUBQH_W$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SUBQH_R_W$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SUBU_PH$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SUBU_S_PH$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SUBUH_QB$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SUBUH_R_QB$")>;
+
+// microMIPS DSP R1 - HasDSP, InMicroMips
+// ======================================
+
+def : InstRW<[GenericDSPShort], (instregex "^ABSQ_S_PH_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^ABSQ_S_W_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^ADDQ_PH_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^ADDQ_S_PH_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^ADDQ_S_W_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^ADDSC_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^ADDU_QB_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^ADDU_S_QB_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^ADDWC_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^BITREV_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^BPOSGE32_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^CMPGU_EQ_QB_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^CMPGU_LE_QB_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^CMPGU_LT_QB_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^CMPU_EQ_QB_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^CMPU_LE_QB_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^CMPU_LT_QB_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^CMP_EQ_PH_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^CMP_LE_PH_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^CMP_LT_PH_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^DPAQ_SA_L_W_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^DPAQ_S_W_PH_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^DPAU_H_QBL_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^DPAU_H_QBR_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^DPSQ_SA_L_W_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^DPSQ_S_W_PH_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^DPSU_H_QBL_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^DPSU_H_QBR_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^EXTPDPV_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^EXTPDP_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^EXTPV_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^EXTP_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^EXTRV_RS_W_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^EXTRV_R_W_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^EXTRV_S_H_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^EXTRV_W_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^EXTR_RS_W_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^EXTR_R_W_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^EXTR_S_H_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^EXTR_W_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^INSV_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^LBUX_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^LHX_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^LWX_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MADDU_DSP_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MADD_DSP_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MAQ_SA_W_PHL_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MAQ_SA_W_PHR_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MAQ_S_W_PHL_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MAQ_S_W_PHR_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MFHI_DSP_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MFLO_DSP_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MODSUB_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MOVEP_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MOVN_I_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MOVZ_I_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MSUBU_DSP_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MSUB_DSP_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MTHI_DSP_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MTHLIP_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MTLO_DSP_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MULEQ_S_W_PHL_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MULEQ_S_W_PHR_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MULEU_S_PH_QBL_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MULEU_S_PH_QBR_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MULQ_RS_PH_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MULSAQ_S_W_PH_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MULTU_DSP_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MULT_DSP_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^PACKRL_PH_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^PICK_PH_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^PICK_QB_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^PRECEQU_PH_QBLA_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^PRECEQU_PH_QBL_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^PRECEQU_PH_QBRA_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^PRECEQU_PH_QBR_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^PRECEQ_W_PHL_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^PRECEQ_W_PHR_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^PRECEU_PH_QBLA_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^PRECEU_PH_QBL_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^PRECEU_PH_QBRA_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^PRECEU_PH_QBR_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^PRECRQU_S_QB_PH_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^PRECRQ_PH_W_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^PRECRQ_QB_PH_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^PRECRQ_RS_PH_W_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^RADDU_W_QB_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^RDDSP_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^REPLV_PH_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^REPLV_QB_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^REPL_PH_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^REPL_QB_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SHILOV_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SHILO_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SHLLV_PH_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SHLLV_QB_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SHLLV_S_PH_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SHLLV_S_W_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SHLL_PH_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SHLL_QB_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SHLL_S_PH_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SHLL_S_W_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SHRAV_PH_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SHRAV_R_PH_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SHRAV_R_W_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SHRA_PH_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SHRA_R_PH_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SHRA_R_W_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SHRLV_QB_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SHRL_QB_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SUBQ_PH_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SUBQ_S_PH_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SUBQ_S_W_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SUBU_QB_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SUBU_S_QB_MM$")>;
+def : InstRW<[GenericDSPShort], (instregex "^WRDSP_MM$")>;
+
+
+// microMIPS DSP R2 - hasDSP, HasDSPR2, InMicroMips
+// ================================================
+
+def : InstRW<[GenericDSPShort], (instregex "^ABSQ_S_QB_MMR2$")>;
+def : InstRW<[GenericDSPShort], (instregex "^ADDQH_PH_MMR2$")>;
+def : InstRW<[GenericDSPShort], (instregex "^ADDQH_R_PH_MMR2$")>;
+def : InstRW<[GenericDSPShort], (instregex "^ADDQH_R_W_MMR2$")>;
+def : InstRW<[GenericDSPShort], (instregex "^ADDQH_W_MMR2$")>;
+def : InstRW<[GenericDSPShort], (instregex "^ADDUH_QB_MMR2$")>;
+def : InstRW<[GenericDSPShort], (instregex "^ADDUH_R_QB_MMR2$")>;
+def : InstRW<[GenericDSPShort], (instregex "^ADDU_PH_MMR2$")>;
+def : InstRW<[GenericDSPShort], (instregex "^ADDU_S_PH_MMR2$")>;
+def : InstRW<[GenericDSPShort], (instregex "^APPEND_MMR2$")>;
+def : InstRW<[GenericDSPShort], (instregex "^BALIGN_MMR2$")>;
+def : InstRW<[GenericDSPShort], (instregex "^CMPGDU_EQ_QB_MMR2$")>;
+def : InstRW<[GenericDSPShort], (instregex "^CMPGDU_LE_QB_MMR2$")>;
+def : InstRW<[GenericDSPShort], (instregex "^CMPGDU_LT_QB_MMR2$")>;
+def : InstRW<[GenericDSPShort], (instregex "^DPA_W_PH_MMR2$")>;
+def : InstRW<[GenericDSPShort], (instregex "^DPAQX_SA_W_PH_MMR2$")>;
+def : InstRW<[GenericDSPShort], (instregex "^DPAQX_S_W_PH_MMR2$")>;
+def : InstRW<[GenericDSPShort], (instregex "^DPAX_W_PH_MMR2$")>;
+def : InstRW<[GenericDSPShort], (instregex "^DPS_W_PH_MMR2$")>;
+def : InstRW<[GenericDSPShort], (instregex "^DPSQX_S_W_PH_MMR2$")>;
+def : InstRW<[GenericDSPShort], (instregex "^DPSQX_SA_W_PH_MMR2$")>;
+def : InstRW<[GenericDSPShort], (instregex "^DPSX_W_PH_MMR2$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MUL_PH_MMR2$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MUL_S_PH_MMR2$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MULQ_RS_W_MMR2$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MULQ_S_PH_MMR2$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MULQ_S_W_MMR2$")>;
+def : InstRW<[GenericDSPShort], (instregex "^MULSA_W_PH_MMR2$")>;
+def : InstRW<[GenericDSPShort], (instregex "^PRECR_QB_PH_MMR2$")>;
+def : InstRW<[GenericDSPShort], (instregex "^PRECR_SRA_PH_W_MMR2$")>;
+def : InstRW<[GenericDSPShort], (instregex "^PRECR_SRA_R_PH_W_MMR2$")>;
+def : InstRW<[GenericDSPShort], (instregex "^PREPEND_MMR2$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SHRA_QB_MMR2$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SHRA_R_QB_MMR2$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SHRAV_QB_MMR2$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SHRAV_R_QB_MMR2$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SHRL_PH_MMR2$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SHRLV_PH_MMR2$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SUBQH_PH_MMR2$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SUBQH_R_PH_MMR2$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SUBQH_W_MMR2$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SUBQH_R_W_MMR2$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SUBU_PH_MMR2$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SUBU_S_PH_MMR2$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SUBUH_QB_MMR2$")>;
+def : InstRW<[GenericDSPShort], (instregex "^SUBUH_R_QB_MMR2$")>;
+
+// microMIPS DSP R3 - hasDSP, hasDSPR2, hasDSPR3, InMicroMips
+// ==========================================================
+
+def : InstRW<[GenericDSPShort], (instregex "^BPOSGE32C_MMR3$")>;
+
+// MIPS MSA ASE - hasMSA
+// =====================
+
+def GenericWriteMSAShortLogic : SchedWriteRes<[GenericIssueFPUS]>;
+def GenericWriteMSAShortInt : SchedWriteRes<[GenericIssueFPUS]> {
+  let Latency = 2;
+}
+def GenericWriteMoveOtherUnitsToFPU : SchedWriteRes<[GenericIssueFPUS]>;
+def GenericWriteMSAOther3 : SchedWriteRes<[GenericIssueFPUS]> {
+  let Latency = 3;
+}
+def GenericWriteMSALongInt : SchedWriteRes<[GenericIssueFPUS]> {
+  let Latency = 5;
+}
+def GenericWriteFPUDivI : SchedWriteRes<[GenericFPQ]> {
+ let Latency = 33;
+ let ResourceCycles = [ 33 ];
+}
+
+// FPUS is also used in moves from floating point and MSA registers to general
+// purpose registers.
+def GenericWriteMoveFPUSToOtherUnits : SchedWriteRes<[GenericIssueFPUS]> {
+ let Latency = 0;
+}
+
+// FPUL is also used in moves from floating point and MSA registers to general
+// purpose registers.
+def GenericWriteMoveFPULToOtherUnits : SchedWriteRes<[GenericIssueFPUL]>;
+
+
+// adds_a.[bhwd], adds_[asu].[bhwd], addvi?.[bhwd], asub_[us].[bhwd],
+// aver?_[us].[bhwd]
+def : InstRW<[GenericWriteMSAShortInt], (instregex "^ADD_A_[BHWD]$")>;
+def : InstRW<[GenericWriteMSAShortInt], (instregex "^ADDS_[ASU]_[BHWD]$")>;
+
+// TODO: ADDVI_[BHW] might be 1 cycle latency rather than 2. Need to confirm it.
+// add.[bhwd], addvi.[bhwd], asub_[us].[bhwd], ave.[bhwd], aver.[bhwd]
+def : InstRW<[GenericWriteMSAShortInt], (instregex "^ADDVI?_[BHWD]$")>;
+def : InstRW<[GenericWriteMSAShortInt], (instregex "^ASUB_[US].[BHWD]$")>;
+def : InstRW<[GenericWriteMSAShortInt], (instregex "^AVER?_[US].[BHWD]$")>;
+
+// and.v, andi.b, move.v, ldi.[bhwd], xor.v, nor.v, xori.b, nori.b
+def : InstRW<[GenericWriteMSAShortLogic], (instregex "^MOVE_V$")>;
+def : InstRW<[GenericWriteMSAShortLogic], (instregex "^LDI_[BHWD]$")>;
+def : InstRW<[GenericWriteMSAShortLogic], (instregex "^(AND|OR|[XN]OR)_V$")>;
+def : InstRW<[GenericWriteMSAShortLogic], (instregex "^(AND|OR|[XN]OR)I_B$")>;
+
+// vshf.[bhwd], binsl.[bhwd], binsr.[bhwd], insert.[bhwd], sld?.[bhwd],
+// bset.[bhwd], bclr.[bhwd], bneg.[bhwd], bsel_v, bseli_b
+def : InstRW<[GenericWriteMSAShortInt], (instregex "^VSHF_[BHWD]$")>;
+def : InstRW<[GenericWriteMSAShortInt], (instregex "^(BINSL|BINSLI)_[BHWD]$")>;
+def : InstRW<[GenericWriteMSAShortInt], (instregex "^(BINSR|BINSRI)_[BHWD]$")>;
+def : InstRW<[GenericWriteMSAShortInt], (instregex "^INSERT_[BHWD]$")>;
+def : InstRW<[GenericWriteMSAShortInt], (instregex "^(SLD|SLDI)_[BHWD]$")>;
+def : InstRW<[GenericWriteMSAShortInt], (instregex "^(BSET|BSETI)_[BHWD]$")>;
+def : InstRW<[GenericWriteMSAShortInt], (instregex "^(BCLR|BCLRI)_[BHWD]$")>;
+def : InstRW<[GenericWriteMSAShortInt], (instregex "^(BNEG|BNEGI)_[BHWD]$")>;
+def : InstRW<[GenericWriteMSAShortInt], (instregex "^(BSEL_V|BSELI_B)$")>;
+def : InstRW<[GenericWriteMSAShortInt], (instregex "^BMN*Z.*$")>;
+
+// pcnt.[bhwd], sat_s.[bhwd], sat_u.[bhwd]
+def : InstRW<[GenericWriteMSAOther3], (instregex "^PCNT_[BHWD]$")>;
+def : InstRW<[GenericWriteMSAOther3], (instregex "^SAT_(S|U)_[BHWD]$")>;
+
+// bnz.[bhwdv], cfcmsa, ctcmsa
+def : InstRW<[GenericWriteMSAShortLogic], (instregex "^(BNZ|BZ)_[BHWDV]$")>;
+def : InstRW<[GenericWriteMSAShortLogic], (instregex "^C(F|T)CMSA$")>;
+
+// shf.[bhw], fill[bhwd], splat?.[bhwd]
+def : InstRW<[GenericWriteMSAShortInt], (instregex "^SHF_[BHW]$")>;
+def : InstRW<[GenericWriteMSAShortInt], (instregex "^FILL_[BHWD]$")>;
+def : InstRW<[GenericWriteMSAShortInt], (instregex "^(SPLAT|SPLATI)_[BHWD]$")>;
+
+
+// fexp2_w, fexp2_d
+def : InstRW<[GenericWriteFPUS], (instregex "^FEXP2_(W|D)$")>;
+
+// compare, converts, round to int, floating point truncate.
+def : InstRW<[GenericWriteFPUS], (instregex "^(CLT|CLTI)_(S|U)_[BHWD]$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^(CLE|CLEI)_(S|U)_[BHWD]$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^(CEQ|CEQI)_[BHWD]$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^CMP_UN_(S|D)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^CMP_UEQ_(S|D)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^CMP_EQ_(S|D)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^CMP_LT_(S|D)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^CMP_ULT_(S|D)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^CMP_LE_(S|D)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^CMP_ULE_(S|D)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^FS(AF|EQ|LT|LE|NE|OR)_(W|D)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^FSUEQ_(W|D)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^FSULE_(W|D)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^FSULT_(W|D)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^FSUNE_(W|D)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^FSUN_(W|D)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^FCAF_(W|D)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^FCEQ_(W|D)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^FCLE_(W|D)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^FCLT_(W|D)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^FCNE_(W|D)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^FCOR_(W|D)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^FCUEQ_(W|D)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^FCULE_(W|D)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^FCULT_(W|D)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^FCUNE_(W|D)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^FCUN_(W|D)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^FABS_(W|D)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^FFINT_(U|S)_(W|D)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^FFQL_(W|D)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^FFQR_(W|D)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^FTINT_(U|S)_(W|D)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^FRINT_(W|D)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^FTQ_(H|W)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^FTRUNC_(U|S)_(W|D)$")>;
+
+// fexdo.[hw], fexupl.[wd], fexupr.[wd]
+def : InstRW<[GenericWriteFPUS], (instregex "^FEXDO_(H|W)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^FEXUPL_(W|D)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^FEXUPR_(W|D)$")>;
+
+// fclass.[wd], fmax.[wd], fmax_a.[wd], fmin.[wd], fmin_a.[wd], flog2.[wd]
+def : InstRW<[GenericWriteFPUS], (instregex "^FCLASS_(W|D)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^FMAX_A_(W|D)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^FMAX_(W|D)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^FMIN_A_(W|D)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^FMIN_(W|D)$")>;
+def : InstRW<[GenericWriteFPUS], (instregex "^FLOG2_(W|D)$")>;
+
+// interleave right/left, interleave even/odd, insert
+def : InstRW<[GenericWriteMSAShortLogic], (instregex "^(ILVR|ILVL)_[BHWD]$")>;
+def : InstRW<[GenericWriteMSAShortLogic], (instregex "^(ILVEV|ILVOD)_[BHWD]$")>;
+def : InstRW<[GenericWriteMSAShortLogic], (instregex "^INSVE_[BHWD]$")>;
+
+// subs_?.[bhwd], subsus_?.[bhwd], subsuu_?.[bhwd], subvi.[bhwd], subv.[bhwd],
+def : InstRW<[GenericWriteMSAShortInt], (instregex "^SUBS_(S|U)_[BHWD]$")>;
+def : InstRW<[GenericWriteMSAShortInt], (instregex "^SUBSUS_(S|U)_[BHWD]$")>;
+def : InstRW<[GenericWriteMSAShortInt], (instregex "^SUBSUU_(S|U)_[BHWD]$")>;
+def : InstRW<[GenericWriteMSAShortInt], (instregex "^SUBVI_[BHWD]$")>;
+def : InstRW<[GenericWriteMSAShortInt], (instregex "^SUBV_[BHWD]$")>;
+
+// mod_[su].[bhwd], div_[su].[bhwd]
+def : InstRW<[GenericWriteFPUDivI], (instregex "^MOD_(S|U)_[BHWD]$")>;
+def : InstRW<[GenericWriteFPUDivI], (instregex "^DIV_(S|U)_[BHWD]$")>;
+
+// hadd_[su].[bhwd], hsub_[su].[bhwd], max_[sua].[bhwd], min_[sua].[bhwd],
+// maxi_[su].[bhwd], mini_[su].[bhwd], sra?.[bhwd], srar?.[bhwd], srlr.[bhwd],
+// sll?.[bhwd], pckev.[bhwd], pckod.[bhwd], nloc.[bhwd], nlzc.[bhwd],
+// insve.[bhwd]
+def : InstRW<[GenericWriteMSAShortLogic], (instregex "^HADD_(S|U)_[BHWD]$")>;
+def : InstRW<[GenericWriteMSAShortLogic], (instregex "^HSUB_(S|U)_[BHWD]$")>;
+def : InstRW<[GenericWriteMSAShortLogic], (instregex "^(MAX|MIN)_S_[BHWD]$")>;
+def : InstRW<[GenericWriteMSAShortLogic], (instregex "^(MAX|MIN)_U_[BHWD]$")>;
+def : InstRW<[GenericWriteMSAShortLogic], (instregex "^(MAX|MIN)_A_[BHWD]$")>;
+def : InstRW<[GenericWriteMSAShortLogic],
+ (instregex "^(MAXI|MINI)_(S|U)_[BHWD]$")>;
+def : InstRW<[GenericWriteMSAShortLogic], (instregex "^(SRA|SRAI)_[BHWD]$")>;
+def : InstRW<[GenericWriteMSAShortLogic], (instregex "^(SRL|SRLI)_[BHWD]$")>;
+def : InstRW<[GenericWriteMSAShortLogic], (instregex "^(SRAR|SRARI)_[BHWD]$")>;
+def : InstRW<[GenericWriteMSAShortLogic], (instregex "^(SRLR|SRLRI)_[BHWD]$")>;
+def : InstRW<[GenericWriteMSAShortLogic], (instregex "^(SLL|SLLI)_[BHWD]$")>;
+def : InstRW<[GenericWriteMSAShortLogic], (instregex "^(PCKEV|PCKOD)_[BHWD]$")>;
+def : InstRW<[GenericWriteMSAShortLogic], (instregex "^(NLOC|NLZC)_[BHWD]$")>;
+def : InstRW<[GenericWriteMSAShortLogic], (instregex "^INSVE_[BHWD]$")>;
+
+// dpadd_?.[bhwd], dpsub_?.[bhwd], dotp_?.[bhwd], msubv.[bhwd], maddv.[bhwd]
+// mulv.[bhwd].
+def : InstRW<[GenericWriteMSALongInt], (instregex "^DPADD_(S|U)_[HWD]$")>;
+def : InstRW<[GenericWriteMSALongInt], (instregex "^DPSUB_(S|U)_[HWD]$")>;
+def : InstRW<[GenericWriteMSALongInt], (instregex "^DOTP_(S|U)_[HWD]$")>;
+def : InstRW<[GenericWriteMSALongInt], (instregex "^MSUBV_[BHWD]$")>;
+def : InstRW<[GenericWriteMSALongInt], (instregex "^MADDV_[BHWD]$")>;
+def : InstRW<[GenericWriteMSALongInt], (instregex "^MULV_[BHWD]$")>;
+
+// madd?.q.[hw], msub?.q.[hw], mul?.q.[hw]
+def : InstRW<[GenericWriteMSALongInt], (instregex "^MADDR_Q_[HW]$")>;
+def : InstRW<[GenericWriteMSALongInt], (instregex "^MADD_Q_[HW]$")>;
+def : InstRW<[GenericWriteMSALongInt], (instregex "^MSUBR_Q_[HW]$")>;
+def : InstRW<[GenericWriteMSALongInt], (instregex "^MSUB_Q_[HW]$")>;
+def : InstRW<[GenericWriteMSALongInt], (instregex "^MULR_Q_[HW]$")>;
+def : InstRW<[GenericWriteMSALongInt], (instregex "^MUL_Q_[HW]$")>;
+
+// fadd.[dw], fmadd.[dw], fmul.[dw], frcp.[dw], frsqrt.[dw], fsqrt.[dw]
+// fsub.[dw], fdiv.[dw]
+def : InstRW<[GenericWriteFPUL], (instregex "^FADD_[DW]$")>;
+def : InstRW<[GenericWriteFPUL], (instregex "^FMADD_[DW]$")>;
+def : InstRW<[GenericWriteFPUL], (instregex "^FMSUB_[DW]$")>;
+def : InstRW<[GenericWriteFPUL], (instregex "^FMUL_[DW]$")>;
+def : InstRW<[GenericWriteFPUL], (instregex "^FRCP_[DW]$")>;
+def : InstRW<[GenericWriteFPUL], (instregex "^FRSQRT_[DW]$")>;
+def : InstRW<[GenericWriteFPUL], (instregex "^FSQRT_[DW]$")>;
+def : InstRW<[GenericWriteFPUL], (instregex "^FSUB_[DW]$")>;
+def : InstRW<[GenericWriteFPUL], (instregex "^FDIV_[DW]$")>;
+
+// copy_[su].[bhwd]
+def : InstRW<[GenericWriteFPUMoveGPRFPU], (instregex "^COPY_U_[BHW]$")>;
+def : InstRW<[GenericWriteFPUMoveGPRFPU], (instregex "^COPY_S_[BHWD]$")>;
+
+def : InstRW<[GenericWriteFPUStore], (instregex "^ST_[BHWD]$")>;
+def : InstRW<[GenericWriteFPULoad], (instregex "^LD_[BHWD]$")>;
+}
diff --git a/contrib/llvm/lib/Target/Mips/MipsScheduleP5600.td b/contrib/llvm/lib/Target/Mips/MipsScheduleP5600.td
index cee42873c6e8..882a241d1426 100644
--- a/contrib/llvm/lib/Target/Mips/MipsScheduleP5600.td
+++ b/contrib/llvm/lib/Target/Mips/MipsScheduleP5600.td
@@ -13,7 +13,14 @@ def MipsP5600Model : SchedMachineModel {
int LoadLatency = 4;
int MispredictPenalty = 8; // TODO: Estimated
- let CompleteModel = 0;
+ let CompleteModel = 1;
+
+ list<Predicate> UnsupportedFeatures = [HasMips32r6, HasMips64r6,
+ HasMips64, HasMips64r2, HasCnMips,
+ InMicroMips, InMips16Mode,
+ HasMicroMips32r6, HasMicroMips64r6,
+ HasDSP, HasDSPR2];
+
}
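With CompleteModel set to 1, TableGen verifies that every instruction reachable for this subtarget has scheduling information; UnsupportedFeatures names the predicates whose instructions are exempted from that check rather than modelled. A reduced sketch of the idiom (ExampleModel is hypothetical, the predicates are among those listed above):

def ExampleModel : SchedMachineModel {
  let CompleteModel = 1;
  // Instructions guarded by these predicates are skipped by the completeness
  // check instead of being reported as missing scheduling info.
  list<Predicate> UnsupportedFeatures = [InMicroMips, HasDSP];
}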
let SchedModel = MipsP5600Model in {
@@ -31,7 +38,8 @@ def P5600WriteALU : SchedWriteRes<[P5600IssueALU]>;
// and, lui, nor, or, slti, sltiu, sub, subu, xor
def : ItinRW<[P5600WriteALU],
- [II_AND, II_LUI, II_NOR, II_OR, II_SLTI_SLTIU, II_SUBU, II_XOR]>;
+ [II_AND, II_LUI, II_NOR, II_OR, II_SLTI_SLTIU, II_SUB, II_SUBU,
+ II_XOR]>;
// AGQ Pipelines
// =============
@@ -53,11 +61,22 @@ def P5600WriteJumpAndLink : SchedWriteRes<[P5600IssueCTISTD, P5600CTISTD]> {
let Latency = 2;
}
-// b, beq, beql, bg[et]z, bl[et]z, bne, bnel, j, syscall, jal, bltzal, jalx,
+// b, beq, beql, bg[et]z, bl[et]z, bne, bnel, j, syscall, jal, bltzal,
// jalr, jr.hb, jr
-def : ItinRW<[P5600WriteJump], [II_B, II_BCC, II_BCCZ, II_BCCZAL, II_J, II_JR]>;
-def : ItinRW<[P5600WriteJumpAndLink], [II_JAL, II_JALR]>;
+def : ItinRW<[P5600WriteJump], [II_B, II_BCC, II_BCCZ, II_BCCZAL, II_J, II_JR,
+ II_JR_HB, II_DERET, II_ERET, II_ERETNC,
+ II_SYSCALL, II_BREAK, II_SDBBP, II_SSNOP,
+ II_TEQ, II_TEQI, II_TGE, II_TGEI, II_TGEIU,
+ II_TGEU, II_TLT, II_TLTI, II_TLTU, II_TNE,
+ II_TNEI, II_TRAP, II_TTLTIU, II_WAIT,
+ II_PAUSE]>;
+
+def : ItinRW<[P5600WriteJumpAndLink], [II_JAL, II_JALR, II_JALR_HB]>;
+def P5600COP0 : SchedWriteRes<[P5600IssueCTISTD, P5600CTISTD]>;
+
+def : ItinRW<[P5600COP0], [II_TLBINV, II_TLBINVF, II_TLBP, II_TLBR, II_TLBWI,
+ II_TLBWR, II_MFC0, II_MTC0]>;
// LDST Pipeline
// -------------
@@ -69,7 +88,7 @@ def P5600WriteLoadShifted : SchedWriteRes<[P5600IssueLDST, P5600CTISTD]> {
let Latency = 4;
}
-def P5600WritePref : SchedWriteRes<[P5600IssueLDST]>;
+def P5600WriteCache : SchedWriteRes<[P5600IssueLDST]>;
def P5600WriteStore : SchedWriteRes<[P5600IssueLDST, P5600CTISTD]> {
// FIXME: This is a bit pessimistic. P5600CTISTD is only used during cycle 2
@@ -87,21 +106,21 @@ def P5600WriteLoadToOtherUnits : SchedWriteRes<[P5600IssueLDST]> {
}
// l[bhw], l[bh]u, ll
-def : ItinRW<[P5600WriteLoad], [II_LB, II_LBU, II_LH, II_LHU, II_LW, II_LWU]>;
+def : ItinRW<[P5600WriteLoad], [II_LB, II_LBE, II_LBU, II_LBUE, II_LH, II_LHE,
+ II_LHU, II_LHUE, II_LW, II_LWE, II_LL, II_LLE,
+ II_LWPC]>;
// lw[lr]
-def : ItinRW<[P5600WriteLoadShifted], [II_LWL, II_LWR]>;
+def : ItinRW<[P5600WriteLoadShifted], [II_LWL, II_LWLE, II_LWR, II_LWRE]>;
// s[bhw], sw[lr]
-def : ItinRW<[P5600WriteStore], [II_SB, II_SH, II_SW, II_SWL, II_SWR]>;
-
-// pref
-// (this instruction does not exist in the backend yet)
-def : ItinRW<[P5600WritePref], []>;
+def : ItinRW<[P5600WriteStore], [II_SB, II_SBE, II_SH, II_SHE, II_SW, II_SWE,
+ II_SWL, II_SWLE, II_SWR, II_SWRE, II_SC,
+ II_SCE]>;
-// sc
-// (this instruction does not exist in the backend yet)
-def : ItinRW<[P5600WriteStore], []>;
+// pref, cache, sync, synci
+def : ItinRW<[P5600WriteCache], [II_PREF, II_PREFE, II_CACHE, II_CACHEE,
+ II_SYNC, II_SYNCI]>;
// LDST is also used in moves from general purpose registers to floating point
// and MSA.
@@ -134,11 +153,11 @@ def P5600WriteAL2MAdd: SchedWriteRes<[P5600IssueAL2, P5600CTISTD]> {
let Latency = 5;
}
-// clo, clz, di, mfhi, mflo
-def : ItinRW<[P5600WriteAL2], [II_CLO, II_CLZ, II_MFHI_MFLO]>;
+// clo, clz, di, ei, mfhi, mflo
+def : ItinRW<[P5600WriteAL2], [II_CLO, II_CLZ, II_DI, II_EI, II_MFHI_MFLO]>;
// ehb, rdhwr, rdpgpr, wrpgpr, wsbh
-def : ItinRW<[P5600WriteAL2ShadowMov], [II_RDHWR]>;
+def : ItinRW<[P5600WriteAL2ShadowMov], [II_EHB, II_RDHWR, II_WSBH]>;
// mov[nz]
def : ItinRW<[P5600WriteAL2CondMov], [II_MOVN, II_MOVZ]>;
@@ -156,8 +175,7 @@ def : ItinRW<[P5600WriteAL2MAdd],
[II_MADD, II_MADDU, II_MSUB, II_MSUBU, II_MTHI_MTLO]>;
// ext, ins
-def : ItinRW<[P5600WriteAL2BitExt],
- [II_EXT, II_INS]>;
+def : ItinRW<[P5600WriteAL2BitExt], [II_EXT, II_INS]>;
// Either ALU or AL2 Pipelines
// ---------------------------
@@ -176,9 +194,10 @@ def P5600WriteEitherALU : SchedWriteVariant<
// add, addi, addiu, addu, andi, ori, rotr, se[bh], sllv?, sr[al]v?, slt, sltu,
// xori
def : ItinRW<[P5600WriteEitherALU],
- [II_ADDI, II_ADDIU, II_ANDI, II_ORI, II_ROTR, II_SEB, II_SEH,
+ [II_ADD, II_ADDI, II_ADDIU, II_ANDI, II_ORI, II_ROTR, II_SEB, II_SEH,
II_SLT_SLTU, II_SLL, II_SRA, II_SRL, II_XORI, II_ADDU, II_SLLV,
- II_SRAV, II_SRLV]>;
+ II_SRAV, II_SRLV, II_LSA]>;
+def : InstRW<[], (instrs COPY)>;
// FPU Pipelines
// =============
@@ -193,6 +212,11 @@ def P5600FPUDivSqrt : ProcResource<2>;
def P5600WriteFPUS : SchedWriteRes<[P5600IssueFPUS]>;
def P5600WriteFPUL : SchedWriteRes<[P5600IssueFPUL]> { let Latency = 4; }
def P5600WriteFPUL_MADDSUB : SchedWriteRes<[P5600IssueFPUL]> { let Latency = 6; }
+def P5600WriteFPUDivI : SchedWriteRes<[P5600IssueFPUL, P5600FPUDivSqrt]> {
+ // Best/Common/Worst case = 7 / 23 / 27
+ let Latency = 23; // Using common case
+ let ResourceCycles = [ 1, 23 ];
+}
def P5600WriteFPUDivS : SchedWriteRes<[P5600IssueFPUL, P5600FPUDivSqrt]> {
// Best/Common/Worst case = 7 / 23 / 27
let Latency = 23; // Using common case
@@ -236,6 +260,29 @@ def P5600WriteFPUSqrtD : SchedWriteRes<[P5600IssueFPUL, P5600FPUDivSqrt]> {
def P5600WriteMSAShortLogic : SchedWriteRes<[P5600IssueFPUS]>;
def P5600WriteMSAShortInt : SchedWriteRes<[P5600IssueFPUS]> { let Latency = 2; }
def P5600WriteMoveOtherUnitsToFPU : SchedWriteRes<[P5600IssueFPUS]>;
+def P5600WriteMSAOther3 : SchedWriteRes<[P5600IssueFPUS]> { let Latency = 3; }
+def P5600WriteMSALongInt : SchedWriteRes<[P5600IssueFPUS]> { let Latency = 5; }
+
+// vshf.[bhwd], binsl.[bhwd], binsr.[bhwd], insert.[bhwd], sld?.[bhwd],
+// bset.[bhwd], bclr.[bhwd], bneg.[bhwd], bsel_v, bseli_b
+def : InstRW<[P5600WriteMSAShortInt], (instregex "^VSHF_[BHWD]$")>;
+def : InstRW<[P5600WriteMSAShortInt], (instregex "^(BINSL|BINSLI)_[BHWD]$")>;
+def : InstRW<[P5600WriteMSAShortInt], (instregex "^(BINSR|BINSRI)_[BHWD]$")>;
+def : InstRW<[P5600WriteMSAShortInt], (instregex "^INSERT_[BHWD]$")>;
+def : InstRW<[P5600WriteMSAShortInt], (instregex "^(SLD|SLDI)_[BHWD]$")>;
+def : InstRW<[P5600WriteMSAShortInt], (instregex "^(BSET|BSETI)_[BHWD]$")>;
+def : InstRW<[P5600WriteMSAShortInt], (instregex "^(BCLR|BCLRI)_[BHWD]$")>;
+def : InstRW<[P5600WriteMSAShortInt], (instregex "^(BNEG|BNEGI)_[BHWD]$")>;
+def : InstRW<[P5600WriteMSAShortInt], (instregex "^(BSEL_V|BSELI_B)$")>;
+def : InstRW<[P5600WriteMSAShortInt], (instregex "^BMN*Z.*$")>;
+
+// pcnt.[bhwd], sat_s.[bhwd], sat_u.[bhwd]
+def : InstRW<[P5600WriteMSAOther3], (instregex "^PCNT_[BHWD]$")>;
+def : InstRW<[P5600WriteMSAOther3], (instregex "^SAT_(S|U)_[BHWD]$")>;
+
+// bnz.[bhwdv], cfcmsa, ctcmsa
+def : InstRW<[P5600WriteMSAShortLogic], (instregex "^(BNZ|BZ)_[BHWDV]$")>;
+def : InstRW<[P5600WriteMSAShortLogic], (instregex "^C(F|T)CMSA$")>;
// FPUS is also used in moves from floating point and MSA registers to general
// purpose registers.
@@ -257,13 +304,16 @@ def : ItinRW<[P5600WriteFPUS], [II_ABS, II_MOVF_D, II_MOVF_S, II_MOVT_D,
II_MOVT_S, II_MOV_D, II_MOV_S, II_NEG]>;
// adds_a.[bhwd], adds_[asu].[bhwd], addvi?.[bhwd], asub_[us].[bhwd],
-// aver?_[us].[bhwd]
+// aver?_[us].[bhwd], shf.[bhw], fill[bhwd], splat?.[bhwd]
def : InstRW<[P5600WriteMSAShortInt], (instregex "^ADD_A_[BHWD]$")>;
def : InstRW<[P5600WriteMSAShortInt], (instregex "^ADDS_[ASU]_[BHWD]$")>;
// TODO: ADDVI_[BHW] might be 1 cycle latency rather than 2. Need to confirm it.
def : InstRW<[P5600WriteMSAShortInt], (instregex "^ADDVI?_[BHWD]$")>;
def : InstRW<[P5600WriteMSAShortInt], (instregex "^ASUB_[US].[BHWD]$")>;
def : InstRW<[P5600WriteMSAShortInt], (instregex "^AVER?_[US].[BHWD]$")>;
+def : InstRW<[P5600WriteMSAShortInt], (instregex "^SHF_[BHW]$")>;
+def : InstRW<[P5600WriteMSAShortInt], (instregex "^FILL_[BHWD]$")>;
+def : InstRW<[P5600WriteMSAShortInt], (instregex "^(SPLAT|SPLATI)_[BHWD]$")>;
// and.v, andi.b, move.v, ldi.[bhwd]
def : InstRW<[P5600WriteMSAShortLogic], (instregex "^MOVE_V$")>;
@@ -271,6 +321,111 @@ def : InstRW<[P5600WriteMSAShortLogic], (instregex "^LDI_[BHWD]$")>;
def : InstRW<[P5600WriteMSAShortLogic], (instregex "^(AND|OR|[XN]OR)_V$")>;
def : InstRW<[P5600WriteMSAShortLogic], (instregex "^(AND|OR|[XN]OR)I_B$")>;
+// vshf.[bhwd], binsl.[bhwd], binsr.[bhwd], insert.[bhwd], sld?.[bhwd],
+// bset.[bhwd], bclr.[bhwd], bneg.[bhwd], bsel_v, bseli_b
+def : InstRW<[P5600WriteMSAShortInt], (instregex "^VSHF_[BHWD]$")>;
+def : InstRW<[P5600WriteMSAShortInt], (instregex "^(BINSL|BINSLI)_[BHWD]$")>;
+def : InstRW<[P5600WriteMSAShortInt], (instregex "^(BINSR|BINSRI)_[BHWD]$")>;
+def : InstRW<[P5600WriteMSAShortInt], (instregex "^INSERT_[BHWD]$")>;
+def : InstRW<[P5600WriteMSAShortInt], (instregex "^(SLD|SLDI)_[BHWD]$")>;
+def : InstRW<[P5600WriteMSAShortInt], (instregex "^(BSET|BSETI)_[BHWD]$")>;
+def : InstRW<[P5600WriteMSAShortInt], (instregex "^(BCLR|BCLRI)_[BHWD]$")>;
+def : InstRW<[P5600WriteMSAShortInt], (instregex "^(BNEG|BNEGI)_[BHWD]$")>;
+def : InstRW<[P5600WriteMSAShortInt], (instregex "^(BSEL_V|BSELI_B)$")>;
+def : InstRW<[P5600WriteMSAShortInt], (instregex "^BMN*Z.*$")>;
+
+// pcnt.[bhwd], sat_s.[bhwd], sat_u.[bhwd]
+def : InstRW<[P5600WriteMSAOther3], (instregex "^PCNT_[BHWD]$")>;
+def : InstRW<[P5600WriteMSAOther3], (instregex "^SAT_(S|U)_[BHWD]$")>;
+
+// fexp2_w, fexp2_d
+def : InstRW<[P5600WriteFPUS], (instregex "^FEXP2_(W|D)$")>;
+
+// compare, converts, round to int, floating point truncate.
+def : InstRW<[P5600WriteFPUS], (instregex "^(CLT|CLTI)_(S|U)_[BHWD]$")>;
+def : InstRW<[P5600WriteFPUS], (instregex "^(CLE|CLEI)_(S|U)_[BHWD]$")>;
+def : InstRW<[P5600WriteFPUS], (instregex "^(CEQ|CEQI)_[BHWD]$")>;
+def : InstRW<[P5600WriteFPUS], (instregex "^CMP_UN_(S|D)$")>;
+def : InstRW<[P5600WriteFPUS], (instregex "^CMP_UEQ_(S|D)$")>;
+def : InstRW<[P5600WriteFPUS], (instregex "^CMP_EQ_(S|D)$")>;
+def : InstRW<[P5600WriteFPUS], (instregex "^CMP_LT_(S|D)$")>;
+def : InstRW<[P5600WriteFPUS], (instregex "^CMP_ULT_(S|D)$")>;
+def : InstRW<[P5600WriteFPUS], (instregex "^CMP_LE_(S|D)$")>;
+def : InstRW<[P5600WriteFPUS], (instregex "^CMP_ULE_(S|D)$")>;
+def : InstRW<[P5600WriteFPUS], (instregex "^FS(AF|EQ|LT|LE|NE|OR)_(W|D)$")>;
+def : InstRW<[P5600WriteFPUS], (instregex "^FSUEQ_(W|D)$")>;
+def : InstRW<[P5600WriteFPUS], (instregex "^FSULE_(W|D)$")>;
+def : InstRW<[P5600WriteFPUS], (instregex "^FSULT_(W|D)$")>;
+def : InstRW<[P5600WriteFPUS], (instregex "^FSUNE_(W|D)$")>;
+def : InstRW<[P5600WriteFPUS], (instregex "^FSUN_(W|D)$")>;
+def : InstRW<[P5600WriteFPUS], (instregex "^FCAF_(W|D)$")>;
+def : InstRW<[P5600WriteFPUS], (instregex "^FCEQ_(W|D)$")>;
+def : InstRW<[P5600WriteFPUS], (instregex "^FCLE_(W|D)$")>;
+def : InstRW<[P5600WriteFPUS], (instregex "^FCLT_(W|D)$")>;
+def : InstRW<[P5600WriteFPUS], (instregex "^FCNE_(W|D)$")>;
+def : InstRW<[P5600WriteFPUS], (instregex "^FCOR_(W|D)$")>;
+def : InstRW<[P5600WriteFPUS], (instregex "^FCUEQ_(W|D)$")>;
+def : InstRW<[P5600WriteFPUS], (instregex "^FCULE_(W|D)$")>;
+def : InstRW<[P5600WriteFPUS], (instregex "^FCULT_(W|D)$")>;
+def : InstRW<[P5600WriteFPUS], (instregex "^FCUNE_(W|D)$")>;
+def : InstRW<[P5600WriteFPUS], (instregex "^FCUN_(W|D)$")>;
+def : InstRW<[P5600WriteFPUS], (instregex "^FABS_(W|D)$")>;
+def : InstRW<[P5600WriteFPUS], (instregex "^FFINT_(U|S)_(W|D)$")>;
+def : InstRW<[P5600WriteFPUS], (instregex "^FFQL_(W|D)$")>;
+def : InstRW<[P5600WriteFPUS], (instregex "^FFQR_(W|D)$")>;
+def : InstRW<[P5600WriteFPUS], (instregex "^FTINT_(U|S)_(W|D)$")>;
+def : InstRW<[P5600WriteFPUS], (instregex "^FRINT_(W|D)$")>;
+def : InstRW<[P5600WriteFPUS], (instregex "^FTQ_(H|W)$")>;
+def : InstRW<[P5600WriteFPUS], (instregex "^FTRUNC_(U|S)_(W|D)$")>;
+
+// fexdo.[hw], fexupl.[wd], fexupr.[wd]
+def : InstRW<[P5600WriteFPUS], (instregex "^FEXDO_(H|W)$")>;
+def : InstRW<[P5600WriteFPUS], (instregex "^FEXUPL_(W|D)$")>;
+def : InstRW<[P5600WriteFPUS], (instregex "^FEXUPR_(W|D)$")>;
+
+// fclass.[wd], fmax.[wd], fmax_a.[wd], fmin.[wd], fmin_a.[wd], flog2.[wd]
+def : InstRW<[P5600WriteFPUS], (instregex "^FCLASS_(W|D)$")>;
+def : InstRW<[P5600WriteFPUS], (instregex "^FMAX_A_(W|D)$")>;
+def : InstRW<[P5600WriteFPUS], (instregex "^FMAX_(W|D)$")>;
+def : InstRW<[P5600WriteFPUS], (instregex "^FMIN_A_(W|D)$")>;
+def : InstRW<[P5600WriteFPUS], (instregex "^FMIN_(W|D)$")>;
+def : InstRW<[P5600WriteFPUS], (instregex "^FLOG2_(W|D)$")>;
+
+// interleave right/left, interleave even/odd, insert
+def : InstRW<[P5600WriteMSAShortLogic], (instregex "^(ILVR|ILVL)_[BHWD]$")>;
+def : InstRW<[P5600WriteMSAShortLogic], (instregex "^(ILVEV|ILVOD)_[BHWD]$")>;
+def : InstRW<[P5600WriteMSAShortLogic], (instregex "^INSVE_[BHWD]$")>;
+
+// subs_?.[bhwd], subsus_?.[bhwd], subsuu_?.[bhwd], subvi.[bhwd], subv.[bhwd],
+def : InstRW<[P5600WriteMSAShortInt], (instregex "^SUBS_(S|U)_[BHWD]$")>;
+def : InstRW<[P5600WriteMSAShortInt], (instregex "^SUBSUS_(S|U)_[BHWD]$")>;
+def : InstRW<[P5600WriteMSAShortInt], (instregex "^SUBSUU_(S|U)_[BHWD]$")>;
+def : InstRW<[P5600WriteMSAShortInt], (instregex "^SUBVI_[BHWD]$")>;
+def : InstRW<[P5600WriteMSAShortInt], (instregex "^SUBV_[BHWD]$")>;
+
+// mod_[su].[bhwd], div_[su].[bhwd]
+def : InstRW<[P5600WriteFPUDivI], (instregex "^MOD_(S|U)_[BHWD]$")>;
+def : InstRW<[P5600WriteFPUDivI], (instregex "^DIV_(S|U)_[BHWD]$")>;
+
+// hadd_[su].[bhwd], hsub_[su].[bhwd], max_[sua].[bhwd], min_[sua].[bhwd],
+// maxi_[su].[bhwd], mini_[su].[bhwd], sra?.[bhwd], srar?.[bhwd], srlr.[bhwd],
+// sll?.[bhwd], pckev.[bhwd], pckod.[bhwd], nloc.[bhwd], nlzc.[bhwd],
+// insve.[bhwd]
+def : InstRW<[P5600WriteMSAShortLogic], (instregex "^HADD_(S|U)_[BHWD]$")>;
+def : InstRW<[P5600WriteMSAShortLogic], (instregex "^HSUB_(S|U)_[BHWD]$")>;
+def : InstRW<[P5600WriteMSAShortLogic], (instregex "^(MAX|MIN)_S_[BHWD]$")>;
+def : InstRW<[P5600WriteMSAShortLogic], (instregex "^(MAX|MIN)_U_[BHWD]$")>;
+def : InstRW<[P5600WriteMSAShortLogic], (instregex "^(MAX|MIN)_A_[BHWD]$")>;
+def : InstRW<[P5600WriteMSAShortLogic], (instregex "^(MAXI|MINI)_(S|U)_[BHWD]$")>;
+def : InstRW<[P5600WriteMSAShortLogic], (instregex "^(SRA|SRAI)_[BHWD]$")>;
+def : InstRW<[P5600WriteMSAShortLogic], (instregex "^(SRL|SRLI)_[BHWD]$")>;
+def : InstRW<[P5600WriteMSAShortLogic], (instregex "^(SRAR|SRARI)_[BHWD]$")>;
+def : InstRW<[P5600WriteMSAShortLogic], (instregex "^(SRLR|SRLRI)_[BHWD]$")>;
+def : InstRW<[P5600WriteMSAShortLogic], (instregex "^(SLL|SLLI)_[BHWD]$")>;
+def : InstRW<[P5600WriteMSAShortLogic], (instregex "^(PCKEV|PCKOD)_[BHWD]$")>;
+def : InstRW<[P5600WriteMSAShortLogic], (instregex "^(NLOC|NLZC)_[BHWD]$")>;
+def : InstRW<[P5600WriteMSAShortLogic], (instregex "^INSVE_[BHWD]$")>;
+
// Long Pipe
// ----------
//
@@ -289,6 +444,41 @@ def : ItinRW<[P5600WriteFPUDivD], [II_DIV_D]>;
def : ItinRW<[P5600WriteFPUSqrtS], [II_SQRT_S]>;
def : ItinRW<[P5600WriteFPUSqrtD], [II_SQRT_D]>;
+// frcp.[wd], frsqrt.[wd]
+def : InstRW<[P5600WriteFPURsqrtD], (instregex "^FRCP_(W|D)$")>;
+def : InstRW<[P5600WriteFPURsqrtD], (instregex "^FRSQRT_(W|D)$")>;
+
+def : ItinRW<[P5600WriteFPURsqrtD], [II_RECIP_D, II_RSQRT_D]>;
+def : ItinRW<[P5600WriteFPURsqrtS], [II_RECIP_S, II_RSQRT_S]>;
+
+// fmadd.[wd], fmsub.[wd], fdiv.[wd], fsqrt.[wd], fmul.[wd], fadd.[wd],
+// fsub.[wd]
+def : InstRW<[P5600WriteFPUL_MADDSUB], (instregex "^FMADD_(W|D)$")>;
+def : InstRW<[P5600WriteFPUL_MADDSUB], (instregex "^FMSUB_(W|D)$")>;
+def : InstRW<[P5600WriteFPUDivS], (instregex "^FDIV_W$")>;
+def : InstRW<[P5600WriteFPUDivD], (instregex "^FDIV_D$")>;
+def : InstRW<[P5600WriteFPUSqrtS], (instregex "^FSQRT_W$")>;
+def : InstRW<[P5600WriteFPUSqrtD], (instregex "^FSQRT_D$")>;
+def : InstRW<[P5600WriteFPUL], (instregex "^FMUL_(W|D)$")>;
+def : InstRW<[P5600WriteFPUL], (instregex "^FADD_(W|D)$")>;
+def : InstRW<[P5600WriteFPUL], (instregex "^FSUB_(W|D)$")>;
+
+// dpadd_?.[bhwd], dpsub_?.[bhwd], dotp_?.[bhwd], msubv.[bhwd], maddv.[bhwd]
+// mulv.[bhwd].
+def : InstRW<[P5600WriteMSALongInt], (instregex "^DPADD_(S|U)_[HWD]$")>;
+def : InstRW<[P5600WriteMSALongInt], (instregex "^DPSUB_(S|U)_[HWD]$")>;
+def : InstRW<[P5600WriteMSALongInt], (instregex "^DOTP_(S|U)_[HWD]$")>;
+def : InstRW<[P5600WriteMSALongInt], (instregex "^MSUBV_[BHWD]$")>;
+def : InstRW<[P5600WriteMSALongInt], (instregex "^MADDV_[BHWD]$")>;
+def : InstRW<[P5600WriteMSALongInt], (instregex "^MULV_[BHWD]$")>;
+
+def : InstRW<[P5600WriteMSALongInt], (instregex "^MADDR_Q_[HW]$")>;
+def : InstRW<[P5600WriteMSALongInt], (instregex "^MADD_Q_[HW]$")>;
+def : InstRW<[P5600WriteMSALongInt], (instregex "^MSUBR_Q_[HW]$")>;
+def : InstRW<[P5600WriteMSALongInt], (instregex "^MSUB_Q_[HW]$")>;
+def : InstRW<[P5600WriteMSALongInt], (instregex "^MULR_Q_[HW]$")>;
+def : InstRW<[P5600WriteMSALongInt], (instregex "^MUL_Q_[HW]$")>;
+
// madd.[ds], msub.[ds], nmadd.[ds], nmsub.[ds],
// Operand 0 is read on cycle 5. All other operands are read on cycle 0.
def : ItinRW<[SchedReadAdvance<5>, P5600WriteFPUL_MADDSUB],
@@ -348,19 +538,24 @@ def P5600WriteLoadFPU : WriteSequence<[P5600WriteLoadToOtherUnits,
// ctc1, mtc1, mthc1
def : ItinRW<[P5600WriteMoveGPRToFPU], [II_CTC1, II_MTC1, II_MTHC1]>;
+// copy_[su].[bhwd]
+def : InstRW<[P5600WriteMoveFPUToGPR], (instregex "^COPY_U_[BHW]$")>;
+def : InstRW<[P5600WriteMoveFPUToGPR], (instregex "^COPY_S_[BHWD]$")>;
+
// bc1[ft], cfc1, mfc1, mfhc1, movf, movt
def : ItinRW<[P5600WriteMoveFPUToGPR],
- [II_BC1F, II_BC1T, II_CFC1, II_MFC1, II_MFHC1, II_MOVF, II_MOVT]>;
+ [II_BC1F, II_BC1FL, II_BC1T, II_BC1TL, II_CFC1, II_MFC1, II_MFHC1, II_MOVF, II_MOVT]>;
// swc1, swxc1, st.[bhwd]
-def : ItinRW<[P5600WriteStoreFPUS], [II_SWC1, II_SWXC1]>;
+def : ItinRW<[P5600WriteStoreFPUS], [II_SDC1, II_SDXC1, II_SUXC1, II_SWC1,
+ II_SWXC1]>;
def : InstRW<[P5600WriteStoreFPUS], (instregex "^ST_[BHWD]$")>;
// movn.[ds], movz.[ds]
def : ItinRW<[P5600WriteStoreFPUL], [II_MOVN_D, II_MOVN_S, II_MOVZ_D, II_MOVZ_S]>;
// l[dw]x?c1, ld.[bhwd]
-def : ItinRW<[P5600WriteLoadFPU], [II_LDC1, II_LDXC1, II_LWC1, II_LWXC1]>;
+def : ItinRW<[P5600WriteLoadFPU], [II_LDC1, II_LDXC1, II_LWC1, II_LWXC1, II_LUXC1]>;
def : InstRW<[P5600WriteLoadFPU], (instregex "LD_[BHWD]")>;
// Unsupported Instructions
@@ -370,11 +565,10 @@ def : InstRW<[P5600WriteLoadFPU], (instregex "LD_[BHWD]")>;
// II_DADDIU, II_DADDU, II_DMFC1, II_DMTC1, II_DMULT, II_DMULTU, II_DROTR,
// II_DROTR32, II_DROTRV, II_DDIV, II_DSLL, II_DSLL32, II_DSLLV, II_DSRA,
// II_DSRA32, II_DSRAV, II_DSRL, II_DSRL32, II_DSRLV, II_DSUBU, II_DDIVU,
-// II_JALRC, II_LD, II_LD[LR], II_LUXC1, II_RESTORE, II_SAVE, II_SD, II_SDC1,
-// II_SDL, II_SDR, II_SDXC1
+// II_JALRC, II_LD, II_LD[LR], II_RESTORE, II_SAVE, II_SD, II_SDC1, II_SD[LR]
//
// The following instructions are never valid on P5600.
-// addq.ph, rdhwr, repl.ph, repl.qb, subq.ph, subu_s.qb
+// addq.ph, repl.ph, repl.qb, subq.ph, subu_s.qb
//
// Guesswork
// =========
diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp
index 80641ed9bd31..bb48188e3b87 100644
--- a/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp
@@ -39,10 +39,10 @@ using namespace llvm;
extern "C" void LLVMInitializeMipsTarget() {
// Register the target.
- RegisterTargetMachine<MipsebTargetMachine> X(TheMipsTarget);
- RegisterTargetMachine<MipselTargetMachine> Y(TheMipselTarget);
- RegisterTargetMachine<MipsebTargetMachine> A(TheMips64Target);
- RegisterTargetMachine<MipselTargetMachine> B(TheMips64elTarget);
+ RegisterTargetMachine<MipsebTargetMachine> X(getTheMipsTarget());
+ RegisterTargetMachine<MipselTargetMachine> Y(getTheMipselTarget());
+ RegisterTargetMachine<MipsebTargetMachine> A(getTheMips64Target());
+ RegisterTargetMachine<MipselTargetMachine> B(getTheMips64elTarget());
}
static std::string computeDataLayout(const Triple &TT, StringRef CPU,
@@ -208,7 +208,6 @@ public:
void addIRPasses() override;
bool addInstSelector() override;
- void addMachineSSAOptimization() override;
void addPreEmitPass() override;
void addPreRegAlloc() override;
@@ -237,14 +236,8 @@ bool MipsPassConfig::addInstSelector() {
return false;
}
-void MipsPassConfig::addMachineSSAOptimization() {
- addPass(createMipsOptimizePICCallPass(getMipsTargetMachine()));
- TargetPassConfig::addMachineSSAOptimization();
-}
-
void MipsPassConfig::addPreRegAlloc() {
- if (getOptLevel() == CodeGenOpt::None)
- addPass(createMipsOptimizePICCallPass(getMipsTargetMachine()));
+ addPass(createMipsOptimizePICCallPass(getMipsTargetMachine()));
}
TargetIRAnalysis MipsTargetMachine::getTargetIRAnalysis() {
diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp b/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp
index 3bd4567e3792..fadab7806120 100644
--- a/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.cpp
@@ -61,23 +61,23 @@ static bool IsInSmallSection(uint64_t Size) {
/// Return true if this global address should be placed into small data/bss
/// section.
-bool MipsTargetObjectFile::
-IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM) const {
+bool MipsTargetObjectFile::IsGlobalInSmallSection(
+ const GlobalObject *GO, const TargetMachine &TM) const {
// We first check the case where global is a declaration, because finding
// section kind using getKindForGlobal() is only allowed for global
// definitions.
- if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage())
- return IsGlobalInSmallSectionImpl(GV, TM);
+ if (GO->isDeclaration() || GO->hasAvailableExternallyLinkage())
+ return IsGlobalInSmallSectionImpl(GO, TM);
- return IsGlobalInSmallSection(GV, TM, getKindForGlobal(GV, TM));
+ return IsGlobalInSmallSection(GO, TM, getKindForGlobal(GO, TM));
}
/// Return true if this global address should be placed into small data/bss
/// section.
bool MipsTargetObjectFile::
-IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM,
+IsGlobalInSmallSection(const GlobalObject *GO, const TargetMachine &TM,
SectionKind Kind) const {
- return (IsGlobalInSmallSectionImpl(GV, TM) &&
+ return (IsGlobalInSmallSectionImpl(GO, TM) &&
(Kind.isData() || Kind.isBSS() || Kind.isCommon()));
}
@@ -85,7 +85,7 @@ IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM,
/// section. This method does all the work, except for checking the section
/// kind.
bool MipsTargetObjectFile::
-IsGlobalInSmallSectionImpl(const GlobalValue *GV,
+IsGlobalInSmallSectionImpl(const GlobalObject *GO,
const TargetMachine &TM) const {
const MipsSubtarget &Subtarget =
*static_cast<const MipsTargetMachine &>(TM).getSubtargetImpl();
@@ -95,39 +95,37 @@ IsGlobalInSmallSectionImpl(const GlobalValue *GV,
return false;
// Only global variables, not functions.
- const GlobalVariable *GVA = dyn_cast<GlobalVariable>(GV);
+ const GlobalVariable *GVA = dyn_cast<GlobalVariable>(GO);
if (!GVA)
return false;
// Enforce -mlocal-sdata.
- if (!LocalSData && GV->hasLocalLinkage())
+ if (!LocalSData && GVA->hasLocalLinkage())
return false;
// Enforce -mextern-sdata.
- if (!ExternSData && ((GV->hasExternalLinkage() && GV->isDeclaration()) ||
- GV->hasCommonLinkage()))
+ if (!ExternSData && ((GVA->hasExternalLinkage() && GVA->isDeclaration()) ||
+ GVA->hasCommonLinkage()))
return false;
- Type *Ty = GV->getValueType();
+ Type *Ty = GVA->getValueType();
return IsInSmallSection(
- GV->getParent()->getDataLayout().getTypeAllocSize(Ty));
+ GVA->getParent()->getDataLayout().getTypeAllocSize(Ty));
}
-MCSection *
-MipsTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
- SectionKind Kind, Mangler &Mang,
- const TargetMachine &TM) const {
+MCSection *MipsTargetObjectFile::SelectSectionForGlobal(
+ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
// TODO: Could also support "weak" symbols as well with ".gnu.linkonce.s.*"
// sections?
// Handle Small Section classification here.
- if (Kind.isBSS() && IsGlobalInSmallSection(GV, TM, Kind))
+ if (Kind.isBSS() && IsGlobalInSmallSection(GO, TM, Kind))
return SmallBSSSection;
- if (Kind.isData() && IsGlobalInSmallSection(GV, TM, Kind))
+ if (Kind.isData() && IsGlobalInSmallSection(GO, TM, Kind))
return SmallDataSection;
// Otherwise, we work the same as ELF.
- return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang,TM);
+ return TargetLoweringObjectFileELF::SelectSectionForGlobal(GO, Kind, TM);
}
/// Return true if this constant should be placed into small data section.
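
The classification above admits only GlobalVariables, honours -mlocal-sdata and -mextern-sdata, and finally gates on the type's allocation size. A minimal standalone C++ sketch of that gate, assuming placeholder inputs rather than the real llvm::GlobalVariable queries and the small-section threshold option:

#include <cstdint>

// Hedged sketch only: the booleans stand in for queries on a GlobalVariable,
// and Threshold stands in for the small-section size limit the real code
// checks via IsInSmallSection().
bool isGlobalInSmallSectionSketch(bool IsGlobalVar, bool HasLocalLinkage,
                                  bool LocalSData, uint64_t AllocSize,
                                  uint64_t Threshold) {
  if (!IsGlobalVar)                   // only variables, never functions
    return false;
  if (!LocalSData && HasLocalLinkage) // enforce -mlocal-sdata
    return false;
  return AllocSize <= Threshold;      // size gate
}
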
diff --git a/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.h b/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.h
index 9840769aff69..e5423f9578a8 100644
--- a/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.h
+++ b/contrib/llvm/lib/Target/Mips/MipsTargetObjectFile.h
@@ -18,21 +18,21 @@ class MipsTargetMachine;
MCSection *SmallDataSection;
MCSection *SmallBSSSection;
const MipsTargetMachine *TM;
+
+ bool IsGlobalInSmallSection(const GlobalObject *GO, const TargetMachine &TM,
+ SectionKind Kind) const;
+ bool IsGlobalInSmallSectionImpl(const GlobalObject *GO,
+ const TargetMachine &TM) const;
public:
void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
/// Return true if this global address should be placed into small data/bss
/// section.
- bool IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM,
- SectionKind Kind) const;
- bool IsGlobalInSmallSection(const GlobalValue *GV,
+ bool IsGlobalInSmallSection(const GlobalObject *GO,
const TargetMachine &TM) const;
- bool IsGlobalInSmallSectionImpl(const GlobalValue *GV,
- const TargetMachine &TM) const;
- MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
- Mangler &Mang,
+ MCSection *SelectSectionForGlobal(const GlobalObject *GO, SectionKind Kind,
const TargetMachine &TM) const override;
/// Return true if this constant should be placed into small data section.
diff --git a/contrib/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp b/contrib/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
index 6a65943515bb..4c1edfaaaeca 100644
--- a/contrib/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
+++ b/contrib/llvm/lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp
@@ -12,20 +12,37 @@
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
-Target llvm::TheMipsTarget, llvm::TheMipselTarget;
-Target llvm::TheMips64Target, llvm::TheMips64elTarget;
+Target &llvm::getTheMipsTarget() {
+ static Target TheMipsTarget;
+ return TheMipsTarget;
+}
+Target &llvm::getTheMipselTarget() {
+ static Target TheMipselTarget;
+ return TheMipselTarget;
+}
+Target &llvm::getTheMips64Target() {
+ static Target TheMips64Target;
+ return TheMips64Target;
+}
+Target &llvm::getTheMips64elTarget() {
+ static Target TheMips64elTarget;
+ return TheMips64elTarget;
+}
extern "C" void LLVMInitializeMipsTargetInfo() {
RegisterTarget<Triple::mips,
- /*HasJIT=*/true> X(TheMipsTarget, "mips", "Mips");
+ /*HasJIT=*/true>
+ X(getTheMipsTarget(), "mips", "Mips");
RegisterTarget<Triple::mipsel,
- /*HasJIT=*/true> Y(TheMipselTarget, "mipsel", "Mipsel");
+ /*HasJIT=*/true>
+ Y(getTheMipselTarget(), "mipsel", "Mipsel");
RegisterTarget<Triple::mips64,
- /*HasJIT=*/true> A(TheMips64Target, "mips64", "Mips64 [experimental]");
+ /*HasJIT=*/true>
+ A(getTheMips64Target(), "mips64", "Mips64 [experimental]");
RegisterTarget<Triple::mips64el,
- /*HasJIT=*/true> B(TheMips64elTarget,
- "mips64el", "Mips64el [experimental]");
+ /*HasJIT=*/true>
+ B(getTheMips64elTarget(), "mips64el", "Mips64el [experimental]");
}
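
The hunk above replaces namespace-scope Target globals with accessors returning function-local statics. A small standalone illustration of the pattern, assuming a placeholder Target struct rather than llvm::Target:

#include <iostream>

struct Target {                  // placeholder, not llvm::Target
  const char *Name = nullptr;
};

// A function-local static is constructed on first use, so registration code
// running during static initialization of another translation unit can call
// this safely; a namespace-scope `Target TheMipsTarget;` gives no such
// ordering guarantee.
Target &getTheMipsTarget() {
  static Target TheMipsTarget;
  return TheMipsTarget;
}

int main() {
  getTheMipsTarget().Name = "mips";
  std::cout << getTheMipsTarget().Name << '\n';  // prints "mips"
}
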
diff --git a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
index a72ae2ef53a7..1cb92005979d 100644
--- a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
+++ b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h
@@ -30,60 +30,6 @@ enum AddressSpace {
ADDRESS_SPACE_PARAM = 101
};
-enum PropertyAnnotation {
- PROPERTY_MAXNTID_X = 0,
- PROPERTY_MAXNTID_Y,
- PROPERTY_MAXNTID_Z,
- PROPERTY_REQNTID_X,
- PROPERTY_REQNTID_Y,
- PROPERTY_REQNTID_Z,
- PROPERTY_MINNCTAPERSM,
- PROPERTY_ISTEXTURE,
- PROPERTY_ISSURFACE,
- PROPERTY_ISSAMPLER,
- PROPERTY_ISREADONLY_IMAGE_PARAM,
- PROPERTY_ISWRITEONLY_IMAGE_PARAM,
- PROPERTY_ISREADWRITE_IMAGE_PARAM,
- PROPERTY_ISKERNEL_FUNCTION,
- PROPERTY_ALIGN,
- PROPERTY_MANAGED,
-
- // last property
- PROPERTY_LAST
-};
-
-const unsigned AnnotationNameLen = 9; // length of each annotation name
-const char PropertyAnnotationNames[PROPERTY_LAST + 1][AnnotationNameLen + 1] = {
- "maxntidx", // PROPERTY_MAXNTID_X
- "maxntidy", // PROPERTY_MAXNTID_Y
- "maxntidz", // PROPERTY_MAXNTID_Z
- "reqntidx", // PROPERTY_REQNTID_X
- "reqntidy", // PROPERTY_REQNTID_Y
- "reqntidz", // PROPERTY_REQNTID_Z
- "minctasm", // PROPERTY_MINNCTAPERSM
- "texture", // PROPERTY_ISTEXTURE
- "surface", // PROPERTY_ISSURFACE
- "sampler", // PROPERTY_ISSAMPLER
- "rdoimage", // PROPERTY_ISREADONLY_IMAGE_PARAM
- "wroimage", // PROPERTY_ISWRITEONLY_IMAGE_PARAM
- "rdwrimage", // PROPERTY_ISREADWRITE_IMAGE_PARAM
- "kernel", // PROPERTY_ISKERNEL_FUNCTION
- "align", // PROPERTY_ALIGN
- "managed", // PROPERTY_MANAGED
-
- // last property
- "proplast", // PROPERTY_LAST
-};
-
-// name of named metadata used for global annotations
-#if defined(__GNUC__)
-// As this is declared to be static but some of the .cpp files that
-// include NVVM.h do not use this array, gcc gives a warning when
-// compiling those .cpp files, hence __attribute__((unused)).
-__attribute__((unused))
-#endif
- static const char *NamedMDForAnnotations = "nvvm.annotations";
-
namespace NVPTXII {
enum {
// These must be kept in sync with TSFlags in NVPTXInstrFormats.td
@@ -94,7 +40,7 @@ enum {
IsSurfTexQueryFlag = 0x800,
IsTexModeUnifiedFlag = 0x1000
};
-}
-}
+} // namespace NVPTXII
+} // namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
index e356a965a04b..12f992749366 100644
--- a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp
@@ -60,7 +60,7 @@ static MCInstPrinter *createNVPTXMCInstPrinter(const Triple &T,
// Force static initialization.
extern "C" void LLVMInitializeNVPTXTargetMC() {
- for (Target *T : {&TheNVPTXTarget32, &TheNVPTXTarget64}) {
+ for (Target *T : {&getTheNVPTXTarget32(), &getTheNVPTXTarget64()}) {
// Register the MC asm info.
RegisterMCAsmInfo<NVPTXMCAsmInfo> X(*T);
diff --git a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
index bfd512398408..0c9ad977e7ec 100644
--- a/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/NVPTX/MCTargetDesc/NVPTXMCTargetDesc.h
@@ -19,8 +19,8 @@
namespace llvm {
class Target;
-extern Target TheNVPTXTarget32;
-extern Target TheNVPTXTarget64;
+Target &getTheNVPTXTarget32();
+Target &getTheNVPTXTarget64();
} // End llvm namespace
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTX.h b/contrib/llvm/lib/Target/NVPTX/NVPTX.h
index e91385ac13f2..c455a437d8d5 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTX.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTX.h
@@ -45,7 +45,6 @@ FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM,
llvm::CodeGenOpt::Level OptLevel);
ModulePass *createNVPTXAssignValidGlobalNamesPass();
ModulePass *createGenericToNVVMPass();
-FunctionPass *createNVPTXFavorNonGenericAddrSpacesPass();
FunctionPass *createNVPTXInferAddressSpacesPass();
FunctionPass *createNVVMIntrRangePass(unsigned int SmVersion);
FunctionPass *createNVVMReflectPass();
@@ -53,12 +52,12 @@ FunctionPass *createNVVMReflectPass(const StringMap<int> &Mapping);
MachineFunctionPass *createNVPTXPrologEpilogPass();
MachineFunctionPass *createNVPTXReplaceImageHandlesPass();
FunctionPass *createNVPTXImageOptimizerPass();
-FunctionPass *createNVPTXLowerKernelArgsPass(const NVPTXTargetMachine *TM);
+FunctionPass *createNVPTXLowerArgsPass(const NVPTXTargetMachine *TM);
BasicBlockPass *createNVPTXLowerAllocaPass();
MachineFunctionPass *createNVPTXPeephole();
-extern Target TheNVPTXTarget32;
-extern Target TheNVPTXTarget64;
+Target &getTheNVPTXTarget32();
+Target &getTheNVPTXTarget64();
namespace NVPTX {
enum DrvInterface {
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTX.td b/contrib/llvm/lib/Target/NVPTX/NVPTX.td
index 032991a20cc9..c77ddbc99789 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTX.td
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTX.td
@@ -51,6 +51,9 @@ def SM61 : SubtargetFeature<"sm_61", "SmVersion", "61",
def SM62 : SubtargetFeature<"sm_62", "SmVersion", "62",
"Target SM 6.2">;
+def SATOM : SubtargetFeature<"satom", "HasAtomScope", "true",
+ "Atomic operations with scope">;
+
// PTX Versions
def PTX32 : SubtargetFeature<"ptx32", "PTXVersion", "32",
"Use PTX version 3.2">;
@@ -81,9 +84,9 @@ def : Proc<"sm_37", [SM37, PTX41]>;
def : Proc<"sm_50", [SM50, PTX40]>;
def : Proc<"sm_52", [SM52, PTX41]>;
def : Proc<"sm_53", [SM53, PTX42]>;
-def : Proc<"sm_60", [SM60, PTX50]>;
-def : Proc<"sm_61", [SM61, PTX50]>;
-def : Proc<"sm_62", [SM62, PTX50]>;
+def : Proc<"sm_60", [SM60, PTX50, SATOM]>;
+def : Proc<"sm_61", [SM61, PTX50, SATOM]>;
+def : Proc<"sm_62", [SM62, PTX50, SATOM]>;
def NVPTXInstrInfo : InstrInfo {
}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
index 4f3ccf47e264..bed52293197d 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "NVPTXAllocaHoisting.h"
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/StackProtector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
@@ -28,11 +27,10 @@ public:
NVPTXAllocaHoisting() : FunctionPass(ID) {}
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addPreserved<MachineFunctionAnalysis>();
AU.addPreserved<StackProtector>();
}
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "NVPTX specific alloca hoisting";
}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 660016bfcd05..04c8d5c0443e 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -45,7 +45,6 @@
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/TargetRegistry.h"
-#include "llvm/Support/TimeValue.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"
#include <sstream>
@@ -225,8 +224,7 @@ void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) {
const char *Sym = MFI->getImageHandleSymbol(Index);
std::string *SymNamePtr =
nvTM.getManagedStrPool()->getManagedString(Sym);
- MCOp = GetSymbolRef(OutContext.getOrCreateSymbol(
- StringRef(SymNamePtr->c_str())));
+ MCOp = GetSymbolRef(OutContext.getOrCreateSymbol(StringRef(*SymNamePtr)));
}
void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) {
@@ -368,13 +366,13 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) {
} else if (isa<PointerType>(Ty)) {
O << ".param .b" << TLI->getPointerTy(DL).getSizeInBits()
<< " func_retval0";
- } else if ((Ty->getTypeID() == Type::StructTyID) || isa<VectorType>(Ty)) {
+ } else if (Ty->isAggregateType() || Ty->isVectorTy()) {
unsigned totalsz = DL.getTypeAllocSize(Ty);
- unsigned retAlignment = 0;
- if (!llvm::getAlign(*F, 0, retAlignment))
- retAlignment = DL.getABITypeAlignment(Ty);
- O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz
- << "]";
+ unsigned retAlignment = 0;
+ if (!llvm::getAlign(*F, 0, retAlignment))
+ retAlignment = DL.getABITypeAlignment(Ty);
+ O << ".param .align " << retAlignment << " .b8 func_retval0[" << totalsz
+ << "]";
} else
llvm_unreachable("Unknown return type");
} else {
@@ -557,6 +555,10 @@ void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
unsigned mincta;
if (llvm::getMinCTASm(F, mincta))
O << ".minnctapersm " << mincta << "\n";
+
+ unsigned maxnreg;
+ if (llvm::getMaxNReg(F, maxnreg))
+ O << ".maxnreg " << maxnreg << "\n";
}
std::string
@@ -844,12 +846,12 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) {
// We need to call the parent's one explicitly.
//bool Result = AsmPrinter::doInitialization(M);
- // Initialize TargetLoweringObjectFile.
+ // Initialize TargetLoweringObjectFile since we didn't do it in
+ // AsmPrinter::doInitialization either right above or where it's commented out
+ // below.
const_cast<TargetLoweringObjectFile &>(getObjFileLowering())
.Initialize(OutContext, TM);
- Mang = new Mangler();
-
// Emit header before any dwarf directives are emitted below.
emitHeader(M, OS1, STI);
OutStreamer->EmitRawText(OS1.str());
@@ -1589,7 +1591,19 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) {
unsigned align = PAL.getParamAlignment(paramIndex + 1);
if (align == 0)
align = DL.getABITypeAlignment(ETy);
-
+ // Work around a bug in ptxas. When PTX code takes address of
+ // byval parameter with alignment < 4, ptxas generates code to
+ // spill argument into memory. Alas on sm_50+ ptxas generates
+ // SASS code that fails with misaligned access. To work around
+ // the problem, make sure that we align byval parameters by at
+ // least 4. Matching change must be made in LowerCall() where we
+ // prepare parameters for the call.
+ //
+ // TODO: this will need to be undone when we get to support multi-TU
+ // device-side compilation as it breaks ABI compatibility with nvcc.
+ // Hopefully ptxas bug is fixed by then.
+ if (!isKernelFunc && align < 4)
+ align = 4;
unsigned sz = DL.getTypeAllocSize(ETy);
O << "\t.param .align " << align << " .b8 ";
printParamName(I, paramIndex, O);
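
The workaround described above raises the alignment of byval parameters of non-kernel functions to at least 4; LowerCall() applies the matching clamp on the caller side. A minimal sketch of the adjustment, assuming plain parameters instead of the real AsmPrinter state:

#include <cassert>

// Sketch of `if (!isKernelFunc && align < 4) align = 4;` from the hunk above;
// kernel parameters keep their original alignment.
unsigned clampByvalParamAlign(unsigned Align, bool IsKernelFunc) {
  if (!IsKernelFunc && Align < 4)
    Align = 4;
  return Align;
}

int main() {
  assert(clampByvalParamAlign(1, /*IsKernelFunc=*/false) == 4);
  assert(clampByvalParamAlign(8, /*IsKernelFunc=*/false) == 8);
  assert(clampByvalParamAlign(1, /*IsKernelFunc=*/true) == 1);
}
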
@@ -1648,10 +1662,10 @@ void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
//unsigned numRegClasses = TRI->getNumRegClasses();
// Emit the Fake Stack Object
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- int NumBytes = (int) MFI->getStackSize();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ int NumBytes = (int) MFI.getStackSize();
if (NumBytes) {
- O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t" << DEPOTNAME
+ O << "\t.local .align " << MFI.getMaxAlignment() << " .b8 \t" << DEPOTNAME
<< getFunctionNumber() << "[" << NumBytes << "];\n";
if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) {
O << "\t.reg .b64 \t%SP;\n";
@@ -1713,11 +1727,11 @@ void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) {
if (Fp->getType()->getTypeID() == Type::FloatTyID) {
numHex = 8;
lead = "0f";
- APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &ignored);
+ APF.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &ignored);
} else if (Fp->getType()->getTypeID() == Type::DoubleTyID) {
numHex = 16;
lead = "0d";
- APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored);
+ APF.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &ignored);
} else
llvm_unreachable("unsupported fp type");
@@ -1836,9 +1850,9 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
ConvertIntToBytes<>(ptr, int32);
aggBuffer->addBytes(ptr, 4, Bytes);
break;
- } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
- if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
- ConstantFoldConstantExpression(Cexpr, DL))) {
+ } else if (const auto *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
+ if (const auto *constInt = dyn_cast_or_null<ConstantInt>(
+ ConstantFoldConstant(Cexpr, DL))) {
int int32 = (int)(constInt->getZExtValue());
ConvertIntToBytes<>(ptr, int32);
aggBuffer->addBytes(ptr, 4, Bytes);
@@ -1859,8 +1873,8 @@ void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes,
aggBuffer->addBytes(ptr, 8, Bytes);
break;
} else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) {
- if (const ConstantInt *constInt = dyn_cast<ConstantInt>(
- ConstantFoldConstantExpression(Cexpr, DL))) {
+ if (const auto *constInt = dyn_cast_or_null<ConstantInt>(
+ ConstantFoldConstant(Cexpr, DL))) {
long long int64 = (long long)(constInt->getZExtValue());
ConvertIntToBytes<>(ptr, int64);
aggBuffer->addBytes(ptr, 8, Bytes);
@@ -2062,8 +2076,8 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric)
// If the code isn't optimized, there may be outstanding folding
// opportunities. Attempt to fold the expression using DataLayout as a
// last resort before giving up.
- if (Constant *C = ConstantFoldConstantExpression(CE, getDataLayout()))
- if (C != CE)
+ if (Constant *C = ConstantFoldConstant(CE, getDataLayout()))
+ if (C && C != CE)
return lowerConstantForGV(C, ProcessingGeneric);
// Otherwise report the problem to the user.
@@ -2112,7 +2126,7 @@ NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric)
// expression properly. This is important for differences between
// blockaddress labels. Since the two labels are in the same function, it
// is reasonable to treat their delta as a 32-bit value.
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
case Instruction::BitCast:
return lowerConstantForGV(CE->getOperand(0), ProcessingGeneric);
@@ -2368,6 +2382,6 @@ std::string LineReader::readLine(unsigned lineNum) {
// Force static initialization.
extern "C" void LLVMInitializeNVPTXAsmPrinter() {
- RegisterAsmPrinter<NVPTXAsmPrinter> X(TheNVPTXTarget32);
- RegisterAsmPrinter<NVPTXAsmPrinter> Y(TheNVPTXTarget64);
+ RegisterAsmPrinter<NVPTXAsmPrinter> X(getTheNVPTXTarget32());
+ RegisterAsmPrinter<NVPTXAsmPrinter> Y(getTheNVPTXTarget64());
}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
index 85660fbdb26e..3dcc0e358a14 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h
@@ -195,7 +195,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
void emitSrcInText(StringRef filename, unsigned line);
private:
- const char *getPassName() const override { return "NVPTX Assembly Printer"; }
+ StringRef getPassName() const override { return "NVPTX Assembly Printer"; }
const Function *F;
std::string CurrentFnName;
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
deleted file mode 100644
index 7c5a54162d77..000000000000
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXFavorNonGenericAddrSpaces.cpp
+++ /dev/null
@@ -1,289 +0,0 @@
-//===-- NVPTXFavorNonGenericAddrSpace.cpp - ---------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// FIXME: This pass is deprecated in favor of NVPTXInferAddressSpaces, which
-// uses a new algorithm that handles pointer induction variables.
-//
-// When a load/store accesses the generic address space, checks whether the
-// address is casted from a non-generic address space. If so, remove this
-// addrspacecast because accessing non-generic address spaces is typically
-// faster. Besides removing addrspacecasts directly used by loads/stores, this
-// optimization also recursively traces into a GEP's pointer operand and a
-// bitcast's source to find more eliminable addrspacecasts.
-//
-// For instance, the code below loads a float from an array allocated in
-// addrspace(3).
-//
-// %0 = addrspacecast [10 x float] addrspace(3)* @a to [10 x float]*
-// %1 = gep [10 x float]* %0, i64 0, i64 %i
-// %2 = bitcast float* %1 to i32*
-// %3 = load i32* %2 ; emits ld.u32
-//
-// First, function hoistAddrSpaceCastFrom reorders the addrspacecast, the GEP,
-// and the bitcast to expose more optimization opportunities to function
-// optimizeMemoryInst. The intermediate code looks like:
-//
-// %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i
-// %1 = bitcast float addrspace(3)* %0 to i32 addrspace(3)*
-// %2 = addrspacecast i32 addrspace(3)* %1 to i32*
-// %3 = load i32* %2 ; still emits ld.u32, but will be optimized shortly
-//
-// Then, function optimizeMemoryInstruction detects a load from addrspacecast'ed
-// generic pointers, and folds the load and the addrspacecast into a load from
-// the original address space. The final code looks like:
-//
-// %0 = gep [10 x float] addrspace(3)* @a, i64 0, i64 %i
-// %1 = bitcast float addrspace(3)* %0 to i32 addrspace(3)*
-// %3 = load i32 addrspace(3)* %1 ; emits ld.shared.f32
-//
-// This pass may remove an addrspacecast in a different BB. Therefore, we
-// implement it as a FunctionPass.
-//
-// TODO:
-// The current implementation doesn't handle PHINodes. Eliminating
-// addrspacecasts used by PHINodes is trickier because PHINodes can introduce
-// loops in data flow. For example,
-//
-// %generic.input = addrspacecast float addrspace(3)* %input to float*
-// loop:
-// %y = phi [ %generic.input, %y2 ]
-// %y2 = getelementptr %y, 1
-// %v = load %y2
-// br ..., label %loop, ...
-//
-// Marking %y2 shared depends on marking %y shared, but %y also data-flow
-// depends on %y2. We probably need an iterative fix-point algorithm on handle
-// this case.
-//
-//===----------------------------------------------------------------------===//
-
-#include "NVPTX.h"
-#include "llvm/IR/Function.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Operator.h"
-#include "llvm/Support/CommandLine.h"
-
-using namespace llvm;
-
-// An option to disable this optimization. Enable it by default.
-static cl::opt<bool> DisableFavorNonGeneric(
- "disable-nvptx-favor-non-generic",
- cl::init(false),
- cl::desc("Do not convert generic address space usage "
- "to non-generic address space usage"),
- cl::Hidden);
-
-namespace {
-/// \brief NVPTXFavorNonGenericAddrSpaces
-class NVPTXFavorNonGenericAddrSpaces : public FunctionPass {
-public:
- static char ID;
- NVPTXFavorNonGenericAddrSpaces() : FunctionPass(ID) {}
- bool runOnFunction(Function &F) override;
-
-private:
- /// Optimizes load/store instructions. Idx is the index of the pointer operand
- /// (0 for load, and 1 for store). Returns true if it changes anything.
- bool optimizeMemoryInstruction(Instruction *I, unsigned Idx);
- /// Recursively traces into a GEP's pointer operand or a bitcast's source to
- /// find an eliminable addrspacecast, and hoists that addrspacecast to the
- /// outermost level. For example, this function transforms
- /// bitcast(gep(gep(addrspacecast(X))))
- /// to
- /// addrspacecast(bitcast(gep(gep(X)))).
- ///
- /// This reordering exposes to optimizeMemoryInstruction more
- /// optimization opportunities on loads and stores.
- ///
- /// If this function successfully hoists an eliminable addrspacecast or V is
- /// already such an addrspacecast, it returns the transformed value (which is
- /// guaranteed to be an addrspacecast); otherwise, it returns nullptr.
- Value *hoistAddrSpaceCastFrom(Value *V, int Depth = 0);
- /// Helper function for GEPs.
- Value *hoistAddrSpaceCastFromGEP(GEPOperator *GEP, int Depth);
- /// Helper function for bitcasts.
- Value *hoistAddrSpaceCastFromBitCast(BitCastOperator *BC, int Depth);
-};
-}
-
-char NVPTXFavorNonGenericAddrSpaces::ID = 0;
-
-namespace llvm {
-void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &);
-}
-INITIALIZE_PASS(NVPTXFavorNonGenericAddrSpaces, "nvptx-favor-non-generic",
- "Remove unnecessary non-generic-to-generic addrspacecasts",
- false, false)
-
-// Decides whether V is an addrspacecast and shortcutting V in load/store is
-// valid and beneficial.
-static bool isEliminableAddrSpaceCast(Value *V) {
- // Returns false if V is not even an addrspacecast.
- Operator *Cast = dyn_cast<Operator>(V);
- if (Cast == nullptr || Cast->getOpcode() != Instruction::AddrSpaceCast)
- return false;
-
- Value *Src = Cast->getOperand(0);
- PointerType *SrcTy = cast<PointerType>(Src->getType());
- PointerType *DestTy = cast<PointerType>(Cast->getType());
- // TODO: For now, we only handle the case where the addrspacecast only changes
- // the address space but not the type. If the type also changes, we could
- // still get rid of the addrspacecast by adding an extra bitcast, but we
- // rarely see such scenarios.
- if (SrcTy->getElementType() != DestTy->getElementType())
- return false;
-
- // Checks whether the addrspacecast is from a non-generic address space to the
- // generic address space.
- return (SrcTy->getAddressSpace() != AddressSpace::ADDRESS_SPACE_GENERIC &&
- DestTy->getAddressSpace() == AddressSpace::ADDRESS_SPACE_GENERIC);
-}
-
-Value *NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromGEP(
- GEPOperator *GEP, int Depth) {
- Value *NewOperand =
- hoistAddrSpaceCastFrom(GEP->getPointerOperand(), Depth + 1);
- if (NewOperand == nullptr)
- return nullptr;
-
- // hoistAddrSpaceCastFrom returns an eliminable addrspacecast or nullptr.
- assert(isEliminableAddrSpaceCast(NewOperand));
- Operator *Cast = cast<Operator>(NewOperand);
-
- SmallVector<Value *, 8> Indices(GEP->idx_begin(), GEP->idx_end());
- Value *NewASC;
- if (Instruction *GEPI = dyn_cast<Instruction>(GEP)) {
- // GEP = gep (addrspacecast X), indices
- // =>
- // NewGEP = gep X, indices
- // NewASC = addrspacecast NewGEP
- GetElementPtrInst *NewGEP = GetElementPtrInst::Create(
- GEP->getSourceElementType(), Cast->getOperand(0), Indices,
- "", GEPI);
- NewGEP->setIsInBounds(GEP->isInBounds());
- NewGEP->takeName(GEP);
- NewASC = new AddrSpaceCastInst(NewGEP, GEP->getType(), "", GEPI);
- // Without RAUWing GEP, the compiler would visit GEP again and emit
- // redundant instructions. This is exercised in test @rauw in
- // access-non-generic.ll.
- GEP->replaceAllUsesWith(NewASC);
- } else {
- // GEP is a constant expression.
- Constant *NewGEP = ConstantExpr::getGetElementPtr(
- GEP->getSourceElementType(), cast<Constant>(Cast->getOperand(0)),
- Indices, GEP->isInBounds());
- NewASC = ConstantExpr::getAddrSpaceCast(NewGEP, GEP->getType());
- }
- return NewASC;
-}
-
-Value *NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFromBitCast(
- BitCastOperator *BC, int Depth) {
- Value *NewOperand = hoistAddrSpaceCastFrom(BC->getOperand(0), Depth + 1);
- if (NewOperand == nullptr)
- return nullptr;
-
- // hoistAddrSpaceCastFrom returns an eliminable addrspacecast or nullptr.
- assert(isEliminableAddrSpaceCast(NewOperand));
- Operator *Cast = cast<Operator>(NewOperand);
-
- // Cast = addrspacecast Src
- // BC = bitcast Cast
- // =>
- // Cast' = bitcast Src
- // BC' = addrspacecast Cast'
- Value *Src = Cast->getOperand(0);
- Type *TypeOfNewCast =
- PointerType::get(BC->getType()->getPointerElementType(),
- Src->getType()->getPointerAddressSpace());
- Value *NewBC;
- if (BitCastInst *BCI = dyn_cast<BitCastInst>(BC)) {
- Value *NewCast = new BitCastInst(Src, TypeOfNewCast, "", BCI);
- NewBC = new AddrSpaceCastInst(NewCast, BC->getType(), "", BCI);
- NewBC->takeName(BC);
- // Without RAUWing BC, the compiler would visit BC again and emit
- // redundant instructions. This is exercised in test @rauw in
- // access-non-generic.ll.
- BC->replaceAllUsesWith(NewBC);
- } else {
- // BC is a constant expression.
- Constant *NewCast =
- ConstantExpr::getBitCast(cast<Constant>(Src), TypeOfNewCast);
- NewBC = ConstantExpr::getAddrSpaceCast(NewCast, BC->getType());
- }
- return NewBC;
-}
-
-Value *NVPTXFavorNonGenericAddrSpaces::hoistAddrSpaceCastFrom(Value *V,
- int Depth) {
- // Returns V if V is already an eliminable addrspacecast.
- if (isEliminableAddrSpaceCast(V))
- return V;
-
- // Limit the depth to prevent this recursive function from running too long.
- const int MaxDepth = 20;
- if (Depth >= MaxDepth)
- return nullptr;
-
- // If V is a GEP or bitcast, hoist the addrspacecast if any from its pointer
- // operand. This enables optimizeMemoryInstruction to shortcut addrspacecasts
- // that are not directly used by the load/store.
- if (GEPOperator *GEP = dyn_cast<GEPOperator>(V))
- return hoistAddrSpaceCastFromGEP(GEP, Depth);
-
- if (BitCastOperator *BC = dyn_cast<BitCastOperator>(V))
- return hoistAddrSpaceCastFromBitCast(BC, Depth);
-
- return nullptr;
-}
-
-bool NVPTXFavorNonGenericAddrSpaces::optimizeMemoryInstruction(Instruction *MI,
- unsigned Idx) {
- Value *NewOperand = hoistAddrSpaceCastFrom(MI->getOperand(Idx));
- if (NewOperand == nullptr)
- return false;
-
- // load/store (addrspacecast X) => load/store X if shortcutting the
- // addrspacecast is valid and can improve performance.
- //
- // e.g.,
- // %1 = addrspacecast float addrspace(3)* %0 to float*
- // %2 = load float* %1
- // ->
- // %2 = load float addrspace(3)* %0
- //
- // Note: the addrspacecast can also be a constant expression.
- assert(isEliminableAddrSpaceCast(NewOperand));
- Operator *ASC = dyn_cast<Operator>(NewOperand);
- MI->setOperand(Idx, ASC->getOperand(0));
- return true;
-}
-
-bool NVPTXFavorNonGenericAddrSpaces::runOnFunction(Function &F) {
- if (DisableFavorNonGeneric || skipFunction(F))
- return false;
-
- bool Changed = false;
- for (BasicBlock &B : F) {
- for (Instruction &I : B) {
- if (isa<LoadInst>(I)) {
- // V = load P
- Changed |= optimizeMemoryInstruction(&I, 0);
- } else if (isa<StoreInst>(I)) {
- // store V, P
- Changed |= optimizeMemoryInstruction(&I, 1);
- }
- }
- }
- return Changed;
-}
-
-FunctionPass *llvm::createNVPTXFavorNonGenericAddrSpacesPass() {
- return new NVPTXFavorNonGenericAddrSpaces();
-}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
index bbcb497ead9d..6ced2f6967cf 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp
@@ -32,7 +32,7 @@ bool NVPTXFrameLowering::hasFP(const MachineFunction &MF) const { return true; }
void NVPTXFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
- if (MF.getFrameInfo()->hasStackObjects()) {
+ if (MF.getFrameInfo().hasStackObjects()) {
assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
MachineInstr *MI = &MBB.front();
MachineRegisterInfo &MR = MF.getRegInfo();
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
index 66a964082c5f..390776212ce7 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp
@@ -15,7 +15,6 @@
#include "NVPTX.h"
#include "MCTargetDesc/NVPTXBaseInfo.h"
#include "NVPTXUtilities.h"
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
@@ -55,7 +54,6 @@ private:
IRBuilder<> &Builder);
Value *remapConstantExpr(Module *M, Function *F, ConstantExpr *C,
IRBuilder<> &Builder);
- void remapNamedMDNode(ValueToValueMapTy &VM, NamedMDNode *N);
typedef ValueMap<GlobalVariable *, GlobalVariable *> GVMapTy;
typedef ValueMap<Constant *, Value *> ConstantToValueMapTy;
@@ -130,12 +128,6 @@ bool GenericToNVVM::runOnModule(Module &M) {
for (auto I = GVMap.begin(), E = GVMap.end(); I != E; ++I)
VM[I->first] = I->second;
- // Walk through the metadata section and update the debug information
- // associated with the global variables in the default address space.
- for (NamedMDNode &I : M.named_metadata()) {
- remapNamedMDNode(VM, &I);
- }
-
// Walk through the global variable initializers, and replace any use of
// original global variables in GVMap with a use of the corresponding copies
// in GVMap. The copies need to be bitcast to the original global variable
@@ -360,32 +352,3 @@ Value *GenericToNVVM::remapConstantExpr(Module *M, Function *F, ConstantExpr *C,
llvm_unreachable("GenericToNVVM encountered an unsupported ConstantExpr");
}
}
-
-void GenericToNVVM::remapNamedMDNode(ValueToValueMapTy &VM, NamedMDNode *N) {
-
- bool OperandChanged = false;
- SmallVector<MDNode *, 16> NewOperands;
- unsigned NumOperands = N->getNumOperands();
-
- // Check if any operand is or contains a global variable in GVMap, and thus
- // converted to another value.
- for (unsigned i = 0; i < NumOperands; ++i) {
- MDNode *Operand = N->getOperand(i);
- MDNode *NewOperand = MapMetadata(Operand, VM);
- OperandChanged |= Operand != NewOperand;
- NewOperands.push_back(NewOperand);
- }
-
- // If none of the operands has been modified, return immediately.
- if (!OperandChanged) {
- return;
- }
-
- // Replace the old operands with the new operands.
- N->dropAllReferences();
- for (SmallVectorImpl<MDNode *>::iterator I = NewOperands.begin(),
- E = NewOperands.end();
- I != E; ++I) {
- N->addOperand(*I);
- }
-}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index 61c6758ef118..43c478f4212f 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -558,21 +558,30 @@ static unsigned int getCodeAddrSpace(MemSDNode *N) {
static bool canLowerToLDG(MemSDNode *N, const NVPTXSubtarget &Subtarget,
unsigned CodeAddrSpace, MachineFunction *F) {
- // To use non-coherent caching, the load has to be from global
- // memory and we have to prove that the memory area is not written
- // to anywhere for the duration of the kernel call, not even after
- // the load.
+ // We use ldg (i.e. ld.global.nc) for invariant loads from the global address
+ // space.
//
- // To ensure that there are no writes to the memory, we require the
- // underlying pointer to be a noalias (__restrict) kernel parameter
- // that is never used for a write. We can only do this for kernel
- // functions since from within a device function, we cannot know if
- // there were or will be writes to the memory from the caller - or we
- // could, but then we would have to do inter-procedural analysis.
- if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL ||
- !isKernelFunction(*F->getFunction())) {
+ // We have two ways of identifying invariant loads: Loads may be explicitly
+ // marked as invariant, or we may infer them to be invariant.
+ //
+ // We currently infer invariance only for kernel function pointer params that
+ // are noalias (i.e. __restrict) and never written to.
+ //
+ // TODO: Perform a more powerful invariance analysis (ideally IPO, and ideally
+ // not during the SelectionDAG phase).
+ //
+ // TODO: Infer invariance only at -O2. We still want to use ldg at -O0 for
+ // explicitly invariant loads because these are how clang tells us to use ldg
+ // when the user uses a builtin.
+ if (!Subtarget.hasLDG() || CodeAddrSpace != NVPTX::PTXLdStInstCode::GLOBAL)
+ return false;
+
+ if (N->isInvariant())
+ return true;
+
+ // Load wasn't explicitly invariant. Attempt to infer invariance.
+ if (!isKernelFunction(*F->getFunction()))
return false;
- }
// We use GetUnderlyingObjects() here instead of
// GetUnderlyingObject() mainly because the former looks through phi
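
The rewritten check above tries the cheap conditions first (ld.global.nc support, global address space), then an explicit invariant marking, and only then the kernel-parameter inference. A hedged sketch of that control flow with placeholder types; the real code works on MemSDNode and performs the inference with GetUnderlyingObjects():

struct LoadInfo {            // placeholders, not LLVM types
  bool InGlobalAddrSpace;
  bool ExplicitlyInvariant;  // e.g. marked invariant by the frontend
  bool InKernelFunction;
};

bool canUseLDGSketch(bool SubtargetHasLDG, const LoadInfo &L,
                     bool (*inferInvariance)(const LoadInfo &)) {
  if (!SubtargetHasLDG || !L.InGlobalAddrSpace)
    return false;
  if (L.ExplicitlyInvariant)   // trust the explicit marking
    return true;
  if (!L.InKernelFunction)     // inference only attempted for kernels
    return false;
  return inferInvariance(L);   // noalias, never-written parameter analysis
}
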
@@ -4902,7 +4911,7 @@ bool NVPTXDAGToDAGISel::tryBFE(SDNode *N) {
uint64_t StartVal = StartConst->getZExtValue();
// How many "good" bits do we have left? "good" is defined here as bits
// that exist in the original value, not shifted in.
- uint64_t GoodBits = Start.getValueType().getSizeInBits() - StartVal;
+ uint64_t GoodBits = Start.getValueSizeInBits() - StartVal;
if (NumBits > GoodBits) {
// Do not handle the case where bits have been shifted in. In theory
// we could handle this, but the cost is likely higher than just
@@ -5010,15 +5019,14 @@ bool NVPTXDAGToDAGISel::tryBFE(SDNode *N) {
// If the outer shift is more than the type size, we have no bitfield to
// extract (since we also check that the inner shift is <= the outer shift
// then this also implies that the inner shift is < the type size)
- if (OuterShiftAmt >= Val.getValueType().getSizeInBits()) {
+ if (OuterShiftAmt >= Val.getValueSizeInBits()) {
return false;
}
- Start =
- CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, DL, MVT::i32);
- Len =
- CurDAG->getTargetConstant(Val.getValueType().getSizeInBits() -
- OuterShiftAmt, DL, MVT::i32);
+ Start = CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, DL,
+ MVT::i32);
+ Len = CurDAG->getTargetConstant(Val.getValueSizeInBits() - OuterShiftAmt,
+ DL, MVT::i32);
if (N->getOpcode() == ISD::SRA) {
// If we have a arithmetic right shift, we need to use the signed bfe
@@ -5076,11 +5084,12 @@ bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) {
Address = N.getOperand(0);
return true;
}
- if (N.getOpcode() == ISD::INTRINSIC_WO_CHAIN) {
- unsigned IID = cast<ConstantSDNode>(N.getOperand(0))->getZExtValue();
- if (IID == Intrinsic::nvvm_ptr_gen_to_param)
- if (N.getOperand(1).getOpcode() == NVPTXISD::MoveParam)
- return (SelectDirectAddr(N.getOperand(1).getOperand(0), Address));
+ // addrspacecast(MoveParam(arg_symbol) to addrspace(PARAM)) -> arg_symbol
+ if (AddrSpaceCastSDNode *CastN = dyn_cast<AddrSpaceCastSDNode>(N)) {
+ if (CastN->getSrcAddressSpace() == ADDRESS_SPACE_GENERIC &&
+ CastN->getDestAddressSpace() == ADDRESS_SPACE_PARAM &&
+ CastN->getOperand(0).getOpcode() == NVPTXISD::MoveParam)
+ return SelectDirectAddr(CastN->getOperand(0).getOperand(0), Address);
}
return false;
}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index d53c92f1eff3..0591035a6aa8 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -40,7 +40,7 @@ public:
CodeGenOpt::Level OptLevel);
// Pass Name
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "NVPTX DAG->DAG Pattern Instruction Selection";
}
bool runOnMachineFunction(MachineFunction &MF) override;
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
index f28c89cd976a..2e4764feff11 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -206,7 +206,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
// intrinsics.
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
- // Turn FP extload into load/fextend
+ // Turn FP extload into load/fpextend
setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
@@ -278,6 +278,30 @@ NVPTXTargetLowering::NVPTXTargetLowering(const NVPTXTargetMachine &TM,
setTargetDAGCombine(ISD::MUL);
setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::SELECT);
+ setTargetDAGCombine(ISD::SREM);
+ setTargetDAGCombine(ISD::UREM);
+
+ // Library functions. These default to Expand, but we have instructions
+ // for them.
+ setOperationAction(ISD::FCEIL, MVT::f32, Legal);
+ setOperationAction(ISD::FCEIL, MVT::f64, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
+ setOperationAction(ISD::FFLOOR, MVT::f64, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::f32, Legal);
+ setOperationAction(ISD::FNEARBYINT, MVT::f64, Legal);
+ setOperationAction(ISD::FRINT, MVT::f32, Legal);
+ setOperationAction(ISD::FRINT, MVT::f64, Legal);
+ setOperationAction(ISD::FROUND, MVT::f32, Legal);
+ setOperationAction(ISD::FROUND, MVT::f64, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
+ setOperationAction(ISD::FTRUNC, MVT::f64, Legal);
+ setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
+ setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
+ setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
+
+ // No FEXP2, FLOG2. The PTX ex2 and log2 functions are always approximate.
+ // No FPOW or FREM in PTX.
// Now deduce the information based on the above mentioned
// actions
@@ -1002,11 +1026,15 @@ std::string NVPTXTargetLowering::getPrototype(
return O.str();
}
-unsigned
-NVPTXTargetLowering::getArgumentAlignment(SDValue Callee,
- const ImmutableCallSite *CS,
- Type *Ty,
- unsigned Idx) const {
+unsigned NVPTXTargetLowering::getArgumentAlignment(SDValue Callee,
+ const ImmutableCallSite *CS,
+ Type *Ty, unsigned Idx,
+ const DataLayout &DL) const {
+ if (!CS) {
+ // CallSite is null, fall back to ABI type alignment
+ return DL.getABITypeAlignment(Ty);
+ }
+
unsigned Align = 0;
const Value *DirectCallee = CS->getCalledFunction();
@@ -1024,7 +1052,7 @@ NVPTXTargetLowering::getArgumentAlignment(SDValue Callee,
const Value *CalleeV = cast<CallInst>(CalleeI)->getCalledValue();
// Ignore any bitcast instructions
- while(isa<ConstantExpr>(CalleeV)) {
+ while (isa<ConstantExpr>(CalleeV)) {
const ConstantExpr *CE = cast<ConstantExpr>(CalleeV);
if (!CE->isCast())
break;
@@ -1047,7 +1075,6 @@ NVPTXTargetLowering::getArgumentAlignment(SDValue Callee,
// Call is indirect or alignment information is not available, fall back to
// the ABI type alignment
- auto &DL = CS->getCaller()->getParent()->getDataLayout();
return DL.getABITypeAlignment(Ty);
}
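A minimal sketch of the fallback the hunk above introduces, assuming the DataLayout API of this LLVM vintage (getABITypeAlignment(Type*) returning an unsigned); fallbackParamAlign is an illustrative helper, not a function in the tree:

    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Type.h"

    // With no ImmutableCallSite to inspect, the ABI type alignment from the
    // module's DataLayout is the conservative answer.
    static unsigned fallbackParamAlign(const llvm::DataLayout &DL,
                                       llvm::Type *Ty) {
      return DL.getABITypeAlignment(Ty);
    }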
@@ -1104,7 +1131,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
ComputePTXValueVTs(*this, DAG.getDataLayout(), Ty, vtparts, &Offsets,
0);
- unsigned align = getArgumentAlignment(Callee, CS, Ty, paramCount + 1);
+ unsigned align =
+ getArgumentAlignment(Callee, CS, Ty, paramCount + 1, DL);
// declare .param .align <align> .b8 .param<n>[<size>];
unsigned sz = DL.getTypeAllocSize(Ty);
SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
@@ -1144,7 +1172,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
if (Ty->isVectorTy()) {
EVT ObjectVT = getValueType(DL, Ty);
- unsigned align = getArgumentAlignment(Callee, CS, Ty, paramCount + 1);
+ unsigned align =
+ getArgumentAlignment(Callee, CS, Ty, paramCount + 1, DL);
// declare .param .align <align> .b8 .param<n>[<size>];
unsigned sz = DL.getTypeAllocSize(Ty);
SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue);
@@ -1337,11 +1366,15 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// The ByValAlign in the Outs[OIdx].Flags is always set at this point,
// so we don't need to worry about natural alignment or not.
// See TargetLowering::LowerCallTo().
- SDValue DeclareParamOps[] = {
- Chain, DAG.getConstant(Outs[OIdx].Flags.getByValAlign(), dl, MVT::i32),
- DAG.getConstant(paramCount, dl, MVT::i32),
- DAG.getConstant(sz, dl, MVT::i32), InFlag
- };
+
+ // Enforce minimum alignment of 4 to work around ptxas miscompile
+ // for sm_50+. See corresponding alignment adjustment in
+ // emitFunctionParamList() for details.
+ if (ArgAlign < 4)
+ ArgAlign = 4;
+ SDValue DeclareParamOps[] = {Chain, DAG.getConstant(ArgAlign, dl, MVT::i32),
+ DAG.getConstant(paramCount, dl, MVT::i32),
+ DAG.getConstant(sz, dl, MVT::i32), InFlag};
Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs,
DeclareParamOps);
InFlag = Chain.getValue(1);
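The alignment floor added above amounts to a std::max; a tiny hedged restatement (the value 4 and the emitFunctionParamList() cross-reference come from the comment in the hunk, not from re-testing ptxas):

    #include <algorithm>

    // Declare byval params with at least .align 4 (sm_50+ ptxas workaround).
    static unsigned clampByValParamAlign(unsigned ArgAlign) {
      return std::max(ArgAlign, 4u);
    }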
@@ -1400,7 +1433,7 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
DeclareRetOps);
InFlag = Chain.getValue(1);
} else {
- retAlignment = getArgumentAlignment(Callee, CS, retTy, 0);
+ retAlignment = getArgumentAlignment(Callee, CS, retTy, 0, DL);
SDVTList DeclareRetVTs = DAG.getVTList(MVT::Other, MVT::Glue);
SDValue DeclareRetOps[] = { Chain,
DAG.getConstant(retAlignment, dl, MVT::i32),
@@ -1607,9 +1640,10 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
} else {
SmallVector<EVT, 16> VTs;
SmallVector<uint64_t, 16> Offsets;
- ComputePTXValueVTs(*this, DAG.getDataLayout(), retTy, VTs, &Offsets, 0);
+ auto &DL = DAG.getDataLayout();
+ ComputePTXValueVTs(*this, DL, retTy, VTs, &Offsets, 0);
assert(VTs.size() == Ins.size() && "Bad value decomposition");
- unsigned RetAlign = getArgumentAlignment(Callee, CS, retTy, 0);
+ unsigned RetAlign = getArgumentAlignment(Callee, CS, retTy, 0, DL);
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
unsigned sz = VTs[i].getSizeInBits();
unsigned AlignI = GreatestCommonDivisor64(RetAlign, Offsets[i]);
@@ -2073,7 +2107,6 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
SDValue Root = DAG.getRoot();
std::vector<SDValue> OutChains;
- bool isKernel = llvm::isKernelFunction(*F);
bool isABI = (STI.getSmVersion() >= 20);
assert(isABI && "Non-ABI compilation is not supported");
if (!isABI)
@@ -2107,7 +2140,8 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
theArgs[i],
(theArgs[i]->getParent() ? theArgs[i]->getParent()->getParent()
: nullptr))) {
- assert(isKernel && "Only kernels can have image/sampler params");
+ assert(llvm::isKernelFunction(*F) &&
+ "Only kernels can have image/sampler params");
InVals.push_back(DAG.getConstant(i + 1, dl, MVT::i32));
continue;
}
@@ -2212,7 +2246,8 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
SDValue P = DAG.getLoad(
EltVT, dl, Root, Arg, MachinePointerInfo(SrcValue),
DL.getABITypeAlignment(EltVT.getTypeForEVT(F->getContext())),
- MachineMemOperand::MOInvariant);
+ MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant);
if (P.getNode())
P.getNode()->setIROrder(idx + 1);
@@ -2229,7 +2264,8 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
SDValue P = DAG.getLoad(
VecVT, dl, Root, Arg, MachinePointerInfo(SrcValue),
DL.getABITypeAlignment(VecVT.getTypeForEVT(F->getContext())),
- MachineMemOperand::MOInvariant);
+ MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant);
if (P.getNode())
P.getNode()->setIROrder(idx + 1);
@@ -2271,7 +2307,8 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
SDValue P = DAG.getLoad(
VecVT, dl, Root, SrcAddr, MachinePointerInfo(SrcValue),
DL.getABITypeAlignment(VecVT.getTypeForEVT(F->getContext())),
- MachineMemOperand::MOInvariant);
+ MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant);
if (P.getNode())
P.getNode()->setIROrder(idx + 1);
@@ -2332,14 +2369,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments(
SDValue p = DAG.getNode(NVPTXISD::MoveParam, dl, ObjectVT, Arg);
if (p.getNode())
p.getNode()->setIROrder(idx + 1);
- if (isKernel)
- InVals.push_back(p);
- else {
- SDValue p2 = DAG.getNode(
- ISD::INTRINSIC_WO_CHAIN, dl, ObjectVT,
- DAG.getConstant(Intrinsic::nvvm_ptr_local_to_gen, dl, MVT::i32), p);
- InVals.push_back(p2);
- }
+ InVals.push_back(p);
}
// Clang will check explicit VarArg and issue error if any. However, Clang
@@ -2422,7 +2452,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
// 11 elem => 3 st.v4
unsigned VecSize = 4;
- if (OutVals[0].getValueType().getSizeInBits() == 64)
+ if (OutVals[0].getValueSizeInBits() == 64)
VecSize = 2;
unsigned Offset = 0;
@@ -2510,7 +2540,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
TmpVal = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, TmpVal);
TheStoreType = MVT::i32;
}
- else if (TmpVal.getValueType().getSizeInBits() < 16)
+ else if (TmpVal.getValueSizeInBits() < 16)
TmpVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, TmpVal);
SDValue Ops[] = {
@@ -3246,20 +3276,34 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
return false;
case Intrinsic::nvvm_atomic_load_add_f32:
- Info.opc = ISD::INTRINSIC_W_CHAIN;
- Info.memVT = MVT::f32;
- Info.ptrVal = I.getArgOperand(0);
- Info.offset = 0;
- Info.vol = 0;
- Info.readMem = true;
- Info.writeMem = true;
- Info.align = 0;
- return true;
-
case Intrinsic::nvvm_atomic_load_inc_32:
case Intrinsic::nvvm_atomic_load_dec_32:
+
+ case Intrinsic::nvvm_atomic_add_gen_f_cta:
+ case Intrinsic::nvvm_atomic_add_gen_f_sys:
+ case Intrinsic::nvvm_atomic_add_gen_i_cta:
+ case Intrinsic::nvvm_atomic_add_gen_i_sys:
+ case Intrinsic::nvvm_atomic_and_gen_i_cta:
+ case Intrinsic::nvvm_atomic_and_gen_i_sys:
+ case Intrinsic::nvvm_atomic_cas_gen_i_cta:
+ case Intrinsic::nvvm_atomic_cas_gen_i_sys:
+ case Intrinsic::nvvm_atomic_dec_gen_i_cta:
+ case Intrinsic::nvvm_atomic_dec_gen_i_sys:
+ case Intrinsic::nvvm_atomic_inc_gen_i_cta:
+ case Intrinsic::nvvm_atomic_inc_gen_i_sys:
+ case Intrinsic::nvvm_atomic_max_gen_i_cta:
+ case Intrinsic::nvvm_atomic_max_gen_i_sys:
+ case Intrinsic::nvvm_atomic_min_gen_i_cta:
+ case Intrinsic::nvvm_atomic_min_gen_i_sys:
+ case Intrinsic::nvvm_atomic_or_gen_i_cta:
+ case Intrinsic::nvvm_atomic_or_gen_i_sys:
+ case Intrinsic::nvvm_atomic_exch_gen_i_cta:
+ case Intrinsic::nvvm_atomic_exch_gen_i_sys:
+ case Intrinsic::nvvm_atomic_xor_gen_i_cta:
+ case Intrinsic::nvvm_atomic_xor_gen_i_sys: {
+ auto &DL = I.getModule()->getDataLayout();
Info.opc = ISD::INTRINSIC_W_CHAIN;
- Info.memVT = MVT::i32;
+ Info.memVT = getValueType(DL, I.getType());
Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;
Info.vol = 0;
@@ -3267,6 +3311,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
Info.writeMem = true;
Info.align = 0;
return true;
+ }
case Intrinsic::nvvm_ldu_global_i:
case Intrinsic::nvvm_ldu_global_f:
@@ -4089,6 +4134,37 @@ static SDValue PerformSELECTCombine(SDNode *N,
DCI.DAG.getConstant(IntrinsicId, DL, VT), LHS, RHS);
}
+static SDValue PerformREMCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ CodeGenOpt::Level OptLevel) {
+ assert(N->getOpcode() == ISD::SREM || N->getOpcode() == ISD::UREM);
+
+ // Don't do anything at less than -O2.
+ if (OptLevel < CodeGenOpt::Default)
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ bool IsSigned = N->getOpcode() == ISD::SREM;
+ unsigned DivOpc = IsSigned ? ISD::SDIV : ISD::UDIV;
+
+ const SDValue &Num = N->getOperand(0);
+ const SDValue &Den = N->getOperand(1);
+
+ for (const SDNode *U : Num->uses()) {
+ if (U->getOpcode() == DivOpc && U->getOperand(0) == Num &&
+ U->getOperand(1) == Den) {
+ // Num % Den -> Num - (Num / Den) * Den
+ return DAG.getNode(ISD::SUB, DL, VT, Num,
+ DAG.getNode(ISD::MUL, DL, VT,
+ DAG.getNode(DivOpc, DL, VT, Num, Den),
+ Den));
+ }
+ }
+ return SDValue();
+}
+
enum OperandSignedness {
Signed = 0,
Unsigned,
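PerformREMCombine above only fires when the matching division is already in the DAG, so the remainder is rebuilt from that division instead of emitting a second divide. A plain C++ sketch of the identity it relies on (valid for C/C++ truncating division with a non-zero divisor, signed or unsigned):

    #include <cassert>

    // Num % Den == Num - (Num / Den) * Den
    static int remViaDiv(int Num, int Den) {
      int Quot = Num / Den;     // the division the DAG already contains
      return Num - Quot * Den;  // what the combine emits instead of SREM/UREM
    }

    int main() {
      assert(remViaDiv(7, 3) == 7 % 3);
      assert(remViaDiv(-7, 3) == -7 % 3);
      return 0;
    }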
@@ -4270,6 +4346,9 @@ SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N,
return PerformANDCombine(N, DCI);
case ISD::SELECT:
return PerformSELECTCombine(N, DCI);
+ case ISD::UREM:
+ case ISD::SREM:
+ return PerformREMCombine(N, DCI, OptLevel);
}
return SDValue();
}
@@ -4554,9 +4633,7 @@ NVPTXTargetObjectFile::~NVPTXTargetObjectFile() {
delete static_cast<NVPTXSection *>(DwarfMacinfoSection);
}
-MCSection *
-NVPTXTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
- SectionKind Kind, Mangler &Mang,
- const TargetMachine &TM) const {
+MCSection *NVPTXTargetObjectFile::SelectSectionForGlobal(
+ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
return getDataSection();
}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
index 1c32232024d1..e433aed7781b 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -539,7 +539,8 @@ private:
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
unsigned getArgumentAlignment(SDValue Callee, const ImmutableCallSite *CS,
- Type *Ty, unsigned Idx) const;
+ Type *Ty, unsigned Idx,
+ const DataLayout &DL) const;
};
} // namespace llvm
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXInferAddressSpaces.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXInferAddressSpaces.cpp
index e451d273cf44..f4940c937a2d 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXInferAddressSpaces.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXInferAddressSpaces.cpp
@@ -87,9 +87,6 @@
// Finally, it fixes the undef in %y' so that
// %y' = phi float addrspace(3)* [ %input, %y2' ]
//
-// TODO: This pass is experimental and not enabled by default. Users can turn it
-// on by setting the -nvptx-use-infer-addrspace flag of llc. We plan to replace
-// NVPTXNonFavorGenericAddrSpaces with this pass shortly.
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "nvptx-infer-addrspace"
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
index 0c7c6cbc4512..7f89742a3215 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp
@@ -110,19 +110,6 @@ bool NVPTXInstrInfo::isStoreInstr(const MachineInstr &MI,
return isStore;
}
-bool NVPTXInstrInfo::CanTailMerge(const MachineInstr *MI) const {
- unsigned addrspace = 0;
- if (MI->getOpcode() == NVPTX::INT_BARRIER0)
- return false;
- if (isLoadInstr(*MI, addrspace))
- if (addrspace == NVPTX::PTXLdStInstCode::SHARED)
- return false;
- if (isStoreInstr(*MI, addrspace))
- if (addrspace == NVPTX::PTXLdStInstCode::SHARED)
- return false;
- return true;
-}
-
/// AnalyzeBranch - Analyze the branching code at the end of MBB, returning
/// true if it cannot be understood (e.g. it's a switch dispatch or isn't
/// implemented for a target). Upon success, this returns false and returns
@@ -143,7 +130,7 @@ bool NVPTXInstrInfo::CanTailMerge(const MachineInstr *MI) const {
/// operands can be passed to other TargetInstrInfo methods to create new
/// branches.
///
-/// Note that RemoveBranch and InsertBranch must be implemented to support
+/// Note that removeBranch and insertBranch must be implemented to support
/// cases where this method returns success.
///
bool NVPTXInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
@@ -205,7 +192,9 @@ bool NVPTXInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
return true;
}
-unsigned NVPTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+unsigned NVPTXInstrInfo::removeBranch(MachineBasicBlock &MBB,
+ int *BytesRemoved) const {
+ assert(!BytesRemoved && "code size not handled");
MachineBasicBlock::iterator I = MBB.end();
if (I == MBB.begin())
return 0;
@@ -229,13 +218,16 @@ unsigned NVPTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
return 2;
}
-unsigned NVPTXInstrInfo::InsertBranch(MachineBasicBlock &MBB,
+unsigned NVPTXInstrInfo::insertBranch(MachineBasicBlock &MBB,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
ArrayRef<MachineOperand> Cond,
- const DebugLoc &DL) const {
+ const DebugLoc &DL,
+ int *BytesAdded) const {
+ assert(!BytesAdded && "code size not handled");
+
// Shouldn't be a fall through.
- assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert(TBB && "insertBranch must not be told to insert a fallthrough");
assert((Cond.size() == 1 || Cond.size() == 0) &&
"NVPTX branch conditions have two components!");
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h
index 050bf12fe859..d284282e28c5 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h
@@ -57,16 +57,17 @@ public:
bool isLoadInstr(const MachineInstr &MI, unsigned &AddrSpace) const;
bool isStoreInstr(const MachineInstr &MI, unsigned &AddrSpace) const;
- virtual bool CanTailMerge(const MachineInstr *MI) const;
// Branch analysis.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const override;
- unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
- unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ unsigned removeBranch(MachineBasicBlock &MBB,
+ int *BytesRemoved = nullptr) const override;
+ unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
- const DebugLoc &DL) const override;
+ const DebugLoc &DL,
+ int *BytesAdded = nullptr) const override;
unsigned getLdStCodeAddrSpace(const MachineInstr &MI) const {
return MI.getOperand(2).getImm();
}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index c158cc6cdab2..92a88c7f2506 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -71,10 +71,6 @@ def CmpLT : PatLeaf<(i32 2)>;
def CmpLE : PatLeaf<(i32 3)>;
def CmpGT : PatLeaf<(i32 4)>;
def CmpGE : PatLeaf<(i32 5)>;
-def CmpLO : PatLeaf<(i32 6)>;
-def CmpLS : PatLeaf<(i32 7)>;
-def CmpHI : PatLeaf<(i32 8)>;
-def CmpHS : PatLeaf<(i32 9)>;
def CmpEQU : PatLeaf<(i32 10)>;
def CmpNEU : PatLeaf<(i32 11)>;
def CmpLTU : PatLeaf<(i32 12)>;
@@ -90,10 +86,6 @@ def CmpLT_FTZ : PatLeaf<(i32 0x102)>;
def CmpLE_FTZ : PatLeaf<(i32 0x103)>;
def CmpGT_FTZ : PatLeaf<(i32 0x104)>;
def CmpGE_FTZ : PatLeaf<(i32 0x105)>;
-def CmpLO_FTZ : PatLeaf<(i32 0x106)>;
-def CmpLS_FTZ : PatLeaf<(i32 0x107)>;
-def CmpHI_FTZ : PatLeaf<(i32 0x108)>;
-def CmpHS_FTZ : PatLeaf<(i32 0x109)>;
def CmpEQU_FTZ : PatLeaf<(i32 0x10A)>;
def CmpNEU_FTZ : PatLeaf<(i32 0x10B)>;
def CmpLTU_FTZ : PatLeaf<(i32 0x10C)>;
@@ -107,13 +99,6 @@ def CmpMode : Operand<i32> {
let PrintMethod = "printCmpMode";
}
-def F32ConstZero : Operand<f32>, PatLeaf<(f32 fpimm)>, SDNodeXForm<fpimm, [{
- return CurDAG->getTargetConstantFP(0.0, MVT::f32);
- }]>;
-def F32ConstOne : Operand<f32>, PatLeaf<(f32 fpimm)>, SDNodeXForm<fpimm, [{
- return CurDAG->getTargetConstantFP(1.0, MVT::f32);
- }]>;
-
//===----------------------------------------------------------------------===//
// NVPTX Instruction Predicate Definitions
//===----------------------------------------------------------------------===//
@@ -131,6 +116,10 @@ def hasAtomRedGen64 : Predicate<"Subtarget->hasAtomRedGen64()">;
def useAtomRedG64forGen64 :
Predicate<"!Subtarget->hasAtomRedGen64() && Subtarget->hasAtomRedG64()">;
def hasAtomAddF32 : Predicate<"Subtarget->hasAtomAddF32()">;
+def hasAtomAddF64 : Predicate<"Subtarget->hasAtomAddF64()">;
+def hasAtomScope : Predicate<"Subtarget->hasAtomScope()">;
+def hasAtomBitwise64 : Predicate<"Subtarget->hasAtomBitwise64()">;
+def hasAtomMinMax64 : Predicate<"Subtarget->hasAtomMinMax64()">;
def hasVote : Predicate<"Subtarget->hasVote()">;
def hasDouble : Predicate<"Subtarget->hasDouble()">;
def reqPTX20 : Predicate<"Subtarget->reqPTX20()">;
@@ -207,15 +196,63 @@ multiclass ADD_SUB_INT_32<string OpcStr, SDNode OpNode> {
}
// Template for instructions which take three fp64 or fp32 args. The
-// instructions are named "<OpcStr>.f<Width>" (e.g. "add.f64").
+// instructions are named "<OpcStr>.f<Width>" (e.g. "min.f64").
//
// Also defines ftz (flush subnormal inputs and results to sign-preserving
// zero) variants for fp32 functions.
+//
+// This multiclass should be used for nodes that cannot be folded into FMAs.
+// For nodes that can be folded into FMAs (i.e. adds and muls), use
+// F3_fma_component.
multiclass F3<string OpcStr, SDNode OpNode> {
def f64rr :
NVPTXInst<(outs Float64Regs:$dst),
(ins Float64Regs:$a, Float64Regs:$b),
!strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
+ [(set Float64Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>;
+ def f64ri :
+ NVPTXInst<(outs Float64Regs:$dst),
+ (ins Float64Regs:$a, f64imm:$b),
+ !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
+ [(set Float64Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>;
+ def f32rr_ftz :
+ NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b),
+ !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
+ [(set Float32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>,
+ Requires<[doF32FTZ]>;
+ def f32ri_ftz :
+ NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, f32imm:$b),
+ !strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
+ [(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>,
+ Requires<[doF32FTZ]>;
+ def f32rr :
+ NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, Float32Regs:$b),
+ !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
+ [(set Float32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>;
+ def f32ri :
+ NVPTXInst<(outs Float32Regs:$dst),
+ (ins Float32Regs:$a, f32imm:$b),
+ !strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
+ [(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>;
+}
+
+// Template for instructions which take three fp64 or fp32 args. The
+// instructions are named "<OpcStr>.f<Width>" (e.g. "add.f64").
+//
+// Also defines ftz (flush subnormal inputs and results to sign-preserving
+// zero) variants for fp32 functions.
+//
+// This multiclass should be used for nodes that can be folded to make fma ops.
+// In this case, we use the ".rn" variant when FMA is disabled, as this behaves
+// just like the non ".rn" op, but prevents ptxas from creating FMAs.
+multiclass F3_fma_component<string OpcStr, SDNode OpNode> {
+ def f64rr :
+ NVPTXInst<(outs Float64Regs:$dst),
+ (ins Float64Regs:$a, Float64Regs:$b),
+ !strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
[(set Float64Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>,
Requires<[allowFMA]>;
def f64ri :
@@ -248,41 +285,39 @@ multiclass F3<string OpcStr, SDNode OpNode> {
!strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
[(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>,
Requires<[allowFMA]>;
-}
-// Same as F3, but defines ".rn" variants (round to nearest even).
-multiclass F3_rn<string OpcStr, SDNode OpNode> {
- def f64rr :
+ // These have strange names so we don't perturb existing mir tests.
+ def _rnf64rr :
NVPTXInst<(outs Float64Regs:$dst),
(ins Float64Regs:$a, Float64Regs:$b),
!strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
[(set Float64Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>,
Requires<[noFMA]>;
- def f64ri :
+ def _rnf64ri :
NVPTXInst<(outs Float64Regs:$dst),
(ins Float64Regs:$a, f64imm:$b),
!strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
[(set Float64Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>,
Requires<[noFMA]>;
- def f32rr_ftz :
+ def _rnf32rr_ftz :
NVPTXInst<(outs Float32Regs:$dst),
(ins Float32Regs:$a, Float32Regs:$b),
!strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
[(set Float32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>,
Requires<[noFMA, doF32FTZ]>;
- def f32ri_ftz :
+ def _rnf32ri_ftz :
NVPTXInst<(outs Float32Regs:$dst),
(ins Float32Regs:$a, f32imm:$b),
!strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
[(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>,
Requires<[noFMA, doF32FTZ]>;
- def f32rr :
+ def _rnf32rr :
NVPTXInst<(outs Float32Regs:$dst),
(ins Float32Regs:$a, Float32Regs:$b),
!strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
[(set Float32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>,
Requires<[noFMA]>;
- def f32ri :
+ def _rnf32ri :
NVPTXInst<(outs Float32Regs:$dst),
(ins Float32Regs:$a, f32imm:$b),
!strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
@@ -704,22 +739,21 @@ def INEG64 :
// Constant 1.0f
def FloatConst1 : PatLeaf<(fpimm), [{
- return &N->getValueAPF().getSemantics() == &llvm::APFloat::IEEEsingle &&
+ return &N->getValueAPF().getSemantics() == &llvm::APFloat::IEEEsingle() &&
N->getValueAPF().convertToFloat() == 1.0f;
}]>;
// Constant 1.0 (double)
def DoubleConst1 : PatLeaf<(fpimm), [{
- return &N->getValueAPF().getSemantics() == &llvm::APFloat::IEEEdouble &&
+ return &N->getValueAPF().getSemantics() == &llvm::APFloat::IEEEdouble() &&
N->getValueAPF().convertToDouble() == 1.0;
}]>;
-defm FADD : F3<"add", fadd>;
-defm FSUB : F3<"sub", fsub>;
-defm FMUL : F3<"mul", fmul>;
+defm FADD : F3_fma_component<"add", fadd>;
+defm FSUB : F3_fma_component<"sub", fsub>;
+defm FMUL : F3_fma_component<"mul", fmul>;
-defm FADD_rn : F3_rn<"add", fadd>;
-defm FSUB_rn : F3_rn<"sub", fsub>;
-defm FMUL_rn : F3_rn<"mul", fmul>;
+defm FMIN : F3<"min", fminnum>;
+defm FMAX : F3<"max", fmaxnum>;
defm FABS : F2<"abs", fabs>;
defm FNEG : F2<"neg", fneg>;
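F3_fma_component keeps the ".rn" forms for the no-FMA case because a fused multiply-add rounds once while a separate mul/add rounds twice, so letting ptxas contract the pair changes results. A host-side illustration of that difference (compile with contraction disabled, e.g. -ffp-contract=off, so the compiler does not fuse the first expression itself):

    #include <cmath>
    #include <cstdio>

    int main() {
      float a = 1.0f + 0x1.0p-23f;             // 1 + ulp
      float b = 1.0f - 0x1.0p-23f;             // 1 - ulp
      float c = -1.0f;
      float twoRoundings = a * b + c;          // a*b rounds to 1.0f, result 0
      float oneRounding  = std::fmaf(a, b, c); // exact -2^-46 survives
      std::printf("%a vs %a\n", twoRoundings, oneRounding);
      return 0;
    }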
@@ -2613,21 +2647,70 @@ def : Pat<(ctpop Int64Regs:$a), (CVT_u64_u32 (POPCr64 Int64Regs:$a), CvtNONE)>;
def : Pat<(ctpop Int16Regs:$a),
(CVT_u16_u32 (POPCr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), CvtNONE)>;
-// fround f64 -> f32
-def : Pat<(f32 (fround Float64Regs:$a)),
+// fpround f64 -> f32
+def : Pat<(f32 (fpround Float64Regs:$a)),
(CVT_f32_f64 Float64Regs:$a, CvtRN_FTZ)>, Requires<[doF32FTZ]>;
-def : Pat<(f32 (fround Float64Regs:$a)),
+def : Pat<(f32 (fpround Float64Regs:$a)),
(CVT_f32_f64 Float64Regs:$a, CvtRN)>;
-// fextend f32 -> f64
-def : Pat<(f64 (fextend Float32Regs:$a)),
+// fpextend f32 -> f64
+def : Pat<(f64 (fpextend Float32Regs:$a)),
(CVT_f64_f32 Float32Regs:$a, CvtNONE_FTZ)>, Requires<[doF32FTZ]>;
-def : Pat<(f64 (fextend Float32Regs:$a)),
+def : Pat<(f64 (fpextend Float32Regs:$a)),
(CVT_f64_f32 Float32Regs:$a, CvtNONE)>;
def retflag : SDNode<"NVPTXISD::RET_FLAG", SDTNone,
[SDNPHasChain, SDNPOptInGlue]>;
+// fceil, ffloor, fround, ftrunc.
+
+def : Pat<(fceil Float32Regs:$a),
+ (CVT_f32_f32 Float32Regs:$a, CvtRPI_FTZ)>, Requires<[doF32FTZ]>;
+def : Pat<(fceil Float32Regs:$a),
+ (CVT_f32_f32 Float32Regs:$a, CvtRPI)>, Requires<[doNoF32FTZ]>;
+def : Pat<(fceil Float64Regs:$a),
+ (CVT_f64_f64 Float64Regs:$a, CvtRPI)>;
+
+def : Pat<(ffloor Float32Regs:$a),
+ (CVT_f32_f32 Float32Regs:$a, CvtRMI_FTZ)>, Requires<[doF32FTZ]>;
+def : Pat<(ffloor Float32Regs:$a),
+ (CVT_f32_f32 Float32Regs:$a, CvtRMI)>, Requires<[doNoF32FTZ]>;
+def : Pat<(ffloor Float64Regs:$a),
+ (CVT_f64_f64 Float64Regs:$a, CvtRMI)>;
+
+def : Pat<(fround Float32Regs:$a),
+ (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>;
+def : Pat<(f32 (fround Float32Regs:$a)),
+ (CVT_f32_f32 Float32Regs:$a, CvtRNI)>, Requires<[doNoF32FTZ]>;
+def : Pat<(f64 (fround Float64Regs:$a)),
+ (CVT_f64_f64 Float64Regs:$a, CvtRNI)>;
+
+def : Pat<(ftrunc Float32Regs:$a),
+ (CVT_f32_f32 Float32Regs:$a, CvtRZI_FTZ)>, Requires<[doF32FTZ]>;
+def : Pat<(ftrunc Float32Regs:$a),
+ (CVT_f32_f32 Float32Regs:$a, CvtRZI)>, Requires<[doNoF32FTZ]>;
+def : Pat<(ftrunc Float64Regs:$a),
+ (CVT_f64_f64 Float64Regs:$a, CvtRZI)>;
+
+// nearbyint and rint are implemented as rounding to nearest even. This isn't
+// strictly correct, because it causes us to ignore the rounding mode. But it
+// matches what CUDA's "libm" does.
+
+def : Pat<(fnearbyint Float32Regs:$a),
+ (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>;
+def : Pat<(fnearbyint Float32Regs:$a),
+ (CVT_f32_f32 Float32Regs:$a, CvtRNI)>, Requires<[doNoF32FTZ]>;
+def : Pat<(fnearbyint Float64Regs:$a),
+ (CVT_f64_f64 Float64Regs:$a, CvtRNI)>;
+
+def : Pat<(frint Float32Regs:$a),
+ (CVT_f32_f32 Float32Regs:$a, CvtRNI_FTZ)>, Requires<[doF32FTZ]>;
+def : Pat<(frint Float32Regs:$a),
+ (CVT_f32_f32 Float32Regs:$a, CvtRNI)>, Requires<[doNoF32FTZ]>;
+def : Pat<(frint Float64Regs:$a),
+ (CVT_f64_f64 Float64Regs:$a, CvtRNI)>;
+
+
//-----------------------------------
// Control-flow
//-----------------------------------
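As the comment above says, mapping fnearbyint/frint to cvt.rni hard-codes round-to-nearest-even and ignores the dynamic rounding mode. On the host the mode does matter, which a short C++ check makes visible (strictly this also wants #pragma STDC FENV_ACCESS ON; the claim about CUDA's libm is the comment's, not re-verified here):

    #include <cfenv>
    #include <cmath>
    #include <cstdio>

    int main() {
      std::fesetround(FE_TONEAREST);
      std::printf("%g\n", std::nearbyint(0.25)); // 0 under round-to-nearest
      std::fesetround(FE_UPWARD);
      std::printf("%g\n", std::nearbyint(0.25)); // 1 once the mode changes
      return 0;
    }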
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td b/contrib/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
index ed16afa24752..b0408f12f5b1 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
@@ -1377,8 +1377,204 @@ defm INT_PTX_ATOM_CAS_GEN_64 : F_ATOMIC_3<Int64Regs, "", ".b64", ".cas",
defm INT_PTX_ATOM_CAS_GEN_64_USE_G : F_ATOMIC_3<Int64Regs, ".global", ".b64",
".cas", atomic_cmp_swap_64_gen, i64imm, useAtomRedG64forGen64>;
+// Support for scoped atomic operations. Matches
+// int_nvvm_atomic_{op}_{space}_{type}_{scope}
+// and converts it into the appropriate instruction.
+// NOTE: not all possible combinations are implemented
+// 'space' is limited to generic as it's the only one needed to support CUDA.
+// 'scope' = 'gpu' is default and is handled by regular atomic instructions.
+class ATOM23_impl<string AsmStr, NVPTXRegClass regclass, list<Predicate> Preds,
+ dag ins, dag Operands>
+ : NVPTXInst<(outs regclass:$result), ins,
+ AsmStr,
+ [(set regclass:$result, Operands)]>,
+ Requires<Preds>;
+
+// Define instruction variants for all addressing modes.
+multiclass ATOM2P_impl<string AsmStr, Intrinsic Intr,
+ NVPTXRegClass regclass, Operand ImmType,
+ SDNode Imm, ValueType ImmTy,
+ list<Predicate> Preds> {
+ let AddedComplexity = 1 in {
+ def : ATOM23_impl<AsmStr, regclass, Preds,
+ (ins Int32Regs:$src, regclass:$b),
+ (Intr Int32Regs:$src, regclass:$b)>;
+ def : ATOM23_impl<AsmStr, regclass, Preds,
+ (ins Int64Regs:$src, regclass:$b),
+ (Intr Int64Regs:$src, regclass:$b)>;
+ }
+ // tablegen can't infer argument types from Intrinsic (though it can
+ // from Instruction) so we have to enforce a specific type on
+ // immediates via explicit cast to ImmTy.
+ def : ATOM23_impl<AsmStr, regclass, Preds,
+ (ins Int32Regs:$src, ImmType:$b),
+ (Intr Int32Regs:$src, (ImmTy Imm:$b))>;
+ def : ATOM23_impl<AsmStr, regclass, Preds,
+ (ins Int64Regs:$src, ImmType:$b),
+ (Intr Int64Regs:$src, (ImmTy Imm:$b))>;
+}
+
+multiclass ATOM3P_impl<string AsmStr, Intrinsic Intr,
+ NVPTXRegClass regclass, Operand ImmType,
+ SDNode Imm, ValueType ImmTy,
+ list<Predicate> Preds> {
+ // Variants for register/immediate permutations of $b and $c
+ let AddedComplexity = 2 in {
+ def : ATOM23_impl<AsmStr, regclass, Preds,
+ (ins Int32Regs:$src, regclass:$b, regclass:$c),
+ (Intr Int32Regs:$src, regclass:$b, regclass:$c)>;
+ def : ATOM23_impl<AsmStr, regclass, Preds,
+ (ins Int64Regs:$src, regclass:$b, regclass:$c),
+ (Intr Int64Regs:$src, regclass:$b, regclass:$c)>;
+ }
+ let AddedComplexity = 1 in {
+ def : ATOM23_impl<AsmStr, regclass, Preds,
+ (ins Int32Regs:$src, ImmType:$b, regclass:$c),
+ (Intr Int32Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
+ def : ATOM23_impl<AsmStr, regclass, Preds,
+ (ins Int64Regs:$src, ImmType:$b, regclass:$c),
+ (Intr Int64Regs:$src, (ImmTy Imm:$b), regclass:$c)>;
+ def : ATOM23_impl<AsmStr, regclass, Preds,
+ (ins Int32Regs:$src, regclass:$b, ImmType:$c),
+ (Intr Int32Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
+ def : ATOM23_impl<AsmStr, regclass, Preds,
+ (ins Int64Regs:$src, regclass:$b, ImmType:$c),
+ (Intr Int64Regs:$src, regclass:$b, (ImmTy Imm:$c))>;
+ }
+ def : ATOM23_impl<AsmStr, regclass, Preds,
+ (ins Int32Regs:$src, ImmType:$b, ImmType:$c),
+ (Intr Int32Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
+ def : ATOM23_impl<AsmStr, regclass, Preds,
+ (ins Int64Regs:$src, ImmType:$b, ImmType:$c),
+ (Intr Int64Regs:$src, (ImmTy Imm:$b), (ImmTy Imm:$c))>;
+}
+
+// Constructs intrinsic name and instruction asm strings.
+multiclass ATOM2N_impl<string OpStr, string IntTypeStr, string TypeStr,
+ string ScopeStr, string SpaceStr,
+ NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
+ ValueType ImmTy, list<Predicate> Preds> {
+ defm : ATOM2P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
+ # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
+ # "." # OpStr # "." # TypeStr
+ # " \t$result, [$src], $b;",
+ !cast<Intrinsic>(
+ "int_nvvm_atomic_" # OpStr
+ # "_" # SpaceStr # "_" # IntTypeStr
+ # !if(!eq(ScopeStr,""), "", "_" # ScopeStr)),
+ regclass, ImmType, Imm, ImmTy, Preds>;
+}
+multiclass ATOM3N_impl<string OpStr, string IntTypeStr, string TypeStr,
+ string ScopeStr, string SpaceStr,
+ NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
+ ValueType ImmTy, list<Predicate> Preds> {
+ defm : ATOM3P_impl<"atom" # !if(!eq(SpaceStr, "gen"), "", "." # SpaceStr)
+ # !if(!eq(ScopeStr, "gpu"), "", "." # ScopeStr)
+ # "." # OpStr # "." # TypeStr
+ # " \t$result, [$src], $b, $c;",
+ !cast<Intrinsic>(
+ "int_nvvm_atomic_" # OpStr
+ # "_" # SpaceStr # "_" # IntTypeStr
+ # !if(!eq(ScopeStr,""), "", "_" # ScopeStr)),
+ regclass, ImmType, Imm, ImmTy, Preds>;
+}
+
+// Constructs variants for different address spaces.
+// For now we only need variants for generic space pointers.
+multiclass ATOM2A_impl<string OpStr, string IntTypeStr, string TypeStr,
+ string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
+ SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
+ defm _gen_ : ATOM2N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
+ regclass, ImmType, Imm, ImmTy, Preds>;
+}
+multiclass ATOM3A_impl<string OpStr, string IntTypeStr, string TypeStr,
+ string ScopeStr, NVPTXRegClass regclass, Operand ImmType,
+ SDNode Imm, ValueType ImmTy, list<Predicate> Preds> {
+ defm _gen_ : ATOM3N_impl<OpStr, IntTypeStr, TypeStr, ScopeStr, "gen",
+ regclass, ImmType, Imm, ImmTy, Preds>;
+}
+
+// Constructs variants for different scopes of atomic op.
+multiclass ATOM2S_impl<string OpStr, string IntTypeStr, string TypeStr,
+ NVPTXRegClass regclass, Operand ImmType, SDNode Imm,
+ ValueType ImmTy, list<Predicate> Preds> {
+ // .gpu scope is default and is currently covered by existing
+ // atomics w/o explicitly specified scope.
+ defm _cta : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "cta",
+ regclass, ImmType, Imm, ImmTy,
+ !listconcat(Preds,[hasAtomScope])>;
+ defm _sys : ATOM2A_impl<OpStr, IntTypeStr, TypeStr, "sys",
+ regclass, ImmType, Imm, ImmTy,
+ !listconcat(Preds,[hasAtomScope])>;
+}
+multiclass ATOM3S_impl<string OpStr, string IntTypeStr, string TypeStr,
+ NVPTXRegClass regclass, Operand ImmType, SDNode Imm, ValueType ImmTy,
+ list<Predicate> Preds> {
+ // No need to define ".gpu"-scoped atomics. They do the same thing
+ // as the regular, non-scoped atomics defined elsewhere.
+ defm _cta : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "cta",
+ regclass, ImmType, Imm, ImmTy,
+ !listconcat(Preds,[hasAtomScope])>;
+ defm _sys : ATOM3A_impl<OpStr, IntTypeStr, TypeStr, "sys",
+ regclass, ImmType, Imm, ImmTy,
+ !listconcat(Preds,[hasAtomScope])>;
+}
+// atom.add
+multiclass ATOM2_add_impl<string OpStr> {
+ defm _s32 : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
+ defm _u32 : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
+ defm _u64 : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64, []>;
+ defm _f32 : ATOM2S_impl<OpStr, "f", "f32", Float32Regs, f32imm, fpimm, f32,
+ [hasAtomAddF32]>;
+ defm _f64 : ATOM2S_impl<OpStr, "f", "f64", Float64Regs, f64imm, fpimm, f64,
+ [hasAtomAddF64]>;
+}
+
+// atom.{and,or,xor}
+multiclass ATOM2_bitwise_impl<string OpStr> {
+ defm _b32 : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
+ defm _b64 : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64,
+ [hasAtomBitwise64]>;
+}
+
+// atom.exch
+multiclass ATOM2_exch_impl<string OpStr> {
+ defm _b32 : ATOM2S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
+ defm _b64 : ATOM2S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
+}
+
+// atom.{min,max}
+multiclass ATOM2_minmax_impl<string OpStr> {
+ defm _s32 : ATOM2S_impl<OpStr, "i", "s32", Int32Regs, i32imm, imm, i32, []>;
+ defm _u32 : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
+ defm _s64 : ATOM2S_impl<OpStr, "i", "s64", Int64Regs, i64imm, imm, i64,
+ [hasAtomMinMax64]>;
+ defm _u64 : ATOM2S_impl<OpStr, "i", "u64", Int64Regs, i64imm, imm, i64,
+ [hasAtomMinMax64]>;
+}
+
+// atom.{inc,dec}
+multiclass ATOM2_incdec_impl<string OpStr> {
+ defm _u32 : ATOM2S_impl<OpStr, "i", "u32", Int32Regs, i32imm, imm, i32, []>;
+}
+
+// atom.cas
+multiclass ATOM3_cas_impl<string OpStr> {
+ defm _b32 : ATOM3S_impl<OpStr, "i", "b32", Int32Regs, i32imm, imm, i32, []>;
+ defm _b64 : ATOM3S_impl<OpStr, "i", "b64", Int64Regs, i64imm, imm, i64, []>;
+}
+defm INT_PTX_SATOM_ADD : ATOM2_add_impl<"add">;
+defm INT_PTX_SATOM_AND : ATOM2_bitwise_impl<"and">;
+defm INT_PTX_SATOM_CAS : ATOM3_cas_impl<"cas">;
+defm INT_PTX_SATOM_DEC : ATOM2_incdec_impl<"dec">;
+defm INT_PTX_SATOM_EXCH : ATOM2_exch_impl<"exch">;
+defm INT_PTX_SATOM_INC : ATOM2_incdec_impl<"inc">;
+defm INT_PTX_SATOM_MAX : ATOM2_minmax_impl<"max">;
+defm INT_PTX_SATOM_MIN : ATOM2_minmax_impl<"min">;
+defm INT_PTX_SATOM_OR : ATOM2_bitwise_impl<"or">;
+defm INT_PTX_SATOM_XOR : ATOM2_bitwise_impl<"xor">;
//-----------------------------------
// Support for ldu on sm_20 or later
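The ATOM*N_impl multiclasses above derive the PTX mnemonic and the intrinsic name from the same building blocks. A C++ mock of that concatenation shows what one instantiation expands to (scopedAtomAsm is purely illustrative, not an LLVM API):

    #include <cassert>
    #include <string>

    // Mirrors ATOM2N_impl: "gen" space and "gpu" scope stay implicit,
    // anything else becomes an extra ".<suffix>".
    static std::string scopedAtomAsm(const std::string &Op,
                                     const std::string &Ty,
                                     const std::string &Scope,
                                     const std::string &Space) {
      std::string S = "atom";
      if (Space != "gen") S += "." + Space;
      if (Scope != "gpu") S += "." + Scope;
      return S + "." + Op + "." + Ty + " \t$result, [$src], $b;";
    }

    int main() {
      // Pairs with int_nvvm_atomic_add_gen_f_cta:
      assert(scopedAtomAsm("add", "f32", "cta", "gen") ==
             "atom.cta.add.f32 \t$result, [$src], $b;");
      return 0;
    }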
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
index f770c2acaab5..b925b632ee4a 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp
@@ -14,7 +14,6 @@
//===----------------------------------------------------------------------===//
#include "NVPTXLowerAggrCopies.h"
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/StackProtector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
@@ -41,7 +40,6 @@ struct NVPTXLowerAggrCopies : public FunctionPass {
NVPTXLowerAggrCopies() : FunctionPass(ID) {}
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addPreserved<MachineFunctionAnalysis>();
AU.addPreserved<StackProtector>();
}
@@ -49,7 +47,7 @@ struct NVPTXLowerAggrCopies : public FunctionPass {
static const unsigned MaxAggrCopySize = 128;
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "Lower aggregate copies/intrinsics into loops";
}
};
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
index fa1a3ef3fe24..e94c1914029d 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerAlloca.cpp
@@ -20,8 +20,8 @@
// %Generic = addrspacecast i32 addrspace(5)* %A to i32*
// store i32 0, i32 addrspace(5)* %Generic ; emits st.local.u32
//
-// And we will rely on NVPTXFavorNonGenericAddrSpace to combine the last
-// two instructions.
+// And we will rely on NVPTXInferAddressSpaces to combine the last two
+// instructions.
//
//===----------------------------------------------------------------------===//
@@ -47,7 +47,7 @@ class NVPTXLowerAlloca : public BasicBlockPass {
public:
static char ID; // Pass identification, replacement for typeid
NVPTXLowerAlloca() : BasicBlockPass(ID) {}
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "convert address space of alloca'ed memory to local";
}
};
@@ -83,7 +83,7 @@ bool NVPTXLowerAlloca::runOnBasicBlock(BasicBlock &BB) {
UI != UE; ) {
// Check Load, Store, GEP, and BitCast Uses on alloca and make them
// use the converted generic address, in order to expose non-generic
- // addrspacecast to NVPTXFavorNonGenericAddrSpace. For other types
+ // addrspacecast to NVPTXInferAddressSpaces. For other types
// of instructions this is unnecessary and may introduce redundant
// address cast.
const auto &AllocaUse = *UI++;
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
index d162a283f745..3f0c7be7863d 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXLowerKernelArgs.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXLowerArgs.cpp
@@ -1,4 +1,4 @@
-//===-- NVPTXLowerKernelArgs.cpp - Lower kernel arguments -----------------===//
+//===-- NVPTXLowerArgs.cpp - Lower arguments ------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,20 +7,28 @@
//
//===----------------------------------------------------------------------===//
//
-// Pointer arguments to kernel functions need to be lowered specially.
//
-// 1. Copy byval struct args to local memory. This is a preparation for handling
-// cases like
+// Arguments to kernel and device functions are passed via param space,
+// which imposes certain restrictions:
+// http://docs.nvidia.com/cuda/parallel-thread-execution/#state-spaces
//
-// kernel void foo(struct A arg, ...)
-// {
-// struct A *p = &arg;
-// ...
-// ... = p->filed1 ... (this is no generic address for .param)
-// p->filed2 = ... (this is no write access to .param)
-// }
+// Kernel parameters are read-only and accessible only via ld.param
+// instruction, directly or via a pointer. Pointers to kernel
+// arguments can't be converted to generic address space.
+//
+// Device function parameters are directly accessible via
+// ld.param/st.param, but taking the address of one returns a pointer
+// to a copy created in local space which *can't* be used with
+// ld.param/st.param.
//
-// 2. Convert non-byval pointer arguments of CUDA kernels to pointers in the
+// Copying a byval struct into local memory in IR allows us to enforce
+// the param space restrictions, gives the rest of IR a pointer w/o
+// param space restrictions, and gives us an opportunity to eliminate
+// the copy.
+//
+// Pointer arguments to kernel functions need more work to be lowered:
+//
+// 1. Convert non-byval pointer arguments of CUDA kernels to pointers in the
// global address space. This allows later optimizations to emit
// ld.global.*/st.global.* for accessing these pointer arguments. For
// example,
@@ -39,7 +47,7 @@
// ...
// }
//
-// Later, NVPTXFavorNonGenericAddrSpaces will optimize it to
+// Later, NVPTXInferAddressSpaces will optimize it to
//
// define void @foo(float* %input) {
// %input2 = addrspacecast float* %input to float addrspace(1)*
@@ -47,7 +55,7 @@
// ...
// }
//
-// 3. Convert pointers in a byval kernel parameter to pointers in the global
+// 2. Convert pointers in a byval kernel parameter to pointers in the global
// address space. As #2, it allows NVPTX to emit more ld/st.global. E.g.,
//
// struct S {
@@ -77,8 +85,8 @@
// ; use %b_generic
// }
//
-// TODO: merge this pass with NVPTXFavorNonGenericAddrSpace so that other passes
-// don't cancel the addrspacecast pair this pass emits.
+// TODO: merge this pass with NVPTXInferAddressSpaces so that other passes don't
+// cancel the addrspacecast pair this pass emits.
//===----------------------------------------------------------------------===//
#include "NVPTX.h"
@@ -94,26 +102,29 @@
using namespace llvm;
namespace llvm {
-void initializeNVPTXLowerKernelArgsPass(PassRegistry &);
+void initializeNVPTXLowerArgsPass(PassRegistry &);
}
namespace {
-class NVPTXLowerKernelArgs : public FunctionPass {
+class NVPTXLowerArgs : public FunctionPass {
bool runOnFunction(Function &F) override;
+ bool runOnKernelFunction(Function &F);
+ bool runOnDeviceFunction(Function &F);
+
// handle byval parameters
void handleByValParam(Argument *Arg);
// Knowing Ptr must point to the global address space, this function
// addrspacecasts Ptr to global and then back to generic. This allows
- // NVPTXFavorNonGenericAddrSpace to fold the global-to-generic cast into
+ // NVPTXInferAddressSpaces to fold the global-to-generic cast into
// loads/stores that appear later.
void markPointerAsGlobal(Value *Ptr);
public:
static char ID; // Pass identification, replacement for typeid
- NVPTXLowerKernelArgs(const NVPTXTargetMachine *TM = nullptr)
+ NVPTXLowerArgs(const NVPTXTargetMachine *TM = nullptr)
: FunctionPass(ID), TM(TM) {}
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "Lower pointer arguments of CUDA kernels";
}
@@ -122,10 +133,10 @@ private:
};
} // namespace
-char NVPTXLowerKernelArgs::ID = 1;
+char NVPTXLowerArgs::ID = 1;
-INITIALIZE_PASS(NVPTXLowerKernelArgs, "nvptx-lower-kernel-args",
- "Lower kernel arguments (NVPTX)", false, false)
+INITIALIZE_PASS(NVPTXLowerArgs, "nvptx-lower-args",
+ "Lower arguments (NVPTX)", false, false)
// =============================================================================
// If the function had a byval struct ptr arg, say foo(%struct.x* byval %d),
@@ -140,7 +151,7 @@ INITIALIZE_PASS(NVPTXLowerKernelArgs, "nvptx-lower-kernel-args",
// struct from param space to local space.
// Then replace all occurrences of %d by %temp.
// =============================================================================
-void NVPTXLowerKernelArgs::handleByValParam(Argument *Arg) {
+void NVPTXLowerArgs::handleByValParam(Argument *Arg) {
Function *Func = Arg->getParent();
Instruction *FirstInst = &(Func->getEntryBlock().front());
PointerType *PType = dyn_cast<PointerType>(Arg->getType());
@@ -162,7 +173,7 @@ void NVPTXLowerKernelArgs::handleByValParam(Argument *Arg) {
new StoreInst(LI, AllocA, FirstInst);
}
-void NVPTXLowerKernelArgs::markPointerAsGlobal(Value *Ptr) {
+void NVPTXLowerArgs::markPointerAsGlobal(Value *Ptr) {
if (Ptr->getType()->getPointerAddressSpace() == ADDRESS_SPACE_GLOBAL)
return;
@@ -192,11 +203,7 @@ void NVPTXLowerKernelArgs::markPointerAsGlobal(Value *Ptr) {
// =============================================================================
// Main function for this pass.
// =============================================================================
-bool NVPTXLowerKernelArgs::runOnFunction(Function &F) {
- // Skip non-kernels. See the comments at the top of this file.
- if (!isKernelFunction(F))
- return false;
-
+bool NVPTXLowerArgs::runOnKernelFunction(Function &F) {
if (TM && TM->getDrvInterface() == NVPTX::CUDA) {
// Mark pointers in byval structs as global.
for (auto &B : F) {
@@ -228,7 +235,19 @@ bool NVPTXLowerKernelArgs::runOnFunction(Function &F) {
return true;
}
+// Device functions only need to copy byval args into local memory.
+bool NVPTXLowerArgs::runOnDeviceFunction(Function &F) {
+ for (Argument &Arg : F.args())
+ if (Arg.getType()->isPointerTy() && Arg.hasByValAttr())
+ handleByValParam(&Arg);
+ return true;
+}
+
+bool NVPTXLowerArgs::runOnFunction(Function &F) {
+ return isKernelFunction(F) ? runOnKernelFunction(F) : runOnDeviceFunction(F);
+}
+
FunctionPass *
-llvm::createNVPTXLowerKernelArgsPass(const NVPTXTargetMachine *TM) {
- return new NVPTXLowerKernelArgs(TM);
+llvm::createNVPTXLowerArgsPass(const NVPTXTargetMachine *TM) {
+ return new NVPTXLowerArgs(TM);
}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.cpp
index 84d5239ec096..eab5ee80561e 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.cpp
@@ -30,12 +30,12 @@ void NVPTXFloatMCExpr::printImpl(raw_ostream &OS, const MCAsmInfo *MAI) const {
case VK_NVPTX_SINGLE_PREC_FLOAT:
OS << "0f";
NumHex = 8;
- APF.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven, &Ignored);
+ APF.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &Ignored);
break;
case VK_NVPTX_DOUBLE_PREC_FLOAT:
OS << "0d";
NumHex = 16;
- APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &Ignored);
+ APF.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Ignored);
break;
}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXPeephole.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXPeephole.cpp
index 7d0cd553e03f..49e639793efc 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXPeephole.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXPeephole.cpp
@@ -57,7 +57,7 @@ struct NVPTXPeephole : public MachineFunctionPass {
bool runOnMachineFunction(MachineFunction &MF) override;
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "NVPTX optimize redundant cvta.to.local instruction";
}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
index 029e0097c5dc..88288abe64f9 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
@@ -80,14 +80,14 @@ bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) {
/// AdjustStackOffset - Helper function used to adjust the stack frame offset.
static inline void
-AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx,
+AdjustStackOffset(MachineFrameInfo &MFI, int FrameIdx,
bool StackGrowsDown, int64_t &Offset,
unsigned &MaxAlign) {
// If the stack grows down, add the object size to find the lowest address.
if (StackGrowsDown)
- Offset += MFI->getObjectSize(FrameIdx);
+ Offset += MFI.getObjectSize(FrameIdx);
- unsigned Align = MFI->getObjectAlignment(FrameIdx);
+ unsigned Align = MFI.getObjectAlignment(FrameIdx);
// If the alignment of this object is greater than that of the stack, then
// increase the stack alignment to match.
@@ -98,11 +98,11 @@ AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx,
if (StackGrowsDown) {
DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset << "]\n");
- MFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset
+ MFI.setObjectOffset(FrameIdx, -Offset); // Set the computed offset
} else {
DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << Offset << "]\n");
- MFI->setObjectOffset(FrameIdx, Offset);
- Offset += MFI->getObjectSize(FrameIdx);
+ MFI.setObjectOffset(FrameIdx, Offset);
+ Offset += MFI.getObjectSize(FrameIdx);
}
}
@@ -115,7 +115,7 @@ NVPTXPrologEpilogPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
// Loop over all of the stack objects, assigning sequential addresses...
- MachineFrameInfo *MFI = Fn.getFrameInfo();
+ MachineFrameInfo &MFI = Fn.getFrameInfo();
// Start at the beginning of the local area.
// The Offset is the distance from the stack top in the direction
@@ -132,24 +132,24 @@ NVPTXPrologEpilogPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
// We currently don't support filling in holes in between fixed sized
// objects, so we adjust 'Offset' to point to the end of last fixed sized
// preallocated object.
- for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) {
+ for (int i = MFI.getObjectIndexBegin(); i != 0; ++i) {
int64_t FixedOff;
if (StackGrowsDown) {
// The maximum distance from the stack pointer is at lower address of
// the object -- which is given by offset. For down growing stack
// the offset is negative, so we negate the offset to get the distance.
- FixedOff = -MFI->getObjectOffset(i);
+ FixedOff = -MFI.getObjectOffset(i);
} else {
// The maximum distance from the start pointer is at the upper
// address of the object.
- FixedOff = MFI->getObjectOffset(i) + MFI->getObjectSize(i);
+ FixedOff = MFI.getObjectOffset(i) + MFI.getObjectSize(i);
}
if (FixedOff > Offset) Offset = FixedOff;
}
// NOTE: We do not have a call stack
- unsigned MaxAlign = MFI->getMaxAlignment();
+ unsigned MaxAlign = MFI.getMaxAlignment();
// No scavenger
@@ -157,8 +157,8 @@ NVPTXPrologEpilogPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
// check for whether the frame is large enough to want to use virtual
// frame index registers. Functions which don't want/need this optimization
// will continue to use the existing code path.
- if (MFI->getUseLocalStackAllocationBlock()) {
- unsigned Align = MFI->getLocalFrameMaxAlign();
+ if (MFI.getUseLocalStackAllocationBlock()) {
+ unsigned Align = MFI.getLocalFrameMaxAlign();
// Adjust to alignment boundary.
Offset = (Offset + Align - 1) / Align * Align;
@@ -166,15 +166,15 @@ NVPTXPrologEpilogPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n");
// Resolve offsets for objects in the local block.
- for (unsigned i = 0, e = MFI->getLocalFrameObjectCount(); i != e; ++i) {
- std::pair<int, int64_t> Entry = MFI->getLocalFrameObjectMap(i);
+ for (unsigned i = 0, e = MFI.getLocalFrameObjectCount(); i != e; ++i) {
+ std::pair<int, int64_t> Entry = MFI.getLocalFrameObjectMap(i);
int64_t FIOffset = (StackGrowsDown ? -Offset : Offset) + Entry.second;
DEBUG(dbgs() << "alloc FI(" << Entry.first << ") at SP[" <<
FIOffset << "]\n");
- MFI->setObjectOffset(Entry.first, FIOffset);
+ MFI.setObjectOffset(Entry.first, FIOffset);
}
// Allocate the local block
- Offset += MFI->getLocalFrameSize();
+ Offset += MFI.getLocalFrameSize();
MaxAlign = std::max(Align, MaxAlign);
}
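The local-block sizing above still uses the divide/multiply round-up, which works for any positive alignment rather than only powers of two. A compact sketch of that step (roundUpTo is a local helper here, not llvm::alignTo):

    #include <cassert>
    #include <cstdint>

    // Same as "Offset = (Offset + Align - 1) / Align * Align;" above.
    static int64_t roundUpTo(int64_t Offset, int64_t Align) {
      return (Offset + Align - 1) / Align * Align;
    }

    int main() {
      assert(roundUpTo(13, 8) == 16);
      assert(roundUpTo(16, 8) == 16);
      assert(roundUpTo(5, 3) == 6); // non-power-of-two alignments also work
      return 0;
    }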
@@ -183,11 +183,11 @@ NVPTXPrologEpilogPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
// Then assign frame offsets to stack objects that are not used to spill
// callee saved registers.
- for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
- if (MFI->isObjectPreAllocated(i) &&
- MFI->getUseLocalStackAllocationBlock())
+ for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
+ if (MFI.isObjectPreAllocated(i) &&
+ MFI.getUseLocalStackAllocationBlock())
continue;
- if (MFI->isDeadObjectIndex(i))
+ if (MFI.isDeadObjectIndex(i))
continue;
AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign);
@@ -199,8 +199,8 @@ NVPTXPrologEpilogPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
// If we have reserved argument space for call sites in the function
// immediately on entry to the current function, count it as part of the
// overall stack size.
- if (MFI->adjustsStack() && TFI.hasReservedCallFrame(Fn))
- Offset += MFI->getMaxCallFrameSize();
+ if (MFI.adjustsStack() && TFI.hasReservedCallFrame(Fn))
+ Offset += MFI.getMaxCallFrameSize();
// Round up the size to a multiple of the alignment. If the function has
// any calls or alloca's, align to the target's StackAlignment value to
@@ -208,8 +208,8 @@ NVPTXPrologEpilogPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
// otherwise, for leaf functions, align to the TransientStackAlignment
// value.
unsigned StackAlign;
- if (MFI->adjustsStack() || MFI->hasVarSizedObjects() ||
- (RegInfo->needsStackRealignment(Fn) && MFI->getObjectIndexEnd() != 0))
+ if (MFI.adjustsStack() || MFI.hasVarSizedObjects() ||
+ (RegInfo->needsStackRealignment(Fn) && MFI.getObjectIndexEnd() != 0))
StackAlign = TFI.getStackAlignment();
else
StackAlign = TFI.getTransientStackAlignment();
@@ -223,5 +223,5 @@ NVPTXPrologEpilogPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
// Update frame info to pretend that this is part of the stack...
int64_t StackSize = Offset - LocalAreaOffset;
- MFI->setStackSize(StackSize);
+ MFI.setStackSize(StackSize);
}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
index 6e97f9efbc27..6cbf0604d7ef 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp
@@ -33,11 +33,29 @@ std::string getNVPTXRegClassName(TargetRegisterClass const *RC) {
if (RC == &NVPTX::Float64RegsRegClass) {
return ".f64";
} else if (RC == &NVPTX::Int64RegsRegClass) {
- return ".s64";
+ // We use untyped (.b) integer registers here as NVCC does.
+ // Correctness of generated code does not depend on register type,
+ // but using .s/.u registers runs into ptxas bug that prevents
+ // assembly of otherwise valid PTX into SASS. Despite PTX ISA
+ // specifying only argument size for fp16 instructions, ptxas does
+ // not allow using .s16 or .u16 arguments for .fp16
+ // instructions. At the same time it allows using .s32/.u32
+ // arguments for .fp16v2 instructions:
+ //
+ // .reg .b16 rb16
+ // .reg .s16 rs16
+ // add.f16 rb16,rb16,rb16; // OK
+ // add.f16 rs16,rs16,rs16; // Arguments mismatch for instruction 'add'
+ // but:
+ // .reg .b32 rb32
+ // .reg .s32 rs32
+ // add.f16v2 rb32,rb32,rb32; // OK
+ // add.f16v2 rs32,rs32,rs32; // OK
+ return ".b64";
} else if (RC == &NVPTX::Int32RegsRegClass) {
- return ".s32";
+ return ".b32";
} else if (RC == &NVPTX::Int16RegsRegClass) {
- return ".s16";
+ return ".b16";
} else if (RC == &NVPTX::Int1RegsRegClass) {
return ".pred";
} else if (RC == &NVPTX::SpecialRegsRegClass) {
@@ -97,7 +115,7 @@ void NVPTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
MachineFunction &MF = *MI.getParent()->getParent();
- int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
+ int Offset = MF.getFrameInfo().getObjectOffset(FrameIndex) +
MI.getOperand(FIOperandNum + 1).getImm();
// Using I0 as the frame pointer
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
index 5a83371b07f1..2022caca76ee 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXReplaceImageHandles.cpp
@@ -36,7 +36,7 @@ public:
bool runOnMachineFunction(MachineFunction &MF) override;
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "NVPTX Replace Image Handles";
}
private:
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
index bd2509a3c8c9..6e1f427ed021 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp
@@ -29,8 +29,6 @@ void NVPTXSubtarget::anchor() {}
NVPTXSubtarget &NVPTXSubtarget::initializeSubtargetDependencies(StringRef CPU,
StringRef FS) {
// Provide the default CPU if we don't have one.
- if (CPU.empty() && FS.size())
- llvm_unreachable("we are not using FeatureStr");
TargetName = CPU.empty() ? "sm_20" : CPU;
ParseSubtargetFeatures(TargetName, FS);
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.h b/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
index 41670390c41b..da020a94bcdd 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.h
@@ -48,6 +48,10 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
// FrameLowering class because TargetFrameLowering is abstract.
NVPTXFrameLowering FrameLowering;
+protected:
+ // Processor supports scoped atomic operations.
+ bool HasAtomScope;
+
public:
/// This constructor initializes the data members to match that
/// of the specified module.
@@ -77,6 +81,10 @@ public:
bool hasAtomRedGen32() const { return SmVersion >= 20; }
bool hasAtomRedGen64() const { return SmVersion >= 20; }
bool hasAtomAddF32() const { return SmVersion >= 20; }
+ bool hasAtomAddF64() const { return SmVersion >= 60; }
+ bool hasAtomScope() const { return HasAtomScope; }
+ bool hasAtomBitwise64() const { return SmVersion >= 32; }
+ bool hasAtomMinMax64() const { return SmVersion >= 32; }
bool hasVote() const { return SmVersion >= 12; }
bool hasDouble() const { return SmVersion >= 13; }
bool reqPTX20() const { return SmVersion >= 20; }
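As a hedged illustration of how the new predicates are meant to be consumed, lowering code might gate its choices like this; the two helper functions below are hypothetical and not part of the patch:

#include "NVPTXSubtarget.h"

// Hypothetical helper: use a native fp64 atomic add when the target
// supports it (sm_60+), otherwise the caller would emit a CAS loop.
static bool canUseNativeAtomAddF64(const llvm::NVPTXSubtarget &ST) {
  return ST.hasAtomAddF64();   // SmVersion >= 60 per the hunk above
}

// Scoped (.cta/.sys) atomics are only selected when the processor
// reports support via the new HasAtomScope feature bit.
static bool canUseScopedAtomics(const llvm::NVPTXSubtarget &ST) {
  return ST.hasAtomScope();
}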
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
index b9f5919964c7..6c68a2c9370d 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp
@@ -20,7 +20,6 @@
#include "NVPTXTargetTransformInfo.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
@@ -46,13 +45,16 @@
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
+#include "llvm/Transforms/Vectorize.h"
using namespace llvm;
-static cl::opt<bool> UseInferAddressSpaces(
- "nvptx-use-infer-addrspace", cl::init(false), cl::Hidden,
- cl::desc("Optimize address spaces using NVPTXInferAddressSpaces instead of "
- "NVPTXFavorNonGenericAddrSpaces"));
+// The load/store vectorizer (LSV) is still relatively new; this switch lets
+// us turn it off in case we encounter (or suspect) a bug.
+static cl::opt<bool>
+ DisableLoadStoreVectorizer("disable-nvptx-load-store-vectorizer",
+ cl::desc("Disable load/store vectorizer"),
+ cl::init(false), cl::Hidden);
namespace llvm {
void initializeNVVMIntrRangePass(PassRegistry&);
@@ -60,17 +62,16 @@ void initializeNVVMReflectPass(PassRegistry&);
void initializeGenericToNVVMPass(PassRegistry&);
void initializeNVPTXAllocaHoistingPass(PassRegistry &);
void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
-void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &);
void initializeNVPTXInferAddressSpacesPass(PassRegistry &);
void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
-void initializeNVPTXLowerKernelArgsPass(PassRegistry &);
+void initializeNVPTXLowerArgsPass(PassRegistry &);
void initializeNVPTXLowerAllocaPass(PassRegistry &);
}
extern "C" void LLVMInitializeNVPTXTarget() {
// Register the target.
- RegisterTargetMachine<NVPTXTargetMachine32> X(TheNVPTXTarget32);
- RegisterTargetMachine<NVPTXTargetMachine64> Y(TheNVPTXTarget64);
+ RegisterTargetMachine<NVPTXTargetMachine32> X(getTheNVPTXTarget32());
+ RegisterTargetMachine<NVPTXTargetMachine64> Y(getTheNVPTXTarget64());
// FIXME: This pass is really intended to be invoked during IR optimization,
// but it's very NVPTX-specific.
@@ -80,9 +81,8 @@ extern "C" void LLVMInitializeNVPTXTarget() {
initializeGenericToNVVMPass(PR);
initializeNVPTXAllocaHoistingPass(PR);
initializeNVPTXAssignValidGlobalNamesPass(PR);
- initializeNVPTXFavorNonGenericAddrSpacesPass(PR);
initializeNVPTXInferAddressSpacesPass(PR);
- initializeNVPTXLowerKernelArgsPass(PR);
+ initializeNVPTXLowerArgsPass(PR);
initializeNVPTXLowerAllocaPass(PR);
initializeNVPTXLowerAggrCopiesPass(PR);
}
@@ -195,19 +195,11 @@ void NVPTXPassConfig::addEarlyCSEOrGVNPass() {
}
void NVPTXPassConfig::addAddressSpaceInferencePasses() {
- // NVPTXLowerKernelArgs emits alloca for byval parameters which can often
+ // NVPTXLowerArgs emits alloca for byval parameters which can often
// be eliminated by SROA.
addPass(createSROAPass());
addPass(createNVPTXLowerAllocaPass());
- if (UseInferAddressSpaces) {
- addPass(createNVPTXInferAddressSpacesPass());
- } else {
- addPass(createNVPTXFavorNonGenericAddrSpacesPass());
- // FavorNonGenericAddrSpaces shortcuts unnecessary addrspacecasts, and leave
- // them unused. We could remove dead code in an ad-hoc manner, but that
- // requires manual work and might be error-prone.
- addPass(createDeadCodeEliminationPass());
- }
+ addPass(createNVPTXInferAddressSpacesPass());
}
void NVPTXPassConfig::addStraightLineScalarOptimizationPasses() {
@@ -253,11 +245,13 @@ void NVPTXPassConfig::addIRPasses() {
addPass(createNVPTXAssignValidGlobalNamesPass());
addPass(createGenericToNVVMPass());
- // NVPTXLowerKernelArgs is required for correctness and should be run right
+ // NVPTXLowerArgs is required for correctness and should be run right
// before the address space inference passes.
- addPass(createNVPTXLowerKernelArgsPass(&getNVPTXTargetMachine()));
+ addPass(createNVPTXLowerArgsPass(&getNVPTXTargetMachine()));
if (getOptLevel() != CodeGenOpt::None) {
addAddressSpaceInferencePasses();
+ if (!DisableLoadStoreVectorizer)
+ addPass(createLoadStoreVectorizerPass());
addStraightLineScalarOptimizationPasses();
}
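The new option follows the usual escape-hatch pattern: the pass is on by default and the hidden flag (named in the hunk above) turns it off, presumably reachable through llc's normal option parsing. A standalone sketch of that pattern, not meant to be linked into the backend itself:

#include "llvm/Support/CommandLine.h"

// Default-on pass, default-off flag; the option string is the one the
// patch registers.
static llvm::cl::opt<bool> DisableLSV(
    "disable-nvptx-load-store-vectorizer",
    llvm::cl::desc("Disable load/store vectorizer"),
    llvm::cl::init(false), llvm::cl::Hidden);

// In the pass pipeline the vectorizer only runs when the flag is unset:
//   if (!DisableLSV)
//     addPass(createLoadStoreVectorizerPass());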
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h
index 045fbb75a2a0..dc367a90594a 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h
@@ -91,14 +91,12 @@ public:
return ReadOnlySection;
}
- MCSection *getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
- Mangler &Mang,
+ MCSection *getExplicitSectionGlobal(const GlobalObject *GO, SectionKind Kind,
const TargetMachine &TM) const override {
return DataSection;
}
- MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
- Mangler &Mang,
+ MCSection *SelectSectionForGlobal(const GlobalObject *GO, SectionKind Kind,
const TargetMachine &TM) const override;
};
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
index 580d345cc663..48928ee2d540 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
@@ -42,6 +42,29 @@ static bool isNVVMAtomic(const IntrinsicInst *II) {
case Intrinsic::nvvm_atomic_load_add_f32:
case Intrinsic::nvvm_atomic_load_inc_32:
case Intrinsic::nvvm_atomic_load_dec_32:
+
+ case Intrinsic::nvvm_atomic_add_gen_f_cta:
+ case Intrinsic::nvvm_atomic_add_gen_f_sys:
+ case Intrinsic::nvvm_atomic_add_gen_i_cta:
+ case Intrinsic::nvvm_atomic_add_gen_i_sys:
+ case Intrinsic::nvvm_atomic_and_gen_i_cta:
+ case Intrinsic::nvvm_atomic_and_gen_i_sys:
+ case Intrinsic::nvvm_atomic_cas_gen_i_cta:
+ case Intrinsic::nvvm_atomic_cas_gen_i_sys:
+ case Intrinsic::nvvm_atomic_dec_gen_i_cta:
+ case Intrinsic::nvvm_atomic_dec_gen_i_sys:
+ case Intrinsic::nvvm_atomic_inc_gen_i_cta:
+ case Intrinsic::nvvm_atomic_inc_gen_i_sys:
+ case Intrinsic::nvvm_atomic_max_gen_i_cta:
+ case Intrinsic::nvvm_atomic_max_gen_i_sys:
+ case Intrinsic::nvvm_atomic_min_gen_i_cta:
+ case Intrinsic::nvvm_atomic_min_gen_i_sys:
+ case Intrinsic::nvvm_atomic_or_gen_i_cta:
+ case Intrinsic::nvvm_atomic_or_gen_i_sys:
+ case Intrinsic::nvvm_atomic_exch_gen_i_cta:
+ case Intrinsic::nvvm_atomic_exch_gen_i_sys:
+ case Intrinsic::nvvm_atomic_xor_gen_i_cta:
+ case Intrinsic::nvvm_atomic_xor_gen_i_sys:
return true;
}
}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
index 08ffdf191151..d953aa8a7199 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
@@ -41,13 +41,6 @@ public:
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl()),
TLI(ST->getTargetLowering()) {}
- // Provide value semantics. MSVC requires that we spell all of these out.
- NVPTXTTIImpl(const NVPTXTTIImpl &Arg)
- : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {}
- NVPTXTTIImpl(NVPTXTTIImpl &&Arg)
- : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
- TLI(std::move(Arg.TLI)) {}
-
bool hasBranchDivergence() { return true; }
bool isSourceOfDivergence(const Value *V);
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp b/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
index 835e4b442039..e464f474b1d5 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp
@@ -26,16 +26,18 @@
#include <string>
#include <vector>
-using namespace llvm;
+namespace llvm {
+namespace {
typedef std::map<std::string, std::vector<unsigned> > key_val_pair_t;
typedef std::map<const GlobalValue *, key_val_pair_t> global_val_annot_t;
typedef std::map<const Module *, global_val_annot_t> per_module_annot_t;
+} // anonymous namespace
-ManagedStatic<per_module_annot_t> annotationCache;
+static ManagedStatic<per_module_annot_t> annotationCache;
static sys::Mutex Lock;
-void llvm::clearAnnotationCache(const llvm::Module *Mod) {
+void clearAnnotationCache(const Module *Mod) {
MutexGuard Guard(Lock);
annotationCache->erase(Mod);
}
@@ -68,7 +70,7 @@ static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval) {
static void cacheAnnotationFromMD(const Module *m, const GlobalValue *gv) {
MutexGuard Guard(Lock);
- NamedMDNode *NMD = m->getNamedMetadata(llvm::NamedMDForAnnotations);
+ NamedMDNode *NMD = m->getNamedMetadata("nvvm.annotations");
if (!NMD)
return;
key_val_pair_t tmp;
@@ -99,8 +101,8 @@ static void cacheAnnotationFromMD(const Module *m, const GlobalValue *gv) {
}
}
-bool llvm::findOneNVVMAnnotation(const GlobalValue *gv, const std::string &prop,
- unsigned &retval) {
+bool findOneNVVMAnnotation(const GlobalValue *gv, const std::string &prop,
+ unsigned &retval) {
MutexGuard Guard(Lock);
const Module *m = gv->getParent();
if ((*annotationCache).find(m) == (*annotationCache).end())
@@ -113,8 +115,8 @@ bool llvm::findOneNVVMAnnotation(const GlobalValue *gv, const std::string &prop,
return true;
}
-bool llvm::findAllNVVMAnnotation(const GlobalValue *gv, const std::string &prop,
- std::vector<unsigned> &retval) {
+bool findAllNVVMAnnotation(const GlobalValue *gv, const std::string &prop,
+ std::vector<unsigned> &retval) {
MutexGuard Guard(Lock);
const Module *m = gv->getParent();
if ((*annotationCache).find(m) == (*annotationCache).end())
@@ -127,12 +129,10 @@ bool llvm::findAllNVVMAnnotation(const GlobalValue *gv, const std::string &prop,
return true;
}
-bool llvm::isTexture(const llvm::Value &val) {
+bool isTexture(const Value &val) {
if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) {
unsigned annot;
- if (llvm::findOneNVVMAnnotation(
- gv, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISTEXTURE],
- annot)) {
+ if (findOneNVVMAnnotation(gv, "texture", annot)) {
assert((annot == 1) && "Unexpected annotation on a texture symbol");
return true;
}
@@ -140,12 +140,10 @@ bool llvm::isTexture(const llvm::Value &val) {
return false;
}
-bool llvm::isSurface(const llvm::Value &val) {
+bool isSurface(const Value &val) {
if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) {
unsigned annot;
- if (llvm::findOneNVVMAnnotation(
- gv, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSURFACE],
- annot)) {
+ if (findOneNVVMAnnotation(gv, "surface", annot)) {
assert((annot == 1) && "Unexpected annotation on a surface symbol");
return true;
}
@@ -153,12 +151,12 @@ bool llvm::isSurface(const llvm::Value &val) {
return false;
}
-bool llvm::isSampler(const llvm::Value &val) {
+bool isSampler(const Value &val) {
+ const char *AnnotationName = "sampler";
+
if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) {
unsigned annot;
- if (llvm::findOneNVVMAnnotation(
- gv, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER],
- annot)) {
+ if (findOneNVVMAnnotation(gv, AnnotationName, annot)) {
assert((annot == 1) && "Unexpected annotation on a sampler symbol");
return true;
}
@@ -166,72 +164,58 @@ bool llvm::isSampler(const llvm::Value &val) {
if (const Argument *arg = dyn_cast<Argument>(&val)) {
const Function *func = arg->getParent();
std::vector<unsigned> annot;
- if (llvm::findAllNVVMAnnotation(
- func, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER],
- annot)) {
- if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end())
+ if (findAllNVVMAnnotation(func, AnnotationName, annot)) {
+ if (is_contained(annot, arg->getArgNo()))
return true;
}
}
return false;
}
-bool llvm::isImageReadOnly(const llvm::Value &val) {
+bool isImageReadOnly(const Value &val) {
if (const Argument *arg = dyn_cast<Argument>(&val)) {
const Function *func = arg->getParent();
std::vector<unsigned> annot;
- if (llvm::findAllNVVMAnnotation(func,
- llvm::PropertyAnnotationNames[
- llvm::PROPERTY_ISREADONLY_IMAGE_PARAM],
- annot)) {
- if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end())
+ if (findAllNVVMAnnotation(func, "rdoimage", annot)) {
+ if (is_contained(annot, arg->getArgNo()))
return true;
}
}
return false;
}
-bool llvm::isImageWriteOnly(const llvm::Value &val) {
+bool isImageWriteOnly(const Value &val) {
if (const Argument *arg = dyn_cast<Argument>(&val)) {
const Function *func = arg->getParent();
std::vector<unsigned> annot;
- if (llvm::findAllNVVMAnnotation(func,
- llvm::PropertyAnnotationNames[
- llvm::PROPERTY_ISWRITEONLY_IMAGE_PARAM],
- annot)) {
- if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end())
+ if (findAllNVVMAnnotation(func, "wroimage", annot)) {
+ if (is_contained(annot, arg->getArgNo()))
return true;
}
}
return false;
}
-bool llvm::isImageReadWrite(const llvm::Value &val) {
+bool isImageReadWrite(const Value &val) {
if (const Argument *arg = dyn_cast<Argument>(&val)) {
const Function *func = arg->getParent();
std::vector<unsigned> annot;
- if (llvm::findAllNVVMAnnotation(func,
- llvm::PropertyAnnotationNames[
- llvm::PROPERTY_ISREADWRITE_IMAGE_PARAM],
- annot)) {
- if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end())
+ if (findAllNVVMAnnotation(func, "rdwrimage", annot)) {
+ if (is_contained(annot, arg->getArgNo()))
return true;
}
}
return false;
}
-bool llvm::isImage(const llvm::Value &val) {
- return llvm::isImageReadOnly(val) || llvm::isImageWriteOnly(val) ||
- llvm::isImageReadWrite(val);
+bool isImage(const Value &val) {
+ return isImageReadOnly(val) || isImageWriteOnly(val) || isImageReadWrite(val);
}
-bool llvm::isManaged(const llvm::Value &val) {
+bool isManaged(const Value &val) {
if(const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) {
unsigned annot;
- if(llvm::findOneNVVMAnnotation(gv,
- llvm::PropertyAnnotationNames[llvm::PROPERTY_MANAGED],
- annot)) {
+ if (findOneNVVMAnnotation(gv, "managed", annot)) {
assert((annot == 1) && "Unexpected annotation on a managed symbol");
return true;
}
@@ -239,71 +223,66 @@ bool llvm::isManaged(const llvm::Value &val) {
return false;
}
-std::string llvm::getTextureName(const llvm::Value &val) {
+std::string getTextureName(const Value &val) {
assert(val.hasName() && "Found texture variable with no name");
return val.getName();
}
-std::string llvm::getSurfaceName(const llvm::Value &val) {
+std::string getSurfaceName(const Value &val) {
assert(val.hasName() && "Found surface variable with no name");
return val.getName();
}
-std::string llvm::getSamplerName(const llvm::Value &val) {
+std::string getSamplerName(const Value &val) {
assert(val.hasName() && "Found sampler variable with no name");
return val.getName();
}
-bool llvm::getMaxNTIDx(const Function &F, unsigned &x) {
- return (llvm::findOneNVVMAnnotation(
- &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_X], x));
+bool getMaxNTIDx(const Function &F, unsigned &x) {
+ return findOneNVVMAnnotation(&F, "maxntidx", x);
+}
+
+bool getMaxNTIDy(const Function &F, unsigned &y) {
+ return findOneNVVMAnnotation(&F, "maxntidy", y);
}
-bool llvm::getMaxNTIDy(const Function &F, unsigned &y) {
- return (llvm::findOneNVVMAnnotation(
- &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Y], y));
+bool getMaxNTIDz(const Function &F, unsigned &z) {
+ return findOneNVVMAnnotation(&F, "maxntidz", z);
}
-bool llvm::getMaxNTIDz(const Function &F, unsigned &z) {
- return (llvm::findOneNVVMAnnotation(
- &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Z], z));
+bool getReqNTIDx(const Function &F, unsigned &x) {
+ return findOneNVVMAnnotation(&F, "reqntidx", x);
}
-bool llvm::getReqNTIDx(const Function &F, unsigned &x) {
- return (llvm::findOneNVVMAnnotation(
- &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_X], x));
+bool getReqNTIDy(const Function &F, unsigned &y) {
+ return findOneNVVMAnnotation(&F, "reqntidy", y);
}
-bool llvm::getReqNTIDy(const Function &F, unsigned &y) {
- return (llvm::findOneNVVMAnnotation(
- &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Y], y));
+bool getReqNTIDz(const Function &F, unsigned &z) {
+ return findOneNVVMAnnotation(&F, "reqntidz", z);
}
-bool llvm::getReqNTIDz(const Function &F, unsigned &z) {
- return (llvm::findOneNVVMAnnotation(
- &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Z], z));
+bool getMinCTASm(const Function &F, unsigned &x) {
+ return findOneNVVMAnnotation(&F, "minctasm", x);
}
-bool llvm::getMinCTASm(const Function &F, unsigned &x) {
- return (llvm::findOneNVVMAnnotation(
- &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_MINNCTAPERSM], x));
+bool getMaxNReg(const Function &F, unsigned &x) {
+ return findOneNVVMAnnotation(&F, "maxnreg", x);
}
-bool llvm::isKernelFunction(const Function &F) {
+bool isKernelFunction(const Function &F) {
unsigned x = 0;
- bool retval = llvm::findOneNVVMAnnotation(
- &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISKERNEL_FUNCTION], x);
+ bool retval = findOneNVVMAnnotation(&F, "kernel", x);
if (!retval) {
// There is no NVVM metadata, check the calling convention
- return F.getCallingConv() == llvm::CallingConv::PTX_Kernel;
+ return F.getCallingConv() == CallingConv::PTX_Kernel;
}
return (x == 1);
}
-bool llvm::getAlign(const Function &F, unsigned index, unsigned &align) {
+bool getAlign(const Function &F, unsigned index, unsigned &align) {
std::vector<unsigned> Vs;
- bool retval = llvm::findAllNVVMAnnotation(
- &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_ALIGN], Vs);
+ bool retval = findAllNVVMAnnotation(&F, "align", Vs);
if (!retval)
return false;
for (int i = 0, e = Vs.size(); i < e; i++) {
@@ -316,7 +295,7 @@ bool llvm::getAlign(const Function &F, unsigned index, unsigned &align) {
return false;
}
-bool llvm::getAlign(const CallInst &I, unsigned index, unsigned &align) {
+bool getAlign(const CallInst &I, unsigned index, unsigned &align) {
if (MDNode *alignNode = I.getMetadata("callalign")) {
for (int i = 0, n = alignNode->getNumOperands(); i < n; i++) {
if (const ConstantInt *CI =
@@ -335,108 +314,4 @@ bool llvm::getAlign(const CallInst &I, unsigned index, unsigned &align) {
return false;
}
-// The following are some useful utilities for debugging
-
-BasicBlock *llvm::getParentBlock(Value *v) {
- if (BasicBlock *B = dyn_cast<BasicBlock>(v))
- return B;
-
- if (Instruction *I = dyn_cast<Instruction>(v))
- return I->getParent();
-
- return nullptr;
-}
-
-Function *llvm::getParentFunction(Value *v) {
- if (Function *F = dyn_cast<Function>(v))
- return F;
-
- if (Instruction *I = dyn_cast<Instruction>(v))
- return I->getParent()->getParent();
-
- if (BasicBlock *B = dyn_cast<BasicBlock>(v))
- return B->getParent();
-
- return nullptr;
-}
-
-// Dump a block by name
-void llvm::dumpBlock(Value *v, char *blockName) {
- Function *F = getParentFunction(v);
- if (!F)
- return;
-
- for (Function::iterator it = F->begin(), ie = F->end(); it != ie; ++it) {
- BasicBlock *B = &*it;
- if (strcmp(B->getName().data(), blockName) == 0) {
- B->dump();
- return;
- }
- }
-}
-
-// Find an instruction by name
-Instruction *llvm::getInst(Value *base, char *instName) {
- Function *F = getParentFunction(base);
- if (!F)
- return nullptr;
-
- for (inst_iterator it = inst_begin(F), ie = inst_end(F); it != ie; ++it) {
- Instruction *I = &*it;
- if (strcmp(I->getName().data(), instName) == 0) {
- return I;
- }
- }
-
- return nullptr;
-}
-
-// Dump an instruction by name
-void llvm::dumpInst(Value *base, char *instName) {
- Instruction *I = getInst(base, instName);
- if (I)
- I->dump();
-}
-
-// Dump an instruction and all dependent instructions
-void llvm::dumpInstRec(Value *v, std::set<Instruction *> *visited) {
- if (Instruction *I = dyn_cast<Instruction>(v)) {
-
- if (visited->find(I) != visited->end())
- return;
-
- visited->insert(I);
-
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
- dumpInstRec(I->getOperand(i), visited);
-
- I->dump();
- }
-}
-
-// Dump an instruction and all dependent instructions
-void llvm::dumpInstRec(Value *v) {
- std::set<Instruction *> visited;
-
- //BasicBlock *B = getParentBlock(v);
-
- dumpInstRec(v, &visited);
-}
-
-// Dump the parent for Instruction, block or function
-void llvm::dumpParent(Value *v) {
- if (Instruction *I = dyn_cast<Instruction>(v)) {
- I->getParent()->dump();
- return;
- }
-
- if (BasicBlock *B = dyn_cast<BasicBlock>(v)) {
- B->getParent()->dump();
- return;
- }
-
- if (Function *F = dyn_cast<Function>(v)) {
- F->getParent()->dump();
- return;
- }
-}
+} // namespace llvm
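With the llvm:: qualifiers dropped and the PropertyAnnotationNames table replaced by plain string keys, callers query NVVM annotations directly by name. A small usage sketch, assuming a Function already marked up via !nvvm.annotations; the wrapper function here is hypothetical:

#include "NVPTXUtilities.h"
#include "llvm/IR/Function.h"

// Hypothetical helper: fetch the maxntid launch bounds recorded in
// !nvvm.annotations, if present, for a kernel function F.
static bool getLaunchBounds(const llvm::Function &F,
                            unsigned &X, unsigned &Y, unsigned &Z) {
  // Each getter returns false when the corresponding annotation is absent.
  return llvm::getMaxNTIDx(F, X) && llvm::getMaxNTIDy(F, Y) &&
         llvm::getMaxNTIDz(F, Z);
}

// Likewise, isKernelFunction(F) is true either when the "kernel" annotation
// is present or when F uses the PTX_Kernel calling convention.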
diff --git a/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.h b/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.h
index ec5bfc17afc7..a0cc4e78ac21 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.h
+++ b/contrib/llvm/lib/Target/NVPTX/NVPTXUtilities.h
@@ -25,51 +25,40 @@
namespace llvm {
-#define NVCL_IMAGE2D_READONLY_FUNCNAME "__is_image2D_readonly"
-#define NVCL_IMAGE3D_READONLY_FUNCNAME "__is_image3D_readonly"
+void clearAnnotationCache(const Module *);
-void clearAnnotationCache(const llvm::Module *);
-
-bool findOneNVVMAnnotation(const llvm::GlobalValue *, const std::string &,
+bool findOneNVVMAnnotation(const GlobalValue *, const std::string &,
unsigned &);
-bool findAllNVVMAnnotation(const llvm::GlobalValue *, const std::string &,
+bool findAllNVVMAnnotation(const GlobalValue *, const std::string &,
std::vector<unsigned> &);
-bool isTexture(const llvm::Value &);
-bool isSurface(const llvm::Value &);
-bool isSampler(const llvm::Value &);
-bool isImage(const llvm::Value &);
-bool isImageReadOnly(const llvm::Value &);
-bool isImageWriteOnly(const llvm::Value &);
-bool isImageReadWrite(const llvm::Value &);
-bool isManaged(const llvm::Value &);
-
-std::string getTextureName(const llvm::Value &);
-std::string getSurfaceName(const llvm::Value &);
-std::string getSamplerName(const llvm::Value &);
+bool isTexture(const Value &);
+bool isSurface(const Value &);
+bool isSampler(const Value &);
+bool isImage(const Value &);
+bool isImageReadOnly(const Value &);
+bool isImageWriteOnly(const Value &);
+bool isImageReadWrite(const Value &);
+bool isManaged(const Value &);
-bool getMaxNTIDx(const llvm::Function &, unsigned &);
-bool getMaxNTIDy(const llvm::Function &, unsigned &);
-bool getMaxNTIDz(const llvm::Function &, unsigned &);
+std::string getTextureName(const Value &);
+std::string getSurfaceName(const Value &);
+std::string getSamplerName(const Value &);
-bool getReqNTIDx(const llvm::Function &, unsigned &);
-bool getReqNTIDy(const llvm::Function &, unsigned &);
-bool getReqNTIDz(const llvm::Function &, unsigned &);
+bool getMaxNTIDx(const Function &, unsigned &);
+bool getMaxNTIDy(const Function &, unsigned &);
+bool getMaxNTIDz(const Function &, unsigned &);
-bool getMinCTASm(const llvm::Function &, unsigned &);
-bool isKernelFunction(const llvm::Function &);
+bool getReqNTIDx(const Function &, unsigned &);
+bool getReqNTIDy(const Function &, unsigned &);
+bool getReqNTIDz(const Function &, unsigned &);
-bool getAlign(const llvm::Function &, unsigned index, unsigned &);
-bool getAlign(const llvm::CallInst &, unsigned index, unsigned &);
+bool getMinCTASm(const Function &, unsigned &);
+bool getMaxNReg(const Function &, unsigned &);
+bool isKernelFunction(const Function &);
-BasicBlock *getParentBlock(Value *v);
-Function *getParentFunction(Value *v);
-void dumpBlock(Value *v, char *blockName);
-Instruction *getInst(Value *base, char *instName);
-void dumpInst(Value *base, char *instName);
-void dumpInstRec(Value *v, std::set<Instruction *> *visited);
-void dumpInstRec(Value *v);
-void dumpParent(Value *v);
+bool getAlign(const Function &, unsigned index, unsigned &);
+bool getAlign(const CallInst &, unsigned index, unsigned &);
}
diff --git a/contrib/llvm/lib/Target/NVPTX/NVVMIntrRange.cpp b/contrib/llvm/lib/Target/NVPTX/NVVMIntrRange.cpp
index b9c02c431141..9c71a2ee165b 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVVMIntrRange.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVVMIntrRange.cpp
@@ -65,6 +65,10 @@ INITIALIZE_PASS(NVVMIntrRange, "nvvm-intr-range",
// Adds the passed-in [Low,High) range information as metadata to the
// passed-in call instruction.
static bool addRangeMetadata(uint64_t Low, uint64_t High, CallInst *C) {
+ // This call already has range metadata, nothing to do.
+ if (C->getMetadata(LLVMContext::MD_range))
+ return false;
+
LLVMContext &Context = C->getParent()->getContext();
IntegerType *Int32Ty = Type::getInt32Ty(Context);
Metadata *LowAndHigh[] = {
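The early exit added above avoids clobbering a range that may already be tighter. For reference, a self-contained sketch of the attach-range pattern the function implements; the helper name and includes below are illustrative, not the exact body of addRangeMetadata:

#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"

// Attach !range [Low, High) to a call unless it already carries one.
static bool attachRange(llvm::CallInst *C, uint64_t Low, uint64_t High) {
  using namespace llvm;
  if (C->getMetadata(LLVMContext::MD_range))
    return false;                 // keep the existing, possibly tighter, range
  LLVMContext &Ctx = C->getContext();
  IntegerType *Int32Ty = Type::getInt32Ty(Ctx);
  Metadata *LowAndHigh[] = {
      ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Low)),
      ConstantAsMetadata::get(ConstantInt::get(Int32Ty, High))};
  C->setMetadata(LLVMContext::MD_range, MDNode::get(Ctx, LowAndHigh));
  return true;
}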
diff --git a/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp b/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp
index e0c35e7039e5..c639c4dc0683 100644
--- a/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp
@@ -65,7 +65,6 @@ public:
bool runOnFunction(Function &) override;
private:
- bool handleFunction(Function *ReflectFunction);
void setVarMap();
};
}
diff --git a/contrib/llvm/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp b/contrib/llvm/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp
index cc7d4dc5ece7..d44876abf729 100644
--- a/contrib/llvm/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp
+++ b/contrib/llvm/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp
@@ -12,12 +12,18 @@
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
-Target llvm::TheNVPTXTarget32;
-Target llvm::TheNVPTXTarget64;
+Target &llvm::getTheNVPTXTarget32() {
+ static Target TheNVPTXTarget32;
+ return TheNVPTXTarget32;
+}
+Target &llvm::getTheNVPTXTarget64() {
+ static Target TheNVPTXTarget64;
+ return TheNVPTXTarget64;
+}
extern "C" void LLVMInitializeNVPTXTargetInfo() {
- RegisterTarget<Triple::nvptx> X(TheNVPTXTarget32, "nvptx",
+ RegisterTarget<Triple::nvptx> X(getTheNVPTXTarget32(), "nvptx",
"NVIDIA PTX 32-bit");
- RegisterTarget<Triple::nvptx64> Y(TheNVPTXTarget64, "nvptx64",
+ RegisterTarget<Triple::nvptx64> Y(getTheNVPTXTarget64(), "nvptx64",
"NVIDIA PTX 64-bit");
}
diff --git a/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
index 4181775fc6da..52432a5820fb 100644
--- a/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp
@@ -83,6 +83,16 @@ static const MCPhysReg FRegs[32] = {
PPC::F24, PPC::F25, PPC::F26, PPC::F27,
PPC::F28, PPC::F29, PPC::F30, PPC::F31
};
+static const MCPhysReg VFRegs[32] = {
+ PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3,
+ PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7,
+ PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11,
+ PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15,
+ PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19,
+ PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23,
+ PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
+ PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
+};
static const MCPhysReg VRegs[32] = {
PPC::V0, PPC::V1, PPC::V2, PPC::V3,
PPC::V4, PPC::V5, PPC::V6, PPC::V7,
@@ -103,14 +113,14 @@ static const MCPhysReg VSRegs[64] = {
PPC::VSL24, PPC::VSL25, PPC::VSL26, PPC::VSL27,
PPC::VSL28, PPC::VSL29, PPC::VSL30, PPC::VSL31,
- PPC::VSH0, PPC::VSH1, PPC::VSH2, PPC::VSH3,
- PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7,
- PPC::VSH8, PPC::VSH9, PPC::VSH10, PPC::VSH11,
- PPC::VSH12, PPC::VSH13, PPC::VSH14, PPC::VSH15,
- PPC::VSH16, PPC::VSH17, PPC::VSH18, PPC::VSH19,
- PPC::VSH20, PPC::VSH21, PPC::VSH22, PPC::VSH23,
- PPC::VSH24, PPC::VSH25, PPC::VSH26, PPC::VSH27,
- PPC::VSH28, PPC::VSH29, PPC::VSH30, PPC::VSH31
+ PPC::V0, PPC::V1, PPC::V2, PPC::V3,
+ PPC::V4, PPC::V5, PPC::V6, PPC::V7,
+ PPC::V8, PPC::V9, PPC::V10, PPC::V11,
+ PPC::V12, PPC::V13, PPC::V14, PPC::V15,
+ PPC::V16, PPC::V17, PPC::V18, PPC::V19,
+ PPC::V20, PPC::V21, PPC::V22, PPC::V23,
+ PPC::V24, PPC::V25, PPC::V26, PPC::V27,
+ PPC::V28, PPC::V29, PPC::V30, PPC::V31
};
static const MCPhysReg VSFRegs[64] = {
PPC::F0, PPC::F1, PPC::F2, PPC::F3,
@@ -246,13 +256,11 @@ class PPCAsmParser : public MCTargetAsmParser {
bool IsDarwin;
void Warning(SMLoc L, const Twine &Msg) { getParser().Warning(L, Msg); }
- bool Error(SMLoc L, const Twine &Msg) { return getParser().Error(L, Msg); }
bool isPPC64() const { return IsPPC64; }
bool isDarwin() const { return IsDarwin; }
- bool MatchRegisterName(const AsmToken &Tok,
- unsigned &RegNo, int64_t &IntVal);
+ bool MatchRegisterName(unsigned &RegNo, int64_t &IntVal);
bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
@@ -264,8 +272,8 @@ class PPCAsmParser : public MCTargetAsmParser {
bool ParseOperand(OperandVector &Operands);
- bool ParseDirectiveWord(unsigned Size, SMLoc L);
- bool ParseDirectiveTC(unsigned Size, SMLoc L);
+ bool ParseDirectiveWord(unsigned Size, AsmToken ID);
+ bool ParseDirectiveTC(unsigned Size, AsmToken ID);
bool ParseDirectiveMachine(SMLoc L);
bool ParseDarwinDirectiveMachine(SMLoc L);
bool ParseDirectiveAbiVersion(SMLoc L);
@@ -545,6 +553,7 @@ public:
&& isUInt<5>(getImm())); }
bool isCRBitMask() const { return Kind == Immediate && isUInt<8>(getImm()) &&
isPowerOf2_32(getImm()); }
+ bool isATBitsAsHint() const { return false; }
bool isMem() const override { return false; }
bool isReg() const override { return false; }
@@ -596,6 +605,11 @@ public:
Inst.addOperand(MCOperand::createReg(FRegs[getReg()]));
}
+ void addRegVFRCOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 1 && "Invalid number of operands!");
+ Inst.addOperand(MCOperand::createReg(VFRegs[getReg()]));
+ }
+
void addRegVRRCOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands!");
Inst.addOperand(MCOperand::createReg(VRegs[getReg()]));
@@ -874,6 +888,23 @@ void PPCAsmParser::ProcessInstruction(MCInst &Inst,
Inst = TmpInst;
break;
}
+ case PPC::DCBFx:
+ case PPC::DCBFL:
+ case PPC::DCBFLP: {
+ int L = 0;
+ if (Opcode == PPC::DCBFL)
+ L = 1;
+ else if (Opcode == PPC::DCBFLP)
+ L = 3;
+
+ MCInst TmpInst;
+ TmpInst.setOpcode(PPC::DCBF);
+ TmpInst.addOperand(MCOperand::createImm(L));
+ TmpInst.addOperand(Inst.getOperand(0));
+ TmpInst.addOperand(Inst.getOperand(1));
+ Inst = TmpInst;
+ break;
+ }
case PPC::LAx: {
MCInst TmpInst;
TmpInst.setOpcode(PPC::LA);
@@ -1263,68 +1294,54 @@ bool PPCAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
llvm_unreachable("Implement any new match types added!");
}
-bool PPCAsmParser::
-MatchRegisterName(const AsmToken &Tok, unsigned &RegNo, int64_t &IntVal) {
- if (Tok.is(AsmToken::Identifier)) {
- StringRef Name = Tok.getString();
-
+bool PPCAsmParser::MatchRegisterName(unsigned &RegNo, int64_t &IntVal) {
+ if (getParser().getTok().is(AsmToken::Identifier)) {
+ StringRef Name = getParser().getTok().getString();
if (Name.equals_lower("lr")) {
RegNo = isPPC64()? PPC::LR8 : PPC::LR;
IntVal = 8;
- return false;
} else if (Name.equals_lower("ctr")) {
RegNo = isPPC64()? PPC::CTR8 : PPC::CTR;
IntVal = 9;
- return false;
} else if (Name.equals_lower("vrsave")) {
RegNo = PPC::VRSAVE;
IntVal = 256;
- return false;
} else if (Name.startswith_lower("r") &&
!Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
RegNo = isPPC64()? XRegs[IntVal] : RRegs[IntVal];
- return false;
} else if (Name.startswith_lower("f") &&
!Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
RegNo = FRegs[IntVal];
- return false;
} else if (Name.startswith_lower("vs") &&
!Name.substr(2).getAsInteger(10, IntVal) && IntVal < 64) {
RegNo = VSRegs[IntVal];
- return false;
} else if (Name.startswith_lower("v") &&
!Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
RegNo = VRegs[IntVal];
- return false;
} else if (Name.startswith_lower("q") &&
!Name.substr(1).getAsInteger(10, IntVal) && IntVal < 32) {
RegNo = QFRegs[IntVal];
- return false;
} else if (Name.startswith_lower("cr") &&
!Name.substr(2).getAsInteger(10, IntVal) && IntVal < 8) {
RegNo = CRRegs[IntVal];
- return false;
- }
+ } else
+ return true;
+ getParser().Lex();
+ return false;
}
-
return true;
}
bool PPCAsmParser::
ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) {
- MCAsmParser &Parser = getParser();
- const AsmToken &Tok = Parser.getTok();
+ const AsmToken &Tok = getParser().getTok();
StartLoc = Tok.getLoc();
EndLoc = Tok.getEndLoc();
RegNo = 0;
int64_t IntVal;
-
- if (!MatchRegisterName(Tok, RegNo, IntVal)) {
- Parser.Lex(); // Eat identifier token.
- return false;
- }
-
- return Error(StartLoc, "invalid register name");
+ if (MatchRegisterName(RegNo, IntVal))
+ return TokError("invalid register name");
+ return false;
}
/// Extract \code @l/@ha \endcode modifier from expression. Recursively scan
@@ -1550,14 +1567,21 @@ bool PPCAsmParser::ParseOperand(OperandVector &Operands) {
Parser.Lex(); // Eat the '%'.
unsigned RegNo;
int64_t IntVal;
- if (!MatchRegisterName(Parser.getTok(), RegNo, IntVal)) {
- Parser.Lex(); // Eat the identifier token.
- Operands.push_back(PPCOperand::CreateImm(IntVal, S, E, isPPC64()));
- return false;
- }
- return Error(S, "invalid register name");
+ if (MatchRegisterName(RegNo, IntVal))
+ return Error(S, "invalid register name");
+
+ Operands.push_back(PPCOperand::CreateImm(IntVal, S, E, isPPC64()));
+ return false;
case AsmToken::Identifier:
+ case AsmToken::LParen:
+ case AsmToken::Plus:
+ case AsmToken::Minus:
+ case AsmToken::Integer:
+ case AsmToken::Dot:
+ case AsmToken::Dollar:
+ case AsmToken::Exclaim:
+ case AsmToken::Tilde:
// Note that non-register-name identifiers from the compiler will begin
// with '_', 'L'/'l' or '"'. Of course, handwritten asm could include
// identifiers like r31foo - so we fall through in the event that parsing
@@ -1565,25 +1589,17 @@ bool PPCAsmParser::ParseOperand(OperandVector &Operands) {
if (isDarwin()) {
unsigned RegNo;
int64_t IntVal;
- if (!MatchRegisterName(Parser.getTok(), RegNo, IntVal)) {
- Parser.Lex(); // Eat the identifier token.
+ if (!MatchRegisterName(RegNo, IntVal)) {
Operands.push_back(PPCOperand::CreateImm(IntVal, S, E, isPPC64()));
return false;
}
}
- // Fall-through to process non-register-name identifiers as expression.
- // All other expressions
- case AsmToken::LParen:
- case AsmToken::Plus:
- case AsmToken::Minus:
- case AsmToken::Integer:
- case AsmToken::Dot:
- case AsmToken::Dollar:
- case AsmToken::Exclaim:
- case AsmToken::Tilde:
+ // All other expressions
+
if (!ParseExpression(EVal))
break;
- /* fall through */
+ // Fall-through
+ LLVM_FALLTHROUGH;
default:
return Error(S, "unknown operand");
}
@@ -1621,40 +1637,33 @@ bool PPCAsmParser::ParseOperand(OperandVector &Operands) {
case AsmToken::Percent:
Parser.Lex(); // Eat the '%'.
unsigned RegNo;
- if (MatchRegisterName(Parser.getTok(), RegNo, IntVal))
+ if (MatchRegisterName(RegNo, IntVal))
return Error(S, "invalid register name");
- Parser.Lex(); // Eat the identifier token.
break;
case AsmToken::Integer:
- if (!isDarwin()) {
- if (getParser().parseAbsoluteExpression(IntVal) ||
- IntVal < 0 || IntVal > 31)
- return Error(S, "invalid register number");
- } else {
+ if (isDarwin())
return Error(S, "unexpected integer value");
- }
+ else if (getParser().parseAbsoluteExpression(IntVal) || IntVal < 0 ||
+ IntVal > 31)
+ return Error(S, "invalid register number");
break;
-
case AsmToken::Identifier:
if (isDarwin()) {
unsigned RegNo;
- if (!MatchRegisterName(Parser.getTok(), RegNo, IntVal)) {
- Parser.Lex(); // Eat the identifier token.
+ if (!MatchRegisterName(RegNo, IntVal)) {
break;
}
}
- // Fall-through..
+ LLVM_FALLTHROUGH;
default:
return Error(S, "invalid memory operand");
}
- if (getLexer().isNot(AsmToken::RParen))
- return Error(Parser.getTok().getLoc(), "missing ')'");
E = Parser.getTok().getLoc();
- Parser.Lex(); // Eat the ')'.
-
+ if (parseToken(AsmToken::RParen, "missing ')'"))
+ return true;
Operands.push_back(PPCOperand::CreateImm(IntVal, S, E, isPPC64()));
}
@@ -1668,14 +1677,12 @@ bool PPCAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
// If the next character is a '+' or '-', we need to add it to the
// instruction name, to match what TableGen is doing.
std::string NewOpcode;
- if (getLexer().is(AsmToken::Plus)) {
- getLexer().Lex();
+ if (parseOptionalToken(AsmToken::Plus)) {
NewOpcode = Name;
NewOpcode += '+';
Name = NewOpcode;
}
- if (getLexer().is(AsmToken::Minus)) {
- getLexer().Lex();
+ if (parseOptionalToken(AsmToken::Minus)) {
NewOpcode = Name;
NewOpcode += '-';
Name = NewOpcode;
@@ -1700,20 +1707,15 @@ bool PPCAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
}
// If there are no more operands then finish
- if (getLexer().is(AsmToken::EndOfStatement))
+ if (parseOptionalToken(AsmToken::EndOfStatement))
return false;
// Parse the first operand
if (ParseOperand(Operands))
return true;
- while (getLexer().isNot(AsmToken::EndOfStatement) &&
- getLexer().is(AsmToken::Comma)) {
- // Consume the comma token
- Lex();
-
- // Parse the next operand
- if (ParseOperand(Operands))
+ while (!parseOptionalToken(AsmToken::EndOfStatement)) {
+ if (parseToken(AsmToken::Comma) || ParseOperand(Operands))
return true;
}
@@ -1738,108 +1740,94 @@ bool PPCAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
/// ParseDirective parses the PPC specific directives
bool PPCAsmParser::ParseDirective(AsmToken DirectiveID) {
StringRef IDVal = DirectiveID.getIdentifier();
- if (!isDarwin()) {
- if (IDVal == ".word")
- return ParseDirectiveWord(2, DirectiveID.getLoc());
- if (IDVal == ".llong")
- return ParseDirectiveWord(8, DirectiveID.getLoc());
- if (IDVal == ".tc")
- return ParseDirectiveTC(isPPC64()? 8 : 4, DirectiveID.getLoc());
+ if (isDarwin()) {
if (IDVal == ".machine")
- return ParseDirectiveMachine(DirectiveID.getLoc());
- if (IDVal == ".abiversion")
- return ParseDirectiveAbiVersion(DirectiveID.getLoc());
- if (IDVal == ".localentry")
- return ParseDirectiveLocalEntry(DirectiveID.getLoc());
- } else {
- if (IDVal == ".machine")
- return ParseDarwinDirectiveMachine(DirectiveID.getLoc());
- }
- return true;
+ ParseDarwinDirectiveMachine(DirectiveID.getLoc());
+ else
+ return true;
+ } else if (IDVal == ".word")
+ ParseDirectiveWord(2, DirectiveID);
+ else if (IDVal == ".llong")
+ ParseDirectiveWord(8, DirectiveID);
+ else if (IDVal == ".tc")
+ ParseDirectiveTC(isPPC64() ? 8 : 4, DirectiveID);
+ else if (IDVal == ".machine")
+ ParseDirectiveMachine(DirectiveID.getLoc());
+ else if (IDVal == ".abiversion")
+ ParseDirectiveAbiVersion(DirectiveID.getLoc());
+ else if (IDVal == ".localentry")
+ ParseDirectiveLocalEntry(DirectiveID.getLoc());
+ else
+ return true;
+ return false;
}
/// ParseDirectiveWord
/// ::= .word [ expression (, expression)* ]
-bool PPCAsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
- MCAsmParser &Parser = getParser();
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- for (;;) {
- const MCExpr *Value;
- SMLoc ExprLoc = getLexer().getLoc();
- if (getParser().parseExpression(Value))
- return false;
-
- if (const auto *MCE = dyn_cast<MCConstantExpr>(Value)) {
- assert(Size <= 8 && "Invalid size");
- uint64_t IntValue = MCE->getValue();
- if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue))
- return Error(ExprLoc, "literal value out of range for directive");
- getStreamer().EmitIntValue(IntValue, Size);
- } else {
- getStreamer().EmitValue(Value, Size, ExprLoc);
- }
-
- if (getLexer().is(AsmToken::EndOfStatement))
- break;
-
- if (getLexer().isNot(AsmToken::Comma))
- return Error(L, "unexpected token in directive");
- Parser.Lex();
- }
- }
+bool PPCAsmParser::ParseDirectiveWord(unsigned Size, AsmToken ID) {
+ auto parseOp = [&]() -> bool {
+ const MCExpr *Value;
+ SMLoc ExprLoc = getParser().getTok().getLoc();
+ if (getParser().parseExpression(Value))
+ return true;
+ if (const auto *MCE = dyn_cast<MCConstantExpr>(Value)) {
+ assert(Size <= 8 && "Invalid size");
+ uint64_t IntValue = MCE->getValue();
+ if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue))
+ return Error(ExprLoc, "literal value out of range for '" +
+ ID.getIdentifier() + "' directive");
+ getStreamer().EmitIntValue(IntValue, Size);
+ } else
+ getStreamer().EmitValue(Value, Size, ExprLoc);
+ return false;
+ };
- Parser.Lex();
+ if (parseMany(parseOp))
+ return addErrorSuffix(" in '" + ID.getIdentifier() + "' directive");
return false;
}
/// ParseDirectiveTC
/// ::= .tc [ symbol (, expression)* ]
-bool PPCAsmParser::ParseDirectiveTC(unsigned Size, SMLoc L) {
+bool PPCAsmParser::ParseDirectiveTC(unsigned Size, AsmToken ID) {
MCAsmParser &Parser = getParser();
// Skip TC symbol, which is only used with XCOFF.
while (getLexer().isNot(AsmToken::EndOfStatement)
&& getLexer().isNot(AsmToken::Comma))
Parser.Lex();
- if (getLexer().isNot(AsmToken::Comma)) {
- Error(L, "unexpected token in directive");
- return false;
- }
- Parser.Lex();
+ if (parseToken(AsmToken::Comma))
+ return addErrorSuffix(" in '.tc' directive");
// Align to word size.
getParser().getStreamer().EmitValueToAlignment(Size);
// Emit expressions.
- return ParseDirectiveWord(Size, L);
+ return ParseDirectiveWord(Size, ID);
}
/// ParseDirectiveMachine (ELF platforms)
/// ::= .machine [ cpu | "push" | "pop" ]
bool PPCAsmParser::ParseDirectiveMachine(SMLoc L) {
MCAsmParser &Parser = getParser();
- if (getLexer().isNot(AsmToken::Identifier) &&
- getLexer().isNot(AsmToken::String)) {
- Error(L, "unexpected token in directive");
- return false;
- }
+ if (Parser.getTok().isNot(AsmToken::Identifier) &&
+ Parser.getTok().isNot(AsmToken::String))
+ return Error(L, "unexpected token in '.machine' directive");
StringRef CPU = Parser.getTok().getIdentifier();
- Parser.Lex();
// FIXME: Right now, the parser always allows any available
// instruction, so the .machine directive is not useful.
// Implement ".machine any" (by doing nothing) for the benefit
// of existing assembler code. Likewise, we can then implement
// ".machine push" and ".machine pop" as no-op.
- if (CPU != "any" && CPU != "push" && CPU != "pop") {
- Error(L, "unrecognized machine type");
- return false;
- }
+ if (CPU != "any" && CPU != "push" && CPU != "pop")
+ return TokError("unrecognized machine type");
+
+ Parser.Lex();
+
+ if (parseToken(AsmToken::EndOfStatement))
+ return addErrorSuffix(" in '.machine' directive");
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- Error(L, "unexpected token in directive");
- return false;
- }
PPCTargetStreamer &TStreamer =
*static_cast<PPCTargetStreamer *>(
getParser().getStreamer().getTargetStreamer());
@@ -1852,11 +1840,9 @@ bool PPCAsmParser::ParseDirectiveMachine(SMLoc L) {
/// ::= .machine cpu-identifier
bool PPCAsmParser::ParseDarwinDirectiveMachine(SMLoc L) {
MCAsmParser &Parser = getParser();
- if (getLexer().isNot(AsmToken::Identifier) &&
- getLexer().isNot(AsmToken::String)) {
- Error(L, "unexpected token in directive");
- return false;
- }
+ if (Parser.getTok().isNot(AsmToken::Identifier) &&
+ Parser.getTok().isNot(AsmToken::String))
+ return Error(L, "unexpected token in directive");
StringRef CPU = Parser.getTok().getIdentifier();
Parser.Lex();
@@ -1864,25 +1850,14 @@ bool PPCAsmParser::ParseDarwinDirectiveMachine(SMLoc L) {
// FIXME: this is only the 'default' set of cpu variants.
// However we don't act on this information at present, this is simply
// allowing parsing to proceed with minimal sanity checking.
- if (CPU != "ppc7400" && CPU != "ppc" && CPU != "ppc64") {
- Error(L, "unrecognized cpu type");
- return false;
- }
-
- if (isPPC64() && (CPU == "ppc7400" || CPU == "ppc")) {
- Error(L, "wrong cpu type specified for 64bit");
- return false;
- }
- if (!isPPC64() && CPU == "ppc64") {
- Error(L, "wrong cpu type specified for 32bit");
- return false;
- }
-
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- Error(L, "unexpected token in directive");
- return false;
- }
-
+ if (check(CPU != "ppc7400" && CPU != "ppc" && CPU != "ppc64", L,
+ "unrecognized cpu type") ||
+ check(isPPC64() && (CPU == "ppc7400" || CPU == "ppc"), L,
+ "wrong cpu type specified for 64bit") ||
+ check(!isPPC64() && CPU == "ppc64", L,
+ "wrong cpu type specified for 32bit") ||
+ parseToken(AsmToken::EndOfStatement))
+ return addErrorSuffix(" in '.machine' directive");
return false;
}
@@ -1890,14 +1865,10 @@ bool PPCAsmParser::ParseDarwinDirectiveMachine(SMLoc L) {
/// ::= .abiversion constant-expression
bool PPCAsmParser::ParseDirectiveAbiVersion(SMLoc L) {
int64_t AbiVersion;
- if (getParser().parseAbsoluteExpression(AbiVersion)){
- Error(L, "expected constant expression");
- return false;
- }
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- Error(L, "unexpected token in directive");
- return false;
- }
+ if (check(getParser().parseAbsoluteExpression(AbiVersion), L,
+ "expected constant expression") ||
+ parseToken(AsmToken::EndOfStatement))
+ return addErrorSuffix(" in '.abiversion' directive");
PPCTargetStreamer &TStreamer =
*static_cast<PPCTargetStreamer *>(
@@ -1911,28 +1882,16 @@ bool PPCAsmParser::ParseDirectiveAbiVersion(SMLoc L) {
/// ::= .localentry symbol, expression
bool PPCAsmParser::ParseDirectiveLocalEntry(SMLoc L) {
StringRef Name;
- if (getParser().parseIdentifier(Name)) {
- Error(L, "expected identifier in directive");
- return false;
- }
- MCSymbolELF *Sym = cast<MCSymbolELF>(getContext().getOrCreateSymbol(Name));
-
- if (getLexer().isNot(AsmToken::Comma)) {
- Error(L, "unexpected token in directive");
- return false;
- }
- Lex();
+ if (getParser().parseIdentifier(Name))
+ return Error(L, "expected identifier in '.localentry' directive");
+ MCSymbolELF *Sym = cast<MCSymbolELF>(getContext().getOrCreateSymbol(Name));
const MCExpr *Expr;
- if (getParser().parseExpression(Expr)) {
- Error(L, "expected expression");
- return false;
- }
- if (getLexer().isNot(AsmToken::EndOfStatement)) {
- Error(L, "unexpected token in directive");
- return false;
- }
+ if (parseToken(AsmToken::Comma) ||
+ check(getParser().parseExpression(Expr), L, "expected expression") ||
+ parseToken(AsmToken::EndOfStatement))
+ return addErrorSuffix(" in '.localentry' directive");
PPCTargetStreamer &TStreamer =
*static_cast<PPCTargetStreamer *>(
@@ -1946,9 +1905,9 @@ bool PPCAsmParser::ParseDirectiveLocalEntry(SMLoc L) {
/// Force static initialization.
extern "C" void LLVMInitializePowerPCAsmParser() {
- RegisterMCAsmParser<PPCAsmParser> A(ThePPC32Target);
- RegisterMCAsmParser<PPCAsmParser> B(ThePPC64Target);
- RegisterMCAsmParser<PPCAsmParser> C(ThePPC64LETarget);
+ RegisterMCAsmParser<PPCAsmParser> A(getThePPC32Target());
+ RegisterMCAsmParser<PPCAsmParser> B(getThePPC64Target());
+ RegisterMCAsmParser<PPCAsmParser> C(getThePPC64LETarget());
}
#define GET_REGISTER_MATCHER
diff --git a/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp b/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
index 6ea4fb1bfbc3..12ffbfdeacc1 100644
--- a/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/Disassembler/PPCDisassembler.cpp
@@ -51,11 +51,11 @@ static MCDisassembler *createPPCLEDisassembler(const Target &T,
extern "C" void LLVMInitializePowerPCDisassembler() {
// Register the disassembler for each target.
- TargetRegistry::RegisterMCDisassembler(ThePPC32Target,
+ TargetRegistry::RegisterMCDisassembler(getThePPC32Target(),
createPPCDisassembler);
- TargetRegistry::RegisterMCDisassembler(ThePPC64Target,
+ TargetRegistry::RegisterMCDisassembler(getThePPC64Target(),
createPPCDisassembler);
- TargetRegistry::RegisterMCDisassembler(ThePPC64LETarget,
+ TargetRegistry::RegisterMCDisassembler(getThePPC64LETarget(),
createPPCLEDisassembler);
}
@@ -89,6 +89,17 @@ static const unsigned FRegs[] = {
PPC::F28, PPC::F29, PPC::F30, PPC::F31
};
+static const unsigned VFRegs[] = {
+ PPC::VF0, PPC::VF1, PPC::VF2, PPC::VF3,
+ PPC::VF4, PPC::VF5, PPC::VF6, PPC::VF7,
+ PPC::VF8, PPC::VF9, PPC::VF10, PPC::VF11,
+ PPC::VF12, PPC::VF13, PPC::VF14, PPC::VF15,
+ PPC::VF16, PPC::VF17, PPC::VF18, PPC::VF19,
+ PPC::VF20, PPC::VF21, PPC::VF22, PPC::VF23,
+ PPC::VF24, PPC::VF25, PPC::VF26, PPC::VF27,
+ PPC::VF28, PPC::VF29, PPC::VF30, PPC::VF31
+};
+
static const unsigned VRegs[] = {
PPC::V0, PPC::V1, PPC::V2, PPC::V3,
PPC::V4, PPC::V5, PPC::V6, PPC::V7,
@@ -110,14 +121,14 @@ static const unsigned VSRegs[] = {
PPC::VSL24, PPC::VSL25, PPC::VSL26, PPC::VSL27,
PPC::VSL28, PPC::VSL29, PPC::VSL30, PPC::VSL31,
- PPC::VSH0, PPC::VSH1, PPC::VSH2, PPC::VSH3,
- PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7,
- PPC::VSH8, PPC::VSH9, PPC::VSH10, PPC::VSH11,
- PPC::VSH12, PPC::VSH13, PPC::VSH14, PPC::VSH15,
- PPC::VSH16, PPC::VSH17, PPC::VSH18, PPC::VSH19,
- PPC::VSH20, PPC::VSH21, PPC::VSH22, PPC::VSH23,
- PPC::VSH24, PPC::VSH25, PPC::VSH26, PPC::VSH27,
- PPC::VSH28, PPC::VSH29, PPC::VSH30, PPC::VSH31
+ PPC::V0, PPC::V1, PPC::V2, PPC::V3,
+ PPC::V4, PPC::V5, PPC::V6, PPC::V7,
+ PPC::V8, PPC::V9, PPC::V10, PPC::V11,
+ PPC::V12, PPC::V13, PPC::V14, PPC::V15,
+ PPC::V16, PPC::V17, PPC::V18, PPC::V19,
+ PPC::V20, PPC::V21, PPC::V22, PPC::V23,
+ PPC::V24, PPC::V25, PPC::V26, PPC::V27,
+ PPC::V28, PPC::V29, PPC::V30, PPC::V31
};
static const unsigned VSFRegs[] = {
@@ -242,6 +253,12 @@ static DecodeStatus DecodeF8RCRegisterClass(MCInst &Inst, uint64_t RegNo,
return decodeRegisterClass(Inst, RegNo, FRegs);
}
+static DecodeStatus DecodeVFRCRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, VFRegs);
+}
+
static DecodeStatus DecodeVRRCRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t Address,
const void *Decoder) {
diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
index d9d9b4f180f7..609d959c6d08 100644
--- a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "PPCInstPrinter.h"
+#include "PPCInstrInfo.h"
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "llvm/MC/MCExpr.h"
@@ -33,6 +34,11 @@ static cl::opt<bool>
FullRegNames("ppc-asm-full-reg-names", cl::Hidden, cl::init(false),
cl::desc("Use full register names when printing assembly"));
+// Useful for testing purposes. Prints vs{32-63} as v{0-31} respectively.
+static cl::opt<bool>
+ShowVSRNumsAsVR("ppc-vsr-nums-as-vr", cl::Hidden, cl::init(false),
+ cl::desc("Prints full register names with vs{32-63} as v{0-31}"));
+
#define PRINT_ALIAS_INSTR
#include "PPCGenAsmWriter.inc"
@@ -135,6 +141,25 @@ void PPCInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
printAnnotation(O, Annot);
return;
}
+
+ if (MI->getOpcode() == PPC::DCBF) {
+ unsigned char L = MI->getOperand(0).getImm();
+ if (!L || L == 1 || L == 3) {
+ O << "\tdcbf";
+ if (L == 1 || L == 3)
+ O << "l";
+ if (L == 3)
+ O << "p";
+ O << " ";
+
+ printOperand(MI, 1, O);
+ O << ", ";
+ printOperand(MI, 2, O);
+
+ printAnnotation(O, Annot);
+ return;
+ }
+ }
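The block just above folds the extended mnemonics back out of the unified DCBF opcode when printing; the L field selects the form. A hedged summary of that mapping (the standalone helper below is illustrative only):

// L field of DCBF selects the printed mnemonic:
//   L == 0 -> dcbf, L == 1 -> dcbfl, L == 3 -> dcbflp
static const char *dcbfMnemonicFor(unsigned L) {
  switch (L) {
  case 1:  return "dcbfl";
  case 3:  return "dcbflp";
  default: return "dcbf";
  }
}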
if (!printAliasInstr(MI, O))
printInstruction(MI, O);
@@ -239,6 +264,15 @@ void PPCInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNo,
printOperand(MI, OpNo+1, O);
}
+void PPCInstPrinter::printATBitsAsHint(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O) {
+ unsigned Code = MI->getOperand(OpNo).getImm();
+ if (Code == 2)
+ O << "-";
+ else if (Code == 3)
+ O << "+";
+}
+
void PPCInstPrinter::printU1ImmOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
unsigned int Value = MI->getOperand(OpNo).getImm();
@@ -295,10 +329,12 @@ void PPCInstPrinter::printU7ImmOperand(const MCInst *MI, unsigned OpNo,
O << (unsigned int)Value;
}
+// Operands of BUILD_VECTOR are signed and we use this to print operands
+// of XXSPLTIB which are unsigned. So we simply truncate to 8 bits and
+// print as unsigned.
void PPCInstPrinter::printU8ImmOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
- unsigned int Value = MI->getOperand(OpNo).getImm();
- assert(Value <= 255 && "Invalid u8imm argument!");
+ unsigned char Value = MI->getOperand(OpNo).getImm();
O << (unsigned int)Value;
}
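A tiny worked example of the truncation above, under the assumption that the incoming immediate may arrive as a sign-extended value such as -1:

#include <cstdio>

int main() {
  long long Imm = -1;                        // hypothetical signed operand
  unsigned char Value = (unsigned char)Imm;  // truncate to 8 bits
  std::printf("%u\n", (unsigned int)Value);  // prints 255, as XXSPLTIB expects
  return 0;
}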
@@ -412,7 +448,7 @@ void PPCInstPrinter::printTLSCall(const MCInst *MI, unsigned OpNo,
/// stripRegisterPrefix - This method strips the character prefix from a
/// register name so that only the number is left. Used by for linux asm.
static const char *stripRegisterPrefix(const char *RegName) {
- if (FullRegNames)
+ if (FullRegNames || ShowVSRNumsAsVR)
return RegName;
switch (RegName[0]) {
@@ -433,7 +469,24 @@ void PPCInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.isReg()) {
- const char *RegName = getRegisterName(Op.getReg());
+ unsigned Reg = Op.getReg();
+
+ // There are VSX instructions that use VSX register numbering (vs0 - vs63)
+ // as well as those that use VMX register numbering (v0 - v31 which
+ // correspond to vs32 - vs63). If we have an instruction that uses VSX
+ // numbering, we need to convert the VMX registers to VSX registers.
+ // Namely, we print 32-63 when the instruction operates on one of the
+ // VMX registers.
+ // (Please synchronize with PPCAsmPrinter::printOperand)
+ if ((MII.get(MI->getOpcode()).TSFlags & PPCII::UseVSXReg) &&
+ !ShowVSRNumsAsVR) {
+ if (PPCInstrInfo::isVRRegister(Reg))
+ Reg = PPC::VSX32 + (Reg - PPC::V0);
+ else if (PPCInstrInfo::isVFRegister(Reg))
+ Reg = PPC::VSX32 + (Reg - PPC::VF0);
+ }
+
+ const char *RegName = getRegisterName(Reg);
// The linux and AIX assembler does not take register prefixes.
if (!isDarwinSyntax())
RegName = stripRegisterPrefix(RegName);
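The renumbering described in the comment above is a fixed offset: v0..v31 overlay vs32..vs63. A minimal sketch using the same predicates the hunk relies on; the standalone helper and its includes are assumptions for illustration:

#include "PPCInstrInfo.h"
#include "MCTargetDesc/PPCMCTargetDesc.h"

// Map a VMX or VSX-float register onto its VSX number for printing,
// e.g. V3 -> VSX35, VF7 -> VSX39. Other registers pass through.
static unsigned toVSXRegNumber(unsigned Reg) {
  using namespace llvm;
  if (PPCInstrInfo::isVRRegister(Reg))
    return PPC::VSX32 + (Reg - PPC::V0);
  if (PPCInstrInfo::isVFRegister(Reg))
    return PPC::VSX32 + (Reg - PPC::VF0);
  return Reg;
}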
diff --git a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
index d0ffeff0247c..9c79ffb1176c 100644
--- a/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
+++ b/contrib/llvm/lib/Target/PowerPC/InstPrinter/PPCInstPrinter.h
@@ -45,6 +45,7 @@ public:
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printPredicateOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O, const char *Modifier = nullptr);
+ void printATBitsAsHint(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU1ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printU2ImmOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
index 9100ecb4aa37..5847b3a52bfc 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp
@@ -230,7 +230,8 @@ namespace {
MCAsmBackend *llvm::createPPCAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU) {
+ const Triple &TT, StringRef CPU,
+ const MCTargetOptions &Options) {
if (TT.isOSDarwin())
return new DarwinPPCAsmBackend(T);
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
index e7b2d8369f2f..017d21af08a8 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp
@@ -11,6 +11,7 @@
//
//===----------------------------------------------------------------------===//
+#include "PPCInstrInfo.h"
#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "MCTargetDesc/PPCFixupKinds.h"
#include "llvm/ADT/Statistic.h"
@@ -105,6 +106,9 @@ public:
void encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override {
+ verifyInstructionPredicates(MI,
+ computeAvailableFeatures(STI.getFeatureBits()));
+
unsigned Opcode = MI.getOpcode();
const MCInstrDesc &Desc = MCII.get(Opcode);
@@ -138,7 +142,11 @@ public:
++MCNumEmitted; // Keep track of the # of mi's emitted.
}
-
+
+private:
+ uint64_t computeAvailableFeatures(const FeatureBitset &FB) const;
+ void verifyInstructionPredicates(const MCInst &MI,
+ uint64_t AvailableFeatures) const;
};
} // end anonymous namespace
@@ -350,7 +358,6 @@ get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
return 0x80 >> CTX.getRegisterInfo()->getEncodingValue(MO.getReg());
}
-
unsigned PPCMCCodeEmitter::
getMachineOpValue(const MCInst &MI, const MCOperand &MO,
SmallVectorImpl<MCFixup> &Fixups,
@@ -361,7 +368,14 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
assert((MI.getOpcode() != PPC::MTOCRF && MI.getOpcode() != PPC::MTOCRF8 &&
MI.getOpcode() != PPC::MFOCRF && MI.getOpcode() != PPC::MFOCRF8) ||
MO.getReg() < PPC::CR0 || MO.getReg() > PPC::CR7);
- return CTX.getRegisterInfo()->getEncodingValue(MO.getReg());
+ unsigned Reg = MO.getReg();
+ unsigned Encode = CTX.getRegisterInfo()->getEncodingValue(Reg);
+
+ if ((MCII.get(MI.getOpcode()).TSFlags & PPCII::UseVSXReg))
+ if (PPCInstrInfo::isVRRegister(Reg))
+ Encode += 32;
+
+ return Encode;
}
assert(MO.isImm() &&
@@ -370,4 +384,6 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO,
}
+
+#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "PPCGenMCCodeEmitter.inc"
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
index c9074448fe45..bbd10e5b260f 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp
@@ -228,7 +228,8 @@ static MCInstPrinter *createPPCMCInstPrinter(const Triple &T,
}
extern "C" void LLVMInitializePowerPCTargetMC() {
- for (Target *T : {&ThePPC32Target, &ThePPC64Target, &ThePPC64LETarget}) {
+ for (Target *T :
+ {&getThePPC32Target(), &getThePPC64Target(), &getThePPC64LETarget()}) {
// Register the MC asm info.
RegisterMCAsmInfoFn C(*T, createPPCMCAsmInfo);
diff --git a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
index 77fe45882289..0989e0c8e268 100644
--- a/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h
@@ -28,22 +28,24 @@ class MCInstrInfo;
class MCObjectWriter;
class MCRegisterInfo;
class MCSubtargetInfo;
+class MCTargetOptions;
class Target;
class Triple;
class StringRef;
class raw_pwrite_stream;
class raw_ostream;
-extern Target ThePPC32Target;
-extern Target ThePPC64Target;
-extern Target ThePPC64LETarget;
+Target &getThePPC32Target();
+Target &getThePPC64Target();
+Target &getThePPC64LETarget();
MCCodeEmitter *createPPCMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx);
MCAsmBackend *createPPCAsmBackend(const Target &T, const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU);
+ const Triple &TT, StringRef CPU,
+ const MCTargetOptions &Options);
/// Construct an PPC ELF object writer.
MCObjectWriter *createPPCELFObjectWriter(raw_pwrite_stream &OS, bool Is64Bit,
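
The declarations above replace extern Target globals with accessor functions. A rough sketch of that pattern under hypothetical names; returning a reference to a function-local static gives lazy construction and sidesteps static-initialization-order issues between translation units:

    struct Widget {};

    // Hypothetical accessor standing in for 'extern Widget TheWidget;'.
    Widget &getTheWidget() {
      static Widget TheWidget; // constructed on first use
      return TheWidget;
    }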
diff --git a/contrib/llvm/lib/Target/PowerPC/P9InstrResources.td b/contrib/llvm/lib/Target/PowerPC/P9InstrResources.td
new file mode 100644
index 000000000000..aea022f88766
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/P9InstrResources.td
@@ -0,0 +1,808 @@
+//===- P9InstrResources.td - P9 Instruction Resource Defs -*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the resources required by some of the P9 instructions.
+// This is part of the P9 processor model used for instruction scheduling. Not
+// every instruction is listed here; the instructions in this file belong to
+// itinerary classes that have instructions with different resource requirements.
+//
+//===----------------------------------------------------------------------===//
+
+
+def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C,
+ DISP_1C, DISP_1C],
+ (instrs
+ VADDCUW,
+ VADDUBM,
+ VADDUDM,
+ VADDUHM,
+ VADDUWM,
+ VAND,
+ VANDC,
+ VCMPEQUB,
+ VCMPEQUBo,
+ VCMPEQUD,
+ VCMPEQUDo,
+ VCMPEQUH,
+ VCMPEQUHo,
+ VCMPEQUW,
+ VCMPEQUWo,
+ VCMPGTSB,
+ VCMPGTSBo,
+ VCMPGTSD,
+ VCMPGTSDo,
+ VCMPGTSH,
+ VCMPGTSHo,
+ VCMPGTSW,
+ VCMPGTSWo,
+ VCMPGTUB,
+ VCMPGTUBo,
+ VCMPGTUD,
+ VCMPGTUDo,
+ VCMPGTUH,
+ VCMPGTUHo,
+ VCMPGTUW,
+ VCMPGTUWo,
+ VCMPNEB,
+ VCMPNEBo,
+ VCMPNEH,
+ VCMPNEHo,
+ VCMPNEW,
+ VCMPNEWo,
+ VCMPNEZB,
+ VCMPNEZBo,
+ VCMPNEZH,
+ VCMPNEZHo,
+ VCMPNEZW,
+ VCMPNEZWo,
+ VEQV,
+ VEXTSB2D,
+ VEXTSB2W,
+ VEXTSH2D,
+ VEXTSH2W,
+ VEXTSW2D,
+ VMRGEW,
+ VMRGOW,
+ VNAND,
+ VNEGD,
+ VNEGW,
+ VNOR,
+ VOR,
+ VORC,
+ VPOPCNTB,
+ VPOPCNTH,
+ VPOPCNTW,
+ VSEL,
+ VSUBCUW,
+ VSUBUBM,
+ VSUBUDM,
+ VSUBUHM,
+ VSUBUWM,
+ VXOR,
+ V_SET0B,
+ V_SET0H,
+ V_SET0,
+ XVABSDP,
+ XVABSSP,
+ XVCPSGNDP,
+ XVCPSGNSP,
+ XVIEXPDP,
+ XVNABSDP,
+ XVNABSSP,
+ XVNEGDP,
+ XVNEGSP,
+ XVXEXPDP,
+ XXLAND,
+ XXLANDC,
+ XXLEQV,
+ XXLNAND,
+ XXLNOR,
+ XXLOR,
+ XXLORf,
+ XXLORC,
+ XXLXOR,
+ XXSEL
+)>;
+
+def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C],
+ (instrs
+ XSABSQP,
+ XSCPSGNQP,
+ XSIEXPQP,
+ XSNABSQP,
+ XSNEGQP,
+ XSXEXPQP,
+ XSABSDP,
+ XSCPSGNDP,
+ XSIEXPDP,
+ XSNABSDP,
+ XSNEGDP,
+ XSXEXPDP
+)>;
+
+def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C],
+ (instrs
+
+ VMINSB,
+ VMINSD,
+ VMINSH,
+ VMINSW,
+ VMINUB,
+ VMINUD,
+ VMINUH,
+ VMINUW,
+ VPOPCNTD,
+ VPRTYBD,
+ VPRTYBW,
+ VRLB,
+ VRLD,
+ VRLDMI,
+ VRLDNM,
+ VRLH,
+ VRLW,
+ VRLWMI,
+ VRLWNM,
+ VSHASIGMAD,
+ VSHASIGMAW,
+ VSLB,
+ VSLD,
+ VSLH,
+ VSLW,
+ VSRAB,
+ VSRAD,
+ VSRAH,
+ VSRAW,
+ VSRB,
+ VSRD,
+ VSRH,
+ VSRW,
+ VSUBSBS,
+ VSUBSHS,
+ VSUBSWS,
+ VSUBUBS,
+ VSUBUHS,
+ VSUBUWS,
+ XSCMPEQDP,
+ XSCMPEXPDP,
+ XSCMPGEDP,
+ XSCMPGTDP,
+ XSCMPODP,
+ XSCMPUDP,
+ XSCVSPDPN,
+ XSMAXCDP,
+ XSMAXDP,
+ XSMAXJDP,
+ XSMINCDP,
+ XSMINDP,
+ XSMINJDP,
+ XSTDIVDP,
+ XSTSQRTDP,
+ XSTSTDCDP,
+ XSTSTDCSP,
+ XSXSIGDP,
+ XVCMPEQDP,
+ XVCMPEQDPo,
+ XVCMPEQSP,
+ XVCMPEQSPo,
+ XVCMPGEDP,
+ XVCMPGEDPo,
+ XVCMPGESP,
+ XVCMPGESPo,
+ XVCMPGTDP,
+ XVCMPGTDPo,
+ XVCMPGTSP,
+ XVCMPGTSPo,
+ XVIEXPSP,
+ XVMAXDP,
+ XVMAXSP,
+ XVMINDP,
+ XVMINSP,
+ XVTDIVDP,
+ XVTDIVSP,
+ XVTSQRTDP,
+ XVTSQRTSP,
+ XVTSTDCDP,
+ XVTSTDCSP,
+ XVXEXPSP,
+ XVXSIGDP,
+ XVXSIGSP
+)>;
+
+def : InstRW<[P9_ALUE_4C, P9_ALUO_4C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C],
+ (instrs
+ VABSDUB,
+ VABSDUH,
+ VABSDUW,
+ VADDSBS,
+ VADDSHS,
+ VADDSWS,
+ VADDUBS,
+ VADDUHS,
+ VADDUWS,
+ VAVGSB,
+ VAVGSH,
+ VAVGSW,
+ VAVGUB,
+ VAVGUH,
+ VAVGUW,
+ VBPERMD,
+ VCLZB,
+ VCLZD,
+ VCLZH,
+ VCLZW,
+ VCMPBFP,
+ VCMPBFPo,
+ VCMPGTFP,
+ VCMPGTFPo,
+ VCTZB,
+ VCTZD,
+ VCTZH,
+ VCTZW,
+ VMAXFP,
+ VMAXSB,
+ VMAXSD,
+ VMAXSH,
+ VMAXSW,
+ VMAXUB,
+ VMAXUD,
+ VMAXUH,
+ VMAXUW,
+ VMINFP,
+ VCMPEQFP,
+ VCMPEQFPo,
+ VCMPGEFP,
+ VCMPGEFPo
+)>;
+
+def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C],
+ (instrs
+ VADDFP,
+ VCTSXS,
+ VCTSXS_0,
+ VCTUXS,
+ VCTUXS_0,
+ VEXPTEFP,
+ VLOGEFP,
+ VMADDFP,
+ VMHADDSHS,
+ VNMSUBFP,
+ VREFP,
+ VRFIM,
+ VRFIN,
+ VRFIP,
+ VRFIZ,
+ VRSQRTEFP,
+ VSUBFP,
+ XVADDDP,
+ XVADDSP,
+ XVCVDPSP,
+ XVCVDPSXDS,
+ XVCVDPSXWS,
+ XVCVDPUXDS,
+ XVCVDPUXWS,
+ XVCVHPSP,
+ XVCVSPDP,
+ XVCVSPHP,
+ XVCVSPSXDS,
+ XVCVSPSXWS,
+ XVCVSPUXDS,
+ XVCVSPUXWS,
+ XVCVSXDDP,
+ XVCVSXDSP,
+ XVCVSXWDP,
+ XVCVSXWSP,
+ XVCVUXDDP,
+ XVCVUXDSP,
+ XVCVUXWDP,
+ XVCVUXWSP,
+ XVMADDADP,
+ XVMADDASP,
+ XVMADDMDP,
+ XVMADDMSP,
+ XVMSUBADP,
+ XVMSUBASP,
+ XVMSUBMDP,
+ XVMSUBMSP,
+ XVMULDP,
+ XVMULSP,
+ XVNMADDADP,
+ XVNMADDASP,
+ XVNMADDMDP,
+ XVNMADDMSP,
+ XVNMSUBADP,
+ XVNMSUBASP,
+ XVNMSUBMDP,
+ XVNMSUBMSP,
+ XVRDPI,
+ XVRDPIC,
+ XVRDPIM,
+ XVRDPIP,
+ XVRDPIZ,
+ XVREDP,
+ XVRESP,
+ XVRSPI,
+ XVRSPIC,
+ XVRSPIM,
+ XVRSPIP,
+ XVRSPIZ,
+ XVRSQRTEDP,
+ XVRSQRTESP,
+ XVSUBDP,
+ XVSUBSP,
+ VCFSX,
+ VCFSX_0,
+ VCFUX,
+ VCFUX_0,
+ VMHRADDSHS,
+ VMLADDUHM,
+ VMSUMMBM,
+ VMSUMSHM,
+ VMSUMSHS,
+ VMSUMUBM,
+ VMSUMUHM,
+ VMSUMUHS,
+ VMULESB,
+ VMULESH,
+ VMULESW,
+ VMULEUB,
+ VMULEUH,
+ VMULEUW,
+ VMULOSB,
+ VMULOSH,
+ VMULOSW,
+ VMULOUB,
+ VMULOUH,
+ VMULOUW,
+ VMULUWM,
+ VSUM2SWS,
+ VSUM4SBS,
+ VSUM4SHS,
+ VSUM4UBS,
+ VSUMSWS
+)>;
+
+def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ XSMADDADP,
+ XSMADDASP,
+ XSMADDMDP,
+ XSMADDMSP,
+ XSMSUBADP,
+ XSMSUBASP,
+ XSMSUBMDP,
+ XSMSUBMSP,
+ XSMULDP,
+ XSMULSP,
+ XSNMADDADP,
+ XSNMADDASP,
+ XSNMADDMDP,
+ XSNMADDMSP,
+ XSNMSUBADP,
+ XSNMSUBASP,
+ XSNMSUBMDP,
+ XSNMSUBMSP
+)>;
+
+
+def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C],
+ (instrs
+ XSADDDP,
+ XSADDSP,
+ XSCVDPHP,
+ XSCVDPSP,
+ XSCVDPSXDS,
+ XSCVDPSXWS,
+ XSCVDPUXDS,
+ XSCVDPUXWS,
+ XSCVHPDP,
+ XSCVSPDP,
+ XSCVSXDDP,
+ XSCVSXDSP,
+ XSCVUXDDP,
+ XSCVUXDSP,
+ XSRDPI,
+ XSRDPIC,
+ XSRDPIM,
+ XSRDPIP,
+ XSRDPIZ,
+ XSREDP,
+ XSRESP,
+ //XSRSP,
+ XSRSQRTEDP,
+ XSRSQRTESP,
+ XSSUBDP,
+ XSSUBSP,
+ XSCVDPSPN
+)>;
+
+def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C, DISP_1C],
+ (instrs
+ VBPERMQ,
+ VCLZLSBB,
+ VCTZLSBB,
+ VEXTRACTD,
+ VEXTRACTUB,
+ VEXTRACTUH,
+ VEXTRACTUW,
+ VEXTUBLX,
+ VEXTUBRX,
+ VEXTUHLX,
+ VEXTUHRX,
+ VEXTUWLX,
+ VEXTUWRX,
+ VGBBD,
+ VINSERTB,
+ VINSERTD,
+ VINSERTH,
+ VINSERTW,
+ VMRGHB,
+ VMRGHH,
+ VMRGHW,
+ VMRGLB,
+ VMRGLH,
+ VMRGLW,
+ VPERM,
+ VPERMR,
+ VPERMXOR,
+ VPKPX,
+ VPKSDSS,
+ VPKSDUS,
+ VPKSHSS,
+ VPKSHUS,
+ VPKSWSS,
+ VPKSWUS,
+ VPKUDUM,
+ VPKUDUS,
+ VPKUHUM,
+ VPKUHUS,
+ VPKUWUM,
+ VPKUWUS,
+ VPRTYBQ,
+ VSL,
+ VSLDOI,
+ VSLO,
+ VSLV,
+ VSPLTB,
+ VSPLTH,
+ VSPLTISB,
+ VSPLTISH,
+ VSPLTISW,
+ VSPLTW,
+ VSR,
+ VSRO,
+ VSRV,
+ VUPKHPX,
+ VUPKHSB,
+ VUPKHSH,
+ VUPKHSW,
+ VUPKLPX,
+ VUPKLSB,
+ VUPKLSH,
+ VUPKLSW,
+ XXBRD,
+ XXBRH,
+ XXBRQ,
+ XXBRW,
+ XXEXTRACTUW,
+ XXINSERTW,
+ XXMRGHW,
+ XXMRGLW,
+ XXPERM,
+ XXPERMR,
+ XXSLDWI,
+ XXSPLTIB,
+ XXSPLTW,
+ VADDCUQ,
+ VADDECUQ,
+ VADDEUQM,
+ VADDUQM,
+ VMUL10CUQ,
+ VMUL10ECUQ,
+ VMUL10EUQ,
+ VMUL10UQ,
+ VSUBCUQ,
+ VSUBECUQ,
+ VSUBEUQM,
+ VSUBUQM,
+ XSCMPEXPQP,
+ XSCMPOQP,
+ XSCMPUQP,
+ XSTSTDCQP,
+ XSXSIGQP
+)>;
+
+def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C],
+ (instrs
+ XSADDQP,
+ XSADDQPO,
+ XSCVDPQP,
+ XSCVQPDP,
+ XSCVQPDPO,
+ XSCVQPSDZ,
+ XSCVQPSWZ,
+ XSCVQPUDZ,
+ XSCVQPUWZ,
+ XSCVSDQP,
+ XSCVUDQP,
+ XSRQPI,
+ XSRQPXP,
+ XSSUBQP,
+ XSSUBQPO
+)>;
+
+def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C],
+ (instrs
+ XSMADDQP,
+ XSMADDQPO,
+ XSMSUBQP,
+ XSMSUBQPO,
+ XSMULQP,
+ XSMULQPO,
+ XSNMADDQP,
+ XSNMADDQPO,
+ XSNMSUBQP,
+ XSNMSUBQPO
+)>;
+
+def : InstRW<[P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C],
+ (instrs
+ XSDIVQP,
+ XSDIVQPO
+)>;
+
+def : InstRW<[P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C],
+ (instrs
+ XSSQRTQP,
+ XSSQRTQPO
+)>;
+
+// Load Operation in IIC_LdStLFD
+
+def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C, DISP_1C],
+ (instrs
+ LXSDX,
+ LXVD2X,
+ LXSIWZX,
+ LXV,
+ LXSD
+)>;
+
+def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ LFIWZX,
+ LFDX,
+ LFD
+)>;
+
+def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ LXSSPX,
+ LXSIWAX,
+ LXSSP
+)>;
+
+def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ LFIWAX,
+ LFSX,
+ LFS
+)>;
+
+def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
+ (instrs
+ LXVDSX,
+ LXVW4X
+)>;
+
+// Store Operations in IIC_LdStSTFD.
+
+def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ STFS,
+ STFD,
+ STFIWX,
+ STFSX,
+ STFDX,
+ STXSDX,
+ STXSSPX,
+ STXSIWX
+)>;
+
+def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C],
+ (instrs
+ STXVD2X,
+ STXVW4X
+)>;
+
+
+// Divide Operations in IIC_IntDivW, IIC_IntDivD.
+
+def : InstRW<[P9_DIV_16C_8, IP_EXECE_1C, DISP_1C, DISP_1C],
+ (instrs
+ DIVW,
+ DIVWU
+)>;
+
+def : InstRW<[P9_DIV_24C_8, IP_EXECE_1C, DISP_1C, DISP_1C],
+ (instrs
+ DIVWE,
+ DIVD,
+ DIVWEU,
+ DIVDU
+)>;
+
+def : InstRW<[P9_DIV_40C_8, IP_EXECE_1C, DISP_1C, DISP_1C],
+ (instrs
+ DIVDE,
+ DIVDEU
+)>;
+
+def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXEC_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ DIVWEo,
+ DIVWEUo
+)>;
+
+def : InstRW<[P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXEC_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ DIVDEo,
+ DIVDEUo
+)>;
+
+// Rotate Operations in IIC_IntRotateD, IIC_IntRotateDI
+def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C],
+ (instrs
+ SLD,
+ SRD,
+ SRAD,
+ SRADI,
+ RLDIC
+)>;
+
+def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ RLDCL,
+ RLDCR,
+ RLDIMI,
+ RLDICL,
+ RLDICR,
+ RLDICL_32_64
+)>;
+
+// CR access instructions in _BrMCR, IIC_BrMCRX.
+
+def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ MTOCRF,
+ MTOCRF8,
+ MTCRF,
+ MTCRF8
+)>;
+
+def : InstRW<[P9_ALU_5C, IP_EXEC_1C, DISP_1C, DISP_1C],
+ (instrs
+ MCRF,
+ MCRXRX
+)>;
+
+def : InstRW<[P9_ALU_5C, P9_ALU_5C, IP_EXEC_1C, IP_EXEC_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ MCRFS
+)>;
+
+// FP Div instructions in IIC_FPDivD and IIC_FPDivS.
+
+def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ FDIV,
+ XSDIVDP
+)>;
+
+def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ FDIVS,
+ XSDIVSP
+)>;
+
+def : InstRW<[P9_DP_24C_8, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C],
+ (instrs
+ XVDIVSP
+)>;
+
+def : InstRW<[P9_DP_33C_8, IP_EXECE_1C, IP_EXECO_1C, DISP_1C, DISP_1C],
+ (instrs
+ XVDIVDP
+)>;
+
+// FP Instructions in IIC_FPGeneral, IIC_FPFused
+
+def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ FRSP,
+ FRIND,
+ FRINS,
+ FRIPD,
+ FRIPS,
+ FRIZD,
+ FRIZS,
+ FRIMD,
+ FRIMS,
+ FRE,
+ FRES,
+ FRSQRTE,
+ FRSQRTES,
+ FMADDS,
+ FMADD,
+ FMSUBS,
+ FMSUB,
+ FNMADDS,
+ FNMADD,
+ FNMSUBS,
+ FNMSUB,
+ FSELD,
+ FSELS,
+ FADDS,
+ FMULS,
+ FMUL,
+ FSUBS,
+ FCFID,
+ FCTID,
+ FCTIDZ,
+ FCFIDU,
+ FCFIDS,
+ FCFIDUS,
+ FCTIDUZ,
+ FCTIWUZ,
+ FCTIW,
+ FCTIWZ
+)>;
+
+def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ FMR,
+ FABSD,
+ FABSS,
+ FNABSD,
+ FNABSS,
+ FNEGD,
+ FNEGS,
+ FCPSGND,
+ FCPSGNS
+)>;
+
+def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ FCMPUS,
+ FCMPUD
+)>;
+
+// Load instructions in IIC_LdStLFDU and IIC_LdStLFDUX.
+
+def : InstRW<[P9_LoadAndALUOp_7C, P9_ALU_2C,
+ IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ LFSU,
+ LFSUX
+)>;
+
+def : InstRW<[P9_LS_5C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ (instrs
+ LFDU,
+ LFDUX
+)>;
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPC.td b/contrib/llvm/lib/Target/PowerPC/PPC.td
index 6a8e87effc8f..46502208b175 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPC.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPC.td
@@ -216,7 +216,7 @@ def ProcessorFeatures {
list<SubtargetFeature> Power8FeatureList =
!listconcat(Power7FeatureList, Power8SpecificFeatures);
list<SubtargetFeature> Power9SpecificFeatures =
- [FeatureP9Altivec, FeatureP9Vector, FeatureISA3_0];
+ [DirectivePwr9, FeatureP9Altivec, FeatureP9Vector, FeatureISA3_0];
list<SubtargetFeature> Power9FeatureList =
!listconcat(Power8FeatureList, Power9SpecificFeatures);
}
@@ -289,7 +289,6 @@ def getAltVSXFMAOpcode : InstrMapping {
include "PPCRegisterInfo.td"
include "PPCSchedule.td"
-include "PPCInstrInfo.td"
//===----------------------------------------------------------------------===//
// PowerPC processors supported.
@@ -418,8 +417,7 @@ def : ProcessorModel<"pwr6x", G5Model,
FeatureMFTB, DeprecatedDST]>;
def : ProcessorModel<"pwr7", P7Model, ProcessorFeatures.Power7FeatureList>;
def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.Power8FeatureList>;
-// FIXME: Same as P8 until the POWER9 scheduling info is available
-def : ProcessorModel<"pwr9", P8Model, ProcessorFeatures.Power9FeatureList>;
+def : ProcessorModel<"pwr9", P9Model, ProcessorFeatures.Power9FeatureList>;
def : Processor<"ppc", G3Itineraries, [Directive32, FeatureHardFloat,
FeatureMFTB]>;
def : Processor<"ppc32", G3Itineraries, [Directive32, FeatureHardFloat,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 76c52ab6cf1e..f0e0ebc4946c 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -17,28 +17,29 @@
//===----------------------------------------------------------------------===//
#include "PPC.h"
+#include "PPCInstrInfo.h"
#include "InstPrinter/PPCInstPrinter.h"
#include "MCTargetDesc/PPCMCExpr.h"
-#include "MCTargetDesc/PPCPredicates.h"
+#include "MCTargetDesc/PPCMCTargetDesc.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "PPCTargetStreamer.h"
#include "llvm/ADT/MapVector.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/CodeGen/AsmPrinter.h"
-#include "llvm/CodeGen/MachineConstantPool.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-#include "llvm/IR/Constants.h"
-#include "llvm/IR/DebugInfo.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/Mangler.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
@@ -48,21 +49,30 @@
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/SectionKind.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/MachO.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetOptions.h"
-#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetMachine.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <memory>
+#include <new>
+
using namespace llvm;
#define DEBUG_TYPE "asmprinter"
namespace {
+
class PPCAsmPrinter : public AsmPrinter {
protected:
MapVector<MCSymbol *, MCSymbol *> TOC;
@@ -74,17 +84,15 @@ public:
std::unique_ptr<MCStreamer> Streamer)
: AsmPrinter(TM, std::move(Streamer)), SM(*this) {}
- const char *getPassName() const override {
- return "PowerPC Assembly Printer";
- }
+ StringRef getPassName() const override { return "PowerPC Assembly Printer"; }
- MCSymbol *lookUpOrCreateTOCEntry(MCSymbol *Sym);
+ MCSymbol *lookUpOrCreateTOCEntry(MCSymbol *Sym);
- virtual bool doInitialization(Module &M) override {
- if (!TOC.empty())
- TOC.clear();
- return AsmPrinter::doInitialization(M);
- }
+ bool doInitialization(Module &M) override {
+ if (!TOC.empty())
+ TOC.clear();
+ return AsmPrinter::doInitialization(M);
+ }
void EmitInstruction(const MachineInstr *MI) override;
@@ -115,7 +123,7 @@ public:
std::unique_ptr<MCStreamer> Streamer)
: PPCAsmPrinter(TM, std::move(Streamer)) {}
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "Linux PPC Assembly Printer";
}
@@ -136,14 +144,15 @@ public:
std::unique_ptr<MCStreamer> Streamer)
: PPCAsmPrinter(TM, std::move(Streamer)) {}
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "Darwin PPC Assembly Printer";
}
bool doFinalization(Module &M) override;
void EmitStartOfAsmFile(Module &M) override;
};
-} // end of anonymous namespace
+
+} // end anonymous namespace
/// stripRegisterPrefix - This method strips the character prefix from a
/// register name so that only the number is left. Used by for linux asm.
@@ -169,7 +178,23 @@ void PPCAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNo,
switch (MO.getType()) {
case MachineOperand::MO_Register: {
- const char *RegName = PPCInstPrinter::getRegisterName(MO.getReg());
+ unsigned Reg = MO.getReg();
+
+ // There are VSX instructions that use VSX register numbering (vs0 - vs63)
+ // as well as those that use VMX register numbering (v0 - v31 which
+ // correspond to vs32 - vs63). If we have an instruction that uses VSX
+ // numbering, we need to convert the VMX registers to VSX registers.
+ // Namely, we print 32-63 when the instruction operates on one of the
+ // VMX registers.
+ // (Please synchronize with PPCInstPrinter::printOperand)
+ if (MI->getDesc().TSFlags & PPCII::UseVSXReg) {
+ if (PPCInstrInfo::isVRRegister(Reg))
+ Reg = PPC::VSX32 + (Reg - PPC::V0);
+ else if (PPCInstrInfo::isVFRegister(Reg))
+ Reg = PPC::VSX32 + (Reg - PPC::VF0);
+ }
+ const char *RegName = PPCInstPrinter::getRegisterName(Reg);
+
// Linux assembler (Others?) does not take register mnemonics.
// FIXME - What about special registers used in mfspr/mtspr?
if (!Subtarget->isDarwin())
@@ -347,11 +372,10 @@ void PPCAsmPrinter::LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI) {
PatchPointOpers Opers(&MI);
unsigned EncodedBytes = 0;
- const MachineOperand &CalleeMO =
- Opers.getMetaOper(PatchPointOpers::TargetPos);
+ const MachineOperand &CalleeMO = Opers.getCallTarget();
if (CalleeMO.isImm()) {
- int64_t CallTarget = Opers.getMetaOper(PatchPointOpers::TargetPos).getImm();
+ int64_t CallTarget = CalleeMO.getImm();
if (CallTarget) {
assert((CallTarget & 0xFFFFFFFFFFFF) == CallTarget &&
"High 16 bits of call target should be zero.");
@@ -430,7 +454,7 @@ void PPCAsmPrinter::LowerPATCHPOINT(StackMaps &SM, const MachineInstr &MI) {
EncodedBytes *= 4;
// Emit padding.
- unsigned NumBytes = Opers.getMetaOper(PatchPointOpers::NBytesPos).getImm();
+ unsigned NumBytes = Opers.getNumPatchBytes();
assert(NumBytes >= EncodedBytes &&
"Patchpoint can't request size less than the length of a call.");
assert((NumBytes - EncodedBytes) % 4 == 0 &&
@@ -674,6 +698,13 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
const MCExpr *Exp =
MCSymbolRefExpr::create(MOSymbol, MCSymbolRefExpr::VK_PPC_TOC_HA,
OutContext);
+
+ if (!MO.isJTI() && MO.getOffset())
+ Exp = MCBinaryExpr::createAdd(Exp,
+ MCConstantExpr::create(MO.getOffset(),
+ OutContext),
+ OutContext);
+
TmpInst.getOperand(2) = MCOperand::createExpr(Exp);
EmitToStreamer(*OutStreamer, TmpInst);
return;
@@ -1147,10 +1178,12 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
E = TOC.end(); I != E; ++I) {
OutStreamer->EmitLabel(I->second);
MCSymbol *S = I->first;
- if (isPPC64)
+ if (isPPC64) {
TS.emitTCEntry(*S);
- else
+ } else {
+ OutStreamer->EmitValueToAlignment(4);
OutStreamer->EmitSymbolValue(S, 4);
+ }
}
}
@@ -1193,6 +1226,9 @@ void PPCLinuxAsmPrinter::EmitFunctionBodyStart() {
if (Subtarget->isELFv2ABI()
// Only do all that if the function uses r2 in the first place.
&& !MF->getRegInfo().use_empty(PPC::X2)) {
+ // Note: The logic here must be synchronized with the code in the
+ // branch-selection pass which sets the offset of the first block in the
+ // function. This matters because it affects the alignment.
const PPCFunctionInfo *PPCFI = MF->getInfo<PPCFunctionInfo>();
MCSymbol *GlobalEntryLabel = PPCFI->getGlobalEPSymbol();
@@ -1345,57 +1381,61 @@ bool PPCDarwinAsmPrinter::doFinalization(Module &M) {
// Darwin/PPC always uses mach-o.
const TargetLoweringObjectFileMachO &TLOFMacho =
static_cast<const TargetLoweringObjectFileMachO &>(getObjFileLowering());
- MachineModuleInfoMachO &MMIMacho =
- MMI->getObjFileInfo<MachineModuleInfoMachO>();
-
- if (MAI->doesSupportExceptionHandling() && MMI) {
- // Add the (possibly multiple) personalities to the set of global values.
- // Only referenced functions get into the Personalities list.
- for (const Function *Personality : MMI->getPersonalities()) {
- if (Personality) {
- MCSymbol *NLPSym =
- getSymbolWithGlobalValueBase(Personality, "$non_lazy_ptr");
- MachineModuleInfoImpl::StubValueTy &StubSym =
- MMIMacho.getGVStubEntry(NLPSym);
- StubSym =
- MachineModuleInfoImpl::StubValueTy(getSymbol(Personality), true);
+ if (MMI) {
+ MachineModuleInfoMachO &MMIMacho =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+
+ if (MAI->doesSupportExceptionHandling()) {
+ // Add the (possibly multiple) personalities to the set of global values.
+ // Only referenced functions get into the Personalities list.
+ for (const Function *Personality : MMI->getPersonalities()) {
+ if (Personality) {
+ MCSymbol *NLPSym =
+ getSymbolWithGlobalValueBase(Personality, "$non_lazy_ptr");
+ MachineModuleInfoImpl::StubValueTy &StubSym =
+ MMIMacho.getGVStubEntry(NLPSym);
+ StubSym =
+ MachineModuleInfoImpl::StubValueTy(getSymbol(Personality), true);
+ }
}
}
- }
- // Output stubs for dynamically-linked functions.
- MachineModuleInfoMachO::SymbolListTy Stubs = MMIMacho.GetGVStubList();
-
- // Output macho stubs for external and common global variables.
- if (!Stubs.empty()) {
- // Switch with ".non_lazy_symbol_pointer" directive.
- OutStreamer->SwitchSection(TLOFMacho.getNonLazySymbolPointerSection());
- EmitAlignment(isPPC64 ? 3 : 2);
-
- for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
- // L_foo$stub:
- OutStreamer->EmitLabel(Stubs[i].first);
- // .indirect_symbol _foo
- MachineModuleInfoImpl::StubValueTy &MCSym = Stubs[i].second;
- OutStreamer->EmitSymbolAttribute(MCSym.getPointer(), MCSA_IndirectSymbol);
-
- if (MCSym.getInt())
- // External to current translation unit.
- OutStreamer->EmitIntValue(0, isPPC64 ? 8 : 4/*size*/);
- else
- // Internal to current translation unit.
- //
- // When we place the LSDA into the TEXT section, the type info pointers
- // need to be indirect and pc-rel. We accomplish this by using NLPs.
- // However, sometimes the types are local to the file. So we need to
- // fill in the value for the NLP in those cases.
- OutStreamer->EmitValue(MCSymbolRefExpr::create(MCSym.getPointer(),
- OutContext),
- isPPC64 ? 8 : 4/*size*/);
- }
+ // Output stubs for dynamically-linked functions.
+ MachineModuleInfoMachO::SymbolListTy Stubs = MMIMacho.GetGVStubList();
+
+ // Output macho stubs for external and common global variables.
+ if (!Stubs.empty()) {
+ // Switch with ".non_lazy_symbol_pointer" directive.
+ OutStreamer->SwitchSection(TLOFMacho.getNonLazySymbolPointerSection());
+ EmitAlignment(isPPC64 ? 3 : 2);
+
+ for (unsigned i = 0, e = Stubs.size(); i != e; ++i) {
+ // L_foo$stub:
+ OutStreamer->EmitLabel(Stubs[i].first);
+ // .indirect_symbol _foo
+ MachineModuleInfoImpl::StubValueTy &MCSym = Stubs[i].second;
+ OutStreamer->EmitSymbolAttribute(MCSym.getPointer(),
+ MCSA_IndirectSymbol);
+
+ if (MCSym.getInt())
+ // External to current translation unit.
+ OutStreamer->EmitIntValue(0, isPPC64 ? 8 : 4 /*size*/);
+ else
+ // Internal to current translation unit.
+ //
+ // When we place the LSDA into the TEXT section, the type info
+ // pointers
+ // need to be indirect and pc-rel. We accomplish this by using NLPs.
+ // However, sometimes the types are local to the file. So we need to
+ // fill in the value for the NLP in those cases.
+ OutStreamer->EmitValue(
+ MCSymbolRefExpr::create(MCSym.getPointer(), OutContext),
+ isPPC64 ? 8 : 4 /*size*/);
+ }
- Stubs.clear();
- OutStreamer->AddBlankLine();
+ Stubs.clear();
+ OutStreamer->AddBlankLine();
+ }
}
// Funny Darwin hack: This flag tells the linker that no global symbols
@@ -1422,7 +1462,10 @@ createPPCAsmPrinterPass(TargetMachine &tm,
// Force static initialization.
extern "C" void LLVMInitializePowerPCAsmPrinter() {
- TargetRegistry::RegisterAsmPrinter(ThePPC32Target, createPPCAsmPrinterPass);
- TargetRegistry::RegisterAsmPrinter(ThePPC64Target, createPPCAsmPrinterPass);
- TargetRegistry::RegisterAsmPrinter(ThePPC64LETarget, createPPCAsmPrinterPass);
+ TargetRegistry::RegisterAsmPrinter(getThePPC32Target(),
+ createPPCAsmPrinterPass);
+ TargetRegistry::RegisterAsmPrinter(getThePPC64Target(),
+ createPPCAsmPrinterPass);
+ TargetRegistry::RegisterAsmPrinter(getThePPC64LETarget(),
+ createPPCAsmPrinterPass);
}
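
One hunk above folds a machine-operand offset into a VK_PPC_TOC_HA expression. For context on the high-adjusted relocation that expression lowers to, a small sketch of the usual PowerPC ha/lo split, with illustrative names and assuming 32-bit signed displacements:

    #include <cstdint>

    // High-adjusted half: biased so that (ha << 16) + (int16_t)lo
    // reconstructs the original value once lo is sign-extended.
    uint16_t haPart(int32_t Value) {
      return static_cast<uint16_t>((static_cast<uint32_t>(Value) + 0x8000u) >> 16);
    }

    // Low half, consumed sign-extended by the paired instruction.
    int16_t loPart(int32_t Value) {
      return static_cast<int16_t>(Value & 0xFFFF);
    }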
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp b/contrib/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp
index bfb4d8756901..93c201d03869 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp
@@ -1,4 +1,4 @@
-//===- PPCBoolRetToInt.cpp - Convert bool literals to i32 if they are returned ==//
+//===- PPCBoolRetToInt.cpp ------------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -33,15 +33,26 @@
//===----------------------------------------------------------------------===//
#include "PPC.h"
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/Argument.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/IR/OperandTraits.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Pass.h"
+#include <cassert>
using namespace llvm;
@@ -57,7 +68,6 @@ STATISTIC(NumBoolToIntPromotion,
"Total number of times a bool was promoted to an int");
class PPCBoolRetToInt : public FunctionPass {
-
static SmallPtrSet<Value *, 8> findAllDefs(Value *V) {
SmallPtrSet<Value *, 8> Defs;
SmallVector<Value *, 8> WorkList;
@@ -66,7 +76,10 @@ class PPCBoolRetToInt : public FunctionPass {
while (!WorkList.empty()) {
Value *Curr = WorkList.back();
WorkList.pop_back();
- if (User *CurrUser = dyn_cast<User>(Curr))
+ auto *CurrUser = dyn_cast<User>(Curr);
+      // Operands of CallInst are skipped because they may not be of bool type,
+      // and their positions are defined by the ABI.
+ if (CurrUser && !isa<CallInst>(Curr))
for (auto &Op : CurrUser->operands())
if (Defs.insert(Op).second)
WorkList.push_back(Op);
@@ -77,9 +90,9 @@ class PPCBoolRetToInt : public FunctionPass {
// Translate a i1 value to an equivalent i32 value:
static Value *translate(Value *V) {
Type *Int32Ty = Type::getInt32Ty(V->getContext());
- if (Constant *C = dyn_cast<Constant>(V))
+ if (auto *C = dyn_cast<Constant>(V))
return ConstantExpr::getZExt(C, Int32Ty);
- if (PHINode *P = dyn_cast<PHINode>(V)) {
+ if (auto *P = dyn_cast<PHINode>(V)) {
// Temporarily set the operands to 0. We'll fix this later in
// runOnUse.
Value *Zero = Constant::getNullValue(Int32Ty);
@@ -90,8 +103,8 @@ class PPCBoolRetToInt : public FunctionPass {
return Q;
}
- Argument *A = dyn_cast<Argument>(V);
- Instruction *I = dyn_cast<Instruction>(V);
+ auto *A = dyn_cast<Argument>(V);
+ auto *I = dyn_cast<Instruction>(V);
assert((A || I) && "Unknown value type");
auto InstPt =
@@ -114,7 +127,7 @@ class PPCBoolRetToInt : public FunctionPass {
// Condition 1
for (auto &BB : F)
for (auto &I : BB)
- if (const PHINode *P = dyn_cast<PHINode>(&I))
+ if (const auto *P = dyn_cast<PHINode>(&I))
if (P->getType()->isIntegerTy(1))
Promotable.insert(P);
@@ -131,14 +144,14 @@ class PPCBoolRetToInt : public FunctionPass {
};
const auto &Users = P->users();
const auto &Operands = P->operands();
- if (!std::all_of(Users.begin(), Users.end(), IsValidUser) ||
- !std::all_of(Operands.begin(), Operands.end(), IsValidOperand))
+ if (!llvm::all_of(Users, IsValidUser) ||
+ !llvm::all_of(Operands, IsValidOperand))
ToRemove.push_back(P);
}
// Iterate to convergence
auto IsPromotable = [&Promotable] (const Value *V) -> bool {
- const PHINode *Phi = dyn_cast<PHINode>(V);
+ const auto *Phi = dyn_cast<PHINode>(V);
return !Phi || Promotable.count(Phi);
};
while (!ToRemove.empty()) {
@@ -150,8 +163,8 @@ class PPCBoolRetToInt : public FunctionPass {
// Condition 4 and 5
const auto &Users = P->users();
const auto &Operands = P->operands();
- if (!std::all_of(Users.begin(), Users.end(), IsPromotable) ||
- !std::all_of(Operands.begin(), Operands.end(), IsPromotable))
+ if (!llvm::all_of(Users, IsPromotable) ||
+ !llvm::all_of(Operands, IsPromotable))
ToRemove.push_back(P);
}
}
@@ -163,11 +176,12 @@ class PPCBoolRetToInt : public FunctionPass {
public:
static char ID;
+
PPCBoolRetToInt() : FunctionPass(ID) {
initializePPCBoolRetToIntPass(*PassRegistry::getPassRegistry());
}
- bool runOnFunction(Function &F) {
+ bool runOnFunction(Function &F) override {
if (skipFunction(F))
return false;
@@ -176,12 +190,12 @@ class PPCBoolRetToInt : public FunctionPass {
bool Changed = false;
for (auto &BB : F) {
for (auto &I : BB) {
- if (ReturnInst *R = dyn_cast<ReturnInst>(&I))
+ if (auto *R = dyn_cast<ReturnInst>(&I))
if (F.getReturnType()->isIntegerTy(1))
Changed |=
runOnUse(R->getOperandUse(0), PromotablePHINodes, Bool2IntMap);
- if (CallInst *CI = dyn_cast<CallInst>(&I))
+ if (auto *CI = dyn_cast<CallInst>(&I))
for (auto &U : CI->operands())
if (U->getType()->isIntegerTy(1))
Changed |= runOnUse(U, PromotablePHINodes, Bool2IntMap);
@@ -196,18 +210,19 @@ class PPCBoolRetToInt : public FunctionPass {
auto Defs = findAllDefs(U);
// If the values are all Constants or Arguments, don't bother
- if (!std::any_of(Defs.begin(), Defs.end(), isa<Instruction, Value *>))
+ if (llvm::none_of(Defs, isa<Instruction, Value *>))
return false;
- // Presently, we only know how to handle PHINode, Constant, and Arguments.
- // Potentially, bitwise operations (AND, OR, XOR, NOT) and sign extension
- // could also be handled in the future.
+ // Presently, we only know how to handle PHINode, Constant, Arguments and
+ // CallInst. Potentially, bitwise operations (AND, OR, XOR, NOT) and sign
+ // extension could also be handled in the future.
for (Value *V : Defs)
- if (!isa<PHINode>(V) && !isa<Constant>(V) && !isa<Argument>(V))
+ if (!isa<PHINode>(V) && !isa<Constant>(V) &&
+ !isa<Argument>(V) && !isa<CallInst>(V))
return false;
for (Value *V : Defs)
- if (const PHINode *P = dyn_cast<PHINode>(V))
+ if (const auto *P = dyn_cast<PHINode>(V))
if (!PromotablePHINodes.count(P))
return false;
@@ -221,32 +236,35 @@ class PPCBoolRetToInt : public FunctionPass {
if (!BoolToIntMap.count(V))
BoolToIntMap[V] = translate(V);
- // Replace the operands of the translated instructions. There were set to
+ // Replace the operands of the translated instructions. They were set to
// zero in the translate function.
for (auto &Pair : BoolToIntMap) {
- User *First = dyn_cast<User>(Pair.first);
- User *Second = dyn_cast<User>(Pair.second);
+ auto *First = dyn_cast<User>(Pair.first);
+ auto *Second = dyn_cast<User>(Pair.second);
assert((!First || Second) && "translated from user to non-user!?");
- if (First)
+      // Operands of CallInst are skipped because they may not be of bool type,
+      // and their positions are defined by the ABI.
+ if (First && !isa<CallInst>(First))
for (unsigned i = 0; i < First->getNumOperands(); ++i)
Second->setOperand(i, BoolToIntMap[First->getOperand(i)]);
}
Value *IntRetVal = BoolToIntMap[U];
Type *Int1Ty = Type::getInt1Ty(U->getContext());
- Instruction *I = cast<Instruction>(U.getUser());
+ auto *I = cast<Instruction>(U.getUser());
Value *BackToBool = new TruncInst(IntRetVal, Int1Ty, "backToBool", I);
U.set(BackToBool);
return true;
}
- void getAnalysisUsage(AnalysisUsage &AU) const {
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addPreserved<DominatorTreeWrapperPass>();
FunctionPass::getAnalysisUsage(AU);
}
};
-}
+
+} // end anonymous namespace
char PPCBoolRetToInt::ID = 0;
INITIALIZE_PASS(PPCBoolRetToInt, "bool-ret-to-int",
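
The pass above widens i1 values to i32 along the return path and narrows them back at each use that still needs a 1-bit value. A language-level analogue of that round trip, independent of the LLVM APIs involved (names are illustrative):

    #include <cstdint>

    // Zero-extend the boolean so it travels as a full 32-bit integer...
    uint32_t promoteBool(bool B) { return B ? 1u : 0u; }

    // ...and truncate back where a 1-bit value is required again.
    bool truncateToBool(uint32_t V) { return (V & 1u) != 0; }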
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp b/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
index 4d63c5b5703c..ae76386fdfb6 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCBranchSelector.cpp
@@ -19,8 +19,10 @@
#include "MCTargetDesc/PPCPredicates.h"
#include "PPCInstrBuilder.h"
#include "PPCInstrInfo.h"
+#include "PPCSubtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -41,19 +43,19 @@ namespace {
initializePPCBSelPass(*PassRegistry::getPassRegistry());
}
- /// BlockSizes - The sizes of the basic blocks in the function.
- std::vector<unsigned> BlockSizes;
+ // The sizes of the basic blocks in the function (the first
+ // element of the pair); the second element of the pair is the amount of the
+ // size that is due to potential padding.
+ std::vector<std::pair<unsigned, unsigned>> BlockSizes;
bool runOnMachineFunction(MachineFunction &Fn) override;
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
- const char *getPassName() const override {
- return "PowerPC Branch Selector";
- }
+ StringRef getPassName() const override { return "PowerPC Branch Selector"; }
};
char PPCBSel::ID = 0;
}
@@ -92,8 +94,19 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
return AlignAmt + OffsetToAlignment(Offset, AlignAmt);
};
+ // We need to be careful about the offset of the first block in the function
+ // because it might not have the function's alignment. This happens because,
+ // under the ELFv2 ABI, for functions which require a TOC pointer, we add a
+ // two-instruction sequence to the start of the function.
+ // Note: This needs to be synchronized with the check in
+ // PPCLinuxAsmPrinter::EmitFunctionBodyStart.
+ unsigned InitialOffset = 0;
+ if (Fn.getSubtarget<PPCSubtarget>().isELFv2ABI() &&
+ !Fn.getRegInfo().use_empty(PPC::X2))
+ InitialOffset = 8;
+
// Measure each MBB and compute a size for the entire function.
- unsigned FuncSize = 0;
+ unsigned FuncSize = InitialOffset;
for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
++MFI) {
MachineBasicBlock *MBB = &*MFI;
@@ -102,15 +115,19 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
// alignment requirement.
if (MBB->getNumber() > 0) {
unsigned AlignExtra = GetAlignmentAdjustment(*MBB, FuncSize);
- BlockSizes[MBB->getNumber()-1] += AlignExtra;
+
+ auto &BS = BlockSizes[MBB->getNumber()-1];
+ BS.first += AlignExtra;
+ BS.second = AlignExtra;
+
FuncSize += AlignExtra;
}
unsigned BlockSize = 0;
for (MachineInstr &MI : *MBB)
- BlockSize += TII->GetInstSizeInBytes(MI);
+ BlockSize += TII->getInstSizeInBytes(MI);
- BlockSizes[MBB->getNumber()] = BlockSize;
+ BlockSizes[MBB->getNumber()].first = BlockSize;
FuncSize += BlockSize;
}
@@ -155,7 +172,7 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
Dest = I->getOperand(0).getMBB();
if (!Dest) {
- MBBStartOffset += TII->GetInstSizeInBytes(*I);
+ MBBStartOffset += TII->getInstSizeInBytes(*I);
continue;
}
@@ -169,14 +186,14 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
BranchSize = MBBStartOffset;
for (unsigned i = Dest->getNumber(), e = MBB.getNumber(); i != e; ++i)
- BranchSize += BlockSizes[i];
+ BranchSize += BlockSizes[i].first;
} else {
// Otherwise, add the size of the blocks between this block and the
// dest to the number of bytes left in this block.
BranchSize = -MBBStartOffset;
for (unsigned i = MBB.getNumber(), e = Dest->getNumber(); i != e; ++i)
- BranchSize += BlockSizes[i];
+ BranchSize += BlockSizes[i].first;
}
// If this branch is in range, ignore it.
@@ -186,9 +203,9 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
}
// Otherwise, we have to expand it to a long branch.
- MachineInstr *OldBranch = I;
- DebugLoc dl = OldBranch->getDebugLoc();
-
+ MachineInstr &OldBranch = *I;
+ DebugLoc dl = OldBranch.getDebugLoc();
+
if (I->getOpcode() == PPC::BCC) {
// The BCC operands are:
// 0. PPC branch predicate
@@ -222,16 +239,42 @@ bool PPCBSel::runOnMachineFunction(MachineFunction &Fn) {
I = BuildMI(MBB, I, dl, TII->get(PPC::B)).addMBB(Dest);
// Remove the old branch from the function.
- OldBranch->eraseFromParent();
-
+ OldBranch.eraseFromParent();
+
// Remember that this instruction is 8-bytes, increase the size of the
// block by 4, remember to iterate.
- BlockSizes[MBB.getNumber()] += 4;
+ BlockSizes[MBB.getNumber()].first += 4;
MBBStartOffset += 8;
++NumExpanded;
MadeChange = true;
}
}
+
+ if (MadeChange) {
+ // If we're going to iterate again, make sure we've updated our
+ // padding-based contributions to the block sizes.
+ unsigned Offset = InitialOffset;
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI) {
+ MachineBasicBlock *MBB = &*MFI;
+
+ if (MBB->getNumber() > 0) {
+ auto &BS = BlockSizes[MBB->getNumber()-1];
+ BS.first -= BS.second;
+ Offset -= BS.second;
+
+ unsigned AlignExtra = GetAlignmentAdjustment(*MBB, Offset);
+
+ BS.first += AlignExtra;
+ BS.second = AlignExtra;
+
+ Offset += AlignExtra;
+ }
+
+ Offset += BlockSizes[MBB->getNumber()].first;
+ }
+ }
+
EverMadeChange |= MadeChange;
}
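
The update loop added above re-derives each block's alignment padding from a running offset on every iteration. A minimal sketch of the underlying padding computation, assuming Align > 0 and a hypothetical helper name; note that GetAlignmentAdjustment in the hunk further up adds an extra AlignAmt on top of this as a conservative allowance:

    // Bytes of padding needed to bring Offset up to the next Align boundary.
    unsigned paddingToAlignment(unsigned Offset, unsigned Align) {
      return (Align - (Offset % Align)) % Align;
    }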
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp b/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
index 875226635917..2c62a0f1d909 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
@@ -618,9 +618,9 @@ bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
}
#ifndef NDEBUG
-static bool clobbersCTR(const MachineInstr *MI) {
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+static bool clobbersCTR(const MachineInstr &MI) {
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
if (MO.isReg()) {
if (MO.isDef() && (MO.getReg() == PPC::CTR || MO.getReg() == PPC::CTR8))
return true;
@@ -659,7 +659,7 @@ check_block:
break;
}
- if (I != BI && clobbersCTR(I)) {
+ if (I != BI && clobbersCTR(*I)) {
DEBUG(dbgs() << "BB#" << MBB->getNumber() << " (" <<
MBB->getFullName() << ") instruction " << *I <<
" clobbers CTR, invalidating " << "BB#" <<
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td
index 53d2f77ff918..a4f4c8688cc1 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td
@@ -26,6 +26,9 @@ class CCIfNotSubtarget<string F, CCAction A>
class CCIfOrigArgWasNotPPCF128<CCAction A>
: CCIf<"!static_cast<PPCCCState *>(&State)->WasOriginalArgPPCF128(ValNo)",
A>;
+class CCIfOrigArgWasPPCF128<CCAction A>
+ : CCIf<"static_cast<PPCCCState *>(&State)->WasOriginalArgPPCF128(ValNo)",
+ A>;
//===----------------------------------------------------------------------===//
// Return Value Calling Convention
@@ -65,11 +68,9 @@ def RetCC_PPC : CallingConv<[
// Vector types returned as "direct" go into V2 .. V9; note that only the
// ELFv2 ABI fully utilizes all these registers.
- CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32],
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32, v2f64],
CCIfSubtarget<"hasAltivec()",
- CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>,
- CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()",
- CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>>
+ CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>
]>;
// No explicit register is specified for the AnyReg calling convention. The
@@ -118,11 +119,9 @@ def RetCC_PPC64_ELF_FIS : CallingConv<[
CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>,
CCIfType<[v4f64, v4f32, v4i1],
CCIfSubtarget<"hasQPX()", CCAssignToReg<[QF1, QF2]>>>,
- CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32],
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32, v2f64],
CCIfSubtarget<"hasAltivec()",
- CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>,
- CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()",
- CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9]>>>
+ CCAssignToReg<[V2, V3, V4, V5, V6, V7, V8, V9]>>>
]>;
//===----------------------------------------------------------------------===//
@@ -142,6 +141,9 @@ def CC_PPC32_SVR4_Common : CallingConv<[
CCIfType<[i32],
CCIfSplit<CCIfNotSubtarget<"useSoftFloat()",
CCCustom<"CC_PPC32_SVR4_Custom_AlignArgRegs">>>>,
+ CCIfSplit<CCIfSubtarget<"useSoftFloat()",
+ CCIfOrigArgWasPPCF128<CCCustom<
+ "CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128">>>>,
// The 'nest' parameter, if any, is passed in R11.
CCIfNest<CCAssignToReg<[R11]>>,
@@ -187,12 +189,9 @@ def CC_PPC32_SVR4 : CallingConv<[
CCAssignToReg<[QF1, QF2, QF3, QF4, QF5, QF6, QF7, QF8]>>>,
// The first 12 Vector arguments are passed in AltiVec registers.
- CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32],
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v1i128, v4f32, v2f64],
CCIfSubtarget<"hasAltivec()", CCAssignToReg<[V2, V3, V4, V5, V6, V7,
V8, V9, V10, V11, V12, V13]>>>,
- CCIfType<[v2f64, v2i64], CCIfSubtarget<"hasVSX()",
- CCAssignToReg<[VSH2, VSH3, VSH4, VSH5, VSH6, VSH7, VSH8, VSH9,
- VSH10, VSH11, VSH12, VSH13]>>>,
CCDelegateTo<CC_PPC32_SVR4_Common>
]>;
@@ -281,6 +280,5 @@ def CSR_64_AllRegs_Altivec : CalleeSavedRegs<(add CSR_64_AllRegs,
(sequence "V%u", 0, 31))>;
def CSR_64_AllRegs_VSX : CalleeSavedRegs<(add CSR_64_AllRegs_Altivec,
- (sequence "VSL%u", 0, 31),
- (sequence "VSH%u", 0, 31))>;
+ (sequence "VSL%u", 0, 31))>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp b/contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp
index fcd2f50e1e3d..6bd229625fc3 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp
@@ -58,7 +58,7 @@ protected:
bool Changed = false;
MachineBasicBlock::iterator I = ReturnMBB.begin();
- I = ReturnMBB.SkipPHIsAndLabels(I);
+ I = ReturnMBB.SkipPHIsLabelsAndDebug(I);
// The block must be essentially empty except for the blr.
if (I == ReturnMBB.end() ||
@@ -196,7 +196,7 @@ public:
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp
index 7e92042d2f96..9b91b9ab8f82 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp
@@ -146,11 +146,11 @@ class PPCFastISel final : public FastISel {
bool isTypeLegal(Type *Ty, MVT &VT);
bool isLoadTypeLegal(Type *Ty, MVT &VT);
bool isValueAvailable(const Value *V) const;
- bool isVSFRCRegister(unsigned Register) const {
- return MRI.getRegClass(Register)->getID() == PPC::VSFRCRegClassID;
+ bool isVSFRCRegClass(const TargetRegisterClass *RC) const {
+ return RC->getID() == PPC::VSFRCRegClassID;
}
- bool isVSSRCRegister(unsigned Register) const {
- return MRI.getRegClass(Register)->getID() == PPC::VSSRCRegClassID;
+ bool isVSSRCRegClass(const TargetRegisterClass *RC) const {
+ return RC->getID() == PPC::VSSRCRegClassID;
}
bool PPCEmitCmp(const Value *Src1Value, const Value *Src2Value,
bool isZExt, unsigned DestReg);
@@ -358,7 +358,7 @@ bool PPCFastISel::PPCComputeAddress(const Value *Obj, Address &Addr) {
for (User::const_op_iterator II = U->op_begin() + 1, IE = U->op_end();
II != IE; ++II, ++GTI) {
const Value *Op = *II;
- if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ if (StructType *STy = GTI.getStructTypeOrNull()) {
const StructLayout *SL = DL.getStructLayout(STy);
unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
TmpOffset += SL->getElementOffset(Idx);
@@ -458,7 +458,7 @@ void PPCFastISel::PPCSimplifyAddress(Address &Addr, bool &UseOffset,
// Emit a load instruction if possible, returning true if we succeeded,
// otherwise false. See commentary below for how the register class of
-// the load is determined.
+// the load is determined.
bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
const TargetRegisterClass *RC,
bool IsZExt, unsigned FP64LoadOpc) {
@@ -489,20 +489,18 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
Opc = Is32BitInt ? PPC::LBZ : PPC::LBZ8;
break;
case MVT::i16:
- Opc = (IsZExt ?
- (Is32BitInt ? PPC::LHZ : PPC::LHZ8) :
- (Is32BitInt ? PPC::LHA : PPC::LHA8));
+ Opc = (IsZExt ? (Is32BitInt ? PPC::LHZ : PPC::LHZ8)
+ : (Is32BitInt ? PPC::LHA : PPC::LHA8));
break;
case MVT::i32:
- Opc = (IsZExt ?
- (Is32BitInt ? PPC::LWZ : PPC::LWZ8) :
- (Is32BitInt ? PPC::LWA_32 : PPC::LWA));
+ Opc = (IsZExt ? (Is32BitInt ? PPC::LWZ : PPC::LWZ8)
+ : (Is32BitInt ? PPC::LWA_32 : PPC::LWA));
if ((Opc == PPC::LWA || Opc == PPC::LWA_32) && ((Addr.Offset & 3) != 0))
UseOffset = false;
break;
case MVT::i64:
Opc = PPC::LD;
- assert(UseRC->hasSuperClassEq(&PPC::G8RCRegClass) &&
+ assert(UseRC->hasSuperClassEq(&PPC::G8RCRegClass) &&
"64-bit load with 32-bit target??");
UseOffset = ((Addr.Offset & 3) == 0);
break;
@@ -521,10 +519,10 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
// If this is a potential VSX load with an offset of 0, a VSX indexed load can
// be used.
- bool IsVSSRC = (ResultReg != 0) && isVSSRCRegister(ResultReg);
- bool IsVSFRC = (ResultReg != 0) && isVSFRCRegister(ResultReg);
+ bool IsVSSRC = isVSSRCRegClass(UseRC);
+ bool IsVSFRC = isVSFRCRegClass(UseRC);
bool Is32VSXLoad = IsVSSRC && Opc == PPC::LFS;
- bool Is64VSXLoad = IsVSSRC && Opc == PPC::LFD;
+ bool Is64VSXLoad = IsVSFRC && Opc == PPC::LFD;
if ((Is32VSXLoad || Is64VSXLoad) &&
(Addr.BaseType != Address::FrameIndexBase) && UseOffset &&
(Addr.Offset == 0)) {
@@ -579,8 +577,18 @@ bool PPCFastISel::PPCEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr,
case PPC::LFS: Opc = IsVSSRC ? PPC::LXSSPX : PPC::LFSX; break;
case PPC::LFD: Opc = IsVSFRC ? PPC::LXSDX : PPC::LFDX; break;
}
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
- .addReg(Addr.Base.Reg).addReg(IndexReg);
+
+ auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
+ ResultReg);
+
+  // If we have an index register defined we use it in the load instruction;
+  // otherwise we use X0 as the base register, which makes the vector
+  // instructions use zero in the computation of the effective address
+  // regardless of the content of the register.
+ if (IndexReg)
+ MIB.addReg(Addr.Base.Reg).addReg(IndexReg);
+ else
+ MIB.addReg(PPC::ZERO8).addReg(Addr.Base.Reg);
}
return true;
@@ -657,8 +665,8 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
// If this is a potential VSX store with an offset of 0, a VSX indexed store
// can be used.
- bool IsVSSRC = isVSSRCRegister(SrcReg);
- bool IsVSFRC = isVSFRCRegister(SrcReg);
+ bool IsVSSRC = isVSSRCRegClass(RC);
+ bool IsVSFRC = isVSFRCRegClass(RC);
bool Is32VSXStore = IsVSSRC && Opc == PPC::STFS;
bool Is64VSXStore = IsVSFRC && Opc == PPC::STFD;
if ((Is32VSXStore || Is64VSXStore) &&
@@ -689,8 +697,9 @@ bool PPCFastISel::PPCEmitStore(MVT VT, unsigned SrcReg, Address &Addr) {
// Base reg with offset in range.
} else if (UseOffset) {
// VSX only provides an indexed store.
- if (Is32VSXStore || Is64VSXStore) return false;
-
+ if (Is32VSXStore || Is64VSXStore)
+ return false;
+
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
.addReg(SrcReg).addImm(Addr.Offset).addReg(Addr.Base.Reg);
@@ -828,7 +837,7 @@ bool PPCFastISel::PPCEmitCmp(const Value *SrcValue1, const Value *SrcValue2,
long Imm = 0;
bool UseImm = false;
- // Only 16-bit integer constants can be represented in compares for
+ // Only 16-bit integer constants can be represented in compares for
// PowerPC. Others will be materialized into a register.
if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(SrcValue2)) {
if (SrcVT == MVT::i64 || SrcVT == MVT::i32 || SrcVT == MVT::i16 ||
@@ -1617,7 +1626,7 @@ bool PPCFastISel::SelectRet(const Instruction *I) {
CCState CCInfo(CC, F.isVarArg(), *FuncInfo.MF, ValLocs, *Context);
CCInfo.AnalyzeReturn(Outs, RetCC_PPC64_ELF_FIS);
const Value *RV = Ret->getOperand(0);
-
+
// FIXME: Only one output register for now.
if (ValLocs.size() > 1)
return false;
@@ -1663,7 +1672,7 @@ bool PPCFastISel::SelectRet(const Instruction *I) {
if (RVVT != DestVT && RVVT != MVT::i8 &&
RVVT != MVT::i16 && RVVT != MVT::i32)
return false;
-
+
if (RVVT != DestVT) {
switch (VA.getLocInfo()) {
default:
@@ -1907,7 +1916,9 @@ unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
unsigned Align = DL.getPrefTypeAlignment(CFP->getType());
assert(Align > 0 && "Unexpectedly missing alignment information!");
unsigned Idx = MCP.getConstantPoolIndex(cast<Constant>(CFP), Align);
- unsigned DestReg = createResultReg(TLI.getRegClassFor(VT));
+ const TargetRegisterClass *RC =
+ (VT == MVT::f32) ? &PPC::F4RCRegClass : &PPC::F8RCRegClass;
+ unsigned DestReg = createResultReg(RC);
CodeModel::Model CModel = TM.getCodeModel();
MachineMemOperand *MMO = FuncInfo.MF->getMachineMemOperand(
@@ -1936,8 +1947,9 @@ unsigned PPCFastISel::PPCMaterializeFP(const ConstantFP *CFP, MVT VT) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL),
TmpReg2).addConstantPoolIndex(Idx).addReg(TmpReg);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
- .addImm(0).addReg(TmpReg2);
- } else
+ .addImm(0)
+ .addReg(TmpReg2);
+ } else
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), DestReg)
.addConstantPoolIndex(Idx, 0, PPCII::MO_TOC_LO)
.addReg(TmpReg)
@@ -2028,8 +2040,8 @@ unsigned PPCFastISel::PPCMaterialize32BitInt(int64_t Imm,
// Just Hi bits.
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(IsGPRC ? PPC::LIS : PPC::LIS8), ResultReg)
- .addImm(Hi);
-
+ .addImm(Hi);
+
return ResultReg;
}
@@ -2145,7 +2157,12 @@ unsigned PPCFastISel::fastMaterializeConstant(const Constant *C) {
else if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
return PPCMaterializeGV(GV, VT);
else if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
- return PPCMaterializeInt(CI, VT, VT != MVT::i1);
+ // Note that the code in FunctionLoweringInfo::ComputePHILiveOutRegInfo
+ // assumes that constant PHI operands will be zero extended, and failure to
+ // match that assumption will cause problems if we sign extend here but
+ // some user of a PHI is in a block for which we fall back to full SDAG
+ // instruction selection.
+ return PPCMaterializeInt(CI, VT, false);
return 0;
}
@@ -2263,7 +2280,7 @@ bool PPCFastISel::fastLowerArguments() {
// Handle materializing integer constants into a register. This is not
// automatically generated for PowerPC, so must be explicitly created here.
unsigned PPCFastISel::fastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) {
-
+
if (Opc != ISD::Constant)
return 0;
@@ -2276,8 +2293,8 @@ unsigned PPCFastISel::fastEmit_i(MVT Ty, MVT VT, unsigned Opc, uint64_t Imm) {
return ImmReg;
}
- if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 &&
- VT != MVT::i8 && VT != MVT::i1)
+ if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 &&
+ VT != MVT::i1)
return 0;
const TargetRegisterClass *RC = ((VT == MVT::i64) ? &PPC::G8RCRegClass :
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
index c3a5d3c95081..e786ef9aee0e 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -253,8 +253,8 @@ const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
/// contents is spilled and reloaded around the call. Without the prolog code,
/// the spill instruction refers to an undefined register. This code needs
/// to account for all uses of that GPR.
-static void RemoveVRSaveCode(MachineInstr *MI) {
- MachineBasicBlock *Entry = MI->getParent();
+static void RemoveVRSaveCode(MachineInstr &MI) {
+ MachineBasicBlock *Entry = MI.getParent();
MachineFunction *MF = Entry->getParent();
// We know that the MTVRSAVE instruction immediately follows MI. Remove it.
@@ -293,16 +293,16 @@ static void RemoveVRSaveCode(MachineInstr *MI) {
}
// Finally, nuke the UPDATE_VRSAVE.
- MI->eraseFromParent();
+ MI.eraseFromParent();
}
// HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the
// instruction selector. Based on the vector registers that have been used,
// transform this into the appropriate ORI instruction.
-static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) {
- MachineFunction *MF = MI->getParent()->getParent();
+static void HandleVRSaveUpdate(MachineInstr &MI, const TargetInstrInfo &TII) {
+ MachineFunction *MF = MI.getParent()->getParent();
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
- DebugLoc dl = MI->getDebugLoc();
+ DebugLoc dl = MI.getDebugLoc();
const MachineRegisterInfo &MRI = MF->getRegInfo();
unsigned UsedRegMask = 0;
@@ -343,44 +343,44 @@ static void HandleVRSaveUpdate(MachineInstr *MI, const TargetInstrInfo &TII) {
return;
}
- unsigned SrcReg = MI->getOperand(1).getReg();
- unsigned DstReg = MI->getOperand(0).getReg();
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ unsigned DstReg = MI.getOperand(0).getReg();
if ((UsedRegMask & 0xFFFF) == UsedRegMask) {
if (DstReg != SrcReg)
- BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
- .addReg(SrcReg)
- .addImm(UsedRegMask);
+ BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
+ .addReg(SrcReg)
+ .addImm(UsedRegMask);
else
- BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
- .addReg(SrcReg, RegState::Kill)
- .addImm(UsedRegMask);
+ BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
+ .addReg(SrcReg, RegState::Kill)
+ .addImm(UsedRegMask);
} else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) {
if (DstReg != SrcReg)
- BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
- .addReg(SrcReg)
- .addImm(UsedRegMask >> 16);
+ BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
+ .addReg(SrcReg)
+ .addImm(UsedRegMask >> 16);
else
- BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
- .addReg(SrcReg, RegState::Kill)
- .addImm(UsedRegMask >> 16);
+ BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
+ .addReg(SrcReg, RegState::Kill)
+ .addImm(UsedRegMask >> 16);
} else {
if (DstReg != SrcReg)
- BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
- .addReg(SrcReg)
- .addImm(UsedRegMask >> 16);
+ BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
+ .addReg(SrcReg)
+ .addImm(UsedRegMask >> 16);
else
- BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
- .addReg(SrcReg, RegState::Kill)
- .addImm(UsedRegMask >> 16);
+ BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
+ .addReg(SrcReg, RegState::Kill)
+ .addImm(UsedRegMask >> 16);
- BuildMI(*MI->getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
- .addReg(DstReg, RegState::Kill)
- .addImm(UsedRegMask & 0xFFFF);
+ BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
+ .addReg(DstReg, RegState::Kill)
+ .addImm(UsedRegMask & 0xFFFF);
}
// Remove the old UPDATE_VRSAVE instruction.
- MI->eraseFromParent();
+ MI.eraseFromParent();
}
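The three branches above split the 32-bit VRSAVE mask into halfwords: if every set bit lies in the low halfword a single ori applies it, if every set bit lies in the high halfword a single oris (with the mask shifted right by 16) suffices, and if both halves are populated an oris is followed by an ori on the now-killed destination register. For a hypothetical mask of 0x00030001, for example, that last path emits oris with 0x0003 and then ori with 0x0001.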
static bool spillsCR(const MachineFunction &MF) {
@@ -422,15 +422,15 @@ static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
bool UpdateMF,
bool UseEstimate) const {
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
// Get the number of bytes to allocate from the FrameInfo
unsigned FrameSize =
- UseEstimate ? MFI->estimateStackSize(MF) : MFI->getStackSize();
+ UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize();
// Get stack alignments. The frame must be aligned to the greatest of these:
unsigned TargetAlign = getStackAlignment(); // alignment required per the ABI
- unsigned MaxAlign = MFI->getMaxAlignment(); // algmt required by data in frame
+ unsigned MaxAlign = MFI.getMaxAlignment(); // algmt required by data in frame
unsigned AlignMask = std::max(MaxAlign, TargetAlign) - 1;
const PPCRegisterInfo *RegInfo =
@@ -448,18 +448,18 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
!Subtarget.isSVR4ABI() || // allocated locals.
FrameSize == 0) &&
FrameSize <= 224 && // Fits in red zone.
- !MFI->hasVarSizedObjects() && // No dynamic alloca.
- !MFI->adjustsStack() && // No calls.
+ !MFI.hasVarSizedObjects() && // No dynamic alloca.
+ !MFI.adjustsStack() && // No calls.
!MustSaveLR(MF, LR) &&
!RegInfo->hasBasePointer(MF)) { // No special alignment.
// No need for frame
if (UpdateMF)
- MFI->setStackSize(0);
+ MFI.setStackSize(0);
return 0;
}
// Get the maximum call frame size of all the calls.
- unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
+ unsigned maxCallFrameSize = MFI.getMaxCallFrameSize();
// Maximum call frame needs to be at least big enough for linkage area.
unsigned minCallFrameSize = getLinkageSize();
@@ -467,12 +467,12 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
// If we have dynamic alloca then maxCallFrameSize needs to be aligned so
// that allocations will be aligned.
- if (MFI->hasVarSizedObjects())
+ if (MFI.hasVarSizedObjects())
maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
// Update maximum call frame size.
if (UpdateMF)
- MFI->setMaxCallFrameSize(maxCallFrameSize);
+ MFI.setMaxCallFrameSize(maxCallFrameSize);
// Include call frame size in total.
FrameSize += maxCallFrameSize;
@@ -482,7 +482,7 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
// Update frame info.
if (UpdateMF)
- MFI->setStackSize(FrameSize);
+ MFI.setStackSize(FrameSize);
return FrameSize;
}
@@ -490,18 +490,18 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
// hasFP - Return true if the specified function actually has a dedicated frame
// pointer register.
bool PPCFrameLowering::hasFP(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
// FIXME: This is pretty much broken by design: hasFP() might be called really
// early, before the stack layout was calculated and thus hasFP() might return
// true or false here depending on the time of call.
- return (MFI->getStackSize()) && needsFP(MF);
+ return (MFI.getStackSize()) && needsFP(MF);
}
// needsFP - Return true if the specified function should have a dedicated frame
// pointer register. This is true if the function has variable sized allocas or
// if frame pointer elimination is disabled.
bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
// Naked functions have no stack frame pushed, so we don't have a frame
// pointer.
@@ -509,8 +509,7 @@ bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
return false;
return MF.getTarget().Options.DisableFramePointerElim(MF) ||
- MFI->hasVarSizedObjects() ||
- MFI->hasStackMap() || MFI->hasPatchPoint() ||
+ MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() ||
(MF.getTarget().Options.GuaranteedTailCallOpt &&
MF.getInfo<PPCFunctionInfo>()->hasFastCall());
}
@@ -671,8 +670,8 @@ PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
unsigned FrameSize = determineFrameLayout(MF, false);
int NegFrameSize = -FrameSize;
bool IsLargeFrame = !isInt<16>(NegFrameSize);
- MachineFrameInfo *MFI = MF.getFrameInfo();
- unsigned MaxAlign = MFI->getMaxAlignment();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ unsigned MaxAlign = MFI.getMaxAlignment();
bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
return (IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1;
@@ -694,7 +693,7 @@ bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
void PPCFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
const PPCInstrInfo &TII =
*static_cast<const PPCInstrInfo *>(Subtarget.getInstrInfo());
const PPCRegisterInfo *RegInfo =
@@ -719,7 +718,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
if (!isSVR4ABI)
for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) {
if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) {
- HandleVRSaveUpdate(MBBI, TII);
+ HandleVRSaveUpdate(*MBBI, TII);
break;
}
}
@@ -733,7 +732,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
if (!isInt<32>(NegFrameSize))
llvm_unreachable("Unhandled stack size!");
- if (MFI->isFrameAddressTaken())
+ if (MFI.isFrameAddressTaken())
replaceFPWithRealFP(MF);
// Check if the link register (LR) must be saved.
@@ -779,7 +778,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
"FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
- // Using the same bool variable as below to supress compiler warnings.
+ // Using the same bool variable as below to suppress compiler warnings.
bool SingleScratchReg =
findScratchRegister(&MBB, false, twoUniqueScratchRegsRequired(&MBB),
&ScratchReg, &TempReg);
@@ -793,10 +792,10 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
int FPOffset = 0;
if (HasFP) {
if (isSVR4ABI) {
- MachineFrameInfo *FFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
int FPIndex = FI->getFramePointerSaveIndex();
assert(FPIndex && "No Frame Pointer Save Slot!");
- FPOffset = FFI->getObjectOffset(FPIndex);
+ FPOffset = MFI.getObjectOffset(FPIndex);
} else {
FPOffset = getFramePointerSaveOffset();
}
@@ -805,10 +804,10 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
int BPOffset = 0;
if (HasBP) {
if (isSVR4ABI) {
- MachineFrameInfo *FFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
int BPIndex = FI->getBasePointerSaveIndex();
assert(BPIndex && "No Base Pointer Save Slot!");
- BPOffset = FFI->getObjectOffset(BPIndex);
+ BPOffset = MFI.getObjectOffset(BPIndex);
} else {
BPOffset = getBasePointerSaveOffset();
}
@@ -816,14 +815,14 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
int PBPOffset = 0;
if (FI->usesPICBase()) {
- MachineFrameInfo *FFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
int PBPIndex = FI->getPICBasePointerSaveIndex();
assert(PBPIndex && "No PIC Base Pointer Save Slot!");
- PBPOffset = FFI->getObjectOffset(PBPIndex);
+ PBPOffset = MFI.getObjectOffset(PBPIndex);
}
// Get stack alignments.
- unsigned MaxAlign = MFI->getMaxAlignment();
+ unsigned MaxAlign = MFI.getMaxAlignment();
if (HasBP && MaxAlign > 1)
assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) &&
"Invalid alignment!");
@@ -1106,12 +1105,12 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
// because if the stack needed aligning then CFA won't be at a fixed
// offset from FP/SP.
unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
- CFIIndex = MMI.addFrameInst(
+ CFIIndex = MF.addFrameInst(
MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
} else {
// Adjust the definition of CFA to account for the change in SP.
assert(NegFrameSize);
- CFIIndex = MMI.addFrameInst(
+ CFIIndex = MF.addFrameInst(
MCCFIInstruction::createDefCfaOffset(nullptr, NegFrameSize));
}
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
@@ -1120,7 +1119,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
if (HasFP) {
// Describe where FP was saved, at a fixed offset from CFA.
unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
- CFIIndex = MMI.addFrameInst(
+ CFIIndex = MF.addFrameInst(
MCCFIInstruction::createOffset(nullptr, Reg, FPOffset));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
@@ -1129,7 +1128,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
if (FI->usesPICBase()) {
// Describe where FP was saved, at a fixed offset from CFA.
unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true);
- CFIIndex = MMI.addFrameInst(
+ CFIIndex = MF.addFrameInst(
MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
@@ -1138,7 +1137,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
if (HasBP) {
// Describe where BP was saved, at a fixed offset from CFA.
unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
- CFIIndex = MMI.addFrameInst(
+ CFIIndex = MF.addFrameInst(
MCCFIInstruction::createOffset(nullptr, Reg, BPOffset));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
@@ -1147,7 +1146,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
if (MustSaveLR) {
// Describe where LR was saved, at a fixed offset from CFA.
unsigned Reg = MRI->getDwarfRegNum(LRReg, true);
- CFIIndex = MMI.addFrameInst(
+ CFIIndex = MF.addFrameInst(
MCCFIInstruction::createOffset(nullptr, Reg, LROffset));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
@@ -1164,7 +1163,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
// Change the definition of CFA from SP+offset to FP+offset, because SP
// will change at every alloca.
unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
- unsigned CFIIndex = MMI.addFrameInst(
+ unsigned CFIIndex = MF.addFrameInst(
MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
@@ -1175,7 +1174,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
if (needsCFI) {
// Describe where callee saved registers were saved, at fixed offsets from
// CFA.
- const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
unsigned Reg = CSI[I].getReg();
if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
@@ -1198,15 +1197,15 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
// the whole CR word. In the ELFv2 ABI, every CR that was
// actually saved gets its own CFI record.
unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2;
- unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
nullptr, MRI->getDwarfRegNum(CRReg, true), 8));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
continue;
}
- int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
- unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
+ int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
@@ -1228,10 +1227,10 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
static_cast<const PPCRegisterInfo *>(Subtarget.getRegisterInfo());
// Get alignment info so we know how to restore the SP.
- const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
// Get the number of bytes allocated from the FrameInfo.
- int FrameSize = MFI->getStackSize();
+ int FrameSize = MFI.getStackSize();
// Get processor type.
bool isPPC64 = Subtarget.isPPC64();
@@ -1272,7 +1271,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
int FPOffset = 0;
- // Using the same bool variable as below to supress compiler warnings.
+ // Using the same bool variable as below to suppress compiler warnings.
bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg,
&TempReg);
assert(SingleScratchReg &&
@@ -1284,7 +1283,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
if (isSVR4ABI) {
int FPIndex = FI->getFramePointerSaveIndex();
assert(FPIndex && "No Frame Pointer Save Slot!");
- FPOffset = MFI->getObjectOffset(FPIndex);
+ FPOffset = MFI.getObjectOffset(FPIndex);
} else {
FPOffset = getFramePointerSaveOffset();
}
@@ -1295,7 +1294,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
if (isSVR4ABI) {
int BPIndex = FI->getBasePointerSaveIndex();
assert(BPIndex && "No Base Pointer Save Slot!");
- BPOffset = MFI->getObjectOffset(BPIndex);
+ BPOffset = MFI.getObjectOffset(BPIndex);
} else {
BPOffset = getBasePointerSaveOffset();
}
@@ -1305,7 +1304,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
if (FI->usesPICBase()) {
int PBPIndex = FI->getPICBasePointerSaveIndex();
assert(PBPIndex && "No PIC Base Pointer Save Slot!");
- PBPOffset = MFI->getObjectOffset(PBPIndex);
+ PBPOffset = MFI.getObjectOffset(PBPIndex);
}
bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn());
@@ -1380,7 +1379,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
.addReg(FPReg)
.addReg(ScratchReg);
}
- } else if (!isLargeFrame && !HasBP && !MFI->hasVarSizedObjects()) {
+ } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
if (HasRedZone) {
BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
.addReg(SPReg)
@@ -1603,14 +1602,14 @@ void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
int FPSI = FI->getFramePointerSaveIndex();
bool isPPC64 = Subtarget.isPPC64();
bool isDarwinABI = Subtarget.isDarwinABI();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
// If the frame pointer save index hasn't been defined yet.
if (!FPSI && needsFP(MF)) {
// Find out what the fix offset of the frame pointer save area.
int FPOffset = getFramePointerSaveOffset();
// Allocate the frame index for frame pointer save area.
- FPSI = MFI->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
+ FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
// Save the result.
FI->setFramePointerSaveIndex(FPSI);
}
@@ -1619,7 +1618,7 @@ void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
if (!BPSI && RegInfo->hasBasePointer(MF)) {
int BPOffset = getBasePointerSaveOffset();
// Allocate the frame index for the base pointer save area.
- BPSI = MFI->CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
+ BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
// Save the result.
FI->setBasePointerSaveIndex(BPSI);
}
@@ -1627,7 +1626,7 @@ void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
// Reserve stack space for the PIC Base register (R30).
// Only used in SVR4 32-bit.
if (FI->usesPICBase()) {
- int PBPSI = MFI->CreateFixedObject(4, -8, true);
+ int PBPSI = MFI.CreateFixedObject(4, -8, true);
FI->setPICBasePointerSaveIndex(PBPSI);
}
@@ -1646,7 +1645,7 @@ void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
int TCSPDelta = 0;
if (MF.getTarget().Options.GuaranteedTailCallOpt &&
(TCSPDelta = FI->getTailCallSPDelta()) < 0) {
- MFI->CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
+ MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
}
// For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the
@@ -1655,7 +1654,7 @@ void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
(SavedRegs.test(PPC::CR2) ||
SavedRegs.test(PPC::CR3) ||
SavedRegs.test(PPC::CR4))) {
- int FrameIdx = MFI->CreateFixedObject((uint64_t)4, (int64_t)-4, true);
+ int FrameIdx = MFI.CreateFixedObject((uint64_t)4, (int64_t)-4, true);
FI->setCRSpillFrameIndex(FrameIdx);
}
}
@@ -1669,15 +1668,15 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
}
// Get callee saved register information.
- MachineFrameInfo *FFI = MF.getFrameInfo();
- const std::vector<CalleeSavedInfo> &CSI = FFI->getCalleeSavedInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
// If the function is shrink-wrapped, and if the function has a tail call, the
// tail call might not be in the new RestoreBlock, so real branch instruction
// won't be generated by emitEpilogue(), because shrink-wrap has chosen new
// RestoreBlock. So we handle this case here.
- if (FFI->getSavePoint() && FFI->hasTailCall()) {
- MachineBasicBlock *RestoreBlock = FFI->getRestorePoint();
+ if (MFI.getSavePoint() && MFI.hasTailCall()) {
+ MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
for (MachineBasicBlock &MBB : MF) {
if (MBB.isReturnBlock() && (&MBB) != RestoreBlock)
createTailCallBranchInstr(MBB);
@@ -1768,7 +1767,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
int FI = FPRegs[i].getFrameIdx();
- FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
}
LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8;
@@ -1782,7 +1781,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
int FI = PFI->getFramePointerSaveIndex();
assert(FI && "No Frame Pointer Save Slot!");
- FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
}
if (PFI->usesPICBase()) {
@@ -1791,7 +1790,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
int FI = PFI->getPICBasePointerSaveIndex();
assert(FI && "No PIC Base Pointer Save Slot!");
- FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
}
const PPCRegisterInfo *RegInfo =
@@ -1802,7 +1801,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
int FI = PFI->getBasePointerSaveIndex();
assert(FI && "No Base Pointer Save Slot!");
- FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
}
// General register save area starts right below the Floating-point
@@ -1813,7 +1812,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
int FI = GPRegs[i].getFrameIdx();
- FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
}
// Move general register save area spill slots down, taking into account
@@ -1821,7 +1820,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
int FI = G8Regs[i].getFrameIdx();
- FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
}
unsigned MinReg =
@@ -1852,7 +1851,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
PPC::CRRCRegClass.contains(Reg)))) {
int FI = CSI[i].getFrameIdx();
- FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
}
}
@@ -1869,7 +1868,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
if (PPC::VRSAVERCRegClass.contains(Reg)) {
int FI = CSI[i].getFrameIdx();
- FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
}
}
@@ -1883,7 +1882,7 @@ void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
int FI = VRegs[i].getFrameIdx();
- FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI));
+ MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
}
}
@@ -1907,25 +1906,25 @@ PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
// because we've not yet computed callee-saved register spills or the
// needed alignment padding.
unsigned StackSize = determineFrameLayout(MF, false, true);
- MachineFrameInfo *MFI = MF.getFrameInfo();
- if (MFI->hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) ||
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) ||
hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) {
const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *RC = Subtarget.isPPC64() ? G8RC : GPRC;
- RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
- RC->getAlignment(),
- false));
+ RS->addScavengingFrameIndex(MFI.CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
// Might we have over-aligned allocas?
- bool HasAlVars = MFI->hasVarSizedObjects() &&
- MFI->getMaxAlignment() > getStackAlignment();
+ bool HasAlVars = MFI.hasVarSizedObjects() &&
+ MFI.getMaxAlignment() > getStackAlignment();
// These kinds of spills might need two registers.
if (spillsCR(MF) || spillsVRSAVE(MF) || HasAlVars)
- RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
- RC->getAlignment(),
- false));
+ RS->addScavengingFrameIndex(MFI.CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
}
}
@@ -2049,8 +2048,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
- MachineInstr *MI = I;
- const DebugLoc &dl = MI->getDebugLoc();
+ const DebugLoc &dl = I->getDebugLoc();
if (isInt<16>(CalleeAmt)) {
BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
index caab67d68b17..f327396370f6 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -226,7 +226,7 @@ void PPCDispatchGroupSBHazardRecognizer::EmitNoop() {
// group-terminating nop, the group is complete.
// FIXME: the same for P9 as previous gen until POWER9 scheduling is ready
if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 ||
- Directive == PPC::DIR_PWR8 || Directive == PPC::DIR_PWR8 ||
+ Directive == PPC::DIR_PWR8 || Directive == PPC::DIR_PWR9 ||
CurSlots == 6) {
CurGroup.clear();
CurSlots = CurBranches = 0;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 0e9b2daa0cb5..1e51c1f651c9 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -215,7 +215,7 @@ namespace {
void InsertVRSaveCode(MachineFunction &MF);
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "PowerPC DAG->DAG Pattern Instruction Selection";
}
@@ -334,12 +334,12 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
}
} else {
GlobalBaseReg =
- RegInfo->createVirtualRegister(&PPC::GPRC_NOR0RegClass);
+ RegInfo->createVirtualRegister(&PPC::GPRC_and_GPRC_NOR0RegClass);
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR));
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
}
} else {
- GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_NOX0RegClass);
+ GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));
BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg);
}
@@ -633,6 +633,13 @@ static unsigned getInt64CountDirect(int64_t Imm) {
// If no shift, we're done.
if (!Shift) return Result;
+  // If the Hi word equals the Lo word,
+  // we can use rldimi to insert the Lo word into the Hi word.
+ if ((unsigned)(Imm & 0xFFFFFFFF) == Remainder) {
+ ++Result;
+ return Result;
+ }
+
// Shift for next step if the upper 32-bits were not zero.
if (Imm)
++Result;
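A worked example for the new case (the constant is hypothetical): for Imm = 0x0123456701234567 the two 32-bit halves match, so one half is built with lis/ori (two instructions) and a single rldimi with shift 32 and mask begin 0 replicates it into the high word, for a count of three instead of the typical five-instruction lis/ori/rldicr/oris/ori sequence. The node-building counterpart of this case is added to getInt64Direct in the next hunk.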
@@ -731,6 +738,14 @@ static SDNode *getInt64Direct(SelectionDAG *CurDAG, const SDLoc &dl,
// If no shift, we're done.
if (!Shift) return Result;
+  // If the Hi word equals the Lo word,
+  // we can use rldimi to insert the Lo word into the Hi word.
+ if ((unsigned)(Imm & 0xFFFFFFFF) == Remainder) {
+ SDValue Ops[] =
+ { SDValue(Result, 0), SDValue(Result, 0), getI32Imm(Shift), getI32Imm(0)};
+ return CurDAG->getMachineNode(PPC::RLDIMI, dl, MVT::i64, Ops);
+ }
+
// Shift for next step if the upper 32-bits were not zero.
if (Imm) {
Result = CurDAG->getMachineNode(PPC::RLDICR, dl, MVT::i64,
@@ -912,84 +927,95 @@ class BitPermutationSelector {
}
};
- // Return true if something interesting was deduced, return false if we're
+ using ValueBitsMemoizedValue = std::pair<bool, SmallVector<ValueBit, 64>>;
+ using ValueBitsMemoizer =
+ DenseMap<SDValue, std::unique_ptr<ValueBitsMemoizedValue>>;
+ ValueBitsMemoizer Memoizer;
+
+  // Return a pair of a bool and a pointer to the memoized SmallVector entry.
+  // The bool is true if something interesting was deduced, and false if we're
// providing only a generic representation of V (or something else likewise
- // uninteresting for instruction selection).
- bool getValueBits(SDValue V, SmallVector<ValueBit, 64> &Bits) {
+ // uninteresting for instruction selection) through the SmallVector.
+ std::pair<bool, SmallVector<ValueBit, 64> *> getValueBits(SDValue V,
+ unsigned NumBits) {
+ auto &ValueEntry = Memoizer[V];
+ if (ValueEntry)
+ return std::make_pair(ValueEntry->first, &ValueEntry->second);
+ ValueEntry.reset(new ValueBitsMemoizedValue());
+ bool &Interesting = ValueEntry->first;
+ SmallVector<ValueBit, 64> &Bits = ValueEntry->second;
+ Bits.resize(NumBits);
+
switch (V.getOpcode()) {
default: break;
case ISD::ROTL:
if (isa<ConstantSDNode>(V.getOperand(1))) {
unsigned RotAmt = V.getConstantOperandVal(1);
- SmallVector<ValueBit, 64> LHSBits(Bits.size());
- getValueBits(V.getOperand(0), LHSBits);
+ const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
- for (unsigned i = 0; i < Bits.size(); ++i)
- Bits[i] = LHSBits[i < RotAmt ? i + (Bits.size() - RotAmt) : i - RotAmt];
+ for (unsigned i = 0; i < NumBits; ++i)
+ Bits[i] = LHSBits[i < RotAmt ? i + (NumBits - RotAmt) : i - RotAmt];
- return true;
+ return std::make_pair(Interesting = true, &Bits);
}
break;
case ISD::SHL:
if (isa<ConstantSDNode>(V.getOperand(1))) {
unsigned ShiftAmt = V.getConstantOperandVal(1);
- SmallVector<ValueBit, 64> LHSBits(Bits.size());
- getValueBits(V.getOperand(0), LHSBits);
+ const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
- for (unsigned i = ShiftAmt; i < Bits.size(); ++i)
+ for (unsigned i = ShiftAmt; i < NumBits; ++i)
Bits[i] = LHSBits[i - ShiftAmt];
for (unsigned i = 0; i < ShiftAmt; ++i)
Bits[i] = ValueBit(ValueBit::ConstZero);
- return true;
+ return std::make_pair(Interesting = true, &Bits);
}
break;
case ISD::SRL:
if (isa<ConstantSDNode>(V.getOperand(1))) {
unsigned ShiftAmt = V.getConstantOperandVal(1);
- SmallVector<ValueBit, 64> LHSBits(Bits.size());
- getValueBits(V.getOperand(0), LHSBits);
+ const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
- for (unsigned i = 0; i < Bits.size() - ShiftAmt; ++i)
+ for (unsigned i = 0; i < NumBits - ShiftAmt; ++i)
Bits[i] = LHSBits[i + ShiftAmt];
- for (unsigned i = Bits.size() - ShiftAmt; i < Bits.size(); ++i)
+ for (unsigned i = NumBits - ShiftAmt; i < NumBits; ++i)
Bits[i] = ValueBit(ValueBit::ConstZero);
- return true;
+ return std::make_pair(Interesting = true, &Bits);
}
break;
case ISD::AND:
if (isa<ConstantSDNode>(V.getOperand(1))) {
uint64_t Mask = V.getConstantOperandVal(1);
- SmallVector<ValueBit, 64> LHSBits(Bits.size());
- bool LHSTrivial = getValueBits(V.getOperand(0), LHSBits);
+ const SmallVector<ValueBit, 64> *LHSBits;
+ // Mark this as interesting, only if the LHS was also interesting. This
+ // prevents the overall procedure from matching a single immediate 'and'
+ // (which is non-optimal because such an and might be folded with other
+ // things if we don't select it here).
+ std::tie(Interesting, LHSBits) = getValueBits(V.getOperand(0), NumBits);
- for (unsigned i = 0; i < Bits.size(); ++i)
+ for (unsigned i = 0; i < NumBits; ++i)
if (((Mask >> i) & 1) == 1)
- Bits[i] = LHSBits[i];
+ Bits[i] = (*LHSBits)[i];
else
Bits[i] = ValueBit(ValueBit::ConstZero);
- // Mark this as interesting, only if the LHS was also interesting. This
- // prevents the overall procedure from matching a single immediate 'and'
- // (which is non-optimal because such an and might be folded with other
- // things if we don't select it here).
- return LHSTrivial;
+ return std::make_pair(Interesting, &Bits);
}
break;
case ISD::OR: {
- SmallVector<ValueBit, 64> LHSBits(Bits.size()), RHSBits(Bits.size());
- getValueBits(V.getOperand(0), LHSBits);
- getValueBits(V.getOperand(1), RHSBits);
+ const auto &LHSBits = *getValueBits(V.getOperand(0), NumBits).second;
+ const auto &RHSBits = *getValueBits(V.getOperand(1), NumBits).second;
bool AllDisjoint = true;
- for (unsigned i = 0; i < Bits.size(); ++i)
+ for (unsigned i = 0; i < NumBits; ++i)
if (LHSBits[i].isZero())
Bits[i] = RHSBits[i];
else if (RHSBits[i].isZero())
@@ -1002,14 +1028,14 @@ class BitPermutationSelector {
if (!AllDisjoint)
break;
- return true;
+ return std::make_pair(Interesting = true, &Bits);
}
}
- for (unsigned i = 0; i < Bits.size(); ++i)
+ for (unsigned i = 0; i < NumBits; ++i)
Bits[i] = ValueBit(V, i);
- return false;
+ return std::make_pair(Interesting = false, &Bits);
}
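A note on the memoizer shape used above: Interesting and Bits are references into the heap-allocated ValueBitsMemoizedValue rather than into the DenseMap storage itself, so they stay valid while the recursive getValueBits calls insert further entries and possibly force the map to grow. A minimal stand-alone sketch of the same pattern (the names and the stand-in computation are invented for illustration, and std::unordered_map stands in for DenseMap):

  #include <memory>
  #include <unordered_map>
  #include <utility>
  #include <vector>

  // Memoized recursive computation that hands out pointers into its cache.
  // Each entry lives behind a unique_ptr, so the pointer handed to callers
  // stays valid even if the map reallocates while deeper calls insert new
  // entries.
  struct BitMemo {
    using Entry = std::pair<bool, std::vector<int>>;
    std::unordered_map<int, std::unique_ptr<Entry>> Cache;

    std::pair<bool, std::vector<int> *> get(int Key, unsigned NumBits) {
      auto &Slot = Cache[Key];
      if (Slot)                                // memoized hit
        return {Slot->first, &Slot->second};
      Slot.reset(new Entry());
      bool &Interesting = Slot->first;         // stable: lives on the heap
      std::vector<int> &Bits = Slot->second;   // stable: lives on the heap
      Bits.assign(NumBits, Key);               // stand-in for the real work,
      Interesting = (Key % 2 == 0);            // which would recurse via get()
      return {Interesting, &Bits};
    }
  };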
// For each value (except the constant ones), compute the left-rotate amount
@@ -1648,9 +1674,12 @@ class BitPermutationSelector {
unsigned NumRLInsts = 0;
bool FirstBG = true;
+ bool MoreBG = false;
for (auto &BG : BitGroups) {
- if (!MatchingBG(BG))
+ if (!MatchingBG(BG)) {
+ MoreBG = true;
continue;
+ }
NumRLInsts +=
SelectRotMask64Count(BG.RLAmt, BG.Repl32, BG.StartIdx, BG.EndIdx,
!FirstBG);
@@ -1668,7 +1697,10 @@ class BitPermutationSelector {
// because that exposes more opportunities for CSE.
if (NumAndInsts > NumRLInsts)
continue;
- if (Use32BitInsts && NumAndInsts == NumRLInsts)
+      // When merging multiple bit groups, an OR instruction is needed.
+      // But when a rotate is used, rldimi can insert the rotated value into
+      // any register, so the OR can be avoided.
+ if ((Use32BitInsts || MoreBG) && NumAndInsts == NumRLInsts)
continue;
DEBUG(dbgs() << "\t\t\t\tusing masking\n");
@@ -1886,8 +1918,7 @@ class BitPermutationSelector {
}
void eraseMatchingBitGroups(function_ref<bool(const BitGroup &)> F) {
- BitGroups.erase(std::remove_if(BitGroups.begin(), BitGroups.end(), F),
- BitGroups.end());
+ BitGroups.erase(remove_if(BitGroups, F), BitGroups.end());
}
SmallVector<ValueBit, 64> Bits;
@@ -1910,9 +1941,12 @@ public:
// rotate-and-shift/shift/and/or instructions, using a set of heuristics
    // known to produce optimal code for common cases (like i32 byte swapping).
SDNode *Select(SDNode *N) {
- Bits.resize(N->getValueType(0).getSizeInBits());
- if (!getValueBits(SDValue(N, 0), Bits))
+ Memoizer.clear();
+ auto Result =
+ getValueBits(SDValue(N, 0), N->getValueType(0).getSizeInBits());
+ if (!Result.first)
return nullptr;
+ Bits = std::move(*Result.second);
DEBUG(dbgs() << "Considering bit-permutation-based instruction"
" selection for: ");
@@ -2623,6 +2657,23 @@ void PPCDAGToDAGISel::Select(SDNode *N) {
MB = 64 - countTrailingOnes(Imm64);
SH = 0;
+ if (Val.getOpcode() == ISD::ANY_EXTEND) {
+ auto Op0 = Val.getOperand(0);
+    if (Op0.getOpcode() == ISD::SRL &&
+ isInt32Immediate(Op0.getOperand(1).getNode(), Imm) && Imm <= MB) {
+
+ auto ResultType = Val.getNode()->getValueType(0);
+ auto ImDef = CurDAG->getMachineNode(PPC::IMPLICIT_DEF, dl,
+ ResultType);
+        SDValue IDVal(ImDef, 0);
+
+ Val = SDValue(CurDAG->getMachineNode(PPC::INSERT_SUBREG, dl,
+ ResultType, IDVal, Op0.getOperand(0),
+ getI32Imm(1, dl)), 0);
+ SH = 64 - Imm;
+ }
+ }
+
// If the operand is a logical right shift, we can fold it into this
// instruction: rldicl(rldicl(x, 64-n, n), 0, mb) -> rldicl(x, 64-n, mb)
// for n <= mb. The right shift is really a left rotate followed by a
@@ -3187,7 +3238,7 @@ SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
Op0.getOperand(1) == Op1.getOperand(1) && CC == ISD::SETEQ &&
isa<ConstantSDNode>(Op0.getOperand(1))) {
- unsigned Bits = Op0.getValueType().getSizeInBits();
+ unsigned Bits = Op0.getValueSizeInBits();
if (b != Bits/8-1)
return false;
if (Op0.getConstantOperandVal(1) != Bits-8)
@@ -3215,9 +3266,9 @@ SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
// Now we need to make sure that the upper bytes are known to be
// zero.
- unsigned Bits = Op0.getValueType().getSizeInBits();
- if (!CurDAG->MaskedValueIsZero(Op0,
- APInt::getHighBitsSet(Bits, Bits - (b+1)*8)))
+ unsigned Bits = Op0.getValueSizeInBits();
+ if (!CurDAG->MaskedValueIsZero(
+ Op0, APInt::getHighBitsSet(Bits, Bits - (b + 1) * 8)))
return false;
LHS = Op0.getOperand(0);
@@ -3250,7 +3301,7 @@ SDValue PPCDAGToDAGISel::combineToCMPB(SDNode *N) {
} else if (Op.getOpcode() == ISD::SRL) {
if (!isa<ConstantSDNode>(Op.getOperand(1)))
return false;
- unsigned Bits = Op.getValueType().getSizeInBits();
+ unsigned Bits = Op.getValueSizeInBits();
if (b != Bits/8-1)
return false;
if (Op.getConstantOperandVal(1) != Bits-8)
@@ -3562,7 +3613,8 @@ void PPCDAGToDAGISel::PeepholeCROps() {
Op.getOperand(0) == Op.getOperand(1))
Op2Not = true;
}
- } // fallthrough
+ LLVM_FALLTHROUGH;
+ }
case PPC::BC:
case PPC::BCn:
case PPC::SELECT_I4:
@@ -3989,8 +4041,9 @@ static bool PeepholePPC64ZExtGather(SDValue Op32,
return true;
}
- // CNTLZW always produces a 64-bit value in [0,32], and so is zero extended.
- if (Op32.getMachineOpcode() == PPC::CNTLZW) {
+  // CNT[LT]ZW always produce a 64-bit value in [0,32], and so are zero extended.
+ if (Op32.getMachineOpcode() == PPC::CNTLZW ||
+ Op32.getMachineOpcode() == PPC::CNTTZW) {
ToPromote.insert(Op32.getNode());
return true;
}
@@ -4185,6 +4238,7 @@ void PPCDAGToDAGISel::PeepholePPC64ZExt() {
case PPC::LHBRX: NewOpcode = PPC::LHBRX8; break;
case PPC::LWBRX: NewOpcode = PPC::LWBRX8; break;
case PPC::CNTLZW: NewOpcode = PPC::CNTLZW8; break;
+ case PPC::CNTTZW: NewOpcode = PPC::CNTTZW8; break;
case PPC::RLWIMI: NewOpcode = PPC::RLWIMI8; break;
case PPC::OR: NewOpcode = PPC::OR8; break;
case PPC::SELECT_I4: NewOpcode = PPC::SELECT_I8; break;
@@ -4312,13 +4366,6 @@ void PPCDAGToDAGISel::PeepholePPC64() {
if (!Base.isMachineOpcode())
continue;
- // On targets with fusion, we don't want this to fire and remove a fusion
- // opportunity, unless a) it results in another fusion opportunity or
- // b) optimizing for size.
- if (PPCSubTarget->hasFusion() &&
- (!MF->getFunction()->optForSize() && !Base.hasOneUse()))
- continue;
-
unsigned Flags = 0;
bool ReplaceFlags = true;
@@ -4363,15 +4410,64 @@ void PPCDAGToDAGISel::PeepholePPC64() {
}
SDValue ImmOpnd = Base.getOperand(1);
- int MaxDisplacement = 0;
+
+ // On PPC64, the TOC base pointer is guaranteed by the ABI only to have
+ // 8-byte alignment, and so we can only use offsets less than 8 (otherwise,
+ // we might have needed different @ha relocation values for the offset
+ // pointers).
+ int MaxDisplacement = 7;
if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(ImmOpnd)) {
const GlobalValue *GV = GA->getGlobal();
- MaxDisplacement = GV->getAlignment() - 1;
+ MaxDisplacement = std::min((int) GV->getAlignment() - 1, MaxDisplacement);
}
+ bool UpdateHBase = false;
+ SDValue HBase = Base.getOperand(0);
+
int Offset = N->getConstantOperandVal(FirstOp);
- if (Offset < 0 || Offset > MaxDisplacement)
- continue;
+ if (ReplaceFlags) {
+ if (Offset < 0 || Offset > MaxDisplacement) {
+        // If we have an addi(toc@l)/addis(toc@ha) pair, and the addis has only
+        // one use, then we can do this for any offset; we just need to update
+        // the offset (i.e. the symbol addend) on the addis as well.
+ if (Base.getMachineOpcode() != PPC::ADDItocL)
+ continue;
+
+ if (!HBase.isMachineOpcode() ||
+ HBase.getMachineOpcode() != PPC::ADDIStocHA)
+ continue;
+
+ if (!Base.hasOneUse() || !HBase.hasOneUse())
+ continue;
+
+ SDValue HImmOpnd = HBase.getOperand(1);
+ if (HImmOpnd != ImmOpnd)
+ continue;
+
+ UpdateHBase = true;
+ }
+ } else {
+ // If we're directly folding the addend from an addi instruction, then:
+ // 1. In general, the offset on the memory access must be zero.
+ // 2. If the addend is a constant, then it can be combined with a
+ // non-zero offset, but only if the result meets the encoding
+ // requirements.
+ if (auto *C = dyn_cast<ConstantSDNode>(ImmOpnd)) {
+ Offset += C->getSExtValue();
+
+ if ((StorageOpcode == PPC::LWA || StorageOpcode == PPC::LD ||
+ StorageOpcode == PPC::STD) && (Offset % 4) != 0)
+ continue;
+
+ if (!isInt<16>(Offset))
+ continue;
+
+ ImmOpnd = CurDAG->getTargetConstant(Offset, SDLoc(ImmOpnd),
+ ImmOpnd.getValueType());
+ } else if (Offset != 0) {
+ continue;
+ }
+ }
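As a concrete illustration of rule 2 above (registers are hypothetical): folding addi r4, r3, 6 into ld r5, 2(r4) gives a combined offset of 8, which is a multiple of 4 and fits in a signed 16-bit immediate, so the fold can produce ld r5, 8(r3); a combined offset of 7, by contrast, is rejected because LWA, LD and STD are DS-form instructions whose displacement field is scaled by 4 and must therefore be a multiple of 4.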
// We found an opportunity. Reverse the operands from the add
// immediate and substitute them into the load or store. If
@@ -4414,6 +4510,10 @@ void PPCDAGToDAGISel::PeepholePPC64() {
(void)CurDAG->UpdateNodeOperands(N, ImmOpnd, Base.getOperand(0),
N->getOperand(2));
+ if (UpdateHBase)
+ (void)CurDAG->UpdateNodeOperands(HBase.getNode(), HBase.getOperand(0),
+ ImmOpnd);
+
// The add-immediate may now be dead, in which case remove it.
if (Base.getNode()->use_empty())
CurDAG->RemoveDeadNode(Base.getNode());
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 9089c6a297a7..aa3ffde24b99 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -27,6 +27,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
@@ -216,11 +217,17 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FROUND, MVT::f32, Legal);
}
- // PowerPC does not have BSWAP, CTPOP or CTTZ
+  // PowerPC does not have BSWAP.
+  // CTPOP and CTTZ were introduced in P8 and P9, respectively.
setOperationAction(ISD::BSWAP, MVT::i32 , Expand);
- setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
setOperationAction(ISD::BSWAP, MVT::i64 , Expand);
- setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
+ if (Subtarget.isISA3_0()) {
+ setOperationAction(ISD::CTTZ , MVT::i32 , Legal);
+ setOperationAction(ISD::CTTZ , MVT::i64 , Legal);
+ } else {
+ setOperationAction(ISD::CTTZ , MVT::i32 , Expand);
+ setOperationAction(ISD::CTTZ , MVT::i64 , Expand);
+ }
if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
setOperationAction(ISD::CTPOP, MVT::i32 , Legal);
@@ -433,6 +440,12 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::CTLZ, VT, Expand);
}
+ // Vector instructions introduced in P9
+ if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
+ setOperationAction(ISD::CTTZ, VT, Legal);
+ else
+ setOperationAction(ISD::CTTZ, VT, Expand);
+
// We promote all shuffles to v16i8.
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Promote);
AddPromotedToType (ISD::VECTOR_SHUFFLE, VT, MVT::v16i8);
@@ -489,7 +502,6 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Expand);
setOperationAction(ISD::FPOW, VT, Expand);
setOperationAction(ISD::BSWAP, VT, Expand);
- setOperationAction(ISD::CTTZ, VT, Expand);
setOperationAction(ISD::VSELECT, VT, Expand);
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
setOperationAction(ISD::ROTL, VT, Expand);
@@ -660,6 +672,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::FABS, MVT::v4f32, Legal);
setOperationAction(ISD::FABS, MVT::v2f64, Legal);
+ if (Subtarget.hasDirectMove())
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom);
+
addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
}
@@ -1061,6 +1077,9 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::STBRX: return "PPCISD::STBRX";
case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
+ case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
+ case PPCISD::STXSIX: return "PPCISD::STXSIX";
+ case PPCISD::VEXTS: return "PPCISD::VEXTS";
case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
@@ -1832,9 +1851,9 @@ static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
return;
MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
- unsigned Align = MFI->getObjectAlignment(FrameIdx);
+ unsigned Align = MFI.getObjectAlignment(FrameIdx);
if (Align >= 4)
return;
@@ -2158,6 +2177,55 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
}
+// For 64-bit PowerPC, prefer the more compact relative encodings.
+// This saves 32 bits per jump table entry at the cost of one or two
+// extra instructions at the jump site.
+unsigned PPCTargetLowering::getJumpTableEncoding() const {
+ if (isJumpTableRelative())
+ return MachineJumpTableInfo::EK_LabelDifference32;
+
+ return TargetLowering::getJumpTableEncoding();
+}
+
+bool PPCTargetLowering::isJumpTableRelative() const {
+ if (Subtarget.isPPC64())
+ return true;
+ return TargetLowering::isJumpTableRelative();
+}
+
+SDValue PPCTargetLowering::getPICJumpTableRelocBase(SDValue Table,
+ SelectionDAG &DAG) const {
+ if (!Subtarget.isPPC64())
+ return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
+
+ switch (getTargetMachine().getCodeModel()) {
+ case CodeModel::Default:
+ case CodeModel::Small:
+ case CodeModel::Medium:
+ return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
+ default:
+ return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
+ getPointerTy(DAG.getDataLayout()));
+ }
+}
+
+const MCExpr *
+PPCTargetLowering::getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
+ unsigned JTI,
+ MCContext &Ctx) const {
+ if (!Subtarget.isPPC64())
+ return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
+
+ switch (getTargetMachine().getCodeModel()) {
+ case CodeModel::Default:
+ case CodeModel::Small:
+ case CodeModel::Medium:
+ return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
+ default:
+ return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
+ }
+}
+
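In MachineJumpTableInfo terms, EK_LabelDifference32 emits each jump table entry as a 32-bit difference between the destination block's label and a relocation base (by default the jump table or PIC base) rather than a 64-bit absolute address, which is where the 32 bits per entry mentioned above are saved; the lowering then rebuilds the absolute target, roughly a sign-extending load of the entry plus an add of the base. The overrides for the remaining code models (the default: cases, e.g. large) derive that base from the PPC global base register and its PIC base symbol instead of the generic TargetLowering default.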
SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
EVT PtrVT = Op.getValueType();
JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
@@ -2365,20 +2433,10 @@ SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
// If we're comparing for equality to zero, expose the fact that this is
// implemented as a ctlz/srl pair on ppc, so that the dag combiner can
// fold the new nodes.
+ if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
+ return V;
+
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
- if (C->isNullValue() && CC == ISD::SETEQ) {
- EVT VT = Op.getOperand(0).getValueType();
- SDValue Zext = Op.getOperand(0);
- if (VT.bitsLT(MVT::i32)) {
- VT = MVT::i32;
- Zext = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Op.getOperand(0));
- }
- unsigned Log2b = Log2_32(VT.getSizeInBits());
- SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Zext);
- SDValue Scc = DAG.getNode(ISD::SRL, dl, VT, Clz,
- DAG.getConstant(Log2b, dl, MVT::i32));
- return DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Scc);
- }
// Leave comparisons against 0 and -1 alone for now, since they're usually
// optimized. FIXME: revisit this when we can custom lower all setcc
// optimizations.
@@ -2679,6 +2737,32 @@ bool llvm::CC_PPC32_SVR4_Custom_AlignArgRegs(unsigned &ValNo, MVT &ValVT,
return false;
}
+bool
+llvm::CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ static const MCPhysReg ArgRegs[] = {
+ PPC::R3, PPC::R4, PPC::R5, PPC::R6,
+ PPC::R7, PPC::R8, PPC::R9, PPC::R10,
+ };
+ const unsigned NumArgRegs = array_lengthof(ArgRegs);
+
+ unsigned RegNum = State.getFirstUnallocated(ArgRegs);
+ int RegsLeft = NumArgRegs - RegNum;
+
+  // If there are not enough registers left for the long double type (it needs
+  // 4 GPRs in soft-float mode), skip them and put the long double on the stack.
+ if (RegNum != NumArgRegs && RegsLeft < 4) {
+ for (int i = 0; i < RegsLeft; i++) {
+ State.AllocateReg(ArgRegs[RegNum + i]);
+ }
+ }
+
+ return false;
+}
+
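Reading the new hook: getFirstUnallocated returns the index of the next free GPR in ArgRegs, so if, say, only R9 and R10 are still free when a soft-float ppcf128 (long double) argument is being assigned, RegsLeft is 2, the loop marks R9 and R10 as allocated so later arguments cannot take them, and the hook returns false so the normal calling-convention rules then place the whole long double on the stack. When four or more registers remain, nothing is skipped and the value is split across registers as usual.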
bool llvm::CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
@@ -2896,7 +2980,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
// AltiVec Technology Programming Interface Manual
MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
EVT PtrVT = getPointerTy(MF.getDataLayout());
@@ -2956,7 +3040,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
break;
case MVT::v2f64:
case MVT::v2i64:
- RC = &PPC::VSHRCRegClass;
+ RC = &PPC::VRRCRegClass;
break;
case MVT::v4f64:
RC = &PPC::QFRCRegClass;
@@ -2980,8 +3064,8 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
assert(VA.isMemLoc());
unsigned ArgSize = VA.getLocVT().getStoreSize();
- int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(),
- isImmutable);
+ int FI = MFI.CreateFixedObject(ArgSize, VA.getLocMemOffset(),
+ isImmutable);
// Create load nodes to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
@@ -3042,10 +3126,10 @@ SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
FuncInfo->setVarArgsStackOffset(
- MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
- CCInfo.getNextStackOffset(), true));
+ MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
+ CCInfo.getNextStackOffset(), true));
- FuncInfo->setVarArgsFrameIndex(MFI->CreateStackObject(Depth, 8, false));
+ FuncInfo->setVarArgsFrameIndex(MFI.CreateStackObject(Depth, 8, false));
SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
// The fixed integer arguments of a variadic function are stored to the
@@ -3118,7 +3202,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
bool isELFv2ABI = Subtarget.isELFv2ABI();
bool isLittleEndian = Subtarget.isLittleEndian();
MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
assert(!(CallConv == CallingConv::Fast && isVarArg) &&
@@ -3139,10 +3223,6 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
};
- static const MCPhysReg VSRH[] = {
- PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
- PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
- };
const unsigned Num_GPR_Regs = array_lengthof(GPR);
const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
@@ -3231,7 +3311,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
// pretend we have an 8-byte item at the current address for that
// purpose.
if (!ObjSize) {
- int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
+ int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
InVals.push_back(FIN);
continue;
@@ -3246,9 +3326,9 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
int FI;
if (HasParameterArea ||
ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
- FI = MFI->CreateFixedObject(ArgSize, ArgOffset, false, true);
+ FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
else
- FI = MFI->CreateStackObject(ArgSize, Align, false);
+ FI = MFI.CreateStackObject(ArgSize, Align, false);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
// Handle aggregates smaller than 8 bytes.
@@ -3418,9 +3498,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
// passed directly. The latter are used to implement ELFv2 homogenous
// vector aggregates.
if (VR_idx != Num_VR_Regs) {
- unsigned VReg = (ObjectVT == MVT::v2f64 || ObjectVT == MVT::v2i64) ?
- MF.addLiveIn(VSRH[VR_idx], &PPC::VSHRCRegClass) :
- MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
+ unsigned VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
++VR_idx;
} else {
@@ -3469,7 +3547,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
if (needsLoad) {
if (ObjSize < ArgSize && !isLittleEndian)
CurArgOffset += ArgSize - ObjSize;
- int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
+ int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
}
@@ -3498,7 +3576,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
int Depth = ArgOffset;
FuncInfo->setVarArgsFrameIndex(
- MFI->CreateFixedObject(PtrByteSize, Depth, true));
+ MFI.CreateFixedObject(PtrByteSize, Depth, true));
SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
// If this function is vararg, store any remaining integer argument regs
@@ -3530,7 +3608,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_Darwin(
// TODO: add description of PPC stack frame format, or at least some docs.
//
MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
EVT PtrVT = getPointerTy(MF.getDataLayout());
@@ -3665,7 +3743,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_Darwin(
CurArgOffset = CurArgOffset + (4 - ObjSize);
}
// The value of the object is its address.
- int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, false, true);
+ int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, false, true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
InVals.push_back(FIN);
if (ObjSize==1 || ObjSize==2) {
@@ -3698,7 +3776,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_Darwin(
VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
else
VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
- int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true);
+ int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN,
@@ -3735,7 +3813,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_Darwin(
ArgOffset += PtrByteSize;
break;
}
- // FALLTHROUGH
+ LLVM_FALLTHROUGH;
case MVT::i64: // PPC64
if (GPR_idx != Num_GPR_Regs) {
unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
@@ -3819,9 +3897,9 @@ SDValue PPCTargetLowering::LowerFormalArguments_Darwin(
// We need to load the argument to a virtual register if we determined above
// that we ran out of physical registers of the appropriate type.
if (needsLoad) {
- int FI = MFI->CreateFixedObject(ObjSize,
- CurArgOffset + (ArgSize - ObjSize),
- isImmutable);
+ int FI = MFI.CreateFixedObject(ObjSize,
+ CurArgOffset + (ArgSize - ObjSize),
+ isImmutable);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
}
@@ -3852,8 +3930,8 @@ SDValue PPCTargetLowering::LowerFormalArguments_Darwin(
int Depth = ArgOffset;
FuncInfo->setVarArgsFrameIndex(
- MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
- Depth, true));
+ MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
+ Depth, true));
SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
// If this function is vararg, store any remaining integer argument regs
@@ -4037,8 +4115,7 @@ PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
return false;
  // A caller containing any byval parameter is not supported.
- if (std::any_of(Ins.begin(), Ins.end(),
- [](const ISD::InputArg& IA) { return IA.Flags.isByVal(); }))
+ if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
return false;
  // Likewise, a callee containing any byval parameter is not supported.
@@ -4174,8 +4251,8 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
bool isPPC64 = Subtarget.isPPC64();
int SlotSize = isPPC64 ? 8 : 4;
int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
- int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize,
- NewRetAddrLoc, true);
+ int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
+ NewRetAddrLoc, true);
EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
@@ -4185,8 +4262,8 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
// slot as the FP is never overwritten.
if (Subtarget.isDarwinABI()) {
int NewFPLoc = SPDiff + FL->getFramePointerSaveOffset();
- int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc,
- true);
+ int NewFPIdx = MF.getFrameInfo().CreateFixedObject(SlotSize, NewFPLoc,
+ true);
SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
MachinePointerInfo::getFixedStack(
@@ -4203,8 +4280,8 @@ CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
SDValue Arg, int SPDiff, unsigned ArgOffset,
SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
int Offset = ArgOffset + SPDiff;
- uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8;
- int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
+ uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
+ int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
SDValue FIN = DAG.getFrameIndex(FI, VT);
TailCallArgumentInfo Info;
@@ -4430,7 +4507,8 @@ PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain,
LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2);
auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()
- ? MachineMemOperand::MOInvariant
+ ? (MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant)
: MachineMemOperand::MONone;
MachinePointerInfo MPI(CS ? CS->getCalledValue() : nullptr);
@@ -4610,7 +4688,7 @@ SDValue PPCTargetLowering::FinishCall(
isa<ConstantSDNode>(Callee)) &&
"Expecting an global address, external symbol, absolute value or register");
- DAG.getMachineFunction().getFrameInfo()->setHasTailCall();
+ DAG.getMachineFunction().getFrameInfo().setHasTailCall();
return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, Ops);
}
@@ -5026,10 +5104,6 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
};
- static const MCPhysReg VSRH[] = {
- PPC::VSH2, PPC::VSH3, PPC::VSH4, PPC::VSH5, PPC::VSH6, PPC::VSH7, PPC::VSH8,
- PPC::VSH9, PPC::VSH10, PPC::VSH11, PPC::VSH12, PPC::VSH13
- };
const unsigned NumGPRs = array_lengthof(GPR);
const unsigned NumFPRs = 13;
@@ -5456,13 +5530,7 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
SDValue Load =
DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
MemOpChains.push_back(Load.getValue(1));
-
- unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 ||
- Arg.getSimpleValueType() == MVT::v2i64) ?
- VSRH[VR_idx] : VR[VR_idx];
- ++VR_idx;
-
- RegsToPass.push_back(std::make_pair(VReg, Load));
+ RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
}
ArgOffset += 16;
for (unsigned i=0; i<16; i+=PtrByteSize) {
@@ -5480,12 +5548,7 @@ SDValue PPCTargetLowering::LowerCall_64SVR4(
// Non-varargs Altivec params go into VRs or on the stack.
if (VR_idx != NumVRs) {
- unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 ||
- Arg.getSimpleValueType() == MVT::v2i64) ?
- VSRH[VR_idx] : VR[VR_idx];
- ++VR_idx;
-
- RegsToPass.push_back(std::make_pair(VReg, Arg));
+ RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
} else {
if (CallConv == CallingConv::Fast)
ComputePtrOff();
@@ -6126,7 +6189,7 @@ SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
// Find out what the fixed offset of the return address save area is.
int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
// Allocate the frame index for the return address save area.
- RASI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
+ RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
// Save the result.
FI->setReturnAddrSaveIndex(RASI);
}
@@ -6149,7 +6212,7 @@ PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
// Find out what the fixed offset of the frame pointer save area is.
int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
// Allocate the frame index for frame pointer save area.
- FPSI = MF.getFrameInfo()->CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
+ FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
// Save the result.
FI->setFramePointerSaveIndex(FPSI);
}
@@ -6183,7 +6246,7 @@ SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
bool isPPC64 = Subtarget.isPPC64();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
- int FI = MF.getFrameInfo()->CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
+ int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
return DAG.getFrameIndex(FI, PtrVT);
}
@@ -6467,10 +6530,7 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
- RLI.Alignment,
- RLI.IsInvariant ? MachineMemOperand::MOInvariant
- : MachineMemOperand::MONone,
- RLI.AAInfo, RLI.Ranges);
+ RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
}
// We're trying to insert a regular store, S, and then a load, L. If the
@@ -6513,6 +6573,7 @@ bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
RLI.Chain = LD->getChain();
RLI.MPI = LD->getPointerInfo();
+ RLI.IsDereferenceable = LD->isDereferenceable();
RLI.IsInvariant = LD->isInvariant();
RLI.Alignment = LD->getAlignment();
RLI.AAInfo = LD->getAAInfo();
@@ -6545,11 +6606,17 @@ void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
/// \brief Analyze the profitability of a direct move:
/// prefer a float load over an int load plus a direct move
/// when there is no integer use of the int load.
-static bool directMoveIsProfitable(const SDValue &Op) {
+bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
SDNode *Origin = Op.getOperand(0).getNode();
if (Origin->getOpcode() != ISD::LOAD)
return true;
+ // If there is no LXSIBZX/LXSIHZX, like Power8,
+ // prefer direct move if the memory size is 1 or 2 bytes.
+ MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
+ if (!Subtarget.hasP9Vector() && MMO->getSize() <= 2)
+ return true;
+
for (SDNode::use_iterator UI = Origin->use_begin(),
UE = Origin->use_end();
UI != UE; ++UI) {
@@ -6705,11 +6772,8 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
MachineFunction &MF = DAG.getMachineFunction();
if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
- Bits =
- DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI, RLI.Alignment,
- RLI.IsInvariant ? MachineMemOperand::MOInvariant
- : MachineMemOperand::MONone,
- RLI.AAInfo, RLI.Ranges);
+ Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
+ RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
} else if (Subtarget.hasLFIWAX() &&
canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
@@ -6736,10 +6800,10 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
(Subtarget.hasFPCVT() &&
SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
SINT.getOperand(0).getValueType() == MVT::i32) {
- MachineFrameInfo *FrameInfo = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
- int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
+ int FrameIdx = MFI.CreateStackObject(4, 4, false);
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
SDValue Store =
@@ -6782,7 +6846,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
// 64-bit register with extsw, store the WHOLE 64-bit value into the stack
// then lfd it and fcfid it.
MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *FrameInfo = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
EVT PtrVT = getPointerTy(MF.getDataLayout());
SDValue Ld;
@@ -6791,7 +6855,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
bool ReusingLoad;
if (!(ReusingLoad = canReuseLoadAddress(Op.getOperand(0), MVT::i32, RLI,
DAG))) {
- int FrameIdx = FrameInfo->CreateStackObject(4, 4, false);
+ int FrameIdx = MFI.CreateStackObject(4, 4, false);
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
SDValue Store =
@@ -6823,7 +6887,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
assert(Subtarget.isPPC64() &&
"i32->FP without LFIWAX supported only on PPC64");
- int FrameIdx = FrameInfo->CreateStackObject(8, 8, false);
+ int FrameIdx = MFI.CreateStackObject(8, 8, false);
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64,
@@ -6882,7 +6946,7 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, None);
// Save FP register to stack slot
- int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false);
+ int SSFI = MF.getFrameInfo().CreateStackObject(8, 8, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain, StackSlot,
MachinePointerInfo());
@@ -7068,6 +7132,57 @@ static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
return DAG.getNode(ISD::BITCAST, dl, VT, T);
}
+/// Do we have an efficient pattern in a .td file for this node?
+///
+/// \param V - pointer to the BuildVectorSDNode being matched
+/// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
+///
+/// There are some patterns where it is beneficial to keep a BUILD_VECTOR
+/// node as a BUILD_VECTOR node rather than expanding it. The patterns where
+/// the opposite is true (expansion is beneficial) are:
+/// - The node builds a vector out of integers that are not 32 or 64-bits
+/// - The node builds a vector out of constants
+/// - The node is a "load-and-splat"
+/// In all other cases, we will choose to keep the BUILD_VECTOR.
+static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
+ bool HasDirectMove) {
+ EVT VecVT = V->getValueType(0);
+ bool RightType = VecVT == MVT::v2f64 || VecVT == MVT::v4f32 ||
+ (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
+ if (!RightType)
+ return false;
+
+ bool IsSplat = true;
+ bool IsLoad = false;
+ SDValue Op0 = V->getOperand(0);
+
+ // This function is called in a block that confirms the node is not a constant
+ // splat. So a constant BUILD_VECTOR here means the vector is built out of
+ // different constants.
+ if (V->isConstant())
+ return false;
+ for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
+ if (V->getOperand(i).isUndef())
+ return false;
+ // We want to expand nodes that represent load-and-splat even if the
+ // loaded value is a floating point truncation or conversion to int.
+ if (V->getOperand(i).getOpcode() == ISD::LOAD ||
+ (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
+ V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
+ (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
+ V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
+ (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
+ V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
+ IsLoad = true;
+ // If the operands are different or the input is not a load and has more
+ // uses than just this BV node, then it isn't a splat.
+ if (V->getOperand(i) != Op0 ||
+ (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
+ IsSplat = false;
+ }
+ return !(IsSplat && IsLoad);
+}
+
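A few concrete v4f32 shapes make the conditions above tangible; the values are illustrative only and not taken from the patch, written as C++-style comments to match the surrounding code:

// Illustrative examples for haveEfficientBuildVectorPattern (hypothetical values):
//   (v4f32 (build_vector %a, %b, %c, %d)), distinct non-constant floats
//       -> true: keep the BUILD_VECTOR node.
//   (v4f32 (build_vector 1.0, 2.0, 3.0, 4.0)), built out of constants
//       -> false: expansion is preferred.
//   (v4f32 (build_vector %x, %x, %x, %x)) where %x = (load %p)
//       -> false: this is a load-and-splat.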
// If this is a case we can't handle, return null and let the default
// expansion code take care of it. If we CAN select this case, and if it
// selects to a single instruction, return Op. Otherwise, if we can codegen
@@ -7083,8 +7198,8 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
// We first build an i32 vector, load it into a QPX register,
// then convert it to a floating-point vector and compare it
// to a zero vector to get the boolean result.
- MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
- int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ int FrameIdx = MFI.CreateStackObject(16, 16, false);
MachinePointerInfo PtrInfo =
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
@@ -7189,8 +7304,15 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
bool HasAnyUndefs;
if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
- SplatBitSize > 32)
+ SplatBitSize > 32) {
+ // BUILD_VECTOR nodes that are not constant splats of up to 32-bits can be
+ // lowered to VSX instructions under certain conditions.
+ // Without VSX, there is no pattern more efficient than expanding the node.
+ if (Subtarget.hasVSX() &&
+ haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove()))
+ return Op;
return SDValue();
+ }
unsigned SplatBits = APSplatBits.getZExtValue();
unsigned SplatUndef = APSplatUndef.getZExtValue();
@@ -7208,6 +7330,22 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
return Op;
}
+ // We have XXSPLTIB for constant splats one byte wide
+ if (Subtarget.hasP9Vector() && SplatSize == 1) {
+ // This is a splat of 1-byte elements with some elements potentially undef.
+ // Rather than trying to match undef in the SDAG patterns, ensure that all
+ // elements are the same constant.
+ if (HasAnyUndefs || ISD::isBuildVectorAllOnes(BVN)) {
+ SmallVector<SDValue, 16> Ops(16, DAG.getConstant(SplatBits,
+ dl, MVT::i32));
+ SDValue NewBV = DAG.getBuildVector(MVT::v16i8, dl, Ops);
+ if (Op.getValueType() != MVT::v16i8)
+ return DAG.getBitcast(Op.getValueType(), NewBV);
+ return NewBV;
+ }
+ return Op;
+ }
+
// If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
(32-SplatBitSize));
@@ -7451,6 +7589,18 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
if (Subtarget.hasVSX()) {
if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
int SplatIdx = PPC::getVSPLTImmediate(SVOp, 4, DAG);
+
+ // If the source for the shuffle is a scalar_to_vector that came from a
+ // 32-bit load, it will have used LXVWSX so we don't need to splat again.
+ if (Subtarget.hasP9Vector() &&
+ ((isLittleEndian && SplatIdx == 3) ||
+ (!isLittleEndian && SplatIdx == 0))) {
+ SDValue Src = V1.getOperand(0);
+ if (Src.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ Src.getOperand(0).getOpcode() == ISD::LOAD &&
+ Src.getOperand(0).hasOneUse())
+ return V1;
+ }
SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
DAG.getConstant(SplatIdx, dl, MVT::i32));
@@ -7662,6 +7812,27 @@ static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
return false;
break;
+ case Intrinsic::ppc_altivec_vcmpneb_p:
+ case Intrinsic::ppc_altivec_vcmpneh_p:
+ case Intrinsic::ppc_altivec_vcmpnew_p:
+ case Intrinsic::ppc_altivec_vcmpnezb_p:
+ case Intrinsic::ppc_altivec_vcmpnezh_p:
+ case Intrinsic::ppc_altivec_vcmpnezw_p:
+ if (Subtarget.hasP9Altivec()) {
+ switch(IntrinsicID) {
+ default: llvm_unreachable("Unknown comparison intrinsic.");
+ case Intrinsic::ppc_altivec_vcmpneb_p: CompareOpc = 7; break;
+ case Intrinsic::ppc_altivec_vcmpneh_p: CompareOpc = 71; break;
+ case Intrinsic::ppc_altivec_vcmpnew_p: CompareOpc = 135; break;
+ case Intrinsic::ppc_altivec_vcmpnezb_p: CompareOpc = 263; break;
+ case Intrinsic::ppc_altivec_vcmpnezh_p: CompareOpc = 327; break;
+ case Intrinsic::ppc_altivec_vcmpnezw_p: CompareOpc = 391; break;
+ }
+ isDot = 1;
+ } else
+ return false;
+
+ break;
case Intrinsic::ppc_altivec_vcmpgefp_p: CompareOpc = 454; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtfp_p: CompareOpc = 710; isDot = 1; break;
case Intrinsic::ppc_altivec_vcmpgtsb_p: CompareOpc = 774; isDot = 1; break;
@@ -7723,6 +7894,26 @@ static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
return false;
break;
+ case Intrinsic::ppc_altivec_vcmpneb:
+ case Intrinsic::ppc_altivec_vcmpneh:
+ case Intrinsic::ppc_altivec_vcmpnew:
+ case Intrinsic::ppc_altivec_vcmpnezb:
+ case Intrinsic::ppc_altivec_vcmpnezh:
+ case Intrinsic::ppc_altivec_vcmpnezw:
+ if (Subtarget.hasP9Altivec()) {
+ switch (IntrinsicID) {
+ default: llvm_unreachable("Unknown comparison intrinsic.");
+ case Intrinsic::ppc_altivec_vcmpneb: CompareOpc = 7; break;
+ case Intrinsic::ppc_altivec_vcmpneh: CompareOpc = 71; break;
+ case Intrinsic::ppc_altivec_vcmpnew: CompareOpc = 135; break;
+ case Intrinsic::ppc_altivec_vcmpnezb: CompareOpc = 263; break;
+ case Intrinsic::ppc_altivec_vcmpnezh: CompareOpc = 327; break;
+ case Intrinsic::ppc_altivec_vcmpnezw: CompareOpc = 391; break;
+ }
+ isDot = 0;
+ } else
+ return false;
+ break;
case Intrinsic::ppc_altivec_vcmpgefp: CompareOpc = 454; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtfp: CompareOpc = 710; isDot = 0; break;
case Intrinsic::ppc_altivec_vcmpgtsb: CompareOpc = 774; isDot = 0; break;
@@ -7857,8 +8048,8 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
SelectionDAG &DAG) const {
SDLoc dl(Op);
// Create a stack slot that is 16-byte aligned.
- MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
- int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ int FrameIdx = MFI.CreateStackObject(16, 16, false);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
@@ -7909,8 +8100,8 @@ SDValue PPCTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
Value);
- MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
- int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ int FrameIdx = MFI.CreateStackObject(16, 16, false);
MachinePointerInfo PtrInfo =
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
@@ -8109,8 +8300,8 @@ SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
DAG.getConstant(Intrinsic::ppc_qpx_qvfctiwu, dl, MVT::i32),
Value);
- MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
- int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ int FrameIdx = MFI.CreateStackObject(16, 16, false);
MachinePointerInfo PtrInfo =
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx);
EVT PtrVT = getPointerTy(DAG.getDataLayout());
@@ -8545,6 +8736,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI,
// registers without caring whether they're 32 or 64, but here we're
// doing actual arithmetic on the addresses.
bool is64bit = Subtarget.isPPC64();
+ bool isLittleEndian = Subtarget.isLittleEndian();
unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
const BasicBlock *LLVM_BB = BB->getBasicBlock();
@@ -8574,7 +8766,8 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI,
: &PPC::GPRCRegClass;
unsigned PtrReg = RegInfo.createVirtualRegister(RC);
unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
- unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
+ unsigned ShiftReg =
+ isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RC);
unsigned Incr2Reg = RegInfo.createVirtualRegister(RC);
unsigned MaskReg = RegInfo.createVirtualRegister(RC);
unsigned Mask2Reg = RegInfo.createVirtualRegister(RC);
@@ -8619,8 +8812,9 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr &MI,
}
BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
.addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
- BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
- .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
+ if (!isLittleEndian)
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
+ .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
if (is64bit)
BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
.addReg(Ptr1Reg).addImm(0).addImm(61);
@@ -9325,6 +9519,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
// since we're actually doing arithmetic on them. Other registers
// can be 32-bit.
bool is64bit = Subtarget.isPPC64();
+ bool isLittleEndian = Subtarget.isLittleEndian();
bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
unsigned dest = MI.getOperand(0).getReg();
@@ -9351,7 +9546,8 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
: &PPC::GPRCRegClass;
unsigned PtrReg = RegInfo.createVirtualRegister(RC);
unsigned Shift1Reg = RegInfo.createVirtualRegister(RC);
- unsigned ShiftReg = RegInfo.createVirtualRegister(RC);
+ unsigned ShiftReg =
+ isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(RC);
unsigned NewVal2Reg = RegInfo.createVirtualRegister(RC);
unsigned NewVal3Reg = RegInfo.createVirtualRegister(RC);
unsigned OldVal2Reg = RegInfo.createVirtualRegister(RC);
@@ -9406,8 +9602,9 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
}
BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg).addReg(Ptr1Reg)
.addImm(3).addImm(27).addImm(is8bit ? 28 : 27);
- BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
- .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
+ if (!isLittleEndian)
+ BuildMI(BB, dl, TII->get(is64bit ? PPC::XORI8 : PPC::XORI), ShiftReg)
+ .addReg(Shift1Reg).addImm(is8bit ? 24 : 16);
if (is64bit)
BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
.addReg(Ptr1Reg).addImm(0).addImm(61);
@@ -9532,23 +9729,21 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
// Target Optimization Hooks
//===----------------------------------------------------------------------===//
-static std::string getRecipOp(const char *Base, EVT VT) {
- std::string RecipOp(Base);
+static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
+ // For the estimates, convergence is quadratic, so we essentially double the
+ // number of digits correct after every iteration. For both FRE and FRSQRTE,
+ // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
+ // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
+ int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
if (VT.getScalarType() == MVT::f64)
- RecipOp += "d";
- else
- RecipOp += "f";
-
- if (VT.isVector())
- RecipOp = "vec-" + RecipOp;
-
- return RecipOp;
+ RefinementSteps++;
+ return RefinementSteps;
}
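As a cross-check of the step counts chosen above, here is a minimal standalone sketch of the doubling arithmetic; the helper name and the main() harness are illustrative, not part of the patch:

#include <cassert>

// Smallest number of refinement steps needed to grow an estimate accurate to
// InitialBits up to MantissaBits, doubling the accurate bits on every step.
static int stepsNeeded(int InitialBits, int MantissaBits) {
  int Steps = 0;
  for (int Bits = InitialBits; Bits < MantissaBits; Bits *= 2)
    ++Steps;
  return Steps;
}

int main() {
  // 2^-5 estimates: 3 steps reach f32 precision (24 bits including the
  // implicit bit), 4 steps reach f64 precision (53 bits) -- hence the +1.
  assert(stepsNeeded(5, 24) == 3 && stepsNeeded(5, 53) == 4);
  // 2^-14 estimates (hasRecipPrec()): 1 step for f32, 2 for f64.
  assert(stepsNeeded(14, 24) == 1 && stepsNeeded(14, 53) == 2);
  return 0;
}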
-SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand,
- DAGCombinerInfo &DCI,
- unsigned &RefinementSteps,
- bool &UseOneConstNR) const {
+SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
+ int Enabled, int &RefinementSteps,
+ bool &UseOneConstNR,
+ bool Reciprocal) const {
EVT VT = Operand.getValueType();
if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
(VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
@@ -9556,21 +9751,18 @@ SDValue PPCTargetLowering::getRsqrtEstimate(SDValue Operand,
(VT == MVT::v2f64 && Subtarget.hasVSX()) ||
(VT == MVT::v4f32 && Subtarget.hasQPX()) ||
(VT == MVT::v4f64 && Subtarget.hasQPX())) {
- TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
- std::string RecipOp = getRecipOp("sqrt", VT);
- if (!Recips.isEnabled(RecipOp))
- return SDValue();
+ if (RefinementSteps == ReciprocalEstimate::Unspecified)
+ RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
- RefinementSteps = Recips.getRefinementSteps(RecipOp);
UseOneConstNR = true;
- return DCI.DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
+ return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
}
return SDValue();
}
-SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand,
- DAGCombinerInfo &DCI,
- unsigned &RefinementSteps) const {
+SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
+ int Enabled,
+ int &RefinementSteps) const {
EVT VT = Operand.getValueType();
if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
(VT == MVT::f64 && Subtarget.hasFRE()) ||
@@ -9578,13 +9770,9 @@ SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand,
(VT == MVT::v2f64 && Subtarget.hasVSX()) ||
(VT == MVT::v4f32 && Subtarget.hasQPX()) ||
(VT == MVT::v4f64 && Subtarget.hasQPX())) {
- TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
- std::string RecipOp = getRecipOp("div", VT);
- if (!Recips.isEnabled(RecipOp))
- return SDValue();
-
- RefinementSteps = Recips.getRefinementSteps(RecipOp);
- return DCI.DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
+ if (RefinementSteps == ReciprocalEstimate::Unspecified)
+ RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
+ return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
}
return SDValue();
}
@@ -9635,13 +9823,13 @@ static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base,
if (Loc.getOpcode() == ISD::FrameIndex) {
if (BaseLoc.getOpcode() != ISD::FrameIndex)
return false;
- const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
- int FS = MFI->getObjectSize(FI);
- int BFS = MFI->getObjectSize(BFI);
+ int FS = MFI.getObjectSize(FI);
+ int BFS = MFI.getObjectSize(BFI);
if (FS != BFS || FS != (int)Bytes) return false;
- return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
+ return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
}
SDValue Base1 = Loc, Base2 = BaseLoc;
@@ -9699,9 +9887,11 @@ static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
case Intrinsic::ppc_altivec_lvx:
case Intrinsic::ppc_altivec_lvxl:
case Intrinsic::ppc_vsx_lxvw4x:
+ case Intrinsic::ppc_vsx_lxvw4x_be:
VT = MVT::v4i32;
break;
case Intrinsic::ppc_vsx_lxvd2x:
+ case Intrinsic::ppc_vsx_lxvd2x_be:
VT = MVT::v2f64;
break;
case Intrinsic::ppc_altivec_lvebx:
@@ -9748,6 +9938,12 @@ static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base,
case Intrinsic::ppc_vsx_stxvd2x:
VT = MVT::v2f64;
break;
+ case Intrinsic::ppc_vsx_stxvw4x_be:
+ VT = MVT::v4i32;
+ break;
+ case Intrinsic::ppc_vsx_stxvd2x_be:
+ VT = MVT::v2f64;
+ break;
case Intrinsic::ppc_altivec_stvebx:
VT = MVT::i8;
break;
@@ -9833,6 +10029,87 @@ static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) {
return false;
}
+
+/// This function is called when we have proved that a SETCC node can be replaced
+/// by subtraction (and other supporting instructions) so that the result of
+/// comparison is kept in a GPR instead of CR. This function is purely for
+/// codegen purposes and has some flags to guide the codegen process.
+static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
+ bool Swap, SDLoc &DL, SelectionDAG &DAG) {
+
+ assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
+
+ // Zero extend the operands to the largest legal integer. The original
+ // operands must be of a strictly smaller size.
+ auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
+ DAG.getConstant(Size, DL, MVT::i32));
+ auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
+ DAG.getConstant(Size, DL, MVT::i32));
+
+ // Swap if needed. Depends on the condition code.
+ if (Swap)
+ std::swap(Op0, Op1);
+
+ // Subtract extended integers.
+ auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
+
+ // Move the sign bit to the least significant position and zero out the rest.
+ // Now the least significant bit carries the result of original comparison.
+ auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
+ DAG.getConstant(Size - 1, DL, MVT::i32));
+ auto Final = Shifted;
+
+ // Complement the result if needed. Based on the condition code.
+ if (Complement)
+ Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
+ DAG.getConstant(1, DL, MVT::i64));
+
+ return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
+}
+
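The borrow trick used above can be checked with plain scalar arithmetic; a small sketch, assuming 32-bit operands widened to the 64-bit largest legal type (helper name and test values are illustrative only):

#include <cassert>
#include <cstdint>

// a < b for unsigned 32-bit values, computed as in generateEquivalentSub:
// zero-extend to 64 bits, subtract, and read the borrow out of bit 63.
static uint64_t ultViaSub(uint32_t A, uint32_t B) {
  uint64_t Sub = (uint64_t)A - (uint64_t)B; // zero-extend, then subtract
  return Sub >> 63;                         // bit 63 is set iff A < B
}

int main() {
  assert(ultViaSub(3, 5) == 1 && ultViaSub(5, 3) == 0 && ultViaSub(7, 7) == 0);
  // SETUGE complements the final bit; SETUGT and SETULE swap the operands.
  return 0;
}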
+SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+
+ assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc DL(N);
+
+ // Size of integers being compared has a critical role in the following
+ // analysis, so we prefer to do this when all types are legal.
+ if (!DCI.isAfterLegalizeVectorOps())
+ return SDValue();
+
+ // If all users of SETCC extend its value to a legal integer type
+ // then we replace SETCC with a subtraction
+ for (SDNode::use_iterator UI = N->use_begin(),
+ UE = N->use_end(); UI != UE; ++UI) {
+ if (UI->getOpcode() != ISD::ZERO_EXTEND)
+ return SDValue();
+ }
+
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+ auto OpSize = N->getOperand(0).getValueSizeInBits();
+
+ unsigned Size = DAG.getDataLayout().getLargestLegalIntTypeSizeInBits();
+
+ if (OpSize < Size) {
+ switch (CC) {
+ default: break;
+ case ISD::SETULT:
+ return generateEquivalentSub(N, Size, false, false, DL, DAG);
+ case ISD::SETULE:
+ return generateEquivalentSub(N, Size, true, true, DL, DAG);
+ case ISD::SETUGT:
+ return generateEquivalentSub(N, Size, false, true, DL, DAG);
+ case ISD::SETUGE:
+ return generateEquivalentSub(N, Size, true, false, DL, DAG);
+ }
+ }
+
+ return SDValue();
+}
+
SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -9874,7 +10151,8 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
APInt::getHighBitsSet(OpBits, OpBits-1)) ||
!DAG.MaskedValueIsZero(N->getOperand(1),
APInt::getHighBitsSet(OpBits, OpBits-1)))
- return SDValue();
+ return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
+ : SDValue());
} else {
// This is neither a signed nor an unsigned comparison, just make sure
// that the high bits are equal.
@@ -10398,6 +10676,173 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
ShiftCst);
}
+/// \brief Reduces the number of fp-to-int conversion when building a vector.
+///
+/// If this vector is built out of floating to integer conversions,
+/// transform it to a vector built out of floating point values followed by a
+/// single floating to integer conversion of the vector.
+/// Namely (build_vector (fptosi $A), (fptosi $B), ...)
+/// becomes (fptosi (build_vector ($A, $B, ...)))
+SDValue PPCTargetLowering::
+combineElementTruncationToVectorTruncation(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR &&
+ "Should be called with a BUILD_VECTOR node");
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc dl(N);
+
+ SDValue FirstInput = N->getOperand(0);
+ assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
+ "The input operand must be an fp-to-int conversion.");
+
+ // This combine happens after legalization so the fp_to_[su]i nodes are
+ // already converted to PPCSISD nodes.
+ unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
+ if (FirstConversion == PPCISD::FCTIDZ ||
+ FirstConversion == PPCISD::FCTIDUZ ||
+ FirstConversion == PPCISD::FCTIWZ ||
+ FirstConversion == PPCISD::FCTIWUZ) {
+ bool IsSplat = true;
+ bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
+ FirstConversion == PPCISD::FCTIWUZ;
+ EVT SrcVT = FirstInput.getOperand(0).getValueType();
+ SmallVector<SDValue, 4> Ops;
+ EVT TargetVT = N->getValueType(0);
+ for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
+ if (N->getOperand(i).getOpcode() != PPCISD::MFVSR)
+ return SDValue();
+ unsigned NextConversion = N->getOperand(i).getOperand(0).getOpcode();
+ if (NextConversion != FirstConversion)
+ return SDValue();
+ if (N->getOperand(i) != FirstInput)
+ IsSplat = false;
+ }
+
+ // If this is a splat, we leave it as-is since there will be only a single
+ // fp-to-int conversion followed by a splat of the integer. This is better
+ // for 32-bit and smaller ints and neutral for 64-bit ints.
+ if (IsSplat)
+ return SDValue();
+
+ // Now that we know we have the right type of node, get its operands
+ for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
+ SDValue In = N->getOperand(i).getOperand(0);
+ // For 32-bit values, we need to add an FP_ROUND node.
+ if (Is32Bit) {
+ if (In.isUndef())
+ Ops.push_back(DAG.getUNDEF(SrcVT));
+ else {
+ SDValue Trunc = DAG.getNode(ISD::FP_ROUND, dl,
+ MVT::f32, In.getOperand(0),
+ DAG.getIntPtrConstant(1, dl));
+ Ops.push_back(Trunc);
+ }
+ } else
+ Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
+ }
+
+ unsigned Opcode;
+ if (FirstConversion == PPCISD::FCTIDZ ||
+ FirstConversion == PPCISD::FCTIWZ)
+ Opcode = ISD::FP_TO_SINT;
+ else
+ Opcode = ISD::FP_TO_UINT;
+
+ EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
+ SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
+ return DAG.getNode(Opcode, dl, TargetVT, BV);
+ }
+ return SDValue();
+}
+
+/// \brief Reduce the number of loads when building a vector.
+///
+/// Building a vector out of multiple loads can be converted to a load
+/// of the vector type if the loads are consecutive. If the loads are
+/// consecutive but in descending order, a shuffle is added at the end
+/// to reorder the vector.
+static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG) {
+ assert(N->getOpcode() == ISD::BUILD_VECTOR &&
+ "Should be called with a BUILD_VECTOR node");
+
+ SDLoc dl(N);
+ bool InputsAreConsecutiveLoads = true;
+ bool InputsAreReverseConsecutive = true;
+ unsigned ElemSize = N->getValueType(0).getScalarSizeInBits() / 8;
+ SDValue FirstInput = N->getOperand(0);
+ bool IsRoundOfExtLoad = false;
+
+ if (FirstInput.getOpcode() == ISD::FP_ROUND &&
+ FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
+ LoadSDNode *LD = dyn_cast<LoadSDNode>(FirstInput.getOperand(0));
+ IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD;
+ }
+ // Not a build vector of (possibly fp_rounded) loads.
+ if (!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD)
+ return SDValue();
+
+ for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
+ // If any inputs are fp_round(extload), they all must be.
+ if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
+ return SDValue();
+
+ SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
+ N->getOperand(i);
+ if (NextInput.getOpcode() != ISD::LOAD)
+ return SDValue();
+
+ SDValue PreviousInput =
+ IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
+ LoadSDNode *LD1 = dyn_cast<LoadSDNode>(PreviousInput);
+ LoadSDNode *LD2 = dyn_cast<LoadSDNode>(NextInput);
+
+ // If any inputs are fp_round(extload), they all must be.
+ if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
+ return SDValue();
+
+ if (!isConsecutiveLS(LD2, LD1, ElemSize, 1, DAG))
+ InputsAreConsecutiveLoads = false;
+ if (!isConsecutiveLS(LD1, LD2, ElemSize, 1, DAG))
+ InputsAreReverseConsecutive = false;
+
+ // Exit early if the loads are neither consecutive nor reverse consecutive.
+ if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
+ return SDValue();
+ }
+
+ assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
+ "The loads cannot be both consecutive and reverse consecutive.");
+
+ SDValue FirstLoadOp =
+ IsRoundOfExtLoad ? FirstInput.getOperand(0) : FirstInput;
+ SDValue LastLoadOp =
+ IsRoundOfExtLoad ? N->getOperand(N->getNumOperands()-1).getOperand(0) :
+ N->getOperand(N->getNumOperands()-1);
+
+ LoadSDNode *LD1 = dyn_cast<LoadSDNode>(FirstLoadOp);
+ LoadSDNode *LDL = dyn_cast<LoadSDNode>(LastLoadOp);
+ if (InputsAreConsecutiveLoads) {
+ assert(LD1 && "Input needs to be a LoadSDNode.");
+ return DAG.getLoad(N->getValueType(0), dl, LD1->getChain(),
+ LD1->getBasePtr(), LD1->getPointerInfo(),
+ LD1->getAlignment());
+ }
+ if (InputsAreReverseConsecutive) {
+ assert(LDL && "Input needs to be a LoadSDNode.");
+ SDValue Load = DAG.getLoad(N->getValueType(0), dl, LDL->getChain(),
+ LDL->getBasePtr(), LDL->getPointerInfo(),
+ LDL->getAlignment());
+ SmallVector<int, 16> Ops;
+ for (int i = N->getNumOperands() - 1; i >= 0; i--)
+ Ops.push_back(i);
+
+ return DAG.getVectorShuffle(N->getValueType(0), dl, Load,
+ DAG.getUNDEF(N->getValueType(0)), Ops);
+ }
+ return SDValue();
+}
+
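For the descending-address case above, the shuffle mask built from Ops is simply the lane reversal {NumElts-1, ..., 1, 0}; a small illustrative sketch (the helper name and the address layout in the comment are assumptions, not from the patch):

#include <vector>

// Lane-reversal mask for an N-element vector: a v4f32 built from loads at
// p+12, p+8, p+4, p+0 becomes a single v4f32 load at p followed by a
// shuffle with this mask.
static std::vector<int> reverseMask(int NumElts) {
  std::vector<int> Mask;
  for (int i = NumElts - 1; i >= 0; --i)
    Mask.push_back(i);
  return Mask; // {3, 2, 1, 0} for a 4-element vector
}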
SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
DAGCombinerInfo &DCI) const {
assert(N->getOpcode() == ISD::BUILD_VECTOR &&
@@ -10405,21 +10850,41 @@ SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
SelectionDAG &DAG = DCI.DAG;
SDLoc dl(N);
- if (N->getValueType(0) != MVT::v2f64 || !Subtarget.hasVSX())
+
+ if (!Subtarget.hasVSX())
+ return SDValue();
+
+ // The target independent DAG combiner will leave a build_vector of
+ // float-to-int conversions intact. We can generate MUCH better code for
+ // a float-to-int conversion of a vector of floats.
+ SDValue FirstInput = N->getOperand(0);
+ if (FirstInput.getOpcode() == PPCISD::MFVSR) {
+ SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
+ if (Reduced)
+ return Reduced;
+ }
+
+ // If we're building a vector out of consecutive loads, just load that
+ // vector type.
+ SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
+ if (Reduced)
+ return Reduced;
+
+ if (N->getValueType(0) != MVT::v2f64)
return SDValue();
// Looking for:
// (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
- if (N->getOperand(0).getOpcode() != ISD::SINT_TO_FP &&
- N->getOperand(0).getOpcode() != ISD::UINT_TO_FP)
+ if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
+ FirstInput.getOpcode() != ISD::UINT_TO_FP)
return SDValue();
if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
return SDValue();
- if (N->getOperand(0).getOpcode() != N->getOperand(1).getOpcode())
+ if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
return SDValue();
- SDValue Ext1 = N->getOperand(0).getOperand(0);
+ SDValue Ext1 = FirstInput.getOperand(0);
SDValue Ext2 = N->getOperand(1).getOperand(0);
if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
Ext2.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
@@ -10464,6 +10929,34 @@ SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
SDLoc dl(N);
SDValue Op(N, 0);
+ SDValue FirstOperand(Op.getOperand(0));
+ bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
+ (FirstOperand.getValueType() == MVT::i8 ||
+ FirstOperand.getValueType() == MVT::i16);
+ if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
+ bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
+ bool DstDouble = Op.getValueType() == MVT::f64;
+ unsigned ConvOp = Signed ?
+ (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
+ (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
+ SDValue WidthConst =
+ DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
+ dl, false);
+ LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
+ SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
+ SDValue Ld = DAG.getMemIntrinsicNode(PPCISD::LXSIZX, dl,
+ DAG.getVTList(MVT::f64, MVT::Other),
+ Ops, MVT::i8, LDN->getMemOperand());
+
+ // For signed conversion, we need to sign-extend the value in the VSR
+ if (Signed) {
+ SDValue ExtOps[] = { Ld, WidthConst };
+ SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
+ return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
+ } else
+ return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
+ }
+
// Don't handle ppc_fp128 here or i1 conversions.
if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
return SDValue();
@@ -10676,10 +11169,14 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::UINT_TO_FP:
return combineFPToIntToFP(N, DCI);
case ISD::STORE: {
+ EVT Op1VT = N->getOperand(1).getValueType();
+ bool ValidTypeForStoreFltAsInt = (Op1VT == MVT::i32) ||
+ (Subtarget.hasP9Vector() && (Op1VT == MVT::i8 || Op1VT == MVT::i16));
+
// Turn STORE (FP_TO_SINT F) -> STFIWX(FCTIWZ(F)).
if (Subtarget.hasSTFIWX() && !cast<StoreSDNode>(N)->isTruncatingStore() &&
N->getOperand(1).getOpcode() == ISD::FP_TO_SINT &&
- N->getOperand(1).getValueType() == MVT::i32 &&
+ ValidTypeForStoreFltAsInt &&
N->getOperand(1).getOperand(0).getValueType() != MVT::ppcf128) {
SDValue Val = N->getOperand(1).getOperand(0);
if (Val.getValueType() == MVT::f32) {
@@ -10689,15 +11186,31 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
Val = DAG.getNode(PPCISD::FCTIWZ, dl, MVT::f64, Val);
DCI.AddToWorklist(Val.getNode());
- SDValue Ops[] = {
- N->getOperand(0), Val, N->getOperand(2),
- DAG.getValueType(N->getOperand(1).getValueType())
- };
+ if (Op1VT == MVT::i32) {
+ SDValue Ops[] = {
+ N->getOperand(0), Val, N->getOperand(2),
+ DAG.getValueType(N->getOperand(1).getValueType())
+ };
+
+ Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
+ DAG.getVTList(MVT::Other), Ops,
+ cast<StoreSDNode>(N)->getMemoryVT(),
+ cast<StoreSDNode>(N)->getMemOperand());
+ } else {
+ unsigned WidthInBytes =
+ N->getOperand(1).getValueType() == MVT::i8 ? 1 : 2;
+ SDValue WidthConst = DAG.getIntPtrConstant(WidthInBytes, dl, false);
+
+ SDValue Ops[] = {
+ N->getOperand(0), Val, N->getOperand(2), WidthConst,
+ DAG.getValueType(N->getOperand(1).getValueType())
+ };
+ Val = DAG.getMemIntrinsicNode(PPCISD::STXSIX, dl,
+ DAG.getVTList(MVT::Other), Ops,
+ cast<StoreSDNode>(N)->getMemoryVT(),
+ cast<StoreSDNode>(N)->getMemOperand());
+ }
- Val = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
- DAG.getVTList(MVT::Other), Ops,
- cast<StoreSDNode>(N)->getMemoryVT(),
- cast<StoreSDNode>(N)->getMemOperand());
DCI.AddToWorklist(Val.getNode());
return Val;
}
@@ -10726,10 +11239,11 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
// For little endian, VSX stores require generating xxswapd/lxvd2x.
+ // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
EVT VT = N->getOperand(1).getValueType();
if (VT.isSimple()) {
MVT StoreVT = VT.getSimpleVT();
- if (Subtarget.hasVSX() && Subtarget.isLittleEndian() &&
+ if (Subtarget.needsSwapsForVSXMemOps() &&
(StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
return expandVSXStoreForLE(N, DCI);
@@ -10741,9 +11255,10 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
EVT VT = LD->getValueType(0);
// For little endian, VSX loads require generating lxvd2x/xxswapd.
+ // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
if (VT.isSimple()) {
MVT LoadVT = VT.getSimpleVT();
- if (Subtarget.hasVSX() && Subtarget.isLittleEndian() &&
+ if (Subtarget.needsSwapsForVSXMemOps() &&
(LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
return expandVSXLoadForLE(N, DCI);
@@ -11014,11 +11529,9 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
int Bits = IID == Intrinsic::ppc_qpx_qvlpcld ?
5 /* 32 byte alignment */ : 4 /* 16 byte alignment */;
- if (DAG.MaskedValueIsZero(
- Add->getOperand(1),
- APInt::getAllOnesValue(Bits /* alignment */)
- .zext(
- Add.getValueType().getScalarType().getSizeInBits()))) {
+ if (DAG.MaskedValueIsZero(Add->getOperand(1),
+ APInt::getAllOnesValue(Bits /* alignment */)
+ .zext(Add.getScalarValueSizeInBits()))) {
SDNode *BasePtr = Add->getOperand(0).getNode();
for (SDNode::use_iterator UI = BasePtr->use_begin(),
UE = BasePtr->use_end();
@@ -11060,7 +11573,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
break;
case ISD::INTRINSIC_W_CHAIN: {
// For little endian, VSX loads require generating lxvd2x/xxswapd.
- if (Subtarget.hasVSX() && Subtarget.isLittleEndian()) {
+ // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
+ if (Subtarget.needsSwapsForVSXMemOps()) {
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
default:
break;
@@ -11073,7 +11587,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
}
case ISD::INTRINSIC_VOID: {
// For little endian, VSX stores require generating xxswapd/stxvd2x.
- if (Subtarget.hasVSX() && Subtarget.isLittleEndian()) {
+ // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
+ if (Subtarget.needsSwapsForVSXMemOps()) {
switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
default:
break;
@@ -11392,7 +11907,7 @@ unsigned PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
uint64_t LoopSize = 0;
for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) {
- LoopSize += TII->GetInstSizeInBytes(*J);
+ LoopSize += TII->getInstSizeInBytes(*J);
if (LoopSize > 32)
break;
}
@@ -11688,8 +12203,8 @@ bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- MFI->setReturnAddressIsTaken(true);
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ MFI.setReturnAddressIsTaken(true);
if (verifyReturnAddressArgumentIsConstant(Op, DAG))
return SDValue();
@@ -11726,8 +12241,8 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- MFI->setFrameAddressIsTaken(true);
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ MFI.setFrameAddressIsTaken(true);
EVT PtrVT = getPointerTy(MF.getDataLayout());
bool isPPC64 = PtrVT == MVT::i64;
@@ -12237,3 +12752,20 @@ void PPCTargetLowering::insertSSPDeclarations(Module &M) const {
if (!Subtarget.isTargetLinux())
return TargetLowering::insertSSPDeclarations(M);
}
+
+bool PPCTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+
+ if (!VT.isSimple() || !Subtarget.hasVSX())
+ return false;
+
+ switch(VT.getSimpleVT().SimpleTy) {
+ default:
+ // For FP types that are currently not supported by PPC backend, return
+ // false. Examples: f16, f80.
+ return false;
+ case MVT::f32:
+ case MVT::f64:
+ case MVT::ppcf128:
+ return Imm.isPosZero();
+ }
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
index cc7222b24f83..d3c88482f092 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -50,6 +50,10 @@ namespace llvm {
/// unsigned integers.
FCTIDUZ, FCTIWUZ,
+ /// VEXTS, ByteWidth - takes an input in VSFRC and produces an output in
+ /// VSFRC that is sign-extended from ByteWidth to a 64-bit integer.
+ VEXTS,
+
/// Reciprocal estimate instructions (unary FP ops).
FRE, FRSQRTE,
@@ -365,6 +369,16 @@ namespace llvm {
/// destination 64-bit register.
LFIWZX,
+ /// VSFRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an
+ /// integer smaller than 64 bits into a VSR. The integer is zero-extended.
+ /// This can be used for converting loaded integers to floating point.
+ LXSIZX,
+
+ /// STXSIX - The STXSI[bh]X instruction. The first operand is an input
+ /// chain, then an f64 value to store, then an address to store it to,
+ /// followed by a byte-width for the store.
+ STXSIX,
+
/// VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
/// Maps directly to an lxvd2x instruction that will be followed by
/// an xxswapd.
@@ -474,7 +488,7 @@ namespace llvm {
/// then the VPERM for the shuffle. All in all a very slow sequence.
TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(EVT VT)
const override {
- if (VT.getVectorElementType().getSizeInBits() % 8 == 0)
+ if (VT.getScalarSizeInBits() % 8 == 0)
return TypeWidenVector;
return TargetLoweringBase::getPreferredVectorAction(VT);
}
@@ -492,6 +506,14 @@ namespace llvm {
return true;
}
+ bool isCtlzFast() const override {
+ return true;
+ }
+
+ bool hasAndNotCompare(SDValue) const override {
+ return true;
+ }
+
bool supportSplitCSR(MachineFunction *MF) const override {
return
MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&
@@ -747,18 +769,40 @@ namespace llvm {
bool useLoadStackGuardNode() const override;
void insertSSPDeclarations(Module &M) const override;
+ bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
+
+ unsigned getJumpTableEncoding() const override;
+ bool isJumpTableRelative() const override;
+ SDValue getPICJumpTableRelocBase(SDValue Table,
+ SelectionDAG &DAG) const override;
+ const MCExpr *getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
+ unsigned JTI,
+ MCContext &Ctx) const override;
+
private:
struct ReuseLoadInfo {
SDValue Ptr;
SDValue Chain;
SDValue ResChain;
MachinePointerInfo MPI;
+ bool IsDereferenceable;
bool IsInvariant;
unsigned Alignment;
AAMDNodes AAInfo;
const MDNode *Ranges;
- ReuseLoadInfo() : IsInvariant(false), Alignment(0), Ranges(nullptr) {}
+ ReuseLoadInfo()
+ : IsDereferenceable(false), IsInvariant(false), Alignment(0),
+ Ranges(nullptr) {}
+
+ MachineMemOperand::Flags MMOFlags() const {
+ MachineMemOperand::Flags F = MachineMemOperand::MONone;
+ if (IsDereferenceable)
+ F |= MachineMemOperand::MODereferenceable;
+ if (IsInvariant)
+ F |= MachineMemOperand::MOInvariant;
+ return F;
+ }
};
bool canReuseLoadAddress(SDValue Op, EVT MemVT, ReuseLoadInfo &RLI,
@@ -771,6 +815,8 @@ namespace llvm {
SelectionDAG &DAG, const SDLoc &dl) const;
SDValue LowerFP_TO_INTDirectMove(SDValue Op, SelectionDAG &DAG,
const SDLoc &dl) const;
+
+ bool directMoveIsProfitable(const SDValue &Op) const;
SDValue LowerINT_TO_FPDirectMove(SDValue Op, SelectionDAG &DAG,
const SDLoc &dl) const;
@@ -933,14 +979,23 @@ namespace llvm {
SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineFPToIntToFP(SDNode *N, DAGCombinerInfo &DCI) const;
- SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI,
- unsigned &RefinementSteps,
- bool &UseOneConstNR) const override;
- SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI,
- unsigned &RefinementSteps) const override;
+ /// ConvertSETCCToSubtract - looks at SETCC nodes that compare integers and
+ /// replaces them with an integer subtraction when (1) there is a legal way
+ /// of doing it and (2) keeping the result of the comparison in a GPR has a
+ /// performance benefit.
+ SDValue ConvertSETCCToSubtract(SDNode *N, DAGCombinerInfo &DCI) const;
+
+ SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
+ int &RefinementSteps, bool &UseOneConstNR,
+ bool Reciprocal) const override;
+ SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
+ int &RefinementSteps) const override;
unsigned combineRepeatedFPDivisors() const override;
CCAssignFn *useFastISelCCs(unsigned Flag) const;
+
+ SDValue
+ combineElementTruncationToVectorTruncation(SDNode *N,
+ DAGCombinerInfo &DCI) const;
};
namespace PPC {
@@ -959,6 +1014,13 @@ namespace llvm {
ISD::ArgFlagsTy &ArgFlags,
CCState &State);
+ bool
+ CC_PPC32_SVR4_Custom_SkipLastArgRegsPPCF128(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State);
+
bool CC_PPC32_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, MVT &ValVT,
MVT &LocVT,
CCValAssign::LocInfo &LocInfo,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index 5e514c8e8cf6..03b2257a88a8 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -65,16 +65,6 @@ def SRL64 : SDNodeXForm<imm, [{
: getI32Imm(0, SDLoc(N));
}]>;
-def HI32_48 : SDNodeXForm<imm, [{
- // Transformation function: shift the immediate value down into the low bits.
- return getI32Imm((unsigned short)(N->getZExtValue() >> 32, SDLoc(N)));
-}]>;
-
-def HI48_64 : SDNodeXForm<imm, [{
- // Transformation function: shift the immediate value down into the low bits.
- return getI32Imm((unsigned short)(N->getZExtValue() >> 48, SDLoc(N)));
-}]>;
-
//===----------------------------------------------------------------------===//
// Calls.
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
index e1c4673c2d7f..5c022749ad64 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td
@@ -26,6 +26,7 @@
// ** in PPCVSXSwapRemoval::gatherVectorInstructions(). **
// ****************************************************************************
+
//===----------------------------------------------------------------------===//
// Altivec transformation functions and pattern fragments.
//
@@ -242,7 +243,7 @@ def VSPLTISB_get_imm : SDNodeXForm<build_vector, [{
return PPC::get_VSPLTI_elt(N, 1, *CurDAG);
}]>;
def vecspltisb : PatLeaf<(build_vector), [{
- return PPC::get_VSPLTI_elt(N, 1, *CurDAG).getNode() != 0;
+ return PPC::get_VSPLTI_elt(N, 1, *CurDAG).getNode() != nullptr;
}], VSPLTISB_get_imm>;
// VSPLTISH_get_imm xform function: convert build_vector to VSPLTISH imm.
@@ -250,7 +251,7 @@ def VSPLTISH_get_imm : SDNodeXForm<build_vector, [{
return PPC::get_VSPLTI_elt(N, 2, *CurDAG);
}]>;
def vecspltish : PatLeaf<(build_vector), [{
- return PPC::get_VSPLTI_elt(N, 2, *CurDAG).getNode() != 0;
+ return PPC::get_VSPLTI_elt(N, 2, *CurDAG).getNode() != nullptr;
}], VSPLTISH_get_imm>;
// VSPLTISW_get_imm xform function: convert build_vector to VSPLTISW imm.
@@ -258,7 +259,7 @@ def VSPLTISW_get_imm : SDNodeXForm<build_vector, [{
return PPC::get_VSPLTI_elt(N, 4, *CurDAG);
}]>;
def vecspltisw : PatLeaf<(build_vector), [{
- return PPC::get_VSPLTI_elt(N, 4, *CurDAG).getNode() != 0;
+ return PPC::get_VSPLTI_elt(N, 4, *CurDAG).getNode() != nullptr;
}], VSPLTISW_get_imm>;
//===----------------------------------------------------------------------===//
@@ -706,6 +707,12 @@ def VSPLTW : VXForm_1<652, (outs vrrc:$vD), (ins u5imm:$UIMM, vrrc:$vB),
"vspltw $vD, $vB, $UIMM", IIC_VecPerm,
[(set v16i8:$vD,
(vspltw_shuffle:$UIMM v16i8:$vB, (undef)))]>;
+let isCodeGenOnly = 1 in {
+ def VSPLTBs : VXForm_1<524, (outs vrrc:$vD), (ins u5imm:$UIMM, vfrc:$vB),
+ "vspltb $vD, $vB, $UIMM", IIC_VecPerm, []>;
+ def VSPLTHs : VXForm_1<588, (outs vrrc:$vD), (ins u5imm:$UIMM, vfrc:$vB),
+ "vsplth $vD, $vB, $UIMM", IIC_VecPerm, []>;
+}
def VSR : VX1_Int_Ty< 708, "vsr" , int_ppc_altivec_vsr, v4i32>;
def VSRO : VX1_Int_Ty<1100, "vsro" , int_ppc_altivec_vsro, v4i32>;
@@ -1218,34 +1225,23 @@ def VSBOX : VXBX_Int_Ty<1480, "vsbox", int_ppc_altivec_crypto_vsbox, v2i64>;
def HasP9Altivec : Predicate<"PPCSubTarget->hasP9Altivec()">;
let Predicates = [HasP9Altivec] in {
-// Vector Compare Not Equal (Zero)
-class P9VCMP<bits<10> xo, string asmstr, ValueType Ty>
- : VXRForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), asmstr,
- IIC_VecFPCompare, []>;
-class P9VCMPo<bits<10> xo, string asmstr, ValueType Ty>
- : VXRForm_1<xo, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB), asmstr,
- IIC_VecFPCompare, []> {
- let Defs = [CR6];
- let RC = 1;
-}
-
// i8 element comparisons.
-def VCMPNEB : P9VCMP < 7, "vcmpneb $vD, $vA, $vB" , v16i8>;
-def VCMPNEBo : P9VCMPo< 7, "vcmpneb. $vD, $vA, $vB" , v16i8>;
-def VCMPNEZB : P9VCMP <263, "vcmpnezb $vD, $vA, $vB" , v16i8>;
-def VCMPNEZBo : P9VCMPo<263, "vcmpnezb. $vD, $vA, $vB", v16i8>;
+def VCMPNEB : VCMP < 7, "vcmpneb $vD, $vA, $vB" , v16i8>;
+def VCMPNEBo : VCMPo < 7, "vcmpneb. $vD, $vA, $vB" , v16i8>;
+def VCMPNEZB : VCMP <263, "vcmpnezb $vD, $vA, $vB" , v16i8>;
+def VCMPNEZBo : VCMPo<263, "vcmpnezb. $vD, $vA, $vB", v16i8>;
// i16 element comparisons.
-def VCMPNEH : P9VCMP < 71, "vcmpneh $vD, $vA, $vB" , v8i16>;
-def VCMPNEHo : P9VCMPo< 71, "vcmpneh. $vD, $vA, $vB" , v8i16>;
-def VCMPNEZH : P9VCMP <327, "vcmpnezh $vD, $vA, $vB" , v8i16>;
-def VCMPNEZHo : P9VCMPo<327, "vcmpnezh. $vD, $vA, $vB", v8i16>;
+def VCMPNEH : VCMP < 71, "vcmpneh $vD, $vA, $vB" , v8i16>;
+def VCMPNEHo : VCMPo< 71, "vcmpneh. $vD, $vA, $vB" , v8i16>;
+def VCMPNEZH : VCMP <327, "vcmpnezh $vD, $vA, $vB" , v8i16>;
+def VCMPNEZHo : VCMPo<327, "vcmpnezh. $vD, $vA, $vB", v8i16>;
// i32 element comparisons.
-def VCMPNEW : P9VCMP <135, "vcmpnew $vD, $vA, $vB" , v4i32>;
-def VCMPNEWo : P9VCMPo<135, "vcmpnew. $vD, $vA, $vB" , v4i32>;
-def VCMPNEZW : P9VCMP <391, "vcmpnezw $vD, $vA, $vB" , v4i32>;
-def VCMPNEZWo : P9VCMPo<391, "vcmpnezw. $vD, $vA, $vB", v4i32>;
+def VCMPNEW : VCMP <135, "vcmpnew $vD, $vA, $vB" , v4i32>;
+def VCMPNEWo : VCMPo<135, "vcmpnew. $vD, $vA, $vB" , v4i32>;
+def VCMPNEZW : VCMP <391, "vcmpnezw $vD, $vA, $vB" , v4i32>;
+def VCMPNEZWo : VCMPo<391, "vcmpnezw. $vD, $vA, $vB", v4i32>;
// VX-Form: [PO VRT / UIM VRB XO].
// We use VXForm_1 to implement it, that is, we use "VRA" (5 bit) to represent
@@ -1281,17 +1277,28 @@ def VINSERTD : VX1_VT5_UIM5_VB5<973, "vinsertd", []>;
class VX_VT5_EO5_VB5<bits<11> xo, bits<5> eo, string opc, list<dag> pattern>
: VXForm_RD5_XO5_RS5<xo, eo, (outs vrrc:$vD), (ins vrrc:$vB),
!strconcat(opc, " $vD, $vB"), IIC_VecGeneral, pattern>;
+class VX_VT5_EO5_VB5s<bits<11> xo, bits<5> eo, string opc, list<dag> pattern>
+ : VXForm_RD5_XO5_RS5<xo, eo, (outs vfrc:$vD), (ins vfrc:$vB),
+ !strconcat(opc, " $vD, $vB"), IIC_VecGeneral, pattern>;
// Vector Count Leading/Trailing Zero LSB. Result is placed into GPR[rD]
-def VCLZLSBB : VXForm_RD5_XO5_RS5<1538, 0, (outs g8rc:$rD), (ins vrrc:$vB),
- "vclzlsbb $rD, $vB", IIC_VecGeneral, []>;
-def VCTZLSBB : VXForm_RD5_XO5_RS5<1538, 1, (outs g8rc:$rD), (ins vrrc:$vB),
- "vctzlsbb $rD, $vB", IIC_VecGeneral, []>;
+def VCLZLSBB : VXForm_RD5_XO5_RS5<1538, 0, (outs gprc:$rD), (ins vrrc:$vB),
+ "vclzlsbb $rD, $vB", IIC_VecGeneral,
+ [(set i32:$rD, (int_ppc_altivec_vclzlsbb
+ v16i8:$vB))]>;
+def VCTZLSBB : VXForm_RD5_XO5_RS5<1538, 1, (outs gprc:$rD), (ins vrrc:$vB),
+ "vctzlsbb $rD, $vB", IIC_VecGeneral,
+ [(set i32:$rD, (int_ppc_altivec_vctzlsbb
+ v16i8:$vB))]>;
// Vector Count Trailing Zeros
-def VCTZB : VX_VT5_EO5_VB5<1538, 28, "vctzb", []>;
-def VCTZH : VX_VT5_EO5_VB5<1538, 29, "vctzh", []>;
-def VCTZW : VX_VT5_EO5_VB5<1538, 30, "vctzw", []>;
-def VCTZD : VX_VT5_EO5_VB5<1538, 31, "vctzd", []>;
+def VCTZB : VX_VT5_EO5_VB5<1538, 28, "vctzb",
+ [(set v16i8:$vD, (cttz v16i8:$vB))]>;
+def VCTZH : VX_VT5_EO5_VB5<1538, 29, "vctzh",
+ [(set v8i16:$vD, (cttz v8i16:$vB))]>;
+def VCTZW : VX_VT5_EO5_VB5<1538, 30, "vctzw",
+ [(set v4i32:$vD, (cttz v4i32:$vB))]>;
+def VCTZD : VX_VT5_EO5_VB5<1538, 31, "vctzd",
+ [(set v2i64:$vD, (cttz v2i64:$vB))]>;
// Vector Extend Sign
def VEXTSB2W : VX_VT5_EO5_VB5<1538, 16, "vextsb2w", []>;
@@ -1299,15 +1306,31 @@ def VEXTSH2W : VX_VT5_EO5_VB5<1538, 17, "vextsh2w", []>;
def VEXTSB2D : VX_VT5_EO5_VB5<1538, 24, "vextsb2d", []>;
def VEXTSH2D : VX_VT5_EO5_VB5<1538, 25, "vextsh2d", []>;
def VEXTSW2D : VX_VT5_EO5_VB5<1538, 26, "vextsw2d", []>;
+let isCodeGenOnly = 1 in {
+ def VEXTSB2Ws : VX_VT5_EO5_VB5s<1538, 16, "vextsb2w", []>;
+ def VEXTSH2Ws : VX_VT5_EO5_VB5s<1538, 17, "vextsh2w", []>;
+ def VEXTSB2Ds : VX_VT5_EO5_VB5s<1538, 24, "vextsb2d", []>;
+ def VEXTSH2Ds : VX_VT5_EO5_VB5s<1538, 25, "vextsh2d", []>;
+ def VEXTSW2Ds : VX_VT5_EO5_VB5s<1538, 26, "vextsw2d", []>;
+}
// Vector Integer Negate
-def VNEGW : VX_VT5_EO5_VB5<1538, 6, "vnegw", []>;
-def VNEGD : VX_VT5_EO5_VB5<1538, 7, "vnegd", []>;
+def VNEGW : VX_VT5_EO5_VB5<1538, 6, "vnegw",
+ [(set v4i32:$vD,
+ (sub (v4i32 immAllZerosV), v4i32:$vB))]>;
+
+def VNEGD : VX_VT5_EO5_VB5<1538, 7, "vnegd",
+ [(set v2i64:$vD,
+ (sub (v2i64 (bitconvert (v4i32 immAllZerosV))),
+ v2i64:$vB))]>;
// Vector Parity Byte
-def VPRTYBW : VX_VT5_EO5_VB5<1538, 8, "vprtybw", []>;
-def VPRTYBD : VX_VT5_EO5_VB5<1538, 9, "vprtybd", []>;
-def VPRTYBQ : VX_VT5_EO5_VB5<1538, 10, "vprtybq", []>;
+def VPRTYBW : VX_VT5_EO5_VB5<1538, 8, "vprtybw", [(set v4i32:$vD,
+ (int_ppc_altivec_vprtybw v4i32:$vB))]>;
+def VPRTYBD : VX_VT5_EO5_VB5<1538, 9, "vprtybd", [(set v2i64:$vD,
+ (int_ppc_altivec_vprtybd v2i64:$vB))]>;
+def VPRTYBQ : VX_VT5_EO5_VB5<1538, 10, "vprtybq", [(set v1i128:$vD,
+ (int_ppc_altivec_vprtybq v1i128:$vB))]>;
// Vector (Bit) Permute (Right-indexed)
def VBPERMD : VXForm_1<1484, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
@@ -1320,14 +1343,32 @@ class VX1_VT5_VA5_VB5<bits<11> xo, string opc, list<dag> pattern>
!strconcat(opc, " $vD, $vA, $vB"), IIC_VecFP, pattern>;
// Vector Rotate Left Mask/Mask-Insert
-def VRLWNM : VX1_VT5_VA5_VB5<389, "vrlwnm", []>;
-def VRLWMI : VX1_VT5_VA5_VB5<133, "vrlwmi", []>;
-def VRLDNM : VX1_VT5_VA5_VB5<453, "vrldnm", []>;
-def VRLDMI : VX1_VT5_VA5_VB5<197, "vrldmi", []>;
+def VRLWNM : VX1_VT5_VA5_VB5<389, "vrlwnm",
+ [(set v4i32:$vD,
+ (int_ppc_altivec_vrlwnm v4i32:$vA,
+ v4i32:$vB))]>;
+def VRLWMI : VXForm_1<133, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vDi),
+ "vrlwmi $vD, $vA, $vB", IIC_VecFP,
+ [(set v4i32:$vD,
+ (int_ppc_altivec_vrlwmi v4i32:$vA, v4i32:$vB,
+ v4i32:$vDi))]>,
+ RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
+def VRLDNM : VX1_VT5_VA5_VB5<453, "vrldnm",
+ [(set v2i64:$vD,
+ (int_ppc_altivec_vrldnm v2i64:$vA,
+ v2i64:$vB))]>;
+def VRLDMI : VXForm_1<197, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB, vrrc:$vDi),
+ "vrldmi $vD, $vA, $vB", IIC_VecFP,
+ [(set v2i64:$vD,
+ (int_ppc_altivec_vrldmi v2i64:$vA, v2i64:$vB,
+ v2i64:$vDi))]>,
+ RegConstraint<"$vDi = $vD">, NoEncode<"$vDi">;
// Vector Shift Left/Right
-def VSLV : VX1_VT5_VA5_VB5<1860, "vslv", []>;
-def VSRV : VX1_VT5_VA5_VB5<1796, "vsrv", []>;
+def VSLV : VX1_VT5_VA5_VB5<1860, "vslv",
+           [(set v16i8:$vD, (int_ppc_altivec_vslv v16i8:$vA, v16i8:$vB))]>;
+def VSRV : VX1_VT5_VA5_VB5<1796, "vsrv",
+           [(set v16i8:$vD, (int_ppc_altivec_vsrv v16i8:$vA, v16i8:$vB))]>;
// Vector Multiply-by-10 (& Write Carry) Unsigned Quadword
def VMUL10UQ : VXForm_BX<513, (outs vrrc:$vD), (ins vrrc:$vA),
@@ -1396,4 +1437,15 @@ def BCDSRo : VX_VT5_VA5_VB5_PS1_XO9_o<449, "bcdsr.", []>;
// Decimal (Unsigned) Truncate
def BCDTRUNCo : VX_VT5_VA5_VB5_PS1_XO9_o<257, "bcdtrunc." , []>;
def BCDUTRUNCo : VX_VT5_VA5_VB5_XO9_o <321, "bcdutrunc.", []>;
+
+// Absolute Difference
+def VABSDUB : VXForm_1<1027, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vabsdub $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v16i8:$vD, (int_ppc_altivec_vabsdub v16i8:$vA, v16i8:$vB))]>;
+def VABSDUH : VXForm_1<1091, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vabsduh $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v8i16:$vD, (int_ppc_altivec_vabsduh v8i16:$vA, v8i16:$vB))]>;
+def VABSDUW : VXForm_1<1155, (outs vrrc:$vD), (ins vrrc:$vA, vrrc:$vB),
+ "vabsduw $vD, $vA, $vB", IIC_VecGeneral,
+ [(set v4i32:$vD, (int_ppc_altivec_vabsduw v4i32:$vA, v4i32:$vB))]>;
} // end HasP9Altivec
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td
index 5acff7559544..99689f656c2d 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td
@@ -38,6 +38,14 @@ class I<bits<6> opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin>
let TSFlags{2} = PPC970_Cracked;
let TSFlags{5-3} = PPC970_Unit;
+  /// Indicates that the VSX instruction uses VSX numbering/encoding.
+  /// Since ISA 3.0, there are scalar instructions that use only the upper
+  /// half of the VSX register set. Rather than adding further complexity
+  /// to the register class set, the VSX registers simply include the Altivec
+  /// registers, and this flag selects the numbering to be used for them.
+ bits<1> UseVSXReg = 0;
+ let TSFlags{6} = UseVSXReg;
+
// Fields used for relation models.
string BaseName = "";
@@ -62,6 +70,8 @@ class PPC970_Unit_VALU { bits<3> PPC970_Unit = 5; }
class PPC970_Unit_VPERM { bits<3> PPC970_Unit = 6; }
class PPC970_Unit_BRU { bits<3> PPC970_Unit = 7; }
+class UseVSXReg { bits<1> UseVSXReg = 1; }
+
// Two joined instructions; used to emit two adjacent instructions as one.
// The itinerary from the first instruction is used for scheduling and
// classification.
@@ -163,6 +173,22 @@ class BForm_3<bits<6> opcode, bit aa, bit lk,
let Inst{31} = lk;
}
+class BForm_3_at<bits<6> opcode, bit aa, bit lk,
+ dag OOL, dag IOL, string asmstr>
+ : I<opcode, OOL, IOL, asmstr, IIC_BrB> {
+ bits<5> BO;
+ bits<2> at;
+ bits<5> BI;
+ bits<14> BD;
+
+ let Inst{6-8} = BO{4-2};
+ let Inst{9-10} = at;
+ let Inst{11-15} = BI;
+ let Inst{16-29} = BD;
+ let Inst{30} = aa;
+ let Inst{31} = lk;
+}
+
class BForm_4<bits<6> opcode, bits<5> bo, bit aa, bit lk,
dag OOL, dag IOL, string asmstr>
: I<opcode, OOL, IOL, asmstr, IIC_BrB> {
@@ -1043,6 +1069,20 @@ class XX3Form<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
let Inst{31} = XT{5};
}
+class XX3Form_Zero<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XX3Form<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let XA = XT;
+ let XB = XT;
+}
+
+class XX3Form_SetZero<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XX3Form<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let XB = XT;
+ let XA = XT;
+}
+
class XX3Form_1<bits<6> opcode, bits<8> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
@@ -1193,6 +1233,25 @@ class XLForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
let Inst{31} = 0;
}
+class XLForm_1_np<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XLForm_1<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let CRD = 0;
+ let CRA = 0;
+ let CRB = 0;
+}
+
+class XLForm_1_gen<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XLForm_1<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ bits<5> RT;
+ bits<5> RB;
+
+ let CRD = RT;
+ let CRA = 0;
+ let CRB = RB;
+}
+
class XLForm_1_ext<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I<opcode, OOL, IOL, asmstr, itin> {
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index b6ae70ec1a2d..2e0b9355f82b 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -273,6 +273,7 @@ unsigned PPCInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
case PPC::RESTORE_CRBIT:
case PPC::LVX:
case PPC::LXVD2X:
+ case PPC::LXVX:
case PPC::QVLFDX:
case PPC::QVLFSXs:
case PPC::QVLFDXb:
@@ -302,6 +303,7 @@ unsigned PPCInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
case PPC::SPILL_CRBIT:
case PPC::STVX:
case PPC::STXVD2X:
+ case PPC::STXVX:
case PPC::QVSTFDX:
case PPC::QVSTFSXs:
case PPC::QVSTFDXb:
@@ -460,57 +462,57 @@ bool PPCInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
return false;
// Get the last instruction in the block.
- MachineInstr *LastInst = I;
+ MachineInstr &LastInst = *I;
// If there is only one terminator instruction, process it.
if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
- if (LastInst->getOpcode() == PPC::B) {
- if (!LastInst->getOperand(0).isMBB())
+ if (LastInst.getOpcode() == PPC::B) {
+ if (!LastInst.getOperand(0).isMBB())
return true;
- TBB = LastInst->getOperand(0).getMBB();
+ TBB = LastInst.getOperand(0).getMBB();
return false;
- } else if (LastInst->getOpcode() == PPC::BCC) {
- if (!LastInst->getOperand(2).isMBB())
+ } else if (LastInst.getOpcode() == PPC::BCC) {
+ if (!LastInst.getOperand(2).isMBB())
return true;
// Block ends with fall-through condbranch.
- TBB = LastInst->getOperand(2).getMBB();
- Cond.push_back(LastInst->getOperand(0));
- Cond.push_back(LastInst->getOperand(1));
+ TBB = LastInst.getOperand(2).getMBB();
+ Cond.push_back(LastInst.getOperand(0));
+ Cond.push_back(LastInst.getOperand(1));
return false;
- } else if (LastInst->getOpcode() == PPC::BC) {
- if (!LastInst->getOperand(1).isMBB())
+ } else if (LastInst.getOpcode() == PPC::BC) {
+ if (!LastInst.getOperand(1).isMBB())
return true;
// Block ends with fall-through condbranch.
- TBB = LastInst->getOperand(1).getMBB();
+ TBB = LastInst.getOperand(1).getMBB();
Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
- Cond.push_back(LastInst->getOperand(0));
+ Cond.push_back(LastInst.getOperand(0));
return false;
- } else if (LastInst->getOpcode() == PPC::BCn) {
- if (!LastInst->getOperand(1).isMBB())
+ } else if (LastInst.getOpcode() == PPC::BCn) {
+ if (!LastInst.getOperand(1).isMBB())
return true;
// Block ends with fall-through condbranch.
- TBB = LastInst->getOperand(1).getMBB();
+ TBB = LastInst.getOperand(1).getMBB();
Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_UNSET));
- Cond.push_back(LastInst->getOperand(0));
+ Cond.push_back(LastInst.getOperand(0));
return false;
- } else if (LastInst->getOpcode() == PPC::BDNZ8 ||
- LastInst->getOpcode() == PPC::BDNZ) {
- if (!LastInst->getOperand(0).isMBB())
+ } else if (LastInst.getOpcode() == PPC::BDNZ8 ||
+ LastInst.getOpcode() == PPC::BDNZ) {
+ if (!LastInst.getOperand(0).isMBB())
return true;
if (DisableCTRLoopAnal)
return true;
- TBB = LastInst->getOperand(0).getMBB();
+ TBB = LastInst.getOperand(0).getMBB();
Cond.push_back(MachineOperand::CreateImm(1));
Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
true));
return false;
- } else if (LastInst->getOpcode() == PPC::BDZ8 ||
- LastInst->getOpcode() == PPC::BDZ) {
- if (!LastInst->getOperand(0).isMBB())
+ } else if (LastInst.getOpcode() == PPC::BDZ8 ||
+ LastInst.getOpcode() == PPC::BDZ) {
+ if (!LastInst.getOperand(0).isMBB())
return true;
if (DisableCTRLoopAnal)
return true;
- TBB = LastInst->getOperand(0).getMBB();
+ TBB = LastInst.getOperand(0).getMBB();
Cond.push_back(MachineOperand::CreateImm(0));
Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
true));
@@ -522,80 +524,79 @@ bool PPCInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
}
// Get the instruction before it if it's a terminator.
- MachineInstr *SecondLastInst = I;
+ MachineInstr &SecondLastInst = *I;
// If there are three terminators, we don't know what sort of block this is.
- if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
+ if (I != MBB.begin() && isUnpredicatedTerminator(*--I))
return true;
// If the block ends with PPC::B and PPC:BCC, handle it.
- if (SecondLastInst->getOpcode() == PPC::BCC &&
- LastInst->getOpcode() == PPC::B) {
- if (!SecondLastInst->getOperand(2).isMBB() ||
- !LastInst->getOperand(0).isMBB())
+ if (SecondLastInst.getOpcode() == PPC::BCC &&
+ LastInst.getOpcode() == PPC::B) {
+ if (!SecondLastInst.getOperand(2).isMBB() ||
+ !LastInst.getOperand(0).isMBB())
return true;
- TBB = SecondLastInst->getOperand(2).getMBB();
- Cond.push_back(SecondLastInst->getOperand(0));
- Cond.push_back(SecondLastInst->getOperand(1));
- FBB = LastInst->getOperand(0).getMBB();
+ TBB = SecondLastInst.getOperand(2).getMBB();
+ Cond.push_back(SecondLastInst.getOperand(0));
+ Cond.push_back(SecondLastInst.getOperand(1));
+ FBB = LastInst.getOperand(0).getMBB();
return false;
- } else if (SecondLastInst->getOpcode() == PPC::BC &&
- LastInst->getOpcode() == PPC::B) {
- if (!SecondLastInst->getOperand(1).isMBB() ||
- !LastInst->getOperand(0).isMBB())
+ } else if (SecondLastInst.getOpcode() == PPC::BC &&
+ LastInst.getOpcode() == PPC::B) {
+ if (!SecondLastInst.getOperand(1).isMBB() ||
+ !LastInst.getOperand(0).isMBB())
return true;
- TBB = SecondLastInst->getOperand(1).getMBB();
+ TBB = SecondLastInst.getOperand(1).getMBB();
Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));
- Cond.push_back(SecondLastInst->getOperand(0));
- FBB = LastInst->getOperand(0).getMBB();
+ Cond.push_back(SecondLastInst.getOperand(0));
+ FBB = LastInst.getOperand(0).getMBB();
return false;
- } else if (SecondLastInst->getOpcode() == PPC::BCn &&
- LastInst->getOpcode() == PPC::B) {
- if (!SecondLastInst->getOperand(1).isMBB() ||
- !LastInst->getOperand(0).isMBB())
+ } else if (SecondLastInst.getOpcode() == PPC::BCn &&
+ LastInst.getOpcode() == PPC::B) {
+ if (!SecondLastInst.getOperand(1).isMBB() ||
+ !LastInst.getOperand(0).isMBB())
return true;
- TBB = SecondLastInst->getOperand(1).getMBB();
+ TBB = SecondLastInst.getOperand(1).getMBB();
Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_UNSET));
- Cond.push_back(SecondLastInst->getOperand(0));
- FBB = LastInst->getOperand(0).getMBB();
+ Cond.push_back(SecondLastInst.getOperand(0));
+ FBB = LastInst.getOperand(0).getMBB();
return false;
- } else if ((SecondLastInst->getOpcode() == PPC::BDNZ8 ||
- SecondLastInst->getOpcode() == PPC::BDNZ) &&
- LastInst->getOpcode() == PPC::B) {
- if (!SecondLastInst->getOperand(0).isMBB() ||
- !LastInst->getOperand(0).isMBB())
+ } else if ((SecondLastInst.getOpcode() == PPC::BDNZ8 ||
+ SecondLastInst.getOpcode() == PPC::BDNZ) &&
+ LastInst.getOpcode() == PPC::B) {
+ if (!SecondLastInst.getOperand(0).isMBB() ||
+ !LastInst.getOperand(0).isMBB())
return true;
if (DisableCTRLoopAnal)
return true;
- TBB = SecondLastInst->getOperand(0).getMBB();
+ TBB = SecondLastInst.getOperand(0).getMBB();
Cond.push_back(MachineOperand::CreateImm(1));
Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
true));
- FBB = LastInst->getOperand(0).getMBB();
+ FBB = LastInst.getOperand(0).getMBB();
return false;
- } else if ((SecondLastInst->getOpcode() == PPC::BDZ8 ||
- SecondLastInst->getOpcode() == PPC::BDZ) &&
- LastInst->getOpcode() == PPC::B) {
- if (!SecondLastInst->getOperand(0).isMBB() ||
- !LastInst->getOperand(0).isMBB())
+ } else if ((SecondLastInst.getOpcode() == PPC::BDZ8 ||
+ SecondLastInst.getOpcode() == PPC::BDZ) &&
+ LastInst.getOpcode() == PPC::B) {
+ if (!SecondLastInst.getOperand(0).isMBB() ||
+ !LastInst.getOperand(0).isMBB())
return true;
if (DisableCTRLoopAnal)
return true;
- TBB = SecondLastInst->getOperand(0).getMBB();
+ TBB = SecondLastInst.getOperand(0).getMBB();
Cond.push_back(MachineOperand::CreateImm(0));
Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,
true));
- FBB = LastInst->getOperand(0).getMBB();
+ FBB = LastInst.getOperand(0).getMBB();
return false;
}
// If the block ends with two PPC:Bs, handle it. The second one is not
// executed, so remove it.
- if (SecondLastInst->getOpcode() == PPC::B &&
- LastInst->getOpcode() == PPC::B) {
- if (!SecondLastInst->getOperand(0).isMBB())
+ if (SecondLastInst.getOpcode() == PPC::B && LastInst.getOpcode() == PPC::B) {
+ if (!SecondLastInst.getOperand(0).isMBB())
return true;
- TBB = SecondLastInst->getOperand(0).getMBB();
+ TBB = SecondLastInst.getOperand(0).getMBB();
I = LastInst;
if (AllowModify)
I->eraseFromParent();
@@ -606,7 +607,10 @@ bool PPCInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
return true;
}
-unsigned PPCInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+unsigned PPCInstrInfo::removeBranch(MachineBasicBlock &MBB,
+ int *BytesRemoved) const {
+ assert(!BytesRemoved && "code size not handled");
+
MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
if (I == MBB.end())
return 0;
@@ -635,15 +639,17 @@ unsigned PPCInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
return 2;
}
-unsigned PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB,
+unsigned PPCInstrInfo::insertBranch(MachineBasicBlock &MBB,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
ArrayRef<MachineOperand> Cond,
- const DebugLoc &DL) const {
+ const DebugLoc &DL,
+ int *BytesAdded) const {
// Shouldn't be a fall through.
- assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert(TBB && "insertBranch must not be told to insert a fallthrough");
assert((Cond.size() == 2 || Cond.size() == 0) &&
"PPC branch conditions have two components!");
+ assert(!BytesAdded && "code size not handled");
bool isPPC64 = Subtarget.isPPC64();
@@ -853,15 +859,6 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
llvm_unreachable("nop VSX copy");
DestReg = SuperReg;
- } else if (PPC::VRRCRegClass.contains(DestReg) &&
- PPC::VSRCRegClass.contains(SrcReg)) {
- unsigned SuperReg =
- TRI->getMatchingSuperReg(DestReg, PPC::sub_128, &PPC::VSRCRegClass);
-
- if (VSXSelfCopyCrash && SrcReg == SuperReg)
- llvm_unreachable("nop VSX copy");
-
- DestReg = SuperReg;
} else if (PPC::F8RCRegClass.contains(SrcReg) &&
PPC::VSRCRegClass.contains(DestReg)) {
unsigned SuperReg =
@@ -871,15 +868,6 @@ void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
llvm_unreachable("nop VSX copy");
SrcReg = SuperReg;
- } else if (PPC::VRRCRegClass.contains(SrcReg) &&
- PPC::VSRCRegClass.contains(DestReg)) {
- unsigned SuperReg =
- TRI->getMatchingSuperReg(SrcReg, PPC::sub_128, &PPC::VSRCRegClass);
-
- if (VSXSelfCopyCrash && DestReg == SuperReg)
- llvm_unreachable("nop VSX copy");
-
- SrcReg = SuperReg;
}
// Different class register copy
@@ -1004,19 +992,22 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF,
FrameIdx));
NonRI = true;
} else if (PPC::VSRCRegClass.hasSubClassEq(RC)) {
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STXVD2X))
+ unsigned Op = Subtarget.hasP9Vector() ? PPC::STXVX : PPC::STXVD2X;
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Op))
.addReg(SrcReg,
getKillRegState(isKill)),
FrameIdx));
NonRI = true;
} else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) {
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STXSDX))
+ unsigned Opc = Subtarget.hasP9Vector() ? PPC::DFSTOREf64 : PPC::STXSDX;
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Opc))
.addReg(SrcReg,
getKillRegState(isKill)),
FrameIdx));
NonRI = true;
} else if (PPC::VSSRCRegClass.hasSubClassEq(RC)) {
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::STXSSPX))
+ unsigned Opc = Subtarget.hasP9Vector() ? PPC::DFSTOREf32 : PPC::STXSSPX;
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Opc))
.addReg(SrcReg,
getKillRegState(isKill)),
FrameIdx));
@@ -1066,6 +1057,15 @@ PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
FuncInfo->setHasSpills();
+ // We need to avoid a situation in which the value from a VRRC register is
+ // spilled using an Altivec instruction and reloaded into a VSRC register
+ // using a VSX instruction. The issue with this is that the VSX
+ // load/store instructions swap the doublewords in the vector and the Altivec
+ // ones don't. The register classes on the spill/reload may be different if
+ // the register is defined using an Altivec instruction and is then used by a
+ // VSX instruction.
+ RC = updatedRC(RC);
+
bool NonRI = false, SpillsVRS = false;
if (StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs,
NonRI, SpillsVRS))
@@ -1080,7 +1080,7 @@ PPCInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
MBB.insert(MI, NewMIs[i]);
- const MachineFrameInfo &MFI = *MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FrameIdx),
MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),
@@ -1125,16 +1125,19 @@ bool PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, const DebugLoc &DL,
FrameIdx));
NonRI = true;
} else if (PPC::VSRCRegClass.hasSubClassEq(RC)) {
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LXVD2X), DestReg),
+ unsigned Op = Subtarget.hasP9Vector() ? PPC::LXVX : PPC::LXVD2X;
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Op), DestReg),
FrameIdx));
NonRI = true;
} else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) {
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LXSDX), DestReg),
- FrameIdx));
+ unsigned Opc = Subtarget.hasP9Vector() ? PPC::DFLOADf64 : PPC::LXSDX;
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Opc),
+ DestReg), FrameIdx));
NonRI = true;
} else if (PPC::VSSRCRegClass.hasSubClassEq(RC)) {
- NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::LXSSPX), DestReg),
- FrameIdx));
+ unsigned Opc = Subtarget.hasP9Vector() ? PPC::DFLOADf32 : PPC::LXSSPX;
+ NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Opc),
+ DestReg), FrameIdx));
NonRI = true;
} else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) {
assert(Subtarget.isDarwin() &&
@@ -1177,6 +1180,16 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
FuncInfo->setHasSpills();
+ // We need to avoid a situation in which the value from a VRRC register is
+ // spilled using an Altivec instruction and reloaded into a VSRC register
+ // using a VSX instruction. The issue with this is that the VSX
+ // load/store instructions swap the doublewords in the vector and the Altivec
+ // ones don't. The register classes on the spill/reload may be different if
+ // the register is defined using an Altivec instruction and is then used by a
+ // VSX instruction.
+ if (Subtarget.hasVSX() && RC == &PPC::VRRCRegClass)
+ RC = &PPC::VSRCRegClass;
+
bool NonRI = false, SpillsVRS = false;
if (LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs,
NonRI, SpillsVRS))
@@ -1191,7 +1204,7 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
for (unsigned i = 0, e = NewMIs.size(); i != e; ++i)
MBB.insert(MI, NewMIs[i]);
- const MachineFrameInfo &MFI = *MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FrameIdx),
MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),
@@ -1200,7 +1213,7 @@ PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
}
bool PPCInstrInfo::
-ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
assert(Cond.size() == 2 && "Invalid PPC branch opcode!");
if (Cond[1].getReg() == PPC::CTR8 || Cond[1].getReg() == PPC::CTR)
Cond[0].setImm(Cond[0].getImm() == 0 ? 1 : 0);
@@ -1809,7 +1822,7 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
/// GetInstSize - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
///
-unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr &MI) const {
+unsigned PPCInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
unsigned Opcode = MI.getOpcode();
if (Opcode == PPC::INLINEASM) {
@@ -1817,10 +1830,11 @@ unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr &MI) const {
const char *AsmStr = MI.getOperand(0).getSymbolName();
return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
} else if (Opcode == TargetOpcode::STACKMAP) {
- return MI.getOperand(1).getImm();
+ StackMapOpers Opers(&MI);
+ return Opers.getNumPatchBytes();
} else if (Opcode == TargetOpcode::PATCHPOINT) {
PatchPointOpers Opers(&MI);
- return Opers.getMetaOper(PatchPointOpers::NBytesPos).getImm();
+ return Opers.getNumPatchBytes();
} else {
const MCInstrDesc &Desc = get(Opcode);
return Desc.getSize();
@@ -1872,6 +1886,48 @@ bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
.addReg(Reg);
return true;
}
+ case PPC::DFLOADf32:
+ case PPC::DFLOADf64:
+ case PPC::DFSTOREf32:
+ case PPC::DFSTOREf64: {
+ assert(Subtarget.hasP9Vector() &&
+ "Invalid D-Form Pseudo-ops on non-P9 target.");
+ unsigned UpperOpcode, LowerOpcode;
+ switch (MI.getOpcode()) {
+ case PPC::DFLOADf32:
+ UpperOpcode = PPC::LXSSP;
+ LowerOpcode = PPC::LFS;
+ break;
+ case PPC::DFLOADf64:
+ UpperOpcode = PPC::LXSD;
+ LowerOpcode = PPC::LFD;
+ break;
+ case PPC::DFSTOREf32:
+ UpperOpcode = PPC::STXSSP;
+ LowerOpcode = PPC::STFS;
+ break;
+ case PPC::DFSTOREf64:
+ UpperOpcode = PPC::STXSD;
+ LowerOpcode = PPC::STFD;
+ break;
+ }
+ unsigned TargetReg = MI.getOperand(0).getReg();
+ unsigned Opcode;
+ if ((TargetReg >= PPC::F0 && TargetReg <= PPC::F31) ||
+ (TargetReg >= PPC::VSL0 && TargetReg <= PPC::VSL31))
+ Opcode = LowerOpcode;
+ else
+ Opcode = UpperOpcode;
+ MI.setDesc(get(Opcode));
+ return true;
+ }
}
return false;
}
+
+const TargetRegisterClass *
+PPCInstrInfo::updatedRC(const TargetRegisterClass *RC) const {
+ if (Subtarget.hasVSX() && RC == &PPC::VRRCRegClass)
+ return &PPC::VSRCRegClass;
+ return RC;
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 98baf125bdff..32b2f009a3f5 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -61,6 +61,15 @@ enum PPC970_Unit {
PPC970_VPERM = 6 << PPC970_Shift, // Vector Permute Unit
PPC970_BRU = 7 << PPC970_Shift // Branch Unit
};
+
+enum {
+ /// Shift count to bypass PPC970 flags
+ NewDef_Shift = 6,
+
+  /// Indicates that the VSX instruction uses VSX registers (vs0-vs63)
+  /// instead of VMX registers (v0-v31).
+ UseVSXReg = 0x1 << NewDef_Shift
+};
} // end namespace PPCII
class PPCSubtarget;
@@ -168,10 +177,12 @@ public:
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const override;
- unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
- unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ unsigned removeBranch(MachineBasicBlock &MBB,
+ int *BytesRemoved = nullptr) const override;
+ unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
- const DebugLoc &DL) const override;
+ const DebugLoc &DL,
+ int *BytesAdded = nullptr) const override;
// Select analysis.
bool canInsertSelect(const MachineBasicBlock &, ArrayRef<MachineOperand> Cond,
@@ -198,7 +209,7 @@ public:
const TargetRegisterInfo *TRI) const override;
bool
- ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
+ reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, unsigned Reg,
MachineRegisterInfo *MRI) const override;
@@ -256,7 +267,7 @@ public:
/// GetInstSize - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
///
- unsigned GetInstSizeInBytes(const MachineInstr &MI) const;
+ unsigned getInstSizeInBytes(const MachineInstr &MI) const override;
void getNoopForMachoTarget(MCInst &NopInst) const override;
@@ -271,6 +282,14 @@ public:
// Lower pseudo instructions after register allocation.
bool expandPostRAPseudo(MachineInstr &MI) const override;
+
+ static bool isVFRegister(unsigned Reg) {
+ return Reg >= PPC::VF0 && Reg <= PPC::VF31;
+ }
+ static bool isVRRegister(unsigned Reg) {
+ return Reg >= PPC::V0 && Reg <= PPC::V31;
+ }
+ const TargetRegisterClass *updatedRC(const TargetRegisterClass *RC) const;
};
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index a40d4e1a4a69..a7231bd2e2c0 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -23,6 +23,15 @@ def SDT_PPCstfiwx : SDTypeProfile<0, 2, [ // stfiwx
def SDT_PPClfiwx : SDTypeProfile<1, 1, [ // lfiw[az]x
SDTCisVT<0, f64>, SDTCisPtrTy<1>
]>;
+def SDT_PPCLxsizx : SDTypeProfile<1, 2, [
+ SDTCisVT<0, f64>, SDTCisPtrTy<1>, SDTCisPtrTy<2>
+]>;
+def SDT_PPCstxsix : SDTypeProfile<0, 3, [
+ SDTCisVT<0, f64>, SDTCisPtrTy<1>, SDTCisPtrTy<2>
+]>;
+def SDT_PPCVexts : SDTypeProfile<1, 2, [
+ SDTCisVT<0, f64>, SDTCisVT<1, f64>, SDTCisPtrTy<2>
+]>;
def SDT_PPCCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>;
def SDT_PPCCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>,
@@ -108,6 +117,11 @@ def PPClfiwax : SDNode<"PPCISD::LFIWAX", SDT_PPClfiwx,
[SDNPHasChain, SDNPMayLoad]>;
def PPClfiwzx : SDNode<"PPCISD::LFIWZX", SDT_PPClfiwx,
[SDNPHasChain, SDNPMayLoad]>;
+def PPClxsizx : SDNode<"PPCISD::LXSIZX", SDT_PPCLxsizx,
+ [SDNPHasChain, SDNPMayLoad]>;
+def PPCstxsix : SDNode<"PPCISD::STXSIX", SDT_PPCstxsix,
+ [SDNPHasChain, SDNPMayStore]>;
+def PPCVexts : SDNode<"PPCISD::VEXTS", SDT_PPCVexts, []>;
// Extract FPSCR (not modeled at the DAG level).
def PPCmffs : SDNode<"PPCISD::MFFS",
@@ -312,6 +326,8 @@ def immZExt16 : PatLeaf<(imm), [{
// field. Used by instructions like 'ori'.
return (uint64_t)N->getZExtValue() == (unsigned short)N->getZExtValue();
}], LO16>;
+def immAnyExt8 : ImmLeaf<i32, [{ return isInt<8>(Imm) || isUInt<8>(Imm); }]>;
+def immSExt5NonZero : ImmLeaf<i32, [{ return Imm && isInt<5>(Imm); }]>;
// imm16Shifted* - These match immediates where the low 16-bits are zero. There
// are two forms: imm16ShiftedSExt and imm16ShiftedZExt. These two forms are
@@ -444,6 +460,12 @@ def PPCRegVRRCAsmOperand : AsmOperandClass {
def vrrc : RegisterOperand<VRRC> {
let ParserMatchClass = PPCRegVRRCAsmOperand;
}
+def PPCRegVFRCAsmOperand : AsmOperandClass {
+ let Name = "RegVFRC"; let PredicateMethod = "isRegNumber";
+}
+def vfrc : RegisterOperand<VFRC> {
+ let ParserMatchClass = PPCRegVFRCAsmOperand;
+}
def PPCRegCRBITRCAsmOperand : AsmOperandClass {
let Name = "RegCRBITRC"; let PredicateMethod = "isCRBitNumber";
}
@@ -478,6 +500,15 @@ def u2imm : Operand<i32> {
let ParserMatchClass = PPCU2ImmAsmOperand;
}
+def PPCATBitsAsHintAsmOperand : AsmOperandClass {
+ let Name = "ATBitsAsHint"; let PredicateMethod = "isATBitsAsHint";
+ let RenderMethod = "addImmOperands"; // Irrelevant, predicate always fails.
+}
+def atimm : Operand<i32> {
+ let PrintMethod = "printATBitsAsHint";
+ let ParserMatchClass = PPCATBitsAsHintAsmOperand;
+}
+
def PPCU3ImmAsmOperand : AsmOperandClass {
let Name = "U3Imm"; let PredicateMethod = "isU3Imm";
let RenderMethod = "addImmOperands";
@@ -591,6 +622,9 @@ def s17imm : Operand<i32> {
let ParserMatchClass = PPCS17ImmAsmOperand;
let DecoderMethod = "decodeSImmOperand<16>";
}
+
+def fpimm0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(+0.0); }]>;
+
def PPCDirectBrAsmOperand : AsmOperandClass {
let Name = "DirectBr"; let PredicateMethod = "isDirectBr";
let RenderMethod = "addBranchTargetOperands";
@@ -1448,9 +1482,6 @@ def RFEBB : XLForm_S<19, 146, (outs), (ins u1imm:$imm), "rfebb $imm",
def DCBA : DCB_Form<758, 0, (outs), (ins memrr:$dst), "dcba $dst",
IIC_LdStDCBF, [(int_ppc_dcba xoaddr:$dst)]>,
PPC970_DGroup_Single;
-def DCBF : DCB_Form<86, 0, (outs), (ins memrr:$dst), "dcbf $dst",
- IIC_LdStDCBF, [(int_ppc_dcbf xoaddr:$dst)]>,
- PPC970_DGroup_Single;
def DCBI : DCB_Form<470, 0, (outs), (ins memrr:$dst), "dcbi $dst",
IIC_LdStDCBF, [(int_ppc_dcbi xoaddr:$dst)]>,
PPC970_DGroup_Single;
@@ -1464,6 +1495,10 @@ def DCBZL : DCB_Form<1014, 1, (outs), (ins memrr:$dst), "dcbzl $dst",
IIC_LdStDCBF, [(int_ppc_dcbzl xoaddr:$dst)]>,
PPC970_DGroup_Single;
+def DCBF : DCB_Form_hint<86, (outs), (ins u5imm:$TH, memrr:$dst),
+ "dcbf $dst, $TH", IIC_LdStDCBF, []>,
+ PPC970_DGroup_Single;
+
let hasSideEffects = 0, mayLoad = 1, mayStore = 1 in {
def DCBT : DCB_Form_hint<278, (outs), (ins u5imm:$TH, memrr:$dst),
"dcbt $dst, $TH", IIC_LdStDCBF, []>,
@@ -1480,6 +1515,8 @@ def : Pat<(int_ppc_dcbt xoaddr:$dst),
(DCBT 0, xoaddr:$dst)>;
def : Pat<(int_ppc_dcbtst xoaddr:$dst),
(DCBTST 0, xoaddr:$dst)>;
+def : Pat<(int_ppc_dcbf xoaddr:$dst),
+ (DCBF 0, xoaddr:$dst)>;
def : Pat<(prefetch xoaddr:$dst, (i32 0), imm, (i32 1)),
(DCBT 0, xoaddr:$dst)>; // data prefetch for loads
@@ -2146,15 +2183,15 @@ let Uses = [RM] in {
defm FRSP : XForm_26r<63, 12, (outs f4rc:$frD), (ins f8rc:$frB),
"frsp", "$frD, $frB", IIC_FPGeneral,
- [(set f32:$frD, (fround f64:$frB))]>;
+ [(set f32:$frD, (fpround f64:$frB))]>;
let Interpretation64Bit = 1, isCodeGenOnly = 1 in
defm FRIND : XForm_26r<63, 392, (outs f8rc:$frD), (ins f8rc:$frB),
"frin", "$frD, $frB", IIC_FPGeneral,
- [(set f64:$frD, (frnd f64:$frB))]>;
+ [(set f64:$frD, (fround f64:$frB))]>;
defm FRINS : XForm_26r<63, 392, (outs f4rc:$frD), (ins f4rc:$frB),
"frin", "$frD, $frB", IIC_FPGeneral,
- [(set f32:$frD, (frnd f32:$frB))]>;
+ [(set f32:$frD, (fround f32:$frB))]>;
}
let hasSideEffects = 0 in {
@@ -2892,7 +2929,7 @@ def : Pat<(f64 (extloadf32 iaddr:$src)),
def : Pat<(f64 (extloadf32 xaddr:$src)),
(COPY_TO_REGCLASS (LFSX xaddr:$src), F8RC)>;
-def : Pat<(f64 (fextend f32:$src)),
+def : Pat<(f64 (fpextend f32:$src)),
(COPY_TO_REGCLASS $src, F8RC)>;
// Only seq_cst fences require the heavyweight sync (SYNC 0).
@@ -3185,6 +3222,46 @@ defm : ExtSetCCPat<SETLE,
OutPatFrag<(ops node:$in),
(RLDICL $in, 1, 63)> >;
+// An extended SETCC with shift amount.
+multiclass ExtSetCCShiftPat<CondCode cc, PatFrag pfrag,
+ OutPatFrag rfrag, OutPatFrag rfrag8> {
+ def : Pat<(i32 (zext (i1 (pfrag i32:$s1, i32:$sa, cc)))),
+ (rfrag $s1, $sa)>;
+ def : Pat<(i64 (zext (i1 (pfrag i64:$s1, i32:$sa, cc)))),
+ (rfrag8 $s1, $sa)>;
+ def : Pat<(i64 (zext (i1 (pfrag i32:$s1, i32:$sa, cc)))),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (rfrag $s1, $sa), sub_32)>;
+ def : Pat<(i32 (zext (i1 (pfrag i64:$s1, i32:$sa, cc)))),
+ (EXTRACT_SUBREG (rfrag8 $s1, $sa), sub_32)>;
+
+ def : Pat<(i32 (anyext (i1 (pfrag i32:$s1, i32:$sa, cc)))),
+ (rfrag $s1, $sa)>;
+ def : Pat<(i64 (anyext (i1 (pfrag i64:$s1, i32:$sa, cc)))),
+ (rfrag8 $s1, $sa)>;
+ def : Pat<(i64 (anyext (i1 (pfrag i32:$s1, i32:$sa, cc)))),
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)), (rfrag $s1, $sa), sub_32)>;
+ def : Pat<(i32 (anyext (i1 (pfrag i64:$s1, i32:$sa, cc)))),
+ (EXTRACT_SUBREG (rfrag8 $s1, $sa), sub_32)>;
+}
+
+defm : ExtSetCCShiftPat<SETNE,
+ PatFrag<(ops node:$in, node:$sa, node:$cc),
+ (setcc (and $in, (shl 1, $sa)), 0, $cc)>,
+ OutPatFrag<(ops node:$in, node:$sa),
+ (RLWNM $in, (SUBFIC $sa, 32), 31, 31)>,
+ OutPatFrag<(ops node:$in, node:$sa),
+ (RLDCL $in, (SUBFIC $sa, 64), 63)> >;
+
+defm : ExtSetCCShiftPat<SETEQ,
+ PatFrag<(ops node:$in, node:$sa, node:$cc),
+ (setcc (and $in, (shl 1, $sa)), 0, $cc)>,
+ OutPatFrag<(ops node:$in, node:$sa),
+ (RLWNM (i32not $in),
+ (SUBFIC $sa, 32), 31, 31)>,
+ OutPatFrag<(ops node:$in, node:$sa),
+ (RLDCL (i64not $in),
+ (SUBFIC $sa, 64), 63)> >;
+
// SETCC for i32.
def : Pat<(i1 (setcc i32:$s1, immZExt16:$imm, SETULT)),
(EXTRACT_SUBREG (CMPLWI $s1, imm:$imm), sub_lt)>;
@@ -3654,6 +3731,9 @@ def SLBMTE : XForm_26<31, 402, (outs), (ins gprc:$RS, gprc:$RB),
def SLBMFEE : XForm_26<31, 915, (outs gprc:$RT), (ins gprc:$RB),
"slbmfee $RT, $RB", IIC_SprSLBMFEE, []>;
+def SLBMFEV : XLForm_1_gen<31, 851, (outs gprc:$RT), (ins gprc:$RB),
+ "slbmfev $RT, $RB", IIC_SprSLBMFEV, []>;
+
def SLBIA : XForm_0<31, 498, (outs), (ins), "slbia", IIC_SprSLBIA, []>;
def TLBIA : XForm_0<31, 370, (outs), (ins),
@@ -3716,6 +3796,9 @@ def MFDCR : XFXForm_1<31, 323, (outs gprc:$RT), (ins i32imm:$SPR),
def MTDCR : XFXForm_1<31, 451, (outs), (ins gprc:$RT, i32imm:$SPR),
"mtdcr $SPR, $RT", IIC_SprMTSPR>, Requires<[IsPPC4xx]>;
+def HRFID : XLForm_1_np<19, 274, (outs), (ins), "hrfid", IIC_BrB, []>;
+def NAP : XLForm_1_np<19, 434, (outs), (ins), "nap", IIC_BrB, []>;
+
def ATTN : XForm_attn<0, 256, (outs), (ins), "attn", IIC_BrB>;
def LBZCIX : XForm_base_r3xo<31, 853, (outs gprc:$RST), (ins gprc:$A, gprc:$B),
@@ -3780,6 +3863,10 @@ def DCBTSTCT : PPCAsmPseudo<"dcbtstct $dst, $TH", (ins memrr:$dst, u5imm:$TH)>;
def DCBTSTDS : PPCAsmPseudo<"dcbtstds $dst, $TH", (ins memrr:$dst, u5imm:$TH)>;
def DCBTSTT : PPCAsmPseudo<"dcbtstt $dst", (ins memrr:$dst)>;
+def DCBFx : PPCAsmPseudo<"dcbf $dst", (ins memrr:$dst)>;
+def DCBFL : PPCAsmPseudo<"dcbfl $dst", (ins memrr:$dst)>;
+def DCBFLP : PPCAsmPseudo<"dcbflp $dst", (ins memrr:$dst)>;
+
def : InstAlias<"crset $bx", (CREQV crbitrc:$bx, crbitrc:$bx, crbitrc:$bx)>;
def : InstAlias<"crclr $bx", (CRXOR crbitrc:$bx, crbitrc:$bx, crbitrc:$bx)>;
def : InstAlias<"crmove $bx, $by", (CROR crbitrc:$bx, crbitrc:$by, crbitrc:$by)>;
@@ -4081,6 +4168,16 @@ let PPC970_Unit = 7 in {
def gBCA : BForm_3<16, 1, 0, (outs),
(ins u5imm:$bo, crbitrc:$bi, abscondbrtarget:$dst),
"bca $bo, $bi, $dst">;
+ let isAsmParserOnly = 1 in {
+ def gBCat : BForm_3_at<16, 0, 0, (outs),
+ (ins u5imm:$bo, atimm:$at, crbitrc:$bi,
+ condbrtarget:$dst),
+ "bc$at $bo, $bi, $dst">;
+ def gBCAat : BForm_3_at<16, 1, 0, (outs),
+ (ins u5imm:$bo, atimm:$at, crbitrc:$bi,
+ abscondbrtarget:$dst),
+ "bca$at $bo, $bi, $dst">;
+ } // isAsmParserOnly = 1
}
let Defs = [LR, CTR], Uses = [CTR, RM] in {
def gBCL : BForm_3<16, 0, 1, (outs),
@@ -4089,6 +4186,16 @@ let PPC970_Unit = 7 in {
def gBCLA : BForm_3<16, 1, 1, (outs),
(ins u5imm:$bo, crbitrc:$bi, abscondbrtarget:$dst),
"bcla $bo, $bi, $dst">;
+ let isAsmParserOnly = 1 in {
+ def gBCLat : BForm_3_at<16, 0, 1, (outs),
+ (ins u5imm:$bo, atimm:$at, crbitrc:$bi,
+ condbrtarget:$dst),
+ "bcl$at $bo, $bi, $dst">;
+ def gBCLAat : BForm_3_at<16, 1, 1, (outs),
+ (ins u5imm:$bo, atimm:$at, crbitrc:$bi,
+ abscondbrtarget:$dst),
+ "bcla$at $bo, $bi, $dst">;
+  } // isAsmParserOnly = 1
}
let Defs = [CTR], Uses = [CTR, LR, RM] in
def gBCLR : XLForm_2<19, 16, 0, (outs),
@@ -4107,6 +4214,20 @@ let PPC970_Unit = 7 in {
(ins u5imm:$bo, crbitrc:$bi, i32imm:$bh),
"bcctrl $bo, $bi, $bh", IIC_BrB, []>;
}
+
+multiclass BranchSimpleMnemonicAT<string pm, int at> {
+ def : InstAlias<"bc"#pm#" $bo, $bi, $dst", (gBCat u5imm:$bo, at, crbitrc:$bi,
+ condbrtarget:$dst)>;
+ def : InstAlias<"bca"#pm#" $bo, $bi, $dst", (gBCAat u5imm:$bo, at, crbitrc:$bi,
+ condbrtarget:$dst)>;
+ def : InstAlias<"bcl"#pm#" $bo, $bi, $dst", (gBCLat u5imm:$bo, at, crbitrc:$bi,
+ condbrtarget:$dst)>;
+ def : InstAlias<"bcla"#pm#" $bo, $bi, $dst", (gBCLAat u5imm:$bo, at, crbitrc:$bi,
+ condbrtarget:$dst)>;
+}
+defm : BranchSimpleMnemonicAT<"+", 3>;
+defm : BranchSimpleMnemonicAT<"-", 2>;
+
def : InstAlias<"bclr $bo, $bi", (gBCLR u5imm:$bo, crbitrc:$bi, 0)>;
def : InstAlias<"bclrl $bo, $bi", (gBCLRL u5imm:$bo, crbitrc:$bi, 0)>;
def : InstAlias<"bcctr $bo, $bi", (gBCCTR u5imm:$bo, crbitrc:$bi, 0)>;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td
index 43120070d799..4940c77c7ae5 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrQPX.td
@@ -88,11 +88,11 @@ def pre_truncstv4f32 : PatFrag<(ops node:$val, node:$base, node:$offset),
return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4f32;
}]>;
-def fround_inexact : PatFrag<(ops node:$val), (fround node:$val), [{
+def fround_inexact : PatFrag<(ops node:$val), (fpround node:$val), [{
return cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() == 0;
}]>;
-def fround_exact : PatFrag<(ops node:$val), (fround node:$val), [{
+def fround_exact : PatFrag<(ops node:$val), (fpround node:$val), [{
return cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() == 1;
}]>;
@@ -311,11 +311,11 @@ let Uses = [RM] in {
def QVFRIN : XForm_19<4, 392, (outs qfrc:$FRT), (ins qfrc:$FRB),
"qvfrin $FRT, $FRB", IIC_FPGeneral,
- [(set v4f64:$FRT, (frnd v4f64:$FRB))]>;
+ [(set v4f64:$FRT, (fround v4f64:$FRB))]>;
let isCodeGenOnly = 1 in
def QVFRINs : XForm_19<4, 392, (outs qsrc:$FRT), (ins qsrc:$FRB),
"qvfrin $FRT, $FRB", IIC_FPGeneral,
- [(set v4f32:$FRT, (frnd v4f32:$FRB))]>;
+ [(set v4f32:$FRT, (fround v4f32:$FRB))]>;
def QVFRIP : XForm_19<4, 456, (outs qfrc:$FRT), (ins qfrc:$FRB),
"qvfrip $FRT, $FRB", IIC_FPGeneral,
@@ -1103,7 +1103,7 @@ def : Pat<(xor v4i1:$FRA, v4i1:$FRB),
def : Pat<(not v4i1:$FRA),
(QVFLOGICALb $FRA, $FRA, (i32 10))>;
-def : Pat<(v4f64 (fextend v4f32:$src)),
+def : Pat<(v4f64 (fpextend v4f32:$src)),
(COPY_TO_REGCLASS $src, QFRC)>;
def : Pat<(v4f32 (fround_exact v4f64:$src)),
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index a02ace00a76f..0d9e3459f47e 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -89,22 +89,42 @@ multiclass XX3Form_Rcr<bits<6> opcode, bits<7> xo, string asmbase,
}
}
+// Instruction form with a single input register for instructions such as
+// XXPERMDI. The reason for defining this is that specifying multiple chained
+// operands (such as loads) to an instruction will perform both chained
+// operations rather than coalescing them into a single register - even though
+// the source memory location is the same. This simply forces the instruction
+// to use the same register for both inputs.
+// For example, an output DAG such as this:
+// (XXPERMDI (LXSIBZX xoaddr:$src), (LXSIBZX xoaddr:$src), 0)
+// would result in two load instructions emitted and used as separate inputs
+// to the XXPERMDI instruction.
+class XX3Form_2s<bits<6> opcode, bits<5> xo, dag OOL, dag IOL, string asmstr,
+ InstrItinClass itin, list<dag> pattern>
+ : XX3Form_2<opcode, xo, OOL, IOL, asmstr, itin, pattern> {
+ let XB = XA;
+}
+
def HasVSX : Predicate<"PPCSubTarget->hasVSX()">;
def IsLittleEndian : Predicate<"PPCSubTarget->isLittleEndian()">;
def IsBigEndian : Predicate<"!PPCSubTarget->isLittleEndian()">;
+def HasOnlySwappingMemOps : Predicate<"!PPCSubTarget->hasP9Vector()">;
let Predicates = [HasVSX] in {
let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
+let UseVSXReg = 1 in {
let hasSideEffects = 0 in { // VSX instructions don't have side effects.
let Uses = [RM] in {
// Load indexed instructions
let mayLoad = 1 in {
+ let CodeSize = 3 in
def LXSDX : XX1Form<31, 588,
(outs vsfrc:$XT), (ins memrr:$src),
"lxsdx $XT, $src", IIC_LdStLFD,
[(set f64:$XT, (load xoaddr:$src))]>;
+ let Predicates = [HasVSX, HasOnlySwappingMemOps] in
def LXVD2X : XX1Form<31, 844,
(outs vsrc:$XT), (ins memrr:$src),
"lxvd2x $XT, $src", IIC_LdStLFD,
@@ -114,6 +134,7 @@ let Uses = [RM] in {
(outs vsrc:$XT), (ins memrr:$src),
"lxvdsx $XT, $src", IIC_LdStLFD, []>;
+ let Predicates = [HasVSX, HasOnlySwappingMemOps] in
def LXVW4X : XX1Form<31, 780,
(outs vsrc:$XT), (ins memrr:$src),
"lxvw4x $XT, $src", IIC_LdStLFD,
@@ -122,21 +143,25 @@ let Uses = [RM] in {
// Store indexed instructions
let mayStore = 1 in {
+ let CodeSize = 3 in
def STXSDX : XX1Form<31, 716,
(outs), (ins vsfrc:$XT, memrr:$dst),
"stxsdx $XT, $dst", IIC_LdStSTFD,
[(store f64:$XT, xoaddr:$dst)]>;
+ let Predicates = [HasVSX, HasOnlySwappingMemOps] in {
+ // The behaviour of this instruction is endianness-specific so we provide no
+ // pattern to match it without considering endianness.
def STXVD2X : XX1Form<31, 972,
(outs), (ins vsrc:$XT, memrr:$dst),
"stxvd2x $XT, $dst", IIC_LdStSTFD,
- [(store v2f64:$XT, xoaddr:$dst)]>;
+ []>;
def STXVW4X : XX1Form<31, 908,
(outs), (ins vsrc:$XT, memrr:$dst),
"stxvw4x $XT, $dst", IIC_LdStSTFD,
[(store v4i32:$XT, xoaddr:$dst)]>;
-
+ }
} // mayStore
// Add/Mul Instructions
@@ -545,18 +570,38 @@ let Uses = [RM] in {
(outs vsfrc:$XT), (ins vsfrc:$XB),
"xscvdpsxds $XT, $XB", IIC_VecFP,
[(set f64:$XT, (PPCfctidz f64:$XB))]>;
+ let isCodeGenOnly = 1 in
+ def XSCVDPSXDSs : XX2Form<60, 344,
+ (outs vssrc:$XT), (ins vssrc:$XB),
+ "xscvdpsxds $XT, $XB", IIC_VecFP,
+ [(set f32:$XT, (PPCfctidz f32:$XB))]>;
def XSCVDPSXWS : XX2Form<60, 88,
(outs vsfrc:$XT), (ins vsfrc:$XB),
"xscvdpsxws $XT, $XB", IIC_VecFP,
[(set f64:$XT, (PPCfctiwz f64:$XB))]>;
+ let isCodeGenOnly = 1 in
+ def XSCVDPSXWSs : XX2Form<60, 88,
+ (outs vssrc:$XT), (ins vssrc:$XB),
+ "xscvdpsxws $XT, $XB", IIC_VecFP,
+ [(set f32:$XT, (PPCfctiwz f32:$XB))]>;
def XSCVDPUXDS : XX2Form<60, 328,
(outs vsfrc:$XT), (ins vsfrc:$XB),
"xscvdpuxds $XT, $XB", IIC_VecFP,
[(set f64:$XT, (PPCfctiduz f64:$XB))]>;
+ let isCodeGenOnly = 1 in
+ def XSCVDPUXDSs : XX2Form<60, 328,
+ (outs vssrc:$XT), (ins vssrc:$XB),
+ "xscvdpuxds $XT, $XB", IIC_VecFP,
+ [(set f32:$XT, (PPCfctiduz f32:$XB))]>;
def XSCVDPUXWS : XX2Form<60, 72,
(outs vsfrc:$XT), (ins vsfrc:$XB),
"xscvdpuxws $XT, $XB", IIC_VecFP,
[(set f64:$XT, (PPCfctiwuz f64:$XB))]>;
+ let isCodeGenOnly = 1 in
+ def XSCVDPUXWSs : XX2Form<60, 72,
+ (outs vssrc:$XT), (ins vssrc:$XB),
+ "xscvdpuxws $XT, $XB", IIC_VecFP,
+ [(set f32:$XT, (PPCfctiwuz f32:$XB))]>;
def XSCVSPDP : XX2Form<60, 329,
(outs vsfrc:$XT), (ins vsfrc:$XB),
"xscvspdp $XT, $XB", IIC_VecFP, []>;
@@ -571,47 +616,55 @@ let Uses = [RM] in {
def XVCVDPSP : XX2Form<60, 393,
(outs vsrc:$XT), (ins vsrc:$XB),
- "xvcvdpsp $XT, $XB", IIC_VecFP, []>;
+ "xvcvdpsp $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (int_ppc_vsx_xvcvdpsp v2f64:$XB))]>;
def XVCVDPSXDS : XX2Form<60, 472,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvcvdpsxds $XT, $XB", IIC_VecFP,
[(set v2i64:$XT, (fp_to_sint v2f64:$XB))]>;
def XVCVDPSXWS : XX2Form<60, 216,
(outs vsrc:$XT), (ins vsrc:$XB),
- "xvcvdpsxws $XT, $XB", IIC_VecFP, []>;
+ "xvcvdpsxws $XT, $XB", IIC_VecFP,
+ [(set v4i32:$XT, (int_ppc_vsx_xvcvdpsxws v2f64:$XB))]>;
def XVCVDPUXDS : XX2Form<60, 456,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvcvdpuxds $XT, $XB", IIC_VecFP,
[(set v2i64:$XT, (fp_to_uint v2f64:$XB))]>;
def XVCVDPUXWS : XX2Form<60, 200,
(outs vsrc:$XT), (ins vsrc:$XB),
- "xvcvdpuxws $XT, $XB", IIC_VecFP, []>;
+ "xvcvdpuxws $XT, $XB", IIC_VecFP,
+ [(set v4i32:$XT, (int_ppc_vsx_xvcvdpuxws v2f64:$XB))]>;
def XVCVSPDP : XX2Form<60, 457,
(outs vsrc:$XT), (ins vsrc:$XB),
- "xvcvspdp $XT, $XB", IIC_VecFP, []>;
+ "xvcvspdp $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (int_ppc_vsx_xvcvspdp v4f32:$XB))]>;
def XVCVSPSXDS : XX2Form<60, 408,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvcvspsxds $XT, $XB", IIC_VecFP, []>;
def XVCVSPSXWS : XX2Form<60, 152,
(outs vsrc:$XT), (ins vsrc:$XB),
- "xvcvspsxws $XT, $XB", IIC_VecFP, []>;
+ "xvcvspsxws $XT, $XB", IIC_VecFP,
+ [(set v4i32:$XT, (fp_to_sint v4f32:$XB))]>;
def XVCVSPUXDS : XX2Form<60, 392,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvcvspuxds $XT, $XB", IIC_VecFP, []>;
def XVCVSPUXWS : XX2Form<60, 136,
(outs vsrc:$XT), (ins vsrc:$XB),
- "xvcvspuxws $XT, $XB", IIC_VecFP, []>;
+ "xvcvspuxws $XT, $XB", IIC_VecFP,
+ [(set v4i32:$XT, (fp_to_uint v4f32:$XB))]>;
def XVCVSXDDP : XX2Form<60, 504,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvcvsxddp $XT, $XB", IIC_VecFP,
[(set v2f64:$XT, (sint_to_fp v2i64:$XB))]>;
def XVCVSXDSP : XX2Form<60, 440,
(outs vsrc:$XT), (ins vsrc:$XB),
- "xvcvsxdsp $XT, $XB", IIC_VecFP, []>;
+ "xvcvsxdsp $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (int_ppc_vsx_xvcvsxdsp v2i64:$XB))]>;
def XVCVSXWDP : XX2Form<60, 248,
(outs vsrc:$XT), (ins vsrc:$XB),
- "xvcvsxwdp $XT, $XB", IIC_VecFP, []>;
+ "xvcvsxwdp $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (int_ppc_vsx_xvcvsxwdp v4i32:$XB))]>;
def XVCVSXWSP : XX2Form<60, 184,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvcvsxwsp $XT, $XB", IIC_VecFP,
@@ -622,19 +675,22 @@ let Uses = [RM] in {
[(set v2f64:$XT, (uint_to_fp v2i64:$XB))]>;
def XVCVUXDSP : XX2Form<60, 424,
(outs vsrc:$XT), (ins vsrc:$XB),
- "xvcvuxdsp $XT, $XB", IIC_VecFP, []>;
+ "xvcvuxdsp $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (int_ppc_vsx_xvcvuxdsp v2i64:$XB))]>;
def XVCVUXWDP : XX2Form<60, 232,
(outs vsrc:$XT), (ins vsrc:$XB),
- "xvcvuxwdp $XT, $XB", IIC_VecFP, []>;
+ "xvcvuxwdp $XT, $XB", IIC_VecFP,
+ [(set v2f64:$XT, (int_ppc_vsx_xvcvuxwdp v4i32:$XB))]>;
def XVCVUXWSP : XX2Form<60, 168,
(outs vsrc:$XT), (ins vsrc:$XB),
- "xvcvuxwsp $XT, $XB", IIC_VecFP, []>;
+ "xvcvuxwsp $XT, $XB", IIC_VecFP,
+ [(set v4f32:$XT, (uint_to_fp v4i32:$XB))]>;
// Rounding Instructions
def XSRDPI : XX2Form<60, 73,
(outs vsfrc:$XT), (ins vsfrc:$XB),
"xsrdpi $XT, $XB", IIC_VecFP,
- [(set f64:$XT, (frnd f64:$XB))]>;
+ [(set f64:$XT, (fround f64:$XB))]>;
def XSRDPIC : XX2Form<60, 107,
(outs vsfrc:$XT), (ins vsfrc:$XB),
"xsrdpic $XT, $XB", IIC_VecFP,
@@ -655,7 +711,7 @@ let Uses = [RM] in {
def XVRDPI : XX2Form<60, 201,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvrdpi $XT, $XB", IIC_VecFP,
- [(set v2f64:$XT, (frnd v2f64:$XB))]>;
+ [(set v2f64:$XT, (fround v2f64:$XB))]>;
def XVRDPIC : XX2Form<60, 235,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvrdpic $XT, $XB", IIC_VecFP,
@@ -676,7 +732,7 @@ let Uses = [RM] in {
def XVRSPI : XX2Form<60, 137,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvrspi $XT, $XB", IIC_VecFP,
- [(set v4f32:$XT, (frnd v4f32:$XB))]>;
+ [(set v4f32:$XT, (fround v4f32:$XB))]>;
def XVRSPIC : XX2Form<60, 171,
(outs vsrc:$XT), (ins vsrc:$XB),
"xvrspic $XT, $XB", IIC_VecFP,
@@ -761,6 +817,21 @@ let Uses = [RM] in {
"xxlxor $XT, $XA, $XB", IIC_VecGeneral,
[(set v4i32:$XT, (xor v4i32:$XA, v4i32:$XB))]>;
} // isCommutable
+ let isCodeGenOnly = 1 in
+ def XXLXORz : XX3Form_Zero<60, 154, (outs vsrc:$XT), (ins),
+ "xxlxor $XT, $XT, $XT", IIC_VecGeneral,
+ [(set v4i32:$XT, (v4i32 immAllZerosV))]>;
+
+ let isCodeGenOnly = 1 in {
+ def XXLXORdpz : XX3Form_SetZero<60, 154,
+ (outs vsfrc:$XT), (ins),
+ "xxlxor $XT, $XT, $XT", IIC_VecGeneral,
+ [(set f64:$XT, (fpimm0))]>;
+ def XXLXORspz : XX3Form_SetZero<60, 154,
+ (outs vssrc:$XT), (ins),
+ "xxlxor $XT, $XT, $XT", IIC_VecGeneral,
+ [(set f32:$XT, (fpimm0))]>;
+ }
// Permutation Instructions
def XXMRGHW : XX3Form<60, 18,
@@ -773,6 +844,9 @@ let Uses = [RM] in {
def XXPERMDI : XX3Form_2<60, 10,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, u2imm:$DM),
"xxpermdi $XT, $XA, $XB, $DM", IIC_VecPerm, []>;
+ let isCodeGenOnly = 1 in
+ def XXPERMDIs : XX3Form_2s<60, 10, (outs vsrc:$XT), (ins vsfrc:$XA, u2imm:$DM),
+ "xxpermdi $XT, $XA, $XA, $DM", IIC_VecPerm, []>;
def XXSEL : XX4Form<60, 3,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB, vsrc:$XC),
"xxsel $XT, $XA, $XB, $XC", IIC_VecPerm, []>;
@@ -787,7 +861,12 @@ let Uses = [RM] in {
"xxspltw $XT, $XB, $UIM", IIC_VecPerm,
[(set v4i32:$XT,
(PPCxxsplt v4i32:$XB, imm32SExt16:$UIM))]>;
+ let isCodeGenOnly = 1 in
+ def XXSPLTWs : XX2Form_2<60, 164,
+ (outs vsrc:$XT), (ins vfrc:$XB, u2imm:$UIM),
+ "xxspltw $XT, $XB, $UIM", IIC_VecPerm, []>;
} // hasSideEffects
+} // UseVSXReg = 1
// SELECT_CC_* - Used to implement the SELECT_CC DAG operation. Expanded after
// instruction selection into a branch sequence.
@@ -839,9 +918,17 @@ def : InstAlias<"xxmrgld $XT, $XA, $XB",
(XXPERMDI vsrc:$XT, vsrc:$XA, vsrc:$XB, 3)>;
def : InstAlias<"xxswapd $XT, $XB",
(XXPERMDI vsrc:$XT, vsrc:$XB, vsrc:$XB, 2)>;
+def : InstAlias<"xxspltd $XT, $XB, 0",
+ (XXPERMDIs vsrc:$XT, vsfrc:$XB, 0)>;
+def : InstAlias<"xxspltd $XT, $XB, 1",
+ (XXPERMDIs vsrc:$XT, vsfrc:$XB, 3)>;
+def : InstAlias<"xxswapd $XT, $XB",
+ (XXPERMDIs vsrc:$XT, vsfrc:$XB, 2)>;
let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
+def : Pat<(v4i32 (vnot_ppc v4i32:$A)),
+ (v4i32 (XXLNOR $A, $A))>;
let Predicates = [IsBigEndian] in {
def : Pat<(v2f64 (scalar_to_vector f64:$A)),
(v2f64 (SUBREG_TO_REG (i64 1), $A, sub_64))>;
@@ -948,18 +1035,27 @@ def : Pat<(v2f64 (PPCuvec2fp v4i32:$C, 1)),
(v2f64 (XVCVUXWDP (v2i64 (XXMRGLW $C, $C))))>;
// Loads.
-def : Pat<(v2f64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
-def : Pat<(v2i64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
-def : Pat<(v4i32 (load xoaddr:$src)), (LXVW4X xoaddr:$src)>;
-def : Pat<(v2f64 (PPClxvd2x xoaddr:$src)), (LXVD2X xoaddr:$src)>;
-
-// Stores.
-def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst),
- (STXVD2X $rS, xoaddr:$dst)>;
-def : Pat<(store v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
-def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst),
- (STXVW4X $rS, xoaddr:$dst)>;
-def : Pat<(PPCstxvd2x v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
+let Predicates = [HasVSX, HasOnlySwappingMemOps] in {
+ def : Pat<(v2f64 (PPClxvd2x xoaddr:$src)), (LXVD2X xoaddr:$src)>;
+
+ // Stores.
+ def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst),
+ (STXVD2X $rS, xoaddr:$dst)>;
+ def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst),
+ (STXVW4X $rS, xoaddr:$dst)>;
+ def : Pat<(int_ppc_vsx_stxvd2x_be v2f64:$rS, xoaddr:$dst),
+ (STXVD2X $rS, xoaddr:$dst)>;
+ def : Pat<(int_ppc_vsx_stxvw4x_be v4i32:$rS, xoaddr:$dst),
+ (STXVW4X $rS, xoaddr:$dst)>;
+ def : Pat<(PPCstxvd2x v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
+}
+let Predicates = [IsBigEndian, HasVSX, HasOnlySwappingMemOps] in {
+ def : Pat<(v2f64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
+ def : Pat<(v2i64 (load xoaddr:$src)), (LXVD2X xoaddr:$src)>;
+ def : Pat<(v4i32 (load xoaddr:$src)), (LXVW4X xoaddr:$src)>;
+ def : Pat<(store v2f64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
+ def : Pat<(store v2i64:$rS, xoaddr:$dst), (STXVD2X $rS, xoaddr:$dst)>;
+}
// Permutes.
def : Pat<(v2f64 (PPCxxswapd v2f64:$src)), (XXPERMDI $src, $src, 2)>;
@@ -1054,6 +1150,22 @@ def : Pat<(f64 (PPCfcfidu (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
} // AddedComplexity
} // HasVSX
+def ScalarLoads {
+ dag Li8 = (i32 (extloadi8 xoaddr:$src));
+ dag ZELi8 = (i32 (zextloadi8 xoaddr:$src));
+ dag ZELi8i64 = (i64 (zextloadi8 xoaddr:$src));
+ dag SELi8 = (i32 (sext_inreg (extloadi8 xoaddr:$src), i8));
+ dag SELi8i64 = (i64 (sext_inreg (extloadi8 xoaddr:$src), i8));
+
+ dag Li16 = (i32 (extloadi16 xoaddr:$src));
+ dag ZELi16 = (i32 (zextloadi16 xoaddr:$src));
+ dag ZELi16i64 = (i64 (zextloadi16 xoaddr:$src));
+ dag SELi16 = (i32 (sextloadi16 xoaddr:$src));
+ dag SELi16i64 = (i64 (sextloadi16 xoaddr:$src));
+
+ dag Li32 = (i32 (load xoaddr:$src));
+}
+
// The following VSX instructions were introduced in Power ISA 2.07
/* FIXME: if the operands are v2i64, these patterns will not match.
we should define new patterns or otherwise match the same patterns
@@ -1063,7 +1175,7 @@ def HasP8Vector : Predicate<"PPCSubTarget->hasP8Vector()">;
def HasDirectMove : Predicate<"PPCSubTarget->hasDirectMove()">;
let Predicates = [HasP8Vector] in {
let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
- let isCommutable = 1 in {
+ let isCommutable = 1, UseVSXReg = 1 in {
def XXLEQV : XX3Form<60, 186,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xxleqv $XT, $XA, $XB", IIC_VecGeneral,
@@ -1073,11 +1185,12 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
"xxlnand $XT, $XA, $XB", IIC_VecGeneral,
[(set v4i32:$XT, (vnot_ppc (and v4i32:$XA,
v4i32:$XB)))]>;
- } // isCommutable
+ } // isCommutable, UseVSXReg
def : Pat<(int_ppc_vsx_xxleqv v4i32:$A, v4i32:$B),
(XXLEQV $A, $B)>;
+ let UseVSXReg = 1 in {
def XXLORC : XX3Form<60, 170,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xxlorc $XT, $XA, $XB", IIC_VecGeneral,
@@ -1085,6 +1198,7 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
// VSX scalar loads introduced in ISA 2.07
let mayLoad = 1 in {
+ let CodeSize = 3 in
def LXSSPX : XX1Form<31, 524, (outs vssrc:$XT), (ins memrr:$src),
"lxsspx $XT, $src", IIC_LdStLFD,
[(set f32:$XT, (load xoaddr:$src))]>;
@@ -1098,6 +1212,7 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
// VSX scalar stores introduced in ISA 2.07
let mayStore = 1 in {
+ let CodeSize = 3 in
def STXSSPX : XX1Form<31, 652, (outs), (ins vssrc:$XT, memrr:$dst),
"stxsspx $XT, $dst", IIC_LdStSTFD,
[(store f32:$XT, xoaddr:$dst)]>;
@@ -1105,10 +1220,13 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
"stxsiwx $XT, $dst", IIC_LdStSTFD,
[(PPCstfiwx f64:$XT, xoaddr:$dst)]>;
} // mayStore
+ } // UseVSXReg = 1
def : Pat<(f64 (extloadf32 xoaddr:$src)),
(COPY_TO_REGCLASS (LXSSPX xoaddr:$src), VSFRC)>;
- def : Pat<(f64 (fextend f32:$src)),
+ def : Pat<(f32 (fpround (extloadf32 xoaddr:$src))),
+ (f32 (LXSSPX xoaddr:$src))>;
+ def : Pat<(f64 (fpextend f32:$src)),
(COPY_TO_REGCLASS $src, VSFRC)>;
def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETLT)),
@@ -1132,6 +1250,7 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
def : Pat<(f32 (selectcc i1:$lhs, i1:$rhs, f32:$tval, f32:$fval, SETNE)),
(SELECT_VSSRC (CRXOR $lhs, $rhs), $tval, $fval)>;
+ let UseVSXReg = 1 in {
// VSX Elementary Scalar FP arithmetic (SP)
let isCommutable = 1 in {
def XSADDSP : XX3Form<60, 0,
@@ -1256,6 +1375,7 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
"xscvdpspn $XT, $XB", IIC_VecFP, []>;
def XSCVSPDPN : XX2Form<60, 331, (outs vssrc:$XT), (ins vsrc:$XB),
"xscvspdpn $XT, $XB", IIC_VecFP, []>;
+ } // UseVSXReg = 1
let Predicates = [IsLittleEndian] in {
def : Pat<(f32 (PPCfcfids (PPCmtvsra (i64 (vector_extract v2i64:$S, 0))))),
@@ -1278,9 +1398,12 @@ let AddedComplexity = 400 in { // Prefer VSX patterns over non-VSX patterns.
def : Pat<(f32 (PPCfcfidus (PPCmtvsra (i64 (vector_extract v2i64:$S, 1))))),
(f32 (XSCVUXDSP (COPY_TO_REGCLASS (XXPERMDI $S, $S, 2), VSFRC)))>;
}
+ def : Pat<(v4i32 (scalar_to_vector ScalarLoads.Li32)),
+ (v4i32 (XXSPLTWs (LXSIWAX xoaddr:$src), 1))>;
} // AddedComplexity = 400
} // HasP8Vector
+let UseVSXReg = 1, AddedComplexity = 400 in {
let Predicates = [HasDirectMove] in {
// VSX direct move instructions
def MFVSRD : XX1_RS6_RD5_XO<31, 51, (outs g8rc:$rA), (ins vsfrc:$XT),
@@ -1304,8 +1427,7 @@ let Predicates = [HasDirectMove] in {
let Predicates = [IsISA3_0, HasDirectMove] in {
def MTVSRWS: XX1_RS6_RD5_XO<31, 403, (outs vsrc:$XT), (ins gprc:$rA),
- "mtvsrws $XT, $rA", IIC_VecGeneral,
- []>;
+ "mtvsrws $XT, $rA", IIC_VecGeneral, []>;
def MTVSRDD: XX1Form<31, 435, (outs vsrc:$XT), (ins g8rc:$rA, g8rc:$rB),
"mtvsrdd $XT, $rA, $rB", IIC_VecGeneral,
@@ -1316,6 +1438,7 @@ let Predicates = [IsISA3_0, HasDirectMove] in {
[]>, Requires<[In64BitMode]>;
} // IsISA3_0, HasDirectMove
+} // UseVSXReg = 1
/* Direct moves of various widths from GPRs into VSRs. Each move lines
the value up into element 0 (both BE and LE). Namely, entities smaller than
@@ -1626,6 +1749,7 @@ def VectorExtractions {
dag BE_VARIABLE_DOUBLE = (COPY_TO_REGCLASS BE_VDOUBLE_PERMUTE, VSRC);
}
+let AddedComplexity = 400 in {
// v4f32 scalar <-> vector conversions (BE)
let Predicates = [IsBigEndian, HasP8Vector] in {
def : Pat<(v4f32 (scalar_to_vector f32:$A)),
@@ -1754,6 +1878,9 @@ let Predicates = [IsLittleEndian, HasVSX] in
def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)),
(f64 VectorExtractions.LE_VARIABLE_DOUBLE)>;
+ def : Pat<(v4i32 (int_ppc_vsx_lxvw4x_be xoaddr:$src)), (LXVW4X xoaddr:$src)>;
+ def : Pat<(v2f64 (int_ppc_vsx_lxvd2x_be xoaddr:$src)), (LXVD2X xoaddr:$src)>;
+
let Predicates = [IsLittleEndian, HasDirectMove] in {
// v16i8 scalar <-> vector conversions (LE)
def : Pat<(v16i8 (scalar_to_vector i32:$A)),
@@ -1864,6 +1991,11 @@ def : Pat<(f64 (bitconvert i64:$S)),
(f64 (MTVSRD $S))>;
}
+// Materialize a zero-vector of long long
+def : Pat<(v2i64 immAllZerosV),
+ (v2i64 (XXLXORz))>;
+}
+
def AlignValues {
dag F32_TO_BE_WORD1 = (v4f32 (XXSLDWI (XSCVDPSPN $B), (XSCVDPSPN $B), 3));
dag I32_TO_BE_WORD1 = (COPY_TO_REGCLASS (MTVSRWZ $B), VSRC);
@@ -1891,6 +2023,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
: X_RD5_XO5_RS5<opcode, xo2, xo, (outs vrrc:$vT), (ins vbtype:$vB),
!strconcat(opc, " $vT, $vB"), IIC_VecFP, pattern>;
+ let UseVSXReg = 1 in {
// [PO T XO B XO BX /]
class XX2_RT5_XO5_XB6<bits<6> opcode, bits<5> xo2, bits<9> xo, string opc,
list<dag> pattern>
@@ -1909,6 +2042,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
InstrItinClass itin, list<dag> pattern>
: XX3Form<opcode, xo, (outs xty:$XT), (ins aty:$XA, bty:$XB),
!strconcat(opc, " $XT, $XA, $XB"), itin, pattern>;
+ } // UseVSXReg = 1
// [PO VRT VRA VRB XO /]
class X_VT5_VA5_VB5<bits<6> opcode, bits<10> xo, string opc,
@@ -1977,7 +2111,8 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
// DP/QP Compare Exponents
def XSCMPEXPDP : XX3Form_1<60, 59,
(outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB),
- "xscmpexpdp $crD, $XA, $XB", IIC_FPCompare, []>;
+ "xscmpexpdp $crD, $XA, $XB", IIC_FPCompare, []>,
+ UseVSXReg;
def XSCMPEXPQP : X_BF3_VA5_VB5<63, 164, "xscmpexpqp", []>;
// DP Compare ==, >=, >, !=
@@ -1991,6 +2126,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
IIC_FPCompare, []>;
def XSCMPNEDP : XX3_XT5_XA5_XB5<60, 27, "xscmpnedp", vsrc, vsfrc, vsfrc,
IIC_FPCompare, []>;
+ let UseVSXReg = 1 in {
// Vector Compare Not Equal
def XVCMPNEDP : XX3Form_Rc<60, 123,
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
@@ -2008,12 +2144,13 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
(outs vsrc:$XT), (ins vsrc:$XA, vsrc:$XB),
"xvcmpnesp. $XT, $XA, $XB", IIC_VecFPCompare, []>,
isDOT;
+ } // UseVSXReg = 1
//===--------------------------------------------------------------------===//
// Quad-Precision Floating-Point Conversion Instructions:
// Convert DP -> QP
- def XSCVDPQP : X_VT5_XO5_VB5_TyVB<63, 22, 836, "xscvdpqp", vsfrc, []>;
+ def XSCVDPQP : X_VT5_XO5_VB5_TyVB<63, 22, 836, "xscvdpqp", vfrc, []>;
// Round & Convert QP -> DP (dword[1] is set to zero)
def XSCVQPDP : X_VT5_XO5_VB5 <63, 20, 836, "xscvqpdp" , []>;
@@ -2026,9 +2163,10 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
def XSCVQPUWZ : X_VT5_XO5_VB5<63, 1, 836, "xscvqpuwz", []>;
// Convert (Un)Signed DWord -> QP
- def XSCVSDQP : X_VT5_XO5_VB5_TyVB<63, 10, 836, "xscvsdqp", vsfrc, []>;
- def XSCVUDQP : X_VT5_XO5_VB5_TyVB<63, 2, 836, "xscvudqp", vsfrc, []>;
+ def XSCVSDQP : X_VT5_XO5_VB5_TyVB<63, 10, 836, "xscvsdqp", vfrc, []>;
+ def XSCVUDQP : X_VT5_XO5_VB5_TyVB<63, 2, 836, "xscvudqp", vfrc, []>;
+ let UseVSXReg = 1 in {
//===--------------------------------------------------------------------===//
// Round to Floating-Point Integer Instructions
@@ -2041,7 +2179,17 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
// Vector HP -> SP
def XVCVHPSP : XX2_XT6_XO5_XB6<60, 24, 475, "xvcvhpsp", vsrc, []>;
- def XVCVSPHP : XX2_XT6_XO5_XB6<60, 25, 475, "xvcvsphp", vsrc, []>;
+ def XVCVSPHP : XX2_XT6_XO5_XB6<60, 25, 475, "xvcvsphp", vsrc,
+ [(set v4f32:$XT,
+ (int_ppc_vsx_xvcvsphp v4f32:$XB))]>;
+
+ } // UseVSXReg = 1
+
+ // Pattern for matching Vector HP -> Vector SP intrinsic. Defined as a
+  // separate pattern so that it can convert the input register class from
+ // VRRC(v8i16) to VSRC.
+ def : Pat<(v4f32 (int_ppc_vsx_xvcvhpsp v8i16:$A)),
+ (v4f32 (XVCVHPSP (COPY_TO_REGCLASS $A, VSRC)))>;
class Z23_VT5_R1_VB5_RMC2_EX1<bits<6> opcode, bits<8> xo, bit ex, string opc,
list<dag> pattern>
@@ -2064,7 +2212,7 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
// Insert Exponent DP/QP
// XT NOTE: XT.dword[1] = 0xUUUU_UUUU_UUUU_UUUU
def XSIEXPDP : XX1Form <60, 918, (outs vsrc:$XT), (ins g8rc:$rA, g8rc:$rB),
- "xsiexpdp $XT, $rA, $rB", IIC_VecFP, []>;
+ "xsiexpdp $XT, $rA, $rB", IIC_VecFP, []>, UseVSXReg;
// vB NOTE: only vB.dword[0] is used, that's why we don't use
// X_VT5_VA5_VB5 form
def XSIEXPQP : XForm_18<63, 868, (outs vrrc:$vT), (ins vrrc:$vA, vsfrc:$vB),
@@ -2073,10 +2221,12 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
// Extract Exponent/Significand DP/QP
def XSXEXPDP : XX2_RT5_XO5_XB6<60, 0, 347, "xsxexpdp", []>;
def XSXSIGDP : XX2_RT5_XO5_XB6<60, 1, 347, "xsxsigdp", []>;
+
def XSXEXPQP : X_VT5_XO5_VB5 <63, 2, 804, "xsxexpqp", []>;
def XSXSIGQP : X_VT5_XO5_VB5 <63, 18, 804, "xsxsigqp", []>;
// Vector Insert Word
+ let UseVSXReg = 1 in {
// XB NOTE: Only XB.dword[1] is used, but we use vsrc on XB.
def XXINSERTW :
XX2_RD6_UIM5_RS6<60, 181, (outs vsrc:$XT),
@@ -2090,39 +2240,64 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
def XXEXTRACTUW : XX2_RD6_UIM5_RS6<60, 165,
(outs vsfrc:$XT), (ins vsrc:$XB, u4imm:$UIMM),
"xxextractuw $XT, $XB, $UIMM", IIC_VecFP, []>;
+ } // UseVSXReg = 1
// Vector Insert Exponent DP/SP
def XVIEXPDP : XX3_XT5_XA5_XB5<60, 248, "xviexpdp", vsrc, vsrc, vsrc,
- IIC_VecFP, []>;
+                                   IIC_VecFP, [(set v2f64: $XT, (int_ppc_vsx_xviexpdp v2i64:$XA, v2i64:$XB))]>;
def XVIEXPSP : XX3_XT5_XA5_XB5<60, 216, "xviexpsp", vsrc, vsrc, vsrc,
- IIC_VecFP, []>;
+                                   IIC_VecFP, [(set v4f32: $XT, (int_ppc_vsx_xviexpsp v4i32:$XA, v4i32:$XB))]>;
// Vector Extract Exponent/Significand DP/SP
- def XVXEXPDP : XX2_XT6_XO5_XB6<60, 0, 475, "xvxexpdp", vsrc, []>;
- def XVXEXPSP : XX2_XT6_XO5_XB6<60, 8, 475, "xvxexpsp", vsrc, []>;
- def XVXSIGDP : XX2_XT6_XO5_XB6<60, 1, 475, "xvxsigdp", vsrc, []>;
- def XVXSIGSP : XX2_XT6_XO5_XB6<60, 9, 475, "xvxsigsp", vsrc, []>;
+ def XVXEXPDP : XX2_XT6_XO5_XB6<60, 0, 475, "xvxexpdp", vsrc,
+ [(set v2i64: $XT,
+ (int_ppc_vsx_xvxexpdp v2f64:$XB))]>;
+ def XVXEXPSP : XX2_XT6_XO5_XB6<60, 8, 475, "xvxexpsp", vsrc,
+ [(set v4i32: $XT,
+ (int_ppc_vsx_xvxexpsp v4f32:$XB))]>;
+ def XVXSIGDP : XX2_XT6_XO5_XB6<60, 1, 475, "xvxsigdp", vsrc,
+ [(set v2i64: $XT,
+ (int_ppc_vsx_xvxsigdp v2f64:$XB))]>;
+ def XVXSIGSP : XX2_XT6_XO5_XB6<60, 9, 475, "xvxsigsp", vsrc,
+ [(set v4i32: $XT,
+ (int_ppc_vsx_xvxsigsp v4f32:$XB))]>;
+
+ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
+ // Extra patterns expanding to vector Extract Word/Insert Word
+ def : Pat<(v4i32 (int_ppc_vsx_xxinsertw v4i32:$A, v2i64:$B, imm:$IMM)),
+ (v4i32 (XXINSERTW $A, $B, imm:$IMM))>;
+ def : Pat<(v2i64 (int_ppc_vsx_xxextractuw v2i64:$A, imm:$IMM)),
+ (v2i64 (COPY_TO_REGCLASS (XXEXTRACTUW $A, imm:$IMM), VSRC))>;
+ } // AddedComplexity = 400, HasP9Vector
//===--------------------------------------------------------------------===//
// Test Data Class SP/DP/QP
+ let UseVSXReg = 1 in {
def XSTSTDCSP : XX2_BF3_DCMX7_RS6<60, 298,
(outs crrc:$BF), (ins u7imm:$DCMX, vsfrc:$XB),
"xststdcsp $BF, $XB, $DCMX", IIC_VecFP, []>;
def XSTSTDCDP : XX2_BF3_DCMX7_RS6<60, 362,
(outs crrc:$BF), (ins u7imm:$DCMX, vsfrc:$XB),
"xststdcdp $BF, $XB, $DCMX", IIC_VecFP, []>;
+ } // UseVSXReg = 1
def XSTSTDCQP : X_BF3_DCMX7_RS5 <63, 708,
(outs crrc:$BF), (ins u7imm:$DCMX, vrrc:$vB),
"xststdcqp $BF, $vB, $DCMX", IIC_VecFP, []>;
// Vector Test Data Class SP/DP
+ let UseVSXReg = 1 in {
def XVTSTDCSP : XX2_RD6_DCMX7_RS6<60, 13, 5,
(outs vsrc:$XT), (ins u7imm:$DCMX, vsrc:$XB),
- "xvtstdcsp $XT, $XB, $DCMX", IIC_VecFP, []>;
+ "xvtstdcsp $XT, $XB, $DCMX", IIC_VecFP,
+ [(set v4i32: $XT,
+ (int_ppc_vsx_xvtstdcsp v4f32:$XB, imm:$DCMX))]>;
def XVTSTDCDP : XX2_RD6_DCMX7_RS6<60, 15, 5,
(outs vsrc:$XT), (ins u7imm:$DCMX, vsrc:$XB),
- "xvtstdcdp $XT, $XB, $DCMX", IIC_VecFP, []>;
+ "xvtstdcdp $XT, $XB, $DCMX", IIC_VecFP,
+ [(set v2i64: $XT,
+ (int_ppc_vsx_xvtstdcdp v2f64:$XB, imm:$DCMX))]>;
+ } // UseVSXReg = 1
//===--------------------------------------------------------------------===//
@@ -2153,20 +2328,22 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
// Vector Splat Immediate Byte
def XXSPLTIB : X_RD6_IMM8<60, 360, (outs vsrc:$XT), (ins u8imm:$IMM8),
- "xxspltib $XT, $IMM8", IIC_VecPerm, []>;
+ "xxspltib $XT, $IMM8", IIC_VecPerm, []>, UseVSXReg;
//===--------------------------------------------------------------------===//
// Vector/Scalar Load/Store Instructions
+ // When adding new D-Form loads/stores, be sure to update the ImmToIdxMap in
+ // PPCRegisterInfo::PPCRegisterInfo and maybe save yourself some debugging.
let mayLoad = 1 in {
// Load Vector
def LXV : DQ_RD6_RS5_DQ12<61, 1, (outs vsrc:$XT), (ins memrix16:$src),
- "lxv $XT, $src", IIC_LdStLFD, []>;
+ "lxv $XT, $src", IIC_LdStLFD, []>, UseVSXReg;
// Load DWord
- def LXSD : DSForm_1<57, 2, (outs vrrc:$vD), (ins memrix:$src),
+ def LXSD : DSForm_1<57, 2, (outs vfrc:$vD), (ins memrix:$src),
"lxsd $vD, $src", IIC_LdStLFD, []>;
// Load SP from src, convert it to DP, and place in dword[0]
- def LXSSP : DSForm_1<57, 3, (outs vrrc:$vD), (ins memrix:$src),
+ def LXSSP : DSForm_1<57, 3, (outs vfrc:$vD), (ins memrix:$src),
"lxssp $vD, $src", IIC_LdStLFD, []>;
// [PO T RA RB XO TX] almost equal to [PO S RA RB XO SX], but has different
@@ -2174,59 +2351,83 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
class X_XT6_RA5_RB5<bits<6> opcode, bits<10> xo, string opc,
RegisterOperand vtype, list<dag> pattern>
: XX1Form<opcode, xo, (outs vtype:$XT), (ins memrr:$src),
- !strconcat(opc, " $XT, $src"), IIC_LdStLFD, pattern>;
+ !strconcat(opc, " $XT, $src"), IIC_LdStLFD, pattern>, UseVSXReg;
// Load as Integer Byte/Halfword & Zero Indexed
- def LXSIBZX : X_XT6_RA5_RB5<31, 781, "lxsibzx", vsfrc, []>;
- def LXSIHZX : X_XT6_RA5_RB5<31, 813, "lxsihzx", vsfrc, []>;
+ def LXSIBZX : X_XT6_RA5_RB5<31, 781, "lxsibzx", vsfrc,
+ [(set f64:$XT, (PPClxsizx xoaddr:$src, 1))]>;
+ def LXSIHZX : X_XT6_RA5_RB5<31, 813, "lxsihzx", vsfrc,
+ [(set f64:$XT, (PPClxsizx xoaddr:$src, 2))]>;
// Load Vector Halfword*8/Byte*16 Indexed
def LXVH8X : X_XT6_RA5_RB5<31, 812, "lxvh8x" , vsrc, []>;
def LXVB16X : X_XT6_RA5_RB5<31, 876, "lxvb16x", vsrc, []>;
// Load Vector Indexed
- def LXVX : X_XT6_RA5_RB5<31, 268, "lxvx" , vsrc, []>;
+ def LXVX : X_XT6_RA5_RB5<31, 268, "lxvx" , vsrc,
+ [(set v2f64:$XT, (load xoaddr:$src))]>;
// Load Vector (Left-justified) with Length
- def LXVL : X_XT6_RA5_RB5<31, 269, "lxvl" , vsrc, []>;
- def LXVLL : X_XT6_RA5_RB5<31, 301, "lxvll" , vsrc, []>;
+ def LXVL : XX1Form<31, 269, (outs vsrc:$XT), (ins memr:$src, g8rc:$rB),
+ "lxvl $XT, $src, $rB", IIC_LdStLoad,
+ [(set v4i32:$XT, (int_ppc_vsx_lxvl addr:$src, i64:$rB))]>,
+ UseVSXReg;
+  def LXVLL : XX1Form<31, 301, (outs vsrc:$XT), (ins memr:$src, g8rc:$rB),
+ "lxvll $XT, $src, $rB", IIC_LdStLoad,
+ [(set v4i32:$XT, (int_ppc_vsx_lxvll addr:$src, i64:$rB))]>,
+ UseVSXReg;
// Load Vector Word & Splat Indexed
def LXVWSX : X_XT6_RA5_RB5<31, 364, "lxvwsx" , vsrc, []>;
- } // end mayLoad
+ } // mayLoad
+ // When adding new D-Form loads/stores, be sure to update the ImmToIdxMap in
+ // PPCRegisterInfo::PPCRegisterInfo and maybe save yourself some debugging.
let mayStore = 1 in {
// Store Vector
def STXV : DQ_RD6_RS5_DQ12<61, 5, (outs), (ins vsrc:$XT, memrix16:$dst),
- "stxv $XT, $dst", IIC_LdStSTFD, []>;
+ "stxv $XT, $dst", IIC_LdStSTFD, []>, UseVSXReg;
// Store DWord
- def STXSD : DSForm_1<61, 2, (outs), (ins vrrc:$vS, memrix:$dst),
+ def STXSD : DSForm_1<61, 2, (outs), (ins vfrc:$vS, memrix:$dst),
"stxsd $vS, $dst", IIC_LdStSTFD, []>;
// Convert DP of dword[0] to SP, and Store to dst
- def STXSSP : DSForm_1<61, 3, (outs), (ins vrrc:$vS, memrix:$dst),
+ def STXSSP : DSForm_1<61, 3, (outs), (ins vfrc:$vS, memrix:$dst),
"stxssp $vS, $dst", IIC_LdStSTFD, []>;
// [PO S RA RB XO SX]
class X_XS6_RA5_RB5<bits<6> opcode, bits<10> xo, string opc,
RegisterOperand vtype, list<dag> pattern>
: XX1Form<opcode, xo, (outs), (ins vtype:$XT, memrr:$dst),
- !strconcat(opc, " $XT, $dst"), IIC_LdStSTFD, pattern>;
+ !strconcat(opc, " $XT, $dst"), IIC_LdStSTFD, pattern>, UseVSXReg;
// Store as Integer Byte/Halfword Indexed
- def STXSIBX : X_XS6_RA5_RB5<31, 909, "stxsibx" , vsfrc, []>;
- def STXSIHX : X_XS6_RA5_RB5<31, 941, "stxsihx" , vsfrc, []>;
+ def STXSIBX : X_XS6_RA5_RB5<31, 909, "stxsibx" , vsfrc,
+ [(PPCstxsix f64:$XT, xoaddr:$dst, 1)]>;
+ def STXSIHX : X_XS6_RA5_RB5<31, 941, "stxsihx" , vsfrc,
+ [(PPCstxsix f64:$XT, xoaddr:$dst, 2)]>;
+ let isCodeGenOnly = 1 in {
+ def STXSIBXv : X_XS6_RA5_RB5<31, 909, "stxsibx" , vrrc, []>;
+ def STXSIHXv : X_XS6_RA5_RB5<31, 941, "stxsihx" , vrrc, []>;
+ }
// Store Vector Halfword*8/Byte*16 Indexed
def STXVH8X : X_XS6_RA5_RB5<31, 940, "stxvh8x" , vsrc, []>;
def STXVB16X : X_XS6_RA5_RB5<31, 1004, "stxvb16x", vsrc, []>;
// Store Vector Indexed
- def STXVX : X_XS6_RA5_RB5<31, 396, "stxvx" , vsrc, []>;
+ def STXVX : X_XS6_RA5_RB5<31, 396, "stxvx" , vsrc,
+ [(store v2f64:$XT, xoaddr:$dst)]>;
// Store Vector (Left-justified) with Length
- def STXVL : X_XS6_RA5_RB5<31, 397, "stxvl" , vsrc, []>;
- def STXVLL : X_XS6_RA5_RB5<31, 429, "stxvll" , vsrc, []>;
- } // end mayStore
+ def STXVL : XX1Form<31, 397, (outs), (ins vsrc:$XT, memr:$dst, g8rc:$rB),
+ "stxvl $XT, $dst, $rB", IIC_LdStLoad,
+ [(int_ppc_vsx_stxvl v4i32:$XT, addr:$dst, i64:$rB)]>,
+ UseVSXReg;
+ def STXVLL : XX1Form<31, 429, (outs), (ins vsrc:$XT, memr:$dst, g8rc:$rB),
+ "stxvll $XT, $dst, $rB", IIC_LdStLoad,
+ [(int_ppc_vsx_stxvll v4i32:$XT, addr:$dst, i64:$rB)]>,
+ UseVSXReg;
+ } // mayStore
// Patterns for which instructions from ISA 3.0 are a better match
let Predicates = [IsLittleEndian, HasP9Vector] in {
@@ -2282,4 +2483,442 @@ let AddedComplexity = 400, Predicates = [HasP9Vector] in {
def : Pat<(v4f32 (insertelt v4f32:$A, f32:$B, 3)),
(v4f32 (XXINSERTW v4f32:$A, AlignValues.F32_TO_BE_WORD1, 12))>;
} // IsLittleEndian, HasP9Vector
+
+ def : Pat<(v2f64 (load xoaddr:$src)), (LXVX xoaddr:$src)>;
+ def : Pat<(v2i64 (load xoaddr:$src)), (LXVX xoaddr:$src)>;
+ def : Pat<(v4f32 (load xoaddr:$src)), (LXVX xoaddr:$src)>;
+ def : Pat<(v4i32 (load xoaddr:$src)), (LXVX xoaddr:$src)>;
+ def : Pat<(v4i32 (int_ppc_vsx_lxvw4x xoaddr:$src)), (LXVX xoaddr:$src)>;
+ def : Pat<(v2f64 (int_ppc_vsx_lxvd2x xoaddr:$src)), (LXVX xoaddr:$src)>;
+ def : Pat<(store v2f64:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>;
+ def : Pat<(store v2i64:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>;
+ def : Pat<(store v4f32:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>;
+ def : Pat<(store v4i32:$rS, xoaddr:$dst), (STXVX $rS, xoaddr:$dst)>;
+ def : Pat<(int_ppc_vsx_stxvw4x v4i32:$rS, xoaddr:$dst),
+ (STXVX $rS, xoaddr:$dst)>;
+ def : Pat<(int_ppc_vsx_stxvd2x v2f64:$rS, xoaddr:$dst),
+ (STXVX $rS, xoaddr:$dst)>;
+
+ def : Pat<(v4i32 (scalar_to_vector (i32 (load xoaddr:$src)))),
+ (v4i32 (LXVWSX xoaddr:$src))>;
+ def : Pat<(v4f32 (scalar_to_vector (f32 (load xoaddr:$src)))),
+ (v4f32 (LXVWSX xoaddr:$src))>;
+ def : Pat<(v4f32 (scalar_to_vector (f32 (fpround (extloadf32 xoaddr:$src))))),
+ (v4f32 (LXVWSX xoaddr:$src))>;
+
+ // Build vectors from i8 loads
+ def : Pat<(v16i8 (scalar_to_vector ScalarLoads.Li8)),
+ (v16i8 (VSPLTBs 7, (LXSIBZX xoaddr:$src)))>;
+ def : Pat<(v8i16 (scalar_to_vector ScalarLoads.ZELi8)),
+ (v8i16 (VSPLTHs 3, (LXSIBZX xoaddr:$src)))>;
+ def : Pat<(v4i32 (scalar_to_vector ScalarLoads.ZELi8)),
+ (v4i32 (XXSPLTWs (LXSIBZX xoaddr:$src), 1))>;
+ def : Pat<(v2i64 (scalar_to_vector ScalarLoads.ZELi8i64)),
+ (v2i64 (XXPERMDIs (LXSIBZX xoaddr:$src), 0))>;
+ def : Pat<(v4i32 (scalar_to_vector ScalarLoads.SELi8)),
+ (v4i32 (XXSPLTWs (VEXTSB2Ws (LXSIBZX xoaddr:$src)), 1))>;
+ def : Pat<(v2i64 (scalar_to_vector ScalarLoads.SELi8i64)),
+ (v2i64 (XXPERMDIs (VEXTSB2Ds (LXSIBZX xoaddr:$src)), 0))>;
+
+ // Build vectors from i16 loads
+ def : Pat<(v8i16 (scalar_to_vector ScalarLoads.Li16)),
+ (v8i16 (VSPLTHs 3, (LXSIHZX xoaddr:$src)))>;
+ def : Pat<(v4i32 (scalar_to_vector ScalarLoads.ZELi16)),
+ (v4i32 (XXSPLTWs (LXSIHZX xoaddr:$src), 1))>;
+ def : Pat<(v2i64 (scalar_to_vector ScalarLoads.ZELi16i64)),
+ (v2i64 (XXPERMDIs (LXSIHZX xoaddr:$src), 0))>;
+ def : Pat<(v4i32 (scalar_to_vector ScalarLoads.SELi16)),
+ (v4i32 (XXSPLTWs (VEXTSH2Ws (LXSIHZX xoaddr:$src)), 1))>;
+ def : Pat<(v2i64 (scalar_to_vector ScalarLoads.SELi16i64)),
+ (v2i64 (XXPERMDIs (VEXTSH2Ds (LXSIHZX xoaddr:$src)), 0))>;
+
+ let Predicates = [IsBigEndian, HasP9Vector] in {
+ // Scalar stores of i8
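+    // stxsibx stores byte element 7 of the source VSR, so each pattern first
+    // rotates the requested element into that byte position with VSLDOI.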
+ def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst),
+ (STXSIBXv (VSLDOI $S, $S, 9), xoaddr:$dst)>;
+ def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 1)), xoaddr:$dst),
+ (STXSIBXv (VSLDOI $S, $S, 10), xoaddr:$dst)>;
+ def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 2)), xoaddr:$dst),
+ (STXSIBXv (VSLDOI $S, $S, 11), xoaddr:$dst)>;
+ def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 3)), xoaddr:$dst),
+ (STXSIBXv (VSLDOI $S, $S, 12), xoaddr:$dst)>;
+ def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 4)), xoaddr:$dst),
+ (STXSIBXv (VSLDOI $S, $S, 13), xoaddr:$dst)>;
+ def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 5)), xoaddr:$dst),
+ (STXSIBXv (VSLDOI $S, $S, 14), xoaddr:$dst)>;
+ def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 6)), xoaddr:$dst),
+ (STXSIBXv (VSLDOI $S, $S, 15), xoaddr:$dst)>;
+ def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 7)), xoaddr:$dst),
+ (STXSIBXv $S, xoaddr:$dst)>;
+ def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 8)), xoaddr:$dst),
+ (STXSIBXv (VSLDOI $S, $S, 1), xoaddr:$dst)>;
+ def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 9)), xoaddr:$dst),
+ (STXSIBXv (VSLDOI $S, $S, 2), xoaddr:$dst)>;
+ def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 10)), xoaddr:$dst),
+ (STXSIBXv (VSLDOI $S, $S, 3), xoaddr:$dst)>;
+ def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 11)), xoaddr:$dst),
+ (STXSIBXv (VSLDOI $S, $S, 4), xoaddr:$dst)>;
+ def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 12)), xoaddr:$dst),
+ (STXSIBXv (VSLDOI $S, $S, 5), xoaddr:$dst)>;
+ def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 13)), xoaddr:$dst),
+ (STXSIBXv (VSLDOI $S, $S, 6), xoaddr:$dst)>;
+ def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 14)), xoaddr:$dst),
+ (STXSIBXv (VSLDOI $S, $S, 7), xoaddr:$dst)>;
+ def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 15)), xoaddr:$dst),
+ (STXSIBXv (VSLDOI $S, $S, 8), xoaddr:$dst)>;
+
+ // Scalar stores of i16
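+    // Likewise, stxsihx stores halfword element 3 of the source VSR, so the
+    // requested halfword is rotated into bytes 6-7 before the store.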
+ def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 0)), xoaddr:$dst),
+ (STXSIHXv (VSLDOI $S, $S, 10), xoaddr:$dst)>;
+ def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 1)), xoaddr:$dst),
+ (STXSIHXv (VSLDOI $S, $S, 12), xoaddr:$dst)>;
+ def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 2)), xoaddr:$dst),
+ (STXSIHXv (VSLDOI $S, $S, 14), xoaddr:$dst)>;
+ def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 3)), xoaddr:$dst),
+ (STXSIHXv $S, xoaddr:$dst)>;
+ def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 4)), xoaddr:$dst),
+ (STXSIHXv (VSLDOI $S, $S, 2), xoaddr:$dst)>;
+ def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 5)), xoaddr:$dst),
+ (STXSIHXv (VSLDOI $S, $S, 4), xoaddr:$dst)>;
+ def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 6)), xoaddr:$dst),
+ (STXSIHXv (VSLDOI $S, $S, 6), xoaddr:$dst)>;
+ def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), xoaddr:$dst),
+ (STXSIHXv (VSLDOI $S, $S, 8), xoaddr:$dst)>;
+ } // IsBigEndian, HasP9Vector
+
+ let Predicates = [IsLittleEndian, HasP9Vector] in {
+ // Scalar stores of i8
+ def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 0)), xoaddr:$dst),
+ (STXSIBXv (VSLDOI $S, $S, 8), xoaddr:$dst)>;
+ def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 1)), xoaddr:$dst),
+ (STXSIBXv (VSLDOI $S, $S, 7), xoaddr:$dst)>;
+ def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 2)), xoaddr:$dst),
+ (STXSIBXv (VSLDOI $S, $S, 6), xoaddr:$dst)>;
+ def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 3)), xoaddr:$dst),
+ (STXSIBXv (VSLDOI $S, $S, 5), xoaddr:$dst)>;
+ def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 4)), xoaddr:$dst),
+ (STXSIBXv (VSLDOI $S, $S, 4), xoaddr:$dst)>;
+ def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 5)), xoaddr:$dst),
+ (STXSIBXv (VSLDOI $S, $S, 3), xoaddr:$dst)>;
+ def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 6)), xoaddr:$dst),
+ (STXSIBXv (VSLDOI $S, $S, 2), xoaddr:$dst)>;
+ def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 7)), xoaddr:$dst),
+ (STXSIBXv (VSLDOI $S, $S, 1), xoaddr:$dst)>;
+ def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 8)), xoaddr:$dst),
+ (STXSIBXv $S, xoaddr:$dst)>;
+ def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 9)), xoaddr:$dst),
+ (STXSIBXv (VSLDOI $S, $S, 15), xoaddr:$dst)>;
+ def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 10)), xoaddr:$dst),
+ (STXSIBXv (VSLDOI $S, $S, 14), xoaddr:$dst)>;
+ def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 11)), xoaddr:$dst),
+ (STXSIBXv (VSLDOI $S, $S, 13), xoaddr:$dst)>;
+ def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 12)), xoaddr:$dst),
+ (STXSIBXv (VSLDOI $S, $S, 12), xoaddr:$dst)>;
+ def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 13)), xoaddr:$dst),
+ (STXSIBXv (VSLDOI $S, $S, 11), xoaddr:$dst)>;
+ def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 14)), xoaddr:$dst),
+ (STXSIBXv (VSLDOI $S, $S, 10), xoaddr:$dst)>;
+ def : Pat<(truncstorei8 (i32 (vector_extract v16i8:$S, 15)), xoaddr:$dst),
+ (STXSIBXv (VSLDOI $S, $S, 9), xoaddr:$dst)>;
+
+ // Scalar stores of i16
+ def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 0)), xoaddr:$dst),
+ (STXSIHXv (VSLDOI $S, $S, 8), xoaddr:$dst)>;
+ def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 1)), xoaddr:$dst),
+ (STXSIHXv (VSLDOI $S, $S, 6), xoaddr:$dst)>;
+ def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 2)), xoaddr:$dst),
+ (STXSIHXv (VSLDOI $S, $S, 4), xoaddr:$dst)>;
+ def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 3)), xoaddr:$dst),
+ (STXSIHXv (VSLDOI $S, $S, 2), xoaddr:$dst)>;
+ def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 4)), xoaddr:$dst),
+ (STXSIHXv $S, xoaddr:$dst)>;
+ def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 5)), xoaddr:$dst),
+ (STXSIHXv (VSLDOI $S, $S, 14), xoaddr:$dst)>;
+ def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 6)), xoaddr:$dst),
+ (STXSIHXv (VSLDOI $S, $S, 12), xoaddr:$dst)>;
+ def : Pat<(truncstorei16 (i32 (vector_extract v8i16:$S, 7)), xoaddr:$dst),
+ (STXSIHXv (VSLDOI $S, $S, 10), xoaddr:$dst)>;
+ } // IsLittleEndian, HasP9Vector
+
+ // Vector sign extensions
+ def : Pat<(f64 (PPCVexts f64:$A, 1)),
+ (f64 (COPY_TO_REGCLASS (VEXTSB2Ds $A), VSFRC))>;
+ def : Pat<(f64 (PPCVexts f64:$A, 2)),
+ (f64 (COPY_TO_REGCLASS (VEXTSH2Ds $A), VSFRC))>;
+
+ let isPseudo = 1 in {
+ def DFLOADf32 : Pseudo<(outs vssrc:$XT), (ins memrix:$src),
+ "#DFLOADf32",
+ [(set f32:$XT, (load iaddr:$src))]>;
+ def DFLOADf64 : Pseudo<(outs vsfrc:$XT), (ins memrix:$src),
+ "#DFLOADf64",
+ [(set f64:$XT, (load iaddr:$src))]>;
+ def DFSTOREf32 : Pseudo<(outs), (ins vssrc:$XT, memrix:$dst),
+ "#DFSTOREf32",
+ [(store f32:$XT, iaddr:$dst)]>;
+ def DFSTOREf64 : Pseudo<(outs), (ins vsfrc:$XT, memrix:$dst),
+ "#DFSTOREf64",
+ [(store f64:$XT, iaddr:$dst)]>;
+ }
+ def : Pat<(f64 (extloadf32 iaddr:$src)),
+ (COPY_TO_REGCLASS (DFLOADf32 iaddr:$src), VSFRC)>;
+ def : Pat<(f32 (fpround (extloadf32 iaddr:$src))),
+ (f32 (DFLOADf32 iaddr:$src))>;
} // end HasP9Vector, AddedComplexity
+
+// Integer extend helper dags 32 -> 64
+def AnyExts {
+ dag A = (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $A, sub_32);
+ dag B = (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $B, sub_32);
+ dag C = (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $C, sub_32);
+ dag D = (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $D, sub_32);
+}
+
+def DblToFlt {
+ dag A0 = (f32 (fpround (f64 (extractelt v2f64:$A, 0))));
+ dag A1 = (f32 (fpround (f64 (extractelt v2f64:$A, 1))));
+ dag B0 = (f32 (fpround (f64 (extractelt v2f64:$B, 0))));
+ dag B1 = (f32 (fpround (f64 (extractelt v2f64:$B, 1))));
+}
+def FltToIntLoad {
+ dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (extloadf32 xoaddr:$A)))));
+}
+def FltToUIntLoad {
+ dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (extloadf32 xoaddr:$A)))));
+}
+def FltToLongLoad {
+ dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (extloadf32 xoaddr:$A)))));
+}
+def FltToULongLoad {
+ dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (extloadf32 xoaddr:$A)))));
+}
+def FltToLong {
+ dag A = (i64 (PPCmfvsr (PPCfctidz (fpextend f32:$A))));
+}
+def FltToULong {
+ dag A = (i64 (PPCmfvsr (PPCfctiduz (fpextend f32:$A))));
+}
+def DblToInt {
+ dag A = (i32 (PPCmfvsr (f64 (PPCfctiwz f64:$A))));
+}
+def DblToUInt {
+ dag A = (i32 (PPCmfvsr (f64 (PPCfctiwuz f64:$A))));
+}
+def DblToLong {
+ dag A = (i64 (PPCmfvsr (f64 (PPCfctidz f64:$A))));
+}
+def DblToULong {
+ dag A = (i64 (PPCmfvsr (f64 (PPCfctiduz f64:$A))));
+}
+def DblToIntLoad {
+ dag A = (i32 (PPCmfvsr (PPCfctiwz (f64 (load xoaddr:$A)))));
+}
+def DblToUIntLoad {
+ dag A = (i32 (PPCmfvsr (PPCfctiwuz (f64 (load xoaddr:$A)))));
+}
+def DblToLongLoad {
+ dag A = (i64 (PPCmfvsr (PPCfctidz (f64 (load xoaddr:$A)))));
+}
+def DblToULongLoad {
+ dag A = (i64 (PPCmfvsr (PPCfctiduz (f64 (load xoaddr:$A)))));
+}
+
+// FP merge dags (for f32 -> v4f32)
+def MrgFP {
+ dag AC = (XVCVDPSP (XXPERMDI (COPY_TO_REGCLASS $A, VSRC),
+ (COPY_TO_REGCLASS $C, VSRC), 0));
+ dag BD = (XVCVDPSP (XXPERMDI (COPY_TO_REGCLASS $B, VSRC),
+ (COPY_TO_REGCLASS $D, VSRC), 0));
+ dag ABhToFlt = (XVCVDPSP (XXPERMDI $A, $B, 0));
+ dag ABlToFlt = (XVCVDPSP (XXPERMDI $A, $B, 3));
+ dag BAhToFlt = (XVCVDPSP (XXPERMDI $B, $A, 0));
+ dag BAlToFlt = (XVCVDPSP (XXPERMDI $B, $A, 3));
+}
+
+// Patterns for BUILD_VECTOR nodes.
+def NoP9Vector : Predicate<"!PPCSubTarget->hasP9Vector()">;
+let AddedComplexity = 400 in {
+
+ let Predicates = [HasVSX] in {
+ // Build vectors of floating point converted to i32.
+ def : Pat<(v4i32 (build_vector DblToInt.A, DblToInt.A,
+ DblToInt.A, DblToInt.A)),
+ (v4i32 (XXSPLTW (COPY_TO_REGCLASS (XSCVDPSXWS $A), VSRC), 1))>;
+ def : Pat<(v4i32 (build_vector DblToUInt.A, DblToUInt.A,
+ DblToUInt.A, DblToUInt.A)),
+ (v4i32 (XXSPLTW (COPY_TO_REGCLASS (XSCVDPUXWS $A), VSRC), 1))>;
+ def : Pat<(v2i64 (build_vector DblToLong.A, DblToLong.A)),
+ (v2i64 (XXPERMDI (COPY_TO_REGCLASS (XSCVDPSXDS $A), VSRC),
+ (COPY_TO_REGCLASS (XSCVDPSXDS $A), VSRC), 0))>;
+ def : Pat<(v2i64 (build_vector DblToULong.A, DblToULong.A)),
+ (v2i64 (XXPERMDI (COPY_TO_REGCLASS (XSCVDPUXDS $A), VSRC),
+ (COPY_TO_REGCLASS (XSCVDPUXDS $A), VSRC), 0))>;
+ def : Pat<(v4i32 (scalar_to_vector FltToIntLoad.A)),
+ (v4i32 (XXSPLTW (COPY_TO_REGCLASS
+ (XSCVDPSXWSs (LXSSPX xoaddr:$A)), VSRC), 1))>;
+ def : Pat<(v4i32 (scalar_to_vector FltToUIntLoad.A)),
+ (v4i32 (XXSPLTW (COPY_TO_REGCLASS
+ (XSCVDPUXWSs (LXSSPX xoaddr:$A)), VSRC), 1))>;
+ def : Pat<(v4f32 (build_vector f32:$A, f32:$A, f32:$A, f32:$A)),
+ (v4f32 (XXSPLTW (v4f32 (XSCVDPSPN $A)), 0))>;
+
+ // Build vectors of floating point converted to i64.
+ def : Pat<(v2i64 (build_vector FltToLong.A, FltToLong.A)),
+ (v2i64 (XXPERMDIs
+ (COPY_TO_REGCLASS (XSCVDPSXDSs $A), VSFRC), 0))>;
+ def : Pat<(v2i64 (build_vector FltToULong.A, FltToULong.A)),
+ (v2i64 (XXPERMDIs
+ (COPY_TO_REGCLASS (XSCVDPUXDSs $A), VSFRC), 0))>;
+ def : Pat<(v2i64 (scalar_to_vector DblToLongLoad.A)),
+ (v2i64 (XVCVDPSXDS (LXVDSX xoaddr:$A)))>;
+ def : Pat<(v2i64 (scalar_to_vector DblToULongLoad.A)),
+ (v2i64 (XVCVDPUXDS (LXVDSX xoaddr:$A)))>;
+ }
+
+ let Predicates = [HasVSX, NoP9Vector] in {
+ // Load-and-splat with fp-to-int conversion (using X-Form VSX loads).
+ def : Pat<(v4i32 (scalar_to_vector DblToIntLoad.A)),
+ (v4i32 (XXSPLTW (COPY_TO_REGCLASS
+ (XSCVDPSXWS (LXSDX xoaddr:$A)), VSRC), 1))>;
+ def : Pat<(v4i32 (scalar_to_vector DblToUIntLoad.A)),
+ (v4i32 (XXSPLTW (COPY_TO_REGCLASS
+ (XSCVDPUXWS (LXSDX xoaddr:$A)), VSRC), 1))>;
+ def : Pat<(v2i64 (scalar_to_vector FltToLongLoad.A)),
+ (v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS
+ (LXSSPX xoaddr:$A), VSFRC)), 0))>;
+ def : Pat<(v2i64 (scalar_to_vector FltToULongLoad.A)),
+ (v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS
+ (LXSSPX xoaddr:$A), VSFRC)), 0))>;
+ }
+
+ // Big endian, available on all targets with VSX
+ let Predicates = [IsBigEndian, HasVSX] in {
+ def : Pat<(v2f64 (build_vector f64:$A, f64:$B)),
+ (v2f64 (XXPERMDI
+ (COPY_TO_REGCLASS $A, VSRC),
+ (COPY_TO_REGCLASS $B, VSRC), 0))>;
+
+ def : Pat<(v4f32 (build_vector f32:$A, f32:$B, f32:$C, f32:$D)),
+ (VMRGEW MrgFP.AC, MrgFP.BD)>;
+ def : Pat<(v4f32 (build_vector DblToFlt.A0, DblToFlt.A1,
+ DblToFlt.B0, DblToFlt.B1)),
+ (v4f32 (VMRGEW MrgFP.ABhToFlt, MrgFP.ABlToFlt))>;
+ }
+
+ let Predicates = [IsLittleEndian, HasVSX] in {
+ // Little endian, available on all targets with VSX
+ def : Pat<(v2f64 (build_vector f64:$A, f64:$B)),
+ (v2f64 (XXPERMDI
+ (COPY_TO_REGCLASS $B, VSRC),
+ (COPY_TO_REGCLASS $A, VSRC), 0))>;
+
+ def : Pat<(v4f32 (build_vector f32:$D, f32:$C, f32:$B, f32:$A)),
+ (VMRGEW MrgFP.AC, MrgFP.BD)>;
+ def : Pat<(v4f32 (build_vector DblToFlt.A0, DblToFlt.A1,
+ DblToFlt.B0, DblToFlt.B1)),
+ (v4f32 (VMRGEW MrgFP.BAhToFlt, MrgFP.BAlToFlt))>;
+ }
+
+ let Predicates = [HasDirectMove] in {
+ // Endianness-neutral constant splat on P8 and newer targets. The reason
+ // for this pattern is that on targets with direct moves, we don't expand
+ // BUILD_VECTOR nodes for v4i32.
+ def : Pat<(v4i32 (build_vector immSExt5NonZero:$A, immSExt5NonZero:$A,
+ immSExt5NonZero:$A, immSExt5NonZero:$A)),
+ (v4i32 (VSPLTISW imm:$A))>;
+ }
+
+ let Predicates = [IsBigEndian, HasDirectMove, NoP9Vector] in {
+ // Big endian integer vectors using direct moves.
+ def : Pat<(v2i64 (build_vector i64:$A, i64:$B)),
+ (v2i64 (XXPERMDI
+ (COPY_TO_REGCLASS (MTVSRD $A), VSRC),
+ (COPY_TO_REGCLASS (MTVSRD $B), VSRC), 0))>;
+ def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
+ (VMRGOW (XXPERMDI (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC),
+ (COPY_TO_REGCLASS (MTVSRWZ $C), VSRC), 0),
+ (XXPERMDI (COPY_TO_REGCLASS (MTVSRWZ $B), VSRC),
+ (COPY_TO_REGCLASS (MTVSRWZ $D), VSRC), 0))>;
+ def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
+ (XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>;
+ }
+
+ let Predicates = [IsLittleEndian, HasDirectMove, NoP9Vector] in {
+ // Little endian integer vectors using direct moves.
+ def : Pat<(v2i64 (build_vector i64:$A, i64:$B)),
+ (v2i64 (XXPERMDI
+ (COPY_TO_REGCLASS (MTVSRD $B), VSRC),
+ (COPY_TO_REGCLASS (MTVSRD $A), VSRC), 0))>;
+ def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
+ (VMRGOW (XXPERMDI (COPY_TO_REGCLASS (MTVSRWZ $D), VSRC),
+ (COPY_TO_REGCLASS (MTVSRWZ $B), VSRC), 0),
+ (XXPERMDI (COPY_TO_REGCLASS (MTVSRWZ $C), VSRC),
+ (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 0))>;
+ def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
+ (XXSPLTW (COPY_TO_REGCLASS (MTVSRWZ $A), VSRC), 1)>;
+ }
+
+ let Predicates = [HasP9Vector] in {
+ // Endianness-neutral patterns for const splats with ISA 3.0 instructions.
+ def : Pat<(v4i32 (scalar_to_vector i32:$A)),
+ (v4i32 (MTVSRWS $A))>;
+ def : Pat<(v4i32 (build_vector i32:$A, i32:$A, i32:$A, i32:$A)),
+ (v4i32 (MTVSRWS $A))>;
+ def : Pat<(v16i8 (build_vector immAnyExt8:$A, immAnyExt8:$A, immAnyExt8:$A,
+ immAnyExt8:$A, immAnyExt8:$A, immAnyExt8:$A,
+ immAnyExt8:$A, immAnyExt8:$A, immAnyExt8:$A,
+ immAnyExt8:$A, immAnyExt8:$A, immAnyExt8:$A,
+ immAnyExt8:$A, immAnyExt8:$A, immAnyExt8:$A,
+ immAnyExt8:$A)),
+ (v16i8 (COPY_TO_REGCLASS (XXSPLTIB imm:$A), VSRC))>;
+ def : Pat<(v16i8 immAllOnesV),
+ (v16i8 (COPY_TO_REGCLASS (XXSPLTIB 255), VSRC))>;
+ def : Pat<(v8i16 immAllOnesV),
+ (v8i16 (COPY_TO_REGCLASS (XXSPLTIB 255), VSRC))>;
+ def : Pat<(v4i32 immAllOnesV),
+ (v4i32 (XXSPLTIB 255))>;
+ def : Pat<(v2i64 immAllOnesV),
+ (v2i64 (XXSPLTIB 255))>;
+ def : Pat<(v4i32 (scalar_to_vector FltToIntLoad.A)),
+ (v4i32 (XVCVSPSXWS (LXVWSX xoaddr:$A)))>;
+ def : Pat<(v4i32 (scalar_to_vector FltToUIntLoad.A)),
+ (v4i32 (XVCVSPUXWS (LXVWSX xoaddr:$A)))>;
+ def : Pat<(v4i32 (scalar_to_vector DblToIntLoad.A)),
+ (v4i32 (XXSPLTW (COPY_TO_REGCLASS
+ (XSCVDPSXWS (DFLOADf64 iaddr:$A)), VSRC), 1))>;
+ def : Pat<(v4i32 (scalar_to_vector DblToUIntLoad.A)),
+ (v4i32 (XXSPLTW (COPY_TO_REGCLASS
+ (XSCVDPUXWS (DFLOADf64 iaddr:$A)), VSRC), 1))>;
+ def : Pat<(v2i64 (scalar_to_vector FltToLongLoad.A)),
+ (v2i64 (XXPERMDIs (XSCVDPSXDS (COPY_TO_REGCLASS
+ (DFLOADf32 iaddr:$A),
+ VSFRC)), 0))>;
+ def : Pat<(v2i64 (scalar_to_vector FltToULongLoad.A)),
+ (v2i64 (XXPERMDIs (XSCVDPUXDS (COPY_TO_REGCLASS
+ (DFLOADf32 iaddr:$A),
+ VSFRC)), 0))>;
+ }
+
+ let Predicates = [IsISA3_0, HasDirectMove, IsBigEndian] in {
+ def : Pat<(i64 (extractelt v2i64:$A, 1)),
+ (i64 (MFVSRLD $A))>;
+ // Better way to build integer vectors if we have MTVSRDD. Big endian.
+ def : Pat<(v2i64 (build_vector i64:$rB, i64:$rA)),
+ (v2i64 (MTVSRDD $rB, $rA))>;
+ def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
+ (VMRGOW (COPY_TO_REGCLASS (MTVSRDD AnyExts.A, AnyExts.C), VSRC),
+ (COPY_TO_REGCLASS (MTVSRDD AnyExts.B, AnyExts.D), VSRC))>;
+ }
+
+ let Predicates = [IsISA3_0, HasDirectMove, IsLittleEndian] in {
+ def : Pat<(i64 (extractelt v2i64:$A, 0)),
+ (i64 (MFVSRLD $A))>;
+ // Better way to build integer vectors if we have MTVSRDD. Little endian.
+ def : Pat<(v2i64 (build_vector i64:$rA, i64:$rB)),
+ (v2i64 (MTVSRDD $rB, $rA))>;
+ def : Pat<(v4i32 (build_vector i32:$A, i32:$B, i32:$C, i32:$D)),
+ (VMRGOW (COPY_TO_REGCLASS (MTVSRDD AnyExts.D, AnyExts.B), VSRC),
+ (COPY_TO_REGCLASS (MTVSRDD AnyExts.C, AnyExts.A), VSRC))>;
+ }
+}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp b/contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
index 48a71cfc2a6e..2c3e75523e8f 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCLoopPreIncPrep.cpp
@@ -20,31 +20,38 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "ppc-loop-preinc-prep"
+
#include "PPC.h"
+#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/DepthFirstIterator.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/CodeMetrics.h"
-#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
-#include "llvm/Transforms/Utils/ValueMapper.h"
+#include <cassert>
+#include <iterator>
+#include <utility>
+
using namespace llvm;
// By default, we limit this to creating 16 PHIs (which is a little over half
@@ -54,14 +61,17 @@ static cl::opt<unsigned> MaxVars("ppc-preinc-prep-max-vars",
cl::desc("Potential PHI threshold for PPC preinc loop prep"));
namespace llvm {
+
void initializePPCLoopPreIncPrepPass(PassRegistry&);
-}
+
+} // end namespace llvm
namespace {
class PPCLoopPreIncPrep : public FunctionPass {
public:
static char ID; // Pass ID, replacement for typeid
+
PPCLoopPreIncPrep() : FunctionPass(ID), TM(nullptr) {
initializePPCLoopPreIncPrepPass(*PassRegistry::getPassRegistry());
}
@@ -89,7 +99,8 @@ namespace {
ScalarEvolution *SE;
bool PreserveLCSSA;
};
-}
+
+} // end anonymous namespace
char PPCLoopPreIncPrep::ID = 0;
static const char *name = "Prepare loop for pre-inc. addressing modes";
@@ -103,6 +114,7 @@ FunctionPass *llvm::createPPCLoopPreIncPrepPass(PPCTargetMachine &TM) {
}
namespace {
+
struct BucketElement {
BucketElement(const SCEVConstant *O, Instruction *I) : Offset(O), Instr(I) {}
BucketElement(Instruction *I) : Offset(nullptr), Instr(I) {}
@@ -118,7 +130,8 @@ namespace {
const SCEV *BaseSCEV;
SmallVector<BucketElement, 16> Elements;
};
-}
+
+} // end anonymous namespace
static bool IsPtrInBounds(Value *BasePtr) {
Value *StrippedBasePtr = BasePtr;
@@ -140,7 +153,7 @@ static Value *GetPointerOperand(Value *MemI) {
return IMemI->getArgOperand(0);
}
- return 0;
+ return nullptr;
}
bool PPCLoopPreIncPrep::runOnFunction(Function &F) {
@@ -394,7 +407,7 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
Instruction *PtrIP = dyn_cast<Instruction>(Ptr);
if (PtrIP && isa<Instruction>(NewBasePtr) &&
cast<Instruction>(NewBasePtr)->getParent() == PtrIP->getParent())
- PtrIP = 0;
+ PtrIP = nullptr;
else if (isa<PHINode>(PtrIP))
PtrIP = &*PtrIP->getParent()->getFirstInsertionPt();
else if (!PtrIP)
@@ -437,4 +450,3 @@ bool PPCLoopPreIncPrep::runOnLoop(Loop *L) {
return MadeChange;
}
-
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
index 18377a44a7f8..e527b018d4fb 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp
@@ -34,10 +34,10 @@ static MachineModuleInfoMachO &getMachOMMI(AsmPrinter &AP) {
return AP.MMI->getObjFileInfo<MachineModuleInfoMachO>();
}
-
-static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){
+static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO,
+ AsmPrinter &AP) {
const TargetMachine &TM = AP.TM;
- Mangler *Mang = AP.Mang;
+ Mangler &Mang = TM.getObjFileLowering()->getMangler();
const DataLayout &DL = AP.getDataLayout();
MCContext &Ctx = AP.OutContext;
@@ -54,7 +54,7 @@ static MCSymbol *GetSymbolFromOperand(const MachineOperand &MO, AsmPrinter &AP){
Mangler::getNameWithPrefix(Name, MO.getSymbolName(), DL);
} else {
const GlobalValue *GV = MO.getGlobal();
- TM.getNameWithPrefix(Name, GV, *Mang);
+ TM.getNameWithPrefix(Name, GV, Mang);
}
Name += Suffix;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/contrib/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
index a57a83d7aa93..2413af3f7042 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp
@@ -124,10 +124,40 @@ bool PPCMIPeephole::simplifyCode(void) {
if (TrueReg1 == TrueReg2
&& TargetRegisterInfo::isVirtualRegister(TrueReg1)) {
MachineInstr *DefMI = MRI->getVRegDef(TrueReg1);
+ unsigned DefOpc = DefMI ? DefMI->getOpcode() : 0;
+
+ // If this is a splat fed by a splatting load, the splat is
+ // redundant. Replace with a copy. This doesn't happen directly due
+ // to code in PPCDAGToDAGISel.cpp, but it can happen when converting
+ // a load of a double to a vector of 64-bit integers.
+ auto isConversionOfLoadAndSplat = [=]() -> bool {
+ if (DefOpc != PPC::XVCVDPSXDS && DefOpc != PPC::XVCVDPUXDS)
+ return false;
+ unsigned DefReg = lookThruCopyLike(DefMI->getOperand(1).getReg());
+ if (TargetRegisterInfo::isVirtualRegister(DefReg)) {
+ MachineInstr *LoadMI = MRI->getVRegDef(DefReg);
+ if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX)
+ return true;
+ }
+ return false;
+ };
+ if (DefMI && (Immed == 0 || Immed == 3)) {
+ if (DefOpc == PPC::LXVDSX || isConversionOfLoadAndSplat()) {
+ DEBUG(dbgs()
+ << "Optimizing load-and-splat/splat "
+ "to load-and-splat/copy: ");
+ DEBUG(MI.dump());
+ BuildMI(MBB, &MI, MI.getDebugLoc(),
+ TII->get(PPC::COPY), MI.getOperand(0).getReg())
+ .addOperand(MI.getOperand(1));
+ ToErase = &MI;
+ Simplified = true;
+ }
+ }
// If this is a splat or a swap fed by another splat, we
// can replace it with a copy.
- if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) {
+ if (DefOpc == PPC::XXPERMDI) {
unsigned FeedImmed = DefMI->getOperand(3).getImm();
unsigned FeedReg1
= lookThruCopyLike(DefMI->getOperand(1).getReg());
@@ -170,14 +200,144 @@ bool PPCMIPeephole::simplifyCode(void) {
ToErase = &MI;
Simplified = true;
}
+ } else if ((Immed == 0 || Immed == 3) && DefOpc == PPC::XXPERMDIs &&
+ (DefMI->getOperand(2).getImm() == 0 ||
+ DefMI->getOperand(2).getImm() == 3)) {
+ // Splat fed by another splat - switch the output of the first
+ // and remove the second.
+ DefMI->getOperand(0).setReg(MI.getOperand(0).getReg());
+ ToErase = &MI;
+ Simplified = true;
+ DEBUG(dbgs() << "Removing redundant splat: ");
+ DEBUG(MI.dump());
+ }
+ }
+ }
+ break;
+ }
+ case PPC::VSPLTB:
+ case PPC::VSPLTH:
+ case PPC::XXSPLTW: {
+ unsigned MyOpcode = MI.getOpcode();
+ unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 1 : 2;
+ unsigned TrueReg = lookThruCopyLike(MI.getOperand(OpNo).getReg());
+ if (!TargetRegisterInfo::isVirtualRegister(TrueReg))
+ break;
+ MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
+ if (!DefMI)
+ break;
+ unsigned DefOpcode = DefMI->getOpcode();
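+      // A single-precision fp-to-int conversion whose input is already a splat
+      // (a splatting load or an XXSPLTW) produces a splatted result, so a
+      // splat that consumes it is redundant.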
+ auto isConvertOfSplat = [=]() -> bool {
+ if (DefOpcode != PPC::XVCVSPSXWS && DefOpcode != PPC::XVCVSPUXWS)
+ return false;
+ unsigned ConvReg = DefMI->getOperand(1).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(ConvReg))
+ return false;
+ MachineInstr *Splt = MRI->getVRegDef(ConvReg);
+ return Splt && (Splt->getOpcode() == PPC::LXVWSX ||
+ Splt->getOpcode() == PPC::XXSPLTW);
+ };
+ bool AlreadySplat = (MyOpcode == DefOpcode) ||
+ (MyOpcode == PPC::VSPLTB && DefOpcode == PPC::VSPLTBs) ||
+ (MyOpcode == PPC::VSPLTH && DefOpcode == PPC::VSPLTHs) ||
+ (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::XXSPLTWs) ||
+ (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::LXVWSX) ||
+                          (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::MTVSRWS) ||
+ (MyOpcode == PPC::XXSPLTW && isConvertOfSplat());
+ // If the instruction[s] that feed this splat have already splat
+ // the value, this splat is redundant.
+ if (AlreadySplat) {
+ DEBUG(dbgs() << "Changing redundant splat to a copy: ");
+ DEBUG(MI.dump());
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
+ MI.getOperand(0).getReg())
+ .addOperand(MI.getOperand(OpNo));
+ ToErase = &MI;
+ Simplified = true;
+ }
+        // Splat fed by a shift. Usually when we align a value to splat into
+ // vector element zero.
+ if (DefOpcode == PPC::XXSLDWI) {
+ unsigned ShiftRes = DefMI->getOperand(0).getReg();
+ unsigned ShiftOp1 = DefMI->getOperand(1).getReg();
+ unsigned ShiftOp2 = DefMI->getOperand(2).getReg();
+ unsigned ShiftImm = DefMI->getOperand(3).getImm();
+ unsigned SplatImm = MI.getOperand(2).getImm();
+ if (ShiftOp1 == ShiftOp2) {
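+          // XXSLDWI here rotates the words of its (identical) sources left by
+          // ShiftImm, so splatting word SplatImm of its result is equivalent to
+          // splatting word (SplatImm + ShiftImm) modulo 4 of the shift's input.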
+ unsigned NewElem = (SplatImm + ShiftImm) & 0x3;
+ if (MRI->hasOneNonDBGUse(ShiftRes)) {
+ DEBUG(dbgs() << "Removing redundant shift: ");
+ DEBUG(DefMI->dump());
+ ToErase = DefMI;
}
+ Simplified = true;
+ DEBUG(dbgs() << "Changing splat immediate from " << SplatImm <<
+ " to " << NewElem << " in instruction: ");
+ DEBUG(MI.dump());
+ MI.getOperand(1).setReg(ShiftOp1);
+ MI.getOperand(2).setImm(NewElem);
}
}
break;
}
+ case PPC::XVCVDPSP: {
+ // If this is a DP->SP conversion fed by an FRSP, the FRSP is redundant.
+ unsigned TrueReg = lookThruCopyLike(MI.getOperand(1).getReg());
+ if (!TargetRegisterInfo::isVirtualRegister(TrueReg))
+ break;
+ MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
+
+ // This can occur when building a vector of single precision or integer
+ // values.
+ if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) {
+ unsigned DefsReg1 = lookThruCopyLike(DefMI->getOperand(1).getReg());
+ unsigned DefsReg2 = lookThruCopyLike(DefMI->getOperand(2).getReg());
+ if (!TargetRegisterInfo::isVirtualRegister(DefsReg1) ||
+ !TargetRegisterInfo::isVirtualRegister(DefsReg2))
+ break;
+ MachineInstr *P1 = MRI->getVRegDef(DefsReg1);
+ MachineInstr *P2 = MRI->getVRegDef(DefsReg2);
+
+ if (!P1 || !P2)
+ break;
+
+ // Remove the passed FRSP instruction if it only feeds this MI and
+ // set any uses of that FRSP (in this MI) to the source of the FRSP.
+ auto removeFRSPIfPossible = [&](MachineInstr *RoundInstr) {
+ if (RoundInstr->getOpcode() == PPC::FRSP &&
+ MRI->hasOneNonDBGUse(RoundInstr->getOperand(0).getReg())) {
+ Simplified = true;
+ unsigned ConvReg1 = RoundInstr->getOperand(1).getReg();
+ unsigned FRSPDefines = RoundInstr->getOperand(0).getReg();
+ MachineInstr &Use = *(MRI->use_instr_begin(FRSPDefines));
+ for (int i = 0, e = Use.getNumOperands(); i < e; ++i)
+ if (Use.getOperand(i).isReg() &&
+ Use.getOperand(i).getReg() == FRSPDefines)
+ Use.getOperand(i).setReg(ConvReg1);
+ DEBUG(dbgs() << "Removing redundant FRSP:\n");
+ DEBUG(RoundInstr->dump());
+ DEBUG(dbgs() << "As it feeds instruction:\n");
+ DEBUG(MI.dump());
+ DEBUG(dbgs() << "Through instruction:\n");
+ DEBUG(DefMI->dump());
+ RoundInstr->eraseFromParent();
+ }
+ };
+
+ // If the input to XVCVDPSP is a vector that was built (even
+          // partially) out of FRSPs, the FRSP(s) can safely be removed
+ // since this instruction performs the same operation.
+ if (P1 != P2) {
+ removeFRSPIfPossible(P1);
+ removeFRSPIfPossible(P2);
+ break;
+ }
+ removeFRSPIfPossible(P1);
+ }
+ break;
+ }
}
}
-
// If the last instruction was marked for elimination,
// remove it now.
if (ToErase) {
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCQPXLoadSplat.cpp b/contrib/llvm/lib/Target/PowerPC/PPCQPXLoadSplat.cpp
index bfe20c12974b..8a18ab9e0e9a 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCQPXLoadSplat.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCQPXLoadSplat.cpp
@@ -44,7 +44,7 @@ namespace {
bool runOnMachineFunction(MachineFunction &Fn) override;
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "PowerPC QPX Load Splat Simplification";
}
};
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
index f0161a03d2d4..e49201402861 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -78,6 +78,18 @@ PPCRegisterInfo::PPCRegisterInfo(const PPCTargetMachine &TM)
ImmToIdxMap[PPC::STB8] = PPC::STBX8; ImmToIdxMap[PPC::STH8] = PPC::STHX8;
ImmToIdxMap[PPC::STW8] = PPC::STWX8; ImmToIdxMap[PPC::STDU] = PPC::STDUX;
ImmToIdxMap[PPC::ADDI8] = PPC::ADD8;
+
+ // VSX
+ ImmToIdxMap[PPC::DFLOADf32] = PPC::LXSSPX;
+ ImmToIdxMap[PPC::DFLOADf64] = PPC::LXSDX;
+ ImmToIdxMap[PPC::DFSTOREf32] = PPC::STXSSPX;
+ ImmToIdxMap[PPC::DFSTOREf64] = PPC::STXSDX;
+ ImmToIdxMap[PPC::LXV] = PPC::LXVX;
+ ImmToIdxMap[PPC::LXSD] = PPC::LXSDX;
+ ImmToIdxMap[PPC::LXSSP] = PPC::LXSSPX;
+ ImmToIdxMap[PPC::STXV] = PPC::STXVX;
+ ImmToIdxMap[PPC::STXSD] = PPC::STXSDX;
+ ImmToIdxMap[PPC::STXSSP] = PPC::STXSSPX;
}
/// getPointerRegClass - Return the register class to use to hold pointers.
@@ -303,7 +315,6 @@ unsigned PPCRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
case PPC::VRRCRegClassID:
case PPC::VFRCRegClassID:
case PPC::VSLRCRegClassID:
- case PPC::VSHRCRegClassID:
return 32 - DefaultSafety;
case PPC::VSRCRegClassID:
case PPC::VSFRCRegClassID:
@@ -352,7 +363,7 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const {
// Get the basic block's function.
MachineFunction &MF = *MBB.getParent();
// Get the frame info.
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
// Get the instruction info.
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
@@ -361,14 +372,14 @@ void PPCRegisterInfo::lowerDynamicAlloc(MachineBasicBlock::iterator II) const {
DebugLoc dl = MI.getDebugLoc();
// Get the maximum call stack size.
- unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
+ unsigned maxCallFrameSize = MFI.getMaxCallFrameSize();
// Get the total frame size.
- unsigned FrameSize = MFI->getStackSize();
+ unsigned FrameSize = MFI.getStackSize();
// Get stack alignments.
const PPCFrameLowering *TFI = getFrameLowering(MF);
unsigned TargetAlign = TFI->getStackAlignment();
- unsigned MaxAlign = MFI->getMaxAlignment();
+ unsigned MaxAlign = MFI.getMaxAlignment();
assert((maxCallFrameSize & (MaxAlign-1)) == 0 &&
"Maximum call-frame size not sufficiently aligned");
@@ -466,12 +477,12 @@ void PPCRegisterInfo::lowerDynamicAreaOffset(
// Get the basic block's function.
MachineFunction &MF = *MBB.getParent();
// Get the frame info.
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
// Get the instruction info.
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
- unsigned maxCallFrameSize = MFI->getMaxCallFrameSize();
+ unsigned maxCallFrameSize = MFI.getMaxCallFrameSize();
DebugLoc dl = MI.getDebugLoc();
BuildMI(MBB, II, dl, TII.get(PPC::LI), MI.getOperand(0).getReg())
.addImm(maxCallFrameSize);
@@ -787,7 +798,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Get the instruction info.
const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
// Get the frame info.
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
DebugLoc dl = MI.getDebugLoc();
unsigned OffsetOperandNo = getOffsetONFromFION(MI, FIOperandNum);
@@ -848,7 +859,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
OpC != TargetOpcode::PATCHPOINT && !ImmToIdxMap.count(OpC);
// Now add the frame object offset to the offset from r1.
- int Offset = MFI->getObjectOffset(FrameIndex);
+ int Offset = MFI.getObjectOffset(FrameIndex);
Offset += MI.getOperand(OffsetOperandNo).getImm();
// If we're not using a Frame Pointer that has been set to the value of the
@@ -859,7 +870,7 @@ PPCRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// functions.
if (!MF.getFunction()->hasFnAttribute(Attribute::Naked)) {
if (!(hasBasePointer(MF) && FrameIndex < 0))
- Offset += MFI->getStackSize();
+ Offset += MFI.getStackSize();
}
// If we can, encode the offset directly into the instruction. If this is a
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
index 459502eeb2e9..4a96327fe552 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -75,7 +75,7 @@ public:
/// Code Generation virtual methods...
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
- const MCPhysReg *getCalleeSavedRegsViaCopy(const MachineFunction *MF) const override;
+ const MCPhysReg *getCalleeSavedRegsViaCopy(const MachineFunction *MF) const;
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const override;
const uint32_t *getNoPreservedMask() const override;
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
index e5f363c443cd..896cec7e4f6e 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -17,7 +17,6 @@ def sub_eq : SubRegIndex<1, 2>;
def sub_un : SubRegIndex<1, 3>;
def sub_32 : SubRegIndex<32>;
def sub_64 : SubRegIndex<64>;
-def sub_128 : SubRegIndex<128>;
}
@@ -79,15 +78,6 @@ class VSRL<FPR SubReg, string n> : PPCReg<n> {
let SubRegIndices = [sub_64];
}
-// VSRH - One of the 32 128-bit VSX registers that overlap with the vector
-// registers.
-class VSRH<VR SubReg, string n> : PPCReg<n> {
- let HWEncoding{4-0} = SubReg.HWEncoding{4-0};
- let HWEncoding{5} = 1;
- let SubRegs = [SubReg];
- let SubRegIndices = [sub_128];
-}
-
// CR - One of the 8 4-bit condition registers
class CR<bits<3> num, string n, list<Register> subregs> : PPCReg<n> {
let HWEncoding{2-0} = num;
@@ -116,9 +106,12 @@ foreach Index = 0-31 in {
DwarfRegNum<[!add(Index, 32), !add(Index, 32)]>;
}
-// Floating-point vector subregisters (for VSX)
+// 64-bit Floating-point subregisters of Altivec registers
+// Note: the register names are v0-v31 or vs32-vs63 depending on the use.
+// Custom C++ code is used to produce the correct name and encoding.
foreach Index = 0-31 in {
- def VF#Index : VF<Index, "vs" # !add(Index, 32)>;
+ def VF#Index : VF<Index, "v" #Index>,
+ DwarfRegNum<[!add(Index, 77), !add(Index, 77)]>;
}
// QPX Floating-point registers
@@ -138,9 +131,11 @@ foreach Index = 0-31 in {
def VSL#Index : VSRL<!cast<FPR>("F"#Index), "vs"#Index>,
DwarfRegAlias<!cast<FPR>("F"#Index)>;
}
-foreach Index = 0-31 in {
- def VSH#Index : VSRH<!cast<VR>("V"#Index), "vs" # !add(Index, 32)>,
- DwarfRegAlias<!cast<VR>("V"#Index)>;
+
+// Dummy VSX registers: these define the strings "vs32"-"vs63" and are used only
+// for asm printing.
+foreach Index = 32-63 in {
+ def VSX#Index : PPCReg<"vs"#Index>;
}
// The representation of r0 when treated as the constant 0.
@@ -288,7 +283,7 @@ def F8RC : RegisterClass<"PPC", [f64], 64, (add (sequence "F%u", 0, 13),
(sequence "F%u", 31, 14))>;
def F4RC : RegisterClass<"PPC", [f32], 32, (add F8RC)>;
-def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v2i64,v1i128,v4f32], 128,
+def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v2i64,v1i128,v4f32,v2f64], 128,
(add V2, V3, V4, V5, V0, V1, V6, V7, V8, V9, V10, V11,
V12, V13, V14, V15, V16, V17, V18, V19, V31, V30,
V29, V28, V27, V26, V25, V24, V23, V22, V21, V20)>;
@@ -298,14 +293,8 @@ def VRRC : RegisterClass<"PPC", [v16i8,v8i16,v4i32,v2i64,v1i128,v4f32], 128,
def VSLRC : RegisterClass<"PPC", [v4i32,v4f32,v2f64,v2i64], 128,
(add (sequence "VSL%u", 0, 13),
(sequence "VSL%u", 31, 14))>;
-def VSHRC : RegisterClass<"PPC", [v4i32,v4f32,v2f64,v2i64], 128,
- (add VSH2, VSH3, VSH4, VSH5, VSH0, VSH1, VSH6, VSH7,
- VSH8, VSH9, VSH10, VSH11, VSH12, VSH13, VSH14,
- VSH15, VSH16, VSH17, VSH18, VSH19, VSH31, VSH30,
- VSH29, VSH28, VSH27, VSH26, VSH25, VSH24, VSH23,
- VSH22, VSH21, VSH20)>;
def VSRC : RegisterClass<"PPC", [v4i32,v4f32,v2f64,v2i64], 128,
- (add VSLRC, VSHRC)>;
+ (add VSLRC, VRRC)>;
// Register classes for the 64-bit "scalar" VSX subregisters.
def VFRC : RegisterClass<"PPC", [f64], 64,
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td b/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td
index b4d72eff2b85..edabe7748673 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSchedule.td
@@ -109,6 +109,7 @@ def IIC_SprSLBIE : InstrItinClass;
def IIC_SprSLBIEG : InstrItinClass;
def IIC_SprSLBMTE : InstrItinClass;
def IIC_SprSLBMFEE : InstrItinClass;
+def IIC_SprSLBMFEV : InstrItinClass;
def IIC_SprSLBIA : InstrItinClass;
def IIC_SprSLBSYNC : InstrItinClass;
def IIC_SprTLBIA : InstrItinClass;
@@ -128,6 +129,7 @@ include "PPCScheduleG4Plus.td"
include "PPCScheduleG5.td"
include "PPCScheduleP7.td"
include "PPCScheduleP8.td"
+include "PPCScheduleP9.td"
include "PPCScheduleA2.td"
include "PPCScheduleE500mc.td"
include "PPCScheduleE5500.td"
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCScheduleP9.td b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP9.td
new file mode 100644
index 000000000000..a9c1bd78b05e
--- /dev/null
+++ b/contrib/llvm/lib/Target/PowerPC/PPCScheduleP9.td
@@ -0,0 +1,335 @@
+//===-- PPCScheduleP9.td - PPC P9 Scheduling Definitions ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the POWER9 processor.
+//
+//===----------------------------------------------------------------------===//
+include "PPCInstrInfo.td"
+
+def P9Model : SchedMachineModel {
+ let IssueWidth = 8;
+
+ let LoadLatency = 5;
+
+ let MispredictPenalty = 16;
+
+ // Try to make sure we have at least 10 dispatch groups in a loop.
+ let LoopMicroOpBufferSize = 60;
+
+ let CompleteModel = 0;
+
+}
+
+let SchedModel = P9Model in {
+
+ // ***************** Processor Resources *****************
+
+ //Dispatcher:
+ def DISPATCHER : ProcResource<12>;
+
+ // Issue Ports
+ def IP_AGEN : ProcResource<4>;
+ def IP_EXEC : ProcResource<4>;
+ def IP_EXECE : ProcResource<2> {
+ //Even Exec Ports
+ let Super = IP_EXEC;
+ }
+ def IP_EXECO : ProcResource<2> {
+ //Odd Exec Ports
+ let Super = IP_EXEC;
+ }
+
+ // Pipeline Groups
+ def ALU : ProcResource<4>;
+ def ALUE : ProcResource<2> {
+ //Even ALU pipelines
+ let Super = ALU;
+ }
+ def ALUO : ProcResource<2> {
+ //Odd ALU pipelines
+ let Super = ALU;
+ }
+ def DIV : ProcResource<2>;
+ def DP : ProcResource<4>;
+ def DPE : ProcResource<2> {
+ //Even DP pipelines
+ let Super = DP;
+ }
+ def DPO : ProcResource<2> {
+ //Odd DP pipelines
+ let Super = DP;
+ }
+ def LS : ProcResource<4>;
+ def PM : ProcResource<2>;
+ def DFU : ProcResource<1>;
+
+ def TestGroup : ProcResGroup<[ALU, DP]>;
+
+ // ***************** SchedWriteRes Definitions *****************
+
+ //Dispatcher
+ def DISP_1C : SchedWriteRes<[DISPATCHER]> {
+ let NumMicroOps = 0;
+ let Latency = 1;
+ }
+
+ // Issue Ports
+ def IP_AGEN_1C : SchedWriteRes<[IP_AGEN]> {
+ let NumMicroOps = 0;
+ let Latency = 1;
+ }
+
+ def IP_EXEC_1C : SchedWriteRes<[IP_EXEC]> {
+ let NumMicroOps = 0;
+ let Latency = 1;
+ }
+
+ def IP_EXECE_1C : SchedWriteRes<[IP_EXECE]> {
+ let NumMicroOps = 0;
+ let Latency = 1;
+ }
+
+ def IP_EXECO_1C : SchedWriteRes<[IP_EXECO]> {
+ let NumMicroOps = 0;
+ let Latency = 1;
+ }
+
+ //Pipeline Groups
+ def P9_ALU_2C : SchedWriteRes<[ALU]> {
+ let Latency = 2;
+ }
+
+ def P9_ALUE_2C : SchedWriteRes<[ALUE]> {
+ let Latency = 2;
+ }
+
+ def P9_ALUO_2C : SchedWriteRes<[ALUO]> {
+ let Latency = 2;
+ }
+
+ def P9_ALU_3C : SchedWriteRes<[ALU]> {
+ let Latency = 3;
+ }
+
+ def P9_ALUE_3C : SchedWriteRes<[ALUE]> {
+ let Latency = 3;
+ }
+
+ def P9_ALUO_3C : SchedWriteRes<[ALUO]> {
+ let Latency = 3;
+ }
+
+ def P9_ALU_4C : SchedWriteRes<[ALU]> {
+ let Latency = 4;
+ }
+
+ def P9_ALUE_4C : SchedWriteRes<[ALUE]> {
+ let Latency = 4;
+ }
+
+ def P9_ALUO_4C : SchedWriteRes<[ALUO]> {
+ let Latency = 4;
+ }
+
+ def P9_ALU_5C : SchedWriteRes<[ALU]> {
+ let Latency = 5;
+ }
+
+ def P9_ALU_6C : SchedWriteRes<[ALU]> {
+ let Latency = 6;
+ }
+
+ def P9_DIV_16C_8 : SchedWriteRes<[DIV]> {
+ let ResourceCycles = [8];
+ let Latency = 16;
+ }
+
+ def P9_DIV_24C_8 : SchedWriteRes<[DIV]> {
+ let ResourceCycles = [8];
+ let Latency = 24;
+ }
+
+ def P9_DIV_40C_8 : SchedWriteRes<[DIV]> {
+ let ResourceCycles = [8];
+ let Latency = 40;
+ }
+
+ def P9_DP_2C : SchedWriteRes<[DP]> {
+ let Latency = 2;
+ }
+
+ def P9_DP_5C : SchedWriteRes<[DP]> {
+ let Latency = 5;
+ }
+
+ def P9_DP_7C : SchedWriteRes<[DP]> {
+ let Latency = 7;
+ }
+
+ def P9_DPE_7C : SchedWriteRes<[DPE]> {
+ let Latency = 7;
+ }
+
+ def P9_DPO_7C : SchedWriteRes<[DPO]> {
+ let Latency = 7;
+ }
+
+ def P9_DP_22C_5 : SchedWriteRes<[DP]> {
+ let ResourceCycles = [5];
+ let Latency = 22;
+ }
+
+ def P9_DP_24C_8 : SchedWriteRes<[DP]> {
+ let ResourceCycles = [8];
+ let Latency = 24;
+ }
+
+ def P9_DP_26C_5 : SchedWriteRes<[DP]> {
+ let ResourceCycles = [5];
+ let Latency = 26;
+ }
+
+ def P9_DP_27C_7 : SchedWriteRes<[DP]> {
+ let ResourceCycles = [7];
+ let Latency = 27;
+ }
+
+ def P9_DP_33C_8 : SchedWriteRes<[DP]> {
+ let ResourceCycles = [8];
+ let Latency = 33;
+ }
+
+ def P9_DP_36C_10 : SchedWriteRes<[DP]> {
+ let ResourceCycles = [10];
+ let Latency = 36;
+ }
+
+ def P9_PM_3C : SchedWriteRes<[PM]> {
+ let Latency = 3;
+ }
+
+ def P9_PM_7C : SchedWriteRes<[PM]> {
+ let Latency = 7;
+ }
+
+ def P9_LS_1C : SchedWriteRes<[LS]> {
+ let Latency = 1;
+ }
+
+ def P9_LS_4C : SchedWriteRes<[LS]> {
+ let Latency = 4;
+ }
+
+ def P9_LS_5C : SchedWriteRes<[LS]> {
+ let Latency = 5;
+ }
+
+ def P9_DFU_12C : SchedWriteRes<[DFU]> {
+ let Latency = 12;
+ }
+
+ def P9_DFU_24C : SchedWriteRes<[DFU]> {
+ let Latency = 24;
+ let ResourceCycles = [12];
+ }
+
+ def P9_DFU_58C : SchedWriteRes<[DFU]> {
+ let Latency = 58;
+ let ResourceCycles = [44];
+ }
+
+ def P9_DFU_76C : SchedWriteRes<[TestGroup, DFU]> {
+ let Latency = 76;
+ let ResourceCycles = [62];
+ }
+ // ***************** WriteSeq Definitions *****************
+
+ def P9_LoadAndALUOp_6C : WriteSequence<[P9_LS_4C, P9_ALU_2C]>;
+ def P9_LoadAndALUOp_7C : WriteSequence<[P9_LS_5C, P9_ALU_2C]>;
+ def P9_LoadAndPMOp_8C : WriteSequence<[P9_LS_5C, P9_PM_3C]>;
+ def P9_IntDivAndALUOp_26C_8 : WriteSequence<[P9_DIV_24C_8, P9_ALU_2C]>;
+ def P9_IntDivAndALUOp_42C_8 : WriteSequence<[P9_DIV_40C_8, P9_ALU_2C]>;
+ def P9_StoreAndALUOp_4C : WriteSequence<[P9_LS_1C, P9_ALU_3C]>;
+ def P9_ALUOpAndALUOp_4C : WriteSequence<[P9_ALU_2C, P9_ALU_2C]>;
+
+ // ***************** Defining Itinerary Class Resources *****************
+
+ def : ItinRW<[P9_DFU_76C, IP_EXEC_1C, DISP_1C, DISP_1C], [IIC_IntSimple,
+ IIC_IntGeneral]>;
+
+ def : ItinRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+ [IIC_IntISEL, IIC_IntRotate, IIC_IntShift]>;
+
+ def : ItinRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C, DISP_1C], [IIC_IntCompare]>;
+
+ def : ItinRW<[P9_DP_5C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+ [IIC_IntMulHW, IIC_IntMulHWU, IIC_IntMulLI]>;
+
+ def : ItinRW<[P9_LS_5C, IP_EXEC_1C, DISP_1C, DISP_1C],
+ [IIC_LdStLoad, IIC_LdStLD]>;
+
+ def : ItinRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ [IIC_LdStLoadUpd, IIC_LdStLDU]>;
+
+ def : ItinRW<[P9_LS_4C, P9_ALU_2C, IP_EXECE_1C, IP_EXECO_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ [IIC_LdStLoadUpdX, IIC_LdStLDUX]>;
+
+ def : ItinRW<[P9_LS_1C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ [IIC_LdStSTFDU]>;
+
+ def : ItinRW<[P9_LoadAndALUOp_6C,
+ IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ [IIC_LdStLHA, IIC_LdStLWA]>;
+
+ def : ItinRW<[P9_LoadAndALUOp_6C, P9_ALU_2C,
+ IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ [IIC_LdStLHAU, IIC_LdStLHAUX]>;
+
+ // IIC_LdStLMW contains two microcoded insns. This is not accurate, but
+ // those insns are not used that much, if at all.
+ def : ItinRW<[P9_LS_4C, IP_EXEC_1C, DISP_1C, DISP_1C],
+ [IIC_LdStLWARX, IIC_LdStLDARX, IIC_LdStLMW]>;
+
+ def : ItinRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_1C, DISP_1C, DISP_1C],
+ [IIC_LdStSTFD, IIC_LdStSTD, IIC_LdStStore]>;
+
+ def : ItinRW<[P9_LS_1C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ [IIC_LdStSTDU, IIC_LdStSTDUX]>;
+
+ def : ItinRW<[P9_StoreAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
+ DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C],
+ [IIC_LdStSTDCX, IIC_LdStSTWCX]>;
+
+ def : ItinRW<[P9_ALU_5C, IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C],
+ [IIC_BrCR, IIC_IntMTFSB0]>;
+
+ def : ItinRW<[P9_ALUOpAndALUOp_4C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
+ IP_EXEC_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C, DISP_1C,
+ DISP_1C, DISP_1C, DISP_1C], [IIC_SprMFCR, IIC_SprMFCRF]>;
+
+ // This class should be broken down to instruction level, once some missing
+ // info is obtained.
+ def : ItinRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
+ DISP_1C, DISP_1C, DISP_1C], [IIC_SprMTSPR]>;
+
+ def : ItinRW<[P9_DP_7C, IP_EXEC_1C,
+ DISP_1C, DISP_1C, DISP_1C], [IIC_FPGeneral, IIC_FPAddSub]>;
+
+ def : ItinRW<[P9_DP_36C_10, IP_EXEC_1C], [IIC_FPSqrtD]>;
+ def : ItinRW<[P9_DP_26C_5, P9_DP_26C_5, IP_EXEC_1C, IP_EXEC_1C], [IIC_FPSqrtS]>;
+
+ include "P9InstrResources.td"
+
+}
+
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h
index 46da8402151c..7fd907990ceb 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -277,6 +277,9 @@ public:
bool hasFloat128() const { return HasFloat128; }
bool isISA3_0() const { return IsISA3_0; }
bool useLongCalls() const { return UseLongCalls; }
+ bool needsSwapsForVSXMemOps() const {
+ return hasVSX() && isLittleEndian() && !hasP9Vector();
+ }
POPCNTDKind hasPOPCNTD() const { return HasPOPCNTD; }
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index 61ce48ecd04f..0c1260a2965b 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -56,26 +56,26 @@ protected:
for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
I != IE;) {
- MachineInstr *MI = I;
+ MachineInstr &MI = *I;
- if (MI->getOpcode() != PPC::ADDItlsgdLADDR &&
- MI->getOpcode() != PPC::ADDItlsldLADDR &&
- MI->getOpcode() != PPC::ADDItlsgdLADDR32 &&
- MI->getOpcode() != PPC::ADDItlsldLADDR32) {
+ if (MI.getOpcode() != PPC::ADDItlsgdLADDR &&
+ MI.getOpcode() != PPC::ADDItlsldLADDR &&
+ MI.getOpcode() != PPC::ADDItlsgdLADDR32 &&
+ MI.getOpcode() != PPC::ADDItlsldLADDR32) {
++I;
continue;
}
- DEBUG(dbgs() << "TLS Dynamic Call Fixup:\n " << *MI;);
+ DEBUG(dbgs() << "TLS Dynamic Call Fixup:\n " << MI);
- unsigned OutReg = MI->getOperand(0).getReg();
- unsigned InReg = MI->getOperand(1).getReg();
- DebugLoc DL = MI->getDebugLoc();
+ unsigned OutReg = MI.getOperand(0).getReg();
+ unsigned InReg = MI.getOperand(1).getReg();
+ DebugLoc DL = MI.getDebugLoc();
unsigned GPR3 = Is64Bit ? PPC::X3 : PPC::R3;
unsigned Opc1, Opc2;
const unsigned OrigRegs[] = {OutReg, InReg, GPR3};
- switch (MI->getOpcode()) {
+ switch (MI.getOpcode()) {
default:
llvm_unreachable("Opcode inconsistency error");
case PPC::ADDItlsgdLADDR:
@@ -104,7 +104,7 @@ protected:
// Expand into two ops built prior to the existing instruction.
MachineInstr *Addi = BuildMI(MBB, I, DL, TII->get(Opc1), GPR3)
.addReg(InReg);
- Addi->addOperand(MI->getOperand(2));
+ Addi->addOperand(MI.getOperand(2));
// The ADDItls* instruction is the first instruction in the
// repair range.
@@ -113,7 +113,7 @@ protected:
MachineInstr *Call = (BuildMI(MBB, I, DL, TII->get(Opc2), GPR3)
.addReg(GPR3));
- Call->addOperand(MI->getOperand(3));
+ Call->addOperand(MI.getOperand(3));
BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKUP)).addImm(0).addImm(0);
@@ -126,7 +126,7 @@ protected:
// Move past the original instruction and remove it.
++I;
- MI->removeFromParent();
+ MI.removeFromParent();
// Repair the live intervals.
LIS->repairIntervalsInRange(&MBB, First, Last, OrigRegs);
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index 1bb6b674081b..91b1d24b2e41 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -74,9 +74,9 @@ EnableMachineCombinerPass("ppc-machine-combiner",
extern "C" void LLVMInitializePowerPCTarget() {
// Register the targets
- RegisterTargetMachine<PPC32TargetMachine> A(ThePPC32Target);
- RegisterTargetMachine<PPC64TargetMachine> B(ThePPC64Target);
- RegisterTargetMachine<PPC64TargetMachine> C(ThePPC64LETarget);
+ RegisterTargetMachine<PPC32TargetMachine> A(getThePPC32Target());
+ RegisterTargetMachine<PPC64TargetMachine> B(getThePPC64Target());
+ RegisterTargetMachine<PPC64TargetMachine> C(getThePPC64LETarget());
PassRegistry &PR = *PassRegistry::getPassRegistry();
initializePPCBoolRetToIntPass(PR);
@@ -181,6 +181,10 @@ static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT,
static Reloc::Model getEffectiveRelocModel(const Triple &TT,
Optional<Reloc::Model> RM) {
if (!RM.hasValue()) {
+ if (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le) {
+ if (!TT.isOSBinFormatMachO() && !TT.isMacOSX())
+ return Reloc::PIC_;
+ }
if (TT.isOSDarwin())
return Reloc::DynamicNoPIC;
return Reloc::Static;
@@ -204,23 +208,6 @@ PPCTargetMachine::PPCTargetMachine(const Target &T, const Triple &TT,
TargetABI(computeTargetABI(TT, Options)),
Subtarget(TargetTriple, CPU, computeFSAdditions(FS, OL, TT), *this) {
- // For the estimates, convergence is quadratic, so we essentially double the
- // number of digits correct after every iteration. For both FRE and FRSQRTE,
- // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
- // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
- unsigned RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3,
- RefinementSteps64 = RefinementSteps + 1;
-
- this->Options.Reciprocals.setDefaults("sqrtf", true, RefinementSteps);
- this->Options.Reciprocals.setDefaults("vec-sqrtf", true, RefinementSteps);
- this->Options.Reciprocals.setDefaults("divf", true, RefinementSteps);
- this->Options.Reciprocals.setDefaults("vec-divf", true, RefinementSteps);
-
- this->Options.Reciprocals.setDefaults("sqrtd", true, RefinementSteps64);
- this->Options.Reciprocals.setDefaults("vec-sqrtd", true, RefinementSteps64);
- this->Options.Reciprocals.setDefaults("divd", true, RefinementSteps64);
- this->Options.Reciprocals.setDefaults("vec-divd", true, RefinementSteps64);
-
initAsmInfo();
}
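The comment deleted above reasons from quadratic convergence to the removed RefinementSteps defaults. A minimal sketch of that arithmetic, assuming the 23/52 mantissa-bit figures cited in the deleted comment (the helper name is illustrative and not LLVM API):

#include <cassert>

// Each Newton-Raphson step roughly doubles the number of correct bits.
// Starting from the architected 2^-5 accuracy (2^-14 with hasRecipPrec()),
// count the steps needed to cover the mantissa.
static unsigned stepsNeeded(unsigned InitialBits, unsigned MantissaBits) {
  unsigned Bits = InitialBits, Steps = 0;
  while (Bits < MantissaBits) {
    Bits *= 2; // accuracy doubles per iteration
    ++Steps;
  }
  return Steps;
}

int main() {
  assert(stepsNeeded(5, 23) == 3);  // float, no hasRecipPrec(): RefinementSteps = 3
  assert(stepsNeeded(5, 52) == 4);  // double: RefinementSteps64 = RefinementSteps + 1 = 4
  assert(stepsNeeded(14, 23) == 1); // float with hasRecipPrec(): RefinementSteps = 1
  assert(stepsNeeded(14, 52) == 2); // double with hasRecipPrec(): RefinementSteps64 = 2
}

This only illustrates why the removed defaults were 3/4 and 1/2; the hunk itself simply drops the per-target reciprocal defaults.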
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp
index 8f660355c0ac..a049dc3fda93 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp
@@ -23,8 +23,7 @@ Initialize(MCContext &Ctx, const TargetMachine &TM) {
}
MCSection *PPC64LinuxTargetObjectFile::SelectSectionForGlobal(
- const GlobalValue *GV, SectionKind Kind, Mangler &Mang,
- const TargetMachine &TM) const {
+ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
// Here override ReadOnlySection to DataRelROSection for PPC64 SVR4 ABI
// when we have a constant that contains global relocations. This is
// necessary because of this ABI's handling of pointers to functions in
@@ -40,14 +39,13 @@ MCSection *PPC64LinuxTargetObjectFile::SelectSectionForGlobal(
// For more information, see the description of ELIMINATE_COPY_RELOCS in
// GNU ld.
if (Kind.isReadOnly()) {
- const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+ const auto *GVar = dyn_cast<GlobalVariable>(GO);
if (GVar && GVar->isConstant() && GVar->getInitializer()->needsRelocation())
Kind = SectionKind::getReadOnlyWithRel();
}
- return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind,
- Mang, TM);
+ return TargetLoweringObjectFileELF::SelectSectionForGlobal(GO, Kind, TM);
}
const MCExpr *PPC64LinuxTargetObjectFile::
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.h
index d248791f2cad..c8b9b2e9790b 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.h
@@ -22,8 +22,7 @@ namespace llvm {
void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
- MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
- Mangler &Mang,
+ MCSection *SelectSectionForGlobal(const GlobalObject *GO, SectionKind Kind,
const TargetMachine &TM) const override;
/// \brief Describe a TLS variable address within debug info.
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 9331e41fb9c1..f7785342b364 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -131,12 +131,12 @@ int PPCTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
return TTI::TCC_Free;
case Instruction::And:
RunFree = true; // (for the rotate-and-mask instructions)
- // Fallthrough...
+ LLVM_FALLTHROUGH;
case Instruction::Add:
case Instruction::Or:
case Instruction::Xor:
ShiftedFree = true;
- // Fallthrough...
+ LLVM_FALLTHROUGH;
case Instruction::Sub:
case Instruction::Mul:
case Instruction::Shl:
@@ -147,7 +147,8 @@ int PPCTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
case Instruction::ICmp:
UnsignedFree = true;
ImmIdx = 1;
- // Fallthrough... (zero comparisons can use record-form instructions)
+ // Zero comparisons can use record-form instructions.
+ LLVM_FALLTHROUGH;
case Instruction::Select:
ZeroFree = true;
break;
@@ -359,11 +360,6 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
int Cost = BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
- // Aligned loads and stores are easy.
- unsigned SrcBytes = LT.second.getStoreSize();
- if (!SrcBytes || !Alignment || Alignment >= SrcBytes)
- return Cost;
-
bool IsAltivecType = ST->hasAltivec() &&
(LT.second == MVT::v16i8 || LT.second == MVT::v8i16 ||
LT.second == MVT::v4i32 || LT.second == MVT::v4f32);
@@ -372,6 +368,20 @@ int PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
bool IsQPXType = ST->hasQPX() &&
(LT.second == MVT::v4f64 || LT.second == MVT::v4f32);
+ // VSX has 32b/64b load instructions. Legalization can handle loading of
+ // 32b/64b to VSR correctly and cheaply. But BaseT::getMemoryOpCost and
+ // PPCTargetLowering can't compute the cost appropriately. So here we
+ // explicitly check this case.
+ unsigned MemBytes = Src->getPrimitiveSizeInBits();
+ if (Opcode == Instruction::Load && ST->hasVSX() && IsAltivecType &&
+ (MemBytes == 64 || (ST->hasP8Vector() && MemBytes == 32)))
+ return 1;
+
+ // Aligned loads and stores are easy.
+ unsigned SrcBytes = LT.second.getStoreSize();
+ if (!SrcBytes || !Alignment || Alignment >= SrcBytes)
+ return Cost;
+
// If we can use the permutation-based load sequence, then this is also
// relatively cheap (not counting loop-invariant instructions): one load plus
// one permute (the last load in a series has extra cost, but we're
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index 5ea9a543cdb1..8308086ccfaa 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -41,13 +41,6 @@ public:
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}
- // Provide value semantics. MSVC requires that we spell all of these out.
- PPCTTIImpl(const PPCTTIImpl &Arg)
- : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {}
- PPCTTIImpl(PPCTTIImpl &&Arg)
- : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
- TLI(std::move(Arg.TLI)) {}
-
/// \name Scalar TTI Implementations
/// @{
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp b/contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp
index 60f1ad5585ff..3b5d8f094fd0 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCVSXCopy.cpp
@@ -89,37 +89,31 @@ protected:
bool Changed = false;
MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
- for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
- I != IE; ++I) {
- MachineInstr *MI = I;
- if (!MI->isFullCopy())
+ for (MachineInstr &MI : MBB) {
+ if (!MI.isFullCopy())
continue;
- MachineOperand &DstMO = MI->getOperand(0);
- MachineOperand &SrcMO = MI->getOperand(1);
+ MachineOperand &DstMO = MI.getOperand(0);
+ MachineOperand &SrcMO = MI.getOperand(1);
if ( IsVSReg(DstMO.getReg(), MRI) &&
!IsVSReg(SrcMO.getReg(), MRI)) {
// This is a copy *to* a VSX register from a non-VSX register.
Changed = true;
- const TargetRegisterClass *SrcRC =
- IsVRReg(SrcMO.getReg(), MRI) ? &PPC::VSHRCRegClass :
- &PPC::VSLRCRegClass;
+ const TargetRegisterClass *SrcRC = &PPC::VSLRCRegClass;
assert((IsF8Reg(SrcMO.getReg(), MRI) ||
- IsVRReg(SrcMO.getReg(), MRI) ||
IsVSSReg(SrcMO.getReg(), MRI) ||
IsVSFReg(SrcMO.getReg(), MRI)) &&
"Unknown source for a VSX copy");
unsigned NewVReg = MRI.createVirtualRegister(SrcRC);
- BuildMI(MBB, MI, MI->getDebugLoc(),
+ BuildMI(MBB, MI, MI.getDebugLoc(),
TII->get(TargetOpcode::SUBREG_TO_REG), NewVReg)
- .addImm(1) // add 1, not 0, because there is no implicit clearing
- // of the high bits.
- .addOperand(SrcMO)
- .addImm(IsVRReg(SrcMO.getReg(), MRI) ? PPC::sub_128 :
- PPC::sub_64);
+ .addImm(1) // add 1, not 0, because there is no implicit clearing
+ // of the high bits.
+ .addOperand(SrcMO)
+ .addImm(PPC::sub_64);
// The source of the original copy is now the new virtual register.
SrcMO.setReg(NewVReg);
@@ -128,25 +122,21 @@ protected:
// This is a copy *from* a VSX register to a non-VSX register.
Changed = true;
- const TargetRegisterClass *DstRC =
- IsVRReg(DstMO.getReg(), MRI) ? &PPC::VSHRCRegClass :
- &PPC::VSLRCRegClass;
+ const TargetRegisterClass *DstRC = &PPC::VSLRCRegClass;
assert((IsF8Reg(DstMO.getReg(), MRI) ||
IsVSFReg(DstMO.getReg(), MRI) ||
- IsVSSReg(DstMO.getReg(), MRI) ||
- IsVRReg(DstMO.getReg(), MRI)) &&
+ IsVSSReg(DstMO.getReg(), MRI)) &&
"Unknown destination for a VSX copy");
// Copy the VSX value into a new VSX register of the correct subclass.
unsigned NewVReg = MRI.createVirtualRegister(DstRC);
- BuildMI(MBB, MI, MI->getDebugLoc(),
- TII->get(TargetOpcode::COPY), NewVReg)
- .addOperand(SrcMO);
+ BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY),
+ NewVReg)
+ .addOperand(SrcMO);
// Transform the original copy into a subregister extraction copy.
SrcMO.setReg(NewVReg);
- SrcMO.setSubReg(IsVRReg(DstMO.getReg(), MRI) ? PPC::sub_128 :
- PPC::sub_64);
+ SrcMO.setSubReg(PPC::sub_64);
}
}
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
index 7c22cb22bfa5..f6d20ced15a0 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCVSXFMAMutate.cpp
@@ -21,6 +21,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -74,7 +75,7 @@ protected:
const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
I != IE; ++I) {
- MachineInstr *MI = I;
+ MachineInstr &MI = *I;
// The default (A-type) VSX FMA form kills the addend (it is taken from
// the target register, which is then updated to reflect the result of
@@ -82,7 +83,7 @@ protected:
// used for the product, then we can use the M-form instruction (which
// will take that value from the to-be-defined register).
- int AltOpc = PPC::getAltVSXFMAOpcode(MI->getOpcode());
+ int AltOpc = PPC::getAltVSXFMAOpcode(MI.getOpcode());
if (AltOpc == -1)
continue;
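A minimal C++ sketch of the A-form/M-form distinction the comment in the hunk above relies on (the function names and sample values are illustrative; no concrete VSX opcodes are named in this hunk):

#include <cassert>

// A-form: the addend is read from, and overwritten by, the destination.
static double fmaAForm(double Dst, double A, double B) { return A * B + Dst; }
// M-form: the destination supplies a multiplicand and the addend comes from a
// source operand, so the addend's own register stays usable.
static double fmaMForm(double Dst, double A, double C) { return A * Dst + C; }

int main() {
  double O1 = 1.5, O2 = 2.0, O3 = 3.0;
  // Both forms compute the same value; the M-form just avoids the COPY that
  // would otherwise be needed to place the addend into the destination.
  assert(fmaAForm(O1, O2, O3) == fmaMForm(O3, O2, O1));
}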
@@ -105,10 +106,10 @@ protected:
// %RM<imp-use>; VSLRC:%vreg16,%vreg18,%vreg9
// and we remove: %vreg5<def> = COPY %vreg9; VSLRC:%vreg5,%vreg9
- SlotIndex FMAIdx = LIS->getInstructionIndex(*MI);
+ SlotIndex FMAIdx = LIS->getInstructionIndex(MI);
VNInfo *AddendValNo =
- LIS->getInterval(MI->getOperand(1).getReg()).Query(FMAIdx).valueIn();
+ LIS->getInterval(MI.getOperand(1).getReg()).Query(FMAIdx).valueIn();
// This can be null if the register is undef.
if (!AddendValNo)
@@ -118,7 +119,7 @@ protected:
// The addend and this instruction must be in the same block.
- if (!AddendMI || AddendMI->getParent() != MI->getParent())
+ if (!AddendMI || AddendMI->getParent() != MI.getParent())
continue;
// The addend must be a full copy within the same register class.
@@ -182,12 +183,12 @@ protected:
// %vreg5 = A-form-op %vreg5, %vreg5, %vreg11;
// where vreg5 and vreg11 are both kills. This case would be skipped
// otherwise.
- unsigned OldFMAReg = MI->getOperand(0).getReg();
+ unsigned OldFMAReg = MI.getOperand(0).getReg();
// Find one of the product operands that is killed by this instruction.
unsigned KilledProdOp = 0, OtherProdOp = 0;
- unsigned Reg2 = MI->getOperand(2).getReg();
- unsigned Reg3 = MI->getOperand(3).getReg();
+ unsigned Reg2 = MI.getOperand(2).getReg();
+ unsigned Reg3 = MI.getOperand(3).getReg();
if (LIS->getInterval(Reg2).Query(FMAIdx).isKill()
&& Reg2 != OldFMAReg) {
KilledProdOp = 2;
@@ -214,20 +215,20 @@ protected:
// Transform: (O2 * O3) + O1 -> (O2 * O1) + O3.
- unsigned KilledProdReg = MI->getOperand(KilledProdOp).getReg();
- unsigned OtherProdReg = MI->getOperand(OtherProdOp).getReg();
+ unsigned KilledProdReg = MI.getOperand(KilledProdOp).getReg();
+ unsigned OtherProdReg = MI.getOperand(OtherProdOp).getReg();
unsigned AddSubReg = AddendMI->getOperand(1).getSubReg();
- unsigned KilledProdSubReg = MI->getOperand(KilledProdOp).getSubReg();
- unsigned OtherProdSubReg = MI->getOperand(OtherProdOp).getSubReg();
+ unsigned KilledProdSubReg = MI.getOperand(KilledProdOp).getSubReg();
+ unsigned OtherProdSubReg = MI.getOperand(OtherProdOp).getSubReg();
bool AddRegKill = AddendMI->getOperand(1).isKill();
- bool KilledProdRegKill = MI->getOperand(KilledProdOp).isKill();
- bool OtherProdRegKill = MI->getOperand(OtherProdOp).isKill();
+ bool KilledProdRegKill = MI.getOperand(KilledProdOp).isKill();
+ bool OtherProdRegKill = MI.getOperand(OtherProdOp).isKill();
bool AddRegUndef = AddendMI->getOperand(1).isUndef();
- bool KilledProdRegUndef = MI->getOperand(KilledProdOp).isUndef();
- bool OtherProdRegUndef = MI->getOperand(OtherProdOp).isUndef();
+ bool KilledProdRegUndef = MI.getOperand(KilledProdOp).isUndef();
+ bool OtherProdRegUndef = MI.getOperand(OtherProdOp).isUndef();
// If there isn't a class that fits, we can't perform the transform.
// This is needed for correctness with a mixture of VSX and Altivec
@@ -240,39 +241,39 @@ protected:
assert(OldFMAReg == AddendMI->getOperand(0).getReg() &&
"Addend copy not tied to old FMA output!");
- DEBUG(dbgs() << "VSX FMA Mutation:\n " << *MI;);
+ DEBUG(dbgs() << "VSX FMA Mutation:\n " << MI);
- MI->getOperand(0).setReg(KilledProdReg);
- MI->getOperand(1).setReg(KilledProdReg);
- MI->getOperand(3).setReg(AddendSrcReg);
+ MI.getOperand(0).setReg(KilledProdReg);
+ MI.getOperand(1).setReg(KilledProdReg);
+ MI.getOperand(3).setReg(AddendSrcReg);
- MI->getOperand(0).setSubReg(KilledProdSubReg);
- MI->getOperand(1).setSubReg(KilledProdSubReg);
- MI->getOperand(3).setSubReg(AddSubReg);
+ MI.getOperand(0).setSubReg(KilledProdSubReg);
+ MI.getOperand(1).setSubReg(KilledProdSubReg);
+ MI.getOperand(3).setSubReg(AddSubReg);
- MI->getOperand(1).setIsKill(KilledProdRegKill);
- MI->getOperand(3).setIsKill(AddRegKill);
+ MI.getOperand(1).setIsKill(KilledProdRegKill);
+ MI.getOperand(3).setIsKill(AddRegKill);
- MI->getOperand(1).setIsUndef(KilledProdRegUndef);
- MI->getOperand(3).setIsUndef(AddRegUndef);
+ MI.getOperand(1).setIsUndef(KilledProdRegUndef);
+ MI.getOperand(3).setIsUndef(AddRegUndef);
- MI->setDesc(TII->get(AltOpc));
+ MI.setDesc(TII->get(AltOpc));
// If the addend is also a multiplicand, replace it with the addend
// source in both places.
if (OtherProdReg == AddendMI->getOperand(0).getReg()) {
- MI->getOperand(2).setReg(AddendSrcReg);
- MI->getOperand(2).setSubReg(AddSubReg);
- MI->getOperand(2).setIsKill(AddRegKill);
- MI->getOperand(2).setIsUndef(AddRegUndef);
+ MI.getOperand(2).setReg(AddendSrcReg);
+ MI.getOperand(2).setSubReg(AddSubReg);
+ MI.getOperand(2).setIsKill(AddRegKill);
+ MI.getOperand(2).setIsUndef(AddRegUndef);
} else {
- MI->getOperand(2).setReg(OtherProdReg);
- MI->getOperand(2).setSubReg(OtherProdSubReg);
- MI->getOperand(2).setIsKill(OtherProdRegKill);
- MI->getOperand(2).setIsUndef(OtherProdRegUndef);
+ MI.getOperand(2).setReg(OtherProdReg);
+ MI.getOperand(2).setSubReg(OtherProdSubReg);
+ MI.getOperand(2).setIsKill(OtherProdRegKill);
+ MI.getOperand(2).setIsUndef(OtherProdRegUndef);
}
- DEBUG(dbgs() << " -> " << *MI);
+ DEBUG(dbgs() << " -> " << MI);
// The killed product operand was killed here, so we can reuse it now
// for the result of the fma.
@@ -374,6 +375,8 @@ public:
AU.addPreserved<LiveIntervals>();
AU.addRequired<SlotIndexes>();
AU.addPreserved<SlotIndexes>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
MachineFunctionPass::getAnalysisUsage(AU);
}
};
@@ -383,6 +386,7 @@ INITIALIZE_PASS_BEGIN(PPCVSXFMAMutate, DEBUG_TYPE,
"PowerPC VSX FMA Mutation", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(PPCVSXFMAMutate, DEBUG_TYPE,
"PowerPC VSX FMA Mutation", false, false)
diff --git a/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp b/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
index d53c8e38254f..8197285b7b1f 100644
--- a/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCVSXSwapRemoval.cpp
@@ -962,7 +962,8 @@ void PPCVSXSwapRemoval::dumpSwapVector() {
DEBUG(dbgs() << format("%6d", ID));
DEBUG(dbgs() << format("%6d", EC->getLeaderValue(ID)));
DEBUG(dbgs() << format(" BB#%3d", MI->getParent()->getNumber()));
- DEBUG(dbgs() << format(" %14s ", TII->getName(MI->getOpcode())));
+ DEBUG(dbgs() << format(" %14s ",
+ TII->getName(MI->getOpcode()).str().c_str()));
if (SwapVector[EntryIdx].IsLoad)
DEBUG(dbgs() << "load ");
diff --git a/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp b/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
index 5b2fe19837ef..a637dd11f810 100644
--- a/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.cpp
@@ -12,15 +12,26 @@
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
-Target llvm::ThePPC32Target, llvm::ThePPC64Target, llvm::ThePPC64LETarget;
+Target &llvm::getThePPC32Target() {
+ static Target ThePPC32Target;
+ return ThePPC32Target;
+}
+Target &llvm::getThePPC64Target() {
+ static Target ThePPC64Target;
+ return ThePPC64Target;
+}
+Target &llvm::getThePPC64LETarget() {
+ static Target ThePPC64LETarget;
+ return ThePPC64LETarget;
+}
extern "C" void LLVMInitializePowerPCTargetInfo() {
- RegisterTarget<Triple::ppc, /*HasJIT=*/true>
- X(ThePPC32Target, "ppc32", "PowerPC 32");
+ RegisterTarget<Triple::ppc, /*HasJIT=*/true> X(getThePPC32Target(), "ppc32",
+ "PowerPC 32");
- RegisterTarget<Triple::ppc64, /*HasJIT=*/true>
- Y(ThePPC64Target, "ppc64", "PowerPC 64");
+ RegisterTarget<Triple::ppc64, /*HasJIT=*/true> Y(getThePPC64Target(), "ppc64",
+ "PowerPC 64");
- RegisterTarget<Triple::ppc64le, /*HasJIT=*/true>
- Z(ThePPC64LETarget, "ppc64le", "PowerPC 64 LE");
+ RegisterTarget<Triple::ppc64le, /*HasJIT=*/true> Z(
+ getThePPC64LETarget(), "ppc64le", "PowerPC 64 LE");
}
diff --git a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
new file mode 100644
index 000000000000..f8ef142255c8
--- /dev/null
+++ b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp
@@ -0,0 +1,91 @@
+//===-- RISCVAsmBackend.cpp - RISCV Assembler Backend ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/RISCVMCTargetDesc.h"
+#include "llvm/MC/MCAsmBackend.h"
+#include "llvm/MC/MCAssembler.h"
+#include "llvm/MC/MCDirectives.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCFixupKindInfo.h"
+#include "llvm/MC/MCObjectWriter.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+class RISCVAsmBackend : public MCAsmBackend {
+ uint8_t OSABI;
+ bool Is64Bit;
+
+public:
+ RISCVAsmBackend(uint8_t OSABI, bool Is64Bit)
+ : MCAsmBackend(), OSABI(OSABI), Is64Bit(Is64Bit) {}
+ ~RISCVAsmBackend() override {}
+
+ void applyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize,
+ uint64_t Value, bool IsPCRel) const override;
+
+ MCObjectWriter *createObjectWriter(raw_pwrite_stream &OS) const override;
+
+ bool fixupNeedsRelaxation(const MCFixup &Fixup, uint64_t Value,
+ const MCRelaxableFragment *DF,
+ const MCAsmLayout &Layout) const override {
+ return false;
+ }
+
+ unsigned getNumFixupKinds() const override { return 1; }
+
+ bool mayNeedRelaxation(const MCInst &Inst) const override { return false; }
+
+ void relaxInstruction(const MCInst &Inst, const MCSubtargetInfo &STI,
+ MCInst &Res) const override {
+
+ llvm_unreachable("RISCVAsmBackend::relaxInstruction() unimplemented");
+ }
+
+ bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override;
+};
+
+bool RISCVAsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const {
+ // Once support for the compressed instruction set is added, we will be able
+ // to conditionally support 16-bit NOPs
+ if ((Count % 4) != 0)
+ return false;
+
+ // The canonical nop on RISC-V is addi x0, x0, 0
+ for (uint64_t i = 0; i < Count; i += 4)
+ OW->write32(0x13);
+
+ return true;
+}
+
+void RISCVAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
+ unsigned DataSize, uint64_t Value,
+ bool IsPCRel) const {
+ return;
+}
+
+MCObjectWriter *
+RISCVAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const {
+ return createRISCVELFObjectWriter(OS, OSABI, Is64Bit);
+}
+
+} // end anonymous namespace
+
+MCAsmBackend *llvm::createRISCVAsmBackend(const Target &T,
+ const MCRegisterInfo &MRI,
+ const Triple &TT, StringRef CPU,
+ const MCTargetOptions &Options) {
+ uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS());
+ return new RISCVAsmBackend(OSABI, TT.isArch64Bit());
+}
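A quick hedged check of why writeNopData above emits the word 0x13: with every field zero except the opcode, the standard RISC-V I-type layout assembles addi x0, x0, 0 (nothing beyond the 0x13 constant comes from the patch):

#include <cassert>
#include <cstdint>

int main() {
  // addi x0, x0, 0: imm12 = 0, rs1 = x0, funct3 = 0b000, rd = x0,
  // opcode = 0b0010011 (0x13).
  uint32_t Imm12 = 0, Rs1 = 0, Funct3 = 0, Rd = 0, Opcode = 0b0010011;
  uint32_t Nop = (Imm12 << 20) | (Rs1 << 15) | (Funct3 << 12) | (Rd << 7) | Opcode;
  assert(Nop == 0x13); // matches the word written by OW->write32(0x13)
}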
diff --git a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
new file mode 100644
index 000000000000..4f085d31a267
--- /dev/null
+++ b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFObjectWriter.cpp
@@ -0,0 +1,47 @@
+//===-- RISCVELFObjectWriter.cpp - RISCV ELF Writer -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/RISCVMCTargetDesc.h"
+#include "llvm/MC/MCELFObjectWriter.h"
+#include "llvm/MC/MCFixup.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+namespace {
+class RISCVELFObjectWriter : public MCELFObjectTargetWriter {
+public:
+ RISCVELFObjectWriter(uint8_t OSABI, bool Is64Bit);
+
+ ~RISCVELFObjectWriter() override;
+
+protected:
+ unsigned getRelocType(MCContext &Ctx, const MCValue &Target,
+ const MCFixup &Fixup, bool IsPCRel) const override;
+};
+}
+
+RISCVELFObjectWriter::RISCVELFObjectWriter(uint8_t OSABI, bool Is64Bit)
+ : MCELFObjectTargetWriter(Is64Bit, OSABI, ELF::EM_RISCV,
+ /*HasRelocationAddend*/ false) {}
+
+RISCVELFObjectWriter::~RISCVELFObjectWriter() {}
+
+unsigned RISCVELFObjectWriter::getRelocType(MCContext &Ctx,
+ const MCValue &Target,
+ const MCFixup &Fixup,
+ bool IsPCRel) const {
+ llvm_unreachable("invalid fixup kind!");
+}
+
+MCObjectWriter *llvm::createRISCVELFObjectWriter(raw_pwrite_stream &OS,
+ uint8_t OSABI, bool Is64Bit) {
+ MCELFObjectTargetWriter *MOTW = new RISCVELFObjectWriter(OSABI, Is64Bit);
+ return createELFObjectWriter(MOTW, OS, /*IsLittleEndian*/ true);
+}
diff --git a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp
new file mode 100644
index 000000000000..b164df8b595a
--- /dev/null
+++ b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp
@@ -0,0 +1,25 @@
+//===-- RISCVMCAsmInfo.cpp - RISCV Asm properties -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the RISCVMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCVMCAsmInfo.h"
+#include "llvm/ADT/Triple.h"
+using namespace llvm;
+
+void RISCVMCAsmInfo::anchor() {}
+
+RISCVMCAsmInfo::RISCVMCAsmInfo(const Triple &TT) {
+ PointerSize = CalleeSaveStackSlotSize = TT.isArch64Bit() ? 8 : 4;
+ CommentString = "#";
+ AlignmentIsInBytes = false;
+ SupportsDebugInformation = true;
+}
diff --git a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.h b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.h
new file mode 100644
index 000000000000..901a1eba8af2
--- /dev/null
+++ b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.h
@@ -0,0 +1,31 @@
+//===-- RISCVMCAsmInfo.h - RISCV Asm Info ----------------------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the RISCVMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVMCASMINFO_H
+#define LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVMCASMINFO_H
+
+#include "llvm/MC/MCAsmInfoELF.h"
+
+namespace llvm {
+class Triple;
+
+class RISCVMCAsmInfo : public MCAsmInfoELF {
+ void anchor() override;
+
+public:
+ explicit RISCVMCAsmInfo(const Triple &TargetTriple);
+};
+
+} // namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
new file mode 100644
index 000000000000..b2ed13758d41
--- /dev/null
+++ b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
@@ -0,0 +1,91 @@
+//===-- RISCVMCCodeEmitter.cpp - Convert RISCV code to machine code -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the RISCVMCCodeEmitter class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/RISCVMCTargetDesc.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/MC/MCCodeEmitter.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/Support/EndianStream.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "mccodeemitter"
+
+STATISTIC(MCNumEmitted, "Number of MC instructions emitted");
+
+namespace {
+class RISCVMCCodeEmitter : public MCCodeEmitter {
+ RISCVMCCodeEmitter(const RISCVMCCodeEmitter &) = delete;
+ void operator=(const RISCVMCCodeEmitter &) = delete;
+ MCContext &Ctx;
+
+public:
+ RISCVMCCodeEmitter(MCContext &ctx) : Ctx(ctx) {}
+
+ ~RISCVMCCodeEmitter() override {}
+
+ void encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const override;
+
+ /// TableGen'erated function for getting the binary encoding for an
+ /// instruction.
+ uint64_t getBinaryCodeForInstr(const MCInst &MI,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+
+ /// Return binary encoding of operand. If the machine operand requires
+ /// relocation, record the relocation and return zero.
+ unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
+};
+} // end anonymous namespace
+
+MCCodeEmitter *llvm::createRISCVMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx) {
+ return new RISCVMCCodeEmitter(Ctx);
+}
+
+void RISCVMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ // For now, we only support RISC-V instructions with 32-bit length
+ uint32_t Bits = getBinaryCodeForInstr(MI, Fixups, STI);
+ support::endian::Writer<support::little>(OS).write(Bits);
+ ++MCNumEmitted; // Keep track of the # of mi's emitted.
+}
+
+unsigned
+RISCVMCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+
+ if (MO.isReg())
+ return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg());
+
+ if (MO.isImm())
+ return static_cast<unsigned>(MO.getImm());
+
+ llvm_unreachable("Unhandled expression!");
+ return 0;
+}
+
+#include "RISCVGenMCCodeEmitter.inc"
diff --git a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
new file mode 100644
index 000000000000..4fc69a7fcaba
--- /dev/null
+++ b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.cpp
@@ -0,0 +1,59 @@
+//===-- RISCVMCTargetDesc.cpp - RISCV Target Descriptions -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// This file provides RISCV-specific target descriptions.
+///
+//===----------------------------------------------------------------------===//
+
+#include "RISCVMCTargetDesc.h"
+#include "RISCVMCAsmInfo.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/TargetRegistry.h"
+
+#define GET_INSTRINFO_MC_DESC
+#include "RISCVGenInstrInfo.inc"
+
+#define GET_REGINFO_MC_DESC
+#include "RISCVGenRegisterInfo.inc"
+
+using namespace llvm;
+
+static MCInstrInfo *createRISCVMCInstrInfo() {
+ MCInstrInfo *X = new MCInstrInfo();
+ InitRISCVMCInstrInfo(X);
+ return X;
+}
+
+static MCRegisterInfo *createRISCVMCRegisterInfo(const Triple &TT) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitRISCVMCRegisterInfo(X, RISCV::X1_32);
+ return X;
+}
+
+static MCAsmInfo *createRISCVMCAsmInfo(const MCRegisterInfo &MRI,
+ const Triple &TT) {
+ MCAsmInfo *MAI = new RISCVMCAsmInfo(TT);
+ return MAI;
+}
+
+extern "C" void LLVMInitializeRISCVTargetMC() {
+ for (Target *T : {&getTheRISCV32Target(), &getTheRISCV64Target()}) {
+ RegisterMCAsmInfoFn X(*T, createRISCVMCAsmInfo);
+ TargetRegistry::RegisterMCInstrInfo(*T, createRISCVMCInstrInfo);
+ TargetRegistry::RegisterMCRegInfo(*T, createRISCVMCRegisterInfo);
+ TargetRegistry::RegisterMCAsmBackend(*T, createRISCVAsmBackend);
+ TargetRegistry::RegisterMCCodeEmitter(*T, createRISCVMCCodeEmitter);
+ }
+}
diff --git a/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h
new file mode 100644
index 000000000000..ddc3bf350452
--- /dev/null
+++ b/contrib/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCTargetDesc.h
@@ -0,0 +1,58 @@
+//===-- RISCVMCTargetDesc.h - RISCV Target Descriptions ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides RISCV specific target descriptions.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVMCTARGETDESC_H
+#define LLVM_LIB_TARGET_RISCV_MCTARGETDESC_RISCVMCTARGETDESC_H
+
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Config/config.h"
+
+namespace llvm {
+class MCAsmBackend;
+class MCCodeEmitter;
+class MCContext;
+class MCInstrInfo;
+class MCObjectWriter;
+class MCRegisterInfo;
+class MCSubtargetInfo;
+class StringRef;
+class Target;
+class Triple;
+class raw_ostream;
+class raw_pwrite_stream;
+
+Target &getTheRISCV32Target();
+Target &getTheRISCV64Target();
+
+MCCodeEmitter *createRISCVMCCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo &MRI,
+ MCContext &Ctx);
+
+MCAsmBackend *createRISCVAsmBackend(const Target &T, const MCRegisterInfo &MRI,
+ const Triple &TT, StringRef CPU,
+ const MCTargetOptions &Options);
+
+MCObjectWriter *createRISCVELFObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI,
+ bool Is64Bit);
+}
+
+// Defines symbolic names for RISC-V registers.
+#define GET_REGINFO_ENUM
+#include "RISCVGenRegisterInfo.inc"
+
+// Defines symbolic names for RISC-V instructions.
+#define GET_INSTRINFO_ENUM
+#include "RISCVGenInstrInfo.inc"
+
+#endif
diff --git a/contrib/llvm/lib/Target/RISCV/RISCV.td b/contrib/llvm/lib/Target/RISCV/RISCV.td
new file mode 100644
index 000000000000..14838309a1bf
--- /dev/null
+++ b/contrib/llvm/lib/Target/RISCV/RISCV.td
@@ -0,0 +1,27 @@
+//===-- RISCV.td - Describe the RISCV Target Machine -------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+include "RISCVRegisterInfo.td"
+include "RISCVInstrInfo.td"
+
+
+def RISCVInstrInfo : InstrInfo;
+
+def Feature64Bit : SubtargetFeature<"64bit", "HasRV64", "true",
+ "Implements RV64">;
+
+def : ProcessorModel<"generic-rv32", NoSchedModel, []>;
+
+def : ProcessorModel<"generic-rv64", NoSchedModel, [Feature64Bit]>;
+
+def RISCV : Target {
+ let InstructionSet = RISCVInstrInfo;
+}
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/contrib/llvm/lib/Target/RISCV/RISCVInstrFormats.td
new file mode 100644
index 000000000000..1e9bc3bf9bc5
--- /dev/null
+++ b/contrib/llvm/lib/Target/RISCV/RISCVInstrFormats.td
@@ -0,0 +1,152 @@
+//===-- RISCVInstrFormats.td - RISCV Instruction Formats ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//
+// These instruction format definitions are structured to match the
+// description in the RISC-V User-Level ISA specification as closely as
+// possible. For instance, the specification describes instructions with the
+// MSB (31st bit) on the left and the LSB (0th bit) on the right. This is
+// reflected in the order of parameters to each instruction class.
+//
+// One area of divergence is in the description of immediates. The
+// specification describes immediate encoding in terms of bit-slicing
+// operations on the logical value represented. The immediate argument to
+// these instruction formats instead represents the bit sequence that will be
+// inserted into the instruction. e.g. although JAL's immediate is logically
+// a 21-bit value (where the LSB is always zero), we describe it as an imm20
+// to match how it is encoded.
+//
+//===----------------------------------------------------------------------===//
+
+class RISCVInst<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : Instruction {
+ field bits<32> Inst;
+ let Size = 4;
+
+ bits<7> Opcode = 0;
+
+ let Inst{6-0} = Opcode;
+
+ let Namespace = "RISCV";
+
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+ let AsmString = asmstr;
+ let Pattern = pattern;
+}
+
+// Pseudo instructions
+class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : RISCVInst<outs, ins, asmstr, pattern> {
+ let isPseudo = 1;
+}
+
+class FR<bits<7> funct7, bits<3> funct3, bits<7> opcode, dag outs, dag ins,
+ string asmstr, list<dag> pattern> : RISCVInst<outs, ins, asmstr, pattern>
+{
+ bits<5> rs2;
+ bits<5> rs1;
+ bits<5> rd;
+
+ let Inst{31-25} = funct7;
+ let Inst{24-20} = rs2;
+ let Inst{19-15} = rs1;
+ let Inst{14-12} = funct3;
+ let Inst{11-7} = rd;
+ let Opcode = opcode;
+}
+
+class FI<bits<3> funct3, bits<7> opcode, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : RISCVInst<outs, ins, asmstr, pattern>
+{
+ bits<12> imm12;
+ bits<5> rs1;
+ bits<5> rd;
+
+ let Inst{31-20} = imm12;
+ let Inst{19-15} = rs1;
+ let Inst{14-12} = funct3;
+ let Inst{11-7} = rd;
+ let Opcode = opcode;
+}
+
+class FI32Shift<bit arithshift, bits<3> funct3, bits<7> opcode, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : RISCVInst<outs, ins, asmstr, pattern>
+{
+ bits<5> shamt;
+ bits<5> rs1;
+ bits<5> rd;
+
+ let Inst{31} = 0;
+ let Inst{30} = arithshift;
+ let Inst{29-25} = 0;
+ let Inst{24-20} = shamt;
+ let Inst{19-15} = rs1;
+ let Inst{14-12} = funct3;
+ let Inst{11-7} = rd;
+ let Opcode = opcode;
+}
+
+class FS<bits<3> funct3, bits<7> opcode, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : RISCVInst<outs, ins, asmstr, pattern>
+{
+ bits<12> imm12;
+ bits<5> rs2;
+ bits<5> rs1;
+
+ let Inst{31-25} = imm12{11-5};
+ let Inst{24-20} = rs2;
+ let Inst{19-15} = rs1;
+ let Inst{14-12} = funct3;
+ let Inst{11-7} = imm12{4-0};
+ let Opcode = opcode;
+}
+
+class FSB<bits<3> funct3, bits<7> opcode, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : RISCVInst<outs, ins, asmstr, pattern>
+{
+ bits<12> imm12;
+ bits<5> rs2;
+ bits<5> rs1;
+
+ let Inst{31} = imm12{11};
+ let Inst{30-25} = imm12{9-4};
+ let Inst{24-20} = rs2;
+ let Inst{19-15} = rs1;
+ let Inst{14-12} = funct3;
+ let Inst{11-8} = imm12{3-0};
+ let Inst{7} = imm12{10};
+ let Opcode = opcode;
+}
+
+class FU<bits<7> opcode, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : RISCVInst<outs, ins, asmstr, pattern>
+{
+ bits<20> imm20;
+ bits<5> rd;
+
+ let Inst{31-12} = imm20;
+ let Inst{11-7} = rd;
+ let Opcode = opcode;
+}
+
+class FUJ<bits<7> opcode, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : RISCVInst<outs, ins, asmstr, pattern>
+{
+ bits<20> imm20;
+ bits<5> rd;
+
+ let Inst{31} = imm20{19};
+ let Inst{30-21} = imm20{9-0};
+ let Inst{20} = imm20{10};
+ let Inst{19-12} = imm20{18-11};
+ let Inst{11-7} = rd;
+ let Opcode = opcode;
+}
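As a hedged illustration of the immediate convention described in this file's header comment, here is the FUJ scattering applied by hand. The example assumes a J-type instruction such as JAL (opcode 0x6f, rd = x1) with a logical offset of +2048; neither JAL nor these particular values appear in the patch. The operand is imm20 = 2048 >> 1 = 0x400, since the always-zero LSB is dropped before encoding:

#include <cassert>
#include <cstdint>

// Pack an imm20/rd/opcode triple exactly as the FUJ class above scatters it.
static uint32_t encodeFUJ(uint32_t Imm20, uint32_t Rd, uint32_t Opcode) {
  uint32_t Inst = 0;
  Inst |= ((Imm20 >> 19) & 0x1) << 31;  // Inst{31}    = imm20{19}
  Inst |= (Imm20 & 0x3ff) << 21;        // Inst{30-21} = imm20{9-0}
  Inst |= ((Imm20 >> 10) & 0x1) << 20;  // Inst{20}    = imm20{10}
  Inst |= ((Imm20 >> 11) & 0xff) << 12; // Inst{19-12} = imm20{18-11}
  Inst |= (Rd & 0x1f) << 7;             // Inst{11-7}  = rd
  return Inst | (Opcode & 0x7f);        // Inst{6-0}   = Opcode
}

int main() {
  // jal x1, +2048 assembles to 0x001000EF: only imm20{10} (logical bit 11)
  // is set, and it lands in Inst{20}.
  assert(encodeFUJ(0x400, /*Rd=*/1, /*Opcode=*/0x6f) == 0x001000EF);
}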
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/contrib/llvm/lib/Target/RISCV/RISCVInstrInfo.td
new file mode 100644
index 000000000000..52530c2f136c
--- /dev/null
+++ b/contrib/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -0,0 +1,55 @@
+//===-- RISCVInstrInfo.td - Target Description for RISCV ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file describes the RISC-V instructions in TableGen format.
+//
+//===----------------------------------------------------------------------===//
+
+include "RISCVInstrFormats.td"
+
+def simm12 : Operand<i32>;
+
+// As noted in RISCVRegisterInfo.td, the hope is that support for
+// variable-sized register classes will mean that instruction definitions do
+// not need to be duplicated for 32-bit and 64-bit register classes. For now
+// we use 'GPR', which is 32-bit. When codegen for both RV32 and RV64 is
+// added, we will need to duplicate instruction definitions unless a proposal
+// like <http://lists.llvm.org/pipermail/llvm-dev/2016-September/105027.html>
+// is adopted.
+
+class ALU_ri<bits<3> funct3, string OpcodeStr> :
+ FI<funct3, 0b0010011, (outs GPR:$rd), (ins GPR:$rs1, simm12:$imm12),
+ OpcodeStr#"\t$rd, $rs1, $imm12", []>
+{
+}
+
+def ADDI : ALU_ri<0b000, "addi">;
+def SLTI : ALU_ri<0b010, "slti">;
+def SLTIU : ALU_ri<0b011, "sltiu">;
+def XORI : ALU_ri<0b100, "xori">;
+def ORI : ALU_ri<0b110, "ori">;
+def ANDI : ALU_ri<0b111, "andi">;
+
+class ALU_rr<bits<7> funct7, bits<3> funct3, string OpcodeStr> :
+ FR<funct7, funct3, 0b0110011, (outs GPR:$rd), (ins GPR:$rs1, GPR:$rs2),
+ OpcodeStr#"\t$rd, $rs1, $rs2", []>
+{
+}
+
+def ADD : ALU_rr<0b0000000, 0b000, "add">;
+def SUB : ALU_rr<0b0100000, 0b000, "sub">;
+def SLL : ALU_rr<0b0000000, 0b001, "sll">;
+def SLT : ALU_rr<0b0000000, 0b010, "slt">;
+def SLTU : ALU_rr<0b0000000, 0b011, "sltu">;
+def XOR : ALU_rr<0b0000000, 0b100, "xor">;
+def SRL : ALU_rr<0b0000000, 0b101, "srl">;
+def SRA : ALU_rr<0b0100000, 0b101, "sra">;
+def OR : ALU_rr<0b0000000, 0b110, "or">;
+def AND : ALU_rr<0b0000000, 0b111, "and">;
+
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/contrib/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
new file mode 100644
index 000000000000..f04de217bf0d
--- /dev/null
+++ b/contrib/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -0,0 +1,90 @@
+//===-- RISCVRegisterInfo.td - RISC-V Register defs --------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the RISC-V register file
+//===----------------------------------------------------------------------===//
+
+let Namespace = "RISCV" in {
+ def sub_32 : SubRegIndex<32>;
+
+ class RISCVReg32<bits<5> Enc, string n, list<string> alt = []> : Register<n> {
+ let HWEncoding{4-0} = Enc;
+ let AltNames = alt;
+ }
+
+ // RISCV64 registers don't define an AsmName or AltName. If they specified
+ // names aliasing the RISCVReg32 registers, the generation of the default
+ // MatchRegisterName/MatchRegisterAltName would fail. When necessary,
+ // RISCVAsmParser will need to convert a register number from a RISCVReg32
+ // to the equivalent RISCVReg64.
+ class RISCVReg64<RISCVReg32 subreg> : Register<""> {
+ let HWEncoding{4-0} = subreg.HWEncoding{4-0};
+ let SubRegs = [subreg];
+ let SubRegIndices = [sub_32];
+ }
+
+ def ABIRegAltName : RegAltNameIndex;
+}
+
+// Integer registers
+let RegAltNameIndices = [ABIRegAltName] in {
+ def X0_32 : RISCVReg32<0, "x0", ["zero"]>, DwarfRegNum<[0]>;
+ def X1_32 : RISCVReg32<1, "x1", ["ra"]>, DwarfRegNum<[1]>;
+ def X2_32 : RISCVReg32<2, "x2", ["sp"]>, DwarfRegNum<[2]>;
+ def X3_32 : RISCVReg32<3, "x3", ["gp"]>, DwarfRegNum<[3]>;
+ def X4_32 : RISCVReg32<4, "x4", ["tp"]>, DwarfRegNum<[4]>;
+ def X5_32 : RISCVReg32<5, "x5", ["t0"]>, DwarfRegNum<[5]>;
+ def X6_32 : RISCVReg32<6, "x6", ["t1"]>, DwarfRegNum<[6]>;
+ def X7_32 : RISCVReg32<7, "x7", ["t2"]>, DwarfRegNum<[7]>;
+ def X8_32 : RISCVReg32<8, "x8", ["s0"]>, DwarfRegNum<[8]>;
+ def X9_32 : RISCVReg32<9, "x9", ["s1"]>, DwarfRegNum<[9]>;
+ def X10_32 : RISCVReg32<10,"x10", ["a0"]>, DwarfRegNum<[10]>;
+ def X11_32 : RISCVReg32<11,"x11", ["a1"]>, DwarfRegNum<[11]>;
+ def X12_32 : RISCVReg32<12,"x12", ["a2"]>, DwarfRegNum<[12]>;
+ def X13_32 : RISCVReg32<13,"x13", ["a3"]>, DwarfRegNum<[13]>;
+ def X14_32 : RISCVReg32<14,"x14", ["a4"]>, DwarfRegNum<[14]>;
+ def X15_32 : RISCVReg32<15,"x15", ["a5"]>, DwarfRegNum<[15]>;
+ def X16_32 : RISCVReg32<16,"x16", ["a6"]>, DwarfRegNum<[16]>;
+ def X17_32 : RISCVReg32<17,"x17", ["a7"]>, DwarfRegNum<[17]>;
+ def X18_32 : RISCVReg32<18,"x18", ["s2"]>, DwarfRegNum<[18]>;
+ def X19_32 : RISCVReg32<19,"x19", ["s3"]>, DwarfRegNum<[19]>;
+ def X20_32 : RISCVReg32<20,"x20", ["s4"]>, DwarfRegNum<[20]>;
+ def X21_32 : RISCVReg32<21,"x21", ["s5"]>, DwarfRegNum<[21]>;
+ def X22_32 : RISCVReg32<22,"x22", ["s6"]>, DwarfRegNum<[22]>;
+ def X23_32 : RISCVReg32<23,"x23", ["s7"]>, DwarfRegNum<[23]>;
+ def X24_32 : RISCVReg32<24,"x24", ["s8"]>, DwarfRegNum<[24]>;
+ def X25_32 : RISCVReg32<25,"x25", ["s9"]>, DwarfRegNum<[25]>;
+ def X26_32 : RISCVReg32<26,"x26", ["s10"]>, DwarfRegNum<[26]>;
+ def X27_32 : RISCVReg32<27,"x27", ["s11"]>, DwarfRegNum<[27]>;
+ def X28_32 : RISCVReg32<28,"x28", ["t3"]>, DwarfRegNum<[28]>;
+ def X29_32 : RISCVReg32<29,"x29", ["t4"]>, DwarfRegNum<[29]>;
+ def X30_32 : RISCVReg32<30,"x30", ["t5"]>, DwarfRegNum<[30]>;
+ def X31_32 : RISCVReg32<31,"x31", ["t6"]>, DwarfRegNum<[31]>;
+}
+
+foreach Index = 0-31 in {
+ def X#Index#_64 : RISCVReg64<!cast<RISCVReg32>("X"#Index#"_32")>, DwarfRegNum<[Index]>;
+}
+
+// We currently define separate register classes for the 32-bit and 64-bit
+// GPRs. Once variable-sized register classes
+// <http://lists.llvm.org/pipermail/llvm-dev/2016-September/105027.html> or
+// similar are implemented, we can just use one 'GPR' class for most
+// instruction definitions.
+
+// TODO: once codegen is implemented, registers should be listed in an order
+// reflecting the preferred register allocation sequence.
+def GPR : RegisterClass<"RISCV", [i32], 32, (add
+ (sequence "X%u_32", 0, 31)
+)>;
+
+def GPR64 : RegisterClass<"RISCV", [i64], 64, (add
+ (sequence "X%u_64", 0, 31)
+)>;
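For orientation only (not part of this change): TableGen expands the defs above into register enumerators in the RISCV namespace inside the generated RISCVGenRegisterInfo.inc, so a later C++ codegen patch could refer to them roughly as in this sketch. The enumerator names are assumed from the defs above, not taken from this commit.

// Sketch under the assumption that the generated RISCVGenRegisterInfo.inc is
// pulled in through MCTargetDesc/RISCVMCTargetDesc.h and defines RISCV::X10_32
// and RISCV::X11_32 from the defs above.
#include "MCTargetDesc/RISCVMCTargetDesc.h"

static bool isIntReturnValueReg32(unsigned Reg) {
  // x10/x11 (ABI names a0/a1) carry the integer return values in the RISC-V ABI.
  return Reg == RISCV::X10_32 || Reg == RISCV::X11_32;
}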
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/contrib/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
new file mode 100644
index 000000000000..afbbe004186e
--- /dev/null
+++ b/contrib/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -0,0 +1,58 @@
+//===-- RISCVTargetMachine.cpp - Define TargetMachine for RISCV -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the info about RISCV target spec.
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCVTargetMachine.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Target/TargetOptions.h"
+using namespace llvm;
+
+extern "C" void LLVMInitializeRISCVTarget() {
+ RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target());
+ RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target());
+}
+
+static std::string computeDataLayout(const Triple &TT) {
+ if (TT.isArch64Bit()) {
+ return "e-m:e-i64:64-n32:64-S128";
+ } else {
+ assert(TT.isArch32Bit() && "only RV32 and RV64 are currently supported");
+ return "e-m:e-i64:64-n32-S128";
+ }
+}
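As a side note (illustration only, not from the patch): the branch above is driven entirely by the triple's architecture width, so a hypothetical sanity check of the selection logic could look like the following, using example triples.

#include "llvm/ADT/Triple.h"
#include <cassert>

// Hypothetical check mirroring computeDataLayout()'s dispatch; the triple
// strings here are examples, not values used by this commit.
static void checkTripleWidths() {
  assert(llvm::Triple("riscv64-unknown-elf").isArch64Bit());
  assert(llvm::Triple("riscv32-unknown-elf").isArch32Bit());
}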
+
+static Reloc::Model getEffectiveRelocModel(const Triple &TT,
+ Optional<Reloc::Model> RM) {
+ if (!RM.hasValue())
+ return Reloc::Static;
+ return *RM;
+}
+
+RISCVTargetMachine::RISCVTargetMachine(const Target &T, const Triple &TT,
+ StringRef CPU, StringRef FS,
+ const TargetOptions &Options,
+ Optional<Reloc::Model> RM,
+ CodeModel::Model CM,
+ CodeGenOpt::Level OL)
+ : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options,
+ getEffectiveRelocModel(TT, RM), CM, OL),
+ TLOF(make_unique<TargetLoweringObjectFileELF>()) {}
+
+TargetPassConfig *RISCVTargetMachine::createPassConfig(PassManagerBase &PM) {
+ return new TargetPassConfig(this, PM);
+}
diff --git a/contrib/llvm/lib/Target/RISCV/RISCVTargetMachine.h b/contrib/llvm/lib/Target/RISCV/RISCVTargetMachine.h
new file mode 100644
index 000000000000..d13e574c9bf8
--- /dev/null
+++ b/contrib/llvm/lib/Target/RISCV/RISCVTargetMachine.h
@@ -0,0 +1,40 @@
+//===-- RISCVTargetMachine.h - Define TargetMachine for RISCV ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the RISCV specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_RISCV_RISCVTARGETMACHINE_H
+#define LLVM_LIB_TARGET_RISCV_RISCVTARGETMACHINE_H
+
+#include "MCTargetDesc/RISCVMCTargetDesc.h"
+#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+class RISCVTargetMachine : public LLVMTargetMachine {
+ std::unique_ptr<TargetLoweringObjectFile> TLOF;
+
+public:
+ RISCVTargetMachine(const Target &T, const Triple &TT, StringRef CPU,
+ StringRef FS, const TargetOptions &Options,
+ Optional<Reloc::Model> RM, CodeModel::Model CM,
+ CodeGenOpt::Level OL);
+
+ TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
+
+ TargetLoweringObjectFile *getObjFileLowering() const override {
+ return TLOF.get();
+ }
+};
+}
+
+#endif
diff --git a/contrib/llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.cpp b/contrib/llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.cpp
new file mode 100644
index 000000000000..34932c259156
--- /dev/null
+++ b/contrib/llvm/lib/Target/RISCV/TargetInfo/RISCVTargetInfo.cpp
@@ -0,0 +1,30 @@
+//===-- RISCVTargetInfo.cpp - RISCV Target Implementation -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+namespace llvm {
+Target &getTheRISCV32Target() {
+ static Target TheRISCV32Target;
+ return TheRISCV32Target;
+}
+
+Target &getTheRISCV64Target() {
+ static Target TheRISCV64Target;
+ return TheRISCV64Target;
+}
+}
+
+extern "C" void LLVMInitializeRISCVTargetInfo() {
+ RegisterTarget<Triple::riscv32> X(getTheRISCV32Target(), "riscv32",
+ "32-bit RISC-V");
+ RegisterTarget<Triple::riscv64> Y(getTheRISCV64Target(), "riscv64",
+ "64-bit RISC-V");
+}
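Usage sketch (an assumption about client code, not part of this file): once LLVMInitializeRISCVTargetInfo() has run, the two targets registered above become discoverable through the generic registry by triple, roughly as follows.

#include "llvm/Support/TargetRegistry.h"
#include <string>

// Hypothetical lookup; "riscv32-unknown-elf" is only an example triple whose
// architecture component maps to Triple::riscv32, matching RegisterTarget above.
static const llvm::Target *lookupRV32Target() {
  std::string Error;
  return llvm::TargetRegistry::lookupTarget("riscv32-unknown-elf", Error);
}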
diff --git a/contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
index b2003b8f101b..e775aa607b53 100644
--- a/contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
+++ b/contrib/llvm/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp
@@ -84,7 +84,7 @@ class SparcAsmParser : public MCTargetAsmParser {
return getSTI().getTargetTriple().getArch() == Triple::sparcv9;
}
- void expandSET(MCInst &Inst, SMLoc IDLoc,
+ bool expandSET(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions);
public:
@@ -121,7 +121,7 @@ public:
static const MCPhysReg DoubleRegs[32] = {
Sparc::D0, Sparc::D1, Sparc::D2, Sparc::D3,
Sparc::D4, Sparc::D5, Sparc::D6, Sparc::D7,
- Sparc::D8, Sparc::D7, Sparc::D8, Sparc::D9,
+ Sparc::D8, Sparc::D9, Sparc::D10, Sparc::D11,
Sparc::D12, Sparc::D13, Sparc::D14, Sparc::D15,
Sparc::D16, Sparc::D17, Sparc::D18, Sparc::D19,
Sparc::D20, Sparc::D21, Sparc::D22, Sparc::D23,
@@ -466,7 +466,7 @@ public:
} // end namespace
-void SparcAsmParser::expandSET(MCInst &Inst, SMLoc IDLoc,
+bool SparcAsmParser::expandSET(MCInst &Inst, SMLoc IDLoc,
SmallVectorImpl<MCInst> &Instructions) {
MCOperand MCRegOp = Inst.getOperand(0);
MCOperand MCValOp = Inst.getOperand(1);
@@ -479,8 +479,8 @@ void SparcAsmParser::expandSET(MCInst &Inst, SMLoc IDLoc,
// Allow either a signed or unsigned 32-bit immediate.
if (RawImmValue < -2147483648LL || RawImmValue > 4294967295LL) {
- Error(IDLoc, "set: argument must be between -2147483648 and 4294967295");
- return;
+ return Error(IDLoc,
+ "set: argument must be between -2147483648 and 4294967295");
}
// If the value was expressed as a large unsigned number, that's ok.
@@ -537,6 +537,7 @@ void SparcAsmParser::expandSET(MCInst &Inst, SMLoc IDLoc,
TmpInst.addOperand(MCOperand::createExpr(Expr));
Instructions.push_back(TmpInst);
}
+ return false;
}
bool SparcAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
@@ -556,7 +557,8 @@ bool SparcAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
Instructions.push_back(Inst);
break;
case SP::SET:
- expandSET(Inst, IDLoc, Instructions);
+ if (expandSET(Inst, IDLoc, Instructions))
+ return true;
break;
}
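Background sketch (stand-alone illustration; the real MCTargetAsmParser::Error() is assumed to print the diagnostic and return true): changing expandSET() to return bool lets it report a problem and signal failure to MatchAndEmitInstruction() in a single statement, along these lines.

#include <iostream>
#include <string>

// Stand-in for the parser's Error() helper (assumption: report, then return true).
static bool reportError(const std::string &Msg) {
  std::cerr << "error: " << Msg << "\n";
  return true;
}

// Mirrors the new expandSET() contract: true = failure, false = success.
static bool expandImmediate(long long RawImmValue) {
  if (RawImmValue < -2147483648LL || RawImmValue > 4294967295LL)
    return reportError("set: argument must be between -2147483648 and 4294967295");
  return false;
}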
@@ -626,13 +628,11 @@ bool SparcAsmParser::ParseInstruction(ParseInstructionInfo &Info,
if (getLexer().is(AsmToken::Comma)) {
if (parseBranchModifiers(Operands) != MatchOperand_Success) {
SMLoc Loc = getLexer().getLoc();
- Parser.eatToEndOfStatement();
return Error(Loc, "unexpected token");
}
}
if (parseOperand(Operands, Name) != MatchOperand_Success) {
SMLoc Loc = getLexer().getLoc();
- Parser.eatToEndOfStatement();
return Error(Loc, "unexpected token");
}
@@ -645,14 +645,12 @@ bool SparcAsmParser::ParseInstruction(ParseInstructionInfo &Info,
// Parse and remember the operand.
if (parseOperand(Operands, Name) != MatchOperand_Success) {
SMLoc Loc = getLexer().getLoc();
- Parser.eatToEndOfStatement();
return Error(Loc, "unexpected token");
}
}
}
if (getLexer().isNot(AsmToken::EndOfStatement)) {
SMLoc Loc = getLexer().getLoc();
- Parser.eatToEndOfStatement();
return Error(Loc, "unexpected token");
}
Parser.Lex(); // Consume the EndOfStatement.
@@ -717,7 +715,7 @@ bool SparcAsmParser:: parseDirectiveWord(unsigned Size, SMLoc L) {
return false;
}
-SparcAsmParser::OperandMatchResultTy
+OperandMatchResultTy
SparcAsmParser::parseMEMOperand(OperandVector &Operands) {
SMLoc S, E;
@@ -755,7 +753,7 @@ SparcAsmParser::parseMEMOperand(OperandVector &Operands) {
return MatchOperand_Success;
}
-SparcAsmParser::OperandMatchResultTy
+OperandMatchResultTy
SparcAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
@@ -823,7 +821,7 @@ SparcAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) {
return MatchOperand_Success;
}
-SparcAsmParser::OperandMatchResultTy
+OperandMatchResultTy
SparcAsmParser::parseSparcAsmOperand(std::unique_ptr<SparcOperand> &Op,
bool isCall) {
@@ -910,7 +908,7 @@ SparcAsmParser::parseSparcAsmOperand(std::unique_ptr<SparcOperand> &Op,
return (Op) ? MatchOperand_Success : MatchOperand_ParseFail;
}
-SparcAsmParser::OperandMatchResultTy
+OperandMatchResultTy
SparcAsmParser::parseBranchModifiers(OperandVector &Operands) {
// parse (,a|,pn|,pt)+
@@ -1265,9 +1263,9 @@ bool SparcAsmParser::matchSparcAsmModifiers(const MCExpr *&EVal,
}
extern "C" void LLVMInitializeSparcAsmParser() {
- RegisterMCAsmParser<SparcAsmParser> A(TheSparcTarget);
- RegisterMCAsmParser<SparcAsmParser> B(TheSparcV9Target);
- RegisterMCAsmParser<SparcAsmParser> C(TheSparcelTarget);
+ RegisterMCAsmParser<SparcAsmParser> A(getTheSparcTarget());
+ RegisterMCAsmParser<SparcAsmParser> B(getTheSparcV9Target());
+ RegisterMCAsmParser<SparcAsmParser> C(getTheSparcelTarget());
}
#define GET_REGISTER_MATCHER
diff --git a/contrib/llvm/lib/Target/Sparc/DelaySlotFiller.cpp b/contrib/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
index 944f3551279e..6f9cc314e376 100644
--- a/contrib/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
+++ b/contrib/llvm/lib/Target/Sparc/DelaySlotFiller.cpp
@@ -43,9 +43,7 @@ namespace {
static char ID;
Filler() : MachineFunctionPass(ID) {}
- const char *getPassName() const override {
- return "SPARC Delay Slot Filler";
- }
+ StringRef getPassName() const override { return "SPARC Delay Slot Filler"; }
bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
bool runOnMachineFunction(MachineFunction &F) override {
@@ -64,7 +62,7 @@ namespace {
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
void insertCallDefsUses(MachineBasicBlock::iterator MI,
diff --git a/contrib/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp b/contrib/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
index 1dea379e14ec..da7e0b737e78 100644
--- a/contrib/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
+++ b/contrib/llvm/lib/Target/Sparc/Disassembler/SparcDisassembler.cpp
@@ -44,7 +44,9 @@ public:
}
namespace llvm {
-extern Target TheSparcTarget, TheSparcV9Target, TheSparcelTarget;
+Target &getTheSparcTarget();
+Target &getTheSparcV9Target();
+Target &getTheSparcelTarget();
}
static MCDisassembler *createSparcDisassembler(const Target &T,
@@ -56,11 +58,11 @@ static MCDisassembler *createSparcDisassembler(const Target &T,
extern "C" void LLVMInitializeSparcDisassembler() {
// Register the disassembler.
- TargetRegistry::RegisterMCDisassembler(TheSparcTarget,
+ TargetRegistry::RegisterMCDisassembler(getTheSparcTarget(),
createSparcDisassembler);
- TargetRegistry::RegisterMCDisassembler(TheSparcV9Target,
+ TargetRegistry::RegisterMCDisassembler(getTheSparcV9Target(),
createSparcDisassembler);
- TargetRegistry::RegisterMCDisassembler(TheSparcelTarget,
+ TargetRegistry::RegisterMCDisassembler(getTheSparcelTarget(),
createSparcDisassembler);
}
diff --git a/contrib/llvm/lib/Target/Sparc/LeonFeatures.td b/contrib/llvm/lib/Target/Sparc/LeonFeatures.td
index 63f8b33c80cf..d06e734b5a7b 100755
--- a/contrib/llvm/lib/Target/Sparc/LeonFeatures.td
+++ b/contrib/llvm/lib/Target/Sparc/LeonFeatures.td
@@ -10,82 +10,73 @@
//
//===----------------------------------------------------------------------===//
-//===----------------------------------------------------------------------===//
-// CASA Support differs between LEON3-FT GR712RC and LEON3-FT UT699
-// We need to have the option to switch this on and off.
-//===----------------------------------------------------------------------===//
-
-// support to casa instruction; for leon3 subtarget only
-def LeonCASA : SubtargetFeature<
- "hasleoncasa", "HasLeonCasa", "true",
- "Enable CASA instruction for LEON3 and LEON4 processors">;
//===----------------------------------------------------------------------===//
// UMAC and SMAC support for LEON3 and LEON4 processors.
//===----------------------------------------------------------------------===//
-// support to casa instruction; for leon3 subtarget only
-def UMACSMACSupport
- : SubtargetFeature<"hasumacsmac", "HasUmacSmac", "true",
- "Enable UMAC and SMAC for LEON3 and LEON4 processors">;
+//support to casa instruction; for leon3 subtarget only
+def UMACSMACSupport : SubtargetFeature<
+ "hasumacsmac",
+ "HasUmacSmac",
+ "true",
+ "Enable UMAC and SMAC for LEON3 and LEON4 processors"
+>;
+
//===----------------------------------------------------------------------===//
-// LEON Erratum fixes
+// CASA Support differs between LEON3-FT GR712RC and LEON3-FT UT699
+// We need to have the option to switch this on and off.
//===----------------------------------------------------------------------===//
-def ReplaceSDIV
- : SubtargetFeature<
- "replacesdiv", "PerformSDIVReplace", "true",
- "AT697E erratum fix: Do not emit SDIV, emit SDIVCC instead">;
-
-def FixCALL
- : SubtargetFeature<"fixcall", "FixCallImmediates", "true",
- "AT697E erratum fix: Restrict the size of the immediate "
- "operand of the CALL instruction to 20 bits">;
-
-def IgnoreZeroFlag
- : SubtargetFeature<"ignrzeroflag", "IgnoreZeroFlag", "true",
- "AT697E erratum fix: Do not rely on the zero bit flag "
- "on a divide overflow for SDIVCC and UDIVCC">;
-
-def InsertNOPDoublePrecision
- : SubtargetFeature<"insrtnopdblprcsn", "InsertNOPDoublePrecision", "true",
- "LEON2 erratum fix: Insert a NOP before the double "
- "precision floating point instruction">;
-
-def FixFSMULD : SubtargetFeature<"fixfsmuld", "FixFSMULD", "true",
- "LEON3 erratum fix: Do not select FSMULD">;
-
-def ReplaceFMULS
- : SubtargetFeature<"replacefmuls", "ReplaceFMULS", "true",
- "LEON3 erratum fix: Replace FMULS instruction with a "
- "routine using conversions/double precision operations "
- "to replace FMULS">;
-
-def PreventRoundChange
- : SubtargetFeature<"prvntroundchange", "PreventRoundChange", "true",
- "LEON3 erratum fix: Prevent any rounding mode change "
- "request: use only the round-to-nearest rounding mode">;
-
-def FixAllFDIVSQRT
- : SubtargetFeature<"fixallfdivsqrt", "FixAllFDIVSQRT", "true",
- "LEON3 erratum fix: Fix FDIVS/FDIVD/FSQRTS/FSQRTD "
- "instructions with NOPs and floating-point store">;
-
-def InsertNOPLoad
- : SubtargetFeature<"insertnopload", "InsertNOPLoad", "true",
- "LEON3 erratum fix: Insert a NOP instruction after "
- "every single-cycle load instruction when the next "
- "instruction is another load/store instruction">;
-
-def FlushCacheLineSWAP
- : SubtargetFeature<"flshcachelineswap", "FlushCacheLineSWAP", "true",
- "LEON3 erratum fix: Flush cache line containing the "
- "lock before performing any of the atomic instructions "
- "SWAP and LDSTUB">;
-
-def InsertNOPsLoadStore
- : SubtargetFeature<"insertnopsloadstore", "InsertNOPsLoadStore", "true",
- "LEON3 erratum fix: Insert NOPs between "
- "single-precision loads and the store, so the number of "
- "instructions between is 4">;
+//support to casa instruction; for leon3 subtarget only
+def LeonCASA : SubtargetFeature<
+ "hasleoncasa",
+ "HasLeonCasa",
+ "true",
+ "Enable CASA instruction for LEON3 and LEON4 processors"
+>;
+
+
+def ReplaceSDIV : SubtargetFeature<
+ "replacesdiv",
+ "PerformSDIVReplace",
+ "true",
+ "AT697E erratum fix: Do not emit SDIV, emit SDIVCC instead"
+>;
+
+def InsertNOPLoad: SubtargetFeature<
+ "insertnopload",
+ "InsertNOPLoad",
+ "true",
+ "LEON3 erratum fix: Insert a NOP instruction after every single-cycle load instruction when the next instruction is another load/store instruction"
+>;
+
+def FixFSMULD : SubtargetFeature<
+ "fixfsmuld",
+ "FixFSMULD",
+ "true",
+ "LEON erratum fix: Do not use FSMULD"
+>;
+
+def ReplaceFMULS : SubtargetFeature<
+ "replacefmuls",
+ "ReplaceFMULS",
+ "true",
+ "LEON erratum fix: Replace FMULS instruction with FMULD and relevant conversion instructions"
+>;
+
+def DetectRoundChange : SubtargetFeature<
+ "detectroundchange",
+ "DetectRoundChange",
+ "true",
+ "LEON3 erratum detection: Detects any rounding mode change "
+ "request: use only the round-to-nearest rounding mode"
+>;
+
+def FixAllFDIVSQRT : SubtargetFeature<
+ "fixallfdivsqrt",
+ "FixAllFDIVSQRT",
+ "true",
+ "LEON erratum fix: Fix FDIVS/FDIVD/FSQRTS/FSQRTD instructions with NOPs and floating-point store"
+>;
diff --git a/contrib/llvm/lib/Target/Sparc/LeonPasses.cpp b/contrib/llvm/lib/Target/Sparc/LeonPasses.cpp
index 5d0920892ff0..0acc2875daa8 100755
--- a/contrib/llvm/lib/Target/Sparc/LeonPasses.cpp
+++ b/contrib/llvm/lib/Target/Sparc/LeonPasses.cpp
@@ -16,6 +16,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -51,8 +52,7 @@ int LEONMachineFunctionPass::GetRegIndexForOperand(MachineInstr &MI,
int LEONMachineFunctionPass::getUnusedFPRegister(MachineRegisterInfo &MRI) {
for (int RegisterIndex = SP::F0; RegisterIndex <= SP::F31; ++RegisterIndex) {
if (!MRI.isPhysRegUsed(RegisterIndex) &&
- !(std::find(UsedRegisters.begin(), UsedRegisters.end(),
- RegisterIndex) != UsedRegisters.end())) {
+ !is_contained(UsedRegisters, RegisterIndex)) {
return RegisterIndex;
}
}
@@ -90,15 +90,6 @@ bool InsertNOPLoad::runOnMachineFunction(MachineFunction &MF) {
MachineBasicBlock::iterator NMBBI = std::next(MBBI);
BuildMI(MBB, NMBBI, DL, TII.get(SP::NOP));
Modified = true;
- } else if (MI.isInlineAsm()) {
- // Look for an inline ld or ldf instruction.
- StringRef AsmString =
- MI.getOperand(InlineAsm::MIOp_AsmString).getSymbolName();
- if (AsmString.startswith_lower("ld")) {
- MachineBasicBlock::iterator NMBBI = std::next(MBBI);
- BuildMI(MBB, NMBBI, DL, TII.get(SP::NOP));
- Modified = true;
- }
}
}
}
@@ -148,29 +139,6 @@ bool FixFSMULD::runOnMachineFunction(MachineFunction &MF) {
Reg1Index = MI.getOperand(0).getReg();
Reg2Index = MI.getOperand(1).getReg();
Reg3Index = MI.getOperand(2).getReg();
- } else if (MI.isInlineAsm()) {
- StringRef AsmString =
- MI.getOperand(InlineAsm::MIOp_AsmString).getSymbolName();
- if (AsmString.startswith_lower("fsmuld")) {
- // this is an inline FSMULD instruction
-
- unsigned StartOp = InlineAsm::MIOp_FirstOperand;
-
- // extracts the registers from the inline assembly instruction
- for (unsigned i = StartOp, e = MI.getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
- if (MO.isReg()) {
- if (Reg1Index == UNASSIGNED_INDEX)
- Reg1Index = MO.getReg();
- else if (Reg2Index == UNASSIGNED_INDEX)
- Reg2Index = MO.getReg();
- else if (Reg3Index == UNASSIGNED_INDEX)
- Reg3Index = MO.getReg();
- }
- if (Reg3Index != UNASSIGNED_INDEX)
- break;
- }
- }
}
if (Reg1Index != UNASSIGNED_INDEX && Reg2Index != UNASSIGNED_INDEX &&
@@ -260,28 +228,6 @@ bool ReplaceFMULS::runOnMachineFunction(MachineFunction &MF) {
Reg1Index = MI.getOperand(0).getReg();
Reg2Index = MI.getOperand(1).getReg();
Reg3Index = MI.getOperand(2).getReg();
- } else if (MI.isInlineAsm()) {
- StringRef AsmString =
- MI.getOperand(InlineAsm::MIOp_AsmString).getSymbolName();
- if (AsmString.startswith_lower("fmuls")) {
- // this is an inline FMULS instruction
- unsigned StartOp = InlineAsm::MIOp_FirstOperand;
-
- // extracts the registers from the inline assembly instruction
- for (unsigned i = StartOp, e = MI.getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI.getOperand(i);
- if (MO.isReg()) {
- if (Reg1Index == UNASSIGNED_INDEX)
- Reg1Index = MO.getReg();
- else if (Reg2Index == UNASSIGNED_INDEX)
- Reg2Index = MO.getReg();
- else if (Reg3Index == UNASSIGNED_INDEX)
- Reg3Index = MO.getReg();
- }
- if (Reg3Index != UNASSIGNED_INDEX)
- break;
- }
- }
}
if (Reg1Index != UNASSIGNED_INDEX && Reg2Index != UNASSIGNED_INDEX &&
@@ -329,391 +275,22 @@ bool ReplaceFMULS::runOnMachineFunction(MachineFunction &MF) {
return Modified;
}
-//*****************************************************************************
-//**** FixAllFDIVSQRT pass
-//*****************************************************************************
-// This pass fixes the incorrectly working FDIVx and FSQRTx instructions that
-// exist for some earlier versions of the LEON processor line. Five NOP
-// instructions need to be inserted after these instructions to ensure the
-// correct result is placed in the destination registers before they are used.
-//
-// This pass implements two fixes:
-// 1) fixing the FSQRTS and FSQRTD instructions.
-// 2) fixing the FDIVS and FDIVD instructions.
-//
-// FSQRTS and FDIVS are converted to FDIVD and FSQRTD respectively earlier in
-// the pipeline when this option is enabled, so this pass needs only to deal
-// with the changes that still need implementing for the "double" versions
-// of these instructions.
-//
-char FixAllFDIVSQRT::ID = 0;
-
-FixAllFDIVSQRT::FixAllFDIVSQRT(TargetMachine &tm)
- : LEONMachineFunctionPass(tm, ID) {}
-
-bool FixAllFDIVSQRT::runOnMachineFunction(MachineFunction &MF) {
- Subtarget = &MF.getSubtarget<SparcSubtarget>();
- const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
- DebugLoc DL = DebugLoc();
-
- bool Modified = false;
- for (auto MFI = MF.begin(), E = MF.end(); MFI != E; ++MFI) {
- MachineBasicBlock &MBB = *MFI;
- for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E; ++MBBI) {
- MachineInstr &MI = *MBBI;
- unsigned Opcode = MI.getOpcode();
-
- if (MI.isInlineAsm()) {
- StringRef AsmString =
- MI.getOperand(InlineAsm::MIOp_AsmString).getSymbolName();
- if (AsmString.startswith_lower("fsqrtd")) {
- // this is an inline fsqrts instruction
- Opcode = SP::FSQRTD;
- } else if (AsmString.startswith_lower("fdivd")) {
- // this is an inline fsqrts instruction
- Opcode = SP::FDIVD;
- }
- }
-
- // Note: FDIVS and FSQRTS cannot be generated when this erratum fix is
- // switched on so we don't need to check for them here. They will
- // already have been converted to FSQRTD or FDIVD earlier in the
- // pipeline.
- if (Opcode == SP::FSQRTD || Opcode == SP::FDIVD) {
- // Insert 5 NOPs before FSQRTD,FDIVD.
- for (int InsertedCount = 0; InsertedCount < 5; InsertedCount++)
- BuildMI(MBB, MBBI, DL, TII.get(SP::NOP));
-
- MachineBasicBlock::iterator NMBBI = std::next(MBBI);
- // ... and inserting 28 NOPs after FSQRTD,FDIVD.
- for (int InsertedCount = 0; InsertedCount < 28; InsertedCount++)
- BuildMI(MBB, NMBBI, DL, TII.get(SP::NOP));
-
- Modified = true;
- }
- }
- }
-
- return Modified;
-}
-
-//*****************************************************************************
-//**** ReplaceSDIV pass
-//*****************************************************************************
-// This pass fixes the incorrectly working SDIV instruction that
-// exist for some earlier versions of the LEON processor line. The instruction
-// is replaced with an SDIVcc instruction instead, which is working.
-//
-char ReplaceSDIV::ID = 0;
-
-ReplaceSDIV::ReplaceSDIV() : LEONMachineFunctionPass(ID) {}
-
-ReplaceSDIV::ReplaceSDIV(TargetMachine &tm) : LEONMachineFunctionPass(tm, ID) {}
-
-bool ReplaceSDIV::runOnMachineFunction(MachineFunction &MF) {
- Subtarget = &MF.getSubtarget<SparcSubtarget>();
- const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
-
- bool Modified = false;
- for (auto MFI = MF.begin(), E = MF.end(); MFI != E; ++MFI) {
- MachineBasicBlock &MBB = *MFI;
- for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E; ++MBBI) {
- MachineInstr &MI = *MBBI;
- unsigned Opcode = MI.getOpcode();
- if (Opcode == SP::SDIVrr) {
- MI.setDesc(TII.get(SP::SDIVCCrr));
- Modified = true;
- } else if (Opcode == SP::SDIVri) {
- MI.setDesc(TII.get(SP::SDIVCCri));
- Modified = true;
- }
- }
- }
-
- return Modified;
-}
-
-static RegisterPass<ReplaceSDIV> X("replace-sdiv", "Replase SDIV Pass", false,
- false);
-
-//*****************************************************************************
-//**** FixCALL pass
-//*****************************************************************************
-// This pass restricts the size of the immediate operand of the CALL
-// instruction, which can cause problems on some earlier versions of the LEON
-// processor, which can interpret some of the call address bits incorrectly.
-//
-char FixCALL::ID = 0;
-
-FixCALL::FixCALL(TargetMachine &tm) : LEONMachineFunctionPass(tm, ID) {}
-
-bool FixCALL::runOnMachineFunction(MachineFunction &MF) {
- bool Modified = false;
-
- for (auto MFI = MF.begin(), E = MF.end(); MFI != E; ++MFI) {
- MachineBasicBlock &MBB = *MFI;
- for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E; ++MBBI) {
- MachineInstr &MI = *MBBI;
- MI.print(errs());
- errs() << "\n";
-
- unsigned Opcode = MI.getOpcode();
- if (Opcode == SP::CALL || Opcode == SP::CALLrr) {
- unsigned NumOperands = MI.getNumOperands();
- for (unsigned OperandIndex = 0; OperandIndex < NumOperands;
- OperandIndex++) {
- MachineOperand &MO = MI.getOperand(OperandIndex);
- if (MO.isImm()) {
- int64_t Value = MO.getImm();
- MO.setImm(Value & 0x000fffffL);
- Modified = true;
- break;
- }
- }
- } else if (MI.isInlineAsm()) // inline assembly immediate call
- {
- StringRef AsmString =
- MI.getOperand(InlineAsm::MIOp_AsmString).getSymbolName();
- if (AsmString.startswith_lower("call")) {
- // this is an inline call instruction
- unsigned StartOp = InlineAsm::MIOp_FirstOperand;
-
- // extracts the registers from the inline assembly instruction
- for (unsigned i = StartOp, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
- if (MO.isImm()) {
- int64_t Value = MO.getImm();
- MO.setImm(Value & 0x000fffffL);
- Modified = true;
- }
- }
- }
- }
- }
- }
-
- return Modified;
-}
-
-//*****************************************************************************
-//**** IgnoreZeroFlag pass
-//*****************************************************************************
-// This erratum fix fixes the overflow behavior of SDIVCC and UDIVCC
-// instructions that exists on some earlier LEON processors. Where these
-// instructions are detected, they are replaced by a sequence that will
-// explicitly write the overflow bit flag if this is required.
-//
-char IgnoreZeroFlag::ID = 0;
-
-IgnoreZeroFlag::IgnoreZeroFlag(TargetMachine &tm)
- : LEONMachineFunctionPass(tm, ID) {}
-
-bool IgnoreZeroFlag::runOnMachineFunction(MachineFunction &MF) {
- Subtarget = &MF.getSubtarget<SparcSubtarget>();
- const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
- DebugLoc DL = DebugLoc();
-
- bool Modified = false;
- for (auto MFI = MF.begin(), E = MF.end(); MFI != E; ++MFI) {
- MachineBasicBlock &MBB = *MFI;
- for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E; ++MBBI) {
- MachineInstr &MI = *MBBI;
- unsigned Opcode = MI.getOpcode();
- if (Opcode == SP::SDIVCCrr || Opcode == SP::SDIVCCri ||
- Opcode == SP::UDIVCCrr || Opcode == SP::UDIVCCri) {
-
- // split the current machine basic block - just after the sdivcc/udivcc
- // instruction
- // create a label that help us skip the zero flag update (of PSR -
- // Processor Status Register)
- // if conditions are not met
- const BasicBlock *LLVM_BB = MBB.getBasicBlock();
- MachineFunction::iterator It =
- std::next(MachineFunction::iterator(MBB));
-
- MachineBasicBlock *dneBB = MF.CreateMachineBasicBlock(LLVM_BB);
- MF.insert(It, dneBB);
-
- // Transfer the remainder of MBB and its successor edges to dneBB.
- dneBB->splice(dneBB->begin(), &MBB,
- std::next(MachineBasicBlock::iterator(MI)), MBB.end());
- dneBB->transferSuccessorsAndUpdatePHIs(&MBB);
-
- MBB.addSuccessor(dneBB);
-
- MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
-
- // bvc - branch if overflow flag not set
- BuildMI(MBB, NextMBBI, DL, TII.get(SP::BCOND))
- .addMBB(dneBB)
- .addImm(SPCC::ICC_VS);
-
- // bnz - branch if not zero
- BuildMI(MBB, NextMBBI, DL, TII.get(SP::BCOND))
- .addMBB(dneBB)
- .addImm(SPCC::ICC_NE);
-
- // use the WRPSR (Write Processor State Register) instruction to set the
- // zeo flag to 1
- // create wr %g0, 1, %psr
- BuildMI(MBB, NextMBBI, DL, TII.get(SP::WRPSRri))
- .addReg(SP::G0)
- .addImm(1);
-
- BuildMI(MBB, NextMBBI, DL, TII.get(SP::NOP));
-
- Modified = true;
- } else if (MI.isInlineAsm()) {
- StringRef AsmString =
- MI.getOperand(InlineAsm::MIOp_AsmString).getSymbolName();
- if (AsmString.startswith_lower("sdivcc") ||
- AsmString.startswith_lower("udivcc")) {
- // this is an inline SDIVCC or UDIVCC instruction
-
- // split the current machine basic block - just after the
- // sdivcc/udivcc instruction
- // create a label that help us skip the zero flag update (of PSR -
- // Processor Status Register)
- // if conditions are not met
- const BasicBlock *LLVM_BB = MBB.getBasicBlock();
- MachineFunction::iterator It =
- std::next(MachineFunction::iterator(MBB));
-
- MachineBasicBlock *dneBB = MF.CreateMachineBasicBlock(LLVM_BB);
- MF.insert(It, dneBB);
-
- // Transfer the remainder of MBB and its successor edges to dneBB.
- dneBB->splice(dneBB->begin(), &MBB,
- std::next(MachineBasicBlock::iterator(MI)), MBB.end());
- dneBB->transferSuccessorsAndUpdatePHIs(&MBB);
-
- MBB.addSuccessor(dneBB);
-
- MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
-
- // bvc - branch if overflow flag not set
- BuildMI(MBB, NextMBBI, DL, TII.get(SP::BCOND))
- .addMBB(dneBB)
- .addImm(SPCC::ICC_VS);
-
- // bnz - branch if not zero
- BuildMI(MBB, NextMBBI, DL, TII.get(SP::BCOND))
- .addMBB(dneBB)
- .addImm(SPCC::ICC_NE);
-
- // use the WRPSR (Write Processor State Register) instruction to set
- // the zeo flag to 1
- // create wr %g0, 1, %psr
- BuildMI(MBB, NextMBBI, DL, TII.get(SP::WRPSRri))
- .addReg(SP::G0)
- .addImm(1);
-
- BuildMI(MBB, NextMBBI, DL, TII.get(SP::NOP));
-
- Modified = true;
- }
- }
- }
- }
-
- return Modified;
-}
-
-//*****************************************************************************
-//**** InsertNOPDoublePrecision pass
-//*****************************************************************************
-// This erratum fix for some earlier LEON processors fixes a problem where a
-// double precision load will not yield the correct result if used in FMUL,
-// FDIV, FADD, FSUB or FSQRT instructions later. If this sequence is detected,
-// inserting a NOP between the two instructions will fix the erratum.
-// 1.scans the code after register allocation;
-// 2.checks for the problem conditions as described in the AT697E erratum
-// “Odd-Numbered FPU Register Dependency not Properly Checked in some
-// Double-Precision FPU Operations”;
-// 3.inserts NOPs if the problem exists.
-//
-char InsertNOPDoublePrecision::ID = 0;
-
-InsertNOPDoublePrecision::InsertNOPDoublePrecision(TargetMachine &tm)
- : LEONMachineFunctionPass(tm, ID) {}
-
-bool InsertNOPDoublePrecision::runOnMachineFunction(MachineFunction &MF) {
- Subtarget = &MF.getSubtarget<SparcSubtarget>();
- const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
- DebugLoc DL = DebugLoc();
-
- bool Modified = false;
- for (auto MFI = MF.begin(), E = MF.end(); MFI != E; ++MFI) {
- MachineBasicBlock &MBB = *MFI;
- for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E; ++MBBI) {
- MachineInstr &MI = *MBBI;
- unsigned Opcode = MI.getOpcode();
- if (Opcode == SP::LDDFri || Opcode == SP::LDDFrr) {
- MachineBasicBlock::iterator NMBBI = std::next(MBBI);
- MachineInstr &NMI = *NMBBI;
-
- unsigned NextOpcode = NMI.getOpcode();
- // NMI.print(errs());
- if (NextOpcode == SP::FADDD || NextOpcode == SP::FSUBD ||
- NextOpcode == SP::FMULD || NextOpcode == SP::FDIVD) {
- int RegAIndex = GetRegIndexForOperand(MI, 0);
- int RegBIndex = GetRegIndexForOperand(NMI, 0);
- int RegCIndex =
- GetRegIndexForOperand(NMI, 2); // Second source operand is index 2
- int RegDIndex =
- GetRegIndexForOperand(NMI, 1); // Destination operand is index 1
-
- if ((RegAIndex == RegBIndex + 1 && RegBIndex == RegDIndex) ||
- (RegAIndex == RegCIndex + 1 && RegCIndex == RegDIndex) ||
- (RegAIndex == RegBIndex + 1 && RegCIndex == RegDIndex) ||
- (RegAIndex == RegCIndex + 1 && RegBIndex == RegDIndex)) {
- // Insert NOP between the two instructions.
- BuildMI(MBB, NMBBI, DL, TII.get(SP::NOP));
- Modified = true;
- }
-
- // Check the errata patterns that only happen for FADDD and FMULD
- if (Modified == false &&
- (NextOpcode == SP::FADDD || NextOpcode == SP::FMULD)) {
- RegAIndex = GetRegIndexForOperand(MI, 1);
- if (RegAIndex == RegBIndex + 1 && RegBIndex == RegCIndex &&
- RegBIndex == RegDIndex) {
- // Insert NOP between the two instructions.
- BuildMI(MBB, NMBBI, DL, TII.get(SP::NOP));
- Modified = true;
- }
- }
- } else if (NextOpcode == SP::FSQRTD) {
- int RegAIndex = GetRegIndexForOperand(MI, 1);
- int RegBIndex = GetRegIndexForOperand(NMI, 0);
- int RegCIndex = GetRegIndexForOperand(NMI, 1);
-
- if (RegAIndex == RegBIndex + 1 && RegBIndex == RegCIndex) {
- // Insert NOP between the two instructions.
- BuildMI(MBB, NMBBI, DL, TII.get(SP::NOP));
- Modified = true;
- }
- }
- }
- }
- }
-
- return Modified;
-}
//*****************************************************************************
-//**** PreventRoundChange pass
+//**** DetectRoundChange pass
//*****************************************************************************
// To prevent any explicit change of the default rounding mode, this pass
-// detects any call of the fesetround function and removes this call from the
-// list of generated operations.
+// detects any call of the fesetround function.
+// A warning is generated to ensure the user knows this has happened.
//
-char PreventRoundChange::ID = 0;
+// Detects an erratum in the UT699 LEON 3 processor
-PreventRoundChange::PreventRoundChange(TargetMachine &tm)
+char DetectRoundChange::ID = 0;
+
+DetectRoundChange::DetectRoundChange(TargetMachine &tm)
: LEONMachineFunctionPass(tm, ID) {}
-bool PreventRoundChange::runOnMachineFunction(MachineFunction &MF) {
+bool DetectRoundChange::runOnMachineFunction(MachineFunction &MF) {
Subtarget = &MF.getSubtarget<SparcSubtarget>();
bool Modified = false;
@@ -728,10 +305,11 @@ bool PreventRoundChange::runOnMachineFunction(MachineFunction &MF) {
if (MO.isGlobal()) {
StringRef FuncName = MO.getGlobal()->getName();
if (FuncName.compare_lower("fesetround") == 0) {
- MachineBasicBlock::iterator NMBBI = std::next(MBBI);
- MI.eraseFromParent();
- MBBI = NMBBI;
- Modified = true;
+ errs() << "Error: You are using the detectroundchange "
+ "option to detect rounding changes that will "
+ "cause LEON errata. The only way to fix this "
+ "is to remove the call to fesetround from "
+ "the source code.\n";
}
}
}
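For illustration only (example source, not from the patch): the kind of user code this detection targets is an explicit rounding-mode change, which compiles to a call to the fesetround symbol that the loop above searches for.

#include <cfenv>

// Example of code that would trigger the DetectRoundChange warning; FE_UPWARD
// is just one possible argument.
int switchToUpwardRounding() {
  return std::fesetround(FE_UPWARD);
}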
@@ -740,17 +318,30 @@ bool PreventRoundChange::runOnMachineFunction(MachineFunction &MF) {
return Modified;
}
+
//*****************************************************************************
-//**** FlushCacheLineSWAP pass
+//**** FixAllFDIVSQRT pass
//*****************************************************************************
-// This pass inserts FLUSHW just before any SWAP atomic instruction.
+// This pass fixes the incorrectly working FDIVx and FSQRTx instructions that
+// exist for some earlier versions of the LEON processor line. Five NOP
+// instructions need to be inserted after these instructions to ensure the
+// correct result is placed in the destination registers before they are used.
//
-char FlushCacheLineSWAP::ID = 0;
+// This pass implements two fixes:
+// 1) fixing the FSQRTS and FSQRTD instructions.
+// 2) fixing the FDIVS and FDIVD instructions.
+//
+// FSQRTS and FDIVS are converted to FDIVD and FSQRTD respectively earlier in
+// the pipeline when this option is enabled, so this pass needs only to deal
+// with the changes that still need implementing for the "double" versions
+// of these instructions.
+//
+char FixAllFDIVSQRT::ID = 0;
-FlushCacheLineSWAP::FlushCacheLineSWAP(TargetMachine &tm)
+FixAllFDIVSQRT::FixAllFDIVSQRT(TargetMachine &tm)
: LEONMachineFunctionPass(tm, ID) {}
-bool FlushCacheLineSWAP::runOnMachineFunction(MachineFunction &MF) {
+bool FixAllFDIVSQRT::runOnMachineFunction(MachineFunction &MF) {
Subtarget = &MF.getSubtarget<SparcSubtarget>();
const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
DebugLoc DL = DebugLoc();
@@ -761,170 +352,20 @@ bool FlushCacheLineSWAP::runOnMachineFunction(MachineFunction &MF) {
for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E; ++MBBI) {
MachineInstr &MI = *MBBI;
unsigned Opcode = MI.getOpcode();
- if (Opcode == SP::SWAPrr || Opcode == SP::SWAPri ||
- Opcode == SP::LDSTUBrr || Opcode == SP::LDSTUBri) {
- // insert flush and 5 NOPs before the swap/ldstub instruction
- BuildMI(MBB, MBBI, DL, TII.get(SP::FLUSH));
- BuildMI(MBB, MBBI, DL, TII.get(SP::NOP));
- BuildMI(MBB, MBBI, DL, TII.get(SP::NOP));
- BuildMI(MBB, MBBI, DL, TII.get(SP::NOP));
- BuildMI(MBB, MBBI, DL, TII.get(SP::NOP));
- BuildMI(MBB, MBBI, DL, TII.get(SP::NOP));
- Modified = true;
- } else if (MI.isInlineAsm()) {
- StringRef AsmString =
- MI.getOperand(InlineAsm::MIOp_AsmString).getSymbolName();
- if (AsmString.startswith_lower("swap") ||
- AsmString.startswith_lower("ldstub")) {
- // this is an inline swap or ldstub instruction
-
- // insert flush and 5 NOPs before the swap/ldstub instruction
- BuildMI(MBB, MBBI, DL, TII.get(SP::FLUSH));
- BuildMI(MBB, MBBI, DL, TII.get(SP::NOP));
- BuildMI(MBB, MBBI, DL, TII.get(SP::NOP));
- BuildMI(MBB, MBBI, DL, TII.get(SP::NOP));
- BuildMI(MBB, MBBI, DL, TII.get(SP::NOP));
+ // Note: FDIVS and FSQRTS cannot be generated when this erratum fix is
+ // switched on so we don't need to check for them here. They will
+ // already have been converted to FSQRTD or FDIVD earlier in the
+ // pipeline.
+ if (Opcode == SP::FSQRTD || Opcode == SP::FDIVD) {
+ for (int InsertedCount = 0; InsertedCount < 5; InsertedCount++)
BuildMI(MBB, MBBI, DL, TII.get(SP::NOP));
- Modified = true;
- }
- }
- }
- }
-
- return Modified;
-}
-
-//*****************************************************************************
-//**** InsertNOPsLoadStore pass
-//*****************************************************************************
-// This pass shall insert NOPs between floating point loads and stores when the
-// following circumstances are present [5]:
-// Pattern 1:
-// 1. single-precision load or single-precision FPOP to register %fX, where X is
-// the same register as the store being checked;
-// 2. single-precision load or single-precision FPOP to register %fY , where Y
-// is the opposite register in the same double-precision pair;
-// 3. 0-3 instructions of any kind, except stores from %fX or %fY or operations
-// with %fX as destination;
-// 4. the store (from register %fX) being considered.
-// Pattern 2:
-// 1. double-precision FPOP;
-// 2. any number of operations on any kind, except no double-precision FPOP and
-// at most one (less than two) single-precision or single-to-double FPOPs;
-// 3. the store (from register %fX) being considered.
-//
-char InsertNOPsLoadStore::ID = 0;
-
-InsertNOPsLoadStore::InsertNOPsLoadStore(TargetMachine &tm)
- : LEONMachineFunctionPass(tm, ID) {}
-
-bool InsertNOPsLoadStore::runOnMachineFunction(MachineFunction &MF) {
- Subtarget = &MF.getSubtarget<SparcSubtarget>();
- const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
- DebugLoc DL = DebugLoc();
-
- MachineInstr *Pattern1FirstInstruction = NULL;
- MachineInstr *Pattern2FirstInstruction = NULL;
- unsigned int StoreInstructionsToCheck = 0;
- int FxRegIndex, FyRegIndex;
-
- bool Modified = false;
- for (auto MFI = MF.begin(), E = MF.end(); MFI != E; ++MFI) {
- MachineBasicBlock &MBB = *MFI;
- for (auto MBBI = MBB.begin(), E = MBB.end(); MBBI != E; ++MBBI) {
- MachineInstr &MI = *MBBI;
-
- if (StoreInstructionsToCheck > 0) {
- if (((MI.getOpcode() == SP::STFrr || MI.getOpcode() == SP::STFri) &&
- (GetRegIndexForOperand(MI, LAST_OPERAND) == FxRegIndex ||
- GetRegIndexForOperand(MI, LAST_OPERAND) == FyRegIndex)) ||
- GetRegIndexForOperand(MI, 0) == FxRegIndex) {
- // Insert four NOPs
- for (unsigned InsertedCount = 0; InsertedCount < 4; InsertedCount++) {
- BuildMI(MBB, MBBI, DL, TII.get(SP::NOP));
- }
- Modified = true;
- }
- StoreInstructionsToCheck--;
- }
-
- switch (MI.getOpcode()) {
- // Watch for Pattern 1 FPop instructions
- case SP::LDrr:
- case SP::LDri:
- case SP::LDFrr:
- case SP::LDFri:
- case SP::FADDS:
- case SP::FSUBS:
- case SP::FMULS:
- case SP::FDIVS:
- case SP::FSQRTS:
- case SP::FCMPS:
- case SP::FMOVS:
- case SP::FNEGS:
- case SP::FABSS:
- case SP::FITOS:
- case SP::FSTOI:
- case SP::FITOD:
- case SP::FDTOI:
- case SP::FDTOS:
- if (Pattern1FirstInstruction != NULL) {
- FxRegIndex = GetRegIndexForOperand(*Pattern1FirstInstruction, 0);
- FyRegIndex = GetRegIndexForOperand(MI, 0);
-
- // Check to see if these registers are part of the same double
- // precision
- // register pair.
- int DoublePrecRegIndexForX = (FxRegIndex - SP::F0) / 2;
- int DoublePrecRegIndexForY = (FyRegIndex - SP::F0) / 2;
-
- if (DoublePrecRegIndexForX == DoublePrecRegIndexForY)
- StoreInstructionsToCheck = 4;
- }
+ MachineBasicBlock::iterator NMBBI = std::next(MBBI);
+ for (int InsertedCount = 0; InsertedCount < 28; InsertedCount++)
+ BuildMI(MBB, NMBBI, DL, TII.get(SP::NOP));
- Pattern1FirstInstruction = &MI;
- break;
- // End of Pattern 1
-
- // Search for Pattern 2
- case SP::FADDD:
- case SP::FSUBD:
- case SP::FMULD:
- case SP::FDIVD:
- case SP::FSQRTD:
- case SP::FCMPD:
- Pattern2FirstInstruction = &MI;
- Pattern1FirstInstruction = NULL;
- break;
-
- case SP::STFrr:
- case SP::STFri:
- case SP::STDFrr:
- case SP::STDFri:
- if (Pattern2FirstInstruction != NULL) {
- if (GetRegIndexForOperand(MI, LAST_OPERAND) ==
- GetRegIndexForOperand(*Pattern2FirstInstruction, 0)) {
- // Insert four NOPs
- for (unsigned InsertedCount = 0; InsertedCount < 4;
- InsertedCount++) {
- BuildMI(MBB, MBBI, DL, TII.get(SP::NOP));
- }
-
- Pattern2FirstInstruction = NULL;
- }
- }
- Pattern1FirstInstruction = NULL;
- break;
- // End of Pattern 2
-
- default:
- // Ensure we don't count debug-only values while we're testing for the
- // patterns.
- if (!MI.isDebugValue())
- Pattern1FirstInstruction = NULL;
- break;
+ Modified = true;
}
}
}
diff --git a/contrib/llvm/lib/Target/Sparc/LeonPasses.h b/contrib/llvm/lib/Target/Sparc/LeonPasses.h
index 5e21813ed029..2158cb636bfc 100755
--- a/contrib/llvm/lib/Target/Sparc/LeonPasses.h
+++ b/contrib/llvm/lib/Target/Sparc/LeonPasses.h
@@ -44,57 +44,17 @@ protected:
int getUnusedFPRegister(MachineRegisterInfo &MRI);
};
-class LLVM_LIBRARY_VISIBILITY ReplaceSDIV : public LEONMachineFunctionPass {
-public:
- static char ID;
-
- ReplaceSDIV();
- ReplaceSDIV(TargetMachine &tm);
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- const char *getPassName() const override {
- return "ReplaceSDIV: Erratum Fix LBR25: do not emit SDIV, but emit SDIVCC "
- "instead";
- }
-};
-
-class LLVM_LIBRARY_VISIBILITY FixCALL : public LEONMachineFunctionPass {
-public:
- static char ID;
-
- FixCALL(TargetMachine &tm);
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- const char *getPassName() const override {
- return "FixCALL: Erratum Fix LBR26: restrict the size of the immediate "
- "operand of the CALL instruction to 20 bits";
- }
-};
-
-class LLVM_LIBRARY_VISIBILITY IgnoreZeroFlag : public LEONMachineFunctionPass {
-public:
- static char ID;
-
- IgnoreZeroFlag(TargetMachine &tm);
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- const char *getPassName() const override {
- return "IgnoreZeroFlag: Erratum Fix LBR28: do not rely on the zero bit "
- "flag on a divide overflow for SDIVCC and UDIVCC";
- }
-};
-
-class LLVM_LIBRARY_VISIBILITY InsertNOPDoublePrecision
- : public LEONMachineFunctionPass {
+class LLVM_LIBRARY_VISIBILITY InsertNOPLoad : public LEONMachineFunctionPass {
public:
static char ID;
- InsertNOPDoublePrecision(TargetMachine &tm);
+ InsertNOPLoad(TargetMachine &tm);
bool runOnMachineFunction(MachineFunction &MF) override;
- const char *getPassName() const override {
- return "InsertNOPDoublePrecision: Erratum Fix LBR30: insert a NOP before "
- "the double precision floating point instruction";
+ StringRef getPassName() const override {
+ return "InsertNOPLoad: Erratum Fix LBR35: insert a NOP instruction after "
+ "every single-cycle load instruction when the next instruction is "
+ "another load/store instruction";
}
};
@@ -105,7 +65,7 @@ public:
FixFSMULD(TargetMachine &tm);
bool runOnMachineFunction(MachineFunction &MF) override;
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "FixFSMULD: Erratum Fix LBR31: do not select FSMULD";
}
};
@@ -117,24 +77,24 @@ public:
ReplaceFMULS(TargetMachine &tm);
bool runOnMachineFunction(MachineFunction &MF) override;
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "ReplaceFMULS: Erratum Fix LBR32: replace FMULS instruction with a "
"routine using conversions/double precision operations to replace "
"FMULS";
}
};
-class LLVM_LIBRARY_VISIBILITY PreventRoundChange
+class LLVM_LIBRARY_VISIBILITY DetectRoundChange
: public LEONMachineFunctionPass {
public:
static char ID;
- PreventRoundChange(TargetMachine &tm);
+ DetectRoundChange(TargetMachine &tm);
bool runOnMachineFunction(MachineFunction &MF) override;
- const char *getPassName() const override {
- return "PreventRoundChange: Erratum Fix LBR33: prevent any rounding mode "
- "change request: use only the round-to-nearest rounding mode";
+ StringRef getPassName() const override {
+ return "DetectRoundChange: Leon erratum detection: detect any rounding "
+ "mode change request: use only the round-to-nearest rounding mode";
}
};
@@ -145,55 +105,11 @@ public:
FixAllFDIVSQRT(TargetMachine &tm);
bool runOnMachineFunction(MachineFunction &MF) override;
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "FixAllFDIVSQRT: Erratum Fix LBR34: fix FDIVS/FDIVD/FSQRTS/FSQRTD "
"instructions with NOPs and floating-point store";
}
};
-
-class LLVM_LIBRARY_VISIBILITY InsertNOPLoad : public LEONMachineFunctionPass {
-public:
- static char ID;
-
- InsertNOPLoad(TargetMachine &tm);
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- const char *getPassName() const override {
- return "InsertNOPLoad: insert a NOP instruction after "
- "every single-cycle load instruction when the next instruction is "
- "another load/store instruction";
- }
-};
-
-class LLVM_LIBRARY_VISIBILITY FlushCacheLineSWAP
- : public LEONMachineFunctionPass {
-public:
- static char ID;
-
- FlushCacheLineSWAP(TargetMachine &tm);
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- const char *getPassName() const override {
- return "FlushCacheLineSWAP: Erratum Fix LBR36: flush cache line containing "
- "the lock before performing any of the atomic instructions SWAP and "
- "LDSTUB";
- }
-};
-
-class LLVM_LIBRARY_VISIBILITY InsertNOPsLoadStore
- : public LEONMachineFunctionPass {
-public:
- static char ID;
-
- InsertNOPsLoadStore(TargetMachine &tm);
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- const char *getPassName() const override {
- return "InsertNOPsLoadStore: Erratum Fix LBR37: insert NOPs between "
- "single-precision loads and the store, so the number of "
- "instructions between is 4";
- }
-};
-} // namespace lllvm
+} // namespace llvm
#endif // LLVM_LIB_TARGET_SPARC_LEON_PASSES_H
diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
index 14a70d862f11..6106a6c32dc8 100644
--- a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp
@@ -300,6 +300,7 @@ namespace {
MCAsmBackend *llvm::createSparcAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU) {
+ const Triple &TT, StringRef CPU,
+ const MCTargetOptions &Options) {
return new ELFSparcAsmBackend(T, TT.getOS());
}
diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
index 45bc4a1de01b..86341c61d1e2 100644
--- a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp
@@ -19,6 +19,7 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -35,10 +36,12 @@ namespace {
class SparcMCCodeEmitter : public MCCodeEmitter {
SparcMCCodeEmitter(const SparcMCCodeEmitter &) = delete;
void operator=(const SparcMCCodeEmitter &) = delete;
+ const MCInstrInfo &MCII;
MCContext &Ctx;
public:
- SparcMCCodeEmitter(MCContext &ctx): Ctx(ctx) {}
+ SparcMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx)
+ : MCII(mcii), Ctx(ctx) {}
~SparcMCCodeEmitter() override {}
@@ -71,18 +74,25 @@ public:
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+private:
+ uint64_t computeAvailableFeatures(const FeatureBitset &FB) const;
+ void verifyInstructionPredicates(const MCInst &MI,
+ uint64_t AvailableFeatures) const;
};
} // end anonymous namespace
MCCodeEmitter *llvm::createSparcMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx) {
- return new SparcMCCodeEmitter(Ctx);
+ return new SparcMCCodeEmitter(MCII, Ctx);
}
void SparcMCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
+ verifyInstructionPredicates(MI,
+ computeAvailableFeatures(STI.getFeatureBits()));
+
unsigned Bits = getBinaryCodeForInstr(MI, Fixups, STI);
if (Ctx.getAsmInfo()->isLittleEndian()) {
@@ -215,6 +225,5 @@ getBranchOnRegTargetOpValue(const MCInst &MI, unsigned OpNo,
return 0;
}
-
-
+#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "SparcGenMCCodeEmitter.inc"
diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
index dceaca791aab..889e2fd19ba9 100644
--- a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.cpp
@@ -128,11 +128,12 @@ static MCInstPrinter *createSparcMCInstPrinter(const Triple &T,
extern "C" void LLVMInitializeSparcTargetMC() {
// Register the MC asm info.
- RegisterMCAsmInfoFn X(TheSparcTarget, createSparcMCAsmInfo);
- RegisterMCAsmInfoFn Y(TheSparcV9Target, createSparcV9MCAsmInfo);
- RegisterMCAsmInfoFn Z(TheSparcelTarget, createSparcMCAsmInfo);
+ RegisterMCAsmInfoFn X(getTheSparcTarget(), createSparcMCAsmInfo);
+ RegisterMCAsmInfoFn Y(getTheSparcV9Target(), createSparcV9MCAsmInfo);
+ RegisterMCAsmInfoFn Z(getTheSparcelTarget(), createSparcMCAsmInfo);
- for (Target *T : {&TheSparcTarget, &TheSparcV9Target, &TheSparcelTarget}) {
+ for (Target *T :
+ {&getTheSparcTarget(), &getTheSparcV9Target(), &getTheSparcelTarget()}) {
// Register the MC instruction info.
TargetRegistry::RegisterMCInstrInfo(*T, createSparcMCInstrInfo);
@@ -160,10 +161,10 @@ extern "C" void LLVMInitializeSparcTargetMC() {
}
// Register the MC codegen info.
- TargetRegistry::registerMCAdjustCodeGenOpts(TheSparcTarget,
+ TargetRegistry::registerMCAdjustCodeGenOpts(getTheSparcTarget(),
adjustCodeGenOpts);
- TargetRegistry::registerMCAdjustCodeGenOpts(TheSparcV9Target,
+ TargetRegistry::registerMCAdjustCodeGenOpts(getTheSparcV9Target(),
adjustCodeGenOptsV9);
- TargetRegistry::registerMCAdjustCodeGenOpts(TheSparcelTarget,
+ TargetRegistry::registerMCAdjustCodeGenOpts(getTheSparcelTarget(),
adjustCodeGenOpts);
}
diff --git a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
index a9c9f15454ec..4e754c132d11 100644
--- a/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/Sparc/MCTargetDesc/SparcMCTargetDesc.h
@@ -24,21 +24,23 @@ class MCInstrInfo;
class MCObjectWriter;
class MCRegisterInfo;
class MCSubtargetInfo;
+class MCTargetOptions;
class Target;
class Triple;
class StringRef;
class raw_pwrite_stream;
class raw_ostream;
-extern Target TheSparcTarget;
-extern Target TheSparcV9Target;
-extern Target TheSparcelTarget;
+Target &getTheSparcTarget();
+Target &getTheSparcV9Target();
+Target &getTheSparcelTarget();
MCCodeEmitter *createSparcMCCodeEmitter(const MCInstrInfo &MCII,
const MCRegisterInfo &MRI,
MCContext &Ctx);
MCAsmBackend *createSparcAsmBackend(const Target &T, const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU);
+ const Triple &TT, StringRef CPU,
+ const MCTargetOptions &Options);
MCObjectWriter *createSparcELFObjectWriter(raw_pwrite_stream &OS, bool Is64Bit,
bool IsLIttleEndian, uint8_t OSABI);
} // End llvm namespace
diff --git a/contrib/llvm/lib/Target/Sparc/Sparc.td b/contrib/llvm/lib/Target/Sparc/Sparc.td
index 7a3d12448d52..11004c5a952f 100644
--- a/contrib/llvm/lib/Target/Sparc/Sparc.td
+++ b/contrib/llvm/lib/Target/Sparc/Sparc.td
@@ -21,34 +21,35 @@ include "llvm/Target/Target.td"
//
def FeatureV9
- : SubtargetFeature<"v9", "IsV9", "true", "Enable SPARC-V9 instructions">;
+ : SubtargetFeature<"v9", "IsV9", "true",
+ "Enable SPARC-V9 instructions">;
def FeatureV8Deprecated
- : SubtargetFeature<"deprecated-v8", "V8DeprecatedInsts", "true",
- "Enable deprecated V8 instructions in V9 mode">;
+ : SubtargetFeature<"deprecated-v8", "V8DeprecatedInsts", "true",
+ "Enable deprecated V8 instructions in V9 mode">;
def FeatureVIS
- : SubtargetFeature<"vis", "IsVIS", "true",
- "Enable UltraSPARC Visual Instruction Set extensions">;
+ : SubtargetFeature<"vis", "IsVIS", "true",
+ "Enable UltraSPARC Visual Instruction Set extensions">;
def FeatureVIS2
- : SubtargetFeature<"vis2", "IsVIS2", "true",
- "Enable Visual Instruction Set extensions II">;
+ : SubtargetFeature<"vis2", "IsVIS2", "true",
+ "Enable Visual Instruction Set extensions II">;
def FeatureVIS3
- : SubtargetFeature<"vis3", "IsVIS3", "true",
- "Enable Visual Instruction Set extensions III">;
+ : SubtargetFeature<"vis3", "IsVIS3", "true",
+ "Enable Visual Instruction Set extensions III">;
def FeatureLeon
- : SubtargetFeature<"leon", "IsLeon", "true", "Enable LEON extensions">;
+ : SubtargetFeature<"leon", "IsLeon", "true",
+ "Enable LEON extensions">;
def FeatureHardQuad
- : SubtargetFeature<"hard-quad-float", "HasHardQuad", "true",
- "Enable quad-word floating point instructions">;
+ : SubtargetFeature<"hard-quad-float", "HasHardQuad", "true",
+ "Enable quad-word floating point instructions">;
def UsePopc : SubtargetFeature<"popc", "UsePopc", "true",
"Use the popc (population count) instruction">;
-def FeatureSoftFloat
- : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
- "Use software emulation for floating point">;
+def FeatureSoftFloat : SubtargetFeature<"soft-float", "UseSoftFloat", "true",
+ "Use software emulation for floating point">;
-//==== Features added predmoninantly for LEON subtarget support
+//==== Features added predominantly for LEON subtarget support
include "LeonFeatures.td"
//===----------------------------------------------------------------------===//
@@ -62,92 +63,90 @@ include "SparcInstrInfo.td"
def SparcInstrInfo : InstrInfo;
-def SparcAsmParser : AsmParser { bit ShouldEmitMatchRegisterName = 0; }
+def SparcAsmParser : AsmParser {
+ bit ShouldEmitMatchRegisterName = 0;
+}
//===----------------------------------------------------------------------===//
// SPARC processors supported.
//===----------------------------------------------------------------------===//
class Proc<string Name, list<SubtargetFeature> Features>
- : Processor<Name, NoItineraries, Features>;
-
-def : Proc<"generic", []>;
-def : Proc<"v7", []>;
-def : Proc<"v8", []>;
-def : Proc<"supersparc", []>;
-def : Proc<"sparclite", []>;
-def : Proc<"f934", []>;
-def : Proc<"hypersparc", []>;
-def : Proc<"sparclite86x", []>;
-def : Proc<"sparclet", []>;
-def : Proc<"tsc701", []>;
-def : Proc<"myriad2", []>;
-def : Proc<"myriad2.1", []>;
-def : Proc<"myriad2.2", []>;
-def : Proc<"v9", [ FeatureV9 ]>;
-def : Proc<"ultrasparc", [ FeatureV9, FeatureV8Deprecated, FeatureVIS ]>;
-def : Proc<"ultrasparc3",
- [ FeatureV9, FeatureV8Deprecated, FeatureVIS, FeatureVIS2 ]>;
-def : Proc<"niagara",
- [ FeatureV9, FeatureV8Deprecated, FeatureVIS, FeatureVIS2 ]>;
-def : Proc<"niagara2", [
- FeatureV9, FeatureV8Deprecated, UsePopc, FeatureVIS, FeatureVIS2
-]>;
-def : Proc<"niagara3", [
- FeatureV9, FeatureV8Deprecated, UsePopc, FeatureVIS, FeatureVIS2
-]>;
-def : Proc<"niagara4", [
- FeatureV9, FeatureV8Deprecated, UsePopc, FeatureVIS, FeatureVIS2, FeatureVIS3
-]>;
+ : Processor<Name, NoItineraries, Features>;
+
+def : Proc<"generic", []>;
+def : Proc<"v7", []>;
+def : Proc<"v8", []>;
+def : Proc<"supersparc", []>;
+def : Proc<"sparclite", []>;
+def : Proc<"f934", []>;
+def : Proc<"hypersparc", []>;
+def : Proc<"sparclite86x", []>;
+def : Proc<"sparclet", []>;
+def : Proc<"tsc701", []>;
+def : Proc<"myriad2", [FeatureLeon, LeonCASA]>;
+def : Proc<"myriad2.1", [FeatureLeon, LeonCASA]>;
+def : Proc<"myriad2.2", [FeatureLeon, LeonCASA]>;
+def : Proc<"ma2100", [FeatureLeon, LeonCASA]>;
+def : Proc<"ma2150", [FeatureLeon, LeonCASA]>;
+def : Proc<"ma2450", [FeatureLeon, LeonCASA]>;
+def : Proc<"v9", [FeatureV9]>;
+def : Proc<"ultrasparc", [FeatureV9, FeatureV8Deprecated, FeatureVIS]>;
+def : Proc<"ultrasparc3", [FeatureV9, FeatureV8Deprecated, FeatureVIS,
+ FeatureVIS2]>;
+def : Proc<"niagara", [FeatureV9, FeatureV8Deprecated, FeatureVIS,
+ FeatureVIS2]>;
+def : Proc<"niagara2", [FeatureV9, FeatureV8Deprecated, UsePopc,
+ FeatureVIS, FeatureVIS2]>;
+def : Proc<"niagara3", [FeatureV9, FeatureV8Deprecated, UsePopc,
+ FeatureVIS, FeatureVIS2]>;
+def : Proc<"niagara4", [FeatureV9, FeatureV8Deprecated, UsePopc,
+ FeatureVIS, FeatureVIS2, FeatureVIS3]>;
// LEON 2 FT generic
-def : Processor<"leon2", LEON2Itineraries, [ FeatureLeon ]>;
+def : Processor<"leon2", LEON2Itineraries,
+ [FeatureLeon]>;
// LEON 2 FT (AT697E)
-// AT697E: Provides full coverage of AT697E - covers all the erratum fixes for
-// LEON2 AT697E
-def : Processor<"at697e", LEON2Itineraries, [
- FeatureLeon, ReplaceSDIV, FixCALL, IgnoreZeroFlag, InsertNOPDoublePrecision
-]>;
+// TO DO: Place-holder: Processor specific features will be added *very* soon here.
+def : Processor<"at697e", LEON2Itineraries,
+ [FeatureLeon, ReplaceSDIV, InsertNOPLoad]>;
// LEON 2 FT (AT697F)
-// AT697F: Provides full coverage of AT697F - covers all the erratum fixes for
-// LEON2 AT697F
+// TO DO: Place-holder: Processor specific features will be added *very* soon here.
def : Processor<"at697f", LEON2Itineraries,
- [ FeatureLeon, InsertNOPDoublePrecision ]>;
+ [FeatureLeon, InsertNOPLoad]>;
+
// LEON 3 FT generic
-def : Processor<"leon3", LEON3Itineraries, [ FeatureLeon, UMACSMACSupport ]>;
+def : Processor<"leon3", LEON3Itineraries,
+ [FeatureLeon, UMACSMACSupport]>;
// LEON 3 FT (UT699). Provides features for the UT699 processor
-// - covers all the erratum fixes for LEON3, but does not support the CASA
-// instruction.
-def : Processor<"ut699", LEON3Itineraries, [
- FeatureLeon, FixFSMULD, ReplaceFMULS, PreventRoundChange,
- FixAllFDIVSQRT, InsertNOPLoad, FlushCacheLineSWAP, InsertNOPsLoadStore
-]>;
+// - covers all the erratum fixes for LEON3, but does not support the CASA instruction.
+def : Processor<"ut699", LEON3Itineraries,
+ [FeatureLeon, InsertNOPLoad, FixFSMULD, ReplaceFMULS, FixAllFDIVSQRT]>;
// LEON3 FT (GR712RC). Provides features for the GR712RC processor.
-// - covers all the erratum fixed for LEON3 and support for the CASA
-// instruction.
+// - covers all the erratum fixed for LEON3 and support for the CASA instruction.
def : Processor<"gr712rc", LEON3Itineraries,
- [ FeatureLeon, LeonCASA ]>;
+ [FeatureLeon, LeonCASA]>;
// LEON 4 FT generic
def : Processor<"leon4", LEON4Itineraries,
- [ FeatureLeon, LeonCASA ]>;
+ [FeatureLeon, UMACSMACSupport, LeonCASA]>;
-// GR740: Provides full coverage of GR740 - covers all the erratum fixes for
-// LEON3 + support to CASA + LEON 4 instruction timings
-def : Processor<"gr740", LEON4Itineraries,
- [ FeatureLeon, LeonCASA ]> {}
+// LEON 4 FT (GR740)
+// TO DO: Place-holder: Processor specific features will be added *very* soon here.
+def : Processor<"gr740", LEON4Itineraries,
+ [FeatureLeon, UMACSMACSupport, LeonCASA]>;
//===----------------------------------------------------------------------===//
// Declare the target which we are implementing
//===----------------------------------------------------------------------===//
def SparcAsmWriter : AsmWriter {
- string AsmWriterClassName = "InstPrinter";
+ string AsmWriterClassName = "InstPrinter";
int PassSubtarget = 1;
int Variant = 0;
}
@@ -155,6 +154,6 @@ def SparcAsmWriter : AsmWriter {
def Sparc : Target {
// Pull in Instruction Info:
let InstructionSet = SparcInstrInfo;
- let AssemblyParsers = [ SparcAsmParser ];
- let AssemblyWriters = [ SparcAsmWriter ];
+ let AssemblyParsers = [SparcAsmParser];
+ let AssemblyWriters = [SparcAsmWriter];
}
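
Note: the Proc/Processor records above only bind feature lists to -mcpu names; TableGen turns each SubtargetFeature into a boolean in the generated SparcGenSubtargetInfo, and codegen reaches those bits through the SparcSubtarget accessors touched later in this patch. A minimal sketch of how a pass would consume them (hasLeonCasa()/isV9() are the accessors from SparcSubtarget.h; nothing here is new API):

// Sketch: query feature bits pulled in by -mcpu (e.g. -mcpu=leon3 sets IsLeon).
const SparcSubtarget &ST = MF.getSubtarget<SparcSubtarget>();
if (ST.isV9() || ST.hasLeonCasa()) {
  // CASA is available, so 32-bit compare-and-swap can be emitted inline.
} else {
  // No CASA: atomic operations have to be expanded to library calls.
}
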
diff --git a/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp b/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
index c068440f7c05..31a128a5f271 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp
@@ -46,9 +46,7 @@ namespace {
std::unique_ptr<MCStreamer> Streamer)
: AsmPrinter(TM, std::move(Streamer)) {}
- const char *getPassName() const override {
- return "Sparc Assembly Printer";
- }
+ StringRef getPassName() const override { return "Sparc Assembly Printer"; }
void printOperand(const MachineInstr *MI, int opNum, raw_ostream &OS);
void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &OS,
@@ -445,7 +443,7 @@ bool SparcAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
// Force static initialization.
extern "C" void LLVMInitializeSparcAsmPrinter() {
- RegisterAsmPrinter<SparcAsmPrinter> X(TheSparcTarget);
- RegisterAsmPrinter<SparcAsmPrinter> Y(TheSparcV9Target);
- RegisterAsmPrinter<SparcAsmPrinter> Z(TheSparcelTarget);
+ RegisterAsmPrinter<SparcAsmPrinter> X(getTheSparcTarget());
+ RegisterAsmPrinter<SparcAsmPrinter> Y(getTheSparcV9Target());
+ RegisterAsmPrinter<SparcAsmPrinter> Z(getTheSparcelTarget());
}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
index 87b01553b37e..122f830e0dc5 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcFrameLowering.cpp
@@ -87,7 +87,7 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF,
SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>();
assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
const SparcInstrInfo &TII =
*static_cast<const SparcInstrInfo *>(MF.getSubtarget().getInstrInfo());
const SparcRegisterInfo &RegInfo =
@@ -103,13 +103,13 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF,
// rather than reporting an error, as would be sensible. This is
// poor, but fixing that bogosity is going to be a large project.
// For now, just see if it's lied, and report an error here.
- if (!NeedsStackRealignment && MFI->getMaxAlignment() > getStackAlignment())
+ if (!NeedsStackRealignment && MFI.getMaxAlignment() > getStackAlignment())
report_fatal_error("Function \"" + Twine(MF.getName()) + "\" required "
"stack re-alignment, but LLVM couldn't handle it "
"(probably because it has a dynamic alloca).");
// Get the number of bytes to allocate from the FrameInfo
- int NumBytes = (int) MFI->getStackSize();
+ int NumBytes = (int) MFI.getStackSize();
unsigned SAVEri = SP::SAVEri;
unsigned SAVErr = SP::SAVErr;
@@ -136,8 +136,8 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF,
// Add the extra call frame stack size, if needed. (This is the same
// code as in PrologEpilogInserter, but also gets disabled by
// targetHandlesStackFrameRounding)
- if (MFI->adjustsStack() && hasReservedCallFrame(MF))
- NumBytes += MFI->getMaxCallFrameSize();
+ if (MFI.adjustsStack() && hasReservedCallFrame(MF))
+ NumBytes += MFI.getMaxCallFrameSize();
// Adds the SPARC subtarget-specific spill area to the stack
// size. Also ensures target-required alignment.
@@ -145,40 +145,39 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF,
// Finally, ensure that the size is sufficiently aligned for the
// data on the stack.
- if (MFI->getMaxAlignment() > 0) {
- NumBytes = alignTo(NumBytes, MFI->getMaxAlignment());
+ if (MFI.getMaxAlignment() > 0) {
+ NumBytes = alignTo(NumBytes, MFI.getMaxAlignment());
}
// Update stack size with corrected value.
- MFI->setStackSize(NumBytes);
+ MFI.setStackSize(NumBytes);
emitSPAdjustment(MF, MBB, MBBI, -NumBytes, SAVErr, SAVEri);
- MachineModuleInfo &MMI = MF.getMMI();
unsigned regFP = RegInfo.getDwarfRegNum(SP::I6, true);
// Emit ".cfi_def_cfa_register 30".
unsigned CFIIndex =
- MMI.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, regFP));
+ MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(nullptr, regFP));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
// Emit ".cfi_window_save".
- CFIIndex = MMI.addFrameInst(MCCFIInstruction::createWindowSave(nullptr));
+ CFIIndex = MF.addFrameInst(MCCFIInstruction::createWindowSave(nullptr));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
unsigned regInRA = RegInfo.getDwarfRegNum(SP::I7, true);
unsigned regOutRA = RegInfo.getDwarfRegNum(SP::O7, true);
// Emit ".cfi_register 15, 31".
- CFIIndex = MMI.addFrameInst(
+ CFIIndex = MF.addFrameInst(
MCCFIInstruction::createRegister(nullptr, regOutRA, regInRA));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
if (NeedsStackRealignment) {
// andn %o6, MaxAlign-1, %o6
- int MaxAlign = MFI->getMaxAlignment();
+ int MaxAlign = MFI.getMaxAlignment();
BuildMI(MBB, MBBI, dl, TII.get(SP::ANDNri), SP::O6).addReg(SP::O6).addImm(MaxAlign - 1);
}
}
@@ -213,9 +212,9 @@ void SparcFrameLowering::emitEpilogue(MachineFunction &MF,
.addReg(SP::G0);
return;
}
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
- int NumBytes = (int) MFI->getStackSize();
+ int NumBytes = (int) MFI.getStackSize();
if (NumBytes == 0)
return;
@@ -224,7 +223,7 @@ void SparcFrameLowering::emitEpilogue(MachineFunction &MF,
bool SparcFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
// Reserve call frame if there are no variable sized objects on the stack.
- return !MF.getFrameInfo()->hasVarSizedObjects();
+ return !MF.getFrameInfo().hasVarSizedObjects();
}
// hasFP - Return true if the specified function should have a dedicated frame
@@ -233,21 +232,21 @@ bool SparcFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
bool SparcFrameLowering::hasFP(const MachineFunction &MF) const {
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
- const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
return MF.getTarget().Options.DisableFramePointerElim(MF) ||
RegInfo->needsStackRealignment(MF) ||
- MFI->hasVarSizedObjects() ||
- MFI->isFrameAddressTaken();
+ MFI.hasVarSizedObjects() ||
+ MFI.isFrameAddressTaken();
}
int SparcFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
unsigned &FrameReg) const {
const SparcSubtarget &Subtarget = MF.getSubtarget<SparcSubtarget>();
- const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
const SparcRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
const SparcMachineFunctionInfo *FuncInfo = MF.getInfo<SparcMachineFunctionInfo>();
- bool isFixed = MFI->isFixedObjectIndex(FI);
+ bool isFixed = MFI.isFixedObjectIndex(FI);
// Addressable stack objects are accessed using neg. offsets from
// %fp, or positive offsets from %sp.
@@ -273,7 +272,7 @@ int SparcFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI
UseFP = true;
}
- int64_t FrameOffset = MF.getFrameInfo()->getObjectOffset(FI) +
+ int64_t FrameOffset = MF.getFrameInfo().getObjectOffset(FI) +
Subtarget.getStackPointerBias();
if (UseFP) {
@@ -281,7 +280,7 @@ int SparcFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI
return FrameOffset;
} else {
FrameReg = SP::O6; // %sp
- return FrameOffset + MF.getFrameInfo()->getStackSize();
+ return FrameOffset + MF.getFrameInfo().getStackSize();
}
}
@@ -303,9 +302,9 @@ bool SparcFrameLowering::isLeafProc(MachineFunction &MF) const
{
MachineRegisterInfo &MRI = MF.getRegInfo();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
- return !(MFI->hasCalls() // has calls
+ return !(MFI.hasCalls() // has calls
|| !MRI.reg_nodbg_empty(SP::L0) // Too many registers needed
|| !MRI.reg_nodbg_empty(SP::O6) // %SP is used
|| hasFP(MF)); // need %FP
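
The change running through this whole file is mechanical: in this LLVM import, MachineFunction::getFrameInfo() returns a MachineFrameInfo reference rather than a pointer, so every '->' access becomes '.'. The before/after pattern, using the same calls as the hunks above:

// Old API (pointer):
//   MachineFrameInfo *MFI = MF.getFrameInfo();
//   int NumBytes = (int) MFI->getStackSize();
// New API (reference):
MachineFrameInfo &MFI = MF.getFrameInfo();
int NumBytes = (int) MFI.getStackSize();
if (MFI.adjustsStack() && hasReservedCallFrame(MF))
  NumBytes += MFI.getMaxCallFrameSize();
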
diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp
index 07948a33cde6..c36e75d1b076 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp
@@ -53,7 +53,7 @@ public:
unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "SPARC DAG->DAG Pattern Instruction Selection";
}
@@ -360,22 +360,15 @@ void SparcDAGToDAGISel::Select(SDNode *N) {
// FIXME: Handle div by immediate.
unsigned Opcode = N->getOpcode() == ISD::SDIV ? SP::SDIVrr : SP::UDIVrr;
+ // SDIV triggers a hardware erratum on some LEON2 processors, so replace
+ // it with SDIVcc here.
+ if (((SparcTargetMachine &)TM).getSubtargetImpl()->performSDIVReplace() &&
+ Opcode == SP::SDIVrr) {
+ Opcode = SP::SDIVCCrr;
+ }
CurDAG->SelectNodeTo(N, Opcode, MVT::i32, DivLHS, DivRHS, TopPart);
return;
}
- case ISD::MULHU:
- case ISD::MULHS: {
- // FIXME: Handle mul by immediate.
- SDValue MulLHS = N->getOperand(0);
- SDValue MulRHS = N->getOperand(1);
- unsigned Opcode = N->getOpcode() == ISD::MULHU ? SP::UMULrr : SP::SMULrr;
- SDNode *Mul =
- CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::i32, MulLHS, MulRHS);
- SDValue ResultHigh = SDValue(Mul, 1);
- ReplaceUses(SDValue(N, 0), ResultHigh);
- CurDAG->RemoveDeadNode(N);
- return;
- }
}
SelectCode(N);
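
The added hunk above works around a LEON2 hardware erratum by never selecting SDIV when the subtarget asks for the replacement: the signed divide goes out as SDIVcc instead, which additionally writes the integer condition codes but is otherwise usable in its place. A condensed sketch of that substitution (same opcodes and predicate as the hunk; the subtarget is reached through the target machine just as the in-tree code does):

unsigned Opcode = N->getOpcode() == ISD::SDIV ? SP::SDIVrr : SP::UDIVrr;
// LEON2 erratum workaround: emit SDIVcc instead of SDIV when requested.
const SparcSubtarget *ST =
    static_cast<SparcTargetMachine &>(TM).getSubtargetImpl();
if (ST->performSDIVReplace() && Opcode == SP::SDIVrr)
  Opcode = SP::SDIVCCrr;
CurDAG->SelectNodeTo(N, Opcode, MVT::i32, DivLHS, DivRHS, TopPart);
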
diff --git a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp
index 8738bc82683d..2ac9aae2471b 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -32,6 +32,7 @@
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
+
//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
@@ -403,7 +404,7 @@ SDValue SparcTargetLowering::LowerFormalArguments_32(
if (InIdx != 0)
report_fatal_error("sparc only supports sret on the first parameter");
// Get SRet from [%fp+64].
- int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, 64, true);
+ int FrameIdx = MF.getFrameInfo().CreateFixedObject(4, 64, true);
SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
SDValue Arg =
DAG.getLoad(MVT::i32, dl, Chain, FIPtr, MachinePointerInfo());
@@ -424,7 +425,7 @@ SDValue SparcTargetLowering::LowerFormalArguments_32(
SDValue LoVal;
if (NextVA.isMemLoc()) {
- int FrameIdx = MF.getFrameInfo()->
+ int FrameIdx = MF.getFrameInfo().
CreateFixedObject(4, StackOffset+NextVA.getLocMemOffset(),true);
SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, MachinePointerInfo());
@@ -466,9 +467,9 @@ SDValue SparcTargetLowering::LowerFormalArguments_32(
assert(VA.getValVT() == MVT::f64 || VA.getValVT() == MVT::v2i32);
// If it is double-word aligned, just load.
if (Offset % 8 == 0) {
- int FI = MF.getFrameInfo()->CreateFixedObject(8,
- Offset,
- true);
+ int FI = MF.getFrameInfo().CreateFixedObject(8,
+ Offset,
+ true);
SDValue FIPtr = DAG.getFrameIndex(FI, PtrVT);
SDValue Load =
DAG.getLoad(VA.getValVT(), dl, Chain, FIPtr, MachinePointerInfo());
@@ -476,15 +477,15 @@ SDValue SparcTargetLowering::LowerFormalArguments_32(
continue;
}
- int FI = MF.getFrameInfo()->CreateFixedObject(4,
- Offset,
- true);
+ int FI = MF.getFrameInfo().CreateFixedObject(4,
+ Offset,
+ true);
SDValue FIPtr = DAG.getFrameIndex(FI, PtrVT);
SDValue HiVal =
DAG.getLoad(MVT::i32, dl, Chain, FIPtr, MachinePointerInfo());
- int FI2 = MF.getFrameInfo()->CreateFixedObject(4,
- Offset+4,
- true);
+ int FI2 = MF.getFrameInfo().CreateFixedObject(4,
+ Offset+4,
+ true);
SDValue FIPtr2 = DAG.getFrameIndex(FI2, PtrVT);
SDValue LoVal =
@@ -500,9 +501,9 @@ SDValue SparcTargetLowering::LowerFormalArguments_32(
continue;
}
- int FI = MF.getFrameInfo()->CreateFixedObject(4,
- Offset,
- true);
+ int FI = MF.getFrameInfo().CreateFixedObject(4,
+ Offset,
+ true);
SDValue FIPtr = DAG.getFrameIndex(FI, PtrVT);
SDValue Load ;
if (VA.getValVT() == MVT::i32 || VA.getValVT() == MVT::f32) {
@@ -554,8 +555,8 @@ SDValue SparcTargetLowering::LowerFormalArguments_32(
MF.getRegInfo().addLiveIn(*CurArgReg, VReg);
SDValue Arg = DAG.getCopyFromReg(DAG.getRoot(), dl, VReg, MVT::i32);
- int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset,
- true);
+ int FrameIdx = MF.getFrameInfo().CreateFixedObject(4, ArgOffset,
+ true);
SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
OutChains.push_back(
@@ -638,7 +639,7 @@ SDValue SparcTargetLowering::LowerFormalArguments_64(
// prefer our own extending loads.
if (VA.isExtInLoc())
Offset += 8 - ValSize;
- int FI = MF.getFrameInfo()->CreateFixedObject(ValSize, Offset, true);
+ int FI = MF.getFrameInfo().CreateFixedObject(ValSize, Offset, true);
InVals.push_back(
DAG.getLoad(VA.getValVT(), DL, Chain,
DAG.getFrameIndex(FI, getPointerTy(MF.getDataLayout())),
@@ -668,7 +669,7 @@ SDValue SparcTargetLowering::LowerFormalArguments_64(
for (; ArgOffset < 6*8; ArgOffset += 8) {
unsigned VReg = MF.addLiveIn(SP::I0 + ArgOffset/8, &SP::I64RegsRegClass);
SDValue VArg = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
- int FI = MF.getFrameInfo()->CreateFixedObject(8, ArgOffset + ArgArea, true);
+ int FI = MF.getFrameInfo().CreateFixedObject(8, ArgOffset + ArgArea, true);
auto PtrVT = getPointerTy(MF.getDataLayout());
OutChains.push_back(
DAG.getStore(Chain, DL, VArg, DAG.getFrameIndex(FI, PtrVT),
@@ -740,7 +741,7 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
// Keep stack frames 8-byte aligned.
ArgsSize = (ArgsSize+7) & ~7;
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
// Create local copies for byval args.
SmallVector<SDValue, 8> ByValArgs;
@@ -754,7 +755,7 @@ SparcTargetLowering::LowerCall_32(TargetLowering::CallLoweringInfo &CLI,
unsigned Align = Flags.getByValAlign();
if (Size > 0U) {
- int FI = MFI->CreateStackObject(Size, Align, false);
+ int FI = MFI.CreateStackObject(Size, Align, false);
SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
SDValue SizeNode = DAG.getConstant(Size, dl, MVT::i32);
@@ -1207,7 +1208,7 @@ SparcTargetLowering::LowerCall_64(TargetLowering::CallLoweringInfo &CLI,
if (VA.isRegLoc()) {
if (VA.needsCustom() && VA.getValVT() == MVT::f128
&& VA.getLocVT() == MVT::i128) {
- // Store and reload into the interger register reg and reg+1.
+ // Store and reload into the integer register reg and reg+1.
unsigned Offset = 8 * (VA.getLocReg() - SP::I0);
unsigned StackOffset = Offset + Subtarget->getStackPointerBias() + 128;
SDValue StackPtr = DAG.getRegister(SP::O6, PtrVT);
@@ -1507,7 +1508,7 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
// AddPromotedToType(ISD::STORE, MVT::i64, MVT::v2i32);
}
- // Turn FP extload into load/fextend
+ // Turn FP extload into load/fpextend
for (MVT VT : MVT::fp_valuetypes()) {
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
setLoadExtAction(ISD::EXTLOAD, VT, MVT::f64, Expand);
@@ -1616,8 +1617,10 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
// Atomics are supported on SparcV9. 32-bit atomics are also
// supported by some Leon SparcV8 variants. Otherwise, atomics
// are unsupported.
- if (Subtarget->isV9() || Subtarget->hasLeonCasa())
+ if (Subtarget->isV9())
setMaxAtomicSizeInBitsSupported(64);
+ else if (Subtarget->hasLeonCasa())
+ setMaxAtomicSizeInBitsSupported(32);
else
setMaxAtomicSizeInBitsSupported(0);
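
This hunk tightens the atomics policy: V9 keeps 64-bit atomics, LEON parts that implement CASA are limited to 32-bit atomics, and everything else gets none. Sizes above the advertised maximum are turned into __atomic_* library calls by the AtomicExpand pass, so the sketch below restates the diff with the downstream effect spelled out in comments:

if (Subtarget->isV9())
  setMaxAtomicSizeInBitsSupported(64);  // 64-bit atomics selected inline
else if (Subtarget->hasLeonCasa())
  setMaxAtomicSizeInBitsSupported(32);  // CASA is 32-bit only; i64 atomics -> libcalls
else
  setMaxAtomicSizeInBitsSupported(0);   // no native atomics; everything -> libcalls
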
@@ -1638,6 +1641,13 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::ATOMIC_STORE, MVT::i64, Custom);
}
+ if (!Subtarget->is64Bit()) {
+ // These libcalls are not available in 32-bit.
+ setLibcallName(RTLIB::SHL_I128, nullptr);
+ setLibcallName(RTLIB::SRL_I128, nullptr);
+ setLibcallName(RTLIB::SRA_I128, nullptr);
+ }
+
if (!Subtarget->isV9()) {
// SparcV8 does not have FNEGD and FABSD.
setOperationAction(ISD::FNEG, MVT::f64, Custom);
@@ -1675,9 +1685,10 @@ SparcTargetLowering::SparcTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
- // FIXME: Sparc provides these multiplies, but we don't have them yet.
- setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
- setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+ // Expands to [SU]MUL_LOHI.
+ setOperationAction(ISD::MULHU, MVT::i32, Expand);
+ setOperationAction(ISD::MULHS, MVT::i32, Expand);
+ setOperationAction(ISD::MUL, MVT::i32, Expand);
if (Subtarget->is64Bit()) {
setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
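
The old FIXME is resolved by flipping the i32 multiply setup around: MULHU, MULHS and plain MUL are now marked Expand, which makes legalization rebuild them from UMUL_LOHI/SMUL_LOHI nodes, and those are what the reworked UMUL/SMUL patterns in SparcInstrInfo.td (further down in this patch) select directly, replacing the manual MULHU/MULHS handling deleted earlier in SparcISelDAGToDAG.cpp. The resulting division of labour, as a sketch:

// i32 high multiplies: only the *MUL_LOHI form is exposed to instruction
// selection; the .td patterns then map it onto the hardware umul/smul.
setOperationAction(ISD::MULHU, MVT::i32, Expand);  // legalized via UMUL_LOHI
setOperationAction(ISD::MULHS, MVT::i32, Expand);  // legalized via SMUL_LOHI
setOperationAction(ISD::MUL,   MVT::i32, Expand);  // likewise rebuilt from *MUL_LOHI
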
@@ -1961,8 +1972,8 @@ SDValue SparcTargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
SDValue AbsAddr = DAG.getNode(ISD::ADD, DL, VT, GlobalBase, HiLo);
// GLOBAL_BASE_REG codegen'ed with call. Inform MFI that this
// function has calls.
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
- MFI->setHasCalls(true);
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI.setHasCalls(true);
return DAG.getLoad(VT, DL, DAG.getEntryNode(), AbsAddr,
MachinePointerInfo::getGOT(DAG.getMachineFunction()));
}
@@ -2089,8 +2100,8 @@ SDValue SparcTargetLowering::LowerGlobalTLSAddress(SDValue Op,
// GLOBAL_BASE_REG codegen'ed with call. Inform MFI that this
// function has calls.
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
- MFI->setHasCalls(true);
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI.setHasCalls(true);
SDValue TGA = makeHiLoPair(Op,
SparcMCExpr::VK_Sparc_TLS_IE_HI22,
@@ -2120,7 +2131,7 @@ SDValue SparcTargetLowering::LowerF128_LibCallArg(SDValue Chain,
ArgListTy &Args, SDValue Arg,
const SDLoc &DL,
SelectionDAG &DAG) const {
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
EVT ArgVT = Arg.getValueType();
Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
@@ -2130,7 +2141,7 @@ SDValue SparcTargetLowering::LowerF128_LibCallArg(SDValue Chain,
if (ArgTy->isFP128Ty()) {
// Create a stack object and pass the pointer to the library function.
- int FI = MFI->CreateStackObject(16, 8, false);
+ int FI = MFI.CreateStackObject(16, 8, false);
SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
Chain = DAG.getStore(Chain, DL, Entry.Node, FIPtr, MachinePointerInfo(),
/* Alignment = */ 8);
@@ -2149,7 +2160,7 @@ SparcTargetLowering::LowerF128Op(SDValue Op, SelectionDAG &DAG,
ArgListTy Args;
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
auto PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Callee = DAG.getExternalSymbol(LibFuncName, PtrVT);
@@ -2161,7 +2172,7 @@ SparcTargetLowering::LowerF128Op(SDValue Op, SelectionDAG &DAG,
if (RetTy->isFP128Ty()) {
// Create a Stack Object to receive the return value of type f128.
ArgListEntry Entry;
- int RetFI = MFI->CreateStackObject(16, 8, false);
+ int RetFI = MFI.CreateStackObject(16, 8, false);
RetPtr = DAG.getFrameIndex(RetFI, PtrVT);
Entry.Node = RetPtr;
Entry.Ty = PointerType::getUnqual(RetTy);
@@ -2517,7 +2528,7 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG,
auto PtrVT = TLI.getPointerTy(DAG.getDataLayout());
// Need frame address to find the address of VarArgsFrameIndex.
- MF.getFrameInfo()->setFrameAddressIsTaken(true);
+ MF.getFrameInfo().setFrameAddressIsTaken(true);
// vastart just stores the address of the VarArgsFrameIndex slot into the
// memory location argument.
@@ -2557,17 +2568,57 @@ static SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG,
const SparcSubtarget *Subtarget) {
SDValue Chain = Op.getOperand(0); // Legalize the chain.
SDValue Size = Op.getOperand(1); // Legalize the size.
+ unsigned Align = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+ unsigned StackAlign = Subtarget->getFrameLowering()->getStackAlignment();
EVT VT = Size->getValueType(0);
SDLoc dl(Op);
+ // TODO: implement over-aligned alloca. (Note: also implies
+ // support for overaligned function frames + dynamic
+ // allocations, at all, which currently isn't supported)
+ if (Align > StackAlign) {
+ const MachineFunction &MF = DAG.getMachineFunction();
+ report_fatal_error("Function \"" + Twine(MF.getName()) + "\": "
+ "over-aligned dynamic alloca not supported.");
+ }
+
+ // The resultant pointer needs to be above the register spill area
+ // at the bottom of the stack.
+ unsigned regSpillArea;
+ if (Subtarget->is64Bit()) {
+ regSpillArea = 128;
+ } else {
+ // On Sparc32, the size of the spill area is 92. Unfortunately,
+ // that's only 4-byte aligned, not 8-byte aligned (the stack
+ // pointer is 8-byte aligned). So, if the user asked for an 8-byte
+ // aligned dynamic allocation, we actually need to add 96 to the
+ // bottom of the stack, instead of 92, to ensure 8-byte alignment.
+
+ // That also means adding 4 to the size of the allocation --
+ // before applying the 8-byte rounding. Unfortunately, the value
+ // we get here has already had rounding applied. So, we need
+ // to add 8, instead, wasting a bit more memory.
+
+ // Further, this only actually needs to be done if the required
+ // alignment is > 4, but, we've lost that info by this point, too,
+ // so we always apply it.
+
+ // (An alternative approach would be to always reserve 96 bytes
+ // instead of the required 92, but then we'd waste 4 extra bytes
+ // in every frame, not just those with dynamic stack allocations)
+
+ // TODO: modify code in SelectionDAGBuilder to make this less sad.
+
+ Size = DAG.getNode(ISD::ADD, dl, VT, Size,
+ DAG.getConstant(8, dl, VT));
+ regSpillArea = 96;
+ }
+
unsigned SPReg = SP::O6;
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
SDValue NewSP = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
Chain = DAG.getCopyToReg(SP.getValue(1), dl, SPReg, NewSP); // Output chain
- // The resultant pointer is actually 16 words from the bottom of the stack,
- // to provide a register spill area.
- unsigned regSpillArea = Subtarget->is64Bit() ? 128 : 96;
regSpillArea += Subtarget->getStackPointerBias();
SDValue NewVal = DAG.getNode(ISD::ADD, dl, VT, NewSP,
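
Worked through with the numbers from the comments above, the 32-bit path computes the dynamic-alloca result as below. This is an arithmetic sketch only, under the assumption that the incoming size has already been rounded to 8 bytes; it is not additional backend code:

#include <cstdint>

// Sparc32: reserve the register spill area below the returned pointer.
uint64_t dynAllocaResult(uint64_t OldSP, uint64_t RoundedSize, uint64_t Bias) {
  uint64_t Size = RoundedSize + 8; // pad: real need is +4 pre-rounding, lost here
  uint64_t NewSP = OldSP - Size;   // stack grows downwards
  return NewSP + 96 + Bias;        // 92-byte spill area, rounded up to keep
                                   // the result 8-byte aligned
}
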
@@ -2586,8 +2637,8 @@ static SDValue getFLUSHW(SDValue Op, SelectionDAG &DAG) {
static SDValue getFRAMEADDR(uint64_t depth, SDValue Op, SelectionDAG &DAG,
const SparcSubtarget *Subtarget) {
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
- MFI->setFrameAddressIsTaken(true);
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI.setFrameAddressIsTaken(true);
EVT VT = Op.getValueType();
SDLoc dl(Op);
@@ -2628,14 +2679,15 @@ static SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG,
uint64_t depth = Op.getConstantOperandVal(0);
return getFRAMEADDR(depth, Op, DAG, Subtarget);
+
}
static SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG,
const SparcTargetLowering &TLI,
const SparcSubtarget *Subtarget) {
MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- MFI->setReturnAddressIsTaken(true);
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ MFI.setReturnAddressIsTaken(true);
if (TLI.verifyReturnAddressArgumentIsConstant(Op, DAG))
return SDValue();
@@ -2805,7 +2857,7 @@ static SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG)
SDValue Val = DAG.getNode(ISD::BITCAST, dl, MVT::v2i32, St->getValue());
SDValue Chain = DAG.getStore(
St->getChain(), dl, Val, St->getBasePtr(), St->getPointerInfo(),
- St->isVolatile(), St->getMemOperand()->getFlags(), St->getAAInfo());
+ St->getAlignment(), St->getMemOperand()->getFlags(), St->getAAInfo());
return Chain;
}
@@ -3042,7 +3094,7 @@ MachineBasicBlock *
SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const {
switch (MI.getOpcode()) {
- default: llvm_unreachable("Unknown Custom Instruction!");
+ default: llvm_unreachable("Unknown SELECT_CC!");
case SP::SELECT_CC_Int_ICC:
case SP::SELECT_CC_FP_ICC:
case SP::SELECT_CC_DFP_ICC:
@@ -3059,6 +3111,7 @@ SparcTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case SP::EH_SJLJ_LONGJMP32rr:
case SP::EH_SJLJ_LONGJMP32ri:
return emitEHSjLjLongJmp(MI, BB);
+
}
}
@@ -3329,11 +3382,8 @@ SparcTargetLowering::ConstraintType
SparcTargetLowering::getConstraintType(StringRef Constraint) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
- default:
- break;
- case 'f':
- case 'r':
- return C_RegisterClass;
+ default: break;
+ case 'r': return C_RegisterClass;
case 'I': // SIMM13
return C_Other;
}
@@ -3407,9 +3457,6 @@ SparcTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
MVT VT) const {
if (Constraint.size() == 1) {
switch (Constraint[0]) {
- case 'f':
- return std::make_pair(0U, &SP::FPRegsRegClass);
-
case 'r':
if (VT == MVT::v2i32)
return std::make_pair(0U, &SP::IntPairRegClass);
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
index cfd342410550..ea8ed830bafc 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -118,19 +118,19 @@ static SPCC::CondCodes GetOppositeBranchCondition(SPCC::CondCodes CC)
case SPCC::CPCC_A: return SPCC::CPCC_N;
case SPCC::CPCC_N: return SPCC::CPCC_A;
- case SPCC::CPCC_3: // Fall through
- case SPCC::CPCC_2: // Fall through
- case SPCC::CPCC_23: // Fall through
- case SPCC::CPCC_1: // Fall through
- case SPCC::CPCC_13: // Fall through
- case SPCC::CPCC_12: // Fall through
- case SPCC::CPCC_123: // Fall through
- case SPCC::CPCC_0: // Fall through
- case SPCC::CPCC_03: // Fall through
- case SPCC::CPCC_02: // Fall through
- case SPCC::CPCC_023: // Fall through
- case SPCC::CPCC_01: // Fall through
- case SPCC::CPCC_013: // Fall through
+ case SPCC::CPCC_3: LLVM_FALLTHROUGH;
+ case SPCC::CPCC_2: LLVM_FALLTHROUGH;
+ case SPCC::CPCC_23: LLVM_FALLTHROUGH;
+ case SPCC::CPCC_1: LLVM_FALLTHROUGH;
+ case SPCC::CPCC_13: LLVM_FALLTHROUGH;
+ case SPCC::CPCC_12: LLVM_FALLTHROUGH;
+ case SPCC::CPCC_123: LLVM_FALLTHROUGH;
+ case SPCC::CPCC_0: LLVM_FALLTHROUGH;
+ case SPCC::CPCC_03: LLVM_FALLTHROUGH;
+ case SPCC::CPCC_02: LLVM_FALLTHROUGH;
+ case SPCC::CPCC_023: LLVM_FALLTHROUGH;
+ case SPCC::CPCC_01: LLVM_FALLTHROUGH;
+ case SPCC::CPCC_013: LLVM_FALLTHROUGH;
case SPCC::CPCC_012:
// "Opposite" code is not meaningful, as we don't know
// what the CoProc condition means here. The cond-code will
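
The "// Fall through" comments become LLVM_FALLTHROUGH, the macro from llvm/Support/Compiler.h that expands to the [[clang::fallthrough]] / [[fallthrough]] attribute where the compiler supports it (and to nothing otherwise), keeping -Wimplicit-fallthrough quiet. A minimal, Sparc-independent usage sketch:

#include "llvm/Support/Compiler.h"

static int classify(int Kind, int Count) {
  switch (Kind) {
  case 0:
    ++Count;            // case 0 does a little extra work...
    LLVM_FALLTHROUGH;   // ...then deliberately shares case 1's handling
  case 1:
    return Count;
  default:
    return 0;
  }
}
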
@@ -240,14 +240,16 @@ bool SparcInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
return true;
}
-unsigned SparcInstrInfo::InsertBranch(MachineBasicBlock &MBB,
+unsigned SparcInstrInfo::insertBranch(MachineBasicBlock &MBB,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
ArrayRef<MachineOperand> Cond,
- const DebugLoc &DL) const {
- assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ const DebugLoc &DL,
+ int *BytesAdded) const {
+ assert(TBB && "insertBranch must not be told to insert a fallthrough");
assert((Cond.size() == 1 || Cond.size() == 0) &&
"Sparc branch conditions should have one component!");
+ assert(!BytesAdded && "code size not handled");
if (Cond.empty()) {
assert(!FBB && "Unconditional branch with multiple successors!");
@@ -269,8 +271,10 @@ unsigned SparcInstrInfo::InsertBranch(MachineBasicBlock &MBB,
return 2;
}
-unsigned SparcInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const
-{
+unsigned SparcInstrInfo::removeBranch(MachineBasicBlock &MBB,
+ int *BytesRemoved) const {
+ assert(!BytesRemoved && "code size not handled");
+
MachineBasicBlock::iterator I = MBB.end();
unsigned Count = 0;
while (I != MBB.begin()) {
@@ -291,7 +295,7 @@ unsigned SparcInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const
return Count;
}
-bool SparcInstrInfo::ReverseBranchCondition(
+bool SparcInstrInfo::reverseBranchCondition(
SmallVectorImpl<MachineOperand> &Cond) const {
assert(Cond.size() == 1);
SPCC::CondCodes CC = static_cast<SPCC::CondCodes>(Cond[0].getImm());
@@ -397,7 +401,7 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (I != MBB.end()) DL = I->getDebugLoc();
MachineFunction *MF = MBB.getParent();
- const MachineFrameInfo &MFI = *MF->getFrameInfo();
+ const MachineFrameInfo &MFI = MF->getFrameInfo();
MachineMemOperand *MMO = MF->getMachineMemOperand(
MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore,
MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
@@ -436,7 +440,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (I != MBB.end()) DL = I->getDebugLoc();
MachineFunction *MF = MBB.getParent();
- const MachineFrameInfo &MFI = *MF->getFrameInfo();
+ const MachineFrameInfo &MFI = MF->getFrameInfo();
MachineMemOperand *MMO = MF->getMachineMemOperand(
MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
MFI.getObjectSize(FI), MFI.getObjectAlignment(FI));
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h
index 8ed97c1479ca..c053cc4c475b 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h
@@ -70,14 +70,16 @@ public:
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify = false) const override;
- unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
+ unsigned removeBranch(MachineBasicBlock &MBB,
+ int *BytesRemoved = nullptr) const override;
- unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
- const DebugLoc &DL) const override;
+ const DebugLoc &DL,
+ int *BytesAdded = nullptr) const override;
bool
- ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
+ reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
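
These declarations track the TargetInstrInfo renames in this import (InsertBranch/RemoveBranch/ReverseBranchCondition go lowercase) plus the new optional BytesAdded/BytesRemoved out-parameters. Sparc does not measure branch sizes, so the .cpp side simply refuses non-null pointers; an abbreviated sketch of that idiom, with the existing loop elided:

unsigned SparcInstrInfo::removeBranch(MachineBasicBlock &MBB,
                                      int *BytesRemoved) const {
  // This target does not report removed code size; assert rather than
  // hand a wrong number to callers that do care about size.
  assert(!BytesRemoved && "code size not handled");
  unsigned Count = 0;
  // (the existing loop erasing BA/BCOND/FBCOND terminators goes here)
  return Count;
}
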
diff --git a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td
index cc55c9c8e032..5a19c624abb5 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td
+++ b/contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td
@@ -734,8 +734,8 @@ let Defs = [ICC], rd = 0 in {
// Section B.18 - Multiply Instructions, p. 113
let Defs = [Y] in {
- defm UMUL : F3_12np<"umul", 0b001010, IIC_iu_umul>;
- defm SMUL : F3_12 <"smul", 0b001011, mul, IntRegs, i32, simm13Op, IIC_iu_smul>;
+ defm UMUL : F3_12<"umul", 0b001010, umullohi, IntRegs, i32, simm13Op, IIC_iu_umul>;
+ defm SMUL : F3_12<"smul", 0b001011, smullohi, IntRegs, i32, simm13Op, IIC_iu_smul>;
}
let Defs = [Y, ICC] in {
@@ -1131,32 +1131,32 @@ def FQTOI : F3_3u<2, 0b110100, 0b011010011,
def FSTOD : F3_3u<2, 0b110100, 0b011001001,
(outs DFPRegs:$rd), (ins FPRegs:$rs2),
"fstod $rs2, $rd",
- [(set f64:$rd, (fextend f32:$rs2))],
+ [(set f64:$rd, (fpextend f32:$rs2))],
IIC_fpu_stod>;
def FSTOQ : F3_3u<2, 0b110100, 0b011001101,
(outs QFPRegs:$rd), (ins FPRegs:$rs2),
"fstoq $rs2, $rd",
- [(set f128:$rd, (fextend f32:$rs2))]>,
+ [(set f128:$rd, (fpextend f32:$rs2))]>,
Requires<[HasHardQuad]>;
def FDTOS : F3_3u<2, 0b110100, 0b011000110,
(outs FPRegs:$rd), (ins DFPRegs:$rs2),
"fdtos $rs2, $rd",
- [(set f32:$rd, (fround f64:$rs2))],
+ [(set f32:$rd, (fpround f64:$rs2))],
IIC_fpu_fast_instr>;
def FDTOQ : F3_3u<2, 0b110100, 0b011001110,
(outs QFPRegs:$rd), (ins DFPRegs:$rs2),
"fdtoq $rs2, $rd",
- [(set f128:$rd, (fextend f64:$rs2))]>,
+ [(set f128:$rd, (fpextend f64:$rs2))]>,
Requires<[HasHardQuad]>;
def FQTOS : F3_3u<2, 0b110100, 0b011000111,
(outs FPRegs:$rd), (ins QFPRegs:$rs2),
"fqtos $rs2, $rd",
- [(set f32:$rd, (fround f128:$rs2))]>,
+ [(set f32:$rd, (fpround f128:$rs2))]>,
Requires<[HasHardQuad]>;
def FQTOD : F3_3u<2, 0b110100, 0b011001011,
(outs DFPRegs:$rd), (ins QFPRegs:$rs2),
"fqtod $rs2, $rd",
- [(set f64:$rd, (fround f128:$rs2))]>,
+ [(set f64:$rd, (fpround f128:$rs2))]>,
Requires<[HasHardQuad]>;
// Floating-point Move Instructions, p. 144
@@ -1255,14 +1255,14 @@ let Predicates = [HasNoFsmuldFix] in
def FSMULD : F3_3<2, 0b110100, 0b001101001,
(outs DFPRegs:$rd), (ins FPRegs:$rs1, FPRegs:$rs2),
"fsmuld $rs1, $rs2, $rd",
- [(set f64:$rd, (fmul (fextend f32:$rs1),
- (fextend f32:$rs2)))],
+ [(set f64:$rd, (fmul (fpextend f32:$rs1),
+ (fpextend f32:$rs2)))],
IIC_fpu_muld>;
def FDMULQ : F3_3<2, 0b110100, 0b001101110,
(outs QFPRegs:$rd), (ins DFPRegs:$rs1, DFPRegs:$rs2),
"fdmulq $rs1, $rs2, $rd",
- [(set f128:$rd, (fmul (fextend f64:$rs1),
- (fextend f64:$rs2)))]>,
+ [(set f128:$rd, (fmul (fpextend f64:$rs1),
+ (fpextend f64:$rs2)))]>,
Requires<[HasHardQuad]>;
// FDIVS generates an erratum on LEON processors, so by disabling this instruction
diff --git a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.td b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.td
index d1ef3b19dca7..6ecfddfc7d66 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.td
+++ b/contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.td
@@ -331,7 +331,6 @@ def IntRegs : RegisterClass<"SP", [i32, i64], 32,
(sequence "L%u", 0, 7),
(sequence "O%u", 0, 7))>;
-
// Should be in the same order as IntRegs.
def IntPair : RegisterClass<"SP", [v2i32], 64,
(add I0_I1, I2_I3, I4_I5, I6_I7,
diff --git a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp
index a6a4dc54faed..43ddef3cc96e 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp
@@ -40,16 +40,11 @@ SparcSubtarget &SparcSubtarget::initializeSubtargetDependencies(StringRef CPU,
HasLeonCasa = false;
HasUmacSmac = false;
PerformSDIVReplace = false;
- FixCallImmediates = false;
- IgnoreZeroFlag = false;
- InsertNOPDoublePrecision = false;
+ InsertNOPLoad = false;
FixFSMULD = false;
ReplaceFMULS = false;
- PreventRoundChange = false;
FixAllFDIVSQRT = false;
- InsertNOPLoad = false;
- FlushCacheLineSWAP = false;
- InsertNOPsLoadStore = false;
+ DetectRoundChange = false;
// Determine default and user specified characteristics
std::string CPUName = CPU;
diff --git a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h
index 42d693699994..fa42da425ff2 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcSubtarget.h
@@ -48,14 +48,8 @@ class SparcSubtarget : public SparcGenSubtargetInfo {
bool FixFSMULD;
bool ReplaceFMULS;
bool FixAllFDIVSQRT;
- bool UseSoftFpu;
+ bool DetectRoundChange;
bool PerformSDIVReplace;
- bool FixCallImmediates;
- bool IgnoreZeroFlag;
- bool InsertNOPDoublePrecision;
- bool PreventRoundChange;
- bool FlushCacheLineSWAP;
- bool InsertNOPsLoadStore;
SparcInstrInfo InstrInfo;
SparcTargetLowering TLInfo;
@@ -93,20 +87,14 @@ public:
bool useSoftFloat() const { return UseSoftFloat; }
// Leon options
- bool useSoftFpu() const { return UseSoftFpu; }
- bool hasLeonCasa() const { return HasLeonCasa; }
bool hasUmacSmac() const { return HasUmacSmac; }
bool performSDIVReplace() const { return PerformSDIVReplace; }
- bool fixCallImmediates() const { return FixCallImmediates; }
- bool ignoreZeroFlag() const { return IgnoreZeroFlag; }
- bool insertNOPDoublePrecision() const { return InsertNOPDoublePrecision; }
+ bool hasLeonCasa() const { return HasLeonCasa; }
+ bool insertNOPLoad() const { return InsertNOPLoad; }
bool fixFSMULD() const { return FixFSMULD; }
bool replaceFMULS() const { return ReplaceFMULS; }
- bool preventRoundChange() const { return PreventRoundChange; }
bool fixAllFDIVSQRT() const { return FixAllFDIVSQRT; }
- bool flushCacheLineSWAP() const { return FlushCacheLineSWAP; }
- bool insertNOPsLoadStore() const { return InsertNOPsLoadStore; }
- bool insertNOPLoad() const { return InsertNOPLoad; }
+ bool detectRoundChange() const { return DetectRoundChange; }
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
diff --git a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
index 17fe86a70844..4ae64062d9e2 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -22,9 +22,9 @@ using namespace llvm;
extern "C" void LLVMInitializeSparcTarget() {
// Register the target.
- RegisterTargetMachine<SparcV8TargetMachine> X(TheSparcTarget);
- RegisterTargetMachine<SparcV9TargetMachine> Y(TheSparcV9Target);
- RegisterTargetMachine<SparcelTargetMachine> Z(TheSparcelTarget);
+ RegisterTargetMachine<SparcV8TargetMachine> X(getTheSparcTarget());
+ RegisterTargetMachine<SparcV9TargetMachine> Y(getTheSparcV9Target());
+ RegisterTargetMachine<SparcelTargetMachine> Z(getTheSparcelTarget());
}
static std::string computeDataLayout(const Triple &T, bool is64Bit) {
@@ -76,7 +76,7 @@ SparcTargetMachine::SparcTargetMachine(const Target &T, const Triple &TT,
SparcTargetMachine::~SparcTargetMachine() {}
-const SparcSubtarget *
+const SparcSubtarget *
SparcTargetMachine::getSubtargetImpl(const Function &F) const {
Attribute CPUAttr = F.getFnAttribute("target-cpu");
Attribute FSAttr = F.getFnAttribute("target-features");
@@ -95,7 +95,7 @@ SparcTargetMachine::getSubtargetImpl(const Function &F) const {
F.hasFnAttribute("use-soft-float") &&
F.getFnAttribute("use-soft-float").getValueAsString() == "true";
- if (softFloat)
+ if (softFloat)
FS += FS.empty() ? "+soft-float" : ",+soft-float";
auto &I = SubtargetMap[CPU + FS];
@@ -115,7 +115,7 @@ namespace {
class SparcPassConfig : public TargetPassConfig {
public:
SparcPassConfig(SparcTargetMachine *TM, PassManagerBase &PM)
- : TargetPassConfig(TM, PM) {}
+ : TargetPassConfig(TM, PM) {}
SparcTargetMachine &getSparcTargetMachine() const {
return getTM<SparcTargetMachine>();
@@ -142,46 +142,31 @@ bool SparcPassConfig::addInstSelector() {
return false;
}
-void SparcPassConfig::addPreEmitPass() {
+void SparcPassConfig::addPreEmitPass(){
addPass(createSparcDelaySlotFillerPass(getSparcTargetMachine()));
- if (this->getSparcTargetMachine().getSubtargetImpl()->ignoreZeroFlag()) {
- addPass(new IgnoreZeroFlag(getSparcTargetMachine()));
- }
- if (this->getSparcTargetMachine().getSubtargetImpl()->performSDIVReplace()) {
- addPass(new ReplaceSDIV(getSparcTargetMachine()));
- }
- if (this->getSparcTargetMachine().getSubtargetImpl()->fixCallImmediates()) {
- addPass(new FixCALL(getSparcTargetMachine()));
+
+ if (this->getSparcTargetMachine().getSubtargetImpl()->insertNOPLoad())
+ {
+ addPass(new InsertNOPLoad(getSparcTargetMachine()));
}
- if (this->getSparcTargetMachine().getSubtargetImpl()->fixFSMULD()) {
+ if (this->getSparcTargetMachine().getSubtargetImpl()->fixFSMULD())
+ {
addPass(new FixFSMULD(getSparcTargetMachine()));
}
- if (this->getSparcTargetMachine().getSubtargetImpl()->replaceFMULS()) {
+ if (this->getSparcTargetMachine().getSubtargetImpl()->replaceFMULS())
+ {
addPass(new ReplaceFMULS(getSparcTargetMachine()));
}
- if (this->getSparcTargetMachine().getSubtargetImpl()->preventRoundChange()) {
- addPass(new PreventRoundChange(getSparcTargetMachine()));
+ if (this->getSparcTargetMachine().getSubtargetImpl()->detectRoundChange()) {
+ addPass(new DetectRoundChange(getSparcTargetMachine()));
}
- if (this->getSparcTargetMachine().getSubtargetImpl()->fixAllFDIVSQRT()) {
+ if (this->getSparcTargetMachine().getSubtargetImpl()->fixAllFDIVSQRT())
+ {
addPass(new FixAllFDIVSQRT(getSparcTargetMachine()));
}
- if (this->getSparcTargetMachine().getSubtargetImpl()->insertNOPsLoadStore()) {
- addPass(new InsertNOPsLoadStore(getSparcTargetMachine()));
- }
- if (this->getSparcTargetMachine().getSubtargetImpl()->insertNOPLoad()) {
- addPass(new InsertNOPLoad(getSparcTargetMachine()));
- }
- if (this->getSparcTargetMachine().getSubtargetImpl()->flushCacheLineSWAP()) {
- addPass(new FlushCacheLineSWAP(getSparcTargetMachine()));
- }
- if (this->getSparcTargetMachine()
- .getSubtargetImpl()
- ->insertNOPDoublePrecision()) {
- addPass(new InsertNOPDoublePrecision(getSparcTargetMachine()));
- }
}
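
addPreEmitPass() now keeps only the erratum passes that still have subtarget flags behind them, each gated on its accessor; the dropped guards (fixCallImmediates, ignoreZeroFlag, insertNOPDoublePrecision, flushCacheLineSWAP, insertNOPsLoadStore) were already removed from SparcSubtarget in the hunks above. The surviving idiom, condensed into one place as a sketch:

void SparcPassConfig::addPreEmitPass() {
  addPass(createSparcDelaySlotFillerPass(getSparcTargetMachine()));

  const SparcSubtarget *ST = getSparcTargetMachine().getSubtargetImpl();
  if (ST->insertNOPLoad())
    addPass(new InsertNOPLoad(getSparcTargetMachine()));
  if (ST->fixFSMULD())
    addPass(new FixFSMULD(getSparcTargetMachine()));
  if (ST->replaceFMULS())
    addPass(new ReplaceFMULS(getSparcTargetMachine()));
  if (ST->detectRoundChange())
    addPass(new DetectRoundChange(getSparcTargetMachine()));
  if (ST->fixAllFDIVSQRT())
    addPass(new FixAllFDIVSQRT(getSparcTargetMachine()));
}
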
-void SparcV8TargetMachine::anchor() {}
+void SparcV8TargetMachine::anchor() { }
SparcV8TargetMachine::SparcV8TargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
@@ -191,7 +176,7 @@ SparcV8TargetMachine::SparcV8TargetMachine(const Target &T, const Triple &TT,
CodeGenOpt::Level OL)
: SparcTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
-void SparcV9TargetMachine::anchor() {}
+void SparcV9TargetMachine::anchor() { }
SparcV9TargetMachine::SparcV9TargetMachine(const Target &T, const Triple &TT,
StringRef CPU, StringRef FS,
diff --git a/contrib/llvm/lib/Target/Sparc/SparcTargetObjectFile.cpp b/contrib/llvm/lib/Target/Sparc/SparcTargetObjectFile.cpp
index 412e124f9a26..8fdde15d8d27 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcTargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/Sparc/SparcTargetObjectFile.cpp
@@ -16,20 +16,19 @@
using namespace llvm;
const MCExpr *SparcELFTargetObjectFile::getTTypeGlobalReference(
- const GlobalValue *GV, unsigned Encoding, Mangler &Mang,
- const TargetMachine &TM, MachineModuleInfo *MMI,
- MCStreamer &Streamer) const {
+ const GlobalValue *GV, unsigned Encoding, const TargetMachine &TM,
+ MachineModuleInfo *MMI, MCStreamer &Streamer) const {
if (Encoding & dwarf::DW_EH_PE_pcrel) {
MachineModuleInfoELF &ELFMMI = MMI->getObjFileInfo<MachineModuleInfoELF>();
- MCSymbol *SSym = getSymbolWithGlobalValueBase(GV, ".DW.stub", Mang, TM);
+ MCSymbol *SSym = getSymbolWithGlobalValueBase(GV, ".DW.stub", TM);
// Add information about the stub reference to ELFMMI so that the stub
// gets emitted by the asmprinter.
MachineModuleInfoImpl::StubValueTy &StubSym = ELFMMI.getGVStubEntry(SSym);
if (!StubSym.getPointer()) {
- MCSymbol *Sym = TM.getSymbol(GV, Mang);
+ MCSymbol *Sym = TM.getSymbol(GV);
StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
}
@@ -38,6 +37,6 @@ const MCExpr *SparcELFTargetObjectFile::getTTypeGlobalReference(
MCSymbolRefExpr::create(SSym, Ctx), Ctx);
}
- return TargetLoweringObjectFileELF::getTTypeGlobalReference(
- GV, Encoding, Mang, TM, MMI, Streamer);
+ return TargetLoweringObjectFileELF::getTTypeGlobalReference(GV, Encoding, TM,
+ MMI, Streamer);
}
diff --git a/contrib/llvm/lib/Target/Sparc/SparcTargetObjectFile.h b/contrib/llvm/lib/Target/Sparc/SparcTargetObjectFile.h
index 76c8cca04680..fe8800625a56 100644
--- a/contrib/llvm/lib/Target/Sparc/SparcTargetObjectFile.h
+++ b/contrib/llvm/lib/Target/Sparc/SparcTargetObjectFile.h
@@ -23,11 +23,11 @@ public:
TargetLoweringObjectFileELF()
{}
- const MCExpr *
- getTTypeGlobalReference(const GlobalValue *GV, unsigned Encoding,
- Mangler &Mang, const TargetMachine &TM,
- MachineModuleInfo *MMI,
- MCStreamer &Streamer) const override;
+ const MCExpr *getTTypeGlobalReference(const GlobalValue *GV,
+ unsigned Encoding,
+ const TargetMachine &TM,
+ MachineModuleInfo *MMI,
+ MCStreamer &Streamer) const override;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp b/contrib/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
index ab1c6beee4eb..66178acd52ba 100644
--- a/contrib/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
+++ b/contrib/llvm/lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp
@@ -12,15 +12,24 @@
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
-Target llvm::TheSparcTarget;
-Target llvm::TheSparcV9Target;
-Target llvm::TheSparcelTarget;
+Target &llvm::getTheSparcTarget() {
+ static Target TheSparcTarget;
+ return TheSparcTarget;
+}
+Target &llvm::getTheSparcV9Target() {
+ static Target TheSparcV9Target;
+ return TheSparcV9Target;
+}
+Target &llvm::getTheSparcelTarget() {
+ static Target TheSparcelTarget;
+ return TheSparcelTarget;
+}
extern "C" void LLVMInitializeSparcTargetInfo() {
- RegisterTarget<Triple::sparc, /*HasJIT=*/true> X(TheSparcTarget, "sparc",
+ RegisterTarget<Triple::sparc, /*HasJIT=*/true> X(getTheSparcTarget(), "sparc",
"Sparc");
- RegisterTarget<Triple::sparcv9, /*HasJIT=*/true> Y(TheSparcV9Target,
+ RegisterTarget<Triple::sparcv9, /*HasJIT=*/true> Y(getTheSparcV9Target(),
"sparcv9", "Sparc V9");
- RegisterTarget<Triple::sparcel, /*HasJIT=*/true> Z(TheSparcelTarget,
+ RegisterTarget<Triple::sparcel, /*HasJIT=*/true> Z(getTheSparcelTarget(),
"sparcel", "Sparc LE");
}
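
The TheSparcTarget/TheSparcV9Target/TheSparcelTarget globals become accessors returning function-local statics, part of an LLVM-wide cleanup: no global constructors at load time and well-defined first-use initialization when RegisterTarget runs. The generic shape of the pattern, with placeholder names (Example* is hypothetical, not an in-tree target):

// Function-local static: constructed on first call, thread-safe in C++11,
// and no static constructor is emitted into every object file.
Target &getTheExampleTarget() {
  static Target TheExampleTarget;
  return TheExampleTarget;
}
// Registration then takes the Target by reference, exactly as the Sparc
// hunk above does with getTheSparcTarget().
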
diff --git a/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
index 3923614c89d3..a94717c93456 100644
--- a/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
@@ -12,6 +12,7 @@
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstBuilder.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCStreamer.h"
@@ -42,13 +43,15 @@ enum RegisterKind {
FP128Reg,
VR32Reg,
VR64Reg,
- VR128Reg
+ VR128Reg,
+ AR32Reg,
};
enum MemoryKind {
BDMem,
BDXMem,
BDLMem,
+ BDRMem,
BDVMem
};
@@ -59,7 +62,6 @@ private:
KindInvalid,
KindToken,
KindReg,
- KindAccessReg,
KindImm,
KindImmTLS,
KindMem
@@ -98,7 +100,10 @@ private:
unsigned MemKind : 4;
unsigned RegKind : 4;
const MCExpr *Disp;
- const MCExpr *Length;
+ union {
+ const MCExpr *Imm;
+ unsigned Reg;
+ } Length;
};
// Imm is an immediate operand, and Sym is an optional TLS symbol
@@ -111,7 +116,6 @@ private:
union {
TokenOp Token;
RegOp Reg;
- unsigned AccessReg;
const MCExpr *Imm;
ImmTLSOp ImmTLS;
MemOp Mem;
@@ -150,12 +154,6 @@ public:
return Op;
}
static std::unique_ptr<SystemZOperand>
- createAccessReg(unsigned Num, SMLoc StartLoc, SMLoc EndLoc) {
- auto Op = make_unique<SystemZOperand>(KindAccessReg, StartLoc, EndLoc);
- Op->AccessReg = Num;
- return Op;
- }
- static std::unique_ptr<SystemZOperand>
createImm(const MCExpr *Expr, SMLoc StartLoc, SMLoc EndLoc) {
auto Op = make_unique<SystemZOperand>(KindImm, StartLoc, EndLoc);
Op->Imm = Expr;
@@ -163,15 +161,18 @@ public:
}
static std::unique_ptr<SystemZOperand>
createMem(MemoryKind MemKind, RegisterKind RegKind, unsigned Base,
- const MCExpr *Disp, unsigned Index, const MCExpr *Length,
- SMLoc StartLoc, SMLoc EndLoc) {
+ const MCExpr *Disp, unsigned Index, const MCExpr *LengthImm,
+ unsigned LengthReg, SMLoc StartLoc, SMLoc EndLoc) {
auto Op = make_unique<SystemZOperand>(KindMem, StartLoc, EndLoc);
Op->Mem.MemKind = MemKind;
Op->Mem.RegKind = RegKind;
Op->Mem.Base = Base;
Op->Mem.Index = Index;
Op->Mem.Disp = Disp;
- Op->Mem.Length = Length;
+ if (MemKind == BDLMem)
+ Op->Mem.Length.Imm = LengthImm;
+ if (MemKind == BDRMem)
+ Op->Mem.Length.Reg = LengthReg;
return Op;
}
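
MemOp::Length turns into a two-member union because a BDL address carries an immediate length while the new BDR form carries a length register; createMem() stores whichever member matches MemKind, and every reader below checks MemKind before touching it. A self-contained sketch of the same tagged-union discipline (the enum and struct here are illustrative stand-ins, not the file's own definitions):

#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"

enum MemoryKind { BDLMem, BDRMem };

struct MemOp {
  MemoryKind MemKind;
  union {
    const llvm::MCExpr *Imm; // valid only when MemKind == BDLMem
    unsigned Reg;            // valid only when MemKind == BDRMem
  } Length;
};

void addLengthOperand(llvm::MCInst &Inst, const MemOp &Mem) {
  if (Mem.MemKind == BDLMem)
    Inst.addOperand(llvm::MCOperand::createExpr(Mem.Length.Imm));
  else if (Mem.MemKind == BDRMem)
    Inst.addOperand(llvm::MCOperand::createReg(Mem.Length.Reg));
}
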
static std::unique_ptr<SystemZOperand>
@@ -204,12 +205,6 @@ public:
return Reg.Num;
}
- // Access register operands. Access registers aren't exposed to LLVM
- // as registers.
- bool isAccessReg() const {
- return Kind == KindAccessReg;
- }
-
// Immediate operands.
bool isImm() const override {
return Kind == KindImm;
@@ -248,14 +243,7 @@ public:
return isMem(MemKind, RegKind) && inRange(Mem.Disp, -524288, 524287);
}
bool isMemDisp12Len8(RegisterKind RegKind) const {
- return isMemDisp12(BDLMem, RegKind) && inRange(Mem.Length, 1, 0x100);
- }
- void addBDVAddrOperands(MCInst &Inst, unsigned N) const {
- assert(N == 3 && "Invalid number of operands");
- assert(isMem(BDVMem) && "Invalid operand type");
- Inst.addOperand(MCOperand::createReg(Mem.Base));
- addExpr(Inst, Mem.Disp);
- Inst.addOperand(MCOperand::createReg(Mem.Index));
+ return isMemDisp12(BDLMem, RegKind) && inRange(Mem.Length.Imm, 1, 0x100);
}
// Override MCParsedAsmOperand.
@@ -269,11 +257,6 @@ public:
assert(N == 1 && "Invalid number of operands");
Inst.addOperand(MCOperand::createReg(getReg()));
}
- void addAccessRegOperands(MCInst &Inst, unsigned N) const {
- assert(N == 1 && "Invalid number of operands");
- assert(Kind == KindAccessReg && "Invalid operand type");
- Inst.addOperand(MCOperand::createImm(AccessReg));
- }
void addImmOperands(MCInst &Inst, unsigned N) const {
assert(N == 1 && "Invalid number of operands");
addExpr(Inst, getImm());
@@ -296,7 +279,21 @@ public:
assert(isMem(BDLMem) && "Invalid operand type");
Inst.addOperand(MCOperand::createReg(Mem.Base));
addExpr(Inst, Mem.Disp);
- addExpr(Inst, Mem.Length);
+ addExpr(Inst, Mem.Length.Imm);
+ }
+ void addBDRAddrOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 3 && "Invalid number of operands");
+ assert(isMem(BDRMem) && "Invalid operand type");
+ Inst.addOperand(MCOperand::createReg(Mem.Base));
+ addExpr(Inst, Mem.Disp);
+ Inst.addOperand(MCOperand::createReg(Mem.Length.Reg));
+ }
+ void addBDVAddrOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 3 && "Invalid number of operands");
+ assert(isMem(BDVMem) && "Invalid operand type");
+ Inst.addOperand(MCOperand::createReg(Mem.Base));
+ addExpr(Inst, Mem.Disp);
+ Inst.addOperand(MCOperand::createReg(Mem.Index));
}
void addImmTLSOperands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands");
@@ -322,6 +319,8 @@ public:
bool isVR64() const { return isReg(VR64Reg); }
bool isVF128() const { return false; }
bool isVR128() const { return isReg(VR128Reg); }
+ bool isAR32() const { return isReg(AR32Reg); }
+ bool isAnyReg() const { return (isReg() || isImm(0, 15)); }
bool isBDAddr32Disp12() const { return isMemDisp12(BDMem, ADDR32Reg); }
bool isBDAddr32Disp20() const { return isMemDisp20(BDMem, ADDR32Reg); }
bool isBDAddr64Disp12() const { return isMemDisp12(BDMem, ADDR64Reg); }
@@ -329,6 +328,7 @@ public:
bool isBDXAddr64Disp12() const { return isMemDisp12(BDXMem, ADDR64Reg); }
bool isBDXAddr64Disp20() const { return isMemDisp20(BDXMem, ADDR64Reg); }
bool isBDLAddr64Disp12Len8() const { return isMemDisp12Len8(ADDR64Reg); }
+ bool isBDRAddr64Disp12() const { return isMemDisp12(BDRMem, ADDR64Reg); }
bool isBDVAddr64Disp12() const { return isMemDisp12(BDVMem, ADDR64Reg); }
bool isU1Imm() const { return isImm(0, 1); }
bool isU2Imm() const { return isImm(0, 3); }
@@ -342,6 +342,7 @@ public:
bool isS16Imm() const { return isImm(-32768, 32767); }
bool isU32Imm() const { return isImm(0, (1LL << 32) - 1); }
bool isS32Imm() const { return isImm(-(1LL << 31), (1LL << 31) - 1); }
+ bool isU48Imm() const { return isImm(0, (1LL << 48) - 1); }
};
class SystemZAsmParser : public MCTargetAsmParser {
@@ -354,7 +355,7 @@ private:
RegGR,
RegFP,
RegV,
- RegAccess
+ RegAR
};
struct Register {
RegisterGroup Group;
@@ -371,9 +372,14 @@ private:
RegisterGroup Group, const unsigned *Regs,
RegisterKind Kind);
- bool parseAddress(unsigned &Base, const MCExpr *&Disp,
- unsigned &Index, bool &IsVector, const MCExpr *&Length,
- const unsigned *Regs, RegisterKind RegKind);
+ OperandMatchResultTy parseAnyRegister(OperandVector &Operands);
+
+ bool parseAddress(bool &HaveReg1, Register &Reg1,
+ bool &HaveReg2, Register &Reg2,
+ const MCExpr *&Disp, const MCExpr *&Length);
+ bool parseAddressRegister(Register &Reg);
+
+ bool ParseDirectiveInsn(SMLoc L);
OperandMatchResultTy parseAddress(OperandVector &Operands,
MemoryKind MemKind, const unsigned *Regs,
@@ -454,6 +460,12 @@ public:
OperandMatchResultTy parseVR128(OperandVector &Operands) {
return parseRegister(Operands, RegV, SystemZMC::VR128Regs, VR128Reg);
}
+ OperandMatchResultTy parseAR32(OperandVector &Operands) {
+ return parseRegister(Operands, RegAR, SystemZMC::AR32Regs, AR32Reg);
+ }
+ OperandMatchResultTy parseAnyReg(OperandVector &Operands) {
+ return parseAnyRegister(Operands);
+ }
OperandMatchResultTy parseBDAddr32(OperandVector &Operands) {
return parseAddress(Operands, BDMem, SystemZMC::GR32Regs, ADDR32Reg);
}
@@ -466,13 +478,21 @@ public:
OperandMatchResultTy parseBDLAddr64(OperandVector &Operands) {
return parseAddress(Operands, BDLMem, SystemZMC::GR64Regs, ADDR64Reg);
}
+ OperandMatchResultTy parseBDRAddr64(OperandVector &Operands) {
+ return parseAddress(Operands, BDRMem, SystemZMC::GR64Regs, ADDR64Reg);
+ }
OperandMatchResultTy parseBDVAddr64(OperandVector &Operands) {
return parseAddress(Operands, BDVMem, SystemZMC::GR64Regs, ADDR64Reg);
}
- OperandMatchResultTy parseAccessReg(OperandVector &Operands);
+ OperandMatchResultTy parsePCRel12(OperandVector &Operands) {
+ return parsePCRel(Operands, -(1LL << 12), (1LL << 12) - 1, false);
+ }
OperandMatchResultTy parsePCRel16(OperandVector &Operands) {
return parsePCRel(Operands, -(1LL << 16), (1LL << 16) - 1, false);
}
+ OperandMatchResultTy parsePCRel24(OperandVector &Operands) {
+ return parsePCRel(Operands, -(1LL << 24), (1LL << 24) - 1, false);
+ }
OperandMatchResultTy parsePCRel32(OperandVector &Operands) {
return parsePCRel(Operands, -(1LL << 32), (1LL << 32) - 1, false);
}
@@ -490,6 +510,83 @@ public:
#define GET_MATCHER_IMPLEMENTATION
#include "SystemZGenAsmMatcher.inc"
+// Used for the .insn directives; contains information needed to parse the
+// operands in the directive.
+struct InsnMatchEntry {
+ StringRef Format;
+ uint64_t Opcode;
+ int32_t NumOperands;
+ MatchClassKind OperandKinds[5];
+};
+
+// For equal_range comparison.
+struct CompareInsn {
+ bool operator() (const InsnMatchEntry &LHS, StringRef RHS) {
+ return LHS.Format < RHS;
+ }
+ bool operator() (StringRef LHS, const InsnMatchEntry &RHS) {
+ return LHS < RHS.Format;
+ }
+ bool operator() (const InsnMatchEntry &LHS, const InsnMatchEntry &RHS) {
+ return LHS.Format < RHS.Format;
+ }
+};
+
+// Table of the information used to parse the .insn directive.
+static struct InsnMatchEntry InsnMatchTable[] = {
+ /* Format, Opcode, NumOperands, OperandKinds */
+ { "e", SystemZ::InsnE, 1,
+ { MCK_U16Imm } },
+ { "ri", SystemZ::InsnRI, 3,
+ { MCK_U32Imm, MCK_AnyReg, MCK_S16Imm } },
+ { "rie", SystemZ::InsnRIE, 4,
+ { MCK_U48Imm, MCK_AnyReg, MCK_AnyReg, MCK_PCRel16 } },
+ { "ril", SystemZ::InsnRIL, 3,
+ { MCK_U48Imm, MCK_AnyReg, MCK_PCRel32 } },
+ { "rilu", SystemZ::InsnRILU, 3,
+ { MCK_U48Imm, MCK_AnyReg, MCK_U32Imm } },
+ { "ris", SystemZ::InsnRIS, 5,
+ { MCK_U48Imm, MCK_AnyReg, MCK_S8Imm, MCK_U4Imm, MCK_BDAddr64Disp12 } },
+ { "rr", SystemZ::InsnRR, 3,
+ { MCK_U16Imm, MCK_AnyReg, MCK_AnyReg } },
+ { "rre", SystemZ::InsnRRE, 3,
+ { MCK_U32Imm, MCK_AnyReg, MCK_AnyReg } },
+ { "rrf", SystemZ::InsnRRF, 5,
+ { MCK_U32Imm, MCK_AnyReg, MCK_AnyReg, MCK_AnyReg, MCK_U4Imm } },
+ { "rrs", SystemZ::InsnRRS, 5,
+ { MCK_U48Imm, MCK_AnyReg, MCK_AnyReg, MCK_U4Imm, MCK_BDAddr64Disp12 } },
+ { "rs", SystemZ::InsnRS, 4,
+ { MCK_U32Imm, MCK_AnyReg, MCK_AnyReg, MCK_BDAddr64Disp12 } },
+ { "rse", SystemZ::InsnRSE, 4,
+ { MCK_U48Imm, MCK_AnyReg, MCK_AnyReg, MCK_BDAddr64Disp12 } },
+ { "rsi", SystemZ::InsnRSI, 4,
+ { MCK_U48Imm, MCK_AnyReg, MCK_AnyReg, MCK_PCRel16 } },
+ { "rsy", SystemZ::InsnRSY, 4,
+ { MCK_U48Imm, MCK_AnyReg, MCK_AnyReg, MCK_BDAddr64Disp20 } },
+ { "rx", SystemZ::InsnRX, 3,
+ { MCK_U32Imm, MCK_AnyReg, MCK_BDXAddr64Disp12 } },
+ { "rxe", SystemZ::InsnRXE, 3,
+ { MCK_U48Imm, MCK_AnyReg, MCK_BDXAddr64Disp12 } },
+ { "rxf", SystemZ::InsnRXF, 4,
+ { MCK_U48Imm, MCK_AnyReg, MCK_AnyReg, MCK_BDXAddr64Disp12 } },
+ { "rxy", SystemZ::InsnRXY, 3,
+ { MCK_U48Imm, MCK_AnyReg, MCK_BDXAddr64Disp20 } },
+ { "s", SystemZ::InsnS, 2,
+ { MCK_U32Imm, MCK_BDAddr64Disp12 } },
+ { "si", SystemZ::InsnSI, 3,
+ { MCK_U32Imm, MCK_BDAddr64Disp12, MCK_S8Imm } },
+ { "sil", SystemZ::InsnSIL, 3,
+ { MCK_U48Imm, MCK_BDAddr64Disp12, MCK_U16Imm } },
+ { "siy", SystemZ::InsnSIY, 3,
+ { MCK_U48Imm, MCK_BDAddr64Disp20, MCK_U8Imm } },
+ { "ss", SystemZ::InsnSS, 4,
+ { MCK_U48Imm, MCK_BDXAddr64Disp12, MCK_BDAddr64Disp12, MCK_AnyReg } },
+ { "sse", SystemZ::InsnSSE, 3,
+ { MCK_U48Imm, MCK_BDAddr64Disp12, MCK_BDAddr64Disp12 } },
+ { "ssf", SystemZ::InsnSSF, 4,
+ { MCK_U48Imm, MCK_BDAddr64Disp12, MCK_BDAddr64Disp12, MCK_AnyReg } }
+};
+
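+// Illustrative example (assumed input): a directive such as
+//     .insn rr,0x1800,%r2,%r3
+// matches the "rr" entry above, so the parser expects a U16Imm opcode value
+// followed by two AnyReg operands and re-emits the result as SystemZ::InsnRR.
+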
void SystemZOperand::print(raw_ostream &OS) const {
llvm_unreachable("Not implemented");
}
@@ -525,7 +622,7 @@ bool SystemZAsmParser::parseRegister(Register &Reg) {
else if (Prefix == 'v' && Reg.Num < 32)
Reg.Group = RegV;
else if (Prefix == 'a' && Reg.Num < 16)
- Reg.Group = RegAccess;
+ Reg.Group = RegAR;
else
return Error(Reg.StartLoc, "invalid register");
@@ -556,7 +653,7 @@ bool SystemZAsmParser::parseRegister(Register &Reg, RegisterGroup Group,
}
// Parse a register and add it to Operands. The other arguments are as above.
-SystemZAsmParser::OperandMatchResultTy
+OperandMatchResultTy
SystemZAsmParser::parseRegister(OperandVector &Operands, RegisterGroup Group,
const unsigned *Regs, RegisterKind Kind) {
if (Parser.getTok().isNot(AsmToken::Percent))
@@ -572,58 +669,96 @@ SystemZAsmParser::parseRegister(OperandVector &Operands, RegisterGroup Group,
return MatchOperand_Success;
}
-// Parse a memory operand into Base, Disp, Index and Length.
-// Regs maps asm register numbers to LLVM register numbers and RegKind
-// says what kind of address register we're using (ADDR32Reg or ADDR64Reg).
-bool SystemZAsmParser::parseAddress(unsigned &Base, const MCExpr *&Disp,
- unsigned &Index, bool &IsVector,
- const MCExpr *&Length, const unsigned *Regs,
- RegisterKind RegKind) {
+// Parse any type of register (including integers) and add it to Operands.
+OperandMatchResultTy
+SystemZAsmParser::parseAnyRegister(OperandVector &Operands) {
+ // Handle integer values.
+ if (Parser.getTok().is(AsmToken::Integer)) {
+ const MCExpr *Register;
+ SMLoc StartLoc = Parser.getTok().getLoc();
+ if (Parser.parseExpression(Register))
+ return MatchOperand_ParseFail;
+
+ if (auto *CE = dyn_cast<MCConstantExpr>(Register)) {
+ int64_t Value = CE->getValue();
+ if (Value < 0 || Value > 15) {
+ Error(StartLoc, "invalid register");
+ return MatchOperand_ParseFail;
+ }
+ }
+
+ SMLoc EndLoc =
+ SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+
+ Operands.push_back(SystemZOperand::createImm(Register, StartLoc, EndLoc));
+ }
+ else {
+ Register Reg;
+ if (parseRegister(Reg))
+ return MatchOperand_ParseFail;
+
+ // Map to the correct register kind.
+ RegisterKind Kind;
+ unsigned RegNo;
+ if (Reg.Group == RegGR) {
+ Kind = GR64Reg;
+ RegNo = SystemZMC::GR64Regs[Reg.Num];
+ }
+ else if (Reg.Group == RegFP) {
+ Kind = FP64Reg;
+ RegNo = SystemZMC::FP64Regs[Reg.Num];
+ }
+ else if (Reg.Group == RegV) {
+ Kind = VR128Reg;
+ RegNo = SystemZMC::VR128Regs[Reg.Num];
+ }
+ else if (Reg.Group == RegAR) {
+ Kind = AR32Reg;
+ RegNo = SystemZMC::AR32Regs[Reg.Num];
+ }
+ else {
+ return MatchOperand_ParseFail;
+ }
+
+ Operands.push_back(SystemZOperand::createReg(Kind, RegNo,
+ Reg.StartLoc, Reg.EndLoc));
+ }
+ return MatchOperand_Success;
+}
+
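+// Illustrative examples (assumed inputs): "%r5" is pushed as a GR64 register
+// operand, "%a3" as an AR32 operand, and a bare "7" becomes an immediate in
+// the range 0-15, which the AnyReg operand class also accepts.
+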
+// Parse a memory operand into Reg1, Reg2, Disp, and Length.
+bool SystemZAsmParser::parseAddress(bool &HaveReg1, Register &Reg1,
+ bool &HaveReg2, Register &Reg2,
+ const MCExpr *&Disp,
+ const MCExpr *&Length) {
// Parse the displacement, which must always be present.
if (getParser().parseExpression(Disp))
return true;
// Parse the optional base and index.
- Index = 0;
- Base = 0;
- IsVector = false;
+ HaveReg1 = false;
+ HaveReg2 = false;
Length = nullptr;
if (getLexer().is(AsmToken::LParen)) {
Parser.Lex();
if (getLexer().is(AsmToken::Percent)) {
- // Parse the first register and decide whether it's a base or an index.
- Register Reg;
- if (parseRegister(Reg))
+ // Parse the first register.
+ HaveReg1 = true;
+ if (parseRegister(Reg1))
return true;
- if (Reg.Group == RegV) {
- // A vector index register. The base register is optional.
- IsVector = true;
- Index = SystemZMC::VR128Regs[Reg.Num];
- } else if (Reg.Group == RegGR) {
- if (Reg.Num == 0)
- return Error(Reg.StartLoc, "%r0 used in an address");
- // If the are two registers, the first one is the index and the
- // second is the base.
- if (getLexer().is(AsmToken::Comma))
- Index = Regs[Reg.Num];
- else
- Base = Regs[Reg.Num];
- } else
- return Error(Reg.StartLoc, "invalid address register");
} else {
// Parse the length.
if (getParser().parseExpression(Length))
return true;
}
- // Check whether there's a second register. It's the base if so.
+ // Check whether there's a second register.
if (getLexer().is(AsmToken::Comma)) {
Parser.Lex();
- Register Reg;
- if (parseRegister(Reg, RegGR, Regs, RegKind))
+ HaveReg2 = true;
+ if (parseRegister(Reg2))
return true;
- Base = Reg.Num;
}
// Consume the closing bracket.
@@ -634,56 +769,255 @@ bool SystemZAsmParser::parseAddress(unsigned &Base, const MCExpr *&Disp,
return false;
}
+// Verify that Reg is a valid address register (base or index).
+bool
+SystemZAsmParser::parseAddressRegister(Register &Reg) {
+ if (Reg.Group == RegV) {
+ Error(Reg.StartLoc, "invalid use of vector addressing");
+ return true;
+ } else if (Reg.Group != RegGR) {
+ Error(Reg.StartLoc, "invalid address register");
+ return true;
+ } else if (Reg.Num == 0) {
+ Error(Reg.StartLoc, "%r0 used in an address");
+ return true;
+ }
+ return false;
+}
+
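+// Illustrative examples (assumed inputs): in "16(%r1,%r15)" both %r1 and %r15
+// pass this check, while "16(%r0)" is rejected with "%r0 used in an address"
+// and "16(%f2)" with "invalid address register".
+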
// Parse a memory operand and add it to Operands. The other arguments
// are as above.
-SystemZAsmParser::OperandMatchResultTy
+OperandMatchResultTy
SystemZAsmParser::parseAddress(OperandVector &Operands, MemoryKind MemKind,
const unsigned *Regs, RegisterKind RegKind) {
SMLoc StartLoc = Parser.getTok().getLoc();
- unsigned Base, Index;
- bool IsVector;
+ unsigned Base = 0, Index = 0, LengthReg = 0;
+ Register Reg1, Reg2;
+ bool HaveReg1, HaveReg2;
const MCExpr *Disp;
const MCExpr *Length;
- if (parseAddress(Base, Disp, Index, IsVector, Length, Regs, RegKind))
- return MatchOperand_ParseFail;
-
- if (IsVector && MemKind != BDVMem) {
- Error(StartLoc, "invalid use of vector addressing");
- return MatchOperand_ParseFail;
- }
-
- if (!IsVector && MemKind == BDVMem) {
- Error(StartLoc, "vector index required in address");
- return MatchOperand_ParseFail;
- }
-
- if (Index && MemKind != BDXMem && MemKind != BDVMem) {
- Error(StartLoc, "invalid use of indexed addressing");
+ if (parseAddress(HaveReg1, Reg1, HaveReg2, Reg2, Disp, Length))
return MatchOperand_ParseFail;
- }
- if (Length && MemKind != BDLMem) {
- Error(StartLoc, "invalid use of length addressing");
- return MatchOperand_ParseFail;
- }
-
- if (!Length && MemKind == BDLMem) {
- Error(StartLoc, "missing length in address");
- return MatchOperand_ParseFail;
+ switch (MemKind) {
+ case BDMem:
+ // If we have Reg1, it must be an address register.
+ if (HaveReg1) {
+ if (parseAddressRegister(Reg1))
+ return MatchOperand_ParseFail;
+ Base = Regs[Reg1.Num];
+ }
+ // There must be no Reg2 or length.
+ if (Length) {
+ Error(StartLoc, "invalid use of length addressing");
+ return MatchOperand_ParseFail;
+ }
+ if (HaveReg2) {
+ Error(StartLoc, "invalid use of indexed addressing");
+ return MatchOperand_ParseFail;
+ }
+ break;
+ case BDXMem:
+ // If we have Reg1, it must be an address register.
+ if (HaveReg1) {
+ if (parseAddressRegister(Reg1))
+ return MatchOperand_ParseFail;
+      // If there are two registers, the first one is the index and the
+ // second is the base.
+ if (HaveReg2)
+ Index = Regs[Reg1.Num];
+ else
+ Base = Regs[Reg1.Num];
+ }
+ // If we have Reg2, it must be an address register.
+ if (HaveReg2) {
+ if (parseAddressRegister(Reg2))
+ return MatchOperand_ParseFail;
+ Base = Regs[Reg2.Num];
+ }
+ // There must be no length.
+ if (Length) {
+ Error(StartLoc, "invalid use of length addressing");
+ return MatchOperand_ParseFail;
+ }
+ break;
+ case BDLMem:
+ // If we have Reg2, it must be an address register.
+ if (HaveReg2) {
+ if (parseAddressRegister(Reg2))
+ return MatchOperand_ParseFail;
+ Base = Regs[Reg2.Num];
+ }
+ // We cannot support base+index addressing.
+ if (HaveReg1 && HaveReg2) {
+ Error(StartLoc, "invalid use of indexed addressing");
+ return MatchOperand_ParseFail;
+ }
+ // We must have a length.
+ if (!Length) {
+ Error(StartLoc, "missing length in address");
+ return MatchOperand_ParseFail;
+ }
+ break;
+ case BDRMem:
+ // We must have Reg1, and it must be a GPR.
+ if (!HaveReg1 || Reg1.Group != RegGR) {
+ Error(StartLoc, "invalid operand for instruction");
+ return MatchOperand_ParseFail;
+ }
+ LengthReg = SystemZMC::GR64Regs[Reg1.Num];
+ // If we have Reg2, it must be an address register.
+ if (HaveReg2) {
+ if (parseAddressRegister(Reg2))
+ return MatchOperand_ParseFail;
+ Base = Regs[Reg2.Num];
+ }
+ // There must be no length.
+ if (Length) {
+ Error(StartLoc, "invalid use of length addressing");
+ return MatchOperand_ParseFail;
+ }
+ break;
+ case BDVMem:
+ // We must have Reg1, and it must be a vector register.
+ if (!HaveReg1 || Reg1.Group != RegV) {
+ Error(StartLoc, "vector index required in address");
+ return MatchOperand_ParseFail;
+ }
+ Index = SystemZMC::VR128Regs[Reg1.Num];
+ // If we have Reg2, it must be an address register.
+ if (HaveReg2) {
+ if (parseAddressRegister(Reg2))
+ return MatchOperand_ParseFail;
+ Base = Regs[Reg2.Num];
+ }
+ // There must be no length.
+ if (Length) {
+ Error(StartLoc, "invalid use of length addressing");
+ return MatchOperand_ParseFail;
+ }
+ break;
}
SMLoc EndLoc =
SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
Operands.push_back(SystemZOperand::createMem(MemKind, RegKind, Base, Disp,
- Index, Length, StartLoc,
- EndLoc));
+ Index, Length, LengthReg,
+ StartLoc, EndLoc));
return MatchOperand_Success;
}
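+// Illustrative examples (assumed inputs) of the syntax each kind accepts:
+// "4095(%r1)" for BDMem, "0(%r2,%r3)" for BDXMem (index, then base),
+// "0(16,%r3)" for BDLMem (immediate length), "0(%r2,%r3)" for BDRMem
+// (length register, then base), and "0(%v1,%r3)" for BDVMem (vector index,
+// then base).
+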
bool SystemZAsmParser::ParseDirective(AsmToken DirectiveID) {
+ StringRef IDVal = DirectiveID.getIdentifier();
+
+ if (IDVal == ".insn")
+ return ParseDirectiveInsn(DirectiveID.getLoc());
+
return true;
}
+/// ParseDirectiveInsn
+/// ::= .insn [ format, encoding, (operands (, operands)*) ]
+bool SystemZAsmParser::ParseDirectiveInsn(SMLoc L) {
+ MCAsmParser &Parser = getParser();
+
+ // Expect instruction format as identifier.
+ StringRef Format;
+ SMLoc ErrorLoc = Parser.getTok().getLoc();
+ if (Parser.parseIdentifier(Format))
+ return Error(ErrorLoc, "expected instruction format");
+
+ SmallVector<std::unique_ptr<MCParsedAsmOperand>, 8> Operands;
+
+ // Find entry for this format in InsnMatchTable.
+ auto EntryRange =
+ std::equal_range(std::begin(InsnMatchTable), std::end(InsnMatchTable),
+ Format, CompareInsn());
+
+ // If first == second, couldn't find a match in the table.
+ if (EntryRange.first == EntryRange.second)
+ return Error(ErrorLoc, "unrecognized format");
+
+ struct InsnMatchEntry *Entry = EntryRange.first;
+
+ // Format should match from equal_range.
+ assert(Entry->Format == Format);
+
+ // Parse the following operands using the table's information.
+ for (int i = 0; i < Entry->NumOperands; i++) {
+ MatchClassKind Kind = Entry->OperandKinds[i];
+
+ SMLoc StartLoc = Parser.getTok().getLoc();
+
+ // Always expect commas as separators for operands.
+ if (getLexer().isNot(AsmToken::Comma))
+ return Error(StartLoc, "unexpected token in directive");
+ Lex();
+
+ // Parse operands.
+ OperandMatchResultTy ResTy;
+ if (Kind == MCK_AnyReg)
+ ResTy = parseAnyReg(Operands);
+ else if (Kind == MCK_BDXAddr64Disp12 || Kind == MCK_BDXAddr64Disp20)
+ ResTy = parseBDXAddr64(Operands);
+ else if (Kind == MCK_BDAddr64Disp12 || Kind == MCK_BDAddr64Disp20)
+ ResTy = parseBDAddr64(Operands);
+ else if (Kind == MCK_PCRel32)
+ ResTy = parsePCRel32(Operands);
+ else if (Kind == MCK_PCRel16)
+ ResTy = parsePCRel16(Operands);
+ else {
+ // Only remaining operand kind is an immediate.
+ const MCExpr *Expr;
+ SMLoc StartLoc = Parser.getTok().getLoc();
+
+ // Expect immediate expression.
+ if (Parser.parseExpression(Expr))
+ return Error(StartLoc, "unexpected token in directive");
+
+ SMLoc EndLoc =
+ SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
+
+ Operands.push_back(SystemZOperand::createImm(Expr, StartLoc, EndLoc));
+ ResTy = MatchOperand_Success;
+ }
+
+ if (ResTy != MatchOperand_Success)
+ return true;
+ }
+
+ // Build the instruction with the parsed operands.
+ MCInst Inst = MCInstBuilder(Entry->Opcode);
+
+ for (size_t i = 0; i < Operands.size(); i++) {
+ MCParsedAsmOperand &Operand = *Operands[i];
+ MatchClassKind Kind = Entry->OperandKinds[i];
+
+ // Verify operand.
+ unsigned Res = validateOperandClass(Operand, Kind);
+ if (Res != Match_Success)
+ return Error(Operand.getStartLoc(), "unexpected operand type");
+
+ // Add operands to instruction.
+ SystemZOperand &ZOperand = static_cast<SystemZOperand &>(Operand);
+ if (ZOperand.isReg())
+ ZOperand.addRegOperands(Inst, 1);
+ else if (ZOperand.isMem(BDMem))
+ ZOperand.addBDAddrOperands(Inst, 2);
+ else if (ZOperand.isMem(BDXMem))
+ ZOperand.addBDXAddrOperands(Inst, 3);
+ else if (ZOperand.isImm())
+ ZOperand.addImmOperands(Inst, 1);
+ else
+ llvm_unreachable("unexpected operand type");
+ }
+
+ // Emit as a regular instruction.
+ Parser.getStreamer().EmitInstruction(Inst, getSTI());
+
+ return false;
+}
+
bool SystemZAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
SMLoc &EndLoc) {
Register Reg;
@@ -695,9 +1029,8 @@ bool SystemZAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
RegNo = SystemZMC::FP64Regs[Reg.Num];
else if (Reg.Group == RegV)
RegNo = SystemZMC::VR128Regs[Reg.Num];
- else
- // FIXME: Access registers aren't modelled as LLVM registers yet.
- return Error(Reg.StartLoc, "invalid operand for instruction");
+ else if (Reg.Group == RegAR)
+ RegNo = SystemZMC::AR32Regs[Reg.Num];
StartLoc = Reg.StartLoc;
EndLoc = Reg.EndLoc;
return false;
@@ -712,7 +1045,6 @@ bool SystemZAsmParser::ParseInstruction(ParseInstructionInfo &Info,
if (getLexer().isNot(AsmToken::EndOfStatement)) {
// Read the first operand.
if (parseOperand(Operands, Name)) {
- Parser.eatToEndOfStatement();
return true;
}
@@ -720,13 +1052,11 @@ bool SystemZAsmParser::ParseInstruction(ParseInstructionInfo &Info,
while (getLexer().is(AsmToken::Comma)) {
Parser.Lex();
if (parseOperand(Operands, Name)) {
- Parser.eatToEndOfStatement();
return true;
}
}
if (getLexer().isNot(AsmToken::EndOfStatement)) {
SMLoc Loc = getLexer().getLoc();
- Parser.eatToEndOfStatement();
return Error(Loc, "unexpected token in argument list");
}
}
@@ -739,8 +1069,14 @@ bool SystemZAsmParser::ParseInstruction(ParseInstructionInfo &Info,
bool SystemZAsmParser::parseOperand(OperandVector &Operands,
StringRef Mnemonic) {
// Check if the current operand has a custom associated parser, if so, try to
- // custom parse the operand, or fallback to the general approach.
+  // custom parse the operand, or fall back to the general approach. Force all
+ // features to be available during the operand check, or else we will fail to
+ // find the custom parser, and then we will later get an InvalidOperand error
+  // instead of a MissingFeature error.
+ uint64_t AvailableFeatures = getAvailableFeatures();
+ setAvailableFeatures(~(uint64_t)0);
OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
+ setAvailableFeatures(AvailableFeatures);
if (ResTy == MatchOperand_Success)
return false;
@@ -766,16 +1102,23 @@ bool SystemZAsmParser::parseOperand(OperandVector &Operands,
// real address operands should have used a context-dependent parse routine,
// so we treat any plain expression as an immediate.
SMLoc StartLoc = Parser.getTok().getLoc();
- unsigned Base, Index;
- bool IsVector;
- const MCExpr *Expr, *Length;
- if (parseAddress(Base, Expr, Index, IsVector, Length, SystemZMC::GR64Regs,
- ADDR64Reg))
+ Register Reg1, Reg2;
+ bool HaveReg1, HaveReg2;
+ const MCExpr *Expr;
+ const MCExpr *Length;
+ if (parseAddress(HaveReg1, Reg1, HaveReg2, Reg2, Expr, Length))
+ return true;
+ // If the register combination is not valid for any instruction, reject it.
+ // Otherwise, fall back to reporting an unrecognized instruction.
+ if (HaveReg1 && Reg1.Group != RegGR && Reg1.Group != RegV
+ && parseAddressRegister(Reg1))
+ return true;
+ if (HaveReg2 && parseAddressRegister(Reg2))
return true;
SMLoc EndLoc =
SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
- if (Base || Index || Length)
+ if (HaveReg1 || HaveReg2 || Length)
Operands.push_back(SystemZOperand::createInvalid(StartLoc, EndLoc));
else
Operands.push_back(SystemZOperand::createImm(Expr, StartLoc, EndLoc));
@@ -834,22 +1177,7 @@ bool SystemZAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
llvm_unreachable("Unexpected match type");
}
-SystemZAsmParser::OperandMatchResultTy
-SystemZAsmParser::parseAccessReg(OperandVector &Operands) {
- if (Parser.getTok().isNot(AsmToken::Percent))
- return MatchOperand_NoMatch;
-
- Register Reg;
- if (parseRegister(Reg, RegAccess, nullptr))
- return MatchOperand_ParseFail;
-
- Operands.push_back(SystemZOperand::createAccessReg(Reg.Num,
- Reg.StartLoc,
- Reg.EndLoc));
- return MatchOperand_Success;
-}
-
-SystemZAsmParser::OperandMatchResultTy
+OperandMatchResultTy
SystemZAsmParser::parsePCRel(OperandVector &Operands, int64_t MinVal,
int64_t MaxVal, bool AllowTLS) {
MCContext &Ctx = getContext();
@@ -927,5 +1255,5 @@ SystemZAsmParser::parsePCRel(OperandVector &Operands, int64_t MinVal,
// Force static initialization.
extern "C" void LLVMInitializeSystemZAsmParser() {
- RegisterMCAsmParser<SystemZAsmParser> X(TheSystemZTarget);
+ RegisterMCAsmParser<SystemZAsmParser> X(getTheSystemZTarget());
}
diff --git a/contrib/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/contrib/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
index 20e015b42d21..1806e015f61e 100644
--- a/contrib/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
@@ -42,7 +42,7 @@ static MCDisassembler *createSystemZDisassembler(const Target &T,
extern "C" void LLVMInitializeSystemZDisassembler() {
// Register the disassembler.
- TargetRegistry::RegisterMCDisassembler(TheSystemZTarget,
+ TargetRegistry::RegisterMCDisassembler(getTheSystemZTarget(),
createSystemZDisassembler);
}
@@ -150,6 +150,12 @@ static DecodeStatus DecodeVR128BitRegisterClass(MCInst &Inst, uint64_t RegNo,
return decodeRegisterClass(Inst, RegNo, SystemZMC::VR128Regs, 32);
}
+static DecodeStatus DecodeAR32BitRegisterClass(MCInst &Inst, uint64_t RegNo,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeRegisterClass(Inst, RegNo, SystemZMC::AR32Regs, 16);
+}
+
template<unsigned N>
static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm) {
if (!isUInt<N>(Imm))
@@ -166,12 +172,6 @@ static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm) {
return MCDisassembler::Success;
}
-static DecodeStatus decodeAccessRegOperand(MCInst &Inst, uint64_t Imm,
- uint64_t Address,
- const void *Decoder) {
- return decodeUImmOperand<4>(Inst, Imm);
-}
-
static DecodeStatus decodeU1ImmOperand(MCInst &Inst, uint64_t Imm,
uint64_t Address, const void *Decoder) {
return decodeUImmOperand<1>(Inst, Imm);
@@ -247,12 +247,24 @@ static DecodeStatus decodePCDBLOperand(MCInst &Inst, uint64_t Imm,
return MCDisassembler::Success;
}
+static DecodeStatus decodePC12DBLBranchOperand(MCInst &Inst, uint64_t Imm,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodePCDBLOperand<12>(Inst, Imm, Address, true, Decoder);
+}
+
static DecodeStatus decodePC16DBLBranchOperand(MCInst &Inst, uint64_t Imm,
uint64_t Address,
const void *Decoder) {
return decodePCDBLOperand<16>(Inst, Imm, Address, true, Decoder);
}
+static DecodeStatus decodePC24DBLBranchOperand(MCInst &Inst, uint64_t Imm,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodePCDBLOperand<24>(Inst, Imm, Address, true, Decoder);
+}
+
static DecodeStatus decodePC32DBLBranchOperand(MCInst &Inst, uint64_t Imm,
uint64_t Address,
const void *Decoder) {
@@ -321,6 +333,18 @@ static DecodeStatus decodeBDLAddr12Len8Operand(MCInst &Inst, uint64_t Field,
return MCDisassembler::Success;
}
+static DecodeStatus decodeBDRAddr12Operand(MCInst &Inst, uint64_t Field,
+ const unsigned *Regs) {
+ uint64_t Length = Field >> 16;
+ uint64_t Base = (Field >> 12) & 0xf;
+ uint64_t Disp = Field & 0xfff;
+ assert(Length < 16 && "Invalid BDRAddr12");
+ Inst.addOperand(MCOperand::createReg(Base == 0 ? 0 : Regs[Base]));
+ Inst.addOperand(MCOperand::createImm(Disp));
+ Inst.addOperand(MCOperand::createReg(Regs[Length]));
+ return MCDisassembler::Success;
+}
+
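+// Worked example (illustrative): Field 0x34567 splits into Length 0x3,
+// Base 0x4 and Disp 0x567, so the decoder emits Regs[4], an immediate
+// 0x567 and Regs[3], in that order.
+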
static DecodeStatus decodeBDVAddr12Operand(MCInst &Inst, uint64_t Field,
const unsigned *Regs) {
uint64_t Index = Field >> 16;
@@ -376,6 +400,13 @@ static DecodeStatus decodeBDLAddr64Disp12Len8Operand(MCInst &Inst,
return decodeBDLAddr12Len8Operand(Inst, Field, SystemZMC::GR64Regs);
}
+static DecodeStatus decodeBDRAddr64Disp12Operand(MCInst &Inst,
+ uint64_t Field,
+ uint64_t Address,
+ const void *Decoder) {
+ return decodeBDRAddr12Operand(Inst, Field, SystemZMC::GR64Regs);
+}
+
static DecodeStatus decodeBDVAddr64Disp12Operand(MCInst &Inst, uint64_t Field,
uint64_t Address,
const void *Decoder) {
diff --git a/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp b/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
index 6444cf8e464d..1207c7b327e8 100644
--- a/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
@@ -134,11 +134,9 @@ void SystemZInstPrinter::printU32ImmOperand(const MCInst *MI, int OpNum,
printUImmOperand<32>(MI, OpNum, O);
}
-void SystemZInstPrinter::printAccessRegOperand(const MCInst *MI, int OpNum,
- raw_ostream &O) {
- uint64_t Value = MI->getOperand(OpNum).getImm();
- assert(Value < 16 && "Invalid access register number");
- O << "%a" << (unsigned int)Value;
+void SystemZInstPrinter::printU48ImmOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ printUImmOperand<48>(MI, OpNum, O);
}
void SystemZInstPrinter::printPCRelOperand(const MCInst *MI, int OpNum,
@@ -203,6 +201,17 @@ void SystemZInstPrinter::printBDLAddrOperand(const MCInst *MI, int OpNum,
O << ')';
}
+void SystemZInstPrinter::printBDRAddrOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ unsigned Base = MI->getOperand(OpNum).getReg();
+ uint64_t Disp = MI->getOperand(OpNum + 1).getImm();
+ unsigned Length = MI->getOperand(OpNum + 2).getReg();
+ O << Disp << "(%" << getRegisterName(Length);
+ if (Base)
+ O << ",%" << getRegisterName(Base);
+ O << ')';
+}
+
void SystemZInstPrinter::printBDVAddrOperand(const MCInst *MI, int OpNum,
raw_ostream &O) {
printAddress(MI->getOperand(OpNum).getReg(),
diff --git a/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h b/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
index 7ca386fc4cb9..6336f5ee0efa 100644
--- a/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
+++ b/contrib/llvm/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
@@ -48,6 +48,7 @@ private:
void printBDAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printBDXAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printBDLAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printBDRAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printBDVAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printU1ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printU2ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
@@ -61,9 +62,9 @@ private:
void printU16ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printS32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printU32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printU48ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printPCRelOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printPCRelTLSOperand(const MCInst *MI, int OpNum, raw_ostream &O);
- void printAccessRegOperand(const MCInst *MI, int OpNum, raw_ostream &O);
// Print the mnemonic for a condition-code mask ("ne", "lh", etc.)
// This forms part of the instruction name rather than the operand list.
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
index c4d546cb7dff..9192448afd04 100644
--- a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
@@ -25,7 +25,9 @@ static uint64_t extractBitsForFixup(MCFixupKind Kind, uint64_t Value) {
return Value;
switch (unsigned(Kind)) {
+ case SystemZ::FK_390_PC12DBL:
case SystemZ::FK_390_PC16DBL:
+ case SystemZ::FK_390_PC24DBL:
case SystemZ::FK_390_PC32DBL:
return (int64_t)Value / 2;
@@ -72,7 +74,9 @@ public:
const MCFixupKindInfo &
SystemZMCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
const static MCFixupKindInfo Infos[SystemZ::NumTargetFixupKinds] = {
+ { "FK_390_PC12DBL", 4, 12, MCFixupKindInfo::FKF_IsPCRel },
{ "FK_390_PC16DBL", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
+ { "FK_390_PC24DBL", 0, 24, MCFixupKindInfo::FKF_IsPCRel },
{ "FK_390_PC32DBL", 0, 32, MCFixupKindInfo::FKF_IsPCRel },
{ "FK_390_TLS_CALL", 0, 0, 0 }
};
@@ -90,12 +94,15 @@ void SystemZMCAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
bool IsPCRel) const {
MCFixupKind Kind = Fixup.getKind();
unsigned Offset = Fixup.getOffset();
- unsigned Size = (getFixupKindInfo(Kind).TargetSize + 7) / 8;
+ unsigned BitSize = getFixupKindInfo(Kind).TargetSize;
+ unsigned Size = (BitSize + 7) / 8;
assert(Offset + Size <= DataSize && "Invalid fixup offset!");
// Big-endian insertion of Size bytes.
Value = extractBitsForFixup(Kind, Value);
+ if (BitSize < 64)
+ Value &= ((uint64_t)1 << BitSize) - 1;
unsigned ShiftValue = (Size * 8) - 8;
for (unsigned I = 0; I != Size; ++I) {
Data[Offset + I] |= uint8_t(Value >> ShiftValue);
@@ -112,7 +119,8 @@ bool SystemZMCAsmBackend::writeNopData(uint64_t Count,
MCAsmBackend *llvm::createSystemZMCAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU) {
+ const Triple &TT, StringRef CPU,
+ const MCTargetOptions &Options) {
uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TT.getOS());
return new SystemZMCAsmBackend(OSABI);
}
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
index fd52a2ebf2fd..7082abad716d 100644
--- a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
@@ -72,6 +72,9 @@ private:
uint64_t getBDLAddr12Len8Encoding(const MCInst &MI, unsigned OpNum,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
+ uint64_t getBDRAddr12Encoding(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const;
uint64_t getBDVAddr12Encoding(const MCInst &MI, unsigned OpNum,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
@@ -110,6 +113,29 @@ private:
return getPCRelEncoding(MI, OpNum, Fixups,
SystemZ::FK_390_PC32DBL, 2, true);
}
+ uint64_t getPC12DBLBPPEncoding(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ return getPCRelEncoding(MI, OpNum, Fixups,
+ SystemZ::FK_390_PC12DBL, 1, false);
+ }
+ uint64_t getPC16DBLBPPEncoding(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ return getPCRelEncoding(MI, OpNum, Fixups,
+ SystemZ::FK_390_PC16DBL, 4, false);
+ }
+ uint64_t getPC24DBLBPPEncoding(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ return getPCRelEncoding(MI, OpNum, Fixups,
+ SystemZ::FK_390_PC24DBL, 3, false);
+ }
+
+private:
+ uint64_t computeAvailableFeatures(const FeatureBitset &FB) const;
+ void verifyInstructionPredicates(const MCInst &MI,
+ uint64_t AvailableFeatures) const;
};
} // end anonymous namespace
@@ -123,6 +149,9 @@ void SystemZMCCodeEmitter::
encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
+ verifyInstructionPredicates(MI,
+ computeAvailableFeatures(STI.getFeatureBits()));
+
uint64_t Bits = getBinaryCodeForInstr(MI, Fixups, STI);
unsigned Size = MCII.get(MI.getOpcode()).getSize();
// Big-endian insertion of Size bytes.
@@ -199,6 +228,17 @@ getBDLAddr12Len8Encoding(const MCInst &MI, unsigned OpNum,
}
uint64_t SystemZMCCodeEmitter::
+getBDRAddr12Encoding(const MCInst &MI, unsigned OpNum,
+ SmallVectorImpl<MCFixup> &Fixups,
+ const MCSubtargetInfo &STI) const {
+ uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups, STI);
+ uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups, STI);
+ uint64_t Len = getMachineOpValue(MI, MI.getOperand(OpNum + 2), Fixups, STI);
+ assert(isUInt<4>(Base) && isUInt<12>(Disp) && isUInt<4>(Len));
+ return (Len << 16) | (Base << 12) | Disp;
+}
+
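+// Worked example (illustrative): Len 0x3, Base 0x4 and Disp 0x567 encode as
+// (0x3 << 16) | (0x4 << 12) | 0x567 = 0x34567, the inverse of the
+// disassembler's decodeBDRAddr12Operand.
+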
+uint64_t SystemZMCCodeEmitter::
getBDVAddr12Encoding(const MCInst &MI, unsigned OpNum,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
@@ -240,4 +280,5 @@ SystemZMCCodeEmitter::getPCRelEncoding(const MCInst &MI, unsigned OpNum,
return 0;
}
+#define ENABLE_INSTR_PREDICATE_VERIFIER
#include "SystemZGenMCCodeEmitter.inc"
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h
index 229ab5dc86fb..c012accc14dd 100644
--- a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h
@@ -16,7 +16,9 @@ namespace llvm {
namespace SystemZ {
enum FixupKind {
// These correspond directly to R_390_* relocations.
- FK_390_PC16DBL = FirstTargetFixupKind,
+ FK_390_PC12DBL = FirstTargetFixupKind,
+ FK_390_PC16DBL,
+ FK_390_PC24DBL,
FK_390_PC32DBL,
FK_390_TLS_CALL,
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
index 368c95f7bac2..43a96e84289c 100644
--- a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
@@ -53,7 +53,9 @@ static unsigned getPCRelReloc(unsigned Kind) {
case FK_Data_2: return ELF::R_390_PC16;
case FK_Data_4: return ELF::R_390_PC32;
case FK_Data_8: return ELF::R_390_PC64;
+ case SystemZ::FK_390_PC12DBL: return ELF::R_390_PC12DBL;
case SystemZ::FK_390_PC16DBL: return ELF::R_390_PC16DBL;
+ case SystemZ::FK_390_PC24DBL: return ELF::R_390_PC24DBL;
case SystemZ::FK_390_PC32DBL: return ELF::R_390_PC32DBL;
}
llvm_unreachable("Unsupported PC-relative address");
@@ -100,7 +102,9 @@ static unsigned getTLSGDReloc(unsigned Kind) {
// Return the PLT relocation counterpart of MCFixupKind Kind.
static unsigned getPLTReloc(unsigned Kind) {
switch (Kind) {
+ case SystemZ::FK_390_PC12DBL: return ELF::R_390_PLT12DBL;
case SystemZ::FK_390_PC16DBL: return ELF::R_390_PLT16DBL;
+ case SystemZ::FK_390_PC24DBL: return ELF::R_390_PLT24DBL;
case SystemZ::FK_390_PC32DBL: return ELF::R_390_PLT32DBL;
}
llvm_unreachable("Unsupported absolute address");
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
index e16ba9e15317..dfea7e33fa15 100644
--- a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
@@ -109,6 +109,13 @@ const unsigned SystemZMC::VR128Regs[32] = {
SystemZ::V28, SystemZ::V29, SystemZ::V30, SystemZ::V31
};
+const unsigned SystemZMC::AR32Regs[16] = {
+ SystemZ::A0, SystemZ::A1, SystemZ::A2, SystemZ::A3,
+ SystemZ::A4, SystemZ::A5, SystemZ::A6, SystemZ::A7,
+ SystemZ::A8, SystemZ::A9, SystemZ::A10, SystemZ::A11,
+ SystemZ::A12, SystemZ::A13, SystemZ::A14, SystemZ::A15
+};
+
unsigned SystemZMC::getFirstReg(unsigned Reg) {
static unsigned Map[SystemZ::NUM_TARGET_REGS];
static bool Initialized = false;
@@ -119,6 +126,7 @@ unsigned SystemZMC::getFirstReg(unsigned Reg) {
Map[GR64Regs[I]] = I;
Map[GR128Regs[I]] = I;
Map[FP128Regs[I]] = I;
+ Map[AR32Regs[I]] = I;
}
for (unsigned I = 0; I < 32; ++I) {
Map[VR32Regs[I]] = I;
@@ -205,34 +213,34 @@ static MCInstPrinter *createSystemZMCInstPrinter(const Triple &T,
extern "C" void LLVMInitializeSystemZTargetMC() {
// Register the MCAsmInfo.
- TargetRegistry::RegisterMCAsmInfo(TheSystemZTarget,
+ TargetRegistry::RegisterMCAsmInfo(getTheSystemZTarget(),
createSystemZMCAsmInfo);
// Register the adjustCodeGenOpts.
- TargetRegistry::registerMCAdjustCodeGenOpts(TheSystemZTarget,
+ TargetRegistry::registerMCAdjustCodeGenOpts(getTheSystemZTarget(),
adjustCodeGenOpts);
// Register the MCCodeEmitter.
- TargetRegistry::RegisterMCCodeEmitter(TheSystemZTarget,
+ TargetRegistry::RegisterMCCodeEmitter(getTheSystemZTarget(),
createSystemZMCCodeEmitter);
// Register the MCInstrInfo.
- TargetRegistry::RegisterMCInstrInfo(TheSystemZTarget,
+ TargetRegistry::RegisterMCInstrInfo(getTheSystemZTarget(),
createSystemZMCInstrInfo);
// Register the MCRegisterInfo.
- TargetRegistry::RegisterMCRegInfo(TheSystemZTarget,
+ TargetRegistry::RegisterMCRegInfo(getTheSystemZTarget(),
createSystemZMCRegisterInfo);
// Register the MCSubtargetInfo.
- TargetRegistry::RegisterMCSubtargetInfo(TheSystemZTarget,
+ TargetRegistry::RegisterMCSubtargetInfo(getTheSystemZTarget(),
createSystemZMCSubtargetInfo);
// Register the MCAsmBackend.
- TargetRegistry::RegisterMCAsmBackend(TheSystemZTarget,
+ TargetRegistry::RegisterMCAsmBackend(getTheSystemZTarget(),
createSystemZMCAsmBackend);
// Register the MCInstPrinter.
- TargetRegistry::RegisterMCInstPrinter(TheSystemZTarget,
+ TargetRegistry::RegisterMCInstPrinter(getTheSystemZTarget(),
createSystemZMCInstPrinter);
}
diff --git a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
index 0db48fe5a109..d9926c7e4986 100644
--- a/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
@@ -21,13 +21,14 @@ class MCInstrInfo;
class MCObjectWriter;
class MCRegisterInfo;
class MCSubtargetInfo;
+class MCTargetOptions;
class StringRef;
class Target;
class Triple;
class raw_pwrite_stream;
class raw_ostream;
-extern Target TheSystemZTarget;
+Target &getTheSystemZTarget();
namespace SystemZMC {
// How many bytes are in the ABI-defined, caller-allocated part of
@@ -53,6 +54,7 @@ extern const unsigned FP128Regs[16];
extern const unsigned VR32Regs[32];
extern const unsigned VR64Regs[32];
extern const unsigned VR128Regs[32];
+extern const unsigned AR32Regs[16];
// Return the 0-based number of the first architectural register that
// contains the given LLVM register. E.g. R1D -> 1.
@@ -85,7 +87,8 @@ MCCodeEmitter *createSystemZMCCodeEmitter(const MCInstrInfo &MCII,
MCAsmBackend *createSystemZMCAsmBackend(const Target &T,
const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU);
+ const Triple &TT, StringRef CPU,
+ const MCTargetOptions &Options);
MCObjectWriter *createSystemZObjectWriter(raw_pwrite_stream &OS, uint8_t OSABI);
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZ.h b/contrib/llvm/lib/Target/SystemZ/SystemZ.h
index c8ea9641fb62..9a8e508e4119 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZ.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZ.h
@@ -175,6 +175,7 @@ static inline bool isImmHF(uint64_t Val) {
FunctionPass *createSystemZISelDag(SystemZTargetMachine &TM,
CodeGenOpt::Level OptLevel);
FunctionPass *createSystemZElimComparePass(SystemZTargetMachine &TM);
+FunctionPass *createSystemZExpandPseudoPass(SystemZTargetMachine &TM);
FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM);
FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM);
FunctionPass *createSystemZLDCleanupPass(SystemZTargetMachine &TM);
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZ.td b/contrib/llvm/lib/Target/SystemZ/SystemZ.td
index d4d636d3479c..6bdfd4d07edc 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZ.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZ.td
@@ -14,7 +14,19 @@
include "llvm/Target/Target.td"
//===----------------------------------------------------------------------===//
-// SystemZ supported processors and features
+// SystemZ subtarget features
+//===----------------------------------------------------------------------===//
+
+include "SystemZFeatures.td"
+
+//===----------------------------------------------------------------------===//
+// SystemZ subtarget scheduling models
+//===----------------------------------------------------------------------===//
+
+include "SystemZSchedule.td"
+
+//===----------------------------------------------------------------------===//
+// SystemZ supported processors
//===----------------------------------------------------------------------===//
include "SystemZProcessors.td"
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
index 9c0f327ff744..b39245b20b3c 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp
@@ -418,10 +418,10 @@ void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
case SystemZ::Serialize:
if (MF->getSubtarget<SystemZSubtarget>().hasFastSerialization())
- LoweredMI = MCInstBuilder(SystemZ::AsmBCR)
+ LoweredMI = MCInstBuilder(SystemZ::BCRAsm)
.addImm(14).addReg(SystemZ::R0D);
else
- LoweredMI = MCInstBuilder(SystemZ::AsmBCR)
+ LoweredMI = MCInstBuilder(SystemZ::BCRAsm)
.addImm(15).addReg(SystemZ::R0D);
break;
@@ -523,5 +523,5 @@ bool SystemZAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
// Force static initialization.
extern "C" void LLVMInitializeSystemZAsmPrinter() {
- RegisterAsmPrinter<SystemZAsmPrinter> X(TheSystemZTarget);
+ RegisterAsmPrinter<SystemZAsmPrinter> X(getTheSystemZTarget());
}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h
index 7f6e823729dc..fe8c88fe23e3 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZAsmPrinter.h
@@ -27,9 +27,7 @@ public:
: AsmPrinter(TM, std::move(Streamer)) {}
// Override AsmPrinter.
- const char *getPassName() const override {
- return "SystemZ Assembly Printer";
- }
+ StringRef getPassName() const override { return "SystemZ Assembly Printer"; }
void EmitInstruction(const MachineInstr *MI) override;
void EmitMachineConstantPoolValue(MachineConstantPoolValue *MCPV) override;
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
index 27350b88554d..b4c843f658aa 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
@@ -28,6 +28,7 @@ using namespace llvm;
#define DEBUG_TYPE "systemz-elim-compare"
STATISTIC(BranchOnCounts, "Number of branch-on-count instructions");
+STATISTIC(LoadAndTraps, "Number of load-and-trap instructions");
STATISTIC(EliminatedComparisons, "Number of eliminated comparisons");
STATISTIC(FusedComparisons, "Number of fused compare-and-branch instructions");
@@ -58,7 +59,7 @@ public:
SystemZElimCompare(const SystemZTargetMachine &tm)
: MachineFunctionPass(ID), TII(nullptr), TRI(nullptr) {}
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "SystemZ Comparison Elimination";
}
@@ -66,13 +67,15 @@ public:
bool runOnMachineFunction(MachineFunction &F) override;
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
private:
Reference getRegReferences(MachineInstr &MI, unsigned Reg);
bool convertToBRCT(MachineInstr &MI, MachineInstr &Compare,
SmallVectorImpl<MachineInstr *> &CCUsers);
+ bool convertToLoadAndTrap(MachineInstr &MI, MachineInstr &Compare,
+ SmallVectorImpl<MachineInstr *> &CCUsers);
bool convertToLoadAndTest(MachineInstr &MI);
bool adjustCCMasksForInstr(MachineInstr &MI, MachineInstr &Compare,
SmallVectorImpl<MachineInstr *> &CCUsers);
@@ -171,7 +174,7 @@ static unsigned getCompareSourceReg(MachineInstr &Compare) {
// Compare compares the result of MI against zero. If MI is an addition
// of -1 and if CCUsers is a single branch on nonzero, eliminate the addition
-// and convert the branch to a BRCT(G). Return true on success.
+// and convert the branch to a BRCT(G) or BRCTH. Return true on success.
bool SystemZElimCompare::convertToBRCT(
MachineInstr &MI, MachineInstr &Compare,
SmallVectorImpl<MachineInstr *> &CCUsers) {
@@ -182,6 +185,8 @@ bool SystemZElimCompare::convertToBRCT(
BRCT = SystemZ::BRCT;
else if (Opcode == SystemZ::AGHI)
BRCT = SystemZ::BRCTG;
+ else if (Opcode == SystemZ::AIH)
+ BRCT = SystemZ::BRCTH;
else
return false;
if (MI.getOperand(2).getImm() != -1)
@@ -205,16 +210,61 @@ bool SystemZElimCompare::convertToBRCT(
if (getRegReferences(*MBBI, SrcReg))
return false;
- // The transformation is OK. Rebuild Branch as a BRCT(G).
+ // The transformation is OK. Rebuild Branch as a BRCT(G) or BRCTH.
MachineOperand Target(Branch->getOperand(2));
while (Branch->getNumOperands())
Branch->RemoveOperand(0);
Branch->setDesc(TII->get(BRCT));
+ MachineInstrBuilder MIB(*Branch->getParent()->getParent(), Branch);
+ MIB.addOperand(MI.getOperand(0))
+ .addOperand(MI.getOperand(1))
+ .addOperand(Target);
+ // Add a CC def to BRCT(G), since we may have to split them again if the
+ // branch displacement overflows. BRCTH has a 32-bit displacement, so
+ // this is not necessary there.
+ if (BRCT != SystemZ::BRCTH)
+ MIB.addReg(SystemZ::CC, RegState::ImplicitDefine | RegState::Dead);
+ MI.eraseFromParent();
+ return true;
+}
+
+// Compare compares the result of MI against zero. If MI is a suitable load
+// instruction and if CCUsers is a single conditional trap on zero, eliminate
+// the load and convert the branch to a load-and-trap. Return true on success.
+bool SystemZElimCompare::convertToLoadAndTrap(
+ MachineInstr &MI, MachineInstr &Compare,
+ SmallVectorImpl<MachineInstr *> &CCUsers) {
+ unsigned LATOpcode = TII->getLoadAndTrap(MI.getOpcode());
+ if (!LATOpcode)
+ return false;
+
+ // Check whether we have a single CondTrap that traps on zero.
+ if (CCUsers.size() != 1)
+ return false;
+ MachineInstr *Branch = CCUsers[0];
+ if (Branch->getOpcode() != SystemZ::CondTrap ||
+ Branch->getOperand(0).getImm() != SystemZ::CCMASK_ICMP ||
+ Branch->getOperand(1).getImm() != SystemZ::CCMASK_CMP_EQ)
+ return false;
+
+ // We already know that there are no references to the register between
+ // MI and Compare. Make sure that there are also no references between
+ // Compare and Branch.
+ unsigned SrcReg = getCompareSourceReg(Compare);
+ MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch;
+ for (++MBBI; MBBI != MBBE; ++MBBI)
+ if (getRegReferences(*MBBI, SrcReg))
+ return false;
+
+ // The transformation is OK. Rebuild Branch as a load-and-trap.
+ while (Branch->getNumOperands())
+ Branch->RemoveOperand(0);
+ Branch->setDesc(TII->get(LATOpcode));
MachineInstrBuilder(*Branch->getParent()->getParent(), Branch)
.addOperand(MI.getOperand(0))
.addOperand(MI.getOperand(1))
- .addOperand(Target)
- .addReg(SystemZ::CC, RegState::ImplicitDefine | RegState::Dead);
+ .addOperand(MI.getOperand(2))
+ .addOperand(MI.getOperand(3));
MI.eraseFromParent();
return true;
}
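+// Sketch of the intended rewrite (assuming getLoadAndTrap maps LG to LGAT):
+// an "LG %r1, 0(%r2)" whose result is compared against zero, with a single
+// CondTrap on the equal condition, collapses into "LGAT %r1, 0(%r2)".
+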
@@ -347,11 +397,17 @@ bool SystemZElimCompare::optimizeCompareZero(
MachineInstr &MI = *MBBI;
if (resultTests(MI, SrcReg)) {
// Try to remove both MI and Compare by converting a branch to BRCT(G).
- // We don't care in this case whether CC is modified between MI and
- // Compare.
- if (!CCRefs.Use && !SrcRefs && convertToBRCT(MI, Compare, CCUsers)) {
- BranchOnCounts += 1;
- return true;
+ // or a load-and-trap instruction. We don't care in this case whether
+ // CC is modified between MI and Compare.
+ if (!CCRefs.Use && !SrcRefs) {
+ if (convertToBRCT(MI, Compare, CCUsers)) {
+ BranchOnCounts += 1;
+ return true;
+ }
+ if (convertToLoadAndTrap(MI, Compare, CCUsers)) {
+ LoadAndTraps += 1;
+ return true;
+ }
}
// Try to eliminate Compare by reusing a CC result from MI.
if ((!CCRefs && convertToLoadAndTest(MI)) ||
@@ -403,6 +459,9 @@ bool SystemZElimCompare::fuseCompareOperations(
return false;
// Make sure that the operands are available at the branch.
+ // SrcReg2 is the register if the source operand is a register,
+ // 0 if the source operand is immediate, and the base register
+ // if the source operand is memory (index is not supported).
unsigned SrcReg = Compare.getOperand(0).getReg();
unsigned SrcReg2 =
Compare.getOperand(1).isReg() ? Compare.getOperand(1).getReg() : 0;
@@ -435,11 +494,16 @@ bool SystemZElimCompare::fuseCompareOperations(
Branch->RemoveOperand(0);
// Rebuild Branch as a fused compare and branch.
+ // SrcNOps is the number of MI operands of the compare instruction
+ // that we need to copy over.
+ unsigned SrcNOps = 2;
+ if (FusedOpcode == SystemZ::CLT || FusedOpcode == SystemZ::CLGT)
+ SrcNOps = 3;
Branch->setDesc(TII->get(FusedOpcode));
MachineInstrBuilder MIB(*Branch->getParent()->getParent(), Branch);
- MIB.addOperand(Compare.getOperand(0))
- .addOperand(Compare.getOperand(1))
- .addOperand(CCMask);
+ for (unsigned I = 0; I < SrcNOps; I++)
+ MIB.addOperand(Compare.getOperand(I));
+ MIB.addOperand(CCMask);
if (Type == SystemZII::CompareAndBranch) {
// Only conditional branches define CC, as they may be converted back
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZExpandPseudo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZExpandPseudo.cpp
new file mode 100644
index 000000000000..92ce8089c24f
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZExpandPseudo.cpp
@@ -0,0 +1,153 @@
+//==-- SystemZExpandPseudo.cpp - Expand pseudo instructions -------*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that expands pseudo instructions into target
+// instructions to allow proper scheduling and other late optimizations. This
+// pass should be run after register allocation but before the post-regalloc
+// scheduling pass.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SystemZ.h"
+#include "SystemZInstrInfo.h"
+#include "SystemZSubtarget.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+using namespace llvm;
+
+#define SYSTEMZ_EXPAND_PSEUDO_NAME "SystemZ pseudo instruction expansion pass"
+
+namespace llvm {
+ void initializeSystemZExpandPseudoPass(PassRegistry&);
+}
+
+namespace {
+class SystemZExpandPseudo : public MachineFunctionPass {
+public:
+ static char ID;
+ SystemZExpandPseudo() : MachineFunctionPass(ID) {
+ initializeSystemZExpandPseudoPass(*PassRegistry::getPassRegistry());
+ }
+
+ const SystemZInstrInfo *TII;
+
+ bool runOnMachineFunction(MachineFunction &Fn) override;
+
+ StringRef getPassName() const override { return SYSTEMZ_EXPAND_PSEUDO_NAME; }
+
+private:
+ bool expandMBB(MachineBasicBlock &MBB);
+ bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI);
+ bool expandLOCRMux(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI);
+};
+char SystemZExpandPseudo::ID = 0;
+}
+
+INITIALIZE_PASS(SystemZExpandPseudo, "systemz-expand-pseudo",
+ SYSTEMZ_EXPAND_PSEUDO_NAME, false, false)
+
+/// \brief Returns an instance of the pseudo instruction expansion pass.
+FunctionPass *llvm::createSystemZExpandPseudoPass(SystemZTargetMachine &TM) {
+ return new SystemZExpandPseudo();
+}
+
+// MI is a load-register-on-condition pseudo instruction that could not be
+// handled as a single hardware instruction. Replace it by a branch sequence.
+bool SystemZExpandPseudo::expandLOCRMux(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI) {
+ MachineFunction &MF = *MBB.getParent();
+ const BasicBlock *BB = MBB.getBasicBlock();
+ MachineInstr &MI = *MBBI;
+ DebugLoc DL = MI.getDebugLoc();
+ unsigned DestReg = MI.getOperand(0).getReg();
+ unsigned SrcReg = MI.getOperand(2).getReg();
+ unsigned CCValid = MI.getOperand(3).getImm();
+ unsigned CCMask = MI.getOperand(4).getImm();
+
+ LivePhysRegs LiveRegs(&TII->getRegisterInfo());
+ LiveRegs.addLiveOuts(MBB);
+ for (auto I = std::prev(MBB.end()); I != MBBI; --I)
+ LiveRegs.stepBackward(*I);
+
+ // Splice MBB at MI, moving the rest of the block into RestMBB.
+ MachineBasicBlock *RestMBB = MF.CreateMachineBasicBlock(BB);
+ MF.insert(std::next(MachineFunction::iterator(MBB)), RestMBB);
+ RestMBB->splice(RestMBB->begin(), &MBB, MI, MBB.end());
+ RestMBB->transferSuccessors(&MBB);
+ for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I)
+ RestMBB->addLiveIn(*I);
+
+ // Create a new block MoveMBB to hold the move instruction.
+ MachineBasicBlock *MoveMBB = MF.CreateMachineBasicBlock(BB);
+ MF.insert(std::next(MachineFunction::iterator(MBB)), MoveMBB);
+ MoveMBB->addLiveIn(SrcReg);
+ for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I)
+ MoveMBB->addLiveIn(*I);
+
+ // At the end of MBB, create a conditional branch to RestMBB if the
+ // condition is false, otherwise fall through to MoveMBB.
+ BuildMI(&MBB, DL, TII->get(SystemZ::BRC))
+ .addImm(CCValid).addImm(CCMask ^ CCValid).addMBB(RestMBB);
+ MBB.addSuccessor(RestMBB);
+ MBB.addSuccessor(MoveMBB);
+
+ // In MoveMBB, emit an instruction to move SrcReg into DestReg,
+ // then fall through to RestMBB.
+ TII->copyPhysReg(*MoveMBB, MoveMBB->end(), DL, DestReg, SrcReg,
+ MI.getOperand(2).isKill());
+ MoveMBB->addSuccessor(RestMBB);
+
+ NextMBBI = MBB.end();
+ MI.eraseFromParent();
+ return true;
+}
+
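+// Resulting layout (sketch): MBB now ends with a BRC on CCValid/(CCMask ^
+// CCValid) to RestMBB and otherwise falls through to MoveMBB, which copies
+// SrcReg into DestReg and falls through to RestMBB, the block holding
+// everything that originally followed the LOCRMux.
+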
+/// \brief If MBBI references a pseudo instruction that should be expanded here,
+/// do the expansion and return true. Otherwise return false.
+bool SystemZExpandPseudo::expandMI(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ MachineBasicBlock::iterator &NextMBBI) {
+ MachineInstr &MI = *MBBI;
+ switch (MI.getOpcode()) {
+ case SystemZ::LOCRMux:
+ return expandLOCRMux(MBB, MBBI, NextMBBI);
+ default:
+ break;
+ }
+ return false;
+}
+
+/// \brief Iterate over the instructions in basic block MBB and expand any
+/// pseudo instructions. Return true if anything was modified.
+bool SystemZExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ while (MBBI != E) {
+ MachineBasicBlock::iterator NMBBI = std::next(MBBI);
+ Modified |= expandMI(MBB, MBBI, NMBBI);
+ MBBI = NMBBI;
+ }
+
+ return Modified;
+}
+
+bool SystemZExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
+ TII = static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
+
+ bool Modified = false;
+ for (auto &MBB : MF)
+ Modified |= expandMBB(MBB);
+ return Modified;
+}
+
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZFeatures.td b/contrib/llvm/lib/Target/SystemZ/SystemZFeatures.td
new file mode 100644
index 000000000000..716e5add8051
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZFeatures.td
@@ -0,0 +1,171 @@
+//===-- SystemZFeatures.td - SystemZ processor features ------*- tblgen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Feature definitions.
+//
+//===----------------------------------------------------------------------===//
+
+class SystemZFeature<string extname, string intname, string desc>
+ : Predicate<"Subtarget->has"##intname##"()">,
+ AssemblerPredicate<"Feature"##intname, extname>,
+ SubtargetFeature<extname, "Has"##intname, "true", desc>;
+
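+// For example (illustrative): SystemZFeature<"high-word", "HighWord", ...>
+// yields a Predicate testing Subtarget->hasHighWord(), an AssemblerPredicate
+// on FeatureHighWord, and a SubtargetFeature named "high-word" that sets
+// "HasHighWord".
+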
+class SystemZMissingFeature<string intname>
+ : Predicate<"!Subtarget->has"##intname##"()">;
+
+class SystemZFeatureList<list<SystemZFeature> x> {
+ list<SystemZFeature> List = x;
+}
+
+class SystemZFeatureAdd<list<SystemZFeature> x, list<SystemZFeature> y>
+ : SystemZFeatureList<!listconcat(x, y)>;
+
+//===----------------------------------------------------------------------===//
+//
+// New features added in the Ninth Edition of the z/Architecture
+//
+//===----------------------------------------------------------------------===//
+
+def FeatureDistinctOps : SystemZFeature<
+ "distinct-ops", "DistinctOps",
+ "Assume that the distinct-operands facility is installed"
+>;
+
+def FeatureFastSerialization : SystemZFeature<
+ "fast-serialization", "FastSerialization",
+ "Assume that the fast-serialization facility is installed"
+>;
+
+def FeatureFPExtension : SystemZFeature<
+ "fp-extension", "FPExtension",
+ "Assume that the floating-point extension facility is installed"
+>;
+
+def FeatureHighWord : SystemZFeature<
+ "high-word", "HighWord",
+ "Assume that the high-word facility is installed"
+>;
+
+def FeatureInterlockedAccess1 : SystemZFeature<
+ "interlocked-access1", "InterlockedAccess1",
+ "Assume that interlocked-access facility 1 is installed"
+>;
+def FeatureNoInterlockedAccess1 : SystemZMissingFeature<"InterlockedAccess1">;
+
+def FeatureLoadStoreOnCond : SystemZFeature<
+ "load-store-on-cond", "LoadStoreOnCond",
+ "Assume that the load/store-on-condition facility is installed"
+>;
+
+def FeaturePopulationCount : SystemZFeature<
+ "population-count", "PopulationCount",
+ "Assume that the population-count facility is installed"
+>;
+
+def Arch9NewFeatures : SystemZFeatureList<[
+ FeatureDistinctOps,
+ FeatureFastSerialization,
+ FeatureFPExtension,
+ FeatureHighWord,
+ FeatureInterlockedAccess1,
+ FeatureLoadStoreOnCond,
+ FeaturePopulationCount
+]>;
+
+//===----------------------------------------------------------------------===//
+//
+// New features added in the Tenth Edition of the z/Architecture
+//
+//===----------------------------------------------------------------------===//
+
+def FeatureExecutionHint : SystemZFeature<
+ "execution-hint", "ExecutionHint",
+ "Assume that the execution-hint facility is installed"
+>;
+
+def FeatureLoadAndTrap : SystemZFeature<
+ "load-and-trap", "LoadAndTrap",
+ "Assume that the load-and-trap facility is installed"
+>;
+
+def FeatureMiscellaneousExtensions : SystemZFeature<
+ "miscellaneous-extensions", "MiscellaneousExtensions",
+ "Assume that the miscellaneous-extensions facility is installed"
+>;
+
+def FeatureProcessorAssist : SystemZFeature<
+ "processor-assist", "ProcessorAssist",
+ "Assume that the processor-assist facility is installed"
+>;
+
+def FeatureTransactionalExecution : SystemZFeature<
+ "transactional-execution", "TransactionalExecution",
+ "Assume that the transactional-execution facility is installed"
+>;
+
+def Arch10NewFeatures : SystemZFeatureList<[
+ FeatureExecutionHint,
+ FeatureLoadAndTrap,
+ FeatureMiscellaneousExtensions,
+ FeatureProcessorAssist,
+ FeatureTransactionalExecution
+]>;
+
+//===----------------------------------------------------------------------===//
+//
+// New features added in the Eleventh Edition of the z/Architecture
+//
+//===----------------------------------------------------------------------===//
+
+def FeatureLoadAndZeroRightmostByte : SystemZFeature<
+ "load-and-zero-rightmost-byte", "LoadAndZeroRightmostByte",
+ "Assume that the load-and-zero-rightmost-byte facility is installed"
+>;
+
+def FeatureLoadStoreOnCond2 : SystemZFeature<
+ "load-store-on-cond-2", "LoadStoreOnCond2",
+ "Assume that the load/store-on-condition facility 2 is installed"
+>;
+
+def FeatureVector : SystemZFeature<
+ "vector", "Vector",
+ "Assume that the vectory facility is installed"
+>;
+def FeatureNoVector : SystemZMissingFeature<"Vector">;
+
+def Arch11NewFeatures : SystemZFeatureList<[
+ FeatureLoadAndZeroRightmostByte,
+ FeatureLoadStoreOnCond2,
+ FeatureVector
+]>;
+
+//===----------------------------------------------------------------------===//
+//
+// Cumulative supported and unsupported feature sets
+//
+//===----------------------------------------------------------------------===//
+
+def Arch8SupportedFeatures
+ : SystemZFeatureList<[]>;
+def Arch9SupportedFeatures
+ : SystemZFeatureAdd<Arch8SupportedFeatures.List, Arch9NewFeatures.List>;
+def Arch10SupportedFeatures
+ : SystemZFeatureAdd<Arch9SupportedFeatures.List, Arch10NewFeatures.List>;
+def Arch11SupportedFeatures
+ : SystemZFeatureAdd<Arch10SupportedFeatures.List, Arch11NewFeatures.List>;
+
+def Arch11UnsupportedFeatures
+ : SystemZFeatureList<[]>;
+def Arch10UnsupportedFeatures
+ : SystemZFeatureAdd<Arch11UnsupportedFeatures.List, Arch11NewFeatures.List>;
+def Arch9UnsupportedFeatures
+ : SystemZFeatureAdd<Arch10UnsupportedFeatures.List, Arch10NewFeatures.List>;
+def Arch8UnsupportedFeatures
+ : SystemZFeatureAdd<Arch9UnsupportedFeatures.List, Arch9NewFeatures.List>;
+
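Each architecture level above is built by concatenating its predecessor's supported-feature list with the features newly introduced at that level (SystemZFeatureAdd is just !listconcat). A small standalone C++ sketch of that composition, with the feature names taken from the definitions above and everything else invented for illustration, not part of the patch:

#include <string>
#include <vector>

// Mirrors SystemZFeatureAdd / !listconcat: a level's supported features are
// the previous level's features plus the ones newly introduced at that level.
using FeatureList = std::vector<std::string>;

static FeatureList featureAdd(const FeatureList &Base, const FeatureList &New) {
  FeatureList Out(Base);
  Out.insert(Out.end(), New.begin(), New.end());
  return Out;
}

static const FeatureList Arch8Supported = {};
static const FeatureList Arch9Supported = featureAdd(
    Arch8Supported, {"distinct-ops", "fast-serialization", "fp-extension",
                     "high-word", "interlocked-access1", "load-store-on-cond",
                     "population-count"});
static const FeatureList Arch10Supported = featureAdd(
    Arch9Supported, {"execution-hint", "load-and-trap",
                     "miscellaneous-extensions", "processor-assist",
                     "transactional-execution"});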
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
index ccaed49475ca..a28a91e834f6 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -67,7 +67,7 @@ void SystemZFrameLowering::determineCalleeSaves(MachineFunction &MF,
RegScavenger *RS) const {
TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
- MachineFrameInfo *MFFrame = MF.getFrameInfo();
+ MachineFrameInfo &MFFrame = MF.getFrameInfo();
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
bool HasFP = hasFP(MF);
SystemZMachineFunctionInfo *MFI = MF.getInfo<SystemZMachineFunctionInfo>();
@@ -82,7 +82,7 @@ void SystemZFrameLowering::determineCalleeSaves(MachineFunction &MF,
SavedRegs.set(SystemZ::ArgGPRs[I]);
// If there are any landing pads, entering them will modify r6/r7.
- if (!MF.getMMI().getLandingPads().empty()) {
+ if (!MF.getLandingPads().empty()) {
SavedRegs.set(SystemZ::R6D);
SavedRegs.set(SystemZ::R7D);
}
@@ -94,7 +94,7 @@ void SystemZFrameLowering::determineCalleeSaves(MachineFunction &MF,
// If the function calls other functions, record that the return
// address register will be clobbered.
- if (MFFrame->hasCalls())
+ if (MFFrame.hasCalls())
SavedRegs.set(SystemZ::R14D);
// If we are saving GPRs other than the stack pointer, we might as well
@@ -276,16 +276,16 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
void SystemZFrameLowering::
processFunctionBeforeFrameFinalized(MachineFunction &MF,
RegScavenger *RS) const {
- MachineFrameInfo *MFFrame = MF.getFrameInfo();
- uint64_t MaxReach = (MFFrame->estimateStackSize(MF) +
+ MachineFrameInfo &MFFrame = MF.getFrameInfo();
+ uint64_t MaxReach = (MFFrame.estimateStackSize(MF) +
SystemZMC::CallFrameSize * 2);
if (!isUInt<12>(MaxReach)) {
// We may need register scavenging slots if some parts of the frame
// are outside the reach of an unsigned 12-bit displacement.
// Create 2 for the case where both addresses in an MVC are
// out of range.
- RS->addScavengingFrameIndex(MFFrame->CreateStackObject(8, 8, false));
- RS->addScavengingFrameIndex(MFFrame->CreateStackObject(8, 8, false));
+ RS->addScavengingFrameIndex(MFFrame.CreateStackObject(8, 8, false));
+ RS->addScavengingFrameIndex(MFFrame.CreateStackObject(8, 8, false));
}
}
@@ -321,14 +321,14 @@ static void emitIncrement(MachineBasicBlock &MBB,
void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
- MachineFrameInfo *MFFrame = MF.getFrameInfo();
+ MachineFrameInfo &MFFrame = MF.getFrameInfo();
auto *ZII =
static_cast<const SystemZInstrInfo *>(MF.getSubtarget().getInstrInfo());
SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
MachineBasicBlock::iterator MBBI = MBB.begin();
MachineModuleInfo &MMI = MF.getMMI();
const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
- const std::vector<CalleeSavedInfo> &CSI = MFFrame->getCalleeSavedInfo();
+ const std::vector<CalleeSavedInfo> &CSI = MFFrame.getCalleeSavedInfo();
bool HasFP = hasFP(MF);
// Debug location must be unknown since the first debug location is used
@@ -350,7 +350,7 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
unsigned Reg = Save.getReg();
if (SystemZ::GR64BitRegClass.contains(Reg)) {
int64_t Offset = SPOffsetFromCFA + RegSpillOffsets[Reg];
- unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
@@ -374,7 +374,7 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
emitIncrement(MBB, MBBI, DL, SystemZ::R15D, Delta, ZII);
// Add CFI for the allocation.
- unsigned CFIIndex = MMI.addFrameInst(
+ unsigned CFIIndex = MF.addFrameInst(
MCCFIInstruction::createDefCfaOffset(nullptr, SPOffsetFromCFA + Delta));
BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
@@ -392,7 +392,7 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
// Add CFI for the new frame location.
unsigned HardFP = MRI->getDwarfRegNum(SystemZ::R11D, true);
- unsigned CFIIndex = MMI.addFrameInst(
+ unsigned CFIIndex = MF.addFrameInst(
MCCFIInstruction::createDefCfaRegister(nullptr, HardFP));
BuildMI(MBB, MBBI, DL, ZII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
@@ -422,7 +422,7 @@ void SystemZFrameLowering::emitPrologue(MachineFunction &MF,
int64_t Offset =
getFrameIndexReference(MF, Save.getFrameIdx(), IgnoredFrameReg);
- unsigned CFIIndex = MMI.addFrameInst(MCCFIInstruction::createOffset(
+ unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
nullptr, DwarfReg, SPOffsetFromCFA + Offset));
CFIIndexes.push_back(CFIIndex);
}
@@ -478,14 +478,14 @@ void SystemZFrameLowering::emitEpilogue(MachineFunction &MF,
bool SystemZFrameLowering::hasFP(const MachineFunction &MF) const {
return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
- MF.getFrameInfo()->hasVarSizedObjects() ||
+ MF.getFrameInfo().hasVarSizedObjects() ||
MF.getInfo<SystemZMachineFunctionInfo>()->getManipulatesSP());
}
int SystemZFrameLowering::getFrameIndexReference(const MachineFunction &MF,
int FI,
unsigned &FrameReg) const {
- const MachineFrameInfo *MFFrame = MF.getFrameInfo();
+ const MachineFrameInfo &MFFrame = MF.getFrameInfo();
const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
// Fill in FrameReg output argument.
@@ -494,8 +494,8 @@ int SystemZFrameLowering::getFrameIndexReference(const MachineFunction &MF,
// Start with the offset of FI from the top of the caller-allocated frame
// (i.e. the top of the 160 bytes allocated by the caller). This initial
// offset is therefore negative.
- int64_t Offset = (MFFrame->getObjectOffset(FI) +
- MFFrame->getOffsetAdjustment());
+ int64_t Offset = (MFFrame.getObjectOffset(FI) +
+ MFFrame.getOffsetAdjustment());
// Make the offset relative to the incoming stack pointer.
Offset -= getOffsetOfLocalArea();
@@ -508,15 +508,15 @@ int SystemZFrameLowering::getFrameIndexReference(const MachineFunction &MF,
uint64_t SystemZFrameLowering::
getAllocatedStackSize(const MachineFunction &MF) const {
- const MachineFrameInfo *MFFrame = MF.getFrameInfo();
+ const MachineFrameInfo &MFFrame = MF.getFrameInfo();
// Start with the size of the local variables and spill slots.
- uint64_t StackSize = MFFrame->getStackSize();
+ uint64_t StackSize = MFFrame.getStackSize();
// We need to allocate the ABI-defined 160-byte base area whenever
// we allocate stack space for our own use and whenever we call another
// function.
- if (StackSize || MFFrame->hasVarSizedObjects() || MFFrame->hasCalls())
+ if (StackSize || MFFrame.hasVarSizedObjects() || MFFrame.hasCalls())
StackSize += SystemZMC::CallFrameSize;
return StackSize;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
new file mode 100644
index 000000000000..fe4b52b515e0
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
@@ -0,0 +1,337 @@
+//=-- SystemZHazardRecognizer.cpp - SystemZ Hazard Recognizer ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a hazard recognizer for the SystemZ scheduler.
+//
+// This class is used by the SystemZ scheduling strategy to maintain
+// the state during scheduling, and provide cost functions for
+// scheduling candidates. This includes:
+//
+// * Decoder grouping. A decoder group can hold at most 3 uops, and
+//   instructions that always begin a new group should be scheduled when
+//   the current decoder group is empty.
+// * Processor resource usage. It is beneficial to balance the use of
+//   processor resources.
+//
+// ===---------------------------------------------------------------------===//
+
+#include "SystemZHazardRecognizer.h"
+#include "llvm/ADT/Statistic.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "misched"
+
+// This is the limit of processor resource usage at which the
+// scheduler should try to look for other instructions (not using the
+// critical resource).
+static cl::opt<int> ProcResCostLim("procres-cost-lim", cl::Hidden,
+ cl::desc("The OOO window for processor "
+ "resources during scheduling."),
+ cl::init(8));
+
+SystemZHazardRecognizer::
+SystemZHazardRecognizer(const MachineSchedContext *C) : DAG(nullptr),
+ SchedModel(nullptr) {}
+
+unsigned SystemZHazardRecognizer::
+getNumDecoderSlots(SUnit *SU) const {
+ const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+ if (!SC->isValid())
+    return 0; // IMPLICIT_DEF / KILL -- has no impact on the output.
+
+ if (SC->BeginGroup) {
+ if (!SC->EndGroup)
+ return 2; // Cracked instruction
+ else
+ return 3; // Expanded/group-alone instruction
+ }
+
+ return 1; // Normal instruction
+}
+
+unsigned SystemZHazardRecognizer::getCurrCycleIdx() {
+ unsigned Idx = CurrGroupSize;
+ if (GrpCount % 2)
+ Idx += 3;
+ return Idx;
+}
+
+ScheduleHazardRecognizer::HazardType SystemZHazardRecognizer::
+getHazardType(SUnit *m, int Stalls) {
+ return (fitsIntoCurrentGroup(m) ? NoHazard : Hazard);
+}
+
+void SystemZHazardRecognizer::Reset() {
+ CurrGroupSize = 0;
+ clearProcResCounters();
+ GrpCount = 0;
+ LastFPdOpCycleIdx = UINT_MAX;
+ DEBUG(CurGroupDbg = "";);
+}
+
+bool
+SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const {
+ const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+ if (!SC->isValid())
+ return true;
+
+  // A cracked instruction only fits into the current decoder group if
+  // that group is empty.
+ if (SC->BeginGroup)
+ return (CurrGroupSize == 0);
+
+  // Since a full group is handled immediately in EmitInstruction(),
+  // SU should fit into the current group. NumSlots should be 1 or 0,
+  // since it is not a cracked or expanded instruction.
+ assert ((getNumDecoderSlots(SU) <= 1) && (CurrGroupSize < 3) &&
+ "Expected normal instruction to fit in non-full group!");
+
+ return true;
+}
+
+void SystemZHazardRecognizer::nextGroup(bool DbgOutput) {
+ if (CurrGroupSize > 0) {
+ DEBUG(dumpCurrGroup("Completed decode group"));
+ DEBUG(CurGroupDbg = "";);
+
+ GrpCount++;
+
+ // Reset counter for next group.
+ CurrGroupSize = 0;
+
+ // Decrease counters for execution units by one.
+ for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
+ if (ProcResourceCounters[i] > 0)
+ ProcResourceCounters[i]--;
+
+ // Clear CriticalResourceIdx if it is now below the threshold.
+ if (CriticalResourceIdx != UINT_MAX &&
+ (ProcResourceCounters[CriticalResourceIdx] <=
+ ProcResCostLim))
+ CriticalResourceIdx = UINT_MAX;
+ }
+
+ DEBUG(if (DbgOutput)
+ dumpProcResourceCounters(););
+}
+
+#ifndef NDEBUG // Debug output
+void SystemZHazardRecognizer::dumpSU(SUnit *SU, raw_ostream &OS) const {
+ OS << "SU(" << SU->NodeNum << "):";
+ OS << SchedModel->getInstrInfo()->getName(SU->getInstr()->getOpcode());
+
+ const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+ if (!SC->isValid())
+ return;
+
+ for (TargetSchedModel::ProcResIter
+ PI = SchedModel->getWriteProcResBegin(SC),
+ PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+ const MCProcResourceDesc &PRD =
+ *SchedModel->getProcResource(PI->ProcResourceIdx);
+ std::string FU(PRD.Name);
+ // trim e.g. Z13_FXaUnit -> FXa
+ FU = FU.substr(FU.find("_") + 1);
+ FU.resize(FU.find("Unit"));
+ OS << "/" << FU;
+
+ if (PI->Cycles > 1)
+ OS << "(" << PI->Cycles << "cyc)";
+ }
+
+ if (SC->NumMicroOps > 1)
+ OS << "/" << SC->NumMicroOps << "uops";
+ if (SC->BeginGroup && SC->EndGroup)
+ OS << "/GroupsAlone";
+ else if (SC->BeginGroup)
+ OS << "/BeginsGroup";
+ else if (SC->EndGroup)
+ OS << "/EndsGroup";
+ if (SU->isUnbuffered)
+ OS << "/Unbuffered";
+}
+
+void SystemZHazardRecognizer::dumpCurrGroup(std::string Msg) const {
+ dbgs() << "+++ " << Msg;
+ dbgs() << ": ";
+
+ if (CurGroupDbg.empty())
+ dbgs() << " <empty>\n";
+ else {
+ dbgs() << "{ " << CurGroupDbg << " }";
+ dbgs() << " (" << CurrGroupSize << " decoder slot"
+ << (CurrGroupSize > 1 ? "s":"")
+ << ")\n";
+ }
+}
+
+void SystemZHazardRecognizer::dumpProcResourceCounters() const {
+ bool any = false;
+
+ for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
+ if (ProcResourceCounters[i] > 0) {
+ any = true;
+ break;
+ }
+
+ if (!any)
+ return;
+
+ dbgs() << "+++ Resource counters:\n";
+ for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
+ if (ProcResourceCounters[i] > 0) {
+ dbgs() << "+++ Extra schedule for execution unit "
+ << SchedModel->getProcResource(i)->Name
+ << ": " << ProcResourceCounters[i] << "\n";
+ any = true;
+ }
+}
+#endif //NDEBUG
+
+void SystemZHazardRecognizer::clearProcResCounters() {
+ ProcResourceCounters.assign(SchedModel->getNumProcResourceKinds(), 0);
+ CriticalResourceIdx = UINT_MAX;
+}
+
+// Update state with SU as the next scheduled unit.
+void SystemZHazardRecognizer::
+EmitInstruction(SUnit *SU) {
+ const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+ DEBUG( dumpCurrGroup("Decode group before emission"););
+
+ // If scheduling an SU that must begin a new decoder group, move on
+ // to next group.
+ if (!fitsIntoCurrentGroup(SU))
+ nextGroup();
+
+ DEBUG( dbgs() << "+++ HazardRecognizer emitting "; dumpSU(SU, dbgs());
+ dbgs() << "\n";
+ raw_string_ostream cgd(CurGroupDbg);
+ if (CurGroupDbg.length())
+ cgd << ", ";
+ dumpSU(SU, cgd););
+
+ // After returning from a call, we don't know much about the state.
+ if (SU->getInstr()->isCall()) {
+ DEBUG (dbgs() << "+++ Clearing state after call.\n";);
+ clearProcResCounters();
+ LastFPdOpCycleIdx = UINT_MAX;
+ CurrGroupSize += getNumDecoderSlots(SU);
+ assert (CurrGroupSize <= 3);
+ nextGroup();
+ return;
+ }
+
+ // Increase counter for execution unit(s).
+ for (TargetSchedModel::ProcResIter
+ PI = SchedModel->getWriteProcResBegin(SC),
+ PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
+ // Don't handle FPd together with the other resources.
+ if (SchedModel->getProcResource(PI->ProcResourceIdx)->BufferSize == 1)
+ continue;
+ int &CurrCounter =
+ ProcResourceCounters[PI->ProcResourceIdx];
+ CurrCounter += PI->Cycles;
+ // Check if this is now the new critical resource.
+ if ((CurrCounter > ProcResCostLim) &&
+ (CriticalResourceIdx == UINT_MAX ||
+ (PI->ProcResourceIdx != CriticalResourceIdx &&
+ CurrCounter >
+ ProcResourceCounters[CriticalResourceIdx]))) {
+ DEBUG( dbgs() << "+++ New critical resource: "
+ << SchedModel->getProcResource(PI->ProcResourceIdx)->Name
+ << "\n";);
+ CriticalResourceIdx = PI->ProcResourceIdx;
+ }
+ }
+
+ // Make note of an instruction that uses a blocking resource (FPd).
+ if (SU->isUnbuffered) {
+ LastFPdOpCycleIdx = getCurrCycleIdx();
+ DEBUG (dbgs() << "+++ Last FPd cycle index: "
+ << LastFPdOpCycleIdx << "\n";);
+ }
+
+ // Insert SU into current group by increasing number of slots used
+ // in current group.
+ CurrGroupSize += getNumDecoderSlots(SU);
+ assert (CurrGroupSize <= 3);
+
+ // Check if current group is now full/ended. If so, move on to next
+ // group to be ready to evaluate more candidates.
+ if (CurrGroupSize == 3 || SC->EndGroup)
+ nextGroup();
+}
+
+int SystemZHazardRecognizer::groupingCost(SUnit *SU) const {
+ const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+ if (!SC->isValid())
+ return 0;
+
+ // If SU begins new group, it can either break a current group early
+ // or fit naturally if current group is empty (negative cost).
+ if (SC->BeginGroup) {
+ if (CurrGroupSize)
+ return 3 - CurrGroupSize;
+ return -1;
+ }
+
+ // Similarly, a group-ending SU may either fit well (last in group), or
+ // end the group prematurely.
+ if (SC->EndGroup) {
+ unsigned resultingGroupSize =
+ (CurrGroupSize + getNumDecoderSlots(SU));
+ if (resultingGroupSize < 3)
+ return (3 - resultingGroupSize);
+ return -1;
+ }
+
+ // Most instructions can be placed in any decoder slot.
+ return 0;
+}
+
+bool SystemZHazardRecognizer::isFPdOpPreferred_distance(const SUnit *SU) {
+ assert (SU->isUnbuffered);
+ // If this is the first FPd op, it should be scheduled high.
+ if (LastFPdOpCycleIdx == UINT_MAX)
+ return true;
+  // If this is not the first FPd op, it should go into the other side
+ // of the processor to use the other FPd unit there. This should
+ // generally happen if two FPd ops are placed with 2 other
+ // instructions between them (modulo 6).
+ if (LastFPdOpCycleIdx > getCurrCycleIdx())
+ return ((LastFPdOpCycleIdx - getCurrCycleIdx()) == 3);
+ return ((getCurrCycleIdx() - LastFPdOpCycleIdx) == 3);
+}
+
+int SystemZHazardRecognizer::
+resourcesCost(SUnit *SU) {
+ int Cost = 0;
+
+ const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+ if (!SC->isValid())
+ return 0;
+
+ // For a FPd op, either return min or max value as indicated by the
+ // distance to any prior FPd op.
+ if (SU->isUnbuffered)
+ Cost = (isFPdOpPreferred_distance(SU) ? INT_MIN : INT_MAX);
+ // For other instructions, give a cost to the use of the critical resource.
+ else if (CriticalResourceIdx != UINT_MAX) {
+ for (TargetSchedModel::ProcResIter
+ PI = SchedModel->getWriteProcResBegin(SC),
+ PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI)
+ if (PI->ProcResourceIdx == CriticalResourceIdx)
+ Cost = PI->Cycles;
+ }
+
+ return Cost;
+}
+
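getCurrCycleIdx() above numbers the decoder slots 0 through 5 across the two groups of a cycle, and isFPdOpPreferred_distance() prefers an FPd op placed exactly three slots away from the previous one, so it lands on the other processor side. A standalone sketch of that distance rule, with invented helper names and not part of the patch:

#include <cassert>

// Sketch of the FPd placement rule: prefer an FPd op whose decoder-slot index
// is exactly three slots (half a cycle) away from the previous FPd op.
static bool fpdPreferredAtDistance(unsigned LastFPdIdx, unsigned CurrIdx) {
  unsigned Dist =
      LastFPdIdx > CurrIdx ? LastFPdIdx - CurrIdx : CurrIdx - LastFPdIdx;
  return Dist == 3;
}

static void fpdDistanceExamples() {
  assert(fpdPreferredAtDistance(1, 4));  // other processor side: preferred
  assert(!fpdPreferredAtDistance(2, 3)); // adjacent slots: same side
}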
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.h b/contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.h
new file mode 100644
index 000000000000..8fa54ee434cf
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.h
@@ -0,0 +1,128 @@
+//=-- SystemZHazardRecognizer.h - SystemZ Hazard Recognizer -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares a hazard recognizer for the SystemZ scheduler.
+//
+// This class is used by the SystemZ scheduling strategy to maintain
+// the state during scheduling, and provide cost functions for
+// scheduling candidates. This includes:
+//
+// * Decoder grouping. A decoder group can hold at most 3 uops, and
+//   instructions that always begin a new group should be scheduled when
+//   the current decoder group is empty.
+// * Processor resource usage. It is beneficial to balance the use of
+//   processor resources.
+//
+// ===---------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZHAZARDRECOGNIZER_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZHAZARDRECOGNIZER_H
+
+#include "SystemZSubtarget.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Support/raw_ostream.h"
+#include <string>
+
+namespace llvm {
+
+/// SystemZHazardRecognizer maintains the state during scheduling.
+class SystemZHazardRecognizer : public ScheduleHazardRecognizer {
+
+ ScheduleDAGMI *DAG;
+ const TargetSchedModel *SchedModel;
+
+ /// Keep track of the number of decoder slots used in the current
+ /// decoder group.
+ unsigned CurrGroupSize;
+
+  /// The tracking of resources here is quite similar to the common
+  /// code's use of a critical resource. However, z13 differs in that it
+  /// has two processor sides, which may be interesting to model in the
+  /// future (a work in progress).
+
+ /// Counters for the number of uops scheduled per processor
+ /// resource.
+ SmallVector<int, 0> ProcResourceCounters;
+
+  /// This is the resource with the highest accumulated usage count,
+  /// which the scheduler tries to avoid overloading.
+ unsigned CriticalResourceIdx;
+
+  /// Return the number of decoder slots SU requires.
+ inline unsigned getNumDecoderSlots(SUnit *SU) const;
+
+  /// Return true if SU fits into the current decoder group.
+ bool fitsIntoCurrentGroup(SUnit *SU) const;
+
+ /// Two decoder groups per cycle are formed (for z13), meaning 2x3
+ /// instructions. This function returns a number between 0 and 5,
+ /// representing the current decoder slot of the current cycle.
+ unsigned getCurrCycleIdx();
+
+  /// LastFPdOpCycleIdx stores the number returned by getCurrCycleIdx()
+ /// when a stalling operation is scheduled (which uses the FPd resource).
+ unsigned LastFPdOpCycleIdx;
+
+ /// A counter of decoder groups scheduled.
+ unsigned GrpCount;
+
+  unsigned getCurrGroupSize() { return CurrGroupSize; }
+
+ /// Start next decoder group.
+ void nextGroup(bool DbgOutput = true);
+
+ /// Clear all counters for processor resources.
+ void clearProcResCounters();
+
+ /// With the goal of alternating processor sides for stalling (FPd)
+ /// ops, return true if it seems good to schedule an FPd op next.
+ bool isFPdOpPreferred_distance(const SUnit *SU);
+
+public:
+ SystemZHazardRecognizer(const MachineSchedContext *C);
+
+ void setDAG(ScheduleDAGMI *dag) {
+ DAG = dag;
+ SchedModel = dag->getSchedModel();
+ }
+
+ HazardType getHazardType(SUnit *m, int Stalls = 0) override;
+ void Reset() override;
+ void EmitInstruction(SUnit *SU) override;
+
+ // Cost functions used by SystemZPostRASchedStrategy while
+ // evaluating candidates.
+
+  /// Return the cost of decoder grouping for SU. If SU must begin or end
+  /// a decoder group, the cost is negative if this fits the current group
+  /// well and positive if it would mean ending a group prematurely. For
+  /// normal instructions this returns 0.
+ int groupingCost(SUnit *SU) const;
+
+  /// Return the cost of SU with regard to processor resource usage.
+ /// A positive value means it would be better to wait with SU, while
+ /// a negative value means it would be good to schedule SU next.
+ int resourcesCost(SUnit *SU);
+
+#ifndef NDEBUG
+ // Debug dumping.
+ std::string CurGroupDbg; // current group as text
+ void dumpSU(SUnit *SU, raw_ostream &OS) const;
+ void dumpCurrGroup(std::string Msg = "") const;
+ void dumpProcResourceCounters() const;
+#endif
+};
+
+} // namespace llvm
+
+#endif /* LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZHAZARDRECOGNIZER_H */
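The two cost functions declared above feed the post-RA scheduling strategy: groupingCost() rewards instructions that fit the current decoder group, and resourcesCost() rewards relieving the critical resource. A hedged sketch of how a strategy could compare two candidates on these costs; the actual SystemZPostRASchedStrategy may combine them differently, and the names below are invented for illustration:

// Illustrative only: compare candidates by decoder-grouping cost first and
// break ties on processor-resource cost (lower, i.e. more negative, is better).
struct CandidateCosts {
  int Grouping;   // from SystemZHazardRecognizer::groupingCost()
  int Resources;  // from SystemZHazardRecognizer::resourcesCost()
};

static bool isBetterCandidate(const CandidateCosts &A,
                              const CandidateCosts &B) {
  if (A.Grouping != B.Grouping)
    return A.Grouping < B.Grouping;
  return A.Resources < B.Resources;
}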
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index cd7fcc3070a4..920b6e430e8f 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -117,7 +117,7 @@ static uint64_t allOnes(unsigned int Count) {
// case the result will be truncated as part of the operation).
struct RxSBGOperands {
RxSBGOperands(unsigned Op, SDValue N)
- : Opcode(Op), BitSize(N.getValueType().getSizeInBits()),
+ : Opcode(Op), BitSize(N.getValueSizeInBits()),
Mask(allOnes(BitSize)), Input(N), Start(64 - BitSize), End(63),
Rotate(0) {}
@@ -339,7 +339,7 @@ public:
}
// Override MachineFunctionPass.
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "SystemZ DAG->DAG Pattern Instruction Selection";
}
@@ -709,7 +709,7 @@ bool SystemZDAGToDAGISel::detectOrAndInsertion(SDValue &Op,
// It's only an insertion if all bits are covered or are known to be zero.
// The inner check covers all cases but is more expensive.
- uint64_t Used = allOnes(Op.getValueType().getSizeInBits());
+ uint64_t Used = allOnes(Op.getValueSizeInBits());
if (Used != (AndMask | InsertMask)) {
APInt KnownZero, KnownOne;
CurDAG->computeKnownBits(Op.getOperand(0), KnownZero, KnownOne);
@@ -749,7 +749,7 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) const {
case ISD::TRUNCATE: {
if (RxSBG.Opcode == SystemZ::RNSBG)
return false;
- uint64_t BitSize = N.getValueType().getSizeInBits();
+ uint64_t BitSize = N.getValueSizeInBits();
uint64_t Mask = allOnes(BitSize);
if (!refineRxSBGMask(RxSBG, Mask))
return false;
@@ -825,19 +825,19 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) const {
case ISD::ZERO_EXTEND:
if (RxSBG.Opcode != SystemZ::RNSBG) {
// Restrict the mask to the extended operand.
- unsigned InnerBitSize = N.getOperand(0).getValueType().getSizeInBits();
+ unsigned InnerBitSize = N.getOperand(0).getValueSizeInBits();
if (!refineRxSBGMask(RxSBG, allOnes(InnerBitSize)))
return false;
RxSBG.Input = N.getOperand(0);
return true;
}
- // Fall through.
+ LLVM_FALLTHROUGH;
case ISD::SIGN_EXTEND: {
// Check that the extension bits are don't-care (i.e. are masked out
// by the final mask).
- unsigned InnerBitSize = N.getOperand(0).getValueType().getSizeInBits();
+ unsigned InnerBitSize = N.getOperand(0).getValueSizeInBits();
if (maskMatters(RxSBG, allOnes(RxSBG.BitSize) - allOnes(InnerBitSize)))
return false;
@@ -851,7 +851,7 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) const {
return false;
uint64_t Count = CountNode->getZExtValue();
- unsigned BitSize = N.getValueType().getSizeInBits();
+ unsigned BitSize = N.getValueSizeInBits();
if (Count < 1 || Count >= BitSize)
return false;
@@ -878,7 +878,7 @@ bool SystemZDAGToDAGISel::expandRxSBG(RxSBGOperands &RxSBG) const {
return false;
uint64_t Count = CountNode->getZExtValue();
- unsigned BitSize = N.getValueType().getSizeInBits();
+ unsigned BitSize = N.getValueSizeInBits();
if (Count < 1 || Count >= BitSize)
return false;
@@ -935,49 +935,55 @@ bool SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) {
Count += 1;
if (Count == 0)
return false;
- if (Count == 1) {
- // Prefer to use normal shift instructions over RISBG, since they can handle
- // all cases and are sometimes shorter.
- if (N->getOpcode() != ISD::AND)
- return false;
- // Prefer register extensions like LLC over RISBG. Also prefer to start
- // out with normal ANDs if one instruction would be enough. We can convert
- // these ANDs into an RISBG later if a three-address instruction is useful.
- if (VT == MVT::i32 ||
- RISBG.Mask == 0xff ||
- RISBG.Mask == 0xffff ||
- SystemZ::isImmLF(~RISBG.Mask) ||
- SystemZ::isImmHF(~RISBG.Mask)) {
- // Force the new mask into the DAG, since it may include known-one bits.
- auto *MaskN = cast<ConstantSDNode>(N->getOperand(1).getNode());
- if (MaskN->getZExtValue() != RISBG.Mask) {
- SDValue NewMask = CurDAG->getConstant(RISBG.Mask, DL, VT);
- N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), NewMask);
- SelectCode(N);
- return true;
- }
- return false;
- }
- }
+ // Prefer to use normal shift instructions over RISBG, since they can handle
+ // all cases and are sometimes shorter.
+ if (Count == 1 && N->getOpcode() != ISD::AND)
+ return false;
- // If the RISBG operands require no rotation and just masks the bottom
- // 8/16 bits, attempt to convert this to a LLC zero extension.
- if (RISBG.Rotate == 0 && (RISBG.Mask == 0xff || RISBG.Mask == 0xffff)) {
- unsigned OpCode = (RISBG.Mask == 0xff ? SystemZ::LLGCR : SystemZ::LLGHR);
- if (VT == MVT::i32) {
- if (Subtarget->hasHighWord())
- OpCode = (RISBG.Mask == 0xff ? SystemZ::LLCRMux : SystemZ::LLHRMux);
- else
- OpCode = (RISBG.Mask == 0xff ? SystemZ::LLCR : SystemZ::LLHR);
+ // Prefer register extensions like LLC over RISBG. Also prefer to start
+ // out with normal ANDs if one instruction would be enough. We can convert
+ // these ANDs into an RISBG later if a three-address instruction is useful.
+ if (RISBG.Rotate == 0) {
+ bool PreferAnd = false;
+ // Prefer AND for any 32-bit and-immediate operation.
+ if (VT == MVT::i32)
+ PreferAnd = true;
+ // As well as for any 64-bit operation that can be implemented via LLC(R),
+ // LLH(R), LLGT(R), or one of the and-immediate instructions.
+ else if (RISBG.Mask == 0xff ||
+ RISBG.Mask == 0xffff ||
+ RISBG.Mask == 0x7fffffff ||
+ SystemZ::isImmLF(~RISBG.Mask) ||
+ SystemZ::isImmHF(~RISBG.Mask))
+ PreferAnd = true;
+ // And likewise for the LLZRGF instruction, which doesn't have a register
+ // to register version.
+ else if (auto *Load = dyn_cast<LoadSDNode>(RISBG.Input)) {
+ if (Load->getMemoryVT() == MVT::i32 &&
+ (Load->getExtensionType() == ISD::EXTLOAD ||
+ Load->getExtensionType() == ISD::ZEXTLOAD) &&
+ RISBG.Mask == 0xffffff00 &&
+ Subtarget->hasLoadAndZeroRightmostByte())
+ PreferAnd = true;
+ }
+ if (PreferAnd) {
+ // Replace the current node with an AND. Note that the current node
+ // might already be that same AND, in which case it is already CSE'd
+ // with it, and we must not call ReplaceNode.
+ SDValue In = convertTo(DL, VT, RISBG.Input);
+ SDValue Mask = CurDAG->getConstant(RISBG.Mask, DL, VT);
+ SDValue New = CurDAG->getNode(ISD::AND, DL, VT, In, Mask);
+ if (N != New.getNode()) {
+ insertDAGNode(CurDAG, N, Mask);
+ insertDAGNode(CurDAG, N, New);
+ ReplaceNode(N, New.getNode());
+ N = New.getNode();
+ }
+ // Now, select the machine opcode to implement this operation.
+ SelectCode(N);
+ return true;
}
-
- SDValue In = convertTo(DL, VT, RISBG.Input);
- SDValue New = convertTo(
- DL, VT, SDValue(CurDAG->getMachineNode(OpCode, DL, VT, In), 0));
- ReplaceUses(N, New.getNode());
- CurDAG->RemoveDeadNode(N);
- return true;
}
unsigned Opcode = SystemZ::RISBG;
@@ -1136,8 +1142,7 @@ bool SystemZDAGToDAGISel::tryScatter(StoreSDNode *Store, unsigned Opcode) {
SDValue Value = Store->getValue();
if (Value.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
return false;
- if (Store->getMemoryVT().getSizeInBits() !=
- Value.getValueType().getSizeInBits())
+ if (Store->getMemoryVT().getSizeInBits() != Value.getValueSizeInBits())
return false;
SDValue ElemV = Value.getOperand(1);
@@ -1176,7 +1181,7 @@ bool SystemZDAGToDAGISel::canUseBlockOperation(StoreSDNode *Store,
return false;
// There's no chance of overlap if the load is invariant.
- if (Load->isInvariant())
+ if (Load->isInvariant() && Load->isDereferenceable())
return true;
// Otherwise we need to check whether there's an alias.
@@ -1265,7 +1270,7 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) {
if (Node->getOperand(1).getOpcode() != ISD::Constant)
if (tryRxSBG(Node, SystemZ::RNSBG))
return;
- // Fall through.
+ LLVM_FALLTHROUGH;
case ISD::ROTL:
case ISD::SHL:
case ISD::SRL:
@@ -1291,8 +1296,14 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) {
SDValue Op0 = Node->getOperand(0);
SDValue Op1 = Node->getOperand(1);
// Prefer to put any load first, so that it can be matched as a
- // conditional load.
- if (Op1.getOpcode() == ISD::LOAD && Op0.getOpcode() != ISD::LOAD) {
+ // conditional load. Likewise for constants in range for LOCHI.
+ if ((Op1.getOpcode() == ISD::LOAD && Op0.getOpcode() != ISD::LOAD) ||
+ (Subtarget->hasLoadStoreOnCond2() &&
+ Node->getValueType(0).isInteger() &&
+ Op1.getOpcode() == ISD::Constant &&
+ isInt<16>(cast<ConstantSDNode>(Op1)->getSExtValue()) &&
+ !(Op0.getOpcode() == ISD::Constant &&
+ isInt<16>(cast<ConstantSDNode>(Op0)->getSExtValue())))) {
SDValue CCValid = Node->getOperand(2);
SDValue CCMask = Node->getOperand(3);
uint64_t ConstCCValid =
@@ -1310,7 +1321,7 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) {
case ISD::INSERT_VECTOR_ELT: {
EVT VT = Node->getValueType(0);
- unsigned ElemBitSize = VT.getVectorElementType().getSizeInBits();
+ unsigned ElemBitSize = VT.getScalarSizeInBits();
if (ElemBitSize == 32) {
if (tryGather(Node, SystemZ::VGEF))
return;
@@ -1323,7 +1334,7 @@ void SystemZDAGToDAGISel::Select(SDNode *Node) {
case ISD::STORE: {
auto *Store = cast<StoreSDNode>(Node);
- unsigned ElemBitSize = Store->getValue().getValueType().getSizeInBits();
+ unsigned ElemBitSize = Store->getValue().getValueSizeInBits();
if (ElemBitSize == 32) {
if (tryScatter(Store, SystemZ::VSCEF))
return;
@@ -1375,6 +1386,29 @@ SelectInlineAsmMemoryOperand(const SDValue &Op,
}
if (selectBDXAddr(Form, DispRange, Op, Base, Disp, Index)) {
+ const TargetRegisterClass *TRC =
+ Subtarget->getRegisterInfo()->getPointerRegClass(*MF);
+ SDLoc DL(Base);
+ SDValue RC = CurDAG->getTargetConstant(TRC->getID(), DL, MVT::i32);
+
+ // Make sure that the base address doesn't go into %r0.
+ // If it's a TargetFrameIndex or a fixed register, we shouldn't do anything.
+ if (Base.getOpcode() != ISD::TargetFrameIndex &&
+ Base.getOpcode() != ISD::Register) {
+ Base =
+ SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
+ DL, Base.getValueType(),
+ Base, RC), 0);
+ }
+
+ // Make sure that the index register isn't assigned to %r0 either.
+ if (Index.getOpcode() != ISD::Register) {
+ Index =
+ SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
+ DL, Index.getValueType(),
+ Index, RC), 0);
+ }
+
OutOps.push_back(Base);
OutOps.push_back(Disp);
OutOps.push_back(Index);
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 14991bbbd365..2081809def70 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -20,6 +20,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/IR/Intrinsics.h"
#include <cctype>
@@ -531,6 +532,28 @@ bool SystemZTargetLowering::isLegalAddressingMode(const DataLayout &DL,
return AM.Scale == 0 || AM.Scale == 1;
}
+bool SystemZTargetLowering::isFoldableMemAccessOffset(Instruction *I,
+ int64_t Offset) const {
+ // This only applies to z13.
+ if (!Subtarget.hasVector())
+ return true;
+
+ // * Use LDE instead of LE/LEY to avoid partial register
+ // dependencies (LDE only supports small offsets).
+ // * Utilize the vector registers to hold floating point
+ // values (vector load / store instructions only support small
+ // offsets).
+
+ assert (isa<LoadInst>(I) || isa<StoreInst>(I));
+ Type *MemAccessTy = (isa<LoadInst>(I) ? I->getType() :
+ I->getOperand(0)->getType());
+ if (!isUInt<12>(Offset) &&
+ (MemAccessTy->isFloatingPointTy() || MemAccessTy->isVectorTy()))
+ return false;
+
+ return true;
+}
+
bool SystemZTargetLowering::isTruncateFree(Type *FromType, Type *ToType) const {
if (!FromType->isIntegerTy() || !ToType->isIntegerTy())
return false;
@@ -864,7 +887,7 @@ SDValue SystemZTargetLowering::LowerFormalArguments(
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
SystemZMachineFunctionInfo *FuncInfo =
MF.getInfo<SystemZMachineFunctionInfo>();
@@ -927,8 +950,8 @@ SDValue SystemZTargetLowering::LowerFormalArguments(
assert(VA.isMemLoc() && "Argument not register or memory");
// Create the frame index object for this incoming parameter.
- int FI = MFI->CreateFixedObject(LocVT.getSizeInBits() / 8,
- VA.getLocMemOffset(), true);
+ int FI = MFI.CreateFixedObject(LocVT.getSizeInBits() / 8,
+ VA.getLocMemOffset(), true);
// Create the SelectionDAG nodes corresponding to a load
// from this parameter. Unpromoted ints and floats are
@@ -971,12 +994,12 @@ SDValue SystemZTargetLowering::LowerFormalArguments(
// Likewise the address (in the form of a frame index) of where the
// first stack vararg would be. The 1-byte size here is arbitrary.
int64_t StackSize = CCInfo.getNextStackOffset();
- FuncInfo->setVarArgsFrameIndex(MFI->CreateFixedObject(1, StackSize, true));
+ FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(1, StackSize, true));
// ...and a similar frame index for the caller-allocated save area
// that will be used to store the incoming registers.
int64_t RegSaveOffset = TFL->getOffsetOfLocalArea();
- unsigned RegSaveIndex = MFI->CreateFixedObject(1, RegSaveOffset, true);
+ unsigned RegSaveIndex = MFI.CreateFixedObject(1, RegSaveOffset, true);
FuncInfo->setRegSaveFrameIndex(RegSaveIndex);
// Store the FPR varargs in the reserved frame slots. (We store the
@@ -985,7 +1008,7 @@ SDValue SystemZTargetLowering::LowerFormalArguments(
SDValue MemOps[SystemZ::NumArgFPRs];
for (unsigned I = NumFixedFPRs; I < SystemZ::NumArgFPRs; ++I) {
unsigned Offset = TFL->getRegSpillOffset(SystemZ::ArgFPRs[I]);
- int FI = MFI->CreateFixedObject(8, RegSaveOffset + Offset, true);
+ int FI = MFI.CreateFixedObject(8, RegSaveOffset + Offset, true);
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
unsigned VReg = MF.addLiveIn(SystemZ::ArgFPRs[I],
&SystemZ::FP64BitRegClass);
@@ -1837,8 +1860,7 @@ static void adjustICmpTruncate(SelectionDAG &DAG, const SDLoc &DL,
C.Op1.getOpcode() == ISD::Constant &&
cast<ConstantSDNode>(C.Op1)->getZExtValue() == 0) {
auto *L = cast<LoadSDNode>(C.Op0.getOperand(0));
- if (L->getMemoryVT().getStoreSizeInBits()
- <= C.Op0.getValueType().getSizeInBits()) {
+ if (L->getMemoryVT().getStoreSizeInBits() <= C.Op0.getValueSizeInBits()) {
unsigned Type = L->getExtensionType();
if ((Type == ISD::ZEXTLOAD && C.ICmpType != SystemZICMP::SignedOnly) ||
(Type == ISD::SEXTLOAD && C.ICmpType != SystemZICMP::UnsignedOnly)) {
@@ -1857,7 +1879,7 @@ static bool isSimpleShift(SDValue N, unsigned &ShiftVal) {
return false;
uint64_t Amount = Shift->getZExtValue();
- if (Amount >= N.getValueType().getSizeInBits())
+ if (Amount >= N.getValueSizeInBits())
return false;
ShiftVal = Amount;
@@ -2008,7 +2030,7 @@ static void adjustForTestUnderMask(SelectionDAG &DAG, const SDLoc &DL,
// Check whether the combination of mask, comparison value and comparison
// type are suitable.
- unsigned BitSize = NewC.Op0.getValueType().getSizeInBits();
+ unsigned BitSize = NewC.Op0.getValueSizeInBits();
unsigned NewCCMask, ShiftVal;
if (NewC.ICmpType != SystemZICMP::SignedOnly &&
NewC.Op0.getOpcode() == ISD::SHL &&
@@ -2542,16 +2564,15 @@ SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
SDValue SystemZTargetLowering::lowerThreadPointer(const SDLoc &DL,
SelectionDAG &DAG) const {
+ SDValue Chain = DAG.getEntryNode();
EVT PtrVT = getPointerTy(DAG.getDataLayout());
// The high part of the thread pointer is in access register 0.
- SDValue TPHi = DAG.getNode(SystemZISD::EXTRACT_ACCESS, DL, MVT::i32,
- DAG.getConstant(0, DL, MVT::i32));
+ SDValue TPHi = DAG.getCopyFromReg(Chain, DL, SystemZ::A0, MVT::i32);
TPHi = DAG.getNode(ISD::ANY_EXTEND, DL, PtrVT, TPHi);
// The low part of the thread pointer is in access register 1.
- SDValue TPLo = DAG.getNode(SystemZISD::EXTRACT_ACCESS, DL, MVT::i32,
- DAG.getConstant(1, DL, MVT::i32));
+ SDValue TPLo = DAG.getCopyFromReg(Chain, DL, SystemZ::A1, MVT::i32);
TPLo = DAG.getNode(ISD::ZERO_EXTEND, DL, PtrVT, TPLo);
// Merge them into a single 64-bit address.
@@ -2691,8 +2712,8 @@ SDValue SystemZTargetLowering::lowerConstantPool(ConstantPoolSDNode *CP,
SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- MFI->setFrameAddressIsTaken(true);
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ MFI.setFrameAddressIsTaken(true);
SDLoc DL(Op);
unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
@@ -2703,7 +2724,7 @@ SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
int BackChainIdx = FI->getFramePointerSaveIndex();
if (!BackChainIdx) {
// By definition, the frame address is the address of the back chain.
- BackChainIdx = MFI->CreateFixedObject(8, -SystemZMC::CallFrameSize, false);
+ BackChainIdx = MFI.CreateFixedObject(8, -SystemZMC::CallFrameSize, false);
FI->setFramePointerSaveIndex(BackChainIdx);
}
SDValue BackChain = DAG.getFrameIndex(BackChainIdx, PtrVT);
@@ -2719,8 +2740,8 @@ SDValue SystemZTargetLowering::lowerFRAMEADDR(SDValue Op,
SDValue SystemZTargetLowering::lowerRETURNADDR(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
- MFI->setReturnAddressIsTaken(true);
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ MFI.setReturnAddressIsTaken(true);
if (verifyReturnAddressArgumentIsConstant(Op, DAG))
return SDValue();
@@ -3080,7 +3101,7 @@ SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
if (VT.isVector()) {
Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
- switch (VT.getVectorElementType().getSizeInBits()) {
+ switch (VT.getScalarSizeInBits()) {
case 8:
break;
case 16: {
@@ -3288,8 +3309,7 @@ SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op,
if (NegSrc2.getNode())
return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, MemVT,
Node->getChain(), Node->getBasePtr(), NegSrc2,
- Node->getMemOperand(), Node->getOrdering(),
- Node->getSynchScope());
+ Node->getMemOperand());
// Use the node as-is.
return Op;
@@ -4355,7 +4375,7 @@ SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
}
// Otherwise bitcast to the equivalent integer form and insert via a GPR.
- MVT IntVT = MVT::getIntegerVT(VT.getVectorElementType().getSizeInBits());
+ MVT IntVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements());
SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT,
DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0),
@@ -4395,8 +4415,8 @@ SystemZTargetLowering::lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG,
SDValue PackedOp = Op.getOperand(0);
EVT OutVT = Op.getValueType();
EVT InVT = PackedOp.getValueType();
- unsigned ToBits = OutVT.getVectorElementType().getSizeInBits();
- unsigned FromBits = InVT.getVectorElementType().getSizeInBits();
+ unsigned ToBits = OutVT.getScalarSizeInBits();
+ unsigned FromBits = InVT.getScalarSizeInBits();
do {
FromBits *= 2;
EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits),
@@ -4413,7 +4433,7 @@ SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
SDValue Op1 = Op.getOperand(1);
SDLoc DL(Op);
EVT VT = Op.getValueType();
- unsigned ElemBitSize = VT.getVectorElementType().getSizeInBits();
+ unsigned ElemBitSize = VT.getScalarSizeInBits();
// See whether the shift vector is a splat represented as BUILD_VECTOR.
if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) {
@@ -4591,7 +4611,6 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(BR_CCMASK);
OPCODE(SELECT_CCMASK);
OPCODE(ADJDYNALLOC);
- OPCODE(EXTRACT_ACCESS);
OPCODE(POPCNT);
OPCODE(UMUL_LOHI64);
OPCODE(SDIVREM32);
@@ -4687,7 +4706,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
// Return true if VT is a vector whose elements are a whole number of bytes
// in width.
static bool canTreatAsByteVector(EVT VT) {
- return VT.isVector() && VT.getVectorElementType().getSizeInBits() % 8 == 0;
+ return VT.isVector() && VT.getScalarSizeInBits() % 8 == 0;
}
// Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT
@@ -4748,7 +4767,7 @@ SDValue SystemZTargetLowering::combineExtract(const SDLoc &DL, EVT ResVT,
// We're extracting the low part of one operand of the BUILD_VECTOR.
Op = Op.getOperand(End / OpBytesPerElement - 1);
if (!Op.getValueType().isInteger()) {
- EVT VT = MVT::getIntegerVT(Op.getValueType().getSizeInBits());
+ EVT VT = MVT::getIntegerVT(Op.getValueSizeInBits());
Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
DCI.AddToWorklist(Op.getNode());
}
@@ -4848,8 +4867,7 @@ SDValue SystemZTargetLowering::combineSIGN_EXTEND(
SDValue Inner = N0.getOperand(0);
if (SraAmt && Inner.hasOneUse() && Inner.getOpcode() == ISD::SHL) {
if (auto *ShlAmt = dyn_cast<ConstantSDNode>(Inner.getOperand(1))) {
- unsigned Extra = (VT.getSizeInBits() -
- N0.getValueType().getSizeInBits());
+ unsigned Extra = (VT.getSizeInBits() - N0.getValueSizeInBits());
unsigned NewShlAmt = ShlAmt->getZExtValue() + Extra;
unsigned NewSraAmt = SraAmt->getZExtValue() + Extra;
EVT ShiftVT = N0.getOperand(1).getValueType();
@@ -4972,8 +4990,8 @@ SDValue SystemZTargetLowering::combineJOIN_DWORDS(
SDValue SystemZTargetLowering::combineFP_ROUND(
SDNode *N, DAGCombinerInfo &DCI) const {
- // (fround (extract_vector_elt X 0))
- // (fround (extract_vector_elt X 1)) ->
+ // (fpround (extract_vector_elt X 0))
+ // (fpround (extract_vector_elt X 1)) ->
// (extract_vector_elt (VROUND X) 0)
// (extract_vector_elt (VROUND X) 1)
//
@@ -5070,14 +5088,20 @@ SDValue SystemZTargetLowering::combineSHIFTROT(
// Shift/rotate instructions only use the last 6 bits of the second operand
// register. If the second operand is the result of an AND with an immediate
// value that has its last 6 bits set, we can safely remove the AND operation.
+ //
+ // If the AND operation doesn't have the last 6 bits set, we can't remove it
+ // entirely, but we can still truncate it to a 16-bit value. This prevents
+ // us from ending up with a NILL with a signed operand, which will cause the
+ // instruction printer to abort.
SDValue N1 = N->getOperand(1);
if (N1.getOpcode() == ISD::AND) {
- auto *AndMask = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+ SDValue AndMaskOp = N1->getOperand(1);
+ auto *AndMask = dyn_cast<ConstantSDNode>(AndMaskOp);
// The AND mask is constant
if (AndMask) {
auto AmtVal = AndMask->getZExtValue();
-
+
// Bottom 6 bits are set
if ((AmtVal & 0x3f) == 0x3f) {
SDValue AndOp = N1->getOperand(0);
@@ -5099,6 +5123,26 @@ SDValue SystemZTargetLowering::combineSHIFTROT(
return Replace;
}
+
+ // We can't remove the AND, but we can use NILL here (normally we would
+ // use NILF). Only keep the last 16 bits of the mask. The actual
+ // transformation will be handled by .td definitions.
+ } else if (AmtVal >> 16 != 0) {
+ SDValue AndOp = N1->getOperand(0);
+
+ auto NewMask = DAG.getConstant(AndMask->getZExtValue() & 0x0000ffff,
+ SDLoc(AndMaskOp),
+ AndMaskOp.getValueType());
+
+ auto NewAnd = DAG.getNode(N1.getOpcode(), SDLoc(N1), N1.getValueType(),
+ AndOp, NewMask);
+
+ SDValue Replace = DAG.getNode(N->getOpcode(), SDLoc(N),
+ N->getValueType(0), N->getOperand(0),
+ NewAnd);
+ DCI.AddToWorklist(Replace.getNode());
+
+ return Replace;
}
}
}
@@ -5180,7 +5224,8 @@ static unsigned forceReg(MachineInstr &MI, MachineOperand &Base,
// Implement EmitInstrWithCustomInserter for pseudo Select* instruction MI.
MachineBasicBlock *
SystemZTargetLowering::emitSelect(MachineInstr &MI,
- MachineBasicBlock *MBB) const {
+ MachineBasicBlock *MBB,
+ unsigned LOCROpcode) const {
const SystemZInstrInfo *TII =
static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
@@ -5191,6 +5236,15 @@ SystemZTargetLowering::emitSelect(MachineInstr &MI,
unsigned CCMask = MI.getOperand(4).getImm();
DebugLoc DL = MI.getDebugLoc();
+ // Use LOCROpcode if possible.
+ if (LOCROpcode && Subtarget.hasLoadStoreOnCond()) {
+ BuildMI(*MBB, MI, DL, TII->get(LOCROpcode), DestReg)
+ .addReg(FalseReg).addReg(TrueReg)
+ .addImm(CCValid).addImm(CCMask);
+ MI.eraseFromParent();
+ return MBB;
+ }
+
MachineBasicBlock *StartMBB = MBB;
MachineBasicBlock *JoinMBB = splitBlockBefore(MI, MBB);
MachineBasicBlock *FalseMBB = emitBlockAfter(StartMBB);
@@ -5976,12 +6030,16 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
MachineInstr &MI, MachineBasicBlock *MBB) const {
switch (MI.getOpcode()) {
case SystemZ::Select32Mux:
+ return emitSelect(MI, MBB,
+ Subtarget.hasLoadStoreOnCond2()? SystemZ::LOCRMux : 0);
case SystemZ::Select32:
- case SystemZ::SelectF32:
+ return emitSelect(MI, MBB, SystemZ::LOCR);
case SystemZ::Select64:
+ return emitSelect(MI, MBB, SystemZ::LOCGR);
+ case SystemZ::SelectF32:
case SystemZ::SelectF64:
case SystemZ::SelectF128:
- return emitSelect(MI, MBB);
+ return emitSelect(MI, MBB, 0);
case SystemZ::CondStore8Mux:
return emitCondStore(MI, MBB, SystemZ::STCMux, 0, false);
@@ -5991,6 +6049,10 @@ MachineBasicBlock *SystemZTargetLowering::EmitInstrWithCustomInserter(
return emitCondStore(MI, MBB, SystemZ::STHMux, 0, false);
case SystemZ::CondStore16MuxInv:
return emitCondStore(MI, MBB, SystemZ::STHMux, 0, true);
+ case SystemZ::CondStore32Mux:
+ return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, false);
+ case SystemZ::CondStore32MuxInv:
+ return emitCondStore(MI, MBB, SystemZ::STMux, SystemZ::STOCMux, true);
case SystemZ::CondStore8:
return emitCondStore(MI, MBB, SystemZ::STC, 0, false);
case SystemZ::CondStore8Inv:
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index b1de8936beed..7a21a474c119 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -83,10 +83,6 @@ enum NodeType : unsigned {
// base of the dynamically-allocatable area.
ADJDYNALLOC,
- // Extracts the value of a 32-bit access register. Operand 0 is
- // the number of the register.
- EXTRACT_ACCESS,
-
// Count number of bits set in operand 0 per byte.
POPCNT,
@@ -382,7 +378,7 @@ public:
//
// (c) there are no multiplication instructions for the widest integer
// type (v2i64).
- if (VT.getVectorElementType().getSizeInBits() % 8 == 0)
+ if (VT.getScalarSizeInBits() % 8 == 0)
return TypeWidenVector;
return TargetLoweringBase::getPreferredVectorAction(VT);
}
@@ -394,6 +390,7 @@ public:
bool isLegalAddImmediate(int64_t Imm) const override;
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty,
unsigned AS) const override;
+ bool isFoldableMemAccessOffset(Instruction *I, int64_t Offset) const override;
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS,
unsigned Align,
bool *Fast) const override;
@@ -564,7 +561,8 @@ private:
MachineBasicBlock *Target) const;
// Implement EmitInstrWithCustomInserter for individual operation types.
- MachineBasicBlock *emitSelect(MachineInstr &MI, MachineBasicBlock *BB) const;
+ MachineBasicBlock *emitSelect(MachineInstr &MI, MachineBasicBlock *BB,
+ unsigned LOCROpcode) const;
MachineBasicBlock *emitCondStore(MachineInstr &MI, MachineBasicBlock *BB,
unsigned StoreOpcode, unsigned STOCOpcode,
bool Invert) const;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h b/contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h
index 2cb8aba1b322..896b665d25eb 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrBuilder.h
@@ -27,7 +27,7 @@ static inline const MachineInstrBuilder &
addFrameReference(const MachineInstrBuilder &MIB, int FI) {
MachineInstr *MI = MIB;
MachineFunction &MF = *MI->getParent()->getParent();
- MachineFrameInfo *MFFrame = MF.getFrameInfo();
+ MachineFrameInfo &MFFrame = MF.getFrameInfo();
const MCInstrDesc &MCID = MI->getDesc();
auto Flags = MachineMemOperand::MONone;
if (MCID.mayLoad())
@@ -37,7 +37,7 @@ addFrameReference(const MachineInstrBuilder &MIB, int FI) {
int64_t Offset = 0;
MachineMemOperand *MMO = MF.getMachineMemOperand(
MachinePointerInfo::getFixedStack(MF, FI, Offset), Flags,
- MFFrame->getObjectSize(FI), MFFrame->getObjectAlignment(FI));
+ MFFrame.getObjectSize(FI), MFFrame.getObjectAlignment(FI));
return MIB.addFrameIndex(FI).addImm(Offset).addReg(0).addMemOperand(MMO);
}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td
index 8b32047e08e3..bb6d27e24828 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td
@@ -27,28 +27,28 @@ defm CondStoreF64 : CondStores<FP64, nonvolatile_store,
// Load zero.
let hasSideEffects = 0, isAsCheapAsAMove = 1, isMoveImm = 1 in {
- def LZER : InherentRRE<"lzer", 0xB374, FP32, (fpimm0)>;
- def LZDR : InherentRRE<"lzdr", 0xB375, FP64, (fpimm0)>;
- def LZXR : InherentRRE<"lzxr", 0xB376, FP128, (fpimm0)>;
+ def LZER : InherentRRE<"lzer", 0xB374, FP32, fpimm0>;
+ def LZDR : InherentRRE<"lzdr", 0xB375, FP64, fpimm0>;
+ def LZXR : InherentRRE<"lzxr", 0xB376, FP128, fpimm0>;
}
// Moves between two floating-point registers.
let hasSideEffects = 0 in {
- def LER : UnaryRR <"le", 0x38, null_frag, FP32, FP32>;
- def LDR : UnaryRR <"ld", 0x28, null_frag, FP64, FP64>;
- def LXR : UnaryRRE<"lx", 0xB365, null_frag, FP128, FP128>;
+ def LER : UnaryRR <"ler", 0x38, null_frag, FP32, FP32>;
+ def LDR : UnaryRR <"ldr", 0x28, null_frag, FP64, FP64>;
+ def LXR : UnaryRRE<"lxr", 0xB365, null_frag, FP128, FP128>;
// For z13 we prefer LDR over LER to avoid partial register dependencies.
let isCodeGenOnly = 1 in
- def LDR32 : UnaryRR<"ld", 0x28, null_frag, FP32, FP32>;
+ def LDR32 : UnaryRR<"ldr", 0x28, null_frag, FP32, FP32>;
}
// Moves between two floating-point registers that also set the condition
// codes.
let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
- defm LTEBR : LoadAndTestRRE<"lteb", 0xB302, FP32>;
- defm LTDBR : LoadAndTestRRE<"ltdb", 0xB312, FP64>;
- defm LTXBR : LoadAndTestRRE<"ltxb", 0xB342, FP128>;
+ defm LTEBR : LoadAndTestRRE<"ltebr", 0xB302, FP32>;
+ defm LTDBR : LoadAndTestRRE<"ltdbr", 0xB312, FP64>;
+ defm LTXBR : LoadAndTestRRE<"ltxbr", 0xB342, FP128>;
}
// Note that LTxBRCompare is not available if we have vector support,
// since load-and-test instructions will partially clobber the target
@@ -73,13 +73,13 @@ let Predicates = [FeatureVector] in {
}
// Moves between 64-bit integer and floating-point registers.
-def LGDR : UnaryRRE<"lgd", 0xB3CD, bitconvert, GR64, FP64>;
-def LDGR : UnaryRRE<"ldg", 0xB3C1, bitconvert, FP64, GR64>;
+def LGDR : UnaryRRE<"lgdr", 0xB3CD, bitconvert, GR64, FP64>;
+def LDGR : UnaryRRE<"ldgr", 0xB3C1, bitconvert, FP64, GR64>;
// fcopysign with an FP32 result.
let isCodeGenOnly = 1 in {
- def CPSDRss : BinaryRRF<"cpsd", 0xB372, fcopysign, FP32, FP32>;
- def CPSDRsd : BinaryRRF<"cpsd", 0xB372, fcopysign, FP32, FP64>;
+ def CPSDRss : BinaryRRFb<"cpsdr", 0xB372, fcopysign, FP32, FP32, FP32>;
+ def CPSDRsd : BinaryRRFb<"cpsdr", 0xB372, fcopysign, FP32, FP32, FP64>;
}
// The sign of an FP128 is in the high register.
@@ -88,8 +88,8 @@ def : Pat<(fcopysign FP32:$src1, FP128:$src2),
// fcopysign with an FP64 result.
let isCodeGenOnly = 1 in
- def CPSDRds : BinaryRRF<"cpsd", 0xB372, fcopysign, FP64, FP32>;
-def CPSDRdd : BinaryRRF<"cpsd", 0xB372, fcopysign, FP64, FP64>;
+ def CPSDRds : BinaryRRFb<"cpsdr", 0xB372, fcopysign, FP64, FP64, FP32>;
+def CPSDRdd : BinaryRRFb<"cpsdr", 0xB372, fcopysign, FP64, FP64, FP64>;
// The sign of an FP128 is in the high register.
def : Pat<(fcopysign FP64:$src1, FP128:$src2),
@@ -154,26 +154,26 @@ let SimpleBDXStore = 1 in {
// Convert floating-point values to narrower representations, rounding
// according to the current mode. The destination of LEXBR and LDXBR
// is a 128-bit value, but only the first register of the pair is used.
-def LEDBR : UnaryRRE<"ledb", 0xB344, fround, FP32, FP64>;
-def LEXBR : UnaryRRE<"lexb", 0xB346, null_frag, FP128, FP128>;
-def LDXBR : UnaryRRE<"ldxb", 0xB345, null_frag, FP128, FP128>;
+def LEDBR : UnaryRRE<"ledbr", 0xB344, fpround, FP32, FP64>;
+def LEXBR : UnaryRRE<"lexbr", 0xB346, null_frag, FP128, FP128>;
+def LDXBR : UnaryRRE<"ldxbr", 0xB345, null_frag, FP128, FP128>;
-def LEDBRA : UnaryRRF4<"ledbra", 0xB344, FP32, FP64>,
+def LEDBRA : TernaryRRFe<"ledbra", 0xB344, FP32, FP64>,
Requires<[FeatureFPExtension]>;
-def LEXBRA : UnaryRRF4<"lexbra", 0xB346, FP128, FP128>,
+def LEXBRA : TernaryRRFe<"lexbra", 0xB346, FP128, FP128>,
Requires<[FeatureFPExtension]>;
-def LDXBRA : UnaryRRF4<"ldxbra", 0xB345, FP128, FP128>,
+def LDXBRA : TernaryRRFe<"ldxbra", 0xB345, FP128, FP128>,
Requires<[FeatureFPExtension]>;
-def : Pat<(f32 (fround FP128:$src)),
+def : Pat<(f32 (fpround FP128:$src)),
(EXTRACT_SUBREG (LEXBR FP128:$src), subreg_hr32)>;
-def : Pat<(f64 (fround FP128:$src)),
+def : Pat<(f64 (fpround FP128:$src)),
(EXTRACT_SUBREG (LDXBR FP128:$src), subreg_h64)>;
// Extend register floating-point values to wider representations.
-def LDEBR : UnaryRRE<"ldeb", 0xB304, fextend, FP64, FP32>;
-def LXEBR : UnaryRRE<"lxeb", 0xB306, fextend, FP128, FP32>;
-def LXDBR : UnaryRRE<"lxdb", 0xB305, fextend, FP128, FP64>;
+def LDEBR : UnaryRRE<"ldebr", 0xB304, fpextend, FP64, FP32>;
+def LXEBR : UnaryRRE<"lxebr", 0xB306, fpextend, FP128, FP32>;
+def LXDBR : UnaryRRE<"lxdbr", 0xB305, fpextend, FP128, FP64>;
// Extend memory floating-point values to wider representations.
def LDEB : UnaryRXE<"ldeb", 0xED04, extloadf32, FP64, 4>;
@@ -181,23 +181,35 @@ def LXEB : UnaryRXE<"lxeb", 0xED06, extloadf32, FP128, 4>;
def LXDB : UnaryRXE<"lxdb", 0xED05, extloadf64, FP128, 8>;
// Convert a signed integer register value to a floating-point one.
-def CEFBR : UnaryRRE<"cefb", 0xB394, sint_to_fp, FP32, GR32>;
-def CDFBR : UnaryRRE<"cdfb", 0xB395, sint_to_fp, FP64, GR32>;
-def CXFBR : UnaryRRE<"cxfb", 0xB396, sint_to_fp, FP128, GR32>;
+def CEFBR : UnaryRRE<"cefbr", 0xB394, sint_to_fp, FP32, GR32>;
+def CDFBR : UnaryRRE<"cdfbr", 0xB395, sint_to_fp, FP64, GR32>;
+def CXFBR : UnaryRRE<"cxfbr", 0xB396, sint_to_fp, FP128, GR32>;
-def CEGBR : UnaryRRE<"cegb", 0xB3A4, sint_to_fp, FP32, GR64>;
-def CDGBR : UnaryRRE<"cdgb", 0xB3A5, sint_to_fp, FP64, GR64>;
-def CXGBR : UnaryRRE<"cxgb", 0xB3A6, sint_to_fp, FP128, GR64>;
+def CEGBR : UnaryRRE<"cegbr", 0xB3A4, sint_to_fp, FP32, GR64>;
+def CDGBR : UnaryRRE<"cdgbr", 0xB3A5, sint_to_fp, FP64, GR64>;
+def CXGBR : UnaryRRE<"cxgbr", 0xB3A6, sint_to_fp, FP128, GR64>;
+
+// The FP extension feature provides versions of the above that allow
+// specifying rounding mode and inexact-exception suppression flags.
+let Predicates = [FeatureFPExtension] in {
+ def CEFBRA : TernaryRRFe<"cefbra", 0xB394, FP32, GR32>;
+ def CDFBRA : TernaryRRFe<"cdfbra", 0xB395, FP64, GR32>;
+ def CXFBRA : TernaryRRFe<"cxfbra", 0xB396, FP128, GR32>;
+
+ def CEGBRA : TernaryRRFe<"cegbra", 0xB3A4, FP32, GR64>;
+ def CDGBRA : TernaryRRFe<"cdgbra", 0xB3A5, FP64, GR64>;
+ def CXGBRA : TernaryRRFe<"cxgbra", 0xB3A6, FP128, GR64>;
+}
// Convert an unsigned integer register value to a floating-point one.
let Predicates = [FeatureFPExtension] in {
- def CELFBR : UnaryRRF4<"celfbr", 0xB390, FP32, GR32>;
- def CDLFBR : UnaryRRF4<"cdlfbr", 0xB391, FP64, GR32>;
- def CXLFBR : UnaryRRF4<"cxlfbr", 0xB392, FP128, GR32>;
+ def CELFBR : TernaryRRFe<"celfbr", 0xB390, FP32, GR32>;
+ def CDLFBR : TernaryRRFe<"cdlfbr", 0xB391, FP64, GR32>;
+ def CXLFBR : TernaryRRFe<"cxlfbr", 0xB392, FP128, GR32>;
- def CELGBR : UnaryRRF4<"celgbr", 0xB3A0, FP32, GR64>;
- def CDLGBR : UnaryRRF4<"cdlgbr", 0xB3A1, FP64, GR64>;
- def CXLGBR : UnaryRRF4<"cxlgbr", 0xB3A2, FP128, GR64>;
+ def CELGBR : TernaryRRFe<"celgbr", 0xB3A0, FP32, GR64>;
+ def CDLGBR : TernaryRRFe<"cdlgbr", 0xB3A1, FP64, GR64>;
+ def CXLGBR : TernaryRRFe<"cxlgbr", 0xB3A2, FP128, GR64>;
def : Pat<(f32 (uint_to_fp GR32:$src)), (CELFBR 0, GR32:$src, 0)>;
def : Pat<(f64 (uint_to_fp GR32:$src)), (CDLFBR 0, GR32:$src, 0)>;
@@ -211,13 +223,13 @@ let Predicates = [FeatureFPExtension] in {
// Convert a floating-point register value to a signed integer value,
// with the second operand (modifier M3) specifying the rounding mode.
let Defs = [CC] in {
- def CFEBR : UnaryRRF<"cfeb", 0xB398, GR32, FP32>;
- def CFDBR : UnaryRRF<"cfdb", 0xB399, GR32, FP64>;
- def CFXBR : UnaryRRF<"cfxb", 0xB39A, GR32, FP128>;
+ def CFEBR : BinaryRRFe<"cfebr", 0xB398, GR32, FP32>;
+ def CFDBR : BinaryRRFe<"cfdbr", 0xB399, GR32, FP64>;
+ def CFXBR : BinaryRRFe<"cfxbr", 0xB39A, GR32, FP128>;
- def CGEBR : UnaryRRF<"cgeb", 0xB3A8, GR64, FP32>;
- def CGDBR : UnaryRRF<"cgdb", 0xB3A9, GR64, FP64>;
- def CGXBR : UnaryRRF<"cgxb", 0xB3AA, GR64, FP128>;
+ def CGEBR : BinaryRRFe<"cgebr", 0xB3A8, GR64, FP32>;
+ def CGDBR : BinaryRRFe<"cgdbr", 0xB3A9, GR64, FP64>;
+ def CGXBR : BinaryRRFe<"cgxbr", 0xB3AA, GR64, FP128>;
}
// fp_to_sint always rounds towards zero, which is modifier value 5.
@@ -229,16 +241,28 @@ def : Pat<(i64 (fp_to_sint FP32:$src)), (CGEBR 5, FP32:$src)>;
def : Pat<(i64 (fp_to_sint FP64:$src)), (CGDBR 5, FP64:$src)>;
def : Pat<(i64 (fp_to_sint FP128:$src)), (CGXBR 5, FP128:$src)>;
+// The FP extension feature provides versions of the above that allow
+// also specifying the inexact-exception suppression flag.
+let Predicates = [FeatureFPExtension], Defs = [CC] in {
+ def CFEBRA : TernaryRRFe<"cfebra", 0xB398, GR32, FP32>;
+ def CFDBRA : TernaryRRFe<"cfdbra", 0xB399, GR32, FP64>;
+ def CFXBRA : TernaryRRFe<"cfxbra", 0xB39A, GR32, FP128>;
+
+ def CGEBRA : TernaryRRFe<"cgebra", 0xB3A8, GR64, FP32>;
+ def CGDBRA : TernaryRRFe<"cgdbra", 0xB3A9, GR64, FP64>;
+ def CGXBRA : TernaryRRFe<"cgxbra", 0xB3AA, GR64, FP128>;
+}
+
// Convert a floating-point register value to an unsigned integer value.
let Predicates = [FeatureFPExtension] in {
let Defs = [CC] in {
- def CLFEBR : UnaryRRF4<"clfebr", 0xB39C, GR32, FP32>;
- def CLFDBR : UnaryRRF4<"clfdbr", 0xB39D, GR32, FP64>;
- def CLFXBR : UnaryRRF4<"clfxbr", 0xB39E, GR32, FP128>;
+ def CLFEBR : TernaryRRFe<"clfebr", 0xB39C, GR32, FP32>;
+ def CLFDBR : TernaryRRFe<"clfdbr", 0xB39D, GR32, FP64>;
+ def CLFXBR : TernaryRRFe<"clfxbr", 0xB39E, GR32, FP128>;
- def CLGEBR : UnaryRRF4<"clgebr", 0xB3AC, GR64, FP32>;
- def CLGDBR : UnaryRRF4<"clgdbr", 0xB3AD, GR64, FP64>;
- def CLGXBR : UnaryRRF4<"clgxbr", 0xB3AE, GR64, FP128>;
+ def CLGEBR : TernaryRRFe<"clgebr", 0xB3AC, GR64, FP32>;
+ def CLGDBR : TernaryRRFe<"clgdbr", 0xB3AD, GR64, FP64>;
+ def CLGXBR : TernaryRRFe<"clgxbr", 0xB3AE, GR64, FP128>;
}
def : Pat<(i32 (fp_to_uint FP32:$src)), (CLFEBR 5, FP32:$src, 0)>;
@@ -265,50 +289,50 @@ let Predicates = [FeatureFPExtension] in {
// Negation (Load Complement).
let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
- def LCEBR : UnaryRRE<"lceb", 0xB303, null_frag, FP32, FP32>;
- def LCDBR : UnaryRRE<"lcdb", 0xB313, null_frag, FP64, FP64>;
- def LCXBR : UnaryRRE<"lcxb", 0xB343, fneg, FP128, FP128>;
+ def LCEBR : UnaryRRE<"lcebr", 0xB303, null_frag, FP32, FP32>;
+ def LCDBR : UnaryRRE<"lcdbr", 0xB313, null_frag, FP64, FP64>;
+ def LCXBR : UnaryRRE<"lcxbr", 0xB343, fneg, FP128, FP128>;
}
// Generic form, which does not set CC.
-def LCDFR : UnaryRRE<"lcdf", 0xB373, fneg, FP64, FP64>;
+def LCDFR : UnaryRRE<"lcdfr", 0xB373, fneg, FP64, FP64>;
let isCodeGenOnly = 1 in
- def LCDFR_32 : UnaryRRE<"lcdf", 0xB373, fneg, FP32, FP32>;
+ def LCDFR_32 : UnaryRRE<"lcdfr", 0xB373, fneg, FP32, FP32>;
// Absolute value (Load Positive).
let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
- def LPEBR : UnaryRRE<"lpeb", 0xB300, null_frag, FP32, FP32>;
- def LPDBR : UnaryRRE<"lpdb", 0xB310, null_frag, FP64, FP64>;
- def LPXBR : UnaryRRE<"lpxb", 0xB340, fabs, FP128, FP128>;
+ def LPEBR : UnaryRRE<"lpebr", 0xB300, null_frag, FP32, FP32>;
+ def LPDBR : UnaryRRE<"lpdbr", 0xB310, null_frag, FP64, FP64>;
+ def LPXBR : UnaryRRE<"lpxbr", 0xB340, fabs, FP128, FP128>;
}
// Generic form, which does not set CC.
-def LPDFR : UnaryRRE<"lpdf", 0xB370, fabs, FP64, FP64>;
+def LPDFR : UnaryRRE<"lpdfr", 0xB370, fabs, FP64, FP64>;
let isCodeGenOnly = 1 in
- def LPDFR_32 : UnaryRRE<"lpdf", 0xB370, fabs, FP32, FP32>;
+ def LPDFR_32 : UnaryRRE<"lpdfr", 0xB370, fabs, FP32, FP32>;
// Negative absolute value (Load Negative).
let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
- def LNEBR : UnaryRRE<"lneb", 0xB301, null_frag, FP32, FP32>;
- def LNDBR : UnaryRRE<"lndb", 0xB311, null_frag, FP64, FP64>;
- def LNXBR : UnaryRRE<"lnxb", 0xB341, fnabs, FP128, FP128>;
+ def LNEBR : UnaryRRE<"lnebr", 0xB301, null_frag, FP32, FP32>;
+ def LNDBR : UnaryRRE<"lndbr", 0xB311, null_frag, FP64, FP64>;
+ def LNXBR : UnaryRRE<"lnxbr", 0xB341, fnabs, FP128, FP128>;
}
// Generic form, which does not set CC.
-def LNDFR : UnaryRRE<"lndf", 0xB371, fnabs, FP64, FP64>;
+def LNDFR : UnaryRRE<"lndfr", 0xB371, fnabs, FP64, FP64>;
let isCodeGenOnly = 1 in
- def LNDFR_32 : UnaryRRE<"lndf", 0xB371, fnabs, FP32, FP32>;
+ def LNDFR_32 : UnaryRRE<"lndfr", 0xB371, fnabs, FP32, FP32>;
// Square root.
-def SQEBR : UnaryRRE<"sqeb", 0xB314, fsqrt, FP32, FP32>;
-def SQDBR : UnaryRRE<"sqdb", 0xB315, fsqrt, FP64, FP64>;
-def SQXBR : UnaryRRE<"sqxb", 0xB316, fsqrt, FP128, FP128>;
+def SQEBR : UnaryRRE<"sqebr", 0xB314, fsqrt, FP32, FP32>;
+def SQDBR : UnaryRRE<"sqdbr", 0xB315, fsqrt, FP64, FP64>;
+def SQXBR : UnaryRRE<"sqxbr", 0xB316, fsqrt, FP128, FP128>;
def SQEB : UnaryRXE<"sqeb", 0xED14, loadu<fsqrt>, FP32, 4>;
def SQDB : UnaryRXE<"sqdb", 0xED15, loadu<fsqrt>, FP64, 8>;
// Round to an integer, with the second operand (modifier M3) specifying
// the rounding mode. These forms always check for inexact conditions.
-def FIEBR : UnaryRRF<"fieb", 0xB357, FP32, FP32>;
-def FIDBR : UnaryRRF<"fidb", 0xB35F, FP64, FP64>;
-def FIXBR : UnaryRRF<"fixb", 0xB347, FP128, FP128>;
+def FIEBR : BinaryRRFe<"fiebr", 0xB357, FP32, FP32>;
+def FIDBR : BinaryRRFe<"fidbr", 0xB35F, FP64, FP64>;
+def FIXBR : BinaryRRFe<"fixbr", 0xB347, FP128, FP128>;
// frint rounds according to the current mode (modifier 0) and detects
// inexact conditions.
@@ -319,9 +343,9 @@ def : Pat<(frint FP128:$src), (FIXBR 0, FP128:$src)>;
let Predicates = [FeatureFPExtension] in {
// Extended forms of the FIxBR instructions. M4 can be set to 4
// to suppress detection of inexact conditions.
- def FIEBRA : UnaryRRF4<"fiebra", 0xB357, FP32, FP32>;
- def FIDBRA : UnaryRRF4<"fidbra", 0xB35F, FP64, FP64>;
- def FIXBRA : UnaryRRF4<"fixbra", 0xB347, FP128, FP128>;
+ def FIEBRA : TernaryRRFe<"fiebra", 0xB357, FP32, FP32>;
+ def FIDBRA : TernaryRRFe<"fidbra", 0xB35F, FP64, FP64>;
+ def FIXBRA : TernaryRRFe<"fixbra", 0xB347, FP128, FP128>;
// fnearbyint is like frint but does not detect inexact conditions.
def : Pat<(fnearbyint FP32:$src), (FIEBRA 0, FP32:$src, 4)>;
@@ -347,9 +371,9 @@ let Predicates = [FeatureFPExtension] in {
// Same idea for round, where mode 1 is round towards nearest with
// ties away from zero.
- def : Pat<(frnd FP32:$src), (FIEBRA 1, FP32:$src, 4)>;
- def : Pat<(frnd FP64:$src), (FIDBRA 1, FP64:$src, 4)>;
- def : Pat<(frnd FP128:$src), (FIXBRA 1, FP128:$src, 4)>;
+ def : Pat<(fround FP32:$src), (FIEBRA 1, FP32:$src, 4)>;
+ def : Pat<(fround FP64:$src), (FIDBRA 1, FP64:$src, 4)>;
+ def : Pat<(fround FP128:$src), (FIXBRA 1, FP128:$src, 4)>;
}
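As an aside on the modifier values used in the patterns above: 0 selects the current FPC rounding mode (frint), 1 rounds to nearest with ties away from zero (fround), 5 rounds toward zero (as used by fp_to_sint), and an M4 value of 4 suppresses the inexact exception (fnearbyint and the FIxBRA forms). A minimal, hypothetical C++ sketch of that mapping, not code from this change, might look like:

    #include <cstdint>
    #include <utility>

    // Illustrative only: (M3 rounding mode, M4 inexact-suppression) pairs as
    // used by the FIxBR/FIxBRA selection patterns above.
    enum class RoundKind { RInt, NearbyInt, Round };

    inline std::pair<unsigned, unsigned> roundingModifiers(RoundKind K) {
      switch (K) {
      case RoundKind::RInt:      return {0, 0}; // current mode, detect inexact
      case RoundKind::NearbyInt: return {0, 4}; // current mode, suppress inexact
      case RoundKind::Round:     return {1, 4}; // nearest, ties away from zero
      }
      return {0, 0};
    }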
//===----------------------------------------------------------------------===//
@@ -359,9 +383,9 @@ let Predicates = [FeatureFPExtension] in {
// Addition.
let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
let isCommutable = 1 in {
- def AEBR : BinaryRRE<"aeb", 0xB30A, fadd, FP32, FP32>;
- def ADBR : BinaryRRE<"adb", 0xB31A, fadd, FP64, FP64>;
- def AXBR : BinaryRRE<"axb", 0xB34A, fadd, FP128, FP128>;
+ def AEBR : BinaryRRE<"aebr", 0xB30A, fadd, FP32, FP32>;
+ def ADBR : BinaryRRE<"adbr", 0xB31A, fadd, FP64, FP64>;
+ def AXBR : BinaryRRE<"axbr", 0xB34A, fadd, FP128, FP128>;
}
def AEB : BinaryRXE<"aeb", 0xED0A, fadd, FP32, load, 4>;
def ADB : BinaryRXE<"adb", 0xED1A, fadd, FP64, load, 8>;
@@ -369,9 +393,9 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
// Subtraction.
let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
- def SEBR : BinaryRRE<"seb", 0xB30B, fsub, FP32, FP32>;
- def SDBR : BinaryRRE<"sdb", 0xB31B, fsub, FP64, FP64>;
- def SXBR : BinaryRRE<"sxb", 0xB34B, fsub, FP128, FP128>;
+ def SEBR : BinaryRRE<"sebr", 0xB30B, fsub, FP32, FP32>;
+ def SDBR : BinaryRRE<"sdbr", 0xB31B, fsub, FP64, FP64>;
+ def SXBR : BinaryRRE<"sxbr", 0xB34B, fsub, FP128, FP128>;
def SEB : BinaryRXE<"seb", 0xED0B, fsub, FP32, load, 4>;
def SDB : BinaryRXE<"sdb", 0xED1B, fsub, FP64, load, 8>;
@@ -379,57 +403,57 @@ let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0xF in {
// Multiplication.
let isCommutable = 1 in {
- def MEEBR : BinaryRRE<"meeb", 0xB317, fmul, FP32, FP32>;
- def MDBR : BinaryRRE<"mdb", 0xB31C, fmul, FP64, FP64>;
- def MXBR : BinaryRRE<"mxb", 0xB34C, fmul, FP128, FP128>;
+ def MEEBR : BinaryRRE<"meebr", 0xB317, fmul, FP32, FP32>;
+ def MDBR : BinaryRRE<"mdbr", 0xB31C, fmul, FP64, FP64>;
+ def MXBR : BinaryRRE<"mxbr", 0xB34C, fmul, FP128, FP128>;
}
def MEEB : BinaryRXE<"meeb", 0xED17, fmul, FP32, load, 4>;
def MDB : BinaryRXE<"mdb", 0xED1C, fmul, FP64, load, 8>;
// f64 multiplication of two FP32 registers.
-def MDEBR : BinaryRRE<"mdeb", 0xB30C, null_frag, FP64, FP32>;
-def : Pat<(fmul (f64 (fextend FP32:$src1)), (f64 (fextend FP32:$src2))),
+def MDEBR : BinaryRRE<"mdebr", 0xB30C, null_frag, FP64, FP32>;
+def : Pat<(fmul (f64 (fpextend FP32:$src1)), (f64 (fpextend FP32:$src2))),
(MDEBR (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
FP32:$src1, subreg_r32), FP32:$src2)>;
// f64 multiplication of an FP32 register and an f32 memory.
def MDEB : BinaryRXE<"mdeb", 0xED0C, null_frag, FP64, load, 4>;
-def : Pat<(fmul (f64 (fextend FP32:$src1)),
+def : Pat<(fmul (f64 (fpextend FP32:$src1)),
(f64 (extloadf32 bdxaddr12only:$addr))),
(MDEB (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_r32),
bdxaddr12only:$addr)>;
// f128 multiplication of two FP64 registers.
-def MXDBR : BinaryRRE<"mxdb", 0xB307, null_frag, FP128, FP64>;
-def : Pat<(fmul (f128 (fextend FP64:$src1)), (f128 (fextend FP64:$src2))),
+def MXDBR : BinaryRRE<"mxdbr", 0xB307, null_frag, FP128, FP64>;
+def : Pat<(fmul (f128 (fpextend FP64:$src1)), (f128 (fpextend FP64:$src2))),
(MXDBR (INSERT_SUBREG (f128 (IMPLICIT_DEF)),
FP64:$src1, subreg_h64), FP64:$src2)>;
// f128 multiplication of an FP64 register and an f64 memory.
def MXDB : BinaryRXE<"mxdb", 0xED07, null_frag, FP128, load, 8>;
-def : Pat<(fmul (f128 (fextend FP64:$src1)),
+def : Pat<(fmul (f128 (fpextend FP64:$src1)),
(f128 (extloadf64 bdxaddr12only:$addr))),
(MXDB (INSERT_SUBREG (f128 (IMPLICIT_DEF)), FP64:$src1, subreg_h64),
bdxaddr12only:$addr)>;
// Fused multiply-add.
-def MAEBR : TernaryRRD<"maeb", 0xB30E, z_fma, FP32>;
-def MADBR : TernaryRRD<"madb", 0xB31E, z_fma, FP64>;
+def MAEBR : TernaryRRD<"maebr", 0xB30E, z_fma, FP32>;
+def MADBR : TernaryRRD<"madbr", 0xB31E, z_fma, FP64>;
def MAEB : TernaryRXF<"maeb", 0xED0E, z_fma, FP32, load, 4>;
def MADB : TernaryRXF<"madb", 0xED1E, z_fma, FP64, load, 8>;
// Fused multiply-subtract.
-def MSEBR : TernaryRRD<"mseb", 0xB30F, z_fms, FP32>;
-def MSDBR : TernaryRRD<"msdb", 0xB31F, z_fms, FP64>;
+def MSEBR : TernaryRRD<"msebr", 0xB30F, z_fms, FP32>;
+def MSDBR : TernaryRRD<"msdbr", 0xB31F, z_fms, FP64>;
def MSEB : TernaryRXF<"mseb", 0xED0F, z_fms, FP32, load, 4>;
def MSDB : TernaryRXF<"msdb", 0xED1F, z_fms, FP64, load, 8>;
// Division.
-def DEBR : BinaryRRE<"deb", 0xB30D, fdiv, FP32, FP32>;
-def DDBR : BinaryRRE<"ddb", 0xB31D, fdiv, FP64, FP64>;
-def DXBR : BinaryRRE<"dxb", 0xB34D, fdiv, FP128, FP128>;
+def DEBR : BinaryRRE<"debr", 0xB30D, fdiv, FP32, FP32>;
+def DDBR : BinaryRRE<"ddbr", 0xB31D, fdiv, FP64, FP64>;
+def DXBR : BinaryRRE<"dxbr", 0xB34D, fdiv, FP128, FP128>;
def DEB : BinaryRXE<"deb", 0xED0D, fdiv, FP32, load, 4>;
def DDB : BinaryRXE<"ddb", 0xED1D, fdiv, FP64, load, 8>;
@@ -439,9 +463,9 @@ def DDB : BinaryRXE<"ddb", 0xED1D, fdiv, FP64, load, 8>;
//===----------------------------------------------------------------------===//
let Defs = [CC], CCValues = 0xF in {
- def CEBR : CompareRRE<"ceb", 0xB309, z_fcmp, FP32, FP32>;
- def CDBR : CompareRRE<"cdb", 0xB319, z_fcmp, FP64, FP64>;
- def CXBR : CompareRRE<"cxb", 0xB349, z_fcmp, FP128, FP128>;
+ def CEBR : CompareRRE<"cebr", 0xB309, z_fcmp, FP32, FP32>;
+ def CDBR : CompareRRE<"cdbr", 0xB319, z_fcmp, FP64, FP64>;
+ def CXBR : CompareRRE<"cxbr", 0xB349, z_fcmp, FP128, FP128>;
def CEB : CompareRXE<"ceb", 0xED09, z_fcmp, FP32, load, 4>;
def CDB : CompareRXE<"cdb", 0xED19, z_fcmp, FP64, load, 8>;
@@ -455,6 +479,26 @@ let Defs = [CC], CCValues = 0xC in {
}
//===----------------------------------------------------------------------===//
+// Floating-point control register instructions
+//===----------------------------------------------------------------------===//
+
+let hasSideEffects = 1 in {
+ def EFPC : InherentRRE<"efpc", 0xB38C, GR32, int_s390_efpc>;
+ def STFPC : StoreInherentS<"stfpc", 0xB29C, storei<int_s390_efpc>, 4>;
+
+ def SFPC : SideEffectUnaryRRE<"sfpc", 0xB384, GR32, int_s390_sfpc>;
+ def LFPC : SideEffectUnaryS<"lfpc", 0xB29D, loadu<int_s390_sfpc>, 4>;
+
+ def SFASR : SideEffectUnaryRRE<"sfasr", 0xB385, GR32, null_frag>;
+ def LFAS : SideEffectUnaryS<"lfas", 0xB2BD, null_frag, 4>;
+
+ def SRNMB : SideEffectAddressS<"srnmb", 0xB2B8, null_frag, shift12only>,
+ Requires<[FeatureFPExtension]>;
+ def SRNM : SideEffectAddressS<"srnm", 0xB299, null_frag, shift12only>;
+ def SRNMT : SideEffectAddressS<"srnmt", 0xB2B9, null_frag, shift12only>;
+}
+
+//===----------------------------------------------------------------------===//
// Peepholes
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
index 973894d5c001..c727f486087e 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -29,7 +29,7 @@ class InstSystemZ<int size, dag outs, dag ins, string asmstr,
string DispSize = "none";
// Many register-based <INSN>R instructions have a memory-based <INSN>
- // counterpart. OpKey uniquely identifies <INSN>, while OpType is
+ // counterpart. OpKey uniquely identifies <INSN>R, while OpType is
// "reg" for <INSN>R and "mem" for <INSN>.
string OpKey = "";
string OpType = "none";
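The OpKey/OpType convention described here is what allows a register form and its memory counterpart to be matched up later; a hypothetical C++ sketch of such a pairing (names invented for illustration, not an actual LLVM API) could be:

    #include <map>
    #include <string>
    #include <utility>
    #include <vector>

    // Both forms of an instruction share an OpKey; OpType distinguishes the
    // register ("reg") form from the memory ("mem") form.
    struct InstInfo {
      std::string OpKey;  // shared key for an <INSN>R / <INSN> pair
      std::string OpType; // "reg" or "mem"
      unsigned Opcode;
    };

    std::map<std::string, std::pair<unsigned, unsigned>>
    pairRegAndMemForms(const std::vector<InstInfo> &Insts) {
      std::map<std::string, std::pair<unsigned, unsigned>> Pairs;
      for (const InstInfo &I : Insts) {
        if (I.OpType == "reg")
          Pairs[I.OpKey].first = I.Opcode;   // register-to-register opcode
        else if (I.OpType == "mem")
          Pairs[I.OpKey].second = I.Opcode;  // register-memory opcode
      }
      return Pairs;
    }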
@@ -158,6 +158,14 @@ def getThreeOperandOpcode : InstrMapping {
//
//===----------------------------------------------------------------------===//
+class InstE<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<2, outs, ins, asmstr, pattern> {
+ field bits<16> Inst;
+ field bits<16> SoftFail = 0;
+
+ let Inst = op;
+}
+
class InstI<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
: InstSystemZ<2, outs, ins, asmstr, pattern> {
field bits<16> Inst;
@@ -169,7 +177,36 @@ class InstI<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
let Inst{7-0} = I1;
}
-class InstRI<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+class InstIE<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<4, outs, ins, asmstr, pattern> {
+ field bits<32> Inst;
+ field bits<32> SoftFail = 0;
+
+ bits<4> I1;
+ bits<4> I2;
+
+ let Inst{31-16} = op;
+ let Inst{15-8} = 0;
+ let Inst{7-4} = I1;
+ let Inst{3-0} = I2;
+}
+
+class InstMII<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<4> M1;
+ bits<12> RI2;
+ bits<24> RI3;
+
+ let Inst{47-40} = op;
+ let Inst{39-36} = M1;
+ let Inst{35-24} = RI2;
+ let Inst{23-0} = RI3;
+}
+
+class InstRIa<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern>
: InstSystemZ<4, outs, ins, asmstr, pattern> {
field bits<32> Inst;
field bits<32> SoftFail = 0;
@@ -183,6 +220,34 @@ class InstRI<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern>
let Inst{15-0} = I2;
}
+class InstRIb<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<4, outs, ins, asmstr, pattern> {
+ field bits<32> Inst;
+ field bits<32> SoftFail = 0;
+
+ bits<4> R1;
+ bits<16> RI2;
+
+ let Inst{31-24} = op{11-4};
+ let Inst{23-20} = R1;
+ let Inst{19-16} = op{3-0};
+ let Inst{15-0} = RI2;
+}
+
+class InstRIc<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<4, outs, ins, asmstr, pattern> {
+ field bits<32> Inst;
+ field bits<32> SoftFail = 0;
+
+ bits<4> M1;
+ bits<16> RI2;
+
+ let Inst{31-24} = op{11-4};
+ let Inst{23-20} = M1;
+ let Inst{19-16} = op{3-0};
+ let Inst{15-0} = RI2;
+}
+
class InstRIEa<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
: InstSystemZ<6, outs, ins, asmstr, pattern> {
field bits<48> Inst;
@@ -255,6 +320,23 @@ class InstRIEd<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
let Inst{7-0} = op{7-0};
}
+class InstRIEe<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<4> R1;
+ bits<4> R3;
+ bits<16> RI2;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = R1;
+ let Inst{35-32} = R3;
+ let Inst{31-16} = RI2;
+ let Inst{15-8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
class InstRIEf<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
: InstSystemZ<6, outs, ins, asmstr, pattern> {
field bits<48> Inst;
@@ -275,7 +357,24 @@ class InstRIEf<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
let Inst{7-0} = op{7-0};
}
-class InstRIL<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+class InstRIEg<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<4> R1;
+ bits<4> M3;
+ bits<16> I2;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = R1;
+ let Inst{35-32} = M3;
+ let Inst{31-16} = I2;
+ let Inst{15-8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
+class InstRILa<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern>
: InstSystemZ<6, outs, ins, asmstr, pattern> {
field bits<48> Inst;
field bits<48> SoftFail = 0;
@@ -289,6 +388,34 @@ class InstRIL<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern>
let Inst{31-0} = I2;
}
+class InstRILb<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<4> R1;
+ bits<32> RI2;
+
+ let Inst{47-40} = op{11-4};
+ let Inst{39-36} = R1;
+ let Inst{35-32} = op{3-0};
+ let Inst{31-0} = RI2;
+}
+
+class InstRILc<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<4> M1;
+ bits<32> RI2;
+
+ let Inst{47-40} = op{11-4};
+ let Inst{39-36} = M1;
+ let Inst{35-32} = op{3-0};
+ let Inst{31-0} = RI2;
+}
+
class InstRIS<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
: InstSystemZ<6, outs, ins, asmstr, pattern> {
field bits<48> Inst;
@@ -350,7 +477,7 @@ class InstRRE<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
let Inst{3-0} = R2;
}
-class InstRRF<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+class InstRRFa<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
: InstSystemZ<4, outs, ins, asmstr, pattern> {
field bits<32> Inst;
field bits<32> SoftFail = 0;
@@ -358,11 +485,28 @@ class InstRRF<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
bits<4> R1;
bits<4> R2;
bits<4> R3;
- bits<4> R4;
+ bits<4> M4;
let Inst{31-16} = op;
let Inst{15-12} = R3;
- let Inst{11-8} = R4;
+ let Inst{11-8} = M4;
+ let Inst{7-4} = R1;
+ let Inst{3-0} = R2;
+}
+
+class InstRRFb<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<4, outs, ins, asmstr, pattern> {
+ field bits<32> Inst;
+ field bits<32> SoftFail = 0;
+
+ bits<4> R1;
+ bits<4> R2;
+ bits<4> R3;
+ bits<4> M4;
+
+ let Inst{31-16} = op;
+ let Inst{15-12} = R3;
+ let Inst{11-8} = M4;
let Inst{7-4} = R1;
let Inst{3-0} = R2;
}
@@ -383,6 +527,23 @@ class InstRRFc<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
let Inst{3-0} = R2;
}
+class InstRRFe<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<4, outs, ins, asmstr, pattern> {
+ field bits<32> Inst;
+ field bits<32> SoftFail = 0;
+
+ bits<4> R1;
+ bits<4> R2;
+ bits<4> M3;
+ bits<4> M4;
+
+ let Inst{31-16} = op;
+ let Inst{15-12} = M3;
+ let Inst{11-8} = M4;
+ let Inst{7-4} = R1;
+ let Inst{3-0} = R2;
+}
+
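The field assignments in InstRRFe above fully determine the 32-bit RRF-e word: the 16-bit opcode sits in bits 31-16, M3 in bits 15-12, M4 in bits 11-8, R1 in bits 7-4 and R2 in bits 3-0. A hypothetical stand-alone C++ sketch of that packing (illustrative, not generated from this TableGen) is:

    #include <cassert>
    #include <cstdint>

    // Packs an RRF-e instruction word following the layout of InstRRFe above.
    inline uint32_t encodeRRFe(uint16_t Opcode, unsigned R1, unsigned R2,
                               unsigned M3, unsigned M4) {
      assert(R1 < 16 && R2 < 16 && M3 < 16 && M4 < 16);
      return (uint32_t(Opcode) << 16) | (M3 << 12) | (M4 << 8) | (R1 << 4) | R2;
    }

    // Example with hypothetical operands: FIEBR with R1=0, R2=2, M3=5, M4=0
    // gives encodeRRFe(0xB357, 0, 2, 5, 0) == 0xB3575002.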
class InstRRS<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
: InstSystemZ<6, outs, ins, asmstr, pattern> {
field bits<48> Inst;
@@ -402,7 +563,7 @@ class InstRRS<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
let Inst{7-0} = op{7-0};
}
-class InstRX<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+class InstRXa<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
: InstSystemZ<4, outs, ins, asmstr, pattern> {
field bits<32> Inst;
field bits<32> SoftFail = 0;
@@ -417,6 +578,21 @@ class InstRX<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
let HasIndex = 1;
}
+class InstRXb<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<4, outs, ins, asmstr, pattern> {
+ field bits<32> Inst;
+ field bits<32> SoftFail = 0;
+
+ bits<4> M1;
+ bits<20> XBD2;
+
+ let Inst{31-24} = op;
+ let Inst{23-20} = M1;
+ let Inst{19-0} = XBD2;
+
+ let HasIndex = 1;
+}
+
class InstRXE<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
: InstSystemZ<6, outs, ins, asmstr, pattern> {
field bits<48> Inst;
@@ -455,7 +631,7 @@ class InstRXF<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
let HasIndex = 1;
}
-class InstRXY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+class InstRXYa<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
: InstSystemZ<6, outs, ins, asmstr, pattern> {
field bits<48> Inst;
field bits<48> SoftFail = 0;
@@ -472,7 +648,24 @@ class InstRXY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
let HasIndex = 1;
}
-class InstRS<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+class InstRXYb<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<4> M1;
+ bits<28> XBD2;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = M1;
+ let Inst{35-8} = XBD2;
+ let Inst{7-0} = op{7-0};
+
+ let Has20BitOffset = 1;
+ let HasIndex = 1;
+}
+
+class InstRSa<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
: InstSystemZ<4, outs, ins, asmstr, pattern> {
field bits<32> Inst;
field bits<32> SoftFail = 0;
@@ -487,7 +680,37 @@ class InstRS<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
let Inst{15-0} = BD2;
}
-class InstRSY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+class InstRSb<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<4, outs, ins, asmstr, pattern> {
+ field bits<32> Inst;
+ field bits<32> SoftFail = 0;
+
+ bits<4> R1;
+ bits<4> M3;
+ bits<16> BD2;
+
+ let Inst{31-24} = op;
+ let Inst{23-20} = R1;
+ let Inst{19-16} = M3;
+ let Inst{15-0} = BD2;
+}
+
+class InstRSI<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<4, outs, ins, asmstr, pattern> {
+ field bits<32> Inst;
+ field bits<32> SoftFail = 0;
+
+ bits<4> R1;
+ bits<4> R3;
+ bits<16> RI2;
+
+ let Inst{31-24} = op;
+ let Inst{23-20} = R1;
+ let Inst{19-16} = R3;
+ let Inst{15-0} = RI2;
+}
+
+class InstRSYa<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
: InstSystemZ<6, outs, ins, asmstr, pattern> {
field bits<48> Inst;
field bits<48> SoftFail = 0;
@@ -505,6 +728,24 @@ class InstRSY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
let Has20BitOffset = 1;
}
+class InstRSYb<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<4> R1;
+ bits<4> M3;
+ bits<24> BD2;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = R1;
+ let Inst{35-32} = M3;
+ let Inst{31-8} = BD2;
+ let Inst{7-0} = op{7-0};
+
+ let Has20BitOffset = 1;
+}
+
class InstSI<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
: InstSystemZ<4, outs, ins, asmstr, pattern> {
field bits<32> Inst;
@@ -547,7 +788,23 @@ class InstSIY<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
let Has20BitOffset = 1;
}
-class InstSS<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+class InstSMI<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<4> M1;
+ bits<16> RI2;
+ bits<16> BD3;
+
+ let Inst{47-40} = op;
+ let Inst{39-36} = M1;
+ let Inst{35-32} = 0;
+ let Inst{31-16} = BD3;
+ let Inst{15-0} = RI2;
+}
+
+class InstSSa<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
: InstSystemZ<6, outs, ins, asmstr, pattern> {
field bits<48> Inst;
field bits<48> SoftFail = 0;
@@ -560,6 +817,68 @@ class InstSS<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
let Inst{15-0} = BD2;
}
+class InstSSd<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<20> RBD1;
+ bits<16> BD2;
+ bits<4> R3;
+
+ let Inst{47-40} = op;
+ let Inst{39-36} = RBD1{19-16};
+ let Inst{35-32} = R3;
+ let Inst{31-16} = RBD1{15-0};
+ let Inst{15-0} = BD2;
+}
+
+class InstSSe<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<4> R1;
+ bits<16> BD2;
+ bits<4> R3;
+ bits<16> BD4;
+
+ let Inst{47-40} = op;
+ let Inst{39-36} = R1;
+ let Inst{35-32} = R3;
+ let Inst{31-16} = BD2;
+ let Inst{15-0} = BD4;
+}
+
+class InstSSE<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<16> BD1;
+ bits<16> BD2;
+
+ let Inst{47-32} = op;
+ let Inst{31-16} = BD1;
+ let Inst{15-0} = BD2;
+}
+
+class InstSSF<bits<12> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<16> BD1;
+ bits<16> BD2;
+ bits<4> R3;
+
+ let Inst{47-40} = op{11-4};
+ let Inst{39-36} = R3;
+ let Inst{35-32} = op{3-0};
+ let Inst{31-16} = BD1;
+ let Inst{15-0} = BD2;
+}
+
class InstS<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
: InstSystemZ<4, outs, ins, asmstr, pattern> {
field bits<32> Inst;
@@ -948,6 +1267,294 @@ class InstVRX<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
}
//===----------------------------------------------------------------------===//
+// Instruction classes for .insn directives
+//===----------------------------------------------------------------------===//
+
+class DirectiveInsnE<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstE<0, outs, ins, asmstr, pattern> {
+ bits<16> enc;
+
+ let Inst = enc;
+}
+
+class DirectiveInsnRI<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstRIa<0, outs, ins, asmstr, pattern> {
+ bits<32> enc;
+
+ let Inst{31-24} = enc{31-24};
+ let Inst{19-16} = enc{19-16};
+}
+
+class DirectiveInsnRIE<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstRIEd<0, outs, ins, asmstr, pattern> {
+ bits<48> enc;
+
+ let Inst{47-40} = enc{47-40};
+ let Inst{7-0} = enc{7-0};
+}
+
+class DirectiveInsnRIL<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstRILa<0, outs, ins, asmstr, pattern> {
+ bits<48> enc;
+ string type;
+
+ let Inst{47-40} = enc{47-40};
+ let Inst{35-32} = enc{35-32};
+}
+
+class DirectiveInsnRIS<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstRIS<0, outs, ins, asmstr, pattern> {
+ bits<48> enc;
+
+ let Inst{47-40} = enc{47-40};
+ let Inst{7-0} = enc{7-0};
+}
+
+class DirectiveInsnRR<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstRR<0, outs, ins, asmstr, pattern> {
+ bits<16> enc;
+
+ let Inst{15-8} = enc{15-8};
+}
+
+class DirectiveInsnRRE<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstRRE<0, outs, ins, asmstr, pattern> {
+ bits<32> enc;
+
+ let Inst{31-16} = enc{31-16};
+}
+
+class DirectiveInsnRRF<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstRRFa<0, outs, ins, asmstr, pattern> {
+ bits<32> enc;
+
+ let Inst{31-16} = enc{31-16};
+}
+
+class DirectiveInsnRRS<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstRRS<0, outs, ins, asmstr, pattern> {
+ bits<48> enc;
+
+ let Inst{47-40} = enc{47-40};
+ let Inst{7-0} = enc{7-0};
+}
+
+class DirectiveInsnRS<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstRSa<0, outs, ins, asmstr, pattern> {
+ bits<32> enc;
+
+ let Inst{31-24} = enc{31-24};
+}
+
+// RSE is like RSY except with a 12-bit displacement (instead of 20).
+class DirectiveInsnRSE<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstRSYa<6, outs, ins, asmstr, pattern> {
+  bits<48> enc;
+
+ let Inst{47-40} = enc{47-40};
+ let Inst{31-16} = BD2{15-0};
+ let Inst{15-8} = 0;
+ let Inst{7-0} = enc{7-0};
+}
+
+class DirectiveInsnRSI<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstRSI<0, outs, ins, asmstr, pattern> {
+ bits<32> enc;
+
+ let Inst{31-24} = enc{31-24};
+}
+
+class DirectiveInsnRSY<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstRSYa<0, outs, ins, asmstr, pattern> {
+ bits<48> enc;
+
+ let Inst{47-40} = enc{47-40};
+ let Inst{7-0} = enc{7-0};
+}
+
+class DirectiveInsnRX<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstRXa<0, outs, ins, asmstr, pattern> {
+ bits<32> enc;
+
+ let Inst{31-24} = enc{31-24};
+}
+
+class DirectiveInsnRXE<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstRXE<0, outs, ins, asmstr, pattern> {
+ bits<48> enc;
+
+ let M3 = 0;
+
+ let Inst{47-40} = enc{47-40};
+ let Inst{7-0} = enc{7-0};
+}
+
+class DirectiveInsnRXF<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstRXF<0, outs, ins, asmstr, pattern> {
+ bits<48> enc;
+
+ let Inst{47-40} = enc{47-40};
+ let Inst{7-0} = enc{7-0};
+}
+
+class DirectiveInsnRXY<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstRXYa<0, outs, ins, asmstr, pattern> {
+ bits<48> enc;
+
+ let Inst{47-40} = enc{47-40};
+ let Inst{7-0} = enc{7-0};
+}
+
+class DirectiveInsnS<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstS<0, outs, ins, asmstr, pattern> {
+ bits<32> enc;
+
+ let Inst{31-16} = enc{31-16};
+}
+
+class DirectiveInsnSI<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSI<0, outs, ins, asmstr, pattern> {
+ bits<32> enc;
+
+ let Inst{31-24} = enc{31-24};
+}
+
+class DirectiveInsnSIY<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSIY<0, outs, ins, asmstr, pattern> {
+ bits<48> enc;
+
+ let Inst{47-40} = enc{47-40};
+ let Inst{7-0} = enc{7-0};
+}
+
+class DirectiveInsnSIL<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSIL<0, outs, ins, asmstr, pattern> {
+ bits<48> enc;
+
+ let Inst{47-32} = enc{47-32};
+}
+
+class DirectiveInsnSS<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSSd<0, outs, ins, asmstr, pattern> {
+ bits<48> enc;
+
+ let Inst{47-40} = enc{47-40};
+}
+
+class DirectiveInsnSSE<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSSE<0, outs, ins, asmstr, pattern> {
+ bits<48> enc;
+
+ let Inst{47-32} = enc{47-32};
+}
+
+class DirectiveInsnSSF<dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSSF<0, outs, ins, asmstr, pattern> {
+ bits<48> enc;
+
+ let Inst{47-40} = enc{47-40};
+ let Inst{35-32} = enc{35-32};
+}
+
+//===----------------------------------------------------------------------===//
+// Variants of instructions with condition mask
+//===----------------------------------------------------------------------===//
+//
+// For instructions using a condition mask (e.g. conditional branches,
+// compare-and-branch instructions, or conditional move instructions),
+// we generally need to create multiple instruction patterns:
+//
+// - One used for code generation, which encodes the condition mask as an
+// MI operand, but writes out an extended mnemonic for better readability.
+// - One pattern for the base form of the instruction with an explicit
+// condition mask (encoded as a plain integer MI operand).
+// - Specific patterns for each extended mnemonic, where the condition mask
+// is implied by the pattern name and not otherwise encoded at all.
+//
+// We need the latter primarily for the assembler and disassembler, since the
+// assembler parser is not able to decode part of an instruction mnemonic
+// into an operand. Thus we provide separate patterns for each mnemonic.
+//
+// Note that in some cases there are two different mnemonics for the same
+// condition mask. In this case we cannot have both instructions available
+// to the disassembler at the same time since the encodings are not distinct.
+// Therefore the alternate forms are marked isAsmParserOnly.
+//
+// We don't make one of the two names an alias of the other because
+// we need the custom parsing routines to select the correct register class.
+//
+// This section provides helpers for generating the specific forms.
+//
+//===----------------------------------------------------------------------===//
+
+// A class to describe a variant of an instruction with condition mask.
+class CondVariant<bits<4> ccmaskin, string suffixin, bit alternatein> {
+ // The fixed condition mask to use.
+ bits<4> ccmask = ccmaskin;
+
+ // The suffix to use for the extended assembler mnemonic.
+ string suffix = suffixin;
+
+ // Whether this is an alternate that needs to be marked isAsmParserOnly.
+ bit alternate = alternatein;
+}
+
+// Condition mask 15 means "always true", which is used to define
+// unconditional branches as a variant of conditional branches.
+def CondAlways : CondVariant<15, "", 0>;
+
+// Condition masks for general instructions that can set all 4 bits.
+def CondVariantO : CondVariant<1, "o", 0>;
+def CondVariantH : CondVariant<2, "h", 0>;
+def CondVariantP : CondVariant<2, "p", 1>;
+def CondVariantNLE : CondVariant<3, "nle", 0>;
+def CondVariantL : CondVariant<4, "l", 0>;
+def CondVariantM : CondVariant<4, "m", 1>;
+def CondVariantNHE : CondVariant<5, "nhe", 0>;
+def CondVariantLH : CondVariant<6, "lh", 0>;
+def CondVariantNE : CondVariant<7, "ne", 0>;
+def CondVariantNZ : CondVariant<7, "nz", 1>;
+def CondVariantE : CondVariant<8, "e", 0>;
+def CondVariantZ : CondVariant<8, "z", 1>;
+def CondVariantNLH : CondVariant<9, "nlh", 0>;
+def CondVariantHE : CondVariant<10, "he", 0>;
+def CondVariantNL : CondVariant<11, "nl", 0>;
+def CondVariantNM : CondVariant<11, "nm", 1>;
+def CondVariantLE : CondVariant<12, "le", 0>;
+def CondVariantNH : CondVariant<13, "nh", 0>;
+def CondVariantNP : CondVariant<13, "np", 1>;
+def CondVariantNO : CondVariant<14, "no", 0>;
+
+// A helper class to look up one of the above by name.
+class CV<string name>
+ : CondVariant<!cast<CondVariant>("CondVariant"#name).ccmask,
+ !cast<CondVariant>("CondVariant"#name).suffix,
+ !cast<CondVariant>("CondVariant"#name).alternate>;
+
+// Condition masks for integer instructions (e.g. compare-and-branch).
+// This is like the list above, except that condition 3 is not possible
+// and that the low bit of the mask is therefore always 0. This means
+// that each condition has two names. Conditions "o" and "no" are not used.
+def IntCondVariantH : CondVariant<2, "h", 0>;
+def IntCondVariantNLE : CondVariant<2, "nle", 1>;
+def IntCondVariantL : CondVariant<4, "l", 0>;
+def IntCondVariantNHE : CondVariant<4, "nhe", 1>;
+def IntCondVariantLH : CondVariant<6, "lh", 0>;
+def IntCondVariantNE : CondVariant<6, "ne", 1>;
+def IntCondVariantE : CondVariant<8, "e", 0>;
+def IntCondVariantNLH : CondVariant<8, "nlh", 1>;
+def IntCondVariantHE : CondVariant<10, "he", 0>;
+def IntCondVariantNL : CondVariant<10, "nl", 1>;
+def IntCondVariantLE : CondVariant<12, "le", 0>;
+def IntCondVariantNH : CondVariant<12, "nh", 1>;
+
+// A helper class to look up one of the above by name.
+class ICV<string name>
+ : CondVariant<!cast<CondVariant>("IntCondVariant"#name).ccmask,
+ !cast<CondVariant>("IntCondVariant"#name).suffix,
+ !cast<CondVariant>("IntCondVariant"#name).alternate>;
+
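Read together, the CondVariant/CV definitions above amount to a small table mapping an extended-mnemonic suffix to a fixed condition mask, plus a flag for alternate spellings that must stay assembler-only because they share an encoding. A hypothetical C++ rendering of part of the general-purpose table (illustrative, not generated code):

    #include <string>
    #include <vector>

    struct CondVariantInfo {
      unsigned CCMask;    // fixed 4-bit condition mask
      std::string Suffix; // extended-mnemonic suffix
      bool Alternate;     // true: must be marked isAsmParserOnly
    };

    // Subset of the general-purpose variants defined above; "h"/"p", "l"/"m",
    // "ne"/"nz", "e"/"z" and "nl"/"nm" share a mask, so only one spelling of
    // each pair can be visible to the disassembler.
    const std::vector<CondVariantInfo> GeneralCondVariants = {
      {1, "o", false},   {2, "h", false},  {2, "p", true},
      {4, "l", false},   {4, "m", true},   {7, "ne", false}, {7, "nz", true},
      {8, "e", false},   {8, "z", true},   {11, "nl", false}, {11, "nm", true},
      {14, "no", false},
    };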
+//===----------------------------------------------------------------------===//
// Instruction definitions with semantics
//===----------------------------------------------------------------------===//
//
@@ -960,11 +1567,32 @@ class InstVRX<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
// Inherent:
// One register output operand and no input operands.
//
+// StoreInherent:
+// One address operand. The instruction stores to the address.
+//
+// SideEffectInherent:
+// No input or output operands, but causes some side effect.
+//
+// Branch:
+// One branch target. The instruction branches to the target.
+//
+// Call:
+// One output operand and one branch target. The instruction stores
+// the return address to the output operand and branches to the target.
+//
+// CmpBranch:
+// Two input operands and one optional branch target. The instruction
+// compares the two input operands and branches or traps on the result.
+//
// BranchUnary:
-// One register output operand, one register input operand and
-// one branch displacement. The instructions stores a modified
-// form of the source register in the destination register and
-// branches on the result.
+// One register output operand, one register input operand and one branch
+// target. The instruction stores a modified form of the source register
+// in the destination register and branches on the result.
+//
+// BranchBinary:
+// One register output operand, two register input operands and one branch
+// target. The instruction stores a modified form of one of the source
+// registers in the destination register and branches on the result.
//
// LoadMultiple:
// One address input operand and two explicit output operands.
@@ -984,6 +1612,12 @@ class InstVRX<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
// doesn't write more than the number of bytes specified by the
// length operand.
//
+// LoadAddress:
+// One register output operand and one address operand.
+//
+// SideEffectAddress:
+// One address operand. No output operands, but causes some side effect.
+//
// Unary:
// One register output operand and one input operand.
//
@@ -991,6 +1625,9 @@ class InstVRX<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
// One address operand and one other input operand. The instruction
// stores to the address.
//
+// SideEffectUnary:
+// One input operand. No output operands, but causes some side effect.
+//
// Binary:
// One register output operand and two input operands.
//
@@ -998,6 +1635,9 @@ class InstVRX<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
// One address operand and two other input operands. The instruction
// stores to the address.
//
+// SideEffectBinary:
+// Two input operands. No output operands, but causes some side effect.
+//
// Compare:
// Two input operands and an implicit CC output operand.
//
@@ -1008,6 +1648,9 @@ class InstVRX<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
// Ternary:
// One register output operand and three input operands.
//
+// SideEffectTernary:
+// Three input operands. No output operands, but causes some side effect.
+//
// Quaternary:
// One register output operand and four input operands.
//
@@ -1027,6 +1670,9 @@ class InstVRX<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
// One 4-bit immediate operand and one address operand. The immediate
// operand is 1 for a load prefetch and 2 for a store prefetch.
//
+// BranchPreload:
+// One 4-bit immediate operand and two address operands.
+//
// The format determines which input operands are tied to output operands,
// and also determines the shape of any address operand.
//
@@ -1038,10 +1684,10 @@ class InstVRX<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
//===----------------------------------------------------------------------===//
class InherentRRE<string mnemonic, bits<16> opcode, RegisterOperand cls,
- dag src>
+ SDPatternOperator operator>
: InstRRE<opcode, (outs cls:$R1), (ins),
mnemonic#"\t$R1",
- [(set cls:$R1, src)]> {
+ [(set cls:$R1, (operator))]> {
let R2 = 0;
}
@@ -1051,26 +1697,380 @@ class InherentVRIa<string mnemonic, bits<16> opcode, bits<16> value>
let M3 = 0;
}
+class StoreInherentS<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator, bits<5> bytes>
+ : InstS<opcode, (outs), (ins bdaddr12only:$BD2),
+ mnemonic#"\t$BD2", [(operator bdaddr12only:$BD2)]> {
+ let mayStore = 1;
+ let AccessBytes = bytes;
+}
+
+class SideEffectInherentE<string mnemonic, bits<16> opcode>
+ : InstE<opcode, (outs), (ins), mnemonic, []>;
+
+class SideEffectInherentS<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator>
+ : InstS<opcode, (outs), (ins), mnemonic, [(operator)]> {
+ let BD2 = 0;
+}
+
+// Allow an optional TLS marker symbol to generate TLS call relocations.
+class CallRI<string mnemonic, bits<12> opcode>
+ : InstRIb<opcode, (outs), (ins GR64:$R1, brtarget16tls:$RI2),
+ mnemonic#"\t$R1, $RI2", []>;
+
+// Allow an optional TLS marker symbol to generate TLS call relocations.
+class CallRIL<string mnemonic, bits<12> opcode>
+ : InstRILb<opcode, (outs), (ins GR64:$R1, brtarget32tls:$RI2),
+ mnemonic#"\t$R1, $RI2", []>;
+
+class CallRR<string mnemonic, bits<8> opcode>
+ : InstRR<opcode, (outs), (ins GR64:$R1, ADDR64:$R2),
+ mnemonic#"\t$R1, $R2", []>;
+
+class CallRX<string mnemonic, bits<8> opcode>
+ : InstRXa<opcode, (outs), (ins GR64:$R1, bdxaddr12only:$XBD2),
+ mnemonic#"\t$R1, $XBD2", []>;
+
+class CondBranchRI<string mnemonic, bits<12> opcode,
+ SDPatternOperator operator = null_frag>
+ : InstRIc<opcode, (outs), (ins cond4:$valid, cond4:$M1, brtarget16:$RI2),
+ !subst("#", "${M1}", mnemonic)#"\t$RI2",
+ [(operator cond4:$valid, cond4:$M1, bb:$RI2)]> {
+ let CCMaskFirst = 1;
+}
+
+class AsmCondBranchRI<string mnemonic, bits<12> opcode>
+ : InstRIc<opcode, (outs), (ins imm32zx4:$M1, brtarget16:$RI2),
+ mnemonic#"\t$M1, $RI2", []>;
+
+class FixedCondBranchRI<CondVariant V, string mnemonic, bits<12> opcode,
+ SDPatternOperator operator = null_frag>
+ : InstRIc<opcode, (outs), (ins brtarget16:$RI2),
+ !subst("#", V.suffix, mnemonic)#"\t$RI2", [(operator bb:$RI2)]> {
+ let isAsmParserOnly = V.alternate;
+ let M1 = V.ccmask;
+}
+
+class CondBranchRIL<string mnemonic, bits<12> opcode>
+ : InstRILc<opcode, (outs), (ins cond4:$valid, cond4:$M1, brtarget32:$RI2),
+ !subst("#", "${M1}", mnemonic)#"\t$RI2", []> {
+ let CCMaskFirst = 1;
+}
+
+class AsmCondBranchRIL<string mnemonic, bits<12> opcode>
+ : InstRILc<opcode, (outs), (ins imm32zx4:$M1, brtarget32:$RI2),
+ mnemonic#"\t$M1, $RI2", []>;
+
+class FixedCondBranchRIL<CondVariant V, string mnemonic, bits<12> opcode>
+ : InstRILc<opcode, (outs), (ins brtarget32:$RI2),
+ !subst("#", V.suffix, mnemonic)#"\t$RI2", []> {
+ let isAsmParserOnly = V.alternate;
+ let M1 = V.ccmask;
+}
+
+class CondBranchRR<string mnemonic, bits<8> opcode>
+ : InstRR<opcode, (outs), (ins cond4:$valid, cond4:$R1, GR64:$R2),
+ !subst("#", "${R1}", mnemonic)#"\t$R2", []> {
+ let CCMaskFirst = 1;
+}
+
+class AsmCondBranchRR<string mnemonic, bits<8> opcode>
+ : InstRR<opcode, (outs), (ins imm32zx4:$R1, GR64:$R2),
+ mnemonic#"\t$R1, $R2", []>;
+
+class FixedCondBranchRR<CondVariant V, string mnemonic, bits<8> opcode,
+ SDPatternOperator operator = null_frag>
+ : InstRR<opcode, (outs), (ins ADDR64:$R2),
+ !subst("#", V.suffix, mnemonic)#"\t$R2", [(operator ADDR64:$R2)]> {
+ let isAsmParserOnly = V.alternate;
+ let R1 = V.ccmask;
+}
+
+class CondBranchRX<string mnemonic, bits<8> opcode>
+ : InstRXb<opcode, (outs), (ins cond4:$valid, cond4:$M1, bdxaddr12only:$XBD2),
+ !subst("#", "${M1}", mnemonic)#"\t$XBD2", []> {
+ let CCMaskFirst = 1;
+}
+
+class AsmCondBranchRX<string mnemonic, bits<8> opcode>
+ : InstRXb<opcode, (outs), (ins imm32zx4:$M1, bdxaddr12only:$XBD2),
+ mnemonic#"\t$M1, $XBD2", []>;
+
+class FixedCondBranchRX<CondVariant V, string mnemonic, bits<8> opcode>
+ : InstRXb<opcode, (outs), (ins bdxaddr12only:$XBD2),
+ !subst("#", V.suffix, mnemonic)#"\t$XBD2", []> {
+ let isAsmParserOnly = V.alternate;
+ let M1 = V.ccmask;
+}
+
+class CmpBranchRIEa<string mnemonic, bits<16> opcode,
+ RegisterOperand cls, Immediate imm>
+ : InstRIEa<opcode, (outs), (ins cls:$R1, imm:$I2, cond4:$M3),
+ mnemonic#"$M3\t$R1, $I2", []>;
+
+class AsmCmpBranchRIEa<string mnemonic, bits<16> opcode,
+ RegisterOperand cls, Immediate imm>
+ : InstRIEa<opcode, (outs), (ins cls:$R1, imm:$I2, imm32zx4:$M3),
+ mnemonic#"\t$R1, $I2, $M3", []>;
+
+class FixedCmpBranchRIEa<CondVariant V, string mnemonic, bits<16> opcode,
+ RegisterOperand cls, Immediate imm>
+ : InstRIEa<opcode, (outs), (ins cls:$R1, imm:$I2),
+ mnemonic#V.suffix#"\t$R1, $I2", []> {
+ let isAsmParserOnly = V.alternate;
+ let M3 = V.ccmask;
+}
+
+multiclass CmpBranchRIEaPair<string mnemonic, bits<16> opcode,
+ RegisterOperand cls, Immediate imm> {
+ let isCodeGenOnly = 1 in
+ def "" : CmpBranchRIEa<mnemonic, opcode, cls, imm>;
+ def Asm : AsmCmpBranchRIEa<mnemonic, opcode, cls, imm>;
+}
+
+class CmpBranchRIEb<string mnemonic, bits<16> opcode,
+ RegisterOperand cls>
+ : InstRIEb<opcode, (outs),
+ (ins cls:$R1, cls:$R2, cond4:$M3, brtarget16:$RI4),
+ mnemonic#"$M3\t$R1, $R2, $RI4", []>;
+
+class AsmCmpBranchRIEb<string mnemonic, bits<16> opcode,
+ RegisterOperand cls>
+ : InstRIEb<opcode, (outs),
+ (ins cls:$R1, cls:$R2, imm32zx4:$M3, brtarget16:$RI4),
+ mnemonic#"\t$R1, $R2, $M3, $RI4", []>;
+
+class FixedCmpBranchRIEb<CondVariant V, string mnemonic, bits<16> opcode,
+ RegisterOperand cls>
+ : InstRIEb<opcode, (outs), (ins cls:$R1, cls:$R2, brtarget16:$RI4),
+ mnemonic#V.suffix#"\t$R1, $R2, $RI4", []> {
+ let isAsmParserOnly = V.alternate;
+ let M3 = V.ccmask;
+}
+
+multiclass CmpBranchRIEbPair<string mnemonic, bits<16> opcode,
+ RegisterOperand cls> {
+ let isCodeGenOnly = 1 in
+ def "" : CmpBranchRIEb<mnemonic, opcode, cls>;
+ def Asm : AsmCmpBranchRIEb<mnemonic, opcode, cls>;
+}
+
+class CmpBranchRIEc<string mnemonic, bits<16> opcode,
+ RegisterOperand cls, Immediate imm>
+ : InstRIEc<opcode, (outs),
+ (ins cls:$R1, imm:$I2, cond4:$M3, brtarget16:$RI4),
+ mnemonic#"$M3\t$R1, $I2, $RI4", []>;
+
+class AsmCmpBranchRIEc<string mnemonic, bits<16> opcode,
+ RegisterOperand cls, Immediate imm>
+ : InstRIEc<opcode, (outs),
+ (ins cls:$R1, imm:$I2, imm32zx4:$M3, brtarget16:$RI4),
+ mnemonic#"\t$R1, $I2, $M3, $RI4", []>;
+
+class FixedCmpBranchRIEc<CondVariant V, string mnemonic, bits<16> opcode,
+ RegisterOperand cls, Immediate imm>
+ : InstRIEc<opcode, (outs), (ins cls:$R1, imm:$I2, brtarget16:$RI4),
+ mnemonic#V.suffix#"\t$R1, $I2, $RI4", []> {
+ let isAsmParserOnly = V.alternate;
+ let M3 = V.ccmask;
+}
+
+multiclass CmpBranchRIEcPair<string mnemonic, bits<16> opcode,
+ RegisterOperand cls, Immediate imm> {
+ let isCodeGenOnly = 1 in
+ def "" : CmpBranchRIEc<mnemonic, opcode, cls, imm>;
+ def Asm : AsmCmpBranchRIEc<mnemonic, opcode, cls, imm>;
+}
+
+class CmpBranchRRFc<string mnemonic, bits<16> opcode,
+ RegisterOperand cls>
+ : InstRRFc<opcode, (outs), (ins cls:$R1, cls:$R2, cond4:$M3),
+ mnemonic#"$M3\t$R1, $R2", []>;
+
+class AsmCmpBranchRRFc<string mnemonic, bits<16> opcode,
+ RegisterOperand cls>
+ : InstRRFc<opcode, (outs), (ins cls:$R1, cls:$R2, imm32zx4:$M3),
+ mnemonic#"\t$R1, $R2, $M3", []>;
+
+multiclass CmpBranchRRFcPair<string mnemonic, bits<16> opcode,
+ RegisterOperand cls> {
+ let isCodeGenOnly = 1 in
+ def "" : CmpBranchRRFc<mnemonic, opcode, cls>;
+ def Asm : AsmCmpBranchRRFc<mnemonic, opcode, cls>;
+}
+
+class FixedCmpBranchRRFc<CondVariant V, string mnemonic, bits<16> opcode,
+ RegisterOperand cls>
+ : InstRRFc<opcode, (outs), (ins cls:$R1, cls:$R2),
+ mnemonic#V.suffix#"\t$R1, $R2", []> {
+ let isAsmParserOnly = V.alternate;
+ let M3 = V.ccmask;
+}
+
+class CmpBranchRRS<string mnemonic, bits<16> opcode,
+ RegisterOperand cls>
+ : InstRRS<opcode, (outs),
+ (ins cls:$R1, cls:$R2, cond4:$M3, bdaddr12only:$BD4),
+ mnemonic#"$M3\t$R1, $R2, $BD4", []>;
+
+class AsmCmpBranchRRS<string mnemonic, bits<16> opcode,
+ RegisterOperand cls>
+ : InstRRS<opcode, (outs),
+ (ins cls:$R1, cls:$R2, imm32zx4:$M3, bdaddr12only:$BD4),
+ mnemonic#"\t$R1, $R2, $M3, $BD4", []>;
+
+class FixedCmpBranchRRS<CondVariant V, string mnemonic, bits<16> opcode,
+ RegisterOperand cls>
+ : InstRRS<opcode, (outs), (ins cls:$R1, cls:$R2, bdaddr12only:$BD4),
+ mnemonic#V.suffix#"\t$R1, $R2, $BD4", []> {
+ let isAsmParserOnly = V.alternate;
+ let M3 = V.ccmask;
+}
+
+multiclass CmpBranchRRSPair<string mnemonic, bits<16> opcode,
+ RegisterOperand cls> {
+ let isCodeGenOnly = 1 in
+ def "" : CmpBranchRRS<mnemonic, opcode, cls>;
+ def Asm : AsmCmpBranchRRS<mnemonic, opcode, cls>;
+}
+
+class CmpBranchRIS<string mnemonic, bits<16> opcode,
+ RegisterOperand cls, Immediate imm>
+ : InstRIS<opcode, (outs),
+ (ins cls:$R1, imm:$I2, cond4:$M3, bdaddr12only:$BD4),
+ mnemonic#"$M3\t$R1, $I2, $BD4", []>;
+
+class AsmCmpBranchRIS<string mnemonic, bits<16> opcode,
+ RegisterOperand cls, Immediate imm>
+ : InstRIS<opcode, (outs),
+ (ins cls:$R1, imm:$I2, imm32zx4:$M3, bdaddr12only:$BD4),
+ mnemonic#"\t$R1, $I2, $M3, $BD4", []>;
+
+class FixedCmpBranchRIS<CondVariant V, string mnemonic, bits<16> opcode,
+ RegisterOperand cls, Immediate imm>
+ : InstRIS<opcode, (outs), (ins cls:$R1, imm:$I2, bdaddr12only:$BD4),
+ mnemonic#V.suffix#"\t$R1, $I2, $BD4", []> {
+ let isAsmParserOnly = V.alternate;
+ let M3 = V.ccmask;
+}
+
+multiclass CmpBranchRISPair<string mnemonic, bits<16> opcode,
+ RegisterOperand cls, Immediate imm> {
+ let isCodeGenOnly = 1 in
+ def "" : CmpBranchRIS<mnemonic, opcode, cls, imm>;
+ def Asm : AsmCmpBranchRIS<mnemonic, opcode, cls, imm>;
+}
+
+class CmpBranchRSYb<string mnemonic, bits<16> opcode,
+ RegisterOperand cls>
+ : InstRSYb<opcode, (outs), (ins cls:$R1, bdaddr20only:$BD2, cond4:$M3),
+ mnemonic#"$M3\t$R1, $BD2", []>;
+
+class AsmCmpBranchRSYb<string mnemonic, bits<16> opcode,
+ RegisterOperand cls>
+ : InstRSYb<opcode, (outs), (ins cls:$R1, bdaddr20only:$BD2, imm32zx4:$M3),
+ mnemonic#"\t$R1, $M3, $BD2", []>;
+
+multiclass CmpBranchRSYbPair<string mnemonic, bits<16> opcode,
+ RegisterOperand cls> {
+ let isCodeGenOnly = 1 in
+ def "" : CmpBranchRSYb<mnemonic, opcode, cls>;
+ def Asm : AsmCmpBranchRSYb<mnemonic, opcode, cls>;
+}
+
+class FixedCmpBranchRSYb<CondVariant V, string mnemonic, bits<16> opcode,
+ RegisterOperand cls>
+ : InstRSYb<opcode, (outs), (ins cls:$R1, bdaddr20only:$BD2),
+ mnemonic#V.suffix#"\t$R1, $BD2", []> {
+ let isAsmParserOnly = V.alternate;
+ let M3 = V.ccmask;
+}
+
class BranchUnaryRI<string mnemonic, bits<12> opcode, RegisterOperand cls>
- : InstRI<opcode, (outs cls:$R1), (ins cls:$R1src, brtarget16:$I2),
- mnemonic##"\t$R1, $I2", []> {
- let isBranch = 1;
- let isTerminator = 1;
+ : InstRIb<opcode, (outs cls:$R1), (ins cls:$R1src, brtarget16:$RI2),
+ mnemonic##"\t$R1, $RI2", []> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+}
+
+class BranchUnaryRIL<string mnemonic, bits<12> opcode, RegisterOperand cls>
+ : InstRILb<opcode, (outs cls:$R1), (ins cls:$R1src, brtarget32:$RI2),
+ mnemonic##"\t$R1, $RI2", []> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+}
+
+class BranchUnaryRR<string mnemonic, bits<8> opcode, RegisterOperand cls>
+ : InstRR<opcode, (outs cls:$R1), (ins cls:$R1src, GR64:$R2),
+ mnemonic##"\t$R1, $R2", []> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+}
+
+class BranchUnaryRRE<string mnemonic, bits<16> opcode, RegisterOperand cls>
+ : InstRRE<opcode, (outs cls:$R1), (ins cls:$R1src, GR64:$R2),
+ mnemonic##"\t$R1, $R2", []> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+}
+
+class BranchUnaryRX<string mnemonic, bits<8> opcode, RegisterOperand cls>
+ : InstRXa<opcode, (outs cls:$R1), (ins cls:$R1src, bdxaddr12only:$XBD2),
+ mnemonic##"\t$R1, $XBD2", []> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+}
+
+class BranchUnaryRXY<string mnemonic, bits<16> opcode, RegisterOperand cls>
+ : InstRXYa<opcode, (outs cls:$R1), (ins cls:$R1src, bdxaddr20only:$XBD2),
+ mnemonic##"\t$R1, $XBD2", []> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+}
+
+class BranchBinaryRSI<string mnemonic, bits<8> opcode, RegisterOperand cls>
+ : InstRSI<opcode, (outs cls:$R1), (ins cls:$R1src, cls:$R3, brtarget16:$RI2),
+ mnemonic##"\t$R1, $R3, $RI2", []> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+}
+
+class BranchBinaryRIEe<string mnemonic, bits<16> opcode, RegisterOperand cls>
+ : InstRIEe<opcode, (outs cls:$R1),
+ (ins cls:$R1src, cls:$R3, brtarget16:$RI2),
+ mnemonic##"\t$R1, $R3, $RI2", []> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+}
+
+class BranchBinaryRS<string mnemonic, bits<8> opcode, RegisterOperand cls>
+ : InstRSa<opcode, (outs cls:$R1),
+ (ins cls:$R1src, cls:$R3, bdaddr12only:$BD2),
+ mnemonic##"\t$R1, $R3, $BD2", []> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+}
+
+class BranchBinaryRSY<string mnemonic, bits<16> opcode, RegisterOperand cls>
+ : InstRSYa<opcode,
+ (outs cls:$R1), (ins cls:$R1src, cls:$R3, bdaddr20only:$BD2),
+ mnemonic##"\t$R1, $R3, $BD2", []> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
}
class LoadMultipleRS<string mnemonic, bits<8> opcode, RegisterOperand cls,
AddressingMode mode = bdaddr12only>
- : InstRS<opcode, (outs cls:$R1, cls:$R3), (ins mode:$BD2),
- mnemonic#"\t$R1, $R3, $BD2", []> {
+ : InstRSa<opcode, (outs cls:$R1, cls:$R3), (ins mode:$BD2),
+ mnemonic#"\t$R1, $R3, $BD2", []> {
let mayLoad = 1;
}
class LoadMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls,
AddressingMode mode = bdaddr20only>
- : InstRSY<opcode, (outs cls:$R1, cls:$R3), (ins mode:$BD2),
- mnemonic#"\t$R1, $R3, $BD2", []> {
+ : InstRSYa<opcode, (outs cls:$R1, cls:$R3), (ins mode:$BD2),
+ mnemonic#"\t$R1, $R3, $BD2", []> {
let mayLoad = 1;
}
@@ -1093,9 +2093,9 @@ class LoadMultipleVRSa<string mnemonic, bits<16> opcode>
class StoreRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator,
RegisterOperand cls>
- : InstRIL<opcode, (outs), (ins cls:$R1, pcrel32:$I2),
- mnemonic#"\t$R1, $I2",
- [(operator cls:$R1, pcrel32:$I2)]> {
+ : InstRILb<opcode, (outs), (ins cls:$R1, pcrel32:$RI2),
+ mnemonic#"\t$R1, $RI2",
+ [(operator cls:$R1, pcrel32:$RI2)]> {
let mayStore = 1;
// We want PC-relative addresses to be tried ahead of BD and BDX addresses.
// However, BDXs have two extra operands and are therefore 6 units more
@@ -1106,10 +2106,10 @@ class StoreRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator,
class StoreRX<string mnemonic, bits<8> opcode, SDPatternOperator operator,
RegisterOperand cls, bits<5> bytes,
AddressingMode mode = bdxaddr12only>
- : InstRX<opcode, (outs), (ins cls:$R1, mode:$XBD2),
- mnemonic#"\t$R1, $XBD2",
- [(operator cls:$R1, mode:$XBD2)]> {
- let OpKey = mnemonic ## cls;
+ : InstRXa<opcode, (outs), (ins cls:$R1, mode:$XBD2),
+ mnemonic#"\t$R1, $XBD2",
+ [(operator cls:$R1, mode:$XBD2)]> {
+ let OpKey = mnemonic#"r"#cls;
let OpType = "mem";
let mayStore = 1;
let AccessBytes = bytes;
@@ -1118,10 +2118,10 @@ class StoreRX<string mnemonic, bits<8> opcode, SDPatternOperator operator,
class StoreRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
RegisterOperand cls, bits<5> bytes,
AddressingMode mode = bdxaddr20only>
- : InstRXY<opcode, (outs), (ins cls:$R1, mode:$XBD2),
- mnemonic#"\t$R1, $XBD2",
- [(operator cls:$R1, mode:$XBD2)]> {
- let OpKey = mnemonic ## cls;
+ : InstRXYa<opcode, (outs), (ins cls:$R1, mode:$XBD2),
+ mnemonic#"\t$R1, $XBD2",
+ [(operator cls:$R1, mode:$XBD2)]> {
+ let OpKey = mnemonic#"r"#cls;
let OpType = "mem";
let mayStore = 1;
let AccessBytes = bytes;
@@ -1161,15 +2161,15 @@ class StoreLengthVRSb<string mnemonic, bits<16> opcode,
class StoreMultipleRS<string mnemonic, bits<8> opcode, RegisterOperand cls,
AddressingMode mode = bdaddr12only>
- : InstRS<opcode, (outs), (ins cls:$R1, cls:$R3, mode:$BD2),
- mnemonic#"\t$R1, $R3, $BD2", []> {
+ : InstRSa<opcode, (outs), (ins cls:$R1, cls:$R3, mode:$BD2),
+ mnemonic#"\t$R1, $R3, $BD2", []> {
let mayStore = 1;
}
class StoreMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls,
AddressingMode mode = bdaddr20only>
- : InstRSY<opcode, (outs), (ins cls:$R1, cls:$R3, mode:$BD2),
- mnemonic#"\t$R1, $R3, $BD2", []> {
+ : InstRSYa<opcode, (outs), (ins cls:$R1, cls:$R3, mode:$BD2),
+ mnemonic#"\t$R1, $R3, $BD2", []> {
let mayStore = 1;
}
@@ -1230,12 +2230,17 @@ multiclass StoreSIPair<string mnemonic, bits<8> siOpcode, bits<16> siyOpcode,
}
}
+class StoreSSE<string mnemonic, bits<16> opcode>
+ : InstSSE<opcode, (outs), (ins bdaddr12only:$BD1, bdaddr12only:$BD2),
+ mnemonic#"\t$BD1, $BD2", []> {
+ let mayStore = 1;
+}
+
class CondStoreRSY<string mnemonic, bits<16> opcode,
RegisterOperand cls, bits<5> bytes,
AddressingMode mode = bdaddr20only>
- : InstRSY<opcode, (outs), (ins cls:$R1, mode:$BD2, cond4:$valid, cond4:$R3),
- mnemonic#"$R3\t$R1, $BD2", []>,
- Requires<[FeatureLoadStoreOnCond]> {
+ : InstRSYb<opcode, (outs), (ins cls:$R1, mode:$BD2, cond4:$valid, cond4:$M3),
+ mnemonic#"$M3\t$R1, $BD2", []> {
let mayStore = 1;
let AccessBytes = bytes;
let CCMaskLast = 1;
@@ -1246,139 +2251,127 @@ class CondStoreRSY<string mnemonic, bits<16> opcode,
class AsmCondStoreRSY<string mnemonic, bits<16> opcode,
RegisterOperand cls, bits<5> bytes,
AddressingMode mode = bdaddr20only>
- : InstRSY<opcode, (outs), (ins cls:$R1, mode:$BD2, imm32zx4:$R3),
- mnemonic#"\t$R1, $BD2, $R3", []>,
- Requires<[FeatureLoadStoreOnCond]> {
+ : InstRSYb<opcode, (outs), (ins cls:$R1, mode:$BD2, imm32zx4:$M3),
+ mnemonic#"\t$R1, $BD2, $M3", []> {
let mayStore = 1;
let AccessBytes = bytes;
}
// Like CondStoreRSY, but with a fixed CC mask.
-class FixedCondStoreRSY<string mnemonic, bits<16> opcode,
- RegisterOperand cls, bits<4> ccmask, bits<5> bytes,
+class FixedCondStoreRSY<CondVariant V, string mnemonic, bits<16> opcode,
+ RegisterOperand cls, bits<5> bytes,
AddressingMode mode = bdaddr20only>
- : InstRSY<opcode, (outs), (ins cls:$R1, mode:$BD2),
- mnemonic#"\t$R1, $BD2", []>,
- Requires<[FeatureLoadStoreOnCond]> {
+ : InstRSYb<opcode, (outs), (ins cls:$R1, mode:$BD2),
+ mnemonic#V.suffix#"\t$R1, $BD2", []> {
let mayStore = 1;
let AccessBytes = bytes;
- let R3 = ccmask;
+ let isAsmParserOnly = V.alternate;
+ let M3 = V.ccmask;
}
-class UnaryRR<string mnemonic, bits<8> opcode, SDPatternOperator operator,
- RegisterOperand cls1, RegisterOperand cls2>
- : InstRR<opcode, (outs cls1:$R1), (ins cls2:$R2),
- mnemonic#"r\t$R1, $R2",
- [(set cls1:$R1, (operator cls2:$R2))]> {
- let OpKey = mnemonic ## cls1;
- let OpType = "reg";
+multiclass CondStoreRSYPair<string mnemonic, bits<16> opcode,
+ RegisterOperand cls, bits<5> bytes,
+ AddressingMode mode = bdaddr20only> {
+ let isCodeGenOnly = 1 in
+ def "" : CondStoreRSY<mnemonic, opcode, cls, bytes, mode>;
+ def Asm : AsmCondStoreRSY<mnemonic, opcode, cls, bytes, mode>;
}
-class UnaryRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
- RegisterOperand cls1, RegisterOperand cls2>
- : InstRRE<opcode, (outs cls1:$R1), (ins cls2:$R2),
- mnemonic#"r\t$R1, $R2",
- [(set cls1:$R1, (operator cls2:$R2))]> {
- let OpKey = mnemonic ## cls1;
- let OpType = "reg";
-}
+class SideEffectUnaryI<string mnemonic, bits<8> opcode, Immediate imm>
+ : InstI<opcode, (outs), (ins imm:$I1),
+ mnemonic#"\t$I1", []>;
-class UnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
- RegisterOperand cls2>
- : InstRRF<opcode, (outs cls1:$R1), (ins imm32zx4:$R3, cls2:$R2),
- mnemonic#"r\t$R1, $R3, $R2", []> {
- let OpKey = mnemonic ## cls1;
- let OpType = "reg";
- let R4 = 0;
+class SideEffectUnaryRR<string mnemonic, bits<8> opcode, RegisterOperand cls>
+ : InstRR<opcode, (outs), (ins cls:$R1),
+ mnemonic#"\t$R1", []> {
+ let R2 = 0;
}
-class UnaryRRF4<string mnemonic, bits<16> opcode, RegisterOperand cls1,
- RegisterOperand cls2>
- : InstRRF<opcode, (outs cls1:$R1), (ins imm32zx4:$R3, cls2:$R2, imm32zx4:$R4),
- mnemonic#"\t$R1, $R3, $R2, $R4", []>;
-
-// These instructions are generated by if conversion. The old value of R1
-// is added as an implicit use.
-class CondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
- RegisterOperand cls2>
- : InstRRF<opcode, (outs cls1:$R1), (ins cls2:$R2, cond4:$valid, cond4:$R3),
- mnemonic#"r$R3\t$R1, $R2", []>,
- Requires<[FeatureLoadStoreOnCond]> {
- let CCMaskLast = 1;
- let R4 = 0;
+class SideEffectUnaryRRE<string mnemonic, bits<16> opcode, RegisterOperand cls,
+ SDPatternOperator operator>
+ : InstRRE<opcode, (outs), (ins cls:$R1),
+ mnemonic#"\t$R1", [(operator cls:$R1)]> {
+ let R2 = 0;
}
-class CondUnaryRIE<string mnemonic, bits<16> opcode, RegisterOperand cls,
- Immediate imm>
- : InstRIEd<opcode, (outs cls:$R1),
- (ins imm:$I2, cond4:$valid, cond4:$R3),
- mnemonic#"$R3\t$R1, $I2", []>,
- Requires<[FeatureLoadStoreOnCond2]> {
- let CCMaskLast = 1;
+class SideEffectUnaryS<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator, bits<5> bytes,
+ AddressingMode mode = bdaddr12only>
+ : InstS<opcode, (outs), (ins mode:$BD2),
+ mnemonic#"\t$BD2", [(operator mode:$BD2)]> {
+ let mayLoad = 1;
+ let AccessBytes = bytes;
}
-// Like CondUnaryRRF, but used for the raw assembly form. The condition-code
-// mask is the third operand rather than being part of the mnemonic.
-class AsmCondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
- RegisterOperand cls2>
- : InstRRF<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2, imm32zx4:$R3),
- mnemonic#"r\t$R1, $R2, $R3", []>,
- Requires<[FeatureLoadStoreOnCond]> {
- let Constraints = "$R1 = $R1src";
- let DisableEncoding = "$R1src";
- let R4 = 0;
-}
+class SideEffectAddressS<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator,
+ AddressingMode mode = bdaddr12only>
+ : InstS<opcode, (outs), (ins mode:$BD2),
+ mnemonic#"\t$BD2", [(operator mode:$BD2)]>;
-class AsmCondUnaryRIE<string mnemonic, bits<16> opcode, RegisterOperand cls,
- Immediate imm>
- : InstRIEd<opcode, (outs cls:$R1),
- (ins cls:$R1src, imm:$I2, imm32zx4:$R3),
- mnemonic#"\t$R1, $I2, $R3", []>,
- Requires<[FeatureLoadStoreOnCond2]> {
- let Constraints = "$R1 = $R1src";
- let DisableEncoding = "$R1src";
+class LoadAddressRX<string mnemonic, bits<8> opcode,
+ SDPatternOperator operator, AddressingMode mode>
+ : InstRXa<opcode, (outs GR64:$R1), (ins mode:$XBD2),
+ mnemonic#"\t$R1, $XBD2",
+ [(set GR64:$R1, (operator mode:$XBD2))]>;
+
+class LoadAddressRXY<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator, AddressingMode mode>
+ : InstRXYa<opcode, (outs GR64:$R1), (ins mode:$XBD2),
+ mnemonic#"\t$R1, $XBD2",
+ [(set GR64:$R1, (operator mode:$XBD2))]>;
+
+multiclass LoadAddressRXPair<string mnemonic, bits<8> rxOpcode,
+ bits<16> rxyOpcode, SDPatternOperator operator> {
+ let DispKey = mnemonic in {
+ let DispSize = "12" in
+ def "" : LoadAddressRX<mnemonic, rxOpcode, operator, laaddr12pair>;
+ let DispSize = "20" in
+ def Y : LoadAddressRXY<mnemonic#"y", rxyOpcode, operator, laaddr20pair>;
+ }
}
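// [Editor's sketch, not part of this diff] LoadAddressRXPair is presumably
// instantiated along the lines of:
//   defm LA : LoadAddressRXPair<"la", 0x41, 0xE371, bitconvert>;
// producing LA with a 12-bit displacement and LAY with a 20-bit displacement,
// tied together through DispKey/DispSize. (Exact opcodes are assumptions.)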
-// Like CondUnaryRRF, but with a fixed CC mask.
-class FixedCondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
- RegisterOperand cls2, bits<4> ccmask>
- : InstRRF<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2),
- mnemonic#"\t$R1, $R2", []>,
- Requires<[FeatureLoadStoreOnCond]> {
- let Constraints = "$R1 = $R1src";
- let DisableEncoding = "$R1src";
- let R3 = ccmask;
- let R4 = 0;
+class LoadAddressRIL<string mnemonic, bits<12> opcode,
+ SDPatternOperator operator>
+ : InstRILb<opcode, (outs GR64:$R1), (ins pcrel32:$RI2),
+ mnemonic#"\t$R1, $RI2",
+ [(set GR64:$R1, (operator pcrel32:$RI2))]>;
+
+class UnaryRR<string mnemonic, bits<8> opcode, SDPatternOperator operator,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : InstRR<opcode, (outs cls1:$R1), (ins cls2:$R2),
+ mnemonic#"\t$R1, $R2",
+ [(set cls1:$R1, (operator cls2:$R2))]> {
+ let OpKey = mnemonic#cls1;
+ let OpType = "reg";
}
-class FixedCondUnaryRIE<string mnemonic, bits<16> opcode, RegisterOperand cls,
- Immediate imm, bits<4> ccmask>
- : InstRIEd<opcode, (outs cls:$R1),
- (ins cls:$R1src, imm:$I2),
- mnemonic#"\t$R1, $I2", []>,
- Requires<[FeatureLoadStoreOnCond2]> {
- let Constraints = "$R1 = $R1src";
- let DisableEncoding = "$R1src";
- let R3 = ccmask;
+class UnaryRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : InstRRE<opcode, (outs cls1:$R1), (ins cls2:$R2),
+ mnemonic#"\t$R1, $R2",
+ [(set cls1:$R1, (operator cls2:$R2))]> {
+ let OpKey = mnemonic#cls1;
+ let OpType = "reg";
}
class UnaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator,
RegisterOperand cls, Immediate imm>
- : InstRI<opcode, (outs cls:$R1), (ins imm:$I2),
- mnemonic#"\t$R1, $I2",
- [(set cls:$R1, (operator imm:$I2))]>;
+ : InstRIa<opcode, (outs cls:$R1), (ins imm:$I2),
+ mnemonic#"\t$R1, $I2",
+ [(set cls:$R1, (operator imm:$I2))]>;
class UnaryRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator,
RegisterOperand cls, Immediate imm>
- : InstRIL<opcode, (outs cls:$R1), (ins imm:$I2),
- mnemonic#"\t$R1, $I2",
- [(set cls:$R1, (operator imm:$I2))]>;
+ : InstRILa<opcode, (outs cls:$R1), (ins imm:$I2),
+ mnemonic#"\t$R1, $I2",
+ [(set cls:$R1, (operator imm:$I2))]>;
class UnaryRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator,
RegisterOperand cls>
- : InstRIL<opcode, (outs cls:$R1), (ins pcrel32:$I2),
- mnemonic#"\t$R1, $I2",
- [(set cls:$R1, (operator pcrel32:$I2))]> {
+ : InstRILb<opcode, (outs cls:$R1), (ins pcrel32:$RI2),
+ mnemonic#"\t$R1, $RI2",
+ [(set cls:$R1, (operator pcrel32:$RI2))]> {
let mayLoad = 1;
// We want PC-relative addresses to be tried ahead of BD and BDX addresses.
// However, BDXs have two extra operands and are therefore 6 units more
@@ -1389,13 +2382,12 @@ class UnaryRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator,
class CondUnaryRSY<string mnemonic, bits<16> opcode,
SDPatternOperator operator, RegisterOperand cls,
bits<5> bytes, AddressingMode mode = bdaddr20only>
- : InstRSY<opcode, (outs cls:$R1),
- (ins cls:$R1src, mode:$BD2, cond4:$valid, cond4:$R3),
- mnemonic#"$R3\t$R1, $BD2",
- [(set cls:$R1,
- (z_select_ccmask (load bdaddr20only:$BD2), cls:$R1src,
- cond4:$valid, cond4:$R3))]>,
- Requires<[FeatureLoadStoreOnCond]> {
+ : InstRSYb<opcode, (outs cls:$R1),
+ (ins cls:$R1src, mode:$BD2, cond4:$valid, cond4:$M3),
+ mnemonic#"$M3\t$R1, $BD2",
+ [(set cls:$R1,
+ (z_select_ccmask (operator bdaddr20only:$BD2), cls:$R1src,
+ cond4:$valid, cond4:$M3))]> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
let mayLoad = 1;
@@ -1408,9 +2400,8 @@ class CondUnaryRSY<string mnemonic, bits<16> opcode,
class AsmCondUnaryRSY<string mnemonic, bits<16> opcode,
RegisterOperand cls, bits<5> bytes,
AddressingMode mode = bdaddr20only>
- : InstRSY<opcode, (outs cls:$R1), (ins cls:$R1src, mode:$BD2, imm32zx4:$R3),
- mnemonic#"\t$R1, $BD2, $R3", []>,
- Requires<[FeatureLoadStoreOnCond]> {
+ : InstRSYb<opcode, (outs cls:$R1), (ins cls:$R1src, mode:$BD2, imm32zx4:$M3),
+ mnemonic#"\t$R1, $BD2, $M3", []> {
let mayLoad = 1;
let AccessBytes = bytes;
let Constraints = "$R1 = $R1src";
@@ -1418,26 +2409,36 @@ class AsmCondUnaryRSY<string mnemonic, bits<16> opcode,
}
// Like CondUnaryRSY, but with a fixed CC mask.
-class FixedCondUnaryRSY<string mnemonic, bits<16> opcode,
- RegisterOperand cls, bits<4> ccmask, bits<5> bytes,
+class FixedCondUnaryRSY<CondVariant V, string mnemonic, bits<16> opcode,
+ RegisterOperand cls, bits<5> bytes,
AddressingMode mode = bdaddr20only>
- : InstRSY<opcode, (outs cls:$R1), (ins cls:$R1src, mode:$BD2),
- mnemonic#"\t$R1, $BD2", []>,
- Requires<[FeatureLoadStoreOnCond]> {
+ : InstRSYb<opcode, (outs cls:$R1), (ins cls:$R1src, mode:$BD2),
+ mnemonic#V.suffix#"\t$R1, $BD2", []> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
- let R3 = ccmask;
let mayLoad = 1;
let AccessBytes = bytes;
+ let isAsmParserOnly = V.alternate;
+ let M3 = V.ccmask;
}
+multiclass CondUnaryRSYPair<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator,
+ RegisterOperand cls, bits<5> bytes,
+ AddressingMode mode = bdaddr20only> {
+ let isCodeGenOnly = 1 in
+ def "" : CondUnaryRSY<mnemonic, opcode, operator, cls, bytes, mode>;
+ def Asm : AsmCondUnaryRSY<mnemonic, opcode, cls, bytes, mode>;
+}
+
+
class UnaryRX<string mnemonic, bits<8> opcode, SDPatternOperator operator,
RegisterOperand cls, bits<5> bytes,
AddressingMode mode = bdxaddr12only>
- : InstRX<opcode, (outs cls:$R1), (ins mode:$XBD2),
- mnemonic#"\t$R1, $XBD2",
- [(set cls:$R1, (operator mode:$XBD2))]> {
- let OpKey = mnemonic ## cls;
+ : InstRXa<opcode, (outs cls:$R1), (ins mode:$XBD2),
+ mnemonic#"\t$R1, $XBD2",
+ [(set cls:$R1, (operator mode:$XBD2))]> {
+ let OpKey = mnemonic#"r"#cls;
let OpType = "mem";
let mayLoad = 1;
let AccessBytes = bytes;
@@ -1448,7 +2449,7 @@ class UnaryRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
: InstRXE<opcode, (outs cls:$R1), (ins bdxaddr12only:$XBD2),
mnemonic#"\t$R1, $XBD2",
[(set cls:$R1, (operator bdxaddr12only:$XBD2))]> {
- let OpKey = mnemonic ## cls;
+ let OpKey = mnemonic#"r"#cls;
let OpType = "mem";
let mayLoad = 1;
let AccessBytes = bytes;
@@ -1458,10 +2459,10 @@ class UnaryRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
class UnaryRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
RegisterOperand cls, bits<5> bytes,
AddressingMode mode = bdxaddr20only>
- : InstRXY<opcode, (outs cls:$R1), (ins mode:$XBD2),
- mnemonic#"\t$R1, $XBD2",
- [(set cls:$R1, (operator mode:$XBD2))]> {
- let OpKey = mnemonic ## cls;
+ : InstRXYa<opcode, (outs cls:$R1), (ins mode:$XBD2),
+ mnemonic#"\t$R1, $XBD2",
+ [(set cls:$R1, (operator mode:$XBD2))]> {
+ let OpKey = mnemonic#"r"#cls;
let OpType = "mem";
let mayLoad = 1;
let AccessBytes = bytes;
@@ -1487,6 +2488,10 @@ class UnaryVRIa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
let M3 = type;
}
+class UnaryVRIaGeneric<string mnemonic, bits<16> opcode, Immediate imm>
+ : InstVRIa<opcode, (outs VR128:$V1), (ins imm:$I2, imm32zx4:$M3),
+ mnemonic#"\t$V1, $I2, $M3", []>;
+
class UnaryVRRa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
TypedReg tr1, TypedReg tr2, bits<4> type = 0, bits<4> m4 = 0,
bits<4> m5 = 0>
@@ -1498,15 +2503,50 @@ class UnaryVRRa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
let M5 = m5;
}
-multiclass UnaryVRRaSPair<string mnemonic, bits<16> opcode,
- SDPatternOperator operator,
- SDPatternOperator operator_cc, TypedReg tr1,
- TypedReg tr2, bits<4> type, bits<4> modifier = 0,
- bits<4> modifier_cc = 1> {
- def "" : UnaryVRRa<mnemonic, opcode, operator, tr1, tr2, type, 0, modifier>;
+class UnaryVRRaGeneric<string mnemonic, bits<16> opcode, bits<4> m4 = 0,
+ bits<4> m5 = 0>
+ : InstVRRa<opcode, (outs VR128:$V1), (ins VR128:$V2, imm32zx4:$M3),
+ mnemonic#"\t$V1, $V2, $M3", []> {
+ let M4 = m4;
+ let M5 = m5;
+}
+
+class UnaryVRRaFloatGeneric<string mnemonic, bits<16> opcode, bits<4> m5 = 0>
+ : InstVRRa<opcode, (outs VR128:$V1),
+ (ins VR128:$V2, imm32zx4:$M3, imm32zx4:$M4),
+ mnemonic#"\t$V1, $V2, $M3, $M4", []> {
+ let M5 = m5;
+}
+
+// Declare a pair of instructions, one which sets CC and one which doesn't.
+// The CC-setting form ends with "S" and sets the low bit of M5.
+// The form that does not set CC has an extra operand to optionally allow
+// specifying arbitrary M5 values in assembler.
+multiclass UnaryExtraVRRaSPair<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator,
+ SDPatternOperator operator_cc,
+ TypedReg tr1, TypedReg tr2, bits<4> type> {
+ let M3 = type, M4 = 0 in
+ def "" : InstVRRa<opcode, (outs tr1.op:$V1),
+ (ins tr2.op:$V2, imm32zx4:$M5),
+ mnemonic#"\t$V1, $V2, $M5", []>;
+ def : Pat<(tr1.vt (operator (tr2.vt tr2.op:$V2))),
+ (!cast<Instruction>(NAME) tr2.op:$V2, 0)>;
+ def : InstAlias<mnemonic#"\t$V1, $V2",
+ (!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2, 0)>;
let Defs = [CC] in
- def S : UnaryVRRa<mnemonic##"s", opcode, operator_cc, tr1, tr2, type, 0,
- modifier_cc>;
+ def S : UnaryVRRa<mnemonic##"s", opcode, operator_cc, tr1, tr2,
+ type, 0, 1>;
+}
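// [Editor's sketch, not part of this diff] A hypothetical instantiation
//   defm VFOO : UnaryExtraVRRaSPair<"vfoo", 0x0000, op, op_cc, v128b, v128b, 0>;
// would expand to: VFOO (M5 exposed as an operand), a Pat selecting VFOO with
// M5 = 0, an InstAlias accepting "vfoo $V1, $V2" without M5, and VFOOS, the
// CC-setting form with M5 = 1 (the CC bit). All names here are made up.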
+
+multiclass UnaryExtraVRRaSPairGeneric<string mnemonic, bits<16> opcode> {
+ let M4 = 0 in
+ def "" : InstVRRa<opcode, (outs VR128:$V1),
+ (ins VR128:$V2, imm32zx4:$M3, imm32zx4:$M5),
+ mnemonic#"\t$V1, $V2, $M3, $M5", []>;
+ def : InstAlias<mnemonic#"\t$V1, $V2, $M3",
+ (!cast<Instruction>(NAME) VR128:$V1, VR128:$V2,
+ imm32zx4:$M3, 0)>;
}
class UnaryVRX<string mnemonic, bits<16> opcode, SDPatternOperator operator,
@@ -1519,12 +2559,43 @@ class UnaryVRX<string mnemonic, bits<16> opcode, SDPatternOperator operator,
let AccessBytes = bytes;
}
+class UnaryVRXGeneric<string mnemonic, bits<16> opcode>
+ : InstVRX<opcode, (outs VR128:$V1), (ins bdxaddr12only:$XBD2, imm32zx4:$M3),
+ mnemonic#"\t$V1, $XBD2, $M3", []> {
+ let mayLoad = 1;
+}
+
+class SideEffectBinaryRX<string mnemonic, bits<8> opcode,
+ RegisterOperand cls>
+ : InstRXa<opcode, (outs), (ins cls:$R1, bdxaddr12only:$XBD2),
+ mnemonic##"\t$R1, $XBD2", []>;
+
+class SideEffectBinaryRILPC<string mnemonic, bits<12> opcode,
+ RegisterOperand cls>
+ : InstRILb<opcode, (outs), (ins cls:$R1, pcrel32:$RI2),
+ mnemonic##"\t$R1, $RI2", []> {
+ // We want PC-relative addresses to be tried ahead of BD and BDX addresses.
+ // However, BDXs have two extra operands and are therefore 6 units more
+ // complex.
+ let AddedComplexity = 7;
+}
+
+class SideEffectBinaryIE<string mnemonic, bits<16> opcode,
+ Immediate imm1, Immediate imm2>
+ : InstIE<opcode, (outs), (ins imm1:$I1, imm2:$I2),
+ mnemonic#"\t$I1, $I2", []>;
+
+class SideEffectBinarySIL<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator, Immediate imm>
+ : InstSIL<opcode, (outs), (ins bdaddr12only:$BD1, imm:$I2),
+ mnemonic#"\t$BD1, $I2", [(operator bdaddr12only:$BD1, imm:$I2)]>;
+
class BinaryRR<string mnemonic, bits<8> opcode, SDPatternOperator operator,
RegisterOperand cls1, RegisterOperand cls2>
: InstRR<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2),
- mnemonic#"r\t$R1, $R2",
+ mnemonic#"\t$R1, $R2",
[(set cls1:$R1, (operator cls1:$R1src, cls2:$R2))]> {
- let OpKey = mnemonic ## cls1;
+ let OpKey = mnemonic#cls1;
let OpType = "reg";
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
@@ -1533,30 +2604,21 @@ class BinaryRR<string mnemonic, bits<8> opcode, SDPatternOperator operator,
class BinaryRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
RegisterOperand cls1, RegisterOperand cls2>
: InstRRE<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2),
- mnemonic#"r\t$R1, $R2",
+ mnemonic#"\t$R1, $R2",
[(set cls1:$R1, (operator cls1:$R1src, cls2:$R2))]> {
- let OpKey = mnemonic ## cls1;
+ let OpKey = mnemonic#cls1;
let OpType = "reg";
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
}
-class BinaryRRF<string mnemonic, bits<16> opcode, SDPatternOperator operator,
- RegisterOperand cls1, RegisterOperand cls2>
- : InstRRF<opcode, (outs cls1:$R1), (ins cls1:$R2, cls2:$R3),
- mnemonic#"r\t$R1, $R3, $R2",
- [(set cls1:$R1, (operator cls1:$R2, cls2:$R3))]> {
- let OpKey = mnemonic ## cls1;
- let OpType = "reg";
- let R4 = 0;
-}
-
-class BinaryRRFK<string mnemonic, bits<16> opcode, SDPatternOperator operator,
- RegisterOperand cls1, RegisterOperand cls2>
- : InstRRF<opcode, (outs cls1:$R1), (ins cls1:$R2, cls2:$R3),
- mnemonic#"rk\t$R1, $R2, $R3",
- [(set cls1:$R1, (operator cls1:$R2, cls2:$R3))]> {
- let R4 = 0;
+class BinaryRRFa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ RegisterOperand cls1, RegisterOperand cls2,
+ RegisterOperand cls3>
+ : InstRRFa<opcode, (outs cls1:$R1), (ins cls2:$R2, cls3:$R3),
+ mnemonic#"\t$R1, $R2, $R3",
+ [(set cls1:$R1, (operator cls2:$R2, cls3:$R3))]> {
+ let M4 = 0;
}
multiclass BinaryRRAndK<string mnemonic, bits<8> opcode1, bits<16> opcode2,
@@ -1564,7 +2626,7 @@ multiclass BinaryRRAndK<string mnemonic, bits<8> opcode1, bits<16> opcode2,
RegisterOperand cls2> {
let NumOpsKey = mnemonic in {
let NumOpsValue = "3" in
- def K : BinaryRRFK<mnemonic, opcode2, null_frag, cls1, cls2>,
+ def K : BinaryRRFa<mnemonic#"k", opcode2, null_frag, cls1, cls1, cls2>,
Requires<[FeatureDistinctOps]>;
let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in
def "" : BinaryRR<mnemonic, opcode1, operator, cls1, cls2>;
@@ -1576,18 +2638,73 @@ multiclass BinaryRREAndK<string mnemonic, bits<16> opcode1, bits<16> opcode2,
RegisterOperand cls2> {
let NumOpsKey = mnemonic in {
let NumOpsValue = "3" in
- def K : BinaryRRFK<mnemonic, opcode2, null_frag, cls1, cls2>,
+ def K : BinaryRRFa<mnemonic#"k", opcode2, null_frag, cls1, cls1, cls2>,
Requires<[FeatureDistinctOps]>;
let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in
def "" : BinaryRRE<mnemonic, opcode1, operator, cls1, cls2>;
}
}
+class BinaryRRFb<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ RegisterOperand cls1, RegisterOperand cls2,
+ RegisterOperand cls3>
+ : InstRRFb<opcode, (outs cls1:$R1), (ins cls2:$R2, cls3:$R3),
+ mnemonic#"\t$R1, $R3, $R2",
+ [(set cls1:$R1, (operator cls2:$R2, cls3:$R3))]> {
+ let M4 = 0;
+}
+
+class BinaryRRFe<string mnemonic, bits<16> opcode, RegisterOperand cls1,
+ RegisterOperand cls2>
+ : InstRRFe<opcode, (outs cls1:$R1), (ins imm32zx4:$M3, cls2:$R2),
+ mnemonic#"\t$R1, $M3, $R2", []> {
+ let M4 = 0;
+}
+
+class CondBinaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
+ RegisterOperand cls2>
+ : InstRRFc<opcode, (outs cls1:$R1),
+ (ins cls1:$R1src, cls2:$R2, cond4:$valid, cond4:$M3),
+ mnemonic#"$M3\t$R1, $R2", []> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+ let CCMaskLast = 1;
+}
+
+// Like CondBinaryRRF, but used for the raw assembly form. The condition-code
+// mask is the third operand rather than being part of the mnemonic.
+class AsmCondBinaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1,
+ RegisterOperand cls2>
+ : InstRRFc<opcode, (outs cls1:$R1),
+ (ins cls1:$R1src, cls2:$R2, imm32zx4:$M3),
+ mnemonic#"\t$R1, $R2, $M3", []> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+}
+
+// Like CondBinaryRRF, but with a fixed CC mask.
+class FixedCondBinaryRRF<CondVariant V, string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : InstRRFc<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2),
+ mnemonic#V.suffix#"\t$R1, $R2", []> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+ let isAsmParserOnly = V.alternate;
+ let M3 = V.ccmask;
+}
+
+multiclass CondBinaryRRFPair<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2> {
+ let isCodeGenOnly = 1 in
+ def "" : CondBinaryRRF<mnemonic, opcode, cls1, cls2>;
+ def Asm : AsmCondBinaryRRF<mnemonic, opcode, cls1, cls2>;
+}
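// [Editor's sketch, not part of this diff] CondBinaryRRFPair is presumably used
// for the load-on-condition family, e.g. something like:
//   defm LOCR : CondBinaryRRFPair<"locr", 0xB9F2, GR32, GR32>;
// yielding a codegen-only LOCR with a cond4 operand plus LOCRAsm, which takes
// the CC mask as an explicit M3 immediate. (The opcode shown is an assumption.)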
+
class BinaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator,
RegisterOperand cls, Immediate imm>
- : InstRI<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
- mnemonic#"\t$R1, $I2",
- [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> {
+ : InstRIa<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
+ mnemonic#"\t$R1, $I2",
+ [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
}
@@ -1610,20 +2727,61 @@ multiclass BinaryRIAndK<string mnemonic, bits<12> opcode1, bits<16> opcode2,
}
}
+class CondBinaryRIE<string mnemonic, bits<16> opcode, RegisterOperand cls,
+ Immediate imm>
+ : InstRIEg<opcode, (outs cls:$R1),
+ (ins cls:$R1src, imm:$I2, cond4:$valid, cond4:$M3),
+ mnemonic#"$M3\t$R1, $I2",
+ [(set cls:$R1, (z_select_ccmask imm:$I2, cls:$R1src,
+ cond4:$valid, cond4:$M3))]> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+ let CCMaskLast = 1;
+}
+
+// Like CondBinaryRIE, but used for the raw assembly form. The condition-code
+// mask is the third operand rather than being part of the mnemonic.
+class AsmCondBinaryRIE<string mnemonic, bits<16> opcode, RegisterOperand cls,
+ Immediate imm>
+ : InstRIEg<opcode, (outs cls:$R1),
+ (ins cls:$R1src, imm:$I2, imm32zx4:$M3),
+ mnemonic#"\t$R1, $I2, $M3", []> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+}
+
+// Like CondBinaryRIE, but with a fixed CC mask.
+class FixedCondBinaryRIE<CondVariant V, string mnemonic, bits<16> opcode,
+ RegisterOperand cls, Immediate imm>
+ : InstRIEg<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
+ mnemonic#V.suffix#"\t$R1, $I2", []> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+ let isAsmParserOnly = V.alternate;
+ let M3 = V.ccmask;
+}
+
+multiclass CondBinaryRIEPair<string mnemonic, bits<16> opcode,
+ RegisterOperand cls, Immediate imm> {
+ let isCodeGenOnly = 1 in
+ def "" : CondBinaryRIE<mnemonic, opcode, cls, imm>;
+ def Asm : AsmCondBinaryRIE<mnemonic, opcode, cls, imm>;
+}
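// [Editor's sketch, not part of this diff] Likewise, CondBinaryRIEPair would be
// instantiated along the lines of:
//   defm LOCHI : CondBinaryRIEPair<"lochi", 0xEC42, GR32, imm32sx16>;
// selecting a z_select_ccmask of the immediate against the tied source register.
// (The opcode and immediate operand shown are assumptions.)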
+
class BinaryRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator,
RegisterOperand cls, Immediate imm>
- : InstRIL<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
- mnemonic#"\t$R1, $I2",
- [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> {
+ : InstRILa<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
+ mnemonic#"\t$R1, $I2",
+ [(set cls:$R1, (operator cls:$R1src, imm:$I2))]> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
}
class BinaryRS<string mnemonic, bits<8> opcode, SDPatternOperator operator,
RegisterOperand cls>
- : InstRS<opcode, (outs cls:$R1), (ins cls:$R1src, shift12only:$BD2),
- mnemonic#"\t$R1, $BD2",
- [(set cls:$R1, (operator cls:$R1src, shift12only:$BD2))]> {
+ : InstRSa<opcode, (outs cls:$R1), (ins cls:$R1src, shift12only:$BD2),
+ mnemonic#"\t$R1, $BD2",
+ [(set cls:$R1, (operator cls:$R1src, shift12only:$BD2))]> {
let R3 = 0;
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
@@ -1631,9 +2789,9 @@ class BinaryRS<string mnemonic, bits<8> opcode, SDPatternOperator operator,
class BinaryRSY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
RegisterOperand cls>
- : InstRSY<opcode, (outs cls:$R1), (ins cls:$R3, shift20only:$BD2),
- mnemonic#"\t$R1, $R3, $BD2",
- [(set cls:$R1, (operator cls:$R3, shift20only:$BD2))]>;
+ : InstRSYa<opcode, (outs cls:$R1), (ins cls:$R3, shift20only:$BD2),
+ mnemonic#"\t$R1, $R3, $BD2",
+ [(set cls:$R1, (operator cls:$R3, shift20only:$BD2))]>;
multiclass BinaryRSAndK<string mnemonic, bits<8> opcode1, bits<16> opcode2,
SDPatternOperator operator, RegisterOperand cls> {
@@ -1649,10 +2807,10 @@ multiclass BinaryRSAndK<string mnemonic, bits<8> opcode1, bits<16> opcode2,
class BinaryRX<string mnemonic, bits<8> opcode, SDPatternOperator operator,
RegisterOperand cls, SDPatternOperator load, bits<5> bytes,
AddressingMode mode = bdxaddr12only>
- : InstRX<opcode, (outs cls:$R1), (ins cls:$R1src, mode:$XBD2),
- mnemonic#"\t$R1, $XBD2",
- [(set cls:$R1, (operator cls:$R1src, (load mode:$XBD2)))]> {
- let OpKey = mnemonic ## cls;
+ : InstRXa<opcode, (outs cls:$R1), (ins cls:$R1src, mode:$XBD2),
+ mnemonic#"\t$R1, $XBD2",
+ [(set cls:$R1, (operator cls:$R1src, (load mode:$XBD2)))]> {
+ let OpKey = mnemonic#"r"#cls;
let OpType = "mem";
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
@@ -1666,7 +2824,7 @@ class BinaryRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
mnemonic#"\t$R1, $XBD2",
[(set cls:$R1, (operator cls:$R1src,
(load bdxaddr12only:$XBD2)))]> {
- let OpKey = mnemonic ## cls;
+ let OpKey = mnemonic#"r"#cls;
let OpType = "mem";
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
@@ -1678,10 +2836,10 @@ class BinaryRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
class BinaryRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
RegisterOperand cls, SDPatternOperator load, bits<5> bytes,
AddressingMode mode = bdxaddr20only>
- : InstRXY<opcode, (outs cls:$R1), (ins cls:$R1src, mode:$XBD2),
- mnemonic#"\t$R1, $XBD2",
- [(set cls:$R1, (operator cls:$R1src, (load mode:$XBD2)))]> {
- let OpKey = mnemonic ## cls;
+ : InstRXYa<opcode, (outs cls:$R1), (ins cls:$R1src, mode:$XBD2),
+ mnemonic#"\t$R1, $XBD2",
+ [(set cls:$R1, (operator cls:$R1src, (load mode:$XBD2)))]> {
+ let OpKey = mnemonic#"r"#cls;
let OpType = "mem";
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
@@ -1731,6 +2889,12 @@ multiclass BinarySIPair<string mnemonic, bits<8> siOpcode,
}
}
+class BinarySSF<string mnemonic, bits<12> opcode, RegisterOperand cls>
+ : InstSSF<opcode, (outs cls:$R3), (ins bdaddr12pair:$BD1, bdaddr12pair:$BD2),
+ mnemonic#"\t$R3, $BD1, $BD2", []> {
+ let mayLoad = 1;
+}
+
class BinaryVRIb<string mnemonic, bits<16> opcode, SDPatternOperator operator,
TypedReg tr, bits<4> type>
: InstVRIb<opcode, (outs tr.op:$V1), (ins imm32zx8:$I2, imm32zx8:$I3),
@@ -1739,6 +2903,11 @@ class BinaryVRIb<string mnemonic, bits<16> opcode, SDPatternOperator operator,
let M4 = type;
}
+class BinaryVRIbGeneric<string mnemonic, bits<16> opcode>
+ : InstVRIb<opcode, (outs VR128:$V1),
+ (ins imm32zx8:$I2, imm32zx8:$I3, imm32zx4:$M4),
+ mnemonic#"\t$V1, $I2, $I3, $M4", []>;
+
class BinaryVRIc<string mnemonic, bits<16> opcode, SDPatternOperator operator,
TypedReg tr1, TypedReg tr2, bits<4> type>
: InstVRIc<opcode, (outs tr1.op:$V1), (ins tr2.op:$V3, imm32zx16:$I2),
@@ -1748,6 +2917,11 @@ class BinaryVRIc<string mnemonic, bits<16> opcode, SDPatternOperator operator,
let M4 = type;
}
+class BinaryVRIcGeneric<string mnemonic, bits<16> opcode>
+ : InstVRIc<opcode, (outs VR128:$V1),
+ (ins VR128:$V3, imm32zx16:$I2, imm32zx4:$M4),
+ mnemonic#"\t$V1, $V3, $I2, $M4", []>;
+
class BinaryVRIe<string mnemonic, bits<16> opcode, SDPatternOperator operator,
TypedReg tr1, TypedReg tr2, bits<4> type, bits<4> m5>
: InstVRIe<opcode, (outs tr1.op:$V1), (ins tr2.op:$V2, imm32zx12:$I3),
@@ -1758,13 +2932,26 @@ class BinaryVRIe<string mnemonic, bits<16> opcode, SDPatternOperator operator,
let M5 = m5;
}
-class BinaryVRRa<string mnemonic, bits<16> opcode>
- : InstVRRa<opcode, (outs VR128:$V1), (ins VR128:$V2, imm32zx4:$M3),
- mnemonic#"\t$V1, $V2, $M3", []> {
- let M4 = 0;
- let M5 = 0;
+class BinaryVRIeFloatGeneric<string mnemonic, bits<16> opcode>
+ : InstVRIe<opcode, (outs VR128:$V1),
+ (ins VR128:$V2, imm32zx12:$I3, imm32zx4:$M4, imm32zx4:$M5),
+ mnemonic#"\t$V1, $V2, $I3, $M4, $M5", []>;
+
+class BinaryVRRa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
+ TypedReg tr1, TypedReg tr2, bits<4> type = 0, bits<4> m4 = 0>
+ : InstVRRa<opcode, (outs tr1.op:$V1), (ins tr2.op:$V2, imm32zx4:$M5),
+ mnemonic#"\t$V1, $V2, $M5",
+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2),
+ imm32zx12:$M5)))]> {
+ let M3 = type;
+ let M4 = m4;
}
+class BinaryVRRaFloatGeneric<string mnemonic, bits<16> opcode>
+ : InstVRRa<opcode, (outs VR128:$V1),
+ (ins VR128:$V2, imm32zx4:$M3, imm32zx4:$M4, imm32zx4:$M5),
+ mnemonic#"\t$V1, $V2, $M3, $M4, $M5", []>;
+
class BinaryVRRb<string mnemonic, bits<16> opcode, SDPatternOperator operator,
TypedReg tr1, TypedReg tr2, bits<4> type = 0,
bits<4> modifier = 0>
@@ -1781,12 +2968,47 @@ class BinaryVRRb<string mnemonic, bits<16> opcode, SDPatternOperator operator,
multiclass BinaryVRRbSPair<string mnemonic, bits<16> opcode,
SDPatternOperator operator,
SDPatternOperator operator_cc, TypedReg tr1,
- TypedReg tr2, bits<4> type,
- bits<4> modifier = 0, bits<4> modifier_cc = 1> {
- def "" : BinaryVRRb<mnemonic, opcode, operator, tr1, tr2, type, modifier>;
+ TypedReg tr2, bits<4> type, bits<4> modifier = 0> {
+ def "" : BinaryVRRb<mnemonic, opcode, operator, tr1, tr2, type,
+ !and (modifier, 14)>;
let Defs = [CC] in
def S : BinaryVRRb<mnemonic##"s", opcode, operator_cc, tr1, tr2, type,
- modifier_cc>;
+ !add (!and (modifier, 14), 1)>;
+}
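// [Editor's note, not part of this diff] In the pair above, !and(modifier, 14)
// clears the low (CC-setting) bit of M5 for the base form, and
// !add(!and(modifier, 14), 1) sets it for the "S" form; e.g. modifier = 0
// gives M5 = 0 for the base instruction and M5 = 1 for its CC-setting twin.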
+
+class BinaryVRRbSPairGeneric<string mnemonic, bits<16> opcode>
+ : InstVRRb<opcode, (outs VR128:$V1),
+ (ins VR128:$V2, VR128:$V3, imm32zx4:$M4, imm32zx4:$M5),
+ mnemonic#"\t$V1, $V2, $V3, $M4, $M5", []>;
+
+// Declare a pair of instructions, one which sets CC and one which doesn't.
+// The CC-setting form ends with "S" and sets the low bit of M5.
+// The form that does not set CC has an extra operand to optionally allow
+// specifying arbitrary M5 values in assembler.
+multiclass BinaryExtraVRRbSPair<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator,
+ SDPatternOperator operator_cc,
+ TypedReg tr1, TypedReg tr2, bits<4> type> {
+ let M4 = type in
+ def "" : InstVRRb<opcode, (outs tr1.op:$V1),
+ (ins tr2.op:$V2, tr2.op:$V3, imm32zx4:$M5),
+ mnemonic#"\t$V1, $V2, $V3, $M5", []>;
+ def : Pat<(tr1.vt (operator (tr2.vt tr2.op:$V2), (tr2.vt tr2.op:$V3))),
+ (!cast<Instruction>(NAME) tr2.op:$V2, tr2.op:$V3, 0)>;
+ def : InstAlias<mnemonic#"\t$V1, $V2, $V3",
+ (!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2,
+ tr2.op:$V3, 0)>;
+ let Defs = [CC] in
+ def S : BinaryVRRb<mnemonic##"s", opcode, operator_cc, tr1, tr2, type, 1>;
+}
+
+multiclass BinaryExtraVRRbSPairGeneric<string mnemonic, bits<16> opcode> {
+ def "" : InstVRRb<opcode, (outs VR128:$V1),
+ (ins VR128:$V2, VR128:$V3, imm32zx4:$M4, imm32zx4:$M5),
+ mnemonic#"\t$V1, $V2, $V3, $M4, $M5", []>;
+ def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $M4",
+ (!cast<Instruction>(NAME) VR128:$V1, VR128:$V2, VR128:$V3,
+ imm32zx4:$M4, 0)>;
}
class BinaryVRRc<string mnemonic, bits<16> opcode, SDPatternOperator operator,
@@ -1801,17 +3023,42 @@ class BinaryVRRc<string mnemonic, bits<16> opcode, SDPatternOperator operator,
let M6 = m6;
}
+class BinaryVRRcGeneric<string mnemonic, bits<16> opcode, bits<4> m5 = 0,
+ bits<4> m6 = 0>
+ : InstVRRc<opcode, (outs VR128:$V1),
+ (ins VR128:$V2, VR128:$V3, imm32zx4:$M4),
+ mnemonic#"\t$V1, $V2, $V3, $M4", []> {
+ let M5 = m5;
+ let M6 = m6;
+}
+
+class BinaryVRRcFloatGeneric<string mnemonic, bits<16> opcode, bits<4> m6 = 0>
+ : InstVRRc<opcode, (outs VR128:$V1),
+ (ins VR128:$V2, VR128:$V3, imm32zx4:$M4, imm32zx4:$M5),
+ mnemonic#"\t$V1, $V2, $V3, $M4, $M5", []> {
+ let M6 = m6;
+}
+
+// Declare a pair of instructions, one which sets CC and one which doesn't.
+// The CC-setting form ends with "S" and sets the low bit of M5.
multiclass BinaryVRRcSPair<string mnemonic, bits<16> opcode,
SDPatternOperator operator,
SDPatternOperator operator_cc, TypedReg tr1,
TypedReg tr2, bits<4> type, bits<4> m5,
- bits<4> modifier = 0, bits<4> modifier_cc = 1> {
- def "" : BinaryVRRc<mnemonic, opcode, operator, tr1, tr2, type, m5, modifier>;
+ bits<4> modifier = 0> {
+ def "" : BinaryVRRc<mnemonic, opcode, operator, tr1, tr2, type,
+ m5, !and (modifier, 14)>;
let Defs = [CC] in
def S : BinaryVRRc<mnemonic##"s", opcode, operator_cc, tr1, tr2, type,
- m5, modifier_cc>;
+ m5, !add (!and (modifier, 14), 1)>;
}
+class BinaryVRRcSPairFloatGeneric<string mnemonic, bits<16> opcode>
+ : InstVRRc<opcode, (outs VR128:$V1),
+ (ins VR128:$V2, VR128:$V3, imm32zx4:$M4, imm32zx4:$M5,
+ imm32zx4:$M6),
+ mnemonic#"\t$V1, $V2, $V3, $M4, $M5, $M6", []>;
+
class BinaryVRRf<string mnemonic, bits<16> opcode, SDPatternOperator operator,
TypedReg tr>
: InstVRRf<opcode, (outs tr.op:$V1), (ins GR64:$R2, GR64:$R3),
@@ -1827,6 +3074,11 @@ class BinaryVRSa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
let M4 = type;
}
+class BinaryVRSaGeneric<string mnemonic, bits<16> opcode>
+ : InstVRSa<opcode, (outs VR128:$V1),
+ (ins VR128:$V3, shift12only:$BD2, imm32zx4:$M4),
+ mnemonic#"\t$V1, $V3, $BD2, $M4", []>;
+
class BinaryVRSb<string mnemonic, bits<16> opcode, SDPatternOperator operator,
bits<5> bytes>
: InstVRSb<opcode, (outs VR128:$V1), (ins GR32:$R3, bdaddr12only:$BD2),
@@ -1845,6 +3097,11 @@ class BinaryVRSc<string mnemonic, bits<16> opcode, SDPatternOperator operator,
let M4 = type;
}
+class BinaryVRScGeneric<string mnemonic, bits<16> opcode>
+ : InstVRSc<opcode, (outs GR64:$R1),
+ (ins VR128:$V3, shift12only:$BD2, imm32zx4: $M4),
+ mnemonic#"\t$R1, $V3, $BD2, $M4", []>;
+
class BinaryVRX<string mnemonic, bits<16> opcode, SDPatternOperator operator,
TypedReg tr, bits<5> bytes>
: InstVRX<opcode, (outs VR128:$V1), (ins bdxaddr12only:$XBD2, imm32zx4:$M3),
@@ -1873,12 +3130,18 @@ class StoreBinaryVRX<string mnemonic, bits<16> opcode,
let AccessBytes = bytes;
}
+class MemoryBinarySSd<string mnemonic, bits<8> opcode,
+ RegisterOperand cls>
+ : InstSSd<opcode, (outs),
+ (ins bdraddr12only:$RBD1, bdaddr12only:$BD2, cls:$R3),
+ mnemonic#"\t$RBD1, $BD2, $R3", []>;
+
class CompareRR<string mnemonic, bits<8> opcode, SDPatternOperator operator,
RegisterOperand cls1, RegisterOperand cls2>
: InstRR<opcode, (outs), (ins cls1:$R1, cls2:$R2),
- mnemonic#"r\t$R1, $R2",
+ mnemonic#"\t$R1, $R2",
[(operator cls1:$R1, cls2:$R2)]> {
- let OpKey = mnemonic ## cls1;
+ let OpKey = mnemonic#cls1;
let OpType = "reg";
let isCompare = 1;
}
@@ -1886,34 +3149,34 @@ class CompareRR<string mnemonic, bits<8> opcode, SDPatternOperator operator,
class CompareRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
RegisterOperand cls1, RegisterOperand cls2>
: InstRRE<opcode, (outs), (ins cls1:$R1, cls2:$R2),
- mnemonic#"r\t$R1, $R2",
+ mnemonic#"\t$R1, $R2",
[(operator cls1:$R1, cls2:$R2)]> {
- let OpKey = mnemonic ## cls1;
+ let OpKey = mnemonic#cls1;
let OpType = "reg";
let isCompare = 1;
}
class CompareRI<string mnemonic, bits<12> opcode, SDPatternOperator operator,
RegisterOperand cls, Immediate imm>
- : InstRI<opcode, (outs), (ins cls:$R1, imm:$I2),
- mnemonic#"\t$R1, $I2",
- [(operator cls:$R1, imm:$I2)]> {
+ : InstRIa<opcode, (outs), (ins cls:$R1, imm:$I2),
+ mnemonic#"\t$R1, $I2",
+ [(operator cls:$R1, imm:$I2)]> {
let isCompare = 1;
}
class CompareRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator,
RegisterOperand cls, Immediate imm>
- : InstRIL<opcode, (outs), (ins cls:$R1, imm:$I2),
- mnemonic#"\t$R1, $I2",
- [(operator cls:$R1, imm:$I2)]> {
+ : InstRILa<opcode, (outs), (ins cls:$R1, imm:$I2),
+ mnemonic#"\t$R1, $I2",
+ [(operator cls:$R1, imm:$I2)]> {
let isCompare = 1;
}
class CompareRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator,
RegisterOperand cls, SDPatternOperator load>
- : InstRIL<opcode, (outs), (ins cls:$R1, pcrel32:$I2),
- mnemonic#"\t$R1, $I2",
- [(operator cls:$R1, (load pcrel32:$I2))]> {
+ : InstRILb<opcode, (outs), (ins cls:$R1, pcrel32:$RI2),
+ mnemonic#"\t$R1, $RI2",
+ [(operator cls:$R1, (load pcrel32:$RI2))]> {
let isCompare = 1;
let mayLoad = 1;
// We want PC-relative addresses to be tried ahead of BD and BDX addresses.
@@ -1925,10 +3188,10 @@ class CompareRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator,
class CompareRX<string mnemonic, bits<8> opcode, SDPatternOperator operator,
RegisterOperand cls, SDPatternOperator load, bits<5> bytes,
AddressingMode mode = bdxaddr12only>
- : InstRX<opcode, (outs), (ins cls:$R1, mode:$XBD2),
- mnemonic#"\t$R1, $XBD2",
- [(operator cls:$R1, (load mode:$XBD2))]> {
- let OpKey = mnemonic ## cls;
+ : InstRXa<opcode, (outs), (ins cls:$R1, mode:$XBD2),
+ mnemonic#"\t$R1, $XBD2",
+ [(operator cls:$R1, (load mode:$XBD2))]> {
+ let OpKey = mnemonic#"r"#cls;
let OpType = "mem";
let isCompare = 1;
let mayLoad = 1;
@@ -1940,7 +3203,7 @@ class CompareRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
: InstRXE<opcode, (outs), (ins cls:$R1, bdxaddr12only:$XBD2),
mnemonic#"\t$R1, $XBD2",
[(operator cls:$R1, (load bdxaddr12only:$XBD2))]> {
- let OpKey = mnemonic ## cls;
+ let OpKey = mnemonic#"r"#cls;
let OpType = "mem";
let isCompare = 1;
let mayLoad = 1;
@@ -1951,10 +3214,10 @@ class CompareRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
class CompareRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
RegisterOperand cls, SDPatternOperator load, bits<5> bytes,
AddressingMode mode = bdxaddr20only>
- : InstRXY<opcode, (outs), (ins cls:$R1, mode:$XBD2),
- mnemonic#"\t$R1, $XBD2",
- [(operator cls:$R1, (load mode:$XBD2))]> {
- let OpKey = mnemonic ## cls;
+ : InstRXYa<opcode, (outs), (ins cls:$R1, mode:$XBD2),
+ mnemonic#"\t$R1, $XBD2",
+ [(operator cls:$R1, (load mode:$XBD2))]> {
+ let OpKey = mnemonic#"r"#cls;
let OpType = "mem";
let isCompare = 1;
let mayLoad = 1;
@@ -2026,6 +3289,22 @@ class CompareVRRa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
let M5 = 0;
}
+class CompareVRRaGeneric<string mnemonic, bits<16> opcode>
+ : InstVRRa<opcode, (outs), (ins VR128:$V1, VR128:$V2, imm32zx4:$M3),
+ mnemonic#"\t$V1, $V2, $M3", []> {
+ let isCompare = 1;
+ let M4 = 0;
+ let M5 = 0;
+}
+
+class CompareVRRaFloatGeneric<string mnemonic, bits<16> opcode>
+ : InstVRRa<opcode, (outs),
+ (ins VR64:$V1, VR64:$V2, imm32zx4:$M3, imm32zx4:$M4),
+ mnemonic#"\t$V1, $V2, $M3, $M4", []> {
+ let isCompare = 1;
+ let M5 = 0;
+}
+
class TestRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
RegisterOperand cls>
: InstRXE<opcode, (outs), (ins cls:$R1, bdxaddr12only:$XBD2),
@@ -2034,12 +3313,30 @@ class TestRXE<string mnemonic, bits<16> opcode, SDPatternOperator operator,
let M3 = 0;
}
+class SideEffectTernaryRRFc<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2,
+ Immediate imm>
+ : InstRRFc<opcode, (outs), (ins cls1:$R1, cls2:$R2, imm:$M3),
+ mnemonic#"\t$R1, $R2, $M3", []>;
+
+class SideEffectTernarySSF<string mnemonic, bits<12> opcode,
+ RegisterOperand cls>
+ : InstSSF<opcode, (outs),
+ (ins bdaddr12only:$BD1, bdaddr12only:$BD2, cls:$R3),
+ mnemonic#"\t$BD1, $BD2, $R3", []>;
+
+class TernaryRRFe<string mnemonic, bits<16> opcode, RegisterOperand cls1,
+ RegisterOperand cls2>
+ : InstRRFe<opcode, (outs cls1:$R1),
+ (ins imm32zx4:$M3, cls2:$R2, imm32zx4:$M4),
+ mnemonic#"\t$R1, $M3, $R2, $M4", []>;
+
class TernaryRRD<string mnemonic, bits<16> opcode,
SDPatternOperator operator, RegisterOperand cls>
: InstRRD<opcode, (outs cls:$R1), (ins cls:$R1src, cls:$R3, cls:$R2),
- mnemonic#"r\t$R1, $R3, $R2",
+ mnemonic#"\t$R1, $R3, $R2",
[(set cls:$R1, (operator cls:$R1src, cls:$R3, cls:$R2))]> {
- let OpKey = mnemonic ## cls;
+ let OpKey = mnemonic#cls;
let OpType = "reg";
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
@@ -2047,9 +3344,9 @@ class TernaryRRD<string mnemonic, bits<16> opcode,
class TernaryRS<string mnemonic, bits<8> opcode, RegisterOperand cls,
bits<5> bytes, AddressingMode mode = bdaddr12only>
- : InstRS<opcode, (outs cls:$R1),
- (ins cls:$R1src, imm32zx4:$R3, mode:$BD2),
- mnemonic#"\t$R1, $R3, $BD2", []> {
+ : InstRSb<opcode, (outs cls:$R1),
+ (ins cls:$R1src, imm32zx4:$M3, mode:$BD2),
+ mnemonic#"\t$R1, $M3, $BD2", []> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
@@ -2059,9 +3356,9 @@ class TernaryRS<string mnemonic, bits<8> opcode, RegisterOperand cls,
class TernaryRSY<string mnemonic, bits<16> opcode, RegisterOperand cls,
bits<5> bytes, AddressingMode mode = bdaddr20only>
- : InstRSY<opcode, (outs cls:$R1),
- (ins cls:$R1src, imm32zx4:$R3, mode:$BD2),
- mnemonic#"\t$R1, $R3, $BD2", []> {
+ : InstRSYb<opcode, (outs cls:$R1),
+ (ins cls:$R1src, imm32zx4:$M3, mode:$BD2),
+ mnemonic#"\t$R1, $M3, $BD2", []> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
@@ -2086,7 +3383,7 @@ class TernaryRXF<string mnemonic, bits<16> opcode, SDPatternOperator operator,
mnemonic#"\t$R1, $R3, $XBD2",
[(set cls:$R1, (operator cls:$R1src, cls:$R3,
(load bdxaddr12only:$XBD2)))]> {
- let OpKey = mnemonic ## cls;
+ let OpKey = mnemonic#"r"#cls;
let OpType = "mem";
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
@@ -2127,6 +3424,11 @@ class TernaryVRRa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
let M3 = type;
}
+class TernaryVRRaFloatGeneric<string mnemonic, bits<16> opcode>
+ : InstVRRa<opcode, (outs VR128:$V1),
+ (ins VR128:$V2, imm32zx4:$M3, imm32zx4:$M4, imm32zx4:$M5),
+ mnemonic#"\t$V1, $V2, $M3, $M4, $M5", []>;
+
class TernaryVRRb<string mnemonic, bits<16> opcode, SDPatternOperator operator,
TypedReg tr1, TypedReg tr2, bits<4> type,
SDPatternOperator m5mask, bits<4> m5or>
@@ -2140,23 +3442,36 @@ class TernaryVRRb<string mnemonic, bits<16> opcode, SDPatternOperator operator,
let M4 = type;
}
-multiclass TernaryVRRbSPair<string mnemonic, bits<16> opcode,
- SDPatternOperator operator,
- SDPatternOperator operator_cc, TypedReg tr1,
- TypedReg tr2, bits<4> type, bits<4> m5or> {
+// Declare a pair of instructions, one which sets CC and one which doesn't.
+// The CC-setting form ends with "S" and sets the low bit of M5.
+// Also create aliases to make the M5 operand optional in assembler.
+multiclass TernaryOptVRRbSPair<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator,
+ SDPatternOperator operator_cc,
+ TypedReg tr1, TypedReg tr2, bits<4> type,
+ bits<4> modifier = 0> {
def "" : TernaryVRRb<mnemonic, opcode, operator, tr1, tr2, type,
- imm32zx4even, !and (m5or, 14)>;
+ imm32zx4even, !and (modifier, 14)>;
def : InstAlias<mnemonic#"\t$V1, $V2, $V3",
(!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2,
tr2.op:$V3, 0)>;
let Defs = [CC] in
def S : TernaryVRRb<mnemonic##"s", opcode, operator_cc, tr1, tr2, type,
- imm32zx4even, !add(!and (m5or, 14), 1)>;
+ imm32zx4even, !add(!and (modifier, 14), 1)>;
def : InstAlias<mnemonic#"s\t$V1, $V2, $V3",
(!cast<Instruction>(NAME#"S") tr1.op:$V1, tr2.op:$V2,
tr2.op:$V3, 0)>;
}
+multiclass TernaryOptVRRbSPairGeneric<string mnemonic, bits<16> opcode> {
+ def "" : InstVRRb<opcode, (outs VR128:$V1),
+ (ins VR128:$V2, VR128:$V3, imm32zx4:$M4, imm32zx4:$M5),
+ mnemonic#"\t$V1, $V2, $V3, $M4, $M5", []>;
+ def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $M4",
+ (!cast<Instruction>(NAME) VR128:$V1, VR128:$V2, VR128:$V3,
+ imm32zx4:$M4, 0)>;
+}
+
class TernaryVRRc<string mnemonic, bits<16> opcode, SDPatternOperator operator,
TypedReg tr1, TypedReg tr2>
: InstVRRc<opcode, (outs tr1.op:$V1),
@@ -2181,6 +3496,13 @@ class TernaryVRRd<string mnemonic, bits<16> opcode, SDPatternOperator operator,
let M6 = 0;
}
+class TernaryVRRdGeneric<string mnemonic, bits<16> opcode>
+ : InstVRRd<opcode, (outs VR128:$V1),
+ (ins VR128:$V2, VR128:$V3, VR128:$V4, imm32zx4:$M5),
+ mnemonic#"\t$V1, $V2, $V3, $V4, $M5", []> {
+ let M6 = 0;
+}
+
class TernaryVRRe<string mnemonic, bits<16> opcode, SDPatternOperator operator,
TypedReg tr1, TypedReg tr2, bits<4> m5 = 0, bits<4> type = 0>
: InstVRRe<opcode, (outs tr1.op:$V1),
@@ -2193,6 +3515,11 @@ class TernaryVRRe<string mnemonic, bits<16> opcode, SDPatternOperator operator,
let M6 = type;
}
+class TernaryVRReFloatGeneric<string mnemonic, bits<16> opcode>
+ : InstVRRe<opcode, (outs VR128:$V1),
+ (ins VR128:$V2, VR128:$V3, VR128:$V4, imm32zx4:$M5, imm32zx4:$M6),
+ mnemonic#"\t$V1, $V2, $V3, $V4, $M5, $M6", []>;
+
class TernaryVRSb<string mnemonic, bits<16> opcode, SDPatternOperator operator,
TypedReg tr1, TypedReg tr2, RegisterOperand cls, bits<4> type>
: InstVRSb<opcode, (outs tr1.op:$V1),
@@ -2206,6 +3533,14 @@ class TernaryVRSb<string mnemonic, bits<16> opcode, SDPatternOperator operator,
let M4 = type;
}
+class TernaryVRSbGeneric<string mnemonic, bits<16> opcode>
+ : InstVRSb<opcode, (outs VR128:$V1),
+ (ins VR128:$V1src, GR64:$R3, shift12only:$BD2, imm32zx4:$M4),
+ mnemonic#"\t$V1, $R3, $BD2, $M4", []> {
+ let Constraints = "$V1 = $V1src";
+ let DisableEncoding = "$V1src";
+}
+
class TernaryVRV<string mnemonic, bits<16> opcode, bits<5> bytes,
Immediate index>
: InstVRV<opcode, (outs VR128:$V1),
@@ -2245,6 +3580,15 @@ class QuaternaryVRId<string mnemonic, bits<16> opcode, SDPatternOperator operato
let M5 = type;
}
+class QuaternaryVRIdGeneric<string mnemonic, bits<16> opcode>
+ : InstVRId<opcode, (outs VR128:$V1),
+ (ins VR128:$V1src, VR128:$V2, VR128:$V3,
+ imm32zx8:$I4, imm32zx4:$M5),
+ mnemonic#"\t$V1, $V2, $V3, $I4, $M5", []> {
+ let Constraints = "$V1 = $V1src";
+ let DisableEncoding = "$V1src";
+}
+
class QuaternaryVRRd<string mnemonic, bits<16> opcode,
SDPatternOperator operator, TypedReg tr1, TypedReg tr2,
bits<4> type, SDPatternOperator m6mask, bits<4> m6or>
@@ -2259,37 +3603,57 @@ class QuaternaryVRRd<string mnemonic, bits<16> opcode,
let M5 = type;
}
-multiclass QuaternaryVRRdSPair<string mnemonic, bits<16> opcode,
- SDPatternOperator operator,
- SDPatternOperator operator_cc, TypedReg tr1,
- TypedReg tr2, bits<4> type, bits<4> m6or> {
+// Declare a pair of instructions, one which sets CC and one which doesn't.
+// The CC-setting form ends with "S" and sets the low bit of M6.
+// Also create aliases to make the M6 operand optional in assembler.
+multiclass QuaternaryOptVRRdSPair<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator,
+ SDPatternOperator operator_cc,
+ TypedReg tr1, TypedReg tr2, bits<4> type,
+ bits<4> modifier = 0> {
def "" : QuaternaryVRRd<mnemonic, opcode, operator, tr1, tr2, type,
- imm32zx4even, !and (m6or, 14)>;
+ imm32zx4even, !and (modifier, 14)>;
def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $V4",
(!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2,
tr2.op:$V3, tr2.op:$V4, 0)>;
let Defs = [CC] in
def S : QuaternaryVRRd<mnemonic##"s", opcode, operator_cc, tr1, tr2, type,
- imm32zx4even, !add (!and (m6or, 14), 1)>;
+ imm32zx4even, !add (!and (modifier, 14), 1)>;
def : InstAlias<mnemonic#"s\t$V1, $V2, $V3, $V4",
(!cast<Instruction>(NAME#"S") tr1.op:$V1, tr2.op:$V2,
tr2.op:$V3, tr2.op:$V4, 0)>;
}
+multiclass QuaternaryOptVRRdSPairGeneric<string mnemonic, bits<16> opcode> {
+ def "" : InstVRRd<opcode, (outs VR128:$V1),
+ (ins VR128:$V2, VR128:$V3, VR128:$V4,
+ imm32zx4:$M5, imm32zx4:$M6),
+ mnemonic#"\t$V1, $V2, $V3, $V4, $M5, $M6", []>;
+ def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $V4, $M5",
+ (!cast<Instruction>(NAME) VR128:$V1, VR128:$V2, VR128:$V3,
+ VR128:$V4, imm32zx4:$M5, 0)>;
+}
+
+class SideEffectQuaternarySSe<string mnemonic, bits<8> opcode,
+ RegisterOperand cls>
+ : InstSSe<opcode, (outs),
+ (ins cls:$R1, bdaddr12only:$BD2, cls:$R3, bdaddr12only:$BD4),
+ mnemonic#"\t$R1, $BD2, $R3, $BD4", []>;
+
class LoadAndOpRSY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
RegisterOperand cls, AddressingMode mode = bdaddr20only>
- : InstRSY<opcode, (outs cls:$R1), (ins cls:$R3, mode:$BD2),
- mnemonic#"\t$R1, $R3, $BD2",
- [(set cls:$R1, (operator mode:$BD2, cls:$R3))]> {
+ : InstRSYa<opcode, (outs cls:$R1), (ins cls:$R3, mode:$BD2),
+ mnemonic#"\t$R1, $R3, $BD2",
+ [(set cls:$R1, (operator mode:$BD2, cls:$R3))]> {
let mayLoad = 1;
let mayStore = 1;
}
class CmpSwapRS<string mnemonic, bits<8> opcode, SDPatternOperator operator,
RegisterOperand cls, AddressingMode mode = bdaddr12only>
- : InstRS<opcode, (outs cls:$R1), (ins cls:$R1src, cls:$R3, mode:$BD2),
- mnemonic#"\t$R1, $R3, $BD2",
- [(set cls:$R1, (operator mode:$BD2, cls:$R1src, cls:$R3))]> {
+ : InstRSa<opcode, (outs cls:$R1), (ins cls:$R1src, cls:$R3, mode:$BD2),
+ mnemonic#"\t$R1, $R3, $BD2",
+ [(set cls:$R1, (operator mode:$BD2, cls:$R1src, cls:$R3))]> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
let mayLoad = 1;
@@ -2298,9 +3662,9 @@ class CmpSwapRS<string mnemonic, bits<8> opcode, SDPatternOperator operator,
class CmpSwapRSY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
RegisterOperand cls, AddressingMode mode = bdaddr20only>
- : InstRSY<opcode, (outs cls:$R1), (ins cls:$R1src, cls:$R3, mode:$BD2),
- mnemonic#"\t$R1, $R3, $BD2",
- [(set cls:$R1, (operator mode:$BD2, cls:$R1src, cls:$R3))]> {
+ : InstRSYa<opcode, (outs cls:$R1), (ins cls:$R1src, cls:$R3, mode:$BD2),
+ mnemonic#"\t$R1, $R3, $BD2",
+ [(set cls:$R1, (operator mode:$BD2, cls:$R1src, cls:$R3))]> {
let Constraints = "$R1 = $R1src";
let DisableEncoding = "$R1src";
let mayLoad = 1;
@@ -2328,21 +3692,31 @@ class RotateSelectRIEf<string mnemonic, bits<16> opcode, RegisterOperand cls1,
}
class PrefetchRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator>
- : InstRXY<opcode, (outs), (ins imm32zx4:$R1, bdxaddr20only:$XBD2),
- mnemonic##"\t$R1, $XBD2",
- [(operator imm32zx4:$R1, bdxaddr20only:$XBD2)]>;
+ : InstRXYb<opcode, (outs), (ins imm32zx4:$M1, bdxaddr20only:$XBD2),
+ mnemonic##"\t$M1, $XBD2",
+ [(operator imm32zx4:$M1, bdxaddr20only:$XBD2)]>;
class PrefetchRILPC<string mnemonic, bits<12> opcode,
SDPatternOperator operator>
- : InstRIL<opcode, (outs), (ins imm32zx4:$R1, pcrel32:$I2),
- mnemonic##"\t$R1, $I2",
- [(operator imm32zx4:$R1, pcrel32:$I2)]> {
+ : InstRILc<opcode, (outs), (ins imm32zx4:$M1, pcrel32:$RI2),
+ mnemonic##"\t$M1, $RI2",
+ [(operator imm32zx4:$M1, pcrel32:$RI2)]> {
// We want PC-relative addresses to be tried ahead of BD and BDX addresses.
// However, BDXs have two extra operands and are therefore 6 units more
// complex.
let AddedComplexity = 7;
}
+class BranchPreloadSMI<string mnemonic, bits<8> opcode>
+ : InstSMI<opcode, (outs),
+ (ins imm32zx4:$M1, brtarget16bpp:$RI2, bdxaddr12only:$BD3),
+ mnemonic#"\t$M1, $RI2, $BD3", []>;
+
+class BranchPreloadMII<string mnemonic, bits<8> opcode>
+ : InstMII<opcode, (outs),
+ (ins imm32zx4:$M1, brtarget12bpp:$RI2, brtarget24bpp:$RI3),
+ mnemonic#"\t$M1, $RI2, $RI3", []>;
+
// A floating-point load-and test operation. Create both a normal unary
// operation and one that acts as a comparison against zero.
// Note that the comparison against zero operation is not available if we
@@ -2371,6 +3745,11 @@ class Pseudo<dag outs, dag ins, list<dag> pattern>
let isCodeGenOnly = 1;
}
+// Like SideEffectBinarySIL, but expanded later.
+class SideEffectBinarySILPseudo<SDPatternOperator operator, Immediate imm>
+ : Pseudo<(outs), (ins bdaddr12only:$BD1, imm:$I2),
+ [(operator bdaddr12only:$BD1, imm:$I2)]>;
+
// Like UnaryRI, but expanded after RA depending on the choice of register.
class UnaryRIPseudo<SDPatternOperator operator, RegisterOperand cls,
Immediate imm>
@@ -2383,7 +3762,7 @@ class UnaryRXYPseudo<string key, SDPatternOperator operator,
AddressingMode mode = bdxaddr20only>
: Pseudo<(outs cls:$R1), (ins mode:$XBD2),
[(set cls:$R1, (operator mode:$XBD2))]> {
- let OpKey = key ## cls;
+ let OpKey = key#"r"#cls;
let OpType = "mem";
let mayLoad = 1;
let Has20BitOffset = 1;
@@ -2396,7 +3775,7 @@ class UnaryRRPseudo<string key, SDPatternOperator operator,
RegisterOperand cls1, RegisterOperand cls2>
: Pseudo<(outs cls1:$R1), (ins cls2:$R2),
[(set cls1:$R1, (operator cls2:$R2))]> {
- let OpKey = key ## cls1;
+ let OpKey = key#cls1;
let OpType = "reg";
}
@@ -2430,7 +3809,9 @@ multiclass BinaryRIAndKPseudo<string key, SDPatternOperator operator,
// Like CompareRI, but expanded after RA depending on the choice of register.
class CompareRIPseudo<SDPatternOperator operator, RegisterOperand cls,
Immediate imm>
- : Pseudo<(outs), (ins cls:$R1, imm:$I2), [(operator cls:$R1, imm:$I2)]>;
+ : Pseudo<(outs), (ins cls:$R1, imm:$I2), [(operator cls:$R1, imm:$I2)]> {
+ let isCompare = 1;
+}
// Like CompareRXY, but expanded after RA depending on the choice of register.
class CompareRXYPseudo<SDPatternOperator operator, RegisterOperand cls,
@@ -2444,6 +3825,54 @@ class CompareRXYPseudo<SDPatternOperator operator, RegisterOperand cls,
let AccessBytes = bytes;
}
+// Like CondBinaryRRF, but expanded after RA depending on the choice of
+// register.
+class CondBinaryRRFPseudo<RegisterOperand cls1, RegisterOperand cls2>
+ : Pseudo<(outs cls1:$R1),
+ (ins cls1:$R1src, cls2:$R2, cond4:$valid, cond4:$M3), []> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+ let CCMaskLast = 1;
+}
+
+// Like CondBinaryRIE, but expanded after RA depending on the choice of
+// register.
+class CondBinaryRIEPseudo<RegisterOperand cls, Immediate imm>
+ : Pseudo<(outs cls:$R1),
+ (ins cls:$R1src, imm:$I2, cond4:$valid, cond4:$M3),
+ [(set cls:$R1, (z_select_ccmask imm:$I2, cls:$R1src,
+ cond4:$valid, cond4:$M3))]> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+ let CCMaskLast = 1;
+}
+
+// Like CondUnaryRSY, but expanded after RA depending on the choice of
+// register.
+class CondUnaryRSYPseudo<SDPatternOperator operator, RegisterOperand cls,
+ bits<5> bytes, AddressingMode mode = bdaddr20only>
+ : Pseudo<(outs cls:$R1),
+ (ins cls:$R1src, mode:$BD2, cond4:$valid, cond4:$R3),
+ [(set cls:$R1,
+ (z_select_ccmask (operator mode:$BD2), cls:$R1src,
+ cond4:$valid, cond4:$R3))]> {
+ let Constraints = "$R1 = $R1src";
+ let DisableEncoding = "$R1src";
+ let mayLoad = 1;
+ let AccessBytes = bytes;
+ let CCMaskLast = 1;
+}
+
+// Like CondStoreRSY, but expanded after RA depending on the choice of
+// register.
+class CondStoreRSYPseudo<RegisterOperand cls, bits<5> bytes,
+ AddressingMode mode = bdaddr20only>
+ : Pseudo<(outs), (ins cls:$R1, mode:$BD2, cond4:$valid, cond4:$R3), []> {
+ let mayStore = 1;
+ let AccessBytes = bytes;
+ let CCMaskLast = 1;
+}
+
// Like StoreRXY, but expanded after RA depending on the choice of register.
class StoreRXYPseudo<SDPatternOperator operator, RegisterOperand cls,
bits<5> bytes, AddressingMode mode = bdxaddr20only>
@@ -2509,6 +3938,7 @@ class AtomicLoadBinary<SDPatternOperator operator, RegisterOperand cls,
let mayLoad = 1;
let mayStore = 1;
let usesCustomInserter = 1;
+ let hasNoSchedulingInfo = 1;
}
// Specializations of AtomicLoadWBinary.
@@ -2535,6 +3965,7 @@ class AtomicLoadWBinary<SDPatternOperator operator, dag pat,
let mayLoad = 1;
let mayStore = 1;
let usesCustomInserter = 1;
+ let hasNoSchedulingInfo = 1;
}
// Specializations of AtomicLoadWBinary.
@@ -2550,10 +3981,10 @@ class AtomicLoadWBinaryImm<SDPatternOperator operator, Immediate imm>
// another instruction to handle the excess.
multiclass MemorySS<string mnemonic, bits<8> opcode,
SDPatternOperator sequence, SDPatternOperator loop> {
- def "" : InstSS<opcode, (outs), (ins bdladdr12onlylen8:$BDL1,
- bdaddr12only:$BD2),
- mnemonic##"\t$BDL1, $BD2", []>;
- let usesCustomInserter = 1 in {
+ def "" : InstSSa<opcode, (outs), (ins bdladdr12onlylen8:$BDL1,
+ bdaddr12only:$BD2),
+ mnemonic##"\t$BDL1, $BD2", []>;
+ let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in {
def Sequence : Pseudo<(outs), (ins bdaddr12only:$dest, bdaddr12only:$src,
imm64:$length),
[(sequence bdaddr12only:$dest, bdaddr12only:$src,
@@ -2579,7 +4010,7 @@ multiclass StringRRE<string mnemonic, bits<16> opcode,
let Constraints = "$R1 = $R1src, $R2 = $R2src";
let DisableEncoding = "$R1src, $R2src";
}
- let usesCustomInserter = 1 in
+ let usesCustomInserter = 1, hasNoSchedulingInfo = 1 in
def Loop : Pseudo<(outs GR64:$end),
(ins GR64:$start1, GR64:$start2, GR32:$char),
[(set GR64:$end, (operator GR64:$start1, GR64:$start2,
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 4084e93e5acb..3565d5f2c49c 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -88,10 +88,10 @@ void SystemZInstrInfo::splitMove(MachineBasicBlock::iterator MI,
void SystemZInstrInfo::splitAdjDynAlloc(MachineBasicBlock::iterator MI) const {
MachineBasicBlock *MBB = MI->getParent();
MachineFunction &MF = *MBB->getParent();
- MachineFrameInfo *MFFrame = MF.getFrameInfo();
+ MachineFrameInfo &MFFrame = MF.getFrameInfo();
MachineOperand &OffsetMO = MI->getOperand(2);
- uint64_t Offset = (MFFrame->getMaxCallFrameSize() +
+ uint64_t Offset = (MFFrame.getMaxCallFrameSize() +
SystemZMC::CallFrameSize +
OffsetMO.getImm());
unsigned NewOpcode = getOpcodeForOffset(SystemZ::LA, Offset);
@@ -149,6 +149,37 @@ void SystemZInstrInfo::expandRXYPseudo(MachineInstr &MI, unsigned LowOpcode,
MI.setDesc(get(Opcode));
}
+// MI is a load-on-condition pseudo instruction with a single register
+// (source or destination) operand. Replace it with LowOpcode if the
+// register is a low GR32 and HighOpcode if the register is a high GR32.
+void SystemZInstrInfo::expandLOCPseudo(MachineInstr &MI, unsigned LowOpcode,
+ unsigned HighOpcode) const {
+ unsigned Reg = MI.getOperand(0).getReg();
+ unsigned Opcode = isHighReg(Reg) ? HighOpcode : LowOpcode;
+ MI.setDesc(get(Opcode));
+}
+
+// MI is a load-register-on-condition pseudo instruction. Replace it with
+// LowOpcode if source and destination are both low GR32s and HighOpcode if
+// source and destination are both high GR32s.
+void SystemZInstrInfo::expandLOCRPseudo(MachineInstr &MI, unsigned LowOpcode,
+ unsigned HighOpcode) const {
+ unsigned DestReg = MI.getOperand(0).getReg();
+ unsigned SrcReg = MI.getOperand(2).getReg();
+ bool DestIsHigh = isHighReg(DestReg);
+ bool SrcIsHigh = isHighReg(SrcReg);
+
+ if (!DestIsHigh && !SrcIsHigh)
+ MI.setDesc(get(LowOpcode));
+ else if (DestIsHigh && SrcIsHigh)
+ MI.setDesc(get(HighOpcode));
+
+ // If we were unable to implement the pseudo with a single instruction, we
+ // need to convert it back into a branch sequence. This cannot be done here
+ // since the caller of expandPostRAPseudo does not handle changes to the CFG
+ // correctly. This change is deferred to the SystemZExpandPseudo pass.
+}
+
// MI is an RR-style pseudo instruction that zero-extends the low Size bits
// of one GRX32 into another. Replace it with LowOpcode if both operands
// are low registers, otherwise use RISB[LH]G.
@@ -172,7 +203,7 @@ void SystemZInstrInfo::expandLoadStackGuard(MachineInstr *MI) const {
MachineInstr *Ear1MI = MF.CloneMachineInstr(MI);
MBB->insert(MI, Ear1MI);
Ear1MI->setDesc(get(SystemZ::EAR));
- MachineInstrBuilder(MF, Ear1MI).addImm(0);
+ MachineInstrBuilder(MF, Ear1MI).addReg(SystemZ::A0);
// sllg <reg>, <reg>, 32
MachineInstr *SllgMI = MF.CloneMachineInstr(MI);
@@ -184,7 +215,7 @@ void SystemZInstrInfo::expandLoadStackGuard(MachineInstr *MI) const {
MachineInstr *Ear2MI = MF.CloneMachineInstr(MI);
MBB->insert(MI, Ear2MI);
Ear2MI->setDesc(get(SystemZ::EAR));
- MachineInstrBuilder(MF, Ear2MI).addImm(1);
+ MachineInstrBuilder(MF, Ear2MI).addReg(SystemZ::A1);
// lg <reg>, 40(<reg>)
MI->setDesc(get(SystemZ::LG));
@@ -222,6 +253,36 @@ void SystemZInstrInfo::emitGRX32Move(MachineBasicBlock &MBB,
.addImm(32 - Size).addImm(128 + 31).addImm(Rotate);
}
+
+MachineInstr *SystemZInstrInfo::commuteInstructionImpl(MachineInstr &MI,
+ bool NewMI,
+ unsigned OpIdx1,
+ unsigned OpIdx2) const {
+ auto cloneIfNew = [NewMI](MachineInstr &MI) -> MachineInstr & {
+ if (NewMI)
+ return *MI.getParent()->getParent()->CloneMachineInstr(&MI);
+ return MI;
+ };
+
+ switch (MI.getOpcode()) {
+ case SystemZ::LOCRMux:
+ case SystemZ::LOCFHR:
+ case SystemZ::LOCR:
+ case SystemZ::LOCGR: {
+ auto &WorkingMI = cloneIfNew(MI);
+ // Invert condition.
+ unsigned CCValid = WorkingMI.getOperand(3).getImm();
+ unsigned CCMask = WorkingMI.getOperand(4).getImm();
+ WorkingMI.getOperand(4).setImm(CCMask ^ CCValid);
+ return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
+ OpIdx1, OpIdx2);
+ }
+ default:
+ return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
+ }
+}
+
+
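A standalone sanity check of the CCMask ^ CCValid trick used by commuteInstructionImpl above (reverseBranchCondition later in this file relies on the same inversion). It assumes the usual SystemZ encoding in which CCMASK_ICMP is 14 (CC0|CC1|CC2) and CCMASK_CMP_EQ is 8 (CC0); the snippet is illustrative only and not part of the patch.

#include <cassert>

int main() {
  unsigned CCValid  = 14;                 // valid integer-compare conditions
  unsigned CCMaskEQ = 8;                  // "equal"
  unsigned CCMaskInv = CCMaskEQ ^ CCValid;
  assert(CCMaskInv == 6);                 // "low or high", i.e. not-equal
  return 0;
}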
// If MI is a simple load or store for a frame object, return the register
// it loads or stores and set FrameIndex to the index of the frame object.
// Return 0 otherwise.
@@ -252,7 +313,7 @@ bool SystemZInstrInfo::isStackSlotCopy(const MachineInstr &MI,
int &DestFrameIndex,
int &SrcFrameIndex) const {
// Check for MVC 0(Length,FI1),0(FI2)
- const MachineFrameInfo *MFI = MI.getParent()->getParent()->getFrameInfo();
+ const MachineFrameInfo &MFI = MI.getParent()->getParent()->getFrameInfo();
if (MI.getOpcode() != SystemZ::MVC || !MI.getOperand(0).isFI() ||
MI.getOperand(1).getImm() != 0 || !MI.getOperand(3).isFI() ||
MI.getOperand(4).getImm() != 0)
@@ -262,8 +323,8 @@ bool SystemZInstrInfo::isStackSlotCopy(const MachineInstr &MI,
int64_t Length = MI.getOperand(2).getImm();
unsigned FI1 = MI.getOperand(0).getIndex();
unsigned FI2 = MI.getOperand(3).getIndex();
- if (MFI->getObjectSize(FI1) != Length ||
- MFI->getObjectSize(FI2) != Length)
+ if (MFI.getObjectSize(FI1) != Length ||
+ MFI.getObjectSize(FI2) != Length)
return false;
DestFrameIndex = FI1;
@@ -363,7 +424,10 @@ bool SystemZInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
return false;
}
-unsigned SystemZInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+unsigned SystemZInstrInfo::removeBranch(MachineBasicBlock &MBB,
+ int *BytesRemoved) const {
+ assert(!BytesRemoved && "code size not handled");
+
// Most of the code and comments here are boilerplate.
MachineBasicBlock::iterator I = MBB.end();
unsigned Count = 0;
@@ -386,25 +450,27 @@ unsigned SystemZInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
}
bool SystemZInstrInfo::
-ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
assert(Cond.size() == 2 && "Invalid condition");
Cond[1].setImm(Cond[1].getImm() ^ Cond[0].getImm());
return false;
}
-unsigned SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB,
+unsigned SystemZInstrInfo::insertBranch(MachineBasicBlock &MBB,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
ArrayRef<MachineOperand> Cond,
- const DebugLoc &DL) const {
+ const DebugLoc &DL,
+ int *BytesAdded) const {
// In this function we output 32-bit branches, which should always
// have enough range. They can be shortened and relaxed by later code
// in the pipeline, if desired.
// Shouldn't be a fall through.
- assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert(TBB && "insertBranch must not be told to insert a fallthrough");
assert((Cond.size() == 2 || Cond.size() == 0) &&
"SystemZ branch conditions have one component!");
+ assert(!BytesAdded && "code size not handled");
if (Cond.empty()) {
// Unconditional branch?
@@ -520,30 +586,128 @@ bool SystemZInstrInfo::optimizeCompareInstr(
removeIPMBasedCompare(Compare, SrcReg, MRI, &RI);
}
-// If Opcode is a move that has a conditional variant, return that variant,
-// otherwise return 0.
-static unsigned getConditionalMove(unsigned Opcode) {
- switch (Opcode) {
- case SystemZ::LR: return SystemZ::LOCR;
- case SystemZ::LGR: return SystemZ::LOCGR;
- default: return 0;
+
+bool SystemZInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
+ ArrayRef<MachineOperand> Pred,
+ unsigned TrueReg, unsigned FalseReg,
+ int &CondCycles, int &TrueCycles,
+ int &FalseCycles) const {
+ // Not all subtargets have LOCR instructions.
+ if (!STI.hasLoadStoreOnCond())
+ return false;
+ if (Pred.size() != 2)
+ return false;
+
+ // Check register classes.
+ const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ const TargetRegisterClass *RC =
+ RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
+ if (!RC)
+ return false;
+
+ // We have LOCR instructions for 32 and 64 bit general purpose registers.
+ if ((STI.hasLoadStoreOnCond2() &&
+ SystemZ::GRX32BitRegClass.hasSubClassEq(RC)) ||
+ SystemZ::GR32BitRegClass.hasSubClassEq(RC) ||
+ SystemZ::GR64BitRegClass.hasSubClassEq(RC)) {
+ CondCycles = 2;
+ TrueCycles = 2;
+ FalseCycles = 2;
+ return true;
}
+
+ // Can't do anything else.
+ return false;
}
-static unsigned getConditionalLoadImmediate(unsigned Opcode) {
- switch (Opcode) {
- case SystemZ::LHI: return SystemZ::LOCHI;
- case SystemZ::LGHI: return SystemZ::LOCGHI;
- default: return 0;
+void SystemZInstrInfo::insertSelect(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ const DebugLoc &DL, unsigned DstReg,
+ ArrayRef<MachineOperand> Pred,
+ unsigned TrueReg,
+ unsigned FalseReg) const {
+ MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
+ const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
+
+ assert(Pred.size() == 2 && "Invalid condition");
+ unsigned CCValid = Pred[0].getImm();
+ unsigned CCMask = Pred[1].getImm();
+
+ unsigned Opc;
+ if (SystemZ::GRX32BitRegClass.hasSubClassEq(RC)) {
+ if (STI.hasLoadStoreOnCond2())
+ Opc = SystemZ::LOCRMux;
+ else {
+ Opc = SystemZ::LOCR;
+ MRI.constrainRegClass(DstReg, &SystemZ::GR32BitRegClass);
+ }
+ } else if (SystemZ::GR64BitRegClass.hasSubClassEq(RC))
+ Opc = SystemZ::LOCGR;
+ else
+ llvm_unreachable("Invalid register class");
+
+ BuildMI(MBB, I, DL, get(Opc), DstReg)
+ .addReg(FalseReg).addReg(TrueReg)
+ .addImm(CCValid).addImm(CCMask);
+}
+
+bool SystemZInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
+ unsigned Reg,
+ MachineRegisterInfo *MRI) const {
+ unsigned DefOpc = DefMI.getOpcode();
+ if (DefOpc != SystemZ::LHIMux && DefOpc != SystemZ::LHI &&
+ DefOpc != SystemZ::LGHI)
+ return false;
+ if (DefMI.getOperand(0).getReg() != Reg)
+ return false;
+ int32_t ImmVal = (int32_t)DefMI.getOperand(1).getImm();
+
+ unsigned UseOpc = UseMI.getOpcode();
+ unsigned NewUseOpc;
+ unsigned UseIdx;
+ int CommuteIdx = -1;
+ switch (UseOpc) {
+ case SystemZ::LOCRMux:
+ if (!STI.hasLoadStoreOnCond2())
+ return false;
+ NewUseOpc = SystemZ::LOCHIMux;
+ if (UseMI.getOperand(2).getReg() == Reg)
+ UseIdx = 2;
+ else if (UseMI.getOperand(1).getReg() == Reg)
+ UseIdx = 2, CommuteIdx = 1;
+ else
+ return false;
+ break;
+ case SystemZ::LOCGR:
+ if (!STI.hasLoadStoreOnCond2())
+ return false;
+ NewUseOpc = SystemZ::LOCGHI;
+ if (UseMI.getOperand(2).getReg() == Reg)
+ UseIdx = 2;
+ else if (UseMI.getOperand(1).getReg() == Reg)
+ UseIdx = 2, CommuteIdx = 1;
+ else
+ return false;
+ break;
+ default:
+ return false;
}
+
+ if (CommuteIdx != -1)
+ if (!commuteInstruction(UseMI, false, CommuteIdx, UseIdx))
+ return false;
+
+ bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
+ UseMI.setDesc(get(NewUseOpc));
+ UseMI.getOperand(UseIdx).ChangeToImmediate(ImmVal);
+ if (DeleteDef)
+ DefMI.eraseFromParent();
+
+ return true;
}
bool SystemZInstrInfo::isPredicable(MachineInstr &MI) const {
unsigned Opcode = MI.getOpcode();
- if (STI.hasLoadStoreOnCond() && getConditionalMove(Opcode))
- return true;
- if (STI.hasLoadStoreOnCond2() && getConditionalLoadImmediate(Opcode))
- return true;
if (Opcode == SystemZ::Return ||
Opcode == SystemZ::Trap ||
Opcode == SystemZ::CallJG ||
@@ -595,26 +759,6 @@ bool SystemZInstrInfo::PredicateInstruction(
unsigned CCMask = Pred[1].getImm();
assert(CCMask > 0 && CCMask < 15 && "Invalid predicate");
unsigned Opcode = MI.getOpcode();
- if (STI.hasLoadStoreOnCond()) {
- if (unsigned CondOpcode = getConditionalMove(Opcode)) {
- MI.setDesc(get(CondOpcode));
- MachineInstrBuilder(*MI.getParent()->getParent(), MI)
- .addImm(CCValid)
- .addImm(CCMask)
- .addReg(SystemZ::CC, RegState::Implicit);
- return true;
- }
- }
- if (STI.hasLoadStoreOnCond2()) {
- if (unsigned CondOpcode = getConditionalLoadImmediate(Opcode)) {
- MI.setDesc(get(CondOpcode));
- MachineInstrBuilder(*MI.getParent()->getParent(), MI)
- .addImm(CCValid)
- .addImm(CCMask)
- .addReg(SystemZ::CC, RegState::Implicit);
- return true;
- }
- }
if (Opcode == SystemZ::Trap) {
MI.setDesc(get(SystemZ::CondTrap));
MachineInstrBuilder(*MI.getParent()->getParent(), MI)
@@ -690,6 +834,14 @@ void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
Opcode = SystemZ::VLR64;
else if (SystemZ::VR128BitRegClass.contains(DestReg, SrcReg))
Opcode = SystemZ::VLR;
+ else if (SystemZ::AR32BitRegClass.contains(DestReg, SrcReg))
+ Opcode = SystemZ::CPYA;
+ else if (SystemZ::AR32BitRegClass.contains(DestReg) &&
+ SystemZ::GR32BitRegClass.contains(SrcReg))
+ Opcode = SystemZ::SAR;
+ else if (SystemZ::GR32BitRegClass.contains(DestReg) &&
+ SystemZ::AR32BitRegClass.contains(SrcReg))
+ Opcode = SystemZ::EAR;
else
llvm_unreachable("Impossible reg-to-reg copy");
@@ -875,8 +1027,8 @@ MachineInstr *SystemZInstrInfo::foldMemoryOperandImpl(
MachineBasicBlock::iterator InsertPt, int FrameIndex,
LiveIntervals *LIS) const {
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- unsigned Size = MFI->getObjectSize(FrameIndex);
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ unsigned Size = MFI.getObjectSize(FrameIndex);
unsigned Opcode = MI.getOpcode();
if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) {
@@ -1077,6 +1229,18 @@ bool SystemZInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
expandRXYPseudo(MI, SystemZ::L, SystemZ::LFH);
return true;
+ case SystemZ::LOCMux:
+ expandLOCPseudo(MI, SystemZ::LOC, SystemZ::LOCFH);
+ return true;
+
+ case SystemZ::LOCHIMux:
+ expandLOCPseudo(MI, SystemZ::LOCHI, SystemZ::LOCHHI);
+ return true;
+
+ case SystemZ::LOCRMux:
+ expandLOCRPseudo(MI, SystemZ::LOCR, SystemZ::LOCFHR);
+ return true;
+
case SystemZ::STCMux:
expandRXYPseudo(MI, SystemZ::STC, SystemZ::STCH);
return true;
@@ -1089,6 +1253,10 @@ bool SystemZInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
expandRXYPseudo(MI, SystemZ::ST, SystemZ::STFH);
return true;
+ case SystemZ::STOCMux:
+ expandLOCPseudo(MI, SystemZ::STOC, SystemZ::STOCFH);
+ return true;
+
case SystemZ::LHIMux:
expandRIPseudo(MI, SystemZ::LHI, SystemZ::IIHF, true);
return true;
@@ -1153,6 +1321,10 @@ bool SystemZInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
expandRIPseudo(MI, SystemZ::AFI, SystemZ::AIH, false);
return true;
+ case SystemZ::CHIMux:
+ expandRIPseudo(MI, SystemZ::CHI, SystemZ::CIH, false);
+ return true;
+
case SystemZ::CFIMux:
expandRIPseudo(MI, SystemZ::CFI, SystemZ::CIH, false);
return true;
@@ -1194,7 +1366,7 @@ bool SystemZInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
}
}
-uint64_t SystemZInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
+unsigned SystemZInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
if (MI.getOpcode() == TargetOpcode::INLINEASM) {
const MachineFunction *MF = MI.getParent()->getParent();
const char *AsmStr = MI.getOperand(0).getSymbolName();
@@ -1218,6 +1390,7 @@ SystemZInstrInfo::getBranchInfo(const MachineInstr &MI) const {
MI.getOperand(1).getImm(), &MI.getOperand(2));
case SystemZ::BRCT:
+ case SystemZ::BRCTH:
return SystemZII::Branch(SystemZII::BranchCT, SystemZ::CCMASK_ICMP,
SystemZ::CCMASK_CMP_NE, &MI.getOperand(2));
@@ -1403,6 +1576,14 @@ unsigned SystemZInstrInfo::getFusedCompare(unsigned Opcode,
case SystemZ::CLGFI:
if (!(MI && isUInt<8>(MI->getOperand(1).getImm())))
return 0;
+ break;
+ case SystemZ::CL:
+ case SystemZ::CLG:
+ if (!STI.hasMiscellaneousExtensions())
+ return 0;
+ if (!(MI && MI->getOperand(3).getReg() == 0))
+ return 0;
+ break;
}
switch (Type) {
case SystemZII::CompareAndBranch:
@@ -1486,6 +1667,10 @@ unsigned SystemZInstrInfo::getFusedCompare(unsigned Opcode,
return SystemZ::CLFIT;
case SystemZ::CLGFI:
return SystemZ::CLGIT;
+ case SystemZ::CL:
+ return SystemZ::CLT;
+ case SystemZ::CLG:
+ return SystemZ::CLGT;
default:
return 0;
}
@@ -1493,6 +1678,25 @@ unsigned SystemZInstrInfo::getFusedCompare(unsigned Opcode,
return 0;
}
+unsigned SystemZInstrInfo::getLoadAndTrap(unsigned Opcode) const {
+ if (!STI.hasLoadAndTrap())
+ return 0;
+ switch (Opcode) {
+ case SystemZ::L:
+ case SystemZ::LY:
+ return SystemZ::LAT;
+ case SystemZ::LG:
+ return SystemZ::LGAT;
+ case SystemZ::LFH:
+ return SystemZ::LFHAT;
+ case SystemZ::LLGF:
+ return SystemZ::LLGFAT;
+ case SystemZ::LLGT:
+ return SystemZ::LLGTAT;
+ }
+ return 0;
+}
+
void SystemZInstrInfo::loadImmediate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned Reg, uint64_t Value) const {
@@ -1511,3 +1715,38 @@ void SystemZInstrInfo::loadImmediate(MachineBasicBlock &MBB,
}
BuildMI(MBB, MBBI, DL, get(Opcode), Reg).addImm(Value);
}
+
+bool SystemZInstrInfo::
+areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb,
+ AliasAnalysis *AA) const {
+
+ if (!MIa.hasOneMemOperand() || !MIb.hasOneMemOperand())
+ return false;
+
+ // If mem-operands show that the same address Value is used by both
+ // instructions, check for non-overlapping offsets and widths. Not
+ // sure whether a register-based analysis would be an improvement...
+
+ MachineMemOperand *MMOa = *MIa.memoperands_begin();
+ MachineMemOperand *MMOb = *MIb.memoperands_begin();
+ const Value *VALa = MMOa->getValue();
+ const Value *VALb = MMOb->getValue();
+ bool SameVal = (VALa && VALb && (VALa == VALb));
+ if (!SameVal) {
+ const PseudoSourceValue *PSVa = MMOa->getPseudoValue();
+ const PseudoSourceValue *PSVb = MMOb->getPseudoValue();
+ if (PSVa && PSVb && (PSVa == PSVb))
+ SameVal = true;
+ }
+ if (SameVal) {
+ int OffsetA = MMOa->getOffset(), OffsetB = MMOb->getOffset();
+ int WidthA = MMOa->getSize(), WidthB = MMOb->getSize();
+ int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
+ int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
+ int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
+ if (LowOffset + LowWidth <= HighOffset)
+ return true;
+ }
+
+ return false;
+}
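A standalone check of the offset/width test in areMemAccessesTriviallyDisjoint above, with hypothetical accesses (illustrative only, not part of the patch): a 4-byte access at offset 0 and an 8-byte access at offset 4 are reported disjoint, while moving the second access to offset 2 makes them overlap.

#include <cassert>

// Mirrors the interval test above: two accesses to the same base value are
// disjoint when the lower one ends at or before the higher one starts.
static bool disjoint(int OffsetA, int WidthA, int OffsetB, int WidthB) {
  int LowOffset  = OffsetA < OffsetB ? OffsetA : OffsetB;
  int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
  int LowWidth   = (LowOffset == OffsetA) ? WidthA : WidthB;
  return LowOffset + LowWidth <= HighOffset;
}

int main() {
  assert(disjoint(0, 4, 4, 8));    // [0,4) and [4,12) do not overlap
  assert(!disjoint(0, 4, 2, 8));   // [0,4) and [2,10) overlap
  return 0;
}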
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
index 010010b89dc8..794b193a501e 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -142,6 +142,10 @@ class SystemZInstrInfo : public SystemZGenInstrInfo {
unsigned LowOpcodeK, unsigned HighOpcode) const;
void expandRXYPseudo(MachineInstr &MI, unsigned LowOpcode,
unsigned HighOpcode) const;
+ void expandLOCPseudo(MachineInstr &MI, unsigned LowOpcode,
+ unsigned HighOpcode) const;
+ void expandLOCRPseudo(MachineInstr &MI, unsigned LowOpcode,
+ unsigned HighOpcode) const;
void expandZExtPseudo(MachineInstr &MI, unsigned LowOpcode,
unsigned Size) const;
void expandLoadStackGuard(MachineInstr *MI) const;
@@ -149,7 +153,23 @@ class SystemZInstrInfo : public SystemZGenInstrInfo {
const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
unsigned LowLowOpcode, unsigned Size, bool KillSrc) const;
virtual void anchor();
-
+
+protected:
+ /// Commutes the operands in the given instruction by changing the operands
+ /// order and/or changing the instruction's opcode and/or the immediate value
+ /// operand.
+ ///
+ /// The arguments 'CommuteOpIdx1' and 'CommuteOpIdx2' specify the operands
+ /// to be commuted.
+ ///
+ /// Do not call this method for a non-commutable instruction or
+ /// non-commutable operands.
+ /// Even though the instruction is commutable, the method may still
+ /// fail to commute the operands; a null pointer is returned in such cases.
+ MachineInstr *commuteInstructionImpl(MachineInstr &MI, bool NewMI,
+ unsigned CommuteOpIdx1,
+ unsigned CommuteOpIdx2) const override;
+
public:
explicit SystemZInstrInfo(SystemZSubtarget &STI);
@@ -164,15 +184,25 @@ public:
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const override;
- unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
- unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ unsigned removeBranch(MachineBasicBlock &MBB,
+ int *BytesRemoved = nullptr) const override;
+ unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
- const DebugLoc &DL) const override;
+ const DebugLoc &DL,
+ int *BytesAdded = nullptr) const override;
bool analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
unsigned &SrcReg2, int &Mask, int &Value) const override;
bool optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
unsigned SrcReg2, int Mask, int Value,
const MachineRegisterInfo *MRI) const override;
+ bool canInsertSelect(const MachineBasicBlock&, ArrayRef<MachineOperand> Cond,
+ unsigned, unsigned, int&, int&, int&) const override;
+ void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ const DebugLoc &DL, unsigned DstReg,
+ ArrayRef<MachineOperand> Cond, unsigned TrueReg,
+ unsigned FalseReg) const override;
+ bool FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI, unsigned Reg,
+ MachineRegisterInfo *MRI) const override;
bool isPredicable(MachineInstr &MI) const override;
bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCycles,
unsigned ExtraPredCycles,
@@ -212,14 +242,14 @@ public:
MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI,
LiveIntervals *LIS = nullptr) const override;
bool expandPostRAPseudo(MachineInstr &MBBI) const override;
- bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
+ bool reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const
override;
// Return the SystemZRegisterInfo, which this class owns.
const SystemZRegisterInfo &getRegisterInfo() const { return RI; }
// Return the size in bytes of MI.
- uint64_t getInstSizeInBytes(const MachineInstr &MI) const;
+ unsigned getInstSizeInBytes(const MachineInstr &MI) const override;
// Return true if MI is a conditional or unconditional branch.
// When returning true, set Cond to the mask of condition-code
@@ -256,11 +286,23 @@ public:
SystemZII::FusedCompareType Type,
const MachineInstr *MI = nullptr) const;
+ // If Opcode is a LOAD opcode for which an associated LOAD AND TRAP
+ // operation exists, return the opcode for the latter, otherwise return 0.
+ unsigned getLoadAndTrap(unsigned Opcode) const;
+
// Emit code before MBBI in MI to move immediate value Value into
// physical register Reg.
void loadImmediate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
unsigned Reg, uint64_t Value) const;
+
+ // Sometimes, it is possible for the target to tell, even without
+ // aliasing information, that two MIs access different memory
+ // addresses. This function returns true if the two MIs are known to
+ // access different memory addresses, and false otherwise.
+ bool
+ areMemAccessesTriviallyDisjoint(MachineInstr &MIa, MachineInstr &MIb,
+ AliasAnalysis *AA = nullptr) const override;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index c510ca774be3..d63525f29412 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -11,10 +11,12 @@
// Stack allocation
//===----------------------------------------------------------------------===//
-def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i64imm:$amt),
- [(callseq_start timm:$amt)]>;
-def ADJCALLSTACKUP : Pseudo<(outs), (ins i64imm:$amt1, i64imm:$amt2),
- [(callseq_end timm:$amt1, timm:$amt2)]>;
+let hasNoSchedulingInfo = 1 in {
+ def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i64imm:$amt),
+ [(callseq_start timm:$amt)]>;
+ def ADJCALLSTACKUP : Pseudo<(outs), (ins i64imm:$amt1, i64imm:$amt2),
+ [(callseq_end timm:$amt1, timm:$amt2)]>;
+}
let hasSideEffects = 0 in {
// Takes as input the value of the stack pointer after a dynamic allocation
@@ -29,348 +31,225 @@ let hasSideEffects = 0 in {
}
//===----------------------------------------------------------------------===//
-// Control flow instructions
+// Branch instructions
//===----------------------------------------------------------------------===//
-// A return instruction (br %r14).
-let isReturn = 1, isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in
- def Return : Alias<2, (outs), (ins), [(z_retflag)]>;
-
-// A conditional return instruction (bcr <cond>, %r14).
-let isReturn = 1, isTerminator = 1, hasCtrlDep = 1, CCMaskFirst = 1, Uses = [CC] in
- def CondReturn : Alias<2, (outs), (ins cond4:$valid, cond4:$R1), []>;
-
-// Fused compare and conditional returns.
-let isReturn = 1, isTerminator = 1, hasCtrlDep = 1 in {
- def CRBReturn : Alias<6, (outs), (ins GR32:$R1, GR32:$R2, cond4:$M3), []>;
- def CGRBReturn : Alias<6, (outs), (ins GR64:$R1, GR64:$R2, cond4:$M3), []>;
- def CIBReturn : Alias<6, (outs), (ins GR32:$R1, imm32sx8:$I2, cond4:$M3), []>;
- def CGIBReturn : Alias<6, (outs), (ins GR64:$R1, imm64sx8:$I2, cond4:$M3), []>;
- def CLRBReturn : Alias<6, (outs), (ins GR32:$R1, GR32:$R2, cond4:$M3), []>;
- def CLGRBReturn : Alias<6, (outs), (ins GR64:$R1, GR64:$R2, cond4:$M3), []>;
- def CLIBReturn : Alias<6, (outs), (ins GR32:$R1, imm32zx8:$I2, cond4:$M3), []>;
- def CLGIBReturn : Alias<6, (outs), (ins GR64:$R1, imm64zx8:$I2, cond4:$M3), []>;
-}
-
-// Unconditional branches. R1 is the condition-code mask (all 1s).
-let isBranch = 1, isTerminator = 1, isBarrier = 1, R1 = 15 in {
- let isIndirectBranch = 1 in
- def BR : InstRR<0x07, (outs), (ins ADDR64:$R2),
- "br\t$R2", [(brind ADDR64:$R2)]>;
-
- // An assembler extended mnemonic for BRC.
- def J : InstRI<0xA74, (outs), (ins brtarget16:$I2), "j\t$I2",
- [(br bb:$I2)]>;
-
- // An assembler extended mnemonic for BRCL. (The extension is "G"
- // rather than "L" because "JL" is "Jump if Less".)
- def JG : InstRIL<0xC04, (outs), (ins brtarget32:$I2), "jg\t$I2", []>;
-}
+// Conditional branches.
+let isBranch = 1, isTerminator = 1, Uses = [CC] in {
+ // It's easier for LLVM to handle these branches in their raw BRC/BRCL form
+ // with the condition-code mask being the first operand. It seems friendlier
+ // to use mnemonic forms like JE and JLH when writing out the assembly though.
+ let isCodeGenOnly = 1 in {
+ // An assembler extended mnemonic for BRC.
+ def BRC : CondBranchRI <"j#", 0xA74, z_br_ccmask>;
+ // An assembler extended mnemonic for BRCL. (The extension is "G"
+ // rather than "L" because "JL" is "Jump if Less".)
+ def BRCL : CondBranchRIL<"jg#", 0xC04>;
+ let isIndirectBranch = 1 in {
+ def BC : CondBranchRX<"b#", 0x47>;
+ def BCR : CondBranchRR<"b#r", 0x07>;
+ }
+ }
-// FIXME: This trap instruction should be marked as isTerminator, but there is
-// currently a general bug that allows non-terminators to be placed between
-// terminators. Temporarily leave this unmarked until the bug is fixed.
-let isBarrier = 1, hasCtrlDep = 1 in {
- def Trap : Alias<4, (outs), (ins), [(trap)]>;
-}
+ // Allow using the raw forms directly from the assembler (and occasional
+ // special code generation needs) as well.
+ def BRCAsm : AsmCondBranchRI <"brc", 0xA74>;
+ def BRCLAsm : AsmCondBranchRIL<"brcl", 0xC04>;
+ let isIndirectBranch = 1 in {
+ def BCAsm : AsmCondBranchRX<"bc", 0x47>;
+ def BCRAsm : AsmCondBranchRR<"bcr", 0x07>;
+ }
-let isTerminator = 1, hasCtrlDep = 1, Uses = [CC] in {
- def CondTrap : Alias<4, (outs), (ins cond4:$valid, cond4:$R1), []>;
+ // Define AsmParser extended mnemonics for each general condition-code mask
+ // (integer or floating-point)
+ foreach V = [ "E", "NE", "H", "NH", "L", "NL", "HE", "NHE", "LE", "NLE",
+ "Z", "NZ", "P", "NP", "M", "NM", "LH", "NLH", "O", "NO" ] in {
+ def JAsm#V : FixedCondBranchRI <CV<V>, "j#", 0xA74>;
+ def JGAsm#V : FixedCondBranchRIL<CV<V>, "jg#", 0xC04>;
+ let isIndirectBranch = 1 in {
+ def BAsm#V : FixedCondBranchRX <CV<V>, "b#", 0x47>;
+ def BRAsm#V : FixedCondBranchRR <CV<V>, "b#r", 0x07>;
+ }
+ }
}
-// Conditional branches. It's easier for LLVM to handle these branches
-// in their raw BRC/BRCL form, with the 4-bit condition-code mask being
-// the first operand. It seems friendlier to use mnemonic forms like
-// JE and JLH when writing out the assembly though.
-let isBranch = 1, isTerminator = 1, Uses = [CC] in {
- let isCodeGenOnly = 1, CCMaskFirst = 1 in {
- def BRC : InstRI<0xA74, (outs), (ins cond4:$valid, cond4:$R1,
- brtarget16:$I2), "j$R1\t$I2",
- [(z_br_ccmask cond4:$valid, cond4:$R1, bb:$I2)]>;
- def BRCL : InstRIL<0xC04, (outs), (ins cond4:$valid, cond4:$R1,
- brtarget32:$I2), "jg$R1\t$I2", []>;
- let isIndirectBranch = 1 in
- def BCR : InstRR<0x07, (outs), (ins cond4:$valid, cond4:$R1, GR64:$R2),
- "b${R1}r\t$R2", []>;
- }
- def AsmBRC : InstRI<0xA74, (outs), (ins imm32zx4:$R1, brtarget16:$I2),
- "brc\t$R1, $I2", []>;
- def AsmBRCL : InstRIL<0xC04, (outs), (ins imm32zx4:$R1, brtarget32:$I2),
- "brcl\t$R1, $I2", []>;
+// Unconditional branches. These are in fact simply variants of the
+// conditional branches with the condition mask set to "always".
+let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
+ def J : FixedCondBranchRI <CondAlways, "j", 0xA74, br>;
+ def JG : FixedCondBranchRIL<CondAlways, "jg", 0xC04>;
let isIndirectBranch = 1 in {
- def AsmBC : InstRX<0x47, (outs), (ins imm32zx4:$R1, bdxaddr12only:$XBD2),
- "bc\t$R1, $XBD2", []>;
- def AsmBCR : InstRR<0x07, (outs), (ins imm32zx4:$R1, GR64:$R2),
- "bcr\t$R1, $R2", []>;
+ def B : FixedCondBranchRX<CondAlways, "b", 0x47>;
+ def BR : FixedCondBranchRR<CondAlways, "br", 0x07, brind>;
}
}
-def AsmNop : InstAlias<"nop\t$XBD", (AsmBC 0, bdxaddr12only:$XBD), 0>;
-def AsmNopR : InstAlias<"nopr\t$R", (AsmBCR 0, GR64:$R), 0>;
+// NOPs. These are again variants of the conditional branches,
+// with the condition mask set to "never".
+def NOP : InstAlias<"nop\t$XBD", (BCAsm 0, bdxaddr12only:$XBD), 0>;
+def NOPR : InstAlias<"nopr\t$R", (BCRAsm 0, GR64:$R), 0>;
-// Fused compare-and-branch instructions. As for normal branches,
-// we handle these instructions internally in their raw CRJ-like form,
-// but use assembly macros like CRJE when writing them out.
+// Fused compare-and-branch instructions.
//
// These instructions do not use or clobber the condition codes.
-// We nevertheless pretend that they clobber CC, so that we can lower
-// them to separate comparisons and BRCLs if the branch ends up being
-// out of range.
-multiclass CompareBranches<Operand ccmask, string pos1, string pos2> {
- let isBranch = 1, isTerminator = 1, Defs = [CC] in {
- def RJ : InstRIEb<0xEC76, (outs), (ins GR32:$R1, GR32:$R2, ccmask:$M3,
- brtarget16:$RI4),
- "crj"##pos1##"\t$R1, $R2"##pos2##", $RI4", []>;
- def GRJ : InstRIEb<0xEC64, (outs), (ins GR64:$R1, GR64:$R2, ccmask:$M3,
- brtarget16:$RI4),
- "cgrj"##pos1##"\t$R1, $R2"##pos2##", $RI4", []>;
- def IJ : InstRIEc<0xEC7E, (outs), (ins GR32:$R1, imm32sx8:$I2, ccmask:$M3,
- brtarget16:$RI4),
- "cij"##pos1##"\t$R1, $I2"##pos2##", $RI4", []>;
- def GIJ : InstRIEc<0xEC7C, (outs), (ins GR64:$R1, imm64sx8:$I2, ccmask:$M3,
- brtarget16:$RI4),
- "cgij"##pos1##"\t$R1, $I2"##pos2##", $RI4", []>;
- def LRJ : InstRIEb<0xEC77, (outs), (ins GR32:$R1, GR32:$R2, ccmask:$M3,
- brtarget16:$RI4),
- "clrj"##pos1##"\t$R1, $R2"##pos2##", $RI4", []>;
- def LGRJ : InstRIEb<0xEC65, (outs), (ins GR64:$R1, GR64:$R2, ccmask:$M3,
- brtarget16:$RI4),
- "clgrj"##pos1##"\t$R1, $R2"##pos2##", $RI4", []>;
- def LIJ : InstRIEc<0xEC7F, (outs), (ins GR32:$R1, imm32zx8:$I2, ccmask:$M3,
- brtarget16:$RI4),
- "clij"##pos1##"\t$R1, $I2"##pos2##", $RI4", []>;
- def LGIJ : InstRIEc<0xEC7D, (outs), (ins GR64:$R1, imm64zx8:$I2, ccmask:$M3,
- brtarget16:$RI4),
- "clgij"##pos1##"\t$R1, $I2"##pos2##", $RI4", []>;
- let isIndirectBranch = 1 in {
- def RB : InstRRS<0xECF6, (outs), (ins GR32:$R1, GR32:$R2, ccmask:$M3,
- bdaddr12only:$BD4),
- "crb"##pos1##"\t$R1, $R2"##pos2##", $BD4", []>;
- def GRB : InstRRS<0xECE4, (outs), (ins GR64:$R1, GR64:$R2, ccmask:$M3,
- bdaddr12only:$BD4),
- "cgrb"##pos1##"\t$R1, $R2"##pos2##", $BD4", []>;
- def IB : InstRIS<0xECFE, (outs), (ins GR32:$R1, imm32sx8:$I2, ccmask:$M3,
- bdaddr12only:$BD4),
- "cib"##pos1##"\t$R1, $I2"##pos2##", $BD4", []>;
- def GIB : InstRIS<0xECFC, (outs), (ins GR64:$R1, imm64sx8:$I2, ccmask:$M3,
- bdaddr12only:$BD4),
- "cgib"##pos1##"\t$R1, $I2"##pos2##", $BD4", []>;
- def LRB : InstRRS<0xECF7, (outs), (ins GR32:$R1, GR32:$R2, ccmask:$M3,
- bdaddr12only:$BD4),
- "clrb"##pos1##"\t$R1, $R2"##pos2##", $BD4", []>;
- def LGRB : InstRRS<0xECE5, (outs), (ins GR64:$R1, GR64:$R2, ccmask:$M3,
- bdaddr12only:$BD4),
- "clgrb"##pos1##"\t$R1, $R2"##pos2##", $BD4", []>;
- def LIB : InstRIS<0xECFF, (outs), (ins GR32:$R1, imm32zx8:$I2, ccmask:$M3,
- bdaddr12only:$BD4),
- "clib"##pos1##"\t$R1, $I2"##pos2##", $BD4", []>;
- def LGIB : InstRIS<0xECFD, (outs), (ins GR64:$R1, imm64zx8:$I2, ccmask:$M3,
- bdaddr12only:$BD4),
- "clgib"##pos1##"\t$R1, $I2"##pos2##", $BD4", []>;
- }
+// We nevertheless pretend that the relative compare-and-branch
+// instructions clobber CC, so that we can lower them to separate
+// comparisons and BRCLs if the branch ends up being out of range.
+let isBranch = 1, isTerminator = 1 in {
+ // As for normal branches, we handle these instructions internally in
+ // their raw CRJ-like form, but use assembly macros like CRJE when writing
+ // them out. Using the *Pair multiclasses, we also create the raw forms.
+ let Defs = [CC] in {
+ defm CRJ : CmpBranchRIEbPair<"crj", 0xEC76, GR32>;
+ defm CGRJ : CmpBranchRIEbPair<"cgrj", 0xEC64, GR64>;
+ defm CIJ : CmpBranchRIEcPair<"cij", 0xEC7E, GR32, imm32sx8>;
+ defm CGIJ : CmpBranchRIEcPair<"cgij", 0xEC7C, GR64, imm64sx8>;
+ defm CLRJ : CmpBranchRIEbPair<"clrj", 0xEC77, GR32>;
+ defm CLGRJ : CmpBranchRIEbPair<"clgrj", 0xEC65, GR64>;
+ defm CLIJ : CmpBranchRIEcPair<"clij", 0xEC7F, GR32, imm32zx8>;
+ defm CLGIJ : CmpBranchRIEcPair<"clgij", 0xEC7D, GR64, imm64zx8>;
}
-
- let isTerminator = 1, hasCtrlDep = 1 in {
- def RT : InstRRFc<0xB972, (outs), (ins GR32:$R1, GR32:$R2, ccmask:$M3),
- "crt"##pos1##"\t$R1, $R2"##pos2, []>;
- def GRT : InstRRFc<0xB960, (outs), (ins GR64:$R1, GR64:$R2, ccmask:$M3),
- "cgrt"##pos1##"\t$R1, $R2"##pos2, []>;
- def LRT : InstRRFc<0xB973, (outs), (ins GR32:$R1, GR32:$R2, ccmask:$M3),
- "clrt"##pos1##"\t$R1, $R2"##pos2, []>;
- def LGRT : InstRRFc<0xB961, (outs), (ins GR64:$R1, GR64:$R2, ccmask:$M3),
- "clgrt"##pos1##"\t$R1, $R2"##pos2, []>;
- def IT : InstRIEa<0xEC72, (outs), (ins GR32:$R1, imm32sx16:$I2, ccmask:$M3),
- "cit"##pos1##"\t$R1, $I2"##pos2, []>;
- def GIT : InstRIEa<0xEC70, (outs), (ins GR64:$R1, imm32sx16:$I2, ccmask:$M3),
- "cgit"##pos1##"\t$R1, $I2"##pos2, []>;
- def LFIT : InstRIEa<0xEC73, (outs), (ins GR32:$R1, imm32zx16:$I2, ccmask:$M3),
- "clfit"##pos1##"\t$R1, $I2"##pos2, []>;
- def LGIT : InstRIEa<0xEC71, (outs), (ins GR64:$R1, imm32zx16:$I2, ccmask:$M3),
- "clgit"##pos1##"\t$R1, $I2"##pos2, []>;
- }
-}
-let isCodeGenOnly = 1 in
- defm C : CompareBranches<cond4, "$M3", "">;
-defm AsmC : CompareBranches<imm32zx4, "", ", $M3">;
-
-// Define AsmParser mnemonics for each general condition-code mask
-// (integer or floating-point)
-multiclass CondExtendedMnemonicA<bits<4> ccmask, string name> {
- let isBranch = 1, isTerminator = 1, R1 = ccmask in {
- def J : InstRI<0xA74, (outs), (ins brtarget16:$I2),
- "j"##name##"\t$I2", []>;
- def JG : InstRIL<0xC04, (outs), (ins brtarget32:$I2),
- "jg"##name##"\t$I2", []>;
- def BR : InstRR<0x07, (outs), (ins ADDR64:$R2), "b"##name##"r\t$R2", []>;
+ let isIndirectBranch = 1 in {
+ defm CRB : CmpBranchRRSPair<"crb", 0xECF6, GR32>;
+ defm CGRB : CmpBranchRRSPair<"cgrb", 0xECE4, GR64>;
+ defm CIB : CmpBranchRISPair<"cib", 0xECFE, GR32, imm32sx8>;
+ defm CGIB : CmpBranchRISPair<"cgib", 0xECFC, GR64, imm64sx8>;
+ defm CLRB : CmpBranchRRSPair<"clrb", 0xECF7, GR32>;
+ defm CLGRB : CmpBranchRRSPair<"clgrb", 0xECE5, GR64>;
+ defm CLIB : CmpBranchRISPair<"clib", 0xECFF, GR32, imm32zx8>;
+ defm CLGIB : CmpBranchRISPair<"clgib", 0xECFD, GR64, imm64zx8>;
}
- def LOCR : FixedCondUnaryRRF<"locr"##name, 0xB9F2, GR32, GR32, ccmask>;
- def LOCGR : FixedCondUnaryRRF<"locgr"##name, 0xB9E2, GR64, GR64, ccmask>;
- def LOCHI : FixedCondUnaryRIE<"lochi"##name, 0xEC42, GR64, imm32sx16,
- ccmask>;
- def LOCGHI: FixedCondUnaryRIE<"locghi"##name, 0xEC46, GR64, imm64sx16,
- ccmask>;
- def LOC : FixedCondUnaryRSY<"loc"##name, 0xEBF2, GR32, ccmask, 4>;
- def LOCG : FixedCondUnaryRSY<"locg"##name, 0xEBE2, GR64, ccmask, 8>;
- def STOC : FixedCondStoreRSY<"stoc"##name, 0xEBF3, GR32, ccmask, 4>;
- def STOCG : FixedCondStoreRSY<"stocg"##name, 0xEBE3, GR64, ccmask, 8>;
-}
-
-multiclass CondExtendedMnemonic<bits<4> ccmask, string name1, string name2>
- : CondExtendedMnemonicA<ccmask, name1> {
- let isAsmParserOnly = 1 in
- defm Alt : CondExtendedMnemonicA<ccmask, name2>;
-}
-
-defm AsmO : CondExtendedMnemonicA<1, "o">;
-defm AsmH : CondExtendedMnemonic<2, "h", "p">;
-defm AsmNLE : CondExtendedMnemonicA<3, "nle">;
-defm AsmL : CondExtendedMnemonic<4, "l", "m">;
-defm AsmNHE : CondExtendedMnemonicA<5, "nhe">;
-defm AsmLH : CondExtendedMnemonicA<6, "lh">;
-defm AsmNE : CondExtendedMnemonic<7, "ne", "nz">;
-defm AsmE : CondExtendedMnemonic<8, "e", "z">;
-defm AsmNLH : CondExtendedMnemonicA<9, "nlh">;
-defm AsmHE : CondExtendedMnemonicA<10, "he">;
-defm AsmNL : CondExtendedMnemonic<11, "nl", "nm">;
-defm AsmLE : CondExtendedMnemonicA<12, "le">;
-defm AsmNH : CondExtendedMnemonic<13, "nh", "np">;
-defm AsmNO : CondExtendedMnemonicA<14, "no">;
-
-// Define AsmParser mnemonics for each integer condition-code mask.
-// This is like the list above, except that condition 3 is not possible
-// and that the low bit of the mask is therefore always 0. This means
-// that each condition has two names. Conditions "o" and "no" are not used.
-//
-// We don't make one of the two names an alias of the other because
-// we need the custom parsing routines to select the correct register class.
-multiclass IntCondExtendedMnemonicA<bits<4> ccmask, string name> {
- let isBranch = 1, isTerminator = 1, M3 = ccmask in {
- def CRJ : InstRIEb<0xEC76, (outs), (ins GR32:$R1, GR32:$R2,
- brtarget16:$RI4),
- "crj"##name##"\t$R1, $R2, $RI4", []>;
- def CGRJ : InstRIEb<0xEC64, (outs), (ins GR64:$R1, GR64:$R2,
- brtarget16:$RI4),
- "cgrj"##name##"\t$R1, $R2, $RI4", []>;
- def CIJ : InstRIEc<0xEC7E, (outs), (ins GR32:$R1, imm32sx8:$I2,
- brtarget16:$RI4),
- "cij"##name##"\t$R1, $I2, $RI4", []>;
- def CGIJ : InstRIEc<0xEC7C, (outs), (ins GR64:$R1, imm64sx8:$I2,
- brtarget16:$RI4),
- "cgij"##name##"\t$R1, $I2, $RI4", []>;
- def CLRJ : InstRIEb<0xEC77, (outs), (ins GR32:$R1, GR32:$R2,
- brtarget16:$RI4),
- "clrj"##name##"\t$R1, $R2, $RI4", []>;
- def CLGRJ : InstRIEb<0xEC65, (outs), (ins GR64:$R1, GR64:$R2,
- brtarget16:$RI4),
- "clgrj"##name##"\t$R1, $R2, $RI4", []>;
- def CLIJ : InstRIEc<0xEC7F, (outs), (ins GR32:$R1, imm32zx8:$I2,
- brtarget16:$RI4),
- "clij"##name##"\t$R1, $I2, $RI4", []>;
- def CLGIJ : InstRIEc<0xEC7D, (outs), (ins GR64:$R1, imm64zx8:$I2,
- brtarget16:$RI4),
- "clgij"##name##"\t$R1, $I2, $RI4", []>;
+
+ // Define AsmParser mnemonics for each integer condition-code mask.
+ foreach V = [ "E", "H", "L", "HE", "LE", "LH",
+ "NE", "NH", "NL", "NHE", "NLE", "NLH" ] in {
+ let Defs = [CC] in {
+ def CRJAsm#V : FixedCmpBranchRIEb<ICV<V>, "crj", 0xEC76, GR32>;
+ def CGRJAsm#V : FixedCmpBranchRIEb<ICV<V>, "cgrj", 0xEC64, GR64>;
+ def CIJAsm#V : FixedCmpBranchRIEc<ICV<V>, "cij", 0xEC7E, GR32,
+ imm32sx8>;
+ def CGIJAsm#V : FixedCmpBranchRIEc<ICV<V>, "cgij", 0xEC7C, GR64,
+ imm64sx8>;
+ def CLRJAsm#V : FixedCmpBranchRIEb<ICV<V>, "clrj", 0xEC77, GR32>;
+ def CLGRJAsm#V : FixedCmpBranchRIEb<ICV<V>, "clgrj", 0xEC65, GR64>;
+ def CLIJAsm#V : FixedCmpBranchRIEc<ICV<V>, "clij", 0xEC7F, GR32,
+ imm32zx8>;
+ def CLGIJAsm#V : FixedCmpBranchRIEc<ICV<V>, "clgij", 0xEC7D, GR64,
+ imm64zx8>;
+ }
let isIndirectBranch = 1 in {
- def CRB : InstRRS<0xECF6, (outs), (ins GR32:$R1, GR32:$R2,
- bdaddr12only:$BD4),
- "crb"##name##"\t$R1, $R2, $BD4", []>;
- def CGRB : InstRRS<0xECE4, (outs), (ins GR64:$R1, GR64:$R2,
- bdaddr12only:$BD4),
- "cgrb"##name##"\t$R1, $R2, $BD4", []>;
- def CIB : InstRIS<0xECFE, (outs), (ins GR32:$R1, imm32sx8:$I2,
- bdaddr12only:$BD4),
- "cib"##name##"\t$R1, $I2, $BD4", []>;
- def CGIB : InstRIS<0xECFC, (outs), (ins GR64:$R1, imm64sx8:$I2,
- bdaddr12only:$BD4),
- "cgib"##name##"\t$R1, $I2, $BD4", []>;
- def CLRB : InstRRS<0xECF7, (outs), (ins GR32:$R1, GR32:$R2,
- bdaddr12only:$BD4),
- "clrb"##name##"\t$R1, $R2, $BD4", []>;
- def CLGRB : InstRRS<0xECE5, (outs), (ins GR64:$R1, GR64:$R2,
- bdaddr12only:$BD4),
- "clgrb"##name##"\t$R1, $R2, $BD4", []>;
- def CLIB : InstRIS<0xECFF, (outs), (ins GR32:$R1, imm32zx8:$I2,
- bdaddr12only:$BD4),
- "clib"##name##"\t$R1, $I2, $BD4", []>;
- def CLGIB : InstRIS<0xECFD, (outs), (ins GR64:$R1, imm64zx8:$I2,
- bdaddr12only:$BD4),
- "clgib"##name##"\t$R1, $I2, $BD4", []>;
+ def CRBAsm#V : FixedCmpBranchRRS<ICV<V>, "crb", 0xECF6, GR32>;
+ def CGRBAsm#V : FixedCmpBranchRRS<ICV<V>, "cgrb", 0xECE4, GR64>;
+ def CIBAsm#V : FixedCmpBranchRIS<ICV<V>, "cib", 0xECFE, GR32,
+ imm32sx8>;
+ def CGIBAsm#V : FixedCmpBranchRIS<ICV<V>, "cgib", 0xECFC, GR64,
+ imm64sx8>;
+ def CLRBAsm#V : FixedCmpBranchRRS<ICV<V>, "clrb", 0xECF7, GR32>;
+ def CLGRBAsm#V : FixedCmpBranchRRS<ICV<V>, "clgrb", 0xECE5, GR64>;
+ def CLIBAsm#V : FixedCmpBranchRIS<ICV<V>, "clib", 0xECFF, GR32,
+ imm32zx8>;
+ def CLGIBAsm#V : FixedCmpBranchRIS<ICV<V>, "clgib", 0xECFD, GR64,
+ imm64zx8>;
}
}
+}
- let hasCtrlDep = 1, isTerminator = 1, M3 = ccmask in {
- def CRT : InstRRFc<0xB972, (outs), (ins GR32:$R1, GR32:$R2),
- "crt"##name##"\t$R1, $R2", []>;
- def CGRT : InstRRFc<0xB960, (outs), (ins GR64:$R1, GR64:$R2),
- "cgrt"##name##"\t$R1, $R2", []>;
- def CLRT : InstRRFc<0xB973, (outs), (ins GR32:$R1, GR32:$R2),
- "clrt"##name##"\t$R1, $R2", []>;
- def CLGRT : InstRRFc<0xB961, (outs), (ins GR64:$R1, GR64:$R2),
- "clgrt"##name##"\t$R1, $R2", []>;
- def CIT : InstRIEa<0xEC72, (outs), (ins GR32:$R1, imm32sx16:$I2),
- "cit"##name##"\t$R1, $I2", []>;
- def CGIT : InstRIEa<0xEC70, (outs), (ins GR64:$R1, imm32sx16:$I2),
- "cgit"##name##"\t$R1, $I2", []>;
- def CLFIT : InstRIEa<0xEC73, (outs), (ins GR32:$R1, imm32zx16:$I2),
- "clfit"##name##"\t$R1, $I2", []>;
- def CLGIT : InstRIEa<0xEC71, (outs), (ins GR64:$R1, imm32zx16:$I2),
- "clgit"##name##"\t$R1, $I2", []>;
+// Decrement a register and branch if it is nonzero. These don't clobber CC,
+// but we might need to split long relative branches into sequences that do.
+let isBranch = 1, isTerminator = 1 in {
+ let Defs = [CC] in {
+ def BRCT : BranchUnaryRI<"brct", 0xA76, GR32>;
+ def BRCTG : BranchUnaryRI<"brctg", 0xA77, GR64>;
}
+ // This doesn't need to clobber CC since we never need to split it.
+ def BRCTH : BranchUnaryRIL<"brcth", 0xCC6, GRH32>,
+ Requires<[FeatureHighWord]>;
+
+ def BCT : BranchUnaryRX<"bct", 0x46, GR32>;
+ def BCTR : BranchUnaryRR<"bctr", 0x06, GR32>;
+ def BCTG : BranchUnaryRXY<"bctg", 0xE346, GR64>;
+ def BCTGR : BranchUnaryRRE<"bctgr", 0xB946, GR64>;
}
-multiclass IntCondExtendedMnemonic<bits<4> ccmask, string name1, string name2>
- : IntCondExtendedMnemonicA<ccmask, name1> {
- let isAsmParserOnly = 1 in
- defm Alt : IntCondExtendedMnemonicA<ccmask, name2>;
-}
-defm AsmJH : IntCondExtendedMnemonic<2, "h", "nle">;
-defm AsmJL : IntCondExtendedMnemonic<4, "l", "nhe">;
-defm AsmJLH : IntCondExtendedMnemonic<6, "lh", "ne">;
-defm AsmJE : IntCondExtendedMnemonic<8, "e", "nlh">;
-defm AsmJHE : IntCondExtendedMnemonic<10, "he", "nl">;
-defm AsmJLE : IntCondExtendedMnemonic<12, "le", "nh">;
-// Decrement a register and branch if it is nonzero. These don't clobber CC,
-// but we might need to split long branches into sequences that do.
-let Defs = [CC] in {
- def BRCT : BranchUnaryRI<"brct", 0xA76, GR32>;
- def BRCTG : BranchUnaryRI<"brctg", 0xA77, GR64>;
+let isBranch = 1, isTerminator = 1 in {
+ let Defs = [CC] in {
+ def BRXH : BranchBinaryRSI<"brxh", 0x84, GR32>;
+ def BRXLE : BranchBinaryRSI<"brxle", 0x85, GR32>;
+ def BRXHG : BranchBinaryRIEe<"brxhg", 0xEC44, GR64>;
+ def BRXLG : BranchBinaryRIEe<"brxlg", 0xEC45, GR64>;
+ }
+ def BXH : BranchBinaryRS<"bxh", 0x86, GR32>;
+ def BXLE : BranchBinaryRS<"bxle", 0x87, GR32>;
+ def BXHG : BranchBinaryRSY<"bxhg", 0xEB44, GR64>;
+ def BXLEG : BranchBinaryRSY<"bxleg", 0xEB45, GR64>;
}
//===----------------------------------------------------------------------===//
-// Select instructions
+// Trap instructions
//===----------------------------------------------------------------------===//
-def Select32Mux : SelectWrapper<GRX32>, Requires<[FeatureHighWord]>;
-def Select32 : SelectWrapper<GR32>;
-def Select64 : SelectWrapper<GR64>;
+// Unconditional trap.
+// FIXME: This trap instruction should be marked as isTerminator, but there is
+// currently a general bug that allows non-terminators to be placed between
+// terminators. Temporarily leave this unmarked until the bug is fixed.
+let isBarrier = 1, hasCtrlDep = 1 in
+ def Trap : Alias<4, (outs), (ins), [(trap)]>;
-// We don't define 32-bit Mux stores because the low-only STOC should
-// always be used if possible.
-defm CondStore8Mux : CondStores<GRX32, nonvolatile_truncstorei8,
- nonvolatile_anyextloadi8, bdxaddr20only>,
- Requires<[FeatureHighWord]>;
-defm CondStore16Mux : CondStores<GRX32, nonvolatile_truncstorei16,
- nonvolatile_anyextloadi16, bdxaddr20only>,
- Requires<[FeatureHighWord]>;
-defm CondStore8 : CondStores<GR32, nonvolatile_truncstorei8,
- nonvolatile_anyextloadi8, bdxaddr20only>;
-defm CondStore16 : CondStores<GR32, nonvolatile_truncstorei16,
- nonvolatile_anyextloadi16, bdxaddr20only>;
-defm CondStore32 : CondStores<GR32, nonvolatile_store,
- nonvolatile_load, bdxaddr20only>;
+// Conditional trap.
+let isTerminator = 1, hasCtrlDep = 1, Uses = [CC] in
+ def CondTrap : Alias<4, (outs), (ins cond4:$valid, cond4:$R1), []>;
-defm : CondStores64<CondStore8, CondStore8Inv, nonvolatile_truncstorei8,
- nonvolatile_anyextloadi8, bdxaddr20only>;
-defm : CondStores64<CondStore16, CondStore16Inv, nonvolatile_truncstorei16,
- nonvolatile_anyextloadi16, bdxaddr20only>;
-defm : CondStores64<CondStore32, CondStore32Inv, nonvolatile_truncstorei32,
- nonvolatile_anyextloadi32, bdxaddr20only>;
-defm CondStore64 : CondStores<GR64, nonvolatile_store,
- nonvolatile_load, bdxaddr20only>;
+// Fused compare-and-trap instructions.
+let isTerminator = 1, hasCtrlDep = 1 in {
+ // These patterns work the same way as for compare-and-branch.
+ defm CRT : CmpBranchRRFcPair<"crt", 0xB972, GR32>;
+ defm CGRT : CmpBranchRRFcPair<"cgrt", 0xB960, GR64>;
+ defm CLRT : CmpBranchRRFcPair<"clrt", 0xB973, GR32>;
+ defm CLGRT : CmpBranchRRFcPair<"clgrt", 0xB961, GR64>;
+ defm CIT : CmpBranchRIEaPair<"cit", 0xEC72, GR32, imm32sx16>;
+ defm CGIT : CmpBranchRIEaPair<"cgit", 0xEC70, GR64, imm64sx16>;
+ defm CLFIT : CmpBranchRIEaPair<"clfit", 0xEC73, GR32, imm32zx16>;
+ defm CLGIT : CmpBranchRIEaPair<"clgit", 0xEC71, GR64, imm64zx16>;
+ let Predicates = [FeatureMiscellaneousExtensions] in {
+ defm CLT : CmpBranchRSYbPair<"clt", 0xEB23, GR32>;
+ defm CLGT : CmpBranchRSYbPair<"clgt", 0xEB2B, GR64>;
+ }
+
+ foreach V = [ "E", "H", "L", "HE", "LE", "LH",
+ "NE", "NH", "NL", "NHE", "NLE", "NLH" ] in {
+ def CRTAsm#V : FixedCmpBranchRRFc<ICV<V>, "crt", 0xB972, GR32>;
+ def CGRTAsm#V : FixedCmpBranchRRFc<ICV<V>, "cgrt", 0xB960, GR64>;
+ def CLRTAsm#V : FixedCmpBranchRRFc<ICV<V>, "clrt", 0xB973, GR32>;
+ def CLGRTAsm#V : FixedCmpBranchRRFc<ICV<V>, "clgrt", 0xB961, GR64>;
+ def CITAsm#V : FixedCmpBranchRIEa<ICV<V>, "cit", 0xEC72, GR32,
+ imm32sx16>;
+ def CGITAsm#V : FixedCmpBranchRIEa<ICV<V>, "cgit", 0xEC70, GR64,
+ imm64sx16>;
+ def CLFITAsm#V : FixedCmpBranchRIEa<ICV<V>, "clfit", 0xEC73, GR32,
+ imm32zx16>;
+ def CLGITAsm#V : FixedCmpBranchRIEa<ICV<V>, "clgit", 0xEC71, GR64,
+ imm64zx16>;
+ let Predicates = [FeatureMiscellaneousExtensions] in {
+ def CLTAsm#V : FixedCmpBranchRSYb<ICV<V>, "clt", 0xEB23, GR32>;
+ def CLGTAsm#V : FixedCmpBranchRSYb<ICV<V>, "clgt", 0xEB2B, GR64>;
+ }
+ }
+}
//===----------------------------------------------------------------------===//
-// Call instructions
+// Call and return instructions
//===----------------------------------------------------------------------===//
+// Define the general form of the call instructions for the asm parser.
+// These instructions don't hard-code %r14 as the return address register.
+let isCall = 1, Defs = [CC] in {
+ def BRAS : CallRI <"bras", 0xA75>;
+ def BRASL : CallRIL<"brasl", 0xC05>;
+ def BAS : CallRX <"bas", 0x4D>;
+ def BASR : CallRR <"basr", 0x0D>;
+}
+
+// Regular calls.
let isCall = 1, Defs = [R14D, CC] in {
def CallBRASL : Alias<6, (outs), (ins pcrel32:$I2, variable_ops),
[(z_call pcrel32:$I2)]>;
@@ -378,6 +257,15 @@ let isCall = 1, Defs = [R14D, CC] in {
[(z_call ADDR64:$R2)]>;
}
+// TLS calls. These will be lowered into a call to __tls_get_offset,
+// with an extra relocation specifying the TLS symbol.
+let isCall = 1, Defs = [R14D, CC] in {
+ def TLS_GDCALL : Alias<6, (outs), (ins tlssym:$I2, variable_ops),
+ [(z_tls_gdcall tglobaltlsaddr:$I2)]>;
+ def TLS_LDCALL : Alias<6, (outs), (ins tlssym:$I2, variable_ops),
+ [(z_tls_ldcall tglobaltlsaddr:$I2)]>;
+}
+
// Sibling calls. Indirect sibling calls must be via R1, since R2 upwards
// are argument registers and since branching to R0 is a no-op.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
@@ -387,10 +275,10 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in {
def CallBR : Alias<2, (outs), (ins), [(z_sibcall R1D)]>;
}
+// Conditional sibling calls.
let CCMaskFirst = 1, isCall = 1, isTerminator = 1, isReturn = 1 in {
def CallBRCL : Alias<6, (outs), (ins cond4:$valid, cond4:$R1,
pcrel32:$I2), []>;
-
let Uses = [R1D] in
def CallBCR : Alias<2, (outs), (ins cond4:$valid, cond4:$R1), []>;
}
@@ -407,60 +295,76 @@ let isCall = 1, isTerminator = 1, isReturn = 1, Uses = [R1D] in {
def CLGIBCall : Alias<6, (outs), (ins GR64:$R1, imm64zx8:$I2, cond4:$M3), []>;
}
-// TLS calls. These will be lowered into a call to __tls_get_offset,
-// with an extra relocation specifying the TLS symbol.
-let isCall = 1, Defs = [R14D, CC] in {
- def TLS_GDCALL : Alias<6, (outs), (ins tlssym:$I2, variable_ops),
- [(z_tls_gdcall tglobaltlsaddr:$I2)]>;
- def TLS_LDCALL : Alias<6, (outs), (ins tlssym:$I2, variable_ops),
- [(z_tls_ldcall tglobaltlsaddr:$I2)]>;
-}
+// A return instruction (br %r14).
+let isReturn = 1, isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in
+ def Return : Alias<2, (outs), (ins), [(z_retflag)]>;
-// Define the general form of the call instructions for the asm parser.
-// These instructions don't hard-code %r14 as the return address register.
-// Allow an optional TLS marker symbol to generate TLS call relocations.
-let isCall = 1, Defs = [CC] in {
- def BRAS : InstRI<0xA75, (outs), (ins GR64:$R1, brtarget16tls:$I2),
- "bras\t$R1, $I2", []>;
- def BRASL : InstRIL<0xC05, (outs), (ins GR64:$R1, brtarget32tls:$I2),
- "brasl\t$R1, $I2", []>;
- def BASR : InstRR<0x0D, (outs), (ins GR64:$R1, ADDR64:$R2),
- "basr\t$R1, $R2", []>;
+// A conditional return instruction (bcr <cond>, %r14).
+let isReturn = 1, isTerminator = 1, hasCtrlDep = 1, CCMaskFirst = 1,
+    Uses = [CC] in
+ def CondReturn : Alias<2, (outs), (ins cond4:$valid, cond4:$R1), []>;
+
+// Fused compare and conditional returns.
+let isReturn = 1, isTerminator = 1, hasCtrlDep = 1 in {
+ def CRBReturn : Alias<6, (outs), (ins GR32:$R1, GR32:$R2, cond4:$M3), []>;
+ def CGRBReturn : Alias<6, (outs), (ins GR64:$R1, GR64:$R2, cond4:$M3), []>;
+ def CIBReturn : Alias<6, (outs), (ins GR32:$R1, imm32sx8:$I2, cond4:$M3), []>;
+ def CGIBReturn : Alias<6, (outs), (ins GR64:$R1, imm64sx8:$I2, cond4:$M3), []>;
+ def CLRBReturn : Alias<6, (outs), (ins GR32:$R1, GR32:$R2, cond4:$M3), []>;
+ def CLGRBReturn : Alias<6, (outs), (ins GR64:$R1, GR64:$R2, cond4:$M3), []>;
+ def CLIBReturn : Alias<6, (outs), (ins GR32:$R1, imm32zx8:$I2, cond4:$M3), []>;
+ def CLGIBReturn : Alias<6, (outs), (ins GR64:$R1, imm64zx8:$I2, cond4:$M3), []>;
}
//===----------------------------------------------------------------------===//
+// Select instructions
+//===----------------------------------------------------------------------===//
+
+def Select32Mux : SelectWrapper<GRX32>, Requires<[FeatureHighWord]>;
+def Select32 : SelectWrapper<GR32>;
+def Select64 : SelectWrapper<GR64>;
+
+// We don't define 32-bit Mux stores if we don't have STOCFH, because the
+// low-only STOC should then always be used if possible.
+defm CondStore8Mux : CondStores<GRX32, nonvolatile_truncstorei8,
+ nonvolatile_anyextloadi8, bdxaddr20only>,
+ Requires<[FeatureHighWord]>;
+defm CondStore16Mux : CondStores<GRX32, nonvolatile_truncstorei16,
+ nonvolatile_anyextloadi16, bdxaddr20only>,
+ Requires<[FeatureHighWord]>;
+defm CondStore32Mux : CondStores<GRX32, nonvolatile_store,
+ nonvolatile_load, bdxaddr20only>,
+ Requires<[FeatureLoadStoreOnCond2]>;
+defm CondStore8 : CondStores<GR32, nonvolatile_truncstorei8,
+ nonvolatile_anyextloadi8, bdxaddr20only>;
+defm CondStore16 : CondStores<GR32, nonvolatile_truncstorei16,
+ nonvolatile_anyextloadi16, bdxaddr20only>;
+defm CondStore32 : CondStores<GR32, nonvolatile_store,
+ nonvolatile_load, bdxaddr20only>;
+
+defm : CondStores64<CondStore8, CondStore8Inv, nonvolatile_truncstorei8,
+ nonvolatile_anyextloadi8, bdxaddr20only>;
+defm : CondStores64<CondStore16, CondStore16Inv, nonvolatile_truncstorei16,
+ nonvolatile_anyextloadi16, bdxaddr20only>;
+defm : CondStores64<CondStore32, CondStore32Inv, nonvolatile_truncstorei32,
+ nonvolatile_anyextloadi32, bdxaddr20only>;
+defm CondStore64 : CondStores<GR64, nonvolatile_store,
+ nonvolatile_load, bdxaddr20only>;
+
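As a rough C sketch (not part of the patch; the function name is made up), the CondStore* patterns describe a store whose value is a select between the new value and what is already in memory, which a store-on-condition instruction can then implement without a branch or the extra load:

#include <stdint.h>

/* What the CondStore* patterns express at the IR level: memory ends up
   holding either the new value or its previous contents, depending on CC. */
void cond_store32(uint32_t *p, uint32_t val, int cc) {
    *p = cc ? val : *p;   /* candidate for a store-on-condition */
}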
+//===----------------------------------------------------------------------===//
// Move instructions
//===----------------------------------------------------------------------===//
// Register moves.
let hasSideEffects = 0 in {
// Expands to LR, RISBHG or RISBLG, depending on the choice of registers.
- def LRMux : UnaryRRPseudo<"l", null_frag, GRX32, GRX32>,
+ def LRMux : UnaryRRPseudo<"lr", null_frag, GRX32, GRX32>,
Requires<[FeatureHighWord]>;
- def LR : UnaryRR <"l", 0x18, null_frag, GR32, GR32>;
- def LGR : UnaryRRE<"lg", 0xB904, null_frag, GR64, GR64>;
+ def LR : UnaryRR <"lr", 0x18, null_frag, GR32, GR32>;
+ def LGR : UnaryRRE<"lgr", 0xB904, null_frag, GR64, GR64>;
}
let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in {
- def LTR : UnaryRR <"lt", 0x12, null_frag, GR32, GR32>;
- def LTGR : UnaryRRE<"ltg", 0xB902, null_frag, GR64, GR64>;
-}
-
-// Move on condition.
-let isCodeGenOnly = 1, Uses = [CC] in {
- def LOCR : CondUnaryRRF<"loc", 0xB9F2, GR32, GR32>;
- def LOCGR : CondUnaryRRF<"locg", 0xB9E2, GR64, GR64>;
-}
-let Uses = [CC] in {
- def AsmLOCR : AsmCondUnaryRRF<"loc", 0xB9F2, GR32, GR32>;
- def AsmLOCGR : AsmCondUnaryRRF<"locg", 0xB9E2, GR64, GR64>;
-}
-let isCodeGenOnly = 1, Uses = [CC] in {
- def LOCHI : CondUnaryRIE<"lochi", 0xEC42, GR32, imm32sx16>;
- def LOCGHI : CondUnaryRIE<"locghi", 0xEC46, GR64, imm64sx16>;
-}
-let Uses = [CC] in {
- def AsmLOCHI : AsmCondUnaryRIE<"lochi", 0xEC42, GR32, imm32sx16>;
- def AsmLOCGHI : AsmCondUnaryRIE<"locghi", 0xEC46, GR64, imm64sx16>;
+ def LTR : UnaryRR <"ltr", 0x12, null_frag, GR32, GR32>;
+ def LTGR : UnaryRRE<"ltgr", 0xB902, null_frag, GR64, GR64>;
}
// Immediate moves.
@@ -512,14 +416,21 @@ let canFoldAsLoad = 1 in {
def LGRL : UnaryRILPC<"lgrl", 0xC48, aligned_load, GR64>;
}
-// Load on condition.
-let isCodeGenOnly = 1, Uses = [CC] in {
- def LOC : CondUnaryRSY<"loc", 0xEBF2, nonvolatile_load, GR32, 4>;
- def LOCG : CondUnaryRSY<"locg", 0xEBE2, nonvolatile_load, GR64, 8>;
+// Load and zero rightmost byte.
+let Predicates = [FeatureLoadAndZeroRightmostByte] in {
+ def LZRF : UnaryRXY<"lzrf", 0xE33B, null_frag, GR32, 4>;
+ def LZRG : UnaryRXY<"lzrg", 0xE32A, null_frag, GR64, 8>;
+ def : Pat<(and (i32 (load bdxaddr20only:$src)), 0xffffff00),
+ (LZRF bdxaddr20only:$src)>;
+ def : Pat<(and (i64 (load bdxaddr20only:$src)), 0xffffffffffffff00),
+ (LZRG bdxaddr20only:$src)>;
}
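For illustration only (plain C, names invented), the two patterns above match a load whose rightmost byte is immediately cleared:

#include <stdint.h>

uint32_t load_zero_rightmost_32(const uint32_t *p) {
    return *p & 0xffffff00u;            /* (and (load ...), 0xffffff00) */
}

uint64_t load_zero_rightmost_64(const uint64_t *p) {
    return *p & 0xffffffffffffff00ull;  /* 64-bit variant of the same idea */
}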
-let Uses = [CC] in {
- def AsmLOC : AsmCondUnaryRSY<"loc", 0xEBF2, GR32, 4>;
- def AsmLOCG : AsmCondUnaryRSY<"locg", 0xEBE2, GR64, 8>;
+
+// Load and trap.
+let Predicates = [FeatureLoadAndTrap] in {
+ def LAT : UnaryRXY<"lat", 0xE39F, null_frag, GR32, 4>;
+ def LFHAT : UnaryRXY<"lfhat", 0xE3C8, null_frag, GRH32, 4>;
+ def LGAT : UnaryRXY<"lgat", 0xE385, null_frag, GR64, 8>;
}
// Register stores.
@@ -542,16 +453,6 @@ let SimpleBDXStore = 1 in {
def STRL : StoreRILPC<"strl", 0xC4F, aligned_store, GR32>;
def STGRL : StoreRILPC<"stgrl", 0xC4B, aligned_store, GR64>;
-// Store on condition.
-let isCodeGenOnly = 1, Uses = [CC] in {
- def STOC : CondStoreRSY<"stoc", 0xEBF3, GR32, 4>;
- def STOCG : CondStoreRSY<"stocg", 0xEBE3, GR64, 8>;
-}
-let Uses = [CC] in {
- def AsmSTOC : AsmCondStoreRSY<"stoc", 0xEBF3, GR32, 4>;
- def AsmSTOCG : AsmCondStoreRSY<"stocg", 0xEBE3, GR64, 8>;
-}
-
// 8-bit immediate stores to 8-bit fields.
defm MVI : StoreSIPair<"mvi", 0x92, 0xEB52, truncstorei8, imm32zx8trunc>;
@@ -569,6 +470,82 @@ let mayLoad = 1, mayStore = 1, Defs = [CC] in
defm MVST : StringRRE<"mvst", 0xB255, z_stpcpy>;
//===----------------------------------------------------------------------===//
+// Conditional move instructions
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureLoadStoreOnCond2], Uses = [CC] in {
+ // Load immediate on condition. Matched via DAG pattern and created
+ // by the PeepholeOptimizer via FoldImmediate.
+ let hasSideEffects = 0 in {
+ // Expands to LOCHI or LOCHHI, depending on the choice of register.
+ def LOCHIMux : CondBinaryRIEPseudo<GRX32, imm32sx16>;
+ defm LOCHHI : CondBinaryRIEPair<"lochhi", 0xEC4E, GRH32, imm32sx16>;
+ defm LOCHI : CondBinaryRIEPair<"lochi", 0xEC42, GR32, imm32sx16>;
+ defm LOCGHI : CondBinaryRIEPair<"locghi", 0xEC46, GR64, imm64sx16>;
+ }
+
+ // Move register on condition. Expanded from Select* pseudos and
+ // created by early if-conversion.
+ let hasSideEffects = 0, isCommutable = 1 in {
+ // Expands to LOCR or LOCFHR or a branch-and-move sequence,
+ // depending on the choice of registers.
+ def LOCRMux : CondBinaryRRFPseudo<GRX32, GRX32>;
+ defm LOCFHR : CondBinaryRRFPair<"locfhr", 0xB9E0, GRH32, GRH32>;
+ }
+
+ // Load on condition. Matched via DAG pattern.
+ // Expands to LOC or LOCFH, depending on the choice of register.
+ def LOCMux : CondUnaryRSYPseudo<nonvolatile_load, GRX32, 4>;
+ defm LOCFH : CondUnaryRSYPair<"locfh", 0xEBE0, nonvolatile_load, GRH32, 4>;
+
+ // Store on condition. Expanded from CondStore* pseudos.
+ // Expands to STOC or STOCFH, depending on the choice of register.
+ def STOCMux : CondStoreRSYPseudo<GRX32, 4>;
+ defm STOCFH : CondStoreRSYPair<"stocfh", 0xEBE1, GRH32, 4>;
+
+ // Define AsmParser extended mnemonics for each general condition-code mask.
+ foreach V = [ "E", "NE", "H", "NH", "L", "NL", "HE", "NHE", "LE", "NLE",
+ "Z", "NZ", "P", "NP", "M", "NM", "LH", "NLH", "O", "NO" ] in {
+ def LOCHIAsm#V : FixedCondBinaryRIE<CV<V>, "lochi", 0xEC42, GR32,
+ imm32sx16>;
+ def LOCGHIAsm#V : FixedCondBinaryRIE<CV<V>, "locghi", 0xEC46, GR64,
+ imm64sx16>;
+ def LOCHHIAsm#V : FixedCondBinaryRIE<CV<V>, "lochhi", 0xEC4E, GRH32,
+ imm32sx16>;
+ def LOCFHRAsm#V : FixedCondBinaryRRF<CV<V>, "locfhr", 0xB9E0, GRH32, GRH32>;
+ def LOCFHAsm#V : FixedCondUnaryRSY<CV<V>, "locfh", 0xEBE0, GRH32, 4>;
+ def STOCFHAsm#V : FixedCondStoreRSY<CV<V>, "stocfh", 0xEBE1, GRH32, 4>;
+ }
+}
+
+let Predicates = [FeatureLoadStoreOnCond], Uses = [CC] in {
+ // Move register on condition. Expanded from Select* pseudos and
+ // created by early if-conversion.
+ let hasSideEffects = 0, isCommutable = 1 in {
+ defm LOCR : CondBinaryRRFPair<"locr", 0xB9F2, GR32, GR32>;
+ defm LOCGR : CondBinaryRRFPair<"locgr", 0xB9E2, GR64, GR64>;
+ }
+
+ // Load on condition. Matched via DAG pattern.
+ defm LOC : CondUnaryRSYPair<"loc", 0xEBF2, nonvolatile_load, GR32, 4>;
+ defm LOCG : CondUnaryRSYPair<"locg", 0xEBE2, nonvolatile_load, GR64, 8>;
+
+ // Store on condition. Expanded from CondStore* pseudos.
+ defm STOC : CondStoreRSYPair<"stoc", 0xEBF3, GR32, 4>;
+ defm STOCG : CondStoreRSYPair<"stocg", 0xEBE3, GR64, 8>;
+
+ // Define AsmParser extended mnemonics for each general condition-code mask.
+ foreach V = [ "E", "NE", "H", "NH", "L", "NL", "HE", "NHE", "LE", "NLE",
+ "Z", "NZ", "P", "NP", "M", "NM", "LH", "NLH", "O", "NO" ] in {
+ def LOCRAsm#V : FixedCondBinaryRRF<CV<V>, "locr", 0xB9F2, GR32, GR32>;
+ def LOCGRAsm#V : FixedCondBinaryRRF<CV<V>, "locgr", 0xB9E2, GR64, GR64>;
+ def LOCAsm#V : FixedCondUnaryRSY<CV<V>, "loc", 0xEBF2, GR32, 4>;
+ def LOCGAsm#V : FixedCondUnaryRSY<CV<V>, "locg", 0xEBE2, GR64, 8>;
+ def STOCAsm#V : FixedCondStoreRSY<CV<V>, "stoc", 0xEBF3, GR32, 4>;
+ def STOCGAsm#V : FixedCondStoreRSY<CV<V>, "stocg", 0xEBE3, GR64, 8>;
+ }
+}
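A minimal C model of what the move-on-condition definitions above implement (illustrative name; the condition is really the CC register, not a C argument):

#include <stdint.h>

/* LOCR/LOCGR semantics: the destination keeps its old value unless the
   condition holds, in which case it takes the source value. */
uint32_t move_on_condition(uint32_t dst, uint32_t src, int cc) {
    return cc ? src : dst;
}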
+//===----------------------------------------------------------------------===//
// Sign extensions
//===----------------------------------------------------------------------===//
//
@@ -581,18 +558,18 @@ let mayLoad = 1, mayStore = 1, Defs = [CC] in
// 32-bit extensions from registers.
let hasSideEffects = 0 in {
- def LBR : UnaryRRE<"lb", 0xB926, sext8, GR32, GR32>;
- def LHR : UnaryRRE<"lh", 0xB927, sext16, GR32, GR32>;
+ def LBR : UnaryRRE<"lbr", 0xB926, sext8, GR32, GR32>;
+ def LHR : UnaryRRE<"lhr", 0xB927, sext16, GR32, GR32>;
}
// 64-bit extensions from registers.
let hasSideEffects = 0 in {
- def LGBR : UnaryRRE<"lgb", 0xB906, sext8, GR64, GR64>;
- def LGHR : UnaryRRE<"lgh", 0xB907, sext16, GR64, GR64>;
- def LGFR : UnaryRRE<"lgf", 0xB914, sext32, GR64, GR32>;
+ def LGBR : UnaryRRE<"lgbr", 0xB906, sext8, GR64, GR64>;
+ def LGHR : UnaryRRE<"lghr", 0xB907, sext16, GR64, GR64>;
+ def LGFR : UnaryRRE<"lgfr", 0xB914, sext32, GR64, GR32>;
}
let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in
- def LTGFR : UnaryRRE<"ltgf", 0xB912, null_frag, GR64, GR32>;
+ def LTGFR : UnaryRRE<"ltgfr", 0xB912, null_frag, GR64, GR32>;
// Match 32-to-64-bit sign extensions in which the source is already
// in a 64-bit register.
@@ -632,20 +609,20 @@ let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in
// 32-bit extensions from registers.
let hasSideEffects = 0 in {
// Expands to LLCR or RISB[LH]G, depending on the choice of registers.
- def LLCRMux : UnaryRRPseudo<"llc", zext8, GRX32, GRX32>,
+ def LLCRMux : UnaryRRPseudo<"llcr", zext8, GRX32, GRX32>,
Requires<[FeatureHighWord]>;
- def LLCR : UnaryRRE<"llc", 0xB994, zext8, GR32, GR32>;
+ def LLCR : UnaryRRE<"llcr", 0xB994, zext8, GR32, GR32>;
// Expands to LLHR or RISB[LH]G, depending on the choice of registers.
- def LLHRMux : UnaryRRPseudo<"llh", zext16, GRX32, GRX32>,
+ def LLHRMux : UnaryRRPseudo<"llhr", zext16, GRX32, GRX32>,
Requires<[FeatureHighWord]>;
- def LLHR : UnaryRRE<"llh", 0xB995, zext16, GR32, GR32>;
+ def LLHR : UnaryRRE<"llhr", 0xB995, zext16, GR32, GR32>;
}
// 64-bit extensions from registers.
let hasSideEffects = 0 in {
- def LLGCR : UnaryRRE<"llgc", 0xB984, zext8, GR64, GR64>;
- def LLGHR : UnaryRRE<"llgh", 0xB985, zext16, GR64, GR64>;
- def LLGFR : UnaryRRE<"llgf", 0xB916, zext32, GR64, GR32>;
+ def LLGCR : UnaryRRE<"llgcr", 0xB984, zext8, GR64, GR64>;
+ def LLGHR : UnaryRRE<"llghr", 0xB985, zext16, GR64, GR64>;
+ def LLGFR : UnaryRRE<"llgfr", 0xB916, zext32, GR64, GR32>;
}
// Match 32-to-64-bit zero extensions in which the source is already
@@ -677,6 +654,27 @@ def LLGF : UnaryRXY<"llgf", 0xE316, azextloadi32, GR64, 4>;
def LLGHRL : UnaryRILPC<"llghrl", 0xC46, aligned_azextloadi16, GR64>;
def LLGFRL : UnaryRILPC<"llgfrl", 0xC4E, aligned_azextloadi32, GR64>;
+// 31-to-64-bit zero extensions.
+def LLGTR : UnaryRRE<"llgtr", 0xB917, null_frag, GR64, GR64>;
+def LLGT : UnaryRXY<"llgt", 0xE317, null_frag, GR64, 4>;
+def : Pat<(and GR64:$src, 0x7fffffff),
+ (LLGTR GR64:$src)>;
+def : Pat<(and (i64 (azextloadi32 bdxaddr20only:$src)), 0x7fffffff),
+ (LLGT bdxaddr20only:$src)>;
+
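The two patterns above correspond to masking with 0x7fffffff, i.e. a 31-bit zero extension; a plain-C sketch (function names are illustrative):

#include <stdint.h>

uint64_t zext31(uint64_t x) {
    return x & 0x7fffffffull;              /* register form, matched to LLGTR */
}

uint64_t zext31_load(const uint32_t *p) {
    return (uint64_t)*p & 0x7fffffffull;   /* load form, matched to LLGT */
}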
+// Load and zero rightmost byte.
+let Predicates = [FeatureLoadAndZeroRightmostByte] in {
+ def LLZRGF : UnaryRXY<"llzrgf", 0xE33A, null_frag, GR64, 4>;
+ def : Pat<(and (i64 (azextloadi32 bdxaddr20only:$src)), 0xffffff00),
+ (LLZRGF bdxaddr20only:$src)>;
+}
+
+// Load and trap.
+let Predicates = [FeatureLoadAndTrap] in {
+ def LLGFAT : UnaryRXY<"llgfat", 0xE39D, null_frag, GR64, 4>;
+ def LLGTAT : UnaryRXY<"llgtat", 0xE39C, null_frag, GR64, 4>;
+}
+
//===----------------------------------------------------------------------===//
// Truncations
//===----------------------------------------------------------------------===//
@@ -729,8 +727,8 @@ def STMH : StoreMultipleRSY<"stmh", 0xEB26, GRH32>;
// Byte-swapping register moves.
let hasSideEffects = 0 in {
- def LRVR : UnaryRRE<"lrv", 0xB91F, bswap, GR32, GR32>;
- def LRVGR : UnaryRRE<"lrvg", 0xB90F, bswap, GR64, GR64>;
+ def LRVR : UnaryRRE<"lrvr", 0xB91F, bswap, GR32, GR32>;
+ def LRVGR : UnaryRRE<"lrvgr", 0xB90F, bswap, GR64, GR64>;
}
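Since these definitions use the generic bswap operator, ordinary byte-swap code is what they cover; an illustrative C fragment (assuming the usual GCC/Clang builtins):

#include <stdint.h>

uint32_t swap32(uint32_t x) { return __builtin_bswap32(x); }  /* 32-bit register form */
uint64_t swap64(uint64_t x) { return __builtin_bswap64(x); }  /* 64-bit register form */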
// Byte-swapping loads. Unlike normal loads, these instructions are
@@ -749,26 +747,14 @@ def STRVG : StoreRXY<"strvg", 0xE32F, z_strvg, GR64, 8>;
//===----------------------------------------------------------------------===//
// Load BDX-style addresses.
-let hasSideEffects = 0, isAsCheapAsAMove = 1, isReMaterializable = 1,
- DispKey = "la" in {
- let DispSize = "12" in
- def LA : InstRX<0x41, (outs GR64:$R1), (ins laaddr12pair:$XBD2),
- "la\t$R1, $XBD2",
- [(set GR64:$R1, laaddr12pair:$XBD2)]>;
- let DispSize = "20" in
- def LAY : InstRXY<0xE371, (outs GR64:$R1), (ins laaddr20pair:$XBD2),
- "lay\t$R1, $XBD2",
- [(set GR64:$R1, laaddr20pair:$XBD2)]>;
-}
+let hasSideEffects = 0, isAsCheapAsAMove = 1, isReMaterializable = 1 in
+ defm LA : LoadAddressRXPair<"la", 0x41, 0xE371, bitconvert>;
// Load a PC-relative address. There's no version of this instruction
// with a 16-bit offset, so there's no relaxation.
let hasSideEffects = 0, isAsCheapAsAMove = 1, isMoveImm = 1,
- isReMaterializable = 1 in {
- def LARL : InstRIL<0xC00, (outs GR64:$R1), (ins pcrel32:$I2),
- "larl\t$R1, $I2",
- [(set GR64:$R1, pcrel32:$I2)]>;
-}
+ isReMaterializable = 1 in
+ def LARL : LoadAddressRIL<"larl", 0xC00, bitconvert>;
// Load the Global Offset Table address. This will be lowered into a
// larl $R1, _GLOBAL_OFFSET_TABLE_
@@ -782,11 +768,11 @@ def GOT : Alias<6, (outs GR64:$R1), (ins),
let Defs = [CC] in {
let CCValues = 0xF, CompareZeroCCMask = 0x8 in {
- def LPR : UnaryRR <"lp", 0x10, z_iabs, GR32, GR32>;
- def LPGR : UnaryRRE<"lpg", 0xB900, z_iabs, GR64, GR64>;
+ def LPR : UnaryRR <"lpr", 0x10, z_iabs, GR32, GR32>;
+ def LPGR : UnaryRRE<"lpgr", 0xB900, z_iabs, GR64, GR64>;
}
let CCValues = 0xE, CompareZeroCCMask = 0xE in
- def LPGFR : UnaryRRE<"lpgf", 0xB910, null_frag, GR64, GR32>;
+ def LPGFR : UnaryRRE<"lpgfr", 0xB910, null_frag, GR64, GR32>;
}
def : Pat<(z_iabs32 GR32:$src), (LPR GR32:$src)>;
def : Pat<(z_iabs64 GR64:$src), (LPGR GR64:$src)>;
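The z_iabs patterns express integer absolute value; a plain-C illustration (ignoring the INT_MIN overflow corner case, which the instruction itself handles by setting CC 3):

#include <stdint.h>

int32_t iabs32(int32_t x) { return x < 0 ? -x : x; }   /* 32-bit "load positive" */
int64_t iabs64(int64_t x) { return x < 0 ? -x : x; }   /* 64-bit "load positive" */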
@@ -795,11 +781,11 @@ defm : SXU<z_iabs64, LPGFR>;
let Defs = [CC] in {
let CCValues = 0xF, CompareZeroCCMask = 0x8 in {
- def LNR : UnaryRR <"ln", 0x11, z_inegabs, GR32, GR32>;
- def LNGR : UnaryRRE<"lng", 0xB901, z_inegabs, GR64, GR64>;
+ def LNR : UnaryRR <"lnr", 0x11, z_inegabs, GR32, GR32>;
+ def LNGR : UnaryRRE<"lngr", 0xB901, z_inegabs, GR64, GR64>;
}
let CCValues = 0xE, CompareZeroCCMask = 0xE in
- def LNGFR : UnaryRRE<"lngf", 0xB911, null_frag, GR64, GR32>;
+ def LNGFR : UnaryRRE<"lngfr", 0xB911, null_frag, GR64, GR32>;
}
def : Pat<(z_inegabs32 GR32:$src), (LNR GR32:$src)>;
def : Pat<(z_inegabs64 GR64:$src), (LNGR GR64:$src)>;
@@ -808,11 +794,11 @@ defm : SXU<z_inegabs64, LNGFR>;
let Defs = [CC] in {
let CCValues = 0xF, CompareZeroCCMask = 0x8 in {
- def LCR : UnaryRR <"lc", 0x13, ineg, GR32, GR32>;
- def LCGR : UnaryRRE<"lcg", 0xB903, ineg, GR64, GR64>;
+ def LCR : UnaryRR <"lcr", 0x13, ineg, GR32, GR32>;
+ def LCGR : UnaryRRE<"lcgr", 0xB903, ineg, GR64, GR64>;
}
let CCValues = 0xE, CompareZeroCCMask = 0xE in
- def LCGFR : UnaryRRE<"lcgf", 0xB913, null_frag, GR64, GR32>;
+ def LCGFR : UnaryRRE<"lcgfr", 0xB913, null_frag, GR64, GR32>;
}
defm : SXU<ineg, LCGFR>;
@@ -880,10 +866,10 @@ def : Pat<(or (zext32 GR32:$src), imm64hf32:$imm),
let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in {
// Addition of a register.
let isCommutable = 1 in {
- defm AR : BinaryRRAndK<"a", 0x1A, 0xB9F8, add, GR32, GR32>;
- defm AGR : BinaryRREAndK<"ag", 0xB908, 0xB9E8, add, GR64, GR64>;
+ defm AR : BinaryRRAndK<"ar", 0x1A, 0xB9F8, add, GR32, GR32>;
+ defm AGR : BinaryRREAndK<"agr", 0xB908, 0xB9E8, add, GR64, GR64>;
}
- def AGFR : BinaryRRE<"agf", 0xB918, null_frag, GR64, GR32>;
+ def AGFR : BinaryRRE<"agfr", 0xB918, null_frag, GR64, GR32>;
// Addition of signed 16-bit immediates.
defm AHIMux : BinaryRIAndKPseudo<"ahimux", add, GRX32, imm32sx16>;
@@ -914,10 +900,10 @@ defm : SXB<add, GR64, AGFR>;
let Defs = [CC] in {
// Addition of a register.
let isCommutable = 1 in {
- defm ALR : BinaryRRAndK<"al", 0x1E, 0xB9FA, addc, GR32, GR32>;
- defm ALGR : BinaryRREAndK<"alg", 0xB90A, 0xB9EA, addc, GR64, GR64>;
+ defm ALR : BinaryRRAndK<"alr", 0x1E, 0xB9FA, addc, GR32, GR32>;
+ defm ALGR : BinaryRREAndK<"algr", 0xB90A, 0xB9EA, addc, GR64, GR64>;
}
- def ALGFR : BinaryRRE<"algf", 0xB91A, null_frag, GR64, GR32>;
+ def ALGFR : BinaryRRE<"algfr", 0xB91A, null_frag, GR64, GR32>;
// Addition of signed 16-bit immediates.
def ALHSIK : BinaryRIE<"alhsik", 0xECDA, addc, GR32, imm32sx16>,
@@ -939,8 +925,8 @@ defm : ZXB<addc, GR64, ALGFR>;
// Addition producing and using a carry.
let Defs = [CC], Uses = [CC] in {
// Addition of a register.
- def ALCR : BinaryRRE<"alc", 0xB998, adde, GR32, GR32>;
- def ALCGR : BinaryRRE<"alcg", 0xB988, adde, GR64, GR64>;
+ def ALCR : BinaryRRE<"alcr", 0xB998, adde, GR32, GR32>;
+ def ALCGR : BinaryRRE<"alcgr", 0xB988, adde, GR64, GR64>;
// Addition of memory.
def ALC : BinaryRXY<"alc", 0xE398, adde, GR32, load, 4>;
@@ -955,9 +941,9 @@ let Defs = [CC], Uses = [CC] in {
// add-immediate instruction instead.
let Defs = [CC], CCValues = 0xF, CompareZeroCCMask = 0x8 in {
// Subtraction of a register.
- defm SR : BinaryRRAndK<"s", 0x1B, 0xB9F9, sub, GR32, GR32>;
- def SGFR : BinaryRRE<"sgf", 0xB919, null_frag, GR64, GR32>;
- defm SGR : BinaryRREAndK<"sg", 0xB909, 0xB9E9, sub, GR64, GR64>;
+ defm SR : BinaryRRAndK<"sr", 0x1B, 0xB9F9, sub, GR32, GR32>;
+ def SGFR : BinaryRRE<"sgfr", 0xB919, null_frag, GR64, GR32>;
+ defm SGR : BinaryRREAndK<"sgr", 0xB909, 0xB9E9, sub, GR64, GR64>;
// Subtraction of memory.
defm SH : BinaryRXPair<"sh", 0x4B, 0xE37B, sub, GR32, asextloadi16, 2>;
@@ -970,9 +956,9 @@ defm : SXB<sub, GR64, SGFR>;
// Subtraction producing a carry.
let Defs = [CC] in {
// Subtraction of a register.
- defm SLR : BinaryRRAndK<"sl", 0x1F, 0xB9FB, subc, GR32, GR32>;
- def SLGFR : BinaryRRE<"slgf", 0xB91B, null_frag, GR64, GR32>;
- defm SLGR : BinaryRREAndK<"slg", 0xB90B, 0xB9EB, subc, GR64, GR64>;
+ defm SLR : BinaryRRAndK<"slr", 0x1F, 0xB9FB, subc, GR32, GR32>;
+ def SLGFR : BinaryRRE<"slgfr", 0xB91B, null_frag, GR64, GR32>;
+ defm SLGR : BinaryRREAndK<"slgr", 0xB90B, 0xB9EB, subc, GR64, GR64>;
// Subtraction of unsigned 32-bit immediates. These don't match
// subc because we prefer addc for constants.
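The reason no subc pattern is needed here is that subtracting a constant is equivalent to adding its negation, so constant subtractions are expressed as additions; a hypothetical example:

#include <stdint.h>

uint32_t sub_five(uint32_t x) {
    return x - 5u;        /* typically emitted as x + 0xfffffffb */
}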
@@ -989,8 +975,8 @@ defm : ZXB<subc, GR64, SLGFR>;
// Subtraction producing and using a carry.
let Defs = [CC], Uses = [CC] in {
// Subtraction of a register.
- def SLBR : BinaryRRE<"slb", 0xB999, sube, GR32, GR32>;
- def SLBGR : BinaryRRE<"slbg", 0xB989, sube, GR64, GR64>;
+ def SLBR : BinaryRRE<"slbr", 0xB999, sube, GR32, GR32>;
+ def SLBGR : BinaryRRE<"slbgr", 0xB989, sube, GR64, GR64>;
// Subtraction of memory.
def SLB : BinaryRXY<"slb", 0xE399, sube, GR32, load, 4>;
@@ -1004,8 +990,8 @@ let Defs = [CC], Uses = [CC] in {
let Defs = [CC] in {
// ANDs of a register.
let isCommutable = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in {
- defm NR : BinaryRRAndK<"n", 0x14, 0xB9F4, and, GR32, GR32>;
- defm NGR : BinaryRREAndK<"ng", 0xB980, 0xB9E4, and, GR64, GR64>;
+ defm NR : BinaryRRAndK<"nr", 0x14, 0xB9F4, and, GR32, GR32>;
+ defm NGR : BinaryRREAndK<"ngr", 0xB980, 0xB9E4, and, GR64, GR64>;
}
let isConvertibleToThreeAddress = 1 in {
@@ -1063,8 +1049,8 @@ defm : RMWIByte<and, bdaddr20pair, NIY>;
let Defs = [CC] in {
// ORs of a register.
let isCommutable = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in {
- defm OR : BinaryRRAndK<"o", 0x16, 0xB9F6, or, GR32, GR32>;
- defm OGR : BinaryRREAndK<"og", 0xB981, 0xB9E6, or, GR64, GR64>;
+ defm OR : BinaryRRAndK<"or", 0x16, 0xB9F6, or, GR32, GR32>;
+ defm OGR : BinaryRREAndK<"ogr", 0xB981, 0xB9E6, or, GR64, GR64>;
}
// ORs of a 16-bit immediate, leaving other bits unaffected.
@@ -1120,8 +1106,8 @@ defm : RMWIByte<or, bdaddr20pair, OIY>;
let Defs = [CC] in {
// XORs of a register.
let isCommutable = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in {
- defm XR : BinaryRRAndK<"x", 0x17, 0xB9F7, xor, GR32, GR32>;
- defm XGR : BinaryRREAndK<"xg", 0xB982, 0xB9E7, xor, GR64, GR64>;
+ defm XR : BinaryRRAndK<"xr", 0x17, 0xB9F7, xor, GR32, GR32>;
+ defm XGR : BinaryRREAndK<"xgr", 0xB982, 0xB9E7, xor, GR64, GR64>;
}
// XORs of a 32-bit immediate, leaving other bits unaffected.
@@ -1159,10 +1145,10 @@ defm : RMWIByte<xor, bdaddr20pair, XIY>;
// Multiplication of a register.
let isCommutable = 1 in {
- def MSR : BinaryRRE<"ms", 0xB252, mul, GR32, GR32>;
- def MSGR : BinaryRRE<"msg", 0xB90C, mul, GR64, GR64>;
+ def MSR : BinaryRRE<"msr", 0xB252, mul, GR32, GR32>;
+ def MSGR : BinaryRRE<"msgr", 0xB90C, mul, GR64, GR64>;
}
-def MSGFR : BinaryRRE<"msgf", 0xB91C, null_frag, GR64, GR32>;
+def MSGFR : BinaryRRE<"msgfr", 0xB91C, null_frag, GR64, GR32>;
defm : SXB<mul, GR64, MSGFR>;
// Multiplication of a signed 16-bit immediate.
@@ -1180,7 +1166,7 @@ def MSGF : BinaryRXY<"msgf", 0xE31C, mul, GR64, asextloadi32, 4>;
def MSG : BinaryRXY<"msg", 0xE30C, mul, GR64, load, 8>;
// Multiplication of a register, producing two results.
-def MLGR : BinaryRRE<"mlg", 0xB986, z_umul_lohi64, GR128, GR64>;
+def MLGR : BinaryRRE<"mlgr", 0xB986, z_umul_lohi64, GR128, GR64>;
// Multiplication of memory, producing two results.
def MLG : BinaryRXY<"mlg", 0xE386, z_umul_lohi64, GR128, load, 8>;
@@ -1189,17 +1175,19 @@ def MLG : BinaryRXY<"mlg", 0xE386, z_umul_lohi64, GR128, load, 8>;
// Division and remainder
//===----------------------------------------------------------------------===//
-// Division and remainder, from registers.
-def DSGFR : BinaryRRE<"dsgf", 0xB91D, z_sdivrem32, GR128, GR32>;
-def DSGR : BinaryRRE<"dsg", 0xB90D, z_sdivrem64, GR128, GR64>;
-def DLR : BinaryRRE<"dl", 0xB997, z_udivrem32, GR128, GR32>;
-def DLGR : BinaryRRE<"dlg", 0xB987, z_udivrem64, GR128, GR64>;
+let hasSideEffects = 1 in { // Do not speculatively execute.
+ // Division and remainder, from registers.
+ def DSGFR : BinaryRRE<"dsgfr", 0xB91D, z_sdivrem32, GR128, GR32>;
+ def DSGR : BinaryRRE<"dsgr", 0xB90D, z_sdivrem64, GR128, GR64>;
+ def DLR : BinaryRRE<"dlr", 0xB997, z_udivrem32, GR128, GR32>;
+ def DLGR : BinaryRRE<"dlgr", 0xB987, z_udivrem64, GR128, GR64>;
-// Division and remainder, from memory.
-def DSGF : BinaryRXY<"dsgf", 0xE31D, z_sdivrem32, GR128, load, 4>;
-def DSG : BinaryRXY<"dsg", 0xE30D, z_sdivrem64, GR128, load, 8>;
-def DL : BinaryRXY<"dl", 0xE397, z_udivrem32, GR128, load, 4>;
-def DLG : BinaryRXY<"dlg", 0xE387, z_udivrem64, GR128, load, 8>;
+ // Division and remainder, from memory.
+ def DSGF : BinaryRXY<"dsgf", 0xE31D, z_sdivrem32, GR128, load, 4>;
+ def DSG : BinaryRXY<"dsg", 0xE30D, z_sdivrem64, GR128, load, 8>;
+ def DL : BinaryRXY<"dl", 0xE397, z_udivrem32, GR128, load, 4>;
+ def DLG : BinaryRXY<"dlg", 0xE387, z_udivrem64, GR128, load, 8>;
+}
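A short note on why hasSideEffects is set here (sketch only): a division can trap, for example on a zero divisor, so it must not be speculated above the guard that makes it safe:

long safe_div(long a, long b) {
    return b != 0 ? a / b : 0;   /* the divide must stay behind the check */
}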
//===----------------------------------------------------------------------===//
// Shifts
@@ -1274,11 +1262,14 @@ let Defs = [CC] in {
// of the unsigned forms do.
let Defs = [CC], CCValues = 0xE in {
// Comparison with a register.
- def CR : CompareRR <"c", 0x19, z_scmp, GR32, GR32>;
- def CGFR : CompareRRE<"cgf", 0xB930, null_frag, GR64, GR32>;
- def CGR : CompareRRE<"cg", 0xB920, z_scmp, GR64, GR64>;
+ def CR : CompareRR <"cr", 0x19, z_scmp, GR32, GR32>;
+ def CGFR : CompareRRE<"cgfr", 0xB930, null_frag, GR64, GR32>;
+ def CGR : CompareRRE<"cgr", 0xB920, z_scmp, GR64, GR64>;
- // Comparison with a signed 16-bit immediate.
+ // Comparison with a signed 16-bit immediate. CHIMux expands to CHI or CIH,
+ // depending on the choice of register.
+ def CHIMux : CompareRIPseudo<z_scmp, GRX32, imm32sx16>,
+ Requires<[FeatureHighWord]>;
def CHI : CompareRI<"chi", 0xA7E, z_scmp, GR32, imm32sx16>;
def CGHI : CompareRI<"cghi", 0xA7F, z_scmp, GR64, imm64sx16>;
@@ -1317,9 +1308,9 @@ defm : SXB<z_scmp, GR64, CGFR>;
// Unsigned comparisons.
let Defs = [CC], CCValues = 0xE, IsLogical = 1 in {
// Comparison with a register.
- def CLR : CompareRR <"cl", 0x15, z_ucmp, GR32, GR32>;
- def CLGFR : CompareRRE<"clgf", 0xB931, null_frag, GR64, GR32>;
- def CLGR : CompareRRE<"clg", 0xB921, z_ucmp, GR64, GR64>;
+ def CLR : CompareRR <"clr", 0x15, z_ucmp, GR32, GR32>;
+ def CLGFR : CompareRRE<"clgfr", 0xB931, null_frag, GR64, GR32>;
+ def CLGR : CompareRRE<"clgr", 0xB921, z_ucmp, GR64, GR64>;
// Comparison with an unsigned 32-bit immediate. CLFIMux expands to CLFI
// or CLIH, depending on the choice of register.
@@ -1391,12 +1382,21 @@ def TML : InstAlias<"tml\t$R, $I", (TMLL GR32:$R, imm32ll16:$I), 0>;
def TMH : InstAlias<"tmh\t$R, $I", (TMLH GR32:$R, imm32lh16:$I), 0>;
//===----------------------------------------------------------------------===//
-// Prefetch
+// Prefetch and execution hint
//===----------------------------------------------------------------------===//
def PFD : PrefetchRXY<"pfd", 0xE336, z_prefetch>;
def PFDRL : PrefetchRILPC<"pfdrl", 0xC62, z_prefetch>;
+let Predicates = [FeatureExecutionHint] in {
+ // Branch Prediction Preload
+ def BPP : BranchPreloadSMI<"bpp", 0xC7>;
+ def BPRP : BranchPreloadMII<"bprp", 0xC5>;
+
+ // Next Instruction Access Intent
+ def NIAI : SideEffectBinaryIE<"niai", 0xB2FA, imm32zx4, imm32zx4>;
+}
+
//===----------------------------------------------------------------------===//
// Atomic operations
//===----------------------------------------------------------------------===//
@@ -1407,7 +1407,7 @@ let hasSideEffects = 1 in
def Serialize : Alias<2, (outs), (ins), [(z_serialize)]>;
// A pseudo instruction that serves as a compiler barrier.
-let hasSideEffects = 1 in
+let hasSideEffects = 1, hasNoSchedulingInfo = 1 in
def MemBarrier : Pseudo<(outs), (ins), [(z_membarrier)]>;
let Predicates = [FeatureInterlockedAccess1], Defs = [CC] in {
@@ -1543,52 +1543,131 @@ def ATOMIC_CMP_SWAPW
let mayLoad = 1;
let mayStore = 1;
let usesCustomInserter = 1;
+ let hasNoSchedulingInfo = 1;
}
+// Test and set.
+let mayLoad = 1, Defs = [CC] in
+ def TS : StoreInherentS<"ts", 0x9300, null_frag, 1>;
+
+// Compare and swap.
let Defs = [CC] in {
defm CS : CmpSwapRSPair<"cs", 0xBA, 0xEB14, atomic_cmp_swap_32, GR32>;
def CSG : CmpSwapRSY<"csg", 0xEB30, atomic_cmp_swap_64, GR64>;
}
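For orientation (plain C, not itself atomic, names illustrative), the semantics matched by atomic_cmp_swap_32/_64 behave like a compare-exchange: compare memory with the expected value, store the new value on a match, and report which case happened:

#include <stdbool.h>
#include <stdint.h>

bool cas32(uint32_t *p, uint32_t *expected, uint32_t desired) {
    if (*p == *expected) {      /* values equal: perform the swap          */
        *p = desired;
        return true;
    }
    *expected = *p;             /* values differ: report current contents  */
    return false;
}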
+// Compare double and swap.
+let Defs = [CC] in {
+ defm CDS : CmpSwapRSPair<"cds", 0xBB, 0xEB31, null_frag, GR128>;
+ def CDSG : CmpSwapRSY<"cdsg", 0xEB3E, null_frag, GR128>;
+}
+
+// Compare and swap and store.
+let Uses = [R0L, R1D], Defs = [CC], mayStore = 1, mayLoad = 1 in
+ def CSST : SideEffectTernarySSF<"csst", 0xC82, GR64>;
+
+// Perform locked operation.
+let Uses = [R0L, R1D], Defs = [CC], mayStore = 1, mayLoad = 1 in
+ def PLO : SideEffectQuaternarySSe<"plo", 0xEE, GR64>;
+
+// Load/store pair from/to quadword.
+def LPQ : UnaryRXY<"lpq", 0xE38F, null_frag, GR128, 16>;
+def STPQ : StoreRXY<"stpq", 0xE38E, null_frag, GR128, 16>;
+
+// Load pair disjoint.
+let Predicates = [FeatureInterlockedAccess1], Defs = [CC] in {
+ def LPD : BinarySSF<"lpd", 0xC84, GR128>;
+ def LPDG : BinarySSF<"lpdg", 0xC85, GR128>;
+}
+
+//===----------------------------------------------------------------------===//
+// Access registers
+//===----------------------------------------------------------------------===//
+
+// Read a 32-bit access register into a GR32. As with all GR32 operations,
+// the upper 32 bits of the enclosing GR64 remain unchanged, which is useful
+// when a 64-bit address is stored in a pair of access registers.
+def EAR : UnaryRRE<"ear", 0xB24F, null_frag, GR32, AR32>;
+
+// Set access register.
+def SAR : UnaryRRE<"sar", 0xB24E, null_frag, AR32, GR32>;
+
+// Copy access register.
+def CPYA : UnaryRRE<"cpya", 0xB24D, null_frag, AR32, AR32>;
+
+// Load address extended.
+defm LAE : LoadAddressRXPair<"lae", 0x51, 0xE375, null_frag>;
+
+// Load access multiple.
+defm LAM : LoadMultipleRSPair<"lam", 0x9A, 0xEB9A, AR32>;
+
+// Store access multiple.
+defm STAM : StoreMultipleRSPair<"stam", 0x9B, 0xEB9B, AR32>;
+
+//===----------------------------------------------------------------------===//
+// Program mask and addressing mode
+//===----------------------------------------------------------------------===//
+
+// Extract CC and program mask into a register. CC ends up in bits 29 and 28.
+let Uses = [CC] in
+ def IPM : InherentRRE<"ipm", 0xB222, GR32, z_ipm>;
+
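Given the bit positions noted in the comment above, extracting the condition code from the IPM result is a shift and mask; illustrative C:

static inline unsigned cc_from_ipm(unsigned ipm) {
    return (ipm >> 28) & 3;   /* CC sits in bits 29-28 of the result */
}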
+// Set CC and program mask from a register.
+let hasSideEffects = 1, Defs = [CC] in
+ def SPM : SideEffectUnaryRR<"spm", 0x04, GR32>;
+
+// Branch and link - like BAS, but also extracts CC and program mask.
+let isCall = 1, Uses = [CC], Defs = [CC] in {
+ def BAL : CallRX<"bal", 0x45>;
+ def BALR : CallRR<"balr", 0x05>;
+}
+
+// Test addressing mode.
+let Defs = [CC] in
+ def TAM : SideEffectInherentE<"tam", 0x010B>;
+
+// Set addressing mode.
+let hasSideEffects = 1 in {
+ def SAM24 : SideEffectInherentE<"sam24", 0x010C>;
+ def SAM31 : SideEffectInherentE<"sam31", 0x010D>;
+ def SAM64 : SideEffectInherentE<"sam64", 0x010E>;
+}
+
+// Branch and set mode. Not really a call, but also sets an output register.
+let isBranch = 1, isTerminator = 1, isBarrier = 1 in
+ def BSM : CallRR<"bsm", 0x0B>;
+
+// Branch and save and set mode.
+let isCall = 1, Defs = [CC] in
+ def BASSM : CallRR<"bassm", 0x0C>;
+
//===----------------------------------------------------------------------===//
// Transactional execution
//===----------------------------------------------------------------------===//
-let Predicates = [FeatureTransactionalExecution] in {
+let hasSideEffects = 1, Predicates = [FeatureTransactionalExecution] in {
// Transaction Begin
- let hasSideEffects = 1, mayStore = 1,
- usesCustomInserter = 1, Defs = [CC] in {
- def TBEGIN : InstSIL<0xE560,
- (outs), (ins bdaddr12only:$BD1, imm32zx16:$I2),
- "tbegin\t$BD1, $I2",
- [(z_tbegin bdaddr12only:$BD1, imm32zx16:$I2)]>;
- def TBEGIN_nofloat : Pseudo<(outs), (ins bdaddr12only:$BD1, imm32zx16:$I2),
- [(z_tbegin_nofloat bdaddr12only:$BD1,
- imm32zx16:$I2)]>;
- def TBEGINC : InstSIL<0xE561,
- (outs), (ins bdaddr12only:$BD1, imm32zx16:$I2),
- "tbeginc\t$BD1, $I2",
- [(int_s390_tbeginc bdaddr12only:$BD1,
- imm32zx16:$I2)]>;
+ let mayStore = 1, usesCustomInserter = 1, Defs = [CC] in {
+ def TBEGIN : SideEffectBinarySIL<"tbegin", 0xE560, z_tbegin, imm32zx16>;
+ def TBEGIN_nofloat : SideEffectBinarySILPseudo<z_tbegin_nofloat, imm32zx16>;
+
+ def TBEGINC : SideEffectBinarySIL<"tbeginc", 0xE561,
+ int_s390_tbeginc, imm32zx16>;
}
// Transaction End
- let hasSideEffects = 1, Defs = [CC], BD2 = 0 in
- def TEND : InstS<0xB2F8, (outs), (ins), "tend", [(z_tend)]>;
+ let Defs = [CC] in
+ def TEND : SideEffectInherentS<"tend", 0xB2F8, z_tend>;
// Transaction Abort
- let hasSideEffects = 1, isTerminator = 1, isBarrier = 1 in
- def TABORT : InstS<0xB2FC, (outs), (ins bdaddr12only:$BD2),
- "tabort\t$BD2",
- [(int_s390_tabort bdaddr12only:$BD2)]>;
+ let isTerminator = 1, isBarrier = 1 in
+ def TABORT : SideEffectAddressS<"tabort", 0xB2FC, int_s390_tabort>;
// Nontransactional Store
- let hasSideEffects = 1 in
- def NTSTG : StoreRXY<"ntstg", 0xE325, int_s390_ntstg, GR64, 8>;
+ def NTSTG : StoreRXY<"ntstg", 0xE325, int_s390_ntstg, GR64, 8>;
// Extract Transaction Nesting Depth
- let hasSideEffects = 1 in
- def ETND : InherentRRE<"etnd", 0xB2EC, GR32, (int_s390_etnd)>;
+ def ETND : InherentRRE<"etnd", 0xB2EC, GR32, int_s390_etnd>;
}
//===----------------------------------------------------------------------===//
@@ -1596,9 +1675,8 @@ let Predicates = [FeatureTransactionalExecution] in {
//===----------------------------------------------------------------------===//
let Predicates = [FeatureProcessorAssist] in {
- let hasSideEffects = 1, R4 = 0 in
- def PPA : InstRRF<0xB2E8, (outs), (ins GR64:$R1, GR64:$R2, imm32zx4:$R3),
- "ppa\t$R1, $R2, $R3", []>;
+ let hasSideEffects = 1 in
+ def PPA : SideEffectTernaryRRFc<"ppa", 0xB2E8, GR64, GR64, imm32zx4>;
def : Pat<(int_s390_ppa_txassist GR32:$src),
(PPA (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32),
0, 1)>;
@@ -1608,33 +1686,18 @@ let Predicates = [FeatureProcessorAssist] in {
// Miscellaneous Instructions.
//===----------------------------------------------------------------------===//
-// Extract CC into bits 29 and 28 of a register.
-let Uses = [CC] in
- def IPM : InherentRRE<"ipm", 0xB222, GR32, (z_ipm)>;
-
-// Read a 32-bit access register into a GR32. As with all GR32 operations,
-// the upper 32 bits of the enclosing GR64 remain unchanged, which is useful
-// when a 64-bit address is stored in a pair of access registers.
-def EAR : InstRRE<0xB24F, (outs GR32:$R1), (ins access_reg:$R2),
- "ear\t$R1, $R2",
- [(set GR32:$R1, (z_extract_access access_reg:$R2))]>;
-
// Find leftmost one, AKA count leading zeros. The instruction actually
// returns a pair of GR64s, the first giving the number of leading zeros
// and the second giving a copy of the source with the leftmost one bit
// cleared. We only use the first result here.
-let Defs = [CC] in {
- def FLOGR : UnaryRRE<"flog", 0xB983, null_frag, GR128, GR64>;
-}
+let Defs = [CC] in
+ def FLOGR : UnaryRRE<"flogr", 0xB983, null_frag, GR128, GR64>;
def : Pat<(ctlz GR64:$src),
(EXTRACT_SUBREG (FLOGR GR64:$src), subreg_h64)>;
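As the comment explains, only the leading-zero count (the high half of the GR128 result) is used; an equivalent plain-C computation, assuming only the standard GCC/Clang builtin:

#include <stdint.h>

uint64_t leading_zeros(uint64_t x) {
    return x ? (uint64_t)__builtin_clzll(x) : 64;  /* the instruction reports 64 for 0 */
}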
// Population count. Counts bits set per byte.
-let Predicates = [FeaturePopulationCount], Defs = [CC] in {
- def POPCNT : InstRRE<0xB9E1, (outs GR64:$R1), (ins GR64:$R2),
- "popcnt\t$R1, $R2",
- [(set GR64:$R1, (z_popcnt GR64:$R2))]>;
-}
+let Predicates = [FeaturePopulationCount], Defs = [CC] in
+ def POPCNT : UnaryRRE<"popcnt", 0xB9E1, z_popcnt, GR64, GR64>;
// Use subregs to populate the "don't care" bits in a 32-bit to 64-bit anyext.
def : Pat<(i64 (anyext GR32:$src)),
@@ -1651,35 +1714,137 @@ let usesCustomInserter = 1 in {
let mayLoad = 1, Defs = [CC] in
defm SRST : StringRRE<"srst", 0xb25e, z_search_string>;
-// Other instructions for inline assembly
-let hasSideEffects = 1, Defs = [CC], isCall = 1 in
- def SVC : InstI<0x0A, (outs), (ins imm32zx8:$I1),
- "svc\t$I1",
- []>;
-let hasSideEffects = 1, Defs = [CC], mayStore = 1 in
- def STCK : InstS<0xB205, (outs), (ins bdaddr12only:$BD2),
- "stck\t$BD2",
- []>;
-let hasSideEffects = 1, Defs = [CC], mayStore = 1 in
- def STCKF : InstS<0xB27C, (outs), (ins bdaddr12only:$BD2),
- "stckf\t$BD2",
- []>;
-let hasSideEffects = 1, Defs = [CC], mayStore = 1 in
- def STCKE : InstS<0xB278, (outs), (ins bdaddr12only:$BD2),
- "stcke\t$BD2",
- []>;
-let hasSideEffects = 1, Defs = [CC], mayStore = 1 in
- def STFLE : InstS<0xB2B0, (outs), (ins bdaddr12only:$BD2),
- "stfle\t$BD2",
- []>;
+// Supervisor call.
+let hasSideEffects = 1, isCall = 1, Defs = [CC] in
+ def SVC : SideEffectUnaryI<"svc", 0x0A, imm32zx8>;
+
+// Store clock.
+let hasSideEffects = 1, Defs = [CC] in {
+ def STCK : StoreInherentS<"stck", 0xB205, null_frag, 8>;
+ def STCKF : StoreInherentS<"stckf", 0xB27C, null_frag, 8>;
+ def STCKE : StoreInherentS<"stcke", 0xB278, null_frag, 16>;
+}
+
+// Store facility list.
+let hasSideEffects = 1, Uses = [R0D], Defs = [R0D, CC] in
+ def STFLE : StoreInherentS<"stfle", 0xB2B0, null_frag, 0>;
+
+// Extract CPU time.
+let Defs = [R0D, R1D], hasSideEffects = 1, mayLoad = 1 in
+ def ECTG : SideEffectTernarySSF<"ectg", 0xC81, GR64>;
+// Execute.
let hasSideEffects = 1 in {
- def EX : InstRX<0x44, (outs), (ins GR64:$R1, bdxaddr12only:$XBD2),
- "ex\t$R1, $XBD2", []>;
- def EXRL : InstRIL<0xC60, (outs), (ins GR64:$R1, pcrel32:$I2),
- "exrl\t$R1, $I2", []>;
+ def EX : SideEffectBinaryRX<"ex", 0x44, GR64>;
+ def EXRL : SideEffectBinaryRILPC<"exrl", 0xC60, GR64>;
}
+// Program return.
+let hasSideEffects = 1, Defs = [CC] in
+ def PR : SideEffectInherentE<"pr", 0x0101>;
+
+// Move with key.
+let mayLoad = 1, mayStore = 1, Defs = [CC] in
+ def MVCK : MemoryBinarySSd<"mvck", 0xD9, GR64>;
+
+// Store real address.
+def STRAG : StoreSSE<"strag", 0xE502>;
+
+//===----------------------------------------------------------------------===//
+// .insn directive instructions
+//===----------------------------------------------------------------------===//
+
+let isCodeGenOnly = 1 in {
+ def InsnE : DirectiveInsnE<(outs), (ins imm64zx16:$enc), ".insn e,$enc", []>;
+ def InsnRI : DirectiveInsnRI<(outs), (ins imm64zx32:$enc, AnyReg:$R1,
+ imm32sx16:$I2),
+ ".insn ri,$enc,$R1,$I2", []>;
+ def InsnRIE : DirectiveInsnRIE<(outs), (ins imm64zx48:$enc, AnyReg:$R1,
+ AnyReg:$R3, brtarget16:$I2),
+ ".insn rie,$enc,$R1,$R3,$I2", []>;
+ def InsnRIL : DirectiveInsnRIL<(outs), (ins imm64zx48:$enc, AnyReg:$R1,
+ brtarget32:$I2),
+ ".insn ril,$enc,$R1,$I2", []>;
+ def InsnRILU : DirectiveInsnRIL<(outs), (ins imm64zx48:$enc, AnyReg:$R1,
+ uimm32:$I2),
+ ".insn rilu,$enc,$R1,$I2", []>;
+ def InsnRIS : DirectiveInsnRIS<(outs),
+ (ins imm64zx48:$enc, AnyReg:$R1,
+ imm32sx8:$I2, imm32zx4:$M3,
+ bdaddr12only:$BD4),
+ ".insn ris,$enc,$R1,$I2,$M3,$BD4", []>;
+ def InsnRR : DirectiveInsnRR<(outs),
+ (ins imm64zx16:$enc, AnyReg:$R1, AnyReg:$R2),
+ ".insn rr,$enc,$R1,$R2", []>;
+ def InsnRRE : DirectiveInsnRRE<(outs), (ins imm64zx32:$enc,
+ AnyReg:$R1, AnyReg:$R2),
+ ".insn rre,$enc,$R1,$R2", []>;
+ def InsnRRF : DirectiveInsnRRF<(outs),
+ (ins imm64zx32:$enc, AnyReg:$R1, AnyReg:$R2,
+ AnyReg:$R3, imm32zx4:$M4),
+ ".insn rrf,$enc,$R1,$R2,$R3,$M4", []>;
+ def InsnRRS : DirectiveInsnRRS<(outs),
+ (ins imm64zx48:$enc, AnyReg:$R1,
+ AnyReg:$R2, imm32zx4:$M3,
+ bdaddr12only:$BD4),
+ ".insn rrs,$enc,$R1,$R2,$M3,$BD4", []>;
+ def InsnRS : DirectiveInsnRS<(outs),
+ (ins imm64zx32:$enc, AnyReg:$R1,
+ AnyReg:$R3, bdaddr12only:$BD2),
+ ".insn rs,$enc,$R1,$R3,$BD2", []>;
+ def InsnRSE : DirectiveInsnRSE<(outs),
+ (ins imm64zx48:$enc, AnyReg:$R1,
+ AnyReg:$R3, bdaddr12only:$BD2),
+ ".insn rse,$enc,$R1,$R3,$BD2", []>;
+ def InsnRSI : DirectiveInsnRSI<(outs),
+ (ins imm64zx48:$enc, AnyReg:$R1,
+ AnyReg:$R3, brtarget16:$RI2),
+ ".insn rsi,$enc,$R1,$R3,$RI2", []>;
+ def InsnRSY : DirectiveInsnRSY<(outs),
+ (ins imm64zx48:$enc, AnyReg:$R1,
+ AnyReg:$R3, bdaddr20only:$BD2),
+ ".insn rsy,$enc,$R1,$R3,$BD2", []>;
+ def InsnRX : DirectiveInsnRX<(outs), (ins imm64zx32:$enc, AnyReg:$R1,
+ bdxaddr12only:$XBD2),
+ ".insn rx,$enc,$R1,$XBD2", []>;
+ def InsnRXE : DirectiveInsnRXE<(outs), (ins imm64zx48:$enc, AnyReg:$R1,
+ bdxaddr12only:$XBD2),
+ ".insn rxe,$enc,$R1,$XBD2", []>;
+ def InsnRXF : DirectiveInsnRXF<(outs),
+ (ins imm64zx48:$enc, AnyReg:$R1,
+ AnyReg:$R3, bdxaddr12only:$XBD2),
+ ".insn rxf,$enc,$R1,$R3,$XBD2", []>;
+ def InsnRXY : DirectiveInsnRXY<(outs), (ins imm64zx48:$enc, AnyReg:$R1,
+ bdxaddr20only:$XBD2),
+ ".insn rxy,$enc,$R1,$XBD2", []>;
+ def InsnS : DirectiveInsnS<(outs),
+ (ins imm64zx32:$enc, bdaddr12only:$BD2),
+ ".insn s,$enc,$BD2", []>;
+ def InsnSI : DirectiveInsnSI<(outs),
+ (ins imm64zx32:$enc, bdaddr12only:$BD1,
+ imm32sx8:$I2),
+ ".insn si,$enc,$BD1,$I2", []>;
+ def InsnSIY : DirectiveInsnSIY<(outs),
+ (ins imm64zx48:$enc,
+ bdaddr20only:$BD1, imm32zx8:$I2),
+ ".insn siy,$enc,$BD1,$I2", []>;
+ def InsnSIL : DirectiveInsnSIL<(outs),
+ (ins imm64zx48:$enc, bdaddr12only:$BD1,
+ imm32zx16:$I2),
+ ".insn sil,$enc,$BD1,$I2", []>;
+ def InsnSS : DirectiveInsnSS<(outs),
+ (ins imm64zx48:$enc, bdraddr12only:$RBD1,
+ bdaddr12only:$BD2, AnyReg:$R3),
+ ".insn ss,$enc,$RBD1,$BD2,$R3", []>;
+ def InsnSSE : DirectiveInsnSSE<(outs),
+ (ins imm64zx48:$enc,
+ bdaddr12only:$BD1,bdaddr12only:$BD2),
+ ".insn sse,$enc,$BD1,$BD2", []>;
+ def InsnSSF : DirectiveInsnSSF<(outs),
+ (ins imm64zx48:$enc, bdaddr12only:$BD1,
+ bdaddr12only:$BD2, AnyReg:$R3),
+ ".insn ssf,$enc,$BD1,$BD2,$R3", []>;
+}
//===----------------------------------------------------------------------===//
// Peepholes.
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/contrib/llvm/lib/Target/SystemZ/SystemZInstrVector.td
index c101e43ada3a..738ea7a33729 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZInstrVector.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZInstrVector.td
@@ -18,12 +18,14 @@ let Predicates = [FeatureVector] in {
def VLR64 : UnaryAliasVRR<null_frag, v64db, v64db>;
// Load GR from VR element.
+ def VLGV : BinaryVRScGeneric<"vlgv", 0xE721>;
def VLGVB : BinaryVRSc<"vlgvb", 0xE721, null_frag, v128b, 0>;
def VLGVH : BinaryVRSc<"vlgvh", 0xE721, null_frag, v128h, 1>;
def VLGVF : BinaryVRSc<"vlgvf", 0xE721, null_frag, v128f, 2>;
def VLGVG : BinaryVRSc<"vlgvg", 0xE721, z_vector_extract, v128g, 3>;
// Load VR element from GR.
+ def VLVG : TernaryVRSbGeneric<"vlvg", 0xE722>;
def VLVGB : TernaryVRSb<"vlvgb", 0xE722, z_vector_insert,
v128b, v128b, GR32, 0>;
def VLVGH : TernaryVRSb<"vlvgh", 0xE722, z_vector_insert,
@@ -60,6 +62,7 @@ let Predicates = [FeatureVector] in {
def VGBM : UnaryVRIa<"vgbm", 0xE744, z_byte_mask, v128b, imm32zx16>;
// Generate mask.
+ def VGM : BinaryVRIbGeneric<"vgm", 0xE746>;
def VGMB : BinaryVRIb<"vgmb", 0xE746, z_rotate_mask, v128b, 0>;
def VGMH : BinaryVRIb<"vgmh", 0xE746, z_rotate_mask, v128h, 1>;
def VGMF : BinaryVRIb<"vgmf", 0xE746, z_rotate_mask, v128f, 2>;
@@ -85,6 +88,7 @@ let Predicates = [FeatureVector] in {
}
// Replicate immediate.
+ def VREPI : UnaryVRIaGeneric<"vrepi", 0xE745, imm32sx16>;
def VREPIB : UnaryVRIa<"vrepib", 0xE745, z_replicate, v128b, imm32sx16, 0>;
def VREPIH : UnaryVRIa<"vrepih", 0xE745, z_replicate, v128h, imm32sx16, 1>;
def VREPIF : UnaryVRIa<"vrepif", 0xE745, z_replicate, v128f, imm32sx16, 2>;
@@ -119,6 +123,7 @@ let Predicates = [FeatureVector] in {
def VLM : LoadMultipleVRSa<"vlm", 0xE736>;
// Load and replicate
+ def VLREP : UnaryVRXGeneric<"vlrep", 0xE705>;
def VLREPB : UnaryVRX<"vlrepb", 0xE705, z_replicate_loadi8, v128b, 1, 0>;
def VLREPH : UnaryVRX<"vlreph", 0xE705, z_replicate_loadi16, v128h, 2, 1>;
def VLREPF : UnaryVRX<"vlrepf", 0xE705, z_replicate_loadi32, v128f, 4, 2>;
@@ -136,6 +141,7 @@ let Predicates = [FeatureVector] in {
def VL64 : UnaryAliasVRX<load, v64db, bdxaddr12pair>;
// Load logical element and zero.
+ def VLLEZ : UnaryVRXGeneric<"vllez", 0xE704>;
def VLLEZB : UnaryVRX<"vllezb", 0xE704, z_vllezi8, v128b, 1, 0>;
def VLLEZH : UnaryVRX<"vllezh", 0xE704, z_vllezi16, v128h, 2, 1>;
def VLLEZF : UnaryVRX<"vllezf", 0xE704, z_vllezi32, v128f, 4, 2>;
@@ -223,6 +229,7 @@ let Predicates = [FeatureVector] in {
let Predicates = [FeatureVector] in {
// Merge high.
+ def VMRH: BinaryVRRcGeneric<"vmrh", 0xE761>;
def VMRHB : BinaryVRRc<"vmrhb", 0xE761, z_merge_high, v128b, v128b, 0>;
def VMRHH : BinaryVRRc<"vmrhh", 0xE761, z_merge_high, v128h, v128h, 1>;
def VMRHF : BinaryVRRc<"vmrhf", 0xE761, z_merge_high, v128f, v128f, 2>;
@@ -231,6 +238,7 @@ let Predicates = [FeatureVector] in {
def : BinaryRRWithType<VMRHG, VR128, z_merge_high, v2f64>;
// Merge low.
+ def VMRL: BinaryVRRcGeneric<"vmrl", 0xE760>;
def VMRLB : BinaryVRRc<"vmrlb", 0xE760, z_merge_low, v128b, v128b, 0>;
def VMRLH : BinaryVRRc<"vmrlh", 0xE760, z_merge_low, v128h, v128h, 1>;
def VMRLF : BinaryVRRc<"vmrlf", 0xE760, z_merge_low, v128f, v128f, 2>;
@@ -245,6 +253,7 @@ let Predicates = [FeatureVector] in {
def VPDI : TernaryVRRc<"vpdi", 0xE784, z_permute_dwords, v128g, v128g>;
// Replicate.
+ def VREP: BinaryVRIcGeneric<"vrep", 0xE74D>;
def VREPB : BinaryVRIc<"vrepb", 0xE74D, z_splat, v128b, v128b, 0>;
def VREPH : BinaryVRIc<"vreph", 0xE74D, z_splat, v128h, v128h, 1>;
def VREPF : BinaryVRIc<"vrepf", 0xE74D, z_splat, v128f, v128f, 2>;
@@ -264,11 +273,13 @@ let Predicates = [FeatureVector] in {
let Predicates = [FeatureVector] in {
// Pack
+ def VPK : BinaryVRRcGeneric<"vpk", 0xE794>;
def VPKH : BinaryVRRc<"vpkh", 0xE794, z_pack, v128b, v128h, 1>;
def VPKF : BinaryVRRc<"vpkf", 0xE794, z_pack, v128h, v128f, 2>;
def VPKG : BinaryVRRc<"vpkg", 0xE794, z_pack, v128f, v128g, 3>;
// Pack saturate.
+ def VPKS : BinaryVRRbSPairGeneric<"vpks", 0xE797>;
defm VPKSH : BinaryVRRbSPair<"vpksh", 0xE797, int_s390_vpksh, z_packs_cc,
v128b, v128h, 1>;
defm VPKSF : BinaryVRRbSPair<"vpksf", 0xE797, int_s390_vpksf, z_packs_cc,
@@ -277,6 +288,7 @@ let Predicates = [FeatureVector] in {
v128f, v128g, 3>;
// Pack saturate logical.
+ def VPKLS : BinaryVRRbSPairGeneric<"vpkls", 0xE795>;
defm VPKLSH : BinaryVRRbSPair<"vpklsh", 0xE795, int_s390_vpklsh, z_packls_cc,
v128b, v128h, 1>;
defm VPKLSF : BinaryVRRbSPair<"vpklsf", 0xE795, int_s390_vpklsf, z_packls_cc,
@@ -285,6 +297,7 @@ let Predicates = [FeatureVector] in {
v128f, v128g, 3>;
// Sign-extend to doubleword.
+ def VSEG : UnaryVRRaGeneric<"vseg", 0xE75F>;
def VSEGB : UnaryVRRa<"vsegb", 0xE75F, z_vsei8, v128g, v128g, 0>;
def VSEGH : UnaryVRRa<"vsegh", 0xE75F, z_vsei16, v128g, v128g, 1>;
def VSEGF : UnaryVRRa<"vsegf", 0xE75F, z_vsei32, v128g, v128g, 2>;
@@ -293,21 +306,25 @@ let Predicates = [FeatureVector] in {
def : Pat<(z_vsei32_by_parts (v4i32 VR128:$src)), (VSEGF VR128:$src)>;
// Unpack high.
+ def VUPH : UnaryVRRaGeneric<"vuph", 0xE7D7>;
def VUPHB : UnaryVRRa<"vuphb", 0xE7D7, z_unpack_high, v128h, v128b, 0>;
def VUPHH : UnaryVRRa<"vuphh", 0xE7D7, z_unpack_high, v128f, v128h, 1>;
def VUPHF : UnaryVRRa<"vuphf", 0xE7D7, z_unpack_high, v128g, v128f, 2>;
// Unpack logical high.
+ def VUPLH : UnaryVRRaGeneric<"vuplh", 0xE7D5>;
def VUPLHB : UnaryVRRa<"vuplhb", 0xE7D5, z_unpackl_high, v128h, v128b, 0>;
def VUPLHH : UnaryVRRa<"vuplhh", 0xE7D5, z_unpackl_high, v128f, v128h, 1>;
def VUPLHF : UnaryVRRa<"vuplhf", 0xE7D5, z_unpackl_high, v128g, v128f, 2>;
// Unpack low.
+ def VUPL : UnaryVRRaGeneric<"vupl", 0xE7D6>;
def VUPLB : UnaryVRRa<"vuplb", 0xE7D6, z_unpack_low, v128h, v128b, 0>;
def VUPLHW : UnaryVRRa<"vuplhw", 0xE7D6, z_unpack_low, v128f, v128h, 1>;
def VUPLF : UnaryVRRa<"vuplf", 0xE7D6, z_unpack_low, v128g, v128f, 2>;
// Unpack logical low.
+ def VUPLL : UnaryVRRaGeneric<"vupll", 0xE7D4>;
def VUPLLB : UnaryVRRa<"vupllb", 0xE7D4, z_unpackl_low, v128h, v128b, 0>;
def VUPLLH : UnaryVRRa<"vupllh", 0xE7D4, z_unpackl_low, v128f, v128h, 1>;
def VUPLLF : UnaryVRRa<"vupllf", 0xE7D4, z_unpackl_low, v128g, v128f, 2>;
@@ -343,6 +360,7 @@ defm : GenericVectorOps<v2f64, v2i64>;
let Predicates = [FeatureVector] in {
// Add.
+ def VA : BinaryVRRcGeneric<"va", 0xE7F3>;
def VAB : BinaryVRRc<"vab", 0xE7F3, add, v128b, v128b, 0>;
def VAH : BinaryVRRc<"vah", 0xE7F3, add, v128h, v128h, 1>;
def VAF : BinaryVRRc<"vaf", 0xE7F3, add, v128f, v128f, 2>;
@@ -350,6 +368,7 @@ let Predicates = [FeatureVector] in {
def VAQ : BinaryVRRc<"vaq", 0xE7F3, int_s390_vaq, v128q, v128q, 4>;
// Add compute carry.
+ def VACC : BinaryVRRcGeneric<"vacc", 0xE7F1>;
def VACCB : BinaryVRRc<"vaccb", 0xE7F1, int_s390_vaccb, v128b, v128b, 0>;
def VACCH : BinaryVRRc<"vacch", 0xE7F1, int_s390_vacch, v128h, v128h, 1>;
def VACCF : BinaryVRRc<"vaccf", 0xE7F1, int_s390_vaccf, v128f, v128f, 2>;
@@ -357,9 +376,11 @@ let Predicates = [FeatureVector] in {
def VACCQ : BinaryVRRc<"vaccq", 0xE7F1, int_s390_vaccq, v128q, v128q, 4>;
// Add with carry.
+ def VAC : TernaryVRRdGeneric<"vac", 0xE7BB>;
def VACQ : TernaryVRRd<"vacq", 0xE7BB, int_s390_vacq, v128q, v128q, 4>;
// Add with carry compute carry.
+ def VACCC : TernaryVRRdGeneric<"vaccc", 0xE7B9>;
def VACCCQ : TernaryVRRd<"vacccq", 0xE7B9, int_s390_vacccq, v128q, v128q, 4>;
// And.
@@ -369,12 +390,14 @@ let Predicates = [FeatureVector] in {
def VNC : BinaryVRRc<"vnc", 0xE769, null_frag, v128any, v128any>;
// Average.
+ def VAVG : BinaryVRRcGeneric<"vavg", 0xE7F2>;
def VAVGB : BinaryVRRc<"vavgb", 0xE7F2, int_s390_vavgb, v128b, v128b, 0>;
def VAVGH : BinaryVRRc<"vavgh", 0xE7F2, int_s390_vavgh, v128h, v128h, 1>;
def VAVGF : BinaryVRRc<"vavgf", 0xE7F2, int_s390_vavgf, v128f, v128f, 2>;
def VAVGG : BinaryVRRc<"vavgg", 0xE7F2, int_s390_vavgg, v128g, v128g, 3>;
// Average logical.
+ def VAVGL : BinaryVRRcGeneric<"vavgl", 0xE7F0>;
def VAVGLB : BinaryVRRc<"vavglb", 0xE7F0, int_s390_vavglb, v128b, v128b, 0>;
def VAVGLH : BinaryVRRc<"vavglh", 0xE7F0, int_s390_vavglh, v128h, v128h, 1>;
def VAVGLF : BinaryVRRc<"vavglf", 0xE7F0, int_s390_vavglf, v128f, v128f, 2>;
@@ -384,12 +407,14 @@ let Predicates = [FeatureVector] in {
def VCKSM : BinaryVRRc<"vcksm", 0xE766, int_s390_vcksm, v128f, v128f>;
// Count leading zeros.
+ def VCLZ : UnaryVRRaGeneric<"vclz", 0xE753>;
def VCLZB : UnaryVRRa<"vclzb", 0xE753, ctlz, v128b, v128b, 0>;
def VCLZH : UnaryVRRa<"vclzh", 0xE753, ctlz, v128h, v128h, 1>;
def VCLZF : UnaryVRRa<"vclzf", 0xE753, ctlz, v128f, v128f, 2>;
def VCLZG : UnaryVRRa<"vclzg", 0xE753, ctlz, v128g, v128g, 3>;
// Count trailing zeros.
+ def VCTZ : UnaryVRRaGeneric<"vctz", 0xE752>;
def VCTZB : UnaryVRRa<"vctzb", 0xE752, cttz, v128b, v128b, 0>;
def VCTZH : UnaryVRRa<"vctzh", 0xE752, cttz, v128h, v128h, 1>;
def VCTZF : UnaryVRRa<"vctzf", 0xE752, cttz, v128f, v128f, 2>;
@@ -399,134 +424,158 @@ let Predicates = [FeatureVector] in {
def VX : BinaryVRRc<"vx", 0xE76D, null_frag, v128any, v128any>;
// Galois field multiply sum.
+ def VGFM : BinaryVRRcGeneric<"vgfm", 0xE7B4>;
def VGFMB : BinaryVRRc<"vgfmb", 0xE7B4, int_s390_vgfmb, v128h, v128b, 0>;
def VGFMH : BinaryVRRc<"vgfmh", 0xE7B4, int_s390_vgfmh, v128f, v128h, 1>;
def VGFMF : BinaryVRRc<"vgfmf", 0xE7B4, int_s390_vgfmf, v128g, v128f, 2>;
def VGFMG : BinaryVRRc<"vgfmg", 0xE7B4, int_s390_vgfmg, v128q, v128g, 3>;
// Galois field multiply sum and accumulate.
+ def VGFMA : TernaryVRRdGeneric<"vgfma", 0xE7BC>;
def VGFMAB : TernaryVRRd<"vgfmab", 0xE7BC, int_s390_vgfmab, v128h, v128b, 0>;
def VGFMAH : TernaryVRRd<"vgfmah", 0xE7BC, int_s390_vgfmah, v128f, v128h, 1>;
def VGFMAF : TernaryVRRd<"vgfmaf", 0xE7BC, int_s390_vgfmaf, v128g, v128f, 2>;
def VGFMAG : TernaryVRRd<"vgfmag", 0xE7BC, int_s390_vgfmag, v128q, v128g, 3>;
// Load complement.
+ def VLC : UnaryVRRaGeneric<"vlc", 0xE7DE>;
def VLCB : UnaryVRRa<"vlcb", 0xE7DE, z_vneg, v128b, v128b, 0>;
def VLCH : UnaryVRRa<"vlch", 0xE7DE, z_vneg, v128h, v128h, 1>;
def VLCF : UnaryVRRa<"vlcf", 0xE7DE, z_vneg, v128f, v128f, 2>;
def VLCG : UnaryVRRa<"vlcg", 0xE7DE, z_vneg, v128g, v128g, 3>;
// Load positive.
+ def VLP : UnaryVRRaGeneric<"vlp", 0xE7DF>;
def VLPB : UnaryVRRa<"vlpb", 0xE7DF, z_viabs8, v128b, v128b, 0>;
def VLPH : UnaryVRRa<"vlph", 0xE7DF, z_viabs16, v128h, v128h, 1>;
def VLPF : UnaryVRRa<"vlpf", 0xE7DF, z_viabs32, v128f, v128f, 2>;
def VLPG : UnaryVRRa<"vlpg", 0xE7DF, z_viabs64, v128g, v128g, 3>;
// Maximum.
+ def VMX : BinaryVRRcGeneric<"vmx", 0xE7FF>;
def VMXB : BinaryVRRc<"vmxb", 0xE7FF, null_frag, v128b, v128b, 0>;
def VMXH : BinaryVRRc<"vmxh", 0xE7FF, null_frag, v128h, v128h, 1>;
def VMXF : BinaryVRRc<"vmxf", 0xE7FF, null_frag, v128f, v128f, 2>;
def VMXG : BinaryVRRc<"vmxg", 0xE7FF, null_frag, v128g, v128g, 3>;
// Maximum logical.
+ def VMXL : BinaryVRRcGeneric<"vmxl", 0xE7FD>;
def VMXLB : BinaryVRRc<"vmxlb", 0xE7FD, null_frag, v128b, v128b, 0>;
def VMXLH : BinaryVRRc<"vmxlh", 0xE7FD, null_frag, v128h, v128h, 1>;
def VMXLF : BinaryVRRc<"vmxlf", 0xE7FD, null_frag, v128f, v128f, 2>;
def VMXLG : BinaryVRRc<"vmxlg", 0xE7FD, null_frag, v128g, v128g, 3>;
// Minimum.
+ def VMN : BinaryVRRcGeneric<"vmn", 0xE7FE>;
def VMNB : BinaryVRRc<"vmnb", 0xE7FE, null_frag, v128b, v128b, 0>;
def VMNH : BinaryVRRc<"vmnh", 0xE7FE, null_frag, v128h, v128h, 1>;
def VMNF : BinaryVRRc<"vmnf", 0xE7FE, null_frag, v128f, v128f, 2>;
def VMNG : BinaryVRRc<"vmng", 0xE7FE, null_frag, v128g, v128g, 3>;
// Minimum logical.
+ def VMNL : BinaryVRRcGeneric<"vmnl", 0xE7FC>;
def VMNLB : BinaryVRRc<"vmnlb", 0xE7FC, null_frag, v128b, v128b, 0>;
def VMNLH : BinaryVRRc<"vmnlh", 0xE7FC, null_frag, v128h, v128h, 1>;
def VMNLF : BinaryVRRc<"vmnlf", 0xE7FC, null_frag, v128f, v128f, 2>;
def VMNLG : BinaryVRRc<"vmnlg", 0xE7FC, null_frag, v128g, v128g, 3>;
// Multiply and add low.
+ def VMAL : TernaryVRRdGeneric<"vmal", 0xE7AA>;
def VMALB : TernaryVRRd<"vmalb", 0xE7AA, z_muladd, v128b, v128b, 0>;
def VMALHW : TernaryVRRd<"vmalhw", 0xE7AA, z_muladd, v128h, v128h, 1>;
def VMALF : TernaryVRRd<"vmalf", 0xE7AA, z_muladd, v128f, v128f, 2>;
// Multiply and add high.
+ def VMAH : TernaryVRRdGeneric<"vmah", 0xE7AB>;
def VMAHB : TernaryVRRd<"vmahb", 0xE7AB, int_s390_vmahb, v128b, v128b, 0>;
def VMAHH : TernaryVRRd<"vmahh", 0xE7AB, int_s390_vmahh, v128h, v128h, 1>;
def VMAHF : TernaryVRRd<"vmahf", 0xE7AB, int_s390_vmahf, v128f, v128f, 2>;
// Multiply and add logical high.
+ def VMALH : TernaryVRRdGeneric<"vmalh", 0xE7A9>;
def VMALHB : TernaryVRRd<"vmalhb", 0xE7A9, int_s390_vmalhb, v128b, v128b, 0>;
def VMALHH : TernaryVRRd<"vmalhh", 0xE7A9, int_s390_vmalhh, v128h, v128h, 1>;
def VMALHF : TernaryVRRd<"vmalhf", 0xE7A9, int_s390_vmalhf, v128f, v128f, 2>;
// Multiply and add even.
+ def VMAE : TernaryVRRdGeneric<"vmae", 0xE7AE>;
def VMAEB : TernaryVRRd<"vmaeb", 0xE7AE, int_s390_vmaeb, v128h, v128b, 0>;
def VMAEH : TernaryVRRd<"vmaeh", 0xE7AE, int_s390_vmaeh, v128f, v128h, 1>;
def VMAEF : TernaryVRRd<"vmaef", 0xE7AE, int_s390_vmaef, v128g, v128f, 2>;
// Multiply and add logical even.
+ def VMALE : TernaryVRRdGeneric<"vmale", 0xE7AC>;
def VMALEB : TernaryVRRd<"vmaleb", 0xE7AC, int_s390_vmaleb, v128h, v128b, 0>;
def VMALEH : TernaryVRRd<"vmaleh", 0xE7AC, int_s390_vmaleh, v128f, v128h, 1>;
def VMALEF : TernaryVRRd<"vmalef", 0xE7AC, int_s390_vmalef, v128g, v128f, 2>;
// Multiply and add odd.
+ def VMAO : TernaryVRRdGeneric<"vmao", 0xE7AF>;
def VMAOB : TernaryVRRd<"vmaob", 0xE7AF, int_s390_vmaob, v128h, v128b, 0>;
def VMAOH : TernaryVRRd<"vmaoh", 0xE7AF, int_s390_vmaoh, v128f, v128h, 1>;
def VMAOF : TernaryVRRd<"vmaof", 0xE7AF, int_s390_vmaof, v128g, v128f, 2>;
// Multiply and add logical odd.
+ def VMALO : TernaryVRRdGeneric<"vmalo", 0xE7AD>;
def VMALOB : TernaryVRRd<"vmalob", 0xE7AD, int_s390_vmalob, v128h, v128b, 0>;
def VMALOH : TernaryVRRd<"vmaloh", 0xE7AD, int_s390_vmaloh, v128f, v128h, 1>;
def VMALOF : TernaryVRRd<"vmalof", 0xE7AD, int_s390_vmalof, v128g, v128f, 2>;
// Multiply high.
+ def VMH : BinaryVRRcGeneric<"vmh", 0xE7A3>;
def VMHB : BinaryVRRc<"vmhb", 0xE7A3, int_s390_vmhb, v128b, v128b, 0>;
def VMHH : BinaryVRRc<"vmhh", 0xE7A3, int_s390_vmhh, v128h, v128h, 1>;
def VMHF : BinaryVRRc<"vmhf", 0xE7A3, int_s390_vmhf, v128f, v128f, 2>;
// Multiply logical high.
+ def VMLH : BinaryVRRcGeneric<"vmlh", 0xE7A1>;
def VMLHB : BinaryVRRc<"vmlhb", 0xE7A1, int_s390_vmlhb, v128b, v128b, 0>;
def VMLHH : BinaryVRRc<"vmlhh", 0xE7A1, int_s390_vmlhh, v128h, v128h, 1>;
def VMLHF : BinaryVRRc<"vmlhf", 0xE7A1, int_s390_vmlhf, v128f, v128f, 2>;
// Multiply low.
+ def VML : BinaryVRRcGeneric<"vml", 0xE7A2>;
def VMLB : BinaryVRRc<"vmlb", 0xE7A2, mul, v128b, v128b, 0>;
def VMLHW : BinaryVRRc<"vmlhw", 0xE7A2, mul, v128h, v128h, 1>;
def VMLF : BinaryVRRc<"vmlf", 0xE7A2, mul, v128f, v128f, 2>;
// Multiply even.
+ def VME : BinaryVRRcGeneric<"vme", 0xE7A6>;
def VMEB : BinaryVRRc<"vmeb", 0xE7A6, int_s390_vmeb, v128h, v128b, 0>;
def VMEH : BinaryVRRc<"vmeh", 0xE7A6, int_s390_vmeh, v128f, v128h, 1>;
def VMEF : BinaryVRRc<"vmef", 0xE7A6, int_s390_vmef, v128g, v128f, 2>;
// Multiply logical even.
+ def VMLE : BinaryVRRcGeneric<"vmle", 0xE7A4>;
def VMLEB : BinaryVRRc<"vmleb", 0xE7A4, int_s390_vmleb, v128h, v128b, 0>;
def VMLEH : BinaryVRRc<"vmleh", 0xE7A4, int_s390_vmleh, v128f, v128h, 1>;
def VMLEF : BinaryVRRc<"vmlef", 0xE7A4, int_s390_vmlef, v128g, v128f, 2>;
// Multiply odd.
+ def VMO : BinaryVRRcGeneric<"vmo", 0xE7A7>;
def VMOB : BinaryVRRc<"vmob", 0xE7A7, int_s390_vmob, v128h, v128b, 0>;
def VMOH : BinaryVRRc<"vmoh", 0xE7A7, int_s390_vmoh, v128f, v128h, 1>;
def VMOF : BinaryVRRc<"vmof", 0xE7A7, int_s390_vmof, v128g, v128f, 2>;
// Multiply logical odd.
+ def VMLO : BinaryVRRcGeneric<"vmlo", 0xE7A5>;
def VMLOB : BinaryVRRc<"vmlob", 0xE7A5, int_s390_vmlob, v128h, v128b, 0>;
def VMLOH : BinaryVRRc<"vmloh", 0xE7A5, int_s390_vmloh, v128f, v128h, 1>;
def VMLOF : BinaryVRRc<"vmlof", 0xE7A5, int_s390_vmlof, v128g, v128f, 2>;
// Nor.
def VNO : BinaryVRRc<"vno", 0xE76B, null_frag, v128any, v128any>;
+ def : InstAlias<"vnot\t$V1, $V2", (VNO VR128:$V1, VR128:$V2, VR128:$V2), 0>;
// Or.
def VO : BinaryVRRc<"vo", 0xE76A, null_frag, v128any, v128any>;
// Population count.
- def VPOPCT : BinaryVRRa<"vpopct", 0xE750>;
+ def VPOPCT : UnaryVRRaGeneric<"vpopct", 0xE750>;
def : Pat<(v16i8 (z_popcnt VR128:$x)), (VPOPCT VR128:$x, 0)>;
// Element rotate left logical (with vector shift amount).
+ def VERLLV : BinaryVRRcGeneric<"verllv", 0xE773>;
def VERLLVB : BinaryVRRc<"verllvb", 0xE773, int_s390_verllvb,
v128b, v128b, 0>;
def VERLLVH : BinaryVRRc<"verllvh", 0xE773, int_s390_verllvh,
@@ -537,48 +586,56 @@ let Predicates = [FeatureVector] in {
v128g, v128g, 3>;
// Element rotate left logical (with scalar shift amount).
+ def VERLL : BinaryVRSaGeneric<"verll", 0xE733>;
def VERLLB : BinaryVRSa<"verllb", 0xE733, int_s390_verllb, v128b, v128b, 0>;
def VERLLH : BinaryVRSa<"verllh", 0xE733, int_s390_verllh, v128h, v128h, 1>;
def VERLLF : BinaryVRSa<"verllf", 0xE733, int_s390_verllf, v128f, v128f, 2>;
def VERLLG : BinaryVRSa<"verllg", 0xE733, int_s390_verllg, v128g, v128g, 3>;
// Element rotate and insert under mask.
+ def VERIM : QuaternaryVRIdGeneric<"verim", 0xE772>;
def VERIMB : QuaternaryVRId<"verimb", 0xE772, int_s390_verimb, v128b, v128b, 0>;
def VERIMH : QuaternaryVRId<"verimh", 0xE772, int_s390_verimh, v128h, v128h, 1>;
def VERIMF : QuaternaryVRId<"verimf", 0xE772, int_s390_verimf, v128f, v128f, 2>;
def VERIMG : QuaternaryVRId<"verimg", 0xE772, int_s390_verimg, v128g, v128g, 3>;
// Element shift left (with vector shift amount).
+ def VESLV : BinaryVRRcGeneric<"veslv", 0xE770>;
def VESLVB : BinaryVRRc<"veslvb", 0xE770, z_vshl, v128b, v128b, 0>;
def VESLVH : BinaryVRRc<"veslvh", 0xE770, z_vshl, v128h, v128h, 1>;
def VESLVF : BinaryVRRc<"veslvf", 0xE770, z_vshl, v128f, v128f, 2>;
def VESLVG : BinaryVRRc<"veslvg", 0xE770, z_vshl, v128g, v128g, 3>;
// Element shift left (with scalar shift amount).
+ def VESL : BinaryVRSaGeneric<"vesl", 0xE730>;
def VESLB : BinaryVRSa<"veslb", 0xE730, z_vshl_by_scalar, v128b, v128b, 0>;
def VESLH : BinaryVRSa<"veslh", 0xE730, z_vshl_by_scalar, v128h, v128h, 1>;
def VESLF : BinaryVRSa<"veslf", 0xE730, z_vshl_by_scalar, v128f, v128f, 2>;
def VESLG : BinaryVRSa<"veslg", 0xE730, z_vshl_by_scalar, v128g, v128g, 3>;
// Element shift right arithmetic (with vector shift amount).
+ def VESRAV : BinaryVRRcGeneric<"vesrav", 0xE77A>;
def VESRAVB : BinaryVRRc<"vesravb", 0xE77A, z_vsra, v128b, v128b, 0>;
def VESRAVH : BinaryVRRc<"vesravh", 0xE77A, z_vsra, v128h, v128h, 1>;
def VESRAVF : BinaryVRRc<"vesravf", 0xE77A, z_vsra, v128f, v128f, 2>;
def VESRAVG : BinaryVRRc<"vesravg", 0xE77A, z_vsra, v128g, v128g, 3>;
// Element shift right arithmetic (with scalar shift amount).
+ def VESRA : BinaryVRSaGeneric<"vesra", 0xE73A>;
def VESRAB : BinaryVRSa<"vesrab", 0xE73A, z_vsra_by_scalar, v128b, v128b, 0>;
def VESRAH : BinaryVRSa<"vesrah", 0xE73A, z_vsra_by_scalar, v128h, v128h, 1>;
def VESRAF : BinaryVRSa<"vesraf", 0xE73A, z_vsra_by_scalar, v128f, v128f, 2>;
def VESRAG : BinaryVRSa<"vesrag", 0xE73A, z_vsra_by_scalar, v128g, v128g, 3>;
// Element shift right logical (with vector shift amount).
+ def VESRLV : BinaryVRRcGeneric<"vesrlv", 0xE778>;
def VESRLVB : BinaryVRRc<"vesrlvb", 0xE778, z_vsrl, v128b, v128b, 0>;
def VESRLVH : BinaryVRRc<"vesrlvh", 0xE778, z_vsrl, v128h, v128h, 1>;
def VESRLVF : BinaryVRRc<"vesrlvf", 0xE778, z_vsrl, v128f, v128f, 2>;
def VESRLVG : BinaryVRRc<"vesrlvg", 0xE778, z_vsrl, v128g, v128g, 3>;
// Element shift right logical (with scalar shift amount).
+ def VESRL : BinaryVRSaGeneric<"vesrl", 0xE738>;
def VESRLB : BinaryVRSa<"vesrlb", 0xE738, z_vsrl_by_scalar, v128b, v128b, 0>;
def VESRLH : BinaryVRSa<"vesrlh", 0xE738, z_vsrl_by_scalar, v128h, v128h, 1>;
def VESRLF : BinaryVRSa<"vesrlf", 0xE738, z_vsrl_by_scalar, v128f, v128f, 2>;
@@ -608,6 +665,7 @@ let Predicates = [FeatureVector] in {
def VSRLB : BinaryVRRc<"vsrlb", 0xE77D, int_s390_vsrlb, v128b, v128b>;
// Subtract.
+ def VS : BinaryVRRcGeneric<"vs", 0xE7F7>;
def VSB : BinaryVRRc<"vsb", 0xE7F7, sub, v128b, v128b, 0>;
def VSH : BinaryVRRc<"vsh", 0xE7F7, sub, v128h, v128h, 1>;
def VSF : BinaryVRRc<"vsf", 0xE7F7, sub, v128f, v128f, 2>;
@@ -615,6 +673,7 @@ let Predicates = [FeatureVector] in {
def VSQ : BinaryVRRc<"vsq", 0xE7F7, int_s390_vsq, v128q, v128q, 4>;
// Subtract compute borrow indication.
+ def VSCBI : BinaryVRRcGeneric<"vscbi", 0xE7F5>;
def VSCBIB : BinaryVRRc<"vscbib", 0xE7F5, int_s390_vscbib, v128b, v128b, 0>;
def VSCBIH : BinaryVRRc<"vscbih", 0xE7F5, int_s390_vscbih, v128h, v128h, 1>;
def VSCBIF : BinaryVRRc<"vscbif", 0xE7F5, int_s390_vscbif, v128f, v128f, 2>;
@@ -622,21 +681,26 @@ let Predicates = [FeatureVector] in {
def VSCBIQ : BinaryVRRc<"vscbiq", 0xE7F5, int_s390_vscbiq, v128q, v128q, 4>;
// Subtract with borrow indication.
+ def VSBI : TernaryVRRdGeneric<"vsbi", 0xE7BF>;
def VSBIQ : TernaryVRRd<"vsbiq", 0xE7BF, int_s390_vsbiq, v128q, v128q, 4>;
// Subtract with borrow compute borrow indication.
+ def VSBCBI : TernaryVRRdGeneric<"vsbcbi", 0xE7BD>;
def VSBCBIQ : TernaryVRRd<"vsbcbiq", 0xE7BD, int_s390_vsbcbiq,
v128q, v128q, 4>;
// Sum across doubleword.
+ def VSUMG : BinaryVRRcGeneric<"vsumg", 0xE765>;
def VSUMGH : BinaryVRRc<"vsumgh", 0xE765, z_vsum, v128g, v128h, 1>;
def VSUMGF : BinaryVRRc<"vsumgf", 0xE765, z_vsum, v128g, v128f, 2>;
// Sum across quadword.
+ def VSUMQ : BinaryVRRcGeneric<"vsumq", 0xE767>;
def VSUMQF : BinaryVRRc<"vsumqf", 0xE767, z_vsum, v128q, v128f, 2>;
def VSUMQG : BinaryVRRc<"vsumqg", 0xE767, z_vsum, v128q, v128g, 3>;
// Sum across word.
+ def VSUM : BinaryVRRcGeneric<"vsum", 0xE764>;
def VSUMB : BinaryVRRc<"vsumb", 0xE764, z_vsum, v128f, v128b, 0>;
def VSUMH : BinaryVRRc<"vsumh", 0xE764, z_vsum, v128f, v128h, 1>;
}
@@ -737,6 +801,7 @@ defm : IntegerMinMaxVectorOps<v2i64, z_vicmphl, VMNLG, VMXLG>;
let Predicates = [FeatureVector] in {
// Element compare.
let Defs = [CC] in {
+ def VEC : CompareVRRaGeneric<"vec", 0xE7DB>;
def VECB : CompareVRRa<"vecb", 0xE7DB, null_frag, v128b, 0>;
def VECH : CompareVRRa<"vech", 0xE7DB, null_frag, v128h, 1>;
def VECF : CompareVRRa<"vecf", 0xE7DB, null_frag, v128f, 2>;
@@ -745,6 +810,7 @@ let Predicates = [FeatureVector] in {
// Element compare logical.
let Defs = [CC] in {
+ def VECL : CompareVRRaGeneric<"vecl", 0xE7D9>;
def VECLB : CompareVRRa<"veclb", 0xE7D9, null_frag, v128b, 0>;
def VECLH : CompareVRRa<"veclh", 0xE7D9, null_frag, v128h, 1>;
def VECLF : CompareVRRa<"veclf", 0xE7D9, null_frag, v128f, 2>;
@@ -752,6 +818,7 @@ let Predicates = [FeatureVector] in {
}
// Compare equal.
+ def VCEQ : BinaryVRRbSPairGeneric<"vceq", 0xE7F8>;
defm VCEQB : BinaryVRRbSPair<"vceqb", 0xE7F8, z_vicmpe, z_vicmpes,
v128b, v128b, 0>;
defm VCEQH : BinaryVRRbSPair<"vceqh", 0xE7F8, z_vicmpe, z_vicmpes,
@@ -762,6 +829,7 @@ let Predicates = [FeatureVector] in {
v128g, v128g, 3>;
// Compare high.
+ def VCH : BinaryVRRbSPairGeneric<"vch", 0xE7FB>;
defm VCHB : BinaryVRRbSPair<"vchb", 0xE7FB, z_vicmph, z_vicmphs,
v128b, v128b, 0>;
defm VCHH : BinaryVRRbSPair<"vchh", 0xE7FB, z_vicmph, z_vicmphs,
@@ -772,6 +840,7 @@ let Predicates = [FeatureVector] in {
v128g, v128g, 3>;
// Compare high logical.
+ def VCHL : BinaryVRRbSPairGeneric<"vchl", 0xE7F9>;
defm VCHLB : BinaryVRRbSPair<"vchlb", 0xE7F9, z_vicmphl, z_vicmphls,
v128b, v128b, 0>;
defm VCHLH : BinaryVRRbSPair<"vchlh", 0xE7F9, z_vicmphl, z_vicmphls,
@@ -798,69 +867,86 @@ multiclass VectorRounding<Instruction insn, TypedReg tr> {
def : FPConversion<insn, ffloor, tr, tr, 4, 7>;
def : FPConversion<insn, fceil, tr, tr, 4, 6>;
def : FPConversion<insn, ftrunc, tr, tr, 4, 5>;
- def : FPConversion<insn, frnd, tr, tr, 4, 1>;
+ def : FPConversion<insn, fround, tr, tr, 4, 1>;
}
let Predicates = [FeatureVector] in {
// Add.
+ def VFA : BinaryVRRcFloatGeneric<"vfa", 0xE7E3>;
def VFADB : BinaryVRRc<"vfadb", 0xE7E3, fadd, v128db, v128db, 3, 0>;
def WFADB : BinaryVRRc<"wfadb", 0xE7E3, fadd, v64db, v64db, 3, 8>;
// Convert from fixed 64-bit.
+ def VCDG : TernaryVRRaFloatGeneric<"vcdg", 0xE7C3>;
def VCDGB : TernaryVRRa<"vcdgb", 0xE7C3, null_frag, v128db, v128g, 3, 0>;
def WCDGB : TernaryVRRa<"wcdgb", 0xE7C3, null_frag, v64db, v64g, 3, 8>;
def : FPConversion<VCDGB, sint_to_fp, v128db, v128g, 0, 0>;
// Convert from logical 64-bit.
+ def VCDLG : TernaryVRRaFloatGeneric<"vcdlg", 0xE7C1>;
def VCDLGB : TernaryVRRa<"vcdlgb", 0xE7C1, null_frag, v128db, v128g, 3, 0>;
def WCDLGB : TernaryVRRa<"wcdlgb", 0xE7C1, null_frag, v64db, v64g, 3, 8>;
def : FPConversion<VCDLGB, uint_to_fp, v128db, v128g, 0, 0>;
// Convert to fixed 64-bit.
+ def VCGD : TernaryVRRaFloatGeneric<"vcgd", 0xE7C2>;
def VCGDB : TernaryVRRa<"vcgdb", 0xE7C2, null_frag, v128g, v128db, 3, 0>;
def WCGDB : TernaryVRRa<"wcgdb", 0xE7C2, null_frag, v64g, v64db, 3, 8>;
// Rounding mode should agree with SystemZInstrFP.td.
def : FPConversion<VCGDB, fp_to_sint, v128g, v128db, 0, 5>;
// Convert to logical 64-bit.
+ def VCLGD : TernaryVRRaFloatGeneric<"vclgd", 0xE7C0>;
def VCLGDB : TernaryVRRa<"vclgdb", 0xE7C0, null_frag, v128g, v128db, 3, 0>;
def WCLGDB : TernaryVRRa<"wclgdb", 0xE7C0, null_frag, v64g, v64db, 3, 8>;
// Rounding mode should agree with SystemZInstrFP.td.
def : FPConversion<VCLGDB, fp_to_uint, v128g, v128db, 0, 5>;
// Divide.
+ def VFD : BinaryVRRcFloatGeneric<"vfd", 0xE7E5>;
def VFDDB : BinaryVRRc<"vfddb", 0xE7E5, fdiv, v128db, v128db, 3, 0>;
def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, fdiv, v64db, v64db, 3, 8>;
// Load FP integer.
+ def VFI : TernaryVRRaFloatGeneric<"vfi", 0xE7C7>;
def VFIDB : TernaryVRRa<"vfidb", 0xE7C7, int_s390_vfidb, v128db, v128db, 3, 0>;
def WFIDB : TernaryVRRa<"wfidb", 0xE7C7, null_frag, v64db, v64db, 3, 8>;
defm : VectorRounding<VFIDB, v128db>;
defm : VectorRounding<WFIDB, v64db>;
// Load lengthened.
+ def VLDE : UnaryVRRaFloatGeneric<"vlde", 0xE7C4>;
def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_vextend, v128db, v128eb, 2, 0>;
- def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, fextend, v64db, v32eb, 2, 8>;
+ def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, fpextend, v64db, v32eb, 2, 8>;
    // Load rounded.
+ def VLED : TernaryVRRaFloatGeneric<"vled", 0xE7C5>;
def VLEDB : TernaryVRRa<"vledb", 0xE7C5, null_frag, v128eb, v128db, 3, 0>;
def WLEDB : TernaryVRRa<"wledb", 0xE7C5, null_frag, v32eb, v64db, 3, 8>;
def : Pat<(v4f32 (z_vround (v2f64 VR128:$src))), (VLEDB VR128:$src, 0, 0)>;
- def : FPConversion<WLEDB, fround, v32eb, v64db, 0, 0>;
+ def : FPConversion<WLEDB, fpround, v32eb, v64db, 0, 0>;
// Multiply.
+ def VFM : BinaryVRRcFloatGeneric<"vfm", 0xE7E7>;
def VFMDB : BinaryVRRc<"vfmdb", 0xE7E7, fmul, v128db, v128db, 3, 0>;
def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, fmul, v64db, v64db, 3, 8>;
// Multiply and add.
+ def VFMA : TernaryVRReFloatGeneric<"vfma", 0xE78F>;
def VFMADB : TernaryVRRe<"vfmadb", 0xE78F, fma, v128db, v128db, 0, 3>;
def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, fma, v64db, v64db, 8, 3>;
// Multiply and subtract.
+ def VFMS : TernaryVRReFloatGeneric<"vfms", 0xE78E>;
def VFMSDB : TernaryVRRe<"vfmsdb", 0xE78E, fms, v128db, v128db, 0, 3>;
def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, fms, v64db, v64db, 8, 3>;
- // Load complement,
+ // Perform sign operation.
+ def VFPSO : BinaryVRRaFloatGeneric<"vfpso", 0xE7CC>;
+ def VFPSODB : BinaryVRRa<"vfpsodb", 0xE7CC, null_frag, v128db, v128db, 3, 0>;
+ def WFPSODB : BinaryVRRa<"wfpsodb", 0xE7CC, null_frag, v64db, v64db, 3, 8>;
+
+ // Load complement.
def VFLCDB : UnaryVRRa<"vflcdb", 0xE7CC, fneg, v128db, v128db, 3, 0, 0>;
def WFLCDB : UnaryVRRa<"wflcdb", 0xE7CC, fneg, v64db, v64db, 3, 8, 0>;
@@ -873,15 +959,18 @@ let Predicates = [FeatureVector] in {
def WFLPDB : UnaryVRRa<"wflpdb", 0xE7CC, fabs, v64db, v64db, 3, 8, 2>;
// Square root.
+ def VFSQ : UnaryVRRaFloatGeneric<"vfsq", 0xE7CE>;
def VFSQDB : UnaryVRRa<"vfsqdb", 0xE7CE, fsqrt, v128db, v128db, 3, 0>;
def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, fsqrt, v64db, v64db, 3, 8>;
// Subtract.
+ def VFS : BinaryVRRcFloatGeneric<"vfs", 0xE7E2>;
def VFSDB : BinaryVRRc<"vfsdb", 0xE7E2, fsub, v128db, v128db, 3, 0>;
def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, fsub, v64db, v64db, 3, 8>;
// Test data class immediate.
let Defs = [CC] in {
+ def VFTCI : BinaryVRIeFloatGeneric<"vftci", 0xE74A>;
def VFTCIDB : BinaryVRIe<"vftcidb", 0xE74A, z_vftci, v128g, v128db, 3, 0>;
def WFTCIDB : BinaryVRIe<"wftcidb", 0xE74A, null_frag, v64g, v64db, 3, 8>;
}
@@ -893,26 +982,33 @@ let Predicates = [FeatureVector] in {
let Predicates = [FeatureVector] in {
// Compare scalar.
- let Defs = [CC] in
+ let Defs = [CC] in {
+ def WFC : CompareVRRaFloatGeneric<"wfc", 0xE7CB>;
def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, z_fcmp, v64db, 3>;
+ }
// Compare and signal scalar.
- let Defs = [CC] in
+ let Defs = [CC] in {
+ def WFK : CompareVRRaFloatGeneric<"wfk", 0xE7CA>;
def WFKDB : CompareVRRa<"wfkdb", 0xE7CA, null_frag, v64db, 3>;
+ }
// Compare equal.
+ def VFCE : BinaryVRRcSPairFloatGeneric<"vfce", 0xE7E8>;
defm VFCEDB : BinaryVRRcSPair<"vfcedb", 0xE7E8, z_vfcmpe, z_vfcmpes,
v128g, v128db, 3, 0>;
defm WFCEDB : BinaryVRRcSPair<"wfcedb", 0xE7E8, null_frag, null_frag,
v64g, v64db, 3, 8>;
// Compare high.
+ def VFCH : BinaryVRRcSPairFloatGeneric<"vfch", 0xE7EB>;
defm VFCHDB : BinaryVRRcSPair<"vfchdb", 0xE7EB, z_vfcmph, z_vfcmphs,
v128g, v128db, 3, 0>;
defm WFCHDB : BinaryVRRcSPair<"wfchdb", 0xE7EB, null_frag, null_frag,
v64g, v64db, 3, 8>;
// Compare high or equal.
+ def VFCHE : BinaryVRRcSPairFloatGeneric<"vfche", 0xE7EA>;
defm VFCHEDB : BinaryVRRcSPair<"vfchedb", 0xE7EA, z_vfcmphe, z_vfcmphes,
v128g, v128db, 3, 0>;
defm WFCHEDB : BinaryVRRcSPair<"wfchedb", 0xE7EA, null_frag, null_frag,
@@ -983,11 +1079,13 @@ def : Pat<(v2i64 (z_replicate GR64:$scalar)),
// Moving 32-bit values between GPRs and FPRs can be done using VLVGF
// and VLGVF.
-def LEFR : UnaryAliasVRS<VR32, GR32>;
-def LFER : UnaryAliasVRS<GR64, VR32>;
-def : Pat<(f32 (bitconvert (i32 GR32:$src))), (LEFR GR32:$src)>;
-def : Pat<(i32 (bitconvert (f32 VR32:$src))),
- (EXTRACT_SUBREG (LFER VR32:$src), subreg_l32)>;
+let Predicates = [FeatureVector] in {
+ def LEFR : UnaryAliasVRS<VR32, GR32>;
+ def LFER : UnaryAliasVRS<GR64, VR32>;
+ def : Pat<(f32 (bitconvert (i32 GR32:$src))), (LEFR GR32:$src)>;
+ def : Pat<(i32 (bitconvert (f32 VR32:$src))),
+ (EXTRACT_SUBREG (LFER VR32:$src), subreg_l32)>;
+}
// Floating-point values are stored in element 0 of the corresponding
// vector register. Scalar to vector conversion is just a subreg and
@@ -1036,62 +1134,67 @@ let AddedComplexity = 4 in {
//===----------------------------------------------------------------------===//
let Predicates = [FeatureVector] in {
- defm VFAEB : TernaryVRRbSPair<"vfaeb", 0xE782, int_s390_vfaeb, z_vfae_cc,
- v128b, v128b, 0, 0>;
- defm VFAEH : TernaryVRRbSPair<"vfaeh", 0xE782, int_s390_vfaeh, z_vfae_cc,
- v128h, v128h, 1, 0>;
- defm VFAEF : TernaryVRRbSPair<"vfaef", 0xE782, int_s390_vfaef, z_vfae_cc,
- v128f, v128f, 2, 0>;
- defm VFAEZB : TernaryVRRbSPair<"vfaezb", 0xE782, int_s390_vfaezb, z_vfaez_cc,
- v128b, v128b, 0, 2>;
- defm VFAEZH : TernaryVRRbSPair<"vfaezh", 0xE782, int_s390_vfaezh, z_vfaez_cc,
- v128h, v128h, 1, 2>;
- defm VFAEZF : TernaryVRRbSPair<"vfaezf", 0xE782, int_s390_vfaezf, z_vfaez_cc,
- v128f, v128f, 2, 2>;
-
- defm VFEEB : BinaryVRRbSPair<"vfeeb", 0xE780, int_s390_vfeeb, z_vfee_cc,
- v128b, v128b, 0, 0, 1>;
- defm VFEEH : BinaryVRRbSPair<"vfeeh", 0xE780, int_s390_vfeeh, z_vfee_cc,
- v128h, v128h, 1, 0, 1>;
- defm VFEEF : BinaryVRRbSPair<"vfeef", 0xE780, int_s390_vfeef, z_vfee_cc,
- v128f, v128f, 2, 0, 1>;
- defm VFEEZB : BinaryVRRbSPair<"vfeezb", 0xE780, int_s390_vfeezb, z_vfeez_cc,
- v128b, v128b, 0, 2, 3>;
- defm VFEEZH : BinaryVRRbSPair<"vfeezh", 0xE780, int_s390_vfeezh, z_vfeez_cc,
- v128h, v128h, 1, 2, 3>;
- defm VFEEZF : BinaryVRRbSPair<"vfeezf", 0xE780, int_s390_vfeezf, z_vfeez_cc,
- v128f, v128f, 2, 2, 3>;
-
- defm VFENEB : BinaryVRRbSPair<"vfeneb", 0xE781, int_s390_vfeneb, z_vfene_cc,
- v128b, v128b, 0, 0, 1>;
- defm VFENEH : BinaryVRRbSPair<"vfeneh", 0xE781, int_s390_vfeneh, z_vfene_cc,
- v128h, v128h, 1, 0, 1>;
- defm VFENEF : BinaryVRRbSPair<"vfenef", 0xE781, int_s390_vfenef, z_vfene_cc,
- v128f, v128f, 2, 0, 1>;
+ defm VFAE : TernaryOptVRRbSPairGeneric<"vfae", 0xE782>;
+ defm VFAEB : TernaryOptVRRbSPair<"vfaeb", 0xE782, int_s390_vfaeb,
+ z_vfae_cc, v128b, v128b, 0>;
+ defm VFAEH : TernaryOptVRRbSPair<"vfaeh", 0xE782, int_s390_vfaeh,
+ z_vfae_cc, v128h, v128h, 1>;
+ defm VFAEF : TernaryOptVRRbSPair<"vfaef", 0xE782, int_s390_vfaef,
+ z_vfae_cc, v128f, v128f, 2>;
+ defm VFAEZB : TernaryOptVRRbSPair<"vfaezb", 0xE782, int_s390_vfaezb,
+ z_vfaez_cc, v128b, v128b, 0, 2>;
+ defm VFAEZH : TernaryOptVRRbSPair<"vfaezh", 0xE782, int_s390_vfaezh,
+ z_vfaez_cc, v128h, v128h, 1, 2>;
+ defm VFAEZF : TernaryOptVRRbSPair<"vfaezf", 0xE782, int_s390_vfaezf,
+ z_vfaez_cc, v128f, v128f, 2, 2>;
+
+ defm VFEE : BinaryExtraVRRbSPairGeneric<"vfee", 0xE780>;
+ defm VFEEB : BinaryExtraVRRbSPair<"vfeeb", 0xE780, int_s390_vfeeb,
+ z_vfee_cc, v128b, v128b, 0>;
+ defm VFEEH : BinaryExtraVRRbSPair<"vfeeh", 0xE780, int_s390_vfeeh,
+ z_vfee_cc, v128h, v128h, 1>;
+ defm VFEEF : BinaryExtraVRRbSPair<"vfeef", 0xE780, int_s390_vfeef,
+ z_vfee_cc, v128f, v128f, 2>;
+ defm VFEEZB : BinaryVRRbSPair<"vfeezb", 0xE780, int_s390_vfeezb,
+ z_vfeez_cc, v128b, v128b, 0, 2>;
+ defm VFEEZH : BinaryVRRbSPair<"vfeezh", 0xE780, int_s390_vfeezh,
+ z_vfeez_cc, v128h, v128h, 1, 2>;
+ defm VFEEZF : BinaryVRRbSPair<"vfeezf", 0xE780, int_s390_vfeezf,
+ z_vfeez_cc, v128f, v128f, 2, 2>;
+
+ defm VFENE : BinaryExtraVRRbSPairGeneric<"vfene", 0xE781>;
+ defm VFENEB : BinaryExtraVRRbSPair<"vfeneb", 0xE781, int_s390_vfeneb,
+ z_vfene_cc, v128b, v128b, 0>;
+ defm VFENEH : BinaryExtraVRRbSPair<"vfeneh", 0xE781, int_s390_vfeneh,
+ z_vfene_cc, v128h, v128h, 1>;
+ defm VFENEF : BinaryExtraVRRbSPair<"vfenef", 0xE781, int_s390_vfenef,
+ z_vfene_cc, v128f, v128f, 2>;
defm VFENEZB : BinaryVRRbSPair<"vfenezb", 0xE781, int_s390_vfenezb,
- z_vfenez_cc, v128b, v128b, 0, 2, 3>;
+ z_vfenez_cc, v128b, v128b, 0, 2>;
defm VFENEZH : BinaryVRRbSPair<"vfenezh", 0xE781, int_s390_vfenezh,
- z_vfenez_cc, v128h, v128h, 1, 2, 3>;
+ z_vfenez_cc, v128h, v128h, 1, 2>;
defm VFENEZF : BinaryVRRbSPair<"vfenezf", 0xE781, int_s390_vfenezf,
- z_vfenez_cc, v128f, v128f, 2, 2, 3>;
-
- defm VISTRB : UnaryVRRaSPair<"vistrb", 0xE75C, int_s390_vistrb, z_vistr_cc,
- v128b, v128b, 0>;
- defm VISTRH : UnaryVRRaSPair<"vistrh", 0xE75C, int_s390_vistrh, z_vistr_cc,
- v128h, v128h, 1>;
- defm VISTRF : UnaryVRRaSPair<"vistrf", 0xE75C, int_s390_vistrf, z_vistr_cc,
- v128f, v128f, 2>;
-
- defm VSTRCB : QuaternaryVRRdSPair<"vstrcb", 0xE78A, int_s390_vstrcb,
- z_vstrc_cc, v128b, v128b, 0, 0>;
- defm VSTRCH : QuaternaryVRRdSPair<"vstrch", 0xE78A, int_s390_vstrch,
- z_vstrc_cc, v128h, v128h, 1, 0>;
- defm VSTRCF : QuaternaryVRRdSPair<"vstrcf", 0xE78A, int_s390_vstrcf,
- z_vstrc_cc, v128f, v128f, 2, 0>;
- defm VSTRCZB : QuaternaryVRRdSPair<"vstrczb", 0xE78A, int_s390_vstrczb,
- z_vstrcz_cc, v128b, v128b, 0, 2>;
- defm VSTRCZH : QuaternaryVRRdSPair<"vstrczh", 0xE78A, int_s390_vstrczh,
- z_vstrcz_cc, v128h, v128h, 1, 2>;
- defm VSTRCZF : QuaternaryVRRdSPair<"vstrczf", 0xE78A, int_s390_vstrczf,
- z_vstrcz_cc, v128f, v128f, 2, 2>;
+ z_vfenez_cc, v128f, v128f, 2, 2>;
+
+ defm VISTR : UnaryExtraVRRaSPairGeneric<"vistr", 0xE75C>;
+ defm VISTRB : UnaryExtraVRRaSPair<"vistrb", 0xE75C, int_s390_vistrb,
+ z_vistr_cc, v128b, v128b, 0>;
+ defm VISTRH : UnaryExtraVRRaSPair<"vistrh", 0xE75C, int_s390_vistrh,
+ z_vistr_cc, v128h, v128h, 1>;
+ defm VISTRF : UnaryExtraVRRaSPair<"vistrf", 0xE75C, int_s390_vistrf,
+ z_vistr_cc, v128f, v128f, 2>;
+
+ defm VSTRC : QuaternaryOptVRRdSPairGeneric<"vstrc", 0xE78A>;
+ defm VSTRCB : QuaternaryOptVRRdSPair<"vstrcb", 0xE78A, int_s390_vstrcb,
+ z_vstrc_cc, v128b, v128b, 0>;
+ defm VSTRCH : QuaternaryOptVRRdSPair<"vstrch", 0xE78A, int_s390_vstrch,
+ z_vstrc_cc, v128h, v128h, 1>;
+ defm VSTRCF : QuaternaryOptVRRdSPair<"vstrcf", 0xE78A, int_s390_vstrcf,
+ z_vstrc_cc, v128f, v128f, 2>;
+ defm VSTRCZB : QuaternaryOptVRRdSPair<"vstrczb", 0xE78A, int_s390_vstrczb,
+ z_vstrcz_cc, v128b, v128b, 0, 2>;
+ defm VSTRCZH : QuaternaryOptVRRdSPair<"vstrczh", 0xE78A, int_s390_vstrczh,
+ z_vstrcz_cc, v128h, v128h, 1, 2>;
+ defm VSTRCZF : QuaternaryOptVRRdSPair<"vstrczf", 0xE78A, int_s390_vstrczf,
+ z_vstrcz_cc, v128f, v128f, 2, 2>;
}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZLDCleanup.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZLDCleanup.cpp
index 2cdf2f9bf990..ec8ce6e911fa 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZLDCleanup.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZLDCleanup.cpp
@@ -33,7 +33,7 @@ public:
SystemZLDCleanup(const SystemZTargetMachine &tm)
: MachineFunctionPass(ID), TII(nullptr), MF(nullptr) {}
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "SystemZ Local Dynamic TLS Access Clean-up";
}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp
index a24d47d2d16b..14ff6afbd4ae 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp
@@ -133,14 +133,12 @@ public:
SystemZLongBranch(const SystemZTargetMachine &tm)
: MachineFunctionPass(ID), TII(nullptr) {}
- const char *getPassName() const override {
- return "SystemZ Long Branch";
- }
+ StringRef getPassName() const override { return "SystemZ Long Branch"; }
bool runOnMachineFunction(MachineFunction &F) override;
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
private:
@@ -228,6 +226,10 @@ TerminatorInfo SystemZLongBranch::describeTerminator(MachineInstr &MI) {
// Relaxes to A(G)HI and BRCL, which is 6 bytes longer.
Terminator.ExtraRelaxSize = 6;
break;
+ case SystemZ::BRCTH:
+ // Never needs to be relaxed.
+ Terminator.ExtraRelaxSize = 0;
+ break;
case SystemZ::CRJ:
case SystemZ::CLRJ:
// Relaxes to a C(L)R/BRCL sequence, which is 2 bytes longer.
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp
new file mode 100644
index 000000000000..ab6020f3f189
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZMachineScheduler.cpp
@@ -0,0 +1,153 @@
+//-- SystemZMachineScheduler.cpp - SystemZ Scheduler Interface -*- C++ -*---==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// -------------------------- Post RA scheduling ---------------------------- //
+// SystemZPostRASchedStrategy is a scheduling strategy which is plugged into
+// the MachineScheduler. It has a sorted Available set of SUs and a pickNode()
+// implementation that looks to optimize decoder grouping and balance the
+// usage of processor resources.
+//===----------------------------------------------------------------------===//
+
+#include "SystemZMachineScheduler.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "misched"
+
+#ifndef NDEBUG
+// Print the set of SUs
+void SystemZPostRASchedStrategy::SUSet::
+dump(SystemZHazardRecognizer &HazardRec) {
+ dbgs() << "{";
+ for (auto &SU : *this) {
+ HazardRec.dumpSU(SU, dbgs());
+ if (SU != *rbegin())
+ dbgs() << ", ";
+ }
+ dbgs() << "}\n";
+}
+#endif
+
+SystemZPostRASchedStrategy::
+SystemZPostRASchedStrategy(const MachineSchedContext *C)
+ : DAG(nullptr), HazardRec(C) {}
+
+void SystemZPostRASchedStrategy::initialize(ScheduleDAGMI *dag) {
+ DAG = dag;
+ HazardRec.setDAG(dag);
+ HazardRec.Reset();
+}
+
+// Pick the next node to schedule.
+SUnit *SystemZPostRASchedStrategy::pickNode(bool &IsTopNode) {
+ // Only scheduling top-down.
+ IsTopNode = true;
+
+ if (Available.empty())
+ return nullptr;
+
+ // If only one choice, return it.
+ if (Available.size() == 1) {
+ DEBUG (dbgs() << "+++ Only one: ";
+ HazardRec.dumpSU(*Available.begin(), dbgs()); dbgs() << "\n";);
+ return *Available.begin();
+ }
+
+ // All nodes that are possible to schedule are stored in the
+ // Available set.
+ DEBUG(dbgs() << "+++ Available: "; Available.dump(HazardRec););
+
+ Candidate Best;
+ for (auto *SU : Available) {
+
+ // SU is the next candidate to be compared against current Best.
+ Candidate c(SU, HazardRec);
+
+ // Remember which SU is the best candidate.
+ if (Best.SU == nullptr || c < Best) {
+ Best = c;
+ DEBUG(dbgs() << "+++ Best so far: ";
+ HazardRec.dumpSU(Best.SU, dbgs());
+ if (Best.GroupingCost != 0)
+ dbgs() << "\tGrouping cost:" << Best.GroupingCost;
+ if (Best.ResourcesCost != 0)
+ dbgs() << " Resource cost:" << Best.ResourcesCost;
+ dbgs() << " Height:" << Best.SU->getHeight();
+ dbgs() << "\n";);
+ }
+
+ // Once we know we have seen all SUs that affect grouping or use unbuffered
+ // resources, we can stop iterating if Best looks good.
+ if (!SU->isScheduleHigh && Best.noCost())
+ break;
+ }
+
+ assert (Best.SU != nullptr);
+ return Best.SU;
+}
+
+SystemZPostRASchedStrategy::Candidate::
+Candidate(SUnit *SU_, SystemZHazardRecognizer &HazardRec) : Candidate() {
+ SU = SU_;
+
+ // Check the grouping cost. For a node that must begin / end a
+ // group, it is positive if it would do so prematurely, or negative
+ // if it would fit naturally into the schedule.
+ GroupingCost = HazardRec.groupingCost(SU);
+
+ // Check the resources cost for this SU.
+ ResourcesCost = HazardRec.resourcesCost(SU);
+}
+
+bool SystemZPostRASchedStrategy::Candidate::
+operator<(const Candidate &other) {
+
+ // Check decoder grouping.
+ if (GroupingCost < other.GroupingCost)
+ return true;
+ if (GroupingCost > other.GroupingCost)
+ return false;
+
+ // Compare the use of resources.
+ if (ResourcesCost < other.ResourcesCost)
+ return true;
+ if (ResourcesCost > other.ResourcesCost)
+ return false;
+
+ // Higher SU is otherwise generally better.
+ if (SU->getHeight() > other.SU->getHeight())
+ return true;
+ if (SU->getHeight() < other.SU->getHeight())
+ return false;
+
+ // If all same, fall back to original order.
+ if (SU->NodeNum < other.SU->NodeNum)
+ return true;
+
+ return false;
+}
+
+void SystemZPostRASchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
+ DEBUG(dbgs() << "+++ Scheduling SU(" << SU->NodeNum << ")\n";);
+
+ // Remove SU from Available set and update HazardRec.
+ Available.erase(SU);
+ HazardRec.EmitInstruction(SU);
+}
+
+void SystemZPostRASchedStrategy::releaseTopNode(SUnit *SU) {
+ // Set isScheduleHigh flag on all SUs that we want to consider first in
+ // pickNode().
+ const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
+ bool AffectsGrouping = (SC->isValid() && (SC->BeginGroup || SC->EndGroup));
+ SU->isScheduleHigh = (AffectsGrouping || SU->isUnbuffered);
+
+ // Put all released SUs in the Available set.
+ Available.insert(SU);
+}
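
The pickNode() policy in the new file above reduces to: scan the sorted Available set, keep the lowest-cost candidate (grouping cost, then resource cost, then height, then original order), and stop early once the remaining units neither affect grouping nor use unbuffered resources and the best so far is already free of cost. A minimal standalone sketch of that loop, with simplified types and illustrative names only (none of them are LLVM's):

#include <cassert>
#include <vector>

struct Unit {
  unsigned Num;        // original order
  unsigned Height;     // critical-path height
  bool ScheduleHigh;   // affects grouping or uses an unbuffered resource
  int GroupingCost;    // stand-in for HazardRec.groupingCost(SU)
  int ResourcesCost;   // stand-in for HazardRec.resourcesCost(SU)
};

struct Cand {
  const Unit *U;
  bool noCost() const { return U->GroupingCost <= 0 && U->ResourcesCost == 0; }
  bool operator<(const Cand &O) const {
    if (U->GroupingCost != O.U->GroupingCost)
      return U->GroupingCost < O.U->GroupingCost;
    if (U->ResourcesCost != O.U->ResourcesCost)
      return U->ResourcesCost < O.U->ResourcesCost;
    if (U->Height != O.U->Height)
      return U->Height > O.U->Height;  // taller is better
    return U->Num < O.U->Num;          // fall back to original order
  }
};

// Available is assumed to be sorted so that schedule-high units come first.
const Unit *pickBest(const std::vector<Unit> &Available) {
  Cand Best{nullptr};
  for (const Unit &U : Available) {
    Cand C{&U};
    if (!Best.U || C < Best)
      Best = C;
    // Past the schedule-high units, a cost-free Best cannot be beaten on
    // grouping or resource cost, so stop scanning.
    if (!U.ScheduleHigh && Best.noCost())
      break;
  }
  return Best.U;
}

int main() {
  std::vector<Unit> Avail = {
      {0, 5, true, 1, 0},  // would start a decoder group prematurely
      {1, 7, true, 0, 0},  // fits naturally: this one is picked
      {2, 3, false, 0, 1},
  };
  assert(pickBest(Avail)->Num == 1);
}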
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZMachineScheduler.h b/contrib/llvm/lib/Target/SystemZ/SystemZMachineScheduler.h
new file mode 100644
index 000000000000..b919758b70e7
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZMachineScheduler.h
@@ -0,0 +1,112 @@
+//==-- SystemZMachineScheduler.h - SystemZ Scheduler Interface -*- C++ -*---==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// -------------------------- Post RA scheduling ---------------------------- //
+// SystemZPostRASchedStrategy is a scheduling strategy which is plugged into
+// the MachineScheduler. It has a sorted Available set of SUs and a pickNode()
+// implementation that looks to optimize decoder grouping and balance the
+// usage of processor resources.
+//===----------------------------------------------------------------------===//
+
+#include "SystemZInstrInfo.h"
+#include "SystemZHazardRecognizer.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/Support/Debug.h"
+
+#ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMACHINESCHEDULER_H
+#define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMACHINESCHEDULER_H
+
+using namespace llvm;
+
+namespace llvm {
+
+/// A MachineSchedStrategy implementation for SystemZ post RA scheduling.
+class SystemZPostRASchedStrategy : public MachineSchedStrategy {
+ ScheduleDAGMI *DAG;
+
+ /// A candidate during instruction evaluation.
+ struct Candidate {
+ SUnit *SU;
+
+ /// The decoding cost.
+ int GroupingCost;
+
+ /// The processor resources cost.
+ int ResourcesCost;
+
+ Candidate() : SU(nullptr), GroupingCost(0), ResourcesCost(0) {}
+ Candidate(SUnit *SU_, SystemZHazardRecognizer &HazardRec);
+
+ // Compare two candidates.
+ bool operator<(const Candidate &other);
+
+ // Check if this node is free of cost ("as good as any").
+ inline bool noCost() {
+ return (GroupingCost <= 0 && !ResourcesCost);
+ }
+ };
+
+ // A sorter for the Available set that makes sure that SUs are considered
+ // in the best order.
+ struct SUSorter {
+ bool operator() (SUnit *lhs, SUnit *rhs) const {
+ if (lhs->isScheduleHigh && !rhs->isScheduleHigh)
+ return true;
+ if (!lhs->isScheduleHigh && rhs->isScheduleHigh)
+ return false;
+
+ if (lhs->getHeight() > rhs->getHeight())
+ return true;
+ else if (lhs->getHeight() < rhs->getHeight())
+ return false;
+
+ return (lhs->NodeNum < rhs->NodeNum);
+ }
+ };
+ // A set of SUs with a sorter and dump method.
+ struct SUSet : std::set<SUnit*, SUSorter> {
+ #ifndef NDEBUG
+ void dump(SystemZHazardRecognizer &HazardRec);
+ #endif
+ };
+
+ /// The set of available SUs to schedule next.
+ SUSet Available;
+
+ // HazardRecognizer that tracks the scheduler state for the current
+ // region.
+ SystemZHazardRecognizer HazardRec;
+
+ public:
+ SystemZPostRASchedStrategy(const MachineSchedContext *C);
+
+ /// PostRA scheduling does not track pressure.
+ bool shouldTrackPressure() const override { return false; }
+
+ /// Initialize the strategy after building the DAG for a new region.
+ void initialize(ScheduleDAGMI *dag) override;
+
+ /// Pick the next node to schedule, or return NULL.
+ SUnit *pickNode(bool &IsTopNode) override;
+
+ /// ScheduleDAGMI has scheduled an instruction - tell HazardRec
+ /// about it.
+ void schedNode(SUnit *SU, bool IsTopNode) override;
+
+ /// SU has had all predecessor dependencies resolved. Put it into
+ /// Available.
+ void releaseTopNode(SUnit *SU) override;
+
+ /// Currently only scheduling top-down, so this method is empty.
+ void releaseBottomNode(SUnit *SU) override {};
+};
+
+} // namespace llvm
+
+#endif /* LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZMACHINESCHEDULER_H */
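
releaseTopNode() marks units that affect grouping or use unbuffered resources, and the SUSorter comparator keeps those at the front of Available so the early exit in pickNode() stays valid. A small standalone sketch of that ordering, again with simplified types and illustrative names only:

#include <cassert>
#include <set>

struct Unit {
  unsigned Num;        // original instruction order
  unsigned Height;     // critical-path height
  bool ScheduleHigh;   // set for grouping-affecting / unbuffered users
};

// Same ordering idea as SUSorter: schedule-high units first, then taller
// units, then original order as the tie-breaker.
struct UnitSorter {
  bool operator()(const Unit *L, const Unit *R) const {
    if (L->ScheduleHigh != R->ScheduleHigh)
      return L->ScheduleHigh;
    if (L->Height != R->Height)
      return L->Height > R->Height;
    return L->Num < R->Num;
  }
};

int main() {
  Unit A{0, 3, false}, B{1, 9, false}, C{2, 2, true};
  std::set<const Unit *, UnitSorter> Available{&A, &B, &C};
  // C is considered first even though B is taller, because C is schedule-high.
  assert((*Available.begin())->Num == 2);
}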
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td b/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td
index 17b076d88a34..7bb4fe5afb3f 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZOperands.td
@@ -133,6 +133,13 @@ class BDLMode<string type, string bitsize, string dispsize, string suffix,
!cast<Immediate>("disp"##dispsize##"imm"##bitsize),
!cast<Immediate>("imm"##bitsize))>;
+// A BDMode paired with a register length operand.
+class BDRMode<string type, string bitsize, string dispsize, string suffix>
+ : AddressingMode<type, bitsize, dispsize, suffix, "", 3, "BDRAddr",
+ (ops !cast<RegisterOperand>("ADDR"##bitsize),
+ !cast<Immediate>("disp"##dispsize##"imm"##bitsize),
+ !cast<RegisterOperand>("GR"##bitsize))>;
+
// An addressing mode with a base, displacement and a vector index.
class BDVMode<string bitsize, string dispsize>
: AddressOperand<bitsize, dispsize, "", "BDVAddr",
@@ -230,6 +237,12 @@ def UIMM32 : SDNodeXForm<imm, [{
MVT::i64);
}]>;
+// Truncate an immediate to a 48-bit unsigned quantity.
+def UIMM48 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(uint64_t(N->getZExtValue()) & 0xffffffffffff,
+ SDLoc(N), MVT::i64);
+}]>;
+
// Negate and then truncate an immediate to a 32-bit unsigned quantity.
def NEGIMM32 : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(uint32_t(-N->getZExtValue()), SDLoc(N),
@@ -252,6 +265,7 @@ def S16Imm : ImmediateAsmOperand<"S16Imm">;
def U16Imm : ImmediateAsmOperand<"U16Imm">;
def S32Imm : ImmediateAsmOperand<"S32Imm">;
def U32Imm : ImmediateAsmOperand<"U32Imm">;
+def U48Imm : ImmediateAsmOperand<"U48Imm">;
//===----------------------------------------------------------------------===//
// i32 immediates
@@ -425,6 +439,10 @@ def imm64zx32n : Immediate<i64, [{
return isUInt<32>(-N->getSExtValue());
}], NEGIMM32, "U32Imm">;
+def imm64zx48 : Immediate<i64, [{
+ return isUInt<64>(N->getZExtValue());
+}], UIMM48, "U48Imm">;
+
def imm64 : ImmLeaf<i64, [{}]>, Operand<i64>;
//===----------------------------------------------------------------------===//
@@ -442,7 +460,9 @@ def fpimmneg0 : PatLeaf<(fpimm), [{ return N->isExactlyValue(-0.0); }]>;
//===----------------------------------------------------------------------===//
// PC-relative asm operands.
+def PCRel12 : PCRelAsmOperand<"12">;
def PCRel16 : PCRelAsmOperand<"16">;
+def PCRel24 : PCRelAsmOperand<"24">;
def PCRel32 : PCRelAsmOperand<"32">;
def PCRelTLS16 : PCRelTLSAsmOperand<"16">;
def PCRelTLS32 : PCRelTLSAsmOperand<"32">;
@@ -458,6 +478,20 @@ def brtarget32 : PCRelOperand<OtherVT, PCRel32> {
let DecoderMethod = "decodePC32DBLBranchOperand";
}
+// Variants of brtarget for use with branch prediction preload.
+def brtarget12bpp : PCRelOperand<OtherVT, PCRel12> {
+ let EncoderMethod = "getPC12DBLBPPEncoding";
+ let DecoderMethod = "decodePC12DBLBranchOperand";
+}
+def brtarget16bpp : PCRelOperand<OtherVT, PCRel16> {
+ let EncoderMethod = "getPC16DBLBPPEncoding";
+ let DecoderMethod = "decodePC16DBLBranchOperand";
+}
+def brtarget24bpp : PCRelOperand<OtherVT, PCRel24> {
+ let EncoderMethod = "getPC24DBLBPPEncoding";
+ let DecoderMethod = "decodePC24DBLBranchOperand";
+}
+
// Variants of brtarget16/32 with an optional additional TLS symbol.
// These are used to annotate calls to __tls_get_offset.
def tlssym : Operand<i64> { }
@@ -498,6 +532,7 @@ def BDAddr64Disp20 : AddressAsmOperand<"BDAddr", "64", "20">;
def BDXAddr64Disp12 : AddressAsmOperand<"BDXAddr", "64", "12">;
def BDXAddr64Disp20 : AddressAsmOperand<"BDXAddr", "64", "20">;
def BDLAddr64Disp12Len8 : AddressAsmOperand<"BDLAddr", "64", "12", "Len8">;
+def BDRAddr64Disp12 : AddressAsmOperand<"BDRAddr", "64", "12">;
def BDVAddr64Disp12 : AddressAsmOperand<"BDVAddr", "64", "12">;
// DAG patterns and operands for addressing modes. Each mode has
@@ -544,23 +579,13 @@ def dynalloc12only : BDXMode<"DynAlloc", "64", "12", "Only">;
def laaddr12pair : BDXMode<"LAAddr", "64", "12", "Pair">;
def laaddr20pair : BDXMode<"LAAddr", "64", "20", "Pair">;
def bdladdr12onlylen8 : BDLMode<"BDLAddr", "64", "12", "Only", "8">;
+def bdraddr12only : BDRMode<"BDRAddr", "64", "12", "Only">;
def bdvaddr12only : BDVMode< "64", "12">;
//===----------------------------------------------------------------------===//
// Miscellaneous
//===----------------------------------------------------------------------===//
-// Access registers. At present we just use them for accessing the thread
-// pointer, so we don't expose them as register to LLVM.
-def AccessReg : AsmOperandClass {
- let Name = "AccessReg";
- let ParserMethod = "parseAccessReg";
-}
-def access_reg : Immediate<i32, [{ return N->getZExtValue() < 16; }],
- NOOP_SDNodeXForm, "AccessReg"> {
- let ParserMatchClass = AccessReg;
-}
-
// A 4-bit condition-code mask.
def cond4 : PatLeaf<(i32 imm), [{ return (N->getZExtValue() < 16); }]>,
Operand<i32> {
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td b/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td
index 8d031f1ea05d..fde26ed4e1c5 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZOperators.td
@@ -35,9 +35,6 @@ def SDT_ZWrapOffset : SDTypeProfile<1, 2,
SDTCisSameAs<0, 2>,
SDTCisPtrTy<0>]>;
def SDT_ZAdjDynAlloc : SDTypeProfile<1, 0, [SDTCisVT<0, i64>]>;
-def SDT_ZExtractAccess : SDTypeProfile<1, 1,
- [SDTCisVT<0, i32>,
- SDTCisVT<1, i32>]>;
def SDT_ZGR128Binary32 : SDTypeProfile<1, 2,
[SDTCisVT<0, untyped>,
SDTCisVT<1, untyped>,
@@ -186,8 +183,6 @@ def z_br_ccmask : SDNode<"SystemZISD::BR_CCMASK", SDT_ZBRCCMask,
def z_select_ccmask : SDNode<"SystemZISD::SELECT_CCMASK", SDT_ZSelectCCMask,
[SDNPInGlue]>;
def z_adjdynalloc : SDNode<"SystemZISD::ADJDYNALLOC", SDT_ZAdjDynAlloc>;
-def z_extract_access : SDNode<"SystemZISD::EXTRACT_ACCESS",
- SDT_ZExtractAccess>;
def z_popcnt : SDNode<"SystemZISD::POPCNT", SDTIntUnaryOp>;
def z_umul_lohi64 : SDNode<"SystemZISD::UMUL_LOHI64", SDT_ZGR128Binary64>;
def z_sdivrem32 : SDNode<"SystemZISD::SDIVREM32", SDT_ZGR128Binary32>;
@@ -387,15 +382,6 @@ def zext8 : PatFrag<(ops node:$src), (and node:$src, 0xff)>;
def zext16 : PatFrag<(ops node:$src), (and node:$src, 0xffff)>;
def zext32 : PatFrag<(ops node:$src), (zext (i32 node:$src))>;
-// Match extensions of an i32 to an i64, followed by an AND of the low
-// i8 or i16 part.
-def zext8dbl : PatFrag<(ops node:$src), (zext8 (anyext node:$src))>;
-def zext16dbl : PatFrag<(ops node:$src), (zext16 (anyext node:$src))>;
-
-// Typed floating-point loads.
-def loadf32 : PatFrag<(ops node:$src), (f32 (load node:$src))>;
-def loadf64 : PatFrag<(ops node:$src), (f64 (load node:$src))>;
-
// Extending loads in which the extension type can be signed.
def asextload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{
unsigned Type = cast<LoadSDNode>(N)->getExtensionType();
@@ -529,7 +515,7 @@ def inserthf : PatFrag<(ops node:$src1, node:$src2),
// ORs that can be treated as insertions.
def or_as_inserti8 : PatFrag<(ops node:$src1, node:$src2),
(or node:$src1, node:$src2), [{
- unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
+ unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
return CurDAG->MaskedValueIsZero(N->getOperand(0),
APInt::getLowBitsSet(BitWidth, 8));
}]>;
@@ -537,7 +523,7 @@ def or_as_inserti8 : PatFrag<(ops node:$src1, node:$src2),
// ORs that can be treated as reversed insertions.
def or_as_revinserti8 : PatFrag<(ops node:$src1, node:$src2),
(or node:$src1, node:$src2), [{
- unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits();
+ unsigned BitWidth = N->getValueType(0).getScalarSizeInBits();
return CurDAG->MaskedValueIsZero(N->getOperand(1),
APInt::getLowBitsSet(BitWidth, 8));
}]>;
@@ -584,6 +570,12 @@ class storeu<SDPatternOperator operator, SDPatternOperator store = store>
: PatFrag<(ops node:$value, node:$addr),
(store (operator node:$value), node:$addr)>;
+// Create a store operator that performs the given inherent operation
+// and stores the resulting value.
+class storei<SDPatternOperator operator, SDPatternOperator store = store>
+ : PatFrag<(ops node:$addr),
+ (store (operator), node:$addr)>;
+
// Vector representation of all-zeros and all-ones.
def z_vzero : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 0))))>;
def z_vones : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 65535))))>;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZProcessors.td b/contrib/llvm/lib/Target/SystemZ/SystemZProcessors.td
index 9adc0189e650..1cdc0949ff4a 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZProcessors.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZProcessors.td
@@ -7,96 +7,29 @@
//
//===----------------------------------------------------------------------===//
//
-// Processor and feature definitions.
+// Processor definitions.
+//
+// For compatibility with other compilers on the platform, each model can
+// be identified either by the system name (e.g. z10) or the level of the
+// architecture the model supports, as identified by the edition level
+// of the z/Architecture Principles of Operation document (e.g. arch8).
+//
+// The minimum architecture level supported by LLVM is as defined in
+// the Eighth Edition of the PoP (i.e. as implemented on z10).
//
//===----------------------------------------------------------------------===//
-class SystemZFeature<string extname, string intname, string desc>
- : Predicate<"Subtarget->has"##intname##"()">,
- AssemblerPredicate<"Feature"##intname, extname>,
- SubtargetFeature<extname, "Has"##intname, "true", desc>;
-
-class SystemZMissingFeature<string intname>
- : Predicate<"!Subtarget->has"##intname##"()">;
-
-def FeatureDistinctOps : SystemZFeature<
- "distinct-ops", "DistinctOps",
- "Assume that the distinct-operands facility is installed"
->;
-
-def FeatureLoadStoreOnCond : SystemZFeature<
- "load-store-on-cond", "LoadStoreOnCond",
- "Assume that the load/store-on-condition facility is installed"
->;
-
-def FeatureLoadStoreOnCond2 : SystemZFeature<
- "load-store-on-cond-2", "LoadStoreOnCond2",
- "Assume that the load/store-on-condition facility 2 is installed"
->;
-
-def FeatureHighWord : SystemZFeature<
- "high-word", "HighWord",
- "Assume that the high-word facility is installed"
->;
-
-def FeatureFPExtension : SystemZFeature<
- "fp-extension", "FPExtension",
- "Assume that the floating-point extension facility is installed"
->;
-
-def FeaturePopulationCount : SystemZFeature<
- "population-count", "PopulationCount",
- "Assume that the population-count facility is installed"
->;
-
-def FeatureFastSerialization : SystemZFeature<
- "fast-serialization", "FastSerialization",
- "Assume that the fast-serialization facility is installed"
->;
-
-def FeatureInterlockedAccess1 : SystemZFeature<
- "interlocked-access1", "InterlockedAccess1",
- "Assume that interlocked-access facility 1 is installed"
->;
-def FeatureNoInterlockedAccess1 : SystemZMissingFeature<"InterlockedAccess1">;
+def : ProcessorModel<"generic", NoSchedModel, []>;
-def FeatureMiscellaneousExtensions : SystemZFeature<
- "miscellaneous-extensions", "MiscellaneousExtensions",
- "Assume that the miscellaneous-extensions facility is installed"
->;
+def : ProcessorModel<"arch8", NoSchedModel, Arch8SupportedFeatures.List>;
+def : ProcessorModel<"z10", NoSchedModel, Arch8SupportedFeatures.List>;
-def FeatureTransactionalExecution : SystemZFeature<
- "transactional-execution", "TransactionalExecution",
- "Assume that the transactional-execution facility is installed"
->;
+def : ProcessorModel<"arch9", Z196Model, Arch9SupportedFeatures.List>;
+def : ProcessorModel<"z196", Z196Model, Arch9SupportedFeatures.List>;
-def FeatureProcessorAssist : SystemZFeature<
- "processor-assist", "ProcessorAssist",
- "Assume that the processor-assist facility is installed"
->;
+def : ProcessorModel<"arch10", ZEC12Model, Arch10SupportedFeatures.List>;
+def : ProcessorModel<"zEC12", ZEC12Model, Arch10SupportedFeatures.List>;
-def FeatureVector : SystemZFeature<
- "vector", "Vector",
- "Assume that the vectory facility is installed"
->;
-def FeatureNoVector : SystemZMissingFeature<"Vector">;
+def : ProcessorModel<"arch11", Z13Model, Arch11SupportedFeatures.List>;
+def : ProcessorModel<"z13", Z13Model, Arch11SupportedFeatures.List>;
-def : Processor<"generic", NoItineraries, []>;
-def : Processor<"z10", NoItineraries, []>;
-def : Processor<"z196", NoItineraries,
- [FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord,
- FeatureFPExtension, FeaturePopulationCount,
- FeatureFastSerialization, FeatureInterlockedAccess1]>;
-def : Processor<"zEC12", NoItineraries,
- [FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord,
- FeatureFPExtension, FeaturePopulationCount,
- FeatureFastSerialization, FeatureInterlockedAccess1,
- FeatureMiscellaneousExtensions,
- FeatureTransactionalExecution, FeatureProcessorAssist]>;
-def : Processor<"z13", NoItineraries,
- [FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord,
- FeatureFPExtension, FeaturePopulationCount,
- FeatureFastSerialization, FeatureInterlockedAccess1,
- FeatureMiscellaneousExtensions,
- FeatureTransactionalExecution, FeatureProcessorAssist,
- FeatureVector, FeatureLoadStoreOnCond2]>;
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index b5e5fd4bfc4f..6ef8000d6f43 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -59,6 +59,11 @@ SystemZRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(SystemZ::R15L);
Reserved.set(SystemZ::R15H);
Reserved.set(SystemZ::R14Q);
+
+ // A0 and A1 hold the thread pointer.
+ Reserved.set(SystemZ::A0);
+ Reserved.set(SystemZ::A1);
+
return Reserved;
}
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
index 0d8b08b9cbdd..47d2f75cc11a 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZRegisterInfo.td
@@ -36,15 +36,16 @@ def subreg_hr32 : ComposedSubRegIndex<subreg_h64, subreg_r32>;
// associated operand called NAME. SIZE is the size and alignment
// of the registers and REGLIST is the list of individual registers.
multiclass SystemZRegClass<string name, list<ValueType> types, int size,
- dag regList> {
+ dag regList, bit allocatable = 1> {
def AsmOperand : AsmOperandClass {
let Name = name;
let ParserMethod = "parse"##name;
let RenderMethod = "addRegOperands";
}
- def Bit : RegisterClass<"SystemZ", types, size, regList> {
- let Size = size;
- }
+ let isAllocatable = allocatable in
+ def Bit : RegisterClass<"SystemZ", types, size, regList> {
+ let Size = size;
+ }
def "" : RegisterOperand<!cast<RegisterClass>(name##"Bit")> {
let ParserMatchClass = !cast<AsmOperandClass>(name##"AsmOperand");
}
@@ -121,6 +122,14 @@ defm ADDR64 : SystemZRegClass<"ADDR64", [i64], 64, (sub GR64Bit, R0D)>;
// of a GR128.
defm ADDR128 : SystemZRegClass<"ADDR128", [untyped], 128, (sub GR128Bit, R0Q)>;
+// Any type register. Used for .insn directives when we don't know what the
+// register types could be.
+defm AnyReg : SystemZRegClass<"AnyReg",
+ [i64, f64, v8i8, v4i16, v2i32, v2f32], 64,
+ (add (sequence "R%uD", 0, 15),
+ (sequence "F%uD", 0, 15),
+ (sequence "V%u", 0, 15))>;
+
//===----------------------------------------------------------------------===//
// Floating-point registers
//===----------------------------------------------------------------------===//
@@ -284,3 +293,14 @@ def v128any : TypedReg<untyped, VR128>;
def CC : SystemZReg<"cc">;
let isAllocatable = 0 in
def CCRegs : RegisterClass<"SystemZ", [i32], 32, (add CC)>;
+
+// Access registers.
+class ACR32<bits<16> num, string n> : SystemZReg<n> {
+ let HWEncoding = num;
+}
+foreach I = 0-15 in {
+ def A#I : ACR32<I, "a"#I>, DwarfRegNum<[!add(I, 48)]>;
+}
+defm AR32 : SystemZRegClass<"AR32", [i32], 32,
+ (add (sequence "A%u", 0, 15)), 0>;
+
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSchedule.td b/contrib/llvm/lib/Target/SystemZ/SystemZSchedule.td
new file mode 100644
index 000000000000..dbba8ab42b5a
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZSchedule.td
@@ -0,0 +1,77 @@
+//==-- SystemZSchedule.td - SystemZ Scheduling Definitions ----*- tblgen -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+// Scheduler resources
+// Resources ending with a '2' use that resource for 2 cycles. An instruction
+// using two such resources uses the mapped unit for 4 cycles, and 2 is added
+// to the total number of uops of the sched class.
+
+// These three resources are used to express decoder grouping rules.
+// The number of decoder slots needed by an instruction is normally
+// one. For a cracked instruction (BeginGroup && !EndGroup) it is
+// two. Expanded instructions (BeginGroup && EndGroup) group alone.
+def GroupAlone : SchedWrite;
+def BeginGroup : SchedWrite;
+def EndGroup : SchedWrite;
+
+// Latencies, to make code a bit neater. If more than one resource is
+// used for an instruction, the greatest latency (not the sum) will be
+// output by Tablegen. Therefore, in such cases one of these resources
+// is needed.
+def Lat2 : SchedWrite;
+def Lat3 : SchedWrite;
+def Lat4 : SchedWrite;
+def Lat5 : SchedWrite;
+def Lat6 : SchedWrite;
+def Lat7 : SchedWrite;
+def Lat8 : SchedWrite;
+def Lat9 : SchedWrite;
+def Lat10 : SchedWrite;
+def Lat11 : SchedWrite;
+def Lat12 : SchedWrite;
+def Lat15 : SchedWrite;
+def Lat20 : SchedWrite;
+def Lat30 : SchedWrite;
+
+// Fixed-point
+def FXa : SchedWrite;
+def FXa2 : SchedWrite;
+def FXb : SchedWrite;
+def FXU : SchedWrite;
+
+// Load/store unit
+def LSU : SchedWrite;
+
+// Model a return without latency; otherwise the if-converter will model
+// extra cost and abort (currently there is an assert that checks that
+// all instructions have at least one uop).
+def LSU_lat1 : SchedWrite;
+
+// Floating point unit (zEC12 and earlier)
+def FPU : SchedWrite;
+def FPU2 : SchedWrite;
+
+// Vector sub units (z13)
+def VecBF : SchedWrite;
+def VecBF2 : SchedWrite;
+def VecDF : SchedWrite;
+def VecDF2 : SchedWrite;
+def VecFPd : SchedWrite; // Blocking BFP div/sqrt unit.
+def VecMul : SchedWrite;
+def VecStr : SchedWrite;
+def VecXsPm : SchedWrite;
+
+// Virtual branching unit
+def VBU : SchedWrite;
+
+
+include "SystemZScheduleZ13.td"
+include "SystemZScheduleZEC12.td"
+include "SystemZScheduleZ196.td"
+
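
The grouping rules described in the comments of this file translate into a per-class decoder-slot count. A minimal sketch of that bookkeeping, assuming a three-slot decoder group and using illustrative names only (nothing here comes from the .td files):

#include <cassert>

// Decoder footprint of an instruction, derived from its BeginGroup/EndGroup
// scheduling flags, assuming a three-slot decoder group.
struct SchedFlags {
  bool BeginGroup;
  bool EndGroup;
};

constexpr unsigned GroupSize = 3;

// Slots an instruction needs in a decoder group:
//   normal   (!BeginGroup && !EndGroup) -> 1
//   cracked  ( BeginGroup && !EndGroup) -> 2
//   expanded ( BeginGroup &&  EndGroup) -> the whole group
unsigned decoderSlots(SchedFlags F) {
  if (F.BeginGroup && F.EndGroup)
    return GroupSize;
  if (F.BeginGroup)
    return 2;
  return 1;
}

int main() {
  assert(decoderSlots({false, false}) == 1); // ordinary instruction
  assert(decoderSlots({true, false}) == 2);  // cracked
  assert(decoderSlots({true, true}) == 3);   // expanded, groups alone
}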
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td b/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td
new file mode 100644
index 000000000000..e97d61d8355d
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ13.td
@@ -0,0 +1,1064 @@
+//-- SystemZScheduleZ13.td - SystemZ Scheduling Definitions ----*- tblgen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for Z13 to support instruction
+// scheduling and other instruction cost heuristics.
+//
+//===----------------------------------------------------------------------===//
+
+def Z13Model : SchedMachineModel {
+
+ let UnsupportedFeatures = Arch11UnsupportedFeatures.List;
+
+ let IssueWidth = 8;
+ let MicroOpBufferSize = 60; // Issue queues
+ let LoadLatency = 1; // Optimistic load latency.
+
+ let PostRAScheduler = 1;
+
+ // Extra cycles for a mispredicted branch.
+ let MispredictPenalty = 20;
+}
+
+let SchedModel = Z13Model in {
+
+// These definitions could be put in a subtarget common include file,
+// but it seems the include system in Tablegen currently rejects
+// multiple includes of the same file.
+def : WriteRes<GroupAlone, []> {
+ let NumMicroOps = 0;
+ let BeginGroup = 1;
+ let EndGroup = 1;
+}
+def : WriteRes<BeginGroup, []> {
+ let NumMicroOps = 0;
+ let BeginGroup = 1;
+}
+def : WriteRes<EndGroup, []> {
+ let NumMicroOps = 0;
+ let EndGroup = 1;
+}
+def : WriteRes<Lat2, []> { let Latency = 2; let NumMicroOps = 0;}
+def : WriteRes<Lat3, []> { let Latency = 3; let NumMicroOps = 0;}
+def : WriteRes<Lat4, []> { let Latency = 4; let NumMicroOps = 0;}
+def : WriteRes<Lat5, []> { let Latency = 5; let NumMicroOps = 0;}
+def : WriteRes<Lat6, []> { let Latency = 6; let NumMicroOps = 0;}
+def : WriteRes<Lat7, []> { let Latency = 7; let NumMicroOps = 0;}
+def : WriteRes<Lat8, []> { let Latency = 8; let NumMicroOps = 0;}
+def : WriteRes<Lat9, []> { let Latency = 9; let NumMicroOps = 0;}
+def : WriteRes<Lat10, []> { let Latency = 10; let NumMicroOps = 0;}
+def : WriteRes<Lat11, []> { let Latency = 11; let NumMicroOps = 0;}
+def : WriteRes<Lat12, []> { let Latency = 12; let NumMicroOps = 0;}
+def : WriteRes<Lat15, []> { let Latency = 15; let NumMicroOps = 0;}
+def : WriteRes<Lat20, []> { let Latency = 20; let NumMicroOps = 0;}
+def : WriteRes<Lat30, []> { let Latency = 30; let NumMicroOps = 0;}
+
+// Execution units.
+def Z13_FXaUnit : ProcResource<2>;
+def Z13_FXbUnit : ProcResource<2>;
+def Z13_LSUnit : ProcResource<2>;
+def Z13_VecUnit : ProcResource<2>;
+def Z13_VecFPdUnit : ProcResource<2> { let BufferSize = 1; /* blocking */ }
+def Z13_VBUnit : ProcResource<2>;
+
+// Subtarget specific definitions of scheduling resources.
+def : WriteRes<FXa, [Z13_FXaUnit]> { let Latency = 1; }
+def : WriteRes<FXa2, [Z13_FXaUnit, Z13_FXaUnit]> { let Latency = 2; }
+def : WriteRes<FXb, [Z13_FXbUnit]> { let Latency = 1; }
+def : WriteRes<LSU, [Z13_LSUnit]> { let Latency = 4; }
+def : WriteRes<VecBF, [Z13_VecUnit]> { let Latency = 8; }
+def : WriteRes<VecBF2, [Z13_VecUnit, Z13_VecUnit]> { let Latency = 9; }
+def : WriteRes<VecDF, [Z13_VecUnit]> { let Latency = 8; }
+def : WriteRes<VecDF2, [Z13_VecUnit, Z13_VecUnit]> { let Latency = 9; }
+def : WriteRes<VecFPd, [Z13_VecFPdUnit, Z13_VecFPdUnit, Z13_VecFPdUnit,
+ Z13_VecFPdUnit, Z13_VecFPdUnit, Z13_VecFPdUnit,
+ Z13_VecFPdUnit, Z13_VecFPdUnit, Z13_VecFPdUnit,
+ Z13_VecFPdUnit, Z13_VecFPdUnit, Z13_VecFPdUnit,
+ Z13_VecFPdUnit, Z13_VecFPdUnit, Z13_VecFPdUnit,
+ Z13_VecFPdUnit, Z13_VecFPdUnit, Z13_VecFPdUnit,
+ Z13_VecFPdUnit, Z13_VecFPdUnit, Z13_VecFPdUnit,
+ Z13_VecFPdUnit, Z13_VecFPdUnit, Z13_VecFPdUnit,
+ Z13_VecFPdUnit, Z13_VecFPdUnit, Z13_VecFPdUnit,
+ Z13_VecFPdUnit, Z13_VecFPdUnit, Z13_VecFPdUnit]>
+ { let Latency = 30; }
+def : WriteRes<VecMul, [Z13_VecUnit]> { let Latency = 5; }
+def : WriteRes<VecStr, [Z13_VecUnit]> { let Latency = 4; }
+def : WriteRes<VecXsPm, [Z13_VecUnit]> { let Latency = 3; }
+def : WriteRes<VBU, [Z13_VBUnit]>; // Virtual Branching Unit
+
+// -------------------------- INSTRUCTIONS ---------------------------------- //
+
+// InstRW constructs have been used in order to preserve the
+// readability of the InstrInfo files.
+
+// For each instruction, as matched by a regexp, provide a list of
+// resources that it needs. These will be combined into a SchedClass.
+
+//===----------------------------------------------------------------------===//
+// Stack allocation
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa], (instregex "ADJDYNALLOC$")>; // Pseudo -> LA / LAY
+
+//===----------------------------------------------------------------------===//
+// Branch instructions
+//===----------------------------------------------------------------------===//
+
+// Branch
+def : InstRW<[VBU], (instregex "(Call)?BRC(L)?(Asm.*)?$")>;
+def : InstRW<[VBU], (instregex "(Call)?J(G)?(Asm.*)?$")>;
+def : InstRW<[FXb], (instregex "(Call)?BC(R)?(Asm.*)?$")>;
+def : InstRW<[FXb], (instregex "(Call)?B(R)?(Asm.*)?$")>;
+def : InstRW<[FXa, EndGroup], (instregex "BRCT(G)?$")>;
+def : InstRW<[FXb, FXa, Lat2, GroupAlone], (instregex "BRCTH$")>;
+def : InstRW<[FXb, FXa, Lat2, GroupAlone], (instregex "BCT(G)?(R)?$")>;
+def : InstRW<[FXa, FXa, FXb, FXb, Lat4, GroupAlone],
+ (instregex "B(R)?X(H|L).*$")>;
+
+// Compare and branch
+def : InstRW<[FXb], (instregex "C(L)?(G)?(I|R)J(Asm.*)?$")>;
+def : InstRW<[FXb, FXb, Lat2, GroupAlone],
+ (instregex "C(L)?(G)?(I|R)B(Call|Return|Asm.*)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Trap instructions
+//===----------------------------------------------------------------------===//
+
+// Trap
+def : InstRW<[VBU], (instregex "(Cond)?Trap$")>;
+
+// Compare and trap
+def : InstRW<[FXb], (instregex "C(G)?(I|R)T(Asm.*)?$")>;
+def : InstRW<[FXb], (instregex "CL(G)?RT(Asm.*)?$")>;
+def : InstRW<[FXb], (instregex "CL(F|G)IT(Asm.*)?$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "CL(G)?T(Asm.*)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Call and return instructions
+//===----------------------------------------------------------------------===//
+
+// Call
+def : InstRW<[VBU, FXa, FXa, Lat3, GroupAlone], (instregex "(Call)?BRAS$")>;
+def : InstRW<[FXa, FXa, FXb, Lat3, GroupAlone], (instregex "(Call)?BRASL$")>;
+def : InstRW<[FXa, FXa, FXb, Lat3, GroupAlone], (instregex "(Call)?BAS(R)?$")>;
+def : InstRW<[FXa, FXa, FXb, Lat3, GroupAlone], (instregex "TLS_(G|L)DCALL$")>;
+
+// Return
+def : InstRW<[FXb, EndGroup], (instregex "Return$")>;
+def : InstRW<[FXb], (instregex "CondReturn$")>;
+
+//===----------------------------------------------------------------------===//
+// Select instructions
+//===----------------------------------------------------------------------===//
+
+// Select pseudo
+def : InstRW<[FXa], (instregex "Select(32|64|32Mux)$")>;
+
+// CondStore pseudos
+def : InstRW<[FXa], (instregex "CondStore16(Inv)?$")>;
+def : InstRW<[FXa], (instregex "CondStore16Mux(Inv)?$")>;
+def : InstRW<[FXa], (instregex "CondStore32(Inv)?$")>;
+def : InstRW<[FXa], (instregex "CondStore32Mux(Inv)?$")>;
+def : InstRW<[FXa], (instregex "CondStore64(Inv)?$")>;
+def : InstRW<[FXa], (instregex "CondStore8(Inv)?$")>;
+def : InstRW<[FXa], (instregex "CondStore8Mux(Inv)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Move instructions
+//===----------------------------------------------------------------------===//
+
+// Moves
+def : InstRW<[FXb, LSU, Lat5], (instregex "MV(G|H)?HI$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "MVI(Y)?$")>;
+
+// Move character
+def : InstRW<[FXb, LSU, LSU, LSU, Lat8, GroupAlone], (instregex "MVC$")>;
+
+// Pseudo -> reg move
+def : InstRW<[FXa], (instregex "COPY(_TO_REGCLASS)?$")>;
+def : InstRW<[FXa], (instregex "EXTRACT_SUBREG$")>;
+def : InstRW<[FXa], (instregex "INSERT_SUBREG$")>;
+def : InstRW<[FXa], (instregex "REG_SEQUENCE$")>;
+def : InstRW<[FXa], (instregex "SUBREG_TO_REG$")>;
+
+// Loads
+def : InstRW<[LSU], (instregex "L(Y|FH|RL|Mux|CBB)?$")>;
+def : InstRW<[LSU], (instregex "LG(RL)?$")>;
+def : InstRW<[LSU], (instregex "L128$")>;
+
+def : InstRW<[FXa], (instregex "LLIH(F|H|L)$")>;
+def : InstRW<[FXa], (instregex "LLIL(F|H|L)$")>;
+
+def : InstRW<[FXa], (instregex "LG(F|H)I$")>;
+def : InstRW<[FXa], (instregex "LHI(Mux)?$")>;
+def : InstRW<[FXa], (instregex "LR(Mux)?$")>;
+
+// Load and zero rightmost byte
+def : InstRW<[LSU], (instregex "LZR(F|G)$")>;
+
+// Load and trap
+def : InstRW<[FXb, LSU, Lat5], (instregex "L(FH|G)?AT$")>;
+
+// Load and test
+def : InstRW<[FXa, LSU, Lat5], (instregex "LT(G)?$")>;
+def : InstRW<[FXa], (instregex "LT(G)?R$")>;
+
+// Stores
+def : InstRW<[FXb, LSU, Lat5], (instregex "STG(RL)?$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "ST128$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "ST(Y|FH|RL|Mux)?$")>;
+
+// String moves.
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVST$")>;
+
+//===----------------------------------------------------------------------===//
+// Conditional move instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa, Lat2], (instregex "LOCRMux$")>;
+def : InstRW<[FXa, Lat2], (instregex "LOC(G|FH)?R(Asm.*)?$")>;
+def : InstRW<[FXa, Lat2], (instregex "LOC(G|H)?HI(Asm.*)?$")>;
+def : InstRW<[FXa, LSU, Lat6], (instregex "LOC(G|FH|Mux)?(Asm.*)?$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "STOC(G|FH|Mux)?(Asm.*)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Sign extensions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa], (instregex "L(B|H|G)R$")>;
+def : InstRW<[FXa], (instregex "LG(B|H|F)R$")>;
+
+def : InstRW<[FXa, LSU, Lat5], (instregex "LTGF$")>;
+def : InstRW<[FXa], (instregex "LTGFR$")>;
+
+def : InstRW<[FXa, LSU, Lat5], (instregex "LB(H|Mux)?$")>;
+def : InstRW<[FXa, LSU, Lat5], (instregex "LH(Y)?$")>;
+def : InstRW<[FXa, LSU, Lat5], (instregex "LH(H|Mux|RL)$")>;
+def : InstRW<[FXa, LSU, Lat5], (instregex "LG(B|H|F)$")>;
+def : InstRW<[FXa, LSU, Lat5], (instregex "LG(H|F)RL$")>;
+
+//===----------------------------------------------------------------------===//
+// Zero extensions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa], (instregex "LLCR(Mux)?$")>;
+def : InstRW<[FXa], (instregex "LLHR(Mux)?$")>;
+def : InstRW<[FXa], (instregex "LLG(C|H|F|T)R$")>;
+def : InstRW<[LSU], (instregex "LLC(Mux)?$")>;
+def : InstRW<[LSU], (instregex "LLH(Mux)?$")>;
+def : InstRW<[FXa, LSU, Lat5], (instregex "LL(C|H)H$")>;
+def : InstRW<[LSU], (instregex "LLHRL$")>;
+def : InstRW<[LSU], (instregex "LLG(C|H|F|T|HRL|FRL)$")>;
+
+// Load and zero rightmost byte
+def : InstRW<[LSU], (instregex "LLZRGF$")>;
+
+// Load and trap
+def : InstRW<[FXb, LSU, Lat5], (instregex "LLG(F|T)?AT$")>;
+
+//===----------------------------------------------------------------------===//
+// Truncations
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, LSU, Lat5], (instregex "STC(H|Y|Mux)?$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "STH(H|Y|RL|Mux)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Multi-register moves
+//===----------------------------------------------------------------------===//
+
+// Load multiple (estimated average of 5 ops)
+def : InstRW<[LSU, LSU, LSU, LSU, LSU, Lat10, GroupAlone],
+ (instregex "LM(H|Y|G)?$")>;
+
+// Store multiple (estimated average of ceil(5/2) FXb ops)
+def : InstRW<[LSU, LSU, FXb, FXb, FXb, Lat10,
+ GroupAlone], (instregex "STM(G|H|Y)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Byte swaps
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa], (instregex "LRV(G)?R$")>;
+def : InstRW<[FXa, LSU, Lat5], (instregex "LRV(G|H)?$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "STRV(G|H)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Load address instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa], (instregex "LA(Y|RL)?$")>;
+
+// Load the Global Offset Table address (-> LARL)
+def : InstRW<[FXa], (instregex "GOT$")>;
+
+//===----------------------------------------------------------------------===//
+// Absolute and Negation
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa, Lat2], (instregex "LP(G)?R$")>;
+def : InstRW<[FXa, FXa, Lat3, BeginGroup], (instregex "L(N|P)GFR$")>;
+def : InstRW<[FXa, Lat2], (instregex "LN(R|GR)$")>;
+def : InstRW<[FXa], (instregex "LC(R|GR)$")>;
+def : InstRW<[FXa, FXa, Lat2, BeginGroup], (instregex "LCGFR$")>;
+
+//===----------------------------------------------------------------------===//
+// Insertion
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa, LSU, Lat5], (instregex "IC(Y)?$")>;
+def : InstRW<[FXa, LSU, Lat5], (instregex "IC32(Y)?$")>;
+def : InstRW<[FXa, LSU, Lat5], (instregex "ICM(H|Y)?$")>;
+def : InstRW<[FXa], (instregex "II(F|H|L)Mux$")>;
+def : InstRW<[FXa], (instregex "IIHF(64)?$")>;
+def : InstRW<[FXa], (instregex "IIHH(64)?$")>;
+def : InstRW<[FXa], (instregex "IIHL(64)?$")>;
+def : InstRW<[FXa], (instregex "IILF(64)?$")>;
+def : InstRW<[FXa], (instregex "IILH(64)?$")>;
+def : InstRW<[FXa], (instregex "IILL(64)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Addition
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa, LSU, Lat5], (instregex "A(Y)?$")>;
+def : InstRW<[FXa, LSU, Lat6], (instregex "AH(Y)?$")>;
+def : InstRW<[FXa], (instregex "AIH$")>;
+def : InstRW<[FXa], (instregex "AFI(Mux)?$")>;
+def : InstRW<[FXa, LSU, Lat5], (instregex "AG$")>;
+def : InstRW<[FXa], (instregex "AGFI$")>;
+def : InstRW<[FXa], (instregex "AGHI(K)?$")>;
+def : InstRW<[FXa], (instregex "AGR(K)?$")>;
+def : InstRW<[FXa], (instregex "AHI(K)?$")>;
+def : InstRW<[FXa], (instregex "AHIMux(K)?$")>;
+def : InstRW<[FXa, LSU, Lat5], (instregex "AL(Y)?$")>;
+def : InstRW<[FXa], (instregex "AL(FI|HSIK)$")>;
+def : InstRW<[FXa, LSU, Lat5], (instregex "ALG(F)?$")>;
+def : InstRW<[FXa], (instregex "ALGHSIK$")>;
+def : InstRW<[FXa], (instregex "ALGF(I|R)$")>;
+def : InstRW<[FXa], (instregex "ALGR(K)?$")>;
+def : InstRW<[FXa], (instregex "ALR(K)?$")>;
+def : InstRW<[FXa], (instregex "AR(K)?$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "A(G)?SI$")>;
+
+// Logical addition with carry
+def : InstRW<[FXa, LSU, Lat6, GroupAlone], (instregex "ALC(G)?$")>;
+def : InstRW<[FXa, Lat2, GroupAlone], (instregex "ALC(G)?R$")>;
+
+// Add with sign extension (32 -> 64)
+def : InstRW<[FXa, LSU, Lat6], (instregex "AGF$")>;
+def : InstRW<[FXa, Lat2], (instregex "AGFR$")>;
+
+//===----------------------------------------------------------------------===//
+// Subtraction
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa, LSU, Lat5], (instregex "S(G|Y)?$")>;
+def : InstRW<[FXa, LSU, Lat6], (instregex "SH(Y)?$")>;
+def : InstRW<[FXa], (instregex "SGR(K)?$")>;
+def : InstRW<[FXa], (instregex "SLFI$")>;
+def : InstRW<[FXa, LSU, Lat5], (instregex "SL(G|GF|Y)?$")>;
+def : InstRW<[FXa], (instregex "SLGF(I|R)$")>;
+def : InstRW<[FXa], (instregex "SLGR(K)?$")>;
+def : InstRW<[FXa], (instregex "SLR(K)?$")>;
+def : InstRW<[FXa], (instregex "SR(K)?$")>;
+
+// Subtraction with borrow
+def : InstRW<[FXa, LSU, Lat6, GroupAlone], (instregex "SLB(G)?$")>;
+def : InstRW<[FXa, Lat2, GroupAlone], (instregex "SLB(G)?R$")>;
+
+// Subtraction with sign extension (32 -> 64)
+def : InstRW<[FXa, LSU, Lat6], (instregex "SGF$")>;
+def : InstRW<[FXa, Lat2], (instregex "SGFR$")>;
+
+//===----------------------------------------------------------------------===//
+// AND
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa, LSU, Lat5], (instregex "N(G|Y)?$")>;
+def : InstRW<[FXa], (instregex "NGR(K)?$")>;
+def : InstRW<[FXa], (instregex "NI(FMux|HMux|LMux)$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "NI(Y)?$")>;
+def : InstRW<[FXa], (instregex "NIHF(64)?$")>;
+def : InstRW<[FXa], (instregex "NIHH(64)?$")>;
+def : InstRW<[FXa], (instregex "NIHL(64)?$")>;
+def : InstRW<[FXa], (instregex "NILF(64)?$")>;
+def : InstRW<[FXa], (instregex "NILH(64)?$")>;
+def : InstRW<[FXa], (instregex "NILL(64)?$")>;
+def : InstRW<[FXa], (instregex "NR(K)?$")>;
+def : InstRW<[LSU, LSU, FXb, Lat9, BeginGroup], (instregex "NC$")>;
+
+//===----------------------------------------------------------------------===//
+// OR
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa, LSU, Lat5], (instregex "O(G|Y)?$")>;
+def : InstRW<[FXa], (instregex "OGR(K)?$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "OI(Y)?$")>;
+def : InstRW<[FXa], (instregex "OI(FMux|HMux|LMux)$")>;
+def : InstRW<[FXa], (instregex "OIHF(64)?$")>;
+def : InstRW<[FXa], (instregex "OIHH(64)?$")>;
+def : InstRW<[FXa], (instregex "OIHL(64)?$")>;
+def : InstRW<[FXa], (instregex "OILF(64)?$")>;
+def : InstRW<[FXa], (instregex "OILH(64)?$")>;
+def : InstRW<[FXa], (instregex "OILL(64)?$")>;
+def : InstRW<[FXa], (instregex "OR(K)?$")>;
+def : InstRW<[LSU, LSU, FXb, Lat9, BeginGroup], (instregex "OC$")>;
+
+//===----------------------------------------------------------------------===//
+// XOR
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa, LSU, Lat5], (instregex "X(G|Y)?$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "XI(Y)?$")>;
+def : InstRW<[FXa], (instregex "XIFMux$")>;
+def : InstRW<[FXa], (instregex "XGR(K)?$")>;
+def : InstRW<[FXa], (instregex "XIHF(64)?$")>;
+def : InstRW<[FXa], (instregex "XILF(64)?$")>;
+def : InstRW<[FXa], (instregex "XR(K)?$")>;
+def : InstRW<[LSU, LSU, FXb, Lat9, BeginGroup], (instregex "XC$")>;
+
+//===----------------------------------------------------------------------===//
+// Multiplication
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa, LSU, Lat10], (instregex "MS(GF|Y)?$")>;
+def : InstRW<[FXa, Lat6], (instregex "MS(R|FI)$")>;
+def : InstRW<[FXa, LSU, Lat12], (instregex "MSG$")>;
+def : InstRW<[FXa, Lat8], (instregex "MSGR$")>;
+def : InstRW<[FXa, Lat6], (instregex "MSGF(I|R)$")>;
+def : InstRW<[FXa, LSU, Lat15, GroupAlone], (instregex "MLG$")>;
+def : InstRW<[FXa, Lat9, GroupAlone], (instregex "MLGR$")>;
+def : InstRW<[FXa, Lat5], (instregex "MGHI$")>;
+def : InstRW<[FXa, Lat5], (instregex "MHI$")>;
+def : InstRW<[FXa, LSU, Lat9], (instregex "MH(Y)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Division and remainder
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa, Lat30, GroupAlone], (instregex "DSG(F)?R$")>;
+def : InstRW<[LSU, FXa, Lat30, GroupAlone], (instregex "DSG(F)?$")>;
+def : InstRW<[FXa2, FXa2, Lat20, GroupAlone], (instregex "DLR$")>;
+def : InstRW<[FXa2, FXa2, Lat30, GroupAlone], (instregex "DLGR$")>;
+def : InstRW<[FXa2, FXa2, LSU, Lat30, GroupAlone], (instregex "DL(G)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Shifts
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa], (instregex "SLL(G|K)?$")>;
+def : InstRW<[FXa], (instregex "SRL(G|K)?$")>;
+def : InstRW<[FXa], (instregex "SRA(G|K)?$")>;
+def : InstRW<[FXa], (instregex "SLA(K)?$")>;
+
+// Rotate
+def : InstRW<[FXa, LSU, Lat6], (instregex "RLL(G)?$")>;
+
+// Rotate and insert
+def : InstRW<[FXa], (instregex "RISBG(N|32)?$")>;
+def : InstRW<[FXa], (instregex "RISBH(G|H|L)$")>;
+def : InstRW<[FXa], (instregex "RISBL(G|H|L)$")>;
+def : InstRW<[FXa], (instregex "RISBMux$")>;
+
+// Rotate and Select
+def : InstRW<[FXa, FXa, Lat3, BeginGroup], (instregex "R(N|O|X)SBG$")>;
+
+//===----------------------------------------------------------------------===//
+// Comparison
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, LSU, Lat5], (instregex "C(G|Y|Mux|RL)?$")>;
+def : InstRW<[FXb], (instregex "C(F|H)I(Mux)?$")>;
+def : InstRW<[FXb], (instregex "CG(F|H)I$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "CG(HSI|RL)$")>;
+def : InstRW<[FXb], (instregex "C(G)?R$")>;
+def : InstRW<[FXb], (instregex "CIH$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "CH(F|SI)$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "CL(Y|Mux|FHSI)?$")>;
+def : InstRW<[FXb], (instregex "CLFI(Mux)?$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "CLG(HRL|HSI)?$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "CLGF(RL)?$")>;
+def : InstRW<[FXb], (instregex "CLGF(I|R)$")>;
+def : InstRW<[FXb], (instregex "CLGR$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "CLGRL$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "CLH(F|RL|HSI)$")>;
+def : InstRW<[FXb], (instregex "CLIH$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "CLI(Y)?$")>;
+def : InstRW<[FXb], (instregex "CLR$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "CLRL$")>;
+
+// Compare halfword
+def : InstRW<[FXb, LSU, Lat6], (instregex "CH(Y|RL)?$")>;
+def : InstRW<[FXb, LSU, Lat6], (instregex "CGH(RL)?$")>;
+def : InstRW<[FXa, FXb, LSU, Lat6, BeginGroup], (instregex "CHHSI$")>;
+
+// Compare with sign extension (32 -> 64)
+def : InstRW<[FXb, LSU, Lat6], (instregex "CGF(RL)?$")>;
+def : InstRW<[FXb, Lat2], (instregex "CGFR$")>;
+
+// Compare logical character
+def : InstRW<[FXb, LSU, LSU, Lat9, BeginGroup], (instregex "CLC$")>;
+
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CLST$")>;
+
+// Test under mask
+def : InstRW<[FXb, LSU, Lat5], (instregex "TM(Y)?$")>;
+def : InstRW<[FXb], (instregex "TM(H|L)Mux$")>;
+def : InstRW<[FXb], (instregex "TMHH(64)?$")>;
+def : InstRW<[FXb], (instregex "TMHL(64)?$")>;
+def : InstRW<[FXb], (instregex "TMLH(64)?$")>;
+def : InstRW<[FXb], (instregex "TMLL(64)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Prefetch and execution hint
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[LSU], (instregex "PFD(RL)?$")>;
+def : InstRW<[FXb, Lat2], (instregex "BPP$")>;
+def : InstRW<[FXb, EndGroup], (instregex "BPRP$")>;
+def : InstRW<[FXb], (instregex "NIAI$")>;
+
+//===----------------------------------------------------------------------===//
+// Atomic operations
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, EndGroup], (instregex "Serialize$")>;
+
+def : InstRW<[FXb, LSU, Lat5], (instregex "LAA(G)?$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "LAAL(G)?$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "LAN(G)?$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "LAO(G)?$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "LAX(G)?$")>;
+
+// Test and set
+def : InstRW<[FXb, LSU, Lat5, EndGroup], (instregex "TS$")>;
+
+// Compare and swap
+def : InstRW<[FXa, FXb, LSU, Lat6, GroupAlone], (instregex "CS(G|Y)?$")>;
+
+// Compare double and swap
+def : InstRW<[FXa, FXa, FXb, FXb, FXa, LSU, Lat10, GroupAlone],
+ (instregex "CDS(Y)?$")>;
+def : InstRW<[FXa, FXa, FXb, FXb, LSU, FXb, FXb, LSU, LSU, Lat20, GroupAlone],
+ (instregex "CDSG$")>;
+
+// Compare and swap and store
+def : InstRW<[FXa, Lat30, GroupAlone], (instregex "CSST$")>;
+
+// Perform locked operation
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "PLO$")>;
+
+// Load/store pair from/to quadword
+def : InstRW<[LSU, LSU, Lat5, GroupAlone], (instregex "LPQ$")>;
+def : InstRW<[FXb, FXb, LSU, Lat6, GroupAlone], (instregex "STPQ$")>;
+
+// Load pair disjoint
+def : InstRW<[LSU, LSU, Lat5, GroupAlone], (instregex "LPD(G)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Access registers
+//===----------------------------------------------------------------------===//
+
+// Extract/set/copy access register
+def : InstRW<[LSU], (instregex "(EAR|SAR|CPYA)$")>;
+
+// Load address extended
+def : InstRW<[LSU, FXa, Lat5, BeginGroup], (instregex "LAE(Y)?$")>;
+
+// Load/store access multiple (not modeled precisely)
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "(L|ST)AM(Y)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Program mask and addressing mode
+//===----------------------------------------------------------------------===//
+
+// Insert Program Mask
+def : InstRW<[FXa, Lat3, EndGroup], (instregex "IPM$")>;
+
+// Set Program Mask
+def : InstRW<[LSU, EndGroup], (instregex "SPM$")>;
+
+// Branch and link
+def : InstRW<[FXa, FXa, FXb, Lat5, GroupAlone], (instregex "BAL(R)?$")>;
+
+// Test addressing mode
+def : InstRW<[FXb], (instregex "TAM$")>;
+
+// Set addressing mode
+def : InstRW<[FXb, Lat2, EndGroup], (instregex "SAM(24|31|64)$")>;
+
+// Branch (and save) and set mode.
+def : InstRW<[FXa, FXb, Lat2, GroupAlone], (instregex "BSM$")>;
+def : InstRW<[FXa, FXa, FXb, Lat3, GroupAlone], (instregex "BASSM$")>;
+
+//===----------------------------------------------------------------------===//
+// Transactional execution
+//===----------------------------------------------------------------------===//
+
+// Transaction begin
+def : InstRW<[LSU, LSU, FXb, FXb, FXb, FXb, FXb, Lat15, GroupAlone],
+ (instregex "TBEGIN(C|_nofloat)?$")>;
+
+// Transaction end
+def : InstRW<[FXb, GroupAlone], (instregex "TEND$")>;
+
+// Transaction abort
+def : InstRW<[LSU, GroupAlone], (instregex "TABORT$")>;
+
+// Extract Transaction Nesting Depth
+def : InstRW<[FXa], (instregex "ETND$")>;
+
+// Nontransactional store
+def : InstRW<[FXb, LSU, Lat5], (instregex "NTSTG$")>;
+
+//===----------------------------------------------------------------------===//
+// Processor assist
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb], (instregex "PPA$")>;
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//===----------------------------------------------------------------------===//
+
+// Find leftmost one
+def : InstRW<[FXa, Lat6, GroupAlone], (instregex "FLOGR$")>;
+
+// Population count
+def : InstRW<[FXa, Lat3], (instregex "POPCNT$")>;
+
+// Extend
+def : InstRW<[FXa], (instregex "AEXT128_64$")>;
+def : InstRW<[FXa], (instregex "ZEXT128_(32|64)$")>;
+
+// String instructions
+def : InstRW<[FXa, LSU, Lat30], (instregex "SRST$")>;
+
+// Move with key
+def : InstRW<[FXa, FXa, FXb, LSU, Lat8, GroupAlone], (instregex "MVCK$")>;
+
+// Extract CPU Time
+def : InstRW<[FXa, Lat5, LSU], (instregex "ECTG$")>;
+
+// Execute
+def : InstRW<[FXb, GroupAlone], (instregex "EX(RL)?$")>;
+
+// Program return
+def : InstRW<[FXb, Lat30], (instregex "PR$")>;
+
+// Inline assembly
+def : InstRW<[LSU, LSU, LSU, FXa, FXa, FXb, Lat9, GroupAlone],
+ (instregex "STCK(F)?$")>;
+def : InstRW<[LSU, LSU, LSU, LSU, FXa, FXa, FXb, FXb, Lat11, GroupAlone],
+ (instregex "STCKE$")>;
+def : InstRW<[FXa, LSU, Lat5], (instregex "STFLE$")>;
+def : InstRW<[FXb, Lat30], (instregex "SVC$")>;
+
+// Store real address
+def : InstRW<[FXb, LSU, Lat5], (instregex "STRAG$")>;
+
+//===----------------------------------------------------------------------===//
+// .insn directive instructions
+//===----------------------------------------------------------------------===//
+
+// An "empty" sched-class will be assigned instead of the "invalid sched-class".
+// getNumDecoderSlots() will then return 1 instead of 0.
+def : InstRW<[], (instregex "Insn.*")>;
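+// Illustrative note (annotation added here, not part of the original
+// comment): in contrast to a normal mapping such as
+//   def : InstRW<[FXa], (instregex "ADJDYNALLOC$")>;
+// the empty write list above attaches no resources and no latency, but the
+// matched .insn pseudo-instructions still receive a valid SchedClass, which
+// is what allows getNumDecoderSlots() to report 1 for them rather than 0.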
+
+
+// ----------------------------- Floating point ----------------------------- //
+
+//===----------------------------------------------------------------------===//
+// FP: Select instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa], (instregex "SelectF(32|64|128)$")>;
+def : InstRW<[FXa], (instregex "CondStoreF32(Inv)?$")>;
+def : InstRW<[FXa], (instregex "CondStoreF64(Inv)?$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Move instructions
+//===----------------------------------------------------------------------===//
+
+// Load zero
+def : InstRW<[FXb], (instregex "LZ(DR|ER)$")>;
+def : InstRW<[FXb, FXb, Lat2, BeginGroup], (instregex "LZXR$")>;
+
+// Load
+def : InstRW<[VecXsPm], (instregex "LER$")>;
+def : InstRW<[FXb], (instregex "LD(R|R32|GR)$")>;
+def : InstRW<[FXb, Lat3], (instregex "LGDR$")>;
+def : InstRW<[FXb, FXb, Lat2, GroupAlone], (instregex "LXR$")>;
+
+// Load and Test
+def : InstRW<[VecXsPm, Lat4], (instregex "LT(D|E)BR$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "LTEBRCompare(_VecPseudo)?$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "LTDBRCompare(_VecPseudo)?$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "LTXBR$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone],
+ (instregex "LTXBRCompare(_VecPseudo)?$")>;
+
+// Copy sign
+def : InstRW<[VecXsPm], (instregex "CPSDRd(d|s)$")>;
+def : InstRW<[VecXsPm], (instregex "CPSDRs(d|s)$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Load instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[VecXsPm, LSU, Lat7], (instregex "LE(Y)?$")>;
+def : InstRW<[LSU], (instregex "LD(Y|E32)?$")>;
+def : InstRW<[LSU], (instregex "LX$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Store instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, LSU, Lat7], (instregex "STD(Y)?$")>;
+def : InstRW<[FXb, LSU, Lat7], (instregex "STE(Y)?$")>;
+def : InstRW<[FXb, LSU, Lat5], (instregex "STX$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Conversion instructions
+//===----------------------------------------------------------------------===//
+
+// Load rounded
+def : InstRW<[VecBF], (instregex "LEDBR(A)?$")>;
+def : InstRW<[VecDF, VecDF, Lat20], (instregex "LEXBR(A)?$")>;
+def : InstRW<[VecDF, VecDF, Lat20], (instregex "LDXBR(A)?$")>;
+
+// Load lengthened
+def : InstRW<[VecBF, LSU, Lat12], (instregex "LDEB$")>;
+def : InstRW<[VecBF], (instregex "LDEBR$")>;
+def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "LX(D|E)B$")>;
+def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "LX(D|E)BR$")>;
+
+// Convert from fixed / logical
+def : InstRW<[FXb, VecBF, Lat9, BeginGroup], (instregex "CE(F|G)BR(A)?$")>;
+def : InstRW<[FXb, VecBF, Lat9, BeginGroup], (instregex "CD(F|G)BR(A)?$")>;
+def : InstRW<[FXb, VecDF2, VecDF2, Lat12, GroupAlone], (instregex "CX(F|G)BR(A)?$")>;
+def : InstRW<[FXb, VecBF, Lat9, BeginGroup], (instregex "CEL(F|G)BR$")>;
+def : InstRW<[FXb, VecBF, Lat9, BeginGroup], (instregex "CDL(F|G)BR$")>;
+def : InstRW<[FXb, VecDF2, VecDF2, Lat12, GroupAlone], (instregex "CXL(F|G)BR$")>;
+
+// Convert to fixed / logical
+def : InstRW<[FXb, VecBF, Lat11, BeginGroup], (instregex "CF(E|D)BR(A)?$")>;
+def : InstRW<[FXb, VecBF, Lat11, BeginGroup], (instregex "CG(E|D)BR(A)?$")>;
+def : InstRW<[FXb, VecDF, VecDF, Lat20, BeginGroup], (instregex "C(F|G)XBR(A)?$")>;
+def : InstRW<[FXb, VecBF, Lat11, GroupAlone], (instregex "CLFEBR$")>;
+def : InstRW<[FXb, VecBF, Lat11, BeginGroup], (instregex "CLFDBR$")>;
+def : InstRW<[FXb, VecBF, Lat11, BeginGroup], (instregex "CLG(E|D)BR$")>;
+def : InstRW<[FXb, VecDF, VecDF, Lat20, BeginGroup], (instregex "CL(F|G)XBR$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Load Complement / Negative / Positive
+def : InstRW<[VecXsPm, Lat4], (instregex "L(C|N|P)DBR$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "L(C|N|P)EBR$")>;
+def : InstRW<[FXb], (instregex "LCDFR(_32)?$")>;
+def : InstRW<[FXb], (instregex "LNDFR(_32)?$")>;
+def : InstRW<[FXb], (instregex "LPDFR(_32)?$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "L(C|N|P)XBR$")>;
+
+// Square root
+def : InstRW<[VecFPd, LSU], (instregex "SQ(E|D)B$")>;
+def : InstRW<[VecFPd], (instregex "SQ(E|D)BR$")>;
+def : InstRW<[VecFPd, VecFPd, GroupAlone], (instregex "SQXBR$")>;
+
+// Load FP integer
+def : InstRW<[VecBF], (instregex "FIEBR(A)?$")>;
+def : InstRW<[VecBF], (instregex "FIDBR(A)?$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "FIXBR(A)?$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition
+def : InstRW<[VecBF, LSU, Lat12], (instregex "A(E|D)B$")>;
+def : InstRW<[VecBF], (instregex "A(E|D)BR$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "AXBR$")>;
+
+// Subtraction
+def : InstRW<[VecBF, LSU, Lat12], (instregex "S(E|D)B$")>;
+def : InstRW<[VecBF], (instregex "S(E|D)BR$")>;
+def : InstRW<[VecDF2, VecDF2, Lat11, GroupAlone], (instregex "SXBR$")>;
+
+// Multiply
+def : InstRW<[VecBF, LSU, Lat12], (instregex "M(D|DE|EE)B$")>;
+def : InstRW<[VecBF], (instregex "M(D|DE|EE)BR$")>;
+def : InstRW<[VecBF2, VecBF2, LSU, Lat12, GroupAlone], (instregex "MXDB$")>;
+def : InstRW<[VecBF2, VecBF2, GroupAlone], (instregex "MXDBR$")>;
+def : InstRW<[VecDF2, VecDF2, Lat20, GroupAlone], (instregex "MXBR$")>;
+
+// Multiply and add / subtract
+def : InstRW<[VecBF, LSU, Lat12, GroupAlone], (instregex "M(A|S)EB$")>;
+def : InstRW<[VecBF, GroupAlone], (instregex "M(A|S)EBR$")>;
+def : InstRW<[VecBF, LSU, Lat12, GroupAlone], (instregex "M(A|S)DB$")>;
+def : InstRW<[VecBF], (instregex "M(A|S)DBR$")>;
+
+// Division
+def : InstRW<[VecFPd, LSU], (instregex "D(E|D)B$")>;
+def : InstRW<[VecFPd], (instregex "D(E|D)BR$")>;
+def : InstRW<[VecFPd, VecFPd, GroupAlone], (instregex "DXBR$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Comparisons
+//===----------------------------------------------------------------------===//
+
+// Compare
+def : InstRW<[VecXsPm, LSU, Lat8], (instregex "C(E|D)B$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "C(E|D)BR?$")>;
+def : InstRW<[VecDF, VecDF, Lat20, GroupAlone], (instregex "CXBR$")>;
+
+// Test Data Class
+def : InstRW<[LSU, VecXsPm, Lat9], (instregex "TC(E|D)B$")>;
+def : InstRW<[LSU, VecDF2, VecDF2, Lat15, GroupAlone], (instregex "TCXB$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Floating-point control register instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXa, LSU, Lat4, GroupAlone], (instregex "EFPC$")>;
+def : InstRW<[FXb, LSU, Lat5, GroupAlone], (instregex "STFPC$")>;
+def : InstRW<[LSU, Lat3, GroupAlone], (instregex "SFPC$")>;
+def : InstRW<[LSU, LSU, Lat6, GroupAlone], (instregex "LFPC$")>;
+def : InstRW<[FXa, Lat30, GroupAlone], (instregex "SFASR$")>;
+def : InstRW<[FXa, LSU, Lat30, GroupAlone], (instregex "LFAS$")>;
+def : InstRW<[FXb, Lat3, GroupAlone], (instregex "SRNM(B|T)?$")>;
+
+// --------------------------------- Vector --------------------------------- //
+
+//===----------------------------------------------------------------------===//
+// Vector: Move instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb], (instregex "VLR(32|64)?$")>;
+def : InstRW<[FXb, Lat4], (instregex "VLGV(B|F|G|H)?$")>;
+def : InstRW<[FXb], (instregex "VLVG(B|F|G|H)?$")>;
+def : InstRW<[FXb, Lat2], (instregex "VLVGP(32)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Immediate instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[VecXsPm], (instregex "VZERO$")>;
+def : InstRW<[VecXsPm], (instregex "VONE$")>;
+def : InstRW<[VecXsPm], (instregex "VGBM$")>;
+def : InstRW<[VecXsPm], (instregex "VGM(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VLEI(B|F|G|H)$")>;
+def : InstRW<[VecXsPm], (instregex "VREPI(B|F|G|H)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Loads
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[LSU], (instregex "VL(L|BB)?$")>;
+def : InstRW<[LSU], (instregex "VL(32|64)$")>;
+def : InstRW<[LSU], (instregex "VLLEZ(B|F|G|H)?$")>;
+def : InstRW<[LSU], (instregex "VLREP(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm, LSU, Lat7], (instregex "VLE(B|F|G|H)$")>;
+def : InstRW<[FXb, LSU, VecXsPm, Lat11, BeginGroup], (instregex "VGE(F|G)$")>;
+def : InstRW<[LSU, LSU, LSU, LSU, LSU, Lat10, GroupAlone],
+ (instregex "VLM$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Stores
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb, LSU, Lat8], (instregex "VST(L|32|64)?$")>;
+def : InstRW<[FXb, LSU, Lat8], (instregex "VSTE(F|G)$")>;
+def : InstRW<[FXb, LSU, VecXsPm, Lat11, BeginGroup], (instregex "VSTE(B|H)$")>;
+def : InstRW<[LSU, LSU, FXb, FXb, FXb, FXb, FXb, Lat20, GroupAlone],
+ (instregex "VSTM$")>;
+def : InstRW<[FXb, FXb, LSU, Lat12, BeginGroup], (instregex "VSCE(F|G)$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Selects and permutes
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[VecXsPm], (instregex "VMRH(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VMRL(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VPERM$")>;
+def : InstRW<[VecXsPm], (instregex "VPDI$")>;
+def : InstRW<[VecXsPm], (instregex "VREP(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VSEL$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Widening and narrowing
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[VecXsPm], (instregex "VPK(F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VPKS(F|G|H)?$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "VPKS(F|G|H)S$")>;
+def : InstRW<[VecXsPm], (instregex "VPKLS(F|G|H)?$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "VPKLS(F|G|H)S$")>;
+def : InstRW<[VecXsPm], (instregex "VSEG(B|F|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VUPH(B|F|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VUPL(B|F)?$")>;
+def : InstRW<[VecXsPm], (instregex "VUPLH(B|F|H|W)?$")>;
+def : InstRW<[VecXsPm], (instregex "VUPLL(B|F|H)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Integer arithmetic
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[VecXsPm], (instregex "VA(B|F|G|H|Q|C|CQ)?$")>;
+def : InstRW<[VecXsPm], (instregex "VACC(B|F|G|H|Q|C|CQ)?$")>;
+def : InstRW<[VecXsPm], (instregex "VAVG(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VAVGL(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VN(C|O)?$")>;
+def : InstRW<[VecXsPm], (instregex "VO$")>;
+def : InstRW<[VecMul], (instregex "VCKSM$")>;
+def : InstRW<[VecXsPm], (instregex "VCLZ(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VCTZ(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VX$")>;
+def : InstRW<[VecMul], (instregex "VGFM?$")>;
+def : InstRW<[VecMul], (instregex "VGFMA(B|F|G|H)?$")>;
+def : InstRW<[VecMul], (instregex "VGFM(B|F|G|H)$")>;
+def : InstRW<[VecXsPm], (instregex "VLC(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VLP(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VMX(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VMXL(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VMN(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VMNL(B|F|G|H)?$")>;
+def : InstRW<[VecMul], (instregex "VMAL(B|F)?$")>;
+def : InstRW<[VecMul], (instregex "VMALE(B|F|H)?$")>;
+def : InstRW<[VecMul], (instregex "VMALH(B|F|H|W)?$")>;
+def : InstRW<[VecMul], (instregex "VMALO(B|F|H)?$")>;
+def : InstRW<[VecMul], (instregex "VMAO(B|F|H)?$")>;
+def : InstRW<[VecMul], (instregex "VMAE(B|F|H)?$")>;
+def : InstRW<[VecMul], (instregex "VMAH(B|F|H)?$")>;
+def : InstRW<[VecMul], (instregex "VME(B|F|H)?$")>;
+def : InstRW<[VecMul], (instregex "VMH(B|F|H)?$")>;
+def : InstRW<[VecMul], (instregex "VML(B|F)?$")>;
+def : InstRW<[VecMul], (instregex "VMLE(B|F|H)?$")>;
+def : InstRW<[VecMul], (instregex "VMLH(B|F|H|W)?$")>;
+def : InstRW<[VecMul], (instregex "VMLO(B|F|H)?$")>;
+def : InstRW<[VecMul], (instregex "VMO(B|F|H)?$")>;
+
+def : InstRW<[VecXsPm], (instregex "VPOPCT$")>;
+
+def : InstRW<[VecXsPm], (instregex "VERLL(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VERLLV(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VERIM(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VESL(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VESLV(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VESRA(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VESRAV(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VESRL(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VESRLV(B|F|G|H)?$")>;
+
+def : InstRW<[VecXsPm], (instregex "VSL(DB)?$")>;
+def : InstRW<[VecXsPm, VecXsPm, Lat8], (instregex "VSLB$")>;
+def : InstRW<[VecXsPm], (instregex "VSR(A|L)$")>;
+def : InstRW<[VecXsPm, VecXsPm, Lat8], (instregex "VSR(A|L)B$")>;
+
+def : InstRW<[VecXsPm], (instregex "VSB(I|IQ|CBI|CBIQ)?$")>;
+def : InstRW<[VecXsPm], (instregex "VSCBI(B|F|G|H|Q)?$")>;
+def : InstRW<[VecXsPm], (instregex "VS(F|G|H|Q)?$")>;
+
+def : InstRW<[VecMul], (instregex "VSUM(B|H)?$")>;
+def : InstRW<[VecMul], (instregex "VSUMG(F|H)?$")>;
+def : InstRW<[VecMul], (instregex "VSUMQ(F|G)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Integer comparison
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[VecXsPm, Lat4], (instregex "VEC(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "VECL(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm], (instregex "VCEQ(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "VCEQ(B|F|G|H)S$")>;
+def : InstRW<[VecXsPm], (instregex "VCH(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "VCH(B|F|G|H)S$")>;
+def : InstRW<[VecXsPm], (instregex "VCHL(B|F|G|H)?$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "VCHL(B|F|G|H)S$")>;
+def : InstRW<[VecStr, Lat5], (instregex "VTM$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Floating-point arithmetic
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[VecBF2], (instregex "VCD(G|GB|LG|LGB)$")>;
+def : InstRW<[VecBF], (instregex "WCD(GB|LGB)$")>;
+def : InstRW<[VecBF2], (instregex "VC(L)?GD$")>;
+def : InstRW<[VecBF2], (instregex "VFADB$")>;
+def : InstRW<[VecBF], (instregex "WFADB$")>;
+def : InstRW<[VecBF2], (instregex "VCGDB$")>;
+def : InstRW<[VecBF], (instregex "WCGDB$")>;
+def : InstRW<[VecBF2], (instregex "VF(I|M|A|S)$")>;
+def : InstRW<[VecBF2], (instregex "VF(I|M|S)DB$")>;
+def : InstRW<[VecBF], (instregex "WF(I|M|S)DB$")>;
+def : InstRW<[VecBF2], (instregex "VCLGDB$")>;
+def : InstRW<[VecBF], (instregex "WCLGDB$")>;
+def : InstRW<[VecXsPm], (instregex "VFL(C|N|P)DB$")>;
+def : InstRW<[VecXsPm], (instregex "WFL(C|N|P)DB$")>;
+def : InstRW<[VecBF2], (instregex "VFM(A|S)$")>;
+def : InstRW<[VecBF2], (instregex "VFM(A|S)DB$")>;
+def : InstRW<[VecBF], (instregex "WFM(A|S)DB$")>;
+def : InstRW<[VecXsPm], (instregex "VFPSO$")>;
+def : InstRW<[VecXsPm], (instregex "(V|W)FPSODB$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "VFTCI(DB)?$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "WFTCIDB$")>;
+def : InstRW<[VecBF2], (instregex "VL(DE|ED)$")>;
+def : InstRW<[VecBF2], (instregex "VL(DE|ED)B$")>;
+def : InstRW<[VecBF], (instregex "WL(DE|ED)B$")>;
+
+// divide / square root
+def : InstRW<[VecFPd], (instregex "VFD$")>;
+def : InstRW<[VecFPd], (instregex "(V|W)FDDB$")>;
+def : InstRW<[VecFPd], (instregex "VFSQ$")>;
+def : InstRW<[VecFPd], (instregex "(V|W)FSQDB$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Floating-point comparison
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[VecXsPm], (instregex "VFC(E|H|HE)$")>;
+def : InstRW<[VecXsPm], (instregex "VFC(E|H|HE)DB$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)$")>;
+def : InstRW<[VecXsPm], (instregex "WFC(E|H|HE)DB$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "VFC(E|H|HE)DBS$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "WFC(E|H|HE)DBS$")>;
+def : InstRW<[VecXsPm, Lat4], (instregex "WF(C|K)DB$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Floating-point insertion and extraction
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXb], (instregex "LEFR$")>;
+def : InstRW<[FXb, Lat4], (instregex "LFER$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: String instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[VecStr], (instregex "VFAE(B)?$")>;
+def : InstRW<[VecStr, Lat5], (instregex "VFAEBS$")>;
+def : InstRW<[VecStr], (instregex "VFAE(F|H)$")>;
+def : InstRW<[VecStr, Lat5], (instregex "VFAE(F|H)S$")>;
+def : InstRW<[VecStr], (instregex "VFAEZ(B|F|H)$")>;
+def : InstRW<[VecStr, Lat5], (instregex "VFAEZ(B|F|H)S$")>;
+def : InstRW<[VecStr], (instregex "VFEE(B|F|H|ZB|ZF|ZH)?$")>;
+def : InstRW<[VecStr, Lat5], (instregex "VFEE(B|F|H|ZB|ZF|ZH)S$")>;
+def : InstRW<[VecStr], (instregex "VFENE(B|F|H|ZB|ZF|ZH)?$")>;
+def : InstRW<[VecStr, Lat5], (instregex "VFENE(B|F|H|ZB|ZF|ZH)S$")>;
+def : InstRW<[VecStr], (instregex "VISTR(B|F|H)?$")>;
+def : InstRW<[VecStr, Lat5], (instregex "VISTR(B|F|H)S$")>;
+def : InstRW<[VecStr], (instregex "VSTRC(B|F|H)?$")>;
+def : InstRW<[VecStr, Lat5], (instregex "VSTRC(B|F|H)S$")>;
+def : InstRW<[VecStr], (instregex "VSTRCZ(B|F|H)$")>;
+def : InstRW<[VecStr, Lat5], (instregex "VSTRCZ(B|F|H)S$")>;
+
+}
+
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td b/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td
new file mode 100644
index 000000000000..a950e54e7601
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZ196.td
@@ -0,0 +1,769 @@
+//=- SystemZScheduleZ196.td - SystemZ Scheduling Definitions ---*- tblgen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for Z196 to support instruction
+// scheduling and other instruction cost heuristics.
+//
+//===----------------------------------------------------------------------===//
+
+def Z196Model : SchedMachineModel {
+
+ let UnsupportedFeatures = Arch9UnsupportedFeatures.List;
+
+ let IssueWidth = 5;
+ let MicroOpBufferSize = 40; // Issue queues
+ let LoadLatency = 1; // Optimistic load latency.
+
+ let PostRAScheduler = 1;
+
+ // Extra cycles for a mispredicted branch.
+ let MispredictPenalty = 16;
+}
+
+let SchedModel = Z196Model in {
+
+// These definitions could be put in a subtarget common include file,
+// but it seems the include system in Tablegen currently rejects
+// multiple includes of the same file.
+def : WriteRes<GroupAlone, []> {
+ let NumMicroOps = 0;
+ let BeginGroup = 1;
+ let EndGroup = 1;
+}
+def : WriteRes<EndGroup, []> {
+ let NumMicroOps = 0;
+ let EndGroup = 1;
+}
+def : WriteRes<Lat2, []> { let Latency = 2; let NumMicroOps = 0;}
+def : WriteRes<Lat3, []> { let Latency = 3; let NumMicroOps = 0;}
+def : WriteRes<Lat4, []> { let Latency = 4; let NumMicroOps = 0;}
+def : WriteRes<Lat5, []> { let Latency = 5; let NumMicroOps = 0;}
+def : WriteRes<Lat6, []> { let Latency = 6; let NumMicroOps = 0;}
+def : WriteRes<Lat7, []> { let Latency = 7; let NumMicroOps = 0;}
+def : WriteRes<Lat8, []> { let Latency = 8; let NumMicroOps = 0;}
+def : WriteRes<Lat9, []> { let Latency = 9; let NumMicroOps = 0;}
+def : WriteRes<Lat10, []> { let Latency = 10; let NumMicroOps = 0;}
+def : WriteRes<Lat11, []> { let Latency = 11; let NumMicroOps = 0;}
+def : WriteRes<Lat12, []> { let Latency = 12; let NumMicroOps = 0;}
+def : WriteRes<Lat15, []> { let Latency = 15; let NumMicroOps = 0;}
+def : WriteRes<Lat20, []> { let Latency = 20; let NumMicroOps = 0;}
+def : WriteRes<Lat30, []> { let Latency = 30; let NumMicroOps = 0;}
+
+// Execution units.
+def Z196_FXUnit : ProcResource<2>;
+def Z196_LSUnit : ProcResource<2>;
+def Z196_FPUnit : ProcResource<1>;
+
+// Subtarget specific definitions of scheduling resources.
+def : WriteRes<FXU, [Z196_FXUnit]> { let Latency = 1; }
+def : WriteRes<LSU, [Z196_LSUnit]> { let Latency = 4; }
+def : WriteRes<LSU_lat1, [Z196_LSUnit]> { let Latency = 1; }
+def : WriteRes<FPU, [Z196_FPUnit]> { let Latency = 8; }
+def : WriteRes<FPU2, [Z196_FPUnit, Z196_FPUnit]> { let Latency = 9; }
+
+// -------------------------- INSTRUCTIONS ---------------------------------- //
+
+// InstRW constructs have been used in order to preserve the
+// readability of the InstrInfo files.
+
+// For each instruction, as matched by a regexp, provide a list of
+// resources that it needs. These will be combined into a SchedClass.
+
+//===----------------------------------------------------------------------===//
+// Stack allocation
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU], (instregex "ADJDYNALLOC$")>; // Pseudo -> LA / LAY
+
+//===----------------------------------------------------------------------===//
+// Branch instructions
+//===----------------------------------------------------------------------===//
+
+// Branch
+def : InstRW<[LSU, EndGroup], (instregex "(Call)?BRC(L)?(Asm.*)?$")>;
+def : InstRW<[LSU, EndGroup], (instregex "(Call)?J(G)?(Asm.*)?$")>;
+def : InstRW<[LSU, EndGroup], (instregex "(Call)?BC(R)?(Asm.*)?$")>;
+def : InstRW<[LSU, EndGroup], (instregex "(Call)?B(R)?(Asm.*)?$")>;
+def : InstRW<[FXU, LSU, Lat5, GroupAlone], (instregex "BRCT(G|H)?$")>;
+def : InstRW<[FXU, LSU, Lat5, GroupAlone], (instregex "BCT(G)?(R)?$")>;
+def : InstRW<[FXU, FXU, FXU, LSU, Lat7, GroupAlone],
+ (instregex "B(R)?X(H|L).*$")>;
+
+// Compare and branch
+def : InstRW<[FXU, LSU, Lat5, GroupAlone],
+ (instregex "C(L)?(G)?(I|R)J(Asm.*)?$")>;
+def : InstRW<[FXU, LSU, Lat5, GroupAlone],
+ (instregex "C(L)?(G)?(I|R)B(Call|Return|Asm.*)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Trap instructions
+//===----------------------------------------------------------------------===//
+
+// Trap
+def : InstRW<[LSU, EndGroup], (instregex "(Cond)?Trap$")>;
+
+// Compare and trap
+def : InstRW<[FXU], (instregex "C(G)?(I|R)T(Asm.*)?$")>;
+def : InstRW<[FXU], (instregex "CL(G)?RT(Asm.*)?$")>;
+def : InstRW<[FXU], (instregex "CL(F|G)IT(Asm.*)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Call and return instructions
+//===----------------------------------------------------------------------===//
+
+// Call
+def : InstRW<[LSU, FXU, FXU, Lat6, GroupAlone], (instregex "(Call)?BRAS$")>;
+def : InstRW<[LSU, FXU, FXU, Lat6, GroupAlone], (instregex "(Call)?BRASL$")>;
+def : InstRW<[FXU, FXU, LSU, Lat6, GroupAlone], (instregex "(Call)?BAS(R)?$")>;
+def : InstRW<[LSU, FXU, FXU, Lat6, GroupAlone], (instregex "TLS_(G|L)DCALL$")>;
+
+// Return
+def : InstRW<[LSU_lat1, EndGroup], (instregex "Return$")>;
+def : InstRW<[LSU_lat1, EndGroup], (instregex "CondReturn$")>;
+
+//===----------------------------------------------------------------------===//
+// Select instructions
+//===----------------------------------------------------------------------===//
+
+// Select pseudo
+def : InstRW<[FXU], (instregex "Select(32|64|32Mux)$")>;
+
+// CondStore pseudos
+def : InstRW<[FXU], (instregex "CondStore16(Inv)?$")>;
+def : InstRW<[FXU], (instregex "CondStore16Mux(Inv)?$")>;
+def : InstRW<[FXU], (instregex "CondStore32(Inv)?$")>;
+def : InstRW<[FXU], (instregex "CondStore64(Inv)?$")>;
+def : InstRW<[FXU], (instregex "CondStore8(Inv)?$")>;
+def : InstRW<[FXU], (instregex "CondStore8Mux(Inv)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Move instructions
+//===----------------------------------------------------------------------===//
+
+// Moves
+def : InstRW<[FXU, LSU, Lat5], (instregex "MV(G|H)?HI$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "MVI(Y)?$")>;
+
+// Move character
+def : InstRW<[LSU, LSU, LSU, FXU, Lat8, GroupAlone], (instregex "MVC$")>;
+
+// Pseudo -> reg move
+def : InstRW<[FXU], (instregex "COPY(_TO_REGCLASS)?$")>;
+def : InstRW<[FXU], (instregex "EXTRACT_SUBREG$")>;
+def : InstRW<[FXU], (instregex "INSERT_SUBREG$")>;
+def : InstRW<[FXU], (instregex "REG_SEQUENCE$")>;
+def : InstRW<[FXU], (instregex "SUBREG_TO_REG$")>;
+
+// Loads
+def : InstRW<[LSU], (instregex "L(Y|FH|RL|Mux)?$")>;
+def : InstRW<[LSU], (instregex "LG(RL)?$")>;
+def : InstRW<[LSU], (instregex "L128$")>;
+
+def : InstRW<[FXU], (instregex "LLIH(F|H|L)$")>;
+def : InstRW<[FXU], (instregex "LLIL(F|H|L)$")>;
+
+def : InstRW<[FXU], (instregex "LG(F|H)I$")>;
+def : InstRW<[FXU], (instregex "LHI(Mux)?$")>;
+def : InstRW<[FXU], (instregex "LR(Mux)?$")>;
+
+// Load and test
+def : InstRW<[FXU, LSU, Lat5], (instregex "LT(G)?$")>;
+def : InstRW<[FXU], (instregex "LT(G)?R$")>;
+
+// Stores
+def : InstRW<[FXU, LSU, Lat5], (instregex "STG(RL)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "ST128$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "ST(Y|FH|RL|Mux)?$")>;
+
+// String moves.
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVST$")>;
+
+//===----------------------------------------------------------------------===//
+// Conditional move instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, Lat2, EndGroup], (instregex "LOC(G)?R(Asm.*)?$")>;
+def : InstRW<[FXU, LSU, Lat6, EndGroup], (instregex "LOC(G)?(Asm.*)?$")>;
+def : InstRW<[FXU, LSU, Lat5, EndGroup], (instregex "STOC(G)?(Asm.*)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Sign extensions
+//===----------------------------------------------------------------------===//
+def : InstRW<[FXU], (instregex "L(B|H|G)R$")>;
+def : InstRW<[FXU], (instregex "LG(B|H|F)R$")>;
+
+def : InstRW<[FXU, LSU, Lat5], (instregex "LTGF$")>;
+def : InstRW<[FXU], (instregex "LTGFR$")>;
+
+def : InstRW<[FXU, LSU, Lat5], (instregex "LB(H|Mux)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "LH(Y)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "LH(H|Mux|RL)$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "LG(B|H|F)$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "LG(H|F)RL$")>;
+
+//===----------------------------------------------------------------------===//
+// Zero extensions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU], (instregex "LLCR(Mux)?$")>;
+def : InstRW<[FXU], (instregex "LLHR(Mux)?$")>;
+def : InstRW<[FXU], (instregex "LLG(C|F|H|T)R$")>;
+def : InstRW<[LSU], (instregex "LLC(Mux)?$")>;
+def : InstRW<[LSU], (instregex "LLH(Mux)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "LL(C|H)H$")>;
+def : InstRW<[LSU], (instregex "LLHRL$")>;
+def : InstRW<[LSU], (instregex "LLG(C|F|H|T|FRL|HRL)$")>;
+
+//===----------------------------------------------------------------------===//
+// Truncations
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat5], (instregex "STC(H|Y|Mux)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "STH(H|Y|RL|Mux)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Multi-register moves
+//===----------------------------------------------------------------------===//
+
+// Load multiple (estimated average of 5 ops)
+def : InstRW<[LSU, LSU, LSU, LSU, LSU, Lat10, GroupAlone],
+ (instregex "LM(H|Y|G)?$")>;
+
+// Store multiple (estimated average of 3 ops)
+def : InstRW<[LSU, LSU, FXU, FXU, FXU, Lat10, GroupAlone],
+ (instregex "STM(H|Y|G)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Byte swaps
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU], (instregex "LRV(G)?R$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "LRV(G|H)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "STRV(G|H)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Load address instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU], (instregex "LA(Y|RL)?$")>;
+
+// Load the Global Offset Table address
+def : InstRW<[FXU], (instregex "GOT$")>;
+
+//===----------------------------------------------------------------------===//
+// Absolute and Negation
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, Lat2], (instregex "LP(G)?R$")>;
+def : InstRW<[FXU, FXU, Lat3, GroupAlone], (instregex "L(N|P)GFR$")>;
+def : InstRW<[FXU, Lat2], (instregex "LN(R|GR)$")>;
+def : InstRW<[FXU], (instregex "LC(R|GR)$")>;
+def : InstRW<[FXU, FXU, Lat2, GroupAlone], (instregex "LCGFR$")>;
+
+//===----------------------------------------------------------------------===//
+// Insertion
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat5], (instregex "IC(Y)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "IC32(Y)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "ICM(H|Y)?$")>;
+def : InstRW<[FXU], (instregex "II(F|H|L)Mux$")>;
+def : InstRW<[FXU], (instregex "IIHF(64)?$")>;
+def : InstRW<[FXU], (instregex "IIHH(64)?$")>;
+def : InstRW<[FXU], (instregex "IIHL(64)?$")>;
+def : InstRW<[FXU], (instregex "IILF(64)?$")>;
+def : InstRW<[FXU], (instregex "IILH(64)?$")>;
+def : InstRW<[FXU], (instregex "IILL(64)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Addition
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat5], (instregex "A(Y|SI)?$")>;
+def : InstRW<[FXU, FXU, LSU, Lat6, GroupAlone], (instregex "AH(Y)?$")>;
+def : InstRW<[FXU], (instregex "AIH$")>;
+def : InstRW<[FXU], (instregex "AFI(Mux)?$")>;
+def : InstRW<[FXU], (instregex "AGFI$")>;
+def : InstRW<[FXU], (instregex "AGHI(K)?$")>;
+def : InstRW<[FXU], (instregex "AGR(K)?$")>;
+def : InstRW<[FXU], (instregex "AHI(K)?$")>;
+def : InstRW<[FXU], (instregex "AHIMux(K)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "AL(Y)?$")>;
+def : InstRW<[FXU], (instregex "AL(FI|HSIK)$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "ALG(F)?$")>;
+def : InstRW<[FXU], (instregex "ALGHSIK$")>;
+def : InstRW<[FXU], (instregex "ALGF(I|R)$")>;
+def : InstRW<[FXU], (instregex "ALGR(K)?$")>;
+def : InstRW<[FXU], (instregex "ALR(K)?$")>;
+def : InstRW<[FXU], (instregex "AR(K)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "AG(SI)?$")>;
+
+// Logical addition with carry
+def : InstRW<[FXU, LSU, Lat7, GroupAlone], (instregex "ALC(G)?$")>;
+def : InstRW<[FXU, Lat3, GroupAlone], (instregex "ALC(G)?R$")>;
+
+// Add with sign extension (32 -> 64)
+def : InstRW<[FXU, FXU, LSU, Lat6, GroupAlone], (instregex "AGF$")>;
+def : InstRW<[FXU, FXU, Lat2, GroupAlone], (instregex "AGFR$")>;
+
+//===----------------------------------------------------------------------===//
+// Subtraction
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat5], (instregex "S(G|Y)?$")>;
+def : InstRW<[FXU, FXU, LSU, Lat6, GroupAlone], (instregex "SH(Y)?$")>;
+def : InstRW<[FXU], (instregex "SGR(K)?$")>;
+def : InstRW<[FXU], (instregex "SLFI$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "SL(G|GF|Y)?$")>;
+def : InstRW<[FXU], (instregex "SLGF(I|R)$")>;
+def : InstRW<[FXU], (instregex "SLGR(K)?$")>;
+def : InstRW<[FXU], (instregex "SLR(K)?$")>;
+def : InstRW<[FXU], (instregex "SR(K)?$")>;
+
+// Subtraction with borrow
+def : InstRW<[FXU, LSU, Lat7, GroupAlone], (instregex "SLB(G)?$")>;
+def : InstRW<[FXU, Lat3, GroupAlone], (instregex "SLB(G)?R$")>;
+
+// Subtraction with sign extension (32 -> 64)
+def : InstRW<[FXU, FXU, LSU, Lat6, GroupAlone], (instregex "SGF$")>;
+def : InstRW<[FXU, FXU, Lat2, GroupAlone], (instregex "SGFR$")>;
+
+//===----------------------------------------------------------------------===//
+// AND
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat5], (instregex "N(G|Y)?$")>;
+def : InstRW<[FXU], (instregex "NGR(K)?$")>;
+def : InstRW<[FXU], (instregex "NI(FMux|HMux|LMux)$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "NI(Y)?$")>;
+def : InstRW<[FXU], (instregex "NIHF(64)?$")>;
+def : InstRW<[FXU], (instregex "NIHH(64)?$")>;
+def : InstRW<[FXU], (instregex "NIHL(64)?$")>;
+def : InstRW<[FXU], (instregex "NILF(64)?$")>;
+def : InstRW<[FXU], (instregex "NILH(64)?$")>;
+def : InstRW<[FXU], (instregex "NILL(64)?$")>;
+def : InstRW<[FXU], (instregex "NR(K)?$")>;
+def : InstRW<[LSU, LSU, FXU, Lat9, GroupAlone], (instregex "NC$")>;
+
+//===----------------------------------------------------------------------===//
+// OR
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat5], (instregex "O(G|Y)?$")>;
+def : InstRW<[FXU], (instregex "OGR(K)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "OI(Y)?$")>;
+def : InstRW<[FXU], (instregex "OI(FMux|HMux|LMux)$")>;
+def : InstRW<[FXU], (instregex "OIHF(64)?$")>;
+def : InstRW<[FXU], (instregex "OIHH(64)?$")>;
+def : InstRW<[FXU], (instregex "OIHL(64)?$")>;
+def : InstRW<[FXU], (instregex "OILF(64)?$")>;
+def : InstRW<[FXU], (instregex "OILH(64)?$")>;
+def : InstRW<[FXU], (instregex "OILL(64)?$")>;
+def : InstRW<[FXU], (instregex "OR(K)?$")>;
+def : InstRW<[LSU, LSU, FXU, Lat9, GroupAlone], (instregex "OC$")>;
+
+//===----------------------------------------------------------------------===//
+// XOR
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat5], (instregex "X(G|Y)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "XI(Y)?$")>;
+def : InstRW<[FXU], (instregex "XIFMux$")>;
+def : InstRW<[FXU], (instregex "XGR(K)?$")>;
+def : InstRW<[FXU], (instregex "XIHF(64)?$")>;
+def : InstRW<[FXU], (instregex "XILF(64)?$")>;
+def : InstRW<[FXU], (instregex "XR(K)?$")>;
+def : InstRW<[LSU, LSU, FXU, Lat9, GroupAlone], (instregex "XC$")>;
+
+//===----------------------------------------------------------------------===//
+// Multiplication
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat10], (instregex "MS(GF|Y)?$")>;
+def : InstRW<[FXU, Lat6], (instregex "MS(R|FI)$")>;
+def : InstRW<[FXU, LSU, Lat12], (instregex "MSG$")>;
+def : InstRW<[FXU, Lat8], (instregex "MSGR$")>;
+def : InstRW<[FXU, Lat6], (instregex "MSGF(I|R)$")>;
+def : InstRW<[FXU, LSU, Lat15, GroupAlone], (instregex "MLG$")>;
+def : InstRW<[FXU, Lat9, GroupAlone], (instregex "MLGR$")>;
+def : InstRW<[FXU, Lat5], (instregex "MGHI$")>;
+def : InstRW<[FXU, Lat5], (instregex "MHI$")>;
+def : InstRW<[FXU, LSU, Lat9], (instregex "MH(Y)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Division and remainder
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FPU2, FPU2, FXU, FXU, FXU, FXU, Lat30, GroupAlone],
+ (instregex "DSG(F)?R$")>;
+def : InstRW<[FPU2, FPU2, LSU, FXU, FXU, FXU, Lat30, GroupAlone],
+ (instregex "DSG(F)?$")>;
+def : InstRW<[FPU2, FPU2, FXU, FXU, FXU, FXU, FXU, Lat30, GroupAlone],
+ (instregex "DL(G)?R$")>;
+def : InstRW<[FPU2, FPU2, LSU, FXU, FXU, FXU, FXU, Lat30, GroupAlone],
+ (instregex "DL(G)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Shifts
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU], (instregex "SLL(G|K)?$")>;
+def : InstRW<[FXU], (instregex "SRL(G|K)?$")>;
+def : InstRW<[FXU], (instregex "SRA(G|K)?$")>;
+def : InstRW<[FXU, Lat2], (instregex "SLA(K)?$")>;
+
+// Rotate
+def : InstRW<[FXU, LSU, Lat6], (instregex "RLL(G)?$")>;
+
+// Rotate and insert
+def : InstRW<[FXU], (instregex "RISBG(32)?$")>;
+def : InstRW<[FXU], (instregex "RISBH(G|H|L)$")>;
+def : InstRW<[FXU], (instregex "RISBL(G|H|L)$")>;
+def : InstRW<[FXU], (instregex "RISBMux$")>;
+
+// Rotate and Select
+def : InstRW<[FXU, FXU, Lat3, GroupAlone], (instregex "R(N|O|X)SBG$")>;
+
+//===----------------------------------------------------------------------===//
+// Comparison
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat5], (instregex "C(G|Y|Mux|RL)?$")>;
+def : InstRW<[FXU], (instregex "C(F|H)I(Mux)?$")>;
+def : InstRW<[FXU], (instregex "CG(F|H)I$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "CG(HSI|RL)$")>;
+def : InstRW<[FXU], (instregex "C(G)?R$")>;
+def : InstRW<[FXU], (instregex "CIH$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "CH(F|SI)$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "CL(Y|Mux|FHSI)?$")>;
+def : InstRW<[FXU], (instregex "CLFI(Mux)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "CLG(HRL|HSI)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "CLGF(RL)?$")>;
+def : InstRW<[FXU], (instregex "CLGF(I|R)$")>;
+def : InstRW<[FXU], (instregex "CLGR$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "CLGRL$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "CLH(F|RL|HSI)$")>;
+def : InstRW<[FXU], (instregex "CLIH$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "CLI(Y)?$")>;
+def : InstRW<[FXU], (instregex "CLR$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "CLRL$")>;
+
+// Compare halfword
+def : InstRW<[FXU, LSU, FXU, Lat6, GroupAlone], (instregex "CH(Y|RL)?$")>;
+def : InstRW<[FXU, LSU, FXU, Lat6, GroupAlone], (instregex "CGH(RL)?$")>;
+def : InstRW<[FXU, LSU, FXU, Lat6, GroupAlone], (instregex "CHHSI$")>;
+
+// Compare with sign extension (32 -> 64)
+def : InstRW<[FXU, FXU, LSU, Lat6, Lat2, GroupAlone], (instregex "CGF(RL)?$")>;
+def : InstRW<[FXU, FXU, Lat2, GroupAlone], (instregex "CGFR$")>;
+
+// Compare logical character
+def : InstRW<[LSU, LSU, FXU, Lat9, GroupAlone], (instregex "CLC$")>;
+
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CLST$")>;
+
+// Test under mask
+def : InstRW<[FXU, LSU, Lat5], (instregex "TM(Y)?$")>;
+def : InstRW<[FXU], (instregex "TM(H|L)Mux$")>;
+def : InstRW<[FXU], (instregex "TMHH(64)?$")>;
+def : InstRW<[FXU], (instregex "TMHL(64)?$")>;
+def : InstRW<[FXU], (instregex "TMLH(64)?$")>;
+def : InstRW<[FXU], (instregex "TMLL(64)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Prefetch
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[LSU, GroupAlone], (instregex "PFD(RL)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Atomic operations
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[LSU, EndGroup], (instregex "Serialize$")>;
+
+def : InstRW<[FXU, LSU, Lat5], (instregex "LAA(G)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "LAAL(G)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "LAN(G)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "LAO(G)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "LAX(G)?$")>;
+
+// Test and set
+def : InstRW<[FXU, LSU, Lat5, EndGroup], (instregex "TS$")>;
+
+// Compare and swap
+def : InstRW<[FXU, LSU, FXU, Lat6, GroupAlone], (instregex "CS(G|Y)?$")>;
+
+// Compare double and swap
+def : InstRW<[FXU, FXU, FXU, FXU, FXU, LSU, Lat10, GroupAlone],
+ (instregex "CDS(Y)?$")>;
+def : InstRW<[FXU, FXU, FXU, FXU, FXU, FXU, LSU, LSU, Lat12, GroupAlone],
+ (instregex "CDSG$")>;
+
+// Compare and swap and store
+def : InstRW<[FXU, Lat30, GroupAlone], (instregex "CSST$")>;
+
+// Perform locked operation
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "PLO$")>;
+
+// Load/store pair from/to quadword
+def : InstRW<[LSU, LSU, Lat5, GroupAlone], (instregex "LPQ$")>;
+def : InstRW<[FXU, FXU, LSU, LSU, Lat6, GroupAlone], (instregex "STPQ$")>;
+
+// Load pair disjoint
+def : InstRW<[LSU, LSU, Lat5, GroupAlone], (instregex "LPD(G)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Access registers
+//===----------------------------------------------------------------------===//
+
+// Extract/set/copy access register
+def : InstRW<[LSU], (instregex "(EAR|SAR|CPYA)$")>;
+
+// Load address extended
+def : InstRW<[LSU, FXU, Lat5, GroupAlone], (instregex "LAE(Y)?$")>;
+
+// Load/store access multiple (not modeled precisely)
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "(L|ST)AM(Y)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Program mask and addressing mode
+//===----------------------------------------------------------------------===//
+
+// Insert Program Mask
+def : InstRW<[FXU, Lat3, EndGroup], (instregex "IPM$")>;
+
+// Set Program Mask
+def : InstRW<[LSU, EndGroup], (instregex "SPM$")>;
+
+// Branch and link
+def : InstRW<[FXU, FXU, LSU, Lat8, GroupAlone], (instregex "BAL(R)?$")>;
+
+// Test addressing mode
+def : InstRW<[FXU], (instregex "TAM$")>;
+
+// Set addressing mode
+def : InstRW<[LSU, EndGroup], (instregex "SAM(24|31|64)$")>;
+
+// Branch (and save) and set mode.
+def : InstRW<[FXU, LSU, Lat5, GroupAlone], (instregex "BSM$")>;
+def : InstRW<[FXU, FXU, LSU, Lat6, GroupAlone], (instregex "BASSM$")>;
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//===----------------------------------------------------------------------===//
+
+// Find leftmost one
+def : InstRW<[FXU, Lat7, GroupAlone], (instregex "FLOGR$")>;
+
+// Population count
+def : InstRW<[FXU, Lat3], (instregex "POPCNT$")>;
+
+// Extend
+def : InstRW<[FXU], (instregex "AEXT128_64$")>;
+def : InstRW<[FXU], (instregex "ZEXT128_(32|64)$")>;
+
+// String instructions
+def : InstRW<[FXU, LSU, Lat30], (instregex "SRST$")>;
+
+// Move with key
+def : InstRW<[LSU, Lat8, GroupAlone], (instregex "MVCK$")>;
+
+// Extract CPU Time
+def : InstRW<[FXU, Lat5, LSU], (instregex "ECTG$")>;
+
+// Execute
+def : InstRW<[LSU, GroupAlone], (instregex "EX(RL)?$")>;
+
+// Program return
+def : InstRW<[FXU, Lat30], (instregex "PR$")>;
+
+// Inline assembly
+def : InstRW<[FXU, LSU, Lat15], (instregex "STCK$")>;
+def : InstRW<[FXU, LSU, Lat12], (instregex "STCKF$")>;
+def : InstRW<[LSU, FXU, Lat5], (instregex "STCKE$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "STFLE$")>;
+def : InstRW<[FXU, Lat30], (instregex "SVC$")>;
+
+// Store real address
+def : InstRW<[FXU, LSU, Lat5], (instregex "STRAG$")>;
+
+//===----------------------------------------------------------------------===//
+// .insn directive instructions
+//===----------------------------------------------------------------------===//
+
+// An "empty" sched-class will be assigned instead of the "invalid sched-class".
+// getNumDecoderSlots() will then return 1 instead of 0.
+def : InstRW<[], (instregex "Insn.*")>;
+
+
+// ----------------------------- Floating point ----------------------------- //
+
+//===----------------------------------------------------------------------===//
+// FP: Select instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU], (instregex "SelectF(32|64|128)$")>;
+def : InstRW<[FXU], (instregex "CondStoreF32(Inv)?$")>;
+def : InstRW<[FXU], (instregex "CondStoreF64(Inv)?$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Move instructions
+//===----------------------------------------------------------------------===//
+
+// Load zero
+def : InstRW<[FXU], (instregex "LZ(DR|ER)$")>;
+def : InstRW<[FXU, FXU, Lat2, GroupAlone], (instregex "LZXR$")>;
+
+// Load
+def : InstRW<[FXU], (instregex "LER$")>;
+def : InstRW<[FXU], (instregex "LD(R|R32|GR)$")>;
+def : InstRW<[FXU, Lat3], (instregex "LGDR$")>;
+def : InstRW<[FXU, FXU, Lat2, GroupAlone], (instregex "LXR$")>;
+
+// Load and Test
+def : InstRW<[FPU], (instregex "LT(D|E)BR$")>;
+def : InstRW<[FPU], (instregex "LTEBRCompare(_VecPseudo)?$")>;
+def : InstRW<[FPU], (instregex "LTDBRCompare(_VecPseudo)?$")>;
+def : InstRW<[FPU2, FPU2, Lat9, GroupAlone], (instregex "LTXBR$")>;
+def : InstRW<[FPU2, FPU2, Lat9, GroupAlone],
+ (instregex "LTXBRCompare(_VecPseudo)?$")>;
+
+// Copy sign
+def : InstRW<[FXU, FXU, Lat5, GroupAlone], (instregex "CPSDRd(d|s)$")>;
+def : InstRW<[FXU, FXU, Lat5, GroupAlone], (instregex "CPSDRs(d|s)$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Load instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[LSU], (instregex "LE(Y)?$")>;
+def : InstRW<[LSU], (instregex "LD(Y|E32)?$")>;
+def : InstRW<[LSU], (instregex "LX$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Store instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat7], (instregex "STD(Y)?$")>;
+def : InstRW<[FXU, LSU, Lat7], (instregex "STE(Y)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "STX$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Conversion instructions
+//===----------------------------------------------------------------------===//
+
+// Load rounded
+def : InstRW<[FPU], (instregex "LEDBR(A)?$")>;
+def : InstRW<[FPU, FPU, Lat20], (instregex "LEXBR(A)?$")>;
+def : InstRW<[FPU, FPU, Lat20], (instregex "LDXBR(A)?$")>;
+
+// Load lengthened
+def : InstRW<[FPU, LSU, Lat12], (instregex "LDEB$")>;
+def : InstRW<[FPU], (instregex "LDEBR$")>;
+def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "LX(D|E)B$")>;
+def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "LX(D|E)BR$")>;
+
+// Convert from fixed / logical
+def : InstRW<[FXU, FPU, Lat9, GroupAlone], (instregex "CE(F|G)BR(A)?$")>;
+def : InstRW<[FXU, FPU, Lat9, GroupAlone], (instregex "CD(F|G)BR(A)?$")>;
+def : InstRW<[FXU, FPU2, FPU2, Lat11, GroupAlone], (instregex "CX(F|G)BR(A)?$")>;
+def : InstRW<[FXU, FPU, Lat9, GroupAlone], (instregex "CEL(F|G)BR$")>;
+def : InstRW<[FXU, FPU, Lat9, GroupAlone], (instregex "CDL(F|G)BR$")>;
+def : InstRW<[FXU, FPU2, FPU2, Lat11, GroupAlone], (instregex "CXL(F|G)BR$")>;
+
+// Convert to fixed / logical
+def : InstRW<[FXU, FPU, Lat12, GroupAlone], (instregex "CF(E|D)BR(A)?$")>;
+def : InstRW<[FXU, FPU, Lat12, GroupAlone], (instregex "CG(E|D)BR(A)?$")>;
+def : InstRW<[FXU, FPU, FPU, Lat20, GroupAlone], (instregex "C(F|G)XBR(A)?$")>;
+def : InstRW<[FXU, FPU, Lat11, GroupAlone], (instregex "CLF(E|D)BR$")>;
+def : InstRW<[FXU, FPU, Lat11, GroupAlone], (instregex "CLG(E|D)BR$")>;
+def : InstRW<[FXU, FPU, FPU, Lat20, GroupAlone], (instregex "CL(F|G)XBR$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Load Complement / Negative / Positive
+def : InstRW<[FPU], (instregex "L(C|N|P)DBR$")>;
+def : InstRW<[FPU], (instregex "L(C|N|P)EBR$")>;
+def : InstRW<[FXU], (instregex "LCDFR(_32)?$")>;
+def : InstRW<[FXU], (instregex "LNDFR(_32)?$")>;
+def : InstRW<[FXU], (instregex "LPDFR(_32)?$")>;
+def : InstRW<[FPU2, FPU2, Lat9, GroupAlone], (instregex "L(C|N|P)XBR$")>;
+
+// Square root
+def : InstRW<[FPU, LSU, Lat30], (instregex "SQ(E|D)B$")>;
+def : InstRW<[FPU, Lat30], (instregex "SQ(E|D)BR$")>;
+def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "SQXBR$")>;
+
+// Load FP integer
+def : InstRW<[FPU], (instregex "FIEBR(A)?$")>;
+def : InstRW<[FPU], (instregex "FIDBR(A)?$")>;
+def : InstRW<[FPU2, FPU2, Lat15, GroupAlone], (instregex "FIXBR(A)?$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition
+def : InstRW<[FPU, LSU, Lat12], (instregex "A(E|D)B$")>;
+def : InstRW<[FPU], (instregex "A(E|D)BR$")>;
+def : InstRW<[FPU2, FPU2, Lat20, GroupAlone], (instregex "AXBR$")>;
+
+// Subtraction
+def : InstRW<[FPU, LSU, Lat12], (instregex "S(E|D)B$")>;
+def : InstRW<[FPU], (instregex "S(E|D)BR$")>;
+def : InstRW<[FPU2, FPU2, Lat20, GroupAlone], (instregex "SXBR$")>;
+
+// Multiply
+def : InstRW<[FPU, LSU, Lat12], (instregex "M(D|DE|EE)B$")>;
+def : InstRW<[FPU], (instregex "M(D|DE|EE)BR$")>;
+def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "MXDB$")>;
+def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "MXDBR$")>;
+def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "MXBR$")>;
+
+// Multiply and add / subtract
+def : InstRW<[FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)EB$")>;
+def : InstRW<[FPU, GroupAlone], (instregex "M(A|S)EBR$")>;
+def : InstRW<[FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)DB$")>;
+def : InstRW<[FPU, GroupAlone], (instregex "M(A|S)DBR$")>;
+
+// Division
+def : InstRW<[FPU, LSU, Lat30], (instregex "D(E|D)B$")>;
+def : InstRW<[FPU, Lat30], (instregex "D(E|D)BR$")>;
+def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "DXBR$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Comparisons
+//===----------------------------------------------------------------------===//
+
+// Compare
+def : InstRW<[FPU, LSU, Lat12], (instregex "C(E|D)B$")>;
+def : InstRW<[FPU], (instregex "C(E|D)BR$")>;
+def : InstRW<[FPU, FPU, Lat30], (instregex "CXBR$")>;
+
+// Test Data Class
+def : InstRW<[FPU, LSU, Lat15], (instregex "TC(E|D)B$")>;
+def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "TCXB$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Floating-point control register instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat4, GroupAlone], (instregex "EFPC$")>;
+def : InstRW<[LSU, Lat3, GroupAlone], (instregex "SFPC$")>;
+def : InstRW<[LSU, LSU, Lat6, GroupAlone], (instregex "LFPC$")>;
+def : InstRW<[LSU, Lat3, GroupAlone], (instregex "STFPC$")>;
+def : InstRW<[FXU, Lat30, GroupAlone], (instregex "SFASR$")>;
+def : InstRW<[FXU, LSU, Lat30, GroupAlone], (instregex "LFAS$")>;
+def : InstRW<[FXU, Lat2, GroupAlone], (instregex "SRNM(B|T)?$")>;
+
+}
+
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td b/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td
new file mode 100644
index 000000000000..8ab6c826f1ed
--- /dev/null
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZScheduleZEC12.td
@@ -0,0 +1,807 @@
+//=- SystemZScheduleZEC12.td - SystemZ Scheduling Definitions --*- tblgen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for ZEC12 to support instruction
+// scheduling and other instruction cost heuristics.
+//
+//===----------------------------------------------------------------------===//
+
+def ZEC12Model : SchedMachineModel {
+
+ let UnsupportedFeatures = Arch10UnsupportedFeatures.List;
+
+ let IssueWidth = 5;
+ let MicroOpBufferSize = 40; // Issue queues
+ let LoadLatency = 1; // Optimistic load latency.
+
+ let PostRAScheduler = 1;
+
+ // Extra cycles for a mispredicted branch.
+ let MispredictPenalty = 16;
+}
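+
+// A note on these parameters (my reading of the generic machine model, not
+// something stated in this file): a nonzero MicroOpBufferSize tells the
+// MachineScheduler that the core can buffer micro-ops and hide latency rather
+// than stalling strictly in order, and MispredictPenalty feeds the
+// profitability checks of the early if-conversion pass that this change
+// enables in SystemZTargetMachine.cpp further below.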
+
+let SchedModel = ZEC12Model in {
+
+// These definitions could be put in a subtarget common include file,
+// but it seems the include system in Tablegen currently rejects
+// multiple includes of the same file.
+def : WriteRes<GroupAlone, []> {
+ let NumMicroOps = 0;
+ let BeginGroup = 1;
+ let EndGroup = 1;
+}
+def : WriteRes<EndGroup, []> {
+ let NumMicroOps = 0;
+ let EndGroup = 1;
+}
+def : WriteRes<Lat2, []> { let Latency = 2; let NumMicroOps = 0;}
+def : WriteRes<Lat3, []> { let Latency = 3; let NumMicroOps = 0;}
+def : WriteRes<Lat4, []> { let Latency = 4; let NumMicroOps = 0;}
+def : WriteRes<Lat5, []> { let Latency = 5; let NumMicroOps = 0;}
+def : WriteRes<Lat6, []> { let Latency = 6; let NumMicroOps = 0;}
+def : WriteRes<Lat7, []> { let Latency = 7; let NumMicroOps = 0;}
+def : WriteRes<Lat8, []> { let Latency = 8; let NumMicroOps = 0;}
+def : WriteRes<Lat9, []> { let Latency = 9; let NumMicroOps = 0;}
+def : WriteRes<Lat10, []> { let Latency = 10; let NumMicroOps = 0;}
+def : WriteRes<Lat11, []> { let Latency = 11; let NumMicroOps = 0;}
+def : WriteRes<Lat12, []> { let Latency = 12; let NumMicroOps = 0;}
+def : WriteRes<Lat15, []> { let Latency = 15; let NumMicroOps = 0;}
+def : WriteRes<Lat20, []> { let Latency = 20; let NumMicroOps = 0;}
+def : WriteRes<Lat30, []> { let Latency = 30; let NumMicroOps = 0;}
+
+// Execution units.
+def ZEC12_FXUnit : ProcResource<2>;
+def ZEC12_LSUnit : ProcResource<2>;
+def ZEC12_FPUnit : ProcResource<1>;
+def ZEC12_VBUnit : ProcResource<1>;
+
+// Subtarget specific definitions of scheduling resources.
+def : WriteRes<FXU, [ZEC12_FXUnit]> { let Latency = 1; }
+def : WriteRes<LSU, [ZEC12_LSUnit]> { let Latency = 4; }
+def : WriteRes<LSU_lat1, [ZEC12_LSUnit]> { let Latency = 1; }
+def : WriteRes<FPU, [ZEC12_FPUnit]> { let Latency = 8; }
+def : WriteRes<FPU2, [ZEC12_FPUnit, ZEC12_FPUnit]> { let Latency = 9; }
+def : WriteRes<VBU, [ZEC12_VBUnit]>; // Virtual Branching Unit
+
+// -------------------------- INSTRUCTIONS ---------------------------------- //
+
+// InstRW constructs have been used in order to preserve the
+// readability of the InstrInfo files.
+
+// For each instruction, as matched by a regexp, provide a list of
+// resources that it needs. These will be combined into a SchedClass.
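+//
+// As a rough sketch of how these entries combine (my reading of the model,
+// not something stated in this file): an entry such as
+//   def : InstRW<[FXU, LSU, Lat5], (instregex "LT(G)?$")>;
+// matches the LT / LTG load-and-test instructions and gives their scheduling
+// class one FXU micro-op and one LSU micro-op, while the zero-micro-op Lat5
+// write appears only to raise the latency estimate to 5 cycles. GroupAlone
+// and EndGroup likewise add no micro-ops and only constrain how the
+// instruction is placed into a decoder group.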
+
+//===----------------------------------------------------------------------===//
+// Stack allocation
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU], (instregex "ADJDYNALLOC$")>; // Pseudo -> LA / LAY
+
+//===----------------------------------------------------------------------===//
+// Branch instructions
+//===----------------------------------------------------------------------===//
+
+// Branch
+def : InstRW<[VBU], (instregex "(Call)?BRC(L)?(Asm.*)?$")>;
+def : InstRW<[VBU], (instregex "(Call)?J(G)?(Asm.*)?$")>;
+def : InstRW<[LSU, Lat4], (instregex "(Call)?BC(R)?(Asm.*)?$")>;
+def : InstRW<[LSU, Lat4], (instregex "(Call)?B(R)?(Asm.*)?$")>;
+def : InstRW<[FXU, EndGroup], (instregex "BRCT(G)?$")>;
+def : InstRW<[FXU, LSU, Lat5, GroupAlone], (instregex "BRCTH$")>;
+def : InstRW<[FXU, LSU, Lat5, GroupAlone], (instregex "BCT(G)?(R)?$")>;
+def : InstRW<[FXU, FXU, FXU, LSU, Lat7, GroupAlone],
+ (instregex "B(R)?X(H|L).*$")>;
+
+// Compare and branch
+def : InstRW<[FXU], (instregex "C(L)?(G)?(I|R)J(Asm.*)?$")>;
+def : InstRW<[FXU, LSU, Lat5, GroupAlone],
+ (instregex "C(L)?(G)?(I|R)B(Call|Return|Asm.*)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Trap instructions
+//===----------------------------------------------------------------------===//
+
+// Trap
+def : InstRW<[VBU], (instregex "(Cond)?Trap$")>;
+
+// Compare and trap
+def : InstRW<[FXU], (instregex "C(G)?(I|R)T(Asm.*)?$")>;
+def : InstRW<[FXU], (instregex "CL(G)?RT(Asm.*)?$")>;
+def : InstRW<[FXU], (instregex "CL(F|G)IT(Asm.*)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "CL(G)?T(Asm.*)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Call and return instructions
+//===----------------------------------------------------------------------===//
+
+// Call
+def : InstRW<[VBU, FXU, FXU, Lat3, GroupAlone], (instregex "(Call)?BRAS$")>;
+def : InstRW<[FXU, FXU, LSU, Lat6, GroupAlone], (instregex "(Call)?BRASL$")>;
+def : InstRW<[FXU, FXU, LSU, Lat6, GroupAlone], (instregex "(Call)?BAS(R)?$")>;
+def : InstRW<[FXU, FXU, LSU, Lat6, GroupAlone], (instregex "TLS_(G|L)DCALL$")>;
+
+// Return
+def : InstRW<[LSU_lat1, EndGroup], (instregex "Return$")>;
+def : InstRW<[LSU_lat1], (instregex "CondReturn$")>;
+
+//===----------------------------------------------------------------------===//
+// Select instructions
+//===----------------------------------------------------------------------===//
+
+// Select pseudo
+def : InstRW<[FXU], (instregex "Select(32|64|32Mux)$")>;
+
+// CondStore pseudos
+def : InstRW<[FXU], (instregex "CondStore16(Inv)?$")>;
+def : InstRW<[FXU], (instregex "CondStore16Mux(Inv)?$")>;
+def : InstRW<[FXU], (instregex "CondStore32(Inv)?$")>;
+def : InstRW<[FXU], (instregex "CondStore64(Inv)?$")>;
+def : InstRW<[FXU], (instregex "CondStore8(Inv)?$")>;
+def : InstRW<[FXU], (instregex "CondStore8Mux(Inv)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Move instructions
+//===----------------------------------------------------------------------===//
+
+// Moves
+def : InstRW<[FXU, LSU, Lat5], (instregex "MV(G|H)?HI$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "MVI(Y)?$")>;
+
+// Move character
+def : InstRW<[LSU, LSU, LSU, FXU, Lat8, GroupAlone], (instregex "MVC$")>;
+
+// Pseudo -> reg move
+def : InstRW<[FXU], (instregex "COPY(_TO_REGCLASS)?$")>;
+def : InstRW<[FXU], (instregex "EXTRACT_SUBREG$")>;
+def : InstRW<[FXU], (instregex "INSERT_SUBREG$")>;
+def : InstRW<[FXU], (instregex "REG_SEQUENCE$")>;
+def : InstRW<[FXU], (instregex "SUBREG_TO_REG$")>;
+
+// Loads
+def : InstRW<[LSU], (instregex "L(Y|FH|RL|Mux)?$")>;
+def : InstRW<[LSU], (instregex "LG(RL)?$")>;
+def : InstRW<[LSU], (instregex "L128$")>;
+
+def : InstRW<[FXU], (instregex "LLIH(F|H|L)$")>;
+def : InstRW<[FXU], (instregex "LLIL(F|H|L)$")>;
+
+def : InstRW<[FXU], (instregex "LG(F|H)I$")>;
+def : InstRW<[FXU], (instregex "LHI(Mux)?$")>;
+def : InstRW<[FXU], (instregex "LR(Mux)?$")>;
+
+// Load and trap
+def : InstRW<[FXU, LSU, Lat5], (instregex "L(FH|G)?AT$")>;
+
+// Load and test
+def : InstRW<[FXU, LSU, Lat5], (instregex "LT(G)?$")>;
+def : InstRW<[FXU], (instregex "LT(G)?R$")>;
+
+// Stores
+def : InstRW<[FXU, LSU, Lat5], (instregex "STG(RL)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "ST128$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "ST(Y|FH|RL|Mux)?$")>;
+
+// String moves.
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "MVST$")>;
+
+//===----------------------------------------------------------------------===//
+// Conditional move instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, Lat2], (instregex "LOC(G)?R(Asm.*)?$")>;
+def : InstRW<[FXU, LSU, Lat6], (instregex "LOC(G)?(Asm.*)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "STOC(G)?(Asm.*)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Sign extensions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU], (instregex "L(B|H|G)R$")>;
+def : InstRW<[FXU], (instregex "LG(B|H|F)R$")>;
+
+def : InstRW<[FXU, LSU, Lat5], (instregex "LTGF$")>;
+def : InstRW<[FXU], (instregex "LTGFR$")>;
+
+def : InstRW<[FXU, LSU, Lat5], (instregex "LB(H|Mux)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "LH(Y)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "LH(H|Mux|RL)$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "LG(B|H|F)$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "LG(H|F)RL$")>;
+
+//===----------------------------------------------------------------------===//
+// Zero extensions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU], (instregex "LLCR(Mux)?$")>;
+def : InstRW<[FXU], (instregex "LLHR(Mux)?$")>;
+def : InstRW<[FXU], (instregex "LLG(C|H|F|T)R$")>;
+def : InstRW<[LSU], (instregex "LLC(Mux)?$")>;
+def : InstRW<[LSU], (instregex "LLH(Mux)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "LL(C|H)H$")>;
+def : InstRW<[LSU], (instregex "LLHRL$")>;
+def : InstRW<[LSU], (instregex "LLG(C|H|F|T|HRL|FRL)$")>;
+
+// Load and trap
+def : InstRW<[FXU, LSU, Lat5], (instregex "LLG(F|T)?AT$")>;
+
+//===----------------------------------------------------------------------===//
+// Truncations
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat5], (instregex "STC(H|Y|Mux)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "STH(H|Y|RL|Mux)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Multi-register moves
+//===----------------------------------------------------------------------===//
+
+// Load multiple (estimated average of 5 ops)
+def : InstRW<[LSU, LSU, LSU, LSU, LSU, Lat10, GroupAlone],
+ (instregex "LM(H|Y|G)?$")>;
+
+// Store multiple (estimated average of 3 ops)
+def : InstRW<[LSU, LSU, FXU, FXU, FXU, Lat10, GroupAlone],
+ (instregex "STM(H|Y|G)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Byte swaps
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU], (instregex "LRV(G)?R$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "LRV(G|H)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "STRV(G|H)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Load address instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU], (instregex "LA(Y|RL)?$")>;
+
+// Load the Global Offset Table address
+def : InstRW<[FXU], (instregex "GOT$")>;
+
+//===----------------------------------------------------------------------===//
+// Absolute and Negation
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, Lat2], (instregex "LP(G)?R$")>;
+def : InstRW<[FXU, FXU, Lat3, GroupAlone], (instregex "L(N|P)GFR$")>;
+def : InstRW<[FXU, Lat2], (instregex "LN(R|GR)$")>;
+def : InstRW<[FXU], (instregex "LC(R|GR)$")>;
+def : InstRW<[FXU, FXU, Lat2, GroupAlone], (instregex "LCGFR$")>;
+
+//===----------------------------------------------------------------------===//
+// Insertion
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat5], (instregex "IC(Y)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "IC32(Y)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "ICM(H|Y)?$")>;
+def : InstRW<[FXU], (instregex "II(F|H|L)Mux$")>;
+def : InstRW<[FXU], (instregex "IIHF(64)?$")>;
+def : InstRW<[FXU], (instregex "IIHH(64)?$")>;
+def : InstRW<[FXU], (instregex "IIHL(64)?$")>;
+def : InstRW<[FXU], (instregex "IILF(64)?$")>;
+def : InstRW<[FXU], (instregex "IILH(64)?$")>;
+def : InstRW<[FXU], (instregex "IILL(64)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Addition
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat5], (instregex "A(Y|SI)?$")>;
+def : InstRW<[FXU, LSU, Lat6], (instregex "AH(Y)?$")>;
+def : InstRW<[FXU], (instregex "AIH$")>;
+def : InstRW<[FXU], (instregex "AFI(Mux)?$")>;
+def : InstRW<[FXU], (instregex "AGFI$")>;
+def : InstRW<[FXU], (instregex "AGHI(K)?$")>;
+def : InstRW<[FXU], (instregex "AGR(K)?$")>;
+def : InstRW<[FXU], (instregex "AHI(K)?$")>;
+def : InstRW<[FXU], (instregex "AHIMux(K)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "AL(Y)?$")>;
+def : InstRW<[FXU], (instregex "AL(FI|HSIK)$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "ALG(F)?$")>;
+def : InstRW<[FXU], (instregex "ALGHSIK$")>;
+def : InstRW<[FXU], (instregex "ALGF(I|R)$")>;
+def : InstRW<[FXU], (instregex "ALGR(K)?$")>;
+def : InstRW<[FXU], (instregex "ALR(K)?$")>;
+def : InstRW<[FXU], (instregex "AR(K)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "AG(SI)?$")>;
+
+// Logical addition with carry
+def : InstRW<[FXU, LSU, Lat7, GroupAlone], (instregex "ALC(G)?$")>;
+def : InstRW<[FXU, Lat3, GroupAlone], (instregex "ALC(G)?R$")>;
+
+// Add with sign extension (32 -> 64)
+def : InstRW<[FXU, LSU, Lat6], (instregex "AGF$")>;
+def : InstRW<[FXU, Lat2], (instregex "AGFR$")>;
+
+//===----------------------------------------------------------------------===//
+// Subtraction
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat5], (instregex "S(G|Y)?$")>;
+def : InstRW<[FXU, LSU, Lat6], (instregex "SH(Y)?$")>;
+def : InstRW<[FXU], (instregex "SGR(K)?$")>;
+def : InstRW<[FXU], (instregex "SLFI$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "SL(G|GF|Y)?$")>;
+def : InstRW<[FXU], (instregex "SLGF(I|R)$")>;
+def : InstRW<[FXU], (instregex "SLGR(K)?$")>;
+def : InstRW<[FXU], (instregex "SLR(K)?$")>;
+def : InstRW<[FXU], (instregex "SR(K)?$")>;
+
+// Subtraction with borrow
+def : InstRW<[FXU, LSU, Lat7, GroupAlone], (instregex "SLB(G)?$")>;
+def : InstRW<[FXU, Lat3, GroupAlone], (instregex "SLB(G)?R$")>;
+
+// Subtraction with sign extension (32 -> 64)
+def : InstRW<[FXU, LSU, Lat6], (instregex "SGF$")>;
+def : InstRW<[FXU, Lat2], (instregex "SGFR$")>;
+
+//===----------------------------------------------------------------------===//
+// AND
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat5], (instregex "N(G|Y)?$")>;
+def : InstRW<[FXU], (instregex "NGR(K)?$")>;
+def : InstRW<[FXU], (instregex "NI(FMux|HMux|LMux)$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "NI(Y)?$")>;
+def : InstRW<[FXU], (instregex "NIHF(64)?$")>;
+def : InstRW<[FXU], (instregex "NIHH(64)?$")>;
+def : InstRW<[FXU], (instregex "NIHL(64)?$")>;
+def : InstRW<[FXU], (instregex "NILF(64)?$")>;
+def : InstRW<[FXU], (instregex "NILH(64)?$")>;
+def : InstRW<[FXU], (instregex "NILL(64)?$")>;
+def : InstRW<[FXU], (instregex "NR(K)?$")>;
+def : InstRW<[LSU, LSU, FXU, Lat9, GroupAlone], (instregex "NC$")>;
+
+//===----------------------------------------------------------------------===//
+// OR
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat5], (instregex "O(G|Y)?$")>;
+def : InstRW<[FXU], (instregex "OGR(K)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "OI(Y)?$")>;
+def : InstRW<[FXU], (instregex "OI(FMux|HMux|LMux)$")>;
+def : InstRW<[FXU], (instregex "OIHF(64)?$")>;
+def : InstRW<[FXU], (instregex "OIHH(64)?$")>;
+def : InstRW<[FXU], (instregex "OIHL(64)?$")>;
+def : InstRW<[FXU], (instregex "OILF(64)?$")>;
+def : InstRW<[FXU], (instregex "OILH(64)?$")>;
+def : InstRW<[FXU], (instregex "OILL(64)?$")>;
+def : InstRW<[FXU], (instregex "OR(K)?$")>;
+def : InstRW<[LSU, LSU, FXU, Lat9, GroupAlone], (instregex "OC$")>;
+
+//===----------------------------------------------------------------------===//
+// XOR
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat5], (instregex "X(G|Y)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "XI(Y)?$")>;
+def : InstRW<[FXU], (instregex "XIFMux$")>;
+def : InstRW<[FXU], (instregex "XGR(K)?$")>;
+def : InstRW<[FXU], (instregex "XIHF(64)?$")>;
+def : InstRW<[FXU], (instregex "XILF(64)?$")>;
+def : InstRW<[FXU], (instregex "XR(K)?$")>;
+def : InstRW<[LSU, LSU, FXU, Lat9, GroupAlone], (instregex "XC$")>;
+
+//===----------------------------------------------------------------------===//
+// Multiplication
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat10], (instregex "MS(GF|Y)?$")>;
+def : InstRW<[FXU, Lat6], (instregex "MS(R|FI)$")>;
+def : InstRW<[FXU, LSU, Lat12], (instregex "MSG$")>;
+def : InstRW<[FXU, Lat8], (instregex "MSGR$")>;
+def : InstRW<[FXU, Lat6], (instregex "MSGF(I|R)$")>;
+def : InstRW<[FXU, LSU, Lat15, GroupAlone], (instregex "MLG$")>;
+def : InstRW<[FXU, Lat9, GroupAlone], (instregex "MLGR$")>;
+def : InstRW<[FXU, Lat5], (instregex "MGHI$")>;
+def : InstRW<[FXU, Lat5], (instregex "MHI$")>;
+def : InstRW<[FXU, LSU, Lat9], (instregex "MH(Y)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Division and remainder
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FPU2, FPU2, FXU, FXU, FXU, FXU, Lat30, GroupAlone],
+ (instregex "DSG(F)?R$")>;
+def : InstRW<[FPU2, FPU2, LSU, FXU, FXU, FXU, Lat30, GroupAlone],
+ (instregex "DSG(F)?$")>;
+def : InstRW<[FPU2, FPU2, FXU, FXU, FXU, FXU, FXU, Lat30, GroupAlone],
+ (instregex "DL(G)?R$")>;
+def : InstRW<[FPU2, FPU2, LSU, FXU, FXU, FXU, FXU, Lat30, GroupAlone],
+ (instregex "DL(G)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Shifts
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU], (instregex "SLL(G|K)?$")>;
+def : InstRW<[FXU], (instregex "SRL(G|K)?$")>;
+def : InstRW<[FXU], (instregex "SRA(G|K)?$")>;
+def : InstRW<[FXU], (instregex "SLA(K)?$")>;
+
+// Rotate
+def : InstRW<[FXU, LSU, Lat6], (instregex "RLL(G)?$")>;
+
+// Rotate and insert
+def : InstRW<[FXU], (instregex "RISBG(N|32)?$")>;
+def : InstRW<[FXU], (instregex "RISBH(G|H|L)$")>;
+def : InstRW<[FXU], (instregex "RISBL(G|H|L)$")>;
+def : InstRW<[FXU], (instregex "RISBMux$")>;
+
+// Rotate and Select
+def : InstRW<[FXU, FXU, Lat3, GroupAlone], (instregex "R(N|O|X)SBG$")>;
+
+//===----------------------------------------------------------------------===//
+// Comparison
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat5], (instregex "C(G|Y|Mux|RL)?$")>;
+def : InstRW<[FXU], (instregex "C(F|H)I(Mux)?$")>;
+def : InstRW<[FXU], (instregex "CG(F|H)I$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "CG(HSI|RL)$")>;
+def : InstRW<[FXU], (instregex "C(G)?R$")>;
+def : InstRW<[FXU], (instregex "CIH$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "CH(F|SI)$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "CL(Y|Mux|FHSI)?$")>;
+def : InstRW<[FXU], (instregex "CLFI(Mux)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "CLG(HRL|HSI)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "CLGF(RL)?$")>;
+def : InstRW<[FXU], (instregex "CLGF(I|R)$")>;
+def : InstRW<[FXU], (instregex "CLGR$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "CLGRL$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "CLH(F|RL|HSI)$")>;
+def : InstRW<[FXU], (instregex "CLIH$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "CLI(Y)?$")>;
+def : InstRW<[FXU], (instregex "CLR$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "CLRL$")>;
+
+// Compare halfword
+def : InstRW<[FXU, LSU, Lat6], (instregex "CH(Y|RL)?$")>;
+def : InstRW<[FXU, LSU, Lat6], (instregex "CGH(RL)?$")>;
+def : InstRW<[FXU, FXU, LSU, Lat6, GroupAlone], (instregex "CHHSI$")>;
+
+// Compare with sign extension (32 -> 64)
+def : InstRW<[FXU, LSU, Lat6], (instregex "CGF(RL)?$")>;
+def : InstRW<[FXU, Lat2], (instregex "CGFR$")>;
+
+// Compare logical character
+def : InstRW<[FXU, LSU, LSU, Lat9, GroupAlone], (instregex "CLC$")>;
+
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "CLST$")>;
+
+// Test under mask
+def : InstRW<[FXU, LSU, Lat5], (instregex "TM(Y)?$")>;
+def : InstRW<[FXU], (instregex "TM(H|L)Mux$")>;
+def : InstRW<[FXU], (instregex "TMHH(64)?$")>;
+def : InstRW<[FXU], (instregex "TMHL(64)?$")>;
+def : InstRW<[FXU], (instregex "TMLH(64)?$")>;
+def : InstRW<[FXU], (instregex "TMLL(64)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Prefetch and execution hint
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[LSU], (instregex "PFD(RL)?$")>;
+def : InstRW<[LSU], (instregex "BP(R)?P$")>;
+def : InstRW<[FXU], (instregex "NIAI$")>;
+
+//===----------------------------------------------------------------------===//
+// Atomic operations
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[LSU, EndGroup], (instregex "Serialize$")>;
+
+def : InstRW<[FXU, LSU, Lat5], (instregex "LAA(G)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "LAAL(G)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "LAN(G)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "LAO(G)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "LAX(G)?$")>;
+
+// Test and set
+def : InstRW<[FXU, LSU, Lat5, EndGroup], (instregex "TS$")>;
+
+// Compare and swap
+def : InstRW<[FXU, FXU, LSU, Lat6, GroupAlone], (instregex "CS(G|Y)?$")>;
+
+// Compare double and swap
+def : InstRW<[FXU, FXU, FXU, FXU, FXU, LSU, Lat10, GroupAlone],
+ (instregex "CDS(Y)?$")>;
+def : InstRW<[FXU, FXU, FXU, FXU, FXU, FXU, LSU, LSU, Lat12, GroupAlone],
+ (instregex "CDSG$")>;
+
+// Compare and swap and store
+def : InstRW<[FXU, Lat30, GroupAlone], (instregex "CSST$")>;
+
+// Perform locked operation
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "PLO$")>;
+
+// Load/store pair from/to quadword
+def : InstRW<[LSU, LSU, Lat5, GroupAlone], (instregex "LPQ$")>;
+def : InstRW<[FXU, FXU, LSU, LSU, Lat6, GroupAlone], (instregex "STPQ$")>;
+
+// Load pair disjoint
+def : InstRW<[LSU, LSU, Lat5, GroupAlone], (instregex "LPD(G)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Access registers
+//===----------------------------------------------------------------------===//
+
+// Extract/set/copy access register
+def : InstRW<[LSU], (instregex "(EAR|SAR|CPYA)$")>;
+
+// Load address extended
+def : InstRW<[LSU, FXU, Lat5, GroupAlone], (instregex "LAE(Y)?$")>;
+
+// Load/store access multiple (not modeled precisely)
+def : InstRW<[LSU, Lat30, GroupAlone], (instregex "(L|ST)AM(Y)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Program mask and addressing mode
+//===----------------------------------------------------------------------===//
+
+// Insert Program Mask
+def : InstRW<[FXU, Lat3, EndGroup], (instregex "IPM$")>;
+
+// Set Program Mask
+def : InstRW<[LSU, EndGroup], (instregex "SPM$")>;
+
+// Branch and link
+def : InstRW<[FXU, FXU, LSU, Lat8, GroupAlone], (instregex "BAL(R)?$")>;
+
+// Test addressing mode
+def : InstRW<[FXU], (instregex "TAM$")>;
+
+// Set addressing mode
+def : InstRW<[LSU, EndGroup], (instregex "SAM(24|31|64)$")>;
+
+// Branch (and save) and set mode.
+def : InstRW<[FXU, LSU, Lat5, GroupAlone], (instregex "BSM$")>;
+def : InstRW<[FXU, FXU, LSU, Lat6, GroupAlone], (instregex "BASSM$")>;
+
+//===----------------------------------------------------------------------===//
+// Transactional execution
+//===----------------------------------------------------------------------===//
+
+// Transaction begin
+def : InstRW<[LSU, LSU, FXU, FXU, FXU, FXU, FXU, Lat15, GroupAlone],
+ (instregex "TBEGIN(C|_nofloat)?$")>;
+
+// Transaction end
+def : InstRW<[LSU, GroupAlone], (instregex "TEND$")>;
+
+// Transaction abort
+def : InstRW<[LSU, GroupAlone], (instregex "TABORT$")>;
+
+// Extract Transaction Nesting Depth
+def : InstRW<[FXU], (instregex "ETND$")>;
+
+// Nontransactional store
+def : InstRW<[FXU, LSU, Lat5], (instregex "NTSTG$")>;
+
+//===----------------------------------------------------------------------===//
+// Processor assist
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU], (instregex "PPA$")>;
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//===----------------------------------------------------------------------===//
+
+// Find leftmost one
+def : InstRW<[FXU, Lat7, GroupAlone], (instregex "FLOGR$")>;
+
+// Population count
+def : InstRW<[FXU, Lat3], (instregex "POPCNT$")>;
+
+// Extend
+def : InstRW<[FXU], (instregex "AEXT128_64$")>;
+def : InstRW<[FXU], (instregex "ZEXT128_(32|64)$")>;
+
+// String instructions
+def : InstRW<[FXU, LSU, Lat30], (instregex "SRST$")>;
+
+// Move with key
+def : InstRW<[LSU, Lat8, GroupAlone], (instregex "MVCK$")>;
+
+// Extract CPU Time
+def : InstRW<[FXU, Lat5, LSU], (instregex "ECTG$")>;
+
+// Execute
+def : InstRW<[LSU, GroupAlone], (instregex "EX(RL)?$")>;
+
+// Program return
+def : InstRW<[FXU, Lat30], (instregex "PR$")>;
+
+// Inline assembly
+def : InstRW<[FXU, LSU, LSU, Lat9, GroupAlone], (instregex "STCK(F)?$")>;
+def : InstRW<[LSU, LSU, LSU, LSU, FXU, FXU, Lat20, GroupAlone],
+ (instregex "STCKE$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "STFLE$")>;
+def : InstRW<[FXU, Lat30], (instregex "SVC$")>;
+
+// Store real address
+def : InstRW<[FXU, LSU, Lat5], (instregex "STRAG$")>;
+
+//===----------------------------------------------------------------------===//
+// .insn directive instructions
+//===----------------------------------------------------------------------===//
+
+// An "empty" sched-class will be assigned instead of the "invalid sched-class".
+// getNumDecoderSlots() will then return 1 instead of 0.
+def : InstRW<[], (instregex "Insn.*")>;
+
+
+// ----------------------------- Floating point ----------------------------- //
+
+//===----------------------------------------------------------------------===//
+// FP: Select instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU], (instregex "SelectF(32|64|128)$")>;
+def : InstRW<[FXU], (instregex "CondStoreF32(Inv)?$")>;
+def : InstRW<[FXU], (instregex "CondStoreF64(Inv)?$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Move instructions
+//===----------------------------------------------------------------------===//
+
+// Load zero
+def : InstRW<[FXU], (instregex "LZ(DR|ER)$")>;
+def : InstRW<[FXU, FXU, Lat2, GroupAlone], (instregex "LZXR$")>;
+
+// Load
+def : InstRW<[FXU], (instregex "LER$")>;
+def : InstRW<[FXU], (instregex "LD(R|R32|GR)$")>;
+def : InstRW<[FXU, Lat3], (instregex "LGDR$")>;
+def : InstRW<[FXU, FXU, Lat2, GroupAlone], (instregex "LXR$")>;
+
+// Load and Test
+def : InstRW<[FPU], (instregex "LT(D|E)BR$")>;
+def : InstRW<[FPU], (instregex "LTEBRCompare(_VecPseudo)?$")>;
+def : InstRW<[FPU], (instregex "LTDBRCompare(_VecPseudo)?$")>;
+def : InstRW<[FPU2, FPU2, Lat9, GroupAlone], (instregex "LTXBR$")>;
+def : InstRW<[FPU2, FPU2, Lat9, GroupAlone],
+ (instregex "LTXBRCompare(_VecPseudo)?$")>;
+
+// Copy sign
+def : InstRW<[FXU, FXU, Lat5, GroupAlone], (instregex "CPSDRd(d|s)$")>;
+def : InstRW<[FXU, FXU, Lat5, GroupAlone], (instregex "CPSDRs(d|s)$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Load instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[LSU], (instregex "LE(Y)?$")>;
+def : InstRW<[LSU], (instregex "LD(Y|E32)?$")>;
+def : InstRW<[LSU], (instregex "LX$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Store instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat7], (instregex "STD(Y)?$")>;
+def : InstRW<[FXU, LSU, Lat7], (instregex "STE(Y)?$")>;
+def : InstRW<[FXU, LSU, Lat5], (instregex "STX$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Conversion instructions
+//===----------------------------------------------------------------------===//
+
+// Load rounded
+def : InstRW<[FPU], (instregex "LEDBR(A)?$")>;
+def : InstRW<[FPU, FPU, Lat20], (instregex "LEXBR(A)?$")>;
+def : InstRW<[FPU, FPU, Lat20], (instregex "LDXBR(A)?$")>;
+
+// Load lengthened
+def : InstRW<[FPU, LSU, Lat12], (instregex "LDEB$")>;
+def : InstRW<[FPU], (instregex "LDEBR$")>;
+def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "LX(D|E)B$")>;
+def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "LX(D|E)BR$")>;
+
+// Convert from fixed / logical
+def : InstRW<[FXU, FPU, Lat9, GroupAlone], (instregex "CE(F|G)BR(A?)$")>;
+def : InstRW<[FXU, FPU, Lat9, GroupAlone], (instregex "CD(F|G)BR(A?)$")>;
+def : InstRW<[FXU, FPU2, FPU2, Lat11, GroupAlone], (instregex "CX(F|G)BR(A?)$")>;
+def : InstRW<[FXU, FPU, Lat9, GroupAlone], (instregex "CEL(F|G)BR$")>;
+def : InstRW<[FXU, FPU, Lat9, GroupAlone], (instregex "CDL(F|G)BR$")>;
+def : InstRW<[FXU, FPU2, FPU2, Lat11, GroupAlone], (instregex "CXL(F|G)BR$")>;
+
+// Convert to fixed / logical
+def : InstRW<[FXU, FPU, Lat12, GroupAlone], (instregex "CF(E|D)BR(A?)$")>;
+def : InstRW<[FXU, FPU, Lat12, GroupAlone], (instregex "CG(E|D)BR(A?)$")>;
+def : InstRW<[FXU, FPU, FPU, Lat20, GroupAlone], (instregex "C(F|G)XBR(A?)$")>;
+def : InstRW<[FXU, FPU, Lat11, GroupAlone], (instregex "CLF(E|D)BR$")>;
+def : InstRW<[FXU, FPU, Lat11, GroupAlone], (instregex "CLG(E|D)BR$")>;
+def : InstRW<[FXU, FPU, FPU, Lat20, GroupAlone], (instregex "CL(F|G)XBR$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Load Complement / Negative / Positive
+def : InstRW<[FPU], (instregex "L(C|N|P)DBR$")>;
+def : InstRW<[FPU], (instregex "L(C|N|P)EBR$")>;
+def : InstRW<[FXU], (instregex "LCDFR(_32)?$")>;
+def : InstRW<[FXU], (instregex "LNDFR(_32)?$")>;
+def : InstRW<[FXU], (instregex "LPDFR(_32)?$")>;
+def : InstRW<[FPU2, FPU2, Lat9, GroupAlone], (instregex "L(C|N|P)XBR$")>;
+
+// Square root
+def : InstRW<[FPU, LSU, Lat30], (instregex "SQ(E|D)B$")>;
+def : InstRW<[FPU, Lat30], (instregex "SQ(E|D)BR$")>;
+def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "SQXBR$")>;
+
+// Load FP integer
+def : InstRW<[FPU], (instregex "FIEBR(A)?$")>;
+def : InstRW<[FPU], (instregex "FIDBR(A)?$")>;
+def : InstRW<[FPU2, FPU2, Lat15, GroupAlone], (instregex "FIXBR(A)?$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition
+def : InstRW<[FPU, LSU, Lat12], (instregex "A(E|D)B$")>;
+def : InstRW<[FPU], (instregex "A(E|D)BR$")>;
+def : InstRW<[FPU2, FPU2, Lat20, GroupAlone], (instregex "AXBR$")>;
+
+// Subtraction
+def : InstRW<[FPU, LSU, Lat12], (instregex "S(E|D)B$")>;
+def : InstRW<[FPU], (instregex "S(E|D)BR$")>;
+def : InstRW<[FPU2, FPU2, Lat20, GroupAlone], (instregex "SXBR$")>;
+
+// Multiply
+def : InstRW<[FPU, LSU, Lat12], (instregex "M(D|DE|EE)B$")>;
+def : InstRW<[FPU], (instregex "M(D|DE|EE)BR$")>;
+def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "MXDB$")>;
+def : InstRW<[FPU2, FPU2, Lat10, GroupAlone], (instregex "MXDBR$")>;
+def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "MXBR$")>;
+
+// Multiply and add / subtract
+def : InstRW<[FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)EB$")>;
+def : InstRW<[FPU, GroupAlone], (instregex "M(A|S)EBR$")>;
+def : InstRW<[FPU, LSU, Lat12, GroupAlone], (instregex "M(A|S)DB$")>;
+def : InstRW<[FPU, GroupAlone], (instregex "M(A|S)DBR$")>;
+
+// Division
+def : InstRW<[FPU, LSU, Lat30], (instregex "D(E|D)B$")>;
+def : InstRW<[FPU, Lat30], (instregex "D(E|D)BR$")>;
+def : InstRW<[FPU2, FPU2, Lat30, GroupAlone], (instregex "DXBR$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Comparisons
+//===----------------------------------------------------------------------===//
+
+// Compare
+def : InstRW<[FPU, LSU, Lat12], (instregex "C(E|D)B$")>;
+def : InstRW<[FPU], (instregex "C(E|D)BR$")>;
+def : InstRW<[FPU, FPU, Lat30], (instregex "CXBR$")>;
+
+// Test Data Class
+def : InstRW<[FPU, LSU, Lat15], (instregex "TC(E|D)B$")>;
+def : InstRW<[FPU2, FPU2, LSU, Lat15, GroupAlone], (instregex "TCXB$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Floating-point control register instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[FXU, LSU, Lat4, GroupAlone], (instregex "EFPC$")>;
+def : InstRW<[LSU, Lat3, GroupAlone], (instregex "SFPC$")>;
+def : InstRW<[LSU, LSU, Lat6, GroupAlone], (instregex "LFPC$")>;
+def : InstRW<[LSU, Lat3, GroupAlone], (instregex "STFPC$")>;
+def : InstRW<[FXU, Lat30, GroupAlone], (instregex "SFASR$")>;
+def : InstRW<[FXU, LSU, Lat30, GroupAlone], (instregex "LFAS$")>;
+def : InstRW<[FXU, Lat2, GroupAlone], (instregex "SRNM(B|T)?$")>;
+
+}
+
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp
index 7f26a3519e50..83882fc0310a 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp
@@ -29,7 +29,7 @@ public:
static char ID;
SystemZShortenInst(const SystemZTargetMachine &tm);
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "SystemZ Instruction Shortening";
}
@@ -37,7 +37,7 @@ public:
bool runOnMachineFunction(MachineFunction &F) override;
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
private:
@@ -275,7 +275,7 @@ bool SystemZShortenInst::runOnMachineFunction(MachineFunction &F) {
const SystemZSubtarget &ST = F.getSubtarget<SystemZSubtarget>();
TII = ST.getInstrInfo();
TRI = ST.getRegisterInfo();
- LiveRegs.init(TRI);
+ LiveRegs.init(*TRI);
bool Changed = false;
for (auto &MBB : F)
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
index 67d5e0179fe2..ce07ea3318a5 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
@@ -39,10 +39,12 @@ SystemZSubtarget::SystemZSubtarget(const Triple &TT, const std::string &CPU,
HasLoadStoreOnCond(false), HasHighWord(false), HasFPExtension(false),
HasPopulationCount(false), HasFastSerialization(false),
HasInterlockedAccess1(false), HasMiscellaneousExtensions(false),
+ HasExecutionHint(false), HasLoadAndTrap(false),
HasTransactionalExecution(false), HasProcessorAssist(false),
- HasVector(false), HasLoadStoreOnCond2(false), TargetTriple(TT),
- InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this),
- TSInfo(), FrameLowering() {}
+ HasVector(false), HasLoadStoreOnCond2(false),
+ HasLoadAndZeroRightmostByte(false),
+ TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
+ TLInfo(TM, *this), TSInfo(), FrameLowering() {}
bool SystemZSubtarget::isPC32DBLSymbol(const GlobalValue *GV,
CodeModel::Model CM) const {
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h
index 6007f6fc9c4c..cdb61327a16a 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h
@@ -42,10 +42,13 @@ protected:
bool HasFastSerialization;
bool HasInterlockedAccess1;
bool HasMiscellaneousExtensions;
+ bool HasExecutionHint;
+ bool HasLoadAndTrap;
bool HasTransactionalExecution;
bool HasProcessorAssist;
bool HasVector;
bool HasLoadStoreOnCond2;
+ bool HasLoadAndZeroRightmostByte;
private:
Triple TargetTriple;
@@ -77,6 +80,9 @@ public:
// This is important for reducing register pressure in vector code.
bool useAA() const override { return true; }
+ // Always enable the early if-conversion pass.
+ bool enableEarlyIfConversion() const override { return true; }
+
// Automatically generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
@@ -109,12 +115,23 @@ public:
return HasMiscellaneousExtensions;
}
+ // Return true if the target has the execution-hint facility.
+ bool hasExecutionHint() const { return HasExecutionHint; }
+
+ // Return true if the target has the load-and-trap facility.
+ bool hasLoadAndTrap() const { return HasLoadAndTrap; }
+
// Return true if the target has the transactional-execution facility.
bool hasTransactionalExecution() const { return HasTransactionalExecution; }
// Return true if the target has the processor-assist facility.
bool hasProcessorAssist() const { return HasProcessorAssist; }
+ // Return true if the target has the load-and-zero-rightmost-byte facility.
+ bool hasLoadAndZeroRightmostByte() const {
+ return HasLoadAndZeroRightmostByte;
+ }
+
// Return true if the target has the vector facility.
bool hasVector() const { return HasVector; }
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
index 85a3f6f4a8be..33fdb8f90825 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -9,6 +9,7 @@
#include "SystemZTargetMachine.h"
#include "SystemZTargetTransformInfo.h"
+#include "SystemZMachineScheduler.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Support/TargetRegistry.h"
@@ -17,10 +18,9 @@
using namespace llvm;
-extern cl::opt<bool> MISchedPostRA;
extern "C" void LLVMInitializeSystemZTarget() {
// Register the target.
- RegisterTargetMachine<SystemZTargetMachine> X(TheSystemZTarget);
+ RegisterTargetMachine<SystemZTargetMachine> X(getTheSystemZTarget());
}
// Determine whether we use the vector ABI.
@@ -114,8 +114,15 @@ public:
return getTM<SystemZTargetMachine>();
}
+ ScheduleDAGInstrs *
+ createPostMachineScheduler(MachineSchedContext *C) const override {
+ return new ScheduleDAGMI(C, make_unique<SystemZPostRASchedStrategy>(C),
+ /*RemoveKillFlags=*/true);
+ }
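+ // (Not part of the original commit text: this override is what routes the
+ // PostMachineScheduler pass, made unconditional in addPreEmitPass below,
+ // through the SystemZPostRASchedStrategy from SystemZMachineScheduler.h
+ // included above.)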
+
void addIRPasses() override;
bool addInstSelector() override;
+ bool addILPOpts() override;
void addPreSched2() override;
void addPreEmitPass() override;
};
@@ -137,7 +144,14 @@ bool SystemZPassConfig::addInstSelector() {
return false;
}
+bool SystemZPassConfig::addILPOpts() {
+ addPass(&EarlyIfConverterID);
+ return true;
+}
+
void SystemZPassConfig::addPreSched2() {
+ addPass(createSystemZExpandPseudoPass(getSystemZTargetMachine()));
+
if (getOptLevel() != CodeGenOpt::None)
addPass(&IfConverterID);
}
@@ -180,12 +194,8 @@ void SystemZPassConfig::addPreEmitPass() {
// Do final scheduling after all other optimizations, to get an
// optimal input for the decoder (branch relaxation must happen
// after block placement).
- if (getOptLevel() != CodeGenOpt::None) {
- if (MISchedPostRA)
- addPass(&PostMachineSchedulerID);
- else
- addPass(&PostRASchedulerID);
- }
+ if (getOptLevel() != CodeGenOpt::None)
+ addPass(&PostMachineSchedulerID);
}
TargetPassConfig *SystemZTargetMachine::createPassConfig(PassManagerBase &PM) {
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index 5ff5b21f49b8..b10c0e09a0d4 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -238,6 +238,63 @@ SystemZTTIImpl::getPopcntSupport(unsigned TyWidth) {
return TTI::PSK_Software;
}
+void SystemZTTIImpl::getUnrollingPreferences(Loop *L,
+ TTI::UnrollingPreferences &UP) {
+ // Find out if L contains a call, what the machine instruction count
+ // estimate is, and how many stores there are.
+ bool HasCall = false;
+ unsigned NumStores = 0;
+ for (auto &BB : L->blocks())
+ for (auto &I : *BB) {
+ if (isa<CallInst>(&I) || isa<InvokeInst>(&I)) {
+ ImmutableCallSite CS(&I);
+ if (const Function *F = CS.getCalledFunction()) {
+ if (isLoweredToCall(F))
+ HasCall = true;
+ if (F->getIntrinsicID() == Intrinsic::memcpy ||
+ F->getIntrinsicID() == Intrinsic::memset)
+ NumStores++;
+ } else { // indirect call.
+ HasCall = true;
+ }
+ }
+ if (isa<StoreInst>(&I)) {
+ NumStores++;
+ Type *MemAccessTy = I.getOperand(0)->getType();
+ if((MemAccessTy->isIntegerTy() || MemAccessTy->isFloatingPointTy()) &&
+ (getDataLayout().getTypeSizeInBits(MemAccessTy) == 128))
+ NumStores++; // 128 bit fp/int stores get split.
+ }
+ }
+
+ // The z13 processor will run out of store tags if too many stores
+ // are fed into it too quickly. Therefore make sure there are not
+ // too many stores in the resulting unrolled loop.
+ unsigned const Max = (NumStores ? (12 / NumStores) : UINT_MAX);
+
+ if (HasCall) {
+ // Only allow full unrolling if loop has any calls.
+ UP.FullUnrollMaxCount = Max;
+ UP.MaxCount = 1;
+ return;
+ }
+
+ UP.MaxCount = Max;
+ if (UP.MaxCount <= 1)
+ return;
+
+ // Allow partial and runtime trip count unrolling.
+ UP.Partial = UP.Runtime = true;
+
+ UP.PartialThreshold = 75;
+ UP.DefaultUnrollRuntimeCount = 4;
+
+ // Allow expensive instructions in the pre-header of the loop.
+ UP.AllowExpensiveTripCount = true;
+
+ UP.Force = true;
+}
+
unsigned SystemZTTIImpl::getNumberOfRegisters(bool Vector) {
if (!Vector)
// Discount the stack pointer. Also leave out %r0, since it can't
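As a side note on the getUnrollingPreferences hunk above: the cap keeps an unrolled loop body from feeding more than roughly 12 stores into the z13 store queue, with 128-bit integer/FP stores counted twice because the backend splits them, and with calls restricting the loop to full unrolling only. A minimal standalone sketch of the arithmetic (the helper name and driver below are ours; only the constant 12 comes from the patch):

#include <climits>
#include <cstdio>

// Unroll-factor cap implied by the z13 store-tag limit: about 12 stores per
// unrolled body, so the cap is 12 / NumStores; with no stores there is no cap.
static unsigned unrollCapForStores(unsigned NumStores) {
  return NumStores ? (12 / NumStores) : UINT_MAX;
}

int main() {
  std::printf("%u\n", unrollCapForStores(5)); // 2  -> unroll at most 2x
  std::printf("%u\n", unrollCapForStores(1)); // 12
  std::printf("%u\n", unrollCapForStores(0)); // UINT_MAX -> effectively uncapped
  return 0;
}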
diff --git a/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
index 9ae736d8413a..f7d2d827f11b 100644
--- a/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -32,13 +32,6 @@ public:
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}
- // Provide value semantics. MSVC requires that we spell all of these out.
- SystemZTTIImpl(const SystemZTTIImpl &Arg)
- : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {}
- SystemZTTIImpl(SystemZTTIImpl &&Arg)
- : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
- TLI(std::move(Arg.TLI)) {}
-
/// \name Scalar TTI Implementations
/// @{
@@ -50,6 +43,8 @@ public:
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
+ void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP);
+
/// @}
/// \name Vector TTI Implementations
diff --git a/contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp b/contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp
index 8f9aa2811d05..d3c53a43b391 100644
--- a/contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp
+++ b/contrib/llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp
@@ -12,9 +12,12 @@
using namespace llvm;
-Target llvm::TheSystemZTarget;
+Target &llvm::getTheSystemZTarget() {
+ static Target TheSystemZTarget;
+ return TheSystemZTarget;
+}
extern "C" void LLVMInitializeSystemZTargetInfo() {
- RegisterTarget<Triple::systemz, /*HasJIT=*/true>
- X(TheSystemZTarget, "systemz", "SystemZ");
+ RegisterTarget<Triple::systemz, /*HasJIT=*/true> X(getTheSystemZTarget(),
+ "systemz", "SystemZ");
}
diff --git a/contrib/llvm/lib/Target/TargetIntrinsicInfo.cpp b/contrib/llvm/lib/Target/TargetIntrinsicInfo.cpp
index 64bd56f6e7df..e8b71924e0d9 100644
--- a/contrib/llvm/lib/Target/TargetIntrinsicInfo.cpp
+++ b/contrib/llvm/lib/Target/TargetIntrinsicInfo.cpp
@@ -22,7 +22,7 @@ TargetIntrinsicInfo::TargetIntrinsicInfo() {
TargetIntrinsicInfo::~TargetIntrinsicInfo() {
}
-unsigned TargetIntrinsicInfo::getIntrinsicID(Function *F) const {
+unsigned TargetIntrinsicInfo::getIntrinsicID(const Function *F) const {
const ValueName *ValName = F->getValueName();
if (!ValName)
return 0;
diff --git a/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp b/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp
index f863f429f43c..375f8511f7ad 100644
--- a/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp
+++ b/contrib/llvm/lib/Target/TargetLoweringObjectFile.cpp
@@ -43,11 +43,15 @@ using namespace llvm;
void TargetLoweringObjectFile::Initialize(MCContext &ctx,
const TargetMachine &TM) {
Ctx = &ctx;
+ // `Initialize` can be called more than once.
+ if (Mang != nullptr) delete Mang;
+ Mang = new Mangler();
InitMCObjectFileInfo(TM.getTargetTriple(), TM.isPositionIndependent(),
TM.getCodeModel(), *Ctx);
}
TargetLoweringObjectFile::~TargetLoweringObjectFile() {
+ delete Mang;
}
static bool isSuitableForBSS(const GlobalVariable *GV, bool NoZerosInBSS) {
@@ -101,21 +105,20 @@ static bool IsNullTerminatedString(const Constant *C) {
}
MCSymbol *TargetLoweringObjectFile::getSymbolWithGlobalValueBase(
- const GlobalValue *GV, StringRef Suffix, Mangler &Mang,
- const TargetMachine &TM) const {
+ const GlobalValue *GV, StringRef Suffix, const TargetMachine &TM) const {
assert(!Suffix.empty());
SmallString<60> NameStr;
NameStr += GV->getParent()->getDataLayout().getPrivateGlobalPrefix();
- TM.getNameWithPrefix(NameStr, GV, Mang);
+ TM.getNameWithPrefix(NameStr, GV, *Mang);
NameStr.append(Suffix.begin(), Suffix.end());
return Ctx->getOrCreateSymbol(NameStr);
}
MCSymbol *TargetLoweringObjectFile::getCFIPersonalitySymbol(
- const GlobalValue *GV, Mangler &Mang, const TargetMachine &TM,
+ const GlobalValue *GV, const TargetMachine &TM,
MachineModuleInfo *MMI) const {
- return TM.getSymbol(GV, Mang);
+ return TM.getSymbol(GV);
}
void TargetLoweringObjectFile::emitPersonalityValue(MCStreamer &Streamer,
@@ -129,15 +132,15 @@ void TargetLoweringObjectFile::emitPersonalityValue(MCStreamer &Streamer,
/// classifies the global in a variety of ways that make various target
/// implementations simpler. The target implementation is free to ignore this
/// extra info of course.
-SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV,
+SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalObject *GO,
const TargetMachine &TM){
- assert(!GV->isDeclaration() && !GV->hasAvailableExternallyLinkage() &&
+ assert(!GO->isDeclaration() && !GO->hasAvailableExternallyLinkage() &&
"Can only be used for global definitions");
Reloc::Model ReloModel = TM.getRelocationModel();
// Early exit - functions should be always in text sections.
- const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV);
+ const auto *GVar = dyn_cast<GlobalVariable>(GO);
if (!GVar)
return SectionKind::getText();
@@ -198,7 +201,8 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV,
// Otherwise, just drop it into a mergable constant section. If we have
// a section for this size, use it, otherwise use the arbitrary sized
// mergable section.
- switch (GV->getParent()->getDataLayout().getTypeAllocSize(C->getType())) {
+ switch (
+ GVar->getParent()->getDataLayout().getTypeAllocSize(C->getType())) {
case 4: return SectionKind::getMergeableConst4();
case 8: return SectionKind::getMergeableConst8();
case 16: return SectionKind::getMergeableConst16();
@@ -208,12 +212,13 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV,
}
} else {
- // In static relocation model, the linker will resolve all addresses, so
- // the relocation entries will actually be constants by the time the app
- // starts up. However, we can't put this into a mergable section, because
- // the linker doesn't take relocations into consideration when it tries to
- // merge entries in the section.
- if (ReloModel == Reloc::Static)
+ // In static, ROPI and RWPI relocation models, the linker will resolve
+ // all addresses, so the relocation entries will actually be constants by
+ // the time the app starts up. However, we can't put this into a
+ // mergable section, because the linker doesn't take relocations into
+ // consideration when it tries to merge entries in the section.
+ if (ReloModel == Reloc::Static || ReloModel == Reloc::ROPI ||
+ ReloModel == Reloc::RWPI || ReloModel == Reloc::ROPI_RWPI)
return SectionKind::getReadOnly();
// Otherwise, the dynamic linker needs to fix it up, put it in the
@@ -229,21 +234,18 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV,
/// This method computes the appropriate section to emit the specified global
/// variable or function definition. This should not be passed external (or
/// available externally) globals.
-MCSection *
-TargetLoweringObjectFile::SectionForGlobal(const GlobalValue *GV,
- SectionKind Kind, Mangler &Mang,
- const TargetMachine &TM) const {
+MCSection *TargetLoweringObjectFile::SectionForGlobal(
+ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
// Select section name.
- if (GV->hasSection())
- return getExplicitSectionGlobal(GV, Kind, Mang, TM);
-
+ if (GO->hasSection())
+ return getExplicitSectionGlobal(GO, Kind, TM);
// Use default section depending on the 'type' of global
- return SelectSectionForGlobal(GV, Kind, Mang, TM);
+ return SelectSectionForGlobal(GO, Kind, TM);
}
MCSection *TargetLoweringObjectFile::getSectionForJumpTable(
- const Function &F, Mangler &Mang, const TargetMachine &TM) const {
+ const Function &F, const TargetMachine &TM) const {
unsigned Align = 0;
return getSectionForConstant(F.getParent()->getDataLayout(),
SectionKind::getReadOnly(), /*C=*/nullptr,
@@ -283,11 +285,10 @@ MCSection *TargetLoweringObjectFile::getSectionForConstant(
/// reference to the specified global variable from exception
/// handling information.
const MCExpr *TargetLoweringObjectFile::getTTypeGlobalReference(
- const GlobalValue *GV, unsigned Encoding, Mangler &Mang,
- const TargetMachine &TM, MachineModuleInfo *MMI,
- MCStreamer &Streamer) const {
+ const GlobalValue *GV, unsigned Encoding, const TargetMachine &TM,
+ MachineModuleInfo *MMI, MCStreamer &Streamer) const {
const MCSymbolRefExpr *Ref =
- MCSymbolRefExpr::create(TM.getSymbol(GV, Mang), getContext());
+ MCSymbolRefExpr::create(TM.getSymbol(GV), getContext());
return getTTypeReference(Ref, Encoding, Streamer);
}
@@ -319,7 +320,7 @@ const MCExpr *TargetLoweringObjectFile::getDebugThreadLocalSymbol(const MCSymbol
}
void TargetLoweringObjectFile::getNameWithPrefix(
- SmallVectorImpl<char> &OutName, const GlobalValue *GV, Mangler &Mang,
+ SmallVectorImpl<char> &OutName, const GlobalValue *GV,
const TargetMachine &TM) const {
- Mang.getNameWithPrefix(OutName, GV, /*CannotUsePrivateLabel=*/false);
+ Mang->getNameWithPrefix(OutName, GV, /*CannotUsePrivateLabel=*/false);
}
diff --git a/contrib/llvm/lib/Target/TargetMachine.cpp b/contrib/llvm/lib/Target/TargetMachine.cpp
index 82c68505c4e1..e16ced1661a1 100644
--- a/contrib/llvm/lib/Target/TargetMachine.cpp
+++ b/contrib/llvm/lib/Target/TargetMachine.cpp
@@ -77,6 +77,16 @@ void TargetMachine::resetTargetOptions(const Function &F) const {
RESET_OPTION(UnsafeFPMath, "unsafe-fp-math");
RESET_OPTION(NoInfsFPMath, "no-infs-fp-math");
RESET_OPTION(NoNaNsFPMath, "no-nans-fp-math");
+ RESET_OPTION(NoTrappingFPMath, "no-trapping-math");
+
+ StringRef Denormal =
+ F.getFnAttribute("denormal-fp-math").getValueAsString();
+ if (Denormal == "ieee")
+ Options.FPDenormalMode = FPDenormal::IEEE;
+ else if (Denormal == "preserve-sign")
+ Options.FPDenormalMode = FPDenormal::PreserveSign;
+ else if (Denormal == "positive-zero")
+ Options.FPDenormalMode = FPDenormal::PositiveZero;
}
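The addition to resetTargetOptions above keys FPDenormalMode off the string value of the "denormal-fp-math" function attribute. A rough sketch of that mapping in isolation (the enum and helper below are illustrative stand-ins, not LLVM's own types):

#include <string>

// Stand-in for the denormal-mode selection driven by the "denormal-fp-math"
// function attribute string, mirroring the hunk above.
enum class DenormalMode { IEEE, PreserveSign, PositiveZero };

static DenormalMode parseDenormalFPMath(const std::string &S, DenormalMode Prev) {
  if (S == "ieee")          return DenormalMode::IEEE;
  if (S == "preserve-sign") return DenormalMode::PreserveSign;
  if (S == "positive-zero") return DenormalMode::PositiveZero;
  return Prev; // absent or unrecognized: leave the current option unchanged
}

So a function carrying, for example, "denormal-fp-math"="preserve-sign" in its attribute set would switch the option to PreserveSign for that function only.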
/// Returns the code generation relocation model. The choices are static, PIC,
@@ -105,9 +115,6 @@ static TLSModel::Model getSelectedTLSModel(const GlobalValue *GV) {
llvm_unreachable("invalid TLS model");
}
-// FIXME: make this a proper option
-static bool CanUseCopyRelocWithPIE = false;
-
bool TargetMachine::shouldAssumeDSOLocal(const Module &M,
const GlobalValue *GV) const {
Reloc::Model RM = getRelocationModel();
@@ -117,8 +124,11 @@ bool TargetMachine::shouldAssumeDSOLocal(const Module &M,
if (GV && GV->hasDLLImportStorageClass())
return false;
- // Every other GV is local on COFF
- if (TT.isOSBinFormatCOFF())
+ // Every other GV is local on COFF.
+ // Make an exception for Windows OS in the triple: some firmware builds use
+ // *-win32-macho triples. This (accidentally?) produced Windows relocations
+ // without GOT tables in older clang versions; keep this behaviour.
+ if (TT.isOSBinFormatCOFF() || (TT.isOSWindows() && TT.isOSBinFormatMachO()))
return true;
if (GV && (GV->hasLocalLinkage() || !GV->hasDefaultVisibility()))
@@ -141,8 +151,10 @@ bool TargetMachine::shouldAssumeDSOLocal(const Module &M,
return true;
bool IsTLS = GV && GV->isThreadLocal();
+ bool IsAccessViaCopyRelocs =
+ Options.MCOptions.MCPIECopyRelocations && GV && isa<GlobalVariable>(GV);
// Check if we can use copy relocations.
- if (!IsTLS && (RM == Reloc::Static || CanUseCopyRelocWithPIE))
+ if (!IsTLS && (RM == Reloc::Static || IsAccessViaCopyRelocs))
return true;
}
@@ -198,12 +210,12 @@ void TargetMachine::getNameWithPrefix(SmallVectorImpl<char> &Name,
return;
}
const TargetLoweringObjectFile *TLOF = getObjFileLowering();
- TLOF->getNameWithPrefix(Name, GV, Mang, *this);
+ TLOF->getNameWithPrefix(Name, GV, *this);
}
-MCSymbol *TargetMachine::getSymbol(const GlobalValue *GV, Mangler &Mang) const {
- SmallString<128> NameStr;
- getNameWithPrefix(NameStr, GV, Mang);
+MCSymbol *TargetMachine::getSymbol(const GlobalValue *GV) const {
const TargetLoweringObjectFile *TLOF = getObjFileLowering();
+ SmallString<128> NameStr;
+ getNameWithPrefix(NameStr, GV, TLOF->getMangler());
return TLOF->getContext().getOrCreateSymbol(NameStr);
}
diff --git a/contrib/llvm/lib/Target/TargetMachineC.cpp b/contrib/llvm/lib/Target/TargetMachineC.cpp
index 02836eaf08e5..5fb5b0227800 100644
--- a/contrib/llvm/lib/Target/TargetMachineC.cpp
+++ b/contrib/llvm/lib/Target/TargetMachineC.cpp
@@ -59,9 +59,8 @@ LLVMTargetRef LLVMGetNextTarget(LLVMTargetRef T) {
LLVMTargetRef LLVMGetTargetFromName(const char *Name) {
StringRef NameRef = Name;
- auto I = std::find_if(
- TargetRegistry::targets().begin(), TargetRegistry::targets().end(),
- [&](const Target &T) { return T.getName() == NameRef; });
+ auto I = find_if(TargetRegistry::targets(),
+ [&](const Target &T) { return T.getName() == NameRef; });
return I != TargetRegistry::targets().end() ? wrap(&*I) : nullptr;
}
diff --git a/contrib/llvm/lib/Target/TargetRecip.cpp b/contrib/llvm/lib/Target/TargetRecip.cpp
deleted file mode 100644
index 183fa5062eab..000000000000
--- a/contrib/llvm/lib/Target/TargetRecip.cpp
+++ /dev/null
@@ -1,225 +0,0 @@
-//===-------------------------- TargetRecip.cpp ---------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class is used to customize machine-specific reciprocal estimate code
-// generation in a target-independent way.
-// If a target does not support operations in this specification, then code
-// generation will default to using supported operations.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Target/TargetRecip.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/ErrorHandling.h"
-
-using namespace llvm;
-
-// These are the names of the individual reciprocal operations. These are
-// the key strings for queries and command-line inputs.
-// In addition, the command-line interface recognizes the global parameters
-// "all", "none", and "default".
-static const char *const RecipOps[] = {
- "divd",
- "divf",
- "vec-divd",
- "vec-divf",
- "sqrtd",
- "sqrtf",
- "vec-sqrtd",
- "vec-sqrtf",
-};
-
-// The uninitialized state is needed for the enabled settings and refinement
-// steps because custom settings may arrive via the command-line before target
-// defaults are set.
-TargetRecip::TargetRecip() {
- unsigned NumStrings = llvm::array_lengthof(RecipOps);
- for (unsigned i = 0; i < NumStrings; ++i)
- RecipMap.insert(std::make_pair(RecipOps[i], RecipParams()));
-}
-
-static bool parseRefinementStep(StringRef In, size_t &Position,
- uint8_t &Value) {
- const char RefStepToken = ':';
- Position = In.find(RefStepToken);
- if (Position == StringRef::npos)
- return false;
-
- StringRef RefStepString = In.substr(Position + 1);
- // Allow exactly one numeric character for the additional refinement
- // step parameter.
- if (RefStepString.size() == 1) {
- char RefStepChar = RefStepString[0];
- if (RefStepChar >= '0' && RefStepChar <= '9') {
- Value = RefStepChar - '0';
- return true;
- }
- }
- report_fatal_error("Invalid refinement step for -recip.");
-}
-
-bool TargetRecip::parseGlobalParams(const std::string &Arg) {
- StringRef ArgSub = Arg;
-
- // Look for an optional setting of the number of refinement steps needed
- // for this type of reciprocal operation.
- size_t RefPos;
- uint8_t RefSteps;
- StringRef RefStepString;
- if (parseRefinementStep(ArgSub, RefPos, RefSteps)) {
- // Split the string for further processing.
- RefStepString = ArgSub.substr(RefPos + 1);
- ArgSub = ArgSub.substr(0, RefPos);
- }
- bool Enable;
- bool UseDefaults;
- if (ArgSub == "all") {
- UseDefaults = false;
- Enable = true;
- } else if (ArgSub == "none") {
- UseDefaults = false;
- Enable = false;
- } else if (ArgSub == "default") {
- UseDefaults = true;
- } else {
- // Any other string is invalid or an individual setting.
- return false;
- }
-
- // All enable values will be initialized to target defaults if 'default' was
- // specified.
- if (!UseDefaults)
- for (auto &KV : RecipMap)
- KV.second.Enabled = Enable;
-
- // Custom refinement count was specified with all, none, or default.
- if (!RefStepString.empty())
- for (auto &KV : RecipMap)
- KV.second.RefinementSteps = RefSteps;
-
- return true;
-}
-
-void TargetRecip::parseIndividualParams(const std::vector<std::string> &Args) {
- static const char DisabledPrefix = '!';
- unsigned NumArgs = Args.size();
-
- for (unsigned i = 0; i != NumArgs; ++i) {
- StringRef Val = Args[i];
-
- bool IsDisabled = Val[0] == DisabledPrefix;
- // Ignore the disablement token for string matching.
- if (IsDisabled)
- Val = Val.substr(1);
-
- size_t RefPos;
- uint8_t RefSteps;
- StringRef RefStepString;
- if (parseRefinementStep(Val, RefPos, RefSteps)) {
- // Split the string for further processing.
- RefStepString = Val.substr(RefPos + 1);
- Val = Val.substr(0, RefPos);
- }
-
- RecipIter Iter = RecipMap.find(Val);
- if (Iter == RecipMap.end()) {
- // Try again specifying float suffix.
- Iter = RecipMap.find(Val.str() + 'f');
- if (Iter == RecipMap.end()) {
- Iter = RecipMap.find(Val.str() + 'd');
- assert(Iter == RecipMap.end() && "Float entry missing from map");
- report_fatal_error("Invalid option for -recip.");
- }
-
- // The option was specified without a float or double suffix.
- if (RecipMap[Val.str() + 'd'].Enabled != Uninitialized) {
- // Make sure that the double entry was not already specified.
- // The float entry will be checked below.
- report_fatal_error("Duplicate option for -recip.");
- }
- }
-
- if (Iter->second.Enabled != Uninitialized)
- report_fatal_error("Duplicate option for -recip.");
-
- // Mark the matched option as found. Do not allow duplicate specifiers.
- Iter->second.Enabled = !IsDisabled;
- if (!RefStepString.empty())
- Iter->second.RefinementSteps = RefSteps;
-
- // If the precision was not specified, the double entry is also initialized.
- if (Val.back() != 'f' && Val.back() != 'd') {
- RecipParams &Params = RecipMap[Val.str() + 'd'];
- Params.Enabled = !IsDisabled;
- if (!RefStepString.empty())
- Params.RefinementSteps = RefSteps;
- }
- }
-}
-
-TargetRecip::TargetRecip(const std::vector<std::string> &Args) :
- TargetRecip() {
- unsigned NumArgs = Args.size();
-
- // Check if "all", "default", or "none" was specified.
- if (NumArgs == 1 && parseGlobalParams(Args[0]))
- return;
-
- parseIndividualParams(Args);
-}
-
-bool TargetRecip::isEnabled(StringRef Key) const {
- ConstRecipIter Iter = RecipMap.find(Key);
- assert(Iter != RecipMap.end() && "Unknown name for reciprocal map");
- assert(Iter->second.Enabled != Uninitialized &&
- "Enablement setting was not initialized");
- return Iter->second.Enabled;
-}
-
-unsigned TargetRecip::getRefinementSteps(StringRef Key) const {
- ConstRecipIter Iter = RecipMap.find(Key);
- assert(Iter != RecipMap.end() && "Unknown name for reciprocal map");
- assert(Iter->second.RefinementSteps != Uninitialized &&
- "Refinement step setting was not initialized");
- return Iter->second.RefinementSteps;
-}
-
-/// Custom settings (previously initialized values) override target defaults.
-void TargetRecip::setDefaults(StringRef Key, bool Enable,
- unsigned RefSteps) {
- if (Key == "all") {
- for (auto &KV : RecipMap) {
- RecipParams &RP = KV.second;
- if (RP.Enabled == Uninitialized)
- RP.Enabled = Enable;
- if (RP.RefinementSteps == Uninitialized)
- RP.RefinementSteps = RefSteps;
- }
- } else {
- RecipParams &RP = RecipMap[Key];
- if (RP.Enabled == Uninitialized)
- RP.Enabled = Enable;
- if (RP.RefinementSteps == Uninitialized)
- RP.RefinementSteps = RefSteps;
- }
-}
-
-bool TargetRecip::operator==(const TargetRecip &Other) const {
- for (const auto &KV : RecipMap) {
- StringRef Op = KV.first;
- const RecipParams &RP = KV.second;
- const RecipParams &OtherRP = Other.RecipMap.find(Op)->second;
- if (RP.RefinementSteps != OtherRP.RefinementSteps)
- return false;
- if (RP.Enabled != OtherRP.Enabled)
- return false;
- }
- return true;
-}
diff --git a/contrib/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp b/contrib/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
index c0355aef0b35..b4763ca60ab6 100644
--- a/contrib/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
@@ -54,9 +54,9 @@ static MCDisassembler *createWebAssemblyDisassembler(const Target &T,
extern "C" void LLVMInitializeWebAssemblyDisassembler() {
// Register the disassembler for each target.
- TargetRegistry::RegisterMCDisassembler(TheWebAssemblyTarget32,
+ TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget32(),
createWebAssemblyDisassembler);
- TargetRegistry::RegisterMCDisassembler(TheWebAssemblyTarget64,
+ TargetRegistry::RegisterMCDisassembler(getTheWebAssemblyTarget64(),
createWebAssemblyDisassembler);
}
@@ -93,6 +93,7 @@ MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
const MCOperandInfo &Info = Desc.OpInfo[i];
switch (Info.OperandType) {
case MCOI::OPERAND_IMMEDIATE:
+ case WebAssembly::OPERAND_LOCAL:
case WebAssembly::OPERAND_P2ALIGN:
case WebAssembly::OPERAND_BASIC_BLOCK: {
if (Pos + sizeof(uint64_t) > Bytes.size())
@@ -110,8 +111,8 @@ MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
MI.addOperand(MCOperand::createReg(Reg));
break;
}
- case WebAssembly::OPERAND_FP32IMM:
- case WebAssembly::OPERAND_FP64IMM: {
+ case WebAssembly::OPERAND_F32IMM:
+ case WebAssembly::OPERAND_F64IMM: {
// TODO: MC converts all floating point immediate operands to double.
// This is fine for numeric values, but may cause NaNs to change bits.
if (Pos + sizeof(uint64_t) > Bytes.size())
diff --git a/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp b/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp
index 267d716dd1d0..0af13cffdb04 100644
--- a/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp
@@ -54,7 +54,12 @@ void WebAssemblyInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
const MCInstrDesc &Desc = MII.get(MI->getOpcode());
if (Desc.isVariadic())
for (auto i = Desc.getNumOperands(), e = MI->getNumOperands(); i < e; ++i) {
- if (i != 0)
+ // FIXME: For CALL_INDIRECT_VOID, don't print a leading comma, because
+ // we have an extra flags operand which is not currently printed, for
+ // compatibility reasons.
+ if (i != 0 &&
+ (MI->getOpcode() != WebAssembly::CALL_INDIRECT_VOID ||
+ i != Desc.getNumOperands()))
OS << ", ";
printOperand(MI, i, OS);
}
@@ -69,11 +74,8 @@ void WebAssemblyInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
default:
break;
case WebAssembly::LOOP: {
- // Grab the TopLabel value first so that labels print in numeric order.
- uint64_t TopLabel = ControlFlowCounter++;
- ControlFlowStack.push_back(std::make_pair(ControlFlowCounter++, false));
- printAnnotation(OS, "label" + utostr(TopLabel) + ':');
- ControlFlowStack.push_back(std::make_pair(TopLabel, true));
+ printAnnotation(OS, "label" + utostr(ControlFlowCounter) + ':');
+ ControlFlowStack.push_back(std::make_pair(ControlFlowCounter++, true));
break;
}
case WebAssembly::BLOCK:
@@ -81,8 +83,6 @@ void WebAssemblyInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
break;
case WebAssembly::END_LOOP:
ControlFlowStack.pop_back();
- printAnnotation(
- OS, "label" + utostr(ControlFlowStack.pop_back_val().first) + ':');
break;
case WebAssembly::END_BLOCK:
printAnnotation(
@@ -94,9 +94,9 @@ void WebAssemblyInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
unsigned NumFixedOperands = Desc.NumOperands;
SmallSet<uint64_t, 8> Printed;
for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
- const MCOperandInfo &Info = Desc.OpInfo[i];
if (!(i < NumFixedOperands
- ? (Info.OperandType == WebAssembly::OPERAND_BASIC_BLOCK)
+ ? (Desc.OpInfo[i].OperandType ==
+ WebAssembly::OPERAND_BASIC_BLOCK)
: (Desc.TSFlags & WebAssemblyII::VariableOpImmediateIsLabel)))
continue;
uint64_t Depth = MI->getOperand(i).getImm();
@@ -113,7 +113,8 @@ static std::string toString(const APFloat &FP) {
// Print NaNs with custom payloads specially.
if (FP.isNaN() &&
!FP.bitwiseIsEqual(APFloat::getQNaN(FP.getSemantics())) &&
- !FP.bitwiseIsEqual(APFloat::getQNaN(FP.getSemantics(), /*Negative=*/true))) {
+ !FP.bitwiseIsEqual(
+ APFloat::getQNaN(FP.getSemantics(), /*Negative=*/true))) {
APInt AI = FP.bitcastToAPInt();
return
std::string(AI.isNegative() ? "-" : "") + "nan:0x" +
@@ -154,11 +155,12 @@ void WebAssemblyInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
if (OpNo < MII.get(MI->getOpcode()).getNumDefs())
O << '=';
} else if (Op.isImm()) {
- assert((OpNo < MII.get(MI->getOpcode()).getNumOperands() ||
- (MII.get(MI->getOpcode()).TSFlags &
- WebAssemblyII::VariableOpIsImmediate)) &&
+ const MCInstrDesc &Desc = MII.get(MI->getOpcode());
+ assert((OpNo < Desc.getNumOperands() ||
+ (Desc.TSFlags & WebAssemblyII::VariableOpIsImmediate)) &&
"WebAssemblyII::VariableOpIsImmediate should be set for "
"variable_ops immediate ops");
+ (void)Desc;
// TODO: (MII.get(MI->getOpcode()).TSFlags &
// WebAssemblyII::VariableOpImmediateIsLabel)
// can tell us whether this is an immediate referencing a label in the
@@ -171,12 +173,12 @@ void WebAssemblyInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
assert(Desc.TSFlags == 0 &&
"WebAssembly variable_ops floating point ops don't use TSFlags");
const MCOperandInfo &Info = Desc.OpInfo[OpNo];
- if (Info.OperandType == WebAssembly::OPERAND_FP32IMM) {
+ if (Info.OperandType == WebAssembly::OPERAND_F32IMM) {
// TODO: MC converts all floating point immediate operands to double.
// This is fine for numeric values, but may cause NaNs to change bits.
O << toString(APFloat(float(Op.getFPImm())));
} else {
- assert(Info.OperandType == WebAssembly::OPERAND_FP64IMM);
+ assert(Info.OperandType == WebAssembly::OPERAND_F64IMM);
O << toString(APFloat(Op.getFPImm()));
}
} else {
@@ -200,6 +202,27 @@ WebAssemblyInstPrinter::printWebAssemblyP2AlignOperand(const MCInst *MI,
O << ":p2align=" << Imm;
}
+void
+WebAssemblyInstPrinter::printWebAssemblySignatureOperand(const MCInst *MI,
+ unsigned OpNo,
+ raw_ostream &O) {
+ int64_t Imm = MI->getOperand(OpNo).getImm();
+ switch (WebAssembly::ExprType(Imm)) {
+ case WebAssembly::ExprType::Void: break;
+ case WebAssembly::ExprType::I32: O << "i32"; break;
+ case WebAssembly::ExprType::I64: O << "i64"; break;
+ case WebAssembly::ExprType::F32: O << "f32"; break;
+ case WebAssembly::ExprType::F64: O << "f64"; break;
+ case WebAssembly::ExprType::I8x16: O << "i8x16"; break;
+ case WebAssembly::ExprType::I16x8: O << "i16x8"; break;
+ case WebAssembly::ExprType::I32x4: O << "i32x4"; break;
+ case WebAssembly::ExprType::F32x4: O << "f32x4"; break;
+ case WebAssembly::ExprType::B8x16: O << "b8x16"; break;
+ case WebAssembly::ExprType::B16x8: O << "b16x8"; break;
+ case WebAssembly::ExprType::B32x4: O << "b32x4"; break;
+ }
+}
+
const char *llvm::WebAssembly::TypeToString(MVT Ty) {
switch (Ty.SimpleTy) {
case MVT::i32:
@@ -210,6 +233,11 @@ const char *llvm::WebAssembly::TypeToString(MVT Ty) {
return "f32";
case MVT::f64:
return "f64";
+ case MVT::v16i8:
+ case MVT::v8i16:
+ case MVT::v4i32:
+ case MVT::v4f32:
+ return "v128";
default:
llvm_unreachable("unsupported type");
}
diff --git a/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h b/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h
index 07b0f914e447..d11f99c1ff39 100644
--- a/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h
+++ b/contrib/llvm/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h
@@ -39,6 +39,8 @@ public:
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
void printWebAssemblyP2AlignOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O);
+ void printWebAssemblySignatureOperand(const MCInst *MI, unsigned OpNo,
+ raw_ostream &O);
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &O);
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
index df6fb8968d56..97454a824a34 100644
--- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
@@ -66,8 +66,10 @@ bool WebAssemblyAsmBackend::writeNopData(uint64_t Count,
if (Count == 0)
return true;
- // FIXME: Do something.
- return false;
+ for (uint64_t i = 0; i < Count; ++i)
+ OW->write8(WebAssembly::Nop);
+
+ return true;
}
void WebAssemblyAsmBackend::applyFixup(const MCFixup &Fixup, char *Data,
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
index 23f8b3d0e827..d0e0eecd3002 100644
--- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
@@ -23,6 +23,7 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/EndianStream.h"
+#include "llvm/Support/LEB128.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
@@ -45,7 +46,7 @@ class WebAssemblyMCCodeEmitter final : public MCCodeEmitter {
const MCSubtargetInfo &STI) const override;
public:
- WebAssemblyMCCodeEmitter(const MCInstrInfo &mcii) : MCII(mcii) {}
+ explicit WebAssemblyMCCodeEmitter(const MCInstrInfo &mcii) : MCII(mcii) {}
};
} // end anonymous namespace
@@ -56,30 +57,59 @@ MCCodeEmitter *llvm::createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII) {
void WebAssemblyMCCodeEmitter::encodeInstruction(
const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
- // FIXME: This is not the real binary encoding. This is an extremely
- // over-simplified encoding where we just use uint64_t for everything. This
- // is a temporary measure.
- support::endian::Writer<support::little>(OS).write<uint64_t>(MI.getOpcode());
+ uint64_t Start = OS.tell();
+
+ uint64_t Binary = getBinaryCodeForInstr(MI, Fixups, STI);
+ assert(Binary < UINT8_MAX && "Multi-byte opcodes not supported yet");
+ OS << uint8_t(Binary);
+
const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
- if (Desc.isVariadic())
- support::endian::Writer<support::little>(OS).write<uint64_t>(
- MI.getNumOperands() - Desc.NumOperands);
for (unsigned i = 0, e = MI.getNumOperands(); i < e; ++i) {
const MCOperand &MO = MI.getOperand(i);
if (MO.isReg()) {
- support::endian::Writer<support::little>(OS).write<uint64_t>(MO.getReg());
+ /* nothing to encode */
} else if (MO.isImm()) {
- support::endian::Writer<support::little>(OS).write<uint64_t>(MO.getImm());
+ if (i < Desc.getNumOperands()) {
+ assert(Desc.TSFlags == 0 &&
+ "WebAssembly non-variable_ops don't use TSFlags");
+ const MCOperandInfo &Info = Desc.OpInfo[i];
+ if (Info.OperandType == WebAssembly::OPERAND_I32IMM) {
+ encodeSLEB128(int32_t(MO.getImm()), OS);
+ } else if (Info.OperandType == WebAssembly::OPERAND_I64IMM) {
+ encodeSLEB128(int64_t(MO.getImm()), OS);
+ } else {
+ encodeULEB128(uint64_t(MO.getImm()), OS);
+ }
+ } else {
+ assert(Desc.TSFlags == (WebAssemblyII::VariableOpIsImmediate |
+ WebAssemblyII::VariableOpImmediateIsLabel));
+ encodeULEB128(uint64_t(MO.getImm()), OS);
+ }
} else if (MO.isFPImm()) {
- support::endian::Writer<support::little>(OS).write<double>(MO.getFPImm());
+ assert(i < Desc.getNumOperands() &&
+ "Unexpected floating-point immediate as a non-fixed operand");
+ assert(Desc.TSFlags == 0 &&
+ "WebAssembly variable_ops floating point ops don't use TSFlags");
+ const MCOperandInfo &Info = Desc.OpInfo[i];
+ if (Info.OperandType == WebAssembly::OPERAND_F32IMM) {
+ // TODO: MC converts all floating point immediate operands to double.
+ // This is fine for numeric values, but may cause NaNs to change bits.
+ float f = float(MO.getFPImm());
+ support::endian::Writer<support::little>(OS).write<float>(f);
+ } else {
+ assert(Info.OperandType == WebAssembly::OPERAND_F64IMM);
+ double d = MO.getFPImm();
+ support::endian::Writer<support::little>(OS).write<double>(d);
+ }
} else if (MO.isExpr()) {
- support::endian::Writer<support::little>(OS).write<uint64_t>(0);
Fixups.push_back(MCFixup::create(
- (1 + MCII.get(MI.getOpcode()).isVariadic() + i) * sizeof(uint64_t),
- MO.getExpr(),
+ OS.tell() - Start, MO.getExpr(),
STI.getTargetTriple().isArch64Bit() ? FK_Data_8 : FK_Data_4,
MI.getLoc()));
++MCNumFixups;
+ encodeULEB128(STI.getTargetTriple().isArch64Bit() ? UINT64_MAX
+ : uint64_t(UINT32_MAX),
+ OS);
} else {
llvm_unreachable("unexpected operand kind");
}
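The rewritten encodeInstruction above drops the fixed 64-bit-word placeholder encoding: immediates are now written as SLEB128 for the signed i32/i64 operand types and ULEB128 otherwise, with a maximum-width ULEB128 placeholder reserved where a fixup will later patch in a symbol value. For reference, a standalone sketch of the two encodings (illustrative re-implementations; the patch itself uses LLVM's encodeSLEB128/encodeULEB128 from Support/LEB128.h):

#include <cstdint>
#include <vector>

// Unsigned LEB128: 7 value bits per byte, high bit set on all but the last.
static void writeULEB128(uint64_t V, std::vector<uint8_t> &Out) {
  do {
    uint8_t Byte = V & 0x7f;
    V >>= 7;
    if (V != 0)
      Byte |= 0x80;
    Out.push_back(Byte);
  } while (V != 0);
}

// Signed LEB128: stop once the remaining bits are pure sign extension.
static void writeSLEB128(int64_t V, std::vector<uint8_t> &Out) {
  bool More;
  do {
    uint8_t Byte = V & 0x7f;
    V >>= 7; // arithmetic shift preserves the sign
    More = !((V == 0 && !(Byte & 0x40)) || (V == -1 && (Byte & 0x40)));
    if (More)
      Byte |= 0x80;
    Out.push_back(Byte);
  } while (More);
}

For example, writeULEB128(300, Out) appends the bytes 0xac 0x02, and writeSLEB128(-1, Out) appends the single byte 0x7f.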
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
index ac11a64086f2..3dc1ded17116 100644
--- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
@@ -77,7 +77,8 @@ static MCCodeEmitter *createCodeEmitter(const MCInstrInfo &MCII,
static MCAsmBackend *createAsmBackend(const Target & /*T*/,
const MCRegisterInfo & /*MRI*/,
- const Triple &TT, StringRef /*CPU*/) {
+ const Triple &TT, StringRef /*CPU*/,
+ const MCTargetOptions & /*Options*/) {
return createWebAssemblyAsmBackend(TT);
}
@@ -100,7 +101,8 @@ static MCTargetStreamer *createAsmTargetStreamer(MCStreamer &S,
// Force static initialization.
extern "C" void LLVMInitializeWebAssemblyTargetMC() {
- for (Target *T : {&TheWebAssemblyTarget32, &TheWebAssemblyTarget64}) {
+ for (Target *T :
+ {&getTheWebAssemblyTarget32(), &getTheWebAssemblyTarget64()}) {
// Register the MC asm info.
RegisterMCAsmInfoFn X(*T, createMCAsmInfo);
@@ -132,3 +134,13 @@ extern "C" void LLVMInitializeWebAssemblyTargetMC() {
TargetRegistry::RegisterAsmTargetStreamer(*T, createAsmTargetStreamer);
}
}
+
+WebAssembly::ValType WebAssembly::toValType(const MVT &Ty) {
+ switch (Ty.SimpleTy) {
+ case MVT::i32: return WebAssembly::ValType::I32;
+ case MVT::i64: return WebAssembly::ValType::I64;
+ case MVT::f32: return WebAssembly::ValType::F32;
+ case MVT::f64: return WebAssembly::ValType::F64;
+ default: llvm_unreachable("unexpected type");
+ }
+}
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
index 001bd7f1fc43..8583b772deab 100644
--- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
@@ -26,12 +26,13 @@ class MCContext;
class MCInstrInfo;
class MCObjectWriter;
class MCSubtargetInfo;
+class MVT;
class Target;
class Triple;
class raw_pwrite_stream;
-extern Target TheWebAssemblyTarget32;
-extern Target TheWebAssemblyTarget64;
+Target &getTheWebAssemblyTarget32();
+Target &getTheWebAssemblyTarget64();
MCCodeEmitter *createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII);
@@ -44,23 +45,25 @@ namespace WebAssembly {
enum OperandType {
/// Basic block label in a branch construct.
OPERAND_BASIC_BLOCK = MCOI::OPERAND_FIRST_TARGET,
+ /// Local index.
+ OPERAND_LOCAL,
+ /// 32-bit integer immediates.
+ OPERAND_I32IMM,
+ /// 64-bit integer immediates.
+ OPERAND_I64IMM,
/// 32-bit floating-point immediates.
- OPERAND_FP32IMM,
+ OPERAND_F32IMM,
/// 64-bit floating-point immediates.
- OPERAND_FP64IMM,
+ OPERAND_F64IMM,
+ /// 32-bit unsigned function indices.
+ OPERAND_FUNCTION32,
+ /// 32-bit unsigned memory offsets.
+ OPERAND_OFFSET32,
/// p2align immediate for load and store address alignment.
- OPERAND_P2ALIGN
+ OPERAND_P2ALIGN,
+ /// signature immediate for block/loop.
+ OPERAND_SIGNATURE
};
-
-/// WebAssembly-specific directive identifiers.
-enum Directive {
- // FIXME: This is not the real binary encoding.
- DotParam = UINT64_MAX - 0, ///< .param
- DotResult = UINT64_MAX - 1, ///< .result
- DotLocal = UINT64_MAX - 2, ///< .local
- DotEndFunc = UINT64_MAX - 3, ///< .endfunc
-};
-
} // end namespace WebAssembly
namespace WebAssemblyII {
@@ -70,7 +73,7 @@ enum {
VariableOpIsImmediate = (1 << 0),
// For immediate values in the variable_ops range, this flag indicates
// whether the value represents a control-flow label.
- VariableOpImmediateIsLabel = (1 << 1),
+ VariableOpImmediateIsLabel = (1 << 1)
};
} // end namespace WebAssemblyII
@@ -123,14 +126,55 @@ inline unsigned GetDefaultP2Align(unsigned Opcode) {
case WebAssembly::STORE_I64:
case WebAssembly::STORE_F64:
return 3;
- default: llvm_unreachable("Only loads and stores have p2align values");
+ default:
+ llvm_unreachable("Only loads and stores have p2align values");
}
}
/// The operand number of the load or store address in load/store instructions.
-static const unsigned MemOpAddressOperandNo = 2;
-/// The operand number of the stored value in a store instruction.
-static const unsigned StoreValueOperandNo = 4;
+static const unsigned LoadAddressOperandNo = 3;
+static const unsigned StoreAddressOperandNo = 2;
+
+/// The operand number of the load or store p2align in load/store instructions.
+static const unsigned LoadP2AlignOperandNo = 1;
+static const unsigned StoreP2AlignOperandNo = 0;
+
+/// This is used to indicate block signatures.
+enum class ExprType {
+ Void = 0x40,
+ I32 = 0x7f,
+ I64 = 0x7e,
+ F32 = 0x7d,
+ F64 = 0x7c,
+ I8x16 = 0x7b,
+ I16x8 = 0x7a,
+ I32x4 = 0x79,
+ F32x4 = 0x78,
+ B8x16 = 0x77,
+ B16x8 = 0x76,
+ B32x4 = 0x75
+};
+
+/// This is used to indicate local types.
+enum class ValType {
+ I32 = 0x7f,
+ I64 = 0x7e,
+ F32 = 0x7d,
+ F64 = 0x7c,
+ I8x16 = 0x7b,
+ I16x8 = 0x7a,
+ I32x4 = 0x79,
+ F32x4 = 0x78,
+ B8x16 = 0x77,
+ B16x8 = 0x76,
+ B32x4 = 0x75
+};
+
+/// Instruction opcodes emitted via means other than CodeGen.
+static const unsigned Nop = 0x01;
+static const unsigned End = 0x0b;
+
+ValType toValType(const MVT &Ty);
} // end namespace WebAssembly
} // end namespace llvm
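One detail worth spelling out for the header hunk above: the p2align operand printed earlier as ":p2align=" and the values returned by GetDefaultP2Align are log2 alignments, so the default of 3 for the 64-bit loads and stores listed above means 8-byte alignment. A tiny illustrative sketch of that relationship (editor's example, not from the patch):

#include <cstdio>

int main() {
  // p2align is log2(alignment): ":p2align=3" means 8-byte alignment.
  for (unsigned P2Align = 0; P2Align <= 3; ++P2Align)
    std::printf("p2align=%u -> %u-byte alignment\n", P2Align, 1u << P2Align);
  return 0;
}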
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
index 3d61c15717b4..3cee8b2a1844 100644
--- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
@@ -58,45 +58,63 @@ void WebAssemblyTargetAsmStreamer::emitResult(ArrayRef<MVT> Types) {
}
void WebAssemblyTargetAsmStreamer::emitLocal(ArrayRef<MVT> Types) {
- OS << "\t.local \t";
- PrintTypes(OS, Types);
+ if (!Types.empty()) {
+ OS << "\t.local \t";
+ PrintTypes(OS, Types);
+ }
}
void WebAssemblyTargetAsmStreamer::emitEndFunc() { OS << "\t.endfunc\n"; }
void WebAssemblyTargetAsmStreamer::emitIndirectFunctionType(
- StringRef name, SmallVectorImpl<MVT> &SignatureVTs, size_t NumResults) {
+ StringRef name, SmallVectorImpl<MVT> &Params, SmallVectorImpl<MVT> &Results) {
OS << "\t.functype\t" << name;
- if (NumResults == 0) OS << ", void";
- for (auto Ty : SignatureVTs) {
- OS << ", " << WebAssembly::TypeToString(Ty);
+ if (Results.empty())
+ OS << ", void";
+ else {
+ assert(Results.size() == 1);
+ OS << ", " << WebAssembly::TypeToString(Results.front());
}
- OS << "\n";
+ for (auto Ty : Params)
+ OS << ", " << WebAssembly::TypeToString(Ty);
+ OS << '\n';
}
-// FIXME: What follows is not the real binary encoding.
+void WebAssemblyTargetAsmStreamer::emitGlobalImport(StringRef name) {
+ OS << "\t.import_global\t" << name << '\n';
+}
-static void EncodeTypes(MCStreamer &Streamer, ArrayRef<MVT> Types) {
- Streamer.EmitIntValue(Types.size(), sizeof(uint64_t));
- for (MVT Type : Types)
- Streamer.EmitIntValue(Type.SimpleTy, sizeof(uint64_t));
+void WebAssemblyTargetAsmStreamer::emitIndIdx(const MCExpr *Value) {
+ OS << "\t.indidx \t" << *Value << '\n';
}
void WebAssemblyTargetELFStreamer::emitParam(ArrayRef<MVT> Types) {
- Streamer.EmitIntValue(WebAssembly::DotParam, sizeof(uint64_t));
- EncodeTypes(Streamer, Types);
+ // Nothing to emit; params are declared as part of the function signature.
}
void WebAssemblyTargetELFStreamer::emitResult(ArrayRef<MVT> Types) {
- Streamer.EmitIntValue(WebAssembly::DotResult, sizeof(uint64_t));
- EncodeTypes(Streamer, Types);
+ // Nothing to emit; results are declared as part of the function signature.
}
void WebAssemblyTargetELFStreamer::emitLocal(ArrayRef<MVT> Types) {
- Streamer.EmitIntValue(WebAssembly::DotLocal, sizeof(uint64_t));
- EncodeTypes(Streamer, Types);
+ Streamer.EmitULEB128IntValue(Types.size());
+ for (MVT Type : Types)
+ Streamer.EmitIntValue(int64_t(WebAssembly::toValType(Type)), 1);
}
void WebAssemblyTargetELFStreamer::emitEndFunc() {
- Streamer.EmitIntValue(WebAssembly::DotEndFunc, sizeof(uint64_t));
+ Streamer.EmitIntValue(WebAssembly::End, 1);
+}
+
+void WebAssemblyTargetELFStreamer::emitIndIdx(const MCExpr *Value) {
+ llvm_unreachable(".indidx encoding not yet implemented");
}
+
+void WebAssemblyTargetELFStreamer::emitIndirectFunctionType(
+ StringRef name, SmallVectorImpl<MVT> &Params, SmallVectorImpl<MVT> &Results) {
+ // Nothing to emit here. TODO: Re-design how linking works and re-evaluate
+ // whether it's necessary for .o files to declare indirect function types.
+}
+
+void WebAssemblyTargetELFStreamer::emitGlobalImport(StringRef name) {
+} \ No newline at end of file
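In the ELF streamer above, emitLocal now writes a ULEB128 count of the locals followed by one ValType byte per local, and emitEndFunc writes the single 0x0b end byte. A standalone sketch of the resulting byte sequence for two locals of type i32 and i64 (editor's example; the type byte values come from the ValType enum added in this patch):

#include <cassert>
#include <cstdint>
#include <cstdio>
#include <vector>

enum class ValType : uint8_t { I32 = 0x7f, I64 = 0x7e, F32 = 0x7d, F64 = 0x7c };

// Bytes in the style of the new emitLocal/emitEndFunc: a ULEB128 count (a
// single byte for fewer than 128 locals), one type byte per local, then the
// 0x0b end opcode closing the function body.
static std::vector<uint8_t> encodeLocalsAndEnd(const std::vector<ValType> &Types) {
  assert(Types.size() < 128 && "single-byte ULEB128 assumed in this sketch");
  std::vector<uint8_t> Out;
  Out.push_back(static_cast<uint8_t>(Types.size()));
  for (ValType T : Types)
    Out.push_back(static_cast<uint8_t>(T));
  Out.push_back(0x0b); // WebAssembly::End
  return Out;
}

int main() {
  for (uint8_t B : encodeLocalsAndEnd({ValType::I32, ValType::I64}))
    std::printf("%02x ", B); // prints: 02 7f 7e 0b
  std::printf("\n");
  return 0;
}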
diff --git a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
index 51354ef22d71..23ac3190243a 100644
--- a/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
+++ b/contrib/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
@@ -39,10 +39,14 @@ public:
virtual void emitEndFunc() = 0;
/// .functype
virtual void emitIndirectFunctionType(StringRef name,
- SmallVectorImpl<MVT> &SignatureVTs,
- size_t NumResults) {
+ SmallVectorImpl<MVT> &Params,
+ SmallVectorImpl<MVT> &Results) {
llvm_unreachable("emitIndirectFunctionType not implemented");
}
+ /// .indidx
+ virtual void emitIndIdx(const MCExpr *Value) = 0;
+ /// .import_global
+ virtual void emitGlobalImport(StringRef name) = 0;
};
/// This part is for ascii assembly output
@@ -57,8 +61,10 @@ public:
void emitLocal(ArrayRef<MVT> Types) override;
void emitEndFunc() override;
void emitIndirectFunctionType(StringRef name,
- SmallVectorImpl<MVT> &SignatureVTs,
- size_t NumResults) override;
+ SmallVectorImpl<MVT> &Params,
+ SmallVectorImpl<MVT> &Results) override;
+ void emitIndIdx(const MCExpr *Value) override;
+ void emitGlobalImport(StringRef name) override;
};
/// This part is for ELF object output
@@ -70,6 +76,11 @@ public:
void emitResult(ArrayRef<MVT> Types) override;
void emitLocal(ArrayRef<MVT> Types) override;
void emitEndFunc() override;
+ void emitIndirectFunctionType(StringRef name,
+ SmallVectorImpl<MVT> &Params,
+ SmallVectorImpl<MVT> &Results) override;
+ void emitIndIdx(const MCExpr *Value) override;
+ void emitGlobalImport(StringRef name) override;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/WebAssembly/README.txt b/contrib/llvm/lib/Target/WebAssembly/README.txt
index a6c2eefc0578..64991ad14071 100644
--- a/contrib/llvm/lib/Target/WebAssembly/README.txt
+++ b/contrib/llvm/lib/Target/WebAssembly/README.txt
@@ -1,22 +1,30 @@
//===-- README.txt - Notes for WebAssembly code gen -----------------------===//
-This WebAssembly backend is presently in a very early stage of development.
-The code should build and not break anything else, but don't expect a lot more
-at this point.
+This WebAssembly backend is presently under development.
-For more information on WebAssembly itself, see the design documents:
- * https://github.com/WebAssembly/design/blob/master/README.md
+Currently the easiest way to use it is through Emscripten, which provides a
+compilation environment that includes standard libraries, tools, and packaging
+for producing WebAssembly applications that can run in browsers and other
+environments. For more information, see the Emscripten documentation in
+general, and this page in particular:
+ * https://github.com/kripken/emscripten/wiki/New-WebAssembly-Backend
-The following documents contain some information on the planned semantics and
-binary encoding of WebAssembly itself:
- * https://github.com/WebAssembly/design/blob/master/AstSemantics.md
+Other ways of using this backend, such as via a standalone "clang", are also
+under development, though they are not generally usable yet.
+
+For more information on WebAssembly itself, see the home page:
+ * https://webassembly.github.io/
+
+The following documents contain some information on the semantics and binary
+encoding of WebAssembly itself:
+ * https://github.com/WebAssembly/design/blob/master/Semantics.md
* https://github.com/WebAssembly/design/blob/master/BinaryEncoding.md
The backend is built, tested and archived on the following waterfall:
https://wasm-stat.us
-The backend's bringup is done using the GCC torture test suite first since it
-doesn't require C library support. Current known failures are in
+The backend's bringup is done in part by using the GCC torture test suite, since
+it doesn't require C library support. Current known failures are in
known_gcc_test_failures.txt, all other tests should pass. The waterfall will
turn red if not. Once most of these pass, further testing will use LLVM's own
test suite. The tests can be run locally using:
@@ -24,13 +32,13 @@ test suite. The tests can be run locally using:
//===---------------------------------------------------------------------===//
-Br, br_if, and br_table instructions can support having a value on the
-expression stack across the jump (sometimes). We should (a) model this, and
-(b) extend the stackifier to utilize it.
+Br, br_if, and br_table instructions can support having a value on the value
+stack across the jump (sometimes). We should (a) model this, and (b) extend
+the stackifier to utilize it.
//===---------------------------------------------------------------------===//
-The min/max operators aren't exactly a<b?a:b because of NaN and negative zero
+The min/max instructions aren't exactly a<b?a:b because of NaN and negative zero
behavior. The ARM target has the same kind of min/max instructions and has
implemented optimizations for them; we should do similar optimizations for
WebAssembly.
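As a concrete illustration of the point above (an editor's sketch): a compare-and-select is not equivalent to a min instruction that propagates NaN and orders -0.0 below +0.0, which is the behavior WebAssembly's min/max are expected to have.

#include <cmath>
#include <cstdio>

// The naive "a < b ? a : b" lowering of min.
static float selectMin(float a, float b) { return a < b ? a : b; }

int main() {
  float nan = std::nanf("");
  // The comparison with NaN is false, so select yields b = 1.0;
  // a NaN-propagating min would yield NaN instead.
  std::printf("%f\n", selectMin(nan, 1.0f));   // 1.000000
  // -0.0 < +0.0 is false (they compare equal), so select yields +0.0;
  // a sign-aware min would yield -0.0.
  std::printf("%f\n", selectMin(-0.0f, 0.0f)); // 0.000000
  return 0;
}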
@@ -44,7 +52,7 @@ us too?
//===---------------------------------------------------------------------===//
-Register stackification uses the EXPR_STACK physical register to impose
+Register stackification uses the VALUE_STACK physical register to impose
ordering dependencies on instructions with stack operands. This is pessimistic;
we should consider alternate ways to model stack dependencies.
@@ -99,12 +107,6 @@ according to their usage frequency to maximize the usage of smaller encodings.
//===---------------------------------------------------------------------===//
-When the last statement in a function body computes the return value, it can
-just let that value be the exit value of the outermost block, rather than
-needing an explicit return operation.
-
-//===---------------------------------------------------------------------===//
-
Many cases of irreducible control flow could be transformed more optimally
than via the transform in WebAssemblyFixIrreducibleControlFlow.cpp.
@@ -135,3 +137,11 @@ enableMachineScheduler) and determine if it can be configured to schedule
instructions advantageously for this purpose.
//===---------------------------------------------------------------------===//
+
+WebAssembly is now officially a stack machine, rather than an AST, and this
+comes with additional opportunities for WebAssemblyRegStackify. Specifically,
+the stack doesn't need to be empty after an instruction with no return values.
+WebAssemblyRegStackify could be extended, or possibly rewritten, to take
+advantage of the new opportunities.
+
+//===---------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp b/contrib/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp
index ddb1eb1d1892..f310f0a44461 100644
--- a/contrib/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/TargetInfo/WebAssemblyTargetInfo.cpp
@@ -19,12 +19,18 @@ using namespace llvm;
#define DEBUG_TYPE "wasm-target-info"
-Target llvm::TheWebAssemblyTarget32;
-Target llvm::TheWebAssemblyTarget64;
+Target &llvm::getTheWebAssemblyTarget32() {
+ static Target TheWebAssemblyTarget32;
+ return TheWebAssemblyTarget32;
+}
+Target &llvm::getTheWebAssemblyTarget64() {
+ static Target TheWebAssemblyTarget64;
+ return TheWebAssemblyTarget64;
+}
extern "C" void LLVMInitializeWebAssemblyTargetInfo() {
- RegisterTarget<Triple::wasm32> X(TheWebAssemblyTarget32, "wasm32",
+ RegisterTarget<Triple::wasm32> X(getTheWebAssemblyTarget32(), "wasm32",
"WebAssembly 32-bit");
- RegisterTarget<Triple::wasm64> Y(TheWebAssemblyTarget64, "wasm64",
+ RegisterTarget<Triple::wasm64> Y(getTheWebAssemblyTarget64(), "wasm64",
"WebAssembly 64-bit");
}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssembly.h b/contrib/llvm/lib/Target/WebAssembly/WebAssembly.h
index 957f31cae222..09c35b4825fc 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssembly.h
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssembly.h
@@ -16,14 +16,18 @@
#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLY_H
#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLY_H
+#include "llvm/PassRegistry.h"
#include "llvm/Support/CodeGen.h"
namespace llvm {
class WebAssemblyTargetMachine;
+class ModulePass;
class FunctionPass;
// LLVM IR passes.
+ModulePass *createWebAssemblyLowerEmscriptenEHSjLj(bool DoEH, bool DoSjLj);
+void initializeWebAssemblyLowerEmscriptenEHSjLjPass(PassRegistry &);
FunctionPass *createWebAssemblyOptimizeReturned();
// ISel and immediate followup passes.
@@ -39,11 +43,13 @@ FunctionPass *createWebAssemblyOptimizeLiveIntervals();
FunctionPass *createWebAssemblyStoreResults();
FunctionPass *createWebAssemblyRegStackify();
FunctionPass *createWebAssemblyRegColoring();
+FunctionPass *createWebAssemblyExplicitLocals();
FunctionPass *createWebAssemblyFixIrreducibleControlFlow();
FunctionPass *createWebAssemblyCFGStackify();
FunctionPass *createWebAssemblyLowerBrUnless();
FunctionPass *createWebAssemblyRegNumbering();
FunctionPass *createWebAssemblyPeephole();
+FunctionPass *createWebAssemblyCallIndirectFixup();
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssembly.td b/contrib/llvm/lib/Target/WebAssembly/WebAssembly.td
index 551ad9345154..f647349d759b 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssembly.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssembly.td
@@ -23,7 +23,7 @@ include "llvm/Target/Target.td"
// WebAssembly Subtarget features.
//===----------------------------------------------------------------------===//
-def FeatureSIMD128 : SubtargetFeature<"simd128", "HasSIMD128", "false",
+def FeatureSIMD128 : SubtargetFeature<"simd128", "HasSIMD128", "true",
"Enable 128-bit SIMD">;
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp
index 5887f45371fc..5fadca38b820 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyArgumentMove.cpp
@@ -26,9 +26,11 @@
///
//===----------------------------------------------------------------------===//
-#include "WebAssembly.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssembly.h"
#include "WebAssemblyMachineFunctionInfo.h"
+#include "WebAssemblySubtarget.h"
+#include "WebAssemblyUtilities.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
@@ -44,9 +46,7 @@ public:
static char ID; // Pass identification, replacement for typeid
WebAssemblyArgumentMove() : MachineFunctionPass(ID) {}
- const char *getPassName() const override {
- return "WebAssembly Argument Move";
- }
+ StringRef getPassName() const override { return "WebAssembly Argument Move"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
@@ -64,19 +64,6 @@ FunctionPass *llvm::createWebAssemblyArgumentMove() {
return new WebAssemblyArgumentMove();
}
-/// Test whether the given instruction is an ARGUMENT.
-static bool IsArgument(const MachineInstr &MI) {
- switch (MI.getOpcode()) {
- case WebAssembly::ARGUMENT_I32:
- case WebAssembly::ARGUMENT_I64:
- case WebAssembly::ARGUMENT_F32:
- case WebAssembly::ARGUMENT_F64:
- return true;
- default:
- return false;
- }
-}
-
bool WebAssemblyArgumentMove::runOnMachineFunction(MachineFunction &MF) {
DEBUG({
dbgs() << "********** Argument Move **********\n"
@@ -89,7 +76,7 @@ bool WebAssemblyArgumentMove::runOnMachineFunction(MachineFunction &MF) {
// Look for the first NonArg instruction.
for (MachineInstr &MI : EntryMBB) {
- if (!IsArgument(MI)) {
+ if (!WebAssembly::isArgument(MI)) {
InsertPt = MI;
break;
}
@@ -98,7 +85,7 @@ bool WebAssemblyArgumentMove::runOnMachineFunction(MachineFunction &MF) {
// Now move any argument instructions later in the block
// to before our first NonArg instruction.
for (MachineInstr &MI : llvm::make_range(InsertPt, EntryMBB.end())) {
- if (IsArgument(MI)) {
+ if (WebAssembly::isArgument(MI)) {
EntryMBB.insert(InsertPt, MI.removeFromParent());
Changed = true;
}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
index 54e9f7f52901..5b4b82eb5603 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
@@ -14,10 +14,10 @@
///
//===----------------------------------------------------------------------===//
-#include "WebAssembly.h"
#include "InstPrinter/WebAssemblyInstPrinter.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "MCTargetDesc/WebAssemblyTargetStreamer.h"
+#include "WebAssembly.h"
#include "WebAssemblyMCInstLower.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblyRegisterInfo.h"
@@ -42,14 +42,14 @@ namespace {
class WebAssemblyAsmPrinter final : public AsmPrinter {
const MachineRegisterInfo *MRI;
- const WebAssemblyFunctionInfo *MFI;
+ WebAssemblyFunctionInfo *MFI;
public:
WebAssemblyAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer)
: AsmPrinter(TM, std::move(Streamer)), MRI(nullptr), MFI(nullptr) {}
private:
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "WebAssembly Assembly Printer";
}
@@ -82,7 +82,6 @@ private:
raw_ostream &OS) override;
MVT getRegType(unsigned RegNo) const;
- const char *toString(MVT VT) const;
std::string regToString(const MachineOperand &MO);
WebAssemblyTargetStreamer *getTargetStreamer();
};
@@ -95,7 +94,8 @@ private:
MVT WebAssemblyAsmPrinter::getRegType(unsigned RegNo) const {
const TargetRegisterClass *TRC = MRI->getRegClass(RegNo);
- for (MVT T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64})
+ for (MVT T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64, MVT::v16i8, MVT::v8i16,
+ MVT::v4i32, MVT::v4f32})
if (TRC->hasType(T))
return T;
DEBUG(errs() << "Unknown type for register number: " << RegNo);
@@ -103,10 +103,6 @@ MVT WebAssemblyAsmPrinter::getRegType(unsigned RegNo) const {
return MVT::Other;
}
-const char *WebAssemblyAsmPrinter::toString(MVT VT) const {
- return WebAssembly::TypeToString(VT);
-}
-
std::string WebAssemblyAsmPrinter::regToString(const MachineOperand &MO) {
unsigned RegNo = MO.getReg();
assert(TargetRegisterInfo::isVirtualRegister(RegNo) &&
@@ -125,45 +121,21 @@ WebAssemblyTargetStreamer *WebAssemblyAsmPrinter::getTargetStreamer() {
//===----------------------------------------------------------------------===//
// WebAssemblyAsmPrinter Implementation.
//===----------------------------------------------------------------------===//
-static void ComputeLegalValueVTs(const Function &F, const TargetMachine &TM,
- Type *Ty, SmallVectorImpl<MVT> &ValueVTs) {
- const DataLayout &DL(F.getParent()->getDataLayout());
- const WebAssemblyTargetLowering &TLI =
- *TM.getSubtarget<WebAssemblySubtarget>(F).getTargetLowering();
- SmallVector<EVT, 4> VTs;
- ComputeValueVTs(TLI, DL, Ty, VTs);
-
- for (EVT VT : VTs) {
- unsigned NumRegs = TLI.getNumRegisters(F.getContext(), VT);
- MVT RegisterVT = TLI.getRegisterType(F.getContext(), VT);
- for (unsigned i = 0; i != NumRegs; ++i)
- ValueVTs.push_back(RegisterVT);
- }
-}
void WebAssemblyAsmPrinter::EmitEndOfAsmFile(Module &M) {
for (const auto &F : M) {
// Emit function type info for all undefined functions
if (F.isDeclarationForLinker() && !F.isIntrinsic()) {
- SmallVector<MVT, 4> SignatureVTs;
- ComputeLegalValueVTs(F, TM, F.getReturnType(), SignatureVTs);
- size_t NumResults = SignatureVTs.size();
- if (SignatureVTs.size() > 1) {
- // WebAssembly currently can't lower returns of multiple values without
- // demoting to sret (see WebAssemblyTargetLowering::CanLowerReturn). So
- // replace multiple return values with a pointer parameter.
- SignatureVTs.clear();
- SignatureVTs.push_back(
- MVT::getIntegerVT(M.getDataLayout().getPointerSizeInBits()));
- NumResults = 0;
- }
-
- for (auto &Arg : F.args()) {
- ComputeLegalValueVTs(F, TM, Arg.getType(), SignatureVTs);
- }
-
- getTargetStreamer()->emitIndirectFunctionType(F.getName(), SignatureVTs,
- NumResults);
+ SmallVector<MVT, 4> Results;
+ SmallVector<MVT, 4> Params;
+ ComputeSignatureVTs(F, TM, Params, Results);
+ getTargetStreamer()->emitIndirectFunctionType(F.getName(), Params,
+ Results);
+ }
+ }
+ for (const auto &G : M.globals()) {
+ if (!G.hasInitializer() && G.hasExternalLinkage()) {
+ getTargetStreamer()->emitGlobalImport(G.getGlobalIdentifier());
}
}
}
@@ -183,6 +155,15 @@ void WebAssemblyAsmPrinter::EmitFunctionBodyStart() {
SmallVector<MVT, 4> ResultVTs;
const Function &F(*MF->getFunction());
+
+ // Emit the function index.
+ if (MDNode *Idx = F.getMetadata("wasm.index")) {
+ assert(Idx->getNumOperands() == 1);
+
+ getTargetStreamer()->emitIndIdx(AsmPrinter::lowerConstant(
+ cast<ConstantAsMetadata>(Idx->getOperand(0))->getValue()));
+ }
+
ComputeLegalValueVTs(F, TM, F.getReturnType(), ResultVTs);
// If the return type needs to be legalized it will get converted into
@@ -190,8 +171,8 @@ void WebAssemblyAsmPrinter::EmitFunctionBodyStart() {
if (ResultVTs.size() == 1)
getTargetStreamer()->emitResult(ResultVTs);
- bool AnyWARegs = false;
- SmallVector<MVT, 16> LocalTypes;
+ // FIXME: When ExplicitLocals is enabled by default, we won't need
+ // to define the locals here (and MFI can go back to being pointer-to-const).
for (unsigned Idx = 0, IdxE = MRI->getNumVirtRegs(); Idx != IdxE; ++Idx) {
unsigned VReg = TargetRegisterInfo::index2VirtReg(Idx);
unsigned WAReg = MFI->getWAReg(VReg);
@@ -204,11 +185,10 @@ void WebAssemblyAsmPrinter::EmitFunctionBodyStart() {
// Don't declare stackified registers.
if (int(WAReg) < 0)
continue;
- LocalTypes.push_back(getRegType(VReg));
- AnyWARegs = true;
+ MFI->addLocal(getRegType(VReg));
}
- if (AnyWARegs)
- getTargetStreamer()->emitLocal(LocalTypes);
+
+ getTargetStreamer()->emitLocal(MFI->getLocals());
AsmPrinter::EmitFunctionBodyStart();
}
@@ -225,13 +205,21 @@ void WebAssemblyAsmPrinter::EmitInstruction(const MachineInstr *MI) {
case WebAssembly::ARGUMENT_I64:
case WebAssembly::ARGUMENT_F32:
case WebAssembly::ARGUMENT_F64:
+ case WebAssembly::ARGUMENT_v16i8:
+ case WebAssembly::ARGUMENT_v8i16:
+ case WebAssembly::ARGUMENT_v4i32:
+ case WebAssembly::ARGUMENT_v4f32:
// These represent values which are live into the function entry, so there's
// no instruction to emit.
break;
case WebAssembly::FALLTHROUGH_RETURN_I32:
case WebAssembly::FALLTHROUGH_RETURN_I64:
case WebAssembly::FALLTHROUGH_RETURN_F32:
- case WebAssembly::FALLTHROUGH_RETURN_F64: {
+ case WebAssembly::FALLTHROUGH_RETURN_F64:
+ case WebAssembly::FALLTHROUGH_RETURN_v16i8:
+ case WebAssembly::FALLTHROUGH_RETURN_v8i16:
+ case WebAssembly::FALLTHROUGH_RETURN_v4i32:
+ case WebAssembly::FALLTHROUGH_RETURN_v4f32: {
// These instructions represent the implicit return at the end of a
// function body. The operand is always a pop.
assert(MFI->isVRegStackified(MI->getOperand(0).getReg()));
@@ -329,6 +317,6 @@ bool WebAssemblyAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
// Force static initialization.
extern "C" void LLVMInitializeWebAssemblyAsmPrinter() {
- RegisterAsmPrinter<WebAssemblyAsmPrinter> X(TheWebAssemblyTarget32);
- RegisterAsmPrinter<WebAssemblyAsmPrinter> Y(TheWebAssemblyTarget64);
+ RegisterAsmPrinter<WebAssemblyAsmPrinter> X(getTheWebAssemblyTarget32());
+ RegisterAsmPrinter<WebAssemblyAsmPrinter> Y(getTheWebAssemblyTarget64());
}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
index 33166f5b554f..49b9754e6b62 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
@@ -27,6 +27,7 @@
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
+#include "WebAssemblyUtilities.h"
#include "llvm/ADT/PriorityQueue.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/CodeGen/MachineDominators.h"
@@ -43,9 +44,7 @@ using namespace llvm;
namespace {
class WebAssemblyCFGStackify final : public MachineFunctionPass {
- const char *getPassName() const override {
- return "WebAssembly CFG Stackify";
- }
+ StringRef getPassName() const override { return "WebAssembly CFG Stackify"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
@@ -294,26 +293,16 @@ static bool ExplicitlyBranchesTo(MachineBasicBlock *Pred,
return false;
}
-/// Test whether MI is a child of some other node in an expression tree.
-static bool IsChild(const MachineInstr &MI,
- const WebAssemblyFunctionInfo &MFI) {
- if (MI.getNumOperands() == 0)
- return false;
- const MachineOperand &MO = MI.getOperand(0);
- if (!MO.isReg() || MO.isImplicit() || !MO.isDef())
- return false;
- unsigned Reg = MO.getReg();
- return TargetRegisterInfo::isVirtualRegister(Reg) &&
- MFI.isVRegStackified(Reg);
-}
-
/// Insert a BLOCK marker for branches to MBB (if needed).
-static void PlaceBlockMarker(MachineBasicBlock &MBB, MachineFunction &MF,
- SmallVectorImpl<MachineBasicBlock *> &ScopeTops,
- const WebAssemblyInstrInfo &TII,
- const MachineLoopInfo &MLI,
- MachineDominatorTree &MDT,
- WebAssemblyFunctionInfo &MFI) {
+static void PlaceBlockMarker(
+ MachineBasicBlock &MBB, MachineFunction &MF,
+ SmallVectorImpl<MachineBasicBlock *> &ScopeTops,
+ DenseMap<const MachineInstr *, MachineInstr *> &BlockTops,
+ DenseMap<const MachineInstr *, MachineInstr *> &LoopTops,
+ const WebAssemblyInstrInfo &TII,
+ const MachineLoopInfo &MLI,
+ MachineDominatorTree &MDT,
+ WebAssemblyFunctionInfo &MFI) {
// First compute the nearest common dominator of all forward non-fallthrough
// predecessors so that we minimize the time that the BLOCK is on the stack,
// which reduces overall stack height.
@@ -332,7 +321,7 @@ static void PlaceBlockMarker(MachineBasicBlock &MBB, MachineFunction &MF,
return;
assert(&MBB != &MF.front() && "Header blocks shouldn't have predecessors");
- MachineBasicBlock *LayoutPred = &*prev(MachineFunction::iterator(&MBB));
+ MachineBasicBlock *LayoutPred = &*std::prev(MachineFunction::iterator(&MBB));
// If the nearest common dominator is inside a more deeply nested context,
// walk out to the nearest scope which isn't more deeply nested.
@@ -340,7 +329,7 @@ static void PlaceBlockMarker(MachineBasicBlock &MBB, MachineFunction &MF,
if (MachineBasicBlock *ScopeTop = ScopeTops[I->getNumber()]) {
if (ScopeTop->getNumber() > Header->getNumber()) {
// Skip over an intervening scope.
- I = next(MachineFunction::iterator(ScopeTop));
+ I = std::next(MachineFunction::iterator(ScopeTop));
} else {
// We found a scope level at an appropriate depth.
Header = ScopeTop;
@@ -349,13 +338,6 @@ static void PlaceBlockMarker(MachineBasicBlock &MBB, MachineFunction &MF,
}
}
- // If there's a loop which ends just before MBB which contains Header, we can
- // reuse its label instead of inserting a new BLOCK.
- for (MachineLoop *Loop = MLI.getLoopFor(LayoutPred);
- Loop && Loop->contains(LayoutPred); Loop = Loop->getParentLoop())
- if (Loop && LoopBottom(Loop) == LayoutPred && Loop->contains(Header))
- return;
-
// Decide where in Header to put the BLOCK.
MachineBasicBlock::iterator InsertPos;
MachineLoop *HeaderLoop = MLI.getLoopFor(Header);
@@ -363,28 +345,35 @@ static void PlaceBlockMarker(MachineBasicBlock &MBB, MachineFunction &MF,
// Header is the header of a loop that does not lexically contain MBB, so
// the BLOCK needs to be above the LOOP, after any END constructs.
InsertPos = Header->begin();
- while (InsertPos->getOpcode() != WebAssembly::LOOP)
+ while (InsertPos->getOpcode() == WebAssembly::END_BLOCK ||
+ InsertPos->getOpcode() == WebAssembly::END_LOOP)
++InsertPos;
} else {
// Otherwise, insert the BLOCK as late in Header as we can, but before the
// beginning of the local expression tree and any nested BLOCKs.
InsertPos = Header->getFirstTerminator();
- while (InsertPos != Header->begin() && IsChild(*prev(InsertPos), MFI) &&
- prev(InsertPos)->getOpcode() != WebAssembly::LOOP &&
- prev(InsertPos)->getOpcode() != WebAssembly::END_BLOCK &&
- prev(InsertPos)->getOpcode() != WebAssembly::END_LOOP)
+ while (InsertPos != Header->begin() &&
+ WebAssembly::isChild(*std::prev(InsertPos), MFI) &&
+ std::prev(InsertPos)->getOpcode() != WebAssembly::LOOP &&
+ std::prev(InsertPos)->getOpcode() != WebAssembly::END_BLOCK &&
+ std::prev(InsertPos)->getOpcode() != WebAssembly::END_LOOP)
--InsertPos;
}
// Add the BLOCK.
- BuildMI(*Header, InsertPos, DebugLoc(), TII.get(WebAssembly::BLOCK));
+ MachineInstr *Begin = BuildMI(*Header, InsertPos, DebugLoc(),
+ TII.get(WebAssembly::BLOCK))
+ .addImm(int64_t(WebAssembly::ExprType::Void));
// Mark the end of the block.
InsertPos = MBB.begin();
while (InsertPos != MBB.end() &&
- InsertPos->getOpcode() == WebAssembly::END_LOOP)
+ InsertPos->getOpcode() == WebAssembly::END_LOOP &&
+ LoopTops[&*InsertPos]->getParent()->getNumber() >= Header->getNumber())
++InsertPos;
- BuildMI(MBB, InsertPos, DebugLoc(), TII.get(WebAssembly::END_BLOCK));
+ MachineInstr *End = BuildMI(MBB, InsertPos, DebugLoc(),
+ TII.get(WebAssembly::END_BLOCK));
+ BlockTops[End] = Begin;
// Track the farthest-spanning scope that ends at this point.
int Number = MBB.getNumber();
@@ -397,7 +386,7 @@ static void PlaceBlockMarker(MachineBasicBlock &MBB, MachineFunction &MF,
static void PlaceLoopMarker(
MachineBasicBlock &MBB, MachineFunction &MF,
SmallVectorImpl<MachineBasicBlock *> &ScopeTops,
- DenseMap<const MachineInstr *, const MachineBasicBlock *> &LoopTops,
+ DenseMap<const MachineInstr *, MachineInstr *> &LoopTops,
const WebAssemblyInstrInfo &TII, const MachineLoopInfo &MLI) {
MachineLoop *Loop = MLI.getLoopFor(&MBB);
if (!Loop || Loop->getHeader() != &MBB)
@@ -406,13 +395,13 @@ static void PlaceLoopMarker(
// The operand of a LOOP is the first block after the loop. If the loop is the
// bottom of the function, insert a dummy block at the end.
MachineBasicBlock *Bottom = LoopBottom(Loop);
- auto Iter = next(MachineFunction::iterator(Bottom));
+ auto Iter = std::next(MachineFunction::iterator(Bottom));
if (Iter == MF.end()) {
MachineBasicBlock *Label = MF.CreateMachineBasicBlock();
// Give it a fake predecessor so that AsmPrinter prints its label.
Label->addSuccessor(Label);
MF.push_back(Label);
- Iter = next(MachineFunction::iterator(Bottom));
+ Iter = std::next(MachineFunction::iterator(Bottom));
}
MachineBasicBlock *AfterLoop = &*Iter;
@@ -422,12 +411,14 @@ static void PlaceLoopMarker(
while (InsertPos != MBB.end() &&
InsertPos->getOpcode() == WebAssembly::END_LOOP)
++InsertPos;
- BuildMI(MBB, InsertPos, DebugLoc(), TII.get(WebAssembly::LOOP));
+ MachineInstr *Begin = BuildMI(MBB, InsertPos, DebugLoc(),
+ TII.get(WebAssembly::LOOP))
+ .addImm(int64_t(WebAssembly::ExprType::Void));
// Mark the end of the loop.
MachineInstr *End = BuildMI(*AfterLoop, AfterLoop->begin(), DebugLoc(),
TII.get(WebAssembly::END_LOOP));
- LoopTops[End] = &MBB;
+ LoopTops[End] = Begin;
assert((!ScopeTops[AfterLoop->getNumber()] ||
ScopeTops[AfterLoop->getNumber()]->getNumber() < MBB.getNumber()) &&
@@ -449,6 +440,54 @@ GetDepth(const SmallVectorImpl<const MachineBasicBlock *> &Stack,
return Depth;
}
+/// In normal assembly languages, when the end of a function is unreachable,
+/// because the function ends in an infinite loop or a noreturn call or similar,
+/// it isn't necessary to worry about the function return type at the end of
+/// the function, because it's never reached. However, in WebAssembly, blocks
+/// that end at the function end need to have a return type signature that
+/// matches the function signature, even though it's unreachable. This function
+/// checks for such cases and fixes up the signatures.
+static void FixEndsAtEndOfFunction(
+ MachineFunction &MF,
+ const WebAssemblyFunctionInfo &MFI,
+ DenseMap<const MachineInstr *, MachineInstr *> &BlockTops,
+ DenseMap<const MachineInstr *, MachineInstr *> &LoopTops) {
+ assert(MFI.getResults().size() <= 1);
+
+ if (MFI.getResults().empty())
+ return;
+
+ WebAssembly::ExprType retType;
+ switch (MFI.getResults().front().SimpleTy) {
+ case MVT::i32: retType = WebAssembly::ExprType::I32; break;
+ case MVT::i64: retType = WebAssembly::ExprType::I64; break;
+ case MVT::f32: retType = WebAssembly::ExprType::F32; break;
+ case MVT::f64: retType = WebAssembly::ExprType::F64; break;
+ case MVT::v16i8: retType = WebAssembly::ExprType::I8x16; break;
+ case MVT::v8i16: retType = WebAssembly::ExprType::I16x8; break;
+ case MVT::v4i32: retType = WebAssembly::ExprType::I32x4; break;
+ case MVT::v4f32: retType = WebAssembly::ExprType::F32x4; break;
+ default: llvm_unreachable("unexpected return type");
+ }
+
+ for (MachineBasicBlock &MBB : reverse(MF)) {
+ for (MachineInstr &MI : reverse(MBB)) {
+ if (MI.isPosition() || MI.isDebugValue())
+ continue;
+ if (MI.getOpcode() == WebAssembly::END_BLOCK) {
+ BlockTops[&MI]->getOperand(0).setImm(int32_t(retType));
+ continue;
+ }
+ if (MI.getOpcode() == WebAssembly::END_LOOP) {
+ LoopTops[&MI]->getOperand(0).setImm(int32_t(retType));
+ continue;
+ }
+ // Something other than an `end`. We're done.
+ return;
+ }
+ }
+}
+
/// Insert LOOP and BLOCK markers at appropriate places.
static void PlaceMarkers(MachineFunction &MF, const MachineLoopInfo &MLI,
const WebAssemblyInstrInfo &TII,
@@ -461,15 +500,18 @@ static void PlaceMarkers(MachineFunction &MF, const MachineLoopInfo &MLI,
// we may insert at the end.
SmallVector<MachineBasicBlock *, 8> ScopeTops(MF.getNumBlockIDs() + 1);
- // For eacn LOOP_END, the corresponding LOOP.
- DenseMap<const MachineInstr *, const MachineBasicBlock *> LoopTops;
+ // For each LOOP_END, the corresponding LOOP.
+ DenseMap<const MachineInstr *, MachineInstr *> LoopTops;
+
+ // For each END_BLOCK, the corresponding BLOCK.
+ DenseMap<const MachineInstr *, MachineInstr *> BlockTops;
for (auto &MBB : MF) {
// Place the LOOP for MBB if MBB is the header of a loop.
PlaceLoopMarker(MBB, MF, ScopeTops, LoopTops, TII, MLI);
// Place the BLOCK for MBB if MBB is branched to from above.
- PlaceBlockMarker(MBB, MF, ScopeTops, TII, MLI, MDT, MFI);
+ PlaceBlockMarker(MBB, MF, ScopeTops, BlockTops, LoopTops, TII, MLI, MDT, MFI);
}
// Now rewrite references to basic blocks to be depth immediates.
@@ -478,21 +520,19 @@ static void PlaceMarkers(MachineFunction &MF, const MachineLoopInfo &MLI,
for (auto &MI : reverse(MBB)) {
switch (MI.getOpcode()) {
case WebAssembly::BLOCK:
- assert(ScopeTops[Stack.back()->getNumber()] == &MBB &&
+ assert(ScopeTops[Stack.back()->getNumber()]->getNumber() <= MBB.getNumber() &&
"Block should be balanced");
Stack.pop_back();
break;
case WebAssembly::LOOP:
assert(Stack.back() == &MBB && "Loop top should be balanced");
Stack.pop_back();
- Stack.pop_back();
break;
case WebAssembly::END_BLOCK:
Stack.push_back(&MBB);
break;
case WebAssembly::END_LOOP:
- Stack.push_back(&MBB);
- Stack.push_back(LoopTops[&MI]);
+ Stack.push_back(LoopTops[&MI]->getParent());
break;
default:
if (MI.isTerminator()) {
@@ -511,6 +551,10 @@ static void PlaceMarkers(MachineFunction &MF, const MachineLoopInfo &MLI,
}
}
assert(Stack.empty() && "Control flow should be balanced");
+
+ // Fix up block/loop signatures at the end of the function to conform to
+ // WebAssembly's rules.
+ FixEndsAtEndOfFunction(MF, MFI, BlockTops, LoopTops);
}
bool WebAssemblyCFGStackify::runOnMachineFunction(MachineFunction &MF) {
@@ -520,7 +564,7 @@ bool WebAssemblyCFGStackify::runOnMachineFunction(MachineFunction &MF) {
const auto &MLI = getAnalysis<MachineLoopInfo>();
auto &MDT = getAnalysis<MachineDominatorTree>();
- // Liveness is not tracked for EXPR_STACK physreg.
+ // Liveness is not tracked for VALUE_STACK physreg.
const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
MF.getRegInfo().invalidateLiveness();
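A note on the depth rewriting in the hunks above: once BLOCK/LOOP markers are placed, branch targets become depth immediates, i.e. the number of enclosing scopes between the top of the scope stack and the target block. A minimal, self-contained sketch of that lookup (plain C++ outside of LLVM; names are illustrative, not the actual pass API):

    #include <cassert>
    #include <vector>

    // Scan from the innermost scope outward; the number of scopes skipped
    // before reaching the target is the branch depth.
    static unsigned getBranchDepth(const std::vector<int> &ScopeStack,
                                   int TargetBlock) {
      unsigned Depth = 0;
      for (auto I = ScopeStack.rbegin(), E = ScopeStack.rend(); I != E;
           ++I, ++Depth)
        if (*I == TargetBlock)
          return Depth;
      assert(false && "branch target must be on the scope stack");
      return Depth;
    }
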
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp
new file mode 100644
index 000000000000..fc0a01ca30e5
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyCallIndirectFixup.cpp
@@ -0,0 +1,120 @@
+//===-- WebAssemblyCallIndirectFixup.cpp - Fix call_indirects -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file converts pseudo call_indirect instructions into real
+/// call_indirects.
+///
+/// The order of arguments for a call_indirect is the arguments to the function
+/// call, followed by the function pointer. There's no natural way to express
+/// a machineinstr with varargs followed by one more arg, so we express it as
+/// the function pointer followed by varargs, then rewrite it here.
+///
+/// We need to rewrite the order of the arguments on the machineinstrs
+/// themselves so that register stackification knows the order they'll be
+/// executed in.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // for WebAssembly::ARGUMENT_*
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "WebAssemblySubtarget.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-call-indirect-fixup"
+
+namespace {
+class WebAssemblyCallIndirectFixup final : public MachineFunctionPass {
+ StringRef getPassName() const override {
+ return "WebAssembly CallIndirect Fixup";
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ WebAssemblyCallIndirectFixup() : MachineFunctionPass(ID) {}
+};
+} // end anonymous namespace
+
+char WebAssemblyCallIndirectFixup::ID = 0;
+FunctionPass *llvm::createWebAssemblyCallIndirectFixup() {
+ return new WebAssemblyCallIndirectFixup();
+}
+
+static unsigned GetNonPseudoCallIndirectOpcode(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ using namespace WebAssembly;
+ case PCALL_INDIRECT_VOID: return CALL_INDIRECT_VOID;
+ case PCALL_INDIRECT_I32: return CALL_INDIRECT_I32;
+ case PCALL_INDIRECT_I64: return CALL_INDIRECT_I64;
+ case PCALL_INDIRECT_F32: return CALL_INDIRECT_F32;
+ case PCALL_INDIRECT_F64: return CALL_INDIRECT_F64;
+ case PCALL_INDIRECT_v16i8: return CALL_INDIRECT_v16i8;
+ case PCALL_INDIRECT_v8i16: return CALL_INDIRECT_v8i16;
+ case PCALL_INDIRECT_v4i32: return CALL_INDIRECT_v4i32;
+ case PCALL_INDIRECT_v4f32: return CALL_INDIRECT_v4f32;
+ default: return INSTRUCTION_LIST_END;
+ }
+}
+
+static bool IsPseudoCallIndirect(const MachineInstr &MI) {
+ return GetNonPseudoCallIndirectOpcode(MI) !=
+ WebAssembly::INSTRUCTION_LIST_END;
+}
+
+bool WebAssemblyCallIndirectFixup::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "********** Fixing up CALL_INDIRECTs **********\n"
+ << MF.getName() << '\n');
+
+ bool Changed = false;
+ const WebAssemblyInstrInfo *TII =
+ MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineInstr &MI : MBB) {
+ if (IsPseudoCallIndirect(MI)) {
+ DEBUG(dbgs() << "Found call_indirect: " << MI << '\n');
+
+ // Rewrite pseudo to non-pseudo
+ const MCInstrDesc &Desc = TII->get(GetNonPseudoCallIndirectOpcode(MI));
+ MI.setDesc(Desc);
+
+ // Rewrite argument order
+ auto Uses = MI.explicit_uses();
+ MachineInstr::mop_iterator it = Uses.begin();
+ const MachineOperand MO = *it;
+
+ // Set up the flags immediate, which currently has no defined flags
+ // so it's always zero.
+ it->ChangeToImmediate(0);
+
+ MI.addOperand(MF, MO);
+
+ DEBUG(dbgs() << " After transform: " << MI);
+ Changed = true;
+ }
+ }
+ }
+
+ DEBUG(dbgs() << "\nDone fixing up CALL_INDIRECTs\n\n");
+
+ return Changed;
+}
+
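A rough, self-contained illustration of the operand rewrite this new pass performs (plain C++ with a std::vector standing in for the MachineInstr operand list; the names are invented for the example): the pseudo form carries the callee first, and the fixup turns that slot into the flags immediate and moves the callee to the end, after the call arguments.

    #include <string>
    #include <vector>

    // Pseudo form:  { callee, arg0, arg1, ... }
    // Real form:    { flags,  arg0, arg1, ..., callee }
    static void fixUpCallIndirectOperands(std::vector<std::string> &Ops) {
      std::string Callee = Ops.front();
      Ops.front() = "flags=0"; // no flags are currently defined, so always zero
      Ops.push_back(Callee);
    }
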
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
new file mode 100644
index 000000000000..04ede7ff110c
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyExplicitLocals.cpp
@@ -0,0 +1,308 @@
+//===-- WebAssemblyExplicitLocals.cpp - Make Locals Explicit --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file converts any remaining registers into WebAssembly locals.
+///
+/// After register stackification and register coloring, convert non-stackified
+/// registers into locals, inserting explicit get_local and set_local
+/// instructions.
+///
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "WebAssembly.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "WebAssemblySubtarget.h"
+#include "WebAssemblyUtilities.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-explicit-locals"
+
+namespace {
+class WebAssemblyExplicitLocals final : public MachineFunctionPass {
+ StringRef getPassName() const override {
+ return "WebAssembly Explicit Locals";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addPreserved<MachineBlockFrequencyInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ WebAssemblyExplicitLocals() : MachineFunctionPass(ID) {}
+};
+} // end anonymous namespace
+
+char WebAssemblyExplicitLocals::ID = 0;
+FunctionPass *llvm::createWebAssemblyExplicitLocals() {
+ return new WebAssemblyExplicitLocals();
+}
+
+/// Return a local id number for the given register, assigning it a new one
+/// if it doesn't yet have one.
+static unsigned getLocalId(DenseMap<unsigned, unsigned> &Reg2Local,
+ unsigned &CurLocal, unsigned Reg) {
+ return Reg2Local.insert(std::make_pair(Reg, CurLocal++)).first->second;
+}
+
+/// Get the appropriate get_local opcode for the given register class.
+static unsigned getGetLocalOpcode(const TargetRegisterClass *RC) {
+ if (RC == &WebAssembly::I32RegClass)
+ return WebAssembly::GET_LOCAL_I32;
+ if (RC == &WebAssembly::I64RegClass)
+ return WebAssembly::GET_LOCAL_I64;
+ if (RC == &WebAssembly::F32RegClass)
+ return WebAssembly::GET_LOCAL_F32;
+ if (RC == &WebAssembly::F64RegClass)
+ return WebAssembly::GET_LOCAL_F64;
+ if (RC == &WebAssembly::V128RegClass)
+ return WebAssembly::GET_LOCAL_V128;
+ llvm_unreachable("Unexpected register class");
+}
+
+/// Get the appropriate set_local opcode for the given register class.
+static unsigned getSetLocalOpcode(const TargetRegisterClass *RC) {
+ if (RC == &WebAssembly::I32RegClass)
+ return WebAssembly::SET_LOCAL_I32;
+ if (RC == &WebAssembly::I64RegClass)
+ return WebAssembly::SET_LOCAL_I64;
+ if (RC == &WebAssembly::F32RegClass)
+ return WebAssembly::SET_LOCAL_F32;
+ if (RC == &WebAssembly::F64RegClass)
+ return WebAssembly::SET_LOCAL_F64;
+ if (RC == &WebAssembly::V128RegClass)
+ return WebAssembly::SET_LOCAL_V128;
+ llvm_unreachable("Unexpected register class");
+}
+
+/// Get the appropriate tee_local opcode for the given register class.
+static unsigned getTeeLocalOpcode(const TargetRegisterClass *RC) {
+ if (RC == &WebAssembly::I32RegClass)
+ return WebAssembly::TEE_LOCAL_I32;
+ if (RC == &WebAssembly::I64RegClass)
+ return WebAssembly::TEE_LOCAL_I64;
+ if (RC == &WebAssembly::F32RegClass)
+ return WebAssembly::TEE_LOCAL_F32;
+ if (RC == &WebAssembly::F64RegClass)
+ return WebAssembly::TEE_LOCAL_F64;
+ if (RC == &WebAssembly::V128RegClass)
+ return WebAssembly::TEE_LOCAL_V128;
+ llvm_unreachable("Unexpected register class");
+}
+
+/// Get the type associated with the given register class.
+static MVT typeForRegClass(const TargetRegisterClass *RC) {
+ if (RC == &WebAssembly::I32RegClass)
+ return MVT::i32;
+ if (RC == &WebAssembly::I64RegClass)
+ return MVT::i64;
+ if (RC == &WebAssembly::F32RegClass)
+ return MVT::f32;
+ if (RC == &WebAssembly::F64RegClass)
+ return MVT::f64;
+ llvm_unreachable("unrecognized register class");
+}
+
+/// Given a MachineOperand of a stackified vreg, return the instruction at the
+/// start of the expression tree.
+static MachineInstr *FindStartOfTree(MachineOperand &MO,
+ MachineRegisterInfo &MRI,
+ WebAssemblyFunctionInfo &MFI) {
+ unsigned Reg = MO.getReg();
+ assert(MFI.isVRegStackified(Reg));
+ MachineInstr *Def = MRI.getVRegDef(Reg);
+
+ // Find the first stackified use and proceed from there.
+ for (MachineOperand &DefMO : Def->explicit_uses()) {
+ if (!DefMO.isReg())
+ continue;
+ return FindStartOfTree(DefMO, MRI, MFI);
+ }
+
+ // If there were no stackified uses, we've reached the start.
+ return Def;
+}
+
+bool WebAssemblyExplicitLocals::runOnMachineFunction(MachineFunction &MF) {
+ DEBUG(dbgs() << "********** Make Locals Explicit **********\n"
+ "********** Function: "
+ << MF.getName() << '\n');
+
+ // Disable this pass if we aren't doing direct wasm object emission.
+ if (MF.getSubtarget<WebAssemblySubtarget>()
+ .getTargetTriple().isOSBinFormatELF())
+ return false;
+
+ bool Changed = false;
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
+ const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+
+ // Map non-stackified virtual registers to their local ids.
+ DenseMap<unsigned, unsigned> Reg2Local;
+
+ // Handle ARGUMENTS first to ensure that they get the designated numbers.
+ for (MachineBasicBlock::iterator I = MF.begin()->begin(),
+ E = MF.begin()->end();
+ I != E;) {
+ MachineInstr &MI = *I++;
+ if (!WebAssembly::isArgument(MI))
+ break;
+ unsigned Reg = MI.getOperand(0).getReg();
+ assert(!MFI.isVRegStackified(Reg));
+ Reg2Local[Reg] = MI.getOperand(1).getImm();
+ MI.eraseFromParent();
+ Changed = true;
+ }
+
+ // Start assigning local numbers after the last parameter.
+ unsigned CurLocal = MFI.getParams().size();
+
+ // Visit each instruction in the function.
+ for (MachineBasicBlock &MBB : MF) {
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E;) {
+ MachineInstr &MI = *I++;
+ assert(!WebAssembly::isArgument(MI));
+
+ if (MI.isDebugValue() || MI.isLabel())
+ continue;
+
+ // Replace tee instructions with tee_local. The difference is that tee
+      // instructions have two defs, while tee_local instructions have one def
+ // and an index of a local to write to.
+ if (WebAssembly::isTee(MI)) {
+ assert(MFI.isVRegStackified(MI.getOperand(0).getReg()));
+ assert(!MFI.isVRegStackified(MI.getOperand(1).getReg()));
+ unsigned OldReg = MI.getOperand(2).getReg();
+ const TargetRegisterClass *RC = MRI.getRegClass(OldReg);
+
+ // Stackify the input if it isn't stackified yet.
+ if (!MFI.isVRegStackified(OldReg)) {
+ unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg);
+ unsigned NewReg = MRI.createVirtualRegister(RC);
+ unsigned Opc = getGetLocalOpcode(RC);
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(Opc), NewReg)
+ .addImm(LocalId);
+ MI.getOperand(2).setReg(NewReg);
+ MFI.stackifyVReg(NewReg);
+ }
+
+ // Replace the TEE with a TEE_LOCAL.
+ unsigned LocalId =
+ getLocalId(Reg2Local, CurLocal, MI.getOperand(1).getReg());
+ unsigned Opc = getTeeLocalOpcode(RC);
+ BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(Opc),
+ MI.getOperand(0).getReg())
+ .addImm(LocalId)
+ .addReg(MI.getOperand(2).getReg());
+
+ MI.eraseFromParent();
+ Changed = true;
+ continue;
+ }
+
+ // Insert set_locals for any defs that aren't stackified yet. Currently
+ // we handle at most one def.
+ assert(MI.getDesc().getNumDefs() <= 1);
+ if (MI.getDesc().getNumDefs() == 1) {
+ unsigned OldReg = MI.getOperand(0).getReg();
+ if (!MFI.isVRegStackified(OldReg) && !MRI.use_empty(OldReg)) {
+ unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg);
+ const TargetRegisterClass *RC = MRI.getRegClass(OldReg);
+ unsigned NewReg = MRI.createVirtualRegister(RC);
+ auto InsertPt = std::next(MachineBasicBlock::iterator(&MI));
+ unsigned Opc = getSetLocalOpcode(RC);
+ BuildMI(MBB, InsertPt, MI.getDebugLoc(), TII->get(Opc))
+ .addImm(LocalId)
+ .addReg(NewReg);
+ MI.getOperand(0).setReg(NewReg);
+ MFI.stackifyVReg(NewReg);
+ Changed = true;
+ }
+ }
+
+ // Insert get_locals for any uses that aren't stackified yet.
+ MachineInstr *InsertPt = &MI;
+ for (MachineOperand &MO : reverse(MI.explicit_uses())) {
+ if (!MO.isReg())
+ continue;
+
+ unsigned OldReg = MO.getReg();
+
+ // If we see a stackified register, prepare to insert subsequent
+ // get_locals before the start of its tree.
+ if (MFI.isVRegStackified(OldReg)) {
+ InsertPt = FindStartOfTree(MO, MRI, MFI);
+ continue;
+ }
+
+ // Insert a get_local.
+ unsigned LocalId = getLocalId(Reg2Local, CurLocal, OldReg);
+ const TargetRegisterClass *RC = MRI.getRegClass(OldReg);
+ unsigned NewReg = MRI.createVirtualRegister(RC);
+ unsigned Opc = getGetLocalOpcode(RC);
+ InsertPt =
+ BuildMI(MBB, InsertPt, MI.getDebugLoc(), TII->get(Opc), NewReg)
+ .addImm(LocalId);
+ MO.setReg(NewReg);
+ MFI.stackifyVReg(NewReg);
+ Changed = true;
+ }
+
+ // Coalesce and eliminate COPY instructions.
+ if (WebAssembly::isCopy(MI)) {
+ MRI.replaceRegWith(MI.getOperand(1).getReg(),
+ MI.getOperand(0).getReg());
+ MI.eraseFromParent();
+ Changed = true;
+ }
+ }
+ }
+
+ // Define the locals.
+ for (size_t i = 0, e = MRI.getNumVirtRegs(); i < e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ auto I = Reg2Local.find(Reg);
+ if (I == Reg2Local.end() || I->second < MFI.getParams().size())
+ continue;
+
+ MFI.addLocal(typeForRegClass(MRI.getRegClass(Reg)));
+ Changed = true;
+ }
+
+#ifndef NDEBUG
+ // Assert that all registers have been stackified at this point.
+ for (const MachineBasicBlock &MBB : MF) {
+ for (const MachineInstr &MI : MBB) {
+ if (MI.isDebugValue() || MI.isLabel())
+ continue;
+ for (const MachineOperand &MO : MI.explicit_operands()) {
+ assert(
+ (!MO.isReg() || MRI.use_empty(MO.getReg()) ||
+ MFI.isVRegStackified(MO.getReg())) &&
+ "WebAssemblyExplicitLocals failed to stackify a register operand");
+ }
+ }
+ }
+#endif
+
+ return Changed;
+}
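For reference, the lazy local-id assignment used by getLocalId above can be pictured with an ordinary map and a running counter. This is a simplified sketch (plain C++, illustrative names), written as a variant that only advances the counter when a register is seen for the first time:

    #include <unordered_map>

    // Hand out local ids after the function parameters; each register keeps
    // the id it was given on first use.
    static unsigned getOrAssignLocalId(
        std::unordered_map<unsigned, unsigned> &Reg2Local, unsigned &CurLocal,
        unsigned Reg) {
      auto Res = Reg2Local.emplace(Reg, CurLocal);
      if (Res.second) // newly inserted: consume a fresh local id
        ++CurLocal;
      return Res.first->second;
    }
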
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
index 7bfa4074849b..529540ea4ed2 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFastISel.cpp
@@ -84,7 +84,10 @@ class WebAssemblyFastISel final : public FastISel {
return Base.FI;
}
- void setOffset(int64_t Offset_) { Offset = Offset_; }
+ void setOffset(int64_t Offset_) {
+ assert(Offset_ >= 0 && "Offsets must be non-negative");
+ Offset = Offset_;
+ }
int64_t getOffset() const { return Offset; }
void setGlobalValue(const GlobalValue *G) { GV = G; }
const GlobalValue *getGlobalValue() const { return GV; }
@@ -113,6 +116,13 @@ private:
case MVT::f32:
case MVT::f64:
return VT;
+ case MVT::v16i8:
+ case MVT::v8i16:
+ case MVT::v4i32:
+ case MVT::v4f32:
+ if (Subtarget->hasSIMD128())
+ return VT;
+ break;
default:
break;
}
@@ -229,12 +239,15 @@ bool WebAssemblyFastISel::computeAddress(const Value *Obj, Address &Addr) {
case Instruction::GetElementPtr: {
Address SavedAddr = Addr;
uint64_t TmpOffset = Addr.getOffset();
+ // Non-inbounds geps can wrap; wasm's offsets can't.
+ if (!cast<GEPOperator>(U)->isInBounds())
+ goto unsupported_gep;
// Iterate through the GEP folding the constants into offsets where
// we can.
for (gep_type_iterator GTI = gep_type_begin(U), E = gep_type_end(U);
GTI != E; ++GTI) {
const Value *Op = GTI.getOperand();
- if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ if (StructType *STy = GTI.getStructTypeOrNull()) {
const StructLayout *SL = DL.getStructLayout(STy);
unsigned Idx = cast<ConstantInt>(Op)->getZExtValue();
TmpOffset += SL->getElementOffset(Idx);
@@ -265,10 +278,13 @@ bool WebAssemblyFastISel::computeAddress(const Value *Obj, Address &Addr) {
}
}
}
- // Try to grab the base operand now.
- Addr.setOffset(TmpOffset);
- if (computeAddress(U->getOperand(0), Addr))
- return true;
+ // Don't fold in negative offsets.
+ if (int64_t(TmpOffset) >= 0) {
+ // Try to grab the base operand now.
+ Addr.setOffset(TmpOffset);
+ if (computeAddress(U->getOperand(0), Addr))
+ return true;
+ }
// We failed, restore everything and try the other options.
Addr = SavedAddr;
unsupported_gep:
@@ -294,8 +310,11 @@ bool WebAssemblyFastISel::computeAddress(const Value *Obj, Address &Addr) {
std::swap(LHS, RHS);
if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
- Addr.setOffset(Addr.getOffset() + CI->getSExtValue());
- return computeAddress(LHS, Addr);
+ uint64_t TmpOffset = Addr.getOffset() + CI->getSExtValue();
+ if (int64_t(TmpOffset) >= 0) {
+ Addr.setOffset(TmpOffset);
+ return computeAddress(LHS, Addr);
+ }
}
Address Backup = Addr;
@@ -311,8 +330,11 @@ bool WebAssemblyFastISel::computeAddress(const Value *Obj, Address &Addr) {
const Value *RHS = U->getOperand(1);
if (const ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
- Addr.setOffset(Addr.getOffset() - CI->getSExtValue());
- return computeAddress(LHS, Addr);
+ int64_t TmpOffset = Addr.getOffset() - CI->getSExtValue();
+ if (TmpOffset >= 0) {
+ Addr.setOffset(TmpOffset);
+ return computeAddress(LHS, Addr);
+ }
}
break;
}
@@ -341,6 +363,10 @@ void WebAssemblyFastISel::materializeLoadStoreOperands(Address &Addr) {
void WebAssemblyFastISel::addLoadStoreOperands(const Address &Addr,
const MachineInstrBuilder &MIB,
MachineMemOperand *MMO) {
+ // Set the alignment operand (this is rewritten in SetP2AlignOperands).
+ // TODO: Disable SetP2AlignOperands for FastISel and just do it here.
+ MIB.addImm(0);
+
if (const GlobalValue *GV = Addr.getGlobalValue())
MIB.addGlobalAddress(GV, Addr.getOffset());
else
@@ -351,10 +377,6 @@ void WebAssemblyFastISel::addLoadStoreOperands(const Address &Addr,
else
MIB.addFrameIndex(Addr.getFI());
- // Set the alignment operand (this is rewritten in SetP2AlignOperands).
- // TODO: Disable SetP2AlignOperands for FastISel and just do it here.
- MIB.addImm(0);
-
MIB.addMemOperand(MMO);
}
@@ -381,6 +403,9 @@ unsigned WebAssemblyFastISel::getRegForI1Value(const Value *V, bool &Not) {
unsigned WebAssemblyFastISel::zeroExtendToI32(unsigned Reg, const Value *V,
MVT::SimpleValueType From) {
+ if (Reg == 0)
+ return 0;
+
switch (From) {
case MVT::i1:
// If the value is naturally an i1, we don't need to mask it.
@@ -415,6 +440,9 @@ unsigned WebAssemblyFastISel::zeroExtendToI32(unsigned Reg, const Value *V,
unsigned WebAssemblyFastISel::signExtendToI32(unsigned Reg, const Value *V,
MVT::SimpleValueType From) {
+ if (Reg == 0)
+ return 0;
+
switch (From) {
case MVT::i1:
case MVT::i8:
@@ -529,8 +557,8 @@ unsigned WebAssemblyFastISel::fastMaterializeAlloca(const AllocaInst *AI) {
&WebAssembly::I64RegClass :
&WebAssembly::I32RegClass);
unsigned Opc = Subtarget->hasAddr64() ?
- WebAssembly::COPY_LOCAL_I64 :
- WebAssembly::COPY_LOCAL_I32;
+ WebAssembly::COPY_I64 :
+ WebAssembly::COPY_I32;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg)
.addFrameIndex(SI->second);
return ResultReg;
@@ -575,7 +603,9 @@ bool WebAssemblyFastISel::fastLowerArguments() {
return false;
Type *ArgTy = Arg.getType();
- if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy())
+ if (ArgTy->isStructTy() || ArgTy->isArrayTy())
+ return false;
+ if (!Subtarget->hasSIMD128() && ArgTy->isVectorTy())
return false;
unsigned Opc;
@@ -600,6 +630,22 @@ bool WebAssemblyFastISel::fastLowerArguments() {
Opc = WebAssembly::ARGUMENT_F64;
RC = &WebAssembly::F64RegClass;
break;
+ case MVT::v16i8:
+ Opc = WebAssembly::ARGUMENT_v16i8;
+ RC = &WebAssembly::V128RegClass;
+ break;
+ case MVT::v8i16:
+ Opc = WebAssembly::ARGUMENT_v8i16;
+ RC = &WebAssembly::V128RegClass;
+ break;
+ case MVT::v4i32:
+ Opc = WebAssembly::ARGUMENT_v4i32;
+ RC = &WebAssembly::V128RegClass;
+ break;
+ case MVT::v4f32:
+ Opc = WebAssembly::ARGUMENT_v4f32;
+ RC = &WebAssembly::V128RegClass;
+ break;
default:
return false;
}
@@ -637,29 +683,52 @@ bool WebAssemblyFastISel::selectCall(const Instruction *I) {
bool IsVoid = FuncTy->getReturnType()->isVoidTy();
unsigned ResultReg;
if (IsVoid) {
- Opc = IsDirect ? WebAssembly::CALL_VOID : WebAssembly::CALL_INDIRECT_VOID;
+ Opc = IsDirect ? WebAssembly::CALL_VOID : WebAssembly::PCALL_INDIRECT_VOID;
} else {
+ if (!Subtarget->hasSIMD128() && Call->getType()->isVectorTy())
+ return false;
+
MVT::SimpleValueType RetTy = getSimpleType(Call->getType());
switch (RetTy) {
case MVT::i1:
case MVT::i8:
case MVT::i16:
case MVT::i32:
- Opc = IsDirect ? WebAssembly::CALL_I32 : WebAssembly::CALL_INDIRECT_I32;
+ Opc = IsDirect ? WebAssembly::CALL_I32 : WebAssembly::PCALL_INDIRECT_I32;
ResultReg = createResultReg(&WebAssembly::I32RegClass);
break;
case MVT::i64:
- Opc = IsDirect ? WebAssembly::CALL_I64 : WebAssembly::CALL_INDIRECT_I64;
+ Opc = IsDirect ? WebAssembly::CALL_I64 : WebAssembly::PCALL_INDIRECT_I64;
ResultReg = createResultReg(&WebAssembly::I64RegClass);
break;
case MVT::f32:
- Opc = IsDirect ? WebAssembly::CALL_F32 : WebAssembly::CALL_INDIRECT_F32;
+ Opc = IsDirect ? WebAssembly::CALL_F32 : WebAssembly::PCALL_INDIRECT_F32;
ResultReg = createResultReg(&WebAssembly::F32RegClass);
break;
case MVT::f64:
- Opc = IsDirect ? WebAssembly::CALL_F64 : WebAssembly::CALL_INDIRECT_F64;
+ Opc = IsDirect ? WebAssembly::CALL_F64 : WebAssembly::PCALL_INDIRECT_F64;
ResultReg = createResultReg(&WebAssembly::F64RegClass);
break;
+ case MVT::v16i8:
+ Opc =
+ IsDirect ? WebAssembly::CALL_v16i8 : WebAssembly::PCALL_INDIRECT_v16i8;
+ ResultReg = createResultReg(&WebAssembly::V128RegClass);
+ break;
+ case MVT::v8i16:
+ Opc =
+ IsDirect ? WebAssembly::CALL_v8i16 : WebAssembly::PCALL_INDIRECT_v8i16;
+ ResultReg = createResultReg(&WebAssembly::V128RegClass);
+ break;
+ case MVT::v4i32:
+ Opc =
+ IsDirect ? WebAssembly::CALL_v4i32 : WebAssembly::PCALL_INDIRECT_v4i32;
+ ResultReg = createResultReg(&WebAssembly::V128RegClass);
+ break;
+ case MVT::v4f32:
+ Opc =
+ IsDirect ? WebAssembly::CALL_v4f32 : WebAssembly::PCALL_INDIRECT_v4f32;
+ ResultReg = createResultReg(&WebAssembly::V128RegClass);
+ break;
default:
return false;
}
@@ -972,6 +1041,8 @@ bool WebAssemblyFastISel::selectLoad(const Instruction *I) {
const LoadInst *Load = cast<LoadInst>(I);
if (Load->isAtomic())
return false;
+ if (!Subtarget->hasSIMD128() && Load->getType()->isVectorTy())
+ return false;
Address Addr;
if (!computeAddress(Load->getPointerOperand(), Addr))
@@ -1027,40 +1098,36 @@ bool WebAssemblyFastISel::selectStore(const Instruction *I) {
const StoreInst *Store = cast<StoreInst>(I);
if (Store->isAtomic())
return false;
+ if (!Subtarget->hasSIMD128() &&
+ Store->getValueOperand()->getType()->isVectorTy())
+ return false;
Address Addr;
if (!computeAddress(Store->getPointerOperand(), Addr))
return false;
unsigned Opc;
- const TargetRegisterClass *RC;
bool VTIsi1 = false;
switch (getSimpleType(Store->getValueOperand()->getType())) {
case MVT::i1:
VTIsi1 = true;
case MVT::i8:
Opc = WebAssembly::STORE8_I32;
- RC = &WebAssembly::I32RegClass;
break;
case MVT::i16:
Opc = WebAssembly::STORE16_I32;
- RC = &WebAssembly::I32RegClass;
break;
case MVT::i32:
Opc = WebAssembly::STORE_I32;
- RC = &WebAssembly::I32RegClass;
break;
case MVT::i64:
Opc = WebAssembly::STORE_I64;
- RC = &WebAssembly::I64RegClass;
break;
case MVT::f32:
Opc = WebAssembly::STORE_F32;
- RC = &WebAssembly::F32RegClass;
break;
case MVT::f64:
Opc = WebAssembly::STORE_F64;
- RC = &WebAssembly::F64RegClass;
break;
default: return false;
}
@@ -1068,12 +1135,12 @@ bool WebAssemblyFastISel::selectStore(const Instruction *I) {
materializeLoadStoreOperands(Addr);
unsigned ValueReg = getRegForValue(Store->getValueOperand());
+ if (ValueReg == 0)
+ return false;
if (VTIsi1)
ValueReg = maskI1Value(ValueReg, Store->getValueOperand());
- unsigned ResultReg = createResultReg(RC);
- auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc),
- ResultReg);
+ auto MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc));
addLoadStoreOperands(Addr, MIB, createMachineMemOperandFor(Store));
@@ -1094,6 +1161,8 @@ bool WebAssemblyFastISel::selectBr(const Instruction *I) {
bool Not;
unsigned CondReg = getRegForI1Value(Br->getCondition(), Not);
+ if (CondReg == 0)
+ return false;
unsigned Opc = WebAssembly::BR_IF;
if (Not)
@@ -1102,7 +1171,7 @@ bool WebAssemblyFastISel::selectBr(const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc))
.addMBB(TBB)
.addReg(CondReg);
-
+
finishCondBranch(Br->getParent(), TBB, FBB);
return true;
}
@@ -1120,6 +1189,9 @@ bool WebAssemblyFastISel::selectRet(const Instruction *I) {
}
Value *RV = Ret->getOperand(0);
+ if (!Subtarget->hasSIMD128() && RV->getType()->isVectorTy())
+ return false;
+
unsigned Opc;
switch (getSimpleType(RV->getType())) {
case MVT::i1: case MVT::i8:
@@ -1129,8 +1201,24 @@ bool WebAssemblyFastISel::selectRet(const Instruction *I) {
case MVT::i64:
Opc = WebAssembly::RETURN_I64;
break;
- case MVT::f32: Opc = WebAssembly::RETURN_F32; break;
- case MVT::f64: Opc = WebAssembly::RETURN_F64; break;
+ case MVT::f32:
+ Opc = WebAssembly::RETURN_F32;
+ break;
+ case MVT::f64:
+ Opc = WebAssembly::RETURN_F64;
+ break;
+ case MVT::v16i8:
+ Opc = WebAssembly::RETURN_v16i8;
+ break;
+ case MVT::v8i16:
+ Opc = WebAssembly::RETURN_v8i16;
+ break;
+ case MVT::v4i32:
+ Opc = WebAssembly::RETURN_v4i32;
+ break;
+ case MVT::v4f32:
+ Opc = WebAssembly::RETURN_v4f32;
+ break;
default: return false;
}
@@ -1142,6 +1230,9 @@ bool WebAssemblyFastISel::selectRet(const Instruction *I) {
else
Reg = getRegForValue(RV);
+ if (Reg == 0)
+ return false;
+
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)).addReg(Reg);
return true;
}
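The offset guards added to computeAddress above all enforce the same invariant: WebAssembly load/store offsets are unsigned, so FastISel may only fold a constant into the address when the running total stays non-negative. A small, self-contained sketch of that check (plain C++; the helper name is invented for illustration, and overflow handling is omitted):

    #include <cstdint>
    #include <optional>

    // Returns the folded offset, or nothing if folding would go negative and
    // the addition/subtraction must stay as an explicit instruction.
    static std::optional<int64_t> tryFoldOffset(int64_t Current, int64_t Delta) {
      int64_t Total = Current + Delta;
      if (Total < 0)
        return std::nullopt;
      return Total;
    }
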
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp
index 5dc90920e310..2bbf7a2b42f9 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp
@@ -47,7 +47,7 @@ using namespace llvm;
namespace {
class WebAssemblyFixIrreducibleControlFlow final : public MachineFunctionPass {
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "WebAssembly Fix Irreducible Control Flow";
}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
index 0a5782e5c287..a6a2c0bf06ae 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp
@@ -37,15 +37,34 @@ using namespace llvm;
// TODO: wasm64
// TODO: Emit TargetOpcode::CFI_INSTRUCTION instructions
+/// We need a base pointer in the case of having items on the stack that
+/// require stricter alignment than the stack pointer itself. Because we need
+/// to shift the stack pointer by some unknown amount to force the alignment,
+/// we need to record the value of the stack pointer on entry to the function.
+bool WebAssemblyFrameLowering::hasBP(
+ const MachineFunction &MF) const {
+ const auto *RegInfo =
+ MF.getSubtarget<WebAssemblySubtarget>().getRegisterInfo();
+ return RegInfo->needsStackRealignment(MF);
+}
+
/// Return true if the specified function should have a dedicated frame pointer
/// register.
bool WebAssemblyFrameLowering::hasFP(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- const auto *RegInfo =
- MF.getSubtarget<WebAssemblySubtarget>().getRegisterInfo();
- return MFI->isFrameAddressTaken() || MFI->hasVarSizedObjects() ||
- MFI->hasStackMap() || MFI->hasPatchPoint() ||
- RegInfo->needsStackRealignment(MF);
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+
+ // When we have var-sized objects, we move the stack pointer by an unknown
+ // amount, and need to emit a frame pointer to restore the stack to where we
+ // were on function entry.
+ // If we already need a base pointer, we use that to fix up the stack pointer.
+ // If there are no fixed-size objects, we would have no use of a frame
+ // pointer, and thus should not emit one.
+ bool HasFixedSizedObjects = MFI.getStackSize() > 0;
+ bool NeedsFixedReference = !hasBP(MF) || HasFixedSizedObjects;
+
+ return MFI.isFrameAddressTaken() ||
+ (MFI.hasVarSizedObjects() && NeedsFixedReference) ||
+ MFI.hasStackMap() || MFI.hasPatchPoint();
}
/// Under normal circumstances, when a frame pointer is not required, we reserve
@@ -55,7 +74,7 @@ bool WebAssemblyFrameLowering::hasFP(const MachineFunction &MF) const {
/// frame.
bool WebAssemblyFrameLowering::hasReservedCallFrame(
const MachineFunction &MF) const {
- return !MF.getFrameInfo()->hasVarSizedObjects();
+ return !MF.getFrameInfo().hasVarSizedObjects();
}
@@ -88,18 +107,17 @@ static void writeSPToMemory(unsigned SrcReg, MachineFunction &MF,
const TargetRegisterClass *PtrRC =
MRI.getTargetRegisterInfo()->getPointerRegClass(MF);
unsigned Zero = MRI.createVirtualRegister(PtrRC);
- unsigned Drop = MRI.createVirtualRegister(PtrRC);
const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
BuildMI(MBB, InsertAddr, DL, TII->get(WebAssembly::CONST_I32), Zero)
.addImm(0);
- auto *MMO = new MachineMemOperand(MachinePointerInfo(MF.getPSVManager()
- .getExternalSymbolCallEntry(ES)),
- MachineMemOperand::MOStore, 4, 4);
- BuildMI(MBB, InsertStore, DL, TII->get(WebAssembly::STORE_I32), Drop)
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo(MF.getPSVManager().getExternalSymbolCallEntry(ES)),
+ MachineMemOperand::MOStore, 4, 4);
+ BuildMI(MBB, InsertStore, DL, TII->get(WebAssembly::STORE_I32))
+ .addImm(2) // p2align
.addExternalSymbol(SPSymbol)
.addReg(Zero)
- .addImm(2) // p2align
.addReg(SrcReg)
.addMemOperand(MMO);
}
@@ -108,11 +126,11 @@ MachineBasicBlock::iterator
WebAssemblyFrameLowering::eliminateCallFramePseudoInstr(
MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
- assert(!I->getOperand(0).getImm() && hasFP(MF) &&
+ assert(!I->getOperand(0).getImm() && (hasFP(MF) || hasBP(MF)) &&
"Call frame pseudos should only be used for dynamic stack adjustment");
const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
if (I->getOpcode() == TII->getCallFrameDestroyOpcode() &&
- needsSPWriteback(MF, *MF.getFrameInfo())) {
+ needsSPWriteback(MF, MF.getFrameInfo())) {
DebugLoc DL = I->getDebugLoc();
writeSPToMemory(WebAssembly::SP32, MF, MBB, I, I, DL);
}
@@ -122,12 +140,12 @@ WebAssemblyFrameLowering::eliminateCallFramePseudoInstr(
void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
// TODO: Do ".setMIFlag(MachineInstr::FrameSetup)" on emitted instructions
- auto *MFI = MF.getFrameInfo();
- assert(MFI->getCalleeSavedInfo().empty() &&
+ auto &MFI = MF.getFrameInfo();
+ assert(MFI.getCalleeSavedInfo().empty() &&
"WebAssembly should not have callee-saved registers");
- if (!needsSP(MF, *MFI)) return;
- uint64_t StackSize = MFI->getStackSize();
+ if (!needsSP(MF, MFI)) return;
+ uint64_t StackSize = MFI.getStackSize();
const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
auto &MRI = MF.getRegInfo();
@@ -138,22 +156,31 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF,
const TargetRegisterClass *PtrRC =
MRI.getTargetRegisterInfo()->getPointerRegClass(MF);
unsigned Zero = MRI.createVirtualRegister(PtrRC);
- unsigned SPReg = MRI.createVirtualRegister(PtrRC);
+ unsigned SPReg = WebAssembly::SP32;
+ if (StackSize)
+ SPReg = MRI.createVirtualRegister(PtrRC);
const char *ES = "__stack_pointer";
auto *SPSymbol = MF.createExternalSymbolName(ES);
BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), Zero)
.addImm(0);
- auto *LoadMMO = new MachineMemOperand(MachinePointerInfo(MF.getPSVManager()
- .getExternalSymbolCallEntry(ES)),
- MachineMemOperand::MOLoad, 4, 4);
+ MachineMemOperand *LoadMMO = MF.getMachineMemOperand(
+ MachinePointerInfo(MF.getPSVManager().getExternalSymbolCallEntry(ES)),
+ MachineMemOperand::MOLoad, 4, 4);
// Load the SP value.
- BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::LOAD_I32),
- StackSize ? SPReg : (unsigned)WebAssembly::SP32)
+ BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::LOAD_I32), SPReg)
+ .addImm(2) // p2align
.addExternalSymbol(SPSymbol)
.addReg(Zero) // addr
- .addImm(2) // p2align
.addMemOperand(LoadMMO);
+ bool HasBP = hasBP(MF);
+ if (HasBP) {
+ auto FI = MF.getInfo<WebAssemblyFunctionInfo>();
+ unsigned BasePtr = MRI.createVirtualRegister(PtrRC);
+ FI->setBasePointerVreg(BasePtr);
+ BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::COPY), BasePtr)
+ .addReg(SPReg);
+ }
if (StackSize) {
// Subtract the frame size
unsigned OffsetReg = MRI.createVirtualRegister(PtrRC);
@@ -164,6 +191,18 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF,
.addReg(SPReg)
.addReg(OffsetReg);
}
+ if (HasBP) {
+ unsigned BitmaskReg = MRI.createVirtualRegister(PtrRC);
+ unsigned Alignment = MFI.getMaxAlignment();
+ assert((1u << countTrailingZeros(Alignment)) == Alignment &&
+ "Alignment must be a power of 2");
+ BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::CONST_I32), BitmaskReg)
+ .addImm((int)~(Alignment - 1));
+ BuildMI(MBB, InsertPt, DL, TII->get(WebAssembly::AND_I32),
+ WebAssembly::SP32)
+ .addReg(WebAssembly::SP32)
+ .addReg(BitmaskReg);
+ }
if (hasFP(MF)) {
// Unlike most conventional targets (where FP points to the saved FP),
// FP points to the bottom of the fixed-size locals, so we can use positive
@@ -172,16 +211,16 @@ void WebAssemblyFrameLowering::emitPrologue(MachineFunction &MF,
WebAssembly::FP32)
.addReg(WebAssembly::SP32);
}
- if (StackSize && needsSPWriteback(MF, *MFI)) {
+ if (StackSize && needsSPWriteback(MF, MFI)) {
writeSPToMemory(WebAssembly::SP32, MF, MBB, InsertPt, InsertPt, DL);
}
}
void WebAssemblyFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
- auto *MFI = MF.getFrameInfo();
- uint64_t StackSize = MFI->getStackSize();
- if (!needsSP(MF, *MFI) || !needsSPWriteback(MF, *MFI)) return;
+ auto &MFI = MF.getFrameInfo();
+ uint64_t StackSize = MFI.getStackSize();
+ if (!needsSP(MF, MFI) || !needsSPWriteback(MF, MFI)) return;
const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
auto &MRI = MF.getRegInfo();
auto InsertPt = MBB.getFirstTerminator();
@@ -194,7 +233,10 @@ void WebAssemblyFrameLowering::emitEpilogue(MachineFunction &MF,
// subtracted in the prolog.
unsigned SPReg = 0;
MachineBasicBlock::iterator InsertAddr = InsertPt;
- if (StackSize) {
+ if (hasBP(MF)) {
+ auto FI = MF.getInfo<WebAssemblyFunctionInfo>();
+ SPReg = FI->getBasePointerVreg();
+ } else if (StackSize) {
const TargetRegisterClass *PtrRC =
MRI.getTargetRegisterInfo()->getPointerRegClass(MF);
unsigned OffsetReg = MRI.createVirtualRegister(PtrRC);
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
index e20fc5df7443..bf326fce88fa 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.h
@@ -46,6 +46,7 @@ class WebAssemblyFrameLowering final : public TargetFrameLowering {
bool hasReservedCallFrame(const MachineFunction &MF) const override;
private:
+ bool hasBP(const MachineFunction &MF) const;
bool needsSP(const MachineFunction &MF, const MachineFrameInfo &MFI) const;
bool needsSPWriteback(const MachineFunction &MF,
const MachineFrameInfo &MFI) const;
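The realignment sequence in the prologue above reduces to one bit trick: clearing the low bits of the stack pointer rounds it down to a power-of-two boundary, which is also why the incoming SP has to be saved in a base pointer first. A self-contained sketch of the math (plain C++; not the LLVM code itself):

    #include <cassert>
    #include <cstdint>

    // Round SP down to the requested power-of-two alignment, mirroring the
    // CONST_I32 ~(Alignment - 1) / AND_I32 pair emitted in the prologue.
    static uint32_t alignStackDown(uint32_t SP, uint32_t Alignment) {
      assert(Alignment != 0 && (Alignment & (Alignment - 1)) == 0 &&
             "Alignment must be a power of 2");
      return SP & ~(Alignment - 1);
    }
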
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
index 88c38b3602b9..a67137f867e7 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelDAGToDAG.cpp
@@ -42,7 +42,7 @@ public:
: SelectionDAGISel(tm, OptLevel), Subtarget(nullptr), ForCodeSize(false) {
}
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "WebAssembly Instruction Selection";
}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 9e7731997d58..6a7f75a6b3a1 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -54,6 +54,12 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
addRegisterClass(MVT::i64, &WebAssembly::I64RegClass);
addRegisterClass(MVT::f32, &WebAssembly::F32RegClass);
addRegisterClass(MVT::f64, &WebAssembly::F64RegClass);
+ if (Subtarget->hasSIMD128()) {
+ addRegisterClass(MVT::v16i8, &WebAssembly::V128RegClass);
+ addRegisterClass(MVT::v8i16, &WebAssembly::V128RegClass);
+ addRegisterClass(MVT::v4i32, &WebAssembly::V128RegClass);
+ addRegisterClass(MVT::v4f32, &WebAssembly::V128RegClass);
+ }
// Compute derived properties from the register classes.
computeRegisterProperties(Subtarget->getRegisterInfo());
@@ -190,6 +196,10 @@ WebAssemblyTargetLowering::getRegForInlineAsmConstraint(
switch (Constraint[0]) {
case 'r':
assert(VT != MVT::iPTR && "Pointer MVT not expected here");
+ if (Subtarget->hasSIMD128() && VT.isVector()) {
+ if (VT.getSizeInBits() == 128)
+ return std::make_pair(0U, &WebAssembly::V128RegClass);
+ }
if (VT.isInteger() && !VT.isVector()) {
if (VT.getSizeInBits() <= 32)
return std::make_pair(0U, &WebAssembly::I32RegClass);
@@ -319,10 +329,10 @@ SDValue WebAssemblyTargetLowering::LowerCall(
if (Out.Flags.isInConsecutiveRegsLast())
fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments");
if (Out.Flags.isByVal() && Out.Flags.getByValSize() != 0) {
- auto *MFI = MF.getFrameInfo();
- int FI = MFI->CreateStackObject(Out.Flags.getByValSize(),
- Out.Flags.getByValAlign(),
- /*isSS=*/false);
+ auto &MFI = MF.getFrameInfo();
+ int FI = MFI.CreateStackObject(Out.Flags.getByValSize(),
+ Out.Flags.getByValAlign(),
+ /*isSS=*/false);
SDValue SizeNode =
DAG.getConstant(Out.Flags.getByValSize(), DL, MVT::i32);
SDValue FINode = DAG.getFrameIndex(FI, getPointerTy(Layout));
@@ -365,9 +375,9 @@ SDValue WebAssemblyTargetLowering::LowerCall(
if (IsVarArg && NumBytes) {
// For non-fixed arguments, next emit stores to store the argument values
// to the stack buffer at the offsets computed above.
- int FI = MF.getFrameInfo()->CreateStackObject(NumBytes,
- Layout.getStackAlignment(),
- /*isSS=*/false);
+ int FI = MF.getFrameInfo().CreateStackObject(NumBytes,
+ Layout.getStackAlignment(),
+ /*isSS=*/false);
unsigned ValNo = 0;
SmallVector<SDValue, 8> Chains;
for (SDValue Arg :
@@ -471,12 +481,12 @@ SDValue WebAssemblyTargetLowering::LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
- MachineFunction &MF = DAG.getMachineFunction();
- auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>();
-
if (!CallingConvSupported(CallConv))
fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions");
+ MachineFunction &MF = DAG.getMachineFunction();
+ auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>();
+
// Set up the incoming ARGUMENTS value, which serves to represent the liveness
// of the incoming values before they're represented by virtual registers.
MF.getRegInfo().addLiveIn(WebAssembly::ARGUMENTS);
@@ -516,6 +526,13 @@ SDValue WebAssemblyTargetLowering::LowerFormalArguments(
MFI->addParam(PtrVT);
}
+ // Record the number and types of results.
+ SmallVector<MVT, 4> Params;
+ SmallVector<MVT, 4> Results;
+ ComputeSignatureVTs(*MF.getFunction(), DAG.getTarget(), Params, Results);
+ for (MVT VT : Results)
+ MFI->addResult(VT);
+
return Chain;
}
@@ -570,8 +587,8 @@ SDValue WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op,
unsigned Reg = cast<RegisterSDNode>(Op.getOperand(1))->getReg();
EVT VT = Src.getValueType();
SDValue Copy(
- DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_LOCAL_I32
- : WebAssembly::COPY_LOCAL_I64,
+ DAG.getMachineNode(VT == MVT::i32 ? WebAssembly::COPY_I32
+ : WebAssembly::COPY_I64,
DL, VT, Src),
0);
return Op.getNode()->getNumValues() == 1
@@ -597,7 +614,7 @@ SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op,
if (Op.getConstantOperandVal(0) > 0)
return SDValue();
- DAG.getMachineFunction().getFrameInfo()->setFrameAddressIsTaken(true);
+ DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true);
EVT VT = Op.getValueType();
unsigned FP =
Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction());
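The ISelLowering hunks above make all four 128-bit vector types (v16i8, v8i16, v4i32, v4f32) use the single V128 register class when SIMD128 is enabled, and let 128-bit vector inline-asm 'r' operands resolve to that class as well. A small standalone illustration of the underlying idea, that these types are just different lane views of the same 16 bytes; this is an explanatory sketch, not LLVM code:

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  unsigned char V128[16];                      // one 128-bit register's storage

  float F32x4[4] = {1.0f, 2.0f, 3.0f, 4.0f};   // v4f32 view
  std::memcpy(V128, F32x4, sizeof V128);

  uint32_t I32x4[4];                           // v4i32 view of the same bits
  std::memcpy(I32x4, V128, sizeof I32x4);
  uint8_t I8x16[16];                           // v16i8 view of the same bits
  std::memcpy(I8x16, V128, sizeof I8x16);

  std::printf("lane 0: f32 %.1f, i32 bits %#x, byte 0 %#x\n",
              F32x4[0], I32x4[0], I8x16[0]);
  return 0;
}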
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td
index cfa1519e6d99..047f4be066c0 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrCall.td
@@ -26,25 +26,65 @@ def ADJCALLSTACKUP : I<(outs), (ins i32imm:$amt, i32imm:$amt2),
} // isCodeGenOnly = 1
multiclass CALL<WebAssemblyRegClass vt, string prefix> {
- def CALL_#vt : I<(outs vt:$dst), (ins i32imm:$callee, variable_ops),
+ def CALL_#vt : I<(outs vt:$dst), (ins function32_op:$callee, variable_ops),
[(set vt:$dst, (WebAssemblycall1 (i32 imm:$callee)))],
- !strconcat(prefix, "call\t$dst, $callee")>;
- def CALL_INDIRECT_#vt : I<(outs vt:$dst), (ins I32:$callee, variable_ops),
- [(set vt:$dst, (WebAssemblycall1 I32:$callee))],
- !strconcat(prefix, "call_indirect\t$dst, $callee")>;
+ !strconcat(prefix, "call\t$dst, $callee"),
+ 0x10>;
+ let isCodeGenOnly = 1 in {
+ def PCALL_INDIRECT_#vt : I<(outs vt:$dst), (ins I32:$callee, variable_ops),
+ [(set vt:$dst, (WebAssemblycall1 I32:$callee))],
+ "PSEUDO CALL INDIRECT\t$callee">;
+ } // isCodeGenOnly = 1
+
+ def CALL_INDIRECT_#vt : I<(outs vt:$dst), (ins i32imm:$flags, variable_ops),
+ [],
+ !strconcat(prefix, "call_indirect\t$dst"),
+ 0x11>;
+}
+
+multiclass SIMD_CALL<ValueType vt, string prefix> {
+ def CALL_#vt : SIMD_I<(outs V128:$dst), (ins function32_op:$callee, variable_ops),
+ [(set (vt V128:$dst),
+ (WebAssemblycall1 (i32 imm:$callee)))],
+ !strconcat(prefix, "call\t$dst, $callee"),
+ 0x10>;
+ let isCodeGenOnly = 1 in {
+ def PCALL_INDIRECT_#vt : SIMD_I<(outs V128:$dst),
+ (ins I32:$callee, variable_ops),
+ [(set (vt V128:$dst),
+ (WebAssemblycall1 I32:$callee))],
+ "PSEUDO CALL INDIRECT\t$callee">;
+ } // isCodeGenOnly = 1
+
+ def CALL_INDIRECT_#vt : SIMD_I<(outs V128:$dst),
+ (ins i32imm:$flags, variable_ops),
+ [],
+ !strconcat(prefix, "call_indirect\t$dst"),
+ 0x11>;
}
+
let Uses = [SP32, SP64], isCall = 1 in {
defm : CALL<I32, "i32.">;
defm : CALL<I64, "i64.">;
defm : CALL<F32, "f32.">;
defm : CALL<F64, "f64.">;
+ defm : SIMD_CALL<v16i8, "i8x16.">;
+ defm : SIMD_CALL<v8i16, "i16x8.">;
+ defm : SIMD_CALL<v4i32, "i32x4.">;
+ defm : SIMD_CALL<v4f32, "f32x4.">;
- def CALL_VOID : I<(outs), (ins i32imm:$callee, variable_ops),
+ def CALL_VOID : I<(outs), (ins function32_op:$callee, variable_ops),
[(WebAssemblycall0 (i32 imm:$callee))],
- "call \t$callee">;
- def CALL_INDIRECT_VOID : I<(outs), (ins I32:$callee, variable_ops),
- [(WebAssemblycall0 I32:$callee)],
- "call_indirect\t$callee">;
+ "call \t$callee", 0x10>;
+ let isCodeGenOnly = 1 in {
+ def PCALL_INDIRECT_VOID : I<(outs), (ins I32:$callee, variable_ops),
+ [(WebAssemblycall0 I32:$callee)],
+ "PSEUDO CALL INDIRECT\t$callee">;
+ } // isCodeGenOnly = 1
+
+ def CALL_INDIRECT_VOID : I<(outs), (ins i32imm:$flags, variable_ops),
+ [],
+ "call_indirect\t", 0x11>;
} // Uses = [SP32,SP64], isCall = 1
} // Defs = [ARGUMENTS]
@@ -58,6 +98,14 @@ def : Pat<(f32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
(CALL_F32 tglobaladdr:$callee)>;
def : Pat<(f64 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
(CALL_F64 tglobaladdr:$callee)>;
+def : Pat<(v16i8 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
+ (CALL_v16i8 tglobaladdr:$callee)>, Requires<[HasSIMD128]>;
+def : Pat<(v8i16 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
+ (CALL_v8i16 tglobaladdr:$callee)>, Requires<[HasSIMD128]>;
+def : Pat<(v4i32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
+ (CALL_v4i32 tglobaladdr:$callee)>, Requires<[HasSIMD128]>;
+def : Pat<(v4f32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))),
+ (CALL_v4f32 tglobaladdr:$callee)>, Requires<[HasSIMD128]>;
def : Pat<(WebAssemblycall0 (WebAssemblywrapper tglobaladdr:$callee)),
(CALL_VOID tglobaladdr:$callee)>;
@@ -70,5 +118,13 @@ def : Pat<(f32 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
(CALL_F32 texternalsym:$callee)>;
def : Pat<(f64 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
(CALL_F64 texternalsym:$callee)>;
+def : Pat<(v16i8 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
+ (CALL_v16i8 texternalsym:$callee)>, Requires<[HasSIMD128]>;
+def : Pat<(v8i16 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
+ (CALL_v8i16 texternalsym:$callee)>, Requires<[HasSIMD128]>;
+def : Pat<(v4i32 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
+ (CALL_v4i32 texternalsym:$callee)>, Requires<[HasSIMD128]>;
+def : Pat<(v4f32 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))),
+ (CALL_v4f32 texternalsym:$callee)>, Requires<[HasSIMD128]>;
def : Pat<(WebAssemblycall0 (WebAssemblywrapper texternalsym:$callee)),
(CALL_VOID texternalsym:$callee)>;
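The CALL changes above give direct calls the binary opcode 0x10 with a function32_op (function index) operand and indirect calls opcode 0x11, keeping the register-operand form only as a codegen-only pseudo (PCALL_INDIRECT_*). A rough standalone sketch of how those two opcodes sit in the wasm binary format as I read it, an opcode byte followed by LEB128-encoded indices; the trailing reserved table byte on call_indirect is the MVP encoding and is an assumption beyond what this patch shows:

#include <cstdint>
#include <cstdio>
#include <vector>

// Unsigned LEB128, as used for immediates in the wasm binary format.
static void writeULEB128(std::vector<uint8_t> &Out, uint64_t V) {
  do {
    uint8_t Byte = V & 0x7f;
    V >>= 7;
    if (V != 0)
      Byte |= 0x80;               // more bytes follow
    Out.push_back(Byte);
  } while (V != 0);
}

int main() {
  std::vector<uint8_t> Code;

  // call <funcidx>          -- opcode 0x10 (CALL_* above)
  Code.push_back(0x10);
  writeULEB128(Code, 3);          // hypothetical function index 3

  // call_indirect <typeidx> -- opcode 0x11 (CALL_INDIRECT_* above)
  Code.push_back(0x11);
  writeULEB128(Code, 0);          // hypothetical type index 0
  Code.push_back(0x00);           // reserved table-index byte (MVP assumption)

  for (uint8_t B : Code)
    std::printf("%02x ", B);
  std::printf("\n");
  return 0;
}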
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td
index 444e275c6ebf..1146431e6b77 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrControl.td
@@ -18,14 +18,13 @@ let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
// The condition operand is a boolean value which WebAssembly represents as i32.
def BR_IF : I<(outs), (ins bb_op:$dst, I32:$cond),
[(brcond I32:$cond, bb:$dst)],
- "br_if \t$dst, $cond">;
+ "br_if \t$dst, $cond", 0x0d>;
let isCodeGenOnly = 1 in
-def BR_UNLESS : I<(outs), (ins bb_op:$dst, I32:$cond), [],
- "br_unless\t$dst, $cond">;
+def BR_UNLESS : I<(outs), (ins bb_op:$dst, I32:$cond), []>;
let isBarrier = 1 in {
def BR : I<(outs), (ins bb_op:$dst),
[(br bb:$dst)],
- "br \t$dst">;
+ "br \t$dst", 0x0c>;
} // isBarrier = 1
} // isBranch = 1, isTerminator = 1, hasCtrlDep = 1
@@ -46,7 +45,7 @@ let Defs = [ARGUMENTS] in {
let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
def BR_TABLE_I32 : I<(outs), (ins I32:$index, variable_ops),
[(WebAssemblybr_table I32:$index)],
- "br_table \t$index"> {
+ "br_table \t$index", 0x0e> {
let TSFlags{0} = 1;
let TSFlags{1} = 1;
}
@@ -59,37 +58,57 @@ def BR_TABLE_I64 : I<(outs), (ins I64:$index, variable_ops),
} // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1
// Placemarkers to indicate the start or end of a block or loop scope. These
-// use/clobber EXPR_STACK to prevent them from being moved into the middle of
+// use/clobber VALUE_STACK to prevent them from being moved into the middle of
// an expression tree.
-let Uses = [EXPR_STACK], Defs = [EXPR_STACK] in {
-def BLOCK : I<(outs), (ins), [], "block">;
-def LOOP : I<(outs), (ins), [], "loop">;
-def END_BLOCK : I<(outs), (ins), [], "end_block">;
-def END_LOOP : I<(outs), (ins), [], "end_loop">;
-} // Uses = [EXPR_STACK], Defs = [EXPR_STACK]
+let Uses = [VALUE_STACK], Defs = [VALUE_STACK] in {
+def BLOCK : I<(outs), (ins Signature:$sig), [], "block \t$sig", 0x02>;
+def LOOP : I<(outs), (ins Signature:$sig), [], "loop \t$sig", 0x03>;
+
+// END_BLOCK and END_LOOP are represented with the same opcode in wasm.
+def END_BLOCK : I<(outs), (ins), [], "end_block", 0x0b>;
+def END_LOOP : I<(outs), (ins), [], "end_loop", 0x0b>;
+} // Uses = [VALUE_STACK], Defs = [VALUE_STACK]
multiclass RETURN<WebAssemblyRegClass vt> {
def RETURN_#vt : I<(outs), (ins vt:$val), [(WebAssemblyreturn vt:$val)],
- "return \t$val">;
+ "return \t$val", 0x0f>;
// Equivalent to RETURN_#vt, for use at the end of a function when wasm
// semantics return by falling off the end of the block.
let isCodeGenOnly = 1 in
def FALLTHROUGH_RETURN_#vt : I<(outs), (ins vt:$val), []>;
}
+multiclass SIMD_RETURN<ValueType vt> {
+ def RETURN_#vt : SIMD_I<(outs), (ins V128:$val),
+ [(WebAssemblyreturn (vt V128:$val))],
+ "return \t$val", 0x0f>;
+ // Equivalent to RETURN_#vt, for use at the end of a function when wasm
+ // semantics return by falling off the end of the block.
+ let isCodeGenOnly = 1 in
+ def FALLTHROUGH_RETURN_#vt : SIMD_I<(outs), (ins V128:$val), []>;
+}
+
let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
+
let isReturn = 1 in {
defm : RETURN<I32>;
defm : RETURN<I64>;
defm : RETURN<F32>;
defm : RETURN<F64>;
- def RETURN_VOID : I<(outs), (ins), [(WebAssemblyreturn)], "return">;
+ defm : SIMD_RETURN<v16i8>;
+ defm : SIMD_RETURN<v8i16>;
+ defm : SIMD_RETURN<v4i32>;
+ defm : SIMD_RETURN<v4f32>;
+
+ def RETURN_VOID : I<(outs), (ins), [(WebAssemblyreturn)], "return", 0x0f>;
// This is to RETURN_VOID what FALLTHROUGH_RETURN_#vt is to RETURN_#vt.
let isCodeGenOnly = 1 in
def FALLTHROUGH_RETURN_VOID : I<(outs), (ins), []>;
} // isReturn = 1
- def UNREACHABLE : I<(outs), (ins), [(trap)], "unreachable">;
+
+def UNREACHABLE : I<(outs), (ins), [(trap)], "unreachable", 0x00>;
+
} // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1
} // Defs = [ARGUMENTS]
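The control-flow hunks above attach the binary opcodes to the structured instructions: block 0x02, loop 0x03, a shared end 0x0b for END_BLOCK/END_LOOP, br 0x0c, br_if 0x0d, br_table 0x0e, return 0x0f and unreachable 0x00, with BLOCK and LOOP now carrying a Signature (block type) operand. A standalone sketch of the resulting byte layout for a tiny fragment; only the layout matters here, and the block-type byte values (0x40 empty, 0x7f i32) are the standard wasm valtype encodings, an assumption beyond what the patch itself states:

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  std::vector<uint8_t> Body;

  // block (result i32) ... end   -- BLOCK now carries its Signature operand.
  Body.push_back(0x02);           // block      (BLOCK, 0x02 above)
  Body.push_back(0x7f);           // block type: i32 (assumed valtype encoding)
  Body.push_back(0x41);           // i32.const  (CONST_I32, 0x41)
  Body.push_back(0x2a);           //   immediate 42 as one LEB128 byte
  Body.push_back(0x0c);           // br 0       (BR, 0x0c)
  Body.push_back(0x00);
  Body.push_back(0x0b);           // end        (shared END_BLOCK/END_LOOP, 0x0b)
  Body.push_back(0x0f);           // return     (RETURN_*, 0x0f)

  for (uint8_t B : Body)
    std::printf("%02x ", B);
  std::printf("\n");
  return 0;
}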
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrConv.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrConv.td
index 931f4a913d0f..29483ba663d5 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrConv.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrConv.td
@@ -17,14 +17,14 @@ let Defs = [ARGUMENTS] in {
def I32_WRAP_I64 : I<(outs I32:$dst), (ins I64:$src),
[(set I32:$dst, (trunc I64:$src))],
- "i32.wrap/i64\t$dst, $src">;
+ "i32.wrap/i64\t$dst, $src", 0xa7>;
def I64_EXTEND_S_I32 : I<(outs I64:$dst), (ins I32:$src),
[(set I64:$dst, (sext I32:$src))],
- "i64.extend_s/i32\t$dst, $src">;
+ "i64.extend_s/i32\t$dst, $src", 0xac>;
def I64_EXTEND_U_I32 : I<(outs I64:$dst), (ins I32:$src),
[(set I64:$dst, (zext I32:$src))],
- "i64.extend_u/i32\t$dst, $src">;
+ "i64.extend_u/i32\t$dst, $src", 0xad>;
} // defs = [ARGUMENTS]
@@ -39,73 +39,73 @@ let Defs = [ARGUMENTS] in {
let hasSideEffects = 1 in {
def I32_TRUNC_S_F32 : I<(outs I32:$dst), (ins F32:$src),
[(set I32:$dst, (fp_to_sint F32:$src))],
- "i32.trunc_s/f32\t$dst, $src">;
+ "i32.trunc_s/f32\t$dst, $src", 0xa8>;
def I32_TRUNC_U_F32 : I<(outs I32:$dst), (ins F32:$src),
[(set I32:$dst, (fp_to_uint F32:$src))],
- "i32.trunc_u/f32\t$dst, $src">;
+ "i32.trunc_u/f32\t$dst, $src", 0xa9>;
def I64_TRUNC_S_F32 : I<(outs I64:$dst), (ins F32:$src),
[(set I64:$dst, (fp_to_sint F32:$src))],
- "i64.trunc_s/f32\t$dst, $src">;
+ "i64.trunc_s/f32\t$dst, $src", 0xae>;
def I64_TRUNC_U_F32 : I<(outs I64:$dst), (ins F32:$src),
[(set I64:$dst, (fp_to_uint F32:$src))],
- "i64.trunc_u/f32\t$dst, $src">;
+ "i64.trunc_u/f32\t$dst, $src", 0xaf>;
def I32_TRUNC_S_F64 : I<(outs I32:$dst), (ins F64:$src),
[(set I32:$dst, (fp_to_sint F64:$src))],
- "i32.trunc_s/f64\t$dst, $src">;
+ "i32.trunc_s/f64\t$dst, $src", 0xaa>;
def I32_TRUNC_U_F64 : I<(outs I32:$dst), (ins F64:$src),
[(set I32:$dst, (fp_to_uint F64:$src))],
- "i32.trunc_u/f64\t$dst, $src">;
+ "i32.trunc_u/f64\t$dst, $src", 0xab>;
def I64_TRUNC_S_F64 : I<(outs I64:$dst), (ins F64:$src),
[(set I64:$dst, (fp_to_sint F64:$src))],
- "i64.trunc_s/f64\t$dst, $src">;
+ "i64.trunc_s/f64\t$dst, $src", 0xb0>;
def I64_TRUNC_U_F64 : I<(outs I64:$dst), (ins F64:$src),
[(set I64:$dst, (fp_to_uint F64:$src))],
- "i64.trunc_u/f64\t$dst, $src">;
+ "i64.trunc_u/f64\t$dst, $src", 0xb1>;
} // hasSideEffects = 1
def F32_CONVERT_S_I32 : I<(outs F32:$dst), (ins I32:$src),
[(set F32:$dst, (sint_to_fp I32:$src))],
- "f32.convert_s/i32\t$dst, $src">;
+ "f32.convert_s/i32\t$dst, $src", 0xb2>;
def F32_CONVERT_U_I32 : I<(outs F32:$dst), (ins I32:$src),
[(set F32:$dst, (uint_to_fp I32:$src))],
- "f32.convert_u/i32\t$dst, $src">;
+ "f32.convert_u/i32\t$dst, $src", 0xb3>;
def F64_CONVERT_S_I32 : I<(outs F64:$dst), (ins I32:$src),
[(set F64:$dst, (sint_to_fp I32:$src))],
- "f64.convert_s/i32\t$dst, $src">;
+ "f64.convert_s/i32\t$dst, $src", 0xb7>;
def F64_CONVERT_U_I32 : I<(outs F64:$dst), (ins I32:$src),
[(set F64:$dst, (uint_to_fp I32:$src))],
- "f64.convert_u/i32\t$dst, $src">;
+ "f64.convert_u/i32\t$dst, $src", 0xb8>;
def F32_CONVERT_S_I64 : I<(outs F32:$dst), (ins I64:$src),
[(set F32:$dst, (sint_to_fp I64:$src))],
- "f32.convert_s/i64\t$dst, $src">;
+ "f32.convert_s/i64\t$dst, $src", 0xb4>;
def F32_CONVERT_U_I64 : I<(outs F32:$dst), (ins I64:$src),
[(set F32:$dst, (uint_to_fp I64:$src))],
- "f32.convert_u/i64\t$dst, $src">;
+ "f32.convert_u/i64\t$dst, $src", 0xb5>;
def F64_CONVERT_S_I64 : I<(outs F64:$dst), (ins I64:$src),
[(set F64:$dst, (sint_to_fp I64:$src))],
- "f64.convert_s/i64\t$dst, $src">;
+ "f64.convert_s/i64\t$dst, $src", 0xb9>;
def F64_CONVERT_U_I64 : I<(outs F64:$dst), (ins I64:$src),
[(set F64:$dst, (uint_to_fp I64:$src))],
- "f64.convert_u/i64\t$dst, $src">;
+ "f64.convert_u/i64\t$dst, $src", 0xba>;
def F64_PROMOTE_F32 : I<(outs F64:$dst), (ins F32:$src),
- [(set F64:$dst, (fextend F32:$src))],
- "f64.promote/f32\t$dst, $src">;
+ [(set F64:$dst, (fpextend F32:$src))],
+ "f64.promote/f32\t$dst, $src", 0xbb>;
def F32_DEMOTE_F64 : I<(outs F32:$dst), (ins F64:$src),
- [(set F32:$dst, (fround F64:$src))],
- "f32.demote/f64\t$dst, $src">;
+ [(set F32:$dst, (fpround F64:$src))],
+ "f32.demote/f64\t$dst, $src", 0xb6>;
def I32_REINTERPRET_F32 : I<(outs I32:$dst), (ins F32:$src),
[(set I32:$dst, (bitconvert F32:$src))],
- "i32.reinterpret/f32\t$dst, $src">;
+ "i32.reinterpret/f32\t$dst, $src", 0xbc>;
def F32_REINTERPRET_I32 : I<(outs F32:$dst), (ins I32:$src),
[(set F32:$dst, (bitconvert I32:$src))],
- "f32.reinterpret/i32\t$dst, $src">;
+ "f32.reinterpret/i32\t$dst, $src", 0xbe>;
def I64_REINTERPRET_F64 : I<(outs I64:$dst), (ins F64:$src),
[(set I64:$dst, (bitconvert F64:$src))],
- "i64.reinterpret/f64\t$dst, $src">;
+ "i64.reinterpret/f64\t$dst, $src", 0xbd>;
def F64_REINTERPRET_I64 : I<(outs F64:$dst), (ins I64:$src),
[(set F64:$dst, (bitconvert I64:$src))],
- "f64.reinterpret/i64\t$dst, $src">;
+ "f64.reinterpret/i64\t$dst, $src", 0xbf>;
} // Defs = [ARGUMENTS]
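The conversion hunks above keep hasSideEffects = 1 on the i32/i64.trunc_* instructions while adding their opcodes; the side effect being modeled is that these conversions trap when the truncated input is NaN or out of range for the destination type, rather than producing an undefined value. A small standalone sketch of that checking behaviour for i32.trunc_s/f32, written against my reading of the wasm semantics rather than anything in this patch:

#include <cmath>
#include <cstdint>
#include <cstdio>
#include <stdexcept>

// Mimics i32.trunc_s/f32: truncate toward zero, trap (here: throw) if the
// result cannot be represented as a signed 32-bit integer.
static int32_t truncSToI32(float F) {
  double T = std::trunc(static_cast<double>(F));   // exact for float inputs
  if (std::isnan(F) || T < -2147483648.0 || T > 2147483647.0)
    throw std::runtime_error("invalid conversion to integer (trap)");
  return static_cast<int32_t>(T);
}

int main() {
  std::printf("%d\n", truncSToI32(-3.7f));          // prints -3
  try {
    truncSToI32(3.0e9f);                            // out of i32 range: traps
  } catch (const std::exception &E) {
    std::printf("trapped: %s\n", E.what());
  }
  return 0;
}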
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td
index 64569720375c..030be0862a56 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFloat.td
@@ -15,26 +15,26 @@
let Defs = [ARGUMENTS] in {
let isCommutable = 1 in
-defm ADD : BinaryFP<fadd, "add ">;
-defm SUB : BinaryFP<fsub, "sub ">;
+defm ADD : BinaryFP<fadd, "add ", 0x92, 0xa0>;
+defm SUB : BinaryFP<fsub, "sub ", 0x93, 0xa1>;
let isCommutable = 1 in
-defm MUL : BinaryFP<fmul, "mul ">;
-defm DIV : BinaryFP<fdiv, "div ">;
-defm SQRT : UnaryFP<fsqrt, "sqrt">;
+defm MUL : BinaryFP<fmul, "mul ", 0x94, 0xa2>;
+defm DIV : BinaryFP<fdiv, "div ", 0x95, 0xa3>;
+defm SQRT : UnaryFP<fsqrt, "sqrt", 0x91, 0x9f>;
-defm ABS : UnaryFP<fabs, "abs ">;
-defm NEG : UnaryFP<fneg, "neg ">;
-defm COPYSIGN : BinaryFP<fcopysign, "copysign">;
+defm ABS : UnaryFP<fabs, "abs ", 0x8b, 0x99>;
+defm NEG : UnaryFP<fneg, "neg ", 0x8c, 0x9a>;
+defm COPYSIGN : BinaryFP<fcopysign, "copysign", 0x98, 0xa6>;
let isCommutable = 1 in {
-defm MIN : BinaryFP<fminnan, "min ">;
-defm MAX : BinaryFP<fmaxnan, "max ">;
+defm MIN : BinaryFP<fminnan, "min ", 0x96, 0xa4>;
+defm MAX : BinaryFP<fmaxnan, "max ", 0x97, 0xa5>;
} // isCommutable = 1
-defm CEIL : UnaryFP<fceil, "ceil">;
-defm FLOOR : UnaryFP<ffloor, "floor">;
-defm TRUNC : UnaryFP<ftrunc, "trunc">;
-defm NEAREST : UnaryFP<fnearbyint, "nearest">;
+defm CEIL : UnaryFP<fceil, "ceil", 0x8d, 0x9b>;
+defm FLOOR : UnaryFP<ffloor, "floor", 0x8e, 0x9c>;
+defm TRUNC : UnaryFP<ftrunc, "trunc", 0x8f, 0x9d>;
+defm NEAREST : UnaryFP<fnearbyint, "nearest", 0x90, 0x9e>;
} // Defs = [ARGUMENTS]
@@ -51,13 +51,13 @@ def : Pat<(frint f64:$src), (NEAREST_F64 f64:$src)>;
let Defs = [ARGUMENTS] in {
let isCommutable = 1 in {
-defm EQ : ComparisonFP<SETOEQ, "eq ">;
-defm NE : ComparisonFP<SETUNE, "ne ">;
+defm EQ : ComparisonFP<SETOEQ, "eq ", 0x5b, 0x61>;
+defm NE : ComparisonFP<SETUNE, "ne ", 0x5c, 0x62>;
} // isCommutable = 1
-defm LT : ComparisonFP<SETOLT, "lt ">;
-defm LE : ComparisonFP<SETOLE, "le ">;
-defm GT : ComparisonFP<SETOGT, "gt ">;
-defm GE : ComparisonFP<SETOGE, "ge ">;
+defm LT : ComparisonFP<SETOLT, "lt ", 0x5d, 0x63>;
+defm LE : ComparisonFP<SETOLE, "le ", 0x5e, 0x64>;
+defm GT : ComparisonFP<SETOGT, "gt ", 0x5f, 0x65>;
+defm GE : ComparisonFP<SETOGE, "ge ", 0x60, 0x66>;
} // Defs = [ARGUMENTS]
@@ -79,10 +79,10 @@ let Defs = [ARGUMENTS] in {
def SELECT_F32 : I<(outs F32:$dst), (ins F32:$lhs, F32:$rhs, I32:$cond),
[(set F32:$dst, (select I32:$cond, F32:$lhs, F32:$rhs))],
- "f32.select\t$dst, $lhs, $rhs, $cond">;
+ "f32.select\t$dst, $lhs, $rhs, $cond", 0x1b>;
def SELECT_F64 : I<(outs F64:$dst), (ins F64:$lhs, F64:$rhs, I32:$cond),
[(set F64:$dst, (select I32:$cond, F64:$lhs, F64:$rhs))],
- "f64.select\t$dst, $lhs, $rhs, $cond">;
+ "f64.select\t$dst, $lhs, $rhs, $cond", 0x1b>;
} // Defs = [ARGUMENTS]
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td
index 8008dd32353a..5b2498402571 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrFormats.td
@@ -13,67 +13,90 @@
//===----------------------------------------------------------------------===//
// WebAssembly Instruction Format.
-class WebAssemblyInst<string asmstr> : Instruction {
- field bits<0> Inst; // Instruction encoding.
+class WebAssemblyInst<bits<32> inst, string asmstr> : Instruction {
+ field bits<32> Inst = inst; // Instruction encoding.
let Namespace = "WebAssembly";
let Pattern = [];
let AsmString = asmstr;
}
// Normal instructions.
-class I<dag oops, dag iops, list<dag> pattern, string asmstr = "">
- : WebAssemblyInst<asmstr> {
+class I<dag oops, dag iops, list<dag> pattern, string asmstr = "", bits<32> inst = -1>
+ : WebAssemblyInst<inst, asmstr> {
dag OutOperandList = oops;
dag InOperandList = iops;
let Pattern = pattern;
}
+class SIMD_I<dag oops, dag iops, list<dag> pattern,
+ string asmstr = "", bits<32> inst = -1>
+ : I<oops, iops, pattern, asmstr, inst>, Requires<[HasSIMD128]>;
+
// Unary and binary instructions, for the local types that WebAssembly supports.
-multiclass UnaryInt<SDNode node, string name> {
+multiclass UnaryInt<SDNode node, string name, bits<32> i32Inst, bits<32> i64Inst> {
def _I32 : I<(outs I32:$dst), (ins I32:$src),
[(set I32:$dst, (node I32:$src))],
- !strconcat("i32.", !strconcat(name, "\t$dst, $src"))>;
+ !strconcat("i32.", !strconcat(name, "\t$dst, $src")), i32Inst>;
def _I64 : I<(outs I64:$dst), (ins I64:$src),
[(set I64:$dst, (node I64:$src))],
- !strconcat("i64.", !strconcat(name, "\t$dst, $src"))>;
+ !strconcat("i64.", !strconcat(name, "\t$dst, $src")), i64Inst>;
}
-multiclass BinaryInt<SDNode node, string name> {
+multiclass BinaryInt<SDNode node, string name, bits<32> i32Inst, bits<32> i64Inst> {
def _I32 : I<(outs I32:$dst), (ins I32:$lhs, I32:$rhs),
[(set I32:$dst, (node I32:$lhs, I32:$rhs))],
- !strconcat("i32.", !strconcat(name, "\t$dst, $lhs, $rhs"))>;
+ !strconcat("i32.", !strconcat(name, "\t$dst, $lhs, $rhs")), i32Inst>;
def _I64 : I<(outs I64:$dst), (ins I64:$lhs, I64:$rhs),
[(set I64:$dst, (node I64:$lhs, I64:$rhs))],
- !strconcat("i64.", !strconcat(name, "\t$dst, $lhs, $rhs"))>;
+ !strconcat("i64.", !strconcat(name, "\t$dst, $lhs, $rhs")), i64Inst>;
}
-multiclass UnaryFP<SDNode node, string name> {
+multiclass UnaryFP<SDNode node, string name, bits<32> f32Inst, bits<32> f64Inst> {
def _F32 : I<(outs F32:$dst), (ins F32:$src),
[(set F32:$dst, (node F32:$src))],
- !strconcat("f32.", !strconcat(name, "\t$dst, $src"))>;
+ !strconcat("f32.", !strconcat(name, "\t$dst, $src")), f32Inst>;
def _F64 : I<(outs F64:$dst), (ins F64:$src),
[(set F64:$dst, (node F64:$src))],
- !strconcat("f64.", !strconcat(name, "\t$dst, $src"))>;
+ !strconcat("f64.", !strconcat(name, "\t$dst, $src")), f64Inst>;
}
-multiclass BinaryFP<SDNode node, string name> {
+multiclass BinaryFP<SDNode node, string name, bits<32> f32Inst, bits<32> f64Inst> {
def _F32 : I<(outs F32:$dst), (ins F32:$lhs, F32:$rhs),
[(set F32:$dst, (node F32:$lhs, F32:$rhs))],
- !strconcat("f32.", !strconcat(name, "\t$dst, $lhs, $rhs"))>;
+ !strconcat("f32.", !strconcat(name, "\t$dst, $lhs, $rhs")), f32Inst>;
def _F64 : I<(outs F64:$dst), (ins F64:$lhs, F64:$rhs),
[(set F64:$dst, (node F64:$lhs, F64:$rhs))],
- !strconcat("f64.", !strconcat(name, "\t$dst, $lhs, $rhs"))>;
+ !strconcat("f64.", !strconcat(name, "\t$dst, $lhs, $rhs")), f64Inst>;
+}
+multiclass SIMDBinary<SDNode node, SDNode fnode, string name> {
+ def _I8x16 : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs),
+ [(set (v16i8 V128:$dst), (node V128:$lhs, V128:$rhs))],
+ !strconcat("i8x16.", !strconcat(name, "\t$dst, $lhs, $rhs"))>;
+ def _I16x8 : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs),
+ [(set (v8i16 V128:$dst), (node V128:$lhs, V128:$rhs))],
+ !strconcat("i16x8.", !strconcat(name, "\t$dst, $lhs, $rhs"))>;
+ def _I32x4 : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs),
+ [(set (v4i32 V128:$dst), (node V128:$lhs, V128:$rhs))],
+ !strconcat("i32x4.", !strconcat(name, "\t$dst, $lhs, $rhs"))>;
+ def _F32x4 : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs),
+ [(set (v4f32 V128:$dst), (fnode V128:$lhs, V128:$rhs))],
+ !strconcat("f32x4.", !strconcat(name, "\t$dst, $lhs, $rhs"))>;
+
}
-multiclass ComparisonInt<CondCode cond, string name> {
+multiclass ComparisonInt<CondCode cond, string name, bits<32> i32Inst, bits<32> i64Inst> {
def _I32 : I<(outs I32:$dst), (ins I32:$lhs, I32:$rhs),
[(set I32:$dst, (setcc I32:$lhs, I32:$rhs, cond))],
- !strconcat("i32.", !strconcat(name, "\t$dst, $lhs, $rhs"))>;
+ !strconcat("i32.", !strconcat(name, "\t$dst, $lhs, $rhs")),
+ i32Inst>;
def _I64 : I<(outs I32:$dst), (ins I64:$lhs, I64:$rhs),
[(set I32:$dst, (setcc I64:$lhs, I64:$rhs, cond))],
- !strconcat("i64.", !strconcat(name, "\t$dst, $lhs, $rhs"))>;
+ !strconcat("i64.", !strconcat(name, "\t$dst, $lhs, $rhs")),
+ i64Inst>;
}
-multiclass ComparisonFP<CondCode cond, string name> {
+multiclass ComparisonFP<CondCode cond, string name, bits<32> f32Inst, bits<32> f64Inst> {
def _F32 : I<(outs I32:$dst), (ins F32:$lhs, F32:$rhs),
[(set I32:$dst, (setcc F32:$lhs, F32:$rhs, cond))],
- !strconcat("f32.", !strconcat(name, "\t$dst, $lhs, $rhs"))>;
+ !strconcat("f32.", !strconcat(name, "\t$dst, $lhs, $rhs")),
+ f32Inst>;
def _F64 : I<(outs I32:$dst), (ins F64:$lhs, F64:$rhs),
[(set I32:$dst, (setcc F64:$lhs, F64:$rhs, cond))],
- !strconcat("f64.", !strconcat(name, "\t$dst, $lhs, $rhs"))>;
+ !strconcat("f64.", !strconcat(name, "\t$dst, $lhs, $rhs")),
+ f64Inst>;
}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
index 2fd3eab99d78..0e2d8bbaf64c 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
@@ -60,19 +60,19 @@ void WebAssemblyInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
? MRI.getRegClass(DestReg)
: MRI.getTargetRegisterInfo()->getMinimalPhysRegClass(DestReg);
- unsigned CopyLocalOpcode;
+ unsigned CopyOpcode;
if (RC == &WebAssembly::I32RegClass)
- CopyLocalOpcode = WebAssembly::COPY_LOCAL_I32;
+ CopyOpcode = WebAssembly::COPY_I32;
else if (RC == &WebAssembly::I64RegClass)
- CopyLocalOpcode = WebAssembly::COPY_LOCAL_I64;
+ CopyOpcode = WebAssembly::COPY_I64;
else if (RC == &WebAssembly::F32RegClass)
- CopyLocalOpcode = WebAssembly::COPY_LOCAL_F32;
+ CopyOpcode = WebAssembly::COPY_F32;
else if (RC == &WebAssembly::F64RegClass)
- CopyLocalOpcode = WebAssembly::COPY_LOCAL_F64;
+ CopyOpcode = WebAssembly::COPY_F64;
else
llvm_unreachable("Unexpected register class");
- BuildMI(MBB, I, DL, get(CopyLocalOpcode), DestReg)
+ BuildMI(MBB, I, DL, get(CopyOpcode), DestReg)
.addReg(SrcReg, KillSrc ? RegState::Kill : 0);
}
@@ -142,7 +142,10 @@ bool WebAssemblyInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
return false;
}
-unsigned WebAssemblyInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+unsigned WebAssemblyInstrInfo::removeBranch(MachineBasicBlock &MBB,
+ int *BytesRemoved) const {
+ assert(!BytesRemoved && "code size not handled");
+
MachineBasicBlock::instr_iterator I = MBB.instr_end();
unsigned Count = 0;
@@ -161,11 +164,14 @@ unsigned WebAssemblyInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
return Count;
}
-unsigned WebAssemblyInstrInfo::InsertBranch(MachineBasicBlock &MBB,
+unsigned WebAssemblyInstrInfo::insertBranch(MachineBasicBlock &MBB,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
ArrayRef<MachineOperand> Cond,
- const DebugLoc &DL) const {
+ const DebugLoc &DL,
+ int *BytesAdded) const {
+ assert(!BytesAdded && "code size not handled");
+
if (Cond.empty()) {
if (!TBB)
return 0;
@@ -190,7 +196,7 @@ unsigned WebAssemblyInstrInfo::InsertBranch(MachineBasicBlock &MBB,
return 2;
}
-bool WebAssemblyInstrInfo::ReverseBranchCondition(
+bool WebAssemblyInstrInfo::reverseBranchCondition(
SmallVectorImpl<MachineOperand> &Cond) const {
assert(Cond.size() == 2 && "Expected a flag and a successor block");
Cond.front() = MachineOperand::CreateImm(!Cond.front().getImm());
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h
index d93f958ca4cd..df6c937a364b 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.h
@@ -48,12 +48,14 @@ public:
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify = false) const override;
- unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
- unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ unsigned removeBranch(MachineBasicBlock &MBB,
+ int *BytesRemoved = nullptr) const override;
+ unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
- const DebugLoc &DL) const override;
+ const DebugLoc &DL,
+ int *BytesAdded = nullptr) const override;
bool
- ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
+ reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
};
} // end namespace llvm
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
index 4b319871cf16..dcfd1a42c6aa 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
@@ -71,18 +71,39 @@ let OperandNamespace = "WebAssembly" in {
let OperandType = "OPERAND_BASIC_BLOCK" in
def bb_op : Operand<OtherVT>;
-let OperandType = "OPERAND_FP32IMM" in
+let OperandType = "OPERAND_LOCAL" in
+def local_op : Operand<i32>;
+
+let OperandType = "OPERAND_I32IMM" in
+def i32imm_op : Operand<i32>;
+
+let OperandType = "OPERAND_I64IMM" in
+def i64imm_op : Operand<i64>;
+
+let OperandType = "OPERAND_F32IMM" in
def f32imm_op : Operand<f32>;
-let OperandType = "OPERAND_FP64IMM" in
+let OperandType = "OPERAND_F64IMM" in
def f64imm_op : Operand<f64>;
+let OperandType = "OPERAND_FUNCTION32" in
+def function32_op : Operand<i32>;
+
+let OperandType = "OPERAND_OFFSET32" in
+def offset32_op : Operand<i32>;
+
let OperandType = "OPERAND_P2ALIGN" in {
def P2Align : Operand<i32> {
let PrintMethod = "printWebAssemblyP2AlignOperand";
}
} // OperandType = "OPERAND_P2ALIGN"
+let OperandType = "OPERAND_SIGNATURE" in {
+def Signature : Operand<i32> {
+ let PrintMethod = "printWebAssemblySignatureOperand";
+}
+} // OperandType = "OPERAND_SIGNATURE"
+
} // OperandNamespace = "WebAssembly"
//===----------------------------------------------------------------------===//
@@ -100,10 +121,20 @@ multiclass ARGUMENT<WebAssemblyRegClass vt> {
def ARGUMENT_#vt : I<(outs vt:$res), (ins i32imm:$argno),
[(set vt:$res, (WebAssemblyargument timm:$argno))]>;
}
+multiclass SIMD_ARGUMENT<ValueType vt> {
+ let hasSideEffects = 1, Uses = [ARGUMENTS], isCodeGenOnly = 1 in
+ def ARGUMENT_#vt : SIMD_I<(outs V128:$res), (ins i32imm:$argno),
+ [(set (vt V128:$res),
+ (WebAssemblyargument timm:$argno))]>;
+}
defm : ARGUMENT<I32>;
defm : ARGUMENT<I64>;
defm : ARGUMENT<F32>;
defm : ARGUMENT<F64>;
+defm : SIMD_ARGUMENT<v16i8>;
+defm : SIMD_ARGUMENT<v8i16>;
+defm : SIMD_ARGUMENT<v4i32>;
+defm : SIMD_ARGUMENT<v4f32>;
let Defs = [ARGUMENTS] in {
@@ -111,40 +142,63 @@ let Defs = [ARGUMENTS] in {
// are implied by virtual register uses and defs.
multiclass LOCAL<WebAssemblyRegClass vt> {
let hasSideEffects = 0 in {
- // COPY_LOCAL is not an actual instruction in wasm, but since we allow
- // get_local and set_local to be implicit, we can have a COPY_LOCAL which
- // is actually a no-op because all the work is done in the implied
- // get_local and set_local.
- let isAsCheapAsAMove = 1 in
- def COPY_LOCAL_#vt : I<(outs vt:$res), (ins vt:$src), [],
- "copy_local\t$res, $src">;
-
- // TEE_LOCAL is similar to COPY_LOCAL, but writes two copies of its result.
- // Typically this would be used to stackify one result and write the other
- // result to a local.
- let isAsCheapAsAMove = 1 in
- def TEE_LOCAL_#vt : I<(outs vt:$res, vt:$also), (ins vt:$src), [],
- "tee_local\t$res, $also, $src">;
+ // COPY is not an actual instruction in wasm, but since we allow get_local and
+ // set_local to be implicit during most of codegen, we can have a COPY which
+ // is actually a no-op because all the work is done in the implied get_local
+ // and set_local. COPYs are eliminated (and replaced with
+ // get_local/set_local) in the ExplicitLocals pass.
+ let isAsCheapAsAMove = 1, isCodeGenOnly = 1 in
+ def COPY_#vt : I<(outs vt:$res), (ins vt:$src), [], "copy_local\t$res, $src">;
+
+ // TEE is similar to COPY, but writes two copies of its result. Typically
+ // this would be used to stackify one result and write the other result to a
+ // local.
+ let isAsCheapAsAMove = 1, isCodeGenOnly = 1 in
+ def TEE_#vt : I<(outs vt:$res, vt:$also), (ins vt:$src), [],
+ "tee_local\t$res, $also, $src">;
+
+ // This is the actual get_local instruction in wasm. These are made explicit
+ // by the ExplicitLocals pass. It has mayLoad because it reads from a wasm
+ // local, which is a side effect not otherwise modeled in LLVM.
+ let mayLoad = 1, isAsCheapAsAMove = 1 in
+ def GET_LOCAL_#vt : I<(outs vt:$res), (ins local_op:$local), [],
+ "get_local\t$res, $local", 0x20>;
+
+ // This is the actual set_local instruction in wasm. These are made explicit
+ // by the ExplicitLocals pass. It has mayStore because it writes to a wasm
+ // local, which is a side effect not otherwise modeled in LLVM.
+ let mayStore = 1, isAsCheapAsAMove = 1 in
+ def SET_LOCAL_#vt : I<(outs), (ins local_op:$local, vt:$src), [],
+ "set_local\t$local, $src", 0x21>;
+
+ // This is the actual tee_local instruction in wasm. TEEs are turned into
+ // TEE_LOCALs by the ExplicitLocals pass. It has mayStore for the same reason
+ // as SET_LOCAL.
+ let mayStore = 1, isAsCheapAsAMove = 1 in
+ def TEE_LOCAL_#vt : I<(outs vt:$res), (ins local_op:$local, vt:$src), [],
+ "tee_local\t$res, $local, $src", 0x22>;
+
} // hasSideEffects = 0
}
defm : LOCAL<I32>;
defm : LOCAL<I64>;
defm : LOCAL<F32>;
defm : LOCAL<F64>;
+defm : LOCAL<V128>, Requires<[HasSIMD128]>;
let isMoveImm = 1, isAsCheapAsAMove = 1, isReMaterializable = 1 in {
-def CONST_I32 : I<(outs I32:$res), (ins i32imm:$imm),
+def CONST_I32 : I<(outs I32:$res), (ins i32imm_op:$imm),
[(set I32:$res, imm:$imm)],
- "i32.const\t$res, $imm">;
-def CONST_I64 : I<(outs I64:$res), (ins i64imm:$imm),
+ "i32.const\t$res, $imm", 0x41>;
+def CONST_I64 : I<(outs I64:$res), (ins i64imm_op:$imm),
[(set I64:$res, imm:$imm)],
- "i64.const\t$res, $imm">;
+ "i64.const\t$res, $imm", 0x42>;
def CONST_F32 : I<(outs F32:$res), (ins f32imm_op:$imm),
[(set F32:$res, fpimm:$imm)],
- "f32.const\t$res, $imm">;
+ "f32.const\t$res, $imm", 0x43>;
def CONST_F64 : I<(outs F64:$res), (ins f64imm_op:$imm),
[(set F64:$res, fpimm:$imm)],
- "f64.const\t$res, $imm">;
+ "f64.const\t$res, $imm", 0x44>;
} // isMoveImm = 1, isAsCheapAsAMove = 1, isReMaterializable = 1
} // Defs = [ARGUMENTS]
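The LOCAL multiclass changes above split the old COPY_LOCAL/TEE_LOCAL pseudos into codegen-only COPY/TEE plus real GET_LOCAL (0x20), SET_LOCAL (0x21) and TEE_LOCAL (0x22) instructions that the ExplicitLocals pass materializes. A minimal standalone interpreter sketch of the three real instructions' stack behaviour, the key point being that tee_local writes the local and also leaves the value on the stack (the "two copies" the comment mentions); this illustrates the wasm semantics, not the pass itself:

#include <cstdint>
#include <cstdio>
#include <vector>

struct State {
  std::vector<int32_t> Stack;
  std::vector<int32_t> Locals;
};

static void getLocal(State &S, unsigned Idx) {   // get_local, opcode 0x20
  S.Stack.push_back(S.Locals[Idx]);
}
static void setLocal(State &S, unsigned Idx) {   // set_local, opcode 0x21
  S.Locals[Idx] = S.Stack.back();
  S.Stack.pop_back();
}
static void teeLocal(State &S, unsigned Idx) {   // tee_local, opcode 0x22
  S.Locals[Idx] = S.Stack.back();                 // ...but keep the value
}

int main() {
  State S;
  S.Locals.assign(2, 0);
  S.Stack.push_back(42);
  teeLocal(S, 0);   // local 0 = 42, 42 still on the stack
  setLocal(S, 1);   // local 1 = 42, stack now empty
  getLocal(S, 0);   // push local 0 back
  std::printf("locals: %d %d, top: %d\n", S.Locals[0], S.Locals[1], S.Stack.back());
  return 0;
}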
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td
index 7eaa57bb217e..8a3248ee669e 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrInteger.td
@@ -17,51 +17,51 @@ let Defs = [ARGUMENTS] in {
// The spaces after the names are for aesthetic purposes only, to make
// operands line up vertically after tab expansion.
let isCommutable = 1 in
-defm ADD : BinaryInt<add, "add ">;
-defm SUB : BinaryInt<sub, "sub ">;
+defm ADD : BinaryInt<add, "add ", 0x6a, 0x7c>;
+defm SUB : BinaryInt<sub, "sub ", 0x6b, 0x7d>;
let isCommutable = 1 in
-defm MUL : BinaryInt<mul, "mul ">;
+defm MUL : BinaryInt<mul, "mul ", 0x6c, 0x7e>;
// Divide and remainder trap on a zero denominator.
let hasSideEffects = 1 in {
-defm DIV_S : BinaryInt<sdiv, "div_s">;
-defm DIV_U : BinaryInt<udiv, "div_u">;
-defm REM_S : BinaryInt<srem, "rem_s">;
-defm REM_U : BinaryInt<urem, "rem_u">;
+defm DIV_S : BinaryInt<sdiv, "div_s", 0x6d, 0x7f>;
+defm DIV_U : BinaryInt<udiv, "div_u", 0x6e, 0x80>;
+defm REM_S : BinaryInt<srem, "rem_s", 0x6f, 0x81>;
+defm REM_U : BinaryInt<urem, "rem_u", 0x70, 0x82>;
} // hasSideEffects = 1
let isCommutable = 1 in {
-defm AND : BinaryInt<and, "and ">;
-defm OR : BinaryInt<or, "or ">;
-defm XOR : BinaryInt<xor, "xor ">;
+defm AND : BinaryInt<and, "and ", 0x71, 0x83>;
+defm OR : BinaryInt<or, "or ", 0x72, 0x84>;
+defm XOR : BinaryInt<xor, "xor ", 0x73, 0x85>;
} // isCommutable = 1
-defm SHL : BinaryInt<shl, "shl ">;
-defm SHR_U : BinaryInt<srl, "shr_u">;
-defm SHR_S : BinaryInt<sra, "shr_s">;
-defm ROTL : BinaryInt<rotl, "rotl">;
-defm ROTR : BinaryInt<rotr, "rotr">;
+defm SHL : BinaryInt<shl, "shl ", 0x74, 0x86>;
+defm SHR_S : BinaryInt<sra, "shr_s", 0x75, 0x87>;
+defm SHR_U : BinaryInt<srl, "shr_u", 0x76, 0x88>;
+defm ROTL : BinaryInt<rotl, "rotl", 0x77, 0x89>;
+defm ROTR : BinaryInt<rotr, "rotr", 0x78, 0x8a>;
let isCommutable = 1 in {
-defm EQ : ComparisonInt<SETEQ, "eq ">;
-defm NE : ComparisonInt<SETNE, "ne ">;
+defm EQ : ComparisonInt<SETEQ, "eq ", 0x46, 0x68>;
+defm NE : ComparisonInt<SETNE, "ne ", 0x47, 0x69>;
} // isCommutable = 1
-defm LT_S : ComparisonInt<SETLT, "lt_s">;
-defm LE_S : ComparisonInt<SETLE, "le_s">;
-defm LT_U : ComparisonInt<SETULT, "lt_u">;
-defm LE_U : ComparisonInt<SETULE, "le_u">;
-defm GT_S : ComparisonInt<SETGT, "gt_s">;
-defm GE_S : ComparisonInt<SETGE, "ge_s">;
-defm GT_U : ComparisonInt<SETUGT, "gt_u">;
-defm GE_U : ComparisonInt<SETUGE, "ge_u">;
+defm LT_S : ComparisonInt<SETLT, "lt_s", 0x48, 0x53>;
+defm LT_U : ComparisonInt<SETULT, "lt_u", 0x49, 0x54>;
+defm GT_S : ComparisonInt<SETGT, "gt_s", 0x4a, 0x55>;
+defm GT_U : ComparisonInt<SETUGT, "gt_u", 0x4b, 0x56>;
+defm LE_S : ComparisonInt<SETLE, "le_s", 0x4c, 0x57>;
+defm LE_U : ComparisonInt<SETULE, "le_u", 0x4d, 0x58>;
+defm GE_S : ComparisonInt<SETGE, "ge_s", 0x4e, 0x59>;
+defm GE_U : ComparisonInt<SETUGE, "ge_u", 0x4f, 0x5a>;
-defm CLZ : UnaryInt<ctlz, "clz ">;
-defm CTZ : UnaryInt<cttz, "ctz ">;
-defm POPCNT : UnaryInt<ctpop, "popcnt">;
+defm CLZ : UnaryInt<ctlz, "clz ", 0x67, 0x79>;
+defm CTZ : UnaryInt<cttz, "ctz ", 0x68, 0x7a>;
+defm POPCNT : UnaryInt<ctpop, "popcnt", 0x69, 0x7b>;
def EQZ_I32 : I<(outs I32:$dst), (ins I32:$src),
[(set I32:$dst, (setcc I32:$src, 0, SETEQ))],
- "i32.eqz \t$dst, $src">;
+ "i32.eqz \t$dst, $src", 0x45>;
def EQZ_I64 : I<(outs I32:$dst), (ins I64:$src),
[(set I32:$dst, (setcc I64:$src, 0, SETEQ))],
- "i64.eqz \t$dst, $src">;
+ "i64.eqz \t$dst, $src", 0x50>;
} // Defs = [ARGUMENTS]
@@ -75,10 +75,10 @@ let Defs = [ARGUMENTS] in {
def SELECT_I32 : I<(outs I32:$dst), (ins I32:$lhs, I32:$rhs, I32:$cond),
[(set I32:$dst, (select I32:$cond, I32:$lhs, I32:$rhs))],
- "i32.select\t$dst, $lhs, $rhs, $cond">;
+ "i32.select\t$dst, $lhs, $rhs, $cond", 0x1b>;
def SELECT_I64 : I<(outs I64:$dst), (ins I64:$lhs, I64:$rhs, I32:$cond),
[(set I64:$dst, (select I32:$cond, I64:$lhs, I64:$rhs))],
- "i64.select\t$dst, $lhs, $rhs, $cond">;
+ "i64.select\t$dst, $lhs, $rhs, $cond", 0x1b>;
} // Defs = [ARGUMENTS]
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
index 521c664ca4ab..b606ebb0a68d 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
@@ -41,15 +41,13 @@ def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{
}]>;
// GlobalAddresses are conceptually unsigned values, so we can also fold them
-// into immediate values as long as their offsets are non-negative.
+// into immediate values as long as the add is 'nuw'.
+// TODO: We'd like to also match GA offsets but there are cases where the
+// register can have a negative value. Find out what more we can do.
def regPlusGA : PatFrag<(ops node:$addr, node:$off),
(add node:$addr, node:$off),
[{
- return N->getFlags()->hasNoUnsignedWrap() ||
- (N->getOperand(1)->getOpcode() == WebAssemblyISD::Wrapper &&
- isa<GlobalAddressSDNode>(N->getOperand(1)->getOperand(0)) &&
- cast<GlobalAddressSDNode>(N->getOperand(1)->getOperand(0))
- ->getOffset() >= 0);
+ return N->getFlags()->hasNoUnsignedWrap();
}]>;
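The regPlusGA change above tightens offset folding: a (reg + global) address is folded into a load/store offset field only when the IR add carries 'nuw', because a 32-bit add that wraps and the instruction's own base-plus-offset address computation would no longer agree. A standalone sketch of the wrap hazard being guarded against; the specific addresses are made up:

#include <cstdint>
#include <cstdio>

int main() {
  // Hypothetical base register near the top of the 32-bit address space and a
  // small "global address" offset that a pattern might fold into the
  // instruction's offset field.
  uint32_t Base = 0xfffffff0u;
  uint32_t Off  = 0x20u;

  uint32_t Wrapped = Base + Off;             // 32-bit unsigned add wraps
  uint64_t Exact   = (uint64_t)Base + Off;   // what a non-wrapping add means

  std::printf("wrapped: %#x, exact: %#llx\n", Wrapped, (unsigned long long)Exact);
  // The two results differ, so folding Off is only sound when the compiler
  // knows the add cannot wrap, i.e. when the IR add is marked 'nuw'.
  return 0;
}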
// We don't need a regPlusES because external symbols never have constant
@@ -58,636 +56,631 @@ def regPlusGA : PatFrag<(ops node:$addr, node:$off),
let Defs = [ARGUMENTS] in {
// Basic load.
-def LOAD_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr,
- P2Align:$p2align), [],
- "i32.load\t$dst, ${off}(${addr})${p2align}">;
-def LOAD_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr,
- P2Align:$p2align), [],
- "i64.load\t$dst, ${off}(${addr})${p2align}">;
-def LOAD_F32 : I<(outs F32:$dst), (ins i32imm:$off, I32:$addr,
- P2Align:$p2align), [],
- "f32.load\t$dst, ${off}(${addr})${p2align}">;
-def LOAD_F64 : I<(outs F64:$dst), (ins i32imm:$off, I32:$addr,
- P2Align:$p2align), [],
- "f64.load\t$dst, ${off}(${addr})${p2align}">;
+// FIXME: When we can break syntax compatibility, reorder the fields in the
+// asmstrings to match the binary encoding.
+def LOAD_I32 : I<(outs I32:$dst),
+ (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
+ [], "i32.load\t$dst, ${off}(${addr})${p2align}", 0x28>;
+def LOAD_I64 : I<(outs I64:$dst),
+ (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
+ [], "i64.load\t$dst, ${off}(${addr})${p2align}", 0x29>;
+def LOAD_F32 : I<(outs F32:$dst),
+ (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
+ [], "f32.load\t$dst, ${off}(${addr})${p2align}", 0x2a>;
+def LOAD_F64 : I<(outs F64:$dst),
+ (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
+ [], "f64.load\t$dst, ${off}(${addr})${p2align}", 0x2b>;
} // Defs = [ARGUMENTS]
// Select loads with no constant offset.
-def : Pat<(i32 (load I32:$addr)), (LOAD_I32 0, $addr, 0)>;
-def : Pat<(i64 (load I32:$addr)), (LOAD_I64 0, $addr, 0)>;
-def : Pat<(f32 (load I32:$addr)), (LOAD_F32 0, $addr, 0)>;
-def : Pat<(f64 (load I32:$addr)), (LOAD_F64 0, $addr, 0)>;
+def : Pat<(i32 (load I32:$addr)), (LOAD_I32 0, 0, $addr)>;
+def : Pat<(i64 (load I32:$addr)), (LOAD_I64 0, 0, $addr)>;
+def : Pat<(f32 (load I32:$addr)), (LOAD_F32 0, 0, $addr)>;
+def : Pat<(f64 (load I32:$addr)), (LOAD_F64 0, 0, $addr)>;
// Select loads with a constant offset.
def : Pat<(i32 (load (regPlusImm I32:$addr, imm:$off))),
- (LOAD_I32 imm:$off, $addr, 0)>;
+ (LOAD_I32 0, imm:$off, $addr)>;
def : Pat<(i64 (load (regPlusImm I32:$addr, imm:$off))),
- (LOAD_I64 imm:$off, $addr, 0)>;
+ (LOAD_I64 0, imm:$off, $addr)>;
def : Pat<(f32 (load (regPlusImm I32:$addr, imm:$off))),
- (LOAD_F32 imm:$off, $addr, 0)>;
+ (LOAD_F32 0, imm:$off, $addr)>;
def : Pat<(f64 (load (regPlusImm I32:$addr, imm:$off))),
- (LOAD_F64 imm:$off, $addr, 0)>;
+ (LOAD_F64 0, imm:$off, $addr)>;
def : Pat<(i32 (load (or_is_add I32:$addr, imm:$off))),
- (LOAD_I32 imm:$off, $addr, 0)>;
+ (LOAD_I32 0, imm:$off, $addr)>;
def : Pat<(i64 (load (or_is_add I32:$addr, imm:$off))),
- (LOAD_I64 imm:$off, $addr, 0)>;
+ (LOAD_I64 0, imm:$off, $addr)>;
def : Pat<(f32 (load (or_is_add I32:$addr, imm:$off))),
- (LOAD_F32 imm:$off, $addr, 0)>;
+ (LOAD_F32 0, imm:$off, $addr)>;
def : Pat<(f64 (load (or_is_add I32:$addr, imm:$off))),
- (LOAD_F64 imm:$off, $addr, 0)>;
+ (LOAD_F64 0, imm:$off, $addr)>;
def : Pat<(i32 (load (regPlusGA I32:$addr,
(WebAssemblywrapper tglobaladdr:$off)))),
- (LOAD_I32 tglobaladdr:$off, $addr, 0)>;
+ (LOAD_I32 0, tglobaladdr:$off, $addr)>;
def : Pat<(i64 (load (regPlusGA I32:$addr,
(WebAssemblywrapper tglobaladdr:$off)))),
- (LOAD_I64 tglobaladdr:$off, $addr, 0)>;
+ (LOAD_I64 0, tglobaladdr:$off, $addr)>;
def : Pat<(f32 (load (regPlusGA I32:$addr,
(WebAssemblywrapper tglobaladdr:$off)))),
- (LOAD_F32 tglobaladdr:$off, $addr, 0)>;
+ (LOAD_F32 0, tglobaladdr:$off, $addr)>;
def : Pat<(f64 (load (regPlusGA I32:$addr,
(WebAssemblywrapper tglobaladdr:$off)))),
- (LOAD_F64 tglobaladdr:$off, $addr, 0)>;
+ (LOAD_F64 0, tglobaladdr:$off, $addr)>;
def : Pat<(i32 (load (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))),
- (LOAD_I32 texternalsym:$off, $addr, 0)>;
+ (LOAD_I32 0, texternalsym:$off, $addr)>;
def : Pat<(i64 (load (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))),
- (LOAD_I64 texternalsym:$off, $addr, 0)>;
+ (LOAD_I64 0, texternalsym:$off, $addr)>;
def : Pat<(f32 (load (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))),
- (LOAD_F32 texternalsym:$off, $addr, 0)>;
+ (LOAD_F32 0, texternalsym:$off, $addr)>;
def : Pat<(f64 (load (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))),
- (LOAD_F64 texternalsym:$off, $addr, 0)>;
+ (LOAD_F64 0, texternalsym:$off, $addr)>;
// Select loads with just a constant offset.
-def : Pat<(i32 (load imm:$off)), (LOAD_I32 imm:$off, (CONST_I32 0), 0)>;
-def : Pat<(i64 (load imm:$off)), (LOAD_I64 imm:$off, (CONST_I32 0), 0)>;
-def : Pat<(f32 (load imm:$off)), (LOAD_F32 imm:$off, (CONST_I32 0), 0)>;
-def : Pat<(f64 (load imm:$off)), (LOAD_F64 imm:$off, (CONST_I32 0), 0)>;
+def : Pat<(i32 (load imm:$off)), (LOAD_I32 0, imm:$off, (CONST_I32 0))>;
+def : Pat<(i64 (load imm:$off)), (LOAD_I64 0, imm:$off, (CONST_I32 0))>;
+def : Pat<(f32 (load imm:$off)), (LOAD_F32 0, imm:$off, (CONST_I32 0))>;
+def : Pat<(f64 (load imm:$off)), (LOAD_F64 0, imm:$off, (CONST_I32 0))>;
def : Pat<(i32 (load (WebAssemblywrapper tglobaladdr:$off))),
- (LOAD_I32 tglobaladdr:$off, (CONST_I32 0), 0)>;
+ (LOAD_I32 0, tglobaladdr:$off, (CONST_I32 0))>;
def : Pat<(i64 (load (WebAssemblywrapper tglobaladdr:$off))),
- (LOAD_I64 tglobaladdr:$off, (CONST_I32 0), 0)>;
+ (LOAD_I64 0, tglobaladdr:$off, (CONST_I32 0))>;
def : Pat<(f32 (load (WebAssemblywrapper tglobaladdr:$off))),
- (LOAD_F32 tglobaladdr:$off, (CONST_I32 0), 0)>;
+ (LOAD_F32 0, tglobaladdr:$off, (CONST_I32 0))>;
def : Pat<(f64 (load (WebAssemblywrapper tglobaladdr:$off))),
- (LOAD_F64 tglobaladdr:$off, (CONST_I32 0), 0)>;
+ (LOAD_F64 0, tglobaladdr:$off, (CONST_I32 0))>;
def : Pat<(i32 (load (WebAssemblywrapper texternalsym:$off))),
- (LOAD_I32 texternalsym:$off, (CONST_I32 0), 0)>;
+ (LOAD_I32 0, texternalsym:$off, (CONST_I32 0))>;
def : Pat<(i64 (load (WebAssemblywrapper texternalsym:$off))),
- (LOAD_I64 texternalsym:$off, (CONST_I32 0), 0)>;
+ (LOAD_I64 0, texternalsym:$off, (CONST_I32 0))>;
def : Pat<(f32 (load (WebAssemblywrapper texternalsym:$off))),
- (LOAD_F32 texternalsym:$off, (CONST_I32 0), 0)>;
+ (LOAD_F32 0, texternalsym:$off, (CONST_I32 0))>;
def : Pat<(f64 (load (WebAssemblywrapper texternalsym:$off))),
- (LOAD_F64 texternalsym:$off, (CONST_I32 0), 0)>;
+ (LOAD_F64 0, texternalsym:$off, (CONST_I32 0))>;
let Defs = [ARGUMENTS] in {
// Extending load.
-def LOAD8_S_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr,
- P2Align:$p2align), [],
- "i32.load8_s\t$dst, ${off}(${addr})${p2align}">;
-def LOAD8_U_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr,
- P2Align:$p2align), [],
- "i32.load8_u\t$dst, ${off}(${addr})${p2align}">;
-def LOAD16_S_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr,
- P2Align:$p2align), [],
- "i32.load16_s\t$dst, ${off}(${addr})${p2align}">;
-def LOAD16_U_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr,
- P2Align:$p2align), [],
- "i32.load16_u\t$dst, ${off}(${addr})${p2align}">;
-def LOAD8_S_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr,
- P2Align:$p2align), [],
- "i64.load8_s\t$dst, ${off}(${addr})${p2align}">;
-def LOAD8_U_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr,
- P2Align:$p2align), [],
- "i64.load8_u\t$dst, ${off}(${addr})${p2align}">;
-def LOAD16_S_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr,
- P2Align:$p2align), [],
- "i64.load16_s\t$dst, ${off}(${addr})${p2align}">;
-def LOAD16_U_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr,
- P2Align:$p2align), [],
- "i64.load16_u\t$dst, ${off}(${addr})${p2align}">;
-def LOAD32_S_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr,
- P2Align:$p2align), [],
- "i64.load32_s\t$dst, ${off}(${addr})${p2align}">;
-def LOAD32_U_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr,
- P2Align:$p2align), [],
- "i64.load32_u\t$dst, ${off}(${addr})${p2align}">;
+def LOAD8_S_I32 : I<(outs I32:$dst),
+ (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
+ [], "i32.load8_s\t$dst, ${off}(${addr})${p2align}", 0x2c>;
+def LOAD8_U_I32 : I<(outs I32:$dst),
+ (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
+ [], "i32.load8_u\t$dst, ${off}(${addr})${p2align}", 0x2d>;
+def LOAD16_S_I32 : I<(outs I32:$dst),
+ (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
+ [], "i32.load16_s\t$dst, ${off}(${addr})${p2align}", 0x2e>;
+def LOAD16_U_I32 : I<(outs I32:$dst),
+ (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
+ [], "i32.load16_u\t$dst, ${off}(${addr})${p2align}", 0x2f>;
+def LOAD8_S_I64 : I<(outs I64:$dst),
+ (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
+ [], "i64.load8_s\t$dst, ${off}(${addr})${p2align}", 0x30>;
+def LOAD8_U_I64 : I<(outs I64:$dst),
+ (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
+ [], "i64.load8_u\t$dst, ${off}(${addr})${p2align}", 0x31>;
+def LOAD16_S_I64 : I<(outs I64:$dst),
+ (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
+ [], "i64.load16_s\t$dst, ${off}(${addr})${p2align}", 0x32>;
+def LOAD16_U_I64 : I<(outs I64:$dst),
+ (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
+ [], "i64.load16_u\t$dst, ${off}(${addr})${p2align}", 0x33>;
+def LOAD32_S_I64 : I<(outs I64:$dst),
+ (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
+ [], "i64.load32_s\t$dst, ${off}(${addr})${p2align}", 0x34>;
+def LOAD32_U_I64 : I<(outs I64:$dst),
+ (ins P2Align:$p2align, offset32_op:$off, I32:$addr),
+ [], "i64.load32_u\t$dst, ${off}(${addr})${p2align}", 0x35>;
} // Defs = [ARGUMENTS]
// Select extending loads with no constant offset.
-def : Pat<(i32 (sextloadi8 I32:$addr)), (LOAD8_S_I32 0, $addr, 0)>;
-def : Pat<(i32 (zextloadi8 I32:$addr)), (LOAD8_U_I32 0, $addr, 0)>;
-def : Pat<(i32 (sextloadi16 I32:$addr)), (LOAD16_S_I32 0, $addr, 0)>;
-def : Pat<(i32 (zextloadi16 I32:$addr)), (LOAD16_U_I32 0, $addr, 0)>;
-def : Pat<(i64 (sextloadi8 I32:$addr)), (LOAD8_S_I64 0, $addr, 0)>;
-def : Pat<(i64 (zextloadi8 I32:$addr)), (LOAD8_U_I64 0, $addr, 0)>;
-def : Pat<(i64 (sextloadi16 I32:$addr)), (LOAD16_S_I64 0, $addr, 0)>;
-def : Pat<(i64 (zextloadi16 I32:$addr)), (LOAD16_U_I64 0, $addr, 0)>;
-def : Pat<(i64 (sextloadi32 I32:$addr)), (LOAD32_S_I64 0, $addr, 0)>;
-def : Pat<(i64 (zextloadi32 I32:$addr)), (LOAD32_U_I64 0, $addr, 0)>;
+def : Pat<(i32 (sextloadi8 I32:$addr)), (LOAD8_S_I32 0, 0, $addr)>;
+def : Pat<(i32 (zextloadi8 I32:$addr)), (LOAD8_U_I32 0, 0, $addr)>;
+def : Pat<(i32 (sextloadi16 I32:$addr)), (LOAD16_S_I32 0, 0, $addr)>;
+def : Pat<(i32 (zextloadi16 I32:$addr)), (LOAD16_U_I32 0, 0, $addr)>;
+def : Pat<(i64 (sextloadi8 I32:$addr)), (LOAD8_S_I64 0, 0, $addr)>;
+def : Pat<(i64 (zextloadi8 I32:$addr)), (LOAD8_U_I64 0, 0, $addr)>;
+def : Pat<(i64 (sextloadi16 I32:$addr)), (LOAD16_S_I64 0, 0, $addr)>;
+def : Pat<(i64 (zextloadi16 I32:$addr)), (LOAD16_U_I64 0, 0, $addr)>;
+def : Pat<(i64 (sextloadi32 I32:$addr)), (LOAD32_S_I64 0, 0, $addr)>;
+def : Pat<(i64 (zextloadi32 I32:$addr)), (LOAD32_U_I64 0, 0, $addr)>;
// Select extending loads with a constant offset.
def : Pat<(i32 (sextloadi8 (regPlusImm I32:$addr, imm:$off))),
- (LOAD8_S_I32 imm:$off, $addr, 0)>;
+ (LOAD8_S_I32 0, imm:$off, $addr)>;
def : Pat<(i32 (zextloadi8 (regPlusImm I32:$addr, imm:$off))),
- (LOAD8_U_I32 imm:$off, $addr, 0)>;
+ (LOAD8_U_I32 0, imm:$off, $addr)>;
def : Pat<(i32 (sextloadi16 (regPlusImm I32:$addr, imm:$off))),
- (LOAD16_S_I32 imm:$off, $addr, 0)>;
+ (LOAD16_S_I32 0, imm:$off, $addr)>;
def : Pat<(i32 (zextloadi16 (regPlusImm I32:$addr, imm:$off))),
- (LOAD16_U_I32 imm:$off, $addr, 0)>;
+ (LOAD16_U_I32 0, imm:$off, $addr)>;
def : Pat<(i64 (sextloadi8 (regPlusImm I32:$addr, imm:$off))),
- (LOAD8_S_I64 imm:$off, $addr, 0)>;
+ (LOAD8_S_I64 0, imm:$off, $addr)>;
def : Pat<(i64 (zextloadi8 (regPlusImm I32:$addr, imm:$off))),
- (LOAD8_U_I64 imm:$off, $addr, 0)>;
+ (LOAD8_U_I64 0, imm:$off, $addr)>;
def : Pat<(i64 (sextloadi16 (regPlusImm I32:$addr, imm:$off))),
- (LOAD16_S_I64 imm:$off, $addr, 0)>;
+ (LOAD16_S_I64 0, imm:$off, $addr)>;
def : Pat<(i64 (zextloadi16 (regPlusImm I32:$addr, imm:$off))),
- (LOAD16_U_I64 imm:$off, $addr, 0)>;
+ (LOAD16_U_I64 0, imm:$off, $addr)>;
def : Pat<(i64 (sextloadi32 (regPlusImm I32:$addr, imm:$off))),
- (LOAD32_S_I64 imm:$off, $addr, 0)>;
+ (LOAD32_S_I64 0, imm:$off, $addr)>;
def : Pat<(i64 (zextloadi32 (regPlusImm I32:$addr, imm:$off))),
- (LOAD32_U_I64 imm:$off, $addr, 0)>;
+ (LOAD32_U_I64 0, imm:$off, $addr)>;
def : Pat<(i32 (sextloadi8 (or_is_add I32:$addr, imm:$off))),
- (LOAD8_S_I32 imm:$off, $addr, 0)>;
+ (LOAD8_S_I32 0, imm:$off, $addr)>;
def : Pat<(i32 (zextloadi8 (or_is_add I32:$addr, imm:$off))),
- (LOAD8_U_I32 imm:$off, $addr, 0)>;
+ (LOAD8_U_I32 0, imm:$off, $addr)>;
def : Pat<(i32 (sextloadi16 (or_is_add I32:$addr, imm:$off))),
- (LOAD16_S_I32 imm:$off, $addr, 0)>;
+ (LOAD16_S_I32 0, imm:$off, $addr)>;
def : Pat<(i32 (zextloadi16 (or_is_add I32:$addr, imm:$off))),
- (LOAD16_U_I32 imm:$off, $addr, 0)>;
+ (LOAD16_U_I32 0, imm:$off, $addr)>;
def : Pat<(i64 (sextloadi8 (or_is_add I32:$addr, imm:$off))),
- (LOAD8_S_I64 imm:$off, $addr, 0)>;
+ (LOAD8_S_I64 0, imm:$off, $addr)>;
def : Pat<(i64 (zextloadi8 (or_is_add I32:$addr, imm:$off))),
- (LOAD8_U_I64 imm:$off, $addr, 0)>;
+ (LOAD8_U_I64 0, imm:$off, $addr)>;
def : Pat<(i64 (sextloadi16 (or_is_add I32:$addr, imm:$off))),
- (LOAD16_S_I64 imm:$off, $addr, 0)>;
+ (LOAD16_S_I64 0, imm:$off, $addr)>;
def : Pat<(i64 (zextloadi16 (or_is_add I32:$addr, imm:$off))),
- (LOAD16_U_I64 imm:$off, $addr, 0)>;
+ (LOAD16_U_I64 0, imm:$off, $addr)>;
def : Pat<(i64 (sextloadi32 (or_is_add I32:$addr, imm:$off))),
- (LOAD32_S_I64 imm:$off, $addr, 0)>;
+ (LOAD32_S_I64 0, imm:$off, $addr)>;
def : Pat<(i64 (zextloadi32 (or_is_add I32:$addr, imm:$off))),
- (LOAD32_U_I64 imm:$off, $addr, 0)>;
+ (LOAD32_U_I64 0, imm:$off, $addr)>;
def : Pat<(i32 (sextloadi8 (regPlusGA I32:$addr,
(WebAssemblywrapper tglobaladdr:$off)))),
- (LOAD8_S_I32 tglobaladdr:$off, $addr, 0)>;
+ (LOAD8_S_I32 0, tglobaladdr:$off, $addr)>;
def : Pat<(i32 (zextloadi8 (regPlusGA I32:$addr,
(WebAssemblywrapper tglobaladdr:$off)))),
- (LOAD8_U_I32 tglobaladdr:$off, $addr, 0)>;
+ (LOAD8_U_I32 0, tglobaladdr:$off, $addr)>;
def : Pat<(i32 (sextloadi16 (regPlusGA I32:$addr,
(WebAssemblywrapper tglobaladdr:$off)))),
- (LOAD16_S_I32 tglobaladdr:$off, $addr, 0)>;
+ (LOAD16_S_I32 0, tglobaladdr:$off, $addr)>;
def : Pat<(i32 (zextloadi16 (regPlusGA I32:$addr,
(WebAssemblywrapper tglobaladdr:$off)))),
- (LOAD16_U_I32 tglobaladdr:$off, $addr, 0)>;
+ (LOAD16_U_I32 0, tglobaladdr:$off, $addr)>;
def : Pat<(i64 (sextloadi8 (regPlusGA I32:$addr,
(WebAssemblywrapper tglobaladdr:$off)))),
- (LOAD8_S_I64 tglobaladdr:$off, $addr, 0)>;
+ (LOAD8_S_I64 0, tglobaladdr:$off, $addr)>;
def : Pat<(i64 (zextloadi8 (regPlusGA I32:$addr,
(WebAssemblywrapper tglobaladdr:$off)))),
- (LOAD8_U_I64 tglobaladdr:$off, $addr, 0)>;
+ (LOAD8_U_I64 0, tglobaladdr:$off, $addr)>;
def : Pat<(i64 (sextloadi16 (regPlusGA I32:$addr,
(WebAssemblywrapper tglobaladdr:$off)))),
- (LOAD16_S_I64 tglobaladdr:$off, $addr, 0)>;
+ (LOAD16_S_I64 0, tglobaladdr:$off, $addr)>;
def : Pat<(i64 (zextloadi16 (regPlusGA I32:$addr,
(WebAssemblywrapper tglobaladdr:$off)))),
- (LOAD16_U_I64 tglobaladdr:$off, $addr, 0)>;
+ (LOAD16_U_I64 0, tglobaladdr:$off, $addr)>;
def : Pat<(i64 (sextloadi32 (regPlusGA I32:$addr,
(WebAssemblywrapper tglobaladdr:$off)))),
- (LOAD32_S_I64 tglobaladdr:$off, $addr, 0)>;
+ (LOAD32_S_I64 0, tglobaladdr:$off, $addr)>;
def : Pat<(i64 (zextloadi32 (regPlusGA I32:$addr,
(WebAssemblywrapper tglobaladdr:$off)))),
- (LOAD32_U_I64 tglobaladdr:$off, $addr, 0)>;
+ (LOAD32_U_I64 0, tglobaladdr:$off, $addr)>;
def : Pat<(i32 (sextloadi8 (add I32:$addr,
(WebAssemblywrapper texternalsym:$off)))),
- (LOAD8_S_I32 texternalsym:$off, $addr, 0)>;
+ (LOAD8_S_I32 0, texternalsym:$off, $addr)>;
def : Pat<(i32 (zextloadi8 (add I32:$addr,
(WebAssemblywrapper texternalsym:$off)))),
- (LOAD8_U_I32 texternalsym:$off, $addr, 0)>;
+ (LOAD8_U_I32 0, texternalsym:$off, $addr)>;
def : Pat<(i32 (sextloadi16 (add I32:$addr,
(WebAssemblywrapper texternalsym:$off)))),
- (LOAD16_S_I32 texternalsym:$off, $addr, 0)>;
+ (LOAD16_S_I32 0, texternalsym:$off, $addr)>;
def : Pat<(i32 (zextloadi16 (add I32:$addr,
(WebAssemblywrapper texternalsym:$off)))),
- (LOAD16_U_I32 texternalsym:$off, $addr, 0)>;
+ (LOAD16_U_I32 0, texternalsym:$off, $addr)>;
def : Pat<(i64 (sextloadi8 (add I32:$addr,
(WebAssemblywrapper texternalsym:$off)))),
- (LOAD8_S_I64 texternalsym:$off, $addr, 0)>;
+ (LOAD8_S_I64 0, texternalsym:$off, $addr)>;
def : Pat<(i64 (zextloadi8 (add I32:$addr,
(WebAssemblywrapper texternalsym:$off)))),
- (LOAD8_U_I64 texternalsym:$off, $addr, 0)>;
+ (LOAD8_U_I64 0, texternalsym:$off, $addr)>;
def : Pat<(i64 (sextloadi16 (add I32:$addr,
(WebAssemblywrapper texternalsym:$off)))),
- (LOAD16_S_I64 texternalsym:$off, $addr, 0)>;
+ (LOAD16_S_I64 0, texternalsym:$off, $addr)>;
def : Pat<(i64 (zextloadi16 (add I32:$addr,
(WebAssemblywrapper texternalsym:$off)))),
- (LOAD16_U_I64 texternalsym:$off, $addr, 0)>;
+ (LOAD16_U_I64 0, texternalsym:$off, $addr)>;
def : Pat<(i64 (sextloadi32 (add I32:$addr,
(WebAssemblywrapper texternalsym:$off)))),
- (LOAD32_S_I64 texternalsym:$off, $addr, 0)>;
+ (LOAD32_S_I64 0, texternalsym:$off, $addr)>;
def : Pat<(i64 (zextloadi32 (add I32:$addr,
(WebAssemblywrapper texternalsym:$off)))),
- (LOAD32_U_I64 texternalsym:$off, $addr, 0)>;
+ (LOAD32_U_I64 0, texternalsym:$off, $addr)>;
// Select extending loads with just a constant offset.
def : Pat<(i32 (sextloadi8 imm:$off)),
- (LOAD8_S_I32 imm:$off, (CONST_I32 0), 0)>;
+ (LOAD8_S_I32 0, imm:$off, (CONST_I32 0))>;
def : Pat<(i32 (zextloadi8 imm:$off)),
- (LOAD8_U_I32 imm:$off, (CONST_I32 0), 0)>;
+ (LOAD8_U_I32 0, imm:$off, (CONST_I32 0))>;
def : Pat<(i32 (sextloadi16 imm:$off)),
- (LOAD16_S_I32 imm:$off, (CONST_I32 0), 0)>;
+ (LOAD16_S_I32 0, imm:$off, (CONST_I32 0))>;
def : Pat<(i32 (zextloadi16 imm:$off)),
- (LOAD16_U_I32 imm:$off, (CONST_I32 0), 0)>;
+ (LOAD16_U_I32 0, imm:$off, (CONST_I32 0))>;
def : Pat<(i64 (sextloadi8 imm:$off)),
- (LOAD8_S_I64 imm:$off, (CONST_I32 0), 0)>;
+ (LOAD8_S_I64 0, imm:$off, (CONST_I32 0))>;
def : Pat<(i64 (zextloadi8 imm:$off)),
- (LOAD8_U_I64 imm:$off, (CONST_I32 0), 0)>;
+ (LOAD8_U_I64 0, imm:$off, (CONST_I32 0))>;
def : Pat<(i64 (sextloadi16 imm:$off)),
- (LOAD16_S_I64 imm:$off, (CONST_I32 0), 0)>;
+ (LOAD16_S_I64 0, imm:$off, (CONST_I32 0))>;
def : Pat<(i64 (zextloadi16 imm:$off)),
- (LOAD16_U_I64 imm:$off, (CONST_I32 0), 0)>;
+ (LOAD16_U_I64 0, imm:$off, (CONST_I32 0))>;
def : Pat<(i64 (sextloadi32 imm:$off)),
- (LOAD32_S_I64 imm:$off, (CONST_I32 0), 0)>;
+ (LOAD32_S_I64 0, imm:$off, (CONST_I32 0))>;
def : Pat<(i64 (zextloadi32 imm:$off)),
- (LOAD32_U_I64 imm:$off, (CONST_I32 0), 0)>;
+ (LOAD32_U_I64 0, imm:$off, (CONST_I32 0))>;
def : Pat<(i32 (sextloadi8 (WebAssemblywrapper tglobaladdr:$off))),
- (LOAD8_S_I32 tglobaladdr:$off, (CONST_I32 0), 0)>;
+ (LOAD8_S_I32 0, tglobaladdr:$off, (CONST_I32 0))>;
def : Pat<(i32 (zextloadi8 (WebAssemblywrapper tglobaladdr:$off))),
- (LOAD8_U_I32 tglobaladdr:$off, (CONST_I32 0), 0)>;
+ (LOAD8_U_I32 0, tglobaladdr:$off, (CONST_I32 0))>;
def : Pat<(i32 (sextloadi16 (WebAssemblywrapper tglobaladdr:$off))),
- (LOAD16_S_I32 tglobaladdr:$off, (CONST_I32 0), 0)>;
+ (LOAD16_S_I32 0, tglobaladdr:$off, (CONST_I32 0))>;
def : Pat<(i32 (zextloadi16 (WebAssemblywrapper tglobaladdr:$off))),
- (LOAD16_U_I32 tglobaladdr:$off, (CONST_I32 0), 0)>;
+ (LOAD16_U_I32 0, tglobaladdr:$off, (CONST_I32 0))>;
def : Pat<(i64 (sextloadi8 (WebAssemblywrapper tglobaladdr:$off))),
- (LOAD8_S_I64 tglobaladdr:$off, (CONST_I32 0), 0)>;
+ (LOAD8_S_I64 0, tglobaladdr:$off, (CONST_I32 0))>;
def : Pat<(i64 (zextloadi8 (WebAssemblywrapper tglobaladdr:$off))),
- (LOAD8_U_I64 tglobaladdr:$off, (CONST_I32 0), 0)>;
+ (LOAD8_U_I64 0, tglobaladdr:$off, (CONST_I32 0))>;
def : Pat<(i64 (sextloadi16 (WebAssemblywrapper tglobaladdr:$off))),
- (LOAD16_S_I64 tglobaladdr:$off, (CONST_I32 0), 0)>;
+ (LOAD16_S_I64 0, tglobaladdr:$off, (CONST_I32 0))>;
def : Pat<(i64 (zextloadi16 (WebAssemblywrapper tglobaladdr:$off))),
- (LOAD16_U_I64 tglobaladdr:$off, (CONST_I32 0), 0)>;
+ (LOAD16_U_I64 0, tglobaladdr:$off, (CONST_I32 0))>;
def : Pat<(i64 (sextloadi32 (WebAssemblywrapper tglobaladdr:$off))),
- (LOAD32_S_I64 tglobaladdr:$off, (CONST_I32 0), 0)>;
+ (LOAD32_S_I64 0, tglobaladdr:$off, (CONST_I32 0))>;
def : Pat<(i64 (zextloadi32 (WebAssemblywrapper tglobaladdr:$off))),
- (LOAD32_U_I64 tglobaladdr:$off, (CONST_I32 0), 0)>;
+ (LOAD32_U_I64 0, tglobaladdr:$off, (CONST_I32 0))>;
def : Pat<(i32 (sextloadi8 (WebAssemblywrapper texternalsym:$off))),
- (LOAD8_S_I32 texternalsym:$off, (CONST_I32 0), 0)>;
+ (LOAD8_S_I32 0, texternalsym:$off, (CONST_I32 0))>;
def : Pat<(i32 (zextloadi8 (WebAssemblywrapper texternalsym:$off))),
- (LOAD8_U_I32 texternalsym:$off, (CONST_I32 0), 0)>;
+ (LOAD8_U_I32 0, texternalsym:$off, (CONST_I32 0))>;
def : Pat<(i32 (sextloadi16 (WebAssemblywrapper texternalsym:$off))),
- (LOAD16_S_I32 texternalsym:$off, (CONST_I32 0), 0)>;
+ (LOAD16_S_I32 0, texternalsym:$off, (CONST_I32 0))>;
def : Pat<(i32 (zextloadi16 (WebAssemblywrapper texternalsym:$off))),
- (LOAD16_U_I32 texternalsym:$off, (CONST_I32 0), 0)>;
+ (LOAD16_U_I32 0, texternalsym:$off, (CONST_I32 0))>;
def : Pat<(i64 (sextloadi8 (WebAssemblywrapper texternalsym:$off))),
- (LOAD8_S_I64 texternalsym:$off, (CONST_I32 0), 0)>;
+ (LOAD8_S_I64 0, texternalsym:$off, (CONST_I32 0))>;
def : Pat<(i64 (zextloadi8 (WebAssemblywrapper texternalsym:$off))),
- (LOAD8_U_I64 texternalsym:$off, (CONST_I32 0), 0)>;
+ (LOAD8_U_I64 0, texternalsym:$off, (CONST_I32 0))>;
def : Pat<(i64 (sextloadi16 (WebAssemblywrapper texternalsym:$off))),
- (LOAD16_S_I64 texternalsym:$off, (CONST_I32 0), 0)>;
+ (LOAD16_S_I64 0, texternalsym:$off, (CONST_I32 0))>;
def : Pat<(i64 (zextloadi16 (WebAssemblywrapper texternalsym:$off))),
- (LOAD16_U_I64 texternalsym:$off, (CONST_I32 0), 0)>;
+ (LOAD16_U_I64 0, texternalsym:$off, (CONST_I32 0))>;
def : Pat<(i64 (sextloadi32 (WebAssemblywrapper texternalsym:$off))),
- (LOAD32_S_I64 texternalsym:$off, (CONST_I32 0), 0)>;
+ (LOAD32_S_I64 0, texternalsym:$off, (CONST_I32 0))>;
def : Pat<(i64 (zextloadi32 (WebAssemblywrapper texternalsym:$off))),
- (LOAD32_U_I64 texternalsym:$off, (CONST_I32 0), 0)>;
+ (LOAD32_U_I64 0, texternalsym:$off, (CONST_I32 0))>;
// Resolve "don't care" extending loads to zero-extending loads. This is
// somewhat arbitrary, but zero-extending is conceptually simpler.
// Select "don't care" extending loads with no constant offset.
-def : Pat<(i32 (extloadi8 I32:$addr)), (LOAD8_U_I32 0, $addr, 0)>;
-def : Pat<(i32 (extloadi16 I32:$addr)), (LOAD16_U_I32 0, $addr, 0)>;
-def : Pat<(i64 (extloadi8 I32:$addr)), (LOAD8_U_I64 0, $addr, 0)>;
-def : Pat<(i64 (extloadi16 I32:$addr)), (LOAD16_U_I64 0, $addr, 0)>;
-def : Pat<(i64 (extloadi32 I32:$addr)), (LOAD32_U_I64 0, $addr, 0)>;
+def : Pat<(i32 (extloadi8 I32:$addr)), (LOAD8_U_I32 0, 0, $addr)>;
+def : Pat<(i32 (extloadi16 I32:$addr)), (LOAD16_U_I32 0, 0, $addr)>;
+def : Pat<(i64 (extloadi8 I32:$addr)), (LOAD8_U_I64 0, 0, $addr)>;
+def : Pat<(i64 (extloadi16 I32:$addr)), (LOAD16_U_I64 0, 0, $addr)>;
+def : Pat<(i64 (extloadi32 I32:$addr)), (LOAD32_U_I64 0, 0, $addr)>;
// Select "don't care" extending loads with a constant offset.
def : Pat<(i32 (extloadi8 (regPlusImm I32:$addr, imm:$off))),
- (LOAD8_U_I32 imm:$off, $addr, 0)>;
+ (LOAD8_U_I32 0, imm:$off, $addr)>;
def : Pat<(i32 (extloadi16 (regPlusImm I32:$addr, imm:$off))),
- (LOAD16_U_I32 imm:$off, $addr, 0)>;
+ (LOAD16_U_I32 0, imm:$off, $addr)>;
def : Pat<(i64 (extloadi8 (regPlusImm I32:$addr, imm:$off))),
- (LOAD8_U_I64 imm:$off, $addr, 0)>;
+ (LOAD8_U_I64 0, imm:$off, $addr)>;
def : Pat<(i64 (extloadi16 (regPlusImm I32:$addr, imm:$off))),
- (LOAD16_U_I64 imm:$off, $addr, 0)>;
+ (LOAD16_U_I64 0, imm:$off, $addr)>;
def : Pat<(i64 (extloadi32 (regPlusImm I32:$addr, imm:$off))),
- (LOAD32_U_I64 imm:$off, $addr, 0)>;
+ (LOAD32_U_I64 0, imm:$off, $addr)>;
def : Pat<(i32 (extloadi8 (or_is_add I32:$addr, imm:$off))),
- (LOAD8_U_I32 imm:$off, $addr, 0)>;
+ (LOAD8_U_I32 0, imm:$off, $addr)>;
def : Pat<(i32 (extloadi16 (or_is_add I32:$addr, imm:$off))),
- (LOAD16_U_I32 imm:$off, $addr, 0)>;
+ (LOAD16_U_I32 0, imm:$off, $addr)>;
def : Pat<(i64 (extloadi8 (or_is_add I32:$addr, imm:$off))),
- (LOAD8_U_I64 imm:$off, $addr, 0)>;
+ (LOAD8_U_I64 0, imm:$off, $addr)>;
def : Pat<(i64 (extloadi16 (or_is_add I32:$addr, imm:$off))),
- (LOAD16_U_I64 imm:$off, $addr, 0)>;
+ (LOAD16_U_I64 0, imm:$off, $addr)>;
def : Pat<(i64 (extloadi32 (or_is_add I32:$addr, imm:$off))),
- (LOAD32_U_I64 imm:$off, $addr, 0)>;
+ (LOAD32_U_I64 0, imm:$off, $addr)>;
def : Pat<(i32 (extloadi8 (regPlusGA I32:$addr,
(WebAssemblywrapper tglobaladdr:$off)))),
- (LOAD8_U_I32 tglobaladdr:$off, $addr, 0)>;
+ (LOAD8_U_I32 0, tglobaladdr:$off, $addr)>;
def : Pat<(i32 (extloadi16 (regPlusGA I32:$addr,
(WebAssemblywrapper tglobaladdr:$off)))),
- (LOAD16_U_I32 tglobaladdr:$off, $addr, 0)>;
+ (LOAD16_U_I32 0, tglobaladdr:$off, $addr)>;
def : Pat<(i64 (extloadi8 (regPlusGA I32:$addr,
(WebAssemblywrapper tglobaladdr:$off)))),
- (LOAD8_U_I64 tglobaladdr:$off, $addr, 0)>;
+ (LOAD8_U_I64 0, tglobaladdr:$off, $addr)>;
def : Pat<(i64 (extloadi16 (regPlusGA I32:$addr,
(WebAssemblywrapper tglobaladdr:$off)))),
- (LOAD16_U_I64 tglobaladdr:$off, $addr, 0)>;
+ (LOAD16_U_I64 0, tglobaladdr:$off, $addr)>;
def : Pat<(i64 (extloadi32 (regPlusGA I32:$addr,
(WebAssemblywrapper tglobaladdr:$off)))),
- (LOAD32_U_I64 tglobaladdr:$off, $addr, 0)>;
+ (LOAD32_U_I64 0, tglobaladdr:$off, $addr)>;
def : Pat<(i32 (extloadi8 (add I32:$addr,
(WebAssemblywrapper texternalsym:$off)))),
- (LOAD8_U_I32 texternalsym:$off, $addr, 0)>;
+ (LOAD8_U_I32 0, texternalsym:$off, $addr)>;
def : Pat<(i32 (extloadi16 (add I32:$addr,
(WebAssemblywrapper texternalsym:$off)))),
- (LOAD16_U_I32 texternalsym:$off, $addr, 0)>;
+ (LOAD16_U_I32 0, texternalsym:$off, $addr)>;
def : Pat<(i64 (extloadi8 (add I32:$addr,
(WebAssemblywrapper texternalsym:$off)))),
- (LOAD8_U_I64 texternalsym:$off, $addr, 0)>;
+ (LOAD8_U_I64 0, texternalsym:$off, $addr)>;
def : Pat<(i64 (extloadi16 (add I32:$addr,
(WebAssemblywrapper texternalsym:$off)))),
- (LOAD16_U_I64 texternalsym:$off, $addr, 0)>;
+ (LOAD16_U_I64 0, texternalsym:$off, $addr)>;
def : Pat<(i64 (extloadi32 (add I32:$addr,
(WebAssemblywrapper texternalsym:$off)))),
- (LOAD32_U_I64 texternalsym:$off, $addr, 0)>;
+ (LOAD32_U_I64 0, texternalsym:$off, $addr)>;
// Select "don't care" extending loads with just a constant offset.
def : Pat<(i32 (extloadi8 imm:$off)),
- (LOAD8_U_I32 imm:$off, (CONST_I32 0), 0)>;
+ (LOAD8_U_I32 0, imm:$off, (CONST_I32 0))>;
def : Pat<(i32 (extloadi16 imm:$off)),
- (LOAD16_U_I32 imm:$off, (CONST_I32 0), 0)>;
+ (LOAD16_U_I32 0, imm:$off, (CONST_I32 0))>;
def : Pat<(i64 (extloadi8 imm:$off)),
- (LOAD8_U_I64 imm:$off, (CONST_I32 0), 0)>;
+ (LOAD8_U_I64 0, imm:$off, (CONST_I32 0))>;
def : Pat<(i64 (extloadi16 imm:$off)),
- (LOAD16_U_I64 imm:$off, (CONST_I32 0), 0)>;
+ (LOAD16_U_I64 0, imm:$off, (CONST_I32 0))>;
def : Pat<(i64 (extloadi32 imm:$off)),
- (LOAD32_U_I64 imm:$off, (CONST_I32 0), 0)>;
+ (LOAD32_U_I64 0, imm:$off, (CONST_I32 0))>;
def : Pat<(i32 (extloadi8 (WebAssemblywrapper tglobaladdr:$off))),
- (LOAD8_U_I32 tglobaladdr:$off, (CONST_I32 0), 0)>;
+ (LOAD8_U_I32 0, tglobaladdr:$off, (CONST_I32 0))>;
def : Pat<(i32 (extloadi16 (WebAssemblywrapper tglobaladdr:$off))),
- (LOAD16_U_I32 tglobaladdr:$off, (CONST_I32 0), 0)>;
+ (LOAD16_U_I32 0, tglobaladdr:$off, (CONST_I32 0))>;
def : Pat<(i64 (extloadi8 (WebAssemblywrapper tglobaladdr:$off))),
- (LOAD8_U_I64 tglobaladdr:$off, (CONST_I32 0), 0)>;
+ (LOAD8_U_I64 0, tglobaladdr:$off, (CONST_I32 0))>;
def : Pat<(i64 (extloadi16 (WebAssemblywrapper tglobaladdr:$off))),
- (LOAD16_U_I64 tglobaladdr:$off, (CONST_I32 0), 0)>;
+ (LOAD16_U_I64 0, tglobaladdr:$off, (CONST_I32 0))>;
def : Pat<(i64 (extloadi32 (WebAssemblywrapper tglobaladdr:$off))),
- (LOAD32_U_I64 tglobaladdr:$off, (CONST_I32 0), 0)>;
+ (LOAD32_U_I64 0, tglobaladdr:$off, (CONST_I32 0))>;
def : Pat<(i32 (extloadi8 (WebAssemblywrapper texternalsym:$off))),
- (LOAD8_U_I32 texternalsym:$off, (CONST_I32 0), 0)>;
+ (LOAD8_U_I32 0, texternalsym:$off, (CONST_I32 0))>;
def : Pat<(i32 (extloadi16 (WebAssemblywrapper texternalsym:$off))),
- (LOAD16_U_I32 texternalsym:$off, (CONST_I32 0), 0)>;
+ (LOAD16_U_I32 0, texternalsym:$off, (CONST_I32 0))>;
def : Pat<(i64 (extloadi8 (WebAssemblywrapper texternalsym:$off))),
- (LOAD8_U_I64 texternalsym:$off, (CONST_I32 0), 0)>;
+ (LOAD8_U_I64 0, texternalsym:$off, (CONST_I32 0))>;
def : Pat<(i64 (extloadi16 (WebAssemblywrapper texternalsym:$off))),
- (LOAD16_U_I64 texternalsym:$off, (CONST_I32 0), 0)>;
+ (LOAD16_U_I64 0, texternalsym:$off, (CONST_I32 0))>;
def : Pat<(i64 (extloadi32 (WebAssemblywrapper texternalsym:$off))),
- (LOAD32_U_I64 tglobaladdr:$off, (CONST_I32 0), 0)>;
+ (LOAD32_U_I64 0, texternalsym:$off, (CONST_I32 0))>;
let Defs = [ARGUMENTS] in {
// Basic store.
-// Note that we split the patterns out of the instruction definitions because
-// WebAssembly's stores return their operand value, and tablegen doesn't like
-// instruction definition patterns that don't reference all of the output
-// operands.
// Note: WebAssembly inverts SelectionDAG's usual operand order.
-def STORE_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr,
- P2Align:$p2align, I32:$val), [],
- "i32.store\t$dst, ${off}(${addr})${p2align}, $val">;
-def STORE_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr,
- P2Align:$p2align, I64:$val), [],
- "i64.store\t$dst, ${off}(${addr})${p2align}, $val">;
-def STORE_F32 : I<(outs F32:$dst), (ins i32imm:$off, I32:$addr,
- P2Align:$p2align, F32:$val), [],
- "f32.store\t$dst, ${off}(${addr})${p2align}, $val">;
-def STORE_F64 : I<(outs F64:$dst), (ins i32imm:$off, I32:$addr,
- P2Align:$p2align, F64:$val), [],
- "f64.store\t$dst, ${off}(${addr})${p2align}, $val">;
+def STORE_I32 : I<(outs), (ins P2Align:$p2align, offset32_op:$off, I32:$addr,
+ I32:$val), [],
+ "i32.store\t${off}(${addr})${p2align}, $val", 0x36>;
+def STORE_I64 : I<(outs), (ins P2Align:$p2align, offset32_op:$off, I32:$addr,
+ I64:$val), [],
+ "i64.store\t${off}(${addr})${p2align}, $val", 0x37>;
+def STORE_F32 : I<(outs), (ins P2Align:$p2align, offset32_op:$off, I32:$addr,
+ F32:$val), [],
+ "f32.store\t${off}(${addr})${p2align}, $val", 0x38>;
+def STORE_F64 : I<(outs), (ins P2Align:$p2align, offset32_op:$off, I32:$addr,
+ F64:$val), [],
+ "f64.store\t${off}(${addr})${p2align}, $val", 0x39>;
} // Defs = [ARGUMENTS]
// Select stores with no constant offset.
-def : Pat<(store I32:$val, I32:$addr), (STORE_I32 0, I32:$addr, 0, I32:$val)>;
-def : Pat<(store I64:$val, I32:$addr), (STORE_I64 0, I32:$addr, 0, I64:$val)>;
-def : Pat<(store F32:$val, I32:$addr), (STORE_F32 0, I32:$addr, 0, F32:$val)>;
-def : Pat<(store F64:$val, I32:$addr), (STORE_F64 0, I32:$addr, 0, F64:$val)>;
+def : Pat<(store I32:$val, I32:$addr), (STORE_I32 0, 0, I32:$addr, I32:$val)>;
+def : Pat<(store I64:$val, I32:$addr), (STORE_I64 0, 0, I32:$addr, I64:$val)>;
+def : Pat<(store F32:$val, I32:$addr), (STORE_F32 0, 0, I32:$addr, F32:$val)>;
+def : Pat<(store F64:$val, I32:$addr), (STORE_F64 0, 0, I32:$addr, F64:$val)>;
// Select stores with a constant offset.
def : Pat<(store I32:$val, (regPlusImm I32:$addr, imm:$off)),
- (STORE_I32 imm:$off, I32:$addr, 0, I32:$val)>;
+ (STORE_I32 0, imm:$off, I32:$addr, I32:$val)>;
def : Pat<(store I64:$val, (regPlusImm I32:$addr, imm:$off)),
- (STORE_I64 imm:$off, I32:$addr, 0, I64:$val)>;
+ (STORE_I64 0, imm:$off, I32:$addr, I64:$val)>;
def : Pat<(store F32:$val, (regPlusImm I32:$addr, imm:$off)),
- (STORE_F32 imm:$off, I32:$addr, 0, F32:$val)>;
+ (STORE_F32 0, imm:$off, I32:$addr, F32:$val)>;
def : Pat<(store F64:$val, (regPlusImm I32:$addr, imm:$off)),
- (STORE_F64 imm:$off, I32:$addr, 0, F64:$val)>;
+ (STORE_F64 0, imm:$off, I32:$addr, F64:$val)>;
def : Pat<(store I32:$val, (or_is_add I32:$addr, imm:$off)),
- (STORE_I32 imm:$off, I32:$addr, 0, I32:$val)>;
+ (STORE_I32 0, imm:$off, I32:$addr, I32:$val)>;
def : Pat<(store I64:$val, (or_is_add I32:$addr, imm:$off)),
- (STORE_I64 imm:$off, I32:$addr, 0, I64:$val)>;
+ (STORE_I64 0, imm:$off, I32:$addr, I64:$val)>;
def : Pat<(store F32:$val, (or_is_add I32:$addr, imm:$off)),
- (STORE_F32 imm:$off, I32:$addr, 0, F32:$val)>;
+ (STORE_F32 0, imm:$off, I32:$addr, F32:$val)>;
def : Pat<(store F64:$val, (or_is_add I32:$addr, imm:$off)),
- (STORE_F64 imm:$off, I32:$addr, 0, F64:$val)>;
+ (STORE_F64 0, imm:$off, I32:$addr, F64:$val)>;
def : Pat<(store I32:$val, (regPlusGA I32:$addr,
(WebAssemblywrapper tglobaladdr:$off))),
- (STORE_I32 tglobaladdr:$off, I32:$addr, 0, I32:$val)>;
+ (STORE_I32 0, tglobaladdr:$off, I32:$addr, I32:$val)>;
def : Pat<(store I64:$val, (regPlusGA I32:$addr,
(WebAssemblywrapper tglobaladdr:$off))),
- (STORE_I64 tglobaladdr:$off, I32:$addr, 0, I64:$val)>;
+ (STORE_I64 0, tglobaladdr:$off, I32:$addr, I64:$val)>;
def : Pat<(store F32:$val, (regPlusGA I32:$addr,
(WebAssemblywrapper tglobaladdr:$off))),
- (STORE_F32 tglobaladdr:$off, I32:$addr, 0, F32:$val)>;
+ (STORE_F32 0, tglobaladdr:$off, I32:$addr, F32:$val)>;
def : Pat<(store F64:$val, (regPlusGA I32:$addr,
(WebAssemblywrapper tglobaladdr:$off))),
- (STORE_F64 tglobaladdr:$off, I32:$addr, 0, F64:$val)>;
+ (STORE_F64 0, tglobaladdr:$off, I32:$addr, F64:$val)>;
def : Pat<(store I32:$val, (add I32:$addr,
(WebAssemblywrapper texternalsym:$off))),
- (STORE_I32 texternalsym:$off, I32:$addr, 0, I32:$val)>;
+ (STORE_I32 0, texternalsym:$off, I32:$addr, I32:$val)>;
def : Pat<(store I64:$val, (add I32:$addr,
(WebAssemblywrapper texternalsym:$off))),
- (STORE_I64 texternalsym:$off, I32:$addr, 0, I64:$val)>;
+ (STORE_I64 0, texternalsym:$off, I32:$addr, I64:$val)>;
def : Pat<(store F32:$val, (add I32:$addr,
(WebAssemblywrapper texternalsym:$off))),
- (STORE_F32 texternalsym:$off, I32:$addr, 0, F32:$val)>;
+ (STORE_F32 0, texternalsym:$off, I32:$addr, F32:$val)>;
def : Pat<(store F64:$val, (add I32:$addr,
(WebAssemblywrapper texternalsym:$off))),
- (STORE_F64 texternalsym:$off, I32:$addr, 0, F64:$val)>;
+ (STORE_F64 0, texternalsym:$off, I32:$addr, F64:$val)>;
// Select stores with just a constant offset.
def : Pat<(store I32:$val, imm:$off),
- (STORE_I32 imm:$off, (CONST_I32 0), 0, I32:$val)>;
+ (STORE_I32 0, imm:$off, (CONST_I32 0), I32:$val)>;
def : Pat<(store I64:$val, imm:$off),
- (STORE_I64 imm:$off, (CONST_I32 0), 0, I64:$val)>;
+ (STORE_I64 0, imm:$off, (CONST_I32 0), I64:$val)>;
def : Pat<(store F32:$val, imm:$off),
- (STORE_F32 imm:$off, (CONST_I32 0), 0, F32:$val)>;
+ (STORE_F32 0, imm:$off, (CONST_I32 0), F32:$val)>;
def : Pat<(store F64:$val, imm:$off),
- (STORE_F64 imm:$off, (CONST_I32 0), 0, F64:$val)>;
+ (STORE_F64 0, imm:$off, (CONST_I32 0), F64:$val)>;
def : Pat<(store I32:$val, (WebAssemblywrapper tglobaladdr:$off)),
- (STORE_I32 tglobaladdr:$off, (CONST_I32 0), 0, I32:$val)>;
+ (STORE_I32 0, tglobaladdr:$off, (CONST_I32 0), I32:$val)>;
def : Pat<(store I64:$val, (WebAssemblywrapper tglobaladdr:$off)),
- (STORE_I64 tglobaladdr:$off, (CONST_I32 0), 0, I64:$val)>;
+ (STORE_I64 0, tglobaladdr:$off, (CONST_I32 0), I64:$val)>;
def : Pat<(store F32:$val, (WebAssemblywrapper tglobaladdr:$off)),
- (STORE_F32 tglobaladdr:$off, (CONST_I32 0), 0, F32:$val)>;
+ (STORE_F32 0, tglobaladdr:$off, (CONST_I32 0), F32:$val)>;
def : Pat<(store F64:$val, (WebAssemblywrapper tglobaladdr:$off)),
- (STORE_F64 tglobaladdr:$off, (CONST_I32 0), 0, F64:$val)>;
+ (STORE_F64 0, tglobaladdr:$off, (CONST_I32 0), F64:$val)>;
def : Pat<(store I32:$val, (WebAssemblywrapper texternalsym:$off)),
- (STORE_I32 texternalsym:$off, (CONST_I32 0), 0, I32:$val)>;
+ (STORE_I32 0, texternalsym:$off, (CONST_I32 0), I32:$val)>;
def : Pat<(store I64:$val, (WebAssemblywrapper texternalsym:$off)),
- (STORE_I64 texternalsym:$off, (CONST_I32 0), 0, I64:$val)>;
+ (STORE_I64 0, texternalsym:$off, (CONST_I32 0), I64:$val)>;
def : Pat<(store F32:$val, (WebAssemblywrapper texternalsym:$off)),
- (STORE_F32 texternalsym:$off, (CONST_I32 0), 0, F32:$val)>;
+ (STORE_F32 0, texternalsym:$off, (CONST_I32 0), F32:$val)>;
def : Pat<(store F64:$val, (WebAssemblywrapper texternalsym:$off)),
- (STORE_F64 texternalsym:$off, (CONST_I32 0), 0, F64:$val)>;
+ (STORE_F64 0, texternalsym:$off, (CONST_I32 0), F64:$val)>;
let Defs = [ARGUMENTS] in {
// Truncating store.
-def STORE8_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr,
- P2Align:$p2align, I32:$val), [],
- "i32.store8\t$dst, ${off}(${addr})${p2align}, $val">;
-def STORE16_I32 : I<(outs I32:$dst), (ins i32imm:$off, I32:$addr,
- P2Align:$p2align, I32:$val), [],
- "i32.store16\t$dst, ${off}(${addr})${p2align}, $val">;
-def STORE8_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr,
- P2Align:$p2align, I64:$val), [],
- "i64.store8\t$dst, ${off}(${addr})${p2align}, $val">;
-def STORE16_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr,
- P2Align:$p2align, I64:$val), [],
- "i64.store16\t$dst, ${off}(${addr})${p2align}, $val">;
-def STORE32_I64 : I<(outs I64:$dst), (ins i32imm:$off, I32:$addr,
- P2Align:$p2align, I64:$val), [],
- "i64.store32\t$dst, ${off}(${addr})${p2align}, $val">;
+def STORE8_I32 : I<(outs), (ins P2Align:$p2align, offset32_op:$off, I32:$addr,
+ I32:$val), [],
+ "i32.store8\t${off}(${addr})${p2align}, $val", 0x3a>;
+def STORE16_I32 : I<(outs), (ins P2Align:$p2align, offset32_op:$off, I32:$addr,
+ I32:$val), [],
+ "i32.store16\t${off}(${addr})${p2align}, $val", 0x3b>;
+def STORE8_I64 : I<(outs), (ins P2Align:$p2align, offset32_op:$off, I32:$addr,
+ I64:$val), [],
+ "i64.store8\t${off}(${addr})${p2align}, $val", 0x3c>;
+def STORE16_I64 : I<(outs), (ins P2Align:$p2align, offset32_op:$off, I32:$addr,
+ I64:$val), [],
+ "i64.store16\t${off}(${addr})${p2align}, $val", 0x3d>;
+def STORE32_I64 : I<(outs), (ins P2Align:$p2align, offset32_op:$off, I32:$addr,
+ I64:$val), [],
+ "i64.store32\t${off}(${addr})${p2align}, $val", 0x3e>;
} // Defs = [ARGUMENTS]
// Select truncating stores with no constant offset.
def : Pat<(truncstorei8 I32:$val, I32:$addr),
- (STORE8_I32 0, I32:$addr, 0, I32:$val)>;
+ (STORE8_I32 0, 0, I32:$addr, I32:$val)>;
def : Pat<(truncstorei16 I32:$val, I32:$addr),
- (STORE16_I32 0, I32:$addr, 0, I32:$val)>;
+ (STORE16_I32 0, 0, I32:$addr, I32:$val)>;
def : Pat<(truncstorei8 I64:$val, I32:$addr),
- (STORE8_I64 0, I32:$addr, 0, I64:$val)>;
+ (STORE8_I64 0, 0, I32:$addr, I64:$val)>;
def : Pat<(truncstorei16 I64:$val, I32:$addr),
- (STORE16_I64 0, I32:$addr, 0, I64:$val)>;
+ (STORE16_I64 0, 0, I32:$addr, I64:$val)>;
def : Pat<(truncstorei32 I64:$val, I32:$addr),
- (STORE32_I64 0, I32:$addr, 0, I64:$val)>;
+ (STORE32_I64 0, 0, I32:$addr, I64:$val)>;
// Select truncating stores with a constant offset.
def : Pat<(truncstorei8 I32:$val, (regPlusImm I32:$addr, imm:$off)),
- (STORE8_I32 imm:$off, I32:$addr, 0, I32:$val)>;
+ (STORE8_I32 0, imm:$off, I32:$addr, I32:$val)>;
def : Pat<(truncstorei16 I32:$val, (regPlusImm I32:$addr, imm:$off)),
- (STORE16_I32 imm:$off, I32:$addr, 0, I32:$val)>;
+ (STORE16_I32 0, imm:$off, I32:$addr, I32:$val)>;
def : Pat<(truncstorei8 I64:$val, (regPlusImm I32:$addr, imm:$off)),
- (STORE8_I64 imm:$off, I32:$addr, 0, I64:$val)>;
+ (STORE8_I64 0, imm:$off, I32:$addr, I64:$val)>;
def : Pat<(truncstorei16 I64:$val, (regPlusImm I32:$addr, imm:$off)),
- (STORE16_I64 imm:$off, I32:$addr, 0, I64:$val)>;
+ (STORE16_I64 0, imm:$off, I32:$addr, I64:$val)>;
def : Pat<(truncstorei32 I64:$val, (regPlusImm I32:$addr, imm:$off)),
- (STORE32_I64 imm:$off, I32:$addr, 0, I64:$val)>;
+ (STORE32_I64 0, imm:$off, I32:$addr, I64:$val)>;
def : Pat<(truncstorei8 I32:$val, (or_is_add I32:$addr, imm:$off)),
- (STORE8_I32 imm:$off, I32:$addr, 0, I32:$val)>;
+ (STORE8_I32 0, imm:$off, I32:$addr, I32:$val)>;
def : Pat<(truncstorei16 I32:$val, (or_is_add I32:$addr, imm:$off)),
- (STORE16_I32 imm:$off, I32:$addr, 0, I32:$val)>;
+ (STORE16_I32 0, imm:$off, I32:$addr, I32:$val)>;
def : Pat<(truncstorei8 I64:$val, (or_is_add I32:$addr, imm:$off)),
- (STORE8_I64 imm:$off, I32:$addr, 0, I64:$val)>;
+ (STORE8_I64 0, imm:$off, I32:$addr, I64:$val)>;
def : Pat<(truncstorei16 I64:$val, (or_is_add I32:$addr, imm:$off)),
- (STORE16_I64 imm:$off, I32:$addr, 0, I64:$val)>;
+ (STORE16_I64 0, imm:$off, I32:$addr, I64:$val)>;
def : Pat<(truncstorei32 I64:$val, (or_is_add I32:$addr, imm:$off)),
- (STORE32_I64 imm:$off, I32:$addr, 0, I64:$val)>;
+ (STORE32_I64 0, imm:$off, I32:$addr, I64:$val)>;
def : Pat<(truncstorei8 I32:$val,
(regPlusGA I32:$addr,
(WebAssemblywrapper tglobaladdr:$off))),
- (STORE8_I32 tglobaladdr:$off, I32:$addr, 0, I32:$val)>;
+ (STORE8_I32 0, tglobaladdr:$off, I32:$addr, I32:$val)>;
def : Pat<(truncstorei16 I32:$val,
(regPlusGA I32:$addr,
(WebAssemblywrapper tglobaladdr:$off))),
- (STORE16_I32 tglobaladdr:$off, I32:$addr, 0, I32:$val)>;
+ (STORE16_I32 0, tglobaladdr:$off, I32:$addr, I32:$val)>;
def : Pat<(truncstorei8 I64:$val,
(regPlusGA I32:$addr,
(WebAssemblywrapper tglobaladdr:$off))),
- (STORE8_I64 tglobaladdr:$off, I32:$addr, 0, I64:$val)>;
+ (STORE8_I64 0, tglobaladdr:$off, I32:$addr, I64:$val)>;
def : Pat<(truncstorei16 I64:$val,
(regPlusGA I32:$addr,
(WebAssemblywrapper tglobaladdr:$off))),
- (STORE16_I64 tglobaladdr:$off, I32:$addr, 0, I64:$val)>;
+ (STORE16_I64 0, tglobaladdr:$off, I32:$addr, I64:$val)>;
def : Pat<(truncstorei32 I64:$val,
(regPlusGA I32:$addr,
(WebAssemblywrapper tglobaladdr:$off))),
- (STORE32_I64 tglobaladdr:$off, I32:$addr, 0, I64:$val)>;
+ (STORE32_I64 0, tglobaladdr:$off, I32:$addr, I64:$val)>;
def : Pat<(truncstorei8 I32:$val, (add I32:$addr,
(WebAssemblywrapper texternalsym:$off))),
- (STORE8_I32 texternalsym:$off, I32:$addr, 0, I32:$val)>;
+ (STORE8_I32 0, texternalsym:$off, I32:$addr, I32:$val)>;
def : Pat<(truncstorei16 I32:$val,
(add I32:$addr,
(WebAssemblywrapper texternalsym:$off))),
- (STORE16_I32 texternalsym:$off, I32:$addr, 0, I32:$val)>;
+ (STORE16_I32 0, texternalsym:$off, I32:$addr, I32:$val)>;
def : Pat<(truncstorei8 I64:$val,
(add I32:$addr,
(WebAssemblywrapper texternalsym:$off))),
- (STORE8_I64 texternalsym:$off, I32:$addr, 0, I64:$val)>;
+ (STORE8_I64 0, texternalsym:$off, I32:$addr, I64:$val)>;
def : Pat<(truncstorei16 I64:$val,
(add I32:$addr,
(WebAssemblywrapper texternalsym:$off))),
- (STORE16_I64 texternalsym:$off, I32:$addr, 0, I64:$val)>;
+ (STORE16_I64 0, texternalsym:$off, I32:$addr, I64:$val)>;
def : Pat<(truncstorei32 I64:$val,
(add I32:$addr,
(WebAssemblywrapper texternalsym:$off))),
- (STORE32_I64 texternalsym:$off, I32:$addr, 0, I64:$val)>;
+ (STORE32_I64 0, texternalsym:$off, I32:$addr, I64:$val)>;
// Select truncating stores with just a constant offset.
def : Pat<(truncstorei8 I32:$val, imm:$off),
- (STORE8_I32 imm:$off, (CONST_I32 0), 0, I32:$val)>;
+ (STORE8_I32 0, imm:$off, (CONST_I32 0), I32:$val)>;
def : Pat<(truncstorei16 I32:$val, imm:$off),
- (STORE16_I32 imm:$off, (CONST_I32 0), 0, I32:$val)>;
+ (STORE16_I32 0, imm:$off, (CONST_I32 0), I32:$val)>;
def : Pat<(truncstorei8 I64:$val, imm:$off),
- (STORE8_I64 imm:$off, (CONST_I32 0), 0, I64:$val)>;
+ (STORE8_I64 0, imm:$off, (CONST_I32 0), I64:$val)>;
def : Pat<(truncstorei16 I64:$val, imm:$off),
- (STORE16_I64 imm:$off, (CONST_I32 0), 0, I64:$val)>;
+ (STORE16_I64 0, imm:$off, (CONST_I32 0), I64:$val)>;
def : Pat<(truncstorei32 I64:$val, imm:$off),
- (STORE32_I64 imm:$off, (CONST_I32 0), 0, I64:$val)>;
+ (STORE32_I64 0, imm:$off, (CONST_I32 0), I64:$val)>;
def : Pat<(truncstorei8 I32:$val, (WebAssemblywrapper tglobaladdr:$off)),
- (STORE8_I32 tglobaladdr:$off, (CONST_I32 0), 0, I32:$val)>;
+ (STORE8_I32 0, tglobaladdr:$off, (CONST_I32 0), I32:$val)>;
def : Pat<(truncstorei16 I32:$val, (WebAssemblywrapper tglobaladdr:$off)),
- (STORE16_I32 tglobaladdr:$off, (CONST_I32 0), 0, I32:$val)>;
+ (STORE16_I32 0, tglobaladdr:$off, (CONST_I32 0), I32:$val)>;
def : Pat<(truncstorei8 I64:$val, (WebAssemblywrapper tglobaladdr:$off)),
- (STORE8_I64 tglobaladdr:$off, (CONST_I32 0), 0, I64:$val)>;
+ (STORE8_I64 0, tglobaladdr:$off, (CONST_I32 0), I64:$val)>;
def : Pat<(truncstorei16 I64:$val, (WebAssemblywrapper tglobaladdr:$off)),
- (STORE16_I64 tglobaladdr:$off, (CONST_I32 0), 0, I64:$val)>;
+ (STORE16_I64 0, tglobaladdr:$off, (CONST_I32 0), I64:$val)>;
def : Pat<(truncstorei32 I64:$val, (WebAssemblywrapper tglobaladdr:$off)),
- (STORE32_I64 tglobaladdr:$off, (CONST_I32 0), 0, I64:$val)>;
+ (STORE32_I64 0, tglobaladdr:$off, (CONST_I32 0), I64:$val)>;
def : Pat<(truncstorei8 I32:$val, (WebAssemblywrapper texternalsym:$off)),
- (STORE8_I32 texternalsym:$off, (CONST_I32 0), 0, I32:$val)>;
+ (STORE8_I32 0, texternalsym:$off, (CONST_I32 0), I32:$val)>;
def : Pat<(truncstorei16 I32:$val, (WebAssemblywrapper texternalsym:$off)),
- (STORE16_I32 texternalsym:$off, (CONST_I32 0), 0, I32:$val)>;
+ (STORE16_I32 0, texternalsym:$off, (CONST_I32 0), I32:$val)>;
def : Pat<(truncstorei8 I64:$val, (WebAssemblywrapper texternalsym:$off)),
- (STORE8_I64 texternalsym:$off, (CONST_I32 0), 0, I64:$val)>;
+ (STORE8_I64 0, texternalsym:$off, (CONST_I32 0), I64:$val)>;
def : Pat<(truncstorei16 I64:$val, (WebAssemblywrapper texternalsym:$off)),
- (STORE16_I64 texternalsym:$off, (CONST_I32 0), 0, I64:$val)>;
+ (STORE16_I64 0, texternalsym:$off, (CONST_I32 0), I64:$val)>;
def : Pat<(truncstorei32 I64:$val, (WebAssemblywrapper texternalsym:$off)),
- (STORE32_I64 texternalsym:$off, (CONST_I32 0), 0, I64:$val)>;
+ (STORE32_I64 0, texternalsym:$off, (CONST_I32 0), I64:$val)>;
let Defs = [ARGUMENTS] in {
// Current memory size.
-def CURRENT_MEMORY_I32 : I<(outs I32:$dst), (ins),
- [(set I32:$dst, (int_wasm_current_memory))],
- "current_memory\t$dst">,
+def CURRENT_MEMORY_I32 : I<(outs I32:$dst), (ins i32imm:$flags),
+ [],
+ "current_memory\t$dst", 0x3f>,
Requires<[HasAddr32]>;
-def CURRENT_MEMORY_I64 : I<(outs I64:$dst), (ins),
- [(set I64:$dst, (int_wasm_current_memory))],
- "current_memory\t$dst">,
- Requires<[HasAddr64]>;
// Grow memory.
-def GROW_MEMORY_I32 : I<(outs), (ins I32:$delta),
- [(int_wasm_grow_memory I32:$delta)],
- "grow_memory\t$delta">,
+def GROW_MEMORY_I32 : I<(outs), (ins i32imm:$flags, I32:$delta),
+ [],
+ "grow_memory\t$delta", 0x40>,
Requires<[HasAddr32]>;
-def GROW_MEMORY_I64 : I<(outs), (ins I64:$delta),
- [(int_wasm_grow_memory I64:$delta)],
- "grow_memory\t$delta">,
- Requires<[HasAddr64]>;
} // Defs = [ARGUMENTS]
+
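+// Select the memory intrinsics, passing 0 for the $flags immediate.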
+def : Pat<(int_wasm_current_memory),
+ (CURRENT_MEMORY_I32 0)>;
+def : Pat<(int_wasm_grow_memory I32:$delta),
+ (GROW_MEMORY_I32 0, $delta)>;
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 3e29906219d2..e403534d580a 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -12,5 +12,8 @@
///
//===----------------------------------------------------------------------===//
-// TODO: Implement SIMD instructions.
-// Note: use Requires<[HasSIMD128]>.
+let isCommutable = 1 in {
+defm ADD : SIMDBinary<add, fadd, "add ">;
+defm MUL: SIMDBinary<mul, fmul, "mul ">;
+} // isCommutable = 1
+defm SUB: SIMDBinary<sub, fsub, "sub ">;
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp
index af53f3db967b..7ea5d05a1b21 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp
@@ -29,7 +29,7 @@ using namespace llvm;
namespace {
class WebAssemblyLowerBrUnless final : public MachineFunctionPass {
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "WebAssembly Lower br_unless";
}
@@ -104,12 +104,12 @@ bool WebAssemblyLowerBrUnless::runOnMachineFunction(MachineFunction &MF) {
}
// If we weren't able to invert the condition in place, insert an
- // expression to invert it.
+ // instruction to invert it.
if (!Inverted) {
unsigned Tmp = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
- MFI.stackifyVReg(Tmp);
BuildMI(MBB, MI, MI->getDebugLoc(), TII.get(WebAssembly::EQZ_I32), Tmp)
.addReg(Cond);
+ MFI.stackifyVReg(Tmp);
Cond = Tmp;
Inverted = true;
}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
new file mode 100644
index 000000000000..72cb1ccbe668
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyLowerEmscriptenEHSjLj.cpp
@@ -0,0 +1,1184 @@
+//=== WebAssemblyLowerEmscriptenEHSjLj.cpp - Lower exceptions for Emscripten =//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file lowers exception-related instructions and setjmp/longjmp
+/// function calls in order to use Emscripten's JavaScript try and catch
+/// mechanism.
+///
+/// To handle exceptions and setjmp/longjmps, this scheme relies on JavaScript's
+/// try and catch syntax and relevant exception-related libraries implemented
+/// in JavaScript glue code that will be produced by Emscripten. This is similar
+/// to the current Emscripten asm.js exception handling in fastcomp. For
+/// fastcomp's EH / SjLj scheme, see these files in fastcomp LLVM branch:
+/// (Location: https://github.com/kripken/emscripten-fastcomp)
+/// lib/Target/JSBackend/NaCl/LowerEmExceptionsPass.cpp
+/// lib/Target/JSBackend/NaCl/LowerEmSetjmp.cpp
+/// lib/Target/JSBackend/JSBackend.cpp
+/// lib/Target/JSBackend/CallHandlers.h
+///
+/// * Exception handling
+/// This pass lowers invokes and landingpads into library functions in JS glue
+/// code. Invokes are lowered into function wrappers called invoke wrappers that
+/// exist on the JS side and wrap the original function call with JS try-catch.
+/// If an exception occurs, the cxa_throw() function on the JS side sets some
+/// variables (see below) so we can check whether an exception occurred from
+/// wasm code and handle it appropriately.
+///
+/// * Setjmp-longjmp handling
+/// This pass lowers setjmp to a reasonably-performant approach for emscripten.
+/// The idea is that each block with a setjmp is broken up into two parts: the
+/// part containing setjmp and the part right after the setjmp. The latter part
+/// is either reached from the setjmp, or later from a longjmp. To handle the
+/// longjmp, all calls that might longjmp are also called using invoke wrappers
+/// and thus JS / try-catch. The JS longjmp() function also sets some variables
+/// so we can check whether a longjmp occurred from wasm code. Each block with a
+/// function call that might longjmp is also split up after the longjmp call.
+/// After the longjmp call, we check whether a longjmp occurred, and if it did,
+/// which setjmp it corresponds to, and jump to the right post-setjmp block.
+/// We assume setjmp-longjmp handling always runs after EH handling, which means
+/// we don't expect any exception-related instructions when SjLj runs.
+/// FIXME Currently this scheme does not support indirect calls of setjmp,
+/// because of a limitation of the scheme itself. fastcomp does not support it
+/// either.
+///
+/// In detail, this pass does the following things:
+///
+/// 1) Create three global variables: __THREW__, __threwValue, and __tempRet0.
+/// __tempRet0 will be set within __cxa_find_matching_catch() function in
+/// JS library, and __THREW__ and __threwValue will be set in invoke wrappers
+/// in JS glue code. For what invoke wrappers are, refer to 3). These
+/// variables are used for both exceptions and setjmp/longjmps.
+/// __THREW__ indicates whether an exception or a longjmp occurred or not. 0
+/// means nothing occurred, 1 means an exception occurred, and other numbers
+/// mean a longjmp occurred. In the case of longjmp, __threwValue variable
+/// indicates the corresponding setjmp buffer the longjmp corresponds to.
+/// In exception handling, __tempRet0 indicates the type of an exception
+/// caught, and in setjmp/longjmp, it means the second argument to longjmp
+/// function.
+///
+/// * Exception handling
+///
+/// 2) Create setThrew and setTempRet0 functions.
+/// The global variables created in 1) will exist in wasm address space,
+/// but their values should be set in JS code, so we provide these functions
+/// as interfaces to JS glue code. These functions are equivalent to the
+/// following JS functions, which actually exist in asm.js version of JS
+/// library.
+///
+/// function setThrew(threw, value) {
+/// if (__THREW__ == 0) {
+/// __THREW__ = threw;
+/// __threwValue = value;
+/// }
+/// }
+///
+/// function setTempRet0(value) {
+/// __tempRet0 = value;
+/// }
+///
+/// 3) Lower
+/// invoke @func(arg1, arg2) to label %invoke.cont unwind label %lpad
+/// into
+/// __THREW__ = 0;
+/// call @__invoke_SIG(func, arg1, arg2)
+/// %__THREW__.val = __THREW__;
+/// __THREW__ = 0;
+/// if (%__THREW__.val == 1)
+/// goto %lpad
+/// else
+/// goto %invoke.cont
+/// SIG is a mangled string generated based on the LLVM IR-level function
+/// signature. After LLVM IR types are lowered to the target wasm types,
+/// the names for these wrappers will change based on wasm types as well,
+/// as in invoke_vi (function takes an int and returns void). The bodies of
+/// these wrappers will be generated in JS glue code, and inside those
+/// wrappers we use JS try-catch to generate actual exception effects. It
+/// also calls the original callee function. An example wrapper in JS code
+/// would look like this:
+/// function invoke_vi(index,a1) {
+/// try {
+/// Module["dynCall_vi"](index,a1); // This calls original callee
+/// } catch(e) {
+/// if (typeof e !== 'number' && e !== 'longjmp') throw e;
+/// asm["setThrew"](1, 0); // setThrew is called here
+/// }
+/// }
+/// If an exception is thrown, __THREW__ will be set to true in a wrapper,
+/// so we can jump to the right BB based on this value.
+///
+/// 4) Lower
+/// %val = landingpad catch c1 catch c2 catch c3 ...
+/// ... use %val ...
+/// into
+/// %fmc = call @__cxa_find_matching_catch_N(c1, c2, c3, ...)
+/// %val = {%fmc, __tempRet0}
+/// ... use %val ...
+/// Here N is a number calculated based on the number of clauses.
+/// Global variable __tempRet0 is set within __cxa_find_matching_catch() in
+/// JS glue code.
+///
+/// 5) Lower
+/// resume {%a, %b}
+/// into
+/// call @__resumeException(%a)
+/// where __resumeException() is a function in JS glue code.
+///
+/// 6) Lower
+/// call @llvm.eh.typeid.for(type) (intrinsic)
+/// into
+/// call @llvm_eh_typeid_for(type)
+/// llvm_eh_typeid_for function will be generated in JS glue code.
+///
+/// * Setjmp / Longjmp handling
+///
+/// 7) In the entry of each function that calls setjmp, initialize setjmpTable
+/// and setjmpTableSize as follows:
+/// setjmpTableSize = 4;
+/// setjmpTable = (int *) malloc(40);
+/// setjmpTable[0] = 0;
+/// setjmpTable and setjmpTableSize are used in saveSetjmp() function in JS
+/// code.
+///
+/// 8) Lower
+/// setjmp(buf)
+/// into
+/// setjmpTable = saveSetjmp(buf, label, setjmpTable, setjmpTableSize);
+/// setjmpTableSize = __tempRet0;
+/// For each dynamic setjmp call, setjmpTable stores its ID (a number which
+/// is incrementally assigned from 0) and its label (a unique number that
+/// represents each callsite of setjmp). When we need more entries in
+/// setjmpTable, it is reallocated in saveSetjmp() in JS code and it will
+/// return the new table address, and assign the new table size in
+/// __tempRet0. saveSetjmp also stores the setjmp's ID into the buffer buf.
+/// A BB with setjmp is split into two after the setjmp call in order to make
+/// the post-setjmp BB the possible destination of the longjmp BB.
+///
+/// 9) Lower
+/// longjmp(buf, value)
+/// into
+/// emscripten_longjmp_jmpbuf(buf, value)
+/// emscripten_longjmp_jmpbuf will be lowered to emscripten_longjmp later.
+///
+/// 10) Lower every call that might longjmp into
+/// __THREW__ = 0;
+/// call @__invoke_SIG(func, arg1, arg2)
+/// %__THREW__.val = __THREW__;
+/// __THREW__ = 0;
+/// if (%__THREW__.val != 0 & __threwValue != 0) {
+/// %label = testSetjmp(mem[%__THREW__.val], setjmpTable,
+/// setjmpTableSize);
+/// if (%label == 0)
+/// emscripten_longjmp(%__THREW__.val, __threwValue);
+/// __tempRet0 = __threwValue;
+/// } else {
+/// %label = -1;
+/// }
+/// longjmp_result = __tempRet0;
+/// switch label {
+/// label 1: goto post-setjmp BB 1
+/// label 2: goto post-setjmp BB 2
+/// ...
+/// default: goto split next BB
+/// }
+/// testSetjmp examines setjmpTable to see if there is a matching setjmp
+/// call. After calling an invoke wrapper, if a longjmp occurred, __THREW__
+/// will be the address of the matching jmp_buf buffer and __threwValue the
+/// second argument to longjmp. mem[__THREW__.val] is a setjmp ID that is
+/// stored in saveSetjmp. testSetjmp returns a setjmp label, a unique ID to
+/// each setjmp callsite. Label 0 means this longjmp buffer does not
+/// correspond to one of the setjmp callsites in this function, so in this
+/// case we just chain the longjmp to the caller. (Here we call
+/// emscripten_longjmp, which is different from emscripten_longjmp_jmpbuf.
+/// emscripten_longjmp_jmpbuf takes jmp_buf as its first argument, while
+/// emscripten_longjmp takes an int. Both of them will eventually be lowered
+/// to emscripten_longjmp in s2wasm, but here we need two signatures - we
+/// can't translate an int value to a jmp_buf.)
+/// Label -1 means no longjmp occurred. Otherwise we jump to the right
+/// post-setjmp BB based on the label.
+///
+///===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-lower-em-ehsjlj"
+
+static cl::list<std::string>
+ EHWhitelist("emscripten-cxx-exceptions-whitelist",
+ cl::desc("The list of function names in which Emscripten-style "
+ "exception handling is enabled (see emscripten "
+ "EMSCRIPTEN_CATCHING_WHITELIST options)"),
+ cl::CommaSeparated);
+
+namespace {
+class WebAssemblyLowerEmscriptenEHSjLj final : public ModulePass {
+ static const char *ThrewGVName;
+ static const char *ThrewValueGVName;
+ static const char *TempRet0GVName;
+ static const char *ResumeFName;
+ static const char *EHTypeIDFName;
+ static const char *SetThrewFName;
+ static const char *SetTempRet0FName;
+ static const char *EmLongjmpFName;
+ static const char *EmLongjmpJmpbufFName;
+ static const char *SaveSetjmpFName;
+ static const char *TestSetjmpFName;
+ static const char *FindMatchingCatchPrefix;
+ static const char *InvokePrefix;
+
+ bool EnableEH; // Enable exception handling
+ bool EnableSjLj; // Enable setjmp/longjmp handling
+
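+ // Global variables and library helper functions used by this pass; see the
+ // file comment above for the role of each.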
+ GlobalVariable *ThrewGV;
+ GlobalVariable *ThrewValueGV;
+ GlobalVariable *TempRet0GV;
+ Function *ResumeF;
+ Function *EHTypeIDF;
+ Function *EmLongjmpF;
+ Function *EmLongjmpJmpbufF;
+ Function *SaveSetjmpF;
+ Function *TestSetjmpF;
+
+ // __cxa_find_matching_catch_N functions.
+ // Indexed by the number of clauses in an original landingpad instruction.
+ DenseMap<int, Function *> FindMatchingCatches;
+ // Map of <function signature string, invoke_ wrappers>
+ StringMap<Function *> InvokeWrappers;
+ // Set of whitelisted function names for exception handling
+ std::set<std::string> EHWhitelistSet;
+
+ StringRef getPassName() const override {
+ return "WebAssembly Lower Emscripten Exceptions";
+ }
+
+ bool runEHOnFunction(Function &F);
+ bool runSjLjOnFunction(Function &F);
+ Function *getFindMatchingCatch(Module &M, unsigned NumClauses);
+
+ template <typename CallOrInvoke> Value *wrapInvoke(CallOrInvoke *CI);
+ void wrapTestSetjmp(BasicBlock *BB, Instruction *InsertPt, Value *Threw,
+ Value *SetjmpTable, Value *SetjmpTableSize, Value *&Label,
+ Value *&LongjmpResult, BasicBlock *&EndBB);
+ template <typename CallOrInvoke> Function *getInvokeWrapper(CallOrInvoke *CI);
+
+ bool areAllExceptionsAllowed() const { return EHWhitelistSet.empty(); }
+ bool canLongjmp(Module &M, const Value *Callee) const;
+
+ void createSetThrewFunction(Module &M);
+ void createSetTempRet0Function(Module &M);
+
+ void rebuildSSA(Function &F);
+
+public:
+ static char ID;
+
+ WebAssemblyLowerEmscriptenEHSjLj(bool EnableEH = true, bool EnableSjLj = true)
+ : ModulePass(ID), EnableEH(EnableEH), EnableSjLj(EnableSjLj),
+ ThrewGV(nullptr), ThrewValueGV(nullptr), TempRet0GV(nullptr),
+ ResumeF(nullptr), EHTypeIDF(nullptr), EmLongjmpF(nullptr),
+ EmLongjmpJmpbufF(nullptr), SaveSetjmpF(nullptr), TestSetjmpF(nullptr) {
+ EHWhitelistSet.insert(EHWhitelist.begin(), EHWhitelist.end());
+ }
+ bool runOnModule(Module &M) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<DominatorTreeWrapperPass>();
+ }
+};
+} // End anonymous namespace
+
+const char *WebAssemblyLowerEmscriptenEHSjLj::ThrewGVName = "__THREW__";
+const char *WebAssemblyLowerEmscriptenEHSjLj::ThrewValueGVName = "__threwValue";
+const char *WebAssemblyLowerEmscriptenEHSjLj::TempRet0GVName = "__tempRet0";
+const char *WebAssemblyLowerEmscriptenEHSjLj::ResumeFName = "__resumeException";
+const char *WebAssemblyLowerEmscriptenEHSjLj::EHTypeIDFName =
+ "llvm_eh_typeid_for";
+const char *WebAssemblyLowerEmscriptenEHSjLj::SetThrewFName = "setThrew";
+const char *WebAssemblyLowerEmscriptenEHSjLj::SetTempRet0FName = "setTempRet0";
+const char *WebAssemblyLowerEmscriptenEHSjLj::EmLongjmpFName =
+ "emscripten_longjmp";
+const char *WebAssemblyLowerEmscriptenEHSjLj::EmLongjmpJmpbufFName =
+ "emscripten_longjmp_jmpbuf";
+const char *WebAssemblyLowerEmscriptenEHSjLj::SaveSetjmpFName = "saveSetjmp";
+const char *WebAssemblyLowerEmscriptenEHSjLj::TestSetjmpFName = "testSetjmp";
+const char *WebAssemblyLowerEmscriptenEHSjLj::FindMatchingCatchPrefix =
+ "__cxa_find_matching_catch_";
+const char *WebAssemblyLowerEmscriptenEHSjLj::InvokePrefix = "__invoke_";
+
+char WebAssemblyLowerEmscriptenEHSjLj::ID = 0;
+INITIALIZE_PASS(WebAssemblyLowerEmscriptenEHSjLj, DEBUG_TYPE,
+ "WebAssembly Lower Emscripten Exceptions / Setjmp / Longjmp",
+ false, false)
+
+ModulePass *llvm::createWebAssemblyLowerEmscriptenEHSjLj(bool EnableEH,
+ bool EnableSjLj) {
+ return new WebAssemblyLowerEmscriptenEHSjLj(EnableEH, EnableSjLj);
+}
+
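+// Returns whether a call to this value may throw: intrinsics, setjmp, longjmp,
+// and nounwind functions never do; indirect calls are conservatively assumed
+// to throw.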
+static bool canThrow(const Value *V) {
+ if (const auto *F = dyn_cast<const Function>(V)) {
+ // Intrinsics cannot throw
+ if (F->isIntrinsic())
+ return false;
+ StringRef Name = F->getName();
+ // Leave setjmp and longjmp (mostly) alone; we process them properly later
+ if (Name == "setjmp" || Name == "longjmp")
+ return false;
+ return !F->doesNotThrow();
+ }
+ // not a function, so an indirect call - can throw, we can't tell
+ return true;
+}
+
+// Returns an available name for a global value.
+// If the proposed name already exists in the module, adds '_' at the end of
+// the name until the name is available.
+static inline std::string createGlobalValueName(const Module &M,
+ const std::string &Propose) {
+ std::string Name = Propose;
+ while (M.getNamedGlobal(Name))
+ Name += "_";
+ return Name;
+}
+
+// Simple function name mangler.
+// This function simply takes LLVM's string representation of parameter types
+// and concatenates them with '_'. The result contains non-alphanumeric
+// characters, but llc is ok with that, and we need to postprocess these names
+// after the lowering
+// phase anyway.
+static std::string getSignature(FunctionType *FTy) {
+ std::string Sig;
+ raw_string_ostream OS(Sig);
+ OS << *FTy->getReturnType();
+ for (Type *ParamTy : FTy->params())
+ OS << "_" << *ParamTy;
+ if (FTy->isVarArg())
+ OS << "_...";
+ Sig = OS.str();
+ Sig.erase(remove_if(Sig, isspace), Sig.end());
+// When s2wasm parses a .s file, a comma means the end of an argument. So a
+ // mangled function name can contain any character but a comma.
+ std::replace(Sig.begin(), Sig.end(), ',', '.');
+ return Sig;
+}
+
+// Returns __cxa_find_matching_catch_N function, where N = NumClauses + 2.
+// This is because a landingpad instruction contains two more arguments, a
+// personality function and a cleanup bit, and __cxa_find_matching_catch_N
+// functions are named after the number of arguments in the original landingpad
+// instruction.
+Function *
+WebAssemblyLowerEmscriptenEHSjLj::getFindMatchingCatch(Module &M,
+ unsigned NumClauses) {
+ if (FindMatchingCatches.count(NumClauses))
+ return FindMatchingCatches[NumClauses];
+ PointerType *Int8PtrTy = Type::getInt8PtrTy(M.getContext());
+ SmallVector<Type *, 16> Args(NumClauses, Int8PtrTy);
+ FunctionType *FTy = FunctionType::get(Int8PtrTy, Args, false);
+ Function *F =
+ Function::Create(FTy, GlobalValue::ExternalLinkage,
+ FindMatchingCatchPrefix + Twine(NumClauses + 2), &M);
+ FindMatchingCatches[NumClauses] = F;
+ return F;
+}
+
+// Generate invoke wrapper sequence with preamble and postamble
+// Preamble:
+// __THREW__ = 0;
+// Postamble:
+// %__THREW__.val = __THREW__; __THREW__ = 0;
+// Returns %__THREW__.val, which indicates whether an exception is thrown (or
+// whether longjmp occurred), for future use.
+template <typename CallOrInvoke>
+Value *WebAssemblyLowerEmscriptenEHSjLj::wrapInvoke(CallOrInvoke *CI) {
+ LLVMContext &C = CI->getModule()->getContext();
+
+ // If we are calling a function that is noreturn, we must remove that
+ // attribute. The code we insert here does expect it to return, after we
+ // catch the exception.
+ if (CI->doesNotReturn()) {
+ if (auto *F = dyn_cast<Function>(CI->getCalledValue()))
+ F->removeFnAttr(Attribute::NoReturn);
+ CI->removeAttribute(AttributeSet::FunctionIndex, Attribute::NoReturn);
+ }
+
+ IRBuilder<> IRB(C);
+ IRB.SetInsertPoint(CI);
+
+ // Pre-invoke
+ // __THREW__ = 0;
+ IRB.CreateStore(IRB.getInt32(0), ThrewGV);
+
+ // Invoke function wrapper in JavaScript
+ SmallVector<Value *, 16> Args;
+ // Put the pointer to the callee as first argument, so it can be called
+ // within the invoke wrapper later
+ Args.push_back(CI->getCalledValue());
+ Args.append(CI->arg_begin(), CI->arg_end());
+ CallInst *NewCall = IRB.CreateCall(getInvokeWrapper(CI), Args);
+ NewCall->takeName(CI);
+ NewCall->setCallingConv(CI->getCallingConv());
+ NewCall->setDebugLoc(CI->getDebugLoc());
+
+ // Because we added the pointer to the callee as first argument, all
+ // argument attribute indices have to be incremented by one.
+ SmallVector<AttributeSet, 8> AttributesVec;
+ const AttributeSet &InvokePAL = CI->getAttributes();
+ CallSite::arg_iterator AI = CI->arg_begin();
+ unsigned i = 1; // Argument attribute index starts from 1
+ for (unsigned e = CI->getNumArgOperands(); i <= e; ++AI, ++i) {
+ if (InvokePAL.hasAttributes(i)) {
+ AttrBuilder B(InvokePAL, i);
+ AttributesVec.push_back(AttributeSet::get(C, i + 1, B));
+ }
+ }
+ // Add any return attributes.
+ if (InvokePAL.hasAttributes(AttributeSet::ReturnIndex))
+ AttributesVec.push_back(AttributeSet::get(C, InvokePAL.getRetAttributes()));
+ // Add any function attributes.
+ if (InvokePAL.hasAttributes(AttributeSet::FunctionIndex))
+ AttributesVec.push_back(AttributeSet::get(C, InvokePAL.getFnAttributes()));
+ // Reconstruct the AttributesList based on the vector we constructed.
+ AttributeSet NewCallPAL = AttributeSet::get(C, AttributesVec);
+ NewCall->setAttributes(NewCallPAL);
+
+ CI->replaceAllUsesWith(NewCall);
+
+ // Post-invoke
+ // %__THREW__.val = __THREW__; __THREW__ = 0;
+ Value *Threw = IRB.CreateLoad(ThrewGV, ThrewGV->getName() + ".val");
+ IRB.CreateStore(IRB.getInt32(0), ThrewGV);
+ return Threw;
+}
+
+// Get matching invoke wrapper based on callee signature
+template <typename CallOrInvoke>
+Function *WebAssemblyLowerEmscriptenEHSjLj::getInvokeWrapper(CallOrInvoke *CI) {
+ Module *M = CI->getModule();
+ SmallVector<Type *, 16> ArgTys;
+ Value *Callee = CI->getCalledValue();
+ FunctionType *CalleeFTy;
+ if (auto *F = dyn_cast<Function>(Callee))
+ CalleeFTy = F->getFunctionType();
+ else {
+ auto *CalleeTy = cast<PointerType>(Callee->getType())->getElementType();
+ CalleeFTy = dyn_cast<FunctionType>(CalleeTy);
+ }
+
+ std::string Sig = getSignature(CalleeFTy);
+ if (InvokeWrappers.find(Sig) != InvokeWrappers.end())
+ return InvokeWrappers[Sig];
+
+ // Put the pointer to the callee as first argument
+ ArgTys.push_back(PointerType::getUnqual(CalleeFTy));
+ // Add argument types
+ ArgTys.append(CalleeFTy->param_begin(), CalleeFTy->param_end());
+
+ FunctionType *FTy = FunctionType::get(CalleeFTy->getReturnType(), ArgTys,
+ CalleeFTy->isVarArg());
+ Function *F = Function::Create(FTy, GlobalValue::ExternalLinkage,
+ InvokePrefix + Sig, M);
+ InvokeWrappers[Sig] = F;
+ return F;
+}
+
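+// Returns whether a call to this callee may longjmp. Intrinsics, setjmp,
+// malloc/free, the JS glue helpers, __cxa_find_matching_catch_N, and the
+// exception-catching runtime functions are known not to; anything else is
+// conservatively assumed to longjmp.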
+bool WebAssemblyLowerEmscriptenEHSjLj::canLongjmp(Module &M,
+ const Value *Callee) const {
+ if (auto *CalleeF = dyn_cast<Function>(Callee))
+ if (CalleeF->isIntrinsic())
+ return false;
+
+ // The reason we include malloc/free here is to exclude the malloc/free
+ // calls generated in setjmp prep / cleanup routines.
+ Function *SetjmpF = M.getFunction("setjmp");
+ Function *MallocF = M.getFunction("malloc");
+ Function *FreeF = M.getFunction("free");
+ if (Callee == SetjmpF || Callee == MallocF || Callee == FreeF)
+ return false;
+
+ // These are functions in JS glue code
+ if (Callee == ResumeF || Callee == EHTypeIDF || Callee == SaveSetjmpF ||
+ Callee == TestSetjmpF)
+ return false;
+
+ // __cxa_find_matching_catch_N functions cannot longjmp
+ if (Callee->getName().startswith(FindMatchingCatchPrefix))
+ return false;
+
+ // Exception-catching related functions
+ Function *BeginCatchF = M.getFunction("__cxa_begin_catch");
+ Function *EndCatchF = M.getFunction("__cxa_end_catch");
+ Function *AllocExceptionF = M.getFunction("__cxa_allocate_exception");
+ Function *ThrowF = M.getFunction("__cxa_throw");
+ Function *TerminateF = M.getFunction("__clang_call_terminate");
+ if (Callee == BeginCatchF || Callee == EndCatchF ||
+ Callee == AllocExceptionF || Callee == ThrowF || Callee == TerminateF)
+ return false;
+
+ // Otherwise we don't know
+ return true;
+}
+
+// Generate testSetjmp function call sequence with preamble and postamble.
+// The code this generates is equivalent to the following JavaScript code:
+// if (%__THREW__.val != 0 & threwValue != 0) {
+// %label = _testSetjmp(mem[%__THREW__.val], setjmpTable, setjmpTableSize);
+// if (%label == 0)
+// emscripten_longjmp(%__THREW__.val, threwValue);
+// __tempRet0 = threwValue;
+// } else {
+// %label = -1;
+// }
+// %longjmp_result = __tempRet0;
+//
+// As output parameters, returns %label, %longjmp_result, and the BB the last
+// instruction (%longjmp_result = ...) is in.
+void WebAssemblyLowerEmscriptenEHSjLj::wrapTestSetjmp(
+ BasicBlock *BB, Instruction *InsertPt, Value *Threw, Value *SetjmpTable,
+ Value *SetjmpTableSize, Value *&Label, Value *&LongjmpResult,
+ BasicBlock *&EndBB) {
+ Function *F = BB->getParent();
+ LLVMContext &C = BB->getModule()->getContext();
+ IRBuilder<> IRB(C);
+ IRB.SetInsertPoint(InsertPt);
+
+ // if (%__THREW__.val != 0 & threwValue != 0)
+ IRB.SetInsertPoint(BB);
+ BasicBlock *ThenBB1 = BasicBlock::Create(C, "if.then1", F);
+ BasicBlock *ElseBB1 = BasicBlock::Create(C, "if.else1", F);
+ BasicBlock *EndBB1 = BasicBlock::Create(C, "if.end", F);
+ Value *ThrewCmp = IRB.CreateICmpNE(Threw, IRB.getInt32(0));
+ Value *ThrewValue =
+ IRB.CreateLoad(ThrewValueGV, ThrewValueGV->getName() + ".val");
+ Value *ThrewValueCmp = IRB.CreateICmpNE(ThrewValue, IRB.getInt32(0));
+ Value *Cmp1 = IRB.CreateAnd(ThrewCmp, ThrewValueCmp, "cmp1");
+ IRB.CreateCondBr(Cmp1, ThenBB1, ElseBB1);
+
+ // %label = _testSetjmp(mem[%__THREW__.val], _setjmpTable, _setjmpTableSize);
+ // if (%label == 0)
+ IRB.SetInsertPoint(ThenBB1);
+ BasicBlock *ThenBB2 = BasicBlock::Create(C, "if.then2", F);
+ BasicBlock *EndBB2 = BasicBlock::Create(C, "if.end2", F);
+ Value *ThrewInt = IRB.CreateIntToPtr(Threw, Type::getInt32PtrTy(C),
+ Threw->getName() + ".i32p");
+ Value *LoadedThrew =
+ IRB.CreateLoad(ThrewInt, ThrewInt->getName() + ".loaded");
+ Value *ThenLabel = IRB.CreateCall(
+ TestSetjmpF, {LoadedThrew, SetjmpTable, SetjmpTableSize}, "label");
+ Value *Cmp2 = IRB.CreateICmpEQ(ThenLabel, IRB.getInt32(0));
+ IRB.CreateCondBr(Cmp2, ThenBB2, EndBB2);
+
+ // emscripten_longjmp(%__THREW__.val, threwValue);
+ IRB.SetInsertPoint(ThenBB2);
+ IRB.CreateCall(EmLongjmpF, {Threw, ThrewValue});
+ IRB.CreateUnreachable();
+
+ // __tempRet0 = threwValue;
+ IRB.SetInsertPoint(EndBB2);
+ IRB.CreateStore(ThrewValue, TempRet0GV);
+ IRB.CreateBr(EndBB1);
+
+ IRB.SetInsertPoint(ElseBB1);
+ IRB.CreateBr(EndBB1);
+
+ // longjmp_result = __tempRet0;
+ IRB.SetInsertPoint(EndBB1);
+ PHINode *LabelPHI = IRB.CreatePHI(IRB.getInt32Ty(), 2, "label");
+ LabelPHI->addIncoming(ThenLabel, EndBB2);
+
+ LabelPHI->addIncoming(IRB.getInt32(-1), ElseBB1);
+
+ // Output parameter assignment
+ Label = LabelPHI;
+ EndBB = EndBB1;
+ LongjmpResult = IRB.CreateLoad(TempRet0GV, "longjmp_result");
+}
+
+// Create setThrew function
+// function setThrew(threw, value) {
+// if (__THREW__ == 0) {
+// __THREW__ = threw;
+// __threwValue = value;
+// }
+// }
+void WebAssemblyLowerEmscriptenEHSjLj::createSetThrewFunction(Module &M) {
+ LLVMContext &C = M.getContext();
+ IRBuilder<> IRB(C);
+
+ assert(!M.getNamedGlobal(SetThrewFName) && "setThrew already exists");
+ Type *Params[] = {IRB.getInt32Ty(), IRB.getInt32Ty()};
+ FunctionType *FTy = FunctionType::get(IRB.getVoidTy(), Params, false);
+ Function *F =
+ Function::Create(FTy, GlobalValue::ExternalLinkage, SetThrewFName, &M);
+ Argument *Arg1 = &*(F->arg_begin());
+ Argument *Arg2 = &*(++F->arg_begin());
+ Arg1->setName("threw");
+ Arg2->setName("value");
+ BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F);
+ BasicBlock *ThenBB = BasicBlock::Create(C, "if.then", F);
+ BasicBlock *EndBB = BasicBlock::Create(C, "if.end", F);
+
+ IRB.SetInsertPoint(EntryBB);
+ Value *Threw = IRB.CreateLoad(ThrewGV, ThrewGV->getName() + ".val");
+ Value *Cmp = IRB.CreateICmpEQ(Threw, IRB.getInt32(0), "cmp");
+ IRB.CreateCondBr(Cmp, ThenBB, EndBB);
+
+ IRB.SetInsertPoint(ThenBB);
+ IRB.CreateStore(Arg1, ThrewGV);
+ IRB.CreateStore(Arg2, ThrewValueGV);
+ IRB.CreateBr(EndBB);
+
+ IRB.SetInsertPoint(EndBB);
+ IRB.CreateRetVoid();
+}
+
+// Create setTempRet0 function
+// function setTempRet0(value) {
+// __tempRet0 = value;
+// }
+void WebAssemblyLowerEmscriptenEHSjLj::createSetTempRet0Function(Module &M) {
+ LLVMContext &C = M.getContext();
+ IRBuilder<> IRB(C);
+
+ assert(!M.getNamedGlobal(SetTempRet0FName) && "setTempRet0 already exists");
+ Type *Params[] = {IRB.getInt32Ty()};
+ FunctionType *FTy = FunctionType::get(IRB.getVoidTy(), Params, false);
+ Function *F =
+ Function::Create(FTy, GlobalValue::ExternalLinkage, SetTempRet0FName, &M);
+ F->arg_begin()->setName("value");
+ BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F);
+ IRB.SetInsertPoint(EntryBB);
+ IRB.CreateStore(&*F->arg_begin(), TempRet0GV);
+ IRB.CreateRetVoid();
+}
+
+void WebAssemblyLowerEmscriptenEHSjLj::rebuildSSA(Function &F) {
+ DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
+ DT.recalculate(F); // CFG has been changed
+ SSAUpdater SSA;
+ for (BasicBlock &BB : F) {
+ for (Instruction &I : BB) {
+ for (auto UI = I.use_begin(), UE = I.use_end(); UI != UE;) {
+ Use &U = *UI;
+ ++UI;
+ SSA.Initialize(I.getType(), I.getName());
+ SSA.AddAvailableValue(&BB, &I);
+ Instruction *User = cast<Instruction>(U.getUser());
+ if (User->getParent() == &BB)
+ continue;
+
+ if (PHINode *UserPN = dyn_cast<PHINode>(User))
+ if (UserPN->getIncomingBlock(U) == &BB)
+ continue;
+
+ if (DT.dominates(&I, User))
+ continue;
+ SSA.RewriteUseAfterInsertions(U);
+ }
+ }
+ }
+}
+
+bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) {
+ LLVMContext &C = M.getContext();
+ IRBuilder<> IRB(C);
+
+ Function *SetjmpF = M.getFunction("setjmp");
+ Function *LongjmpF = M.getFunction("longjmp");
+ bool SetjmpUsed = SetjmpF && !SetjmpF->use_empty();
+ bool LongjmpUsed = LongjmpF && !LongjmpF->use_empty();
+ bool DoSjLj = EnableSjLj && (SetjmpUsed || LongjmpUsed);
+
+ // Create global variables __THREW__, threwValue, and __tempRet0, which are
+ // used in common for both exception handling and setjmp/longjmp handling
+ ThrewGV = new GlobalVariable(M, IRB.getInt32Ty(), false,
+ GlobalValue::ExternalLinkage, IRB.getInt32(0),
+ createGlobalValueName(M, ThrewGVName));
+ ThrewValueGV = new GlobalVariable(
+ M, IRB.getInt32Ty(), false, GlobalValue::ExternalLinkage, IRB.getInt32(0),
+ createGlobalValueName(M, ThrewValueGVName));
+ TempRet0GV = new GlobalVariable(M, IRB.getInt32Ty(), false,
+ GlobalValue::ExternalLinkage, IRB.getInt32(0),
+ createGlobalValueName(M, TempRet0GVName));
+
+ bool Changed = false;
+
+ // Exception handling
+ if (EnableEH) {
+ // Register __resumeException function
+ FunctionType *ResumeFTy =
+ FunctionType::get(IRB.getVoidTy(), IRB.getInt8PtrTy(), false);
+ ResumeF = Function::Create(ResumeFTy, GlobalValue::ExternalLinkage,
+ ResumeFName, &M);
+
+ // Register llvm_eh_typeid_for function
+ FunctionType *EHTypeIDTy =
+ FunctionType::get(IRB.getInt32Ty(), IRB.getInt8PtrTy(), false);
+ EHTypeIDF = Function::Create(EHTypeIDTy, GlobalValue::ExternalLinkage,
+ EHTypeIDFName, &M);
+
+ for (Function &F : M) {
+ if (F.isDeclaration())
+ continue;
+ Changed |= runEHOnFunction(F);
+ }
+ }
+
+ // Setjmp/longjmp handling
+ if (DoSjLj) {
+ Changed = true; // We have setjmp or longjmp somewhere
+
+ Function *MallocF = M.getFunction("malloc");
+ Function *FreeF = M.getFunction("free");
+ if (!MallocF || !FreeF)
+ report_fatal_error(
+ "malloc and free must be linked into the module if setjmp is used");
+
+ // Register saveSetjmp function
+ FunctionType *SetjmpFTy = SetjmpF->getFunctionType();
+ SmallVector<Type *, 4> Params = {SetjmpFTy->getParamType(0),
+ IRB.getInt32Ty(), Type::getInt32PtrTy(C),
+ IRB.getInt32Ty()};
+ FunctionType *FTy =
+ FunctionType::get(Type::getInt32PtrTy(C), Params, false);
+ SaveSetjmpF = Function::Create(FTy, GlobalValue::ExternalLinkage,
+ SaveSetjmpFName, &M);
+
+ // Register testSetjmp function
+ Params = {IRB.getInt32Ty(), Type::getInt32PtrTy(C), IRB.getInt32Ty()};
+ FTy = FunctionType::get(IRB.getInt32Ty(), Params, false);
+ TestSetjmpF = Function::Create(FTy, GlobalValue::ExternalLinkage,
+ TestSetjmpFName, &M);
+
+ if (LongjmpF) {
+ // Replace all uses of longjmp with emscripten_longjmp_jmpbuf, which is
+ // defined in JS code
+ EmLongjmpJmpbufF = Function::Create(LongjmpF->getFunctionType(),
+ GlobalValue::ExternalLinkage,
+ EmLongjmpJmpbufFName, &M);
+
+ LongjmpF->replaceAllUsesWith(EmLongjmpJmpbufF);
+ }
+ FTy = FunctionType::get(IRB.getVoidTy(),
+ {IRB.getInt32Ty(), IRB.getInt32Ty()}, false);
+ EmLongjmpF =
+ Function::Create(FTy, GlobalValue::ExternalLinkage, EmLongjmpFName, &M);
+
+ // Only traverse functions that use setjmp in order not to insert
+ // unnecessary prep / cleanup code in every function
+ SmallPtrSet<Function *, 8> SetjmpUsers;
+ for (User *U : SetjmpF->users()) {
+ auto *UI = cast<Instruction>(U);
+ SetjmpUsers.insert(UI->getFunction());
+ }
+ for (Function *F : SetjmpUsers)
+ runSjLjOnFunction(*F);
+ }
+
+ if (!Changed) {
+ // Delete unused global variables and functions
+ ThrewGV->eraseFromParent();
+ ThrewValueGV->eraseFromParent();
+ TempRet0GV->eraseFromParent();
+ if (ResumeF)
+ ResumeF->eraseFromParent();
+ if (EHTypeIDF)
+ EHTypeIDF->eraseFromParent();
+ if (EmLongjmpF)
+ EmLongjmpF->eraseFromParent();
+ if (SaveSetjmpF)
+ SaveSetjmpF->eraseFromParent();
+ if (TestSetjmpF)
+ TestSetjmpF->eraseFromParent();
+ return false;
+ }
+
+ // If we have made any changes while doing exception handling or
+ // setjmp/longjmp handling, we have to create these functions for JavaScript
+ // to call.
+ createSetThrewFunction(M);
+ createSetTempRet0Function(M);
+
+ return true;
+}
+
+bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) {
+ Module &M = *F.getParent();
+ LLVMContext &C = F.getContext();
+ IRBuilder<> IRB(C);
+ bool Changed = false;
+ SmallVector<Instruction *, 64> ToErase;
+ SmallPtrSet<LandingPadInst *, 32> LandingPads;
+ bool AllowExceptions =
+ areAllExceptionsAllowed() || EHWhitelistSet.count(F.getName());
+
+ for (BasicBlock &BB : F) {
+ auto *II = dyn_cast<InvokeInst>(BB.getTerminator());
+ if (!II)
+ continue;
+ Changed = true;
+ LandingPads.insert(II->getLandingPadInst());
+ IRB.SetInsertPoint(II);
+
+ bool NeedInvoke = AllowExceptions && canThrow(II->getCalledValue());
+ if (NeedInvoke) {
+ // Wrap invoke with invoke wrapper and generate preamble/postamble
+ Value *Threw = wrapInvoke(II);
+ ToErase.push_back(II);
+
+ // Insert a branch based on __THREW__ variable
+ Value *Cmp = IRB.CreateICmpEQ(Threw, IRB.getInt32(1), "cmp");
+ IRB.CreateCondBr(Cmp, II->getUnwindDest(), II->getNormalDest());
+
+ } else {
+ // This can't throw, and we don't need this invoke; just replace it with a
+ // call+branch
+ SmallVector<Value *, 16> Args(II->arg_begin(), II->arg_end());
+ CallInst *NewCall = IRB.CreateCall(II->getCalledValue(), Args);
+ NewCall->takeName(II);
+ NewCall->setCallingConv(II->getCallingConv());
+ NewCall->setDebugLoc(II->getDebugLoc());
+ NewCall->setAttributes(II->getAttributes());
+ II->replaceAllUsesWith(NewCall);
+ ToErase.push_back(II);
+
+ IRB.CreateBr(II->getNormalDest());
+
+ // Remove any PHI node entries from the exception destination
+ II->getUnwindDest()->removePredecessor(&BB);
+ }
+ }
+
+ // Process resume instructions
+ for (BasicBlock &BB : F) {
+ // Scan the body of the basic block for resumes
+ for (Instruction &I : BB) {
+ auto *RI = dyn_cast<ResumeInst>(&I);
+ if (!RI)
+ continue;
+
+ // Split the input into legal values
+ Value *Input = RI->getValue();
+ IRB.SetInsertPoint(RI);
+ Value *Low = IRB.CreateExtractValue(Input, 0, "low");
+ // Create a call to __resumeException function
+ IRB.CreateCall(ResumeF, {Low});
+ // Add a terminator to the block
+ IRB.CreateUnreachable();
+ ToErase.push_back(RI);
+ }
+ }
+
+ // Process llvm.eh.typeid.for intrinsics
+ for (BasicBlock &BB : F) {
+ for (Instruction &I : BB) {
+ auto *CI = dyn_cast<CallInst>(&I);
+ if (!CI)
+ continue;
+ const Function *Callee = CI->getCalledFunction();
+ if (!Callee)
+ continue;
+ if (Callee->getIntrinsicID() != Intrinsic::eh_typeid_for)
+ continue;
+
+ IRB.SetInsertPoint(CI);
+ CallInst *NewCI =
+ IRB.CreateCall(EHTypeIDF, CI->getArgOperand(0), "typeid");
+ CI->replaceAllUsesWith(NewCI);
+ ToErase.push_back(CI);
+ }
+ }
+
+ // Look for orphan landingpads, which can occur in blocks with no predecessors
+ for (BasicBlock &BB : F) {
+ Instruction *I = BB.getFirstNonPHI();
+ if (auto *LPI = dyn_cast<LandingPadInst>(I))
+ LandingPads.insert(LPI);
+ }
+
+ // Handle all the landingpads for this function together, as multiple invokes
+ // may share a single landingpad
+ for (LandingPadInst *LPI : LandingPads) {
+ IRB.SetInsertPoint(LPI);
+ SmallVector<Value *, 16> FMCArgs;
+ for (unsigned i = 0, e = LPI->getNumClauses(); i < e; ++i) {
+ Constant *Clause = LPI->getClause(i);
+ // As a temporary workaround for the lack of aggregate varargs support
+ // in the interface between JS and wasm, break out filter operands into
+ // their component elements.
+ if (LPI->isFilter(i)) {
+ auto *ATy = cast<ArrayType>(Clause->getType());
+ for (unsigned j = 0, e = ATy->getNumElements(); j < e; ++j) {
+ Value *EV = IRB.CreateExtractValue(Clause, makeArrayRef(j), "filter");
+ FMCArgs.push_back(EV);
+ }
+ } else
+ FMCArgs.push_back(Clause);
+ }
+
+ // Create a call to __cxa_find_matching_catch_N function
+ Function *FMCF = getFindMatchingCatch(M, FMCArgs.size());
+ CallInst *FMCI = IRB.CreateCall(FMCF, FMCArgs, "fmc");
+ Value *Undef = UndefValue::get(LPI->getType());
+ Value *Pair0 = IRB.CreateInsertValue(Undef, FMCI, 0, "pair0");
+ Value *TempRet0 =
+ IRB.CreateLoad(TempRet0GV, TempRet0GV->getName() + ".val");
+ Value *Pair1 = IRB.CreateInsertValue(Pair0, TempRet0, 1, "pair1");
+
+ LPI->replaceAllUsesWith(Pair1);
+ ToErase.push_back(LPI);
+ }
+
+ // Erase everything we no longer need in this function
+ for (Instruction *I : ToErase)
+ I->eraseFromParent();
+
+ return Changed;
+}
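+// Illustrative sketch only: the exact IR comes from wrapInvoke (defined
+// earlier in this file), so the wrapper name below is an assumption based on
+// InvokePrefix. After this lowering, an invoke such as
+//   invoke void @foo() to label %cont unwind label %lpad
+// roughly becomes a call through an invoke wrapper followed by an explicit
+// check of __THREW__ (which the postamble loads and resets):
+//   call void @__invoke_void(void ()* @foo)
+//   %__THREW__.val = load i32, i32* @__THREW__
+//   %cmp = icmp eq i32 %__THREW__.val, 1
+//   br i1 %cmp, label %lpad, label %cont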
+
+bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) {
+ Module &M = *F.getParent();
+ LLVMContext &C = F.getContext();
+ IRBuilder<> IRB(C);
+ SmallVector<Instruction *, 64> ToErase;
+ // Vector of %setjmpTable values
+ std::vector<Instruction *> SetjmpTableInsts;
+ // Vector of %setjmpTableSize values
+ std::vector<Instruction *> SetjmpTableSizeInsts;
+
+ // Setjmp preparation
+
+ // This instruction effectively means %setjmpTableSize = 4.
+ // We create this as an instruction intentionally, and we don't want to fold
+ // this instruction to a constant 4, because this value will be used in
+ // SSAUpdater.AddAvailableValue(...) later.
+ BasicBlock &EntryBB = F.getEntryBlock();
+ BinaryOperator *SetjmpTableSize = BinaryOperator::Create(
+ Instruction::Add, IRB.getInt32(4), IRB.getInt32(0), "setjmpTableSize",
+ &*EntryBB.getFirstInsertionPt());
+ // setjmpTable = (int *) malloc(40);
+ Instruction *SetjmpTable = CallInst::CreateMalloc(
+ SetjmpTableSize, IRB.getInt32Ty(), IRB.getInt32Ty(), IRB.getInt32(40),
+ nullptr, nullptr, "setjmpTable");
+ // setjmpTable[0] = 0;
+ IRB.SetInsertPoint(SetjmpTableSize);
+ IRB.CreateStore(IRB.getInt32(0), SetjmpTable);
+ SetjmpTableInsts.push_back(SetjmpTable);
+ SetjmpTableSizeInsts.push_back(SetjmpTableSize);
+
+ // Setjmp transformation
+ std::vector<PHINode *> SetjmpRetPHIs;
+ Function *SetjmpF = M.getFunction("setjmp");
+ for (User *U : SetjmpF->users()) {
+ auto *CI = dyn_cast<CallInst>(U);
+ if (!CI)
+ report_fatal_error("Does not support indirect calls to setjmp");
+
+ BasicBlock *BB = CI->getParent();
+ if (BB->getParent() != &F) // in other function
+ continue;
+
+ // The tail is everything right after the call, and will be reached once
+ // when setjmp is called, and later when longjmp returns to the setjmp
+ BasicBlock *Tail = SplitBlock(BB, CI->getNextNode());
+ // Add a phi to the tail, which will be the output of setjmp, indicating
+ // whether this is the first call or a longjmp back. The phi directly uses
+ // the right value based on where we arrive from
+ IRB.SetInsertPoint(Tail->getFirstNonPHI());
+ PHINode *SetjmpRet = IRB.CreatePHI(IRB.getInt32Ty(), 2, "setjmp.ret");
+
+ // setjmp initial call returns 0
+ SetjmpRet->addIncoming(IRB.getInt32(0), BB);
+ // The proper output is now this, not the setjmp call itself
+ CI->replaceAllUsesWith(SetjmpRet);
+ // longjmps returning to this setjmp will add themselves to this phi
+ SetjmpRetPHIs.push_back(SetjmpRet);
+
+ // Fix call target
+ // Our index in the function is our place in the array + 1 to avoid index
+ // 0, because index 0 means the longjmp is not ours to handle.
+ IRB.SetInsertPoint(CI);
+ Value *Args[] = {CI->getArgOperand(0), IRB.getInt32(SetjmpRetPHIs.size()),
+ SetjmpTable, SetjmpTableSize};
+ Instruction *NewSetjmpTable =
+ IRB.CreateCall(SaveSetjmpF, Args, "setjmpTable");
+ Instruction *NewSetjmpTableSize =
+ IRB.CreateLoad(TempRet0GV, "setjmpTableSize");
+ SetjmpTableInsts.push_back(NewSetjmpTable);
+ SetjmpTableSizeInsts.push_back(NewSetjmpTableSize);
+ ToErase.push_back(CI);
+ }
+
+ // Update each call that can longjmp so it can return to a setjmp where
+ // relevant.
+
+ // Because we are creating new BBs while processing and don't want to make
+ // all these newly created BBs candidates again for longjmp processing, we
+ // first make the vector of candidate BBs.
+ std::vector<BasicBlock *> BBs;
+ for (BasicBlock &BB : F)
+ BBs.push_back(&BB);
+
+ // BBs.size() will change within the loop, so we query it every time
+ for (unsigned i = 0; i < BBs.size(); i++) {
+ BasicBlock *BB = BBs[i];
+ for (Instruction &I : *BB) {
+ assert(!isa<InvokeInst>(&I));
+ auto *CI = dyn_cast<CallInst>(&I);
+ if (!CI)
+ continue;
+
+ const Value *Callee = CI->getCalledValue();
+ if (!canLongjmp(M, Callee))
+ continue;
+
+ Value *Threw = nullptr;
+ BasicBlock *Tail;
+ if (Callee->getName().startswith(InvokePrefix)) {
+ // If an invoke wrapper has already been generated for this call in the
+ // previous EH phase, search for the load instruction
+ // %__THREW__.val = __THREW__;
+ // in the postamble after the invoke wrapper call
+ LoadInst *ThrewLI = nullptr;
+ StoreInst *ThrewResetSI = nullptr;
+ for (auto I = std::next(BasicBlock::iterator(CI)), IE = BB->end();
+ I != IE; ++I) {
+ if (auto *LI = dyn_cast<LoadInst>(I))
+ if (auto *GV = dyn_cast<GlobalVariable>(LI->getPointerOperand()))
+ if (GV == ThrewGV) {
+ Threw = ThrewLI = LI;
+ break;
+ }
+ }
+ // Search for the store instruction after the load above
+ // __THREW__ = 0;
+ for (auto I = std::next(BasicBlock::iterator(ThrewLI)), IE = BB->end();
+ I != IE; ++I) {
+ if (auto *SI = dyn_cast<StoreInst>(I))
+ if (auto *GV = dyn_cast<GlobalVariable>(SI->getPointerOperand()))
+ if (GV == ThrewGV && SI->getValueOperand() == IRB.getInt32(0)) {
+ ThrewResetSI = SI;
+ break;
+ }
+ }
+ assert(Threw && ThrewLI && "Cannot find __THREW__ load after invoke");
+ assert(ThrewResetSI && "Cannot find __THREW__ store after invoke");
+ Tail = SplitBlock(BB, ThrewResetSI->getNextNode());
+
+ } else {
+ // Wrap call with invoke wrapper and generate preamble/postamble
+ Threw = wrapInvoke(CI);
+ ToErase.push_back(CI);
+ Tail = SplitBlock(BB, CI->getNextNode());
+ }
+
+ // We need to replace the terminator in Tail - SplitBlock makes BB go
+ // straight to Tail, but we need to check if a longjmp occurred and, if so,
+ // go to the right setjmp-tail
+ ToErase.push_back(BB->getTerminator());
+
+ // Generate a call to the testSetjmp function and preamble/postamble code
+ // to figure out (1) whether a longjmp occurred and (2) if one did, which
+ // setjmp it corresponds to
+ Value *Label = nullptr;
+ Value *LongjmpResult = nullptr;
+ BasicBlock *EndBB = nullptr;
+ wrapTestSetjmp(BB, CI, Threw, SetjmpTable, SetjmpTableSize, Label,
+ LongjmpResult, EndBB);
+ assert(Label && LongjmpResult && EndBB);
+
+ // Create switch instruction
+ IRB.SetInsertPoint(EndBB);
+ SwitchInst *SI = IRB.CreateSwitch(Label, Tail, SetjmpRetPHIs.size());
+ // -1 means no longjmp happened, continue normally (will hit the default
+ // switch case). 0 means a longjmp that is not ours to handle, needs a
+ // rethrow. Otherwise the label is the index of the corresponding setjmp in
+ // SetjmpRetPHIs plus 1 (to avoid 0).
+ for (unsigned i = 0; i < SetjmpRetPHIs.size(); i++) {
+ SI->addCase(IRB.getInt32(i + 1), SetjmpRetPHIs[i]->getParent());
+ SetjmpRetPHIs[i]->addIncoming(LongjmpResult, EndBB);
+ }
+
+ // We are splitting the block here, and must continue to find other calls
+ // in the block - which is now split - so continue to traverse in the Tail
+ BBs.push_back(Tail);
+ }
+ }
+
+ // Erase everything we no longer need in this function
+ for (Instruction *I : ToErase)
+ I->eraseFromParent();
+
+ // Free setjmpTable buffer before each return instruction
+ for (BasicBlock &BB : F) {
+ TerminatorInst *TI = BB.getTerminator();
+ if (isa<ReturnInst>(TI))
+ CallInst::CreateFree(SetjmpTable, TI);
+ }
+
+ // Every call to saveSetjmp can change setjmpTable and setjmpTableSize
+ // (when buffer reallocation occurs)
+ // entry:
+ // setjmpTableSize = 4;
+ // setjmpTable = (int *) malloc(40);
+ // setjmpTable[0] = 0;
+ // ...
+ // somebb:
+ // setjmpTable = saveSetjmp(buf, label, setjmpTable, setjmpTableSize);
+ // setjmpTableSize = __tempRet0;
+ // So we need to make sure the SSA for these variables is valid so that every
+ // saveSetjmp and testSetjmp call has the correct arguments.
+ SSAUpdater SetjmpTableSSA;
+ SSAUpdater SetjmpTableSizeSSA;
+ SetjmpTableSSA.Initialize(Type::getInt32PtrTy(C), "setjmpTable");
+ SetjmpTableSizeSSA.Initialize(Type::getInt32Ty(C), "setjmpTableSize");
+ for (Instruction *I : SetjmpTableInsts)
+ SetjmpTableSSA.AddAvailableValue(I->getParent(), I);
+ for (Instruction *I : SetjmpTableSizeInsts)
+ SetjmpTableSizeSSA.AddAvailableValue(I->getParent(), I);
+
+ for (auto UI = SetjmpTable->use_begin(), UE = SetjmpTable->use_end();
+ UI != UE;) {
+ // Grab the use before incrementing the iterator.
+ Use &U = *UI;
+ // Increment the iterator before removing the use from the list.
+ ++UI;
+ if (Instruction *I = dyn_cast<Instruction>(U.getUser()))
+ if (I->getParent() != &EntryBB)
+ SetjmpTableSSA.RewriteUse(U);
+ }
+ for (auto UI = SetjmpTableSize->use_begin(), UE = SetjmpTableSize->use_end();
+ UI != UE;) {
+ Use &U = *UI;
+ ++UI;
+ if (Instruction *I = dyn_cast<Instruction>(U.getUser()))
+ if (I->getParent() != &EntryBB)
+ SetjmpTableSizeSSA.RewriteUse(U);
+ }
+
+ // Finally, our modifications to the CFG can break dominance of SSA variables.
+ // For example, in this code,
+ // if (x()) { .. setjmp() .. }
+ // if (y()) { .. longjmp() .. }
+ // We must split the longjmp block, and it can jump into the block split from
+ // the setjmp one. But that means that when we split the setjmp block, its
+ // first part no longer dominates its second part - there is a theoretically
+ // possible control flow path where x() is false, then y() is true and we
+ // reach the second part of the setjmp block, without ever reaching the first
+ // part. So, we rebuild SSA form here.
+ rebuildSSA(F);
+ return true;
+}
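+// Rough sketch of the result of this transformation (abbreviated; the
+// function/global names follow the *FName/*GVName strings set up elsewhere
+// and are only indicative, and the precise IR also depends on wrapInvoke,
+// wrapTestSetjmp, and the later SSA rewriting):
+//   %r = call i32 @setjmp(%jmp_buf* %buf)
+// becomes
+//   %setjmpTable = call i32* @saveSetjmp(%buf, N, %setjmpTable, %setjmpTableSize)
+//   %setjmpTableSize = load i32, i32* @__tempRet0
+//   ...
+//   %setjmp.ret = phi i32 [ 0, %bb ], ...
+// and every call that may longjmp is followed by a testSetjmp check that
+// switches to the matching setjmp tail, feeding %longjmp_result into the phi.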
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp
index 225c5d32cb5d..ccf6a18b32ea 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.cpp
@@ -14,6 +14,9 @@
//===----------------------------------------------------------------------===//
#include "WebAssemblyMachineFunctionInfo.h"
+#include "WebAssemblyISelLowering.h"
+#include "WebAssemblySubtarget.h"
+#include "llvm/CodeGen/Analysis.h"
using namespace llvm;
WebAssemblyFunctionInfo::~WebAssemblyFunctionInfo() {}
@@ -23,3 +26,37 @@ void WebAssemblyFunctionInfo::initWARegs() {
unsigned Reg = UnusedReg;
WARegs.resize(MF.getRegInfo().getNumVirtRegs(), Reg);
}
+
+void llvm::ComputeLegalValueVTs(const Function &F, const TargetMachine &TM,
+ Type *Ty, SmallVectorImpl<MVT> &ValueVTs) {
+ const DataLayout &DL(F.getParent()->getDataLayout());
+ const WebAssemblyTargetLowering &TLI =
+ *TM.getSubtarget<WebAssemblySubtarget>(F).getTargetLowering();
+ SmallVector<EVT, 4> VTs;
+ ComputeValueVTs(TLI, DL, Ty, VTs);
+
+ for (EVT VT : VTs) {
+ unsigned NumRegs = TLI.getNumRegisters(F.getContext(), VT);
+ MVT RegisterVT = TLI.getRegisterType(F.getContext(), VT);
+ for (unsigned i = 0; i != NumRegs; ++i)
+ ValueVTs.push_back(RegisterVT);
+ }
+}
+
+void llvm::ComputeSignatureVTs(const Function &F, const TargetMachine &TM,
+ SmallVectorImpl<MVT> &Params,
+ SmallVectorImpl<MVT> &Results) {
+ ComputeLegalValueVTs(F, TM, F.getReturnType(), Results);
+
+ if (Results.size() > 1) {
+ // WebAssembly currently can't lower returns of multiple values without
+ // demoting to sret (see WebAssemblyTargetLowering::CanLowerReturn). So
+ // replace multiple return values with a pointer parameter.
+ Results.clear();
+ Params.push_back(
+ MVT::getIntegerVT(TM.createDataLayout().getPointerSizeInBits()));
+ }
+
+ for (auto &Arg : F.args())
+ ComputeLegalValueVTs(F, TM, Arg.getType(), Params);
+}
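+// For illustration (hypothetical example): for a function with LLVM signature
+// { i64, i64 } (i32), ComputeLegalValueVTs first yields two i64 results; since
+// multiple results can't be lowered yet, Results is cleared and a pointer-sized
+// integer (i32 on wasm32, standing in for the sret pointer) is added to Params,
+// followed by i32 for the original argument, giving Params = { i32, i32 }.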
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
index 89f607d84b71..756619bebbed 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h
@@ -27,6 +27,8 @@ class WebAssemblyFunctionInfo final : public MachineFunctionInfo {
MachineFunction &MF;
std::vector<MVT> Params;
+ std::vector<MVT> Results;
+ std::vector<MVT> Locals;
/// A mapping from CodeGen vreg index to WebAssembly register number.
std::vector<unsigned> WARegs;
@@ -44,6 +46,10 @@ class WebAssemblyFunctionInfo final : public MachineFunctionInfo {
// TLI::LowerVASTART
unsigned VarargVreg = -1U;
+ // A virtual register holding the base pointer for functions that have
+ // overaligned values on the user stack.
+ unsigned BasePtrVreg = -1U;
+
public:
explicit WebAssemblyFunctionInfo(MachineFunction &MF) : MF(MF) {}
~WebAssemblyFunctionInfo() override;
@@ -51,15 +57,28 @@ class WebAssemblyFunctionInfo final : public MachineFunctionInfo {
void addParam(MVT VT) { Params.push_back(VT); }
const std::vector<MVT> &getParams() const { return Params; }
+ void addResult(MVT VT) { Results.push_back(VT); }
+ const std::vector<MVT> &getResults() const { return Results; }
+
+ void addLocal(MVT VT) { Locals.push_back(VT); }
+ const std::vector<MVT> &getLocals() const { return Locals; }
+
unsigned getVarargBufferVreg() const {
assert(VarargVreg != -1U && "Vararg vreg hasn't been set");
return VarargVreg;
}
void setVarargBufferVreg(unsigned Reg) { VarargVreg = Reg; }
+ unsigned getBasePointerVreg() const {
+ assert(BasePtrVreg != -1U && "Base ptr vreg hasn't been set");
+ return BasePtrVreg;
+ }
+ void setBasePointerVreg(unsigned Reg) { BasePtrVreg = Reg; }
+
static const unsigned UnusedReg = -1u;
void stackifyVReg(unsigned VReg) {
+ assert(MF.getRegInfo().getUniqueVRegDef(VReg));
if (TargetRegisterInfo::virtReg2Index(VReg) >= VRegStackified.size())
VRegStackified.resize(TargetRegisterInfo::virtReg2Index(VReg) + 1);
VRegStackified.set(TargetRegisterInfo::virtReg2Index(VReg));
@@ -88,6 +107,13 @@ class WebAssemblyFunctionInfo final : public MachineFunctionInfo {
}
};
+void ComputeLegalValueVTs(const Function &F, const TargetMachine &TM,
+ Type *Ty, SmallVectorImpl<MVT> &ValueVTs);
+
+void ComputeSignatureVTs(const Function &F, const TargetMachine &TM,
+ SmallVectorImpl<MVT> &Params,
+ SmallVectorImpl<MVT> &Results);
+
} // end namespace llvm
#endif
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp
index 473de7ddae7e..5a3a7411ed46 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeLiveIntervals.cpp
@@ -34,7 +34,7 @@ using namespace llvm;
namespace {
class WebAssemblyOptimizeLiveIntervals final : public MachineFunctionPass {
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "WebAssembly Optimize Live Intervals";
}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
index 4dc401a2c7cc..96520aa5d28c 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp
@@ -24,7 +24,7 @@ using namespace llvm;
namespace {
class OptimizeReturned final : public FunctionPass,
public InstVisitor<OptimizeReturned> {
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "WebAssembly Optimize Returned";
}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
index 56d44e6466eb..32dde88c2234 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPeephole.cpp
@@ -31,7 +31,7 @@ static cl::opt<bool> DisableWebAssemblyFallthroughReturnOpt(
namespace {
class WebAssemblyPeephole final : public MachineFunctionPass {
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "WebAssembly late peephole optimizer";
}
@@ -83,8 +83,8 @@ static bool MaybeRewriteToFallthrough(MachineInstr &MI, MachineBasicBlock &MBB,
if (&MI != &MBB.back())
return false;
- // If the operand isn't stackified, insert a COPY_LOCAL to read the operand
- // and stackify it.
+ // If the operand isn't stackified, insert a COPY to read the operand and
+ // stackify it.
MachineOperand &MO = MI.getOperand(0);
unsigned Reg = MO.getReg();
if (!MFI.isVRegStackified(Reg)) {
@@ -119,25 +119,6 @@ bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) {
switch (MI.getOpcode()) {
default:
break;
- case WebAssembly::STORE8_I32:
- case WebAssembly::STORE16_I32:
- case WebAssembly::STORE8_I64:
- case WebAssembly::STORE16_I64:
- case WebAssembly::STORE32_I64:
- case WebAssembly::STORE_F32:
- case WebAssembly::STORE_F64:
- case WebAssembly::STORE_I32:
- case WebAssembly::STORE_I64: {
- // Store instructions return their value operand. If we ended up using
- // the same register for both, replace it with a dead def so that it
- // can use $drop instead.
- MachineOperand &MO = MI.getOperand(0);
- unsigned OldReg = MO.getReg();
- unsigned NewReg =
- MI.getOperand(WebAssembly::StoreValueOperandNo).getReg();
- Changed |= MaybeRewriteToDrop(OldReg, NewReg, MO, MFI, MRI);
- break;
- }
case WebAssembly::CALL_I32:
case WebAssembly::CALL_I64: {
MachineOperand &Op1 = MI.getOperand(1);
@@ -169,22 +150,42 @@ bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) {
case WebAssembly::RETURN_I32:
Changed |= MaybeRewriteToFallthrough(
MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_I32,
- WebAssembly::COPY_LOCAL_I32);
+ WebAssembly::COPY_I32);
break;
case WebAssembly::RETURN_I64:
Changed |= MaybeRewriteToFallthrough(
MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_I64,
- WebAssembly::COPY_LOCAL_I64);
+ WebAssembly::COPY_I64);
break;
case WebAssembly::RETURN_F32:
Changed |= MaybeRewriteToFallthrough(
MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_F32,
- WebAssembly::COPY_LOCAL_F32);
+ WebAssembly::COPY_F32);
break;
case WebAssembly::RETURN_F64:
Changed |= MaybeRewriteToFallthrough(
MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_F64,
- WebAssembly::COPY_LOCAL_F64);
+ WebAssembly::COPY_F64);
+ break;
+ case WebAssembly::RETURN_v16i8:
+ Changed |= MaybeRewriteToFallthrough(
+ MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v16i8,
+ WebAssembly::COPY_V128);
+ break;
+ case WebAssembly::RETURN_v8i16:
+ Changed |= MaybeRewriteToFallthrough(
+ MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v8i16,
+ WebAssembly::COPY_V128);
+ break;
+ case WebAssembly::RETURN_v4i32:
+ Changed |= MaybeRewriteToFallthrough(
+ MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v4i32,
+ WebAssembly::COPY_V128);
+ break;
+ case WebAssembly::RETURN_v4f32:
+ Changed |= MaybeRewriteToFallthrough(
+ MI, MBB, MF, MFI, MRI, TII, WebAssembly::FALLTHROUGH_RETURN_v4f32,
+ WebAssembly::COPY_V128);
break;
case WebAssembly::RETURN_VOID:
if (!DisableWebAssemblyFallthroughReturnOpt &&
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp
index 30444ac598a4..473dcb7a33fd 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyPrepareForLiveIntervals.cpp
@@ -23,6 +23,7 @@
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
+#include "WebAssemblyUtilities.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -40,7 +41,7 @@ public:
WebAssemblyPrepareForLiveIntervals() : MachineFunctionPass(ID) {}
private:
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "WebAssembly Prepare For LiveIntervals";
}
@@ -58,23 +59,10 @@ FunctionPass *llvm::createWebAssemblyPrepareForLiveIntervals() {
return new WebAssemblyPrepareForLiveIntervals();
}
-/// Test whether the given instruction is an ARGUMENT.
-static bool IsArgument(const MachineInstr *MI) {
- switch (MI->getOpcode()) {
- case WebAssembly::ARGUMENT_I32:
- case WebAssembly::ARGUMENT_I64:
- case WebAssembly::ARGUMENT_F32:
- case WebAssembly::ARGUMENT_F64:
- return true;
- default:
- return false;
- }
-}
-
// Test whether the given register has an ARGUMENT def.
static bool HasArgumentDef(unsigned Reg, const MachineRegisterInfo &MRI) {
- for (auto &Def : MRI.def_instructions(Reg))
- if (IsArgument(&Def))
+ for (const auto &Def : MRI.def_instructions(Reg))
+ if (WebAssembly::isArgument(Def))
return true;
return false;
}
@@ -122,10 +110,10 @@ bool WebAssemblyPrepareForLiveIntervals::runOnMachineFunction(MachineFunction &M
// Move ARGUMENT_* instructions to the top of the entry block, so that their
// liveness reflects the fact that these really are live-in values.
for (auto MII = Entry.begin(), MIE = Entry.end(); MII != MIE; ) {
- MachineInstr *MI = &*MII++;
- if (IsArgument(MI)) {
- MI->removeFromParent();
- Entry.insert(Entry.begin(), MI);
+ MachineInstr &MI = *MII++;
+ if (WebAssembly::isArgument(MI)) {
+ MI.removeFromParent();
+ Entry.insert(Entry.begin(), &MI);
}
}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp
index dedd9108dfd5..5fd4a8d1949e 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegColoring.cpp
@@ -35,7 +35,7 @@ public:
static char ID; // Pass identification, replacement for typeid
WebAssemblyRegColoring() : MachineFunctionPass(ID) {}
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "WebAssembly Register Coloring";
}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp
index 4a8fd96f8324..e3470825940c 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegNumbering.cpp
@@ -17,6 +17,7 @@
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
+#include "WebAssemblyUtilities.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -32,7 +33,7 @@ using namespace llvm;
namespace {
class WebAssemblyRegNumbering final : public MachineFunctionPass {
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "WebAssembly Register Numbering";
}
@@ -68,20 +69,13 @@ bool WebAssemblyRegNumbering::runOnMachineFunction(MachineFunction &MF) {
// variables. Assign the numbers for them first.
MachineBasicBlock &EntryMBB = MF.front();
for (MachineInstr &MI : EntryMBB) {
- switch (MI.getOpcode()) {
- case WebAssembly::ARGUMENT_I32:
- case WebAssembly::ARGUMENT_I64:
- case WebAssembly::ARGUMENT_F32:
- case WebAssembly::ARGUMENT_F64: {
- int64_t Imm = MI.getOperand(1).getImm();
- DEBUG(dbgs() << "Arg VReg " << MI.getOperand(0).getReg() << " -> WAReg "
- << Imm << "\n");
- MFI.setWAReg(MI.getOperand(0).getReg(), Imm);
+ if (!WebAssembly::isArgument(MI))
break;
- }
- default:
- break;
- }
+
+ int64_t Imm = MI.getOperand(1).getImm();
+ DEBUG(dbgs() << "Arg VReg " << MI.getOperand(0).getReg() << " -> WAReg "
+ << Imm << "\n");
+ MFI.setWAReg(MI.getOperand(0).getReg(), Imm);
}
// Then assign regular WebAssembly registers for all remaining used
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
index 0aa3b621da32..32ee09e45796 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
@@ -13,10 +13,10 @@
/// This pass reorders instructions to put register uses and defs in an order
/// such that they form single-use expression trees. Registers fitting this form
/// are then marked as "stackified", meaning references to them are replaced by
-/// "push" and "pop" from the stack.
+/// "push" and "pop" from the value stack.
///
/// This is primarily a code size optimization, since temporary values on the
-/// expression don't need to be named.
+/// value stack don't need to be named.
///
//===----------------------------------------------------------------------===//
@@ -24,6 +24,7 @@
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // for WebAssembly::ARGUMENT_*
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblySubtarget.h"
+#include "WebAssemblyUtilities.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
@@ -39,7 +40,7 @@ using namespace llvm;
namespace {
class WebAssemblyRegStackify final : public MachineFunctionPass {
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "WebAssembly Register Stackify";
}
@@ -73,19 +74,50 @@ FunctionPass *llvm::createWebAssemblyRegStackify() {
// expression stack ordering constraints for an instruction which is on
// the expression stack.
static void ImposeStackOrdering(MachineInstr *MI) {
- // Write the opaque EXPR_STACK register.
- if (!MI->definesRegister(WebAssembly::EXPR_STACK))
- MI->addOperand(MachineOperand::CreateReg(WebAssembly::EXPR_STACK,
+ // Write the opaque VALUE_STACK register.
+ if (!MI->definesRegister(WebAssembly::VALUE_STACK))
+ MI->addOperand(MachineOperand::CreateReg(WebAssembly::VALUE_STACK,
/*isDef=*/true,
/*isImp=*/true));
- // Also read the opaque EXPR_STACK register.
- if (!MI->readsRegister(WebAssembly::EXPR_STACK))
- MI->addOperand(MachineOperand::CreateReg(WebAssembly::EXPR_STACK,
+ // Also read the opaque VALUE_STACK register.
+ if (!MI->readsRegister(WebAssembly::VALUE_STACK))
+ MI->addOperand(MachineOperand::CreateReg(WebAssembly::VALUE_STACK,
/*isDef=*/false,
/*isImp=*/true));
}
+// Convert an IMPLICIT_DEF instruction into an instruction which defines
+// a constant zero value.
+static void ConvertImplicitDefToConstZero(MachineInstr *MI,
+ MachineRegisterInfo &MRI,
+ const TargetInstrInfo *TII,
+ MachineFunction &MF) {
+ assert(MI->getOpcode() == TargetOpcode::IMPLICIT_DEF);
+
+ const auto *RegClass =
+ MRI.getRegClass(MI->getOperand(0).getReg());
+ if (RegClass == &WebAssembly::I32RegClass) {
+ MI->setDesc(TII->get(WebAssembly::CONST_I32));
+ MI->addOperand(MachineOperand::CreateImm(0));
+ } else if (RegClass == &WebAssembly::I64RegClass) {
+ MI->setDesc(TII->get(WebAssembly::CONST_I64));
+ MI->addOperand(MachineOperand::CreateImm(0));
+ } else if (RegClass == &WebAssembly::F32RegClass) {
+ MI->setDesc(TII->get(WebAssembly::CONST_F32));
+ ConstantFP *Val = cast<ConstantFP>(Constant::getNullValue(
+ Type::getFloatTy(MF.getFunction()->getContext())));
+ MI->addOperand(MachineOperand::CreateFPImm(Val));
+ } else if (RegClass == &WebAssembly::F64RegClass) {
+ MI->setDesc(TII->get(WebAssembly::CONST_F64));
+ ConstantFP *Val = cast<ConstantFP>(Constant::getNullValue(
+ Type::getDoubleTy(MF.getFunction()->getContext())));
+ MI->addOperand(MachineOperand::CreateFPImm(Val));
+ } else {
+ llvm_unreachable("Unexpected reg class");
+ }
+}
+
// Determine whether a call to the callee referenced by
// MI->getOperand(CalleeOpNo) reads memory, writes memory, and/or has side
// effects.
@@ -130,7 +162,7 @@ static void Query(const MachineInstr &MI, AliasAnalysis &AA, bool &Read,
return;
// Check for loads.
- if (MI.mayLoad() && !MI.isInvariantLoad(&AA))
+ if (MI.mayLoad() && !MI.isDereferenceableInvariantLoad(&AA))
Read = true;
// Check for stores.
@@ -255,7 +287,7 @@ static bool HasOneUse(unsigned Reg, MachineInstr *Def,
const VNInfo *DefVNI = LI.getVNInfoAt(
LIS.getInstructionIndex(*Def).getRegSlot());
assert(DefVNI);
- for (auto I : MRI.use_nodbg_operands(Reg)) {
+ for (auto &I : MRI.use_nodbg_operands(Reg)) {
const auto &Result = LI.Query(LIS.getInstructionIndex(*I.getParent()));
if (Result.valueIn() == DefVNI) {
if (!Result.isKill())
@@ -274,11 +306,11 @@ static bool HasOneUse(unsigned Reg, MachineInstr *Def,
// TODO: Compute memory dependencies in a way that uses AliasAnalysis to be
// more precise.
static bool IsSafeToMove(const MachineInstr *Def, const MachineInstr *Insert,
- AliasAnalysis &AA, const LiveIntervals &LIS,
- const MachineRegisterInfo &MRI) {
+ AliasAnalysis &AA, const MachineRegisterInfo &MRI) {
assert(Def->getParent() == Insert->getParent());
// Check for register dependencies.
+ SmallVector<unsigned, 4> MutableRegisters;
for (const MachineOperand &MO : Def->operands()) {
if (!MO.isReg() || MO.isUndef())
continue;
@@ -301,21 +333,11 @@ static bool IsSafeToMove(const MachineInstr *Def, const MachineInstr *Insert,
return false;
}
- // Ask LiveIntervals whether moving this virtual register use or def to
- // Insert will change which value numbers are seen.
- //
- // If the operand is a use of a register that is also defined in the same
- // instruction, test that the newly defined value reaches the insert point,
- // since the operand will be moving along with the def.
- const LiveInterval &LI = LIS.getInterval(Reg);
- VNInfo *DefVNI =
- (MO.isDef() || Def->definesRegister(Reg)) ?
- LI.getVNInfoAt(LIS.getInstructionIndex(*Def).getRegSlot()) :
- LI.getVNInfoBefore(LIS.getInstructionIndex(*Def));
- assert(DefVNI && "Instruction input missing value number");
- VNInfo *InsVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(*Insert));
- if (InsVNI && DefVNI != InsVNI)
- return false;
+ // If one of the operands isn't in SSA form, it has different values at
+ // different times, and we need to make sure we don't move our use across
+ // a different def.
+ if (!MO.isDef() && !MRI.hasOneDef(Reg))
+ MutableRegisters.push_back(Reg);
}
bool Read = false, Write = false, Effects = false, StackPointer = false;
@@ -323,7 +345,8 @@ static bool IsSafeToMove(const MachineInstr *Def, const MachineInstr *Insert,
// If the instruction does not access memory and has no side effects, it has
// no additional dependencies.
- if (!Read && !Write && !Effects && !StackPointer)
+ bool HasMutableRegisters = !MutableRegisters.empty();
+ if (!Read && !Write && !Effects && !StackPointer && !HasMutableRegisters)
return true;
// Scan through the intervening instructions between Def and Insert.
@@ -343,6 +366,11 @@ static bool IsSafeToMove(const MachineInstr *Def, const MachineInstr *Insert,
return false;
if (StackPointer && InterveningStackPointer)
return false;
+
+ for (unsigned Reg : MutableRegisters)
+ for (const MachineOperand &MO : I->operands())
+ if (MO.isReg() && MO.isDef() && MO.getReg() == Reg)
+ return false;
}
return true;
@@ -360,7 +388,7 @@ static bool OneUseDominatesOtherUses(unsigned Reg, const MachineOperand &OneUse,
const MachineInstr *OneUseInst = OneUse.getParent();
VNInfo *OneUseVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(*OneUseInst));
- for (const MachineOperand &Use : MRI.use_operands(Reg)) {
+ for (const MachineOperand &Use : MRI.use_nodbg_operands(Reg)) {
if (&Use == &OneUse)
continue;
@@ -384,7 +412,7 @@ static bool OneUseDominatesOtherUses(unsigned Reg, const MachineOperand &OneUse,
//
// This is needed as a consequence of using implicit get_locals for
// uses and implicit set_locals for defs.
- if (UseInst->getDesc().getNumDefs() == 0)
+ if (UseInst->getDesc().getNumDefs() == 0)
return false;
const MachineOperand &MO = UseInst->getOperand(0);
if (!MO.isReg())
@@ -408,16 +436,18 @@ static bool OneUseDominatesOtherUses(unsigned Reg, const MachineOperand &OneUse,
return true;
}
-/// Get the appropriate tee_local opcode for the given register class.
-static unsigned GetTeeLocalOpcode(const TargetRegisterClass *RC) {
+/// Get the appropriate tee opcode for the given register class.
+static unsigned GetTeeOpcode(const TargetRegisterClass *RC) {
if (RC == &WebAssembly::I32RegClass)
- return WebAssembly::TEE_LOCAL_I32;
+ return WebAssembly::TEE_I32;
if (RC == &WebAssembly::I64RegClass)
- return WebAssembly::TEE_LOCAL_I64;
+ return WebAssembly::TEE_I64;
if (RC == &WebAssembly::F32RegClass)
- return WebAssembly::TEE_LOCAL_F32;
+ return WebAssembly::TEE_F32;
if (RC == &WebAssembly::F64RegClass)
- return WebAssembly::TEE_LOCAL_F64;
+ return WebAssembly::TEE_F64;
+ if (RC == &WebAssembly::V128RegClass)
+ return WebAssembly::TEE_V128;
llvm_unreachable("Unexpected register class");
}
@@ -515,8 +545,8 @@ static MachineInstr *RematerializeCheapDef(
/// A multiple-use def in the same block with no intervening memory or register
/// dependencies; move the def down, nest it with the current instruction, and
-/// insert a tee_local to satisfy the rest of the uses. As an illustration,
-/// rewrite this:
+/// insert a tee to satisfy the rest of the uses. As an illustration, rewrite
+/// this:
///
/// Reg = INST ... // Def
/// INST ..., Reg, ... // Insert
@@ -526,7 +556,7 @@ static MachineInstr *RematerializeCheapDef(
/// to this:
///
/// DefReg = INST ... // Def (to become the new Insert)
-/// TeeReg, Reg = TEE_LOCAL_... DefReg
+/// TeeReg, Reg = TEE_... DefReg
/// INST ..., TeeReg, ... // Insert
/// INST ..., Reg, ...
/// INST ..., Reg, ...
@@ -549,7 +579,7 @@ static MachineInstr *MoveAndTeeForMultiUse(
unsigned DefReg = MRI.createVirtualRegister(RegClass);
MachineOperand &DefMO = Def->getOperand(0);
MachineInstr *Tee = BuildMI(MBB, Insert, Insert->getDebugLoc(),
- TII->get(GetTeeLocalOpcode(RegClass)), TeeReg)
+ TII->get(GetTeeOpcode(RegClass)), TeeReg)
.addReg(Reg, RegState::Define)
.addReg(DefReg, getUndefRegState(DefMO.isDead()));
Op.setReg(TeeReg);
@@ -749,8 +779,7 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
if (TargetRegisterInfo::isPhysicalRegister(Reg))
continue;
- // Identify the definition for this register at this point. Most
- // registers are in SSA form here so we try a quick MRI query first.
+ // Identify the definition for this register at this point.
MachineInstr *Def = GetVRegDef(Reg, Insert, MRI, LIS);
if (!Def)
continue;
@@ -762,20 +791,17 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
// Argument instructions represent live-in registers and not real
// instructions.
- if (Def->getOpcode() == WebAssembly::ARGUMENT_I32 ||
- Def->getOpcode() == WebAssembly::ARGUMENT_I64 ||
- Def->getOpcode() == WebAssembly::ARGUMENT_F32 ||
- Def->getOpcode() == WebAssembly::ARGUMENT_F64)
+ if (WebAssembly::isArgument(*Def))
continue;
// Decide which strategy to take. Prefer to move a single-use value
- // over cloning it, and prefer cloning over introducing a tee_local.
+ // over cloning it, and prefer cloning over introducing a tee.
// For moving, we require the def to be in the same block as the use;
// this makes things simpler (LiveIntervals' handleMove function only
// supports intra-block moves) and it's MachineSink's job to catch all
// the sinking opportunities anyway.
bool SameBlock = Def->getParent() == &MBB;
- bool CanMove = SameBlock && IsSafeToMove(Def, Insert, AA, LIS, MRI) &&
+ bool CanMove = SameBlock && IsSafeToMove(Def, Insert, AA, MRI) &&
!TreeWalker.IsOnStack(Reg);
if (CanMove && HasOneUse(Reg, Def, MRI, MDT, LIS)) {
Insert = MoveForSingleUse(Reg, Op, Def, MBB, Insert, LIS, MFI, MRI);
@@ -796,6 +822,12 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
continue;
}
+ // If the instruction we just stackified is an IMPLICIT_DEF, convert it
+ // to a constant 0 so that the def is explicit, and the push/pop
+ // correspondence is maintained.
+ if (Insert->getOpcode() == TargetOpcode::IMPLICIT_DEF)
+ ConvertImplicitDefToConstZero(Insert, MRI, TII, MF);
+
// We stackified an operand. Add the defining instruction's operands to
// the worklist stack now to continue to build an ever deeper tree.
Commuting.Reset();
@@ -806,19 +838,18 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
// the next instruction we can build a tree on.
if (Insert != &*MII) {
ImposeStackOrdering(&*MII);
- MII = std::prev(
- llvm::make_reverse_iterator(MachineBasicBlock::iterator(Insert)));
+ MII = MachineBasicBlock::iterator(Insert).getReverse();
Changed = true;
}
}
}
- // If we used EXPR_STACK anywhere, add it to the live-in sets everywhere so
+ // If we used VALUE_STACK anywhere, add it to the live-in sets everywhere so
// that it never looks like a use-before-def.
if (Changed) {
- MF.getRegInfo().addLiveIn(WebAssembly::EXPR_STACK);
+ MF.getRegInfo().addLiveIn(WebAssembly::VALUE_STACK);
for (MachineBasicBlock &MBB : MF)
- MBB.addLiveIn(WebAssembly::EXPR_STACK);
+ MBB.addLiveIn(WebAssembly::VALUE_STACK);
}
#ifndef NDEBUG
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
index 239fe89b7ef9..9367464c806e 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
@@ -61,19 +61,25 @@ void WebAssemblyRegisterInfo::eliminateFrameIndex(
MachineFunction &MF = *MBB.getParent();
MachineRegisterInfo &MRI = MF.getRegInfo();
int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
- const MachineFrameInfo &MFI = *MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
int64_t FrameOffset = MFI.getStackSize() + MFI.getObjectOffset(FrameIndex);
+ assert(MFI.getObjectSize(FrameIndex) != 0 &&
+ "We assume that variable-sized objects have already been lowered, "
+ "and don't use FrameIndex operands.");
+ unsigned FrameRegister = getFrameRegister(MF);
+
// If this is the address operand of a load or store, make it relative to SP
// and fold the frame offset directly in.
- if (MI.mayLoadOrStore() && FIOperandNum == WebAssembly::MemOpAddressOperandNo) {
- assert(FrameOffset >= 0 && MI.getOperand(1).getImm() >= 0);
- int64_t Offset = MI.getOperand(1).getImm() + FrameOffset;
+ if ((MI.mayLoad() && FIOperandNum == WebAssembly::LoadAddressOperandNo) ||
+ (MI.mayStore() && FIOperandNum == WebAssembly::StoreAddressOperandNo)) {
+ assert(FrameOffset >= 0 && MI.getOperand(FIOperandNum - 1).getImm() >= 0);
+ int64_t Offset = MI.getOperand(FIOperandNum - 1).getImm() + FrameOffset;
if (static_cast<uint64_t>(Offset) <= std::numeric_limits<uint32_t>::max()) {
MI.getOperand(FIOperandNum - 1).setImm(Offset);
MI.getOperand(FIOperandNum)
- .ChangeToRegister(WebAssembly::SP32, /*IsDef=*/false);
+ .ChangeToRegister(FrameRegister, /*IsDef=*/false);
return;
}
}
@@ -94,7 +100,7 @@ void WebAssemblyRegisterInfo::eliminateFrameIndex(
MachineOperand &ImmMO = Def->getOperand(1);
ImmMO.setImm(ImmMO.getImm() + uint32_t(FrameOffset));
MI.getOperand(FIOperandNum)
- .ChangeToRegister(WebAssembly::SP32, /*IsDef=*/false);
+ .ChangeToRegister(FrameRegister, /*IsDef=*/false);
return;
}
}
@@ -104,7 +110,7 @@ void WebAssemblyRegisterInfo::eliminateFrameIndex(
// Otherwise create an i32.add SP, offset and make it the operand.
const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
- unsigned FIRegOperand = WebAssembly::SP32;
+ unsigned FIRegOperand = FrameRegister;
if (FrameOffset) {
// Create i32.add SP, offset and make it the operand.
const TargetRegisterClass *PtrRC =
@@ -116,7 +122,7 @@ void WebAssemblyRegisterInfo::eliminateFrameIndex(
FIRegOperand = MRI.createVirtualRegister(PtrRC);
BuildMI(MBB, *II, II->getDebugLoc(), TII->get(WebAssembly::ADD_I32),
FIRegOperand)
- .addReg(WebAssembly::SP32)
+ .addReg(FrameRegister)
.addReg(OffsetOp);
}
MI.getOperand(FIOperandNum).ChangeToRegister(FIRegOperand, /*IsDef=*/false);
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td
index 80a83fa76b57..90888100be17 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyRegisterInfo.td
@@ -39,9 +39,11 @@ def SP64 : WebAssemblyReg<"%SP64">;
def F32_0 : WebAssemblyReg<"%f32.0">;
def F64_0 : WebAssemblyReg<"%f64.0">;
-// The expression stack "register". This is an opaque entity which serves to
-// order uses and defs that must remain in LIFO order.
-def EXPR_STACK : WebAssemblyReg<"STACK">;
+def V128_0: WebAssemblyReg<"%v128">;
+
+// The value stack "register". This is an opaque entity which serves to order
+// uses and defs that must remain in LIFO order.
+def VALUE_STACK : WebAssemblyReg<"STACK">;
// The incoming arguments "register". This is an opaque entity which serves to
// order the ARGUMENT instructions that are emulating live-in registers and
@@ -56,3 +58,5 @@ def I32 : WebAssemblyRegClass<[i32], 32, (add FP32, SP32)>;
def I64 : WebAssemblyRegClass<[i64], 64, (add FP64, SP64)>;
def F32 : WebAssemblyRegClass<[f32], 32, (add F32_0)>;
def F64 : WebAssemblyRegClass<[f64], 64, (add F64_0)>;
+def V128 : WebAssemblyRegClass<[v4f32, v4i32, v16i8, v8i16], 128, (add V128_0)>;
+
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp
index 11bda47eac56..9e944df637d9 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyReplacePhysRegs.cpp
@@ -39,7 +39,7 @@ public:
WebAssemblyReplacePhysRegs() : MachineFunctionPass(ID) {}
private:
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "WebAssembly Replace Physical Registers";
}
@@ -76,7 +76,7 @@ bool WebAssemblyReplacePhysRegs::runOnMachineFunction(MachineFunction &MF) {
for (unsigned PReg = WebAssembly::NoRegister + 1;
PReg < WebAssembly::NUM_TARGET_REGS; ++PReg) {
// Skip fake registers that are never used explicitly.
- if (PReg == WebAssembly::EXPR_STACK || PReg == WebAssembly::ARGUMENTS)
+ if (PReg == WebAssembly::VALUE_STACK || PReg == WebAssembly::ARGUMENTS)
continue;
// Replace explicit uses of the physical register with a virtual register.
@@ -88,6 +88,8 @@ bool WebAssemblyReplacePhysRegs::runOnMachineFunction(MachineFunction &MF) {
if (VReg == WebAssembly::NoRegister)
VReg = MRI.createVirtualRegister(RC);
MO.setReg(VReg);
+ if (MO.getParent()->isDebugValue())
+ MO.setIsDebug();
Changed = true;
}
}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp
index 4ebea68c58a5..2441ead7cb27 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblySetP2AlignOperands.cpp
@@ -30,7 +30,7 @@ public:
static char ID; // Pass identification, replacement for typeid
WebAssemblySetP2AlignOperands() : MachineFunctionPass(ID) {}
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "WebAssembly Set p2align Operands";
}
@@ -50,6 +50,27 @@ FunctionPass *llvm::createWebAssemblySetP2AlignOperands() {
return new WebAssemblySetP2AlignOperands();
}
+static void RewriteP2Align(MachineInstr &MI, unsigned OperandNo) {
+ assert(MI.getOperand(OperandNo).getImm() == 0 &&
+ "ISel should set p2align operands to 0");
+ assert(MI.hasOneMemOperand() &&
+ "Load and store instructions have exactly one mem operand");
+ assert((*MI.memoperands_begin())->getSize() ==
+ (UINT64_C(1)
+ << WebAssembly::GetDefaultP2Align(MI.getOpcode())) &&
+ "Default p2align value should be natural");
+ assert(MI.getDesc().OpInfo[OperandNo].OperandType ==
+ WebAssembly::OPERAND_P2ALIGN &&
+ "Load and store instructions should have a p2align operand");
+ uint64_t P2Align = Log2_64((*MI.memoperands_begin())->getAlignment());
+
+ // WebAssembly does not currently support supernatural alignment.
+ P2Align = std::min(
+ P2Align, uint64_t(WebAssembly::GetDefaultP2Align(MI.getOpcode())));
+
+ MI.getOperand(OperandNo).setImm(P2Align);
+}
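+// For instance (illustrative numbers): a LOAD_I32 whose memory operand reports
+// 4-byte alignment gets p2align 2, one with 1-byte alignment gets p2align 0,
+// and anything above the natural alignment is clamped back to the default,
+// since supernatural alignment isn't supported.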
+
bool WebAssemblySetP2AlignOperands::runOnMachineFunction(MachineFunction &MF) {
DEBUG({
dbgs() << "********** Set p2align Operands **********\n"
@@ -75,6 +96,8 @@ bool WebAssemblySetP2AlignOperands::runOnMachineFunction(MachineFunction &MF) {
case WebAssembly::LOAD16_U_I64:
case WebAssembly::LOAD32_S_I64:
case WebAssembly::LOAD32_U_I64:
+ RewriteP2Align(MI, WebAssembly::LoadP2AlignOperandNo);
+ break;
case WebAssembly::STORE_I32:
case WebAssembly::STORE_I64:
case WebAssembly::STORE_F32:
@@ -83,27 +106,9 @@ bool WebAssemblySetP2AlignOperands::runOnMachineFunction(MachineFunction &MF) {
case WebAssembly::STORE16_I32:
case WebAssembly::STORE8_I64:
case WebAssembly::STORE16_I64:
- case WebAssembly::STORE32_I64: {
- assert(MI.getOperand(3).getImm() == 0 &&
- "ISel should set p2align operands to 0");
- assert(MI.hasOneMemOperand() &&
- "Load and store instructions have exactly one mem operand");
- assert((*MI.memoperands_begin())->getSize() ==
- (UINT64_C(1)
- << WebAssembly::GetDefaultP2Align(MI.getOpcode())) &&
- "Default p2align value should be natural");
- assert(MI.getDesc().OpInfo[3].OperandType ==
- WebAssembly::OPERAND_P2ALIGN &&
- "Load and store instructions should have a p2align operand");
- uint64_t P2Align = Log2_64((*MI.memoperands_begin())->getAlignment());
-
- // WebAssembly does not currently support supernatural alignment.
- P2Align = std::min(
- P2Align, uint64_t(WebAssembly::GetDefaultP2Align(MI.getOpcode())));
-
- MI.getOperand(3).setImm(P2Align);
+ case WebAssembly::STORE32_I64:
+ RewriteP2Align(MI, WebAssembly::StoreP2AlignOperandNo);
break;
- }
default:
break;
}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp
index 1e9a773ae628..34ec6f2d34a7 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp
@@ -46,9 +46,7 @@ public:
static char ID; // Pass identification, replacement for typeid
WebAssemblyStoreResults() : MachineFunctionPass(ID) {}
- const char *getPassName() const override {
- return "WebAssembly Store Results";
- }
+ StringRef getPassName() const override { return "WebAssembly Store Results"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
@@ -90,7 +88,7 @@ static bool ReplaceDominatedUses(MachineBasicBlock &MBB, MachineInstr &MI,
SmallVector<SlotIndex, 4> Indices;
- for (auto I = MRI.use_begin(FromReg), E = MRI.use_end(); I != E;) {
+ for (auto I = MRI.use_nodbg_begin(FromReg), E = MRI.use_nodbg_end(); I != E;) {
MachineOperand &O = *I++;
MachineInstr *Where = O.getParent();
@@ -139,15 +137,6 @@ static bool ReplaceDominatedUses(MachineBasicBlock &MBB, MachineInstr &MI,
return Changed;
}
-static bool optimizeStore(MachineBasicBlock &MBB, MachineInstr &MI,
- const MachineRegisterInfo &MRI,
- MachineDominatorTree &MDT,
- LiveIntervals &LIS) {
- unsigned ToReg = MI.getOperand(0).getReg();
- unsigned FromReg = MI.getOperand(WebAssembly::StoreValueOperandNo).getReg();
- return ReplaceDominatedUses(MBB, MI, FromReg, ToReg, MRI, MDT, LIS);
-}
-
static bool optimizeCall(MachineBasicBlock &MBB, MachineInstr &MI,
const MachineRegisterInfo &MRI,
MachineDominatorTree &MDT,
@@ -202,17 +191,6 @@ bool WebAssemblyStoreResults::runOnMachineFunction(MachineFunction &MF) {
switch (MI.getOpcode()) {
default:
break;
- case WebAssembly::STORE8_I32:
- case WebAssembly::STORE16_I32:
- case WebAssembly::STORE8_I64:
- case WebAssembly::STORE16_I64:
- case WebAssembly::STORE32_I64:
- case WebAssembly::STORE_F32:
- case WebAssembly::STORE_F64:
- case WebAssembly::STORE_I32:
- case WebAssembly::STORE_I64:
- Changed |= optimizeStore(MBB, MI, MRI, MDT, LIS);
- break;
case WebAssembly::CALL_I32:
case WebAssembly::CALL_I64:
Changed |= optimizeCall(MBB, MI, MRI, MDT, LIS, TLI, LibInfo);
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
index 32154af3c1c2..b61bc0a08143 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
@@ -29,10 +29,28 @@ using namespace llvm;
#define DEBUG_TYPE "wasm"
+// Emscripten's asm.js-style exception handling
+static cl::opt<bool> EnableEmException(
+ "enable-emscripten-cxx-exceptions",
+ cl::desc("WebAssembly Emscripten-style exception handling"),
+ cl::init(false));
+
+// Emscripten's asm.js-style setjmp/longjmp handling
+static cl::opt<bool> EnableEmSjLj(
+ "enable-emscripten-sjlj",
+ cl::desc("WebAssembly Emscripten-style setjmp/longjmp handling"),
+ cl::init(false));
+
extern "C" void LLVMInitializeWebAssemblyTarget() {
// Register the target.
- RegisterTargetMachine<WebAssemblyTargetMachine> X(TheWebAssemblyTarget32);
- RegisterTargetMachine<WebAssemblyTargetMachine> Y(TheWebAssemblyTarget64);
+ RegisterTargetMachine<WebAssemblyTargetMachine> X(
+ getTheWebAssemblyTarget32());
+ RegisterTargetMachine<WebAssemblyTargetMachine> Y(
+ getTheWebAssemblyTarget64());
+
+ // Register the exception handling pass so that it is available to opt
+ initializeWebAssemblyLowerEmscriptenEHSjLjPass(
+ *PassRegistry::getPassRegistry());
}
//===----------------------------------------------------------------------===//
@@ -57,10 +75,10 @@ WebAssemblyTargetMachine::WebAssemblyTargetMachine(
TT, CPU, FS, Options, getEffectiveRelocModel(RM),
CM, OL),
TLOF(make_unique<WebAssemblyTargetObjectFile>()) {
- // WebAssembly type-checks expressions, but a noreturn function with a return
+ // WebAssembly type-checks instructions, but a noreturn function with a return
// type that doesn't match the context will cause a check failure. So we lower
// LLVM 'unreachable' to ISD::TRAP and then lower that to WebAssembly's
- // 'unreachable' expression which is meant for that case.
+ // 'unreachable' instruction, which is meant for that case.
this->Options.TrapUnreachable = true;
initAsmInfo();
@@ -149,6 +167,23 @@ void WebAssemblyPassConfig::addIRPasses() {
if (getOptLevel() != CodeGenOpt::None)
addPass(createWebAssemblyOptimizeReturned());
+ // If exception handling is not enabled and setjmp/longjmp handling is
+ // enabled, we lower invokes into calls and delete unreachable landingpad
+ // blocks. Lowering invokes when there is no EH support is done in
+ // TargetPassConfig::addPassesToHandleExceptions, but this runs after this
+ // function and SjLj handling expects all invokes to be lowered before.
+ if (!EnableEmException) {
+ addPass(createLowerInvokePass());
+ // The lower invoke pass may create unreachable code. Remove it in order not
+ // to process dead blocks in setjmp/longjmp handling.
+ addPass(createUnreachableBlockEliminationPass());
+ }
+
+ // Handle exceptions and setjmp/longjmp if enabled.
+ if (EnableEmException || EnableEmSjLj)
+ addPass(createWebAssemblyLowerEmscriptenEHSjLj(EnableEmException,
+ EnableEmSjLj));
+
TargetPassConfig::addIRPasses();
}
@@ -175,7 +210,7 @@ void WebAssemblyPassConfig::addPostRegAlloc() {
// Has no asserts of its own, but was not written to handle virtual regs.
disablePass(&ShrinkWrapID);
- // These functions all require the AllVRegsAllocated property.
+ // These functions all require the NoVRegs property.
disablePass(&MachineCopyPropagationID);
disablePass(&PostRASchedulerID);
disablePass(&FuncletLayoutID);
@@ -194,6 +229,11 @@ void WebAssemblyPassConfig::addPreEmitPass() {
// colored, and numbered with the rest of the registers.
addPass(createWebAssemblyReplacePhysRegs());
+ // Rewrite pseudo call_indirect instructions as real instructions.
+ // This needs to run before register stackification, because we change the
+ // order of the arguments.
+ addPass(createWebAssemblyCallIndirectFixup());
+
if (getOptLevel() != CodeGenOpt::None) {
// LiveIntervals isn't commonly run this late. Re-establish preconditions.
addPass(createWebAssemblyPrepareForLiveIntervals());
@@ -204,7 +244,7 @@ void WebAssemblyPassConfig::addPreEmitPass() {
// Prepare store instructions for register stackifying.
addPass(createWebAssemblyStoreResults());
- // Mark registers as representing wasm's expression stack. This is a key
+ // Mark registers as representing wasm's value stack. This is a key
// code-compression technique in WebAssembly. We run this pass (and
// StoreResults above) very late, so that it sees as much code as possible,
// including code emitted by PEI and expanded by late tail duplication.
@@ -216,6 +256,9 @@ void WebAssemblyPassConfig::addPreEmitPass() {
addPass(createWebAssemblyRegColoring());
}
+ // Insert explicit get_local and set_local operators.
+ addPass(createWebAssemblyExplicitLocals());
+
// Eliminate multiple-entry loops.
addPass(createWebAssemblyFixIrreducibleControlFlow());
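
Editorial note (not part of the patch): the two cl::opt flags added above are ordinary LLVM command-line options, so any tool that runs the standard option parser can flip them. A hedged sketch of a hypothetical standalone driver follows; the flag names come from the patch, everything else is illustrative.

#include "llvm/Support/CommandLine.h"

int main(int argc, char **argv) {
  // Invoked e.g. as:
  //   demo -enable-emscripten-cxx-exceptions -enable-emscripten-sjlj
  // which would set the static cl::opt values consulted in addIRPasses().
  llvm::cl::ParseCommandLineOptions(argc, argv, "Emscripten EH/SjLj flag demo\n");
  return 0;
}
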
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
index fe99e96eb3b8..2a2e3941f82d 100644
--- a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
@@ -42,13 +42,6 @@ public:
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}
- // Provide value semantics. MSVC requires that we spell all of these out.
- WebAssemblyTTIImpl(const WebAssemblyTTIImpl &Arg)
- : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {}
- WebAssemblyTTIImpl(WebAssemblyTTIImpl &&Arg)
- : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
- TLI(std::move(Arg.TLI)) {}
-
/// \name Scalar TTI Implementations
/// @{
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
new file mode 100644
index 000000000000..a0049c147d2c
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.cpp
@@ -0,0 +1,71 @@
+//===-- WebAssemblyUtilities.cpp - WebAssembly Utility Functions ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements several utility functions for WebAssembly.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssemblyUtilities.h"
+#include "WebAssemblyMachineFunctionInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+using namespace llvm;
+
+bool WebAssembly::isArgument(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ case WebAssembly::ARGUMENT_I32:
+ case WebAssembly::ARGUMENT_I64:
+ case WebAssembly::ARGUMENT_F32:
+ case WebAssembly::ARGUMENT_F64:
+ case WebAssembly::ARGUMENT_v16i8:
+ case WebAssembly::ARGUMENT_v8i16:
+ case WebAssembly::ARGUMENT_v4i32:
+ case WebAssembly::ARGUMENT_v4f32:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool WebAssembly::isCopy(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ case WebAssembly::COPY_I32:
+ case WebAssembly::COPY_I64:
+ case WebAssembly::COPY_F32:
+ case WebAssembly::COPY_F64:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool WebAssembly::isTee(const MachineInstr &MI) {
+ switch (MI.getOpcode()) {
+ case WebAssembly::TEE_I32:
+ case WebAssembly::TEE_I64:
+ case WebAssembly::TEE_F32:
+ case WebAssembly::TEE_F64:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/// Test whether MI is a child of some other node in an expression tree.
+bool WebAssembly::isChild(const MachineInstr &MI,
+ const WebAssemblyFunctionInfo &MFI) {
+ if (MI.getNumOperands() == 0)
+ return false;
+ const MachineOperand &MO = MI.getOperand(0);
+ if (!MO.isReg() || MO.isImplicit() || !MO.isDef())
+ return false;
+ unsigned Reg = MO.getReg();
+ return TargetRegisterInfo::isVirtualRegister(Reg) &&
+ MFI.isVRegStackified(Reg);
+}
diff --git a/contrib/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h
new file mode 100644
index 000000000000..eb114403d14e
--- /dev/null
+++ b/contrib/llvm/lib/Target/WebAssembly/WebAssemblyUtilities.h
@@ -0,0 +1,34 @@
+//===-- WebAssemblyUtilities - WebAssembly Utility Functions ---*- C++ -*-====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file contains the declaration of the WebAssembly-specific
+/// utility functions.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYUTILITIES_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYUTILITIES_H
+
+namespace llvm {
+
+class MachineInstr;
+class WebAssemblyFunctionInfo;
+
+namespace WebAssembly {
+
+bool isArgument(const MachineInstr &MI);
+bool isCopy(const MachineInstr &MI);
+bool isTee(const MachineInstr &MI);
+bool isChild(const MachineInstr &MI, const WebAssemblyFunctionInfo &MFI);
+
+} // end namespace WebAssembly
+} // end namespace llvm
+
+#endif
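
Editorial note (not part of the patch): a minimal usage sketch for the new predicates. A hypothetical WebAssembly machine pass could use them to skip the target's bookkeeping pseudo-instructions; countRealWasmInstrs is an illustrative name, not an LLVM API.

#include "WebAssemblyUtilities.h"
#include "llvm/CodeGen/MachineFunction.h"

// Count instructions that are neither ARGUMENT, COPY_* nor TEE_* pseudos.
static unsigned countRealWasmInstrs(const llvm::MachineFunction &MF) {
  unsigned N = 0;
  for (const llvm::MachineBasicBlock &MBB : MF)
    for (const llvm::MachineInstr &MI : MBB)
      if (!llvm::WebAssembly::isArgument(MI) &&
          !llvm::WebAssembly::isCopy(MI) && !llvm::WebAssembly::isTee(MI))
        ++N;
  return N;
}
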
diff --git a/contrib/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt b/contrib/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt
index f07400021dc1..8dd5e8a03e2e 100644
--- a/contrib/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt
+++ b/contrib/llvm/lib/Target/WebAssembly/known_gcc_test_failures.txt
@@ -66,4 +66,3 @@ pr41935.c
920728-1.c
pr28865.c
widechar-2.c
-pr41463.c
diff --git a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 4e0ad8bfe1f1..e692118f47fd 100644
--- a/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/contrib/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -59,6 +59,7 @@ class X86AsmParser : public MCTargetAsmParser {
const MCInstrInfo &MII;
ParseInstructionInfo *InstInfo;
std::unique_ptr<X86AsmInstrumentation> Instrumentation;
+ bool Code16GCC;
private:
SMLoc consumeToken() {
@@ -68,6 +69,19 @@ private:
return Result;
}
+ unsigned MatchInstruction(const OperandVector &Operands, MCInst &Inst,
+ uint64_t &ErrorInfo, bool matchingInlineAsm,
+ unsigned VariantID = 0) {
+ // In Code16GCC mode, match as 32-bit.
+ if (Code16GCC)
+ SwitchMode(X86::Mode32Bit);
+ unsigned rv = MatchInstructionImpl(Operands, Inst, ErrorInfo,
+ matchingInlineAsm, VariantID);
+ if (Code16GCC)
+ SwitchMode(X86::Mode16Bit);
+ return rv;
+ }
+
enum InfixCalculatorTok {
IC_OR = 0,
IC_XOR,
@@ -659,20 +673,15 @@ private:
}
};
- bool Error(SMLoc L, const Twine &Msg,
- ArrayRef<SMRange> Ranges = None,
+ bool Error(SMLoc L, const Twine &Msg, SMRange Range = None,
bool MatchingInlineAsm = false) {
MCAsmParser &Parser = getParser();
- if (MatchingInlineAsm) return true;
- return Parser.Error(L, Msg, Ranges);
- }
-
- bool ErrorAndEatStatement(SMLoc L, const Twine &Msg,
- ArrayRef<SMRange> Ranges = None,
- bool MatchingInlineAsm = false) {
- MCAsmParser &Parser = getParser();
- Parser.eatToEndOfStatement();
- return Error(L, Msg, Ranges, MatchingInlineAsm);
+ if (MatchingInlineAsm) {
+ if (!getLexer().isAtStartOfStatement())
+ Parser.eatToEndOfStatement();
+ return false;
+ }
+ return Parser.Error(L, Msg, Range);
}
std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg) {
@@ -698,14 +707,11 @@ private:
std::unique_ptr<X86Operand> ParseIntelOperator(unsigned OpKind);
std::unique_ptr<X86Operand>
ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
- std::unique_ptr<X86Operand>
- ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc, unsigned Size);
std::unique_ptr<X86Operand> ParseRoundingModeOp(SMLoc Start, SMLoc End);
bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
- std::unique_ptr<X86Operand> ParseIntelBracExpression(unsigned SegReg,
- SMLoc Start,
- int64_t ImmDisp,
- unsigned Size);
+ std::unique_ptr<X86Operand>
+ ParseIntelBracExpression(unsigned SegReg, SMLoc Start, int64_t ImmDisp,
+ bool isSymbol, unsigned Size);
bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
InlineAsmIdentifierInfo &Info,
bool IsUnevaluatedOperand, SMLoc &End);
@@ -716,7 +722,8 @@ private:
CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg,
unsigned IndexReg, unsigned Scale, SMLoc Start,
SMLoc End, unsigned Size, StringRef Identifier,
- InlineAsmIdentifierInfo &Info);
+ InlineAsmIdentifierInfo &Info,
+ bool AllowBetterSizeMatch = false);
bool parseDirectiveEven(SMLoc L);
bool ParseDirectiveWord(unsigned Size, SMLoc L);
@@ -753,10 +760,17 @@ private:
/// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
/// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
- /// \return \c true if no parsing errors occurred, \c false otherwise.
+ /// \return \c false if no parsing errors occurred, \c true otherwise.
bool HandleAVX512Operand(OperandVector &Operands,
const MCParsedAsmOperand &Op);
+ bool ParseZ(std::unique_ptr<X86Operand> &Z, const SMLoc &StartLoc);
+
+ /// MS-compatibility:
+ /// Infer an appropriate size qualifier for an AVX512 vector/broadcast
+ /// memory operand when no size qualifier was given.
+ unsigned AdjustAVX512Mem(unsigned Size, X86Operand* UnsizedMemOpNext);
+
bool is64BitMode() const {
// FIXME: Can tablegen auto-generate this?
return getSTI().getFeatureBits()[X86::Mode64Bit];
@@ -802,7 +816,8 @@ private:
public:
X86AsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser,
const MCInstrInfo &mii, const MCTargetOptions &Options)
- : MCTargetAsmParser(Options, sti), MII(mii), InstInfo(nullptr) {
+ : MCTargetAsmParser(Options, sti), MII(mii), InstInfo(nullptr),
+ Code16GCC(false) {
// Initialize the set of available features.
setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
@@ -833,6 +848,11 @@ static bool CheckBaseRegAndIndexReg(unsigned BaseReg, unsigned IndexReg,
// If we have both a base register and an index register make sure they are
// both 64-bit or 32-bit registers.
// To support VSIB, IndexReg can be 128-bit or 256-bit registers.
+
+ if ((BaseReg == X86::RIP && IndexReg != 0) || (IndexReg == X86::RIP)) {
+ ErrMsg = "invalid base+index expression";
+ return true;
+ }
if (BaseReg != 0 && IndexReg != 0) {
if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
(X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
@@ -907,8 +927,7 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo,
if (RegNo == X86::RIZ ||
X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
X86II::isX86_64NonExtLowByteReg(RegNo) ||
- X86II::isX86_64ExtendedReg(RegNo) ||
- X86II::is32ExtendedReg(RegNo))
+ X86II::isX86_64ExtendedReg(RegNo))
return Error(StartLoc, "register %"
+ Tok.getString() + " is only available in 64-bit mode",
SMRange(StartLoc, EndLoc));
@@ -992,20 +1011,20 @@ void X86AsmParser::SetFrameRegister(unsigned RegNo) {
}
std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
- unsigned basereg =
- is64BitMode() ? X86::RSI : (is32BitMode() ? X86::ESI : X86::SI);
+ bool Parse32 = is32BitMode() || Code16GCC;
+ unsigned Basereg = is64BitMode() ? X86::RSI : (Parse32 ? X86::ESI : X86::SI);
const MCExpr *Disp = MCConstantExpr::create(0, getContext());
return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
- /*BaseReg=*/basereg, /*IndexReg=*/0, /*Scale=*/1,
+ /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
Loc, Loc, 0);
}
std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
- unsigned basereg =
- is64BitMode() ? X86::RDI : (is32BitMode() ? X86::EDI : X86::DI);
+ bool Parse32 = is32BitMode() || Code16GCC;
+ unsigned Basereg = is64BitMode() ? X86::RDI : (Parse32 ? X86::EDI : X86::DI);
const MCExpr *Disp = MCConstantExpr::create(0, getContext());
return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
- /*BaseReg=*/basereg, /*IndexReg=*/0, /*Scale=*/1,
+ /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
Loc, Loc, 0);
}
@@ -1159,7 +1178,7 @@ static unsigned getIntelMemOperandSize(StringRef OpStr) {
std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
- InlineAsmIdentifierInfo &Info) {
+ InlineAsmIdentifierInfo &Info, bool AllowBetterSizeMatch) {
// If we found a decl other than a VarDecl, then assume it is a FuncDecl or
// some other label reference.
if (isa<MCSymbolRefExpr>(Disp) && Info.OpDecl && !Info.IsVarDecl) {
@@ -1188,6 +1207,13 @@ std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
if (Size)
InstInfo->AsmRewrites->emplace_back(AOK_SizeDirective, Start,
/*Len=*/0, Size);
+ if (AllowBetterSizeMatch)
+ // Handle cases where the size qualifier is absent, upon an indirect
+ // symbol reference - e.g. "vaddps zmm1, zmm2, [var]".
+ // Set Size to zero to allow the matching mechanism to try to find a
+ // better size qualifier than our initial guess, based on available
+ // variants of the given instruction.
+ Size = 0;
}
}
@@ -1271,7 +1297,7 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
// The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
// identifier. Don't try to parse it as a register.
- if (Tok.getString().startswith("."))
+ if (PrevTK != AsmToken::Error && Tok.getString().startswith("."))
break;
// If we're parsing an immediate expression, we don't expect a '['.
@@ -1386,7 +1412,8 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
std::unique_ptr<X86Operand>
X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
- int64_t ImmDisp, unsigned Size) {
+ int64_t ImmDisp, bool isSymbol,
+ unsigned Size) {
MCAsmParser &Parser = getParser();
const AsmToken &Tok = Parser.getTok();
SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc();
@@ -1436,6 +1463,21 @@ X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
Disp = NewDisp;
}
+ if (isSymbol) {
+ if (SM.getSym()) {
+ Error(Start, "cannot use more than one symbol in memory operand");
+ return nullptr;
+ }
+ if (SM.getBaseReg()) {
+ Error(Start, "cannot use base register with variable reference");
+ return nullptr;
+ }
+ if (SM.getIndexReg()) {
+ Error(Start, "cannot use index register with variable reference");
+ return nullptr;
+ }
+ }
+
int BaseReg = SM.getBaseReg();
int IndexReg = SM.getIndexReg();
int Scale = SM.getScale();
@@ -1458,7 +1500,8 @@ X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
- End, Size, SM.getSymName(), Info);
+ End, Size, SM.getSymName(), Info,
+ isParsingInlineAsm());
}
// Inline assembly may use variable names with namespace alias qualifiers.
@@ -1541,7 +1584,7 @@ X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start,
}
if (getLexer().is(AsmToken::LBrac))
- return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
+ return ParseIntelBracExpression(SegReg, Start, ImmDisp, false, Size);
const MCExpr *Val;
SMLoc End;
@@ -1598,66 +1641,6 @@ X86AsmParser::ParseRoundingModeOp(SMLoc Start, SMLoc End) {
}
return ErrorOperand(Tok.getLoc(), "unknown token in expression");
}
-/// ParseIntelMemOperand - Parse intel style memory operand.
-std::unique_ptr<X86Operand> X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp,
- SMLoc Start,
- unsigned Size) {
- MCAsmParser &Parser = getParser();
- const AsmToken &Tok = Parser.getTok();
- SMLoc End;
-
- // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
- if (getLexer().is(AsmToken::LBrac))
- return ParseIntelBracExpression(/*SegReg=*/0, Start, ImmDisp, Size);
- assert(ImmDisp == 0);
-
- const MCExpr *Val;
- if (!isParsingInlineAsm()) {
- if (getParser().parsePrimaryExpr(Val, End))
- return ErrorOperand(Tok.getLoc(), "unknown token in expression");
-
- return X86Operand::CreateMem(getPointerWidth(), Val, Start, End, Size);
- }
-
- InlineAsmIdentifierInfo Info;
- StringRef Identifier = Tok.getString();
- if (ParseIntelIdentifier(Val, Identifier, Info,
- /*Unevaluated=*/false, End))
- return nullptr;
-
- if (!getLexer().is(AsmToken::LBrac))
- return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0, /*IndexReg=*/0,
- /*Scale=*/1, Start, End, Size, Identifier, Info);
-
- Parser.Lex(); // Eat '['
-
- // Parse Identifier [ ImmDisp ]
- IntelExprStateMachine SM(/*ImmDisp=*/0, /*StopOnLBrac=*/true,
- /*AddImmPrefix=*/false);
- if (ParseIntelExpression(SM, End))
- return nullptr;
-
- if (SM.getSym()) {
- Error(Start, "cannot use more than one symbol in memory operand");
- return nullptr;
- }
- if (SM.getBaseReg()) {
- Error(Start, "cannot use base register with variable reference");
- return nullptr;
- }
- if (SM.getIndexReg()) {
- Error(Start, "cannot use index register with variable reference");
- return nullptr;
- }
-
- const MCExpr *Disp = MCConstantExpr::create(SM.getImm(), getContext());
- // BaseReg is non-zero to avoid assertions. In the context of inline asm,
- // we're pointing to a local variable in memory, so the base register is
- // really the frame or stack pointer.
- return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
- /*BaseReg=*/1, /*IndexReg=*/0, /*Scale=*/1,
- Start, End, Size, Identifier, Info.OpDecl);
-}
/// Parse the '.' operator.
bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
@@ -1725,8 +1708,9 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() {
// The offset operator will have an 'r' constraint, thus we need to create
// register operand to ensure proper matching. Just pick a GPR based on
// the size of a pointer.
- unsigned RegNo =
- is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
+ bool Parse32 = is32BitMode() || Code16GCC;
+ unsigned RegNo = is64BitMode() ? X86::RBX : (Parse32 ? X86::EBX : X86::BX);
+
return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
OffsetOfLoc, Identifier, Info.OpDecl);
}
@@ -1804,49 +1788,8 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
Parser.Lex(); // Eat ptr.
PtrInOperand = true;
}
- Start = Tok.getLoc();
- // Immediate.
- if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) ||
- getLexer().is(AsmToken::Tilde) || getLexer().is(AsmToken::LParen)) {
- AsmToken StartTok = Tok;
- IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
- /*AddImmPrefix=*/false);
- if (ParseIntelExpression(SM, End))
- return nullptr;
-
- int64_t Imm = SM.getImm();
- if (isParsingInlineAsm()) {
- unsigned Len = Tok.getLoc().getPointer() - Start.getPointer();
- if (StartTok.getString().size() == Len)
- // Just add a prefix if this wasn't a complex immediate expression.
- InstInfo->AsmRewrites->emplace_back(AOK_ImmPrefix, Start);
- else
- // Otherwise, rewrite the complex expression as a single immediate.
- InstInfo->AsmRewrites->emplace_back(AOK_Imm, Start, Len, Imm);
- }
-
- if (getLexer().isNot(AsmToken::LBrac)) {
- // If a directional label (ie. 1f or 2b) was parsed above from
- // ParseIntelExpression() then SM.getSym() was set to a pointer to
- // to the MCExpr with the directional local symbol and this is a
- // memory operand not an immediate operand.
- if (SM.getSym())
- return X86Operand::CreateMem(getPointerWidth(), SM.getSym(), Start, End,
- Size);
-
- const MCExpr *ImmExpr = MCConstantExpr::create(Imm, getContext());
- return X86Operand::CreateImm(ImmExpr, Start, End);
- }
-
- // Only positive immediates are valid.
- if (Imm < 0)
- return ErrorOperand(Start, "expected a positive immediate displacement "
- "before bracketed expr.");
-
- // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
- return ParseIntelMemOperand(Imm, Start, Size);
- }
+ Start = Tok.getLoc();
// rounding mode token
if (getSTI().getFeatureBits()[X86::FeatureAVX512] &&
@@ -1855,24 +1798,78 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
// Register.
unsigned RegNo = 0;
- if (!ParseRegister(RegNo, Start, End)) {
+ if (getLexer().is(AsmToken::Identifier) &&
+ !ParseRegister(RegNo, Start, End)) {
// If this is a segment register followed by a ':', then this is the start
// of a segment override, otherwise this is a normal register reference.
- // In case it is a normal register and there is ptr in the operand this
+ // In case it is a normal register and there is ptr in the operand this
// is an error
- if (getLexer().isNot(AsmToken::Colon)){
- if (PtrInOperand){
+ if (RegNo == X86::RIP)
+ return ErrorOperand(Start, "rip can only be used as a base register");
+ if (getLexer().isNot(AsmToken::Colon)) {
+ if (PtrInOperand) {
return ErrorOperand(Start, "expected memory operand after "
"'ptr', found register operand instead");
}
return X86Operand::CreateReg(RegNo, Start, End);
}
-
return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size);
}
- // Memory operand.
- return ParseIntelMemOperand(/*Disp=*/0, Start, Size);
+ // Immediates and Memory
+
+ // Parse [ BaseReg + Scale*IndexReg + Disp ].
+ if (getLexer().is(AsmToken::LBrac))
+ return ParseIntelBracExpression(/*SegReg=*/0, Start, /*ImmDisp=*/0, false,
+ Size);
+
+ AsmToken StartTok = Tok;
+ IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
+ /*AddImmPrefix=*/false);
+ if (ParseIntelExpression(SM, End))
+ return nullptr;
+
+ bool isSymbol = SM.getSym() && SM.getSym()->getKind() != MCExpr::Constant;
+ int64_t Imm = SM.getImm();
+ if (SM.getSym() && SM.getSym()->getKind() == MCExpr::Constant)
+ SM.getSym()->evaluateAsAbsolute(Imm);
+
+ if (StartTok.isNot(AsmToken::Identifier) &&
+ StartTok.isNot(AsmToken::String) && isParsingInlineAsm()) {
+ unsigned Len = Tok.getLoc().getPointer() - Start.getPointer();
+ if (StartTok.getString().size() == Len)
+ // Just add a prefix if this wasn't a complex immediate expression.
+ InstInfo->AsmRewrites->emplace_back(AOK_ImmPrefix, Start);
+ else
+ // Otherwise, rewrite the complex expression as a single immediate.
+ InstInfo->AsmRewrites->emplace_back(AOK_Imm, Start, Len, Imm);
+ }
+
+ if (getLexer().isNot(AsmToken::LBrac)) {
+ // If a directional label (i.e. 1f or 2b) was parsed above from
+ // ParseIntelExpression() then SM.getSym() was set to a pointer to
+ // the MCExpr with the directional local symbol and this is a
+ // memory operand, not an immediate operand.
+ if (isSymbol) {
+ if (isParsingInlineAsm())
+ return CreateMemForInlineAsm(/*SegReg=*/0, SM.getSym(), /*BaseReg=*/0,
+ /*IndexReg=*/0,
+ /*Scale=*/1, Start, End, Size,
+ SM.getSymName(), SM.getIdentifierInfo());
+ return X86Operand::CreateMem(getPointerWidth(), SM.getSym(), Start, End,
+ Size);
+ }
+
+ const MCExpr *ImmExpr = MCConstantExpr::create(Imm, getContext());
+ return X86Operand::CreateImm(ImmExpr, Start, End);
+ }
+
+ // Only positive immediates are valid.
+ if (Imm < 0)
+ return ErrorOperand(Start, "expected a positive immediate displacement "
+ "before bracketed expr.");
+
+ return ParseIntelBracExpression(/*SegReg=*/0, Start, Imm, isSymbol, Size);
}
std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
@@ -1891,6 +1888,11 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
SMRange(Start, End));
return nullptr;
}
+ if (RegNo == X86::RIP) {
+ Error(Start, "%rip can only be used as a base register",
+ SMRange(Start, End));
+ return nullptr;
+ }
// If this is a segment register followed by a ':', then this is the start
// of a memory reference, otherwise this is a normal register reference.
@@ -1916,11 +1918,33 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
SMLoc Start = Parser.getTok().getLoc(), End;
if (getSTI().getFeatureBits()[X86::FeatureAVX512])
return ParseRoundingModeOp(Start, End);
- return ErrorOperand(Start, "unknown token in expression");
+ return ErrorOperand(Start, "Unexpected '{' in expression");
}
}
}
+// Returns true on failure, false otherwise.
+// If no {z} mark was found, the parser doesn't advance.
+bool X86AsmParser::ParseZ(std::unique_ptr<X86Operand> &Z,
+ const SMLoc &StartLoc) {
+ MCAsmParser &Parser = getParser();
+ // Assuming we are just past the '{' mark, query the next token.
+ // If no {z} mark is found, return false, as no parsing error was
+ // encountered.
+ if (!(getLexer().is(AsmToken::Identifier) &&
+ (getLexer().getTok().getIdentifier() == "z")))
+ return false;
+ Parser.Lex(); // Eat z
+ // Query and eat the '}' mark
+ if (!getLexer().is(AsmToken::RCurly))
+ return Error(getLexer().getLoc(), "Expected } at this point");
+ Parser.Lex(); // Eat '}'
+ // Assign Z the {z} mark operand.
+ Z = X86Operand::CreateToken("{z}", StartLoc);
+ return false;
+}
+
+// Returns true on failure, false otherwise.
bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
const MCParsedAsmOperand &Op) {
MCAsmParser &Parser = getParser();
@@ -1932,13 +1956,11 @@ bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
if(getLexer().is(AsmToken::Integer)) {
// Parse memory broadcasting ({1to<NUM>}).
if (getLexer().getTok().getIntVal() != 1)
- return !ErrorAndEatStatement(getLexer().getLoc(),
- "Expected 1to<NUM> at this point");
+ return TokError("Expected 1to<NUM> at this point");
Parser.Lex(); // Eat "1" of 1to8
if (!getLexer().is(AsmToken::Identifier) ||
!getLexer().getTok().getIdentifier().startswith("to"))
- return !ErrorAndEatStatement(getLexer().getLoc(),
- "Expected 1to<NUM> at this point");
+ return TokError("Expected 1to<NUM> at this point");
// Recognize only reasonable suffixes.
const char *BroadcastPrimitive =
StringSwitch<const char*>(getLexer().getTok().getIdentifier())
@@ -1948,46 +1970,57 @@ bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
.Case("to16", "{1to16}")
.Default(nullptr);
if (!BroadcastPrimitive)
- return !ErrorAndEatStatement(getLexer().getLoc(),
- "Invalid memory broadcast primitive.");
+ return TokError("Invalid memory broadcast primitive.");
Parser.Lex(); // Eat "toN" of 1toN
if (!getLexer().is(AsmToken::RCurly))
- return !ErrorAndEatStatement(getLexer().getLoc(),
- "Expected } at this point");
+ return TokError("Expected } at this point");
Parser.Lex(); // Eat "}"
Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
consumedToken));
// No AVX512 specific primitives can pass
// after memory broadcasting, so return.
- return true;
+ return false;
} else {
- // Parse mask register {%k1}
- Operands.push_back(X86Operand::CreateToken("{", consumedToken));
- if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
- Operands.push_back(std::move(Op));
- if (!getLexer().is(AsmToken::RCurly))
- return !ErrorAndEatStatement(getLexer().getLoc(),
- "Expected } at this point");
- Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
-
- // Parse "zeroing non-masked" semantic {z}
- if (getLexer().is(AsmToken::LCurly)) {
- Operands.push_back(X86Operand::CreateToken("{z}", consumeToken()));
- if (!getLexer().is(AsmToken::Identifier) ||
- getLexer().getTok().getIdentifier() != "z")
- return !ErrorAndEatStatement(getLexer().getLoc(),
- "Expected z at this point");
- Parser.Lex(); // Eat the z
+ // Parse either {k}{z}, {z}{k}, {k} or {z};
+ // the last one has no meaning, but GCC accepts it.
+ // At this point we are just past a '{' mark.
+ std::unique_ptr<X86Operand> Z;
+ if (ParseZ(Z, consumedToken))
+ return true;
+ // Reaching here means that parsing of the alleged '{z}' mark yielded
+ // no errors.
+ // Check whether a {%k<NUM>} mark still needs to be parsed.
+ if (!Z || getLexer().is(AsmToken::LCurly)) {
+ const SMLoc StartLoc = Z ? consumeToken() : consumedToken;
+ // Parse an op-mask register mark ({%k<NUM>}), which is now to be
+ // expected
+ if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
if (!getLexer().is(AsmToken::RCurly))
- return !ErrorAndEatStatement(getLexer().getLoc(),
- "Expected } at this point");
- Parser.Lex(); // Eat the }
+ return Error(getLexer().getLoc(), "Expected } at this point");
+ Operands.push_back(X86Operand::CreateToken("{", StartLoc));
+ Operands.push_back(std::move(Op));
+ Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
+ } else
+ return Error(getLexer().getLoc(),
+ "Expected an op-mask register at this point");
+ // A {%k<NUM>} mark was found; check for a trailing {z}.
+ if (getLexer().is(AsmToken::LCurly) && !Z) {
+ // If we hit a parsing error, or no (expected) {z} mark was found,
+ // report an error.
+ if (ParseZ(Z, consumeToken()) || !Z)
+ return true;
+
}
+ // '{z}' on its own is meaningless, hence should be ignored.
+ // On the contrary, had it been accompanied by a K register,
+ // allow it.
+ if (Z)
+ Operands.push_back(std::move(Z));
}
}
}
}
- return true;
+ return false;
}
/// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
@@ -2077,7 +2110,16 @@ std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
// like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
if (getLexer().is(AsmToken::Percent)) {
SMLoc L;
- if (ParseRegister(IndexReg, L, L)) return nullptr;
+ if (ParseRegister(IndexReg, L, L))
+ return nullptr;
+ if (BaseReg == X86::RIP) {
+ Error(IndexLoc, "%rip as base register can not have an index register");
+ return nullptr;
+ }
+ if (IndexReg == X86::RIP) {
+ Error(IndexLoc, "%rip is not allowed as an index register");
+ return nullptr;
+ }
if (getLexer().isNot(AsmToken::RParen)) {
// Parse the scale amount:
@@ -2169,6 +2211,20 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
InstInfo = &Info;
StringRef PatchedName = Name;
+ if (Name == "jmp" && isParsingIntelSyntax() && isParsingInlineAsm()) {
+ StringRef NextTok = Parser.getTok().getString();
+ if (NextTok == "short") {
+ SMLoc NameEndLoc =
+ NameLoc.getFromPointer(NameLoc.getPointer() + Name.size());
+ // Eat the short keyword
+ Parser.Lex();
+ // MS ignores the short keyword; it determines the jmp type based
+ // on the distance of the label.
+ InstInfo->AsmRewrites->emplace_back(AOK_Skip, NameEndLoc,
+ NextTok.size() + 1);
+ }
+ }
+
// FIXME: Hack to recognize setneb as setne.
if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
PatchedName != "setb" && PatchedName != "setnb")
@@ -2321,10 +2377,9 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
while(1) {
if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
Operands.push_back(std::move(Op));
- if (!HandleAVX512Operand(Operands, *Operands.back()))
+ if (HandleAVX512Operand(Operands, *Operands.back()))
return true;
} else {
- Parser.eatToEndOfStatement();
return true;
}
// check for comma and eat it
@@ -2340,8 +2395,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
isParsingIntelSyntax() && isParsingInlineAsm() &&
(getLexer().is(AsmToken::LCurly) || getLexer().is(AsmToken::RCurly));
if (getLexer().isNot(AsmToken::EndOfStatement) && !CurlyAsEndOfStatement)
- return ErrorAndEatStatement(getLexer().getLoc(),
- "unexpected token in argument list");
+ return TokError("unexpected token in argument list");
}
// Consume the EndOfStatement or the prefix separator Slash
@@ -2367,6 +2421,30 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
static_cast<X86Operand &>(*Operands[0]).setTokenValue(Repl);
}
+ // Moving a 32-bit or 16-bit value into a segment register has the same
+ // behavior. Modify such instructions to always take the shorter form.
+ if ((Name == "mov" || Name == "movw" || Name == "movl") &&
+ (Operands.size() == 3)) {
+ X86Operand &Op1 = (X86Operand &)*Operands[1];
+ X86Operand &Op2 = (X86Operand &)*Operands[2];
+ SMLoc Loc = Op1.getEndLoc();
+ if (Op1.isReg() && Op2.isReg() &&
+ X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(
+ Op2.getReg()) &&
+ (X86MCRegisterClasses[X86::GR16RegClassID].contains(Op1.getReg()) ||
+ X86MCRegisterClasses[X86::GR32RegClassID].contains(Op1.getReg()))) {
+ // Change instruction name to match new instruction.
+ if (Name != "mov" && Name[3] == (is16BitMode() ? 'l' : 'w')) {
+ Name = is16BitMode() ? "movw" : "movl";
+ Operands[0] = X86Operand::CreateToken(Name, NameLoc);
+ }
+ // Select the correct equivalent 16-/32-bit source register.
+ unsigned Reg =
+ getX86SubSuperRegisterOrZero(Op1.getReg(), is16BitMode() ? 16 : 32);
+ Operands[1] = X86Operand::CreateReg(Reg, Loc, Loc);
+ }
+ }
+
// This is a terrible hack to handle "out[s]?[bwl]? %al, (%dx)" ->
// "outb %al, %dx". Out doesn't take a memory form, but this is a widely
// documented form in various unofficial manuals, so a lot of code uses it.
@@ -2472,7 +2550,7 @@ bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
(Name == "smov" || Name == "smovb" || Name == "smovw" ||
Name == "smovl" || Name == "smovd" || Name == "smovq"))) &&
(Operands.size() == 1 || Operands.size() == 3)) {
- if (Name == "movsd" && Operands.size() == 1)
+ if (Name == "movsd" && Operands.size() == 1 && !isParsingIntelSyntax())
Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
DefaultMemDIOperand(NameLoc));
@@ -2583,7 +2661,6 @@ void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
bool MatchingInlineAsm) {
assert(ErrorInfo && "Unknown missing feature!");
- ArrayRef<SMRange> EmptyRanges = None;
SmallString<126> Msg;
raw_svector_ostream OS(Msg);
OS << "instruction requires:";
@@ -2593,7 +2670,7 @@ bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
OS << ' ' << getSubtargetFeatureName(ErrorInfo & Mask);
Mask <<= 1;
}
- return Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
+ return Error(IDLoc, OS.str(), SMRange(), MatchingInlineAsm);
}
bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
@@ -2604,7 +2681,7 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
assert(!Operands.empty() && "Unexpect empty operand list!");
X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
assert(Op.isToken() && "Leading operand should always be a mnemonic!");
- ArrayRef<SMRange> EmptyRanges = None;
+ SMRange EmptyRange = None;
// First, handle aliases that expand to multiple instructions.
MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
@@ -2613,9 +2690,8 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
MCInst Inst;
// First, try a direct match.
- switch (MatchInstructionImpl(Operands, Inst,
- ErrorInfo, MatchingInlineAsm,
- isParsingIntelSyntax())) {
+ switch (MatchInstruction(Operands, Inst, ErrorInfo, MatchingInlineAsm,
+ isParsingIntelSyntax())) {
default: llvm_unreachable("Unexpected match result!");
case Match_Success:
// Some instructions need post-processing to, for example, tweak which
@@ -2666,8 +2742,8 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I) {
Tmp.back() = Suffixes[I];
- Match[I] = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
- MatchingInlineAsm, isParsingIntelSyntax());
+ Match[I] = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
+ MatchingInlineAsm, isParsingIntelSyntax());
// If this returned as a missing feature failure, remember that.
if (Match[I] == Match_MissingFeature)
ErrorInfoMissingFeature = ErrorInfoIgnore;
@@ -2711,7 +2787,7 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
OS << "'" << Base << MatchChars[i] << "'";
}
OS << ")";
- Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
+ Error(IDLoc, OS.str(), EmptyRange, MatchingInlineAsm);
return true;
}
@@ -2721,17 +2797,15 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
// mnemonic was invalid.
if (std::count(std::begin(Match), std::end(Match), Match_MnemonicFail) == 4) {
if (!WasOriginallyInvalidOperand) {
- ArrayRef<SMRange> Ranges =
- MatchingInlineAsm ? EmptyRanges : Op.getLocRange();
return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
- Ranges, MatchingInlineAsm);
+ Op.getLocRange(), MatchingInlineAsm);
}
// Recover location info for the operand if we know which was the problem.
if (ErrorInfo != ~0ULL) {
if (ErrorInfo >= Operands.size())
- return Error(IDLoc, "too few operands for instruction",
- EmptyRanges, MatchingInlineAsm);
+ return Error(IDLoc, "too few operands for instruction", EmptyRange,
+ MatchingInlineAsm);
X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
if (Operand.getStartLoc().isValid()) {
@@ -2741,7 +2815,7 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
}
}
- return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
+ return Error(IDLoc, "invalid operand for instruction", EmptyRange,
MatchingInlineAsm);
}
@@ -2758,16 +2832,33 @@ bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
// operand failure.
if (std::count(std::begin(Match), std::end(Match),
Match_InvalidOperand) == 1) {
- return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
+ return Error(IDLoc, "invalid operand for instruction", EmptyRange,
MatchingInlineAsm);
}
// If all of these were an outright failure, report it in a useless way.
Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
- EmptyRanges, MatchingInlineAsm);
+ EmptyRange, MatchingInlineAsm);
return true;
}
+unsigned X86AsmParser::AdjustAVX512Mem(unsigned Size,
+ X86Operand* UnsizedMemOpNext) {
+ // Check for the existence of an AVX512 platform
+ if (!getSTI().getFeatureBits()[X86::FeatureAVX512])
+ return 0;
+ // Allow adjusting for (x|y|z)mm-sized memory operands.
+ if (Size == 512 || Size == 256 || Size == 128)
+ return Size;
+ // This is allegedly a broadcasting mem op adjustment;
+ // allow some more checking to validate it.
+ if (Size == 64 || Size == 32)
+ return UnsizedMemOpNext && UnsizedMemOpNext->isToken() &&
+ UnsizedMemOpNext->getToken().substr(0, 4).equals("{1to") ? Size : 0;
+ // Do not allow any other type of adjustments
+ return 0;
+}
+
bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
OperandVector &Operands,
MCStreamer &Out,
@@ -2777,7 +2868,8 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
assert(Op.isToken() && "Leading operand should always be a mnemonic!");
StringRef Mnemonic = Op.getToken();
- ArrayRef<SMRange> EmptyRanges = None;
+ SMRange EmptyRange = None;
+ StringRef Base = Op.getToken();
// First, handle aliases that expand to multiple instructions.
MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
@@ -2786,8 +2878,17 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
// Find one unsized memory operand, if present.
X86Operand *UnsizedMemOp = nullptr;
+ // If an unsized memory operand was found, record the following operand
+ // for use in AdjustAVX512Mem.
+ X86Operand *UnsizedMemOpNext = nullptr;
for (const auto &Op : Operands) {
X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
+ if (UnsizedMemOp) {
+ UnsizedMemOpNext = X86Op;
+ // We have already found an unsized memory operand; break, since
+ // Intel assembly allows only one memory operand.
+ break;
+ }
if (X86Op->isMemUnsized())
UnsizedMemOp = X86Op;
}
@@ -2804,26 +2905,58 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
}
}
+ SmallVector<unsigned, 8> Match;
+ uint64_t ErrorInfoMissingFeature = 0;
+
+ // If an unsized push has an immediate operand, we default to the pointer
+ // size for the operand size.
+ if (Mnemonic == "push" && Operands.size() == 2) {
+ auto *X86Op = static_cast<X86Operand *>(Operands[1].get());
+ if (X86Op->isImm()) {
+ // If it's not a constant, fall through and let the remainder take care of it.
+ const auto *CE = dyn_cast<MCConstantExpr>(X86Op->getImm());
+ unsigned Size = getPointerWidth();
+ if (CE &&
+ (isIntN(Size, CE->getValue()) || isUIntN(Size, CE->getValue()))) {
+ SmallString<16> Tmp;
+ Tmp += Base;
+ Tmp += (is64BitMode())
+ ? "q"
+ : (is32BitMode()) ? "l" : (is16BitMode()) ? "w" : " ";
+ Op.setTokenValue(Tmp);
+ // Do match in ATT mode to allow explicit suffix usage.
+ Match.push_back(MatchInstruction(Operands, Inst, ErrorInfo,
+ MatchingInlineAsm,
+ false /*isParsingIntelSyntax()*/));
+ Op.setTokenValue(Base);
+ }
+ }
+ }
+
// If an unsized memory operand is present, try to match with each memory
// operand size. In Intel assembly, the size is not part of the instruction
// mnemonic.
- SmallVector<unsigned, 8> Match;
- uint64_t ErrorInfoMissingFeature = 0;
+ unsigned MatchedSize = 0;
if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512};
for (unsigned Size : MopSizes) {
UnsizedMemOp->Mem.Size = Size;
uint64_t ErrorInfoIgnore;
unsigned LastOpcode = Inst.getOpcode();
- unsigned M =
- MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
- MatchingInlineAsm, isParsingIntelSyntax());
+ unsigned M = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
+ MatchingInlineAsm, isParsingIntelSyntax());
if (Match.empty() || LastOpcode != Inst.getOpcode())
Match.push_back(M);
// If this returned as a missing feature failure, remember that.
if (Match.back() == Match_MissingFeature)
ErrorInfoMissingFeature = ErrorInfoIgnore;
+ if (M == Match_Success)
+ // MS-compatibility:
+ // Adjust the AVX512 vector/broadcast memory operand when no size
+ // qualifier was given, matching GCC behavior in such cases.
+ MatchedSize = AdjustAVX512Mem(Size, UnsizedMemOpNext);
}
// Restore the size of the unsized memory operand if we modified it.
@@ -2835,9 +2968,8 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
// operation. There shouldn't be any ambiguity in our mnemonic table, so try
// matching with the unsized operand.
if (Match.empty()) {
- Match.push_back(MatchInstructionImpl(Operands, Inst, ErrorInfo,
- MatchingInlineAsm,
- isParsingIntelSyntax()));
+ Match.push_back(MatchInstruction(
+ Operands, Inst, ErrorInfo, MatchingInlineAsm, isParsingIntelSyntax()));
// If this returned as a missing feature failure, remember that.
if (Match.back() == Match_MissingFeature)
ErrorInfoMissingFeature = ErrorInfo;
@@ -2849,10 +2981,8 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
// If it's a bad mnemonic, all results will be the same.
if (Match.back() == Match_MnemonicFail) {
- ArrayRef<SMRange> Ranges =
- MatchingInlineAsm ? EmptyRanges : Op.getLocRange();
return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'",
- Ranges, MatchingInlineAsm);
+ Op.getLocRange(), MatchingInlineAsm);
}
// If exactly one matched, then we treat that as a successful match (and the
@@ -2861,6 +2991,14 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
unsigned NumSuccessfulMatches =
std::count(std::begin(Match), std::end(Match), Match_Success);
if (NumSuccessfulMatches == 1) {
+ if (MatchedSize && isParsingInlineAsm() && isParsingIntelSyntax())
+ // MS compatibility (MS inline assembly only):
+ // fix the rewrite according to the matched memory size.
+ for (AsmRewrite &AR : *InstInfo->AsmRewrites)
+ if ((AR.Loc.getPointer() == UnsizedMemOp->StartLoc.getPointer()) &&
+ (AR.Kind == AOK_SizeDirective))
+ AR.Val = MatchedSize;
// Some instructions need post-processing to, for example, tweak which
// encoding is selected. Loop on it while changes happen so the individual
// transformations can chain off each other.
@@ -2875,11 +3013,9 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
} else if (NumSuccessfulMatches > 1) {
assert(UnsizedMemOp &&
"multiple matches only possible with unsized memory operands");
- ArrayRef<SMRange> Ranges =
- MatchingInlineAsm ? EmptyRanges : UnsizedMemOp->getLocRange();
return Error(UnsizedMemOp->getStartLoc(),
"ambiguous operand size for instruction '" + Mnemonic + "\'",
- Ranges, MatchingInlineAsm);
+ UnsizedMemOp->getLocRange(), MatchingInlineAsm);
}
// If one instruction matched with a missing feature, report this as a
@@ -2895,12 +3031,12 @@ bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
// operand failure.
if (std::count(std::begin(Match), std::end(Match),
Match_InvalidOperand) == 1) {
- return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
+ return Error(IDLoc, "invalid operand for instruction", EmptyRange,
MatchingInlineAsm);
}
// If all of these were an outright failure, report it in a useless way.
- return Error(IDLoc, "unknown instruction mnemonic", EmptyRanges,
+ return Error(IDLoc, "unknown instruction mnemonic", EmptyRange,
MatchingInlineAsm);
}
@@ -2945,14 +3081,14 @@ bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
/// parseDirectiveEven
/// ::= .even
bool X86AsmParser::parseDirectiveEven(SMLoc L) {
- const MCSection *Section = getStreamer().getCurrentSection().first;
if (getLexer().isNot(AsmToken::EndOfStatement)) {
TokError("unexpected token in directive");
return false;
}
+ const MCSection *Section = getStreamer().getCurrentSectionOnly();
if (!Section) {
getStreamer().InitSections(false);
- Section = getStreamer().getCurrentSection().first;
+ Section = getStreamer().getCurrentSectionOnly();
}
if (Section->UseCodeAlign())
getStreamer().EmitCodeAlignment(2, 0);
@@ -3001,12 +3137,21 @@ bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
/// ::= .code16 | .code32 | .code64
bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
MCAsmParser &Parser = getParser();
+ Code16GCC = false;
if (IDVal == ".code16") {
Parser.Lex();
if (!is16BitMode()) {
SwitchMode(X86::Mode16Bit);
getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
}
+ } else if (IDVal == ".code16gcc") {
+ // .code16gcc parses as if in 32-bit mode, but emits code in 16-bit mode.
+ Parser.Lex();
+ Code16GCC = true;
+ if (!is16BitMode()) {
+ SwitchMode(X86::Mode16Bit);
+ getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
+ }
} else if (IDVal == ".code32") {
Parser.Lex();
if (!is32BitMode()) {
@@ -3029,8 +3174,8 @@ bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
// Force static initialization.
extern "C" void LLVMInitializeX86AsmParser() {
- RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target);
- RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target);
+ RegisterMCAsmParser<X86AsmParser> X(getTheX86_32Target());
+ RegisterMCAsmParser<X86AsmParser> Y(getTheX86_64Target());
}
#define GET_REGISTER_MATCHER
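
Editorial note (not part of the patch): the AdjustAVX512Mem hook added above only ever keeps an (x|y|z)mm width, or a 32/64-bit element size when the next operand token is a {1to<N>} broadcast primitive. The following standalone sketch mirrors that decision table; adjustAVX512MemSketch is a hypothetical helper, the feature check is omitted, and the real code consults X86Operand tokens rather than plain strings.

#include <string>

static unsigned adjustAVX512MemSketch(unsigned Size, const std::string &NextTok) {
  if (Size == 512 || Size == 256 || Size == 128)
    return Size;                                 // (x|y|z)mm-sized memory operand
  if ((Size == 64 || Size == 32) && NextTok.compare(0, 4, "{1to") == 0)
    return Size;                                 // element size of a broadcast op
  return 0;                                      // otherwise: no adjustment
}
// adjustAVX512MemSketch(512, "") == 512
// adjustAVX512MemSketch(32, "{1to16}") == 32
// adjustAVX512MemSketch(32, "") == 0
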
diff --git a/contrib/llvm/lib/Target/X86/AsmParser/X86Operand.h b/contrib/llvm/lib/Target/X86/AsmParser/X86Operand.h
index a04c2f5c84a5..9db1a8483bee 100644
--- a/contrib/llvm/lib/Target/X86/AsmParser/X86Operand.h
+++ b/contrib/llvm/lib/Target/X86/AsmParser/X86Operand.h
@@ -192,8 +192,10 @@ struct X86Operand : public MCParsedAsmOperand {
bool isImmUnsignedi8() const {
if (!isImm()) return false;
+ // If this isn't a constant expr, just assume it fits and let relaxation
+ // handle it.
const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(getImm());
- if (!CE) return false;
+ if (!CE) return true;
return isImmUnsignedi8Value(CE->getValue());
}
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
index 008dead5d0a5..0871888bbfcd 100644
--- a/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp
@@ -96,7 +96,7 @@ void llvm::X86Disassembler::Debug(const char *file, unsigned line,
dbgs() << file << ":" << line << ": " << s;
}
-const char *llvm::X86Disassembler::GetInstrName(unsigned Opcode,
+StringRef llvm::X86Disassembler::GetInstrName(unsigned Opcode,
const void *mii) {
const MCInstrInfo *MII = static_cast<const MCInstrInfo *>(mii);
return MII->getName(Opcode);
@@ -470,10 +470,20 @@ static void translateImmediate(MCInst &mcInst, uint64_t immediate,
case X86::VCMPPSZrmi: NewOpc = X86::VCMPPSZrmi_alt; break;
case X86::VCMPPSZrri: NewOpc = X86::VCMPPSZrri_alt; break;
case X86::VCMPPSZrrib: NewOpc = X86::VCMPPSZrrib_alt; break;
- case X86::VCMPSDZrm: NewOpc = X86::VCMPSDZrmi_alt; break;
- case X86::VCMPSDZrr: NewOpc = X86::VCMPSDZrri_alt; break;
- case X86::VCMPSSZrm: NewOpc = X86::VCMPSSZrmi_alt; break;
- case X86::VCMPSSZrr: NewOpc = X86::VCMPSSZrri_alt; break;
+ case X86::VCMPPDZ128rmi: NewOpc = X86::VCMPPDZ128rmi_alt; break;
+ case X86::VCMPPDZ128rri: NewOpc = X86::VCMPPDZ128rri_alt; break;
+ case X86::VCMPPSZ128rmi: NewOpc = X86::VCMPPSZ128rmi_alt; break;
+ case X86::VCMPPSZ128rri: NewOpc = X86::VCMPPSZ128rri_alt; break;
+ case X86::VCMPPDZ256rmi: NewOpc = X86::VCMPPDZ256rmi_alt; break;
+ case X86::VCMPPDZ256rri: NewOpc = X86::VCMPPDZ256rri_alt; break;
+ case X86::VCMPPSZ256rmi: NewOpc = X86::VCMPPSZ256rmi_alt; break;
+ case X86::VCMPPSZ256rri: NewOpc = X86::VCMPPSZ256rri_alt; break;
+ case X86::VCMPSDZrm_Int: NewOpc = X86::VCMPSDZrmi_alt; break;
+ case X86::VCMPSDZrr_Int: NewOpc = X86::VCMPSDZrri_alt; break;
+ case X86::VCMPSDZrrb_Int: NewOpc = X86::VCMPSDZrrb_alt; break;
+ case X86::VCMPSSZrm_Int: NewOpc = X86::VCMPSSZrmi_alt; break;
+ case X86::VCMPSSZrr_Int: NewOpc = X86::VCMPSSZrri_alt; break;
+ case X86::VCMPSSZrrb_Int: NewOpc = X86::VCMPSSZrrb_alt; break;
}
// Switch opcode to the one that doesn't get special printing.
mcInst.setOpcode(NewOpc);
@@ -1066,8 +1076,8 @@ static MCDisassembler *createX86Disassembler(const Target &T,
extern "C" void LLVMInitializeX86Disassembler() {
// Register the disassembler.
- TargetRegistry::RegisterMCDisassembler(TheX86_32Target,
+ TargetRegistry::RegisterMCDisassembler(getTheX86_32Target(),
createX86Disassembler);
- TargetRegistry::RegisterMCDisassembler(TheX86_64Target,
+ TargetRegistry::RegisterMCDisassembler(getTheX86_64Target(),
createX86Disassembler);
}
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
index b0a150ab564d..ab64d6fcf70b 100644
--- a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp
@@ -825,7 +825,7 @@ static int getIDWithAttrMask(uint16_t* instructionID,
* @param orig - The instruction that is not 16-bit
* @param equiv - The instruction that is 16-bit
*/
-static bool is16BitEquivalent(const char* orig, const char* equiv) {
+static bool is16BitEquivalent(const char *orig, const char *equiv) {
off_t i;
for (i = 0;; i++) {
@@ -850,7 +850,7 @@ static bool is16BitEquivalent(const char* orig, const char* equiv) {
*
* @param name - The instruction that is not 16-bit
*/
-static bool is64Bit(const char* name) {
+static bool is64Bit(const char *name) {
off_t i;
for (i = 0;; ++i) {
@@ -1044,9 +1044,9 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {
return 0;
}
- const char *SpecName = GetInstrName(instructionIDWithREXW, miiArg);
+ auto SpecName = GetInstrName(instructionIDWithREXW, miiArg);
// If not a 64-bit instruction. Switch the opcode.
- if (!is64Bit(SpecName)) {
+ if (!is64Bit(SpecName.data())) {
insn->instructionID = instructionIDWithREXW;
insn->spec = specifierForUID(instructionIDWithREXW);
return 0;
@@ -1092,7 +1092,7 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {
const struct InstructionSpecifier *spec;
uint16_t instructionIDWithOpsize;
- const char *specName, *specWithOpSizeName;
+ llvm::StringRef specName, specWithOpSizeName;
spec = specifierForUID(instructionID);
@@ -1112,7 +1112,7 @@ static int getID(struct InternalInstruction* insn, const void *miiArg) {
specName = GetInstrName(instructionID, miiArg);
specWithOpSizeName = GetInstrName(instructionIDWithOpsize, miiArg);
- if (is16BitEquivalent(specName, specWithOpSizeName) &&
+ if (is16BitEquivalent(specName.data(), specWithOpSizeName.data()) &&
(insn->mode == MODE_16BIT) ^ insn->prefixPresent[0x66]) {
insn->instructionID = instructionIDWithOpsize;
insn->spec = specifierForUID(instructionIDWithOpsize);
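
A side note on the decoder changes above: GetInstrName now returns llvm::StringRef, while the C-string parsers is16BitEquivalent and is64Bit keep their const char* signatures, so the call sites pass StringRef::data(). That is only safe because the returned name points into a NUL-terminated string table. The standalone sketch below models the same hazard with std::string_view as a stand-in for StringRef; the names are invented and none of this is LLVM code.

    #include <cassert>
    #include <cstring>
    #include <iostream>
    #include <string>
    #include <string_view>

    // Stand-in for the decoder's C-string helper: expects a NUL-terminated name.
    static bool is64BitName(const char *Name) {
      return std::strstr(Name, "64") != nullptr;
    }

    int main() {
      // Names backed by std::string are NUL-terminated, so .data() is safe,
      // just as StringRef::data() is safe when the StringRef points into a
      // NUL-terminated instruction-name table.
      std::string Table = "MOV64rr";
      std::string_view Name(Table);        // like the StringRef return value
      assert(is64BitName(Name.data()));    // fine: Table owns a trailing '\0'

      // A substring view is NOT NUL-terminated at its logical end; passing
      // its .data() to a C-string API silently reads past the view.
      std::string_view Prefix = Name.substr(0, 3); // "MOV"
      std::cout << "prefix length: " << Prefix.size() << '\n';
      // is64BitName(Prefix.data()) would still scan "MOV64rr", the whole buffer.
      return 0;
    }
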
diff --git a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
index 24d24a265b49..b07fd0b17d35 100644
--- a/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
+++ b/contrib/llvm/lib/Target/X86/Disassembler/X86DisassemblerDecoder.h
@@ -674,7 +674,7 @@ int decodeInstruction(InternalInstruction *insn,
/// \param s The message to print.
void Debug(const char *file, unsigned line, const char *s);
-const char *GetInstrName(unsigned Opcode, const void *mii);
+StringRef GetInstrName(unsigned Opcode, const void *mii);
} // namespace X86Disassembler
} // namespace llvm
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
index 3a5d056888a1..10b7e6ff5ee2 100644
--- a/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp
@@ -291,6 +291,9 @@ void X86ATTInstPrinter::printMemOffset(const MCInst *MI, unsigned Op,
void X86ATTInstPrinter::printU8Imm(const MCInst *MI, unsigned Op,
raw_ostream &O) {
+ if (MI->getOperand(Op).isExpr())
+ return printOperand(MI, Op, O);
+
O << markup("<imm:") << '$' << formatImm(MI->getOperand(Op).getImm() & 0xff)
<< markup(">");
}
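
The printU8Imm change above makes the AT&T printer tolerate a symbolic operand instead of asserting inside getImm(). The sketch below is a standalone model of that guard, assuming a toy variant-based Operand type; the names are invented and this is not the MCInst/MCOperand API.

    #include <cstdint>
    #include <iostream>
    #include <string>
    #include <variant>

    // Toy stand-in for an instruction operand: either a literal immediate or
    // a symbolic expression that must be printed as text.
    using Operand = std::variant<int64_t, std::string>;

    static void printU8Imm(const Operand &Op, std::ostream &OS) {
      if (const auto *Sym = std::get_if<std::string>(&Op)) {
        // Expression operand: defer to generic printing instead of getImm().
        OS << *Sym;
        return;
      }
      // Plain immediate: print only the low 8 bits, as the real printer does.
      OS << '$' << (std::get<int64_t>(Op) & 0xff);
    }

    int main() {
      printU8Imm(Operand(int64_t{0x1ff}), std::cout);        // prints $255
      std::cout << '\n';
      printU8Imm(Operand(std::string("foo@GOT")), std::cout); // prints foo@GOT
      std::cout << '\n';
      return 0;
    }
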
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp b/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
index f5379566b619..8594addb5dd4 100644
--- a/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.cpp
@@ -255,6 +255,10 @@ static std::string getMaskName(const MCInst *MI, const char *DestName,
CASE_MASKZ_UNPCK(UNPCKLPS, r)
CASE_MASKZ_SHUF(PALIGNR, r)
CASE_MASKZ_SHUF(PALIGNR, m)
+ CASE_MASKZ_SHUF(ALIGNQ, r)
+ CASE_MASKZ_SHUF(ALIGNQ, m)
+ CASE_MASKZ_SHUF(ALIGND, r)
+ CASE_MASKZ_SHUF(ALIGND, m)
CASE_MASKZ_SHUF(SHUFPD, m)
CASE_MASKZ_SHUF(SHUFPD, r)
CASE_MASKZ_SHUF(SHUFPS, m)
@@ -277,6 +281,26 @@ static std::string getMaskName(const MCInst *MI, const char *DestName,
CASE_MASKZ_VSHUF(64X2, r)
CASE_MASKZ_VSHUF(32X4, m)
CASE_MASKZ_VSHUF(32X4, r)
+ CASE_MASKZ_INS_COMMON(BROADCASTF64X2, Z128, rm)
+ CASE_MASKZ_INS_COMMON(BROADCASTI64X2, Z128, rm)
+ CASE_MASKZ_INS_COMMON(BROADCASTF64X2, , rm)
+ CASE_MASKZ_INS_COMMON(BROADCASTI64X2, , rm)
+ CASE_MASKZ_INS_COMMON(BROADCASTF64X4, , rm)
+ CASE_MASKZ_INS_COMMON(BROADCASTI64X4, , rm)
+ CASE_MASKZ_INS_COMMON(BROADCASTF32X4, Z256, rm)
+ CASE_MASKZ_INS_COMMON(BROADCASTI32X4, Z256, rm)
+ CASE_MASKZ_INS_COMMON(BROADCASTF32X4, , rm)
+ CASE_MASKZ_INS_COMMON(BROADCASTI32X4, , rm)
+ CASE_MASKZ_INS_COMMON(BROADCASTF32X8, , rm)
+ CASE_MASKZ_INS_COMMON(BROADCASTI32X8, , rm)
+ CASE_MASKZ_INS_COMMON(BROADCASTF32X2, Z256, r)
+ CASE_MASKZ_INS_COMMON(BROADCASTI32X2, Z256, r)
+ CASE_MASKZ_INS_COMMON(BROADCASTF32X2, Z256, m)
+ CASE_MASKZ_INS_COMMON(BROADCASTI32X2, Z256, m)
+ CASE_MASKZ_INS_COMMON(BROADCASTF32X2, Z, r)
+ CASE_MASKZ_INS_COMMON(BROADCASTI32X2, Z, r)
+ CASE_MASKZ_INS_COMMON(BROADCASTF32X2, Z, m)
+ CASE_MASKZ_INS_COMMON(BROADCASTI32X2, Z, m)
MaskWithZero = true;
MaskRegName = getRegName(MI->getOperand(1).getReg());
break;
@@ -320,6 +344,10 @@ static std::string getMaskName(const MCInst *MI, const char *DestName,
CASE_MASK_UNPCK(UNPCKLPS, r)
CASE_MASK_SHUF(PALIGNR, r)
CASE_MASK_SHUF(PALIGNR, m)
+ CASE_MASK_SHUF(ALIGNQ, r)
+ CASE_MASK_SHUF(ALIGNQ, m)
+ CASE_MASK_SHUF(ALIGND, r)
+ CASE_MASK_SHUF(ALIGND, m)
CASE_MASK_SHUF(SHUFPD, m)
CASE_MASK_SHUF(SHUFPD, r)
CASE_MASK_SHUF(SHUFPS, m)
@@ -342,6 +370,26 @@ static std::string getMaskName(const MCInst *MI, const char *DestName,
CASE_MASK_VSHUF(64X2, r)
CASE_MASK_VSHUF(32X4, m)
CASE_MASK_VSHUF(32X4, r)
+ CASE_MASK_INS_COMMON(BROADCASTF64X2, Z128, rm)
+ CASE_MASK_INS_COMMON(BROADCASTI64X2, Z128, rm)
+ CASE_MASK_INS_COMMON(BROADCASTF64X2, , rm)
+ CASE_MASK_INS_COMMON(BROADCASTI64X2, , rm)
+ CASE_MASK_INS_COMMON(BROADCASTF64X4, , rm)
+ CASE_MASK_INS_COMMON(BROADCASTI64X4, , rm)
+ CASE_MASK_INS_COMMON(BROADCASTF32X4, Z256, rm)
+ CASE_MASK_INS_COMMON(BROADCASTI32X4, Z256, rm)
+ CASE_MASK_INS_COMMON(BROADCASTF32X4, , rm)
+ CASE_MASK_INS_COMMON(BROADCASTI32X4, , rm)
+ CASE_MASK_INS_COMMON(BROADCASTF32X8, , rm)
+ CASE_MASK_INS_COMMON(BROADCASTI32X8, , rm)
+ CASE_MASK_INS_COMMON(BROADCASTF32X2, Z256, r)
+ CASE_MASK_INS_COMMON(BROADCASTI32X2, Z256, r)
+ CASE_MASK_INS_COMMON(BROADCASTF32X2, Z256, m)
+ CASE_MASK_INS_COMMON(BROADCASTI32X2, Z256, m)
+ CASE_MASK_INS_COMMON(BROADCASTF32X2, Z, r)
+ CASE_MASK_INS_COMMON(BROADCASTI32X2, Z, r)
+ CASE_MASK_INS_COMMON(BROADCASTF32X2, Z, m)
+ CASE_MASK_INS_COMMON(BROADCASTI32X2, Z, m)
MaskRegName = getRegName(MI->getOperand(2).getReg());
break;
}
@@ -382,7 +430,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::VBLENDPDrri:
case X86::VBLENDPDYrri:
Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
case X86::BLENDPDrmi:
case X86::VBLENDPDrmi:
case X86::VBLENDPDYrmi:
@@ -398,7 +446,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::VBLENDPSrri:
case X86::VBLENDPSYrri:
Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
case X86::BLENDPSrmi:
case X86::VBLENDPSrmi:
case X86::VBLENDPSYrmi:
@@ -414,7 +462,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::VPBLENDWrri:
case X86::VPBLENDWYrri:
Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
case X86::PBLENDWrmi:
case X86::VPBLENDWrmi:
case X86::VPBLENDWYrmi:
@@ -429,7 +477,7 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::VPBLENDDrri:
case X86::VPBLENDDYrri:
Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
case X86::VPBLENDDrmi:
case X86::VPBLENDDYrmi:
if (MI->getOperand(NumOperands - 1).isImm())
@@ -442,12 +490,12 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::INSERTPSrr:
case X86::VINSERTPSrr:
- case X86::VINSERTPSzrr:
+ case X86::VINSERTPSZrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
case X86::INSERTPSrm:
case X86::VINSERTPSrm:
- case X86::VINSERTPSzrm:
+ case X86::VINSERTPSZrm:
DestName = getRegName(MI->getOperand(0).getReg());
Src1Name = getRegName(MI->getOperand(1).getReg());
if (MI->getOperand(NumOperands - 1).isImm())
@@ -507,7 +555,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
CASE_MOVDUP(MOVSLDUP, r)
Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
+
CASE_MOVDUP(MOVSLDUP, m)
DestName = getRegName(MI->getOperand(0).getReg());
DecodeMOVSLDUPMask(getRegOperandVectorVT(MI, MVT::f32, 0), ShuffleMask);
@@ -515,7 +564,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
CASE_MOVDUP(MOVSHDUP, r)
Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
+
CASE_MOVDUP(MOVSHDUP, m)
DestName = getRegName(MI->getOperand(0).getReg());
DecodeMOVSHDUPMask(getRegOperandVectorVT(MI, MVT::f32, 0), ShuffleMask);
@@ -523,7 +573,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
CASE_MOVDUP(MOVDDUP, r)
Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
+
CASE_MOVDUP(MOVDDUP, m)
DestName = getRegName(MI->getOperand(0).getReg());
DecodeMOVDDUPMask(getRegOperandVectorVT(MI, MVT::f64, 0), ShuffleMask);
@@ -566,7 +617,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
CASE_SHUF(PALIGNR, rri)
Src1Name = getRegName(MI->getOperand(NumOperands - 2).getReg());
RegForm = true;
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
+
CASE_SHUF(PALIGNR, rmi)
Src2Name = getRegName(MI->getOperand(NumOperands-(RegForm?3:7)).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
@@ -576,9 +628,46 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
ShuffleMask);
break;
+ CASE_AVX512_INS_COMMON(ALIGNQ, Z, rri)
+ CASE_AVX512_INS_COMMON(ALIGNQ, Z256, rri)
+ CASE_AVX512_INS_COMMON(ALIGNQ, Z128, rri)
+ Src1Name = getRegName(MI->getOperand(NumOperands - 2).getReg());
+ RegForm = true;
+ LLVM_FALLTHROUGH;
+
+ CASE_AVX512_INS_COMMON(ALIGNQ, Z, rmi)
+ CASE_AVX512_INS_COMMON(ALIGNQ, Z256, rmi)
+ CASE_AVX512_INS_COMMON(ALIGNQ, Z128, rmi)
+ Src2Name = getRegName(MI->getOperand(NumOperands-(RegForm?3:7)).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ if (MI->getOperand(NumOperands - 1).isImm())
+ DecodeVALIGNMask(getRegOperandVectorVT(MI, MVT::i64, 0),
+ MI->getOperand(NumOperands - 1).getImm(),
+ ShuffleMask);
+ break;
+
+ CASE_AVX512_INS_COMMON(ALIGND, Z, rri)
+ CASE_AVX512_INS_COMMON(ALIGND, Z256, rri)
+ CASE_AVX512_INS_COMMON(ALIGND, Z128, rri)
+ Src1Name = getRegName(MI->getOperand(NumOperands - 2).getReg());
+ RegForm = true;
+ LLVM_FALLTHROUGH;
+
+ CASE_AVX512_INS_COMMON(ALIGND, Z, rmi)
+ CASE_AVX512_INS_COMMON(ALIGND, Z256, rmi)
+ CASE_AVX512_INS_COMMON(ALIGND, Z128, rmi)
+ Src2Name = getRegName(MI->getOperand(NumOperands-(RegForm?3:7)).getReg());
+ DestName = getRegName(MI->getOperand(0).getReg());
+ if (MI->getOperand(NumOperands - 1).isImm())
+ DecodeVALIGNMask(getRegOperandVectorVT(MI, MVT::i32, 0),
+ MI->getOperand(NumOperands - 1).getImm(),
+ ShuffleMask);
+ break;
+
CASE_SHUF(PSHUFD, ri)
Src1Name = getRegName(MI->getOperand(NumOperands - 2).getReg());
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
+
CASE_SHUF(PSHUFD, mi)
DestName = getRegName(MI->getOperand(0).getReg());
if (MI->getOperand(NumOperands - 1).isImm())
@@ -589,7 +678,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
CASE_SHUF(PSHUFHW, ri)
Src1Name = getRegName(MI->getOperand(NumOperands - 2).getReg());
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
+
CASE_SHUF(PSHUFHW, mi)
DestName = getRegName(MI->getOperand(0).getReg());
if (MI->getOperand(NumOperands - 1).isImm())
@@ -600,7 +690,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
CASE_SHUF(PSHUFLW, ri)
Src1Name = getRegName(MI->getOperand(NumOperands - 2).getReg());
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
+
CASE_SHUF(PSHUFLW, mi)
DestName = getRegName(MI->getOperand(0).getReg());
if (MI->getOperand(NumOperands - 1).isImm())
@@ -611,7 +702,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::MMX_PSHUFWri:
Src1Name = getRegName(MI->getOperand(1).getReg());
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
+
case X86::MMX_PSHUFWmi:
DestName = getRegName(MI->getOperand(0).getReg());
if (MI->getOperand(NumOperands - 1).isImm())
@@ -622,7 +714,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::PSWAPDrr:
Src1Name = getRegName(MI->getOperand(1).getReg());
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
+
case X86::PSWAPDrm:
DestName = getRegName(MI->getOperand(0).getReg());
DecodePSWAPMask(MVT::v2i32, ShuffleMask);
@@ -632,7 +725,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::MMX_PUNPCKHBWirr:
Src2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
RegForm = true;
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
+
CASE_UNPCK(PUNPCKHBW, m)
case X86::MMX_PUNPCKHBWirm:
Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
@@ -644,7 +738,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::MMX_PUNPCKHWDirr:
Src2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
RegForm = true;
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
+
CASE_UNPCK(PUNPCKHWD, m)
case X86::MMX_PUNPCKHWDirm:
Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
@@ -656,7 +751,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::MMX_PUNPCKHDQirr:
Src2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
RegForm = true;
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
+
CASE_UNPCK(PUNPCKHDQ, m)
case X86::MMX_PUNPCKHDQirm:
Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
@@ -667,7 +763,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
CASE_UNPCK(PUNPCKHQDQ, r)
Src2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
RegForm = true;
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
+
CASE_UNPCK(PUNPCKHQDQ, m)
Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
@@ -678,7 +775,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::MMX_PUNPCKLBWirr:
Src2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
RegForm = true;
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
+
CASE_UNPCK(PUNPCKLBW, m)
case X86::MMX_PUNPCKLBWirm:
Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
@@ -690,7 +788,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::MMX_PUNPCKLWDirr:
Src2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
RegForm = true;
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
+
CASE_UNPCK(PUNPCKLWD, m)
case X86::MMX_PUNPCKLWDirm:
Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
@@ -702,7 +801,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::MMX_PUNPCKLDQirr:
Src2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
RegForm = true;
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
+
CASE_UNPCK(PUNPCKLDQ, m)
case X86::MMX_PUNPCKLDQirm:
Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
@@ -713,7 +813,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
CASE_UNPCK(PUNPCKLQDQ, r)
Src2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
RegForm = true;
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
+
CASE_UNPCK(PUNPCKLQDQ, m)
Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
DestName = getRegName(MI->getOperand(0).getReg());
@@ -723,7 +824,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
CASE_SHUF(SHUFPD, rri)
Src2Name = getRegName(MI->getOperand(NumOperands - 2).getReg());
RegForm = true;
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
+
CASE_SHUF(SHUFPD, rmi)
if (MI->getOperand(NumOperands - 1).isImm())
DecodeSHUFPMask(getRegOperandVectorVT(MI, MVT::f64, 0),
@@ -736,7 +838,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
CASE_SHUF(SHUFPS, rri)
Src2Name = getRegName(MI->getOperand(NumOperands - 2).getReg());
RegForm = true;
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
+
CASE_SHUF(SHUFPS, rmi)
if (MI->getOperand(NumOperands - 1).isImm())
DecodeSHUFPMask(getRegOperandVectorVT(MI, MVT::f32, 0),
@@ -749,7 +852,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
CASE_VSHUF(64X2, r)
Src2Name = getRegName(MI->getOperand(NumOperands - 2).getReg());
RegForm = true;
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
+
CASE_VSHUF(64X2, m)
decodeVSHUF64x2FamilyMask(getRegOperandVectorVT(MI, MVT::i64, 0),
MI->getOperand(NumOperands - 1).getImm(),
@@ -761,7 +865,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
CASE_VSHUF(32X4, r)
Src2Name = getRegName(MI->getOperand(NumOperands - 2).getReg());
RegForm = true;
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
+
CASE_VSHUF(32X4, m)
decodeVSHUF64x2FamilyMask(getRegOperandVectorVT(MI, MVT::i32, 0),
MI->getOperand(NumOperands - 1).getImm(),
@@ -773,7 +878,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
CASE_UNPCK(UNPCKLPD, r)
Src2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
RegForm = true;
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
+
CASE_UNPCK(UNPCKLPD, m)
DecodeUNPCKLMask(getRegOperandVectorVT(MI, MVT::f64, 0), ShuffleMask);
Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
@@ -783,7 +889,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
CASE_UNPCK(UNPCKLPS, r)
Src2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
RegForm = true;
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
+
CASE_UNPCK(UNPCKLPS, m)
DecodeUNPCKLMask(getRegOperandVectorVT(MI, MVT::f32, 0), ShuffleMask);
Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
@@ -793,7 +900,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
CASE_UNPCK(UNPCKHPD, r)
Src2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
RegForm = true;
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
+
CASE_UNPCK(UNPCKHPD, m)
DecodeUNPCKHMask(getRegOperandVectorVT(MI, MVT::f64, 0), ShuffleMask);
Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
@@ -803,7 +911,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
CASE_UNPCK(UNPCKHPS, r)
Src2Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
RegForm = true;
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
+
CASE_UNPCK(UNPCKHPS, m)
DecodeUNPCKHMask(getRegOperandVectorVT(MI, MVT::f32, 0), ShuffleMask);
Src1Name = getRegName(MI->getOperand(NumOperands-(RegForm?2:6)).getReg());
@@ -812,7 +921,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
CASE_VPERMILPI(PERMILPS, r)
Src1Name = getRegName(MI->getOperand(NumOperands - 2).getReg());
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
+
CASE_VPERMILPI(PERMILPS, m)
if (MI->getOperand(NumOperands - 1).isImm())
DecodePSHUFMask(getRegOperandVectorVT(MI, MVT::f32, 0),
@@ -823,7 +933,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
CASE_VPERMILPI(PERMILPD, r)
Src1Name = getRegName(MI->getOperand(NumOperands - 2).getReg());
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
+
CASE_VPERMILPI(PERMILPD, m)
if (MI->getOperand(NumOperands - 1).isImm())
DecodePSHUFMask(getRegOperandVectorVT(MI, MVT::f64, 0),
@@ -835,7 +946,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::VPERM2F128rr:
case X86::VPERM2I128rr:
Src2Name = getRegName(MI->getOperand(2).getReg());
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
+
case X86::VPERM2F128rm:
case X86::VPERM2I128rm:
// For instruction comments purpose, assume the 256-bit vector is v4i64.
@@ -849,7 +961,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
CASE_VPERM(PERMPD, r)
Src1Name = getRegName(MI->getOperand(NumOperands - 2).getReg());
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
+
CASE_VPERM(PERMPD, m)
if (MI->getOperand(NumOperands - 1).isImm())
DecodeVPERMMask(getRegOperandVectorVT(MI, MVT::f64, 0),
@@ -860,7 +973,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
CASE_VPERM(PERMQ, r)
Src1Name = getRegName(MI->getOperand(NumOperands - 2).getReg());
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
+
CASE_VPERM(PERMQ, m)
if (MI->getOperand(NumOperands - 1).isImm())
DecodeVPERMMask(getRegOperandVectorVT(MI, MVT::i64, 0),
@@ -874,7 +988,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::VMOVSDZrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
Src1Name = getRegName(MI->getOperand(1).getReg());
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
+
case X86::MOVSDrm:
case X86::VMOVSDrm:
case X86::VMOVSDZrm:
@@ -887,7 +1002,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::VMOVSSZrr:
Src2Name = getRegName(MI->getOperand(2).getReg());
Src1Name = getRegName(MI->getOperand(1).getReg());
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
+
case X86::MOVSSrm:
case X86::VMOVSSrm:
case X86::VMOVSSZrm:
@@ -901,15 +1017,11 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::VMOVZPQILo2PQIrr:
case X86::VMOVZPQILo2PQIZrr:
Src1Name = getRegName(MI->getOperand(1).getReg());
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
+
case X86::MOVQI2PQIrm:
- case X86::MOVZQI2PQIrm:
- case X86::MOVZPQILo2PQIrm:
case X86::VMOVQI2PQIrm:
case X86::VMOVQI2PQIZrm:
- case X86::VMOVZQI2PQIrm:
- case X86::VMOVZPQILo2PQIrm:
- case X86::VMOVZPQILo2PQIZrm:
DecodeZeroMoveLowMask(MVT::v2i64, ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
@@ -946,15 +1058,59 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
case X86::VBROADCASTF128:
case X86::VBROADCASTI128:
+ CASE_AVX512_INS_COMMON(BROADCASTF64X2, Z128, rm)
+ CASE_AVX512_INS_COMMON(BROADCASTI64X2, Z128, rm)
DecodeSubVectorBroadcast(MVT::v4f64, MVT::v2f64, ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
break;
+ CASE_AVX512_INS_COMMON(BROADCASTF64X2, , rm)
+ CASE_AVX512_INS_COMMON(BROADCASTI64X2, , rm)
+ DecodeSubVectorBroadcast(MVT::v8f64, MVT::v2f64, ShuffleMask);
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ CASE_AVX512_INS_COMMON(BROADCASTF64X4, , rm)
+ CASE_AVX512_INS_COMMON(BROADCASTI64X4, , rm)
+ DecodeSubVectorBroadcast(MVT::v8f64, MVT::v4f64, ShuffleMask);
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ CASE_AVX512_INS_COMMON(BROADCASTF32X4, Z256, rm)
+ CASE_AVX512_INS_COMMON(BROADCASTI32X4, Z256, rm)
+ DecodeSubVectorBroadcast(MVT::v8f32, MVT::v4f32, ShuffleMask);
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ CASE_AVX512_INS_COMMON(BROADCASTF32X4, , rm)
+ CASE_AVX512_INS_COMMON(BROADCASTI32X4, , rm)
+ DecodeSubVectorBroadcast(MVT::v16f32, MVT::v4f32, ShuffleMask);
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ CASE_AVX512_INS_COMMON(BROADCASTF32X8, , rm)
+ CASE_AVX512_INS_COMMON(BROADCASTI32X8, , rm)
+ DecodeSubVectorBroadcast(MVT::v16f32, MVT::v8f32, ShuffleMask);
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z256, r)
+ CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z256, r)
+ Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
+ CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z256, m)
+ CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z256, m)
+ DecodeSubVectorBroadcast(MVT::v8f32, MVT::v2f32, ShuffleMask);
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
+ CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z, r)
+ CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z, r)
+ Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
+ CASE_AVX512_INS_COMMON(BROADCASTF32X2, Z, m)
+ CASE_AVX512_INS_COMMON(BROADCASTI32X2, Z, m)
+ DecodeSubVectorBroadcast(MVT::v16f32, MVT::v2f32, ShuffleMask);
+ DestName = getRegName(MI->getOperand(0).getReg());
+ break;
CASE_PMOVZX(PMOVZXBW, r)
CASE_PMOVZX(PMOVZXBD, r)
CASE_PMOVZX(PMOVZXBQ, r)
Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
+
CASE_PMOVZX(PMOVZXBW, m)
CASE_PMOVZX(PMOVZXBD, m)
CASE_PMOVZX(PMOVZXBQ, m)
@@ -965,7 +1121,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
CASE_PMOVZX(PMOVZXWD, r)
CASE_PMOVZX(PMOVZXWQ, r)
Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
+
CASE_PMOVZX(PMOVZXWD, m)
CASE_PMOVZX(PMOVZXWQ, m)
DecodeZeroExtendMask(MVT::i16, getZeroExtensionResultType(MI), ShuffleMask);
@@ -974,7 +1131,8 @@ bool llvm::EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
CASE_PMOVZX(PMOVZXDQ, r)
Src1Name = getRegName(MI->getOperand(NumOperands - 1).getReg());
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
+
CASE_PMOVZX(PMOVZXDQ, m)
DecodeZeroExtendMask(MVT::i32, getZeroExtensionResultType(MI), ShuffleMask);
DestName = getRegName(MI->getOperand(0).getReg());
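
Most of the churn in X86InstComments.cpp above replaces "// FALL THROUGH." comments with the LLVM_FALLTHROUGH macro, which on compilers that support it expands to an attribute such as C++17's [[fallthrough]], so implicit-fallthrough warnings stay quiet only where the fall-through is deliberate. A minimal standalone illustration, using the standard attribute directly rather than the LLVM macro:

    #include <iostream>

    // Classifies a blend-style opcode the way the comment printer does: the
    // register form records an extra source, then deliberately falls through
    // into the shared memory-form handling.
    enum class Form { RegReg, RegMem };

    static void describe(Form F) {
      bool SawSecondRegister = false;
      switch (F) {
      case Form::RegReg:
        SawSecondRegister = true;  // extra work only for the register form
        [[fallthrough]];           // intentional: shared code follows
      case Form::RegMem:
        std::cout << "blend, second source is "
                  << (SawSecondRegister ? "a register" : "memory") << '\n';
        break;
      }
    }

    int main() {
      describe(Form::RegReg);
      describe(Form::RegMem);
      return 0;
    }
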
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.h b/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.h
index 687581b10aec..c6d0d85a7d3d 100644
--- a/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.h
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86InstComments.h
@@ -16,6 +16,11 @@
#define LLVM_LIB_TARGET_X86_INSTPRINTER_X86INSTCOMMENTS_H
namespace llvm {
+
+ enum AsmComments {
+ AC_EVEX_2_VEX = 0x2 // For instr that was compressed from EVEX to VEX.
+ };
+
class MCInst;
class raw_ostream;
bool EmitAnyX86InstComments(const MCInst *MI, raw_ostream &OS,
diff --git a/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
index 879378fc7a97..4443edb8e342 100644
--- a/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
+++ b/contrib/llvm/lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp
@@ -253,5 +253,8 @@ void X86IntelInstPrinter::printMemOffset(const MCInst *MI, unsigned Op,
void X86IntelInstPrinter::printU8Imm(const MCInst *MI, unsigned Op,
raw_ostream &O) {
+ if (MI->getOperand(Op).isExpr())
+ return MI->getOperand(Op).getExpr()->print(O, &MAI);
+
O << formatImm(MI->getOperand(Op).getImm() & 0xff);
}
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
index e77a0dc9bc27..e83ec9f4045a 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
@@ -76,12 +76,12 @@ class X86AsmBackend : public MCAsmBackend {
public:
X86AsmBackend(const Target &T, StringRef CPU)
: MCAsmBackend(), CPU(CPU),
- MaxNopLength((CPU == "slm" || CPU == "lakemont") ? 7 : 15) {
+ MaxNopLength((CPU == "slm") ? 7 : 15) {
HasNopl = CPU != "generic" && CPU != "i386" && CPU != "i486" &&
CPU != "i586" && CPU != "pentium" && CPU != "pentium-mmx" &&
CPU != "i686" && CPU != "k6" && CPU != "k6-2" && CPU != "k6-3" &&
CPU != "geode" && CPU != "winchip-c6" && CPU != "winchip2" &&
- CPU != "c3" && CPU != "c3-2";
+ CPU != "c3" && CPU != "c3-2" && CPU != "lakemont";
}
unsigned getNumFixupKinds() const override {
@@ -546,8 +546,12 @@ protected:
// .cfi_def_cfa_register %rbp
//
HasFP = true;
- assert(MRI.getLLVMRegNum(Inst.getRegister(), true) ==
- (Is64Bit ? X86::RBP : X86::EBP) && "Invalid frame pointer!");
+
+ // If the frame pointer is other than esp/rsp, we do not have a way to
+ // generate a compact unwinding representation, so bail out.
+ if (MRI.getLLVMRegNum(Inst.getRegister(), true) !=
+ (Is64Bit ? X86::RBP : X86::EBP))
+ return 0;
// Reset the counts.
memset(SavedRegs, 0, sizeof(SavedRegs));
@@ -837,7 +841,8 @@ public:
MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
const MCRegisterInfo &MRI,
const Triple &TheTriple,
- StringRef CPU) {
+ StringRef CPU,
+ const MCTargetOptions &Options) {
if (TheTriple.isOSBinFormatMachO())
return new DarwinX86_32AsmBackend(T, MRI, CPU);
@@ -855,7 +860,8 @@ MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
const MCRegisterInfo &MRI,
const Triple &TheTriple,
- StringRef CPU) {
+ StringRef CPU,
+ const MCTargetOptions &Options) {
if (TheTriple.isOSBinFormatMachO()) {
MachO::CPUSubTypeX86 CS =
StringSwitch<MachO::CPUSubTypeX86>(TheTriple.getArchName())
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
index b4195176f904..aab552547fac 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h
@@ -234,88 +234,114 @@ namespace X86II {
/// their one register operand added to their opcode.
AddRegFrm = 2,
- /// MRMDestReg - This form is used for instructions that use the Mod/RM byte
- /// to specify a destination, which in this case is a register.
- ///
- MRMDestReg = 3,
-
- /// MRMDestMem - This form is used for instructions that use the Mod/RM byte
- /// to specify a destination, which in this case is memory.
- ///
- MRMDestMem = 4,
-
- /// MRMSrcReg - This form is used for instructions that use the Mod/RM byte
- /// to specify a source, which in this case is a register.
- ///
- MRMSrcReg = 5,
-
- /// MRMSrcMem - This form is used for instructions that use the Mod/RM byte
- /// to specify a source, which in this case is memory.
- ///
- MRMSrcMem = 6,
-
/// RawFrmMemOffs - This form is for instructions that store an absolute
/// memory offset as an immediate with a possible segment override.
- RawFrmMemOffs = 7,
+ RawFrmMemOffs = 3,
/// RawFrmSrc - This form is for instructions that use the source index
/// register SI/ESI/RSI with a possible segment override.
- RawFrmSrc = 8,
+ RawFrmSrc = 4,
/// RawFrmDst - This form is for instructions that use the destination index
/// register DI/EDI/ESI.
- RawFrmDst = 9,
+ RawFrmDst = 5,
/// RawFrmSrc - This form is for instructions that use the source index
/// register SI/ESI/ERI with a possible segment override, and also the
/// destination index register DI/ESI/RDI.
- RawFrmDstSrc = 10,
+ RawFrmDstSrc = 6,
/// RawFrmImm8 - This is used for the ENTER instruction, which has two
/// immediates, the first of which is a 16-bit immediate (specified by
/// the imm encoding) and the second is a 8-bit fixed value.
- RawFrmImm8 = 11,
+ RawFrmImm8 = 7,
/// RawFrmImm16 - This is used for CALL FAR instructions, which have two
/// immediates, the first of which is a 16 or 32-bit immediate (specified by
/// the imm encoding) and the second is a 16-bit fixed value. In the AMD
/// manual, this operand is described as pntr16:32 and pntr16:16
- RawFrmImm16 = 12,
-
- /// MRMX[rm] - The forms are used to represent instructions that use a
- /// Mod/RM byte, and don't use the middle field for anything.
- MRMXr = 14, MRMXm = 15,
+ RawFrmImm16 = 8,
/// MRM[0-7][rm] - These forms are used to represent instructions that use
/// a Mod/RM byte, and use the middle field to hold extended opcode
/// information. In the intel manual these are represented as /0, /1, ...
///
- // First, instructions that operate on a register r/m operand...
- MRM0r = 16, MRM1r = 17, MRM2r = 18, MRM3r = 19, // Format /0 /1 /2 /3
- MRM4r = 20, MRM5r = 21, MRM6r = 22, MRM7r = 23, // Format /4 /5 /6 /7
+ /// MRMDestMem - This form is used for instructions that use the Mod/RM byte
+ /// to specify a destination, which in this case is memory.
+ ///
+ MRMDestMem = 32,
+
+ /// MRMSrcMem - This form is used for instructions that use the Mod/RM byte
+ /// to specify a source, which in this case is memory.
+ ///
+ MRMSrcMem = 33,
+
+ /// MRMSrcMem4VOp3 - This form is used for instructions that encode
+ /// operand 3 with VEX.VVVV and load from memory.
+ ///
+ MRMSrcMem4VOp3 = 34,
+
+ /// MRMSrcMemOp4 - This form is used for instructions that use the Mod/RM
+ /// byte to specify the fourth source, which in this case is memory.
+ ///
+ MRMSrcMemOp4 = 35,
+
+ /// MRMXm - This form is used for instructions that use the Mod/RM byte
+ /// to specify a memory source, but doesn't use the middle field.
+ ///
+ MRMXm = 39, // Instruction that uses Mod/RM but not the middle field.
// Next, instructions that operate on a memory r/m operand...
- MRM0m = 24, MRM1m = 25, MRM2m = 26, MRM3m = 27, // Format /0 /1 /2 /3
- MRM4m = 28, MRM5m = 29, MRM6m = 30, MRM7m = 31, // Format /4 /5 /6 /7
-
- //// MRM_XX - A mod/rm byte of exactly 0xXX.
- MRM_C0 = 32, MRM_C1 = 33, MRM_C2 = 34, MRM_C3 = 35,
- MRM_C4 = 36, MRM_C5 = 37, MRM_C6 = 38, MRM_C7 = 39,
- MRM_C8 = 40, MRM_C9 = 41, MRM_CA = 42, MRM_CB = 43,
- MRM_CC = 44, MRM_CD = 45, MRM_CE = 46, MRM_CF = 47,
- MRM_D0 = 48, MRM_D1 = 49, MRM_D2 = 50, MRM_D3 = 51,
- MRM_D4 = 52, MRM_D5 = 53, MRM_D6 = 54, MRM_D7 = 55,
- MRM_D8 = 56, MRM_D9 = 57, MRM_DA = 58, MRM_DB = 59,
- MRM_DC = 60, MRM_DD = 61, MRM_DE = 62, MRM_DF = 63,
- MRM_E0 = 64, MRM_E1 = 65, MRM_E2 = 66, MRM_E3 = 67,
- MRM_E4 = 68, MRM_E5 = 69, MRM_E6 = 70, MRM_E7 = 71,
- MRM_E8 = 72, MRM_E9 = 73, MRM_EA = 74, MRM_EB = 75,
- MRM_EC = 76, MRM_ED = 77, MRM_EE = 78, MRM_EF = 79,
- MRM_F0 = 80, MRM_F1 = 81, MRM_F2 = 82, MRM_F3 = 83,
- MRM_F4 = 84, MRM_F5 = 85, MRM_F6 = 86, MRM_F7 = 87,
- MRM_F8 = 88, MRM_F9 = 89, MRM_FA = 90, MRM_FB = 91,
- MRM_FC = 92, MRM_FD = 93, MRM_FE = 94, MRM_FF = 95,
+ MRM0m = 40, MRM1m = 41, MRM2m = 42, MRM3m = 43, // Format /0 /1 /2 /3
+ MRM4m = 44, MRM5m = 45, MRM6m = 46, MRM7m = 47, // Format /4 /5 /6 /7
+
+ /// MRMDestReg - This form is used for instructions that use the Mod/RM byte
+ /// to specify a destination, which in this case is a register.
+ ///
+ MRMDestReg = 48,
+
+ /// MRMSrcReg - This form is used for instructions that use the Mod/RM byte
+ /// to specify a source, which in this case is a register.
+ ///
+ MRMSrcReg = 49,
+
+ /// MRMSrcReg4VOp3 - This form is used for instructions that encode
+ /// operand 3 with VEX.VVVV and do not load from memory.
+ ///
+ MRMSrcReg4VOp3 = 50,
+
+ /// MRMSrcRegOp4 - This form is used for instructions that use the Mod/RM
+ /// byte to specify the fourth source, which in this case is a register.
+ ///
+ MRMSrcRegOp4 = 51,
+
+ /// MRMXr - This form is used for instructions that use the Mod/RM byte
+ /// to specify a register source, but doesn't use the middle field.
+ ///
+ MRMXr = 55, // Instruction that uses Mod/RM but not the middle field.
+
+ // Instructions that operate on a register r/m operand...
+ MRM0r = 56, MRM1r = 57, MRM2r = 58, MRM3r = 59, // Format /0 /1 /2 /3
+ MRM4r = 60, MRM5r = 61, MRM6r = 62, MRM7r = 63, // Format /4 /5 /6 /7
+
+ /// MRM_XX - A mod/rm byte of exactly 0xXX.
+ MRM_C0 = 64, MRM_C1 = 65, MRM_C2 = 66, MRM_C3 = 67,
+ MRM_C4 = 68, MRM_C5 = 69, MRM_C6 = 70, MRM_C7 = 71,
+ MRM_C8 = 72, MRM_C9 = 73, MRM_CA = 74, MRM_CB = 75,
+ MRM_CC = 76, MRM_CD = 77, MRM_CE = 78, MRM_CF = 79,
+ MRM_D0 = 80, MRM_D1 = 81, MRM_D2 = 82, MRM_D3 = 83,
+ MRM_D4 = 84, MRM_D5 = 85, MRM_D6 = 86, MRM_D7 = 87,
+ MRM_D8 = 88, MRM_D9 = 89, MRM_DA = 90, MRM_DB = 91,
+ MRM_DC = 92, MRM_DD = 93, MRM_DE = 94, MRM_DF = 95,
+ MRM_E0 = 96, MRM_E1 = 97, MRM_E2 = 98, MRM_E3 = 99,
+ MRM_E4 = 100, MRM_E5 = 101, MRM_E6 = 102, MRM_E7 = 103,
+ MRM_E8 = 104, MRM_E9 = 105, MRM_EA = 106, MRM_EB = 107,
+ MRM_EC = 108, MRM_ED = 109, MRM_EE = 110, MRM_EF = 111,
+ MRM_F0 = 112, MRM_F1 = 113, MRM_F2 = 114, MRM_F3 = 115,
+ MRM_F4 = 116, MRM_F5 = 117, MRM_F6 = 118, MRM_F7 = 119,
+ MRM_F8 = 120, MRM_F9 = 121, MRM_FA = 122, MRM_FB = 123,
+ MRM_FC = 124, MRM_FD = 125, MRM_FE = 126, MRM_FF = 127,
FormMask = 127,
@@ -403,12 +429,13 @@ namespace X86II {
ImmMask = 15 << ImmShift,
Imm8 = 1 << ImmShift,
Imm8PCRel = 2 << ImmShift,
- Imm16 = 3 << ImmShift,
- Imm16PCRel = 4 << ImmShift,
- Imm32 = 5 << ImmShift,
- Imm32PCRel = 6 << ImmShift,
- Imm32S = 7 << ImmShift,
- Imm64 = 8 << ImmShift,
+ Imm8Reg = 3 << ImmShift,
+ Imm16 = 4 << ImmShift,
+ Imm16PCRel = 5 << ImmShift,
+ Imm32 = 6 << ImmShift,
+ Imm32PCRel = 7 << ImmShift,
+ Imm32S = 8 << ImmShift,
+ Imm64 = 9 << ImmShift,
//===------------------------------------------------------------------===//
// FP Instruction Classification... Zero is non-fp instruction.
@@ -488,39 +515,15 @@ namespace X86II {
VEX_4VShift = VEX_WShift + 1,
VEX_4V = 1ULL << VEX_4VShift,
- /// VEX_4VOp3 - Similar to VEX_4V, but used on instructions that encode
- /// operand 3 with VEX.vvvv.
- VEX_4VOp3Shift = VEX_4VShift + 1,
- VEX_4VOp3 = 1ULL << VEX_4VOp3Shift,
-
- /// VEX_I8IMM - Specifies that the last register used in a AVX instruction,
- /// must be encoded in the i8 immediate field. This usually happens in
- /// instructions with 4 operands.
- VEX_I8IMMShift = VEX_4VOp3Shift + 1,
- VEX_I8IMM = 1ULL << VEX_I8IMMShift,
-
/// VEX_L - Stands for a bit in the VEX opcode prefix meaning the current
/// instruction uses 256-bit wide registers. This is usually auto detected
/// if a VR256 register is used, but some AVX instructions also have this
/// field marked when using a f256 memory references.
- VEX_LShift = VEX_I8IMMShift + 1,
+ VEX_LShift = VEX_4VShift + 1,
VEX_L = 1ULL << VEX_LShift,
- // VEX_LIG - Specifies that this instruction ignores the L-bit in the VEX
- // prefix. Usually used for scalar instructions. Needed by disassembler.
- VEX_LIGShift = VEX_LShift + 1,
- VEX_LIG = 1ULL << VEX_LIGShift,
-
- // TODO: we should combine VEX_L and VEX_LIG together to form a 2-bit field
- // with following encoding:
- // - 00 V128
- // - 01 V256
- // - 10 V512
- // - 11 LIG (but, in insn encoding, leave VEX.L and EVEX.L in zeros.
- // this will save 1 tsflag bit
-
// EVEX_K - Set if this instruction requires masking
- EVEX_KShift = VEX_LIGShift + 1,
+ EVEX_KShift = VEX_LShift + 1,
EVEX_K = 1ULL << EVEX_KShift,
// EVEX_Z - Set if this instruction has EVEX.Z field set.
@@ -548,13 +551,8 @@ namespace X86II {
Has3DNow0F0FOpcodeShift = CD8_Scale_Shift + 7,
Has3DNow0F0FOpcode = 1ULL << Has3DNow0F0FOpcodeShift,
- /// MemOp4 - Used to indicate swapping of operand 3 and 4 to be encoded in
- /// ModRM or I8IMM. This is used for FMA4 and XOP instructions.
- MemOp4Shift = Has3DNow0F0FOpcodeShift + 1,
- MemOp4 = 1ULL << MemOp4Shift,
-
/// Explicitly specified rounding control
- EVEX_RCShift = MemOp4Shift + 1,
+ EVEX_RCShift = Has3DNow0F0FOpcodeShift + 1,
EVEX_RC = 1ULL << EVEX_RCShift
};
@@ -575,7 +573,8 @@ namespace X86II {
switch (TSFlags & X86II::ImmMask) {
default: llvm_unreachable("Unknown immediate size");
case X86II::Imm8:
- case X86II::Imm8PCRel: return 1;
+ case X86II::Imm8PCRel:
+ case X86II::Imm8Reg: return 1;
case X86II::Imm16:
case X86II::Imm16PCRel: return 2;
case X86II::Imm32:
@@ -595,6 +594,7 @@ namespace X86II {
case X86II::Imm32PCRel:
return true;
case X86II::Imm8:
+ case X86II::Imm8Reg:
case X86II::Imm16:
case X86II::Imm32:
case X86II::Imm32S:
@@ -612,6 +612,7 @@ namespace X86II {
return true;
case X86II::Imm8:
case X86II::Imm8PCRel:
+ case X86II::Imm8Reg:
case X86II::Imm16:
case X86II::Imm16PCRel:
case X86II::Imm32:
@@ -626,26 +627,25 @@ namespace X86II {
/// in this instruction.
/// If this is a two-address instruction,skip one of the register operands.
/// FIXME: This should be handled during MCInst lowering.
- inline int getOperandBias(const MCInstrDesc& Desc)
+ inline unsigned getOperandBias(const MCInstrDesc& Desc)
{
unsigned NumOps = Desc.getNumOperands();
- unsigned CurOp = 0;
if (NumOps > 1 && Desc.getOperandConstraint(1, MCOI::TIED_TO) == 0)
- ++CurOp;
- else if (NumOps > 3 && Desc.getOperandConstraint(2, MCOI::TIED_TO) == 0 &&
- Desc.getOperandConstraint(3, MCOI::TIED_TO) == 1)
+ return 1;
+ if (NumOps > 3 && Desc.getOperandConstraint(2, MCOI::TIED_TO) == 0 &&
+ Desc.getOperandConstraint(3, MCOI::TIED_TO) == 1)
// Special case for AVX-512 GATHER with 2 TIED_TO operands
// Skip the first 2 operands: dst, mask_wb
- CurOp += 2;
- else if (NumOps > 3 && Desc.getOperandConstraint(2, MCOI::TIED_TO) == 0 &&
- Desc.getOperandConstraint(NumOps - 1, MCOI::TIED_TO) == 1)
+ return 2;
+ if (NumOps > 3 && Desc.getOperandConstraint(2, MCOI::TIED_TO) == 0 &&
+ Desc.getOperandConstraint(NumOps - 1, MCOI::TIED_TO) == 1)
// Special case for GATHER with 2 TIED_TO operands
// Skip the first 2 operands: dst, mask_wb
- CurOp += 2;
- else if (NumOps > 2 && Desc.getOperandConstraint(NumOps - 2, MCOI::TIED_TO) == 0)
+ return 2;
+ if (NumOps > 2 && Desc.getOperandConstraint(NumOps - 2, MCOI::TIED_TO) == 0)
// SCATTER
- ++CurOp;
- return CurOp;
+ return 1;
+ return 0;
}
/// getMemoryOperandNo - The function returns the MCInst operand # for the
@@ -658,7 +658,6 @@ namespace X86II {
///
inline int getMemoryOperandNo(uint64_t TSFlags) {
bool HasVEX_4V = TSFlags & X86II::VEX_4V;
- bool HasMemOp4 = TSFlags & X86II::MemOp4;
bool HasEVEX_K = TSFlags & X86II::EVEX_K;
switch (TSFlags & X86II::FormMask) {
@@ -666,8 +665,6 @@ namespace X86II {
case X86II::Pseudo:
case X86II::RawFrm:
case X86II::AddRegFrm:
- case X86II::MRMDestReg:
- case X86II::MRMSrcReg:
case X86II::RawFrmImm8:
case X86II::RawFrmImm16:
case X86II::RawFrmMemOffs:
@@ -680,7 +677,17 @@ namespace X86II {
case X86II::MRMSrcMem:
// Start from 1, skip any registers encoded in VEX_VVVV or I8IMM, or a
// mask register.
- return 1 + HasVEX_4V + HasMemOp4 + HasEVEX_K;
+ return 1 + HasVEX_4V + HasEVEX_K;
+ case X86II::MRMSrcMem4VOp3:
+ // Skip registers encoded in reg.
+ return 1 + HasEVEX_K;
+ case X86II::MRMSrcMemOp4:
+ // Skip registers encoded in reg, VEX_VVVV, and I8IMM.
+ return 3;
+ case X86II::MRMDestReg:
+ case X86II::MRMSrcReg:
+ case X86II::MRMSrcReg4VOp3:
+ case X86II::MRMSrcRegOp4:
case X86II::MRMXr:
case X86II::MRM0r: case X86II::MRM1r:
case X86II::MRM2r: case X86II::MRM3r:
@@ -723,12 +730,9 @@ namespace X86II {
/// isX86_64ExtendedReg - Is the MachineOperand a x86-64 extended (r8 or
/// higher) register? e.g. r8, xmm8, xmm13, etc.
inline bool isX86_64ExtendedReg(unsigned RegNo) {
- if ((RegNo >= X86::XMM8 && RegNo <= X86::XMM15) ||
- (RegNo >= X86::XMM24 && RegNo <= X86::XMM31) ||
- (RegNo >= X86::YMM8 && RegNo <= X86::YMM15) ||
- (RegNo >= X86::YMM24 && RegNo <= X86::YMM31) ||
- (RegNo >= X86::ZMM8 && RegNo <= X86::ZMM15) ||
- (RegNo >= X86::ZMM24 && RegNo <= X86::ZMM31))
+ if ((RegNo >= X86::XMM8 && RegNo <= X86::XMM31) ||
+ (RegNo >= X86::YMM8 && RegNo <= X86::YMM31) ||
+ (RegNo >= X86::ZMM8 && RegNo <= X86::ZMM31))
return true;
switch (RegNo) {
@@ -743,6 +747,8 @@ namespace X86II {
case X86::R12B: case X86::R13B: case X86::R14B: case X86::R15B:
case X86::CR8: case X86::CR9: case X86::CR10: case X86::CR11:
case X86::CR12: case X86::CR13: case X86::CR14: case X86::CR15:
+ case X86::DR8: case X86::DR9: case X86::DR10: case X86::DR11:
+ case X86::DR12: case X86::DR13: case X86::DR14: case X86::DR15:
return true;
}
return false;
@@ -761,6 +767,16 @@ namespace X86II {
return (reg == X86::SPL || reg == X86::BPL ||
reg == X86::SIL || reg == X86::DIL);
}
+
+ /// isKMasked - Is this a masked instruction.
+ inline bool isKMasked(uint64_t TSFlags) {
+ return (TSFlags & X86II::EVEX_K) != 0;
+ }
+
+ /// isKMergedMasked - Is this a merge masked instruction.
+ inline bool isKMergeMasked(uint64_t TSFlags) {
+ return isKMasked(TSFlags) && (TSFlags & X86II::EVEX_Z) == 0;
+ }
}
} // end namespace llvm;
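
The new isKMasked/isKMergeMasked helpers at the end of X86BaseInfo.h are plain bit tests on TSFlags: EVEX.K set means the instruction takes a mask register, and a masked instruction with EVEX.Z clear merges into the destination rather than zeroing the masked-off lanes. A standalone sketch with invented bit positions (the real shifts are derived from the neighbouring enum shown above):

    #include <cstdint>
    #include <iostream>

    // Invented flag layout for illustration only; the real EVEX_K/EVEX_Z bits
    // live at positions computed from the TSFlags enum in X86BaseInfo.h.
    constexpr uint64_t EVEX_K = 1ULL << 0; // instruction takes a mask register
    constexpr uint64_t EVEX_Z = 1ULL << 1; // zero-masking rather than merging

    constexpr bool isKMasked(uint64_t TSFlags) {
      return (TSFlags & EVEX_K) != 0;
    }
    constexpr bool isKMergeMasked(uint64_t TSFlags) {
      return isKMasked(TSFlags) && (TSFlags & EVEX_Z) == 0;
    }

    int main() {
      std::cout << isKMergeMasked(EVEX_K) << '\n';          // 1: masked, merging
      std::cout << isKMergeMasked(EVEX_K | EVEX_Z) << '\n'; // 0: zero-masking
      std::cout << isKMergeMasked(0) << '\n';               // 0: not masked
      return 0;
    }
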
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
index b7c56cec2db8..48a1d8f1330c 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp
@@ -31,8 +31,7 @@ static cl::opt<AsmWriterFlavorTy>
AsmWriterFlavor("x86-asm-syntax", cl::init(ATT),
cl::desc("Choose style of code to emit from X86 backend:"),
cl::values(clEnumValN(ATT, "att", "Emit AT&T-style assembly"),
- clEnumValN(Intel, "intel", "Emit Intel-style assembly"),
- clEnumValEnd));
+ clEnumValN(Intel, "intel", "Emit Intel-style assembly")));
static cl::opt<bool>
MarkedJTDataRegions("mark-data-regions", cl::init(true),
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
index 96c2e81c332a..8045e7c6d872 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp
@@ -81,7 +81,8 @@ public:
MI.getOperand(OpNum).getReg());
}
- bool isX86_64ExtendedReg(const MCInst &MI, unsigned OpNum) const {
+ // Does this register require a bit to be set in REX prefix.
+ bool isREXExtendedReg(const MCInst &MI, unsigned OpNum) const {
return (getX86RegEncoding(MI, OpNum) >> 3) & 1;
}
@@ -602,8 +603,6 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
uint64_t Encoding = TSFlags & X86II::EncodingMask;
bool HasEVEX_K = TSFlags & X86II::EVEX_K;
bool HasVEX_4V = TSFlags & X86II::VEX_4V;
- bool HasVEX_4VOp3 = TSFlags & X86II::VEX_4VOp3;
- bool HasMemOp4 = TSFlags & X86II::MemOp4;
bool HasEVEX_RC = TSFlags & X86II::EVEX_RC;
// VEX_R: opcode externsion equivalent to REX.R in
@@ -745,11 +744,10 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
// src1(ModR/M), MemAddr
// src1(ModR/M), src2(VEX_4V), MemAddr
// src1(ModR/M), MemAddr, imm8
- // src1(ModR/M), MemAddr, src2(VEX_I8IMM)
+ // src1(ModR/M), MemAddr, src2(Imm[7:4])
//
// FMA4:
- // dst(ModR/M.reg), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM)
- // dst(ModR/M.reg), src1(VEX_4V), src2(VEX_I8IMM), src3(ModR/M),
+ // dst(ModR/M.reg), src1(VEX_4V), src2(ModR/M), src3(Imm[7:4])
unsigned RegEnc = getX86RegEncoding(MI, CurOp++);
VEX_R = ~(RegEnc >> 3) & 1;
EVEX_R2 = ~(RegEnc >> 4) & 1;
@@ -770,13 +768,34 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
if (!HasVEX_4V) // Only needed with VSIB which don't use VVVV.
EVEX_V2 = ~(IndexRegEnc >> 4) & 1;
- if (HasVEX_4VOp3)
- // Instruction format for 4VOp3:
- // src1(ModR/M), MemAddr, src3(VEX_4V)
- // CurOp points to start of the MemoryOperand,
- // it skips TIED_TO operands if exist, then increments past src1.
- // CurOp + X86::AddrNumOperands will point to src3.
- VEX_4V = ~getX86RegEncoding(MI, CurOp + X86::AddrNumOperands) & 0xf;
+ break;
+ }
+ case X86II::MRMSrcMem4VOp3: {
+ // Instruction format for 4VOp3:
+ // src1(ModR/M), MemAddr, src3(VEX_4V)
+ unsigned RegEnc = getX86RegEncoding(MI, CurOp++);
+ VEX_R = ~(RegEnc >> 3) & 1;
+
+ unsigned BaseRegEnc = getX86RegEncoding(MI, MemOperand + X86::AddrBaseReg);
+ VEX_B = ~(BaseRegEnc >> 3) & 1;
+ unsigned IndexRegEnc = getX86RegEncoding(MI, MemOperand+X86::AddrIndexReg);
+ VEX_X = ~(IndexRegEnc >> 3) & 1;
+
+ VEX_4V = ~getX86RegEncoding(MI, CurOp + X86::AddrNumOperands) & 0xf;
+ break;
+ }
+ case X86II::MRMSrcMemOp4: {
+ // dst(ModR/M.reg), src1(VEX_4V), src2(Imm[7:4]), src3(ModR/M),
+ unsigned RegEnc = getX86RegEncoding(MI, CurOp++);
+ VEX_R = ~(RegEnc >> 3) & 1;
+
+ unsigned VRegEnc = getX86RegEncoding(MI, CurOp++);
+ VEX_4V = ~VRegEnc & 0xf;
+
+ unsigned BaseRegEnc = getX86RegEncoding(MI, MemOperand + X86::AddrBaseReg);
+ VEX_B = ~(BaseRegEnc >> 3) & 1;
+ unsigned IndexRegEnc = getX86RegEncoding(MI, MemOperand+X86::AddrIndexReg);
+ VEX_X = ~(IndexRegEnc >> 3) & 1;
break;
}
case X86II::MRM0m: case X86II::MRM1m:
@@ -803,13 +822,12 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
}
case X86II::MRMSrcReg: {
// MRMSrcReg instructions forms:
- // dst(ModR/M), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM)
+ // dst(ModR/M), src1(VEX_4V), src2(ModR/M), src3(Imm[7:4])
// dst(ModR/M), src1(ModR/M)
// dst(ModR/M), src1(ModR/M), imm8
//
// FMA4:
- // dst(ModR/M.reg), src1(VEX_4V), src2(ModR/M), src3(VEX_I8IMM)
- // dst(ModR/M.reg), src1(VEX_4V), src2(VEX_I8IMM), src3(ModR/M),
+ // dst(ModR/M.reg), src1(VEX_4V), src2(Imm[7:4]), src3(ModR/M),
unsigned RegEnc = getX86RegEncoding(MI, CurOp++);
VEX_R = ~(RegEnc >> 3) & 1;
EVEX_R2 = ~(RegEnc >> 4) & 1;
@@ -823,14 +841,10 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
EVEX_V2 = ~(VRegEnc >> 4) & 1;
}
- if (HasMemOp4) // Skip second register source (encoded in I8IMM)
- CurOp++;
-
RegEnc = getX86RegEncoding(MI, CurOp++);
VEX_B = ~(RegEnc >> 3) & 1;
VEX_X = ~(RegEnc >> 4) & 1;
- if (HasVEX_4VOp3)
- VEX_4V = ~getX86RegEncoding(MI, CurOp++) & 0xf;
+
if (EVEX_b) {
if (HasEVEX_RC) {
unsigned RcOperand = NumOps-1;
@@ -841,6 +855,34 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte,
}
break;
}
+ case X86II::MRMSrcReg4VOp3: {
+ // Instruction format for 4VOp3:
+ // src1(ModR/M), src2(ModR/M), src3(VEX_4V)
+ unsigned RegEnc = getX86RegEncoding(MI, CurOp++);
+ VEX_R = ~(RegEnc >> 3) & 1;
+
+ RegEnc = getX86RegEncoding(MI, CurOp++);
+ VEX_B = ~(RegEnc >> 3) & 1;
+
+ VEX_4V = ~getX86RegEncoding(MI, CurOp++) & 0xf;
+ break;
+ }
+ case X86II::MRMSrcRegOp4: {
+ // dst(ModR/M.reg), src1(VEX_4V), src2(Imm[7:4]), src3(ModR/M),
+ unsigned RegEnc = getX86RegEncoding(MI, CurOp++);
+ VEX_R = ~(RegEnc >> 3) & 1;
+
+ unsigned VRegEnc = getX86RegEncoding(MI, CurOp++);
+ VEX_4V = ~VRegEnc & 0xf;
+
+ // Skip second register source (encoded in Imm[7:4])
+ ++CurOp;
+
+ RegEnc = getX86RegEncoding(MI, CurOp++);
+ VEX_B = ~(RegEnc >> 3) & 1;
+ VEX_X = ~(RegEnc >> 4) & 1;
+ break;
+ }
case X86II::MRMDestReg: {
// MRMDestReg instructions forms:
// dst(ModR/M), src(ModR/M)
@@ -976,52 +1018,51 @@ uint8_t X86MCCodeEmitter::DetermineREXPrefix(const MCInst &MI, uint64_t TSFlags,
unsigned Reg = MO.getReg();
if (Reg == X86::AH || Reg == X86::BH || Reg == X86::CH || Reg == X86::DH)
UsesHighByteReg = true;
- if (!X86II::isX86_64NonExtLowByteReg(Reg)) continue;
- // FIXME: The caller of DetermineREXPrefix slaps this prefix onto anything
- // that returns non-zero.
- REX |= 0x40; // REX fixed encoding prefix
- break;
+ if (X86II::isX86_64NonExtLowByteReg(Reg))
+ // FIXME: The caller of DetermineREXPrefix slaps this prefix onto anything
+ // that returns non-zero.
+ REX |= 0x40; // REX fixed encoding prefix
}
switch (TSFlags & X86II::FormMask) {
case X86II::AddRegFrm:
- REX |= isX86_64ExtendedReg(MI, CurOp++) << 0; // REX.B
+ REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B
break;
case X86II::MRMSrcReg:
- REX |= isX86_64ExtendedReg(MI, CurOp++) << 2; // REX.R
- REX |= isX86_64ExtendedReg(MI, CurOp++) << 0; // REX.B
+ REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R
+ REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B
break;
case X86II::MRMSrcMem: {
- REX |= isX86_64ExtendedReg(MI, CurOp++) << 2; // REX.R
- REX |= isX86_64ExtendedReg(MI, MemOperand+X86::AddrBaseReg) << 0; // REX.B
- REX |= isX86_64ExtendedReg(MI, MemOperand+X86::AddrIndexReg) << 1; // REX.X
+ REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R
+ REX |= isREXExtendedReg(MI, MemOperand+X86::AddrBaseReg) << 0; // REX.B
+ REX |= isREXExtendedReg(MI, MemOperand+X86::AddrIndexReg) << 1; // REX.X
CurOp += X86::AddrNumOperands;
break;
}
case X86II::MRMDestReg:
- REX |= isX86_64ExtendedReg(MI, CurOp++) << 0; // REX.B
- REX |= isX86_64ExtendedReg(MI, CurOp++) << 2; // REX.R
+ REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B
+ REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R
break;
case X86II::MRMDestMem:
- REX |= isX86_64ExtendedReg(MI, MemOperand+X86::AddrBaseReg) << 0; // REX.B
- REX |= isX86_64ExtendedReg(MI, MemOperand+X86::AddrIndexReg) << 1; // REX.X
+ REX |= isREXExtendedReg(MI, MemOperand+X86::AddrBaseReg) << 0; // REX.B
+ REX |= isREXExtendedReg(MI, MemOperand+X86::AddrIndexReg) << 1; // REX.X
CurOp += X86::AddrNumOperands;
- REX |= isX86_64ExtendedReg(MI, CurOp++) << 2; // REX.R
+ REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R
break;
case X86II::MRMXm:
case X86II::MRM0m: case X86II::MRM1m:
case X86II::MRM2m: case X86II::MRM3m:
case X86II::MRM4m: case X86II::MRM5m:
case X86II::MRM6m: case X86II::MRM7m:
- REX |= isX86_64ExtendedReg(MI, MemOperand+X86::AddrBaseReg) << 0; // REX.B
- REX |= isX86_64ExtendedReg(MI, MemOperand+X86::AddrIndexReg) << 1; // REX.X
+ REX |= isREXExtendedReg(MI, MemOperand+X86::AddrBaseReg) << 0; // REX.B
+ REX |= isREXExtendedReg(MI, MemOperand+X86::AddrIndexReg) << 1; // REX.X
break;
case X86II::MRMXr:
case X86II::MRM0r: case X86II::MRM1r:
case X86II::MRM2r: case X86II::MRM3r:
case X86II::MRM4r: case X86II::MRM5r:
case X86II::MRM6r: case X86II::MRM7r:
- REX |= isX86_64ExtendedReg(MI, CurOp++) << 0; // REX.B
+ REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B
break;
}
if (REX && UsesHighByteReg)
@@ -1133,10 +1174,7 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
// It uses the VEX.VVVV field?
bool HasVEX_4V = TSFlags & X86II::VEX_4V;
- bool HasVEX_4VOp3 = TSFlags & X86II::VEX_4VOp3;
- bool HasMemOp4 = TSFlags & X86II::MemOp4;
- bool HasVEX_I8IMM = TSFlags & X86II::VEX_I8IMM;
- assert((!HasMemOp4 || HasVEX_I8IMM) && "MemOp4 should imply VEX_I8IMM");
+ bool HasVEX_I8Reg = (TSFlags & X86II::ImmMask) == X86II::Imm8Reg;
// It uses the EVEX.aaa field?
bool HasEVEX_K = TSFlags & X86II::EVEX_K;
@@ -1312,21 +1350,42 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
if (HasVEX_4V) // Skip 1st src (which is encoded in VEX_VVVV)
++SrcRegNum;
- if (HasMemOp4) // Capture 2nd src (which is encoded in I8IMM)
- I8RegNum = getX86RegEncoding(MI, SrcRegNum++);
-
EmitRegModRMByte(MI.getOperand(SrcRegNum),
GetX86RegNum(MI.getOperand(CurOp)), CurByte, OS);
CurOp = SrcRegNum + 1;
- if (HasVEX_4VOp3)
- ++CurOp;
- if (!HasMemOp4 && HasVEX_I8IMM)
+ if (HasVEX_I8Reg)
I8RegNum = getX86RegEncoding(MI, CurOp++);
// do not count the rounding control operand
if (HasEVEX_RC)
--NumOps;
break;
}
+ case X86II::MRMSrcReg4VOp3: {
+ EmitByte(BaseOpcode, CurByte, OS);
+ unsigned SrcRegNum = CurOp + 1;
+
+ EmitRegModRMByte(MI.getOperand(SrcRegNum),
+ GetX86RegNum(MI.getOperand(CurOp)), CurByte, OS);
+ CurOp = SrcRegNum + 1;
+ ++CurOp; // Encoded in VEX.VVVV
+ break;
+ }
+ case X86II::MRMSrcRegOp4: {
+ EmitByte(BaseOpcode, CurByte, OS);
+ unsigned SrcRegNum = CurOp + 1;
+
+ // Skip 1st src (which is encoded in VEX_VVVV)
+ ++SrcRegNum;
+
+ // Capture 2nd src (which is encoded in Imm[7:4])
+ assert(HasVEX_I8Reg && "MRMSrcRegOp4 should imply VEX_I8Reg");
+ I8RegNum = getX86RegEncoding(MI, SrcRegNum++);
+
+ EmitRegModRMByte(MI.getOperand(SrcRegNum),
+ GetX86RegNum(MI.getOperand(CurOp)), CurByte, OS);
+ CurOp = SrcRegNum + 1;
+ break;
+ }
case X86II::MRMSrcMem: {
unsigned FirstMemOp = CurOp+1;
@@ -1336,20 +1395,42 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
if (HasVEX_4V)
++FirstMemOp; // Skip the register source (which is encoded in VEX_VVVV).
- if (HasMemOp4) // Capture second register source (encoded in I8IMM)
- I8RegNum = getX86RegEncoding(MI, FirstMemOp++);
-
EmitByte(BaseOpcode, CurByte, OS);
emitMemModRMByte(MI, FirstMemOp, GetX86RegNum(MI.getOperand(CurOp)),
TSFlags, Rex, CurByte, OS, Fixups, STI);
CurOp = FirstMemOp + X86::AddrNumOperands;
- if (HasVEX_4VOp3)
- ++CurOp;
- if (!HasMemOp4 && HasVEX_I8IMM)
+ if (HasVEX_I8Reg)
I8RegNum = getX86RegEncoding(MI, CurOp++);
break;
}
+ case X86II::MRMSrcMem4VOp3: {
+ unsigned FirstMemOp = CurOp+1;
+
+ EmitByte(BaseOpcode, CurByte, OS);
+
+ emitMemModRMByte(MI, FirstMemOp, GetX86RegNum(MI.getOperand(CurOp)),
+ TSFlags, Rex, CurByte, OS, Fixups, STI);
+ CurOp = FirstMemOp + X86::AddrNumOperands;
+ ++CurOp; // Encoded in VEX.VVVV.
+ break;
+ }
+ case X86II::MRMSrcMemOp4: {
+ unsigned FirstMemOp = CurOp+1;
+
+ ++FirstMemOp; // Skip the register source (which is encoded in VEX_VVVV).
+
+ // Capture second register source (encoded in Imm[7:4])
+ assert(HasVEX_I8Reg && "MRMSrcRegOp4 should imply VEX_I8Reg");
+ I8RegNum = getX86RegEncoding(MI, FirstMemOp++);
+
+ EmitByte(BaseOpcode, CurByte, OS);
+
+ emitMemModRMByte(MI, FirstMemOp, GetX86RegNum(MI.getOperand(CurOp)),
+ TSFlags, Rex, CurByte, OS, Fixups, STI);
+ CurOp = FirstMemOp + X86::AddrNumOperands;
+ break;
+ }
case X86II::MRMXr:
case X86II::MRM0r: case X86II::MRM1r:
@@ -1410,7 +1491,7 @@ encodeInstruction(const MCInst &MI, raw_ostream &OS,
break;
}
- if (HasVEX_I8IMM) {
+ if (HasVEX_I8Reg) {
// The last source register of a 4 operand instruction in AVX is encoded
// in bits[7:4] of a immediate byte.
assert(I8RegNum < 16 && "Register encoding out of range");
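
Several of the code-emitter hunks above retire the separate VEX_I8IMM/MemOp4 flags in favour of a single Imm8Reg immediate kind: for four-operand AVX/FMA4/XOP forms, the extra source register travels in bits [7:4] of the trailing immediate byte, with the low nibble left for a small literal in the forms that carry one. A standalone sketch of composing and decoding that byte; the helper names are invented.

    #include <cassert>
    #include <cstdint>
    #include <iostream>

    // Pack a register encoding (0-15) into bits [7:4] of the immediate byte,
    // keeping any small literal payload in bits [3:0].
    static uint8_t packImm8Reg(unsigned RegEnc, uint8_t LowNibble = 0) {
      assert(RegEnc < 16 && "Register encoding out of range");
      assert(LowNibble < 16 && "Low nibble out of range");
      return static_cast<uint8_t>((RegEnc << 4) | LowNibble);
    }

    static unsigned unpackImm8Reg(uint8_t Imm) { return Imm >> 4; }

    int main() {
      // e.g. register encoding 13 as the extra source, literal 0x2 low bits.
      uint8_t Imm = packImm8Reg(/*RegEnc=*/13, /*LowNibble=*/0x2);
      std::cout << std::hex << "imm8 = 0x" << unsigned(Imm) << '\n'; // 0xd2
      std::cout << std::dec << "reg  = " << unpackImm8Reg(Imm) << '\n'; // 13
      return 0;
    }
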
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
index 311a8d677eea..22cb0fac33cb 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp
@@ -234,7 +234,7 @@ static MCInstrAnalysis *createX86MCInstrAnalysis(const MCInstrInfo *Info) {
// Force static initialization.
extern "C" void LLVMInitializeX86TargetMC() {
- for (Target *T : {&TheX86_32Target, &TheX86_64Target}) {
+ for (Target *T : {&getTheX86_32Target(), &getTheX86_64Target()}) {
// Register the MC asm info.
RegisterMCAsmInfoFn X(*T, createX86MCAsmInfo);
@@ -268,9 +268,9 @@ extern "C" void LLVMInitializeX86TargetMC() {
}
// Register the asm backend.
- TargetRegistry::RegisterMCAsmBackend(TheX86_32Target,
+ TargetRegistry::RegisterMCAsmBackend(getTheX86_32Target(),
createX86_32AsmBackend);
- TargetRegistry::RegisterMCAsmBackend(TheX86_64Target,
+ TargetRegistry::RegisterMCAsmBackend(getTheX86_64Target(),
createX86_64AsmBackend);
}
diff --git a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
index ca4f0d3e17d5..f73e734b9b0e 100644
--- a/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
+++ b/contrib/llvm/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h
@@ -27,13 +27,15 @@ class MCObjectWriter;
class MCRegisterInfo;
class MCSubtargetInfo;
class MCRelocationInfo;
+class MCTargetOptions;
class Target;
class Triple;
class StringRef;
class raw_ostream;
class raw_pwrite_stream;
-extern Target TheX86_32Target, TheX86_64Target;
+Target &getTheX86_32Target();
+Target &getTheX86_64Target();
/// Flavour of dwarf regnumbers
///
@@ -69,9 +71,11 @@ MCCodeEmitter *createX86MCCodeEmitter(const MCInstrInfo &MCII,
MCContext &Ctx);
MCAsmBackend *createX86_32AsmBackend(const Target &T, const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU);
+ const Triple &TT, StringRef CPU,
+ const MCTargetOptions &Options);
MCAsmBackend *createX86_64AsmBackend(const Target &T, const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU);
+ const Triple &TT, StringRef CPU,
+ const MCTargetOptions &Options);
/// Construct an X86 Windows COFF machine code streamer which will generate
/// PE/COFF format object files.
diff --git a/contrib/llvm/lib/Target/X86/TargetInfo/X86TargetInfo.cpp b/contrib/llvm/lib/Target/X86/TargetInfo/X86TargetInfo.cpp
index fceb083b5f2d..d2654fc67ed5 100644
--- a/contrib/llvm/lib/Target/X86/TargetInfo/X86TargetInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/TargetInfo/X86TargetInfo.cpp
@@ -11,12 +11,19 @@
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
-Target llvm::TheX86_32Target, llvm::TheX86_64Target;
+Target &llvm::getTheX86_32Target() {
+ static Target TheX86_32Target;
+ return TheX86_32Target;
+}
+Target &llvm::getTheX86_64Target() {
+ static Target TheX86_64Target;
+ return TheX86_64Target;
+}
extern "C" void LLVMInitializeX86TargetInfo() {
- RegisterTarget<Triple::x86, /*HasJIT=*/true>
- X(TheX86_32Target, "x86", "32-bit X86: Pentium-Pro and above");
+ RegisterTarget<Triple::x86, /*HasJIT=*/true> X(
+ getTheX86_32Target(), "x86", "32-bit X86: Pentium-Pro and above");
- RegisterTarget<Triple::x86_64, /*HasJIT=*/true>
- Y(TheX86_64Target, "x86-64", "64-bit X86: EM64T and AMD64");
+ RegisterTarget<Triple::x86_64, /*HasJIT=*/true> Y(
+ getTheX86_64Target(), "x86-64", "64-bit X86: EM64T and AMD64");
}
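The TheX86_32Target/TheX86_64Target globals are replaced by accessor functions returning function-local statics, which avoids depending on global constructor ordering across translation units. The pattern, reduced to a sketch (Target here is only a stand-in for llvm::Target):

    class Target {};

    Target &getTheExampleTarget() {
      // Constructed on first use; initialization is thread-safe in C++11.
      static Target TheExampleTarget;
      return TheExampleTarget;
    }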
diff --git a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
index 18f71675437b..1be5aec849fc 100644
--- a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
+++ b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.cpp
@@ -136,7 +136,7 @@ void DecodePSRLDQMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
void DecodePALIGNRMask(MVT VT, unsigned Imm,
SmallVectorImpl<int> &ShuffleMask) {
unsigned NumElts = VT.getVectorNumElements();
- unsigned Offset = Imm * (VT.getVectorElementType().getSizeInBits() / 8);
+ unsigned Offset = Imm * (VT.getScalarSizeInBits() / 8);
unsigned NumLanes = VT.getSizeInBits() / 128;
unsigned NumLaneElts = NumElts / NumLanes;
@@ -151,6 +151,16 @@ void DecodePALIGNRMask(MVT VT, unsigned Imm,
}
}
+void DecodeVALIGNMask(MVT VT, unsigned Imm,
+ SmallVectorImpl<int> &ShuffleMask) {
+ int NumElts = VT.getVectorNumElements();
+ // Not all bits of the immediate are used so mask it.
+ assert(isPowerOf2_32(NumElts) && "NumElts should be power of 2");
+ Imm = Imm & (NumElts - 1);
+ for (int i = 0; i != NumElts; ++i)
+ ShuffleMask.push_back(i + Imm);
+}
+
/// DecodePSHUFMask - This decodes the shuffle masks for pshufw, pshufd, and vpermilp*.
/// VT indicates the type of the vector allowing it to handle different
/// datatypes and vector widths.
@@ -538,10 +548,11 @@ void DecodeVPERMIL2PMask(MVT VT, unsigned M2Z, ArrayRef<uint64_t> RawMask,
unsigned VecSize = VT.getSizeInBits();
unsigned EltSize = VT.getScalarSizeInBits();
unsigned NumLanes = VecSize / 128;
- unsigned NumEltsPerLane = VT.getVectorNumElements() / NumLanes;
- assert((VecSize == 128 || VecSize == 256) &&
- "Unexpected vector size");
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned NumEltsPerLane = NumElts / NumLanes;
+ assert((VecSize == 128 || VecSize == 256) && "Unexpected vector size");
assert((EltSize == 32 || EltSize == 64) && "Unexpected element size");
+ assert((NumElts == RawMask.size()) && "Unexpected mask size");
for (unsigned i = 0, e = RawMask.size(); i < e; ++i) {
// VPERMIL2 Operation.
@@ -562,14 +573,15 @@ void DecodeVPERMIL2PMask(MVT VT, unsigned M2Z, ArrayRef<uint64_t> RawMask,
continue;
}
- unsigned Index = i & ~(NumEltsPerLane - 1);
+ int Index = i & ~(NumEltsPerLane - 1);
if (EltSize == 64)
Index += (Selector >> 1) & 0x1;
else
Index += Selector & 0x3;
- unsigned SrcOffset = (Selector >> 2) & 1;
- ShuffleMask.push_back((int)(SrcOffset + Index));
+ int Src = (Selector >> 2) & 0x1;
+ Index += Src * NumElts;
+ ShuffleMask.push_back(Index);
}
}
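DecodeVALIGNMask models VALIGN as a rotate of the concatenated sources: result element i comes from element i + Imm, and indices of NumElts or more select from the second source operand. A standalone sketch mirroring the logic shown above:

    #include <cstdio>
    #include <vector>

    static std::vector<int> decodeVALIGN(int NumElts, unsigned Imm) {
      std::vector<int> Mask;
      Imm &= unsigned(NumElts - 1);        // only log2(NumElts) bits are used
      for (int i = 0; i != NumElts; ++i)
        Mask.push_back(int(i + Imm));      // >= NumElts means "second source"
      return Mask;
    }

    int main() {
      for (int M : decodeVALIGN(8, 3))     // v8i32, Imm = 3
        std::printf("%d ", M);             // prints: 3 4 5 6 7 8 9 10
      return 0;
    }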
diff --git a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
index dc21c19752c3..17619d09d059 100644
--- a/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
+++ b/contrib/llvm/lib/Target/X86/Utils/X86ShuffleDecode.h
@@ -55,6 +55,8 @@ void DecodePSRLDQMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
void DecodePALIGNRMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
+void DecodeVALIGNMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask);
+
/// Decodes the shuffle masks for pshufd/pshufw/vpermilpd/vpermilps.
/// VT indicates the type of the vector allowing it to handle different
/// datatypes and vector widths.
diff --git a/contrib/llvm/lib/Target/X86/X86.h b/contrib/llvm/lib/Target/X86/X86.h
index 23d6c7120a4b..2cb80a482d06 100644
--- a/contrib/llvm/lib/Target/X86/X86.h
+++ b/contrib/llvm/lib/Target/X86/X86.h
@@ -87,6 +87,13 @@ FunctionPass *createX86ExpandPseudoPass();
FunctionPass *createX86FixupBWInsts();
void initializeFixupBWInstPassPass(PassRegistry &);
+
+/// This pass replaces the EVEX encoding of AVX-512 instructions with the VEX
+/// encoding when possible in order to reduce code size.
+FunctionPass *createX86EvexToVexInsts();
+
+void initializeEvexToVexInstPassPass(PassRegistry &);
+
} // End llvm namespace
#endif
diff --git a/contrib/llvm/lib/Target/X86/X86.td b/contrib/llvm/lib/Target/X86/X86.td
index 8267a84518fc..dc18a59a30ba 100644
--- a/contrib/llvm/lib/Target/X86/X86.td
+++ b/contrib/llvm/lib/Target/X86/X86.td
@@ -99,6 +99,8 @@ def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true",
"Bit testing of memory is slow">;
def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
"SHLD instruction is slow">;
+def FeatureSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
+ "PMULLD instruction is slow">;
// FIXME: This should not apply to CPUs that do not have SSE.
def FeatureSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16",
"IsUAMem16Slow", "true",
@@ -141,8 +143,8 @@ def FeatureVLX : SubtargetFeature<"avx512vl", "HasVLX", "true",
"Enable AVX-512 Vector Length eXtensions",
[FeatureAVX512]>;
def FeatureVBMI : SubtargetFeature<"avx512vbmi", "HasVBMI", "true",
- "Enable AVX-512 Vector Bit Manipulation Instructions",
- [FeatureAVX512]>;
+ "Enable AVX-512 Vector Byte Manipulation Instructions",
+ [FeatureBWI]>;
def FeatureIFMA : SubtargetFeature<"avx512ifma", "HasIFMA", "true",
"Enable AVX-512 Integer Fused Multiple-Add",
[FeatureAVX512]>;
@@ -249,6 +251,25 @@ def FeatureSoftFloat
def FeatureFastPartialYMMWrite
: SubtargetFeature<"fast-partial-ymm-write", "HasFastPartialYMMWrite",
"true", "Partial writes to YMM registers are fast">;
+// FeatureFastScalarFSQRT should be enabled if scalar FSQRT has shorter latency
+// than the corresponding NR code. FeatureFastVectorFSQRT should be enabled if
+// vector FSQRT has higher throughput than the corresponding NR code.
+// The idea is that throughput bound code is likely to be vectorized, so for
+// vectorized code we should care about the throughput of SQRT operations.
+// But if the code is scalar that probably means that the code has some kind of
+// dependency and we should care more about reducing the latency.
+def FeatureFastScalarFSQRT
+ : SubtargetFeature<"fast-scalar-fsqrt", "HasFastScalarFSQRT",
+ "true", "Scalar SQRT is fast (disable Newton-Raphson)">;
+def FeatureFastVectorFSQRT
+ : SubtargetFeature<"fast-vector-fsqrt", "HasFastVectorFSQRT",
+ "true", "Vector SQRT is fast (disable Newton-Raphson)">;
+// If lzcnt has equivalent latency/throughput to most simple integer ops, it can
+// be used to replace test/set sequences.
+def FeatureFastLZCNT
+ : SubtargetFeature<
+ "fast-lzcnt", "HasFastLZCNT", "true",
+ "LZCNT instructions are as fast as most simple integer ops">;
//===----------------------------------------------------------------------===//
// X86 processors supported.
@@ -384,6 +405,7 @@ class SilvermontProc<string Name> : ProcessorModel<Name, SLMModel, [
FeatureSlowLEA,
FeatureSlowIncDec,
FeatureSlowBTMem,
+ FeatureSlowPMULLD,
FeatureLAHFSAHF
]>;
def : SilvermontProc<"silvermont">;
@@ -442,7 +464,8 @@ def SNBFeatures : ProcessorFeatures<[], [
FeaturePCLMUL,
FeatureXSAVE,
FeatureXSAVEOPT,
- FeatureLAHFSAHF
+ FeatureLAHFSAHF,
+ FeatureFastScalarFSQRT
]>;
class SandyBridgeProc<string Name> : ProcModel<Name, SandyBridgeModel,
@@ -500,7 +523,8 @@ def SKLFeatures : ProcessorFeatures<BDWFeatures.Value, [
FeatureXSAVEC,
FeatureXSAVES,
FeatureSGX,
- FeatureCLFLUSHOPT
+ FeatureCLFLUSHOPT,
+ FeatureFastVectorFSQRT
]>;
// FIXME: define SKL model
@@ -631,6 +655,7 @@ def : ProcessorModel<"btver2", BtVer2Model, [
FeatureF16C,
FeatureMOVBE,
FeatureLZCNT,
+ FeatureFastLZCNT,
FeaturePOPCNT,
FeatureXSAVE,
FeatureXSAVEOPT,
@@ -729,6 +754,7 @@ def : Proc<"bdver4", [
FeatureTBM,
FeatureFMA,
FeatureXSAVEOPT,
+ FeatureSlowSHLD,
FeatureFSGSBase,
FeatureLAHFSAHF,
FeatureMWAITX
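FeatureFastLZCNT, added in the X86.td hunk above, is about letting LZCNT stand in for flag-producing test/set sequences. The underlying trick, in illustrative form (not code from this patch): for a 32-bit input, LZCNT yields 32 exactly when the input is zero, so bit 5 of its result already is the "is zero" predicate.

    #include <cstdint>

    static bool isZeroViaLzcnt(uint32_t X) {
      // __builtin_clz is undefined for 0, so model the instruction's 0 -> 32
      // behaviour explicitly; a real lowering would simply emit LZCNT.
      uint32_t Lz = X ? uint32_t(__builtin_clz(X)) : 32u;
      return ((Lz >> 5) & 1u) != 0;        // 1 iff X == 0, no compare/branch
    }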
diff --git a/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp b/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
index 67e51f1e9194..d42e1187ce64 100644
--- a/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp
@@ -57,10 +57,10 @@ bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
SetupMachineFunction(MF);
if (Subtarget->isTargetCOFF()) {
- bool Intrn = MF.getFunction()->hasInternalLinkage();
+ bool Local = MF.getFunction()->hasLocalLinkage();
OutStreamer->BeginCOFFSymbolDef(CurrentFnSym);
- OutStreamer->EmitCOFFSymbolStorageClass(Intrn ? COFF::IMAGE_SYM_CLASS_STATIC
- : COFF::IMAGE_SYM_CLASS_EXTERNAL);
+ OutStreamer->EmitCOFFSymbolStorageClass(
+ Local ? COFF::IMAGE_SYM_CLASS_STATIC : COFF::IMAGE_SYM_CLASS_EXTERNAL);
OutStreamer->EmitCOFFSymbolType(COFF::IMAGE_SYM_DTYPE_FUNCTION
<< COFF::SCT_COMPLEX_TYPE_SHIFT);
OutStreamer->EndCOFFSymbolDef();
@@ -627,11 +627,11 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
raw_string_ostream FlagsOS(Flags);
for (const auto &Function : M)
- TLOFCOFF.emitLinkerFlagsForGlobal(FlagsOS, &Function, *Mang);
+ TLOFCOFF.emitLinkerFlagsForGlobal(FlagsOS, &Function);
for (const auto &Global : M.globals())
- TLOFCOFF.emitLinkerFlagsForGlobal(FlagsOS, &Global, *Mang);
+ TLOFCOFF.emitLinkerFlagsForGlobal(FlagsOS, &Global);
for (const auto &Alias : M.aliases())
- TLOFCOFF.emitLinkerFlagsForGlobal(FlagsOS, &Alias, *Mang);
+ TLOFCOFF.emitLinkerFlagsForGlobal(FlagsOS, &Alias);
FlagsOS.flush();
@@ -656,6 +656,6 @@ void X86AsmPrinter::EmitEndOfAsmFile(Module &M) {
// Force static initialization.
extern "C" void LLVMInitializeX86AsmPrinter() {
- RegisterAsmPrinter<X86AsmPrinter> X(TheX86_32Target);
- RegisterAsmPrinter<X86AsmPrinter> Y(TheX86_64Target);
+ RegisterAsmPrinter<X86AsmPrinter> X(getTheX86_32Target());
+ RegisterAsmPrinter<X86AsmPrinter> Y(getTheX86_64Target());
}
diff --git a/contrib/llvm/lib/Target/X86/X86AsmPrinter.h b/contrib/llvm/lib/Target/X86/X86AsmPrinter.h
index dcb7b5a3466f..6798253d0f6a 100644
--- a/contrib/llvm/lib/Target/X86/X86AsmPrinter.h
+++ b/contrib/llvm/lib/Target/X86/X86AsmPrinter.h
@@ -71,27 +71,6 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
StackMapShadowTracker SMShadowTracker;
- // This describes the kind of sled we're storing in the XRay table.
- enum class SledKind : uint8_t {
- FUNCTION_ENTER = 0,
- FUNCTION_EXIT = 1,
- TAIL_CALL = 2,
- };
-
- // The table will contain these structs that point to the sled, the function
- // containing the sled, and what kind of sled (and whether they should always
- // be instrumented).
- struct XRayFunctionEntry {
- const MCSymbol *Sled;
- const MCSymbol *Function;
- SledKind Kind;
- bool AlwaysInstrument;
- const class Function *Fn;
- };
-
- // All the sleds to be emitted.
- std::vector<XRayFunctionEntry> Sleds;
-
// All instructions emitted by the X86AsmPrinter should use this helper
// method.
//
@@ -117,15 +96,13 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter {
// function.
void EmitXRayTable();
- // Helper function to record a given XRay sled.
- void recordSled(MCSymbol *Sled, const MachineInstr &MI, SledKind Kind);
public:
explicit X86AsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer)
: AsmPrinter(TM, std::move(Streamer)), SM(*this), FM(*this) {}
- const char *getPassName() const override {
- return "X86 Assembly / Object Emitter";
+ StringRef getPassName() const override {
+ return "X86 Assembly Printer";
}
const X86Subtarget &getSubtarget() const { return *Subtarget; }
diff --git a/contrib/llvm/lib/Target/X86/X86CallFrameOptimization.cpp b/contrib/llvm/lib/Target/X86/X86CallFrameOptimization.cpp
index 8f6fc40159ee..844c66d5a462 100644
--- a/contrib/llvm/lib/Target/X86/X86CallFrameOptimization.cpp
+++ b/contrib/llvm/lib/Target/X86/X86CallFrameOptimization.cpp
@@ -100,7 +100,7 @@ private:
const X86RegisterInfo &RegInfo,
DenseSet<unsigned int> &UsedRegs);
- const char *getPassName() const override { return "X86 Optimize Call Frame"; }
+ StringRef getPassName() const override { return "X86 Optimize Call Frame"; }
const TargetInstrInfo *TII;
const X86FrameLowering *TFL;
@@ -134,7 +134,7 @@ bool X86CallFrameOptimization::isLegal(MachineFunction &MF) {
// in the compact unwind encoding that Darwin uses. So, bail if there
// is a danger of that being generated.
if (STI->isTargetDarwin() &&
- (!MF.getMMI().getLandingPads().empty() ||
+ (!MF.getLandingPads().empty() ||
(MF.getFunction()->needsUnwindTableEntry() && !TFL->hasFP(MF))))
return false;
@@ -180,7 +180,7 @@ bool X86CallFrameOptimization::isProfitable(MachineFunction &MF,
// This transformation is always a win when we do not expect to have
// a reserved call frame. Under other circumstances, it may be either
// a win or a loss, and requires a heuristic.
- bool CannotReserveFrame = MF.getFrameInfo()->hasVarSizedObjects();
+ bool CannotReserveFrame = MF.getFrameInfo().hasVarSizedObjects();
if (CannotReserveFrame)
return true;
@@ -230,7 +230,7 @@ bool X86CallFrameOptimization::runOnMachineFunction(MachineFunction &MF) {
assert(isPowerOf2_32(SlotSize) && "Expect power of 2 stack slot size");
Log2SlotSize = Log2_32(SlotSize);
- if (!isLegal(MF))
+ if (skipFunction(*MF.getFunction()) || !isLegal(MF))
return false;
unsigned FrameSetupOpcode = TII->getCallFrameSetupOpcode();
@@ -345,10 +345,10 @@ void X86CallFrameOptimization::collectCallInfo(MachineFunction &MF,
return;
}
- // For globals in PIC mode, we can have some LEAs here.
- // Ignore them, they don't bother us.
+ // Skip over DEBUG_VALUE.
+ // For globals in PIC mode, we can have some LEAs here. Skip them as well.
// TODO: Extend this to something that covers more cases.
- while (I->getOpcode() == X86::LEA32r)
+ while (I->getOpcode() == X86::LEA32r || I->isDebugValue())
++I;
unsigned StackPtr = RegInfo.getStackRegister();
diff --git a/contrib/llvm/lib/Target/X86/X86CallLowering.cpp b/contrib/llvm/lib/Target/X86/X86CallLowering.cpp
new file mode 100644
index 000000000000..5ae4962378d3
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86CallLowering.cpp
@@ -0,0 +1,46 @@
+//===-- llvm/lib/Target/X86/X86CallLowering.cpp - Call lowering -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file implements the lowering of LLVM calls to machine code calls for
+/// GlobalISel.
+///
+//===----------------------------------------------------------------------===//
+
+#include "X86CallLowering.h"
+#include "X86ISelLowering.h"
+#include "X86InstrInfo.h"
+#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
+
+using namespace llvm;
+
+#ifndef LLVM_BUILD_GLOBAL_ISEL
+#error "This shouldn't be built without GISel"
+#endif
+
+X86CallLowering::X86CallLowering(const X86TargetLowering &TLI)
+ : CallLowering(&TLI) {}
+
+bool X86CallLowering::lowerReturn(MachineIRBuilder &MIRBuilder,
+ const Value *Val, unsigned VReg) const {
+ // TODO: handle functions returning non-void values.
+ if (Val)
+ return false;
+
+ MIRBuilder.buildInstr(X86::RET).addImm(0);
+
+ return true;
+}
+
+bool X86CallLowering::lowerFormalArguments(MachineIRBuilder &MIRBuilder,
+ const Function &F,
+ ArrayRef<unsigned> VRegs) const {
+ // TODO: handle functions with one or more arguments.
+ return F.arg_empty();
+}
diff --git a/contrib/llvm/lib/Target/X86/X86CallLowering.h b/contrib/llvm/lib/Target/X86/X86CallLowering.h
new file mode 100644
index 000000000000..f2672f09d855
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86CallLowering.h
@@ -0,0 +1,39 @@
+//===-- llvm/lib/Target/X86/X86CallLowering.h - Call lowering -----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file describes how to lower LLVM calls to machine code calls.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_X86_X86CALLLOWERING
+#define LLVM_LIB_TARGET_X86_X86CALLLOWERING
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+
+namespace llvm {
+
+class Function;
+class MachineIRBuilder;
+class X86TargetLowering;
+class Value;
+
+class X86CallLowering : public CallLowering {
+public:
+ X86CallLowering(const X86TargetLowering &TLI);
+
+ bool lowerReturn(MachineIRBuilder &MIRBuiler, const Value *Val,
+ unsigned VReg) const override;
+
+ bool lowerFormalArguments(MachineIRBuilder &MIRBuilder, const Function &F,
+ ArrayRef<unsigned> VRegs) const override;
+};
+} // End of namespace llvm;
+#endif
diff --git a/contrib/llvm/lib/Target/X86/X86CallingConv.cpp b/contrib/llvm/lib/Target/X86/X86CallingConv.cpp
new file mode 100644
index 000000000000..59dde982f512
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86CallingConv.cpp
@@ -0,0 +1,208 @@
+//=== X86CallingConv.cpp - X86 Custom Calling Convention Impl -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of custom routines for the X86
+// Calling Convention that aren't done by tablegen.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "X86Subtarget.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/IR/CallingConv.h"
+
+namespace llvm {
+
+bool CC_X86_32_RegCall_Assign2Regs(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+ // List of GPR registers that are available to store values in regcall
+ // calling convention.
+ static const MCPhysReg RegList[] = {X86::EAX, X86::ECX, X86::EDX, X86::EDI,
+ X86::ESI};
+
+ // The vector will save all the available registers for allocation.
+ SmallVector<unsigned, 5> AvailableRegs;
+
+ // Search for the available registers.
+ for (auto Reg : RegList) {
+ if (!State.isAllocated(Reg))
+ AvailableRegs.push_back(Reg);
+ }
+
+ const size_t RequiredGprsUponSplit = 2;
+ if (AvailableRegs.size() < RequiredGprsUponSplit)
+ return false; // Not enough free registers - continue the search.
+
+ // Allocating the available registers.
+ for (unsigned I = 0; I < RequiredGprsUponSplit; I++) {
+
+ // Mark the register as allocated.
+ unsigned Reg = State.AllocateReg(AvailableRegs[I]);
+
+ // Since we previously made sure that 2 registers are available
+ // we expect that a real register number will be returned.
+ assert(Reg && "Expecting a register will be available");
+
+ // Assign the value to the allocated register
+ State.addLoc(CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ }
+
+ // Successful in allocating registers - stop scanning the remaining rules.
+ return true;
+}
+
+static ArrayRef<MCPhysReg> CC_X86_VectorCallGetSSEs(const MVT &ValVT) {
+ if (ValVT.is512BitVector()) {
+ static const MCPhysReg RegListZMM[] = {X86::ZMM0, X86::ZMM1, X86::ZMM2,
+ X86::ZMM3, X86::ZMM4, X86::ZMM5};
+ return makeArrayRef(std::begin(RegListZMM), std::end(RegListZMM));
+ }
+
+ if (ValVT.is256BitVector()) {
+ static const MCPhysReg RegListYMM[] = {X86::YMM0, X86::YMM1, X86::YMM2,
+ X86::YMM3, X86::YMM4, X86::YMM5};
+ return makeArrayRef(std::begin(RegListYMM), std::end(RegListYMM));
+ }
+
+ static const MCPhysReg RegListXMM[] = {X86::XMM0, X86::XMM1, X86::XMM2,
+ X86::XMM3, X86::XMM4, X86::XMM5};
+ return makeArrayRef(std::begin(RegListXMM), std::end(RegListXMM));
+}
+
+static ArrayRef<MCPhysReg> CC_X86_64_VectorCallGetGPRs() {
+ static const MCPhysReg RegListGPR[] = {X86::RCX, X86::RDX, X86::R8, X86::R9};
+ return makeArrayRef(std::begin(RegListGPR), std::end(RegListGPR));
+}
+
+static bool CC_X86_VectorCallAssignRegister(unsigned &ValNo, MVT &ValVT,
+ MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+
+ ArrayRef<MCPhysReg> RegList = CC_X86_VectorCallGetSSEs(ValVT);
+ bool Is64bit = static_cast<const X86Subtarget &>(
+ State.getMachineFunction().getSubtarget())
+ .is64Bit();
+
+ for (auto Reg : RegList) {
+ // If the register is not marked as allocated - assign to it.
+ if (!State.isAllocated(Reg)) {
+ unsigned AssignedReg = State.AllocateReg(Reg);
+ assert(AssignedReg == Reg && "Expecting a valid register allocation");
+ State.addLoc(
+ CCValAssign::getReg(ValNo, ValVT, AssignedReg, LocVT, LocInfo));
+ return true;
+ }
+ // If the register is marked as shadow allocated - assign to it.
+ if (Is64bit && State.IsShadowAllocatedReg(Reg)) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return true;
+ }
+ }
+
+ llvm_unreachable("Clang should ensure that hva marked vectors will have "
+ "an available register.");
+ return false;
+}
+
+bool CC_X86_64_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+ // On the second pass, go through the HVAs only.
+ if (ArgFlags.isSecArgPass()) {
+ if (ArgFlags.isHva())
+ return CC_X86_VectorCallAssignRegister(ValNo, ValVT, LocVT, LocInfo,
+ ArgFlags, State);
+ return true;
+ }
+
+ // Process only vector types as defined by vectorcall spec:
+ // "A vector type is either a floating-point type, for example,
+ // a float or double, or an SIMD vector type, for example, __m128 or __m256".
+ if (!(ValVT.isFloatingPoint() ||
+ (ValVT.isVector() && ValVT.getSizeInBits() >= 128))) {
+ // If R9 was already assigned it means that we are after the fourth element
+ // and because this is not an HVA / Vector type, we need to allocate
+ // shadow XMM register.
+ if (State.isAllocated(X86::R9)) {
+ // Assign shadow XMM register.
+ (void)State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT));
+ }
+
+ return false;
+ }
+
+ if (!ArgFlags.isHva() || ArgFlags.isHvaStart()) {
+ // Assign shadow GPR register.
+ (void)State.AllocateReg(CC_X86_64_VectorCallGetGPRs());
+
+ // Assign XMM register - (shadow for HVA and non-shadow for non HVA).
+ if (unsigned Reg = State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT))) {
+ // In the vectorcall calling convention, an additional shadow stack can be
+ // created on top of the basic 32 bytes of Win64.
+ // This can happen if the fifth or sixth argument is a vector type or HVA.
+ // In that case, an 8 byte shadow stack slot is allocated for each argument.
+ if (Reg == X86::XMM4 || Reg == X86::XMM5)
+ State.AllocateStack(8, 8);
+
+ if (!ArgFlags.isHva()) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return true; // Allocated a register - Stop the search.
+ }
+ }
+ }
+
+ // If this is an HVA - Stop the search,
+ // otherwise continue the search.
+ return ArgFlags.isHva();
+}
+
+bool CC_X86_32_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State) {
+ // On the second pass, go through the HVAs only.
+ if (ArgFlags.isSecArgPass()) {
+ if (ArgFlags.isHva())
+ return CC_X86_VectorCallAssignRegister(ValNo, ValVT, LocVT, LocInfo,
+ ArgFlags, State);
+ return true;
+ }
+
+ // Process only vector types as defined by vectorcall spec:
+ // "A vector type is either a floating point type, for example,
+ // a float or double, or an SIMD vector type, for example, __m128 or __m256".
+ if (!(ValVT.isFloatingPoint() ||
+ (ValVT.isVector() && ValVT.getSizeInBits() >= 128))) {
+ return false;
+ }
+
+ if (ArgFlags.isHva())
+ return true; // If this is an HVA - Stop the search.
+
+ // Assign XMM register.
+ if (unsigned Reg = State.AllocateReg(CC_X86_VectorCallGetSSEs(ValVT))) {
+ State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return true;
+ }
+
+ // In case we did not find an available XMM register for a vector -
+ // pass it indirectly.
+ // It is similar to CCPassIndirect, with the addition of inreg.
+ if (!ValVT.isFloatingPoint()) {
+ LocVT = MVT::i32;
+ LocInfo = CCValAssign::Indirect;
+ ArgFlags.setInReg();
+ }
+
+ return false; // No register was assigned - Continue the search.
+}
+
+} // End llvm namespace
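CC_X86_32_RegCall_Assign2Regs above only reserves the two GPRs; conceptually, the 64-bit value (for example an __mmask64 under regcall on a 32-bit target) is then split into two 32-bit halves, one per allocated register out of {EAX, ECX, EDX, EDI, ESI}. A sketch of the split itself (which half lands in which register is decided by the lowering code, not by this routine):

    #include <cstdint>
    #include <utility>

    static std::pair<uint32_t, uint32_t> splitMask64(uint64_t Mask) {
      uint32_t Lo = static_cast<uint32_t>(Mask);        // one allocated GPR
      uint32_t Hi = static_cast<uint32_t>(Mask >> 32);  // the other GPR
      return {Lo, Hi};
    }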
diff --git a/contrib/llvm/lib/Target/X86/X86CallingConv.h b/contrib/llvm/lib/Target/X86/X86CallingConv.h
index a08160f9feba..c49a6838fa44 100644
--- a/contrib/llvm/lib/Target/X86/X86CallingConv.h
+++ b/contrib/llvm/lib/Target/X86/X86CallingConv.h
@@ -21,18 +21,32 @@
namespace llvm {
-inline bool CC_X86_32_VectorCallIndirect(unsigned &ValNo, MVT &ValVT,
- MVT &LocVT,
- CCValAssign::LocInfo &LocInfo,
- ISD::ArgFlagsTy &ArgFlags,
- CCState &State) {
- // Similar to CCPassIndirect, with the addition of inreg.
- LocVT = MVT::i32;
- LocInfo = CCValAssign::Indirect;
- ArgFlags.setInReg();
- return false; // Continue the search, but now for i32.
-}
-
+/// When the regcall calling convention is compiled for a 32 bit arch, special
+/// treatment is required for 64 bit masks.
+/// The value should be assigned to two GPRs.
+/// \return true if registers were allocated and false otherwise.
+bool CC_X86_32_RegCall_Assign2Regs(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State);
+
+/// The vectorcall calling convention has special handling for vector types and
+/// HVAs on a 64 bit arch.
+/// For HVAs shadow registers might be allocated on the first pass
+/// and actual XMM registers are allocated on the second pass.
+/// For vector types, actual XMM registers are allocated on the first pass.
+/// \return true if registers were allocated and false otherwise.
+bool CC_X86_64_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State);
+
+/// The vectorcall calling convention has special handling for vector types and
+/// HVAs on a 32 bit arch.
+/// For HVAs actual XMM registers are allocated on the second pass.
+/// For vector types, actual XMM registers are allocated on the first pass.
+/// \return true if registers were allocated and false otherwise.
+bool CC_X86_32_VectorCall(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags, CCState &State);
inline bool CC_X86_AnyReg_Error(unsigned &, MVT &, MVT &,
CCValAssign::LocInfo &, ISD::ArgFlagsTy &,
diff --git a/contrib/llvm/lib/Target/X86/X86CallingConv.td b/contrib/llvm/lib/Target/X86/X86CallingConv.td
index 4cb62b56bce4..cf7bc981b8a5 100644
--- a/contrib/llvm/lib/Target/X86/X86CallingConv.td
+++ b/contrib/llvm/lib/Target/X86/X86CallingConv.td
@@ -18,6 +18,179 @@ class CCIfSubtarget<string F, CCAction A>
"(State.getMachineFunction().getSubtarget()).", F),
A>;
+// Register classes for RegCall
+class RC_X86_RegCall {
+ list<Register> GPR_8 = [];
+ list<Register> GPR_16 = [];
+ list<Register> GPR_32 = [];
+ list<Register> GPR_64 = [];
+ list<Register> FP_CALL = [FP0];
+ list<Register> FP_RET = [FP0, FP1];
+ list<Register> XMM = [];
+ list<Register> YMM = [];
+ list<Register> ZMM = [];
+}
+
+// RegCall register classes for 32 bits
+def RC_X86_32_RegCall : RC_X86_RegCall {
+ let GPR_8 = [AL, CL, DL, DIL, SIL];
+ let GPR_16 = [AX, CX, DX, DI, SI];
+ let GPR_32 = [EAX, ECX, EDX, EDI, ESI];
+ let GPR_64 = [RAX]; ///< Not actually used, but AssignToReg can't handle []
+ ///< \todo Fix AssignToReg to enable empty lists
+ let XMM = [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7];
+ let YMM = [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7];
+ let ZMM = [ZMM0, ZMM1, ZMM2, ZMM3, ZMM4, ZMM5, ZMM6, ZMM7];
+}
+
+class RC_X86_64_RegCall : RC_X86_RegCall {
+ let XMM = [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15];
+ let YMM = [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
+ YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15];
+ let ZMM = [ZMM0, ZMM1, ZMM2, ZMM3, ZMM4, ZMM5, ZMM6, ZMM7,
+ ZMM8, ZMM9, ZMM10, ZMM11, ZMM12, ZMM13, ZMM14, ZMM15];
+}
+
+def RC_X86_64_RegCall_Win : RC_X86_64_RegCall {
+ let GPR_8 = [AL, CL, DL, DIL, SIL, R8B, R9B, R10B, R11B, R12B, R14B, R15B];
+ let GPR_16 = [AX, CX, DX, DI, SI, R8W, R9W, R10W, R11W, R12W, R14W, R15W];
+ let GPR_32 = [EAX, ECX, EDX, EDI, ESI, R8D, R9D, R10D, R11D, R12D, R14D, R15D];
+ let GPR_64 = [RAX, RCX, RDX, RDI, RSI, R8, R9, R10, R11, R12, R14, R15];
+}
+
+def RC_X86_64_RegCall_SysV : RC_X86_64_RegCall {
+ let GPR_8 = [AL, CL, DL, DIL, SIL, R8B, R9B, R12B, R13B, R14B, R15B];
+ let GPR_16 = [AX, CX, DX, DI, SI, R8W, R9W, R12W, R13W, R14W, R15W];
+ let GPR_32 = [EAX, ECX, EDX, EDI, ESI, R8D, R9D, R12D, R13D, R14D, R15D];
+ let GPR_64 = [RAX, RCX, RDX, RDI, RSI, R8, R9, R12, R13, R14, R15];
+}
+
+// X86-64 Intel regcall calling convention.
+multiclass X86_RegCall_base<RC_X86_RegCall RC> {
+def CC_#NAME : CallingConv<[
+ // Handles byval parameters.
+ CCIfSubtarget<"is64Bit()", CCIfByVal<CCPassByVal<8, 8>>>,
+ CCIfByVal<CCPassByVal<4, 4>>,
+
+ // Promote i1/i8/i16 arguments to i32.
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
+
+ // Promote v8i1/v16i1/v32i1 arguments to i32.
+ CCIfType<[v8i1, v16i1, v32i1], CCPromoteToType<i32>>,
+
+ // bool, char, int, enum, long, pointer --> GPR
+ CCIfType<[i32], CCAssignToReg<RC.GPR_32>>,
+
+ // long long, __int64 --> GPR
+ CCIfType<[i64], CCAssignToReg<RC.GPR_64>>,
+
+ // __mmask64 (v64i1) --> GPR64 (for x64) or 2 x GPR32 (for IA32)
+ CCIfType<[v64i1], CCPromoteToType<i64>>,
+ CCIfSubtarget<"is64Bit()", CCIfType<[i64],
+ CCAssignToReg<RC.GPR_64>>>,
+ CCIfSubtarget<"is32Bit()", CCIfType<[i64],
+ CCCustom<"CC_X86_32_RegCall_Assign2Regs">>>,
+
+ // float, double, float128 --> XMM
+ // In the case of SSE disabled --> save to stack
+ CCIfType<[f32, f64, f128],
+ CCIfSubtarget<"hasSSE1()", CCAssignToReg<RC.XMM>>>,
+
+ // long double --> FP
+ CCIfType<[f80], CCAssignToReg<RC.FP_CALL>>,
+
+ // __m128, __m128i, __m128d --> XMM
+ // In the case of SSE disabled --> save to stack
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCIfSubtarget<"hasSSE1()", CCAssignToReg<RC.XMM>>>,
+
+ // __m256, __m256i, __m256d --> YMM
+ // In the case of SSE disabled --> save to stack
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCIfSubtarget<"hasAVX()", CCAssignToReg<RC.YMM>>>,
+
+ // __m512, __m512i, __m512d --> ZMM
+ // In the case of SSE disabled --> save to stack
+ CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
+ CCIfSubtarget<"hasAVX512()",CCAssignToReg<RC.ZMM>>>,
+
+ // If no register was found -> assign to stack
+
+ // In 64 bit, assign 64/32 bit values to 8 byte stack
+ CCIfSubtarget<"is64Bit()", CCIfType<[i32, i64, f32, f64],
+ CCAssignToStack<8, 8>>>,
+
+ // In 32 bit, assign 64/32 bit values to 8/4 byte stack
+ CCIfType<[i32, f32], CCAssignToStack<4, 4>>,
+ CCIfType<[i64, f64], CCAssignToStack<8, 4>>,
+
+ // The MMX type gets an 8 byte stack slot, while its alignment depends on the target
+ CCIfSubtarget<"is64Bit()", CCIfType<[x86mmx], CCAssignToStack<8, 8>>>,
+ CCIfType<[x86mmx], CCAssignToStack<8, 4>>,
+
+ // f80 and f128 get stack slots whose size and alignment depend
+ // on the subtarget.
+ CCIfType<[f80, f128], CCAssignToStack<0, 0>>,
+
+ // Vectors get 16-byte stack slots that are 16-byte aligned.
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCAssignToStack<16, 16>>,
+
+ // 256-bit vectors get 32-byte stack slots that are 32-byte aligned.
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCAssignToStack<32, 32>>,
+
+ // 512-bit vectors get 64-byte stack slots that are 64-byte aligned.
+ CCIfType<[v16i32, v8i64, v16f32, v8f64], CCAssignToStack<64, 64>>
+]>;
+
+def RetCC_#NAME : CallingConv<[
+ // Promote i1, v8i1 arguments to i8.
+ CCIfType<[i1, v8i1], CCPromoteToType<i8>>,
+
+ // Promote v16i1 arguments to i16.
+ CCIfType<[v16i1], CCPromoteToType<i16>>,
+
+ // Promote v32i1 arguments to i32.
+ CCIfType<[v32i1], CCPromoteToType<i32>>,
+
+ // bool, char, int, enum, long, pointer --> GPR
+ CCIfType<[i8], CCAssignToReg<RC.GPR_8>>,
+ CCIfType<[i16], CCAssignToReg<RC.GPR_16>>,
+ CCIfType<[i32], CCAssignToReg<RC.GPR_32>>,
+
+ // long long, __int64 --> GPR
+ CCIfType<[i64], CCAssignToReg<RC.GPR_64>>,
+
+ // __mmask64 (v64i1) --> GPR64 (for x64) or 2 x GPR32 (for IA32)
+ CCIfType<[v64i1], CCPromoteToType<i64>>,
+ CCIfSubtarget<"is64Bit()", CCIfType<[i64],
+ CCAssignToReg<RC.GPR_64>>>,
+ CCIfSubtarget<"is32Bit()", CCIfType<[i64],
+ CCCustom<"CC_X86_32_RegCall_Assign2Regs">>>,
+
+ // long double --> FP
+ CCIfType<[f80], CCAssignToReg<RC.FP_RET>>,
+
+ // float, double, float128 --> XMM
+ CCIfType<[f32, f64, f128],
+ CCIfSubtarget<"hasSSE1()", CCAssignToReg<RC.XMM>>>,
+
+ // __m128, __m128i, __m128d --> XMM
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCIfSubtarget<"hasSSE1()", CCAssignToReg<RC.XMM>>>,
+
+ // __m256, __m256i, __m256d --> YMM
+ CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
+ CCIfSubtarget<"hasAVX()", CCAssignToReg<RC.YMM>>>,
+
+ // __m512, __m512i, __m512d --> ZMM
+ CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
+ CCIfSubtarget<"hasAVX512()", CCAssignToReg<RC.ZMM>>>
+]>;
+}
+
//===----------------------------------------------------------------------===//
// Return Value Calling Conventions
//===----------------------------------------------------------------------===//
@@ -135,20 +308,12 @@ def RetCC_X86_32_HiPE : CallingConv<[
CCIfType<[i32], CCAssignToReg<[ESI, EBP, EAX, EDX]>>
]>;
-// X86-32 HiPE return-value convention.
+// X86-32 Vectorcall return-value convention.
def RetCC_X86_32_VectorCall : CallingConv<[
- // Vector types are returned in XMM0,XMM1,XMMM2 and XMM3.
- CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ // Floating point types are returned in XMM0, XMM1, XMM2 and XMM3.
+ CCIfType<[f32, f64, f128],
CCAssignToReg<[XMM0,XMM1,XMM2,XMM3]>>,
- // 256-bit FP vectors
- CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
- CCAssignToReg<[YMM0,YMM1,YMM2,YMM3]>>,
-
- // 512-bit FP vectors
- CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
- CCAssignToReg<[ZMM0,ZMM1,ZMM2,ZMM3]>>,
-
// Return integers in the standard way.
CCDelegateTo<RetCC_X86Common>
]>;
@@ -177,6 +342,16 @@ def RetCC_X86_Win64_C : CallingConv<[
CCDelegateTo<RetCC_X86_64_C>
]>;
+// X86-64 vectorcall return-value convention.
+def RetCC_X86_64_Vectorcall : CallingConv<[
+ // Vectorcall calling convention always returns FP values in XMMs.
+ CCIfType<[f32, f64, f128],
+ CCAssignToReg<[XMM0, XMM1, XMM2, XMM3]>>,
+
+ // Otherwise, everything is the same as Windows X86-64 C CC.
+ CCDelegateTo<RetCC_X86_Win64_C>
+]>;
+
// X86-64 HiPE return-value convention.
def RetCC_X86_64_HiPE : CallingConv<[
// Promote all types to i64
@@ -196,6 +371,9 @@ def RetCC_X86_64_WebKit_JS : CallingConv<[
]>;
def RetCC_X86_64_Swift : CallingConv<[
+
+ CCIfSwiftError<CCIfType<[i64], CCAssignToReg<[R12]>>>,
+
// For integers, ECX, R8D can be used as extra return registers.
CCIfType<[i1], CCPromoteToType<i8>>,
CCIfType<[i8] , CCAssignToReg<[AL, DL, CL, R8B]>>,
@@ -234,6 +412,14 @@ def RetCC_X86_64_HHVM: CallingConv<[
RAX, R10, R11, R13, R14, R15]>>
]>;
+
+defm X86_32_RegCall :
+ X86_RegCall_base<RC_X86_32_RegCall>;
+defm X86_Win64_RegCall :
+ X86_RegCall_base<RC_X86_64_RegCall_Win>;
+defm X86_SysV64_RegCall :
+ X86_RegCall_base<RC_X86_64_RegCall_SysV>;
+
// This is the root return-value convention for the X86-32 backend.
def RetCC_X86_32 : CallingConv<[
// If FastCC, use RetCC_X86_32_Fast.
@@ -241,6 +427,7 @@ def RetCC_X86_32 : CallingConv<[
// If HiPE, use RetCC_X86_32_HiPE.
CCIfCC<"CallingConv::HiPE", CCDelegateTo<RetCC_X86_32_HiPE>>,
CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<RetCC_X86_32_VectorCall>>,
+ CCIfCC<"CallingConv::X86_RegCall", CCDelegateTo<RetCC_X86_32_RegCall>>,
// Otherwise, use RetCC_X86_32_C.
CCDelegateTo<RetCC_X86_32_C>
@@ -262,9 +449,17 @@ def RetCC_X86_64 : CallingConv<[
CCIfCC<"CallingConv::X86_64_Win64", CCDelegateTo<RetCC_X86_Win64_C>>,
CCIfCC<"CallingConv::X86_64_SysV", CCDelegateTo<RetCC_X86_64_C>>,
+ // Handle Vectorcall CC
+ CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<RetCC_X86_64_Vectorcall>>,
+
// Handle HHVM calls.
CCIfCC<"CallingConv::HHVM", CCDelegateTo<RetCC_X86_64_HHVM>>,
+ CCIfCC<"CallingConv::X86_RegCall",
+ CCIfSubtarget<"isTargetWin64()",
+ CCDelegateTo<RetCC_X86_Win64_RegCall>>>,
+ CCIfCC<"CallingConv::X86_RegCall", CCDelegateTo<RetCC_X86_SysV64_RegCall>>,
+
// Mingw64 and native Win64 use Win64 CC
CCIfSubtarget<"isTargetWin64()", CCDelegateTo<RetCC_X86_Win64_C>>,
@@ -436,18 +631,7 @@ def CC_X86_Win64_C : CallingConv<[
]>;
def CC_X86_Win64_VectorCall : CallingConv<[
- // The first 6 floating point and vector types of 128 bits or less use
- // XMM0-XMM5.
- CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
- CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5]>>,
-
- // 256-bit vectors use YMM registers.
- CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
- CCAssignToReg<[YMM0, YMM1, YMM2, YMM3, YMM4, YMM5]>>,
-
- // 512-bit vectors use ZMM registers.
- CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
- CCAssignToReg<[ZMM0, ZMM1, ZMM2, ZMM3, ZMM4, ZMM5]>>,
+ CCCustom<"CC_X86_64_VectorCall">,
// Delegate to fastcall to handle integer types.
CCDelegateTo<CC_X86_Win64_C>
@@ -657,25 +841,9 @@ def CC_X86_32_FastCall : CallingConv<[
CCDelegateTo<CC_X86_32_Common>
]>;
-def CC_X86_32_VectorCall : CallingConv<[
- // The first 6 floating point and vector types of 128 bits or less use
- // XMM0-XMM5.
- CCIfType<[f32, f64, v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
- CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5]>>,
-
- // 256-bit vectors use YMM registers.
- CCIfType<[v32i8, v16i16, v8i32, v4i64, v8f32, v4f64],
- CCAssignToReg<[YMM0, YMM1, YMM2, YMM3, YMM4, YMM5]>>,
-
- // 512-bit vectors use ZMM registers.
- CCIfType<[v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
- CCAssignToReg<[ZMM0, ZMM1, ZMM2, ZMM3, ZMM4, ZMM5]>>,
-
- // Otherwise, pass it indirectly.
- CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64,
- v32i8, v16i16, v8i32, v4i64, v8f32, v4f64,
- v64i8, v32i16, v16i32, v8i64, v16f32, v8f64],
- CCCustom<"CC_X86_32_VectorCallIndirect">>,
+def CC_X86_Win32_VectorCall : CallingConv<[
+ // Pass floating point in XMMs
+ CCCustom<"CC_X86_32_VectorCall">,
// Delegate to fastcall to handle integer types.
CCDelegateTo<CC_X86_32_FastCall>
@@ -809,11 +977,12 @@ def CC_X86_32 : CallingConv<[
CCIfCC<"CallingConv::X86_INTR", CCDelegateTo<CC_X86_32_Intr>>,
CCIfSubtarget<"isTargetMCU()", CCDelegateTo<CC_X86_32_MCU>>,
CCIfCC<"CallingConv::X86_FastCall", CCDelegateTo<CC_X86_32_FastCall>>,
- CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<CC_X86_32_VectorCall>>,
+ CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<CC_X86_Win32_VectorCall>>,
CCIfCC<"CallingConv::X86_ThisCall", CCDelegateTo<CC_X86_32_ThisCall>>,
CCIfCC<"CallingConv::Fast", CCDelegateTo<CC_X86_32_FastCC>>,
CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_X86_32_GHC>>,
CCIfCC<"CallingConv::HiPE", CCDelegateTo<CC_X86_32_HiPE>>,
+ CCIfCC<"CallingConv::X86_RegCall", CCDelegateTo<CC_X86_32_RegCall>>,
// Otherwise, drop to normal X86-32 CC
CCDelegateTo<CC_X86_32_C>
@@ -830,6 +999,9 @@ def CC_X86_64 : CallingConv<[
CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo<CC_X86_Win64_VectorCall>>,
CCIfCC<"CallingConv::HHVM", CCDelegateTo<CC_X86_64_HHVM>>,
CCIfCC<"CallingConv::HHVM_C", CCDelegateTo<CC_X86_64_HHVM_C>>,
+ CCIfCC<"CallingConv::X86_RegCall",
+ CCIfSubtarget<"isTargetWin64()", CCDelegateTo<CC_X86_Win64_RegCall>>>,
+ CCIfCC<"CallingConv::X86_RegCall", CCDelegateTo<CC_X86_SysV64_RegCall>>,
CCIfCC<"CallingConv::X86_INTR", CCDelegateTo<CC_X86_64_Intr>>,
// Mingw64 and native Win64 use Win64 CC
@@ -860,7 +1032,9 @@ def CSR_64_SwiftError : CalleeSavedRegs<(sub CSR_64, R12)>;
def CSR_32EHRet : CalleeSavedRegs<(add EAX, EDX, CSR_32)>;
def CSR_64EHRet : CalleeSavedRegs<(add RAX, RDX, CSR_64)>;
-def CSR_Win64 : CalleeSavedRegs<(add RBX, RBP, RDI, RSI, R12, R13, R14, R15,
+def CSR_Win64_NoSSE : CalleeSavedRegs<(add RBX, RBP, RDI, RSI, R12, R13, R14, R15)>;
+
+def CSR_Win64 : CalleeSavedRegs<(add CSR_Win64_NoSSE,
(sequence "XMM%u", 6, 15))>;
// The function used by Darwin to obtain the address of a thread-local variable
@@ -931,3 +1105,17 @@ def CSR_64_Intel_OCL_BI_AVX512 : CalleeSavedRegs<(add RBX, RDI, RSI, R14, R15,
// Only R12 is preserved for PHP calls in HHVM.
def CSR_64_HHVM : CalleeSavedRegs<(add R12)>;
+
+// Register calling convention preserves few GPR and XMM8-15
+def CSR_32_RegCall_NoSSE : CalleeSavedRegs<(add ESI, EDI, EBX, EBP, ESP)>;
+def CSR_32_RegCall : CalleeSavedRegs<(add CSR_32_RegCall_NoSSE,
+ (sequence "XMM%u", 4, 7))>;
+def CSR_Win64_RegCall_NoSSE : CalleeSavedRegs<(add RBX, RBP, RSP,
+ (sequence "R%u", 10, 15))>;
+def CSR_Win64_RegCall : CalleeSavedRegs<(add CSR_Win64_RegCall_NoSSE,
+ (sequence "XMM%u", 8, 15))>;
+def CSR_SysV64_RegCall_NoSSE : CalleeSavedRegs<(add RBX, RBP, RSP,
+ (sequence "R%u", 12, 15))>;
+def CSR_SysV64_RegCall : CalleeSavedRegs<(add CSR_SysV64_RegCall_NoSSE,
+ (sequence "XMM%u", 8, 15))>;
+
diff --git a/contrib/llvm/lib/Target/X86/X86EvexToVex.cpp b/contrib/llvm/lib/Target/X86/X86EvexToVex.cpp
new file mode 100755
index 000000000000..bdd1ab537bb2
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86EvexToVex.cpp
@@ -0,0 +1,213 @@
+//===----------------------- X86EvexToVex.cpp ----------------------------===//
+// Compress EVEX instructions to VEX encoding when possible to reduce code size
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===---------------------------------------------------------------------===//
+/// \file
+/// This file defines the pass that goes over all AVX-512 instructions which
+/// are encoded using the EVEX prefix and if possible replaces them by their
+/// corresponding VEX encoding which is usually shorter by 2 bytes.
+/// EVEX instructions may be encoded via the VEX prefix when the AVX-512
+/// instruction has a corresponding AVX/AVX2 opcode and when it does not
+/// use the zmm or mask registers, or xmm/ymm registers with indexes
+/// higher than 15.
+/// The pass applies code reduction on the generated code for AVX-512 instrs.
+///
+//===---------------------------------------------------------------------===//
+
+#include "InstPrinter/X86InstComments.h"
+#include "X86.h"
+#include "X86InstrBuilder.h"
+#include "X86InstrInfo.h"
+#include "X86InstrTablesInfo.h"
+#include "X86MachineFunctionInfo.h"
+#include "X86Subtarget.h"
+#include "X86TargetMachine.h"
+
+using namespace llvm;
+
+#define EVEX2VEX_DESC "Compressing EVEX instrs to VEX encoding when possible"
+#define EVEX2VEX_NAME "x86-evex-to-vex-compress"
+
+#define DEBUG_TYPE EVEX2VEX_NAME
+
+namespace {
+
+class EvexToVexInstPass : public MachineFunctionPass {
+
+ /// X86EvexToVexCompressTable - Evex to Vex encoding opcode map.
+ typedef DenseMap<unsigned, uint16_t> EvexToVexTableType;
+ EvexToVexTableType EvexToVex128Table;
+ EvexToVexTableType EvexToVex256Table;
+
+ /// For EVEX instructions that can be encoded using VEX encoding, replace
+ /// them by the VEX encoding in order to reduce size.
+ bool CompressEvexToVexImpl(MachineInstr &MI) const;
+
+ /// Used when initializing the hash map tables of all AVX-512 EVEX instructions
+ /// that have corresponding AVX/AVX2 opcodes.
+ void AddTableEntry(EvexToVexTableType &EvexToVexTable, uint16_t EvexOp,
+ uint16_t VexOp);
+
+public:
+ static char ID;
+
+ StringRef getPassName() const override { return EVEX2VEX_DESC; }
+
+ EvexToVexInstPass() : MachineFunctionPass(ID) {
+ initializeEvexToVexInstPassPass(*PassRegistry::getPassRegistry());
+
+ // Initialize the EVEX to VEX 128 table map.
+ for (X86EvexToVexCompressTableEntry Entry : X86EvexToVex128CompressTable) {
+ AddTableEntry(EvexToVex128Table, Entry.EvexOpcode, Entry.VexOpcode);
+ }
+
+ // Initialize the EVEX to VEX 256 table map.
+ for (X86EvexToVexCompressTableEntry Entry : X86EvexToVex256CompressTable) {
+ AddTableEntry(EvexToVex256Table, Entry.EvexOpcode, Entry.VexOpcode);
+ }
+ }
+
+ /// Loop over all of the basic blocks, replacing EVEX instructions
+ /// by equivalent VEX instructions when possible for reducing code size.
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ // This pass runs after regalloc and doesn't support VReg operands.
+ MachineFunctionProperties getRequiredProperties() const override {
+ return MachineFunctionProperties().set(
+ MachineFunctionProperties::Property::NoVRegs);
+ }
+
+private:
+ /// Machine instruction info used throughout the class.
+ const X86InstrInfo *TII;
+};
+
+char EvexToVexInstPass::ID = 0;
+}
+
+INITIALIZE_PASS(EvexToVexInstPass, EVEX2VEX_NAME, EVEX2VEX_DESC, false, false)
+
+FunctionPass *llvm::createX86EvexToVexInsts() {
+ return new EvexToVexInstPass();
+}
+
+bool EvexToVexInstPass::runOnMachineFunction(MachineFunction &MF) {
+ TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
+
+ const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
+ if (!ST.hasAVX512())
+ return false;
+
+ bool Changed = false;
+
+ /// Go over all basic blocks in function and replace
+ /// EVEX encoded instrs by VEX encoding when possible.
+ for (MachineBasicBlock &MBB : MF) {
+
+ // Traverse the basic block.
+ for (MachineInstr &MI : MBB)
+ Changed |= CompressEvexToVexImpl(MI);
+ }
+
+ return Changed;
+}
+
+void EvexToVexInstPass::AddTableEntry(EvexToVexTableType &EvexToVexTable,
+ uint16_t EvexOp, uint16_t VexOp) {
+ EvexToVexTable[EvexOp] = VexOp;
+}
+
+// For EVEX instructions that can be encoded using VEX encoding
+// replace them by the VEX encoding in order to reduce size.
+bool EvexToVexInstPass::CompressEvexToVexImpl(MachineInstr &MI) const {
+
+ // VEX format.
+ // # of bytes: 0,2,3 1 1 0,1 0,1,2,4 0,1
+ // [Prefixes] [VEX] OPCODE ModR/M [SIB] [DISP] [IMM]
+ //
+ // EVEX format.
+ // # of bytes: 4 1 1 1 4 / 1 1
+ // [Prefixes] EVEX Opcode ModR/M [SIB] [Disp32] / [Disp8*N] [Immediate]
+
+ const MCInstrDesc &Desc = MI.getDesc();
+
+ // Check for EVEX instructions only.
+ if ((Desc.TSFlags & X86II::EncodingMask) != X86II::EVEX)
+ return false;
+
+ // Check for EVEX instructions with mask or broadcast as in these cases
+ // the EVEX prefix is needed in order to carry this information
+ // thus preventing the transformation to VEX encoding.
+ if (Desc.TSFlags & (X86II::EVEX_K | X86II::EVEX_B))
+ return false;
+
+ // Check for non EVEX_V512 instrs only.
+ // EVEX_V512 instr: bit EVEX_L2 = 1; bit VEX_L = 0.
+ if ((Desc.TSFlags & X86II::EVEX_L2) && !(Desc.TSFlags & X86II::VEX_L))
+ return false;
+
+ // EVEX_V128 instr: bit EVEX_L2 = 0, bit VEX_L = 0.
+ bool IsEVEX_V128 =
+ (!(Desc.TSFlags & X86II::EVEX_L2) && !(Desc.TSFlags & X86II::VEX_L));
+
+ // EVEX_V256 instr: bit EVEX_L2 = 0, bit VEX_L = 1.
+ bool IsEVEX_V256 =
+ (!(Desc.TSFlags & X86II::EVEX_L2) && (Desc.TSFlags & X86II::VEX_L));
+
+ unsigned NewOpc = 0;
+
+ // Check for EVEX_V256 instructions.
+ if (IsEVEX_V256) {
+ // Search for opcode in the EvexToVex256 table.
+ auto It = EvexToVex256Table.find(MI.getOpcode());
+ if (It != EvexToVex256Table.end())
+ NewOpc = It->second;
+ }
+
+ // Check for EVEX_V128 or Scalar instructions.
+ else if (IsEVEX_V128) {
+ // Search for opcode in the EvexToVex128 table.
+ auto It = EvexToVex128Table.find(MI.getOpcode());
+ if (It != EvexToVex128Table.end())
+ NewOpc = It->second;
+ }
+
+ if (!NewOpc)
+ return false;
+
+ auto isHiRegIdx = [](unsigned Reg) {
+ // Check for XMM register with indexes between 16 - 31.
+ if (Reg >= X86::XMM16 && Reg <= X86::XMM31)
+ return true;
+
+ // Check for YMM register with indexes between 16 - 31.
+ if (Reg >= X86::YMM16 && Reg <= X86::YMM31)
+ return true;
+
+ return false;
+ };
+
+ // Check that operands are not ZMM regs or
+ // XMM/YMM regs with hi indexes between 16 - 31.
+ for (const MachineOperand &MO : MI.explicit_operands()) {
+ if (!MO.isReg())
+ continue;
+
+ unsigned Reg = MO.getReg();
+
+ assert (!(Reg >= X86::ZMM0 && Reg <= X86::ZMM31));
+
+ if (isHiRegIdx(Reg))
+ return false;
+ }
+
+ const MCInstrDesc &MCID = TII->get(NewOpc);
+ MI.setDesc(MCID);
+ MI.setAsmPrinterFlag(AC_EVEX_2_VEX);
+ return true;
+}
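The legality checks in CompressEvexToVexImpl can be read as a single predicate; restated as a sketch (the booleans here only stand for the TSFlags and operand tests performed above):

    static bool isCompressionCandidate(bool IsEVEXEncoded, bool HasMaskOrBroadcast,
                                       bool Is512Bit, bool UsesHighOrZMMReg,
                                       bool HasVEXEquivalentOpcode) {
      return IsEVEXEncoded && !HasMaskOrBroadcast && !Is512Bit &&
             !UsesHighOrZMMReg && HasVEXEquivalentOpcode;
    }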
diff --git a/contrib/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/contrib/llvm/lib/Target/X86/X86ExpandPseudo.cpp
index 093fed7276f7..192b942490e7 100644
--- a/contrib/llvm/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86ExpandPseudo.cpp
@@ -51,10 +51,10 @@ public:
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "X86 pseudo instruction expansion pass";
}
@@ -77,9 +77,11 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
default:
return false;
case X86::TCRETURNdi:
+ case X86::TCRETURNdicc:
case X86::TCRETURNri:
case X86::TCRETURNmi:
case X86::TCRETURNdi64:
+ case X86::TCRETURNdi64cc:
case X86::TCRETURNri64:
case X86::TCRETURNmi64: {
bool isMem = Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64;
@@ -94,9 +96,13 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");
// Incorporate the retaddr area.
- Offset = StackAdj-MaxTCDelta;
+ Offset = StackAdj - MaxTCDelta;
assert(Offset >= 0 && "Offset should never be negative");
+ if (Opcode == X86::TCRETURNdicc || Opcode == X86::TCRETURNdi64cc) {
+ assert(Offset == 0 && "Conditional tail call cannot adjust the stack.");
+ }
+
if (Offset) {
// Check for possible merge with preceding ADD instruction.
Offset += X86FL->mergeSPUpdates(MBB, MBBI, true);
@@ -105,19 +111,40 @@ bool X86ExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
// Jump to label or value in register.
bool IsWin64 = STI->isTargetWin64();
- if (Opcode == X86::TCRETURNdi || Opcode == X86::TCRETURNdi64) {
- unsigned Op = (Opcode == X86::TCRETURNdi)
- ? X86::TAILJMPd
- : (IsWin64 ? X86::TAILJMPd64_REX : X86::TAILJMPd64);
+ if (Opcode == X86::TCRETURNdi || Opcode == X86::TCRETURNdicc ||
+ Opcode == X86::TCRETURNdi64 || Opcode == X86::TCRETURNdi64cc) {
+ unsigned Op;
+ switch (Opcode) {
+ case X86::TCRETURNdi:
+ Op = X86::TAILJMPd;
+ break;
+ case X86::TCRETURNdicc:
+ Op = X86::TAILJMPd_CC;
+ break;
+ case X86::TCRETURNdi64cc:
+ assert(!IsWin64 && "Conditional tail calls confuse the Win64 unwinder.");
+ // TODO: We could do it for Win64 "leaf" functions though; PR30337.
+ Op = X86::TAILJMPd64_CC;
+ break;
+ default:
+ // Note: Win64 uses REX prefixes for indirect jumps out of functions, but
+ // not for direct ones.
+ Op = X86::TAILJMPd64;
+ break;
+ }
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(Op));
- if (JumpTarget.isGlobal())
+ if (JumpTarget.isGlobal()) {
MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
JumpTarget.getTargetFlags());
- else {
+ } else {
assert(JumpTarget.isSymbol());
MIB.addExternalSymbol(JumpTarget.getSymbolName(),
JumpTarget.getTargetFlags());
}
+ if (Op == X86::TAILJMPd_CC || Op == X86::TAILJMPd64_CC) {
+ MIB.addImm(MBBI->getOperand(2).getImm());
+ }
+
} else if (Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64) {
unsigned Op = (Opcode == X86::TCRETURNmi)
? X86::TAILJMPm
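The TCRETURNdicc/TCRETURNdi64cc pseudos expanded above exist so that a conditional branch guarding a return-of-a-call can be folded into a single conditional direct tail jump. An illustrative source pattern (not taken from this patch) that such an expansion targets:

    static int callee(int V) { return V - 1; }

    int caller(int X) {
      if (X < 0)
        return callee(X);   // may become a conditional tail jump (TAILJMPd_CC)
      return X;
    }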
diff --git a/contrib/llvm/lib/Target/X86/X86FastISel.cpp b/contrib/llvm/lib/Target/X86/X86FastISel.cpp
index dfe3c80be21d..c890fdd1e519 100644
--- a/contrib/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FastISel.cpp
@@ -170,6 +170,12 @@ private:
const MachineInstrBuilder &addFullAddress(const MachineInstrBuilder &MIB,
X86AddressMode &AM);
+
+ unsigned fastEmitInst_rrrr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC, unsigned Op0,
+ bool Op0IsKill, unsigned Op1, bool Op1IsKill,
+ unsigned Op2, bool Op2IsKill, unsigned Op3,
+ bool Op3IsKill);
};
} // end anonymous namespace.
@@ -182,18 +188,18 @@ getX86ConditionCode(CmpInst::Predicate Predicate) {
default: break;
// Floating-point Predicates
case CmpInst::FCMP_UEQ: CC = X86::COND_E; break;
- case CmpInst::FCMP_OLT: NeedSwap = true; // fall-through
+ case CmpInst::FCMP_OLT: NeedSwap = true; LLVM_FALLTHROUGH;
case CmpInst::FCMP_OGT: CC = X86::COND_A; break;
- case CmpInst::FCMP_OLE: NeedSwap = true; // fall-through
+ case CmpInst::FCMP_OLE: NeedSwap = true; LLVM_FALLTHROUGH;
case CmpInst::FCMP_OGE: CC = X86::COND_AE; break;
- case CmpInst::FCMP_UGT: NeedSwap = true; // fall-through
+ case CmpInst::FCMP_UGT: NeedSwap = true; LLVM_FALLTHROUGH;
case CmpInst::FCMP_ULT: CC = X86::COND_B; break;
- case CmpInst::FCMP_UGE: NeedSwap = true; // fall-through
+ case CmpInst::FCMP_UGE: NeedSwap = true; LLVM_FALLTHROUGH;
case CmpInst::FCMP_ULE: CC = X86::COND_BE; break;
case CmpInst::FCMP_ONE: CC = X86::COND_NE; break;
case CmpInst::FCMP_UNO: CC = X86::COND_P; break;
case CmpInst::FCMP_ORD: CC = X86::COND_NP; break;
- case CmpInst::FCMP_OEQ: // fall-through
+ case CmpInst::FCMP_OEQ: LLVM_FALLTHROUGH;
case CmpInst::FCMP_UNE: CC = X86::COND_INVALID; break;
// Integer Predicates
@@ -229,15 +235,15 @@ getX86SSEConditionCode(CmpInst::Predicate Predicate) {
switch (Predicate) {
default: llvm_unreachable("Unexpected predicate");
case CmpInst::FCMP_OEQ: CC = 0; break;
- case CmpInst::FCMP_OGT: NeedSwap = true; // fall-through
+ case CmpInst::FCMP_OGT: NeedSwap = true; LLVM_FALLTHROUGH;
case CmpInst::FCMP_OLT: CC = 1; break;
- case CmpInst::FCMP_OGE: NeedSwap = true; // fall-through
+ case CmpInst::FCMP_OGE: NeedSwap = true; LLVM_FALLTHROUGH;
case CmpInst::FCMP_OLE: CC = 2; break;
case CmpInst::FCMP_UNO: CC = 3; break;
case CmpInst::FCMP_UNE: CC = 4; break;
- case CmpInst::FCMP_ULE: NeedSwap = true; // fall-through
+ case CmpInst::FCMP_ULE: NeedSwap = true; LLVM_FALLTHROUGH;
case CmpInst::FCMP_UGE: CC = 5; break;
- case CmpInst::FCMP_ULT: NeedSwap = true; // fall-through
+ case CmpInst::FCMP_ULT: NeedSwap = true; LLVM_FALLTHROUGH;
case CmpInst::FCMP_UGT: CC = 6; break;
case CmpInst::FCMP_ORD: CC = 7; break;
case CmpInst::FCMP_UEQ:
@@ -351,6 +357,8 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
bool HasSSE41 = Subtarget->hasSSE41();
bool HasAVX = Subtarget->hasAVX();
bool HasAVX2 = Subtarget->hasAVX2();
+ bool HasAVX512 = Subtarget->hasAVX512();
+ bool HasVLX = Subtarget->hasVLX();
bool IsNonTemporal = MMO && MMO->isNonTemporal();
// Get opcode and regclass of the output for the given load instruction.
@@ -378,7 +386,7 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
break;
case MVT::f32:
if (X86ScalarSSEf32) {
- Opc = HasAVX ? X86::VMOVSSrm : X86::MOVSSrm;
+ Opc = HasAVX512 ? X86::VMOVSSZrm : HasAVX ? X86::VMOVSSrm : X86::MOVSSrm;
RC = &X86::FR32RegClass;
} else {
Opc = X86::LD_Fp32m;
@@ -387,7 +395,7 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
break;
case MVT::f64:
if (X86ScalarSSEf64) {
- Opc = HasAVX ? X86::VMOVSDrm : X86::MOVSDrm;
+ Opc = HasAVX512 ? X86::VMOVSDZrm : HasAVX ? X86::VMOVSDrm : X86::MOVSDrm;
RC = &X86::FR64RegClass;
} else {
Opc = X86::LD_Fp64m;
@@ -399,20 +407,26 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
return false;
case MVT::v4f32:
if (IsNonTemporal && Alignment >= 16 && HasSSE41)
- Opc = HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
+ Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
+ HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
else if (Alignment >= 16)
- Opc = HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm;
+ Opc = HasVLX ? X86::VMOVAPSZ128rm :
+ HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm;
else
- Opc = HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm;
+ Opc = HasVLX ? X86::VMOVUPSZ128rm :
+ HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm;
RC = &X86::VR128RegClass;
break;
case MVT::v2f64:
if (IsNonTemporal && Alignment >= 16 && HasSSE41)
- Opc = HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
+ Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
+ HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
else if (Alignment >= 16)
- Opc = HasAVX ? X86::VMOVAPDrm : X86::MOVAPDrm;
+ Opc = HasVLX ? X86::VMOVAPDZ128rm :
+ HasAVX ? X86::VMOVAPDrm : X86::MOVAPDrm;
else
- Opc = HasAVX ? X86::VMOVUPDrm : X86::MOVUPDrm;
+ Opc = HasVLX ? X86::VMOVUPDZ128rm :
+ HasAVX ? X86::VMOVUPDrm : X86::MOVUPDrm;
RC = &X86::VR128RegClass;
break;
case MVT::v4i32:
@@ -420,27 +434,34 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
case MVT::v8i16:
case MVT::v16i8:
if (IsNonTemporal && Alignment >= 16)
- Opc = HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
+ Opc = HasVLX ? X86::VMOVNTDQAZ128rm :
+ HasAVX ? X86::VMOVNTDQArm : X86::MOVNTDQArm;
else if (Alignment >= 16)
- Opc = HasAVX ? X86::VMOVDQArm : X86::MOVDQArm;
+ Opc = HasVLX ? X86::VMOVDQA64Z128rm :
+ HasAVX ? X86::VMOVDQArm : X86::MOVDQArm;
else
- Opc = HasAVX ? X86::VMOVDQUrm : X86::MOVDQUrm;
+ Opc = HasVLX ? X86::VMOVDQU64Z128rm :
+ HasAVX ? X86::VMOVDQUrm : X86::MOVDQUrm;
RC = &X86::VR128RegClass;
break;
case MVT::v8f32:
assert(HasAVX);
if (IsNonTemporal && Alignment >= 32 && HasAVX2)
- Opc = X86::VMOVNTDQAYrm;
+ Opc = HasVLX ? X86::VMOVNTDQAZ256rm : X86::VMOVNTDQAYrm;
+ else if (Alignment >= 32)
+ Opc = HasVLX ? X86::VMOVAPSZ256rm : X86::VMOVAPSYrm;
else
- Opc = (Alignment >= 32) ? X86::VMOVAPSYrm : X86::VMOVUPSYrm;
+ Opc = HasVLX ? X86::VMOVUPSZ256rm : X86::VMOVUPSYrm;
RC = &X86::VR256RegClass;
break;
case MVT::v4f64:
assert(HasAVX);
if (IsNonTemporal && Alignment >= 32 && HasAVX2)
Opc = X86::VMOVNTDQAYrm;
+ else if (Alignment >= 32)
+ Opc = HasVLX ? X86::VMOVAPDZ256rm : X86::VMOVAPDYrm;
else
- Opc = (Alignment >= 32) ? X86::VMOVAPDYrm : X86::VMOVUPDYrm;
+ Opc = HasVLX ? X86::VMOVUPDZ256rm : X86::VMOVUPDYrm;
RC = &X86::VR256RegClass;
break;
case MVT::v8i32:
@@ -450,12 +471,14 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
assert(HasAVX);
if (IsNonTemporal && Alignment >= 32 && HasAVX2)
Opc = X86::VMOVNTDQAYrm;
+ else if (Alignment >= 32)
+ Opc = HasVLX ? X86::VMOVDQA64Z256rm : X86::VMOVDQAYrm;
else
- Opc = (Alignment >= 32) ? X86::VMOVDQAYrm : X86::VMOVDQUYrm;
+ Opc = HasVLX ? X86::VMOVDQU64Z256rm : X86::VMOVDQUYrm;
RC = &X86::VR256RegClass;
break;
case MVT::v16f32:
- assert(Subtarget->hasAVX512());
+ assert(HasAVX512);
if (IsNonTemporal && Alignment >= 64)
Opc = X86::VMOVNTDQAZrm;
else
@@ -463,7 +486,7 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
RC = &X86::VR512RegClass;
break;
case MVT::v8f64:
- assert(Subtarget->hasAVX512());
+ assert(HasAVX512);
if (IsNonTemporal && Alignment >= 64)
Opc = X86::VMOVNTDQAZrm;
else
@@ -474,7 +497,7 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, X86AddressMode &AM,
case MVT::v16i32:
case MVT::v32i16:
case MVT::v64i8:
- assert(Subtarget->hasAVX512());
+ assert(HasAVX512);
// Note: There are a lot more choices based on type with AVX-512, but
// there's really no advantage when the load isn't masked.
if (IsNonTemporal && Alignment >= 64)
@@ -504,6 +527,8 @@ bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
bool HasSSE2 = Subtarget->hasSSE2();
bool HasSSE4A = Subtarget->hasSSE4A();
bool HasAVX = Subtarget->hasAVX();
+ bool HasAVX512 = Subtarget->hasAVX512();
+ bool HasVLX = Subtarget->hasVLX();
bool IsNonTemporal = MMO && MMO->isNonTemporal();
// Get opcode and regclass of the output for the given store instruction.
@@ -518,8 +543,8 @@ bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
TII.get(X86::AND8ri), AndResult)
.addReg(ValReg, getKillRegState(ValIsKill)).addImm(1);
ValReg = AndResult;
+ LLVM_FALLTHROUGH; // handle i1 as i8.
}
- // FALLTHROUGH, handling i1 as i8.
case MVT::i8: Opc = X86::MOV8mr; break;
case MVT::i16: Opc = X86::MOV16mr; break;
case MVT::i32:
@@ -534,7 +559,8 @@ bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
if (IsNonTemporal && HasSSE4A)
Opc = X86::MOVNTSS;
else
- Opc = HasAVX ? X86::VMOVSSmr : X86::MOVSSmr;
+ Opc = HasAVX512 ? X86::VMOVSSZmr :
+ HasAVX ? X86::VMOVSSmr : X86::MOVSSmr;
} else
Opc = X86::ST_Fp32m;
break;
@@ -543,27 +569,34 @@ bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
if (IsNonTemporal && HasSSE4A)
Opc = X86::MOVNTSD;
else
- Opc = HasAVX ? X86::VMOVSDmr : X86::MOVSDmr;
+ Opc = HasAVX512 ? X86::VMOVSDZmr :
+ HasAVX ? X86::VMOVSDmr : X86::MOVSDmr;
} else
Opc = X86::ST_Fp64m;
break;
case MVT::v4f32:
if (Aligned) {
if (IsNonTemporal)
- Opc = HasAVX ? X86::VMOVNTPSmr : X86::MOVNTPSmr;
+ Opc = HasVLX ? X86::VMOVNTPSZ128mr :
+ HasAVX ? X86::VMOVNTPSmr : X86::MOVNTPSmr;
else
- Opc = HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr;
+ Opc = HasVLX ? X86::VMOVAPSZ128mr :
+ HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr;
} else
- Opc = HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr;
+ Opc = HasVLX ? X86::VMOVUPSZ128mr :
+ HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr;
break;
case MVT::v2f64:
if (Aligned) {
if (IsNonTemporal)
- Opc = HasAVX ? X86::VMOVNTPDmr : X86::MOVNTPDmr;
+ Opc = HasVLX ? X86::VMOVNTPDZ128mr :
+ HasAVX ? X86::VMOVNTPDmr : X86::MOVNTPDmr;
else
- Opc = HasAVX ? X86::VMOVAPDmr : X86::MOVAPDmr;
+ Opc = HasVLX ? X86::VMOVAPDZ128mr :
+ HasAVX ? X86::VMOVAPDmr : X86::MOVAPDmr;
} else
- Opc = HasAVX ? X86::VMOVUPDmr : X86::MOVUPDmr;
+ Opc = HasVLX ? X86::VMOVUPDZ128mr :
+ HasAVX ? X86::VMOVUPDmr : X86::MOVUPDmr;
break;
case MVT::v4i32:
case MVT::v2i64:
@@ -571,45 +604,57 @@ bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
case MVT::v16i8:
if (Aligned) {
if (IsNonTemporal)
- Opc = HasAVX ? X86::VMOVNTDQmr : X86::MOVNTDQmr;
+ Opc = HasVLX ? X86::VMOVNTDQZ128mr :
+ HasAVX ? X86::VMOVNTDQmr : X86::MOVNTDQmr;
else
- Opc = HasAVX ? X86::VMOVDQAmr : X86::MOVDQAmr;
+ Opc = HasVLX ? X86::VMOVDQA64Z128mr :
+ HasAVX ? X86::VMOVDQAmr : X86::MOVDQAmr;
} else
- Opc = HasAVX ? X86::VMOVDQUmr : X86::MOVDQUmr;
+ Opc = HasVLX ? X86::VMOVDQU64Z128mr :
+ HasAVX ? X86::VMOVDQUmr : X86::MOVDQUmr;
break;
case MVT::v8f32:
assert(HasAVX);
- if (Aligned)
- Opc = IsNonTemporal ? X86::VMOVNTPSYmr : X86::VMOVAPSYmr;
- else
- Opc = X86::VMOVUPSYmr;
+ if (Aligned) {
+ if (IsNonTemporal)
+ Opc = HasVLX ? X86::VMOVNTPSZ256mr : X86::VMOVNTPSYmr;
+ else
+ Opc = HasVLX ? X86::VMOVAPSZ256mr : X86::VMOVAPSYmr;
+ } else
+ Opc = HasVLX ? X86::VMOVUPSZ256mr : X86::VMOVUPSYmr;
break;
case MVT::v4f64:
assert(HasAVX);
if (Aligned) {
- Opc = IsNonTemporal ? X86::VMOVNTPDYmr : X86::VMOVAPDYmr;
+ if (IsNonTemporal)
+ Opc = HasVLX ? X86::VMOVNTPDZ256mr : X86::VMOVNTPDYmr;
+ else
+ Opc = HasVLX ? X86::VMOVAPDZ256mr : X86::VMOVAPDYmr;
} else
- Opc = X86::VMOVUPDYmr;
+ Opc = HasVLX ? X86::VMOVUPDZ256mr : X86::VMOVUPDYmr;
break;
case MVT::v8i32:
case MVT::v4i64:
case MVT::v16i16:
case MVT::v32i8:
assert(HasAVX);
- if (Aligned)
- Opc = IsNonTemporal ? X86::VMOVNTDQYmr : X86::VMOVDQAYmr;
- else
- Opc = X86::VMOVDQUYmr;
+ if (Aligned) {
+ if (IsNonTemporal)
+ Opc = HasVLX ? X86::VMOVNTDQZ256mr : X86::VMOVNTDQYmr;
+ else
+ Opc = HasVLX ? X86::VMOVDQA64Z256mr : X86::VMOVDQAYmr;
+ } else
+ Opc = HasVLX ? X86::VMOVDQU64Z256mr : X86::VMOVDQUYmr;
break;
case MVT::v16f32:
- assert(Subtarget->hasAVX512());
+ assert(HasAVX512);
if (Aligned)
Opc = IsNonTemporal ? X86::VMOVNTPSZmr : X86::VMOVAPSZmr;
else
Opc = X86::VMOVUPSZmr;
break;
case MVT::v8f64:
- assert(Subtarget->hasAVX512());
+ assert(HasAVX512);
if (Aligned) {
Opc = IsNonTemporal ? X86::VMOVNTPDZmr : X86::VMOVAPDZmr;
} else
@@ -619,7 +664,7 @@ bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill,
case MVT::v16i32:
case MVT::v32i16:
case MVT::v64i8:
- assert(Subtarget->hasAVX512());
+ assert(HasAVX512);
// Note: There are a lot more choices based on type with AVX-512, but
// there's really no advantage when the store isn't masked.
if (Aligned)
@@ -659,7 +704,9 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
bool Signed = true;
switch (VT.getSimpleVT().SimpleTy) {
default: break;
- case MVT::i1: Signed = false; // FALLTHROUGH to handle as i8.
+ case MVT::i1:
+ Signed = false;
+ LLVM_FALLTHROUGH; // Handle as i8.
case MVT::i8: Opc = X86::MOV8mi; break;
case MVT::i16: Opc = X86::MOV16mi; break;
case MVT::i32: Opc = X86::MOV32mi; break;
@@ -895,7 +942,7 @@ redo_gep:
for (User::const_op_iterator i = U->op_begin() + 1, e = U->op_end();
i != e; ++i, ++GTI) {
const Value *Op = *i;
- if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ if (StructType *STy = GTI.getStructTypeOrNull()) {
const StructLayout *SL = DL.getStructLayout(STy);
Disp += SL->getElementOffset(cast<ConstantInt>(Op)->getZExtValue());
continue;
@@ -1454,11 +1501,11 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) {
}
// FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
- static unsigned SETFOpcTable[2][3] = {
+ static const uint16_t SETFOpcTable[2][3] = {
{ X86::SETEr, X86::SETNPr, X86::AND8rr },
{ X86::SETNEr, X86::SETPr, X86::OR8rr }
};
- unsigned *SETFOpc = nullptr;
+ const uint16_t *SETFOpc = nullptr;
switch (Predicate) {
default: break;
case CmpInst::FCMP_OEQ: SETFOpc = &SETFOpcTable[0][0]; break;
@@ -1511,7 +1558,7 @@ bool X86FastISel::X86SelectZExt(const Instruction *I) {
// Handle zero-extension from i1 to i8, which is common.
MVT SrcVT = TLI.getSimpleValueType(DL, I->getOperand(0)->getType());
- if (SrcVT.SimpleTy == MVT::i1) {
+ if (SrcVT == MVT::i1) {
// Set the high bits to zero.
ResultReg = fastEmitZExtFromI1(MVT::i8, ResultReg, /*TODO: Kill=*/false);
SrcVT = MVT::i8;
@@ -1601,7 +1648,8 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
switch (Predicate) {
default: break;
case CmpInst::FCMP_OEQ:
- std::swap(TrueMBB, FalseMBB); // fall-through
+ std::swap(TrueMBB, FalseMBB);
+ LLVM_FALLTHROUGH;
case CmpInst::FCMP_UNE:
NeedExtraBranch = true;
Predicate = CmpInst::FCMP_ONE;
@@ -1651,6 +1699,7 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
if (TestOpc) {
unsigned OpReg = getRegForValue(TI->getOperand(0));
if (OpReg == 0) return false;
+
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TestOpc))
.addReg(OpReg).addImm(1);
@@ -1688,8 +1737,17 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) {
unsigned OpReg = getRegForValue(BI->getCondition());
if (OpReg == 0) return false;
+ // In case OpReg is a K register, COPY to a GPR
+ if (MRI.getRegClass(OpReg) == &X86::VK1RegClass) {
+ unsigned KOpReg = OpReg;
+ OpReg = createResultReg(&X86::GR8RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), OpReg)
+ .addReg(KOpReg);
+ }
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
- .addReg(OpReg).addImm(1);
+ .addReg(OpReg)
+ .addImm(1);
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JNE_1))
.addMBB(TrueMBB);
finishCondBranch(BI->getParent(), TrueMBB, FalseMBB);
@@ -1875,15 +1933,15 @@ bool X86FastISel::X86SelectDivRem(const Instruction *I) {
// Copy the zero into the appropriate sub/super/identical physical
// register. Unfortunately the operations needed are not uniform enough
// to fit neatly into the table above.
- if (VT.SimpleTy == MVT::i16) {
+ if (VT == MVT::i16) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Copy), TypeEntry.HighInReg)
.addReg(Zero32, 0, X86::sub_16bit);
- } else if (VT.SimpleTy == MVT::i32) {
+ } else if (VT == MVT::i32) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(Copy), TypeEntry.HighInReg)
.addReg(Zero32);
- } else if (VT.SimpleTy == MVT::i64) {
+ } else if (VT == MVT::i64) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg)
.addImm(0).addReg(Zero32).addImm(X86::sub_32bit);
@@ -1953,11 +2011,11 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
CmpInst::Predicate Predicate = optimizeCmpPredicate(CI);
// FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
- static unsigned SETFOpcTable[2][3] = {
+ static const uint16_t SETFOpcTable[2][3] = {
{ X86::SETNPr, X86::SETEr , X86::TEST8rr },
{ X86::SETPr, X86::SETNEr, X86::OR8rr }
};
- unsigned *SETFOpc = nullptr;
+ const uint16_t *SETFOpc = nullptr;
switch (Predicate) {
default: break;
case CmpInst::FCMP_OEQ:
@@ -2023,8 +2081,17 @@ bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) {
return false;
bool CondIsKill = hasTrivialKill(Cond);
+ // In case CondReg is a K register, COPY to a GPR
+ if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
+ unsigned KCondReg = CondReg;
+ CondReg = createResultReg(&X86::GR8RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), CondReg)
+ .addReg(KCondReg, getKillRegState(CondIsKill));
+ }
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
- .addReg(CondReg, getKillRegState(CondIsKill)).addImm(1);
+ .addReg(CondReg, getKillRegState(CondIsKill))
+ .addImm(1);
}
const Value *LHS = I->getOperand(1);
@@ -2087,12 +2154,12 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
std::swap(CmpLHS, CmpRHS);
// Choose the SSE instruction sequence based on data type (float or double).
- static unsigned OpcTable[2][4] = {
- { X86::CMPSSrr, X86::FsANDPSrr, X86::FsANDNPSrr, X86::FsORPSrr },
- { X86::CMPSDrr, X86::FsANDPDrr, X86::FsANDNPDrr, X86::FsORPDrr }
+ static const uint16_t OpcTable[2][4] = {
+ { X86::CMPSSrr, X86::ANDPSrr, X86::ANDNPSrr, X86::ORPSrr },
+ { X86::CMPSDrr, X86::ANDPDrr, X86::ANDNPDrr, X86::ORPDrr }
};
- unsigned *Opc = nullptr;
+ const uint16_t *Opc = nullptr;
switch (RetVT.SimpleTy) {
default: return false;
case MVT::f32: Opc = &OpcTable[0][0]; break;
@@ -2119,9 +2186,36 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT);
unsigned ResultReg;
-
- if (Subtarget->hasAVX()) {
- const TargetRegisterClass *FR32 = &X86::FR32RegClass;
+
+ if (Subtarget->hasAVX512()) {
+ // If we have AVX512 we can use a mask compare and masked movss/sd.
+ const TargetRegisterClass *VR128X = &X86::VR128XRegClass;
+ const TargetRegisterClass *VK1 = &X86::VK1RegClass;
+
+ unsigned CmpOpcode =
+ (RetVT == MVT::f32) ? X86::VCMPSSZrr : X86::VCMPSDZrr;
+ unsigned CmpReg = fastEmitInst_rri(CmpOpcode, VK1, CmpLHSReg, CmpLHSIsKill,
+ CmpRHSReg, CmpRHSIsKill, CC);
+
+ // Need an IMPLICIT_DEF for the input that is used to generate the upper
+ // bits of the result register since it's not based on any of the inputs.
+ unsigned ImplicitDefReg = createResultReg(VR128X);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg);
+
+ // Place RHSReg in the passthru of the masked movss/sd operation and put
+ // LHS in the input. The mask input comes from the compare.
+ unsigned MovOpcode =
+ (RetVT == MVT::f32) ? X86::VMOVSSZrrk : X86::VMOVSDZrrk;
+ unsigned MovReg = fastEmitInst_rrrr(MovOpcode, VR128X, RHSReg, RHSIsKill,
+ CmpReg, true, ImplicitDefReg, true,
+ LHSReg, LHSIsKill);
+
+ ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg).addReg(MovReg);
+
+ } else if (Subtarget->hasAVX()) {
const TargetRegisterClass *VR128 = &X86::VR128RegClass;
// If we have AVX, create 1 blendv instead of 3 logic instructions.
@@ -2130,11 +2224,11 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
// instructions as the AND/ANDN/OR sequence due to register moves, so
// don't bother.
unsigned CmpOpcode =
- (RetVT.SimpleTy == MVT::f32) ? X86::VCMPSSrr : X86::VCMPSDrr;
+ (RetVT == MVT::f32) ? X86::VCMPSSrr : X86::VCMPSDrr;
unsigned BlendOpcode =
- (RetVT.SimpleTy == MVT::f32) ? X86::VBLENDVPSrr : X86::VBLENDVPDrr;
-
- unsigned CmpReg = fastEmitInst_rri(CmpOpcode, FR32, CmpLHSReg, CmpLHSIsKill,
+ (RetVT == MVT::f32) ? X86::VBLENDVPSrr : X86::VBLENDVPDrr;
+
+ unsigned CmpReg = fastEmitInst_rri(CmpOpcode, RC, CmpLHSReg, CmpLHSIsKill,
CmpRHSReg, CmpRHSIsKill, CC);
unsigned VBlendReg = fastEmitInst_rrr(BlendOpcode, VR128, RHSReg, RHSIsKill,
LHSReg, LHSIsKill, CmpReg, true);
@@ -2142,14 +2236,18 @@ bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::COPY), ResultReg).addReg(VBlendReg);
} else {
+ const TargetRegisterClass *VR128 = &X86::VR128RegClass;
unsigned CmpReg = fastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill,
CmpRHSReg, CmpRHSIsKill, CC);
- unsigned AndReg = fastEmitInst_rr(Opc[1], RC, CmpReg, /*IsKill=*/false,
+ unsigned AndReg = fastEmitInst_rr(Opc[1], VR128, CmpReg, /*IsKill=*/false,
LHSReg, LHSIsKill);
- unsigned AndNReg = fastEmitInst_rr(Opc[2], RC, CmpReg, /*IsKill=*/true,
+ unsigned AndNReg = fastEmitInst_rr(Opc[2], VR128, CmpReg, /*IsKill=*/true,
RHSReg, RHSIsKill);
- ResultReg = fastEmitInst_rr(Opc[3], RC, AndNReg, /*IsKill=*/true,
- AndReg, /*IsKill=*/true);
+ unsigned OrReg = fastEmitInst_rr(Opc[3], VR128, AndNReg, /*IsKill=*/true,
+ AndReg, /*IsKill=*/true);
+ ResultReg = createResultReg(RC);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg).addReg(OrReg);
}
updateValueMap(I, ResultReg);
return true;
@@ -2195,8 +2293,18 @@ bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) {
if (CondReg == 0)
return false;
bool CondIsKill = hasTrivialKill(Cond);
+
+ // In case CondReg is a K register, COPY to a GPR
+ if (MRI.getRegClass(CondReg) == &X86::VK1RegClass) {
+ unsigned KCondReg = CondReg;
+ CondReg = createResultReg(&X86::GR8RegClass);
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), CondReg)
+ .addReg(KCondReg, getKillRegState(CondIsKill));
+ }
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri))
- .addReg(CondReg, getKillRegState(CondIsKill)).addImm(1);
+ .addReg(CondReg, getKillRegState(CondIsKill))
+ .addImm(1);
}
const Value *LHS = I->getOperand(1);
@@ -2522,8 +2630,8 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
// This needs to be set before we call getPtrSizedFrameRegister, otherwise
// we get the wrong frame register.
- MachineFrameInfo *MFI = MF->getFrameInfo();
- MFI->setFrameAddressIsTaken(true);
+ MachineFrameInfo &MFI = MF->getFrameInfo();
+ MFI.setFrameAddressIsTaken(true);
const X86RegisterInfo *RegInfo = Subtarget->getRegisterInfo();
unsigned FrameReg = RegInfo->getPtrSizedFrameRegister(*MF);
@@ -2698,7 +2806,9 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
const Function *Callee = II->getCalledFunction();
auto *Ty = cast<StructType>(Callee->getReturnType());
Type *RetTy = Ty->getTypeAtIndex(0U);
- Type *CondTy = Ty->getTypeAtIndex(1);
+ assert(Ty->getTypeAtIndex(1)->isIntegerTy() &&
+ Ty->getTypeAtIndex(1)->getScalarSizeInBits() == 1 &&
+ "Overflow value expected to be an i1");
MVT VT;
if (!isTypeLegal(RetTy, VT))
@@ -2808,7 +2918,8 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
if (!ResultReg)
return false;
- unsigned ResultReg2 = FuncInfo.CreateRegs(CondTy);
+ // Assign to a GPR since the overflow return value is lowered to a SETcc.
+ unsigned ResultReg2 = createResultReg(&X86::GR8RegClass);
assert((ResultReg+1) == ResultReg2 && "Nonconsecutive result registers.");
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CondOpc),
ResultReg2);
@@ -2966,7 +3077,7 @@ bool X86FastISel::fastLowerArguments() {
default: llvm_unreachable("Unexpected value type.");
case MVT::i32: SrcReg = GPR32ArgRegs[GPRIdx++]; break;
case MVT::i64: SrcReg = GPR64ArgRegs[GPRIdx++]; break;
- case MVT::f32: // fall-through
+ case MVT::f32: LLVM_FALLTHROUGH;
case MVT::f64: SrcReg = XMMArgRegs[FPRIdx++]; break;
}
unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC);
@@ -3140,7 +3251,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
assert(VA.getLocVT().isInteger() && !VA.getLocVT().isVector() &&
"Unexpected extend");
- if (ArgVT.SimpleTy == MVT::i1)
+ if (ArgVT == MVT::i1)
return false;
bool Emitted = X86FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), ArgReg,
@@ -3154,7 +3265,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
"Unexpected extend");
// Handle zero-extension from i1 to i8, which is common.
- if (ArgVT.SimpleTy == MVT::i1) {
+ if (ArgVT == MVT::i1) {
// Set the high bits to zero.
ArgReg = fastEmitZExtFromI1(MVT::i8, ArgReg, /*TODO: Kill=*/false);
ArgVT = MVT::i8;
@@ -3456,8 +3567,14 @@ X86FastISel::fastSelectInstruction(const Instruction *I) {
if (!SrcVT.isSimple() || !DstVT.isSimple())
return false;
- if (!SrcVT.is128BitVector() &&
- !(Subtarget->hasAVX() && SrcVT.is256BitVector()))
+ MVT SVT = SrcVT.getSimpleVT();
+ MVT DVT = DstVT.getSimpleVT();
+
+ if (!SVT.is128BitVector() &&
+ !(Subtarget->hasAVX() && SVT.is256BitVector()) &&
+ !(Subtarget->hasAVX512() && SVT.is512BitVector() &&
+ (Subtarget->hasBWI() || (SVT.getScalarSizeInBits() >= 32 &&
+ DVT.getScalarSizeInBits() >= 32))))
return false;
unsigned Reg = getRegForValue(I->getOperand(0));
@@ -3505,7 +3622,7 @@ unsigned X86FastISel::X86MaterializeInt(const ConstantInt *CI, MVT VT) {
unsigned Opc = 0;
switch (VT.SimpleTy) {
default: llvm_unreachable("Unexpected value type");
- case MVT::i1: VT = MVT::i8; // fall-through
+ case MVT::i1: VT = MVT::i8; LLVM_FALLTHROUGH;
case MVT::i8: Opc = X86::MOV8ri; break;
case MVT::i16: Opc = X86::MOV16ri; break;
case MVT::i32: Opc = X86::MOV32ri; break;
@@ -3775,6 +3892,38 @@ bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo,
return true;
}
+unsigned X86FastISel::fastEmitInst_rrrr(unsigned MachineInstOpcode,
+ const TargetRegisterClass *RC,
+ unsigned Op0, bool Op0IsKill,
+ unsigned Op1, bool Op1IsKill,
+ unsigned Op2, bool Op2IsKill,
+ unsigned Op3, bool Op3IsKill) {
+ const MCInstrDesc &II = TII.get(MachineInstOpcode);
+
+ unsigned ResultReg = createResultReg(RC);
+ Op0 = constrainOperandRegClass(II, Op0, II.getNumDefs());
+ Op1 = constrainOperandRegClass(II, Op1, II.getNumDefs() + 1);
+ Op2 = constrainOperandRegClass(II, Op2, II.getNumDefs() + 2);
+ Op3 = constrainOperandRegClass(II, Op3, II.getNumDefs() + 3);
+
+ if (II.getNumDefs() >= 1)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, ResultReg)
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addReg(Op1, getKillRegState(Op1IsKill))
+ .addReg(Op2, getKillRegState(Op2IsKill))
+ .addReg(Op3, getKillRegState(Op3IsKill));
+ else {
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
+ .addReg(Op0, getKillRegState(Op0IsKill))
+ .addReg(Op1, getKillRegState(Op1IsKill))
+ .addReg(Op2, getKillRegState(Op2IsKill))
+ .addReg(Op3, getKillRegState(Op3IsKill));
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
+ TII.get(TargetOpcode::COPY), ResultReg).addReg(II.ImplicitDefs[0]);
+ }
+ return ResultReg;
+}
+
namespace llvm {
FastISel *X86::createFastISel(FunctionLoweringInfo &funcInfo,
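Across the X86FastISel load/store hunks above, the opcode choice follows a single precedence: the EVEX (AVX-512VL) form when HasVLX, otherwise the VEX (AVX) form, otherwise the legacy SSE form, with the non-temporal and aligned variants checked first. A minimal standalone sketch of that selection, with string placeholders standing in for the real X86:: opcode enumerators (an assumed simplification, not the actual FastISel code):

#include <string>

// Illustrative only: mirrors the HasVLX > HasAVX > legacy-SSE precedence used
// above when picking a 128-bit vector-float load opcode.
std::string pick128BitFloatLoad(bool HasVLX, bool HasAVX, bool HasSSE41,
                                bool IsNonTemporal, unsigned Alignment) {
  if (IsNonTemporal && Alignment >= 16 && HasSSE41)
    return HasVLX ? "VMOVNTDQAZ128rm" : HasAVX ? "VMOVNTDQArm" : "MOVNTDQArm";
  if (Alignment >= 16)
    return HasVLX ? "VMOVAPSZ128rm" : HasAVX ? "VMOVAPSrm" : "MOVAPSrm";
  return HasVLX ? "VMOVUPSZ128rm" : HasAVX ? "VMOVUPSrm" : "MOVUPSrm";
}

The 256-bit and scalar cases in the same hunks follow the identical shape, substituting the Z256 and scalar Z opcodes when VLX or AVX-512 is available.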
diff --git a/contrib/llvm/lib/Target/X86/X86FixupBWInsts.cpp b/contrib/llvm/lib/Target/X86/X86FixupBWInsts.cpp
index 90e758dc2e02..8bde4bf98d66 100644
--- a/contrib/llvm/lib/Target/X86/X86FixupBWInsts.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FixupBWInsts.cpp
@@ -66,8 +66,6 @@ using namespace llvm;
#define DEBUG_TYPE FIXUPBW_NAME
// Option to allow this optimization pass to have fine-grained control.
-// This is turned off by default so as not to affect a large number of
-// existing lit tests.
static cl::opt<bool>
FixupBWInsts("fixup-byte-word-insts",
cl::desc("Change byte and word instructions to larger sizes"),
@@ -104,9 +102,7 @@ class FixupBWInstPass : public MachineFunctionPass {
public:
static char ID;
- const char *getPassName() const override {
- return FIXUPBW_DESC;
- }
+ StringRef getPassName() const override { return FIXUPBW_DESC; }
FixupBWInstPass() : MachineFunctionPass(ID) {
initializeFixupBWInstPassPass(*PassRegistry::getPassRegistry());
@@ -125,7 +121,7 @@ public:
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
private:
@@ -158,7 +154,7 @@ bool FixupBWInstPass::runOnMachineFunction(MachineFunction &MF) {
TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
OptForSize = MF.getFunction()->optForSize();
MLI = &getAnalysis<MachineLoopInfo>();
- LiveRegs.init(&TII->getRegisterInfo());
+ LiveRegs.init(TII->getRegisterInfo());
DEBUG(dbgs() << "Start X86FixupBWInsts\n";);
diff --git a/contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp b/contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp
index 013ee249a60f..12095917ca30 100644
--- a/contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp
@@ -40,7 +40,7 @@ class FixupLEAPass : public MachineFunctionPass {
/// where appropriate.
bool processBasicBlock(MachineFunction &MF, MachineFunction::iterator MFI);
- const char *getPassName() const override { return "X86 LEA Fixup"; }
+ StringRef getPassName() const override { return "X86 LEA Fixup"; }
/// \brief Given a machine register, look for the instruction
/// which writes it in the current basic block. If found,
@@ -95,7 +95,7 @@ public:
// This pass runs after regalloc and doesn't support VReg operands.
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
private:
diff --git a/contrib/llvm/lib/Target/X86/X86FixupSetCC.cpp b/contrib/llvm/lib/Target/X86/X86FixupSetCC.cpp
index fb317da95355..a86eb997635e 100644
--- a/contrib/llvm/lib/Target/X86/X86FixupSetCC.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FixupSetCC.cpp
@@ -39,7 +39,7 @@ class X86FixupSetCCPass : public MachineFunctionPass {
public:
X86FixupSetCCPass() : MachineFunctionPass(ID) {}
- const char *getPassName() const override { return "X86 Fixup SetCC"; }
+ StringRef getPassName() const override { return "X86 Fixup SetCC"; }
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -99,7 +99,8 @@ bool X86FixupSetCCPass::isSetCCr(unsigned Opcode) {
MachineInstr *
X86FixupSetCCPass::findFlagsImpDef(MachineBasicBlock *MBB,
MachineBasicBlock::reverse_iterator MI) {
- auto MBBStart = MBB->instr_rend();
+ // FIXME: Should this be instr_rend(), and MI be reverse_instr_iterator?
+ auto MBBStart = MBB->rend();
for (int i = 0; (i < SearchBound) && (MI != MBBStart); ++i, ++MI)
for (auto &Op : MI->implicit_operands())
if ((Op.getReg() == X86::EFLAGS) && (Op.isDef()))
diff --git a/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp b/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp
index 55c1bff2bc18..a5489b9aa8b7 100644
--- a/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp
@@ -78,10 +78,10 @@ namespace {
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
- const char *getPassName() const override { return "X86 FP Stackifier"; }
+ StringRef getPassName() const override { return "X86 FP Stackifier"; }
private:
const TargetInstrInfo *TII; // Machine instruction info.
@@ -206,6 +206,13 @@ namespace {
RegMap[Reg] = StackTop++;
}
+ // popReg - Pop a register from the stack.
+ void popReg() {
+ if (StackTop == 0)
+ report_fatal_error("Cannot pop empty stack!");
+ RegMap[Stack[--StackTop]] = ~0; // Update state
+ }
+
bool isAtTop(unsigned RegNo) const { return getSlot(RegNo) == StackTop-1; }
void moveToTop(unsigned RegNo, MachineBasicBlock::iterator I) {
DebugLoc dl = I == MBB->end() ? DebugLoc() : I->getDebugLoc();
@@ -326,9 +333,28 @@ bool FPS::runOnMachineFunction(MachineFunction &MF) {
// Process the function in depth first order so that we process at least one
// of the predecessors for every reachable block in the function.
- SmallPtrSet<MachineBasicBlock*, 8> Processed;
+ df_iterator_default_set<MachineBasicBlock*> Processed;
MachineBasicBlock *Entry = &MF.front();
+ LiveBundle &Bundle =
+ LiveBundles[Bundles->getBundle(Entry->getNumber(), false)];
+
+ // In the regcall convention, some FP registers may not be passed through
+ // the stack, so they will need to be assigned to the stack first.
+ if ((Entry->getParent()->getFunction()->getCallingConv() ==
+ CallingConv::X86_RegCall) && (Bundle.Mask && !Bundle.FixCount)) {
+ // In the register calling convention, up to one FP argument could be
+ // saved in the first FP register.
+ // If Bundle.Mask is non-zero and Bundle.FixCount is zero, it means
+ // that the FP registers contain arguments.
+ // The actual value is passed in FP0.
+ // Here we fix the stack and mark FP0 as pre-assigned register.
+ assert((Bundle.Mask & 0xFE) == 0 &&
+ "Only FP0 could be passed as an argument");
+ Bundle.FixCount = 1;
+ Bundle.FixStack[0] = 0;
+ }
+
bool Changed = false;
for (MachineBasicBlock *BB : depth_first_ext(Entry, Processed))
Changed |= processBasicBlock(MF, *BB);
@@ -791,9 +817,8 @@ void FPS::popStackAfter(MachineBasicBlock::iterator &I) {
MachineInstr &MI = *I;
const DebugLoc &dl = MI.getDebugLoc();
ASSERT_SORTED(PopTable);
- if (StackTop == 0)
- report_fatal_error("Cannot pop empty stack!");
- RegMap[Stack[--StackTop]] = ~0; // Update state
+
+ popReg();
// Check to see if there is a popping version of this instruction...
int Opcode = Lookup(PopTable, I->getOpcode());
@@ -929,6 +954,7 @@ void FPS::shuffleStackTop(const unsigned char *FixStack,
void FPS::handleCall(MachineBasicBlock::iterator &I) {
unsigned STReturns = 0;
+ const MachineFunction* MF = I->getParent()->getParent();
for (const auto &MO : I->operands()) {
if (!MO.isReg())
@@ -937,7 +963,10 @@ void FPS::handleCall(MachineBasicBlock::iterator &I) {
unsigned R = MO.getReg() - X86::FP0;
if (R < 8) {
- assert(MO.isDef() && MO.isImplicit());
+ if (MF->getFunction()->getCallingConv() != CallingConv::X86_RegCall) {
+ assert(MO.isDef() && MO.isImplicit());
+ }
+
STReturns |= 1 << R;
}
}
@@ -945,9 +974,15 @@ void FPS::handleCall(MachineBasicBlock::iterator &I) {
unsigned N = countTrailingOnes(STReturns);
// FP registers used for function return must be consecutive starting at
- // FP0.
+ // FP0
assert(STReturns == 0 || (isMask_32(STReturns) && N <= 2));
+ // Reset the FP Stack - It is required because of possible leftovers from
+ // passed arguments. The caller should assume that the FP stack is
+ // returned empty (unless the callee returns values on FP stack).
+ while (StackTop > 0)
+ popReg();
+
for (unsigned I = 0; I < N; ++I)
pushReg(N - I - 1);
}
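The X86FloatingPoint changes above factor stack popping into popReg() and, for the X86_RegCall convention, drain any leftover simulated FP-stack entries around calls before re-pushing the ST return values. A rough standalone model of that bookkeeping (an assumed simplification of the pass's Stack/RegMap/StackTop state, not the pass itself):

#include <cstdio>
#include <cstdlib>

// Simplified model of the stackifier's pseudo FP stack: Stack[] holds FP
// register numbers by depth, RegMap[] maps a register to its slot.
static unsigned Stack[8], RegMap[8], StackTop = 0;

static void pushReg(unsigned Reg) { Stack[StackTop] = Reg; RegMap[Reg] = StackTop++; }

static void popReg() {
  if (StackTop == 0) {
    fprintf(stderr, "Cannot pop empty stack!\n");
    abort();
  }
  RegMap[Stack[--StackTop]] = ~0u; // the register is no longer on the stack
}

// Mirrors handleCall above: assume the callee leaves the FP stack empty except
// for N consecutive return values starting at FP0, so drain and re-push.
static void resetStackAfterCall(unsigned NumSTReturns) {
  while (StackTop > 0)
    popReg();
  for (unsigned I = 0; I < NumSTReturns; ++I)
    pushReg(NumSTReturns - I - 1);
}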
diff --git a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
index 03d925692adf..1deefe1231ca 100644
--- a/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/contrib/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -50,7 +50,7 @@ X86FrameLowering::X86FrameLowering(const X86Subtarget &STI,
}
bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
- return !MF.getFrameInfo()->hasVarSizedObjects() &&
+ return !MF.getFrameInfo().hasVarSizedObjects() &&
!MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
}
@@ -74,7 +74,7 @@ X86FrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
// when there are no stack objects.
bool
X86FrameLowering::needsFrameIndexResolution(const MachineFunction &MF) const {
- return MF.getFrameInfo()->hasStackObjects() ||
+ return MF.getFrameInfo().hasStackObjects() ||
MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
}
@@ -82,17 +82,15 @@ X86FrameLowering::needsFrameIndexResolution(const MachineFunction &MF) const {
/// pointer register. This is true if the function has variable sized allocas
/// or if frame pointer elimination is disabled.
bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- const MachineModuleInfo &MMI = MF.getMMI();
-
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
TRI->needsStackRealignment(MF) ||
- MFI->hasVarSizedObjects() ||
- MFI->isFrameAddressTaken() || MFI->hasOpaqueSPAdjustment() ||
+ MFI.hasVarSizedObjects() ||
+ MFI.isFrameAddressTaken() || MFI.hasOpaqueSPAdjustment() ||
MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
- MMI.callsUnwindInit() || MMI.hasEHFunclets() || MMI.callsEHReturn() ||
- MFI->hasStackMap() || MFI->hasPatchPoint() ||
- MFI->hasCopyImplyingStackAdjustment());
+ MF.callsUnwindInit() || MF.hasEHFunclets() || MF.callsEHReturn() ||
+ MFI.hasStackMap() || MFI.hasPatchPoint() ||
+ MFI.hasCopyImplyingStackAdjustment());
}
static unsigned getSUBriOpcode(unsigned IsLP64, int64_t Imm) {
@@ -151,13 +149,15 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
bool Is64Bit) {
const MachineFunction *MF = MBB.getParent();
const Function *F = MF->getFunction();
- if (!F || MF->getMMI().callsEHReturn())
+ if (!F || MF->callsEHReturn())
return 0;
const TargetRegisterClass &AvailableRegs = *TRI->getGPRsForTailCall(*MF);
- unsigned Opc = MBBI->getOpcode();
- switch (Opc) {
+ if (MBBI == MBB.end())
+ return 0;
+
+ switch (MBBI->getOpcode()) {
default: return 0;
case TargetOpcode::PATCHABLE_RET:
case X86::RET:
@@ -416,7 +416,7 @@ void X86FrameLowering::BuildCFI(MachineBasicBlock &MBB,
const DebugLoc &DL,
const MCCFIInstruction &CFIInst) const {
MachineFunction &MF = *MBB.getParent();
- unsigned CFIIndex = MF.getMMI().addFrameInst(CFIInst);
+ unsigned CFIIndex = MF.addFrameInst(CFIInst);
BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
}
@@ -425,18 +425,18 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
const DebugLoc &DL) const {
MachineFunction &MF = *MBB.getParent();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
MachineModuleInfo &MMI = MF.getMMI();
const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
// Add callee saved registers to move list.
- const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
if (CSI.empty()) return;
// Calculate offsets.
for (std::vector<CalleeSavedInfo>::const_iterator
I = CSI.begin(), E = CSI.end(); I != E; ++I) {
- int64_t Offset = MFI->getObjectOffset(I->getFrameIdx());
+ int64_t Offset = MFI.getObjectOffset(I->getFrameIdx());
unsigned Reg = I->getReg();
unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
@@ -445,20 +445,19 @@ void X86FrameLowering::emitCalleeSavedFrameMoves(
}
}
-MachineInstr *X86FrameLowering::emitStackProbe(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- const DebugLoc &DL,
- bool InProlog) const {
+void X86FrameLowering::emitStackProbe(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, bool InProlog) const {
const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
if (STI.isTargetWindowsCoreCLR()) {
if (InProlog) {
- return emitStackProbeInlineStub(MF, MBB, MBBI, DL, true);
+ emitStackProbeInlineStub(MF, MBB, MBBI, DL, true);
} else {
- return emitStackProbeInline(MF, MBB, MBBI, DL, false);
+ emitStackProbeInline(MF, MBB, MBBI, DL, false);
}
} else {
- return emitStackProbeCall(MF, MBB, MBBI, DL, InProlog);
+ emitStackProbeCall(MF, MBB, MBBI, DL, InProlog);
}
}
@@ -479,17 +478,19 @@ void X86FrameLowering::inlineStackProbe(MachineFunction &MF,
assert(!ChkStkStub->isBundled() &&
"Not expecting bundled instructions here");
MachineBasicBlock::iterator MBBI = std::next(ChkStkStub->getIterator());
- assert(std::prev(MBBI).operator==(ChkStkStub) &&
- "MBBI expected after __chkstk_stub.");
+ assert(std::prev(MBBI) == ChkStkStub &&
+ "MBBI expected after __chkstk_stub.");
DebugLoc DL = PrologMBB.findDebugLoc(MBBI);
emitStackProbeInline(MF, PrologMBB, MBBI, DL, true);
ChkStkStub->eraseFromParent();
}
}
-MachineInstr *X86FrameLowering::emitStackProbeInline(
- MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
+void X86FrameLowering::emitStackProbeInline(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL,
+ bool InProlog) const {
const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
assert(STI.is64Bit() && "different expansion needed for 32 bit");
assert(STI.isTargetWindowsCoreCLR() && "custom expansion expects CoreCLR");
@@ -612,7 +613,7 @@ MachineInstr *X86FrameLowering::emitStackProbeInline(
// lowest touched page on the stack, not the point at which the OS
// will cause an overflow exception, so this is just an optimization
// to avoid unnecessarily touching pages that are below the current
- // SP but already commited to the stack by the OS.
+ // SP but already committed to the stack by the OS.
BuildMI(&MBB, DL, TII.get(X86::MOV64rm), LimitReg)
.addReg(0)
.addImm(1)
@@ -699,13 +700,13 @@ MachineInstr *X86FrameLowering::emitStackProbeInline(
}
// Possible TODO: physreg liveness for InProlog case.
-
- return &*ContinueMBBI;
}
-MachineInstr *X86FrameLowering::emitStackProbeCall(
- MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
+void X86FrameLowering::emitStackProbeCall(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL,
+ bool InProlog) const {
bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large;
unsigned CallOp;
@@ -763,11 +764,9 @@ MachineInstr *X86FrameLowering::emitStackProbeCall(
for (++ExpansionMBBI; ExpansionMBBI != MBBI; ++ExpansionMBBI)
ExpansionMBBI->setFlag(MachineInstr::FrameSetup);
}
-
- return &*MBBI;
}
-MachineInstr *X86FrameLowering::emitStackProbeInlineStub(
+void X86FrameLowering::emitStackProbeInlineStub(
MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
@@ -775,8 +774,6 @@ MachineInstr *X86FrameLowering::emitStackProbeInlineStub(
BuildMI(MBB, MBBI, DL, TII.get(X86::CALLpcrel32))
.addExternalSymbol("__chkstk_stub");
-
- return &*MBBI;
}
static unsigned calculateSetFPREG(uint64_t SPAdjust) {
@@ -793,11 +790,11 @@ static unsigned calculateSetFPREG(uint64_t SPAdjust) {
// have a call out. Otherwise just make sure we have some alignment - we'll
// go with the minimum SlotSize.
uint64_t X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment.
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ uint64_t MaxAlign = MFI.getMaxAlignment(); // Desired stack alignment.
unsigned StackAlign = getStackAlignment();
if (MF.getFunction()->hasFnAttribute("stackrealign")) {
- if (MFI->hasCalls())
+ if (MFI.hasCalls())
MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign;
else if (MaxAlign < SlotSize)
MaxAlign = SlotSize;
@@ -909,18 +906,18 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
assert(&STI == &MF.getSubtarget<X86Subtarget>() &&
"MF used frame lowering for wrong subtarget");
MachineBasicBlock::iterator MBBI = MBB.begin();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
const Function *Fn = MF.getFunction();
MachineModuleInfo &MMI = MF.getMMI();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment.
- uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate.
+ uint64_t StackSize = MFI.getStackSize(); // Number of bytes to allocate.
bool IsFunclet = MBB.isEHFuncletEntry();
EHPersonality Personality = EHPersonality::Unknown;
if (Fn->hasPersonalityFn())
Personality = classifyEHPersonality(Fn->getPersonalityFn());
bool FnHasClrFunclet =
- MMI.hasEHFunclets() && Personality == EHPersonality::CoreCLR;
+ MF.hasEHFunclets() && Personality == EHPersonality::CoreCLR;
bool IsClrFunclet = IsFunclet && FnHasClrFunclet;
bool HasFP = hasFP(MF);
bool IsWin64CC = STI.isCallingConvWin64(Fn->getCallingConv());
@@ -933,6 +930,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
STI.isTarget64BitILP32()
? getX86SubSuperRegister(FramePtr, 64) : FramePtr;
unsigned BasePtr = TRI->getBaseRegister();
+ bool HasWinCFI = false;
// Debug location must be unknown since the first debug location is used
// to determine the end of the prologue.
@@ -964,16 +962,16 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// push and pop from the stack.
if (Is64Bit && !Fn->hasFnAttribute(Attribute::NoRedZone) &&
!TRI->needsStackRealignment(MF) &&
- !MFI->hasVarSizedObjects() && // No dynamic alloca.
- !MFI->adjustsStack() && // No calls.
- !IsWin64CC && // Win64 has no Red Zone
- !MFI->hasCopyImplyingStackAdjustment() && // Don't push and pop.
- !MF.shouldSplitStack()) { // Regular stack
+ !MFI.hasVarSizedObjects() && // No dynamic alloca.
+ !MFI.adjustsStack() && // No calls.
+ !IsWin64CC && // Win64 has no Red Zone
+ !MFI.hasCopyImplyingStackAdjustment() && // Don't push and pop.
+ !MF.shouldSplitStack()) { // Regular stack
uint64_t MinSize = X86FI->getCalleeSavedFrameSize();
if (HasFP) MinSize += SlotSize;
X86FI->setUsesRedZone(MinSize > 0 || StackSize > 0);
StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0);
- MFI->setStackSize(StackSize);
+ MFI.setStackSize(StackSize);
}
// Insert stack pointer adjustment for later moving of return addr. Only
@@ -1037,9 +1035,9 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// guaranteed to be the last slot by processFunctionBeforeFrameFinalized.
// Update the frame offset adjustment.
if (!IsFunclet)
- MFI->setOffsetAdjustment(-NumBytes);
+ MFI.setOffsetAdjustment(-NumBytes);
else
- assert(MFI->getOffsetAdjustment() == -(int)NumBytes &&
+ assert(MFI.getOffsetAdjustment() == -(int)NumBytes &&
"should calculate same local variable offset for funclets");
// Save EBP/RBP into the appropriate stack slot.
@@ -1061,6 +1059,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
}
if (NeedsWinCFI) {
+ HasWinCFI = true;
BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg))
.addImm(FramePtr)
.setMIFlag(MachineInstr::FrameSetup);
@@ -1122,6 +1121,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
}
if (NeedsWinCFI) {
+ HasWinCFI = true;
BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg)).addImm(Reg).setMIFlag(
MachineInstr::FrameSetup);
}
@@ -1207,10 +1207,12 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
emitSPUpdate(MBB, MBBI, -(int64_t)NumBytes, /*InEpilogue=*/false);
}
- if (NeedsWinCFI && NumBytes)
+ if (NeedsWinCFI && NumBytes) {
+ HasWinCFI = true;
BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc))
.addImm(NumBytes)
.setMIFlag(MachineInstr::FrameSetup);
+ }
int SEHFrameOffset = 0;
unsigned SPOrEstablisher;
@@ -1257,6 +1259,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
// If this is not a funclet, emit the CFI describing our frame pointer.
if (NeedsWinCFI && !IsFunclet) {
+ HasWinCFI = true;
BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame))
.addImm(FramePtr)
.addImm(SEHFrameOffset)
@@ -1293,6 +1296,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
int Offset = getFrameIndexReference(MF, FI, IgnoredFrameReg);
Offset += SEHFrameOffset;
+ HasWinCFI = true;
BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM))
.addImm(Reg)
.addImm(Offset)
@@ -1302,7 +1306,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
}
}
- if (NeedsWinCFI)
+ if (NeedsWinCFI && HasWinCFI)
BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue))
.setMIFlag(MachineInstr::FrameSetup);
@@ -1394,13 +1398,16 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF,
if (Fn->getCallingConv() == CallingConv::X86_INTR)
BuildMI(MBB, MBBI, DL, TII.get(X86::CLD))
.setMIFlag(MachineInstr::FrameSetup);
+
+ // At this point we know if the function has WinCFI or not.
+ MF.setHasWinCFI(HasWinCFI);
}
bool X86FrameLowering::canUseLEAForSPInEpilogue(
const MachineFunction &MF) const {
- // We can't use LEA instructions for adjusting the stack pointer if this is a
- // leaf function in the Win64 ABI. Only ADD instructions may be used to
- // deallocate the stack.
+ // We can't use LEA instructions for adjusting the stack pointer if we don't
+ // have a frame pointer in the Win64 ABI. Only ADD instructions may be used
+ // to deallocate the stack.
// This means that we can use LEA for SP in two situations:
// 1. We *aren't* using the Win64 ABI which means we are free to use LEA.
// 2. We *have* a frame pointer which means we are permitted to use LEA.
@@ -1457,7 +1464,7 @@ X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const {
UsedSize = getPSPSlotOffsetFromSP(MF) + SlotSize;
} else {
// Other funclets just need enough stack for outgoing call arguments.
- UsedSize = MF.getFrameInfo()->getMaxCallFrameSize();
+ UsedSize = MF.getFrameInfo().getMaxCallFrameSize();
}
// RBP is not included in the callee saved register block. After pushing RBP,
// everything is 16 byte aligned. Everything we allocate before an outgoing
@@ -1477,10 +1484,12 @@ static bool isTailCallOpcode(unsigned Opc) {
void X86FrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
- unsigned RetOpcode = MBBI->getOpcode();
+ Optional<unsigned> RetOpcode;
+ if (MBBI != MBB.end())
+ RetOpcode = MBBI->getOpcode();
DebugLoc DL;
if (MBBI != MBB.end())
DL = MBBI->getDebugLoc();
@@ -1493,16 +1502,16 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
bool NeedsWinCFI =
IsWin64Prologue && MF.getFunction()->needsUnwindTableEntry();
- bool IsFunclet = isFuncletReturnInstr(*MBBI);
+ bool IsFunclet = MBBI == MBB.end() ? false : isFuncletReturnInstr(*MBBI);
MachineBasicBlock *TargetMBB = nullptr;
// Get the number of bytes to allocate from the FrameInfo.
- uint64_t StackSize = MFI->getStackSize();
+ uint64_t StackSize = MFI.getStackSize();
uint64_t MaxAlign = calculateMaxStackAlign(MF);
unsigned CSSize = X86FI->getCalleeSavedFrameSize();
uint64_t NumBytes = 0;
- if (MBBI->getOpcode() == X86::CATCHRET) {
+ if (RetOpcode && *RetOpcode == X86::CATCHRET) {
// SEH shouldn't use catchret.
assert(!isAsynchronousEHPersonality(
classifyEHPersonality(MF.getFunction()->getPersonalityFn())) &&
@@ -1516,7 +1525,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r),
MachineFramePtr)
.setMIFlag(MachineInstr::FrameDestroy);
- } else if (MBBI->getOpcode() == X86::CLEANUPRET) {
+ } else if (RetOpcode && *RetOpcode == X86::CLEANUPRET) {
NumBytes = getWinEHFuncletFrameSize(MF);
assert(hasFP(MF) && "EH funclets without FP not yet implemented");
BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r),
@@ -1541,19 +1550,22 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
}
uint64_t SEHStackAllocAmt = NumBytes;
+ MachineBasicBlock::iterator FirstCSPop = MBBI;
// Skip the callee-saved pop instructions.
while (MBBI != MBB.begin()) {
MachineBasicBlock::iterator PI = std::prev(MBBI);
unsigned Opc = PI->getOpcode();
- if ((Opc != X86::POP32r || !PI->getFlag(MachineInstr::FrameDestroy)) &&
- (Opc != X86::POP64r || !PI->getFlag(MachineInstr::FrameDestroy)) &&
- Opc != X86::DBG_VALUE && !PI->isTerminator())
- break;
+ if (Opc != X86::DBG_VALUE && !PI->isTerminator()) {
+ if ((Opc != X86::POP32r || !PI->getFlag(MachineInstr::FrameDestroy)) &&
+ (Opc != X86::POP64r || !PI->getFlag(MachineInstr::FrameDestroy)))
+ break;
+ FirstCSPop = PI;
+ }
--MBBI;
}
- MachineBasicBlock::iterator FirstCSPop = MBBI;
+ MBBI = FirstCSPop;
if (TargetMBB) {
// Fill EAX/RAX with the address of the target block.
@@ -1581,14 +1593,14 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
// If there is an ADD32ri or SUB32ri of ESP immediately before this
// instruction, merge the two instructions.
- if (NumBytes || MFI->hasVarSizedObjects())
+ if (NumBytes || MFI.hasVarSizedObjects())
NumBytes += mergeSPUpdates(MBB, MBBI, true);
// If dynamic alloca is used, then reset esp to point to the last callee-saved
// slot before popping them off! Same applies for the case, when stack was
// realigned. Don't do this if this was a funclet epilogue, since the funclets
// will not do realignment or dynamic stack allocation.
- if ((TRI->needsStackRealignment(MF) || MFI->hasVarSizedObjects()) &&
+ if ((TRI->needsStackRealignment(MF) || MFI.hasVarSizedObjects()) &&
!IsFunclet) {
if (TRI->needsStackRealignment(MF))
MBBI = FirstCSPop;
@@ -1626,10 +1638,10 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
// into the epilogue. To cope with that, we insert an epilogue marker here,
// then replace it with a 'nop' if it ends up immediately after a CALL in the
// final emitted code.
- if (NeedsWinCFI)
+ if (NeedsWinCFI && MF.hasWinCFI())
BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Epilogue));
- if (!isTailCallOpcode(RetOpcode)) {
+ if (!RetOpcode || !isTailCallOpcode(*RetOpcode)) {
// Add the return addr area delta back since we are not tail calling.
int Offset = -1 * X86FI->getTCReturnAddrDelta();
assert(Offset >= 0 && "TCDelta should never be positive");
@@ -1649,7 +1661,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
// (probably?) it should be moved into here.
int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
unsigned &FrameReg) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
// We can't calculate offset from frame pointer if the stack is realigned,
// so enforce usage of stack/base pointer. The base pointer is used when we
@@ -1665,16 +1677,16 @@ int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
// object.
// We need to factor in additional offsets applied during the prologue to the
// frame, base, and stack pointer depending on which is used.
- int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea();
+ int Offset = MFI.getObjectOffset(FI) - getOffsetOfLocalArea();
const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
unsigned CSSize = X86FI->getCalleeSavedFrameSize();
- uint64_t StackSize = MFI->getStackSize();
+ uint64_t StackSize = MFI.getStackSize();
bool HasFP = hasFP(MF);
bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
int64_t FPDelta = 0;
if (IsWin64Prologue) {
- assert(!MFI->hasCalls() || (StackSize % 16) == 8);
+ assert(!MFI.hasCalls() || (StackSize % 16) == 8);
// Calculate required stack adjustment.
uint64_t FrameSize = StackSize - SlotSize;
@@ -1692,7 +1704,7 @@ int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
// restricted Win64 prologue.
// Add FPDelta to all offsets below that go through the frame pointer.
FPDelta = FrameSize - SEHFrameOffset;
- assert((!MFI->hasCalls() || (FPDelta % 16) == 0) &&
+ assert((!MFI.hasCalls() || (FPDelta % 16) == 0) &&
"FPDelta isn't aligned per the Win64 ABI!");
}
@@ -1703,7 +1715,7 @@ int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
// Skip the saved EBP.
return Offset + SlotSize + FPDelta;
} else {
- assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0);
+ assert((-(Offset + StackSize)) % MFI.getObjectAlignment(FI) == 0);
return Offset + StackSize;
}
} else if (TRI->needsStackRealignment(MF)) {
@@ -1711,7 +1723,7 @@ int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
// Skip the saved EBP.
return Offset + SlotSize + FPDelta;
} else {
- assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0);
+ assert((-(Offset + StackSize)) % MFI.getObjectAlignment(FI) == 0);
return Offset + StackSize;
}
// FIXME: Support tail calls
@@ -1736,9 +1748,9 @@ X86FrameLowering::getFrameIndexReferencePreferSP(const MachineFunction &MF,
int FI, unsigned &FrameReg,
bool IgnoreSPUpdates) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
// Does not include any dynamic realign.
- const uint64_t StackSize = MFI->getStackSize();
+ const uint64_t StackSize = MFI.getStackSize();
// LLVM arranges the stack as follows:
// ...
// ARG2
@@ -1772,7 +1784,7 @@ X86FrameLowering::getFrameIndexReferencePreferSP(const MachineFunction &MF,
// answer we give is relative to the SP after the prologue, and not the
// SP in the middle of the function.
- if (MFI->isFixedObjectIndex(FI) && TRI->needsStackRealignment(MF) &&
+ if (MFI.isFixedObjectIndex(FI) && TRI->needsStackRealignment(MF) &&
!STI.isTargetWin64())
return getFrameIndexReference(MF, FI, FrameReg);
@@ -1804,7 +1816,7 @@ X86FrameLowering::getFrameIndexReferencePreferSP(const MachineFunction &MF,
//
// A is the incoming stack pointer.
// (B - A) is the local area offset (-8 for x86-64) [1]
- // (C - A) is the Offset returned by MFI->getObjectOffset for Obj0 [2]
+ // (C - A) is the Offset returned by MFI.getObjectOffset for Obj0 [2]
//
// |(E - B)| is the StackSize (absolute value, positive). For a
// stack that grown down, this works out to be (B - E). [3]
@@ -1817,7 +1829,7 @@ X86FrameLowering::getFrameIndexReferencePreferSP(const MachineFunction &MF,
//
// Get the Offset from the StackPointer
- int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea();
+ int Offset = MFI.getObjectOffset(FI) - getOffsetOfLocalArea();
return Offset + StackSize;
}
@@ -1825,7 +1837,7 @@ X86FrameLowering::getFrameIndexReferencePreferSP(const MachineFunction &MF,
bool X86FrameLowering::assignCalleeSavedSpillSlots(
MachineFunction &MF, const TargetRegisterInfo *TRI,
std::vector<CalleeSavedInfo> &CSI) const {
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
unsigned CalleeSavedFrameSize = 0;
@@ -1834,7 +1846,7 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
if (hasFP(MF)) {
// emitPrologue always spills frame register the first thing.
SpillSlotOffset -= SlotSize;
- MFI->CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
+ MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
// Since emitPrologue and emitEpilogue will handle spilling and restoring of
// the frame register, we can delete it from CSI list and not have to worry
@@ -1858,7 +1870,7 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
SpillSlotOffset -= SlotSize;
CalleeSavedFrameSize += SlotSize;
- int SlotIndex = MFI->CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
+ int SlotIndex = MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
CSI[i - 1].setFrameIdx(SlotIndex);
}
@@ -1876,9 +1888,9 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
// spill into slot
SpillSlotOffset -= RC->getSize();
int SlotIndex =
- MFI->CreateFixedSpillStackObject(RC->getSize(), SpillSlotOffset);
+ MFI.CreateFixedSpillStackObject(RC->getSize(), SpillSlotOffset);
CSI[i - 1].setFrameIdx(SlotIndex);
- MFI->ensureMaxAlignment(RC->getAlignment());
+ MFI.ensureMaxAlignment(RC->getAlignment());
}
return true;
@@ -1957,7 +1969,7 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
if (CSI.empty())
return false;
- if (isFuncletReturnInstr(*MI) && STI.isOSWindows()) {
+ if (MI != MBB.end() && isFuncletReturnInstr(*MI) && STI.isOSWindows()) {
// Don't restore CSRs in 32-bit EH funclets. Matches
// spillCalleeSavedRegisters.
if (STI.is32Bit())
@@ -2005,7 +2017,7 @@ void X86FrameLowering::determineCalleeSaves(MachineFunction &MF,
RegScavenger *RS) const {
TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
@@ -2020,7 +2032,7 @@ void X86FrameLowering::determineCalleeSaves(MachineFunction &MF,
// ...
// }
// [EBP]
- MFI->CreateFixedObject(-TailCallReturnAddrDelta,
+ MFI.CreateFixedObject(-TailCallReturnAddrDelta,
TailCallReturnAddrDelta - SlotSize, true);
}
@@ -2029,8 +2041,8 @@ void X86FrameLowering::determineCalleeSaves(MachineFunction &MF,
SavedRegs.set(TRI->getBaseRegister());
// Allocate a spill slot for EBP if we have a base pointer and EH funclets.
- if (MF.getMMI().hasEHFunclets()) {
- int FI = MFI->CreateSpillStackObject(SlotSize, SlotSize);
+ if (MF.hasEHFunclets()) {
+ int FI = MFI.CreateSpillStackObject(SlotSize, SlotSize);
X86FI->setHasSEHFramePtrSave(true);
X86FI->setSEHFramePtrSaveIndex(FI);
}
@@ -2091,7 +2103,7 @@ static const uint64_t kSplitStackAvailable = 256;
void X86FrameLowering::adjustForSegmentedStacks(
MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
uint64_t StackSize;
unsigned TlsReg, TlsOffset;
DebugLoc DL;
@@ -2114,7 +2126,7 @@ void X86FrameLowering::adjustForSegmentedStacks(
// Eventually StackSize will be calculated by a link-time pass; which will
// also decide whether checking code needs to be injected into this particular
// prologue.
- StackSize = MFI->getStackSize();
+ StackSize = MFI.getStackSize();
// Do not generate a prologue for functions with a stack of size zero
if (StackSize == 0)
@@ -2360,7 +2372,7 @@ static unsigned getHiPELiteral(
/// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
void X86FrameLowering::adjustForHiPEPrologue(
MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
DebugLoc DL;
// To support shrink-wrapping we would need to insert the new blocks
@@ -2380,7 +2392,7 @@ void X86FrameLowering::adjustForHiPEPrologue(
const unsigned Guaranteed = HipeLeafWords * SlotSize;
unsigned CallerStkArity = MF.getFunction()->arg_size() > CCRegisteredArgs ?
MF.getFunction()->arg_size() - CCRegisteredArgs : 0;
- unsigned MaxStack = MFI->getStackSize() + CallerStkArity*SlotSize + SlotSize;
+ unsigned MaxStack = MFI.getStackSize() + CallerStkArity*SlotSize + SlotSize;
assert(STI.isTargetLinux() &&
"HiPE prologue is only supported on Linux operating systems.");
@@ -2392,7 +2404,7 @@ void X86FrameLowering::adjustForHiPEPrologue(
// b) outgoing on-stack parameter areas, and
// c) the minimum stack space this function needs to make available for the
// functions it calls (a tunable ABI property).
- if (MFI->hasCalls()) {
+ if (MFI.hasCalls()) {
unsigned MoreStackForCalls = 0;
for (auto &MBB : MF) {
@@ -2599,8 +2611,7 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
// GNU_ARGS_SIZE.
// TODO: We don't need to reset this between subsequent functions,
// if it didn't change.
- bool HasDwarfEHHandlers = !WindowsCFI &&
- !MF.getMMI().getLandingPads().empty();
+ bool HasDwarfEHHandlers = !WindowsCFI && !MF.getLandingPads().empty();
if (HasDwarfEHHandlers && !isDestroy &&
MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences())
@@ -2728,12 +2739,12 @@ MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers(
unsigned BasePtr = TRI->getBaseRegister();
WinEHFuncInfo &FuncInfo = *MF.getWinEHFuncInfo();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
// FIXME: Don't set FrameSetup flag in catchret case.
int FI = FuncInfo.EHRegNodeFrameIndex;
- int EHRegSize = MFI->getObjectSize(FI);
+ int EHRegSize = MFI.getObjectSize(FI);
if (RestoreSP) {
// MOV32rm -EHRegSize(%ebp), %esp
@@ -2850,7 +2861,7 @@ struct X86FrameSortingComparator {
// of uses and size of object in order to minimize code size.
void X86FrameLowering::orderFrameObjects(
const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
// Don't waste time if there's nothing to do.
if (ObjectsToAllocate.empty())
@@ -2861,16 +2872,16 @@ void X86FrameLowering::orderFrameObjects(
// it easier to index into when we're counting "uses" down below.
// We want to be able to easily/cheaply access an object by simply
// indexing into it, instead of having to search for it every time.
- std::vector<X86FrameSortingObject> SortingObjects(MFI->getObjectIndexEnd());
+ std::vector<X86FrameSortingObject> SortingObjects(MFI.getObjectIndexEnd());
// Walk the objects we care about and mark them as such in our working
// struct.
for (auto &Obj : ObjectsToAllocate) {
SortingObjects[Obj].IsValid = true;
SortingObjects[Obj].ObjectIndex = Obj;
- SortingObjects[Obj].ObjectAlignment = MFI->getObjectAlignment(Obj);
+ SortingObjects[Obj].ObjectAlignment = MFI.getObjectAlignment(Obj);
// Set the size.
- int ObjectSize = MFI->getObjectSize(Obj);
+ int ObjectSize = MFI.getObjectSize(Obj);
if (ObjectSize == 0)
// Variable size. Just use 4.
SortingObjects[Obj].ObjectSize = 4;
@@ -2890,7 +2901,7 @@ void X86FrameLowering::orderFrameObjects(
int Index = MO.getIndex();
// Check to see if it falls within our range, and is tagged
// to require ordering.
- if (Index >= 0 && Index < MFI->getObjectIndexEnd() &&
+ if (Index >= 0 && Index < MFI.getObjectIndexEnd() &&
SortingObjects[Index].IsValid)
SortingObjects[Index].ObjectNumUses++;
}
@@ -2938,7 +2949,7 @@ void X86FrameLowering::processFunctionBeforeFrameFinalized(
// If this function isn't doing Win64-style C++ EH, we don't need to do
// anything.
const Function *Fn = MF.getFunction();
- if (!STI.is64Bit() || !MF.getMMI().hasEHFunclets() ||
+ if (!STI.is64Bit() || !MF.hasEHFunclets() ||
classifyEHPersonality(Fn->getPersonalityFn()) != EHPersonality::MSVC_CXX)
return;
@@ -2947,21 +2958,21 @@ void X86FrameLowering::processFunctionBeforeFrameFinalized(
// object, so that we can allocate a slot immediately following it. If there
// were no fixed objects, use offset -SlotSize, which is immediately after the
// return address. Fixed objects have negative frame indices.
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo();
int64_t MinFixedObjOffset = -SlotSize;
- for (int I = MFI->getObjectIndexBegin(); I < 0; ++I)
- MinFixedObjOffset = std::min(MinFixedObjOffset, MFI->getObjectOffset(I));
+ for (int I = MFI.getObjectIndexBegin(); I < 0; ++I)
+ MinFixedObjOffset = std::min(MinFixedObjOffset, MFI.getObjectOffset(I));
for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) {
for (WinEHHandlerType &H : TBME.HandlerArray) {
int FrameIndex = H.CatchObj.FrameIndex;
if (FrameIndex != INT_MAX) {
// Ensure alignment.
- unsigned Align = MFI->getObjectAlignment(FrameIndex);
+ unsigned Align = MFI.getObjectAlignment(FrameIndex);
MinFixedObjOffset -= std::abs(MinFixedObjOffset) % Align;
- MinFixedObjOffset -= MFI->getObjectSize(FrameIndex);
- MFI->setObjectOffset(FrameIndex, MinFixedObjOffset);
+ MinFixedObjOffset -= MFI.getObjectSize(FrameIndex);
+ MFI.setObjectOffset(FrameIndex, MinFixedObjOffset);
}
}
}
@@ -2970,7 +2981,7 @@ void X86FrameLowering::processFunctionBeforeFrameFinalized(
MinFixedObjOffset -= std::abs(MinFixedObjOffset) % 8;
int64_t UnwindHelpOffset = MinFixedObjOffset - SlotSize;
int UnwindHelpFI =
- MFI->CreateFixedObject(SlotSize, UnwindHelpOffset, /*Immutable=*/false);
+ MFI.CreateFixedObject(SlotSize, UnwindHelpOffset, /*Immutable=*/false);
EHInfo.UnwindHelpFrameIdx = UnwindHelpFI;
// Store -2 into UnwindHelp on function entry. We have to scan forwards past
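
The X86FrameLowering.cpp hunks above are dominated by one mechanical change: the frame info is now obtained as a MachineFrameInfo reference rather than a pointer, so every MFI-> call site becomes MFI. and a null frame info is no longer representable at these call sites. A minimal standalone sketch of that migration pattern, using hypothetical types rather than the LLVM classes, would be:

#include <cstdint>

// Hypothetical stand-ins, not the LLVM classes: the accessor hands back a
// reference, so callers drop '->' and never need a null check.
struct FrameInfo {
  uint64_t StackSize = 0;
  uint64_t getStackSize() const { return StackSize; }
};

struct Function {
  FrameInfo FI;
  // Old style returned a pointer: FrameInfo *getFrameInfo() { return &FI; }
  FrameInfo &getFrameInfo() { return FI; }
};

int main() {
  Function MF;
  FrameInfo &MFI = MF.getFrameInfo();            // was: FrameInfo *MFI = ...
  return static_cast<int>(MFI.getStackSize());   // was: MFI->getStackSize()
}
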
diff --git a/contrib/llvm/lib/Target/X86/X86FrameLowering.h b/contrib/llvm/lib/Target/X86/X86FrameLowering.h
index 4a01014ee545..e1b04d6dc300 100644
--- a/contrib/llvm/lib/Target/X86/X86FrameLowering.h
+++ b/contrib/llvm/lib/Target/X86/X86FrameLowering.h
@@ -49,11 +49,10 @@ public:
/// Emit target stack probe code. This is required for all
/// large stack allocations on Windows. The caller is required to materialize
- /// the number of bytes to probe in RAX/EAX. Returns instruction just
- /// after the expansion.
- MachineInstr *emitStackProbe(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- const DebugLoc &DL, bool InProlog) const;
+ /// the number of bytes to probe in RAX/EAX.
+ void emitStackProbe(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
+ bool InProlog) const;
/// Replace a StackProbe inline-stub with the actual probe code inline.
void inlineStackProbe(MachineFunction &MF,
@@ -179,22 +178,19 @@ private:
uint64_t calculateMaxStackAlign(const MachineFunction &MF) const;
/// Emit target stack probe as a call to a helper function
- MachineInstr *emitStackProbeCall(MachineFunction &MF, MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- const DebugLoc &DL, bool InProlog) const;
+ void emitStackProbeCall(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
+ bool InProlog) const;
/// Emit target stack probe as an inline sequence.
- MachineInstr *emitStackProbeInline(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- const DebugLoc &DL, bool InProlog) const;
+ void emitStackProbeInline(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, bool InProlog) const;
/// Emit a stub to later inline the target stack probe.
- MachineInstr *emitStackProbeInlineStub(MachineFunction &MF,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- const DebugLoc &DL,
- bool InProlog) const;
+ void emitStackProbeInlineStub(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ const DebugLoc &DL, bool InProlog) const;
/// Aligns the stack pointer by ANDing it with -MaxAlign.
void BuildStackAlignAND(MachineBasicBlock &MBB,
diff --git a/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 7d53b3db6175..8b66790679d9 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -24,6 +24,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
@@ -165,7 +166,7 @@ namespace {
: SelectionDAGISel(tm, OptLevel), OptForSize(false),
OptForMinSize(false) {}
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "X86 DAG->DAG Instruction Selection";
}
@@ -228,6 +229,7 @@ namespace {
SDValue &Index, SDValue &Disp,
SDValue &Segment,
SDValue &NodeWithChain);
+ bool selectRelocImm(SDValue N, SDValue &Op);
bool tryFoldLoad(SDNode *P, SDValue N,
SDValue &Base, SDValue &Scale,
@@ -1234,7 +1236,7 @@ bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
case ISD::UMUL_LOHI:
// A mul_lohi where we need the low part can be folded as a plain multiply.
if (N.getResNo() != 0) break;
- // FALL THROUGH
+ LLVM_FALLTHROUGH;
case ISD::MUL:
case X86ISD::MUL_IMM:
// X*[3,5,9] -> X+X*[2,4,8]
@@ -1435,7 +1437,7 @@ bool X86DAGToDAGISel::selectVectorAddr(SDNode *Parent, SDValue N, SDValue &Base,
SDLoc DL(N);
Base = Mgs->getBasePtr();
Index = Mgs->getIndex();
- unsigned ScalarSize = Mgs->getValue().getValueType().getScalarSizeInBits();
+ unsigned ScalarSize = Mgs->getValue().getScalarValueSizeInBits();
Scale = getI8Imm(ScalarSize/8, DL);
// If Base is 0, the whole address is in index and the Scale is 1
@@ -1512,16 +1514,39 @@ bool X86DAGToDAGISel::selectScalarSSELoad(SDNode *Root,
SDValue &Scale, SDValue &Index,
SDValue &Disp, SDValue &Segment,
SDValue &PatternNodeWithChain) {
- if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) {
+ // We can allow a full vector load here since narrowing a load is ok.
+ if (ISD::isNON_EXTLoad(N.getNode())) {
+ PatternNodeWithChain = N;
+ if (IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
+ IsLegalToFold(PatternNodeWithChain, *N->use_begin(), Root, OptLevel)) {
+ LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
+ return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
+ Segment);
+ }
+ }
+
+ // We can also match the special zero extended load opcode.
+ if (N.getOpcode() == X86ISD::VZEXT_LOAD) {
+ PatternNodeWithChain = N;
+ if (IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
+ IsLegalToFold(PatternNodeWithChain, *N->use_begin(), Root, OptLevel)) {
+ auto *MI = cast<MemIntrinsicSDNode>(PatternNodeWithChain);
+ return selectAddr(MI, MI->getBasePtr(), Base, Scale, Index, Disp,
+ Segment);
+ }
+ }
+
+ // Need to make sure that the SCALAR_TO_VECTOR and load are both only used
+ // once. Otherwise the load might get duplicated and the chain output of the
+ // duplicate load will not be observed by all dependencies.
+ if (N.getOpcode() == ISD::SCALAR_TO_VECTOR && N.getNode()->hasOneUse()) {
PatternNodeWithChain = N.getOperand(0);
if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
- PatternNodeWithChain.hasOneUse() &&
- IsProfitableToFold(N.getOperand(0), N.getNode(), Root) &&
- IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) {
+ IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
+ IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel)) {
LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
- if (!selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
- return false;
- return true;
+ return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
+ Segment);
}
}
@@ -1530,18 +1555,18 @@ bool X86DAGToDAGISel::selectScalarSSELoad(SDNode *Root,
if (N.getOpcode() == X86ISD::VZEXT_MOVL && N.getNode()->hasOneUse() &&
// Check to see if the top elements are all zeros (or bitcast of zeros).
N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
- N.getOperand(0).getNode()->hasOneUse() &&
- ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) &&
- N.getOperand(0).getOperand(0).hasOneUse() &&
- IsProfitableToFold(N.getOperand(0), N.getNode(), Root) &&
- IsLegalToFold(N.getOperand(0), N.getNode(), Root, OptLevel)) {
- // Okay, this is a zero extending load. Fold it.
- LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0));
- if (!selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
- return false;
- PatternNodeWithChain = SDValue(LD, 0);
- return true;
+ N.getOperand(0).getNode()->hasOneUse()) {
+ PatternNodeWithChain = N.getOperand(0).getOperand(0);
+ if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
+ IsProfitableToFold(PatternNodeWithChain, N.getNode(), Root) &&
+ IsLegalToFold(PatternNodeWithChain, N.getNode(), Root, OptLevel)) {
+ // Okay, this is a zero extending load. Fold it.
+ LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
+ return selectAddr(LD, LD->getBasePtr(), Base, Scale, Index, Disp,
+ Segment);
+ }
}
+
return false;
}
@@ -1563,16 +1588,21 @@ bool X86DAGToDAGISel::selectMOV64Imm32(SDValue N, SDValue &Imm) {
"Unexpected node type for MOV32ri64");
N = N.getOperand(0);
- if (N->getOpcode() != ISD::TargetConstantPool &&
- N->getOpcode() != ISD::TargetJumpTable &&
- N->getOpcode() != ISD::TargetGlobalAddress &&
- N->getOpcode() != ISD::TargetExternalSymbol &&
- N->getOpcode() != ISD::MCSymbol &&
- N->getOpcode() != ISD::TargetBlockAddress)
+ // At least GNU as does not accept 'movl' for TPOFF relocations.
+ // FIXME: We could use 'movl' when we know we are targeting MC.
+ if (N->getOpcode() == ISD::TargetGlobalTLSAddress)
return false;
Imm = N;
- return TM.getCodeModel() == CodeModel::Small;
+ if (N->getOpcode() != ISD::TargetGlobalAddress)
+ return TM.getCodeModel() == CodeModel::Small;
+
+ Optional<ConstantRange> CR =
+ cast<GlobalAddressSDNode>(N)->getGlobal()->getAbsoluteSymbolRange();
+ if (!CR)
+ return TM.getCodeModel() == CodeModel::Small;
+
+ return CR->getUnsignedMax().ult(1ull << 32);
}
bool X86DAGToDAGISel::selectLEA64_32Addr(SDValue N, SDValue &Base,
@@ -1704,6 +1734,48 @@ bool X86DAGToDAGISel::selectTLSADDRAddr(SDValue N, SDValue &Base,
return true;
}
+bool X86DAGToDAGISel::selectRelocImm(SDValue N, SDValue &Op) {
+ if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
+ Op = CurDAG->getTargetConstant(CN->getAPIntValue(), SDLoc(CN),
+ N.getValueType());
+ return true;
+ }
+
+ // Keep track of the original value type and whether this value was
+ // truncated. If we see a truncation from pointer type to VT that truncates
+ // bits that are known to be zero, we can use a narrow reference.
+ EVT VT = N.getValueType();
+ bool WasTruncated = false;
+ if (N.getOpcode() == ISD::TRUNCATE) {
+ WasTruncated = true;
+ N = N.getOperand(0);
+ }
+
+ if (N.getOpcode() != X86ISD::Wrapper)
+ return false;
+
+ // We can only use non-GlobalValues as immediates if they were not truncated,
+ // as we do not have any range information. If we have a GlobalValue and the
+ // address was not truncated, we can select it as an operand directly.
+ unsigned Opc = N.getOperand(0)->getOpcode();
+ if (Opc != ISD::TargetGlobalAddress || !WasTruncated) {
+ Op = N.getOperand(0);
+ // We can only select the operand directly if we didn't have to look past a
+ // truncate.
+ return !WasTruncated;
+ }
+
+ // Check that the global's range fits into VT.
+ auto *GA = cast<GlobalAddressSDNode>(N.getOperand(0));
+ Optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange();
+ if (!CR || CR->getUnsignedMax().uge(1ull << VT.getSizeInBits()))
+ return false;
+
+ // Okay, we can use a narrow reference.
+ Op = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(N), VT,
+ GA->getOffset(), GA->getTargetFlags());
+ return true;
+}
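
A standalone sketch of the rule the new selectRelocImm hunk encodes (names and signature below are my own, not LLVM API): after looking through a TRUNCATE, a global address may only be folded as a narrow immediate when its absolute-symbol range proves that every truncated-away bit is zero.

#include <cstdint>
#include <optional>

// Illustrative only: UnsignedMax plays the role of
// getAbsoluteSymbolRange()->getUnsignedMax().
bool canUseNarrowReference(std::optional<uint64_t> UnsignedMax,
                           unsigned NarrowBits, bool WasTruncated) {
  if (!WasTruncated)
    return true;                 // nothing was dropped, select directly
  if (!UnsignedMax)
    return false;                // no range info, cannot prove safety
  return NarrowBits < 64 && *UnsignedMax < (1ull << NarrowBits);
}

int main() {
  // A symbol whose values are known to stay below 2^16 survives an i16 use.
  return canUseNarrowReference(0xFFFFu, 16, true) ? 0 : 1;
}
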
bool X86DAGToDAGISel::tryFoldLoad(SDNode *P, SDValue N,
SDValue &Base, SDValue &Scale,
@@ -2700,7 +2772,7 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
case InlineAsm::Constraint_i:
// FIXME: It seems strange that 'i' is needed here since it's supposed to
// be an immediate and not a memory constraint.
- // Fallthrough.
+ LLVM_FALLTHROUGH;
case InlineAsm::Constraint_o: // offsetable ??
case InlineAsm::Constraint_v: // not offsetable ??
case InlineAsm::Constraint_m: // memory
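
The two hunks above that replace "// FALL THROUGH" comments follow the same shape; a rough analogue, assuming LLVM_FALLTHROUGH resolves to a fallthrough attribute such as the standard [[fallthrough]] where the compiler provides one, is below. The attribute makes the intentional fall through visible to -Wimplicit-fallthrough instead of relying on a plain comment.

#include <cstdio>

int classify(int kind) {
  switch (kind) {
  case 1:
    std::puts("low half of a mul_lohi, fold as a plain multiply");
    [[fallthrough]];            // was: // FALL THROUGH
  case 2:
    return 2;                   // shared handling for both cases
  default:
    return 0;
  }
}

int main() { return classify(1) == 2 ? 0 : 1; }
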
diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
index f499e56de348..b293dfa98f82 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -17,6 +17,7 @@
#include "X86CallingConv.h"
#include "X86FrameLowering.h"
#include "X86InstrBuilder.h"
+#include "X86IntrinsicsInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86ShuffleDecodeConstantPool.h"
#include "X86TargetMachine.h"
@@ -53,10 +54,10 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetOptions.h"
-#include "X86IntrinsicsInfo.h"
+#include <algorithm>
#include <bitset>
-#include <numeric>
#include <cctype>
+#include <numeric>
using namespace llvm;
#define DEBUG_TYPE "x86-isel"
@@ -104,7 +105,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
addBypassSlowDiv(64, 16);
}
- if (Subtarget.isTargetKnownWindowsMSVC()) {
+ if (Subtarget.isTargetKnownWindowsMSVC() ||
+ Subtarget.isTargetWindowsItanium()) {
// Setup Windows compiler runtime calls.
setLibcallName(RTLIB::SDIV_I64, "_alldiv");
setLibcallName(RTLIB::UDIV_I64, "_aulldiv");
@@ -286,7 +288,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UDIV, VT, Expand);
setOperationAction(ISD::SREM, VT, Expand);
setOperationAction(ISD::UREM, VT, Expand);
+ }
+ for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
+ if (VT == MVT::i64 && !Subtarget.is64Bit())
+ continue;
// Add/Sub overflow ops with MVT::Glues are lowered to EFLAGS dependences.
setOperationAction(ISD::ADDC, VT, Custom);
setOperationAction(ISD::ADDE, VT, Custom);
@@ -349,7 +355,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// Special handling for half-precision floating point conversions.
// If we don't have F16C support, then lower half float conversions
// into library calls.
- if (Subtarget.useSoftFloat() || !Subtarget.hasF16C()) {
+ if (Subtarget.useSoftFloat() ||
+ (!Subtarget.hasF16C() && !Subtarget.hasAVX512())) {
setOperationAction(ISD::FP16_TO_FP, MVT::f32, Expand);
setOperationAction(ISD::FP_TO_FP16, MVT::f32, Expand);
}
@@ -484,8 +491,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (!Subtarget.useSoftFloat() && X86ScalarSSEf64) {
// f32 and f64 use SSE.
// Set up the FP register classes.
- addRegisterClass(MVT::f32, &X86::FR32RegClass);
- addRegisterClass(MVT::f64, &X86::FR64RegClass);
+ addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
+ : &X86::FR32RegClass);
+ addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass
+ : &X86::FR64RegClass);
for (auto VT : { MVT::f32, MVT::f64 }) {
// Use ANDPD to simulate FABS.
@@ -514,7 +523,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
} else if (UseX87 && X86ScalarSSEf32) {
// Use SSE for f32, x87 for f64.
// Set up the FP register classes.
- addRegisterClass(MVT::f32, &X86::FR32RegClass);
+ addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
+ : &X86::FR32RegClass);
addRegisterClass(MVT::f64, &X86::RFP64RegClass);
// Use ANDPS to simulate FABS.
@@ -590,14 +600,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UNDEF, MVT::f80, Expand);
setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
{
- APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended);
+ APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended());
addLegalFPImmediate(TmpFlt); // FLD0
TmpFlt.changeSign();
addLegalFPImmediate(TmpFlt); // FLD0/FCHS
bool ignored;
APFloat TmpFlt2(+1.0);
- TmpFlt2.convert(APFloat::x87DoubleExtended, APFloat::rmNearestTiesToEven,
+ TmpFlt2.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven,
&ignored);
addLegalFPImmediate(TmpFlt2); // FLD1
TmpFlt2.changeSign();
@@ -717,10 +727,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) {
- addRegisterClass(MVT::v4f32, &X86::VR128RegClass);
+ addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass
+ : &X86::VR128RegClass);
setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
setOperationAction(ISD::FABS, MVT::v4f32, Custom);
+ setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Custom);
setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
@@ -730,14 +742,19 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
- addRegisterClass(MVT::v2f64, &X86::VR128RegClass);
+ addRegisterClass(MVT::v2f64, Subtarget.hasVLX() ? &X86::VR128XRegClass
+ : &X86::VR128RegClass);
// FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
// registers cannot be used even for integer operations.
- addRegisterClass(MVT::v16i8, &X86::VR128RegClass);
- addRegisterClass(MVT::v8i16, &X86::VR128RegClass);
- addRegisterClass(MVT::v4i32, &X86::VR128RegClass);
- addRegisterClass(MVT::v2i64, &X86::VR128RegClass);
+ addRegisterClass(MVT::v16i8, Subtarget.hasVLX() ? &X86::VR128XRegClass
+ : &X86::VR128RegClass);
+ addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass
+ : &X86::VR128RegClass);
+ addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass
+ : &X86::VR128RegClass);
+ addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass
+ : &X86::VR128RegClass);
setOperationAction(ISD::MUL, MVT::v16i8, Custom);
setOperationAction(ISD::MUL, MVT::v4i32, Custom);
@@ -751,6 +768,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::MUL, MVT::v8i16, Legal);
setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
setOperationAction(ISD::FABS, MVT::v2f64, Custom);
+ setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);
setOperationAction(ISD::SMAX, MVT::v8i16, Legal);
setOperationAction(ISD::UMAX, MVT::v16i8, Legal);
@@ -776,7 +794,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::CTTZ, MVT::v16i8, Custom);
setOperationAction(ISD::CTTZ, MVT::v8i16, Custom);
setOperationAction(ISD::CTTZ, MVT::v4i32, Custom);
- // ISD::CTTZ v2i64 - scalarization is faster.
+ setOperationAction(ISD::CTTZ, MVT::v2i64, Custom);
// Custom lower build_vector, vector_shuffle, and extract_vector_elt.
for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
@@ -828,16 +846,17 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
- setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i8, Custom);
setOperationAction(ISD::UINT_TO_FP, MVT::v4i16, Custom);
- // As there is no 64-bit GPR available, we need build a special custom
- // sequence to convert from v2i32 to v2f32.
- if (!Subtarget.is64Bit())
- setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
+
+ // Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion.
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
@@ -872,8 +891,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom);
setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
- // ISD::CTLZ v4i32 - scalarization is faster.
- // ISD::CTLZ v2i64 - scalarization is faster.
+ setOperationAction(ISD::CTLZ, MVT::v4i32, Custom);
+ setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
}
if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
@@ -946,12 +965,18 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (!Subtarget.useSoftFloat() && Subtarget.hasFp256()) {
bool HasInt256 = Subtarget.hasInt256();
- addRegisterClass(MVT::v32i8, &X86::VR256RegClass);
- addRegisterClass(MVT::v16i16, &X86::VR256RegClass);
- addRegisterClass(MVT::v8i32, &X86::VR256RegClass);
- addRegisterClass(MVT::v8f32, &X86::VR256RegClass);
- addRegisterClass(MVT::v4i64, &X86::VR256RegClass);
- addRegisterClass(MVT::v4f64, &X86::VR256RegClass);
+ addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass
+ : &X86::VR256RegClass);
+ addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass
+ : &X86::VR256RegClass);
+ addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass
+ : &X86::VR256RegClass);
+ addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass
+ : &X86::VR256RegClass);
+ addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? &X86::VR256XRegClass
+ : &X86::VR256RegClass);
+ addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass
+ : &X86::VR256RegClass);
for (auto VT : { MVT::v8f32, MVT::v4f64 }) {
setOperationAction(ISD::FFLOOR, VT, Legal);
@@ -961,6 +986,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FNEARBYINT, VT, Legal);
setOperationAction(ISD::FNEG, VT, Custom);
setOperationAction(ISD::FABS, VT, Custom);
+ setOperationAction(ISD::FCOPYSIGN, VT, Custom);
}
// (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
@@ -1011,16 +1037,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
setOperationAction(ISD::CTPOP, VT, Custom);
setOperationAction(ISD::CTTZ, VT, Custom);
- }
-
- // ISD::CTLZ v8i32/v4i64 - scalarization is faster without AVX2
- // as we end up splitting the 256-bit vectors.
- for (auto VT : { MVT::v32i8, MVT::v16i16 })
setOperationAction(ISD::CTLZ, VT, Custom);
-
- if (HasInt256)
- for (auto VT : { MVT::v8i32, MVT::v4i64 })
- setOperationAction(ISD::CTLZ, VT, Custom);
+ }
if (Subtarget.hasAnyFMA()) {
for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,
@@ -1171,12 +1189,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FNEG, VT, Custom);
setOperationAction(ISD::FABS, VT, Custom);
setOperationAction(ISD::FMA, VT, Legal);
+ setOperationAction(ISD::FCOPYSIGN, VT, Custom);
}
setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v8i1, Custom);
setOperationAction(ISD::SINT_TO_FP, MVT::v16i1, Custom);
@@ -1216,10 +1236,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
} else {
- setOperationAction(ISD::MLOAD, MVT::v8i32, Custom);
- setOperationAction(ISD::MLOAD, MVT::v8f32, Custom);
- setOperationAction(ISD::MSTORE, MVT::v8i32, Custom);
- setOperationAction(ISD::MSTORE, MVT::v8f32, Custom);
+ for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
+ MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
+ setOperationAction(ISD::MLOAD, VT, Custom);
+ setOperationAction(ISD::MSTORE, VT, Custom);
+ }
}
setOperationAction(ISD::TRUNCATE, MVT::i1, Custom);
setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
@@ -1230,18 +1251,23 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::VSELECT, MVT::v16i1, Expand);
if (Subtarget.hasDQI()) {
setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i64, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i64, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v4i64, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v4i64, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
+
if (Subtarget.hasVLX()) {
- setOperationAction(ISD::SINT_TO_FP, MVT::v4i64, Legal);
- setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
- setOperationAction(ISD::UINT_TO_FP, MVT::v4i64, Legal);
- setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
- setOperationAction(ISD::FP_TO_SINT, MVT::v4i64, Legal);
- setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
- setOperationAction(ISD::FP_TO_UINT, MVT::v4i64, Legal);
- setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
+ // Fast v2f32 SINT_TO_FP( v2i32 ) custom conversion.
+ setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom);
}
}
if (Subtarget.hasVLX()) {
@@ -1250,7 +1276,6 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Legal);
setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
- setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
setOperationAction(ISD::ZERO_EXTEND, MVT::v4i32, Custom);
@@ -1293,6 +1318,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::FNEARBYINT, VT, Legal);
}
+ setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i64, Custom);
+ setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v16i32, Custom);
+
+ // Without BWI we need to use custom lowering to handle MVT::v64i8 input.
+ setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v64i8, Custom);
+ setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, MVT::v64i8, Custom);
+
setOperationAction(ISD::CONCAT_VECTORS, MVT::v8f64, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i64, Custom);
setOperationAction(ISD::CONCAT_VECTORS, MVT::v16f32, Custom);
@@ -1339,13 +1371,17 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SRL, VT, Custom);
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
- setOperationAction(ISD::AND, VT, Legal);
- setOperationAction(ISD::OR, VT, Legal);
- setOperationAction(ISD::XOR, VT, Legal);
setOperationAction(ISD::CTPOP, VT, Custom);
setOperationAction(ISD::CTTZ, VT, Custom);
}
+ // Need to promote to 64-bit even though we have 32-bit masked instructions
+ // because the IR optimizers rearrange bitcasts around logic ops leaving
+ // too many variations to handle if we don't promote them.
+ setOperationPromotedToType(ISD::AND, MVT::v16i32, MVT::v8i64);
+ setOperationPromotedToType(ISD::OR, MVT::v16i32, MVT::v8i64);
+ setOperationPromotedToType(ISD::XOR, MVT::v16i32, MVT::v8i64);
+
if (Subtarget.hasCDI()) {
setOperationAction(ISD::CTLZ, MVT::v8i64, Legal);
setOperationAction(ISD::CTLZ, MVT::v16i32, Legal);
@@ -1377,12 +1413,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
} // Subtarget.hasCDI()
if (Subtarget.hasDQI()) {
- if (Subtarget.hasVLX()) {
- setOperationAction(ISD::MUL, MVT::v2i64, Legal);
- setOperationAction(ISD::MUL, MVT::v4i64, Legal);
- }
+ // NonVLX sub-targets extend 128/256 vectors to use the 512 version.
+ setOperationAction(ISD::MUL, MVT::v2i64, Legal);
+ setOperationAction(ISD::MUL, MVT::v4i64, Legal);
setOperationAction(ISD::MUL, MVT::v8i64, Legal);
}
+
// Custom lower several nodes.
for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
@@ -1413,6 +1449,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::MSCATTER, VT, Custom);
}
for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32 }) {
+ setOperationPromotedToType(ISD::LOAD, VT, MVT::v8i64);
setOperationPromotedToType(ISD::SELECT, VT, MVT::v8i64);
}
}// has AVX-512
@@ -1447,6 +1484,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v64i8, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i16, Custom);
setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i8, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v32i1, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v64i1, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v32i16, Custom);
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v64i8, Custom);
setOperationAction(ISD::SELECT, MVT::v32i1, Custom);
@@ -1486,10 +1525,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UMIN, MVT::v64i8, Legal);
setOperationAction(ISD::UMIN, MVT::v32i16, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v32i16, Custom);
+
setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
- setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
- if (Subtarget.hasVLX())
+ if (Subtarget.hasVLX()) {
+ setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
+ }
LegalizeAction Action = Subtarget.hasVLX() ? Legal : Custom;
for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {
@@ -1532,35 +1574,25 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
- setOperationAction(ISD::ADD, MVT::v2i1, Expand);
- setOperationAction(ISD::ADD, MVT::v4i1, Expand);
- setOperationAction(ISD::SUB, MVT::v2i1, Expand);
- setOperationAction(ISD::SUB, MVT::v4i1, Expand);
- setOperationAction(ISD::MUL, MVT::v2i1, Expand);
- setOperationAction(ISD::MUL, MVT::v4i1, Expand);
-
- setOperationAction(ISD::TRUNCATE, MVT::v2i1, Custom);
- setOperationAction(ISD::TRUNCATE, MVT::v4i1, Custom);
- setOperationAction(ISD::SETCC, MVT::v4i1, Custom);
- setOperationAction(ISD::SETCC, MVT::v2i1, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom);
+ for (auto VT : { MVT::v2i1, MVT::v4i1 }) {
+ setOperationAction(ISD::ADD, VT, Expand);
+ setOperationAction(ISD::SUB, VT, Expand);
+ setOperationAction(ISD::MUL, VT, Expand);
+ setOperationAction(ISD::VSELECT, VT, Expand);
+
+ setOperationAction(ISD::TRUNCATE, VT, Custom);
+ setOperationAction(ISD::SETCC, VT, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
+ setOperationAction(ISD::SELECT, VT, Custom);
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
+ }
+
setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i1, Custom);
+ setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v8i1, Custom);
setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v4i1, Custom);
- setOperationAction(ISD::SELECT, MVT::v4i1, Custom);
- setOperationAction(ISD::SELECT, MVT::v2i1, Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v4i1, Custom);
- setOperationAction(ISD::BUILD_VECTOR, MVT::v2i1, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i1, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i1, Custom);
- setOperationAction(ISD::VSELECT, MVT::v2i1, Expand);
- setOperationAction(ISD::VSELECT, MVT::v4i1, Expand);
-
- for (auto VT : { MVT::v4i32, MVT::v8i32 }) {
- setOperationAction(ISD::AND, VT, Legal);
- setOperationAction(ISD::OR, VT, Legal);
- setOperationAction(ISD::XOR, VT, Legal);
- }
for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
setOperationAction(ISD::SMAX, VT, Legal);
@@ -1629,7 +1661,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// is. We should promote the value to 64-bits to solve this.
// This is what the CRT headers do - `fmodf` is an inline header
// function casting to f64 and calling `fmod`.
- if (Subtarget.is32Bit() && Subtarget.isTargetKnownWindowsMSVC())
+ if (Subtarget.is32Bit() && (Subtarget.isTargetKnownWindowsMSVC() ||
+ Subtarget.isTargetWindowsItanium()))
for (ISD::NodeType Op :
{ISD::FCEIL, ISD::FCOS, ISD::FEXP, ISD::FFLOOR, ISD::FREM, ISD::FLOG,
ISD::FLOG10, ISD::FPOW, ISD::FSIN})
@@ -1953,9 +1986,11 @@ X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
case MVT::f32: case MVT::f64:
case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
case MVT::v4f32: case MVT::v2f64:
- case MVT::v32i8: case MVT::v8i32: case MVT::v4i64: case MVT::v8f32:
- case MVT::v4f64:
- RRC = &X86::VR128RegClass;
+ case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
+ case MVT::v8f32: case MVT::v4f64:
+ case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
+ case MVT::v16f32: case MVT::v8f64:
+ RRC = &X86::VR128XRegClass;
break;
}
return std::make_pair(RRC, Cost);
@@ -2019,6 +2054,9 @@ Value *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
}
Value *X86TargetLowering::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
+ if (Subtarget.getTargetTriple().isOSContiki())
+ return getDefaultSafeStackPointerLocation(IRB, false);
+
if (!Subtarget.isTargetAndroid())
return TargetLowering::getSafeStackPointerLocation(IRB);
@@ -2062,6 +2100,58 @@ const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
return ScratchRegs;
}
+/// Lowers mask values (v*i1) to the local register values
+/// \returns DAG node after lowering to register type
+static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
+ const SDLoc &Dl, SelectionDAG &DAG) {
+ EVT ValVT = ValArg.getValueType();
+
+ if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
+ (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
+ // Two stage lowering might be required
+ // bitcast: v8i1 -> i8 / v16i1 -> i16
+ // anyextend: i8 -> i32 / i16 -> i32
+ EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
+ SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
+ if (ValLoc == MVT::i32)
+ ValToCopy = DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValToCopy);
+ return ValToCopy;
+ } else if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
+ (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
+ // One stage lowering is required
+ // bitcast: v32i1 -> i32 / v64i1 -> i64
+ return DAG.getBitcast(ValLoc, ValArg);
+ } else
+ return DAG.getNode(ISD::SIGN_EXTEND, Dl, ValLoc, ValArg);
+}
+
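
A plain scalar analogue (not SelectionDAG code) of the two-stage lowering that the comments in lowerMasksToReg describe: a v8i1 mask is first reinterpreted as an i8 and then widened to the i32 location that actually carries it.

#include <array>
#include <cstdint>
#include <cstdio>

uint32_t maskToI32(const std::array<bool, 8> &Mask) {
  uint8_t Packed = 0;                           // stage 1: "bitcast" v8i1 -> i8
  for (unsigned I = 0; I < 8; ++I)
    if (Mask[I])
      Packed |= static_cast<uint8_t>(1u << I);
  return Packed;                                // stage 2: extend i8 -> i32
}

int main() {
  std::array<bool, 8> M{true, false, true, false, false, false, false, false};
  std::printf("%u\n", maskToI32(M));            // prints 5
  return 0;
}
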
+/// Breaks v64i1 value into two registers and adds the new node to the DAG
+static void Passv64i1ArgInRegs(
+ const SDLoc &Dl, SelectionDAG &DAG, SDValue Chain, SDValue &Arg,
+ SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, CCValAssign &VA,
+ CCValAssign &NextVA, const X86Subtarget &Subtarget) {
+ assert((Subtarget.hasBWI() || Subtarget.hasBMI()) &&
+ "Expected AVX512BW or AVX512BMI target!");
+ assert(Subtarget.is32Bit() && "Expecting 32 bit target");
+ assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value");
+ assert(VA.isRegLoc() && NextVA.isRegLoc() &&
+ "The value should reside in two registers");
+
+ // Before splitting the value we cast it to i64
+ Arg = DAG.getBitcast(MVT::i64, Arg);
+
+ // Splitting the value into two i32 types
+ SDValue Lo, Hi;
+ Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
+ DAG.getConstant(0, Dl, MVT::i32));
+ Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
+ DAG.getConstant(1, Dl, MVT::i32));
+
+ // Attach the two i32 types into corresponding registers
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
+ RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
+}
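
A standalone illustration (not the DAG-building code) of what Passv64i1ArgInRegs does on a 32 bit target: the v64i1 mask is bitcast to an i64 and then handed to the calling convention as two 32-bit register halves, low part first.

#include <cstdint>
#include <cstdio>
#include <utility>

std::pair<uint32_t, uint32_t> splitForTwoRegs(uint64_t MaskBits) {
  uint32_t Lo = static_cast<uint32_t>(MaskBits);        // EXTRACT_ELEMENT 0
  uint32_t Hi = static_cast<uint32_t>(MaskBits >> 32);  // EXTRACT_ELEMENT 1
  return {Lo, Hi};
}

int main() {
  auto Halves = splitForTwoRegs(0x0123456789ABCDEFull);
  std::printf("lo=%08x hi=%08x\n", (unsigned)Halves.first,
              (unsigned)Halves.second);
  return 0;
}
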
+
SDValue
X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
bool isVarArg,
@@ -2086,10 +2176,11 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
MVT::i32));
// Copy the result values into the output registers.
- for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
- CCValAssign &VA = RVLocs[i];
+ for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
+ ++I, ++OutsIndex) {
+ CCValAssign &VA = RVLocs[I];
assert(VA.isRegLoc() && "Can only return in registers!");
- SDValue ValToCopy = OutVals[i];
+ SDValue ValToCopy = OutVals[OutsIndex];
EVT ValVT = ValToCopy.getValueType();
// Promote values to the appropriate types.
@@ -2099,7 +2190,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
else if (VA.getLocInfo() == CCValAssign::AExt) {
if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
- ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
+ ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
else
ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
}
@@ -2152,9 +2243,27 @@ X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
}
}
- Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ValToCopy, Flag);
- Flag = Chain.getValue(1);
- RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+
+ if (VA.needsCustom()) {
+ assert(VA.getValVT() == MVT::v64i1 &&
+ "Currently the only custom case is when we split v64i1 to 2 regs");
+
+ Passv64i1ArgInRegs(dl, DAG, Chain, ValToCopy, RegsToPass, VA, RVLocs[++I],
+ Subtarget);
+
+ assert(2 == RegsToPass.size() &&
+ "Expecting two registers after Pass64BitArgInRegs");
+ } else {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
+ }
+
+ // Add nodes to the DAG and add the values into the RetOps list
+ for (auto &Reg : RegsToPass) {
+ Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, Flag);
+ Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
+ }
}
// Swift calling convention does not require we copy the sret argument
@@ -2282,6 +2391,98 @@ EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
return VT.bitsLT(MinVT) ? MinVT : VT;
}
+/// Reads two 32 bit registers and creates a 64 bit mask value.
+/// \param VA The current 32 bit value that needs to be assigned.
+/// \param NextVA The next 32 bit value that needs to be assigned.
+/// \param Root The parent DAG node.
+/// \param [in,out] InFlag Represents SDValue in the parent DAG node for
+/// glue purposes. In case the DAG is already using a
+/// physical register instead of a virtual one, we should glue
+/// our new SDValue to the InFlag SDValue.
+/// \return a new SDValue of size 64 bits.
+static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
+ SDValue &Root, SelectionDAG &DAG,
+ const SDLoc &Dl, const X86Subtarget &Subtarget,
+ SDValue *InFlag = nullptr) {
+ assert((Subtarget.hasBWI()) && "Expected AVX512BW target!");
+ assert(Subtarget.is32Bit() && "Expecting 32 bit target");
+ assert(VA.getValVT() == MVT::v64i1 &&
+ "Expecting first location of 64 bit width type");
+ assert(NextVA.getValVT() == VA.getValVT() &&
+ "The locations should have the same type");
+ assert(VA.isRegLoc() && NextVA.isRegLoc() &&
+ "The values should reside in two registers");
+
+ SDValue Lo, Hi;
+ unsigned Reg;
+ SDValue ArgValueLo, ArgValueHi;
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ const TargetRegisterClass *RC = &X86::GR32RegClass;
+
+ // Read a 32 bit value from the registers
+ if (nullptr == InFlag) {
+ // When no physical register is present,
+ // create an intermediate virtual register
+ Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ ArgValueLo = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
+ Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
+ ArgValueHi = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
+ } else {
+ // When a physical register is available read the value from it and glue
+ // the reads together.
+ ArgValueLo =
+ DAG.getCopyFromReg(Root, Dl, VA.getLocReg(), MVT::i32, *InFlag);
+ *InFlag = ArgValueLo.getValue(2);
+ ArgValueHi =
+ DAG.getCopyFromReg(Root, Dl, NextVA.getLocReg(), MVT::i32, *InFlag);
+ *InFlag = ArgValueHi.getValue(2);
+ }
+
+ // Convert the i32 type into v32i1 type
+ Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
+
+ // Convert the i32 type into v32i1 type
+ Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
+
+ // Concatenate the two values together
+ return DAG.getNode(ISD::CONCAT_VECTORS, Dl, MVT::v64i1, Lo, Hi);
+}
+
+/// The function will lower a register of various sizes (8/16/32/64)
+/// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1)
+/// \returns a DAG node containing the operand after lowering to mask type.
+static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
+ const EVT &ValLoc, const SDLoc &Dl,
+ SelectionDAG &DAG) {
+ SDValue ValReturned = ValArg;
+
+ if (ValVT == MVT::v64i1) {
+ // On a 32 bit machine, this case is handled by getv64i1Argument
+ assert(ValLoc == MVT::i64 && "Expecting only i64 locations");
+ // On a 64 bit machine, there is no need to truncate the value, only bitcast
+ } else {
+ MVT maskLen;
+ switch (ValVT.getSimpleVT().SimpleTy) {
+ case MVT::v8i1:
+ maskLen = MVT::i8;
+ break;
+ case MVT::v16i1:
+ maskLen = MVT::i16;
+ break;
+ case MVT::v32i1:
+ maskLen = MVT::i32;
+ break;
+ default:
+ llvm_unreachable("Expecting a vector of i1 types");
+ }
+
+ ValReturned = DAG.getNode(ISD::TRUNCATE, Dl, maskLen, ValReturned);
+ }
+
+ return DAG.getBitcast(ValVT, ValReturned);
+}
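
A scalar sketch of the opposite direction handled by lowerRegToMasks: the incoming register location can be wider than the mask (for example a v16i1 result living in an i32 or i64 location), so the value is truncated to the mask width first and only then reinterpreted as individual i1 lanes. The names below are illustrative only, not DAG code.

#include <array>
#include <cstdint>

std::array<bool, 16> regToV16i1(uint64_t LocValue) {
  uint16_t Truncated = static_cast<uint16_t>(LocValue);  // TRUNCATE to i16
  std::array<bool, 16> Mask{};
  for (unsigned I = 0; I < 16; ++I)                       // "bitcast" i16 -> v16i1
    Mask[I] = ((Truncated >> I) & 1u) != 0;
  return Mask;
}

int main() { return regToV16i1(0xFFFF0005ull)[2] ? 0 : 1; }
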
+
/// Lower the result values of a call into the
/// appropriate copies out of appropriate physical registers.
///
@@ -2298,13 +2499,14 @@ SDValue X86TargetLowering::LowerCallResult(
CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
// Copy all of the result registers out of their specified physreg.
- for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
- CCValAssign &VA = RVLocs[i];
+ for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
+ ++I, ++InsIndex) {
+ CCValAssign &VA = RVLocs[I];
EVT CopyVT = VA.getLocVT();
// If this is x86-64, and we disabled SSE, we can't return FP values
if ((CopyVT == MVT::f32 || CopyVT == MVT::f64 || CopyVT == MVT::f128) &&
- ((Is64Bit || Ins[i].Flags.isInReg()) && !Subtarget.hasSSE1())) {
+ ((Is64Bit || Ins[InsIndex].Flags.isInReg()) && !Subtarget.hasSSE1())) {
report_fatal_error("SSE register return with SSE disabled");
}
@@ -2319,19 +2521,34 @@ SDValue X86TargetLowering::LowerCallResult(
RoundAfterCopy = (CopyVT != VA.getLocVT());
}
- Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
- CopyVT, InFlag).getValue(1);
- SDValue Val = Chain.getValue(0);
+ SDValue Val;
+ if (VA.needsCustom()) {
+ assert(VA.getValVT() == MVT::v64i1 &&
+ "Currently the only custom case is when we split v64i1 to 2 regs");
+ Val =
+ getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InFlag);
+ } else {
+ Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InFlag)
+ .getValue(1);
+ Val = Chain.getValue(0);
+ InFlag = Chain.getValue(2);
+ }
if (RoundAfterCopy)
Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
// This truncation won't change the value.
DAG.getIntPtrConstant(1, dl));
- if (VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1)
- Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
+ if (VA.isExtInLoc() && (VA.getValVT().getScalarType() == MVT::i1)) {
+ if (VA.getValVT().isVector() &&
+ ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
+ (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
+ // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
+ Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
+ } else
+ Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
+ }
- InFlag = Chain.getValue(2);
InVals.push_back(Val);
}
@@ -2399,7 +2616,8 @@ static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
/// Return true if the calling convention is one that we can guarantee TCO for.
static bool canGuaranteeTCO(CallingConv::ID CC) {
return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
- CC == CallingConv::HiPE || CC == CallingConv::HHVM);
+ CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
+ CC == CallingConv::HHVM);
}
/// Return true if we might ever do TCO for calls with this calling convention.
@@ -2445,7 +2663,7 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
const SmallVectorImpl<ISD::InputArg> &Ins,
const SDLoc &dl, SelectionDAG &DAG,
const CCValAssign &VA,
- MachineFrameInfo *MFI, unsigned i) const {
+ MachineFrameInfo &MFI, unsigned i) const {
// Create the nodes corresponding to a load from this parameter slot.
ISD::ArgFlagsTy Flags = Ins[i].Flags;
bool AlwaysUseMutable = shouldGuaranteeTCO(
@@ -2454,9 +2672,11 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
EVT ValVT;
// If value is passed by pointer we have address passed instead of the value
- // itself.
- bool ExtendedInMem = VA.isExtInLoc() &&
- VA.getValVT().getScalarType() == MVT::i1;
+ // itself. No need to extend if the mask value and location share the same
+ // absolute size.
+ bool ExtendedInMem =
+ VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
+ VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();
if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
ValVT = VA.getLocVT();
@@ -2483,26 +2703,26 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
if (Flags.isByVal()) {
unsigned Bytes = Flags.getByValSize();
if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
- int FI = MFI->CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable);
+ int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable);
// Adjust SP offset of interrupt parameter.
if (CallConv == CallingConv::X86_INTR) {
- MFI->setObjectOffset(FI, Offset);
+ MFI.setObjectOffset(FI, Offset);
}
return DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
} else {
- int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
- VA.getLocMemOffset(), isImmutable);
+ int FI = MFI.CreateFixedObject(ValVT.getSizeInBits()/8,
+ VA.getLocMemOffset(), isImmutable);
// Set SExt or ZExt flag.
if (VA.getLocInfo() == CCValAssign::ZExt) {
- MFI->setObjectZExt(FI, true);
+ MFI.setObjectZExt(FI, true);
} else if (VA.getLocInfo() == CCValAssign::SExt) {
- MFI->setObjectSExt(FI, true);
+ MFI.setObjectSExt(FI, true);
}
// Adjust SP offset of interrupt parameter.
if (CallConv == CallingConv::X86_INTR) {
- MFI->setObjectOffset(FI, Offset);
+ MFI.setObjectOffset(FI, Offset);
}
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
@@ -2562,6 +2782,13 @@ static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
}
+static bool isSortedByValueNo(const SmallVectorImpl<CCValAssign> &ArgLocs) {
+ return std::is_sorted(ArgLocs.begin(), ArgLocs.end(),
+ [](const CCValAssign &A, const CCValAssign &B) -> bool {
+ return A.getValNo() < B.getValNo();
+ });
+}
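
A minimal demonstration of the std::is_sorted check introduced above: the argument-lowering loop walks the locations and the Ins array in lockstep, so the locations must already be ordered by value number. "Loc" and "ValNo" below are stand-ins for the CCValAssign fields.

#include <algorithm>
#include <vector>

struct Loc { unsigned ValNo; };

bool isSortedByValueNo(const std::vector<Loc> &Locs) {
  return std::is_sorted(Locs.begin(), Locs.end(),
                        [](const Loc &A, const Loc &B) {
                          return A.ValNo < B.ValNo;
                        });
}

int main() {
  std::vector<Loc> Locs{{0}, {1}, {1}, {2}};  // equal neighbours are fine
  return isSortedByValueNo(Locs) ? 0 : 1;
}
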
+
SDValue X86TargetLowering::LowerFormalArguments(
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
@@ -2576,12 +2803,13 @@ SDValue X86TargetLowering::LowerFormalArguments(
Fn->getName() == "main")
FuncInfo->setForceFramePointer(true);
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
bool Is64Bit = Subtarget.is64Bit();
bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
- assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
- "Var args not supported with calling convention fastcc, ghc or hipe");
+ assert(
+ !(isVarArg && canGuaranteeTCO(CallConv)) &&
+ "Var args not supported with calling conv' regcall, fastcc, ghc or hipe");
if (CallConv == CallingConv::X86_INTR) {
bool isLegal = Ins.size() == 1 ||
@@ -2595,59 +2823,78 @@ SDValue X86TargetLowering::LowerFormalArguments(
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
- // Allocate shadow area for Win64
+ // Allocate shadow area for Win64.
if (IsWin64)
CCInfo.AllocateStack(32, 8);
- CCInfo.AnalyzeFormalArguments(Ins, CC_X86);
+ CCInfo.AnalyzeArguments(Ins, CC_X86);
+
+ // In vectorcall calling convention a second pass is required for the HVA
+ // types.
+ if (CallingConv::X86_VectorCall == CallConv) {
+ CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
+ }
+
+ // The next loop assumes that the locations are in the same order as the
+ // input arguments.
+ if (!isSortedByValueNo(ArgLocs))
+ llvm_unreachable("Argument Location list must be sorted before lowering");
- unsigned LastVal = ~0U;
SDValue ArgValue;
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
- CCValAssign &VA = ArgLocs[i];
- // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
- // places.
- assert(VA.getValNo() != LastVal &&
- "Don't support value assigned to multiple locs yet");
- (void)LastVal;
- LastVal = VA.getValNo();
+ for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
+ ++I, ++InsIndex) {
+ assert(InsIndex < Ins.size() && "Invalid Ins index");
+ CCValAssign &VA = ArgLocs[I];
if (VA.isRegLoc()) {
EVT RegVT = VA.getLocVT();
- const TargetRegisterClass *RC;
- if (RegVT == MVT::i32)
- RC = &X86::GR32RegClass;
- else if (Is64Bit && RegVT == MVT::i64)
- RC = &X86::GR64RegClass;
- else if (RegVT == MVT::f32)
- RC = &X86::FR32RegClass;
- else if (RegVT == MVT::f64)
- RC = &X86::FR64RegClass;
- else if (RegVT == MVT::f128)
- RC = &X86::FR128RegClass;
- else if (RegVT.is512BitVector())
- RC = &X86::VR512RegClass;
- else if (RegVT.is256BitVector())
- RC = &X86::VR256RegClass;
- else if (RegVT.is128BitVector())
- RC = &X86::VR128RegClass;
- else if (RegVT == MVT::x86mmx)
- RC = &X86::VR64RegClass;
- else if (RegVT == MVT::i1)
- RC = &X86::VK1RegClass;
- else if (RegVT == MVT::v8i1)
- RC = &X86::VK8RegClass;
- else if (RegVT == MVT::v16i1)
- RC = &X86::VK16RegClass;
- else if (RegVT == MVT::v32i1)
- RC = &X86::VK32RegClass;
- else if (RegVT == MVT::v64i1)
- RC = &X86::VK64RegClass;
- else
- llvm_unreachable("Unknown argument type!");
+ if (VA.needsCustom()) {
+ assert(
+ VA.getValVT() == MVT::v64i1 &&
+ "Currently the only custom case is when we split v64i1 to 2 regs");
+
+ // v64i1 values, in the regcall calling convention, that are
+ // compiled for a 32 bit arch, are split up into two registers.
+ ArgValue =
+ getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
+ } else {
+ const TargetRegisterClass *RC;
+ if (RegVT == MVT::i32)
+ RC = &X86::GR32RegClass;
+ else if (Is64Bit && RegVT == MVT::i64)
+ RC = &X86::GR64RegClass;
+ else if (RegVT == MVT::f32)
+ RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
+ else if (RegVT == MVT::f64)
+ RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
+ else if (RegVT == MVT::f80)
+ RC = &X86::RFP80RegClass;
+ else if (RegVT == MVT::f128)
+ RC = &X86::FR128RegClass;
+ else if (RegVT.is512BitVector())
+ RC = &X86::VR512RegClass;
+ else if (RegVT.is256BitVector())
+ RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
+ else if (RegVT.is128BitVector())
+ RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
+ else if (RegVT == MVT::x86mmx)
+ RC = &X86::VR64RegClass;
+ else if (RegVT == MVT::i1)
+ RC = &X86::VK1RegClass;
+ else if (RegVT == MVT::v8i1)
+ RC = &X86::VK8RegClass;
+ else if (RegVT == MVT::v16i1)
+ RC = &X86::VK16RegClass;
+ else if (RegVT == MVT::v32i1)
+ RC = &X86::VK32RegClass;
+ else if (RegVT == MVT::v64i1)
+ RC = &X86::VK64RegClass;
+ else
+ llvm_unreachable("Unknown argument type!");
- unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
- ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
+ }
// If this is an 8 or 16-bit value, it is really passed promoted to 32
// bits. Insert an assert[sz]ext to capture this, then truncate to the
@@ -2665,12 +2912,19 @@ SDValue X86TargetLowering::LowerFormalArguments(
// Handle MMX values passed in XMM regs.
if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
- else
+ else if (VA.getValVT().isVector() &&
+ VA.getValVT().getScalarType() == MVT::i1 &&
+ ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
+ (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
+ // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
+ ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
+ } else
ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
}
} else {
assert(VA.isMemLoc());
- ArgValue = LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, i);
+ ArgValue =
+ LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
}
// If value is passed via pointer - do a load.
@@ -2681,7 +2935,7 @@ SDValue X86TargetLowering::LowerFormalArguments(
InVals.push_back(ArgValue);
}
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
// Swift calling convention does not require we copy the sret argument
// into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
if (CallConv == CallingConv::Swift)
@@ -2691,14 +2945,14 @@ SDValue X86TargetLowering::LowerFormalArguments(
// sret argument into %rax/%eax (depending on ABI) for the return. Save
// the argument into a virtual register so that we can access it from the
// return points.
- if (Ins[i].Flags.isSRet()) {
+ if (Ins[I].Flags.isSRet()) {
unsigned Reg = FuncInfo->getSRetReturnReg();
if (!Reg) {
MVT PtrTy = getPointerTy(DAG.getDataLayout());
Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
FuncInfo->setSRetReturnReg(Reg);
}
- SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[i]);
+ SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
break;
}
@@ -2713,11 +2967,10 @@ SDValue X86TargetLowering::LowerFormalArguments(
// If the function takes a variable number of arguments, make a frame index for
// the start of the first vararg value... for expansion of llvm.va_start. We
// can skip this if there are no va_start calls.
- if (MFI->hasVAStart() &&
+ if (MFI.hasVAStart() &&
(Is64Bit || (CallConv != CallingConv::X86_FastCall &&
CallConv != CallingConv::X86_ThisCall))) {
- FuncInfo->setVarArgsFrameIndex(
- MFI->CreateFixedObject(1, StackSize, true));
+ FuncInfo->setVarArgsFrameIndex(MFI.CreateFixedObject(1, StackSize, true));
}
// Figure out if XMM registers are in use.
@@ -2727,7 +2980,7 @@ SDValue X86TargetLowering::LowerFormalArguments(
// 64-bit calling conventions support varargs and register parameters, so we
// have to do extra work to spill them in the prologue.
- if (Is64Bit && isVarArg && MFI->hasVAStart()) {
+ if (Is64Bit && isVarArg && MFI.hasVAStart()) {
// Find the first unallocated argument registers.
ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
ArrayRef<MCPhysReg> ArgXMMs = get64BitArgumentXMMs(MF, CallConv, Subtarget);
@@ -2760,7 +3013,7 @@ SDValue X86TargetLowering::LowerFormalArguments(
// for the return address.
int HomeOffset = TFI.getOffsetOfLocalArea() + 8;
FuncInfo->setRegSaveFrameIndex(
- MFI->CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
+ MFI.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
// Fixup to set vararg frame on shadow area (4 x i64).
if (NumIntRegs < 4)
FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
@@ -2770,7 +3023,7 @@ SDValue X86TargetLowering::LowerFormalArguments(
// they may be loaded by dereferencing the result of va_next.
FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
- FuncInfo->setRegSaveFrameIndex(MFI->CreateStackObject(
+ FuncInfo->setRegSaveFrameIndex(MFI.CreateStackObject(
ArgGPRs.size() * 8 + ArgXMMs.size() * 16, 16, false));
}
@@ -2810,7 +3063,7 @@ SDValue X86TargetLowering::LowerFormalArguments(
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
}
- if (isVarArg && MFI->hasMustTailInVarArgFunc()) {
+ if (isVarArg && MFI.hasMustTailInVarArgFunc()) {
// Find the largest legal vector type.
MVT VecVT = MVT::Other;
// FIXME: Only some x86_32 calling conventions support AVX512.
@@ -2889,7 +3142,7 @@ SDValue X86TargetLowering::LowerFormalArguments(
// same, so the size of funclets' (mostly empty) frames is dictated by
// how far this slot is from the bottom (since they allocate just enough
// space to accommodate holding this slot at the correct offset).
- int PSPSymFI = MFI->CreateStackObject(8, 8, /*isSS=*/false);
+ int PSPSymFI = MFI.CreateStackObject(8, 8, /*isSS=*/false);
EHInfo->PSPSymFrameIdx = PSPSymFI;
}
}
@@ -2938,7 +3191,7 @@ static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
if (!FPDiff) return Chain;
// Calculate the new stack slot for the return address.
int NewReturnAddrFI =
- MF.getFrameInfo()->CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
+ MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
false);
SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
@@ -3029,11 +3282,17 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
- // Allocate shadow area for Win64
+ // Allocate shadow area for Win64.
if (IsWin64)
CCInfo.AllocateStack(32, 8);
- CCInfo.AnalyzeCallOperands(Outs, CC_X86);
+ CCInfo.AnalyzeArguments(Outs, CC_X86);
+
+ // In the vectorcall calling convention a second pass is required for the
+ // HVA types.
+ if (CallingConv::X86_VectorCall == CallConv) {
+ CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
+ }
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
@@ -3088,18 +3347,25 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVector<SDValue, 8> MemOpChains;
SDValue StackPtr;
+ // The next loop assumes that the locations are in the same order as the
+ // input arguments.
+ if (!isSortedByValueNo(ArgLocs))
+ llvm_unreachable("Argument Location list must be sorted before lowering");
+
// Walk the register/memloc assignments, inserting copies/loads. In the case
// of tail call optimization, arguments are handled later.
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
+ ++I, ++OutIndex) {
+ assert(OutIndex < Outs.size() && "Invalid Out index");
// Skip inalloca arguments, they have already been written.
- ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
if (Flags.isInAlloca())
continue;
- CCValAssign &VA = ArgLocs[i];
+ CCValAssign &VA = ArgLocs[I];
EVT RegVT = VA.getLocVT();
- SDValue Arg = OutVals[i];
+ SDValue Arg = OutVals[OutIndex];
bool isByVal = Flags.isByVal();
// Promote the value if needed.
@@ -3115,7 +3381,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
case CCValAssign::AExt:
if (Arg.getValueType().isVector() &&
Arg.getValueType().getVectorElementType() == MVT::i1)
- Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
+ Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
else if (RegVT.is128BitVector()) {
// Special case: passing MMX values in XMM registers.
Arg = DAG.getBitcast(MVT::i64, Arg);
@@ -3139,7 +3405,13 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
}
- if (VA.isRegLoc()) {
+ if (VA.needsCustom()) {
+ assert(VA.getValVT() == MVT::v64i1 &&
+ "Currently the only custom case is when we split v64i1 to 2 regs");
+ // Split v64i1 value into two registers
+ Passv64i1ArgInRegs(dl, DAG, Chain, Arg, RegsToPass, VA, ArgLocs[++I],
+ Subtarget);
+ } else if (VA.isRegLoc()) {
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
if (isVarArg && IsWin64) {
// Win64 ABI requires argument XMM reg to be copied to the corresponding
@@ -3239,20 +3511,32 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
SmallVector<SDValue, 8> MemOpChains2;
SDValue FIN;
int FI = 0;
- for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
- CCValAssign &VA = ArgLocs[i];
- if (VA.isRegLoc())
+ for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
+ ++I, ++OutsIndex) {
+ CCValAssign &VA = ArgLocs[I];
+
+ if (VA.isRegLoc()) {
+ if (VA.needsCustom()) {
+ assert((CallConv == CallingConv::X86_RegCall) &&
+ "Expecting custome case only in regcall calling convention");
+ // This means that we are in the special case where one argument was
+ // passed in two register locations - skip the next location.
+ ++I;
+ }
+
continue;
+ }
+
assert(VA.isMemLoc());
- SDValue Arg = OutVals[i];
- ISD::ArgFlagsTy Flags = Outs[i].Flags;
+ SDValue Arg = OutVals[OutsIndex];
+ ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
// Skip inalloca arguments. They don't require any work.
if (Flags.isInAlloca())
continue;
// Create frame index.
int32_t Offset = VA.getLocMemOffset()+FPDiff;
uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
- FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true);
+ FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
if (Flags.isByVal()) {
@@ -3391,7 +3675,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
// This isn't right, although it's probably harmless on x86; liveouts
// should be computed from returns not tail calls. Consider a void
// function making a tail call to a function returning int.
- MF.getFrameInfo()->setHasTailCall();
+ MF.getFrameInfo().setHasTailCall();
return DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
}
@@ -3493,9 +3777,9 @@ X86TargetLowering::GetAlignedArgumentStackSize(unsigned StackSize,
/// same position (relatively) of the caller's incoming argument stack.
static
bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
- MachineFrameInfo *MFI, const MachineRegisterInfo *MRI,
+ MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
const X86InstrInfo *TII, const CCValAssign &VA) {
- unsigned Bytes = Arg.getValueType().getSizeInBits() / 8;
+ unsigned Bytes = Arg.getValueSizeInBits() / 8;
for (;;) {
// Look through nodes that don't alter the bits of the incoming value.
@@ -3558,22 +3842,22 @@ bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
return false;
assert(FI != INT_MAX);
- if (!MFI->isFixedObjectIndex(FI))
+ if (!MFI.isFixedObjectIndex(FI))
return false;
- if (Offset != MFI->getObjectOffset(FI))
+ if (Offset != MFI.getObjectOffset(FI))
return false;
- if (VA.getLocVT().getSizeInBits() > Arg.getValueType().getSizeInBits()) {
+ if (VA.getLocVT().getSizeInBits() > Arg.getValueSizeInBits()) {
// If the argument location is wider than the argument type, check that any
// extension flags match.
- if (Flags.isZExt() != MFI->isObjectZExt(FI) ||
- Flags.isSExt() != MFI->isObjectSExt(FI)) {
+ if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
+ Flags.isSExt() != MFI.isObjectSExt(FI)) {
return false;
}
}
- return Bytes == MFI->getObjectSize(FI);
+ return Bytes == MFI.getObjectSize(FI);
}
/// Check whether the call is eligible for tail call optimization. Targets
@@ -3700,7 +3984,7 @@ bool X86TargetLowering::IsEligibleForTailCallOptimization(
if (CCInfo.getNextStackOffset()) {
// Check if the arguments are already laid out in the right way as
// the caller's fixed stack objects.
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
const MachineRegisterInfo *MRI = &MF.getRegInfo();
const X86InstrInfo *TII = Subtarget.getInstrInfo();
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
@@ -3787,6 +4071,14 @@ static bool MayFoldIntoStore(SDValue Op) {
return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
}
+static bool MayFoldIntoZeroExtend(SDValue Op) {
+ if (Op.hasOneUse()) {
+ unsigned Opcode = Op.getNode()->use_begin()->getOpcode();
+ return (ISD::ZERO_EXTEND == Opcode);
+ }
+ return false;
+}
+
static bool isTargetShuffle(unsigned Opcode) {
switch(Opcode) {
default: return false;
@@ -3821,6 +4113,7 @@ static bool isTargetShuffle(unsigned Opcode) {
case X86ISD::VPPERM:
case X86ISD::VPERMV:
case X86ISD::VPERMV3:
+ case X86ISD::VPERMIV3:
case X86ISD::VZEXT_MOVL:
return true;
}
@@ -3829,41 +4122,18 @@ static bool isTargetShuffle(unsigned Opcode) {
static bool isTargetShuffleVariableMask(unsigned Opcode) {
switch (Opcode) {
default: return false;
+ // Target Shuffles.
case X86ISD::PSHUFB:
case X86ISD::VPERMILPV:
+ case X86ISD::VPERMIL2:
+ case X86ISD::VPPERM:
+ case X86ISD::VPERMV:
+ case X86ISD::VPERMV3:
+ case X86ISD::VPERMIV3:
+ return true;
+ // 'Faux' Target Shuffles.
+ case ISD::AND:
return true;
- }
-}
-
-static SDValue getTargetShuffleNode(unsigned Opc, const SDLoc &dl, MVT VT,
- SDValue V1, unsigned TargetMask,
- SelectionDAG &DAG) {
- switch(Opc) {
- default: llvm_unreachable("Unknown x86 shuffle node");
- case X86ISD::PSHUFD:
- case X86ISD::PSHUFHW:
- case X86ISD::PSHUFLW:
- case X86ISD::VPERMILPI:
- case X86ISD::VPERMI:
- return DAG.getNode(Opc, dl, VT, V1,
- DAG.getConstant(TargetMask, dl, MVT::i8));
- }
-}
-
-static SDValue getTargetShuffleNode(unsigned Opc, const SDLoc &dl, MVT VT,
- SDValue V1, SDValue V2, SelectionDAG &DAG) {
- switch(Opc) {
- default: llvm_unreachable("Unknown x86 shuffle node");
- case X86ISD::MOVLHPS:
- case X86ISD::MOVLHPD:
- case X86ISD::MOVHLPS:
- case X86ISD::MOVLPS:
- case X86ISD::MOVLPD:
- case X86ISD::MOVSS:
- case X86ISD::MOVSD:
- case X86ISD::UNPCKL:
- case X86ISD::UNPCKH:
- return DAG.getNode(Opc, dl, VT, V1, V2);
}
}
@@ -3876,9 +4146,9 @@ SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
if (ReturnAddrIndex == 0) {
// Set up a frame object for the return address.
unsigned SlotSize = RegInfo->getSlotSize();
- ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize,
- -(int64_t)SlotSize,
- false);
+ ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject(SlotSize,
+ -(int64_t)SlotSize,
+ false);
FuncInfo->setRAIndex(ReturnAddrIndex);
}
@@ -3974,7 +4244,7 @@ static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) {
/// Do a one-to-one translation of an ISD::CondCode to the X86-specific
/// condition code, returning the condition code and the LHS/RHS of the
/// comparison to make.
-static unsigned TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
+static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
bool isFP, SDValue &LHS, SDValue &RHS,
SelectionDAG &DAG) {
if (!isFP) {
@@ -4175,6 +4445,10 @@ bool X86TargetLowering::isCheapToSpeculateCtlz() const {
return Subtarget.hasLZCNT();
}
+bool X86TargetLowering::isCtlzFast() const {
+ return Subtarget.hasFastLZCNT();
+}
+
bool X86TargetLowering::hasAndNotCompare(SDValue Y) const {
if (!Subtarget.hasBMI())
return false;
@@ -4187,11 +4461,21 @@ bool X86TargetLowering::hasAndNotCompare(SDValue Y) const {
return true;
}
+/// Val is the undef sentinel value or equal to the specified value.
+static bool isUndefOrEqual(int Val, int CmpVal) {
+ return ((Val == SM_SentinelUndef) || (Val == CmpVal));
+}
+
+/// Val is either the undef or zero sentinel value.
+static bool isUndefOrZero(int Val) {
+ return ((Val == SM_SentinelUndef) || (Val == SM_SentinelZero));
+}
+
/// Return true if every element in Mask, beginning
-/// from position Pos and ending in Pos+Size is undef.
+/// from position Pos and ending in Pos+Size is the undef sentinel value.
static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) {
for (unsigned i = Pos, e = Pos + Size; i != e; ++i)
- if (0 <= Mask[i])
+ if (Mask[i] != SM_SentinelUndef)
return false;
return true;
}
@@ -4199,7 +4483,7 @@ static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) {
/// Return true if Val is undef or if its value falls within the
/// specified range [Low, Hi).
static bool isUndefOrInRange(int Val, int Low, int Hi) {
- return (Val < 0) || (Val >= Low && Val < Hi);
+ return (Val == SM_SentinelUndef) || (Val >= Low && Val < Hi);
}
/// Return true if every element in Mask is undef or if its value
@@ -4212,14 +4496,19 @@ static bool isUndefOrInRange(ArrayRef<int> Mask,
return true;
}
-/// Val is either less than zero (undef) or equal to the specified value.
-static bool isUndefOrEqual(int Val, int CmpVal) {
- return (Val < 0 || Val == CmpVal);
+/// Return true if Val is undef, zero or if its value falls within the
+/// specified range [Low, Hi).
+static bool isUndefOrZeroOrInRange(int Val, int Low, int Hi) {
+ return isUndefOrZero(Val) || (Val >= Low && Val < Hi);
}
-/// Val is either the undef or zero sentinel value.
-static bool isUndefOrZero(int Val) {
- return (Val == SM_SentinelUndef || Val == SM_SentinelZero);
+/// Return true if every element in Mask is undef, zero or if its value
+/// falls within the specified range [Low, Hi).
+static bool isUndefOrZeroOrInRange(ArrayRef<int> Mask, int Low, int Hi) {
+ for (int M : Mask)
+ if (!isUndefOrZeroOrInRange(M, Low, Hi))
+ return false;
+ return true;
}
/// Return true if every element in Mask, beginning
@@ -4244,6 +4533,100 @@ static bool isSequentialOrUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
return true;
}
+/// Return true if every element in Mask, beginning
+/// from position Pos and ending in Pos+Size is undef or is zero.
+static bool isUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
+ unsigned Size) {
+ for (unsigned i = Pos, e = Pos + Size; i != e; ++i)
+ if (!isUndefOrZero(Mask[i]))
+ return false;
+ return true;
+}
+
+/// \brief Helper function to test whether a shuffle mask could be
+/// simplified by widening the elements being shuffled.
+///
+/// Appends the mask for wider elements in WidenedMask if valid. Otherwise
+/// leaves it in an unspecified state.
+///
+/// NOTE: This must handle normal vector shuffle masks and *target* vector
+/// shuffle masks. The latter have the special property of a '-2' representing
+/// a zeroed lane of a vector.
+static bool canWidenShuffleElements(ArrayRef<int> Mask,
+ SmallVectorImpl<int> &WidenedMask) {
+ WidenedMask.assign(Mask.size() / 2, 0);
+ for (int i = 0, Size = Mask.size(); i < Size; i += 2) {
+ // If both elements are undef, it's trivial.
+ if (Mask[i] == SM_SentinelUndef && Mask[i + 1] == SM_SentinelUndef) {
+ WidenedMask[i / 2] = SM_SentinelUndef;
+ continue;
+ }
+
+ // Check for an undef mask and a mask value properly aligned to fit with
+ // a pair of values. If we find such a case, use the non-undef mask's value.
+ if (Mask[i] == SM_SentinelUndef && Mask[i + 1] >= 0 &&
+ Mask[i + 1] % 2 == 1) {
+ WidenedMask[i / 2] = Mask[i + 1] / 2;
+ continue;
+ }
+ if (Mask[i + 1] == SM_SentinelUndef && Mask[i] >= 0 && Mask[i] % 2 == 0) {
+ WidenedMask[i / 2] = Mask[i] / 2;
+ continue;
+ }
+
+ // When zeroing, we need to spread the zeroing across both lanes to widen.
+ if (Mask[i] == SM_SentinelZero || Mask[i + 1] == SM_SentinelZero) {
+ if ((Mask[i] == SM_SentinelZero || Mask[i] == SM_SentinelUndef) &&
+ (Mask[i + 1] == SM_SentinelZero || Mask[i + 1] == SM_SentinelUndef)) {
+ WidenedMask[i / 2] = SM_SentinelZero;
+ continue;
+ }
+ return false;
+ }
+
+ // Finally check if the two mask values are adjacent and aligned with
+ // a pair.
+ if (Mask[i] != SM_SentinelUndef && Mask[i] % 2 == 0 &&
+ Mask[i] + 1 == Mask[i + 1]) {
+ WidenedMask[i / 2] = Mask[i] / 2;
+ continue;
+ }
+
+ // Otherwise we can't safely widen the elements used in this shuffle.
+ return false;
+ }
+ assert(WidenedMask.size() == Mask.size() / 2 &&
+ "Incorrect size of mask after widening the elements!");
+
+ return true;
+}
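As a minimal standalone sketch of the widening rule above (not the LLVM helper itself; the zero/undef sentinel handling is omitted and the name widenSketch is hypothetical): an aligned, adjacent pair {2k, 2k+1} collapses to k, and any other pair blocks widening.

  #include <cassert>
  #include <optional>
  #include <vector>

  // Collapse adjacent index pairs {2k, 2k+1} into k; fail otherwise.
  static std::optional<std::vector<int>> widenSketch(const std::vector<int> &Mask) {
    std::vector<int> Wide(Mask.size() / 2);
    for (size_t i = 0; i < Mask.size(); i += 2) {
      if (Mask[i] < 0 || Mask[i] % 2 != 0 || Mask[i] + 1 != Mask[i + 1])
        return std::nullopt; // pair is not aligned and adjacent
      Wide[i / 2] = Mask[i] / 2;
    }
    return Wide;
  }

  int main() {
    assert((*widenSketch({0, 1, 6, 7}) == std::vector<int>{0, 3})); // widens to a v2 mask
    assert(!widenSketch({1, 2, 4, 5}));                             // cannot be widened
    return 0;
  }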
+
+/// Helper function to scale a shuffle or target shuffle mask, replacing each
+/// mask index with the scaled sequential indices for an equivalent narrowed
+/// mask. This is the reverse process to canWidenShuffleElements, but can always
+/// succeed.
+static void scaleShuffleMask(int Scale, ArrayRef<int> Mask,
+ SmallVectorImpl<int> &ScaledMask) {
+ assert(0 < Scale && "Unexpected scaling factor");
+ int NumElts = Mask.size();
+ ScaledMask.assign(NumElts * Scale, -1);
+
+ for (int i = 0; i != NumElts; ++i) {
+ int M = Mask[i];
+
+ // Repeat sentinel values in every mask element.
+ if (M < 0) {
+ for (int s = 0; s != Scale; ++s)
+ ScaledMask[(Scale * i) + s] = M;
+ continue;
+ }
+
+ // Scale mask element and increment across each mask element.
+ for (int s = 0; s != Scale; ++s)
+ ScaledMask[(Scale * i) + s] = (Scale * M) + s;
+ }
+}
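A small standalone sketch of the same scaling (hypothetical names, plain ints instead of the patch's types): each index M becomes Scale consecutive narrow indices starting at Scale*M, and sentinel values are simply repeated.

  #include <cassert>
  #include <vector>

  static std::vector<int> scaleMaskSketch(int Scale, const std::vector<int> &Mask) {
    std::vector<int> Scaled(Mask.size() * Scale, -1);
    for (int i = 0, e = (int)Mask.size(); i != e; ++i)
      for (int s = 0; s != Scale; ++s)
        Scaled[Scale * i + s] = Mask[i] < 0 ? Mask[i] : Scale * Mask[i] + s;
    return Scaled;
  }

  int main() {
    // A v4 mask <0, 2, -1, 3> viewed as a v8 mask of half-width elements.
    assert((scaleMaskSketch(2, {0, 2, -1, 3}) ==
            std::vector<int>{0, 1, 4, 5, -1, -1, 6, 7}));
    return 0;
  }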
+
/// Return true if the specified EXTRACT_SUBVECTOR operand specifies a vector
/// extract that is suitable for instruction that extract 128 or 256 bit vectors
static bool isVEXTRACTIndex(SDNode *N, unsigned vecWidth) {
@@ -4256,7 +4639,7 @@ static bool isVEXTRACTIndex(SDNode *N, unsigned vecWidth) {
cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
MVT VT = N->getSimpleValueType(0);
- unsigned ElSize = VT.getVectorElementType().getSizeInBits();
+ unsigned ElSize = VT.getScalarSizeInBits();
bool Result = (Index * ElSize) % vecWidth == 0;
return Result;
@@ -4274,7 +4657,7 @@ static bool isVINSERTIndex(SDNode *N, unsigned vecWidth) {
cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
MVT VT = N->getSimpleValueType(0);
- unsigned ElSize = VT.getVectorElementType().getSizeInBits();
+ unsigned ElSize = VT.getScalarSizeInBits();
bool Result = (Index * ElSize) % vecWidth == 0;
return Result;
@@ -4388,6 +4771,46 @@ static SDValue getConstVector(ArrayRef<int> Values, MVT VT, SelectionDAG &DAG,
return ConstsNode;
}
+static SDValue getConstVector(ArrayRef<APInt> Bits, SmallBitVector &Undefs,
+ MVT VT, SelectionDAG &DAG, const SDLoc &dl) {
+ assert(Bits.size() == Undefs.size() && "Unequal constant and undef arrays");
+ SmallVector<SDValue, 32> Ops;
+ bool Split = false;
+
+ MVT ConstVecVT = VT;
+ unsigned NumElts = VT.getVectorNumElements();
+ bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
+ if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
+ ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
+ Split = true;
+ }
+
+ MVT EltVT = ConstVecVT.getVectorElementType();
+ for (unsigned i = 0, e = Bits.size(); i != e; ++i) {
+ if (Undefs[i]) {
+ Ops.append(Split ? 2 : 1, DAG.getUNDEF(EltVT));
+ continue;
+ }
+ const APInt &V = Bits[i];
+ assert(V.getBitWidth() == VT.getScalarSizeInBits() && "Unexpected sizes");
+ if (Split) {
+ Ops.push_back(DAG.getConstant(V.trunc(32), dl, EltVT));
+ Ops.push_back(DAG.getConstant(V.lshr(32).trunc(32), dl, EltVT));
+ } else if (EltVT == MVT::f32) {
+ APFloat FV(APFloat::IEEEsingle(), V);
+ Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
+ } else if (EltVT == MVT::f64) {
+ APFloat FV(APFloat::IEEEdouble(), V);
+ Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
+ } else {
+ Ops.push_back(DAG.getConstant(V, dl, EltVT));
+ }
+ }
+
+ SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
+ return DAG.getBitcast(VT, ConstsNode);
+}
+
/// Returns a vector of specified type with all zero elements.
static SDValue getZeroVector(MVT VT, const X86Subtarget &Subtarget,
SelectionDAG &DAG, const SDLoc &dl) {
@@ -4416,8 +4839,6 @@ static SDValue getZeroVector(MVT VT, const X86Subtarget &Subtarget,
static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
const SDLoc &dl, unsigned vectorWidth) {
- assert((vectorWidth == 128 || vectorWidth == 256) &&
- "Unsupported vector width");
EVT VT = Vec.getValueType();
EVT ElVT = VT.getVectorElementType();
unsigned Factor = VT.getSizeInBits()/vectorWidth;
@@ -4438,8 +4859,8 @@ static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
// If the input is a buildvector just emit a smaller one.
if (Vec.getOpcode() == ISD::BUILD_VECTOR)
- return DAG.getNode(ISD::BUILD_VECTOR,
- dl, ResultVT, makeArrayRef(Vec->op_begin() + IdxVal, ElemsPerChunk));
+ return DAG.getNode(ISD::BUILD_VECTOR, dl, ResultVT,
+ makeArrayRef(Vec->op_begin() + IdxVal, ElemsPerChunk));
SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx);
@@ -4694,29 +5115,35 @@ static SDValue getOnesVector(EVT VT, const X86Subtarget &Subtarget,
return DAG.getBitcast(VT, Vec);
}
+/// Generate unpacklo/unpackhi shuffle mask.
+static void createUnpackShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo,
+ bool Unary) {
+ assert(Mask.empty() && "Expected an empty shuffle mask vector");
+ int NumElts = VT.getVectorNumElements();
+ int NumEltsInLane = 128 / VT.getScalarSizeInBits();
+
+ for (int i = 0; i < NumElts; ++i) {
+ unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane;
+ int Pos = (i % NumEltsInLane) / 2 + LaneStart;
+ Pos += (Unary ? 0 : NumElts * (i % 2));
+ Pos += (Lo ? 0 : NumEltsInLane / 2);
+ Mask.push_back(Pos);
+ }
+}
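For reference, a hypothetical single-lane sketch of the mask pattern this produces for a 128-bit binary (two-input) shuffle; the real helper additionally handles multiple 128-bit lanes and the unary case.

  #include <cassert>
  #include <vector>

  // Single 128-bit lane, binary unpack mask.
  static std::vector<int> unpackMaskSketch(int NumElts, bool Lo) {
    std::vector<int> Mask;
    for (int i = 0; i < NumElts; ++i)
      Mask.push_back(i / 2 + NumElts * (i % 2) + (Lo ? 0 : NumElts / 2));
    return Mask;
  }

  int main() {
    assert((unpackMaskSketch(4, true)  == std::vector<int>{0, 4, 1, 5})); // unpcklps-style
    assert((unpackMaskSketch(4, false) == std::vector<int>{2, 6, 3, 7})); // unpckhps-style
    return 0;
  }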
+
/// Returns a vector_shuffle node for an unpackl operation.
static SDValue getUnpackl(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
SDValue V1, SDValue V2) {
- assert(VT.is128BitVector() && "Expected a 128-bit vector type");
- unsigned NumElems = VT.getVectorNumElements();
- SmallVector<int, 8> Mask(NumElems);
- for (unsigned i = 0, e = NumElems/2; i != e; ++i) {
- Mask[i * 2] = i;
- Mask[i * 2 + 1] = i + NumElems;
- }
+ SmallVector<int, 8> Mask;
+ createUnpackShuffleMask(VT, Mask, /* Lo = */ true, /* Unary = */ false);
return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
}
/// Returns a vector_shuffle node for an unpackh operation.
static SDValue getUnpackh(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
SDValue V1, SDValue V2) {
- assert(VT.is128BitVector() && "Expected a 128-bit vector type");
- unsigned NumElems = VT.getVectorNumElements();
- SmallVector<int, 8> Mask(NumElems);
- for (unsigned i = 0, Half = NumElems/2; i != Half; ++i) {
- Mask[i * 2] = i + Half;
- Mask[i * 2 + 1] = i + NumElems + Half;
- }
+ SmallVector<int, 8> Mask;
+ createUnpackShuffleMask(VT, Mask, /* Lo = */ false, /* Unary = */ false);
return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
}
@@ -4745,6 +5172,135 @@ static SDValue peekThroughBitcasts(SDValue V) {
return V;
}
+static SDValue peekThroughOneUseBitcasts(SDValue V) {
+ while (V.getNode() && V.getOpcode() == ISD::BITCAST &&
+ V.getOperand(0).hasOneUse())
+ V = V.getOperand(0);
+ return V;
+}
+
+static const Constant *getTargetConstantFromNode(SDValue Op) {
+ Op = peekThroughBitcasts(Op);
+
+ auto *Load = dyn_cast<LoadSDNode>(Op);
+ if (!Load)
+ return nullptr;
+
+ SDValue Ptr = Load->getBasePtr();
+ if (Ptr->getOpcode() == X86ISD::Wrapper ||
+ Ptr->getOpcode() == X86ISD::WrapperRIP)
+ Ptr = Ptr->getOperand(0);
+
+ auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr);
+ if (!CNode || CNode->isMachineConstantPoolEntry())
+ return nullptr;
+
+ return dyn_cast<Constant>(CNode->getConstVal());
+}
+
+// Extract raw constant bits from constant pools.
+static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
+ SmallBitVector &UndefElts,
+ SmallVectorImpl<APInt> &EltBits) {
+ assert(UndefElts.empty() && "Expected an empty UndefElts vector");
+ assert(EltBits.empty() && "Expected an empty EltBits vector");
+
+ Op = peekThroughBitcasts(Op);
+
+ EVT VT = Op.getValueType();
+ unsigned SizeInBits = VT.getSizeInBits();
+ assert((SizeInBits % EltSizeInBits) == 0 && "Can't split constant!");
+ unsigned NumElts = SizeInBits / EltSizeInBits;
+
+ // Extract all the undef/constant element data and pack into single bitsets.
+ APInt UndefBits(SizeInBits, 0);
+ APInt MaskBits(SizeInBits, 0);
+
+ // Split the undef/constant single bitset data into the target elements.
+ auto SplitBitData = [&]() {
+ UndefElts = SmallBitVector(NumElts, false);
+ EltBits.resize(NumElts, APInt(EltSizeInBits, 0));
+
+ for (unsigned i = 0; i != NumElts; ++i) {
+ APInt UndefEltBits = UndefBits.lshr(i * EltSizeInBits);
+ UndefEltBits = UndefEltBits.zextOrTrunc(EltSizeInBits);
+
+ // Only treat an element as UNDEF if all bits are UNDEF, otherwise
+ // treat it as zero.
+ if (UndefEltBits.isAllOnesValue()) {
+ UndefElts[i] = true;
+ continue;
+ }
+
+ APInt Bits = MaskBits.lshr(i * EltSizeInBits);
+ Bits = Bits.zextOrTrunc(EltSizeInBits);
+ EltBits[i] = Bits.getZExtValue();
+ }
+ return true;
+ };
+
+ auto ExtractConstantBits = [SizeInBits](const Constant *Cst, APInt &Mask,
+ APInt &Undefs) {
+ if (!Cst)
+ return false;
+ unsigned CstSizeInBits = Cst->getType()->getPrimitiveSizeInBits();
+ if (isa<UndefValue>(Cst)) {
+ Mask = APInt::getNullValue(SizeInBits);
+ Undefs = APInt::getLowBitsSet(SizeInBits, CstSizeInBits);
+ return true;
+ }
+ if (auto *CInt = dyn_cast<ConstantInt>(Cst)) {
+ Mask = CInt->getValue().zextOrTrunc(SizeInBits);
+ Undefs = APInt::getNullValue(SizeInBits);
+ return true;
+ }
+ if (auto *CFP = dyn_cast<ConstantFP>(Cst)) {
+ Mask = CFP->getValueAPF().bitcastToAPInt().zextOrTrunc(SizeInBits);
+ Undefs = APInt::getNullValue(SizeInBits);
+ return true;
+ }
+ return false;
+ };
+
+ // Extract constant bits from constant pool vector.
+ if (auto *Cst = getTargetConstantFromNode(Op)) {
+ Type *CstTy = Cst->getType();
+ if (!CstTy->isVectorTy() || (SizeInBits != CstTy->getPrimitiveSizeInBits()))
+ return false;
+
+ unsigned CstEltSizeInBits = CstTy->getScalarSizeInBits();
+ for (unsigned i = 0, e = CstTy->getVectorNumElements(); i != e; ++i) {
+ APInt Bits, Undefs;
+ if (!ExtractConstantBits(Cst->getAggregateElement(i), Bits, Undefs))
+ return false;
+ MaskBits |= Bits.shl(i * CstEltSizeInBits);
+ UndefBits |= Undefs.shl(i * CstEltSizeInBits);
+ }
+
+ return SplitBitData();
+ }
+
+ // Extract constant bits from a broadcasted constant pool scalar.
+ if (Op.getOpcode() == X86ISD::VBROADCAST &&
+ EltSizeInBits <= Op.getScalarValueSizeInBits()) {
+ if (auto *Broadcast = getTargetConstantFromNode(Op.getOperand(0))) {
+ APInt Bits, Undefs;
+ if (ExtractConstantBits(Broadcast, Bits, Undefs)) {
+ unsigned NumBroadcastBits = Op.getScalarValueSizeInBits();
+ unsigned NumBroadcastElts = SizeInBits / NumBroadcastBits;
+ for (unsigned i = 0; i != NumBroadcastElts; ++i) {
+ MaskBits |= Bits.shl(i * NumBroadcastBits);
+ UndefBits |= Undefs.shl(i * NumBroadcastBits);
+ }
+ return SplitBitData();
+ }
+ }
+ }
+
+ return false;
+}
+
+// TODO: Merge more of this with getTargetConstantBitsFromNode.
static bool getTargetShuffleMaskIndices(SDValue MaskNode,
unsigned MaskEltSizeInBits,
SmallVectorImpl<uint64_t> &RawMask) {
@@ -4752,6 +5308,7 @@ static bool getTargetShuffleMaskIndices(SDValue MaskNode,
MVT VT = MaskNode.getSimpleValueType();
assert(VT.isVector() && "Can't produce a non-vector with a build_vector!");
+ unsigned NumMaskElts = VT.getSizeInBits() / MaskEltSizeInBits;
// Split an APInt element into MaskEltSizeInBits sized pieces and
// insert into the shuffle mask.
@@ -4783,17 +5340,20 @@ static bool getTargetShuffleMaskIndices(SDValue MaskNode,
if (MaskNode.getOpcode() == X86ISD::VZEXT_MOVL &&
MaskNode.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR) {
-
- // TODO: Handle (MaskEltSizeInBits % VT.getScalarSizeInBits()) == 0
- if ((VT.getScalarSizeInBits() % MaskEltSizeInBits) != 0)
- return false;
- unsigned ElementSplit = VT.getScalarSizeInBits() / MaskEltSizeInBits;
-
SDValue MaskOp = MaskNode.getOperand(0).getOperand(0);
if (auto *CN = dyn_cast<ConstantSDNode>(MaskOp)) {
- SplitElementToMask(CN->getAPIntValue());
- RawMask.append((VT.getVectorNumElements() - 1) * ElementSplit, 0);
- return true;
+ if ((MaskEltSizeInBits % VT.getScalarSizeInBits()) == 0) {
+ RawMask.push_back(CN->getZExtValue());
+ RawMask.append(NumMaskElts - 1, 0);
+ return true;
+ }
+
+ if ((VT.getScalarSizeInBits() % MaskEltSizeInBits) == 0) {
+ unsigned ElementSplit = VT.getScalarSizeInBits() / MaskEltSizeInBits;
+ SplitElementToMask(CN->getAPIntValue());
+ RawMask.append((VT.getVectorNumElements() - 1) * ElementSplit, 0);
+ return true;
+ }
}
return false;
}
@@ -4803,8 +5363,8 @@ static bool getTargetShuffleMaskIndices(SDValue MaskNode,
// We can always decode if the buildvector is all zero constants,
// but can't use isBuildVectorAllZeros as it might contain UNDEFs.
- if (llvm::all_of(MaskNode->ops(), X86::isZeroNode)) {
- RawMask.append(VT.getSizeInBits() / MaskEltSizeInBits, 0);
+ if (all_of(MaskNode->ops(), X86::isZeroNode)) {
+ RawMask.append(NumMaskElts, 0);
return true;
}
@@ -4824,25 +5384,6 @@ static bool getTargetShuffleMaskIndices(SDValue MaskNode,
return true;
}
-static const Constant *getTargetShuffleMaskConstant(SDValue MaskNode) {
- MaskNode = peekThroughBitcasts(MaskNode);
-
- auto *MaskLoad = dyn_cast<LoadSDNode>(MaskNode);
- if (!MaskLoad)
- return nullptr;
-
- SDValue Ptr = MaskLoad->getBasePtr();
- if (Ptr->getOpcode() == X86ISD::Wrapper ||
- Ptr->getOpcode() == X86ISD::WrapperRIP)
- Ptr = Ptr->getOperand(0);
-
- auto *MaskCP = dyn_cast<ConstantPoolSDNode>(Ptr);
- if (!MaskCP || MaskCP->isMachineConstantPoolEntry())
- return nullptr;
-
- return dyn_cast<Constant>(MaskCP->getConstVal());
-}
-
/// Calculates the shuffle mask corresponding to the target-specific opcode.
/// If the mask could be calculated, returns it in \p Mask, returns the shuffle
/// operands in \p Ops, and returns true.
@@ -4896,6 +5437,9 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
assert(VT.getScalarType() == MVT::i8 && "Byte vector expected");
ImmN = N->getOperand(N->getNumOperands()-1);
DecodePALIGNRMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
+ Ops.push_back(N->getOperand(1));
+ Ops.push_back(N->getOperand(0));
break;
case X86ISD::VSHLDQ:
assert(VT.getScalarType() == MVT::i8 && "Byte vector expected");
@@ -4947,7 +5491,7 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
DecodeVPERMILPMask(VT, RawMask, Mask);
break;
}
- if (auto *C = getTargetShuffleMaskConstant(MaskNode)) {
+ if (auto *C = getTargetConstantFromNode(MaskNode)) {
DecodeVPERMILPMask(C, MaskEltSize, Mask);
break;
}
@@ -4961,7 +5505,7 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
DecodePSHUFBMask(RawMask, Mask);
break;
}
- if (auto *C = getTargetShuffleMaskConstant(MaskNode)) {
+ if (auto *C = getTargetConstantFromNode(MaskNode)) {
DecodePSHUFBMask(C, Mask);
break;
}
@@ -5010,7 +5554,7 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
DecodeVPERMIL2PMask(VT, CtrlImm, RawMask, Mask);
break;
}
- if (auto *C = getTargetShuffleMaskConstant(MaskNode)) {
+ if (auto *C = getTargetConstantFromNode(MaskNode)) {
DecodeVPERMIL2PMask(C, CtrlImm, MaskEltSize, Mask);
break;
}
@@ -5025,7 +5569,7 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
DecodeVPPERMMask(RawMask, Mask);
break;
}
- if (auto *C = getTargetShuffleMaskConstant(MaskNode)) {
+ if (auto *C = getTargetConstantFromNode(MaskNode)) {
DecodeVPPERMMask(C, Mask);
break;
}
@@ -5042,8 +5586,8 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
DecodeVPERMVMask(RawMask, Mask);
break;
}
- if (auto *C = getTargetShuffleMaskConstant(MaskNode)) {
- DecodeVPERMVMask(C, VT, Mask);
+ if (auto *C = getTargetConstantFromNode(MaskNode)) {
+ DecodeVPERMVMask(C, MaskEltSize, Mask);
break;
}
return false;
@@ -5054,8 +5598,22 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
Ops.push_back(N->getOperand(0));
Ops.push_back(N->getOperand(2));
SDValue MaskNode = N->getOperand(1);
- if (auto *C = getTargetShuffleMaskConstant(MaskNode)) {
- DecodeVPERMV3Mask(C, VT, Mask);
+ unsigned MaskEltSize = VT.getScalarSizeInBits();
+ if (auto *C = getTargetConstantFromNode(MaskNode)) {
+ DecodeVPERMV3Mask(C, MaskEltSize, Mask);
+ break;
+ }
+ return false;
+ }
+ case X86ISD::VPERMIV3: {
+ IsUnary = IsFakeUnary = N->getOperand(1) == N->getOperand(2);
+ // Unlike most shuffle nodes, VPERMIV3's mask operand is the first one.
+ Ops.push_back(N->getOperand(1));
+ Ops.push_back(N->getOperand(2));
+ SDValue MaskNode = N->getOperand(0);
+ unsigned MaskEltSize = VT.getScalarSizeInBits();
+ if (auto *C = getTargetConstantFromNode(MaskNode)) {
+ DecodeVPERMV3Mask(C, MaskEltSize, Mask);
break;
}
return false;
@@ -5069,7 +5627,7 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
// Check if we're getting a shuffle mask with zero'd elements.
if (!AllowSentinelZero)
- if (llvm::any_of(Mask, [](int M) { return M == SM_SentinelZero; }))
+ if (any_of(Mask, [](int M) { return M == SM_SentinelZero; }))
return false;
// If we have a fake unary shuffle, the shuffle mask is spread across two
@@ -5101,8 +5659,9 @@ static bool setTargetShuffleZeroElements(SDValue N,
bool IsUnary;
if (!isTargetShuffle(N.getOpcode()))
return false;
- if (!getTargetShuffleMask(N.getNode(), N.getSimpleValueType(), true, Ops,
- Mask, IsUnary))
+
+ MVT VT = N.getSimpleValueType();
+ if (!getTargetShuffleMask(N.getNode(), VT, true, Ops, Mask, IsUnary))
return false;
SDValue V1 = Ops[0];
@@ -5164,9 +5723,94 @@ static bool setTargetShuffleZeroElements(SDValue N,
}
}
+ assert(VT.getVectorNumElements() == Mask.size() &&
+ "Different mask size from vector size!");
return true;
}
+// Attempt to decode ops that could be represented as a shuffle mask.
+// The decoded shuffle mask may contain a different number of elements than the
+// destination value type.
+static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
+ SmallVectorImpl<SDValue> &Ops) {
+ Mask.clear();
+ Ops.clear();
+
+ MVT VT = N.getSimpleValueType();
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned NumSizeInBits = VT.getSizeInBits();
+ unsigned NumBitsPerElt = VT.getScalarSizeInBits();
+ assert((NumBitsPerElt % 8) == 0 && (NumSizeInBits % 8) == 0 &&
+ "Expected byte aligned value types");
+
+ unsigned Opcode = N.getOpcode();
+ switch (Opcode) {
+ case ISD::AND: {
+ // Attempt to decode as a per-byte mask.
+ SmallBitVector UndefElts;
+ SmallVector<APInt, 32> EltBits;
+ if (!getTargetConstantBitsFromNode(N.getOperand(1), 8, UndefElts, EltBits))
+ return false;
+ for (int i = 0, e = (int)EltBits.size(); i != e; ++i) {
+ if (UndefElts[i]) {
+ Mask.push_back(SM_SentinelUndef);
+ continue;
+ }
+ uint64_t ByteBits = EltBits[i].getZExtValue();
+ if (ByteBits != 0 && ByteBits != 255)
+ return false;
+ Mask.push_back(ByteBits == 0 ? SM_SentinelZero : i);
+ }
+ Ops.push_back(N.getOperand(0));
+ return true;
+ }
+ case X86ISD::VSHLI:
+ case X86ISD::VSRLI: {
+ uint64_t ShiftVal = N.getConstantOperandVal(1);
+ // Out of range bit shifts are guaranteed to be zero.
+ if (NumBitsPerElt <= ShiftVal) {
+ Mask.append(NumElts, SM_SentinelZero);
+ return true;
+ }
+
+ // We can only decode 'whole byte' bit shifts as shuffles.
+ if ((ShiftVal % 8) != 0)
+ break;
+
+ uint64_t ByteShift = ShiftVal / 8;
+ unsigned NumBytes = NumSizeInBits / 8;
+ unsigned NumBytesPerElt = NumBitsPerElt / 8;
+ Ops.push_back(N.getOperand(0));
+
+ // Clear mask to all zeros and insert the shifted byte indices.
+ Mask.append(NumBytes, SM_SentinelZero);
+
+ if (X86ISD::VSHLI == Opcode) {
+ for (unsigned i = 0; i != NumBytes; i += NumBytesPerElt)
+ for (unsigned j = ByteShift; j != NumBytesPerElt; ++j)
+ Mask[i + j] = i + j - ByteShift;
+ } else {
+ for (unsigned i = 0; i != NumBytes; i += NumBytesPerElt)
+ for (unsigned j = ByteShift; j != NumBytesPerElt; ++j)
+ Mask[i + j - ByteShift] = i + j;
+ }
+ return true;
+ }
+ case X86ISD::VZEXT: {
+ // TODO - add support for VPMOVZX with smaller input vector types.
+ SDValue Src = N.getOperand(0);
+ MVT SrcVT = Src.getSimpleValueType();
+ if (NumSizeInBits != SrcVT.getSizeInBits())
+ break;
+ DecodeZeroExtendMask(SrcVT.getScalarType(), VT, Mask);
+ Ops.push_back(Src);
+ return true;
+ }
+ }
+
+ return false;
+}
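As an illustration of the ISD::AND case above, a standalone sketch (assumed helper name, with -2 standing in for SM_SentinelZero) of how a per-byte 0x00/0xFF constant mask maps onto a byte shuffle:

  #include <cassert>
  #include <cstdint>
  #include <vector>

  static std::vector<int> andToShuffleSketch(const std::vector<uint8_t> &MaskBytes) {
    std::vector<int> Mask;
    for (int i = 0, e = (int)MaskBytes.size(); i != e; ++i) {
      assert(MaskBytes[i] == 0x00 || MaskBytes[i] == 0xFF); // only all-zero/all-one bytes decode
      Mask.push_back(MaskBytes[i] == 0x00 ? -2 : i);        // 0x00 -> zero sentinel, 0xFF -> identity
    }
    return Mask;
  }

  int main() {
    // AND with <0xFF,0x00,0xFF,0x00> keeps bytes 0 and 2, zeroes bytes 1 and 3.
    assert((andToShuffleSketch({0xFF, 0x00, 0xFF, 0x00}) ==
            std::vector<int>{0, -2, 2, -2}));
    return 0;
  }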
+
/// Calls setTargetShuffleZeroElements to resolve a target shuffle mask's inputs
/// and set the SM_SentinelUndef and SM_SentinelZero values. Then check the
/// remaining input indices in case we now have a unary shuffle and adjust the
@@ -5176,14 +5820,14 @@ static bool resolveTargetShuffleInputs(SDValue Op, SDValue &Op0, SDValue &Op1,
SmallVectorImpl<int> &Mask) {
SmallVector<SDValue, 2> Ops;
if (!setTargetShuffleZeroElements(Op, Mask, Ops))
- return false;
+ if (!getFauxShuffleMask(Op, Mask, Ops))
+ return false;
int NumElts = Mask.size();
- bool Op0InUse = std::any_of(Mask.begin(), Mask.end(), [NumElts](int Idx) {
+ bool Op0InUse = any_of(Mask, [NumElts](int Idx) {
return 0 <= Idx && Idx < NumElts;
});
- bool Op1InUse = std::any_of(Mask.begin(), Mask.end(),
- [NumElts](int Idx) { return NumElts <= Idx; });
+ bool Op1InUse = any_of(Mask, [NumElts](int Idx) { return NumElts <= Idx; });
Op0 = Op0InUse ? Ops[0] : SDValue();
Op1 = Op1InUse ? Ops[1] : SDValue();
@@ -5523,15 +6167,15 @@ static SDValue LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, const SDLoc &dl,
unsigned RequiredAlign = VT.getSizeInBits()/8;
SDValue Chain = LD->getChain();
// Make sure the stack object alignment is at least 16 or 32.
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
if (DAG.InferPtrAlignment(Ptr) < RequiredAlign) {
- if (MFI->isFixedObjectIndex(FI)) {
+ if (MFI.isFixedObjectIndex(FI)) {
// Can't change the alignment. FIXME: It's possible to compute
// the exact stack offset and reference FI + adjust offset instead.
// If someone *really* cares about this. That's the way to implement it.
return SDValue();
} else {
- MFI->setObjectAlignment(FI, RequiredAlign);
+ MFI.setObjectAlignment(FI, RequiredAlign);
}
}
@@ -5697,11 +6341,13 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
int LoadSize =
(1 + LastLoadedElt - FirstLoadedElt) * LDBaseVT.getStoreSizeInBits();
- // VZEXT_LOAD - consecutive load/undefs followed by zeros/undefs.
- if (IsConsecutiveLoad && FirstLoadedElt == 0 && LoadSize == 64 &&
+ // VZEXT_LOAD - consecutive 32/64-bit load/undefs followed by zeros/undefs.
+ if (IsConsecutiveLoad && FirstLoadedElt == 0 &&
+ (LoadSize == 32 || LoadSize == 64) &&
((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()))) {
- MVT VecSVT = VT.isFloatingPoint() ? MVT::f64 : MVT::i64;
- MVT VecVT = MVT::getVectorVT(VecSVT, VT.getSizeInBits() / 64);
+ MVT VecSVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(LoadSize)
+ : MVT::getIntegerVT(LoadSize);
+ MVT VecVT = MVT::getVectorVT(VecSVT, VT.getSizeInBits() / LoadSize);
if (TLI.isTypeLegal(VecVT)) {
SDVTList Tys = DAG.getVTList(VecVT, MVT::Other);
SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() };
@@ -5728,31 +6374,53 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts,
}
}
- // VZEXT_MOVL - consecutive 32-bit load/undefs followed by zeros/undefs.
- if (IsConsecutiveLoad && FirstLoadedElt == 0 && LoadSize == 32 &&
- ((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()))) {
- MVT VecSVT = VT.isFloatingPoint() ? MVT::f32 : MVT::i32;
- MVT VecVT = MVT::getVectorVT(VecSVT, VT.getSizeInBits() / 32);
- if (TLI.isTypeLegal(VecVT)) {
- SDValue V = LastLoadedElt != 0 ? CreateLoad(VecSVT, LDBase)
- : DAG.getBitcast(VecSVT, EltBase);
- V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, V);
- V = DAG.getNode(X86ISD::VZEXT_MOVL, DL, VecVT, V);
- return DAG.getBitcast(VT, V);
- }
+ return SDValue();
+}
+
+static Constant *getConstantVector(MVT VT, APInt SplatValue,
+ unsigned SplatBitSize, LLVMContext &C) {
+ unsigned ScalarSize = VT.getScalarSizeInBits();
+ unsigned NumElm = SplatBitSize / ScalarSize;
+
+ SmallVector<Constant *, 32> ConstantVec;
+ for (unsigned i = 0; i < NumElm; i++) {
+ APInt Val = SplatValue.lshr(ScalarSize * i).trunc(ScalarSize);
+ Constant *Const;
+ if (VT.isFloatingPoint()) {
+ assert((ScalarSize == 32 || ScalarSize == 64) &&
+ "Unsupported floating point scalar size");
+ if (ScalarSize == 32)
+ Const = ConstantFP::get(Type::getFloatTy(C), Val.bitsToFloat());
+ else
+ Const = ConstantFP::get(Type::getDoubleTy(C), Val.bitsToDouble());
+ } else
+ Const = Constant::getIntegerValue(Type::getIntNTy(C, ScalarSize), Val);
+ ConstantVec.push_back(Const);
}
+ return ConstantVector::get(ArrayRef<Constant *>(ConstantVec));
+}
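A simplified sketch of the splitting getConstantVector performs (hypothetical names, plain integers instead of APInt): the wide splat value is chopped low-half-first into scalar-sized pieces.

  #include <cassert>
  #include <cstdint>
  #include <vector>

  static std::vector<uint32_t> splitSplatSketch(uint64_t Splat, unsigned SplatBits,
                                                unsigned ScalarBits) {
    std::vector<uint32_t> Elts;
    for (unsigned i = 0; i < SplatBits / ScalarBits; ++i)
      Elts.push_back(uint32_t(Splat >> (ScalarBits * i))); // lshr(ScalarSize * i).trunc(ScalarSize)
    return Elts;
  }

  int main() {
    // A 64-bit splat pattern 0x0000000100000002 over 32-bit elements.
    assert((splitSplatSketch(0x0000000100000002ULL, 64, 32) ==
            std::vector<uint32_t>{0x00000002u, 0x00000001u}));
    return 0;
  }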
- return SDValue();
+static bool isUseOfShuffle(SDNode *N) {
+ for (auto *U : N->uses()) {
+ if (isTargetShuffle(U->getOpcode()))
+ return true;
+ if (U->getOpcode() == ISD::BITCAST) // Ignore bitcasts
+ return isUseOfShuffle(U);
+ }
+ return false;
}
/// Attempt to use the vbroadcast instruction to generate a splat value for the
/// following cases:
-/// 1. A splat BUILD_VECTOR which uses a single scalar load, or a constant.
+/// 1. A splat BUILD_VECTOR which uses:
+/// a. A single scalar load, or a constant.
+/// b. Repeated pattern of constants (e.g. <0,1,0,1> or <0,1,2,3,0,1,2,3>).
/// 2. A splat shuffle which uses a scalar_to_vector node which comes from
/// a scalar load, or a constant.
+///
/// The VBROADCAST node is returned when a pattern is found,
/// or SDValue() otherwise.
-static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget &Subtarget,
+static SDValue LowerVectorBroadcast(BuildVectorSDNode *BVOp, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
// VBROADCAST requires AVX.
// TODO: Splats could be generated for non-AVX CPUs using SSE
@@ -5760,81 +6428,103 @@ static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget &Subtarget,
if (!Subtarget.hasAVX())
return SDValue();
- MVT VT = Op.getSimpleValueType();
- SDLoc dl(Op);
+ MVT VT = BVOp->getSimpleValueType(0);
+ SDLoc dl(BVOp);
assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) &&
"Unsupported vector type for broadcast.");
- SDValue Ld;
- bool ConstSplatVal;
-
- switch (Op.getOpcode()) {
- default:
- // Unknown pattern found.
- return SDValue();
-
- case ISD::BUILD_VECTOR: {
- auto *BVOp = cast<BuildVectorSDNode>(Op.getNode());
- BitVector UndefElements;
- SDValue Splat = BVOp->getSplatValue(&UndefElements);
-
- // We need a splat of a single value to use broadcast, and it doesn't
- // make any sense if the value is only in one element of the vector.
- if (!Splat || (VT.getVectorNumElements() - UndefElements.count()) <= 1)
+ BitVector UndefElements;
+ SDValue Ld = BVOp->getSplatValue(&UndefElements);
+
+ // We need a splat of a single value to use broadcast, and it doesn't
+ // make any sense if the value is only in one element of the vector.
+ if (!Ld || (VT.getVectorNumElements() - UndefElements.count()) <= 1) {
+ APInt SplatValue, Undef;
+ unsigned SplatBitSize;
+ bool HasUndef;
+ // Check if this is a repeated constant pattern suitable for broadcasting.
+ if (BVOp->isConstantSplat(SplatValue, Undef, SplatBitSize, HasUndef) &&
+ SplatBitSize > VT.getScalarSizeInBits() &&
+ SplatBitSize < VT.getSizeInBits()) {
+ // Avoid replacing with a broadcast when the value is used by a shuffle
+ // instruction, to preserve the existing custom lowering of shuffles.
+ if (isUseOfShuffle(BVOp) || BVOp->hasOneUse())
return SDValue();
-
- Ld = Splat;
- ConstSplatVal = (Ld.getOpcode() == ISD::Constant ||
- Ld.getOpcode() == ISD::ConstantFP);
-
- // Make sure that all of the users of a non-constant load are from the
- // BUILD_VECTOR node.
- if (!ConstSplatVal && !BVOp->isOnlyUserOf(Ld.getNode()))
- return SDValue();
- break;
- }
-
- case ISD::VECTOR_SHUFFLE: {
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
-
- // Shuffles must have a splat mask where the first element is
- // broadcasted.
- if ((!SVOp->isSplat()) || SVOp->getMaskElt(0) != 0)
- return SDValue();
-
- SDValue Sc = Op.getOperand(0);
- if (Sc.getOpcode() != ISD::SCALAR_TO_VECTOR &&
- Sc.getOpcode() != ISD::BUILD_VECTOR) {
-
- if (!Subtarget.hasInt256())
- return SDValue();
-
- // Use the register form of the broadcast instruction available on AVX2.
- if (VT.getSizeInBits() >= 256)
- Sc = extract128BitVector(Sc, 0, DAG, dl);
- return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Sc);
+ // Replace the BUILD_VECTOR with a broadcast of the repeated constants.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ LLVMContext *Ctx = DAG.getContext();
+ MVT PVT = TLI.getPointerTy(DAG.getDataLayout());
+ if (Subtarget.hasAVX()) {
+ if (SplatBitSize <= 64 && Subtarget.hasAVX2() &&
+ !(SplatBitSize == 64 && Subtarget.is32Bit())) {
+ // Splatted value can fit in one INTEGER constant in constant pool.
+ // Load the constant and broadcast it.
+ MVT CVT = MVT::getIntegerVT(SplatBitSize);
+ Type *ScalarTy = Type::getIntNTy(*Ctx, SplatBitSize);
+ Constant *C = Constant::getIntegerValue(ScalarTy, SplatValue);
+ SDValue CP = DAG.getConstantPool(C, PVT);
+ unsigned Repeat = VT.getSizeInBits() / SplatBitSize;
+
+ unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
+ Ld = DAG.getLoad(
+ CVT, dl, DAG.getEntryNode(), CP,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
+ Alignment);
+ SDValue Brdcst = DAG.getNode(X86ISD::VBROADCAST, dl,
+ MVT::getVectorVT(CVT, Repeat), Ld);
+ return DAG.getBitcast(VT, Brdcst);
+ } else if (SplatBitSize == 32 || SplatBitSize == 64) {
+ // Splatted value can fit in one FLOAT constant in constant pool.
+ // Load the constant and broadcast it.
+ // AVX has support for 32 and 64 bit broadcasts for floats only.
+ // No 64bit integer in 32bit subtarget.
+ MVT CVT = MVT::getFloatingPointVT(SplatBitSize);
+ Constant *C = SplatBitSize == 32
+ ? ConstantFP::get(Type::getFloatTy(*Ctx),
+ SplatValue.bitsToFloat())
+ : ConstantFP::get(Type::getDoubleTy(*Ctx),
+ SplatValue.bitsToDouble());
+ SDValue CP = DAG.getConstantPool(C, PVT);
+ unsigned Repeat = VT.getSizeInBits() / SplatBitSize;
+
+ unsigned Alignment = cast<ConstantPoolSDNode>(CP)->getAlignment();
+ Ld = DAG.getLoad(
+ CVT, dl, DAG.getEntryNode(), CP,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
+ Alignment);
+ SDValue Brdcst = DAG.getNode(X86ISD::VBROADCAST, dl,
+ MVT::getVectorVT(CVT, Repeat), Ld);
+ return DAG.getBitcast(VT, Brdcst);
+ } else if (SplatBitSize > 64) {
+ // Load the vector of constants and broadcast it.
+ MVT CVT = VT.getScalarType();
+ Constant *VecC = getConstantVector(VT, SplatValue, SplatBitSize,
+ *Ctx);
+ SDValue VCP = DAG.getConstantPool(VecC, PVT);
+ unsigned NumElm = SplatBitSize / VT.getScalarSizeInBits();
+ unsigned Alignment = cast<ConstantPoolSDNode>(VCP)->getAlignment();
+ Ld = DAG.getLoad(
+ MVT::getVectorVT(CVT, NumElm), dl, DAG.getEntryNode(), VCP,
+ MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
+ Alignment);
+ SDValue Brdcst = DAG.getNode(X86ISD::SUBV_BROADCAST, dl, VT, Ld);
+ return DAG.getBitcast(VT, Brdcst);
+ }
}
-
- Ld = Sc.getOperand(0);
- ConstSplatVal = (Ld.getOpcode() == ISD::Constant ||
- Ld.getOpcode() == ISD::ConstantFP);
-
- // The scalar_to_vector node and the suspected
- // load node must have exactly one user.
- // Constants may have multiple users.
-
- // AVX-512 has register version of the broadcast
- bool hasRegVer = Subtarget.hasAVX512() && VT.is512BitVector() &&
- Ld.getValueType().getSizeInBits() >= 32;
- if (!ConstSplatVal && ((!Sc.hasOneUse() || !Ld.hasOneUse()) &&
- !hasRegVer))
- return SDValue();
- break;
}
+ return SDValue();
}
- unsigned ScalarSize = Ld.getValueType().getSizeInBits();
+ bool ConstSplatVal =
+ (Ld.getOpcode() == ISD::Constant || Ld.getOpcode() == ISD::ConstantFP);
+
+ // Make sure that all of the users of a non-constant load are from the
+ // BUILD_VECTOR node.
+ if (!ConstSplatVal && !BVOp->isOnlyUserOf(Ld.getNode()))
+ return SDValue();
+
+ unsigned ScalarSize = Ld.getValueSizeInBits();
bool IsGE256 = (VT.getSizeInBits() >= 256);
// When optimizing for size, generate up to 5 extra bytes for a broadcast
@@ -6025,8 +6715,7 @@ static SDValue ConvertI1VectorToInteger(SDValue Op, SelectionDAG &DAG) {
Immediate |= cast<ConstantSDNode>(In)->getZExtValue() << idx;
}
SDLoc dl(Op);
- MVT VT =
- MVT::getIntegerVT(std::max((int)Op.getValueType().getSizeInBits(), 8));
+ MVT VT = MVT::getIntegerVT(std::max((int)Op.getValueSizeInBits(), 8));
return DAG.getConstant(Immediate, dl, VT);
}
// Lower BUILD_VECTOR operation for v8i1 and v16i1 types.
@@ -6510,17 +7199,18 @@ static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV,
/// NOTE: It's not in our interest to start making a general purpose vectorizer
/// from this, but enough scalar bit operations are created from the later
/// legalization + scalarization stages to need basic support.
-static SDValue lowerBuildVectorToBitOp(SDValue Op, SelectionDAG &DAG) {
+static SDValue lowerBuildVectorToBitOp(BuildVectorSDNode *Op,
+ SelectionDAG &DAG) {
SDLoc DL(Op);
- MVT VT = Op.getSimpleValueType();
+ MVT VT = Op->getSimpleValueType(0);
unsigned NumElems = VT.getVectorNumElements();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// Check that all elements have the same opcode.
// TODO: Should we allow UNDEFS and if so how many?
- unsigned Opcode = Op.getOperand(0).getOpcode();
+ unsigned Opcode = Op->getOperand(0).getOpcode();
for (unsigned i = 1; i < NumElems; ++i)
- if (Opcode != Op.getOperand(i).getOpcode())
+ if (Opcode != Op->getOperand(i).getOpcode())
return SDValue();
// TODO: We may be able to add support for other Ops (ADD/SUB + shifts).
@@ -6604,9 +7294,9 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
return AddSub;
if (SDValue HorizontalOp = LowerToHorizontalOp(BV, Subtarget, DAG))
return HorizontalOp;
- if (SDValue Broadcast = LowerVectorBroadcast(Op, Subtarget, DAG))
+ if (SDValue Broadcast = LowerVectorBroadcast(BV, Subtarget, DAG))
return Broadcast;
- if (SDValue BitOp = lowerBuildVectorToBitOp(Op, DAG))
+ if (SDValue BitOp = lowerBuildVectorToBitOp(BV, DAG))
return BitOp;
unsigned EVTBits = ExtVT.getSizeInBits();
@@ -6673,12 +7363,8 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
if (ExtVT == MVT::i32 || ExtVT == MVT::f32 || ExtVT == MVT::f64 ||
(ExtVT == MVT::i64 && Subtarget.is64Bit())) {
- if (VT.is512BitVector()) {
- SDValue ZeroVec = getZeroVector(VT, Subtarget, DAG, dl);
- return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, ZeroVec,
- Item, DAG.getIntPtrConstant(0, dl));
- }
- assert((VT.is128BitVector() || VT.is256BitVector()) &&
+ assert((VT.is128BitVector() || VT.is256BitVector() ||
+ VT.is512BitVector()) &&
"Expected an SSE value type!");
Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
// Turn it into a MOVL (i.e. movss, movsd, or movd) to a zero vector.
@@ -7088,6 +7774,7 @@ static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
RepeatedMask.assign(LaneSize, -1);
int Size = Mask.size();
for (int i = 0; i < Size; ++i) {
+ assert(Mask[i] == SM_SentinelUndef || Mask[i] >= 0);
if (Mask[i] < 0)
continue;
if ((Mask[i] % Size) / LaneSize != i / LaneSize)
@@ -7122,26 +7809,40 @@ is256BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask,
return isRepeatedShuffleMask(256, VT, Mask, RepeatedMask);
}
-static void scaleShuffleMask(int Scale, ArrayRef<int> Mask,
- SmallVectorImpl<int> &ScaledMask) {
- assert(0 < Scale && "Unexpected scaling factor");
- int NumElts = Mask.size();
- ScaledMask.assign(NumElts * Scale, -1);
-
- for (int i = 0; i != NumElts; ++i) {
- int M = Mask[i];
-
- // Repeat sentinel values in every mask element.
- if (M < 0) {
- for (int s = 0; s != Scale; ++s)
- ScaledMask[(Scale * i) + s] = M;
+/// Test whether a target shuffle mask is equivalent within each sub-lane.
+/// Unlike isRepeatedShuffleMask we must respect SM_SentinelZero.
+static bool isRepeatedTargetShuffleMask(unsigned LaneSizeInBits, MVT VT,
+ ArrayRef<int> Mask,
+ SmallVectorImpl<int> &RepeatedMask) {
+ int LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
+ RepeatedMask.assign(LaneSize, SM_SentinelUndef);
+ int Size = Mask.size();
+ for (int i = 0; i < Size; ++i) {
+ assert(isUndefOrZero(Mask[i]) || (Mask[i] >= 0));
+ if (Mask[i] == SM_SentinelUndef)
+ continue;
+ if (Mask[i] == SM_SentinelZero) {
+ if (!isUndefOrZero(RepeatedMask[i % LaneSize]))
+ return false;
+ RepeatedMask[i % LaneSize] = SM_SentinelZero;
continue;
}
+ if ((Mask[i] % Size) / LaneSize != i / LaneSize)
+ // This entry crosses lanes, so there is no way to model this shuffle.
+ return false;
- // Scale mask element and increment across each mask element.
- for (int s = 0; s != Scale; ++s)
- ScaledMask[(Scale * i) + s] = (Scale * M) + s;
+ // Ok, handle the in-lane shuffles by detecting if and when they repeat.
+ // Adjust second vector indices to start at LaneSize instead of Size.
+ int LocalM =
+ Mask[i] < Size ? Mask[i] % LaneSize : Mask[i] % LaneSize + LaneSize;
+ if (RepeatedMask[i % LaneSize] == SM_SentinelUndef)
+ // This is the first non-undef entry in this slot of a 128-bit lane.
+ RepeatedMask[i % LaneSize] = LocalM;
+ else if (RepeatedMask[i % LaneSize] != LocalM)
+ // Found a mismatch with the repeated mask.
+ return false;
}
+ return true;
}
/// \brief Checks whether a shuffle mask is equivalent to an explicit list of
@@ -7251,7 +7952,7 @@ static SmallBitVector computeZeroableShuffleElements(ArrayRef<int> Mask,
bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
- int VectorSizeInBits = V1.getValueType().getSizeInBits();
+ int VectorSizeInBits = V1.getValueSizeInBits();
int ScalarSizeInBits = VectorSizeInBits / Mask.size();
assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");
@@ -7309,11 +8010,11 @@ static SmallBitVector computeZeroableShuffleElements(ArrayRef<int> Mask,
return Zeroable;
}
-/// Try to lower a shuffle with a single PSHUFB of V1.
-/// This is only possible if V2 is unused (at all, or only for zero elements).
+/// Try to lower a shuffle with a single PSHUFB of V1 or V2.
static SDValue lowerVectorShuffleWithPSHUFB(const SDLoc &DL, MVT VT,
ArrayRef<int> Mask, SDValue V1,
SDValue V2,
+ const SmallBitVector &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
int Size = Mask.size();
@@ -7325,12 +8026,11 @@ static SDValue lowerVectorShuffleWithPSHUFB(const SDLoc &DL, MVT VT,
(Subtarget.hasAVX2() && VT.is256BitVector()) ||
(Subtarget.hasBWI() && VT.is512BitVector()));
- SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
-
SmallVector<SDValue, 64> PSHUFBMask(NumBytes);
// Sign bit set in i8 mask means zero element.
SDValue ZeroMask = DAG.getConstant(0x80, DL, MVT::i8);
+ SDValue V;
for (int i = 0; i < NumBytes; ++i) {
int M = Mask[i / NumEltBytes];
if (M < 0) {
@@ -7341,9 +8041,13 @@ static SDValue lowerVectorShuffleWithPSHUFB(const SDLoc &DL, MVT VT,
PSHUFBMask[i] = ZeroMask;
continue;
}
- // Only allow V1.
- if (M >= Size)
+
+ // We can only use a single input of V1 or V2.
+ SDValue SrcV = (M >= Size ? V2 : V1);
+ if (V && V != SrcV)
return SDValue();
+ V = SrcV;
+ M %= Size;
// PSHUFB can't cross lanes, ensure this doesn't happen.
if ((M / LaneSize) != ((i / NumEltBytes) / LaneSize))
@@ -7353,10 +8057,11 @@ static SDValue lowerVectorShuffleWithPSHUFB(const SDLoc &DL, MVT VT,
M = M * NumEltBytes + (i % NumEltBytes);
PSHUFBMask[i] = DAG.getConstant(M, DL, MVT::i8);
}
+ assert(V && "Failed to find a source input");
MVT I8VT = MVT::getVectorVT(MVT::i8, NumBytes);
return DAG.getBitcast(
- VT, DAG.getNode(X86ISD::PSHUFB, DL, I8VT, DAG.getBitcast(I8VT, V1),
+ VT, DAG.getNode(X86ISD::PSHUFB, DL, I8VT, DAG.getBitcast(I8VT, V),
DAG.getBuildVector(I8VT, DL, PSHUFBMask)));
}
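// Worked example (illustrative sketch): for a v16i8 shuffle where elements
// 0-7 take bytes 8-15 of V2 (mask entries 24..31) and elements 8-15 are in
// the Zeroable set, every defined entry selects the same input, so V ends up
// as V2 and the PSHUFB constant becomes [8,9,...,15, 0x80 x 8] (0x80 zeroes
// the byte). A mask mixing non-zeroable V1 and V2 elements still bails out
// with SDValue(), since a single PSHUFB reads only one source register.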
@@ -7365,21 +8070,13 @@ static SDValue lowerVectorShuffleWithPSHUFB(const SDLoc &DL, MVT VT,
static SDValue lowerVectorShuffleWithUNPCK(const SDLoc &DL, MVT VT,
ArrayRef<int> Mask, SDValue V1,
SDValue V2, SelectionDAG &DAG) {
- int NumElts = VT.getVectorNumElements();
- int NumEltsInLane = 128 / VT.getScalarSizeInBits();
- SmallVector<int, 8> Unpckl(NumElts);
- SmallVector<int, 8> Unpckh(NumElts);
-
- for (int i = 0; i < NumElts; ++i) {
- unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane;
- int LoPos = (i % NumEltsInLane) / 2 + LaneStart + NumElts * (i % 2);
- int HiPos = LoPos + NumEltsInLane / 2;
- Unpckl[i] = LoPos;
- Unpckh[i] = HiPos;
- }
-
+ SmallVector<int, 8> Unpckl;
+ createUnpackShuffleMask(VT, Unpckl, /* Lo = */ true, /* Unary = */ false);
if (isShuffleEquivalent(V1, V2, Mask, Unpckl))
return DAG.getNode(X86ISD::UNPCKL, DL, VT, V1, V2);
+
+ SmallVector<int, 8> Unpckh;
+ createUnpackShuffleMask(VT, Unpckh, /* Lo = */ false, /* Unary = */ false);
if (isShuffleEquivalent(V1, V2, Mask, Unpckh))
return DAG.getNode(X86ISD::UNPCKH, DL, VT, V1, V2);
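// Worked example (illustrative sketch): createUnpackShuffleMask is expected
// to produce the classic x86 interleave patterns, e.g. for v4i32
//   Unpckl = [0, 4, 1, 5]   and   Unpckh = [2, 6, 3, 7],
// and for 256-bit v8i32 the per-128-bit-lane form
//   Unpckl = [0, 8, 1, 9, 4, 12, 5, 13].
// These match what the removed hand-rolled loop computed.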
@@ -7401,19 +8098,14 @@ static SDValue lowerVectorShuffleWithUNPCK(const SDLoc &DL, MVT VT,
/// one of the inputs being zeroable.
static SDValue lowerVectorShuffleAsBitMask(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
+ const SmallBitVector &Zeroable,
SelectionDAG &DAG) {
+ assert(!VT.isFloatingPoint() && "Floating point types are not supported");
MVT EltVT = VT.getVectorElementType();
- int NumEltBits = EltVT.getSizeInBits();
- MVT IntEltVT = MVT::getIntegerVT(NumEltBits);
- SDValue Zero = DAG.getConstant(0, DL, IntEltVT);
- SDValue AllOnes = DAG.getConstant(APInt::getAllOnesValue(NumEltBits), DL,
- IntEltVT);
- if (EltVT.isFloatingPoint()) {
- Zero = DAG.getBitcast(EltVT, Zero);
- AllOnes = DAG.getBitcast(EltVT, AllOnes);
- }
+ SDValue Zero = DAG.getConstant(0, DL, EltVT);
+ SDValue AllOnes =
+ DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), DL, EltVT);
SmallVector<SDValue, 16> VMaskOps(Mask.size(), Zero);
- SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
SDValue V;
for (int i = 0, Size = Mask.size(); i < Size; ++i) {
if (Zeroable[i])
@@ -7431,10 +8123,7 @@ static SDValue lowerVectorShuffleAsBitMask(const SDLoc &DL, MVT VT, SDValue V1,
return SDValue(); // No non-zeroable elements!
SDValue VMask = DAG.getBuildVector(VT, DL, VMaskOps);
- V = DAG.getNode(VT.isFloatingPoint()
- ? (unsigned) X86ISD::FAND : (unsigned) ISD::AND,
- DL, VT, V, VMask);
- return V;
+ return DAG.getNode(ISD::AND, DL, VT, V, VMask);
}
/// \brief Try to emit a blend instruction for a shuffle using bit math.
@@ -7476,12 +8165,12 @@ static SDValue lowerVectorShuffleAsBitBlend(const SDLoc &DL, MVT VT, SDValue V1,
/// that the shuffle mask is a blend, or convertible into a blend with zero.
static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Original,
+ const SmallBitVector &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());
SmallVector<int, 8> Mask(Original.begin(), Original.end());
- SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
bool ForceV1Zero = false, ForceV2Zero = false;
// Attempt to generate the binary blend mask. If an input is zero then
@@ -7540,7 +8229,7 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
case MVT::v4i64:
case MVT::v8i32:
assert(Subtarget.hasAVX2() && "256-bit integer blends require AVX2!");
- // FALLTHROUGH
+ LLVM_FALLTHROUGH;
case MVT::v2i64:
case MVT::v4i32:
// If we have AVX2 it is faster to use VPBLENDD when the shuffle fits into
@@ -7556,7 +8245,7 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
VT, DAG.getNode(X86ISD::BLENDI, DL, BlendVT, V1, V2,
DAG.getConstant(BlendMask, DL, MVT::i8)));
}
- // FALLTHROUGH
+ LLVM_FALLTHROUGH;
case MVT::v8i16: {
// For integer shuffles we need to expand the mask and cast the inputs to
// v8i16s prior to blending.
@@ -7582,15 +8271,16 @@ static SDValue lowerVectorShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
return DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2,
DAG.getConstant(BlendMask, DL, MVT::i8));
}
+ LLVM_FALLTHROUGH;
}
- // FALLTHROUGH
case MVT::v16i8:
case MVT::v32i8: {
assert((VT.is128BitVector() || Subtarget.hasAVX2()) &&
"256-bit byte-blends require AVX2 support!");
// Attempt to lower to a bitmask if we can. VPAND is faster than VPBLENDVB.
- if (SDValue Masked = lowerVectorShuffleAsBitMask(DL, VT, V1, V2, Mask, DAG))
+ if (SDValue Masked =
+ lowerVectorShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable, DAG))
return Masked;
// Scale the blend by the number of bytes per element.
@@ -7704,32 +8394,12 @@ static SDValue lowerVectorShuffleAsDecomposedShuffleBlend(const SDLoc &DL,
return DAG.getVectorShuffle(VT, DL, V1, V2, BlendMask);
}
-/// \brief Try to lower a vector shuffle as a byte rotation.
+/// \brief Try to lower a vector shuffle as a rotation.
///
-/// SSSE3 has a generic PALIGNR instruction in x86 that will do an arbitrary
-/// byte-rotation of the concatenation of two vectors; pre-SSSE3 can use
-/// a PSRLDQ/PSLLDQ/POR pattern to get a similar effect. This routine will
-/// try to generically lower a vector shuffle through such an pattern. It
-/// does not check for the profitability of lowering either as PALIGNR or
-/// PSRLDQ/PSLLDQ/POR, only whether the mask is valid to lower in that form.
-/// This matches shuffle vectors that look like:
-///
-/// v8i16 [11, 12, 13, 14, 15, 0, 1, 2]
-///
-/// Essentially it concatenates V1 and V2, shifts right by some number of
-/// elements, and takes the low elements as the result. Note that while this is
-/// specified as a *right shift* because x86 is little-endian, it is a *left
-/// rotate* of the vector lanes.
-static SDValue lowerVectorShuffleAsByteRotate(const SDLoc &DL, MVT VT,
- SDValue V1, SDValue V2,
- ArrayRef<int> Mask,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
- assert(!isNoopShuffleMask(Mask) && "We shouldn't lower no-op shuffles!");
-
+/// This is used to support PALIGNR for SSSE3 or VALIGND/Q for AVX512.
+static int matchVectorShuffleAsRotate(SDValue &V1, SDValue &V2,
+ ArrayRef<int> Mask) {
int NumElts = Mask.size();
- int NumLanes = VT.getSizeInBits() / 128;
- int NumLaneElts = NumElts / NumLanes;
// We need to detect various ways of spelling a rotation:
// [11, 12, 13, 14, 15, 0, 1, 2]
@@ -7740,51 +8410,46 @@ static SDValue lowerVectorShuffleAsByteRotate(const SDLoc &DL, MVT VT,
// [-1, 4, 5, 6, -1, -1, -1, -1]
int Rotation = 0;
SDValue Lo, Hi;
- for (int l = 0; l < NumElts; l += NumLaneElts) {
- for (int i = 0; i < NumLaneElts; ++i) {
- if (Mask[l + i] < 0)
- continue;
-
- // Get the mod-Size index and lane correct it.
- int LaneIdx = (Mask[l + i] % NumElts) - l;
- // Make sure it was in this lane.
- if (LaneIdx < 0 || LaneIdx >= NumLaneElts)
- return SDValue();
+ for (int i = 0; i < NumElts; ++i) {
+ int M = Mask[i];
+ assert((M == SM_SentinelUndef || (0 <= M && M < (2*NumElts))) &&
+ "Unexpected mask index.");
+ if (M < 0)
+ continue;
- // Determine where a rotated vector would have started.
- int StartIdx = i - LaneIdx;
- if (StartIdx == 0)
- // The identity rotation isn't interesting, stop.
- return SDValue();
+ // Determine where a rotated vector would have started.
+ int StartIdx = i - (M % NumElts);
+ if (StartIdx == 0)
+ // The identity rotation isn't interesting, stop.
+ return -1;
- // If we found the tail of a vector the rotation must be the missing
- // front. If we found the head of a vector, it must be how much of the
- // head.
- int CandidateRotation = StartIdx < 0 ? -StartIdx : NumLaneElts - StartIdx;
+ // If we found the tail of a vector the rotation must be the missing
+ // front. If we found the head of a vector, it must be how much of the
+ // head.
+ int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx;
- if (Rotation == 0)
- Rotation = CandidateRotation;
- else if (Rotation != CandidateRotation)
- // The rotations don't match, so we can't match this mask.
- return SDValue();
+ if (Rotation == 0)
+ Rotation = CandidateRotation;
+ else if (Rotation != CandidateRotation)
+ // The rotations don't match, so we can't match this mask.
+ return -1;
- // Compute which value this mask is pointing at.
- SDValue MaskV = Mask[l + i] < NumElts ? V1 : V2;
-
- // Compute which of the two target values this index should be assigned
- // to. This reflects whether the high elements are remaining or the low
- // elements are remaining.
- SDValue &TargetV = StartIdx < 0 ? Hi : Lo;
-
- // Either set up this value if we've not encountered it before, or check
- // that it remains consistent.
- if (!TargetV)
- TargetV = MaskV;
- else if (TargetV != MaskV)
- // This may be a rotation, but it pulls from the inputs in some
- // unsupported interleaving.
- return SDValue();
- }
+ // Compute which value this mask is pointing at.
+ SDValue MaskV = M < NumElts ? V1 : V2;
+
+ // Compute which of the two target values this index should be assigned
+ // to. This reflects whether the high elements are remaining or the low
+ // elements are remaining.
+ SDValue &TargetV = StartIdx < 0 ? Hi : Lo;
+
+ // Either set up this value if we've not encountered it before, or check
+ // that it remains consistent.
+ if (!TargetV)
+ TargetV = MaskV;
+ else if (TargetV != MaskV)
+ // This may be a rotation, but it pulls from the inputs in some
+ // unsupported interleaving.
+ return -1;
}
// Check that we successfully analyzed the mask, and normalize the results.
@@ -7795,23 +8460,75 @@ static SDValue lowerVectorShuffleAsByteRotate(const SDLoc &DL, MVT VT,
else if (!Hi)
Hi = Lo;
+ V1 = Lo;
+ V2 = Hi;
+
+ return Rotation;
+}
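// Worked example (illustrative sketch): with NumElts == 8 the mask
//   [11, 12, 13, 14, 15, 0, 1, 2]
// yields StartIdx == -3 for the V2 entries and StartIdx == 5 for the V1
// entries, both giving CandidateRotation == 3, so the helper returns 3 with
// Lo == V1 and Hi == V2. Pulling the same slots from the other input would
// fail the TargetV consistency check and return -1.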
+
+/// \brief Try to lower a vector shuffle as a byte rotation.
+///
+/// SSSE3 has a generic PALIGNR instruction in x86 that will do an arbitrary
+/// byte-rotation of the concatenation of two vectors; pre-SSSE3 can use
+/// a PSRLDQ/PSLLDQ/POR pattern to get a similar effect. This routine will
+/// try to generically lower a vector shuffle through such a pattern. It
+/// does not check for the profitability of lowering either as PALIGNR or
+/// PSRLDQ/PSLLDQ/POR, only whether the mask is valid to lower in that form.
+/// This matches shuffle vectors that look like:
+///
+/// v8i16 [11, 12, 13, 14, 15, 0, 1, 2]
+///
+/// Essentially it concatenates V1 and V2, shifts right by some number of
+/// elements, and takes the low elements as the result. Note that while this is
+/// specified as a *right shift* because x86 is little-endian, it is a *left
+/// rotate* of the vector lanes.
+static int matchVectorShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2,
+ ArrayRef<int> Mask) {
+ // Don't accept any shuffles with zero elements.
+ if (any_of(Mask, [](int M) { return M == SM_SentinelZero; }))
+ return -1;
+
+ // PALIGNR works on 128-bit lanes.
+ SmallVector<int, 16> RepeatedMask;
+ if (!is128BitLaneRepeatedShuffleMask(VT, Mask, RepeatedMask))
+ return -1;
+
+ int Rotation = matchVectorShuffleAsRotate(V1, V2, RepeatedMask);
+ if (Rotation <= 0)
+ return -1;
+
+ // PALIGNR rotates bytes, so we need to scale the
+ // rotation based on how many bytes are in the vector lane.
+ int NumElts = RepeatedMask.size();
+ int Scale = 16 / NumElts;
+ return Rotation * Scale;
+}
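// Worked example (illustrative sketch): for a v8i16 shuffle whose 128-bit
// repeated mask is [11, 12, 13, 14, 15, 0, 1, 2], the element rotation is 3
// and the repeated mask has 8 elements, so Scale == 16 / 8 == 2 and the
// returned byte rotation is 6 (a PALIGNR immediate of 6). Any
// SM_SentinelZero entry in the original mask rejects the match up front.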
+
+static SDValue lowerVectorShuffleAsByteRotate(const SDLoc &DL, MVT VT,
+ SDValue V1, SDValue V2,
+ ArrayRef<int> Mask,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
+ assert(!isNoopShuffleMask(Mask) && "We shouldn't lower no-op shuffles!");
+
+ SDValue Lo = V1, Hi = V2;
+ int ByteRotation = matchVectorShuffleAsByteRotate(VT, Lo, Hi, Mask);
+ if (ByteRotation <= 0)
+ return SDValue();
+
// Cast the inputs to i8 vector of correct length to match PALIGNR or
// PSLLDQ/PSRLDQ.
- MVT ByteVT = MVT::getVectorVT(MVT::i8, 16 * NumLanes);
+ MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
Lo = DAG.getBitcast(ByteVT, Lo);
Hi = DAG.getBitcast(ByteVT, Hi);
- // The actual rotate instruction rotates bytes, so we need to scale the
- // rotation based on how many bytes are in the vector lane.
- int Scale = 16 / NumLaneElts;
-
// SSSE3 targets can use the palignr instruction.
if (Subtarget.hasSSSE3()) {
assert((!VT.is512BitVector() || Subtarget.hasBWI()) &&
"512-bit PALIGNR requires BWI instructions");
return DAG.getBitcast(
VT, DAG.getNode(X86ISD::PALIGNR, DL, ByteVT, Lo, Hi,
- DAG.getConstant(Rotation * Scale, DL, MVT::i8)));
+ DAG.getConstant(ByteRotation, DL, MVT::i8)));
}
assert(VT.is128BitVector() &&
@@ -7822,8 +8539,8 @@ static SDValue lowerVectorShuffleAsByteRotate(const SDLoc &DL, MVT VT,
"SSE2 rotate lowering only needed for v16i8!");
// Default SSE2 implementation
- int LoByteShift = 16 - Rotation * Scale;
- int HiByteShift = Rotation * Scale;
+ int LoByteShift = 16 - ByteRotation;
+ int HiByteShift = ByteRotation;
SDValue LoShift = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Lo,
DAG.getConstant(LoByteShift, DL, MVT::i8));
@@ -7833,6 +8550,37 @@ static SDValue lowerVectorShuffleAsByteRotate(const SDLoc &DL, MVT VT,
DAG.getNode(ISD::OR, DL, MVT::v16i8, LoShift, HiShift));
}
+/// \brief Try to lower a vector shuffle as a dword/qword rotation.
+///
+/// AVX512 has VALIGND/VALIGNQ instructions that will do an arbitrary
+/// rotation of the concatenation of two vectors; this routine will
+/// try to generically lower a vector shuffle through such a pattern.
+///
+/// Essentially it concatenates V1 and V2, shifts right by some number of
+/// elements, and takes the low elements as the result. Note that while this is
+/// specified as a *right shift* because x86 is little-endian, it is a *left
+/// rotate* of the vector lanes.
+static SDValue lowerVectorShuffleAsRotate(const SDLoc &DL, MVT VT,
+ SDValue V1, SDValue V2,
+ ArrayRef<int> Mask,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
+ assert((VT.getScalarType() == MVT::i32 || VT.getScalarType() == MVT::i64) &&
+ "Only 32-bit and 64-bit elements are supported!");
+
+ // 128/256-bit vectors are only supported with VLX.
+ assert((Subtarget.hasVLX() || (!VT.is128BitVector() && !VT.is256BitVector()))
+ && "VLX required for 128/256-bit vectors");
+
+ SDValue Lo = V1, Hi = V2;
+ int Rotation = matchVectorShuffleAsRotate(Lo, Hi, Mask);
+ if (Rotation <= 0)
+ return SDValue();
+
+ return DAG.getNode(X86ISD::VALIGN, DL, VT, Lo, Hi,
+ DAG.getConstant(Rotation, DL, MVT::i8));
+}
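// Worked example (illustrative sketch): a single-input v8i32 rotate such as
//   [3, 4, 5, 6, 7, 0, 1, 2]
// matches with Rotation == 3 and Lo == Hi == V1, producing VALIGND $3 with
// V1 as both operands. Unlike the byte-rotate path, the immediate here
// counts 32/64-bit elements, not bytes.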
+
/// \brief Try to lower a vector shuffle as a bit shift (shifts in zeros).
///
/// Attempts to match a shuffle mask against the PSLL(W/D/Q/DQ) and
@@ -7856,14 +8604,13 @@ static SDValue lowerVectorShuffleAsByteRotate(const SDLoc &DL, MVT VT,
/// [ 5, 6, 7, zz, zz, zz, zz, zz]
/// [ -1, 5, 6, 7, zz, zz, zz, zz]
/// [ 1, 2, -1, -1, -1, -1, zz, zz]
-static SDValue lowerVectorShuffleAsShift(const SDLoc &DL, MVT VT, SDValue V1,
- SDValue V2, ArrayRef<int> Mask,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
- SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
-
+static int matchVectorShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
+ unsigned ScalarSizeInBits,
+ ArrayRef<int> Mask, int MaskOffset,
+ const SmallBitVector &Zeroable,
+ const X86Subtarget &Subtarget) {
int Size = Mask.size();
- assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
+ unsigned SizeInBits = Size * ScalarSizeInBits;
auto CheckZeros = [&](int Shift, int Scale, bool Left) {
for (int i = 0; i < Size; i += Scale)
@@ -7874,37 +8621,30 @@ static SDValue lowerVectorShuffleAsShift(const SDLoc &DL, MVT VT, SDValue V1,
return true;
};
- auto MatchShift = [&](int Shift, int Scale, bool Left, SDValue V) {
+ auto MatchShift = [&](int Shift, int Scale, bool Left) {
for (int i = 0; i != Size; i += Scale) {
unsigned Pos = Left ? i + Shift : i;
unsigned Low = Left ? i : i + Shift;
unsigned Len = Scale - Shift;
- if (!isSequentialOrUndefInRange(Mask, Pos, Len,
- Low + (V == V1 ? 0 : Size)))
- return SDValue();
+ if (!isSequentialOrUndefInRange(Mask, Pos, Len, Low + MaskOffset))
+ return -1;
}
- int ShiftEltBits = VT.getScalarSizeInBits() * Scale;
+ int ShiftEltBits = ScalarSizeInBits * Scale;
bool ByteShift = ShiftEltBits > 64;
- unsigned OpCode = Left ? (ByteShift ? X86ISD::VSHLDQ : X86ISD::VSHLI)
- : (ByteShift ? X86ISD::VSRLDQ : X86ISD::VSRLI);
- int ShiftAmt = Shift * VT.getScalarSizeInBits() / (ByteShift ? 8 : 1);
+ Opcode = Left ? (ByteShift ? X86ISD::VSHLDQ : X86ISD::VSHLI)
+ : (ByteShift ? X86ISD::VSRLDQ : X86ISD::VSRLI);
+ int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? 8 : 1);
// Normalize the scale for byte shifts to still produce an i64 element
// type.
Scale = ByteShift ? Scale / 2 : Scale;
// We need to round trip through the appropriate type for the shift.
- MVT ShiftSVT = MVT::getIntegerVT(VT.getScalarSizeInBits() * Scale);
- MVT ShiftVT = ByteShift ? MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8)
- : MVT::getVectorVT(ShiftSVT, Size / Scale);
- assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
- "Illegal integer vector type");
- V = DAG.getBitcast(ShiftVT, V);
-
- V = DAG.getNode(OpCode, DL, ShiftVT, V,
- DAG.getConstant(ShiftAmt, DL, MVT::i8));
- return DAG.getBitcast(VT, V);
+ MVT ShiftSVT = MVT::getIntegerVT(ScalarSizeInBits * Scale);
+ ShiftVT = ByteShift ? MVT::getVectorVT(MVT::i8, SizeInBits / 8)
+ : MVT::getVectorVT(ShiftSVT, Size / Scale);
+ return (int)ShiftAmt;
};
// SSE/AVX supports logical shifts up to 64-bit integers - so we can just
@@ -7913,29 +8653,64 @@ static SDValue lowerVectorShuffleAsShift(const SDLoc &DL, MVT VT, SDValue V1,
// their width within the elements of the larger integer vector. Test each
// multiple to see if we can find a match with the moved element indices
// and that the shifted in elements are all zeroable.
- unsigned MaxWidth = (VT.is512BitVector() && !Subtarget.hasBWI() ? 64 : 128);
- for (int Scale = 2; Scale * VT.getScalarSizeInBits() <= MaxWidth; Scale *= 2)
+ unsigned MaxWidth = ((SizeInBits == 512) && !Subtarget.hasBWI() ? 64 : 128);
+ for (int Scale = 2; Scale * ScalarSizeInBits <= MaxWidth; Scale *= 2)
for (int Shift = 1; Shift != Scale; ++Shift)
for (bool Left : {true, false})
- if (CheckZeros(Shift, Scale, Left))
- for (SDValue V : {V1, V2})
- if (SDValue Match = MatchShift(Shift, Scale, Left, V))
- return Match;
+ if (CheckZeros(Shift, Scale, Left)) {
+ int ShiftAmt = MatchShift(Shift, Scale, Left);
+ if (0 < ShiftAmt)
+ return ShiftAmt;
+ }
// no match
- return SDValue();
+ return -1;
+}
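// Worked example (illustrative sketch): for a v8i16 mask
//   [Z, 0, Z, 2, Z, 4, Z, 6]          (Z = zeroable)
// the Scale == 2, Shift == 1, Left case matches: ShiftVT becomes v4i32,
// Opcode becomes X86ISD::VSHLI and the returned amount is 16, i.e. the
// shuffle is a PSLLD-by-16 once the input is bitcast to v4i32.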
+
+static SDValue lowerVectorShuffleAsShift(const SDLoc &DL, MVT VT, SDValue V1,
+ SDValue V2, ArrayRef<int> Mask,
+ const SmallBitVector &Zeroable,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
+ int Size = Mask.size();
+ assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
+
+ MVT ShiftVT;
+ SDValue V = V1;
+ unsigned Opcode;
+
+ // Try to match shuffle against V1 shift.
+ int ShiftAmt = matchVectorShuffleAsShift(
+ ShiftVT, Opcode, VT.getScalarSizeInBits(), Mask, 0, Zeroable, Subtarget);
+
+ // If V1 failed, try to match shuffle against V2 shift.
+ if (ShiftAmt < 0) {
+ ShiftAmt =
+ matchVectorShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
+ Mask, Size, Zeroable, Subtarget);
+ V = V2;
+ }
+
+ if (ShiftAmt < 0)
+ return SDValue();
+
+ assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
+ "Illegal integer vector type");
+ V = DAG.getBitcast(ShiftVT, V);
+ V = DAG.getNode(Opcode, DL, ShiftVT, V,
+ DAG.getConstant(ShiftAmt, DL, MVT::i8));
+ return DAG.getBitcast(VT, V);
}
/// \brief Try to lower a vector shuffle using SSE4a EXTRQ/INSERTQ.
static SDValue lowerVectorShuffleWithSSE4A(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
+ const SmallBitVector &Zeroable,
SelectionDAG &DAG) {
- SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
- assert(!Zeroable.all() && "Fully zeroable shuffle mask");
-
int Size = Mask.size();
int HalfSize = Size / 2;
assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
+ assert(!Zeroable.all() && "Fully zeroable shuffle mask");
// Upper half must be undefined.
if (!isUndefInRange(Mask, HalfSize, HalfSize))
@@ -8111,8 +8886,10 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
InputV = ShuffleOffset(InputV);
// For 256-bit vectors, we only need the lower (128-bit) input half.
- if (VT.is256BitVector())
- InputV = extract128BitVector(InputV, 0, DAG, DL);
+ // For 512-bit vectors, we only need the lower input half or quarter.
+ if (VT.getSizeInBits() > 128)
+ InputV = extractSubVector(InputV, 0, DAG, DL,
+ std::max(128, (int)VT.getSizeInBits() / Scale));
InputV = DAG.getNode(X86ISD::VZEXT, DL, ExtVT, InputV);
return DAG.getBitcast(VT, InputV);
@@ -8231,9 +9008,8 @@ static SDValue lowerVectorShuffleAsSpecificZeroOrAnyExtend(
/// are both incredibly common and often quite performance sensitive.
static SDValue lowerVectorShuffleAsZeroOrAnyExtend(
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
- const X86Subtarget &Subtarget, SelectionDAG &DAG) {
- SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
-
+ const SmallBitVector &Zeroable, const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
int Bits = VT.getSizeInBits();
int NumLanes = Bits / 128;
int NumElements = VT.getVectorNumElements();
@@ -8388,14 +9164,14 @@ static bool isShuffleFoldableLoad(SDValue V) {
/// across all subtarget feature sets.
static SDValue lowerVectorShuffleAsElementInsertion(
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
- const X86Subtarget &Subtarget, SelectionDAG &DAG) {
- SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
+ const SmallBitVector &Zeroable, const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
MVT ExtVT = VT;
MVT EltVT = VT.getVectorElementType();
- int V2Index = std::find_if(Mask.begin(), Mask.end(),
- [&Mask](int M) { return M >= (int)Mask.size(); }) -
- Mask.begin();
+ int V2Index =
+ find_if(Mask, [&Mask](int M) { return M >= (int)Mask.size(); }) -
+ Mask.begin();
bool IsV1Zeroable = true;
for (int i = 0, Size = Mask.size(); i < Size; ++i)
if (i != V2Index && !Zeroable[i]) {
@@ -8709,6 +9485,13 @@ static SDValue lowerVectorShuffleAsBroadcast(const SDLoc &DL, MVT VT,
V = DAG.getBitcast(SrcVT, V);
}
+ // 32-bit targets need to load i64 as a f64 and then bitcast the result.
+ if (!Subtarget.is64Bit() && SrcVT == MVT::i64) {
+ V = DAG.getBitcast(MVT::f64, V);
+ unsigned NumBroadcastElts = BroadcastVT.getVectorNumElements();
+ BroadcastVT = MVT::getVectorVT(MVT::f64, NumBroadcastElts);
+ }
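// Illustrative note: on a 32-bit target a v4i64 broadcast is therefore
// emitted as an f64 broadcast (e.g. VBROADCASTSD to v4f64), and the final
// getBitcast below converts the result back to v4i64, since i64 is not a
// legal scalar type there.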
+
return DAG.getBitcast(VT, DAG.getNode(Opcode, DL, BroadcastVT, V));
}
@@ -8726,71 +9509,93 @@ static bool matchVectorShuffleAsInsertPS(SDValue &V1, SDValue &V2,
assert(V1.getSimpleValueType().is128BitVector() && "Bad operand type!");
assert(V2.getSimpleValueType().is128BitVector() && "Bad operand type!");
assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
- unsigned ZMask = 0;
- int V1DstIndex = -1;
- int V2DstIndex = -1;
- bool V1UsedInPlace = false;
- for (int i = 0; i < 4; ++i) {
- // Synthesize a zero mask from the zeroable elements (includes undefs).
- if (Zeroable[i]) {
- ZMask |= 1 << i;
- continue;
- }
+ // Attempt to match INSERTPS with one element from VA or VB being
+ // inserted into VA (or undef). If successful, V1, V2 and InsertPSMask
+ // are updated.
+ auto matchAsInsertPS = [&](SDValue VA, SDValue VB,
+ ArrayRef<int> CandidateMask) {
+ unsigned ZMask = 0;
+ int VADstIndex = -1;
+ int VBDstIndex = -1;
+ bool VAUsedInPlace = false;
+
+ for (int i = 0; i < 4; ++i) {
+ // Synthesize a zero mask from the zeroable elements (includes undefs).
+ if (Zeroable[i]) {
+ ZMask |= 1 << i;
+ continue;
+ }
- // Flag if we use any V1 inputs in place.
- if (i == Mask[i]) {
- V1UsedInPlace = true;
- continue;
+ // Flag if we use any VA inputs in place.
+ if (i == CandidateMask[i]) {
+ VAUsedInPlace = true;
+ continue;
+ }
+
+ // We can only insert a single non-zeroable element.
+ if (VADstIndex >= 0 || VBDstIndex >= 0)
+ return false;
+
+ if (CandidateMask[i] < 4) {
+ // VA input out of place for insertion.
+ VADstIndex = i;
+ } else {
+ // VB input for insertion.
+ VBDstIndex = i;
+ }
}
- // We can only insert a single non-zeroable element.
- if (V1DstIndex >= 0 || V2DstIndex >= 0)
+ // Don't bother if we have no (non-zeroable) element for insertion.
+ if (VADstIndex < 0 && VBDstIndex < 0)
return false;
- if (Mask[i] < 4) {
- // V1 input out of place for insertion.
- V1DstIndex = i;
+ // Determine element insertion src/dst indices. The src index is from the
+ // start of the inserted vector, not the start of the concatenated vector.
+ unsigned VBSrcIndex = 0;
+ if (VADstIndex >= 0) {
+ // If we have a VA input out of place, we use VA as the V2 element
+ // insertion and don't use the original V2 at all.
+ VBSrcIndex = CandidateMask[VADstIndex];
+ VBDstIndex = VADstIndex;
+ VB = VA;
} else {
- // V2 input for insertion.
- V2DstIndex = i;
+ VBSrcIndex = CandidateMask[VBDstIndex] - 4;
}
- }
- // Don't bother if we have no (non-zeroable) element for insertion.
- if (V1DstIndex < 0 && V2DstIndex < 0)
- return false;
+ // If no V1 inputs are used in place, then the result is created only from
+ // the zero mask and the V2 insertion - so remove V1 dependency.
+ if (!VAUsedInPlace)
+ VA = DAG.getUNDEF(MVT::v4f32);
- // Determine element insertion src/dst indices. The src index is from the
- // start of the inserted vector, not the start of the concatenated vector.
- unsigned V2SrcIndex = 0;
- if (V1DstIndex >= 0) {
- // If we have a V1 input out of place, we use V1 as the V2 element insertion
- // and don't use the original V2 at all.
- V2SrcIndex = Mask[V1DstIndex];
- V2DstIndex = V1DstIndex;
- V2 = V1;
- } else {
- V2SrcIndex = Mask[V2DstIndex] - 4;
- }
+ // Update V1, V2 and InsertPSMask accordingly.
+ V1 = VA;
+ V2 = VB;
- // If no V1 inputs are used in place, then the result is created only from
- // the zero mask and the V2 insertion - so remove V1 dependency.
- if (!V1UsedInPlace)
- V1 = DAG.getUNDEF(MVT::v4f32);
+ // Insert the V2 element into the desired position.
+ InsertPSMask = VBSrcIndex << 6 | VBDstIndex << 4 | ZMask;
+ assert((InsertPSMask & ~0xFFu) == 0 && "Invalid mask!");
+ return true;
+ };
- // Insert the V2 element into the desired position.
- InsertPSMask = V2SrcIndex << 6 | V2DstIndex << 4 | ZMask;
- assert((InsertPSMask & ~0xFFu) == 0 && "Invalid mask!");
- return true;
+ if (matchAsInsertPS(V1, V2, Mask))
+ return true;
+
+ // Commute and try again.
+ SmallVector<int, 4> CommutedMask(Mask.begin(), Mask.end());
+ ShuffleVectorSDNode::commuteMask(CommutedMask);
+ if (matchAsInsertPS(V2, V1, CommutedMask))
+ return true;
+
+ return false;
}
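// Worked example (illustrative sketch): for a v4f32 mask [0, 5, 2, 3] with
// nothing zeroable, element 1 is the single out-of-place input, taken from
// V2 element 1, so InsertPSMask becomes (1 << 6) | (1 << 4) | 0 == 0x50 and
// V1/V2 are left as-is. A mask such as [4, 1, 6, 7] only matches after the
// commute step, which retries with the operands and mask swapped.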
static SDValue lowerVectorShuffleAsInsertPS(const SDLoc &DL, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
+ const SmallBitVector &Zeroable,
SelectionDAG &DAG) {
assert(V1.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
assert(V2.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
- SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
// Attempt to match the insertps pattern.
unsigned InsertPSMask;
@@ -8922,6 +9727,7 @@ static SDValue lowerVectorShuffleAsPermuteAndUnpack(const SDLoc &DL, MVT VT,
/// it is better to avoid lowering through this for integer vectors where
/// possible.
static SDValue lowerV2F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
+ const SmallBitVector &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -8946,8 +9752,11 @@ static SDValue lowerV2F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
DAG.getConstant(SHUFPDMask, DL, MVT::i8));
}
- return DAG.getNode(X86ISD::SHUFP, DL, MVT::v2f64, V1, V1,
- DAG.getConstant(SHUFPDMask, DL, MVT::i8));
+ return DAG.getNode(
+ X86ISD::SHUFP, DL, MVT::v2f64,
+ Mask[0] == SM_SentinelUndef ? DAG.getUNDEF(MVT::v2f64) : V1,
+ Mask[1] == SM_SentinelUndef ? DAG.getUNDEF(MVT::v2f64) : V1,
+ DAG.getConstant(SHUFPDMask, DL, MVT::i8));
}
assert(Mask[0] >= 0 && Mask[0] < 2 && "Non-canonicalized blend!");
assert(Mask[1] >= 2 && "Non-canonicalized blend!");
@@ -8955,14 +9764,14 @@ static SDValue lowerV2F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// If we have a single input, insert that into V1 if we can do so cheaply.
if ((Mask[0] >= 2) + (Mask[1] >= 2) == 1) {
if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
- DL, MVT::v2f64, V1, V2, Mask, Subtarget, DAG))
+ DL, MVT::v2f64, V1, V2, Mask, Zeroable, Subtarget, DAG))
return Insertion;
// Try inverting the insertion since for v2 masks it is easy to do and we
// can't reliably sort the mask one way or the other.
int InverseMask[2] = {Mask[0] < 0 ? -1 : (Mask[0] ^ 2),
Mask[1] < 0 ? -1 : (Mask[1] ^ 2)};
if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
- DL, MVT::v2f64, V2, V1, InverseMask, Subtarget, DAG))
+ DL, MVT::v2f64, V2, V1, InverseMask, Zeroable, Subtarget, DAG))
return Insertion;
}
@@ -8980,7 +9789,7 @@ static SDValue lowerV2F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
if (Subtarget.hasSSE41())
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v2f64, V1, V2, Mask,
- Subtarget, DAG))
+ Zeroable, Subtarget, DAG))
return Blend;
// Use dedicated unpack instructions for masks that match their pattern.
@@ -9000,6 +9809,7 @@ static SDValue lowerV2F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
/// it falls back to the floating point shuffle operation with appropriate bit
/// casting.
static SDValue lowerV2I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
+ const SmallBitVector &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -9052,19 +9862,19 @@ static SDValue lowerV2I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Try to use shift instructions.
if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v2i64, V1, V2, Mask,
- Subtarget, DAG))
+ Zeroable, Subtarget, DAG))
return Shift;
// When loading a scalar and then shuffling it into a vector we can often do
// the insertion cheaply.
if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
- DL, MVT::v2i64, V1, V2, Mask, Subtarget, DAG))
+ DL, MVT::v2i64, V1, V2, Mask, Zeroable, Subtarget, DAG))
return Insertion;
// Try inverting the insertion since for v2 masks it is easy to do and we
// can't reliably sort the mask one way or the other.
int InverseMask[2] = {Mask[0] ^ 2, Mask[1] ^ 2};
if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
- DL, MVT::v2i64, V2, V1, InverseMask, Subtarget, DAG))
+ DL, MVT::v2i64, V2, V1, InverseMask, Zeroable, Subtarget, DAG))
return Insertion;
// We have different paths for blend lowering, but they all must use the
@@ -9072,7 +9882,7 @@ static SDValue lowerV2I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
bool IsBlendSupported = Subtarget.hasSSE41();
if (IsBlendSupported)
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v2i64, V1, V2, Mask,
- Subtarget, DAG))
+ Zeroable, Subtarget, DAG))
return Blend;
// Use dedicated unpack instructions for masks that match their pattern.
@@ -9139,9 +9949,7 @@ static SDValue lowerVectorShuffleWithSHUFPS(const SDLoc &DL, MVT VT,
int NumV2Elements = count_if(Mask, [](int M) { return M >= 4; });
if (NumV2Elements == 1) {
- int V2Index =
- std::find_if(Mask.begin(), Mask.end(), [](int M) { return M >= 4; }) -
- Mask.begin();
+ int V2Index = find_if(Mask, [](int M) { return M >= 4; }) - Mask.begin();
// Compute the index adjacent to V2Index and in the same half by toggling
// the low bit.
@@ -9220,6 +10028,7 @@ static SDValue lowerVectorShuffleWithSHUFPS(const SDLoc &DL, MVT VT,
/// domain crossing penalties, as these are sufficient to implement all v4f32
/// shuffles.
static SDValue lowerV4F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
+ const SmallBitVector &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -9262,17 +10071,18 @@ static SDValue lowerV4F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// when the V2 input is targeting element 0 of the mask -- that is the fast
// case here.
if (NumV2Elements == 1 && Mask[0] >= 4)
- if (SDValue V = lowerVectorShuffleAsElementInsertion(DL, MVT::v4f32, V1, V2,
- Mask, Subtarget, DAG))
+ if (SDValue V = lowerVectorShuffleAsElementInsertion(
+ DL, MVT::v4f32, V1, V2, Mask, Zeroable, Subtarget, DAG))
return V;
if (Subtarget.hasSSE41()) {
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4f32, V1, V2, Mask,
- Subtarget, DAG))
+ Zeroable, Subtarget, DAG))
return Blend;
// Use INSERTPS if we can complete the shuffle efficiently.
- if (SDValue V = lowerVectorShuffleAsInsertPS(DL, V1, V2, Mask, DAG))
+ if (SDValue V =
+ lowerVectorShuffleAsInsertPS(DL, V1, V2, Mask, Zeroable, DAG))
return V;
if (!isSingleSHUFPSMask(Mask))
@@ -9301,6 +10111,7 @@ static SDValue lowerV4F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
/// We try to handle these with integer-domain shuffles where we can, but for
/// blends we use the floating point domain blend instructions.
static SDValue lowerV4I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
+ const SmallBitVector &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -9311,8 +10122,8 @@ static SDValue lowerV4I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Whenever we can lower this as a zext, that instruction is strictly faster
// than any alternative. It also allows us to fold memory operands into the
// shuffle in many cases.
- if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(DL, MVT::v4i32, V1, V2,
- Mask, Subtarget, DAG))
+ if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
+ DL, MVT::v4i32, V1, V2, Mask, Zeroable, Subtarget, DAG))
return ZExt;
int NumV2Elements = count_if(Mask, [](int M) { return M >= 4; });
@@ -9341,13 +10152,13 @@ static SDValue lowerV4I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Try to use shift instructions.
if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v4i32, V1, V2, Mask,
- Subtarget, DAG))
+ Zeroable, Subtarget, DAG))
return Shift;
// There are special ways we can lower some single-element blends.
if (NumV2Elements == 1)
- if (SDValue V = lowerVectorShuffleAsElementInsertion(DL, MVT::v4i32, V1, V2,
- Mask, Subtarget, DAG))
+ if (SDValue V = lowerVectorShuffleAsElementInsertion(
+ DL, MVT::v4i32, V1, V2, Mask, Zeroable, Subtarget, DAG))
return V;
// We have different paths for blend lowering, but they all must use the
@@ -9355,11 +10166,11 @@ static SDValue lowerV4I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
bool IsBlendSupported = Subtarget.hasSSE41();
if (IsBlendSupported)
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4i32, V1, V2, Mask,
- Subtarget, DAG))
+ Zeroable, Subtarget, DAG))
return Blend;
- if (SDValue Masked =
- lowerVectorShuffleAsBitMask(DL, MVT::v4i32, V1, V2, Mask, DAG))
+ if (SDValue Masked = lowerVectorShuffleAsBitMask(DL, MVT::v4i32, V1, V2, Mask,
+ Zeroable, DAG))
return Masked;
// Use dedicated unpack instructions for masks that match their pattern.
@@ -9374,26 +10185,31 @@ static SDValue lowerV4I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
DL, MVT::v4i32, V1, V2, Mask, Subtarget, DAG))
return Rotate;
- // If we have direct support for blends, we should lower by decomposing into
- // a permute. That will be faster than the domain cross.
- if (IsBlendSupported)
- return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4i32, V1, V2,
- Mask, DAG);
-
- // Try to lower by permuting the inputs into an unpack instruction.
- if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack(DL, MVT::v4i32, V1,
- V2, Mask, DAG))
- return Unpack;
+ // Assume that a single SHUFPS is faster than an alternative sequence of
+ // multiple instructions (even if the CPU has a domain penalty).
+ // If some CPU is harmed by the domain switch, we can fix it in a later pass.
+ if (!isSingleSHUFPSMask(Mask)) {
+ // If we have direct support for blends, we should lower by decomposing into
+ // a permute. That will be faster than the domain cross.
+ if (IsBlendSupported)
+ return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v4i32, V1, V2,
+ Mask, DAG);
+
+ // Try to lower by permuting the inputs into an unpack instruction.
+ if (SDValue Unpack = lowerVectorShuffleAsPermuteAndUnpack(
+ DL, MVT::v4i32, V1, V2, Mask, DAG))
+ return Unpack;
+ }
// We implement this with SHUFPS because it can blend from two vectors.
// Because we're going to eventually use SHUFPS, we use SHUFPS even to build
// up the inputs, bypassing domain shift penalties that we would incur if we
// directly used PSHUFD on Nehalem and older. For newer chips, this isn't
// relevant.
- return DAG.getBitcast(
- MVT::v4i32,
- DAG.getVectorShuffle(MVT::v4f32, DL, DAG.getBitcast(MVT::v4f32, V1),
- DAG.getBitcast(MVT::v4f32, V2), Mask));
+ SDValue CastV1 = DAG.getBitcast(MVT::v4f32, V1);
+ SDValue CastV2 = DAG.getBitcast(MVT::v4f32, V2);
+ SDValue ShufPS = DAG.getVectorShuffle(MVT::v4f32, DL, CastV1, CastV2, Mask);
+ return DAG.getBitcast(MVT::v4i32, ShufPS);
}
/// \brief Lowering of single-input v8i16 shuffles is the cornerstone of SSE2
@@ -9551,18 +10367,15 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
auto FixFlippedInputs = [&V, &DL, &Mask, &DAG](int PinnedIdx, int DWord,
ArrayRef<int> Inputs) {
int FixIdx = PinnedIdx ^ 1; // The adjacent slot to the pinned slot.
- bool IsFixIdxInput = std::find(Inputs.begin(), Inputs.end(),
- PinnedIdx ^ 1) != Inputs.end();
+ bool IsFixIdxInput = is_contained(Inputs, PinnedIdx ^ 1);
// Determine whether the free index is in the flipped dword or the
// unflipped dword based on where the pinned index is. We use this bit
// in an xor to conditionally select the adjacent dword.
int FixFreeIdx = 2 * (DWord ^ (PinnedIdx / 2 == DWord));
- bool IsFixFreeIdxInput = std::find(Inputs.begin(), Inputs.end(),
- FixFreeIdx) != Inputs.end();
+ bool IsFixFreeIdxInput = is_contained(Inputs, FixFreeIdx);
if (IsFixIdxInput == IsFixFreeIdxInput)
FixFreeIdx += 1;
- IsFixFreeIdxInput = std::find(Inputs.begin(), Inputs.end(),
- FixFreeIdx) != Inputs.end();
+ IsFixFreeIdxInput = is_contained(Inputs, FixFreeIdx);
assert(IsFixIdxInput != IsFixFreeIdxInput &&
"We need to be changing the number of flipped inputs!");
int PSHUFHalfMask[] = {0, 1, 2, 3};
@@ -9734,9 +10547,8 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
// by inputs being moved and *staying* in that half.
if (IncomingInputs.size() == 1) {
if (isWordClobbered(SourceHalfMask, IncomingInputs[0] - SourceOffset)) {
- int InputFixed = std::find(std::begin(SourceHalfMask),
- std::end(SourceHalfMask), -1) -
- std::begin(SourceHalfMask) + SourceOffset;
+ int InputFixed = find(SourceHalfMask, -1) - std::begin(SourceHalfMask) +
+ SourceOffset;
SourceHalfMask[InputFixed - SourceOffset] =
IncomingInputs[0] - SourceOffset;
std::replace(HalfMask.begin(), HalfMask.end(), IncomingInputs[0],
@@ -9868,8 +10680,8 @@ static SDValue lowerV8I16GeneralSingleInputVectorShuffle(
/// blend if only one input is used.
static SDValue lowerVectorShuffleAsBlendOfPSHUFBs(
const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
- SelectionDAG &DAG, bool &V1InUse, bool &V2InUse) {
- SmallBitVector Zeroable = computeZeroableShuffleElements(Mask, V1, V2);
+ const SmallBitVector &Zeroable, SelectionDAG &DAG, bool &V1InUse,
+ bool &V2InUse) {
SDValue V1Mask[16];
SDValue V2Mask[16];
V1InUse = false;
@@ -9929,6 +10741,7 @@ static SDValue lowerVectorShuffleAsBlendOfPSHUFBs(
/// halves of the inputs separately (making them have relatively few inputs)
/// and then concatenate them.
static SDValue lowerV8I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
+ const SmallBitVector &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -9939,7 +10752,7 @@ static SDValue lowerV8I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Whenever we can lower this as a zext, that instruction is strictly faster
// than any alternative.
if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
- DL, MVT::v8i16, V1, V2, Mask, Subtarget, DAG))
+ DL, MVT::v8i16, V1, V2, Mask, Zeroable, Subtarget, DAG))
return ZExt;
int NumV2Inputs = count_if(Mask, [](int M) { return M >= 8; });
@@ -9952,7 +10765,7 @@ static SDValue lowerV8I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Try to use shift instructions.
if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v8i16, V1, V1, Mask,
- Subtarget, DAG))
+ Zeroable, Subtarget, DAG))
return Shift;
// Use dedicated unpack instructions for masks that match their pattern.
@@ -9978,18 +10791,19 @@ static SDValue lowerV8I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Try to use shift instructions.
if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v8i16, V1, V2, Mask,
- Subtarget, DAG))
+ Zeroable, Subtarget, DAG))
return Shift;
// See if we can use SSE4A Extraction / Insertion.
if (Subtarget.hasSSE4A())
- if (SDValue V = lowerVectorShuffleWithSSE4A(DL, MVT::v8i16, V1, V2, Mask, DAG))
+ if (SDValue V = lowerVectorShuffleWithSSE4A(DL, MVT::v8i16, V1, V2, Mask,
+ Zeroable, DAG))
return V;
// There are special ways we can lower some single-element blends.
if (NumV2Inputs == 1)
- if (SDValue V = lowerVectorShuffleAsElementInsertion(DL, MVT::v8i16, V1, V2,
- Mask, Subtarget, DAG))
+ if (SDValue V = lowerVectorShuffleAsElementInsertion(
+ DL, MVT::v8i16, V1, V2, Mask, Zeroable, Subtarget, DAG))
return V;
// We have different paths for blend lowering, but they all must use the
@@ -9997,11 +10811,11 @@ static SDValue lowerV8I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
bool IsBlendSupported = Subtarget.hasSSE41();
if (IsBlendSupported)
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i16, V1, V2, Mask,
- Subtarget, DAG))
+ Zeroable, Subtarget, DAG))
return Blend;
- if (SDValue Masked =
- lowerVectorShuffleAsBitMask(DL, MVT::v8i16, V1, V2, Mask, DAG))
+ if (SDValue Masked = lowerVectorShuffleAsBitMask(DL, MVT::v8i16, V1, V2, Mask,
+ Zeroable, DAG))
return Masked;
// Use dedicated unpack instructions for masks that match their pattern.
@@ -10027,14 +10841,14 @@ static SDValue lowerV8I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// can both shuffle and set up the inefficient blend.
if (!IsBlendSupported && Subtarget.hasSSSE3()) {
bool V1InUse, V2InUse;
- return lowerVectorShuffleAsBlendOfPSHUFBs(DL, MVT::v8i16, V1, V2, Mask, DAG,
- V1InUse, V2InUse);
+ return lowerVectorShuffleAsBlendOfPSHUFBs(DL, MVT::v8i16, V1, V2, Mask,
+ Zeroable, DAG, V1InUse, V2InUse);
}
// We can always bit-blend if we have to so the fallback strategy is to
// decompose into single-input permutes and blends.
return lowerVectorShuffleAsDecomposedShuffleBlend(DL, MVT::v8i16, V1, V2,
- Mask, DAG);
+ Mask, DAG);
}
/// \brief Check whether a compaction lowering can be done by dropping even
@@ -10111,6 +10925,7 @@ static int canLowerByDroppingEvenElements(ArrayRef<int> Mask,
/// the existing lowering for v8i16 blends on each half, finally PACK-ing them
/// back together.
static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
+ const SmallBitVector &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -10120,7 +10935,7 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Try to use shift instructions.
if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v16i8, V1, V2, Mask,
- Subtarget, DAG))
+ Zeroable, Subtarget, DAG))
return Shift;
// Try to use byte rotation instructions.
@@ -10130,12 +10945,13 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Try to use a zext lowering.
if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
- DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG))
+ DL, MVT::v16i8, V1, V2, Mask, Zeroable, Subtarget, DAG))
return ZExt;
// See if we can use SSE4A Extraction / Insertion.
if (Subtarget.hasSSE4A())
- if (SDValue V = lowerVectorShuffleWithSSE4A(DL, MVT::v16i8, V1, V2, Mask, DAG))
+ if (SDValue V = lowerVectorShuffleWithSSE4A(DL, MVT::v16i8, V1, V2, Mask,
+ Zeroable, DAG))
return V;
int NumV2Elements = count_if(Mask, [](int M) { return M >= 16; });
@@ -10238,8 +11054,8 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
return V;
}
- if (SDValue Masked =
- lowerVectorShuffleAsBitMask(DL, MVT::v16i8, V1, V2, Mask, DAG))
+ if (SDValue Masked = lowerVectorShuffleAsBitMask(DL, MVT::v16i8, V1, V2, Mask,
+ Zeroable, DAG))
return Masked;
// Use dedicated unpack instructions for masks that match their pattern.
@@ -10265,15 +11081,15 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
bool V2InUse = false;
SDValue PSHUFB = lowerVectorShuffleAsBlendOfPSHUFBs(
- DL, MVT::v16i8, V1, V2, Mask, DAG, V1InUse, V2InUse);
+ DL, MVT::v16i8, V1, V2, Mask, Zeroable, DAG, V1InUse, V2InUse);
// If both V1 and V2 are in use and we can use a direct blend or an unpack,
// do so. This avoids using them to handle blends-with-zero which is
// important as a single pshufb is significantly faster for that.
if (V1InUse && V2InUse) {
if (Subtarget.hasSSE41())
- if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v16i8, V1, V2,
- Mask, Subtarget, DAG))
+ if (SDValue Blend = lowerVectorShuffleAsBlend(
+ DL, MVT::v16i8, V1, V2, Mask, Zeroable, Subtarget, DAG))
return Blend;
// We can use an unpack to do the blending rather than an or in some
@@ -10294,8 +11110,8 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// There are special ways we can lower some single-element blends.
if (NumV2Elements == 1)
- if (SDValue V = lowerVectorShuffleAsElementInsertion(DL, MVT::v16i8, V1, V2,
- Mask, Subtarget, DAG))
+ if (SDValue V = lowerVectorShuffleAsElementInsertion(
+ DL, MVT::v16i8, V1, V2, Mask, Zeroable, Subtarget, DAG))
return V;
if (SDValue BitBlend =
@@ -10349,22 +11165,18 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// with a pack.
SDValue V = V1;
- int LoBlendMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
- int HiBlendMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
+ std::array<int, 8> LoBlendMask = {{-1, -1, -1, -1, -1, -1, -1, -1}};
+ std::array<int, 8> HiBlendMask = {{-1, -1, -1, -1, -1, -1, -1, -1}};
for (int i = 0; i < 16; ++i)
if (Mask[i] >= 0)
(i < 8 ? LoBlendMask[i] : HiBlendMask[i % 8]) = Mask[i];
- SDValue Zero = getZeroVector(MVT::v8i16, Subtarget, DAG, DL);
-
SDValue VLoHalf, VHiHalf;
// Check if any of the odd lanes in the v16i8 are used. If not, we can mask
// them out and avoid using UNPCK{L,H} to extract the elements of V as
// i16s.
- if (std::none_of(std::begin(LoBlendMask), std::end(LoBlendMask),
- [](int M) { return M >= 0 && M % 2 == 1; }) &&
- std::none_of(std::begin(HiBlendMask), std::end(HiBlendMask),
- [](int M) { return M >= 0 && M % 2 == 1; })) {
+ if (none_of(LoBlendMask, [](int M) { return M >= 0 && M % 2 == 1; }) &&
+ none_of(HiBlendMask, [](int M) { return M >= 0 && M % 2 == 1; })) {
// Use a mask to drop the high bytes.
VLoHalf = DAG.getBitcast(MVT::v8i16, V);
VLoHalf = DAG.getNode(ISD::AND, DL, MVT::v8i16, VLoHalf,
@@ -10383,6 +11195,8 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
} else {
// Otherwise just unpack the low half of V into VLoHalf and the high half into
// VHiHalf so that we can blend them as i16s.
+ SDValue Zero = getZeroVector(MVT::v16i8, Subtarget, DAG, DL);
+
VLoHalf = DAG.getBitcast(
MVT::v8i16, DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i8, V, Zero));
VHiHalf = DAG.getBitcast(
@@ -10401,83 +11215,28 @@ static SDValue lowerV16I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
/// dispatches to the lowering routines accordingly.
static SDValue lower128BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
MVT VT, SDValue V1, SDValue V2,
+ const SmallBitVector &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
switch (VT.SimpleTy) {
case MVT::v2i64:
- return lowerV2I64VectorShuffle(DL, Mask, V1, V2, Subtarget, DAG);
+ return lowerV2I64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v2f64:
- return lowerV2F64VectorShuffle(DL, Mask, V1, V2, Subtarget, DAG);
+ return lowerV2F64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v4i32:
- return lowerV4I32VectorShuffle(DL, Mask, V1, V2, Subtarget, DAG);
+ return lowerV4I32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v4f32:
- return lowerV4F32VectorShuffle(DL, Mask, V1, V2, Subtarget, DAG);
+ return lowerV4F32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v8i16:
- return lowerV8I16VectorShuffle(DL, Mask, V1, V2, Subtarget, DAG);
+ return lowerV8I16VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v16i8:
- return lowerV16I8VectorShuffle(DL, Mask, V1, V2, Subtarget, DAG);
+ return lowerV16I8VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
default:
llvm_unreachable("Unimplemented!");
}
}
-/// \brief Helper function to test whether a shuffle mask could be
-/// simplified by widening the elements being shuffled.
-///
-/// Appends the mask for wider elements in WidenedMask if valid. Otherwise
-/// leaves it in an unspecified state.
-///
-/// NOTE: This must handle normal vector shuffle masks and *target* vector
-/// shuffle masks. The latter have the special property of a '-2' representing
-/// a zero-ed lane of a vector.
-static bool canWidenShuffleElements(ArrayRef<int> Mask,
- SmallVectorImpl<int> &WidenedMask) {
- WidenedMask.assign(Mask.size() / 2, 0);
- for (int i = 0, Size = Mask.size(); i < Size; i += 2) {
- // If both elements are undef, its trivial.
- if (Mask[i] == SM_SentinelUndef && Mask[i + 1] == SM_SentinelUndef) {
- WidenedMask[i/2] = SM_SentinelUndef;
- continue;
- }
-
- // Check for an undef mask and a mask value properly aligned to fit with
- // a pair of values. If we find such a case, use the non-undef mask's value.
- if (Mask[i] == SM_SentinelUndef && Mask[i + 1] >= 0 && Mask[i + 1] % 2 == 1) {
- WidenedMask[i/2] = Mask[i + 1] / 2;
- continue;
- }
- if (Mask[i + 1] == SM_SentinelUndef && Mask[i] >= 0 && Mask[i] % 2 == 0) {
- WidenedMask[i/2] = Mask[i] / 2;
- continue;
- }
-
- // When zeroing, we need to spread the zeroing across both lanes to widen.
- if (Mask[i] == SM_SentinelZero || Mask[i + 1] == SM_SentinelZero) {
- if ((Mask[i] == SM_SentinelZero || Mask[i] == SM_SentinelUndef) &&
- (Mask[i + 1] == SM_SentinelZero || Mask[i + 1] == SM_SentinelUndef)) {
- WidenedMask[i/2] = SM_SentinelZero;
- continue;
- }
- return false;
- }
-
- // Finally check if the two mask values are adjacent and aligned with
- // a pair.
- if (Mask[i] != SM_SentinelUndef && Mask[i] % 2 == 0 && Mask[i] + 1 == Mask[i + 1]) {
- WidenedMask[i/2] = Mask[i] / 2;
- continue;
- }
-
- // Otherwise we can't safely widen the elements used in this shuffle.
- return false;
- }
- assert(WidenedMask.size() == Mask.size() / 2 &&
- "Incorrect size of mask after widening the elements!");
-
- return true;
-}
-
/// \brief Generic routine to split vector shuffle into half-sized shuffles.
///
/// This routine just extracts two subvectors, shuffles them independently, and
@@ -10712,6 +11471,7 @@ static SDValue lowerVectorShuffleAsLanePermuteAndBlend(const SDLoc &DL, MVT VT,
/// \brief Handle lowering 2-lane 128-bit shuffles.
static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
SDValue V2, ArrayRef<int> Mask,
+ const SmallBitVector &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
// TODO: If minimizing size and one of the inputs is a zero vector and the
@@ -10720,7 +11480,7 @@ static SDValue lowerV2X128VectorShuffle(const SDLoc &DL, MVT VT, SDValue V1,
// Blends are faster and handle all the non-lane-crossing cases.
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, VT, V1, V2, Mask,
- Subtarget, DAG))
+ Zeroable, Subtarget, DAG))
return Blend;
bool IsV1Zero = ISD::isBuildVectorAllZeros(V1.getNode());
@@ -11178,35 +11938,65 @@ static SDValue lowerShuffleAsRepeatedMaskAndLanePermute(
SubLaneMask);
}
-static SDValue lowerVectorShuffleWithSHUFPD(const SDLoc &DL, MVT VT,
- ArrayRef<int> Mask, SDValue V1,
- SDValue V2, SelectionDAG &DAG) {
+static bool matchVectorShuffleWithSHUFPD(MVT VT, SDValue &V1, SDValue &V2,
+ unsigned &ShuffleImm,
+ ArrayRef<int> Mask) {
+ int NumElts = VT.getVectorNumElements();
+ assert(VT.getScalarType() == MVT::f64 &&
+ (NumElts == 2 || NumElts == 4 || NumElts == 8) &&
+ "Unexpected data type for VSHUFPD");
// Mask for V8F64: 0/1, 8/9, 2/3, 10/11, 4/5, ..
// Mask for V4F64: 0/1, 4/5, 2/3, 6/7..
- assert(VT.getScalarSizeInBits() == 64 && "Unexpected data type for VSHUFPD");
- int NumElts = VT.getVectorNumElements();
+ ShuffleImm = 0;
bool ShufpdMask = true;
bool CommutableMask = true;
- unsigned Immediate = 0;
for (int i = 0; i < NumElts; ++i) {
- if (Mask[i] < 0)
+ if (Mask[i] == SM_SentinelUndef)
continue;
+ if (Mask[i] < 0)
+ return false;
int Val = (i & 6) + NumElts * (i & 1);
- int CommutVal = (i & 0xe) + NumElts * ((i & 1)^1);
- if (Mask[i] < Val || Mask[i] > Val + 1)
+ int CommutVal = (i & 0xe) + NumElts * ((i & 1) ^ 1);
+ if (Mask[i] < Val || Mask[i] > Val + 1)
ShufpdMask = false;
- if (Mask[i] < CommutVal || Mask[i] > CommutVal + 1)
+ if (Mask[i] < CommutVal || Mask[i] > CommutVal + 1)
CommutableMask = false;
- Immediate |= (Mask[i] % 2) << i;
+ ShuffleImm |= (Mask[i] % 2) << i;
}
+
if (ShufpdMask)
- return DAG.getNode(X86ISD::SHUFP, DL, VT, V1, V2,
- DAG.getConstant(Immediate, DL, MVT::i8));
- if (CommutableMask)
- return DAG.getNode(X86ISD::SHUFP, DL, VT, V2, V1,
- DAG.getConstant(Immediate, DL, MVT::i8));
- return SDValue();
+ return true;
+ if (CommutableMask) {
+ std::swap(V1, V2);
+ return true;
+ }
+
+ return false;
+}
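// Worked example (illustrative sketch): for a v4f64 mask [1, 5, 2, 7], every
// entry stays inside its allowed {Val, Val+1} pair ([0/1, 4/5, 2/3, 6/7]),
// so the match succeeds without swapping and ShuffleImm is built from the
// low bit of each entry: 1 | (1 << 1) | (0 << 2) | (1 << 3) == 0xB.
// The commuted pattern [4/5, 0/1, 6/7, 2/3] instead swaps V1 and V2.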
+
+static SDValue lowerVectorShuffleWithSHUFPD(const SDLoc &DL, MVT VT,
+ ArrayRef<int> Mask, SDValue V1,
+ SDValue V2, SelectionDAG &DAG) {
+ unsigned Immediate = 0;
+ if (!matchVectorShuffleWithSHUFPD(VT, V1, V2, Immediate, Mask))
+ return SDValue();
+
+ return DAG.getNode(X86ISD::SHUFP, DL, VT, V1, V2,
+ DAG.getConstant(Immediate, DL, MVT::i8));
+}
+
+static SDValue lowerVectorShuffleWithPERMV(const SDLoc &DL, MVT VT,
+ ArrayRef<int> Mask, SDValue V1,
+ SDValue V2, SelectionDAG &DAG) {
+ MVT MaskEltVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
+ MVT MaskVecVT = MVT::getVectorVT(MaskEltVT, VT.getVectorNumElements());
+
+ SDValue MaskNode = getConstVector(Mask, MaskVecVT, DAG, DL, true);
+ if (V2.isUndef())
+ return DAG.getNode(X86ISD::VPERMV, DL, VT, MaskNode, V1);
+
+ return DAG.getNode(X86ISD::VPERMV3, DL, VT, V1, MaskNode, V2);
}
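// Illustrative note: the variable-permute fallback just materializes the
// shuffle mask as a constant index vector of the same element count (e.g. a
// v8i64 constant for a v8f64 shuffle) and emits VPERMV for a single input or
// VPERMV3 when both inputs are live, roughly the VPERM*/VPERMT2* family.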
/// \brief Handle lowering of 4-lane 64-bit floating point shuffles.
@@ -11214,6 +12004,7 @@ static SDValue lowerVectorShuffleWithSHUFPD(const SDLoc &DL, MVT VT,
/// Also ends up handling lowering of 4-lane 64-bit integer shuffles when AVX2
/// isn't available.
static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
+ const SmallBitVector &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -11224,7 +12015,7 @@ static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
SmallVector<int, 4> WidenedMask;
if (canWidenShuffleElements(Mask, WidenedMask))
if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4f64, V1, V2, Mask,
- Subtarget, DAG))
+ Zeroable, Subtarget, DAG))
return V;
if (V2.isUndef()) {
@@ -11268,7 +12059,7 @@ static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
return V;
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4f64, V1, V2, Mask,
- Subtarget, DAG))
+ Zeroable, Subtarget, DAG))
return Blend;
// Check if the blend happens to exactly fit that of SHUFPD.
@@ -11280,7 +12071,7 @@ static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// the results into the target lanes.
if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG))
- return V;
+ return V;
// Try to simplify this by merging 128-bit lanes to enable a lane-based
// shuffle. However, if we have AVX2 and either inputs are already in place,
@@ -11307,6 +12098,7 @@ static SDValue lowerV4F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
/// This routine is only called when we have AVX2 and thus a reasonable
/// instruction set for v4i64 shuffling..
static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
+ const SmallBitVector &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -11318,11 +12110,11 @@ static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
SmallVector<int, 4> WidenedMask;
if (canWidenShuffleElements(Mask, WidenedMask))
if (SDValue V = lowerV2X128VectorShuffle(DL, MVT::v4i64, V1, V2, Mask,
- Subtarget, DAG))
+ Zeroable, Subtarget, DAG))
return V;
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4i64, V1, V2, Mask,
- Subtarget, DAG))
+ Zeroable, Subtarget, DAG))
return Blend;
// Check for being able to broadcast a single element.
@@ -11352,9 +12144,20 @@ static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Try to use shift instructions.
if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v4i64, V1, V2, Mask,
- Subtarget, DAG))
+ Zeroable, Subtarget, DAG))
return Shift;
+ // If we have VLX support, we can use VALIGN.
+ if (Subtarget.hasVLX())
+ if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v4i64, V1, V2,
+ Mask, Subtarget, DAG))
+ return Rotate;
+
+ // Try to use PALIGNR.
+ if (SDValue Rotate = lowerVectorShuffleAsByteRotate(DL, MVT::v4i64, V1, V2,
+ Mask, Subtarget, DAG))
+ return Rotate;
+
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V =
lowerVectorShuffleWithUNPCK(DL, MVT::v4i64, Mask, V1, V2, DAG))
@@ -11364,8 +12167,8 @@ static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// shuffle. However, if we have AVX2 and either inputs are already in place,
// we will be able to shuffle even across lanes the other input in a single
// instruction so skip this pattern.
- if (!(Subtarget.hasAVX2() && (isShuffleMaskInputInPlace(0, Mask) ||
- isShuffleMaskInputInPlace(1, Mask))))
+ if (!isShuffleMaskInputInPlace(0, Mask) &&
+ !isShuffleMaskInputInPlace(1, Mask))
if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
DL, MVT::v4i64, V1, V2, Mask, Subtarget, DAG))
return Result;
@@ -11380,6 +12183,7 @@ static SDValue lowerV4I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
/// Also ends up handling lowering of 8-lane 32-bit integer shuffles when AVX2
/// isn't available.
static SDValue lowerV8F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
+ const SmallBitVector &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -11388,7 +12192,7 @@ static SDValue lowerV8F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8f32, V1, V2, Mask,
- Subtarget, DAG))
+ Zeroable, Subtarget, DAG))
return Blend;
// Check for being able to broadcast a single element.
@@ -11432,17 +12236,12 @@ static SDValue lowerV8F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// If we have a single input shuffle with different shuffle patterns in the
// two 128-bit lanes use the variable mask to VPERMILPS.
if (V2.isUndef()) {
- SDValue VPermMask[8];
- for (int i = 0; i < 8; ++i)
- VPermMask[i] = Mask[i] < 0 ? DAG.getUNDEF(MVT::i32)
- : DAG.getConstant(Mask[i], DL, MVT::i32);
+ SDValue VPermMask = getConstVector(Mask, MVT::v8i32, DAG, DL, true);
if (!is128BitLaneCrossingShuffleMask(MVT::v8f32, Mask))
- return DAG.getNode(X86ISD::VPERMILPV, DL, MVT::v8f32, V1,
- DAG.getBuildVector(MVT::v8i32, DL, VPermMask));
+ return DAG.getNode(X86ISD::VPERMILPV, DL, MVT::v8f32, V1, VPermMask);
if (Subtarget.hasAVX2())
- return DAG.getNode(X86ISD::VPERMV, DL, MVT::v8f32,
- DAG.getBuildVector(MVT::v8i32, DL, VPermMask), V1);
+ return DAG.getNode(X86ISD::VPERMV, DL, MVT::v8f32, VPermMask, V1);
// Otherwise, fall back.
return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v8f32, V1, V2, Mask,
@@ -11470,6 +12269,7 @@ static SDValue lowerV8F32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
/// This routine is only called when we have AVX2 and thus a reasonable
/// instruction set for v8i32 shuffling..
static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
+ const SmallBitVector &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -11481,12 +12281,12 @@ static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Whenever we can lower this as a zext, that instruction is strictly faster
// than any alternative. It also allows us to fold memory operands into the
// shuffle in many cases.
- if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(DL, MVT::v8i32, V1, V2,
- Mask, Subtarget, DAG))
+ if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
+ DL, MVT::v8i32, V1, V2, Mask, Zeroable, Subtarget, DAG))
return ZExt;
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v8i32, V1, V2, Mask,
- Subtarget, DAG))
+ Zeroable, Subtarget, DAG))
return Blend;
// Check for being able to broadcast a single element.
@@ -11498,7 +12298,9 @@ static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// efficient instructions that mirror the shuffles across the two 128-bit
// lanes.
SmallVector<int, 4> RepeatedMask;
- if (is128BitLaneRepeatedShuffleMask(MVT::v8i32, Mask, RepeatedMask)) {
+ bool Is128BitLaneRepeatedShuffle =
+ is128BitLaneRepeatedShuffleMask(MVT::v8i32, Mask, RepeatedMask);
+ if (Is128BitLaneRepeatedShuffle) {
assert(RepeatedMask.size() == 4 && "Unexpected repeated mask size!");
if (V2.isUndef())
return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v8i32, V1,
@@ -11512,16 +12314,22 @@ static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Try to use shift instructions.
if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v8i32, V1, V2, Mask,
- Subtarget, DAG))
+ Zeroable, Subtarget, DAG))
return Shift;
+ // If we have VLX support, we can use VALIGN.
+ if (Subtarget.hasVLX())
+ if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v8i32, V1, V2,
+ Mask, Subtarget, DAG))
+ return Rotate;
+
// Try to use byte rotation instructions.
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG))
return Rotate;
// Try to create an in-lane repeating shuffle mask and then shuffle the
- // the results into the target lanes.
+ // results into the target lanes.
if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG))
return V;
@@ -11529,12 +12337,19 @@ static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// If the shuffle patterns aren't repeated but it is a single input, directly
// generate a cross-lane VPERMD instruction.
if (V2.isUndef()) {
- SDValue VPermMask[8];
- for (int i = 0; i < 8; ++i)
- VPermMask[i] = Mask[i] < 0 ? DAG.getUNDEF(MVT::i32)
- : DAG.getConstant(Mask[i], DL, MVT::i32);
- return DAG.getNode(X86ISD::VPERMV, DL, MVT::v8i32,
- DAG.getBuildVector(MVT::v8i32, DL, VPermMask), V1);
+ SDValue VPermMask = getConstVector(Mask, MVT::v8i32, DAG, DL, true);
+ return DAG.getNode(X86ISD::VPERMV, DL, MVT::v8i32, VPermMask, V1);
+ }
+
+ // Assume that a single SHUFPS is faster than an alternative sequence of
+ // multiple instructions (even if the CPU has a domain penalty).
+ // If some CPU is harmed by the domain switch, we can fix it in a later pass.
+ if (Is128BitLaneRepeatedShuffle && isSingleSHUFPSMask(RepeatedMask)) {
+ SDValue CastV1 = DAG.getBitcast(MVT::v8f32, V1);
+ SDValue CastV2 = DAG.getBitcast(MVT::v8f32, V2);
+ SDValue ShufPS = lowerVectorShuffleWithSHUFPS(DL, MVT::v8f32, RepeatedMask,
+ CastV1, CastV2, DAG);
+ return DAG.getBitcast(MVT::v8i32, ShufPS);
}
// Try to simplify this by merging 128-bit lanes to enable a lane-based
@@ -11553,6 +12368,7 @@ static SDValue lowerV8I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
/// This routine is only called when we have AVX2 and thus a reasonable
/// instruction set for v16i16 shuffling..
static SDValue lowerV16I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
+ const SmallBitVector &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -11564,8 +12380,8 @@ static SDValue lowerV16I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Whenever we can lower this as a zext, that instruction is strictly faster
// than any alternative. It also allows us to fold memory operands into the
// shuffle in many cases.
- if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(DL, MVT::v16i16, V1, V2,
- Mask, Subtarget, DAG))
+ if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
+ DL, MVT::v16i16, V1, V2, Mask, Zeroable, Subtarget, DAG))
return ZExt;
// Check for being able to broadcast a single element.
@@ -11574,7 +12390,7 @@ static SDValue lowerV16I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
return Broadcast;
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v16i16, V1, V2, Mask,
- Subtarget, DAG))
+ Zeroable, Subtarget, DAG))
return Blend;
// Use dedicated unpack instructions for masks that match their pattern.
@@ -11584,7 +12400,7 @@ static SDValue lowerV16I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Try to use shift instructions.
if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v16i16, V1, V2, Mask,
- Subtarget, DAG))
+ Zeroable, Subtarget, DAG))
return Shift;
// Try to use byte rotation instructions.
@@ -11615,10 +12431,14 @@ static SDValue lowerV16I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
}
}
- if (SDValue PSHUFB = lowerVectorShuffleWithPSHUFB(DL, MVT::v16i16, Mask, V1,
- V2, Subtarget, DAG))
+ if (SDValue PSHUFB = lowerVectorShuffleWithPSHUFB(
+ DL, MVT::v16i16, Mask, V1, V2, Zeroable, Subtarget, DAG))
return PSHUFB;
+ // AVX512BWVL can lower to VPERMW.
+ if (Subtarget.hasBWI() && Subtarget.hasVLX())
+ return lowerVectorShuffleWithPERMV(DL, MVT::v16i16, Mask, V1, V2, DAG);
+
// Try to simplify this by merging 128-bit lanes to enable a lane-based
// shuffle.
if (SDValue Result = lowerVectorShuffleByMerging128BitLanes(
@@ -11634,6 +12454,7 @@ static SDValue lowerV16I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
/// This routine is only called when we have AVX2 and thus a reasonable
/// instruction set for v32i8 shuffling..
static SDValue lowerV32I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
+ const SmallBitVector &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -11645,8 +12466,8 @@ static SDValue lowerV32I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Whenever we can lower this as a zext, that instruction is strictly faster
// than any alternative. It also allows us to fold memory operands into the
// shuffle in many cases.
- if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(DL, MVT::v32i8, V1, V2,
- Mask, Subtarget, DAG))
+ if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
+ DL, MVT::v32i8, V1, V2, Mask, Zeroable, Subtarget, DAG))
return ZExt;
// Check for being able to broadcast a single element.
@@ -11655,7 +12476,7 @@ static SDValue lowerV32I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
return Broadcast;
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v32i8, V1, V2, Mask,
- Subtarget, DAG))
+ Zeroable, Subtarget, DAG))
return Blend;
// Use dedicated unpack instructions for masks that match their pattern.
@@ -11665,7 +12486,7 @@ static SDValue lowerV32I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Try to use shift instructions.
if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v32i8, V1, V2, Mask,
- Subtarget, DAG))
+ Zeroable, Subtarget, DAG))
return Shift;
// Try to use byte rotation instructions.
@@ -11685,8 +12506,8 @@ static SDValue lowerV32I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
return lowerVectorShuffleAsLanePermuteAndBlend(DL, MVT::v32i8, V1, V2, Mask,
DAG);
- if (SDValue PSHUFB = lowerVectorShuffleWithPSHUFB(DL, MVT::v32i8, Mask, V1,
- V2, Subtarget, DAG))
+ if (SDValue PSHUFB = lowerVectorShuffleWithPSHUFB(
+ DL, MVT::v32i8, Mask, V1, V2, Zeroable, Subtarget, DAG))
return PSHUFB;
// Try to simplify this by merging 128-bit lanes to enable a lane-based
@@ -11706,6 +12527,7 @@ static SDValue lowerV32I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
/// together based on the available instructions.
static SDValue lower256BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
MVT VT, SDValue V1, SDValue V2,
+ const SmallBitVector &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
// If we have a single input to the zero element, insert that into V1 if we
@@ -11715,7 +12537,7 @@ static SDValue lower256BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
if (NumV2Elements == 1 && Mask[0] >= NumElts)
if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
- DL, VT, V1, V2, Mask, Subtarget, DAG))
+ DL, VT, V1, V2, Mask, Zeroable, Subtarget, DAG))
return Insertion;
// Handle special cases where the lower or upper half is UNDEF.
@@ -11734,7 +12556,8 @@ static SDValue lower256BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
if (ElementBits < 32) {
// No floating point type available, if we can't use the bit operations
// for masking/blending then decompose into 128-bit vectors.
- if (SDValue V = lowerVectorShuffleAsBitMask(DL, VT, V1, V2, Mask, DAG))
+ if (SDValue V =
+ lowerVectorShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable, DAG))
return V;
if (SDValue V = lowerVectorShuffleAsBitBlend(DL, VT, V1, V2, Mask, DAG))
return V;
@@ -11750,17 +12573,17 @@ static SDValue lower256BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
switch (VT.SimpleTy) {
case MVT::v4f64:
- return lowerV4F64VectorShuffle(DL, Mask, V1, V2, Subtarget, DAG);
+ return lowerV4F64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v4i64:
- return lowerV4I64VectorShuffle(DL, Mask, V1, V2, Subtarget, DAG);
+ return lowerV4I64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v8f32:
- return lowerV8F32VectorShuffle(DL, Mask, V1, V2, Subtarget, DAG);
+ return lowerV8F32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v8i32:
- return lowerV8I32VectorShuffle(DL, Mask, V1, V2, Subtarget, DAG);
+ return lowerV8I32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v16i16:
- return lowerV16I16VectorShuffle(DL, Mask, V1, V2, Subtarget, DAG);
+ return lowerV16I16VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v32i8:
- return lowerV32I8VectorShuffle(DL, Mask, V1, V2, Subtarget, DAG);
+ return lowerV32I8VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
default:
llvm_unreachable("Not a valid 256-bit x86 vector type!");
@@ -11815,22 +12638,6 @@ static SDValue lowerV4X128VectorShuffle(const SDLoc &DL, MVT VT,
DAG.getConstant(PermMask, DL, MVT::i8));
}
-static SDValue lowerVectorShuffleWithPERMV(const SDLoc &DL, MVT VT,
- ArrayRef<int> Mask, SDValue V1,
- SDValue V2, SelectionDAG &DAG) {
-
- assert(VT.getScalarSizeInBits() >= 16 && "Unexpected data type for PERMV");
-
- MVT MaskEltVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
- MVT MaskVecVT = MVT::getVectorVT(MaskEltVT, VT.getVectorNumElements());
-
- SDValue MaskNode = getConstVector(Mask, MaskVecVT, DAG, DL, true);
- if (V2.isUndef())
- return DAG.getNode(X86ISD::VPERMV, DL, VT, MaskNode, V1);
-
- return DAG.getNode(X86ISD::VPERMV3, DL, VT, V1, MaskNode, V2);
-}
-
/// \brief Handle lowering of 8-lane 64-bit floating point shuffles.
static SDValue lowerV8F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
SDValue V1, SDValue V2,
@@ -11917,6 +12724,7 @@ static SDValue lowerV16F32VectorShuffle(SDLoc DL, ArrayRef<int> Mask,
/// \brief Handle lowering of 8-lane 64-bit integer shuffles.
static SDValue lowerV8I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
+ const SmallBitVector &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -11951,9 +12759,19 @@ static SDValue lowerV8I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Try to use shift instructions.
if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v8i64, V1, V2, Mask,
- Subtarget, DAG))
+ Zeroable, Subtarget, DAG))
return Shift;
+ // Try to use VALIGN.
+ if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v8i64, V1, V2,
+ Mask, Subtarget, DAG))
+ return Rotate;
+
+ // Try to use PALIGNR.
+ if (SDValue Rotate = lowerVectorShuffleAsByteRotate(DL, MVT::v8i64, V1, V2,
+ Mask, Subtarget, DAG))
+ return Rotate;
+
if (SDValue Unpck =
lowerVectorShuffleWithUNPCK(DL, MVT::v8i64, Mask, V1, V2, DAG))
return Unpck;
@@ -11963,6 +12781,7 @@ static SDValue lowerV8I64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
/// \brief Handle lowering of 16-lane 32-bit integer shuffles.
static SDValue lowerV16I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
+ const SmallBitVector &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -11970,11 +12789,20 @@ static SDValue lowerV16I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
assert(V2.getSimpleValueType() == MVT::v16i32 && "Bad operand type!");
assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
+ // Whenever we can lower this as a zext, that instruction is strictly faster
+ // than any alternative. It also allows us to fold memory operands into the
+ // shuffle in many cases.
+ if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
+ DL, MVT::v16i32, V1, V2, Mask, Zeroable, Subtarget, DAG))
+ return ZExt;
+
// If the shuffle mask is repeated in each 128-bit lane we can use more
// efficient instructions that mirror the shuffles across the four 128-bit
// lanes.
SmallVector<int, 4> RepeatedMask;
- if (is128BitLaneRepeatedShuffleMask(MVT::v16i32, Mask, RepeatedMask)) {
+ bool Is128BitLaneRepeatedShuffle =
+ is128BitLaneRepeatedShuffleMask(MVT::v16i32, Mask, RepeatedMask);
+ if (Is128BitLaneRepeatedShuffle) {
assert(RepeatedMask.size() == 4 && "Unexpected repeated mask size!");
if (V2.isUndef())
return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v16i32, V1,
@@ -11988,20 +12816,36 @@ static SDValue lowerV16I32VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Try to use shift instructions.
if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v16i32, V1, V2, Mask,
- Subtarget, DAG))
+ Zeroable, Subtarget, DAG))
return Shift;
+ // Try to use VALIGN.
+ if (SDValue Rotate = lowerVectorShuffleAsRotate(DL, MVT::v16i32, V1, V2,
+ Mask, Subtarget, DAG))
+ return Rotate;
+
// Try to use byte rotation instructions.
if (Subtarget.hasBWI())
if (SDValue Rotate = lowerVectorShuffleAsByteRotate(
DL, MVT::v16i32, V1, V2, Mask, Subtarget, DAG))
return Rotate;
+ // Assume that a single SHUFPS is faster than using a permv shuffle.
+ // If some CPU is harmed by the domain switch, we can fix it in a later pass.
+ if (Is128BitLaneRepeatedShuffle && isSingleSHUFPSMask(RepeatedMask)) {
+ SDValue CastV1 = DAG.getBitcast(MVT::v16f32, V1);
+ SDValue CastV2 = DAG.getBitcast(MVT::v16f32, V2);
+ SDValue ShufPS = lowerVectorShuffleWithSHUFPS(DL, MVT::v16f32, RepeatedMask,
+ CastV1, CastV2, DAG);
+ return DAG.getBitcast(MVT::v16i32, ShufPS);
+ }
+
return lowerVectorShuffleWithPERMV(DL, MVT::v16i32, Mask, V1, V2, DAG);
}
/// \brief Handle lowering of 32-lane 16-bit integer shuffles.
static SDValue lowerV32I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
+ const SmallBitVector &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -12010,6 +12854,13 @@ static SDValue lowerV32I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!");
assert(Subtarget.hasBWI() && "We can only lower v32i16 with AVX-512-BWI!");
+ // Whenever we can lower this as a zext, that instruction is strictly faster
+ // than any alternative. It also allows us to fold memory operands into the
+ // shuffle in many cases.
+ if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
+ DL, MVT::v32i16, V1, V2, Mask, Zeroable, Subtarget, DAG))
+ return ZExt;
+
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V =
lowerVectorShuffleWithUNPCK(DL, MVT::v32i16, Mask, V1, V2, DAG))
@@ -12017,7 +12868,7 @@ static SDValue lowerV32I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Try to use shift instructions.
if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v32i16, V1, V2, Mask,
- Subtarget, DAG))
+ Zeroable, Subtarget, DAG))
return Shift;
// Try to use byte rotation instructions.
@@ -12041,6 +12892,7 @@ static SDValue lowerV32I16VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
/// \brief Handle lowering of 64-lane 8-bit integer shuffles.
static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
+ const SmallBitVector &Zeroable,
SDValue V1, SDValue V2,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
@@ -12049,6 +12901,13 @@ static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
assert(Mask.size() == 64 && "Unexpected mask size for v64 shuffle!");
assert(Subtarget.hasBWI() && "We can only lower v64i8 with AVX-512-BWI!");
+ // Whenever we can lower this as a zext, that instruction is strictly faster
+ // than any alternative. It also allows us to fold memory operands into the
+ // shuffle in many cases.
+ if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(
+ DL, MVT::v64i8, V1, V2, Mask, Zeroable, Subtarget, DAG))
+ return ZExt;
+
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V =
lowerVectorShuffleWithUNPCK(DL, MVT::v64i8, Mask, V1, V2, DAG))
@@ -12056,7 +12915,7 @@ static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// Try to use shift instructions.
if (SDValue Shift = lowerVectorShuffleAsShift(DL, MVT::v64i8, V1, V2, Mask,
- Subtarget, DAG))
+ Zeroable, Subtarget, DAG))
return Shift;
// Try to use byte rotation instructions.
@@ -12064,10 +12923,14 @@ static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
DL, MVT::v64i8, V1, V2, Mask, Subtarget, DAG))
return Rotate;
- if (SDValue PSHUFB = lowerVectorShuffleWithPSHUFB(DL, MVT::v64i8, Mask, V1,
- V2, Subtarget, DAG))
+ if (SDValue PSHUFB = lowerVectorShuffleWithPSHUFB(
+ DL, MVT::v64i8, Mask, V1, V2, Zeroable, Subtarget, DAG))
return PSHUFB;
+ // VBMI can use VPERMV/VPERMV3 byte shuffles.
+ if (Subtarget.hasVBMI())
+ return lowerVectorShuffleWithPERMV(DL, MVT::v64i8, Mask, V1, V2, DAG);
+
// FIXME: Implement direct support for this type!
return splitAndLowerVectorShuffle(DL, MVT::v64i8, V1, V2, Mask, DAG);
}
@@ -12079,11 +12942,22 @@ static SDValue lowerV64I8VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
/// together based on the available instructions.
static SDValue lower512BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
MVT VT, SDValue V1, SDValue V2,
+ const SmallBitVector &Zeroable,
const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
assert(Subtarget.hasAVX512() &&
"Cannot lower 512-bit vectors w/ basic ISA!");
+ // If we have a single input to the zero element, insert that into V1 if we
+ // can do so cheaply.
+ int NumElts = Mask.size();
+ int NumV2Elements = count_if(Mask, [NumElts](int M) { return M >= NumElts; });
+
+ if (NumV2Elements == 1 && Mask[0] >= NumElts)
+ if (SDValue Insertion = lowerVectorShuffleAsElementInsertion(
+ DL, VT, V1, V2, Mask, Zeroable, Subtarget, DAG))
+ return Insertion;
+
// Check for being able to broadcast a single element.
if (SDValue Broadcast =
lowerVectorShuffleAsBroadcast(DL, VT, V1, V2, Mask, Subtarget, DAG))
@@ -12099,13 +12973,13 @@ static SDValue lower512BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
case MVT::v16f32:
return lowerV16F32VectorShuffle(DL, Mask, V1, V2, Subtarget, DAG);
case MVT::v8i64:
- return lowerV8I64VectorShuffle(DL, Mask, V1, V2, Subtarget, DAG);
+ return lowerV8I64VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v16i32:
- return lowerV16I32VectorShuffle(DL, Mask, V1, V2, Subtarget, DAG);
+ return lowerV16I32VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v32i16:
- return lowerV32I16VectorShuffle(DL, Mask, V1, V2, Subtarget, DAG);
+ return lowerV32I16VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
case MVT::v64i8:
- return lowerV64I8VectorShuffle(DL, Mask, V1, V2, Subtarget, DAG);
+ return lowerV64I8VectorShuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
default:
llvm_unreachable("Not a valid 512-bit x86 vector type!");
@@ -12161,9 +13035,81 @@ static SDValue lower1BitVectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
V2 = getOnesVector(ExtVT, Subtarget, DAG, DL);
else
V2 = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, V2);
- return DAG.getNode(ISD::TRUNCATE, DL, VT,
- DAG.getVectorShuffle(ExtVT, DL, V1, V2, Mask));
+
+ SDValue Shuffle = DAG.getVectorShuffle(ExtVT, DL, V1, V2, Mask);
+ // Since i1 was sign-extended we can use X86ISD::CVT2MASK.
+ int NumElems = VT.getVectorNumElements();
+ if ((Subtarget.hasBWI() && (NumElems >= 32)) ||
+ (Subtarget.hasDQI() && (NumElems < 32)))
+ return DAG.getNode(X86ISD::CVT2MASK, DL, VT, Shuffle);
+
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, Shuffle);
}
+
+/// Helper function that returns true if the shuffle mask should be
+/// commuted to improve canonicalization.
+static bool canonicalizeShuffleMaskWithCommute(ArrayRef<int> Mask) {
+ int NumElements = Mask.size();
+
+ int NumV1Elements = 0, NumV2Elements = 0, NumSentinelElements = 0;
+ for (int M : Mask)
+ if (M < 0)
+ ++NumSentinelElements;
+ else if (M < NumElements)
+ ++NumV1Elements;
+ else
+ ++NumV2Elements;
+
+ // Commute the shuffle as needed such that more elements come from V1 than
+ // V2. This allows us to match the shuffle pattern strictly on how many
+ // elements come from V1 without handling the symmetric cases.
+ if (NumV2Elements > NumV1Elements)
+ return true;
+
+ assert(NumV1Elements > 0 && "No V1 indices");
+
+ if (NumV2Elements == 0)
+ return false;
+
+ // When the number of V1 and V2 elements are the same, try to minimize the
+ // number of uses of V2 in the low half of the vector. When that is tied,
+ // ensure that the sum of indices for V1 is equal to or lower than the sum
+ // of indices for V2. When those are equal, try to ensure that the number of odd
+ // indices for V1 is lower than the number of odd indices for V2.
+ if (NumV1Elements == NumV2Elements) {
+ int LowV1Elements = 0, LowV2Elements = 0;
+ for (int M : Mask.slice(0, NumElements / 2))
+ if (M >= NumElements)
+ ++LowV2Elements;
+ else if (M >= 0)
+ ++LowV1Elements;
+ if (LowV2Elements > LowV1Elements)
+ return true;
+ if (LowV2Elements == LowV1Elements) {
+ int SumV1Indices = 0, SumV2Indices = 0;
+ for (int i = 0, Size = Mask.size(); i < Size; ++i)
+ if (Mask[i] >= NumElements)
+ SumV2Indices += i;
+ else if (Mask[i] >= 0)
+ SumV1Indices += i;
+ if (SumV2Indices < SumV1Indices)
+ return true;
+ if (SumV2Indices == SumV1Indices) {
+ int NumV1OddIndices = 0, NumV2OddIndices = 0;
+ for (int i = 0, Size = Mask.size(); i < Size; ++i)
+ if (Mask[i] >= NumElements)
+ NumV2OddIndices += i % 2;
+ else if (Mask[i] >= 0)
+ NumV1OddIndices += i % 2;
+ if (NumV2OddIndices < NumV1OddIndices)
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
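// Illustrative sketch (not from the patch): two v4 masks run through the
// commute heuristic above, assuming indices 0-3 name V1 lanes and 4-7 name
// V2 lanes. {4, 5, 6, 1} has three V2 lanes vs. one V1 lane, so the first
// rule commutes immediately. {4, 5, 0, 1} is tied, but its low half uses
// only V2 lanes, so the low-half tie-breaker commutes it as well. Only the
// first two rules of the real heuristic are modelled here.
#include <cassert>
#include <vector>

static bool shouldCommute(const std::vector<int> &Mask) {
  int N = (int)Mask.size(), V1 = 0, V2 = 0, LoV1 = 0, LoV2 = 0;
  for (int i = 0; i < N; ++i) {
    if (Mask[i] < 0)
      continue;                                // undef: counts for neither side
    (Mask[i] < N ? V1 : V2)++;
    if (i < N / 2)
      (Mask[i] < N ? LoV1 : LoV2)++;
  }
  return V2 > V1 || (V1 == V2 && LoV2 > LoV1);
}

int main() {
  assert(shouldCommute({4, 5, 6, 1}));
  assert(shouldCommute({4, 5, 0, 1}));
  assert(!shouldCommute({0, 1, 4, 5}));
}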
+
/// \brief Top-level lowering for x86 vector shuffles.
///
/// This handles decomposition, canonicalization, and lowering of all x86
@@ -12209,6 +13155,12 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget,
return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);
}
+ // Check for illegal shuffle mask element index values.
+ int MaskUpperLimit = Mask.size() * (V2IsUndef ? 1 : 2); (void)MaskUpperLimit;
+ assert(llvm::all_of(Mask,
+ [&](int M) { return -1 <= M && M < MaskUpperLimit; }) &&
+ "Out of bounds shuffle index");
+
// We actually see shuffles that are entirely re-arrangements of a set of
// zero inputs. This mostly happens while decomposing complex shuffles into
// simple ones. Directly lower these as a buildvector of zeros.
@@ -12237,69 +13189,22 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget &Subtarget,
}
}
- int NumV1Elements = 0, NumUndefElements = 0, NumV2Elements = 0;
- for (int M : Mask)
- if (M < 0)
- ++NumUndefElements;
- else if (M < NumElements)
- ++NumV1Elements;
- else
- ++NumV2Elements;
-
- // Commute the shuffle as needed such that more elements come from V1 than
- // V2. This allows us to match the shuffle pattern strictly on how many
- // elements come from V1 without handling the symmetric cases.
- if (NumV2Elements > NumV1Elements)
+ // Commute the shuffle if it will improve canonicalization.
+ if (canonicalizeShuffleMaskWithCommute(Mask))
return DAG.getCommutedVectorShuffle(*SVOp);
- assert(NumV1Elements > 0 && "No V1 indices");
- assert((NumV2Elements > 0 || V2IsUndef) && "V2 not undef, but not used");
-
- // When the number of V1 and V2 elements are the same, try to minimize the
- // number of uses of V2 in the low half of the vector. When that is tied,
- // ensure that the sum of indices for V1 is equal to or lower than the sum
- // indices for V2. When those are equal, try to ensure that the number of odd
- // indices for V1 is lower than the number of odd indices for V2.
- if (NumV1Elements == NumV2Elements) {
- int LowV1Elements = 0, LowV2Elements = 0;
- for (int M : Mask.slice(0, NumElements / 2))
- if (M >= NumElements)
- ++LowV2Elements;
- else if (M >= 0)
- ++LowV1Elements;
- if (LowV2Elements > LowV1Elements)
- return DAG.getCommutedVectorShuffle(*SVOp);
- if (LowV2Elements == LowV1Elements) {
- int SumV1Indices = 0, SumV2Indices = 0;
- for (int i = 0, Size = Mask.size(); i < Size; ++i)
- if (Mask[i] >= NumElements)
- SumV2Indices += i;
- else if (Mask[i] >= 0)
- SumV1Indices += i;
- if (SumV2Indices < SumV1Indices)
- return DAG.getCommutedVectorShuffle(*SVOp);
- if (SumV2Indices == SumV1Indices) {
- int NumV1OddIndices = 0, NumV2OddIndices = 0;
- for (int i = 0, Size = Mask.size(); i < Size; ++i)
- if (Mask[i] >= NumElements)
- NumV2OddIndices += i % 2;
- else if (Mask[i] >= 0)
- NumV1OddIndices += i % 2;
- if (NumV2OddIndices < NumV1OddIndices)
- return DAG.getCommutedVectorShuffle(*SVOp);
- }
- }
- }
-
// For each vector width, delegate to a specialized lowering routine.
if (VT.is128BitVector())
- return lower128BitVectorShuffle(DL, Mask, VT, V1, V2, Subtarget, DAG);
+ return lower128BitVectorShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget,
+ DAG);
if (VT.is256BitVector())
- return lower256BitVectorShuffle(DL, Mask, VT, V1, V2, Subtarget, DAG);
+ return lower256BitVectorShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget,
+ DAG);
if (VT.is512BitVector())
- return lower512BitVectorShuffle(DL, Mask, VT, V1, V2, Subtarget, DAG);
+ return lower512BitVectorShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget,
+ DAG);
if (Is1BitVector)
return lower1BitVectorShuffle(DL, Mask, VT, V1, V2, Subtarget, DAG);
@@ -12392,21 +13297,6 @@ static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert);
}
- if (VT.getSizeInBits() == 16) {
- // If Idx is 0, it's cheaper to do a move instead of a pextrw.
- if (isNullConstant(Op.getOperand(1)))
- return DAG.getNode(
- ISD::TRUNCATE, dl, MVT::i16,
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
- DAG.getBitcast(MVT::v4i32, Op.getOperand(0)),
- Op.getOperand(1)));
- SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, MVT::i32,
- Op.getOperand(0), Op.getOperand(1));
- SDValue Assert = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Extract,
- DAG.getValueType(VT));
- return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert);
- }
-
if (VT == MVT::f32) {
// EXTRACTPS outputs to a GPR32 register which will require a movd to copy
// the result back to FR32 register. It's only worth matching if the
@@ -12432,6 +13322,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
if (isa<ConstantSDNode>(Op.getOperand(1)))
return Op;
}
+
return SDValue();
}
@@ -12460,7 +13351,8 @@ X86TargetLowering::ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) const
}
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
- if (!Subtarget.hasDQI() && (VecVT.getVectorNumElements() <= 8)) {
+ if ((!Subtarget.hasDQI() && (VecVT.getVectorNumElements() == 8)) ||
+ (VecVT.getVectorNumElements() < 8)) {
// Use kshiftlw/rw instruction.
VecVT = MVT::v16i1;
Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VecVT,
@@ -12469,8 +13361,9 @@ X86TargetLowering::ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) const
DAG.getIntPtrConstant(0, dl));
}
unsigned MaxSift = VecVT.getVectorNumElements() - 1;
- Vec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, Vec,
- DAG.getConstant(MaxSift - IdxVal, dl, MVT::i8));
+ if (MaxSift - IdxVal)
+ Vec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, Vec,
+ DAG.getConstant(MaxSift - IdxVal, dl, MVT::i8));
Vec = DAG.getNode(X86ISD::VSRLI, dl, VecVT, Vec,
DAG.getConstant(MaxSift, dl, MVT::i8));
return DAG.getNode(X86ISD::VEXTRACT, dl, MVT::i1, Vec,
@@ -12491,10 +13384,10 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
if (!isa<ConstantSDNode>(Idx)) {
if (VecVT.is512BitVector() ||
(VecVT.is256BitVector() && Subtarget.hasInt256() &&
- VecVT.getVectorElementType().getSizeInBits() == 32)) {
+ VecVT.getScalarSizeInBits() == 32)) {
MVT MaskEltVT =
- MVT::getIntegerVT(VecVT.getVectorElementType().getSizeInBits());
+ MVT::getIntegerVT(VecVT.getScalarSizeInBits());
MVT MaskVT = MVT::getVectorVT(MaskEltVT, VecVT.getSizeInBits() /
MaskEltVT.getSizeInBits());
@@ -12531,26 +13424,31 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
assert(VecVT.is128BitVector() && "Unexpected vector length");
- if (Subtarget.hasSSE41())
- if (SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG))
- return Res;
-
MVT VT = Op.getSimpleValueType();
- // TODO: handle v16i8.
+
if (VT.getSizeInBits() == 16) {
- if (IdxVal == 0)
+ // If IdxVal is 0, it's cheaper to do a move instead of a pextrw, unless
+ // we're going to zero extend the register or fold the store (SSE41 only).
+ if (IdxVal == 0 && !MayFoldIntoZeroExtend(Op) &&
+ !(Subtarget.hasSSE41() && MayFoldIntoStore(Op)))
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16,
DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
DAG.getBitcast(MVT::v4i32, Vec), Idx));
// Transform it so it match pextrw which produces a 32-bit result.
- MVT EltVT = MVT::i32;
- SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, EltVT, Vec, Idx);
- SDValue Assert = DAG.getNode(ISD::AssertZext, dl, EltVT, Extract,
+ SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, MVT::i32,
+ Op.getOperand(0), Op.getOperand(1));
+ SDValue Assert = DAG.getNode(ISD::AssertZext, dl, MVT::i32, Extract,
DAG.getValueType(VT));
return DAG.getNode(ISD::TRUNCATE, dl, VT, Assert);
}
+ if (Subtarget.hasSSE41())
+ if (SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG))
+ return Res;
+
+ // TODO: handle v16i8.
+
if (VT.getSizeInBits() == 32) {
if (IdxVal == 0)
return Op;
@@ -12604,12 +13502,46 @@ X86TargetLowering::InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) const {
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
SDValue EltInVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Elt);
- if (IdxVal)
+ unsigned NumElems = VecVT.getVectorNumElements();
+
+ if (Vec.isUndef()) {
+ if (IdxVal)
+ EltInVec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, EltInVec,
+ DAG.getConstant(IdxVal, dl, MVT::i8));
+ return EltInVec;
+ }
+
+ // Insertion of one bit into first or last position
+ // can be done with two SHIFTs + OR.
+ if (IdxVal == 0 ) {
+ // EltInVec already at correct index and other bits are 0.
+ // Clean the first bit in source vector.
+ Vec = DAG.getNode(X86ISD::VSRLI, dl, VecVT, Vec,
+ DAG.getConstant(1 , dl, MVT::i8));
+ Vec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, Vec,
+ DAG.getConstant(1, dl, MVT::i8));
+
+ return DAG.getNode(ISD::OR, dl, VecVT, Vec, EltInVec);
+ }
+ if (IdxVal == NumElems -1) {
+ // Move the bit to the last position inside the vector.
EltInVec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, EltInVec,
DAG.getConstant(IdxVal, dl, MVT::i8));
- if (Vec.isUndef())
- return EltInVec;
- return DAG.getNode(ISD::OR, dl, VecVT, Vec, EltInVec);
+ // Clean the last bit in the source vector.
+ Vec = DAG.getNode(X86ISD::VSHLI, dl, VecVT, Vec,
+ DAG.getConstant(1, dl, MVT::i8));
+ Vec = DAG.getNode(X86ISD::VSRLI, dl, VecVT, Vec,
+ DAG.getConstant(1 , dl, MVT::i8));
+
+ return DAG.getNode(ISD::OR, dl, VecVT, Vec, EltInVec);
+ }
+
+ // Use shuffle to insert element.
+ SmallVector<int, 64> MaskVec(NumElems);
+ for (unsigned i = 0; i != NumElems; ++i)
+ MaskVec[i] = (i == IdxVal) ? NumElems : i;
+
+ return DAG.getVectorShuffle(VecVT, dl, Vec, EltInVec, MaskVec);
}
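// Illustrative sketch (assumption, not from the patch): the same
// "two shifts + OR" trick on a plain 16-bit mask word, mirroring the
// k-register lowering above for insertion at bit 0 and at bit NumElems-1.
#include <cassert>
#include <cstdint>

static uint16_t insertBit0(uint16_t Mask, uint16_t Bit) {
  Mask = (uint16_t)(Mask >> 1);                // shift the old bit 0 out...
  Mask = (uint16_t)(Mask << 1);                // ...and back, leaving bit 0 clear
  return Mask | (Bit & 1);                     // OR in the new bit at position 0
}

static uint16_t insertBit15(uint16_t Mask, uint16_t Bit) {
  uint16_t Elt = (uint16_t)((Bit & 1) << 15);  // move the bit to the last slot
  Mask = (uint16_t)(Mask << 1);                // shift the old top bit out...
  Mask = (uint16_t)(Mask >> 1);                // ...and back, leaving bit 15 clear
  return Mask | Elt;
}

int main() {
  assert(insertBit0(0xF, 0) == 0xE);
  assert(insertBit15(0x8001, 0) == 0x0001);
  assert(insertBit15(0x0001, 1) == 0x8001);
}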
SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
@@ -12764,10 +13696,6 @@ static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
return insert128BitVector(DAG.getUNDEF(OpVT), Op, 0, DAG, dl);
}
- if (OpVT == MVT::v1i64 &&
- Op.getOperand(0).getValueType() == MVT::i64)
- return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i64, Op.getOperand(0));
-
SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0));
assert(OpVT.is128BitVector() && "Expected an SSE type!");
return DAG.getBitcast(
@@ -12779,25 +13707,32 @@ static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) {
// upper bits of a vector.
static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
+ assert(Subtarget.hasAVX() && "EXTRACT_SUBVECTOR requires AVX");
+
SDLoc dl(Op);
SDValue In = Op.getOperand(0);
SDValue Idx = Op.getOperand(1);
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
- MVT ResVT = Op.getSimpleValueType();
- MVT InVT = In.getSimpleValueType();
+ MVT ResVT = Op.getSimpleValueType();
- if (Subtarget.hasFp256()) {
- if (ResVT.is128BitVector() &&
- (InVT.is256BitVector() || InVT.is512BitVector()) &&
- isa<ConstantSDNode>(Idx)) {
- return extract128BitVector(In, IdxVal, DAG, dl);
- }
- if (ResVT.is256BitVector() && InVT.is512BitVector() &&
- isa<ConstantSDNode>(Idx)) {
- return extract256BitVector(In, IdxVal, DAG, dl);
- }
- }
- return SDValue();
+ assert((In.getSimpleValueType().is256BitVector() ||
+ In.getSimpleValueType().is512BitVector()) &&
+ "Can only extract from 256-bit or 512-bit vectors");
+
+ if (ResVT.is128BitVector())
+ return extract128BitVector(In, IdxVal, DAG, dl);
+ if (ResVT.is256BitVector())
+ return extract256BitVector(In, IdxVal, DAG, dl);
+
+ llvm_unreachable("Unimplemented!");
+}
+
+static bool areOnlyUsersOf(SDNode *N, ArrayRef<SDValue> ValidUsers) {
+ for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I)
+ if (llvm::all_of(ValidUsers,
+ [&I](SDValue V) { return V.getNode() != *I; }))
+ return false;
+ return true;
}
// Lower a node with an INSERT_SUBVECTOR opcode. This may result in a
@@ -12805,58 +13740,97 @@ static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget,
// the upper bits of a vector.
static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
- if (!Subtarget.hasAVX())
- return SDValue();
+ assert(Subtarget.hasAVX() && "INSERT_SUBVECTOR requires AVX");
SDLoc dl(Op);
SDValue Vec = Op.getOperand(0);
SDValue SubVec = Op.getOperand(1);
SDValue Idx = Op.getOperand(2);
- if (!isa<ConstantSDNode>(Idx))
- return SDValue();
-
unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
MVT OpVT = Op.getSimpleValueType();
MVT SubVecVT = SubVec.getSimpleValueType();
- // Fold two 16-byte subvector loads into one 32-byte load:
- // (insert_subvector (insert_subvector undef, (load addr), 0),
- // (load addr + 16), Elts/2)
+ if (OpVT.getVectorElementType() == MVT::i1)
+ return insert1BitVector(Op, DAG, Subtarget);
+
+ assert((OpVT.is256BitVector() || OpVT.is512BitVector()) &&
+ "Can only insert into 256-bit or 512-bit vectors");
+
+ // Fold two 16-byte or 32-byte subvector loads into one 32-byte or 64-byte
+ // load:
+ // (insert_subvector (insert_subvector undef, (load16 addr), 0),
+ // (load16 addr + 16), Elts/2)
// --> load32 addr
+ // or:
+ // (insert_subvector (insert_subvector undef, (load32 addr), 0),
+ // (load32 addr + 32), Elts/2)
+ // --> load64 addr
+ // or a 16-byte or 32-byte broadcast:
+ // (insert_subvector (insert_subvector undef, (load16 addr), 0),
+ // (load16 addr), Elts/2)
+ // --> X86SubVBroadcast(load16 addr)
+ // or:
+ // (insert_subvector (insert_subvector undef, (load32 addr), 0),
+ // (load32 addr), Elts/2)
+ // --> X86SubVBroadcast(load32 addr)
if ((IdxVal == OpVT.getVectorNumElements() / 2) &&
Vec.getOpcode() == ISD::INSERT_SUBVECTOR &&
- OpVT.is256BitVector() && SubVecVT.is128BitVector()) {
+ OpVT.getSizeInBits() == SubVecVT.getSizeInBits() * 2) {
auto *Idx2 = dyn_cast<ConstantSDNode>(Vec.getOperand(2));
if (Idx2 && Idx2->getZExtValue() == 0) {
+ SDValue SubVec2 = Vec.getOperand(1);
// If needed, look through bitcasts to get to the load.
- SDValue SubVec2 = peekThroughBitcasts(Vec.getOperand(1));
- if (auto *FirstLd = dyn_cast<LoadSDNode>(SubVec2)) {
+ if (auto *FirstLd = dyn_cast<LoadSDNode>(peekThroughBitcasts(SubVec2))) {
bool Fast;
unsigned Alignment = FirstLd->getAlignment();
unsigned AS = FirstLd->getAddressSpace();
const X86TargetLowering *TLI = Subtarget.getTargetLowering();
if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
OpVT, AS, Alignment, &Fast) && Fast) {
- SDValue Ops[] = { SubVec2, SubVec };
+ SDValue Ops[] = {SubVec2, SubVec};
if (SDValue Ld = EltsFromConsecutiveLoads(OpVT, Ops, dl, DAG, false))
return Ld;
}
}
+ // If lower/upper loads are the same and the only users of the load, then
+ // lower to a VBROADCASTF128/VBROADCASTI128/etc.
+ if (auto *Ld = dyn_cast<LoadSDNode>(peekThroughOneUseBitcasts(SubVec2))) {
+ if (SubVec2 == SubVec && ISD::isNormalLoad(Ld) &&
+ areOnlyUsersOf(SubVec2.getNode(), {Op, Vec})) {
+ return DAG.getNode(X86ISD::SUBV_BROADCAST, dl, OpVT, SubVec);
+ }
+ }
+ // If this is subv_broadcast insert into both halves, use a larger
+ // subv_broadcast.
+ if (SubVec.getOpcode() == X86ISD::SUBV_BROADCAST && SubVec == SubVec2) {
+ return DAG.getNode(X86ISD::SUBV_BROADCAST, dl, OpVT,
+ SubVec.getOperand(0));
+ }
}
}
- if ((OpVT.is256BitVector() || OpVT.is512BitVector()) &&
- SubVecVT.is128BitVector())
+ if (SubVecVT.is128BitVector())
return insert128BitVector(Vec, SubVec, IdxVal, DAG, dl);
- if (OpVT.is512BitVector() && SubVecVT.is256BitVector())
+ if (SubVecVT.is256BitVector())
return insert256BitVector(Vec, SubVec, IdxVal, DAG, dl);
- if (OpVT.getVectorElementType() == MVT::i1)
- return insert1BitVector(Op, DAG, Subtarget);
+ llvm_unreachable("Unimplemented!");
+}
- return SDValue();
+// Returns the appropriate wrapper opcode for a global reference.
+unsigned X86TargetLowering::getGlobalWrapperKind(const GlobalValue *GV) const {
+ // References to absolute symbols are never PC-relative.
+ if (GV && GV->isAbsoluteSymbolRef())
+ return X86ISD::Wrapper;
+
+ CodeModel::Model M = getTargetMachine().getCodeModel();
+ if (Subtarget.isPICStyleRIPRel() &&
+ (M == CodeModel::Small || M == CodeModel::Kernel))
+ return X86ISD::WrapperRIP;
+
+ return X86ISD::Wrapper;
}
// ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered as
@@ -12872,18 +13846,12 @@ X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
// In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
// global base reg.
unsigned char OpFlag = Subtarget.classifyLocalReference(nullptr);
- unsigned WrapperKind = X86ISD::Wrapper;
- CodeModel::Model M = DAG.getTarget().getCodeModel();
-
- if (Subtarget.isPICStyleRIPRel() &&
- (M == CodeModel::Small || M == CodeModel::Kernel))
- WrapperKind = X86ISD::WrapperRIP;
auto PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Result = DAG.getTargetConstantPool(
CP->getConstVal(), PtrVT, CP->getAlignment(), CP->getOffset(), OpFlag);
SDLoc DL(CP);
- Result = DAG.getNode(WrapperKind, DL, PtrVT, Result);
+ Result = DAG.getNode(getGlobalWrapperKind(), DL, PtrVT, Result);
// With PIC, the address is actually $g + Offset.
if (OpFlag) {
Result =
@@ -12900,17 +13868,11 @@ SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
// In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the
// global base reg.
unsigned char OpFlag = Subtarget.classifyLocalReference(nullptr);
- unsigned WrapperKind = X86ISD::Wrapper;
- CodeModel::Model M = DAG.getTarget().getCodeModel();
-
- if (Subtarget.isPICStyleRIPRel() &&
- (M == CodeModel::Small || M == CodeModel::Kernel))
- WrapperKind = X86ISD::WrapperRIP;
auto PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OpFlag);
SDLoc DL(JT);
- Result = DAG.getNode(WrapperKind, DL, PtrVT, Result);
+ Result = DAG.getNode(getGlobalWrapperKind(), DL, PtrVT, Result);
// With PIC, the address is actually $g + Offset.
if (OpFlag)
@@ -12929,18 +13891,12 @@ X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const {
// global base reg.
const Module *Mod = DAG.getMachineFunction().getFunction()->getParent();
unsigned char OpFlag = Subtarget.classifyGlobalReference(nullptr, *Mod);
- unsigned WrapperKind = X86ISD::Wrapper;
- CodeModel::Model M = DAG.getTarget().getCodeModel();
-
- if (Subtarget.isPICStyleRIPRel() &&
- (M == CodeModel::Small || M == CodeModel::Kernel))
- WrapperKind = X86ISD::WrapperRIP;
auto PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Result = DAG.getTargetExternalSymbol(Sym, PtrVT, OpFlag);
SDLoc DL(Op);
- Result = DAG.getNode(WrapperKind, DL, PtrVT, Result);
+ Result = DAG.getNode(getGlobalWrapperKind(), DL, PtrVT, Result);
// With PIC, the address is actually $g + Offset.
if (isPositionIndependent() && !Subtarget.is64Bit()) {
@@ -12963,18 +13919,12 @@ X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
// Create the TargetBlockAddressAddress node.
unsigned char OpFlags =
Subtarget.classifyBlockAddressReference();
- CodeModel::Model M = DAG.getTarget().getCodeModel();
const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
int64_t Offset = cast<BlockAddressSDNode>(Op)->getOffset();
SDLoc dl(Op);
auto PtrVT = getPointerTy(DAG.getDataLayout());
SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset, OpFlags);
-
- if (Subtarget.isPICStyleRIPRel() &&
- (M == CodeModel::Small || M == CodeModel::Kernel))
- Result = DAG.getNode(X86ISD::WrapperRIP, dl, PtrVT, Result);
- else
- Result = DAG.getNode(X86ISD::Wrapper, dl, PtrVT, Result);
+ Result = DAG.getNode(getGlobalWrapperKind(), dl, PtrVT, Result);
// With PIC, the address is actually $g + Offset.
if (isGlobalRelativeToPICBase(OpFlags)) {
@@ -13003,11 +13953,7 @@ SDValue X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV,
Result = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, OpFlags);
}
- if (Subtarget.isPICStyleRIPRel() &&
- (M == CodeModel::Small || M == CodeModel::Kernel))
- Result = DAG.getNode(X86ISD::WrapperRIP, dl, PtrVT, Result);
- else
- Result = DAG.getNode(X86ISD::Wrapper, dl, PtrVT, Result);
+ Result = DAG.getNode(getGlobalWrapperKind(GV), dl, PtrVT, Result);
// With PIC, the address is actually $g + Offset.
if (isGlobalRelativeToPICBase(OpFlags)) {
@@ -13041,7 +13987,7 @@ static SDValue
GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
SDValue *InFlag, const EVT PtrVT, unsigned ReturnReg,
unsigned char OperandFlags, bool LocalDynamic = false) {
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
SDLoc dl(GA);
SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
@@ -13061,8 +14007,8 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
}
// TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
- MFI->setAdjustsStack(true);
- MFI->setHasCalls(true);
+ MFI.setAdjustsStack(true);
+ MFI.setHasCalls(true);
SDValue Flag = Chain.getValue(1);
return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Flag);
@@ -13097,7 +14043,7 @@ static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA,
SDLoc dl(GA);
// Get the start address of the TLS block for this module.
- X86MachineFunctionInfo* MFI = DAG.getMachineFunction()
+ X86MachineFunctionInfo *MFI = DAG.getMachineFunction()
.getInfo<X86MachineFunctionInfo>();
MFI->incNumLocalDynamicTLSAccesses();
@@ -13251,8 +14197,8 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
Chain.getValue(1), DL);
// TLSCALL will be codegen'ed as call. Inform MFI that function has calls.
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
- MFI->setAdjustsStack(true);
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI.setAdjustsStack(true);
// And our return value (tls address) is in the standard call return value
// location.
@@ -13395,9 +14341,9 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (SrcVT.isVector()) {
if (SrcVT == MVT::v2i32 && VT == MVT::v2f64) {
- return DAG.getNode(X86ISD::CVTDQ2PD, dl, VT,
+ return DAG.getNode(X86ISD::CVTSI2P, dl, VT,
DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Src,
- DAG.getUNDEF(SrcVT)));
+ DAG.getUNDEF(SrcVT)));
}
if (SrcVT.getVectorElementType() == MVT::i1) {
if (SrcVT == MVT::v2i1 && TLI.isTypeLegal(SrcVT))
@@ -13433,7 +14379,7 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
unsigned Size = SrcVT.getSizeInBits()/8;
MachineFunction &MF = DAG.getMachineFunction();
auto PtrVT = getPointerTy(MF.getDataLayout());
- int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size, false);
+ int SSFI = MF.getFrameInfo().CreateStackObject(Size, Size, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
SDValue Chain = DAG.getStore(
DAG.getEntryNode(), dl, ValueToStore, StackSlot,
@@ -13479,8 +14425,8 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
// shouldn't be necessary except that RFP cannot be live across
// multiple blocks. When stackifier is fixed, they can be uncoupled.
MachineFunction &MF = DAG.getMachineFunction();
- unsigned SSFISize = Op.getValueType().getSizeInBits()/8;
- int SSFI = MF.getFrameInfo()->CreateStackObject(SSFISize, SSFISize, false);
+ unsigned SSFISize = Op.getValueSizeInBits()/8;
+ int SSFI = MF.getFrameInfo().CreateStackObject(SSFISize, SSFISize, false);
auto PtrVT = getPointerTy(MF.getDataLayout());
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
Tys = DAG.getVTList(MVT::Other);
@@ -13528,10 +14474,10 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
SmallVector<Constant*,2> CV1;
CV1.push_back(
- ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble,
+ ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble(),
APInt(64, 0x4330000000000000ULL))));
CV1.push_back(
- ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble,
+ ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble(),
APInt(64, 0x4530000000000000ULL))));
Constant *C1 = ConstantVector::get(CV1);
SDValue CPIdx1 = DAG.getConstantPool(C1, PtrVT, 16);
@@ -13560,8 +14506,7 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op,
Result = DAG.getNode(X86ISD::FHADD, dl, MVT::v2f64, Sub, Sub);
} else {
SDValue S2F = DAG.getBitcast(MVT::v4i32, Sub);
- SDValue Shuffle = getTargetShuffleNode(X86ISD::PSHUFD, dl, MVT::v4i32,
- S2F, 0x4E, DAG);
+ SDValue Shuffle = DAG.getVectorShuffle(MVT::v4i32, dl, S2F, S2F, {2,3,0,1});
Result = DAG.getNode(ISD::FADD, dl, MVT::v2f64,
DAG.getBitcast(MVT::v2f64, Shuffle), Sub);
}
@@ -13617,6 +14562,41 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i32(SDValue Op,
return Sub;
}
+static SDValue lowerUINT_TO_FP_v2i32(SDValue Op, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget, SDLoc &DL) {
+ if (Op.getSimpleValueType() != MVT::v2f64)
+ return SDValue();
+
+ SDValue N0 = Op.getOperand(0);
+ assert(N0.getSimpleValueType() == MVT::v2i32 && "Unexpected input type");
+
+ // Legalize to v4i32 type.
+ N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4i32, N0,
+ DAG.getUNDEF(MVT::v2i32));
+
+ if (Subtarget.hasAVX512())
+ return DAG.getNode(X86ISD::CVTUI2P, DL, MVT::v2f64, N0);
+
+ // Same implementation as VectorLegalizer::ExpandUINT_TO_FLOAT,
+ // but using v2i32 to v2f64 with X86ISD::CVTSI2P.
+ SDValue HalfWord = DAG.getConstant(16, DL, MVT::v4i32);
+ SDValue HalfWordMask = DAG.getConstant(0x0000FFFF, DL, MVT::v4i32);
+
+ // Two to the power of half-word-size.
+ SDValue TWOHW = DAG.getConstantFP(1 << 16, DL, MVT::v2f64);
+
+ // Clear upper part of LO, lower HI.
+ SDValue HI = DAG.getNode(ISD::SRL, DL, MVT::v4i32, N0, HalfWord);
+ SDValue LO = DAG.getNode(ISD::AND, DL, MVT::v4i32, N0, HalfWordMask);
+
+ SDValue fHI = DAG.getNode(X86ISD::CVTSI2P, DL, MVT::v2f64, HI);
+ fHI = DAG.getNode(ISD::FMUL, DL, MVT::v2f64, fHI, TWOHW);
+ SDValue fLO = DAG.getNode(X86ISD::CVTSI2P, DL, MVT::v2f64, LO);
+
+ // Add the two halves.
+ return DAG.getNode(ISD::FADD, DL, MVT::v2f64, fHI, fLO);
+}
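// Illustrative sketch (not from the patch): the scalar arithmetic behind the
// v2i32 expansion above. Each unsigned 32-bit lane is split into 16-bit
// halves, both halves convert exactly with a *signed* int-to-double
// conversion (they are < 2^16), and the high half is rescaled by 2^16
// before the final add.
#include <cassert>
#include <cstdint>

static double uintToDouble(uint32_t X) {
  uint32_t Hi = X >> 16;                        // SRL by the half-word size
  uint32_t Lo = X & 0x0000FFFFu;                // AND with the half-word mask
  double FHi = (double)(int32_t)Hi * 65536.0;   // CVTSI2P then FMUL by 2^16
  double FLo = (double)(int32_t)Lo;             // CVTSI2P
  return FHi + FLo;                             // FADD of the two halves
}

int main() {
  assert(uintToDouble(0u) == 0.0);
  assert(uintToDouble(0xFFFFFFFFu) == 4294967295.0);
  assert(uintToDouble(0x80000000u) == 2147483648.0);
}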
+
static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
// The algorithm is the following:
@@ -13699,7 +14679,7 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG,
// Create the vector constant for -(0x1.0p39f + 0x1.0p23f).
SDValue VecCstFAdd = DAG.getConstantFP(
- APFloat(APFloat::IEEEsingle, APInt(32, 0xD3000080)), DL, VecFloatVT);
+ APFloat(APFloat::IEEEsingle(), APInt(32, 0xD3000080)), DL, VecFloatVT);
// float4 fhi = (float4) hi - (0x1.0p39f + 0x1.0p23f);
SDValue HighBitcast = DAG.getBitcast(VecFloatVT, High);
@@ -13714,29 +14694,31 @@ static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG,
SDValue X86TargetLowering::lowerUINT_TO_FP_vec(SDValue Op,
SelectionDAG &DAG) const {
SDValue N0 = Op.getOperand(0);
- MVT SVT = N0.getSimpleValueType();
+ MVT SrcVT = N0.getSimpleValueType();
SDLoc dl(Op);
- if (SVT.getVectorElementType() == MVT::i1) {
- if (SVT == MVT::v2i1)
+ if (SrcVT.getVectorElementType() == MVT::i1) {
+ if (SrcVT == MVT::v2i1)
return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(),
DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64, N0));
- MVT IntegerVT = MVT::getVectorVT(MVT::i32, SVT.getVectorNumElements());
+ MVT IntegerVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements());
return DAG.getNode(ISD::UINT_TO_FP, dl, Op.getValueType(),
DAG.getNode(ISD::ZERO_EXTEND, dl, IntegerVT, N0));
}
- switch (SVT.SimpleTy) {
+ switch (SrcVT.SimpleTy) {
default:
llvm_unreachable("Custom UINT_TO_FP is not supported!");
case MVT::v4i8:
case MVT::v4i16:
case MVT::v8i8:
case MVT::v8i16: {
- MVT NVT = MVT::getVectorVT(MVT::i32, SVT.getVectorNumElements());
+ MVT NVT = MVT::getVectorVT(MVT::i32, SrcVT.getVectorNumElements());
return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(),
DAG.getNode(ISD::ZERO_EXTEND, dl, NVT, N0));
}
+ case MVT::v2i32:
+ return lowerUINT_TO_FP_v2i32(Op, DAG, Subtarget, dl);
case MVT::v4i32:
case MVT::v8i32:
return lowerUINT_TO_FP_vXi32(Op, DAG, Subtarget);
@@ -13754,15 +14736,15 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
SDLoc dl(Op);
auto PtrVT = getPointerTy(DAG.getDataLayout());
- if (Op.getSimpleValueType().isVector())
- return lowerUINT_TO_FP_vec(Op, DAG);
-
// Since UINT_TO_FP is legal (it's marked custom), dag combiner won't
// optimize it to a SINT_TO_FP when the sign bit is known zero. Perform
// the optimization here.
if (DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::SINT_TO_FP, dl, Op.getValueType(), N0);
+ if (Op.getSimpleValueType().isVector())
+ return lowerUINT_TO_FP_vec(Op, DAG);
+
MVT SrcVT = N0.getSimpleValueType();
MVT DstVT = Op.getSimpleValueType();
@@ -13903,7 +14885,7 @@ X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
// stack slot.
MachineFunction &MF = DAG.getMachineFunction();
unsigned MemSize = DstTy.getSizeInBits()/8;
- int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
+ int SSFI = MF.getFrameInfo().CreateStackObject(MemSize, MemSize, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
unsigned Opc;
@@ -13935,15 +14917,15 @@ X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
// For X87 we'd like to use the smallest FP type for this constant, but
// for DAG type consistency we have to match the FP operand type.
- APFloat Thresh(APFloat::IEEEsingle, APInt(32, 0x5f000000));
+ APFloat Thresh(APFloat::IEEEsingle(), APInt(32, 0x5f000000));
LLVM_ATTRIBUTE_UNUSED APFloat::opStatus Status = APFloat::opOK;
bool LosesInfo = false;
if (TheVT == MVT::f64)
// The rounding mode is irrelevant as the conversion should be exact.
- Status = Thresh.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven,
+ Status = Thresh.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
&LosesInfo);
else if (TheVT == MVT::f80)
- Status = Thresh.convert(APFloat::x87DoubleExtended,
+ Status = Thresh.convert(APFloat::x87DoubleExtended(),
APFloat::rmNearestTiesToEven, &LosesInfo);
assert(Status == APFloat::opOK && !LosesInfo &&
@@ -13981,7 +14963,7 @@ X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
MachineMemOperand::MOLoad, MemSize, MemSize);
Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, DstTy, MMO);
Chain = Value.getValue(1);
- SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
+ SSFI = MF.getFrameInfo().CreateStackObject(MemSize, MemSize, false);
StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
}
@@ -14084,7 +15066,7 @@ static SDValue LowerZERO_EXTEND_AVX512(SDValue Op,
SDValue In = Op->getOperand(0);
MVT InVT = In.getSimpleValueType();
SDLoc DL(Op);
- unsigned int NumElts = VT.getVectorNumElements();
+ unsigned NumElts = VT.getVectorNumElements();
if (NumElts != 8 && NumElts != 16 && !Subtarget.hasBWI())
return SDValue();
@@ -14137,6 +15119,85 @@ static SDValue LowerZERO_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
return SDValue();
}
+/// Helper to recursively truncate vector elements in half with PACKSS.
+/// It makes use of the fact that vector comparison results will be all-zeros
+/// or all-ones to use (vXi8 PACKSS(vYi16, vYi16)) instead of matching types.
+/// AVX2 (Int256) sub-targets require extra shuffling as the PACKSS operates
+/// within each 128-bit lane.
+static SDValue truncateVectorCompareWithPACKSS(EVT DstVT, SDValue In,
+ const SDLoc &DL,
+ SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ // Requires SSE2 but AVX512 has fast truncate.
+ if (!Subtarget.hasSSE2() || Subtarget.hasAVX512())
+ return SDValue();
+
+ EVT SrcVT = In.getValueType();
+
+ // No truncation required, we might get here due to recursive calls.
+ if (SrcVT == DstVT)
+ return In;
+
+ // We only support vector truncation to 128bits or greater from a
+ // 256bits or greater source.
+ if ((DstVT.getSizeInBits() % 128) != 0)
+ return SDValue();
+ if ((SrcVT.getSizeInBits() % 256) != 0)
+ return SDValue();
+
+ unsigned NumElems = SrcVT.getVectorNumElements();
+ assert(DstVT.getVectorNumElements() == NumElems && "Illegal truncation");
+ assert(SrcVT.getSizeInBits() > DstVT.getSizeInBits() && "Illegal truncation");
+
+ EVT PackedSVT =
+ EVT::getIntegerVT(*DAG.getContext(), SrcVT.getScalarSizeInBits() / 2);
+
+ // Extract lower/upper subvectors.
+ unsigned NumSubElts = NumElems / 2;
+ unsigned SrcSizeInBits = SrcVT.getSizeInBits();
+ SDValue Lo = extractSubVector(In, 0 * NumSubElts, DAG, DL, SrcSizeInBits / 2);
+ SDValue Hi = extractSubVector(In, 1 * NumSubElts, DAG, DL, SrcSizeInBits / 2);
+
+ // 256bit -> 128bit truncate - PACKSS lower/upper 128-bit subvectors.
+ if (SrcVT.is256BitVector()) {
+ Lo = DAG.getBitcast(MVT::v8i16, Lo);
+ Hi = DAG.getBitcast(MVT::v8i16, Hi);
+ SDValue Res = DAG.getNode(X86ISD::PACKSS, DL, MVT::v16i8, Lo, Hi);
+ return DAG.getBitcast(DstVT, Res);
+ }
+
+ // AVX2: 512bit -> 256bit truncate - PACKSS lower/upper 256-bit subvectors.
+ // AVX2: 512bit -> 128bit truncate - PACKSS(PACKSS, PACKSS).
+ if (SrcVT.is512BitVector() && Subtarget.hasInt256()) {
+ Lo = DAG.getBitcast(MVT::v16i16, Lo);
+ Hi = DAG.getBitcast(MVT::v16i16, Hi);
+ SDValue Res = DAG.getNode(X86ISD::PACKSS, DL, MVT::v32i8, Lo, Hi);
+
+ // 256-bit PACKSS(ARG0, ARG1) leaves us with ((LO0,LO1),(HI0,HI1)),
+ // so we need to shuffle to get ((LO0,HI0),(LO1,HI1)).
+ Res = DAG.getBitcast(MVT::v4i64, Res);
+ Res = DAG.getVectorShuffle(MVT::v4i64, DL, Res, Res, {0, 2, 1, 3});
+
+ if (DstVT.is256BitVector())
+ return DAG.getBitcast(DstVT, Res);
+
+    // If 512bit -> 128bit, truncate another stage.
+ EVT PackedVT = EVT::getVectorVT(*DAG.getContext(), PackedSVT, NumElems);
+ Res = DAG.getBitcast(PackedVT, Res);
+ return truncateVectorCompareWithPACKSS(DstVT, Res, DL, DAG, Subtarget);
+ }
+
+ // Recursively pack lower/upper subvectors, concat result and pack again.
+ assert(SrcVT.getSizeInBits() >= 512 && "Expected 512-bit vector or greater");
+ EVT PackedVT = EVT::getVectorVT(*DAG.getContext(), PackedSVT, NumElems / 2);
+ Lo = truncateVectorCompareWithPACKSS(PackedVT, Lo, DL, DAG, Subtarget);
+ Hi = truncateVectorCompareWithPACKSS(PackedVT, Hi, DL, DAG, Subtarget);
+
+ PackedVT = EVT::getVectorVT(*DAG.getContext(), PackedSVT, NumElems);
+ SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, PackedVT, Lo, Hi);
+ return truncateVectorCompareWithPACKSS(DstVT, Res, DL, DAG, Subtarget);
+}
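As a sanity check on the comment at the top of this helper, here is a scalar sketch of what one PACKSS lane does to boolean (all-zeros/all-ones) inputs; packssLane is an illustrative name, not an LLVM API:

    #include <cstdint>

    int8_t packssLane(int16_t Lane) {
      // Signed saturation from i16 to i8, as PACKSSWB performs per lane.
      if (Lane > INT8_MAX) return INT8_MAX;
      if (Lane < INT8_MIN) return INT8_MIN;
      return (int8_t)Lane;
    }
    // packssLane(0) == 0x00: a "false" comparison lane stays all-zeros.
    // packssLane(-1) == (int8_t)0xFF: a "true" lane (0xFFFF) stays all-ones.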
+
static SDValue LowerTruncateVecI1(SDValue Op, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
@@ -14203,6 +15264,22 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
DAG.getNode(X86ISD::VSEXT, DL, MVT::v16i32, In));
return DAG.getNode(X86ISD::VTRUNC, DL, VT, In);
}
+
+ // Truncate with PACKSS if we are truncating a vector comparison result.
+  // TODO: We should be able to support other operations as long as we
+  // are saturating+packing zero/all bits only.
+ auto IsPackableComparison = [](SDValue V) {
+ unsigned Opcode = V.getOpcode();
+ return (Opcode == X86ISD::PCMPGT || Opcode == X86ISD::PCMPEQ ||
+ Opcode == X86ISD::CMPP);
+ };
+
+ if (IsPackableComparison(In) || (In.getOpcode() == ISD::CONCAT_VECTORS &&
+ all_of(In->ops(), IsPackableComparison))) {
+ if (SDValue V = truncateVectorCompareWithPACKSS(VT, In, DL, DAG, Subtarget))
+ return V;
+ }
+
if ((VT == MVT::v4i32) && (InVT == MVT::v4i64)) {
// On AVX2, v4i64 -> v4i32 becomes VPERMD.
if (Subtarget.hasInt256()) {
@@ -14299,30 +15376,31 @@ SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
DAG.getIntPtrConstant(0, DL));
}
-SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op,
- SelectionDAG &DAG) const {
- assert(!Op.getSimpleValueType().isVector());
+SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) const {
+ bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT;
- std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG,
- /*IsSigned=*/ true, /*IsReplace=*/ false);
- SDValue FIST = Vals.first, StackSlot = Vals.second;
- // If FP_TO_INTHelper failed, the node is actually supposed to be Legal.
- if (!FIST.getNode())
- return Op;
+ MVT VT = Op.getSimpleValueType();
- if (StackSlot.getNode())
- // Load the result.
- return DAG.getLoad(Op.getValueType(), SDLoc(Op), FIST, StackSlot,
- MachinePointerInfo());
+ if (VT.isVector()) {
+ assert(Subtarget.hasDQI() && Subtarget.hasVLX() && "Requires AVX512DQVL!");
+ SDValue Src = Op.getOperand(0);
+ SDLoc dl(Op);
+ if (VT == MVT::v2i64 && Src.getSimpleValueType() == MVT::v2f32) {
+ return DAG.getNode(IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI,
+ dl, VT,
+ DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
+ DAG.getUNDEF(MVT::v2f32)));
+ }
- // The node is the result.
- return FIST;
-}
+ return SDValue();
+ }
+
+ assert(!VT.isVector());
-SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op,
- SelectionDAG &DAG) const {
std::pair<SDValue,SDValue> Vals = FP_TO_INTHelper(Op, DAG,
- /*IsSigned=*/ false, /*IsReplace=*/ false);
+ IsSigned, /*IsReplace=*/ false);
SDValue FIST = Vals.first, StackSlot = Vals.second;
// If FP_TO_INTHelper failed, the node is actually supposed to be Legal.
if (!FIST.getNode())
@@ -14330,8 +15408,7 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op,
if (StackSlot.getNode())
// Load the result.
- return DAG.getLoad(Op.getValueType(), SDLoc(Op), FIST, StackSlot,
- MachinePointerInfo());
+ return DAG.getLoad(VT, SDLoc(Op), FIST, StackSlot, MachinePointerInfo());
// The node is the result.
return FIST;
@@ -14376,17 +15453,14 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
MVT LogicVT;
MVT EltVT;
- unsigned NumElts;
if (VT.isVector()) {
LogicVT = VT;
EltVT = VT.getVectorElementType();
- NumElts = VT.getVectorNumElements();
} else if (IsF128) {
// SSE instructions are used for optimized f128 logical operations.
LogicVT = MVT::f128;
EltVT = VT;
- NumElts = 1;
} else {
// There are no scalar bitwise logical SSE/AVX instructions, so we
// generate a 16-byte vector constant and logic op even for the scalar case.
@@ -14394,22 +15468,16 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
// the logic op, so it can save (~4 bytes) on code size.
LogicVT = (VT == MVT::f64) ? MVT::v2f64 : MVT::v4f32;
EltVT = VT;
- NumElts = (VT == MVT::f64) ? 2 : 4;
}
unsigned EltBits = EltVT.getSizeInBits();
- LLVMContext *Context = DAG.getContext();
// For FABS, mask is 0x7f...; for FNEG, mask is 0x80...
APInt MaskElt =
IsFABS ? APInt::getSignedMaxValue(EltBits) : APInt::getSignBit(EltBits);
- Constant *C = ConstantInt::get(*Context, MaskElt);
- C = ConstantVector::getSplat(NumElts, C);
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- SDValue CPIdx = DAG.getConstantPool(C, TLI.getPointerTy(DAG.getDataLayout()));
- unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
- SDValue Mask = DAG.getLoad(
- LogicVT, dl, DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Alignment);
+ const fltSemantics &Sem =
+ EltVT == MVT::f64 ? APFloat::IEEEdouble() :
+ (IsF128 ? APFloat::IEEEquad() : APFloat::IEEEsingle());
+ SDValue Mask = DAG.getConstantFP(APFloat(Sem, MaskElt), dl, LogicVT);
SDValue Op0 = Op.getOperand(0);
bool IsFNABS = !IsFABS && (Op0.getOpcode() == ISD::FABS);
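For reference, the constants built from getSignedMaxValue/getSignBit above are the usual FP bit masks; a scalar f32 sketch (fabsBits/fnegBits are hypothetical names) of the FAND/FXOR they feed:

    #include <cstdint>
    #include <cstring>

    float fabsBits(float X) {            // FAND with 0x7FFFFFFF (signed max)
      uint32_t B; std::memcpy(&B, &X, 4);
      B &= 0x7FFFFFFFu;
      std::memcpy(&X, &B, 4); return X;
    }
    float fnegBits(float X) {            // FXOR with 0x80000000 (sign bit)
      uint32_t B; std::memcpy(&B, &X, 4);
      B ^= 0x80000000u;
      std::memcpy(&X, &B, 4); return X;
    }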
@@ -14429,92 +15497,73 @@ static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
}
static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- LLVMContext *Context = DAG.getContext();
- SDValue Op0 = Op.getOperand(0);
- SDValue Op1 = Op.getOperand(1);
+ SDValue Mag = Op.getOperand(0);
+ SDValue Sign = Op.getOperand(1);
SDLoc dl(Op);
+
+ // If the sign operand is smaller, extend it first.
MVT VT = Op.getSimpleValueType();
- MVT SrcVT = Op1.getSimpleValueType();
- bool IsF128 = (VT == MVT::f128);
+ if (Sign.getSimpleValueType().bitsLT(VT))
+ Sign = DAG.getNode(ISD::FP_EXTEND, dl, VT, Sign);
- // If second operand is smaller, extend it first.
- if (SrcVT.bitsLT(VT)) {
- Op1 = DAG.getNode(ISD::FP_EXTEND, dl, VT, Op1);
- SrcVT = VT;
- }
// And if it is bigger, shrink it first.
- if (SrcVT.bitsGT(VT)) {
- Op1 = DAG.getNode(ISD::FP_ROUND, dl, VT, Op1, DAG.getIntPtrConstant(1, dl));
- SrcVT = VT;
- }
+ if (Sign.getSimpleValueType().bitsGT(VT))
+ Sign = DAG.getNode(ISD::FP_ROUND, dl, VT, Sign, DAG.getIntPtrConstant(1, dl));
// At this point the operands and the result should have the same
// type, and that won't be f80 since that is not custom lowered.
- assert((VT == MVT::f64 || VT == MVT::f32 || IsF128) &&
+ bool IsF128 = (VT == MVT::f128);
+ assert((VT == MVT::f64 || VT == MVT::f32 || VT == MVT::f128 ||
+ VT == MVT::v2f64 || VT == MVT::v4f64 || VT == MVT::v4f32 ||
+ VT == MVT::v8f32 || VT == MVT::v8f64 || VT == MVT::v16f32) &&
"Unexpected type in LowerFCOPYSIGN");
+ MVT EltVT = VT.getScalarType();
const fltSemantics &Sem =
- VT == MVT::f64 ? APFloat::IEEEdouble :
- (IsF128 ? APFloat::IEEEquad : APFloat::IEEEsingle);
- const unsigned SizeInBits = VT.getSizeInBits();
+ EltVT == MVT::f64 ? APFloat::IEEEdouble()
+ : (IsF128 ? APFloat::IEEEquad() : APFloat::IEEEsingle());
+
+ // Perform all scalar logic operations as 16-byte vectors because there are no
+ // scalar FP logic instructions in SSE.
+ // TODO: This isn't necessary. If we used scalar types, we might avoid some
+ // unnecessary splats, but we might miss load folding opportunities. Should
+ // this decision be based on OptimizeForSize?
+ bool IsFakeVector = !VT.isVector() && !IsF128;
+ MVT LogicVT = VT;
+ if (IsFakeVector)
+ LogicVT = (VT == MVT::f64) ? MVT::v2f64 : MVT::v4f32;
- SmallVector<Constant *, 4> CV(
- VT == MVT::f64 ? 2 : (IsF128 ? 1 : 4),
- ConstantFP::get(*Context, APFloat(Sem, APInt(SizeInBits, 0))));
+ // The mask constants are automatically splatted for vector types.
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ SDValue SignMask = DAG.getConstantFP(
+ APFloat(Sem, APInt::getSignBit(EltSizeInBits)), dl, LogicVT);
+ SDValue MagMask = DAG.getConstantFP(
+ APFloat(Sem, ~APInt::getSignBit(EltSizeInBits)), dl, LogicVT);
// First, clear all bits but the sign bit from the second operand (sign).
- CV[0] = ConstantFP::get(*Context,
- APFloat(Sem, APInt::getHighBitsSet(SizeInBits, 1)));
- Constant *C = ConstantVector::get(CV);
- auto PtrVT = TLI.getPointerTy(DAG.getDataLayout());
- SDValue CPIdx = DAG.getConstantPool(C, PtrVT, 16);
-
- // Perform all logic operations as 16-byte vectors because there are no
- // scalar FP logic instructions in SSE. This allows load folding of the
- // constants into the logic instructions.
- MVT LogicVT = (VT == MVT::f64) ? MVT::v2f64 : (IsF128 ? MVT::f128 : MVT::v4f32);
- SDValue Mask1 =
- DAG.getLoad(LogicVT, dl, DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
- /* Alignment = */ 16);
- if (!IsF128)
- Op1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Op1);
- SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, LogicVT, Op1, Mask1);
+ if (IsFakeVector)
+ Sign = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Sign);
+ SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, LogicVT, Sign, SignMask);
// Next, clear the sign bit from the first operand (magnitude).
- // If it's a constant, we can clear it here.
- if (ConstantFPSDNode *Op0CN = dyn_cast<ConstantFPSDNode>(Op0)) {
+ // TODO: If we had general constant folding for FP logic ops, this check
+ // wouldn't be necessary.
+ SDValue MagBits;
+ if (ConstantFPSDNode *Op0CN = dyn_cast<ConstantFPSDNode>(Mag)) {
APFloat APF = Op0CN->getValueAPF();
- // If the magnitude is a positive zero, the sign bit alone is enough.
- if (APF.isPosZero())
- return IsF128 ? SignBit :
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SrcVT, SignBit,
- DAG.getIntPtrConstant(0, dl));
APF.clearSign();
- CV[0] = ConstantFP::get(*Context, APF);
+ MagBits = DAG.getConstantFP(APF, dl, LogicVT);
} else {
- CV[0] = ConstantFP::get(
- *Context,
- APFloat(Sem, APInt::getLowBitsSet(SizeInBits, SizeInBits - 1)));
- }
- C = ConstantVector::get(CV);
- CPIdx = DAG.getConstantPool(C, PtrVT, 16);
- SDValue Val =
- DAG.getLoad(LogicVT, dl, DAG.getEntryNode(), CPIdx,
- MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
- /* Alignment = */ 16);
- // If the magnitude operand wasn't a constant, we need to AND out the sign.
- if (!isa<ConstantFPSDNode>(Op0)) {
- if (!IsF128)
- Op0 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Op0);
- Val = DAG.getNode(X86ISD::FAND, dl, LogicVT, Op0, Val);
+ // If the magnitude operand wasn't a constant, we need to AND out the sign.
+ if (IsFakeVector)
+ Mag = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Mag);
+ MagBits = DAG.getNode(X86ISD::FAND, dl, LogicVT, Mag, MagMask);
}
+
// OR the magnitude value with the sign bit.
- Val = DAG.getNode(X86ISD::FOR, dl, LogicVT, Val, SignBit);
- return IsF128 ? Val :
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SrcVT, Val,
- DAG.getIntPtrConstant(0, dl));
+ SDValue Or = DAG.getNode(X86ISD::FOR, dl, LogicVT, MagBits, SignBit);
+ return !IsFakeVector ? Or : DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Or,
+ DAG.getIntPtrConstant(0, dl));
}
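The rewrite above still implements the classic copysign bit formula; a scalar f64 sketch (copysignBits is a hypothetical helper) of the FAND/FAND/FOR sequence using SignMask and MagMask:

    #include <cstdint>
    #include <cstring>

    double copysignBits(double Mag, double Sign) {
      uint64_t M, S;
      std::memcpy(&M, &Mag, 8);
      std::memcpy(&S, &Sign, 8);
      const uint64_t SignMask = 1ULL << 63;          // APInt::getSignBit(64)
      uint64_t R = (M & ~SignMask) | (S & SignMask); // MagBits | SignBit
      std::memcpy(&Mag, &R, 8);
      return Mag;
    }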
static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) {
@@ -14764,7 +15813,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
goto default_case;
if (ConstantSDNode *C =
- dyn_cast<ConstantSDNode>(ArithOp.getNode()->getOperand(1))) {
+ dyn_cast<ConstantSDNode>(ArithOp.getOperand(1))) {
// An add of one will be selected as an INC.
if (C->isOne() && !Subtarget.slowIncDec()) {
Opcode = X86ISD::INC;
@@ -14821,7 +15870,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
if (!Subtarget.hasBMI() || !isAndn || !isLegalAndnType)
break;
}
- // FALL THROUGH
+ LLVM_FALLTHROUGH;
case ISD::SUB:
case ISD::OR:
case ISD::XOR:
@@ -14968,14 +16017,27 @@ SDValue X86TargetLowering::ConvertCmpIfNecessary(SDValue Cmp,
return DAG.getNode(X86ISD::SAHF, dl, MVT::i32, TruncSrl);
}
+/// Check if replacement of SQRT with RSQRT should be disabled.
+bool X86TargetLowering::isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+
+ // We never want to use both SQRT and RSQRT instructions for the same input.
+ if (DAG.getNodeIfExists(X86ISD::FRSQRT, DAG.getVTList(VT), Op))
+ return false;
+
+ if (VT.isVector())
+ return Subtarget.hasFastVectorFSQRT();
+ return Subtarget.hasFastScalarFSQRT();
+}
+
/// The minimum architected relative accuracy is 2^-12. We need one
/// Newton-Raphson step to have a good float result (24 bits of precision).
-SDValue X86TargetLowering::getRsqrtEstimate(SDValue Op,
- DAGCombinerInfo &DCI,
- unsigned &RefinementSteps,
- bool &UseOneConstNR) const {
+SDValue X86TargetLowering::getSqrtEstimate(SDValue Op,
+ SelectionDAG &DAG, int Enabled,
+ int &RefinementSteps,
+ bool &UseOneConstNR,
+ bool Reciprocal) const {
EVT VT = Op.getValueType();
- const char *RecipOp;
// SSE1 has rsqrtss and rsqrtps. AVX adds a 256-bit variant for rsqrtps.
// TODO: Add support for AVX512 (v16f32).
@@ -14984,30 +16046,24 @@ SDValue X86TargetLowering::getRsqrtEstimate(SDValue Op,
// instructions: convert to single, rsqrtss, convert back to double, refine
// (3 steps = at least 13 insts). If an 'rsqrtsd' variant was added to the ISA
// along with FMA, this could be a throughput win.
- if (VT == MVT::f32 && Subtarget.hasSSE1())
- RecipOp = "sqrtf";
- else if ((VT == MVT::v4f32 && Subtarget.hasSSE1()) ||
- (VT == MVT::v8f32 && Subtarget.hasAVX()))
- RecipOp = "vec-sqrtf";
- else
- return SDValue();
+ if ((VT == MVT::f32 && Subtarget.hasSSE1()) ||
+ (VT == MVT::v4f32 && Subtarget.hasSSE1()) ||
+ (VT == MVT::v8f32 && Subtarget.hasAVX())) {
+ if (RefinementSteps == ReciprocalEstimate::Unspecified)
+ RefinementSteps = 1;
- TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
- if (!Recips.isEnabled(RecipOp))
- return SDValue();
-
- RefinementSteps = Recips.getRefinementSteps(RecipOp);
- UseOneConstNR = false;
- return DCI.DAG.getNode(X86ISD::FRSQRT, SDLoc(Op), VT, Op);
+ UseOneConstNR = false;
+ return DAG.getNode(X86ISD::FRSQRT, SDLoc(Op), VT, Op);
+ }
+ return SDValue();
}
/// The minimum architected relative accuracy is 2^-12. We need one
/// Newton-Raphson step to have a good float result (24 bits of precision).
-SDValue X86TargetLowering::getRecipEstimate(SDValue Op,
- DAGCombinerInfo &DCI,
- unsigned &RefinementSteps) const {
+SDValue X86TargetLowering::getRecipEstimate(SDValue Op, SelectionDAG &DAG,
+ int Enabled,
+ int &RefinementSteps) const {
EVT VT = Op.getValueType();
- const char *RecipOp;
// SSE1 has rcpss and rcpps. AVX adds a 256-bit variant for rcpps.
// TODO: Add support for AVX512 (v16f32).
@@ -15016,20 +16072,22 @@ SDValue X86TargetLowering::getRecipEstimate(SDValue Op,
// 15 instructions: convert to single, rcpss, convert back to double, refine
// (3 steps = 12 insts). If an 'rcpsd' variant was added to the ISA
// along with FMA, this could be a throughput win.
- if (VT == MVT::f32 && Subtarget.hasSSE1())
- RecipOp = "divf";
- else if ((VT == MVT::v4f32 && Subtarget.hasSSE1()) ||
- (VT == MVT::v8f32 && Subtarget.hasAVX()))
- RecipOp = "vec-divf";
- else
- return SDValue();
- TargetRecip Recips = DCI.DAG.getTarget().Options.Reciprocals;
- if (!Recips.isEnabled(RecipOp))
- return SDValue();
+ if ((VT == MVT::f32 && Subtarget.hasSSE1()) ||
+ (VT == MVT::v4f32 && Subtarget.hasSSE1()) ||
+ (VT == MVT::v8f32 && Subtarget.hasAVX())) {
+ // Enable estimate codegen with 1 refinement step for vector division.
+ // Scalar division estimates are disabled because they break too much
+ // real-world code. These defaults are intended to match GCC behavior.
+ if (VT == MVT::f32 && Enabled == ReciprocalEstimate::Unspecified)
+ return SDValue();
+
+ if (RefinementSteps == ReciprocalEstimate::Unspecified)
+ RefinementSteps = 1;
- RefinementSteps = Recips.getRefinementSteps(RecipOp);
- return DCI.DAG.getNode(X86ISD::FRCP, SDLoc(Op), VT, Op);
+ return DAG.getNode(X86ISD::FRCP, SDLoc(Op), VT, Op);
+ }
+ return SDValue();
}
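Both estimate hooks above return a raw RSQRTPS/RCPPS-style approximation and report one refinement step; the generic combiner is then expected to apply a Newton-Raphson iteration. A minimal sketch of those refinement formulas (refineRcp/refineRsqrt are illustrative names):

    // One Newton-Raphson iteration, starting from a hardware estimate X0.
    float refineRcp(float D, float X0) {          // improves X0 ~ 1/D
      return X0 * (2.0f - D * X0);
    }
    float refineRsqrt(float D, float X0) {        // improves X0 ~ 1/sqrt(D)
      return X0 * (1.5f - 0.5f * D * X0 * X0);
    }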
/// If we have at least two divisions that use the same divisor, convert to
@@ -15042,9 +16100,46 @@ unsigned X86TargetLowering::combineRepeatedFPDivisors() const {
return 2;
}
+/// Helper for creating a X86ISD::SETCC node.
+static SDValue getSETCC(X86::CondCode Cond, SDValue EFLAGS, const SDLoc &dl,
+ SelectionDAG &DAG) {
+ return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(Cond, dl, MVT::i8), EFLAGS);
+}
+
+/// Create a BT (Bit Test) node - Test bit \p BitNo in \p Src and set condition
+/// according to equal/not-equal condition code \p CC.
+static SDValue getBitTestCondition(SDValue Src, SDValue BitNo, ISD::CondCode CC,
+ const SDLoc &dl, SelectionDAG &DAG) {
+ // If Src is i8, promote it to i32 with any_extend. There is no i8 BT
+ // instruction. Since the shift amount is in-range-or-undefined, we know
+ // that doing a bittest on the i32 value is ok. We extend to i32 because
+ // the encoding for the i16 version is larger than the i32 version.
+  // Also promote i16 to i32 for performance / code size reasons.
+ if (Src.getValueType() == MVT::i8 || Src.getValueType() == MVT::i16)
+ Src = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Src);
+
+ // See if we can use the 32-bit instruction instead of the 64-bit one for a
+ // shorter encoding. Since the former takes the modulo 32 of BitNo and the
+ // latter takes the modulo 64, this is only valid if the 5th bit of BitNo is
+ // known to be zero.
+ if (Src.getValueType() == MVT::i64 &&
+ DAG.MaskedValueIsZero(BitNo, APInt(BitNo.getValueSizeInBits(), 32)))
+ Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);
+
+ // If the operand types disagree, extend the shift amount to match. Since
+ // BT ignores high bits (like shifts) we can use anyextend.
+ if (Src.getValueType() != BitNo.getValueType())
+ BitNo = DAG.getNode(ISD::ANY_EXTEND, dl, Src.getValueType(), BitNo);
+
+ SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, Src, BitNo);
+ X86::CondCode Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;
+  return getSETCC(Cond, BT, dl, DAG);
+}
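A scalar sketch of the condition-code choice made above when testing against zero (bitIsClear is a hypothetical helper): BT copies the selected bit into CF, so SETEQ maps to COND_AE (carry clear) and SETNE to COND_B (carry set):

    #include <cstdint>

    bool bitIsClear(uint32_t Src, unsigned BitNo) {
      bool CF = (Src >> (BitNo % 32)) & 1u;   // BT: CF = bit BitNo of Src (mod width)
      return !CF;                             // SETAE / COND_AE: carry clear
    }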
+
/// Result of 'and' is compared against zero. Change to a BT node if possible.
-SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC,
- const SDLoc &dl, SelectionDAG &DAG) const {
+static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC,
+ const SDLoc &dl, SelectionDAG &DAG) {
SDValue Op0 = And.getOperand(0);
SDValue Op1 = And.getOperand(1);
if (Op0.getOpcode() == ISD::TRUNCATE)
@@ -15087,27 +16182,35 @@ SDValue X86TargetLowering::LowerToBT(SDValue And, ISD::CondCode CC,
}
}
- if (LHS.getNode()) {
- // If LHS is i8, promote it to i32 with any_extend. There is no i8 BT
- // instruction. Since the shift amount is in-range-or-undefined, we know
- // that doing a bittest on the i32 value is ok. We extend to i32 because
- // the encoding for the i16 version is larger than the i32 version.
- // Also promote i16 to i32 for performance / code size reason.
- if (LHS.getValueType() == MVT::i8 ||
- LHS.getValueType() == MVT::i16)
- LHS = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS);
+ if (LHS.getNode())
+ return getBitTestCondition(LHS, RHS, CC, dl, DAG);
- // If the operand types disagree, extend the shift amount to match. Since
- // BT ignores high bits (like shifts) we can use anyextend.
- if (LHS.getValueType() != RHS.getValueType())
- RHS = DAG.getNode(ISD::ANY_EXTEND, dl, LHS.getValueType(), RHS);
+ return SDValue();
+}
- SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, LHS, RHS);
- X86::CondCode Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;
- return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
- DAG.getConstant(Cond, dl, MVT::i8), BT);
- }
+// Convert (truncate (srl X, N) to i1) to (bt X, N)
+static SDValue LowerTruncateToBT(SDValue Op, ISD::CondCode CC,
+ const SDLoc &dl, SelectionDAG &DAG) {
+
+ assert(Op.getOpcode() == ISD::TRUNCATE && Op.getValueType() == MVT::i1 &&
+ "Expected TRUNCATE to i1 node");
+
+ if (Op.getOperand(0).getOpcode() != ISD::SRL)
+ return SDValue();
+
+ SDValue ShiftRight = Op.getOperand(0);
+ return getBitTestCondition(ShiftRight.getOperand(0), ShiftRight.getOperand(1),
+ CC, dl, DAG);
+}
+/// Result of 'and' or 'trunc to i1' is compared against zero.
+/// Change to a BT node if possible.
+SDValue X86TargetLowering::LowerToBT(SDValue Op, ISD::CondCode CC,
+ const SDLoc &dl, SelectionDAG &DAG) const {
+ if (Op.getOpcode() == ISD::AND)
+ return LowerAndToBT(Op, CC, dl, DAG);
+ if (Op.getOpcode() == ISD::TRUNCATE && Op.getValueType() == MVT::i1)
+ return LowerTruncateToBT(Op, CC, dl, DAG);
return SDValue();
}
@@ -15132,19 +16235,19 @@ static int translateX86FSETCC(ISD::CondCode SetCCOpcode, SDValue &Op0,
case ISD::SETOEQ:
case ISD::SETEQ: SSECC = 0; break;
case ISD::SETOGT:
- case ISD::SETGT: Swap = true; // Fallthrough
+ case ISD::SETGT: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETLT:
case ISD::SETOLT: SSECC = 1; break;
case ISD::SETOGE:
- case ISD::SETGE: Swap = true; // Fallthrough
+ case ISD::SETGE: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETLE:
case ISD::SETOLE: SSECC = 2; break;
case ISD::SETUO: SSECC = 3; break;
case ISD::SETUNE:
case ISD::SETNE: SSECC = 4; break;
- case ISD::SETULE: Swap = true; // Fallthrough
+ case ISD::SETULE: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETUGE: SSECC = 5; break;
- case ISD::SETULT: Swap = true; // Fallthrough
+ case ISD::SETULT: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETUGT: SSECC = 6; break;
case ISD::SETO: SSECC = 7; break;
case ISD::SETUEQ:
@@ -15250,12 +16353,12 @@ static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) {
case ISD::SETNE: SSECC = 4; break;
case ISD::SETEQ: Opc = X86ISD::PCMPEQM; break;
case ISD::SETUGT: SSECC = 6; Unsigned = true; break;
- case ISD::SETLT: Swap = true; //fall-through
+ case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH;
case ISD::SETGT: Opc = X86ISD::PCMPGTM; break;
case ISD::SETULT: SSECC = 1; Unsigned = true; break;
case ISD::SETUGE: SSECC = 5; Unsigned = true; break; //NLT
case ISD::SETGE: Swap = true; SSECC = 2; break; // LE + swap
- case ISD::SETULE: Unsigned = true; //fall-through
+ case ISD::SETULE: Unsigned = true; LLVM_FALLTHROUGH;
case ISD::SETLE: SSECC = 2; break;
}
@@ -15414,7 +16517,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
// In this case use SSE compare
bool UseAVX512Inst =
(OpVT.is512BitVector() ||
- OpVT.getVectorElementType().getSizeInBits() >= 32 ||
+ OpVT.getScalarSizeInBits() >= 32 ||
(Subtarget.hasBWI() && Subtarget.hasVLX()));
if (UseAVX512Inst)
@@ -15638,15 +16741,12 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
// Lower (X & (1 << N)) == 0 to BT(X, N).
// Lower ((X >>u N) & 1) != 0 to BT(X, N).
// Lower ((X >>s N) & 1) != 0 to BT(X, N).
- if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() &&
- isNullConstant(Op1) &&
+ // Lower (trunc (X >> N) to i1) to BT(X, N).
+ if (Op0.hasOneUse() && isNullConstant(Op1) &&
(CC == ISD::SETEQ || CC == ISD::SETNE)) {
if (SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG)) {
- if (VT == MVT::i1) {
- NewSetCC = DAG.getNode(ISD::AssertZext, dl, MVT::i8, NewSetCC,
- DAG.getValueType(MVT::i1));
+ if (VT == MVT::i1)
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewSetCC);
- }
return NewSetCC;
}
}
@@ -15665,14 +16765,9 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
return Op0;
CCode = X86::GetOppositeBranchCondition(CCode);
- SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
- DAG.getConstant(CCode, dl, MVT::i8),
- Op0.getOperand(1));
- if (VT == MVT::i1) {
- SetCC = DAG.getNode(ISD::AssertZext, dl, MVT::i8, SetCC,
- DAG.getValueType(MVT::i1));
+ SDValue SetCC = getSETCC(CCode, Op0.getOperand(1), dl, DAG);
+ if (VT == MVT::i1)
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, SetCC);
- }
return SetCC;
}
}
@@ -15687,20 +16782,16 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
}
}
- bool isFP = Op1.getSimpleValueType().isFloatingPoint();
- unsigned X86CC = TranslateX86CC(CC, dl, isFP, Op0, Op1, DAG);
+ bool IsFP = Op1.getSimpleValueType().isFloatingPoint();
+ X86::CondCode X86CC = TranslateX86CC(CC, dl, IsFP, Op0, Op1, DAG);
if (X86CC == X86::COND_INVALID)
return SDValue();
SDValue EFLAGS = EmitCmp(Op0, Op1, X86CC, dl, DAG);
EFLAGS = ConvertCmpIfNecessary(EFLAGS, DAG);
- SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
- DAG.getConstant(X86CC, dl, MVT::i8), EFLAGS);
- if (VT == MVT::i1) {
- SetCC = DAG.getNode(ISD::AssertZext, dl, MVT::i8, SetCC,
- DAG.getValueType(MVT::i1));
+ SDValue SetCC = getSETCC(X86CC, EFLAGS, dl, DAG);
+ if (VT == MVT::i1)
return DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, SetCC);
- }
return SetCC;
}
@@ -15717,34 +16808,23 @@ SDValue X86TargetLowering::LowerSETCCE(SDValue Op, SelectionDAG &DAG) const {
assert(Carry.getOpcode() != ISD::CARRY_FALSE);
SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
SDValue Cmp = DAG.getNode(X86ISD::SBB, DL, VTs, LHS, RHS, Carry);
- SDValue SetCC = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
- DAG.getConstant(CC, DL, MVT::i8), Cmp.getValue(1));
- if (Op.getSimpleValueType() == MVT::i1) {
- SetCC = DAG.getNode(ISD::AssertZext, DL, MVT::i8, SetCC,
- DAG.getValueType(MVT::i1));
+ SDValue SetCC = getSETCC(CC, Cmp.getValue(1), DL, DAG);
+ if (Op.getSimpleValueType() == MVT::i1)
return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
- }
return SetCC;
}
/// Return true if opcode is a X86 logical comparison.
static bool isX86LogicalCmp(SDValue Op) {
- unsigned Opc = Op.getNode()->getOpcode();
+ unsigned Opc = Op.getOpcode();
if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI ||
Opc == X86ISD::SAHF)
return true;
if (Op.getResNo() == 1 &&
- (Opc == X86ISD::ADD ||
- Opc == X86ISD::SUB ||
- Opc == X86ISD::ADC ||
- Opc == X86ISD::SBB ||
- Opc == X86ISD::SMUL ||
- Opc == X86ISD::UMUL ||
- Opc == X86ISD::INC ||
- Opc == X86ISD::DEC ||
- Opc == X86ISD::OR ||
- Opc == X86ISD::XOR ||
- Opc == X86ISD::AND))
+ (Opc == X86ISD::ADD || Opc == X86ISD::SUB || Opc == X86ISD::ADC ||
+ Opc == X86ISD::SBB || Opc == X86ISD::SMUL || Opc == X86ISD::UMUL ||
+ Opc == X86ISD::INC || Opc == X86ISD::DEC || Opc == X86ISD::OR ||
+ Opc == X86ISD::XOR || Opc == X86ISD::AND))
return true;
if (Op.getResNo() == 2 && Opc == X86ISD::UMUL)
@@ -15753,27 +16833,18 @@ static bool isX86LogicalCmp(SDValue Op) {
return false;
}
-/// Returns the "condition" node, that may be wrapped with "truncate".
-/// Like this: (i1 (trunc (i8 X86ISD::SETCC))).
-static SDValue getCondAfterTruncWithZeroHighBitsInput(SDValue V, SelectionDAG &DAG) {
+static bool isTruncWithZeroHighBitsInput(SDValue V, SelectionDAG &DAG) {
if (V.getOpcode() != ISD::TRUNCATE)
- return V;
+ return false;
SDValue VOp0 = V.getOperand(0);
- if (VOp0.getOpcode() == ISD::AssertZext &&
- V.getValueSizeInBits() ==
- cast<VTSDNode>(VOp0.getOperand(1))->getVT().getSizeInBits())
- return VOp0.getOperand(0);
-
unsigned InBits = VOp0.getValueSizeInBits();
unsigned Bits = V.getValueSizeInBits();
- if (DAG.MaskedValueIsZero(VOp0, APInt::getHighBitsSet(InBits,InBits-Bits)))
- return V.getOperand(0);
- return V;
+ return DAG.MaskedValueIsZero(VOp0, APInt::getHighBitsSet(InBits,InBits-Bits));
}
SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
- bool addTest = true;
+ bool AddTest = true;
SDValue Cond = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
SDValue Op2 = Op.getOperand(2);
@@ -15794,9 +16865,10 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
if (SSECC != 8) {
if (Subtarget.hasAVX512()) {
- SDValue Cmp = DAG.getNode(X86ISD::FSETCC, DL, MVT::i1, CondOp0, CondOp1,
- DAG.getConstant(SSECC, DL, MVT::i8));
- return DAG.getNode(X86ISD::SELECT, DL, VT, Cmp, Op1, Op2);
+ SDValue Cmp = DAG.getNode(X86ISD::FSETCCM, DL, MVT::i1, CondOp0,
+ CondOp1, DAG.getConstant(SSECC, DL, MVT::i8));
+ return DAG.getNode(VT.isVector() ? X86ISD::SELECT : X86ISD::SELECTS,
+ DL, VT, Cmp, Op1, Op2);
}
SDValue Cmp = DAG.getNode(X86ISD::FSETCC, DL, VT, CondOp0, CondOp1,
@@ -15840,6 +16912,11 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
}
}
+ // AVX512 fallback is to lower selects of scalar floats to masked moves.
+ if (Cond.getValueType() == MVT::i1 && (VT == MVT::f64 || VT == MVT::f32) &&
+ Subtarget.hasAVX512())
+ return DAG.getNode(X86ISD::SELECTS, DL, VT, Cond, Op1, Op2);
+
if (VT.isVector() && VT.getVectorElementType() == MVT::i1) {
SDValue Op1Scalar;
if (ISD::isBuildVectorOfConstantSDNodes(Op1.getNode()))
@@ -15953,7 +17030,7 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
if ((isX86LogicalCmp(Cmp) && !IllegalFPCMov) ||
Opc == X86ISD::BT) { // FIXME
Cond = Cmp;
- addTest = false;
+ AddTest = false;
}
} else if (CondOpcode == ISD::USUBO || CondOpcode == ISD::SSUBO ||
CondOpcode == ISD::UADDO || CondOpcode == ISD::SADDO ||
@@ -15987,12 +17064,13 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
Cond = X86Op.getValue(1);
CC = DAG.getConstant(X86Cond, DL, MVT::i8);
- addTest = false;
+ AddTest = false;
}
- if (addTest) {
+ if (AddTest) {
// Look past the truncate if the high bits are known zero.
- Cond = getCondAfterTruncWithZeroHighBitsInput(Cond, DAG);
+ if (isTruncWithZeroHighBitsInput(Cond, DAG))
+ Cond = Cond.getOperand(0);
// We know the result of AND is compared against zero. Try to match
// it to BT.
@@ -16000,12 +17078,12 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
if (SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, DL, DAG)) {
CC = NewSetCC.getOperand(0);
Cond = NewSetCC.getOperand(1);
- addTest = false;
+ AddTest = false;
}
}
}
- if (addTest) {
+ if (AddTest) {
CC = DAG.getConstant(X86::COND_NE, DL, MVT::i8);
Cond = EmitTest(Cond, X86::COND_NE, DL, DAG);
}
@@ -16077,7 +17155,7 @@ static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op,
VTElt.getSizeInBits() >= 32))))
return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
- unsigned int NumElts = VT.getVectorNumElements();
+ unsigned NumElts = VT.getVectorNumElements();
if (NumElts != 8 && NumElts != 16 && !Subtarget.hasBWI())
return SDValue();
@@ -16089,12 +17167,11 @@ static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op,
}
assert (InVT.getVectorElementType() == MVT::i1 && "Unexpected vector type");
- MVT ExtVT = NumElts == 8 ? MVT::v8i64 : MVT::v16i32;
- SDValue NegOne =
- DAG.getConstant(APInt::getAllOnesValue(ExtVT.getScalarSizeInBits()), dl,
- ExtVT);
- SDValue Zero =
- DAG.getConstant(APInt::getNullValue(ExtVT.getScalarSizeInBits()), dl, ExtVT);
+ MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(512/NumElts), NumElts);
+ SDValue NegOne = DAG.getConstant(
+ APInt::getAllOnesValue(ExtVT.getScalarSizeInBits()), dl, ExtVT);
+ SDValue Zero = DAG.getConstant(
+ APInt::getNullValue(ExtVT.getScalarSizeInBits()), dl, ExtVT);
SDValue V = DAG.getNode(ISD::VSELECT, dl, ExtVT, In, NegOne, Zero);
if (VT.is512BitVector())
@@ -16102,9 +17179,13 @@ static SDValue LowerSIGN_EXTEND_AVX512(SDValue Op,
return DAG.getNode(X86ISD::VTRUNC, dl, VT, V);
}
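The VSELECT between NegOne and Zero above is just per-lane sign extension of an i1 mask bit; a one-lane sketch (signExtendMaskBit is a hypothetical name):

    #include <cstdint>

    int32_t signExtendMaskBit(bool MaskBit) {
      return MaskBit ? -1 : 0;   // all-ones for a set mask bit, zero otherwise
    }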
-static SDValue LowerSIGN_EXTEND_VECTOR_INREG(SDValue Op,
- const X86Subtarget &Subtarget,
- SelectionDAG &DAG) {
+// Lowering for SIGN_EXTEND_VECTOR_INREG and ZERO_EXTEND_VECTOR_INREG.
+// For sign extend this needs to handle all vector sizes and SSE4.1 and
+// non-SSE4.1 targets. For zero extend this should only handle inputs of
+// MVT::v64i8 when BWI is not supported, but AVX512 is.
+static SDValue LowerEXTEND_VECTOR_INREG(SDValue Op,
+ const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) {
SDValue In = Op->getOperand(0);
MVT VT = Op->getSimpleValueType(0);
MVT InVT = In.getSimpleValueType();
@@ -16119,20 +17200,33 @@ static SDValue LowerSIGN_EXTEND_VECTOR_INREG(SDValue Op,
if (InSVT != MVT::i32 && InSVT != MVT::i16 && InSVT != MVT::i8)
return SDValue();
if (!(VT.is128BitVector() && Subtarget.hasSSE2()) &&
- !(VT.is256BitVector() && Subtarget.hasInt256()))
+ !(VT.is256BitVector() && Subtarget.hasInt256()) &&
+ !(VT.is512BitVector() && Subtarget.hasAVX512()))
return SDValue();
SDLoc dl(Op);
// For 256-bit vectors, we only need the lower (128-bit) half of the input.
- if (VT.is256BitVector())
- In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl,
- MVT::getVectorVT(InSVT, InVT.getVectorNumElements() / 2),
- In, DAG.getIntPtrConstant(0, dl));
+ // For 512-bit vectors, we need 128-bits or 256-bits.
+ if (VT.getSizeInBits() > 128) {
+ // Input needs to be at least the same number of elements as output, and
+ // at least 128-bits.
+ int InSize = InSVT.getSizeInBits() * VT.getVectorNumElements();
+ In = extractSubVector(In, 0, DAG, dl, std::max(InSize, 128));
+ }
+
+ assert((Op.getOpcode() != ISD::ZERO_EXTEND_VECTOR_INREG ||
+ InVT == MVT::v64i8) && "Zero extend only for v64i8 input!");
// SSE41 targets can use the pmovsx* instructions directly.
+ unsigned ExtOpc = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG ?
+ X86ISD::VSEXT : X86ISD::VZEXT;
if (Subtarget.hasSSE41())
- return DAG.getNode(X86ISD::VSEXT, dl, VT, In);
+ return DAG.getNode(ExtOpc, dl, VT, In);
+
+ // We should only get here for sign extend.
+ assert(Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG &&
+ "Unexpected opcode!");
// pre-SSE41 targets unpack lower lanes and then sign-extend using SRAI.
SDValue Curr = In;
@@ -16150,7 +17244,7 @@ static SDValue LowerSIGN_EXTEND_VECTOR_INREG(SDValue Op,
SDValue SignExt = Curr;
if (CurrVT != InVT) {
unsigned SignExtShift =
- CurrVT.getVectorElementType().getSizeInBits() - InSVT.getSizeInBits();
+ CurrVT.getScalarSizeInBits() - InSVT.getSizeInBits();
SignExt = DAG.getNode(X86ISD::VSRAI, dl, CurrVT, Curr,
DAG.getConstant(SignExtShift, dl, MVT::i8));
}
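The unpack + VSRAI combination used here amounts to shifting the narrow value into the top of the wider lane and then arithmetic-shifting it back down; a one-lane sketch, under the usual assumption that >> on a signed value is an arithmetic shift:

    #include <cstdint>

    // One 8->16 lane: the unpack step positions the byte in the high half,
    // the arithmetic shift right by (16 - 8) then replicates its sign bit.
    int16_t signExtend8To16(uint8_t V) {
      uint16_t Widened = (uint16_t)V << 8;   // unpack places V in the high byte
      return (int16_t)Widened >> 8;          // VSRAI by 8
    }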
@@ -16211,7 +17305,7 @@ static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
SDValue OpHi = DAG.getVectorShuffle(InVT, dl, In, Undef, ShufMask2);
MVT HalfVT = MVT::getVectorVT(VT.getVectorElementType(),
- VT.getVectorNumElements()/2);
+ VT.getVectorNumElements() / 2);
OpLo = DAG.getNode(X86ISD::VSEXT, dl, HalfVT, OpLo);
OpHi = DAG.getNode(X86ISD::VSEXT, dl, HalfVT, OpHi);
@@ -16643,7 +17737,7 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
case X86::COND_B:
// These can only come from an arithmetic instruction with overflow,
// e.g. SADDO, UADDO.
- Cond = Cond.getNode()->getOperand(1);
+ Cond = Cond.getOperand(1);
addTest = false;
break;
}
@@ -16828,11 +17922,11 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
if (addTest) {
    // Look past the truncate if the high bits are known zero.
- Cond = getCondAfterTruncWithZeroHighBitsInput(Cond, DAG);
+ if (isTruncWithZeroHighBitsInput(Cond, DAG))
+ Cond = Cond.getOperand(0);
- // We know the result of AND is compared against zero. Try to match
- // it to BT.
- if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) {
+ // We know the result is compared against zero. Try to match it to BT.
+ if (Cond.hasOneUse()) {
if (SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG)) {
CC = NewSetCC.getOperand(0);
Cond = NewSetCC.getOperand(1);
@@ -17000,7 +18094,7 @@ SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
assert(Subtarget.is64Bit() &&
"LowerVAARG only handles 64-bit va_arg!");
- assert(Op.getNode()->getNumOperands() == 4);
+ assert(Op.getNumOperands() == 4);
MachineFunction &MF = DAG.getMachineFunction();
if (Subtarget.isCallingConvWin64(MF.getFunction()->getCallingConv()))
@@ -17290,7 +18384,7 @@ static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
case X86ISD::VTRUNC:
case X86ISD::VTRUNCS:
case X86ISD::VTRUNCUS:
- case ISD::FP_TO_FP16:
+ case X86ISD::CVTPS2PH:
// We can't use ISD::VSELECT here because it is not always "Legal"
// for the destination type. For example vpmovqb require only AVX512
// and vselect that can operate on byte element type require BWI
@@ -17321,7 +18415,8 @@ static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask,
// The mask should be of type MVT::i1
SDValue IMask = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Mask);
- if (Op.getOpcode() == X86ISD::FSETCC)
+ if (Op.getOpcode() == X86ISD::FSETCCM ||
+ Op.getOpcode() == X86ISD::FSETCCM_RND)
return DAG.getNode(ISD::AND, dl, VT, Op, IMask);
if (Op.getOpcode() == X86ISD::VFPCLASS ||
Op.getOpcode() == X86ISD::VFPCLASSS)
@@ -17329,7 +18424,7 @@ static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask,
if (PreservedSrc.isUndef())
PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl);
- return DAG.getNode(X86ISD::SELECT, dl, VT, IMask, Op, PreservedSrc);
+ return DAG.getNode(X86ISD::SELECTS, dl, VT, IMask, Op, PreservedSrc);
}
static int getSEHRegistrationNodeSize(const Function *Fn) {
@@ -17395,6 +18490,15 @@ static SDValue recoverFramePointer(SelectionDAG &DAG, const Function *Fn,
static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
+ // Helper to detect if the operand is CUR_DIRECTION rounding mode.
+ auto isRoundModeCurDirection = [](SDValue Rnd) {
+ if (!isa<ConstantSDNode>(Rnd))
+ return false;
+
+ unsigned Round = cast<ConstantSDNode>(Rnd)->getZExtValue();
+ return Round == X86::STATIC_ROUNDING::CUR_DIRECTION;
+ };
+
SDLoc dl(Op);
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
MVT VT = Op.getSimpleValueType();
@@ -17406,9 +18510,6 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
case INTR_TYPE_2OP:
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2));
- case INTR_TYPE_2OP_IMM8:
- return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1),
- DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op.getOperand(2)));
case INTR_TYPE_3OP:
return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Op.getOperand(1),
Op.getOperand(2), Op.getOperand(3));
@@ -17420,7 +18521,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
SDValue PassThru = Op.getOperand(2);
SDValue Mask = Op.getOperand(3);
SDValue RoundingMode;
- // We allways add rounding mode to the Node.
+ // We always add rounding mode to the Node.
// If the rounding mode is not specified, we add the
// "current direction" mode.
if (Op.getNumOperands() == 4)
@@ -17428,13 +18529,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32);
else
RoundingMode = Op.getOperand(4);
- unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
- if (IntrWithRoundingModeOpcode != 0)
- if (cast<ConstantSDNode>(RoundingMode)->getZExtValue() !=
- X86::STATIC_ROUNDING::CUR_DIRECTION)
- return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
- dl, Op.getValueType(), Src, RoundingMode),
- Mask, PassThru, Subtarget, DAG);
+ assert(IntrData->Opc1 == 0 && "Unexpected second opcode!");
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src,
RoundingMode),
Mask, PassThru, Subtarget, DAG);
@@ -17449,8 +18544,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
if (IntrWithRoundingModeOpcode != 0) {
SDValue Rnd = Op.getOperand(4);
- unsigned Round = cast<ConstantSDNode>(Rnd)->getZExtValue();
- if (Round != X86::STATIC_ROUNDING::CUR_DIRECTION) {
+ if (!isRoundModeCurDirection(Rnd)) {
return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
dl, Op.getValueType(),
Src, Rnd),
@@ -17478,8 +18572,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
// (2) With rounding mode and sae - 7 operands.
if (Op.getNumOperands() == 6) {
SDValue Sae = Op.getOperand(5);
- unsigned Opc = IntrData->Opc1 ? IntrData->Opc1 : IntrData->Opc0;
- return getScalarMaskingNode(DAG.getNode(Opc, dl, VT, Src1, Src2,
+ return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2,
Sae),
Mask, Src0, Subtarget, DAG);
}
@@ -17506,8 +18599,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
if (IntrWithRoundingModeOpcode != 0) {
SDValue Rnd = Op.getOperand(5);
- unsigned Round = cast<ConstantSDNode>(Rnd)->getZExtValue();
- if (Round != X86::STATIC_ROUNDING::CUR_DIRECTION) {
+ if (!isRoundModeCurDirection(Rnd)) {
return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
dl, Op.getValueType(),
Src1, Src2, Rnd),
@@ -17564,8 +18656,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
else
Rnd = DAG.getConstant(X86::STATIC_ROUNDING::CUR_DIRECTION, dl, MVT::i32);
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT,
- Src1, Src2, Imm, Rnd),
- Mask, PassThru, Subtarget, DAG);
+ Src1, Src2, Imm, Rnd),
+ Mask, PassThru, Subtarget, DAG);
}
case INTR_TYPE_3OP_IMM8_MASK:
case INTR_TYPE_3OP_MASK:
@@ -17592,8 +18684,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
if (IntrWithRoundingModeOpcode != 0) {
SDValue Rnd = Op.getOperand(6);
- unsigned Round = cast<ConstantSDNode>(Rnd)->getZExtValue();
- if (Round != X86::STATIC_ROUNDING::CUR_DIRECTION) {
+ if (!isRoundModeCurDirection(Rnd)) {
return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
dl, Op.getValueType(),
Src1, Src2, Src3, Rnd),
@@ -17616,19 +18707,21 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
}
case VPERM_3OP_MASKZ:
case VPERM_3OP_MASK:{
+ MVT VT = Op.getSimpleValueType();
// Src2 is the PassThru
SDValue Src1 = Op.getOperand(1);
- SDValue Src2 = Op.getOperand(2);
+ // PassThru needs to be the same type as the destination in order
+ // to pattern match correctly.
+ SDValue Src2 = DAG.getBitcast(VT, Op.getOperand(2));
SDValue Src3 = Op.getOperand(3);
SDValue Mask = Op.getOperand(4);
- MVT VT = Op.getSimpleValueType();
SDValue PassThru = SDValue();
// set PassThru element
if (IntrData->Type == VPERM_3OP_MASKZ)
PassThru = getZeroVector(VT, Subtarget, DAG, dl);
else
- PassThru = DAG.getBitcast(VT, Src2);
+ PassThru = Src2;
// Swap Src1 and Src2 in the node creation
return getVectorMaskingNode(DAG.getNode(IntrData->Opc0,
@@ -17660,8 +18753,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
if (IntrWithRoundingModeOpcode != 0) {
SDValue Rnd = Op.getOperand(5);
- if (cast<ConstantSDNode>(Rnd)->getZExtValue() !=
- X86::STATIC_ROUNDING::CUR_DIRECTION)
+ if (!isRoundModeCurDirection(Rnd))
return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
dl, Op.getValueType(),
Src1, Src2, Src3, Rnd),
@@ -17713,6 +18805,35 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
Src1, Src2, Src3, Src4),
Mask, PassThru, Subtarget, DAG);
}
+ case CVTPD2PS:
+ // ISD::FP_ROUND has a second argument that indicates if the truncation
+ // does not change the value. Set it to 0 since it can change.
+ return DAG.getNode(IntrData->Opc0, dl, VT, Op.getOperand(1),
+ DAG.getIntPtrConstant(0, dl));
+ case CVTPD2PS_MASK: {
+ SDValue Src = Op.getOperand(1);
+ SDValue PassThru = Op.getOperand(2);
+ SDValue Mask = Op.getOperand(3);
+ // We add rounding mode to the Node when
+ // - RM Opcode is specified and
+ // - RM is not "current direction".
+ unsigned IntrWithRoundingModeOpcode = IntrData->Opc1;
+ if (IntrWithRoundingModeOpcode != 0) {
+ SDValue Rnd = Op.getOperand(4);
+ if (!isRoundModeCurDirection(Rnd)) {
+ return getVectorMaskingNode(DAG.getNode(IntrWithRoundingModeOpcode,
+ dl, Op.getValueType(),
+ Src, Rnd),
+ Mask, PassThru, Subtarget, DAG);
+ }
+ }
+ assert(IntrData->Opc0 == ISD::FP_ROUND && "Unexpected opcode!");
+ // ISD::FP_ROUND has a second argument that indicates if the truncation
+ // does not change the value. Set it to 0 since it can change.
+ return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src,
+ DAG.getIntPtrConstant(0, dl)),
+ Mask, PassThru, Subtarget, DAG);
+ }
case FPCLASS: {
// FPclass intrinsics with mask
SDValue Src1 = Op.getOperand(1);
@@ -17738,7 +18859,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
SDValue FPclass = DAG.getNode(IntrData->Opc0, dl, MVT::i1, Src1, Imm);
SDValue FPclassMask = getScalarMaskingNode(FPclass, Mask,
DAG.getTargetConstant(0, dl, MVT::i1), Subtarget, DAG);
- return DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i8, FPclassMask);
+ return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, FPclassMask);
}
case CMP_MASK:
case CMP_MASK_CC: {
@@ -17765,8 +18886,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
// (IntrData->Opc1 != 0), then we check the rounding mode operand.
if (IntrData->Opc1 != 0) {
SDValue Rnd = Op.getOperand(5);
- if (cast<ConstantSDNode>(Rnd)->getZExtValue() !=
- X86::STATIC_ROUNDING::CUR_DIRECTION)
+ if (!isRoundModeCurDirection(Rnd))
Cmp = DAG.getNode(IntrData->Opc1, dl, MaskVT, Op.getOperand(1),
Op.getOperand(2), CC, Rnd);
}
@@ -17798,8 +18918,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
SDValue Cmp;
if (IntrData->Opc1 != 0) {
SDValue Rnd = Op.getOperand(5);
- if (cast<ConstantSDNode>(Rnd)->getZExtValue() !=
- X86::STATIC_ROUNDING::CUR_DIRECTION)
+ if (!isRoundModeCurDirection(Rnd))
Cmp = DAG.getNode(IntrData->Opc1, dl, MVT::i1, Src1, Src2, CC, Rnd);
}
//default rounding mode
@@ -17822,39 +18941,29 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
SDValue SetCC;
switch (CC) {
case ISD::SETEQ: { // (ZF = 0 and PF = 0)
- SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
- DAG.getConstant(X86::COND_E, dl, MVT::i8), Comi);
- SDValue SetNP = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
- DAG.getConstant(X86::COND_NP, dl, MVT::i8),
- Comi);
+ SetCC = getSETCC(X86::COND_E, Comi, dl, DAG);
+ SDValue SetNP = getSETCC(X86::COND_NP, Comi, dl, DAG);
SetCC = DAG.getNode(ISD::AND, dl, MVT::i8, SetCC, SetNP);
break;
}
case ISD::SETNE: { // (ZF = 1 or PF = 1)
- SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
- DAG.getConstant(X86::COND_NE, dl, MVT::i8), Comi);
- SDValue SetP = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
- DAG.getConstant(X86::COND_P, dl, MVT::i8),
- Comi);
+ SetCC = getSETCC(X86::COND_NE, Comi, dl, DAG);
+ SDValue SetP = getSETCC(X86::COND_P, Comi, dl, DAG);
SetCC = DAG.getNode(ISD::OR, dl, MVT::i8, SetCC, SetP);
break;
}
case ISD::SETGT: // (CF = 0 and ZF = 0)
- SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
- DAG.getConstant(X86::COND_A, dl, MVT::i8), Comi);
+ SetCC = getSETCC(X86::COND_A, Comi, dl, DAG);
break;
case ISD::SETLT: { // The condition is opposite to GT. Swap the operands.
- SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
- DAG.getConstant(X86::COND_A, dl, MVT::i8), InvComi);
+ SetCC = getSETCC(X86::COND_A, InvComi, dl, DAG);
break;
}
case ISD::SETGE: // CF = 0
- SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
- DAG.getConstant(X86::COND_AE, dl, MVT::i8), Comi);
+ SetCC = getSETCC(X86::COND_AE, Comi, dl, DAG);
break;
case ISD::SETLE: // The condition is opposite to GE. Swap the operands.
- SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
- DAG.getConstant(X86::COND_AE, dl, MVT::i8), InvComi);
+ SetCC = getSETCC(X86::COND_AE, InvComi, dl, DAG);
break;
default:
llvm_unreachable("Unexpected illegal condition!");
@@ -17868,12 +18977,11 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
SDValue Sae = Op.getOperand(4);
SDValue FCmp;
- if (cast<ConstantSDNode>(Sae)->getZExtValue() ==
- X86::STATIC_ROUNDING::CUR_DIRECTION)
- FCmp = DAG.getNode(X86ISD::FSETCC, dl, MVT::i1, LHS, RHS,
+ if (isRoundModeCurDirection(Sae))
+ FCmp = DAG.getNode(X86ISD::FSETCCM, dl, MVT::i1, LHS, RHS,
DAG.getConstant(CondVal, dl, MVT::i8));
else
- FCmp = DAG.getNode(X86ISD::FSETCC, dl, MVT::i1, LHS, RHS,
+ FCmp = DAG.getNode(X86ISD::FSETCCM_RND, dl, MVT::i1, LHS, RHS,
DAG.getConstant(CondVal, dl, MVT::i8), Sae);
// AnyExt just uses KMOVW %kreg, %r32; ZeroExt emits "and $1, %reg"
return DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, FCmp);
@@ -18027,14 +19135,15 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
case Intrinsic::x86_avx_vtestc_pd_256:
case Intrinsic::x86_avx_vtestnzc_pd_256: {
bool IsTestPacked = false;
- unsigned X86CC;
+ X86::CondCode X86CC;
switch (IntNo) {
default: llvm_unreachable("Bad fallthrough in Intrinsic lowering.");
case Intrinsic::x86_avx_vtestz_ps:
case Intrinsic::x86_avx_vtestz_pd:
case Intrinsic::x86_avx_vtestz_ps_256:
case Intrinsic::x86_avx_vtestz_pd_256:
- IsTestPacked = true; // Fallthrough
+ IsTestPacked = true;
+ LLVM_FALLTHROUGH;
case Intrinsic::x86_sse41_ptestz:
case Intrinsic::x86_avx_ptestz_256:
// ZF = 1
@@ -18044,7 +19153,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
case Intrinsic::x86_avx_vtestc_pd:
case Intrinsic::x86_avx_vtestc_ps_256:
case Intrinsic::x86_avx_vtestc_pd_256:
- IsTestPacked = true; // Fallthrough
+ IsTestPacked = true;
+ LLVM_FALLTHROUGH;
case Intrinsic::x86_sse41_ptestc:
case Intrinsic::x86_avx_ptestc_256:
// CF = 1
@@ -18054,7 +19164,8 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
case Intrinsic::x86_avx_vtestnzc_pd:
case Intrinsic::x86_avx_vtestnzc_ps_256:
case Intrinsic::x86_avx_vtestnzc_pd_256:
- IsTestPacked = true; // Fallthrough
+ IsTestPacked = true;
+ LLVM_FALLTHROUGH;
case Intrinsic::x86_sse41_ptestnzc:
case Intrinsic::x86_avx_ptestnzc_256:
// ZF and CF = 0
@@ -18066,18 +19177,17 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
SDValue RHS = Op.getOperand(2);
unsigned TestOpc = IsTestPacked ? X86ISD::TESTP : X86ISD::PTEST;
SDValue Test = DAG.getNode(TestOpc, dl, MVT::i32, LHS, RHS);
- SDValue CC = DAG.getConstant(X86CC, dl, MVT::i8);
- SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC, Test);
+ SDValue SetCC = getSETCC(X86CC, Test, dl, DAG);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
case Intrinsic::x86_avx512_kortestz_w:
case Intrinsic::x86_avx512_kortestc_w: {
- unsigned X86CC = (IntNo == Intrinsic::x86_avx512_kortestz_w)? X86::COND_E: X86::COND_B;
+ X86::CondCode X86CC =
+ (IntNo == Intrinsic::x86_avx512_kortestz_w) ? X86::COND_E : X86::COND_B;
SDValue LHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(1));
SDValue RHS = DAG.getBitcast(MVT::v16i1, Op.getOperand(2));
- SDValue CC = DAG.getConstant(X86CC, dl, MVT::i8);
SDValue Test = DAG.getNode(X86ISD::KORTEST, dl, MVT::i32, LHS, RHS);
- SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, CC, Test);
+ SDValue SetCC = getSETCC(X86CC, Test, dl, DAG);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
@@ -18092,7 +19202,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
case Intrinsic::x86_sse42_pcmpistriz128:
case Intrinsic::x86_sse42_pcmpestriz128: {
unsigned Opcode;
- unsigned X86CC;
+ X86::CondCode X86CC;
switch (IntNo) {
default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
case Intrinsic::x86_sse42_pcmpistria128:
@@ -18139,9 +19249,7 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget
SmallVector<SDValue, 5> NewOps(Op->op_begin()+1, Op->op_end());
SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
SDValue PCMP = DAG.getNode(Opcode, dl, VTs, NewOps);
- SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
- DAG.getConstant(X86CC, dl, MVT::i8),
- SDValue(PCMP.getNode(), 1));
+ SDValue SetCC = getSETCC(X86CC, SDValue(PCMP.getNode(), 1), dl, DAG);
return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC);
}
@@ -18267,6 +19375,51 @@ static SDValue getPrefetchNode(unsigned Opc, SDValue Op, SelectionDAG &DAG,
return SDValue(Res, 0);
}
+/// Handles the lowering of builtin intrinsics that return the value
+/// of the extended control register.
+static void getExtendedControlRegister(SDNode *N, const SDLoc &DL,
+ SelectionDAG &DAG,
+ const X86Subtarget &Subtarget,
+ SmallVectorImpl<SDValue> &Results) {
+ assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
+ SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
+ SDValue LO, HI;
+
+ // The ECX register is used to select the index of the XCR register to
+ // return.
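+ // (Index 0, for example, selects XCR0, the extended feature enabled mask.)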
+ SDValue Chain =
+ DAG.getCopyToReg(N->getOperand(0), DL, X86::ECX, N->getOperand(2));
+ SDNode *N1 = DAG.getMachineNode(X86::XGETBV, DL, Tys, Chain);
+ Chain = SDValue(N1, 0);
+
+ // Reads the content of XCR and returns it in registers EDX:EAX.
+ if (Subtarget.is64Bit()) {
+ LO = DAG.getCopyFromReg(Chain, DL, X86::RAX, MVT::i64, SDValue(N1, 1));
+ HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::RDX, MVT::i64,
+ LO.getValue(2));
+ } else {
+ LO = DAG.getCopyFromReg(Chain, DL, X86::EAX, MVT::i32, SDValue(N1, 1));
+ HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::EDX, MVT::i32,
+ LO.getValue(2));
+ }
+ Chain = HI.getValue(1);
+
+ if (Subtarget.is64Bit()) {
+ // Merge the two 32-bit values into a 64-bit one.
+ SDValue Tmp = DAG.getNode(ISD::SHL, DL, MVT::i64, HI,
+ DAG.getConstant(32, DL, MVT::i8));
+ Results.push_back(DAG.getNode(ISD::OR, DL, MVT::i64, LO, Tmp));
+ Results.push_back(Chain);
+ return;
+ }
+
+ // Use a BUILD_PAIR to merge the two 32-bit values into a 64-bit one.
+ SDValue Ops[] = { LO, HI };
+ SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops);
+ Results.push_back(Pair);
+ Results.push_back(Chain);
+}
+
/// Handles the lowering of builtin intrinsics that read performance monitor
/// counters (x86_rdpmc).
static void getReadPerformanceCounter(SDNode *N, const SDLoc &DL,
@@ -18413,6 +19566,33 @@ static SDValue MarkEHGuard(SDValue Op, SelectionDAG &DAG) {
return Chain;
}
+/// Emit Truncating Store with signed or unsigned saturation.
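+/// Each lane is clamped to the signed or unsigned range of the memory
+/// element type before being truncated and stored.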
+static SDValue
+EmitTruncSStore(bool SignedSat, SDValue Chain, const SDLoc &Dl, SDValue Val,
+ SDValue Ptr, EVT MemVT, MachineMemOperand *MMO,
+ SelectionDAG &DAG) {
+
+ SDVTList VTs = DAG.getVTList(MVT::Other);
+ SDValue Undef = DAG.getUNDEF(Ptr.getValueType());
+ SDValue Ops[] = { Chain, Val, Ptr, Undef };
+ return SignedSat ?
+ DAG.getTargetMemSDNode<TruncSStoreSDNode>(VTs, Ops, Dl, MemVT, MMO) :
+ DAG.getTargetMemSDNode<TruncUSStoreSDNode>(VTs, Ops, Dl, MemVT, MMO);
+}
+
+/// Emit Masked Truncating Store with signed or unsigned saturation.
+static SDValue
+EmitMaskedTruncSStore(bool SignedSat, SDValue Chain, const SDLoc &Dl,
+ SDValue Val, SDValue Ptr, SDValue Mask, EVT MemVT,
+ MachineMemOperand *MMO, SelectionDAG &DAG) {
+
+ SDVTList VTs = DAG.getVTList(MVT::Other);
+ SDValue Ops[] = { Chain, Ptr, Mask, Val };
+ return SignedSat ?
+ DAG.getTargetMemSDNode<MaskedTruncSStoreSDNode>(VTs, Ops, Dl, MemVT, MMO) :
+ DAG.getTargetMemSDNode<MaskedTruncUSStoreSDNode>(VTs, Ops, Dl, MemVT, MMO);
+}
+
static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
@@ -18429,8 +19609,8 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
IntNo == llvm::Intrinsic::x86_flags_write_u64) {
// We need a frame pointer because this will get lowered to a PUSH/POP
// sequence.
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
- MFI->setHasCopyImplyingStackAdjustment(true);
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI.setHasCopyImplyingStackAdjustment(true);
// Don't do anything here, we will expand these intrinsics out later
// during ExpandISelPseudos in EmitInstrWithCustomInserter.
return SDValue();
@@ -18509,13 +19689,18 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
getReadPerformanceCounter(Op.getNode(), dl, DAG, Subtarget, Results);
return DAG.getMergeValues(Results, dl);
}
+ // Get Extended Control Register.
+ case XGETBV: {
+ SmallVector<SDValue, 2> Results;
+ getExtendedControlRegister(Op.getNode(), dl, DAG, Subtarget, Results);
+ return DAG.getMergeValues(Results, dl);
+ }
// XTEST intrinsics.
case XTEST: {
SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Other);
SDValue InTrans = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(0));
- SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
- DAG.getConstant(X86::COND_NE, dl, MVT::i8),
- InTrans);
+
+ SDValue SetCC = getSETCC(X86::COND_NE, InTrans, dl, DAG);
SDValue Ret = DAG.getNode(ISD::ZERO_EXTEND, dl, Op->getValueType(0), SetCC);
return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(),
Ret, SDValue(InTrans.getNode(), 1));
@@ -18530,9 +19715,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
Op.getOperand(4), GenCF.getValue(1));
SDValue Store = DAG.getStore(Op.getOperand(0), dl, Res.getValue(0),
Op.getOperand(5), MachinePointerInfo());
- SDValue SetCC = DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
- DAG.getConstant(X86::COND_B, dl, MVT::i8),
- Res.getValue(1));
+ SDValue SetCC = getSETCC(X86::COND_B, Res.getValue(1), dl, DAG);
SDValue Results[] = { SetCC, Store };
return DAG.getMergeValues(Results, dl);
}
@@ -18550,11 +19733,12 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
return DAG.getStore(Chain, dl, DataToCompress, Addr,
MemIntr->getMemOperand());
- SDValue Compressed =
- getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, DataToCompress),
- Mask, DAG.getUNDEF(VT), Subtarget, DAG);
- return DAG.getStore(Chain, dl, Compressed, Addr,
- MemIntr->getMemOperand());
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
+ SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
+
+ return DAG.getMaskedStore(Chain, dl, DataToCompress, Addr, VMask, VT,
+ MemIntr->getMemOperand(),
+ false /* truncating */, true /* compressing */);
}
case TRUNCATE_TO_MEM_VI8:
case TRUNCATE_TO_MEM_VI16:
@@ -18567,18 +19751,39 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
MemIntrinsicSDNode *MemIntr = dyn_cast<MemIntrinsicSDNode>(Op);
assert(MemIntr && "Expected MemIntrinsicSDNode!");
- EVT VT = MemIntr->getMemoryVT();
+ EVT MemVT = MemIntr->getMemoryVT();
- if (isAllOnesConstant(Mask)) // return just a truncate store
- return DAG.getTruncStore(Chain, dl, DataToTruncate, Addr, VT,
- MemIntr->getMemOperand());
+ uint16_t TruncationOp = IntrData->Opc0;
+ switch (TruncationOp) {
+ case X86ISD::VTRUNC: {
+ if (isAllOnesConstant(Mask)) // return just a truncate store
+ return DAG.getTruncStore(Chain, dl, DataToTruncate, Addr, MemVT,
+ MemIntr->getMemOperand());
- MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
- SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, MemVT.getVectorNumElements());
+ SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
+
+ return DAG.getMaskedStore(Chain, dl, DataToTruncate, Addr, VMask, MemVT,
+ MemIntr->getMemOperand(), true /* truncating */);
+ }
+ case X86ISD::VTRUNCUS:
+ case X86ISD::VTRUNCS: {
+ bool IsSigned = (TruncationOp == X86ISD::VTRUNCS);
+ if (isAllOnesConstant(Mask))
+ return EmitTruncSStore(IsSigned, Chain, dl, DataToTruncate, Addr, MemVT,
+ MemIntr->getMemOperand(), DAG);
- return DAG.getMaskedStore(Chain, dl, DataToTruncate, Addr, VMask, VT,
- MemIntr->getMemOperand(), true);
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, MemVT.getVectorNumElements());
+ SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
+
+ return EmitMaskedTruncSStore(IsSigned, Chain, dl, DataToTruncate, Addr,
+ VMask, MemVT, MemIntr->getMemOperand(), DAG);
+ }
+ default:
+ llvm_unreachable("Unsupported truncstore intrinsic");
+ }
}
+
case EXPAND_FROM_MEM: {
SDValue Mask = Op.getOperand(4);
SDValue PassThru = Op.getOperand(3);
@@ -18589,24 +19794,24 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
MemIntrinsicSDNode *MemIntr = dyn_cast<MemIntrinsicSDNode>(Op);
assert(MemIntr && "Expected MemIntrinsicSDNode!");
- SDValue DataToExpand = DAG.getLoad(VT, dl, Chain, Addr,
- MemIntr->getMemOperand());
-
- if (isAllOnesConstant(Mask)) // return just a load
- return DataToExpand;
+ if (isAllOnesConstant(Mask)) // Return a regular (unmasked) vector load.
+ return DAG.getLoad(VT, dl, Chain, Addr, MemIntr->getMemOperand());
+ if (X86::isZeroNode(Mask))
+ return DAG.getUNDEF(VT);
- SDValue Results[] = {
- getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, DataToExpand),
- Mask, PassThru, Subtarget, DAG), Chain};
- return DAG.getMergeValues(Results, dl);
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
+ SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
+ return DAG.getMaskedLoad(VT, dl, Chain, Addr, VMask, PassThru, VT,
+ MemIntr->getMemOperand(), ISD::NON_EXTLOAD,
+ true /* expanding */);
}
}
}
SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op,
SelectionDAG &DAG) const {
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
- MFI->setReturnAddressIsTaken(true);
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI.setReturnAddressIsTaken(true);
if (verifyReturnAddressArgumentIsConstant(Op, DAG))
return SDValue();
@@ -18630,14 +19835,20 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op,
MachinePointerInfo());
}
+SDValue X86TargetLowering::LowerADDROFRETURNADDR(SDValue Op,
+ SelectionDAG &DAG) const {
+ DAG.getMachineFunction().getFrameInfo().setReturnAddressIsTaken(true);
+ return getReturnAddressFrameIndex(DAG);
+}
+
SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
EVT VT = Op.getValueType();
- MFI->setFrameAddressIsTaken(true);
+ MFI.setFrameAddressIsTaken(true);
if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
// Depth > 0 makes no sense on targets which use Windows unwind codes. It
@@ -18647,7 +19858,7 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
if (!FrameAddrIndex) {
// Set up a frame object for the return address.
unsigned SlotSize = RegInfo->getSlotSize();
- FrameAddrIndex = MF.getFrameInfo()->CreateFixedObject(
+ FrameAddrIndex = MF.getFrameInfo().CreateFixedObject(
SlotSize, /*Offset=*/0, /*IsImmutable=*/false);
FuncInfo->setFAIndex(FrameAddrIndex);
}
@@ -18965,7 +20176,7 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
SDLoc DL(Op);
// Save FP Control Word to stack slot
- int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment, false);
+ int SSFI = MF.getFrameInfo().CreateStackObject(2, StackAlignment, false);
SDValue StackSlot =
DAG.getFrameIndex(SSFI, getPointerTy(DAG.getDataLayout()));
@@ -19083,7 +20294,7 @@ static SDValue LowerVectorCTLZInRegLUT(SDValue Op, const SDLoc &DL,
SmallVector<SDValue, 64> LUTVec;
for (int i = 0; i < NumBytes; ++i)
LUTVec.push_back(DAG.getConstant(LUT[i % 16], DL, MVT::i8));
- SDValue InRegLUT = DAG.getNode(ISD::BUILD_VECTOR, DL, CurrVT, LUTVec);
+ SDValue InRegLUT = DAG.getBuildVector(CurrVT, DL, LUTVec);
// Begin by bitcasting the input to byte vector, then split those bytes
// into lo/hi nibbles and use the PSHUFB LUT to perform CLTZ on each of them.
@@ -19444,43 +20655,63 @@ static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
assert((VT == MVT::v2i64 || VT == MVT::v4i64 || VT == MVT::v8i64) &&
"Only know how to lower V2I64/V4I64/V8I64 multiply");
+ // 32-bit vector types used for MULDQ/MULUDQ.
+ MVT MulVT = MVT::getVectorVT(MVT::i32, VT.getSizeInBits() / 32);
+
+ // PMULDQ returns the 64-bit result of the signed multiplication of the low
+ // 32 bits. We can lower with this if the sign bits stretch that far.
+ if (Subtarget.hasSSE41() && DAG.ComputeNumSignBits(A) > 32 &&
+ DAG.ComputeNumSignBits(B) > 32) {
+ return DAG.getNode(X86ISD::PMULDQ, dl, VT, DAG.getBitcast(MulVT, A),
+ DAG.getBitcast(MulVT, B));
+ }
+
// Ahi = psrlqi(a, 32);
// Bhi = psrlqi(b, 32);
//
// AloBlo = pmuludq(a, b);
// AloBhi = pmuludq(a, Bhi);
// AhiBlo = pmuludq(Ahi, b);
+ //
+ // Hi = psllqi(AloBhi + AhiBlo, 32);
+ // return AloBlo + Hi;
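+ //
+ // This follows from A * B = (Ahi * 2^32 + Alo) * (Bhi * 2^32 + Blo); reducing
+ // modulo 2^64 drops the Ahi * Bhi term and leaves
+ // AloBlo + ((AloBhi + AhiBlo) << 32).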
+ APInt LowerBitsMask = APInt::getLowBitsSet(64, 32);
+ bool ALoIsZero = DAG.MaskedValueIsZero(A, LowerBitsMask);
+ bool BLoIsZero = DAG.MaskedValueIsZero(B, LowerBitsMask);
- // AloBhi = psllqi(AloBhi, 32);
- // AhiBlo = psllqi(AhiBlo, 32);
- // return AloBlo + AloBhi + AhiBlo;
+ APInt UpperBitsMask = APInt::getHighBitsSet(64, 32);
+ bool AHiIsZero = DAG.MaskedValueIsZero(A, UpperBitsMask);
+ bool BHiIsZero = DAG.MaskedValueIsZero(B, UpperBitsMask);
- SDValue Ahi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, A, 32, DAG);
- SDValue Bhi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, B, 32, DAG);
+ // Bit cast to 32-bit vectors for MULUDQ.
+ SDValue Alo = DAG.getBitcast(MulVT, A);
+ SDValue Blo = DAG.getBitcast(MulVT, B);
- SDValue AhiBlo = Ahi;
- SDValue AloBhi = Bhi;
- // Bit cast to 32-bit vectors for MULUDQ
- MVT MulVT = (VT == MVT::v2i64) ? MVT::v4i32 :
- (VT == MVT::v4i64) ? MVT::v8i32 : MVT::v16i32;
- A = DAG.getBitcast(MulVT, A);
- B = DAG.getBitcast(MulVT, B);
- Ahi = DAG.getBitcast(MulVT, Ahi);
- Bhi = DAG.getBitcast(MulVT, Bhi);
+ SDValue Zero = getZeroVector(VT, Subtarget, DAG, dl);
- SDValue AloBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, B);
- // After shifting right const values the result may be all-zero.
- if (!ISD::isBuildVectorAllZeros(Ahi.getNode())) {
- AhiBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, Ahi, B);
- AhiBlo = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, AhiBlo, 32, DAG);
+ // Only multiply lo/hi halves that aren't known to be zero.
+ SDValue AloBlo = Zero;
+ if (!ALoIsZero && !BLoIsZero)
+ AloBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, Alo, Blo);
+
+ SDValue AloBhi = Zero;
+ if (!ALoIsZero && !BHiIsZero) {
+ SDValue Bhi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, B, 32, DAG);
+ Bhi = DAG.getBitcast(MulVT, Bhi);
+ AloBhi = DAG.getNode(X86ISD::PMULUDQ, dl, VT, Alo, Bhi);
}
- if (!ISD::isBuildVectorAllZeros(Bhi.getNode())) {
- AloBhi = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, Bhi);
- AloBhi = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, AloBhi, 32, DAG);
+
+ SDValue AhiBlo = Zero;
+ if (!AHiIsZero && !BLoIsZero) {
+ SDValue Ahi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, A, 32, DAG);
+ Ahi = DAG.getBitcast(MulVT, Ahi);
+ AhiBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, Ahi, Blo);
}
- SDValue Res = DAG.getNode(ISD::ADD, dl, VT, AloBlo, AloBhi);
- return DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo);
+ SDValue Hi = DAG.getNode(ISD::ADD, dl, VT, AloBhi, AhiBlo);
+ Hi = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Hi, 32, DAG);
+
+ return DAG.getNode(ISD::ADD, dl, VT, AloBlo, Hi);
}
static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
@@ -19905,7 +21136,8 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
// Special case in 32-bit mode, where i64 is expanded into high and low parts.
if (!Subtarget.is64Bit() && !Subtarget.hasXOP() &&
- (VT == MVT::v2i64 || (Subtarget.hasInt256() && VT == MVT::v4i64))) {
+ (VT == MVT::v2i64 || (Subtarget.hasInt256() && VT == MVT::v4i64) ||
+ (Subtarget.hasAVX512() && VT == MVT::v8i64))) {
// Peek through any splat that was introduced for i64 shift vectorization.
int SplatIndex = -1;
@@ -20147,7 +21379,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
}
// If possible, lower this shift as a sequence of two shifts by
- // constant plus a MOVSS/MOVSD instead of scalarizing it.
+ // constant plus a MOVSS/MOVSD/PBLEND instead of scalarizing it.
// Example:
// (v4i32 (srl A, (build_vector < X, Y, Y, Y>)))
//
@@ -20167,7 +21399,7 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
SDValue Amt2 = (VT == MVT::v4i32) ? Amt->getOperand(1) : Amt->getOperand(2);
// See if it is possible to replace this node with a sequence of
- // two shifts followed by a MOVSS/MOVSD
+ // two shifts followed by a MOVSS/MOVSD/PBLEND.
if (VT == MVT::v4i32) {
// Check if it is legal to use a MOVSS.
CanBeSimplified = Amt2 == Amt->getOperand(2) &&
@@ -20199,21 +21431,21 @@ static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget,
if (CanBeSimplified && isa<ConstantSDNode>(Amt1) &&
isa<ConstantSDNode>(Amt2)) {
- // Replace this node with two shifts followed by a MOVSS/MOVSD.
+ // Replace this node with two shifts followed by a MOVSS/MOVSD/PBLEND.
MVT CastVT = MVT::v4i32;
SDValue Splat1 =
- DAG.getConstant(cast<ConstantSDNode>(Amt1)->getAPIntValue(), dl, VT);
+ DAG.getConstant(cast<ConstantSDNode>(Amt1)->getAPIntValue(), dl, VT);
SDValue Shift1 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat1);
SDValue Splat2 =
- DAG.getConstant(cast<ConstantSDNode>(Amt2)->getAPIntValue(), dl, VT);
+ DAG.getConstant(cast<ConstantSDNode>(Amt2)->getAPIntValue(), dl, VT);
SDValue Shift2 = DAG.getNode(Op->getOpcode(), dl, VT, R, Splat2);
- if (TargetOpcode == X86ISD::MOVSD)
- CastVT = MVT::v2i64;
SDValue BitCast1 = DAG.getBitcast(CastVT, Shift1);
SDValue BitCast2 = DAG.getBitcast(CastVT, Shift2);
- SDValue Result = getTargetShuffleNode(TargetOpcode, dl, CastVT, BitCast2,
- BitCast1, DAG);
- return DAG.getBitcast(VT, Result);
+ if (TargetOpcode == X86ISD::MOVSD)
+ return DAG.getBitcast(VT, DAG.getVectorShuffle(CastVT, dl, BitCast1,
+ BitCast2, {0, 1, 6, 7}));
+ return DAG.getBitcast(VT, DAG.getVectorShuffle(CastVT, dl, BitCast1,
+ BitCast2, {0, 5, 6, 7}));
}
}
@@ -20519,7 +21751,7 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
unsigned BaseOp = 0;
- unsigned Cond = 0;
+ X86::CondCode Cond;
SDLoc DL(Op);
switch (Op.getOpcode()) {
default: llvm_unreachable("Unknown ovf instruction!");
@@ -20567,16 +21799,11 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
MVT::i32);
SDValue Sum = DAG.getNode(X86ISD::UMUL, DL, VTs, LHS, RHS);
- SDValue SetCC =
- DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
- DAG.getConstant(X86::COND_O, DL, MVT::i32),
- SDValue(Sum.getNode(), 2));
+ SDValue SetCC = getSETCC(X86::COND_O, SDValue(Sum.getNode(), 2), DL, DAG);
- if (N->getValueType(1) == MVT::i1) {
- SetCC = DAG.getNode(ISD::AssertZext, DL, MVT::i8, SetCC,
- DAG.getValueType(MVT::i1));
+ if (N->getValueType(1) == MVT::i1)
SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
- }
+
return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC);
}
}
@@ -20585,16 +21812,11 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::i32);
SDValue Sum = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
- SDValue SetCC =
- DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
- DAG.getConstant(Cond, DL, MVT::i32),
- SDValue(Sum.getNode(), 1));
+ SDValue SetCC = getSETCC(Cond, SDValue(Sum.getNode(), 1), DL, DAG);
- if (N->getValueType(1) == MVT::i1) {
- SetCC = DAG.getNode(ISD::AssertZext, DL, MVT::i8, SetCC,
- DAG.getValueType(MVT::i1));
+ if (N->getValueType(1) == MVT::i1)
SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);
- }
+
return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC);
}
@@ -20790,9 +22012,7 @@ static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget &Subtarget,
DAG.getCopyFromReg(Result.getValue(0), DL, Reg, T, Result.getValue(1));
SDValue EFLAGS = DAG.getCopyFromReg(cpOut.getValue(1), DL, X86::EFLAGS,
MVT::i32, cpOut.getValue(2));
- SDValue Success = DAG.getNode(X86ISD::SETCC, DL, Op->getValueType(1),
- DAG.getConstant(X86::COND_E, DL, MVT::i8),
- EFLAGS);
+ SDValue Success = getSETCC(X86::COND_E, EFLAGS, DL, DAG);
DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), cpOut);
DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success);
@@ -20898,8 +22118,9 @@ static SDValue LowerHorizontalByteSum(SDValue V, MVT VT,
// two v2i64 vectors which concatenated are the 4 population counts. We can
// then use PACKUSWB to shrink and concatenate them into a v4i32 again.
SDValue Zeros = getZeroVector(VT, Subtarget, DAG, DL);
- SDValue Low = DAG.getNode(X86ISD::UNPCKL, DL, VT, V, Zeros);
- SDValue High = DAG.getNode(X86ISD::UNPCKH, DL, VT, V, Zeros);
+ SDValue V32 = DAG.getBitcast(VT, V);
+ SDValue Low = DAG.getNode(X86ISD::UNPCKL, DL, VT, V32, Zeros);
+ SDValue High = DAG.getNode(X86ISD::UNPCKH, DL, VT, V32, Zeros);
// Do the horizontal sums into two v2i64s.
Zeros = getZeroVector(ByteVecVT, Subtarget, DAG, DL);
@@ -21054,6 +22275,8 @@ static SDValue LowerVectorCTPOPBitmath(SDValue Op, const SDLoc &DL,
DAG);
}
+// Please ensure that any codegen change from LowerVectorCTPOP is reflected in
+// updated cost models in X86TTIImpl::getIntrinsicInstrCost.
static SDValue LowerVectorCTPOP(SDValue Op, const X86Subtarget &Subtarget,
SelectionDAG &DAG) {
MVT VT = Op.getSimpleValueType();
@@ -21260,8 +22483,7 @@ static SDValue lowerAtomicArith(SDValue N, SelectionDAG &DAG,
AtomicSDNode *AN = cast<AtomicSDNode>(N.getNode());
RHS = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), RHS);
return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, VT, Chain, LHS,
- RHS, AN->getMemOperand(), AN->getOrdering(),
- AN->getSynchScope());
+ RHS, AN->getMemOperand());
}
assert(Opc == ISD::ATOMIC_LOAD_ADD &&
"Used AtomicRMW ops other than Add should have been expanded!");
@@ -21292,9 +22514,7 @@ static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) {
cast<AtomicSDNode>(Node)->getMemoryVT(),
Node->getOperand(0),
Node->getOperand(1), Node->getOperand(2),
- cast<AtomicSDNode>(Node)->getMemOperand(),
- cast<AtomicSDNode>(Node)->getOrdering(),
- cast<AtomicSDNode>(Node)->getSynchScope());
+ cast<AtomicSDNode>(Node)->getMemOperand());
return Swap.getValue(1);
}
// Other atomic stores have a simple pattern.
@@ -21534,26 +22754,48 @@ static SDValue LowerMLOAD(SDValue Op, const X86Subtarget &Subtarget,
SDValue Mask = N->getMask();
SDLoc dl(Op);
+ assert((!N->isExpandingLoad() || Subtarget.hasAVX512()) &&
+ "Expanding masked load is supported on AVX-512 target only!");
+
+ assert((!N->isExpandingLoad() || ScalarVT.getSizeInBits() >= 32) &&
+ "Expanding masked load is supported for 32 and 64-bit types only!");
+
+ // Non-expanding loads of 4x32, 4x64 and 2x64 vectors are legal regardless of
+ // VLX; expanding loads of these types are handled below.
+ if (!N->isExpandingLoad() && VT.getVectorNumElements() <= 4)
+ return Op;
+
assert(Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() &&
"Cannot lower masked load op.");
- assert(((ScalarVT == MVT::i32 || ScalarVT == MVT::f32) ||
+ assert((ScalarVT.getSizeInBits() >= 32 ||
(Subtarget.hasBWI() &&
(ScalarVT == MVT::i8 || ScalarVT == MVT::i16))) &&
"Unsupported masked load op.");
// This operation is legal for targets with VLX, but without
// VLX the vector should be widened to 512 bit
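// (e.g. a v8i32 masked load on AVX-512F without VLX is performed as a v16i32
// masked load with a widened mask, and the low half is extracted afterwards).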
- unsigned NumEltsInWideVec = 512/VT.getScalarSizeInBits();
+ unsigned NumEltsInWideVec = 512 / VT.getScalarSizeInBits();
MVT WideDataVT = MVT::getVectorVT(ScalarVT, NumEltsInWideVec);
- MVT WideMaskVT = MVT::getVectorVT(MVT::i1, NumEltsInWideVec);
SDValue Src0 = N->getSrc0();
Src0 = ExtendToType(Src0, WideDataVT, DAG);
+
+ // Mask element has to be i1.
+ MVT MaskEltTy = Mask.getSimpleValueType().getScalarType();
+ assert((MaskEltTy == MVT::i1 || VT.getVectorNumElements() <= 4) &&
+ "We handle 4x32, 4x64 and 2x64 vectors only in this casse");
+
+ MVT WideMaskVT = MVT::getVectorVT(MaskEltTy, NumEltsInWideVec);
+
Mask = ExtendToType(Mask, WideMaskVT, DAG, true);
+ if (MaskEltTy != MVT::i1)
+ Mask = DAG.getNode(ISD::TRUNCATE, dl,
+ MVT::getVectorVT(MVT::i1, NumEltsInWideVec), Mask);
SDValue NewLoad = DAG.getMaskedLoad(WideDataVT, dl, N->getChain(),
N->getBasePtr(), Mask, Src0,
N->getMemoryVT(), N->getMemOperand(),
- N->getExtensionType());
+ N->getExtensionType(),
+ N->isExpandingLoad());
SDValue Exract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT,
NewLoad.getValue(0),
@@ -21571,10 +22813,20 @@ static SDValue LowerMSTORE(SDValue Op, const X86Subtarget &Subtarget,
SDValue Mask = N->getMask();
SDLoc dl(Op);
+ assert((!N->isCompressingStore() || Subtarget.hasAVX512()) &&
+ "Expanding masked load is supported on AVX-512 target only!");
+
+ assert((!N->isCompressingStore() || ScalarVT.getSizeInBits() >= 32) &&
+ "Expanding masked load is supported for 32 and 64-bit types only!");
+
+ // 4x32 and 2x64 vectors of non-compressing stores are legal regardless of VLX.
+ if (!N->isCompressingStore() && VT.getVectorNumElements() <= 4)
+ return Op;
+
assert(Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() &&
"Cannot lower masked store op.");
- assert(((ScalarVT == MVT::i32 || ScalarVT == MVT::f32) ||
+ assert((ScalarVT.getSizeInBits() >= 32 ||
(Subtarget.hasBWI() &&
(ScalarVT == MVT::i8 || ScalarVT == MVT::i16))) &&
"Unsupported masked store op.");
@@ -21583,12 +22835,22 @@ static SDValue LowerMSTORE(SDValue Op, const X86Subtarget &Subtarget,
// VLX the vector should be widened to 512 bit
unsigned NumEltsInWideVec = 512/VT.getScalarSizeInBits();
MVT WideDataVT = MVT::getVectorVT(ScalarVT, NumEltsInWideVec);
- MVT WideMaskVT = MVT::getVectorVT(MVT::i1, NumEltsInWideVec);
+
+ // Mask element has to be i1.
+ MVT MaskEltTy = Mask.getSimpleValueType().getScalarType();
+ assert((MaskEltTy == MVT::i1 || VT.getVectorNumElements() <= 4) &&
+ "We handle 4x32, 4x64 and 2x64 vectors only in this casse");
+
+ MVT WideMaskVT = MVT::getVectorVT(MaskEltTy, NumEltsInWideVec);
+
DataToStore = ExtendToType(DataToStore, WideDataVT, DAG);
Mask = ExtendToType(Mask, WideMaskVT, DAG, true);
+ if (MaskEltTy != MVT::i1)
+ Mask = DAG.getNode(ISD::TRUNCATE, dl,
+ MVT::getVectorVT(MVT::i1, NumEltsInWideVec), Mask);
return DAG.getMaskedStore(N->getChain(), dl, DataToStore, N->getBasePtr(),
Mask, N->getMemoryVT(), N->getMemOperand(),
- N->isTruncatingStore());
+ N->isTruncatingStore(), N->isCompressingStore());
}
static SDValue LowerMGATHER(SDValue Op, const X86Subtarget &Subtarget,
@@ -21734,10 +22996,11 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, Subtarget, DAG);
case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, Subtarget, DAG);
case ISD::ANY_EXTEND: return LowerANY_EXTEND(Op, Subtarget, DAG);
+ case ISD::ZERO_EXTEND_VECTOR_INREG:
case ISD::SIGN_EXTEND_VECTOR_INREG:
- return LowerSIGN_EXTEND_VECTOR_INREG(Op, Subtarget, DAG);
- case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
- case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG);
+ return LowerEXTEND_VECTOR_INREG(Op, Subtarget, DAG);
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT: return LowerFP_TO_INT(Op, Subtarget, DAG);
case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
case ISD::LOAD: return LowerExtendedLoad(Op, Subtarget, DAG);
case ISD::FABS:
@@ -21756,6 +23019,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, Subtarget, DAG);
case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
+ case ISD::ADDROFRETURNADDR: return LowerADDROFRETURNADDR(Op, DAG);
case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
case ISD::FRAME_TO_ARGS_OFFSET:
return LowerFRAME_TO_ARGS_OFFSET(Op, DAG);
@@ -21830,7 +23094,7 @@ void X86TargetLowering::LowerOperationWrapper(SDNode *N,
// In some cases (LowerSINT_TO_FP for example) Res has more result values
// than original node, chain should be dropped(last value).
for (unsigned I = 0, E = N->getNumValues(); I != E; ++I)
- Results.push_back(Res.getValue(I));
+ Results.push_back(Res.getValue(I));
}
/// Replace a node with an illegal result type with a new node built out of
@@ -21851,9 +23115,9 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
auto InVTSize = InVT.getSizeInBits();
const unsigned RegSize =
(InVTSize > 128) ? ((InVTSize > 256) ? 512 : 256) : 128;
- assert((!Subtarget.hasAVX512() || RegSize < 512) &&
- "512-bit vector requires AVX512");
- assert((!Subtarget.hasAVX2() || RegSize < 256) &&
+ assert((Subtarget.hasBWI() || RegSize < 512) &&
+ "512-bit vector requires AVX512BW");
+ assert((Subtarget.hasAVX2() || RegSize < 256) &&
"256-bit vector requires AVX2");
auto ElemVT = InVT.getVectorElementType();
@@ -21888,13 +23152,6 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(DAG.getNode(N->getOpcode(), dl, MVT::v4f32, LHS, RHS));
return;
}
- case ISD::SIGN_EXTEND_INREG:
- case ISD::ADDC:
- case ISD::ADDE:
- case ISD::SUBC:
- case ISD::SUBE:
- // We don't want to expand or promote these.
- return;
case ISD::SDIV:
case ISD::UDIV:
case ISD::SREM:
@@ -21909,6 +23166,36 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::FP_TO_UINT: {
bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT;
+ if (N->getValueType(0) == MVT::v2i32) {
+ assert((IsSigned || Subtarget.hasAVX512()) &&
+ "Can only handle signed conversion without AVX512");
+ assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
+ SDValue Src = N->getOperand(0);
+ if (Src.getValueType() == MVT::v2f64) {
+ SDValue Idx = DAG.getIntPtrConstant(0, dl);
+ SDValue Res = DAG.getNode(IsSigned ? X86ISD::CVTTP2SI
+ : X86ISD::CVTTP2UI,
+ dl, MVT::v4i32, Src);
+ Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res, Idx);
+ Results.push_back(Res);
+ return;
+ }
+ if (Src.getValueType() == MVT::v2f32) {
+ SDValue Idx = DAG.getIntPtrConstant(0, dl);
+ SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
+ DAG.getUNDEF(MVT::v2f32));
+ Res = DAG.getNode(IsSigned ? ISD::FP_TO_SINT
+ : ISD::FP_TO_UINT, dl, MVT::v4i32, Res);
+ Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res, Idx);
+ Results.push_back(Res);
+ return;
+ }
+
+ // The FP_TO_INTHelper below only handles f32/f64/f80 scalar inputs,
+ // so early out here.
+ return;
+ }
+
std::pair<SDValue,SDValue> Vals =
FP_TO_INTHelper(SDValue(N, 0), DAG, IsSigned, /*IsReplace=*/ true);
SDValue FIST = Vals.first, StackSlot = Vals.second;
@@ -21923,13 +23210,28 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
}
return;
}
+ case ISD::SINT_TO_FP: {
+ assert(Subtarget.hasDQI() && Subtarget.hasVLX() && "Requires AVX512DQVL!");
+ SDValue Src = N->getOperand(0);
+ if (N->getValueType(0) != MVT::v2f32 || Src.getValueType() != MVT::v2i64)
+ return;
+ Results.push_back(DAG.getNode(X86ISD::CVTSI2P, dl, MVT::v4f32, Src));
+ return;
+ }
case ISD::UINT_TO_FP: {
assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
- if (N->getOperand(0).getValueType() != MVT::v2i32 ||
- N->getValueType(0) != MVT::v2f32)
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::v2f32)
return;
- SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64,
- N->getOperand(0));
+ SDValue Src = N->getOperand(0);
+ EVT SrcVT = Src.getValueType();
+ if (Subtarget.hasDQI() && Subtarget.hasVLX() && SrcVT == MVT::v2i64) {
+ Results.push_back(DAG.getNode(X86ISD::CVTUI2P, dl, MVT::v4f32, Src));
+ return;
+ }
+ if (SrcVT != MVT::v2i32)
+ return;
+ SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64, Src);
SDValue VBias =
DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), dl, MVT::v2f64);
SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64, ZExtIn,
@@ -21967,6 +23269,9 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
Results);
case Intrinsic::x86_rdpmc:
return getReadPerformanceCounter(N, dl, DAG, Subtarget, Results);
+
+ case Intrinsic::x86_xgetbv:
+ return getExtendedControlRegister(N, dl, DAG, Subtarget, Results);
}
}
case ISD::INTRINSIC_WO_CHAIN: {
@@ -22052,9 +23357,7 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
SDValue EFLAGS = DAG.getCopyFromReg(cpOutH.getValue(1), dl, X86::EFLAGS,
MVT::i32, cpOutH.getValue(2));
- SDValue Success =
- DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
- DAG.getConstant(X86::COND_E, dl, MVT::i8), EFLAGS);
+ SDValue Success = getSETCC(X86::COND_E, EFLAGS, dl, DAG);
Success = DAG.getZExtOrTrunc(Success, dl, N->getValueType(1));
Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, T, OpsF));
@@ -22143,6 +23446,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::SETCC: return "X86ISD::SETCC";
case X86ISD::SETCC_CARRY: return "X86ISD::SETCC_CARRY";
case X86ISD::FSETCC: return "X86ISD::FSETCC";
+ case X86ISD::FSETCCM: return "X86ISD::FSETCCM";
+ case X86ISD::FSETCCM_RND: return "X86ISD::FSETCCM_RND";
case X86ISD::CMOV: return "X86ISD::CMOV";
case X86ISD::BRCOND: return "X86ISD::BRCOND";
case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG";
@@ -22215,11 +23520,17 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VTRUNC: return "X86ISD::VTRUNC";
case X86ISD::VTRUNCS: return "X86ISD::VTRUNCS";
case X86ISD::VTRUNCUS: return "X86ISD::VTRUNCUS";
+ case X86ISD::VTRUNCSTORES: return "X86ISD::VTRUNCSTORES";
+ case X86ISD::VTRUNCSTOREUS: return "X86ISD::VTRUNCSTOREUS";
+ case X86ISD::VMTRUNCSTORES: return "X86ISD::VMTRUNCSTORES";
+ case X86ISD::VMTRUNCSTOREUS: return "X86ISD::VMTRUNCSTOREUS";
case X86ISD::VINSERT: return "X86ISD::VINSERT";
case X86ISD::VFPEXT: return "X86ISD::VFPEXT";
+ case X86ISD::VFPEXT_RND: return "X86ISD::VFPEXT_RND";
+ case X86ISD::VFPEXTS_RND: return "X86ISD::VFPEXTS_RND";
case X86ISD::VFPROUND: return "X86ISD::VFPROUND";
- case X86ISD::CVTDQ2PD: return "X86ISD::CVTDQ2PD";
- case X86ISD::CVTUDQ2PD: return "X86ISD::CVTUDQ2PD";
+ case X86ISD::VFPROUND_RND: return "X86ISD::VFPROUND_RND";
+ case X86ISD::VFPROUNDS_RND: return "X86ISD::VFPROUNDS_RND";
case X86ISD::CVT2MASK: return "X86ISD::CVT2MASK";
case X86ISD::VSHLDQ: return "X86ISD::VSHLDQ";
case X86ISD::VSRLDQ: return "X86ISD::VSRLDQ";
@@ -22332,27 +23643,43 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::FNMSUB_RND: return "X86ISD::FNMSUB_RND";
case X86ISD::FMADDSUB_RND: return "X86ISD::FMADDSUB_RND";
case X86ISD::FMSUBADD_RND: return "X86ISD::FMSUBADD_RND";
+ case X86ISD::FMADDS1_RND: return "X86ISD::FMADDS1_RND";
+ case X86ISD::FNMADDS1_RND: return "X86ISD::FNMADDS1_RND";
+ case X86ISD::FMSUBS1_RND: return "X86ISD::FMSUBS1_RND";
+ case X86ISD::FNMSUBS1_RND: return "X86ISD::FNMSUBS1_RND";
+ case X86ISD::FMADDS3_RND: return "X86ISD::FMADDS3_RND";
+ case X86ISD::FNMADDS3_RND: return "X86ISD::FNMADDS3_RND";
+ case X86ISD::FMSUBS3_RND: return "X86ISD::FMSUBS3_RND";
+ case X86ISD::FNMSUBS3_RND: return "X86ISD::FNMSUBS3_RND";
case X86ISD::VPMADD52H: return "X86ISD::VPMADD52H";
case X86ISD::VPMADD52L: return "X86ISD::VPMADD52L";
case X86ISD::VRNDSCALE: return "X86ISD::VRNDSCALE";
+ case X86ISD::VRNDSCALES: return "X86ISD::VRNDSCALES";
case X86ISD::VREDUCE: return "X86ISD::VREDUCE";
+ case X86ISD::VREDUCES: return "X86ISD::VREDUCES";
case X86ISD::VGETMANT: return "X86ISD::VGETMANT";
+ case X86ISD::VGETMANTS: return "X86ISD::VGETMANTS";
case X86ISD::PCMPESTRI: return "X86ISD::PCMPESTRI";
case X86ISD::PCMPISTRI: return "X86ISD::PCMPISTRI";
case X86ISD::XTEST: return "X86ISD::XTEST";
case X86ISD::COMPRESS: return "X86ISD::COMPRESS";
case X86ISD::EXPAND: return "X86ISD::EXPAND";
case X86ISD::SELECT: return "X86ISD::SELECT";
+ case X86ISD::SELECTS: return "X86ISD::SELECTS";
case X86ISD::ADDSUB: return "X86ISD::ADDSUB";
case X86ISD::RCP28: return "X86ISD::RCP28";
+ case X86ISD::RCP28S: return "X86ISD::RCP28S";
case X86ISD::EXP2: return "X86ISD::EXP2";
case X86ISD::RSQRT28: return "X86ISD::RSQRT28";
+ case X86ISD::RSQRT28S: return "X86ISD::RSQRT28S";
case X86ISD::FADD_RND: return "X86ISD::FADD_RND";
case X86ISD::FSUB_RND: return "X86ISD::FSUB_RND";
case X86ISD::FMUL_RND: return "X86ISD::FMUL_RND";
case X86ISD::FDIV_RND: return "X86ISD::FDIV_RND";
case X86ISD::FSQRT_RND: return "X86ISD::FSQRT_RND";
+ case X86ISD::FSQRTS_RND: return "X86ISD::FSQRTS_RND";
case X86ISD::FGETEXP_RND: return "X86ISD::FGETEXP_RND";
+ case X86ISD::FGETEXPS_RND: return "X86ISD::FGETEXPS_RND";
case X86ISD::SCALEF: return "X86ISD::SCALEF";
case X86ISD::SCALEFS: return "X86ISD::SCALEFS";
case X86ISD::ADDS: return "X86ISD::ADDS";
@@ -22361,13 +23688,27 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::MULHRS: return "X86ISD::MULHRS";
case X86ISD::SINT_TO_FP_RND: return "X86ISD::SINT_TO_FP_RND";
case X86ISD::UINT_TO_FP_RND: return "X86ISD::UINT_TO_FP_RND";
- case X86ISD::FP_TO_SINT_RND: return "X86ISD::FP_TO_SINT_RND";
- case X86ISD::FP_TO_UINT_RND: return "X86ISD::FP_TO_UINT_RND";
+ case X86ISD::CVTTP2SI: return "X86ISD::CVTTP2SI";
+ case X86ISD::CVTTP2UI: return "X86ISD::CVTTP2UI";
+ case X86ISD::CVTTP2SI_RND: return "X86ISD::CVTTP2SI_RND";
+ case X86ISD::CVTTP2UI_RND: return "X86ISD::CVTTP2UI_RND";
+ case X86ISD::CVTTS2SI_RND: return "X86ISD::CVTTS2SI_RND";
+ case X86ISD::CVTTS2UI_RND: return "X86ISD::CVTTS2UI_RND";
+ case X86ISD::CVTSI2P: return "X86ISD::CVTSI2P";
+ case X86ISD::CVTUI2P: return "X86ISD::CVTUI2P";
case X86ISD::VFPCLASS: return "X86ISD::VFPCLASS";
case X86ISD::VFPCLASSS: return "X86ISD::VFPCLASSS";
case X86ISD::MULTISHIFT: return "X86ISD::MULTISHIFT";
- case X86ISD::SCALAR_FP_TO_SINT_RND: return "X86ISD::SCALAR_FP_TO_SINT_RND";
- case X86ISD::SCALAR_FP_TO_UINT_RND: return "X86ISD::SCALAR_FP_TO_UINT_RND";
+ case X86ISD::SCALAR_SINT_TO_FP_RND: return "X86ISD::SCALAR_SINT_TO_FP_RND";
+ case X86ISD::SCALAR_UINT_TO_FP_RND: return "X86ISD::SCALAR_UINT_TO_FP_RND";
+ case X86ISD::CVTPS2PH: return "X86ISD::CVTPS2PH";
+ case X86ISD::CVTPH2PS: return "X86ISD::CVTPH2PS";
+ case X86ISD::CVTP2SI: return "X86ISD::CVTP2SI";
+ case X86ISD::CVTP2UI: return "X86ISD::CVTP2UI";
+ case X86ISD::CVTP2SI_RND: return "X86ISD::CVTP2SI_RND";
+ case X86ISD::CVTP2UI_RND: return "X86ISD::CVTP2UI_RND";
+ case X86ISD::CVTS2SI_RND: return "X86ISD::CVTS2SI_RND";
+ case X86ISD::CVTS2UI_RND: return "X86ISD::CVTS2UI_RND";
}
return nullptr;
}
@@ -24031,11 +25372,10 @@ X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
MachineBasicBlock *BB) const {
DebugLoc DL = MI.getDebugLoc();
MachineFunction *MF = BB->getParent();
- MachineModuleInfo *MMI = &MF->getMMI();
- MachineFrameInfo *MFI = MF->getFrameInfo();
+ MachineFrameInfo &MFI = MF->getFrameInfo();
MachineRegisterInfo *MRI = &MF->getRegInfo();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
- int FI = MFI->getFunctionContextIndex();
+ int FI = MFI.getFunctionContextIndex();
// Get a mapping of the call site numbers to all of the landing pads they're
// associated with.
@@ -24055,10 +25395,10 @@ X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
break;
}
- if (!MMI->hasCallSiteLandingPad(Sym))
+ if (!MF->hasCallSiteLandingPad(Sym))
continue;
- for (unsigned CSI : MMI->getCallSiteLandingPad(Sym)) {
+ for (unsigned CSI : MF->getCallSiteLandingPad(Sym)) {
CallSiteNumToLPad[CSI].push_back(&MBB);
MaxCSNum = std::max(MaxCSNum, CSI);
}
@@ -24208,173 +25548,18 @@ X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
return BB;
}
-// Replace 213-type (isel default) FMA3 instructions with 231-type for
-// accumulator loops. Writing back to the accumulator allows the coalescer
-// to remove extra copies in the loop.
-// FIXME: Do this on AVX512. We don't support 231 variants yet (PR23937).
-MachineBasicBlock *
-X86TargetLowering::emitFMA3Instr(MachineInstr &MI,
- MachineBasicBlock *MBB) const {
- MachineOperand &AddendOp = MI.getOperand(3);
-
- // Bail out early if the addend isn't a register - we can't switch these.
- if (!AddendOp.isReg())
- return MBB;
-
- MachineFunction &MF = *MBB->getParent();
- MachineRegisterInfo &MRI = MF.getRegInfo();
-
- // Check whether the addend is defined by a PHI:
- assert(MRI.hasOneDef(AddendOp.getReg()) && "Multiple defs in SSA?");
- MachineInstr &AddendDef = *MRI.def_instr_begin(AddendOp.getReg());
- if (!AddendDef.isPHI())
- return MBB;
-
- // Look for the following pattern:
- // loop:
- // %addend = phi [%entry, 0], [%loop, %result]
- // ...
- // %result<tied1> = FMA213 %m2<tied0>, %m1, %addend
-
- // Replace with:
- // loop:
- // %addend = phi [%entry, 0], [%loop, %result]
- // ...
- // %result<tied1> = FMA231 %addend<tied0>, %m1, %m2
-
- for (unsigned i = 1, e = AddendDef.getNumOperands(); i < e; i += 2) {
- assert(AddendDef.getOperand(i).isReg());
- MachineOperand PHISrcOp = AddendDef.getOperand(i);
- MachineInstr &PHISrcInst = *MRI.def_instr_begin(PHISrcOp.getReg());
- if (&PHISrcInst == &MI) {
- // Found a matching instruction.
- unsigned NewFMAOpc = 0;
- switch (MI.getOpcode()) {
- case X86::VFMADDPDr213r:
- NewFMAOpc = X86::VFMADDPDr231r;
- break;
- case X86::VFMADDPSr213r:
- NewFMAOpc = X86::VFMADDPSr231r;
- break;
- case X86::VFMADDSDr213r:
- NewFMAOpc = X86::VFMADDSDr231r;
- break;
- case X86::VFMADDSSr213r:
- NewFMAOpc = X86::VFMADDSSr231r;
- break;
- case X86::VFMSUBPDr213r:
- NewFMAOpc = X86::VFMSUBPDr231r;
- break;
- case X86::VFMSUBPSr213r:
- NewFMAOpc = X86::VFMSUBPSr231r;
- break;
- case X86::VFMSUBSDr213r:
- NewFMAOpc = X86::VFMSUBSDr231r;
- break;
- case X86::VFMSUBSSr213r:
- NewFMAOpc = X86::VFMSUBSSr231r;
- break;
- case X86::VFNMADDPDr213r:
- NewFMAOpc = X86::VFNMADDPDr231r;
- break;
- case X86::VFNMADDPSr213r:
- NewFMAOpc = X86::VFNMADDPSr231r;
- break;
- case X86::VFNMADDSDr213r:
- NewFMAOpc = X86::VFNMADDSDr231r;
- break;
- case X86::VFNMADDSSr213r:
- NewFMAOpc = X86::VFNMADDSSr231r;
- break;
- case X86::VFNMSUBPDr213r:
- NewFMAOpc = X86::VFNMSUBPDr231r;
- break;
- case X86::VFNMSUBPSr213r:
- NewFMAOpc = X86::VFNMSUBPSr231r;
- break;
- case X86::VFNMSUBSDr213r:
- NewFMAOpc = X86::VFNMSUBSDr231r;
- break;
- case X86::VFNMSUBSSr213r:
- NewFMAOpc = X86::VFNMSUBSSr231r;
- break;
- case X86::VFMADDSUBPDr213r:
- NewFMAOpc = X86::VFMADDSUBPDr231r;
- break;
- case X86::VFMADDSUBPSr213r:
- NewFMAOpc = X86::VFMADDSUBPSr231r;
- break;
- case X86::VFMSUBADDPDr213r:
- NewFMAOpc = X86::VFMSUBADDPDr231r;
- break;
- case X86::VFMSUBADDPSr213r:
- NewFMAOpc = X86::VFMSUBADDPSr231r;
- break;
-
- case X86::VFMADDPDr213rY:
- NewFMAOpc = X86::VFMADDPDr231rY;
- break;
- case X86::VFMADDPSr213rY:
- NewFMAOpc = X86::VFMADDPSr231rY;
- break;
- case X86::VFMSUBPDr213rY:
- NewFMAOpc = X86::VFMSUBPDr231rY;
- break;
- case X86::VFMSUBPSr213rY:
- NewFMAOpc = X86::VFMSUBPSr231rY;
- break;
- case X86::VFNMADDPDr213rY:
- NewFMAOpc = X86::VFNMADDPDr231rY;
- break;
- case X86::VFNMADDPSr213rY:
- NewFMAOpc = X86::VFNMADDPSr231rY;
- break;
- case X86::VFNMSUBPDr213rY:
- NewFMAOpc = X86::VFNMSUBPDr231rY;
- break;
- case X86::VFNMSUBPSr213rY:
- NewFMAOpc = X86::VFNMSUBPSr231rY;
- break;
- case X86::VFMADDSUBPDr213rY:
- NewFMAOpc = X86::VFMADDSUBPDr231rY;
- break;
- case X86::VFMADDSUBPSr213rY:
- NewFMAOpc = X86::VFMADDSUBPSr231rY;
- break;
- case X86::VFMSUBADDPDr213rY:
- NewFMAOpc = X86::VFMSUBADDPDr231rY;
- break;
- case X86::VFMSUBADDPSr213rY:
- NewFMAOpc = X86::VFMSUBADDPSr231rY;
- break;
- default:
- llvm_unreachable("Unrecognized FMA variant.");
- }
-
- const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
- MachineInstrBuilder MIB =
- BuildMI(MF, MI.getDebugLoc(), TII.get(NewFMAOpc))
- .addOperand(MI.getOperand(0))
- .addOperand(MI.getOperand(3))
- .addOperand(MI.getOperand(2))
- .addOperand(MI.getOperand(1));
- MBB->insert(MachineBasicBlock::iterator(MI), MIB);
- MI.eraseFromParent();
- }
- }
-
- return MBB;
-}
-
MachineBasicBlock *
X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
MachineBasicBlock *BB) const {
+ MachineFunction *MF = BB->getParent();
+ const TargetInstrInfo *TII = Subtarget.getInstrInfo();
+ DebugLoc DL = MI.getDebugLoc();
+
switch (MI.getOpcode()) {
default: llvm_unreachable("Unexpected instr type to insert");
case X86::TAILJMPd64:
case X86::TAILJMPr64:
case X86::TAILJMPm64:
- case X86::TAILJMPd64_REX:
case X86::TAILJMPr64_REX:
case X86::TAILJMPm64_REX:
llvm_unreachable("TAILJMP64 would not be touched here.");
@@ -24423,8 +25608,6 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case X86::RDFLAGS32:
case X86::RDFLAGS64: {
- DebugLoc DL = MI.getDebugLoc();
- const TargetInstrInfo *TII = Subtarget.getInstrInfo();
unsigned PushF =
MI.getOpcode() == X86::RDFLAGS32 ? X86::PUSHF32 : X86::PUSHF64;
unsigned Pop = MI.getOpcode() == X86::RDFLAGS32 ? X86::POP32r : X86::POP64r;
@@ -24442,8 +25625,6 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case X86::WRFLAGS32:
case X86::WRFLAGS64: {
- DebugLoc DL = MI.getDebugLoc();
- const TargetInstrInfo *TII = Subtarget.getInstrInfo();
unsigned Push =
MI.getOpcode() == X86::WRFLAGS32 ? X86::PUSH32r : X86::PUSH64r;
unsigned PopF =
@@ -24468,19 +25649,15 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case X86::FP80_TO_INT16_IN_MEM:
case X86::FP80_TO_INT32_IN_MEM:
case X86::FP80_TO_INT64_IN_MEM: {
- MachineFunction *F = BB->getParent();
- const TargetInstrInfo *TII = Subtarget.getInstrInfo();
- DebugLoc DL = MI.getDebugLoc();
-
// Change the floating point control register to use "round towards zero"
// mode when truncating to an integer value.
- int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2, false);
+ int CWFrameIdx = MF->getFrameInfo().CreateStackObject(2, 2, false);
addFrameReference(BuildMI(*BB, MI, DL,
TII->get(X86::FNSTCW16m)), CWFrameIdx);
// Load the old value of the high byte of the control word...
unsigned OldCW =
- F->getRegInfo().createVirtualRegister(&X86::GR16RegClass);
+ MF->getRegInfo().createVirtualRegister(&X86::GR16RegClass);
addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16rm), OldCW),
CWFrameIdx);
@@ -24588,39 +25765,57 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
case TargetOpcode::PATCHPOINT:
return emitPatchPoint(MI, BB);
- case X86::VFMADDPDr213r:
- case X86::VFMADDPSr213r:
- case X86::VFMADDSDr213r:
- case X86::VFMADDSSr213r:
- case X86::VFMSUBPDr213r:
- case X86::VFMSUBPSr213r:
- case X86::VFMSUBSDr213r:
- case X86::VFMSUBSSr213r:
- case X86::VFNMADDPDr213r:
- case X86::VFNMADDPSr213r:
- case X86::VFNMADDSDr213r:
- case X86::VFNMADDSSr213r:
- case X86::VFNMSUBPDr213r:
- case X86::VFNMSUBPSr213r:
- case X86::VFNMSUBSDr213r:
- case X86::VFNMSUBSSr213r:
- case X86::VFMADDSUBPDr213r:
- case X86::VFMADDSUBPSr213r:
- case X86::VFMSUBADDPDr213r:
- case X86::VFMSUBADDPSr213r:
- case X86::VFMADDPDr213rY:
- case X86::VFMADDPSr213rY:
- case X86::VFMSUBPDr213rY:
- case X86::VFMSUBPSr213rY:
- case X86::VFNMADDPDr213rY:
- case X86::VFNMADDPSr213rY:
- case X86::VFNMSUBPDr213rY:
- case X86::VFNMSUBPSr213rY:
- case X86::VFMADDSUBPDr213rY:
- case X86::VFMADDSUBPSr213rY:
- case X86::VFMSUBADDPDr213rY:
- case X86::VFMSUBADDPSr213rY:
- return emitFMA3Instr(MI, BB);
+ case X86::LCMPXCHG8B: {
+ const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
+ // In addition to the four E[ABCD] registers implied by the encoding,
+ // CMPXCHG8B requires a memory operand. If the current architecture is i686
+ // and the current function needs a base pointer - which is ESI on i686 -
+ // the register allocator would not be able to allocate registers for an
+ // address of the form X(%reg, %reg, Y): there would never be enough
+ // unreserved registers during regalloc (without the need for a base pointer
+ // the only option would be X(%edi, %esi, Y)). We give the register allocator
+ // a hand by precomputing the address in a new vreg using LEA.
+
+ // If it is not i686 or there is no base pointer - nothing to do here.
+ if (!Subtarget.is32Bit() || !TRI->hasBasePointer(*MF))
+ return BB;
+
+ // Even though this code does not necessarily need the base pointer to
+ // be ESI, we check for that. The reason: if this assert fails, something
+ // has changed in the compiler's base pointer handling, and it most
+ // probably has to be addressed here as well.
+ assert(TRI->getBaseRegister() == X86::ESI &&
+ "LCMPXCHG8B custom insertion for i686 is written with X86::ESI as a "
+ "base pointer in mind");
+
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ MVT SPTy = getPointerTy(MF->getDataLayout());
+ const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy);
+ unsigned computedAddrVReg = MRI.createVirtualRegister(AddrRegClass);
+
+ X86AddressMode AM = getAddressFromInstr(&MI, 0);
+ // Regalloc does not need any help when the memory operand of CMPXCHG8B
+ // does not use an index register.
+ if (AM.IndexReg == X86::NoRegister)
+ return BB;
+
+ // After X86TargetLowering::ReplaceNodeResults CMPXCHG8B is glued to its
+ // four operand definitions that are E[ABCD] registers. We skip them and
+ // then insert the LEA.
+ MachineBasicBlock::iterator MBBI(MI);
+ while (MBBI->definesRegister(X86::EAX) || MBBI->definesRegister(X86::EBX) ||
+ MBBI->definesRegister(X86::ECX) || MBBI->definesRegister(X86::EDX))
+ --MBBI;
+ addFullAddress(
+ BuildMI(*BB, *MBBI, DL, TII->get(X86::LEA32r), computedAddrVReg), AM);
+
+ setDirectAddressInInstr(&MI, 0, computedAddrVReg);
+
+ return BB;
+ }
+ case X86::LCMPXCHG16B:
+ return BB;
case X86::LCMPXCHG8B_SAVE_EBX:
case X86::LCMPXCHG16B_SAVE_RBX: {
unsigned BasePtr =
@@ -24667,7 +25862,7 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
// These nodes' second result is a boolean.
if (Op.getResNo() == 0)
break;
- // Fallthrough
+ LLVM_FALLTHROUGH;
case X86ISD::SETCC:
KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1);
break;
@@ -24676,16 +25871,36 @@ void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - NumLoBits);
break;
}
+ case X86ISD::VZEXT: {
+ SDValue N0 = Op.getOperand(0);
+ unsigned NumElts = Op.getValueType().getVectorNumElements();
+ unsigned InNumElts = N0.getValueType().getVectorNumElements();
+ unsigned InBitWidth = N0.getValueType().getScalarSizeInBits();
+
+ KnownZero = KnownOne = APInt(InBitWidth, 0);
+ APInt DemandedElts = APInt::getLowBitsSet(InNumElts, NumElts);
+ DAG.computeKnownBits(N0, KnownZero, KnownOne, DemandedElts, Depth + 1);
+ KnownOne = KnownOne.zext(BitWidth);
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - InBitWidth);
+ break;
+ }
}
}
unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(
- SDValue Op,
- const SelectionDAG &,
- unsigned Depth) const {
+ SDValue Op, const SelectionDAG &DAG, unsigned Depth) const {
// SETCC_CARRY sets the dest to ~0 for true or 0 for false.
if (Op.getOpcode() == X86ISD::SETCC_CARRY)
- return Op.getValueType().getScalarSizeInBits();
+ return Op.getScalarValueSizeInBits();
+
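+ // A sign-extended vector element has at least (dst width - src width) more
+ // known sign bits than its source element.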
+ if (Op.getOpcode() == X86ISD::VSEXT) {
+ EVT VT = Op.getValueType();
+ EVT SrcVT = Op.getOperand(0).getValueType();
+ unsigned Tmp = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1);
+ Tmp += VT.getScalarSizeInBits() - SrcVT.getScalarSizeInBits();
+ return Tmp;
+ }
// Fallback case.
return 1;
@@ -24706,171 +25921,114 @@ bool X86TargetLowering::isGAPlusOffset(SDNode *N,
return TargetLowering::isGAPlusOffset(N, GA, Offset);
}
-/// Performs shuffle combines for 256-bit vectors.
-/// FIXME: This could be expanded to support 512 bit vectors as well.
-static SDValue combineShuffle256(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget &Subtarget) {
- SDLoc dl(N);
- ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
- SDValue V1 = SVOp->getOperand(0);
- SDValue V2 = SVOp->getOperand(1);
- MVT VT = SVOp->getSimpleValueType(0);
- unsigned NumElems = VT.getVectorNumElements();
-
- if (V1.getOpcode() == ISD::CONCAT_VECTORS &&
- V2.getOpcode() == ISD::CONCAT_VECTORS) {
- //
- // 0,0,0,...
- // |
- // V UNDEF BUILD_VECTOR UNDEF
- // \ / \ /
- // CONCAT_VECTOR CONCAT_VECTOR
- // \ /
- // \ /
- // RESULT: V + zero extended
- //
- if (V2.getOperand(0).getOpcode() != ISD::BUILD_VECTOR ||
- !V2.getOperand(1).isUndef() || !V1.getOperand(1).isUndef())
- return SDValue();
-
- if (!ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()))
- return SDValue();
-
- // To match the shuffle mask, the first half of the mask should
- // be exactly the first vector, and all the rest a splat with the
- // first element of the second one.
- for (unsigned i = 0; i != NumElems/2; ++i)
- if (!isUndefOrEqual(SVOp->getMaskElt(i), i) ||
- !isUndefOrEqual(SVOp->getMaskElt(i+NumElems/2), NumElems))
- return SDValue();
-
- // If V1 is coming from a vector load then just fold to a VZEXT_LOAD.
- if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(V1.getOperand(0))) {
- if (Ld->hasNUsesOfValue(1, 0)) {
- SDVTList Tys = DAG.getVTList(MVT::v4i64, MVT::Other);
- SDValue Ops[] = { Ld->getChain(), Ld->getBasePtr() };
- SDValue ResNode =
- DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops,
- Ld->getMemoryVT(),
- Ld->getPointerInfo(),
- Ld->getAlignment(),
- false/*isVolatile*/, true/*ReadMem*/,
- false/*WriteMem*/);
-
- // Make sure the newly-created LOAD is in the same position as Ld in
- // terms of dependency. We create a TokenFactor for Ld and ResNode,
- // and update uses of Ld's output chain to use the TokenFactor.
- if (Ld->hasAnyUseOfValue(1)) {
- SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
- SDValue(Ld, 1), SDValue(ResNode.getNode(), 1));
- DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), NewChain);
- DAG.UpdateNodeOperands(NewChain.getNode(), SDValue(Ld, 1),
- SDValue(ResNode.getNode(), 1));
- }
-
- return DAG.getBitcast(VT, ResNode);
- }
- }
-
- // Emit a zeroed vector and insert the desired subvector on its
- // first half.
- SDValue Zeros = getZeroVector(VT, Subtarget, DAG, dl);
- SDValue InsV = insert128BitVector(Zeros, V1.getOperand(0), 0, DAG, dl);
- return DCI.CombineTo(N, InsV);
- }
-
- return SDValue();
-}
-
// Attempt to match a combined shuffle mask against supported unary shuffle
// instructions.
// TODO: Investigate sharing more of this with shuffle lowering.
-static bool matchUnaryVectorShuffle(MVT SrcVT, ArrayRef<int> Mask,
+static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
const X86Subtarget &Subtarget,
- unsigned &Shuffle, MVT &ShuffleVT) {
- bool FloatDomain = SrcVT.isFloatingPoint() ||
- (!Subtarget.hasAVX2() && SrcVT.is256BitVector());
-
- // Match a 128-bit integer vector against a VZEXT_MOVL (MOVQ) instruction.
- if (!FloatDomain && SrcVT.is128BitVector() &&
- isTargetShuffleEquivalent(Mask, {0, SM_SentinelZero})) {
+ unsigned &Shuffle, MVT &SrcVT, MVT &DstVT) {
+ unsigned NumMaskElts = Mask.size();
+ unsigned MaskEltSize = MaskVT.getScalarSizeInBits();
+ bool FloatDomain = MaskVT.isFloatingPoint() ||
+ (!Subtarget.hasAVX2() && MaskVT.is256BitVector());
+
+ // Match against a VZEXT_MOVL instruction, SSE1 only supports 32-bits (MOVSS).
+ if (((MaskEltSize == 32) || (MaskEltSize == 64 && Subtarget.hasSSE2())) &&
+ isUndefOrEqual(Mask[0], 0) &&
+ isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1)) {
Shuffle = X86ISD::VZEXT_MOVL;
- ShuffleVT = MVT::v2i64;
+ SrcVT = DstVT = !Subtarget.hasSSE2() ? MVT::v4f32 : MaskVT;
return true;
}
+ // Match against a VZEXT instruction.
+ // TODO: Add 256/512-bit vector support.
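+ // For example, a v16i8 mask <0,Z,Z,Z, 1,Z,Z,Z, 2,Z,Z,Z, 3,Z,Z,Z> matches a
+ // zero extension from v16i8 to v4i32 (Scale == 4).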
+ if (!FloatDomain && MaskVT.is128BitVector() && Subtarget.hasSSE41()) {
+ unsigned MaxScale = 64 / MaskEltSize;
+ for (unsigned Scale = 2; Scale <= MaxScale; Scale *= 2) {
+ bool Match = true;
+ unsigned NumDstElts = NumMaskElts / Scale;
+ for (unsigned i = 0; i != NumDstElts && Match; ++i) {
+ Match &= isUndefOrEqual(Mask[i * Scale], (int)i);
+ Match &= isUndefOrZeroInRange(Mask, (i * Scale) + 1, Scale - 1);
+ }
+ if (Match) {
+ SrcVT = MaskVT;
+ DstVT = MVT::getIntegerVT(Scale * MaskEltSize);
+ DstVT = MVT::getVectorVT(DstVT, NumDstElts);
+ Shuffle = X86ISD::VZEXT;
+ return true;
+ }
+ }
+ }
+
// Check if we have SSE3 which will let us use MOVDDUP etc. The
// instructions are no slower than UNPCKLPD but has the option to
// fold the input operand into even an unaligned memory load.
- if (SrcVT.is128BitVector() && Subtarget.hasSSE3() && FloatDomain) {
+ if (MaskVT.is128BitVector() && Subtarget.hasSSE3() && FloatDomain) {
if (isTargetShuffleEquivalent(Mask, {0, 0})) {
Shuffle = X86ISD::MOVDDUP;
- ShuffleVT = MVT::v2f64;
+ SrcVT = DstVT = MVT::v2f64;
return true;
}
if (isTargetShuffleEquivalent(Mask, {0, 0, 2, 2})) {
Shuffle = X86ISD::MOVSLDUP;
- ShuffleVT = MVT::v4f32;
+ SrcVT = DstVT = MVT::v4f32;
return true;
}
if (isTargetShuffleEquivalent(Mask, {1, 1, 3, 3})) {
Shuffle = X86ISD::MOVSHDUP;
- ShuffleVT = MVT::v4f32;
+ SrcVT = DstVT = MVT::v4f32;
return true;
}
}
- if (SrcVT.is256BitVector() && FloatDomain) {
+ if (MaskVT.is256BitVector() && FloatDomain) {
assert(Subtarget.hasAVX() && "AVX required for 256-bit vector shuffles");
if (isTargetShuffleEquivalent(Mask, {0, 0, 2, 2})) {
Shuffle = X86ISD::MOVDDUP;
- ShuffleVT = MVT::v4f64;
+ SrcVT = DstVT = MVT::v4f64;
return true;
}
if (isTargetShuffleEquivalent(Mask, {0, 0, 2, 2, 4, 4, 6, 6})) {
Shuffle = X86ISD::MOVSLDUP;
- ShuffleVT = MVT::v8f32;
+ SrcVT = DstVT = MVT::v8f32;
return true;
}
if (isTargetShuffleEquivalent(Mask, {1, 1, 3, 3, 5, 5, 7, 7})) {
Shuffle = X86ISD::MOVSHDUP;
- ShuffleVT = MVT::v8f32;
+ SrcVT = DstVT = MVT::v8f32;
return true;
}
}
- if (SrcVT.is512BitVector() && FloatDomain) {
+ if (MaskVT.is512BitVector() && FloatDomain) {
assert(Subtarget.hasAVX512() &&
"AVX512 required for 512-bit vector shuffles");
if (isTargetShuffleEquivalent(Mask, {0, 0, 2, 2, 4, 4, 6, 6})) {
Shuffle = X86ISD::MOVDDUP;
- ShuffleVT = MVT::v8f64;
+ SrcVT = DstVT = MVT::v8f64;
return true;
}
if (isTargetShuffleEquivalent(
Mask, {0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14})) {
Shuffle = X86ISD::MOVSLDUP;
- ShuffleVT = MVT::v16f32;
+ SrcVT = DstVT = MVT::v16f32;
return true;
}
if (isTargetShuffleEquivalent(
Mask, {1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15})) {
Shuffle = X86ISD::MOVSHDUP;
- ShuffleVT = MVT::v16f32;
+ SrcVT = DstVT = MVT::v16f32;
return true;
}
}
// Attempt to match against broadcast-from-vector.
if (Subtarget.hasAVX2()) {
- unsigned NumElts = Mask.size();
- SmallVector<int, 64> BroadcastMask(NumElts, 0);
+ SmallVector<int, 64> BroadcastMask(NumMaskElts, 0);
if (isTargetShuffleEquivalent(Mask, BroadcastMask)) {
- unsigned EltSize = SrcVT.getSizeInBits() / NumElts;
- ShuffleVT = FloatDomain ? MVT::getFloatingPointVT(EltSize)
- : MVT::getIntegerVT(EltSize);
- ShuffleVT = MVT::getVectorVT(ShuffleVT, NumElts);
+ SrcVT = DstVT = MaskVT;
Shuffle = X86ISD::VBROADCAST;
return true;
}
@@ -24882,19 +26040,44 @@ static bool matchUnaryVectorShuffle(MVT SrcVT, ArrayRef<int> Mask,
// Attempt to match a combined shuffle mask against supported unary immediate
// permute instructions.
// TODO: Investigate sharing more of this with shuffle lowering.
-static bool matchPermuteVectorShuffle(MVT SrcVT, ArrayRef<int> Mask,
- const X86Subtarget &Subtarget,
- unsigned &Shuffle, MVT &ShuffleVT,
- unsigned &PermuteImm) {
- // Ensure we don't contain any zero elements.
- for (int M : Mask) {
- if (M == SM_SentinelZero)
- return false;
- assert(SM_SentinelUndef <= M && M < (int)Mask.size() &&
- "Expected unary shuffle");
+static bool matchUnaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
+ const X86Subtarget &Subtarget,
+ unsigned &Shuffle, MVT &ShuffleVT,
+ unsigned &PermuteImm) {
+ unsigned NumMaskElts = Mask.size();
+ bool FloatDomain = MaskVT.isFloatingPoint();
+
+ bool ContainsZeros = false;
+ SmallBitVector Zeroable(NumMaskElts, false);
+ for (unsigned i = 0; i != NumMaskElts; ++i) {
+ int M = Mask[i];
+ Zeroable[i] = isUndefOrZero(M);
+ ContainsZeros |= (M == SM_SentinelZero);
+ }
+
+ // Attempt to match against byte/bit shifts.
+ // FIXME: Add 512-bit support.
+ if (!FloatDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE2()) ||
+ (MaskVT.is256BitVector() && Subtarget.hasAVX2()))) {
+ int ShiftAmt = matchVectorShuffleAsShift(ShuffleVT, Shuffle,
+ MaskVT.getScalarSizeInBits(), Mask,
+ 0, Zeroable, Subtarget);
+ if (0 < ShiftAmt) {
+ PermuteImm = (unsigned)ShiftAmt;
+ return true;
+ }
}
- unsigned MaskScalarSizeInBits = SrcVT.getSizeInBits() / Mask.size();
+ // Ensure we don't contain any zero elements.
+ if (ContainsZeros)
+ return false;
+
+ assert(llvm::all_of(Mask, [&](int M) {
+ return SM_SentinelUndef <= M && M < (int)NumMaskElts;
+ }) && "Expected unary shuffle");
+
+ unsigned InputSizeInBits = MaskVT.getSizeInBits();
+ unsigned MaskScalarSizeInBits = InputSizeInBits / Mask.size();
MVT MaskEltVT = MVT::getIntegerVT(MaskScalarSizeInBits);
// Handle PSHUFLW/PSHUFHW repeated patterns.
@@ -24908,7 +26091,7 @@ static bool matchPermuteVectorShuffle(MVT SrcVT, ArrayRef<int> Mask,
if (isUndefOrInRange(LoMask, 0, 4) &&
isSequentialOrUndefInRange(HiMask, 0, 4, 4)) {
Shuffle = X86ISD::PSHUFLW;
- ShuffleVT = MVT::getVectorVT(MVT::i16, SrcVT.getSizeInBits() / 16);
+ ShuffleVT = MVT::getVectorVT(MVT::i16, InputSizeInBits / 16);
PermuteImm = getV4X86ShuffleImm(LoMask);
return true;
}
@@ -24922,7 +26105,7 @@ static bool matchPermuteVectorShuffle(MVT SrcVT, ArrayRef<int> Mask,
OffsetHiMask[i] = (HiMask[i] < 0 ? HiMask[i] : HiMask[i] - 4);
Shuffle = X86ISD::PSHUFHW;
- ShuffleVT = MVT::getVectorVT(MVT::i16, SrcVT.getSizeInBits() / 16);
+ ShuffleVT = MVT::getVectorVT(MVT::i16, InputSizeInBits / 16);
PermuteImm = getV4X86ShuffleImm(OffsetHiMask);
return true;
}
@@ -24938,24 +26121,23 @@ static bool matchPermuteVectorShuffle(MVT SrcVT, ArrayRef<int> Mask,
// AVX introduced the VPERMILPD/VPERMILPS float permutes; before then we
// had to use 2-input SHUFPD/SHUFPS shuffles (not handled here).
- bool FloatDomain = SrcVT.isFloatingPoint();
if (FloatDomain && !Subtarget.hasAVX())
return false;
// Pre-AVX2 we must use float shuffles on 256-bit vectors.
- if (SrcVT.is256BitVector() && !Subtarget.hasAVX2())
+ if (MaskVT.is256BitVector() && !Subtarget.hasAVX2())
FloatDomain = true;
// Check for lane crossing permutes.
if (is128BitLaneCrossingShuffleMask(MaskEltVT, Mask)) {
// PERMPD/PERMQ permutes within a 256-bit vector (AVX2+).
- if (Subtarget.hasAVX2() && SrcVT.is256BitVector() && Mask.size() == 4) {
+ if (Subtarget.hasAVX2() && MaskVT.is256BitVector() && Mask.size() == 4) {
Shuffle = X86ISD::VPERMI;
ShuffleVT = (FloatDomain ? MVT::v4f64 : MVT::v4i64);
PermuteImm = getV4X86ShuffleImm(Mask);
return true;
}
- if (Subtarget.hasAVX512() && SrcVT.is512BitVector() && Mask.size() == 8) {
+ if (Subtarget.hasAVX512() && MaskVT.is512BitVector() && Mask.size() == 8) {
SmallVector<int, 4> RepeatedMask;
if (is256BitLaneRepeatedShuffleMask(MVT::v8f64, Mask, RepeatedMask)) {
Shuffle = X86ISD::VPERMI;
@@ -24994,7 +26176,7 @@ static bool matchPermuteVectorShuffle(MVT SrcVT, ArrayRef<int> Mask,
Shuffle = (FloatDomain ? X86ISD::VPERMILPI : X86ISD::PSHUFD);
ShuffleVT = (FloatDomain ? MVT::f32 : MVT::i32);
- ShuffleVT = MVT::getVectorVT(ShuffleVT, SrcVT.getSizeInBits() / 32);
+ ShuffleVT = MVT::getVectorVT(ShuffleVT, InputSizeInBits / 32);
PermuteImm = getV4X86ShuffleImm(WordMask);
return true;
}
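// Illustrative sketch (simplified, standalone; not part of the patch): the
// PSHUFLW/PSHUFHW/PSHUFD/VPERMILPI matches above all end by packing a 4-lane
// selection into an 8-bit immediate, two bits per destination lane, which is
// what getV4X86ShuffleImm produces. A hypothetical restatement:
#include <cassert>
#include <cstdint>

static uint8_t packV4ShuffleImm(const int Mask[4]) {
  uint8_t Imm = 0;
  for (int i = 0; i != 4; ++i) {
    int M = Mask[i] < 0 ? 0 : Mask[i];  // undef lanes may pick anything
    assert(M < 4 && "expected a per-lane index in [0,3]");
    Imm |= (uint8_t)(M << (2 * i));     // lane i occupies bits [2i+1:2i]
  }
  return Imm;
}
// e.g. the identity selection {0, 1, 2, 3} packs to 0xE4.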
@@ -25002,47 +26184,260 @@ static bool matchPermuteVectorShuffle(MVT SrcVT, ArrayRef<int> Mask,
// Attempt to match a combined unary shuffle mask against supported binary
// shuffle instructions.
// TODO: Investigate sharing more of this with shuffle lowering.
-static bool matchBinaryVectorShuffle(MVT SrcVT, ArrayRef<int> Mask,
- unsigned &Shuffle, MVT &ShuffleVT) {
- bool FloatDomain = SrcVT.isFloatingPoint();
+static bool matchBinaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
+ SDValue &V1, SDValue &V2,
+ const X86Subtarget &Subtarget,
+ unsigned &Shuffle, MVT &ShuffleVT,
+ bool IsUnary) {
+ bool FloatDomain = MaskVT.isFloatingPoint();
+ unsigned EltSizeInBits = MaskVT.getScalarSizeInBits();
- if (SrcVT.is128BitVector()) {
+ if (MaskVT.is128BitVector()) {
if (isTargetShuffleEquivalent(Mask, {0, 0}) && FloatDomain) {
+ V2 = V1;
Shuffle = X86ISD::MOVLHPS;
ShuffleVT = MVT::v4f32;
return true;
}
if (isTargetShuffleEquivalent(Mask, {1, 1}) && FloatDomain) {
+ V2 = V1;
Shuffle = X86ISD::MOVHLPS;
ShuffleVT = MVT::v4f32;
return true;
}
- if (isTargetShuffleEquivalent(Mask, {0, 0, 1, 1}) && FloatDomain) {
- Shuffle = X86ISD::UNPCKL;
- ShuffleVT = MVT::v4f32;
+ if (isTargetShuffleEquivalent(Mask, {0, 3}) && Subtarget.hasSSE2() &&
+ (FloatDomain || !Subtarget.hasSSE41())) {
+ std::swap(V1, V2);
+ Shuffle = X86ISD::MOVSD;
+ ShuffleVT = MaskVT;
return true;
}
- if (isTargetShuffleEquivalent(Mask, {2, 2, 3, 3}) && FloatDomain) {
- Shuffle = X86ISD::UNPCKH;
- ShuffleVT = MVT::v4f32;
+ if (isTargetShuffleEquivalent(Mask, {4, 1, 2, 3}) &&
+ (FloatDomain || !Subtarget.hasSSE41())) {
+ Shuffle = X86ISD::MOVSS;
+ ShuffleVT = MaskVT;
return true;
}
- if (isTargetShuffleEquivalent(Mask, {0, 0, 1, 1, 2, 2, 3, 3}) ||
- isTargetShuffleEquivalent(
- Mask, {0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7})) {
- Shuffle = X86ISD::UNPCKL;
- ShuffleVT = Mask.size() == 8 ? MVT::v8i16 : MVT::v16i8;
+ }
+
+ // Attempt to match against either a unary or binary UNPCKL/UNPCKH shuffle.
+ if ((MaskVT == MVT::v4f32 && Subtarget.hasSSE1()) ||
+ (MaskVT.is128BitVector() && Subtarget.hasSSE2()) ||
+ (MaskVT.is256BitVector() && 32 <= EltSizeInBits && Subtarget.hasAVX()) ||
+ (MaskVT.is256BitVector() && Subtarget.hasAVX2()) ||
+ (MaskVT.is512BitVector() && Subtarget.hasAVX512())) {
+ MVT LegalVT = MaskVT;
+ if (LegalVT.is256BitVector() && !Subtarget.hasAVX2())
+ LegalVT = (32 == EltSizeInBits ? MVT::v8f32 : MVT::v4f64);
+
+ SmallVector<int, 64> Unpckl, Unpckh;
+ if (IsUnary) {
+ createUnpackShuffleMask(MaskVT, Unpckl, true, true);
+ if (isTargetShuffleEquivalent(Mask, Unpckl)) {
+ V2 = V1;
+ Shuffle = X86ISD::UNPCKL;
+ ShuffleVT = LegalVT;
+ return true;
+ }
+
+ createUnpackShuffleMask(MaskVT, Unpckh, false, true);
+ if (isTargetShuffleEquivalent(Mask, Unpckh)) {
+ V2 = V1;
+ Shuffle = X86ISD::UNPCKH;
+ ShuffleVT = LegalVT;
+ return true;
+ }
+ } else {
+ createUnpackShuffleMask(MaskVT, Unpckl, true, false);
+ if (isTargetShuffleEquivalent(Mask, Unpckl)) {
+ Shuffle = X86ISD::UNPCKL;
+ ShuffleVT = LegalVT;
+ return true;
+ }
+
+ createUnpackShuffleMask(MaskVT, Unpckh, false, false);
+ if (isTargetShuffleEquivalent(Mask, Unpckh)) {
+ Shuffle = X86ISD::UNPCKH;
+ ShuffleVT = LegalVT;
+ return true;
+ }
+
+ ShuffleVectorSDNode::commuteMask(Unpckl);
+ if (isTargetShuffleEquivalent(Mask, Unpckl)) {
+ std::swap(V1, V2);
+ Shuffle = X86ISD::UNPCKL;
+ ShuffleVT = LegalVT;
+ return true;
+ }
+
+ ShuffleVectorSDNode::commuteMask(Unpckh);
+ if (isTargetShuffleEquivalent(Mask, Unpckh)) {
+ std::swap(V1, V2);
+ Shuffle = X86ISD::UNPCKH;
+ ShuffleVT = LegalVT;
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
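// Illustrative sketch (simplified, standalone; not part of the patch): the
// masks produced by createUnpackShuffleMask and compared against above. X86
// unpacks interleave the low (UNPCKL) or high (UNPCKH) halves of the two
// inputs within each 128-bit lane; in the unary case the second input is the
// first one again. A hypothetical restatement:
#include <vector>

static std::vector<int> makeUnpackMask(unsigned NumElts, unsigned EltsPerLane,
                                       bool Lo, bool Unary) {
  std::vector<int> Mask;
  for (unsigned Lane = 0; Lane != NumElts; Lane += EltsPerLane) {
    unsigned Base = Lane + (Lo ? 0 : EltsPerLane / 2);
    for (unsigned i = 0; i != EltsPerLane / 2; ++i) {
      Mask.push_back(Base + i);                          // element from V1
      Mask.push_back(Base + i + (Unary ? 0 : NumElts));  // from V2, or V1 again
    }
  }
  return Mask;
}
// e.g. binary v4i32 UNPCKL is {0,4,1,5}; unary v4i32 UNPCKL is {0,0,1,1}.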
+
+static bool matchBinaryPermuteVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
+ SDValue &V1, SDValue &V2,
+ SDLoc &DL, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget,
+ unsigned &Shuffle, MVT &ShuffleVT,
+ unsigned &PermuteImm) {
+ unsigned NumMaskElts = Mask.size();
+ bool FloatDomain = MaskVT.isFloatingPoint();
+
+ // Attempt to match against PALIGNR byte rotate.
+ if (!FloatDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSSE3()) ||
+ (MaskVT.is256BitVector() && Subtarget.hasAVX2()))) {
+ int ByteRotation = matchVectorShuffleAsByteRotate(MaskVT, V1, V2, Mask);
+ if (0 < ByteRotation) {
+ Shuffle = X86ISD::PALIGNR;
+ ShuffleVT = MVT::getVectorVT(MVT::i8, MaskVT.getSizeInBits() / 8);
+ PermuteImm = ByteRotation;
+ return true;
+ }
+ }
+
+ // Attempt to combine to X86ISD::BLENDI.
+ if (NumMaskElts <= 8 && ((Subtarget.hasSSE41() && MaskVT.is128BitVector()) ||
+ (Subtarget.hasAVX() && MaskVT.is256BitVector()))) {
+ // Determine a type compatible with X86ISD::BLENDI.
+ // TODO - add 16i16 support (requires lane duplication).
+ MVT BlendVT = MaskVT;
+ if (Subtarget.hasAVX2()) {
+ if (BlendVT == MVT::v4i64)
+ BlendVT = MVT::v8i32;
+ else if (BlendVT == MVT::v2i64)
+ BlendVT = MVT::v4i32;
+ } else {
+ if (BlendVT == MVT::v2i64 || BlendVT == MVT::v4i32)
+ BlendVT = MVT::v8i16;
+ else if (BlendVT == MVT::v4i64)
+ BlendVT = MVT::v4f64;
+ else if (BlendVT == MVT::v8i32)
+ BlendVT = MVT::v8f32;
+ }
+
+ unsigned BlendSize = BlendVT.getVectorNumElements();
+ unsigned MaskRatio = BlendSize / NumMaskElts;
+
+ // Can we blend with zero?
+ if (isSequentialOrUndefOrZeroInRange(Mask, /*Pos*/ 0, /*Size*/ NumMaskElts,
+ /*Low*/ 0) &&
+ NumMaskElts <= BlendVT.getVectorNumElements()) {
+ PermuteImm = 0;
+ for (unsigned i = 0; i != BlendSize; ++i)
+ if (Mask[i / MaskRatio] < 0)
+ PermuteImm |= 1u << i;
+
+ V2 = getZeroVector(BlendVT, Subtarget, DAG, DL);
+ Shuffle = X86ISD::BLENDI;
+ ShuffleVT = BlendVT;
+ return true;
+ }
+
+ // Attempt to match as a binary blend.
+ if (NumMaskElts <= BlendVT.getVectorNumElements()) {
+ bool MatchBlend = true;
+ for (int i = 0; i != (int)NumMaskElts; ++i) {
+ int M = Mask[i];
+ if (M == SM_SentinelUndef)
+ continue;
+ else if (M == SM_SentinelZero)
+ MatchBlend = false;
+ else if ((M != i) && (M != (i + (int)NumMaskElts)))
+ MatchBlend = false;
+ }
+
+ if (MatchBlend) {
+ PermuteImm = 0;
+ for (unsigned i = 0; i != BlendSize; ++i)
+ if ((int)NumMaskElts <= Mask[i / MaskRatio])
+ PermuteImm |= 1u << i;
+
+ Shuffle = X86ISD::BLENDI;
+ ShuffleVT = BlendVT;
+ return true;
+ }
+ }
+ }
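// Illustrative sketch (simplified, standalone; not part of the patch): the
// X86ISD::BLENDI immediate built in both branches above is a per-element
// selector - a set bit i takes element i from the second operand, a clear bit
// keeps element i of the first. When BlendVT has more (narrower) elements than
// the mask, each mask element covers MaskRatio immediate bits, which is what
// the i / MaskRatio indexing expresses. A hypothetical restatement:
#include <cstdint>
#include <vector>

static uint32_t blendImmFromMask(const std::vector<int> &Mask,
                                 unsigned BlendElts) {
  unsigned Ratio = BlendElts / Mask.size();
  uint32_t Imm = 0;
  for (unsigned i = 0; i != BlendElts; ++i)
    if (Mask[i / Ratio] >= (int)Mask.size())  // element comes from the second source
      Imm |= 1u << i;
  return Imm;
}
// e.g. the v4i32 mask {0, 5, 2, 7} (V1, V2, V1, V2) yields 0b1010.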
+
+ // Attempt to combine to INSERTPS.
+ if (Subtarget.hasSSE41() && MaskVT == MVT::v4f32) {
+ SmallBitVector Zeroable(4, false);
+ for (unsigned i = 0; i != NumMaskElts; ++i)
+ if (Mask[i] < 0)
+ Zeroable[i] = true;
+
+ if (Zeroable.any() &&
+ matchVectorShuffleAsInsertPS(V1, V2, PermuteImm, Zeroable, Mask, DAG)) {
+ Shuffle = X86ISD::INSERTPS;
+ ShuffleVT = MVT::v4f32;
return true;
}
- if (isTargetShuffleEquivalent(Mask, {4, 4, 5, 5, 6, 6, 7, 7}) ||
- isTargetShuffleEquivalent(Mask, {8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13,
- 13, 14, 14, 15, 15})) {
- Shuffle = X86ISD::UNPCKH;
- ShuffleVT = Mask.size() == 8 ? MVT::v8i16 : MVT::v16i8;
+ }
+
+ // Attempt to combine to SHUFPD.
+ if ((MaskVT == MVT::v2f64 && Subtarget.hasSSE2()) ||
+ (MaskVT == MVT::v4f64 && Subtarget.hasAVX()) ||
+ (MaskVT == MVT::v8f64 && Subtarget.hasAVX512())) {
+ if (matchVectorShuffleWithSHUFPD(MaskVT, V1, V2, PermuteImm, Mask)) {
+ Shuffle = X86ISD::SHUFP;
+ ShuffleVT = MaskVT;
return true;
}
}
+ // Attempt to combine to SHUFPS.
+ if ((MaskVT == MVT::v4f32 && Subtarget.hasSSE1()) ||
+ (MaskVT == MVT::v8f32 && Subtarget.hasAVX()) ||
+ (MaskVT == MVT::v16f32 && Subtarget.hasAVX512())) {
+ SmallVector<int, 4> RepeatedMask;
+ if (isRepeatedTargetShuffleMask(128, MaskVT, Mask, RepeatedMask)) {
+ auto MatchHalf = [&](unsigned Offset, int &S0, int &S1) {
+ int M0 = RepeatedMask[Offset];
+ int M1 = RepeatedMask[Offset + 1];
+
+ if (isUndefInRange(RepeatedMask, Offset, 2)) {
+ return DAG.getUNDEF(MaskVT);
+ } else if (isUndefOrZeroInRange(RepeatedMask, Offset, 2)) {
+ S0 = (SM_SentinelUndef == M0 ? -1 : 0);
+ S1 = (SM_SentinelUndef == M1 ? -1 : 1);
+ return getZeroVector(MaskVT, Subtarget, DAG, DL);
+ } else if (isUndefOrInRange(M0, 0, 4) && isUndefOrInRange(M1, 0, 4)) {
+ S0 = (SM_SentinelUndef == M0 ? -1 : M0 & 3);
+ S1 = (SM_SentinelUndef == M1 ? -1 : M1 & 3);
+ return V1;
+ } else if (isUndefOrInRange(M0, 4, 8) && isUndefOrInRange(M1, 4, 8)) {
+ S0 = (SM_SentinelUndef == M0 ? -1 : M0 & 3);
+ S1 = (SM_SentinelUndef == M1 ? -1 : M1 & 3);
+ return V2;
+ }
+
+ return SDValue();
+ };
+
+ int ShufMask[4] = {-1, -1, -1, -1};
+ SDValue Lo = MatchHalf(0, ShufMask[0], ShufMask[1]);
+ SDValue Hi = MatchHalf(2, ShufMask[2], ShufMask[3]);
+
+ if (Lo && Hi) {
+ V1 = Lo;
+ V2 = Hi;
+ Shuffle = X86ISD::SHUFP;
+ ShuffleVT = MaskVT;
+ PermuteImm = getV4X86ShuffleImm(ShufMask);
+ return true;
+ }
+ }
+ }
+
return false;
}
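// Illustrative sketch (simplified, standalone; not part of the patch): SHUFPS,
// which the MatchHalf logic above targets, builds its low two result elements
// from the first operand and its high two from the second, each picked by a
// 2-bit field of the immediate. That is why each half of the repeated mask
// only has to be servable entirely by V1, V2, zero or undef. A hypothetical
// restatement of the data movement for one 128-bit lane:
#include <array>
#include <cstdint>

static std::array<float, 4> shufps(const std::array<float, 4> &A,
                                   const std::array<float, 4> &B, uint8_t Imm) {
  return {A[Imm & 3], A[(Imm >> 2) & 3],          // low half from the first operand
          B[(Imm >> 4) & 3], B[(Imm >> 6) & 3]};  // high half from the second
}
// e.g. shufps(A, B, 0xE4) produces {A[0], A[1], B[2], B[3]}.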
@@ -25055,33 +26450,44 @@ static bool matchBinaryVectorShuffle(MVT SrcVT, ArrayRef<int> Mask,
/// into either a single instruction if there is a special purpose instruction
/// for this operation, or into a PSHUFB instruction which is a fully general
/// instruction but should only be used to replace chains over a certain depth.
-static bool combineX86ShuffleChain(SDValue Input, SDValue Root,
+static bool combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root,
ArrayRef<int> BaseMask, int Depth,
bool HasVariableMask, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
assert(!BaseMask.empty() && "Cannot combine an empty shuffle mask!");
+ assert((Inputs.size() == 1 || Inputs.size() == 2) &&
+ "Unexpected number of shuffle inputs!");
- // Find the operand that enters the chain. Note that multiple uses are OK
- // here, we're not going to remove the operand we find.
- Input = peekThroughBitcasts(Input);
+ // Find the inputs that enter the chain. Note that multiple uses are OK
+ // here, we're not going to remove the operands we find.
+ bool UnaryShuffle = (Inputs.size() == 1);
+ SDValue V1 = peekThroughBitcasts(Inputs[0]);
+ SDValue V2 = (UnaryShuffle ? V1 : peekThroughBitcasts(Inputs[1]));
- MVT VT = Input.getSimpleValueType();
+ MVT VT1 = V1.getSimpleValueType();
+ MVT VT2 = V2.getSimpleValueType();
MVT RootVT = Root.getSimpleValueType();
- SDLoc DL(Root);
+ assert(VT1.getSizeInBits() == RootVT.getSizeInBits() &&
+ VT2.getSizeInBits() == RootVT.getSizeInBits() &&
+ "Vector size mismatch");
+ SDLoc DL(Root);
SDValue Res;
unsigned NumBaseMaskElts = BaseMask.size();
if (NumBaseMaskElts == 1) {
assert(BaseMask[0] == 0 && "Invalid shuffle index found!");
- DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Input),
+ DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, V1),
/*AddTo*/ true);
return true;
}
unsigned RootSizeInBits = RootVT.getSizeInBits();
+ unsigned NumRootElts = RootVT.getVectorNumElements();
unsigned BaseMaskEltSizeInBits = RootSizeInBits / NumBaseMaskElts;
+ bool FloatDomain = VT1.isFloatingPoint() || VT2.isFloatingPoint() ||
+ (RootVT.is256BitVector() && !Subtarget.hasAVX2());
// Don't combine if we are a AVX512/EVEX target and the mask element size
// is different from the root element size - this would prevent writemasks
@@ -25089,26 +26495,25 @@ static bool combineX86ShuffleChain(SDValue Input, SDValue Root,
// TODO - this currently prevents all lane shuffles from occurring.
// TODO - check for writemasks usage instead of always preventing combining.
// TODO - attempt to narrow Mask back to writemask size.
- if (RootVT.getScalarSizeInBits() != BaseMaskEltSizeInBits &&
- (RootSizeInBits == 512 ||
- (Subtarget.hasVLX() && RootSizeInBits >= 128))) {
+ bool IsEVEXShuffle =
+ RootSizeInBits == 512 || (Subtarget.hasVLX() && RootSizeInBits >= 128);
+ if (IsEVEXShuffle && (RootVT.getScalarSizeInBits() != BaseMaskEltSizeInBits))
return false;
- }
// TODO - handle 128/256-bit lane shuffles of 512-bit vectors.
// Handle 128-bit lane shuffles of 256-bit vectors.
- if (VT.is256BitVector() && NumBaseMaskElts == 2 &&
+ // TODO - this should support binary shuffles.
+ if (UnaryShuffle && RootVT.is256BitVector() && NumBaseMaskElts == 2 &&
!isSequentialOrUndefOrZeroInRange(BaseMask, 0, 2, 0)) {
if (Depth == 1 && Root.getOpcode() == X86ISD::VPERM2X128)
return false; // Nothing to do!
- MVT ShuffleVT = (VT.isFloatingPoint() || !Subtarget.hasAVX2() ? MVT::v4f64
- : MVT::v4i64);
+ MVT ShuffleVT = (FloatDomain ? MVT::v4f64 : MVT::v4i64);
unsigned PermMask = 0;
PermMask |= ((BaseMask[0] < 0 ? 0x8 : (BaseMask[0] & 1)) << 0);
PermMask |= ((BaseMask[1] < 0 ? 0x8 : (BaseMask[1] & 1)) << 4);
- Res = DAG.getBitcast(ShuffleVT, Input);
+ Res = DAG.getBitcast(ShuffleVT, V1);
DCI.AddToWorklist(Res.getNode());
Res = DAG.getNode(X86ISD::VPERM2X128, DL, ShuffleVT, Res,
DAG.getUNDEF(ShuffleVT),
@@ -25134,144 +26539,233 @@ static bool combineX86ShuffleChain(SDValue Input, SDValue Root,
unsigned MaskEltSizeInBits = RootSizeInBits / NumMaskElts;
// Determine the effective mask value type.
- bool FloatDomain =
- (VT.isFloatingPoint() || (VT.is256BitVector() && !Subtarget.hasAVX2())) &&
- (32 <= MaskEltSizeInBits);
+ FloatDomain &= (32 <= MaskEltSizeInBits);
MVT MaskVT = FloatDomain ? MVT::getFloatingPointVT(MaskEltSizeInBits)
: MVT::getIntegerVT(MaskEltSizeInBits);
MaskVT = MVT::getVectorVT(MaskVT, NumMaskElts);
+ // Only allow legal mask types.
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(MaskVT))
+ return false;
+
// Attempt to match the mask against known shuffle patterns.
- MVT ShuffleVT;
+ MVT ShuffleSrcVT, ShuffleVT;
unsigned Shuffle, PermuteImm;
- if (matchUnaryVectorShuffle(VT, Mask, Subtarget, Shuffle, ShuffleVT)) {
- if (Depth == 1 && Root.getOpcode() == Shuffle)
- return false; // Nothing to do!
- Res = DAG.getBitcast(ShuffleVT, Input);
- DCI.AddToWorklist(Res.getNode());
- Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res);
- DCI.AddToWorklist(Res.getNode());
- DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
- /*AddTo*/ true);
- return true;
+ if (UnaryShuffle) {
+ // If we are shuffling an X86ISD::VZEXT_LOAD then we can use the load
+ // directly if we don't shuffle the lower element and we shuffle the upper
+ // (zero) elements within themselves.
+ if (V1.getOpcode() == X86ISD::VZEXT_LOAD &&
+ (V1.getScalarValueSizeInBits() % MaskEltSizeInBits) == 0) {
+ unsigned Scale = V1.getScalarValueSizeInBits() / MaskEltSizeInBits;
+ ArrayRef<int> HiMask(Mask.data() + Scale, NumMaskElts - Scale);
+ if (isSequentialOrUndefInRange(Mask, 0, Scale, 0) &&
+ isUndefOrZeroOrInRange(HiMask, Scale, NumMaskElts)) {
+ DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, V1),
+ /*AddTo*/ true);
+ return true;
+ }
+ }
+
+ if (matchUnaryVectorShuffle(MaskVT, Mask, Subtarget, Shuffle, ShuffleSrcVT,
+ ShuffleVT)) {
+ if (Depth == 1 && Root.getOpcode() == Shuffle)
+ return false; // Nothing to do!
+ if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
+ return false; // AVX512 Writemask clash.
+ Res = DAG.getBitcast(ShuffleSrcVT, V1);
+ DCI.AddToWorklist(Res.getNode());
+ Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res);
+ DCI.AddToWorklist(Res.getNode());
+ DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
+ /*AddTo*/ true);
+ return true;
+ }
+
+ if (matchUnaryPermuteVectorShuffle(MaskVT, Mask, Subtarget, Shuffle,
+ ShuffleVT, PermuteImm)) {
+ if (Depth == 1 && Root.getOpcode() == Shuffle)
+ return false; // Nothing to do!
+ if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
+ return false; // AVX512 Writemask clash.
+ Res = DAG.getBitcast(ShuffleVT, V1);
+ DCI.AddToWorklist(Res.getNode());
+ Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res,
+ DAG.getConstant(PermuteImm, DL, MVT::i8));
+ DCI.AddToWorklist(Res.getNode());
+ DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
+ /*AddTo*/ true);
+ return true;
+ }
}
- if (matchPermuteVectorShuffle(VT, Mask, Subtarget, Shuffle, ShuffleVT,
- PermuteImm)) {
+ if (matchBinaryVectorShuffle(MaskVT, Mask, V1, V2, Subtarget, Shuffle,
+ ShuffleVT, UnaryShuffle)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return false; // Nothing to do!
- Res = DAG.getBitcast(ShuffleVT, Input);
- DCI.AddToWorklist(Res.getNode());
- Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res,
- DAG.getConstant(PermuteImm, DL, MVT::i8));
+ if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
+ return false; // AVX512 Writemask clash.
+ V1 = DAG.getBitcast(ShuffleVT, V1);
+ DCI.AddToWorklist(V1.getNode());
+ V2 = DAG.getBitcast(ShuffleVT, V2);
+ DCI.AddToWorklist(V2.getNode());
+ Res = DAG.getNode(Shuffle, DL, ShuffleVT, V1, V2);
DCI.AddToWorklist(Res.getNode());
DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
/*AddTo*/ true);
return true;
}
- if (matchBinaryVectorShuffle(VT, Mask, Shuffle, ShuffleVT)) {
+ if (matchBinaryPermuteVectorShuffle(MaskVT, Mask, V1, V2, DL, DAG, Subtarget,
+ Shuffle, ShuffleVT, PermuteImm)) {
if (Depth == 1 && Root.getOpcode() == Shuffle)
return false; // Nothing to do!
- Res = DAG.getBitcast(ShuffleVT, Input);
- DCI.AddToWorklist(Res.getNode());
- Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res, Res);
+ if (IsEVEXShuffle && (NumRootElts != ShuffleVT.getVectorNumElements()))
+ return false; // AVX512 Writemask clash.
+ V1 = DAG.getBitcast(ShuffleVT, V1);
+ DCI.AddToWorklist(V1.getNode());
+ V2 = DAG.getBitcast(ShuffleVT, V2);
+ DCI.AddToWorklist(V2.getNode());
+ Res = DAG.getNode(Shuffle, DL, ShuffleVT, V1, V2,
+ DAG.getConstant(PermuteImm, DL, MVT::i8));
DCI.AddToWorklist(Res.getNode());
DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
/*AddTo*/ true);
return true;
}
- // Attempt to blend with zero.
- if (NumMaskElts <= 8 &&
- ((Subtarget.hasSSE41() && VT.is128BitVector()) ||
- (Subtarget.hasAVX() && VT.is256BitVector()))) {
- // Convert VT to a type compatible with X86ISD::BLENDI.
- // TODO - add 16i16 support (requires lane duplication).
- MVT ShuffleVT = MaskVT;
- if (Subtarget.hasAVX2()) {
- if (ShuffleVT == MVT::v4i64)
- ShuffleVT = MVT::v8i32;
- else if (ShuffleVT == MVT::v2i64)
- ShuffleVT = MVT::v4i32;
- } else {
- if (ShuffleVT == MVT::v2i64 || ShuffleVT == MVT::v4i32)
- ShuffleVT = MVT::v8i16;
- else if (ShuffleVT == MVT::v4i64)
- ShuffleVT = MVT::v4f64;
- else if (ShuffleVT == MVT::v8i32)
- ShuffleVT = MVT::v8f32;
- }
-
- if (isSequentialOrUndefOrZeroInRange(Mask, /*Pos*/ 0, /*Size*/ NumMaskElts,
- /*Low*/ 0) &&
- NumMaskElts <= ShuffleVT.getVectorNumElements()) {
- unsigned BlendMask = 0;
- unsigned ShuffleSize = ShuffleVT.getVectorNumElements();
- unsigned MaskRatio = ShuffleSize / NumMaskElts;
-
- if (Depth == 1 && Root.getOpcode() == X86ISD::BLENDI)
- return false;
-
- for (unsigned i = 0; i != ShuffleSize; ++i)
- if (Mask[i / MaskRatio] < 0)
- BlendMask |= 1u << i;
+ // Don't try to re-form single instruction chains under any circumstances now
+ // that we've done encoding canonicalization for them.
+ if (Depth < 2)
+ return false;
- SDValue Zero = getZeroVector(ShuffleVT, Subtarget, DAG, DL);
- Res = DAG.getBitcast(ShuffleVT, Input);
+ bool MaskContainsZeros =
+ any_of(Mask, [](int M) { return M == SM_SentinelZero; });
+
+ if (is128BitLaneCrossingShuffleMask(MaskVT, Mask)) {
+ // If we have a single input lane-crossing shuffle then lower to VPERMV.
+ if (UnaryShuffle && (Depth >= 3 || HasVariableMask) && !MaskContainsZeros &&
+ ((Subtarget.hasAVX2() &&
+ (MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32)) ||
+ (Subtarget.hasAVX512() &&
+ (MaskVT == MVT::v8f64 || MaskVT == MVT::v8i64 ||
+ MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32)) ||
+ (Subtarget.hasBWI() && MaskVT == MVT::v32i16) ||
+ (Subtarget.hasBWI() && Subtarget.hasVLX() && MaskVT == MVT::v16i16) ||
+ (Subtarget.hasVBMI() && MaskVT == MVT::v64i8) ||
+ (Subtarget.hasVBMI() && Subtarget.hasVLX() && MaskVT == MVT::v32i8))) {
+ MVT VPermMaskSVT = MVT::getIntegerVT(MaskEltSizeInBits);
+ MVT VPermMaskVT = MVT::getVectorVT(VPermMaskSVT, NumMaskElts);
+ SDValue VPermMask = getConstVector(Mask, VPermMaskVT, DAG, DL, true);
+ DCI.AddToWorklist(VPermMask.getNode());
+ Res = DAG.getBitcast(MaskVT, V1);
DCI.AddToWorklist(Res.getNode());
- Res = DAG.getNode(X86ISD::BLENDI, DL, ShuffleVT, Res, Zero,
- DAG.getConstant(BlendMask, DL, MVT::i8));
+ Res = DAG.getNode(X86ISD::VPERMV, DL, MaskVT, VPermMask, Res);
DCI.AddToWorklist(Res.getNode());
DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
/*AddTo*/ true);
return true;
}
- }
- // Attempt to combine to INSERTPS.
- if (Subtarget.hasSSE41() && NumMaskElts == 4 &&
- (VT == MVT::v2f64 || VT == MVT::v4f32)) {
- SmallBitVector Zeroable(4, false);
- for (unsigned i = 0; i != NumMaskElts; ++i)
- if (Mask[i] < 0)
- Zeroable[i] = true;
+ // Lower a unary+zero lane-crossing shuffle as VPERMV3 with a zero
+ // vector as the second source.
+ if (UnaryShuffle && (Depth >= 3 || HasVariableMask) &&
+ ((Subtarget.hasAVX512() &&
+ (MaskVT == MVT::v8f64 || MaskVT == MVT::v8i64 ||
+ MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32)) ||
+ (Subtarget.hasVLX() &&
+ (MaskVT == MVT::v4f64 || MaskVT == MVT::v4i64 ||
+ MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32)) ||
+ (Subtarget.hasBWI() && MaskVT == MVT::v32i16) ||
+ (Subtarget.hasBWI() && Subtarget.hasVLX() && MaskVT == MVT::v16i16) ||
+ (Subtarget.hasVBMI() && MaskVT == MVT::v64i8) ||
+ (Subtarget.hasVBMI() && Subtarget.hasVLX() && MaskVT == MVT::v32i8))) {
+ // Adjust shuffle mask - replace SM_SentinelZero with second source index.
+ for (unsigned i = 0; i != NumMaskElts; ++i)
+ if (Mask[i] == SM_SentinelZero)
+ Mask[i] = NumMaskElts + i;
+
+ MVT VPermMaskSVT = MVT::getIntegerVT(MaskEltSizeInBits);
+ MVT VPermMaskVT = MVT::getVectorVT(VPermMaskSVT, NumMaskElts);
+ SDValue VPermMask = getConstVector(Mask, VPermMaskVT, DAG, DL, true);
+ DCI.AddToWorklist(VPermMask.getNode());
+ Res = DAG.getBitcast(MaskVT, V1);
+ DCI.AddToWorklist(Res.getNode());
+ SDValue Zero = getZeroVector(MaskVT, Subtarget, DAG, DL);
+ DCI.AddToWorklist(Zero.getNode());
+ Res = DAG.getNode(X86ISD::VPERMV3, DL, MaskVT, Res, VPermMask, Zero);
+ DCI.AddToWorklist(Res.getNode());
+ DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
+ /*AddTo*/ true);
+ return true;
+ }
- unsigned InsertPSMask;
- SDValue V1 = Input, V2 = Input;
- if (Zeroable.any() && matchVectorShuffleAsInsertPS(V1, V2, InsertPSMask,
- Zeroable, Mask, DAG)) {
- if (Depth == 1 && Root.getOpcode() == X86ISD::INSERTPS)
- return false; // Nothing to do!
- V1 = DAG.getBitcast(MVT::v4f32, V1);
+ // If we have a dual input lane-crossing shuffle then lower to VPERMV3.
+ if ((Depth >= 3 || HasVariableMask) && !MaskContainsZeros &&
+ ((Subtarget.hasAVX512() &&
+ (MaskVT == MVT::v8f64 || MaskVT == MVT::v8i64 ||
+ MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32)) ||
+ (Subtarget.hasVLX() &&
+ (MaskVT == MVT::v4f64 || MaskVT == MVT::v4i64 ||
+ MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32)) ||
+ (Subtarget.hasBWI() && MaskVT == MVT::v32i16) ||
+ (Subtarget.hasBWI() && Subtarget.hasVLX() && MaskVT == MVT::v16i16) ||
+ (Subtarget.hasVBMI() && MaskVT == MVT::v64i8) ||
+ (Subtarget.hasVBMI() && Subtarget.hasVLX() && MaskVT == MVT::v32i8))) {
+ MVT VPermMaskSVT = MVT::getIntegerVT(MaskEltSizeInBits);
+ MVT VPermMaskVT = MVT::getVectorVT(VPermMaskSVT, NumMaskElts);
+ SDValue VPermMask = getConstVector(Mask, VPermMaskVT, DAG, DL, true);
+ DCI.AddToWorklist(VPermMask.getNode());
+ V1 = DAG.getBitcast(MaskVT, V1);
DCI.AddToWorklist(V1.getNode());
- V2 = DAG.getBitcast(MVT::v4f32, V2);
+ V2 = DAG.getBitcast(MaskVT, V2);
DCI.AddToWorklist(V2.getNode());
- Res = DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2,
- DAG.getConstant(InsertPSMask, DL, MVT::i8));
+ Res = DAG.getNode(X86ISD::VPERMV3, DL, MaskVT, V1, VPermMask, V2);
DCI.AddToWorklist(Res.getNode());
DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
/*AddTo*/ true);
return true;
}
- }
-
- // Don't try to re-form single instruction chains under any circumstances now
- // that we've done encoding canonicalization for them.
- if (Depth < 2)
- return false;
-
- if (is128BitLaneCrossingShuffleMask(MaskVT, Mask))
return false;
+ }
- bool MaskContainsZeros =
- llvm::any_of(Mask, [](int M) { return M == SM_SentinelZero; });
+ // See if we can combine a single input shuffle with zeros to a bit-mask,
+ // which is much simpler than any shuffle.
+ if (UnaryShuffle && MaskContainsZeros && (Depth >= 3 || HasVariableMask) &&
+ isSequentialOrUndefOrZeroInRange(Mask, 0, NumMaskElts, 0) &&
+ DAG.getTargetLoweringInfo().isTypeLegal(MaskVT)) {
+ APInt Zero = APInt::getNullValue(MaskEltSizeInBits);
+ APInt AllOnes = APInt::getAllOnesValue(MaskEltSizeInBits);
+ SmallBitVector UndefElts(NumMaskElts, false);
+ SmallVector<APInt, 64> EltBits(NumMaskElts, Zero);
+ for (unsigned i = 0; i != NumMaskElts; ++i) {
+ int M = Mask[i];
+ if (M == SM_SentinelUndef) {
+ UndefElts[i] = true;
+ continue;
+ }
+ if (M == SM_SentinelZero)
+ continue;
+ EltBits[i] = AllOnes;
+ }
+ SDValue BitMask = getConstVector(EltBits, UndefElts, MaskVT, DAG, DL);
+ DCI.AddToWorklist(BitMask.getNode());
+ Res = DAG.getBitcast(MaskVT, V1);
+ DCI.AddToWorklist(Res.getNode());
+ unsigned AndOpcode =
+ FloatDomain ? unsigned(X86ISD::FAND) : unsigned(ISD::AND);
+ Res = DAG.getNode(AndOpcode, DL, MaskVT, Res, BitMask);
+ DCI.AddToWorklist(Res.getNode());
+ DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
+ /*AddTo*/ true);
+ return true;
+ }
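// Illustrative sketch (simplified, standalone; not part of the patch): the
// combine above relies on a unary shuffle whose mask either keeps element i in
// place or zeroes it being equivalent to an AND with a per-element all-ones /
// all-zeros constant. A hypothetical restatement for 32-bit elements; -1/-2
// stand in for SM_SentinelUndef/SM_SentinelZero:
#include <cstdint>
#include <vector>

static std::vector<uint32_t> shuffleAsBitMask(const std::vector<int> &Mask) {
  constexpr int Undef = -1;
  std::vector<uint32_t> Bits(Mask.size());
  for (size_t i = 0; i != Mask.size(); ++i)
    Bits[i] = (Mask[i] == (int)i || Mask[i] == Undef) ? 0xFFFFFFFFu : 0u;
  return Bits;  // AND the input with this constant instead of shuffling it
}
// e.g. the mask {0, -2, 2, -2} (-2 = zero) on v4i32 becomes AND with
// {~0u, 0u, ~0u, 0u}.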
// If we have a single input shuffle with different shuffle patterns in the
// 128-bit lanes, use the variable mask form of VPERMILPS.
// TODO Combine other mask types at higher depths.
- if (HasVariableMask && !MaskContainsZeros &&
+ if (UnaryShuffle && HasVariableMask && !MaskContainsZeros &&
((MaskVT == MVT::v8f32 && Subtarget.hasAVX()) ||
(MaskVT == MVT::v16f32 && Subtarget.hasAVX512()))) {
SmallVector<SDValue, 16> VPermIdx;
@@ -25283,7 +26777,7 @@ static bool combineX86ShuffleChain(SDValue Input, SDValue Root,
MVT VPermMaskVT = MVT::getVectorVT(MVT::i32, NumMaskElts);
SDValue VPermMask = DAG.getBuildVector(VPermMaskVT, DL, VPermIdx);
DCI.AddToWorklist(VPermMask.getNode());
- Res = DAG.getBitcast(MaskVT, Input);
+ Res = DAG.getBitcast(MaskVT, V1);
DCI.AddToWorklist(Res.getNode());
Res = DAG.getNode(X86ISD::VPERMILPV, DL, MaskVT, Res, VPermMask);
DCI.AddToWorklist(Res.getNode());
@@ -25292,17 +26786,60 @@ static bool combineX86ShuffleChain(SDValue Input, SDValue Root,
return true;
}
+ // With XOP, binary shuffles of 128/256-bit floating point vectors can combine
+ // to VPERMIL2PD/VPERMIL2PS.
+ if ((Depth >= 3 || HasVariableMask) && Subtarget.hasXOP() &&
+ (MaskVT == MVT::v2f64 || MaskVT == MVT::v4f64 || MaskVT == MVT::v4f32 ||
+ MaskVT == MVT::v8f32)) {
+ // VPERMIL2 Operation.
+ // Bits[3] - Match Bit.
+ // Bits[2:1] - (Per Lane) PD Shuffle Mask.
+ // Bits[2:0] - (Per Lane) PS Shuffle Mask.
+ unsigned NumLanes = MaskVT.getSizeInBits() / 128;
+ unsigned NumEltsPerLane = NumMaskElts / NumLanes;
+ SmallVector<int, 8> VPerm2Idx;
+ MVT MaskIdxSVT = MVT::getIntegerVT(MaskVT.getScalarSizeInBits());
+ MVT MaskIdxVT = MVT::getVectorVT(MaskIdxSVT, NumMaskElts);
+ unsigned M2ZImm = 0;
+ for (int M : Mask) {
+ if (M == SM_SentinelUndef) {
+ VPerm2Idx.push_back(-1);
+ continue;
+ }
+ if (M == SM_SentinelZero) {
+ M2ZImm = 2;
+ VPerm2Idx.push_back(8);
+ continue;
+ }
+ int Index = (M % NumEltsPerLane) + ((M / NumMaskElts) * NumEltsPerLane);
+ Index = (MaskVT.getScalarSizeInBits() == 64 ? Index << 1 : Index);
+ VPerm2Idx.push_back(Index);
+ }
+ V1 = DAG.getBitcast(MaskVT, V1);
+ DCI.AddToWorklist(V1.getNode());
+ V2 = DAG.getBitcast(MaskVT, V2);
+ DCI.AddToWorklist(V2.getNode());
+ SDValue VPerm2MaskOp = getConstVector(VPerm2Idx, MaskIdxVT, DAG, DL, true);
+ DCI.AddToWorklist(VPerm2MaskOp.getNode());
+ Res = DAG.getNode(X86ISD::VPERMIL2, DL, MaskVT, V1, V2, VPerm2MaskOp,
+ DAG.getConstant(M2ZImm, DL, MVT::i8));
+ DCI.AddToWorklist(Res.getNode());
+ DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
+ /*AddTo*/ true);
+ return true;
+ }
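// Illustrative sketch (simplified, standalone; not part of the patch): the
// per-element selectors fed to VPERMIL2PS above. The low bits of a selector
// pick an element within its 128-bit lane, the next bit picks which of the two
// sources supplies it, and the value 8 combined with M2ZImm == 2 forces the
// lane to zero. A hypothetical restatement for the f32 (PS) case; -1/-2 stand
// in for SM_SentinelUndef/SM_SentinelZero:
#include <vector>

static std::vector<int> vpermil2psSelectors(const std::vector<int> &Mask) {
  constexpr int Undef = -1, Zero = -2;
  const int NumElts = (int)Mask.size(), EltsPerLane = 4;  // PS: 4 x f32 per lane
  std::vector<int> Sel;
  for (int M : Mask) {
    if (M == Undef) { Sel.push_back(-1); continue; }      // don't care
    if (M == Zero)  { Sel.push_back(8);  continue; }      // zeroed when M2Z == 2
    int InLane = M % EltsPerLane;                          // element within its lane
    int Src = (M < NumElts) ? 0 : 1;                       // first or second source
    Sel.push_back(InLane + Src * EltsPerLane);             // bit 2 selects the source
  }
  return Sel;
}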
+
// If we have 3 or more shuffle instructions or a chain involving a variable
// mask, we can replace them with a single PSHUFB instruction profitably.
// Intel's manuals suggest only using PSHUFB if doing so replaces 5
// instructions, but in practice PSHUFB tends to be *very* fast so we're
// more aggressive.
- if ((Depth >= 3 || HasVariableMask) &&
- ((VT.is128BitVector() && Subtarget.hasSSSE3()) ||
- (VT.is256BitVector() && Subtarget.hasAVX2()) ||
- (VT.is512BitVector() && Subtarget.hasBWI()))) {
+ if (UnaryShuffle && (Depth >= 3 || HasVariableMask) &&
+ ((RootVT.is128BitVector() && Subtarget.hasSSSE3()) ||
+ (RootVT.is256BitVector() && Subtarget.hasAVX2()) ||
+ (RootVT.is512BitVector() && Subtarget.hasBWI()))) {
SmallVector<SDValue, 16> PSHUFBMask;
- int NumBytes = VT.getSizeInBits() / 8;
+ int NumBytes = RootVT.getSizeInBits() / 8;
int Ratio = NumBytes / NumMaskElts;
for (int i = 0; i < NumBytes; ++i) {
int M = Mask[i / Ratio];
@@ -25319,7 +26856,7 @@ static bool combineX86ShuffleChain(SDValue Input, SDValue Root,
PSHUFBMask.push_back(DAG.getConstant(M, DL, MVT::i8));
}
MVT ByteVT = MVT::getVectorVT(MVT::i8, NumBytes);
- Res = DAG.getBitcast(ByteVT, Input);
+ Res = DAG.getBitcast(ByteVT, V1);
DCI.AddToWorklist(Res.getNode());
SDValue PSHUFBMaskOp = DAG.getBuildVector(ByteVT, DL, PSHUFBMask);
DCI.AddToWorklist(PSHUFBMaskOp.getNode());
@@ -25330,10 +26867,135 @@ static bool combineX86ShuffleChain(SDValue Input, SDValue Root,
return true;
}
+ // With XOP, if we have a 128-bit binary input shuffle we can always combine
+ // to VPPERM. We match the depth requirement of PSHUFB - VPPERM is never
+ // slower than PSHUFB on targets that support both.
+ if ((Depth >= 3 || HasVariableMask) && RootVT.is128BitVector() &&
+ Subtarget.hasXOP()) {
+ // VPPERM Mask Operation
+ // Bits[4:0] - Byte Index (0 - 31)
+ // Bits[7:5] - Permute Operation (0 - Source byte, 4 - ZERO)
+ SmallVector<SDValue, 16> VPPERMMask;
+ int NumBytes = 16;
+ int Ratio = NumBytes / NumMaskElts;
+ for (int i = 0; i < NumBytes; ++i) {
+ int M = Mask[i / Ratio];
+ if (M == SM_SentinelUndef) {
+ VPPERMMask.push_back(DAG.getUNDEF(MVT::i8));
+ continue;
+ }
+ if (M == SM_SentinelZero) {
+ VPPERMMask.push_back(DAG.getConstant(128, DL, MVT::i8));
+ continue;
+ }
+ M = Ratio * M + i % Ratio;
+ VPPERMMask.push_back(DAG.getConstant(M, DL, MVT::i8));
+ }
+ MVT ByteVT = MVT::v16i8;
+ V1 = DAG.getBitcast(ByteVT, V1);
+ DCI.AddToWorklist(V1.getNode());
+ V2 = DAG.getBitcast(ByteVT, V2);
+ DCI.AddToWorklist(V2.getNode());
+ SDValue VPPERMMaskOp = DAG.getBuildVector(ByteVT, DL, VPPERMMask);
+ DCI.AddToWorklist(VPPERMMaskOp.getNode());
+ Res = DAG.getNode(X86ISD::VPPERM, DL, ByteVT, V1, V2, VPPERMMaskOp);
+ DCI.AddToWorklist(Res.getNode());
+ DCI.CombineTo(Root.getNode(), DAG.getBitcast(RootVT, Res),
+ /*AddTo*/ true);
+ return true;
+ }
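// Illustrative sketch (simplified, standalone; not part of the patch): the
// VPPERM control bytes built above. Each control byte selects one byte of the
// 32-byte concatenation of the two sources through its low five bits, and a
// permute-op field of 4 in bits [7:5] (the 0x80 constant above) zeroes the
// result byte instead. A hypothetical restatement; -1/-2 stand in for
// SM_SentinelUndef/SM_SentinelZero:
#include <cstdint>
#include <vector>

static std::vector<uint8_t> vppermControl(const std::vector<int> &ByteMask) {
  constexpr int Zero = -2;
  std::vector<uint8_t> Ctl;
  for (int M : ByteMask) {
    if (M == Zero)   Ctl.push_back(0x80);                 // zero this result byte
    else if (M < 0)  Ctl.push_back(0);                    // undef: any byte will do
    else             Ctl.push_back((uint8_t)(M & 0x1F));  // byte 0-31 of {V1,V2}
  }
  return Ctl;
}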
+
// Failed to find any combines.
return false;
}
+// Attempt to constant fold all of the constant source ops.
+// Returns true if the entire shuffle is folded to a constant.
+// TODO: Extend this to merge multiple constant Ops and update the mask.
+static bool combineX86ShufflesConstants(const SmallVectorImpl<SDValue> &Ops,
+ ArrayRef<int> Mask, SDValue Root,
+ bool HasVariableMask, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
+ MVT VT = Root.getSimpleValueType();
+
+ unsigned SizeInBits = VT.getSizeInBits();
+ unsigned NumMaskElts = Mask.size();
+ unsigned MaskSizeInBits = SizeInBits / NumMaskElts;
+ unsigned NumOps = Ops.size();
+
+ // Extract constant bits from each source op.
+ bool OneUseConstantOp = false;
+ SmallVector<SmallBitVector, 4> UndefEltsOps(NumOps);
+ SmallVector<SmallVector<APInt, 8>, 4> RawBitsOps(NumOps);
+ for (unsigned i = 0; i != NumOps; ++i) {
+ SDValue SrcOp = Ops[i];
+ OneUseConstantOp |= SrcOp.hasOneUse();
+ if (!getTargetConstantBitsFromNode(SrcOp, MaskSizeInBits, UndefEltsOps[i],
+ RawBitsOps[i]))
+ return false;
+ }
+
+ // Only fold if at least one of the constants has a single use or the
+ // combined shuffle includes a variable mask shuffle; this avoids
+ // constant pool bloat.
+ if (!OneUseConstantOp && !HasVariableMask)
+ return false;
+
+ // Shuffle the constant bits according to the mask.
+ SmallBitVector UndefElts(NumMaskElts, false);
+ SmallBitVector ZeroElts(NumMaskElts, false);
+ SmallBitVector ConstantElts(NumMaskElts, false);
+ SmallVector<APInt, 8> ConstantBitData(NumMaskElts,
+ APInt::getNullValue(MaskSizeInBits));
+ for (unsigned i = 0; i != NumMaskElts; ++i) {
+ int M = Mask[i];
+ if (M == SM_SentinelUndef) {
+ UndefElts[i] = true;
+ continue;
+ } else if (M == SM_SentinelZero) {
+ ZeroElts[i] = true;
+ continue;
+ }
+ assert(0 <= M && M < (int)(NumMaskElts * NumOps));
+
+ unsigned SrcOpIdx = (unsigned)M / NumMaskElts;
+ unsigned SrcMaskIdx = (unsigned)M % NumMaskElts;
+
+ auto &SrcUndefElts = UndefEltsOps[SrcOpIdx];
+ if (SrcUndefElts[SrcMaskIdx]) {
+ UndefElts[i] = true;
+ continue;
+ }
+
+ auto &SrcEltBits = RawBitsOps[SrcOpIdx];
+ APInt &Bits = SrcEltBits[SrcMaskIdx];
+ if (!Bits) {
+ ZeroElts[i] = true;
+ continue;
+ }
+
+ ConstantElts[i] = true;
+ ConstantBitData[i] = Bits;
+ }
+ assert((UndefElts | ZeroElts | ConstantElts).count() == NumMaskElts);
+
+ // Create the constant data.
+ MVT MaskSVT;
+ if (VT.isFloatingPoint() && (MaskSizeInBits == 32 || MaskSizeInBits == 64))
+ MaskSVT = MVT::getFloatingPointVT(MaskSizeInBits);
+ else
+ MaskSVT = MVT::getIntegerVT(MaskSizeInBits);
+
+ MVT MaskVT = MVT::getVectorVT(MaskSVT, NumMaskElts);
+
+ SDLoc DL(Root);
+ SDValue CstOp = getConstVector(ConstantBitData, UndefElts, MaskVT, DAG, DL);
+ DCI.AddToWorklist(CstOp.getNode());
+ DCI.CombineTo(Root.getNode(), DAG.getBitcast(VT, CstOp));
+ return true;
+}
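// Illustrative sketch (simplified, standalone; not part of the patch): once the
// per-element constants of every source op have been extracted, the fold above
// is just a permutation of those constants by the combined mask, with the
// undef/zero sentinels mapping to undef and zero elements. A hypothetical
// restatement using plain STL types; nullopt models an undef element:
#include <cstdint>
#include <optional>
#include <vector>

static std::vector<std::optional<uint64_t>>
foldShuffleOfConstants(const std::vector<std::vector<uint64_t>> &Ops,
                       const std::vector<int> &Mask) {
  constexpr int Undef = -1, Zero = -2;
  const size_t NumElts = Mask.size();
  std::vector<std::optional<uint64_t>> Out(NumElts);
  for (size_t i = 0; i != NumElts; ++i) {
    int M = Mask[i];
    if (M == Undef)
      continue;                                                // stays undef
    if (M == Zero)
      Out[i] = 0;
    else
      Out[i] = Ops[(size_t)M / NumElts][(size_t)M % NumElts];  // element M of concat(Ops...)
  }
  return Out;
}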
+
/// \brief Fully generic combining of x86 shuffle instructions.
///
/// This should be the last combine run over the x86 shuffle instructions. Once
@@ -25350,7 +27012,7 @@ static bool combineX86ShuffleChain(SDValue Input, SDValue Root,
/// instructions, and replace them with the slightly more expensive SSSE3
/// PSHUFB instruction if available. We do this as the last combining step
/// to ensure we avoid using PSHUFB if we can implement the shuffle with
-/// a suitable short sequence of other instructions. The PHUFB will either
+/// a suitable short sequence of other instructions. The PSHUFB will either
/// use a register or have to read from memory and so is slightly (but only
/// slightly) more expensive than the other shuffle instructions.
///
@@ -25363,7 +27025,8 @@ static bool combineX86ShuffleChain(SDValue Input, SDValue Root,
/// would simplify under the threshold for PSHUFB formation because of
/// combine-ordering. To fix this, we should do the redundant instruction
/// combining in this recursive walk.
-static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root,
+static bool combineX86ShufflesRecursively(ArrayRef<SDValue> SrcOps,
+ int SrcOpIndex, SDValue Root,
ArrayRef<int> RootMask,
int Depth, bool HasVariableMask,
SelectionDAG &DAG,
@@ -25375,8 +27038,8 @@ static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root,
return false;
// Directly rip through bitcasts to find the underlying operand.
- while (Op.getOpcode() == ISD::BITCAST && Op.getOperand(0).hasOneUse())
- Op = Op.getOperand(0);
+ SDValue Op = SrcOps[SrcOpIndex];
+ Op = peekThroughOneUseBitcasts(Op);
MVT VT = Op.getSimpleValueType();
if (!VT.isVector())
@@ -25393,8 +27056,27 @@ static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root,
if (!resolveTargetShuffleInputs(Op, Input0, Input1, OpMask))
return false;
- assert(VT.getVectorNumElements() == OpMask.size() &&
- "Different mask size from vector size!");
+ // Add the inputs to the Ops list, avoiding duplicates.
+ SmallVector<SDValue, 8> Ops(SrcOps.begin(), SrcOps.end());
+
+ int InputIdx0 = -1, InputIdx1 = -1;
+ for (int i = 0, e = Ops.size(); i < e; ++i) {
+ SDValue BC = peekThroughBitcasts(Ops[i]);
+ if (Input0 && BC == peekThroughBitcasts(Input0))
+ InputIdx0 = i;
+ if (Input1 && BC == peekThroughBitcasts(Input1))
+ InputIdx1 = i;
+ }
+
+ if (Input0 && InputIdx0 < 0) {
+ InputIdx0 = SrcOpIndex;
+ Ops[SrcOpIndex] = Input0;
+ }
+ if (Input1 && InputIdx1 < 0) {
+ InputIdx1 = Ops.size();
+ Ops.push_back(Input1);
+ }
+
assert(((RootMask.size() > OpMask.size() &&
RootMask.size() % OpMask.size() == 0) ||
(OpMask.size() > RootMask.size() &&
@@ -25424,6 +27106,17 @@ static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root,
}
int RootMaskedIdx = RootMask[RootIdx] * RootRatio + i % RootRatio;
+
+ // Just insert the scaled root mask value if it references an input other
+ // than the SrcOp we're currently inserting.
+ if ((RootMaskedIdx < (SrcOpIndex * MaskWidth)) ||
+ (((SrcOpIndex + 1) * MaskWidth) <= RootMaskedIdx)) {
+ Mask.push_back(RootMaskedIdx);
+ continue;
+ }
+
+ RootMaskedIdx %= MaskWidth;
+
int OpIdx = RootMaskedIdx / OpRatio;
if (OpMask[OpIdx] < 0) {
// The incoming lanes are zero or undef, it doesn't matter which ones we
@@ -25432,17 +27125,27 @@ static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root,
continue;
}
- // Ok, we have non-zero lanes, map them through.
- Mask.push_back(OpMask[OpIdx] * OpRatio +
- RootMaskedIdx % OpRatio);
+ // Ok, we have non-zero lanes, map them through to one of the Op's inputs.
+ int OpMaskedIdx = OpMask[OpIdx] * OpRatio + RootMaskedIdx % OpRatio;
+ OpMaskedIdx %= MaskWidth;
+
+ if (OpMask[OpIdx] < (int)OpMask.size()) {
+ assert(0 <= InputIdx0 && "Unknown target shuffle input");
+ OpMaskedIdx += InputIdx0 * MaskWidth;
+ } else {
+ assert(0 <= InputIdx1 && "Unknown target shuffle input");
+ OpMaskedIdx += InputIdx1 * MaskWidth;
+ }
+
+ Mask.push_back(OpMaskedIdx);
}
// Handle the all undef/zero cases early.
- if (llvm::all_of(Mask, [](int Idx) { return Idx == SM_SentinelUndef; })) {
+ if (all_of(Mask, [](int Idx) { return Idx == SM_SentinelUndef; })) {
DCI.CombineTo(Root.getNode(), DAG.getUNDEF(Root.getValueType()));
return true;
}
- if (llvm::all_of(Mask, [](int Idx) { return Idx < 0; })) {
+ if (all_of(Mask, [](int Idx) { return Idx < 0; })) {
// TODO - should we handle the mixed zero/undef case as well? Just returning
// a zero mask will lose information on undef elements possibly reducing
// future combine possibilities.
@@ -25451,30 +27154,40 @@ static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root,
return true;
}
- int MaskSize = Mask.size();
- bool UseInput0 = std::any_of(Mask.begin(), Mask.end(),
- [MaskSize](int Idx) { return 0 <= Idx && Idx < MaskSize; });
- bool UseInput1 = std::any_of(Mask.begin(), Mask.end(),
- [MaskSize](int Idx) { return MaskSize <= Idx; });
-
- // At the moment we can only combine unary shuffle mask cases.
- if (UseInput0 && UseInput1)
- return false;
- else if (UseInput1) {
- std::swap(Input0, Input1);
- ShuffleVectorSDNode::commuteMask(Mask);
+ // Remove unused shuffle source ops.
+ SmallVector<SDValue, 8> UsedOps;
+ for (int i = 0, e = Ops.size(); i < e; ++i) {
+ int lo = UsedOps.size() * MaskWidth;
+ int hi = lo + MaskWidth;
+ if (any_of(Mask, [lo, hi](int i) { return (lo <= i) && (i < hi); })) {
+ UsedOps.push_back(Ops[i]);
+ continue;
+ }
+ for (int &M : Mask)
+ if (lo <= M)
+ M -= MaskWidth;
}
-
- assert(Input0 && "Shuffle with no inputs detected");
+ assert(!UsedOps.empty() && "Shuffle with no inputs detected");
+ Ops = UsedOps;
HasVariableMask |= isTargetShuffleVariableMask(Op.getOpcode());
- // See if we can recurse into Input0 (if it's a target shuffle).
- if (Op->isOnlyUserOf(Input0.getNode()) &&
- combineX86ShufflesRecursively(Input0, Root, Mask, Depth + 1,
- HasVariableMask, DAG, DCI, Subtarget))
+ // See if we can recurse into each shuffle source op (if it's a target shuffle).
+ for (int i = 0, e = Ops.size(); i < e; ++i)
+ if (Ops[i].getNode()->hasOneUse() || Op->isOnlyUserOf(Ops[i].getNode()))
+ if (combineX86ShufflesRecursively(Ops, i, Root, Mask, Depth + 1,
+ HasVariableMask, DAG, DCI, Subtarget))
+ return true;
+
+ // Attempt to constant fold all of the constant source ops.
+ if (combineX86ShufflesConstants(Ops, Mask, Root, HasVariableMask, DAG, DCI,
+ Subtarget))
return true;
+ // We can only combine unary and binary shuffle mask cases.
+ if (Ops.size() > 2)
+ return false;
+
// Minor canonicalization of the accumulated shuffle mask to make it easier
// to match below. All this does is detect masks with sequential pairs of
// elements, and shrink them to the half-width mask. It does this in a loop
@@ -25485,7 +27198,14 @@ static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root,
Mask = std::move(WidenedMask);
}
- return combineX86ShuffleChain(Input0, Root, Mask, Depth, HasVariableMask, DAG,
+ // Canonicalization of binary shuffle masks to improve pattern matching by
+ // commuting the inputs.
+ if (Ops.size() == 2 && canonicalizeShuffleMaskWithCommute(Mask)) {
+ ShuffleVectorSDNode::commuteMask(Mask);
+ std::swap(Ops[0], Ops[1]);
+ }
+
+ return combineX86ShuffleChain(Ops, Root, Mask, Depth, HasVariableMask, DAG,
DCI, Subtarget);
}
@@ -25612,7 +27332,7 @@ combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask,
Chain.push_back(V);
- // Fallthrough!
+ LLVM_FALLTHROUGH;
case ISD::BITCAST:
V = V.getOperand(0);
continue;
@@ -25742,7 +27462,8 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
MVT VT = N.getSimpleValueType();
SmallVector<int, 4> Mask;
- switch (N.getOpcode()) {
+ unsigned Opcode = N.getOpcode();
+ switch (Opcode) {
case X86ISD::PSHUFD:
case X86ISD::PSHUFLW:
case X86ISD::PSHUFHW:
@@ -25750,6 +27471,17 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
assert(Mask.size() == 4);
break;
case X86ISD::UNPCKL: {
+ auto Op0 = N.getOperand(0);
+ auto Op1 = N.getOperand(1);
+ unsigned Opcode0 = Op0.getOpcode();
+ unsigned Opcode1 = Op1.getOpcode();
+
+ // Combine X86ISD::UNPCKL with 2 X86ISD::FHADD inputs into a single
+ // X86ISD::FHADD. This is generated by UINT_TO_FP v2f64 scalarization.
+ // TODO: Add other horizontal operations as required.
+ if (VT == MVT::v2f64 && Opcode0 == Opcode1 && Opcode0 == X86ISD::FHADD)
+ return DAG.getNode(Opcode0, DL, VT, Op0.getOperand(0), Op1.getOperand(0));
+
// Combine X86ISD::UNPCKL and ISD::VECTOR_SHUFFLE into X86ISD::UNPCKH, in
// which X86ISD::UNPCKL has a ISD::UNDEF operand, and ISD::VECTOR_SHUFFLE
// moves upper half elements into the lower half part. For example:
@@ -25767,9 +27499,7 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
if (!VT.is128BitVector())
return SDValue();
- auto Op0 = N.getOperand(0);
- auto Op1 = N.getOperand(1);
- if (Op0.isUndef() && Op1.getNode()->getOpcode() == ISD::VECTOR_SHUFFLE) {
+ if (Op0.isUndef() && Opcode1 == ISD::VECTOR_SHUFFLE) {
ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Op1.getNode())->getMask();
unsigned NumElts = VT.getVectorNumElements();
@@ -25806,44 +27536,31 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V0, NewMask);
}
- // Attempt to merge blend(insertps(x,y),zero).
- if (V0.getOpcode() == X86ISD::INSERTPS ||
- V1.getOpcode() == X86ISD::INSERTPS) {
- assert(VT == MVT::v4f32 && "INSERTPS ValueType must be MVT::v4f32");
-
- // Determine which elements are known to be zero.
- SmallVector<int, 8> TargetMask;
- SmallVector<SDValue, 2> BlendOps;
- if (!setTargetShuffleZeroElements(N, TargetMask, BlendOps))
- return SDValue();
-
- // Helper function to take inner insertps node and attempt to
- // merge the blend with zero into its zero mask.
- auto MergeInsertPSAndBlend = [&](SDValue V, int Offset) {
- if (V.getOpcode() != X86ISD::INSERTPS)
- return SDValue();
- SDValue Op0 = V.getOperand(0);
- SDValue Op1 = V.getOperand(1);
- SDValue Op2 = V.getOperand(2);
- unsigned InsertPSMask = cast<ConstantSDNode>(Op2)->getZExtValue();
-
- // Check each element of the blend node's target mask - must either
- // be zeroable (and update the zero mask) or selects the element from
- // the inner insertps node.
- for (int i = 0; i != 4; ++i)
- if (TargetMask[i] < 0)
- InsertPSMask |= (1u << i);
- else if (TargetMask[i] != (i + Offset))
- return SDValue();
- return DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, Op0, Op1,
- DAG.getConstant(InsertPSMask, DL, MVT::i8));
- };
-
- if (SDValue V = MergeInsertPSAndBlend(V0, 0))
- return V;
- if (SDValue V = MergeInsertPSAndBlend(V1, 4))
- return V;
+ return SDValue();
+ }
+ case X86ISD::MOVSD:
+ case X86ISD::MOVSS: {
+ bool isFloat = VT.isFloatingPoint();
+ SDValue V0 = peekThroughBitcasts(N->getOperand(0));
+ SDValue V1 = peekThroughBitcasts(N->getOperand(1));
+ bool isFloat0 = V0.getSimpleValueType().isFloatingPoint();
+ bool isFloat1 = V1.getSimpleValueType().isFloatingPoint();
+ bool isZero0 = ISD::isBuildVectorAllZeros(V0.getNode());
+ bool isZero1 = ISD::isBuildVectorAllZeros(V1.getNode());
+ assert(!(isZero0 && isZero1) && "Zeroable shuffle detected.");
+
+ // We often lower to MOVSD/MOVSS from integer as well as native float
+ // types; remove unnecessary domain-crossing bitcasts if we can to make it
+ // easier to combine shuffles later on. We've already accounted for the
+ // domain switching cost when we decided to lower with it.
+ if ((isFloat != isFloat0 || isZero0) && (isFloat != isFloat1 || isZero1)) {
+ MVT NewVT = isFloat ? (X86ISD::MOVSD == Opcode ? MVT::v2i64 : MVT::v4i32)
+ : (X86ISD::MOVSD == Opcode ? MVT::v2f64 : MVT::v4f32);
+ V0 = DAG.getBitcast(NewVT, V0);
+ V1 = DAG.getBitcast(NewVT, V1);
+ return DAG.getBitcast(VT, DAG.getNode(Opcode, DL, NewVT, V0, V1));
}
+
return SDValue();
}
case X86ISD::INSERTPS: {
@@ -25976,9 +27693,7 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
V.getOpcode() == X86ISD::PSHUFHW) &&
V.getOpcode() != N.getOpcode() &&
V.hasOneUse()) {
- SDValue D = V.getOperand(0);
- while (D.getOpcode() == ISD::BITCAST && D.hasOneUse())
- D = D.getOperand(0);
+ SDValue D = peekThroughOneUseBitcasts(V.getOperand(0));
if (D.getOpcode() == X86ISD::PSHUFD && D.hasOneUse()) {
SmallVector<int, 4> VMask = getPSHUFShuffleMask(V);
SmallVector<int, 4> DMask = getPSHUFShuffleMask(D);
@@ -26038,10 +27753,8 @@ static SDValue combineShuffleToAddSub(SDNode *N, const X86Subtarget &Subtarget,
if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
return SDValue();
- auto *SVN = cast<ShuffleVectorSDNode>(N);
- SmallVector<int, 8> Mask;
- for (int M : SVN->getMask())
- Mask.push_back(M);
+ ArrayRef<int> OrigMask = cast<ShuffleVectorSDNode>(N)->getMask();
+ SmallVector<int, 8> Mask(OrigMask.begin(), OrigMask.end());
SDValue V1 = N->getOperand(0);
SDValue V2 = N->getOperand(1);
@@ -26075,6 +27788,51 @@ static SDValue combineShuffleToAddSub(SDNode *N, const X86Subtarget &Subtarget,
return DAG.getNode(X86ISD::ADDSUB, DL, VT, LHS, RHS);
}
+// We are looking for a shuffle where both sources are concatenated with undef
+// and have a width that is half of the output's width. AVX2 has VPERMD/Q, so
+// if we can express this as a single-source shuffle, that's preferable.
+static SDValue combineShuffleOfConcatUndef(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ if (!Subtarget.hasAVX2() || !isa<ShuffleVectorSDNode>(N))
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+
+ // We only care about shuffles of 128/256-bit vectors of 32/64-bit values.
+ if (!VT.is128BitVector() && !VT.is256BitVector())
+ return SDValue();
+
+ if (VT.getVectorElementType() != MVT::i32 &&
+ VT.getVectorElementType() != MVT::i64 &&
+ VT.getVectorElementType() != MVT::f32 &&
+ VT.getVectorElementType() != MVT::f64)
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+
+ // Check that both sources are concats with undef.
+ if (N0.getOpcode() != ISD::CONCAT_VECTORS ||
+ N1.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
+ N1.getNumOperands() != 2 || !N0.getOperand(1).isUndef() ||
+ !N1.getOperand(1).isUndef())
+ return SDValue();
+
+ // Construct the new shuffle mask. Elements from the first source retain their
+ // index, but elements from the second source no longer need to skip an undef.
+ SmallVector<int, 8> Mask;
+ int NumElts = VT.getVectorNumElements();
+
+ ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
+ for (int Elt : SVOp->getMask())
+ Mask.push_back(Elt < NumElts ? Elt : (Elt - NumElts / 2));
+
+ SDLoc DL(N);
+ SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, N0.getOperand(0),
+ N1.getOperand(0));
+ return DAG.getVectorShuffle(VT, DL, Concat, DAG.getUNDEF(VT), Mask);
+}
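// Illustrative sketch (simplified, standalone; not part of the patch): the mask
// rewrite used above. With both operands of the form concat(tN, undef),
// indices into the first operand stay put, while indices into the second
// operand drop the gap left by the first operand's undef upper half, so they
// land on t2's new position inside the single concat(t1, t2) input.
#include <vector>

static std::vector<int> remapConcatUndefMask(const std::vector<int> &Mask,
                                             int NumElts) {
  std::vector<int> Out;
  for (int Elt : Mask)
    Out.push_back(Elt < NumElts ? Elt : Elt - NumElts / 2);
  return Out;
}
// e.g. with v8i32 (NumElts = 8), old element 9 - t2's second element in the
// second operand - becomes 5, t2's second element inside concat(t1, t2).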
+
static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
@@ -26092,11 +27850,6 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
if (SDValue AddSub = combineShuffleToAddSub(N, Subtarget, DAG))
return AddSub;
- // Combine 256-bit vector shuffles. This is only profitable when in AVX mode
- if (TLI.isTypeLegal(VT) && Subtarget.hasFp256() && VT.is256BitVector() &&
- N->getOpcode() == ISD::VECTOR_SHUFFLE)
- return combineShuffle256(N, DAG, DCI, Subtarget);
-
// During Type Legalization, when promoting illegal vector types,
// the backend might introduce new shuffle dag nodes and bitcasts.
//
@@ -26127,13 +27880,18 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
bool CanFold = false;
switch (Opcode) {
default : break;
- case ISD::ADD :
- case ISD::FADD :
- case ISD::SUB :
- case ISD::FSUB :
- case ISD::MUL :
- case ISD::FMUL :
- CanFold = true;
+ case ISD::ADD:
+ case ISD::SUB:
+ case ISD::MUL:
+ // isOperationLegal lies for integer ops on floating point types.
+ CanFold = VT.isInteger();
+ break;
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ // isOperationLegal lies for floating point ops on integer types.
+ CanFold = VT.isFloatingPoint();
+ break;
}
unsigned SVTNumElts = SVT.getVectorNumElements();
@@ -26162,9 +27920,18 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
if (SDValue LD = EltsFromConsecutiveLoads(VT, Elts, dl, DAG, true))
return LD;
+ // For AVX2, we sometimes want to combine
+ // (vector_shuffle <mask> (concat_vectors t1, undef)
+ // (concat_vectors t2, undef))
+ // Into:
+ // (vector_shuffle <mask> (concat_vectors t1, t2), undef)
+ // Since the latter can be efficiently lowered with VPERMD/VPERMQ
+ if (SDValue ShufConcat = combineShuffleOfConcatUndef(N, DAG, Subtarget))
+ return ShufConcat;
+
if (isTargetShuffle(N->getOpcode())) {
- if (SDValue Shuffle =
- combineTargetShuffle(SDValue(N, 0), DAG, DCI, Subtarget))
+ SDValue Op(N, 0);
+ if (SDValue Shuffle = combineTargetShuffle(Op, DAG, DCI, Subtarget))
return Shuffle;
// Try recursively combining arbitrary sequences of x86 shuffle
@@ -26174,8 +27941,8 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
// a particular chain.
SmallVector<int, 1> NonceMask; // Just a placeholder.
NonceMask.push_back(0);
- if (combineX86ShufflesRecursively(SDValue(N, 0), SDValue(N, 0), NonceMask,
- /*Depth*/ 1, /*HasPSHUFB*/ false, DAG,
+ if (combineX86ShufflesRecursively({Op}, 0, Op, NonceMask,
+ /*Depth*/ 1, /*HasVarMask*/ false, DAG,
DCI, Subtarget))
return SDValue(); // This routine will use CombineTo to replace N.
}
@@ -26305,11 +28072,10 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
}
// Convert a bitcasted integer logic operation that has one bitcasted
- // floating-point operand and one constant operand into a floating-point
- // logic operation. This may create a load of the constant, but that is
- // cheaper than materializing the constant in an integer register and
- // transferring it to an SSE register or transferring the SSE operand to
- // integer register and back.
+ // floating-point operand into a floating-point logic operation. This may
+ // create a load of a constant, but that is cheaper than materializing the
+ // constant in an integer register and transferring it to an SSE register or
+ // transferring the SSE operand to an integer register and back.
unsigned FPOpcode;
switch (N0.getOpcode()) {
case ISD::AND: FPOpcode = X86ISD::FAND; break;
@@ -26317,25 +28083,238 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
case ISD::XOR: FPOpcode = X86ISD::FXOR; break;
default: return SDValue();
}
- if (((Subtarget.hasSSE1() && VT == MVT::f32) ||
- (Subtarget.hasSSE2() && VT == MVT::f64)) &&
- isa<ConstantSDNode>(N0.getOperand(1)) &&
- N0.getOperand(0).getOpcode() == ISD::BITCAST &&
- N0.getOperand(0).getOperand(0).getValueType() == VT) {
- SDValue N000 = N0.getOperand(0).getOperand(0);
- SDValue FPConst = DAG.getBitcast(VT, N0.getOperand(1));
- return DAG.getNode(FPOpcode, SDLoc(N0), VT, N000, FPConst);
+
+ if (!((Subtarget.hasSSE1() && VT == MVT::f32) ||
+ (Subtarget.hasSSE2() && VT == MVT::f64)))
+ return SDValue();
+
+ SDValue LogicOp0 = N0.getOperand(0);
+ SDValue LogicOp1 = N0.getOperand(1);
+ SDLoc DL0(N0);
+
+ // bitcast(logic(bitcast(X), Y)) --> logic'(X, bitcast(Y))
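+ // e.g. (assuming the f32/i32 case checked above):
+ //   (f32 (bitcast (and (i32 (bitcast %x)), %y)))
+ //     --> (FAND %x, (f32 (bitcast %y)))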
+ if (N0.hasOneUse() && LogicOp0.getOpcode() == ISD::BITCAST &&
+ LogicOp0.hasOneUse() && LogicOp0.getOperand(0).getValueType() == VT &&
+ !isa<ConstantSDNode>(LogicOp0.getOperand(0))) {
+ SDValue CastedOp1 = DAG.getBitcast(VT, LogicOp1);
+ return DAG.getNode(FPOpcode, DL0, VT, LogicOp0.getOperand(0), CastedOp1);
+ }
+ // bitcast(logic(X, bitcast(Y))) --> logic'(bitcast(X), Y)
+ if (N0.hasOneUse() && LogicOp1.getOpcode() == ISD::BITCAST &&
+ LogicOp1.hasOneUse() && LogicOp1.getOperand(0).getValueType() == VT &&
+ !isa<ConstantSDNode>(LogicOp1.getOperand(0))) {
+ SDValue CastedOp0 = DAG.getBitcast(VT, LogicOp0);
+ return DAG.getNode(FPOpcode, DL0, VT, LogicOp1.getOperand(0), CastedOp0);
}
return SDValue();
}
+// Match a binop + shuffle pyramid that represents a horizontal reduction over
+// the elements of a vector.
+// Returns the vector that is being reduced on, or SDValue() if a reduction
+// was not matched.
+static SDValue matchBinOpReduction(SDNode *Extract, ISD::NodeType BinOp) {
+ // The pattern must end in an extract from index 0.
+ if ((Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT) ||
+ !isNullConstant(Extract->getOperand(1)))
+ return SDValue();
+
+ unsigned Stages =
+ Log2_32(Extract->getOperand(0).getValueType().getVectorNumElements());
+
+ SDValue Op = Extract->getOperand(0);
+ // At each stage, we're looking for something that looks like:
+ // %s = shufflevector <8 x i32> %op, <8 x i32> undef,
+ // <8 x i32> <i32 2, i32 3, i32 undef, i32 undef,
+ // i32 undef, i32 undef, i32 undef, i32 undef>
+ // %a = binop <8 x i32> %op, %s
+ // Where the mask changes according to the stage. E.g. for a 3-stage pyramid,
+ // we expect something like:
+ // <4,5,6,7,u,u,u,u>
+ // <2,3,u,u,u,u,u,u>
+ // <1,u,u,u,u,u,u,u>
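+ //
+ // As a concrete sketch (an assumed example, not taken from a test), a full
+ // <4 x i32> horizontal-add reduction accepted by the loop below looks like:
+ //   %s0 = shufflevector <4 x i32> %v,  <4 x i32> undef, <2,3,u,u>
+ //   %a0 = add <4 x i32> %v, %s0
+ //   %s1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <1,u,u,u>
+ //   %a1 = add <4 x i32> %a0, %s1
+ //   %r  = extractelement <4 x i32> %a1, i32 0
+ // in which case %v is returned as the vector being reduced.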
+ for (unsigned i = 0; i < Stages; ++i) {
+ if (Op.getOpcode() != BinOp)
+ return SDValue();
+
+ ShuffleVectorSDNode *Shuffle =
+ dyn_cast<ShuffleVectorSDNode>(Op.getOperand(0).getNode());
+ if (Shuffle) {
+ Op = Op.getOperand(1);
+ } else {
+ Shuffle = dyn_cast<ShuffleVectorSDNode>(Op.getOperand(1).getNode());
+ Op = Op.getOperand(0);
+ }
+
+ // The first operand of the shuffle should be the same as the other operand
+ // of the binop.
+ if (!Shuffle || (Shuffle->getOperand(0) != Op))
+ return SDValue();
+
+ // Verify the shuffle has the expected (at this stage of the pyramid) mask.
+ for (int Index = 0, MaskEnd = 1 << i; Index < MaskEnd; ++Index)
+ if (Shuffle->getMaskElt(Index) != MaskEnd + Index)
+ return SDValue();
+ }
+
+ return Op;
+}
+
+// Given a select, detect the following pattern:
+// 1: %2 = zext <N x i8> %0 to <N x i32>
+// 2: %3 = zext <N x i8> %1 to <N x i32>
+// 3: %4 = sub nsw <N x i32> %2, %3
+// 4: %5 = icmp sgt <N x i32> %4, [0 x N] or [-1 x N]
+// 5: %6 = sub nsw <N x i32> zeroinitializer, %4
+// 6: %7 = select <N x i1> %5, <N x i32> %4, <N x i32> %6
+// This is useful as it is the input into a SAD pattern.
+static bool detectZextAbsDiff(const SDValue &Select, SDValue &Op0,
+ SDValue &Op1) {
+ // Check the condition of the select instruction is greater-than.
+ SDValue SetCC = Select->getOperand(0);
+ if (SetCC.getOpcode() != ISD::SETCC)
+ return false;
+ ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
+ if (CC != ISD::SETGT)
+ return false;
+
+ SDValue SelectOp1 = Select->getOperand(1);
+ SDValue SelectOp2 = Select->getOperand(2);
+
+ // The second operand of the select should be the negation of the first
+ // operand, which is implemented as 0 - SelectOp1.
+ if (!(SelectOp2.getOpcode() == ISD::SUB &&
+ ISD::isBuildVectorAllZeros(SelectOp2.getOperand(0).getNode()) &&
+ SelectOp2.getOperand(1) == SelectOp1))
+ return false;
+
+ // The first operand of SetCC is the first operand of the select, which is the
+ // difference between the two input vectors.
+ if (SetCC.getOperand(0) != SelectOp1)
+ return false;
+
+ // The second operand of the comparison can be either -1 or 0.
+ if (!(ISD::isBuildVectorAllZeros(SetCC.getOperand(1).getNode()) ||
+ ISD::isBuildVectorAllOnes(SetCC.getOperand(1).getNode())))
+ return false;
+
+ // The first operand of the select is the difference between the two input
+ // vectors.
+ if (SelectOp1.getOpcode() != ISD::SUB)
+ return false;
+
+ Op0 = SelectOp1.getOperand(0);
+ Op1 = SelectOp1.getOperand(1);
+
+ // Check if the operands of the sub are zero-extended from vectors of i8.
+ if (Op0.getOpcode() != ISD::ZERO_EXTEND ||
+ Op0.getOperand(0).getValueType().getVectorElementType() != MVT::i8 ||
+ Op1.getOpcode() != ISD::ZERO_EXTEND ||
+ Op1.getOperand(0).getValueType().getVectorElementType() != MVT::i8)
+ return false;
+
+ return true;
+}
+
+// Given two zexts of <k x i8> to <k x i32>, create a PSADBW of the inputs
+// to these zexts.
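+// For example (sizes assumed for illustration): with <4 x i8> inputs the
+// operands are padded with zero vectors up to v16i8, and the resulting
+// PSADBW node has type v2i64.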
+static SDValue createPSADBW(SelectionDAG &DAG, const SDValue &Zext0,
+ const SDValue &Zext1, const SDLoc &DL) {
+
+ // Find the appropriate width for the PSADBW.
+ EVT InVT = Zext0.getOperand(0).getValueType();
+ unsigned RegSize = std::max(128u, InVT.getSizeInBits());
+
+ // "Zero-extend" the i8 vectors. This is not a per-element zext, rather we
+ // fill in the missing vector elements with 0.
+ unsigned NumConcat = RegSize / InVT.getSizeInBits();
+ SmallVector<SDValue, 16> Ops(NumConcat, DAG.getConstant(0, DL, InVT));
+ Ops[0] = Zext0.getOperand(0);
+ MVT ExtendedVT = MVT::getVectorVT(MVT::i8, RegSize / 8);
+ SDValue SadOp0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, ExtendedVT, Ops);
+ Ops[0] = Zext1.getOperand(0);
+ SDValue SadOp1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, ExtendedVT, Ops);
+
+ // Actually build the SAD
+ MVT SadVT = MVT::getVectorVT(MVT::i64, RegSize / 64);
+ return DAG.getNode(X86ISD::PSADBW, DL, SadVT, SadOp0, SadOp1);
+}
+
+static SDValue combineBasicSADPattern(SDNode *Extract, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ // PSADBW is only supported on SSE2 and up.
+ if (!Subtarget.hasSSE2())
+ return SDValue();
+
+ // Verify the type we're extracting from is appropriate
+ // TODO: There's nothing special about i32; any integer type above i16 should
+ // work just as well.
+ EVT VT = Extract->getOperand(0).getValueType();
+ if (!VT.isSimple() || !(VT.getVectorElementType() == MVT::i32))
+ return SDValue();
+
+ unsigned RegSize = 128;
+ if (Subtarget.hasBWI())
+ RegSize = 512;
+ else if (Subtarget.hasAVX2())
+ RegSize = 256;
+
+ // We only handle v16i32 for SSE2 / v32i32 for AVX2 / v64i32 for AVX512.
+ // TODO: We should be able to handle larger vectors by splitting them before
+ // feeding them into several SADs, and then reducing over those.
+ if (VT.getSizeInBits() / 4 > RegSize)
+ return SDValue();
+
+ // Match shuffle + add pyramid.
+ SDValue Root = matchBinOpReduction(Extract, ISD::ADD);
+
+ // If there was a match, we want Root to be a select that is the root of an
+ // abs-diff pattern.
+ if (!Root || (Root.getOpcode() != ISD::VSELECT))
+ return SDValue();
+
+ // Check whether we have an abs-diff pattern feeding into the select.
+ SDValue Zext0, Zext1;
+ if (!detectZextAbsDiff(Root, Zext0, Zext1))
+ return SDValue();
+
+ // Create the SAD instruction
+ SDLoc DL(Extract);
+ SDValue SAD = createPSADBW(DAG, Zext0, Zext1, DL);
+
+ // If the original vector was wider than 8 elements, sum over the results
+ // in the SAD vector.
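+ // For example (an assumed case): a v16i32 reduction gives Stages = 4 and,
+ // on SSE2, a v2i64 SAD result, so a single shuffle+add pass below folds the
+ // two i64 partial sums together before the final extract.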
+ unsigned Stages = Log2_32(VT.getVectorNumElements());
+ MVT SadVT = SAD.getSimpleValueType();
+ if (Stages > 3) {
+ unsigned SadElems = SadVT.getVectorNumElements();
+
+ for (unsigned i = Stages - 3; i > 0; --i) {
+ SmallVector<int, 16> Mask(SadElems, -1);
+ for (unsigned j = 0, MaskEnd = 1 << (i - 1); j < MaskEnd; ++j)
+ Mask[j] = MaskEnd + j;
+
+ SDValue Shuffle =
+ DAG.getVectorShuffle(SadVT, DL, SAD, DAG.getUNDEF(SadVT), Mask);
+ SAD = DAG.getNode(ISD::ADD, DL, SadVT, SAD, Shuffle);
+ }
+ }
+
+ // Return the lowest i32.
+ MVT ResVT = MVT::getVectorVT(MVT::i32, SadVT.getSizeInBits() / 32);
+ SAD = DAG.getNode(ISD::BITCAST, DL, ResVT, SAD);
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, SAD,
+ Extract->getOperand(1));
+}
+
/// Detect vector gather/scatter index generation and convert it from being a
/// bunch of shuffles and extracts into a somewhat faster sequence.
/// For i686, the best sequence is apparently storing the value and loading
/// scalars back, while for x64 we should use 64-bit extracts and shifts.
static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI) {
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
if (SDValue NewOp = XFormVExtractWithShuffleIntoLoad(N, DAG, DCI))
return NewOp;
@@ -26347,7 +28326,7 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
InputVector.getValueType() == MVT::v2i32 &&
isa<ConstantSDNode>(N->getOperand(1)) &&
N->getConstantOperandVal(1) == 0) {
- SDValue MMXSrc = InputVector.getNode()->getOperand(0);
+ SDValue MMXSrc = InputVector.getOperand(0);
// The bitcast source is a direct mmx result.
if (MMXSrc.getValueType() == MVT::x86mmx)
@@ -26366,6 +28345,13 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
uint64_t Res = (InputValue >> ExtractedElt) & 1;
return DAG.getConstant(Res, dl, MVT::i1);
}
+
+ // Check whether this extract is the root of a sum of absolute differences
+ // pattern. This has to be done here because we really want it to happen
+ // pre-legalization.
+ if (SDValue SAD = combineBasicSADPattern(N, DAG, Subtarget))
+ return SAD;
+
// Only operate on vectors of 4 elements, where the alternative shuffling
// gets to be more expensive.
if (InputVector.getValueType() != MVT::v4i32)
@@ -26467,6 +28453,251 @@ static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+/// If a vector select has an operand that is -1 or 0, simplify the select to a
+/// bitwise logic operation.
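+/// For example (illustrative; bitcasts between VT and CondVT omitted):
+///   (vselect Cond, (build_vector -1,...), RHS) --> (or Cond, RHS)
+///   (vselect Cond, LHS, (build_vector 0,...))  --> (and Cond, LHS)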
+static SDValue combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ SDValue Cond = N->getOperand(0);
+ SDValue LHS = N->getOperand(1);
+ SDValue RHS = N->getOperand(2);
+ EVT VT = LHS.getValueType();
+ EVT CondVT = Cond.getValueType();
+ SDLoc DL(N);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ if (N->getOpcode() != ISD::VSELECT)
+ return SDValue();
+
+ bool FValIsAllZeros = ISD::isBuildVectorAllZeros(LHS.getNode());
+ // Check if the first operand is all zeros. This situation only
+ // applies to AVX512.
+ if (FValIsAllZeros && Subtarget.hasAVX512() && Cond.hasOneUse()) {
+ // Invert the cond to not(cond): xor(op, allones) = not(op)
+ SDValue CondNew = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
+ DAG.getConstant(1, DL, Cond.getValueType()));
+ // Vselect cond, op1, op2 = Vselect not(cond), op2, op1
+ return DAG.getNode(ISD::VSELECT, DL, VT, CondNew, RHS, LHS);
+ }
+ assert(CondVT.isVector() && "Vector select expects a vector selector!");
+
+ // To use the condition operand as a bitwise mask, it must have elements that
+ // are the same size as the select elements, i.e. the condition operand must
+ // have already been promoted from the IR select condition type <N x i1>.
+ // Don't check if the types themselves are equal because that excludes
+ // vector floating-point selects.
+ if (CondVT.getScalarSizeInBits() != VT.getScalarSizeInBits())
+ return SDValue();
+
+ bool TValIsAllOnes = ISD::isBuildVectorAllOnes(LHS.getNode());
+ FValIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
+
+ // Try to invert the condition if true value is not all 1s and false value is
+ // not all 0s.
+ if (!TValIsAllOnes && !FValIsAllZeros &&
+ // Check if the selector will be produced by CMPP*/PCMP*.
+ Cond.getOpcode() == ISD::SETCC &&
+ // Check if SETCC has already been promoted.
+ TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT) ==
+ CondVT) {
+ bool TValIsAllZeros = ISD::isBuildVectorAllZeros(LHS.getNode());
+ bool FValIsAllOnes = ISD::isBuildVectorAllOnes(RHS.getNode());
+
+ if (TValIsAllZeros || FValIsAllOnes) {
+ SDValue CC = Cond.getOperand(2);
+ ISD::CondCode NewCC =
+ ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
+ Cond.getOperand(0).getValueType().isInteger());
+ Cond = DAG.getSetCC(DL, CondVT, Cond.getOperand(0), Cond.getOperand(1),
+ NewCC);
+ std::swap(LHS, RHS);
+ TValIsAllOnes = FValIsAllOnes;
+ FValIsAllZeros = TValIsAllZeros;
+ }
+ }
+
+ if (!TValIsAllOnes && !FValIsAllZeros)
+ return SDValue();
+
+ SDValue Ret;
+ if (TValIsAllOnes && FValIsAllZeros)
+ Ret = Cond;
+ else if (TValIsAllOnes)
+ Ret = DAG.getNode(ISD::OR, DL, CondVT, Cond, DAG.getBitcast(CondVT, RHS));
+ else if (FValIsAllZeros)
+ Ret = DAG.getNode(ISD::AND, DL, CondVT, Cond, DAG.getBitcast(CondVT, LHS));
+
+ return DAG.getBitcast(VT, Ret);
+}
+
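+// Fold a select between two integer constants into arithmetic on the
+// zero-extended condition. A few assumed examples of the folds performed
+// below:
+//   (select Cond, i32 8, i32 0) --> (shl (zext Cond), 3)
+//   (select Cond, i32 5, i32 4) --> (add (zext Cond), 4)
+//   (select Cond, i32 9, i32 1) --> (add (mul (zext Cond), 8), 1)  (via LEA)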
+static SDValue combineSelectOfTwoConstants(SDNode *N, SelectionDAG &DAG) {
+ SDValue Cond = N->getOperand(0);
+ SDValue LHS = N->getOperand(1);
+ SDValue RHS = N->getOperand(2);
+ SDLoc DL(N);
+
+ auto *TrueC = dyn_cast<ConstantSDNode>(LHS);
+ auto *FalseC = dyn_cast<ConstantSDNode>(RHS);
+ if (!TrueC || !FalseC)
+ return SDValue();
+
+ // Don't do this for crazy integer types.
+ if (!DAG.getTargetLoweringInfo().isTypeLegal(LHS.getValueType()))
+ return SDValue();
+
+ // If this is efficiently invertible, canonicalize the LHSC/RHSC values
+ // so that TrueC (the true value) is larger than FalseC.
+ bool NeedsCondInvert = false;
+ if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue()) &&
+ // Efficiently invertible.
+ (Cond.getOpcode() == ISD::SETCC || // setcc -> invertible.
+ (Cond.getOpcode() == ISD::XOR && // xor(X, C) -> invertible.
+ isa<ConstantSDNode>(Cond.getOperand(1))))) {
+ NeedsCondInvert = true;
+ std::swap(TrueC, FalseC);
+ }
+
+ // Optimize C ? 8 : 0 -> zext(C) << 3. Likewise for any pow2/0.
+ if (FalseC->getAPIntValue() == 0 && TrueC->getAPIntValue().isPowerOf2()) {
+ if (NeedsCondInvert) // Invert the condition if needed.
+ Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
+ DAG.getConstant(1, DL, Cond.getValueType()));
+
+ // Zero extend the condition if needed.
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, LHS.getValueType(), Cond);
+
+ unsigned ShAmt = TrueC->getAPIntValue().logBase2();
+ return DAG.getNode(ISD::SHL, DL, LHS.getValueType(), Cond,
+ DAG.getConstant(ShAmt, DL, MVT::i8));
+ }
+
+ // Optimize Cond ? cst+1 : cst -> zext(setcc(C)) + cst.
+ if (FalseC->getAPIntValue() + 1 == TrueC->getAPIntValue()) {
+ if (NeedsCondInvert) // Invert the condition if needed.
+ Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
+ DAG.getConstant(1, DL, Cond.getValueType()));
+
+ // Zero extend the condition if needed.
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0), Cond);
+ return DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
+ SDValue(FalseC, 0));
+ }
+
+ // Optimize cases that will turn into an LEA instruction. This requires
+ // an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9).
+ if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) {
+ uint64_t Diff = TrueC->getZExtValue() - FalseC->getZExtValue();
+ if (N->getValueType(0) == MVT::i32)
+ Diff = (unsigned)Diff;
+
+ bool isFastMultiplier = false;
+ if (Diff < 10) {
+ switch ((unsigned char)Diff) {
+ default:
+ break;
+ case 1: // result = add base, cond
+ case 2: // result = lea base( , cond*2)
+ case 3: // result = lea base(cond, cond*2)
+ case 4: // result = lea base( , cond*4)
+ case 5: // result = lea base(cond, cond*4)
+ case 8: // result = lea base( , cond*8)
+ case 9: // result = lea base(cond, cond*8)
+ isFastMultiplier = true;
+ break;
+ }
+ }
+
+ if (isFastMultiplier) {
+ APInt Diff = TrueC->getAPIntValue() - FalseC->getAPIntValue();
+ if (NeedsCondInvert) // Invert the condition if needed.
+ Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
+ DAG.getConstant(1, DL, Cond.getValueType()));
+
+ // Zero extend the condition if needed.
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0), Cond);
+ // Scale the condition by the difference.
+ if (Diff != 1)
+ Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond,
+ DAG.getConstant(Diff, DL, Cond.getValueType()));
+
+ // Add the base if non-zero.
+ if (FalseC->getAPIntValue() != 0)
+ Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
+ SDValue(FalseC, 0));
+ return Cond;
+ }
+ }
+
+ return SDValue();
+}
+
+// If this is a bitcasted op that can be represented as another type, push
+// the bitcast to the inputs. This allows more opportunities for pattern
+// matching masked instructions. This is called when we know that the operation
+// is used as one of the inputs of a vselect.
+static bool combineBitcastForMaskedOp(SDValue OrigOp, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ // Make sure we have a bitcast.
+ if (OrigOp.getOpcode() != ISD::BITCAST)
+ return false;
+
+ SDValue Op = OrigOp.getOperand(0);
+
+ // If the operation is used by anything other than the bitcast, we shouldn't
+ // do this combine as that would replicate the operation.
+ if (!Op.hasOneUse())
+ return false;
+
+ MVT VT = OrigOp.getSimpleValueType();
+ MVT EltVT = VT.getVectorElementType();
+ SDLoc DL(Op.getNode());
+
+ auto BitcastAndCombineShuffle = [&](unsigned Opcode, SDValue Op0, SDValue Op1,
+ SDValue Op2) {
+ Op0 = DAG.getBitcast(VT, Op0);
+ DCI.AddToWorklist(Op0.getNode());
+ Op1 = DAG.getBitcast(VT, Op1);
+ DCI.AddToWorklist(Op1.getNode());
+ DCI.CombineTo(OrigOp.getNode(),
+ DAG.getNode(Opcode, DL, VT, Op0, Op1, Op2));
+ return true;
+ };
+
+ unsigned Opcode = Op.getOpcode();
+ switch (Opcode) {
+ case X86ISD::PALIGNR:
+ // PALIGNR can be converted to VALIGND/Q for 128-bit vectors.
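+ // For example (assumed types): a v16i8 PALIGNR with immediate 4 that is
+ // bitcast to v4i32 becomes a v4i32 VALIGND with immediate 1, since 4 bytes
+ // is exactly one i32 element.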
+ if (!VT.is128BitVector())
+ return false;
+ Opcode = X86ISD::VALIGN;
+ LLVM_FALLTHROUGH;
+ case X86ISD::VALIGN: {
+ if (EltVT != MVT::i32 && EltVT != MVT::i64)
+ return false;
+ uint64_t Imm = cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue();
+ MVT OpEltVT = Op.getSimpleValueType().getVectorElementType();
+ unsigned ShiftAmt = Imm * OpEltVT.getSizeInBits();
+ unsigned EltSize = EltVT.getSizeInBits();
+ // Make sure we can represent the same shift with the new VT.
+ if ((ShiftAmt % EltSize) != 0)
+ return false;
+ Imm = ShiftAmt / EltSize;
+ return BitcastAndCombineShuffle(Opcode, Op.getOperand(0), Op.getOperand(1),
+ DAG.getConstant(Imm, DL, MVT::i8));
+ }
+ case X86ISD::SHUF128: {
+ if (EltVT.getSizeInBits() != 32 && EltVT.getSizeInBits() != 64)
+ return false;
+ // Only change element size, not type.
+ if (VT.isInteger() != Op.getSimpleValueType().isInteger())
+ return false;
+ return BitcastAndCombineShuffle(Opcode, Op.getOperand(0), Op.getOperand(1),
+ Op.getOperand(2));
+ }
+ }
+
+ return false;
+}
+
/// Do target-specific dag combines on SELECT and VSELECT nodes.
static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
@@ -26477,6 +28708,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
SDValue LHS = N->getOperand(1);
SDValue RHS = N->getOperand(2);
EVT VT = LHS.getValueType();
+ EVT CondVT = Cond.getValueType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// If we have SSE[12] support, try to form min/max nodes. SSE min/max
@@ -26625,117 +28857,24 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(Opcode, DL, N->getValueType(0), LHS, RHS);
}
- EVT CondVT = Cond.getValueType();
- if (Subtarget.hasAVX512() && VT.isVector() && CondVT.isVector() &&
- CondVT.getVectorElementType() == MVT::i1) {
- // v16i8 (select v16i1, v16i8, v16i8) does not have a proper
- // lowering on KNL. In this case we convert it to
- // v16i8 (select v16i8, v16i8, v16i8) and use AVX instruction.
- // The same situation for all 128 and 256-bit vectors of i8 and i16.
- // Since SKX these selects have a proper lowering.
- EVT OpVT = LHS.getValueType();
- if ((OpVT.is128BitVector() || OpVT.is256BitVector()) &&
- (OpVT.getVectorElementType() == MVT::i8 ||
- OpVT.getVectorElementType() == MVT::i16) &&
- !(Subtarget.hasBWI() && Subtarget.hasVLX())) {
- Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, OpVT, Cond);
- DCI.AddToWorklist(Cond.getNode());
- return DAG.getNode(N->getOpcode(), DL, OpVT, Cond, LHS, RHS);
- }
+ // v16i8 (select v16i1, v16i8, v16i8) does not have a proper
+ // lowering on KNL. In this case we convert it to
+ // v16i8 (select v16i8, v16i8, v16i8) and use AVX instruction.
+ // The same situation for all 128 and 256-bit vectors of i8 and i16.
+ // Since SKX these selects have a proper lowering.
+ if (Subtarget.hasAVX512() && CondVT.isVector() &&
+ CondVT.getVectorElementType() == MVT::i1 &&
+ (VT.is128BitVector() || VT.is256BitVector()) &&
+ (VT.getVectorElementType() == MVT::i8 ||
+ VT.getVectorElementType() == MVT::i16) &&
+ !(Subtarget.hasBWI() && Subtarget.hasVLX())) {
+ Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
+ DCI.AddToWorklist(Cond.getNode());
+ return DAG.getNode(N->getOpcode(), DL, VT, Cond, LHS, RHS);
}
- // If this is a select between two integer constants, try to do some
- // optimizations.
- if (ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(LHS)) {
- if (ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(RHS))
- // Don't do this for crazy integer types.
- if (DAG.getTargetLoweringInfo().isTypeLegal(LHS.getValueType())) {
- // If this is efficiently invertible, canonicalize the LHSC/RHSC values
- // so that TrueC (the true value) is larger than FalseC.
- bool NeedsCondInvert = false;
-
- if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue()) &&
- // Efficiently invertible.
- (Cond.getOpcode() == ISD::SETCC || // setcc -> invertible.
- (Cond.getOpcode() == ISD::XOR && // xor(X, C) -> invertible.
- isa<ConstantSDNode>(Cond.getOperand(1))))) {
- NeedsCondInvert = true;
- std::swap(TrueC, FalseC);
- }
-
- // Optimize C ? 8 : 0 -> zext(C) << 3. Likewise for any pow2/0.
- if (FalseC->getAPIntValue() == 0 &&
- TrueC->getAPIntValue().isPowerOf2()) {
- if (NeedsCondInvert) // Invert the condition if needed.
- Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
- DAG.getConstant(1, DL, Cond.getValueType()));
-
- // Zero extend the condition if needed.
- Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, LHS.getValueType(), Cond);
-
- unsigned ShAmt = TrueC->getAPIntValue().logBase2();
- return DAG.getNode(ISD::SHL, DL, LHS.getValueType(), Cond,
- DAG.getConstant(ShAmt, DL, MVT::i8));
- }
- // Optimize Cond ? cst+1 : cst -> zext(setcc(C)+cst.
- if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) {
- if (NeedsCondInvert) // Invert the condition if needed.
- Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
- DAG.getConstant(1, DL, Cond.getValueType()));
-
- // Zero extend the condition if needed.
- Cond = DAG.getNode(ISD::ZERO_EXTEND, DL,
- FalseC->getValueType(0), Cond);
- return DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
- SDValue(FalseC, 0));
- }
-
- // Optimize cases that will turn into an LEA instruction. This requires
- // an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9).
- if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) {
- uint64_t Diff = TrueC->getZExtValue()-FalseC->getZExtValue();
- if (N->getValueType(0) == MVT::i32) Diff = (unsigned)Diff;
-
- bool isFastMultiplier = false;
- if (Diff < 10) {
- switch ((unsigned char)Diff) {
- default: break;
- case 1: // result = add base, cond
- case 2: // result = lea base( , cond*2)
- case 3: // result = lea base(cond, cond*2)
- case 4: // result = lea base( , cond*4)
- case 5: // result = lea base(cond, cond*4)
- case 8: // result = lea base( , cond*8)
- case 9: // result = lea base(cond, cond*8)
- isFastMultiplier = true;
- break;
- }
- }
-
- if (isFastMultiplier) {
- APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue();
- if (NeedsCondInvert) // Invert the condition if needed.
- Cond = DAG.getNode(ISD::XOR, DL, Cond.getValueType(), Cond,
- DAG.getConstant(1, DL, Cond.getValueType()));
-
- // Zero extend the condition if needed.
- Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0),
- Cond);
- // Scale the condition by the difference.
- if (Diff != 1)
- Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond,
- DAG.getConstant(Diff, DL,
- Cond.getValueType()));
-
- // Add the base if non-zero.
- if (FalseC->getAPIntValue() != 0)
- Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond,
- SDValue(FalseC, 0));
- return Cond;
- }
- }
- }
- }
+ if (SDValue V = combineSelectOfTwoConstants(N, DAG))
+ return V;
// Canonicalize max and min:
// (x > y) ? x : y -> (x >= y) ? x : y
@@ -26832,53 +28971,8 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
}
}
- // Simplify vector selection if condition value type matches vselect
- // operand type
- if (N->getOpcode() == ISD::VSELECT && CondVT == VT) {
- assert(Cond.getValueType().isVector() &&
- "vector select expects a vector selector!");
-
- bool TValIsAllOnes = ISD::isBuildVectorAllOnes(LHS.getNode());
- bool FValIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
-
- // Try invert the condition if true value is not all 1s and false value
- // is not all 0s.
- if (!TValIsAllOnes && !FValIsAllZeros &&
- // Check if the selector will be produced by CMPP*/PCMP*
- Cond.getOpcode() == ISD::SETCC &&
- // Check if SETCC has already been promoted
- TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT) ==
- CondVT) {
- bool TValIsAllZeros = ISD::isBuildVectorAllZeros(LHS.getNode());
- bool FValIsAllOnes = ISD::isBuildVectorAllOnes(RHS.getNode());
-
- if (TValIsAllZeros || FValIsAllOnes) {
- SDValue CC = Cond.getOperand(2);
- ISD::CondCode NewCC =
- ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
- Cond.getOperand(0).getValueType().isInteger());
- Cond = DAG.getSetCC(DL, CondVT, Cond.getOperand(0), Cond.getOperand(1), NewCC);
- std::swap(LHS, RHS);
- TValIsAllOnes = FValIsAllOnes;
- FValIsAllZeros = TValIsAllZeros;
- }
- }
-
- if (TValIsAllOnes || FValIsAllZeros) {
- SDValue Ret;
-
- if (TValIsAllOnes && FValIsAllZeros)
- Ret = Cond;
- else if (TValIsAllOnes)
- Ret =
- DAG.getNode(ISD::OR, DL, CondVT, Cond, DAG.getBitcast(CondVT, RHS));
- else if (FValIsAllZeros)
- Ret = DAG.getNode(ISD::AND, DL, CondVT, Cond,
- DAG.getBitcast(CondVT, LHS));
-
- return DAG.getBitcast(VT, Ret);
- }
- }
+ if (SDValue V = combineVSelectWithAllOnesOrZeros(N, DAG, Subtarget))
+ return V;
// If this is a *dynamic* select (non-constant condition) and we can match
// this node with one of the variable blend instructions, restructure the
@@ -26887,7 +28981,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
if (N->getOpcode() == ISD::VSELECT && DCI.isBeforeLegalizeOps() &&
!DCI.isBeforeLegalize() &&
!ISD::isBuildVectorOfConstantSDNodes(Cond.getNode())) {
- unsigned BitWidth = Cond.getValueType().getScalarSizeInBits();
+ unsigned BitWidth = Cond.getScalarValueSizeInBits();
// Don't optimize vector selects that map to mask-registers.
if (BitWidth == 1)
@@ -26965,6 +29059,17 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
}
}
+ // Look for vselects with LHS/RHS being bitcasted from an operation that
+ // can be executed on another type. Push the bitcast to the inputs of
+ // the operation. This exposes opportunities for using masking instructions.
+ if (N->getOpcode() == ISD::VSELECT && !DCI.isBeforeLegalizeOps() &&
+ CondVT.getVectorElementType() == MVT::i1) {
+ if (combineBitcastForMaskedOp(LHS, DAG, DCI))
+ return SDValue(N, 0);
+ if (combineBitcastForMaskedOp(RHS, DAG, DCI))
+ return SDValue(N, 0);
+ }
+
return SDValue();
}
@@ -27088,7 +29193,6 @@ static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
// Skip (zext $x), (trunc $x), or (and $x, 1) node.
while (SetCC.getOpcode() == ISD::ZERO_EXTEND ||
SetCC.getOpcode() == ISD::TRUNCATE ||
- SetCC.getOpcode() == ISD::AssertZext ||
SetCC.getOpcode() == ISD::AND) {
if (SetCC.getOpcode() == ISD::AND) {
int OpIdx = -1;
@@ -27114,7 +29218,7 @@ static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
break;
assert(X86::CondCode(SetCC.getConstantOperandVal(0)) == X86::COND_B &&
"Invalid use of SETCC_CARRY!");
- // FALL THROUGH
+ LLVM_FALLTHROUGH;
case X86ISD::SETCC:
// Set the condition code or opposite one if necessary.
CC = X86::CondCode(SetCC.getConstantOperandVal(0));
@@ -27187,7 +29291,7 @@ static bool checkBoolTestAndOrSetCCCombine(SDValue Cond, X86::CondCode &CC0,
case ISD::AND:
case X86ISD::AND:
isAnd = true;
- // fallthru
+ LLVM_FALLTHROUGH;
case ISD::OR:
case X86ISD::OR:
SetCC0 = Cond->getOperand(0);
@@ -27270,8 +29374,7 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG,
// This is efficient for any integer data type (including i8/i16) and
// shift amount.
if (FalseC->getAPIntValue() == 0 && TrueC->getAPIntValue().isPowerOf2()) {
- Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
- DAG.getConstant(CC, DL, MVT::i8), Cond);
+ Cond = getSETCC(CC, Cond, DL, DAG);
// Zero extend the condition if needed.
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, TrueC->getValueType(0), Cond);
@@ -27287,8 +29390,7 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG,
// Optimize Cond ? cst+1 : cst -> zext(setcc(C)) + cst. This is efficient
// for any integer data type, including i8/i16.
if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) {
- Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
- DAG.getConstant(CC, DL, MVT::i8), Cond);
+ Cond = getSETCC(CC, Cond, DL, DAG);
// Zero extend the condition if needed.
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL,
@@ -27325,8 +29427,7 @@ static SDValue combineCMov(SDNode *N, SelectionDAG &DAG,
if (isFastMultiplier) {
APInt Diff = TrueC->getAPIntValue()-FalseC->getAPIntValue();
- Cond = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
- DAG.getConstant(CC, DL, MVT::i8), Cond);
+ Cond = getSETCC(CC, Cond, DL, DAG);
// Zero extend the condition if needed.
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0),
Cond);
@@ -27525,10 +29626,17 @@ static bool canReduceVMulWidth(SDNode *N, SelectionDAG &DAG, ShrinkMode &Mode) {
/// generate pmullw+pmulhuw for it (MULU16 mode).
static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
- // pmulld is supported since SSE41. It is better to use pmulld
- // instead of pmullw+pmulhw.
+ // Check for legality
// pmullw/pmulhw are not supported by SSE.
- if (Subtarget.hasSSE41() || !Subtarget.hasSSE2())
+ if (!Subtarget.hasSSE2())
+ return SDValue();
+
+ // Check for profitability
+ // pmulld is supported since SSE41. It is better to use pmulld
+ // instead of pmullw+pmulhw, except for subtargets where pmulld is slower than
+ // the expansion.
+ bool OptForMinSize = DAG.getMachineFunction().getFunction()->optForMinSize();
+ if (Subtarget.hasSSE41() && (OptForMinSize || !Subtarget.isPMULLDSlow()))
return SDValue();
ShrinkMode Mode;
@@ -27591,7 +29699,12 @@ static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG,
// <4 x i16> undef).
//
// Legalize the operands of mul.
- SmallVector<SDValue, 16> Ops(RegSize / ReducedVT.getSizeInBits(),
+ // FIXME: We may be able to handle non-concatenated vectors by insertion.
+ unsigned ReducedSizeInBits = ReducedVT.getSizeInBits();
+ if ((RegSize % ReducedSizeInBits) != 0)
+ return SDValue();
+
+ SmallVector<SDValue, 16> Ops(RegSize / ReducedSizeInBits,
DAG.getUNDEF(ReducedVT));
Ops[0] = NewN0;
NewN0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, OpsVT, Ops);
@@ -27851,7 +29964,7 @@ static SDValue performShiftToAllZeros(SDNode *N, SelectionDAG &DAG,
if (auto *AmtSplat = AmtBV->getConstantSplatNode()) {
const APInt &ShiftAmt = AmtSplat->getAPIntValue();
unsigned MaxAmount =
- VT.getSimpleVT().getVectorElementType().getSizeInBits();
+ VT.getSimpleVT().getScalarSizeInBits();
// SSE2/AVX2 logical shifts always return a vector of 0s
// if the shift amount is bigger than or equal to
@@ -27883,6 +29996,45 @@ static SDValue combineShift(SDNode* N, SelectionDAG &DAG,
return SDValue();
}
+static SDValue combineVectorShift(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
+ assert((X86ISD::VSHLI == N->getOpcode() || X86ISD::VSRLI == N->getOpcode()) &&
+ "Unexpected opcode");
+ EVT VT = N->getValueType(0);
+ unsigned NumBitsPerElt = VT.getScalarSizeInBits();
+
+ // This fails for mask register (vXi1) shifts.
+ if ((NumBitsPerElt % 8) != 0)
+ return SDValue();
+
+ // Out of range logical bit shifts are guaranteed to be zero.
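+ // (For example, an assumed v8i16 VSRLI by 16 or more is replaced by an
+ // all-zeros vector here.)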
+ APInt ShiftVal = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
+ if (ShiftVal.zextOrTrunc(8).uge(NumBitsPerElt))
+ return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, SDLoc(N));
+
+ // Shift N0 by zero -> N0.
+ if (!ShiftVal)
+ return N->getOperand(0);
+
+ // Shift zero -> zero.
+ if (ISD::isBuildVectorAllZeros(N->getOperand(0).getNode()))
+ return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, SDLoc(N));
+
+ // We can decode 'whole byte' logical bit shifts as shuffles.
+ if ((ShiftVal.getZExtValue() % 8) == 0) {
+ SDValue Op(N, 0);
+ SmallVector<int, 1> NonceMask; // Just a placeholder.
+ NonceMask.push_back(0);
+ if (combineX86ShufflesRecursively({Op}, 0, Op, NonceMask,
+ /*Depth*/ 1, /*HasVarMask*/ false, DAG,
+ DCI, Subtarget))
+ return SDValue(); // This routine will use CombineTo to replace N.
+ }
+
+ return SDValue();
+}
+
/// Recognize the distinctive (AND (setcc ...) (setcc ..)) where both setccs
/// reference the same FP CMP, and rewrite for CMPEQSS and friends. Likewise for
/// OR -> CMPNEQSS.
@@ -27943,7 +30095,7 @@ static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG,
// See X86ATTInstPrinter.cpp:printSSECC().
unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4;
if (Subtarget.hasAVX512()) {
- SDValue FSetCC = DAG.getNode(X86ISD::FSETCC, DL, MVT::i1, CMP00,
+ SDValue FSetCC = DAG.getNode(X86ISD::FSETCCM, DL, MVT::i1, CMP00,
CMP01,
DAG.getConstant(x86cc, DL, MVT::i8));
if (N->getValueType(0) != MVT::i1)
@@ -27995,9 +30147,7 @@ static SDValue combineANDXORWithAllOnesIntoANDNP(SDNode *N, SelectionDAG &DAG) {
SDValue N1 = N->getOperand(1);
SDLoc DL(N);
- if (VT != MVT::v2i64 && VT != MVT::v4i64 &&
- VT != MVT::v8i64 && VT != MVT::v16i32 &&
- VT != MVT::v4i32 && VT != MVT::v8i32) // Legal with VLX
+ if (VT != MVT::v2i64 && VT != MVT::v4i64 && VT != MVT::v8i64)
return SDValue();
// Canonicalize XOR to the left.
@@ -28111,95 +30261,6 @@ static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG,
}
}
-static SDValue combineVectorZext(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget &Subtarget) {
- SDValue N0 = N->getOperand(0);
- SDValue N1 = N->getOperand(1);
- SDLoc DL(N);
-
- // A vector zext_in_reg may be represented as a shuffle,
- // feeding into a bitcast (this represents anyext) feeding into
- // an and with a mask.
- // We'd like to try to combine that into a shuffle with zero
- // plus a bitcast, removing the and.
- if (N0.getOpcode() != ISD::BITCAST ||
- N0.getOperand(0).getOpcode() != ISD::VECTOR_SHUFFLE)
- return SDValue();
-
- // The other side of the AND should be a splat of 2^C, where C
- // is the number of bits in the source type.
- N1 = peekThroughBitcasts(N1);
- if (N1.getOpcode() != ISD::BUILD_VECTOR)
- return SDValue();
- BuildVectorSDNode *Vector = cast<BuildVectorSDNode>(N1);
-
- ShuffleVectorSDNode *Shuffle = cast<ShuffleVectorSDNode>(N0.getOperand(0));
- EVT SrcType = Shuffle->getValueType(0);
-
- // We expect a single-source shuffle
- if (!Shuffle->getOperand(1)->isUndef())
- return SDValue();
-
- unsigned SrcSize = SrcType.getScalarSizeInBits();
- unsigned NumElems = SrcType.getVectorNumElements();
-
- APInt SplatValue, SplatUndef;
- unsigned SplatBitSize;
- bool HasAnyUndefs;
- if (!Vector->isConstantSplat(SplatValue, SplatUndef,
- SplatBitSize, HasAnyUndefs))
- return SDValue();
-
- unsigned ResSize = N1.getValueType().getScalarSizeInBits();
- // Make sure the splat matches the mask we expect
- if (SplatBitSize > ResSize ||
- (SplatValue + 1).exactLogBase2() != (int)SrcSize)
- return SDValue();
-
- // Make sure the input and output size make sense
- if (SrcSize >= ResSize || ResSize % SrcSize)
- return SDValue();
-
- // We expect a shuffle of the form <0, u, u, u, 1, u, u, u...>
- // The number of u's between each two values depends on the ratio between
- // the source and dest type.
- unsigned ZextRatio = ResSize / SrcSize;
- bool IsZext = true;
- for (unsigned i = 0; i != NumElems; ++i) {
- if (i % ZextRatio) {
- if (Shuffle->getMaskElt(i) > 0) {
- // Expected undef
- IsZext = false;
- break;
- }
- } else {
- if (Shuffle->getMaskElt(i) != (int)(i / ZextRatio)) {
- // Expected element number
- IsZext = false;
- break;
- }
- }
- }
-
- if (!IsZext)
- return SDValue();
-
- // Ok, perform the transformation - replace the shuffle with
- // a shuffle of the form <0, k, k, k, 1, k, k, k> with zero
- // (instead of undef) where the k elements come from the zero vector.
- SmallVector<int, 8> Mask;
- for (unsigned i = 0; i != NumElems; ++i)
- if (i % ZextRatio)
- Mask.push_back(NumElems);
- else
- Mask.push_back(i / ZextRatio);
-
- SDValue NewShuffle = DAG.getVectorShuffle(Shuffle->getValueType(0), DL,
- Shuffle->getOperand(0), DAG.getConstant(0, DL, SrcType), Mask);
- return DAG.getBitcast(N0.getValueType(), NewShuffle);
-}
-
/// If both input operands of a logic op are being cast from floating point
/// types, try to convert this into a floating point logic node to avoid
/// unnecessary moves from SSE to integer registers.
@@ -28255,7 +30316,7 @@ static SDValue combinePCMPAnd1(SDNode *N, SelectionDAG &DAG) {
// masked compare nodes, so they should not make it here.
EVT VT0 = Op0.getValueType();
EVT VT1 = Op1.getValueType();
- unsigned EltBitWidth = VT0.getScalarType().getSizeInBits();
+ unsigned EltBitWidth = VT0.getScalarSizeInBits();
if (VT0 != VT1 || EltBitWidth == 8)
return SDValue();
@@ -28277,9 +30338,6 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
if (DCI.isBeforeLegalizeOps())
return SDValue();
- if (SDValue Zext = combineVectorZext(N, DAG, DCI, Subtarget))
- return Zext;
-
if (SDValue R = combineCompareEqual(N, DAG, DCI, Subtarget))
return R;
@@ -28297,6 +30355,17 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
SDValue N1 = N->getOperand(1);
SDLoc DL(N);
+ // Attempt to recursively combine a bitmask AND with shuffles.
+ if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) {
+ SDValue Op(N, 0);
+ SmallVector<int, 1> NonceMask; // Just a placeholder.
+ NonceMask.push_back(0);
+ if (combineX86ShufflesRecursively({Op}, 0, Op, NonceMask,
+ /*Depth*/ 1, /*HasVarMask*/ false, DAG,
+ DCI, Subtarget))
+ return SDValue(); // This routine will use CombineTo to replace N.
+ }
+
// Create BEXTR instructions
// BEXTR is ((X >> imm) & (2**size-1))
if (VT != MVT::i32 && VT != MVT::i64)
@@ -28372,7 +30441,7 @@ static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG,
// Validate that the Mask operand is a vector sra node.
// FIXME: what to do for bytes, since there is a psignb/pblendvb, but
// there is no psrai.b
- unsigned EltBits = MaskVT.getVectorElementType().getSizeInBits();
+ unsigned EltBits = MaskVT.getScalarSizeInBits();
unsigned SraAmt = ~0;
if (Mask.getOpcode() == ISD::SRA) {
if (auto *AmtBV = dyn_cast<BuildVectorSDNode>(Mask.getOperand(1)))
@@ -28450,6 +30519,114 @@ static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG,
return DAG.getBitcast(VT, Mask);
}
+// Helper function for combineOrCmpEqZeroToCtlzSrl
+// Transforms:
+// seteq(cmp x, 0)
+// into:
+// srl(ctlz x), log2(bitsize(x))
+// Input pattern is checked by caller.
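+// For example (assuming i32 and a target where CTLZ of zero is defined as 32):
+// seteq(cmp x, 0) becomes srl(ctlz x, 5), which is 1 only when x == 0.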
+static SDValue lowerX86CmpEqZeroToCtlzSrl(SDValue Op, EVT ExtTy,
+ SelectionDAG &DAG) {
+ SDValue Cmp = Op.getOperand(1);
+ EVT VT = Cmp.getOperand(0).getValueType();
+ unsigned Log2b = Log2_32(VT.getSizeInBits());
+ SDLoc dl(Op);
+ SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Cmp->getOperand(0));
+ // The result of the shift is true or false, and on X86, the 32-bit
+ // encoding of shr and lzcnt is more desirable.
+ SDValue Trunc = DAG.getZExtOrTrunc(Clz, dl, MVT::i32);
+ SDValue Scc = DAG.getNode(ISD::SRL, dl, MVT::i32, Trunc,
+ DAG.getConstant(Log2b, dl, VT));
+ return DAG.getZExtOrTrunc(Scc, dl, ExtTy);
+}
+
+// Try to transform:
+// zext(or(setcc(eq, (cmp x, 0)), setcc(eq, (cmp y, 0))))
+// into:
+// srl(or(ctlz(x), ctlz(y)), log2(bitsize(x)))
+// Will also attempt to match more generic cases, eg:
+// zext(or(or(setcc(eq, cmp 0), setcc(eq, cmp 0)), setcc(eq, cmp 0)))
+// Only applies if the target supports the FastLZCNT feature.
+static SDValue combineOrCmpEqZeroToCtlzSrl(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
+ if (DCI.isBeforeLegalize() || !Subtarget.getTargetLowering()->isCtlzFast())
+ return SDValue();
+
+ auto isORCandidate = [](SDValue N) {
+ return (N->getOpcode() == ISD::OR && N->hasOneUse());
+ };
+
+ // Check the zero extend is extending to 32-bit or more. The code generated by
+ // srl(ctlz) for 16-bit or less variants of the pattern would require extra
+ // instructions to clear the upper bits.
+ if (!N->hasOneUse() || !N->getSimpleValueType(0).bitsGE(MVT::i32) ||
+ !isORCandidate(N->getOperand(0)))
+ return SDValue();
+
+ // Check the node matches: setcc(eq, cmp 0)
+ auto isSetCCCandidate = [](SDValue N) {
+ return N->getOpcode() == X86ISD::SETCC && N->hasOneUse() &&
+ X86::CondCode(N->getConstantOperandVal(0)) == X86::COND_E &&
+ N->getOperand(1).getOpcode() == X86ISD::CMP &&
+ N->getOperand(1).getConstantOperandVal(1) == 0 &&
+ N->getOperand(1).getValueType().bitsGE(MVT::i32);
+ };
+
+ SDNode *OR = N->getOperand(0).getNode();
+ SDValue LHS = OR->getOperand(0);
+ SDValue RHS = OR->getOperand(1);
+
+ // Save nodes matching or(or, setcc(eq, cmp 0)).
+ SmallVector<SDNode *, 2> ORNodes;
+ while (((isORCandidate(LHS) && isSetCCCandidate(RHS)) ||
+ (isORCandidate(RHS) && isSetCCCandidate(LHS)))) {
+ ORNodes.push_back(OR);
+ OR = (LHS->getOpcode() == ISD::OR) ? LHS.getNode() : RHS.getNode();
+ LHS = OR->getOperand(0);
+ RHS = OR->getOperand(1);
+ }
+
+ // The last OR node should match or(setcc(eq, cmp 0), setcc(eq, cmp 0)).
+ if (!(isSetCCCandidate(LHS) && isSetCCCandidate(RHS)) ||
+ !isORCandidate(SDValue(OR, 0)))
+ return SDValue();
+
+ // We have an or(setcc(eq, cmp 0), setcc(eq, cmp 0)) pattern; try to lower
+ // it to or(srl(ctlz), srl(ctlz)).
+ // The dag combiner can then fold it into:
+ // srl(or(ctlz, ctlz)).
+ EVT VT = OR->getValueType(0);
+ SDValue NewLHS = lowerX86CmpEqZeroToCtlzSrl(LHS, VT, DAG);
+ SDValue Ret, NewRHS;
+ if (NewLHS && (NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, VT, DAG)))
+ Ret = DAG.getNode(ISD::OR, SDLoc(OR), VT, NewLHS, NewRHS);
+
+ if (!Ret)
+ return SDValue();
+
+ // Try to lower nodes matching the or(or, setcc(eq, cmp 0)) pattern.
+ while (ORNodes.size() > 0) {
+ OR = ORNodes.pop_back_val();
+ LHS = OR->getOperand(0);
+ RHS = OR->getOperand(1);
+ // Swap rhs with lhs to match or(setcc(eq, cmp, 0), or).
+ if (RHS->getOpcode() == ISD::OR)
+ std::swap(LHS, RHS);
+ EVT VT = OR->getValueType(0);
+ SDValue NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, VT, DAG);
+ if (!NewRHS)
+ return SDValue();
+ Ret = DAG.getNode(ISD::OR, SDLoc(OR), VT, Ret, NewRHS);
+ }
+
+ if (Ret)
+ Ret = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), Ret);
+
+ return Ret;
+}
+
static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
@@ -28505,18 +30682,23 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
unsigned Opc = X86ISD::SHLD;
SDValue Op0 = N0.getOperand(0);
SDValue Op1 = N1.getOperand(0);
- if (ShAmt0.getOpcode() == ISD::SUB) {
+ if (ShAmt0.getOpcode() == ISD::SUB ||
+ ShAmt0.getOpcode() == ISD::XOR) {
Opc = X86ISD::SHRD;
std::swap(Op0, Op1);
std::swap(ShAmt0, ShAmt1);
}
+ // OR( SHL( X, C ), SRL( Y, 32 - C ) ) -> SHLD( X, Y, C )
+ // OR( SRL( X, C ), SHL( Y, 32 - C ) ) -> SHRD( X, Y, C )
+ // OR( SHL( X, C ), SRL( SRL( Y, 1 ), XOR( C, 31 ) ) ) -> SHLD( X, Y, C )
+ // OR( SRL( X, C ), SHL( SHL( Y, 1 ), XOR( C, 31 ) ) ) -> SHRD( X, Y, C )
unsigned Bits = VT.getSizeInBits();
if (ShAmt1.getOpcode() == ISD::SUB) {
SDValue Sum = ShAmt1.getOperand(0);
if (ConstantSDNode *SumC = dyn_cast<ConstantSDNode>(Sum)) {
SDValue ShAmt1Op1 = ShAmt1.getOperand(1);
- if (ShAmt1Op1.getNode()->getOpcode() == ISD::TRUNCATE)
+ if (ShAmt1Op1.getOpcode() == ISD::TRUNCATE)
ShAmt1Op1 = ShAmt1Op1.getOperand(0);
if (SumC->getSExtValue() == Bits && ShAmt1Op1 == ShAmt0)
return DAG.getNode(Opc, DL, VT,
@@ -28526,18 +30708,39 @@ static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
}
} else if (ConstantSDNode *ShAmt1C = dyn_cast<ConstantSDNode>(ShAmt1)) {
ConstantSDNode *ShAmt0C = dyn_cast<ConstantSDNode>(ShAmt0);
- if (ShAmt0C &&
- ShAmt0C->getSExtValue() + ShAmt1C->getSExtValue() == Bits)
+ if (ShAmt0C && (ShAmt0C->getSExtValue() + ShAmt1C->getSExtValue()) == Bits)
return DAG.getNode(Opc, DL, VT,
N0.getOperand(0), N1.getOperand(0),
DAG.getNode(ISD::TRUNCATE, DL,
MVT::i8, ShAmt0));
+ } else if (ShAmt1.getOpcode() == ISD::XOR) {
+ SDValue Mask = ShAmt1.getOperand(1);
+ if (ConstantSDNode *MaskC = dyn_cast<ConstantSDNode>(Mask)) {
+ unsigned InnerShift = (X86ISD::SHLD == Opc ? ISD::SRL : ISD::SHL);
+ SDValue ShAmt1Op0 = ShAmt1.getOperand(0);
+ if (ShAmt1Op0.getOpcode() == ISD::TRUNCATE)
+ ShAmt1Op0 = ShAmt1Op0.getOperand(0);
+ if (MaskC->getSExtValue() == (Bits - 1) && ShAmt1Op0 == ShAmt0) {
+ if (Op1.getOpcode() == InnerShift &&
+ isa<ConstantSDNode>(Op1.getOperand(1)) &&
+ Op1.getConstantOperandVal(1) == 1) {
+ return DAG.getNode(Opc, DL, VT, Op0, Op1.getOperand(0),
+ DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ShAmt0));
+ }
+ // Test for ADD( Y, Y ) as an equivalent to SHL( Y, 1 ).
+ if (InnerShift == ISD::SHL && Op1.getOpcode() == ISD::ADD &&
+ Op1.getOperand(0) == Op1.getOperand(1)) {
+ return DAG.getNode(Opc, DL, VT, Op0, Op1.getOperand(0),
+ DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, ShAmt0));
+ }
+ }
+ }
}
return SDValue();
}
-// Generate NEG and CMOV for integer abs.
+/// Generate NEG and CMOV for integer abs.
static SDValue combineIntegerAbs(SDNode *N, SelectionDAG &DAG) {
EVT VT = N->getValueType(0);
@@ -28553,21 +30756,19 @@ static SDValue combineIntegerAbs(SDNode *N, SelectionDAG &DAG) {
// Check pattern of XOR(ADD(X,Y), Y) where Y is SRA(X, size(X)-1)
// and change it to SUB and CMOV.
if (VT.isInteger() && N->getOpcode() == ISD::XOR &&
- N0.getOpcode() == ISD::ADD &&
- N0.getOperand(1) == N1 &&
- N1.getOpcode() == ISD::SRA &&
- N1.getOperand(0) == N0.getOperand(0))
- if (ConstantSDNode *Y1C = dyn_cast<ConstantSDNode>(N1.getOperand(1)))
- if (Y1C->getAPIntValue() == VT.getSizeInBits()-1) {
- // Generate SUB & CMOV.
- SDValue Neg = DAG.getNode(X86ISD::SUB, DL, DAG.getVTList(VT, MVT::i32),
- DAG.getConstant(0, DL, VT), N0.getOperand(0));
-
- SDValue Ops[] = { N0.getOperand(0), Neg,
- DAG.getConstant(X86::COND_GE, DL, MVT::i8),
- SDValue(Neg.getNode(), 1) };
- return DAG.getNode(X86ISD::CMOV, DL, DAG.getVTList(VT, MVT::Glue), Ops);
- }
+ N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1 &&
+ N1.getOpcode() == ISD::SRA && N1.getOperand(0) == N0.getOperand(0)) {
+ auto *Y1C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
+ if (Y1C && Y1C->getAPIntValue() == VT.getSizeInBits() - 1) {
+ // Generate SUB & CMOV.
+ SDValue Neg = DAG.getNode(X86ISD::SUB, DL, DAG.getVTList(VT, MVT::i32),
+ DAG.getConstant(0, DL, VT), N0.getOperand(0));
+ SDValue Ops[] = {N0.getOperand(0), Neg,
+ DAG.getConstant(X86::COND_GE, DL, MVT::i8),
+ SDValue(Neg.getNode(), 1)};
+ return DAG.getNode(X86ISD::CMOV, DL, DAG.getVTList(VT, MVT::Glue), Ops);
+ }
+ }
return SDValue();
}
@@ -28671,28 +30872,6 @@ static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(X86ISD::PCMPGT, SDLoc(N), VT, Shift.getOperand(0), Ones);
}
-static SDValue combineXor(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget &Subtarget) {
- if (SDValue Cmp = foldVectorXorShiftIntoCmp(N, DAG, Subtarget))
- return Cmp;
-
- if (DCI.isBeforeLegalizeOps())
- return SDValue();
-
- if (SDValue RV = foldXorTruncShiftIntoCmp(N, DAG))
- return RV;
-
- if (Subtarget.hasCMov())
- if (SDValue RV = combineIntegerAbs(N, DAG))
- return RV;
-
- if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget))
- return FPLogic;
-
- return SDValue();
-}
-
/// This function detects the AVG pattern between vectors of unsigned i8/i16,
/// which is c = (a + b + 1) / 2, and replaces this operation with the efficient
/// X86ISD::AVG instruction.
@@ -28717,7 +30896,7 @@ static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
if (!Subtarget.hasSSE2())
return SDValue();
- if (Subtarget.hasAVX512()) {
+ if (Subtarget.hasBWI()) {
if (VT.getSizeInBits() > 512)
return SDValue();
} else if (Subtarget.hasAVX2()) {
@@ -28999,6 +31178,11 @@ static SDValue combineMaskedLoad(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
MaskedLoadSDNode *Mld = cast<MaskedLoadSDNode>(N);
+
+ // TODO: Expanding load with constant mask may be optimized as well.
+ if (Mld->isExpandingLoad())
+ return SDValue();
+
if (Mld->getExtensionType() == ISD::NON_EXTLOAD) {
if (SDValue ScalarLoad = reduceMaskedLoadToScalarLoad(Mld, DAG, DCI))
return ScalarLoad;
@@ -29018,8 +31202,8 @@ static SDValue combineMaskedLoad(SDNode *N, SelectionDAG &DAG,
SDLoc dl(Mld);
assert(LdVT != VT && "Cannot extend to the same type");
- unsigned ToSz = VT.getVectorElementType().getSizeInBits();
- unsigned FromSz = LdVT.getVectorElementType().getSizeInBits();
+ unsigned ToSz = VT.getScalarSizeInBits();
+ unsigned FromSz = LdVT.getScalarSizeInBits();
// From/To sizes and ElemCount must be pow of two.
assert (isPowerOf2_32(NumElems * FromSz * ToSz) &&
"Unexpected size for extending masked load");
@@ -29114,6 +31298,10 @@ static SDValue reduceMaskedStoreToScalarStore(MaskedStoreSDNode *MS,
static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
MaskedStoreSDNode *Mst = cast<MaskedStoreSDNode>(N);
+
+ if (Mst->isCompressingStore())
+ return SDValue();
+
if (!Mst->isTruncatingStore())
return reduceMaskedStoreToScalarStore(Mst, DAG);
@@ -29124,8 +31312,8 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
SDLoc dl(Mst);
assert(StVT != VT && "Cannot truncate to the same type");
- unsigned FromSz = VT.getVectorElementType().getSizeInBits();
- unsigned ToSz = StVT.getVectorElementType().getSizeInBits();
+ unsigned FromSz = VT.getScalarSizeInBits();
+ unsigned ToSz = StVT.getScalarSizeInBits();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -29253,8 +31441,8 @@ static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned NumElems = VT.getVectorNumElements();
assert(StVT != VT && "Cannot truncate to the same type");
- unsigned FromSz = VT.getVectorElementType().getSizeInBits();
- unsigned ToSz = StVT.getVectorElementType().getSizeInBits();
+ unsigned FromSz = VT.getScalarSizeInBits();
+ unsigned ToSz = StVT.getScalarSizeInBits();
// The truncating store is legal in some cases. For example
// vpmovqb, vpmovqw, vpmovqd, vpmovdb, vpmovdw
@@ -29681,7 +31869,7 @@ combineVectorTruncationWithPACKSS(SDNode *N, SelectionDAG &DAG,
/// X86ISD::PACKUS/X86ISD::PACKSS operations. We do it here because after type
/// legalization the truncation will be translated into a BUILD_VECTOR with each
/// element that is extracted from a vector and then truncated, and it is
-/// diffcult to do this optimization based on them.
+/// difficult to do this optimization based on them.
static SDValue combineVectorTruncation(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
EVT OutVT = N->getValueType(0);
@@ -29737,6 +31925,45 @@ static SDValue combineVectorTruncation(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+/// This function transforms vector truncation of 'all or none' bits values,
+/// i.e. vXi16/vXi32/vXi64 to vXi8/vXi16/vXi32, into X86ISD::PACKSS operations.
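+/// For example (an assumed case): truncating a v8i32 comparison result, where
+/// every lane is all-ones or all-zeros, to v8i16 can use PACKSSDW instead of a
+/// generic truncation sequence.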
+static SDValue combineVectorSignBitsTruncation(SDNode *N, SDLoc &DL,
+ SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ // Requires SSE2 but AVX512 has fast truncate.
+ if (!Subtarget.hasSSE2() || Subtarget.hasAVX512())
+ return SDValue();
+
+ if (!N->getValueType(0).isVector() || !N->getValueType(0).isSimple())
+ return SDValue();
+
+ SDValue In = N->getOperand(0);
+ if (!In.getValueType().isSimple())
+ return SDValue();
+
+ MVT VT = N->getValueType(0).getSimpleVT();
+ MVT SVT = VT.getScalarType();
+
+ MVT InVT = In.getValueType().getSimpleVT();
+ MVT InSVT = InVT.getScalarType();
+
+ // Use PACKSS if the input is a splatted sign bit.
+ // e.g. Comparison result, sext_in_reg, etc.
+ unsigned NumSignBits = DAG.ComputeNumSignBits(In);
+ if (NumSignBits != InSVT.getSizeInBits())
+ return SDValue();
+
+ // Check we have a truncation suited for PACKSS.
+ if (!VT.is128BitVector() && !VT.is256BitVector())
+ return SDValue();
+ if (SVT != MVT::i8 && SVT != MVT::i16 && SVT != MVT::i32)
+ return SDValue();
+ if (InSVT != MVT::i16 && InSVT != MVT::i32 && InSVT != MVT::i64)
+ return SDValue();
+
+ return truncateVectorCompareWithPACKSS(VT, In, DL, DAG, Subtarget);
+}
+
static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
@@ -29755,15 +31982,75 @@ static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(X86ISD::MMX_MOVD2W, DL, MVT::i32, BCSrc);
}
+ // Try to truncate extended sign bits with PACKSS.
+ if (SDValue V = combineVectorSignBitsTruncation(N, DL, DAG, Subtarget))
+ return V;
+
return combineVectorTruncation(N, DAG, Subtarget);
}
+/// Returns the negated value if the node \p N flips the sign of an FP value.
+///
+/// An FP-negation node may have different forms: FNEG(x) or
+/// FXOR(x, 0x80000000). AVX512F does not have FXOR, so FNEG is lowered as
+/// (bitcast (xor (bitcast x), (bitcast ConstantFP(0x80000000)))).
+/// In this case we go through all bitcasts.
+static SDValue isFNEG(SDNode *N) {
+ if (N->getOpcode() == ISD::FNEG)
+ return N->getOperand(0);
+
+ SDValue Op = peekThroughBitcasts(SDValue(N, 0));
+ if (Op.getOpcode() != X86ISD::FXOR && Op.getOpcode() != ISD::XOR)
+ return SDValue();
+
+ SDValue Op1 = peekThroughBitcasts(Op.getOperand(1));
+ if (!Op1.getValueType().isFloatingPoint())
+ return SDValue();
+
+ SDValue Op0 = peekThroughBitcasts(Op.getOperand(0));
+
+ unsigned EltBits = Op1.getScalarValueSizeInBits();
+ auto isSignBitValue = [&](const ConstantFP *C) {
+ return C->getValueAPF().bitcastToAPInt() == APInt::getSignBit(EltBits);
+ };
+
+ // There is more than one way to represent the same constant on
+ // the different X86 targets. The type of the node may also depend on size.
+ // - load scalar value and broadcast
+ // - BUILD_VECTOR node
+ // - load from a constant pool.
+ // We check all variants here.
+ if (Op1.getOpcode() == X86ISD::VBROADCAST) {
+ if (auto *C = getTargetConstantFromNode(Op1.getOperand(0)))
+ if (isSignBitValue(cast<ConstantFP>(C)))
+ return Op0;
+
+ } else if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(Op1)) {
+ if (ConstantFPSDNode *CN = BV->getConstantFPSplatNode())
+ if (isSignBitValue(CN->getConstantFPValue()))
+ return Op0;
+
+ } else if (auto *C = getTargetConstantFromNode(Op1)) {
+ if (C->getType()->isVectorTy()) {
+ if (auto *SplatV = C->getSplatValue())
+ if (isSignBitValue(cast<ConstantFP>(SplatV)))
+ return Op0;
+ } else if (auto *FPConst = dyn_cast<ConstantFP>(C))
+ if (isSignBitValue(FPConst))
+ return Op0;
+ }
+ return SDValue();
+}
+
/// Do target-specific dag combines on floating point negations.
static SDValue combineFneg(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
- EVT VT = N->getValueType(0);
+ EVT OrigVT = N->getValueType(0);
+ SDValue Arg = isFNEG(N);
+ assert(Arg.getNode() && "N is expected to be an FNEG node");
+
+ EVT VT = Arg.getValueType();
EVT SVT = VT.getScalarType();
- SDValue Arg = N->getOperand(0);
SDLoc DL(N);
// Let legalize expand this if it isn't a legal type yet.
@@ -29776,70 +32063,182 @@ static SDValue combineFneg(SDNode *N, SelectionDAG &DAG,
if (Arg.getOpcode() == ISD::FMUL && (SVT == MVT::f32 || SVT == MVT::f64) &&
Arg->getFlags()->hasNoSignedZeros() && Subtarget.hasAnyFMA()) {
SDValue Zero = DAG.getConstantFP(0.0, DL, VT);
- return DAG.getNode(X86ISD::FNMSUB, DL, VT, Arg.getOperand(0),
- Arg.getOperand(1), Zero);
+ SDValue NewNode = DAG.getNode(X86ISD::FNMSUB, DL, VT, Arg.getOperand(0),
+ Arg.getOperand(1), Zero);
+ return DAG.getBitcast(OrigVT, NewNode);
}
- // If we're negating a FMA node, then we can adjust the
+ // If we're negating an FMA node, then we can adjust the
// instruction to include the extra negation.
+ unsigned NewOpcode = 0;
if (Arg.hasOneUse()) {
switch (Arg.getOpcode()) {
- case X86ISD::FMADD:
- return DAG.getNode(X86ISD::FNMSUB, DL, VT, Arg.getOperand(0),
- Arg.getOperand(1), Arg.getOperand(2));
- case X86ISD::FMSUB:
- return DAG.getNode(X86ISD::FNMADD, DL, VT, Arg.getOperand(0),
- Arg.getOperand(1), Arg.getOperand(2));
- case X86ISD::FNMADD:
- return DAG.getNode(X86ISD::FMSUB, DL, VT, Arg.getOperand(0),
- Arg.getOperand(1), Arg.getOperand(2));
- case X86ISD::FNMSUB:
- return DAG.getNode(X86ISD::FMADD, DL, VT, Arg.getOperand(0),
- Arg.getOperand(1), Arg.getOperand(2));
- }
- }
+ case X86ISD::FMADD: NewOpcode = X86ISD::FNMSUB; break;
+ case X86ISD::FMSUB: NewOpcode = X86ISD::FNMADD; break;
+ case X86ISD::FNMADD: NewOpcode = X86ISD::FMSUB; break;
+ case X86ISD::FNMSUB: NewOpcode = X86ISD::FMADD; break;
+ case X86ISD::FMADD_RND: NewOpcode = X86ISD::FNMSUB_RND; break;
+ case X86ISD::FMSUB_RND: NewOpcode = X86ISD::FNMADD_RND; break;
+ case X86ISD::FNMADD_RND: NewOpcode = X86ISD::FMSUB_RND; break;
+ case X86ISD::FNMSUB_RND: NewOpcode = X86ISD::FMADD_RND; break;
+ // We can't handle scalar intrinsic nodes here because they would only
+ // invert one element and not the whole vector. But we could try to handle
+ // a negation of the lower element only.
+ }
+ }
+ if (NewOpcode)
+ return DAG.getBitcast(OrigVT, DAG.getNode(NewOpcode, DL, VT,
+ Arg.getNode()->ops()));
+
return SDValue();
}
static SDValue lowerX86FPLogicOp(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
- EVT VT = N->getValueType(0);
- if (VT.is512BitVector() && !Subtarget.hasDQI()) {
- // VXORPS, VORPS, VANDPS, VANDNPS are supported only under DQ extention.
- // These logic operations may be executed in the integer domain.
+ const X86Subtarget &Subtarget) {
+ MVT VT = N->getSimpleValueType(0);
+ // If we have integer vector types available, use the integer opcodes.
+ if (VT.isVector() && Subtarget.hasSSE2()) {
SDLoc dl(N);
- MVT IntScalar = MVT::getIntegerVT(VT.getScalarSizeInBits());
- MVT IntVT = MVT::getVectorVT(IntScalar, VT.getVectorNumElements());
+
+ MVT IntVT = MVT::getVectorVT(MVT::i64, VT.getSizeInBits() / 64);
SDValue Op0 = DAG.getBitcast(IntVT, N->getOperand(0));
SDValue Op1 = DAG.getBitcast(IntVT, N->getOperand(1));
- unsigned IntOpcode = 0;
+ unsigned IntOpcode;
switch (N->getOpcode()) {
- default: llvm_unreachable("Unexpected FP logic op");
- case X86ISD::FOR: IntOpcode = ISD::OR; break;
- case X86ISD::FXOR: IntOpcode = ISD::XOR; break;
- case X86ISD::FAND: IntOpcode = ISD::AND; break;
- case X86ISD::FANDN: IntOpcode = X86ISD::ANDNP; break;
+ default: llvm_unreachable("Unexpected FP logic op");
+ case X86ISD::FOR: IntOpcode = ISD::OR; break;
+ case X86ISD::FXOR: IntOpcode = ISD::XOR; break;
+ case X86ISD::FAND: IntOpcode = ISD::AND; break;
+ case X86ISD::FANDN: IntOpcode = X86ISD::ANDNP; break;
}
SDValue IntOp = DAG.getNode(IntOpcode, dl, IntVT, Op0, Op1);
return DAG.getBitcast(VT, IntOp);
}
return SDValue();
}
+
+static SDValue combineXor(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
+ if (SDValue Cmp = foldVectorXorShiftIntoCmp(N, DAG, Subtarget))
+ return Cmp;
+
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ if (SDValue RV = foldXorTruncShiftIntoCmp(N, DAG))
+ return RV;
+
+ if (Subtarget.hasCMov())
+ if (SDValue RV = combineIntegerAbs(N, DAG))
+ return RV;
+
+ if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget))
+ return FPLogic;
+
+ if (isFNEG(N))
+ return combineFneg(N, DAG, Subtarget);
+ return SDValue();
+}
+
+
+static bool isNullFPScalarOrVectorConst(SDValue V) {
+ return isNullFPConstant(V) || ISD::isBuildVectorAllZeros(V.getNode());
+}
+
+/// If a value is a scalar FP zero or a vector FP zero (potentially including
+/// undefined elements), return a zero constant that may be used to fold away
+/// that value. In the case of a vector, the returned constant will not contain
+/// undefined elements even if the input parameter does. This makes it suitable
+/// to be used as a replacement operand with operations (e.g., bitwise-and) where
+/// an undef should not propagate.
+static SDValue getNullFPConstForNullVal(SDValue V, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ if (!isNullFPScalarOrVectorConst(V))
+ return SDValue();
+
+ if (V.getValueType().isVector())
+ return getZeroVector(V.getSimpleValueType(), Subtarget, DAG, SDLoc(V));
+
+ return V;
+}
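+// For example, an operand such as (build_vector 0.0, undef, 0.0, undef)
+// still counts as zero, but the constant returned above is a fully defined
+// zero vector, so the undef lanes cannot leak through a bitwise op like
+// FAND.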
+
+static SDValue combineFAndFNotToFAndn(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N->getValueType(0);
+ SDLoc DL(N);
+
+ // Vector types are handled in combineANDXORWithAllOnesIntoANDNP().
+ if (!((VT == MVT::f32 && Subtarget.hasSSE1()) ||
+ (VT == MVT::f64 && Subtarget.hasSSE2())))
+ return SDValue();
+
+ auto isAllOnesConstantFP = [](SDValue V) {
+ auto *C = dyn_cast<ConstantFPSDNode>(V);
+ return C && C->getConstantFPValue()->isAllOnesValue();
+ };
+
+ // fand (fxor X, -1), Y --> fandn X, Y
+ if (N0.getOpcode() == X86ISD::FXOR && isAllOnesConstantFP(N0.getOperand(1)))
+ return DAG.getNode(X86ISD::FANDN, DL, VT, N0.getOperand(0), N1);
+
+ // fand X, (fxor Y, -1) --> fandn Y, X
+ if (N1.getOpcode() == X86ISD::FXOR && isAllOnesConstantFP(N1.getOperand(1)))
+ return DAG.getNode(X86ISD::FANDN, DL, VT, N1.getOperand(0), N0);
+
+ return SDValue();
+}
+
+/// Do target-specific dag combines on X86ISD::FAND nodes.
+static SDValue combineFAnd(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ // FAND(0.0, x) -> 0.0
+ if (SDValue V = getNullFPConstForNullVal(N->getOperand(0), DAG, Subtarget))
+ return V;
+
+ // FAND(x, 0.0) -> 0.0
+ if (SDValue V = getNullFPConstForNullVal(N->getOperand(1), DAG, Subtarget))
+ return V;
+
+ if (SDValue V = combineFAndFNotToFAndn(N, DAG, Subtarget))
+ return V;
+
+ return lowerX86FPLogicOp(N, DAG, Subtarget);
+}
+
+/// Do target-specific dag combines on X86ISD::FANDN nodes.
+static SDValue combineFAndn(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ // FANDN(0.0, x) -> x
+ if (isNullFPScalarOrVectorConst(N->getOperand(0)))
+ return N->getOperand(1);
+
+ // FANDN(x, 0.0) -> 0.0
+ if (SDValue V = getNullFPConstForNullVal(N->getOperand(1), DAG, Subtarget))
+ return V;
+
+ return lowerX86FPLogicOp(N, DAG, Subtarget);
+}
+
/// Do target-specific dag combines on X86ISD::FOR and X86ISD::FXOR nodes.
static SDValue combineFOr(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
assert(N->getOpcode() == X86ISD::FOR || N->getOpcode() == X86ISD::FXOR);
// F[X]OR(0.0, x) -> x
- if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
- if (C->getValueAPF().isPosZero())
- return N->getOperand(1);
+ if (isNullFPScalarOrVectorConst(N->getOperand(0)))
+ return N->getOperand(1);
// F[X]OR(x, 0.0) -> x
- if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
- if (C->getValueAPF().isPosZero())
- return N->getOperand(0);
+ if (isNullFPScalarOrVectorConst(N->getOperand(1)))
+ return N->getOperand(0);
+
+ if (isFNEG(N))
+ if (SDValue NewVal = combineFneg(N, DAG, Subtarget))
+ return NewVal;
return lowerX86FPLogicOp(N, DAG, Subtarget);
}
@@ -29921,38 +32320,6 @@ static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(SelectOpcode, DL, VT, IsOp0Nan, Op1, MinOrMax);
}
-/// Do target-specific dag combines on X86ISD::FAND nodes.
-static SDValue combineFAnd(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
- // FAND(0.0, x) -> 0.0
- if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
- if (C->getValueAPF().isPosZero())
- return N->getOperand(0);
-
- // FAND(x, 0.0) -> 0.0
- if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
- if (C->getValueAPF().isPosZero())
- return N->getOperand(1);
-
- return lowerX86FPLogicOp(N, DAG, Subtarget);
-}
-
-/// Do target-specific dag combines on X86ISD::FANDN nodes
-static SDValue combineFAndn(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
- // FANDN(0.0, x) -> x
- if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
- if (C->getValueAPF().isPosZero())
- return N->getOperand(1);
-
- // FANDN(x, 0.0) -> 0.0
- if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(1)))
- if (C->getValueAPF().isPosZero())
- return N->getOperand(1);
-
- return lowerX86FPLogicOp(N, DAG, Subtarget);
-}
-
static SDValue combineBT(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
// BT ignores high bits in the bit index operand.
@@ -29971,17 +32338,6 @@ static SDValue combineBT(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
-static SDValue combineVZextMovl(SDNode *N, SelectionDAG &DAG) {
- SDValue Op = peekThroughBitcasts(N->getOperand(0));
- EVT VT = N->getValueType(0), OpVT = Op.getValueType();
- if (Op.getOpcode() == X86ISD::VZEXT_LOAD &&
- VT.getVectorElementType().getSizeInBits() ==
- OpVT.getVectorElementType().getSizeInBits()) {
- return DAG.getBitcast(VT, Op);
- }
- return SDValue();
-}
-
static SDValue combineSignExtendInReg(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
EVT VT = N->getValueType(0);
@@ -30018,19 +32374,32 @@ static SDValue combineSignExtendInReg(SDNode *N, SelectionDAG &DAG,
}
/// sext(add_nsw(x, C)) --> add(sext(x), C_sext)
-/// Promoting a sign extension ahead of an 'add nsw' exposes opportunities
-/// to combine math ops, use an LEA, or use a complex addressing mode. This can
-/// eliminate extend, add, and shift instructions.
-static SDValue promoteSextBeforeAddNSW(SDNode *Sext, SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
+/// zext(add_nuw(x, C)) --> add(zext(x), C_zext)
+/// Promoting a sign/zero extension ahead of a no overflow 'add' exposes
+/// opportunities to combine math ops, use an LEA, or use a complex addressing
+/// mode. This can eliminate extend, add, and shift instructions.
+static SDValue promoteExtBeforeAdd(SDNode *Ext, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
+ if (Ext->getOpcode() != ISD::SIGN_EXTEND &&
+ Ext->getOpcode() != ISD::ZERO_EXTEND)
+ return SDValue();
+
// TODO: This should be valid for other integer types.
- EVT VT = Sext->getValueType(0);
+ EVT VT = Ext->getValueType(0);
if (VT != MVT::i64)
return SDValue();
- // We need an 'add nsw' feeding into the 'sext'.
- SDValue Add = Sext->getOperand(0);
- if (Add.getOpcode() != ISD::ADD || !Add->getFlags()->hasNoSignedWrap())
+ SDValue Add = Ext->getOperand(0);
+ if (Add.getOpcode() != ISD::ADD)
+ return SDValue();
+
+ bool Sext = Ext->getOpcode() == ISD::SIGN_EXTEND;
+ bool NSW = Add->getFlags()->hasNoSignedWrap();
+ bool NUW = Add->getFlags()->hasNoUnsignedWrap();
+
+ // We need an 'add nsw' feeding into the 'sext' or an 'add nuw' feeding
+ // into the 'zext'.
+ if ((Sext && !NSW) || (!Sext && !NUW))
return SDValue();
// Having a constant operand to the 'add' ensures that we are not increasing
@@ -30046,7 +32415,7 @@ static SDValue promoteSextBeforeAddNSW(SDNode *Sext, SelectionDAG &DAG,
// of single 'add' instructions, but the cost model for selecting an LEA
// currently has a high threshold.
bool HasLEAPotential = false;
- for (auto *User : Sext->uses()) {
+ for (auto *User : Ext->uses()) {
if (User->getOpcode() == ISD::ADD || User->getOpcode() == ISD::SHL) {
HasLEAPotential = true;
break;
@@ -30055,17 +32424,18 @@ static SDValue promoteSextBeforeAddNSW(SDNode *Sext, SelectionDAG &DAG,
if (!HasLEAPotential)
return SDValue();
- // Everything looks good, so pull the 'sext' ahead of the 'add'.
- int64_t AddConstant = AddOp1->getSExtValue();
+ // Everything looks good, so pull the '{s|z}ext' ahead of the 'add'.
+ int64_t AddConstant = Sext ? AddOp1->getSExtValue() : AddOp1->getZExtValue();
SDValue AddOp0 = Add.getOperand(0);
- SDValue NewSext = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(Sext), VT, AddOp0);
+ SDValue NewExt = DAG.getNode(Ext->getOpcode(), SDLoc(Ext), VT, AddOp0);
SDValue NewConstant = DAG.getConstant(AddConstant, SDLoc(Add), VT);
// The wider add is guaranteed to not wrap because both operands are
// sign-extended.
SDNodeFlags Flags;
- Flags.setNoSignedWrap(true);
- return DAG.getNode(ISD::ADD, SDLoc(Add), VT, NewSext, NewConstant, &Flags);
+ Flags.setNoSignedWrap(NSW);
+ Flags.setNoUnsignedWrap(NUW);
+ return DAG.getNode(ISD::ADD, SDLoc(Add), VT, NewExt, NewConstant, &Flags);
}
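// Illustrative transform (approximate IR):
//   %a = add nsw i32 %x, 20
//   %s = sext i32 %a to i64
//   %p = shl i64 %s, 3
// Pulling the sext ahead gives %s = add nsw i64 (sext i32 %x to i64), 20,
// and the shl user can then fold into a single LEA-style address
// computation.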
/// (i8,i32 {s/z}ext ({s/u}divrem (i8 x, i8 y)) ->
@@ -30157,18 +32527,17 @@ static SDValue combineToExtendVectorInReg(SDNode *N, SelectionDAG &DAG,
// ISD::*_EXTEND_VECTOR_INREG which ensures lowering to X86ISD::V*EXT.
// Also use this if we don't have SSE41 to allow the legalizer do its job.
if (!Subtarget.hasSSE41() || VT.is128BitVector() ||
- (VT.is256BitVector() && Subtarget.hasInt256())) {
+ (VT.is256BitVector() && Subtarget.hasInt256()) ||
+ (VT.is512BitVector() && Subtarget.hasAVX512())) {
SDValue ExOp = ExtendVecSize(DL, N0, VT.getSizeInBits());
return Opcode == ISD::SIGN_EXTEND
? DAG.getSignExtendVectorInReg(ExOp, DL, VT)
: DAG.getZeroExtendVectorInReg(ExOp, DL, VT);
}
- // On pre-AVX2 targets, split into 128-bit nodes of
- // ISD::*_EXTEND_VECTOR_INREG.
- if (!Subtarget.hasInt256() && !(VT.getSizeInBits() % 128)) {
- unsigned NumVecs = VT.getSizeInBits() / 128;
- unsigned NumSubElts = 128 / SVT.getSizeInBits();
+ auto SplitAndExtendInReg = [&](unsigned SplitSize) {
+ unsigned NumVecs = VT.getSizeInBits() / SplitSize;
+ unsigned NumSubElts = SplitSize / SVT.getSizeInBits();
EVT SubVT = EVT::getVectorVT(*DAG.getContext(), SVT, NumSubElts);
EVT InSubVT = EVT::getVectorVT(*DAG.getContext(), InSVT, NumSubElts);
@@ -30176,14 +32545,24 @@ static SDValue combineToExtendVectorInReg(SDNode *N, SelectionDAG &DAG,
for (unsigned i = 0, Offset = 0; i != NumVecs; ++i, Offset += NumSubElts) {
SDValue SrcVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InSubVT, N0,
DAG.getIntPtrConstant(Offset, DL));
- SrcVec = ExtendVecSize(DL, SrcVec, 128);
+ SrcVec = ExtendVecSize(DL, SrcVec, SplitSize);
SrcVec = Opcode == ISD::SIGN_EXTEND
? DAG.getSignExtendVectorInReg(SrcVec, DL, SubVT)
: DAG.getZeroExtendVectorInReg(SrcVec, DL, SubVT);
Opnds.push_back(SrcVec);
}
return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Opnds);
- }
+ };
+
+ // On pre-AVX2 targets, split into 128-bit nodes of
+ // ISD::*_EXTEND_VECTOR_INREG.
+ if (!Subtarget.hasInt256() && !(VT.getSizeInBits() % 128))
+ return SplitAndExtendInReg(128);
+
+ // On pre-AVX512 targets, split into 256-bit nodes of
+ // ISD::*_EXTEND_VECTOR_INREG.
+ if (!Subtarget.hasAVX512() && !(VT.getSizeInBits() % 256))
+ return SplitAndExtendInReg(256);
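+ // Worked example (approximate): a v16i8 -> v16i32 sext on a target without
+ // AVX2 is split into four v4i32 SIGN_EXTEND_VECTOR_INREG nodes (one per
+ // 128-bit chunk) that are concatenated back into the v16i32 result; with
+ // AVX2 but no AVX512 the same split happens at 256 bits instead.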
return SDValue();
}
@@ -30216,7 +32595,7 @@ static SDValue combineSext(SDNode *N, SelectionDAG &DAG,
if (SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget))
return R;
- if (SDValue NewAdd = promoteSextBeforeAddNSW(N, DAG, Subtarget))
+ if (SDValue NewAdd = promoteExtBeforeAdd(N, DAG, Subtarget))
return NewAdd;
return SDValue();
@@ -30239,26 +32618,58 @@ static SDValue combineFMA(SDNode *N, SelectionDAG &DAG,
SDValue B = N->getOperand(1);
SDValue C = N->getOperand(2);
- bool NegA = (A.getOpcode() == ISD::FNEG);
- bool NegB = (B.getOpcode() == ISD::FNEG);
- bool NegC = (C.getOpcode() == ISD::FNEG);
+ auto invertIfNegative = [](SDValue &V) {
+ if (SDValue NegVal = isFNEG(V.getNode())) {
+ V = NegVal;
+ return true;
+ }
+ return false;
+ };
+
+ // Do not convert the passthru input of scalar intrinsics.
+ // FIXME: We could allow negations of the lower element only.
+ bool NegA = N->getOpcode() != X86ISD::FMADDS1_RND && invertIfNegative(A);
+ bool NegB = invertIfNegative(B);
+ bool NegC = N->getOpcode() != X86ISD::FMADDS3_RND && invertIfNegative(C);
// Negative multiplication when NegA xor NegB
bool NegMul = (NegA != NegB);
- if (NegA)
- A = A.getOperand(0);
- if (NegB)
- B = B.getOperand(0);
- if (NegC)
- C = C.getOperand(0);
- unsigned Opcode;
+ unsigned NewOpcode;
if (!NegMul)
- Opcode = (!NegC) ? X86ISD::FMADD : X86ISD::FMSUB;
+ NewOpcode = (!NegC) ? X86ISD::FMADD : X86ISD::FMSUB;
else
- Opcode = (!NegC) ? X86ISD::FNMADD : X86ISD::FNMSUB;
+ NewOpcode = (!NegC) ? X86ISD::FNMADD : X86ISD::FNMSUB;
+
+
+ if (N->getOpcode() == X86ISD::FMADD_RND) {
+ switch (NewOpcode) {
+ case X86ISD::FMADD: NewOpcode = X86ISD::FMADD_RND; break;
+ case X86ISD::FMSUB: NewOpcode = X86ISD::FMSUB_RND; break;
+ case X86ISD::FNMADD: NewOpcode = X86ISD::FNMADD_RND; break;
+ case X86ISD::FNMSUB: NewOpcode = X86ISD::FNMSUB_RND; break;
+ }
+ } else if (N->getOpcode() == X86ISD::FMADDS1_RND) {
+ switch (NewOpcode) {
+ case X86ISD::FMADD: NewOpcode = X86ISD::FMADDS1_RND; break;
+ case X86ISD::FMSUB: NewOpcode = X86ISD::FMSUBS1_RND; break;
+ case X86ISD::FNMADD: NewOpcode = X86ISD::FNMADDS1_RND; break;
+ case X86ISD::FNMSUB: NewOpcode = X86ISD::FNMSUBS1_RND; break;
+ }
+ } else if (N->getOpcode() == X86ISD::FMADDS3_RND) {
+ switch (NewOpcode) {
+ case X86ISD::FMADD: NewOpcode = X86ISD::FMADDS3_RND; break;
+ case X86ISD::FMSUB: NewOpcode = X86ISD::FMSUBS3_RND; break;
+ case X86ISD::FNMADD: NewOpcode = X86ISD::FNMADDS3_RND; break;
+ case X86ISD::FNMSUB: NewOpcode = X86ISD::FNMSUBS3_RND; break;
+ }
+ } else {
+ assert((N->getOpcode() == X86ISD::FMADD || N->getOpcode() == ISD::FMA) &&
+ "Unexpected opcode!");
+ return DAG.getNode(NewOpcode, dl, VT, A, B, C);
+ }
- return DAG.getNode(Opcode, dl, VT, A, B, C);
+ return DAG.getNode(NewOpcode, dl, VT, A, B, C, N->getOperand(3));
}
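// For example (approximate): fma(fneg(a), b, c) only flips the sign of the
// multiply, so NegMul selects X86ISD::FNMADD, i.e. -(a*b) + c; if c is
// negated as well, FNMSUB (-(a*b) - c) is chosen instead.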
static SDValue combineZext(SDNode *N, SelectionDAG &DAG,
@@ -30308,6 +32719,12 @@ static SDValue combineZext(SDNode *N, SelectionDAG &DAG,
if (SDValue DivRem8 = getDivRem8(N, DAG))
return DivRem8;
+ if (SDValue NewAdd = promoteExtBeforeAdd(N, DAG, Subtarget))
+ return NewAdd;
+
+ if (SDValue R = combineOrCmpEqZeroToCtlzSrl(N, DAG, DCI, Subtarget))
+ return R;
+
return SDValue();
}
@@ -30443,10 +32860,8 @@ static SDValue combineX86SetCC(SDNode *N, SelectionDAG &DAG,
return MaterializeSETB(DL, EFLAGS, DAG, N->getSimpleValueType(0));
// Try to simplify the EFLAGS and condition code operands.
- if (SDValue Flags = combineSetCCEFLAGS(EFLAGS, CC, DAG)) {
- SDValue Cond = DAG.getConstant(CC, DL, MVT::i8);
- return DAG.getNode(X86ISD::SETCC, DL, N->getVTList(), Cond, Flags);
- }
+ if (SDValue Flags = combineSetCCEFLAGS(EFLAGS, CC, DAG))
+ return getSETCC(CC, Flags, DL, DAG);
return SDValue();
}
@@ -30539,6 +32954,12 @@ static SDValue combineUIntToFP(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::SINT_TO_FP, dl, VT, P);
}
+ // Since UINT_TO_FP is legal (it's marked custom), the DAG combiner won't
+ // optimize it to a SINT_TO_FP when the sign bit is known zero. Perform
+ // the optimization here.
+ if (DAG.SignBitIsZero(Op0))
+ return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, Op0);
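+ // For example (approximate): (uint_to_fp (zext i16 %x to i32)) has a
+ // known-zero sign bit, so the cheaper signed conversion (e.g. a single
+ // CVTSI2SS) can be used instead of the extended unsigned sequence.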
+
return SDValue();
}
@@ -30555,9 +32976,12 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
EVT InVT = Op0.getValueType();
EVT InSVT = InVT.getScalarType();
+ // SINT_TO_FP(vXi1) -> SINT_TO_FP(SEXT(vXi1 to vXi32))
// SINT_TO_FP(vXi8) -> SINT_TO_FP(SEXT(vXi8 to vXi32))
// SINT_TO_FP(vXi16) -> SINT_TO_FP(SEXT(vXi16 to vXi32))
- if (InVT.isVector() && (InSVT == MVT::i8 || InSVT == MVT::i16)) {
+ if (InVT.isVector() &&
+ (InSVT == MVT::i8 || InSVT == MVT::i16 ||
+ (InSVT == MVT::i1 && !DAG.getTargetLoweringInfo().isTypeLegal(InVT)))) {
SDLoc dl(N);
EVT DstVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
InVT.getVectorNumElements());
@@ -30565,6 +32989,23 @@ static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::SINT_TO_FP, dl, VT, P);
}
+ // Without AVX512DQ we only support i64 to float scalar conversion. For both
+ // vectors and scalars, see if we know that the upper bits are all the sign
+ // bit, in which case we can truncate the input to i32 and convert from that.
+ if (InVT.getScalarSizeInBits() > 32 && !Subtarget.hasDQI()) {
+ unsigned BitWidth = InVT.getScalarSizeInBits();
+ unsigned NumSignBits = DAG.ComputeNumSignBits(Op0);
+ if (NumSignBits >= (BitWidth - 31)) {
+ EVT TruncVT = EVT::getIntegerVT(*DAG.getContext(), 32);
+ if (InVT.isVector())
+ TruncVT = EVT::getVectorVT(*DAG.getContext(), TruncVT,
+ InVT.getVectorNumElements());
+ SDLoc dl(N);
+ SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Op0);
+ return DAG.getNode(ISD::SINT_TO_FP, dl, VT, Trunc);
+ }
+ }
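+ // For example (approximate): (sint_to_fp (sext v4i32 %x to v4i64)) has at
+ // least 33 sign bits per element, so without DQI it is rewritten as
+ // (sint_to_fp (trunc ... to v4i32)), for which hardware support exists.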
+
// Transform (SINT_TO_FP (i64 ...)) into an x87 operation if we have
// a 32-bit target where SSE doesn't support i64->FP operations.
if (!Subtarget.useSoftFloat() && Op0.getOpcode() == ISD::LOAD) {
@@ -30654,13 +33095,15 @@ static SDValue OptimizeConditionalInDecrement(SDNode *N, SelectionDAG &DAG) {
DAG.getConstant(0, DL, OtherVal.getValueType()), NewCmp);
}
-static SDValue detectSADPattern(SDNode *N, SelectionDAG &DAG,
- const X86Subtarget &Subtarget) {
+static SDValue combineLoopSADPattern(SDNode *N, SelectionDAG &DAG,
+ const X86Subtarget &Subtarget) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
SDValue Op0 = N->getOperand(0);
SDValue Op1 = N->getOperand(1);
+ // TODO: There's nothing special about i32; any integer type above i16 should
+ // work just as well.
if (!VT.isVector() || !VT.isSimple() ||
!(VT.getVectorElementType() == MVT::i32))
return SDValue();
@@ -30672,24 +33115,13 @@ static SDValue detectSADPattern(SDNode *N, SelectionDAG &DAG,
RegSize = 256;
// We only handle v16i32 for SSE2 / v32i32 for AVX2 / v64i32 for AVX512.
+ // TODO: We should be able to handle larger vectors by splitting them before
+ // feeding them into several SADs, and then reducing over those.
if (VT.getSizeInBits() / 4 > RegSize)
return SDValue();
- // Detect the following pattern:
- //
- // 1: %2 = zext <N x i8> %0 to <N x i32>
- // 2: %3 = zext <N x i8> %1 to <N x i32>
- // 3: %4 = sub nsw <N x i32> %2, %3
- // 4: %5 = icmp sgt <N x i32> %4, [0 x N] or [-1 x N]
- // 5: %6 = sub nsw <N x i32> zeroinitializer, %4
- // 6: %7 = select <N x i1> %5, <N x i32> %4, <N x i32> %6
- // 7: %8 = add nsw <N x i32> %7, %vec.phi
- //
- // The last instruction must be a reduction add. The instructions 3-6 forms an
- // ABSDIFF pattern.
-
- // The two operands of reduction add are from PHI and a select-op as in line 7
- // above.
+ // We know N is a reduction add, which means one of its operands is a phi.
+ // To match SAD, we need the other operand to be a vector select.
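+ // In IR terms the expected shape is roughly:
+ //   %d = sub nsw (zext <N x i8> %a), (zext <N x i8> %b)
+ //   %s = select (icmp sgt %d, 0 or -1), %d, (sub 0, %d)   ; abs-diff
+ //   add %s, %vec.phi                                       ; reduction add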
SDValue SelectOp, Phi;
if (Op0.getOpcode() == ISD::VSELECT) {
SelectOp = Op0;
@@ -30700,77 +33132,22 @@ static SDValue detectSADPattern(SDNode *N, SelectionDAG &DAG,
} else
return SDValue();
- // Check the condition of the select instruction is greater-than.
- SDValue SetCC = SelectOp->getOperand(0);
- if (SetCC.getOpcode() != ISD::SETCC)
- return SDValue();
- ISD::CondCode CC = cast<CondCodeSDNode>(SetCC.getOperand(2))->get();
- if (CC != ISD::SETGT)
- return SDValue();
-
- Op0 = SelectOp->getOperand(1);
- Op1 = SelectOp->getOperand(2);
-
- // The second operand of SelectOp Op1 is the negation of the first operand
- // Op0, which is implemented as 0 - Op0.
- if (!(Op1.getOpcode() == ISD::SUB &&
- ISD::isBuildVectorAllZeros(Op1.getOperand(0).getNode()) &&
- Op1.getOperand(1) == Op0))
- return SDValue();
-
- // The first operand of SetCC is the first operand of SelectOp, which is the
- // difference between two input vectors.
- if (SetCC.getOperand(0) != Op0)
- return SDValue();
-
- // The second operand of > comparison can be either -1 or 0.
- if (!(ISD::isBuildVectorAllZeros(SetCC.getOperand(1).getNode()) ||
- ISD::isBuildVectorAllOnes(SetCC.getOperand(1).getNode())))
- return SDValue();
-
- // The first operand of SelectOp is the difference between two input vectors.
- if (Op0.getOpcode() != ISD::SUB)
- return SDValue();
-
- Op1 = Op0.getOperand(1);
- Op0 = Op0.getOperand(0);
-
- // Check if the operands of the diff are zero-extended from vectors of i8.
- if (Op0.getOpcode() != ISD::ZERO_EXTEND ||
- Op0.getOperand(0).getValueType().getVectorElementType() != MVT::i8 ||
- Op1.getOpcode() != ISD::ZERO_EXTEND ||
- Op1.getOperand(0).getValueType().getVectorElementType() != MVT::i8)
+ // Check whether we have an abs-diff pattern feeding into the select.
+ if (!detectZextAbsDiff(SelectOp, Op0, Op1))
return SDValue();
// SAD pattern detected. Now build a SAD instruction and an addition for
- // reduction. Note that the number of elments of the result of SAD is less
+ // reduction. Note that the number of elements of the result of SAD is less
// than the number of elements of its input. Therefore, we could only update
// part of elements in the reduction vector.
-
- // Legalize the type of the inputs of PSADBW.
- EVT InVT = Op0.getOperand(0).getValueType();
- if (InVT.getSizeInBits() <= 128)
- RegSize = 128;
- else if (InVT.getSizeInBits() <= 256)
- RegSize = 256;
-
- unsigned NumConcat = RegSize / InVT.getSizeInBits();
- SmallVector<SDValue, 16> Ops(NumConcat, DAG.getConstant(0, DL, InVT));
- Ops[0] = Op0.getOperand(0);
- MVT ExtendedVT = MVT::getVectorVT(MVT::i8, RegSize / 8);
- Op0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, ExtendedVT, Ops);
- Ops[0] = Op1.getOperand(0);
- Op1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, ExtendedVT, Ops);
+ SDValue Sad = createPSADBW(DAG, Op0, Op1, DL);
// The output of PSADBW is a vector of i64.
- MVT SadVT = MVT::getVectorVT(MVT::i64, RegSize / 64);
- SDValue Sad = DAG.getNode(X86ISD::PSADBW, DL, SadVT, Op0, Op1);
-
// We need to turn the vector of i64 into a vector of i32.
// If the reduction vector is at least as wide as the psadbw result, just
// bitcast. If it's narrower, truncate - the high i32 of each i64 is zero
// anyway.
- MVT ResVT = MVT::getVectorVT(MVT::i32, RegSize / 32);
+ MVT ResVT = MVT::getVectorVT(MVT::i32, Sad.getValueSizeInBits() / 32);
if (VT.getSizeInBits() >= ResVT.getSizeInBits())
Sad = DAG.getNode(ISD::BITCAST, DL, ResVT, Sad);
else
@@ -30793,7 +33170,7 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
if (Flags->hasVectorReduction()) {
- if (SDValue Sad = detectSADPattern(N, DAG, Subtarget))
+ if (SDValue Sad = combineLoopSADPattern(N, DAG, Subtarget))
return Sad;
}
EVT VT = N->getValueType(0);
@@ -30842,10 +33219,11 @@ static SDValue combineSub(SDNode *N, SelectionDAG &DAG,
return OptimizeConditionalInDecrement(N, DAG);
}
-static SDValue combineVZext(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget &Subtarget) {
+static SDValue combineVSZext(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &Subtarget) {
SDLoc DL(N);
+ unsigned Opcode = N->getOpcode();
MVT VT = N->getSimpleValueType(0);
MVT SVT = VT.getVectorElementType();
SDValue Op = N->getOperand(0);
@@ -30854,25 +33232,28 @@ static SDValue combineVZext(SDNode *N, SelectionDAG &DAG,
unsigned InputBits = OpEltVT.getSizeInBits() * VT.getVectorNumElements();
// Perform any constant folding.
+ // FIXME: Reduce constant pool usage and don't fold when OptSize is enabled.
if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
- SmallVector<SDValue, 4> Vals;
- for (int i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
+ unsigned NumDstElts = VT.getVectorNumElements();
+ SmallBitVector Undefs(NumDstElts, false);
+ SmallVector<APInt, 4> Vals(NumDstElts, APInt(SVT.getSizeInBits(), 0));
+ for (unsigned i = 0; i != NumDstElts; ++i) {
SDValue OpElt = Op.getOperand(i);
if (OpElt.getOpcode() == ISD::UNDEF) {
- Vals.push_back(DAG.getUNDEF(SVT));
+ Undefs[i] = true;
continue;
}
APInt Cst = cast<ConstantSDNode>(OpElt.getNode())->getAPIntValue();
- assert(Cst.getBitWidth() == OpEltVT.getSizeInBits());
- Cst = Cst.zextOrTrunc(SVT.getSizeInBits());
- Vals.push_back(DAG.getConstant(Cst, DL, SVT));
+ Vals[i] = Opcode == X86ISD::VZEXT ? Cst.zextOrTrunc(SVT.getSizeInBits())
+ : Cst.sextOrTrunc(SVT.getSizeInBits());
}
- return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Vals);
+ return getConstVector(Vals, Undefs, VT, DAG, DL);
}
// (vzext (bitcast (vzext (x)) -> (vzext x)
+ // TODO: (vsext (bitcast (vsext (x)) -> (vsext x)
SDValue V = peekThroughBitcasts(Op);
- if (V != Op && V.getOpcode() == X86ISD::VZEXT) {
+ if (Opcode == X86ISD::VZEXT && V != Op && V.getOpcode() == X86ISD::VZEXT) {
MVT InnerVT = V.getSimpleValueType();
MVT InnerEltVT = InnerVT.getVectorElementType();
@@ -30897,7 +33278,9 @@ static SDValue combineVZext(SDNode *N, SelectionDAG &DAG,
// Check if we can bypass extracting and re-inserting an element of an input
// vector. Essentially:
// (bitcast (sclr2vec (ext_vec_elt x))) -> (bitcast x)
- if (V.getOpcode() == ISD::SCALAR_TO_VECTOR &&
+ // TODO: Add X86ISD::VSEXT support
+ if (Opcode == X86ISD::VZEXT &&
+ V.getOpcode() == ISD::SCALAR_TO_VECTOR &&
V.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
V.getOperand(0).getSimpleValueType().getSizeInBits() == InputBits) {
SDValue ExtractedV = V.getOperand(0);
@@ -30976,7 +33359,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
SelectionDAG &DAG = DCI.DAG;
switch (N->getOpcode()) {
default: break;
- case ISD::EXTRACT_VECTOR_ELT: return combineExtractVectorElt(N, DAG, DCI);
+ case ISD::EXTRACT_VECTOR_ELT:
+ return combineExtractVectorElt(N, DAG, DCI, Subtarget);
case ISD::VSELECT:
case ISD::SELECT:
case X86ISD::SHRUNKBLEND: return combineSelect(N, DAG, DCI, Subtarget);
@@ -31002,16 +33386,15 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::FSUB: return combineFaddFsub(N, DAG, Subtarget);
case ISD::FNEG: return combineFneg(N, DAG, Subtarget);
case ISD::TRUNCATE: return combineTruncate(N, DAG, Subtarget);
+ case X86ISD::FAND: return combineFAnd(N, DAG, Subtarget);
+ case X86ISD::FANDN: return combineFAndn(N, DAG, Subtarget);
case X86ISD::FXOR:
case X86ISD::FOR: return combineFOr(N, DAG, Subtarget);
case X86ISD::FMIN:
case X86ISD::FMAX: return combineFMinFMax(N, DAG);
case ISD::FMINNUM:
case ISD::FMAXNUM: return combineFMinNumFMaxNum(N, DAG, Subtarget);
- case X86ISD::FAND: return combineFAnd(N, DAG, Subtarget);
- case X86ISD::FANDN: return combineFAndn(N, DAG, Subtarget);
case X86ISD::BT: return combineBT(N, DAG, DCI);
- case X86ISD::VZEXT_MOVL: return combineVZextMovl(N, DAG);
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND: return combineZext(N, DAG, DCI, Subtarget);
case ISD::SIGN_EXTEND: return combineSext(N, DAG, DCI, Subtarget);
@@ -31019,7 +33402,10 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SETCC: return combineSetCC(N, DAG, Subtarget);
case X86ISD::SETCC: return combineX86SetCC(N, DAG, DCI, Subtarget);
case X86ISD::BRCOND: return combineBrCond(N, DAG, DCI, Subtarget);
- case X86ISD::VZEXT: return combineVZext(N, DAG, DCI, Subtarget);
+ case X86ISD::VSHLI:
+ case X86ISD::VSRLI: return combineVectorShift(N, DAG, DCI, Subtarget);
+ case X86ISD::VSEXT:
+ case X86ISD::VZEXT: return combineVSZext(N, DAG, DCI, Subtarget);
case X86ISD::SHUFP: // Handle all target specific shuffles
case X86ISD::INSERTPS:
case X86ISD::PALIGNR:
@@ -31043,11 +33429,17 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::VPERMI:
case X86ISD::VPERMV:
case X86ISD::VPERMV3:
+ case X86ISD::VPERMIV3:
case X86ISD::VPERMIL2:
case X86ISD::VPERMILPI:
case X86ISD::VPERMILPV:
case X86ISD::VPERM2X128:
+ case X86ISD::VZEXT_MOVL:
case ISD::VECTOR_SHUFFLE: return combineShuffle(N, DAG, DCI,Subtarget);
+ case X86ISD::FMADD:
+ case X86ISD::FMADD_RND:
+ case X86ISD::FMADDS1_RND:
+ case X86ISD::FMADDS3_RND:
case ISD::FMA: return combineFMA(N, DAG, Subtarget);
case ISD::MGATHER:
case ISD::MSCATTER: return combineGatherScatter(N, DAG);
@@ -31133,7 +33525,7 @@ bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
case ISD::OR:
case ISD::XOR:
Commute = true;
- // fallthrough
+ LLVM_FALLTHROUGH;
case ISD::SUB: {
SDValue N0 = Op.getOperand(0);
SDValue N1 = Op.getOperand(1);
@@ -31280,9 +33672,11 @@ X86TargetLowering::getConstraintType(StringRef Constraint) const {
case 'u':
case 'y':
case 'x':
+ case 'v':
case 'Y':
case 'l':
return C_RegisterClass;
+ case 'k': // AVX512 masking registers.
case 'a':
case 'b':
case 'c':
@@ -31306,6 +33700,19 @@ X86TargetLowering::getConstraintType(StringRef Constraint) const {
break;
}
}
+ else if (Constraint.size() == 2) {
+ switch (Constraint[0]) {
+ default:
+ break;
+ case 'Y':
+ switch (Constraint[1]) {
+ default:
+ break;
+ case 'k':
+ return C_Register;
+ }
+ }
+ }
return TargetLowering::getConstraintType(Constraint);
}
@@ -31349,12 +33756,28 @@ TargetLowering::ConstraintWeight
if (type->isX86_MMXTy() && Subtarget.hasMMX())
weight = CW_SpecificReg;
break;
- case 'x':
case 'Y':
+ // Other "Y<x>" (e.g. "Yk") constraints should be implemented below.
+ if (constraint[1] == 'k') {
+ // Support for 'Yk' (similarly to the 'k' variant below).
+ weight = CW_SpecificReg;
+ break;
+ }
+ // Else fall through (handle "Y" constraint).
+ LLVM_FALLTHROUGH;
+ case 'v':
+ if ((type->getPrimitiveSizeInBits() == 512) && Subtarget.hasAVX512())
+ weight = CW_Register;
+ LLVM_FALLTHROUGH;
+ case 'x':
if (((type->getPrimitiveSizeInBits() == 128) && Subtarget.hasSSE1()) ||
((type->getPrimitiveSizeInBits() == 256) && Subtarget.hasFp256()))
weight = CW_Register;
break;
+ case 'k':
+ // Enable conditional vector operations using %k<#> registers.
+ weight = CW_SpecificReg;
+ break;
case 'I':
if (ConstantInt *C = dyn_cast<ConstantInt>(info.CallOperandVal)) {
if (C->getZExtValue() <= 31)
@@ -31601,60 +34024,21 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
/// Check if \p RC is a general purpose register class.
/// I.e., GR* or one of their variant.
static bool isGRClass(const TargetRegisterClass &RC) {
- switch (RC.getID()) {
- case X86::GR8RegClassID:
- case X86::GR8_ABCD_LRegClassID:
- case X86::GR8_ABCD_HRegClassID:
- case X86::GR8_NOREXRegClassID:
- case X86::GR16RegClassID:
- case X86::GR16_ABCDRegClassID:
- case X86::GR16_NOREXRegClassID:
- case X86::GR32RegClassID:
- case X86::GR32_ABCDRegClassID:
- case X86::GR32_TCRegClassID:
- case X86::GR32_NOREXRegClassID:
- case X86::GR32_NOAXRegClassID:
- case X86::GR32_NOSPRegClassID:
- case X86::GR32_NOREX_NOSPRegClassID:
- case X86::GR32_ADRegClassID:
- case X86::GR64RegClassID:
- case X86::GR64_ABCDRegClassID:
- case X86::GR64_TCRegClassID:
- case X86::GR64_TCW64RegClassID:
- case X86::GR64_NOREXRegClassID:
- case X86::GR64_NOSPRegClassID:
- case X86::GR64_NOREX_NOSPRegClassID:
- case X86::LOW32_ADDR_ACCESSRegClassID:
- case X86::LOW32_ADDR_ACCESS_RBPRegClassID:
- return true;
- default:
- return false;
- }
+ return RC.hasSuperClassEq(&X86::GR8RegClass) ||
+ RC.hasSuperClassEq(&X86::GR16RegClass) ||
+ RC.hasSuperClassEq(&X86::GR32RegClass) ||
+ RC.hasSuperClassEq(&X86::GR64RegClass) ||
+ RC.hasSuperClassEq(&X86::LOW32_ADDR_ACCESS_RBPRegClass);
}
/// Check if \p RC is a vector register class.
/// I.e., FR* / VR* or one of their variant.
static bool isFRClass(const TargetRegisterClass &RC) {
- switch (RC.getID()) {
- case X86::FR32RegClassID:
- case X86::FR32XRegClassID:
- case X86::FR64RegClassID:
- case X86::FR64XRegClassID:
- case X86::FR128RegClassID:
- case X86::VR64RegClassID:
- case X86::VR128RegClassID:
- case X86::VR128LRegClassID:
- case X86::VR128HRegClassID:
- case X86::VR128XRegClassID:
- case X86::VR256RegClassID:
- case X86::VR256LRegClassID:
- case X86::VR256HRegClassID:
- case X86::VR256XRegClassID:
- case X86::VR512RegClassID:
- return true;
- default:
- return false;
- }
+ return RC.hasSuperClassEq(&X86::FR32XRegClass) ||
+ RC.hasSuperClassEq(&X86::FR64XRegClass) ||
+ RC.hasSuperClassEq(&X86::VR128XRegClass) ||
+ RC.hasSuperClassEq(&X86::VR256XRegClass) ||
+ RC.hasSuperClassEq(&X86::VR512RegClass);
}
std::pair<unsigned, const TargetRegisterClass *>
@@ -31670,6 +34054,24 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
// TODO: Slight differences here in allocation order and leaving
// RIP in the class. Do they matter any more here than they do
// in the normal allocation?
+ case 'k':
+ if (Subtarget.hasAVX512()) {
+ // Only supported in AVX512 or later.
+ switch (VT.SimpleTy) {
+ default: break;
+ case MVT::i32:
+ return std::make_pair(0U, &X86::VK32RegClass);
+ case MVT::i16:
+ return std::make_pair(0U, &X86::VK16RegClass);
+ case MVT::i8:
+ return std::make_pair(0U, &X86::VK8RegClass);
+ case MVT::i1:
+ return std::make_pair(0U, &X86::VK1RegClass);
+ case MVT::i64:
+ return std::make_pair(0U, &X86::VK64RegClass);
+ }
+ }
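+ // Example use from C inline asm (approximate; assumes AVX512F and that
+ // 'a' and 'b' are __m512i values):
+ //   __mmask16 m;
+ //   asm("vpcmpeqd %2, %1, %0" : "=k"(m) : "v"(a), "v"(b));
+ // The i16 mask type selects VK16RegClass above.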
+ break;
case 'q': // GENERAL_REGS in 64-bit mode, Q_REGS in 32-bit mode.
if (Subtarget.is64Bit()) {
if (VT == MVT::i32 || VT == MVT::f32)
@@ -31723,18 +34125,24 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
return std::make_pair(0U, &X86::VR64RegClass);
case 'Y': // SSE_REGS if SSE2 allowed
if (!Subtarget.hasSSE2()) break;
- // FALL THROUGH.
+ LLVM_FALLTHROUGH;
+ case 'v':
case 'x': // SSE_REGS if SSE1 allowed or AVX_REGS if AVX allowed
if (!Subtarget.hasSSE1()) break;
+ bool VConstraint = (Constraint[0] == 'v');
switch (VT.SimpleTy) {
default: break;
// Scalar SSE types.
case MVT::f32:
case MVT::i32:
+ if (VConstraint && Subtarget.hasAVX512() && Subtarget.hasVLX())
+ return std::make_pair(0U, &X86::FR32XRegClass);
return std::make_pair(0U, &X86::FR32RegClass);
case MVT::f64:
case MVT::i64:
+ if (VConstraint && Subtarget.hasVLX())
+ return std::make_pair(0U, &X86::FR64XRegClass);
return std::make_pair(0U, &X86::FR64RegClass);
// TODO: Handle f128 and i128 in FR128RegClass after it is tested well.
// Vector types.
@@ -31744,6 +34152,8 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
case MVT::v2i64:
case MVT::v4f32:
case MVT::v2f64:
+ if (VConstraint && Subtarget.hasVLX())
+ return std::make_pair(0U, &X86::VR128XRegClass);
return std::make_pair(0U, &X86::VR128RegClass);
// AVX types.
case MVT::v32i8:
@@ -31752,6 +34162,8 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
case MVT::v4i64:
case MVT::v8f32:
case MVT::v4f64:
+ if (VConstraint && Subtarget.hasVLX())
+ return std::make_pair(0U, &X86::VR256XRegClass);
return std::make_pair(0U, &X86::VR256RegClass);
case MVT::v8f64:
case MVT::v16f32:
@@ -31761,6 +34173,29 @@ X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
}
break;
}
+ } else if (Constraint.size() == 2 && Constraint[0] == 'Y') {
+ switch (Constraint[1]) {
+ default:
+ break;
+ case 'k':
+ // These register classes don't allocate k0 for masked vector operations.
+ if (Subtarget.hasAVX512()) { // Only supported in AVX512.
+ switch (VT.SimpleTy) {
+ default: break;
+ case MVT::i32:
+ return std::make_pair(0U, &X86::VK32WMRegClass);
+ case MVT::i16:
+ return std::make_pair(0U, &X86::VK16WMRegClass);
+ case MVT::i8:
+ return std::make_pair(0U, &X86::VK8WMRegClass);
+ case MVT::i1:
+ return std::make_pair(0U, &X86::VK1WMRegClass);
+ case MVT::i64:
+ return std::make_pair(0U, &X86::VK64WMRegClass);
+ }
+ }
+ break;
+ }
}
// Use the default implementation in TargetLowering to convert the register
@@ -31954,3 +34389,7 @@ void X86TargetLowering::insertCopiesSplitCSR(
.addReg(NewVR);
}
}
+
+bool X86TargetLowering::supportSwiftError() const {
+ return Subtarget.is64Bit();
+}
diff --git a/contrib/llvm/lib/Target/X86/X86ISelLowering.h b/contrib/llvm/lib/Target/X86/X86ISelLowering.h
index d826f1ec3e05..37f9353042b1 100644
--- a/contrib/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/contrib/llvm/lib/Target/X86/X86ISelLowering.h
@@ -95,7 +95,7 @@ namespace llvm {
SETCC,
/// X86 Select
- SELECT,
+ SELECT, SELECTS,
// Same as SETCC except it's materialized with a sbb and the value is all
// one's or all zero's.
@@ -106,6 +106,10 @@ namespace llvm {
/// 0s or 1s. Generally DTRT for C/C++ with NaNs.
FSETCC,
+ /// X86 FP SETCC, similar to above, but with output as an i1 mask and
+ /// with optional rounding mode.
+ FSETCCM, FSETCCM_RND,
+
/// X86 conditional moves. Operand 0 and operand 1 are the two values
/// to select from. Operand 2 is the condition code, and operand 3 is the
/// flag operand produced by a CMP or TEST instruction. It also writes a
@@ -135,8 +139,9 @@ namespace llvm {
/// at function entry, used for PIC code.
GlobalBaseReg,
- /// A wrapper node for TargetConstantPool,
- /// TargetExternalSymbol, and TargetGlobalAddress.
+ /// A wrapper node for TargetConstantPool, TargetJumpTable,
+ /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
+ /// MCSymbol and TargetBlockAddress.
Wrapper,
/// Special wrapper used under X86-64 PIC mode for RIP
@@ -205,12 +210,12 @@ namespace llvm {
FDIV_RND,
FMAX_RND,
FMIN_RND,
- FSQRT_RND,
+ FSQRT_RND, FSQRTS_RND,
// FP vector get exponent.
- FGETEXP_RND,
+ FGETEXP_RND, FGETEXPS_RND,
// Extract Normalized Mantissas.
- VGETMANT,
+ VGETMANT, VGETMANTS,
// FP Scale.
SCALEF,
SCALEFS,
@@ -251,7 +256,7 @@ namespace llvm {
/// in order to obtain suitable precision.
FRSQRT, FRCP,
FRSQRTS, FRCPS,
-
+
// Thread Local Storage.
TLSADDR,
@@ -293,13 +298,10 @@ namespace llvm {
VTRUNCUS, VTRUNCS,
// Vector FP extend.
- VFPEXT,
+ VFPEXT, VFPEXT_RND, VFPEXTS_RND,
// Vector FP round.
- VFPROUND,
-
- // Vector signed/unsigned integer to double.
- CVTDQ2PD, CVTUDQ2PD,
+ VFPROUND, VFPROUND_RND, VFPROUNDS_RND,
// Convert a vector to mask, set bits base on MSB.
CVT2MASK,
@@ -426,9 +428,9 @@ namespace llvm {
// Range Restriction Calculation For Packed Pairs of Float32/64 values.
VRANGE,
// Reduce - Perform Reduction Transformation on scalar\packed FP.
- VREDUCE,
+ VREDUCE, VREDUCES,
// RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
- VRNDSCALE,
+ VRNDSCALE, VRNDSCALES,
// Tests Types Of a FP Values for packed types.
VFPCLASS,
// Tests Types Of a FP Values for scalar types.
@@ -486,19 +488,33 @@ namespace llvm {
FMADDSUB_RND,
FMSUBADD_RND,
+ // Scalar intrinsic FMA with rounding mode.
+ // Two versions, passthru bits on op1 or op3.
+ FMADDS1_RND, FMADDS3_RND,
+ FNMADDS1_RND, FNMADDS3_RND,
+ FMSUBS1_RND, FMSUBS3_RND,
+ FNMSUBS1_RND, FNMSUBS3_RND,
+
// Compress and expand.
COMPRESS,
EXPAND,
- // Convert Unsigned/Integer to Scalar Floating-Point Value
- // with rounding mode.
- SINT_TO_FP_RND,
- UINT_TO_FP_RND,
+ // Convert Unsigned/Integer to Floating-Point Value with rounding mode.
+ SINT_TO_FP_RND, UINT_TO_FP_RND,
+ SCALAR_SINT_TO_FP_RND, SCALAR_UINT_TO_FP_RND,
// Vector float/double to signed/unsigned integer.
- FP_TO_SINT_RND, FP_TO_UINT_RND,
+ CVTP2SI, CVTP2UI, CVTP2SI_RND, CVTP2UI_RND,
// Scalar float/double to signed/unsigned integer.
- SCALAR_FP_TO_SINT_RND, SCALAR_FP_TO_UINT_RND,
+ CVTS2SI_RND, CVTS2UI_RND,
+
+ // Vector float/double to signed/unsigned integer with truncation.
+ CVTTP2SI, CVTTP2UI, CVTTP2SI_RND, CVTTP2UI_RND,
+ // Scalar float/double to signed/unsigned integer with truncation.
+ CVTTS2SI_RND, CVTTS2UI_RND,
+
+ // Vector signed/unsigned integer to float/double.
+ CVTSI2P, CVTUI2P,
// Save xmm argument registers to the stack, according to %al. An operator
// is needed so that this can be expanded with control flow.
@@ -537,7 +553,10 @@ namespace llvm {
XTEST,
// ERI instructions.
- RSQRT28, RCP28, EXP2,
+ RSQRT28, RSQRT28S, RCP28, RCP28S, EXP2,
+
+ // Conversions between float and half-float.
+ CVTPS2PH, CVTPH2PS,
// Compare and swap.
LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
@@ -587,7 +606,12 @@ namespace llvm {
/// This instruction grabs the address of the next argument
/// from a va_list. (reads and modifies the va_list in memory)
- VAARG_64
+ VAARG_64,
+
+ // Vector truncating store with unsigned/signed saturation
+ VTRUNCSTOREUS, VTRUNCSTORES,
+ // Vector truncating masked store with unsigned/signed saturation
+ VMTRUNCSTOREUS, VMTRUNCSTORES
// WARNING: Do not add anything in the end unless you want the node to
// have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
@@ -760,10 +784,28 @@ namespace llvm {
bool isCheapToSpeculateCtlz() const override;
+ bool isCtlzFast() const override;
+
bool hasBitPreservingFPLogic(EVT VT) const override {
return VT == MVT::f32 || VT == MVT::f64 || VT.isVector();
}
+ bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
+ // If the pair to store is a mixture of float and int values, we will
+ // save two bitwise instructions and one float-to-int instruction and
+ // add one store instruction. There is potentially a more significant
+ // benefit because it avoids the float->int domain switch for the input
+ // value, so it is more likely a win.
+ if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
+ (LTy.isInteger() && HTy.isFloatingPoint()))
+ return true;
+ // If the pair only contains int values, we will save two bitwise
+ // instructions and add one store instruction (costing one more store
+ // buffer entry). Since the benefit is less clear, we leave such pairs
+ // out until we have a test case proving they are a win.
+ return false;
+ }
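+ // Illustrative case: storing a {float, int} pair as one i64 needs a
+ // float->int bitcast plus shift/or to merge the halves; two separate
+ // 32-bit stores avoid that domain crossing, which is why the mixed
+ // float/int case above returns true.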
+
bool hasAndNotCompare(SDValue Y) const override;
/// Return the value type to use for ISD::SETCC.
@@ -995,10 +1037,16 @@ namespace llvm {
bool isIntDivCheap(EVT VT, AttributeSet Attr) const override;
- bool supportSwiftError() const override {
- return true;
- }
+ bool supportSwiftError() const override;
+ unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
+
+ /// \brief Lower interleaved load(s) into target specific
+ /// instructions/intrinsics.
+ bool lowerInterleavedLoad(LoadInst *LI,
+ ArrayRef<ShuffleVectorInst *> Shuffles,
+ ArrayRef<unsigned> Indices,
+ unsigned Factor) const override;
protected:
std::pair<const TargetRegisterClass *, uint8_t>
findRepresentativeClass(const TargetRegisterInfo *TRI,
@@ -1032,7 +1080,7 @@ namespace llvm {
SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
const SmallVectorImpl<ISD::InputArg> &ArgInfo,
const SDLoc &dl, SelectionDAG &DAG,
- const CCValAssign &VA, MachineFrameInfo *MFI,
+ const CCValAssign &VA, MachineFrameInfo &MFI,
unsigned i) const;
SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
const SDLoc &dl, SelectionDAG &DAG,
@@ -1073,8 +1121,9 @@ namespace llvm {
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG) const;
SDValue InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG) const;
-
SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
+
+ unsigned getGlobalWrapperKind(const GlobalValue *GV = nullptr) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(const GlobalValue *GV, const SDLoc &dl,
@@ -1082,14 +1131,15 @@ namespace llvm {
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
+
SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const;
- SDValue LowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerFP_TO_INT(SDValue Op, const X86Subtarget &Subtarget,
+ SelectionDAG &DAG) const;
SDValue LowerToBT(SDValue And, ISD::CondCode CC, const SDLoc &dl,
SelectionDAG &DAG) const;
SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
@@ -1101,6 +1151,7 @@ namespace llvm {
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
@@ -1219,14 +1270,17 @@ namespace llvm {
/// Convert a comparison if required by the subtarget.
SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const;
+ /// Check if replacement of SQRT with RSQRT should be disabled.
+ bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override;
+
/// Use rsqrt* to speed up sqrt calculations.
- SDValue getRsqrtEstimate(SDValue Operand, DAGCombinerInfo &DCI,
- unsigned &RefinementSteps,
- bool &UseOneConstNR) const override;
+ SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
+ int &RefinementSteps, bool &UseOneConstNR,
+ bool Reciprocal) const override;
/// Use rcp* to speed up fdiv calculations.
- SDValue getRecipEstimate(SDValue Operand, DAGCombinerInfo &DCI,
- unsigned &RefinementSteps) const override;
+ SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
+ int &RefinementSteps) const override;
/// Reassociate floating point divisions into multiply by reciprocal.
unsigned combineRepeatedFPDivisors() const override;
@@ -1236,6 +1290,93 @@ namespace llvm {
FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
const TargetLibraryInfo *libInfo);
} // end namespace X86
+
+ // Base class for all X86 non-masked store operations.
+ class X86StoreSDNode : public MemSDNode {
+ public:
+ X86StoreSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl,
+ SDVTList VTs, EVT MemVT,
+ MachineMemOperand *MMO)
+ :MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
+ const SDValue &getValue() const { return getOperand(1); }
+ const SDValue &getBasePtr() const { return getOperand(2); }
+
+ static bool classof(const SDNode *N) {
+ return N->getOpcode() == X86ISD::VTRUNCSTORES ||
+ N->getOpcode() == X86ISD::VTRUNCSTOREUS;
+ }
+ };
+
+ // Base class for all X86 masked store operations.
+ // The class has the same order of operands as MaskedStoreSDNode for
+ // convenience.
+ class X86MaskedStoreSDNode : public MemSDNode {
+ public:
+ X86MaskedStoreSDNode(unsigned Opcode, unsigned Order,
+ const DebugLoc &dl, SDVTList VTs, EVT MemVT,
+ MachineMemOperand *MMO)
+ : MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
+
+ const SDValue &getBasePtr() const { return getOperand(1); }
+ const SDValue &getMask() const { return getOperand(2); }
+ const SDValue &getValue() const { return getOperand(3); }
+
+ static bool classof(const SDNode *N) {
+ return N->getOpcode() == X86ISD::VMTRUNCSTORES ||
+ N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
+ }
+ };
+
+ // X86 Truncating Store with Signed saturation.
+ class TruncSStoreSDNode : public X86StoreSDNode {
+ public:
+ TruncSStoreSDNode(unsigned Order, const DebugLoc &dl,
+ SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
+ : X86StoreSDNode(X86ISD::VTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}
+
+ static bool classof(const SDNode *N) {
+ return N->getOpcode() == X86ISD::VTRUNCSTORES;
+ }
+ };
+
+ // X86 Truncating Store with Unsigned saturation.
+ class TruncUSStoreSDNode : public X86StoreSDNode {
+ public:
+ TruncUSStoreSDNode(unsigned Order, const DebugLoc &dl,
+ SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
+ : X86StoreSDNode(X86ISD::VTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}
+
+ static bool classof(const SDNode *N) {
+ return N->getOpcode() == X86ISD::VTRUNCSTOREUS;
+ }
+ };
+
+ // X86 Truncating Masked Store with Signed saturation.
+ class MaskedTruncSStoreSDNode : public X86MaskedStoreSDNode {
+ public:
+ MaskedTruncSStoreSDNode(unsigned Order,
+ const DebugLoc &dl, SDVTList VTs, EVT MemVT,
+ MachineMemOperand *MMO)
+ : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}
+
+ static bool classof(const SDNode *N) {
+ return N->getOpcode() == X86ISD::VMTRUNCSTORES;
+ }
+ };
+
+ // X86 Truncating Masked Store with Unsigned saturation.
+ class MaskedTruncUSStoreSDNode : public X86MaskedStoreSDNode {
+ public:
+ MaskedTruncUSStoreSDNode(unsigned Order,
+ const DebugLoc &dl, SDVTList VTs, EVT MemVT,
+ MachineMemOperand *MMO)
+ : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}
+
+ static bool classof(const SDNode *N) {
+ return N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
+ }
+ };
+
} // end namespace llvm
#endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
diff --git a/contrib/llvm/lib/Target/X86/X86InstrAVX512.td b/contrib/llvm/lib/Target/X86/X86InstrAVX512.td
index 803a7e35c209..da7437ea0ccb 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -77,15 +77,15 @@ class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
!if (!eq (TypeVariantName, "i"),
!if (!eq (Size, 128), "v2i64",
!if (!eq (Size, 256), "v4i64",
- VTName)), VTName));
+ !if (!eq (Size, 512), "v8i64",
+ VTName))), VTName));
PatFrag AlignedLdFrag = !cast<PatFrag>("alignedload" #
- !if (!eq (TypeVariantName, "i"),
- !if (!eq (Size, 128), "v2i64",
- !if (!eq (Size, 256), "v4i64",
- !if (!eq (Size, 512),
- !if (!eq (EltSize, 64), "v8i64", "v16i32"),
- VTName))), VTName));
+ !if (!eq (TypeVariantName, "i"),
+ !if (!eq (Size, 128), "v2i64",
+ !if (!eq (Size, 256), "v4i64",
+ !if (!eq (Size, 512), "v8i64",
+ VTName))), VTName));
PatFrag ScalarLdFrag = !cast<PatFrag>("load" # EltVT);
@@ -122,6 +122,10 @@ class X86VectorVTInfo<int numelts, ValueType eltvt, RegisterClass rc,
RegisterClass FRC = !if (!eq (EltTypeName, "f32"), FR32X, FR64X);
+ // A vector type of the same width with element type i64. This is used to
+ // create patterns for logic ops.
+ ValueType i64VT = !cast<ValueType>("v" # !srl(Size, 6) # "i64");
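+ // For example, Size = 256 gives !srl(256, 6) = 4, i.e. v4i64, and
+ // Size = 512 gives v8i64.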
+
// A vector type of the same width with element type i32. This is used to
// create the canonical constant zero node ImmAllZerosV.
ValueType i32VT = !cast<ValueType>("v" # !srl(Size, 5) # "i32");
@@ -194,7 +198,8 @@ multiclass AVX512_maskable_custom<bits<8> O, Format F,
list<dag> ZeroMaskingPattern,
string MaskingConstraint = "",
InstrItinClass itin = NoItinerary,
- bit IsCommutable = 0> {
+ bit IsCommutable = 0,
+ bit IsKCommutable = 0> {
let isCommutable = IsCommutable in
def NAME: AVX512<O, F, Outs, Ins,
OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
@@ -202,7 +207,7 @@ multiclass AVX512_maskable_custom<bits<8> O, Format F,
Pattern, itin>;
// Prefer over VMOV*rrk Pat<>
- let AddedComplexity = 20 in
+ let AddedComplexity = 20, isCommutable = IsKCommutable in
def NAME#k: AVX512<O, F, Outs, MaskingIns,
OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
"$dst {${mask}}, "#IntelSrcAsm#"}",
@@ -210,8 +215,11 @@ multiclass AVX512_maskable_custom<bits<8> O, Format F,
EVEX_K {
// In case of the 3src subclass this is overridden with a let.
string Constraints = MaskingConstraint;
- }
- let AddedComplexity = 30 in // Prefer over VMOV*rrkz Pat<>
+ }
+
+ // Zero-masking does not add any restrictions to the operand-commuting
+ // transformation, so it is OK to use IsCommutable instead of IsKCommutable.
+ let AddedComplexity = 30, isCommutable = IsCommutable in // Prefer over VMOV*rrkz Pat<>
def NAME#kz: AVX512<O, F, Outs, ZeroMaskingIns,
OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}} {z}|"#
"$dst {${mask}} {z}, "#IntelSrcAsm#"}",
@@ -231,14 +239,16 @@ multiclass AVX512_maskable_common<bits<8> O, Format F, X86VectorVTInfo _,
SDNode Select = vselect,
string MaskingConstraint = "",
InstrItinClass itin = NoItinerary,
- bit IsCommutable = 0> :
+ bit IsCommutable = 0,
+ bit IsKCommutable = 0> :
AVX512_maskable_custom<O, F, Outs, Ins, MaskingIns, ZeroMaskingIns, OpcodeStr,
AttSrcAsm, IntelSrcAsm,
[(set _.RC:$dst, RHS)],
[(set _.RC:$dst, MaskingRHS)],
[(set _.RC:$dst,
(Select _.KRCWM:$mask, RHS, _.ImmAllZerosV))],
- MaskingConstraint, NoItinerary, IsCommutable>;
+ MaskingConstraint, NoItinerary, IsCommutable,
+ IsKCommutable>;
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the vector instruction. In the masking case, the
@@ -248,13 +258,14 @@ multiclass AVX512_maskable<bits<8> O, Format F, X86VectorVTInfo _,
string AttSrcAsm, string IntelSrcAsm,
dag RHS,
InstrItinClass itin = NoItinerary,
- bit IsCommutable = 0, SDNode Select = vselect> :
+ bit IsCommutable = 0, bit IsKCommutable = 0,
+ SDNode Select = vselect> :
AVX512_maskable_common<O, F, _, Outs, Ins,
!con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
!con((ins _.KRCWM:$mask), Ins),
OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
(Select _.KRCWM:$mask, RHS, _.RC:$src0), Select,
- "$src0 = $dst", itin, IsCommutable>;
+ "$src0 = $dst", itin, IsCommutable, IsKCommutable>;
// This multiclass generates the unconditional/non-masking, the masking and
// the zero-masking variant of the scalar instruction.
@@ -278,41 +289,29 @@ multiclass AVX512_maskable_scalar<bits<8> O, Format F, X86VectorVTInfo _,
multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag NonTiedIns, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
- dag RHS> :
+ dag RHS, bit IsCommutable = 0,
+ bit IsKCommutable = 0> :
AVX512_maskable_common<O, F, _, Outs,
!con((ins _.RC:$src1), NonTiedIns),
!con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
!con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
- (vselect _.KRCWM:$mask, RHS, _.RC:$src1)>;
-
-// Similar to AVX512_maskable_3rc but in this case the input VT for the tied
-// operand differs from the output VT. This requires a bitconvert on
-// the preserved vector going into the vselect.
-multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT,
- X86VectorVTInfo InVT,
- dag Outs, dag NonTiedIns, string OpcodeStr,
- string AttSrcAsm, string IntelSrcAsm,
- dag RHS> :
- AVX512_maskable_common<O, F, OutVT, Outs,
- !con((ins InVT.RC:$src1), NonTiedIns),
- !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
- !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns),
- OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
- (vselect InVT.KRCWM:$mask, RHS,
- (bitconvert InVT.RC:$src1))>;
+ (vselect _.KRCWM:$mask, RHS, _.RC:$src1),
+ vselect, "", NoItinerary, IsCommutable, IsKCommutable>;
multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag NonTiedIns, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
- dag RHS> :
+ dag RHS, bit IsCommutable = 0,
+ bit IsKCommutable = 0> :
AVX512_maskable_common<O, F, _, Outs,
!con((ins _.RC:$src1), NonTiedIns),
!con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
!con((ins _.RC:$src1, _.KRCWM:$mask), NonTiedIns),
OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
(X86selects _.KRCWM:$mask, RHS, _.RC:$src1),
- X86selects>;
+ X86selects, "", NoItinerary, IsCommutable,
+ IsKCommutable>;
multiclass AVX512_maskable_in_asm<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag Ins,
@@ -334,7 +333,9 @@ multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
list<dag> Pattern,
- list<dag> MaskingPattern> {
+ list<dag> MaskingPattern,
+ bit IsCommutable = 0> {
+ let isCommutable = IsCommutable in
def NAME: AVX512<O, F, Outs, Ins,
OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
"$dst, "#IntelSrcAsm#"}",
@@ -351,20 +352,21 @@ multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
dag Ins, dag MaskingIns,
string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
- dag RHS, dag MaskingRHS> :
+ dag RHS, dag MaskingRHS,
+ bit IsCommutable = 0> :
AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
AttSrcAsm, IntelSrcAsm,
[(set _.KRC:$dst, RHS)],
- [(set _.KRC:$dst, MaskingRHS)]>;
+ [(set _.KRC:$dst, MaskingRHS)], IsCommutable>;
multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag Ins, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
- dag RHS> :
+ dag RHS, bit IsCommutable = 0> :
AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
!con((ins _.KRCWM:$mask), Ins),
OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
- (and _.KRCWM:$mask, RHS)>;
+ (and _.KRCWM:$mask, RHS), IsCommutable>;
multiclass AVX512_maskable_cmp_alt<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag Ins, string OpcodeStr,
@@ -373,6 +375,27 @@ multiclass AVX512_maskable_cmp_alt<bits<8> O, Format F, X86VectorVTInfo _,
Ins, !con((ins _.KRCWM:$mask),Ins), OpcodeStr,
AttSrcAsm, IntelSrcAsm, [],[]>;
+// This multiclass generates the unconditional/non-masking, the masking and
+// the zero-masking variant of the vector instruction. In the masking case, the
+// preserved vector elements come from a new dummy input operand tied to $dst.
+multiclass AVX512_maskable_logic<bits<8> O, Format F, X86VectorVTInfo _,
+ dag Outs, dag Ins, string OpcodeStr,
+ string AttSrcAsm, string IntelSrcAsm,
+ dag RHS, dag MaskedRHS,
+ InstrItinClass itin = NoItinerary,
+ bit IsCommutable = 0, SDNode Select = vselect> :
+ AVX512_maskable_custom<O, F, Outs, Ins,
+ !con((ins _.RC:$src0, _.KRCWM:$mask), Ins),
+ !con((ins _.KRCWM:$mask), Ins),
+ OpcodeStr, AttSrcAsm, IntelSrcAsm,
+ [(set _.RC:$dst, RHS)],
+ [(set _.RC:$dst,
+ (Select _.KRCWM:$mask, MaskedRHS, _.RC:$src0))],
+ [(set _.RC:$dst,
+ (Select _.KRCWM:$mask, MaskedRHS,
+ _.ImmAllZerosV))],
+ "$src0 = $dst", itin, IsCommutable>;
+
// Bitcasts between 512-bit vector types. Return the original type since
// no instruction is needed for the conversion.
def : Pat<(v8f64 (bitconvert (v8i64 VR512:$src))), (v8f64 VR512:$src)>;
@@ -428,6 +451,16 @@ def AVX512_256_SET0 : I<0, Pseudo, (outs VR256X:$dst), (ins), "",
[(set VR256X:$dst, (v8i32 immAllZerosV))]>;
}
+// Alias instructions that map fld0 to xorps for SSE or vxorps for AVX.
+// This is expanded by ExpandPostRAPseudos.
+let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
+ isPseudo = 1, SchedRW = [WriteZero], Predicates = [HasVLX, HasDQI] in {
+ def AVX512_FsFLD0SS : I<0, Pseudo, (outs FR32X:$dst), (ins), "",
+ [(set FR32X:$dst, fp32imm0)]>;
+ def AVX512_FsFLD0SD : I<0, Pseudo, (outs FR64X:$dst), (ins), "",
+ [(set FR64X:$dst, fpimm0)]>;
+}
+
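This expansion rests on the usual idiom that xoring a register with itself produces floating-point +0.0; a minimal bit-level sketch:

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  float F = 123.5f;
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits));
  Bits ^= Bits;                       // what (v)xorps does in every lane
  std::memcpy(&F, &Bits, sizeof(F));
  std::printf("%f\n", F);             // prints 0.000000, i.e. +0.0
  return 0;
}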
//===----------------------------------------------------------------------===//
// AVX-512 - VECTOR INSERT
//
@@ -548,25 +581,28 @@ defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
// vinsertps - insert f32 to XMM
-def VINSERTPSzrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
+let ExeDomain = SSEPackedSingle in {
+def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
(ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
"vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2, imm:$src3))]>,
EVEX_4V;
-def VINSERTPSzrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
+def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
(ins VR128X:$src1, f32mem:$src2, u8imm:$src3),
"vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
[(set VR128X:$dst, (X86insertps VR128X:$src1,
(v4f32 (scalar_to_vector (loadf32 addr:$src2))),
imm:$src3))]>, EVEX_4V, EVEX_CD8<32, CD8VT1>;
+}
//===----------------------------------------------------------------------===//
// AVX-512 VECTOR EXTRACT
//---
multiclass vextract_for_size<int Opcode,
- X86VectorVTInfo From, X86VectorVTInfo To,
- PatFrag vextract_extract> {
+ X86VectorVTInfo From, X86VectorVTInfo To,
+ PatFrag vextract_extract,
+ SDNodeXForm EXTRACT_get_vextract_imm> {
let hasSideEffects = 0, ExeDomain = To.ExeDomain in {
// use AVX512_maskable_in_asm (AVX512_maskable can't be used due to
@@ -597,6 +633,24 @@ multiclass vextract_for_size<int Opcode,
[]>, EVEX_K, EVEX;
}
+ def : Pat<(To.VT (vselect To.KRCWM:$mask,
+ (vextract_extract:$ext (From.VT From.RC:$src1),
+ (iPTR imm)),
+ To.RC:$src0)),
+ (!cast<Instruction>(NAME # To.EltSize # "x" # To.NumElts #
+ From.ZSuffix # "rrk")
+ To.RC:$src0, To.KRCWM:$mask, From.RC:$src1,
+ (EXTRACT_get_vextract_imm To.RC:$ext))>;
+
+ def : Pat<(To.VT (vselect To.KRCWM:$mask,
+ (vextract_extract:$ext (From.VT From.RC:$src1),
+ (iPTR imm)),
+ To.ImmAllZerosV)),
+ (!cast<Instruction>(NAME # To.EltSize # "x" # To.NumElts #
+ From.ZSuffix # "rrkz")
+ To.KRCWM:$mask, From.RC:$src1,
+ (EXTRACT_get_vextract_imm To.RC:$ext))>;
+
// Intrinsic call with masking.
def : Pat<(!cast<Intrinsic>("int_x86_avx512_mask_vextract" # To.EltTypeName #
"x" # To.NumElts # "_" # From.Size)
@@ -642,39 +696,45 @@ multiclass vextract_for_size_lowering<string InstrStr, X86VectorVTInfo From,
}
multiclass vextract_for_type<ValueType EltVT32, int Opcode128,
- ValueType EltVT64, int Opcode256> {
+ ValueType EltVT64, int Opcode256> {
defm NAME # "32x4Z" : vextract_for_size<Opcode128,
X86VectorVTInfo<16, EltVT32, VR512>,
X86VectorVTInfo< 4, EltVT32, VR128X>,
- vextract128_extract>,
+ vextract128_extract,
+ EXTRACT_get_vextract128_imm>,
EVEX_V512, EVEX_CD8<32, CD8VT4>;
defm NAME # "64x4Z" : vextract_for_size<Opcode256,
X86VectorVTInfo< 8, EltVT64, VR512>,
X86VectorVTInfo< 4, EltVT64, VR256X>,
- vextract256_extract>,
+ vextract256_extract,
+ EXTRACT_get_vextract256_imm>,
VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT4>;
let Predicates = [HasVLX] in
defm NAME # "32x4Z256" : vextract_for_size<Opcode128,
X86VectorVTInfo< 8, EltVT32, VR256X>,
X86VectorVTInfo< 4, EltVT32, VR128X>,
- vextract128_extract>,
+ vextract128_extract,
+ EXTRACT_get_vextract128_imm>,
EVEX_V256, EVEX_CD8<32, CD8VT4>;
let Predicates = [HasVLX, HasDQI] in
defm NAME # "64x2Z256" : vextract_for_size<Opcode128,
X86VectorVTInfo< 4, EltVT64, VR256X>,
X86VectorVTInfo< 2, EltVT64, VR128X>,
- vextract128_extract>,
+ vextract128_extract,
+ EXTRACT_get_vextract128_imm>,
VEX_W, EVEX_V256, EVEX_CD8<64, CD8VT2>;
let Predicates = [HasDQI] in {
defm NAME # "64x2Z" : vextract_for_size<Opcode128,
X86VectorVTInfo< 8, EltVT64, VR512>,
X86VectorVTInfo< 2, EltVT64, VR128X>,
- vextract128_extract>,
+ vextract128_extract,
+ EXTRACT_get_vextract128_imm>,
VEX_W, EVEX_V512, EVEX_CD8<64, CD8VT2>;
defm NAME # "32x8Z" : vextract_for_size<Opcode256,
X86VectorVTInfo<16, EltVT32, VR512>,
X86VectorVTInfo< 8, EltVT32, VR256X>,
- vextract256_extract>,
+ vextract256_extract,
+ EXTRACT_get_vextract256_imm>,
EVEX_V512, EVEX_CD8<32, CD8VT8>;
}
}
@@ -986,6 +1046,25 @@ multiclass avx512_subvec_broadcast_rm<bits<8> opc, string OpcodeStr,
AVX5128IBase, EVEX;
}
+let Predicates = [HasVLX, HasBWI] in {
+ // loadi16 is tricky to fold, because i16 is not a desirable operation type
+ // (!isTypeDesirableForOp), justifiably.
+ // This means we'll encounter truncated i32 loads; match that here.
+ def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
+ (VPBROADCASTWZ128m addr:$src)>;
+ def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
+ (VPBROADCASTWZ256m addr:$src)>;
+ def : Pat<(v8i16 (X86VBroadcast
+ (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
+ (VPBROADCASTWZ128m addr:$src)>;
+ def : Pat<(v16i16 (X86VBroadcast
+ (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
+ (VPBROADCASTWZ256m addr:$src)>;
+}
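On a little-endian target the truncated i32 load in these patterns yields the same value as a plain 16-bit load from the same address, which is the equivalence the folds rely on; a minimal sketch:

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  unsigned char Buf[4] = {0x34, 0x12, 0xfe, 0xca};
  uint32_t Wide;
  std::memcpy(&Wide, Buf, 4);            // the i32 load the DAG actually has
  uint16_t Narrow;
  std::memcpy(&Narrow, Buf, 2);          // the i16 load the broadcast wants
  assert(static_cast<uint16_t>(Wide) == Narrow);  // trunc(i32 load) == i16 load
  return 0;
}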
+
+//===----------------------------------------------------------------------===//
+// AVX-512 BROADCAST SUBVECTORS
+//
+
defm VBROADCASTI32X4 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
v16i32_info, v4i32x_info>,
EVEX_V512, EVEX_CD8<32, CD8VT4>;
@@ -999,6 +1078,79 @@ defm VBROADCASTF64X4 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf64x4",
v8f64_info, v4f64x_info>, VEX_W,
EVEX_V512, EVEX_CD8<64, CD8VT4>;
+let Predicates = [HasAVX512] in {
+def : Pat<(v32i16 (X86SubVBroadcast (bc_v16i16 (loadv4i64 addr:$src)))),
+ (VBROADCASTI64X4rm addr:$src)>;
+def : Pat<(v64i8 (X86SubVBroadcast (bc_v32i8 (loadv4i64 addr:$src)))),
+ (VBROADCASTI64X4rm addr:$src)>;
+
+// Provide a fallback in case the load node used in the patterns above has
+// additional users, which would prevent those patterns from being selected.
+def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
+ (VINSERTF64x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
+ (v8f32 VR256X:$src), 1)>;
+def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))),
+ (VINSERTF64x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
+ (v4f64 VR256X:$src), 1)>;
+def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))),
+ (VINSERTI64x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
+ (v4i64 VR256X:$src), 1)>;
+def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
+ (VINSERTI64x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
+ (v8i32 VR256X:$src), 1)>;
+def : Pat<(v32i16 (X86SubVBroadcast (v16i16 VR256X:$src))),
+ (VINSERTI64x4Zrr (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
+ (v16i16 VR256X:$src), 1)>;
+def : Pat<(v64i8 (X86SubVBroadcast (v32i8 VR256X:$src))),
+ (VINSERTI64x4Zrr (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
+ (v32i8 VR256X:$src), 1)>;
+
+def : Pat<(v32i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
+ (VBROADCASTI32X4rm addr:$src)>;
+def : Pat<(v64i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
+ (VBROADCASTI32X4rm addr:$src)>;
+
+// Provide a fallback in case the load node used in the patterns above has
+// additional users, which would prevent those patterns from being selected.
+def : Pat<(v8f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
+ (VINSERTF64x4Zrr
+ (VINSERTF32x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)),
+ VR128X:$src, sub_xmm),
+ VR128X:$src, 1),
+ (EXTRACT_SUBREG
+ (v8f64 (VINSERTF32x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)),
+ VR128X:$src, sub_xmm),
+ VR128X:$src, 1)), sub_ymm), 1)>;
+def : Pat<(v8i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
+ (VINSERTI64x4Zrr
+ (VINSERTI32x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)),
+ VR128X:$src, sub_xmm),
+ VR128X:$src, 1),
+ (EXTRACT_SUBREG
+ (v8i64 (VINSERTI32x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)),
+ VR128X:$src, sub_xmm),
+ VR128X:$src, 1)), sub_ymm), 1)>;
+
+def : Pat<(v32i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
+ (VINSERTI64x4Zrr
+ (VINSERTI32x4Zrr (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)),
+ VR128X:$src, sub_xmm),
+ VR128X:$src, 1),
+ (EXTRACT_SUBREG
+ (v32i16 (VINSERTI32x4Zrr (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)),
+ VR128X:$src, sub_xmm),
+ VR128X:$src, 1)), sub_ymm), 1)>;
+def : Pat<(v64i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
+ (VINSERTI64x4Zrr
+ (VINSERTI32x4Zrr (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)),
+ VR128X:$src, sub_xmm),
+ VR128X:$src, 1),
+ (EXTRACT_SUBREG
+ (v64i8 (VINSERTI32x4Zrr (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)),
+ VR128X:$src, sub_xmm),
+ VR128X:$src, 1)), sub_ymm), 1)>;
+}
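These register fallbacks all materialize the same thing: the source subvector repeated across the wider destination. A minimal sketch of that X86SubVBroadcast behaviour, with an illustrative helper name (the 128-to-512-bit patterns apply the same doubling twice):

#include <array>
#include <cassert>
#include <cstdint>

// Duplicate an N-element source into both halves of a 2N-element result,
// which is what the INSERT_SUBREG + VINSERT* sequences above produce.
template <typename T, size_t N>
std::array<T, 2 * N> SubVBroadcast(const std::array<T, N> &Src) {
  std::array<T, 2 * N> Dst{};
  for (size_t I = 0; I != N; ++I) {
    Dst[I] = Src[I];       // low half
    Dst[N + I] = Src[I];   // high half
  }
  return Dst;
}

int main() {
  std::array<uint32_t, 4> Src{1, 2, 3, 4};
  auto Dst = SubVBroadcast(Src);
  assert(Dst[0] == 1 && Dst[4] == 1 && Dst[7] == 4);
  return 0;
}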
+
let Predicates = [HasVLX] in {
defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
v8i32x_info, v4i32x_info>,
@@ -1006,7 +1158,28 @@ defm VBROADCASTI32X4Z256 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti32x4",
defm VBROADCASTF32X4Z256 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf32x4",
v8f32x_info, v4f32x_info>,
EVEX_V256, EVEX_CD8<32, CD8VT4>;
+
+def : Pat<(v16i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
+ (VBROADCASTI32X4Z256rm addr:$src)>;
+def : Pat<(v32i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
+ (VBROADCASTI32X4Z256rm addr:$src)>;
+
+// Provide a fallback in case the load node used in the patterns above has
+// additional users, which would prevent those patterns from being selected.
+def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
+ (VINSERTF32x4Z256rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ (v4f32 VR128X:$src), 1)>;
+def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
+ (VINSERTI32x4Z256rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ (v4i32 VR128X:$src), 1)>;
+def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128X:$src))),
+ (VINSERTI32x4Z256rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ (v8i16 VR128X:$src), 1)>;
+def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128X:$src))),
+ (VINSERTI32x4Z256rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ (v16i8 VR128X:$src), 1)>;
}
+
let Predicates = [HasVLX, HasDQI] in {
defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti64x2",
v4i64x_info, v2i64x_info>, VEX_W,
@@ -1014,7 +1187,73 @@ defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti64x2",
defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf64x2",
v4f64x_info, v2f64x_info>, VEX_W,
EVEX_V256, EVEX_CD8<64, CD8VT2>;
+
+// Provide a fallback in case the load node used in the patterns above has
+// additional users, which would prevent those patterns from being selected.
+def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
+ (VINSERTF64x2Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ (v2f64 VR128X:$src), 1)>;
+def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
+ (VINSERTI64x2Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ (v2i64 VR128X:$src), 1)>;
}
+
+let Predicates = [HasVLX, NoDQI] in {
+def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
+ (VBROADCASTF32X4Z256rm addr:$src)>;
+def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
+ (VBROADCASTI32X4Z256rm addr:$src)>;
+
+// Provide a fallback in case the load node used in the patterns above has
+// additional users, which would prevent those patterns from being selected.
+def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
+ (VINSERTF32x4Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ (v2f64 VR128X:$src), 1)>;
+def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
+ (VINSERTI32x4Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+ (v2i64 VR128X:$src), 1)>;
+}
+
+let Predicates = [HasAVX512, NoDQI] in {
+def : Pat<(v8f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
+ (VBROADCASTF32X4rm addr:$src)>;
+def : Pat<(v8i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
+ (VBROADCASTI32X4rm addr:$src)>;
+
+def : Pat<(v16f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
+ (VINSERTF64x4Zrr
+ (VINSERTF32x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)),
+ VR128X:$src, sub_xmm),
+ VR128X:$src, 1),
+ (EXTRACT_SUBREG
+ (v16f32 (VINSERTF32x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)),
+ VR128X:$src, sub_xmm),
+ VR128X:$src, 1)), sub_ymm), 1)>;
+def : Pat<(v16i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
+ (VINSERTI64x4Zrr
+ (VINSERTI32x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)),
+ VR128X:$src, sub_xmm),
+ VR128X:$src, 1),
+ (EXTRACT_SUBREG
+ (v16i32 (VINSERTI32x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)),
+ VR128X:$src, sub_xmm),
+ VR128X:$src, 1)), sub_ymm), 1)>;
+
+def : Pat<(v16f32 (X86SubVBroadcast (loadv8f32 addr:$src))),
+ (VBROADCASTF64X4rm addr:$src)>;
+def : Pat<(v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src)))),
+ (VBROADCASTI64X4rm addr:$src)>;
+
+// Provide a fallback in case the load node used in the patterns above has
+// additional users, which would prevent those patterns from being selected.
+def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
+ (VINSERTF64x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
+ (v8f32 VR256X:$src), 1)>;
+def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
+ (VINSERTI64x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
+ (v8i32 VR256X:$src), 1)>;
+}
+
let Predicates = [HasDQI] in {
defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti64x2",
v8i64_info, v2i64x_info>, VEX_W,
@@ -1028,6 +1267,34 @@ defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf64x2",
defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf32x8",
v16f32_info, v8f32x_info>,
EVEX_V512, EVEX_CD8<32, CD8VT8>;
+
+// Provide a fallback in case the load node used in the patterns above has
+// additional users, which would prevent those patterns from being selected.
+def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
+ (VINSERTF32x8Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
+ (v8f32 VR256X:$src), 1)>;
+def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
+ (VINSERTI32x8Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
+ (v8i32 VR256X:$src), 1)>;
+
+def : Pat<(v16f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
+ (VINSERTF32x8Zrr
+ (VINSERTF32x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)),
+ VR128X:$src, sub_xmm),
+ VR128X:$src, 1),
+ (EXTRACT_SUBREG
+ (v16f32 (VINSERTF32x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)),
+ VR128X:$src, sub_xmm),
+ VR128X:$src, 1)), sub_ymm), 1)>;
+def : Pat<(v16i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
+ (VINSERTI32x8Zrr
+ (VINSERTI32x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)),
+ VR128X:$src, sub_xmm),
+ VR128X:$src, 1),
+ (EXTRACT_SUBREG
+ (v16i32 (VINSERTI32x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)),
+ VR128X:$src, sub_xmm),
+ VR128X:$src, 1)), sub_ymm), 1)>;
}
multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,
@@ -1049,10 +1316,10 @@ multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr,
EVEX_V128;
}
-defm VPBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
- avx512vl_i32_info, avx512vl_i64_info>;
-defm VPBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
- avx512vl_f32_info, avx512vl_f64_info>;
+defm VBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2",
+ avx512vl_i32_info, avx512vl_i64_info>;
+defm VBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2",
+ avx512vl_f32_info, avx512vl_f64_info>;
def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))),
(VBROADCASTSSZr (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>;
@@ -1091,112 +1358,105 @@ defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",
//===----------------------------------------------------------------------===//
// -- VPERMI2 - 3 source operands form --
-multiclass avx512_perm_i<bits<8> opc, string OpcodeStr,
- X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
-let Constraints = "$src1 = $dst" in {
- defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst),
+multiclass avx512_perm_i<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
+let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
+ // The index operand in the pattern should really be an integer type. However,
+ // if we do that and it happens to come from a bitcast, then it becomes
+ // difficult to find the bitcast needed to convert the index to the
+ // destination type for the passthru since it will be folded with the bitcast
+ // of the index operand.
+ defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
- (_.VT (X86VPermi2X IdxVT.RC:$src1, _.RC:$src2, _.RC:$src3))>, EVEX_4V,
+ (_.VT (X86VPermi2X _.RC:$src1, _.RC:$src2, _.RC:$src3)), 1>, EVEX_4V,
AVX5128IBase;
- defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
+ defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
- (_.VT (X86VPermi2X IdxVT.RC:$src1, _.RC:$src2,
- (_.VT (bitconvert (_.LdFrag addr:$src3)))))>,
+ (_.VT (X86VPermi2X _.RC:$src1, _.RC:$src2,
+ (_.VT (bitconvert (_.LdFrag addr:$src3))))), 1>,
EVEX_4V, AVX5128IBase;
}
}
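For reference, a minimal sketch of the vpermi2 operation these patterns select, assuming 32-bit elements, ignoring masking, and using illustrative helper names:

#include <array>
#include <cassert>
#include <cstdint>

// Each index element selects from the 2*N-element table formed by
// concatenating the two data sources. On hardware the result overwrites the
// index register, hence the "$src1 = $dst" constraint; here it is returned.
template <size_t N>
std::array<uint32_t, N> vpermi2(const std::array<uint32_t, N> &Idx,
                                const std::array<uint32_t, N> &Src2,
                                const std::array<uint32_t, N> &Src3) {
  std::array<uint32_t, N> Dst{};
  for (size_t I = 0; I != N; ++I) {
    uint32_t Sel = Idx[I] % (2 * N);                 // low index bits
    Dst[I] = Sel < N ? Src2[Sel] : Src3[Sel - N];    // table select
  }
  return Dst;
}

int main() {
  std::array<uint32_t, 4> Idx{0, 5, 3, 6}, A{10, 11, 12, 13}, B{20, 21, 22, 23};
  auto R = vpermi2(Idx, A, B);
  assert(R[0] == 10 && R[1] == 21 && R[2] == 13 && R[3] == 22);
  return 0;
}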
multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr,
- X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
- let Constraints = "$src1 = $dst" in
- defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst),
+ X86VectorVTInfo _> {
+ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
+ defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
!strconcat("$src2, ${src3}", _.BroadcastStr ),
- (_.VT (X86VPermi2X IdxVT.RC:$src1,
- _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))))>,
- AVX5128IBase, EVEX_4V, EVEX_B;
+ (_.VT (X86VPermi2X _.RC:$src1,
+ _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))),
+ 1>, AVX5128IBase, EVEX_4V, EVEX_B;
}
multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr,
- AVX512VLVectorVTInfo VTInfo,
- AVX512VLVectorVTInfo ShuffleMask> {
- defm NAME: avx512_perm_i<opc, OpcodeStr, VTInfo.info512,
- ShuffleMask.info512>,
- avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info512,
- ShuffleMask.info512>, EVEX_V512;
+ AVX512VLVectorVTInfo VTInfo> {
+ defm NAME: avx512_perm_i<opc, OpcodeStr, VTInfo.info512>,
+ avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info512>, EVEX_V512;
let Predicates = [HasVLX] in {
- defm NAME#128: avx512_perm_i<opc, OpcodeStr, VTInfo.info128,
- ShuffleMask.info128>,
- avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info128,
- ShuffleMask.info128>, EVEX_V128;
- defm NAME#256: avx512_perm_i<opc, OpcodeStr, VTInfo.info256,
- ShuffleMask.info256>,
- avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info256,
- ShuffleMask.info256>, EVEX_V256;
+ defm NAME#128: avx512_perm_i<opc, OpcodeStr, VTInfo.info128>,
+ avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info128>, EVEX_V128;
+ defm NAME#256: avx512_perm_i<opc, OpcodeStr, VTInfo.info256>,
+ avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info256>, EVEX_V256;
}
}
multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo VTInfo,
- AVX512VLVectorVTInfo Idx,
Predicate Prd> {
let Predicates = [Prd] in
- defm NAME: avx512_perm_i<opc, OpcodeStr, VTInfo.info512,
- Idx.info512>, EVEX_V512;
+ defm NAME: avx512_perm_i<opc, OpcodeStr, VTInfo.info512>, EVEX_V512;
let Predicates = [Prd, HasVLX] in {
- defm NAME#128: avx512_perm_i<opc, OpcodeStr, VTInfo.info128,
- Idx.info128>, EVEX_V128;
- defm NAME#256: avx512_perm_i<opc, OpcodeStr, VTInfo.info256,
- Idx.info256>, EVEX_V256;
+ defm NAME#128: avx512_perm_i<opc, OpcodeStr, VTInfo.info128>, EVEX_V128;
+ defm NAME#256: avx512_perm_i<opc, OpcodeStr, VTInfo.info256>, EVEX_V256;
}
}
defm VPERMI2D : avx512_perm_i_sizes<0x76, "vpermi2d",
- avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
+ avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2Q : avx512_perm_i_sizes<0x76, "vpermi2q",
- avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
+ avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPERMI2W : avx512_perm_i_sizes_bw<0x75, "vpermi2w",
- avx512vl_i16_info, avx512vl_i16_info, HasBWI>,
+ avx512vl_i16_info, HasBWI>,
VEX_W, EVEX_CD8<16, CD8VF>;
defm VPERMI2B : avx512_perm_i_sizes_bw<0x75, "vpermi2b",
- avx512vl_i8_info, avx512vl_i8_info, HasVBMI>,
+ avx512vl_i8_info, HasVBMI>,
EVEX_CD8<8, CD8VF>;
defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps",
- avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;
+ avx512vl_f32_info>, EVEX_CD8<32, CD8VF>;
defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd",
- avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
+ avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VF>;
// VPERMT2
multiclass avx512_perm_t<bits<8> opc, string OpcodeStr,
X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
-let Constraints = "$src1 = $dst" in {
+let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins IdxVT.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
- (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3))>, EVEX_4V,
- AVX5128IBase;
+ (_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2, _.RC:$src3)), 1>,
+ EVEX_4V, AVX5128IBase;
defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins IdxVT.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
(_.VT (X86VPermt2 _.RC:$src1, IdxVT.RC:$src2,
- (bitconvert (_.LdFrag addr:$src3))))>,
+ (bitconvert (_.LdFrag addr:$src3)))), 1>,
EVEX_4V, AVX5128IBase;
}
}
multiclass avx512_perm_t_mb<bits<8> opc, string OpcodeStr,
X86VectorVTInfo _, X86VectorVTInfo IdxVT> {
- let Constraints = "$src1 = $dst" in
+ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins IdxVT.RC:$src2, _.ScalarMemOp:$src3),
OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
!strconcat("$src2, ${src3}", _.BroadcastStr ),
(_.VT (X86VPermt2 _.RC:$src1,
- IdxVT.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))))>,
- AVX5128IBase, EVEX_4V, EVEX_B;
+ IdxVT.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))),
+ 1>, AVX5128IBase, EVEX_4V, EVEX_B;
}
multiclass avx512_perm_t_sizes<bits<8> opc, string OpcodeStr,
@@ -1263,7 +1523,7 @@ multiclass avx512_blendmask<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
(ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
!strconcat(OpcodeStr,
"\t{$src2, $src1, ${dst} {${mask}}|${dst} {${mask}}, $src1, $src2}"),
- [(set _.RC:$dst, (vselect _.KRCWM:$mask,
+ [(set _.RC:$dst, (vselect _.KRCWM:$mask,
(_.VT _.RC:$src2),
(_.VT _.RC:$src1)))]>, EVEX_4V, EVEX_K;
let hasSideEffects = 0 in
@@ -1354,15 +1614,15 @@ def : Pat<(v8f32 (vselect (v8i1 VK8WM:$mask), (v8f32 VR256X:$src1),
(v8f32 VR256X:$src2))),
(EXTRACT_SUBREG
(v16f32 (VBLENDMPSZrrk (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
- (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
- (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
+ (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
+ (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))), sub_ymm)>;
def : Pat<(v8i32 (vselect (v8i1 VK8WM:$mask), (v8i32 VR256X:$src1),
(v8i32 VR256X:$src2))),
(EXTRACT_SUBREG
(v16i32 (VPBLENDMDZrrk (COPY_TO_REGCLASS VK8WM:$mask, VK16WM),
- (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
- (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
+ (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
+ (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)))), sub_ymm)>;
}
//===----------------------------------------------------------------------===//
// Compare Instructions
@@ -1421,6 +1681,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd>
}// let isAsmParserOnly = 1, hasSideEffects = 0
let isCodeGenOnly = 1 in {
+ let isCommutable = 1 in
def rr : AVX512Ii8<0xC2, MRMSrcReg,
(outs _.KRC:$dst), (ins _.FRC:$src1, _.FRC:$src2, AVXCC:$cc),
!strconcat("vcmp${cc}", _.Suffix,
@@ -1449,7 +1710,8 @@ let Predicates = [HasAVX512] in {
}
multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86VectorVTInfo _> {
+ X86VectorVTInfo _, bit IsCommutable> {
+ let isCommutable = IsCommutable in
def rr : AVX512BI<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -1480,8 +1742,8 @@ multiclass avx512_icmp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86VectorVTInfo _> :
- avx512_icmp_packed<opc, OpcodeStr, OpNode, _> {
+ X86VectorVTInfo _, bit IsCommutable> :
+ avx512_icmp_packed<opc, OpcodeStr, OpNode, _, IsCommutable> {
def rmb : AVX512BI<opc, MRMSrcMem,
(outs _.KRC:$dst), (ins _.RC:$src1, _.ScalarMemOp:$src2),
!strconcat(OpcodeStr, "\t{${src2}", _.BroadcastStr, ", $src1, $dst",
@@ -1503,48 +1765,49 @@ multiclass avx512_icmp_packed_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
multiclass avx512_icmp_packed_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
- AVX512VLVectorVTInfo VTInfo, Predicate prd> {
+ AVX512VLVectorVTInfo VTInfo, Predicate prd,
+ bit IsCommutable = 0> {
let Predicates = [prd] in
- defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info512>,
- EVEX_V512;
+ defm Z : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info512,
+ IsCommutable>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
- defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info256>,
- EVEX_V256;
- defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info128>,
- EVEX_V128;
+ defm Z256 : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info256,
+ IsCommutable>, EVEX_V256;
+ defm Z128 : avx512_icmp_packed<opc, OpcodeStr, OpNode, VTInfo.info128,
+ IsCommutable>, EVEX_V128;
}
}
multiclass avx512_icmp_packed_rmb_vl<bits<8> opc, string OpcodeStr,
SDNode OpNode, AVX512VLVectorVTInfo VTInfo,
- Predicate prd> {
+ Predicate prd, bit IsCommutable = 0> {
let Predicates = [prd] in
- defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, VTInfo.info512>,
- EVEX_V512;
+ defm Z : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, VTInfo.info512,
+ IsCommutable>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
- defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, VTInfo.info256>,
- EVEX_V256;
- defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, VTInfo.info128>,
- EVEX_V128;
+ defm Z256 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, VTInfo.info256,
+ IsCommutable>, EVEX_V256;
+ defm Z128 : avx512_icmp_packed_rmb<opc, OpcodeStr, OpNode, VTInfo.info128,
+ IsCommutable>, EVEX_V128;
}
}
defm VPCMPEQB : avx512_icmp_packed_vl<0x74, "vpcmpeqb", X86pcmpeqm,
- avx512vl_i8_info, HasBWI>,
+ avx512vl_i8_info, HasBWI, 1>,
EVEX_CD8<8, CD8VF>;
defm VPCMPEQW : avx512_icmp_packed_vl<0x75, "vpcmpeqw", X86pcmpeqm,
- avx512vl_i16_info, HasBWI>,
+ avx512vl_i16_info, HasBWI, 1>,
EVEX_CD8<16, CD8VF>;
defm VPCMPEQD : avx512_icmp_packed_rmb_vl<0x76, "vpcmpeqd", X86pcmpeqm,
- avx512vl_i32_info, HasAVX512>,
+ avx512vl_i32_info, HasAVX512, 1>,
EVEX_CD8<32, CD8VF>;
defm VPCMPEQQ : avx512_icmp_packed_rmb_vl<0x29, "vpcmpeqq", X86pcmpeqm,
- avx512vl_i64_info, HasAVX512>,
+ avx512vl_i64_info, HasAVX512, 1>,
T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
defm VPCMPGTB : avx512_icmp_packed_vl<0x64, "vpcmpgtb", X86pcmpgtm,
@@ -1563,18 +1826,21 @@ defm VPCMPGTQ : avx512_icmp_packed_rmb_vl<0x37, "vpcmpgtq", X86pcmpgtm,
avx512vl_i64_info, HasAVX512>,
T8PD, VEX_W, EVEX_CD8<64, CD8VF>;
+let Predicates = [HasAVX512, NoVLX] in {
def : Pat<(v8i1 (X86pcmpgtm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
(COPY_TO_REGCLASS (VPCMPGTDZrr
- (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
- (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))), VK8)>;
+ (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
+ (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), VK8)>;
def : Pat<(v8i1 (X86pcmpeqm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))),
(COPY_TO_REGCLASS (VPCMPEQDZrr
- (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
- (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))), VK8)>;
+ (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
+ (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm))), VK8)>;
+}
multiclass avx512_icmp_cc<bits<8> opc, string Suffix, SDNode OpNode,
X86VectorVTInfo _> {
+ let isCommutable = 1 in
def rri : AVX512AIi8<opc, MRMSrcReg,
(outs _.KRC:$dst), (ins _.RC:$src1, _.RC:$src2, AVX512ICC:$cc),
!strconcat("vpcmp${cc}", Suffix,
@@ -1740,7 +2006,7 @@ multiclass avx512_vcmp_common<X86VectorVTInfo _> {
"$src2, $src1", "$src1, $src2",
(X86cmpm (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
- imm:$cc)>;
+ imm:$cc), 1>;
defm rmi : AVX512_maskable_cmp<0xC2, MRMSrcMem, _,
(outs _.KRC:$dst),(ins _.RC:$src1, _.MemOp:$src2, AVXCC:$cc),
@@ -1824,18 +2090,18 @@ defm VCMPPS : avx512_vcmp<avx512vl_f32_info>,
def : Pat<(v8i1 (X86cmpm (v8f32 VR256X:$src1), (v8f32 VR256X:$src2), imm:$cc)),
(COPY_TO_REGCLASS (VCMPPSZrri
- (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
- (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
+ (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
+ (v16f32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
imm:$cc), VK8)>;
def : Pat<(v8i1 (X86cmpm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
(COPY_TO_REGCLASS (VPCMPDZrri
- (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
- (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
+ (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
+ (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
imm:$cc), VK8)>;
def : Pat<(v8i1 (X86cmpmu (v8i32 VR256X:$src1), (v8i32 VR256X:$src2), imm:$cc)),
(COPY_TO_REGCLASS (VPCMPUDZrri
- (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)),
- (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm)),
+ (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src1, sub_ymm)),
+ (v16i32 (INSERT_SUBREG (IMPLICIT_DEF), VR256X:$src2, sub_ymm)),
imm:$cc), VK8)>;
// ----------------------------------------------------------------
@@ -2011,34 +2277,38 @@ let Predicates = [HasBWI] in {
}
// GR from/to mask register
-let Predicates = [HasDQI] in {
- def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
- (KMOVBkr (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit))>;
- def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
- (EXTRACT_SUBREG (KMOVBrk VK8:$src), sub_8bit)>;
- def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
- (KMOVBrk VK8:$src)>;
- def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
- (KMOVBrk VK8:$src)>;
-}
-let Predicates = [HasAVX512] in {
- def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
- (KMOVWkr (SUBREG_TO_REG (i32 0), GR16:$src, sub_16bit))>;
- def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
- (EXTRACT_SUBREG (KMOVWrk VK16:$src), sub_16bit)>;
- def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
- (KMOVWrk VK16:$src)>;
- def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
- (KMOVWrk VK16:$src)>;
-}
-let Predicates = [HasBWI] in {
- def : Pat<(v32i1 (bitconvert (i32 GR32:$src))), (KMOVDkr GR32:$src)>;
- def : Pat<(i32 (bitconvert (v32i1 VK32:$src))), (KMOVDrk VK32:$src)>;
-}
-let Predicates = [HasBWI] in {
- def : Pat<(v64i1 (bitconvert (i64 GR64:$src))), (KMOVQkr GR64:$src)>;
- def : Pat<(i64 (bitconvert (v64i1 VK64:$src))), (KMOVQrk VK64:$src)>;
-}
+def : Pat<(v16i1 (bitconvert (i16 GR16:$src))),
+ (COPY_TO_REGCLASS GR16:$src, VK16)>;
+def : Pat<(i16 (bitconvert (v16i1 VK16:$src))),
+ (COPY_TO_REGCLASS VK16:$src, GR16)>;
+
+def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
+ (COPY_TO_REGCLASS GR8:$src, VK8)>;
+def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
+ (COPY_TO_REGCLASS VK8:$src, GR8)>;
+
+def : Pat<(i32 (zext (i16 (bitconvert (v16i1 VK16:$src))))),
+ (KMOVWrk VK16:$src)>;
+def : Pat<(i32 (anyext (i16 (bitconvert (v16i1 VK16:$src))))),
+ (i32 (INSERT_SUBREG (IMPLICIT_DEF),
+ (i16 (COPY_TO_REGCLASS VK16:$src, GR16)), sub_16bit))>;
+
+def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
+ (MOVZX32rr8 (COPY_TO_REGCLASS VK8:$src, GR8))>, Requires<[NoDQI]>;
+def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
+ (KMOVBrk VK8:$src)>, Requires<[HasDQI]>;
+def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
+ (i32 (INSERT_SUBREG (IMPLICIT_DEF),
+ (i8 (COPY_TO_REGCLASS VK8:$src, GR8)), sub_8bit))>;
+
+def : Pat<(v32i1 (bitconvert (i32 GR32:$src))),
+ (COPY_TO_REGCLASS GR32:$src, VK32)>;
+def : Pat<(i32 (bitconvert (v32i1 VK32:$src))),
+ (COPY_TO_REGCLASS VK32:$src, GR32)>;
+def : Pat<(v64i1 (bitconvert (i64 GR64:$src))),
+ (COPY_TO_REGCLASS GR64:$src, VK64)>;
+def : Pat<(i64 (bitconvert (v64i1 VK64:$src))),
+ (COPY_TO_REGCLASS VK64:$src, GR64)>;
// Load/store kreg
let Predicates = [HasDQI] in {
@@ -2104,65 +2374,58 @@ let Predicates = [HasBWI] in {
(KMOVQkm addr:$src)>;
}
-def assertzext_i1 : PatFrag<(ops node:$src), (assertzext node:$src), [{
- return cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i1;
-}]>;
-
let Predicates = [HasAVX512] in {
def : Pat<(i1 (trunc (i64 GR64:$src))),
- (COPY_TO_REGCLASS (i16 (EXTRACT_SUBREG (AND64ri8 $src, (i64 1)),
- sub_16bit)), VK1)>;
-
- def : Pat<(i1 (trunc (i64 (assertzext_i1 GR64:$src)))),
- (COPY_TO_REGCLASS (i16 (EXTRACT_SUBREG $src, sub_16bit)), VK1)>;
+ (COPY_TO_REGCLASS (KMOVWkr (AND32ri8 (EXTRACT_SUBREG $src, sub_32bit),
+ (i32 1))), VK1)>;
def : Pat<(i1 (trunc (i32 GR32:$src))),
- (COPY_TO_REGCLASS (i16 (EXTRACT_SUBREG (AND32ri8 $src, (i32 1)),
- sub_16bit)), VK1)>;
+ (COPY_TO_REGCLASS (KMOVWkr (AND32ri8 $src, (i32 1))), VK1)>;
def : Pat<(i1 (trunc (i32 (assertzext_i1 GR32:$src)))),
- (COPY_TO_REGCLASS (i16 (EXTRACT_SUBREG $src, sub_16bit)), VK1)>;
+ (COPY_TO_REGCLASS GR32:$src, VK1)>;
def : Pat<(i1 (trunc (i8 GR8:$src))),
- (COPY_TO_REGCLASS (i16 (SUBREG_TO_REG (i64 0), (AND8ri $src, (i8 1)),
- sub_8bit)), VK1)>;
-
- def : Pat<(i1 (trunc (i8 (assertzext_i1 GR8:$src)))),
- (COPY_TO_REGCLASS (i16 (SUBREG_TO_REG (i64 0), $src, sub_8bit)), VK1)>;
+ (COPY_TO_REGCLASS
+ (KMOVWkr (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
+ GR8:$src, sub_8bit), (i32 1))),
+ VK1)>;
def : Pat<(i1 (trunc (i16 GR16:$src))),
- (COPY_TO_REGCLASS (AND16ri GR16:$src, (i16 1)), VK1)>;
-
- def : Pat<(i1 (trunc (i16 (assertzext_i1 GR16:$src)))),
- (COPY_TO_REGCLASS $src, VK1)>;
+ (COPY_TO_REGCLASS
+ (KMOVWkr (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
+ GR16:$src, sub_16bit), (i32 1))),
+ VK1)>;
def : Pat<(i32 (zext VK1:$src)),
- (i32 (SUBREG_TO_REG (i64 0), (i16 (COPY_TO_REGCLASS $src, GR16)),
- sub_16bit))>;
+ (AND32ri8 (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1))>;
def : Pat<(i32 (anyext VK1:$src)),
- (i32 (SUBREG_TO_REG (i64 0), (i16 (COPY_TO_REGCLASS $src, GR16)),
- sub_16bit))>;
+ (COPY_TO_REGCLASS VK1:$src, GR32)>;
def : Pat<(i8 (zext VK1:$src)),
- (i8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS VK1:$src, GR16)), sub_8bit))>;
+ (EXTRACT_SUBREG
+ (AND32ri8 (KMOVWrk
+ (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)), sub_8bit)>;
def : Pat<(i8 (anyext VK1:$src)),
- (i8 (EXTRACT_SUBREG (i16 (COPY_TO_REGCLASS $src, GR16)), sub_8bit))>;
+ (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK1:$src, GR32)), sub_8bit)>;
def : Pat<(i64 (zext VK1:$src)),
- (i64 (SUBREG_TO_REG (i64 0), (i16 (COPY_TO_REGCLASS $src, GR16)),
- sub_16bit))>;
+ (AND64ri8 (SUBREG_TO_REG (i64 0),
+ (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), sub_32bit), (i64 1))>;
def : Pat<(i64 (anyext VK1:$src)),
- (i64 (SUBREG_TO_REG (i64 0), (i16 (COPY_TO_REGCLASS $src, GR16)),
- sub_16bit))>;
+ (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
+ (i32 (COPY_TO_REGCLASS VK1:$src, GR32)), sub_32bit)>;
def : Pat<(i16 (zext VK1:$src)),
- (COPY_TO_REGCLASS $src, GR16)>;
+ (EXTRACT_SUBREG
+ (AND32ri8 (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)),
+ sub_16bit)>;
def : Pat<(i16 (anyext VK1:$src)),
- (i16 (COPY_TO_REGCLASS $src, GR16))>;
+ (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK1:$src, GR32)), sub_16bit)>;
}
def : Pat<(v16i1 (scalar_to_vector VK1:$src)),
(COPY_TO_REGCLASS VK1:$src, VK16)>;
@@ -2181,34 +2444,12 @@ def : Pat<(store (i1 -1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
def : Pat<(store (i1 1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>;
def : Pat<(store (i1 0), addr:$dst), (MOV8mi addr:$dst, (i8 0))>;
-// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
-let Predicates = [HasAVX512, NoDQI] in {
- // GR from/to 8-bit mask without native support
- def : Pat<(v8i1 (bitconvert (i8 GR8:$src))),
- (COPY_TO_REGCLASS
- (KMOVWkr (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit)), VK8)>;
- def : Pat<(i8 (bitconvert (v8i1 VK8:$src))),
- (EXTRACT_SUBREG
- (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16)),
- sub_8bit)>;
- def : Pat<(i32 (zext (i8 (bitconvert (v8i1 VK8:$src))))),
- (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16))>;
- def : Pat<(i32 (anyext (i8 (bitconvert (v8i1 VK8:$src))))),
- (KMOVWrk (COPY_TO_REGCLASS VK8:$src, VK16))>;
-}
-
-let Predicates = [HasAVX512] in {
- def : Pat<(i1 (X86Vextract VK16:$src, (iPTR 0))),
- (COPY_TO_REGCLASS VK16:$src, VK1)>;
- def : Pat<(i1 (X86Vextract VK8:$src, (iPTR 0))),
- (COPY_TO_REGCLASS VK8:$src, VK1)>;
-}
-let Predicates = [HasBWI] in {
- def : Pat<(i1 (X86Vextract VK32:$src, (iPTR 0))),
- (COPY_TO_REGCLASS VK32:$src, VK1)>;
- def : Pat<(i1 (X86Vextract VK64:$src, (iPTR 0))),
- (COPY_TO_REGCLASS VK64:$src, VK1)>;
-}
+def : Pat<(i1 (X86Vextract VK64:$src, (iPTR 0))), (COPY_TO_REGCLASS VK64:$src, VK1)>;
+def : Pat<(i1 (X86Vextract VK32:$src, (iPTR 0))), (COPY_TO_REGCLASS VK32:$src, VK1)>;
+def : Pat<(i1 (X86Vextract VK16:$src, (iPTR 0))), (COPY_TO_REGCLASS VK16:$src, VK1)>;
+def : Pat<(i1 (X86Vextract VK8:$src, (iPTR 0))), (COPY_TO_REGCLASS VK8:$src, VK1)>;
+def : Pat<(i1 (X86Vextract VK4:$src, (iPTR 0))), (COPY_TO_REGCLASS VK4:$src, VK1)>;
+def : Pat<(i1 (X86Vextract VK2:$src, (iPTR 0))), (COPY_TO_REGCLASS VK2:$src, VK1)>;
// Mask unary operation
// - KNOT
@@ -2233,7 +2474,7 @@ multiclass avx512_mask_unop_all<bits<8> opc, string OpcodeStr,
HasBWI>, VEX, PS, VEX_W;
}
-defm KNOT : avx512_mask_unop_all<0x44, "knot", not>;
+defm KNOT : avx512_mask_unop_all<0x44, "knot", vnot>;
multiclass avx512_mask_unop_int<string IntName, string InstName> {
let Predicates = [HasAVX512] in
@@ -2244,27 +2485,15 @@ multiclass avx512_mask_unop_int<string IntName, string InstName> {
}
defm : avx512_mask_unop_int<"knot", "KNOT">;
-let Predicates = [HasDQI] in
-def : Pat<(xor VK8:$src1, (v8i1 immAllOnesV)), (KNOTBrr VK8:$src1)>;
-let Predicates = [HasAVX512] in
-def : Pat<(xor VK16:$src1, (v16i1 immAllOnesV)), (KNOTWrr VK16:$src1)>;
-let Predicates = [HasBWI] in
-def : Pat<(xor VK32:$src1, (v32i1 immAllOnesV)), (KNOTDrr VK32:$src1)>;
-let Predicates = [HasBWI] in
-def : Pat<(xor VK64:$src1, (v64i1 immAllOnesV)), (KNOTQrr VK64:$src1)>;
-
// KNL does not support KMOVB, 8-bit mask is promoted to 16-bit
-let Predicates = [HasAVX512, NoDQI] in {
-def : Pat<(xor VK8:$src1, (v8i1 immAllOnesV)),
- (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src1, VK16)), VK8)>;
-def : Pat<(not VK8:$src),
- (COPY_TO_REGCLASS
- (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
-}
-def : Pat<(xor VK4:$src1, (v4i1 immAllOnesV)),
- (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src1, VK16)), VK4)>;
-def : Pat<(xor VK2:$src1, (v2i1 immAllOnesV)),
- (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src1, VK16)), VK2)>;
+let Predicates = [HasAVX512, NoDQI] in
+def : Pat<(vnot VK8:$src),
+ (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$src, VK16)), VK8)>;
+
+def : Pat<(vnot VK4:$src),
+ (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK4:$src, VK16)), VK4)>;
+def : Pat<(vnot VK2:$src),
+ (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK2:$src, VK16)), VK2)>;
// Mask binary operation
// - KAND, KANDN, KOR, KXNOR, KXOR
@@ -2293,13 +2522,16 @@ multiclass avx512_mask_binop_all<bits<8> opc, string OpcodeStr,
def andn : PatFrag<(ops node:$i0, node:$i1), (and (not node:$i0), node:$i1)>;
def xnor : PatFrag<(ops node:$i0, node:$i1), (not (xor node:$i0, node:$i1))>;
+// These nodes use 'vnot' instead of 'not' to support vectors.
+def vandn : PatFrag<(ops node:$i0, node:$i1), (and (vnot node:$i0), node:$i1)>;
+def vxnor : PatFrag<(ops node:$i0, node:$i1), (vnot (xor node:$i0, node:$i1))>;
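The new fragments spell out the usual Boolean identities on mask values, vandn(a, b) = ~a & b and vxnor(a, b) = ~(a ^ b); a minimal sketch on a 16-bit mask:

#include <cassert>
#include <cstdint>

int main() {
  uint16_t A = 0xAAAA, B = 0xCCCC;
  uint16_t AndN = static_cast<uint16_t>(~A & B);    // what KANDN computes
  uint16_t XNor = static_cast<uint16_t>(~(A ^ B));  // what KXNOR computes
  assert(AndN == 0x4444);
  assert(XNor == static_cast<uint16_t>((A & B) | (~A & ~B)));  // same identity
  return 0;
}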
-defm KAND : avx512_mask_binop_all<0x41, "kand", and, 1>;
-defm KOR : avx512_mask_binop_all<0x45, "kor", or, 1>;
-defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", xnor, 1>;
-defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, 1>;
-defm KANDN : avx512_mask_binop_all<0x42, "kandn", andn, 0>;
-defm KADD : avx512_mask_binop_all<0x4A, "kadd", add, 1, HasDQI>;
+defm KAND : avx512_mask_binop_all<0x41, "kand", and, 1>;
+defm KOR : avx512_mask_binop_all<0x45, "kor", or, 1>;
+defm KXNOR : avx512_mask_binop_all<0x46, "kxnor", vxnor, 1>;
+defm KXOR : avx512_mask_binop_all<0x47, "kxor", xor, 1>;
+defm KANDN : avx512_mask_binop_all<0x42, "kandn", vandn, 0>;
+defm KADD : avx512_mask_binop_all<0x4A, "kadd", add, 1, HasDQI>;
multiclass avx512_mask_binop_int<string IntName, string InstName> {
let Predicates = [HasAVX512] in
@@ -2316,11 +2548,12 @@ defm : avx512_mask_binop_int<"kor", "KOR">;
defm : avx512_mask_binop_int<"kxnor", "KXNOR">;
defm : avx512_mask_binop_int<"kxor", "KXOR">;
-multiclass avx512_binop_pat<SDPatternOperator OpNode, Instruction Inst> {
+multiclass avx512_binop_pat<SDPatternOperator VOpNode, SDPatternOperator OpNode,
+ Instruction Inst> {
// With AVX512F, 8-bit mask is promoted to 16-bit mask,
// for the DQI set, this type is legal and KxxxB instruction is used
let Predicates = [NoDQI] in
- def : Pat<(OpNode VK8:$src1, VK8:$src2),
+ def : Pat<(VOpNode VK8:$src1, VK8:$src2),
(COPY_TO_REGCLASS
(Inst (COPY_TO_REGCLASS VK8:$src1, VK16),
(COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
@@ -2330,47 +2563,21 @@ multiclass avx512_binop_pat<SDPatternOperator OpNode, Instruction Inst> {
(COPY_TO_REGCLASS (Inst
(COPY_TO_REGCLASS VK1:$src1, VK16),
(COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
- def : Pat<(OpNode VK2:$src1, VK2:$src2),
+ def : Pat<(VOpNode VK2:$src1, VK2:$src2),
(COPY_TO_REGCLASS (Inst
(COPY_TO_REGCLASS VK2:$src1, VK16),
(COPY_TO_REGCLASS VK2:$src2, VK16)), VK1)>;
- def : Pat<(OpNode VK4:$src1, VK4:$src2),
+ def : Pat<(VOpNode VK4:$src1, VK4:$src2),
(COPY_TO_REGCLASS (Inst
(COPY_TO_REGCLASS VK4:$src1, VK16),
(COPY_TO_REGCLASS VK4:$src2, VK16)), VK1)>;
}
-defm : avx512_binop_pat<and, KANDWrr>;
-defm : avx512_binop_pat<andn, KANDNWrr>;
-defm : avx512_binop_pat<or, KORWrr>;
-defm : avx512_binop_pat<xnor, KXNORWrr>;
-defm : avx512_binop_pat<xor, KXORWrr>;
-
-def : Pat<(xor (xor VK16:$src1, VK16:$src2), (v16i1 immAllOnesV)),
- (KXNORWrr VK16:$src1, VK16:$src2)>;
-def : Pat<(xor (xor VK8:$src1, VK8:$src2), (v8i1 immAllOnesV)),
- (KXNORBrr VK8:$src1, VK8:$src2)>, Requires<[HasDQI]>;
-def : Pat<(xor (xor VK32:$src1, VK32:$src2), (v32i1 immAllOnesV)),
- (KXNORDrr VK32:$src1, VK32:$src2)>, Requires<[HasBWI]>;
-def : Pat<(xor (xor VK64:$src1, VK64:$src2), (v64i1 immAllOnesV)),
- (KXNORQrr VK64:$src1, VK64:$src2)>, Requires<[HasBWI]>;
-
-let Predicates = [NoDQI] in
-def : Pat<(xor (xor VK8:$src1, VK8:$src2), (v8i1 immAllOnesV)),
- (COPY_TO_REGCLASS (KXNORWrr (COPY_TO_REGCLASS VK8:$src1, VK16),
- (COPY_TO_REGCLASS VK8:$src2, VK16)), VK8)>;
-
-def : Pat<(xor (xor VK4:$src1, VK4:$src2), (v4i1 immAllOnesV)),
- (COPY_TO_REGCLASS (KXNORWrr (COPY_TO_REGCLASS VK4:$src1, VK16),
- (COPY_TO_REGCLASS VK4:$src2, VK16)), VK4)>;
-
-def : Pat<(xor (xor VK2:$src1, VK2:$src2), (v2i1 immAllOnesV)),
- (COPY_TO_REGCLASS (KXNORWrr (COPY_TO_REGCLASS VK2:$src1, VK16),
- (COPY_TO_REGCLASS VK2:$src2, VK16)), VK2)>;
-
-def : Pat<(xor (xor VK1:$src1, VK1:$src2), (i1 1)),
- (COPY_TO_REGCLASS (KXNORWrr (COPY_TO_REGCLASS VK1:$src1, VK16),
- (COPY_TO_REGCLASS VK1:$src2, VK16)), VK1)>;
+defm : avx512_binop_pat<and, and, KANDWrr>;
+defm : avx512_binop_pat<vandn, andn, KANDNWrr>;
+defm : avx512_binop_pat<or, or, KORWrr>;
+defm : avx512_binop_pat<vxnor, xnor, KXNORWrr>;
+defm : avx512_binop_pat<xor, xor, KXORWrr>;
// Mask unpacking
multiclass avx512_mask_unpck<string Suffix,RegisterClass KRC, ValueType VT,
@@ -2466,6 +2673,8 @@ defm KSET1 : avx512_mask_setop_w<immAllOnesV>;
// With AVX-512 only, 8-bit mask is promoted to 16-bit mask.
let Predicates = [HasAVX512] in {
def : Pat<(v8i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK8)>;
+ def : Pat<(v4i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK4)>;
+ def : Pat<(v2i1 immAllZerosV), (COPY_TO_REGCLASS (KSET0W), VK2)>;
def : Pat<(v8i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK8)>;
def : Pat<(v4i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK4)>;
def : Pat<(v2i1 immAllOnesV), (COPY_TO_REGCLASS (KSET1W), VK2)>;
@@ -2519,15 +2728,24 @@ def : Pat<(v16i1 (extract_subvector (v32i1 VK32:$src), (iPTR 16))),
def : Pat<(v32i1 (extract_subvector (v64i1 VK64:$src), (iPTR 32))),
(v32i1 (COPY_TO_REGCLASS (KSHIFTRQri VK64:$src, (i8 32)), VK32))>;
-def : Pat<(v8i1 (X86vshli VK8:$src, (i8 imm:$imm))),
- (v8i1 (COPY_TO_REGCLASS
- (KSHIFTLWri (COPY_TO_REGCLASS VK8:$src, VK16),
- (I8Imm $imm)), VK8))>, Requires<[HasAVX512, NoDQI]>;
-def : Pat<(v4i1 (X86vshli VK4:$src, (i8 imm:$imm))),
- (v4i1 (COPY_TO_REGCLASS
- (KSHIFTLWri (COPY_TO_REGCLASS VK4:$src, VK16),
- (I8Imm $imm)), VK4))>, Requires<[HasAVX512]>;
+// Patterns for kmask shift
+multiclass mask_shift_lowering<RegisterClass RC, ValueType VT> {
+ def : Pat<(VT (X86vshli RC:$src, (i8 imm:$imm))),
+ (VT (COPY_TO_REGCLASS
+ (KSHIFTLWri (COPY_TO_REGCLASS RC:$src, VK16),
+ (I8Imm $imm)),
+ RC))>;
+ def : Pat<(VT (X86vsrli RC:$src, (i8 imm:$imm))),
+ (VT (COPY_TO_REGCLASS
+ (KSHIFTRWri (COPY_TO_REGCLASS RC:$src, VK16),
+ (I8Imm $imm)),
+ RC))>;
+}
+
+defm : mask_shift_lowering<VK8, v8i1>, Requires<[HasAVX512, NoDQI]>;
+defm : mask_shift_lowering<VK4, v4i1>, Requires<[HasAVX512]>;
+defm : mask_shift_lowering<VK2, v2i1>, Requires<[HasAVX512]>;
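A minimal sketch of the lowering these helpers describe, with an illustrative helper name: the narrow mask is widened into a 16-bit mask register, shifted there (KSHIFTLWri/KSHIFTRWri), and only the low bits are kept when copying back to the narrow class.

#include <cassert>
#include <cstdint>

// Shift a v8i1 mask left: do the shift in 16 bits (KSHIFTLWri) and keep only
// the low 8 bits when copying back to the narrow mask register class.
static uint8_t ShlV8i1(uint8_t Mask, unsigned Imm) {
  uint16_t Wide = Mask;                        // VK8 -> VK16
  Wide = static_cast<uint16_t>(Wide << Imm);   // KSHIFTLWri
  return static_cast<uint8_t>(Wide);           // back to VK8
}

int main() {
  assert(ShlV8i1(0x0F, 2) == 0x3C);  // 0b00001111 << 2 == 0b00111100
  return 0;
}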
//===----------------------------------------------------------------------===//
// AVX-512 - Aligned and unaligned load and store
//
@@ -2535,7 +2753,6 @@ def : Pat<(v4i1 (X86vshli VK4:$src, (i8 imm:$imm))),
multiclass avx512_load<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
PatFrag ld_frag, PatFrag mload,
- bit IsReMaterializable = 1,
SDPatternOperator SelectOprr = vselect> {
let hasSideEffects = 0 in {
def rr : AVX512PI<opc, MRMSrcReg, (outs _.RC:$dst), (ins _.RC:$src),
@@ -2550,7 +2767,7 @@ multiclass avx512_load<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
_.ImmAllZerosV)))], _.ExeDomain>,
EVEX, EVEX_KZ;
- let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable,
+ let canFoldAsLoad = 1, isReMaterializable = 1,
SchedRW = [WriteLoad] in
def rm : AVX512PI<opc, MRMSrcMem, (outs _.RC:$dst), (ins _.MemOp:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
@@ -2598,37 +2815,32 @@ multiclass avx512_load<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
multiclass avx512_alignedload_vl<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo _,
- Predicate prd,
- bit IsReMaterializable = 1> {
+ Predicate prd> {
let Predicates = [prd] in
defm Z : avx512_load<opc, OpcodeStr, _.info512, _.info512.AlignedLdFrag,
- masked_load_aligned512, IsReMaterializable>, EVEX_V512;
+ masked_load_aligned512>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_load<opc, OpcodeStr, _.info256, _.info256.AlignedLdFrag,
- masked_load_aligned256, IsReMaterializable>, EVEX_V256;
+ masked_load_aligned256>, EVEX_V256;
defm Z128 : avx512_load<opc, OpcodeStr, _.info128, _.info128.AlignedLdFrag,
- masked_load_aligned128, IsReMaterializable>, EVEX_V128;
+ masked_load_aligned128>, EVEX_V128;
}
}
multiclass avx512_load_vl<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo _,
Predicate prd,
- bit IsReMaterializable = 1,
SDPatternOperator SelectOprr = vselect> {
let Predicates = [prd] in
defm Z : avx512_load<opc, OpcodeStr, _.info512, _.info512.LdFrag,
- masked_load_unaligned, IsReMaterializable,
- SelectOprr>, EVEX_V512;
+ masked_load_unaligned, SelectOprr>, EVEX_V512;
let Predicates = [prd, HasVLX] in {
defm Z256 : avx512_load<opc, OpcodeStr, _.info256, _.info256.LdFrag,
- masked_load_unaligned, IsReMaterializable,
- SelectOprr>, EVEX_V256;
+ masked_load_unaligned, SelectOprr>, EVEX_V256;
defm Z128 : avx512_load<opc, OpcodeStr, _.info128, _.info128.LdFrag,
- masked_load_unaligned, IsReMaterializable,
- SelectOprr>, EVEX_V128;
+ masked_load_unaligned, SelectOprr>, EVEX_V128;
}
}
@@ -2704,11 +2916,11 @@ defm VMOVAPD : avx512_alignedload_vl<0x28, "vmovapd", avx512vl_f64_info,
HasAVX512>, PD, VEX_W, EVEX_CD8<64, CD8VF>;
defm VMOVUPS : avx512_load_vl<0x10, "vmovups", avx512vl_f32_info, HasAVX512,
- 1, null_frag>,
+ null_frag>,
avx512_store_vl<0x11, "vmovups", avx512vl_f32_info, HasAVX512>,
PS, EVEX_CD8<32, CD8VF>;
-defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512, 0,
+defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512,
null_frag>,
avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512>,
PD, VEX_W, EVEX_CD8<64, CD8VF>;
@@ -2732,15 +2944,41 @@ defm VMOVDQU16 : avx512_load_vl<0x6F, "vmovdqu16", avx512vl_i16_info, HasBWI>,
HasBWI>, XD, VEX_W, EVEX_CD8<16, CD8VF>;
defm VMOVDQU32 : avx512_load_vl<0x6F, "vmovdqu32", avx512vl_i32_info, HasAVX512,
- 1, null_frag>,
+ null_frag>,
avx512_store_vl<0x7F, "vmovdqu32", avx512vl_i32_info,
HasAVX512>, XS, EVEX_CD8<32, CD8VF>;
defm VMOVDQU64 : avx512_load_vl<0x6F, "vmovdqu64", avx512vl_i64_info, HasAVX512,
- 1, null_frag>,
+ null_frag>,
avx512_store_vl<0x7F, "vmovdqu64", avx512vl_i64_info,
HasAVX512>, XS, VEX_W, EVEX_CD8<64, CD8VF>;
+// Special instructions to help with spilling when we don't have VLX. We need
+// to load or store from a ZMM register instead. These are converted in
+// expandPostRAPseudos.
+let isReMaterializable = 1, canFoldAsLoad = 1,
+ isPseudo = 1, SchedRW = [WriteLoad], mayLoad = 1, hasSideEffects = 0 in {
+def VMOVAPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
+ "", []>;
+def VMOVAPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
+ "", []>;
+def VMOVUPSZ128rm_NOVLX : I<0, Pseudo, (outs VR128X:$dst), (ins f128mem:$src),
+ "", []>;
+def VMOVUPSZ256rm_NOVLX : I<0, Pseudo, (outs VR256X:$dst), (ins f256mem:$src),
+ "", []>;
+}
+
+let isPseudo = 1, mayStore = 1, hasSideEffects = 0 in {
+def VMOVAPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
+ "", []>;
+def VMOVAPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
+ "", []>;
+def VMOVUPSZ128mr_NOVLX : I<0, Pseudo, (outs), (ins f128mem:$dst, VR128X:$src),
+ "", []>;
+def VMOVUPSZ256mr_NOVLX : I<0, Pseudo, (outs), (ins f256mem:$dst, VR256X:$src),
+ "", []>;
+}
+
def : Pat<(v8i64 (vselect VK8WM:$mask, (bc_v8i64 (v16i32 immAllZerosV)),
(v8i64 VR512:$src))),
(VMOVDQA64Zrrkz (COPY_TO_REGCLASS (KNOTWrr (COPY_TO_REGCLASS VK8:$mask, VK16)),
@@ -2761,6 +2999,28 @@ def : Pat<(v16i32 (vselect (xor VK16:$mask, (v16i1 immAllOnesV)),
(v16i32 VR512:$src))),
(VMOVDQA32Zrrkz VK16WM:$mask, VR512:$src)>;
+let Predicates = [HasVLX, NoBWI] in {
+ // 128-bit load/store without BWI.
+ def : Pat<(alignedstore (v8i16 VR128X:$src), addr:$dst),
+ (VMOVDQA32Z128mr addr:$dst, VR128X:$src)>;
+ def : Pat<(alignedstore (v16i8 VR128X:$src), addr:$dst),
+ (VMOVDQA32Z128mr addr:$dst, VR128X:$src)>;
+ def : Pat<(store (v8i16 VR128X:$src), addr:$dst),
+ (VMOVDQU32Z128mr addr:$dst, VR128X:$src)>;
+ def : Pat<(store (v16i8 VR128X:$src), addr:$dst),
+ (VMOVDQU32Z128mr addr:$dst, VR128X:$src)>;
+
+ // 256-bit load/store without BWI.
+ def : Pat<(alignedstore256 (v16i16 VR256X:$src), addr:$dst),
+ (VMOVDQA32Z256mr addr:$dst, VR256X:$src)>;
+ def : Pat<(alignedstore256 (v32i8 VR256X:$src), addr:$dst),
+ (VMOVDQA32Z256mr addr:$dst, VR256X:$src)>;
+ def : Pat<(store (v16i16 VR256X:$src), addr:$dst),
+ (VMOVDQU32Z256mr addr:$dst, VR256X:$src)>;
+ def : Pat<(store (v32i8 VR256X:$src), addr:$dst),
+ (VMOVDQU32Z256mr addr:$dst, VR256X:$src)>;
+}
+
let Predicates = [HasVLX] in {
// Special patterns for storing subvector extracts of lower 128-bits of 256.
// Its cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF128mr
@@ -2844,23 +3104,23 @@ let Predicates = [HasVLX] in {
// Special patterns for storing subvector extracts of lower 256-bits of 512.
// It's cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF128mr
- def : Pat<(alignedstore (v4f64 (extract_subvector
- (v8f64 VR512:$src), (iPTR 0))), addr:$dst),
+ def : Pat<(alignedstore256 (v4f64 (extract_subvector
+ (v8f64 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVAPDZ256mr addr:$dst, (v4f64 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
def : Pat<(alignedstore (v8f32 (extract_subvector
(v16f32 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVAPSZ256mr addr:$dst, (v8f32 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
- def : Pat<(alignedstore (v4i64 (extract_subvector
- (v8i64 VR512:$src), (iPTR 0))), addr:$dst),
+ def : Pat<(alignedstore256 (v4i64 (extract_subvector
+ (v8i64 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVDQA64Z256mr addr:$dst, (v4i64 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
- def : Pat<(alignedstore (v8i32 (extract_subvector
- (v16i32 VR512:$src), (iPTR 0))), addr:$dst),
+ def : Pat<(alignedstore256 (v8i32 (extract_subvector
+ (v16i32 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVDQA32Z256mr addr:$dst, (v8i32 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
- def : Pat<(alignedstore (v16i16 (extract_subvector
- (v32i16 VR512:$src), (iPTR 0))), addr:$dst),
+ def : Pat<(alignedstore256 (v16i16 (extract_subvector
+ (v32i16 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVDQA32Z256mr addr:$dst, (v16i16 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
- def : Pat<(alignedstore (v32i8 (extract_subvector
- (v64i8 VR512:$src), (iPTR 0))), addr:$dst),
+ def : Pat<(alignedstore256 (v32i8 (extract_subvector
+ (v64i8 VR512:$src), (iPTR 0))), addr:$dst),
(VMOVDQA32Z256mr addr:$dst, (v32i8 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
def : Pat<(store (v4f64 (extract_subvector
@@ -2883,12 +3143,13 @@ let Predicates = [HasVLX] in {
(VMOVDQU32Z256mr addr:$dst, (v32i8 (EXTRACT_SUBREG VR512:$src,sub_ymm)))>;
}
-
-// Move Int Doubleword to Packed Double Int
-//
-def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
- "vmovd\t{$src, $dst|$dst, $src}",
- [(set VR128X:$dst,
+
+// Move Int Doubleword to Packed Double Int
+//
+let ExeDomain = SSEPackedInt in {
+def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src),
+ "vmovd\t{$src, $dst|$dst, $src}",
+ [(set VR128X:$dst,
(v4i32 (scalar_to_vector GR32:$src)))], IIC_SSE_MOVDQ>,
EVEX;
def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src),
@@ -2918,43 +3179,47 @@ def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src
def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64X:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(store (i64 (bitconvert FR64X:$src)), addr:$dst)],
- IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteStore]>,
- EVEX_CD8<64, CD8VT1>;
-}
-
-// Move Int Doubleword to Single Scalar
-//
-let isCodeGenOnly = 1 in {
-def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
- "vmovd\t{$src, $dst|$dst, $src}",
- [(set FR32X:$dst, (bitconvert GR32:$src))],
+ IIC_SSE_MOVDQ>, EVEX, VEX_W, Sched<[WriteStore]>,
+ EVEX_CD8<64, CD8VT1>;
+}
+} // ExeDomain = SSEPackedInt
+
+// Move Int Doubleword to Single Scalar
+//
+let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
+def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src),
+ "vmovd\t{$src, $dst|$dst, $src}",
+ [(set FR32X:$dst, (bitconvert GR32:$src))],
IIC_SSE_MOVDQ>, EVEX;
def VMOVDI2SSZrm : AVX512BI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src),
- "vmovd\t{$src, $dst|$dst, $src}",
- [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))],
- IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>;
-}
-
-// Move doubleword from xmm register to r/m32
-//
-def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
- "vmovd\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
+ "vmovd\t{$src, $dst|$dst, $src}",
+ [(set FR32X:$dst, (bitconvert (loadi32 addr:$src)))],
+ IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>;
+} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
+
+// Move doubleword from xmm register to r/m32
+//
+let ExeDomain = SSEPackedInt in {
+def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$src),
+ "vmovd\t{$src, $dst|$dst, $src}",
+ [(set GR32:$dst, (extractelt (v4i32 VR128X:$src),
(iPTR 0)))], IIC_SSE_MOVD_ToGP>,
EVEX;
def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
(ins i32mem:$dst, VR128X:$src),
"vmovd\t{$src, $dst|$dst, $src}",
- [(store (i32 (extractelt (v4i32 VR128X:$src),
- (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>,
- EVEX, EVEX_CD8<32, CD8VT1>;
-
-// Move quadword from xmm1 register to r/m64
-//
-def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
- "vmovq\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
+ [(store (i32 (extractelt (v4i32 VR128X:$src),
+ (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>,
+ EVEX, EVEX_CD8<32, CD8VT1>;
+} // ExeDomain = SSEPackedInt
+
+// Move quadword from xmm1 register to r/m64
+//
+let ExeDomain = SSEPackedInt in {
+def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src),
+ "vmovq\t{$src, $dst|$dst, $src}",
+ [(set GR64:$dst, (extractelt (v2i64 VR128X:$src),
(iPTR 0)))],
IIC_SSE_MOVD_ToGP>, PD, EVEX, VEX_W,
Requires<[HasAVX512, In64BitMode]>;
@@ -2975,71 +3240,89 @@ def VMOVPQI2QIZmr : I<0xD6, MRMDestMem, (outs),
let hasSideEffects = 0 in
def VMOVPQI2QIZrr : AVX512BI<0xD6, MRMDestReg, (outs VR128X:$dst),
- (ins VR128X:$src),
- "vmovq.s\t{$src, $dst|$dst, $src}",[]>,
- EVEX, VEX_W;
-
-// Move Scalar Single to Double Int
-//
-let isCodeGenOnly = 1 in {
-def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
- (ins FR32X:$src),
- "vmovd\t{$src, $dst|$dst, $src}",
+ (ins VR128X:$src),
+ "vmovq.s\t{$src, $dst|$dst, $src}",[]>,
+ EVEX, VEX_W;
+} // ExeDomain = SSEPackedInt
+
+// Move Scalar Single to Double Int
+//
+let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
+def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst),
+ (ins FR32X:$src),
+ "vmovd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (bitconvert FR32X:$src))],
IIC_SSE_MOVD_ToGP>, EVEX;
def VMOVSS2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs),
(ins i32mem:$dst, FR32X:$src),
- "vmovd\t{$src, $dst|$dst, $src}",
- [(store (i32 (bitconvert FR32X:$src)), addr:$dst)],
- IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>;
-}
-
-// Move Quadword Int to Packed Quadword Int
-//
-def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
- (ins i64mem:$src),
- "vmovq\t{$src, $dst|$dst, $src}",
- [(set VR128X:$dst,
- (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
- EVEX, VEX_W, EVEX_CD8<8, CD8VT8>;
-
-//===----------------------------------------------------------------------===//
-// AVX-512 MOVSS, MOVSD
+ "vmovd\t{$src, $dst|$dst, $src}",
+ [(store (i32 (bitconvert FR32X:$src)), addr:$dst)],
+ IIC_SSE_MOVDQ>, EVEX, EVEX_CD8<32, CD8VT1>;
+} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
+
+// Move Quadword Int to Packed Quadword Int
+//
+let ExeDomain = SSEPackedInt in {
+def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
+ (ins i64mem:$src),
+ "vmovq\t{$src, $dst|$dst, $src}",
+ [(set VR128X:$dst,
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>,
+ EVEX, VEX_W, EVEX_CD8<8, CD8VT8>;
+} // ExeDomain = SSEPackedInt
+
+//===----------------------------------------------------------------------===//
+// AVX-512 MOVSS, MOVSD
//===----------------------------------------------------------------------===//
-multiclass avx512_move_scalar <string asm, SDNode OpNode,
+multiclass avx512_move_scalar<string asm, SDNode OpNode,
X86VectorVTInfo _> {
- defm rr_Int : AVX512_maskable_scalar<0x10, MRMSrcReg, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.RC:$src2),
- asm, "$src2, $src1","$src1, $src2",
- (_.VT (OpNode (_.VT _.RC:$src1),
- (_.VT _.RC:$src2))),
- IIC_SSE_MOV_S_RR>, EVEX_4V;
- let Constraints = "$src1 = $dst" in
- defm rm_Int : AVX512_maskable_3src_scalar<0x10, MRMSrcMem, _,
- (outs _.RC:$dst),
- (ins _.ScalarMemOp:$src),
- asm,"$src","$src",
- (_.VT (OpNode (_.VT _.RC:$src1),
- (_.VT (scalar_to_vector
- (_.ScalarLdFrag addr:$src)))))>, EVEX;
- let isCodeGenOnly = 1 in {
- def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
- (ins _.RC:$src1, _.FRC:$src2),
- !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
- [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1,
- (scalar_to_vector _.FRC:$src2))))],
- _.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V;
- def rm : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
- !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
- [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
- _.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX;
+ def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.FRC:$src2),
+ !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1,
+ (scalar_to_vector _.FRC:$src2))))],
+ _.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V;
+ def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
+ (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
+ !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
+ "$dst {${mask}} {z}, $src1, $src2}"),
+ [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
+ (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
+ _.ImmAllZerosV)))],
+ _.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V, EVEX_KZ;
+ let Constraints = "$src0 = $dst" in
+ def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
+ (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
+ !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
+ "$dst {${mask}}, $src1, $src2}"),
+ [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
+ (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
+ (_.VT _.RC:$src0))))],
+ _.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V, EVEX_K;
+ let canFoldAsLoad = 1, isReMaterializable = 1 in
+ def rm : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src),
+ !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
+ [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))],
+ _.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX;
+ let mayLoad = 1, hasSideEffects = 0 in {
+ let Constraints = "$src0 = $dst" in
+ def rmk : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
+ (ins _.RC:$src0, _.KRCWM:$mask, _.ScalarMemOp:$src),
+ !strconcat(asm, "\t{$src, $dst {${mask}}|",
+ "$dst {${mask}}, $src}"),
+ [], _.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX, EVEX_K;
+ def rmkz : AVX512PI<0x10, MRMSrcMem, (outs _.RC:$dst),
+ (ins _.KRCWM:$mask, _.ScalarMemOp:$src),
+ !strconcat(asm, "\t{$src, $dst {${mask}} {z}|",
+ "$dst {${mask}} {z}, $src}"),
+ [], _.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX, EVEX_KZ;
}
def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
[(store _.FRC:$src, addr:$dst)], _.ExeDomain, IIC_SSE_MOV_S_MR>,
EVEX;
- let mayStore = 1 in
+ let mayStore = 1, hasSideEffects = 0 in
def mrk: AVX512PI<0x11, MRMDestMem, (outs),
(ins _.ScalarMemOp:$dst, VK1WM:$mask, _.FRC:$src),
!strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"),
@@ -3052,12 +3335,99 @@ defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, f32x_info>,
defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, f64x_info>,
VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>;
+
+multiclass avx512_move_scalar_lowering<string InstrStr, SDNode OpNode,
+ PatLeaf ZeroFP, X86VectorVTInfo _> {
+
+def : Pat<(_.VT (OpNode _.RC:$src0,
+ (_.VT (scalar_to_vector
+ (_.EltVT (X86selects (i1 (trunc GR32:$mask)),
+ (_.EltVT _.FRC:$src1),
+ (_.EltVT _.FRC:$src2))))))),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr#rrk)
+ (COPY_TO_REGCLASS _.FRC:$src2, _.RC),
+ (COPY_TO_REGCLASS GR32:$mask, VK1WM),
+ (_.VT _.RC:$src0),
+ (COPY_TO_REGCLASS _.FRC:$src1, _.RC)),
+ _.RC)>;
+
+def : Pat<(_.VT (OpNode _.RC:$src0,
+ (_.VT (scalar_to_vector
+ (_.EltVT (X86selects (i1 (trunc GR32:$mask)),
+ (_.EltVT _.FRC:$src1),
+ (_.EltVT ZeroFP))))))),
+ (COPY_TO_REGCLASS (!cast<Instruction>(InstrStr#rrkz)
+ (COPY_TO_REGCLASS GR32:$mask, VK1WM),
+ (_.VT _.RC:$src0),
+ (COPY_TO_REGCLASS _.FRC:$src1, _.RC)),
+ _.RC)>;
+
+}
+
+multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
+ dag Mask, RegisterClass MaskRC> {
+
+def : Pat<(masked_store addr:$dst, Mask,
+ (_.info512.VT (insert_subvector undef,
+ (_.info256.VT (insert_subvector undef,
+ (_.info128.VT _.info128.RC:$src),
+ (i64 0))),
+ (i64 0)))),
+ (!cast<Instruction>(InstrStr#mrk) addr:$dst,
+ (i1 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM)),
+ (COPY_TO_REGCLASS _.info128.RC:$src, _.info128.FRC))>;
+
+}
+
+multiclass avx512_load_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
+ dag Mask, RegisterClass MaskRC> {
+
+def : Pat<(_.info128.VT (extract_subvector
+ (_.info512.VT (masked_load addr:$srcAddr, Mask,
+ (_.info512.VT (bitconvert
+ (v16i32 immAllZerosV))))),
+ (i64 0))),
+ (!cast<Instruction>(InstrStr#rmkz)
+ (i1 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM)),
+ addr:$srcAddr)>;
+
+def : Pat<(_.info128.VT (extract_subvector
+ (_.info512.VT (masked_load addr:$srcAddr, Mask,
+ (_.info512.VT (insert_subvector undef,
+ (_.info256.VT (insert_subvector undef,
+ (_.info128.VT (X86vzmovl _.info128.RC:$src)),
+ (i64 0))),
+ (i64 0))))),
+ (i64 0))),
+ (!cast<Instruction>(InstrStr#rmk) _.info128.RC:$src,
+ (i1 (COPY_TO_REGCLASS MaskRC:$mask, VK1WM)),
+ addr:$srcAddr)>;
+
+}
+
+defm : avx512_move_scalar_lowering<"VMOVSSZ", X86Movss, fp32imm0, v4f32x_info>;
+defm : avx512_move_scalar_lowering<"VMOVSDZ", X86Movsd, fp64imm0, v2f64x_info>;
+
+defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
+ (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
+defm : avx512_store_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
+ (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16>;
+defm : avx512_store_scalar_lowering<"VMOVSDZ", avx512vl_f64_info,
+ (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8>;
+
+defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
+ (v16i1 (bitconvert (i16 (trunc (and GR32:$mask, (i32 1)))))), GR32>;
+defm : avx512_load_scalar_lowering<"VMOVSSZ", avx512vl_f32_info,
+ (v16i1 (bitconvert (i16 (and GR16:$mask, (i16 1))))), GR16>;
+defm : avx512_load_scalar_lowering<"VMOVSDZ", avx512vl_f64_info,
+ (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8>;
+
def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
- (COPY_TO_REGCLASS (VMOVSSZrr_Intk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
+ (COPY_TO_REGCLASS (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),(COPY_TO_REGCLASS FR32X:$src1, VR128X)), FR32X)>;
def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
- (COPY_TO_REGCLASS (VMOVSDZrr_Intk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
+ (COPY_TO_REGCLASS (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR64X:$src1, VR128X)), FR64X)>;
def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask),
@@ -3088,6 +3458,7 @@ let Predicates = [HasAVX512] in {
(VMOVSSZrr (v4i32 (V_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64X:$src)))),
(VMOVSDZrr (v2f64 (V_SET0)), FR64X:$src)>;
+ }
// Move low f32 and clear high bits.
def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
@@ -3097,8 +3468,15 @@ let Predicates = [HasAVX512] in {
def : Pat<(v8i32 (X86vzmovl (v8i32 VR256X:$src))),
(SUBREG_TO_REG (i32 0),
(VMOVSSZrr (v4i32 (V_SET0)),
- (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)), sub_xmm)>;
- }
+ (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm)), sub_xmm)>;
+ def : Pat<(v16f32 (X86vzmovl (v16f32 VR512:$src))),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSSZrr (v4f32 (V_SET0)),
+ (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm)), sub_xmm)>;
+ def : Pat<(v16i32 (X86vzmovl (v16i32 VR512:$src))),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSSZrr (v4i32 (V_SET0)),
+ (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)), sub_xmm)>;
let AddedComplexity = 20 in {
// MOVSSrm zeros the high parts of the register; represent this
@@ -3109,6 +3487,8 @@ let Predicates = [HasAVX512] in {
(COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
(COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
+ def : Pat<(v4f32 (X86vzload addr:$src)),
+ (COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)>;
// MOVSDrm zeros the high parts of the register; represent this
// with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
@@ -3131,6 +3511,8 @@ let Predicates = [HasAVX512] in {
def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
(v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
+ def : Pat<(v8f32 (X86vzload addr:$src)),
+ (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
(v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
@@ -3145,6 +3527,8 @@ let Predicates = [HasAVX512] in {
def : Pat<(v16f32 (X86vzmovl (insert_subvector undef,
(v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
+ def : Pat<(v16f32 (X86vzload addr:$src)),
+ (SUBREG_TO_REG (i32 0), (VMOVSSZrm addr:$src), sub_xmm)>;
def : Pat<(v8f64 (X86vzmovl (insert_subvector undef,
(v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
@@ -3168,10 +3552,17 @@ let Predicates = [HasAVX512] in {
(SUBREG_TO_REG (i32 0),
(VMOVSDZrr (v2f64 (V_SET0)),
(EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm)), sub_xmm)>;
+ def : Pat<(v8f64 (X86vzmovl (v8f64 VR512:$src))),
+ (SUBREG_TO_REG (i32 0),
+ (VMOVSDZrr (v2f64 (V_SET0)),
+ (EXTRACT_SUBREG (v8f64 VR512:$src), sub_xmm)), sub_xmm)>;
def : Pat<(v4i64 (X86vzmovl (v4i64 VR256X:$src))),
(SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (V_SET0)),
(EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm)), sub_xmm)>;
+ def : Pat<(v8i64 (X86vzmovl (v8i64 VR512:$src))),
+ (SUBREG_TO_REG (i32 0), (VMOVSDZrr (v2i64 (V_SET0)),
+ (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm)), sub_xmm)>;
// Extract and store.
def : Pat<(store (f32 (extractelt (v4f32 VR128X:$src), (iPTR 0))),
@@ -3238,15 +3629,6 @@ def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
(v2i64 VR128X:$src))))],
IIC_SSE_MOVQ_RR>, EVEX, VEX_W;
-let AddedComplexity = 20 , isCodeGenOnly = 1 in
-def VMOVZPQILo2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
- (ins i128mem:$src),
- "vmovq\t{$src, $dst|$dst, $src}",
- [(set VR128X:$dst, (v2i64 (X86vzmovl
- (loadv2i64 addr:$src))))],
- IIC_SSE_MOVDQ>, EVEX, VEX_W,
- EVEX_CD8<8, CD8VT8>;
-
let Predicates = [HasAVX512] in {
let AddedComplexity = 15 in {
def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
@@ -3258,34 +3640,46 @@ let Predicates = [HasAVX512] in {
def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
(v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
(SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;
+
+ def : Pat<(v8i64 (X86vzmovl (insert_subvector undef,
+ (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
+ (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;
}
// AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
let AddedComplexity = 20 in {
def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
(VMOVDI2PDIZrm addr:$src)>;
-
def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
(VMOVDI2PDIZrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
(VMOVDI2PDIZrm addr:$src)>;
+ def : Pat<(v4i32 (X86vzload addr:$src)),
+ (VMOVDI2PDIZrm addr:$src)>;
+ def : Pat<(v8i32 (X86vzload addr:$src)),
+ (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
- (VMOVZPQILo2PQIZrm addr:$src)>;
+ (VMOVQI2PQIZrm addr:$src)>;
def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
- (VMOVZPQILo2PQIZrr VR128X:$src)>;
+ (VMOVZPQILo2PQIZrr VR128X:$src)>;
def : Pat<(v2i64 (X86vzload addr:$src)),
- (VMOVZPQILo2PQIZrm addr:$src)>;
+ (VMOVQI2PQIZrm addr:$src)>;
def : Pat<(v4i64 (X86vzload addr:$src)),
- (SUBREG_TO_REG (i64 0), (VMOVZPQILo2PQIZrm addr:$src), sub_xmm)>;
+ (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
}
// Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
(v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>;
+ def : Pat<(v16i32 (X86vzmovl (insert_subvector undef,
+ (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrr GR32:$src), sub_xmm)>;
// Use regular 128-bit instructions to match 512-bit scalar_to_vec+zext.
+ def : Pat<(v16i32 (X86vzload addr:$src)),
+ (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
def : Pat<(v8i64 (X86vzload addr:$src)),
- (SUBREG_TO_REG (i64 0), (VMOVZPQILo2PQIZrm addr:$src), sub_xmm)>;
+ (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
}
def : Pat<(v16i32 (X86Vinsert (v16i32 immAllZerosV), GR32:$src2, (iPTR 0))),
@@ -3366,11 +3760,11 @@ let Predicates = [HasAVX512], AddedComplexity = 400 in {
(VMOVNTDQAZrm addr:$src)>;
def : Pat<(v8i64 (alignednontemporalload addr:$src)),
(VMOVNTDQAZrm addr:$src)>;
- def : Pat<(v16i32 (alignednontemporalload addr:$src)),
+ def : Pat<(v16i32 (bitconvert (v8i64 (alignednontemporalload addr:$src)))),
(VMOVNTDQAZrm addr:$src)>;
- def : Pat<(v32i16 (alignednontemporalload addr:$src)),
+ def : Pat<(v32i16 (bitconvert (v8i64 (alignednontemporalload addr:$src)))),
(VMOVNTDQAZrm addr:$src)>;
- def : Pat<(v64i8 (alignednontemporalload addr:$src)),
+ def : Pat<(v64i8 (bitconvert (v8i64 (alignednontemporalload addr:$src)))),
(VMOVNTDQAZrm addr:$src)>;
}
@@ -3388,11 +3782,11 @@ let Predicates = [HasVLX], AddedComplexity = 400 in {
(VMOVNTDQAZ256rm addr:$src)>;
def : Pat<(v4i64 (alignednontemporalload addr:$src)),
(VMOVNTDQAZ256rm addr:$src)>;
- def : Pat<(v8i32 (alignednontemporalload addr:$src)),
+ def : Pat<(v8i32 (bitconvert (v2i64 (alignednontemporalload addr:$src)))),
(VMOVNTDQAZ256rm addr:$src)>;
- def : Pat<(v16i16 (alignednontemporalload addr:$src)),
+ def : Pat<(v16i16 (bitconvert (v2i64 (alignednontemporalload addr:$src)))),
(VMOVNTDQAZ256rm addr:$src)>;
- def : Pat<(v32i8 (alignednontemporalload addr:$src)),
+ def : Pat<(v32i8 (bitconvert (v2i64 (alignednontemporalload addr:$src)))),
(VMOVNTDQAZ256rm addr:$src)>;
def : Pat<(alignednontemporalstore (v4i32 VR128X:$src), addr:$dst),
@@ -3408,11 +3802,11 @@ let Predicates = [HasVLX], AddedComplexity = 400 in {
(VMOVNTDQAZ128rm addr:$src)>;
def : Pat<(v2i64 (alignednontemporalload addr:$src)),
(VMOVNTDQAZ128rm addr:$src)>;
- def : Pat<(v4i32 (alignednontemporalload addr:$src)),
+ def : Pat<(v4i32 (bitconvert (v2i64 (alignednontemporalload addr:$src)))),
(VMOVNTDQAZ128rm addr:$src)>;
- def : Pat<(v8i16 (alignednontemporalload addr:$src)),
+ def : Pat<(v8i16 (bitconvert (v2i64 (alignednontemporalload addr:$src)))),
(VMOVNTDQAZ128rm addr:$src)>;
- def : Pat<(v16i8 (alignednontemporalload addr:$src)),
+ def : Pat<(v16i8 (bitconvert (v2i64 (alignednontemporalload addr:$src)))),
(VMOVNTDQAZ128rm addr:$src)>;
}
@@ -3563,10 +3957,10 @@ multiclass avx512_binop_rm2<bits<8> opc, string OpcodeStr, OpndItins itins,
AVX512BIBase, EVEX_4V;
defm rmb : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
- (ins _Src.RC:$src1, _Dst.ScalarMemOp:$src2),
+ (ins _Src.RC:$src1, _Brdct.ScalarMemOp:$src2),
OpcodeStr,
"${src2}"##_Brdct.BroadcastStr##", $src1",
- "$src1, ${src2}"##_Dst.BroadcastStr,
+ "$src1, ${src2}"##_Brdct.BroadcastStr,
(_Dst.VT (OpNode (_Src.VT _Src.RC:$src1), (bitconvert
(_Brdct.VT (X86VBroadcast
(_Brdct.ScalarLdFrag addr:$src2)))))),
@@ -3646,13 +4040,14 @@ multiclass avx512_packs_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
multiclass avx512_packs_rm<bits<8> opc, string OpcodeStr,
SDNode OpNode,X86VectorVTInfo _Src,
- X86VectorVTInfo _Dst> {
+ X86VectorVTInfo _Dst, bit IsCommutable = 0> {
defm rr : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst),
(ins _Src.RC:$src1, _Src.RC:$src2), OpcodeStr,
"$src2, $src1","$src1, $src2",
(_Dst.VT (OpNode
(_Src.VT _Src.RC:$src1),
- (_Src.VT _Src.RC:$src2)))>,
+ (_Src.VT _Src.RC:$src2))),
+ NoItinerary, IsCommutable>,
EVEX_CD8<_Src.EltSize, CD8VF>, EVEX_4V;
defm rm : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
(ins _Src.RC:$src1, _Src.MemOp:$src2), OpcodeStr,
@@ -3695,15 +4090,15 @@ multiclass avx512_packs_all_i16_i8<bits<8> opc, string OpcodeStr,
multiclass avx512_vpmadd<bits<8> opc, string OpcodeStr,
SDNode OpNode, AVX512VLVectorVTInfo _Src,
- AVX512VLVectorVTInfo _Dst> {
+ AVX512VLVectorVTInfo _Dst, bit IsCommutable = 0> {
let Predicates = [HasBWI] in
defm NAME#Z : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info512,
- _Dst.info512>, EVEX_V512;
+ _Dst.info512, IsCommutable>, EVEX_V512;
let Predicates = [HasBWI, HasVLX] in {
defm NAME#Z256 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info256,
- _Dst.info256>, EVEX_V256;
+ _Dst.info256, IsCommutable>, EVEX_V256;
defm NAME#Z128 : avx512_packs_rm<opc, OpcodeStr, OpNode, _Src.info128,
- _Dst.info128>, EVEX_V128;
+ _Dst.info128, IsCommutable>, EVEX_V128;
}
}
@@ -3715,7 +4110,7 @@ defm VPACKUSWB : avx512_packs_all_i16_i8 <0x67, "vpackuswb", X86Packus>, AVX512B
defm VPMADDUBSW : avx512_vpmadd<0x04, "vpmaddubsw", X86vpmaddubsw,
avx512vl_i8_info, avx512vl_i16_info>, AVX512BIBase, T8PD;
defm VPMADDWD : avx512_vpmadd<0xF5, "vpmaddwd", X86vpmaddwd,
- avx512vl_i16_info, avx512vl_i32_info>, AVX512BIBase;
+ avx512vl_i16_info, avx512vl_i32_info, 1>, AVX512BIBase;
defm VPMAXSB : avx512_binop_rm_vl_b<0x3C, "vpmaxsb", smax,
SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
@@ -3744,17 +4139,119 @@ defm VPMINUW : avx512_binop_rm_vl_w<0x3A, "vpminuw", umin,
SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD;
defm VPMINU : avx512_binop_rm_vl_dq<0x3B, 0x3B, "vpminu", umin,
SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD;
+
+// PMULLQ: Use the 512-bit version to implement 128/256-bit operations when VLX is not available (NoVLX).
+let Predicates = [HasDQI, NoVLX] in {
+ def : Pat<(v4i64 (mul (v4i64 VR256X:$src1), (v4i64 VR256X:$src2))),
+ (EXTRACT_SUBREG
+ (VPMULLQZrr
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src1, sub_ymm),
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src2, sub_ymm)),
+ sub_ymm)>;
+
+ def : Pat<(v2i64 (mul (v2i64 VR128X:$src1), (v2i64 VR128X:$src2))),
+ (EXTRACT_SUBREG
+ (VPMULLQZrr
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src1, sub_xmm),
+ (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR128X:$src2, sub_xmm)),
+ sub_xmm)>;
+}
+
//===----------------------------------------------------------------------===//
// AVX-512 Logical Instructions
//===----------------------------------------------------------------------===//
-defm VPAND : avx512_binop_rm_vl_dq<0xDB, 0xDB, "vpand", and,
+multiclass avx512_logic_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _, OpndItins itins,
+ bit IsCommutable = 0> {
+ defm rr : AVX512_maskable_logic<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
+ (bitconvert (_.VT _.RC:$src2)))),
+ (_.VT (bitconvert (_.i64VT (OpNode _.RC:$src1,
+ _.RC:$src2)))),
+ itins.rr, IsCommutable>,
+ AVX512BIBase, EVEX_4V;
+
+ defm rm : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (_.i64VT (OpNode (bitconvert (_.VT _.RC:$src1)),
+ (bitconvert (_.LdFrag addr:$src2)))),
+ (_.VT (bitconvert (_.i64VT (OpNode _.RC:$src1,
+ (bitconvert (_.LdFrag addr:$src2)))))),
+ itins.rm>,
+ AVX512BIBase, EVEX_4V;
+}
+
+multiclass avx512_logic_rmb<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ X86VectorVTInfo _, OpndItins itins,
+ bit IsCommutable = 0> :
+ avx512_logic_rm<opc, OpcodeStr, OpNode, _, itins, IsCommutable> {
+ defm rmb : AVX512_maskable_logic<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
+ "${src2}"##_.BroadcastStr##", $src1",
+ "$src1, ${src2}"##_.BroadcastStr,
+ (_.i64VT (OpNode _.RC:$src1,
+ (bitconvert
+ (_.VT (X86VBroadcast
+ (_.ScalarLdFrag addr:$src2)))))),
+ (_.VT (bitconvert (_.i64VT (OpNode _.RC:$src1,
+ (bitconvert
+ (_.VT (X86VBroadcast
+ (_.ScalarLdFrag addr:$src2)))))))),
+ itins.rm>,
+ AVX512BIBase, EVEX_4V, EVEX_B;
+}
+
+multiclass avx512_logic_rmb_vl<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ AVX512VLVectorVTInfo VTInfo, OpndItins itins,
+ Predicate prd, bit IsCommutable = 0> {
+ let Predicates = [prd] in
+ defm Z : avx512_logic_rmb<opc, OpcodeStr, OpNode, VTInfo.info512, itins,
+ IsCommutable>, EVEX_V512;
+
+ let Predicates = [prd, HasVLX] in {
+ defm Z256 : avx512_logic_rmb<opc, OpcodeStr, OpNode, VTInfo.info256, itins,
+ IsCommutable>, EVEX_V256;
+ defm Z128 : avx512_logic_rmb<opc, OpcodeStr, OpNode, VTInfo.info128, itins,
+ IsCommutable>, EVEX_V128;
+ }
+}
+
+multiclass avx512_logic_rm_vl_d<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ OpndItins itins, Predicate prd,
+ bit IsCommutable = 0> {
+ defm NAME : avx512_logic_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i32_info,
+ itins, prd, IsCommutable>, EVEX_CD8<32, CD8VF>;
+}
+
+multiclass avx512_logic_rm_vl_q<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ OpndItins itins, Predicate prd,
+ bit IsCommutable = 0> {
+ defm NAME : avx512_logic_rmb_vl<opc, OpcodeStr, OpNode, avx512vl_i64_info,
+ itins, prd, IsCommutable>,
+ VEX_W, EVEX_CD8<64, CD8VF>;
+}
+
+multiclass avx512_logic_rm_vl_dq<bits<8> opc_d, bits<8> opc_q, string OpcodeStr,
+ SDNode OpNode, OpndItins itins, Predicate prd,
+ bit IsCommutable = 0> {
+ defm Q : avx512_logic_rm_vl_q<opc_q, OpcodeStr#"q", OpNode, itins, prd,
+ IsCommutable>;
+
+ defm D : avx512_logic_rm_vl_d<opc_d, OpcodeStr#"d", OpNode, itins, prd,
+ IsCommutable>;
+}
+
+defm VPAND : avx512_logic_rm_vl_dq<0xDB, 0xDB, "vpand", and,
SSE_INTALU_ITINS_P, HasAVX512, 1>;
-defm VPOR : avx512_binop_rm_vl_dq<0xEB, 0xEB, "vpor", or,
+defm VPOR : avx512_logic_rm_vl_dq<0xEB, 0xEB, "vpor", or,
SSE_INTALU_ITINS_P, HasAVX512, 1>;
-defm VPXOR : avx512_binop_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
+defm VPXOR : avx512_logic_rm_vl_dq<0xEF, 0xEF, "vpxor", xor,
SSE_INTALU_ITINS_P, HasAVX512, 1>;
-defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
+defm VPANDN : avx512_logic_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
SSE_INTALU_ITINS_P, HasAVX512, 0>;
//===----------------------------------------------------------------------===//
@@ -3763,13 +4260,13 @@ defm VPANDN : avx512_binop_rm_vl_dq<0xDF, 0xDF, "vpandn", X86andnp,
multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
SDNode OpNode, SDNode VecNode, OpndItins itins,
bit IsCommutable> {
-
+ let ExeDomain = _.ExeDomain in {
defm rr_Int : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(VecNode (_.VT _.RC:$src1), (_.VT _.RC:$src2),
(i32 FROUND_CURRENT)),
- itins.rr, IsCommutable>;
+ itins.rr>;
defm rm_Int : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
@@ -3777,25 +4274,27 @@ multiclass avx512_fp_scalar<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
(VecNode (_.VT _.RC:$src1),
(_.VT (scalar_to_vector (_.ScalarLdFrag addr:$src2))),
(i32 FROUND_CURRENT)),
- itins.rm, IsCommutable>;
- let isCodeGenOnly = 1, isCommutable = IsCommutable,
- Predicates = [HasAVX512] in {
+ itins.rm>;
+ let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
- itins.rr>;
+ itins.rr> {
+ let isCommutable = IsCommutable;
+ }
def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.ScalarMemOp:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.FRC:$dst, (OpNode _.FRC:$src1,
(_.ScalarLdFrag addr:$src2)))], itins.rm>;
}
+ }
}
multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
SDNode VecNode, OpndItins itins, bit IsCommutable = 0> {
-
+ let ExeDomain = _.ExeDomain in
defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr,
"$rc, $src2, $src1", "$src1, $src2, $rc",
@@ -3805,7 +4304,7 @@ multiclass avx512_fp_scalar_round<bits<8> opc, string OpcodeStr,X86VectorVTInfo
}
multiclass avx512_fp_scalar_sae<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
SDNode VecNode, OpndItins itins, bit IsCommutable> {
-
+ let ExeDomain = _.ExeDomain in
defm rrb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"{sae}, $src2, $src1", "$src1, $src2, {sae}",
@@ -3843,9 +4342,9 @@ multiclass avx512_binop_s_sae<bits<8> opc, string OpcodeStr, SDNode OpNode,
XD, VEX_W, EVEX_4V, VEX_LIG, EVEX_CD8<64, CD8VT1>;
}
defm VADD : avx512_binop_s_round<0x58, "vadd", fadd, X86faddRnd, SSE_ALU_ITINS_S, 1>;
-defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnd, SSE_ALU_ITINS_S, 1>;
+defm VMUL : avx512_binop_s_round<0x59, "vmul", fmul, X86fmulRnd, SSE_MUL_ITINS_S, 1>;
defm VSUB : avx512_binop_s_round<0x5C, "vsub", fsub, X86fsubRnd, SSE_ALU_ITINS_S, 0>;
-defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnd, SSE_ALU_ITINS_S, 0>;
+defm VDIV : avx512_binop_s_round<0x5E, "vdiv", fdiv, X86fdivRnd, SSE_DIV_ITINS_S, 0>;
defm VMIN : avx512_binop_s_sae <0x5D, "vmin", X86fmin, X86fminRnd, SSE_ALU_ITINS_S, 0>;
defm VMAX : avx512_binop_s_sae <0x5F, "vmax", X86fmax, X86fmaxRnd, SSE_ALU_ITINS_S, 0>;
@@ -3853,12 +4352,14 @@ defm VMAX : avx512_binop_s_sae <0x5F, "vmax", X86fmax, X86fmaxRnd, SSE_ALU_ITIN
// X86fminc and X86fmaxc instead of X86fmin and X86fmax
multiclass avx512_comutable_binop_s<bits<8> opc, string OpcodeStr,
X86VectorVTInfo _, SDNode OpNode, OpndItins itins> {
- let isCodeGenOnly = 1, isCommutable =1, Predicates = [HasAVX512] in {
+ let isCodeGenOnly = 1, Predicates = [HasAVX512] in {
def rr : I< opc, MRMSrcReg, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.FRC:$dst, (OpNode _.FRC:$src1, _.FRC:$src2))],
- itins.rr>;
+ itins.rr> {
+ let isCommutable = 1;
+ }
def rm : I< opc, MRMSrcMem, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.ScalarMemOp:$src2),
OpcodeStr#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
@@ -3882,27 +4383,35 @@ defm VMAXCSDZ : avx512_comutable_binop_s<0x5F, "vmaxsd", f64x_info, X86fmaxc,
SSE_ALU_ITINS_S.d>, XD, VEX_W, EVEX_4V, VEX_LIG,
EVEX_CD8<64, CD8VT1>;
-multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86VectorVTInfo _, bit IsCommutable> {
+multiclass avx512_fp_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
+ X86VectorVTInfo _, OpndItins itins,
+ bit IsCommutable> {
+ let ExeDomain = _.ExeDomain, hasSideEffects = 0 in {
defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
"$src2, $src1", "$src1, $src2",
- (_.VT (OpNode _.RC:$src1, _.RC:$src2))>, EVEX_4V;
- defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
- "$src2, $src1", "$src1, $src2",
- (OpNode _.RC:$src1, (_.LdFrag addr:$src2))>, EVEX_4V;
- defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
- "${src2}"##_.BroadcastStr##", $src1",
- "$src1, ${src2}"##_.BroadcastStr,
- (OpNode _.RC:$src1, (_.VT (X86VBroadcast
- (_.ScalarLdFrag addr:$src2))))>,
- EVEX_4V, EVEX_B;
+ (_.VT (OpNode _.RC:$src1, _.RC:$src2)), itins.rr,
+ IsCommutable>, EVEX_4V;
+ let mayLoad = 1 in {
+ defm rm: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.MemOp:$src2), OpcodeStr##_.Suffix,
+ "$src2, $src1", "$src1, $src2",
+ (OpNode _.RC:$src1, (_.LdFrag addr:$src2)), itins.rm>,
+ EVEX_4V;
+ defm rmb: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr##_.Suffix,
+ "${src2}"##_.BroadcastStr##", $src1",
+ "$src1, ${src2}"##_.BroadcastStr,
+ (OpNode _.RC:$src1, (_.VT (X86VBroadcast
+ (_.ScalarLdFrag addr:$src2)))),
+ itins.rm>, EVEX_4V, EVEX_B;
+ }
+ }
}
-multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
- X86VectorVTInfo _> {
+multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNodeRnd,
+ X86VectorVTInfo _> {
+ let ExeDomain = _.ExeDomain in
defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, AVX512RC:$rc), OpcodeStr##_.Suffix,
"$rc, $src2, $src1", "$src1, $src2, $rc",
@@ -3911,8 +4420,9 @@ multiclass avx512_fp_round_packed<bits<8> opc, string OpcodeStr, SDNode OpNodeRn
}
-multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
- X86VectorVTInfo _> {
+multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr, SDPatternOperator OpNodeRnd,
+ X86VectorVTInfo _> {
+ let ExeDomain = _.ExeDomain in
defm rb: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr##_.Suffix,
"{sae}, $src2, $src1", "$src1, $src2, {sae}",
@@ -3920,30 +4430,31 @@ multiclass avx512_fp_sae_packed<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd,
EVEX_4V, EVEX_B;
}
-multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
- Predicate prd, bit IsCommutable = 0> {
+multiclass avx512_fp_binop_p<bits<8> opc, string OpcodeStr, SDPatternOperator OpNode,
+ Predicate prd, SizeItins itins,
+ bit IsCommutable = 0> {
let Predicates = [prd] in {
defm PSZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v16f32_info,
- IsCommutable>, EVEX_V512, PS,
+ itins.s, IsCommutable>, EVEX_V512, PS,
EVEX_CD8<32, CD8VF>;
defm PDZ : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f64_info,
- IsCommutable>, EVEX_V512, PD, VEX_W,
+ itins.d, IsCommutable>, EVEX_V512, PD, VEX_W,
EVEX_CD8<64, CD8VF>;
}
// Define only if AVX512VL feature is present.
let Predicates = [prd, HasVLX] in {
defm PSZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f32x_info,
- IsCommutable>, EVEX_V128, PS,
+ itins.s, IsCommutable>, EVEX_V128, PS,
EVEX_CD8<32, CD8VF>;
defm PSZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v8f32x_info,
- IsCommutable>, EVEX_V256, PS,
+ itins.s, IsCommutable>, EVEX_V256, PS,
EVEX_CD8<32, CD8VF>;
defm PDZ128 : avx512_fp_packed<opc, OpcodeStr, OpNode, v2f64x_info,
- IsCommutable>, EVEX_V128, PD, VEX_W,
+ itins.d, IsCommutable>, EVEX_V128, PD, VEX_W,
EVEX_CD8<64, CD8VF>;
defm PDZ256 : avx512_fp_packed<opc, OpcodeStr, OpNode, v4f64x_info,
- IsCommutable>, EVEX_V256, PD, VEX_W,
+ itins.d, IsCommutable>, EVEX_V256, PD, VEX_W,
EVEX_CD8<64, CD8VF>;
}
}
@@ -3962,26 +4473,140 @@ multiclass avx512_fp_binop_p_sae<bits<8> opc, string OpcodeStr, SDNode OpNodeRnd
EVEX_V512, PD, VEX_W,EVEX_CD8<64, CD8VF>;
}
-defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, HasAVX512, 1>,
+defm VADD : avx512_fp_binop_p<0x58, "vadd", fadd, HasAVX512,
+ SSE_ALU_ITINS_P, 1>,
avx512_fp_binop_p_round<0x58, "vadd", X86faddRnd>;
-defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512, 1>,
+defm VMUL : avx512_fp_binop_p<0x59, "vmul", fmul, HasAVX512,
+ SSE_MUL_ITINS_P, 1>,
avx512_fp_binop_p_round<0x59, "vmul", X86fmulRnd>;
-defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512>,
+defm VSUB : avx512_fp_binop_p<0x5C, "vsub", fsub, HasAVX512, SSE_ALU_ITINS_P>,
avx512_fp_binop_p_round<0x5C, "vsub", X86fsubRnd>;
-defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512>,
+defm VDIV : avx512_fp_binop_p<0x5E, "vdiv", fdiv, HasAVX512, SSE_DIV_ITINS_P>,
avx512_fp_binop_p_round<0x5E, "vdiv", X86fdivRnd>;
-defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512, 0>,
+defm VMIN : avx512_fp_binop_p<0x5D, "vmin", X86fmin, HasAVX512,
+ SSE_ALU_ITINS_P, 0>,
avx512_fp_binop_p_sae<0x5D, "vmin", X86fminRnd>;
-defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512, 0>,
+defm VMAX : avx512_fp_binop_p<0x5F, "vmax", X86fmax, HasAVX512,
+ SSE_ALU_ITINS_P, 0>,
avx512_fp_binop_p_sae<0x5F, "vmax", X86fmaxRnd>;
let isCodeGenOnly = 1 in {
- defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512, 1>;
- defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512, 1>;
+ defm VMINC : avx512_fp_binop_p<0x5D, "vmin", X86fminc, HasAVX512,
+ SSE_ALU_ITINS_P, 1>;
+ defm VMAXC : avx512_fp_binop_p<0x5F, "vmax", X86fmaxc, HasAVX512,
+ SSE_ALU_ITINS_P, 1>;
+}
+defm VAND : avx512_fp_binop_p<0x54, "vand", null_frag, HasDQI,
+ SSE_ALU_ITINS_P, 1>;
+defm VANDN : avx512_fp_binop_p<0x55, "vandn", null_frag, HasDQI,
+ SSE_ALU_ITINS_P, 0>;
+defm VOR : avx512_fp_binop_p<0x56, "vor", null_frag, HasDQI,
+ SSE_ALU_ITINS_P, 1>;
+defm VXOR : avx512_fp_binop_p<0x57, "vxor", null_frag, HasDQI,
+ SSE_ALU_ITINS_P, 1>;
+
+// These patterns catch floating-point selects with bitcasted integer logic ops.
+multiclass avx512_fp_logical_lowering<string InstrStr, SDNode OpNode,
+ X86VectorVTInfo _, Predicate prd> {
+let Predicates = [prd] in {
+ // Masked register-register logical operations.
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
+ _.RC:$src0)),
+ (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
+ _.RC:$src1, _.RC:$src2)>;
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
+ _.ImmAllZerosV)),
+ (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
+ _.RC:$src2)>;
+ // Masked register-memory logical operations.
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (bitconvert (_.i64VT (OpNode _.RC:$src1,
+ (load addr:$src2)))),
+ _.RC:$src0)),
+ (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
+ _.RC:$src1, addr:$src2)>;
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (bitconvert (_.i64VT (OpNode _.RC:$src1, (load addr:$src2)))),
+ _.ImmAllZerosV)),
+ (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
+ addr:$src2)>;
+ // Register-broadcast logical operations.
+ def : Pat<(_.i64VT (OpNode _.RC:$src1,
+ (bitconvert (_.VT (X86VBroadcast
+ (_.ScalarLdFrag addr:$src2)))))),
+ (!cast<Instruction>(InstrStr#rmb) _.RC:$src1, addr:$src2)>;
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (bitconvert
+ (_.i64VT (OpNode _.RC:$src1,
+ (bitconvert (_.VT
+ (X86VBroadcast
+ (_.ScalarLdFrag addr:$src2))))))),
+ _.RC:$src0)),
+ (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
+ _.RC:$src1, addr:$src2)>;
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (bitconvert
+ (_.i64VT (OpNode _.RC:$src1,
+ (bitconvert (_.VT
+ (X86VBroadcast
+ (_.ScalarLdFrag addr:$src2))))))),
+ _.ImmAllZerosV)),
+ (!cast<Instruction>(InstrStr#rmbkz) _.KRCWM:$mask,
+ _.RC:$src1, addr:$src2)>;
+}
+}
+
+multiclass avx512_fp_logical_lowering_sizes<string InstrStr, SDNode OpNode> {
+ defm : avx512_fp_logical_lowering<InstrStr#DZ128, OpNode, v4f32x_info, HasVLX>;
+ defm : avx512_fp_logical_lowering<InstrStr#QZ128, OpNode, v2f64x_info, HasVLX>;
+ defm : avx512_fp_logical_lowering<InstrStr#DZ256, OpNode, v8f32x_info, HasVLX>;
+ defm : avx512_fp_logical_lowering<InstrStr#QZ256, OpNode, v4f64x_info, HasVLX>;
+ defm : avx512_fp_logical_lowering<InstrStr#DZ, OpNode, v16f32_info, HasAVX512>;
+ defm : avx512_fp_logical_lowering<InstrStr#QZ, OpNode, v8f64_info, HasAVX512>;
+}
+
+defm : avx512_fp_logical_lowering_sizes<"VPAND", and>;
+defm : avx512_fp_logical_lowering_sizes<"VPOR", or>;
+defm : avx512_fp_logical_lowering_sizes<"VPXOR", xor>;
+defm : avx512_fp_logical_lowering_sizes<"VPANDN", X86andnp>;
+
+let Predicates = [HasVLX,HasDQI] in {
+ // Use packed logical operations for scalar ops.
+ def : Pat<(f64 (X86fand FR64X:$src1, FR64X:$src2)),
+ (COPY_TO_REGCLASS (VANDPDZ128rr
+ (COPY_TO_REGCLASS FR64X:$src1, VR128X),
+ (COPY_TO_REGCLASS FR64X:$src2, VR128X)), FR64X)>;
+ def : Pat<(f64 (X86for FR64X:$src1, FR64X:$src2)),
+ (COPY_TO_REGCLASS (VORPDZ128rr
+ (COPY_TO_REGCLASS FR64X:$src1, VR128X),
+ (COPY_TO_REGCLASS FR64X:$src2, VR128X)), FR64X)>;
+ def : Pat<(f64 (X86fxor FR64X:$src1, FR64X:$src2)),
+ (COPY_TO_REGCLASS (VXORPDZ128rr
+ (COPY_TO_REGCLASS FR64X:$src1, VR128X),
+ (COPY_TO_REGCLASS FR64X:$src2, VR128X)), FR64X)>;
+ def : Pat<(f64 (X86fandn FR64X:$src1, FR64X:$src2)),
+ (COPY_TO_REGCLASS (VANDNPDZ128rr
+ (COPY_TO_REGCLASS FR64X:$src1, VR128X),
+ (COPY_TO_REGCLASS FR64X:$src2, VR128X)), FR64X)>;
+
+ def : Pat<(f32 (X86fand FR32X:$src1, FR32X:$src2)),
+ (COPY_TO_REGCLASS (VANDPSZ128rr
+ (COPY_TO_REGCLASS FR32X:$src1, VR128X),
+ (COPY_TO_REGCLASS FR32X:$src2, VR128X)), FR32X)>;
+ def : Pat<(f32 (X86for FR32X:$src1, FR32X:$src2)),
+ (COPY_TO_REGCLASS (VORPSZ128rr
+ (COPY_TO_REGCLASS FR32X:$src1, VR128X),
+ (COPY_TO_REGCLASS FR32X:$src2, VR128X)), FR32X)>;
+ def : Pat<(f32 (X86fxor FR32X:$src1, FR32X:$src2)),
+ (COPY_TO_REGCLASS (VXORPSZ128rr
+ (COPY_TO_REGCLASS FR32X:$src1, VR128X),
+ (COPY_TO_REGCLASS FR32X:$src2, VR128X)), FR32X)>;
+ def : Pat<(f32 (X86fandn FR32X:$src1, FR32X:$src2)),
+ (COPY_TO_REGCLASS (VANDNPSZ128rr
+ (COPY_TO_REGCLASS FR32X:$src1, VR128X),
+ (COPY_TO_REGCLASS FR32X:$src2, VR128X)), FR32X)>;
}
-defm VAND : avx512_fp_binop_p<0x54, "vand", X86fand, HasDQI, 1>;
-defm VANDN : avx512_fp_binop_p<0x55, "vandn", X86fandn, HasDQI, 0>;
-defm VOR : avx512_fp_binop_p<0x56, "vor", X86for, HasDQI, 1>;
-defm VXOR : avx512_fp_binop_p<0x57, "vxor", X86fxor, HasDQI, 1>;
multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
@@ -4157,6 +4782,7 @@ defm VPTESTNM : avx512_vptest_all_forms<0x26, 0x27, "vptestnm", X86testnm>, T8X
//===----------------------------------------------------------------------===//
multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> {
+ let ExeDomain = _.ExeDomain in {
defm ri : AVX512_maskable<opc, ImmFormR, _, (outs _.RC:$dst),
(ins _.RC:$src1, u8imm:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
@@ -4168,10 +4794,12 @@ multiclass avx512_shift_rmi<bits<8> opc, Format ImmFormR, Format ImmFormM,
(_.VT (OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
(i8 imm:$src2))),
SSE_INTSHIFT_ITINS_P.rm>;
+ }
}
multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
string OpcodeStr, SDNode OpNode, X86VectorVTInfo _> {
+ let ExeDomain = _.ExeDomain in
defm mbi : AVX512_maskable<opc, ImmFormM, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src1, u8imm:$src2), OpcodeStr,
"$src2, ${src1}"##_.BroadcastStr, "${src1}"##_.BroadcastStr##", $src2",
@@ -4182,6 +4810,7 @@ multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM,
multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType SrcVT, PatFrag bc_frag, X86VectorVTInfo _> {
// src2 is always 128-bit
+ let ExeDomain = _.ExeDomain in {
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, VR128X:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
@@ -4193,6 +4822,7 @@ multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(_.VT (OpNode _.RC:$src1, (bc_frag (loadv2i64 addr:$src2)))),
SSE_INTSHIFT_ITINS_P.rm>, AVX512BIBase,
EVEX_4V;
+ }
}
multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
@@ -4286,6 +4916,7 @@ defm VPSRL : avx512_shift_types<0xD2, 0xD3, 0xD1, "vpsrl", X86vsrl>;
//===-------------------------------------------------------------------===//
multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
+ let ExeDomain = _.ExeDomain in {
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
@@ -4298,10 +4929,12 @@ multiclass avx512_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
(_.VT (bitconvert (_.LdFrag addr:$src2))))),
SSE_INTSHIFT_ITINS_P.rm>, AVX5128IBase, EVEX_4V,
EVEX_CD8<_.EltSize, CD8VF>;
+ }
}
multiclass avx512_var_shift_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _> {
+ let ExeDomain = _.ExeDomain in
defm rmb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2), OpcodeStr,
"${src2}"##_.BroadcastStr##", $src1",
@@ -4375,9 +5008,6 @@ defm VPSLLV : avx512_var_shift_types<0x47, "vpsllv", shl>,
defm VPSRAV : avx512_var_shift_types<0x46, "vpsrav", sra>,
avx512_var_shift_w<0x11, "vpsravw", sra>,
avx512_var_shift_w_lowering<avx512vl_i16_info, sra>;
-let isCodeGenOnly = 1 in
- defm VPSRAV_Int : avx512_var_shift_types<0x46, "vpsrav", X86vsrav>,
- avx512_var_shift_w<0x11, "vpsravw", X86vsrav>;
defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl>,
avx512_var_shift_w<0x10, "vpsrlvw", srl>,
@@ -4385,6 +5015,76 @@ defm VPSRLV : avx512_var_shift_types<0x45, "vpsrlv", srl>,
defm VPRORV : avx512_var_shift_types<0x14, "vprorv", rotr>;
defm VPROLV : avx512_var_shift_types<0x15, "vprolv", rotl>;
+// Special handling for VPSRAV intrinsics.
+multiclass avx512_var_shift_int_lowering<string InstrStr, X86VectorVTInfo _,
+ list<Predicate> p> {
+ let Predicates = p in {
+ def : Pat<(_.VT (X86vsrav _.RC:$src1, _.RC:$src2)),
+ (!cast<Instruction>(InstrStr#_.ZSuffix#rr) _.RC:$src1,
+ _.RC:$src2)>;
+ def : Pat<(_.VT (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2)))),
+ (!cast<Instruction>(InstrStr#_.ZSuffix##rm)
+ _.RC:$src1, addr:$src2)>;
+ let AddedComplexity = 20 in {
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (X86vsrav _.RC:$src1, _.RC:$src2), _.RC:$src0)),
+ (!cast<Instruction>(InstrStr#_.ZSuffix#rrk) _.RC:$src0,
+ _.KRC:$mask, _.RC:$src1, _.RC:$src2)>;
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2))),
+ _.RC:$src0)),
+ (!cast<Instruction>(InstrStr#_.ZSuffix##rmk) _.RC:$src0,
+ _.KRC:$mask, _.RC:$src1, addr:$src2)>;
+ }
+ let AddedComplexity = 30 in {
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (X86vsrav _.RC:$src1, _.RC:$src2), _.ImmAllZerosV)),
+ (!cast<Instruction>(InstrStr#_.ZSuffix#rrkz) _.KRC:$mask,
+ _.RC:$src1, _.RC:$src2)>;
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (X86vsrav _.RC:$src1, (bitconvert (_.LdFrag addr:$src2))),
+ _.ImmAllZerosV)),
+ (!cast<Instruction>(InstrStr#_.ZSuffix##rmkz) _.KRC:$mask,
+ _.RC:$src1, addr:$src2)>;
+ }
+ }
+}
+
+multiclass avx512_var_shift_int_lowering_mb<string InstrStr, X86VectorVTInfo _,
+ list<Predicate> p> :
+ avx512_var_shift_int_lowering<InstrStr, _, p> {
+ let Predicates = p in {
+ def : Pat<(_.VT (X86vsrav _.RC:$src1,
+ (X86VBroadcast (_.ScalarLdFrag addr:$src2)))),
+ (!cast<Instruction>(InstrStr#_.ZSuffix##rmb)
+ _.RC:$src1, addr:$src2)>;
+ let AddedComplexity = 20 in
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (X86vsrav _.RC:$src1,
+ (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
+ _.RC:$src0)),
+ (!cast<Instruction>(InstrStr#_.ZSuffix##rmbk) _.RC:$src0,
+ _.KRC:$mask, _.RC:$src1, addr:$src2)>;
+ let AddedComplexity = 30 in
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (X86vsrav _.RC:$src1,
+ (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
+ _.ImmAllZerosV)),
+ (!cast<Instruction>(InstrStr#_.ZSuffix##rmbkz) _.KRC:$mask,
+ _.RC:$src1, addr:$src2)>;
+ }
+}
+
+defm : avx512_var_shift_int_lowering<"VPSRAVW", v8i16x_info, [HasVLX, HasBWI]>;
+defm : avx512_var_shift_int_lowering<"VPSRAVW", v16i16x_info, [HasVLX, HasBWI]>;
+defm : avx512_var_shift_int_lowering<"VPSRAVW", v32i16_info, [HasBWI]>;
+defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v4i32x_info, [HasVLX]>;
+defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v8i32x_info, [HasVLX]>;
+defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v16i32_info, [HasAVX512]>;
+defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v2i64x_info, [HasVLX]>;
+defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v4i64x_info, [HasVLX]>;
+defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v8i64_info, [HasAVX512]>;
+
//===-------------------------------------------------------------------===//
// 1-src variable permutation VPERMW/D/Q
//===-------------------------------------------------------------------===//
@@ -4501,8 +5201,10 @@ multiclass avx512_permil<string OpcodeStr, bits<8> OpcImm, bits<8> OpcVar,
EVEX, AVX512AIi8Base, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
+let ExeDomain = SSEPackedSingle in
defm VPERMILPS : avx512_permil<"vpermilps", 0x04, 0x0C, avx512vl_f32_info,
avx512vl_i32_info>;
+let ExeDomain = SSEPackedDouble in
defm VPERMILPD : avx512_permil<"vpermilpd", 0x05, 0x0D, avx512vl_f64_info,
avx512vl_i64_info>, VEX_W;
//===----------------------------------------------------------------------===//
@@ -4666,61 +5368,71 @@ let Predicates = [HasAVX512] in {
// FMA - Fused Multiply Operations
//
-let Constraints = "$src1 = $dst" in {
multiclass avx512_fma3p_213_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86VectorVTInfo _> {
+ X86VectorVTInfo _, string Suff> {
+ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
- (_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>,
+ (_.VT (OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3)), 1, 1>,
AVX512FMA3Base;
defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
- (_.VT (OpNode _.RC:$src1, _.RC:$src2, (_.LdFrag addr:$src3)))>,
+ (_.VT (OpNode _.RC:$src2, _.RC:$src1, (_.LdFrag addr:$src3))), 1, 0>,
AVX512FMA3Base;
defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3),
OpcodeStr, !strconcat("${src3}", _.BroadcastStr,", $src2"),
!strconcat("$src2, ${src3}", _.BroadcastStr ),
- (OpNode _.RC:$src1,
- _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3))))>,
+ (OpNode _.RC:$src2,
+ _.RC:$src1,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))), 1, 0>,
AVX512FMA3Base, EVEX_B;
+ }
+
+ // Additional pattern for folding broadcast nodes in other orders.
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (OpNode _.RC:$src1, _.RC:$src2,
+ (X86VBroadcast (_.ScalarLdFrag addr:$src3))),
+ _.RC:$src1)),
+ (!cast<Instruction>(NAME#Suff#_.ZSuffix#mbk) _.RC:$src1,
+ _.KRCWM:$mask, _.RC:$src2, addr:$src3)>;
}
multiclass avx512_fma3_213_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86VectorVTInfo _> {
+ X86VectorVTInfo _, string Suff> {
+ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
- (_.VT ( OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3, (i32 imm:$rc)))>,
+ (_.VT ( OpNode _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 imm:$rc))), 1, 1>,
AVX512FMA3Base, EVEX_B, EVEX_RC;
}
-} // Constraints = "$src1 = $dst"
multiclass avx512_fma3p_213_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode OpNodeRnd, AVX512VLVectorVTInfo _> {
+ SDNode OpNodeRnd, AVX512VLVectorVTInfo _,
+ string Suff> {
let Predicates = [HasAVX512] in {
- defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info512>,
- avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, _.info512>,
- EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
+ defm Z : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info512, Suff>,
+ avx512_fma3_213_round<opc, OpcodeStr, OpNodeRnd, _.info512,
+ Suff>, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
}
let Predicates = [HasVLX, HasAVX512] in {
- defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info256>,
+ defm Z256 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info256, Suff>,
EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
- defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info128>,
+ defm Z128 : avx512_fma3p_213_rm<opc, OpcodeStr, OpNode, _.info128, Suff>,
EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
}
multiclass avx512_fma3p_213_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode OpNodeRnd > {
+ SDNode OpNodeRnd > {
defm PS : avx512_fma3p_213_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
- avx512vl_f32_info>;
+ avx512vl_f32_info, "PS">;
defm PD : avx512_fma3p_213_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
- avx512vl_f64_info>, VEX_W;
+ avx512vl_f64_info, "PD">, VEX_W;
}
defm VFMADD213 : avx512_fma3p_213_f<0xA8, "vfmadd213", X86Fmadd, X86FmaddRnd>;
@@ -4731,19 +5443,19 @@ defm VFNMADD213 : avx512_fma3p_213_f<0xAC, "vfnmadd213", X86Fnmadd, X86FnmaddR
defm VFNMSUB213 : avx512_fma3p_213_f<0xAE, "vfnmsub213", X86Fnmsub, X86FnmsubRnd>;
-let Constraints = "$src1 = $dst" in {
multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86VectorVTInfo _> {
+ X86VectorVTInfo _, string Suff> {
+ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
- (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1))>,
+ (_.VT (OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1)), 1, 1>,
AVX512FMA3Base;
defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3),
OpcodeStr, "$src3, $src2", "$src2, $src3",
- (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1))>,
+ (_.VT (OpNode _.RC:$src2, (_.LdFrag addr:$src3), _.RC:$src1)), 1, 0>,
AVX512FMA3Base;
defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
@@ -4752,40 +5464,60 @@ multiclass avx512_fma3p_231_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
"$src2, ${src3}"##_.BroadcastStr,
(_.VT (OpNode _.RC:$src2,
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
- _.RC:$src1))>, AVX512FMA3Base, EVEX_B;
+ _.RC:$src1)), 1, 0>, AVX512FMA3Base, EVEX_B;
+ }
+
+ // Additional patterns for folding broadcast nodes in other orders.
+ def : Pat<(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
+ _.RC:$src2, _.RC:$src1)),
+ (!cast<Instruction>(NAME#Suff#_.ZSuffix#mb) _.RC:$src1,
+ _.RC:$src2, addr:$src3)>;
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
+ _.RC:$src2, _.RC:$src1),
+ _.RC:$src1)),
+ (!cast<Instruction>(NAME#Suff#_.ZSuffix#mbk) _.RC:$src1,
+ _.KRCWM:$mask, _.RC:$src2, addr:$src3)>;
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
+ _.RC:$src2, _.RC:$src1),
+ _.ImmAllZerosV)),
+ (!cast<Instruction>(NAME#Suff#_.ZSuffix#mbkz) _.RC:$src1,
+ _.KRCWM:$mask, _.RC:$src2, addr:$src3)>;
}
multiclass avx512_fma3_231_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86VectorVTInfo _> {
+ X86VectorVTInfo _, string Suff> {
+ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
- (_.VT ( OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 imm:$rc)))>,
+ (_.VT ( OpNode _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 imm:$rc))), 1, 1>,
AVX512FMA3Base, EVEX_B, EVEX_RC;
}
-} // Constraints = "$src1 = $dst"
multiclass avx512_fma3p_231_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode OpNodeRnd, AVX512VLVectorVTInfo _> {
+ SDNode OpNodeRnd, AVX512VLVectorVTInfo _,
+ string Suff> {
let Predicates = [HasAVX512] in {
- defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info512>,
- avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, _.info512>,
- EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
+ defm Z : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info512, Suff>,
+ avx512_fma3_231_round<opc, OpcodeStr, OpNodeRnd, _.info512,
+ Suff>, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
}
let Predicates = [HasVLX, HasAVX512] in {
- defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info256>,
+ defm Z256 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info256, Suff>,
EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
- defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info128>,
+ defm Z128 : avx512_fma3p_231_rm<opc, OpcodeStr, OpNode, _.info128, Suff>,
EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
}
multiclass avx512_fma3p_231_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode OpNodeRnd > {
+ SDNode OpNodeRnd > {
defm PS : avx512_fma3p_231_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
- avx512vl_f32_info>;
+ avx512vl_f32_info, "PS">;
defm PD : avx512_fma3p_231_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
- avx512vl_f64_info>, VEX_W;
+ avx512vl_f64_info, "PD">, VEX_W;
}
defm VFMADD231 : avx512_fma3p_231_f<0xB8, "vfmadd231", X86Fmadd, X86FmaddRnd>;
@@ -4795,61 +5527,71 @@ defm VFMSUBADD231 : avx512_fma3p_231_f<0xB7, "vfmsubadd231", X86Fmsubadd, X86Fms
defm VFNMADD231 : avx512_fma3p_231_f<0xBC, "vfnmadd231", X86Fnmadd, X86FnmaddRnd>;
defm VFNMSUB231 : avx512_fma3p_231_f<0xBE, "vfnmsub231", X86Fnmsub, X86FnmsubRnd>;
-let Constraints = "$src1 = $dst" in {
multiclass avx512_fma3p_132_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86VectorVTInfo _> {
+ X86VectorVTInfo _, string Suff> {
+ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
defm r: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
- (ins _.RC:$src3, _.RC:$src2),
- OpcodeStr, "$src2, $src3", "$src3, $src2",
- (_.VT (OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3))>,
+ (ins _.RC:$src2, _.RC:$src3),
+ OpcodeStr, "$src3, $src2", "$src2, $src3",
+ (_.VT (OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2)), 1, 1>,
AVX512FMA3Base;
defm m: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src3, _.MemOp:$src2),
- OpcodeStr, "$src2, $src3", "$src3, $src2",
- (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2), _.RC:$src3))>,
+ (ins _.RC:$src2, _.MemOp:$src3),
+ OpcodeStr, "$src3, $src2", "$src2, $src3",
+ (_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src3), _.RC:$src2)), 1, 0>,
AVX512FMA3Base;
defm mb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src3, _.ScalarMemOp:$src2),
- OpcodeStr, "${src2}"##_.BroadcastStr##", $src3",
- "$src3, ${src2}"##_.BroadcastStr,
+ (ins _.RC:$src2, _.ScalarMemOp:$src3),
+ OpcodeStr, "${src3}"##_.BroadcastStr##", $src2",
+ "$src2, ${src3}"##_.BroadcastStr,
(_.VT (OpNode _.RC:$src1,
- (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
- _.RC:$src3))>, AVX512FMA3Base, EVEX_B;
+ (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
+ _.RC:$src2)), 1, 0>, AVX512FMA3Base, EVEX_B;
+ }
+
+ // Additional patterns for folding broadcast nodes in other orders.
+ def : Pat<(_.VT (vselect _.KRCWM:$mask,
+ (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
+ _.RC:$src1, _.RC:$src2),
+ _.RC:$src1)),
+ (!cast<Instruction>(NAME#Suff#_.ZSuffix#mbk) _.RC:$src1,
+ _.KRCWM:$mask, _.RC:$src2, addr:$src3)>;
}
multiclass avx512_fma3_132_round<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86VectorVTInfo _> {
+ X86VectorVTInfo _, string Suff> {
+ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in
defm rb: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
- (ins _.RC:$src3, _.RC:$src2, AVX512RC:$rc),
- OpcodeStr, "$rc, $src2, $src3", "$src3, $src2, $rc",
- (_.VT ( OpNode _.RC:$src1, _.RC:$src2, _.RC:$src3, (i32 imm:$rc)))>,
+ (ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
+ OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc",
+ (_.VT ( OpNode _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 imm:$rc))), 1, 1>,
AVX512FMA3Base, EVEX_B, EVEX_RC;
}
-} // Constraints = "$src1 = $dst"
multiclass avx512_fma3p_132_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode OpNodeRnd, AVX512VLVectorVTInfo _> {
+ SDNode OpNodeRnd, AVX512VLVectorVTInfo _,
+ string Suff> {
let Predicates = [HasAVX512] in {
- defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info512>,
- avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, _.info512>,
- EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
+ defm Z : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info512, Suff>,
+ avx512_fma3_132_round<opc, OpcodeStr, OpNodeRnd, _.info512,
+ Suff>, EVEX_V512, EVEX_CD8<_.info512.EltSize, CD8VF>;
}
let Predicates = [HasVLX, HasAVX512] in {
- defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info256>,
+ defm Z256 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info256, Suff>,
EVEX_V256, EVEX_CD8<_.info256.EltSize, CD8VF>;
- defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info128>,
+ defm Z128 : avx512_fma3p_132_rm<opc, OpcodeStr, OpNode, _.info128, Suff>,
EVEX_V128, EVEX_CD8<_.info128.EltSize, CD8VF>;
}
}
multiclass avx512_fma3p_132_f<bits<8> opc, string OpcodeStr, SDNode OpNode,
- SDNode OpNodeRnd > {
+ SDNode OpNodeRnd > {
defm PS : avx512_fma3p_132_common<opc, OpcodeStr#"ps", OpNode, OpNodeRnd,
- avx512vl_f32_info>;
+ avx512vl_f32_info, "PS">;
defm PD : avx512_fma3p_132_common<opc, OpcodeStr#"pd", OpNode, OpNodeRnd,
- avx512vl_f64_info>, VEX_W;
+ avx512vl_f64_info, "PD">, VEX_W;
}
defm VFMADD132 : avx512_fma3p_132_f<0x98, "vfmadd132", X86Fmadd, X86FmaddRnd>;
@@ -4866,18 +5608,18 @@ multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
dag RHS_r, dag RHS_m > {
defm r_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3), OpcodeStr,
- "$src3, $src2", "$src2, $src3", RHS_VEC_r>, AVX512FMA3Base;
+ "$src3, $src2", "$src2, $src3", RHS_VEC_r, 1, 1>, AVX512FMA3Base;
defm m_Int: AVX512_maskable_3src_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3), OpcodeStr,
- "$src3, $src2", "$src2, $src3", RHS_VEC_m>, AVX512FMA3Base;
+ "$src3, $src2", "$src2, $src3", RHS_VEC_m, 1, 1>, AVX512FMA3Base;
defm rb_Int: AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, AVX512RC:$rc),
- OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", RHS_VEC_rb>,
+ OpcodeStr, "$rc, $src3, $src2", "$src2, $src3, $rc", RHS_VEC_rb, 1, 1>,
AVX512FMA3Base, EVEX_B, EVEX_RC;
- let isCodeGenOnly = 1 in {
+ let isCodeGenOnly = 1, isCommutable = 1 in {
def r : AVX512FMA3<opc, MRMSrcReg, (outs _.FRC:$dst),
(ins _.FRC:$src1, _.FRC:$src2, _.FRC:$src3),
!strconcat(OpcodeStr,
@@ -4893,38 +5635,40 @@ multiclass avx512_fma3s_common<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
}// Constraints = "$src1 = $dst"
multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
- string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd, X86VectorVTInfo _ ,
- string SUFF> {
-
- defm NAME#213#SUFF: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix , _ ,
- (_.VT (OpNodeRnd _.RC:$src2, _.RC:$src1, _.RC:$src3, (i32 FROUND_CURRENT))),
- (_.VT (OpNodeRnd _.RC:$src2, _.RC:$src1,
+ string OpcodeStr, SDNode OpNode, SDNode OpNodeRnds1,
+ SDNode OpNodeRnds3, X86VectorVTInfo _ , string SUFF> {
+
+ defm NAME#213#SUFF#Z: avx512_fma3s_common<opc213, OpcodeStr#"213"#_.Suffix , _ ,
+ // Operands for intrinsic are in 123 order to preserve passthru
+ // semantics.
+ (_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src2, _.RC:$src3, (i32 FROUND_CURRENT))),
+ (_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src2,
(_.VT (scalar_to_vector(_.ScalarLdFrag addr:$src3))), (i32 FROUND_CURRENT))),
- (_.VT ( OpNodeRnd _.RC:$src2, _.RC:$src1, _.RC:$src3,
+ (_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src2, _.RC:$src3,
(i32 imm:$rc))),
(set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
_.FRC:$src3))),
(set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src1,
(_.ScalarLdFrag addr:$src3))))>;
- defm NAME#231#SUFF: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix , _ ,
- (_.VT (OpNodeRnd _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 FROUND_CURRENT))),
- (_.VT (OpNodeRnd _.RC:$src2,
+ defm NAME#231#SUFF#Z: avx512_fma3s_common<opc231, OpcodeStr#"231"#_.Suffix , _ ,
+ (_.VT (OpNodeRnds3 _.RC:$src2, _.RC:$src3, _.RC:$src1, (i32 FROUND_CURRENT))),
+ (_.VT (OpNodeRnds3 _.RC:$src2,
(_.VT (scalar_to_vector(_.ScalarLdFrag addr:$src3))),
_.RC:$src1, (i32 FROUND_CURRENT))),
- (_.VT ( OpNodeRnd _.RC:$src2, _.RC:$src3, _.RC:$src1,
+ (_.VT ( OpNodeRnds3 _.RC:$src2, _.RC:$src3, _.RC:$src1,
(i32 imm:$rc))),
(set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2, _.FRC:$src3,
_.FRC:$src1))),
(set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src2,
(_.ScalarLdFrag addr:$src3), _.FRC:$src1)))>;
- defm NAME#132#SUFF: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix , _ ,
- (_.VT (OpNodeRnd _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 FROUND_CURRENT))),
- (_.VT (OpNodeRnd _.RC:$src1,
+ defm NAME#132#SUFF#Z: avx512_fma3s_common<opc132, OpcodeStr#"132"#_.Suffix , _ ,
+ (_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src3, _.RC:$src2, (i32 FROUND_CURRENT))),
+ (_.VT (OpNodeRnds1 _.RC:$src1,
(_.VT (scalar_to_vector(_.ScalarLdFrag addr:$src3))),
_.RC:$src2, (i32 FROUND_CURRENT))),
- (_.VT ( OpNodeRnd _.RC:$src1, _.RC:$src3, _.RC:$src2,
+ (_.VT (OpNodeRnds1 _.RC:$src1, _.RC:$src3, _.RC:$src2,
(i32 imm:$rc))),
(set _.FRC:$dst, (_.EltVT (OpNode _.FRC:$src1, _.FRC:$src3,
_.FRC:$src2))),
@@ -4933,21 +5677,26 @@ multiclass avx512_fma3s_all<bits<8> opc213, bits<8> opc231, bits<8> opc132,
}
multiclass avx512_fma3s<bits<8> opc213, bits<8> opc231, bits<8> opc132,
- string OpcodeStr, SDNode OpNode, SDNode OpNodeRnd>{
+ string OpcodeStr, SDNode OpNode, SDNode OpNodeRnds1,
+ SDNode OpNodeRnds3> {
let Predicates = [HasAVX512] in {
defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
- OpNodeRnd, f32x_info, "SS">,
- EVEX_CD8<32, CD8VT1>, VEX_LIG;
+ OpNodeRnds1, OpNodeRnds3, f32x_info, "SS">,
+ EVEX_CD8<32, CD8VT1>, VEX_LIG;
defm NAME : avx512_fma3s_all<opc213, opc231, opc132, OpcodeStr, OpNode,
- OpNodeRnd, f64x_info, "SD">,
- EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
+ OpNodeRnds1, OpNodeRnds3, f64x_info, "SD">,
+ EVEX_CD8<64, CD8VT1>, VEX_LIG, VEX_W;
}
}
-defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86Fmadd, X86FmaddRnd>;
-defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86FmsubRnd>;
-defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd, X86FnmaddRnd>;
-defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub, X86FnmsubRnd>;
+defm VFMADD : avx512_fma3s<0xA9, 0xB9, 0x99, "vfmadd", X86Fmadd, X86FmaddRnds1,
+ X86FmaddRnds3>;
+defm VFMSUB : avx512_fma3s<0xAB, 0xBB, 0x9B, "vfmsub", X86Fmsub, X86FmsubRnds1,
+ X86FmsubRnds3>;
+defm VFNMADD : avx512_fma3s<0xAD, 0xBD, 0x9D, "vfnmadd", X86Fnmadd,
+ X86FnmaddRnds1, X86FnmaddRnds3>;
+defm VFNMSUB : avx512_fma3s<0xAF, 0xBF, 0x9F, "vfnmsub", X86Fnmsub,
+ X86FnmsubRnds1, X86FnmsubRnds3>;
//===----------------------------------------------------------------------===//
// AVX-512 Packed Multiply of Unsigned 52-bit Integers and Add the Low 52-bit IFMA
@@ -5067,6 +5816,11 @@ defm VCVTSI642SDZ: avx512_vcvtsi_common<0x2A, X86SintToFpRnd, GR64,
v2f64x_info, i64mem, loadi64, "cvtsi2sd{q}">,
XD, VEX_W, EVEX_CD8<64, CD8VT1>;
+def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
+ (VCVTSI2SSZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0>;
+def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
+ (VCVTSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0>;
+
def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))),
(VCVTSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))),
@@ -5098,6 +5852,11 @@ defm VCVTUSI642SDZ : avx512_vcvtsi_common<0x7B, X86UintToFpRnd, GR64,
v2f64x_info, i64mem, loadi64, "cvtusi2sd{q}">,
XD, VEX_W, EVEX_CD8<64, CD8VT1>;
+def : InstAlias<"vcvtusi2ss\t{$src, $src1, $dst|$dst, $src1, $src}",
+ (VCVTUSI2SSZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0>;
+def : InstAlias<"vcvtusi2sd\t{$src, $src1, $dst|$dst, $src1, $src}",
+ (VCVTUSI2SDZrm FR64X:$dst, FR64X:$src1, i32mem:$src), 0>;
+
def : Pat<(f32 (uint_to_fp (loadi32 addr:$src))),
(VCVTUSI2SSZrm (f32 (IMPLICIT_DEF)), addr:$src)>;
def : Pat<(f32 (uint_to_fp (loadi64 addr:$src))),
@@ -5170,106 +5929,134 @@ defm VCVTSD2USI64Z: avx512_cvt_s_int_round<0x79, f64x_info, i64x_info,
// Therefore, the SSE intrinsics are mapped to the AVX512 instructions.
let Predicates = [HasAVX512] in {
def : Pat<(i32 (int_x86_sse_cvtss2si (v4f32 VR128X:$src))),
- (VCVTSS2SIZrr (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
+ (VCVTSS2SIZrr VR128X:$src)>;
+ def : Pat<(i32 (int_x86_sse_cvtss2si (sse_load_f32 addr:$src))),
+ (VCVTSS2SIZrm addr:$src)>;
def : Pat<(i64 (int_x86_sse_cvtss2si64 (v4f32 VR128X:$src))),
- (VCVTSS2SI64Zrr (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
+ (VCVTSS2SI64Zrr VR128X:$src)>;
+ def : Pat<(i64 (int_x86_sse_cvtss2si64 (sse_load_f32 addr:$src))),
+ (VCVTSS2SI64Zrm addr:$src)>;
def : Pat<(i32 (int_x86_sse2_cvtsd2si (v2f64 VR128X:$src))),
- (VCVTSD2SIZrr (COPY_TO_REGCLASS VR128X:$src, FR64X))>;
+ (VCVTSD2SIZrr VR128X:$src)>;
+ def : Pat<(i32 (int_x86_sse2_cvtsd2si (sse_load_f64 addr:$src))),
+ (VCVTSD2SIZrm addr:$src)>;
def : Pat<(i64 (int_x86_sse2_cvtsd2si64 (v2f64 VR128X:$src))),
- (VCVTSD2SI64Zrr (COPY_TO_REGCLASS VR128X:$src, FR64X))>;
+ (VCVTSD2SI64Zrr VR128X:$src)>;
+ def : Pat<(i64 (int_x86_sse2_cvtsd2si64 (sse_load_f64 addr:$src))),
+ (VCVTSD2SI64Zrm addr:$src)>;
} // HasAVX512
-let isCodeGenOnly = 1 , Predicates = [HasAVX512] in {
- defm Int_VCVTSI2SSZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
- int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss{l}",
- SSE_CVT_Scalar, 0>, XS, EVEX_4V;
- defm Int_VCVTSI2SS64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
- int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss{q}",
- SSE_CVT_Scalar, 0>, XS, EVEX_4V, VEX_W;
- defm Int_VCVTSI2SDZ : sse12_cvt_sint_3addr<0x2A, GR32, VR128X,
- int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd{l}",
- SSE_CVT_Scalar, 0>, XD, EVEX_4V;
- defm Int_VCVTSI2SD64Z : sse12_cvt_sint_3addr<0x2A, GR64, VR128X,
- int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}",
- SSE_CVT_Scalar, 0>, XD, EVEX_4V, VEX_W;
-
- defm Int_VCVTUSI2SDZ : sse12_cvt_sint_3addr<0x7B, GR32, VR128X,
- int_x86_avx512_cvtusi2sd, i32mem, loadi32, "cvtusi2sd{l}",
- SSE_CVT_Scalar, 0>, XD, EVEX_4V;
-} // isCodeGenOnly = 1, Predicates = [HasAVX512]
+let Predicates = [HasAVX512] in {
+ def : Pat<(int_x86_sse_cvtsi2ss VR128X:$src1, GR32:$src2),
+ (VCVTSI2SSZrr_Int VR128X:$src1, GR32:$src2)>;
+ def : Pat<(int_x86_sse_cvtsi2ss VR128X:$src1, (loadi32 addr:$src2)),
+ (VCVTSI2SSZrm_Int VR128X:$src1, addr:$src2)>;
+ def : Pat<(int_x86_sse_cvtsi642ss VR128X:$src1, GR64:$src2),
+ (VCVTSI642SSZrr_Int VR128X:$src1, GR64:$src2)>;
+ def : Pat<(int_x86_sse_cvtsi642ss VR128X:$src1, (loadi64 addr:$src2)),
+ (VCVTSI642SSZrm_Int VR128X:$src1, addr:$src2)>;
+ def : Pat<(int_x86_sse2_cvtsi2sd VR128X:$src1, GR32:$src2),
+ (VCVTSI2SDZrr_Int VR128X:$src1, GR32:$src2)>;
+ def : Pat<(int_x86_sse2_cvtsi2sd VR128X:$src1, (loadi32 addr:$src2)),
+ (VCVTSI2SDZrm_Int VR128X:$src1, addr:$src2)>;
+ def : Pat<(int_x86_sse2_cvtsi642sd VR128X:$src1, GR64:$src2),
+ (VCVTSI642SDZrr_Int VR128X:$src1, GR64:$src2)>;
+ def : Pat<(int_x86_sse2_cvtsi642sd VR128X:$src1, (loadi64 addr:$src2)),
+ (VCVTSI642SDZrm_Int VR128X:$src1, addr:$src2)>;
+ def : Pat<(int_x86_avx512_cvtusi2sd VR128X:$src1, GR32:$src2),
+ (VCVTUSI2SDZrr_Int VR128X:$src1, GR32:$src2)>;
+ def : Pat<(int_x86_avx512_cvtusi2sd VR128X:$src1, (loadi32 addr:$src2)),
+ (VCVTUSI2SDZrm_Int VR128X:$src1, addr:$src2)>;
+} // Predicates = [HasAVX512]
// Convert float/double to signed/unsigned int 32/64 with truncation
multiclass avx512_cvt_s_all<bits<8> opc, string asm, X86VectorVTInfo _SrcRC,
X86VectorVTInfo _DstRC, SDNode OpNode,
- SDNode OpNodeRnd>{
+ SDNode OpNodeRnd, string aliasStr>{
let Predicates = [HasAVX512] in {
- def rr : SI<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
+ def rr : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set _DstRC.RC:$dst, (OpNode _SrcRC.FRC:$src))]>, EVEX;
- def rb : SI<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
+ let hasSideEffects = 0 in
+ def rb : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.FRC:$src),
!strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
[]>, EVEX, EVEX_B;
- def rm : SI<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
+ def rm : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst), (ins _SrcRC.ScalarMemOp:$src),
!strconcat(asm,"\t{$src, $dst|$dst, $src}"),
[(set _DstRC.RC:$dst, (OpNode (_SrcRC.ScalarLdFrag addr:$src)))]>,
EVEX;
+ def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "rr") _DstRC.RC:$dst, _SrcRC.FRC:$src), 0>;
+ def : InstAlias<asm # aliasStr # "\t\t{{sae}, $src, $dst|$dst, $src, {sae}}",
+ (!cast<Instruction>(NAME # "rb") _DstRC.RC:$dst, _SrcRC.FRC:$src), 0>;
+ def : InstAlias<asm # aliasStr # "\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "rm") _DstRC.RC:$dst,
+ _SrcRC.ScalarMemOp:$src), 0>;
+
let isCodeGenOnly = 1 in {
- def rr_Int : SI<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
- !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
- [(set _DstRC.RC:$dst, (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src),
- (i32 FROUND_CURRENT)))]>, EVEX, VEX_LIG;
- def rb_Int : SI<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
- !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
- [(set _DstRC.RC:$dst, (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src),
- (i32 FROUND_NO_EXC)))]>,
- EVEX,VEX_LIG , EVEX_B;
- let mayLoad = 1, hasSideEffects = 0 in
- def rm_Int : SI<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
- (ins _SrcRC.MemOp:$src),
- !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
- []>, EVEX, VEX_LIG;
+ def rr_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
+ !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
+ [(set _DstRC.RC:$dst, (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src),
+ (i32 FROUND_CURRENT)))]>, EVEX, VEX_LIG;
+ def rb_Int : AVX512<opc, MRMSrcReg, (outs _DstRC.RC:$dst), (ins _SrcRC.RC:$src),
+ !strconcat(asm,"\t{{sae}, $src, $dst|$dst, $src, {sae}}"),
+ [(set _DstRC.RC:$dst, (OpNodeRnd (_SrcRC.VT _SrcRC.RC:$src),
+ (i32 FROUND_NO_EXC)))]>,
+ EVEX,VEX_LIG , EVEX_B;
+ let mayLoad = 1, hasSideEffects = 0 in
+ def rm_Int : AVX512<opc, MRMSrcMem, (outs _DstRC.RC:$dst),
+ (ins _SrcRC.MemOp:$src),
+ !strconcat(asm,"\t{$src, $dst|$dst, $src}"),
+ []>, EVEX, VEX_LIG;
} // isCodeGenOnly = 1
} //HasAVX512
}
-defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "cvttss2si", f32x_info, i32x_info,
- fp_to_sint,X86cvtts2IntRnd>,
+defm VCVTTSS2SIZ: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i32x_info,
+ fp_to_sint, X86cvtts2IntRnd, "{l}">,
XS, EVEX_CD8<32, CD8VT1>;
-defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "cvttss2si", f32x_info, i64x_info,
- fp_to_sint,X86cvtts2IntRnd>,
+defm VCVTTSS2SI64Z: avx512_cvt_s_all<0x2C, "vcvttss2si", f32x_info, i64x_info,
+ fp_to_sint, X86cvtts2IntRnd, "{q}">,
VEX_W, XS, EVEX_CD8<32, CD8VT1>;
-defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "cvttsd2si", f64x_info, i32x_info,
- fp_to_sint,X86cvtts2IntRnd>,
+defm VCVTTSD2SIZ: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i32x_info,
+ fp_to_sint, X86cvtts2IntRnd, "{l}">,
XD, EVEX_CD8<64, CD8VT1>;
-defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "cvttsd2si", f64x_info, i64x_info,
- fp_to_sint,X86cvtts2IntRnd>,
+defm VCVTTSD2SI64Z: avx512_cvt_s_all<0x2C, "vcvttsd2si", f64x_info, i64x_info,
+ fp_to_sint, X86cvtts2IntRnd, "{q}">,
VEX_W, XD, EVEX_CD8<64, CD8VT1>;
-defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "cvttss2usi", f32x_info, i32x_info,
- fp_to_uint,X86cvtts2UIntRnd>,
+defm VCVTTSS2USIZ: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i32x_info,
+ fp_to_uint, X86cvtts2UIntRnd, "{l}">,
XS, EVEX_CD8<32, CD8VT1>;
-defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "cvttss2usi", f32x_info, i64x_info,
- fp_to_uint,X86cvtts2UIntRnd>,
+defm VCVTTSS2USI64Z: avx512_cvt_s_all<0x78, "vcvttss2usi", f32x_info, i64x_info,
+ fp_to_uint, X86cvtts2UIntRnd, "{q}">,
XS,VEX_W, EVEX_CD8<32, CD8VT1>;
-defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "cvttsd2usi", f64x_info, i32x_info,
- fp_to_uint,X86cvtts2UIntRnd>,
+defm VCVTTSD2USIZ: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i32x_info,
+ fp_to_uint, X86cvtts2UIntRnd, "{l}">,
XD, EVEX_CD8<64, CD8VT1>;
-defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "cvttsd2usi", f64x_info, i64x_info,
- fp_to_uint,X86cvtts2UIntRnd>,
+defm VCVTTSD2USI64Z: avx512_cvt_s_all<0x78, "vcvttsd2usi", f64x_info, i64x_info,
+ fp_to_uint, X86cvtts2UIntRnd, "{q}">,
XD, VEX_W, EVEX_CD8<64, CD8VT1>;
let Predicates = [HasAVX512] in {
def : Pat<(i32 (int_x86_sse_cvttss2si (v4f32 VR128X:$src))),
- (VCVTTSS2SIZrr_Int (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
+ (VCVTTSS2SIZrr_Int VR128X:$src)>;
+ def : Pat<(i32 (int_x86_sse_cvttss2si (sse_load_f32 addr:$src))),
+ (VCVTTSS2SIZrm_Int addr:$src)>;
def : Pat<(i64 (int_x86_sse_cvttss2si64 (v4f32 VR128X:$src))),
- (VCVTTSS2SI64Zrr_Int (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
+ (VCVTTSS2SI64Zrr_Int VR128X:$src)>;
+ def : Pat<(i64 (int_x86_sse_cvttss2si64 (sse_load_f32 addr:$src))),
+ (VCVTTSS2SI64Zrm_Int addr:$src)>;
def : Pat<(i32 (int_x86_sse2_cvttsd2si (v2f64 VR128X:$src))),
- (VCVTTSD2SIZrr_Int (COPY_TO_REGCLASS VR128X:$src, FR64X))>;
+ (VCVTTSD2SIZrr_Int VR128X:$src)>;
+ def : Pat<(i32 (int_x86_sse2_cvttsd2si (sse_load_f64 addr:$src))),
+ (VCVTTSD2SIZrm_Int addr:$src)>;
def : Pat<(i64 (int_x86_sse2_cvttsd2si64 (v2f64 VR128X:$src))),
- (VCVTTSD2SI64Zrr_Int (COPY_TO_REGCLASS VR128X:$src, FR64X))>;
-
+ (VCVTTSD2SI64Zrr_Int VR128X:$src)>;
+ def : Pat<(i64 (int_x86_sse2_cvttsd2si64 (sse_load_f64 addr:$src))),
+ (VCVTTSD2SI64Zrm_Int addr:$src)>;
} // HasAVX512
//===----------------------------------------------------------------------===//
// AVX-512 Convert form float to double and back
@@ -5280,14 +6067,16 @@ multiclass avx512_cvt_fp_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInfo _
(ins _.RC:$src1, _Src.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode (_.VT _.RC:$src1),
- (_Src.VT _Src.RC:$src2)))>,
+ (_Src.VT _Src.RC:$src2),
+ (i32 FROUND_CURRENT)))>,
EVEX_4V, VEX_LIG, Sched<[WriteCvtF2F]>;
defm rm : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _Src.RC:$src1, _Src.ScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
(_.VT (OpNode (_.VT _.RC:$src1),
(_Src.VT (scalar_to_vector
- (_Src.ScalarLdFrag addr:$src2)))))>,
+ (_Src.ScalarLdFrag addr:$src2))),
+ (i32 FROUND_CURRENT)))>,
EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>;
}
@@ -5314,36 +6103,35 @@ multiclass avx512_cvt_fp_rc_scalar<bits<8> opc, string OpcodeStr, X86VectorVTInf
EVEX_4V, VEX_LIG, Sched<[WriteCvtF2FLd, ReadAfterLd]>,
EVEX_B, EVEX_RC;
}
-multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr, SDNode OpNode,
+multiclass avx512_cvt_fp_scalar_sd2ss<bits<8> opc, string OpcodeStr,
SDNode OpNodeRnd, X86VectorVTInfo _src,
X86VectorVTInfo _dst> {
let Predicates = [HasAVX512] in {
- defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode>,
+ defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd>,
avx512_cvt_fp_rc_scalar<opc, OpcodeStr, _dst, _src,
- OpNodeRnd>, VEX_W, EVEX_CD8<64, CD8VT1>,
- EVEX_V512, XD;
+ OpNodeRnd>, VEX_W, EVEX_CD8<64, CD8VT1>, XD;
}
}
-multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr, SDNode OpNode,
+multiclass avx512_cvt_fp_scalar_ss2sd<bits<8> opc, string OpcodeStr,
SDNode OpNodeRnd, X86VectorVTInfo _src,
X86VectorVTInfo _dst> {
let Predicates = [HasAVX512] in {
- defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNode>,
+ defm Z : avx512_cvt_fp_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd>,
avx512_cvt_fp_sae_scalar<opc, OpcodeStr, _dst, _src, OpNodeRnd>,
- EVEX_CD8<32, CD8VT1>, XS, EVEX_V512;
+ EVEX_CD8<32, CD8VT1>, XS;
}
}
-defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss", X86fround,
+defm VCVTSD2SS : avx512_cvt_fp_scalar_sd2ss<0x5A, "vcvtsd2ss",
X86froundRnd, f64x_info, f32x_info>;
-defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd", X86fpext,
+defm VCVTSS2SD : avx512_cvt_fp_scalar_ss2sd<0x5A, "vcvtss2sd",
X86fpextRnd,f32x_info, f64x_info >;
-def : Pat<(f64 (fextend FR32X:$src)),
+def : Pat<(f64 (fpextend FR32X:$src)),
(COPY_TO_REGCLASS (VCVTSS2SDZrr (COPY_TO_REGCLASS FR32X:$src, VR128X),
(COPY_TO_REGCLASS FR32X:$src, VR128X)), VR128X)>,
Requires<[HasAVX512]>;
-def : Pat<(f64 (fextend (loadf32 addr:$src))),
+def : Pat<(f64 (fpextend (loadf32 addr:$src))),
(COPY_TO_REGCLASS (VCVTSS2SDZrm (v4f32 (IMPLICIT_DEF)), addr:$src), VR128X)>,
Requires<[HasAVX512]>;
@@ -5356,7 +6144,7 @@ def : Pat<(f64 (extloadf32 addr:$src)),
(COPY_TO_REGCLASS (VMOVSSZrm addr:$src), VR128X)), VR128X)>,
Requires<[HasAVX512, OptForSpeed]>;
-def : Pat<(f32 (fround FR64X:$src)),
+def : Pat<(f32 (fpround FR64X:$src)),
(COPY_TO_REGCLASS (VCVTSD2SSZrr (COPY_TO_REGCLASS FR64X:$src, VR128X),
(COPY_TO_REGCLASS FR64X:$src, VR128X)), VR128X)>,
Requires<[HasAVX512]>;
@@ -5368,14 +6156,14 @@ def : Pat<(f32 (fround FR64X:$src)),
multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
X86VectorVTInfo _Src, SDNode OpNode,
string Broadcast = _.BroadcastStr,
- string Alias = ""> {
+ string Alias = "", X86MemOperand MemOp = _Src.MemOp> {
defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _Src.RC:$src), OpcodeStr, "$src", "$src",
(_.VT (OpNode (_Src.VT _Src.RC:$src)))>, EVEX;
defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _Src.MemOp:$src), OpcodeStr#Alias, "$src", "$src",
+ (ins MemOp:$src), OpcodeStr#Alias, "$src", "$src",
(_.VT (OpNode (_Src.VT
(bitconvert (_Src.LdFrag addr:$src)))))>, EVEX;
@@ -5410,14 +6198,14 @@ multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
// Extend Float to Double
multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr> {
let Predicates = [HasAVX512] in {
- defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8f32x_info, fextend>,
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8f32x_info, fpextend>,
avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
X86vfpextRnd>, EVEX_V512;
}
let Predicates = [HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4f32x_info,
- X86vfpext, "{1to2}">, EVEX_V128;
- defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4f32x_info, fextend>,
+ X86vfpext, "{1to2}", "", f64mem>, EVEX_V128;
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4f32x_info, fpextend>,
EVEX_V256;
}
}
@@ -5425,15 +6213,24 @@ multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr> {
// Truncate Double to Float
multiclass avx512_cvtpd2ps<bits<8> opc, string OpcodeStr> {
let Predicates = [HasAVX512] in {
- defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, fround>,
+ defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8f64_info, fpround>,
avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8f64_info,
X86vfproundRnd>, EVEX_V512;
}
let Predicates = [HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2f64x_info,
X86vfpround, "{1to2}", "{x}">, EVEX_V128;
- defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, fround,
+ defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4f64x_info, fpround,
"{1to4}", "{y}">, EVEX_V256;
+
+ def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
+ def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, f128mem:$src), 0>;
+ def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
+ def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, f256mem:$src), 0>;
}
}
@@ -5446,6 +6243,12 @@ def : Pat<(v8f64 (extloadv8f32 addr:$src)),
(VCVTPS2PDZrm addr:$src)>;
let Predicates = [HasVLX] in {
+ let AddedComplexity = 15 in
+ def : Pat<(X86vzmovl (v2f64 (bitconvert
+ (v4f32 (X86vfpround (v2f64 VR128X:$src)))))),
+ (VCVTPD2PSZ128rr VR128X:$src)>;
+ def : Pat<(v2f64 (extloadv2f32 addr:$src)),
+ (VCVTPS2PDZ128rm addr:$src)>;
def : Pat<(v4f64 (extloadv4f32 addr:$src)),
(VCVTPS2PDZ256rm addr:$src)>;
}
@@ -5460,7 +6263,7 @@ multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
let Predicates = [HasVLX] in {
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
- OpNode128, "{1to2}">, EVEX_V128;
+ OpNode128, "{1to2}", "", i64mem>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode>,
EVEX_V256;
}
@@ -5515,8 +6318,8 @@ multiclass avx512_cvtps2dq<bits<8> opc, string OpcodeStr,
}
// Convert Double to Signed/Unsigned Doubleword with truncation
-multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr,
- SDNode OpNode, SDNode OpNodeRnd> {
+multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDNode OpNode128, SDNode OpNodeRnd> {
let Predicates = [HasAVX512] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i32x_info, v8f64_info, OpNode>,
avx512_vcvt_fp_sae<opc, OpcodeStr, v8i32x_info, v8f64_info,
@@ -5524,13 +6327,22 @@ multiclass avx512_cvttpd2dq<bits<8> opc, string OpcodeStr,
}
let Predicates = [HasVLX] in {
// we need "x"/"y" suffixes in order to distinguish between 128 and 256
- // memory forms of these instructions in Asm Parcer. They have the same
+ // memory forms of these instructions in Asm Parser. They have the same
// dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
// due to the same reason.
- defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info, OpNode,
- "{1to2}", "{x}">, EVEX_V128;
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v2f64x_info,
+ OpNode128, "{1to2}", "{x}">, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
"{1to4}", "{y}">, EVEX_V256;
+
+ def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
+ def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, i128mem:$src), 0>;
+ def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
+ def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, i256mem:$src), 0>;
}
}
@@ -5551,6 +6363,15 @@ multiclass avx512_cvtpd2dq<bits<8> opc, string OpcodeStr,
"{1to2}", "{x}">, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i32x_info, v4f64x_info, OpNode,
"{1to4}", "{y}">, EVEX_V256;
+
+ def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
+ def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, f128mem:$src), 0>;
+ def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
+ def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, f256mem:$src), 0>;
}
}
@@ -5614,15 +6435,15 @@ multiclass avx512_cvtps2qq<bits<8> opc, string OpcodeStr,
// Explicitly specified broadcast string, since we take only 2 elements
// from v4f32x_info source
defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
- "{1to2}">, EVEX_V128;
+ "{1to2}", "", f64mem>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode>,
EVEX_V256;
}
}
// Convert Float to Signed/Unsigned Quadword with truncation
-multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr,
- SDNode OpNode, SDNode OpNodeRnd> {
+multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDNode OpNode128, SDNode OpNodeRnd> {
let Predicates = [HasDQI] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8i64_info, v8f32x_info, OpNode>,
avx512_vcvt_fp_sae<opc, OpcodeStr, v8i64_info, v8f32x_info,
@@ -5631,16 +6452,16 @@ multiclass avx512_cvttps2qq<bits<8> opc, string OpcodeStr,
let Predicates = [HasDQI, HasVLX] in {
// Explicitly specified broadcast string, since we take only 2 elements
// from v4f32x_info source
- defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode,
- "{1to2}">, EVEX_V128;
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2i64x_info, v4f32x_info, OpNode128,
+ "{1to2}", "", f64mem>, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4i64x_info, v4f32x_info, OpNode>,
EVEX_V256;
}
}
// Convert Signed/Unsigned Quadword to Float
-multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr,
- SDNode OpNode, SDNode OpNodeRnd> {
+multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ SDNode OpNode128, SDNode OpNodeRnd> {
let Predicates = [HasDQI] in {
defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f32x_info, v8i64_info, OpNode>,
avx512_vcvt_fp_rc<opc, OpcodeStr, v8f32x_info, v8i64_info,
@@ -5651,37 +6472,46 @@ multiclass avx512_cvtqq2ps<bits<8> opc, string OpcodeStr,
// memory forms of these instructions in Asm Parser. They have the same
// dest type - 'v4i32x_info'. We also specify the broadcast string explicitly
// due to the same reason.
- defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, OpNode,
+ defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v2i64x_info, OpNode128,
"{1to2}", "{x}">, EVEX_V128;
defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f32x_info, v4i64x_info, OpNode,
"{1to4}", "{y}">, EVEX_V256;
+
+ def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z128rr") VR128X:$dst, VR128X:$src), 0>;
+ def : InstAlias<OpcodeStr##"x\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z128rm") VR128X:$dst, i128mem:$src), 0>;
+ def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z256rr") VR128X:$dst, VR256X:$src), 0>;
+ def : InstAlias<OpcodeStr##"y\t{$src, $dst|$dst, $src}",
+ (!cast<Instruction>(NAME # "Z256rm") VR128X:$dst, i256mem:$src), 0>;
}
}
-defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", sint_to_fp, X86cvtdq2pd>, XS,
- EVEX_CD8<32, CD8VH>;
+defm VCVTDQ2PD : avx512_cvtdq2pd<0xE6, "vcvtdq2pd", sint_to_fp, X86VSintToFP>,
+ XS, EVEX_CD8<32, CD8VH>;
defm VCVTDQ2PS : avx512_cvtdq2ps<0x5B, "vcvtdq2ps", sint_to_fp,
X86VSintToFpRnd>,
PS, EVEX_CD8<32, CD8VF>;
defm VCVTTPS2DQ : avx512_cvttps2dq<0x5B, "vcvttps2dq", fp_to_sint,
- X86VFpToSintRnd>,
+ X86cvttp2siRnd>,
XS, EVEX_CD8<32, CD8VF>;
-defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", fp_to_sint,
- X86VFpToSintRnd>,
+defm VCVTTPD2DQ : avx512_cvttpd2dq<0xE6, "vcvttpd2dq", fp_to_sint, X86cvttp2si,
+ X86cvttp2siRnd>,
PD, VEX_W, EVEX_CD8<64, CD8VF>;
defm VCVTTPS2UDQ : avx512_cvttps2dq<0x78, "vcvttps2udq", fp_to_uint,
- X86VFpToUintRnd>, PS,
+ X86cvttp2uiRnd>, PS,
EVEX_CD8<32, CD8VF>;
defm VCVTTPD2UDQ : avx512_cvttpd2dq<0x78, "vcvttpd2udq", fp_to_uint,
- X86VFpToUintRnd>, PS, VEX_W,
+ X86cvttp2ui, X86cvttp2uiRnd>, PS, VEX_W,
EVEX_CD8<64, CD8VF>;
-defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp, X86cvtudq2pd>,
+defm VCVTUDQ2PD : avx512_cvtdq2pd<0x7A, "vcvtudq2pd", uint_to_fp, X86VUintToFP>,
XS, EVEX_CD8<32, CD8VH>;
defm VCVTUDQ2PS : avx512_cvtdq2ps<0x7A, "vcvtudq2ps", uint_to_fp,
@@ -5717,18 +6547,18 @@ defm VCVTPS2UQQ : avx512_cvtps2qq<0x79, "vcvtps2uqq", X86cvtp2UInt,
X86cvtp2UIntRnd>, PD, EVEX_CD8<32, CD8VH>;
defm VCVTTPD2QQ : avx512_cvttpd2qq<0x7A, "vcvttpd2qq", fp_to_sint,
- X86VFpToSintRnd>, VEX_W,
+ X86cvttp2siRnd>, VEX_W,
PD, EVEX_CD8<64, CD8VF>;
-defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", fp_to_sint,
- X86VFpToSintRnd>, PD, EVEX_CD8<32, CD8VH>;
+defm VCVTTPS2QQ : avx512_cvttps2qq<0x7A, "vcvttps2qq", fp_to_sint, X86cvttp2si,
+ X86cvttp2siRnd>, PD, EVEX_CD8<32, CD8VH>;
defm VCVTTPD2UQQ : avx512_cvttpd2qq<0x78, "vcvttpd2uqq", fp_to_uint,
- X86VFpToUintRnd>, VEX_W,
+ X86cvttp2uiRnd>, VEX_W,
PD, EVEX_CD8<64, CD8VF>;
-defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", fp_to_uint,
- X86VFpToUintRnd>, PD, EVEX_CD8<32, CD8VH>;
+defm VCVTTPS2UQQ : avx512_cvttps2qq<0x78, "vcvttps2uqq", fp_to_uint, X86cvttp2ui,
+ X86cvttp2uiRnd>, PD, EVEX_CD8<32, CD8VH>;
defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", sint_to_fp,
X86VSintToFpRnd>, VEX_W, XS, EVEX_CD8<64, CD8VF>;
@@ -5736,45 +6566,151 @@ defm VCVTQQ2PD : avx512_cvtqq2pd<0xE6, "vcvtqq2pd", sint_to_fp,
defm VCVTUQQ2PD : avx512_cvtqq2pd<0x7A, "vcvtuqq2pd", uint_to_fp,
X86VUintToFpRnd>, VEX_W, XS, EVEX_CD8<64, CD8VF>;
-defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp,
+defm VCVTQQ2PS : avx512_cvtqq2ps<0x5B, "vcvtqq2ps", sint_to_fp, X86VSintToFP,
X86VSintToFpRnd>, VEX_W, PS, EVEX_CD8<64, CD8VF>;
-defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp,
+defm VCVTUQQ2PS : avx512_cvtqq2ps<0x7A, "vcvtuqq2ps", uint_to_fp, X86VUintToFP,
X86VUintToFpRnd>, VEX_W, XD, EVEX_CD8<64, CD8VF>;
let Predicates = [HasAVX512, NoVLX] in {
def : Pat<(v8i32 (fp_to_uint (v8f32 VR256X:$src1))),
(EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
- (v16f32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
+ (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
+ VR256X:$src1, sub_ymm)))), sub_ymm)>;
def : Pat<(v4i32 (fp_to_uint (v4f32 VR128X:$src1))),
(EXTRACT_SUBREG (v16i32 (VCVTTPS2UDQZrr
- (v16f32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_xmm)>;
+ (v16f32 (INSERT_SUBREG (IMPLICIT_DEF),
+ VR128X:$src1, sub_xmm)))), sub_xmm)>;
def : Pat<(v4i32 (fp_to_uint (v4f64 VR256X:$src1))),
(EXTRACT_SUBREG (v8i32 (VCVTTPD2UDQZrr
- (v8f64 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_xmm)>;
+ (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
+ VR256X:$src1, sub_ymm)))), sub_xmm)>;
+
+def : Pat<(v4i32 (X86cvttp2ui (v2f64 VR128X:$src))),
+ (EXTRACT_SUBREG (v8i32 (VCVTTPD2UDQZrr
+ (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
+ VR128X:$src, sub_xmm)))), sub_xmm)>;
def : Pat<(v8f32 (uint_to_fp (v8i32 VR256X:$src1))),
(EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
- (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)))), sub_ymm)>;
+ (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
+ VR256X:$src1, sub_ymm)))), sub_ymm)>;
def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))),
(EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr
- (v16i32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_xmm)>;
+ (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
+ VR128X:$src1, sub_xmm)))), sub_xmm)>;
def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))),
(EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
- (v8i32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_ymm)>;
+ (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
+ VR128X:$src1, sub_xmm)))), sub_ymm)>;
+
+def : Pat<(v2f64 (X86VUintToFP (v4i32 VR128X:$src1))),
+ (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr
+ (v8i32 (INSERT_SUBREG (IMPLICIT_DEF),
+ VR128X:$src1, sub_xmm)))), sub_xmm)>;
+}
+
+let Predicates = [HasAVX512, HasVLX] in {
+ let AddedComplexity = 15 in {
+ def : Pat<(X86vzmovl (v2i64 (bitconvert
+ (v4i32 (X86cvtp2Int (v2f64 VR128X:$src)))))),
+ (VCVTPD2DQZ128rr VR128X:$src)>;
+ def : Pat<(v4i32 (bitconvert (X86vzmovl (v2i64 (bitconvert
+ (v4i32 (X86cvtp2UInt (v2f64 VR128X:$src)))))))),
+ (VCVTPD2UDQZ128rr VR128X:$src)>;
+ def : Pat<(X86vzmovl (v2i64 (bitconvert
+ (v4i32 (X86cvttp2si (v2f64 VR128X:$src)))))),
+ (VCVTTPD2DQZ128rr VR128X:$src)>;
+ def : Pat<(v4i32 (bitconvert (X86vzmovl (v2i64 (bitconvert
+ (v4i32 (X86cvttp2ui (v2f64 VR128X:$src)))))))),
+ (VCVTTPD2UDQZ128rr VR128X:$src)>;
+ }
}
let Predicates = [HasAVX512] in {
- def : Pat<(v8f32 (fround (loadv8f64 addr:$src))),
+ def : Pat<(v8f32 (fpround (loadv8f64 addr:$src))),
(VCVTPD2PSZrm addr:$src)>;
def : Pat<(v8f64 (extloadv8f32 addr:$src)),
(VCVTPS2PDZrm addr:$src)>;
}
+let Predicates = [HasDQI, HasVLX] in {
+ let AddedComplexity = 15 in {
+ def : Pat<(X86vzmovl (v2f64 (bitconvert
+ (v4f32 (X86VSintToFP (v2i64 VR128X:$src)))))),
+ (VCVTQQ2PSZ128rr VR128X:$src)>;
+ def : Pat<(X86vzmovl (v2f64 (bitconvert
+ (v4f32 (X86VUintToFP (v2i64 VR128X:$src)))))),
+ (VCVTUQQ2PSZ128rr VR128X:$src)>;
+ }
+}
+
+let Predicates = [HasDQI, NoVLX] in {
+def : Pat<(v2i64 (fp_to_sint (v2f64 VR128X:$src1))),
+ (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
+ (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
+ VR128X:$src1, sub_xmm)))), sub_xmm)>;
+
+def : Pat<(v4i64 (fp_to_sint (v4f32 VR128X:$src1))),
+ (EXTRACT_SUBREG (v8i64 (VCVTTPS2QQZrr
+ (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
+ VR128X:$src1, sub_xmm)))), sub_ymm)>;
+
+def : Pat<(v4i64 (fp_to_sint (v4f64 VR256X:$src1))),
+ (EXTRACT_SUBREG (v8i64 (VCVTTPD2QQZrr
+ (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
+ VR256X:$src1, sub_ymm)))), sub_ymm)>;
+
+def : Pat<(v2i64 (fp_to_uint (v2f64 VR128X:$src1))),
+ (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
+ (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
+ VR128X:$src1, sub_xmm)))), sub_xmm)>;
+
+def : Pat<(v4i64 (fp_to_uint (v4f32 VR128X:$src1))),
+ (EXTRACT_SUBREG (v8i64 (VCVTTPS2UQQZrr
+ (v8f32 (INSERT_SUBREG (IMPLICIT_DEF),
+ VR128X:$src1, sub_xmm)))), sub_ymm)>;
+
+def : Pat<(v4i64 (fp_to_uint (v4f64 VR256X:$src1))),
+ (EXTRACT_SUBREG (v8i64 (VCVTTPD2UQQZrr
+ (v8f64 (INSERT_SUBREG (IMPLICIT_DEF),
+ VR256X:$src1, sub_ymm)))), sub_ymm)>;
+
+def : Pat<(v4f32 (sint_to_fp (v4i64 VR256X:$src1))),
+ (EXTRACT_SUBREG (v8f32 (VCVTQQ2PSZrr
+ (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
+ VR256X:$src1, sub_ymm)))), sub_xmm)>;
+
+def : Pat<(v2f64 (sint_to_fp (v2i64 VR128X:$src1))),
+ (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
+ (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
+ VR128X:$src1, sub_xmm)))), sub_xmm)>;
+
+def : Pat<(v4f64 (sint_to_fp (v4i64 VR256X:$src1))),
+ (EXTRACT_SUBREG (v8f64 (VCVTQQ2PDZrr
+ (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
+ VR256X:$src1, sub_ymm)))), sub_ymm)>;
+
+def : Pat<(v4f32 (uint_to_fp (v4i64 VR256X:$src1))),
+ (EXTRACT_SUBREG (v8f32 (VCVTUQQ2PSZrr
+ (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
+ VR256X:$src1, sub_ymm)))), sub_xmm)>;
+
+def : Pat<(v2f64 (uint_to_fp (v2i64 VR128X:$src1))),
+ (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
+ (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
+ VR128X:$src1, sub_xmm)))), sub_xmm)>;
+
+def : Pat<(v4f64 (uint_to_fp (v4i64 VR256X:$src1))),
+ (EXTRACT_SUBREG (v8f64 (VCVTUQQ2PDZrr
+ (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
+ VR256X:$src1, sub_ymm)))), sub_ymm)>;
+}
+
//===----------------------------------------------------------------------===//
// Half precision conversion instructions
//===----------------------------------------------------------------------===//
@@ -5816,14 +6752,13 @@ multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
(ins _src.RC:$src1, i32u8imm:$src2),
"vcvtps2ph", "$src2, $src1", "$src1, $src2",
(X86cvtps2ph (_src.VT _src.RC:$src1),
- (i32 imm:$src2),
- (i32 FROUND_CURRENT)),
- NoItinerary, 0, X86select>, AVX512AIi8Base;
+ (i32 imm:$src2)),
+ NoItinerary, 0, 0, X86select>, AVX512AIi8Base;
def mr : AVX512AIi8<0x1D, MRMDestMem, (outs),
(ins x86memop:$dst, _src.RC:$src1, i32u8imm:$src2),
"vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(store (_dest.VT (X86cvtps2ph (_src.VT _src.RC:$src1),
- (i32 imm:$src2), (i32 FROUND_CURRENT) )),
+ (i32 imm:$src2))),
addr:$dst)]>;
let hasSideEffects = 0, mayStore = 1 in
def mrk : AVX512AIi8<0x1D, MRMDestMem, (outs),
@@ -5832,13 +6767,12 @@ multiclass avx512_cvtps2ph<X86VectorVTInfo _dest, X86VectorVTInfo _src,
[]>, EVEX_K;
}
multiclass avx512_cvtps2ph_sae<X86VectorVTInfo _dest, X86VectorVTInfo _src> {
- defm rb : AVX512_maskable<0x1D, MRMDestReg, _dest ,(outs _dest.RC:$dst),
+ let hasSideEffects = 0 in
+ defm rb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest,
+ (outs _dest.RC:$dst),
(ins _src.RC:$src1, i32u8imm:$src2),
"vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2",
- (X86cvtps2ph (_src.VT _src.RC:$src1),
- (i32 imm:$src2),
- (i32 FROUND_NO_EXC)),
- NoItinerary, 0, X86select>, EVEX_B, AVX512AIi8Base;
+ []>, EVEX_B, AVX512AIi8Base;
}
let Predicates = [HasAVX512] in {
defm VCVTPS2PHZ : avx512_cvtps2ph<v16i16x_info, v16f32_info, f256mem>,
@@ -5852,25 +6786,72 @@ let Predicates = [HasAVX512] in {
}
}
+// Patterns for matching conversions from float to half-float and vice versa.
+let Predicates = [HasVLX] in {
+ // Use MXCSR.RC for rounding instead of explicitly specifying the default
+ // rounding mode (Nearest-Even, encoded as 0). Both are equivalent in the
+ // configurations we support (the default). However, falling back to MXCSR is
+ // more consistent with other instructions, which are always controlled by it.
+ // It's encoded as 0b100.
+ def : Pat<(fp_to_f16 FR32X:$src),
+ (i16 (EXTRACT_SUBREG (VMOVPDI2DIZrr (VCVTPS2PHZ128rr
+ (COPY_TO_REGCLASS FR32X:$src, VR128X), 4)), sub_16bit))>;
+
+ def : Pat<(f16_to_fp GR16:$src),
+ (f32 (COPY_TO_REGCLASS (VCVTPH2PSZ128rr
+ (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128X)), FR32X)) >;
+
+ def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32X:$src))),
+ (f32 (COPY_TO_REGCLASS (VCVTPH2PSZ128rr
+ (VCVTPS2PHZ128rr (COPY_TO_REGCLASS FR32X:$src, VR128X), 4)), FR32X)) >;
+}
+
+// Patterns for matching float to half-float conversion when AVX512 is supported
+// but F16C isn't. In that case we have to use 512-bit vectors.
+let Predicates = [HasAVX512, NoVLX, NoF16C] in {
+ def : Pat<(fp_to_f16 FR32X:$src),
+ (i16 (EXTRACT_SUBREG
+ (VMOVPDI2DIZrr
+ (v8i16 (EXTRACT_SUBREG
+ (VCVTPS2PHZrr
+ (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)),
+ (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)),
+ sub_xmm), 4), sub_xmm))), sub_16bit))>;
+
+ def : Pat<(f16_to_fp GR16:$src),
+ (f32 (COPY_TO_REGCLASS
+ (v4f32 (EXTRACT_SUBREG
+ (VCVTPH2PSZrr
+ (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)),
+ (v8i16 (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128X)),
+ sub_xmm)), sub_xmm)), FR32X))>;
+
+ def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32X:$src))),
+ (f32 (COPY_TO_REGCLASS
+ (v4f32 (EXTRACT_SUBREG
+ (VCVTPH2PSZrr
+ (VCVTPS2PHZrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)),
+ (v4f32 (COPY_TO_REGCLASS FR32X:$src, VR128X)),
+ sub_xmm), 4)), sub_xmm)), FR32X))>;
+}
+
// Unordered/Ordered scalar fp compare with Sae and set EFLAGS
-multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _, SDNode OpNode,
+multiclass avx512_ord_cmp_sae<bits<8> opc, X86VectorVTInfo _,
string OpcodeStr> {
def rb: AVX512<opc, MRMSrcReg, (outs), (ins _.RC:$src1, _.RC:$src2),
!strconcat(OpcodeStr, "\t{{sae}, $src2, $src1|$src1, $src2, {sae}}"),
- [(set EFLAGS, (OpNode (_.VT _.RC:$src1), _.RC:$src2,
- (i32 FROUND_NO_EXC)))],
- IIC_SSE_COMIS_RR>, EVEX, EVEX_B, VEX_LIG, EVEX_V128,
+ [], IIC_SSE_COMIS_RR>, EVEX, EVEX_B, VEX_LIG, EVEX_V128,
Sched<[WriteFAdd]>;
}
let Defs = [EFLAGS], Predicates = [HasAVX512] in {
- defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, X86ucomiSae, "vucomiss">,
+ defm VUCOMISSZ : avx512_ord_cmp_sae<0x2E, v4f32x_info, "vucomiss">,
AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
- defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, X86ucomiSae, "vucomisd">,
+ defm VUCOMISDZ : avx512_ord_cmp_sae<0x2E, v2f64x_info, "vucomisd">,
AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
- defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, X86comiSae, "vcomiss">,
+ defm VCOMISSZ : avx512_ord_cmp_sae<0x2F, v4f32x_info, "vcomiss">,
AVX512PSIi8Base, EVEX_CD8<32, CD8VT1>;
- defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, X86comiSae, "vcomisd">,
+ defm VCOMISDZ : avx512_ord_cmp_sae<0x2F, v2f64x_info, "vcomisd">,
AVX512PDIi8Base, VEX_W, EVEX_CD8<64, CD8VT1>;
}
@@ -6275,7 +7256,7 @@ defm VRNDSCALESD : avx512_rndscale_scalar<0x0B, "vrndscalesd", f64x_info>, VEX_W
multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo SrcInfo, X86VectorVTInfo DestInfo,
X86MemOperand x86memop> {
-
+ let ExeDomain = DestInfo.ExeDomain in
defm rr : AVX512_maskable<opc, MRMDestReg, DestInfo, (outs DestInfo.RC:$dst),
(ins SrcInfo.RC:$src1), OpcodeStr ,"$src1", "$src1",
(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src1)))>,
@@ -6301,7 +7282,8 @@ multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
DestInfo.KRCWM:$mask ,
SrcInfo.RC:$src1)>;
- let mayStore = 1, mayLoad = 1, hasSideEffects = 0 in {
+ let mayStore = 1, mayLoad = 1, hasSideEffects = 0,
+ ExeDomain = DestInfo.ExeDomain in {
def mr : AVX512XS8I<opc, MRMDestMem, (outs),
(ins x86memop:$dst, SrcInfo.RC:$src),
OpcodeStr # "\t{$src, $dst|$dst, $src}",
@@ -6328,23 +7310,6 @@ multiclass avx512_trunc_mr_lowering<X86VectorVTInfo SrcInfo,
addr:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src)>;
}
-multiclass avx512_trunc_sat_mr_lowering<X86VectorVTInfo SrcInfo,
- X86VectorVTInfo DestInfo, string sat > {
-
- def: Pat<(!cast<Intrinsic>("int_x86_avx512_mask_pmov"#sat#"_"#SrcInfo.Suffix#
- DestInfo.Suffix#"_mem_"#SrcInfo.Size)
- addr:$ptr, (SrcInfo.VT SrcInfo.RC:$src), SrcInfo.MRC:$mask),
- (!cast<Instruction>(NAME#SrcInfo.ZSuffix##mrk) addr:$ptr,
- (COPY_TO_REGCLASS SrcInfo.MRC:$mask, SrcInfo.KRCWM),
- (SrcInfo.VT SrcInfo.RC:$src))>;
-
- def: Pat<(!cast<Intrinsic>("int_x86_avx512_mask_pmov"#sat#"_"#SrcInfo.Suffix#
- DestInfo.Suffix#"_mem_"#SrcInfo.Size)
- addr:$ptr, (SrcInfo.VT SrcInfo.RC:$src), -1),
- (!cast<Instruction>(NAME#SrcInfo.ZSuffix##mr) addr:$ptr,
- (SrcInfo.VT SrcInfo.RC:$src))>;
-}
-
multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode,
AVX512VLVectorVTInfo VTSrcInfo, X86VectorVTInfo DestInfoZ128,
X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
@@ -6370,140 +7335,111 @@ multiclass avx512_trunc<bits<8> opc, string OpcodeStr, SDNode OpNode,
truncFrag, mtruncFrag>, EVEX_V512;
}
-multiclass avx512_trunc_sat<bits<8> opc, string OpcodeStr, SDNode OpNode,
- AVX512VLVectorVTInfo VTSrcInfo, X86VectorVTInfo DestInfoZ128,
- X86VectorVTInfo DestInfoZ256, X86VectorVTInfo DestInfoZ,
- X86MemOperand x86memopZ128, X86MemOperand x86memopZ256,
- X86MemOperand x86memopZ, string sat, Predicate prd = HasAVX512>{
-
- let Predicates = [HasVLX, prd] in {
- defm Z128: avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info128,
- DestInfoZ128, x86memopZ128>,
- avx512_trunc_sat_mr_lowering<VTSrcInfo.info128, DestInfoZ128,
- sat>, EVEX_V128;
-
- defm Z256: avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info256,
- DestInfoZ256, x86memopZ256>,
- avx512_trunc_sat_mr_lowering<VTSrcInfo.info256, DestInfoZ256,
- sat>, EVEX_V256;
- }
- let Predicates = [prd] in
- defm Z: avx512_trunc_common<opc, OpcodeStr, OpNode, VTSrcInfo.info512,
- DestInfoZ, x86memopZ>,
- avx512_trunc_sat_mr_lowering<VTSrcInfo.info512, DestInfoZ,
- sat>, EVEX_V512;
-}
-
-multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+multiclass avx512_trunc_qb<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ PatFrag StoreNode, PatFrag MaskedStoreNode> {
defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i64_info,
v16i8x_info, v16i8x_info, v16i8x_info, i16mem, i32mem, i64mem,
- truncstorevi8, masked_truncstorevi8>, EVEX_CD8<8, CD8VO>;
-}
-multiclass avx512_trunc_sat_qb<bits<8> opc, string sat, SDNode OpNode> {
- defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"qb", OpNode, avx512vl_i64_info,
- v16i8x_info, v16i8x_info, v16i8x_info, i16mem, i32mem, i64mem,
- sat>, EVEX_CD8<8, CD8VO>;
+ StoreNode, MaskedStoreNode>, EVEX_CD8<8, CD8VO>;
}
-multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+multiclass avx512_trunc_qw<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ PatFrag StoreNode, PatFrag MaskedStoreNode> {
defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i64_info,
v8i16x_info, v8i16x_info, v8i16x_info, i32mem, i64mem, i128mem,
- truncstorevi16, masked_truncstorevi16>, EVEX_CD8<16, CD8VQ>;
-}
-multiclass avx512_trunc_sat_qw<bits<8> opc, string sat, SDNode OpNode> {
- defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"qw", OpNode, avx512vl_i64_info,
- v8i16x_info, v8i16x_info, v8i16x_info, i32mem, i64mem, i128mem,
- sat>, EVEX_CD8<16, CD8VQ>;
+ StoreNode, MaskedStoreNode>, EVEX_CD8<16, CD8VQ>;
}
-multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+multiclass avx512_trunc_qd<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ PatFrag StoreNode, PatFrag MaskedStoreNode> {
defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i64_info,
v4i32x_info, v4i32x_info, v8i32x_info, i64mem, i128mem, i256mem,
- truncstorevi32, masked_truncstorevi32>, EVEX_CD8<32, CD8VH>;
-}
-multiclass avx512_trunc_sat_qd<bits<8> opc, string sat, SDNode OpNode> {
- defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"qd", OpNode, avx512vl_i64_info,
- v4i32x_info, v4i32x_info, v8i32x_info, i64mem, i128mem, i256mem,
- sat>, EVEX_CD8<32, CD8VH>;
+ StoreNode, MaskedStoreNode>, EVEX_CD8<32, CD8VH>;
}
-multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+multiclass avx512_trunc_db<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ PatFrag StoreNode, PatFrag MaskedStoreNode> {
defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i32_info,
v16i8x_info, v16i8x_info, v16i8x_info, i32mem, i64mem, i128mem,
- truncstorevi8, masked_truncstorevi8>, EVEX_CD8<8, CD8VQ>;
-}
-multiclass avx512_trunc_sat_db<bits<8> opc, string sat, SDNode OpNode> {
- defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"db", OpNode, avx512vl_i32_info,
- v16i8x_info, v16i8x_info, v16i8x_info, i32mem, i64mem, i128mem,
- sat>, EVEX_CD8<8, CD8VQ>;
+ StoreNode, MaskedStoreNode>, EVEX_CD8<8, CD8VQ>;
}
-multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+multiclass avx512_trunc_dw<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ PatFrag StoreNode, PatFrag MaskedStoreNode> {
defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i32_info,
v8i16x_info, v8i16x_info, v16i16x_info, i64mem, i128mem, i256mem,
- truncstorevi16, masked_truncstorevi16>, EVEX_CD8<16, CD8VH>;
-}
-multiclass avx512_trunc_sat_dw<bits<8> opc, string sat, SDNode OpNode> {
- defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"dw", OpNode, avx512vl_i32_info,
- v8i16x_info, v8i16x_info, v16i16x_info, i64mem, i128mem, i256mem,
- sat>, EVEX_CD8<16, CD8VH>;
+ StoreNode, MaskedStoreNode>, EVEX_CD8<16, CD8VH>;
}
-multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode> {
+multiclass avx512_trunc_wb<bits<8> opc, string OpcodeStr, SDNode OpNode,
+ PatFrag StoreNode, PatFrag MaskedStoreNode> {
defm NAME: avx512_trunc<opc, OpcodeStr, OpNode, avx512vl_i16_info,
v16i8x_info, v16i8x_info, v32i8x_info, i64mem, i128mem, i256mem,
- truncstorevi8, masked_truncstorevi8,HasBWI>, EVEX_CD8<16, CD8VH>;
-}
-multiclass avx512_trunc_sat_wb<bits<8> opc, string sat, SDNode OpNode> {
- defm NAME: avx512_trunc_sat<opc, "vpmov"##sat##"wb", OpNode, avx512vl_i16_info,
- v16i8x_info, v16i8x_info, v32i8x_info, i64mem, i128mem, i256mem,
- sat, HasBWI>, EVEX_CD8<16, CD8VH>;
-}
-
-defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", X86vtrunc>;
-defm VPMOVSQB : avx512_trunc_sat_qb<0x22, "s", X86vtruncs>;
-defm VPMOVUSQB : avx512_trunc_sat_qb<0x12, "us", X86vtruncus>;
-
-defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", X86vtrunc>;
-defm VPMOVSQW : avx512_trunc_sat_qw<0x24, "s", X86vtruncs>;
-defm VPMOVUSQW : avx512_trunc_sat_qw<0x14, "us", X86vtruncus>;
-
-defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", X86vtrunc>;
-defm VPMOVSQD : avx512_trunc_sat_qd<0x25, "s", X86vtruncs>;
-defm VPMOVUSQD : avx512_trunc_sat_qd<0x15, "us", X86vtruncus>;
-
-defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", X86vtrunc>;
-defm VPMOVSDB : avx512_trunc_sat_db<0x21, "s", X86vtruncs>;
-defm VPMOVUSDB : avx512_trunc_sat_db<0x11, "us", X86vtruncus>;
-
-defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", X86vtrunc>;
-defm VPMOVSDW : avx512_trunc_sat_dw<0x23, "s", X86vtruncs>;
-defm VPMOVUSDW : avx512_trunc_sat_dw<0x13, "us", X86vtruncus>;
-
-defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", X86vtrunc>;
-defm VPMOVSWB : avx512_trunc_sat_wb<0x20, "s", X86vtruncs>;
-defm VPMOVUSWB : avx512_trunc_sat_wb<0x10, "us", X86vtruncus>;
+ StoreNode, MaskedStoreNode, HasBWI>, EVEX_CD8<16, CD8VH>;
+}
+
+defm VPMOVQB : avx512_trunc_qb<0x32, "vpmovqb", X86vtrunc,
+ truncstorevi8, masked_truncstorevi8>;
+defm VPMOVSQB : avx512_trunc_qb<0x22, "vpmovsqb", X86vtruncs,
+ truncstore_s_vi8, masked_truncstore_s_vi8>;
+defm VPMOVUSQB : avx512_trunc_qb<0x12, "vpmovusqb", X86vtruncus,
+ truncstore_us_vi8, masked_truncstore_us_vi8>;
+
+defm VPMOVQW : avx512_trunc_qw<0x34, "vpmovqw", X86vtrunc,
+ truncstorevi16, masked_truncstorevi16>;
+defm VPMOVSQW : avx512_trunc_qw<0x24, "vpmovsqw", X86vtruncs,
+ truncstore_s_vi16, masked_truncstore_s_vi16>;
+defm VPMOVUSQW : avx512_trunc_qw<0x14, "vpmovusqw", X86vtruncus,
+ truncstore_us_vi16, masked_truncstore_us_vi16>;
+
+defm VPMOVQD : avx512_trunc_qd<0x35, "vpmovqd", X86vtrunc,
+ truncstorevi32, masked_truncstorevi32>;
+defm VPMOVSQD : avx512_trunc_qd<0x25, "vpmovsqd", X86vtruncs,
+ truncstore_s_vi32, masked_truncstore_s_vi32>;
+defm VPMOVUSQD : avx512_trunc_qd<0x15, "vpmovusqd", X86vtruncus,
+ truncstore_us_vi32, masked_truncstore_us_vi32>;
+
+defm VPMOVDB : avx512_trunc_db<0x31, "vpmovdb", X86vtrunc,
+ truncstorevi8, masked_truncstorevi8>;
+defm VPMOVSDB : avx512_trunc_db<0x21, "vpmovsdb", X86vtruncs,
+ truncstore_s_vi8, masked_truncstore_s_vi8>;
+defm VPMOVUSDB : avx512_trunc_db<0x11, "vpmovusdb", X86vtruncus,
+ truncstore_us_vi8, masked_truncstore_us_vi8>;
+
+defm VPMOVDW : avx512_trunc_dw<0x33, "vpmovdw", X86vtrunc,
+ truncstorevi16, masked_truncstorevi16>;
+defm VPMOVSDW : avx512_trunc_dw<0x23, "vpmovsdw", X86vtruncs,
+ truncstore_s_vi16, masked_truncstore_s_vi16>;
+defm VPMOVUSDW : avx512_trunc_dw<0x13, "vpmovusdw", X86vtruncus,
+ truncstore_us_vi16, masked_truncstore_us_vi16>;
+
+defm VPMOVWB : avx512_trunc_wb<0x30, "vpmovwb", X86vtrunc,
+ truncstorevi8, masked_truncstorevi8>;
+defm VPMOVSWB : avx512_trunc_wb<0x20, "vpmovswb", X86vtruncs,
+ truncstore_s_vi8, masked_truncstore_s_vi8>;
+defm VPMOVUSWB : avx512_trunc_wb<0x10, "vpmovuswb", X86vtruncus,
+ truncstore_us_vi8, masked_truncstore_us_vi8>;
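As a point of reference (not part of the patch), a minimal C++ intrinsics sketch of the kind of masked truncating store the masked_truncstore* fragments describe; the intrinsic name and its AVX-512F/VL availability are as documented in immintrin.h:

    #include <immintrin.h>

    // Truncate eight i32 lanes to i16 and store only the lanes selected by k.
    // With AVX-512VL this is expected to become a single masked truncating
    // store of the "vpmovdw %ymm0, (%rdi) {%k1}" form.
    void trunc_store_dw(short *dst, __mmask8 k, __m256i v) {
      _mm256_mask_cvtepi32_storeu_epi16(dst, k, v);
    }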
let Predicates = [HasAVX512, NoVLX] in {
def: Pat<(v8i16 (X86vtrunc (v8i32 VR256X:$src))),
(v8i16 (EXTRACT_SUBREG
- (v16i16 (VPMOVDWZrr (v16i32 (SUBREG_TO_REG (i32 0),
+ (v16i16 (VPMOVDWZrr (v16i32 (INSERT_SUBREG (IMPLICIT_DEF),
VR256X:$src, sub_ymm)))), sub_xmm))>;
def: Pat<(v4i32 (X86vtrunc (v4i64 VR256X:$src))),
(v4i32 (EXTRACT_SUBREG
- (v8i32 (VPMOVQDZrr (v8i64 (SUBREG_TO_REG (i32 0),
+ (v8i32 (VPMOVQDZrr (v8i64 (INSERT_SUBREG (IMPLICIT_DEF),
VR256X:$src, sub_ymm)))), sub_xmm))>;
}
let Predicates = [HasBWI, NoVLX] in {
def: Pat<(v16i8 (X86vtrunc (v16i16 VR256X:$src))),
- (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (SUBREG_TO_REG (i32 0),
+ (v16i8 (EXTRACT_SUBREG (VPMOVWBZrr (v32i16 (INSERT_SUBREG (IMPLICIT_DEF),
VR256X:$src, sub_ymm))), sub_xmm))>;
}
multiclass avx512_extend_common<bits<8> opc, string OpcodeStr,
X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo,
X86MemOperand x86memop, PatFrag LdFrag, SDPatternOperator OpNode>{
+ let ExeDomain = DestInfo.ExeDomain in {
defm rr : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
(ins SrcInfo.RC:$src), OpcodeStr ,"$src", "$src",
(DestInfo.VT (OpNode (SrcInfo.VT SrcInfo.RC:$src)))>,
@@ -6513,6 +7449,7 @@ multiclass avx512_extend_common<bits<8> opc, string OpcodeStr,
(ins x86memop:$src), OpcodeStr ,"$src", "$src",
(DestInfo.VT (LdFrag addr:$src))>,
EVEX;
+ }
}
multiclass avx512_extend_BW<bits<8> opc, string OpcodeStr,
@@ -6685,6 +7622,150 @@ let Predicates = [HasAVX512] in {
defm : avx512_ext_lowering<"DQZ", v8i64_info, v8i32x_info, extloadvi32>;
}
+multiclass AVX512_pmovx_patterns<string OpcPrefix, string ExtTy,
+ SDNode ExtOp, PatFrag ExtLoad16> {
+ // 128-bit patterns
+ let Predicates = [HasVLX, HasBWI] in {
+ def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
+ (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
+ def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
+ (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
+ def : Pat<(v8i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
+ def : Pat<(v8i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
+ def : Pat<(v8i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BWZ128rm) addr:$src)>;
+ }
+ let Predicates = [HasVLX] in {
+ def : Pat<(v4i32 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
+ (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
+ def : Pat<(v4i32 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
+ (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
+ def : Pat<(v4i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
+ def : Pat<(v4i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BDZ128rm) addr:$src)>;
+
+ def : Pat<(v2i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (ExtLoad16 addr:$src)))))),
+ (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
+ def : Pat<(v2i64 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
+ (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
+ def : Pat<(v2i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
+ def : Pat<(v2i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BQZ128rm) addr:$src)>;
+
+ def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
+ (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
+ def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
+ (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
+ def : Pat<(v4i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
+ def : Pat<(v4i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
+ def : Pat<(v4i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#WDZ128rm) addr:$src)>;
+
+ def : Pat<(v2i64 (ExtOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
+ (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
+ def : Pat<(v2i64 (ExtOp (v8i16 (vzmovl_v4i32 addr:$src)))),
+ (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
+ def : Pat<(v2i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
+ def : Pat<(v2i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#WQZ128rm) addr:$src)>;
+
+ def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
+ (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
+ def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
+ (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
+ def : Pat<(v2i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
+ def : Pat<(v2i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
+ def : Pat<(v2i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#DQZ128rm) addr:$src)>;
+ }
+ // 256-bit patterns
+ let Predicates = [HasVLX, HasBWI] in {
+ def : Pat<(v16i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
+ def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
+ def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BWZ256rm) addr:$src)>;
+ }
+ let Predicates = [HasVLX] in {
+ def : Pat<(v8i32 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
+ (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
+ def : Pat<(v8i32 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
+ def : Pat<(v8i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
+ def : Pat<(v8i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BDZ256rm) addr:$src)>;
+
+ def : Pat<(v4i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
+ (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
+ def : Pat<(v4i64 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
+ (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
+ def : Pat<(v4i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
+ def : Pat<(v4i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BQZ256rm) addr:$src)>;
+
+ def : Pat<(v8i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
+ def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
+ def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#WDZ256rm) addr:$src)>;
+
+ def : Pat<(v4i64 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
+ (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
+ def : Pat<(v4i64 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
+ def : Pat<(v4i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
+ def : Pat<(v4i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#WQZ256rm) addr:$src)>;
+
+ def : Pat<(v4i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
+ def : Pat<(v4i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
+ def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#DQZ256rm) addr:$src)>;
+ }
+ // 512-bit patterns
+ let Predicates = [HasBWI] in {
+ def : Pat<(v32i16 (ExtOp (bc_v32i8 (loadv4i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BWZrm) addr:$src)>;
+ }
+ let Predicates = [HasAVX512] in {
+ def : Pat<(v16i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BDZrm) addr:$src)>;
+
+ def : Pat<(v8i64 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
+ (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
+ def : Pat<(v8i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#BQZrm) addr:$src)>;
+
+ def : Pat<(v16i32 (ExtOp (bc_v16i16 (loadv4i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#WDZrm) addr:$src)>;
+
+ def : Pat<(v8i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#WQZrm) addr:$src)>;
+
+ def : Pat<(v8i64 (ExtOp (bc_v8i32 (loadv4i64 addr:$src)))),
+ (!cast<I>(OpcPrefix#DQZrm) addr:$src)>;
+ }
+}
+
+defm : AVX512_pmovx_patterns<"VPMOVSX", "s", X86vsext, extloadi32i16>;
+defm : AVX512_pmovx_patterns<"VPMOVZX", "z", X86vzext, loadi16_anyext>;
+
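For illustration (a sketch, not from the patch): with AVX-512VL/BW enabled, the 8-byte load below is the shape the BWZ128rm patterns above are meant to fold into a single extending load such as vpmovzxbw from memory:

    #include <immintrin.h>

    // Load 8 bytes and zero-extend each byte to 16 bits; the load feeding the
    // extend is what the scalar_to_vector/vzload patterns above can fold.
    __m128i widen_u8_to_u16(const void *p) {
      return _mm_cvtepu8_epi16(_mm_loadl_epi64((const __m128i *)p));
    }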
//===----------------------------------------------------------------------===//
// GATHER - SCATTER Operations
@@ -6859,8 +7940,14 @@ defm VSCATTERPF1QPD: avx512_gather_scatter_prefetch<0xC7, MRM6m, "vscatterpf1qpd
VK8WM, vz512mem>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>;
// Helper fragments to match sext vXi1 to vXiY.
-def v16i1sextv16i32 : PatLeaf<(v16i32 (X86vsrai VR512:$src, (i8 31)))>;
-def v8i1sextv8i64 : PatLeaf<(v8i64 (X86vsrai VR512:$src, (i8 63)))>;
+def v64i1sextv64i8 : PatLeaf<(v64i8
+ (X86vsext
+ (v64i1 (X86pcmpgtm
+ (bc_v64i8 (v16i32 immAllZerosV)),
+ VR512:$src))))>;
+def v32i1sextv32i16 : PatLeaf<(v32i16 (X86vsrai VR512:$src, (i8 15)))>;
+def v16i1sextv16i32 : PatLeaf<(v16i32 (X86vsrai VR512:$src, (i8 31)))>;
+def v8i1sextv8i64 : PatLeaf<(v8i64 (X86vsrai VR512:$src, (i8 63)))>;
multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > {
def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src),
@@ -6941,7 +8028,7 @@ defm VPMOVQ2M : avx512_convert_vector_to_mask<0x39, "vpmovq2m",
// AVX-512 - COMPRESS and EXPAND
//
-multiclass compress_by_vec_width<bits<8> opc, X86VectorVTInfo _,
+multiclass compress_by_vec_width_common<bits<8> opc, X86VectorVTInfo _,
string OpcodeStr> {
defm rr : AVX512_maskable<opc, MRMDestReg, _, (outs _.RC:$dst),
(ins _.RC:$src1), OpcodeStr, "$src1", "$src1",
@@ -6956,19 +8043,28 @@ multiclass compress_by_vec_width<bits<8> opc, X86VectorVTInfo _,
def mrk : AVX5128I<opc, MRMDestMem, (outs),
(ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
- [(store (_.VT (vselect _.KRCWM:$mask,
- (_.VT (X86compress _.RC:$src)), _.ImmAllZerosV)),
- addr:$dst)]>,
+ []>,
EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>;
}
+multiclass compress_by_vec_width_lowering<X86VectorVTInfo _ > {
+
+ def : Pat<(X86mCompressingStore addr:$dst, _.KRCWM:$mask,
+ (_.VT _.RC:$src)),
+ (!cast<Instruction>(NAME#_.ZSuffix##mrk)
+ addr:$dst, _.KRCWM:$mask, _.RC:$src)>;
+}
+
multiclass compress_by_elt_width<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo VTInfo> {
- defm Z : compress_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;
+ defm Z : compress_by_vec_width_common<opc, VTInfo.info512, OpcodeStr>,
+ compress_by_vec_width_lowering<VTInfo.info512>, EVEX_V512;
let Predicates = [HasVLX] in {
- defm Z256 : compress_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
- defm Z128 : compress_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
+ defm Z256 : compress_by_vec_width_common<opc, VTInfo.info256, OpcodeStr>,
+ compress_by_vec_width_lowering<VTInfo.info256>, EVEX_V256;
+ defm Z128 : compress_by_vec_width_common<opc, VTInfo.info128, OpcodeStr>,
+ compress_by_vec_width_lowering<VTInfo.info128>, EVEX_V128;
}
}
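A minimal intrinsics sketch (assuming AVX-512F) of the masked compressing store that X86mCompressingStore represents and that the new mrk selection pattern is meant to cover:

    #include <immintrin.h>

    // Store only the i32 lanes selected by k, packed contiguously at dst;
    // expected to select as a masked VPCOMPRESSD store.
    void compress_store_d(int *dst, __mmask16 k, __m512i v) {
      _mm512_mask_compressstoreu_epi32(dst, k, v);
    }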
@@ -6995,13 +8091,28 @@ multiclass expand_by_vec_width<bits<8> opc, X86VectorVTInfo _,
AVX5128IBase, EVEX_CD8<_.EltSize, CD8VT1>;
}
+multiclass expand_by_vec_width_lowering<X86VectorVTInfo _ > {
+
+ def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask, undef)),
+ (!cast<Instruction>(NAME#_.ZSuffix##rmkz)
+ _.KRCWM:$mask, addr:$src)>;
+
+ def : Pat<(_.VT (X86mExpandingLoad addr:$src, _.KRCWM:$mask,
+ (_.VT _.RC:$src0))),
+ (!cast<Instruction>(NAME#_.ZSuffix##rmk)
+ _.RC:$src0, _.KRCWM:$mask, addr:$src)>;
+}
+
multiclass expand_by_elt_width<bits<8> opc, string OpcodeStr,
AVX512VLVectorVTInfo VTInfo> {
- defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr>, EVEX_V512;
+ defm Z : expand_by_vec_width<opc, VTInfo.info512, OpcodeStr>,
+ expand_by_vec_width_lowering<VTInfo.info512>, EVEX_V512;
let Predicates = [HasVLX] in {
- defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr>, EVEX_V256;
- defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr>, EVEX_V128;
+ defm Z256 : expand_by_vec_width<opc, VTInfo.info256, OpcodeStr>,
+ expand_by_vec_width_lowering<VTInfo.info256>, EVEX_V256;
+ defm Z128 : expand_by_vec_width<opc, VTInfo.info128, OpcodeStr>,
+ expand_by_vec_width_lowering<VTInfo.info128>, EVEX_V128;
}
}
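And the dual direction, a zero-masked expanding load (again a sketch assuming AVX-512F) corresponding to the zero-masking (rmkz) form of the X86mExpandingLoad patterns above:

    #include <immintrin.h>

    // Consecutive i32 values from src are expanded into the lanes selected by
    // k; unselected lanes are zeroed.
    __m512i expand_load_d(__mmask16 k, const int *src) {
      return _mm512_maskz_expandloadu_epi32(k, src);
    }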
@@ -7019,7 +8130,8 @@ defm VEXPANDPD : expand_by_elt_width <0x88, "vexpandpd", avx512vl_f64_info>,
// op(broadcast(eltVt),imm)
// all instructions created with FROUND_CURRENT
multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86VectorVTInfo _>{
+ X86VectorVTInfo _>{
+ let ExeDomain = _.ExeDomain in {
defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix, "$src2, $src1", "$src1, $src2",
@@ -7039,11 +8151,13 @@ multiclass avx512_unary_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNo
(OpNode (_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src1))),
(i32 imm:$src2),
(i32 FROUND_CURRENT))>, EVEX_B;
+ }
}
//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
SDNode OpNode, X86VectorVTInfo _>{
+ let ExeDomain = _.ExeDomain in
defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix, "$src2, {sae}, $src1",
@@ -7073,7 +8187,8 @@ multiclass avx512_common_unary_fp_sae_packed_imm<string OpcodeStr,
// op(reg_vec2,broadcast(eltVt),imm)
// all instructions created with FROUND_CURRENT
multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86VectorVTInfo _>{
+ X86VectorVTInfo _>{
+ let ExeDomain = _.ExeDomain in {
defm rri : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
@@ -7096,13 +8211,14 @@ multiclass avx512_fp_packed_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src2))),
(i32 imm:$src3),
(i32 FROUND_CURRENT))>, EVEX_B;
+ }
}
//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
// op(reg_vec2,mem_vec,imm)
multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo>{
-
+ let ExeDomain = DestInfo.ExeDomain in {
defm rri : AVX512_maskable<opc, MRMSrcReg, DestInfo, (outs DestInfo.RC:$dst),
(ins SrcInfo.RC:$src1, SrcInfo.RC:$src2, u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
@@ -7116,6 +8232,7 @@ multiclass avx512_3Op_rm_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
(SrcInfo.VT (bitconvert
(SrcInfo.LdFrag addr:$src2))),
(i8 imm:$src3)))>;
+ }
}
//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm)
@@ -7125,6 +8242,7 @@ multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _>:
avx512_3Op_rm_imm8<opc, OpcodeStr, OpNode, _, _>{
+ let ExeDomain = _.ExeDomain in
defm rmbi : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
OpcodeStr, "$src3, ${src2}"##_.BroadcastStr##", $src1",
@@ -7138,8 +8256,8 @@ multiclass avx512_3Op_imm8<bits<8> opc, string OpcodeStr, SDNode OpNode,
// op(reg_vec2,mem_scalar,imm)
// all instructions created with FROUND_CURRENT
multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86VectorVTInfo _> {
-
+ X86VectorVTInfo _> {
+ let ExeDomain = _.ExeDomain in {
defm rri : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
@@ -7148,25 +8266,20 @@ multiclass avx512_fp_scalar_imm<bits<8> opc, string OpcodeStr, SDNode OpNode,
(i32 imm:$src3),
(i32 FROUND_CURRENT))>;
defm rmi : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
- (ins _.RC:$src1, _.MemOp:$src2, i32u8imm:$src3),
+ (ins _.RC:$src1, _.ScalarMemOp:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT (scalar_to_vector
(_.ScalarLdFrag addr:$src2))),
(i32 imm:$src3),
(i32 FROUND_CURRENT))>;
-
- let isAsmParserOnly = 1, mayLoad = 1, hasSideEffects = 0 in {
- defm rmi_alt :AVX512_maskable_in_asm<opc, MRMSrcMem, _, (outs _.FRC:$dst),
- (ins _.FRC:$src1, _.ScalarMemOp:$src2, u8imm:$src3),
- OpcodeStr, "$src3, $src2, $src1", "$src1, $src2, $src3",
- []>;
}
}
//handle instruction reg_vec1 = op(reg_vec2,reg_vec3,imm),{sae}
multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
SDNode OpNode, X86VectorVTInfo _>{
+ let ExeDomain = _.ExeDomain in
defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
OpcodeStr, "$src3, {sae}, $src2, $src1",
@@ -7439,14 +8552,64 @@ multiclass avx512_unary_rm_vl_all<bits<8> opc_b, bits<8> opc_w,
defm VPABS : avx512_unary_rm_vl_all<0x1C, 0x1D, 0x1E, 0x1F, "vpabs", X86Abs>;
+def avx512_v16i1sextv16i8 : PatLeaf<(v16i8 (X86pcmpgt (bc_v16i8 (v4i32 immAllZerosV)),
+ VR128X:$src))>;
+def avx512_v8i1sextv8i16 : PatLeaf<(v8i16 (X86vsrai VR128X:$src, (i8 15)))>;
+def avx512_v4i1sextv4i32 : PatLeaf<(v4i32 (X86vsrai VR128X:$src, (i8 31)))>;
+def avx512_v32i1sextv32i8 : PatLeaf<(v32i8 (X86pcmpgt (bc_v32i8 (v8i32 immAllZerosV)),
+ VR256X:$src))>;
+def avx512_v16i1sextv16i16: PatLeaf<(v16i16 (X86vsrai VR256X:$src, (i8 15)))>;
+def avx512_v8i1sextv8i32 : PatLeaf<(v8i32 (X86vsrai VR256X:$src, (i8 31)))>;
+
+let Predicates = [HasBWI, HasVLX] in {
+ def : Pat<(xor
+ (bc_v2i64 (avx512_v16i1sextv16i8)),
+ (bc_v2i64 (add (v16i8 VR128X:$src), (avx512_v16i1sextv16i8)))),
+ (VPABSBZ128rr VR128X:$src)>;
+ def : Pat<(xor
+ (bc_v2i64 (avx512_v8i1sextv8i16)),
+ (bc_v2i64 (add (v8i16 VR128X:$src), (avx512_v8i1sextv8i16)))),
+ (VPABSWZ128rr VR128X:$src)>;
+ def : Pat<(xor
+ (bc_v4i64 (avx512_v32i1sextv32i8)),
+ (bc_v4i64 (add (v32i8 VR256X:$src), (avx512_v32i1sextv32i8)))),
+ (VPABSBZ256rr VR256X:$src)>;
+ def : Pat<(xor
+ (bc_v4i64 (avx512_v16i1sextv16i16)),
+ (bc_v4i64 (add (v16i16 VR256X:$src), (avx512_v16i1sextv16i16)))),
+ (VPABSWZ256rr VR256X:$src)>;
+}
+let Predicates = [HasAVX512, HasVLX] in {
+ def : Pat<(xor
+ (bc_v2i64 (avx512_v4i1sextv4i32)),
+ (bc_v2i64 (add (v4i32 VR128X:$src), (avx512_v4i1sextv4i32)))),
+ (VPABSDZ128rr VR128X:$src)>;
+ def : Pat<(xor
+ (bc_v4i64 (avx512_v8i1sextv8i32)),
+ (bc_v4i64 (add (v8i32 VR256X:$src), (avx512_v8i1sextv8i32)))),
+ (VPABSDZ256rr VR256X:$src)>;
+}
+
+let Predicates = [HasAVX512] in {
def : Pat<(xor
- (bc_v16i32 (v16i1sextv16i32)),
- (bc_v16i32 (add (v16i32 VR512:$src), (v16i1sextv16i32)))),
+ (bc_v8i64 (v16i1sextv16i32)),
+ (bc_v8i64 (add (v16i32 VR512:$src), (v16i1sextv16i32)))),
(VPABSDZrr VR512:$src)>;
def : Pat<(xor
(bc_v8i64 (v8i1sextv8i64)),
(bc_v8i64 (add (v8i64 VR512:$src), (v8i1sextv8i64)))),
(VPABSQZrr VR512:$src)>;
+}
+let Predicates = [HasBWI] in {
+def : Pat<(xor
+ (bc_v8i64 (v64i1sextv64i8)),
+ (bc_v8i64 (add (v64i8 VR512:$src), (v64i1sextv64i8)))),
+ (VPABSBZrr VR512:$src)>;
+def : Pat<(xor
+ (bc_v8i64 (v32i1sextv32i16)),
+ (bc_v8i64 (add (v32i16 VR512:$src), (v32i1sextv32i16)))),
+ (VPABSWZrr VR512:$src)>;
+}
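The idiom these xor/add patterns recognize is the classic branchless absolute value; a scalar C++ sketch of the shape that, once vectorized, the VPABS* patterns above fold into a single vpabs(b|w|d|q):

    #include <cstdint>

    // s = x >> 31 is all-ones for negative x and zero otherwise, so
    // (x + s) ^ s equals |x| (wrapping for INT32_MIN, just like vpabsd).
    // Unsigned arithmetic avoids signed-overflow UB in the addition.
    int32_t abs_branchless(int32_t x) {
      int32_t s = x >> 31;
      return (int32_t)(((uint32_t)x + (uint32_t)s) ^ (uint32_t)s);
    }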
multiclass avx512_ctlz<bits<8> opc, string OpcodeStr, Predicate prd>{
@@ -7503,16 +8666,22 @@ multiclass avx512_movddup<bits<8> opc, string OpcodeStr, SDNode OpNode>{
defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup>;
+let Predicates = [HasVLX] in {
def : Pat<(X86Movddup (loadv2f64 addr:$src)),
- (VMOVDDUPZ128rm addr:$src)>, Requires<[HasAVX512, HasVLX]>;
+ (VMOVDDUPZ128rm addr:$src)>;
def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
- (VMOVDDUPZ128rm addr:$src)>, Requires<[HasAVX512, HasVLX]>;
+ (VMOVDDUPZ128rm addr:$src)>;
+def : Pat<(v2f64 (X86VBroadcast f64:$src)),
+ (VMOVDDUPZ128rr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
+}
//===----------------------------------------------------------------------===//
// AVX-512 - Unpack Instructions
//===----------------------------------------------------------------------===//
-defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512>;
-defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512>;
+defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh, HasAVX512,
+ SSE_ALU_ITINS_S>;
+defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl, HasAVX512,
+ SSE_ALU_ITINS_S>;
defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl,
SSE_INTALU_ITINS_P, HasBWI>;
@@ -7730,22 +8899,22 @@ defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw",
HasBWI>, EVEX_4V;
multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86VectorVTInfo _>{
- let Constraints = "$src1 = $dst" in {
+ X86VectorVTInfo _>{
+ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, u8imm:$src4),
OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(_.VT _.RC:$src3),
- (i8 imm:$src4))>, AVX512AIi8Base, EVEX_4V;
+ (i8 imm:$src4)), 1, 1>, AVX512AIi8Base, EVEX_4V;
defm rmi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.MemOp:$src3, u8imm:$src4),
OpcodeStr, "$src4, $src3, $src2", "$src2, $src3, $src4",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(_.VT (bitconvert (_.LdFrag addr:$src3))),
- (i8 imm:$src4))>,
+ (i8 imm:$src4)), 1, 0>,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
defm rmbi : AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.ScalarMemOp:$src3, u8imm:$src4),
@@ -7754,7 +8923,7 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(_.VT (X86VBroadcast(_.ScalarLdFrag addr:$src3))),
- (i8 imm:$src4))>, EVEX_B,
+ (i8 imm:$src4)), 1, 0>, EVEX_B,
AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
}// Constraints = "$src1 = $dst"
}
@@ -7776,8 +8945,8 @@ defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", avx512vl_i64_info>, VEX_W;
//===----------------------------------------------------------------------===//
multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
- X86VectorVTInfo _>{
- let Constraints = "$src1 = $dst" in {
+ X86VectorVTInfo _>{
+ let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
defm rri : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
@@ -7807,8 +8976,8 @@ multiclass avx512_fixupimm_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
multiclass avx512_fixupimm_packed_sae<bits<8> opc, string OpcodeStr,
- SDNode OpNode, X86VectorVTInfo _>{
-let Constraints = "$src1 = $dst" in {
+ SDNode OpNode, X86VectorVTInfo _>{
+let Constraints = "$src1 = $dst", ExeDomain = _.ExeDomain in {
defm rrib : AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
OpcodeStr##_.Suffix, "$src4, {sae}, $src3, $src2",
@@ -7823,7 +8992,8 @@ let Constraints = "$src1 = $dst" in {
multiclass avx512_fixupimm_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, X86VectorVTInfo _src3VT> {
- let Constraints = "$src1 = $dst" , Predicates = [HasAVX512] in {
+ let Constraints = "$src1 = $dst" , Predicates = [HasAVX512],
+ ExeDomain = _.ExeDomain in {
defm rri : AVX512_maskable_3src_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src2, _.RC:$src3, i32u8imm:$src4),
OpcodeStr##_.Suffix, "$src4, $src3, $src2", "$src2, $src3, $src4",
@@ -7877,3 +9047,135 @@ defm VFIXUPIMMPS : avx512_fixupimm_packed_all<avx512vl_f32_info>,
EVEX_CD8<32, CD8VF>;
defm VFIXUPIMMPD : avx512_fixupimm_packed_all<avx512vl_f64_info>,
EVEX_CD8<64, CD8VF>, VEX_W;
+
+
+
+// Patterns used to select SSE scalar fp arithmetic instructions from
+// either:
+//
+// (1) a scalar fp operation followed by a blend
+//
+// The effect is that the backend no longer emits unnecessary vector
+// insert instructions immediately after SSE scalar fp instructions
+// like addss or mulss.
+//
+// For example, given the following code:
+// __m128 foo(__m128 A, __m128 B) {
+// A[0] += B[0];
+// return A;
+// }
+//
+// Previously we generated:
+// addss %xmm0, %xmm1
+// movss %xmm1, %xmm0
+//
+// We now generate:
+// addss %xmm1, %xmm0
+//
+// (2) a vector packed single/double fp operation followed by a vector insert
+//
+// The effect is that the backend converts the packed fp instruction
+// followed by a vector insert into a single SSE scalar fp instruction.
+//
+// For example, given the following code:
+// __m128 foo(__m128 A, __m128 B) {
+// __m128 C = A + B;
+// return (__m128) {C[0], A[1], A[2], A[3]};
+// }
+//
+// Previously we generated:
+// addps %xmm0, %xmm1
+// movss %xmm1, %xmm0
+//
+// We now generate:
+// addss %xmm1, %xmm0
+
+// TODO: Some canonicalization in lowering would simplify the number of
+// patterns we have to try to match.
+multiclass AVX512_scalar_math_f32_patterns<SDNode Op, string OpcPrefix> {
+ let Predicates = [HasAVX512] in {
+ // extracted scalar math op with insert via movss
+ def : Pat<(v4f32 (X86Movss (v4f32 VR128X:$dst), (v4f32 (scalar_to_vector
+ (Op (f32 (extractelt (v4f32 VR128X:$dst), (iPTR 0))),
+ FR32X:$src))))),
+ (!cast<I>("V"#OpcPrefix#SSZrr_Int) v4f32:$dst,
+ (COPY_TO_REGCLASS FR32X:$src, VR128X))>;
+
+ // extracted scalar math op with insert via blend
+ def : Pat<(v4f32 (X86Blendi (v4f32 VR128X:$dst), (v4f32 (scalar_to_vector
+ (Op (f32 (extractelt (v4f32 VR128X:$dst), (iPTR 0))),
+ FR32X:$src))), (i8 1))),
+ (!cast<I>("V"#OpcPrefix#SSZrr_Int) v4f32:$dst,
+ (COPY_TO_REGCLASS FR32X:$src, VR128X))>;
+
+ // vector math op with insert via movss
+ def : Pat<(v4f32 (X86Movss (v4f32 VR128X:$dst),
+ (Op (v4f32 VR128X:$dst), (v4f32 VR128X:$src)))),
+ (!cast<I>("V"#OpcPrefix#SSZrr_Int) v4f32:$dst, v4f32:$src)>;
+
+ // vector math op with insert via blend
+ def : Pat<(v4f32 (X86Blendi (v4f32 VR128X:$dst),
+ (Op (v4f32 VR128X:$dst), (v4f32 VR128X:$src)), (i8 1))),
+ (!cast<I>("V"#OpcPrefix#SSZrr_Int) v4f32:$dst, v4f32:$src)>;
+
+ // extracted masked scalar math op with insert via movss
+ def : Pat<(X86Movss (v4f32 VR128X:$src1),
+ (scalar_to_vector
+ (X86selects VK1WM:$mask,
+ (Op (f32 (extractelt (v4f32 VR128X:$src1), (iPTR 0))),
+ FR32X:$src2),
+ FR32X:$src0))),
+ (!cast<I>("V"#OpcPrefix#SSZrr_Intk) (COPY_TO_REGCLASS FR32X:$src0, VR128X),
+ VK1WM:$mask, v4f32:$src1,
+ (COPY_TO_REGCLASS FR32X:$src2, VR128X))>;
+ }
+}
+
+defm : AVX512_scalar_math_f32_patterns<fadd, "ADD">;
+defm : AVX512_scalar_math_f32_patterns<fsub, "SUB">;
+defm : AVX512_scalar_math_f32_patterns<fmul, "MUL">;
+defm : AVX512_scalar_math_f32_patterns<fdiv, "DIV">;
+
+multiclass AVX512_scalar_math_f64_patterns<SDNode Op, string OpcPrefix> {
+ let Predicates = [HasAVX512] in {
+ // extracted scalar math op with insert via movsd
+ def : Pat<(v2f64 (X86Movsd (v2f64 VR128X:$dst), (v2f64 (scalar_to_vector
+ (Op (f64 (extractelt (v2f64 VR128X:$dst), (iPTR 0))),
+ FR64X:$src))))),
+ (!cast<I>("V"#OpcPrefix#SDZrr_Int) v2f64:$dst,
+ (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
+
+ // extracted scalar math op with insert via blend
+ def : Pat<(v2f64 (X86Blendi (v2f64 VR128X:$dst), (v2f64 (scalar_to_vector
+ (Op (f64 (extractelt (v2f64 VR128X:$dst), (iPTR 0))),
+ FR64X:$src))), (i8 1))),
+ (!cast<I>("V"#OpcPrefix#SDZrr_Int) v2f64:$dst,
+ (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
+
+ // vector math op with insert via movsd
+ def : Pat<(v2f64 (X86Movsd (v2f64 VR128X:$dst),
+ (Op (v2f64 VR128X:$dst), (v2f64 VR128X:$src)))),
+ (!cast<I>("V"#OpcPrefix#SDZrr_Int) v2f64:$dst, v2f64:$src)>;
+
+ // vector math op with insert via blend
+ def : Pat<(v2f64 (X86Blendi (v2f64 VR128X:$dst),
+ (Op (v2f64 VR128X:$dst), (v2f64 VR128X:$src)), (i8 1))),
+ (!cast<I>("V"#OpcPrefix#SDZrr_Int) v2f64:$dst, v2f64:$src)>;
+
+ // extracted masked scalar math op with insert via movsd
+ def : Pat<(X86Movsd (v2f64 VR128X:$src1),
+ (scalar_to_vector
+ (X86selects VK1WM:$mask,
+ (Op (f64 (extractelt (v2f64 VR128X:$src1), (iPTR 0))),
+ FR64X:$src2),
+ FR64X:$src0))),
+ (!cast<I>("V"#OpcPrefix#SDZrr_Intk) (COPY_TO_REGCLASS FR64X:$src0, VR128X),
+ VK1WM:$mask, v2f64:$src1,
+ (COPY_TO_REGCLASS FR64X:$src2, VR128X))>;
+ }
+}
+
+defm : AVX512_scalar_math_f64_patterns<fadd, "ADD">;
+defm : AVX512_scalar_math_f64_patterns<fsub, "SUB">;
+defm : AVX512_scalar_math_f64_patterns<fmul, "MUL">;
+defm : AVX512_scalar_math_f64_patterns<fdiv, "DIV">;
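The comment block above shows cases (1) and (2); the masked-scalar patterns added here target shapes like the following intrinsics sketch (assuming AVX-512F; src0 supplies the pass-through low element when the mask bit is clear), which is intended to become one masked vaddss:

    #include <immintrin.h>

    // Masked scalar add of the low elements, pass-through from src0 otherwise;
    // the X86selects patterns above aim to select this as VADDSSZrr_Intk.
    __m128 masked_add_low(__m128 src0, __mmask8 k, __m128 a, __m128 b) {
      return _mm_mask_add_ss(src0, k, a, b);
    }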
diff --git a/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td b/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td
index 1a2e786661e9..bfd21c062aa2 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -625,7 +625,7 @@ def Xi32 : X86TypeInfo<i32, "l", GR32, loadi32, i32mem,
Imm32, i32imm, imm32_su, i32i8imm, i32immSExt8_su,
1, OpSize32, 0>;
def Xi64 : X86TypeInfo<i64, "q", GR64, loadi64, i64mem,
- Imm32S, i64i32imm, i64immSExt32, i64i8imm, i64immSExt8,
+ Imm32S, i64i32imm, i64immSExt32_su, i64i8imm, i64immSExt8_su,
1, OpSizeFixed, 1>;
/// ITy - This instruction base class takes the type info for the instruction.
diff --git a/contrib/llvm/lib/Target/X86/X86InstrBuilder.h b/contrib/llvm/lib/Target/X86/X86InstrBuilder.h
index bcea6fa80350..ba970bc2048e 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrBuilder.h
+++ b/contrib/llvm/lib/Target/X86/X86InstrBuilder.h
@@ -24,9 +24,15 @@
#ifndef LLVM_LIB_TARGET_X86_X86INSTRBUILDER_H
#define LLVM_LIB_TARGET_X86_X86INSTRBUILDER_H
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include <cassert>
namespace llvm {
@@ -57,12 +63,11 @@ struct X86AddressMode {
Base.Reg = 0;
}
-
void getFullAddress(SmallVectorImpl<MachineOperand> &MO) {
assert(Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8);
if (BaseType == X86AddressMode::RegBase)
- MO.push_back(MachineOperand::CreateReg(Base.Reg, false, false,
+ MO.push_back(MachineOperand::CreateReg(Base.Reg, false, false, false,
false, false, false, 0, false));
else {
assert(BaseType == X86AddressMode::FrameIndexBase);
@@ -70,44 +75,45 @@ struct X86AddressMode {
}
MO.push_back(MachineOperand::CreateImm(Scale));
- MO.push_back(MachineOperand::CreateReg(IndexReg, false, false,
- false, false, false, 0, false));
+ MO.push_back(MachineOperand::CreateReg(IndexReg, false, false, false, false,
+ false, false, 0, false));
if (GV)
MO.push_back(MachineOperand::CreateGA(GV, Disp, GVOpFlags));
else
MO.push_back(MachineOperand::CreateImm(Disp));
- MO.push_back(MachineOperand::CreateReg(0, false, false,
- false, false, false, 0, false));
+ MO.push_back(MachineOperand::CreateReg(0, false, false, false, false, false,
+ false, 0, false));
}
};
/// Compute the addressing mode from a machine instruction starting with the
/// given operand.
-static inline X86AddressMode getAddressFromInstr(MachineInstr *MI,
+static inline X86AddressMode getAddressFromInstr(const MachineInstr *MI,
unsigned Operand) {
X86AddressMode AM;
- MachineOperand &Op = MI->getOperand(Operand);
- if (Op.isReg()) {
+ const MachineOperand &Op0 = MI->getOperand(Operand);
+ if (Op0.isReg()) {
AM.BaseType = X86AddressMode::RegBase;
- AM.Base.Reg = Op.getReg();
+ AM.Base.Reg = Op0.getReg();
} else {
AM.BaseType = X86AddressMode::FrameIndexBase;
- AM.Base.FrameIndex = Op.getIndex();
- }
- Op = MI->getOperand(Operand + 1);
- if (Op.isImm())
- AM.Scale = Op.getImm();
- Op = MI->getOperand(Operand + 2);
- if (Op.isImm())
- AM.IndexReg = Op.getImm();
- Op = MI->getOperand(Operand + 3);
- if (Op.isGlobal()) {
- AM.GV = Op.getGlobal();
- } else {
- AM.Disp = Op.getImm();
+ AM.Base.FrameIndex = Op0.getIndex();
}
+
+ const MachineOperand &Op1 = MI->getOperand(Operand + 1);
+ AM.Scale = Op1.getImm();
+
+ const MachineOperand &Op2 = MI->getOperand(Operand + 2);
+ AM.IndexReg = Op2.getReg();
+
+ const MachineOperand &Op3 = MI->getOperand(Operand + 3);
+ if (Op3.isGlobal())
+ AM.GV = Op3.getGlobal();
+ else
+ AM.Disp = Op3.getImm();
+
return AM;
}
@@ -122,12 +128,28 @@ addDirectMem(const MachineInstrBuilder &MIB, unsigned Reg) {
return MIB.addReg(Reg).addImm(1).addReg(0).addImm(0).addReg(0);
}
+/// Replace the address used in the instruction with the direct memory
+/// reference.
+static inline void setDirectAddressInInstr(MachineInstr *MI, unsigned Operand,
+ unsigned Reg) {
+ // Direct memory address is in a form of: Reg, 1 (Scale), NoReg, 0, NoReg.
+ MI->getOperand(Operand).setReg(Reg);
+ MI->getOperand(Operand + 1).setImm(1);
+ MI->getOperand(Operand + 2).setReg(0);
+ MI->getOperand(Operand + 3).setImm(0);
+ MI->getOperand(Operand + 4).setReg(0);
+}
static inline const MachineInstrBuilder &
addOffset(const MachineInstrBuilder &MIB, int Offset) {
return MIB.addImm(1).addReg(0).addImm(Offset).addReg(0);
}
+static inline const MachineInstrBuilder &
+addOffset(const MachineInstrBuilder &MIB, const MachineOperand& Offset) {
+ return MIB.addImm(1).addReg(0).addOperand(Offset).addReg(0);
+}
+
/// addRegOffset - This function is used to add a memory reference of the form
/// [Reg + Offset], i.e., one with no scale or index, but with a
/// displacement. An example is: DWORD PTR [EAX + 4].
@@ -177,7 +199,7 @@ static inline const MachineInstrBuilder &
addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) {
MachineInstr *MI = MIB;
MachineFunction &MF = *MI->getParent()->getParent();
- MachineFrameInfo &MFI = *MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
const MCInstrDesc &MCID = MI->getDesc();
auto Flags = MachineMemOperand::MONone;
if (MCID.mayLoad())
@@ -206,6 +228,6 @@ addConstantPoolReference(const MachineInstrBuilder &MIB, unsigned CPI,
.addConstantPoolIndex(CPI, 0, OpFlags).addReg(0);
}
-} // End llvm namespace
+} // end namespace llvm
-#endif
+#endif // LLVM_LIB_TARGET_X86_X86INSTRBUILDER_H
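For context, a hedged C++ sketch (not part of the patch; X86::MOV32rm and the include paths are assumed) of how these builder helpers are typically used from a machine-IR pass:

    #include "X86InstrBuilder.h"                  // addFrameReference
    #include "X86InstrInfo.h"                     // X86::MOV32rm (assumed path)
    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/CodeGen/MachineInstrBuilder.h"
    using namespace llvm;

    // Reload a 32-bit value from a stack slot: MOV32rm DstReg, [FrameIdx + 0].
    static void emitStackReload(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator InsertPt,
                                const DebugLoc &DL, const TargetInstrInfo &TII,
                                unsigned DstReg, int FrameIdx) {
      addFrameReference(BuildMI(MBB, InsertPt, DL, TII.get(X86::MOV32rm), DstReg),
                        FrameIdx);
    }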
diff --git a/contrib/llvm/lib/Target/X86/X86InstrCompiler.td b/contrib/llvm/lib/Target/X86/X86InstrCompiler.td
index 925f4efb5aa9..3c27eb8077d0 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -723,7 +723,7 @@ defm LOCK_DEC : LOCK_ArithUnOp<0xFE, 0xFF, MRM1m, -1, "dec">;
multiclass LCMPXCHG_UnOp<bits<8> Opc, Format Form, string mnemonic,
SDPatternOperator frag, X86MemOperand x86memop,
InstrItinClass itin> {
-let isCodeGenOnly = 1 in {
+let isCodeGenOnly = 1, usesCustomInserter = 1 in {
def NAME : I<Opc, Form, (outs), (ins x86memop:$ptr),
!strconcat(mnemonic, "\t$ptr"),
[(frag addr:$ptr)], itin>, TB, LOCK;
@@ -1025,53 +1025,6 @@ def : Pat<(store (i32 -1), addr:$dst), (OR32mi8 addr:$dst, -1)>;
def : Pat<(store (i64 -1), addr:$dst), (OR64mi8 addr:$dst, -1)>;
}
-// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable
-def : Pat<(i32 (X86Wrapper tconstpool :$dst)), (MOV32ri tconstpool :$dst)>;
-def : Pat<(i32 (X86Wrapper tjumptable :$dst)), (MOV32ri tjumptable :$dst)>;
-def : Pat<(i32 (X86Wrapper tglobaltlsaddr:$dst)),(MOV32ri tglobaltlsaddr:$dst)>;
-def : Pat<(i32 (X86Wrapper tglobaladdr :$dst)), (MOV32ri tglobaladdr :$dst)>;
-def : Pat<(i32 (X86Wrapper texternalsym:$dst)), (MOV32ri texternalsym:$dst)>;
-def : Pat<(i32 (X86Wrapper mcsym:$dst)), (MOV32ri mcsym:$dst)>;
-def : Pat<(i32 (X86Wrapper tblockaddress:$dst)), (MOV32ri tblockaddress:$dst)>;
-
-def : Pat<(add GR32:$src1, (X86Wrapper tconstpool:$src2)),
- (ADD32ri GR32:$src1, tconstpool:$src2)>;
-def : Pat<(add GR32:$src1, (X86Wrapper tjumptable:$src2)),
- (ADD32ri GR32:$src1, tjumptable:$src2)>;
-def : Pat<(add GR32:$src1, (X86Wrapper tglobaladdr :$src2)),
- (ADD32ri GR32:$src1, tglobaladdr:$src2)>;
-def : Pat<(add GR32:$src1, (X86Wrapper texternalsym:$src2)),
- (ADD32ri GR32:$src1, texternalsym:$src2)>;
-def : Pat<(add GR32:$src1, (X86Wrapper mcsym:$src2)),
- (ADD32ri GR32:$src1, mcsym:$src2)>;
-def : Pat<(add GR32:$src1, (X86Wrapper tblockaddress:$src2)),
- (ADD32ri GR32:$src1, tblockaddress:$src2)>;
-
-def : Pat<(store (i32 (X86Wrapper tglobaladdr:$src)), addr:$dst),
- (MOV32mi addr:$dst, tglobaladdr:$src)>;
-def : Pat<(store (i32 (X86Wrapper texternalsym:$src)), addr:$dst),
- (MOV32mi addr:$dst, texternalsym:$src)>;
-def : Pat<(store (i32 (X86Wrapper mcsym:$src)), addr:$dst),
- (MOV32mi addr:$dst, mcsym:$src)>;
-def : Pat<(store (i32 (X86Wrapper tblockaddress:$src)), addr:$dst),
- (MOV32mi addr:$dst, tblockaddress:$src)>;
-
-// ConstantPool GlobalAddress, ExternalSymbol, and JumpTable when not in small
-// code model mode, should use 'movabs'. FIXME: This is really a hack, the
-// 'movabs' predicate should handle this sort of thing.
-def : Pat<(i64 (X86Wrapper tconstpool :$dst)),
- (MOV64ri tconstpool :$dst)>, Requires<[FarData]>;
-def : Pat<(i64 (X86Wrapper tjumptable :$dst)),
- (MOV64ri tjumptable :$dst)>, Requires<[FarData]>;
-def : Pat<(i64 (X86Wrapper tglobaladdr :$dst)),
- (MOV64ri tglobaladdr :$dst)>, Requires<[FarData]>;
-def : Pat<(i64 (X86Wrapper texternalsym:$dst)),
- (MOV64ri texternalsym:$dst)>, Requires<[FarData]>;
-def : Pat<(i64 (X86Wrapper mcsym:$dst)),
- (MOV64ri mcsym:$dst)>, Requires<[FarData]>;
-def : Pat<(i64 (X86Wrapper tblockaddress:$dst)),
- (MOV64ri tblockaddress:$dst)>, Requires<[FarData]>;
-
// In kernel code model, we can get the address of a label
// into a register with 'movq'. FIXME: This is a hack, the 'imm' predicate of
// the MOV64ri32 should accept these.
@@ -1289,15 +1242,13 @@ def : Pat<(i64 (anyext GR32:$src)),
// Any instruction that defines a 32-bit result leaves the high half of the
// register. Truncate can be lowered to EXTRACT_SUBREG. CopyFromReg may
-// be copying from a truncate. And x86's cmov doesn't do anything if the
-// condition is false. But any other 32-bit operation will zero-extend
+// be copying from a truncate. Any other 32-bit operation will zero-extend
// up to 64 bits.
def def32 : PatLeaf<(i32 GR32:$src), [{
return N->getOpcode() != ISD::TRUNCATE &&
N->getOpcode() != TargetOpcode::EXTRACT_SUBREG &&
N->getOpcode() != ISD::CopyFromReg &&
- N->getOpcode() != ISD::AssertSext &&
- N->getOpcode() != X86ISD::CMOV;
+ N->getOpcode() != ISD::AssertSext;
}]>;
// In the case of a 32-bit def that is known to implicitly zero-extend,
@@ -1711,6 +1662,22 @@ defm : MaskedShiftAmountPats<sra, "SAR">;
defm : MaskedShiftAmountPats<rotl, "ROL">;
defm : MaskedShiftAmountPats<rotr, "ROR">;
+// Double shift amount is implicitly masked.
+multiclass MaskedDoubleShiftAmountPats<SDNode frag, string name> {
+ // (shift x (and y, 31)) ==> (shift x, y)
+ def : Pat<(frag GR16:$src1, GR16:$src2, (and CL, immShift32)),
+ (!cast<Instruction>(name # "16rrCL") GR16:$src1, GR16:$src2)>;
+ def : Pat<(frag GR32:$src1, GR32:$src2, (and CL, immShift32)),
+ (!cast<Instruction>(name # "32rrCL") GR32:$src1, GR32:$src2)>;
+
+ // (shift x (and y, 63)) ==> (shift x, y)
+ def : Pat<(frag GR64:$src1, GR64:$src2, (and CL, immShift64)),
+ (!cast<Instruction>(name # "64rrCL") GR64:$src1, GR64:$src2)>;
+}
+
+defm : MaskedDoubleShiftAmountPats<X86shld, "SHLD">;
+defm : MaskedDoubleShiftAmountPats<X86shrd, "SHRD">;
+
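A scalar C++ illustration (sketch only) of why the explicit mask on the count is redundant for the double shifts: SHLD/SHRD already reduce their count modulo the operand width, so (y & 31) / (y & 63) adds nothing and the patterns above drop it.

    #include <cstdint>

    // 32-bit funnel shift left, the operation SHLD performs; the explicit
    // mask on the count mirrors the (and CL, immShift32) node stripped above.
    uint32_t shld32(uint32_t hi, uint32_t lo, uint32_t count) {
      uint32_t c = count & 31;          // redundant once SHLD is selected
      return c ? (hi << c) | (lo >> (32 - c)) : hi;
    }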
// (anyext (setcc_carry)) -> (setcc_carry)
def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
(SETB_C16r)>;
@@ -1719,9 +1686,6 @@ def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
def : Pat<(i32 (anyext (i16 (X86setcc_c X86_COND_B, EFLAGS)))),
(SETB_C32r)>;
-
-
-
//===----------------------------------------------------------------------===//
// EFLAGS-defining Patterns
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Target/X86/X86InstrControl.td b/contrib/llvm/lib/Target/X86/X86InstrControl.td
index bb5f9117f032..4ea223e82be9 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrControl.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrControl.td
@@ -239,7 +239,6 @@ let isCall = 1 in
// Tail call stuff.
-
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in
let Uses = [ESP] in {
@@ -257,6 +256,7 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
(ins i32imm_pcrel:$dst),
"jmp\t$dst",
[], IIC_JMP_REL>;
+
def TAILJMPr : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst),
"", [], IIC_JMP_REG>; // FIXME: Remove encoding when JIT is dead.
let mayLoad = 1 in
@@ -264,6 +264,21 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
"jmp{l}\t{*}$dst", [], IIC_JMP_MEM>;
}
+// Conditional tail calls are similar to the above, but they are branches
+// rather than barriers, and they use EFLAGS.
+let isCall = 1, isTerminator = 1, isReturn = 1, isBranch = 1,
+ isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in
+ let Uses = [ESP, EFLAGS] in {
+ def TCRETURNdicc : PseudoI<(outs),
+ (ins i32imm_pcrel:$dst, i32imm:$offset, i32imm:$cond), []>;
+
+ // This gets replaced by a conditional jump instruction in MC lowering.
+ def TAILJMPd_CC : Ii32PCRel<0x80, RawFrm, (outs),
+ (ins i32imm_pcrel:$dst, i32imm:$cond),
+ "",
+ [], IIC_JMP_REL>;
+}
+
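As a rough C++ example (not from the patch) of the shape these pseudos target: each return below is a tail call, and with optimization the taken branch can be lowered to a conditional jmp straight to the callee instead of a branch around an unconditional tail call.

    extern "C" int callee(int);
    extern "C" int fallback(int);

    // Candidate for TCRETURNdicc / TAILJMPd_CC once MC lowering substitutes
    // the conditional jump; the exact codegen depends on target options.
    int dispatch(int x) {
      if (x & 1)
        return callee(x);
      return fallback(x);
    }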
//===----------------------------------------------------------------------===//
// Call Instructions...
@@ -296,17 +311,18 @@ let isCall = 1, Uses = [RSP], SchedRW = [WriteJump] in {
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
isCodeGenOnly = 1, Uses = [RSP], usesCustomInserter = 1,
SchedRW = [WriteJump] in {
- def TCRETURNdi64 : PseudoI<(outs),
- (ins i64i32imm_pcrel:$dst, i32imm:$offset),
- []>;
- def TCRETURNri64 : PseudoI<(outs),
- (ins ptr_rc_tailcall:$dst, i32imm:$offset), []>;
+ def TCRETURNdi64 : PseudoI<(outs),
+ (ins i64i32imm_pcrel:$dst, i32imm:$offset),
+ []>;
+ def TCRETURNri64 : PseudoI<(outs),
+ (ins ptr_rc_tailcall:$dst, i32imm:$offset), []>;
let mayLoad = 1 in
- def TCRETURNmi64 : PseudoI<(outs),
- (ins i64mem_TC:$dst, i32imm:$offset), []>;
+ def TCRETURNmi64 : PseudoI<(outs),
+ (ins i64mem_TC:$dst, i32imm:$offset), []>;
def TAILJMPd64 : Ii32PCRel<0xE9, RawFrm, (outs), (ins i64i32imm_pcrel:$dst),
"jmp\t$dst", [], IIC_JMP_REL>;
+
def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst),
"jmp{q}\t{*}$dst", [], IIC_JMP_MEM>;
@@ -314,11 +330,8 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
def TAILJMPm64 : I<0xFF, MRM4m, (outs), (ins i64mem_TC:$dst),
"jmp{q}\t{*}$dst", [], IIC_JMP_MEM>;
- // Win64 wants jumps leaving the function to have a REX_W prefix.
+ // Win64 wants indirect jumps leaving the function to have a REX_W prefix.
let hasREX_WPrefix = 1 in {
- def TAILJMPd64_REX : Ii32PCRel<0xE9, RawFrm, (outs),
- (ins i64i32imm_pcrel:$dst),
- "rex64 jmp\t$dst", [], IIC_JMP_REL>;
def TAILJMPr64_REX : I<0xFF, MRM4r, (outs), (ins ptr_rc_tailcall:$dst),
"rex64 jmp{q}\t{*}$dst", [], IIC_JMP_MEM>;
@@ -327,3 +340,19 @@ let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1,
"rex64 jmp{q}\t{*}$dst", [], IIC_JMP_MEM>;
}
}
+
+// Conditional tail calls are similar to the above, but they are branches
+// rather than barriers, and they use EFLAGS.
+let isCall = 1, isTerminator = 1, isReturn = 1, isBranch = 1,
+ isCodeGenOnly = 1, SchedRW = [WriteJumpLd] in
+ let Uses = [RSP, EFLAGS] in {
+ def TCRETURNdi64cc : PseudoI<(outs),
+ (ins i64i32imm_pcrel:$dst, i32imm:$offset,
+ i32imm:$cond), []>;
+
+ // This gets replaced by a conditional jump instruction in MC lowering.
+ def TAILJMPd64_CC : Ii32PCRel<0x80, RawFrm, (outs),
+ (ins i64i32imm_pcrel:$dst, i32imm:$cond),
+ "",
+ [], IIC_JMP_REL>;
+}
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFMA.td b/contrib/llvm/lib/Target/X86/X86InstrFMA.td
index fd800cf077f7..4b19f801dae1 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrFMA.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrFMA.td
@@ -39,7 +39,6 @@ multiclass fma3p_rm<bits<8> opc, string OpcodeStr,
PatFrag MemFrag128, PatFrag MemFrag256,
ValueType OpVT128, ValueType OpVT256,
SDPatternOperator Op = null_frag> {
- let usesCustomInserter = 1 in
def r : FMA3<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
@@ -55,8 +54,7 @@ multiclass fma3p_rm<bits<8> opc, string OpcodeStr,
[(set VR128:$dst, (OpVT128 (Op VR128:$src2, VR128:$src1,
(MemFrag128 addr:$src3))))]>;
- let usesCustomInserter = 1 in
- def rY : FMA3<opc, MRMSrcReg, (outs VR256:$dst),
+ def Yr : FMA3<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, VR256:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
@@ -64,7 +62,7 @@ multiclass fma3p_rm<bits<8> opc, string OpcodeStr,
VR256:$src3)))]>, VEX_L;
let mayLoad = 1 in
- def mY : FMA3<opc, MRMSrcMem, (outs VR256:$dst),
+ def Ym : FMA3<opc, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, f256mem:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
@@ -74,60 +72,61 @@ multiclass fma3p_rm<bits<8> opc, string OpcodeStr,
}
multiclass fma3p_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
- string OpcodeStr, string PackTy,
+ string OpcodeStr, string PackTy, string Suff,
PatFrag MemFrag128, PatFrag MemFrag256,
SDNode Op, ValueType OpTy128, ValueType OpTy256> {
- defm r213 : fma3p_rm<opc213,
- !strconcat(OpcodeStr, "213", PackTy),
- MemFrag128, MemFrag256, OpTy128, OpTy256, Op>;
- defm r132 : fma3p_rm<opc132,
- !strconcat(OpcodeStr, "132", PackTy),
- MemFrag128, MemFrag256, OpTy128, OpTy256>;
- defm r231 : fma3p_rm<opc231,
- !strconcat(OpcodeStr, "231", PackTy),
- MemFrag128, MemFrag256, OpTy128, OpTy256>;
+ defm NAME#213#Suff : fma3p_rm<opc213,
+ !strconcat(OpcodeStr, "213", PackTy),
+ MemFrag128, MemFrag256, OpTy128, OpTy256, Op>;
+ defm NAME#132#Suff : fma3p_rm<opc132,
+ !strconcat(OpcodeStr, "132", PackTy),
+ MemFrag128, MemFrag256, OpTy128, OpTy256>;
+ defm NAME#231#Suff : fma3p_rm<opc231,
+ !strconcat(OpcodeStr, "231", PackTy),
+ MemFrag128, MemFrag256, OpTy128, OpTy256>;
}
// Fused Multiply-Add
let ExeDomain = SSEPackedSingle in {
- defm VFMADDPS : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps", loadv4f32,
- loadv8f32, X86Fmadd, v4f32, v8f32>;
- defm VFMSUBPS : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps", loadv4f32,
- loadv8f32, X86Fmsub, v4f32, v8f32>;
- defm VFMADDSUBPS : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "ps",
- loadv4f32, loadv8f32, X86Fmaddsub,
- v4f32, v8f32>;
- defm VFMSUBADDPS : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "ps",
- loadv4f32, loadv8f32, X86Fmsubadd,
- v4f32, v8f32>;
+ defm VFMADD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "ps", "PS",
+ loadv4f32, loadv8f32, X86Fmadd, v4f32, v8f32>;
+ defm VFMSUB : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "ps", "PS",
+ loadv4f32, loadv8f32, X86Fmsub, v4f32, v8f32>;
+ defm VFMADDSUB : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "ps", "PS",
+ loadv4f32, loadv8f32, X86Fmaddsub,
+ v4f32, v8f32>;
+ defm VFMSUBADD : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "ps", "PS",
+ loadv4f32, loadv8f32, X86Fmsubadd,
+ v4f32, v8f32>;
}
let ExeDomain = SSEPackedDouble in {
- defm VFMADDPD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd", loadv2f64,
- loadv4f64, X86Fmadd, v2f64, v4f64>, VEX_W;
- defm VFMSUBPD : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd", loadv2f64,
- loadv4f64, X86Fmsub, v2f64, v4f64>, VEX_W;
- defm VFMADDSUBPD : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "pd",
- loadv2f64, loadv4f64, X86Fmaddsub,
- v2f64, v4f64>, VEX_W;
- defm VFMSUBADDPD : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "pd",
- loadv2f64, loadv4f64, X86Fmsubadd,
- v2f64, v4f64>, VEX_W;
+ defm VFMADD : fma3p_forms<0x98, 0xA8, 0xB8, "vfmadd", "pd", "PD",
+ loadv2f64, loadv4f64, X86Fmadd, v2f64,
+ v4f64>, VEX_W;
+ defm VFMSUB : fma3p_forms<0x9A, 0xAA, 0xBA, "vfmsub", "pd", "PD",
+ loadv2f64, loadv4f64, X86Fmsub, v2f64,
+ v4f64>, VEX_W;
+ defm VFMADDSUB : fma3p_forms<0x96, 0xA6, 0xB6, "vfmaddsub", "pd", "PD",
+ loadv2f64, loadv4f64, X86Fmaddsub,
+ v2f64, v4f64>, VEX_W;
+ defm VFMSUBADD : fma3p_forms<0x97, 0xA7, 0xB7, "vfmsubadd", "pd", "PD",
+ loadv2f64, loadv4f64, X86Fmsubadd,
+ v2f64, v4f64>, VEX_W;
}
// Fused Negative Multiply-Add
let ExeDomain = SSEPackedSingle in {
- defm VFNMADDPS : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "ps", loadv4f32,
- loadv8f32, X86Fnmadd, v4f32, v8f32>;
- defm VFNMSUBPS : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps", loadv4f32,
- loadv8f32, X86Fnmsub, v4f32, v8f32>;
+ defm VFNMADD : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "ps", "PS", loadv4f32,
+ loadv8f32, X86Fnmadd, v4f32, v8f32>;
+ defm VFNMSUB : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "ps", "PS", loadv4f32,
+ loadv8f32, X86Fnmsub, v4f32, v8f32>;
}
let ExeDomain = SSEPackedDouble in {
- defm VFNMADDPD : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "pd", loadv2f64,
- loadv4f64, X86Fnmadd, v2f64, v4f64>, VEX_W;
- defm VFNMSUBPD : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd",
- loadv2f64, loadv4f64, X86Fnmsub, v2f64,
- v4f64>, VEX_W;
+ defm VFNMADD : fma3p_forms<0x9C, 0xAC, 0xBC, "vfnmadd", "pd", "PD", loadv2f64,
+ loadv4f64, X86Fnmadd, v2f64, v4f64>, VEX_W;
+ defm VFNMSUB : fma3p_forms<0x9E, 0xAE, 0xBE, "vfnmsub", "pd", "PD", loadv2f64,
+ loadv4f64, X86Fnmsub, v2f64, v4f64>, VEX_W;
}
// All source register operands of FMA opcodes defined in fma3s_rm multiclass
@@ -143,7 +142,6 @@ let Constraints = "$src1 = $dst", isCommutable = 1, hasSideEffects = 0 in
multiclass fma3s_rm<bits<8> opc, string OpcodeStr,
X86MemOperand x86memop, RegisterClass RC,
SDPatternOperator OpNode = null_frag> {
- let usesCustomInserter = 1 in
def r : FMA3<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
@@ -191,13 +189,15 @@ multiclass fma3s_rm_int<bits<8> opc, string OpcodeStr,
}
multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
- string OpStr, string PackTy,
+ string OpStr, string PackTy, string Suff,
SDNode OpNode, RegisterClass RC,
X86MemOperand x86memop> {
- defm r132 : fma3s_rm<opc132, !strconcat(OpStr, "132", PackTy), x86memop, RC>;
- defm r213 : fma3s_rm<opc213, !strconcat(OpStr, "213", PackTy), x86memop, RC,
- OpNode>;
- defm r231 : fma3s_rm<opc231, !strconcat(OpStr, "231", PackTy), x86memop, RC>;
+ defm NAME#132#Suff : fma3s_rm<opc132, !strconcat(OpStr, "132", PackTy),
+ x86memop, RC>;
+ defm NAME#213#Suff : fma3s_rm<opc213, !strconcat(OpStr, "213", PackTy),
+ x86memop, RC, OpNode>;
+ defm NAME#231#Suff : fma3s_rm<opc231, !strconcat(OpStr, "231", PackTy),
+ x86memop, RC>;
}
// The FMA 213 form is created for lowering of scalar FMA intrinsics
@@ -210,42 +210,45 @@ multiclass fma3s_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
// form of FMA*_Int instructions is done using an optimistic assumption that
// such analysis will be implemented eventually.
multiclass fma3s_int_forms<bits<8> opc132, bits<8> opc213, bits<8> opc231,
- string OpStr, string PackTy,
+ string OpStr, string PackTy, string Suff,
RegisterClass RC, Operand memop> {
- defm r132 : fma3s_rm_int<opc132, !strconcat(OpStr, "132", PackTy),
- memop, RC>;
- defm r213 : fma3s_rm_int<opc213, !strconcat(OpStr, "213", PackTy),
- memop, RC>;
- defm r231 : fma3s_rm_int<opc231, !strconcat(OpStr, "231", PackTy),
- memop, RC>;
+ defm NAME#132#Suff : fma3s_rm_int<opc132, !strconcat(OpStr, "132", PackTy),
+ memop, RC>;
+ defm NAME#213#Suff : fma3s_rm_int<opc213, !strconcat(OpStr, "213", PackTy),
+ memop, RC>;
+ defm NAME#231#Suff : fma3s_rm_int<opc231, !strconcat(OpStr, "231", PackTy),
+ memop, RC>;
}
multiclass fma3s<bits<8> opc132, bits<8> opc213, bits<8> opc231,
string OpStr, Intrinsic IntF32, Intrinsic IntF64,
SDNode OpNode> {
let ExeDomain = SSEPackedSingle in
- defm SS : fma3s_forms<opc132, opc213, opc231, OpStr, "ss", OpNode,
- FR32, f32mem>,
- fma3s_int_forms<opc132, opc213, opc231, OpStr, "ss", VR128, ssmem>;
+ defm NAME : fma3s_forms<opc132, opc213, opc231, OpStr, "ss", "SS", OpNode,
+ FR32, f32mem>,
+ fma3s_int_forms<opc132, opc213, opc231, OpStr, "ss", "SS",
+ VR128, ssmem>;
let ExeDomain = SSEPackedDouble in
- defm SD : fma3s_forms<opc132, opc213, opc231, OpStr, "sd", OpNode,
+ defm NAME : fma3s_forms<opc132, opc213, opc231, OpStr, "sd", "SD", OpNode,
FR64, f64mem>,
- fma3s_int_forms<opc132, opc213, opc231, OpStr, "sd", VR128, sdmem>,
- VEX_W;
+ fma3s_int_forms<opc132, opc213, opc231, OpStr, "sd", "SD",
+ VR128, sdmem>, VEX_W;
// These patterns use the 123 ordering, instead of 213, even though
// they match the intrinsic to the 213 version of the instruction.
// This is because src1 is tied to dest, and the scalar intrinsics
// require the pass-through values to come from the first source
// operand, not the second.
- def : Pat<(IntF32 VR128:$src1, VR128:$src2, VR128:$src3),
- (COPY_TO_REGCLASS(!cast<Instruction>(NAME#"SSr213r_Int")
- $src1, $src2, $src3), VR128)>;
+ let Predicates = [HasFMA] in {
+ def : Pat<(IntF32 VR128:$src1, VR128:$src2, VR128:$src3),
+ (COPY_TO_REGCLASS(!cast<Instruction>(NAME#"213SSr_Int")
+ $src1, $src2, $src3), VR128)>;
- def : Pat<(IntF64 VR128:$src1, VR128:$src2, VR128:$src3),
- (COPY_TO_REGCLASS(!cast<Instruction>(NAME#"SDr213r_Int")
- $src1, $src2, $src3), VR128)>;
+ def : Pat<(IntF64 VR128:$src1, VR128:$src2, VR128:$src3),
+ (COPY_TO_REGCLASS(!cast<Instruction>(NAME#"213SDr_Int")
+ $src1, $src2, $src3), VR128)>;
+ }
}
defm VFMADD : fma3s<0x99, 0xA9, 0xB9, "vfmadd", int_x86_fma_vfmadd_ss,
@@ -268,18 +271,18 @@ multiclass fma4s<bits<8> opc, string OpcodeStr, RegisterClass RC,
X86MemOperand x86memop, ValueType OpVT, SDNode OpNode,
PatFrag mem_frag> {
let isCommutable = 1 in
- def rr : FMA4<opc, MRMSrcReg, (outs RC:$dst),
+ def rr : FMA4<opc, MRMSrcRegOp4, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst,
- (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>, VEX_W, VEX_LIG, MemOp4;
- def rm : FMA4<opc, MRMSrcMem, (outs RC:$dst),
+ (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>, VEX_W, VEX_LIG;
+ def rm : FMA4<opc, MRMSrcMemOp4, (outs RC:$dst),
(ins RC:$src1, RC:$src2, x86memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (OpNode RC:$src1, RC:$src2,
- (mem_frag addr:$src3)))]>, VEX_W, VEX_LIG, MemOp4;
+ (mem_frag addr:$src3)))]>, VEX_W, VEX_LIG;
def mr : FMA4<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, RC:$src3),
!strconcat(OpcodeStr,
@@ -298,19 +301,18 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
multiclass fma4s_int<bits<8> opc, string OpcodeStr, Operand memop,
ComplexPattern mem_cpat, Intrinsic Int> {
let isCodeGenOnly = 1 in {
- let isCommutable = 1 in
- def rr_Int : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
+ def rr_Int : FMA4<opc, MRMSrcRegOp4, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
- (Int VR128:$src1, VR128:$src2, VR128:$src3))]>, VEX_W, VEX_LIG, MemOp4;
- def rm_Int : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
+ (Int VR128:$src1, VR128:$src2, VR128:$src3))]>, VEX_W, VEX_LIG;
+ def rm_Int : FMA4<opc, MRMSrcMemOp4, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, memop:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst, (Int VR128:$src1, VR128:$src2,
- mem_cpat:$src3))]>, VEX_W, VEX_LIG, MemOp4;
+ mem_cpat:$src3))]>, VEX_W, VEX_LIG;
def mr_Int : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, memop:$src2, VR128:$src3),
!strconcat(OpcodeStr,
@@ -324,19 +326,19 @@ multiclass fma4p<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType OpVT128, ValueType OpVT256,
PatFrag ld_frag128, PatFrag ld_frag256> {
let isCommutable = 1 in
- def rr : FMA4<opc, MRMSrcReg, (outs VR128:$dst),
+ def rr : FMA4<opc, MRMSrcRegOp4, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(OpVT128 (OpNode VR128:$src1, VR128:$src2, VR128:$src3)))]>,
- VEX_W, MemOp4;
- def rm : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
+ VEX_W;
+ def rm : FMA4<opc, MRMSrcMemOp4, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, f128mem:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst, (OpNode VR128:$src1, VR128:$src2,
- (ld_frag128 addr:$src3)))]>, VEX_W, MemOp4;
+ (ld_frag128 addr:$src3)))]>, VEX_W;
def mr : FMA4<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, f128mem:$src2, VR128:$src3),
!strconcat(OpcodeStr,
@@ -344,20 +346,20 @@ multiclass fma4p<bits<8> opc, string OpcodeStr, SDNode OpNode,
[(set VR128:$dst,
(OpNode VR128:$src1, (ld_frag128 addr:$src2), VR128:$src3))]>;
let isCommutable = 1 in
- def rrY : FMA4<opc, MRMSrcReg, (outs VR256:$dst),
+ def Yrr : FMA4<opc, MRMSrcRegOp4, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, VR256:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR256:$dst,
(OpVT256 (OpNode VR256:$src1, VR256:$src2, VR256:$src3)))]>,
- VEX_W, MemOp4, VEX_L;
- def rmY : FMA4<opc, MRMSrcMem, (outs VR256:$dst),
+ VEX_W, VEX_L;
+ def Yrm : FMA4<opc, MRMSrcMemOp4, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, f256mem:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR256:$dst, (OpNode VR256:$src1, VR256:$src2,
- (ld_frag256 addr:$src3)))]>, VEX_W, MemOp4, VEX_L;
- def mrY : FMA4<opc, MRMSrcMem, (outs VR256:$dst),
+ (ld_frag256 addr:$src3)))]>, VEX_W, VEX_L;
+ def Ymr : FMA4<opc, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f256mem:$src2, VR256:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
@@ -369,7 +371,7 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>;
- def rrY_REV : FMA4<opc, MRMSrcReg, (outs VR256:$dst),
+ def Yrr_REV : FMA4<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, VR256:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>,
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFMA3Info.cpp b/contrib/llvm/lib/Target/X86/X86InstrFMA3Info.cpp
new file mode 100644
index 000000000000..db83497ee69d
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrFMA3Info.cpp
@@ -0,0 +1,285 @@
+//===-- X86InstrFMA3Info.cpp - X86 FMA3 Instruction Information -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the classes providing information
+// about existing X86 FMA3 opcodes, classifying and grouping them.
+//
+//===----------------------------------------------------------------------===//
+
+#include "X86InstrFMA3Info.h"
+#include "X86InstrInfo.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Threading.h"
+using namespace llvm;
+
+/// This flag is passed to llvm::call_once() below to make the
+/// initialization of the map 'OpcodeToGroup' thread safe.
+LLVM_DEFINE_ONCE_FLAG(InitGroupsOnceFlag);
+
+static ManagedStatic<X86InstrFMA3Info> X86InstrFMA3InfoObj;
+X86InstrFMA3Info *X86InstrFMA3Info::getX86InstrFMA3Info() {
+ return &*X86InstrFMA3InfoObj;
+}
+
+void X86InstrFMA3Info::initRMGroup(const uint16_t *RegOpcodes,
+ const uint16_t *MemOpcodes, unsigned Attr) {
+  // Create a new X86InstrFMA3Group to hold this group of FMA opcodes.
+ X86InstrFMA3Group *G = new X86InstrFMA3Group(RegOpcodes, MemOpcodes, Attr);
+
+  // Add the references from individual opcodes to the group holding them.
+ assert((!OpcodeToGroup[RegOpcodes[0]] && !OpcodeToGroup[RegOpcodes[1]] &&
+ !OpcodeToGroup[RegOpcodes[2]] && !OpcodeToGroup[MemOpcodes[0]] &&
+ !OpcodeToGroup[MemOpcodes[1]] && !OpcodeToGroup[MemOpcodes[2]]) &&
+ "Duplication or rewrite of elements in OpcodeToGroup.");
+ OpcodeToGroup[RegOpcodes[0]] = G;
+ OpcodeToGroup[RegOpcodes[1]] = G;
+ OpcodeToGroup[RegOpcodes[2]] = G;
+ OpcodeToGroup[MemOpcodes[0]] = G;
+ OpcodeToGroup[MemOpcodes[1]] = G;
+ OpcodeToGroup[MemOpcodes[2]] = G;
+}
+
+void X86InstrFMA3Info::initRGroup(const uint16_t *RegOpcodes, unsigned Attr) {
+  // Create a new X86InstrFMA3Group to hold this group of FMA opcodes.
+ X86InstrFMA3Group *G = new X86InstrFMA3Group(RegOpcodes, nullptr, Attr);
+
+  // Add the references from individual opcodes to the group holding them.
+ assert((!OpcodeToGroup[RegOpcodes[0]] && !OpcodeToGroup[RegOpcodes[1]] &&
+ !OpcodeToGroup[RegOpcodes[2]]) &&
+ "Duplication or rewrite of elements in OpcodeToGroup.");
+ OpcodeToGroup[RegOpcodes[0]] = G;
+ OpcodeToGroup[RegOpcodes[1]] = G;
+ OpcodeToGroup[RegOpcodes[2]] = G;
+}
+
+void X86InstrFMA3Info::initMGroup(const uint16_t *MemOpcodes, unsigned Attr) {
+  // Create a new X86InstrFMA3Group to hold this group of FMA opcodes.
+ X86InstrFMA3Group *G = new X86InstrFMA3Group(nullptr, MemOpcodes, Attr);
+
+  // Add the references from individual opcodes to the group holding them.
+ assert((!OpcodeToGroup[MemOpcodes[0]] && !OpcodeToGroup[MemOpcodes[1]] &&
+ !OpcodeToGroup[MemOpcodes[2]]) &&
+ "Duplication or rewrite of elements in OpcodeToGroup.");
+ OpcodeToGroup[MemOpcodes[0]] = G;
+ OpcodeToGroup[MemOpcodes[1]] = G;
+ OpcodeToGroup[MemOpcodes[2]] = G;
+}
+
+#define FMA3RM(R132, R213, R231, M132, M213, M231) \
+ static const uint16_t Reg##R132[3] = {X86::R132, X86::R213, X86::R231}; \
+ static const uint16_t Mem##R132[3] = {X86::M132, X86::M213, X86::M231}; \
+ initRMGroup(Reg##R132, Mem##R132);
+
+#define FMA3RMA(R132, R213, R231, M132, M213, M231, Attrs) \
+ static const uint16_t Reg##R132[3] = {X86::R132, X86::R213, X86::R231}; \
+ static const uint16_t Mem##R132[3] = {X86::M132, X86::M213, X86::M231}; \
+ initRMGroup(Reg##R132, Mem##R132, (Attrs));
+
+#define FMA3R(R132, R213, R231) \
+ static const uint16_t Reg##R132[3] = {X86::R132, X86::R213, X86::R231}; \
+ initRGroup(Reg##R132);
+
+#define FMA3RA(R132, R213, R231, Attrs) \
+ static const uint16_t Reg##R132[3] = {X86::R132, X86::R213, X86::R231}; \
+ initRGroup(Reg##R132, (Attrs));
+
+#define FMA3M(M132, M213, M231) \
+ static const uint16_t Mem##M132[3] = {X86::M132, X86::M213, X86::M231}; \
+ initMGroup(Mem##M132);
+
+#define FMA3MA(M132, M213, M231, Attrs) \
+ static const uint16_t Mem##M132[3] = {X86::M132, X86::M213, X86::M231}; \
+ initMGroup(Mem##M132, (Attrs));
+
+#define FMA3_AVX2_VECTOR_GROUP(Name) \
+ FMA3RM(Name##132PSr, Name##213PSr, Name##231PSr, \
+ Name##132PSm, Name##213PSm, Name##231PSm); \
+ FMA3RM(Name##132PDr, Name##213PDr, Name##231PDr, \
+ Name##132PDm, Name##213PDm, Name##231PDm); \
+ FMA3RM(Name##132PSYr, Name##213PSYr, Name##231PSYr, \
+ Name##132PSYm, Name##213PSYm, Name##231PSYm); \
+ FMA3RM(Name##132PDYr, Name##213PDYr, Name##231PDYr, \
+ Name##132PDYm, Name##213PDYm, Name##231PDYm);
+
+#define FMA3_AVX2_SCALAR_GROUP(Name) \
+ FMA3RM(Name##132SSr, Name##213SSr, Name##231SSr, \
+ Name##132SSm, Name##213SSm, Name##231SSm); \
+ FMA3RM(Name##132SDr, Name##213SDr, Name##231SDr, \
+ Name##132SDm, Name##213SDm, Name##231SDm); \
+ FMA3RMA(Name##132SSr_Int, Name##213SSr_Int, Name##231SSr_Int, \
+ Name##132SSm_Int, Name##213SSm_Int, Name##231SSm_Int, \
+ X86InstrFMA3Group::X86FMA3Intrinsic); \
+ FMA3RMA(Name##132SDr_Int, Name##213SDr_Int, Name##231SDr_Int, \
+ Name##132SDm_Int, Name##213SDm_Int, Name##231SDm_Int, \
+ X86InstrFMA3Group::X86FMA3Intrinsic);
+
+#define FMA3_AVX2_FULL_GROUP(Name) \
+ FMA3_AVX2_VECTOR_GROUP(Name); \
+ FMA3_AVX2_SCALAR_GROUP(Name);
+
+#define FMA3_AVX512_VECTOR_GROUP(Name) \
+ FMA3RM(Name##132PSZ128r, Name##213PSZ128r, Name##231PSZ128r, \
+ Name##132PSZ128m, Name##213PSZ128m, Name##231PSZ128m); \
+ FMA3RM(Name##132PDZ128r, Name##213PDZ128r, Name##231PDZ128r, \
+ Name##132PDZ128m, Name##213PDZ128m, Name##231PDZ128m); \
+ FMA3RM(Name##132PSZ256r, Name##213PSZ256r, Name##231PSZ256r, \
+ Name##132PSZ256m, Name##213PSZ256m, Name##231PSZ256m); \
+ FMA3RM(Name##132PDZ256r, Name##213PDZ256r, Name##231PDZ256r, \
+ Name##132PDZ256m, Name##213PDZ256m, Name##231PDZ256m); \
+ FMA3RM(Name##132PSZr, Name##213PSZr, Name##231PSZr, \
+ Name##132PSZm, Name##213PSZm, Name##231PSZm); \
+ FMA3RM(Name##132PDZr, Name##213PDZr, Name##231PDZr, \
+ Name##132PDZm, Name##213PDZm, Name##231PDZm); \
+ FMA3RMA(Name##132PSZ128rk, Name##213PSZ128rk, Name##231PSZ128rk, \
+ Name##132PSZ128mk, Name##213PSZ128mk, Name##231PSZ128mk, \
+ X86InstrFMA3Group::X86FMA3KMergeMasked); \
+ FMA3RMA(Name##132PDZ128rk, Name##213PDZ128rk, Name##231PDZ128rk, \
+ Name##132PDZ128mk, Name##213PDZ128mk, Name##231PDZ128mk, \
+ X86InstrFMA3Group::X86FMA3KMergeMasked); \
+ FMA3RMA(Name##132PSZ256rk, Name##213PSZ256rk, Name##231PSZ256rk, \
+ Name##132PSZ256mk, Name##213PSZ256mk, Name##231PSZ256mk, \
+ X86InstrFMA3Group::X86FMA3KMergeMasked); \
+ FMA3RMA(Name##132PDZ256rk, Name##213PDZ256rk, Name##231PDZ256rk, \
+ Name##132PDZ256mk, Name##213PDZ256mk, Name##231PDZ256mk, \
+ X86InstrFMA3Group::X86FMA3KMergeMasked); \
+ FMA3RMA(Name##132PSZrk, Name##213PSZrk, Name##231PSZrk, \
+ Name##132PSZmk, Name##213PSZmk, Name##231PSZmk, \
+ X86InstrFMA3Group::X86FMA3KMergeMasked); \
+ FMA3RMA(Name##132PDZrk, Name##213PDZrk, Name##231PDZrk, \
+ Name##132PDZmk, Name##213PDZmk, Name##231PDZmk, \
+ X86InstrFMA3Group::X86FMA3KMergeMasked); \
+ FMA3RMA(Name##132PSZ128rkz, Name##213PSZ128rkz, Name##231PSZ128rkz, \
+ Name##132PSZ128mkz, Name##213PSZ128mkz, Name##231PSZ128mkz, \
+ X86InstrFMA3Group::X86FMA3KZeroMasked); \
+ FMA3RMA(Name##132PDZ128rkz, Name##213PDZ128rkz, Name##231PDZ128rkz, \
+ Name##132PDZ128mkz, Name##213PDZ128mkz, Name##231PDZ128mkz, \
+ X86InstrFMA3Group::X86FMA3KZeroMasked); \
+ FMA3RMA(Name##132PSZ256rkz, Name##213PSZ256rkz, Name##231PSZ256rkz, \
+ Name##132PSZ256mkz, Name##213PSZ256mkz, Name##231PSZ256mkz, \
+ X86InstrFMA3Group::X86FMA3KZeroMasked); \
+ FMA3RMA(Name##132PDZ256rkz, Name##213PDZ256rkz, Name##231PDZ256rkz, \
+ Name##132PDZ256mkz, Name##213PDZ256mkz, Name##231PDZ256mkz, \
+ X86InstrFMA3Group::X86FMA3KZeroMasked); \
+ FMA3RMA(Name##132PSZrkz, Name##213PSZrkz, Name##231PSZrkz, \
+ Name##132PSZmkz, Name##213PSZmkz, Name##231PSZmkz, \
+ X86InstrFMA3Group::X86FMA3KZeroMasked); \
+ FMA3RMA(Name##132PDZrkz, Name##213PDZrkz, Name##231PDZrkz, \
+ Name##132PDZmkz, Name##213PDZmkz, Name##231PDZmkz, \
+ X86InstrFMA3Group::X86FMA3KZeroMasked); \
+ FMA3R(Name##132PSZrb, Name##213PSZrb, Name##231PSZrb); \
+ FMA3R(Name##132PDZrb, Name##213PDZrb, Name##231PDZrb); \
+ FMA3RA(Name##132PSZrbk, Name##213PSZrbk, Name##231PSZrbk, \
+ X86InstrFMA3Group::X86FMA3KMergeMasked); \
+ FMA3RA(Name##132PDZrbk, Name##213PDZrbk, Name##231PDZrbk, \
+ X86InstrFMA3Group::X86FMA3KMergeMasked); \
+ FMA3RA(Name##132PSZrbkz, Name##213PSZrbkz, Name##231PSZrbkz, \
+ X86InstrFMA3Group::X86FMA3KZeroMasked); \
+ FMA3RA(Name##132PDZrbkz, Name##213PDZrbkz, Name##231PDZrbkz, \
+ X86InstrFMA3Group::X86FMA3KZeroMasked); \
+ FMA3M(Name##132PSZ128mb, Name##213PSZ128mb, Name##231PSZ128mb); \
+ FMA3M(Name##132PDZ128mb, Name##213PDZ128mb, Name##231PDZ128mb); \
+ FMA3M(Name##132PSZ256mb, Name##213PSZ256mb, Name##231PSZ256mb); \
+ FMA3M(Name##132PDZ256mb, Name##213PDZ256mb, Name##231PDZ256mb); \
+ FMA3M(Name##132PSZmb, Name##213PSZmb, Name##231PSZmb); \
+ FMA3M(Name##132PDZmb, Name##213PDZmb, Name##231PDZmb); \
+ FMA3MA(Name##132PSZ128mbk, Name##213PSZ128mbk, Name##231PSZ128mbk, \
+ X86InstrFMA3Group::X86FMA3KMergeMasked); \
+ FMA3MA(Name##132PDZ128mbk, Name##213PDZ128mbk, Name##231PDZ128mbk, \
+ X86InstrFMA3Group::X86FMA3KMergeMasked); \
+ FMA3MA(Name##132PSZ256mbk, Name##213PSZ256mbk, Name##231PSZ256mbk, \
+ X86InstrFMA3Group::X86FMA3KMergeMasked); \
+ FMA3MA(Name##132PDZ256mbk, Name##213PDZ256mbk, Name##231PDZ256mbk, \
+ X86InstrFMA3Group::X86FMA3KMergeMasked); \
+ FMA3MA(Name##132PSZmbk, Name##213PSZmbk, Name##231PSZmbk, \
+ X86InstrFMA3Group::X86FMA3KMergeMasked); \
+ FMA3MA(Name##132PDZmbk, Name##213PDZmbk, Name##231PDZmbk, \
+ X86InstrFMA3Group::X86FMA3KMergeMasked); \
+ FMA3MA(Name##132PSZ128mbkz, Name##213PSZ128mbkz, Name##231PSZ128mbkz, \
+ X86InstrFMA3Group::X86FMA3KZeroMasked); \
+ FMA3MA(Name##132PDZ128mbkz, Name##213PDZ128mbkz, Name##231PDZ128mbkz, \
+ X86InstrFMA3Group::X86FMA3KZeroMasked); \
+ FMA3MA(Name##132PSZ256mbkz, Name##213PSZ256mbkz, Name##231PSZ256mbkz, \
+ X86InstrFMA3Group::X86FMA3KZeroMasked); \
+ FMA3MA(Name##132PDZ256mbkz, Name##213PDZ256mbkz, Name##231PDZ256mbkz, \
+ X86InstrFMA3Group::X86FMA3KZeroMasked); \
+ FMA3MA(Name##132PSZmbkz, Name##213PSZmbkz, Name##231PSZmbkz, \
+ X86InstrFMA3Group::X86FMA3KZeroMasked); \
+ FMA3MA(Name##132PDZmbkz, Name##213PDZmbkz, Name##231PDZmbkz, \
+ X86InstrFMA3Group::X86FMA3KZeroMasked);
+
+#define FMA3_AVX512_SCALAR_GROUP(Name) \
+ FMA3RM(Name##132SSZr, Name##213SSZr, Name##231SSZr, \
+ Name##132SSZm, Name##213SSZm, Name##231SSZm); \
+ FMA3RM(Name##132SDZr, Name##213SDZr, Name##231SDZr, \
+ Name##132SDZm, Name##213SDZm, Name##231SDZm); \
+ FMA3RMA(Name##132SSZr_Int, Name##213SSZr_Int, Name##231SSZr_Int, \
+ Name##132SSZm_Int, Name##213SSZm_Int, Name##231SSZm_Int, \
+ X86InstrFMA3Group::X86FMA3Intrinsic); \
+ FMA3RMA(Name##132SDZr_Int, Name##213SDZr_Int, Name##231SDZr_Int, \
+ Name##132SDZm_Int, Name##213SDZm_Int, Name##231SDZm_Int, \
+ X86InstrFMA3Group::X86FMA3Intrinsic); \
+ FMA3RMA(Name##132SSZr_Intk, Name##213SSZr_Intk, Name##231SSZr_Intk, \
+ Name##132SSZm_Intk, Name##213SSZm_Intk, Name##231SSZm_Intk, \
+ X86InstrFMA3Group::X86FMA3Intrinsic | \
+ X86InstrFMA3Group::X86FMA3KMergeMasked); \
+ FMA3RMA(Name##132SDZr_Intk, Name##213SDZr_Intk, Name##231SDZr_Intk, \
+ Name##132SDZm_Intk, Name##213SDZm_Intk, Name##231SDZm_Intk, \
+ X86InstrFMA3Group::X86FMA3Intrinsic | \
+ X86InstrFMA3Group::X86FMA3KMergeMasked); \
+ FMA3RMA(Name##132SSZr_Intkz, Name##213SSZr_Intkz, Name##231SSZr_Intkz, \
+ Name##132SSZm_Intkz, Name##213SSZm_Intkz, Name##231SSZm_Intkz, \
+ X86InstrFMA3Group::X86FMA3Intrinsic | \
+ X86InstrFMA3Group::X86FMA3KZeroMasked); \
+ FMA3RMA(Name##132SDZr_Intkz, Name##213SDZr_Intkz, Name##231SDZr_Intkz, \
+ Name##132SDZm_Intkz, Name##213SDZm_Intkz, Name##231SDZm_Intkz, \
+ X86InstrFMA3Group::X86FMA3Intrinsic | \
+ X86InstrFMA3Group::X86FMA3KZeroMasked); \
+ FMA3RA(Name##132SSZrb_Int, Name##213SSZrb_Int, Name##231SSZrb_Int, \
+ X86InstrFMA3Group::X86FMA3Intrinsic); \
+ FMA3RA(Name##132SDZrb_Int, Name##213SDZrb_Int, Name##231SDZrb_Int, \
+ X86InstrFMA3Group::X86FMA3Intrinsic); \
+ FMA3RA(Name##132SSZrb_Intk, Name##213SSZrb_Intk, Name##231SSZrb_Intk, \
+ X86InstrFMA3Group::X86FMA3Intrinsic | \
+ X86InstrFMA3Group::X86FMA3KMergeMasked); \
+ FMA3RA(Name##132SDZrb_Intk, Name##213SDZrb_Intk, Name##231SDZrb_Intk, \
+ X86InstrFMA3Group::X86FMA3Intrinsic | \
+ X86InstrFMA3Group::X86FMA3KMergeMasked); \
+ FMA3RA(Name##132SSZrb_Intkz, Name##213SSZrb_Intkz, Name##231SSZrb_Intkz, \
+ X86InstrFMA3Group::X86FMA3Intrinsic | \
+ X86InstrFMA3Group::X86FMA3KZeroMasked); \
+ FMA3RA(Name##132SDZrb_Intkz, Name##213SDZrb_Intkz, Name##231SDZrb_Intkz, \
+ X86InstrFMA3Group::X86FMA3Intrinsic | \
+ X86InstrFMA3Group::X86FMA3KZeroMasked);
+
+#define FMA3_AVX512_FULL_GROUP(Name) \
+ FMA3_AVX512_VECTOR_GROUP(Name); \
+ FMA3_AVX512_SCALAR_GROUP(Name);
+
+void X86InstrFMA3Info::initGroupsOnceImpl() {
+ FMA3_AVX2_FULL_GROUP(VFMADD);
+ FMA3_AVX2_FULL_GROUP(VFMSUB);
+ FMA3_AVX2_FULL_GROUP(VFNMADD);
+ FMA3_AVX2_FULL_GROUP(VFNMSUB);
+
+ FMA3_AVX2_VECTOR_GROUP(VFMADDSUB);
+ FMA3_AVX2_VECTOR_GROUP(VFMSUBADD);
+
+ FMA3_AVX512_FULL_GROUP(VFMADD);
+ FMA3_AVX512_FULL_GROUP(VFMSUB);
+ FMA3_AVX512_FULL_GROUP(VFNMADD);
+ FMA3_AVX512_FULL_GROUP(VFNMSUB);
+
+ FMA3_AVX512_VECTOR_GROUP(VFMADDSUB);
+ FMA3_AVX512_VECTOR_GROUP(VFMSUBADD);
+}
+
+void X86InstrFMA3Info::initGroupsOnce() {
+ llvm::call_once(InitGroupsOnceFlag,
+ []() { getX86InstrFMA3Info()->initGroupsOnceImpl(); });
+}
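
The new file above registers every FMA3 opcode triple in a single lazily built map: a ManagedStatic singleton owns OpcodeToGroup, llvm::call_once guards the one-time fill, and the FMA3* macros stamp out a static three-element opcode array per {132, 213, 231} group, then point each opcode back at its group. The following is a minimal standalone sketch of that initialization pattern only, not the LLVM code itself: std::call_once and std::unordered_map stand in for llvm::call_once, ManagedStatic, and DenseMap, and the opcode values are made up.

    #include <cstdint>
    #include <mutex>
    #include <unordered_map>

    // Toy stand-ins for the real X86 opcode enum values.
    enum : uint16_t { FMADD132 = 1, FMADD213 = 2, FMADD231 = 3 };

    struct Group {                   // analogous to X86InstrFMA3Group
      const uint16_t *RegOpcodes;    // {132, 213, 231} register forms
    };

    static std::unordered_map<unsigned, const Group *> OpcodeToGroup;
    static std::once_flag InitFlag;  // plays the role of LLVM_DEFINE_ONCE_FLAG

    static void initGroupsOnceImpl() {
      // One group, as a FMA3R-style macro expansion would produce it.
      static const uint16_t FMADDRegs[3] = {FMADD132, FMADD213, FMADD231};
      static const Group FMADDGroup = {FMADDRegs};
      for (uint16_t Opc : FMADDRegs)  // every form maps back to its group
        OpcodeToGroup[Opc] = &FMADDGroup;
    }

    const Group *getFMA3Group(unsigned Opcode) {
      std::call_once(InitFlag, initGroupsOnceImpl);  // fill the map exactly once
      auto It = OpcodeToGroup.find(Opcode);
      return It == OpcodeToGroup.end() ? nullptr : It->second;
    }
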
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFMA3Info.h b/contrib/llvm/lib/Target/X86/X86InstrFMA3Info.h
new file mode 100644
index 000000000000..025cee3b2b90
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrFMA3Info.h
@@ -0,0 +1,315 @@
+//===-- X86InstrFMA3Info.h - X86 FMA3 Instruction Information -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the implementation of the classes providing information
+// about existing X86 FMA3 opcodes, classifying and grouping them.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_X86_UTILS_X86INSTRFMA3INFO_H
+#define LLVM_LIB_TARGET_X86_UTILS_X86INSTRFMA3INFO_H
+
+#include "X86.h"
+#include "llvm/ADT/DenseMap.h"
+#include <cassert>
+#include <set>
+
+namespace llvm {
+/// This class is used to group {132, 213, 231} forms of FMA opcodes together.
+/// Each of the groups has either 3 register opcodes, 3 memory opcodes,
+/// or 6 register and memory opcodes. Also, each group has an attributes field
+/// describing it.
+class X86InstrFMA3Group {
+private:
+ /// Reference to an array holding 3 forms of register FMA opcodes.
+ /// It may be set to nullptr if the group of FMA opcodes does not have
+ /// any register form opcodes.
+ const uint16_t *RegOpcodes;
+
+ /// Reference to an array holding 3 forms of memory FMA opcodes.
+ /// It may be set to nullptr if the group of FMA opcodes does not have
+  /// any memory form opcodes.
+ const uint16_t *MemOpcodes;
+
+ /// This bitfield specifies the attributes associated with the created
+ /// FMA groups of opcodes.
+ unsigned Attributes;
+
+ static const unsigned Form132 = 0;
+ static const unsigned Form213 = 1;
+ static const unsigned Form231 = 2;
+
+public:
+ /// This bit must be set in the 'Attributes' field of FMA group if such
+ /// group of FMA opcodes consists of FMA intrinsic opcodes.
+ static const unsigned X86FMA3Intrinsic = 0x1;
+
+ /// This bit must be set in the 'Attributes' field of FMA group if such
+ /// group of FMA opcodes consists of AVX512 opcodes accepting a k-mask and
+ /// passing the elements from the 1st operand to the result of the operation
+  /// when the corresponding bits in the k-mask are unset.
+ static const unsigned X86FMA3KMergeMasked = 0x2;
+
+ /// This bit must be set in the 'Attributes' field of FMA group if such
+ /// group of FMA opcodes consists of AVX512 opcodes accepting a k-zeromask.
+ static const unsigned X86FMA3KZeroMasked = 0x4;
+
+ /// Constructor. Creates a new group of FMA opcodes with three register form
+ /// FMA opcodes \p RegOpcodes and three memory form FMA opcodes \p MemOpcodes.
+ /// The parameters \p RegOpcodes and \p MemOpcodes may be set to nullptr,
+ /// which means that the created group of FMA opcodes does not have the
+ /// corresponding (register or memory) opcodes.
+ /// The parameter \p Attr specifies the attributes describing the created
+ /// group.
+ X86InstrFMA3Group(const uint16_t *RegOpcodes, const uint16_t *MemOpcodes,
+ unsigned Attr)
+ : RegOpcodes(RegOpcodes), MemOpcodes(MemOpcodes), Attributes(Attr) {
+ assert((RegOpcodes || MemOpcodes) &&
+ "Cannot create a group not having any opcodes.");
+ }
+
+ /// Returns a memory form opcode that is the equivalent of the given register
+ /// form opcode \p RegOpcode. 0 is returned if the group does not have
+  /// either register or memory opcodes.
+ unsigned getMemOpcode(unsigned RegOpcode) const {
+ if (!RegOpcodes || !MemOpcodes)
+ return 0;
+ for (unsigned Form = 0; Form < 3; Form++)
+ if (RegOpcodes[Form] == RegOpcode)
+ return MemOpcodes[Form];
+ return 0;
+ }
+
+ /// Returns the 132 form of FMA register opcode.
+ unsigned getReg132Opcode() const {
+ assert(RegOpcodes && "The group does not have register opcodes.");
+ return RegOpcodes[Form132];
+ }
+
+ /// Returns the 213 form of FMA register opcode.
+ unsigned getReg213Opcode() const {
+ assert(RegOpcodes && "The group does not have register opcodes.");
+ return RegOpcodes[Form213];
+ }
+
+ /// Returns the 231 form of FMA register opcode.
+ unsigned getReg231Opcode() const {
+ assert(RegOpcodes && "The group does not have register opcodes.");
+ return RegOpcodes[Form231];
+ }
+
+ /// Returns the 132 form of FMA memory opcode.
+ unsigned getMem132Opcode() const {
+ assert(MemOpcodes && "The group does not have memory opcodes.");
+ return MemOpcodes[Form132];
+ }
+
+ /// Returns the 213 form of FMA memory opcode.
+ unsigned getMem213Opcode() const {
+ assert(MemOpcodes && "The group does not have memory opcodes.");
+ return MemOpcodes[Form213];
+ }
+
+ /// Returns the 231 form of FMA memory opcode.
+ unsigned getMem231Opcode() const {
+ assert(MemOpcodes && "The group does not have memory opcodes.");
+ return MemOpcodes[Form231];
+ }
+
+ /// Returns true iff the group of FMA opcodes holds intrinsic opcodes.
+ bool isIntrinsic() const { return (Attributes & X86FMA3Intrinsic) != 0; }
+
+ /// Returns true iff the group of FMA opcodes holds k-merge-masked opcodes.
+ bool isKMergeMasked() const {
+ return (Attributes & X86FMA3KMergeMasked) != 0;
+ }
+
+ /// Returns true iff the group of FMA opcodes holds k-zero-masked opcodes.
+ bool isKZeroMasked() const { return (Attributes & X86FMA3KZeroMasked) != 0; }
+
+ /// Returns true iff the group of FMA opcodes holds any of k-masked opcodes.
+ bool isKMasked() const {
+ return (Attributes & (X86FMA3KMergeMasked | X86FMA3KZeroMasked)) != 0;
+ }
+
+ /// Returns true iff the given \p Opcode is a register opcode from the
+ /// groups of FMA opcodes.
+ bool isRegOpcodeFromGroup(unsigned Opcode) const {
+ if (!RegOpcodes)
+ return false;
+ for (unsigned Form = 0; Form < 3; Form++)
+ if (Opcode == RegOpcodes[Form])
+ return true;
+ return false;
+ }
+
+ /// Returns true iff the given \p Opcode is a memory opcode from the
+ /// groups of FMA opcodes.
+ bool isMemOpcodeFromGroup(unsigned Opcode) const {
+ if (!MemOpcodes)
+ return false;
+ for (unsigned Form = 0; Form < 3; Form++)
+ if (Opcode == MemOpcodes[Form])
+ return true;
+ return false;
+ }
+};
+
+/// This class provides information about all existing FMA3 opcodes.
+///
+class X86InstrFMA3Info {
+private:
+ /// A map that is used to find the group of FMA opcodes using any FMA opcode
+ /// from the group.
+ DenseMap<unsigned, const X86InstrFMA3Group *> OpcodeToGroup;
+
+ /// Creates groups of FMA opcodes and initializes Opcode-to-Group map.
+ /// This method can be called many times, but the actual initialization is
+ /// called only once.
+ static void initGroupsOnce();
+
+ /// Creates groups of FMA opcodes and initializes Opcode-to-Group map.
+ /// This method must be called ONLY from initGroupsOnce(). Otherwise, such
+ /// call is not thread safe.
+ void initGroupsOnceImpl();
+
+ /// Creates one group of FMA opcodes having the register opcodes
+ /// \p RegOpcodes and memory opcodes \p MemOpcodes. The parameter \p Attr
+ /// specifies the attributes describing the created group.
+ void initRMGroup(const uint16_t *RegOpcodes,
+ const uint16_t *MemOpcodes, unsigned Attr = 0);
+
+ /// Creates one group of FMA opcodes having only the register opcodes
+ /// \p RegOpcodes. The parameter \p Attr specifies the attributes describing
+ /// the created group.
+ void initRGroup(const uint16_t *RegOpcodes, unsigned Attr = 0);
+
+ /// Creates one group of FMA opcodes having only the memory opcodes
+ /// \p MemOpcodes. The parameter \p Attr specifies the attributes describing
+ /// the created group.
+ void initMGroup(const uint16_t *MemOpcodes, unsigned Attr = 0);
+
+public:
+ /// Returns the reference to an object of this class. It is assumed that
+ /// only one object may exist.
+ static X86InstrFMA3Info *getX86InstrFMA3Info();
+
+ /// Constructor. Just creates an object of the class.
+ X86InstrFMA3Info() {}
+
+ /// Destructor. Deallocates the memory used for FMA3 Groups.
+ ~X86InstrFMA3Info() {
+ std::set<const X86InstrFMA3Group *> DeletedGroups;
+ auto E = OpcodeToGroup.end();
+ for (auto I = OpcodeToGroup.begin(); I != E; I++) {
+ const X86InstrFMA3Group *G = I->second;
+ if (DeletedGroups.find(G) == DeletedGroups.end()) {
+ DeletedGroups.insert(G);
+ delete G;
+ }
+ }
+ }
+
+  /// Returns a pointer to the group of FMA3 opcodes that contains the given
+  /// \p Opcode. If the given \p Opcode is not recognized as FMA3 and is not
+  /// included in any FMA3 group, then nullptr is returned.
+ static const X86InstrFMA3Group *getFMA3Group(unsigned Opcode) {
+ // Ensure that the groups of opcodes are initialized.
+ initGroupsOnce();
+
+ // Find the group including the given opcode.
+ const X86InstrFMA3Info *FMA3Info = getX86InstrFMA3Info();
+ auto I = FMA3Info->OpcodeToGroup.find(Opcode);
+ if (I == FMA3Info->OpcodeToGroup.end())
+ return nullptr;
+
+ return I->second;
+ }
+
+ /// Returns true iff the given \p Opcode is recognized as FMA3 by this class.
+ static bool isFMA3(unsigned Opcode) {
+ return getFMA3Group(Opcode) != nullptr;
+ }
+
+ /// Iterator that is used to walk on FMA register opcodes having memory
+ /// form equivalents.
+ class rm_iterator {
+ private:
+ /// Iterator associated with the OpcodeToGroup map. It must always be
+ /// initialized with an entry from OpcodeToGroup for which I->first
+ /// points to a register FMA opcode and I->second points to a group of
+ /// FMA opcodes having memory form equivalent of I->first.
+ DenseMap<unsigned, const X86InstrFMA3Group *>::const_iterator I;
+
+ public:
+ /// Constructor. Creates rm_iterator. The parameter \p I must be an
+ /// iterator to OpcodeToGroup map entry having I->first pointing to
+ /// register form FMA opcode and I->second pointing to a group of FMA
+    /// opcodes holding memory form equivalent for I->first.
+ rm_iterator(DenseMap<unsigned, const X86InstrFMA3Group *>::const_iterator I)
+ : I(I) {}
+
+ /// Returns the register form FMA opcode.
+    unsigned getRegOpcode() const { return I->first; }
+
+ /// Returns the memory form equivalent opcode for FMA register opcode
+ /// referenced by I->first.
+ unsigned getMemOpcode() const {
+ unsigned Opcode = I->first;
+ const X86InstrFMA3Group *Group = I->second;
+ return Group->getMemOpcode(Opcode);
+ }
+
+ /// Returns a reference to a group of FMA opcodes.
+ const X86InstrFMA3Group *getGroup() const { return I->second; }
+
+ bool operator==(const rm_iterator &OtherIt) const { return I == OtherIt.I; }
+ bool operator!=(const rm_iterator &OtherIt) const { return I != OtherIt.I; }
+
+ /// Increment. Advances the 'I' iterator to the next OpcodeToGroup entry
+ /// having I->first pointing to register form FMA and I->second pointing
+    /// to a group of FMA opcodes holding memory form equivalent for I->first.
+ rm_iterator &operator++() {
+ auto E = getX86InstrFMA3Info()->OpcodeToGroup.end();
+ for (++I; I != E; ++I) {
+ unsigned RegOpcode = I->first;
+ const X86InstrFMA3Group *Group = I->second;
+ if (Group->getMemOpcode(RegOpcode) != 0)
+ break;
+ }
+ return *this;
+ }
+ };
+
+ /// Returns rm_iterator pointing to the first entry of OpcodeToGroup map
+ /// with a register FMA opcode having memory form opcode equivalent.
+ static rm_iterator rm_begin() {
+ initGroupsOnce();
+ const X86InstrFMA3Info *FMA3Info = getX86InstrFMA3Info();
+ auto I = FMA3Info->OpcodeToGroup.begin();
+ auto E = FMA3Info->OpcodeToGroup.end();
+ while (I != E) {
+ unsigned Opcode = I->first;
+ const X86InstrFMA3Group *G = I->second;
+ if (G->getMemOpcode(Opcode) != 0)
+ break;
+ I++;
+ }
+ return rm_iterator(I);
+ }
+
+ /// Returns the last rm_iterator.
+ static rm_iterator rm_end() {
+ initGroupsOnce();
+ return rm_iterator(getX86InstrFMA3Info()->OpcodeToGroup.end());
+ }
+};
+} // namespace llvm
+
+#endif
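
A rough usage sketch of the interface declared above, assuming the code is built inside the X86 target directory so this header resolves; foldFMA3RegToMem and forEachRMPair are hypothetical helper names for illustration, not functions introduced by the patch. Callers ask for the group of an opcode via getFMA3Group(), fold a register form to its memory form with getMemOpcode(), or walk every register/memory pair with rm_begin()/rm_end().

    #include "X86InstrFMA3Info.h"
    using namespace llvm;

    // Hypothetical helper: map an FMA3 register opcode to its memory form, or
    // return 0 if the opcode is not FMA3 or its group has no memory opcodes.
    static unsigned foldFMA3RegToMem(unsigned Opcode) {
      if (const X86InstrFMA3Group *Group = X86InstrFMA3Info::getFMA3Group(Opcode))
        return Group->getMemOpcode(Opcode);
      return 0;
    }

    // Hypothetical helper: visit every register opcode that has a memory
    // equivalent, e.g. to build a load-folding table.
    static void forEachRMPair(void (*Visit)(unsigned RegOpc, unsigned MemOpc)) {
      for (auto I = X86InstrFMA3Info::rm_begin();
           I != X86InstrFMA3Info::rm_end(); ++I)
        Visit(I.getRegOpcode(), I.getMemOpcode());
    }
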
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFPStack.td b/contrib/llvm/lib/Target/X86/X86InstrFPStack.td
index 078dab41502a..10f3839ea8ed 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrFPStack.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrFPStack.td
@@ -711,19 +711,19 @@ def : Pat<(X86fildflag addr:$src, i64), (ILD_Fp64m64 addr:$src)>;
// FP extensions map onto simple pseudo-value conversions if they are to/from
// the FP stack.
-def : Pat<(f64 (fextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP64)>,
+def : Pat<(f64 (fpextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP64)>,
Requires<[FPStackf32]>;
-def : Pat<(f80 (fextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP80)>,
+def : Pat<(f80 (fpextend RFP32:$src)), (COPY_TO_REGCLASS RFP32:$src, RFP80)>,
Requires<[FPStackf32]>;
-def : Pat<(f80 (fextend RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP80)>,
+def : Pat<(f80 (fpextend RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP80)>,
Requires<[FPStackf64]>;
// FP truncations map onto simple pseudo-value conversions if they are to/from
// the FP stack. We have validated that only value-preserving truncations make
// it through isel.
-def : Pat<(f32 (fround RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP32)>,
+def : Pat<(f32 (fpround RFP64:$src)), (COPY_TO_REGCLASS RFP64:$src, RFP32)>,
Requires<[FPStackf32]>;
-def : Pat<(f32 (fround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP32)>,
+def : Pat<(f32 (fpround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP32)>,
Requires<[FPStackf32]>;
-def : Pat<(f64 (fround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP64)>,
+def : Pat<(f64 (fpround RFP80:$src)), (COPY_TO_REGCLASS RFP80:$src, RFP64)>,
Requires<[FPStackf64]>;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFormats.td b/contrib/llvm/lib/Target/X86/X86InstrFormats.td
index 5183adc834b1..610756aa37da 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrFormats.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrFormats.td
@@ -18,43 +18,53 @@ class Format<bits<7> val> {
bits<7> Value = val;
}
-def Pseudo : Format<0>; def RawFrm : Format<1>;
-def AddRegFrm : Format<2>; def MRMDestReg : Format<3>;
-def MRMDestMem : Format<4>; def MRMSrcReg : Format<5>;
-def MRMSrcMem : Format<6>; def RawFrmMemOffs : Format<7>;
-def RawFrmSrc : Format<8>; def RawFrmDst : Format<9>;
-def RawFrmDstSrc: Format<10>;
-def RawFrmImm8 : Format<11>;
-def RawFrmImm16 : Format<12>;
-def MRMXr : Format<14>; def MRMXm : Format<15>;
-def MRM0r : Format<16>; def MRM1r : Format<17>; def MRM2r : Format<18>;
-def MRM3r : Format<19>; def MRM4r : Format<20>; def MRM5r : Format<21>;
-def MRM6r : Format<22>; def MRM7r : Format<23>;
-def MRM0m : Format<24>; def MRM1m : Format<25>; def MRM2m : Format<26>;
-def MRM3m : Format<27>; def MRM4m : Format<28>; def MRM5m : Format<29>;
-def MRM6m : Format<30>; def MRM7m : Format<31>;
-def MRM_C0 : Format<32>; def MRM_C1 : Format<33>; def MRM_C2 : Format<34>;
-def MRM_C3 : Format<35>; def MRM_C4 : Format<36>; def MRM_C5 : Format<37>;
-def MRM_C6 : Format<38>; def MRM_C7 : Format<39>; def MRM_C8 : Format<40>;
-def MRM_C9 : Format<41>; def MRM_CA : Format<42>; def MRM_CB : Format<43>;
-def MRM_CC : Format<44>; def MRM_CD : Format<45>; def MRM_CE : Format<46>;
-def MRM_CF : Format<47>; def MRM_D0 : Format<48>; def MRM_D1 : Format<49>;
-def MRM_D2 : Format<50>; def MRM_D3 : Format<51>; def MRM_D4 : Format<52>;
-def MRM_D5 : Format<53>; def MRM_D6 : Format<54>; def MRM_D7 : Format<55>;
-def MRM_D8 : Format<56>; def MRM_D9 : Format<57>; def MRM_DA : Format<58>;
-def MRM_DB : Format<59>; def MRM_DC : Format<60>; def MRM_DD : Format<61>;
-def MRM_DE : Format<62>; def MRM_DF : Format<63>; def MRM_E0 : Format<64>;
-def MRM_E1 : Format<65>; def MRM_E2 : Format<66>; def MRM_E3 : Format<67>;
-def MRM_E4 : Format<68>; def MRM_E5 : Format<69>; def MRM_E6 : Format<70>;
-def MRM_E7 : Format<71>; def MRM_E8 : Format<72>; def MRM_E9 : Format<73>;
-def MRM_EA : Format<74>; def MRM_EB : Format<75>; def MRM_EC : Format<76>;
-def MRM_ED : Format<77>; def MRM_EE : Format<78>; def MRM_EF : Format<79>;
-def MRM_F0 : Format<80>; def MRM_F1 : Format<81>; def MRM_F2 : Format<82>;
-def MRM_F3 : Format<83>; def MRM_F4 : Format<84>; def MRM_F5 : Format<85>;
-def MRM_F6 : Format<86>; def MRM_F7 : Format<87>; def MRM_F8 : Format<88>;
-def MRM_F9 : Format<89>; def MRM_FA : Format<90>; def MRM_FB : Format<91>;
-def MRM_FC : Format<92>; def MRM_FD : Format<93>; def MRM_FE : Format<94>;
-def MRM_FF : Format<95>;
+def Pseudo : Format<0>;
+def RawFrm : Format<1>;
+def AddRegFrm : Format<2>;
+def RawFrmMemOffs : Format<3>;
+def RawFrmSrc : Format<4>;
+def RawFrmDst : Format<5>;
+def RawFrmDstSrc : Format<6>;
+def RawFrmImm8 : Format<7>;
+def RawFrmImm16 : Format<8>;
+def MRMDestMem : Format<32>;
+def MRMSrcMem : Format<33>;
+def MRMSrcMem4VOp3 : Format<34>;
+def MRMSrcMemOp4 : Format<35>;
+def MRMXm : Format<39>;
+def MRM0m : Format<40>; def MRM1m : Format<41>; def MRM2m : Format<42>;
+def MRM3m : Format<43>; def MRM4m : Format<44>; def MRM5m : Format<45>;
+def MRM6m : Format<46>; def MRM7m : Format<47>;
+def MRMDestReg : Format<48>;
+def MRMSrcReg : Format<49>;
+def MRMSrcReg4VOp3 : Format<50>;
+def MRMSrcRegOp4 : Format<51>;
+def MRMXr : Format<55>;
+def MRM0r : Format<56>; def MRM1r : Format<57>; def MRM2r : Format<58>;
+def MRM3r : Format<59>; def MRM4r : Format<60>; def MRM5r : Format<61>;
+def MRM6r : Format<62>; def MRM7r : Format<63>;
+def MRM_C0 : Format<64>; def MRM_C1 : Format<65>; def MRM_C2 : Format<66>;
+def MRM_C3 : Format<67>; def MRM_C4 : Format<68>; def MRM_C5 : Format<69>;
+def MRM_C6 : Format<70>; def MRM_C7 : Format<71>; def MRM_C8 : Format<72>;
+def MRM_C9 : Format<73>; def MRM_CA : Format<74>; def MRM_CB : Format<75>;
+def MRM_CC : Format<76>; def MRM_CD : Format<77>; def MRM_CE : Format<78>;
+def MRM_CF : Format<79>; def MRM_D0 : Format<80>; def MRM_D1 : Format<81>;
+def MRM_D2 : Format<82>; def MRM_D3 : Format<83>; def MRM_D4 : Format<84>;
+def MRM_D5 : Format<85>; def MRM_D6 : Format<86>; def MRM_D7 : Format<87>;
+def MRM_D8 : Format<88>; def MRM_D9 : Format<89>; def MRM_DA : Format<90>;
+def MRM_DB : Format<91>; def MRM_DC : Format<92>; def MRM_DD : Format<93>;
+def MRM_DE : Format<94>; def MRM_DF : Format<95>; def MRM_E0 : Format<96>;
+def MRM_E1 : Format<97>; def MRM_E2 : Format<98>; def MRM_E3 : Format<99>;
+def MRM_E4 : Format<100>; def MRM_E5 : Format<101>; def MRM_E6 : Format<102>;
+def MRM_E7 : Format<103>; def MRM_E8 : Format<104>; def MRM_E9 : Format<105>;
+def MRM_EA : Format<106>; def MRM_EB : Format<107>; def MRM_EC : Format<108>;
+def MRM_ED : Format<109>; def MRM_EE : Format<110>; def MRM_EF : Format<111>;
+def MRM_F0 : Format<112>; def MRM_F1 : Format<113>; def MRM_F2 : Format<114>;
+def MRM_F3 : Format<115>; def MRM_F4 : Format<116>; def MRM_F5 : Format<117>;
+def MRM_F6 : Format<118>; def MRM_F7 : Format<119>; def MRM_F8 : Format<120>;
+def MRM_F9 : Format<121>; def MRM_FA : Format<122>; def MRM_FB : Format<123>;
+def MRM_FC : Format<124>; def MRM_FD : Format<125>; def MRM_FE : Format<126>;
+def MRM_FF : Format<127>;
// ImmType - This specifies the immediate type used by an instruction. This is
// part of the ad-hoc solution used to emit machine instruction encodings by our
@@ -65,12 +75,13 @@ class ImmType<bits<4> val> {
def NoImm : ImmType<0>;
def Imm8 : ImmType<1>;
def Imm8PCRel : ImmType<2>;
-def Imm16 : ImmType<3>;
-def Imm16PCRel : ImmType<4>;
-def Imm32 : ImmType<5>;
-def Imm32PCRel : ImmType<6>;
-def Imm32S : ImmType<7>;
-def Imm64 : ImmType<8>;
+def Imm8Reg : ImmType<3>; // Register encoded in [7:4].
+def Imm16 : ImmType<4>;
+def Imm16PCRel : ImmType<5>;
+def Imm32 : ImmType<6>;
+def Imm32PCRel : ImmType<7>;
+def Imm32S : ImmType<8>;
+def Imm64 : ImmType<9>;
// FPFormat - This specifies what form this FP instruction has. This is used by
// the Floating-Point stackifier pass.
@@ -190,8 +201,6 @@ class TAXD : TA { Prefix OpPrefix = XD; }
class VEX { Encoding OpEnc = EncVEX; }
class VEX_W { bit hasVEX_WPrefix = 1; }
class VEX_4V : VEX { bit hasVEX_4V = 1; }
-class VEX_4VOp3 : VEX { bit hasVEX_4VOp3 = 1; }
-class VEX_I8IMM { bit hasVEX_i8ImmReg = 1; }
class VEX_L { bit hasVEX_L = 1; }
class VEX_LIG { bit ignoresVEX_L = 1; }
class EVEX : VEX { Encoding OpEnc = EncEVEX; }
@@ -212,10 +221,8 @@ class EVEX_CD8<int esize, CD8VForm form> {
}
class Has3DNow0F0FOpcode { bit has3DNow0F0FOpcode = 1; }
-class MemOp4 { bit hasMemOp4Prefix = 1; }
class XOP { Encoding OpEnc = EncXOP; }
class XOP_4V : XOP { bit hasVEX_4V = 1; }
-class XOP_4VOp3 : XOP { bit hasVEX_4VOp3 = 1; }
class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
string AsmStr,
@@ -265,10 +272,6 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
bits<2> OpEncBits = OpEnc.Value;
bit hasVEX_WPrefix = 0; // Does this inst set the VEX_W field?
bit hasVEX_4V = 0; // Does this inst require the VEX.VVVV field?
- bit hasVEX_4VOp3 = 0; // Does this inst require the VEX.VVVV field to
- // encode the third operand?
- bit hasVEX_i8ImmReg = 0; // Does this inst require the last source register
- // to be encoded in a immediate field?
bit hasVEX_L = 0; // Does this inst use large (256-bit) registers?
bit ignoresVEX_L = 0; // Does this instruction ignore the L-bit
bit hasEVEX_K = 0; // Does this inst require masking?
@@ -280,7 +283,6 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
// assigning to bits<7>.
int CD8_EltSize = 0; // Compressed disp8 form - element-size in bytes.
bit has3DNow0F0FOpcode =0;// Wacky 3dNow! encoding?
- bit hasMemOp4Prefix = 0; // Same bit as VEX_W, but used for swapping operands
bit hasEVEX_RC = 0; // Explicitly specified rounding control in FP instruction.
bits<2> EVEX_LL;
@@ -317,19 +319,15 @@ class X86Inst<bits<8> opcod, Format f, ImmType i, dag outs, dag ins,
let TSFlags{38-31} = Opcode;
let TSFlags{39} = hasVEX_WPrefix;
let TSFlags{40} = hasVEX_4V;
- let TSFlags{41} = hasVEX_4VOp3;
- let TSFlags{42} = hasVEX_i8ImmReg;
- let TSFlags{43} = hasVEX_L;
- let TSFlags{44} = ignoresVEX_L;
- let TSFlags{45} = hasEVEX_K;
- let TSFlags{46} = hasEVEX_Z;
- let TSFlags{47} = hasEVEX_L2;
- let TSFlags{48} = hasEVEX_B;
+ let TSFlags{41} = hasVEX_L;
+ let TSFlags{42} = hasEVEX_K;
+ let TSFlags{43} = hasEVEX_Z;
+ let TSFlags{44} = hasEVEX_L2;
+ let TSFlags{45} = hasEVEX_B;
// If we run out of TSFlags bits, it's possible to encode this in 3 bits.
- let TSFlags{55-49} = CD8_Scale;
- let TSFlags{56} = has3DNow0F0FOpcode;
- let TSFlags{57} = hasMemOp4Prefix;
- let TSFlags{58} = hasEVEX_RC;
+ let TSFlags{52-46} = CD8_Scale;
+ let TSFlags{53} = has3DNow0F0FOpcode;
+ let TSFlags{54} = hasEVEX_RC;
}
class PseudoI<dag oops, dag iops, list<dag> pattern>
@@ -351,6 +349,13 @@ class Ii8 <bits<8> o, Format f, dag outs, dag ins, string asm,
let Pattern = pattern;
let CodeSize = 3;
}
+class Ii8Reg<bits<8> o, Format f, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary,
+ Domain d = GenericDomain>
+ : X86Inst<o, f, Imm8Reg, outs, ins, asm, itin, d> {
+ let Pattern = pattern;
+ let CodeSize = 3;
+}
class Ii8PCRel<bits<8> o, Format f, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: X86Inst<o, f, Imm8PCRel, outs, ins, asm, itin> {
@@ -785,7 +790,6 @@ class AVX512AIi8<bits<8> o, Format F, dag outs, dag ins, string asm,
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TAPD,
Requires<[HasAVX512]>;
class AVX512AIi8Base : TAPD {
- Domain ExeDomain = SSEPackedInt;
ImmType ImmT = Imm8;
}
class AVX512Ii8<bits<8> o, Format F, dag outs, dag ins, string asm,
@@ -850,8 +854,8 @@ class FMA3<bits<8> o, Format F, dag outs, dag ins, string asm,
// FMA4 Instruction Templates
class FMA4<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern, InstrItinClass itin = NoItinerary>
- : Ii8<o, F, outs, ins, asm, pattern, itin>, TAPD,
- VEX_4V, VEX_I8IMM, FMASC, Requires<[HasFMA4]>;
+ : Ii8Reg<o, F, outs, ins, asm, pattern, itin>, TAPD,
+ VEX_4V, FMASC, Requires<[HasFMA4]>;
// XOP 2, 3 and 4 Operand Instruction Template
class IXOP<bits<8> o, Format F, dag outs, dag ins, string asm,
@@ -859,17 +863,22 @@ class IXOP<bits<8> o, Format F, dag outs, dag ins, string asm,
: I<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>,
XOP9, Requires<[HasXOP]>;
-// XOP 2, 3 and 4 Operand Instruction Templates with imm byte
+// XOP 2 and 3 Operand Instruction Templates with imm byte
class IXOPi8<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag> pattern, InstrItinClass itin = NoItinerary>
: Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>,
XOP8, Requires<[HasXOP]>;
+// XOP 4 Operand Instruction Templates with imm byte
+class IXOPi8Reg<bits<8> o, Format F, dag outs, dag ins, string asm,
+ list<dag> pattern, InstrItinClass itin = NoItinerary>
+ : Ii8Reg<o, F, outs, ins, asm, pattern, itin, SSEPackedDouble>,
+ XOP8, Requires<[HasXOP]>;
// XOP 5 operand instruction (VEX encoding!)
class IXOP5<bits<8> o, Format F, dag outs, dag ins, string asm,
list<dag>pattern, InstrItinClass itin = NoItinerary>
- : Ii8<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TAPD,
- VEX_4V, VEX_I8IMM, Requires<[HasXOP]>;
+ : Ii8Reg<o, F, outs, ins, asm, pattern, itin, SSEPackedInt>, TAPD,
+ VEX_4V, Requires<[HasXOP]>;
// X86-64 Instruction templates...
//
diff --git a/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index ea54f049ec7a..c5689d7c698c 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -29,7 +29,6 @@ def MMX_X86movw2d : SDNode<"X86ISD::MMX_MOVW2D", SDTypeProfile<1, 1,
def load_mmx : PatFrag<(ops node:$ptr), (x86mmx (load node:$ptr))>;
def load_mvmmx : PatFrag<(ops node:$ptr),
(x86mmx (MMX_X86movw2d (load node:$ptr)))>;
-def bc_mmx : PatFrag<(ops node:$in), (x86mmx (bitconvert node:$in))>;
//===----------------------------------------------------------------------===//
// SSE specific DAG Nodes.
@@ -56,8 +55,7 @@ def X86for : SDNode<"X86ISD::FOR", SDTFPBinOp,
[SDNPCommutative, SDNPAssociative]>;
def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp,
[SDNPCommutative, SDNPAssociative]>;
-def X86fandn : SDNode<"X86ISD::FANDN", SDTFPBinOp,
- [SDNPCommutative, SDNPAssociative]>;
+def X86fandn : SDNode<"X86ISD::FANDN", SDTFPBinOp>;
def X86frsqrt : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>;
def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>;
def X86frsqrt14s: SDNode<"X86ISD::FRSQRTS", SDTFPBinOp>;
@@ -67,16 +65,8 @@ def X86fhsub : SDNode<"X86ISD::FHSUB", SDTFPBinOp>;
def X86hadd : SDNode<"X86ISD::HADD", SDTIntBinOp>;
def X86hsub : SDNode<"X86ISD::HSUB", SDTIntBinOp>;
def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>;
-def X86comiSae : SDNode<"X86ISD::COMI", SDTX86CmpTestSae>;
def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>;
-def X86ucomiSae: SDNode<"X86ISD::UCOMI", SDTX86CmpTestSae>;
def X86cmps : SDNode<"X86ISD::FSETCC", SDTX86Cmps>;
-def X86cvtdq2pd: SDNode<"X86ISD::CVTDQ2PD",
- SDTypeProfile<1, 1, [SDTCisVT<0, v2f64>,
- SDTCisVT<1, v4i32>]>>;
-def X86cvtudq2pd: SDNode<"X86ISD::CVTUDQ2PD",
- SDTypeProfile<1, 1, [SDTCisVT<0, v2f64>,
- SDTCisVT<1, v4i32>]>>;
def X86pshufb : SDNode<"X86ISD::PSHUFB",
SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i8>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
@@ -84,7 +74,7 @@ def X86psadbw : SDNode<"X86ISD::PSADBW",
SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i64>,
SDTCVecEltisVT<1, i8>,
SDTCisSameSizeAs<0,1>,
- SDTCisSameAs<1,2>]>>;
+ SDTCisSameAs<1,2>]>, [SDNPCommutative]>;
def X86dbpsadbw : SDNode<"X86ISD::DBPSADBW",
SDTypeProfile<1, 3, [SDTCVecEltisVT<0, i16>,
SDTCVecEltisVT<1, i8>,
@@ -144,25 +134,14 @@ def X86vfpround: SDNode<"X86ISD::VFPROUND",
SDTCVecEltisVT<1, f64>,
SDTCisSameSizeAs<0, 1>]>>;
-def X86fround: SDNode<"X86ISD::VFPROUND",
- SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f32>,
- SDTCisSameAs<0, 1>,
- SDTCVecEltisVT<2, f64>,
- SDTCisSameSizeAs<0, 2>]>>;
-def X86froundRnd: SDNode<"X86ISD::VFPROUND",
+def X86froundRnd: SDNode<"X86ISD::VFPROUNDS_RND",
SDTypeProfile<1, 3, [SDTCVecEltisVT<0, f32>,
SDTCisSameAs<0, 1>,
SDTCVecEltisVT<2, f64>,
SDTCisSameSizeAs<0, 2>,
SDTCisVT<3, i32>]>>;
-def X86fpext : SDNode<"X86ISD::VFPEXT",
- SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f64>,
- SDTCisSameAs<0, 1>,
- SDTCVecEltisVT<2, f32>,
- SDTCisSameSizeAs<0, 2>]>>;
-
-def X86fpextRnd : SDNode<"X86ISD::VFPEXT",
+def X86fpextRnd : SDNode<"X86ISD::VFPEXTS_RND",
SDTypeProfile<1, 3, [SDTCVecEltisVT<0, f64>,
SDTCisSameAs<0, 1>,
SDTCVecEltisVT<2, f32>,
@@ -176,7 +155,8 @@ def X86pcmpeq : SDNode<"X86ISD::PCMPEQ", SDTIntBinOp, [SDNPCommutative]>;
def X86pcmpgt : SDNode<"X86ISD::PCMPGT", SDTIntBinOp>;
def X86IntCmpMask : SDTypeProfile<1, 2,
- [SDTCisVec<0>, SDTCisSameAs<1, 2>, SDTCisInt<1>]>;
+ [SDTCisVec<0>, SDTCVecEltisVT<0, i1>, SDTCisSameAs<1, 2>, SDTCisInt<1>,
+ SDTCisSameNumEltsAs<0, 1>]>;
def X86pcmpeqm : SDNode<"X86ISD::PCMPEQM", X86IntCmpMask, [SDNPCommutative]>;
def X86pcmpgtm : SDNode<"X86ISD::PCMPGTM", X86IntCmpMask>;
@@ -188,19 +168,19 @@ def X86CmpMaskCCRound :
SDTypeProfile<1, 4, [SDTCisVec<0>,SDTCVecEltisVT<0, i1>,
SDTCisVec<1>, SDTCisSameAs<2, 1>,
SDTCisSameNumEltsAs<0, 1>, SDTCisVT<3, i8>,
- SDTCisInt<4>]>;
+ SDTCisVT<4, i32>]>;
def X86CmpMaskCCScalar :
SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>;
def X86CmpMaskCCScalarRound :
SDTypeProfile<1, 4, [SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>,
- SDTCisInt<4>]>;
+ SDTCisVT<4, i32>]>;
def X86cmpm : SDNode<"X86ISD::CMPM", X86CmpMaskCC>;
def X86cmpmRnd : SDNode<"X86ISD::CMPM_RND", X86CmpMaskCCRound>;
def X86cmpmu : SDNode<"X86ISD::CMPMU", X86CmpMaskCC>;
-def X86cmpms : SDNode<"X86ISD::FSETCC", X86CmpMaskCCScalar>;
-def X86cmpmsRnd : SDNode<"X86ISD::FSETCC", X86CmpMaskCCScalarRound>;
+def X86cmpms : SDNode<"X86ISD::FSETCCM", X86CmpMaskCCScalar>;
+def X86cmpmsRnd : SDNode<"X86ISD::FSETCCM_RND", X86CmpMaskCCScalarRound>;
def X86vshl : SDNode<"X86ISD::VSHL",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
@@ -212,7 +192,9 @@ def X86vsra : SDNode<"X86ISD::VSRA",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisVec<2>]>>;
-def X86vsrav : SDNode<"X86ISD::VSRAV" , SDTIntShiftOp>;
+def X86vsrav : SDNode<"X86ISD::VSRAV" ,
+ SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+ SDTCisSameAs<0,2>]>>;
def X86vshli : SDNode<"X86ISD::VSHLI", SDTIntShiftOp>;
def X86vsrli : SDNode<"X86ISD::VSRLI", SDTIntShiftOp>;
@@ -261,12 +243,12 @@ def SDTX86Testm : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisSameAs<2, 1>, SDTCVecEltisVT<0, i1>,
SDTCisSameNumEltsAs<0, 1>]>;
-def X86addus : SDNode<"X86ISD::ADDUS", SDTIntBinOp>;
+def X86addus : SDNode<"X86ISD::ADDUS", SDTIntBinOp, [SDNPCommutative]>;
def X86subus : SDNode<"X86ISD::SUBUS", SDTIntBinOp>;
-def X86adds : SDNode<"X86ISD::ADDS", SDTIntBinOp>;
+def X86adds : SDNode<"X86ISD::ADDS", SDTIntBinOp, [SDNPCommutative]>;
def X86subs : SDNode<"X86ISD::SUBS", SDTIntBinOp>;
-def X86mulhrs : SDNode<"X86ISD::MULHRS" , SDTIntBinOp>;
-def X86avg : SDNode<"X86ISD::AVG" , SDTIntBinOp>;
+def X86mulhrs : SDNode<"X86ISD::MULHRS", SDTIntBinOp, [SDNPCommutative]>;
+def X86avg : SDNode<"X86ISD::AVG" , SDTIntBinOp, [SDNPCommutative]>;
def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>;
def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>;
def X86kortest : SDNode<"X86ISD::KORTEST", SDTX86CmpPTest>;
@@ -283,7 +265,7 @@ def X86select : SDNode<"X86ISD::SELECT",
SDTCisSameAs<2, 3>,
SDTCisSameNumEltsAs<0, 1>]>>;
-def X86selects : SDNode<"X86ISD::SELECT",
+def X86selects : SDNode<"X86ISD::SELECTS",
SDTypeProfile<1, 3, [SDTCisVT<1, i1>,
SDTCisSameAs<0, 2>,
SDTCisSameAs<2, 3>]>>;
@@ -292,12 +274,14 @@ def X86pmuludq : SDNode<"X86ISD::PMULUDQ",
SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i64>,
SDTCVecEltisVT<1, i32>,
SDTCisSameSizeAs<0,1>,
- SDTCisSameAs<1,2>]>>;
+ SDTCisSameAs<1,2>]>,
+ [SDNPCommutative]>;
def X86pmuldq : SDNode<"X86ISD::PMULDQ",
SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i64>,
SDTCVecEltisVT<1, i32>,
SDTCisSameSizeAs<0,1>,
- SDTCisSameAs<1,2>]>>;
+ SDTCisSameAs<1,2>]>,
+ [SDNPCommutative]>;
def X86extrqi : SDNode<"X86ISD::EXTRQI",
SDTypeProfile<1, 3, [SDTCisVT<0, v2i64>, SDTCisSameAs<0,1>,
@@ -393,7 +377,7 @@ def X86Unpckl : SDNode<"X86ISD::UNPCKL", SDTShuff2Op>;
def X86Unpckh : SDNode<"X86ISD::UNPCKH", SDTShuff2Op>;
def X86vpmaddubsw : SDNode<"X86ISD::VPMADDUBSW" , SDTPack>;
-def X86vpmaddwd : SDNode<"X86ISD::VPMADDWD" , SDTPack>;
+def X86vpmaddwd : SDNode<"X86ISD::VPMADDWD" , SDTPack, [SDNPCommutative]>;
def X86VPermilpv : SDNode<"X86ISD::VPERMILPV", SDTShuff2OpM>;
def X86VPermilpi : SDNode<"X86ISD::VPERMILPI", SDTShuff2OpI>;
@@ -410,10 +394,12 @@ def X86VPermt2 : SDNode<"X86ISD::VPERMV3",
SDTCisSameSizeAs<0,2>,
SDTCisSameAs<0,3>]>, []>;
+// Even though the index operand should be integer, we need to make it match the
+// destination type so that we can pattern match the masked version where the
+// index is also the passthru operand.
def X86VPermi2X : SDNode<"X86ISD::VPERMIV3",
- SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<1>,
- SDTCisVec<1>, SDTCisSameNumEltsAs<0, 1>,
- SDTCisSameSizeAs<0,1>,
+ SDTypeProfile<1, 3, [SDTCisVec<0>,
+ SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>,
SDTCisSameAs<0,3>]>, []>;
@@ -462,9 +448,9 @@ def X86scalef : SDNode<"X86ISD::SCALEF", SDTFPBinOpRound>;
def X86scalefs : SDNode<"X86ISD::SCALEFS", SDTFPBinOpRound>;
def X86fminRnd : SDNode<"X86ISD::FMIN_RND", SDTFPBinOpRound>;
def X86fsqrtRnd : SDNode<"X86ISD::FSQRT_RND", SDTFPUnaryOpRound>;
-def X86fsqrtRnds : SDNode<"X86ISD::FSQRT_RND", SDTFPBinOpRound>;
+def X86fsqrtRnds : SDNode<"X86ISD::FSQRTS_RND", SDTFPBinOpRound>;
def X86fgetexpRnd : SDNode<"X86ISD::FGETEXP_RND", SDTFPUnaryOpRound>;
-def X86fgetexpRnds : SDNode<"X86ISD::FGETEXP_RND", SDTFPBinOpRound>;
+def X86fgetexpRnds : SDNode<"X86ISD::FGETEXPS_RND", SDTFPBinOpRound>;
def X86Fmadd : SDNode<"X86ISD::FMADD", SDTFma>;
def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFma>;
@@ -480,6 +466,18 @@ def X86FnmsubRnd : SDNode<"X86ISD::FNMSUB_RND", SDTFmaRound>;
def X86FmaddsubRnd : SDNode<"X86ISD::FMADDSUB_RND", SDTFmaRound>;
def X86FmsubaddRnd : SDNode<"X86ISD::FMSUBADD_RND", SDTFmaRound>;
+// Scalar FMA intrinsics with passthru bits in operand 1.
+def X86FmaddRnds1 : SDNode<"X86ISD::FMADDS1_RND", SDTFmaRound>;
+def X86FnmaddRnds1 : SDNode<"X86ISD::FNMADDS1_RND", SDTFmaRound>;
+def X86FmsubRnds1 : SDNode<"X86ISD::FMSUBS1_RND", SDTFmaRound>;
+def X86FnmsubRnds1 : SDNode<"X86ISD::FNMSUBS1_RND", SDTFmaRound>;
+
+// Scalar FMA intrinsics with passthru bits in operand 3.
+def X86FmaddRnds3 : SDNode<"X86ISD::FMADDS3_RND", SDTFmaRound>;
+def X86FnmaddRnds3 : SDNode<"X86ISD::FNMADDS3_RND", SDTFmaRound>;
+def X86FmsubRnds3 : SDNode<"X86ISD::FMSUBS3_RND", SDTFmaRound>;
+def X86FnmsubRnds3 : SDNode<"X86ISD::FNMSUBS3_RND", SDTFmaRound>;
+
def x86vpmadd52l : SDNode<"X86ISD::VPMADD52L", SDTFma>;
def x86vpmadd52h : SDNode<"X86ISD::VPMADD52H", SDTFma>;
@@ -487,11 +485,11 @@ def X86rsqrt28 : SDNode<"X86ISD::RSQRT28", SDTFPUnaryOpRound>;
def X86rcp28 : SDNode<"X86ISD::RCP28", SDTFPUnaryOpRound>;
def X86exp2 : SDNode<"X86ISD::EXP2", SDTFPUnaryOpRound>;
-def X86rsqrt28s : SDNode<"X86ISD::RSQRT28", SDTFPBinOpRound>;
-def X86rcp28s : SDNode<"X86ISD::RCP28", SDTFPBinOpRound>;
-def X86RndScales : SDNode<"X86ISD::VRNDSCALE", SDTFPBinOpImmRound>;
-def X86Reduces : SDNode<"X86ISD::VREDUCE", SDTFPBinOpImmRound>;
-def X86GetMants : SDNode<"X86ISD::VGETMANT", SDTFPBinOpImmRound>;
+def X86rsqrt28s : SDNode<"X86ISD::RSQRT28S", SDTFPBinOpRound>;
+def X86rcp28s : SDNode<"X86ISD::RCP28S", SDTFPBinOpRound>;
+def X86RndScales : SDNode<"X86ISD::VRNDSCALES", SDTFPBinOpImmRound>;
+def X86Reduces : SDNode<"X86ISD::VREDUCES", SDTFPBinOpImmRound>;
+def X86GetMants : SDNode<"X86ISD::VGETMANTS", SDTFPBinOpImmRound>;
def SDT_PCMPISTRI : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
SDTCisVT<2, v16i8>, SDTCisVT<3, v16i8>,
@@ -515,59 +513,69 @@ def SDTintToFPRound: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisFP<0>,
def SDTFloatToInt: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisInt<0>, SDTCisFP<1>]>;
-
def SDTFloatToIntRnd: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisInt<0>, SDTCisFP<1>,
SDTCisVT<2, i32>]>;
def SDTSFloatToIntRnd: SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisFP<1>,
SDTCisVec<1>, SDTCisVT<2, i32>]>;
+
+def SDTVintToFP: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>,
+ SDTCisFP<0>, SDTCisInt<1>]>;
def SDTVintToFPRound: SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
SDTCisFP<0>, SDTCisInt<1>,
SDTCisVT<2, i32>]>;
// Scalar
-def X86SintToFpRnd : SDNode<"X86ISD::SINT_TO_FP_RND", SDTintToFPRound>;
-def X86UintToFpRnd : SDNode<"X86ISD::UINT_TO_FP_RND", SDTintToFPRound>;
+def X86SintToFpRnd : SDNode<"X86ISD::SCALAR_SINT_TO_FP_RND", SDTintToFPRound>;
+def X86UintToFpRnd : SDNode<"X86ISD::SCALAR_UINT_TO_FP_RND", SDTintToFPRound>;
-def X86cvtts2IntRnd : SDNode<"X86ISD::FP_TO_SINT_RND", SDTSFloatToIntRnd>;
-def X86cvtts2UIntRnd : SDNode<"X86ISD::FP_TO_UINT_RND", SDTSFloatToIntRnd>;
+def X86cvtts2IntRnd : SDNode<"X86ISD::CVTTS2SI_RND", SDTSFloatToIntRnd>;
+def X86cvtts2UIntRnd : SDNode<"X86ISD::CVTTS2UI_RND", SDTSFloatToIntRnd>;
-def X86cvts2si : SDNode<"X86ISD::SCALAR_FP_TO_SINT_RND", SDTSFloatToIntRnd>;
-def X86cvts2usi : SDNode<"X86ISD::SCALAR_FP_TO_UINT_RND", SDTSFloatToIntRnd>;
+def X86cvts2si : SDNode<"X86ISD::CVTS2SI_RND", SDTSFloatToIntRnd>;
+def X86cvts2usi : SDNode<"X86ISD::CVTS2UI_RND", SDTSFloatToIntRnd>;
// Vector with rounding mode
// cvtt fp-to-int stuff
-def X86VFpToSintRnd : SDNode<"ISD::FP_TO_SINT", SDTFloatToIntRnd>;
-def X86VFpToUintRnd : SDNode<"ISD::FP_TO_UINT", SDTFloatToIntRnd>;
+def X86cvttp2siRnd : SDNode<"X86ISD::CVTTP2SI_RND", SDTFloatToIntRnd>;
+def X86cvttp2uiRnd : SDNode<"X86ISD::CVTTP2UI_RND", SDTFloatToIntRnd>;
-def X86VSintToFpRnd : SDNode<"ISD::SINT_TO_FP", SDTVintToFPRound>;
-def X86VUintToFpRnd : SDNode<"ISD::UINT_TO_FP", SDTVintToFPRound>;
+def X86VSintToFpRnd : SDNode<"X86ISD::SINT_TO_FP_RND", SDTVintToFPRound>;
+def X86VUintToFpRnd : SDNode<"X86ISD::UINT_TO_FP_RND", SDTVintToFPRound>;
// cvt fp-to-int stuff
-def X86cvtp2IntRnd : SDNode<"X86ISD::FP_TO_SINT_RND", SDTFloatToIntRnd>;
-def X86cvtp2UIntRnd : SDNode<"X86ISD::FP_TO_UINT_RND", SDTFloatToIntRnd>;
+def X86cvtp2IntRnd : SDNode<"X86ISD::CVTP2SI_RND", SDTFloatToIntRnd>;
+def X86cvtp2UIntRnd : SDNode<"X86ISD::CVTP2UI_RND", SDTFloatToIntRnd>;
// Vector without rounding mode
-def X86cvtp2Int : SDNode<"X86ISD::FP_TO_SINT_RND", SDTFloatToInt>;
-def X86cvtp2UInt : SDNode<"X86ISD::FP_TO_UINT_RND", SDTFloatToInt>;
-def X86cvtph2ps : SDNode<"ISD::FP16_TO_FP",
+// cvtt fp-to-int stuff
+def X86cvttp2si : SDNode<"X86ISD::CVTTP2SI", SDTFloatToInt>;
+def X86cvttp2ui : SDNode<"X86ISD::CVTTP2UI", SDTFloatToInt>;
+
+def X86VSintToFP : SDNode<"X86ISD::CVTSI2P", SDTVintToFP>;
+def X86VUintToFP : SDNode<"X86ISD::CVTUI2P", SDTVintToFP>;
+
+// cvt int-to-fp stuff
+def X86cvtp2Int : SDNode<"X86ISD::CVTP2SI", SDTFloatToInt>;
+def X86cvtp2UInt : SDNode<"X86ISD::CVTP2UI", SDTFloatToInt>;
+
+def X86cvtph2ps : SDNode<"X86ISD::CVTPH2PS",
SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f32>,
SDTCVecEltisVT<1, i16>,
SDTCisVT<2, i32>]> >;
-def X86cvtps2ph : SDNode<"ISD::FP_TO_FP16",
- SDTypeProfile<1, 3, [SDTCVecEltisVT<0, i16>,
+def X86cvtps2ph : SDNode<"X86ISD::CVTPS2PH",
+ SDTypeProfile<1, 2, [SDTCVecEltisVT<0, i16>,
SDTCVecEltisVT<1, f32>,
- SDTCisVT<2, i32>,
- SDTCisVT<3, i32>]> >;
-def X86vfpextRnd : SDNode<"X86ISD::VFPEXT",
+ SDTCisVT<2, i32>]> >;
+def X86vfpextRnd : SDNode<"X86ISD::VFPEXT_RND",
SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f64>,
SDTCVecEltisVT<1, f32>,
SDTCisOpSmallerThanOp<1, 0>,
SDTCisVT<2, i32>]>>;
-def X86vfproundRnd: SDNode<"X86ISD::VFPROUND",
+def X86vfproundRnd: SDNode<"X86ISD::VFPROUND_RND",
SDTypeProfile<1, 2, [SDTCVecEltisVT<0, f32>,
SDTCVecEltisVT<1, f64>,
SDTCisOpSmallerThanOp<0, 1>,
@@ -621,9 +629,6 @@ def loadv4i64 : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>;
// 512-bit load pattern fragments
def loadv16f32 : PatFrag<(ops node:$ptr), (v16f32 (load node:$ptr))>;
def loadv8f64 : PatFrag<(ops node:$ptr), (v8f64 (load node:$ptr))>;
-def loadv64i8 : PatFrag<(ops node:$ptr), (v64i8 (load node:$ptr))>;
-def loadv32i16 : PatFrag<(ops node:$ptr), (v32i16 (load node:$ptr))>;
-def loadv16i32 : PatFrag<(ops node:$ptr), (v16i32 (load node:$ptr))>;
def loadv8i64 : PatFrag<(ops node:$ptr), (v8i64 (load node:$ptr))>;
// 128-/256-/512-bit extload pattern fragments
@@ -631,15 +636,6 @@ def extloadv2f32 : PatFrag<(ops node:$ptr), (v2f64 (extloadvf32 node:$ptr))>;
def extloadv4f32 : PatFrag<(ops node:$ptr), (v4f64 (extloadvf32 node:$ptr))>;
def extloadv8f32 : PatFrag<(ops node:$ptr), (v8f64 (extloadvf32 node:$ptr))>;
-// These are needed to match a scalar load that is used in a vector-only
-// math instruction such as the FP logical ops: andps, andnps, orps, xorps.
-// The memory operand is required to be a 128-bit load, so it must be converted
-// from a vector to a scalar.
-def loadf32_128 : PatFrag<(ops node:$ptr),
- (f32 (extractelt (loadv4f32 node:$ptr), (iPTR 0)))>;
-def loadf64_128 : PatFrag<(ops node:$ptr),
- (f64 (extractelt (loadv2f64 node:$ptr), (iPTR 0)))>;
-
// Like 'store', but always requires 128-bit vector alignment.
def alignedstore : PatFrag<(ops node:$val, node:$ptr),
(store node:$val, node:$ptr), [{
@@ -673,11 +669,6 @@ def alignedload512 : PatFrag<(ops node:$ptr), (load node:$ptr), [{
return cast<LoadSDNode>(N)->getAlignment() >= 64;
}]>;
-def alignedloadfsf32 : PatFrag<(ops node:$ptr),
- (f32 (alignedload node:$ptr))>;
-def alignedloadfsf64 : PatFrag<(ops node:$ptr),
- (f64 (alignedload node:$ptr))>;
-
// 128-bit aligned load pattern fragments
// NOTE: all 128-bit integer vector loads are promoted to v2i64
def alignedloadv4f32 : PatFrag<(ops node:$ptr),
@@ -699,8 +690,6 @@ def alignedloadv4i64 : PatFrag<(ops node:$ptr),
// 512-bit aligned load pattern fragments
def alignedloadv16f32 : PatFrag<(ops node:$ptr),
(v16f32 (alignedload512 node:$ptr))>;
-def alignedloadv16i32 : PatFrag<(ops node:$ptr),
- (v16i32 (alignedload512 node:$ptr))>;
def alignedloadv8f64 : PatFrag<(ops node:$ptr),
(v8f64 (alignedload512 node:$ptr))>;
def alignedloadv8i64 : PatFrag<(ops node:$ptr),
@@ -717,9 +706,6 @@ def memop : PatFrag<(ops node:$ptr), (load node:$ptr), [{
|| cast<LoadSDNode>(N)->getAlignment() >= 16;
}]>;
-def memopfsf32 : PatFrag<(ops node:$ptr), (f32 (memop node:$ptr))>;
-def memopfsf64 : PatFrag<(ops node:$ptr), (f64 (memop node:$ptr))>;
-
// 128-bit memop pattern fragments
// NOTE: all 128-bit integer vector loads are promoted to v2i64
def memopv4f32 : PatFrag<(ops node:$ptr), (v4f32 (memop node:$ptr))>;
@@ -853,6 +839,7 @@ def bc_v4i64 : PatFrag<(ops node:$in), (v4i64 (bitconvert node:$in))>;
def bc_v8f32 : PatFrag<(ops node:$in), (v8f32 (bitconvert node:$in))>;
// 512-bit bitconvert pattern fragments
+def bc_v64i8 : PatFrag<(ops node:$in), (v64i8 (bitconvert node:$in))>;
def bc_v16i32 : PatFrag<(ops node:$in), (v16i32 (bitconvert node:$in))>;
def bc_v8i64 : PatFrag<(ops node:$in), (v8i64 (bitconvert node:$in))>;
def bc_v8f64 : PatFrag<(ops node:$in), (v8f64 (bitconvert node:$in))>;
@@ -873,6 +860,10 @@ def fp32imm0 : PatLeaf<(f32 fpimm), [{
return N->isExactlyValue(+0.0);
}]>;
+def fp64imm0 : PatLeaf<(f64 fpimm), [{
+ return N->isExactlyValue(+0.0);
+}]>;
+
def I8Imm : SDNodeXForm<imm, [{
// Transformation function: get the low 8 bits.
return getI8Imm((uint8_t)N->getZExtValue(), SDLoc(N));
@@ -940,30 +931,36 @@ def vinsert256_insert : PatFrag<(ops node:$bigvec, node:$smallvec,
return X86::isVINSERT256Index(N);
}], INSERT_get_vinsert256_imm>;
-def masked_load_aligned128 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+def X86mload : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(masked_load node:$src1, node:$src2, node:$src3), [{
- if (auto *Load = dyn_cast<MaskedLoadSDNode>(N))
- return Load->getAlignment() >= 16;
- return false;
+ return !cast<MaskedLoadSDNode>(N)->isExpandingLoad() &&
+ cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD;
+}]>;
+
+def masked_load_aligned128 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (X86mload node:$src1, node:$src2, node:$src3), [{
+ return cast<MaskedLoadSDNode>(N)->getAlignment() >= 16;
}]>;
def masked_load_aligned256 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (masked_load node:$src1, node:$src2, node:$src3), [{
- if (auto *Load = dyn_cast<MaskedLoadSDNode>(N))
- return Load->getAlignment() >= 32;
- return false;
+ (X86mload node:$src1, node:$src2, node:$src3), [{
+ return cast<MaskedLoadSDNode>(N)->getAlignment() >= 32;
}]>;
def masked_load_aligned512 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (masked_load node:$src1, node:$src2, node:$src3), [{
- if (auto *Load = dyn_cast<MaskedLoadSDNode>(N))
- return Load->getAlignment() >= 64;
- return false;
+ (X86mload node:$src1, node:$src2, node:$src3), [{
+ return cast<MaskedLoadSDNode>(N)->getAlignment() >= 64;
}]>;
def masked_load_unaligned : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(masked_load node:$src1, node:$src2, node:$src3), [{
- return isa<MaskedLoadSDNode>(N);
+ return !cast<MaskedLoadSDNode>(N)->isExpandingLoad() &&
+ cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD;
+}]>;
+
+def X86mExpandingLoad : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_load node:$src1, node:$src2, node:$src3), [{
+ return cast<MaskedLoadSDNode>(N)->isExpandingLoad();
}]>;
// Masked store fragments.
@@ -971,33 +968,34 @@ def masked_load_unaligned : PatFrag<(ops node:$src1, node:$src2, node:$src3),
// do not support vector types (llvm-tblgen will fail).
def X86mstore : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(masked_store node:$src1, node:$src2, node:$src3), [{
- return !cast<MaskedStoreSDNode>(N)->isTruncatingStore();
+ return (!cast<MaskedStoreSDNode>(N)->isTruncatingStore()) &&
+ (!cast<MaskedStoreSDNode>(N)->isCompressingStore());
}]>;
def masked_store_aligned128 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(X86mstore node:$src1, node:$src2, node:$src3), [{
- if (auto *Store = dyn_cast<MaskedStoreSDNode>(N))
- return Store->getAlignment() >= 16;
- return false;
+ return cast<MaskedStoreSDNode>(N)->getAlignment() >= 16;
}]>;
def masked_store_aligned256 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(X86mstore node:$src1, node:$src2, node:$src3), [{
- if (auto *Store = dyn_cast<MaskedStoreSDNode>(N))
- return Store->getAlignment() >= 32;
- return false;
+ return cast<MaskedStoreSDNode>(N)->getAlignment() >= 32;
}]>;
def masked_store_aligned512 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(X86mstore node:$src1, node:$src2, node:$src3), [{
- if (auto *Store = dyn_cast<MaskedStoreSDNode>(N))
- return Store->getAlignment() >= 64;
- return false;
+ return cast<MaskedStoreSDNode>(N)->getAlignment() >= 64;
}]>;
def masked_store_unaligned : PatFrag<(ops node:$src1, node:$src2, node:$src3),
- (X86mstore node:$src1, node:$src2, node:$src3), [{
- return isa<MaskedStoreSDNode>(N);
+ (masked_store node:$src1, node:$src2, node:$src3), [{
+ return (!cast<MaskedStoreSDNode>(N)->isTruncatingStore()) &&
+ (!cast<MaskedStoreSDNode>(N)->isCompressingStore());
+}]>;
+
+def X86mCompressingStore : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (masked_store node:$src1, node:$src2, node:$src3), [{
+ return cast<MaskedStoreSDNode>(N)->isCompressingStore();
}]>;
// masked truncstore fragments
@@ -1022,3 +1020,80 @@ def masked_truncstorevi32 :
(X86mtruncstore node:$src1, node:$src2, node:$src3), [{
return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;
+
+def X86TruncSStore : SDNode<"X86ISD::VTRUNCSTORES", SDTStore,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
+def X86TruncUSStore : SDNode<"X86ISD::VTRUNCSTOREUS", SDTStore,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
+def X86MTruncSStore : SDNode<"X86ISD::VMTRUNCSTORES", SDTMaskedStore,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
+def X86MTruncUSStore : SDNode<"X86ISD::VMTRUNCSTOREUS", SDTMaskedStore,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+
+def truncstore_s_vi8 : PatFrag<(ops node:$val, node:$ptr),
+ (X86TruncSStore node:$val, node:$ptr), [{
+ return cast<TruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+}]>;
+
+def truncstore_us_vi8 : PatFrag<(ops node:$val, node:$ptr),
+ (X86TruncUSStore node:$val, node:$ptr), [{
+ return cast<TruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+}]>;
+
+def truncstore_s_vi16 : PatFrag<(ops node:$val, node:$ptr),
+ (X86TruncSStore node:$val, node:$ptr), [{
+ return cast<TruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
+}]>;
+
+def truncstore_us_vi16 : PatFrag<(ops node:$val, node:$ptr),
+ (X86TruncUSStore node:$val, node:$ptr), [{
+ return cast<TruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
+}]>;
+
+def truncstore_s_vi32 : PatFrag<(ops node:$val, node:$ptr),
+ (X86TruncSStore node:$val, node:$ptr), [{
+ return cast<TruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
+}]>;
+
+def truncstore_us_vi32 : PatFrag<(ops node:$val, node:$ptr),
+ (X86TruncUSStore node:$val, node:$ptr), [{
+ return cast<TruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
+}]>;
+
+def masked_truncstore_s_vi8 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (X86MTruncSStore node:$src1, node:$src2, node:$src3), [{
+ return cast<MaskedTruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+}]>;
+
+def masked_truncstore_us_vi8 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (X86MTruncUSStore node:$src1, node:$src2, node:$src3), [{
+ return cast<MaskedTruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
+}]>;
+
+def masked_truncstore_s_vi16 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (X86MTruncSStore node:$src1, node:$src2, node:$src3), [{
+ return cast<MaskedTruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
+}]>;
+
+def masked_truncstore_us_vi16 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (X86MTruncUSStore node:$src1, node:$src2, node:$src3), [{
+ return cast<MaskedTruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
+}]>;
+
+def masked_truncstore_s_vi32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (X86MTruncSStore node:$src1, node:$src2, node:$src3), [{
+ return cast<MaskedTruncSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
+}]>;
+
+def masked_truncstore_us_vi32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
+ (X86MTruncUSStore node:$src1, node:$src2, node:$src3), [{
+ return cast<MaskedTruncUSStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
+}]>;
+
+def assertzext_i1 :
+ PatFrag<(ops node:$src), (assertzext node:$src), [{
+ return cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i1;
+}]>;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
index 5f0aab9ddc68..579359794fbd 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -68,7 +68,7 @@ static cl::opt<unsigned>
UndefRegClearance("undef-reg-clearance",
cl::desc("How many idle instructions we would like before "
"certain undef register reads"),
- cl::init(64), cl::Hidden);
+ cl::init(128), cl::Hidden);
enum {
// Select which memory operand is being unfolded.
@@ -228,12 +228,16 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::SBB64ri32, X86::SBB64mi32, 0 },
{ X86::SBB64ri8, X86::SBB64mi8, 0 },
{ X86::SBB64rr, X86::SBB64mr, 0 },
+ { X86::SHL16r1, X86::SHL16m1, 0 },
{ X86::SHL16rCL, X86::SHL16mCL, 0 },
{ X86::SHL16ri, X86::SHL16mi, 0 },
+ { X86::SHL32r1, X86::SHL32m1, 0 },
{ X86::SHL32rCL, X86::SHL32mCL, 0 },
{ X86::SHL32ri, X86::SHL32mi, 0 },
+ { X86::SHL64r1, X86::SHL64m1, 0 },
{ X86::SHL64rCL, X86::SHL64mCL, 0 },
{ X86::SHL64ri, X86::SHL64mi, 0 },
+ { X86::SHL8r1, X86::SHL8m1, 0 },
{ X86::SHL8rCL, X86::SHL8mCL, 0 },
{ X86::SHL8ri, X86::SHL8mi, 0 },
{ X86::SHLD16rrCL, X86::SHLD16mrCL, 0 },
@@ -335,6 +339,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::MOVAPDrr, X86::MOVAPDmr, TB_FOLDED_STORE | TB_ALIGN_16 },
{ X86::MOVAPSrr, X86::MOVAPSmr, TB_FOLDED_STORE | TB_ALIGN_16 },
{ X86::MOVDQArr, X86::MOVDQAmr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::MOVDQUrr, X86::MOVDQUmr, TB_FOLDED_STORE },
{ X86::MOVPDI2DIrr, X86::MOVPDI2DImr, TB_FOLDED_STORE },
{ X86::MOVPQIto64rr,X86::MOVPQI2QImr, TB_FOLDED_STORE },
{ X86::MOVSDto64rr, X86::MOVSDto64mr, TB_FOLDED_STORE },
@@ -380,6 +385,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMOVAPDrr, X86::VMOVAPDmr, TB_FOLDED_STORE | TB_ALIGN_16 },
{ X86::VMOVAPSrr, X86::VMOVAPSmr, TB_FOLDED_STORE | TB_ALIGN_16 },
{ X86::VMOVDQArr, X86::VMOVDQAmr, TB_FOLDED_STORE | TB_ALIGN_16 },
+ { X86::VMOVDQUrr, X86::VMOVDQUmr, TB_FOLDED_STORE },
{ X86::VMOVPDI2DIrr,X86::VMOVPDI2DImr, TB_FOLDED_STORE },
{ X86::VMOVPQIto64rr, X86::VMOVPQI2QImr,TB_FOLDED_STORE },
{ X86::VMOVSDto64rr,X86::VMOVSDto64mr, TB_FOLDED_STORE },
@@ -394,10 +400,20 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMOVAPDYrr, X86::VMOVAPDYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
{ X86::VMOVAPSYrr, X86::VMOVAPSYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
{ X86::VMOVDQAYrr, X86::VMOVDQAYmr, TB_FOLDED_STORE | TB_ALIGN_32 },
+ { X86::VMOVDQUYrr, X86::VMOVDQUYmr, TB_FOLDED_STORE },
{ X86::VMOVUPDYrr, X86::VMOVUPDYmr, TB_FOLDED_STORE },
{ X86::VMOVUPSYrr, X86::VMOVUPSYmr, TB_FOLDED_STORE },
// AVX-512 foldable instructions
+ { X86::VEXTRACTF32x4Zrr,X86::VEXTRACTF32x4Zmr, TB_FOLDED_STORE },
+ { X86::VEXTRACTF32x8Zrr,X86::VEXTRACTF32x8Zmr, TB_FOLDED_STORE },
+ { X86::VEXTRACTF64x2Zrr,X86::VEXTRACTF64x2Zmr, TB_FOLDED_STORE },
+ { X86::VEXTRACTF64x4Zrr,X86::VEXTRACTF64x4Zmr, TB_FOLDED_STORE },
+ { X86::VEXTRACTI32x4Zrr,X86::VEXTRACTI32x4Zmr, TB_FOLDED_STORE },
+ { X86::VEXTRACTI32x8Zrr,X86::VEXTRACTI32x8Zmr, TB_FOLDED_STORE },
+ { X86::VEXTRACTI64x2Zrr,X86::VEXTRACTI64x2Zmr, TB_FOLDED_STORE },
+ { X86::VEXTRACTI64x4Zrr,X86::VEXTRACTI64x4Zmr, TB_FOLDED_STORE },
+ { X86::VEXTRACTPSZrr, X86::VEXTRACTPSZmr, TB_FOLDED_STORE },
{ X86::VMOVPDI2DIZrr, X86::VMOVPDI2DIZmr, TB_FOLDED_STORE },
{ X86::VMOVAPDZrr, X86::VMOVAPDZmr, TB_FOLDED_STORE | TB_ALIGN_64 },
{ X86::VMOVAPSZrr, X86::VMOVAPSZmr, TB_FOLDED_STORE | TB_ALIGN_64 },
@@ -409,8 +425,27 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMOVDQU16Zrr, X86::VMOVDQU16Zmr, TB_FOLDED_STORE },
{ X86::VMOVDQU32Zrr, X86::VMOVDQU32Zmr, TB_FOLDED_STORE },
{ X86::VMOVDQU64Zrr, X86::VMOVDQU64Zmr, TB_FOLDED_STORE },
+ { X86::VPMOVDBZrr, X86::VPMOVDBZmr, TB_FOLDED_STORE },
+ { X86::VPMOVDWZrr, X86::VPMOVDWZmr, TB_FOLDED_STORE },
+ { X86::VPMOVQDZrr, X86::VPMOVQDZmr, TB_FOLDED_STORE },
+ { X86::VPMOVQWZrr, X86::VPMOVQWZmr, TB_FOLDED_STORE },
+ { X86::VPMOVWBZrr, X86::VPMOVWBZmr, TB_FOLDED_STORE },
+ { X86::VPMOVSDBZrr, X86::VPMOVSDBZmr, TB_FOLDED_STORE },
+ { X86::VPMOVSDWZrr, X86::VPMOVSDWZmr, TB_FOLDED_STORE },
+ { X86::VPMOVSQDZrr, X86::VPMOVSQDZmr, TB_FOLDED_STORE },
+ { X86::VPMOVSQWZrr, X86::VPMOVSQWZmr, TB_FOLDED_STORE },
+ { X86::VPMOVSWBZrr, X86::VPMOVSWBZmr, TB_FOLDED_STORE },
+ { X86::VPMOVUSDBZrr, X86::VPMOVUSDBZmr, TB_FOLDED_STORE },
+ { X86::VPMOVUSDWZrr, X86::VPMOVUSDWZmr, TB_FOLDED_STORE },
+ { X86::VPMOVUSQDZrr, X86::VPMOVUSQDZmr, TB_FOLDED_STORE },
+ { X86::VPMOVUSQWZrr, X86::VPMOVUSQWZmr, TB_FOLDED_STORE },
+ { X86::VPMOVUSWBZrr, X86::VPMOVUSWBZmr, TB_FOLDED_STORE },
// AVX-512 foldable instructions (256-bit versions)
+ { X86::VEXTRACTF32x4Z256rr,X86::VEXTRACTF32x4Z256mr, TB_FOLDED_STORE },
+ { X86::VEXTRACTF64x2Z256rr,X86::VEXTRACTF64x2Z256mr, TB_FOLDED_STORE },
+ { X86::VEXTRACTI32x4Z256rr,X86::VEXTRACTI32x4Z256mr, TB_FOLDED_STORE },
+ { X86::VEXTRACTI64x2Z256rr,X86::VEXTRACTI64x2Z256mr, TB_FOLDED_STORE },
{ X86::VMOVAPDZ256rr, X86::VMOVAPDZ256mr, TB_FOLDED_STORE | TB_ALIGN_32 },
{ X86::VMOVAPSZ256rr, X86::VMOVAPSZ256mr, TB_FOLDED_STORE | TB_ALIGN_32 },
{ X86::VMOVDQA32Z256rr, X86::VMOVDQA32Z256mr, TB_FOLDED_STORE | TB_ALIGN_32 },
@@ -421,6 +456,15 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMOVDQU16Z256rr, X86::VMOVDQU16Z256mr, TB_FOLDED_STORE },
{ X86::VMOVDQU32Z256rr, X86::VMOVDQU32Z256mr, TB_FOLDED_STORE },
{ X86::VMOVDQU64Z256rr, X86::VMOVDQU64Z256mr, TB_FOLDED_STORE },
+ { X86::VPMOVDWZ256rr, X86::VPMOVDWZ256mr, TB_FOLDED_STORE },
+ { X86::VPMOVQDZ256rr, X86::VPMOVQDZ256mr, TB_FOLDED_STORE },
+ { X86::VPMOVWBZ256rr, X86::VPMOVWBZ256mr, TB_FOLDED_STORE },
+ { X86::VPMOVSDWZ256rr, X86::VPMOVSDWZ256mr, TB_FOLDED_STORE },
+ { X86::VPMOVSQDZ256rr, X86::VPMOVSQDZ256mr, TB_FOLDED_STORE },
+ { X86::VPMOVSWBZ256rr, X86::VPMOVSWBZ256mr, TB_FOLDED_STORE },
+ { X86::VPMOVUSDWZ256rr, X86::VPMOVUSDWZ256mr, TB_FOLDED_STORE },
+ { X86::VPMOVUSQDZ256rr, X86::VPMOVUSQDZ256mr, TB_FOLDED_STORE },
+ { X86::VPMOVUSWBZ256rr, X86::VPMOVUSWBZ256mr, TB_FOLDED_STORE },
// AVX-512 foldable instructions (128-bit versions)
{ X86::VMOVAPDZ128rr, X86::VMOVAPDZ128mr, TB_FOLDED_STORE | TB_ALIGN_16 },
@@ -471,26 +515,26 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::IMUL32rri8, X86::IMUL32rmi8, 0 },
{ X86::IMUL64rri32, X86::IMUL64rmi32, 0 },
{ X86::IMUL64rri8, X86::IMUL64rmi8, 0 },
- { X86::Int_COMISDrr, X86::Int_COMISDrm, 0 },
- { X86::Int_COMISSrr, X86::Int_COMISSrm, 0 },
- { X86::CVTSD2SI64rr, X86::CVTSD2SI64rm, 0 },
- { X86::CVTSD2SIrr, X86::CVTSD2SIrm, 0 },
- { X86::CVTSS2SI64rr, X86::CVTSS2SI64rm, 0 },
- { X86::CVTSS2SIrr, X86::CVTSS2SIrm, 0 },
- { X86::CVTDQ2PDrr, X86::CVTDQ2PDrm, TB_ALIGN_16 },
+ { X86::Int_COMISDrr, X86::Int_COMISDrm, TB_NO_REVERSE },
+ { X86::Int_COMISSrr, X86::Int_COMISSrm, TB_NO_REVERSE },
+ { X86::CVTSD2SI64rr, X86::CVTSD2SI64rm, TB_NO_REVERSE },
+ { X86::CVTSD2SIrr, X86::CVTSD2SIrm, TB_NO_REVERSE },
+ { X86::CVTSS2SI64rr, X86::CVTSS2SI64rm, TB_NO_REVERSE },
+ { X86::CVTSS2SIrr, X86::CVTSS2SIrm, TB_NO_REVERSE },
+ { X86::CVTDQ2PDrr, X86::CVTDQ2PDrm, TB_NO_REVERSE },
{ X86::CVTDQ2PSrr, X86::CVTDQ2PSrm, TB_ALIGN_16 },
{ X86::CVTPD2DQrr, X86::CVTPD2DQrm, TB_ALIGN_16 },
{ X86::CVTPD2PSrr, X86::CVTPD2PSrm, TB_ALIGN_16 },
{ X86::CVTPS2DQrr, X86::CVTPS2DQrm, TB_ALIGN_16 },
- { X86::CVTPS2PDrr, X86::CVTPS2PDrm, TB_ALIGN_16 },
+ { X86::CVTPS2PDrr, X86::CVTPS2PDrm, TB_NO_REVERSE },
{ X86::CVTTPD2DQrr, X86::CVTTPD2DQrm, TB_ALIGN_16 },
{ X86::CVTTPS2DQrr, X86::CVTTPS2DQrm, TB_ALIGN_16 },
- { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, 0 },
- { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm, 0 },
- { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm, 0 },
- { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm, 0 },
- { X86::Int_UCOMISDrr, X86::Int_UCOMISDrm, 0 },
- { X86::Int_UCOMISSrr, X86::Int_UCOMISSrm, 0 },
+ { X86::Int_CVTTSD2SI64rr,X86::Int_CVTTSD2SI64rm, TB_NO_REVERSE },
+ { X86::Int_CVTTSD2SIrr, X86::Int_CVTTSD2SIrm, TB_NO_REVERSE },
+ { X86::Int_CVTTSS2SI64rr,X86::Int_CVTTSS2SI64rm, TB_NO_REVERSE },
+ { X86::Int_CVTTSS2SIrr, X86::Int_CVTTSS2SIrm, TB_NO_REVERSE },
+ { X86::Int_UCOMISDrr, X86::Int_UCOMISDrm, TB_NO_REVERSE },
+ { X86::Int_UCOMISSrr, X86::Int_UCOMISSrm, TB_NO_REVERSE },
{ X86::MOV16rr, X86::MOV16rm, 0 },
{ X86::MOV32rr, X86::MOV32rm, 0 },
{ X86::MOV64rr, X86::MOV64rm, 0 },
@@ -503,6 +547,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 },
{ X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 },
{ X86::MOVDQArr, X86::MOVDQArm, TB_ALIGN_16 },
+ { X86::MOVDQUrr, X86::MOVDQUrm, 0 },
{ X86::MOVSHDUPrr, X86::MOVSHDUPrm, TB_ALIGN_16 },
{ X86::MOVSLDUPrr, X86::MOVSLDUPrm, TB_ALIGN_16 },
{ X86::MOVSX16rr8, X86::MOVSX16rm8, 0 },
@@ -511,51 +556,53 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::MOVSX64rr16, X86::MOVSX64rm16, 0 },
{ X86::MOVSX64rr32, X86::MOVSX64rm32, 0 },
{ X86::MOVSX64rr8, X86::MOVSX64rm8, 0 },
- { X86::MOVUPDrr, X86::MOVUPDrm, TB_ALIGN_16 },
+ { X86::MOVUPDrr, X86::MOVUPDrm, 0 },
{ X86::MOVUPSrr, X86::MOVUPSrm, 0 },
- { X86::MOVZPQILo2PQIrr, X86::MOVZPQILo2PQIrm, TB_ALIGN_16 },
+ { X86::MOVZPQILo2PQIrr, X86::MOVQI2PQIrm, TB_NO_REVERSE },
{ X86::MOVZX16rr8, X86::MOVZX16rm8, 0 },
{ X86::MOVZX32rr16, X86::MOVZX32rm16, 0 },
{ X86::MOVZX32_NOREXrr8, X86::MOVZX32_NOREXrm8, 0 },
{ X86::MOVZX32rr8, X86::MOVZX32rm8, 0 },
- { X86::PABSBrr128, X86::PABSBrm128, TB_ALIGN_16 },
- { X86::PABSDrr128, X86::PABSDrm128, TB_ALIGN_16 },
- { X86::PABSWrr128, X86::PABSWrm128, TB_ALIGN_16 },
+ { X86::PABSBrr, X86::PABSBrm, TB_ALIGN_16 },
+ { X86::PABSDrr, X86::PABSDrm, TB_ALIGN_16 },
+ { X86::PABSWrr, X86::PABSWrm, TB_ALIGN_16 },
{ X86::PCMPESTRIrr, X86::PCMPESTRIrm, TB_ALIGN_16 },
{ X86::PCMPESTRM128rr, X86::PCMPESTRM128rm, TB_ALIGN_16 },
{ X86::PCMPISTRIrr, X86::PCMPISTRIrm, TB_ALIGN_16 },
{ X86::PCMPISTRM128rr, X86::PCMPISTRM128rm, TB_ALIGN_16 },
{ X86::PHMINPOSUWrr128, X86::PHMINPOSUWrm128, TB_ALIGN_16 },
- { X86::PMOVSXBDrr, X86::PMOVSXBDrm, TB_ALIGN_16 },
- { X86::PMOVSXBQrr, X86::PMOVSXBQrm, TB_ALIGN_16 },
- { X86::PMOVSXBWrr, X86::PMOVSXBWrm, TB_ALIGN_16 },
- { X86::PMOVSXDQrr, X86::PMOVSXDQrm, TB_ALIGN_16 },
- { X86::PMOVSXWDrr, X86::PMOVSXWDrm, TB_ALIGN_16 },
- { X86::PMOVSXWQrr, X86::PMOVSXWQrm, TB_ALIGN_16 },
- { X86::PMOVZXBDrr, X86::PMOVZXBDrm, TB_ALIGN_16 },
- { X86::PMOVZXBQrr, X86::PMOVZXBQrm, TB_ALIGN_16 },
- { X86::PMOVZXBWrr, X86::PMOVZXBWrm, TB_ALIGN_16 },
- { X86::PMOVZXDQrr, X86::PMOVZXDQrm, TB_ALIGN_16 },
- { X86::PMOVZXWDrr, X86::PMOVZXWDrm, TB_ALIGN_16 },
- { X86::PMOVZXWQrr, X86::PMOVZXWQrm, TB_ALIGN_16 },
+ { X86::PMOVSXBDrr, X86::PMOVSXBDrm, TB_NO_REVERSE },
+ { X86::PMOVSXBQrr, X86::PMOVSXBQrm, TB_NO_REVERSE },
+ { X86::PMOVSXBWrr, X86::PMOVSXBWrm, TB_NO_REVERSE },
+ { X86::PMOVSXDQrr, X86::PMOVSXDQrm, TB_NO_REVERSE },
+ { X86::PMOVSXWDrr, X86::PMOVSXWDrm, TB_NO_REVERSE },
+ { X86::PMOVSXWQrr, X86::PMOVSXWQrm, TB_NO_REVERSE },
+ { X86::PMOVZXBDrr, X86::PMOVZXBDrm, TB_NO_REVERSE },
+ { X86::PMOVZXBQrr, X86::PMOVZXBQrm, TB_NO_REVERSE },
+ { X86::PMOVZXBWrr, X86::PMOVZXBWrm, TB_NO_REVERSE },
+ { X86::PMOVZXDQrr, X86::PMOVZXDQrm, TB_NO_REVERSE },
+ { X86::PMOVZXWDrr, X86::PMOVZXWDrm, TB_NO_REVERSE },
+ { X86::PMOVZXWQrr, X86::PMOVZXWQrm, TB_NO_REVERSE },
{ X86::PSHUFDri, X86::PSHUFDmi, TB_ALIGN_16 },
{ X86::PSHUFHWri, X86::PSHUFHWmi, TB_ALIGN_16 },
{ X86::PSHUFLWri, X86::PSHUFLWmi, TB_ALIGN_16 },
{ X86::PTESTrr, X86::PTESTrm, TB_ALIGN_16 },
{ X86::RCPPSr, X86::RCPPSm, TB_ALIGN_16 },
{ X86::RCPSSr, X86::RCPSSm, 0 },
- { X86::RCPSSr_Int, X86::RCPSSm_Int, 0 },
+ { X86::RCPSSr_Int, X86::RCPSSm_Int, TB_NO_REVERSE },
{ X86::ROUNDPDr, X86::ROUNDPDm, TB_ALIGN_16 },
{ X86::ROUNDPSr, X86::ROUNDPSm, TB_ALIGN_16 },
+ { X86::ROUNDSDr, X86::ROUNDSDm, 0 },
+ { X86::ROUNDSSr, X86::ROUNDSSm, 0 },
{ X86::RSQRTPSr, X86::RSQRTPSm, TB_ALIGN_16 },
{ X86::RSQRTSSr, X86::RSQRTSSm, 0 },
- { X86::RSQRTSSr_Int, X86::RSQRTSSm_Int, 0 },
+ { X86::RSQRTSSr_Int, X86::RSQRTSSm_Int, TB_NO_REVERSE },
{ X86::SQRTPDr, X86::SQRTPDm, TB_ALIGN_16 },
{ X86::SQRTPSr, X86::SQRTPSm, TB_ALIGN_16 },
{ X86::SQRTSDr, X86::SQRTSDm, 0 },
- { X86::SQRTSDr_Int, X86::SQRTSDm_Int, 0 },
+ { X86::SQRTSDr_Int, X86::SQRTSDm_Int, TB_NO_REVERSE },
{ X86::SQRTSSr, X86::SQRTSSm, 0 },
- { X86::SQRTSSr_Int, X86::SQRTSSm_Int, 0 },
+ { X86::SQRTSSr_Int, X86::SQRTSSm_Int, TB_NO_REVERSE },
{ X86::TEST16rr, X86::TEST16rm, 0 },
{ X86::TEST32rr, X86::TEST32rm, 0 },
{ X86::TEST64rr, X86::TEST64rm, 0 },
@@ -586,29 +633,29 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::PSWAPDrr, X86::PSWAPDrm, 0 },
// AVX 128-bit versions of foldable instructions
- { X86::Int_VCOMISDrr, X86::Int_VCOMISDrm, 0 },
- { X86::Int_VCOMISSrr, X86::Int_VCOMISSrm, 0 },
- { X86::Int_VUCOMISDrr, X86::Int_VUCOMISDrm, 0 },
- { X86::Int_VUCOMISSrr, X86::Int_VUCOMISSrm, 0 },
+ { X86::Int_VCOMISDrr, X86::Int_VCOMISDrm, TB_NO_REVERSE },
+ { X86::Int_VCOMISSrr, X86::Int_VCOMISSrm, TB_NO_REVERSE },
+ { X86::Int_VUCOMISDrr, X86::Int_VUCOMISDrm, TB_NO_REVERSE },
+ { X86::Int_VUCOMISSrr, X86::Int_VUCOMISSrm, TB_NO_REVERSE },
{ X86::VCVTTSD2SI64rr, X86::VCVTTSD2SI64rm, 0 },
- { X86::Int_VCVTTSD2SI64rr,X86::Int_VCVTTSD2SI64rm,0 },
+ { X86::Int_VCVTTSD2SI64rr,X86::Int_VCVTTSD2SI64rm,TB_NO_REVERSE },
{ X86::VCVTTSD2SIrr, X86::VCVTTSD2SIrm, 0 },
- { X86::Int_VCVTTSD2SIrr,X86::Int_VCVTTSD2SIrm, 0 },
+ { X86::Int_VCVTTSD2SIrr,X86::Int_VCVTTSD2SIrm, TB_NO_REVERSE },
{ X86::VCVTTSS2SI64rr, X86::VCVTTSS2SI64rm, 0 },
- { X86::Int_VCVTTSS2SI64rr,X86::Int_VCVTTSS2SI64rm,0 },
+ { X86::Int_VCVTTSS2SI64rr,X86::Int_VCVTTSS2SI64rm,TB_NO_REVERSE },
{ X86::VCVTTSS2SIrr, X86::VCVTTSS2SIrm, 0 },
- { X86::Int_VCVTTSS2SIrr,X86::Int_VCVTTSS2SIrm, 0 },
- { X86::VCVTSD2SI64rr, X86::VCVTSD2SI64rm, 0 },
- { X86::VCVTSD2SIrr, X86::VCVTSD2SIrm, 0 },
- { X86::VCVTSS2SI64rr, X86::VCVTSS2SI64rm, 0 },
- { X86::VCVTSS2SIrr, X86::VCVTSS2SIrm, 0 },
- { X86::VCVTDQ2PDrr, X86::VCVTDQ2PDrm, 0 },
+ { X86::Int_VCVTTSS2SIrr,X86::Int_VCVTTSS2SIrm, TB_NO_REVERSE },
+ { X86::VCVTSD2SI64rr, X86::VCVTSD2SI64rm, TB_NO_REVERSE },
+ { X86::VCVTSD2SIrr, X86::VCVTSD2SIrm, TB_NO_REVERSE },
+ { X86::VCVTSS2SI64rr, X86::VCVTSS2SI64rm, TB_NO_REVERSE },
+ { X86::VCVTSS2SIrr, X86::VCVTSS2SIrm, TB_NO_REVERSE },
+ { X86::VCVTDQ2PDrr, X86::VCVTDQ2PDrm, TB_NO_REVERSE },
{ X86::VCVTDQ2PSrr, X86::VCVTDQ2PSrm, 0 },
- { X86::VCVTPD2DQrr, X86::VCVTPD2DQXrm, 0 },
- { X86::VCVTPD2PSrr, X86::VCVTPD2PSXrm, 0 },
+ { X86::VCVTPD2DQrr, X86::VCVTPD2DQrm, 0 },
+ { X86::VCVTPD2PSrr, X86::VCVTPD2PSrm, 0 },
{ X86::VCVTPS2DQrr, X86::VCVTPS2DQrm, 0 },
- { X86::VCVTPS2PDrr, X86::VCVTPS2PDrm, 0 },
- { X86::VCVTTPD2DQrr, X86::VCVTTPD2DQXrm, 0 },
+ { X86::VCVTPS2PDrr, X86::VCVTPS2PDrm, TB_NO_REVERSE },
+ { X86::VCVTTPD2DQrr, X86::VCVTTPD2DQrm, 0 },
{ X86::VCVTTPS2DQrr, X86::VCVTTPS2DQrm, 0 },
{ X86::VMOV64toPQIrr, X86::VMOVQI2PQIrm, 0 },
{ X86::VMOV64toSDrr, X86::VMOV64toSDrm, 0 },
@@ -618,14 +665,15 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMOVDI2PDIrr, X86::VMOVDI2PDIrm, 0 },
{ X86::VMOVDI2SSrr, X86::VMOVDI2SSrm, 0 },
{ X86::VMOVDQArr, X86::VMOVDQArm, TB_ALIGN_16 },
+ { X86::VMOVDQUrr, X86::VMOVDQUrm, 0 },
{ X86::VMOVSLDUPrr, X86::VMOVSLDUPrm, 0 },
{ X86::VMOVSHDUPrr, X86::VMOVSHDUPrm, 0 },
{ X86::VMOVUPDrr, X86::VMOVUPDrm, 0 },
{ X86::VMOVUPSrr, X86::VMOVUPSrm, 0 },
- { X86::VMOVZPQILo2PQIrr,X86::VMOVZPQILo2PQIrm, TB_ALIGN_16 },
- { X86::VPABSBrr128, X86::VPABSBrm128, 0 },
- { X86::VPABSDrr128, X86::VPABSDrm128, 0 },
- { X86::VPABSWrr128, X86::VPABSWrm128, 0 },
+ { X86::VMOVZPQILo2PQIrr,X86::VMOVQI2PQIrm, TB_NO_REVERSE },
+ { X86::VPABSBrr, X86::VPABSBrm, 0 },
+ { X86::VPABSDrr, X86::VPABSDrm, 0 },
+ { X86::VPABSWrr, X86::VPABSWrm, 0 },
{ X86::VPCMPESTRIrr, X86::VPCMPESTRIrm, 0 },
{ X86::VPCMPESTRM128rr, X86::VPCMPESTRM128rm, 0 },
{ X86::VPCMPISTRIrr, X86::VPCMPISTRIrm, 0 },
@@ -633,18 +681,18 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPHMINPOSUWrr128, X86::VPHMINPOSUWrm128, 0 },
{ X86::VPERMILPDri, X86::VPERMILPDmi, 0 },
{ X86::VPERMILPSri, X86::VPERMILPSmi, 0 },
- { X86::VPMOVSXBDrr, X86::VPMOVSXBDrm, 0 },
- { X86::VPMOVSXBQrr, X86::VPMOVSXBQrm, 0 },
- { X86::VPMOVSXBWrr, X86::VPMOVSXBWrm, 0 },
- { X86::VPMOVSXDQrr, X86::VPMOVSXDQrm, 0 },
- { X86::VPMOVSXWDrr, X86::VPMOVSXWDrm, 0 },
- { X86::VPMOVSXWQrr, X86::VPMOVSXWQrm, 0 },
- { X86::VPMOVZXBDrr, X86::VPMOVZXBDrm, 0 },
- { X86::VPMOVZXBQrr, X86::VPMOVZXBQrm, 0 },
- { X86::VPMOVZXBWrr, X86::VPMOVZXBWrm, 0 },
- { X86::VPMOVZXDQrr, X86::VPMOVZXDQrm, 0 },
- { X86::VPMOVZXWDrr, X86::VPMOVZXWDrm, 0 },
- { X86::VPMOVZXWQrr, X86::VPMOVZXWQrm, 0 },
+ { X86::VPMOVSXBDrr, X86::VPMOVSXBDrm, TB_NO_REVERSE },
+ { X86::VPMOVSXBQrr, X86::VPMOVSXBQrm, TB_NO_REVERSE },
+ { X86::VPMOVSXBWrr, X86::VPMOVSXBWrm, TB_NO_REVERSE },
+ { X86::VPMOVSXDQrr, X86::VPMOVSXDQrm, TB_NO_REVERSE },
+ { X86::VPMOVSXWDrr, X86::VPMOVSXWDrm, TB_NO_REVERSE },
+ { X86::VPMOVSXWQrr, X86::VPMOVSXWQrm, TB_NO_REVERSE },
+ { X86::VPMOVZXBDrr, X86::VPMOVZXBDrm, TB_NO_REVERSE },
+ { X86::VPMOVZXBQrr, X86::VPMOVZXBQrm, TB_NO_REVERSE },
+ { X86::VPMOVZXBWrr, X86::VPMOVZXBWrm, TB_NO_REVERSE },
+ { X86::VPMOVZXDQrr, X86::VPMOVZXDQrm, TB_NO_REVERSE },
+ { X86::VPMOVZXWDrr, X86::VPMOVZXWDrm, TB_NO_REVERSE },
+ { X86::VPMOVZXWQrr, X86::VPMOVZXWQrm, TB_NO_REVERSE },
{ X86::VPSHUFDri, X86::VPSHUFDmi, 0 },
{ X86::VPSHUFHWri, X86::VPSHUFHWmi, 0 },
{ X86::VPSHUFLWri, X86::VPSHUFLWmi, 0 },
@@ -661,18 +709,19 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VUCOMISSrr, X86::VUCOMISSrm, 0 },
// AVX 256-bit foldable instructions
- { X86::VCVTDQ2PDYrr, X86::VCVTDQ2PDYrm, 0 },
+ { X86::VCVTDQ2PDYrr, X86::VCVTDQ2PDYrm, TB_NO_REVERSE },
{ X86::VCVTDQ2PSYrr, X86::VCVTDQ2PSYrm, 0 },
{ X86::VCVTPD2DQYrr, X86::VCVTPD2DQYrm, 0 },
{ X86::VCVTPD2PSYrr, X86::VCVTPD2PSYrm, 0 },
{ X86::VCVTPS2DQYrr, X86::VCVTPS2DQYrm, 0 },
- { X86::VCVTPS2PDYrr, X86::VCVTPS2PDYrm, 0 },
+ { X86::VCVTPS2PDYrr, X86::VCVTPS2PDYrm, TB_NO_REVERSE },
{ X86::VCVTTPD2DQYrr, X86::VCVTTPD2DQYrm, 0 },
{ X86::VCVTTPS2DQYrr, X86::VCVTTPS2DQYrm, 0 },
{ X86::VMOVAPDYrr, X86::VMOVAPDYrm, TB_ALIGN_32 },
{ X86::VMOVAPSYrr, X86::VMOVAPSYrm, TB_ALIGN_32 },
{ X86::VMOVDDUPYrr, X86::VMOVDDUPYrm, 0 },
{ X86::VMOVDQAYrr, X86::VMOVDQAYrm, TB_ALIGN_32 },
+ { X86::VMOVDQUYrr, X86::VMOVDQUYrm, 0 },
{ X86::VMOVSLDUPYrr, X86::VMOVSLDUPYrm, 0 },
{ X86::VMOVSHDUPYrr, X86::VMOVSHDUPYrm, 0 },
{ X86::VMOVUPDYrr, X86::VMOVUPDYrm, 0 },
@@ -699,31 +748,31 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VBROADCASTSSrr, X86::VBROADCASTSSrm, TB_NO_REVERSE },
{ X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrm, TB_NO_REVERSE },
{ X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrm, TB_NO_REVERSE },
- { X86::VPABSBrr256, X86::VPABSBrm256, 0 },
- { X86::VPABSDrr256, X86::VPABSDrm256, 0 },
- { X86::VPABSWrr256, X86::VPABSWrm256, 0 },
- { X86::VPBROADCASTBrr, X86::VPBROADCASTBrm, 0 },
- { X86::VPBROADCASTBYrr, X86::VPBROADCASTBYrm, 0 },
- { X86::VPBROADCASTDrr, X86::VPBROADCASTDrm, 0 },
- { X86::VPBROADCASTDYrr, X86::VPBROADCASTDYrm, 0 },
- { X86::VPBROADCASTQrr, X86::VPBROADCASTQrm, 0 },
- { X86::VPBROADCASTQYrr, X86::VPBROADCASTQYrm, 0 },
- { X86::VPBROADCASTWrr, X86::VPBROADCASTWrm, 0 },
- { X86::VPBROADCASTWYrr, X86::VPBROADCASTWYrm, 0 },
+ { X86::VPABSBYrr, X86::VPABSBYrm, 0 },
+ { X86::VPABSDYrr, X86::VPABSDYrm, 0 },
+ { X86::VPABSWYrr, X86::VPABSWYrm, 0 },
+ { X86::VPBROADCASTBrr, X86::VPBROADCASTBrm, TB_NO_REVERSE },
+ { X86::VPBROADCASTBYrr, X86::VPBROADCASTBYrm, TB_NO_REVERSE },
+ { X86::VPBROADCASTDrr, X86::VPBROADCASTDrm, TB_NO_REVERSE },
+ { X86::VPBROADCASTDYrr, X86::VPBROADCASTDYrm, TB_NO_REVERSE },
+ { X86::VPBROADCASTQrr, X86::VPBROADCASTQrm, TB_NO_REVERSE },
+ { X86::VPBROADCASTQYrr, X86::VPBROADCASTQYrm, TB_NO_REVERSE },
+ { X86::VPBROADCASTWrr, X86::VPBROADCASTWrm, TB_NO_REVERSE },
+ { X86::VPBROADCASTWYrr, X86::VPBROADCASTWYrm, TB_NO_REVERSE },
{ X86::VPERMPDYri, X86::VPERMPDYmi, 0 },
{ X86::VPERMQYri, X86::VPERMQYmi, 0 },
- { X86::VPMOVSXBDYrr, X86::VPMOVSXBDYrm, 0 },
- { X86::VPMOVSXBQYrr, X86::VPMOVSXBQYrm, 0 },
+ { X86::VPMOVSXBDYrr, X86::VPMOVSXBDYrm, TB_NO_REVERSE },
+ { X86::VPMOVSXBQYrr, X86::VPMOVSXBQYrm, TB_NO_REVERSE },
{ X86::VPMOVSXBWYrr, X86::VPMOVSXBWYrm, 0 },
{ X86::VPMOVSXDQYrr, X86::VPMOVSXDQYrm, 0 },
{ X86::VPMOVSXWDYrr, X86::VPMOVSXWDYrm, 0 },
- { X86::VPMOVSXWQYrr, X86::VPMOVSXWQYrm, 0 },
- { X86::VPMOVZXBDYrr, X86::VPMOVZXBDYrm, 0 },
- { X86::VPMOVZXBQYrr, X86::VPMOVZXBQYrm, 0 },
+ { X86::VPMOVSXWQYrr, X86::VPMOVSXWQYrm, TB_NO_REVERSE },
+ { X86::VPMOVZXBDYrr, X86::VPMOVZXBDYrm, TB_NO_REVERSE },
+ { X86::VPMOVZXBQYrr, X86::VPMOVZXBQYrm, TB_NO_REVERSE },
{ X86::VPMOVZXBWYrr, X86::VPMOVZXBWYrm, 0 },
{ X86::VPMOVZXDQYrr, X86::VPMOVZXDQYrm, 0 },
{ X86::VPMOVZXWDYrr, X86::VPMOVZXWDYrm, 0 },
- { X86::VPMOVZXWQYrr, X86::VPMOVZXWQYrm, 0 },
+ { X86::VPMOVZXWQYrr, X86::VPMOVZXWQYrm, TB_NO_REVERSE },
{ X86::VPSHUFDYri, X86::VPSHUFDYmi, 0 },
{ X86::VPSHUFHWYri, X86::VPSHUFHWYmi, 0 },
{ X86::VPSHUFLWYri, X86::VPSHUFLWYmi, 0 },
@@ -817,7 +866,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::TZMSK64rr, X86::TZMSK64rm, 0 },
// AVX-512 foldable instructions
+ { X86::VBROADCASTSSZr, X86::VBROADCASTSSZm, TB_NO_REVERSE },
+ { X86::VBROADCASTSSZr_s, X86::VBROADCASTSSZm, TB_NO_REVERSE },
+ { X86::VBROADCASTSDZr, X86::VBROADCASTSDZm, TB_NO_REVERSE },
+ { X86::VBROADCASTSDZr_s, X86::VBROADCASTSDZm, TB_NO_REVERSE },
{ X86::VMOV64toPQIZrr, X86::VMOVQI2PQIZrm, 0 },
+ { X86::VMOVZPQILo2PQIZrr,X86::VMOVQI2PQIZrm, TB_NO_REVERSE },
{ X86::VMOVDI2SSZrr, X86::VMOVDI2SSZrm, 0 },
{ X86::VMOVAPDZrr, X86::VMOVAPDZrm, TB_ALIGN_64 },
{ X86::VMOVAPSZrr, X86::VMOVAPSZrm, TB_ALIGN_64 },
@@ -831,12 +885,31 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMOVUPSZrr, X86::VMOVUPSZrm, 0 },
{ X86::VPABSDZrr, X86::VPABSDZrm, 0 },
{ X86::VPABSQZrr, X86::VPABSQZrm, 0 },
- { X86::VBROADCASTSSZr, X86::VBROADCASTSSZm, TB_NO_REVERSE },
- { X86::VBROADCASTSSZr_s, X86::VBROADCASTSSZm, TB_NO_REVERSE },
- { X86::VBROADCASTSDZr, X86::VBROADCASTSDZm, TB_NO_REVERSE },
- { X86::VBROADCASTSDZr_s, X86::VBROADCASTSDZm, TB_NO_REVERSE },
+ { X86::VPERMILPDZri, X86::VPERMILPDZmi, 0 },
+ { X86::VPERMILPSZri, X86::VPERMILPSZmi, 0 },
+ { X86::VPERMPDZri, X86::VPERMPDZmi, 0 },
+ { X86::VPERMQZri, X86::VPERMQZmi, 0 },
+ { X86::VPMOVSXBDZrr, X86::VPMOVSXBDZrm, 0 },
+ { X86::VPMOVSXBQZrr, X86::VPMOVSXBQZrm, TB_NO_REVERSE },
+ { X86::VPMOVSXBWZrr, X86::VPMOVSXBWZrm, 0 },
+ { X86::VPMOVSXDQZrr, X86::VPMOVSXDQZrm, 0 },
+ { X86::VPMOVSXWDZrr, X86::VPMOVSXWDZrm, 0 },
+ { X86::VPMOVSXWQZrr, X86::VPMOVSXWQZrm, 0 },
+ { X86::VPMOVZXBDZrr, X86::VPMOVZXBDZrm, 0 },
+ { X86::VPMOVZXBQZrr, X86::VPMOVZXBQZrm, TB_NO_REVERSE },
+ { X86::VPMOVZXBWZrr, X86::VPMOVZXBWZrm, 0 },
+ { X86::VPMOVZXDQZrr, X86::VPMOVZXDQZrm, 0 },
+ { X86::VPMOVZXWDZrr, X86::VPMOVZXWDZrm, 0 },
+ { X86::VPMOVZXWQZrr, X86::VPMOVZXWQZrm, 0 },
+ { X86::VPSHUFDZri, X86::VPSHUFDZmi, 0 },
+ { X86::VPSHUFHWZri, X86::VPSHUFHWZmi, 0 },
+ { X86::VPSHUFLWZri, X86::VPSHUFLWZmi, 0 },
// AVX-512 foldable instructions (256-bit versions)
+ { X86::VBROADCASTSSZ256r, X86::VBROADCASTSSZ256m, TB_NO_REVERSE },
+ { X86::VBROADCASTSSZ256r_s, X86::VBROADCASTSSZ256m, TB_NO_REVERSE },
+ { X86::VBROADCASTSDZ256r, X86::VBROADCASTSDZ256m, TB_NO_REVERSE },
+ { X86::VBROADCASTSDZ256r_s, X86::VBROADCASTSDZ256m, TB_NO_REVERSE },
{ X86::VMOVAPDZ256rr, X86::VMOVAPDZ256rm, TB_ALIGN_32 },
{ X86::VMOVAPSZ256rr, X86::VMOVAPSZ256rm, TB_ALIGN_32 },
{ X86::VMOVDQA32Z256rr, X86::VMOVDQA32Z256rm, TB_ALIGN_32 },
@@ -847,12 +920,29 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMOVDQU64Z256rr, X86::VMOVDQU64Z256rm, 0 },
{ X86::VMOVUPDZ256rr, X86::VMOVUPDZ256rm, 0 },
{ X86::VMOVUPSZ256rr, X86::VMOVUPSZ256rm, 0 },
- { X86::VBROADCASTSSZ256r, X86::VBROADCASTSSZ256m, TB_NO_REVERSE },
- { X86::VBROADCASTSSZ256r_s, X86::VBROADCASTSSZ256m, TB_NO_REVERSE },
- { X86::VBROADCASTSDZ256r, X86::VBROADCASTSDZ256m, TB_NO_REVERSE },
- { X86::VBROADCASTSDZ256r_s, X86::VBROADCASTSDZ256m, TB_NO_REVERSE },
+ { X86::VPERMILPDZ256ri, X86::VPERMILPDZ256mi, 0 },
+ { X86::VPERMILPSZ256ri, X86::VPERMILPSZ256mi, 0 },
+ { X86::VPERMPDZ256ri, X86::VPERMPDZ256mi, 0 },
+ { X86::VPERMQZ256ri, X86::VPERMQZ256mi, 0 },
+ { X86::VPMOVSXBDZ256rr, X86::VPMOVSXBDZ256rm, TB_NO_REVERSE },
+ { X86::VPMOVSXBQZ256rr, X86::VPMOVSXBQZ256rm, TB_NO_REVERSE },
+ { X86::VPMOVSXBWZ256rr, X86::VPMOVSXBWZ256rm, 0 },
+ { X86::VPMOVSXDQZ256rr, X86::VPMOVSXDQZ256rm, 0 },
+ { X86::VPMOVSXWDZ256rr, X86::VPMOVSXWDZ256rm, 0 },
+ { X86::VPMOVSXWQZ256rr, X86::VPMOVSXWQZ256rm, TB_NO_REVERSE },
+ { X86::VPMOVZXBDZ256rr, X86::VPMOVZXBDZ256rm, TB_NO_REVERSE },
+ { X86::VPMOVZXBQZ256rr, X86::VPMOVZXBQZ256rm, TB_NO_REVERSE },
+ { X86::VPMOVZXBWZ256rr, X86::VPMOVZXBWZ256rm, 0 },
+ { X86::VPMOVZXDQZ256rr, X86::VPMOVZXDQZ256rm, 0 },
+ { X86::VPMOVZXWDZ256rr, X86::VPMOVZXWDZ256rm, 0 },
+ { X86::VPMOVZXWQZ256rr, X86::VPMOVZXWQZ256rm, TB_NO_REVERSE },
+ { X86::VPSHUFDZ256ri, X86::VPSHUFDZ256mi, 0 },
+ { X86::VPSHUFHWZ256ri, X86::VPSHUFHWZ256mi, 0 },
+ { X86::VPSHUFLWZ256ri, X86::VPSHUFLWZ256mi, 0 },
// AVX-512 foldable instructions (128-bit versions)
+ { X86::VBROADCASTSSZ128r, X86::VBROADCASTSSZ128m, TB_NO_REVERSE },
+ { X86::VBROADCASTSSZ128r_s, X86::VBROADCASTSSZ128m, TB_NO_REVERSE },
{ X86::VMOVAPDZ128rr, X86::VMOVAPDZ128rm, TB_ALIGN_16 },
{ X86::VMOVAPSZ128rr, X86::VMOVAPSZ128rm, TB_ALIGN_16 },
{ X86::VMOVDQA32Z128rr, X86::VMOVDQA32Z128rm, TB_ALIGN_16 },
@@ -863,8 +953,24 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VMOVDQU64Z128rr, X86::VMOVDQU64Z128rm, 0 },
{ X86::VMOVUPDZ128rr, X86::VMOVUPDZ128rm, 0 },
{ X86::VMOVUPSZ128rr, X86::VMOVUPSZ128rm, 0 },
- { X86::VBROADCASTSSZ128r, X86::VBROADCASTSSZ128m, TB_NO_REVERSE },
- { X86::VBROADCASTSSZ128r_s, X86::VBROADCASTSSZ128m, TB_NO_REVERSE },
+ { X86::VPERMILPDZ128ri, X86::VPERMILPDZ128mi, 0 },
+ { X86::VPERMILPSZ128ri, X86::VPERMILPSZ128mi, 0 },
+ { X86::VPMOVSXBDZ128rr, X86::VPMOVSXBDZ128rm, TB_NO_REVERSE },
+ { X86::VPMOVSXBQZ128rr, X86::VPMOVSXBQZ128rm, TB_NO_REVERSE },
+ { X86::VPMOVSXBWZ128rr, X86::VPMOVSXBWZ128rm, TB_NO_REVERSE },
+ { X86::VPMOVSXDQZ128rr, X86::VPMOVSXDQZ128rm, TB_NO_REVERSE },
+ { X86::VPMOVSXWDZ128rr, X86::VPMOVSXWDZ128rm, TB_NO_REVERSE },
+ { X86::VPMOVSXWQZ128rr, X86::VPMOVSXWQZ128rm, TB_NO_REVERSE },
+ { X86::VPMOVZXBDZ128rr, X86::VPMOVZXBDZ128rm, TB_NO_REVERSE },
+ { X86::VPMOVZXBQZ128rr, X86::VPMOVZXBQZ128rm, TB_NO_REVERSE },
+ { X86::VPMOVZXBWZ128rr, X86::VPMOVZXBWZ128rm, TB_NO_REVERSE },
+ { X86::VPMOVZXDQZ128rr, X86::VPMOVZXDQZ128rm, TB_NO_REVERSE },
+ { X86::VPMOVZXWDZ128rr, X86::VPMOVZXWDZ128rm, TB_NO_REVERSE },
+ { X86::VPMOVZXWQZ128rr, X86::VPMOVZXWQZ128rm, TB_NO_REVERSE },
+ { X86::VPSHUFDZ128ri, X86::VPSHUFDZ128mi, 0 },
+ { X86::VPSHUFHWZ128ri, X86::VPSHUFHWZ128mi, 0 },
+ { X86::VPSHUFLWZ128ri, X86::VPSHUFLWZ128mi, 0 },
+
// F16C foldable instructions
{ X86::VCVTPH2PSrr, X86::VCVTPH2PSrm, 0 },
{ X86::VCVTPH2PSYrr, X86::VCVTPH2PSYrm, 0 },
@@ -896,9 +1002,9 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::ADDPDrr, X86::ADDPDrm, TB_ALIGN_16 },
{ X86::ADDPSrr, X86::ADDPSrm, TB_ALIGN_16 },
{ X86::ADDSDrr, X86::ADDSDrm, 0 },
- { X86::ADDSDrr_Int, X86::ADDSDrm_Int, 0 },
+ { X86::ADDSDrr_Int, X86::ADDSDrm_Int, TB_NO_REVERSE },
{ X86::ADDSSrr, X86::ADDSSrm, 0 },
- { X86::ADDSSrr_Int, X86::ADDSSrm_Int, 0 },
+ { X86::ADDSSrr_Int, X86::ADDSSrm_Int, TB_NO_REVERSE },
{ X86::ADDSUBPDrr, X86::ADDSUBPDrm, TB_ALIGN_16 },
{ X86::ADDSUBPSrr, X86::ADDSUBPSrm, TB_ALIGN_16 },
{ X86::AND16rr, X86::AND16rm, 0 },
@@ -970,24 +1076,11 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::DIVPDrr, X86::DIVPDrm, TB_ALIGN_16 },
{ X86::DIVPSrr, X86::DIVPSrm, TB_ALIGN_16 },
{ X86::DIVSDrr, X86::DIVSDrm, 0 },
- { X86::DIVSDrr_Int, X86::DIVSDrm_Int, 0 },
+ { X86::DIVSDrr_Int, X86::DIVSDrm_Int, TB_NO_REVERSE },
{ X86::DIVSSrr, X86::DIVSSrm, 0 },
- { X86::DIVSSrr_Int, X86::DIVSSrm_Int, 0 },
+ { X86::DIVSSrr_Int, X86::DIVSSrm_Int, TB_NO_REVERSE },
{ X86::DPPDrri, X86::DPPDrmi, TB_ALIGN_16 },
{ X86::DPPSrri, X86::DPPSrmi, TB_ALIGN_16 },
-
- // Do not fold Fs* scalar logical op loads because there are no scalar
- // load variants for these instructions. When folded, the load is required
- // to be 128-bits, so the load size would not match.
-
- { X86::FvANDNPDrr, X86::FvANDNPDrm, TB_ALIGN_16 },
- { X86::FvANDNPSrr, X86::FvANDNPSrm, TB_ALIGN_16 },
- { X86::FvANDPDrr, X86::FvANDPDrm, TB_ALIGN_16 },
- { X86::FvANDPSrr, X86::FvANDPSrm, TB_ALIGN_16 },
- { X86::FvORPDrr, X86::FvORPDrm, TB_ALIGN_16 },
- { X86::FvORPSrr, X86::FvORPSrm, TB_ALIGN_16 },
- { X86::FvXORPDrr, X86::FvXORPDrm, TB_ALIGN_16 },
- { X86::FvXORPSrr, X86::FvXORPSrm, TB_ALIGN_16 },
{ X86::HADDPDrr, X86::HADDPDrm, TB_ALIGN_16 },
{ X86::HADDPSrr, X86::HADDPSrm, TB_ALIGN_16 },
{ X86::HSUBPDrr, X86::HSUBPDrm, TB_ALIGN_16 },
@@ -995,34 +1088,42 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::IMUL16rr, X86::IMUL16rm, 0 },
{ X86::IMUL32rr, X86::IMUL32rm, 0 },
{ X86::IMUL64rr, X86::IMUL64rm, 0 },
- { X86::Int_CMPSDrr, X86::Int_CMPSDrm, 0 },
- { X86::Int_CMPSSrr, X86::Int_CMPSSrm, 0 },
- { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, 0 },
+ { X86::Int_CMPSDrr, X86::Int_CMPSDrm, TB_NO_REVERSE },
+ { X86::Int_CMPSSrr, X86::Int_CMPSSrm, TB_NO_REVERSE },
+ { X86::Int_CVTSD2SSrr, X86::Int_CVTSD2SSrm, TB_NO_REVERSE },
{ X86::Int_CVTSI2SD64rr,X86::Int_CVTSI2SD64rm, 0 },
{ X86::Int_CVTSI2SDrr, X86::Int_CVTSI2SDrm, 0 },
{ X86::Int_CVTSI2SS64rr,X86::Int_CVTSI2SS64rm, 0 },
{ X86::Int_CVTSI2SSrr, X86::Int_CVTSI2SSrm, 0 },
- { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, 0 },
+ { X86::Int_CVTSS2SDrr, X86::Int_CVTSS2SDrm, TB_NO_REVERSE },
{ X86::MAXPDrr, X86::MAXPDrm, TB_ALIGN_16 },
+ { X86::MAXCPDrr, X86::MAXCPDrm, TB_ALIGN_16 },
{ X86::MAXPSrr, X86::MAXPSrm, TB_ALIGN_16 },
+ { X86::MAXCPSrr, X86::MAXCPSrm, TB_ALIGN_16 },
{ X86::MAXSDrr, X86::MAXSDrm, 0 },
- { X86::MAXSDrr_Int, X86::MAXSDrm_Int, 0 },
+ { X86::MAXCSDrr, X86::MAXCSDrm, 0 },
+ { X86::MAXSDrr_Int, X86::MAXSDrm_Int, TB_NO_REVERSE },
{ X86::MAXSSrr, X86::MAXSSrm, 0 },
- { X86::MAXSSrr_Int, X86::MAXSSrm_Int, 0 },
+ { X86::MAXCSSrr, X86::MAXCSSrm, 0 },
+ { X86::MAXSSrr_Int, X86::MAXSSrm_Int, TB_NO_REVERSE },
{ X86::MINPDrr, X86::MINPDrm, TB_ALIGN_16 },
+ { X86::MINCPDrr, X86::MINCPDrm, TB_ALIGN_16 },
{ X86::MINPSrr, X86::MINPSrm, TB_ALIGN_16 },
+ { X86::MINCPSrr, X86::MINCPSrm, TB_ALIGN_16 },
{ X86::MINSDrr, X86::MINSDrm, 0 },
- { X86::MINSDrr_Int, X86::MINSDrm_Int, 0 },
+ { X86::MINCSDrr, X86::MINCSDrm, 0 },
+ { X86::MINSDrr_Int, X86::MINSDrm_Int, TB_NO_REVERSE },
{ X86::MINSSrr, X86::MINSSrm, 0 },
- { X86::MINSSrr_Int, X86::MINSSrm_Int, 0 },
+ { X86::MINCSSrr, X86::MINCSSrm, 0 },
+ { X86::MINSSrr_Int, X86::MINSSrm_Int, TB_NO_REVERSE },
{ X86::MOVLHPSrr, X86::MOVHPSrm, TB_NO_REVERSE },
{ X86::MPSADBWrri, X86::MPSADBWrmi, TB_ALIGN_16 },
{ X86::MULPDrr, X86::MULPDrm, TB_ALIGN_16 },
{ X86::MULPSrr, X86::MULPSrm, TB_ALIGN_16 },
{ X86::MULSDrr, X86::MULSDrm, 0 },
- { X86::MULSDrr_Int, X86::MULSDrm_Int, 0 },
+ { X86::MULSDrr_Int, X86::MULSDrm_Int, TB_NO_REVERSE },
{ X86::MULSSrr, X86::MULSSrm, 0 },
- { X86::MULSSrr_Int, X86::MULSSrm_Int, 0 },
+ { X86::MULSSrr_Int, X86::MULSSrm_Int, TB_NO_REVERSE },
{ X86::OR16rr, X86::OR16rm, 0 },
{ X86::OR32rr, X86::OR32rm, 0 },
{ X86::OR64rr, X86::OR64rm, 0 },
@@ -1067,7 +1168,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::PINSRDrr, X86::PINSRDrm, 0 },
{ X86::PINSRQrr, X86::PINSRQrm, 0 },
{ X86::PINSRWrri, X86::PINSRWrmi, 0 },
- { X86::PMADDUBSWrr128, X86::PMADDUBSWrm128, TB_ALIGN_16 },
+ { X86::PMADDUBSWrr, X86::PMADDUBSWrm, TB_ALIGN_16 },
{ X86::PMADDWDrr, X86::PMADDWDrm, TB_ALIGN_16 },
{ X86::PMAXSWrr, X86::PMAXSWrm, TB_ALIGN_16 },
{ X86::PMAXUBrr, X86::PMAXUBrm, TB_ALIGN_16 },
@@ -1082,7 +1183,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::PMAXUDrr, X86::PMAXUDrm, TB_ALIGN_16 },
{ X86::PMAXUWrr, X86::PMAXUWrm, TB_ALIGN_16 },
{ X86::PMULDQrr, X86::PMULDQrm, TB_ALIGN_16 },
- { X86::PMULHRSWrr128, X86::PMULHRSWrm128, TB_ALIGN_16 },
+ { X86::PMULHRSWrr, X86::PMULHRSWrm, TB_ALIGN_16 },
{ X86::PMULHUWrr, X86::PMULHUWrm, TB_ALIGN_16 },
{ X86::PMULHWrr, X86::PMULHWrm, TB_ALIGN_16 },
{ X86::PMULLDrr, X86::PMULLDrm, TB_ALIGN_16 },
@@ -1119,8 +1220,8 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::PUNPCKLQDQrr, X86::PUNPCKLQDQrm, TB_ALIGN_16 },
{ X86::PUNPCKLWDrr, X86::PUNPCKLWDrm, TB_ALIGN_16 },
{ X86::PXORrr, X86::PXORrm, TB_ALIGN_16 },
- { X86::ROUNDSDr, X86::ROUNDSDm, 0 },
- { X86::ROUNDSSr, X86::ROUNDSSm, 0 },
+ { X86::ROUNDSDr_Int, X86::ROUNDSDm_Int, TB_NO_REVERSE },
+ { X86::ROUNDSSr_Int, X86::ROUNDSSm_Int, TB_NO_REVERSE },
{ X86::SBB32rr, X86::SBB32rm, 0 },
{ X86::SBB64rr, X86::SBB64rm, 0 },
{ X86::SHUFPDrri, X86::SHUFPDrmi, TB_ALIGN_16 },
@@ -1132,9 +1233,9 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::SUBPDrr, X86::SUBPDrm, TB_ALIGN_16 },
{ X86::SUBPSrr, X86::SUBPSrm, TB_ALIGN_16 },
{ X86::SUBSDrr, X86::SUBSDrm, 0 },
- { X86::SUBSDrr_Int, X86::SUBSDrm_Int, 0 },
+ { X86::SUBSDrr_Int, X86::SUBSDrm_Int, TB_NO_REVERSE },
{ X86::SUBSSrr, X86::SUBSSrm, 0 },
- { X86::SUBSSrr_Int, X86::SUBSSrm_Int, 0 },
+ { X86::SUBSSrr_Int, X86::SUBSSrm_Int, TB_NO_REVERSE },
// FIXME: TEST*rr -> swapped operand of TEST*mr.
{ X86::UNPCKHPDrr, X86::UNPCKHPDrm, TB_ALIGN_16 },
{ X86::UNPCKHPSrr, X86::UNPCKHPSrm, TB_ALIGN_16 },
@@ -1240,7 +1341,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
// AVX 128-bit versions of foldable instructions
{ X86::VCVTSD2SSrr, X86::VCVTSD2SSrm, 0 },
- { X86::Int_VCVTSD2SSrr, X86::Int_VCVTSD2SSrm, 0 },
+ { X86::Int_VCVTSD2SSrr, X86::Int_VCVTSD2SSrm, TB_NO_REVERSE },
{ X86::VCVTSI2SD64rr, X86::VCVTSI2SD64rm, 0 },
{ X86::Int_VCVTSI2SD64rr, X86::Int_VCVTSI2SD64rm, 0 },
{ X86::VCVTSI2SDrr, X86::VCVTSI2SDrm, 0 },
@@ -1250,21 +1351,13 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VCVTSI2SSrr, X86::VCVTSI2SSrm, 0 },
{ X86::Int_VCVTSI2SSrr, X86::Int_VCVTSI2SSrm, 0 },
{ X86::VCVTSS2SDrr, X86::VCVTSS2SDrm, 0 },
- { X86::Int_VCVTSS2SDrr, X86::Int_VCVTSS2SDrm, 0 },
- { X86::VRCPSSr, X86::VRCPSSm, 0 },
- { X86::VRCPSSr_Int, X86::VRCPSSm_Int, 0 },
- { X86::VRSQRTSSr, X86::VRSQRTSSm, 0 },
- { X86::VRSQRTSSr_Int, X86::VRSQRTSSm_Int, 0 },
- { X86::VSQRTSDr, X86::VSQRTSDm, 0 },
- { X86::VSQRTSDr_Int, X86::VSQRTSDm_Int, 0 },
- { X86::VSQRTSSr, X86::VSQRTSSm, 0 },
- { X86::VSQRTSSr_Int, X86::VSQRTSSm_Int, 0 },
+ { X86::Int_VCVTSS2SDrr, X86::Int_VCVTSS2SDrm, TB_NO_REVERSE },
{ X86::VADDPDrr, X86::VADDPDrm, 0 },
{ X86::VADDPSrr, X86::VADDPSrm, 0 },
{ X86::VADDSDrr, X86::VADDSDrm, 0 },
- { X86::VADDSDrr_Int, X86::VADDSDrm_Int, 0 },
+ { X86::VADDSDrr_Int, X86::VADDSDrm_Int, TB_NO_REVERSE },
{ X86::VADDSSrr, X86::VADDSSrm, 0 },
- { X86::VADDSSrr_Int, X86::VADDSSrm_Int, 0 },
+ { X86::VADDSSrr_Int, X86::VADDSSrm_Int, TB_NO_REVERSE },
{ X86::VADDSUBPDrr, X86::VADDSUBPDrm, 0 },
{ X86::VADDSUBPSrr, X86::VADDSUBPSrm, 0 },
{ X86::VANDNPDrr, X86::VANDNPDrm, 0 },
@@ -1282,48 +1375,45 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VDIVPDrr, X86::VDIVPDrm, 0 },
{ X86::VDIVPSrr, X86::VDIVPSrm, 0 },
{ X86::VDIVSDrr, X86::VDIVSDrm, 0 },
- { X86::VDIVSDrr_Int, X86::VDIVSDrm_Int, 0 },
+ { X86::VDIVSDrr_Int, X86::VDIVSDrm_Int, TB_NO_REVERSE },
{ X86::VDIVSSrr, X86::VDIVSSrm, 0 },
- { X86::VDIVSSrr_Int, X86::VDIVSSrm_Int, 0 },
+ { X86::VDIVSSrr_Int, X86::VDIVSSrm_Int, TB_NO_REVERSE },
{ X86::VDPPDrri, X86::VDPPDrmi, 0 },
{ X86::VDPPSrri, X86::VDPPSrmi, 0 },
- // Do not fold VFs* loads because there are no scalar load variants for
- // these instructions. When folded, the load is required to be 128-bits, so
- // the load size would not match.
- { X86::VFvANDNPDrr, X86::VFvANDNPDrm, 0 },
- { X86::VFvANDNPSrr, X86::VFvANDNPSrm, 0 },
- { X86::VFvANDPDrr, X86::VFvANDPDrm, 0 },
- { X86::VFvANDPSrr, X86::VFvANDPSrm, 0 },
- { X86::VFvORPDrr, X86::VFvORPDrm, 0 },
- { X86::VFvORPSrr, X86::VFvORPSrm, 0 },
- { X86::VFvXORPDrr, X86::VFvXORPDrm, 0 },
- { X86::VFvXORPSrr, X86::VFvXORPSrm, 0 },
{ X86::VHADDPDrr, X86::VHADDPDrm, 0 },
{ X86::VHADDPSrr, X86::VHADDPSrm, 0 },
{ X86::VHSUBPDrr, X86::VHSUBPDrm, 0 },
{ X86::VHSUBPSrr, X86::VHSUBPSrm, 0 },
- { X86::Int_VCMPSDrr, X86::Int_VCMPSDrm, 0 },
- { X86::Int_VCMPSSrr, X86::Int_VCMPSSrm, 0 },
+ { X86::Int_VCMPSDrr, X86::Int_VCMPSDrm, TB_NO_REVERSE },
+ { X86::Int_VCMPSSrr, X86::Int_VCMPSSrm, TB_NO_REVERSE },
+ { X86::VMAXCPDrr, X86::VMAXCPDrm, 0 },
+ { X86::VMAXCPSrr, X86::VMAXCPSrm, 0 },
+ { X86::VMAXCSDrr, X86::VMAXCSDrm, 0 },
+ { X86::VMAXCSSrr, X86::VMAXCSSrm, 0 },
{ X86::VMAXPDrr, X86::VMAXPDrm, 0 },
{ X86::VMAXPSrr, X86::VMAXPSrm, 0 },
{ X86::VMAXSDrr, X86::VMAXSDrm, 0 },
- { X86::VMAXSDrr_Int, X86::VMAXSDrm_Int, 0 },
+ { X86::VMAXSDrr_Int, X86::VMAXSDrm_Int, TB_NO_REVERSE },
{ X86::VMAXSSrr, X86::VMAXSSrm, 0 },
- { X86::VMAXSSrr_Int, X86::VMAXSSrm_Int, 0 },
+ { X86::VMAXSSrr_Int, X86::VMAXSSrm_Int, TB_NO_REVERSE },
+ { X86::VMINCPDrr, X86::VMINCPDrm, 0 },
+ { X86::VMINCPSrr, X86::VMINCPSrm, 0 },
+ { X86::VMINCSDrr, X86::VMINCSDrm, 0 },
+ { X86::VMINCSSrr, X86::VMINCSSrm, 0 },
{ X86::VMINPDrr, X86::VMINPDrm, 0 },
{ X86::VMINPSrr, X86::VMINPSrm, 0 },
{ X86::VMINSDrr, X86::VMINSDrm, 0 },
- { X86::VMINSDrr_Int, X86::VMINSDrm_Int, 0 },
+ { X86::VMINSDrr_Int, X86::VMINSDrm_Int, TB_NO_REVERSE },
{ X86::VMINSSrr, X86::VMINSSrm, 0 },
- { X86::VMINSSrr_Int, X86::VMINSSrm_Int, 0 },
+ { X86::VMINSSrr_Int, X86::VMINSSrm_Int, TB_NO_REVERSE },
{ X86::VMOVLHPSrr, X86::VMOVHPSrm, TB_NO_REVERSE },
{ X86::VMPSADBWrri, X86::VMPSADBWrmi, 0 },
{ X86::VMULPDrr, X86::VMULPDrm, 0 },
{ X86::VMULPSrr, X86::VMULPSrm, 0 },
{ X86::VMULSDrr, X86::VMULSDrm, 0 },
- { X86::VMULSDrr_Int, X86::VMULSDrm_Int, 0 },
+ { X86::VMULSDrr_Int, X86::VMULSDrm_Int, TB_NO_REVERSE },
{ X86::VMULSSrr, X86::VMULSSrm, 0 },
- { X86::VMULSSrr_Int, X86::VMULSSrm_Int, 0 },
+ { X86::VMULSSrr_Int, X86::VMULSSrm_Int, TB_NO_REVERSE },
{ X86::VORPDrr, X86::VORPDrm, 0 },
{ X86::VORPSrr, X86::VORPSrm, 0 },
{ X86::VPACKSSDWrr, X86::VPACKSSDWrm, 0 },
@@ -1366,7 +1456,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPINSRDrr, X86::VPINSRDrm, 0 },
{ X86::VPINSRQrr, X86::VPINSRQrm, 0 },
{ X86::VPINSRWrri, X86::VPINSRWrmi, 0 },
- { X86::VPMADDUBSWrr128, X86::VPMADDUBSWrm128, 0 },
+ { X86::VPMADDUBSWrr, X86::VPMADDUBSWrm, 0 },
{ X86::VPMADDWDrr, X86::VPMADDWDrm, 0 },
{ X86::VPMAXSWrr, X86::VPMAXSWrm, 0 },
{ X86::VPMAXUBrr, X86::VPMAXUBrm, 0 },
@@ -1381,7 +1471,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPMAXUDrr, X86::VPMAXUDrm, 0 },
{ X86::VPMAXUWrr, X86::VPMAXUWrm, 0 },
{ X86::VPMULDQrr, X86::VPMULDQrm, 0 },
- { X86::VPMULHRSWrr128, X86::VPMULHRSWrm128, 0 },
+ { X86::VPMULHRSWrr, X86::VPMULHRSWrm, 0 },
{ X86::VPMULHUWrr, X86::VPMULHUWrm, 0 },
{ X86::VPMULHWrr, X86::VPMULHWrm, 0 },
{ X86::VPMULLDrr, X86::VPMULLDrm, 0 },
@@ -1418,16 +1508,26 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPUNPCKLQDQrr, X86::VPUNPCKLQDQrm, 0 },
{ X86::VPUNPCKLWDrr, X86::VPUNPCKLWDrm, 0 },
{ X86::VPXORrr, X86::VPXORrm, 0 },
+ { X86::VRCPSSr, X86::VRCPSSm, 0 },
+ { X86::VRCPSSr_Int, X86::VRCPSSm_Int, TB_NO_REVERSE },
+ { X86::VRSQRTSSr, X86::VRSQRTSSm, 0 },
+ { X86::VRSQRTSSr_Int, X86::VRSQRTSSm_Int, TB_NO_REVERSE },
{ X86::VROUNDSDr, X86::VROUNDSDm, 0 },
+ { X86::VROUNDSDr_Int, X86::VROUNDSDm_Int, TB_NO_REVERSE },
{ X86::VROUNDSSr, X86::VROUNDSSm, 0 },
+ { X86::VROUNDSSr_Int, X86::VROUNDSSm_Int, TB_NO_REVERSE },
{ X86::VSHUFPDrri, X86::VSHUFPDrmi, 0 },
{ X86::VSHUFPSrri, X86::VSHUFPSrmi, 0 },
+ { X86::VSQRTSDr, X86::VSQRTSDm, 0 },
+ { X86::VSQRTSDr_Int, X86::VSQRTSDm_Int, TB_NO_REVERSE },
+ { X86::VSQRTSSr, X86::VSQRTSSm, 0 },
+ { X86::VSQRTSSr_Int, X86::VSQRTSSm_Int, TB_NO_REVERSE },
{ X86::VSUBPDrr, X86::VSUBPDrm, 0 },
{ X86::VSUBPSrr, X86::VSUBPSrm, 0 },
{ X86::VSUBSDrr, X86::VSUBSDrm, 0 },
- { X86::VSUBSDrr_Int, X86::VSUBSDrm_Int, 0 },
+ { X86::VSUBSDrr_Int, X86::VSUBSDrm_Int, TB_NO_REVERSE },
{ X86::VSUBSSrr, X86::VSUBSSrm, 0 },
- { X86::VSUBSSrr_Int, X86::VSUBSSrm_Int, 0 },
+ { X86::VSUBSSrr_Int, X86::VSUBSSrm_Int, TB_NO_REVERSE },
{ X86::VUNPCKHPDrr, X86::VUNPCKHPDrm, 0 },
{ X86::VUNPCKHPSrr, X86::VUNPCKHPSrm, 0 },
{ X86::VUNPCKLPDrr, X86::VUNPCKLPDrm, 0 },
@@ -1458,8 +1558,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VHSUBPDYrr, X86::VHSUBPDYrm, 0 },
{ X86::VHSUBPSYrr, X86::VHSUBPSYrm, 0 },
{ X86::VINSERTF128rr, X86::VINSERTF128rm, 0 },
+ { X86::VMAXCPDYrr, X86::VMAXCPDYrm, 0 },
+ { X86::VMAXCPSYrr, X86::VMAXCPSYrm, 0 },
{ X86::VMAXPDYrr, X86::VMAXPDYrm, 0 },
{ X86::VMAXPSYrr, X86::VMAXPSYrm, 0 },
+ { X86::VMINCPDYrr, X86::VMINCPDYrm, 0 },
+ { X86::VMINCPSYrr, X86::VMINCPSYrm, 0 },
{ X86::VMINPDYrr, X86::VMINPDYrm, 0 },
{ X86::VMINPSYrr, X86::VMINPSYrm, 0 },
{ X86::VMULPDYrr, X86::VMULPDYrm, 0 },
@@ -1520,7 +1624,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPHSUBDYrr, X86::VPHSUBDYrm, 0 },
{ X86::VPHSUBSWrr256, X86::VPHSUBSWrm256, 0 },
{ X86::VPHSUBWYrr, X86::VPHSUBWYrm, 0 },
- { X86::VPMADDUBSWrr256, X86::VPMADDUBSWrm256, 0 },
+ { X86::VPMADDUBSWYrr, X86::VPMADDUBSWYrm, 0 },
{ X86::VPMADDWDYrr, X86::VPMADDWDYrm, 0 },
{ X86::VPMAXSWYrr, X86::VPMAXSWYrm, 0 },
{ X86::VPMAXUBYrr, X86::VPMAXUBYrm, 0 },
@@ -1536,7 +1640,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPMAXUWYrr, X86::VPMAXUWYrm, 0 },
{ X86::VMPSADBWYrri, X86::VMPSADBWYrmi, 0 },
{ X86::VPMULDQYrr, X86::VPMULDQYrm, 0 },
- { X86::VPMULHRSWrr256, X86::VPMULHRSWrm256, 0 },
+ { X86::VPMULHRSWYrr, X86::VPMULHRSWYrm, 0 },
{ X86::VPMULHUWYrr, X86::VPMULHUWYrm, 0 },
{ X86::VPMULHWYrr, X86::VPMULHWYrm, 0 },
{ X86::VPMULLDYrr, X86::VPMULLDYrm, 0 },
@@ -1559,8 +1663,6 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPSRAWYrr, X86::VPSRAWYrm, 0 },
{ X86::VPSRAVDrr, X86::VPSRAVDrm, 0 },
{ X86::VPSRAVDYrr, X86::VPSRAVDYrm, 0 },
- { X86::VPSRAVD_Intrr, X86::VPSRAVD_Intrm, 0 },
- { X86::VPSRAVD_IntYrr, X86::VPSRAVD_IntYrm, 0 },
{ X86::VPSRLDYrr, X86::VPSRLDYrm, 0 },
{ X86::VPSRLQYrr, X86::VPSRLQYrm, 0 },
{ X86::VPSRLWYrr, X86::VPSRLWYrm, 0 },
@@ -1588,37 +1690,45 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
// FMA4 foldable patterns
{ X86::VFMADDSS4rr, X86::VFMADDSS4mr, TB_ALIGN_NONE },
+ { X86::VFMADDSS4rr_Int, X86::VFMADDSS4mr_Int, TB_NO_REVERSE },
{ X86::VFMADDSD4rr, X86::VFMADDSD4mr, TB_ALIGN_NONE },
+ { X86::VFMADDSD4rr_Int, X86::VFMADDSD4mr_Int, TB_NO_REVERSE },
{ X86::VFMADDPS4rr, X86::VFMADDPS4mr, TB_ALIGN_NONE },
{ X86::VFMADDPD4rr, X86::VFMADDPD4mr, TB_ALIGN_NONE },
- { X86::VFMADDPS4rrY, X86::VFMADDPS4mrY, TB_ALIGN_NONE },
- { X86::VFMADDPD4rrY, X86::VFMADDPD4mrY, TB_ALIGN_NONE },
+ { X86::VFMADDPS4Yrr, X86::VFMADDPS4Ymr, TB_ALIGN_NONE },
+ { X86::VFMADDPD4Yrr, X86::VFMADDPD4Ymr, TB_ALIGN_NONE },
{ X86::VFNMADDSS4rr, X86::VFNMADDSS4mr, TB_ALIGN_NONE },
+ { X86::VFNMADDSS4rr_Int, X86::VFNMADDSS4mr_Int, TB_NO_REVERSE },
{ X86::VFNMADDSD4rr, X86::VFNMADDSD4mr, TB_ALIGN_NONE },
+ { X86::VFNMADDSD4rr_Int, X86::VFNMADDSD4mr_Int, TB_NO_REVERSE },
{ X86::VFNMADDPS4rr, X86::VFNMADDPS4mr, TB_ALIGN_NONE },
{ X86::VFNMADDPD4rr, X86::VFNMADDPD4mr, TB_ALIGN_NONE },
- { X86::VFNMADDPS4rrY, X86::VFNMADDPS4mrY, TB_ALIGN_NONE },
- { X86::VFNMADDPD4rrY, X86::VFNMADDPD4mrY, TB_ALIGN_NONE },
+ { X86::VFNMADDPS4Yrr, X86::VFNMADDPS4Ymr, TB_ALIGN_NONE },
+ { X86::VFNMADDPD4Yrr, X86::VFNMADDPD4Ymr, TB_ALIGN_NONE },
{ X86::VFMSUBSS4rr, X86::VFMSUBSS4mr, TB_ALIGN_NONE },
+ { X86::VFMSUBSS4rr_Int, X86::VFMSUBSS4mr_Int, TB_NO_REVERSE },
{ X86::VFMSUBSD4rr, X86::VFMSUBSD4mr, TB_ALIGN_NONE },
+ { X86::VFMSUBSD4rr_Int, X86::VFMSUBSD4mr_Int, TB_NO_REVERSE },
{ X86::VFMSUBPS4rr, X86::VFMSUBPS4mr, TB_ALIGN_NONE },
{ X86::VFMSUBPD4rr, X86::VFMSUBPD4mr, TB_ALIGN_NONE },
- { X86::VFMSUBPS4rrY, X86::VFMSUBPS4mrY, TB_ALIGN_NONE },
- { X86::VFMSUBPD4rrY, X86::VFMSUBPD4mrY, TB_ALIGN_NONE },
+ { X86::VFMSUBPS4Yrr, X86::VFMSUBPS4Ymr, TB_ALIGN_NONE },
+ { X86::VFMSUBPD4Yrr, X86::VFMSUBPD4Ymr, TB_ALIGN_NONE },
{ X86::VFNMSUBSS4rr, X86::VFNMSUBSS4mr, TB_ALIGN_NONE },
+ { X86::VFNMSUBSS4rr_Int, X86::VFNMSUBSS4mr_Int, TB_NO_REVERSE },
{ X86::VFNMSUBSD4rr, X86::VFNMSUBSD4mr, TB_ALIGN_NONE },
+ { X86::VFNMSUBSD4rr_Int, X86::VFNMSUBSD4mr_Int, TB_NO_REVERSE },
{ X86::VFNMSUBPS4rr, X86::VFNMSUBPS4mr, TB_ALIGN_NONE },
{ X86::VFNMSUBPD4rr, X86::VFNMSUBPD4mr, TB_ALIGN_NONE },
- { X86::VFNMSUBPS4rrY, X86::VFNMSUBPS4mrY, TB_ALIGN_NONE },
- { X86::VFNMSUBPD4rrY, X86::VFNMSUBPD4mrY, TB_ALIGN_NONE },
+ { X86::VFNMSUBPS4Yrr, X86::VFNMSUBPS4Ymr, TB_ALIGN_NONE },
+ { X86::VFNMSUBPD4Yrr, X86::VFNMSUBPD4Ymr, TB_ALIGN_NONE },
{ X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4mr, TB_ALIGN_NONE },
{ X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4mr, TB_ALIGN_NONE },
- { X86::VFMADDSUBPS4rrY, X86::VFMADDSUBPS4mrY, TB_ALIGN_NONE },
- { X86::VFMADDSUBPD4rrY, X86::VFMADDSUBPD4mrY, TB_ALIGN_NONE },
+ { X86::VFMADDSUBPS4Yrr, X86::VFMADDSUBPS4Ymr, TB_ALIGN_NONE },
+ { X86::VFMADDSUBPD4Yrr, X86::VFMADDSUBPD4Ymr, TB_ALIGN_NONE },
{ X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4mr, TB_ALIGN_NONE },
{ X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4mr, TB_ALIGN_NONE },
- { X86::VFMSUBADDPS4rrY, X86::VFMSUBADDPS4mrY, TB_ALIGN_NONE },
- { X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4mrY, TB_ALIGN_NONE },
+ { X86::VFMSUBADDPS4Yrr, X86::VFMSUBADDPS4Ymr, TB_ALIGN_NONE },
+ { X86::VFMSUBADDPD4Yrr, X86::VFMSUBADDPD4Ymr, TB_ALIGN_NONE },
// XOP foldable instructions
{ X86::VPCMOVrrr, X86::VPCMOVrmr, 0 },
@@ -1678,38 +1788,107 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::ADOX64rr, X86::ADOX64rm, 0 },
// AVX-512 foldable instructions
- { X86::VADDPSZrr, X86::VADDPSZrm, 0 },
{ X86::VADDPDZrr, X86::VADDPDZrm, 0 },
- { X86::VADDSSZrr, X86::VADDSSZrm, 0 },
- { X86::VADDSSZrr_Int, X86::VADDSSZrm_Int, 0 },
+ { X86::VADDPSZrr, X86::VADDPSZrm, 0 },
{ X86::VADDSDZrr, X86::VADDSDZrm, 0 },
- { X86::VADDSDZrr_Int, X86::VADDSDZrm_Int, 0 },
- { X86::VSUBPSZrr, X86::VSUBPSZrm, 0 },
- { X86::VSUBPDZrr, X86::VSUBPDZrm, 0 },
- { X86::VSUBSSZrr, X86::VSUBSSZrm, 0 },
- { X86::VSUBSSZrr_Int, X86::VSUBSSZrm_Int, 0 },
- { X86::VSUBSDZrr, X86::VSUBSDZrm, 0 },
- { X86::VSUBSDZrr_Int, X86::VSUBSDZrm_Int, 0 },
- { X86::VMULPSZrr, X86::VMULPSZrm, 0 },
- { X86::VMULPDZrr, X86::VMULPDZrm, 0 },
- { X86::VMULSSZrr, X86::VMULSSZrm, 0 },
- { X86::VMULSSZrr_Int, X86::VMULSSZrm_Int, 0 },
- { X86::VMULSDZrr, X86::VMULSDZrm, 0 },
- { X86::VMULSDZrr_Int, X86::VMULSDZrm_Int, 0 },
- { X86::VDIVPSZrr, X86::VDIVPSZrm, 0 },
+ { X86::VADDSDZrr_Int, X86::VADDSDZrm_Int, TB_NO_REVERSE },
+ { X86::VADDSSZrr, X86::VADDSSZrm, 0 },
+ { X86::VADDSSZrr_Int, X86::VADDSSZrm_Int, TB_NO_REVERSE },
+ { X86::VALIGNDZrri, X86::VALIGNDZrmi, 0 },
+ { X86::VALIGNQZrri, X86::VALIGNQZrmi, 0 },
+ { X86::VANDNPDZrr, X86::VANDNPDZrm, 0 },
+ { X86::VANDNPSZrr, X86::VANDNPSZrm, 0 },
+ { X86::VANDPDZrr, X86::VANDPDZrm, 0 },
+ { X86::VANDPSZrr, X86::VANDPSZrm, 0 },
+ { X86::VBROADCASTSSZrkz, X86::VBROADCASTSSZmkz, TB_NO_REVERSE },
+ { X86::VBROADCASTSDZrkz, X86::VBROADCASTSDZmkz, TB_NO_REVERSE },
+ { X86::VCMPPDZrri, X86::VCMPPDZrmi, 0 },
+ { X86::VCMPPSZrri, X86::VCMPPSZrmi, 0 },
+ { X86::VCMPSDZrr, X86::VCMPSDZrm, 0 },
+ { X86::VCMPSDZrr_Int, X86::VCMPSDZrm_Int, TB_NO_REVERSE },
+ { X86::VCMPSSZrr, X86::VCMPSSZrm, 0 },
+ { X86::VCMPSSZrr_Int, X86::VCMPSSZrm_Int, TB_NO_REVERSE },
{ X86::VDIVPDZrr, X86::VDIVPDZrm, 0 },
- { X86::VDIVSSZrr, X86::VDIVSSZrm, 0 },
- { X86::VDIVSSZrr_Int, X86::VDIVSSZrm_Int, 0 },
+ { X86::VDIVPSZrr, X86::VDIVPSZrm, 0 },
{ X86::VDIVSDZrr, X86::VDIVSDZrm, 0 },
- { X86::VDIVSDZrr_Int, X86::VDIVSDZrm_Int, 0 },
- { X86::VMINPSZrr, X86::VMINPSZrm, 0 },
- { X86::VMINPDZrr, X86::VMINPDZrm, 0 },
- { X86::VMAXPSZrr, X86::VMAXPSZrm, 0 },
+ { X86::VDIVSDZrr_Int, X86::VDIVSDZrm_Int, TB_NO_REVERSE },
+ { X86::VDIVSSZrr, X86::VDIVSSZrm, 0 },
+ { X86::VDIVSSZrr_Int, X86::VDIVSSZrm_Int, TB_NO_REVERSE },
+ { X86::VINSERTF32x4Zrr, X86::VINSERTF32x4Zrm, 0 },
+ { X86::VINSERTF32x8Zrr, X86::VINSERTF32x8Zrm, 0 },
+ { X86::VINSERTF64x2Zrr, X86::VINSERTF64x2Zrm, 0 },
+ { X86::VINSERTF64x4Zrr, X86::VINSERTF64x4Zrm, 0 },
+ { X86::VINSERTI32x4Zrr, X86::VINSERTI32x4Zrm, 0 },
+ { X86::VINSERTI32x8Zrr, X86::VINSERTI32x8Zrm, 0 },
+ { X86::VINSERTI64x2Zrr, X86::VINSERTI64x2Zrm, 0 },
+ { X86::VINSERTI64x4Zrr, X86::VINSERTI64x4Zrm, 0 },
+ { X86::VMAXCPDZrr, X86::VMAXCPDZrm, 0 },
+ { X86::VMAXCPSZrr, X86::VMAXCPSZrm, 0 },
+ { X86::VMAXCSDZrr, X86::VMAXCSDZrm, 0 },
+ { X86::VMAXCSSZrr, X86::VMAXCSSZrm, 0 },
{ X86::VMAXPDZrr, X86::VMAXPDZrm, 0 },
+ { X86::VMAXPSZrr, X86::VMAXPSZrm, 0 },
+ { X86::VMAXSDZrr, X86::VMAXSDZrm, 0 },
+ { X86::VMAXSDZrr_Int, X86::VMAXSDZrm_Int, TB_NO_REVERSE },
+ { X86::VMAXSSZrr, X86::VMAXSSZrm, 0 },
+ { X86::VMAXSSZrr_Int, X86::VMAXSSZrm_Int, TB_NO_REVERSE },
+ { X86::VMINCPDZrr, X86::VMINCPDZrm, 0 },
+ { X86::VMINCPSZrr, X86::VMINCPSZrm, 0 },
+ { X86::VMINCSDZrr, X86::VMINCSDZrm, 0 },
+ { X86::VMINCSSZrr, X86::VMINCSSZrm, 0 },
+ { X86::VMINPDZrr, X86::VMINPDZrm, 0 },
+ { X86::VMINPSZrr, X86::VMINPSZrm, 0 },
+ { X86::VMINSDZrr, X86::VMINSDZrm, 0 },
+ { X86::VMINSDZrr_Int, X86::VMINSDZrm_Int, TB_NO_REVERSE },
+ { X86::VMINSSZrr, X86::VMINSSZrm, 0 },
+ { X86::VMINSSZrr_Int, X86::VMINSSZrm_Int, TB_NO_REVERSE },
+ { X86::VMULPDZrr, X86::VMULPDZrm, 0 },
+ { X86::VMULPSZrr, X86::VMULPSZrm, 0 },
+ { X86::VMULSDZrr, X86::VMULSDZrm, 0 },
+ { X86::VMULSDZrr_Int, X86::VMULSDZrm_Int, TB_NO_REVERSE },
+ { X86::VMULSSZrr, X86::VMULSSZrm, 0 },
+ { X86::VMULSSZrr_Int, X86::VMULSSZrm_Int, TB_NO_REVERSE },
+ { X86::VORPDZrr, X86::VORPDZrm, 0 },
+ { X86::VORPSZrr, X86::VORPSZrm, 0 },
+ { X86::VPADDBZrr, X86::VPADDBZrm, 0 },
{ X86::VPADDDZrr, X86::VPADDDZrm, 0 },
{ X86::VPADDQZrr, X86::VPADDQZrm, 0 },
- { X86::VPERMPDZri, X86::VPERMPDZmi, 0 },
+ { X86::VPADDSBZrr, X86::VPADDSBZrm, 0 },
+ { X86::VPADDSWZrr, X86::VPADDSWZrm, 0 },
+ { X86::VPADDUSBZrr, X86::VPADDUSBZrm, 0 },
+ { X86::VPADDUSWZrr, X86::VPADDUSWZrm, 0 },
+ { X86::VPADDWZrr, X86::VPADDWZrm, 0 },
+ { X86::VPALIGNRZrri, X86::VPALIGNRZrmi, 0 },
+ { X86::VPANDDZrr, X86::VPANDDZrm, 0 },
+ { X86::VPANDNDZrr, X86::VPANDNDZrm, 0 },
+ { X86::VPANDNQZrr, X86::VPANDNQZrm, 0 },
+ { X86::VPANDQZrr, X86::VPANDQZrm, 0 },
+ { X86::VPCMPBZrri, X86::VPCMPBZrmi, 0 },
+ { X86::VPCMPDZrri, X86::VPCMPDZrmi, 0 },
+ { X86::VPCMPEQBZrr, X86::VPCMPEQBZrm, 0 },
+ { X86::VPCMPEQDZrr, X86::VPCMPEQDZrm, 0 },
+ { X86::VPCMPEQQZrr, X86::VPCMPEQQZrm, 0 },
+ { X86::VPCMPEQWZrr, X86::VPCMPEQWZrm, 0 },
+ { X86::VPCMPGTBZrr, X86::VPCMPGTBZrm, 0 },
+ { X86::VPCMPGTDZrr, X86::VPCMPGTDZrm, 0 },
+ { X86::VPCMPGTQZrr, X86::VPCMPGTQZrm, 0 },
+ { X86::VPCMPGTWZrr, X86::VPCMPGTWZrm, 0 },
+ { X86::VPCMPQZrri, X86::VPCMPQZrmi, 0 },
+ { X86::VPCMPUBZrri, X86::VPCMPUBZrmi, 0 },
+ { X86::VPCMPUDZrri, X86::VPCMPUDZrmi, 0 },
+ { X86::VPCMPUQZrri, X86::VPCMPUQZrmi, 0 },
+ { X86::VPCMPUWZrri, X86::VPCMPUWZrmi, 0 },
+ { X86::VPCMPWZrri, X86::VPCMPWZrmi, 0 },
+ { X86::VPERMBZrr, X86::VPERMBZrm, 0 },
+ { X86::VPERMDZrr, X86::VPERMDZrm, 0 },
+ { X86::VPERMILPDZrr, X86::VPERMILPDZrm, 0 },
+ { X86::VPERMILPSZrr, X86::VPERMILPSZrm, 0 },
+ { X86::VPERMPDZrr, X86::VPERMPDZrm, 0 },
{ X86::VPERMPSZrr, X86::VPERMPSZrm, 0 },
+ { X86::VPERMQZrr, X86::VPERMQZrm, 0 },
+ { X86::VPERMWZrr, X86::VPERMWZrm, 0 },
+ { X86::VPMADDUBSWZrr, X86::VPMADDUBSWZrm, 0 },
+ { X86::VPMADDWDZrr, X86::VPMADDWDZrm, 0 },
{ X86::VPMAXSDZrr, X86::VPMAXSDZrm, 0 },
{ X86::VPMAXSQZrr, X86::VPMAXSQZrm, 0 },
{ X86::VPMAXUDZrr, X86::VPMAXUDZrm, 0 },
@@ -1719,31 +1898,297 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPMINUDZrr, X86::VPMINUDZrm, 0 },
{ X86::VPMINUQZrr, X86::VPMINUQZrm, 0 },
{ X86::VPMULDQZrr, X86::VPMULDQZrm, 0 },
+ { X86::VPMULUDQZrr, X86::VPMULUDQZrm, 0 },
+ { X86::VPORDZrr, X86::VPORDZrm, 0 },
+ { X86::VPORQZrr, X86::VPORQZrm, 0 },
+ { X86::VPSHUFBZrr, X86::VPSHUFBZrm, 0 },
{ X86::VPSLLVDZrr, X86::VPSLLVDZrm, 0 },
{ X86::VPSLLVQZrr, X86::VPSLLVQZrm, 0 },
{ X86::VPSRAVDZrr, X86::VPSRAVDZrm, 0 },
{ X86::VPSRLVDZrr, X86::VPSRLVDZrm, 0 },
{ X86::VPSRLVQZrr, X86::VPSRLVQZrm, 0 },
+ { X86::VPSUBBZrr, X86::VPSUBBZrm, 0 },
{ X86::VPSUBDZrr, X86::VPSUBDZrm, 0 },
{ X86::VPSUBQZrr, X86::VPSUBQZrm, 0 },
+ { X86::VPSUBSBZrr, X86::VPSUBSBZrm, 0 },
+ { X86::VPSUBSWZrr, X86::VPSUBSWZrm, 0 },
+ { X86::VPSUBUSBZrr, X86::VPSUBUSBZrm, 0 },
+ { X86::VPSUBUSWZrr, X86::VPSUBUSWZrm, 0 },
+ { X86::VPSUBWZrr, X86::VPSUBWZrm, 0 },
+ { X86::VPUNPCKHBWZrr, X86::VPUNPCKHBWZrm, 0 },
+ { X86::VPUNPCKHDQZrr, X86::VPUNPCKHDQZrm, 0 },
+ { X86::VPUNPCKHQDQZrr, X86::VPUNPCKHQDQZrm, 0 },
+ { X86::VPUNPCKHWDZrr, X86::VPUNPCKHWDZrm, 0 },
+ { X86::VPUNPCKLBWZrr, X86::VPUNPCKLBWZrm, 0 },
+ { X86::VPUNPCKLDQZrr, X86::VPUNPCKLDQZrm, 0 },
+ { X86::VPUNPCKLQDQZrr, X86::VPUNPCKLQDQZrm, 0 },
+ { X86::VPUNPCKLWDZrr, X86::VPUNPCKLWDZrm, 0 },
+ { X86::VPXORDZrr, X86::VPXORDZrm, 0 },
+ { X86::VPXORQZrr, X86::VPXORQZrm, 0 },
{ X86::VSHUFPDZrri, X86::VSHUFPDZrmi, 0 },
{ X86::VSHUFPSZrri, X86::VSHUFPSZrmi, 0 },
- { X86::VALIGNQZrri, X86::VALIGNQZrmi, 0 },
- { X86::VALIGNDZrri, X86::VALIGNDZrmi, 0 },
- { X86::VPMULUDQZrr, X86::VPMULUDQZrm, 0 },
- { X86::VBROADCASTSSZrkz, X86::VBROADCASTSSZmkz, TB_NO_REVERSE },
- { X86::VBROADCASTSDZrkz, X86::VBROADCASTSDZmkz, TB_NO_REVERSE },
-
- // AVX-512{F,VL} foldable instructions
- { X86::VBROADCASTSSZ256rkz, X86::VBROADCASTSSZ256mkz, TB_NO_REVERSE },
- { X86::VBROADCASTSDZ256rkz, X86::VBROADCASTSDZ256mkz, TB_NO_REVERSE },
- { X86::VBROADCASTSSZ128rkz, X86::VBROADCASTSSZ128mkz, TB_NO_REVERSE },
+ { X86::VSUBPDZrr, X86::VSUBPDZrm, 0 },
+ { X86::VSUBPSZrr, X86::VSUBPSZrm, 0 },
+ { X86::VSUBSDZrr, X86::VSUBSDZrm, 0 },
+ { X86::VSUBSDZrr_Int, X86::VSUBSDZrm_Int, TB_NO_REVERSE },
+ { X86::VSUBSSZrr, X86::VSUBSSZrm, 0 },
+ { X86::VSUBSSZrr_Int, X86::VSUBSSZrm_Int, TB_NO_REVERSE },
+ { X86::VUNPCKHPDZrr, X86::VUNPCKHPDZrm, 0 },
+ { X86::VUNPCKHPSZrr, X86::VUNPCKHPSZrm, 0 },
+ { X86::VUNPCKLPDZrr, X86::VUNPCKLPDZrm, 0 },
+ { X86::VUNPCKLPSZrr, X86::VUNPCKLPSZrm, 0 },
+ { X86::VXORPDZrr, X86::VXORPDZrm, 0 },
+ { X86::VXORPSZrr, X86::VXORPSZrm, 0 },
// AVX-512{F,VL} foldable instructions
{ X86::VADDPDZ128rr, X86::VADDPDZ128rm, 0 },
{ X86::VADDPDZ256rr, X86::VADDPDZ256rm, 0 },
{ X86::VADDPSZ128rr, X86::VADDPSZ128rm, 0 },
{ X86::VADDPSZ256rr, X86::VADDPSZ256rm, 0 },
+ { X86::VALIGNDZ128rri, X86::VALIGNDZ128rmi, 0 },
+ { X86::VALIGNDZ256rri, X86::VALIGNDZ256rmi, 0 },
+ { X86::VALIGNQZ128rri, X86::VALIGNQZ128rmi, 0 },
+ { X86::VALIGNQZ256rri, X86::VALIGNQZ256rmi, 0 },
+ { X86::VANDNPDZ128rr, X86::VANDNPDZ128rm, 0 },
+ { X86::VANDNPDZ256rr, X86::VANDNPDZ256rm, 0 },
+ { X86::VANDNPSZ128rr, X86::VANDNPSZ128rm, 0 },
+ { X86::VANDNPSZ256rr, X86::VANDNPSZ256rm, 0 },
+ { X86::VANDPDZ128rr, X86::VANDPDZ128rm, 0 },
+ { X86::VANDPDZ256rr, X86::VANDPDZ256rm, 0 },
+ { X86::VANDPSZ128rr, X86::VANDPSZ128rm, 0 },
+ { X86::VANDPSZ256rr, X86::VANDPSZ256rm, 0 },
+ { X86::VBROADCASTSSZ128rkz, X86::VBROADCASTSSZ128mkz, TB_NO_REVERSE },
+ { X86::VBROADCASTSSZ256rkz, X86::VBROADCASTSSZ256mkz, TB_NO_REVERSE },
+ { X86::VBROADCASTSDZ256rkz, X86::VBROADCASTSDZ256mkz, TB_NO_REVERSE },
+ { X86::VCMPPDZ128rri, X86::VCMPPDZ128rmi, 0 },
+ { X86::VCMPPDZ256rri, X86::VCMPPDZ256rmi, 0 },
+ { X86::VCMPPSZ128rri, X86::VCMPPSZ128rmi, 0 },
+ { X86::VCMPPSZ256rri, X86::VCMPPSZ256rmi, 0 },
+ { X86::VDIVPDZ128rr, X86::VDIVPDZ128rm, 0 },
+ { X86::VDIVPDZ256rr, X86::VDIVPDZ256rm, 0 },
+ { X86::VDIVPSZ128rr, X86::VDIVPSZ128rm, 0 },
+ { X86::VDIVPSZ256rr, X86::VDIVPSZ256rm, 0 },
+ { X86::VINSERTF32x4Z256rr, X86::VINSERTF32x4Z256rm, 0 },
+ { X86::VINSERTF64x2Z256rr, X86::VINSERTF64x2Z256rm, 0 },
+ { X86::VINSERTI32x4Z256rr, X86::VINSERTI32x4Z256rm, 0 },
+ { X86::VINSERTI64x2Z256rr, X86::VINSERTI64x2Z256rm, 0 },
+ { X86::VMAXCPDZ128rr, X86::VMAXCPDZ128rm, 0 },
+ { X86::VMAXCPDZ256rr, X86::VMAXCPDZ256rm, 0 },
+ { X86::VMAXCPSZ128rr, X86::VMAXCPSZ128rm, 0 },
+ { X86::VMAXCPSZ256rr, X86::VMAXCPSZ256rm, 0 },
+ { X86::VMAXPDZ128rr, X86::VMAXPDZ128rm, 0 },
+ { X86::VMAXPDZ256rr, X86::VMAXPDZ256rm, 0 },
+ { X86::VMAXPSZ128rr, X86::VMAXPSZ128rm, 0 },
+ { X86::VMAXPSZ256rr, X86::VMAXPSZ256rm, 0 },
+ { X86::VMINCPDZ128rr, X86::VMINCPDZ128rm, 0 },
+ { X86::VMINCPDZ256rr, X86::VMINCPDZ256rm, 0 },
+ { X86::VMINCPSZ128rr, X86::VMINCPSZ128rm, 0 },
+ { X86::VMINCPSZ256rr, X86::VMINCPSZ256rm, 0 },
+ { X86::VMINPDZ128rr, X86::VMINPDZ128rm, 0 },
+ { X86::VMINPDZ256rr, X86::VMINPDZ256rm, 0 },
+ { X86::VMINPSZ128rr, X86::VMINPSZ128rm, 0 },
+ { X86::VMINPSZ256rr, X86::VMINPSZ256rm, 0 },
+ { X86::VMULPDZ128rr, X86::VMULPDZ128rm, 0 },
+ { X86::VMULPDZ256rr, X86::VMULPDZ256rm, 0 },
+ { X86::VMULPSZ128rr, X86::VMULPSZ128rm, 0 },
+ { X86::VMULPSZ256rr, X86::VMULPSZ256rm, 0 },
+ { X86::VORPDZ128rr, X86::VORPDZ128rm, 0 },
+ { X86::VORPDZ256rr, X86::VORPDZ256rm, 0 },
+ { X86::VORPSZ128rr, X86::VORPSZ128rm, 0 },
+ { X86::VORPSZ256rr, X86::VORPSZ256rm, 0 },
+ { X86::VPADDBZ128rr, X86::VPADDBZ128rm, 0 },
+ { X86::VPADDBZ256rr, X86::VPADDBZ256rm, 0 },
+ { X86::VPADDDZ128rr, X86::VPADDDZ128rm, 0 },
+ { X86::VPADDDZ256rr, X86::VPADDDZ256rm, 0 },
+ { X86::VPADDQZ128rr, X86::VPADDQZ128rm, 0 },
+ { X86::VPADDQZ256rr, X86::VPADDQZ256rm, 0 },
+ { X86::VPADDSBZ128rr, X86::VPADDSBZ128rm, 0 },
+ { X86::VPADDSBZ256rr, X86::VPADDSBZ256rm, 0 },
+ { X86::VPADDSWZ128rr, X86::VPADDSWZ128rm, 0 },
+ { X86::VPADDSWZ256rr, X86::VPADDSWZ256rm, 0 },
+ { X86::VPADDUSBZ128rr, X86::VPADDUSBZ128rm, 0 },
+ { X86::VPADDUSBZ256rr, X86::VPADDUSBZ256rm, 0 },
+ { X86::VPADDUSWZ128rr, X86::VPADDUSWZ128rm, 0 },
+ { X86::VPADDUSWZ256rr, X86::VPADDUSWZ256rm, 0 },
+ { X86::VPADDWZ128rr, X86::VPADDWZ128rm, 0 },
+ { X86::VPADDWZ256rr, X86::VPADDWZ256rm, 0 },
+ { X86::VPALIGNRZ128rri, X86::VPALIGNRZ128rmi, 0 },
+ { X86::VPALIGNRZ256rri, X86::VPALIGNRZ256rmi, 0 },
+ { X86::VPANDDZ128rr, X86::VPANDDZ128rm, 0 },
+ { X86::VPANDDZ256rr, X86::VPANDDZ256rm, 0 },
+ { X86::VPANDNDZ128rr, X86::VPANDNDZ128rm, 0 },
+ { X86::VPANDNDZ256rr, X86::VPANDNDZ256rm, 0 },
+ { X86::VPANDNQZ128rr, X86::VPANDNQZ128rm, 0 },
+ { X86::VPANDNQZ256rr, X86::VPANDNQZ256rm, 0 },
+ { X86::VPANDQZ128rr, X86::VPANDQZ128rm, 0 },
+ { X86::VPANDQZ256rr, X86::VPANDQZ256rm, 0 },
+ { X86::VPCMPBZ128rri, X86::VPCMPBZ128rmi, 0 },
+ { X86::VPCMPBZ256rri, X86::VPCMPBZ256rmi, 0 },
+ { X86::VPCMPDZ128rri, X86::VPCMPDZ128rmi, 0 },
+ { X86::VPCMPDZ256rri, X86::VPCMPDZ256rmi, 0 },
+ { X86::VPCMPEQBZ128rr, X86::VPCMPEQBZ128rm, 0 },
+ { X86::VPCMPEQBZ256rr, X86::VPCMPEQBZ256rm, 0 },
+ { X86::VPCMPEQDZ128rr, X86::VPCMPEQDZ128rm, 0 },
+ { X86::VPCMPEQDZ256rr, X86::VPCMPEQDZ256rm, 0 },
+ { X86::VPCMPEQQZ128rr, X86::VPCMPEQQZ128rm, 0 },
+ { X86::VPCMPEQQZ256rr, X86::VPCMPEQQZ256rm, 0 },
+ { X86::VPCMPEQWZ128rr, X86::VPCMPEQWZ128rm, 0 },
+ { X86::VPCMPEQWZ256rr, X86::VPCMPEQWZ256rm, 0 },
+ { X86::VPCMPGTBZ128rr, X86::VPCMPGTBZ128rm, 0 },
+ { X86::VPCMPGTBZ256rr, X86::VPCMPGTBZ256rm, 0 },
+ { X86::VPCMPGTDZ128rr, X86::VPCMPGTDZ128rm, 0 },
+ { X86::VPCMPGTDZ256rr, X86::VPCMPGTDZ256rm, 0 },
+ { X86::VPCMPGTQZ128rr, X86::VPCMPGTQZ128rm, 0 },
+ { X86::VPCMPGTQZ256rr, X86::VPCMPGTQZ256rm, 0 },
+ { X86::VPCMPGTWZ128rr, X86::VPCMPGTWZ128rm, 0 },
+ { X86::VPCMPGTWZ256rr, X86::VPCMPGTWZ256rm, 0 },
+ { X86::VPCMPQZ128rri, X86::VPCMPQZ128rmi, 0 },
+ { X86::VPCMPQZ256rri, X86::VPCMPQZ256rmi, 0 },
+ { X86::VPCMPUBZ128rri, X86::VPCMPUBZ128rmi, 0 },
+ { X86::VPCMPUBZ256rri, X86::VPCMPUBZ256rmi, 0 },
+ { X86::VPCMPUDZ128rri, X86::VPCMPUDZ128rmi, 0 },
+ { X86::VPCMPUDZ256rri, X86::VPCMPUDZ256rmi, 0 },
+ { X86::VPCMPUQZ128rri, X86::VPCMPUQZ128rmi, 0 },
+ { X86::VPCMPUQZ256rri, X86::VPCMPUQZ256rmi, 0 },
+ { X86::VPCMPUWZ128rri, X86::VPCMPUWZ128rmi, 0 },
+ { X86::VPCMPUWZ256rri, X86::VPCMPUWZ256rmi, 0 },
+ { X86::VPCMPWZ128rri, X86::VPCMPWZ128rmi, 0 },
+ { X86::VPCMPWZ256rri, X86::VPCMPWZ256rmi, 0 },
+ { X86::VPERMBZ128rr, X86::VPERMBZ128rm, 0 },
+ { X86::VPERMBZ256rr, X86::VPERMBZ256rm, 0 },
+ { X86::VPERMDZ256rr, X86::VPERMDZ256rm, 0 },
+ { X86::VPERMILPDZ128rr, X86::VPERMILPDZ128rm, 0 },
+ { X86::VPERMILPDZ256rr, X86::VPERMILPDZ256rm, 0 },
+ { X86::VPERMILPSZ128rr, X86::VPERMILPSZ128rm, 0 },
+ { X86::VPERMILPSZ256rr, X86::VPERMILPSZ256rm, 0 },
+ { X86::VPERMPDZ256rr, X86::VPERMPDZ256rm, 0 },
+ { X86::VPERMPSZ256rr, X86::VPERMPSZ256rm, 0 },
+ { X86::VPERMQZ256rr, X86::VPERMQZ256rm, 0 },
+ { X86::VPERMWZ128rr, X86::VPERMWZ128rm, 0 },
+ { X86::VPERMWZ256rr, X86::VPERMWZ256rm, 0 },
+ { X86::VPMADDUBSWZ128rr, X86::VPMADDUBSWZ128rm, 0 },
+ { X86::VPMADDUBSWZ256rr, X86::VPMADDUBSWZ256rm, 0 },
+ { X86::VPMADDWDZ128rr, X86::VPMADDWDZ128rm, 0 },
+ { X86::VPMADDWDZ256rr, X86::VPMADDWDZ256rm, 0 },
+ { X86::VPORDZ128rr, X86::VPORDZ128rm, 0 },
+ { X86::VPORDZ256rr, X86::VPORDZ256rm, 0 },
+ { X86::VPORQZ128rr, X86::VPORQZ128rm, 0 },
+ { X86::VPORQZ256rr, X86::VPORQZ256rm, 0 },
+ { X86::VPSHUFBZ128rr, X86::VPSHUFBZ128rm, 0 },
+ { X86::VPSHUFBZ256rr, X86::VPSHUFBZ256rm, 0 },
+ { X86::VPSUBBZ128rr, X86::VPSUBBZ128rm, 0 },
+ { X86::VPSUBBZ256rr, X86::VPSUBBZ256rm, 0 },
+ { X86::VPSUBDZ128rr, X86::VPSUBDZ128rm, 0 },
+ { X86::VPSUBDZ256rr, X86::VPSUBDZ256rm, 0 },
+ { X86::VPSUBQZ128rr, X86::VPSUBQZ128rm, 0 },
+ { X86::VPSUBQZ256rr, X86::VPSUBQZ256rm, 0 },
+ { X86::VPSUBSBZ128rr, X86::VPSUBSBZ128rm, 0 },
+ { X86::VPSUBSBZ256rr, X86::VPSUBSBZ256rm, 0 },
+ { X86::VPSUBSWZ128rr, X86::VPSUBSWZ128rm, 0 },
+ { X86::VPSUBSWZ256rr, X86::VPSUBSWZ256rm, 0 },
+ { X86::VPSUBUSBZ128rr, X86::VPSUBUSBZ128rm, 0 },
+ { X86::VPSUBUSBZ256rr, X86::VPSUBUSBZ256rm, 0 },
+ { X86::VPSUBUSWZ128rr, X86::VPSUBUSWZ128rm, 0 },
+ { X86::VPSUBUSWZ256rr, X86::VPSUBUSWZ256rm, 0 },
+ { X86::VPSUBWZ128rr, X86::VPSUBWZ128rm, 0 },
+ { X86::VPSUBWZ256rr, X86::VPSUBWZ256rm, 0 },
+ { X86::VPUNPCKHBWZ128rr, X86::VPUNPCKHBWZ128rm, 0 },
+ { X86::VPUNPCKHBWZ256rr, X86::VPUNPCKHBWZ256rm, 0 },
+ { X86::VPUNPCKHDQZ128rr, X86::VPUNPCKHDQZ128rm, 0 },
+ { X86::VPUNPCKHDQZ256rr, X86::VPUNPCKHDQZ256rm, 0 },
+ { X86::VPUNPCKHQDQZ128rr, X86::VPUNPCKHQDQZ128rm, 0 },
+ { X86::VPUNPCKHQDQZ256rr, X86::VPUNPCKHQDQZ256rm, 0 },
+ { X86::VPUNPCKHWDZ128rr, X86::VPUNPCKHWDZ128rm, 0 },
+ { X86::VPUNPCKHWDZ256rr, X86::VPUNPCKHWDZ256rm, 0 },
+ { X86::VPUNPCKLBWZ128rr, X86::VPUNPCKLBWZ128rm, 0 },
+ { X86::VPUNPCKLBWZ256rr, X86::VPUNPCKLBWZ256rm, 0 },
+ { X86::VPUNPCKLDQZ128rr, X86::VPUNPCKLDQZ128rm, 0 },
+ { X86::VPUNPCKLDQZ256rr, X86::VPUNPCKLDQZ256rm, 0 },
+ { X86::VPUNPCKLQDQZ128rr, X86::VPUNPCKLQDQZ128rm, 0 },
+ { X86::VPUNPCKLQDQZ256rr, X86::VPUNPCKLQDQZ256rm, 0 },
+ { X86::VPUNPCKLWDZ128rr, X86::VPUNPCKLWDZ128rm, 0 },
+ { X86::VPUNPCKLWDZ256rr, X86::VPUNPCKLWDZ256rm, 0 },
+ { X86::VPXORDZ128rr, X86::VPXORDZ128rm, 0 },
+ { X86::VPXORDZ256rr, X86::VPXORDZ256rm, 0 },
+ { X86::VPXORQZ128rr, X86::VPXORQZ128rm, 0 },
+ { X86::VPXORQZ256rr, X86::VPXORQZ256rm, 0 },
+ { X86::VSUBPDZ128rr, X86::VSUBPDZ128rm, 0 },
+ { X86::VSUBPDZ256rr, X86::VSUBPDZ256rm, 0 },
+ { X86::VSUBPSZ128rr, X86::VSUBPSZ128rm, 0 },
+ { X86::VSUBPSZ256rr, X86::VSUBPSZ256rm, 0 },
+ { X86::VUNPCKHPDZ128rr, X86::VUNPCKHPDZ128rm, 0 },
+ { X86::VUNPCKHPDZ256rr, X86::VUNPCKHPDZ256rm, 0 },
+ { X86::VUNPCKHPSZ128rr, X86::VUNPCKHPSZ128rm, 0 },
+ { X86::VUNPCKHPSZ256rr, X86::VUNPCKHPSZ256rm, 0 },
+ { X86::VUNPCKLPDZ128rr, X86::VUNPCKLPDZ128rm, 0 },
+ { X86::VUNPCKLPDZ256rr, X86::VUNPCKLPDZ256rm, 0 },
+ { X86::VUNPCKLPSZ128rr, X86::VUNPCKLPSZ128rm, 0 },
+ { X86::VUNPCKLPSZ256rr, X86::VUNPCKLPSZ256rm, 0 },
+ { X86::VXORPDZ128rr, X86::VXORPDZ128rm, 0 },
+ { X86::VXORPDZ256rr, X86::VXORPDZ256rm, 0 },
+ { X86::VXORPSZ128rr, X86::VXORPSZ128rm, 0 },
+ { X86::VXORPSZ256rr, X86::VXORPSZ256rm, 0 },
+
+ // AVX-512 masked foldable instructions
+ { X86::VPERMILPDZrikz, X86::VPERMILPDZmikz, 0 },
+ { X86::VPERMILPSZrikz, X86::VPERMILPSZmikz, 0 },
+ { X86::VPERMPDZrikz, X86::VPERMPDZmikz, 0 },
+ { X86::VPERMQZrikz, X86::VPERMQZmikz, 0 },
+ { X86::VPMOVSXBDZrrkz, X86::VPMOVSXBDZrmkz, 0 },
+ { X86::VPMOVSXBQZrrkz, X86::VPMOVSXBQZrmkz, TB_NO_REVERSE },
+ { X86::VPMOVSXBWZrrkz, X86::VPMOVSXBWZrmkz, 0 },
+ { X86::VPMOVSXDQZrrkz, X86::VPMOVSXDQZrmkz, 0 },
+ { X86::VPMOVSXWDZrrkz, X86::VPMOVSXWDZrmkz, 0 },
+ { X86::VPMOVSXWQZrrkz, X86::VPMOVSXWQZrmkz, 0 },
+ { X86::VPMOVZXBDZrrkz, X86::VPMOVZXBDZrmkz, 0 },
+ { X86::VPMOVZXBQZrrkz, X86::VPMOVZXBQZrmkz, TB_NO_REVERSE },
+ { X86::VPMOVZXBWZrrkz, X86::VPMOVZXBWZrmkz, 0 },
+ { X86::VPMOVZXDQZrrkz, X86::VPMOVZXDQZrmkz, 0 },
+ { X86::VPMOVZXWDZrrkz, X86::VPMOVZXWDZrmkz, 0 },
+ { X86::VPMOVZXWQZrrkz, X86::VPMOVZXWQZrmkz, 0 },
+ { X86::VPSHUFDZrikz, X86::VPSHUFDZmikz, 0 },
+ { X86::VPSHUFHWZrikz, X86::VPSHUFHWZmikz, 0 },
+ { X86::VPSHUFLWZrikz, X86::VPSHUFLWZmikz, 0 },
+
+ // AVX-512VL 256-bit masked foldable instructions
+ { X86::VPERMILPDZ256rikz, X86::VPERMILPDZ256mikz, 0 },
+ { X86::VPERMILPSZ256rikz, X86::VPERMILPSZ256mikz, 0 },
+ { X86::VPERMPDZ256rikz, X86::VPERMPDZ256mikz, 0 },
+ { X86::VPERMQZ256rikz, X86::VPERMQZ256mikz, 0 },
+ { X86::VPMOVSXBDZ256rrkz, X86::VPMOVSXBDZ256rmkz, TB_NO_REVERSE },
+ { X86::VPMOVSXBQZ256rrkz, X86::VPMOVSXBQZ256rmkz, TB_NO_REVERSE },
+ { X86::VPMOVSXBWZ256rrkz, X86::VPMOVSXBWZ256rmkz, 0 },
+ { X86::VPMOVSXDQZ256rrkz, X86::VPMOVSXDQZ256rmkz, 0 },
+ { X86::VPMOVSXWDZ256rrkz, X86::VPMOVSXWDZ256rmkz, 0 },
+ { X86::VPMOVSXWQZ256rrkz, X86::VPMOVSXWQZ256rmkz, TB_NO_REVERSE },
+ { X86::VPMOVZXBDZ256rrkz, X86::VPMOVZXBDZ256rmkz, TB_NO_REVERSE },
+ { X86::VPMOVZXBQZ256rrkz, X86::VPMOVZXBQZ256rmkz, TB_NO_REVERSE },
+ { X86::VPMOVZXBWZ256rrkz, X86::VPMOVZXBWZ256rmkz, 0 },
+ { X86::VPMOVZXDQZ256rrkz, X86::VPMOVZXDQZ256rmkz, 0 },
+ { X86::VPMOVZXWDZ256rrkz, X86::VPMOVZXWDZ256rmkz, 0 },
+ { X86::VPMOVZXWQZ256rrkz, X86::VPMOVZXWQZ256rmkz, TB_NO_REVERSE },
+ { X86::VPSHUFDZ256rikz, X86::VPSHUFDZ256mikz, 0 },
+ { X86::VPSHUFHWZ256rikz, X86::VPSHUFHWZ256mikz, 0 },
+ { X86::VPSHUFLWZ256rikz, X86::VPSHUFLWZ256mikz, 0 },
+
+ // AVX-512VL 128-bit masked foldable instructions
+ { X86::VPERMILPDZ128rikz, X86::VPERMILPDZ128mikz, 0 },
+ { X86::VPERMILPSZ128rikz, X86::VPERMILPSZ128mikz, 0 },
+ { X86::VPMOVSXBDZ128rrkz, X86::VPMOVSXBDZ128rmkz, TB_NO_REVERSE },
+ { X86::VPMOVSXBQZ128rrkz, X86::VPMOVSXBQZ128rmkz, TB_NO_REVERSE },
+ { X86::VPMOVSXBWZ128rrkz, X86::VPMOVSXBWZ128rmkz, TB_NO_REVERSE },
+ { X86::VPMOVSXDQZ128rrkz, X86::VPMOVSXDQZ128rmkz, TB_NO_REVERSE },
+ { X86::VPMOVSXWDZ128rrkz, X86::VPMOVSXWDZ128rmkz, TB_NO_REVERSE },
+ { X86::VPMOVSXWQZ128rrkz, X86::VPMOVSXWQZ128rmkz, TB_NO_REVERSE },
+ { X86::VPMOVZXBDZ128rrkz, X86::VPMOVZXBDZ128rmkz, TB_NO_REVERSE },
+ { X86::VPMOVZXBQZ128rrkz, X86::VPMOVZXBQZ128rmkz, TB_NO_REVERSE },
+ { X86::VPMOVZXBWZ128rrkz, X86::VPMOVZXBWZ128rmkz, TB_NO_REVERSE },
+ { X86::VPMOVZXDQZ128rrkz, X86::VPMOVZXDQZ128rmkz, TB_NO_REVERSE },
+ { X86::VPMOVZXWDZ128rrkz, X86::VPMOVZXWDZ128rmkz, TB_NO_REVERSE },
+ { X86::VPMOVZXWQZ128rrkz, X86::VPMOVZXWQZ128rmkz, TB_NO_REVERSE },
+ { X86::VPSHUFDZ128rikz, X86::VPSHUFDZ128mikz, 0 },
+ { X86::VPSHUFHWZ128rikz, X86::VPSHUFHWZ128mikz, 0 },
+ { X86::VPSHUFLWZ128rikz, X86::VPSHUFLWZ128mikz, 0 },
// AES foldable instructions
{ X86::AESDECLASTrr, X86::AESDECLASTrm, TB_ALIGN_16 },
@@ -1773,170 +2218,47 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
}
static const X86MemoryFoldTableEntry MemoryFoldTable3[] = {
- // FMA foldable instructions
- { X86::VFMADDSSr231r, X86::VFMADDSSr231m, TB_ALIGN_NONE },
- { X86::VFMADDSSr231r_Int, X86::VFMADDSSr231m_Int, TB_ALIGN_NONE },
- { X86::VFMADDSDr231r, X86::VFMADDSDr231m, TB_ALIGN_NONE },
- { X86::VFMADDSDr231r_Int, X86::VFMADDSDr231m_Int, TB_ALIGN_NONE },
- { X86::VFMADDSSr132r, X86::VFMADDSSr132m, TB_ALIGN_NONE },
- { X86::VFMADDSSr132r_Int, X86::VFMADDSSr132m_Int, TB_ALIGN_NONE },
- { X86::VFMADDSDr132r, X86::VFMADDSDr132m, TB_ALIGN_NONE },
- { X86::VFMADDSDr132r_Int, X86::VFMADDSDr132m_Int, TB_ALIGN_NONE },
- { X86::VFMADDSSr213r, X86::VFMADDSSr213m, TB_ALIGN_NONE },
- { X86::VFMADDSSr213r_Int, X86::VFMADDSSr213m_Int, TB_ALIGN_NONE },
- { X86::VFMADDSDr213r, X86::VFMADDSDr213m, TB_ALIGN_NONE },
- { X86::VFMADDSDr213r_Int, X86::VFMADDSDr213m_Int, TB_ALIGN_NONE },
-
- { X86::VFMADDPSr231r, X86::VFMADDPSr231m, TB_ALIGN_NONE },
- { X86::VFMADDPDr231r, X86::VFMADDPDr231m, TB_ALIGN_NONE },
- { X86::VFMADDPSr132r, X86::VFMADDPSr132m, TB_ALIGN_NONE },
- { X86::VFMADDPDr132r, X86::VFMADDPDr132m, TB_ALIGN_NONE },
- { X86::VFMADDPSr213r, X86::VFMADDPSr213m, TB_ALIGN_NONE },
- { X86::VFMADDPDr213r, X86::VFMADDPDr213m, TB_ALIGN_NONE },
- { X86::VFMADDPSr231rY, X86::VFMADDPSr231mY, TB_ALIGN_NONE },
- { X86::VFMADDPDr231rY, X86::VFMADDPDr231mY, TB_ALIGN_NONE },
- { X86::VFMADDPSr132rY, X86::VFMADDPSr132mY, TB_ALIGN_NONE },
- { X86::VFMADDPDr132rY, X86::VFMADDPDr132mY, TB_ALIGN_NONE },
- { X86::VFMADDPSr213rY, X86::VFMADDPSr213mY, TB_ALIGN_NONE },
- { X86::VFMADDPDr213rY, X86::VFMADDPDr213mY, TB_ALIGN_NONE },
-
- { X86::VFNMADDSSr231r, X86::VFNMADDSSr231m, TB_ALIGN_NONE },
- { X86::VFNMADDSSr231r_Int, X86::VFNMADDSSr231m_Int, TB_ALIGN_NONE },
- { X86::VFNMADDSDr231r, X86::VFNMADDSDr231m, TB_ALIGN_NONE },
- { X86::VFNMADDSDr231r_Int, X86::VFNMADDSDr231m_Int, TB_ALIGN_NONE },
- { X86::VFNMADDSSr132r, X86::VFNMADDSSr132m, TB_ALIGN_NONE },
- { X86::VFNMADDSSr132r_Int, X86::VFNMADDSSr132m_Int, TB_ALIGN_NONE },
- { X86::VFNMADDSDr132r, X86::VFNMADDSDr132m, TB_ALIGN_NONE },
- { X86::VFNMADDSDr132r_Int, X86::VFNMADDSDr132m_Int, TB_ALIGN_NONE },
- { X86::VFNMADDSSr213r, X86::VFNMADDSSr213m, TB_ALIGN_NONE },
- { X86::VFNMADDSSr213r_Int, X86::VFNMADDSSr213m_Int, TB_ALIGN_NONE },
- { X86::VFNMADDSDr213r, X86::VFNMADDSDr213m, TB_ALIGN_NONE },
- { X86::VFNMADDSDr213r_Int, X86::VFNMADDSDr213m_Int, TB_ALIGN_NONE },
-
- { X86::VFNMADDPSr231r, X86::VFNMADDPSr231m, TB_ALIGN_NONE },
- { X86::VFNMADDPDr231r, X86::VFNMADDPDr231m, TB_ALIGN_NONE },
- { X86::VFNMADDPSr132r, X86::VFNMADDPSr132m, TB_ALIGN_NONE },
- { X86::VFNMADDPDr132r, X86::VFNMADDPDr132m, TB_ALIGN_NONE },
- { X86::VFNMADDPSr213r, X86::VFNMADDPSr213m, TB_ALIGN_NONE },
- { X86::VFNMADDPDr213r, X86::VFNMADDPDr213m, TB_ALIGN_NONE },
- { X86::VFNMADDPSr231rY, X86::VFNMADDPSr231mY, TB_ALIGN_NONE },
- { X86::VFNMADDPDr231rY, X86::VFNMADDPDr231mY, TB_ALIGN_NONE },
- { X86::VFNMADDPSr132rY, X86::VFNMADDPSr132mY, TB_ALIGN_NONE },
- { X86::VFNMADDPDr132rY, X86::VFNMADDPDr132mY, TB_ALIGN_NONE },
- { X86::VFNMADDPSr213rY, X86::VFNMADDPSr213mY, TB_ALIGN_NONE },
- { X86::VFNMADDPDr213rY, X86::VFNMADDPDr213mY, TB_ALIGN_NONE },
-
- { X86::VFMSUBSSr231r, X86::VFMSUBSSr231m, TB_ALIGN_NONE },
- { X86::VFMSUBSSr231r_Int, X86::VFMSUBSSr231m_Int, TB_ALIGN_NONE },
- { X86::VFMSUBSDr231r, X86::VFMSUBSDr231m, TB_ALIGN_NONE },
- { X86::VFMSUBSDr231r_Int, X86::VFMSUBSDr231m_Int, TB_ALIGN_NONE },
- { X86::VFMSUBSSr132r, X86::VFMSUBSSr132m, TB_ALIGN_NONE },
- { X86::VFMSUBSSr132r_Int, X86::VFMSUBSSr132m_Int, TB_ALIGN_NONE },
- { X86::VFMSUBSDr132r, X86::VFMSUBSDr132m, TB_ALIGN_NONE },
- { X86::VFMSUBSDr132r_Int, X86::VFMSUBSDr132m_Int, TB_ALIGN_NONE },
- { X86::VFMSUBSSr213r, X86::VFMSUBSSr213m, TB_ALIGN_NONE },
- { X86::VFMSUBSSr213r_Int, X86::VFMSUBSSr213m_Int, TB_ALIGN_NONE },
- { X86::VFMSUBSDr213r, X86::VFMSUBSDr213m, TB_ALIGN_NONE },
- { X86::VFMSUBSDr213r_Int, X86::VFMSUBSDr213m_Int, TB_ALIGN_NONE },
-
- { X86::VFMSUBPSr231r, X86::VFMSUBPSr231m, TB_ALIGN_NONE },
- { X86::VFMSUBPDr231r, X86::VFMSUBPDr231m, TB_ALIGN_NONE },
- { X86::VFMSUBPSr132r, X86::VFMSUBPSr132m, TB_ALIGN_NONE },
- { X86::VFMSUBPDr132r, X86::VFMSUBPDr132m, TB_ALIGN_NONE },
- { X86::VFMSUBPSr213r, X86::VFMSUBPSr213m, TB_ALIGN_NONE },
- { X86::VFMSUBPDr213r, X86::VFMSUBPDr213m, TB_ALIGN_NONE },
- { X86::VFMSUBPSr231rY, X86::VFMSUBPSr231mY, TB_ALIGN_NONE },
- { X86::VFMSUBPDr231rY, X86::VFMSUBPDr231mY, TB_ALIGN_NONE },
- { X86::VFMSUBPSr132rY, X86::VFMSUBPSr132mY, TB_ALIGN_NONE },
- { X86::VFMSUBPDr132rY, X86::VFMSUBPDr132mY, TB_ALIGN_NONE },
- { X86::VFMSUBPSr213rY, X86::VFMSUBPSr213mY, TB_ALIGN_NONE },
- { X86::VFMSUBPDr213rY, X86::VFMSUBPDr213mY, TB_ALIGN_NONE },
-
- { X86::VFNMSUBSSr231r, X86::VFNMSUBSSr231m, TB_ALIGN_NONE },
- { X86::VFNMSUBSSr231r_Int, X86::VFNMSUBSSr231m_Int, TB_ALIGN_NONE },
- { X86::VFNMSUBSDr231r, X86::VFNMSUBSDr231m, TB_ALIGN_NONE },
- { X86::VFNMSUBSDr231r_Int, X86::VFNMSUBSDr231m_Int, TB_ALIGN_NONE },
- { X86::VFNMSUBSSr132r, X86::VFNMSUBSSr132m, TB_ALIGN_NONE },
- { X86::VFNMSUBSSr132r_Int, X86::VFNMSUBSSr132m_Int, TB_ALIGN_NONE },
- { X86::VFNMSUBSDr132r, X86::VFNMSUBSDr132m, TB_ALIGN_NONE },
- { X86::VFNMSUBSDr132r_Int, X86::VFNMSUBSDr132m_Int, TB_ALIGN_NONE },
- { X86::VFNMSUBSSr213r, X86::VFNMSUBSSr213m, TB_ALIGN_NONE },
- { X86::VFNMSUBSSr213r_Int, X86::VFNMSUBSSr213m_Int, TB_ALIGN_NONE },
- { X86::VFNMSUBSDr213r, X86::VFNMSUBSDr213m, TB_ALIGN_NONE },
- { X86::VFNMSUBSDr213r_Int, X86::VFNMSUBSDr213m_Int, TB_ALIGN_NONE },
-
- { X86::VFNMSUBPSr231r, X86::VFNMSUBPSr231m, TB_ALIGN_NONE },
- { X86::VFNMSUBPDr231r, X86::VFNMSUBPDr231m, TB_ALIGN_NONE },
- { X86::VFNMSUBPSr132r, X86::VFNMSUBPSr132m, TB_ALIGN_NONE },
- { X86::VFNMSUBPDr132r, X86::VFNMSUBPDr132m, TB_ALIGN_NONE },
- { X86::VFNMSUBPSr213r, X86::VFNMSUBPSr213m, TB_ALIGN_NONE },
- { X86::VFNMSUBPDr213r, X86::VFNMSUBPDr213m, TB_ALIGN_NONE },
- { X86::VFNMSUBPSr231rY, X86::VFNMSUBPSr231mY, TB_ALIGN_NONE },
- { X86::VFNMSUBPDr231rY, X86::VFNMSUBPDr231mY, TB_ALIGN_NONE },
- { X86::VFNMSUBPSr132rY, X86::VFNMSUBPSr132mY, TB_ALIGN_NONE },
- { X86::VFNMSUBPDr132rY, X86::VFNMSUBPDr132mY, TB_ALIGN_NONE },
- { X86::VFNMSUBPSr213rY, X86::VFNMSUBPSr213mY, TB_ALIGN_NONE },
- { X86::VFNMSUBPDr213rY, X86::VFNMSUBPDr213mY, TB_ALIGN_NONE },
-
- { X86::VFMADDSUBPSr231r, X86::VFMADDSUBPSr231m, TB_ALIGN_NONE },
- { X86::VFMADDSUBPDr231r, X86::VFMADDSUBPDr231m, TB_ALIGN_NONE },
- { X86::VFMADDSUBPSr132r, X86::VFMADDSUBPSr132m, TB_ALIGN_NONE },
- { X86::VFMADDSUBPDr132r, X86::VFMADDSUBPDr132m, TB_ALIGN_NONE },
- { X86::VFMADDSUBPSr213r, X86::VFMADDSUBPSr213m, TB_ALIGN_NONE },
- { X86::VFMADDSUBPDr213r, X86::VFMADDSUBPDr213m, TB_ALIGN_NONE },
- { X86::VFMADDSUBPSr231rY, X86::VFMADDSUBPSr231mY, TB_ALIGN_NONE },
- { X86::VFMADDSUBPDr231rY, X86::VFMADDSUBPDr231mY, TB_ALIGN_NONE },
- { X86::VFMADDSUBPSr132rY, X86::VFMADDSUBPSr132mY, TB_ALIGN_NONE },
- { X86::VFMADDSUBPDr132rY, X86::VFMADDSUBPDr132mY, TB_ALIGN_NONE },
- { X86::VFMADDSUBPSr213rY, X86::VFMADDSUBPSr213mY, TB_ALIGN_NONE },
- { X86::VFMADDSUBPDr213rY, X86::VFMADDSUBPDr213mY, TB_ALIGN_NONE },
-
- { X86::VFMSUBADDPSr231r, X86::VFMSUBADDPSr231m, TB_ALIGN_NONE },
- { X86::VFMSUBADDPDr231r, X86::VFMSUBADDPDr231m, TB_ALIGN_NONE },
- { X86::VFMSUBADDPSr132r, X86::VFMSUBADDPSr132m, TB_ALIGN_NONE },
- { X86::VFMSUBADDPDr132r, X86::VFMSUBADDPDr132m, TB_ALIGN_NONE },
- { X86::VFMSUBADDPSr213r, X86::VFMSUBADDPSr213m, TB_ALIGN_NONE },
- { X86::VFMSUBADDPDr213r, X86::VFMSUBADDPDr213m, TB_ALIGN_NONE },
- { X86::VFMSUBADDPSr231rY, X86::VFMSUBADDPSr231mY, TB_ALIGN_NONE },
- { X86::VFMSUBADDPDr231rY, X86::VFMSUBADDPDr231mY, TB_ALIGN_NONE },
- { X86::VFMSUBADDPSr132rY, X86::VFMSUBADDPSr132mY, TB_ALIGN_NONE },
- { X86::VFMSUBADDPDr132rY, X86::VFMSUBADDPDr132mY, TB_ALIGN_NONE },
- { X86::VFMSUBADDPSr213rY, X86::VFMSUBADDPSr213mY, TB_ALIGN_NONE },
- { X86::VFMSUBADDPDr213rY, X86::VFMSUBADDPDr213mY, TB_ALIGN_NONE },
-
// FMA4 foldable patterns
{ X86::VFMADDSS4rr, X86::VFMADDSS4rm, TB_ALIGN_NONE },
+ { X86::VFMADDSS4rr_Int, X86::VFMADDSS4rm_Int, TB_NO_REVERSE },
{ X86::VFMADDSD4rr, X86::VFMADDSD4rm, TB_ALIGN_NONE },
+ { X86::VFMADDSD4rr_Int, X86::VFMADDSD4rm_Int, TB_NO_REVERSE },
{ X86::VFMADDPS4rr, X86::VFMADDPS4rm, TB_ALIGN_NONE },
{ X86::VFMADDPD4rr, X86::VFMADDPD4rm, TB_ALIGN_NONE },
- { X86::VFMADDPS4rrY, X86::VFMADDPS4rmY, TB_ALIGN_NONE },
- { X86::VFMADDPD4rrY, X86::VFMADDPD4rmY, TB_ALIGN_NONE },
+ { X86::VFMADDPS4Yrr, X86::VFMADDPS4Yrm, TB_ALIGN_NONE },
+ { X86::VFMADDPD4Yrr, X86::VFMADDPD4Yrm, TB_ALIGN_NONE },
{ X86::VFNMADDSS4rr, X86::VFNMADDSS4rm, TB_ALIGN_NONE },
+ { X86::VFNMADDSS4rr_Int, X86::VFNMADDSS4rm_Int, TB_NO_REVERSE },
{ X86::VFNMADDSD4rr, X86::VFNMADDSD4rm, TB_ALIGN_NONE },
+ { X86::VFNMADDSD4rr_Int, X86::VFNMADDSD4rm_Int, TB_NO_REVERSE },
{ X86::VFNMADDPS4rr, X86::VFNMADDPS4rm, TB_ALIGN_NONE },
{ X86::VFNMADDPD4rr, X86::VFNMADDPD4rm, TB_ALIGN_NONE },
- { X86::VFNMADDPS4rrY, X86::VFNMADDPS4rmY, TB_ALIGN_NONE },
- { X86::VFNMADDPD4rrY, X86::VFNMADDPD4rmY, TB_ALIGN_NONE },
+ { X86::VFNMADDPS4Yrr, X86::VFNMADDPS4Yrm, TB_ALIGN_NONE },
+ { X86::VFNMADDPD4Yrr, X86::VFNMADDPD4Yrm, TB_ALIGN_NONE },
{ X86::VFMSUBSS4rr, X86::VFMSUBSS4rm, TB_ALIGN_NONE },
+ { X86::VFMSUBSS4rr_Int, X86::VFMSUBSS4rm_Int, TB_NO_REVERSE },
{ X86::VFMSUBSD4rr, X86::VFMSUBSD4rm, TB_ALIGN_NONE },
+ { X86::VFMSUBSD4rr_Int, X86::VFMSUBSD4rm_Int, TB_NO_REVERSE },
{ X86::VFMSUBPS4rr, X86::VFMSUBPS4rm, TB_ALIGN_NONE },
{ X86::VFMSUBPD4rr, X86::VFMSUBPD4rm, TB_ALIGN_NONE },
- { X86::VFMSUBPS4rrY, X86::VFMSUBPS4rmY, TB_ALIGN_NONE },
- { X86::VFMSUBPD4rrY, X86::VFMSUBPD4rmY, TB_ALIGN_NONE },
+ { X86::VFMSUBPS4Yrr, X86::VFMSUBPS4Yrm, TB_ALIGN_NONE },
+ { X86::VFMSUBPD4Yrr, X86::VFMSUBPD4Yrm, TB_ALIGN_NONE },
{ X86::VFNMSUBSS4rr, X86::VFNMSUBSS4rm, TB_ALIGN_NONE },
+ { X86::VFNMSUBSS4rr_Int, X86::VFNMSUBSS4rm_Int, TB_NO_REVERSE },
{ X86::VFNMSUBSD4rr, X86::VFNMSUBSD4rm, TB_ALIGN_NONE },
+ { X86::VFNMSUBSD4rr_Int, X86::VFNMSUBSD4rm_Int, TB_NO_REVERSE },
{ X86::VFNMSUBPS4rr, X86::VFNMSUBPS4rm, TB_ALIGN_NONE },
{ X86::VFNMSUBPD4rr, X86::VFNMSUBPD4rm, TB_ALIGN_NONE },
- { X86::VFNMSUBPS4rrY, X86::VFNMSUBPS4rmY, TB_ALIGN_NONE },
- { X86::VFNMSUBPD4rrY, X86::VFNMSUBPD4rmY, TB_ALIGN_NONE },
+ { X86::VFNMSUBPS4Yrr, X86::VFNMSUBPS4Yrm, TB_ALIGN_NONE },
+ { X86::VFNMSUBPD4Yrr, X86::VFNMSUBPD4Yrm, TB_ALIGN_NONE },
{ X86::VFMADDSUBPS4rr, X86::VFMADDSUBPS4rm, TB_ALIGN_NONE },
{ X86::VFMADDSUBPD4rr, X86::VFMADDSUBPD4rm, TB_ALIGN_NONE },
- { X86::VFMADDSUBPS4rrY, X86::VFMADDSUBPS4rmY, TB_ALIGN_NONE },
- { X86::VFMADDSUBPD4rrY, X86::VFMADDSUBPD4rmY, TB_ALIGN_NONE },
+ { X86::VFMADDSUBPS4Yrr, X86::VFMADDSUBPS4Yrm, TB_ALIGN_NONE },
+ { X86::VFMADDSUBPD4Yrr, X86::VFMADDSUBPD4Yrm, TB_ALIGN_NONE },
{ X86::VFMSUBADDPS4rr, X86::VFMSUBADDPS4rm, TB_ALIGN_NONE },
{ X86::VFMSUBADDPD4rr, X86::VFMSUBADDPD4rm, TB_ALIGN_NONE },
- { X86::VFMSUBADDPS4rrY, X86::VFMSUBADDPS4rmY, TB_ALIGN_NONE },
- { X86::VFMSUBADDPD4rrY, X86::VFMSUBADDPD4rmY, TB_ALIGN_NONE },
+ { X86::VFMSUBADDPS4Yrr, X86::VFMSUBADDPS4Yrm, TB_ALIGN_NONE },
+ { X86::VFMSUBADDPD4Yrr, X86::VFMSUBADDPD4Yrm, TB_ALIGN_NONE },
// XOP foldable instructions
{ X86::VPCMOVrrr, X86::VPCMOVrrm, 0 },
@@ -1947,11 +2269,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VPERMIL2PSrrY, X86::VPERMIL2PSrmY, 0 },
{ X86::VPPERMrrr, X86::VPPERMrrm, 0 },
- // AVX-512 VPERMI instructions with 3 source operands.
- { X86::VPERMI2Drr, X86::VPERMI2Drm, 0 },
- { X86::VPERMI2Qrr, X86::VPERMI2Qrm, 0 },
- { X86::VPERMI2PSrr, X86::VPERMI2PSrm, 0 },
- { X86::VPERMI2PDrr, X86::VPERMI2PDrm, 0 },
+ // AVX-512 instructions with 3 source operands.
{ X86::VBLENDMPDZrr, X86::VBLENDMPDZrm, 0 },
{ X86::VBLENDMPSZrr, X86::VBLENDMPSZrm, 0 },
{ X86::VPBLENDMDZrr, X86::VPBLENDMDZrm, 0 },
@@ -1961,45 +2279,349 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
{ X86::VBROADCASTSSZ256rk, X86::VBROADCASTSSZ256mk, TB_NO_REVERSE },
{ X86::VBROADCASTSDZ256rk, X86::VBROADCASTSDZ256mk, TB_NO_REVERSE },
{ X86::VBROADCASTSSZ128rk, X86::VBROADCASTSSZ128mk, TB_NO_REVERSE },
- // AVX-512 arithmetic instructions
- { X86::VADDPSZrrkz, X86::VADDPSZrmkz, 0 },
+ { X86::VPERMI2Brr, X86::VPERMI2Brm, 0 },
+ { X86::VPERMI2Drr, X86::VPERMI2Drm, 0 },
+ { X86::VPERMI2PSrr, X86::VPERMI2PSrm, 0 },
+ { X86::VPERMI2PDrr, X86::VPERMI2PDrm, 0 },
+ { X86::VPERMI2Qrr, X86::VPERMI2Qrm, 0 },
+ { X86::VPERMI2Wrr, X86::VPERMI2Wrm, 0 },
+ { X86::VPERMT2Brr, X86::VPERMT2Brm, 0 },
+ { X86::VPERMT2Drr, X86::VPERMT2Drm, 0 },
+ { X86::VPERMT2PSrr, X86::VPERMT2PSrm, 0 },
+ { X86::VPERMT2PDrr, X86::VPERMT2PDrm, 0 },
+ { X86::VPERMT2Qrr, X86::VPERMT2Qrm, 0 },
+ { X86::VPERMT2Wrr, X86::VPERMT2Wrm, 0 },
+ { X86::VPTERNLOGDZrri, X86::VPTERNLOGDZrmi, 0 },
+ { X86::VPTERNLOGQZrri, X86::VPTERNLOGQZrmi, 0 },
+
+ // AVX-512VL 256-bit instructions with 3 source operands.
+ { X86::VPERMI2B256rr, X86::VPERMI2B256rm, 0 },
+ { X86::VPERMI2D256rr, X86::VPERMI2D256rm, 0 },
+ { X86::VPERMI2PD256rr, X86::VPERMI2PD256rm, 0 },
+ { X86::VPERMI2PS256rr, X86::VPERMI2PS256rm, 0 },
+ { X86::VPERMI2Q256rr, X86::VPERMI2Q256rm, 0 },
+ { X86::VPERMI2W256rr, X86::VPERMI2W256rm, 0 },
+ { X86::VPERMT2B256rr, X86::VPERMT2B256rm, 0 },
+ { X86::VPERMT2D256rr, X86::VPERMT2D256rm, 0 },
+ { X86::VPERMT2PD256rr, X86::VPERMT2PD256rm, 0 },
+ { X86::VPERMT2PS256rr, X86::VPERMT2PS256rm, 0 },
+ { X86::VPERMT2Q256rr, X86::VPERMT2Q256rm, 0 },
+ { X86::VPERMT2W256rr, X86::VPERMT2W256rm, 0 },
+ { X86::VPTERNLOGDZ256rri, X86::VPTERNLOGDZ256rmi, 0 },
+ { X86::VPTERNLOGQZ256rri, X86::VPTERNLOGQZ256rmi, 0 },
+
+ // AVX-512VL 128-bit instructions with 3 source operands.
+ { X86::VPERMI2B128rr, X86::VPERMI2B128rm, 0 },
+ { X86::VPERMI2D128rr, X86::VPERMI2D128rm, 0 },
+ { X86::VPERMI2PD128rr, X86::VPERMI2PD128rm, 0 },
+ { X86::VPERMI2PS128rr, X86::VPERMI2PS128rm, 0 },
+ { X86::VPERMI2Q128rr, X86::VPERMI2Q128rm, 0 },
+ { X86::VPERMI2W128rr, X86::VPERMI2W128rm, 0 },
+ { X86::VPERMT2B128rr, X86::VPERMT2B128rm, 0 },
+ { X86::VPERMT2D128rr, X86::VPERMT2D128rm, 0 },
+ { X86::VPERMT2PD128rr, X86::VPERMT2PD128rm, 0 },
+ { X86::VPERMT2PS128rr, X86::VPERMT2PS128rm, 0 },
+ { X86::VPERMT2Q128rr, X86::VPERMT2Q128rm, 0 },
+ { X86::VPERMT2W128rr, X86::VPERMT2W128rm, 0 },
+ { X86::VPTERNLOGDZ128rri, X86::VPTERNLOGDZ128rmi, 0 },
+ { X86::VPTERNLOGQZ128rri, X86::VPTERNLOGQZ128rmi, 0 },
+
+ // AVX-512 masked instructions
{ X86::VADDPDZrrkz, X86::VADDPDZrmkz, 0 },
- { X86::VSUBPSZrrkz, X86::VSUBPSZrmkz, 0 },
- { X86::VSUBPDZrrkz, X86::VSUBPDZrmkz, 0 },
- { X86::VMULPSZrrkz, X86::VMULPSZrmkz, 0 },
- { X86::VMULPDZrrkz, X86::VMULPDZrmkz, 0 },
- { X86::VDIVPSZrrkz, X86::VDIVPSZrmkz, 0 },
+ { X86::VADDPSZrrkz, X86::VADDPSZrmkz, 0 },
+ { X86::VALIGNDZrrikz, X86::VALIGNDZrmikz, 0 },
+ { X86::VALIGNQZrrikz, X86::VALIGNQZrmikz, 0 },
+ { X86::VANDNPDZrrkz, X86::VANDNPDZrmkz, 0 },
+ { X86::VANDNPSZrrkz, X86::VANDNPSZrmkz, 0 },
+ { X86::VANDPDZrrkz, X86::VANDPDZrmkz, 0 },
+ { X86::VANDPSZrrkz, X86::VANDPSZrmkz, 0 },
{ X86::VDIVPDZrrkz, X86::VDIVPDZrmkz, 0 },
- { X86::VMINPSZrrkz, X86::VMINPSZrmkz, 0 },
- { X86::VMINPDZrrkz, X86::VMINPDZrmkz, 0 },
- { X86::VMAXPSZrrkz, X86::VMAXPSZrmkz, 0 },
+ { X86::VDIVPSZrrkz, X86::VDIVPSZrmkz, 0 },
+ { X86::VINSERTF32x4Zrrkz, X86::VINSERTF32x4Zrmkz, 0 },
+ { X86::VINSERTF32x8Zrrkz, X86::VINSERTF32x8Zrmkz, 0 },
+ { X86::VINSERTF64x2Zrrkz, X86::VINSERTF64x2Zrmkz, 0 },
+ { X86::VINSERTF64x4Zrrkz, X86::VINSERTF64x4Zrmkz, 0 },
+ { X86::VINSERTI32x4Zrrkz, X86::VINSERTI32x4Zrmkz, 0 },
+ { X86::VINSERTI32x8Zrrkz, X86::VINSERTI32x8Zrmkz, 0 },
+ { X86::VINSERTI64x2Zrrkz, X86::VINSERTI64x2Zrmkz, 0 },
+ { X86::VINSERTI64x4Zrrkz, X86::VINSERTI64x4Zrmkz, 0 },
+ { X86::VMAXCPDZrrkz, X86::VMAXCPDZrmkz, 0 },
+ { X86::VMAXCPSZrrkz, X86::VMAXCPSZrmkz, 0 },
{ X86::VMAXPDZrrkz, X86::VMAXPDZrmkz, 0 },
- // AVX-512{F,VL} arithmetic instructions 256-bit
- { X86::VADDPSZ256rrkz, X86::VADDPSZ256rmkz, 0 },
+ { X86::VMAXPSZrrkz, X86::VMAXPSZrmkz, 0 },
+ { X86::VMINCPDZrrkz, X86::VMINCPDZrmkz, 0 },
+ { X86::VMINCPSZrrkz, X86::VMINCPSZrmkz, 0 },
+ { X86::VMINPDZrrkz, X86::VMINPDZrmkz, 0 },
+ { X86::VMINPSZrrkz, X86::VMINPSZrmkz, 0 },
+ { X86::VMULPDZrrkz, X86::VMULPDZrmkz, 0 },
+ { X86::VMULPSZrrkz, X86::VMULPSZrmkz, 0 },
+ { X86::VORPDZrrkz, X86::VORPDZrmkz, 0 },
+ { X86::VORPSZrrkz, X86::VORPSZrmkz, 0 },
+ { X86::VPADDBZrrkz, X86::VPADDBZrmkz, 0 },
+ { X86::VPADDDZrrkz, X86::VPADDDZrmkz, 0 },
+ { X86::VPADDQZrrkz, X86::VPADDQZrmkz, 0 },
+ { X86::VPADDSBZrrkz, X86::VPADDSBZrmkz, 0 },
+ { X86::VPADDSWZrrkz, X86::VPADDSWZrmkz, 0 },
+ { X86::VPADDUSBZrrkz, X86::VPADDUSBZrmkz, 0 },
+ { X86::VPADDUSWZrrkz, X86::VPADDUSWZrmkz, 0 },
+ { X86::VPADDWZrrkz, X86::VPADDWZrmkz, 0 },
+ { X86::VPALIGNRZrrikz, X86::VPALIGNRZrmikz, 0 },
+ { X86::VPANDDZrrkz, X86::VPANDDZrmkz, 0 },
+ { X86::VPANDNDZrrkz, X86::VPANDNDZrmkz, 0 },
+ { X86::VPANDNQZrrkz, X86::VPANDNQZrmkz, 0 },
+ { X86::VPANDQZrrkz, X86::VPANDQZrmkz, 0 },
+ { X86::VPERMBZrrkz, X86::VPERMBZrmkz, 0 },
+ { X86::VPERMDZrrkz, X86::VPERMDZrmkz, 0 },
+ { X86::VPERMILPDZrrkz, X86::VPERMILPDZrmkz, 0 },
+ { X86::VPERMILPSZrrkz, X86::VPERMILPSZrmkz, 0 },
+ { X86::VPERMPDZrrkz, X86::VPERMPDZrmkz, 0 },
+ { X86::VPERMPSZrrkz, X86::VPERMPSZrmkz, 0 },
+ { X86::VPERMQZrrkz, X86::VPERMQZrmkz, 0 },
+ { X86::VPERMWZrrkz, X86::VPERMWZrmkz, 0 },
+ { X86::VPMADDUBSWZrrkz, X86::VPMADDUBSWZrmkz, 0 },
+ { X86::VPMADDWDZrrkz, X86::VPMADDWDZrmkz, 0 },
+ { X86::VPORDZrrkz, X86::VPORDZrmkz, 0 },
+ { X86::VPORQZrrkz, X86::VPORQZrmkz, 0 },
+ { X86::VPSHUFBZrrkz, X86::VPSHUFBZrmkz, 0 },
+ { X86::VPSUBBZrrkz, X86::VPSUBBZrmkz, 0 },
+ { X86::VPSUBDZrrkz, X86::VPSUBDZrmkz, 0 },
+ { X86::VPSUBQZrrkz, X86::VPSUBQZrmkz, 0 },
+ { X86::VPSUBSBZrrkz, X86::VPSUBSBZrmkz, 0 },
+ { X86::VPSUBSWZrrkz, X86::VPSUBSWZrmkz, 0 },
+ { X86::VPSUBUSBZrrkz, X86::VPSUBUSBZrmkz, 0 },
+ { X86::VPSUBUSWZrrkz, X86::VPSUBUSWZrmkz, 0 },
+ { X86::VPSUBWZrrkz, X86::VPSUBWZrmkz, 0 },
+ { X86::VPUNPCKHBWZrrkz, X86::VPUNPCKHBWZrmkz, 0 },
+ { X86::VPUNPCKHDQZrrkz, X86::VPUNPCKHDQZrmkz, 0 },
+ { X86::VPUNPCKHQDQZrrkz, X86::VPUNPCKHQDQZrmkz, 0 },
+ { X86::VPUNPCKHWDZrrkz, X86::VPUNPCKHWDZrmkz, 0 },
+ { X86::VPUNPCKLBWZrrkz, X86::VPUNPCKLBWZrmkz, 0 },
+ { X86::VPUNPCKLDQZrrkz, X86::VPUNPCKLDQZrmkz, 0 },
+ { X86::VPUNPCKLQDQZrrkz, X86::VPUNPCKLQDQZrmkz, 0 },
+ { X86::VPUNPCKLWDZrrkz, X86::VPUNPCKLWDZrmkz, 0 },
+ { X86::VPXORDZrrkz, X86::VPXORDZrmkz, 0 },
+ { X86::VPXORQZrrkz, X86::VPXORQZrmkz, 0 },
+ { X86::VSUBPDZrrkz, X86::VSUBPDZrmkz, 0 },
+ { X86::VSUBPSZrrkz, X86::VSUBPSZrmkz, 0 },
+ { X86::VUNPCKHPDZrrkz, X86::VUNPCKHPDZrmkz, 0 },
+ { X86::VUNPCKHPSZrrkz, X86::VUNPCKHPSZrmkz, 0 },
+ { X86::VUNPCKLPDZrrkz, X86::VUNPCKLPDZrmkz, 0 },
+ { X86::VUNPCKLPSZrrkz, X86::VUNPCKLPSZrmkz, 0 },
+ { X86::VXORPDZrrkz, X86::VXORPDZrmkz, 0 },
+ { X86::VXORPSZrrkz, X86::VXORPSZrmkz, 0 },
+
+ // AVX-512{F,VL} masked arithmetic instructions 256-bit
{ X86::VADDPDZ256rrkz, X86::VADDPDZ256rmkz, 0 },
- { X86::VSUBPSZ256rrkz, X86::VSUBPSZ256rmkz, 0 },
- { X86::VSUBPDZ256rrkz, X86::VSUBPDZ256rmkz, 0 },
- { X86::VMULPSZ256rrkz, X86::VMULPSZ256rmkz, 0 },
- { X86::VMULPDZ256rrkz, X86::VMULPDZ256rmkz, 0 },
- { X86::VDIVPSZ256rrkz, X86::VDIVPSZ256rmkz, 0 },
+ { X86::VADDPSZ256rrkz, X86::VADDPSZ256rmkz, 0 },
+ { X86::VALIGNDZ256rrikz, X86::VALIGNDZ256rmikz, 0 },
+ { X86::VALIGNQZ256rrikz, X86::VALIGNQZ256rmikz, 0 },
+ { X86::VANDNPDZ256rrkz, X86::VANDNPDZ256rmkz, 0 },
+ { X86::VANDNPSZ256rrkz, X86::VANDNPSZ256rmkz, 0 },
+ { X86::VANDPDZ256rrkz, X86::VANDPDZ256rmkz, 0 },
+ { X86::VANDPSZ256rrkz, X86::VANDPSZ256rmkz, 0 },
{ X86::VDIVPDZ256rrkz, X86::VDIVPDZ256rmkz, 0 },
- { X86::VMINPSZ256rrkz, X86::VMINPSZ256rmkz, 0 },
- { X86::VMINPDZ256rrkz, X86::VMINPDZ256rmkz, 0 },
- { X86::VMAXPSZ256rrkz, X86::VMAXPSZ256rmkz, 0 },
+ { X86::VDIVPSZ256rrkz, X86::VDIVPSZ256rmkz, 0 },
+ { X86::VINSERTF32x4Z256rrkz, X86::VINSERTF32x4Z256rmkz, 0 },
+ { X86::VINSERTF64x2Z256rrkz, X86::VINSERTF64x2Z256rmkz, 0 },
+ { X86::VINSERTI32x4Z256rrkz, X86::VINSERTI32x4Z256rmkz, 0 },
+ { X86::VINSERTI64x2Z256rrkz, X86::VINSERTI64x2Z256rmkz, 0 },
+ { X86::VMAXCPDZ256rrkz, X86::VMAXCPDZ256rmkz, 0 },
+ { X86::VMAXCPSZ256rrkz, X86::VMAXCPSZ256rmkz, 0 },
{ X86::VMAXPDZ256rrkz, X86::VMAXPDZ256rmkz, 0 },
- // AVX-512{F,VL} arithmetic instructions 128-bit
- { X86::VADDPSZ128rrkz, X86::VADDPSZ128rmkz, 0 },
+ { X86::VMAXPSZ256rrkz, X86::VMAXPSZ256rmkz, 0 },
+ { X86::VMINCPDZ256rrkz, X86::VMINCPDZ256rmkz, 0 },
+ { X86::VMINCPSZ256rrkz, X86::VMINCPSZ256rmkz, 0 },
+ { X86::VMINPDZ256rrkz, X86::VMINPDZ256rmkz, 0 },
+ { X86::VMINPSZ256rrkz, X86::VMINPSZ256rmkz, 0 },
+ { X86::VMULPDZ256rrkz, X86::VMULPDZ256rmkz, 0 },
+ { X86::VMULPSZ256rrkz, X86::VMULPSZ256rmkz, 0 },
+ { X86::VORPDZ256rrkz, X86::VORPDZ256rmkz, 0 },
+ { X86::VORPSZ256rrkz, X86::VORPSZ256rmkz, 0 },
+ { X86::VPADDBZ256rrkz, X86::VPADDBZ256rmkz, 0 },
+ { X86::VPADDDZ256rrkz, X86::VPADDDZ256rmkz, 0 },
+ { X86::VPADDQZ256rrkz, X86::VPADDQZ256rmkz, 0 },
+ { X86::VPADDSBZ256rrkz, X86::VPADDSBZ256rmkz, 0 },
+ { X86::VPADDSWZ256rrkz, X86::VPADDSWZ256rmkz, 0 },
+ { X86::VPADDUSBZ256rrkz, X86::VPADDUSBZ256rmkz, 0 },
+ { X86::VPADDUSWZ256rrkz, X86::VPADDUSWZ256rmkz, 0 },
+ { X86::VPADDWZ256rrkz, X86::VPADDWZ256rmkz, 0 },
+ { X86::VPALIGNRZ256rrikz, X86::VPALIGNRZ256rmikz, 0 },
+ { X86::VPANDDZ256rrkz, X86::VPANDDZ256rmkz, 0 },
+ { X86::VPANDNDZ256rrkz, X86::VPANDNDZ256rmkz, 0 },
+ { X86::VPANDNQZ256rrkz, X86::VPANDNQZ256rmkz, 0 },
+ { X86::VPANDQZ256rrkz, X86::VPANDQZ256rmkz, 0 },
+ { X86::VPERMBZ256rrkz, X86::VPERMBZ256rmkz, 0 },
+ { X86::VPERMDZ256rrkz, X86::VPERMDZ256rmkz, 0 },
+ { X86::VPERMILPDZ256rrkz, X86::VPERMILPDZ256rmkz, 0 },
+ { X86::VPERMILPSZ256rrkz, X86::VPERMILPSZ256rmkz, 0 },
+ { X86::VPERMPDZ256rrkz, X86::VPERMPDZ256rmkz, 0 },
+ { X86::VPERMPSZ256rrkz, X86::VPERMPSZ256rmkz, 0 },
+ { X86::VPERMQZ256rrkz, X86::VPERMQZ256rmkz, 0 },
+ { X86::VPERMWZ256rrkz, X86::VPERMWZ256rmkz, 0 },
+ { X86::VPMADDUBSWZ256rrkz, X86::VPMADDUBSWZ256rmkz, 0 },
+ { X86::VPMADDWDZ256rrkz, X86::VPMADDWDZ256rmkz, 0 },
+ { X86::VPORDZ256rrkz, X86::VPORDZ256rmkz, 0 },
+ { X86::VPORQZ256rrkz, X86::VPORQZ256rmkz, 0 },
+ { X86::VPSHUFBZ256rrkz, X86::VPSHUFBZ256rmkz, 0 },
+ { X86::VPSUBBZ256rrkz, X86::VPSUBBZ256rmkz, 0 },
+ { X86::VPSUBDZ256rrkz, X86::VPSUBDZ256rmkz, 0 },
+ { X86::VPSUBQZ256rrkz, X86::VPSUBQZ256rmkz, 0 },
+ { X86::VPSUBSBZ256rrkz, X86::VPSUBSBZ256rmkz, 0 },
+ { X86::VPSUBSWZ256rrkz, X86::VPSUBSWZ256rmkz, 0 },
+ { X86::VPSUBUSBZ256rrkz, X86::VPSUBUSBZ256rmkz, 0 },
+ { X86::VPSUBUSWZ256rrkz, X86::VPSUBUSWZ256rmkz, 0 },
+ { X86::VPSUBWZ256rrkz, X86::VPSUBWZ256rmkz, 0 },
+ { X86::VPUNPCKHBWZ256rrkz, X86::VPUNPCKHBWZ256rmkz, 0 },
+ { X86::VPUNPCKHDQZ256rrkz, X86::VPUNPCKHDQZ256rmkz, 0 },
+ { X86::VPUNPCKHQDQZ256rrkz, X86::VPUNPCKHQDQZ256rmkz, 0 },
+ { X86::VPUNPCKHWDZ256rrkz, X86::VPUNPCKHWDZ256rmkz, 0 },
+ { X86::VPUNPCKLBWZ256rrkz, X86::VPUNPCKLBWZ256rmkz, 0 },
+ { X86::VPUNPCKLDQZ256rrkz, X86::VPUNPCKLDQZ256rmkz, 0 },
+ { X86::VPUNPCKLQDQZ256rrkz, X86::VPUNPCKLQDQZ256rmkz, 0 },
+ { X86::VPUNPCKLWDZ256rrkz, X86::VPUNPCKLWDZ256rmkz, 0 },
+ { X86::VPXORDZ256rrkz, X86::VPXORDZ256rmkz, 0 },
+ { X86::VPXORQZ256rrkz, X86::VPXORQZ256rmkz, 0 },
+ { X86::VSUBPDZ256rrkz, X86::VSUBPDZ256rmkz, 0 },
+ { X86::VSUBPSZ256rrkz, X86::VSUBPSZ256rmkz, 0 },
+ { X86::VUNPCKHPDZ256rrkz, X86::VUNPCKHPDZ256rmkz, 0 },
+ { X86::VUNPCKHPSZ256rrkz, X86::VUNPCKHPSZ256rmkz, 0 },
+ { X86::VUNPCKLPDZ256rrkz, X86::VUNPCKLPDZ256rmkz, 0 },
+ { X86::VUNPCKLPSZ256rrkz, X86::VUNPCKLPSZ256rmkz, 0 },
+ { X86::VXORPDZ256rrkz, X86::VXORPDZ256rmkz, 0 },
+ { X86::VXORPSZ256rrkz, X86::VXORPSZ256rmkz, 0 },
+
+ // AVX-512{F,VL} masked arithmetic instructions 128-bit
{ X86::VADDPDZ128rrkz, X86::VADDPDZ128rmkz, 0 },
- { X86::VSUBPSZ128rrkz, X86::VSUBPSZ128rmkz, 0 },
- { X86::VSUBPDZ128rrkz, X86::VSUBPDZ128rmkz, 0 },
- { X86::VMULPSZ128rrkz, X86::VMULPSZ128rmkz, 0 },
- { X86::VMULPDZ128rrkz, X86::VMULPDZ128rmkz, 0 },
- { X86::VDIVPSZ128rrkz, X86::VDIVPSZ128rmkz, 0 },
+ { X86::VADDPSZ128rrkz, X86::VADDPSZ128rmkz, 0 },
+ { X86::VALIGNDZ128rrikz, X86::VALIGNDZ128rmikz, 0 },
+ { X86::VALIGNQZ128rrikz, X86::VALIGNQZ128rmikz, 0 },
+ { X86::VANDNPDZ128rrkz, X86::VANDNPDZ128rmkz, 0 },
+ { X86::VANDNPSZ128rrkz, X86::VANDNPSZ128rmkz, 0 },
+ { X86::VANDPDZ128rrkz, X86::VANDPDZ128rmkz, 0 },
+ { X86::VANDPSZ128rrkz, X86::VANDPSZ128rmkz, 0 },
{ X86::VDIVPDZ128rrkz, X86::VDIVPDZ128rmkz, 0 },
- { X86::VMINPSZ128rrkz, X86::VMINPSZ128rmkz, 0 },
- { X86::VMINPDZ128rrkz, X86::VMINPDZ128rmkz, 0 },
+ { X86::VDIVPSZ128rrkz, X86::VDIVPSZ128rmkz, 0 },
+ { X86::VMAXCPDZ128rrkz, X86::VMAXCPDZ128rmkz, 0 },
+ { X86::VMAXCPSZ128rrkz, X86::VMAXCPSZ128rmkz, 0 },
+ { X86::VMAXPDZ128rrkz, X86::VMAXPDZ128rmkz, 0 },
{ X86::VMAXPSZ128rrkz, X86::VMAXPSZ128rmkz, 0 },
- { X86::VMAXPDZ128rrkz, X86::VMAXPDZ128rmkz, 0 }
+ { X86::VMINCPDZ128rrkz, X86::VMINCPDZ128rmkz, 0 },
+ { X86::VMINCPSZ128rrkz, X86::VMINCPSZ128rmkz, 0 },
+ { X86::VMINPDZ128rrkz, X86::VMINPDZ128rmkz, 0 },
+ { X86::VMINPSZ128rrkz, X86::VMINPSZ128rmkz, 0 },
+ { X86::VMULPDZ128rrkz, X86::VMULPDZ128rmkz, 0 },
+ { X86::VMULPSZ128rrkz, X86::VMULPSZ128rmkz, 0 },
+ { X86::VORPDZ128rrkz, X86::VORPDZ128rmkz, 0 },
+ { X86::VORPSZ128rrkz, X86::VORPSZ128rmkz, 0 },
+ { X86::VPADDBZ128rrkz, X86::VPADDBZ128rmkz, 0 },
+ { X86::VPADDDZ128rrkz, X86::VPADDDZ128rmkz, 0 },
+ { X86::VPADDQZ128rrkz, X86::VPADDQZ128rmkz, 0 },
+ { X86::VPADDSBZ128rrkz, X86::VPADDSBZ128rmkz, 0 },
+ { X86::VPADDSWZ128rrkz, X86::VPADDSWZ128rmkz, 0 },
+ { X86::VPADDUSBZ128rrkz, X86::VPADDUSBZ128rmkz, 0 },
+ { X86::VPADDUSWZ128rrkz, X86::VPADDUSWZ128rmkz, 0 },
+ { X86::VPADDWZ128rrkz, X86::VPADDWZ128rmkz, 0 },
+ { X86::VPALIGNRZ128rrikz, X86::VPALIGNRZ128rmikz, 0 },
+ { X86::VPANDDZ128rrkz, X86::VPANDDZ128rmkz, 0 },
+ { X86::VPANDNDZ128rrkz, X86::VPANDNDZ128rmkz, 0 },
+ { X86::VPANDNQZ128rrkz, X86::VPANDNQZ128rmkz, 0 },
+ { X86::VPANDQZ128rrkz, X86::VPANDQZ128rmkz, 0 },
+ { X86::VPERMBZ128rrkz, X86::VPERMBZ128rmkz, 0 },
+ { X86::VPERMILPDZ128rrkz, X86::VPERMILPDZ128rmkz, 0 },
+ { X86::VPERMILPSZ128rrkz, X86::VPERMILPSZ128rmkz, 0 },
+ { X86::VPERMWZ128rrkz, X86::VPERMWZ128rmkz, 0 },
+ { X86::VPMADDUBSWZ128rrkz, X86::VPMADDUBSWZ128rmkz, 0 },
+ { X86::VPMADDWDZ128rrkz, X86::VPMADDWDZ128rmkz, 0 },
+ { X86::VPORDZ128rrkz, X86::VPORDZ128rmkz, 0 },
+ { X86::VPORQZ128rrkz, X86::VPORQZ128rmkz, 0 },
+ { X86::VPSHUFBZ128rrkz, X86::VPSHUFBZ128rmkz, 0 },
+ { X86::VPSUBBZ128rrkz, X86::VPSUBBZ128rmkz, 0 },
+ { X86::VPSUBDZ128rrkz, X86::VPSUBDZ128rmkz, 0 },
+ { X86::VPSUBQZ128rrkz, X86::VPSUBQZ128rmkz, 0 },
+ { X86::VPSUBSBZ128rrkz, X86::VPSUBSBZ128rmkz, 0 },
+ { X86::VPSUBSWZ128rrkz, X86::VPSUBSWZ128rmkz, 0 },
+ { X86::VPSUBUSBZ128rrkz, X86::VPSUBUSBZ128rmkz, 0 },
+ { X86::VPSUBUSWZ128rrkz, X86::VPSUBUSWZ128rmkz, 0 },
+ { X86::VPSUBWZ128rrkz, X86::VPSUBWZ128rmkz, 0 },
+ { X86::VPUNPCKHBWZ128rrkz, X86::VPUNPCKHBWZ128rmkz, 0 },
+ { X86::VPUNPCKHDQZ128rrkz, X86::VPUNPCKHDQZ128rmkz, 0 },
+ { X86::VPUNPCKHQDQZ128rrkz, X86::VPUNPCKHQDQZ128rmkz, 0 },
+ { X86::VPUNPCKHWDZ128rrkz, X86::VPUNPCKHWDZ128rmkz, 0 },
+ { X86::VPUNPCKLBWZ128rrkz, X86::VPUNPCKLBWZ128rmkz, 0 },
+ { X86::VPUNPCKLDQZ128rrkz, X86::VPUNPCKLDQZ128rmkz, 0 },
+ { X86::VPUNPCKLQDQZ128rrkz, X86::VPUNPCKLQDQZ128rmkz, 0 },
+ { X86::VPUNPCKLWDZ128rrkz, X86::VPUNPCKLWDZ128rmkz, 0 },
+ { X86::VPXORDZ128rrkz, X86::VPXORDZ128rmkz, 0 },
+ { X86::VPXORQZ128rrkz, X86::VPXORQZ128rmkz, 0 },
+ { X86::VSUBPDZ128rrkz, X86::VSUBPDZ128rmkz, 0 },
+ { X86::VSUBPSZ128rrkz, X86::VSUBPSZ128rmkz, 0 },
+ { X86::VUNPCKHPDZ128rrkz, X86::VUNPCKHPDZ128rmkz, 0 },
+ { X86::VUNPCKHPSZ128rrkz, X86::VUNPCKHPSZ128rmkz, 0 },
+ { X86::VUNPCKLPDZ128rrkz, X86::VUNPCKLPDZ128rmkz, 0 },
+ { X86::VUNPCKLPSZ128rrkz, X86::VUNPCKLPSZ128rmkz, 0 },
+ { X86::VXORPDZ128rrkz, X86::VXORPDZ128rmkz, 0 },
+ { X86::VXORPSZ128rrkz, X86::VXORPSZ128rmkz, 0 },
+
+ // AVX-512 masked foldable instructions
+ { X86::VPERMILPDZrik, X86::VPERMILPDZmik, 0 },
+ { X86::VPERMILPSZrik, X86::VPERMILPSZmik, 0 },
+ { X86::VPERMPDZrik, X86::VPERMPDZmik, 0 },
+ { X86::VPERMQZrik, X86::VPERMQZmik, 0 },
+ { X86::VPMOVSXBDZrrk, X86::VPMOVSXBDZrmk, 0 },
+ { X86::VPMOVSXBQZrrk, X86::VPMOVSXBQZrmk, TB_NO_REVERSE },
+ { X86::VPMOVSXBWZrrk, X86::VPMOVSXBWZrmk, 0 },
+ { X86::VPMOVSXDQZrrk, X86::VPMOVSXDQZrmk, 0 },
+ { X86::VPMOVSXWDZrrk, X86::VPMOVSXWDZrmk, 0 },
+ { X86::VPMOVSXWQZrrk, X86::VPMOVSXWQZrmk, 0 },
+ { X86::VPMOVZXBDZrrk, X86::VPMOVZXBDZrmk, 0 },
+ { X86::VPMOVZXBQZrrk, X86::VPMOVZXBQZrmk, TB_NO_REVERSE },
+ { X86::VPMOVZXBWZrrk, X86::VPMOVZXBWZrmk, 0 },
+ { X86::VPMOVZXDQZrrk, X86::VPMOVZXDQZrmk, 0 },
+ { X86::VPMOVZXWDZrrk, X86::VPMOVZXWDZrmk, 0 },
+ { X86::VPMOVZXWQZrrk, X86::VPMOVZXWQZrmk, 0 },
+ { X86::VPSHUFDZrik, X86::VPSHUFDZmik, 0 },
+ { X86::VPSHUFHWZrik, X86::VPSHUFHWZmik, 0 },
+ { X86::VPSHUFLWZrik, X86::VPSHUFLWZmik, 0 },
+
+ // AVX-512VL 256-bit masked foldable instructions
+ { X86::VPERMILPDZ256rik, X86::VPERMILPDZ256mik, 0 },
+ { X86::VPERMILPSZ256rik, X86::VPERMILPSZ256mik, 0 },
+ { X86::VPERMPDZ256rik, X86::VPERMPDZ256mik, 0 },
+ { X86::VPERMQZ256rik, X86::VPERMQZ256mik, 0 },
+ { X86::VPMOVSXBDZ256rrk, X86::VPMOVSXBDZ256rmk, TB_NO_REVERSE },
+ { X86::VPMOVSXBQZ256rrk, X86::VPMOVSXBQZ256rmk, TB_NO_REVERSE },
+ { X86::VPMOVSXBWZ256rrk, X86::VPMOVSXBWZ256rmk, 0 },
+ { X86::VPMOVSXDQZ256rrk, X86::VPMOVSXDQZ256rmk, 0 },
+ { X86::VPMOVSXWDZ256rrk, X86::VPMOVSXWDZ256rmk, 0 },
+ { X86::VPMOVSXWQZ256rrk, X86::VPMOVSXWQZ256rmk, TB_NO_REVERSE },
+ { X86::VPMOVZXBDZ256rrk, X86::VPMOVZXBDZ256rmk, TB_NO_REVERSE },
+ { X86::VPMOVZXBQZ256rrk, X86::VPMOVZXBQZ256rmk, TB_NO_REVERSE },
+ { X86::VPMOVZXBWZ256rrk, X86::VPMOVZXBWZ256rmk, 0 },
+ { X86::VPMOVZXDQZ256rrk, X86::VPMOVZXDQZ256rmk, 0 },
+ { X86::VPMOVZXWDZ256rrk, X86::VPMOVZXWDZ256rmk, 0 },
+ { X86::VPMOVZXWQZ256rrk, X86::VPMOVZXWQZ256rmk, TB_NO_REVERSE },
+ { X86::VPSHUFDZ256rik, X86::VPSHUFDZ256mik, 0 },
+ { X86::VPSHUFHWZ256rik, X86::VPSHUFHWZ256mik, 0 },
+ { X86::VPSHUFLWZ256rik, X86::VPSHUFLWZ256mik, 0 },
+
+ // AVX-512VL 128-bit masked foldable instructions
+ { X86::VPERMILPDZ128rik, X86::VPERMILPDZ128mik, 0 },
+ { X86::VPERMILPSZ128rik, X86::VPERMILPSZ128mik, 0 },
+ { X86::VPMOVSXBDZ128rrk, X86::VPMOVSXBDZ128rmk, TB_NO_REVERSE },
+ { X86::VPMOVSXBQZ128rrk, X86::VPMOVSXBQZ128rmk, TB_NO_REVERSE },
+ { X86::VPMOVSXBWZ128rrk, X86::VPMOVSXBWZ128rmk, TB_NO_REVERSE },
+ { X86::VPMOVSXDQZ128rrk, X86::VPMOVSXDQZ128rmk, TB_NO_REVERSE },
+ { X86::VPMOVSXWDZ128rrk, X86::VPMOVSXWDZ128rmk, TB_NO_REVERSE },
+ { X86::VPMOVSXWQZ128rrk, X86::VPMOVSXWQZ128rmk, TB_NO_REVERSE },
+ { X86::VPMOVZXBDZ128rrk, X86::VPMOVZXBDZ128rmk, TB_NO_REVERSE },
+ { X86::VPMOVZXBQZ128rrk, X86::VPMOVZXBQZ128rmk, TB_NO_REVERSE },
+ { X86::VPMOVZXBWZ128rrk, X86::VPMOVZXBWZ128rmk, TB_NO_REVERSE },
+ { X86::VPMOVZXDQZ128rrk, X86::VPMOVZXDQZ128rmk, TB_NO_REVERSE },
+ { X86::VPMOVZXWDZ128rrk, X86::VPMOVZXWDZ128rmk, TB_NO_REVERSE },
+ { X86::VPMOVZXWQZ128rrk, X86::VPMOVZXWQZ128rmk, TB_NO_REVERSE },
+ { X86::VPSHUFDZ128rik, X86::VPSHUFDZ128mik, 0 },
+ { X86::VPSHUFHWZ128rik, X86::VPSHUFHWZ128mik, 0 },
+ { X86::VPSHUFLWZ128rik, X86::VPSHUFLWZ128mik, 0 },
};
for (X86MemoryFoldTableEntry Entry : MemoryFoldTable3) {
@@ -2008,47 +2630,348 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
// Index 3, folded load
Entry.Flags | TB_INDEX_3 | TB_FOLDED_LOAD);
}
+ auto I = X86InstrFMA3Info::rm_begin();
+ auto E = X86InstrFMA3Info::rm_end();
+ for (; I != E; ++I) {
+ if (!I.getGroup()->isKMasked()) {
+ // Intrinsic forms need to pass TB_NO_REVERSE.
+ if (I.getGroup()->isIntrinsic()) {
+ AddTableEntry(RegOp2MemOpTable3, MemOp2RegOpTable,
+ I.getRegOpcode(), I.getMemOpcode(),
+ TB_ALIGN_NONE | TB_INDEX_3 | TB_FOLDED_LOAD | TB_NO_REVERSE);
+ } else {
+ AddTableEntry(RegOp2MemOpTable3, MemOp2RegOpTable,
+ I.getRegOpcode(), I.getMemOpcode(),
+ TB_ALIGN_NONE | TB_INDEX_3 | TB_FOLDED_LOAD);
+ }
+ }
+ }
static const X86MemoryFoldTableEntry MemoryFoldTable4[] = {
- // AVX-512 foldable instructions
- { X86::VADDPSZrrk, X86::VADDPSZrmk, 0 },
+ // AVX-512 foldable masked instructions
{ X86::VADDPDZrrk, X86::VADDPDZrmk, 0 },
- { X86::VSUBPSZrrk, X86::VSUBPSZrmk, 0 },
- { X86::VSUBPDZrrk, X86::VSUBPDZrmk, 0 },
- { X86::VMULPSZrrk, X86::VMULPSZrmk, 0 },
- { X86::VMULPDZrrk, X86::VMULPDZrmk, 0 },
- { X86::VDIVPSZrrk, X86::VDIVPSZrmk, 0 },
+ { X86::VADDPSZrrk, X86::VADDPSZrmk, 0 },
+ { X86::VALIGNDZrrik, X86::VALIGNDZrmik, 0 },
+ { X86::VALIGNQZrrik, X86::VALIGNQZrmik, 0 },
+ { X86::VANDNPDZrrk, X86::VANDNPDZrmk, 0 },
+ { X86::VANDNPSZrrk, X86::VANDNPSZrmk, 0 },
+ { X86::VANDPDZrrk, X86::VANDPDZrmk, 0 },
+ { X86::VANDPSZrrk, X86::VANDPSZrmk, 0 },
{ X86::VDIVPDZrrk, X86::VDIVPDZrmk, 0 },
- { X86::VMINPSZrrk, X86::VMINPSZrmk, 0 },
- { X86::VMINPDZrrk, X86::VMINPDZrmk, 0 },
- { X86::VMAXPSZrrk, X86::VMAXPSZrmk, 0 },
+ { X86::VDIVPSZrrk, X86::VDIVPSZrmk, 0 },
+ { X86::VINSERTF32x4Zrrk, X86::VINSERTF32x4Zrmk, 0 },
+ { X86::VINSERTF32x8Zrrk, X86::VINSERTF32x8Zrmk, 0 },
+ { X86::VINSERTF64x2Zrrk, X86::VINSERTF64x2Zrmk, 0 },
+ { X86::VINSERTF64x4Zrrk, X86::VINSERTF64x4Zrmk, 0 },
+ { X86::VINSERTI32x4Zrrk, X86::VINSERTI32x4Zrmk, 0 },
+ { X86::VINSERTI32x8Zrrk, X86::VINSERTI32x8Zrmk, 0 },
+ { X86::VINSERTI64x2Zrrk, X86::VINSERTI64x2Zrmk, 0 },
+ { X86::VINSERTI64x4Zrrk, X86::VINSERTI64x4Zrmk, 0 },
+ { X86::VMAXCPDZrrk, X86::VMAXCPDZrmk, 0 },
+ { X86::VMAXCPSZrrk, X86::VMAXCPSZrmk, 0 },
{ X86::VMAXPDZrrk, X86::VMAXPDZrmk, 0 },
- // AVX-512{F,VL} foldable instructions 256-bit
- { X86::VADDPSZ256rrk, X86::VADDPSZ256rmk, 0 },
+ { X86::VMAXPSZrrk, X86::VMAXPSZrmk, 0 },
+ { X86::VMINCPDZrrk, X86::VMINCPDZrmk, 0 },
+ { X86::VMINCPSZrrk, X86::VMINCPSZrmk, 0 },
+ { X86::VMINPDZrrk, X86::VMINPDZrmk, 0 },
+ { X86::VMINPSZrrk, X86::VMINPSZrmk, 0 },
+ { X86::VMULPDZrrk, X86::VMULPDZrmk, 0 },
+ { X86::VMULPSZrrk, X86::VMULPSZrmk, 0 },
+ { X86::VORPDZrrk, X86::VORPDZrmk, 0 },
+ { X86::VORPSZrrk, X86::VORPSZrmk, 0 },
+ { X86::VPADDBZrrk, X86::VPADDBZrmk, 0 },
+ { X86::VPADDDZrrk, X86::VPADDDZrmk, 0 },
+ { X86::VPADDQZrrk, X86::VPADDQZrmk, 0 },
+ { X86::VPADDSBZrrk, X86::VPADDSBZrmk, 0 },
+ { X86::VPADDSWZrrk, X86::VPADDSWZrmk, 0 },
+ { X86::VPADDUSBZrrk, X86::VPADDUSBZrmk, 0 },
+ { X86::VPADDUSWZrrk, X86::VPADDUSWZrmk, 0 },
+ { X86::VPADDWZrrk, X86::VPADDWZrmk, 0 },
+ { X86::VPALIGNRZrrik, X86::VPALIGNRZrmik, 0 },
+ { X86::VPANDDZrrk, X86::VPANDDZrmk, 0 },
+ { X86::VPANDNDZrrk, X86::VPANDNDZrmk, 0 },
+ { X86::VPANDNQZrrk, X86::VPANDNQZrmk, 0 },
+ { X86::VPANDQZrrk, X86::VPANDQZrmk, 0 },
+ { X86::VPERMBZrrk, X86::VPERMBZrmk, 0 },
+ { X86::VPERMDZrrk, X86::VPERMDZrmk, 0 },
+ { X86::VPERMI2Brrk, X86::VPERMI2Brmk, 0 },
+ { X86::VPERMI2Drrk, X86::VPERMI2Drmk, 0 },
+ { X86::VPERMI2PSrrk, X86::VPERMI2PSrmk, 0 },
+ { X86::VPERMI2PDrrk, X86::VPERMI2PDrmk, 0 },
+ { X86::VPERMI2Qrrk, X86::VPERMI2Qrmk, 0 },
+ { X86::VPERMI2Wrrk, X86::VPERMI2Wrmk, 0 },
+ { X86::VPERMILPDZrrk, X86::VPERMILPDZrmk, 0 },
+ { X86::VPERMILPSZrrk, X86::VPERMILPSZrmk, 0 },
+ { X86::VPERMPDZrrk, X86::VPERMPDZrmk, 0 },
+ { X86::VPERMPSZrrk, X86::VPERMPSZrmk, 0 },
+ { X86::VPERMQZrrk, X86::VPERMQZrmk, 0 },
+ { X86::VPERMT2Brrk, X86::VPERMT2Brmk, 0 },
+ { X86::VPERMT2Drrk, X86::VPERMT2Drmk, 0 },
+ { X86::VPERMT2PSrrk, X86::VPERMT2PSrmk, 0 },
+ { X86::VPERMT2PDrrk, X86::VPERMT2PDrmk, 0 },
+ { X86::VPERMT2Qrrk, X86::VPERMT2Qrmk, 0 },
+ { X86::VPERMT2Wrrk, X86::VPERMT2Wrmk, 0 },
+ { X86::VPERMWZrrk, X86::VPERMWZrmk, 0 },
+ { X86::VPMADDUBSWZrrk, X86::VPMADDUBSWZrmk, 0 },
+ { X86::VPMADDWDZrrk, X86::VPMADDWDZrmk, 0 },
+ { X86::VPORDZrrk, X86::VPORDZrmk, 0 },
+ { X86::VPORQZrrk, X86::VPORQZrmk, 0 },
+ { X86::VPSHUFBZrrk, X86::VPSHUFBZrmk, 0 },
+ { X86::VPSUBBZrrk, X86::VPSUBBZrmk, 0 },
+ { X86::VPSUBDZrrk, X86::VPSUBDZrmk, 0 },
+ { X86::VPSUBQZrrk, X86::VPSUBQZrmk, 0 },
+ { X86::VPSUBSBZrrk, X86::VPSUBSBZrmk, 0 },
+ { X86::VPSUBSWZrrk, X86::VPSUBSWZrmk, 0 },
+ { X86::VPSUBUSBZrrk, X86::VPSUBUSBZrmk, 0 },
+ { X86::VPSUBUSWZrrk, X86::VPSUBUSWZrmk, 0 },
+ { X86::VPTERNLOGDZrrik, X86::VPTERNLOGDZrmik, 0 },
+ { X86::VPTERNLOGQZrrik, X86::VPTERNLOGQZrmik, 0 },
+ { X86::VPUNPCKHBWZrrk, X86::VPUNPCKHBWZrmk, 0 },
+ { X86::VPUNPCKHDQZrrk, X86::VPUNPCKHDQZrmk, 0 },
+ { X86::VPUNPCKHQDQZrrk, X86::VPUNPCKHQDQZrmk, 0 },
+ { X86::VPUNPCKHWDZrrk, X86::VPUNPCKHWDZrmk, 0 },
+ { X86::VPUNPCKLBWZrrk, X86::VPUNPCKLBWZrmk, 0 },
+ { X86::VPUNPCKLDQZrrk, X86::VPUNPCKLDQZrmk, 0 },
+ { X86::VPUNPCKLQDQZrrk, X86::VPUNPCKLQDQZrmk, 0 },
+ { X86::VPUNPCKLWDZrrk, X86::VPUNPCKLWDZrmk, 0 },
+ { X86::VPXORDZrrk, X86::VPXORDZrmk, 0 },
+ { X86::VPXORQZrrk, X86::VPXORQZrmk, 0 },
+ { X86::VSUBPDZrrk, X86::VSUBPDZrmk, 0 },
+ { X86::VSUBPSZrrk, X86::VSUBPSZrmk, 0 },
+ { X86::VUNPCKHPDZrrk, X86::VUNPCKHPDZrmk, 0 },
+ { X86::VUNPCKHPSZrrk, X86::VUNPCKHPSZrmk, 0 },
+ { X86::VUNPCKLPDZrrk, X86::VUNPCKLPDZrmk, 0 },
+ { X86::VUNPCKLPSZrrk, X86::VUNPCKLPSZrmk, 0 },
+ { X86::VXORPDZrrk, X86::VXORPDZrmk, 0 },
+ { X86::VXORPSZrrk, X86::VXORPSZrmk, 0 },
+
+ // AVX-512{F,VL} foldable masked instructions 256-bit
{ X86::VADDPDZ256rrk, X86::VADDPDZ256rmk, 0 },
- { X86::VSUBPSZ256rrk, X86::VSUBPSZ256rmk, 0 },
- { X86::VSUBPDZ256rrk, X86::VSUBPDZ256rmk, 0 },
- { X86::VMULPSZ256rrk, X86::VMULPSZ256rmk, 0 },
- { X86::VMULPDZ256rrk, X86::VMULPDZ256rmk, 0 },
- { X86::VDIVPSZ256rrk, X86::VDIVPSZ256rmk, 0 },
+ { X86::VADDPSZ256rrk, X86::VADDPSZ256rmk, 0 },
+ { X86::VALIGNDZ256rrik, X86::VALIGNDZ256rmik, 0 },
+ { X86::VALIGNQZ256rrik, X86::VALIGNQZ256rmik, 0 },
+ { X86::VANDNPDZ256rrk, X86::VANDNPDZ256rmk, 0 },
+ { X86::VANDNPSZ256rrk, X86::VANDNPSZ256rmk, 0 },
+ { X86::VANDPDZ256rrk, X86::VANDPDZ256rmk, 0 },
+ { X86::VANDPSZ256rrk, X86::VANDPSZ256rmk, 0 },
{ X86::VDIVPDZ256rrk, X86::VDIVPDZ256rmk, 0 },
- { X86::VMINPSZ256rrk, X86::VMINPSZ256rmk, 0 },
- { X86::VMINPDZ256rrk, X86::VMINPDZ256rmk, 0 },
- { X86::VMAXPSZ256rrk, X86::VMAXPSZ256rmk, 0 },
+ { X86::VDIVPSZ256rrk, X86::VDIVPSZ256rmk, 0 },
+ { X86::VINSERTF32x4Z256rrk,X86::VINSERTF32x4Z256rmk, 0 },
+ { X86::VINSERTF64x2Z256rrk,X86::VINSERTF64x2Z256rmk, 0 },
+ { X86::VINSERTI32x4Z256rrk,X86::VINSERTI32x4Z256rmk, 0 },
+ { X86::VINSERTI64x2Z256rrk,X86::VINSERTI64x2Z256rmk, 0 },
+ { X86::VMAXCPDZ256rrk, X86::VMAXCPDZ256rmk, 0 },
+ { X86::VMAXCPSZ256rrk, X86::VMAXCPSZ256rmk, 0 },
{ X86::VMAXPDZ256rrk, X86::VMAXPDZ256rmk, 0 },
+ { X86::VMAXPSZ256rrk, X86::VMAXPSZ256rmk, 0 },
+ { X86::VMINCPDZ256rrk, X86::VMINCPDZ256rmk, 0 },
+ { X86::VMINCPSZ256rrk, X86::VMINCPSZ256rmk, 0 },
+ { X86::VMINPDZ256rrk, X86::VMINPDZ256rmk, 0 },
+ { X86::VMINPSZ256rrk, X86::VMINPSZ256rmk, 0 },
+ { X86::VMULPDZ256rrk, X86::VMULPDZ256rmk, 0 },
+ { X86::VMULPSZ256rrk, X86::VMULPSZ256rmk, 0 },
+ { X86::VORPDZ256rrk, X86::VORPDZ256rmk, 0 },
+ { X86::VORPSZ256rrk, X86::VORPSZ256rmk, 0 },
+ { X86::VPADDBZ256rrk, X86::VPADDBZ256rmk, 0 },
+ { X86::VPADDDZ256rrk, X86::VPADDDZ256rmk, 0 },
+ { X86::VPADDQZ256rrk, X86::VPADDQZ256rmk, 0 },
+ { X86::VPADDSBZ256rrk, X86::VPADDSBZ256rmk, 0 },
+ { X86::VPADDSWZ256rrk, X86::VPADDSWZ256rmk, 0 },
+ { X86::VPADDUSBZ256rrk, X86::VPADDUSBZ256rmk, 0 },
+ { X86::VPADDUSWZ256rrk, X86::VPADDUSWZ256rmk, 0 },
+ { X86::VPADDWZ256rrk, X86::VPADDWZ256rmk, 0 },
+ { X86::VPALIGNRZ256rrik, X86::VPALIGNRZ256rmik, 0 },
+ { X86::VPANDDZ256rrk, X86::VPANDDZ256rmk, 0 },
+ { X86::VPANDNDZ256rrk, X86::VPANDNDZ256rmk, 0 },
+ { X86::VPANDNQZ256rrk, X86::VPANDNQZ256rmk, 0 },
+ { X86::VPANDQZ256rrk, X86::VPANDQZ256rmk, 0 },
+ { X86::VPERMBZ256rrk, X86::VPERMBZ256rmk, 0 },
+ { X86::VPERMDZ256rrk, X86::VPERMDZ256rmk, 0 },
+ { X86::VPERMI2B256rrk, X86::VPERMI2B256rmk, 0 },
+ { X86::VPERMI2D256rrk, X86::VPERMI2D256rmk, 0 },
+ { X86::VPERMI2PD256rrk, X86::VPERMI2PD256rmk, 0 },
+ { X86::VPERMI2PS256rrk, X86::VPERMI2PS256rmk, 0 },
+ { X86::VPERMI2Q256rrk, X86::VPERMI2Q256rmk, 0 },
+ { X86::VPERMI2W256rrk, X86::VPERMI2W256rmk, 0 },
+ { X86::VPERMILPDZ256rrk, X86::VPERMILPDZ256rmk, 0 },
+ { X86::VPERMILPSZ256rrk, X86::VPERMILPSZ256rmk, 0 },
+ { X86::VPERMPDZ256rrk, X86::VPERMPDZ256rmk, 0 },
+ { X86::VPERMPSZ256rrk, X86::VPERMPSZ256rmk, 0 },
+ { X86::VPERMQZ256rrk, X86::VPERMQZ256rmk, 0 },
+ { X86::VPERMT2B256rrk, X86::VPERMT2B256rmk, 0 },
+ { X86::VPERMT2D256rrk, X86::VPERMT2D256rmk, 0 },
+ { X86::VPERMT2PD256rrk, X86::VPERMT2PD256rmk, 0 },
+ { X86::VPERMT2PS256rrk, X86::VPERMT2PS256rmk, 0 },
+ { X86::VPERMT2Q256rrk, X86::VPERMT2Q256rmk, 0 },
+ { X86::VPERMT2W256rrk, X86::VPERMT2W256rmk, 0 },
+ { X86::VPERMWZ256rrk, X86::VPERMWZ256rmk, 0 },
+ { X86::VPMADDUBSWZ256rrk, X86::VPMADDUBSWZ256rmk, 0 },
+ { X86::VPMADDWDZ256rrk, X86::VPMADDWDZ256rmk, 0 },
+ { X86::VPORDZ256rrk, X86::VPORDZ256rmk, 0 },
+ { X86::VPORQZ256rrk, X86::VPORQZ256rmk, 0 },
+ { X86::VPSHUFBZ256rrk, X86::VPSHUFBZ256rmk, 0 },
+ { X86::VPSUBBZ256rrk, X86::VPSUBBZ256rmk, 0 },
+ { X86::VPSUBDZ256rrk, X86::VPSUBDZ256rmk, 0 },
+ { X86::VPSUBQZ256rrk, X86::VPSUBQZ256rmk, 0 },
+ { X86::VPSUBSBZ256rrk, X86::VPSUBSBZ256rmk, 0 },
+ { X86::VPSUBSWZ256rrk, X86::VPSUBSWZ256rmk, 0 },
+ { X86::VPSUBUSBZ256rrk, X86::VPSUBUSBZ256rmk, 0 },
+ { X86::VPSUBUSWZ256rrk, X86::VPSUBUSWZ256rmk, 0 },
+ { X86::VPSUBWZ256rrk, X86::VPSUBWZ256rmk, 0 },
+ { X86::VPTERNLOGDZ256rrik, X86::VPTERNLOGDZ256rmik, 0 },
+ { X86::VPTERNLOGQZ256rrik, X86::VPTERNLOGQZ256rmik, 0 },
+ { X86::VPUNPCKHBWZ256rrk, X86::VPUNPCKHBWZ256rmk, 0 },
+ { X86::VPUNPCKHDQZ256rrk, X86::VPUNPCKHDQZ256rmk, 0 },
+ { X86::VPUNPCKHQDQZ256rrk, X86::VPUNPCKHQDQZ256rmk, 0 },
+ { X86::VPUNPCKHWDZ256rrk, X86::VPUNPCKHWDZ256rmk, 0 },
+ { X86::VPUNPCKLBWZ256rrk, X86::VPUNPCKLBWZ256rmk, 0 },
+ { X86::VPUNPCKLDQZ256rrk, X86::VPUNPCKLDQZ256rmk, 0 },
+ { X86::VPUNPCKLQDQZ256rrk, X86::VPUNPCKLQDQZ256rmk, 0 },
+ { X86::VPUNPCKLWDZ256rrk, X86::VPUNPCKLWDZ256rmk, 0 },
+ { X86::VPXORDZ256rrk, X86::VPXORDZ256rmk, 0 },
+ { X86::VPXORQZ256rrk, X86::VPXORQZ256rmk, 0 },
+ { X86::VSUBPDZ256rrk, X86::VSUBPDZ256rmk, 0 },
+ { X86::VSUBPSZ256rrk, X86::VSUBPSZ256rmk, 0 },
+ { X86::VUNPCKHPDZ256rrk, X86::VUNPCKHPDZ256rmk, 0 },
+ { X86::VUNPCKHPSZ256rrk, X86::VUNPCKHPSZ256rmk, 0 },
+ { X86::VUNPCKLPDZ256rrk, X86::VUNPCKLPDZ256rmk, 0 },
+ { X86::VUNPCKLPSZ256rrk, X86::VUNPCKLPSZ256rmk, 0 },
+ { X86::VXORPDZ256rrk, X86::VXORPDZ256rmk, 0 },
+ { X86::VXORPSZ256rrk, X86::VXORPSZ256rmk, 0 },
+
// AVX-512{F,VL} foldable instructions 128-bit
- { X86::VADDPSZ128rrk, X86::VADDPSZ128rmk, 0 },
{ X86::VADDPDZ128rrk, X86::VADDPDZ128rmk, 0 },
- { X86::VSUBPSZ128rrk, X86::VSUBPSZ128rmk, 0 },
- { X86::VSUBPDZ128rrk, X86::VSUBPDZ128rmk, 0 },
- { X86::VMULPSZ128rrk, X86::VMULPSZ128rmk, 0 },
- { X86::VMULPDZ128rrk, X86::VMULPDZ128rmk, 0 },
- { X86::VDIVPSZ128rrk, X86::VDIVPSZ128rmk, 0 },
+ { X86::VADDPSZ128rrk, X86::VADDPSZ128rmk, 0 },
+ { X86::VALIGNDZ128rrik, X86::VALIGNDZ128rmik, 0 },
+ { X86::VALIGNQZ128rrik, X86::VALIGNQZ128rmik, 0 },
+ { X86::VANDNPDZ128rrk, X86::VANDNPDZ128rmk, 0 },
+ { X86::VANDNPSZ128rrk, X86::VANDNPSZ128rmk, 0 },
+ { X86::VANDPDZ128rrk, X86::VANDPDZ128rmk, 0 },
+ { X86::VANDPSZ128rrk, X86::VANDPSZ128rmk, 0 },
{ X86::VDIVPDZ128rrk, X86::VDIVPDZ128rmk, 0 },
- { X86::VMINPSZ128rrk, X86::VMINPSZ128rmk, 0 },
- { X86::VMINPDZ128rrk, X86::VMINPDZ128rmk, 0 },
+ { X86::VDIVPSZ128rrk, X86::VDIVPSZ128rmk, 0 },
+ { X86::VMAXCPDZ128rrk, X86::VMAXCPDZ128rmk, 0 },
+ { X86::VMAXCPSZ128rrk, X86::VMAXCPSZ128rmk, 0 },
+ { X86::VMAXPDZ128rrk, X86::VMAXPDZ128rmk, 0 },
{ X86::VMAXPSZ128rrk, X86::VMAXPSZ128rmk, 0 },
- { X86::VMAXPDZ128rrk, X86::VMAXPDZ128rmk, 0 }
+ { X86::VMINCPDZ128rrk, X86::VMINCPDZ128rmk, 0 },
+ { X86::VMINCPSZ128rrk, X86::VMINCPSZ128rmk, 0 },
+ { X86::VMINPDZ128rrk, X86::VMINPDZ128rmk, 0 },
+ { X86::VMINPSZ128rrk, X86::VMINPSZ128rmk, 0 },
+ { X86::VMULPDZ128rrk, X86::VMULPDZ128rmk, 0 },
+ { X86::VMULPSZ128rrk, X86::VMULPSZ128rmk, 0 },
+ { X86::VORPDZ128rrk, X86::VORPDZ128rmk, 0 },
+ { X86::VORPSZ128rrk, X86::VORPSZ128rmk, 0 },
+ { X86::VPADDBZ128rrk, X86::VPADDBZ128rmk, 0 },
+ { X86::VPADDDZ128rrk, X86::VPADDDZ128rmk, 0 },
+ { X86::VPADDQZ128rrk, X86::VPADDQZ128rmk, 0 },
+ { X86::VPADDSBZ128rrk, X86::VPADDSBZ128rmk, 0 },
+ { X86::VPADDSWZ128rrk, X86::VPADDSWZ128rmk, 0 },
+ { X86::VPADDUSBZ128rrk, X86::VPADDUSBZ128rmk, 0 },
+ { X86::VPADDUSWZ128rrk, X86::VPADDUSWZ128rmk, 0 },
+ { X86::VPADDWZ128rrk, X86::VPADDWZ128rmk, 0 },
+ { X86::VPALIGNRZ128rrik, X86::VPALIGNRZ128rmik, 0 },
+ { X86::VPANDDZ128rrk, X86::VPANDDZ128rmk, 0 },
+ { X86::VPANDNDZ128rrk, X86::VPANDNDZ128rmk, 0 },
+ { X86::VPANDNQZ128rrk, X86::VPANDNQZ128rmk, 0 },
+ { X86::VPANDQZ128rrk, X86::VPANDQZ128rmk, 0 },
+ { X86::VPERMBZ128rrk, X86::VPERMBZ128rmk, 0 },
+ { X86::VPERMI2B128rrk, X86::VPERMI2B128rmk, 0 },
+ { X86::VPERMI2D128rrk, X86::VPERMI2D128rmk, 0 },
+ { X86::VPERMI2PD128rrk, X86::VPERMI2PD128rmk, 0 },
+ { X86::VPERMI2PS128rrk, X86::VPERMI2PS128rmk, 0 },
+ { X86::VPERMI2Q128rrk, X86::VPERMI2Q128rmk, 0 },
+ { X86::VPERMI2W128rrk, X86::VPERMI2W128rmk, 0 },
+ { X86::VPERMILPDZ128rrk, X86::VPERMILPDZ128rmk, 0 },
+ { X86::VPERMILPSZ128rrk, X86::VPERMILPSZ128rmk, 0 },
+ { X86::VPERMT2B128rrk, X86::VPERMT2B128rmk, 0 },
+ { X86::VPERMT2D128rrk, X86::VPERMT2D128rmk, 0 },
+ { X86::VPERMT2PD128rrk, X86::VPERMT2PD128rmk, 0 },
+ { X86::VPERMT2PS128rrk, X86::VPERMT2PS128rmk, 0 },
+ { X86::VPERMT2Q128rrk, X86::VPERMT2Q128rmk, 0 },
+ { X86::VPERMT2W128rrk, X86::VPERMT2W128rmk, 0 },
+ { X86::VPERMWZ128rrk, X86::VPERMWZ128rmk, 0 },
+ { X86::VPMADDUBSWZ128rrk, X86::VPMADDUBSWZ128rmk, 0 },
+ { X86::VPMADDWDZ128rrk, X86::VPMADDWDZ128rmk, 0 },
+ { X86::VPORDZ128rrk, X86::VPORDZ128rmk, 0 },
+ { X86::VPORQZ128rrk, X86::VPORQZ128rmk, 0 },
+ { X86::VPSHUFBZ128rrk, X86::VPSHUFBZ128rmk, 0 },
+ { X86::VPSUBBZ128rrk, X86::VPSUBBZ128rmk, 0 },
+ { X86::VPSUBDZ128rrk, X86::VPSUBDZ128rmk, 0 },
+ { X86::VPSUBQZ128rrk, X86::VPSUBQZ128rmk, 0 },
+ { X86::VPSUBSBZ128rrk, X86::VPSUBSBZ128rmk, 0 },
+ { X86::VPSUBSWZ128rrk, X86::VPSUBSWZ128rmk, 0 },
+ { X86::VPSUBUSBZ128rrk, X86::VPSUBUSBZ128rmk, 0 },
+ { X86::VPSUBUSWZ128rrk, X86::VPSUBUSWZ128rmk, 0 },
+ { X86::VPSUBWZ128rrk, X86::VPSUBWZ128rmk, 0 },
+ { X86::VPTERNLOGDZ128rrik, X86::VPTERNLOGDZ128rmik, 0 },
+ { X86::VPTERNLOGQZ128rrik, X86::VPTERNLOGQZ128rmik, 0 },
+ { X86::VPUNPCKHBWZ128rrk, X86::VPUNPCKHBWZ128rmk, 0 },
+ { X86::VPUNPCKHDQZ128rrk, X86::VPUNPCKHDQZ128rmk, 0 },
+ { X86::VPUNPCKHQDQZ128rrk, X86::VPUNPCKHQDQZ128rmk, 0 },
+ { X86::VPUNPCKHWDZ128rrk, X86::VPUNPCKHWDZ128rmk, 0 },
+ { X86::VPUNPCKLBWZ128rrk, X86::VPUNPCKLBWZ128rmk, 0 },
+ { X86::VPUNPCKLDQZ128rrk, X86::VPUNPCKLDQZ128rmk, 0 },
+ { X86::VPUNPCKLQDQZ128rrk, X86::VPUNPCKLQDQZ128rmk, 0 },
+ { X86::VPUNPCKLWDZ128rrk, X86::VPUNPCKLWDZ128rmk, 0 },
+ { X86::VPXORDZ128rrk, X86::VPXORDZ128rmk, 0 },
+ { X86::VPXORQZ128rrk, X86::VPXORQZ128rmk, 0 },
+ { X86::VSUBPDZ128rrk, X86::VSUBPDZ128rmk, 0 },
+ { X86::VSUBPSZ128rrk, X86::VSUBPSZ128rmk, 0 },
+ { X86::VUNPCKHPDZ128rrk, X86::VUNPCKHPDZ128rmk, 0 },
+ { X86::VUNPCKHPSZ128rrk, X86::VUNPCKHPSZ128rmk, 0 },
+ { X86::VUNPCKLPDZ128rrk, X86::VUNPCKLPDZ128rmk, 0 },
+ { X86::VUNPCKLPSZ128rrk, X86::VUNPCKLPSZ128rmk, 0 },
+ { X86::VXORPDZ128rrk, X86::VXORPDZ128rmk, 0 },
+ { X86::VXORPSZ128rrk, X86::VXORPSZ128rmk, 0 },
+
+ // 512-bit three source instructions with zero masking.
+ { X86::VPERMI2Brrkz, X86::VPERMI2Brmkz, 0 },
+ { X86::VPERMI2Drrkz, X86::VPERMI2Drmkz, 0 },
+ { X86::VPERMI2PSrrkz, X86::VPERMI2PSrmkz, 0 },
+ { X86::VPERMI2PDrrkz, X86::VPERMI2PDrmkz, 0 },
+ { X86::VPERMI2Qrrkz, X86::VPERMI2Qrmkz, 0 },
+ { X86::VPERMI2Wrrkz, X86::VPERMI2Wrmkz, 0 },
+ { X86::VPERMT2Brrkz, X86::VPERMT2Brmkz, 0 },
+ { X86::VPERMT2Drrkz, X86::VPERMT2Drmkz, 0 },
+ { X86::VPERMT2PSrrkz, X86::VPERMT2PSrmkz, 0 },
+ { X86::VPERMT2PDrrkz, X86::VPERMT2PDrmkz, 0 },
+ { X86::VPERMT2Qrrkz, X86::VPERMT2Qrmkz, 0 },
+ { X86::VPERMT2Wrrkz, X86::VPERMT2Wrmkz, 0 },
+ { X86::VPTERNLOGDZrrikz, X86::VPTERNLOGDZrmikz, 0 },
+ { X86::VPTERNLOGQZrrikz, X86::VPTERNLOGQZrmikz, 0 },
+
+ // 256-bit three source instructions with zero masking.
+ { X86::VPERMI2B256rrkz, X86::VPERMI2B256rmkz, 0 },
+ { X86::VPERMI2D256rrkz, X86::VPERMI2D256rmkz, 0 },
+ { X86::VPERMI2PD256rrkz, X86::VPERMI2PD256rmkz, 0 },
+ { X86::VPERMI2PS256rrkz, X86::VPERMI2PS256rmkz, 0 },
+ { X86::VPERMI2Q256rrkz, X86::VPERMI2Q256rmkz, 0 },
+ { X86::VPERMI2W256rrkz, X86::VPERMI2W256rmkz, 0 },
+ { X86::VPERMT2B256rrkz, X86::VPERMT2B256rmkz, 0 },
+ { X86::VPERMT2D256rrkz, X86::VPERMT2D256rmkz, 0 },
+ { X86::VPERMT2PD256rrkz, X86::VPERMT2PD256rmkz, 0 },
+ { X86::VPERMT2PS256rrkz, X86::VPERMT2PS256rmkz, 0 },
+ { X86::VPERMT2Q256rrkz, X86::VPERMT2Q256rmkz, 0 },
+ { X86::VPERMT2W256rrkz, X86::VPERMT2W256rmkz, 0 },
+ { X86::VPTERNLOGDZ256rrikz,X86::VPTERNLOGDZ256rmikz, 0 },
+ { X86::VPTERNLOGQZ256rrikz,X86::VPTERNLOGQZ256rmikz, 0 },
+
+ // 128-bit three source instructions with zero masking.
+ { X86::VPERMI2B128rrkz, X86::VPERMI2B128rmkz, 0 },
+ { X86::VPERMI2D128rrkz, X86::VPERMI2D128rmkz, 0 },
+ { X86::VPERMI2PD128rrkz, X86::VPERMI2PD128rmkz, 0 },
+ { X86::VPERMI2PS128rrkz, X86::VPERMI2PS128rmkz, 0 },
+ { X86::VPERMI2Q128rrkz, X86::VPERMI2Q128rmkz, 0 },
+ { X86::VPERMI2W128rrkz, X86::VPERMI2W128rmkz, 0 },
+ { X86::VPERMT2B128rrkz, X86::VPERMT2B128rmkz, 0 },
+ { X86::VPERMT2D128rrkz, X86::VPERMT2D128rmkz, 0 },
+ { X86::VPERMT2PD128rrkz, X86::VPERMT2PD128rmkz, 0 },
+ { X86::VPERMT2PS128rrkz, X86::VPERMT2PS128rmkz, 0 },
+ { X86::VPERMT2Q128rrkz, X86::VPERMT2Q128rmkz, 0 },
+ { X86::VPERMT2W128rrkz, X86::VPERMT2W128rmkz, 0 },
+ { X86::VPTERNLOGDZ128rrikz,X86::VPTERNLOGDZ128rmikz, 0 },
+ { X86::VPTERNLOGQZ128rrikz,X86::VPTERNLOGQZ128rmikz, 0 },
};
for (X86MemoryFoldTableEntry Entry : MemoryFoldTable4) {
@@ -2057,21 +2980,35 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
// Index 4, folded load
Entry.Flags | TB_INDEX_4 | TB_FOLDED_LOAD);
}
+ for (I = X86InstrFMA3Info::rm_begin(); I != E; ++I) {
+ if (I.getGroup()->isKMasked()) {
+ // Intrinsics need to pass TB_NO_REVERSE.
+ if (I.getGroup()->isIntrinsic()) {
+ AddTableEntry(RegOp2MemOpTable4, MemOp2RegOpTable,
+ I.getRegOpcode(), I.getMemOpcode(),
+ TB_ALIGN_NONE | TB_INDEX_4 | TB_FOLDED_LOAD | TB_NO_REVERSE);
+ } else {
+ AddTableEntry(RegOp2MemOpTable4, MemOp2RegOpTable,
+ I.getRegOpcode(), I.getMemOpcode(),
+ TB_ALIGN_NONE | TB_INDEX_4 | TB_FOLDED_LOAD);
+ }
+ }
+ }
}
void
X86InstrInfo::AddTableEntry(RegOp2MemOpTableType &R2MTable,
MemOp2RegOpTableType &M2RTable,
uint16_t RegOp, uint16_t MemOp, uint16_t Flags) {
- if ((Flags & TB_NO_FORWARD) == 0) {
- assert(!R2MTable.count(RegOp) && "Duplicate entry!");
- R2MTable[RegOp] = std::make_pair(MemOp, Flags);
- }
- if ((Flags & TB_NO_REVERSE) == 0) {
- assert(!M2RTable.count(MemOp) &&
- "Duplicated entries in unfolding maps?");
- M2RTable[MemOp] = std::make_pair(RegOp, Flags);
- }
+ if ((Flags & TB_NO_FORWARD) == 0) {
+ assert(!R2MTable.count(RegOp) && "Duplicate entry!");
+ R2MTable[RegOp] = std::make_pair(MemOp, Flags);
+ }
+ if ((Flags & TB_NO_REVERSE) == 0) {
+ assert(!M2RTable.count(MemOp) &&
+ "Duplicated entries in unfolding maps?");
+ M2RTable[MemOp] = std::make_pair(RegOp, Flags);
+ }
}
bool
@@ -2235,9 +3172,13 @@ static bool isFrameLoadOpcode(int Opcode) {
case X86::VMOVAPSZrm:
case X86::VMOVAPSZ128rm:
case X86::VMOVAPSZ256rm:
+ case X86::VMOVAPSZ128rm_NOVLX:
+ case X86::VMOVAPSZ256rm_NOVLX:
case X86::VMOVUPSZrm:
case X86::VMOVUPSZ128rm:
case X86::VMOVUPSZ256rm:
+ case X86::VMOVUPSZ128rm_NOVLX:
+ case X86::VMOVUPSZ256rm_NOVLX:
case X86::VMOVAPDZrm:
case X86::VMOVAPDZ128rm:
case X86::VMOVAPDZ256rm:
@@ -2305,9 +3246,13 @@ static bool isFrameStoreOpcode(int Opcode) {
case X86::VMOVUPSZmr:
case X86::VMOVUPSZ128mr:
case X86::VMOVUPSZ256mr:
+ case X86::VMOVUPSZ128mr_NOVLX:
+ case X86::VMOVUPSZ256mr_NOVLX:
case X86::VMOVAPSZmr:
case X86::VMOVAPSZ128mr:
case X86::VMOVAPSZ256mr:
+ case X86::VMOVAPSZ128mr_NOVLX:
+ case X86::VMOVAPSZ256mr_NOVLX:
case X86::VMOVUPDZmr:
case X86::VMOVUPDZ128mr:
case X86::VMOVUPDZ256mr:
@@ -2409,6 +3354,7 @@ bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
switch (MI.getOpcode()) {
default: break;
case X86::MOV8rm:
+ case X86::MOV8rm_NOREX:
case X86::MOV16rm:
case X86::MOV32rm:
case X86::MOV64rm:
@@ -2418,6 +3364,7 @@ bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
case X86::MOVAPSrm:
case X86::MOVUPSrm:
case X86::MOVAPDrm:
+ case X86::MOVUPDrm:
case X86::MOVDQArm:
case X86::MOVDQUrm:
case X86::VMOVSSrm:
@@ -2425,25 +3372,27 @@ bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
case X86::VMOVAPSrm:
case X86::VMOVUPSrm:
case X86::VMOVAPDrm:
+ case X86::VMOVUPDrm:
case X86::VMOVDQArm:
case X86::VMOVDQUrm:
case X86::VMOVAPSYrm:
case X86::VMOVUPSYrm:
case X86::VMOVAPDYrm:
+ case X86::VMOVUPDYrm:
case X86::VMOVDQAYrm:
case X86::VMOVDQUYrm:
case X86::MMX_MOVD64rm:
case X86::MMX_MOVQ64rm:
- case X86::FsVMOVAPSrm:
- case X86::FsVMOVAPDrm:
- case X86::FsMOVAPSrm:
- case X86::FsMOVAPDrm:
// AVX-512
+ case X86::VMOVSSZrm:
+ case X86::VMOVSDZrm:
case X86::VMOVAPDZ128rm:
case X86::VMOVAPDZ256rm:
case X86::VMOVAPDZrm:
case X86::VMOVAPSZ128rm:
case X86::VMOVAPSZ256rm:
+ case X86::VMOVAPSZ128rm_NOVLX:
+ case X86::VMOVAPSZ256rm_NOVLX:
case X86::VMOVAPSZrm:
case X86::VMOVDQA32Z128rm:
case X86::VMOVDQA32Z256rm:
@@ -2463,15 +3412,20 @@ bool X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
case X86::VMOVDQU8Z128rm:
case X86::VMOVDQU8Z256rm:
case X86::VMOVDQU8Zrm:
+ case X86::VMOVUPDZ128rm:
+ case X86::VMOVUPDZ256rm:
+ case X86::VMOVUPDZrm:
case X86::VMOVUPSZ128rm:
case X86::VMOVUPSZ256rm:
+ case X86::VMOVUPSZ128rm_NOVLX:
+ case X86::VMOVUPSZ256rm_NOVLX:
case X86::VMOVUPSZrm: {
// Loads from constant pools are trivially rematerializable.
if (MI.getOperand(1 + X86::AddrBaseReg).isReg() &&
MI.getOperand(1 + X86::AddrScaleAmt).isImm() &&
MI.getOperand(1 + X86::AddrIndexReg).isReg() &&
MI.getOperand(1 + X86::AddrIndexReg).getReg() == 0 &&
- MI.isInvariantLoad(AA)) {
+ MI.isDereferenceableInvariantLoad(AA)) {
unsigned BaseReg = MI.getOperand(1 + X86::AddrBaseReg).getReg();
if (BaseReg == 0 || BaseReg == X86::RIP)
return true;
@@ -2694,24 +3648,8 @@ bool X86InstrInfo::classifyLEAReg(MachineInstr &MI, const MachineOperand &Src,
ImplicitOp.setImplicit();
NewSrc = getX86SubSuperRegister(Src.getReg(), 64);
- MachineBasicBlock::LivenessQueryResult LQR =
- MI.getParent()->computeRegisterLiveness(&getRegisterInfo(), NewSrc, MI);
-
- switch (LQR) {
- case MachineBasicBlock::LQR_Unknown:
- // We can't give sane liveness flags to the instruction, abandon LEA
- // formation.
- return false;
- case MachineBasicBlock::LQR_Live:
- isKill = MI.killsRegister(SrcReg);
- isUndef = false;
- break;
- default:
- // The physreg itself is dead, so we have to use it as an <undef>.
- isKill = false;
- isUndef = true;
- break;
- }
+ isKill = Src.isKill();
+ isUndef = Src.isUndef();
} else {
// Virtual register of the wrong class, we have to create a temporary 64-bit
// vreg to feed into the LEA.
@@ -3079,7 +4017,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
NewMI = addOffset(BuildMI(MF, MI.getDebugLoc(), get(X86::LEA64r))
.addOperand(Dest)
.addOperand(Src),
- MI.getOperand(2).getImm());
+ MI.getOperand(2));
break;
case X86::ADD32ri:
case X86::ADD32ri8:
@@ -3102,7 +4040,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
if (ImplicitOp.getReg() != 0)
MIB.addOperand(ImplicitOp);
- NewMI = addOffset(MIB, MI.getOperand(2).getImm());
+ NewMI = addOffset(MIB, MI.getOperand(2));
break;
}
case X86::ADD16ri:
@@ -3116,7 +4054,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
NewMI = addOffset(BuildMI(MF, MI.getDebugLoc(), get(X86::LEA16r))
.addOperand(Dest)
.addOperand(Src),
- MI.getOperand(2).getImm());
+ MI.getOperand(2));
break;
}
@@ -3133,156 +4071,236 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
return NewMI;
}
-/// Returns true if the given instruction opcode is FMA3.
-/// Otherwise, returns false.
-/// The second parameter is optional and is used as the second return from
-/// the function. It is set to true if the given instruction has FMA3 opcode
-/// that is used for lowering of scalar FMA intrinsics, and it is set to false
-/// otherwise.
-static bool isFMA3(unsigned Opcode, bool *IsIntrinsic = nullptr) {
- if (IsIntrinsic)
- *IsIntrinsic = false;
+/// This determines which of the three possible cases of a three source
+/// commute the source indexes correspond to, taking into account any mask
+/// operands. It also prevents commuting a passthru operand. Returns -1 if
+/// the commute isn't possible.
+/// Case 0 - Possible to commute the first and second operands.
+/// Case 1 - Possible to commute the first and third operands.
+/// Case 2 - Possible to commute the second and third operands.
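+/// For example, a k-masked instruction has operands
+/// (dst, src1, k-mask, src2, src3), so a request to commute operand indexes
+/// 3 and 4 corresponds to Case 2 once the mask operand is accounted for
+/// below.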
+static int getThreeSrcCommuteCase(uint64_t TSFlags, unsigned SrcOpIdx1,
+ unsigned SrcOpIdx2) {
+ // Put the lowest index to SrcOpIdx1 to simplify the checks below.
+ if (SrcOpIdx1 > SrcOpIdx2)
+ std::swap(SrcOpIdx1, SrcOpIdx2);
- switch (Opcode) {
- case X86::VFMADDSDr132r: case X86::VFMADDSDr132m:
- case X86::VFMADDSSr132r: case X86::VFMADDSSr132m:
- case X86::VFMSUBSDr132r: case X86::VFMSUBSDr132m:
- case X86::VFMSUBSSr132r: case X86::VFMSUBSSr132m:
- case X86::VFNMADDSDr132r: case X86::VFNMADDSDr132m:
- case X86::VFNMADDSSr132r: case X86::VFNMADDSSr132m:
- case X86::VFNMSUBSDr132r: case X86::VFNMSUBSDr132m:
- case X86::VFNMSUBSSr132r: case X86::VFNMSUBSSr132m:
-
- case X86::VFMADDSDr213r: case X86::VFMADDSDr213m:
- case X86::VFMADDSSr213r: case X86::VFMADDSSr213m:
- case X86::VFMSUBSDr213r: case X86::VFMSUBSDr213m:
- case X86::VFMSUBSSr213r: case X86::VFMSUBSSr213m:
- case X86::VFNMADDSDr213r: case X86::VFNMADDSDr213m:
- case X86::VFNMADDSSr213r: case X86::VFNMADDSSr213m:
- case X86::VFNMSUBSDr213r: case X86::VFNMSUBSDr213m:
- case X86::VFNMSUBSSr213r: case X86::VFNMSUBSSr213m:
-
- case X86::VFMADDSDr231r: case X86::VFMADDSDr231m:
- case X86::VFMADDSSr231r: case X86::VFMADDSSr231m:
- case X86::VFMSUBSDr231r: case X86::VFMSUBSDr231m:
- case X86::VFMSUBSSr231r: case X86::VFMSUBSSr231m:
- case X86::VFNMADDSDr231r: case X86::VFNMADDSDr231m:
- case X86::VFNMADDSSr231r: case X86::VFNMADDSSr231m:
- case X86::VFNMSUBSDr231r: case X86::VFNMSUBSDr231m:
- case X86::VFNMSUBSSr231r: case X86::VFNMSUBSSr231m:
-
- case X86::VFMADDSUBPDr132r: case X86::VFMADDSUBPDr132m:
- case X86::VFMADDSUBPSr132r: case X86::VFMADDSUBPSr132m:
- case X86::VFMSUBADDPDr132r: case X86::VFMSUBADDPDr132m:
- case X86::VFMSUBADDPSr132r: case X86::VFMSUBADDPSr132m:
- case X86::VFMADDSUBPDr132rY: case X86::VFMADDSUBPDr132mY:
- case X86::VFMADDSUBPSr132rY: case X86::VFMADDSUBPSr132mY:
- case X86::VFMSUBADDPDr132rY: case X86::VFMSUBADDPDr132mY:
- case X86::VFMSUBADDPSr132rY: case X86::VFMSUBADDPSr132mY:
-
- case X86::VFMADDPDr132r: case X86::VFMADDPDr132m:
- case X86::VFMADDPSr132r: case X86::VFMADDPSr132m:
- case X86::VFMSUBPDr132r: case X86::VFMSUBPDr132m:
- case X86::VFMSUBPSr132r: case X86::VFMSUBPSr132m:
- case X86::VFNMADDPDr132r: case X86::VFNMADDPDr132m:
- case X86::VFNMADDPSr132r: case X86::VFNMADDPSr132m:
- case X86::VFNMSUBPDr132r: case X86::VFNMSUBPDr132m:
- case X86::VFNMSUBPSr132r: case X86::VFNMSUBPSr132m:
- case X86::VFMADDPDr132rY: case X86::VFMADDPDr132mY:
- case X86::VFMADDPSr132rY: case X86::VFMADDPSr132mY:
- case X86::VFMSUBPDr132rY: case X86::VFMSUBPDr132mY:
- case X86::VFMSUBPSr132rY: case X86::VFMSUBPSr132mY:
- case X86::VFNMADDPDr132rY: case X86::VFNMADDPDr132mY:
- case X86::VFNMADDPSr132rY: case X86::VFNMADDPSr132mY:
- case X86::VFNMSUBPDr132rY: case X86::VFNMSUBPDr132mY:
- case X86::VFNMSUBPSr132rY: case X86::VFNMSUBPSr132mY:
-
- case X86::VFMADDSUBPDr213r: case X86::VFMADDSUBPDr213m:
- case X86::VFMADDSUBPSr213r: case X86::VFMADDSUBPSr213m:
- case X86::VFMSUBADDPDr213r: case X86::VFMSUBADDPDr213m:
- case X86::VFMSUBADDPSr213r: case X86::VFMSUBADDPSr213m:
- case X86::VFMADDSUBPDr213rY: case X86::VFMADDSUBPDr213mY:
- case X86::VFMADDSUBPSr213rY: case X86::VFMADDSUBPSr213mY:
- case X86::VFMSUBADDPDr213rY: case X86::VFMSUBADDPDr213mY:
- case X86::VFMSUBADDPSr213rY: case X86::VFMSUBADDPSr213mY:
-
- case X86::VFMADDPDr213r: case X86::VFMADDPDr213m:
- case X86::VFMADDPSr213r: case X86::VFMADDPSr213m:
- case X86::VFMSUBPDr213r: case X86::VFMSUBPDr213m:
- case X86::VFMSUBPSr213r: case X86::VFMSUBPSr213m:
- case X86::VFNMADDPDr213r: case X86::VFNMADDPDr213m:
- case X86::VFNMADDPSr213r: case X86::VFNMADDPSr213m:
- case X86::VFNMSUBPDr213r: case X86::VFNMSUBPDr213m:
- case X86::VFNMSUBPSr213r: case X86::VFNMSUBPSr213m:
- case X86::VFMADDPDr213rY: case X86::VFMADDPDr213mY:
- case X86::VFMADDPSr213rY: case X86::VFMADDPSr213mY:
- case X86::VFMSUBPDr213rY: case X86::VFMSUBPDr213mY:
- case X86::VFMSUBPSr213rY: case X86::VFMSUBPSr213mY:
- case X86::VFNMADDPDr213rY: case X86::VFNMADDPDr213mY:
- case X86::VFNMADDPSr213rY: case X86::VFNMADDPSr213mY:
- case X86::VFNMSUBPDr213rY: case X86::VFNMSUBPDr213mY:
- case X86::VFNMSUBPSr213rY: case X86::VFNMSUBPSr213mY:
-
- case X86::VFMADDSUBPDr231r: case X86::VFMADDSUBPDr231m:
- case X86::VFMADDSUBPSr231r: case X86::VFMADDSUBPSr231m:
- case X86::VFMSUBADDPDr231r: case X86::VFMSUBADDPDr231m:
- case X86::VFMSUBADDPSr231r: case X86::VFMSUBADDPSr231m:
- case X86::VFMADDSUBPDr231rY: case X86::VFMADDSUBPDr231mY:
- case X86::VFMADDSUBPSr231rY: case X86::VFMADDSUBPSr231mY:
- case X86::VFMSUBADDPDr231rY: case X86::VFMSUBADDPDr231mY:
- case X86::VFMSUBADDPSr231rY: case X86::VFMSUBADDPSr231mY:
-
- case X86::VFMADDPDr231r: case X86::VFMADDPDr231m:
- case X86::VFMADDPSr231r: case X86::VFMADDPSr231m:
- case X86::VFMSUBPDr231r: case X86::VFMSUBPDr231m:
- case X86::VFMSUBPSr231r: case X86::VFMSUBPSr231m:
- case X86::VFNMADDPDr231r: case X86::VFNMADDPDr231m:
- case X86::VFNMADDPSr231r: case X86::VFNMADDPSr231m:
- case X86::VFNMSUBPDr231r: case X86::VFNMSUBPDr231m:
- case X86::VFNMSUBPSr231r: case X86::VFNMSUBPSr231m:
- case X86::VFMADDPDr231rY: case X86::VFMADDPDr231mY:
- case X86::VFMADDPSr231rY: case X86::VFMADDPSr231mY:
- case X86::VFMSUBPDr231rY: case X86::VFMSUBPDr231mY:
- case X86::VFMSUBPSr231rY: case X86::VFMSUBPSr231mY:
- case X86::VFNMADDPDr231rY: case X86::VFNMADDPDr231mY:
- case X86::VFNMADDPSr231rY: case X86::VFNMADDPSr231mY:
- case X86::VFNMSUBPDr231rY: case X86::VFNMSUBPDr231mY:
- case X86::VFNMSUBPSr231rY: case X86::VFNMSUBPSr231mY:
- return true;
+ unsigned Op1 = 1, Op2 = 2, Op3 = 3;
+ if (X86II::isKMasked(TSFlags)) {
+ // The k-mask operand cannot be commuted.
+ if (SrcOpIdx1 == 2)
+ return -1;
+
+ // For k-zero-masked operations it is Ok to commute the first vector
+ // operand.
+    // For regular (merge) k-masked operations we make a conservative choice:
+    // the elements of the first vector operand whose corresponding k-mask bit
+    // is 0 are copied unchanged to the result, so commuting that operand
+    // would change the instruction's semantics.
+ // TODO/FIXME: The commute still may be legal if it is known that the
+ // k-mask operand is set to either all ones or all zeroes.
+ // It is also Ok to commute the 1st operand if all users of MI use only
+ // the elements enabled by the k-mask operand. For example,
+ // v4 = VFMADD213PSZrk v1, k, v2, v3; // v1[i] = k[i] ? v2[i]*v1[i]+v3[i]
+ // : v1[i];
+ // VMOVAPSZmrk <mem_addr>, k, v4; // this is the ONLY user of v4 ->
+ // // Ok, to commute v1 in FMADD213PSZrk.
+ if (X86II::isKMergeMasked(TSFlags) && SrcOpIdx1 == Op1)
+ return -1;
+ Op2++;
+ Op3++;
+ }
+
+ if (SrcOpIdx1 == Op1 && SrcOpIdx2 == Op2)
+ return 0;
+ if (SrcOpIdx1 == Op1 && SrcOpIdx2 == Op3)
+ return 1;
+ if (SrcOpIdx1 == Op2 && SrcOpIdx2 == Op3)
+ return 2;
+ return -1;
+}
- case X86::VFMADDSDr132r_Int: case X86::VFMADDSDr132m_Int:
- case X86::VFMADDSSr132r_Int: case X86::VFMADDSSr132m_Int:
- case X86::VFMSUBSDr132r_Int: case X86::VFMSUBSDr132m_Int:
- case X86::VFMSUBSSr132r_Int: case X86::VFMSUBSSr132m_Int:
- case X86::VFNMADDSDr132r_Int: case X86::VFNMADDSDr132m_Int:
- case X86::VFNMADDSSr132r_Int: case X86::VFNMADDSSr132m_Int:
- case X86::VFNMSUBSDr132r_Int: case X86::VFNMSUBSDr132m_Int:
- case X86::VFNMSUBSSr132r_Int: case X86::VFNMSUBSSr132m_Int:
-
- case X86::VFMADDSDr213r_Int: case X86::VFMADDSDr213m_Int:
- case X86::VFMADDSSr213r_Int: case X86::VFMADDSSr213m_Int:
- case X86::VFMSUBSDr213r_Int: case X86::VFMSUBSDr213m_Int:
- case X86::VFMSUBSSr213r_Int: case X86::VFMSUBSSr213m_Int:
- case X86::VFNMADDSDr213r_Int: case X86::VFNMADDSDr213m_Int:
- case X86::VFNMADDSSr213r_Int: case X86::VFNMADDSSr213m_Int:
- case X86::VFNMSUBSDr213r_Int: case X86::VFNMSUBSDr213m_Int:
- case X86::VFNMSUBSSr213r_Int: case X86::VFNMSUBSSr213m_Int:
-
- case X86::VFMADDSDr231r_Int: case X86::VFMADDSDr231m_Int:
- case X86::VFMADDSSr231r_Int: case X86::VFMADDSSr231m_Int:
- case X86::VFMSUBSDr231r_Int: case X86::VFMSUBSDr231m_Int:
- case X86::VFMSUBSSr231r_Int: case X86::VFMSUBSSr231m_Int:
- case X86::VFNMADDSDr231r_Int: case X86::VFNMADDSDr231m_Int:
- case X86::VFNMADDSSr231r_Int: case X86::VFNMADDSSr231m_Int:
- case X86::VFNMSUBSDr231r_Int: case X86::VFNMSUBSDr231m_Int:
- case X86::VFNMSUBSSr231r_Int: case X86::VFNMSUBSSr231m_Int:
- if (IsIntrinsic)
- *IsIntrinsic = true;
- return true;
- default:
- return false;
+unsigned X86InstrInfo::getFMA3OpcodeToCommuteOperands(
+ const MachineInstr &MI, unsigned SrcOpIdx1, unsigned SrcOpIdx2,
+ const X86InstrFMA3Group &FMA3Group) const {
+
+ unsigned Opc = MI.getOpcode();
+
+ // Put the lowest index to SrcOpIdx1 to simplify the checks below.
+ if (SrcOpIdx1 > SrcOpIdx2)
+ std::swap(SrcOpIdx1, SrcOpIdx2);
+
+ // TODO: Commuting the 1st operand of FMA*_Int requires some additional
+ // analysis. The commute optimization is legal only if all users of FMA*_Int
+  // use only the lowest element of the FMA*_Int instruction. Such analysis is
+  // not implemented yet, so just return 0 in that case.
+  // When such analysis becomes available, this will be the right place to
+  // call it.
+ if (FMA3Group.isIntrinsic() && SrcOpIdx1 == 1)
+ return 0;
+
+ // Determine which case this commute is or if it can't be done.
+ int Case = getThreeSrcCommuteCase(MI.getDesc().TSFlags, SrcOpIdx1, SrcOpIdx2);
+ if (Case < 0)
+ return 0;
+
+ // Define the FMA forms mapping array that helps to map input FMA form
+ // to output FMA form to preserve the operation semantics after
+ // commuting the operands.
+ const unsigned Form132Index = 0;
+ const unsigned Form213Index = 1;
+ const unsigned Form231Index = 2;
+ static const unsigned FormMapping[][3] = {
+ // 0: SrcOpIdx1 == 1 && SrcOpIdx2 == 2;
+ // FMA132 A, C, b; ==> FMA231 C, A, b;
+ // FMA213 B, A, c; ==> FMA213 A, B, c;
+ // FMA231 C, A, b; ==> FMA132 A, C, b;
+ { Form231Index, Form213Index, Form132Index },
+ // 1: SrcOpIdx1 == 1 && SrcOpIdx2 == 3;
+ // FMA132 A, c, B; ==> FMA132 B, c, A;
+ // FMA213 B, a, C; ==> FMA231 C, a, B;
+ // FMA231 C, a, B; ==> FMA213 B, a, C;
+ { Form132Index, Form231Index, Form213Index },
+ // 2: SrcOpIdx1 == 2 && SrcOpIdx2 == 3;
+ // FMA132 a, C, B; ==> FMA213 a, B, C;
+ // FMA213 b, A, C; ==> FMA132 b, C, A;
+ // FMA231 c, A, B; ==> FMA231 c, B, A;
+ { Form213Index, Form132Index, Form231Index }
+ };
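+  // For example, the 213 form computes a = b*a + c. After swapping b and c in
+  // the operand list, the 132 form computes a = a*(new third) + (new second)
+  // = a*b + c, the same value, which is why Case 2 maps the 213 index to the
+  // 132 index.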
+
+ unsigned FMAForms[3];
+ if (FMA3Group.isRegOpcodeFromGroup(Opc)) {
+ FMAForms[0] = FMA3Group.getReg132Opcode();
+ FMAForms[1] = FMA3Group.getReg213Opcode();
+ FMAForms[2] = FMA3Group.getReg231Opcode();
+ } else {
+ FMAForms[0] = FMA3Group.getMem132Opcode();
+ FMAForms[1] = FMA3Group.getMem213Opcode();
+ FMAForms[2] = FMA3Group.getMem231Opcode();
+ }
+ unsigned FormIndex;
+ for (FormIndex = 0; FormIndex < 3; FormIndex++)
+ if (Opc == FMAForms[FormIndex])
+ break;
+
+ // Everything is ready, just adjust the FMA opcode and return it.
+ FormIndex = FormMapping[Case][FormIndex];
+ return FMAForms[FormIndex];
+}
+
+static bool commuteVPTERNLOG(MachineInstr &MI, unsigned SrcOpIdx1,
+ unsigned SrcOpIdx2) {
+ uint64_t TSFlags = MI.getDesc().TSFlags;
+
+ // Determine which case this commute is or if it can't be done.
+ int Case = getThreeSrcCommuteCase(TSFlags, SrcOpIdx1, SrcOpIdx2);
+ if (Case < 0)
+ return false;
+
+ // For each case we need to swap two pairs of bits in the final immediate.
+ static const uint8_t SwapMasks[3][4] = {
+ { 0x04, 0x10, 0x08, 0x20 }, // Swap bits 2/4 and 3/5.
+ { 0x02, 0x10, 0x08, 0x40 }, // Swap bits 1/4 and 3/6.
+ { 0x02, 0x04, 0x20, 0x40 }, // Swap bits 1/2 and 5/6.
+ };
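+  // The immediate is an 8-entry truth table indexed by the bit triple
+  // (src1, src2, src3), so commuting two sources swaps the table entries
+  // whose indexes differ only in those two bits. For example, with Case 0
+  // (commute src1 and src2) an immediate of 0xCA (src1 ? src2 : src3)
+  // becomes 0xE2 (src2 ? src1 : src3).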
+
+ uint8_t Imm = MI.getOperand(MI.getNumOperands()-1).getImm();
+ // Clear out the bits we are swapping.
+ uint8_t NewImm = Imm & ~(SwapMasks[Case][0] | SwapMasks[Case][1] |
+ SwapMasks[Case][2] | SwapMasks[Case][3]);
+ // If the immediate had a bit of the pair set, then set the opposite bit.
+ if (Imm & SwapMasks[Case][0]) NewImm |= SwapMasks[Case][1];
+ if (Imm & SwapMasks[Case][1]) NewImm |= SwapMasks[Case][0];
+ if (Imm & SwapMasks[Case][2]) NewImm |= SwapMasks[Case][3];
+ if (Imm & SwapMasks[Case][3]) NewImm |= SwapMasks[Case][2];
+ MI.getOperand(MI.getNumOperands()-1).setImm(NewImm);
+
+ return true;
+}
+
+// Returns true if this is a VPERMI2 or VPERMT2 instruction that can be
+// commuted.
+static bool isCommutableVPERMV3Instruction(unsigned Opcode) {
+#define VPERM_CASES(Suffix) \
+ case X86::VPERMI2##Suffix##128rr: case X86::VPERMT2##Suffix##128rr: \
+ case X86::VPERMI2##Suffix##256rr: case X86::VPERMT2##Suffix##256rr: \
+ case X86::VPERMI2##Suffix##rr: case X86::VPERMT2##Suffix##rr: \
+ case X86::VPERMI2##Suffix##128rm: case X86::VPERMT2##Suffix##128rm: \
+ case X86::VPERMI2##Suffix##256rm: case X86::VPERMT2##Suffix##256rm: \
+ case X86::VPERMI2##Suffix##rm: case X86::VPERMT2##Suffix##rm: \
+ case X86::VPERMI2##Suffix##128rrkz: case X86::VPERMT2##Suffix##128rrkz: \
+ case X86::VPERMI2##Suffix##256rrkz: case X86::VPERMT2##Suffix##256rrkz: \
+ case X86::VPERMI2##Suffix##rrkz: case X86::VPERMT2##Suffix##rrkz: \
+ case X86::VPERMI2##Suffix##128rmkz: case X86::VPERMT2##Suffix##128rmkz: \
+ case X86::VPERMI2##Suffix##256rmkz: case X86::VPERMT2##Suffix##256rmkz: \
+ case X86::VPERMI2##Suffix##rmkz: case X86::VPERMT2##Suffix##rmkz:
+
+#define VPERM_CASES_BROADCAST(Suffix) \
+ VPERM_CASES(Suffix) \
+ case X86::VPERMI2##Suffix##128rmb: case X86::VPERMT2##Suffix##128rmb: \
+ case X86::VPERMI2##Suffix##256rmb: case X86::VPERMT2##Suffix##256rmb: \
+ case X86::VPERMI2##Suffix##rmb: case X86::VPERMT2##Suffix##rmb: \
+ case X86::VPERMI2##Suffix##128rmbkz: case X86::VPERMT2##Suffix##128rmbkz: \
+ case X86::VPERMI2##Suffix##256rmbkz: case X86::VPERMT2##Suffix##256rmbkz: \
+ case X86::VPERMI2##Suffix##rmbkz: case X86::VPERMT2##Suffix##rmbkz:
+
+ switch (Opcode) {
+ default: return false;
+ VPERM_CASES(B)
+ VPERM_CASES_BROADCAST(D)
+ VPERM_CASES_BROADCAST(PD)
+ VPERM_CASES_BROADCAST(PS)
+ VPERM_CASES_BROADCAST(Q)
+ VPERM_CASES(W)
+ return true;
}
- llvm_unreachable("Opcode not handled by the switch");
+#undef VPERM_CASES_BROADCAST
+#undef VPERM_CASES
+}
+
+// Returns the commuted opcode for VPERMI2 and VPERMT2 instructions by
+// switching from the I opcode to the T opcode and vice versa.
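+// Commuting the first two vector sources swaps which register supplies the
+// permute indices, which is exactly the difference between the two forms.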
+static unsigned getCommutedVPERMV3Opcode(unsigned Opcode) {
+#define VPERM_CASES(Orig, New) \
+ case X86::Orig##128rr: return X86::New##128rr; \
+ case X86::Orig##128rrkz: return X86::New##128rrkz; \
+ case X86::Orig##128rm: return X86::New##128rm; \
+ case X86::Orig##128rmkz: return X86::New##128rmkz; \
+ case X86::Orig##256rr: return X86::New##256rr; \
+ case X86::Orig##256rrkz: return X86::New##256rrkz; \
+ case X86::Orig##256rm: return X86::New##256rm; \
+ case X86::Orig##256rmkz: return X86::New##256rmkz; \
+ case X86::Orig##rr: return X86::New##rr; \
+ case X86::Orig##rrkz: return X86::New##rrkz; \
+ case X86::Orig##rm: return X86::New##rm; \
+ case X86::Orig##rmkz: return X86::New##rmkz;
+
+#define VPERM_CASES_BROADCAST(Orig, New) \
+ VPERM_CASES(Orig, New) \
+ case X86::Orig##128rmb: return X86::New##128rmb; \
+ case X86::Orig##128rmbkz: return X86::New##128rmbkz; \
+ case X86::Orig##256rmb: return X86::New##256rmb; \
+ case X86::Orig##256rmbkz: return X86::New##256rmbkz; \
+ case X86::Orig##rmb: return X86::New##rmb; \
+ case X86::Orig##rmbkz: return X86::New##rmbkz;
+
+ switch (Opcode) {
+ VPERM_CASES(VPERMI2B, VPERMT2B)
+ VPERM_CASES_BROADCAST(VPERMI2D, VPERMT2D)
+ VPERM_CASES_BROADCAST(VPERMI2PD, VPERMT2PD)
+ VPERM_CASES_BROADCAST(VPERMI2PS, VPERMT2PS)
+ VPERM_CASES_BROADCAST(VPERMI2Q, VPERMT2Q)
+ VPERM_CASES(VPERMI2W, VPERMT2W)
+ VPERM_CASES(VPERMT2B, VPERMI2B)
+ VPERM_CASES_BROADCAST(VPERMT2D, VPERMI2D)
+ VPERM_CASES_BROADCAST(VPERMT2PD, VPERMI2PD)
+ VPERM_CASES_BROADCAST(VPERMT2PS, VPERMI2PS)
+ VPERM_CASES_BROADCAST(VPERMT2Q, VPERMI2Q)
+ VPERM_CASES(VPERMT2W, VPERMI2W)
+ }
+
+ llvm_unreachable("Unreachable!");
+#undef VPERM_CASES_BROADCAST
+#undef VPERM_CASES
}
MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
@@ -3352,6 +4370,39 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
OpIdx1, OpIdx2);
}
+ case X86::MOVSDrr:
+ case X86::MOVSSrr:
+ case X86::VMOVSDrr:
+ case X86::VMOVSSrr:{
+ // On SSE41 or later we can commute a MOVSS/MOVSD to a BLENDPS/BLENDPD.
+ if (!Subtarget.hasSSE41())
+ return nullptr;
+
+ unsigned Mask, Opc;
+ switch (MI.getOpcode()) {
+ default: llvm_unreachable("Unreachable!");
+ case X86::MOVSDrr: Opc = X86::BLENDPDrri; Mask = 0x02; break;
+ case X86::MOVSSrr: Opc = X86::BLENDPSrri; Mask = 0x0E; break;
+ case X86::VMOVSDrr: Opc = X86::VBLENDPDrri; Mask = 0x02; break;
+ case X86::VMOVSSrr: Opc = X86::VBLENDPSrri; Mask = 0x0E; break;
+ }
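+    // The blend masks are chosen for the commuted operand order: once the
+    // sources are swapped below, BLENDPD 0x02 / BLENDPS 0x0E take the upper
+    // element(s) from the original first source and the low element from the
+    // original second source, matching MOVSD/MOVSS semantics.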
+
+ // MOVSD/MOVSS's 2nd operand is a FR64/FR32 reg class - we need to copy
+ // this over to a VR128 class like the 1st operand to use a BLENDPD/BLENDPS.
+ auto &MRI = MI.getParent()->getParent()->getRegInfo();
+ auto VR128RC = MRI.getRegClass(MI.getOperand(1).getReg());
+ unsigned VR128 = MRI.createVirtualRegister(VR128RC);
+ BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(TargetOpcode::COPY),
+ VR128)
+ .addReg(MI.getOperand(2).getReg());
+
+ auto &WorkingMI = cloneIfNew(MI);
+ WorkingMI.setDesc(get(Opc));
+ WorkingMI.getOperand(2).setReg(VR128);
+ WorkingMI.addOperand(MachineOperand::CreateImm(Mask));
+ return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
+ OpIdx1, OpIdx2);
+ }
case X86::PCLMULQDQrr:
case X86::VPCLMULQDQrr:{
// SRC1 64bits = Imm[0] ? SRC1[127:64] : SRC1[63:0]
@@ -3364,12 +4415,24 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
OpIdx1, OpIdx2);
}
+ case X86::CMPSDrr:
+ case X86::CMPSSrr:
case X86::CMPPDrri:
case X86::CMPPSrri:
+ case X86::VCMPSDrr:
+ case X86::VCMPSSrr:
case X86::VCMPPDrri:
case X86::VCMPPSrri:
case X86::VCMPPDYrri:
- case X86::VCMPPSYrri: {
+ case X86::VCMPPSYrri:
+ case X86::VCMPSDZrr:
+ case X86::VCMPSSZrr:
+ case X86::VCMPPDZrri:
+ case X86::VCMPPSZrri:
+ case X86::VCMPPDZ128rri:
+ case X86::VCMPPSZ128rri:
+ case X86::VCMPPDZ256rri:
+ case X86::VCMPPSZ256rri: {
// Float comparison can be safely commuted for
// Ordered/Unordered/Equal/NotEqual tests
unsigned Imm = MI.getOperand(3).getImm() & 0x7;
@@ -3383,6 +4446,37 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
return nullptr;
}
}
+ case X86::VPCMPBZ128rri: case X86::VPCMPUBZ128rri:
+ case X86::VPCMPBZ256rri: case X86::VPCMPUBZ256rri:
+ case X86::VPCMPBZrri: case X86::VPCMPUBZrri:
+ case X86::VPCMPDZ128rri: case X86::VPCMPUDZ128rri:
+ case X86::VPCMPDZ256rri: case X86::VPCMPUDZ256rri:
+ case X86::VPCMPDZrri: case X86::VPCMPUDZrri:
+ case X86::VPCMPQZ128rri: case X86::VPCMPUQZ128rri:
+ case X86::VPCMPQZ256rri: case X86::VPCMPUQZ256rri:
+ case X86::VPCMPQZrri: case X86::VPCMPUQZrri:
+ case X86::VPCMPWZ128rri: case X86::VPCMPUWZ128rri:
+ case X86::VPCMPWZ256rri: case X86::VPCMPUWZ256rri:
+ case X86::VPCMPWZrri: case X86::VPCMPUWZrri: {
+ // Flip comparison mode immediate (if necessary).
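+    // Commuting the sources turns a test like LT (a < b) into the equivalent
+    // test on the swapped operands, b > a, which VPCMP encodes as NLE;
+    // EQ, NE, FALSE and TRUE are symmetric and keep the same immediate.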
+ unsigned Imm = MI.getOperand(3).getImm() & 0x7;
+ switch (Imm) {
+ default: llvm_unreachable("Unreachable!");
+ case 0x01: Imm = 0x06; break; // LT -> NLE
+ case 0x02: Imm = 0x05; break; // LE -> NLT
+ case 0x05: Imm = 0x02; break; // NLT -> LE
+ case 0x06: Imm = 0x01; break; // NLE -> LT
+ case 0x00: // EQ
+ case 0x03: // FALSE
+ case 0x04: // NE
+ case 0x07: // TRUE
+ break;
+ }
+ auto &WorkingMI = cloneIfNew(MI);
+ WorkingMI.getOperand(3).setImm(Imm);
+ return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
+ OpIdx1, OpIdx2);
+ }
case X86::VPCOMBri: case X86::VPCOMUBri:
case X86::VPCOMDri: case X86::VPCOMUDri:
case X86::VPCOMQri: case X86::VPCOMUQri:
@@ -3390,6 +4484,7 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
// Flip comparison mode immediate (if necessary).
unsigned Imm = MI.getOperand(3).getImm() & 0x7;
switch (Imm) {
+ default: llvm_unreachable("Unreachable!");
case 0x00: Imm = 0x02; break; // LT -> GT
case 0x01: Imm = 0x03; break; // LE -> GE
case 0x02: Imm = 0x00; break; // GT -> LT
@@ -3398,7 +4493,6 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
case 0x05: // NE
case 0x06: // FALSE
case 0x07: // TRUE
- default:
break;
}
auto &WorkingMI = cloneIfNew(MI);
@@ -3417,6 +4511,22 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
OpIdx1, OpIdx2);
}
+ case X86::MOVHLPSrr:
+ case X86::UNPCKHPDrr: {
+ if (!Subtarget.hasSSE2())
+ return nullptr;
+
+ unsigned Opc = MI.getOpcode();
+ switch (Opc) {
+ default: llvm_unreachable("Unreachable!");
+ case X86::MOVHLPSrr: Opc = X86::UNPCKHPDrr; break;
+ case X86::UNPCKHPDrr: Opc = X86::MOVHLPSrr; break;
+ }
+ auto &WorkingMI = cloneIfNew(MI);
+ WorkingMI.setDesc(get(Opc));
+ return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
+ OpIdx1, OpIdx2);
+ }
case X86::CMOVB16rr: case X86::CMOVB32rr: case X86::CMOVB64rr:
case X86::CMOVAE16rr: case X86::CMOVAE32rr: case X86::CMOVAE64rr:
case X86::CMOVE16rr: case X86::CMOVE32rr: case X86::CMOVE64rr:
@@ -3490,9 +4600,44 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
OpIdx1, OpIdx2);
}
- default:
- if (isFMA3(MI.getOpcode())) {
- unsigned Opc = getFMA3OpcodeToCommuteOperands(MI, OpIdx1, OpIdx2);
+ case X86::VPTERNLOGDZrri: case X86::VPTERNLOGDZrmi:
+ case X86::VPTERNLOGDZ128rri: case X86::VPTERNLOGDZ128rmi:
+ case X86::VPTERNLOGDZ256rri: case X86::VPTERNLOGDZ256rmi:
+ case X86::VPTERNLOGQZrri: case X86::VPTERNLOGQZrmi:
+ case X86::VPTERNLOGQZ128rri: case X86::VPTERNLOGQZ128rmi:
+ case X86::VPTERNLOGQZ256rri: case X86::VPTERNLOGQZ256rmi:
+ case X86::VPTERNLOGDZrrik: case X86::VPTERNLOGDZrmik:
+ case X86::VPTERNLOGDZ128rrik: case X86::VPTERNLOGDZ128rmik:
+ case X86::VPTERNLOGDZ256rrik: case X86::VPTERNLOGDZ256rmik:
+ case X86::VPTERNLOGQZrrik: case X86::VPTERNLOGQZrmik:
+ case X86::VPTERNLOGQZ128rrik: case X86::VPTERNLOGQZ128rmik:
+ case X86::VPTERNLOGQZ256rrik: case X86::VPTERNLOGQZ256rmik:
+ case X86::VPTERNLOGDZrrikz: case X86::VPTERNLOGDZrmikz:
+ case X86::VPTERNLOGDZ128rrikz: case X86::VPTERNLOGDZ128rmikz:
+ case X86::VPTERNLOGDZ256rrikz: case X86::VPTERNLOGDZ256rmikz:
+ case X86::VPTERNLOGQZrrikz: case X86::VPTERNLOGQZrmikz:
+ case X86::VPTERNLOGQZ128rrikz: case X86::VPTERNLOGQZ128rmikz:
+ case X86::VPTERNLOGQZ256rrikz: case X86::VPTERNLOGQZ256rmikz: {
+ auto &WorkingMI = cloneIfNew(MI);
+ if (!commuteVPTERNLOG(WorkingMI, OpIdx1, OpIdx2))
+ return nullptr;
+ return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
+ OpIdx1, OpIdx2);
+ }
+ default: {
+ if (isCommutableVPERMV3Instruction(MI.getOpcode())) {
+ unsigned Opc = getCommutedVPERMV3Opcode(MI.getOpcode());
+ auto &WorkingMI = cloneIfNew(MI);
+ WorkingMI.setDesc(get(Opc));
+ return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
+ OpIdx1, OpIdx2);
+ }
+
+ const X86InstrFMA3Group *FMA3Group =
+ X86InstrFMA3Info::getFMA3Group(MI.getOpcode());
+ if (FMA3Group) {
+ unsigned Opc =
+ getFMA3OpcodeToCommuteOperands(MI, OpIdx1, OpIdx2, *FMA3Group);
if (Opc == 0)
return nullptr;
auto &WorkingMI = cloneIfNew(MI);
@@ -3503,22 +4648,54 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
}
+ }
}
-bool X86InstrInfo::findFMA3CommutedOpIndices(MachineInstr &MI,
- unsigned &SrcOpIdx1,
- unsigned &SrcOpIdx2) const {
+bool X86InstrInfo::findFMA3CommutedOpIndices(
+ const MachineInstr &MI, unsigned &SrcOpIdx1, unsigned &SrcOpIdx2,
+ const X86InstrFMA3Group &FMA3Group) const {
- unsigned RegOpsNum = isMem(MI, 3) ? 2 : 3;
+ if (!findThreeSrcCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2))
+ return false;
+
+ // Check if we can adjust the opcode to preserve the semantics when
+  // commuting the register operands.
+ return getFMA3OpcodeToCommuteOperands(MI, SrcOpIdx1, SrcOpIdx2, FMA3Group) != 0;
+}
+
+bool X86InstrInfo::findThreeSrcCommutedOpIndices(const MachineInstr &MI,
+ unsigned &SrcOpIdx1,
+ unsigned &SrcOpIdx2) const {
+ uint64_t TSFlags = MI.getDesc().TSFlags;
+
+ unsigned FirstCommutableVecOp = 1;
+ unsigned LastCommutableVecOp = 3;
+ unsigned KMaskOp = 0;
+ if (X86II::isKMasked(TSFlags)) {
+ // The k-mask operand has index = 2 for masked and zero-masked operations.
+ KMaskOp = 2;
+
+ // The operand with index = 1 is used as a source for those elements for
+ // which the corresponding bit in the k-mask is set to 0.
+ if (X86II::isKMergeMasked(TSFlags))
+ FirstCommutableVecOp = 3;
+
+ LastCommutableVecOp++;
+ }
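+  // For example, a zero-masked FMA3 opcode (register form) has operands
+  // (dst, src1, k-mask, src2, src3): indexes 1, 3 and 4 are commutable and
+  // the k-mask at index 2 is skipped, while a merge-masked opcode only allows
+  // commuting indexes 3 and 4 because src1 also acts as the passthru value.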
+
+ if (isMem(MI, LastCommutableVecOp))
+ LastCommutableVecOp--;
  // Only the operands from FirstCommutableVecOp to LastCommutableVecOp are
  // commutable.
// Also, the value 'CommuteAnyOperandIndex' is valid here as it means
// that the operand is not specified/fixed.
if (SrcOpIdx1 != CommuteAnyOperandIndex &&
- (SrcOpIdx1 < 1 || SrcOpIdx1 > RegOpsNum))
+ (SrcOpIdx1 < FirstCommutableVecOp || SrcOpIdx1 > LastCommutableVecOp ||
+ SrcOpIdx1 == KMaskOp))
return false;
if (SrcOpIdx2 != CommuteAnyOperandIndex &&
- (SrcOpIdx2 < 1 || SrcOpIdx2 > RegOpsNum))
+ (SrcOpIdx2 < FirstCommutableVecOp || SrcOpIdx2 > LastCommutableVecOp ||
+ SrcOpIdx2 == KMaskOp))
return false;
// Look for two different register operands assumed to be commutable
@@ -3533,7 +4710,7 @@ bool X86InstrInfo::findFMA3CommutedOpIndices(MachineInstr &MI,
if (SrcOpIdx1 == SrcOpIdx2)
// Both of operands are not fixed. By default set one of commutable
// operands to the last register operand of the instruction.
- CommutableOpIdx2 = RegOpsNum;
+ CommutableOpIdx2 = LastCommutableVecOp;
else if (SrcOpIdx2 == CommuteAnyOperandIndex)
// Only one of operands is not fixed.
CommutableOpIdx2 = SrcOpIdx1;
@@ -3541,7 +4718,12 @@ bool X86InstrInfo::findFMA3CommutedOpIndices(MachineInstr &MI,
// CommutableOpIdx2 is well defined now. Let's choose another commutable
// operand and assign its index to CommutableOpIdx1.
unsigned Op2Reg = MI.getOperand(CommutableOpIdx2).getReg();
- for (CommutableOpIdx1 = RegOpsNum; CommutableOpIdx1 > 0; CommutableOpIdx1--) {
+ for (CommutableOpIdx1 = LastCommutableVecOp;
+ CommutableOpIdx1 >= FirstCommutableVecOp; CommutableOpIdx1--) {
+ // Just ignore and skip the k-mask operand.
+ if (CommutableOpIdx1 == KMaskOp)
+ continue;
+
// The commuted operands must have different registers.
// Otherwise, the commute transformation does not change anything and
// is useless then.
@@ -3550,7 +4732,7 @@ bool X86InstrInfo::findFMA3CommutedOpIndices(MachineInstr &MI,
}
// No appropriate commutable operands were found.
- if (CommutableOpIdx1 == 0)
+ if (CommutableOpIdx1 < FirstCommutableVecOp)
return false;
// Assign the found pair of commutable indices to SrcOpIdx1 and SrcOpidx2
@@ -3560,208 +4742,34 @@ bool X86InstrInfo::findFMA3CommutedOpIndices(MachineInstr &MI,
return false;
}
- // Check if we can adjust the opcode to preserve the semantics when
- // commute the register operands.
- return getFMA3OpcodeToCommuteOperands(MI, SrcOpIdx1, SrcOpIdx2) != 0;
-}
-
-unsigned X86InstrInfo::getFMA3OpcodeToCommuteOperands(
- MachineInstr &MI, unsigned SrcOpIdx1, unsigned SrcOpIdx2) const {
- unsigned Opc = MI.getOpcode();
-
- // Define the array that holds FMA opcodes in groups
- // of 3 opcodes(132, 213, 231) in each group.
- static const uint16_t RegularOpcodeGroups[][3] = {
- { X86::VFMADDSSr132r, X86::VFMADDSSr213r, X86::VFMADDSSr231r },
- { X86::VFMADDSDr132r, X86::VFMADDSDr213r, X86::VFMADDSDr231r },
- { X86::VFMADDPSr132r, X86::VFMADDPSr213r, X86::VFMADDPSr231r },
- { X86::VFMADDPDr132r, X86::VFMADDPDr213r, X86::VFMADDPDr231r },
- { X86::VFMADDPSr132rY, X86::VFMADDPSr213rY, X86::VFMADDPSr231rY },
- { X86::VFMADDPDr132rY, X86::VFMADDPDr213rY, X86::VFMADDPDr231rY },
- { X86::VFMADDSSr132m, X86::VFMADDSSr213m, X86::VFMADDSSr231m },
- { X86::VFMADDSDr132m, X86::VFMADDSDr213m, X86::VFMADDSDr231m },
- { X86::VFMADDPSr132m, X86::VFMADDPSr213m, X86::VFMADDPSr231m },
- { X86::VFMADDPDr132m, X86::VFMADDPDr213m, X86::VFMADDPDr231m },
- { X86::VFMADDPSr132mY, X86::VFMADDPSr213mY, X86::VFMADDPSr231mY },
- { X86::VFMADDPDr132mY, X86::VFMADDPDr213mY, X86::VFMADDPDr231mY },
-
- { X86::VFMSUBSSr132r, X86::VFMSUBSSr213r, X86::VFMSUBSSr231r },
- { X86::VFMSUBSDr132r, X86::VFMSUBSDr213r, X86::VFMSUBSDr231r },
- { X86::VFMSUBPSr132r, X86::VFMSUBPSr213r, X86::VFMSUBPSr231r },
- { X86::VFMSUBPDr132r, X86::VFMSUBPDr213r, X86::VFMSUBPDr231r },
- { X86::VFMSUBPSr132rY, X86::VFMSUBPSr213rY, X86::VFMSUBPSr231rY },
- { X86::VFMSUBPDr132rY, X86::VFMSUBPDr213rY, X86::VFMSUBPDr231rY },
- { X86::VFMSUBSSr132m, X86::VFMSUBSSr213m, X86::VFMSUBSSr231m },
- { X86::VFMSUBSDr132m, X86::VFMSUBSDr213m, X86::VFMSUBSDr231m },
- { X86::VFMSUBPSr132m, X86::VFMSUBPSr213m, X86::VFMSUBPSr231m },
- { X86::VFMSUBPDr132m, X86::VFMSUBPDr213m, X86::VFMSUBPDr231m },
- { X86::VFMSUBPSr132mY, X86::VFMSUBPSr213mY, X86::VFMSUBPSr231mY },
- { X86::VFMSUBPDr132mY, X86::VFMSUBPDr213mY, X86::VFMSUBPDr231mY },
-
- { X86::VFNMADDSSr132r, X86::VFNMADDSSr213r, X86::VFNMADDSSr231r },
- { X86::VFNMADDSDr132r, X86::VFNMADDSDr213r, X86::VFNMADDSDr231r },
- { X86::VFNMADDPSr132r, X86::VFNMADDPSr213r, X86::VFNMADDPSr231r },
- { X86::VFNMADDPDr132r, X86::VFNMADDPDr213r, X86::VFNMADDPDr231r },
- { X86::VFNMADDPSr132rY, X86::VFNMADDPSr213rY, X86::VFNMADDPSr231rY },
- { X86::VFNMADDPDr132rY, X86::VFNMADDPDr213rY, X86::VFNMADDPDr231rY },
- { X86::VFNMADDSSr132m, X86::VFNMADDSSr213m, X86::VFNMADDSSr231m },
- { X86::VFNMADDSDr132m, X86::VFNMADDSDr213m, X86::VFNMADDSDr231m },
- { X86::VFNMADDPSr132m, X86::VFNMADDPSr213m, X86::VFNMADDPSr231m },
- { X86::VFNMADDPDr132m, X86::VFNMADDPDr213m, X86::VFNMADDPDr231m },
- { X86::VFNMADDPSr132mY, X86::VFNMADDPSr213mY, X86::VFNMADDPSr231mY },
- { X86::VFNMADDPDr132mY, X86::VFNMADDPDr213mY, X86::VFNMADDPDr231mY },
-
- { X86::VFNMSUBSSr132r, X86::VFNMSUBSSr213r, X86::VFNMSUBSSr231r },
- { X86::VFNMSUBSDr132r, X86::VFNMSUBSDr213r, X86::VFNMSUBSDr231r },
- { X86::VFNMSUBPSr132r, X86::VFNMSUBPSr213r, X86::VFNMSUBPSr231r },
- { X86::VFNMSUBPDr132r, X86::VFNMSUBPDr213r, X86::VFNMSUBPDr231r },
- { X86::VFNMSUBPSr132rY, X86::VFNMSUBPSr213rY, X86::VFNMSUBPSr231rY },
- { X86::VFNMSUBPDr132rY, X86::VFNMSUBPDr213rY, X86::VFNMSUBPDr231rY },
- { X86::VFNMSUBSSr132m, X86::VFNMSUBSSr213m, X86::VFNMSUBSSr231m },
- { X86::VFNMSUBSDr132m, X86::VFNMSUBSDr213m, X86::VFNMSUBSDr231m },
- { X86::VFNMSUBPSr132m, X86::VFNMSUBPSr213m, X86::VFNMSUBPSr231m },
- { X86::VFNMSUBPDr132m, X86::VFNMSUBPDr213m, X86::VFNMSUBPDr231m },
- { X86::VFNMSUBPSr132mY, X86::VFNMSUBPSr213mY, X86::VFNMSUBPSr231mY },
- { X86::VFNMSUBPDr132mY, X86::VFNMSUBPDr213mY, X86::VFNMSUBPDr231mY },
-
- { X86::VFMADDSUBPSr132r, X86::VFMADDSUBPSr213r, X86::VFMADDSUBPSr231r },
- { X86::VFMADDSUBPDr132r, X86::VFMADDSUBPDr213r, X86::VFMADDSUBPDr231r },
- { X86::VFMADDSUBPSr132rY, X86::VFMADDSUBPSr213rY, X86::VFMADDSUBPSr231rY },
- { X86::VFMADDSUBPDr132rY, X86::VFMADDSUBPDr213rY, X86::VFMADDSUBPDr231rY },
- { X86::VFMADDSUBPSr132m, X86::VFMADDSUBPSr213m, X86::VFMADDSUBPSr231m },
- { X86::VFMADDSUBPDr132m, X86::VFMADDSUBPDr213m, X86::VFMADDSUBPDr231m },
- { X86::VFMADDSUBPSr132mY, X86::VFMADDSUBPSr213mY, X86::VFMADDSUBPSr231mY },
- { X86::VFMADDSUBPDr132mY, X86::VFMADDSUBPDr213mY, X86::VFMADDSUBPDr231mY },
-
- { X86::VFMSUBADDPSr132r, X86::VFMSUBADDPSr213r, X86::VFMSUBADDPSr231r },
- { X86::VFMSUBADDPDr132r, X86::VFMSUBADDPDr213r, X86::VFMSUBADDPDr231r },
- { X86::VFMSUBADDPSr132rY, X86::VFMSUBADDPSr213rY, X86::VFMSUBADDPSr231rY },
- { X86::VFMSUBADDPDr132rY, X86::VFMSUBADDPDr213rY, X86::VFMSUBADDPDr231rY },
- { X86::VFMSUBADDPSr132m, X86::VFMSUBADDPSr213m, X86::VFMSUBADDPSr231m },
- { X86::VFMSUBADDPDr132m, X86::VFMSUBADDPDr213m, X86::VFMSUBADDPDr231m },
- { X86::VFMSUBADDPSr132mY, X86::VFMSUBADDPSr213mY, X86::VFMSUBADDPSr231mY },
- { X86::VFMSUBADDPDr132mY, X86::VFMSUBADDPDr213mY, X86::VFMSUBADDPDr231mY }
- };
-
- // Define the array that holds FMA*_Int opcodes in groups
- // of 3 opcodes(132, 213, 231) in each group.
- static const uint16_t IntrinOpcodeGroups[][3] = {
- { X86::VFMADDSSr132r_Int, X86::VFMADDSSr213r_Int, X86::VFMADDSSr231r_Int },
- { X86::VFMADDSDr132r_Int, X86::VFMADDSDr213r_Int, X86::VFMADDSDr231r_Int },
- { X86::VFMADDSSr132m_Int, X86::VFMADDSSr213m_Int, X86::VFMADDSSr231m_Int },
- { X86::VFMADDSDr132m_Int, X86::VFMADDSDr213m_Int, X86::VFMADDSDr231m_Int },
-
- { X86::VFMSUBSSr132r_Int, X86::VFMSUBSSr213r_Int, X86::VFMSUBSSr231r_Int },
- { X86::VFMSUBSDr132r_Int, X86::VFMSUBSDr213r_Int, X86::VFMSUBSDr231r_Int },
- { X86::VFMSUBSSr132m_Int, X86::VFMSUBSSr213m_Int, X86::VFMSUBSSr231m_Int },
- { X86::VFMSUBSDr132m_Int, X86::VFMSUBSDr213m_Int, X86::VFMSUBSDr231m_Int },
-
- { X86::VFNMADDSSr132r_Int, X86::VFNMADDSSr213r_Int, X86::VFNMADDSSr231r_Int },
- { X86::VFNMADDSDr132r_Int, X86::VFNMADDSDr213r_Int, X86::VFNMADDSDr231r_Int },
- { X86::VFNMADDSSr132m_Int, X86::VFNMADDSSr213m_Int, X86::VFNMADDSSr231m_Int },
- { X86::VFNMADDSDr132m_Int, X86::VFNMADDSDr213m_Int, X86::VFNMADDSDr231m_Int },
-
- { X86::VFNMSUBSSr132r_Int, X86::VFNMSUBSSr213r_Int, X86::VFNMSUBSSr231r_Int },
- { X86::VFNMSUBSDr132r_Int, X86::VFNMSUBSDr213r_Int, X86::VFNMSUBSDr231r_Int },
- { X86::VFNMSUBSSr132m_Int, X86::VFNMSUBSSr213m_Int, X86::VFNMSUBSSr231m_Int },
- { X86::VFNMSUBSDr132m_Int, X86::VFNMSUBSDr213m_Int, X86::VFNMSUBSDr231m_Int },
- };
-
- const unsigned Form132Index = 0;
- const unsigned Form213Index = 1;
- const unsigned Form231Index = 2;
- const unsigned FormsNum = 3;
-
- bool IsIntrinOpcode;
- isFMA3(Opc, &IsIntrinOpcode);
-
- size_t GroupsNum;
- const uint16_t (*OpcodeGroups)[3];
- if (IsIntrinOpcode) {
- GroupsNum = array_lengthof(IntrinOpcodeGroups);
- OpcodeGroups = IntrinOpcodeGroups;
- } else {
- GroupsNum = array_lengthof(RegularOpcodeGroups);
- OpcodeGroups = RegularOpcodeGroups;
- }
-
- const uint16_t *FoundOpcodesGroup = nullptr;
- size_t FormIndex;
-
- // Look for the input opcode in the corresponding opcodes table.
- for (size_t GroupIndex = 0; GroupIndex < GroupsNum && !FoundOpcodesGroup;
- ++GroupIndex) {
- for (FormIndex = 0; FormIndex < FormsNum; ++FormIndex) {
- if (OpcodeGroups[GroupIndex][FormIndex] == Opc) {
- FoundOpcodesGroup = OpcodeGroups[GroupIndex];
- break;
- }
- }
- }
-
- // The input opcode does not match with any of the opcodes from the tables.
- // The unsupported FMA opcode must be added to one of the two opcode groups
- // defined above.
- assert(FoundOpcodesGroup != nullptr && "Unexpected FMA3 opcode");
-
- // Put the lowest index to SrcOpIdx1 to simplify the checks below.
- if (SrcOpIdx1 > SrcOpIdx2)
- std::swap(SrcOpIdx1, SrcOpIdx2);
-
- // TODO: Commuting the 1st operand of FMA*_Int requires some additional
- // analysis. The commute optimization is legal only if all users of FMA*_Int
- // use only the lowest element of the FMA*_Int instruction. Such analysis are
- // not implemented yet. So, just return 0 in that case.
- // When such analysis are available this place will be the right place for
- // calling it.
- if (IsIntrinOpcode && SrcOpIdx1 == 1)
- return 0;
-
- unsigned Case;
- if (SrcOpIdx1 == 1 && SrcOpIdx2 == 2)
- Case = 0;
- else if (SrcOpIdx1 == 1 && SrcOpIdx2 == 3)
- Case = 1;
- else if (SrcOpIdx1 == 2 && SrcOpIdx2 == 3)
- Case = 2;
- else
- return 0;
-
- // Define the FMA forms mapping array that helps to map input FMA form
- // to output FMA form to preserve the operation semantics after
- // commuting the operands.
- static const unsigned FormMapping[][3] = {
- // 0: SrcOpIdx1 == 1 && SrcOpIdx2 == 2;
- // FMA132 A, C, b; ==> FMA231 C, A, b;
- // FMA213 B, A, c; ==> FMA213 A, B, c;
- // FMA231 C, A, b; ==> FMA132 A, C, b;
- { Form231Index, Form213Index, Form132Index },
- // 1: SrcOpIdx1 == 1 && SrcOpIdx2 == 3;
- // FMA132 A, c, B; ==> FMA132 B, c, A;
- // FMA213 B, a, C; ==> FMA231 C, a, B;
- // FMA231 C, a, B; ==> FMA213 B, a, C;
- { Form132Index, Form231Index, Form213Index },
- // 2: SrcOpIdx1 == 2 && SrcOpIdx2 == 3;
- // FMA132 a, C, B; ==> FMA213 a, B, C;
- // FMA213 b, A, C; ==> FMA132 b, C, A;
- // FMA231 c, A, B; ==> FMA231 c, B, A;
- { Form213Index, Form132Index, Form231Index }
- };
-
- // Everything is ready, just adjust the FMA opcode and return it.
- FormIndex = FormMapping[Case][FormIndex];
- return FoundOpcodesGroup[FormIndex];
+ return true;
}
bool X86InstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const {
+ const MCInstrDesc &Desc = MI.getDesc();
+ if (!Desc.isCommutable())
+ return false;
+
switch (MI.getOpcode()) {
+ case X86::CMPSDrr:
+ case X86::CMPSSrr:
case X86::CMPPDrri:
case X86::CMPPSrri:
+ case X86::VCMPSDrr:
+ case X86::VCMPSSrr:
case X86::VCMPPDrri:
case X86::VCMPPSrri:
case X86::VCMPPDYrri:
- case X86::VCMPPSYrri: {
+ case X86::VCMPPSYrri:
+ case X86::VCMPSDZrr:
+ case X86::VCMPSSZrr:
+ case X86::VCMPPDZrri:
+ case X86::VCMPPSZrri:
+ case X86::VCMPPDZ128rri:
+ case X86::VCMPPSZ128rri:
+ case X86::VCMPPDZ256rri:
+ case X86::VCMPPSZ256rri: {
// Float comparison can be safely commuted for
// Ordered/Unordered/Equal/NotEqual tests
unsigned Imm = MI.getOperand(3).getImm() & 0x7;
@@ -3776,9 +4784,73 @@ bool X86InstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
}
return false;
}
+ case X86::MOVSDrr:
+ case X86::MOVSSrr:
+ case X86::VMOVSDrr:
+ case X86::VMOVSSrr: {
+ if (Subtarget.hasSSE41())
+ return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
+ return false;
+ }
+ case X86::VPTERNLOGDZrri: case X86::VPTERNLOGDZrmi:
+ case X86::VPTERNLOGDZ128rri: case X86::VPTERNLOGDZ128rmi:
+ case X86::VPTERNLOGDZ256rri: case X86::VPTERNLOGDZ256rmi:
+ case X86::VPTERNLOGQZrri: case X86::VPTERNLOGQZrmi:
+ case X86::VPTERNLOGQZ128rri: case X86::VPTERNLOGQZ128rmi:
+ case X86::VPTERNLOGQZ256rri: case X86::VPTERNLOGQZ256rmi:
+ case X86::VPTERNLOGDZrrik: case X86::VPTERNLOGDZrmik:
+ case X86::VPTERNLOGDZ128rrik: case X86::VPTERNLOGDZ128rmik:
+ case X86::VPTERNLOGDZ256rrik: case X86::VPTERNLOGDZ256rmik:
+ case X86::VPTERNLOGQZrrik: case X86::VPTERNLOGQZrmik:
+ case X86::VPTERNLOGQZ128rrik: case X86::VPTERNLOGQZ128rmik:
+ case X86::VPTERNLOGQZ256rrik: case X86::VPTERNLOGQZ256rmik:
+ case X86::VPTERNLOGDZrrikz: case X86::VPTERNLOGDZrmikz:
+ case X86::VPTERNLOGDZ128rrikz: case X86::VPTERNLOGDZ128rmikz:
+ case X86::VPTERNLOGDZ256rrikz: case X86::VPTERNLOGDZ256rmikz:
+ case X86::VPTERNLOGQZrrikz: case X86::VPTERNLOGQZrmikz:
+ case X86::VPTERNLOGQZ128rrikz: case X86::VPTERNLOGQZ128rmikz:
+ case X86::VPTERNLOGQZ256rrikz: case X86::VPTERNLOGQZ256rmikz:
+ return findThreeSrcCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
default:
- if (isFMA3(MI.getOpcode()))
- return findFMA3CommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
+ const X86InstrFMA3Group *FMA3Group =
+ X86InstrFMA3Info::getFMA3Group(MI.getOpcode());
+ if (FMA3Group)
+ return findFMA3CommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2, *FMA3Group);
+
+ // Handle masked instructions, since we need to skip over the mask input
+ // and the preserved input.
+ if (Desc.TSFlags & X86II::EVEX_K) {
+ // First assume that the first input is the mask operand and skip past it.
+ unsigned CommutableOpIdx1 = Desc.getNumDefs() + 1;
+ unsigned CommutableOpIdx2 = Desc.getNumDefs() + 2;
+ // Check whether the first input is tied. If it is not, we only need to
+ // skip the mask operand, which we did above.
+ if ((MI.getDesc().getOperandConstraint(Desc.getNumDefs(),
+ MCOI::TIED_TO) != -1)) {
+ // If this is a zero-masking instruction with a tied operand, we need to
+ // move the first index back to the first input, since this must
+ // be a 3-input instruction and we want the first two non-mask inputs.
+ // Otherwise this is a 2-input instruction with a preserved input and a
+ // mask, so we need to move the indices to skip one more input.
+ if (Desc.TSFlags & X86II::EVEX_Z)
+ --CommutableOpIdx1;
+ else {
+ ++CommutableOpIdx1;
+ ++CommutableOpIdx2;
+ }
+ }
+
+ if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2,
+ CommutableOpIdx1, CommutableOpIdx2))
+ return false;
+
+ if (!MI.getOperand(SrcOpIdx1).isReg() ||
+ !MI.getOperand(SrcOpIdx2).isReg())
+ // No idea.
+ return false;
+ return true;
+ }
+
return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
}
return false;
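The EVEX_K branch above is pure index bookkeeping over the operand list. A standalone sketch of that arithmetic, using a simplified operand layout and hypothetical names for illustration only (this is not the LLVM API):

#include <utility>

// Assumed operand layouts (defs first, then inputs):
//   untied masked op:               defs, mask, src1, src2
//   merge-masked op with tied def:  defs, preserved(tied), mask, src1, src2
//   zero-masked 3-input, tied def:  defs, src1(tied), mask, src2, src3
std::pair<unsigned, unsigned>
commutableInputIndices(unsigned NumDefs, bool FirstInputTied, bool ZeroMasking) {
  // Assume the first input is the mask operand and skip past it.
  unsigned Idx1 = NumDefs + 1;
  unsigned Idx2 = NumDefs + 2;
  if (FirstInputTied) {
    if (ZeroMasking)
      --Idx1;        // 3-input op: commute the first two non-mask inputs.
    else {
      ++Idx1;        // Preserved input plus mask: skip one more operand.
      ++Idx2;
    }
  }
  return {Idx1, Idx2};
}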
@@ -4036,6 +5108,85 @@ bool X86InstrInfo::isUnpredicatedTerminator(const MachineInstr &MI) const {
return !isPredicated(MI);
}
+bool X86InstrInfo::isUnconditionalTailCall(const MachineInstr &MI) const {
+ switch (MI.getOpcode()) {
+ case X86::TCRETURNdi:
+ case X86::TCRETURNri:
+ case X86::TCRETURNmi:
+ case X86::TCRETURNdi64:
+ case X86::TCRETURNri64:
+ case X86::TCRETURNmi64:
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool X86InstrInfo::canMakeTailCallConditional(
+ SmallVectorImpl<MachineOperand> &BranchCond,
+ const MachineInstr &TailCall) const {
+ if (TailCall.getOpcode() != X86::TCRETURNdi &&
+ TailCall.getOpcode() != X86::TCRETURNdi64) {
+ // Only direct calls can be done with a conditional branch.
+ return false;
+ }
+
+ if (Subtarget.isTargetWin64()) {
+ // Conditional tail calls confuse the Win64 unwinder.
+ // TODO: Allow them for "leaf" functions; PR30337.
+ return false;
+ }
+
+ assert(BranchCond.size() == 1);
+ if (BranchCond[0].getImm() > X86::LAST_VALID_COND) {
+ // Can't make a conditional tail call with this condition.
+ return false;
+ }
+
+ const X86MachineFunctionInfo *X86FI =
+ TailCall.getParent()->getParent()->getInfo<X86MachineFunctionInfo>();
+ if (X86FI->getTCReturnAddrDelta() != 0 ||
+ TailCall.getOperand(1).getImm() != 0) {
+ // A conditional tail call cannot do any stack adjustment.
+ return false;
+ }
+
+ return true;
+}
+
+void X86InstrInfo::replaceBranchWithTailCall(
+ MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &BranchCond,
+ const MachineInstr &TailCall) const {
+ assert(canMakeTailCallConditional(BranchCond, TailCall));
+
+ MachineBasicBlock::iterator I = MBB.end();
+ while (I != MBB.begin()) {
+ --I;
+ if (I->isDebugValue())
+ continue;
+ if (!I->isBranch())
+ assert(0 && "Can't find the branch to replace!");
+
+ X86::CondCode CC = getCondFromBranchOpc(I->getOpcode());
+ assert(BranchCond.size() == 1);
+ if (CC != BranchCond[0].getImm())
+ continue;
+
+ break;
+ }
+
+ unsigned Opc = TailCall.getOpcode() == X86::TCRETURNdi ? X86::TCRETURNdicc
+ : X86::TCRETURNdi64cc;
+
+ auto MIB = BuildMI(MBB, I, MBB.findDebugLoc(I), get(Opc));
+ MIB->addOperand(TailCall.getOperand(0)); // Destination.
+ MIB.addImm(0); // Stack offset (not used).
+ MIB->addOperand(BranchCond[0]); // Condition.
+ MIB.copyImplicitOps(TailCall); // Regmask and (imp-used) parameters.
+
+ I->eraseFromParent();
+}
+
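Taken together, the two hooks above let branch folding turn a conditional branch whose target block contains nothing but a direct tail call into a single conditional tail call. A hedged sketch of the kind of source that produces this pattern (the exact codegen depends on target and optimization level):

int callee(int);            // hypothetical external function

int caller(int x) {
  if (x != 0)
    return callee(x);       // direct tail call guarded by a condition
  return 0;
}

// Roughly, instead of
//   jne  .LBB0_1            ; conditional branch
//   ...
// .LBB0_1:
//   jmp  callee             ; TCRETURNdi
// the branch itself becomes the call:
//   jne  callee             ; TCRETURNdicc, condition carried as an operand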
// Given a MBB and its TBB, find the FBB which was a fallthrough MBB (it may
// not be a fallthrough MBB now due to layout changes). Return nullptr if the
// fallthrough MBB cannot be identified.
@@ -4296,7 +5447,10 @@ bool X86InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
return true;
}
-unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+unsigned X86InstrInfo::removeBranch(MachineBasicBlock &MBB,
+ int *BytesRemoved) const {
+ assert(!BytesRemoved && "code size not handled");
+
MachineBasicBlock::iterator I = MBB.end();
unsigned Count = 0;
@@ -4316,15 +5470,17 @@ unsigned X86InstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
return Count;
}
-unsigned X86InstrInfo::InsertBranch(MachineBasicBlock &MBB,
+unsigned X86InstrInfo::insertBranch(MachineBasicBlock &MBB,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
ArrayRef<MachineOperand> Cond,
- const DebugLoc &DL) const {
+ const DebugLoc &DL,
+ int *BytesAdded) const {
// Shouldn't be a fall through.
- assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert(TBB && "insertBranch must not be told to insert a fallthrough");
assert((Cond.size() == 1 || Cond.size() == 0) &&
"X86 branch conditions have one component!");
+ assert(!BytesAdded && "code size not handled");
if (Cond.empty()) {
// Unconditional branch?
@@ -4430,16 +5586,63 @@ static bool isHReg(unsigned Reg) {
}
// Try and copy between VR128/VR64 and GR64 registers.
-static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
+static unsigned CopyToFromAsymmetricReg(unsigned &DestReg, unsigned &SrcReg,
const X86Subtarget &Subtarget) {
+ bool HasAVX = Subtarget.hasAVX();
+ bool HasAVX512 = Subtarget.hasAVX512();
+
+ // SrcReg(MaskReg) -> DestReg(GR64)
+ // SrcReg(MaskReg) -> DestReg(GR32)
+ // SrcReg(MaskReg) -> DestReg(GR16)
+ // SrcReg(MaskReg) -> DestReg(GR8)
+
+ // All KMASK RegClasses hold the same k registers, so any one of them can be used for the test.
+ if (X86::VK16RegClass.contains(SrcReg)) {
+ if (X86::GR64RegClass.contains(DestReg)) {
+ assert(Subtarget.hasBWI());
+ return X86::KMOVQrk;
+ }
+ if (X86::GR32RegClass.contains(DestReg))
+ return Subtarget.hasBWI() ? X86::KMOVDrk : X86::KMOVWrk;
+ if (X86::GR16RegClass.contains(DestReg)) {
+ DestReg = getX86SubSuperRegister(DestReg, 32);
+ return X86::KMOVWrk;
+ }
+ if (X86::GR8RegClass.contains(DestReg)) {
+ DestReg = getX86SubSuperRegister(DestReg, 32);
+ return Subtarget.hasDQI() ? X86::KMOVBrk : X86::KMOVWrk;
+ }
+ }
+
+ // SrcReg(GR64) -> DestReg(MaskReg)
+ // SrcReg(GR32) -> DestReg(MaskReg)
+ // SrcReg(GR16) -> DestReg(MaskReg)
+ // SrcReg(GR8) -> DestReg(MaskReg)
+
+ // All KMASK RegClasses hold the same k registers, so any one of them can be used for the test.
+ if (X86::VK16RegClass.contains(DestReg)) {
+ if (X86::GR64RegClass.contains(SrcReg)) {
+ assert(Subtarget.hasBWI());
+ return X86::KMOVQkr;
+ }
+ if (X86::GR32RegClass.contains(SrcReg))
+ return Subtarget.hasBWI() ? X86::KMOVDkr : X86::KMOVWkr;
+ if (X86::GR16RegClass.contains(SrcReg)) {
+ SrcReg = getX86SubSuperRegister(SrcReg, 32);
+ return X86::KMOVWkr;
+ }
+ if (X86::GR8RegClass.contains(SrcReg)) {
+ SrcReg = getX86SubSuperRegister(SrcReg, 32);
+ return Subtarget.hasDQI() ? X86::KMOVBkr : X86::KMOVWkr;
+ }
+ }
+
// SrcReg(VR128) -> DestReg(GR64)
// SrcReg(VR64) -> DestReg(GR64)
// SrcReg(GR64) -> DestReg(VR128)
// SrcReg(GR64) -> DestReg(VR64)
- bool HasAVX = Subtarget.hasAVX();
- bool HasAVX512 = Subtarget.hasAVX512();
if (X86::GR64RegClass.contains(DestReg)) {
if (X86::VR128XRegClass.contains(SrcReg))
// Copy from a VR128 register to a GR64 register.
@@ -4479,96 +5682,13 @@ static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg,
return 0;
}
-static bool isMaskRegClass(const TargetRegisterClass *RC) {
- // All KMASK RegClasses hold the same k registers, can be tested against anyone.
- return X86::VK16RegClass.hasSubClassEq(RC);
-}
-
-static bool MaskRegClassContains(unsigned Reg) {
- // All KMASK RegClasses hold the same k registers, can be tested against anyone.
- return X86::VK16RegClass.contains(Reg);
-}
-
-static bool GRRegClassContains(unsigned Reg) {
- return X86::GR64RegClass.contains(Reg) ||
- X86::GR32RegClass.contains(Reg) ||
- X86::GR16RegClass.contains(Reg) ||
- X86::GR8RegClass.contains(Reg);
-}
-static
-unsigned copyPhysRegOpcode_AVX512_DQ(unsigned& DestReg, unsigned& SrcReg) {
- if (MaskRegClassContains(SrcReg) && X86::GR8RegClass.contains(DestReg)) {
- DestReg = getX86SubSuperRegister(DestReg, 32);
- return X86::KMOVBrk;
- }
- if (MaskRegClassContains(DestReg) && X86::GR8RegClass.contains(SrcReg)) {
- SrcReg = getX86SubSuperRegister(SrcReg, 32);
- return X86::KMOVBkr;
- }
- return 0;
-}
-
-static
-unsigned copyPhysRegOpcode_AVX512_BW(unsigned& DestReg, unsigned& SrcReg) {
- if (MaskRegClassContains(SrcReg) && MaskRegClassContains(DestReg))
- return X86::KMOVQkk;
- if (MaskRegClassContains(SrcReg) && X86::GR32RegClass.contains(DestReg))
- return X86::KMOVDrk;
- if (MaskRegClassContains(SrcReg) && X86::GR64RegClass.contains(DestReg))
- return X86::KMOVQrk;
- if (MaskRegClassContains(DestReg) && X86::GR32RegClass.contains(SrcReg))
- return X86::KMOVDkr;
- if (MaskRegClassContains(DestReg) && X86::GR64RegClass.contains(SrcReg))
- return X86::KMOVQkr;
- return 0;
-}
-
-static
-unsigned copyPhysRegOpcode_AVX512(unsigned& DestReg, unsigned& SrcReg,
- const X86Subtarget &Subtarget)
-{
- if (Subtarget.hasDQI())
- if (auto Opc = copyPhysRegOpcode_AVX512_DQ(DestReg, SrcReg))
- return Opc;
- if (Subtarget.hasBWI())
- if (auto Opc = copyPhysRegOpcode_AVX512_BW(DestReg, SrcReg))
- return Opc;
- if (X86::VR128XRegClass.contains(DestReg, SrcReg)) {
- if (Subtarget.hasVLX())
- return X86::VMOVAPSZ128rr;
- DestReg = get512BitSuperRegister(DestReg);
- SrcReg = get512BitSuperRegister(SrcReg);
- return X86::VMOVAPSZrr;
- }
- if (X86::VR256XRegClass.contains(DestReg, SrcReg)) {
- if (Subtarget.hasVLX())
- return X86::VMOVAPSZ256rr;
- DestReg = get512BitSuperRegister(DestReg);
- SrcReg = get512BitSuperRegister(SrcReg);
- return X86::VMOVAPSZrr;
- }
- if (X86::VR512RegClass.contains(DestReg, SrcReg))
- return X86::VMOVAPSZrr;
- if (MaskRegClassContains(DestReg) && MaskRegClassContains(SrcReg))
- return X86::KMOVWkk;
- if (MaskRegClassContains(DestReg) && GRRegClassContains(SrcReg)) {
- SrcReg = getX86SubSuperRegister(SrcReg, 32);
- return X86::KMOVWkr;
- }
- if (GRRegClassContains(DestReg) && MaskRegClassContains(SrcReg)) {
- DestReg = getX86SubSuperRegister(DestReg, 32);
- return X86::KMOVWrk;
- }
- return 0;
-}
-
void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const DebugLoc &DL, unsigned DestReg,
unsigned SrcReg, bool KillSrc) const {
// First deal with the normal symmetric copies.
bool HasAVX = Subtarget.hasAVX();
- bool HasAVX512 = Subtarget.hasAVX512();
+ bool HasVLX = Subtarget.hasVLX();
unsigned Opc = 0;
if (X86::GR64RegClass.contains(DestReg, SrcReg))
Opc = X86::MOV64rr;
@@ -4590,12 +5710,41 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
}
else if (X86::VR64RegClass.contains(DestReg, SrcReg))
Opc = X86::MMX_MOVQ64rr;
- else if (HasAVX512)
- Opc = copyPhysRegOpcode_AVX512(DestReg, SrcReg, Subtarget);
- else if (X86::VR128RegClass.contains(DestReg, SrcReg))
- Opc = HasAVX ? X86::VMOVAPSrr : X86::MOVAPSrr;
- else if (X86::VR256RegClass.contains(DestReg, SrcReg))
- Opc = X86::VMOVAPSYrr;
+ else if (X86::VR128XRegClass.contains(DestReg, SrcReg)) {
+ if (HasVLX)
+ Opc = X86::VMOVAPSZ128rr;
+ else if (X86::VR128RegClass.contains(DestReg, SrcReg))
+ Opc = HasAVX ? X86::VMOVAPSrr : X86::MOVAPSrr;
+ else {
+ // If this is an extended register and we don't have VLX, we need to use
+ // a 512-bit move.
+ Opc = X86::VMOVAPSZrr;
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ DestReg = TRI->getMatchingSuperReg(DestReg, X86::sub_xmm,
+ &X86::VR512RegClass);
+ SrcReg = TRI->getMatchingSuperReg(SrcReg, X86::sub_xmm,
+ &X86::VR512RegClass);
+ }
+ } else if (X86::VR256XRegClass.contains(DestReg, SrcReg)) {
+ if (HasVLX)
+ Opc = X86::VMOVAPSZ256rr;
+ else if (X86::VR256RegClass.contains(DestReg, SrcReg))
+ Opc = X86::VMOVAPSYrr;
+ else {
+ // If this is an extended register and we don't have VLX, we need to use
+ // a 512-bit move.
+ Opc = X86::VMOVAPSZrr;
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ DestReg = TRI->getMatchingSuperReg(DestReg, X86::sub_ymm,
+ &X86::VR512RegClass);
+ SrcReg = TRI->getMatchingSuperReg(SrcReg, X86::sub_ymm,
+ &X86::VR512RegClass);
+ }
+ } else if (X86::VR512RegClass.contains(DestReg, SrcReg))
+ Opc = X86::VMOVAPSZrr;
+ // All KMASK RegClasses hold the same k registers, so any one of them can be used for the test.
+ else if (X86::VK16RegClass.contains(DestReg, SrcReg))
+ Opc = Subtarget.hasBWI() ? X86::KMOVQkk : X86::KMOVWkk;
if (!Opc)
Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, Subtarget);
@@ -4708,37 +5857,15 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
llvm_unreachable("Cannot emit physreg copy instruction");
}
-static unsigned getLoadStoreMaskRegOpcode(const TargetRegisterClass *RC,
- bool load) {
- switch (RC->getSize()) {
- default:
- llvm_unreachable("Unknown spill size");
- case 2:
- return load ? X86::KMOVWkm : X86::KMOVWmk;
- case 4:
- return load ? X86::KMOVDkm : X86::KMOVDmk;
- case 8:
- return load ? X86::KMOVQkm : X86::KMOVQmk;
- }
-}
-
static unsigned getLoadStoreRegOpcode(unsigned Reg,
const TargetRegisterClass *RC,
bool isStackAligned,
const X86Subtarget &STI,
bool load) {
- if (STI.hasAVX512()) {
- if (isMaskRegClass(RC))
- return getLoadStoreMaskRegOpcode(RC, load);
- if (RC->getSize() == 4 && X86::FR32XRegClass.hasSubClassEq(RC))
- return load ? X86::VMOVSSZrm : X86::VMOVSSZmr;
- if (RC->getSize() == 8 && X86::FR64XRegClass.hasSubClassEq(RC))
- return load ? X86::VMOVSDZrm : X86::VMOVSDZmr;
- if (X86::VR512RegClass.hasSubClassEq(RC))
- return load ? X86::VMOVUPSZrm : X86::VMOVUPSZmr;
- }
-
bool HasAVX = STI.hasAVX();
+ bool HasAVX512 = STI.hasAVX512();
+ bool HasVLX = STI.hasVLX();
+
switch (RC->getSize()) {
default:
llvm_unreachable("Unknown spill size");
@@ -4751,69 +5878,85 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
return load ? X86::MOV8rm_NOREX : X86::MOV8mr_NOREX;
return load ? X86::MOV8rm : X86::MOV8mr;
case 2:
+ if (X86::VK16RegClass.hasSubClassEq(RC))
+ return load ? X86::KMOVWkm : X86::KMOVWmk;
assert(X86::GR16RegClass.hasSubClassEq(RC) && "Unknown 2-byte regclass");
return load ? X86::MOV16rm : X86::MOV16mr;
case 4:
if (X86::GR32RegClass.hasSubClassEq(RC))
return load ? X86::MOV32rm : X86::MOV32mr;
- if (X86::FR32RegClass.hasSubClassEq(RC))
+ if (X86::FR32XRegClass.hasSubClassEq(RC))
return load ?
- (HasAVX ? X86::VMOVSSrm : X86::MOVSSrm) :
- (HasAVX ? X86::VMOVSSmr : X86::MOVSSmr);
+ (HasAVX512 ? X86::VMOVSSZrm : HasAVX ? X86::VMOVSSrm : X86::MOVSSrm) :
+ (HasAVX512 ? X86::VMOVSSZmr : HasAVX ? X86::VMOVSSmr : X86::MOVSSmr);
if (X86::RFP32RegClass.hasSubClassEq(RC))
return load ? X86::LD_Fp32m : X86::ST_Fp32m;
+ if (X86::VK32RegClass.hasSubClassEq(RC))
+ return load ? X86::KMOVDkm : X86::KMOVDmk;
llvm_unreachable("Unknown 4-byte regclass");
case 8:
if (X86::GR64RegClass.hasSubClassEq(RC))
return load ? X86::MOV64rm : X86::MOV64mr;
- if (X86::FR64RegClass.hasSubClassEq(RC))
+ if (X86::FR64XRegClass.hasSubClassEq(RC))
return load ?
- (HasAVX ? X86::VMOVSDrm : X86::MOVSDrm) :
- (HasAVX ? X86::VMOVSDmr : X86::MOVSDmr);
+ (HasAVX512 ? X86::VMOVSDZrm : HasAVX ? X86::VMOVSDrm : X86::MOVSDrm) :
+ (HasAVX512 ? X86::VMOVSDZmr : HasAVX ? X86::VMOVSDmr : X86::MOVSDmr);
if (X86::VR64RegClass.hasSubClassEq(RC))
return load ? X86::MMX_MOVQ64rm : X86::MMX_MOVQ64mr;
if (X86::RFP64RegClass.hasSubClassEq(RC))
return load ? X86::LD_Fp64m : X86::ST_Fp64m;
+ if (X86::VK64RegClass.hasSubClassEq(RC))
+ return load ? X86::KMOVQkm : X86::KMOVQmk;
llvm_unreachable("Unknown 8-byte regclass");
case 10:
assert(X86::RFP80RegClass.hasSubClassEq(RC) && "Unknown 10-byte regclass");
return load ? X86::LD_Fp80m : X86::ST_FpP80m;
case 16: {
- assert((X86::VR128RegClass.hasSubClassEq(RC) ||
- X86::VR128XRegClass.hasSubClassEq(RC))&& "Unknown 16-byte regclass");
+ assert(X86::VR128XRegClass.hasSubClassEq(RC) && "Unknown 16-byte regclass");
// If stack is realigned we can use aligned stores.
- if (X86::VR128RegClass.hasSubClassEq(RC)) {
- if (isStackAligned)
- return load ? (HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm)
- : (HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr);
- else
- return load ? (HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm)
- : (HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr);
- }
- assert(STI.hasVLX() && "Using extended register requires VLX");
if (isStackAligned)
- return load ? X86::VMOVAPSZ128rm : X86::VMOVAPSZ128mr;
+ return load ?
+ (HasVLX ? X86::VMOVAPSZ128rm :
+ HasAVX512 ? X86::VMOVAPSZ128rm_NOVLX :
+ HasAVX ? X86::VMOVAPSrm :
+ X86::MOVAPSrm):
+ (HasVLX ? X86::VMOVAPSZ128mr :
+ HasAVX512 ? X86::VMOVAPSZ128mr_NOVLX :
+ HasAVX ? X86::VMOVAPSmr :
+ X86::MOVAPSmr);
else
- return load ? X86::VMOVUPSZ128rm : X86::VMOVUPSZ128mr;
+ return load ?
+ (HasVLX ? X86::VMOVUPSZ128rm :
+ HasAVX512 ? X86::VMOVUPSZ128rm_NOVLX :
+ HasAVX ? X86::VMOVUPSrm :
+ X86::MOVUPSrm):
+ (HasVLX ? X86::VMOVUPSZ128mr :
+ HasAVX512 ? X86::VMOVUPSZ128mr_NOVLX :
+ HasAVX ? X86::VMOVUPSmr :
+ X86::MOVUPSmr);
}
case 32:
- assert((X86::VR256RegClass.hasSubClassEq(RC) ||
- X86::VR256XRegClass.hasSubClassEq(RC)) && "Unknown 32-byte regclass");
+ assert(X86::VR256XRegClass.hasSubClassEq(RC) && "Unknown 32-byte regclass");
// If stack is realigned we can use aligned stores.
- if (X86::VR256RegClass.hasSubClassEq(RC)) {
- if (isStackAligned)
- return load ? X86::VMOVAPSYrm : X86::VMOVAPSYmr;
- else
- return load ? X86::VMOVUPSYrm : X86::VMOVUPSYmr;
- }
- assert(STI.hasVLX() && "Using extended register requires VLX");
if (isStackAligned)
- return load ? X86::VMOVAPSZ256rm : X86::VMOVAPSZ256mr;
+ return load ?
+ (HasVLX ? X86::VMOVAPSZ256rm :
+ HasAVX512 ? X86::VMOVAPSZ256rm_NOVLX :
+ X86::VMOVAPSYrm) :
+ (HasVLX ? X86::VMOVAPSZ256mr :
+ HasAVX512 ? X86::VMOVAPSZ256mr_NOVLX :
+ X86::VMOVAPSYmr);
else
- return load ? X86::VMOVUPSZ256rm : X86::VMOVUPSZ256mr;
+ return load ?
+ (HasVLX ? X86::VMOVUPSZ256rm :
+ HasAVX512 ? X86::VMOVUPSZ256rm_NOVLX :
+ X86::VMOVUPSYrm) :
+ (HasVLX ? X86::VMOVUPSZ256mr :
+ HasAVX512 ? X86::VMOVUPSZ256mr_NOVLX :
+ X86::VMOVUPSYmr);
case 64:
assert(X86::VR512RegClass.hasSubClassEq(RC) && "Unknown 64-byte regclass");
- assert(STI.hasVLX() && "Using 512-bit register requires AVX512");
+ assert(STI.hasAVX512() && "Using 512-bit register requires AVX512");
if (isStackAligned)
return load ? X86::VMOVAPSZrm : X86::VMOVAPSZmr;
else
@@ -4851,8 +5994,7 @@ bool X86InstrInfo::getMemOpBaseRegImmOfs(MachineInstr &MemOp, unsigned &BaseReg,
Offset = DispMO.getImm();
- return MemOp.getOperand(MemRefBegin + X86::AddrIndexReg).getReg() ==
- X86::NoRegister;
+ return true;
}
static unsigned getStoreRegOpcode(unsigned SrcReg,
@@ -4876,7 +6018,7 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
const MachineFunction &MF = *MBB.getParent();
- assert(MF.getFrameInfo()->getObjectSize(FrameIdx) >= RC->getSize() &&
+ assert(MF.getFrameInfo().getObjectSize(FrameIdx) >= RC->getSize() &&
"Stack slot too small for store");
unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16);
bool isAligned =
@@ -4954,6 +6096,8 @@ bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
case X86::CMP16ri:
case X86::CMP16ri8:
case X86::CMP8ri:
+ if (!MI.getOperand(1).isImm())
+ return false;
SrcReg = MI.getOperand(0).getReg();
SrcReg2 = 0;
CmpMask = ~0;
@@ -4985,6 +6129,8 @@ bool X86InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
case X86::SUB16ri:
case X86::SUB16ri8:
case X86::SUB8ri:
+ if (!MI.getOperand(2).isImm())
+ return false;
SrcReg = MI.getOperand(1).getReg();
SrcReg2 = 0;
CmpMask = ~0;
@@ -5263,9 +6409,9 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
// If the definition is in this basic block, RE points to the definition;
// otherwise, RE is the rend of the basic block.
MachineBasicBlock::reverse_iterator
- RI = MachineBasicBlock::reverse_iterator(I),
+ RI = ++I.getReverse(),
RE = CmpInstr.getParent() == MI->getParent()
- ? MachineBasicBlock::reverse_iterator(++Def) /* points to MI */
+ ? Def.getReverse() /* points to MI */
: CmpInstr.getParent()->rend();
MachineInstr *Movr0Inst = nullptr;
for (; RI != RE; ++RI) {
@@ -5411,9 +6557,8 @@ bool X86InstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, unsigned SrcReg,
if (Movr0Inst) {
// Look backwards until we find a def that doesn't use the current EFLAGS.
Def = Sub;
- MachineBasicBlock::reverse_iterator
- InsertI = MachineBasicBlock::reverse_iterator(++Def),
- InsertE = Sub->getParent()->rend();
+ MachineBasicBlock::reverse_iterator InsertI = Def.getReverse(),
+ InsertE = Sub->getParent()->rend();
for (; InsertI != InsertE; ++InsertI) {
MachineInstr *Instr = &*InsertI;
if (!Instr->readsRegister(X86::EFLAGS, TRI) &&
@@ -5455,14 +6600,6 @@ MachineInstr *X86InstrInfo::optimizeLoadInstr(MachineInstr &MI,
const MachineRegisterInfo *MRI,
unsigned &FoldAsLoadDefReg,
MachineInstr *&DefMI) const {
- if (FoldAsLoadDefReg == 0)
- return nullptr;
- // To be conservative, if there exists another load, clear the load candidate.
- if (MI.mayLoad()) {
- FoldAsLoadDefReg = 0;
- return nullptr;
- }
-
// Check whether we can move DefMI here.
DefMI = MRI->getVRegDef(FoldAsLoadDefReg);
assert(DefMI);
@@ -5471,27 +6608,24 @@ MachineInstr *X86InstrInfo::optimizeLoadInstr(MachineInstr &MI,
return nullptr;
// Collect information about virtual register operands of MI.
- unsigned SrcOperandId = 0;
- bool FoundSrcOperand = false;
- for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
+ SmallVector<unsigned, 1> SrcOperandIds;
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
if (Reg != FoldAsLoadDefReg)
continue;
- // Do not fold if we have a subreg use or a def or multiple uses.
- if (MO.getSubReg() || MO.isDef() || FoundSrcOperand)
+ // Do not fold if we have a subreg use or a def.
+ if (MO.getSubReg() || MO.isDef())
return nullptr;
-
- SrcOperandId = i;
- FoundSrcOperand = true;
+ SrcOperandIds.push_back(i);
}
- if (!FoundSrcOperand)
+ if (SrcOperandIds.empty())
return nullptr;
// Check whether we can fold the def into SrcOperandId.
- if (MachineInstr *FoldMI = foldMemoryOperand(MI, SrcOperandId, *DefMI)) {
+ if (MachineInstr *FoldMI = foldMemoryOperand(MI, SrcOperandIds, *DefMI)) {
FoldAsLoadDefReg = 0;
return FoldMI;
}
@@ -5553,7 +6687,9 @@ static bool expandMOV32r1(MachineInstrBuilder &MIB, const TargetInstrInfo &TII,
return true;
}
-bool X86InstrInfo::ExpandMOVImmSExti8(MachineInstrBuilder &MIB) const {
+static bool ExpandMOVImmSExti8(MachineInstrBuilder &MIB,
+ const TargetInstrInfo &TII,
+ const X86Subtarget &Subtarget) {
MachineBasicBlock &MBB = *MIB->getParent();
DebugLoc DL = MIB->getDebugLoc();
int64_t Imm = MIB->getOperand(1).getImm();
@@ -5570,23 +6706,23 @@ bool X86InstrInfo::ExpandMOVImmSExti8(MachineInstrBuilder &MIB) const {
X86MachineFunctionInfo *X86FI =
MBB.getParent()->getInfo<X86MachineFunctionInfo>();
if (X86FI->getUsesRedZone()) {
- MIB->setDesc(get(MIB->getOpcode() == X86::MOV32ImmSExti8 ? X86::MOV32ri
- : X86::MOV64ri));
+ MIB->setDesc(TII.get(MIB->getOpcode() ==
+ X86::MOV32ImmSExti8 ? X86::MOV32ri : X86::MOV64ri));
return true;
}
// 64-bit mode doesn't have 32-bit push/pop, so use 64-bit operations and
// widen the register if necessary.
StackAdjustment = 8;
- BuildMI(MBB, I, DL, get(X86::PUSH64i8)).addImm(Imm);
- MIB->setDesc(get(X86::POP64r));
+ BuildMI(MBB, I, DL, TII.get(X86::PUSH64i8)).addImm(Imm);
+ MIB->setDesc(TII.get(X86::POP64r));
MIB->getOperand(0)
.setReg(getX86SubSuperRegister(MIB->getOperand(0).getReg(), 64));
} else {
assert(MIB->getOpcode() == X86::MOV32ImmSExti8);
StackAdjustment = 4;
- BuildMI(MBB, I, DL, get(X86::PUSH32i8)).addImm(Imm);
- MIB->setDesc(get(X86::POP32r));
+ BuildMI(MBB, I, DL, TII.get(X86::PUSH32i8)).addImm(Imm);
+ MIB->setDesc(TII.get(X86::POP32r));
}
// Build CFI if necessary.
@@ -5616,7 +6752,9 @@ static void expandLoadStackGuard(MachineInstrBuilder &MIB,
unsigned Reg = MIB->getOperand(0).getReg();
const GlobalValue *GV =
cast<GlobalValue>((*MIB->memoperands_begin())->getValue());
- auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant;
+ auto Flags = MachineMemOperand::MOLoad |
+ MachineMemOperand::MODereferenceable |
+ MachineMemOperand::MOInvariant;
MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(
MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 8, 8);
MachineBasicBlock::iterator I = MIB.getInstr();
@@ -5629,6 +6767,53 @@ static void expandLoadStackGuard(MachineInstrBuilder &MIB,
MIB.addReg(Reg, RegState::Kill).addImm(1).addReg(0).addImm(0).addReg(0);
}
+// This is used to handle spills for 128/256-bit registers when we have AVX512,
+// but not VLX. If it uses an extended register, we need to use an instruction
+// that loads the lower 128/256 bits but is available with only AVX512F.
+static bool expandNOVLXLoad(MachineInstrBuilder &MIB,
+ const TargetRegisterInfo *TRI,
+ const MCInstrDesc &LoadDesc,
+ const MCInstrDesc &BroadcastDesc,
+ unsigned SubIdx) {
+ unsigned DestReg = MIB->getOperand(0).getReg();
+ // Check if DestReg is XMM16-31 or YMM16-31.
+ if (TRI->getEncodingValue(DestReg) < 16) {
+ // We can use a normal VEX encoded load.
+ MIB->setDesc(LoadDesc);
+ } else {
+ // Use a 128/256-bit VBROADCAST instruction.
+ MIB->setDesc(BroadcastDesc);
+ // Change the destination to a 512-bit register.
+ DestReg = TRI->getMatchingSuperReg(DestReg, SubIdx, &X86::VR512RegClass);
+ MIB->getOperand(0).setReg(DestReg);
+ }
+ return true;
+}
+
+// This is used to handle spills for 128/256-bit registers when we have AVX512,
+// but not VLX. If it uses an extended register, we need to use an instruction
+// that stores the lower 128/256 bits but is available with only AVX512F.
+static bool expandNOVLXStore(MachineInstrBuilder &MIB,
+ const TargetRegisterInfo *TRI,
+ const MCInstrDesc &StoreDesc,
+ const MCInstrDesc &ExtractDesc,
+ unsigned SubIdx) {
+ unsigned SrcReg = MIB->getOperand(X86::AddrNumOperands).getReg();
+ // Check if SrcReg is XMM16-31 or YMM16-31.
+ if (TRI->getEncodingValue(SrcReg) < 16) {
+ // We can use a normal VEX encoded store.
+ MIB->setDesc(StoreDesc);
+ } else {
+ // Use a VEXTRACTF instruction.
+ MIB->setDesc(ExtractDesc);
+ // Change the source to a 512-bit register.
+ SrcReg = TRI->getMatchingSuperReg(SrcReg, SubIdx, &X86::VR512RegClass);
+ MIB->getOperand(X86::AddrNumOperands).setReg(SrcReg);
+ MIB.addImm(0x0); // Append an immediate of 0 to extract the lowest 128/256 bits.
+ }
+
+ return true;
+}
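Both expanders make the same decision: XMM0-15/YMM0-15 keep an ordinary VEX-encoded move, while XMM16-31/YMM16-31 (which would need VLX for a 128/256-bit EVEX move) are rewritten to operate on the containing ZMM register using an instruction available in plain AVX512F. A minimal sketch of that choice, with hypothetical names rather than the LLVM API:

// Pick an opcode for reloading a 128/256-bit register without VLX.
// 'Encoding' is the hardware register number of the XMM/YMM register.
unsigned pickNoVLXReloadOpcode(unsigned Encoding, unsigned VEXLoadOpc,
                               unsigned AVX512FBroadcastOpc) {
  if (Encoding < 16)
    return VEXLoadOpc;            // xmm0-15 / ymm0-15: plain VEX load.
  // xmm16-31 / ymm16-31: broadcast into the containing zmm register; the
  // low 128/256 bits then hold the reloaded value.
  return AVX512FBroadcastOpc;
}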
bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
bool HasAVX = Subtarget.hasAVX();
MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);
@@ -5641,7 +6826,7 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
return expandMOV32r1(MIB, *this, /*MinusOne=*/ true);
case X86::MOV32ImmSExti8:
case X86::MOV64ImmSExti8:
- return ExpandMOVImmSExti8(MIB);
+ return ExpandMOVImmSExti8(MIB, *this, Subtarget);
case X86::SETB_C8r:
return Expand2AddrUndef(MIB, get(X86::SBB8rr));
case X86::SETB_C16r:
@@ -5663,6 +6848,9 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
return Expand2AddrUndef(MIB, get(X86::VPXORDZ256rr));
case X86::AVX512_512_SET0:
return Expand2AddrUndef(MIB, get(X86::VPXORDZrr));
+ case X86::AVX512_FsFLD0SS:
+ case X86::AVX512_FsFLD0SD:
+ return Expand2AddrUndef(MIB, get(X86::VXORPSZ128rr));
case X86::V_SETALLONES:
return Expand2AddrUndef(MIB, get(HasAVX ? X86::VPCMPEQDrr : X86::PCMPEQDrr));
case X86::AVX2_SETALLONES:
@@ -5676,6 +6864,30 @@ bool X86InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
.addReg(Reg, RegState::Undef).addImm(0xff);
return true;
}
+ case X86::VMOVAPSZ128rm_NOVLX:
+ return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVAPSrm),
+ get(X86::VBROADCASTF32X4rm), X86::sub_xmm);
+ case X86::VMOVUPSZ128rm_NOVLX:
+ return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVUPSrm),
+ get(X86::VBROADCASTF32X4rm), X86::sub_xmm);
+ case X86::VMOVAPSZ256rm_NOVLX:
+ return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVAPSYrm),
+ get(X86::VBROADCASTF64X4rm), X86::sub_ymm);
+ case X86::VMOVUPSZ256rm_NOVLX:
+ return expandNOVLXLoad(MIB, &getRegisterInfo(), get(X86::VMOVUPSYrm),
+ get(X86::VBROADCASTF64X4rm), X86::sub_ymm);
+ case X86::VMOVAPSZ128mr_NOVLX:
+ return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVAPSmr),
+ get(X86::VEXTRACTF32x4Zmr), X86::sub_xmm);
+ case X86::VMOVUPSZ128mr_NOVLX:
+ return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVUPSmr),
+ get(X86::VEXTRACTF32x4Zmr), X86::sub_xmm);
+ case X86::VMOVAPSZ256mr_NOVLX:
+ return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVAPSYmr),
+ get(X86::VEXTRACTF64x4Zmr), X86::sub_ymm);
+ case X86::VMOVUPSZ256mr_NOVLX:
+ return expandNOVLXStore(MIB, &getRegisterInfo(), get(X86::VMOVUPSYmr),
+ get(X86::VEXTRACTF64x4Zmr), X86::sub_ymm);
case X86::TEST8ri_NOREX:
MI.setDesc(get(X86::TEST8ri));
return true;
@@ -5801,6 +7013,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
switch (MI.getOpcode()) {
case X86::INSERTPSrr:
case X86::VINSERTPSrr:
+ case X86::VINSERTPSZrr:
// Attempt to convert the load of the inserted vector into a folded load
// of a single float.
if (OpNum == 2) {
@@ -5814,8 +7027,9 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
int PtrOffset = SrcIdx * 4;
unsigned NewImm = (DstIdx << 4) | ZMask;
unsigned NewOpCode =
- (MI.getOpcode() == X86::VINSERTPSrr ? X86::VINSERTPSrm
- : X86::INSERTPSrm);
+ (MI.getOpcode() == X86::VINSERTPSZrr) ? X86::VINSERTPSZrm :
+ (MI.getOpcode() == X86::VINSERTPSrr) ? X86::VINSERTPSrm :
+ X86::INSERTPSrm;
MachineInstr *NewMI =
FuseInst(MF, NewOpCode, OpNum, MOs, InsertPt, MI, *this, PtrOffset);
NewMI->getOperand(NewMI->getNumOperands() - 1).setImm(NewImm);
@@ -5825,6 +7039,7 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
break;
case X86::MOVHLPSrr:
case X86::VMOVHLPSrr:
+ case X86::VMOVHLPSZrr:
// Move the upper 64 bits of the second operand to the lower 64 bits.
// To fold the load, adjust the pointer to the upper half and use (V)MOVLPS.
// TODO: In most cases AVX doesn't have an 8-byte alignment requirement.
@@ -5832,8 +7047,9 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
unsigned RCSize = getRegClass(MI.getDesc(), OpNum, &RI, MF)->getSize();
if (Size <= RCSize && 8 <= Align) {
unsigned NewOpCode =
- (MI.getOpcode() == X86::VMOVHLPSrr ? X86::VMOVLPSrm
- : X86::MOVLPSrm);
+ (MI.getOpcode() == X86::VMOVHLPSZrr) ? X86::VMOVLPSZ128rm :
+ (MI.getOpcode() == X86::VMOVHLPSrr) ? X86::VMOVLPSrm :
+ X86::MOVLPSrm;
MachineInstr *NewMI =
FuseInst(MF, NewOpCode, OpNum, MOs, InsertPt, MI, *this, 8);
return NewMI;
@@ -6042,12 +7258,8 @@ static bool hasPartialRegUpdate(unsigned Opcode) {
case X86::CVTSI2SD64rm:
case X86::CVTSD2SSrr:
case X86::CVTSD2SSrm:
- case X86::Int_CVTSD2SSrr:
- case X86::Int_CVTSD2SSrm:
case X86::CVTSS2SDrr:
case X86::CVTSS2SDrm:
- case X86::Int_CVTSS2SDrr:
- case X86::Int_CVTSS2SDrm:
case X86::MOVHPDrm:
case X86::MOVHPSrm:
case X86::MOVLPDrm:
@@ -6058,10 +7270,8 @@ static bool hasPartialRegUpdate(unsigned Opcode) {
case X86::RCPSSm_Int:
case X86::ROUNDSDr:
case X86::ROUNDSDm:
- case X86::ROUNDSDr_Int:
case X86::ROUNDSSr:
case X86::ROUNDSSm:
- case X86::ROUNDSSr_Int:
case X86::RSQRTSSr:
case X86::RSQRTSSm:
case X86::RSQRTSSr_Int:
@@ -6134,28 +7344,95 @@ static bool hasUndefRegUpdate(unsigned Opcode) {
case X86::Int_VCVTSS2SDrr:
case X86::Int_VCVTSS2SDrm:
case X86::VRCPSSr:
+ case X86::VRCPSSr_Int:
case X86::VRCPSSm:
case X86::VRCPSSm_Int:
case X86::VROUNDSDr:
case X86::VROUNDSDm:
case X86::VROUNDSDr_Int:
+ case X86::VROUNDSDm_Int:
case X86::VROUNDSSr:
case X86::VROUNDSSm:
case X86::VROUNDSSr_Int:
+ case X86::VROUNDSSm_Int:
case X86::VRSQRTSSr:
+ case X86::VRSQRTSSr_Int:
case X86::VRSQRTSSm:
case X86::VRSQRTSSm_Int:
case X86::VSQRTSSr:
+ case X86::VSQRTSSr_Int:
case X86::VSQRTSSm:
case X86::VSQRTSSm_Int:
case X86::VSQRTSDr:
+ case X86::VSQRTSDr_Int:
case X86::VSQRTSDm:
case X86::VSQRTSDm_Int:
- // AVX-512
+ // AVX-512
+ case X86::VCVTSI2SSZrr:
+ case X86::VCVTSI2SSZrm:
+ case X86::VCVTSI2SSZrr_Int:
+ case X86::VCVTSI2SSZrrb_Int:
+ case X86::VCVTSI2SSZrm_Int:
+ case X86::VCVTSI642SSZrr:
+ case X86::VCVTSI642SSZrm:
+ case X86::VCVTSI642SSZrr_Int:
+ case X86::VCVTSI642SSZrrb_Int:
+ case X86::VCVTSI642SSZrm_Int:
+ case X86::VCVTSI2SDZrr:
+ case X86::VCVTSI2SDZrm:
+ case X86::VCVTSI2SDZrr_Int:
+ case X86::VCVTSI2SDZrrb_Int:
+ case X86::VCVTSI2SDZrm_Int:
+ case X86::VCVTSI642SDZrr:
+ case X86::VCVTSI642SDZrm:
+ case X86::VCVTSI642SDZrr_Int:
+ case X86::VCVTSI642SDZrrb_Int:
+ case X86::VCVTSI642SDZrm_Int:
+ case X86::VCVTUSI2SSZrr:
+ case X86::VCVTUSI2SSZrm:
+ case X86::VCVTUSI2SSZrr_Int:
+ case X86::VCVTUSI2SSZrrb_Int:
+ case X86::VCVTUSI2SSZrm_Int:
+ case X86::VCVTUSI642SSZrr:
+ case X86::VCVTUSI642SSZrm:
+ case X86::VCVTUSI642SSZrr_Int:
+ case X86::VCVTUSI642SSZrrb_Int:
+ case X86::VCVTUSI642SSZrm_Int:
+ case X86::VCVTUSI2SDZrr:
+ case X86::VCVTUSI2SDZrm:
+ case X86::VCVTUSI2SDZrr_Int:
+ case X86::VCVTUSI2SDZrm_Int:
+ case X86::VCVTUSI642SDZrr:
+ case X86::VCVTUSI642SDZrm:
+ case X86::VCVTUSI642SDZrr_Int:
+ case X86::VCVTUSI642SDZrrb_Int:
+ case X86::VCVTUSI642SDZrm_Int:
case X86::VCVTSD2SSZrr:
+ case X86::VCVTSD2SSZrrb:
case X86::VCVTSD2SSZrm:
case X86::VCVTSS2SDZrr:
+ case X86::VCVTSS2SDZrrb:
case X86::VCVTSS2SDZrm:
+ case X86::VRNDSCALESDr:
+ case X86::VRNDSCALESDrb:
+ case X86::VRNDSCALESDm:
+ case X86::VRNDSCALESSr:
+ case X86::VRNDSCALESSrb:
+ case X86::VRNDSCALESSm:
+ case X86::VRCP14SSrr:
+ case X86::VRCP14SSrm:
+ case X86::VRSQRT14SSrr:
+ case X86::VRSQRT14SSrm:
+ case X86::VSQRTSSZr:
+ case X86::VSQRTSSZr_Int:
+ case X86::VSQRTSSZrb_Int:
+ case X86::VSQRTSSZm:
+ case X86::VSQRTSSZm_Int:
+ case X86::VSQRTSDZr:
+ case X86::VSQRTSDZr_Int:
+ case X86::VSQRTSDZrb_Int:
+ case X86::VSQRTSDZm:
+ case X86::VSQRTSDZm_Int:
return true;
}
@@ -6233,9 +7510,17 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI,
if (!MF.getFunction()->optForSize() && hasPartialRegUpdate(MI.getOpcode()))
return nullptr;
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- unsigned Size = MFI->getObjectSize(FrameIndex);
- unsigned Alignment = MFI->getObjectAlignment(FrameIndex);
+ // Don't fold subreg spills, or reloads that use a high subreg.
+ for (auto Op : Ops) {
+ MachineOperand &MO = MI.getOperand(Op);
+ auto SubReg = MO.getSubReg();
+ if (SubReg && (MO.isDef() || SubReg == X86::sub_8bit_hi))
+ return nullptr;
+ }
+
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ unsigned Size = MFI.getObjectSize(FrameIndex);
+ unsigned Alignment = MFI.getObjectAlignment(FrameIndex);
// If the function stack isn't realigned we don't want to fold instructions
// that need increased alignment.
if (!RI.needsStackRealignment(MF))
@@ -6295,15 +7580,26 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
// instruction isn't scalar (SS).
switch (UserOpc) {
case X86::ADDSSrr_Int: case X86::VADDSSrr_Int: case X86::VADDSSZrr_Int:
+ case X86::Int_CMPSSrr: case X86::Int_VCMPSSrr: case X86::VCMPSSZrr_Int:
case X86::DIVSSrr_Int: case X86::VDIVSSrr_Int: case X86::VDIVSSZrr_Int:
+ case X86::MAXSSrr_Int: case X86::VMAXSSrr_Int: case X86::VMAXSSZrr_Int:
+ case X86::MINSSrr_Int: case X86::VMINSSrr_Int: case X86::VMINSSZrr_Int:
case X86::MULSSrr_Int: case X86::VMULSSrr_Int: case X86::VMULSSZrr_Int:
case X86::SUBSSrr_Int: case X86::VSUBSSrr_Int: case X86::VSUBSSZrr_Int:
- case X86::VFMADDSSr132r_Int: case X86::VFNMADDSSr132r_Int:
- case X86::VFMADDSSr213r_Int: case X86::VFNMADDSSr213r_Int:
- case X86::VFMADDSSr231r_Int: case X86::VFNMADDSSr231r_Int:
- case X86::VFMSUBSSr132r_Int: case X86::VFNMSUBSSr132r_Int:
- case X86::VFMSUBSSr213r_Int: case X86::VFNMSUBSSr213r_Int:
- case X86::VFMSUBSSr231r_Int: case X86::VFNMSUBSSr231r_Int:
+ case X86::VFMADDSS4rr_Int: case X86::VFNMADDSS4rr_Int:
+ case X86::VFMSUBSS4rr_Int: case X86::VFNMSUBSS4rr_Int:
+ case X86::VFMADD132SSr_Int: case X86::VFNMADD132SSr_Int:
+ case X86::VFMADD213SSr_Int: case X86::VFNMADD213SSr_Int:
+ case X86::VFMADD231SSr_Int: case X86::VFNMADD231SSr_Int:
+ case X86::VFMSUB132SSr_Int: case X86::VFNMSUB132SSr_Int:
+ case X86::VFMSUB213SSr_Int: case X86::VFNMSUB213SSr_Int:
+ case X86::VFMSUB231SSr_Int: case X86::VFNMSUB231SSr_Int:
+ case X86::VFMADD132SSZr_Int: case X86::VFNMADD132SSZr_Int:
+ case X86::VFMADD213SSZr_Int: case X86::VFNMADD213SSZr_Int:
+ case X86::VFMADD231SSZr_Int: case X86::VFNMADD231SSZr_Int:
+ case X86::VFMSUB132SSZr_Int: case X86::VFNMSUB132SSZr_Int:
+ case X86::VFMSUB213SSZr_Int: case X86::VFNMSUB213SSZr_Int:
+ case X86::VFMSUB231SSZr_Int: case X86::VFNMSUB231SSZr_Int:
return false;
default:
return true;
@@ -6317,15 +7613,26 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI,
// instruction isn't scalar (SD).
switch (UserOpc) {
case X86::ADDSDrr_Int: case X86::VADDSDrr_Int: case X86::VADDSDZrr_Int:
+ case X86::Int_CMPSDrr: case X86::Int_VCMPSDrr: case X86::VCMPSDZrr_Int:
case X86::DIVSDrr_Int: case X86::VDIVSDrr_Int: case X86::VDIVSDZrr_Int:
+ case X86::MAXSDrr_Int: case X86::VMAXSDrr_Int: case X86::VMAXSDZrr_Int:
+ case X86::MINSDrr_Int: case X86::VMINSDrr_Int: case X86::VMINSDZrr_Int:
case X86::MULSDrr_Int: case X86::VMULSDrr_Int: case X86::VMULSDZrr_Int:
case X86::SUBSDrr_Int: case X86::VSUBSDrr_Int: case X86::VSUBSDZrr_Int:
- case X86::VFMADDSDr132r_Int: case X86::VFNMADDSDr132r_Int:
- case X86::VFMADDSDr213r_Int: case X86::VFNMADDSDr213r_Int:
- case X86::VFMADDSDr231r_Int: case X86::VFNMADDSDr231r_Int:
- case X86::VFMSUBSDr132r_Int: case X86::VFNMSUBSDr132r_Int:
- case X86::VFMSUBSDr213r_Int: case X86::VFNMSUBSDr213r_Int:
- case X86::VFMSUBSDr231r_Int: case X86::VFNMSUBSDr231r_Int:
+ case X86::VFMADDSD4rr_Int: case X86::VFNMADDSD4rr_Int:
+ case X86::VFMSUBSD4rr_Int: case X86::VFNMSUBSD4rr_Int:
+ case X86::VFMADD132SDr_Int: case X86::VFNMADD132SDr_Int:
+ case X86::VFMADD213SDr_Int: case X86::VFNMADD213SDr_Int:
+ case X86::VFMADD231SDr_Int: case X86::VFNMADD231SDr_Int:
+ case X86::VFMSUB132SDr_Int: case X86::VFNMSUB132SDr_Int:
+ case X86::VFMSUB213SDr_Int: case X86::VFNMSUB213SDr_Int:
+ case X86::VFMSUB231SDr_Int: case X86::VFNMSUB231SDr_Int:
+ case X86::VFMADD132SDZr_Int: case X86::VFNMADD132SDZr_Int:
+ case X86::VFMADD213SDZr_Int: case X86::VFNMADD213SDZr_Int:
+ case X86::VFMADD231SDZr_Int: case X86::VFNMADD231SDZr_Int:
+ case X86::VFMSUB132SDZr_Int: case X86::VFNMSUB132SDZr_Int:
+ case X86::VFMSUB213SDZr_Int: case X86::VFNMSUB213SDZr_Int:
+ case X86::VFMSUB231SDZr_Int: case X86::VFNMSUB231SDZr_Int:
return false;
default:
return true;
@@ -6339,6 +7646,14 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
MachineBasicBlock::iterator InsertPt, MachineInstr &LoadMI,
LiveIntervals *LIS) const {
+
+ // TODO: Support the case where LoadMI loads a wide register, but MI
+ // only uses a subreg.
+ for (auto Op : Ops) {
+ if (MI.getOperand(Op).getSubReg())
+ return nullptr;
+ }
+
// If loading from a FrameIndex, fold directly from the FrameIndex.
unsigned NumOps = LoadMI.getDesc().getNumOperands();
int FrameIndex;
@@ -6376,9 +7691,11 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
Alignment = 16;
break;
case X86::FsFLD0SD:
+ case X86::AVX512_FsFLD0SD:
Alignment = 8;
break;
case X86::FsFLD0SS:
+ case X86::AVX512_FsFLD0SS:
Alignment = 4;
break;
default:
@@ -6415,7 +7732,9 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
case X86::AVX512_512_SET0:
case X86::AVX512_512_SETALLONES:
case X86::FsFLD0SD:
- case X86::FsFLD0SS: {
+ case X86::AVX512_FsFLD0SD:
+ case X86::FsFLD0SS:
+ case X86::AVX512_FsFLD0SS: {
// Folding a V_SET0 or V_SETALLONES as a load, to ease register pressure.
// Create a constant-pool entry and operands to load from it.
@@ -6441,9 +7760,9 @@ MachineInstr *X86InstrInfo::foldMemoryOperandImpl(
MachineConstantPool &MCP = *MF.getConstantPool();
Type *Ty;
unsigned Opc = LoadMI.getOpcode();
- if (Opc == X86::FsFLD0SS)
+ if (Opc == X86::FsFLD0SS || Opc == X86::AVX512_FsFLD0SS)
Ty = Type::getFloatTy(MF.getFunction()->getContext());
- else if (Opc == X86::FsFLD0SD)
+ else if (Opc == X86::FsFLD0SD || Opc == X86::AVX512_FsFLD0SD)
Ty = Type::getDoubleTy(MF.getFunction()->getContext());
else if (Opc == X86::AVX512_512_SET0 || Opc == X86::AVX512_512_SETALLONES)
Ty = VectorType::get(Type::getInt32Ty(MF.getFunction()->getContext()),16);
@@ -6649,7 +7968,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
return false;
// FIXME: If a VR128 can have size 32, we should be checking if a 32-byte
// memory access is slow above.
- unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
+ unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16);
bool isAligned = (*MMOs.first) &&
(*MMOs.first)->getAlignment() >= Alignment;
Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, Subtarget), dl,
@@ -6694,7 +8013,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
return false;
// FIXME: If a VR128 can have size 32, we should be checking if a 32-byte
// memory access is slow above.
- unsigned Alignment = RC->getSize() == 32 ? 32 : 16;
+ unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16);
bool isAligned = (*MMOs.first) &&
(*MMOs.first)->getAlignment() >= Alignment;
SDNode *Store =
@@ -6746,8 +8065,6 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
case X86::MOVSDrm:
case X86::MMX_MOVD64rm:
case X86::MMX_MOVQ64rm:
- case X86::FsMOVAPSrm:
- case X86::FsMOVAPDrm:
case X86::MOVAPSrm:
case X86::MOVUPSrm:
case X86::MOVAPDrm:
@@ -6757,8 +8074,6 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
// AVX load instructions
case X86::VMOVSSrm:
case X86::VMOVSDrm:
- case X86::FsVMOVAPSrm:
- case X86::FsVMOVAPDrm:
case X86::VMOVAPSrm:
case X86::VMOVUPSrm:
case X86::VMOVAPDrm:
@@ -6776,6 +8091,8 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
case X86::VMOVSDZrm:
case X86::VMOVAPSZ128rm:
case X86::VMOVUPSZ128rm:
+ case X86::VMOVAPSZ128rm_NOVLX:
+ case X86::VMOVUPSZ128rm_NOVLX:
case X86::VMOVAPDZ128rm:
case X86::VMOVUPDZ128rm:
case X86::VMOVDQU8Z128rm:
@@ -6786,6 +8103,8 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
case X86::VMOVDQU64Z128rm:
case X86::VMOVAPSZ256rm:
case X86::VMOVUPSZ256rm:
+ case X86::VMOVAPSZ256rm_NOVLX:
+ case X86::VMOVUPSZ256rm_NOVLX:
case X86::VMOVAPDZ256rm:
case X86::VMOVUPDZ256rm:
case X86::VMOVDQU8Z256rm:
@@ -6823,8 +8142,6 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
case X86::MOVSDrm:
case X86::MMX_MOVD64rm:
case X86::MMX_MOVQ64rm:
- case X86::FsMOVAPSrm:
- case X86::FsMOVAPDrm:
case X86::MOVAPSrm:
case X86::MOVUPSrm:
case X86::MOVAPDrm:
@@ -6834,8 +8151,6 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
// AVX load instructions
case X86::VMOVSSrm:
case X86::VMOVSDrm:
- case X86::FsVMOVAPSrm:
- case X86::FsVMOVAPDrm:
case X86::VMOVAPSrm:
case X86::VMOVUPSrm:
case X86::VMOVAPDrm:
@@ -6853,6 +8168,8 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
case X86::VMOVSDZrm:
case X86::VMOVAPSZ128rm:
case X86::VMOVUPSZ128rm:
+ case X86::VMOVAPSZ128rm_NOVLX:
+ case X86::VMOVUPSZ128rm_NOVLX:
case X86::VMOVAPDZ128rm:
case X86::VMOVUPDZ128rm:
case X86::VMOVDQU8Z128rm:
@@ -6863,6 +8180,8 @@ X86InstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
case X86::VMOVDQU64Z128rm:
case X86::VMOVAPSZ256rm:
case X86::VMOVUPSZ256rm:
+ case X86::VMOVAPSZ256rm_NOVLX:
+ case X86::VMOVUPSZ256rm_NOVLX:
case X86::VMOVAPDZ256rm:
case X86::VMOVUPDZ256rm:
case X86::VMOVDQU8Z256rm:
@@ -6960,8 +8279,8 @@ bool X86InstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
return true;
}
-bool X86InstrInfo::shouldScheduleAdjacent(MachineInstr &First,
- MachineInstr &Second) const {
+bool X86InstrInfo::shouldScheduleAdjacent(const MachineInstr &First,
+ const MachineInstr &Second) const {
// Check if this processor supports macro-fusion. Since this is a minor
// heuristic, we haven't specifically reserved a feature. hasAVX is a decent
// proxy for SandyBridge+.
@@ -7120,7 +8439,7 @@ bool X86InstrInfo::shouldScheduleAdjacent(MachineInstr &First,
}
bool X86InstrInfo::
-ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
assert(Cond.size() == 1 && "Invalid X86 branch condition!");
X86::CondCode CC = static_cast<X86::CondCode>(Cond[0].getImm());
Cond[0].setImm(GetOppositeBranchCondition(CC));
@@ -7168,7 +8487,10 @@ static const uint16_t ReplaceableInstrs[][3] = {
{ X86::MOVAPSrr, X86::MOVAPDrr, X86::MOVDQArr },
{ X86::MOVUPSmr, X86::MOVUPDmr, X86::MOVDQUmr },
{ X86::MOVUPSrm, X86::MOVUPDrm, X86::MOVDQUrm },
- { X86::MOVLPSmr, X86::MOVLPDmr, X86::MOVPQI2QImr },
+ { X86::MOVLPSmr, X86::MOVLPDmr, X86::MOVPQI2QImr },
+ { X86::MOVSSmr, X86::MOVSSmr, X86::MOVPDI2DImr },
+ { X86::MOVSDrm, X86::MOVSDrm, X86::MOVQI2PQIrm },
+ { X86::MOVSSrm, X86::MOVSSrm, X86::MOVDI2PDIrm },
{ X86::MOVNTPSmr, X86::MOVNTPDmr, X86::MOVNTDQmr },
{ X86::ANDNPSrm, X86::ANDNPDrm, X86::PANDNrm },
{ X86::ANDNPSrr, X86::ANDNPDrr, X86::PANDNrr },
@@ -7184,7 +8506,10 @@ static const uint16_t ReplaceableInstrs[][3] = {
{ X86::VMOVAPSrr, X86::VMOVAPDrr, X86::VMOVDQArr },
{ X86::VMOVUPSmr, X86::VMOVUPDmr, X86::VMOVDQUmr },
{ X86::VMOVUPSrm, X86::VMOVUPDrm, X86::VMOVDQUrm },
- { X86::VMOVLPSmr, X86::VMOVLPDmr, X86::VMOVPQI2QImr },
+ { X86::VMOVLPSmr, X86::VMOVLPDmr, X86::VMOVPQI2QImr },
+ { X86::VMOVSSmr, X86::VMOVSSmr, X86::VMOVPDI2DImr },
+ { X86::VMOVSDrm, X86::VMOVSDrm, X86::VMOVQI2PQIrm },
+ { X86::VMOVSSrm, X86::VMOVSSrm, X86::VMOVDI2PDIrm },
{ X86::VMOVNTPSmr, X86::VMOVNTPDmr, X86::VMOVNTDQmr },
{ X86::VANDNPSrm, X86::VANDNPDrm, X86::VPANDNrm },
{ X86::VANDNPSrr, X86::VANDNPDrr, X86::VPANDNrr },
@@ -7200,7 +8525,26 @@ static const uint16_t ReplaceableInstrs[][3] = {
{ X86::VMOVAPSYrr, X86::VMOVAPDYrr, X86::VMOVDQAYrr },
{ X86::VMOVUPSYmr, X86::VMOVUPDYmr, X86::VMOVDQUYmr },
{ X86::VMOVUPSYrm, X86::VMOVUPDYrm, X86::VMOVDQUYrm },
- { X86::VMOVNTPSYmr, X86::VMOVNTPDYmr, X86::VMOVNTDQYmr }
+ { X86::VMOVNTPSYmr, X86::VMOVNTPDYmr, X86::VMOVNTDQYmr },
+ // AVX512 support
+ { X86::VMOVLPSZ128mr, X86::VMOVLPDZ128mr, X86::VMOVPQI2QIZmr },
+ { X86::VMOVNTPSZ128mr, X86::VMOVNTPDZ128mr, X86::VMOVNTDQZ128mr },
+ { X86::VMOVNTPSZmr, X86::VMOVNTPDZmr, X86::VMOVNTDQZmr },
+ { X86::VMOVSDZmr, X86::VMOVSDZmr, X86::VMOVPQI2QIZmr },
+ { X86::VMOVSSZmr, X86::VMOVSSZmr, X86::VMOVPDI2DIZmr },
+ { X86::VMOVSDZrm, X86::VMOVSDZrm, X86::VMOVQI2PQIZrm },
+ { X86::VMOVSSZrm, X86::VMOVSSZrm, X86::VMOVDI2PDIZrm },
+ { X86::VBROADCASTSSZ128r, X86::VBROADCASTSSZ128r, X86::VPBROADCASTDZ128r },
+ { X86::VBROADCASTSSZ128m, X86::VBROADCASTSSZ128m, X86::VPBROADCASTDZ128m },
+ { X86::VBROADCASTSSZ256r, X86::VBROADCASTSSZ256r, X86::VPBROADCASTDZ256r },
+ { X86::VBROADCASTSSZ256m, X86::VBROADCASTSSZ256m, X86::VPBROADCASTDZ256m },
+ { X86::VBROADCASTSSZr, X86::VBROADCASTSSZr, X86::VPBROADCASTDZr },
+ { X86::VBROADCASTSSZm, X86::VBROADCASTSSZm, X86::VPBROADCASTDZm },
+ { X86::VBROADCASTSDZ256r, X86::VBROADCASTSDZ256r, X86::VPBROADCASTQZ256r },
+ { X86::VBROADCASTSDZ256m, X86::VBROADCASTSDZ256m, X86::VPBROADCASTQZ256m },
+ { X86::VBROADCASTSDZr, X86::VBROADCASTSDZr, X86::VPBROADCASTQZr },
+ { X86::VBROADCASTSDZm, X86::VBROADCASTSDZm, X86::VPBROADCASTQZm },
};
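Each row of this table (and of the wider AVX512 tables below) names the same operation in the packed-single, packed-double, and packed-integer execution domains; switching an instruction's domain is a row lookup followed by picking a column. A minimal sketch of that lookup, using a 0-based domain index chosen for the sketch rather than the encoding the target hooks actually use:

#include <cstddef>
#include <cstdint>

// Returns the equivalent opcode in the requested domain column
// (here 0 = PackedSingle, 1 = PackedDouble, 2 = PackedInt),
// or 0 if the opcode does not appear in the table.
static uint16_t lookupDomainEquivalent(const uint16_t (*Table)[3], size_t Rows,
                                       uint16_t Opcode, unsigned Domain) {
  for (size_t Row = 0; Row != Rows; ++Row)
    for (unsigned Col = 0; Col != 3; ++Col)
      if (Table[Row][Col] == Opcode)
        return Table[Row][Domain];
  return 0;
}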
static const uint16_t ReplaceableInstrsAVX2[][3] = {
@@ -7224,22 +8568,257 @@ static const uint16_t ReplaceableInstrsAVX2[][3] = {
{ X86::VBROADCASTSSYrr, X86::VBROADCASTSSYrr, X86::VPBROADCASTDYrr},
{ X86::VBROADCASTSSYrm, X86::VBROADCASTSSYrm, X86::VPBROADCASTDYrm},
{ X86::VBROADCASTSDYrr, X86::VBROADCASTSDYrr, X86::VPBROADCASTQYrr},
- { X86::VBROADCASTSDYrm, X86::VBROADCASTSDYrm, X86::VPBROADCASTQYrm}
+ { X86::VBROADCASTSDYrm, X86::VBROADCASTSDYrm, X86::VPBROADCASTQYrm},
+ { X86::VBROADCASTF128, X86::VBROADCASTF128, X86::VBROADCASTI128 },
+};
+
+static const uint16_t ReplaceableInstrsAVX512[][4] = {
+ // Two integer columns for 64-bit and 32-bit elements.
+ //PackedSingle PackedDouble PackedInt PackedInt
+ { X86::VMOVAPSZ128mr, X86::VMOVAPDZ128mr, X86::VMOVDQA64Z128mr, X86::VMOVDQA32Z128mr },
+ { X86::VMOVAPSZ128rm, X86::VMOVAPDZ128rm, X86::VMOVDQA64Z128rm, X86::VMOVDQA32Z128rm },
+ { X86::VMOVAPSZ128rr, X86::VMOVAPDZ128rr, X86::VMOVDQA64Z128rr, X86::VMOVDQA32Z128rr },
+ { X86::VMOVUPSZ128mr, X86::VMOVUPDZ128mr, X86::VMOVDQU64Z128mr, X86::VMOVDQU32Z128mr },
+ { X86::VMOVUPSZ128rm, X86::VMOVUPDZ128rm, X86::VMOVDQU64Z128rm, X86::VMOVDQU32Z128rm },
+ { X86::VMOVAPSZ256mr, X86::VMOVAPDZ256mr, X86::VMOVDQA64Z256mr, X86::VMOVDQA32Z256mr },
+ { X86::VMOVAPSZ256rm, X86::VMOVAPDZ256rm, X86::VMOVDQA64Z256rm, X86::VMOVDQA32Z256rm },
+ { X86::VMOVAPSZ256rr, X86::VMOVAPDZ256rr, X86::VMOVDQA64Z256rr, X86::VMOVDQA32Z256rr },
+ { X86::VMOVUPSZ256mr, X86::VMOVUPDZ256mr, X86::VMOVDQU64Z256mr, X86::VMOVDQU32Z256mr },
+ { X86::VMOVUPSZ256rm, X86::VMOVUPDZ256rm, X86::VMOVDQU64Z256rm, X86::VMOVDQU32Z256rm },
+ { X86::VMOVAPSZmr, X86::VMOVAPDZmr, X86::VMOVDQA64Zmr, X86::VMOVDQA32Zmr },
+ { X86::VMOVAPSZrm, X86::VMOVAPDZrm, X86::VMOVDQA64Zrm, X86::VMOVDQA32Zrm },
+ { X86::VMOVAPSZrr, X86::VMOVAPDZrr, X86::VMOVDQA64Zrr, X86::VMOVDQA32Zrr },
+ { X86::VMOVUPSZmr, X86::VMOVUPDZmr, X86::VMOVDQU64Zmr, X86::VMOVDQU32Zmr },
+ { X86::VMOVUPSZrm, X86::VMOVUPDZrm, X86::VMOVDQU64Zrm, X86::VMOVDQU32Zrm },
+};
+
+static const uint16_t ReplaceableInstrsAVX512DQ[][4] = {
+ // Two integer columns for 64-bit and 32-bit elements.
+ //PackedSingle PackedDouble PackedInt PackedInt
+ { X86::VANDNPSZ128rm, X86::VANDNPDZ128rm, X86::VPANDNQZ128rm, X86::VPANDNDZ128rm },
+ { X86::VANDNPSZ128rr, X86::VANDNPDZ128rr, X86::VPANDNQZ128rr, X86::VPANDNDZ128rr },
+ { X86::VANDPSZ128rm, X86::VANDPDZ128rm, X86::VPANDQZ128rm, X86::VPANDDZ128rm },
+ { X86::VANDPSZ128rr, X86::VANDPDZ128rr, X86::VPANDQZ128rr, X86::VPANDDZ128rr },
+ { X86::VORPSZ128rm, X86::VORPDZ128rm, X86::VPORQZ128rm, X86::VPORDZ128rm },
+ { X86::VORPSZ128rr, X86::VORPDZ128rr, X86::VPORQZ128rr, X86::VPORDZ128rr },
+ { X86::VXORPSZ128rm, X86::VXORPDZ128rm, X86::VPXORQZ128rm, X86::VPXORDZ128rm },
+ { X86::VXORPSZ128rr, X86::VXORPDZ128rr, X86::VPXORQZ128rr, X86::VPXORDZ128rr },
+ { X86::VANDNPSZ256rm, X86::VANDNPDZ256rm, X86::VPANDNQZ256rm, X86::VPANDNDZ256rm },
+ { X86::VANDNPSZ256rr, X86::VANDNPDZ256rr, X86::VPANDNQZ256rr, X86::VPANDNDZ256rr },
+ { X86::VANDPSZ256rm, X86::VANDPDZ256rm, X86::VPANDQZ256rm, X86::VPANDDZ256rm },
+ { X86::VANDPSZ256rr, X86::VANDPDZ256rr, X86::VPANDQZ256rr, X86::VPANDDZ256rr },
+ { X86::VORPSZ256rm, X86::VORPDZ256rm, X86::VPORQZ256rm, X86::VPORDZ256rm },
+ { X86::VORPSZ256rr, X86::VORPDZ256rr, X86::VPORQZ256rr, X86::VPORDZ256rr },
+ { X86::VXORPSZ256rm, X86::VXORPDZ256rm, X86::VPXORQZ256rm, X86::VPXORDZ256rm },
+ { X86::VXORPSZ256rr, X86::VXORPDZ256rr, X86::VPXORQZ256rr, X86::VPXORDZ256rr },
+ { X86::VANDNPSZrm, X86::VANDNPDZrm, X86::VPANDNQZrm, X86::VPANDNDZrm },
+ { X86::VANDNPSZrr, X86::VANDNPDZrr, X86::VPANDNQZrr, X86::VPANDNDZrr },
+ { X86::VANDPSZrm, X86::VANDPDZrm, X86::VPANDQZrm, X86::VPANDDZrm },
+ { X86::VANDPSZrr, X86::VANDPDZrr, X86::VPANDQZrr, X86::VPANDDZrr },
+ { X86::VORPSZrm, X86::VORPDZrm, X86::VPORQZrm, X86::VPORDZrm },
+ { X86::VORPSZrr, X86::VORPDZrr, X86::VPORQZrr, X86::VPORDZrr },
+ { X86::VXORPSZrm, X86::VXORPDZrm, X86::VPXORQZrm, X86::VPXORDZrm },
+ { X86::VXORPSZrr, X86::VXORPDZrr, X86::VPXORQZrr, X86::VPXORDZrr },
+};
+
+static const uint16_t ReplaceableInstrsAVX512DQMasked[][4] = {
+ // Two integer columns for 64-bit and 32-bit elements.
+ //PackedSingle PackedDouble
+ //PackedInt PackedInt
+ { X86::VANDNPSZ128rmk, X86::VANDNPDZ128rmk,
+ X86::VPANDNQZ128rmk, X86::VPANDNDZ128rmk },
+ { X86::VANDNPSZ128rmkz, X86::VANDNPDZ128rmkz,
+ X86::VPANDNQZ128rmkz, X86::VPANDNDZ128rmkz },
+ { X86::VANDNPSZ128rrk, X86::VANDNPDZ128rrk,
+ X86::VPANDNQZ128rrk, X86::VPANDNDZ128rrk },
+ { X86::VANDNPSZ128rrkz, X86::VANDNPDZ128rrkz,
+ X86::VPANDNQZ128rrkz, X86::VPANDNDZ128rrkz },
+ { X86::VANDPSZ128rmk, X86::VANDPDZ128rmk,
+ X86::VPANDQZ128rmk, X86::VPANDDZ128rmk },
+ { X86::VANDPSZ128rmkz, X86::VANDPDZ128rmkz,
+ X86::VPANDQZ128rmkz, X86::VPANDDZ128rmkz },
+ { X86::VANDPSZ128rrk, X86::VANDPDZ128rrk,
+ X86::VPANDQZ128rrk, X86::VPANDDZ128rrk },
+ { X86::VANDPSZ128rrkz, X86::VANDPDZ128rrkz,
+ X86::VPANDQZ128rrkz, X86::VPANDDZ128rrkz },
+ { X86::VORPSZ128rmk, X86::VORPDZ128rmk,
+ X86::VPORQZ128rmk, X86::VPORDZ128rmk },
+ { X86::VORPSZ128rmkz, X86::VORPDZ128rmkz,
+ X86::VPORQZ128rmkz, X86::VPORDZ128rmkz },
+ { X86::VORPSZ128rrk, X86::VORPDZ128rrk,
+ X86::VPORQZ128rrk, X86::VPORDZ128rrk },
+ { X86::VORPSZ128rrkz, X86::VORPDZ128rrkz,
+ X86::VPORQZ128rrkz, X86::VPORDZ128rrkz },
+ { X86::VXORPSZ128rmk, X86::VXORPDZ128rmk,
+ X86::VPXORQZ128rmk, X86::VPXORDZ128rmk },
+ { X86::VXORPSZ128rmkz, X86::VXORPDZ128rmkz,
+ X86::VPXORQZ128rmkz, X86::VPXORDZ128rmkz },
+ { X86::VXORPSZ128rrk, X86::VXORPDZ128rrk,
+ X86::VPXORQZ128rrk, X86::VPXORDZ128rrk },
+ { X86::VXORPSZ128rrkz, X86::VXORPDZ128rrkz,
+ X86::VPXORQZ128rrkz, X86::VPXORDZ128rrkz },
+ { X86::VANDNPSZ256rmk, X86::VANDNPDZ256rmk,
+ X86::VPANDNQZ256rmk, X86::VPANDNDZ256rmk },
+ { X86::VANDNPSZ256rmkz, X86::VANDNPDZ256rmkz,
+ X86::VPANDNQZ256rmkz, X86::VPANDNDZ256rmkz },
+ { X86::VANDNPSZ256rrk, X86::VANDNPDZ256rrk,
+ X86::VPANDNQZ256rrk, X86::VPANDNDZ256rrk },
+ { X86::VANDNPSZ256rrkz, X86::VANDNPDZ256rrkz,
+ X86::VPANDNQZ256rrkz, X86::VPANDNDZ256rrkz },
+ { X86::VANDPSZ256rmk, X86::VANDPDZ256rmk,
+ X86::VPANDQZ256rmk, X86::VPANDDZ256rmk },
+ { X86::VANDPSZ256rmkz, X86::VANDPDZ256rmkz,
+ X86::VPANDQZ256rmkz, X86::VPANDDZ256rmkz },
+ { X86::VANDPSZ256rrk, X86::VANDPDZ256rrk,
+ X86::VPANDQZ256rrk, X86::VPANDDZ256rrk },
+ { X86::VANDPSZ256rrkz, X86::VANDPDZ256rrkz,
+ X86::VPANDQZ256rrkz, X86::VPANDDZ256rrkz },
+ { X86::VORPSZ256rmk, X86::VORPDZ256rmk,
+ X86::VPORQZ256rmk, X86::VPORDZ256rmk },
+ { X86::VORPSZ256rmkz, X86::VORPDZ256rmkz,
+ X86::VPORQZ256rmkz, X86::VPORDZ256rmkz },
+ { X86::VORPSZ256rrk, X86::VORPDZ256rrk,
+ X86::VPORQZ256rrk, X86::VPORDZ256rrk },
+ { X86::VORPSZ256rrkz, X86::VORPDZ256rrkz,
+ X86::VPORQZ256rrkz, X86::VPORDZ256rrkz },
+ { X86::VXORPSZ256rmk, X86::VXORPDZ256rmk,
+ X86::VPXORQZ256rmk, X86::VPXORDZ256rmk },
+ { X86::VXORPSZ256rmkz, X86::VXORPDZ256rmkz,
+ X86::VPXORQZ256rmkz, X86::VPXORDZ256rmkz },
+ { X86::VXORPSZ256rrk, X86::VXORPDZ256rrk,
+ X86::VPXORQZ256rrk, X86::VPXORDZ256rrk },
+ { X86::VXORPSZ256rrkz, X86::VXORPDZ256rrkz,
+ X86::VPXORQZ256rrkz, X86::VPXORDZ256rrkz },
+ { X86::VANDNPSZrmk, X86::VANDNPDZrmk,
+ X86::VPANDNQZrmk, X86::VPANDNDZrmk },
+ { X86::VANDNPSZrmkz, X86::VANDNPDZrmkz,
+ X86::VPANDNQZrmkz, X86::VPANDNDZrmkz },
+ { X86::VANDNPSZrrk, X86::VANDNPDZrrk,
+ X86::VPANDNQZrrk, X86::VPANDNDZrrk },
+ { X86::VANDNPSZrrkz, X86::VANDNPDZrrkz,
+ X86::VPANDNQZrrkz, X86::VPANDNDZrrkz },
+ { X86::VANDPSZrmk, X86::VANDPDZrmk,
+ X86::VPANDQZrmk, X86::VPANDDZrmk },
+ { X86::VANDPSZrmkz, X86::VANDPDZrmkz,
+ X86::VPANDQZrmkz, X86::VPANDDZrmkz },
+ { X86::VANDPSZrrk, X86::VANDPDZrrk,
+ X86::VPANDQZrrk, X86::VPANDDZrrk },
+ { X86::VANDPSZrrkz, X86::VANDPDZrrkz,
+ X86::VPANDQZrrkz, X86::VPANDDZrrkz },
+ { X86::VORPSZrmk, X86::VORPDZrmk,
+ X86::VPORQZrmk, X86::VPORDZrmk },
+ { X86::VORPSZrmkz, X86::VORPDZrmkz,
+ X86::VPORQZrmkz, X86::VPORDZrmkz },
+ { X86::VORPSZrrk, X86::VORPDZrrk,
+ X86::VPORQZrrk, X86::VPORDZrrk },
+ { X86::VORPSZrrkz, X86::VORPDZrrkz,
+ X86::VPORQZrrkz, X86::VPORDZrrkz },
+ { X86::VXORPSZrmk, X86::VXORPDZrmk,
+ X86::VPXORQZrmk, X86::VPXORDZrmk },
+ { X86::VXORPSZrmkz, X86::VXORPDZrmkz,
+ X86::VPXORQZrmkz, X86::VPXORDZrmkz },
+ { X86::VXORPSZrrk, X86::VXORPDZrrk,
+ X86::VPXORQZrrk, X86::VPXORDZrrk },
+ { X86::VXORPSZrrkz, X86::VXORPDZrrkz,
+ X86::VPXORQZrrkz, X86::VPXORDZrrkz },
+ // Broadcast loads can be handled the same as masked operations to avoid
+ // changing element size.
+ { X86::VANDNPSZ128rmb, X86::VANDNPDZ128rmb,
+ X86::VPANDNQZ128rmb, X86::VPANDNDZ128rmb },
+ { X86::VANDPSZ128rmb, X86::VANDPDZ128rmb,
+ X86::VPANDQZ128rmb, X86::VPANDDZ128rmb },
+ { X86::VORPSZ128rmb, X86::VORPDZ128rmb,
+ X86::VPORQZ128rmb, X86::VPORDZ128rmb },
+ { X86::VXORPSZ128rmb, X86::VXORPDZ128rmb,
+ X86::VPXORQZ128rmb, X86::VPXORDZ128rmb },
+ { X86::VANDNPSZ256rmb, X86::VANDNPDZ256rmb,
+ X86::VPANDNQZ256rmb, X86::VPANDNDZ256rmb },
+ { X86::VANDPSZ256rmb, X86::VANDPDZ256rmb,
+ X86::VPANDQZ256rmb, X86::VPANDDZ256rmb },
+ { X86::VORPSZ256rmb, X86::VORPDZ256rmb,
+ X86::VPORQZ256rmb, X86::VPORDZ256rmb },
+ { X86::VXORPSZ256rmb, X86::VXORPDZ256rmb,
+ X86::VPXORQZ256rmb, X86::VPXORDZ256rmb },
+ { X86::VANDNPSZrmb, X86::VANDNPDZrmb,
+ X86::VPANDNQZrmb, X86::VPANDNDZrmb },
+ { X86::VANDPSZrmb, X86::VANDPDZrmb,
+ X86::VPANDQZrmb, X86::VPANDDZrmb },
+ { X86::VORPSZrmb, X86::VORPDZrmb,
+ X86::VPORQZrmb, X86::VPORDZrmb },
+ { X86::VXORPSZrmb, X86::VXORPDZrmb,
+ X86::VPXORQZrmb, X86::VPXORDZrmb },
+ { X86::VANDNPSZ128rmbk, X86::VANDNPDZ128rmbk,
+ X86::VPANDNQZ128rmbk, X86::VPANDNDZ128rmbk },
+ { X86::VANDPSZ128rmbk, X86::VANDPDZ128rmbk,
+ X86::VPANDQZ128rmbk, X86::VPANDDZ128rmbk },
+ { X86::VORPSZ128rmbk, X86::VORPDZ128rmbk,
+ X86::VPORQZ128rmbk, X86::VPORDZ128rmbk },
+ { X86::VXORPSZ128rmbk, X86::VXORPDZ128rmbk,
+ X86::VPXORQZ128rmbk, X86::VPXORDZ128rmbk },
+ { X86::VANDNPSZ256rmbk, X86::VANDNPDZ256rmbk,
+ X86::VPANDNQZ256rmbk, X86::VPANDNDZ256rmbk },
+ { X86::VANDPSZ256rmbk, X86::VANDPDZ256rmbk,
+ X86::VPANDQZ256rmbk, X86::VPANDDZ256rmbk },
+ { X86::VORPSZ256rmbk, X86::VORPDZ256rmbk,
+ X86::VPORQZ256rmbk, X86::VPORDZ256rmbk },
+ { X86::VXORPSZ256rmbk, X86::VXORPDZ256rmbk,
+ X86::VPXORQZ256rmbk, X86::VPXORDZ256rmbk },
+ { X86::VANDNPSZrmbk, X86::VANDNPDZrmbk,
+ X86::VPANDNQZrmbk, X86::VPANDNDZrmbk },
+ { X86::VANDPSZrmbk, X86::VANDPDZrmbk,
+ X86::VPANDQZrmbk, X86::VPANDDZrmbk },
+ { X86::VORPSZrmbk, X86::VORPDZrmbk,
+ X86::VPORQZrmbk, X86::VPORDZrmbk },
+ { X86::VXORPSZrmbk, X86::VXORPDZrmbk,
+ X86::VPXORQZrmbk, X86::VPXORDZrmbk },
+ { X86::VANDNPSZ128rmbkz,X86::VANDNPDZ128rmbkz,
+ X86::VPANDNQZ128rmbkz,X86::VPANDNDZ128rmbkz},
+ { X86::VANDPSZ128rmbkz, X86::VANDPDZ128rmbkz,
+ X86::VPANDQZ128rmbkz, X86::VPANDDZ128rmbkz },
+ { X86::VORPSZ128rmbkz, X86::VORPDZ128rmbkz,
+ X86::VPORQZ128rmbkz, X86::VPORDZ128rmbkz },
+ { X86::VXORPSZ128rmbkz, X86::VXORPDZ128rmbkz,
+ X86::VPXORQZ128rmbkz, X86::VPXORDZ128rmbkz },
+ { X86::VANDNPSZ256rmbkz,X86::VANDNPDZ256rmbkz,
+ X86::VPANDNQZ256rmbkz,X86::VPANDNDZ256rmbkz},
+ { X86::VANDPSZ256rmbkz, X86::VANDPDZ256rmbkz,
+ X86::VPANDQZ256rmbkz, X86::VPANDDZ256rmbkz },
+ { X86::VORPSZ256rmbkz, X86::VORPDZ256rmbkz,
+ X86::VPORQZ256rmbkz, X86::VPORDZ256rmbkz },
+ { X86::VXORPSZ256rmbkz, X86::VXORPDZ256rmbkz,
+ X86::VPXORQZ256rmbkz, X86::VPXORDZ256rmbkz },
+ { X86::VANDNPSZrmbkz, X86::VANDNPDZrmbkz,
+ X86::VPANDNQZrmbkz, X86::VPANDNDZrmbkz },
+ { X86::VANDPSZrmbkz, X86::VANDPDZrmbkz,
+ X86::VPANDQZrmbkz, X86::VPANDDZrmbkz },
+ { X86::VORPSZrmbkz, X86::VORPDZrmbkz,
+ X86::VPORQZrmbkz, X86::VPORDZrmbkz },
+ { X86::VXORPSZrmbkz, X86::VXORPDZrmbkz,
+ X86::VPXORQZrmbkz, X86::VPXORDZrmbkz },
};
// FIXME: Some shuffle and unpack instructions have equivalents in different
// domains, but they require a bit more work than just switching opcodes.
-static const uint16_t *lookup(unsigned opcode, unsigned domain) {
- for (const uint16_t (&Row)[3] : ReplaceableInstrs)
+static const uint16_t *lookup(unsigned opcode, unsigned domain,
+ ArrayRef<uint16_t[3]> Table) {
+ for (const uint16_t (&Row)[3] : Table)
if (Row[domain-1] == opcode)
return Row;
return nullptr;
}
-static const uint16_t *lookupAVX2(unsigned opcode, unsigned domain) {
- for (const uint16_t (&Row)[3] : ReplaceableInstrsAVX2)
- if (Row[domain-1] == opcode)
+static const uint16_t *lookupAVX512(unsigned opcode, unsigned domain,
+ ArrayRef<uint16_t[4]> Table) {
+ // If this is the integer domain, make sure to check both integer columns.
+ for (const uint16_t (&Row)[4] : Table)
+ if (Row[domain-1] == opcode || (domain == 3 && Row[3] == opcode))
return Row;
return nullptr;
}
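The two-integer-column convention used by the AVX-512 tables above (column 2 holds the 64-bit-element VP*Q form, column 3 the 32-bit-element VP*D form) can be illustrated with a self-contained toy version of lookupAVX512. This is a minimal sketch, not LLVM code; the table values are made-up stand-ins for real opcode enums:

```cpp
#include <cstdint>
#include <cstdio>

// Toy 4-column table mirroring ReplaceableInstrsAVX512DQ*:
//   col 0 = PackedSingle, col 1 = PackedDouble,
//   col 2 = PackedInt (64-bit elements), col 3 = PackedInt (32-bit elements).
static const uint16_t Table[][4] = {
    {100, 101, 102, 103}, // made-up ids, e.g. VANDPS / VANDPD / VPANDQ / VPANDD
    {200, 201, 202, 203},
};

// Same shape as lookupAVX512 above: Domain is 1..3; for the integer
// domain (3) an opcode may sit in either integer column, so both are checked.
static const uint16_t *lookupToy(unsigned Opcode, unsigned Domain) {
  for (const uint16_t (&Row)[4] : Table)
    if (Row[Domain - 1] == Opcode || (Domain == 3 && Row[3] == Opcode))
      return Row;
  return nullptr;
}

int main() {
  // 103 only appears in the extra (32-bit element) integer column, but is
  // still found when queried in the integer domain (3)...
  std::printf("int domain: %d\n", lookupToy(103, 3) != nullptr); // 1
  // ...and is not found when queried in the PackedSingle domain (1).
  std::printf("ps  domain: %d\n", lookupToy(103, 1) != nullptr); // 0
}
```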
@@ -7247,12 +8826,25 @@ static const uint16_t *lookupAVX2(unsigned opcode, unsigned domain) {
std::pair<uint16_t, uint16_t>
X86InstrInfo::getExecutionDomain(const MachineInstr &MI) const {
uint16_t domain = (MI.getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
- bool hasAVX2 = Subtarget.hasAVX2();
+ unsigned opcode = MI.getOpcode();
uint16_t validDomains = 0;
- if (domain && lookup(MI.getOpcode(), domain))
- validDomains = 0xe;
- else if (domain && lookupAVX2(MI.getOpcode(), domain))
- validDomains = hasAVX2 ? 0xe : 0x6;
+ if (domain) {
+ if (lookup(MI.getOpcode(), domain, ReplaceableInstrs)) {
+ validDomains = 0xe;
+ } else if (lookup(opcode, domain, ReplaceableInstrsAVX2)) {
+ validDomains = Subtarget.hasAVX2() ? 0xe : 0x6;
+ } else if (lookupAVX512(opcode, domain, ReplaceableInstrsAVX512)) {
+ validDomains = 0xe;
+ } else if (lookupAVX512(opcode, domain, ReplaceableInstrsAVX512DQ)) {
+ validDomains = Subtarget.hasDQI() ? 0xe : 0x8;
+ } else if (const uint16_t *table = lookupAVX512(opcode, domain,
+ ReplaceableInstrsAVX512DQMasked)) {
+ if (domain == 1 || (domain == 3 && table[3] == opcode))
+ validDomains = Subtarget.hasDQI() ? 0xa : 0x8;
+ else
+ validDomains = Subtarget.hasDQI() ? 0xc : 0x8;
+ }
+ }
return std::make_pair(domain, validDomains);
}
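getExecutionDomain returns the instruction's current domain together with a bitmask of domains it may be rewritten into. Judging from the constants used above (0xe, 0x6, 0x8, 0xa, 0xc), bit k of the mask corresponds to execution domain k (1 = PackedSingle, 2 = PackedDouble, 3 = PackedInt). A minimal standalone sketch (not LLVM code) that decodes such a mask under that assumption:

```cpp
#include <cstdint>
#include <cstdio>

// Domain names in the order used by the code above; domain 0 ("Generic")
// is filtered out by the `if (domain)` check and never appears in the mask.
static const char *DomainNames[] = {"Generic", "PackedSingle", "PackedDouble",
                                    "PackedInt"};

// Decode a validDomains mask such as 0xe, 0x6, 0x8, 0xa or 0xc: bit k set
// means the instruction may be rewritten into domain k.
static void printValidDomains(uint16_t Mask) {
  for (unsigned D = 1; D <= 3; ++D)
    if (Mask & (1u << D))
      std::printf("  %s\n", DomainNames[D]);
}

int main() {
  std::printf("0xe:\n");
  printValidDomains(0xe); // PackedSingle, PackedDouble, PackedInt
  std::printf("0xa:\n");
  printValidDomains(0xa); // PackedSingle and PackedInt only
}
```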
@@ -7260,11 +8852,32 @@ void X86InstrInfo::setExecutionDomain(MachineInstr &MI, unsigned Domain) const {
assert(Domain>0 && Domain<4 && "Invalid execution domain");
uint16_t dom = (MI.getDesc().TSFlags >> X86II::SSEDomainShift) & 3;
assert(dom && "Not an SSE instruction");
- const uint16_t *table = lookup(MI.getOpcode(), dom);
+ const uint16_t *table = lookup(MI.getOpcode(), dom, ReplaceableInstrs);
if (!table) { // try the other table
assert((Subtarget.hasAVX2() || Domain < 3) &&
"256-bit vector operations only available in AVX2");
- table = lookupAVX2(MI.getOpcode(), dom);
+ table = lookup(MI.getOpcode(), dom, ReplaceableInstrsAVX2);
+ }
+ if (!table) { // try the AVX512 table
+ assert(Subtarget.hasAVX512() && "Requires AVX-512");
+ table = lookupAVX512(MI.getOpcode(), dom, ReplaceableInstrsAVX512);
+ // Don't change integer Q instructions to D instructions.
+ if (table && Domain == 3 && table[3] == MI.getOpcode())
+ Domain = 4;
+ }
+ if (!table) { // try the AVX512DQ table
+ assert((Subtarget.hasDQI() || Domain >= 3) && "Requires AVX-512DQ");
+ table = lookupAVX512(MI.getOpcode(), dom, ReplaceableInstrsAVX512DQ);
+ // Don't change integer Q instructions to D instructions and
+ // use D instructions if we started with a PS instruction.
+ if (table && Domain == 3 && (dom == 1 || table[3] == MI.getOpcode()))
+ Domain = 4;
+ }
+ if (!table) { // try the AVX512DQMasked table
+ assert((Subtarget.hasDQI() || Domain >= 3) && "Requires AVX-512DQ");
+ table = lookupAVX512(MI.getOpcode(), dom, ReplaceableInstrsAVX512DQMasked);
+ if (table && Domain == 3 && (dom == 1 || table[3] == MI.getOpcode()))
+ Domain = 4;
}
assert(table && "Cannot change domain");
MI.setDesc(get(table[Domain - 1]));
@@ -7275,32 +8888,6 @@ void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const {
NopInst.setOpcode(X86::NOOP);
}
-// This code must remain in sync with getJumpInstrTableEntryBound in this class!
-// In particular, getJumpInstrTableEntryBound must always return an upper bound
-// on the encoding lengths of the instructions generated by
-// getUnconditionalBranch and getTrap.
-void X86InstrInfo::getUnconditionalBranch(
- MCInst &Branch, const MCSymbolRefExpr *BranchTarget) const {
- Branch.setOpcode(X86::JMP_1);
- Branch.addOperand(MCOperand::createExpr(BranchTarget));
-}
-
-// This code must remain in sync with getJumpInstrTableEntryBound in this class!
-// In particular, getJumpInstrTableEntryBound must always return an upper bound
-// on the encoding lengths of the instructions generated by
-// getUnconditionalBranch and getTrap.
-void X86InstrInfo::getTrap(MCInst &MI) const {
- MI.setOpcode(X86::TRAP);
-}
-
-// See getTrap and getUnconditionalBranch for conditions on the value returned
-// by this function.
-unsigned X86InstrInfo::getJumpInstrTableEntryBound() const {
- // 5 bytes suffice: JMP_4 Symbol@PLT is uses 1 byte (E9) for the JMP_4 and 4
- // bytes for the symbol offset. And TRAP is ud2, which is two bytes (0F 0B).
- return 5;
-}
-
bool X86InstrInfo::isHighLatencyDef(int opc) const {
switch (opc) {
default: return false;
@@ -7934,6 +9521,28 @@ X86InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
return makeArrayRef(TargetFlags);
}
+bool X86InstrInfo::isTailCall(const MachineInstr &Inst) const {
+ switch (Inst.getOpcode()) {
+ case X86::TCRETURNdi:
+ case X86::TCRETURNmi:
+ case X86::TCRETURNri:
+ case X86::TCRETURNdi64:
+ case X86::TCRETURNmi64:
+ case X86::TCRETURNri64:
+ case X86::TAILJMPd:
+ case X86::TAILJMPm:
+ case X86::TAILJMPr:
+ case X86::TAILJMPd64:
+ case X86::TAILJMPm64:
+ case X86::TAILJMPr64:
+ case X86::TAILJMPm64_REX:
+ case X86::TAILJMPr64_REX:
+ return true;
+ default:
+ return false;
+ }
+}
+
namespace {
/// Create Global Base Reg pass. This initializes the PIC
/// global base register for x86-32.
@@ -7991,7 +9600,7 @@ namespace {
return true;
}
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "X86 PIC Global Base Reg Initialization";
}
@@ -8105,7 +9714,7 @@ namespace {
return Copy;
}
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "Local Dynamic TLS Access Clean-up";
}
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.h b/contrib/llvm/lib/Target/X86/X86InstrInfo.h
index a8a9f629fc1d..8d746172dcbc 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.h
@@ -15,6 +15,7 @@
#define LLVM_LIB_TARGET_X86_X86INSTRINFO_H
#include "MCTargetDesc/X86BaseInfo.h"
+#include "X86InstrFMA3Info.h"
#include "X86RegisterInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -265,7 +266,7 @@ public:
unsigned &SrcOpIdx2) const override;
/// Returns true if the routine could find two commutable operands
- /// in the given FMA instruction. Otherwise, returns false.
+ /// in the given FMA instruction \p MI. Otherwise, returns false.
///
/// \p SrcOpIdx1 and \p SrcOpIdx2 are INPUT and OUTPUT arguments.
/// The output indices of the commuted operands are returned in these
@@ -274,10 +275,12 @@ public:
/// value 'CommuteAnyOperandIndex' which means that the corresponding
/// operand index is not set and this method is free to pick any of
/// available commutable operands.
+ /// The parameter \p FMA3Group keeps a reference to the group of related
+ /// FMA3 opcodes including register/memory forms of 132/213/231 opcodes.
///
/// For example, calling this method this way:
/// unsigned Idx1 = 1, Idx2 = CommuteAnyOperandIndex;
- /// findFMA3CommutedOpIndices(MI, Idx1, Idx2);
+ /// findFMA3CommutedOpIndices(MI, Idx1, Idx2, FMA3Group);
/// can be interpreted as a query asking if the operand #1 can be swapped
/// with any other available operand (e.g. operand #2, operand #3, etc.).
///
@@ -286,24 +289,40 @@ public:
/// FMA213 #1, #2, #3
/// results into instruction with adjusted opcode:
/// FMA231 #3, #2, #1
- bool findFMA3CommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
- unsigned &SrcOpIdx2) const;
+ bool findFMA3CommutedOpIndices(const MachineInstr &MI,
+ unsigned &SrcOpIdx1,
+ unsigned &SrcOpIdx2,
+ const X86InstrFMA3Group &FMA3Group) const;
/// Returns an adjusted FMA opcode that must be used in FMA instruction that
- /// performs the same computations as the given MI but which has the operands
- /// \p SrcOpIdx1 and \p SrcOpIdx2 commuted.
+ /// performs the same computations as the given \p MI but which has the
+ /// operands \p SrcOpIdx1 and \p SrcOpIdx2 commuted.
/// It may return 0 if it is unsafe to commute the operands.
+ /// Note that a machine instruction (instead of its opcode) is passed as the
+ /// first parameter to make it possible to analyze the instruction's uses and
+ /// commute the first operand of an FMA even when it seems unsafe judging only
+ /// by the opcode. For example, it is OK to commute the first operand of
+ /// VFMADD*SD_Int if ONLY the lowest 64-bit element of the result is used.
///
/// The returned FMA opcode may differ from the opcode in the given \p MI.
/// For example, commuting the operands #1 and #3 in the following FMA
/// FMA213 #1, #2, #3
/// results into instruction with adjusted opcode:
/// FMA231 #3, #2, #1
- unsigned getFMA3OpcodeToCommuteOperands(MachineInstr &MI, unsigned SrcOpIdx1,
- unsigned SrcOpIdx2) const;
+ unsigned getFMA3OpcodeToCommuteOperands(const MachineInstr &MI,
+ unsigned SrcOpIdx1,
+ unsigned SrcOpIdx2,
+ const X86InstrFMA3Group &FMA3Group) const;
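The 213 -> 231 rewrite described in the comment above can be sanity-checked numerically. Under the usual FMA3 operand-order convention (FMA213 computes op2 * op1 + op3, while FMA231 computes op2 * op3 + op1), swapping operands #1 and #3 while switching the opcode from 213 to 231 leaves the value unchanged. A minimal standalone check, not LLVM code:

```cpp
#include <cassert>
#include <cstdio>

// Operand-order convention for the FMA3 forms named in the comment above
// (the digits say which operands feed the multiply and which is added):
//   FMA213(op1, op2, op3) = op2 * op1 + op3
//   FMA231(op1, op2, op3) = op2 * op3 + op1
static double fma213(double Op1, double Op2, double Op3) {
  return Op2 * Op1 + Op3;
}
static double fma231(double Op1, double Op2, double Op3) {
  return Op2 * Op3 + Op1;
}

int main() {
  double A = 1.5, B = -2.25, C = 4.0;
  // Commuting operands #1 and #3 while switching the opcode 213 -> 231
  // preserves the computed value, which is what the doc comment claims.
  assert(fma213(A, B, C) == fma231(C, B, A));
  std::printf("FMA213(%g,%g,%g) == FMA231(%g,%g,%g) == %g\n",
              A, B, C, C, B, A, fma213(A, B, C));
}
```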
// Branch analysis.
bool isUnpredicatedTerminator(const MachineInstr &MI) const override;
+ bool isUnconditionalTailCall(const MachineInstr &MI) const override;
+ bool canMakeTailCallConditional(SmallVectorImpl<MachineOperand> &Cond,
+ const MachineInstr &TailCall) const override;
+ void replaceBranchWithTailCall(MachineBasicBlock &MBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ const MachineInstr &TailCall) const override;
+
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
@@ -316,10 +335,12 @@ public:
TargetInstrInfo::MachineBranchPredicate &MBP,
bool AllowModify = false) const override;
- unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
- unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ unsigned removeBranch(MachineBasicBlock &MBB,
+ int *BytesRemoved = nullptr) const override;
+ unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
- const DebugLoc &DL) const override;
+ const DebugLoc &DL,
+ int *BytesAdded = nullptr) const override;
bool canInsertSelect(const MachineBasicBlock&, ArrayRef<MachineOperand> Cond,
unsigned, unsigned, int&, int&, int&) const override;
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
@@ -357,6 +378,10 @@ public:
bool expandPostRAPseudo(MachineInstr &MI) const override;
+ /// Check whether the target can fold a load that feeds a subreg operand
+ /// (or a subreg operand that feeds a store).
+ bool isSubregFoldable() const override { return true; }
+
/// foldMemoryOperand - If this target supports it, fold a load or store of
/// the specified stack slot into the specified machine instruction for the
/// specified operand(s). If this is possible, the target should perform the
@@ -418,13 +443,13 @@ public:
int64_t Offset1, int64_t Offset2,
unsigned NumLoads) const override;
- bool shouldScheduleAdjacent(MachineInstr &First,
- MachineInstr &Second) const override;
+ bool shouldScheduleAdjacent(const MachineInstr &First,
+ const MachineInstr &Second) const override;
void getNoopForMachoTarget(MCInst &NopInst) const override;
bool
- ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
+ reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
/// isSafeToMoveRegClassDefs - Return true if it's safe to move a machine
/// instruction that defines the specified register class.
@@ -467,14 +492,6 @@ public:
unsigned Size, unsigned Alignment,
bool AllowCommute) const;
- void
- getUnconditionalBranch(MCInst &Branch,
- const MCSymbolRefExpr *BranchTarget) const override;
-
- void getTrap(MCInst &MI) const override;
-
- unsigned getJumpInstrTableEntryBound() const override;
-
bool isHighLatencyDef(int opc) const override;
bool hasHighOperandLatency(const TargetSchedModel &SchedModel,
@@ -529,6 +546,8 @@ public:
ArrayRef<std::pair<unsigned, const char *>>
getSerializableDirectMachineOperandTargetFlags() const override;
+ bool isTailCall(const MachineInstr &Inst) const override;
+
protected:
/// Commutes the operands in the given instruction by changing the operands
/// order and/or changing the instruction's opcode and/or the immediate value
@@ -564,8 +583,24 @@ private:
bool isFrameOperand(const MachineInstr &MI, unsigned int Op,
int &FrameIndex) const;
- /// Expand the MOVImmSExti8 pseudo-instructions.
- bool ExpandMOVImmSExti8(MachineInstrBuilder &MIB) const;
+ /// Returns true iff the routine could find two commutable operands in the
+ /// given machine instruction with 3 vector inputs.
+ /// The 'SrcOpIdx1' and 'SrcOpIdx2' are INPUT and OUTPUT arguments. Their
+ /// input values can be re-defined in this method only if the input values
+ /// are not pre-defined, which is designated by the special value
+ /// 'CommuteAnyOperandIndex' assigned to them.
+ /// If both indices are pre-defined and refer to some operands, then the
+ /// method simply returns true if the corresponding operands are commutable
+ /// and returns false otherwise.
+ ///
+ /// For example, calling this method this way:
+ /// unsigned Op1 = 1, Op2 = CommuteAnyOperandIndex;
+ /// findThreeSrcCommutedOpIndices(MI, Op1, Op2);
+ /// can be interpreted as a query asking to find an operand that would be
+ /// commutable with operand #1.
+ bool findThreeSrcCommutedOpIndices(const MachineInstr &MI,
+ unsigned &SrcOpIdx1,
+ unsigned &SrcOpIdx2) const;
};
} // End llvm namespace
diff --git a/contrib/llvm/lib/Target/X86/X86InstrInfo.td b/contrib/llvm/lib/Target/X86/X86InstrInfo.td
index b19a8f3306aa..38036715a25a 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrInfo.td
@@ -765,6 +765,12 @@ def tls64baseaddr : ComplexPattern<i64, 5, "selectTLSADDRAddr",
def vectoraddr : ComplexPattern<iPTR, 5, "selectVectorAddr", [],[SDNPWantParent]>;
+// A relocatable immediate is either an immediate operand or an operand that can
+// be relocated by the linker to an immediate, such as a regular symbol in
+// non-PIC code.
+def relocImm : ComplexPattern<iAny, 1, "selectRelocImm", [imm, X86Wrapper], [],
+ 0>;
+
//===----------------------------------------------------------------------===//
// X86 Instruction Predicate Definitions.
def TruePredicate : Predicate<"true">;
@@ -832,6 +838,7 @@ def HasTBM : Predicate<"Subtarget->hasTBM()">;
def HasMOVBE : Predicate<"Subtarget->hasMOVBE()">;
def HasRDRAND : Predicate<"Subtarget->hasRDRAND()">;
def HasF16C : Predicate<"Subtarget->hasF16C()">;
+def NoF16C : Predicate<"!Subtarget->hasF16C()">;
def HasFSGSBase : Predicate<"Subtarget->hasFSGSBase()">;
def HasLZCNT : Predicate<"Subtarget->hasLZCNT()">;
def HasBMI : Predicate<"Subtarget->hasBMI()">;
@@ -876,8 +883,6 @@ def IsNaCl : Predicate<"Subtarget->isTargetNaCl()">;
def NotNaCl : Predicate<"!Subtarget->isTargetNaCl()">;
def SmallCode : Predicate<"TM.getCodeModel() == CodeModel::Small">;
def KernelCode : Predicate<"TM.getCodeModel() == CodeModel::Kernel">;
-def FarData : Predicate<"TM.getCodeModel() != CodeModel::Small &&"
- "TM.getCodeModel() != CodeModel::Kernel">;
def NearData : Predicate<"TM.getCodeModel() == CodeModel::Small ||"
"TM.getCodeModel() == CodeModel::Kernel">;
def IsNotPIC : Predicate<"!TM.isPositionIndependent()">;
@@ -889,6 +894,7 @@ def CallImmAddr : Predicate<"Subtarget->isLegalToCallImmediateAddr()">;
def FavorMemIndirectCall : Predicate<"!Subtarget->callRegIndirect()">;
def NotSlowIncDec : Predicate<"!Subtarget->slowIncDec()">;
def HasFastMem32 : Predicate<"!Subtarget->isUnalignedMem32Slow()">;
+def HasFastLZCNT : Predicate<"Subtarget->hasFastLZCNT()">;
def HasMFence : Predicate<"Subtarget->hasMFence()">;
//===----------------------------------------------------------------------===//
@@ -923,6 +929,7 @@ def X86_COND_S : PatLeaf<(i8 15)>;
def i16immSExt8 : ImmLeaf<i16, [{ return isInt<8>(Imm); }]>;
def i32immSExt8 : ImmLeaf<i32, [{ return isInt<8>(Imm); }]>;
def i64immSExt8 : ImmLeaf<i64, [{ return isInt<8>(Imm); }]>;
+def i64immSExt32 : ImmLeaf<i64, [{ return isInt<32>(Imm); }]>;
// If we have multiple users of an immediate, it's much smaller to reuse
// the register, rather than encode the immediate in every instruction.
@@ -941,13 +948,16 @@ def i64immSExt8 : ImmLeaf<i64, [{ return isInt<8>(Imm); }]>;
// Eventually, it would be nice to allow ConstantHoisting to merge constants
// globally for potentially added savings.
//
-def imm8_su : PatLeaf<(i8 imm), [{
+def imm8_su : PatLeaf<(i8 relocImm), [{
+ return !shouldAvoidImmediateInstFormsForSize(N);
+}]>;
+def imm16_su : PatLeaf<(i16 relocImm), [{
return !shouldAvoidImmediateInstFormsForSize(N);
}]>;
-def imm16_su : PatLeaf<(i16 imm), [{
+def imm32_su : PatLeaf<(i32 relocImm), [{
return !shouldAvoidImmediateInstFormsForSize(N);
}]>;
-def imm32_su : PatLeaf<(i32 imm), [{
+def i64immSExt32_su : PatLeaf<(i64immSExt32), [{
return !shouldAvoidImmediateInstFormsForSize(N);
}]>;
@@ -957,10 +967,9 @@ def i16immSExt8_su : PatLeaf<(i16immSExt8), [{
def i32immSExt8_su : PatLeaf<(i32immSExt8), [{
return !shouldAvoidImmediateInstFormsForSize(N);
}]>;
-
-
-def i64immSExt32 : ImmLeaf<i64, [{ return isInt<32>(Imm); }]>;
-
+def i64immSExt8_su : PatLeaf<(i64immSExt8), [{
+ return !shouldAvoidImmediateInstFormsForSize(N);
+}]>;
// i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit
// unsigned field.
@@ -1375,7 +1384,7 @@ def MOV16ri : Ii16<0xB8, AddRegFrm, (outs GR16:$dst), (ins i16imm:$src),
[(set GR16:$dst, imm:$src)], IIC_MOV>, OpSize16;
def MOV32ri : Ii32<0xB8, AddRegFrm, (outs GR32:$dst), (ins i32imm:$src),
"mov{l}\t{$src, $dst|$dst, $src}",
- [(set GR32:$dst, imm:$src)], IIC_MOV>, OpSize32;
+ [(set GR32:$dst, relocImm:$src)], IIC_MOV>, OpSize32;
def MOV64ri32 : RIi32S<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src),
"mov{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, i64immSExt32:$src)], IIC_MOV>;
@@ -1383,7 +1392,7 @@ def MOV64ri32 : RIi32S<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src),
let isReMaterializable = 1 in {
def MOV64ri : RIi64<0xB8, AddRegFrm, (outs GR64:$dst), (ins i64imm:$src),
"movabs{q}\t{$src, $dst|$dst, $src}",
- [(set GR64:$dst, imm:$src)], IIC_MOV>;
+ [(set GR64:$dst, relocImm:$src)], IIC_MOV>;
}
// Longer forms that use a ModR/M byte. Needed for disassembler
@@ -1409,7 +1418,7 @@ def MOV32mi : Ii32<0xC7, MRM0m, (outs), (ins i32mem:$dst, i32imm:$src),
[(store (i32 imm32_su:$src), addr:$dst)], IIC_MOV_MEM>, OpSize32;
def MOV64mi32 : RIi32S<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src),
"mov{q}\t{$src, $dst|$dst, $src}",
- [(store i64immSExt32:$src, addr:$dst)], IIC_MOV_MEM>;
+ [(store i64immSExt32_su:$src, addr:$dst)], IIC_MOV_MEM>;
} // SchedRW
let hasSideEffects = 0 in {
@@ -2251,14 +2260,14 @@ let Predicates = [HasBMI] in {
multiclass bmi_bextr_bzhi<bits<8> opc, string mnemonic, RegisterClass RC,
X86MemOperand x86memop, Intrinsic Int,
PatFrag ld_frag> {
- def rr : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ def rr : I<opc, MRMSrcReg4VOp3, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, (Int RC:$src1, RC:$src2)), (implicit EFLAGS)]>,
- T8PS, VEX_4VOp3;
- def rm : I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src1, RC:$src2),
+ T8PS, VEX;
+ def rm : I<opc, MRMSrcMem4VOp3, (outs RC:$dst), (ins x86memop:$src1, RC:$src2),
!strconcat(mnemonic, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set RC:$dst, (Int (ld_frag addr:$src1), RC:$src2)),
- (implicit EFLAGS)]>, T8PS, VEX_4VOp3;
+ (implicit EFLAGS)]>, T8PS, VEX;
}
let Predicates = [HasBMI], Defs = [EFLAGS] in {
@@ -2626,6 +2635,12 @@ def : MnemonicAlias<"ret", "retw", "att">, Requires<[In16BitMode]>;
def : MnemonicAlias<"ret", "retl", "att">, Requires<[In32BitMode]>;
def : MnemonicAlias<"ret", "retq", "att">, Requires<[In64BitMode]>;
+// Apply 'ret' behavior to 'retn'
+def : MnemonicAlias<"retn", "retw", "att">, Requires<[In16BitMode]>;
+def : MnemonicAlias<"retn", "retl", "att">, Requires<[In32BitMode]>;
+def : MnemonicAlias<"retn", "retq", "att">, Requires<[In64BitMode]>;
+def : MnemonicAlias<"retn", "ret", "intel">;
+
def : MnemonicAlias<"sal", "shl", "intel">;
def : MnemonicAlias<"salb", "shlb", "att">;
def : MnemonicAlias<"salw", "shlw", "att">;
diff --git a/contrib/llvm/lib/Target/X86/X86InstrMMX.td b/contrib/llvm/lib/Target/X86/X86InstrMMX.td
index 8d70691714dd..0bb106823983 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrMMX.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrMMX.td
@@ -150,8 +150,9 @@ multiclass SS3I_unop_rm_int_mm<bits<8> opc, string OpcodeStr,
/// Binary MMX instructions requiring SSSE3.
let ImmT = NoImm, Constraints = "$src1 = $dst" in {
multiclass SS3I_binop_rm_int_mm<bits<8> opc, string OpcodeStr,
- Intrinsic IntId64, OpndItins itins> {
- let isCommutable = 0 in
+ Intrinsic IntId64, OpndItins itins,
+ bit Commutable = 0> {
+ let isCommutable = Commutable in
def rr64 : MMXSS38I<opc, MRMSrcReg, (outs VR64:$dst),
(ins VR64:$src1, VR64:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
@@ -418,9 +419,9 @@ defm MMX_PMULHUW : MMXI_binop_rm_int<0xE4, "pmulhuw", int_x86_mmx_pmulhu_w,
let Predicates = [HasSSE2] in
defm MMX_PMULUDQ : MMXI_binop_rm_int<0xF4, "pmuludq", int_x86_mmx_pmulu_dq,
MMX_PMUL_ITINS, 1>;
-let isCommutable = 1 in
defm MMX_PMULHRSW : SS3I_binop_rm_int_mm<0x0B, "pmulhrsw",
- int_x86_ssse3_pmul_hr_sw, MMX_PMUL_ITINS>;
+ int_x86_ssse3_pmul_hr_sw,
+ MMX_PMUL_ITINS, 1>;
// -- Miscellanea
defm MMX_PMADDWD : MMXI_binop_rm_int<0xF5, "pmaddwd", int_x86_mmx_pmadd_wd,
diff --git a/contrib/llvm/lib/Target/X86/X86InstrSSE.td b/contrib/llvm/lib/Target/X86/X86InstrSSE.td
index f91764a67d1c..9d6a89363044 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrSSE.td
@@ -259,26 +259,24 @@ multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
/// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class
-multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
- string asm, string SSEVer, string FPSizeStr,
- Operand memopr, ComplexPattern mem_cpat,
- Domain d, OpndItins itins, bit Is2Addr = 1> {
-let isCodeGenOnly = 1 in {
+multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr,
+ SDPatternOperator Int, RegisterClass RC,
+ string asm, Operand memopr,
+ ComplexPattern mem_cpat, Domain d,
+ OpndItins itins, bit Is2Addr = 1> {
+let isCodeGenOnly = 1, hasSideEffects = 0 in {
def rr_Int : SI_Int<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (!cast<Intrinsic>(
- !strconcat("int_x86_sse", SSEVer, "_", OpcodeStr, FPSizeStr))
- RC:$src1, RC:$src2))], itins.rr, d>,
+ [(set RC:$dst, (Int RC:$src1, RC:$src2))], itins.rr, d>,
Sched<[itins.Sched]>;
+ let mayLoad = 1 in
def rm_Int : SI_Int<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
!if(Is2Addr,
!strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (!cast<Intrinsic>(!strconcat("int_x86_sse",
- SSEVer, "_", OpcodeStr, FPSizeStr))
- RC:$src1, mem_cpat:$src2))], itins.rm, d>,
+ [(set RC:$dst, (Int RC:$src1, mem_cpat:$src2))], itins.rm, d>,
Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
}
@@ -372,13 +370,9 @@ def : Pat<(insert_subvector undef, (v16i8 VR128:$src), (iPTR 0)),
// Implicitly promote a 32-bit scalar to a vector.
def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
(COPY_TO_REGCLASS FR32:$src, VR128)>;
-def : Pat<(v8f32 (scalar_to_vector FR32:$src)),
- (COPY_TO_REGCLASS FR32:$src, VR128)>;
// Implicitly promote a 64-bit scalar to a vector.
def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
(COPY_TO_REGCLASS FR64:$src, VR128)>;
-def : Pat<(v4f64 (scalar_to_vector FR64:$src)),
- (COPY_TO_REGCLASS FR64:$src, VR128)>;
// Bitcasts between 128-bit vector types. Return the original type since
// no instruction is needed for the conversion
@@ -453,9 +447,9 @@ def : Pat<(v4f64 (bitconvert (v8f32 VR256:$src))), (v4f64 VR256:$src)>;
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
isPseudo = 1, SchedRW = [WriteZero] in {
def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "",
- [(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1]>;
+ [(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1, NoVLX_Or_NoDQI]>;
def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "",
- [(set FR64:$dst, fpimm0)]>, Requires<[HasSSE2]>;
+ [(set FR64:$dst, fpimm0)]>, Requires<[HasSSE2, NoVLX_Or_NoDQI]>;
}
//===----------------------------------------------------------------------===//
@@ -512,6 +506,7 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
multiclass sse12_move_rr<RegisterClass RC, SDNode OpNode, ValueType vt,
X86MemOperand x86memop, string base_opc,
string asm_opr, Domain d = GenericDomain> {
+ let isCommutable = 1 in
def rr : SI<0x10, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, RC:$src2),
!strconcat(base_opc, asm_opr),
@@ -590,6 +585,8 @@ let Predicates = [UseAVX] in {
(COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>;
def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
(COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>;
+ def : Pat<(v4f32 (X86vzload addr:$src)),
+ (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>;
// MOVSDrm zeros the high parts of the register; represent this
// with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
@@ -609,6 +606,8 @@ let Predicates = [UseAVX] in {
def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
(v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>;
+ def : Pat<(v8f32 (X86vzload addr:$src)),
+ (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>;
def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
(v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_xmm)>;
@@ -697,6 +696,8 @@ let Predicates = [UseSSE1] in {
(COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>;
def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
(COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>;
+ def : Pat<(v4f32 (X86vzload addr:$src)),
+ (COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>;
}
// Extract and store.
@@ -771,13 +772,12 @@ def : InstAlias<"vmovsd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC,
X86MemOperand x86memop, PatFrag ld_frag,
string asm, Domain d,
- OpndItins itins,
- bit IsReMaterializable = 1> {
+ OpndItins itins> {
let hasSideEffects = 0 in
def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], itins.rr, d>,
Sched<[WriteFShuffle]>;
-let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in
+let canFoldAsLoad = 1, isReMaterializable = 1 in
def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(asm, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (ld_frag addr:$src))], itins.rm, d>,
@@ -795,7 +795,7 @@ defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
"movups", SSEPackedSingle, SSE_MOVU_ITINS>,
PS, VEX;
defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
- "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>,
+ "movupd", SSEPackedDouble, SSE_MOVU_ITINS>,
PD, VEX;
defm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32,
@@ -808,7 +808,7 @@ defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32,
"movups", SSEPackedSingle, SSE_MOVU_ITINS>,
PS, VEX, VEX_L;
defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64,
- "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>,
+ "movupd", SSEPackedDouble, SSE_MOVU_ITINS>,
PD, VEX, VEX_L;
}
@@ -825,7 +825,7 @@ defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
"movapd", SSEPackedDouble, SSE_MOVA_ITINS>,
PD;
defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
- "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>,
+ "movupd", SSEPackedDouble, SSE_MOVU_ITINS>,
PD;
}
@@ -1028,7 +1028,7 @@ let Predicates = [HasAVX, NoVLX] in {
(VMOVUPSmr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
}
-let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
+let Predicates = [HasAVX, NoVLX] in {
// 128-bit load/store
def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
(VMOVAPSmr addr:$dst, VR128:$src)>;
@@ -1077,29 +1077,6 @@ let Predicates = [UseSSE1] in {
(MOVUPSmr addr:$dst, VR128:$src)>;
}
-// Alias instruction to load FR32 or FR64 from f128mem using movaps. Upper
-// bits are disregarded. FIXME: Set encoding to pseudo!
-let canFoldAsLoad = 1, isReMaterializable = 1, SchedRW = [WriteLoad] in {
-let isCodeGenOnly = 1 in {
- def FsVMOVAPSrm : VPSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
- "movaps\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (alignedloadfsf32 addr:$src))],
- IIC_SSE_MOVA_P_RM>, VEX;
- def FsVMOVAPDrm : VPDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
- "movapd\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (alignedloadfsf64 addr:$src))],
- IIC_SSE_MOVA_P_RM>, VEX;
- def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
- "movaps\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (alignedloadfsf32 addr:$src))],
- IIC_SSE_MOVA_P_RM>;
- def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
- "movapd\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (alignedloadfsf64 addr:$src))],
- IIC_SSE_MOVA_P_RM>;
-}
-}
-
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Low packed FP Instructions
//===----------------------------------------------------------------------===//
@@ -1300,6 +1277,7 @@ let Predicates = [UseAVX] in {
def : Pat<(v2f64 (X86Unpckl VR128:$src1,
(scalar_to_vector (loadf64 addr:$src2)))),
(VMOVHPDrm VR128:$src1, addr:$src2)>;
+
// Also handle an i64 load because that may get selected as a faster way to
// load the data.
def : Pat<(v2f64 (X86Unpckl VR128:$src1,
@@ -1307,6 +1285,11 @@ let Predicates = [UseAVX] in {
(VMOVHPDrm VR128:$src1, addr:$src2)>;
def : Pat<(store (f64 (extractelt
+ (bc_v2f64 (v4f32 (X86Movhlps VR128:$src, VR128:$src))),
+ (iPTR 0))), addr:$dst),
+ (VMOVHPDmr addr:$dst, VR128:$src)>;
+
+ def : Pat<(store (f64 (extractelt
(v2f64 (X86VPermilpi VR128:$src, (i8 1))),
(iPTR 0))), addr:$dst),
(VMOVHPDmr addr:$dst, VR128:$src)>;
@@ -1332,6 +1315,7 @@ let Predicates = [UseSSE2] in {
def : Pat<(v2f64 (X86Unpckl VR128:$src1,
(scalar_to_vector (loadf64 addr:$src2)))),
(MOVHPDrm VR128:$src1, addr:$src2)>;
+
// Also handle an i64 load because that may get selected as a faster way to
// load the data.
def : Pat<(v2f64 (X86Unpckl VR128:$src1,
@@ -1339,6 +1323,11 @@ let Predicates = [UseSSE2] in {
(MOVHPDrm VR128:$src1, addr:$src2)>;
def : Pat<(store (f64 (extractelt
+ (bc_v2f64 (v4f32 (X86Movhlps VR128:$src, VR128:$src))),
+ (iPTR 0))), addr:$dst),
+ (MOVHPDmr addr:$dst, VR128:$src)>;
+
+ def : Pat<(store (f64 (extractelt
(v2f64 (X86Shufp VR128:$src, VR128:$src, (i8 1))),
(iPTR 0))), addr:$dst),
(MOVHPDmr addr:$dst, VR128:$src)>;
@@ -1371,6 +1360,7 @@ let Constraints = "$src1 = $dst", AddedComplexity = 20 in {
[(set VR128:$dst,
(v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))],
IIC_SSE_MOV_LH>, Sched<[WriteFShuffle]>;
+ let isCommutable = 1 in
def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
"movhlps\t{$src2, $dst|$dst, $src2}",
@@ -1449,15 +1439,18 @@ multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
itins.rm>, Sched<[itins.Sched.Folded]>;
}
-multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC,
- X86MemOperand x86memop, string asm, Domain d,
- OpndItins itins> {
+multiclass sse12_cvt_p<bits<8> opc, RegisterClass RC, X86MemOperand x86memop,
+ ValueType DstTy, ValueType SrcTy, PatFrag ld_frag,
+ string asm, Domain d, OpndItins itins> {
let hasSideEffects = 0 in {
- def rr : I<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm,
- [], itins.rr, d>, Sched<[itins.Sched]>;
+ def rr : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src), asm,
+ [(set RC:$dst, (DstTy (sint_to_fp (SrcTy RC:$src))))],
+ itins.rr, d>, Sched<[itins.Sched]>;
let mayLoad = 1 in
- def rm : I<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm,
- [], itins.rm, d>, Sched<[itins.Sched.Folded]>;
+ def rm : I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), asm,
+ [(set RC:$dst, (DstTy (sint_to_fp
+ (SrcTy (bitconvert (ld_frag addr:$src))))))],
+ itins.rm, d>, Sched<[itins.Sched.Folded]>;
}
}
@@ -1730,16 +1723,16 @@ defm CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64,
ssmem, sse_load_f32, "cvtss2si",
SSE_CVT_SS2SI_64>, XS, REX_W;
-defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem,
+defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, loadv2i64,
"vcvtdq2ps\t{$src, $dst|$dst, $src}",
SSEPackedSingle, SSE_CVT_PS>,
- PS, VEX, Requires<[HasAVX]>;
-defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, i256mem,
+ PS, VEX, Requires<[HasAVX, NoVLX]>;
+defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, i256mem, v8f32, v8i32, loadv4i64,
"vcvtdq2ps\t{$src, $dst|$dst, $src}",
SSEPackedSingle, SSE_CVT_PS>,
- PS, VEX, VEX_L, Requires<[HasAVX]>;
+ PS, VEX, VEX_L, Requires<[HasAVX, NoVLX]>;
-defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem,
+defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, i128mem, v4f32, v4i32, memopv2i64,
"cvtdq2ps\t{$src, $dst|$dst, $src}",
SSEPackedSingle, SSE_CVT_PS>,
PS, Requires<[UseSSE2]>;
@@ -1798,16 +1791,16 @@ def VCVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst),
Sched<[WriteCvtF2FLd, ReadAfterLd]>;
}
-def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
+def : Pat<(f32 (fpround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
Requires<[UseAVX]>;
def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
"cvtsd2ss\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (fround FR64:$src))],
+ [(set FR32:$dst, (fpround FR64:$src))],
IIC_SSE_CVT_Scalar_RR>, Sched<[WriteCvtF2F]>;
def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
"cvtsd2ss\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (fround (loadf64 addr:$src)))],
+ [(set FR32:$dst, (fpround (loadf64 addr:$src)))],
IIC_SSE_CVT_Scalar_RM>,
XD,
Requires<[UseSSE2, OptForSize]>, Sched<[WriteCvtF2FLd]>;
@@ -1864,9 +1857,9 @@ def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
Sched<[WriteCvtF2FLd, ReadAfterLd]>;
}
-def : Pat<(f64 (fextend FR32:$src)),
+def : Pat<(f64 (fpextend FR32:$src)),
(VCVTSS2SDrr FR32:$src, FR32:$src)>, Requires<[UseAVX]>;
-def : Pat<(fextend (loadf32 addr:$src)),
+def : Pat<(fpextend (loadf32 addr:$src)),
(VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[UseAVX]>;
def : Pat<(extloadf32 addr:$src),
@@ -1878,7 +1871,7 @@ def : Pat<(extloadf32 addr:$src),
def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (fextend FR32:$src))],
+ [(set FR64:$dst, (fpextend FR32:$src))],
IIC_SSE_CVT_Scalar_RR>, XS,
Requires<[UseSSE2]>, Sched<[WriteCvtF2F]>;
def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
@@ -1887,12 +1880,12 @@ def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
IIC_SSE_CVT_Scalar_RM>, XS,
Requires<[UseSSE2, OptForSize]>, Sched<[WriteCvtF2FLd]>;
-// extload f32 -> f64. This matches load+fextend because we have a hack in
+// extload f32 -> f64. This matches load+fpextend because we have a hack in
// the isel (PreprocessForFPConvert) that can introduce loads after dag
// combine.
-// Since these loads aren't folded into the fextend, we have to match it
+// Since these loads aren't folded into the fpextend, we have to match it
// explicitly here.
-def : Pat<(fextend (loadf32 addr:$src)),
+def : Pat<(fpextend (loadf32 addr:$src)),
(CVTSS2SDrm addr:$src)>, Requires<[UseSSE2]>;
def : Pat<(extloadf32 addr:$src),
(CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[UseSSE2, OptForSpeed]>;
@@ -1962,134 +1955,98 @@ def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
// Convert Packed Double FP to Packed DW Integers
-let Predicates = [HasAVX] in {
+let Predicates = [HasAVX, NoVLX] in {
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
def VCVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>,
+ [(set VR128:$dst,
+ (v4i32 (X86cvtp2Int (v2f64 VR128:$src))))]>,
VEX, Sched<[WriteCvtF2I]>;
// XMM only
def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
(VCVTPD2DQrr VR128:$dst, VR128:$src), 0>;
-def VCVTPD2DQXrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "vcvtpd2dqx\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (int_x86_sse2_cvtpd2dq (loadv2f64 addr:$src)))]>, VEX,
- Sched<[WriteCvtF2ILd]>;
+def VCVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "vcvtpd2dq{x}\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4i32 (X86cvtp2Int (loadv2f64 addr:$src))))]>, VEX,
+ Sched<[WriteCvtF2ILd]>;
+def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}",
+ (VCVTPD2DQrm VR128:$dst, f128mem:$src), 0>;
// YMM only
def VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
- "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}",
+ "vcvtpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (int_x86_avx_cvt_pd2dq_256 VR256:$src))]>, VEX, VEX_L,
- Sched<[WriteCvtF2I]>;
+ (v4i32 (X86cvtp2Int (v4f64 VR256:$src))))]>,
+ VEX, VEX_L, Sched<[WriteCvtF2I]>;
def VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"vcvtpd2dq{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (int_x86_avx_cvt_pd2dq_256 (loadv4f64 addr:$src)))]>,
+ (v4i32 (X86cvtp2Int (loadv4f64 addr:$src))))]>,
VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
-def : InstAlias<"vcvtpd2dq\t{$src, $dst|$dst, $src}",
+def : InstAlias<"vcvtpd2dqy\t{$src, $dst|$dst, $src}",
(VCVTPD2DQYrr VR128:$dst, VR256:$src), 0>;
+def : InstAlias<"vcvtpd2dqy\t{$src, $dst|$dst, $src}",
+ (VCVTPD2DQYrm VR128:$dst, f256mem:$src), 0>;
}
def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))],
+ (v4i32 (X86cvtp2Int (memopv2f64 addr:$src))))],
IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtF2ILd]>;
def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))],
+ [(set VR128:$dst,
+ (v4i32 (X86cvtp2Int (v2f64 VR128:$src))))],
IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2I]>;
// Convert with truncation packed single/double fp to doubleword
// SSE2 packed instructions with XS prefix
+let Predicates = [HasAVX, NoVLX] in {
def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (int_x86_sse2_cvttps2dq VR128:$src))],
+ (v4i32 (fp_to_sint (v4f32 VR128:$src))))],
IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>;
def VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvttps2dq
- (loadv4f32 addr:$src)))],
+ [(set VR128:$dst,
+ (v4i32 (fp_to_sint (loadv4f32 addr:$src))))],
IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>;
def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR256:$dst,
- (int_x86_avx_cvtt_ps2dq_256 VR256:$src))],
+ (v8i32 (fp_to_sint (v8f32 VR256:$src))))],
IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
- [(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256
- (loadv8f32 addr:$src)))],
+ [(set VR256:$dst,
+ (v8i32 (fp_to_sint (loadv8f32 addr:$src))))],
IIC_SSE_CVT_PS_RM>, VEX, VEX_L,
Sched<[WriteCvtF2ILd]>;
+}
def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvttps2dq VR128:$src))],
+ [(set VR128:$dst,
+ (v4i32 (fp_to_sint (v4f32 VR128:$src))))],
IIC_SSE_CVT_PS_RR>, Sched<[WriteCvtF2I]>;
def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvttps2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (int_x86_sse2_cvttps2dq (memopv4f32 addr:$src)))],
+ (v4i32 (fp_to_sint (memopv4f32 addr:$src))))],
IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>;
-let Predicates = [HasAVX] in {
- def : Pat<(int_x86_sse2_cvtdq2ps VR128:$src),
- (VCVTDQ2PSrr VR128:$src)>;
- def : Pat<(int_x86_sse2_cvtdq2ps (bc_v4i32 (loadv2i64 addr:$src))),
- (VCVTDQ2PSrm addr:$src)>;
-}
-
-let Predicates = [HasAVX, NoVLX] in {
- def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
- (VCVTDQ2PSrr VR128:$src)>;
- def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (loadv2i64 addr:$src)))),
- (VCVTDQ2PSrm addr:$src)>;
-
- def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
- (VCVTTPS2DQrr VR128:$src)>;
- def : Pat<(v4i32 (fp_to_sint (loadv4f32 addr:$src))),
- (VCVTTPS2DQrm addr:$src)>;
-
- def : Pat<(v8f32 (sint_to_fp (v8i32 VR256:$src))),
- (VCVTDQ2PSYrr VR256:$src)>;
- def : Pat<(v8f32 (sint_to_fp (bc_v8i32 (loadv4i64 addr:$src)))),
- (VCVTDQ2PSYrm addr:$src)>;
-
- def : Pat<(v8i32 (fp_to_sint (v8f32 VR256:$src))),
- (VCVTTPS2DQYrr VR256:$src)>;
- def : Pat<(v8i32 (fp_to_sint (loadv8f32 addr:$src))),
- (VCVTTPS2DQYrm addr:$src)>;
-}
-
-let Predicates = [UseSSE2] in {
- def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))),
- (CVTDQ2PSrr VR128:$src)>;
- def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))),
- (CVTDQ2PSrm addr:$src)>;
-
- def : Pat<(int_x86_sse2_cvtdq2ps VR128:$src),
- (CVTDQ2PSrr VR128:$src)>;
- def : Pat<(int_x86_sse2_cvtdq2ps (bc_v4i32 (memopv2i64 addr:$src))),
- (CVTDQ2PSrm addr:$src)>;
-
- def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))),
- (CVTTPS2DQrr VR128:$src)>;
- def : Pat<(v4i32 (fp_to_sint (memopv4f32 addr:$src))),
- (CVTTPS2DQrm addr:$src)>;
-}
-
+let Predicates = [HasAVX, NoVLX] in
def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (int_x86_sse2_cvttpd2dq VR128:$src))],
- IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2I]>;
+ (v4i32 (X86cvttp2si (v2f64 VR128:$src))))],
+ IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2I]>;
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
@@ -2098,66 +2055,92 @@ def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
// XMM only
def : InstAlias<"vcvttpd2dqx\t{$src, $dst|$dst, $src}",
(VCVTTPD2DQrr VR128:$dst, VR128:$src), 0>;
-def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvttpd2dqx\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
- (loadv2f64 addr:$src)))],
- IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2ILd]>;
+let Predicates = [HasAVX, NoVLX] in
+def VCVTTPD2DQrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvttpd2dq{x}\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v4i32 (X86cvttp2si (loadv2f64 addr:$src))))],
+ IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2ILd]>;
+def : InstAlias<"vcvttpd2dqx\t{$src, $dst|$dst, $src}",
+ (VCVTTPD2DQrm VR128:$dst, f128mem:$src), 0>;
// YMM only
+let Predicates = [HasAVX, NoVLX] in {
def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
- "cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
+ "cvttpd2dq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (int_x86_avx_cvtt_pd2dq_256 VR256:$src))],
+ (v4i32 (fp_to_sint (v4f64 VR256:$src))))],
IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>;
def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"cvttpd2dq{y}\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
- (int_x86_avx_cvtt_pd2dq_256 (loadv4f64 addr:$src)))],
+ (v4i32 (fp_to_sint (loadv4f64 addr:$src))))],
IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>;
-def : InstAlias<"vcvttpd2dq\t{$src, $dst|$dst, $src}",
+}
+def : InstAlias<"vcvttpd2dqy\t{$src, $dst|$dst, $src}",
(VCVTTPD2DQYrr VR128:$dst, VR256:$src), 0>;
+def : InstAlias<"vcvttpd2dqy\t{$src, $dst|$dst, $src}",
+ (VCVTTPD2DQYrm VR128:$dst, f256mem:$src), 0>;
let Predicates = [HasAVX, NoVLX] in {
- def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))),
- (VCVTTPD2DQYrr VR256:$src)>;
- def : Pat<(v4i32 (fp_to_sint (loadv4f64 addr:$src))),
- (VCVTTPD2DQYrm addr:$src)>;
+ let AddedComplexity = 15 in {
+ def : Pat<(X86vzmovl (v2i64 (bitconvert
+ (v4i32 (X86cvtp2Int (v2f64 VR128:$src)))))),
+ (VCVTPD2DQrr VR128:$src)>;
+ def : Pat<(X86vzmovl (v2i64 (bitconvert
+ (v4i32 (X86cvttp2si (v2f64 VR128:$src)))))),
+ (VCVTTPD2DQrr VR128:$src)>;
+ }
} // Predicates = [HasAVX]
def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))],
+ [(set VR128:$dst,
+ (v4i32 (X86cvttp2si (v2f64 VR128:$src))))],
IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2I]>;
def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
"cvttpd2dq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvttpd2dq
- (memopv2f64 addr:$src)))],
- IIC_SSE_CVT_PD_RM>,
- Sched<[WriteCvtF2ILd]>;
+ [(set VR128:$dst,
+ (v4i32 (X86cvttp2si (memopv2f64 addr:$src))))],
+ IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtF2ILd]>;
+
+let Predicates = [UseSSE2] in {
+ let AddedComplexity = 15 in {
+ def : Pat<(X86vzmovl (v2i64 (bitconvert
+ (v4i32 (X86cvtp2Int (v2f64 VR128:$src)))))),
+ (CVTPD2DQrr VR128:$src)>;
+ def : Pat<(X86vzmovl (v2i64 (bitconvert
+ (v4i32 (X86cvttp2si (v2f64 VR128:$src)))))),
+ (CVTTPD2DQrr VR128:$src)>;
+ }
+} // Predicates = [UseSSE2]
// Convert packed single to packed double
-let Predicates = [HasAVX] in {
+let Predicates = [HasAVX, NoVLX] in {
// SSE2 instructions without OpSize prefix
def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
- [], IIC_SSE_CVT_PD_RR>, PS, VEX, Sched<[WriteCvtF2F]>;
+ [(set VR128:$dst, (v2f64 (X86vfpext (v4f32 VR128:$src))))],
+ IIC_SSE_CVT_PD_RR>, PS, VEX, Sched<[WriteCvtF2F]>;
def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))],
IIC_SSE_CVT_PD_RM>, PS, VEX, Sched<[WriteCvtF2FLd]>;
def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
- [], IIC_SSE_CVT_PD_RR>, PS, VEX, VEX_L, Sched<[WriteCvtF2F]>;
+ [(set VR256:$dst, (v4f64 (fpextend (v4f32 VR128:$src))))],
+ IIC_SSE_CVT_PD_RR>, PS, VEX, VEX_L, Sched<[WriteCvtF2F]>;
def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src),
"vcvtps2pd\t{$src, $dst|$dst, $src}",
- [], IIC_SSE_CVT_PD_RM>, PS, VEX, VEX_L, Sched<[WriteCvtF2FLd]>;
+ [(set VR256:$dst, (v4f64 (extloadv4f32 addr:$src)))],
+ IIC_SSE_CVT_PD_RM>, PS, VEX, VEX_L, Sched<[WriteCvtF2FLd]>;
}
let Predicates = [UseSSE2] in {
def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}",
- [], IIC_SSE_CVT_PD_RR>, PS, Sched<[WriteCvtF2F]>;
+ [(set VR128:$dst, (v2f64 (X86vfpext (v4f32 VR128:$src))))],
+ IIC_SSE_CVT_PD_RR>, PS, Sched<[WriteCvtF2F]>;
def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
"cvtps2pd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))],
@@ -2165,136 +2148,118 @@ def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
}
// Convert Packed DW Integers to Packed Double FP
-let Predicates = [HasAVX] in {
+let Predicates = [HasAVX, NoVLX] in {
let hasSideEffects = 0, mayLoad = 1 in
def VCVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
- []>, VEX, Sched<[WriteCvtI2FLd]>;
+ [(set VR128:$dst,
+ (v2f64 (X86VSintToFP (bc_v4i32 (loadv2i64 addr:$src)))))]>,
+ VEX, Sched<[WriteCvtI2FLd]>;
def VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
- []>, VEX, Sched<[WriteCvtI2F]>;
+ [(set VR128:$dst,
+ (v2f64 (X86VSintToFP (v4i32 VR128:$src))))]>,
+ VEX, Sched<[WriteCvtI2F]>;
def VCVTDQ2PDYrm : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
- []>, VEX, VEX_L, Sched<[WriteCvtI2FLd]>;
+ [(set VR256:$dst,
+ (v4f64 (sint_to_fp (bc_v4i32 (loadv2i64 addr:$src)))))]>,
+ VEX, VEX_L, Sched<[WriteCvtI2FLd]>;
def VCVTDQ2PDYrr : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
- []>, VEX, VEX_L, Sched<[WriteCvtI2F]>;
+ [(set VR256:$dst,
+ (v4f64 (sint_to_fp (v4i32 VR128:$src))))]>,
+ VEX, VEX_L, Sched<[WriteCvtI2F]>;
}
let hasSideEffects = 0, mayLoad = 1 in
def CVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
- "cvtdq2pd\t{$src, $dst|$dst, $src}", [],
+ "cvtdq2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2f64 (X86VSintToFP (bc_v4i32 (loadv2i64 addr:$src)))))],
IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtI2FLd]>;
def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
- "cvtdq2pd\t{$src, $dst|$dst, $src}", [],
+ "cvtdq2pd\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst,
+ (v2f64 (X86VSintToFP (v4i32 VR128:$src))))],
IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtI2F]>;
// AVX register conversion intrinsics
-let Predicates = [HasAVX] in {
- def : Pat<(v2f64 (X86cvtdq2pd (v4i32 VR128:$src))),
- (VCVTDQ2PDrr VR128:$src)>;
- def : Pat<(v2f64 (X86cvtdq2pd (bc_v4i32 (loadv2i64 addr:$src)))),
- (VCVTDQ2PDrm addr:$src)>;
- def : Pat<(v2f64 (X86cvtdq2pd (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
+let Predicates = [HasAVX, NoVLX] in {
+ def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(VCVTDQ2PDrm addr:$src)>;
-
- def : Pat<(v4f64 (sint_to_fp (v4i32 VR128:$src))),
- (VCVTDQ2PDYrr VR128:$src)>;
- def : Pat<(v4f64 (sint_to_fp (bc_v4i32 (loadv2i64 addr:$src)))),
- (VCVTDQ2PDYrm addr:$src)>;
-} // Predicates = [HasAVX]
+} // Predicates = [HasAVX, NoVLX]
// SSE2 register conversion intrinsics
-let Predicates = [HasSSE2] in {
- def : Pat<(v2f64 (X86cvtdq2pd (v4i32 VR128:$src))),
- (CVTDQ2PDrr VR128:$src)>;
- def : Pat<(v2f64 (X86cvtdq2pd (bc_v4i32 (loadv2i64 addr:$src)))),
- (CVTDQ2PDrm addr:$src)>;
- def : Pat<(v2f64 (X86cvtdq2pd (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
+let Predicates = [UseSSE2] in {
+ def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(CVTDQ2PDrm addr:$src)>;
-} // Predicates = [HasSSE2]
+} // Predicates = [UseSSE2]
// Convert packed double to packed single
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
+let Predicates = [HasAVX, NoVLX] in
def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))],
+ [(set VR128:$dst, (X86vfpround (v2f64 VR128:$src)))],
IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2F]>;
// XMM only
def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}",
(VCVTPD2PSrr VR128:$dst, VR128:$src), 0>;
-def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
- "cvtpd2psx\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (int_x86_sse2_cvtpd2ps (loadv2f64 addr:$src)))],
- IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2FLd]>;
+let Predicates = [HasAVX, NoVLX] in
+def VCVTPD2PSrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
+ "cvtpd2ps{x}\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (X86vfpround (loadv2f64 addr:$src)))],
+ IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2FLd]>;
+def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}",
+ (VCVTPD2PSrm VR128:$dst, f128mem:$src), 0>;
// YMM only
+let Predicates = [HasAVX, NoVLX] in {
def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src),
- "cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (int_x86_avx_cvt_pd2_ps_256 VR256:$src))],
+ "cvtpd2ps\t{$src, $dst|$dst, $src}",
+ [(set VR128:$dst, (fpround VR256:$src))],
IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2F]>;
def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src),
"cvtpd2ps{y}\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (int_x86_avx_cvt_pd2_ps_256 (loadv4f64 addr:$src)))],
+ [(set VR128:$dst, (fpround (loadv4f64 addr:$src)))],
IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2FLd]>;
-def : InstAlias<"vcvtpd2ps\t{$src, $dst|$dst, $src}",
+}
+def : InstAlias<"vcvtpd2psy\t{$src, $dst|$dst, $src}",
(VCVTPD2PSYrr VR128:$dst, VR256:$src), 0>;
+def : InstAlias<"vcvtpd2psy\t{$src, $dst|$dst, $src}",
+ (VCVTPD2PSYrm VR128:$dst, f256mem:$src), 0>;
def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))],
+ [(set VR128:$dst, (X86vfpround (v2f64 VR128:$src)))],
IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2F]>;
def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"cvtpd2ps\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)))],
+ [(set VR128:$dst, (X86vfpround (memopv2f64 addr:$src)))],
IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtF2FLd]>;
-
// AVX 256-bit register conversion intrinsics
// FIXME: Migrate SSE conversion intrinsics matching to use patterns as below
// whenever possible to avoid declaring two versions of each one.
-let Predicates = [HasAVX] in {
- def : Pat<(int_x86_avx_cvtdq2_ps_256 VR256:$src),
- (VCVTDQ2PSYrr VR256:$src)>;
- def : Pat<(int_x86_avx_cvtdq2_ps_256 (bitconvert (loadv4i64 addr:$src))),
- (VCVTDQ2PSYrm addr:$src)>;
-}
let Predicates = [HasAVX, NoVLX] in {
- // Match fround and fextend for 128/256-bit conversions
- def : Pat<(v4f32 (X86vfpround (v2f64 VR128:$src))),
+ // Match fpround and fpextend for 128/256-bit conversions
+ let AddedComplexity = 15 in
+ def : Pat<(X86vzmovl (v2f64 (bitconvert
+ (v4f32 (X86vfpround (v2f64 VR128:$src)))))),
(VCVTPD2PSrr VR128:$src)>;
- def : Pat<(v4f32 (X86vfpround (loadv2f64 addr:$src))),
- (VCVTPD2PSXrm addr:$src)>;
- def : Pat<(v4f32 (fround (v4f64 VR256:$src))),
- (VCVTPD2PSYrr VR256:$src)>;
- def : Pat<(v4f32 (fround (loadv4f64 addr:$src))),
- (VCVTPD2PSYrm addr:$src)>;
-
- def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))),
- (VCVTPS2PDrr VR128:$src)>;
- def : Pat<(v4f64 (fextend (v4f32 VR128:$src))),
- (VCVTPS2PDYrr VR128:$src)>;
- def : Pat<(v4f64 (extloadv4f32 addr:$src)),
- (VCVTPS2PDYrm addr:$src)>;
}
let Predicates = [UseSSE2] in {
- // Match fround and fextend for 128 conversions
- def : Pat<(v4f32 (X86vfpround (v2f64 VR128:$src))),
+ // Match fpround and fpextend for 128 conversions
+ let AddedComplexity = 15 in
+ def : Pat<(X86vzmovl (v2f64 (bitconvert
+ (v4f32 (X86vfpround (v2f64 VR128:$src)))))),
(CVTPD2PSrr VR128:$src)>;
- def : Pat<(v4f32 (X86vfpround (memopv2f64 addr:$src))),
- (CVTPD2PSrm addr:$src)>;
-
- def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))),
- (CVTPS2PDrr VR128:$src)>;
}
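For reference, the zeroing captured by the X86vzmovl patterns above is visible at the intrinsics level: _mm_cvtpd_ps zeroes the upper two float lanes, while the YMM form narrows four doubles. A small sketch (assuming clang or GCC with -O2 -mavx; names and flags are illustrative):

    /* cvtpd2ps_demo.c -- illustrative only */
    #include <immintrin.h>
    #include <stdio.h>

    int main(void) {
        __m128d d2 = _mm_set_pd(2.0, 1.0);
        __m256d d4 = _mm256_set_pd(4.0, 3.0, 2.0, 1.0);
        __m128 lo  = _mm_cvtpd_ps(d2);      /* cvtpd2ps: upper two lanes zeroed */
        __m128 all = _mm256_cvtpd_ps(d4);   /* vcvtpd2ps ymm: four results */
        float a[4], b[4];
        _mm_storeu_ps(a, lo);
        _mm_storeu_ps(b, all);
        printf("%g %g %g %g | %g %g %g %g\n",
               a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
        return 0;
    }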
//===----------------------------------------------------------------------===//
@@ -2306,6 +2271,7 @@ multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop,
Operand CC, SDNode OpNode, ValueType VT,
PatFrag ld_frag, string asm, string asm_alt,
OpndItins itins, ImmLeaf immLeaf> {
+ let isCommutable = 1 in
def rr : SIi8<0xC2, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm,
[(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, immLeaf:$cc))],
@@ -2351,9 +2317,9 @@ let Constraints = "$src1 = $dst" in {
SSE_ALU_F64S, i8immZExt3>, XD;
}
-multiclass sse12_cmp_scalar_int<X86MemOperand x86memop, Operand CC,
+multiclass sse12_cmp_scalar_int<Operand memop, Operand CC,
Intrinsic Int, string asm, OpndItins itins,
- ImmLeaf immLeaf> {
+ ImmLeaf immLeaf, ComplexPattern mem_cpat> {
def rr : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src, CC:$cc), asm,
[(set VR128:$dst, (Int VR128:$src1,
@@ -2361,30 +2327,30 @@ multiclass sse12_cmp_scalar_int<X86MemOperand x86memop, Operand CC,
itins.rr>,
Sched<[itins.Sched]>;
def rm : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, x86memop:$src, CC:$cc), asm,
+ (ins VR128:$src1, memop:$src, CC:$cc), asm,
[(set VR128:$dst, (Int VR128:$src1,
- (load addr:$src), immLeaf:$cc))],
+ mem_cpat:$src, immLeaf:$cc))],
itins.rm>,
Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
let isCodeGenOnly = 1 in {
// Aliases to match intrinsics which expect XMM operand(s).
- defm Int_VCMPSS : sse12_cmp_scalar_int<f32mem, AVXCC, int_x86_sse_cmp_ss,
+ defm Int_VCMPSS : sse12_cmp_scalar_int<ssmem, AVXCC, int_x86_sse_cmp_ss,
"cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}",
- SSE_ALU_F32S, i8immZExt5>,
+ SSE_ALU_F32S, i8immZExt5, sse_load_f32>,
XS, VEX_4V;
- defm Int_VCMPSD : sse12_cmp_scalar_int<f64mem, AVXCC, int_x86_sse2_cmp_sd,
+ defm Int_VCMPSD : sse12_cmp_scalar_int<sdmem, AVXCC, int_x86_sse2_cmp_sd,
"cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}",
- SSE_ALU_F32S, i8immZExt5>, // same latency as f32
+ SSE_ALU_F32S, i8immZExt5, sse_load_f64>, // same latency as f32
XD, VEX_4V;
let Constraints = "$src1 = $dst" in {
- defm Int_CMPSS : sse12_cmp_scalar_int<f32mem, SSECC, int_x86_sse_cmp_ss,
+ defm Int_CMPSS : sse12_cmp_scalar_int<ssmem, SSECC, int_x86_sse_cmp_ss,
"cmp${cc}ss\t{$src, $dst|$dst, $src}",
- SSE_ALU_F32S, i8immZExt3>, XS;
- defm Int_CMPSD : sse12_cmp_scalar_int<f64mem, SSECC, int_x86_sse2_cmp_sd,
+ SSE_ALU_F32S, i8immZExt3, sse_load_f32>, XS;
+ defm Int_CMPSD : sse12_cmp_scalar_int<sdmem, SSECC, int_x86_sse2_cmp_sd,
"cmp${cc}sd\t{$src, $dst|$dst, $src}",
- SSE_ALU_F64S, i8immZExt3>,
+ SSE_ALU_F64S, i8immZExt3, sse_load_f64>,
XD;
}
}
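The XMM-operand aliases above exist because the scalar compare intrinsics operate only on the low lane and pass the upper lanes through. A hedged C sketch (standard SSE intrinsic; build flags are an assumption):

    /* cmpss_demo.c -- illustrative only; e.g. clang or GCC with -O2 -msse2 */
    #include <xmmintrin.h>
    #include <stdio.h>

    int main(void) {
        __m128 a = _mm_setr_ps(1.0f, 10.0f, 20.0f, 30.0f);
        __m128 b = _mm_setr_ps(2.0f, 0.0f, 0.0f, 0.0f);
        /* cmpltss: the low lane becomes an all-ones/all-zeros mask,
           the upper lanes are copied from the first source. */
        __m128 m = _mm_cmplt_ss(a, b);
        float out[4];
        _mm_storeu_ps(out, m);
        printf("low mask %s, upper lanes %g %g %g\n",
               out[0] != 0.0f ? "set" : "clear", out[1], out[2], out[3]);
        return 0;
    }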
@@ -2641,7 +2607,8 @@ let Predicates = [UseSSE2] in {
multiclass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt,
PatFrag mem_frag, RegisterClass RC,
X86MemOperand x86memop, string asm,
- Domain d> {
+ Domain d, bit IsCommutable = 0> {
+ let isCommutable = IsCommutable in
def rr : PI<opc, MRMSrcReg,
(outs RC:$dst), (ins RC:$src1, RC:$src2),
asm, [(set RC:$dst,
@@ -2689,7 +2656,7 @@ let Constraints = "$src1 = $dst" in {
SSEPackedSingle>, PS;
defm UNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, memopv2f64,
VR128, f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}",
- SSEPackedDouble>, PD;
+ SSEPackedDouble, 1>, PD;
defm UNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, memopv4f32,
VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}",
SSEPackedSingle>, PS;
@@ -2810,84 +2777,6 @@ defm PANDN : PDI_binop_all<0xDF, "pandn", X86andnp, v2i64, v4i64,
// SSE 1 & 2 - Logical Instructions
//===----------------------------------------------------------------------===//
-// Multiclass for scalars using the X86 logical operation aliases for FP.
-multiclass sse12_fp_packed_scalar_logical_alias<
- bits<8> opc, string OpcodeStr, SDNode OpNode, OpndItins itins> {
- defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
- FR32, f32, f128mem, loadf32_128, SSEPackedSingle, itins, 0>,
- PS, VEX_4V;
-
- defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
- FR64, f64, f128mem, loadf64_128, SSEPackedDouble, itins, 0>,
- PD, VEX_4V;
-
- let Constraints = "$src1 = $dst" in {
- defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, FR32,
- f32, f128mem, memopfsf32_128, SSEPackedSingle, itins>, PS;
-
- defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, FR64,
- f64, f128mem, memopfsf64_128, SSEPackedDouble, itins>, PD;
- }
-}
-
-let isCodeGenOnly = 1 in {
- defm FsAND : sse12_fp_packed_scalar_logical_alias<0x54, "and", X86fand,
- SSE_BIT_ITINS_P>;
- defm FsOR : sse12_fp_packed_scalar_logical_alias<0x56, "or", X86for,
- SSE_BIT_ITINS_P>;
- defm FsXOR : sse12_fp_packed_scalar_logical_alias<0x57, "xor", X86fxor,
- SSE_BIT_ITINS_P>;
-
- let isCommutable = 0 in
- defm FsANDN : sse12_fp_packed_scalar_logical_alias<0x55, "andn", X86fandn,
- SSE_BIT_ITINS_P>;
-}
-
-// Multiclass for vectors using the X86 logical operation aliases for FP.
-multiclass sse12_fp_packed_vector_logical_alias<
- bits<8> opc, string OpcodeStr, SDNode OpNode, OpndItins itins> {
- let Predicates = [HasAVX, NoVLX_Or_NoDQI] in {
- defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
- VR128, v4f32, f128mem, loadv4f32, SSEPackedSingle, itins, 0>,
- PS, VEX_4V;
-
- defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
- VR128, v2f64, f128mem, loadv2f64, SSEPackedDouble, itins, 0>,
- PD, VEX_4V;
-
- defm V#NAME#PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,
- VR256, v8f32, f256mem, loadv8f32, SSEPackedSingle, itins, 0>,
- PS, VEX_4V, VEX_L;
-
- defm V#NAME#PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode,
- VR256, v4f64, f256mem, loadv4f64, SSEPackedDouble, itins, 0>,
- PD, VEX_4V, VEX_L;
- }
-
- let Constraints = "$src1 = $dst" in {
- defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128,
- v4f32, f128mem, memopv4f32, SSEPackedSingle, itins>,
- PS;
-
- defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128,
- v2f64, f128mem, memopv2f64, SSEPackedDouble, itins>,
- PD;
- }
-}
-
-let isCodeGenOnly = 1 in {
- defm FvAND : sse12_fp_packed_vector_logical_alias<0x54, "and", X86fand,
- SSE_BIT_ITINS_P>;
- defm FvOR : sse12_fp_packed_vector_logical_alias<0x56, "or", X86for,
- SSE_BIT_ITINS_P>;
- defm FvXOR : sse12_fp_packed_vector_logical_alias<0x57, "xor", X86fxor,
- SSE_BIT_ITINS_P>;
-
- let isCommutable = 0 in
- defm FvANDN : sse12_fp_packed_vector_logical_alias<0x55, "andn", X86fandn,
- SSE_BIT_ITINS_P>;
-}
-
/// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops
///
multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
@@ -2895,7 +2784,8 @@ multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
let Predicates = [HasAVX, NoVLX] in {
defm V#NAME#PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle,
!strconcat(OpcodeStr, "ps"), f256mem,
- [(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))],
+ [(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)),
+ (bc_v4i64 (v8f32 VR256:$src2))))],
[(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)),
(loadv4i64 addr:$src2)))], 0>, PS, VEX_4V, VEX_L;
@@ -2907,12 +2797,10 @@ multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
(loadv4i64 addr:$src2)))], 0>,
PD, VEX_4V, VEX_L;
- // In AVX no need to add a pattern for 128-bit logical rr ps, because they
- // are all promoted to v2i64, and the patterns are covered by the int
- // version. This is needed in SSE only, because v2i64 isn't supported on
- // SSE1, but only on SSE2.
defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
- !strconcat(OpcodeStr, "ps"), f128mem, [],
+ !strconcat(OpcodeStr, "ps"), f128mem,
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
+ (bc_v2i64 (v4f32 VR128:$src2))))],
[(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
(loadv2i64 addr:$src2)))], 0>, PS, VEX_4V;
@@ -2928,7 +2816,8 @@ multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr,
let Constraints = "$src1 = $dst" in {
defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle,
!strconcat(OpcodeStr, "ps"), f128mem,
- [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))],
+ [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
+ (bc_v2i64 (v4f32 VR128:$src2))))],
[(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)),
(memopv2i64 addr:$src2)))]>, PS;
@@ -2947,19 +2836,124 @@ defm XOR : sse12_fp_packed_logical<0x57, "xor", xor>;
let isCommutable = 0 in
defm ANDN : sse12_fp_packed_logical<0x55, "andn", X86andnp>;
-// AVX1 requires type coercions in order to fold loads directly into logical
-// operations.
+// If only AVX1 is supported, we need to handle integer operations with
+// floating point instructions since the integer versions aren't available.
let Predicates = [HasAVX1Only] in {
- def : Pat<(bc_v8f32 (and VR256:$src1, (loadv4i64 addr:$src2))),
+ def : Pat<(v4i64 (and VR256:$src1, VR256:$src2)),
+ (VANDPSYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v4i64 (or VR256:$src1, VR256:$src2)),
+ (VORPSYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v4i64 (xor VR256:$src1, VR256:$src2)),
+ (VXORPSYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v4i64 (X86andnp VR256:$src1, VR256:$src2)),
+ (VANDNPSYrr VR256:$src1, VR256:$src2)>;
+
+ def : Pat<(and VR256:$src1, (loadv4i64 addr:$src2)),
(VANDPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(bc_v8f32 (or VR256:$src1, (loadv4i64 addr:$src2))),
+ def : Pat<(or VR256:$src1, (loadv4i64 addr:$src2)),
(VORPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(bc_v8f32 (xor VR256:$src1, (loadv4i64 addr:$src2))),
+ def : Pat<(xor VR256:$src1, (loadv4i64 addr:$src2)),
(VXORPSYrm VR256:$src1, addr:$src2)>;
- def : Pat<(bc_v8f32 (X86andnp VR256:$src1, (loadv4i64 addr:$src2))),
+ def : Pat<(X86andnp VR256:$src1, (loadv4i64 addr:$src2)),
(VANDNPSYrm VR256:$src1, addr:$src2)>;
}
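A short C sketch of the situation these HasAVX1Only patterns cover: with AVX1 but no AVX2 there is no 256-bit VPAND, so a 256-bit integer AND has to go through the FP logical instructions (the function name and flags are illustrative assumptions):

    /* Compile with e.g. -O2 -mavx (no -mavx2); illustrative only. */
    #include <immintrin.h>

    __m256i and256(__m256i a, __m256i b) {
        /* Expressed through the single-precision domain; the backend selects
           vandps on the YMM registers, as in the v4i64 patterns above. */
        return _mm256_castps_si256(
            _mm256_and_ps(_mm256_castsi256_ps(a), _mm256_castsi256_ps(b)));
    }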
+let Predicates = [HasAVX, NoVLX_Or_NoDQI] in {
+ // Use packed logical operations for scalar ops.
+ def : Pat<(f64 (X86fand FR64:$src1, FR64:$src2)),
+ (COPY_TO_REGCLASS (VANDPDrr
+ (COPY_TO_REGCLASS FR64:$src1, VR128),
+ (COPY_TO_REGCLASS FR64:$src2, VR128)), FR64)>;
+ def : Pat<(f64 (X86for FR64:$src1, FR64:$src2)),
+ (COPY_TO_REGCLASS (VORPDrr
+ (COPY_TO_REGCLASS FR64:$src1, VR128),
+ (COPY_TO_REGCLASS FR64:$src2, VR128)), FR64)>;
+ def : Pat<(f64 (X86fxor FR64:$src1, FR64:$src2)),
+ (COPY_TO_REGCLASS (VXORPDrr
+ (COPY_TO_REGCLASS FR64:$src1, VR128),
+ (COPY_TO_REGCLASS FR64:$src2, VR128)), FR64)>;
+ def : Pat<(f64 (X86fandn FR64:$src1, FR64:$src2)),
+ (COPY_TO_REGCLASS (VANDNPDrr
+ (COPY_TO_REGCLASS FR64:$src1, VR128),
+ (COPY_TO_REGCLASS FR64:$src2, VR128)), FR64)>;
+
+ def : Pat<(f32 (X86fand FR32:$src1, FR32:$src2)),
+ (COPY_TO_REGCLASS (VANDPSrr
+ (COPY_TO_REGCLASS FR32:$src1, VR128),
+ (COPY_TO_REGCLASS FR32:$src2, VR128)), FR32)>;
+ def : Pat<(f32 (X86for FR32:$src1, FR32:$src2)),
+ (COPY_TO_REGCLASS (VORPSrr
+ (COPY_TO_REGCLASS FR32:$src1, VR128),
+ (COPY_TO_REGCLASS FR32:$src2, VR128)), FR32)>;
+ def : Pat<(f32 (X86fxor FR32:$src1, FR32:$src2)),
+ (COPY_TO_REGCLASS (VXORPSrr
+ (COPY_TO_REGCLASS FR32:$src1, VR128),
+ (COPY_TO_REGCLASS FR32:$src2, VR128)), FR32)>;
+ def : Pat<(f32 (X86fandn FR32:$src1, FR32:$src2)),
+ (COPY_TO_REGCLASS (VANDNPSrr
+ (COPY_TO_REGCLASS FR32:$src1, VR128),
+ (COPY_TO_REGCLASS FR32:$src2, VR128)), FR32)>;
+}
+
+let Predicates = [UseSSE1] in {
+ // Use packed logical operations for scalar ops.
+ def : Pat<(f32 (X86fand FR32:$src1, FR32:$src2)),
+ (COPY_TO_REGCLASS (ANDPSrr
+ (COPY_TO_REGCLASS FR32:$src1, VR128),
+ (COPY_TO_REGCLASS FR32:$src2, VR128)), FR32)>;
+ def : Pat<(f32 (X86for FR32:$src1, FR32:$src2)),
+ (COPY_TO_REGCLASS (ORPSrr
+ (COPY_TO_REGCLASS FR32:$src1, VR128),
+ (COPY_TO_REGCLASS FR32:$src2, VR128)), FR32)>;
+ def : Pat<(f32 (X86fxor FR32:$src1, FR32:$src2)),
+ (COPY_TO_REGCLASS (XORPSrr
+ (COPY_TO_REGCLASS FR32:$src1, VR128),
+ (COPY_TO_REGCLASS FR32:$src2, VR128)), FR32)>;
+ def : Pat<(f32 (X86fandn FR32:$src1, FR32:$src2)),
+ (COPY_TO_REGCLASS (ANDNPSrr
+ (COPY_TO_REGCLASS FR32:$src1, VR128),
+ (COPY_TO_REGCLASS FR32:$src2, VR128)), FR32)>;
+}
+
+let Predicates = [UseSSE2] in {
+ // Use packed logical operations for scalar ops.
+ def : Pat<(f64 (X86fand FR64:$src1, FR64:$src2)),
+ (COPY_TO_REGCLASS (ANDPDrr
+ (COPY_TO_REGCLASS FR64:$src1, VR128),
+ (COPY_TO_REGCLASS FR64:$src2, VR128)), FR64)>;
+ def : Pat<(f64 (X86for FR64:$src1, FR64:$src2)),
+ (COPY_TO_REGCLASS (ORPDrr
+ (COPY_TO_REGCLASS FR64:$src1, VR128),
+ (COPY_TO_REGCLASS FR64:$src2, VR128)), FR64)>;
+ def : Pat<(f64 (X86fxor FR64:$src1, FR64:$src2)),
+ (COPY_TO_REGCLASS (XORPDrr
+ (COPY_TO_REGCLASS FR64:$src1, VR128),
+ (COPY_TO_REGCLASS FR64:$src2, VR128)), FR64)>;
+ def : Pat<(f64 (X86fandn FR64:$src1, FR64:$src2)),
+ (COPY_TO_REGCLASS (ANDNPDrr
+ (COPY_TO_REGCLASS FR64:$src1, VR128),
+ (COPY_TO_REGCLASS FR64:$src2, VR128)), FR64)>;
+}
+
+// Patterns for packed operations when no integer vector type is available.
+def : Pat<(v4f32 (X86fand VR128:$src1, VR128:$src2)),
+ (ANDPSrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v4f32 (X86for VR128:$src1, VR128:$src2)),
+ (ORPSrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v4f32 (X86fxor VR128:$src1, VR128:$src2)),
+ (XORPSrr VR128:$src1, VR128:$src2)>;
+def : Pat<(v4f32 (X86fandn VR128:$src1, VR128:$src2)),
+ (ANDNPSrr VR128:$src1, VR128:$src2)>;
+
+def : Pat<(X86fand VR128:$src1, (memopv4f32 addr:$src2)),
+ (ANDPSrm VR128:$src1, addr:$src2)>;
+def : Pat<(X86for VR128:$src1, (memopv4f32 addr:$src2)),
+ (ORPSrm VR128:$src1, addr:$src2)>;
+def : Pat<(X86fxor VR128:$src1, (memopv4f32 addr:$src2)),
+ (XORPSrm VR128:$src1, addr:$src2)>;
+def : Pat<(X86fandn VR128:$src1, (memopv4f32 addr:$src2)),
+ (ANDNPSrm VR128:$src1, addr:$src2)>;
+
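These scalar patterns mostly show up for sign-bit manipulation: fabs and negation on f32/f64 become X86fand/X86fxor nodes with a constant mask and are now selected as the packed ANDPS/XORPD forms on the containing XMM register. A minimal sketch (the expected assembly is a typical-codegen assumption, e.g. clang or GCC at -O2 with SSE2):

    /* scalar_fp_logic_demo.c -- illustrative only */
    #include <math.h>

    float  abs_f32(float x)  { return fabsf(x); } /* usually andps with a constant mask */
    double neg_f64(double x) { return -x;        } /* usually xorpd with the sign bit   */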
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Arithmetic Instructions
//===----------------------------------------------------------------------===//
@@ -3025,20 +3019,22 @@ multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
}
multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
+ SDPatternOperator IntSS,
+ SDPatternOperator IntSD,
SizeItins itins> {
- defm V#NAME#SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
- !strconcat(OpcodeStr, "ss"), "", "_ss", ssmem, sse_load_f32,
+ defm V#NAME#SS : sse12_fp_scalar_int<opc, OpcodeStr, IntSS, VR128,
+ !strconcat(OpcodeStr, "ss"), ssmem, sse_load_f32,
SSEPackedSingle, itins.s, 0>, XS, VEX_4V, VEX_LIG;
- defm V#NAME#SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
- !strconcat(OpcodeStr, "sd"), "2", "_sd", sdmem, sse_load_f64,
+ defm V#NAME#SD : sse12_fp_scalar_int<opc, OpcodeStr, IntSD, VR128,
+ !strconcat(OpcodeStr, "sd"), sdmem, sse_load_f64,
SSEPackedDouble, itins.d, 0>, XD, VEX_4V, VEX_LIG;
let Constraints = "$src1 = $dst" in {
- defm SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
- !strconcat(OpcodeStr, "ss"), "", "_ss", ssmem, sse_load_f32,
+ defm SS : sse12_fp_scalar_int<opc, OpcodeStr, IntSS, VR128,
+ !strconcat(OpcodeStr, "ss"), ssmem, sse_load_f32,
SSEPackedSingle, itins.s>, XS;
- defm SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128,
- !strconcat(OpcodeStr, "sd"), "2", "_sd", sdmem, sse_load_f64,
+ defm SD : sse12_fp_scalar_int<opc, OpcodeStr, IntSD, VR128,
+ !strconcat(OpcodeStr, "sd"), sdmem, sse_load_f64,
SSEPackedDouble, itins.d>, XD;
}
}
@@ -3046,23 +3042,29 @@ multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr,
// Binary Arithmetic instructions
defm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SSE_ALU_ITINS_P>,
basic_sse12_fp_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>,
- basic_sse12_fp_binop_s_int<0x58, "add", SSE_ALU_ITINS_S>;
+ basic_sse12_fp_binop_s_int<0x58, "add", null_frag, null_frag,
+ SSE_ALU_ITINS_S>;
defm MUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SSE_MUL_ITINS_P>,
basic_sse12_fp_binop_s<0x59, "mul", fmul, SSE_MUL_ITINS_S>,
- basic_sse12_fp_binop_s_int<0x59, "mul", SSE_MUL_ITINS_S>;
+ basic_sse12_fp_binop_s_int<0x59, "mul", null_frag, null_frag,
+ SSE_MUL_ITINS_S>;
let isCommutable = 0 in {
defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P>,
basic_sse12_fp_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>,
- basic_sse12_fp_binop_s_int<0x5C, "sub", SSE_ALU_ITINS_S>;
+ basic_sse12_fp_binop_s_int<0x5C, "sub", null_frag, null_frag,
+ SSE_ALU_ITINS_S>;
defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SSE_DIV_ITINS_P>,
basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S>,
- basic_sse12_fp_binop_s_int<0x5E, "div", SSE_DIV_ITINS_S>;
+ basic_sse12_fp_binop_s_int<0x5E, "div", null_frag, null_frag,
+ SSE_DIV_ITINS_S>;
defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SSE_ALU_ITINS_P>,
basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>,
- basic_sse12_fp_binop_s_int<0x5F, "max", SSE_ALU_ITINS_S>;
+ basic_sse12_fp_binop_s_int<0x5F, "max", int_x86_sse_max_ss,
+ int_x86_sse2_max_sd, SSE_ALU_ITINS_S>;
defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SSE_ALU_ITINS_P>,
basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>,
- basic_sse12_fp_binop_s_int<0x5D, "min", SSE_ALU_ITINS_S>;
+ basic_sse12_fp_binop_s_int<0x5D, "min", int_x86_sse_min_ss,
+ int_x86_sse2_min_sd, SSE_ALU_ITINS_S>;
}
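MIN/MAX keep explicit intrinsic forms (int_x86_sse_min_ss and friends) because the hardware semantics are not a plain commutative fmin/fmax: when either input is a NaN, MINSS/MAXSS return the second operand. A quick C check (standard intrinsic; build flags are an assumption):

    /* minss_demo.c -- illustrative only; e.g. clang or GCC with -O2 -msse2 */
    #include <xmmintrin.h>
    #include <math.h>
    #include <stdio.h>

    int main(void) {
        __m128 a = _mm_set_ss(NAN);
        __m128 b = _mm_set_ss(1.0f);
        /* minss returns the second operand when either input is NaN. */
        printf("%f\n", _mm_cvtss_f32(_mm_min_ss(a, b)));   /* 1.000000 */
        return 0;
    }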
let isCodeGenOnly = 1 in {
@@ -3145,9 +3147,15 @@ multiclass scalar_math_f32_patterns<SDNode Op, string OpcPrefix> {
}
- // Repeat everything for AVX, except for the movss + scalar combo...
- // because that one shouldn't occur with AVX codegen?
- let Predicates = [HasAVX] in {
+ // Repeat everything for AVX.
+ let Predicates = [UseAVX] in {
+ // extracted scalar math op with insert via movss
+ def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
+ (Op (f32 (extractelt (v4f32 VR128:$dst), (iPTR 0))),
+ FR32:$src))))),
+ (!cast<I>("V"#OpcPrefix#SSrr_Int) v4f32:$dst,
+ (COPY_TO_REGCLASS FR32:$src, VR128))>;
+
// extracted scalar math op with insert via blend
def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
(Op (f32 (extractelt (v4f32 VR128:$dst), (iPTR 0))),
@@ -3203,7 +3211,7 @@ multiclass scalar_math_f64_patterns<SDNode Op, string OpcPrefix> {
}
// Repeat everything for AVX.
- let Predicates = [HasAVX] in {
+ let Predicates = [UseAVX] in {
// extracted scalar math op with insert via movsd
def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector
(Op (f64 (extractelt (v2f64 VR128:$dst), (iPTR 0))),
@@ -3287,8 +3295,8 @@ def SSE_RCPS : OpndItins<
/// the HW instructions are 2 operand / destructive.
multiclass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
ValueType vt, ValueType ScalarVT,
- X86MemOperand x86memop, Operand vec_memop,
- ComplexPattern mem_cpat, Intrinsic Intr,
+ X86MemOperand x86memop,
+ Intrinsic Intr,
SDNode OpNode, Domain d, OpndItins itins,
Predicate target, string Suffix> {
let hasSideEffects = 0 in {
@@ -3308,23 +3316,17 @@ multiclass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[]>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
let mayLoad = 1 in
- def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, vec_memop:$src2),
+ def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, x86memop:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[]>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
}
let Predicates = [target] in {
- def : Pat<(vt (OpNode mem_cpat:$src)),
- (vt (COPY_TO_REGCLASS (vt (!cast<Instruction>(NAME#Suffix##m_Int)
- (vt (IMPLICIT_DEF)), mem_cpat:$src)), RC))>;
// These are unary operations, but they are modeled as having 2 source operands
// because the high elements of the destination are unchanged in SSE.
def : Pat<(Intr VR128:$src),
(!cast<Instruction>(NAME#Suffix##r_Int) VR128:$src, VR128:$src)>;
- def : Pat<(Intr (load addr:$src)),
- (vt (COPY_TO_REGCLASS(!cast<Instruction>(NAME#Suffix##m)
- addr:$src), VR128))>;
}
// We don't want to fold scalar loads into these instructions unless
// optimizing for size. This is because the folded instruction will have a
@@ -3334,16 +3336,15 @@ multiclass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
// which has a clobber before the rcp, vs.
// rcpss mem, %xmm0
let Predicates = [target, OptForSize] in {
- def : Pat<(Intr mem_cpat:$src),
+ def : Pat<(Intr (scalar_to_vector (ScalarVT (load addr:$src2)))),
(!cast<Instruction>(NAME#Suffix##m_Int)
- (vt (IMPLICIT_DEF)), mem_cpat:$src)>;
+ (vt (IMPLICIT_DEF)), addr:$src2)>;
}
}
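The OptForSize-only load patterns above correspond to source like the following, where folding the load into rcpss would introduce a partial XMM register update (the expected codegen is a typical-behaviour assumption):

    /* rcpss_demo.c -- illustrative only; e.g. clang or GCC with -O2 -msse2 */
    #include <xmmintrin.h>

    float approx_recip(const float *p) {
        /* Without -Os: movss + rcpss reg,reg; with optsize the folded
           rcpss mem form from the pattern above may be used instead. */
        return _mm_cvtss_f32(_mm_rcp_ss(_mm_load_ss(p)));
    }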
multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
ValueType vt, ValueType ScalarVT,
- X86MemOperand x86memop, Operand vec_memop,
- ComplexPattern mem_cpat,
+ X86MemOperand x86memop,
Intrinsic Intr, SDNode OpNode, Domain d,
OpndItins itins, string Suffix> {
let hasSideEffects = 0 in {
@@ -3361,7 +3362,7 @@ multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
[]>, Sched<[itins.Sched.Folded]>;
let mayLoad = 1 in
def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins VR128:$src1, vec_memop:$src2),
+ (ins VR128:$src1, x86memop:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[]>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
@@ -3382,21 +3383,18 @@ multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC,
}
let Predicates = [HasAVX] in {
def : Pat<(Intr VR128:$src),
- (!cast<Instruction>("V"#NAME#Suffix##r_Int) (vt (IMPLICIT_DEF)),
+ (!cast<Instruction>("V"#NAME#Suffix##r_Int) VR128:$src,
VR128:$src)>;
}
let Predicates = [HasAVX, OptForSize] in {
- def : Pat<(Intr mem_cpat:$src),
+ def : Pat<(Intr (scalar_to_vector (ScalarVT (load addr:$src2)))),
(!cast<Instruction>("V"#NAME#Suffix##m_Int)
- (vt (IMPLICIT_DEF)), mem_cpat:$src)>;
+ (vt (IMPLICIT_DEF)), addr:$src2)>;
}
let Predicates = [UseAVX, OptForSize] in {
def : Pat<(ScalarVT (OpNode (load addr:$src))),
(!cast<Instruction>("V"#NAME#Suffix##m) (ScalarVT (IMPLICIT_DEF)),
addr:$src)>;
- def : Pat<(vt (OpNode mem_cpat:$src)),
- (!cast<Instruction>("V"#NAME#Suffix##m_Int) (vt (IMPLICIT_DEF)),
- mem_cpat:$src)>;
}
}
@@ -3475,11 +3473,10 @@ let Predicates = [HasAVX] in {
multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpndItins itins> {
defm SS : sse_fp_unop_s<opc, OpcodeStr##ss, FR32, v4f32, f32, f32mem,
- ssmem, sse_load_f32,
!cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss), OpNode,
SSEPackedSingle, itins, UseSSE1, "SS">, XS;
defm V#NAME#SS : avx_fp_unop_s<opc, "v"#OpcodeStr##ss, FR32, v4f32, f32,
- f32mem, ssmem, sse_load_f32,
+ f32mem,
!cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss), OpNode,
SSEPackedSingle, itins, "SS">, XS, VEX_4V, VEX_LIG;
}
@@ -3487,11 +3484,10 @@ multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
OpndItins itins> {
defm SD : sse_fp_unop_s<opc, OpcodeStr##sd, FR64, v2f64, f64, f64mem,
- sdmem, sse_load_f64,
!cast<Intrinsic>("int_x86_sse2_"##OpcodeStr##_sd),
OpNode, SSEPackedDouble, itins, UseSSE2, "SD">, XD;
defm V#NAME#SD : avx_fp_unop_s<opc, "v"#OpcodeStr##sd, FR64, v2f64, f64,
- f64mem, sdmem, sse_load_f64,
+ f64mem,
!cast<Intrinsic>("int_x86_sse2_"##OpcodeStr##_sd),
OpNode, SSEPackedDouble, itins, "SD">,
XD, VEX_4V, VEX_LIG;
@@ -3805,13 +3801,14 @@ def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src),
}
let SchedRW = [WriteMove] in {
-let hasSideEffects = 0 in
+let hasSideEffects = 0 in {
def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>;
def MOVDQUrr : I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movdqu\t{$src, $dst|$dst, $src}",
[], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>;
+}
// For Disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in {
@@ -3874,85 +3871,12 @@ def SSE_PMADD : OpndItins<
let ExeDomain = SSEPackedInt in { // SSE integer instructions
-multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId,
- RegisterClass RC, PatFrag memop_frag,
- X86MemOperand x86memop,
- OpndItins itins,
- bit IsCommutable = 0,
- bit Is2Addr = 1> {
- let isCommutable = IsCommutable in
- def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, RC:$src2),
- !if(Is2Addr,
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (IntId RC:$src1, RC:$src2))], itins.rr>,
- Sched<[itins.Sched]>;
- def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, x86memop:$src2),
- !if(Is2Addr,
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (IntId RC:$src1, (bitconvert (memop_frag addr:$src2))))],
- itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>;
-}
-
-multiclass PDI_binop_all_int<bits<8> opc, string OpcodeStr, Intrinsic IntId128,
- Intrinsic IntId256, OpndItins itins,
- bit IsCommutable = 0> {
-let Predicates = [HasAVX] in
- defm V#NAME : PDI_binop_rm_int<opc, !strconcat("v", OpcodeStr), IntId128,
- VR128, loadv2i64, i128mem, itins,
- IsCommutable, 0>, VEX_4V;
-
-let Constraints = "$src1 = $dst" in
- defm NAME : PDI_binop_rm_int<opc, OpcodeStr, IntId128, VR128, memopv2i64,
- i128mem, itins, IsCommutable, 1>;
-
-let Predicates = [HasAVX2] in
- defm V#NAME#Y : PDI_binop_rm_int<opc, !strconcat("v", OpcodeStr), IntId256,
- VR256, loadv4i64, i256mem, itins,
- IsCommutable, 0>, VEX_4V, VEX_L;
-}
-
-multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm,
- string OpcodeStr, SDNode OpNode,
- SDNode OpNode2, RegisterClass RC,
- ValueType DstVT, ValueType SrcVT,
- PatFrag ld_frag, ShiftOpndItins itins,
- bit Is2Addr = 1> {
- // src2 is always 128-bit
- def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
- (ins RC:$src1, VR128:$src2),
- !if(Is2Addr,
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (DstVT (OpNode RC:$src1, (SrcVT VR128:$src2))))],
- itins.rr>, Sched<[WriteVecShift]>;
- def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
- (ins RC:$src1, i128mem:$src2),
- !if(Is2Addr,
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (DstVT (OpNode RC:$src1,
- (SrcVT (bitconvert (ld_frag addr:$src2))))))], itins.rm>,
- Sched<[WriteVecShiftLd, ReadAfterLd]>;
- def ri : PDIi8<opc2, ImmForm, (outs RC:$dst),
- (ins RC:$src1, u8imm:$src2),
- !if(Is2Addr,
- !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
- !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i8 imm:$src2))))], itins.ri>,
- Sched<[WriteVecShift]>;
-}
-
/// PDI_binop_rm2 - Simple SSE2 binary operator with different src and dst types
multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType DstVT, ValueType SrcVT, RegisterClass RC,
PatFrag memop_frag, X86MemOperand x86memop,
- OpndItins itins,
- bit IsCommutable = 0, bit Is2Addr = 1> {
- let isCommutable = IsCommutable in
+ OpndItins itins, bit Is2Addr = 1> {
+ let isCommutable = 1 in
def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2),
!if(Is2Addr,
@@ -3984,9 +3908,9 @@ defm PADDSB : PDI_binop_all<0xEC, "paddsb", X86adds, v16i8, v32i8,
defm PADDSW : PDI_binop_all<0xED, "paddsw", X86adds, v8i16, v16i16,
SSE_INTALU_ITINS_P, 1, NoVLX_Or_NoBWI>;
defm PADDUSB : PDI_binop_all<0xDC, "paddusb", X86addus, v16i8, v32i8,
- SSE_INTALU_ITINS_P, 0, NoVLX_Or_NoBWI>;
+ SSE_INTALU_ITINS_P, 1, NoVLX_Or_NoBWI>;
defm PADDUSW : PDI_binop_all<0xDD, "paddusw", X86addus, v8i16, v16i16,
- SSE_INTALU_ITINS_P, 0, NoVLX_Or_NoBWI>;
+ SSE_INTALU_ITINS_P, 1, NoVLX_Or_NoBWI>;
defm PMULLW : PDI_binop_all<0xD5, "pmullw", mul, v8i16, v16i16,
SSE_INTMUL_ITINS_P, 1, NoVLX_Or_NoBWI>;
defm PMULHUW : PDI_binop_all<0xE4, "pmulhuw", mulhu, v8i16, v16i16,
@@ -4022,184 +3946,141 @@ defm PAVGB : PDI_binop_all<0xE0, "pavgb", X86avg, v16i8, v32i8,
defm PAVGW : PDI_binop_all<0xE3, "pavgw", X86avg, v8i16, v16i16,
SSE_INTALU_ITINS_P, 1, NoVLX_Or_NoBWI>;
-// Intrinsic forms
-defm PMADDWD : PDI_binop_all_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd,
- int_x86_avx2_pmadd_wd, SSE_PMADD, 1>;
+let Predicates = [HasAVX, NoVLX_Or_NoBWI] in
+defm VPMADDWD : PDI_binop_rm2<0xF5, "vpmaddwd", X86vpmaddwd, v4i32, v8i16, VR128,
+ loadv2i64, i128mem, SSE_PMADD, 0>, VEX_4V;
+
+let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in
+defm VPMADDWDY : PDI_binop_rm2<0xF5, "vpmaddwd", X86vpmaddwd, v8i32, v16i16,
+ VR256, loadv4i64, i256mem, SSE_PMADD,
+ 0>, VEX_4V, VEX_L;
+let Constraints = "$src1 = $dst" in
+defm PMADDWD : PDI_binop_rm2<0xF5, "pmaddwd", X86vpmaddwd, v4i32, v8i16, VR128,
+ memopv2i64, i128mem, SSE_PMADD>;
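PMADDWD now uses the X86vpmaddwd node with distinct source and destination types (v8i16 in, v4i32 out). At the C level this is the standard _mm_madd_epi16 intrinsic (flags and expected output are illustrative):

    /* pmaddwd_demo.c -- illustrative only; e.g. clang or GCC with -O2 -msse2 */
    #include <emmintrin.h>
    #include <stdio.h>

    int main(void) {
        __m128i a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
        __m128i b = _mm_set1_epi16(1);
        /* Multiply 16-bit pairs, add adjacent products into 32-bit lanes. */
        __m128i r = _mm_madd_epi16(a, b);
        int out[4];
        _mm_storeu_si128((__m128i *)out, r);
        printf("%d %d %d %d\n", out[0], out[1], out[2], out[3]);   /* 3 7 11 15 */
        return 0;
    }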
let Predicates = [HasAVX, NoVLX_Or_NoBWI] in
defm VPSADBW : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw, v2i64, v16i8, VR128,
- loadv2i64, i128mem, SSE_INTMUL_ITINS_P, 1, 0>,
+ loadv2i64, i128mem, SSE_INTMUL_ITINS_P, 0>,
VEX_4V;
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in
defm VPSADBWY : PDI_binop_rm2<0xF6, "vpsadbw", X86psadbw, v4i64, v32i8, VR256,
- loadv4i64, i256mem, SSE_INTMUL_ITINS_P, 1, 0>,
+ loadv4i64, i256mem, SSE_INTMUL_ITINS_P, 0>,
VEX_4V, VEX_L;
let Constraints = "$src1 = $dst" in
defm PSADBW : PDI_binop_rm2<0xF6, "psadbw", X86psadbw, v2i64, v16i8, VR128,
- memopv2i64, i128mem, SSE_INTALU_ITINS_P, 1>;
+ memopv2i64, i128mem, SSE_INTALU_ITINS_P>;
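PSADBW likewise has mismatched types: sixteen unsigned byte differences are summed into two 64-bit lanes. A small sketch with the standard intrinsic (flags assumed):

    /* psadbw_demo.c -- illustrative only; e.g. clang or GCC with -O2 -msse2 */
    #include <emmintrin.h>
    #include <stdio.h>

    int main(void) {
        __m128i a = _mm_set1_epi8(10);
        __m128i b = _mm_set1_epi8(3);
        /* |a - b| summed over each group of 8 bytes into a 64-bit lane. */
        __m128i r = _mm_sad_epu8(a, b);
        long long out[2];
        _mm_storeu_si128((__m128i *)out, r);
        printf("%lld %lld\n", out[0], out[1]);   /* 56 56 */
        return 0;
    }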
let Predicates = [HasAVX, NoVLX] in
defm VPMULUDQ : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v2i64, v4i32, VR128,
- loadv2i64, i128mem, SSE_INTMUL_ITINS_P, 1, 0>,
+ loadv2i64, i128mem, SSE_INTMUL_ITINS_P, 0>,
VEX_4V;
let Predicates = [HasAVX2, NoVLX] in
defm VPMULUDQY : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v4i64, v8i32,
VR256, loadv4i64, i256mem,
- SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L;
+ SSE_INTMUL_ITINS_P, 0>, VEX_4V, VEX_L;
let Constraints = "$src1 = $dst" in
defm PMULUDQ : PDI_binop_rm2<0xF4, "pmuludq", X86pmuludq, v2i64, v4i32, VR128,
- memopv2i64, i128mem, SSE_INTMUL_ITINS_P, 1>;
+ memopv2i64, i128mem, SSE_INTMUL_ITINS_P>;
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Logical Instructions
//===---------------------------------------------------------------------===//
-let Predicates = [HasAVX, NoVLX] in {
-defm VPSLLD : PDI_binop_rmi<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli,
- VR128, v4i32, v4i32, loadv2i64,
- SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
-defm VPSLLQ : PDI_binop_rmi<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli,
- VR128, v2i64, v2i64, loadv2i64,
- SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
-
-defm VPSRLD : PDI_binop_rmi<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli,
- VR128, v4i32, v4i32, loadv2i64,
- SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
-defm VPSRLQ : PDI_binop_rmi<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli,
- VR128, v2i64, v2i64, loadv2i64,
- SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
-
-defm VPSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
- VR128, v4i32, v4i32, loadv2i64,
- SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
-} // Predicates = [HasAVX, NoVLX]
+multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm,
+ string OpcodeStr, SDNode OpNode,
+ SDNode OpNode2, RegisterClass RC,
+ ValueType DstVT, ValueType SrcVT,
+ PatFrag ld_frag, bit Is2Addr = 1> {
+ // src2 is always 128-bit
+ def rr : PDI<opc, MRMSrcReg, (outs RC:$dst),
+ (ins RC:$src1, VR128:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (DstVT (OpNode RC:$src1, (SrcVT VR128:$src2))))],
+ SSE_INTSHIFT_ITINS_P.rr>, Sched<[WriteVecShift]>;
+ def rm : PDI<opc, MRMSrcMem, (outs RC:$dst),
+ (ins RC:$src1, i128mem:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (DstVT (OpNode RC:$src1,
+ (SrcVT (bitconvert (ld_frag addr:$src2))))))],
+ SSE_INTSHIFT_ITINS_P.rm>, Sched<[WriteVecShiftLd, ReadAfterLd]>;
+ def ri : PDIi8<opc2, ImmForm, (outs RC:$dst),
+ (ins RC:$src1, u8imm:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i8 imm:$src2))))],
+ SSE_INTSHIFT_ITINS_P.ri>, Sched<[WriteVecShift]>;
+}
-let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
-defm VPSLLW : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli,
- VR128, v8i16, v8i16, loadv2i64,
- SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
-defm VPSRLW : PDI_binop_rmi<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli,
- VR128, v8i16, v8i16, loadv2i64,
- SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
-defm VPSRAW : PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai,
- VR128, v8i16, v8i16, loadv2i64,
- SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
-} // Predicates = [HasAVX, NoVLX_Or_NoBWI]
-
-
-let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] ,
- Predicates = [HasAVX, NoVLX_Or_NoBWI]in {
- // 128-bit logical shifts.
- def VPSLLDQri : PDIi8<0x73, MRM7r,
- (outs VR128:$dst), (ins VR128:$src1, u8imm:$src2),
- "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst,
- (v16i8 (X86vshldq VR128:$src1, (i8 imm:$src2))))]>,
- VEX_4V;
- def VPSRLDQri : PDIi8<0x73, MRM3r,
- (outs VR128:$dst), (ins VR128:$src1, u8imm:$src2),
- "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR128:$dst,
- (v16i8 (X86vshrdq VR128:$src1, (i8 imm:$src2))))]>,
- VEX_4V;
- // PSRADQri doesn't exist in SSE[1-3].
-} // Predicates = [HasAVX, NoVLX_Or_NoBWI]
+multiclass PDI_binop_rmi_all<bits<8> opc, bits<8> opc2, Format ImmForm,
+ string OpcodeStr, SDNode OpNode,
+ SDNode OpNode2, ValueType DstVT128,
+ ValueType DstVT256, ValueType SrcVT,
+ Predicate prd> {
+let Predicates = [HasAVX, prd] in
+ defm V#NAME : PDI_binop_rmi<opc, opc2, ImmForm, !strconcat("v", OpcodeStr),
+ OpNode, OpNode2, VR128, DstVT128, SrcVT,
+ loadv2i64, 0>, VEX_4V;
+let Predicates = [HasAVX2, prd] in
+ defm V#NAME#Y : PDI_binop_rmi<opc, opc2, ImmForm, !strconcat("v", OpcodeStr),
+ OpNode, OpNode2, VR256, DstVT256, SrcVT,
+ loadv2i64, 0>, VEX_4V, VEX_L;
+let Constraints = "$src1 = $dst" in
+ defm NAME : PDI_binop_rmi<opc, opc2, ImmForm, OpcodeStr, OpNode, OpNode2,
+ VR128, DstVT128, SrcVT, memopv2i64>;
+}
-let Predicates = [HasAVX2, NoVLX] in {
-defm VPSLLDY : PDI_binop_rmi<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli,
- VR256, v8i32, v4i32, loadv2i64,
- SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
-defm VPSLLQY : PDI_binop_rmi<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli,
- VR256, v4i64, v2i64, loadv2i64,
- SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
-
-defm VPSRLDY : PDI_binop_rmi<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli,
- VR256, v8i32, v4i32, loadv2i64,
- SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
-defm VPSRLQY : PDI_binop_rmi<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli,
- VR256, v4i64, v2i64, loadv2i64,
- SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
-
-defm VPSRADY : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
- VR256, v8i32, v4i32, loadv2i64,
- SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
-}// Predicates = [HasAVX2, NoVLX]
+multiclass PDI_binop_ri<bits<8> opc, Format ImmForm, string OpcodeStr,
+ SDNode OpNode, RegisterClass RC, ValueType VT,
+ bit Is2Addr = 1> {
+ def ri : PDIi8<opc, ImmForm, (outs RC:$dst), (ins RC:$src1, u8imm:$src2),
+ !if(Is2Addr,
+ !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+ [(set RC:$dst, (VT (OpNode RC:$src1, (i8 imm:$src2))))],
+ IIC_SSE_INTSHDQ_P_RI>, Sched<[WriteVecShift]>;
+}
-let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
-defm VPSLLWY : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli,
- VR256, v16i16, v8i16, loadv2i64,
- SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
-defm VPSRLWY : PDI_binop_rmi<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli,
- VR256, v16i16, v8i16, loadv2i64,
- SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
-defm VPSRAWY : PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai,
- VR256, v16i16, v8i16, loadv2i64,
- SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
-}// Predicates = [HasAVX2, NoVLX_Or_NoBWI]
-
-let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift], hasSideEffects = 0 ,
- Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
- // 256-bit logical shifts.
- def VPSLLDQYri : PDIi8<0x73, MRM7r,
- (outs VR256:$dst), (ins VR256:$src1, u8imm:$src2),
- "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR256:$dst,
- (v32i8 (X86vshldq VR256:$src1, (i8 imm:$src2))))]>,
- VEX_4V, VEX_L;
- def VPSRLDQYri : PDIi8<0x73, MRM3r,
- (outs VR256:$dst), (ins VR256:$src1, u8imm:$src2),
- "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set VR256:$dst,
- (v32i8 (X86vshrdq VR256:$src1, (i8 imm:$src2))))]>,
- VEX_4V, VEX_L;
- // PSRADQYri doesn't exist in SSE[1-3].
-} // Predicates = [HasAVX2, NoVLX_Or_NoBWI]
+multiclass PDI_binop_ri_all<bits<8> opc, Format ImmForm, string OpcodeStr,
+ SDNode OpNode> {
+let Predicates = [HasAVX, NoVLX_Or_NoBWI] in
+ defm V#NAME : PDI_binop_ri<opc, ImmForm, !strconcat("v", OpcodeStr), OpNode,
+ VR128, v16i8, 0>, VEX_4V;
+let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in
+ defm V#NAME#Y : PDI_binop_ri<opc, ImmForm, !strconcat("v", OpcodeStr), OpNode,
+ VR256, v32i8, 0>, VEX_4V, VEX_L;
+let Constraints = "$src1 = $dst" in
+ defm NAME : PDI_binop_ri<opc, ImmForm, OpcodeStr, OpNode, VR128, v16i8>;
+}
-let Constraints = "$src1 = $dst" in {
-defm PSLLW : PDI_binop_rmi<0xF1, 0x71, MRM6r, "psllw", X86vshl, X86vshli,
- VR128, v8i16, v8i16, memopv2i64,
- SSE_INTSHIFT_ITINS_P>;
-defm PSLLD : PDI_binop_rmi<0xF2, 0x72, MRM6r, "pslld", X86vshl, X86vshli,
- VR128, v4i32, v4i32, memopv2i64,
- SSE_INTSHIFT_ITINS_P>;
-defm PSLLQ : PDI_binop_rmi<0xF3, 0x73, MRM6r, "psllq", X86vshl, X86vshli,
- VR128, v2i64, v2i64, memopv2i64,
- SSE_INTSHIFT_ITINS_P>;
-
-defm PSRLW : PDI_binop_rmi<0xD1, 0x71, MRM2r, "psrlw", X86vsrl, X86vsrli,
- VR128, v8i16, v8i16, memopv2i64,
- SSE_INTSHIFT_ITINS_P>;
-defm PSRLD : PDI_binop_rmi<0xD2, 0x72, MRM2r, "psrld", X86vsrl, X86vsrli,
- VR128, v4i32, v4i32, memopv2i64,
- SSE_INTSHIFT_ITINS_P>;
-defm PSRLQ : PDI_binop_rmi<0xD3, 0x73, MRM2r, "psrlq", X86vsrl, X86vsrli,
- VR128, v2i64, v2i64, memopv2i64,
- SSE_INTSHIFT_ITINS_P>;
-
-defm PSRAW : PDI_binop_rmi<0xE1, 0x71, MRM4r, "psraw", X86vsra, X86vsrai,
- VR128, v8i16, v8i16, memopv2i64,
- SSE_INTSHIFT_ITINS_P>;
-defm PSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "psrad", X86vsra, X86vsrai,
- VR128, v4i32, v4i32, memopv2i64,
- SSE_INTSHIFT_ITINS_P>;
-
-let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift], hasSideEffects = 0 in {
- // 128-bit logical shifts.
- def PSLLDQri : PDIi8<0x73, MRM7r,
- (outs VR128:$dst), (ins VR128:$src1, u8imm:$src2),
- "pslldq\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v16i8 (X86vshldq VR128:$src1, (i8 imm:$src2))))],
- IIC_SSE_INTSHDQ_P_RI>;
- def PSRLDQri : PDIi8<0x73, MRM3r,
- (outs VR128:$dst), (ins VR128:$src1, u8imm:$src2),
- "psrldq\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v16i8 (X86vshrdq VR128:$src1, (i8 imm:$src2))))],
- IIC_SSE_INTSHDQ_P_RI>;
+let ExeDomain = SSEPackedInt in {
+ defm PSLLW : PDI_binop_rmi_all<0xF1, 0x71, MRM6r, "psllw", X86vshl, X86vshli,
+ v8i16, v16i16, v8i16, NoVLX_Or_NoBWI>;
+ defm PSLLD : PDI_binop_rmi_all<0xF2, 0x72, MRM6r, "pslld", X86vshl, X86vshli,
+ v4i32, v8i32, v4i32, NoVLX>;
+ defm PSLLQ : PDI_binop_rmi_all<0xF3, 0x73, MRM6r, "psllq", X86vshl, X86vshli,
+ v2i64, v4i64, v2i64, NoVLX>;
+
+ defm PSRLW : PDI_binop_rmi_all<0xD1, 0x71, MRM2r, "psrlw", X86vsrl, X86vsrli,
+ v8i16, v16i16, v8i16, NoVLX_Or_NoBWI>;
+ defm PSRLD : PDI_binop_rmi_all<0xD2, 0x72, MRM2r, "psrld", X86vsrl, X86vsrli,
+ v4i32, v8i32, v4i32, NoVLX>;
+ defm PSRLQ : PDI_binop_rmi_all<0xD3, 0x73, MRM2r, "psrlq", X86vsrl, X86vsrli,
+ v2i64, v4i64, v2i64, NoVLX>;
+
+ defm PSRAW : PDI_binop_rmi_all<0xE1, 0x71, MRM4r, "psraw", X86vsra, X86vsrai,
+ v8i16, v16i16, v8i16, NoVLX_Or_NoBWI>;
+ defm PSRAD : PDI_binop_rmi_all<0xE2, 0x72, MRM4r, "psrad", X86vsra, X86vsrai,
+ v4i32, v8i32, v4i32, NoVLX>;
+
+ defm PSLLDQ : PDI_binop_ri_all<0x73, MRM7r, "pslldq", X86vshldq>;
+ defm PSRLDQ : PDI_binop_ri_all<0x73, MRM3r, "psrldq", X86vshrdq>;
// PSRADQri doesn't exist in SSE[1-3].
-}
-} // Constraints = "$src1 = $dst"
+} // ExeDomain = SSEPackedInt
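The refactored shift multiclasses still expose the same three forms per opcode: shift by an XMM count (rr/rm, the count taken from the low 64 bits of src2) and shift by an immediate (ri). In C these map to the _mm_sll_* and _mm_slli_* style intrinsics (flags and expected output are illustrative):

    /* pslld_demo.c -- illustrative only; e.g. clang or GCC with -O2 -msse2 */
    #include <emmintrin.h>
    #include <stdio.h>

    int main(void) {
        __m128i v = _mm_setr_epi32(1, 2, 3, 4);
        __m128i a = _mm_slli_epi32(v, 4);                      /* "ri" form */
        __m128i b = _mm_sll_epi32(v, _mm_cvtsi32_si128(4));    /* "rr" form */
        int oa[4], ob[4];
        _mm_storeu_si128((__m128i *)oa, a);
        _mm_storeu_si128((__m128i *)ob, b);
        printf("%d %d %d %d | %d %d %d %d\n",
               oa[0], oa[1], oa[2], oa[3], ob[0], ob[1], ob[2], ob[3]);
        return 0;
    }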
//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Comparison Instructions
@@ -4651,6 +4532,7 @@ def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
//===---------------------------------------------------------------------===//
// Move Int Doubleword to Packed Double Int
//
+let ExeDomain = SSEPackedInt in {
def VMOVDI2PDIrr : VS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -4701,11 +4583,12 @@ def MOV64toSDrr : RS2I<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (bitconvert GR64:$src))],
IIC_SSE_MOVDQ>, Sched<[WriteMove]>;
+} // ExeDomain = SSEPackedInt
//===---------------------------------------------------------------------===//
// Move Int Doubleword to Single Scalar
//
-let isCodeGenOnly = 1 in {
+let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
def VMOVDI2SSrr : VS2I<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (bitconvert GR32:$src))],
@@ -4725,11 +4608,12 @@ let isCodeGenOnly = 1 in {
"movd\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (bitconvert (loadi32 addr:$src)))],
IIC_SSE_MOVDQ>, Sched<[WriteLoad]>;
-}
+} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
//===---------------------------------------------------------------------===//
// Move Packed Doubleword Int to Packed Double Int
//
+let ExeDomain = SSEPackedInt in {
def VMOVPDI2DIrr : VS2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (extractelt (v4i32 VR128:$src),
@@ -4751,6 +4635,7 @@ def MOVPDI2DImr : S2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src),
[(store (i32 (extractelt (v4i32 VR128:$src),
(iPTR 0))), addr:$dst)],
IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
+} // ExeDomain = SSEPackedInt
def : Pat<(v8i32 (X86Vinsert (v8i32 immAllZerosV), GR32:$src2, (iPTR 0))),
(SUBREG_TO_REG (i32 0), (VMOVDI2PDIrr GR32:$src2), sub_xmm)>;
@@ -4767,6 +4652,7 @@ def : Pat<(v4i64 (X86Vinsert undef, GR64:$src2, (iPTR 0))),
//===---------------------------------------------------------------------===//
// Move Packed Doubleword Int first element to Doubleword Int
//
+let ExeDomain = SSEPackedInt in {
let SchedRW = [WriteMove] in {
def VMOVPQIto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
@@ -4791,11 +4677,12 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
def MOVPQIto64rm : RS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"mov{d|q}\t{$src, $dst|$dst, $src}",
[], IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
+} // ExeDomain = SSEPackedInt
//===---------------------------------------------------------------------===//
// Bitcast FR64 <-> GR64
//
-let isCodeGenOnly = 1 in {
+let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
let Predicates = [UseAVX] in
def VMOV64toSDrm : VS2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
"movq\t{$src, $dst|$dst, $src}",
@@ -4822,12 +4709,12 @@ let isCodeGenOnly = 1 in {
"movq\t{$src, $dst|$dst, $src}",
[(store (i64 (bitconvert FR64:$src)), addr:$dst)],
IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
-}
+} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
//===---------------------------------------------------------------------===//
// Move Scalar Single to Double Int
//
-let isCodeGenOnly = 1 in {
+let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
def VMOVSS2DIrr : VS2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (bitconvert FR32:$src))],
@@ -4844,7 +4731,7 @@ let isCodeGenOnly = 1 in {
"movd\t{$src, $dst|$dst, $src}",
[(store (i32 (bitconvert FR32:$src)), addr:$dst)],
IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
-}
+} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
let Predicates = [UseAVX] in {
let AddedComplexity = 15 in {
@@ -4867,9 +4754,13 @@ let Predicates = [UseAVX] in {
(VMOVDI2PDIrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
(VMOVDI2PDIrm addr:$src)>;
+ def : Pat<(v4i32 (X86vzload addr:$src)),
+ (VMOVDI2PDIrm addr:$src)>;
def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
(v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
(SUBREG_TO_REG (i32 0), (VMOVDI2PDIrm addr:$src), sub_xmm)>;
+ def : Pat<(v8i32 (X86vzload addr:$src)),
+ (SUBREG_TO_REG (i64 0), (VMOVDI2PDIrm addr:$src), sub_xmm)>;
}
// Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
@@ -4892,6 +4783,8 @@ let Predicates = [UseSSE2] in {
(MOVDI2PDIrm addr:$src)>;
def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
(MOVDI2PDIrm addr:$src)>;
+ def : Pat<(v4i32 (X86vzload addr:$src)),
+ (MOVDI2PDIrm addr:$src)>;
}
}
@@ -4960,43 +4853,30 @@ def MOVPQI2QIrr : S2I<0xD6, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
def : InstAlias<"vmovq\t{$src, $dst|$dst, $src}",
(VMOVPQI2QIrr VR128L:$dst, VR128H:$src), 0>;
-//===---------------------------------------------------------------------===//
-// Store / copy lower 64-bits of a XMM register.
-//
-let ExeDomain = SSEPackedInt, isCodeGenOnly = 1, AddedComplexity = 20 in {
-def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
- "vmovq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v2i64 (X86vzmovl (v2i64 (scalar_to_vector
- (loadi64 addr:$src))))))],
- IIC_SSE_MOVDQ>,
- XS, VEX, Requires<[UseAVX]>, Sched<[WriteLoad]>;
-
-def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
- "movq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v2i64 (X86vzmovl (v2i64 (scalar_to_vector
- (loadi64 addr:$src))))))],
- IIC_SSE_MOVDQ>,
- XS, Requires<[UseSSE2]>, Sched<[WriteLoad]>;
-} // ExeDomain, isCodeGenOnly, AddedComplexity
-
let Predicates = [UseAVX], AddedComplexity = 20 in {
+ def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
+ (VMOVQI2PQIrm addr:$src)>;
+ def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
+ (VMOVQI2PQIrm addr:$src)>;
def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))),
- (VMOVZQI2PQIrm addr:$src)>;
+ (VMOVQI2PQIrm addr:$src)>;
def : Pat<(v2i64 (X86vzload addr:$src)),
- (VMOVZQI2PQIrm addr:$src)>;
+ (VMOVQI2PQIrm addr:$src)>;
def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
(v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
- (SUBREG_TO_REG (i64 0), (VMOVZQI2PQIrm addr:$src), sub_xmm)>;
+ (SUBREG_TO_REG (i64 0), (VMOVQI2PQIrm addr:$src), sub_xmm)>;
def : Pat<(v4i64 (X86vzload addr:$src)),
- (SUBREG_TO_REG (i64 0), (VMOVZQI2PQIrm addr:$src), sub_xmm)>;
+ (SUBREG_TO_REG (i64 0), (VMOVQI2PQIrm addr:$src), sub_xmm)>;
}
let Predicates = [UseSSE2], AddedComplexity = 20 in {
+ def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
+ (MOVQI2PQIrm addr:$src)>;
+ def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
+ (MOVQI2PQIrm addr:$src)>;
def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))),
- (MOVZQI2PQIrm addr:$src)>;
- def : Pat<(v2i64 (X86vzload addr:$src)), (MOVZQI2PQIrm addr:$src)>;
+ (MOVQI2PQIrm addr:$src)>;
+ def : Pat<(v2i64 (X86vzload addr:$src)), (MOVQI2PQIrm addr:$src)>;
}
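The zero-extending load these patterns now send to (V)MOVQI2PQIrm is what _mm_loadl_epi64 exposes in C: the low 64 bits come from memory and the upper 64 bits are zeroed (flags and expected output are illustrative):

    /* movq_load_demo.c -- illustrative only; e.g. clang or GCC with -O2 -msse2 */
    #include <emmintrin.h>
    #include <stdio.h>

    int main(void) {
        long long x = 0x1122334455667788LL;
        __m128i v = _mm_loadl_epi64((const __m128i *)&x);   /* movq (load form) */
        long long out[2];
        _mm_storeu_si128((__m128i *)out, v);
        printf("%llx %llx\n", (unsigned long long)out[0],
               (unsigned long long)out[1]);   /* 1122334455667788 0 */
        return 0;
    }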
//===---------------------------------------------------------------------===//
@@ -5018,24 +4898,6 @@ def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
XS, Requires<[UseSSE2]>;
} // ExeDomain, SchedRW
-let ExeDomain = SSEPackedInt, isCodeGenOnly = 1, SchedRW = [WriteVecLogicLd] in {
-let AddedComplexity = 20 in
-def VMOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
- "vmovq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (v2i64 (X86vzmovl
- (loadv2i64 addr:$src))))],
- IIC_SSE_MOVDQ>,
- XS, VEX, Requires<[UseAVX]>;
-let AddedComplexity = 20 in {
-def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
- "movq\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (v2i64 (X86vzmovl
- (loadv2i64 addr:$src))))],
- IIC_SSE_MOVDQ>,
- XS, Requires<[UseSSE2]>;
-}
-} // ExeDomain, isCodeGenOnly, SchedRW
-
let AddedComplexity = 20 in {
let Predicates = [UseAVX] in {
def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
@@ -5167,12 +5029,12 @@ let Predicates = [HasAVX] in {
(VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
}
-let Predicates = [UseAVX, OptForSize] in {
- def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
- (VMOVDDUPrm addr:$src)>;
- def : Pat<(v2i64 (X86VBroadcast (loadi64 addr:$src))),
- (VMOVDDUPrm addr:$src)>;
-}
+let Predicates = [HasAVX, NoVLX] in
+def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
+ (VMOVDDUPrm addr:$src)>;
+let Predicates = [HasAVX1Only] in
+def : Pat<(v2i64 (X86VBroadcast (loadi64 addr:$src))),
+ (VMOVDDUPrm addr:$src)>;
let Predicates = [UseSSE3] in {
def : Pat<(X86Movddup (memopv2f64 addr:$src)),
@@ -5370,35 +5232,35 @@ let Constraints = "$src1 = $dst" in {
/// SS3I_unop_rm_int - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
multiclass SS3I_unop_rm<bits<8> opc, string OpcodeStr, ValueType vt,
SDNode OpNode, PatFrag ld_frag> {
- def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
- (ins VR128:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst, (vt (OpNode VR128:$src)))],
- IIC_SSE_PABS_RR>, Sched<[WriteVecALU]>;
+ def rr : SS38I<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst, (vt (OpNode VR128:$src)))],
+ IIC_SSE_PABS_RR>, Sched<[WriteVecALU]>;
- def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
- (ins i128mem:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR128:$dst,
- (vt (OpNode (bitconvert (ld_frag addr:$src)))))],
- IIC_SSE_PABS_RM>, Sched<[WriteVecALULd]>;
+ def rm : SS38I<opc, MRMSrcMem, (outs VR128:$dst),
+ (ins i128mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR128:$dst,
+ (vt (OpNode (bitconvert (ld_frag addr:$src)))))],
+ IIC_SSE_PABS_RM>, Sched<[WriteVecALULd]>;
}
/// SS3I_unop_rm_int_y - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}.
multiclass SS3I_unop_rm_y<bits<8> opc, string OpcodeStr, ValueType vt,
SDNode OpNode> {
- def rr256 : SS38I<opc, MRMSrcReg, (outs VR256:$dst),
- (ins VR256:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst, (vt (OpNode VR256:$src)))]>,
- Sched<[WriteVecALU]>;
+ def Yrr : SS38I<opc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst, (vt (OpNode VR256:$src)))]>,
+ Sched<[WriteVecALU]>;
- def rm256 : SS38I<opc, MRMSrcMem, (outs VR256:$dst),
- (ins i256mem:$src),
- !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
- [(set VR256:$dst,
- (vt (OpNode (bitconvert (loadv4i64 addr:$src)))))]>,
- Sched<[WriteVecALULd]>;
+ def Yrm : SS38I<opc, MRMSrcMem, (outs VR256:$dst),
+ (ins i256mem:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set VR256:$dst,
+ (vt (OpNode (bitconvert (loadv4i64 addr:$src)))))]>,
+ Sched<[WriteVecALULd]>;
}
// Helper fragments to match sext vXi1 to vXiY.
@@ -5419,19 +5281,21 @@ let Predicates = [HasAVX, NoVLX] in {
defm VPABSD : SS3I_unop_rm<0x1E, "vpabsd", v4i32, X86Abs, loadv2i64>, VEX;
}
-let Predicates = [HasAVX] in {
+let Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
def : Pat<(xor
(bc_v2i64 (v16i1sextv16i8)),
(bc_v2i64 (add (v16i8 VR128:$src), (v16i1sextv16i8)))),
- (VPABSBrr128 VR128:$src)>;
+ (VPABSBrr VR128:$src)>;
def : Pat<(xor
(bc_v2i64 (v8i1sextv8i16)),
(bc_v2i64 (add (v8i16 VR128:$src), (v8i1sextv8i16)))),
- (VPABSWrr128 VR128:$src)>;
+ (VPABSWrr VR128:$src)>;
+}
+let Predicates = [HasAVX, NoVLX] in {
def : Pat<(xor
(bc_v2i64 (v4i1sextv4i32)),
(bc_v2i64 (add (v4i32 VR128:$src), (v4i1sextv4i32)))),
- (VPABSDrr128 VR128:$src)>;
+ (VPABSDrr VR128:$src)>;
}
let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
@@ -5442,19 +5306,21 @@ let Predicates = [HasAVX2, NoVLX] in {
defm VPABSD : SS3I_unop_rm_y<0x1E, "vpabsd", v8i32, X86Abs>, VEX, VEX_L;
}
-let Predicates = [HasAVX2] in {
+let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
def : Pat<(xor
(bc_v4i64 (v32i1sextv32i8)),
(bc_v4i64 (add (v32i8 VR256:$src), (v32i1sextv32i8)))),
- (VPABSBrr256 VR256:$src)>;
+ (VPABSBYrr VR256:$src)>;
def : Pat<(xor
(bc_v4i64 (v16i1sextv16i16)),
(bc_v4i64 (add (v16i16 VR256:$src), (v16i1sextv16i16)))),
- (VPABSWrr256 VR256:$src)>;
+ (VPABSWYrr VR256:$src)>;
+}
+let Predicates = [HasAVX2, NoVLX] in {
def : Pat<(xor
(bc_v4i64 (v8i1sextv8i32)),
(bc_v4i64 (add (v8i32 VR256:$src), (v8i1sextv8i32)))),
- (VPABSDrr256 VR256:$src)>;
+ (VPABSDYrr VR256:$src)>;
}
defm PABSB : SS3I_unop_rm<0x1C, "pabsb", v16i8, X86Abs, memopv2i64>;
@@ -5465,15 +5331,15 @@ let Predicates = [UseSSSE3] in {
def : Pat<(xor
(bc_v2i64 (v16i1sextv16i8)),
(bc_v2i64 (add (v16i8 VR128:$src), (v16i1sextv16i8)))),
- (PABSBrr128 VR128:$src)>;
+ (PABSBrr VR128:$src)>;
def : Pat<(xor
(bc_v2i64 (v8i1sextv8i16)),
(bc_v2i64 (add (v8i16 VR128:$src), (v8i1sextv8i16)))),
- (PABSWrr128 VR128:$src)>;
+ (PABSWrr VR128:$src)>;
def : Pat<(xor
(bc_v2i64 (v4i1sextv4i32)),
(bc_v2i64 (add (v4i32 VR128:$src), (v4i1sextv4i32)))),
- (PABSDrr128 VR128:$src)>;
+ (PABSDrr VR128:$src)>;
}
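The (V)PABS patterns above all recognize the same branch-free absolute-value idiom: XOR the sign mask into the sum of the value and its sign mask. A minimal scalar sketch of the identity being matched (plain C++, illustrative only, not part of the TableGen source):

#include <cstdint>

// mask = x >> (bits-1): arithmetic shift yields all-ones when x < 0, else 0.
// abs(x) = (x + mask) ^ mask; the lane-wise vector form of this expression is
// what the patterns above collapse into a single (V)PABSB/W/D.
static inline int32_t abs_branchless(int32_t x) {
  int32_t mask = x >> 31;    // 0 or -1
  return (x + mask) ^ mask;  // negates x exactly when mask == -1
}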
//===---------------------------------------------------------------------===//
@@ -5506,16 +5372,16 @@ def SSE_PMULHRSW : OpndItins<
/// SS3I_binop_rm - Simple SSSE3 bin op
multiclass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
- ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
- X86MemOperand x86memop, OpndItins itins,
- bit Is2Addr = 1> {
+ ValueType DstVT, ValueType OpVT, RegisterClass RC,
+ PatFrag memop_frag, X86MemOperand x86memop,
+ OpndItins itins, bit Is2Addr = 1> {
let isCommutable = 1 in
def rr : SS38I<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2),
!if(Is2Addr,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
- [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>,
+ [(set RC:$dst, (DstVT (OpNode (OpVT RC:$src1), RC:$src2)))], itins.rr>,
Sched<[itins.Sched]>;
def rm : SS38I<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2),
@@ -5523,7 +5389,7 @@ multiclass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode,
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
[(set RC:$dst,
- (OpVT (OpNode RC:$src1,
+ (DstVT (OpNode (OpVT RC:$src1),
(bitconvert (memop_frag addr:$src2)))))], itins.rm>,
Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
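The new DstVT parameter exists because PMADDUBSW/VPMADDUBSW produce a different element type than they consume: two v16i8 inputs yield one v8i16 result. A short sketch using the documented SSSE3 intrinsic, only to illustrate that type split (the helper name is made up):

#include <immintrin.h>

// Each unsigned byte of `a` is multiplied by the corresponding signed byte of
// `b`; adjacent 16-bit products are added with signed saturation, so two
// v16i8 operands (OpVT) become one v8i16 result (DstVT).
__m128i dot_u8_s8_pairs(__m128i a, __m128i b) {
  return _mm_maddubs_epi16(a, b);   // SSSE3 PMADDUBSW
}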
@@ -5568,18 +5434,32 @@ multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr,
Sched<[Sched.Folded, ReadAfterLd]>;
}
+let ImmT = NoImm, Predicates = [HasAVX, NoVLX_Or_NoBWI] in {
+let isCommutable = 0 in {
+ defm VPSHUFB : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v16i8, v16i8,
+ VR128, loadv2i64, i128mem,
+ SSE_PSHUFB, 0>, VEX_4V;
+ defm VPMADDUBSW : SS3I_binop_rm<0x04, "vpmaddubsw", X86vpmaddubsw, v8i16,
+ v16i8, VR128, loadv2i64, i128mem,
+ SSE_PMADD, 0>, VEX_4V;
+}
+defm VPMULHRSW : SS3I_binop_rm<0x0B, "vpmulhrsw", X86mulhrs, v8i16, v8i16,
+ VR128, loadv2i64, i128mem,
+ SSE_PMULHRSW, 0>, VEX_4V;
+}
+
let ImmT = NoImm, Predicates = [HasAVX] in {
let isCommutable = 0 in {
- defm VPHADDW : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v8i16, VR128,
+ defm VPHADDW : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v8i16, v8i16, VR128,
loadv2i64, i128mem,
SSE_PHADDSUBW, 0>, VEX_4V;
- defm VPHADDD : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v4i32, VR128,
+ defm VPHADDD : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v4i32, v4i32, VR128,
loadv2i64, i128mem,
SSE_PHADDSUBD, 0>, VEX_4V;
- defm VPHSUBW : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v8i16, VR128,
+ defm VPHSUBW : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v8i16, v8i16, VR128,
loadv2i64, i128mem,
SSE_PHADDSUBW, 0>, VEX_4V;
- defm VPHSUBD : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, VR128,
+ defm VPHSUBD : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, v4i32, VR128,
loadv2i64, i128mem,
SSE_PHADDSUBD, 0>, VEX_4V;
defm VPSIGNB : SS3I_binop_rm_int<0x08, "vpsignb",
@@ -5591,36 +5471,41 @@ let isCommutable = 0 in {
defm VPSIGND : SS3I_binop_rm_int<0x0A, "vpsignd",
int_x86_ssse3_psign_d_128,
SSE_PSIGN, loadv2i64, 0>, VEX_4V;
- defm VPSHUFB : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v16i8, VR128,
- loadv2i64, i128mem,
- SSE_PSHUFB, 0>, VEX_4V;
defm VPHADDSW : SS3I_binop_rm_int<0x03, "vphaddsw",
int_x86_ssse3_phadd_sw_128,
SSE_PHADDSUBSW, loadv2i64, 0>, VEX_4V;
defm VPHSUBSW : SS3I_binop_rm_int<0x07, "vphsubsw",
int_x86_ssse3_phsub_sw_128,
SSE_PHADDSUBSW, loadv2i64, 0>, VEX_4V;
- defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw",
- int_x86_ssse3_pmadd_ub_sw_128,
- SSE_PMADD, loadv2i64, 0>, VEX_4V;
}
-defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw",
- int_x86_ssse3_pmul_hr_sw_128,
- SSE_PMULHRSW, loadv2i64, 0>, VEX_4V;
+}
+
+let ImmT = NoImm, Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
+let isCommutable = 0 in {
+ defm VPSHUFBY : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v32i8, v32i8,
+ VR256, loadv4i64, i256mem,
+ SSE_PSHUFB, 0>, VEX_4V, VEX_L;
+ defm VPMADDUBSWY : SS3I_binop_rm<0x04, "vpmaddubsw", X86vpmaddubsw, v16i16,
+ v32i8, VR256, loadv4i64, i256mem,
+ SSE_PMADD, 0>, VEX_4V, VEX_L;
+}
+defm VPMULHRSWY : SS3I_binop_rm<0x0B, "vpmulhrsw", X86mulhrs, v16i16, v16i16,
+ VR256, loadv4i64, i256mem,
+ SSE_PMULHRSW, 0>, VEX_4V, VEX_L;
}
let ImmT = NoImm, Predicates = [HasAVX2] in {
let isCommutable = 0 in {
- defm VPHADDWY : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v16i16, VR256,
- loadv4i64, i256mem,
+ defm VPHADDWY : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v16i16, v16i16,
+ VR256, loadv4i64, i256mem,
SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
- defm VPHADDDY : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v8i32, VR256,
+ defm VPHADDDY : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v8i32, v8i32, VR256,
loadv4i64, i256mem,
SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
- defm VPHSUBWY : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v16i16, VR256,
- loadv4i64, i256mem,
+ defm VPHSUBWY : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v16i16, v16i16,
+ VR256, loadv4i64, i256mem,
SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
- defm VPHSUBDY : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, VR256,
+ defm VPHSUBDY : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, v8i32, VR256,
loadv4i64, i256mem,
SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
defm VPSIGNBY : SS3I_binop_rm_int_y<0x08, "vpsignb", int_x86_avx2_psign_b,
@@ -5629,34 +5514,25 @@ let isCommutable = 0 in {
WriteVecALU>, VEX_4V, VEX_L;
defm VPSIGNDY : SS3I_binop_rm_int_y<0x0A, "vpsignd", int_x86_avx2_psign_d,
WriteVecALU>, VEX_4V, VEX_L;
- defm VPSHUFBY : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v32i8, VR256,
- loadv4i64, i256mem,
- SSE_PSHUFB, 0>, VEX_4V, VEX_L;
defm VPHADDSW : SS3I_binop_rm_int_y<0x03, "vphaddsw",
int_x86_avx2_phadd_sw,
WriteVecALU>, VEX_4V, VEX_L;
defm VPHSUBSW : SS3I_binop_rm_int_y<0x07, "vphsubsw",
int_x86_avx2_phsub_sw,
WriteVecALU>, VEX_4V, VEX_L;
- defm VPMADDUBSW : SS3I_binop_rm_int_y<0x04, "vpmaddubsw",
- int_x86_avx2_pmadd_ub_sw,
- WriteVecIMul>, VEX_4V, VEX_L;
}
-defm VPMULHRSW : SS3I_binop_rm_int_y<0x0B, "vpmulhrsw",
- int_x86_avx2_pmul_hr_sw,
- WriteVecIMul>, VEX_4V, VEX_L;
}
// None of these have i8 immediate fields.
let ImmT = NoImm, Constraints = "$src1 = $dst" in {
let isCommutable = 0 in {
- defm PHADDW : SS3I_binop_rm<0x01, "phaddw", X86hadd, v8i16, VR128,
+ defm PHADDW : SS3I_binop_rm<0x01, "phaddw", X86hadd, v8i16, v8i16, VR128,
memopv2i64, i128mem, SSE_PHADDSUBW>;
- defm PHADDD : SS3I_binop_rm<0x02, "phaddd", X86hadd, v4i32, VR128,
+ defm PHADDD : SS3I_binop_rm<0x02, "phaddd", X86hadd, v4i32, v4i32, VR128,
memopv2i64, i128mem, SSE_PHADDSUBD>;
- defm PHSUBW : SS3I_binop_rm<0x05, "phsubw", X86hsub, v8i16, VR128,
+ defm PHSUBW : SS3I_binop_rm<0x05, "phsubw", X86hsub, v8i16, v8i16, VR128,
memopv2i64, i128mem, SSE_PHADDSUBW>;
- defm PHSUBD : SS3I_binop_rm<0x06, "phsubd", X86hsub, v4i32, VR128,
+ defm PHSUBD : SS3I_binop_rm<0x06, "phsubd", X86hsub, v4i32, v4i32, VR128,
memopv2i64, i128mem, SSE_PHADDSUBD>;
defm PSIGNB : SS3I_binop_rm_int<0x08, "psignb", int_x86_ssse3_psign_b_128,
SSE_PSIGN, memopv2i64>;
@@ -5664,7 +5540,7 @@ let isCommutable = 0 in {
SSE_PSIGN, memopv2i64>;
defm PSIGND : SS3I_binop_rm_int<0x0A, "psignd", int_x86_ssse3_psign_d_128,
SSE_PSIGN, memopv2i64>;
- defm PSHUFB : SS3I_binop_rm<0x00, "pshufb", X86pshufb, v16i8, VR128,
+ defm PSHUFB : SS3I_binop_rm<0x00, "pshufb", X86pshufb, v16i8, v16i8, VR128,
memopv2i64, i128mem, SSE_PSHUFB>;
defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw",
int_x86_ssse3_phadd_sw_128,
@@ -5672,13 +5548,12 @@ let isCommutable = 0 in {
defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw",
int_x86_ssse3_phsub_sw_128,
SSE_PHADDSUBSW, memopv2i64>;
- defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw",
- int_x86_ssse3_pmadd_ub_sw_128,
- SSE_PMADD, memopv2i64>;
+ defm PMADDUBSW : SS3I_binop_rm<0x04, "pmaddubsw", X86vpmaddubsw, v8i16,
+ v16i8, VR128, memopv2i64, i128mem,
+ SSE_PMADD>;
}
-defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw",
- int_x86_ssse3_pmul_hr_sw_128,
- SSE_PMULHRSW, memopv2i64>;
+defm PMULHRSW : SS3I_binop_rm<0x0B, "pmulhrsw", X86mulhrs, v8i16, v8i16,
+ VR128, memopv2i64, i128mem, SSE_PMULHRSW>;
}
//===---------------------------------------------------------------------===//
@@ -5895,8 +5770,6 @@ multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy, SDNode ExtO
(!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
- def : Pat<(v16i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
}
let Predicates = [HasAVX, NoVLX] in {
def : Pat<(v8i32 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
@@ -5923,8 +5796,6 @@ multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy, SDNode ExtO
(!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
- def : Pat<(v8i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
def : Pat<(v4i64 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
@@ -5941,8 +5812,6 @@ multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy, SDNode ExtO
(!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
(!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
- def : Pat<(v4i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),
- (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
}
}
@@ -6342,10 +6211,10 @@ let Predicates = [UseAVX] in {
// SSE4.1 - Round Instructions
//===----------------------------------------------------------------------===//
-multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
- X86MemOperand x86memop, RegisterClass RC,
- PatFrag mem_frag32, PatFrag mem_frag64,
- Intrinsic V4F32Int, Intrinsic V2F64Int> {
+multiclass sse41_fp_unop_p<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
+ X86MemOperand x86memop, RegisterClass RC,
+ PatFrag mem_frag32, PatFrag mem_frag64,
+ Intrinsic V4F32Int, Intrinsic V2F64Int> {
let ExeDomain = SSEPackedSingle in {
// Intrinsic operation, reg.
// Vector intrinsic operation, reg
@@ -6386,24 +6255,73 @@ let ExeDomain = SSEPackedDouble in {
} // ExeDomain = SSEPackedDouble
}
-multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
- string OpcodeStr,
- Intrinsic F32Int,
- Intrinsic F64Int, bit Is2Addr = 1> {
-let ExeDomain = GenericDomain in {
- // Operation, reg.
- let hasSideEffects = 0 in
+multiclass avx_fp_unop_rm<bits<8> opcss, bits<8> opcsd,
+ string OpcodeStr> {
+let ExeDomain = GenericDomain, hasSideEffects = 0 in {
def SSr : SS4AIi8<opcss, MRMSrcReg,
- (outs FR32:$dst), (ins FR32:$src1, FR32:$src2, i32u8imm:$src3),
- !if(Is2Addr,
- !strconcat(OpcodeStr,
- "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- !strconcat(OpcodeStr,
- "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
+ (outs FR32:$dst), (ins FR32:$src1, FR32:$src2, i32u8imm:$src3),
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[]>, Sched<[WriteFAdd]>;
- // Intrinsic operation, reg.
- let isCodeGenOnly = 1 in
+ let mayLoad = 1 in
+ def SSm : SS4AIi8<opcss, MRMSrcMem,
+ (outs FR32:$dst), (ins FR32:$src1, f32mem:$src2, i32u8imm:$src3),
+ !strconcat(OpcodeStr,
+ "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, Sched<[WriteFAddLd, ReadAfterLd]>;
+
+ def SDr : SS4AIi8<opcsd, MRMSrcReg,
+ (outs FR64:$dst), (ins FR64:$src1, FR64:$src2, i32u8imm:$src3),
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, Sched<[WriteFAdd]>;
+
+ let mayLoad = 1 in
+ def SDm : SS4AIi8<opcsd, MRMSrcMem,
+ (outs FR64:$dst), (ins FR64:$src1, f64mem:$src2, i32u8imm:$src3),
+ !strconcat(OpcodeStr,
+ "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
+ []>, Sched<[WriteFAddLd, ReadAfterLd]>;
+} // ExeDomain = GenericDomain, hasSideEffects = 0
+}
+
+multiclass sse41_fp_unop_s<bits<8> opcss, bits<8> opcsd,
+ string OpcodeStr> {
+let ExeDomain = GenericDomain, hasSideEffects = 0 in {
+ def SSr : SS4AIi8<opcss, MRMSrcReg,
+ (outs FR32:$dst), (ins FR32:$src1, i32u8imm:$src2),
+ !strconcat(OpcodeStr,
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, Sched<[WriteFAdd]>;
+
+ let mayLoad = 1 in
+ def SSm : SS4AIi8<opcss, MRMSrcMem,
+ (outs FR32:$dst), (ins f32mem:$src1, i32u8imm:$src2),
+ !strconcat(OpcodeStr,
+ "ss\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, Sched<[WriteFAddLd, ReadAfterLd]>;
+
+ def SDr : SS4AIi8<opcsd, MRMSrcReg,
+ (outs FR64:$dst), (ins FR64:$src1, i32u8imm:$src2),
+ !strconcat(OpcodeStr,
+ "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, Sched<[WriteFAdd]>;
+
+ let mayLoad = 1 in
+ def SDm : SS4AIi8<opcsd, MRMSrcMem,
+ (outs FR64:$dst), (ins f64mem:$src1, i32u8imm:$src2),
+ !strconcat(OpcodeStr,
+ "sd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>, Sched<[WriteFAddLd, ReadAfterLd]>;
+} // ExeDomain = GenericDomain, hasSideEffects = 0
+}
+
+multiclass sse41_fp_binop_s<bits<8> opcss, bits<8> opcsd,
+ string OpcodeStr,
+ Intrinsic F32Int,
+ Intrinsic F64Int, bit Is2Addr = 1> {
+let ExeDomain = GenericDomain, isCodeGenOnly = 1 in {
def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32u8imm:$src3),
!if(Is2Addr,
@@ -6414,8 +6332,7 @@ let ExeDomain = GenericDomain in {
[(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2, imm:$src3))]>,
Sched<[WriteFAdd]>;
- // Intrinsic operation, mem.
- def SSm : SS4AIi8<opcss, MRMSrcMem,
+ def SSm_Int : SS4AIi8<opcss, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32u8imm:$src3),
!if(Is2Addr,
!strconcat(OpcodeStr,
@@ -6426,19 +6343,6 @@ let ExeDomain = GenericDomain in {
(F32Int VR128:$src1, sse_load_f32:$src2, imm:$src3))]>,
Sched<[WriteFAddLd, ReadAfterLd]>;
- // Operation, reg.
- let hasSideEffects = 0 in
- def SDr : SS4AIi8<opcsd, MRMSrcReg,
- (outs FR64:$dst), (ins FR64:$src1, FR64:$src2, i32u8imm:$src3),
- !if(Is2Addr,
- !strconcat(OpcodeStr,
- "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
- !strconcat(OpcodeStr,
- "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
- []>, Sched<[WriteFAdd]>;
-
- // Intrinsic operation, reg.
- let isCodeGenOnly = 1 in
def SDr_Int : SS4AIi8<opcsd, MRMSrcReg,
(outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32u8imm:$src3),
!if(Is2Addr,
@@ -6449,8 +6353,7 @@ let ExeDomain = GenericDomain in {
[(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2, imm:$src3))]>,
Sched<[WriteFAdd]>;
- // Intrinsic operation, mem.
- def SDm : SS4AIi8<opcsd, MRMSrcMem,
+ def SDm_Int : SS4AIi8<opcsd, MRMSrcMem,
(outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32u8imm:$src3),
!if(Is2Addr,
!strconcat(OpcodeStr,
@@ -6460,23 +6363,24 @@ let ExeDomain = GenericDomain in {
[(set VR128:$dst,
(F64Int VR128:$src1, sse_load_f64:$src2, imm:$src3))]>,
Sched<[WriteFAddLd, ReadAfterLd]>;
-} // ExeDomain = GenericDomain
+} // ExeDomain = GenericDomain, isCodeGenOnly = 1
}
// FP round - roundss, roundps, roundsd, roundpd
let Predicates = [HasAVX] in {
// Intrinsic form
- defm VROUND : sse41_fp_unop_rm<0x08, 0x09, "vround", f128mem, VR128,
- loadv4f32, loadv2f64,
- int_x86_sse41_round_ps,
- int_x86_sse41_round_pd>, VEX;
- defm VROUNDY : sse41_fp_unop_rm<0x08, 0x09, "vround", f256mem, VR256,
- loadv8f32, loadv4f64,
- int_x86_avx_round_ps_256,
- int_x86_avx_round_pd_256>, VEX, VEX_L;
- defm VROUND : sse41_fp_binop_rm<0x0A, 0x0B, "vround",
- int_x86_sse41_round_ss,
- int_x86_sse41_round_sd, 0>, VEX_4V, VEX_LIG;
+ defm VROUND : sse41_fp_unop_p<0x08, 0x09, "vround", f128mem, VR128,
+ loadv4f32, loadv2f64,
+ int_x86_sse41_round_ps,
+ int_x86_sse41_round_pd>, VEX;
+ defm VROUNDY : sse41_fp_unop_p<0x08, 0x09, "vround", f256mem, VR256,
+ loadv8f32, loadv4f64,
+ int_x86_avx_round_ps_256,
+ int_x86_avx_round_pd_256>, VEX, VEX_L;
+ defm VROUND : sse41_fp_binop_s<0x0A, 0x0B, "vround",
+ int_x86_sse41_round_ss,
+ int_x86_sse41_round_sd, 0>, VEX_4V, VEX_LIG;
+ defm VROUND : avx_fp_unop_rm<0x0A, 0x0B, "vround">, VEX_4V, VEX_LIG;
}
let Predicates = [UseAVX] in {
@@ -6548,34 +6452,37 @@ let Predicates = [HasAVX] in {
(VROUNDYPDr VR256:$src, (i32 0xB))>;
}
-defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", f128mem, VR128,
- memopv4f32, memopv2f64,
- int_x86_sse41_round_ps, int_x86_sse41_round_pd>;
+defm ROUND : sse41_fp_unop_p<0x08, 0x09, "round", f128mem, VR128,
+ memopv4f32, memopv2f64, int_x86_sse41_round_ps,
+ int_x86_sse41_round_pd>;
+
+defm ROUND : sse41_fp_unop_s<0x0A, 0x0B, "round">;
+
let Constraints = "$src1 = $dst" in
-defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round",
+defm ROUND : sse41_fp_binop_s<0x0A, 0x0B, "round",
int_x86_sse41_round_ss, int_x86_sse41_round_sd>;
let Predicates = [UseSSE41] in {
def : Pat<(ffloor FR32:$src),
- (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x9))>;
+ (ROUNDSSr FR32:$src, (i32 0x9))>;
def : Pat<(f64 (ffloor FR64:$src)),
- (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x9))>;
+ (ROUNDSDr FR64:$src, (i32 0x9))>;
def : Pat<(f32 (fnearbyint FR32:$src)),
- (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>;
+ (ROUNDSSr FR32:$src, (i32 0xC))>;
def : Pat<(f64 (fnearbyint FR64:$src)),
- (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>;
+ (ROUNDSDr FR64:$src, (i32 0xC))>;
def : Pat<(f32 (fceil FR32:$src)),
- (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xA))>;
+ (ROUNDSSr FR32:$src, (i32 0xA))>;
def : Pat<(f64 (fceil FR64:$src)),
- (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xA))>;
+ (ROUNDSDr FR64:$src, (i32 0xA))>;
def : Pat<(f32 (frint FR32:$src)),
- (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>;
+ (ROUNDSSr FR32:$src, (i32 0x4))>;
def : Pat<(f64 (frint FR64:$src)),
- (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>;
+ (ROUNDSDr FR64:$src, (i32 0x4))>;
def : Pat<(f32 (ftrunc FR32:$src)),
- (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xB))>;
+ (ROUNDSSr FR32:$src, (i32 0xB))>;
def : Pat<(f64 (ftrunc FR64:$src)),
- (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xB))>;
+ (ROUNDSDr FR64:$src, (i32 0xB))>;
def : Pat<(v4f32 (ffloor VR128:$src)),
(ROUNDPSr VR128:$src, (i32 0x9))>;
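The immediates in these patterns are ROUNDSS/ROUNDPS control bytes: bits 1:0 select the rounding mode (0 nearest, 1 down, 2 up, 3 truncate), bit 2 switches to MXCSR.RC, and bit 3 suppresses the precision (inexact) exception. That is how 0x9/0xA/0xB/0xC/0x4 map to ffloor/fceil/ftrunc/fnearbyint/frint. The same encodings back the SSE4.1 intrinsic macros, shown here as a sketch (function names are invented for illustration):

#include <smmintrin.h>

__m128 floor_ps(__m128 x)  { return _mm_round_ps(x, _MM_FROUND_FLOOR);     } // imm 0x9
__m128 ceil_ps(__m128 x)   { return _mm_round_ps(x, _MM_FROUND_CEIL);      } // imm 0xA
__m128 trunc_ps(__m128 x)  { return _mm_round_ps(x, _MM_FROUND_TRUNC);     } // imm 0xB
__m128 nearby_ps(__m128 x) { return _mm_round_ps(x, _MM_FROUND_NEARBYINT); } // imm 0xC
__m128 rint_ps(__m128 x)   { return _mm_round_ps(x, _MM_FROUND_RINT);      } // imm 0x4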
@@ -6867,10 +6774,10 @@ let Constraints = "$src1 = $dst" in {
let Predicates = [HasAVX, NoVLX] in {
defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, VR128,
- memopv2i64, i128mem, 0, SSE_PMULLD_ITINS>,
+ loadv2i64, i128mem, 0, SSE_PMULLD_ITINS>,
VEX_4V;
defm VPCMPEQQ : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v2i64, VR128,
- memopv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
+ loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V;
}
let Predicates = [HasAVX2] in {
@@ -7029,22 +6936,22 @@ multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
RegisterClass RC, X86MemOperand x86memop,
PatFrag mem_frag, Intrinsic IntId,
X86FoldableSchedWrite Sched> {
- def rr : Ii8<opc, MRMSrcReg, (outs RC:$dst),
+ def rr : Ii8Reg<opc, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst, (IntId RC:$src1, RC:$src2, RC:$src3))],
- NoItinerary, SSEPackedInt>, TAPD, VEX_4V, VEX_I8IMM,
+ NoItinerary, SSEPackedInt>, TAPD, VEX_4V,
Sched<[Sched]>;
- def rm : Ii8<opc, MRMSrcMem, (outs RC:$dst),
+ def rm : Ii8Reg<opc, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, RC:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set RC:$dst,
(IntId RC:$src1, (bitconvert (mem_frag addr:$src2)),
RC:$src3))],
- NoItinerary, SSEPackedInt>, TAPD, VEX_4V, VEX_I8IMM,
+ NoItinerary, SSEPackedInt>, TAPD, VEX_4V,
Sched<[Sched.Folded, ReadAfterLd]>;
}
@@ -7139,17 +7046,6 @@ let Predicates = [UseAVX] in {
(VBLENDPDYrri (v4f64 (AVX_SET0)), VR256:$src, (i8 1))>;
}
- def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
- (v4f32 (scalar_to_vector FR32:$src)), (iPTR 0)))),
- (SUBREG_TO_REG (i32 0),
- (v4f32 (VMOVSSrr (v4f32 (V_SET0)), FR32:$src)),
- sub_xmm)>;
- def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
- (v2f64 (scalar_to_vector FR64:$src)), (iPTR 0)))),
- (SUBREG_TO_REG (i64 0),
- (v2f64 (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)),
- sub_xmm)>;
-
// These will incur an FP/int domain crossing penalty, but it may be the only
// way without AVX2. Do not add any complexity because we may be able to match
// more optimal patterns defined earlier in this file.
@@ -7744,6 +7640,7 @@ defm : pclmul_alias<"lqlq", 0x00>;
let Predicates = [HasSSE4A] in {
+let ExeDomain = SSEPackedInt in {
let Constraints = "$src = $dst" in {
def EXTRQI : Ii8<0x78, MRMXr, (outs VR128:$dst),
(ins VR128:$src, u8imm:$len, u8imm:$idx),
@@ -7767,6 +7664,7 @@ def INSERTQ : I<0x79, MRMSrcReg, (outs VR128:$dst),
[(set VR128:$dst, (int_x86_sse4a_insertq VR128:$src,
VR128:$mask))]>, XD;
}
+} // ExeDomain = SSEPackedInt
// Non-temporal (unaligned) scalar stores.
let AddedComplexity = 400 in { // Prefer non-temporal versions
@@ -7832,23 +7730,50 @@ let ExeDomain = SSEPackedDouble, Predicates = [HasAVX2, NoVLX] in
def VBROADCASTSDYrr : avx2_broadcast_rr<0x19, "vbroadcastsd", VR256,
v4f64, v2f64, WriteFShuffle256>, VEX_L;
+//===----------------------------------------------------------------------===//
+// VBROADCAST*128 - Load from memory and broadcast 128-bit vector to both
+// halves of a 256-bit vector.
+//
let mayLoad = 1, hasSideEffects = 0, Predicates = [HasAVX2] in
def VBROADCASTI128 : AVX8I<0x5A, MRMSrcMem, (outs VR256:$dst),
(ins i128mem:$src),
"vbroadcasti128\t{$src, $dst|$dst, $src}", []>,
Sched<[WriteLoad]>, VEX, VEX_L;
+let mayLoad = 1, hasSideEffects = 0, Predicates = [HasAVX] in
def VBROADCASTF128 : AVX8I<0x1A, MRMSrcMem, (outs VR256:$dst),
(ins f128mem:$src),
- "vbroadcastf128\t{$src, $dst|$dst, $src}",
- [(set VR256:$dst,
- (int_x86_avx_vbroadcastf128_pd_256 addr:$src))]>,
+ "vbroadcastf128\t{$src, $dst|$dst, $src}", []>,
Sched<[WriteFShuffleLd]>, VEX, VEX_L;
-let Predicates = [HasAVX] in
-def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src),
+let Predicates = [HasAVX2, NoVLX] in {
+def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
+ (VBROADCASTI128 addr:$src)>;
+def : Pat<(v8i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src)))),
+ (VBROADCASTI128 addr:$src)>;
+def : Pat<(v16i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
+ (VBROADCASTI128 addr:$src)>;
+def : Pat<(v32i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
+ (VBROADCASTI128 addr:$src)>;
+}
+
+let Predicates = [HasAVX, NoVLX] in {
+def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
+ (VBROADCASTF128 addr:$src)>;
+def : Pat<(v8f32 (X86SubVBroadcast (loadv4f32 addr:$src))),
(VBROADCASTF128 addr:$src)>;
+}
+let Predicates = [HasAVX1Only] in {
+def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
+ (VBROADCASTF128 addr:$src)>;
+def : Pat<(v8i32 (X86SubVBroadcast (bc_v4i32 (loadv2i64 addr:$src)))),
+ (VBROADCASTF128 addr:$src)>;
+def : Pat<(v16i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
+ (VBROADCASTF128 addr:$src)>;
+def : Pat<(v32i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
+ (VBROADCASTF128 addr:$src)>;
+}
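As the comment above notes, VBROADCASTF128/VBROADCASTI128 read 16 bytes from memory and replicate them into both halves of a YMM register, which is exactly what the X86SubVBroadcast load patterns select. A rough illustration with the corresponding Intel intrinsics (helper names are made up for this sketch):

#include <immintrin.h>

__m256  bcast128_ps(const __m128  *p) { return _mm256_broadcast_ps(p); }  // AVX: VBROADCASTF128
__m256d bcast128_pd(const __m128d *p) { return _mm256_broadcast_pd(p); }  // AVX: VBROADCASTF128
// The integer instruction is memory-only; compilers typically fold the load
// below into a single VBROADCASTI128 on AVX2 targets.
__m256i bcast128_si(const __m128i *p) {
  return _mm256_broadcastsi128_si256(_mm_loadu_si128(p));
}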
//===----------------------------------------------------------------------===//
// VINSERTF128 - Insert packed floating-point values
@@ -7865,63 +7790,29 @@ def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst),
[]>, Sched<[WriteFShuffleLd, ReadAfterLd]>, VEX_4V, VEX_L;
}
-let Predicates = [HasAVX, NoVLX] in {
-def : Pat<(vinsert128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2),
+multiclass vinsert_lowering<string InstrStr, ValueType From, ValueType To,
+ PatFrag memop_frag> {
+ def : Pat<(vinsert128_insert:$ins (To VR256:$src1), (From VR128:$src2),
(iPTR imm)),
- (VINSERTF128rr VR256:$src1, VR128:$src2,
- (INSERT_get_vinsert128_imm VR256:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2),
- (iPTR imm)),
- (VINSERTF128rr VR256:$src1, VR128:$src2,
- (INSERT_get_vinsert128_imm VR256:$ins))>;
+ (!cast<Instruction>(InstrStr#rr) VR256:$src1, VR128:$src2,
+ (INSERT_get_vinsert128_imm VR256:$ins))>;
+ def : Pat<(vinsert128_insert:$ins (To VR256:$src1),
+ (From (bitconvert (memop_frag addr:$src2))),
+ (iPTR imm)),
+ (!cast<Instruction>(InstrStr#rm) VR256:$src1, addr:$src2,
+ (INSERT_get_vinsert128_imm VR256:$ins))>;
+}
-def : Pat<(vinsert128_insert:$ins (v8f32 VR256:$src1), (loadv4f32 addr:$src2),
- (iPTR imm)),
- (VINSERTF128rm VR256:$src1, addr:$src2,
- (INSERT_get_vinsert128_imm VR256:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v4f64 VR256:$src1), (loadv2f64 addr:$src2),
- (iPTR imm)),
- (VINSERTF128rm VR256:$src1, addr:$src2,
- (INSERT_get_vinsert128_imm VR256:$ins))>;
+let Predicates = [HasAVX, NoVLX] in {
+ defm : vinsert_lowering<"VINSERTF128", v4f32, v8f32, loadv4f32>;
+ defm : vinsert_lowering<"VINSERTF128", v2f64, v4f64, loadv2f64>;
}
let Predicates = [HasAVX1Only] in {
-def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
- (iPTR imm)),
- (VINSERTF128rr VR256:$src1, VR128:$src2,
- (INSERT_get_vinsert128_imm VR256:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
- (iPTR imm)),
- (VINSERTF128rr VR256:$src1, VR128:$src2,
- (INSERT_get_vinsert128_imm VR256:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2),
- (iPTR imm)),
- (VINSERTF128rr VR256:$src1, VR128:$src2,
- (INSERT_get_vinsert128_imm VR256:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
- (iPTR imm)),
- (VINSERTF128rr VR256:$src1, VR128:$src2,
- (INSERT_get_vinsert128_imm VR256:$ins))>;
-
-def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2),
- (iPTR imm)),
- (VINSERTF128rm VR256:$src1, addr:$src2,
- (INSERT_get_vinsert128_imm VR256:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v8i32 VR256:$src1),
- (bc_v4i32 (loadv2i64 addr:$src2)),
- (iPTR imm)),
- (VINSERTF128rm VR256:$src1, addr:$src2,
- (INSERT_get_vinsert128_imm VR256:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v32i8 VR256:$src1),
- (bc_v16i8 (loadv2i64 addr:$src2)),
- (iPTR imm)),
- (VINSERTF128rm VR256:$src1, addr:$src2,
- (INSERT_get_vinsert128_imm VR256:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1),
- (bc_v8i16 (loadv2i64 addr:$src2)),
- (iPTR imm)),
- (VINSERTF128rm VR256:$src1, addr:$src2,
- (INSERT_get_vinsert128_imm VR256:$ins))>;
+ defm : vinsert_lowering<"VINSERTF128", v2i64, v4i64, loadv2i64>;
+ defm : vinsert_lowering<"VINSERTF128", v4i32, v8i32, loadv2i64>;
+ defm : vinsert_lowering<"VINSERTF128", v8i16, v16i16, loadv2i64>;
+ defm : vinsert_lowering<"VINSERTF128", v16i8, v32i8, loadv2i64>;
}
//===----------------------------------------------------------------------===//
@@ -7939,61 +7830,28 @@ def VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs),
[]>, Sched<[WriteStore]>, VEX, VEX_L;
}
+multiclass vextract_lowering<string InstrStr, ValueType From, ValueType To> {
+ def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
+ (To (!cast<Instruction>(InstrStr#rr)
+ (From VR256:$src1),
+ (EXTRACT_get_vextract128_imm VR128:$ext)))>;
+ def : Pat<(store (To (vextract128_extract:$ext (From VR256:$src1),
+ (iPTR imm))), addr:$dst),
+ (!cast<Instruction>(InstrStr#mr) addr:$dst, VR256:$src1,
+ (EXTRACT_get_vextract128_imm VR128:$ext))>;
+}
+
// AVX1 patterns
let Predicates = [HasAVX, NoVLX] in {
-def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
- (v4f32 (VEXTRACTF128rr
- (v8f32 VR256:$src1),
- (EXTRACT_get_vextract128_imm VR128:$ext)))>;
-def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
- (v2f64 (VEXTRACTF128rr
- (v4f64 VR256:$src1),
- (EXTRACT_get_vextract128_imm VR128:$ext)))>;
-
-def : Pat<(store (v4f32 (vextract128_extract:$ext (v8f32 VR256:$src1),
- (iPTR imm))), addr:$dst),
- (VEXTRACTF128mr addr:$dst, VR256:$src1,
- (EXTRACT_get_vextract128_imm VR128:$ext))>;
-def : Pat<(store (v2f64 (vextract128_extract:$ext (v4f64 VR256:$src1),
- (iPTR imm))), addr:$dst),
- (VEXTRACTF128mr addr:$dst, VR256:$src1,
- (EXTRACT_get_vextract128_imm VR128:$ext))>;
+ defm : vextract_lowering<"VEXTRACTF128", v8f32, v4f32>;
+ defm : vextract_lowering<"VEXTRACTF128", v4f64, v2f64>;
}
let Predicates = [HasAVX1Only] in {
-def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
- (v2i64 (VEXTRACTF128rr
- (v4i64 VR256:$src1),
- (EXTRACT_get_vextract128_imm VR128:$ext)))>;
-def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
- (v4i32 (VEXTRACTF128rr
- (v8i32 VR256:$src1),
- (EXTRACT_get_vextract128_imm VR128:$ext)))>;
-def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
- (v8i16 (VEXTRACTF128rr
- (v16i16 VR256:$src1),
- (EXTRACT_get_vextract128_imm VR128:$ext)))>;
-def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
- (v16i8 (VEXTRACTF128rr
- (v32i8 VR256:$src1),
- (EXTRACT_get_vextract128_imm VR128:$ext)))>;
-
-def : Pat<(store (v2i64 (vextract128_extract:$ext (v4i64 VR256:$src1),
- (iPTR imm))), addr:$dst),
- (VEXTRACTF128mr addr:$dst, VR256:$src1,
- (EXTRACT_get_vextract128_imm VR128:$ext))>;
-def : Pat<(store (v4i32 (vextract128_extract:$ext (v8i32 VR256:$src1),
- (iPTR imm))), addr:$dst),
- (VEXTRACTF128mr addr:$dst, VR256:$src1,
- (EXTRACT_get_vextract128_imm VR128:$ext))>;
-def : Pat<(store (v8i16 (vextract128_extract:$ext (v16i16 VR256:$src1),
- (iPTR imm))), addr:$dst),
- (VEXTRACTF128mr addr:$dst, VR256:$src1,
- (EXTRACT_get_vextract128_imm VR128:$ext))>;
-def : Pat<(store (v16i8 (vextract128_extract:$ext (v32i8 VR256:$src1),
- (iPTR imm))), addr:$dst),
- (VEXTRACTF128mr addr:$dst, VR256:$src1,
- (EXTRACT_get_vextract128_imm VR128:$ext))>;
+ defm : vextract_lowering<"VEXTRACTF128", v4i64, v2i64>;
+ defm : vextract_lowering<"VEXTRACTF128", v8i32, v4i32>;
+ defm : vextract_lowering<"VEXTRACTF128", v16i16, v8i16>;
+ defm : vextract_lowering<"VEXTRACTF128", v32i8, v16i8>;
}
//===----------------------------------------------------------------------===//
@@ -8239,7 +8097,7 @@ let Predicates = [HasF16C] in {
}
// Patterns for matching conversions from float to half-float and vice versa.
-let Predicates = [HasF16C] in {
+let Predicates = [HasF16C, NoVLX] in {
// Use MXCSR.RC for rounding instead of explicitly specifying the default
// rounding mode (Nearest-Even, encoded as 0). Both are equivalent in the
// configurations we support (the default). However, falling back to MXCSR is
@@ -8334,7 +8192,7 @@ defm VPBROADCASTD : avx2_broadcast<0x58, "vpbroadcastd", i32mem, loadi32,
defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, loadi64,
v2i64, v4i64, NoVLX>;
-let Predicates = [HasAVX2] in {
+let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
// loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
// This means we'll encounter truncated i32 loads; match that here.
def : Pat<(v8i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
@@ -8347,7 +8205,9 @@ let Predicates = [HasAVX2] in {
def : Pat<(v16i16 (X86VBroadcast
(i16 (trunc (i32 (zextloadi16 addr:$src)))))),
(VPBROADCASTWYrm addr:$src)>;
+}
+let Predicates = [HasAVX2] in {
// Provide aliases for broadcast from the same register class that
// automatically does the extract.
def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256:$src))),
@@ -8361,36 +8221,38 @@ let Predicates = [HasAVX2] in {
let Predicates = [HasAVX2, NoVLX] in {
// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
- let AddedComplexity = 20 in {
def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
(VBROADCASTSSrr (COPY_TO_REGCLASS FR32:$src, VR128))>;
def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
(VBROADCASTSSYrr (COPY_TO_REGCLASS FR32:$src, VR128))>;
def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
(VBROADCASTSDYrr (COPY_TO_REGCLASS FR64:$src, VR128))>;
- }
}
-let Predicates = [HasAVX2, NoVLX_Or_NoBWI], AddedComplexity = 20 in {
+let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in {
def : Pat<(v16i8 (X86VBroadcast GR8:$src)),
(VPBROADCASTBrr (COPY_TO_REGCLASS
- (i32 (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit)),
+ (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
+ GR8:$src, sub_8bit)),
VR128))>;
def : Pat<(v32i8 (X86VBroadcast GR8:$src)),
(VPBROADCASTBYrr (COPY_TO_REGCLASS
- (i32 (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit)),
+ (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
+ GR8:$src, sub_8bit)),
VR128))>;
def : Pat<(v8i16 (X86VBroadcast GR16:$src)),
(VPBROADCASTWrr (COPY_TO_REGCLASS
- (i32 (SUBREG_TO_REG (i32 0), GR16:$src, sub_16bit)),
+ (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
+ GR16:$src, sub_16bit)),
VR128))>;
def : Pat<(v16i16 (X86VBroadcast GR16:$src)),
(VPBROADCASTWYrr (COPY_TO_REGCLASS
- (i32 (SUBREG_TO_REG (i32 0), GR16:$src, sub_16bit)),
+ (i32 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
+ GR16:$src, sub_16bit)),
VR128))>;
}
-let Predicates = [HasAVX2, NoVLX], AddedComplexity = 20 in {
+let Predicates = [HasAVX2, NoVLX] in {
def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
(VBROADCASTSSrr (COPY_TO_REGCLASS GR32:$src, VR128))>;
def : Pat<(v8i32 (X86VBroadcast GR32:$src)),
@@ -8418,13 +8280,13 @@ def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
// Provide fallback in case the load node that is used in the patterns above
// is used by additional users, which prevents the pattern selection.
-let Predicates = [HasAVX], AddedComplexity = 20 in {
+let Predicates = [HasAVX, NoVLX] in {
// 128bit broadcasts:
def : Pat<(v2f64 (X86VBroadcast f64:$src)),
(VMOVDDUPrr (COPY_TO_REGCLASS FR64:$src, VR128))>;
}
-let Predicates = [HasAVX, NoVLX], AddedComplexity = 20 in {
+let Predicates = [HasAVX1Only] in {
def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
(VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0)>;
def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
@@ -8560,42 +8422,10 @@ def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst),
}
let Predicates = [HasAVX2, NoVLX] in {
-def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
- (iPTR imm)),
- (VINSERTI128rr VR256:$src1, VR128:$src2,
- (INSERT_get_vinsert128_imm VR256:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
- (iPTR imm)),
- (VINSERTI128rr VR256:$src1, VR128:$src2,
- (INSERT_get_vinsert128_imm VR256:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2),
- (iPTR imm)),
- (VINSERTI128rr VR256:$src1, VR128:$src2,
- (INSERT_get_vinsert128_imm VR256:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
- (iPTR imm)),
- (VINSERTI128rr VR256:$src1, VR128:$src2,
- (INSERT_get_vinsert128_imm VR256:$ins))>;
-
-def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2),
- (iPTR imm)),
- (VINSERTI128rm VR256:$src1, addr:$src2,
- (INSERT_get_vinsert128_imm VR256:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v8i32 VR256:$src1),
- (bc_v4i32 (loadv2i64 addr:$src2)),
- (iPTR imm)),
- (VINSERTI128rm VR256:$src1, addr:$src2,
- (INSERT_get_vinsert128_imm VR256:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v32i8 VR256:$src1),
- (bc_v16i8 (loadv2i64 addr:$src2)),
- (iPTR imm)),
- (VINSERTI128rm VR256:$src1, addr:$src2,
- (INSERT_get_vinsert128_imm VR256:$ins))>;
-def : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1),
- (bc_v8i16 (loadv2i64 addr:$src2)),
- (iPTR imm)),
- (VINSERTI128rm VR256:$src1, addr:$src2,
- (INSERT_get_vinsert128_imm VR256:$ins))>;
+ defm : vinsert_lowering<"VINSERTI128", v2i64, v4i64, loadv2i64>;
+ defm : vinsert_lowering<"VINSERTI128", v4i32, v8i32, loadv2i64>;
+ defm : vinsert_lowering<"VINSERTI128", v8i16, v16i16, loadv2i64>;
+ defm : vinsert_lowering<"VINSERTI128", v16i8, v32i8, loadv2i64>;
}
//===----------------------------------------------------------------------===//
@@ -8612,39 +8442,10 @@ def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs),
Sched<[WriteStore]>, VEX, VEX_L;
let Predicates = [HasAVX2, NoVLX] in {
-def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
- (v2i64 (VEXTRACTI128rr
- (v4i64 VR256:$src1),
- (EXTRACT_get_vextract128_imm VR128:$ext)))>;
-def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
- (v4i32 (VEXTRACTI128rr
- (v8i32 VR256:$src1),
- (EXTRACT_get_vextract128_imm VR128:$ext)))>;
-def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
- (v8i16 (VEXTRACTI128rr
- (v16i16 VR256:$src1),
- (EXTRACT_get_vextract128_imm VR128:$ext)))>;
-def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
- (v16i8 (VEXTRACTI128rr
- (v32i8 VR256:$src1),
- (EXTRACT_get_vextract128_imm VR128:$ext)))>;
-
-def : Pat<(store (v2i64 (vextract128_extract:$ext (v4i64 VR256:$src1),
- (iPTR imm))), addr:$dst),
- (VEXTRACTI128mr addr:$dst, VR256:$src1,
- (EXTRACT_get_vextract128_imm VR128:$ext))>;
-def : Pat<(store (v4i32 (vextract128_extract:$ext (v8i32 VR256:$src1),
- (iPTR imm))), addr:$dst),
- (VEXTRACTI128mr addr:$dst, VR256:$src1,
- (EXTRACT_get_vextract128_imm VR128:$ext))>;
-def : Pat<(store (v8i16 (vextract128_extract:$ext (v16i16 VR256:$src1),
- (iPTR imm))), addr:$dst),
- (VEXTRACTI128mr addr:$dst, VR256:$src1,
- (EXTRACT_get_vextract128_imm VR128:$ext))>;
-def : Pat<(store (v16i8 (vextract128_extract:$ext (v32i8 VR256:$src1),
- (iPTR imm))), addr:$dst),
- (VEXTRACTI128mr addr:$dst, VR256:$src1,
- (EXTRACT_get_vextract128_imm VR128:$ext))>;
+ defm : vextract_lowering<"VEXTRACTI128", v4i64, v2i64>;
+ defm : vextract_lowering<"VEXTRACTI128", v8i32, v4i32>;
+ defm : vextract_lowering<"VEXTRACTI128", v16i16, v8i16>;
+ defm : vextract_lowering<"VEXTRACTI128", v32i8, v16i8>;
}
//===----------------------------------------------------------------------===//
@@ -8689,12 +8490,12 @@ multiclass maskmov_lowering<string InstrStr, RegisterClass RC, ValueType VT,
def: Pat<(X86mstore addr:$ptr, (MaskVT RC:$mask), (VT RC:$src)),
(!cast<Instruction>(InstrStr#"mr") addr:$ptr, RC:$mask, RC:$src)>;
// masked load
- def: Pat<(VT (masked_load addr:$ptr, (MaskVT RC:$mask), undef)),
+ def: Pat<(VT (X86mload addr:$ptr, (MaskVT RC:$mask), undef)),
(!cast<Instruction>(InstrStr#"rm") RC:$mask, addr:$ptr)>;
- def: Pat<(VT (masked_load addr:$ptr, (MaskVT RC:$mask),
+ def: Pat<(VT (X86mload addr:$ptr, (MaskVT RC:$mask),
(VT (bitconvert (ZeroVT immAllZerosV))))),
(!cast<Instruction>(InstrStr#"rm") RC:$mask, addr:$ptr)>;
- def: Pat<(VT (masked_load addr:$ptr, (MaskVT RC:$mask), (VT RC:$src0))),
+ def: Pat<(VT (X86mload addr:$ptr, (MaskVT RC:$mask), (VT RC:$src0))),
(!cast<Instruction>(BlendStr#"rr")
RC:$src0,
(!cast<Instruction>(InstrStr#"rm") RC:$mask, addr:$ptr),
@@ -8719,6 +8520,51 @@ let Predicates = [HasAVX2] in {
defm : maskmov_lowering<"VPMASKMOVD", VR128, v4i32, v4i32, "VBLENDVPS", v4i32>;
defm : maskmov_lowering<"VPMASKMOVQ", VR128, v2i64, v2i64, "VBLENDVPD", v4i32>;
}
+
+//===----------------------------------------------------------------------===//
+// SubVector Broadcasts
+// Provide fallback in case the load node that is used in the patterns above
+// is used by additional users, which prevents the pattern selection.
+
+let Predicates = [HasAVX2, NoVLX] in {
+def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128:$src))),
+ (VINSERTI128rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
+ (v2i64 VR128:$src), 1)>;
+def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128:$src))),
+ (VINSERTI128rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
+ (v4i32 VR128:$src), 1)>;
+def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128:$src))),
+ (VINSERTI128rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
+ (v8i16 VR128:$src), 1)>;
+def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128:$src))),
+ (VINSERTI128rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
+ (v16i8 VR128:$src), 1)>;
+}
+
+let Predicates = [HasAVX, NoVLX] in {
+def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128:$src))),
+ (VINSERTF128rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
+ (v2f64 VR128:$src), 1)>;
+def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128:$src))),
+ (VINSERTF128rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
+ (v4f32 VR128:$src), 1)>;
+}
+
+let Predicates = [HasAVX1Only] in {
+def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128:$src))),
+ (VINSERTF128rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
+ (v2i64 VR128:$src), 1)>;
+def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128:$src))),
+ (VINSERTF128rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
+ (v4i32 VR128:$src), 1)>;
+def : Pat<(v16i16 (X86SubVBroadcast (v8i16 VR128:$src))),
+ (VINSERTF128rr (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
+ (v8i16 VR128:$src), 1)>;
+def : Pat<(v32i8 (X86SubVBroadcast (v16i8 VR128:$src))),
+ (VINSERTF128rr (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128:$src, sub_xmm),
+ (v16i8 VR128:$src), 1)>;
+}
+
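When the 128-bit source is already in a register there is no load to fold, so the fallback patterns above build the splat with VINSERTF128/VINSERTI128 into an IMPLICIT_DEF upper half. Roughly the same thing expressed with intrinsics, as a sketch only (the cast's undefined upper lane mirrors the IMPLICIT_DEF):

#include <immintrin.h>

__m256 splat128_ps(__m128 x) {
  return _mm256_insertf128_ps(_mm256_castps128_ps256(x), x, 1);    // VINSERTF128
}
__m256i splat128_si(__m128i x) {
  return _mm256_inserti128_si256(_mm256_castsi128_si256(x), x, 1); // AVX2: VINSERTI128
}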
//===----------------------------------------------------------------------===//
// Variable Bit Shifts
//
@@ -8758,23 +8604,35 @@ let Predicates = [HasAVX2, NoVLX] in {
defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", srl, v4i32, v8i32>;
defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", srl, v2i64, v4i64>, VEX_W;
defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, v4i32, v8i32>;
- let isCodeGenOnly = 1 in
- defm VPSRAVD_Int : avx2_var_shift<0x46, "vpsravd", X86vsrav, v4i32, v8i32>;
+
+ def : Pat<(v4i32 (X86vsrav VR128:$src1, VR128:$src2)),
+ (VPSRAVDrr VR128:$src1, VR128:$src2)>;
+ def : Pat<(v4i32 (X86vsrav VR128:$src1,
+ (bitconvert (loadv2i64 addr:$src2)))),
+ (VPSRAVDrm VR128:$src1, addr:$src2)>;
+ def : Pat<(v8i32 (X86vsrav VR256:$src1, VR256:$src2)),
+ (VPSRAVDYrr VR256:$src1, VR256:$src2)>;
+ def : Pat<(v8i32 (X86vsrav VR256:$src1,
+ (bitconvert (loadv4i64 addr:$src2)))),
+ (VPSRAVDYrm VR256:$src1, addr:$src2)>;
}
+
+
+
//===----------------------------------------------------------------------===//
// VGATHER - GATHER Operations
multiclass avx2_gather<bits<8> opc, string OpcodeStr, RegisterClass RC256,
X86MemOperand memop128, X86MemOperand memop256> {
- def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst, VR128:$mask_wb),
+ def rm : AVX28I<opc, MRMSrcMem4VOp3, (outs VR128:$dst, VR128:$mask_wb),
(ins VR128:$src1, memop128:$src2, VR128:$mask),
!strconcat(OpcodeStr,
"\t{$mask, $src2, $dst|$dst, $src2, $mask}"),
- []>, VEX_4VOp3;
- def Yrm : AVX28I<opc, MRMSrcMem, (outs RC256:$dst, RC256:$mask_wb),
+ []>, VEX;
+ def Yrm : AVX28I<opc, MRMSrcMem4VOp3, (outs RC256:$dst, RC256:$mask_wb),
(ins RC256:$src1, memop256:$src2, RC256:$mask),
!strconcat(OpcodeStr,
"\t{$mask, $src2, $dst|$dst, $src2, $mask}"),
- []>, VEX_4VOp3, VEX_L;
+ []>, VEX, VEX_L;
}
let mayLoad = 1, hasSideEffects = 0, Constraints
diff --git a/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td b/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td
index c1df9780a0e0..e2be73532157 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td
@@ -591,37 +591,38 @@ def ROR64rCL : RI<0xD3, MRM1r, (outs GR64:$dst), (ins GR64:$src1),
def ROR8ri : Ii8<0xC0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1, u8imm:$src2),
"ror{b}\t{$src2, $dst|$dst, $src2}",
- [(set GR8:$dst, (rotr GR8:$src1, (i8 imm:$src2)))], IIC_SR>;
+ [(set GR8:$dst, (rotr GR8:$src1, (i8 relocImm:$src2)))],
+ IIC_SR>;
def ROR16ri : Ii8<0xC1, MRM1r, (outs GR16:$dst), (ins GR16:$src1, u8imm:$src2),
"ror{w}\t{$src2, $dst|$dst, $src2}",
- [(set GR16:$dst, (rotr GR16:$src1, (i8 imm:$src2)))],
+ [(set GR16:$dst, (rotr GR16:$src1, (i8 relocImm:$src2)))],
IIC_SR>, OpSize16;
def ROR32ri : Ii8<0xC1, MRM1r, (outs GR32:$dst), (ins GR32:$src1, u8imm:$src2),
"ror{l}\t{$src2, $dst|$dst, $src2}",
- [(set GR32:$dst, (rotr GR32:$src1, (i8 imm:$src2)))],
+ [(set GR32:$dst, (rotr GR32:$src1, (i8 relocImm:$src2)))],
IIC_SR>, OpSize32;
def ROR64ri : RIi8<0xC1, MRM1r, (outs GR64:$dst),
(ins GR64:$src1, u8imm:$src2),
"ror{q}\t{$src2, $dst|$dst, $src2}",
- [(set GR64:$dst, (rotr GR64:$src1, (i8 imm:$src2)))],
+ [(set GR64:$dst, (rotr GR64:$src1, (i8 relocImm:$src2)))],
IIC_SR>;
// Rotate by 1
def ROR8r1 : I<0xD0, MRM1r, (outs GR8 :$dst), (ins GR8 :$src1),
"ror{b}\t$dst",
- [(set GR8:$dst, (rotr GR8:$src1, (i8 1)))],
+ [(set GR8:$dst, (rotl GR8:$src1, (i8 7)))],
IIC_SR>;
def ROR16r1 : I<0xD1, MRM1r, (outs GR16:$dst), (ins GR16:$src1),
"ror{w}\t$dst",
- [(set GR16:$dst, (rotr GR16:$src1, (i8 1)))],
+ [(set GR16:$dst, (rotl GR16:$src1, (i8 15)))],
IIC_SR>, OpSize16;
def ROR32r1 : I<0xD1, MRM1r, (outs GR32:$dst), (ins GR32:$src1),
"ror{l}\t$dst",
- [(set GR32:$dst, (rotr GR32:$src1, (i8 1)))],
+ [(set GR32:$dst, (rotl GR32:$src1, (i8 31)))],
IIC_SR>, OpSize32;
def ROR64r1 : RI<0xD1, MRM1r, (outs GR64:$dst), (ins GR64:$src1),
"ror{q}\t$dst",
- [(set GR64:$dst, (rotr GR64:$src1, (i8 1)))],
+ [(set GR64:$dst, (rotl GR64:$src1, (i8 63)))],
IIC_SR>;
} // Constraints = "$src = $dst", SchedRW
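The rotate-by-1 forms above now match rotl by width-1 because the two are the same operation and DAG combining canonicalizes one into the other. The identity, spelled out in C++ purely as a sanity check:

#include <cstdint>

// rotr(x, 1) == rotl(x, 31) for 32-bit values (and likewise 7/15/63 for
// 8/16/64-bit), which is why ROR*r1 is now selected from rotl patterns.
static inline uint32_t rotr1(uint32_t x)  { return (x >> 1) | (x << 31); }
static inline uint32_t rotl31(uint32_t x) { return (x << 31) | (x >> 1); }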
@@ -873,19 +874,19 @@ let hasSideEffects = 0 in {
multiclass bmi_shift<string asm, RegisterClass RC, X86MemOperand x86memop> {
let hasSideEffects = 0 in {
- def rr : I<0xF7, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
+ def rr : I<0xF7, MRMSrcReg4VOp3, (outs RC:$dst), (ins RC:$src1, RC:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
- VEX_4VOp3, Sched<[WriteShift]>;
+ VEX, Sched<[WriteShift]>;
let mayLoad = 1 in
- def rm : I<0xF7, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src1, RC:$src2),
+ def rm : I<0xF7, MRMSrcMem4VOp3,
+ (outs RC:$dst), (ins x86memop:$src1, RC:$src2),
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>,
- VEX_4VOp3,
- Sched<[WriteShiftLd,
- // x86memop:$src1
- ReadDefault, ReadDefault, ReadDefault, ReadDefault,
- ReadDefault,
- // RC:$src1
- ReadAfterLd]>;
+ VEX, Sched<[WriteShiftLd,
+ // x86memop:$src1
+ ReadDefault, ReadDefault, ReadDefault, ReadDefault,
+ ReadDefault,
+ // RC:$src1
+ ReadAfterLd]>;
}
}
diff --git a/contrib/llvm/lib/Target/X86/X86InstrSystem.td b/contrib/llvm/lib/Target/X86/X86InstrSystem.td
index 6667bd2aec4a..9265d64b3230 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrSystem.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrSystem.td
@@ -23,7 +23,7 @@ let Defs = [RAX, RCX, RDX] in
// CPU flow control instructions
-let isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in {
+let mayLoad = 1, mayStore = 0, hasSideEffects = 1 in {
def TRAP : I<0x0B, RawFrm, (outs), (ins), "ud2", [(trap)]>, TB;
def UD2B : I<0xB9, RawFrm, (outs), (ins), "ud2b", []>, TB;
}
@@ -481,8 +481,11 @@ let Defs = [EDX, EAX], Uses = [ECX] in
def XGETBV : I<0x01, MRM_D0, (outs), (ins), "xgetbv", []>, TB;
let Uses = [EDX, EAX, ECX] in
- def XSETBV : I<0x01, MRM_D1, (outs), (ins), "xsetbv", []>, TB;
-}
+ def XSETBV : I<0x01, MRM_D1, (outs), (ins),
+ "xsetbv",
+ [(int_x86_xsetbv ECX, EDX, EAX)]>, TB;
+
+} // HasXSAVE
let Uses = [EDX, EAX] in {
let Predicates = [HasXSAVE] in {
diff --git a/contrib/llvm/lib/Target/X86/X86InstrTablesInfo.h b/contrib/llvm/lib/Target/X86/X86InstrTablesInfo.h
new file mode 100755
index 000000000000..5d2af829028a
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InstrTablesInfo.h
@@ -0,0 +1,1148 @@
+//===-- X86InstrTablesInfo.h - X86 Instruction Tables Information -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains X86 instruction information tables (currently the
+// EVEX-to-VEX compression tables).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_X86_X86INSTRTABLESINFO_H
+#define LLVM_LIB_TARGET_X86_X86INSTRTABLESINFO_H
+
+using namespace llvm;
+
+struct X86EvexToVexCompressTableEntry {
+ uint16_t EvexOpcode;
+ uint16_t VexOpcode;
+};
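Each X86EvexToVexCompressTableEntry pairs an EVEX-encoded opcode with its VEX equivalent; the tables below are arrays of these entries. As a rough sketch of how such a table might be consulted, here is a hypothetical linear-scan helper; the lookup strategy actually used by the compression pass is not part of this header and may well differ:

#include <cstddef>
#include <cstdint>

template <std::size_t N>
static inline uint16_t
lookupVexOpcode(const X86EvexToVexCompressTableEntry (&Table)[N],
                uint16_t EvexOpcode) {
  for (const auto &Entry : Table)
    if (Entry.EvexOpcode == EvexOpcode)
      return Entry.VexOpcode;  // VEX replacement found
  return 0;                    // no VEX form in this table
}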
+
+
+
+// X86 EVEX encoded instructions that have a VEX 128 encoding
+// (table format: <EVEX opcode, VEX-128 opcode>).
+static const X86EvexToVexCompressTableEntry
+ X86EvexToVex128CompressTable[] = {
+ // EVEX scalar with corresponding VEX.
+ { X86::Int_VCOMISDZrm , X86::Int_VCOMISDrm },
+ { X86::Int_VCOMISDZrr , X86::Int_VCOMISDrr },
+ { X86::Int_VCOMISSZrm , X86::Int_VCOMISSrm },
+ { X86::Int_VCOMISSZrr , X86::Int_VCOMISSrr },
+ { X86::Int_VUCOMISDZrm , X86::Int_VUCOMISDrm },
+ { X86::Int_VUCOMISDZrr , X86::Int_VUCOMISDrr },
+ { X86::Int_VUCOMISSZrm , X86::Int_VUCOMISSrm },
+ { X86::Int_VUCOMISSZrr , X86::Int_VUCOMISSrr },
+ { X86::VADDSDZrm , X86::VADDSDrm },
+ { X86::VADDSDZrm_Int , X86::VADDSDrm_Int },
+ { X86::VADDSDZrr , X86::VADDSDrr },
+ { X86::VADDSDZrr_Int , X86::VADDSDrr_Int },
+ { X86::VADDSSZrm , X86::VADDSSrm },
+ { X86::VADDSSZrm_Int , X86::VADDSSrm_Int },
+ { X86::VADDSSZrr , X86::VADDSSrr },
+ { X86::VADDSSZrr_Int , X86::VADDSSrr_Int },
+ { X86::VCOMISDZrm , X86::VCOMISDrm },
+ { X86::VCOMISDZrr , X86::VCOMISDrr },
+ { X86::VCOMISSZrm , X86::VCOMISSrm },
+ { X86::VCOMISSZrr , X86::VCOMISSrr },
+ { X86::VCVTSD2SI64Zrm , X86::VCVTSD2SI64rm },
+ { X86::VCVTSD2SI64Zrr , X86::VCVTSD2SI64rr },
+ { X86::VCVTSD2SIZrm , X86::VCVTSD2SIrm },
+ { X86::VCVTSD2SIZrr , X86::VCVTSD2SIrr },
+ { X86::VCVTSD2SSZrm , X86::VCVTSD2SSrm },
+ { X86::VCVTSD2SSZrr , X86::VCVTSD2SSrr },
+ { X86::VCVTSI2SDZrm , X86::VCVTSI2SDrm },
+ { X86::VCVTSI2SDZrm_Int , X86::Int_VCVTSI2SDrm },
+ { X86::VCVTSI2SDZrr , X86::VCVTSI2SDrr },
+ { X86::VCVTSI2SDZrr_Int , X86::Int_VCVTSI2SDrr },
+ { X86::VCVTSI2SSZrm , X86::VCVTSI2SSrm },
+ { X86::VCVTSI2SSZrm_Int , X86::Int_VCVTSI2SSrm },
+ { X86::VCVTSI2SSZrr , X86::VCVTSI2SSrr },
+ { X86::VCVTSI2SSZrr_Int , X86::Int_VCVTSI2SSrr },
+ { X86::VCVTSS2SDZrm , X86::VCVTSS2SDrm },
+ { X86::VCVTSS2SDZrr , X86::VCVTSS2SDrr },
+ { X86::VCVTSS2SI64Zrm , X86::VCVTSS2SI64rm },
+ { X86::VCVTSS2SI64Zrr , X86::VCVTSS2SI64rr },
+ { X86::VCVTSS2SIZrm , X86::VCVTSS2SIrm },
+ { X86::VCVTSS2SIZrr , X86::VCVTSS2SIrr },
+ { X86::VCVTTSD2SI64Zrm , X86::VCVTTSD2SI64rm },
+ { X86::VCVTTSD2SI64Zrm_Int , X86::Int_VCVTTSD2SI64rm },
+ { X86::VCVTTSD2SI64Zrr , X86::VCVTTSD2SI64rr },
+ { X86::VCVTTSD2SI64Zrr_Int , X86::Int_VCVTTSD2SI64rr },
+ { X86::VCVTTSD2SIZrm , X86::VCVTTSD2SIrm },
+ { X86::VCVTTSD2SIZrm_Int , X86::Int_VCVTTSD2SIrm },
+ { X86::VCVTTSD2SIZrr , X86::VCVTTSD2SIrr },
+ { X86::VCVTTSD2SIZrr_Int , X86::Int_VCVTTSD2SIrr },
+ { X86::VCVTTSS2SI64Zrm , X86::VCVTTSS2SI64rm },
+ { X86::VCVTTSS2SI64Zrm_Int , X86::Int_VCVTTSS2SI64rm },
+ { X86::VCVTTSS2SI64Zrr , X86::VCVTTSS2SI64rr },
+ { X86::VCVTTSS2SI64Zrr_Int , X86::Int_VCVTTSS2SI64rr },
+ { X86::VCVTTSS2SIZrm , X86::VCVTTSS2SIrm },
+ { X86::VCVTTSS2SIZrm_Int , X86::Int_VCVTTSS2SIrm },
+ { X86::VCVTTSS2SIZrr , X86::VCVTTSS2SIrr },
+ { X86::VCVTTSS2SIZrr_Int , X86::Int_VCVTTSS2SIrr },
+ { X86::VDIVSDZrm , X86::VDIVSDrm },
+ { X86::VDIVSDZrm_Int , X86::VDIVSDrm_Int },
+ { X86::VDIVSDZrr , X86::VDIVSDrr },
+ { X86::VDIVSDZrr_Int , X86::VDIVSDrr_Int },
+ { X86::VDIVSSZrm , X86::VDIVSSrm },
+ { X86::VDIVSSZrm_Int , X86::VDIVSSrm_Int },
+ { X86::VDIVSSZrr , X86::VDIVSSrr },
+ { X86::VDIVSSZrr_Int , X86::VDIVSSrr_Int },
+ { X86::VFMADD132SDZm , X86::VFMADD132SDm },
+ { X86::VFMADD132SDZm_Int , X86::VFMADD132SDm_Int },
+ { X86::VFMADD132SDZr , X86::VFMADD132SDr },
+ { X86::VFMADD132SDZr_Int , X86::VFMADD132SDr_Int },
+ { X86::VFMADD132SSZm , X86::VFMADD132SSm },
+ { X86::VFMADD132SSZm_Int , X86::VFMADD132SSm_Int },
+ { X86::VFMADD132SSZr , X86::VFMADD132SSr },
+ { X86::VFMADD132SSZr_Int , X86::VFMADD132SSr_Int },
+ { X86::VFMADD213SDZm , X86::VFMADD213SDm },
+ { X86::VFMADD213SDZm_Int , X86::VFMADD213SDm_Int },
+ { X86::VFMADD213SDZr , X86::VFMADD213SDr },
+ { X86::VFMADD213SDZr_Int , X86::VFMADD213SDr_Int },
+ { X86::VFMADD213SSZm , X86::VFMADD213SSm },
+ { X86::VFMADD213SSZm_Int , X86::VFMADD213SSm_Int },
+ { X86::VFMADD213SSZr , X86::VFMADD213SSr },
+ { X86::VFMADD213SSZr_Int , X86::VFMADD213SSr_Int },
+ { X86::VFMADD231SDZm , X86::VFMADD231SDm },
+ { X86::VFMADD231SDZm_Int , X86::VFMADD231SDm_Int },
+ { X86::VFMADD231SDZr , X86::VFMADD231SDr },
+ { X86::VFMADD231SDZr_Int , X86::VFMADD231SDr_Int },
+ { X86::VFMADD231SSZm , X86::VFMADD231SSm },
+ { X86::VFMADD231SSZm_Int , X86::VFMADD231SSm_Int },
+ { X86::VFMADD231SSZr , X86::VFMADD231SSr },
+ { X86::VFMADD231SSZr_Int , X86::VFMADD231SSr_Int },
+ { X86::VFMSUB132SDZm , X86::VFMSUB132SDm },
+ { X86::VFMSUB132SDZm_Int , X86::VFMSUB132SDm_Int },
+ { X86::VFMSUB132SDZr , X86::VFMSUB132SDr },
+ { X86::VFMSUB132SDZr_Int , X86::VFMSUB132SDr_Int },
+ { X86::VFMSUB132SSZm , X86::VFMSUB132SSm },
+ { X86::VFMSUB132SSZm_Int , X86::VFMSUB132SSm_Int },
+ { X86::VFMSUB132SSZr , X86::VFMSUB132SSr },
+ { X86::VFMSUB132SSZr_Int , X86::VFMSUB132SSr_Int },
+ { X86::VFMSUB213SDZm , X86::VFMSUB213SDm },
+ { X86::VFMSUB213SDZm_Int , X86::VFMSUB213SDm_Int },
+ { X86::VFMSUB213SDZr , X86::VFMSUB213SDr },
+ { X86::VFMSUB213SDZr_Int , X86::VFMSUB213SDr_Int },
+ { X86::VFMSUB213SSZm , X86::VFMSUB213SSm },
+ { X86::VFMSUB213SSZm_Int , X86::VFMSUB213SSm_Int },
+ { X86::VFMSUB213SSZr , X86::VFMSUB213SSr },
+ { X86::VFMSUB213SSZr_Int , X86::VFMSUB213SSr_Int },
+ { X86::VFMSUB231SDZm , X86::VFMSUB231SDm },
+ { X86::VFMSUB231SDZm_Int , X86::VFMSUB231SDm_Int },
+ { X86::VFMSUB231SDZr , X86::VFMSUB231SDr },
+ { X86::VFMSUB231SDZr_Int , X86::VFMSUB231SDr_Int },
+ { X86::VFMSUB231SSZm , X86::VFMSUB231SSm },
+ { X86::VFMSUB231SSZm_Int , X86::VFMSUB231SSm_Int },
+ { X86::VFMSUB231SSZr , X86::VFMSUB231SSr },
+ { X86::VFMSUB231SSZr_Int , X86::VFMSUB231SSr_Int },
+ { X86::VFNMADD132SDZm , X86::VFNMADD132SDm },
+ { X86::VFNMADD132SDZm_Int , X86::VFNMADD132SDm_Int },
+ { X86::VFNMADD132SDZr , X86::VFNMADD132SDr },
+ { X86::VFNMADD132SDZr_Int , X86::VFNMADD132SDr_Int },
+ { X86::VFNMADD132SSZm , X86::VFNMADD132SSm },
+ { X86::VFNMADD132SSZm_Int , X86::VFNMADD132SSm_Int },
+ { X86::VFNMADD132SSZr , X86::VFNMADD132SSr },
+ { X86::VFNMADD132SSZr_Int , X86::VFNMADD132SSr_Int },
+ { X86::VFNMADD213SDZm , X86::VFNMADD213SDm },
+ { X86::VFNMADD213SDZm_Int , X86::VFNMADD213SDm_Int },
+ { X86::VFNMADD213SDZr , X86::VFNMADD213SDr },
+ { X86::VFNMADD213SDZr_Int , X86::VFNMADD213SDr_Int },
+ { X86::VFNMADD213SSZm , X86::VFNMADD213SSm },
+ { X86::VFNMADD213SSZm_Int , X86::VFNMADD213SSm_Int },
+ { X86::VFNMADD213SSZr , X86::VFNMADD213SSr },
+ { X86::VFNMADD213SSZr_Int , X86::VFNMADD213SSr_Int },
+ { X86::VFNMADD231SDZm , X86::VFNMADD231SDm },
+ { X86::VFNMADD231SDZm_Int , X86::VFNMADD231SDm_Int },
+ { X86::VFNMADD231SDZr , X86::VFNMADD231SDr },
+ { X86::VFNMADD231SDZr_Int , X86::VFNMADD231SDr_Int },
+ { X86::VFNMADD231SSZm , X86::VFNMADD231SSm },
+ { X86::VFNMADD231SSZm_Int , X86::VFNMADD231SSm_Int },
+ { X86::VFNMADD231SSZr , X86::VFNMADD231SSr },
+ { X86::VFNMADD231SSZr_Int , X86::VFNMADD231SSr_Int },
+ { X86::VFNMSUB132SDZm , X86::VFNMSUB132SDm },
+ { X86::VFNMSUB132SDZm_Int , X86::VFNMSUB132SDm_Int },
+ { X86::VFNMSUB132SDZr , X86::VFNMSUB132SDr },
+ { X86::VFNMSUB132SDZr_Int , X86::VFNMSUB132SDr_Int },
+ { X86::VFNMSUB132SSZm , X86::VFNMSUB132SSm },
+ { X86::VFNMSUB132SSZm_Int , X86::VFNMSUB132SSm_Int },
+ { X86::VFNMSUB132SSZr , X86::VFNMSUB132SSr },
+ { X86::VFNMSUB132SSZr_Int , X86::VFNMSUB132SSr_Int },
+ { X86::VFNMSUB213SDZm , X86::VFNMSUB213SDm },
+ { X86::VFNMSUB213SDZm_Int , X86::VFNMSUB213SDm_Int },
+ { X86::VFNMSUB213SDZr , X86::VFNMSUB213SDr },
+ { X86::VFNMSUB213SDZr_Int , X86::VFNMSUB213SDr_Int },
+ { X86::VFNMSUB213SSZm , X86::VFNMSUB213SSm },
+ { X86::VFNMSUB213SSZm_Int , X86::VFNMSUB213SSm_Int },
+ { X86::VFNMSUB213SSZr , X86::VFNMSUB213SSr },
+ { X86::VFNMSUB213SSZr_Int , X86::VFNMSUB213SSr_Int },
+ { X86::VFNMSUB231SDZm , X86::VFNMSUB231SDm },
+ { X86::VFNMSUB231SDZm_Int , X86::VFNMSUB231SDm_Int },
+ { X86::VFNMSUB231SDZr , X86::VFNMSUB231SDr },
+ { X86::VFNMSUB231SDZr_Int , X86::VFNMSUB231SDr_Int },
+ { X86::VFNMSUB231SSZm , X86::VFNMSUB231SSm },
+ { X86::VFNMSUB231SSZm_Int , X86::VFNMSUB231SSm_Int },
+ { X86::VFNMSUB231SSZr , X86::VFNMSUB231SSr },
+ { X86::VFNMSUB231SSZr_Int , X86::VFNMSUB231SSr_Int },
+ { X86::VMAXCSDZrm , X86::VMAXCSDrm },
+ { X86::VMAXCSDZrr , X86::VMAXCSDrr },
+ { X86::VMAXCSSZrm , X86::VMAXCSSrm },
+ { X86::VMAXCSSZrr , X86::VMAXCSSrr },
+ { X86::VMAXSDZrm , X86::VMAXSDrm },
+ { X86::VMAXSDZrm_Int , X86::VMAXSDrm_Int },
+ { X86::VMAXSDZrr , X86::VMAXSDrr },
+ { X86::VMAXSDZrr_Int , X86::VMAXSDrr_Int },
+ { X86::VMAXSSZrm , X86::VMAXSSrm },
+ { X86::VMAXSSZrm_Int , X86::VMAXSSrm_Int },
+ { X86::VMAXSSZrr , X86::VMAXSSrr },
+ { X86::VMAXSSZrr_Int , X86::VMAXSSrr_Int },
+ { X86::VMINCSDZrm , X86::VMINCSDrm },
+ { X86::VMINCSDZrr , X86::VMINCSDrr },
+ { X86::VMINCSSZrm , X86::VMINCSSrm },
+ { X86::VMINCSSZrr , X86::VMINCSSrr },
+ { X86::VMINSDZrm , X86::VMINSDrm },
+ { X86::VMINSDZrm_Int , X86::VMINSDrm_Int },
+ { X86::VMINSDZrr , X86::VMINSDrr },
+ { X86::VMINSDZrr_Int , X86::VMINSDrr_Int },
+ { X86::VMINSSZrm , X86::VMINSSrm },
+ { X86::VMINSSZrm_Int , X86::VMINSSrm_Int },
+ { X86::VMINSSZrr , X86::VMINSSrr },
+ { X86::VMINSSZrr_Int , X86::VMINSSrr_Int },
+ { X86::VMOV64toSDZrr , X86::VMOV64toSDrr },
+ { X86::VMOVDI2SSZrm , X86::VMOVDI2SSrm },
+ { X86::VMOVDI2SSZrr , X86::VMOVDI2SSrr },
+ { X86::VMOVSDZmr , X86::VMOVSDmr },
+ { X86::VMOVSDZrm , X86::VMOVSDrm },
+ { X86::VMOVSDZrr , X86::VMOVSDrr },
+ { X86::VMOVSSZmr , X86::VMOVSSmr },
+ { X86::VMOVSSZrm , X86::VMOVSSrm },
+ { X86::VMOVSSZrr , X86::VMOVSSrr },
+ { X86::VMOVSSZrr_REV , X86::VMOVSSrr_REV },
+ { X86::VMULSDZrm , X86::VMULSDrm },
+ { X86::VMULSDZrm_Int , X86::VMULSDrm_Int },
+ { X86::VMULSDZrr , X86::VMULSDrr },
+ { X86::VMULSDZrr_Int , X86::VMULSDrr_Int },
+ { X86::VMULSSZrm , X86::VMULSSrm },
+ { X86::VMULSSZrm_Int , X86::VMULSSrm_Int },
+ { X86::VMULSSZrr , X86::VMULSSrr },
+ { X86::VMULSSZrr_Int , X86::VMULSSrr_Int },
+ { X86::VSQRTSDZm , X86::VSQRTSDm },
+ { X86::VSQRTSDZm_Int , X86::VSQRTSDm_Int },
+ { X86::VSQRTSDZr , X86::VSQRTSDr },
+ { X86::VSQRTSDZr_Int , X86::VSQRTSDr_Int },
+ { X86::VSQRTSSZm , X86::VSQRTSSm },
+ { X86::VSQRTSSZm_Int , X86::VSQRTSSm_Int },
+ { X86::VSQRTSSZr , X86::VSQRTSSr },
+ { X86::VSQRTSSZr_Int , X86::VSQRTSSr_Int },
+ { X86::VSUBSDZrm , X86::VSUBSDrm },
+ { X86::VSUBSDZrm_Int , X86::VSUBSDrm_Int },
+ { X86::VSUBSDZrr , X86::VSUBSDrr },
+ { X86::VSUBSDZrr_Int , X86::VSUBSDrr_Int },
+ { X86::VSUBSSZrm , X86::VSUBSSrm },
+ { X86::VSUBSSZrm_Int , X86::VSUBSSrm_Int },
+ { X86::VSUBSSZrr , X86::VSUBSSrr },
+ { X86::VSUBSSZrr_Int , X86::VSUBSSrr_Int },
+ { X86::VUCOMISDZrm , X86::VUCOMISDrm },
+ { X86::VUCOMISDZrr , X86::VUCOMISDrr },
+ { X86::VUCOMISSZrm , X86::VUCOMISSrm },
+ { X86::VUCOMISSZrr , X86::VUCOMISSrr },
+
+ { X86::VMOV64toPQIZrr , X86::VMOV64toPQIrr },
+ { X86::VMOV64toSDZrr , X86::VMOV64toSDrr },
+ { X86::VMOVDI2PDIZrm , X86::VMOVDI2PDIrm },
+ { X86::VMOVDI2PDIZrr , X86::VMOVDI2PDIrr },
+ { X86::VMOVLHPSZrr , X86::VMOVLHPSrr },
+ { X86::VMOVHLPSZrr , X86::VMOVHLPSrr },
+ { X86::VMOVPDI2DIZmr , X86::VMOVPDI2DImr },
+ { X86::VMOVPDI2DIZrr , X86::VMOVPDI2DIrr },
+ { X86::VMOVPQI2QIZmr , X86::VMOVPQI2QImr },
+ { X86::VMOVPQIto64Zrr , X86::VMOVPQIto64rr },
+ { X86::VMOVQI2PQIZrm , X86::VMOVQI2PQIrm },
+ { X86::VMOVZPQILo2PQIZrr , X86::VMOVZPQILo2PQIrr },
+
+ { X86::VPEXTRBZmr , X86::VPEXTRBmr },
+ { X86::VPEXTRBZrr , X86::VPEXTRBrr },
+ { X86::VPEXTRDZmr , X86::VPEXTRDmr },
+ { X86::VPEXTRDZrr , X86::VPEXTRDrr },
+ { X86::VPEXTRQZmr , X86::VPEXTRQmr },
+ { X86::VPEXTRQZrr , X86::VPEXTRQrr },
+ { X86::VPEXTRWZmr , X86::VPEXTRWmr },
+ { X86::VPEXTRWZrr , X86::VPEXTRWri },
+
+ { X86::VPINSRBZrm , X86::VPINSRBrm },
+ { X86::VPINSRBZrr , X86::VPINSRBrr },
+ { X86::VPINSRDZrm , X86::VPINSRDrm },
+ { X86::VPINSRDZrr , X86::VPINSRDrr },
+ { X86::VPINSRQZrm , X86::VPINSRQrm },
+ { X86::VPINSRQZrr , X86::VPINSRQrr },
+ { X86::VPINSRWZrm , X86::VPINSRWrmi },
+ { X86::VPINSRWZrr , X86::VPINSRWrri },
+
+ // EVEX 128 with corresponding VEX.
+ { X86::VADDPDZ128rm , X86::VADDPDrm },
+ { X86::VADDPDZ128rr , X86::VADDPDrr },
+ { X86::VADDPSZ128rm , X86::VADDPSrm },
+ { X86::VADDPSZ128rr , X86::VADDPSrr },
+ { X86::VANDNPDZ128rm , X86::VANDNPDrm },
+ { X86::VANDNPDZ128rr , X86::VANDNPDrr },
+ { X86::VANDNPSZ128rm , X86::VANDNPSrm },
+ { X86::VANDNPSZ128rr , X86::VANDNPSrr },
+ { X86::VANDPDZ128rm , X86::VANDPDrm },
+ { X86::VANDPDZ128rr , X86::VANDPDrr },
+ { X86::VANDPSZ128rm , X86::VANDPSrm },
+ { X86::VANDPSZ128rr , X86::VANDPSrr },
+ { X86::VBROADCASTSSZ128m , X86::VBROADCASTSSrm },
+ { X86::VBROADCASTSSZ128r , X86::VBROADCASTSSrr },
+ { X86::VBROADCASTSSZ128r_s , X86::VBROADCASTSSrr },
+ { X86::VCVTDQ2PDZ128rm , X86::VCVTDQ2PDrm },
+ { X86::VCVTDQ2PDZ128rr , X86::VCVTDQ2PDrr },
+ { X86::VCVTDQ2PSZ128rm , X86::VCVTDQ2PSrm },
+ { X86::VCVTDQ2PSZ128rr , X86::VCVTDQ2PSrr },
+ { X86::VCVTPD2DQZ128rm , X86::VCVTPD2DQrm },
+ { X86::VCVTPD2DQZ128rr , X86::VCVTPD2DQrr },
+ { X86::VCVTPD2PSZ128rm , X86::VCVTPD2PSrm },
+ { X86::VCVTPD2PSZ128rr , X86::VCVTPD2PSrr },
+ { X86::VCVTPH2PSZ128rm , X86::VCVTPH2PSrm },
+ { X86::VCVTPH2PSZ128rr , X86::VCVTPH2PSrr },
+ { X86::VCVTPS2DQZ128rm , X86::VCVTPS2DQrm },
+ { X86::VCVTPS2DQZ128rr , X86::VCVTPS2DQrr },
+ { X86::VCVTPS2PDZ128rm , X86::VCVTPS2PDrm },
+ { X86::VCVTPS2PDZ128rr , X86::VCVTPS2PDrr },
+ { X86::VCVTPS2PHZ128mr , X86::VCVTPS2PHmr },
+ { X86::VCVTPS2PHZ128rr , X86::VCVTPS2PHrr },
+ { X86::VCVTTPD2DQZ128rm , X86::VCVTTPD2DQrm },
+ { X86::VCVTTPD2DQZ128rr , X86::VCVTTPD2DQrr },
+ { X86::VCVTTPS2DQZ128rm , X86::VCVTTPS2DQrm },
+ { X86::VCVTTPS2DQZ128rr , X86::VCVTTPS2DQrr },
+ { X86::VDIVPDZ128rm , X86::VDIVPDrm },
+ { X86::VDIVPDZ128rr , X86::VDIVPDrr },
+ { X86::VDIVPSZ128rm , X86::VDIVPSrm },
+ { X86::VDIVPSZ128rr , X86::VDIVPSrr },
+ { X86::VFMADD132PDZ128m , X86::VFMADD132PDm },
+ { X86::VFMADD132PDZ128r , X86::VFMADD132PDr },
+ { X86::VFMADD132PSZ128m , X86::VFMADD132PSm },
+ { X86::VFMADD132PSZ128r , X86::VFMADD132PSr },
+ { X86::VFMADD213PDZ128m , X86::VFMADD213PDm },
+ { X86::VFMADD213PDZ128r , X86::VFMADD213PDr },
+ { X86::VFMADD213PSZ128m , X86::VFMADD213PSm },
+ { X86::VFMADD213PSZ128r , X86::VFMADD213PSr },
+ { X86::VFMADD231PDZ128m , X86::VFMADD231PDm },
+ { X86::VFMADD231PDZ128r , X86::VFMADD231PDr },
+ { X86::VFMADD231PSZ128m , X86::VFMADD231PSm },
+ { X86::VFMADD231PSZ128r , X86::VFMADD231PSr },
+ { X86::VFMADDSUB132PDZ128m , X86::VFMADDSUB132PDm },
+ { X86::VFMADDSUB132PDZ128r , X86::VFMADDSUB132PDr },
+ { X86::VFMADDSUB132PSZ128m , X86::VFMADDSUB132PSm },
+ { X86::VFMADDSUB132PSZ128r , X86::VFMADDSUB132PSr },
+ { X86::VFMADDSUB213PDZ128m , X86::VFMADDSUB213PDm },
+ { X86::VFMADDSUB213PDZ128r , X86::VFMADDSUB213PDr },
+ { X86::VFMADDSUB213PSZ128m , X86::VFMADDSUB213PSm },
+ { X86::VFMADDSUB213PSZ128r , X86::VFMADDSUB213PSr },
+ { X86::VFMADDSUB231PDZ128m , X86::VFMADDSUB231PDm },
+ { X86::VFMADDSUB231PDZ128r , X86::VFMADDSUB231PDr },
+ { X86::VFMADDSUB231PSZ128m , X86::VFMADDSUB231PSm },
+ { X86::VFMADDSUB231PSZ128r , X86::VFMADDSUB231PSr },
+ { X86::VFMSUB132PDZ128m , X86::VFMSUB132PDm },
+ { X86::VFMSUB132PDZ128r , X86::VFMSUB132PDr },
+ { X86::VFMSUB132PSZ128m , X86::VFMSUB132PSm },
+ { X86::VFMSUB132PSZ128r , X86::VFMSUB132PSr },
+ { X86::VFMSUB213PDZ128m , X86::VFMSUB213PDm },
+ { X86::VFMSUB213PDZ128r , X86::VFMSUB213PDr },
+ { X86::VFMSUB213PSZ128m , X86::VFMSUB213PSm },
+ { X86::VFMSUB213PSZ128r , X86::VFMSUB213PSr },
+ { X86::VFMSUB231PDZ128m , X86::VFMSUB231PDm },
+ { X86::VFMSUB231PDZ128r , X86::VFMSUB231PDr },
+ { X86::VFMSUB231PSZ128m , X86::VFMSUB231PSm },
+ { X86::VFMSUB231PSZ128r , X86::VFMSUB231PSr },
+ { X86::VFMSUBADD132PDZ128m , X86::VFMSUBADD132PDm },
+ { X86::VFMSUBADD132PDZ128r , X86::VFMSUBADD132PDr },
+ { X86::VFMSUBADD132PSZ128m , X86::VFMSUBADD132PSm },
+ { X86::VFMSUBADD132PSZ128r , X86::VFMSUBADD132PSr },
+ { X86::VFMSUBADD213PDZ128m , X86::VFMSUBADD213PDm },
+ { X86::VFMSUBADD213PDZ128r , X86::VFMSUBADD213PDr },
+ { X86::VFMSUBADD213PSZ128m , X86::VFMSUBADD213PSm },
+ { X86::VFMSUBADD213PSZ128r , X86::VFMSUBADD213PSr },
+ { X86::VFMSUBADD231PDZ128m , X86::VFMSUBADD231PDm },
+ { X86::VFMSUBADD231PDZ128r , X86::VFMSUBADD231PDr },
+ { X86::VFMSUBADD231PSZ128m , X86::VFMSUBADD231PSm },
+ { X86::VFMSUBADD231PSZ128r , X86::VFMSUBADD231PSr },
+ { X86::VFNMADD132PDZ128m , X86::VFNMADD132PDm },
+ { X86::VFNMADD132PDZ128r , X86::VFNMADD132PDr },
+ { X86::VFNMADD132PSZ128m , X86::VFNMADD132PSm },
+ { X86::VFNMADD132PSZ128r , X86::VFNMADD132PSr },
+ { X86::VFNMADD213PDZ128m , X86::VFNMADD213PDm },
+ { X86::VFNMADD213PDZ128r , X86::VFNMADD213PDr },
+ { X86::VFNMADD213PSZ128m , X86::VFNMADD213PSm },
+ { X86::VFNMADD213PSZ128r , X86::VFNMADD213PSr },
+ { X86::VFNMADD231PDZ128m , X86::VFNMADD231PDm },
+ { X86::VFNMADD231PDZ128r , X86::VFNMADD231PDr },
+ { X86::VFNMADD231PSZ128m , X86::VFNMADD231PSm },
+ { X86::VFNMADD231PSZ128r , X86::VFNMADD231PSr },
+ { X86::VFNMSUB132PDZ128m , X86::VFNMSUB132PDm },
+ { X86::VFNMSUB132PDZ128r , X86::VFNMSUB132PDr },
+ { X86::VFNMSUB132PSZ128m , X86::VFNMSUB132PSm },
+ { X86::VFNMSUB132PSZ128r , X86::VFNMSUB132PSr },
+ { X86::VFNMSUB213PDZ128m , X86::VFNMSUB213PDm },
+ { X86::VFNMSUB213PDZ128r , X86::VFNMSUB213PDr },
+ { X86::VFNMSUB213PSZ128m , X86::VFNMSUB213PSm },
+ { X86::VFNMSUB213PSZ128r , X86::VFNMSUB213PSr },
+ { X86::VFNMSUB231PDZ128m , X86::VFNMSUB231PDm },
+ { X86::VFNMSUB231PDZ128r , X86::VFNMSUB231PDr },
+ { X86::VFNMSUB231PSZ128m , X86::VFNMSUB231PSm },
+ { X86::VFNMSUB231PSZ128r , X86::VFNMSUB231PSr },
+ { X86::VMAXCPDZ128rm , X86::VMAXCPDrm },
+ { X86::VMAXCPDZ128rr , X86::VMAXCPDrr },
+ { X86::VMAXCPSZ128rm , X86::VMAXCPSrm },
+ { X86::VMAXCPSZ128rr , X86::VMAXCPSrr },
+ { X86::VMAXPDZ128rm , X86::VMAXPDrm },
+ { X86::VMAXPDZ128rr , X86::VMAXPDrr },
+ { X86::VMAXPSZ128rm , X86::VMAXPSrm },
+ { X86::VMAXPSZ128rr , X86::VMAXPSrr },
+ { X86::VMINCPDZ128rm , X86::VMINCPDrm },
+ { X86::VMINCPDZ128rr , X86::VMINCPDrr },
+ { X86::VMINCPSZ128rm , X86::VMINCPSrm },
+ { X86::VMINCPSZ128rr , X86::VMINCPSrr },
+ { X86::VMINPDZ128rm , X86::VMINPDrm },
+ { X86::VMINPDZ128rr , X86::VMINPDrr },
+ { X86::VMINPSZ128rm , X86::VMINPSrm },
+ { X86::VMINPSZ128rr , X86::VMINPSrr },
+ { X86::VMOVAPDZ128mr , X86::VMOVAPDmr },
+ { X86::VMOVAPDZ128rm , X86::VMOVAPDrm },
+ { X86::VMOVAPDZ128rr , X86::VMOVAPDrr },
+ { X86::VMOVAPDZ128rr_REV , X86::VMOVAPDrr_REV },
+ { X86::VMOVAPSZ128mr , X86::VMOVAPSmr },
+ { X86::VMOVAPSZ128rm , X86::VMOVAPSrm },
+ { X86::VMOVAPSZ128rr , X86::VMOVAPSrr },
+ { X86::VMOVAPSZ128rr_REV , X86::VMOVAPSrr_REV },
+ { X86::VMOVDDUPZ128rm , X86::VMOVDDUPrm },
+ { X86::VMOVDDUPZ128rr , X86::VMOVDDUPrr },
+ { X86::VMOVDQA32Z128mr , X86::VMOVDQAmr },
+ { X86::VMOVDQA32Z128rm , X86::VMOVDQArm },
+ { X86::VMOVDQA32Z128rr , X86::VMOVDQArr },
+ { X86::VMOVDQA32Z128rr_REV , X86::VMOVDQArr_REV },
+ { X86::VMOVDQA64Z128mr , X86::VMOVDQAmr },
+ { X86::VMOVDQA64Z128rm , X86::VMOVDQArm },
+ { X86::VMOVDQA64Z128rr , X86::VMOVDQArr },
+ { X86::VMOVDQA64Z128rr_REV , X86::VMOVDQArr_REV },
+ { X86::VMOVDQU16Z128mr , X86::VMOVDQUmr },
+ { X86::VMOVDQU16Z128rm , X86::VMOVDQUrm },
+ { X86::VMOVDQU16Z128rr , X86::VMOVDQUrr },
+ { X86::VMOVDQU16Z128rr_REV , X86::VMOVDQUrr_REV },
+ { X86::VMOVDQU32Z128mr , X86::VMOVDQUmr },
+ { X86::VMOVDQU32Z128rm , X86::VMOVDQUrm },
+ { X86::VMOVDQU32Z128rr , X86::VMOVDQUrr },
+ { X86::VMOVDQU32Z128rr_REV , X86::VMOVDQUrr_REV },
+ { X86::VMOVDQU64Z128mr , X86::VMOVDQUmr },
+ { X86::VMOVDQU64Z128rm , X86::VMOVDQUrm },
+ { X86::VMOVDQU64Z128rr , X86::VMOVDQUrr },
+ { X86::VMOVDQU64Z128rr_REV , X86::VMOVDQUrr_REV },
+ { X86::VMOVDQU8Z128mr , X86::VMOVDQUmr },
+ { X86::VMOVDQU8Z128rm , X86::VMOVDQUrm },
+ { X86::VMOVDQU8Z128rr , X86::VMOVDQUrr },
+ { X86::VMOVDQU8Z128rr_REV , X86::VMOVDQUrr_REV },
+ { X86::VMOVHPDZ128mr , X86::VMOVHPDmr },
+ { X86::VMOVHPDZ128rm , X86::VMOVHPDrm },
+ { X86::VMOVHPSZ128mr , X86::VMOVHPSmr },
+ { X86::VMOVHPSZ128rm , X86::VMOVHPSrm },
+ { X86::VMOVLPDZ128mr , X86::VMOVLPDmr },
+ { X86::VMOVLPDZ128rm , X86::VMOVLPDrm },
+ { X86::VMOVLPSZ128mr , X86::VMOVLPSmr },
+ { X86::VMOVLPSZ128rm , X86::VMOVLPSrm },
+ { X86::VMOVNTDQAZ128rm , X86::VMOVNTDQArm },
+ { X86::VMOVNTDQZ128mr , X86::VMOVNTDQmr },
+ { X86::VMOVNTPDZ128mr , X86::VMOVNTPDmr },
+ { X86::VMOVNTPSZ128mr , X86::VMOVNTPSmr },
+ { X86::VMOVSHDUPZ128rm , X86::VMOVSHDUPrm },
+ { X86::VMOVSHDUPZ128rr , X86::VMOVSHDUPrr },
+ { X86::VMOVSLDUPZ128rm , X86::VMOVSLDUPrm },
+ { X86::VMOVSLDUPZ128rr , X86::VMOVSLDUPrr },
+ { X86::VMOVUPDZ128mr , X86::VMOVUPDmr },
+ { X86::VMOVUPDZ128rm , X86::VMOVUPDrm },
+ { X86::VMOVUPDZ128rr , X86::VMOVUPDrr },
+ { X86::VMOVUPDZ128rr_REV , X86::VMOVUPDrr_REV },
+ { X86::VMOVUPSZ128mr , X86::VMOVUPSmr },
+ { X86::VMOVUPSZ128rm , X86::VMOVUPSrm },
+ { X86::VMOVUPSZ128rr , X86::VMOVUPSrr },
+ { X86::VMOVUPSZ128rr_REV , X86::VMOVUPSrr_REV },
+ { X86::VMULPDZ128rm , X86::VMULPDrm },
+ { X86::VMULPDZ128rr , X86::VMULPDrr },
+ { X86::VMULPSZ128rm , X86::VMULPSrm },
+ { X86::VMULPSZ128rr , X86::VMULPSrr },
+ { X86::VORPDZ128rm , X86::VORPDrm },
+ { X86::VORPDZ128rr , X86::VORPDrr },
+ { X86::VORPSZ128rm , X86::VORPSrm },
+ { X86::VORPSZ128rr , X86::VORPSrr },
+ { X86::VPABSBZ128rm , X86::VPABSBrm },
+ { X86::VPABSBZ128rr , X86::VPABSBrr },
+ { X86::VPABSDZ128rm , X86::VPABSDrm },
+ { X86::VPABSDZ128rr , X86::VPABSDrr },
+ { X86::VPABSWZ128rm , X86::VPABSWrm },
+ { X86::VPABSWZ128rr , X86::VPABSWrr },
+ { X86::VPACKSSDWZ128rm , X86::VPACKSSDWrm },
+ { X86::VPACKSSDWZ128rr , X86::VPACKSSDWrr },
+ { X86::VPACKSSWBZ128rm , X86::VPACKSSWBrm },
+ { X86::VPACKSSWBZ128rr , X86::VPACKSSWBrr },
+ { X86::VPACKUSDWZ128rm , X86::VPACKUSDWrm },
+ { X86::VPACKUSDWZ128rr , X86::VPACKUSDWrr },
+ { X86::VPACKUSWBZ128rm , X86::VPACKUSWBrm },
+ { X86::VPACKUSWBZ128rr , X86::VPACKUSWBrr },
+ { X86::VPADDBZ128rm , X86::VPADDBrm },
+ { X86::VPADDBZ128rr , X86::VPADDBrr },
+ { X86::VPADDDZ128rm , X86::VPADDDrm },
+ { X86::VPADDDZ128rr , X86::VPADDDrr },
+ { X86::VPADDQZ128rm , X86::VPADDQrm },
+ { X86::VPADDQZ128rr , X86::VPADDQrr },
+ { X86::VPADDSBZ128rm , X86::VPADDSBrm },
+ { X86::VPADDSBZ128rr , X86::VPADDSBrr },
+ { X86::VPADDSWZ128rm , X86::VPADDSWrm },
+ { X86::VPADDSWZ128rr , X86::VPADDSWrr },
+ { X86::VPADDUSBZ128rm , X86::VPADDUSBrm },
+ { X86::VPADDUSBZ128rr , X86::VPADDUSBrr },
+ { X86::VPADDUSWZ128rm , X86::VPADDUSWrm },
+ { X86::VPADDUSWZ128rr , X86::VPADDUSWrr },
+ { X86::VPADDWZ128rm , X86::VPADDWrm },
+ { X86::VPADDWZ128rr , X86::VPADDWrr },
+ { X86::VPALIGNRZ128rmi , X86::VPALIGNRrmi },
+ { X86::VPALIGNRZ128rri , X86::VPALIGNRrri },
+ { X86::VPANDDZ128rm , X86::VPANDrm },
+ { X86::VPANDDZ128rr , X86::VPANDrr },
+ { X86::VPANDQZ128rm , X86::VPANDrm },
+ { X86::VPANDQZ128rr , X86::VPANDrr },
+ { X86::VPAVGBZ128rm , X86::VPAVGBrm },
+ { X86::VPAVGBZ128rr , X86::VPAVGBrr },
+ { X86::VPAVGWZ128rm , X86::VPAVGWrm },
+ { X86::VPAVGWZ128rr , X86::VPAVGWrr },
+ { X86::VPBROADCASTBZ128m , X86::VPBROADCASTBrm },
+ { X86::VPBROADCASTBZ128r , X86::VPBROADCASTBrr },
+ { X86::VPBROADCASTDZ128m , X86::VPBROADCASTDrm },
+ { X86::VPBROADCASTDZ128r , X86::VPBROADCASTDrr },
+ { X86::VPBROADCASTQZ128m , X86::VPBROADCASTQrm },
+ { X86::VPBROADCASTQZ128r , X86::VPBROADCASTQrr },
+ { X86::VPBROADCASTWZ128m , X86::VPBROADCASTWrm },
+ { X86::VPBROADCASTWZ128r , X86::VPBROADCASTWrr },
+ { X86::VPERMILPDZ128mi , X86::VPERMILPDmi },
+ { X86::VPERMILPDZ128ri , X86::VPERMILPDri },
+ { X86::VPERMILPDZ128rm , X86::VPERMILPDrm },
+ { X86::VPERMILPDZ128rr , X86::VPERMILPDrr },
+ { X86::VPERMILPSZ128mi , X86::VPERMILPSmi },
+ { X86::VPERMILPSZ128ri , X86::VPERMILPSri },
+ { X86::VPERMILPSZ128rm , X86::VPERMILPSrm },
+ { X86::VPERMILPSZ128rr , X86::VPERMILPSrr },
+ { X86::VPMADDUBSWZ128rm , X86::VPMADDUBSWrm },
+ { X86::VPMADDUBSWZ128rr , X86::VPMADDUBSWrr },
+ { X86::VPMADDWDZ128rm , X86::VPMADDWDrm },
+ { X86::VPMADDWDZ128rr , X86::VPMADDWDrr },
+ { X86::VPMAXSBZ128rm , X86::VPMAXSBrm },
+ { X86::VPMAXSBZ128rr , X86::VPMAXSBrr },
+ { X86::VPMAXSDZ128rm , X86::VPMAXSDrm },
+ { X86::VPMAXSDZ128rr , X86::VPMAXSDrr },
+ { X86::VPMAXSWZ128rm , X86::VPMAXSWrm },
+ { X86::VPMAXSWZ128rr , X86::VPMAXSWrr },
+ { X86::VPMAXUBZ128rm , X86::VPMAXUBrm },
+ { X86::VPMAXUBZ128rr , X86::VPMAXUBrr },
+ { X86::VPMAXUDZ128rm , X86::VPMAXUDrm },
+ { X86::VPMAXUDZ128rr , X86::VPMAXUDrr },
+ { X86::VPMAXUWZ128rm , X86::VPMAXUWrm },
+ { X86::VPMAXUWZ128rr , X86::VPMAXUWrr },
+ { X86::VPMINSBZ128rm , X86::VPMINSBrm },
+ { X86::VPMINSBZ128rr , X86::VPMINSBrr },
+ { X86::VPMINSDZ128rm , X86::VPMINSDrm },
+ { X86::VPMINSDZ128rr , X86::VPMINSDrr },
+ { X86::VPMINSWZ128rm , X86::VPMINSWrm },
+ { X86::VPMINSWZ128rr , X86::VPMINSWrr },
+ { X86::VPMINUBZ128rm , X86::VPMINUBrm },
+ { X86::VPMINUBZ128rr , X86::VPMINUBrr },
+ { X86::VPMINUDZ128rm , X86::VPMINUDrm },
+ { X86::VPMINUDZ128rr , X86::VPMINUDrr },
+ { X86::VPMINUWZ128rm , X86::VPMINUWrm },
+ { X86::VPMINUWZ128rr , X86::VPMINUWrr },
+ { X86::VPMOVSXBDZ128rm , X86::VPMOVSXBDrm },
+ { X86::VPMOVSXBDZ128rr , X86::VPMOVSXBDrr },
+ { X86::VPMOVSXBQZ128rm , X86::VPMOVSXBQrm },
+ { X86::VPMOVSXBQZ128rr , X86::VPMOVSXBQrr },
+ { X86::VPMOVSXBWZ128rm , X86::VPMOVSXBWrm },
+ { X86::VPMOVSXBWZ128rr , X86::VPMOVSXBWrr },
+ { X86::VPMOVSXDQZ128rm , X86::VPMOVSXDQrm },
+ { X86::VPMOVSXDQZ128rr , X86::VPMOVSXDQrr },
+ { X86::VPMOVSXWDZ128rm , X86::VPMOVSXWDrm },
+ { X86::VPMOVSXWDZ128rr , X86::VPMOVSXWDrr },
+ { X86::VPMOVSXWQZ128rm , X86::VPMOVSXWQrm },
+ { X86::VPMOVSXWQZ128rr , X86::VPMOVSXWQrr },
+ { X86::VPMOVZXBDZ128rm , X86::VPMOVZXBDrm },
+ { X86::VPMOVZXBDZ128rr , X86::VPMOVZXBDrr },
+ { X86::VPMOVZXBQZ128rm , X86::VPMOVZXBQrm },
+ { X86::VPMOVZXBQZ128rr , X86::VPMOVZXBQrr },
+ { X86::VPMOVZXBWZ128rm , X86::VPMOVZXBWrm },
+ { X86::VPMOVZXBWZ128rr , X86::VPMOVZXBWrr },
+ { X86::VPMOVZXDQZ128rm , X86::VPMOVZXDQrm },
+ { X86::VPMOVZXDQZ128rr , X86::VPMOVZXDQrr },
+ { X86::VPMOVZXWDZ128rm , X86::VPMOVZXWDrm },
+ { X86::VPMOVZXWDZ128rr , X86::VPMOVZXWDrr },
+ { X86::VPMOVZXWQZ128rm , X86::VPMOVZXWQrm },
+ { X86::VPMOVZXWQZ128rr , X86::VPMOVZXWQrr },
+ { X86::VPMULDQZ128rm , X86::VPMULDQrm },
+ { X86::VPMULDQZ128rr , X86::VPMULDQrr },
+ { X86::VPMULHRSWZ128rm , X86::VPMULHRSWrm },
+ { X86::VPMULHRSWZ128rr , X86::VPMULHRSWrr },
+ { X86::VPMULHUWZ128rm , X86::VPMULHUWrm },
+ { X86::VPMULHUWZ128rr , X86::VPMULHUWrr },
+ { X86::VPMULHWZ128rm , X86::VPMULHWrm },
+ { X86::VPMULHWZ128rr , X86::VPMULHWrr },
+ { X86::VPMULLDZ128rm , X86::VPMULLDrm },
+ { X86::VPMULLDZ128rr , X86::VPMULLDrr },
+ { X86::VPMULLWZ128rm , X86::VPMULLWrm },
+ { X86::VPMULLWZ128rr , X86::VPMULLWrr },
+ { X86::VPMULUDQZ128rm , X86::VPMULUDQrm },
+ { X86::VPMULUDQZ128rr , X86::VPMULUDQrr },
+ { X86::VPORDZ128rm , X86::VPORrm },
+ { X86::VPORDZ128rr , X86::VPORrr },
+ { X86::VPORQZ128rm , X86::VPORrm },
+ { X86::VPORQZ128rr , X86::VPORrr },
+ { X86::VPSADBWZ128rm , X86::VPSADBWrm },
+ { X86::VPSADBWZ128rr , X86::VPSADBWrr },
+ { X86::VPSHUFBZ128rm , X86::VPSHUFBrm },
+ { X86::VPSHUFBZ128rr , X86::VPSHUFBrr },
+ { X86::VPSHUFDZ128mi , X86::VPSHUFDmi },
+ { X86::VPSHUFDZ128ri , X86::VPSHUFDri },
+ { X86::VPSHUFHWZ128mi , X86::VPSHUFHWmi },
+ { X86::VPSHUFHWZ128ri , X86::VPSHUFHWri },
+ { X86::VPSHUFLWZ128mi , X86::VPSHUFLWmi },
+ { X86::VPSHUFLWZ128ri , X86::VPSHUFLWri },
+ { X86::VPSLLDQZ128rr , X86::VPSLLDQri },
+ { X86::VPSLLDZ128ri , X86::VPSLLDri },
+ { X86::VPSLLDZ128rm , X86::VPSLLDrm },
+ { X86::VPSLLDZ128rr , X86::VPSLLDrr },
+ { X86::VPSLLQZ128ri , X86::VPSLLQri },
+ { X86::VPSLLQZ128rm , X86::VPSLLQrm },
+ { X86::VPSLLQZ128rr , X86::VPSLLQrr },
+ { X86::VPSLLVDZ128rm , X86::VPSLLVDrm },
+ { X86::VPSLLVDZ128rr , X86::VPSLLVDrr },
+ { X86::VPSLLVQZ128rm , X86::VPSLLVQrm },
+ { X86::VPSLLVQZ128rr , X86::VPSLLVQrr },
+ { X86::VPSLLWZ128ri , X86::VPSLLWri },
+ { X86::VPSLLWZ128rm , X86::VPSLLWrm },
+ { X86::VPSLLWZ128rr , X86::VPSLLWrr },
+ { X86::VPSRADZ128ri , X86::VPSRADri },
+ { X86::VPSRADZ128rm , X86::VPSRADrm },
+ { X86::VPSRADZ128rr , X86::VPSRADrr },
+ { X86::VPSRAVDZ128rm , X86::VPSRAVDrm },
+ { X86::VPSRAVDZ128rr , X86::VPSRAVDrr },
+ { X86::VPSRAWZ128ri , X86::VPSRAWri },
+ { X86::VPSRAWZ128rm , X86::VPSRAWrm },
+ { X86::VPSRAWZ128rr , X86::VPSRAWrr },
+ { X86::VPSRLDQZ128rr , X86::VPSRLDQri },
+ { X86::VPSRLDZ128ri , X86::VPSRLDri },
+ { X86::VPSRLDZ128rm , X86::VPSRLDrm },
+ { X86::VPSRLDZ128rr , X86::VPSRLDrr },
+ { X86::VPSRLQZ128ri , X86::VPSRLQri },
+ { X86::VPSRLQZ128rm , X86::VPSRLQrm },
+ { X86::VPSRLQZ128rr , X86::VPSRLQrr },
+ { X86::VPSRLVDZ128rm , X86::VPSRLVDrm },
+ { X86::VPSRLVDZ128rr , X86::VPSRLVDrr },
+ { X86::VPSRLVQZ128rm , X86::VPSRLVQrm },
+ { X86::VPSRLVQZ128rr , X86::VPSRLVQrr },
+ { X86::VPSRLWZ128ri , X86::VPSRLWri },
+ { X86::VPSRLWZ128rm , X86::VPSRLWrm },
+ { X86::VPSRLWZ128rr , X86::VPSRLWrr },
+ { X86::VPSUBBZ128rm , X86::VPSUBBrm },
+ { X86::VPSUBBZ128rr , X86::VPSUBBrr },
+ { X86::VPSUBDZ128rm , X86::VPSUBDrm },
+ { X86::VPSUBDZ128rr , X86::VPSUBDrr },
+ { X86::VPSUBQZ128rm , X86::VPSUBQrm },
+ { X86::VPSUBQZ128rr , X86::VPSUBQrr },
+ { X86::VPSUBSBZ128rm , X86::VPSUBSBrm },
+ { X86::VPSUBSBZ128rr , X86::VPSUBSBrr },
+ { X86::VPSUBSWZ128rm , X86::VPSUBSWrm },
+ { X86::VPSUBSWZ128rr , X86::VPSUBSWrr },
+ { X86::VPSUBUSBZ128rm , X86::VPSUBUSBrm },
+ { X86::VPSUBUSBZ128rr , X86::VPSUBUSBrr },
+ { X86::VPSUBUSWZ128rm , X86::VPSUBUSWrm },
+ { X86::VPSUBUSWZ128rr , X86::VPSUBUSWrr },
+ { X86::VPSUBWZ128rm , X86::VPSUBWrm },
+ { X86::VPSUBWZ128rr , X86::VPSUBWrr },
+ { X86::VPUNPCKHBWZ128rm , X86::VPUNPCKHBWrm },
+ { X86::VPUNPCKHBWZ128rr , X86::VPUNPCKHBWrr },
+ { X86::VPUNPCKHDQZ128rm , X86::VPUNPCKHDQrm },
+ { X86::VPUNPCKHDQZ128rr , X86::VPUNPCKHDQrr },
+ { X86::VPUNPCKHQDQZ128rm , X86::VPUNPCKHQDQrm },
+ { X86::VPUNPCKHQDQZ128rr , X86::VPUNPCKHQDQrr },
+ { X86::VPUNPCKHWDZ128rm , X86::VPUNPCKHWDrm },
+ { X86::VPUNPCKHWDZ128rr , X86::VPUNPCKHWDrr },
+ { X86::VPUNPCKLBWZ128rm , X86::VPUNPCKLBWrm },
+ { X86::VPUNPCKLBWZ128rr , X86::VPUNPCKLBWrr },
+ { X86::VPUNPCKLDQZ128rm , X86::VPUNPCKLDQrm },
+ { X86::VPUNPCKLDQZ128rr , X86::VPUNPCKLDQrr },
+ { X86::VPUNPCKLQDQZ128rm , X86::VPUNPCKLQDQrm },
+ { X86::VPUNPCKLQDQZ128rr , X86::VPUNPCKLQDQrr },
+ { X86::VPUNPCKLWDZ128rm , X86::VPUNPCKLWDrm },
+ { X86::VPUNPCKLWDZ128rr , X86::VPUNPCKLWDrr },
+ { X86::VPXORDZ128rm , X86::VPXORrm },
+ { X86::VPXORDZ128rr , X86::VPXORrr },
+ { X86::VPXORQZ128rm , X86::VPXORrm },
+ { X86::VPXORQZ128rr , X86::VPXORrr },
+ { X86::VSHUFPDZ128rmi , X86::VSHUFPDrmi },
+ { X86::VSHUFPDZ128rri , X86::VSHUFPDrri },
+ { X86::VSHUFPSZ128rmi , X86::VSHUFPSrmi },
+ { X86::VSHUFPSZ128rri , X86::VSHUFPSrri },
+ { X86::VSQRTPDZ128m , X86::VSQRTPDm },
+ { X86::VSQRTPDZ128r , X86::VSQRTPDr },
+ { X86::VSQRTPSZ128m , X86::VSQRTPSm },
+ { X86::VSQRTPSZ128r , X86::VSQRTPSr },
+ { X86::VSUBPDZ128rm , X86::VSUBPDrm },
+ { X86::VSUBPDZ128rr , X86::VSUBPDrr },
+ { X86::VSUBPSZ128rm , X86::VSUBPSrm },
+ { X86::VSUBPSZ128rr , X86::VSUBPSrr },
+ { X86::VUNPCKHPDZ128rm , X86::VUNPCKHPDrm },
+ { X86::VUNPCKHPDZ128rr , X86::VUNPCKHPDrr },
+ { X86::VUNPCKHPSZ128rm , X86::VUNPCKHPSrm },
+ { X86::VUNPCKHPSZ128rr , X86::VUNPCKHPSrr },
+ { X86::VUNPCKLPDZ128rm , X86::VUNPCKLPDrm },
+ { X86::VUNPCKLPDZ128rr , X86::VUNPCKLPDrr },
+ { X86::VUNPCKLPSZ128rm , X86::VUNPCKLPSrm },
+ { X86::VUNPCKLPSZ128rr , X86::VUNPCKLPSrr },
+ { X86::VXORPDZ128rm , X86::VXORPDrm },
+ { X86::VXORPDZ128rr , X86::VXORPDrr },
+ { X86::VXORPSZ128rm , X86::VXORPSrm },
+ { X86::VXORPSZ128rr , X86::VXORPSrr },
+};
+
+
+// X86 EVEX encoded instructions that have a VEX 256 encoding
+// (table format: <EVEX opcode, VEX-256 opcode>).
+ static const X86EvexToVexCompressTableEntry
+ X86EvexToVex256CompressTable[] = {
+ { X86::VADDPDZ256rm , X86::VADDPDYrm },
+ { X86::VADDPDZ256rr , X86::VADDPDYrr },
+ { X86::VADDPSZ256rm , X86::VADDPSYrm },
+ { X86::VADDPSZ256rr , X86::VADDPSYrr },
+ { X86::VANDNPDZ256rm , X86::VANDNPDYrm },
+ { X86::VANDNPDZ256rr , X86::VANDNPDYrr },
+ { X86::VANDNPSZ256rm , X86::VANDNPSYrm },
+ { X86::VANDNPSZ256rr , X86::VANDNPSYrr },
+ { X86::VANDPDZ256rm , X86::VANDPDYrm },
+ { X86::VANDPDZ256rr , X86::VANDPDYrr },
+ { X86::VANDPSZ256rm , X86::VANDPSYrm },
+ { X86::VANDPSZ256rr , X86::VANDPSYrr },
+ { X86::VBROADCASTSDZ256m , X86::VBROADCASTSDYrm },
+ { X86::VBROADCASTSDZ256r , X86::VBROADCASTSDYrr },
+ { X86::VBROADCASTSDZ256r_s , X86::VBROADCASTSDYrr },
+ { X86::VBROADCASTSSZ256m , X86::VBROADCASTSSYrm },
+ { X86::VBROADCASTSSZ256r , X86::VBROADCASTSSYrr },
+ { X86::VBROADCASTSSZ256r_s , X86::VBROADCASTSSYrr },
+ { X86::VCVTDQ2PDZ256rm , X86::VCVTDQ2PDYrm },
+ { X86::VCVTDQ2PDZ256rr , X86::VCVTDQ2PDYrr },
+ { X86::VCVTDQ2PSZ256rm , X86::VCVTDQ2PSYrm },
+ { X86::VCVTDQ2PSZ256rr , X86::VCVTDQ2PSYrr },
+ { X86::VCVTPD2DQZ256rm , X86::VCVTPD2DQYrm },
+ { X86::VCVTPD2DQZ256rr , X86::VCVTPD2DQYrr },
+ { X86::VCVTPD2PSZ256rm , X86::VCVTPD2PSYrm },
+ { X86::VCVTPD2PSZ256rr , X86::VCVTPD2PSYrr },
+ { X86::VCVTPH2PSZ256rm , X86::VCVTPH2PSYrm },
+ { X86::VCVTPH2PSZ256rr , X86::VCVTPH2PSYrr },
+ { X86::VCVTPS2DQZ256rm , X86::VCVTPS2DQYrm },
+ { X86::VCVTPS2DQZ256rr , X86::VCVTPS2DQYrr },
+ { X86::VCVTPS2PDZ256rm , X86::VCVTPS2PDYrm },
+ { X86::VCVTPS2PDZ256rr , X86::VCVTPS2PDYrr },
+ { X86::VCVTPS2PHZ256mr , X86::VCVTPS2PHYmr },
+ { X86::VCVTPS2PHZ256rr , X86::VCVTPS2PHYrr },
+ { X86::VCVTTPD2DQZ256rm , X86::VCVTTPD2DQYrm },
+ { X86::VCVTTPD2DQZ256rr , X86::VCVTTPD2DQYrr },
+ { X86::VCVTTPS2DQZ256rm , X86::VCVTTPS2DQYrm },
+ { X86::VCVTTPS2DQZ256rr , X86::VCVTTPS2DQYrr },
+ { X86::VDIVPDZ256rm , X86::VDIVPDYrm },
+ { X86::VDIVPDZ256rr , X86::VDIVPDYrr },
+ { X86::VDIVPSZ256rm , X86::VDIVPSYrm },
+ { X86::VDIVPSZ256rr , X86::VDIVPSYrr },
+ { X86::VFMADD132PDZ256m , X86::VFMADD132PDYm },
+ { X86::VFMADD132PDZ256r , X86::VFMADD132PDYr },
+ { X86::VFMADD132PSZ256m , X86::VFMADD132PSYm },
+ { X86::VFMADD132PSZ256r , X86::VFMADD132PSYr },
+ { X86::VFMADD213PDZ256m , X86::VFMADD213PDYm },
+ { X86::VFMADD213PDZ256r , X86::VFMADD213PDYr },
+ { X86::VFMADD213PSZ256m , X86::VFMADD213PSYm },
+ { X86::VFMADD213PSZ256r , X86::VFMADD213PSYr },
+ { X86::VFMADD231PDZ256m , X86::VFMADD231PDYm },
+ { X86::VFMADD231PDZ256r , X86::VFMADD231PDYr },
+ { X86::VFMADD231PSZ256m , X86::VFMADD231PSYm },
+ { X86::VFMADD231PSZ256r , X86::VFMADD231PSYr },
+ { X86::VFMADDSUB132PDZ256m , X86::VFMADDSUB132PDYm },
+ { X86::VFMADDSUB132PDZ256r , X86::VFMADDSUB132PDYr },
+ { X86::VFMADDSUB132PSZ256m , X86::VFMADDSUB132PSYm },
+ { X86::VFMADDSUB132PSZ256r , X86::VFMADDSUB132PSYr },
+ { X86::VFMADDSUB213PDZ256m , X86::VFMADDSUB213PDYm },
+ { X86::VFMADDSUB213PDZ256r , X86::VFMADDSUB213PDYr },
+ { X86::VFMADDSUB213PSZ256m , X86::VFMADDSUB213PSYm },
+ { X86::VFMADDSUB213PSZ256r , X86::VFMADDSUB213PSYr },
+ { X86::VFMADDSUB231PDZ256m , X86::VFMADDSUB231PDYm },
+ { X86::VFMADDSUB231PDZ256r , X86::VFMADDSUB231PDYr },
+ { X86::VFMADDSUB231PSZ256m , X86::VFMADDSUB231PSYm },
+ { X86::VFMADDSUB231PSZ256r , X86::VFMADDSUB231PSYr },
+ { X86::VFMSUB132PDZ256m , X86::VFMSUB132PDYm },
+ { X86::VFMSUB132PDZ256r , X86::VFMSUB132PDYr },
+ { X86::VFMSUB132PSZ256m , X86::VFMSUB132PSYm },
+ { X86::VFMSUB132PSZ256r , X86::VFMSUB132PSYr },
+ { X86::VFMSUB213PDZ256m , X86::VFMSUB213PDYm },
+ { X86::VFMSUB213PDZ256r , X86::VFMSUB213PDYr },
+ { X86::VFMSUB213PSZ256m , X86::VFMSUB213PSYm },
+ { X86::VFMSUB213PSZ256r , X86::VFMSUB213PSYr },
+ { X86::VFMSUB231PDZ256m , X86::VFMSUB231PDYm },
+ { X86::VFMSUB231PDZ256r , X86::VFMSUB231PDYr },
+ { X86::VFMSUB231PSZ256m , X86::VFMSUB231PSYm },
+ { X86::VFMSUB231PSZ256r , X86::VFMSUB231PSYr },
+ { X86::VFMSUBADD132PDZ256m , X86::VFMSUBADD132PDYm },
+ { X86::VFMSUBADD132PDZ256r , X86::VFMSUBADD132PDYr },
+ { X86::VFMSUBADD132PSZ256m , X86::VFMSUBADD132PSYm },
+ { X86::VFMSUBADD132PSZ256r , X86::VFMSUBADD132PSYr },
+ { X86::VFMSUBADD213PDZ256m , X86::VFMSUBADD213PDYm },
+ { X86::VFMSUBADD213PDZ256r , X86::VFMSUBADD213PDYr },
+ { X86::VFMSUBADD213PSZ256m , X86::VFMSUBADD213PSYm },
+ { X86::VFMSUBADD213PSZ256r , X86::VFMSUBADD213PSYr },
+ { X86::VFMSUBADD231PDZ256m , X86::VFMSUBADD231PDYm },
+ { X86::VFMSUBADD231PDZ256r , X86::VFMSUBADD231PDYr },
+ { X86::VFMSUBADD231PSZ256m , X86::VFMSUBADD231PSYm },
+ { X86::VFMSUBADD231PSZ256r , X86::VFMSUBADD231PSYr },
+ { X86::VFNMADD132PDZ256m , X86::VFNMADD132PDYm },
+ { X86::VFNMADD132PDZ256r , X86::VFNMADD132PDYr },
+ { X86::VFNMADD132PSZ256m , X86::VFNMADD132PSYm },
+ { X86::VFNMADD132PSZ256r , X86::VFNMADD132PSYr },
+ { X86::VFNMADD213PDZ256m , X86::VFNMADD213PDYm },
+ { X86::VFNMADD213PDZ256r , X86::VFNMADD213PDYr },
+ { X86::VFNMADD213PSZ256m , X86::VFNMADD213PSYm },
+ { X86::VFNMADD213PSZ256r , X86::VFNMADD213PSYr },
+ { X86::VFNMADD231PDZ256m , X86::VFNMADD231PDYm },
+ { X86::VFNMADD231PDZ256r , X86::VFNMADD231PDYr },
+ { X86::VFNMADD231PSZ256m , X86::VFNMADD231PSYm },
+ { X86::VFNMADD231PSZ256r , X86::VFNMADD231PSYr },
+ { X86::VFNMSUB132PDZ256m , X86::VFNMSUB132PDYm },
+ { X86::VFNMSUB132PDZ256r , X86::VFNMSUB132PDYr },
+ { X86::VFNMSUB132PSZ256m , X86::VFNMSUB132PSYm },
+ { X86::VFNMSUB132PSZ256r , X86::VFNMSUB132PSYr },
+ { X86::VFNMSUB213PDZ256m , X86::VFNMSUB213PDYm },
+ { X86::VFNMSUB213PDZ256r , X86::VFNMSUB213PDYr },
+ { X86::VFNMSUB213PSZ256m , X86::VFNMSUB213PSYm },
+ { X86::VFNMSUB213PSZ256r , X86::VFNMSUB213PSYr },
+ { X86::VFNMSUB231PDZ256m , X86::VFNMSUB231PDYm },
+ { X86::VFNMSUB231PDZ256r , X86::VFNMSUB231PDYr },
+ { X86::VFNMSUB231PSZ256m , X86::VFNMSUB231PSYm },
+ { X86::VFNMSUB231PSZ256r , X86::VFNMSUB231PSYr },
+ { X86::VMAXCPDZ256rm , X86::VMAXCPDYrm },
+ { X86::VMAXCPDZ256rr , X86::VMAXCPDYrr },
+ { X86::VMAXCPSZ256rm , X86::VMAXCPSYrm },
+ { X86::VMAXCPSZ256rr , X86::VMAXCPSYrr },
+ { X86::VMAXPDZ256rm , X86::VMAXPDYrm },
+ { X86::VMAXPDZ256rr , X86::VMAXPDYrr },
+ { X86::VMAXPSZ256rm , X86::VMAXPSYrm },
+ { X86::VMAXPSZ256rr , X86::VMAXPSYrr },
+ { X86::VMINCPDZ256rm , X86::VMINCPDYrm },
+ { X86::VMINCPDZ256rr , X86::VMINCPDYrr },
+ { X86::VMINCPSZ256rm , X86::VMINCPSYrm },
+ { X86::VMINCPSZ256rr , X86::VMINCPSYrr },
+ { X86::VMINPDZ256rm , X86::VMINPDYrm },
+ { X86::VMINPDZ256rr , X86::VMINPDYrr },
+ { X86::VMINPSZ256rm , X86::VMINPSYrm },
+ { X86::VMINPSZ256rr , X86::VMINPSYrr },
+ { X86::VMOVAPDZ256mr , X86::VMOVAPDYmr },
+ { X86::VMOVAPDZ256rm , X86::VMOVAPDYrm },
+ { X86::VMOVAPDZ256rr , X86::VMOVAPDYrr },
+ { X86::VMOVAPDZ256rr_REV , X86::VMOVAPDYrr_REV },
+ { X86::VMOVAPSZ256mr , X86::VMOVAPSYmr },
+ { X86::VMOVAPSZ256rm , X86::VMOVAPSYrm },
+ { X86::VMOVAPSZ256rr , X86::VMOVAPSYrr },
+ { X86::VMOVAPSZ256rr_REV , X86::VMOVAPSYrr_REV },
+ { X86::VMOVDDUPZ256rm , X86::VMOVDDUPYrm },
+ { X86::VMOVDDUPZ256rr , X86::VMOVDDUPYrr },
+ { X86::VMOVDQA32Z256mr , X86::VMOVDQAYmr },
+ { X86::VMOVDQA32Z256rm , X86::VMOVDQAYrm },
+ { X86::VMOVDQA32Z256rr , X86::VMOVDQAYrr },
+ { X86::VMOVDQA32Z256rr_REV , X86::VMOVDQAYrr_REV },
+ { X86::VMOVDQA64Z256mr , X86::VMOVDQAYmr },
+ { X86::VMOVDQA64Z256rm , X86::VMOVDQAYrm },
+ { X86::VMOVDQA64Z256rr , X86::VMOVDQAYrr },
+ { X86::VMOVDQA64Z256rr_REV , X86::VMOVDQAYrr_REV },
+ { X86::VMOVDQU16Z256mr , X86::VMOVDQUYmr },
+ { X86::VMOVDQU16Z256rm , X86::VMOVDQUYrm },
+ { X86::VMOVDQU16Z256rr , X86::VMOVDQUYrr },
+ { X86::VMOVDQU16Z256rr_REV , X86::VMOVDQUYrr_REV },
+ { X86::VMOVDQU32Z256mr , X86::VMOVDQUYmr },
+ { X86::VMOVDQU32Z256rm , X86::VMOVDQUYrm },
+ { X86::VMOVDQU32Z256rr , X86::VMOVDQUYrr },
+ { X86::VMOVDQU32Z256rr_REV , X86::VMOVDQUYrr_REV },
+ { X86::VMOVDQU64Z256mr , X86::VMOVDQUYmr },
+ { X86::VMOVDQU64Z256rm , X86::VMOVDQUYrm },
+ { X86::VMOVDQU64Z256rr , X86::VMOVDQUYrr },
+ { X86::VMOVDQU64Z256rr_REV , X86::VMOVDQUYrr_REV },
+ { X86::VMOVDQU8Z256mr , X86::VMOVDQUYmr },
+ { X86::VMOVDQU8Z256rm , X86::VMOVDQUYrm },
+ { X86::VMOVDQU8Z256rr , X86::VMOVDQUYrr },
+ { X86::VMOVDQU8Z256rr_REV , X86::VMOVDQUYrr_REV },
+ { X86::VMOVNTDQAZ256rm , X86::VMOVNTDQAYrm },
+ { X86::VMOVNTDQZ256mr , X86::VMOVNTDQYmr },
+ { X86::VMOVNTPDZ256mr , X86::VMOVNTPDYmr },
+ { X86::VMOVNTPSZ256mr , X86::VMOVNTPSYmr },
+ { X86::VMOVSHDUPZ256rm , X86::VMOVSHDUPYrm },
+ { X86::VMOVSHDUPZ256rr , X86::VMOVSHDUPYrr },
+ { X86::VMOVSLDUPZ256rm , X86::VMOVSLDUPYrm },
+ { X86::VMOVSLDUPZ256rr , X86::VMOVSLDUPYrr },
+ { X86::VMOVUPDZ256mr , X86::VMOVUPDYmr },
+ { X86::VMOVUPDZ256rm , X86::VMOVUPDYrm },
+ { X86::VMOVUPDZ256rr , X86::VMOVUPDYrr },
+ { X86::VMOVUPDZ256rr_REV , X86::VMOVUPDYrr_REV },
+ { X86::VMOVUPSZ256mr , X86::VMOVUPSYmr },
+ { X86::VMOVUPSZ256rm , X86::VMOVUPSYrm },
+ { X86::VMOVUPSZ256rr , X86::VMOVUPSYrr },
+ { X86::VMOVUPSZ256rr_REV , X86::VMOVUPSYrr_REV },
+ { X86::VMULPDZ256rm , X86::VMULPDYrm },
+ { X86::VMULPDZ256rr , X86::VMULPDYrr },
+ { X86::VMULPSZ256rm , X86::VMULPSYrm },
+ { X86::VMULPSZ256rr , X86::VMULPSYrr },
+ { X86::VORPDZ256rm , X86::VORPDYrm },
+ { X86::VORPDZ256rr , X86::VORPDYrr },
+ { X86::VORPSZ256rm , X86::VORPSYrm },
+ { X86::VORPSZ256rr , X86::VORPSYrr },
+ { X86::VPABSBZ256rm , X86::VPABSBYrm },
+ { X86::VPABSBZ256rr , X86::VPABSBYrr },
+ { X86::VPABSDZ256rm , X86::VPABSDYrm },
+ { X86::VPABSDZ256rr , X86::VPABSDYrr },
+ { X86::VPABSWZ256rm , X86::VPABSWYrm },
+ { X86::VPABSWZ256rr , X86::VPABSWYrr },
+ { X86::VPACKSSDWZ256rm , X86::VPACKSSDWYrm },
+ { X86::VPACKSSDWZ256rr , X86::VPACKSSDWYrr },
+ { X86::VPACKSSWBZ256rm , X86::VPACKSSWBYrm },
+ { X86::VPACKSSWBZ256rr , X86::VPACKSSWBYrr },
+ { X86::VPACKUSDWZ256rm , X86::VPACKUSDWYrm },
+ { X86::VPACKUSDWZ256rr , X86::VPACKUSDWYrr },
+ { X86::VPACKUSWBZ256rm , X86::VPACKUSWBYrm },
+ { X86::VPACKUSWBZ256rr , X86::VPACKUSWBYrr },
+ { X86::VPADDBZ256rm , X86::VPADDBYrm },
+ { X86::VPADDBZ256rr , X86::VPADDBYrr },
+ { X86::VPADDDZ256rm , X86::VPADDDYrm },
+ { X86::VPADDDZ256rr , X86::VPADDDYrr },
+ { X86::VPADDQZ256rm , X86::VPADDQYrm },
+ { X86::VPADDQZ256rr , X86::VPADDQYrr },
+ { X86::VPADDSBZ256rm , X86::VPADDSBYrm },
+ { X86::VPADDSBZ256rr , X86::VPADDSBYrr },
+ { X86::VPADDSWZ256rm , X86::VPADDSWYrm },
+ { X86::VPADDSWZ256rr , X86::VPADDSWYrr },
+ { X86::VPADDUSBZ256rm , X86::VPADDUSBYrm },
+ { X86::VPADDUSBZ256rr , X86::VPADDUSBYrr },
+ { X86::VPADDUSWZ256rm , X86::VPADDUSWYrm },
+ { X86::VPADDUSWZ256rr , X86::VPADDUSWYrr },
+ { X86::VPADDWZ256rm , X86::VPADDWYrm },
+ { X86::VPADDWZ256rr , X86::VPADDWYrr },
+ { X86::VPALIGNRZ256rmi , X86::VPALIGNRYrmi },
+ { X86::VPALIGNRZ256rri , X86::VPALIGNRYrri },
+ { X86::VPANDDZ256rm , X86::VPANDYrm },
+ { X86::VPANDDZ256rr , X86::VPANDYrr },
+ { X86::VPANDQZ256rm , X86::VPANDYrm },
+ { X86::VPANDQZ256rr , X86::VPANDYrr },
+ { X86::VPAVGBZ256rm , X86::VPAVGBYrm },
+ { X86::VPAVGBZ256rr , X86::VPAVGBYrr },
+ { X86::VPAVGWZ256rm , X86::VPAVGWYrm },
+ { X86::VPAVGWZ256rr , X86::VPAVGWYrr },
+ { X86::VPBROADCASTBZ256m , X86::VPBROADCASTBYrm },
+ { X86::VPBROADCASTBZ256r , X86::VPBROADCASTBYrr },
+ { X86::VPBROADCASTDZ256m , X86::VPBROADCASTDYrm },
+ { X86::VPBROADCASTDZ256r , X86::VPBROADCASTDYrr },
+ { X86::VPBROADCASTQZ256m , X86::VPBROADCASTQYrm },
+ { X86::VPBROADCASTQZ256r , X86::VPBROADCASTQYrr },
+ { X86::VPBROADCASTWZ256m , X86::VPBROADCASTWYrm },
+ { X86::VPBROADCASTWZ256r , X86::VPBROADCASTWYrr },
+ { X86::VPERMDZ256rm , X86::VPERMDYrm },
+ { X86::VPERMDZ256rr , X86::VPERMDYrr },
+ { X86::VPERMILPDZ256mi , X86::VPERMILPDYmi },
+ { X86::VPERMILPDZ256ri , X86::VPERMILPDYri },
+ { X86::VPERMILPDZ256rm , X86::VPERMILPDYrm },
+ { X86::VPERMILPDZ256rr , X86::VPERMILPDYrr },
+ { X86::VPERMILPSZ256mi , X86::VPERMILPSYmi },
+ { X86::VPERMILPSZ256ri , X86::VPERMILPSYri },
+ { X86::VPERMILPSZ256rm , X86::VPERMILPSYrm },
+ { X86::VPERMILPSZ256rr , X86::VPERMILPSYrr },
+ { X86::VPERMPDZ256mi , X86::VPERMPDYmi },
+ { X86::VPERMPDZ256ri , X86::VPERMPDYri },
+ { X86::VPERMPSZ256rm , X86::VPERMPSYrm },
+ { X86::VPERMPSZ256rr , X86::VPERMPSYrr },
+ { X86::VPERMQZ256mi , X86::VPERMQYmi },
+ { X86::VPERMQZ256ri , X86::VPERMQYri },
+ { X86::VPMADDUBSWZ256rm , X86::VPMADDUBSWYrm },
+ { X86::VPMADDUBSWZ256rr , X86::VPMADDUBSWYrr },
+ { X86::VPMADDWDZ256rm , X86::VPMADDWDYrm },
+ { X86::VPMADDWDZ256rr , X86::VPMADDWDYrr },
+ { X86::VPMAXSBZ256rm , X86::VPMAXSBYrm },
+ { X86::VPMAXSBZ256rr , X86::VPMAXSBYrr },
+ { X86::VPMAXSDZ256rm , X86::VPMAXSDYrm },
+ { X86::VPMAXSDZ256rr , X86::VPMAXSDYrr },
+ { X86::VPMAXSWZ256rm , X86::VPMAXSWYrm },
+ { X86::VPMAXSWZ256rr , X86::VPMAXSWYrr },
+ { X86::VPMAXUBZ256rm , X86::VPMAXUBYrm },
+ { X86::VPMAXUBZ256rr , X86::VPMAXUBYrr },
+ { X86::VPMAXUDZ256rm , X86::VPMAXUDYrm },
+ { X86::VPMAXUDZ256rr , X86::VPMAXUDYrr },
+ { X86::VPMAXUWZ256rm , X86::VPMAXUWYrm },
+ { X86::VPMAXUWZ256rr , X86::VPMAXUWYrr },
+ { X86::VPMINSBZ256rm , X86::VPMINSBYrm },
+ { X86::VPMINSBZ256rr , X86::VPMINSBYrr },
+ { X86::VPMINSDZ256rm , X86::VPMINSDYrm },
+ { X86::VPMINSDZ256rr , X86::VPMINSDYrr },
+ { X86::VPMINSWZ256rm , X86::VPMINSWYrm },
+ { X86::VPMINSWZ256rr , X86::VPMINSWYrr },
+ { X86::VPMINUBZ256rm , X86::VPMINUBYrm },
+ { X86::VPMINUBZ256rr , X86::VPMINUBYrr },
+ { X86::VPMINUDZ256rm , X86::VPMINUDYrm },
+ { X86::VPMINUDZ256rr , X86::VPMINUDYrr },
+ { X86::VPMINUWZ256rm , X86::VPMINUWYrm },
+ { X86::VPMINUWZ256rr , X86::VPMINUWYrr },
+ { X86::VPMOVSXBDZ256rm , X86::VPMOVSXBDYrm },
+ { X86::VPMOVSXBDZ256rr , X86::VPMOVSXBDYrr },
+ { X86::VPMOVSXBQZ256rm , X86::VPMOVSXBQYrm },
+ { X86::VPMOVSXBQZ256rr , X86::VPMOVSXBQYrr },
+ { X86::VPMOVSXBWZ256rm , X86::VPMOVSXBWYrm },
+ { X86::VPMOVSXBWZ256rr , X86::VPMOVSXBWYrr },
+ { X86::VPMOVSXDQZ256rm , X86::VPMOVSXDQYrm },
+ { X86::VPMOVSXDQZ256rr , X86::VPMOVSXDQYrr },
+ { X86::VPMOVSXWDZ256rm , X86::VPMOVSXWDYrm },
+ { X86::VPMOVSXWDZ256rr , X86::VPMOVSXWDYrr },
+ { X86::VPMOVSXWQZ256rm , X86::VPMOVSXWQYrm },
+ { X86::VPMOVSXWQZ256rr , X86::VPMOVSXWQYrr },
+ { X86::VPMOVZXBDZ256rm , X86::VPMOVZXBDYrm },
+ { X86::VPMOVZXBDZ256rr , X86::VPMOVZXBDYrr },
+ { X86::VPMOVZXBQZ256rm , X86::VPMOVZXBQYrm },
+ { X86::VPMOVZXBQZ256rr , X86::VPMOVZXBQYrr },
+ { X86::VPMOVZXBWZ256rm , X86::VPMOVZXBWYrm },
+ { X86::VPMOVZXBWZ256rr , X86::VPMOVZXBWYrr },
+ { X86::VPMOVZXDQZ256rm , X86::VPMOVZXDQYrm },
+ { X86::VPMOVZXDQZ256rr , X86::VPMOVZXDQYrr },
+ { X86::VPMOVZXWDZ256rm , X86::VPMOVZXWDYrm },
+ { X86::VPMOVZXWDZ256rr , X86::VPMOVZXWDYrr },
+ { X86::VPMOVZXWQZ256rm , X86::VPMOVZXWQYrm },
+ { X86::VPMOVZXWQZ256rr , X86::VPMOVZXWQYrr },
+ { X86::VPMULDQZ256rm , X86::VPMULDQYrm },
+ { X86::VPMULDQZ256rr , X86::VPMULDQYrr },
+ { X86::VPMULHRSWZ256rm , X86::VPMULHRSWYrm },
+ { X86::VPMULHRSWZ256rr , X86::VPMULHRSWYrr },
+ { X86::VPMULHUWZ256rm , X86::VPMULHUWYrm },
+ { X86::VPMULHUWZ256rr , X86::VPMULHUWYrr },
+ { X86::VPMULHWZ256rm , X86::VPMULHWYrm },
+ { X86::VPMULHWZ256rr , X86::VPMULHWYrr },
+ { X86::VPMULLDZ256rm , X86::VPMULLDYrm },
+ { X86::VPMULLDZ256rr , X86::VPMULLDYrr },
+ { X86::VPMULLWZ256rm , X86::VPMULLWYrm },
+ { X86::VPMULLWZ256rr , X86::VPMULLWYrr },
+ { X86::VPMULUDQZ256rm , X86::VPMULUDQYrm },
+ { X86::VPMULUDQZ256rr , X86::VPMULUDQYrr },
+ { X86::VPORDZ256rm , X86::VPORYrm },
+ { X86::VPORDZ256rr , X86::VPORYrr },
+ { X86::VPORQZ256rm , X86::VPORYrm },
+ { X86::VPORQZ256rr , X86::VPORYrr },
+ { X86::VPSADBWZ256rm , X86::VPSADBWYrm },
+ { X86::VPSADBWZ256rr , X86::VPSADBWYrr },
+ { X86::VPSHUFBZ256rm , X86::VPSHUFBYrm },
+ { X86::VPSHUFBZ256rr , X86::VPSHUFBYrr },
+ { X86::VPSHUFDZ256mi , X86::VPSHUFDYmi },
+ { X86::VPSHUFDZ256ri , X86::VPSHUFDYri },
+ { X86::VPSHUFHWZ256mi , X86::VPSHUFHWYmi },
+ { X86::VPSHUFHWZ256ri , X86::VPSHUFHWYri },
+ { X86::VPSHUFLWZ256mi , X86::VPSHUFLWYmi },
+ { X86::VPSHUFLWZ256ri , X86::VPSHUFLWYri },
+ { X86::VPSLLDQZ256rr , X86::VPSLLDQYri },
+ { X86::VPSLLDZ256ri , X86::VPSLLDYri },
+ { X86::VPSLLDZ256rm , X86::VPSLLDYrm },
+ { X86::VPSLLDZ256rr , X86::VPSLLDYrr },
+ { X86::VPSLLQZ256ri , X86::VPSLLQYri },
+ { X86::VPSLLQZ256rm , X86::VPSLLQYrm },
+ { X86::VPSLLQZ256rr , X86::VPSLLQYrr },
+ { X86::VPSLLVDZ256rm , X86::VPSLLVDYrm },
+ { X86::VPSLLVDZ256rr , X86::VPSLLVDYrr },
+ { X86::VPSLLVQZ256rm , X86::VPSLLVQYrm },
+ { X86::VPSLLVQZ256rr , X86::VPSLLVQYrr },
+ { X86::VPSLLWZ256ri , X86::VPSLLWYri },
+ { X86::VPSLLWZ256rm , X86::VPSLLWYrm },
+ { X86::VPSLLWZ256rr , X86::VPSLLWYrr },
+ { X86::VPSRADZ256ri , X86::VPSRADYri },
+ { X86::VPSRADZ256rm , X86::VPSRADYrm },
+ { X86::VPSRADZ256rr , X86::VPSRADYrr },
+ { X86::VPSRAVDZ256rm , X86::VPSRAVDYrm },
+ { X86::VPSRAVDZ256rr , X86::VPSRAVDYrr },
+ { X86::VPSRAWZ256ri , X86::VPSRAWYri },
+ { X86::VPSRAWZ256rm , X86::VPSRAWYrm },
+ { X86::VPSRAWZ256rr , X86::VPSRAWYrr },
+ { X86::VPSRLDQZ256rr , X86::VPSRLDQYri },
+ { X86::VPSRLDZ256ri , X86::VPSRLDYri },
+ { X86::VPSRLDZ256rm , X86::VPSRLDYrm },
+ { X86::VPSRLDZ256rr , X86::VPSRLDYrr },
+ { X86::VPSRLQZ256ri , X86::VPSRLQYri },
+ { X86::VPSRLQZ256rm , X86::VPSRLQYrm },
+ { X86::VPSRLQZ256rr , X86::VPSRLQYrr },
+ { X86::VPSRLVDZ256rm , X86::VPSRLVDYrm },
+ { X86::VPSRLVDZ256rr , X86::VPSRLVDYrr },
+ { X86::VPSRLVQZ256rm , X86::VPSRLVQYrm },
+ { X86::VPSRLVQZ256rr , X86::VPSRLVQYrr },
+ { X86::VPSRLWZ256ri , X86::VPSRLWYri },
+ { X86::VPSRLWZ256rm , X86::VPSRLWYrm },
+ { X86::VPSRLWZ256rr , X86::VPSRLWYrr },
+ { X86::VPSUBBZ256rm , X86::VPSUBBYrm },
+ { X86::VPSUBBZ256rr , X86::VPSUBBYrr },
+ { X86::VPSUBDZ256rm , X86::VPSUBDYrm },
+ { X86::VPSUBDZ256rr , X86::VPSUBDYrr },
+ { X86::VPSUBQZ256rm , X86::VPSUBQYrm },
+ { X86::VPSUBQZ256rr , X86::VPSUBQYrr },
+ { X86::VPSUBSBZ256rm , X86::VPSUBSBYrm },
+ { X86::VPSUBSBZ256rr , X86::VPSUBSBYrr },
+ { X86::VPSUBSWZ256rm , X86::VPSUBSWYrm },
+ { X86::VPSUBSWZ256rr , X86::VPSUBSWYrr },
+ { X86::VPSUBUSBZ256rm , X86::VPSUBUSBYrm },
+ { X86::VPSUBUSBZ256rr , X86::VPSUBUSBYrr },
+ { X86::VPSUBUSWZ256rm , X86::VPSUBUSWYrm },
+ { X86::VPSUBUSWZ256rr , X86::VPSUBUSWYrr },
+ { X86::VPSUBWZ256rm , X86::VPSUBWYrm },
+ { X86::VPSUBWZ256rr , X86::VPSUBWYrr },
+ { X86::VPUNPCKHBWZ256rm , X86::VPUNPCKHBWYrm },
+ { X86::VPUNPCKHBWZ256rr , X86::VPUNPCKHBWYrr },
+ { X86::VPUNPCKHDQZ256rm , X86::VPUNPCKHDQYrm },
+ { X86::VPUNPCKHDQZ256rr , X86::VPUNPCKHDQYrr },
+ { X86::VPUNPCKHQDQZ256rm , X86::VPUNPCKHQDQYrm },
+ { X86::VPUNPCKHQDQZ256rr , X86::VPUNPCKHQDQYrr },
+ { X86::VPUNPCKHWDZ256rm , X86::VPUNPCKHWDYrm },
+ { X86::VPUNPCKHWDZ256rr , X86::VPUNPCKHWDYrr },
+ { X86::VPUNPCKLBWZ256rm , X86::VPUNPCKLBWYrm },
+ { X86::VPUNPCKLBWZ256rr , X86::VPUNPCKLBWYrr },
+ { X86::VPUNPCKLDQZ256rm , X86::VPUNPCKLDQYrm },
+ { X86::VPUNPCKLDQZ256rr , X86::VPUNPCKLDQYrr },
+ { X86::VPUNPCKLQDQZ256rm , X86::VPUNPCKLQDQYrm },
+ { X86::VPUNPCKLQDQZ256rr , X86::VPUNPCKLQDQYrr },
+ { X86::VPUNPCKLWDZ256rm , X86::VPUNPCKLWDYrm },
+ { X86::VPUNPCKLWDZ256rr , X86::VPUNPCKLWDYrr },
+ { X86::VPXORDZ256rm , X86::VPXORYrm },
+ { X86::VPXORDZ256rr , X86::VPXORYrr },
+ { X86::VPXORQZ256rm , X86::VPXORYrm },
+ { X86::VPXORQZ256rr , X86::VPXORYrr },
+ { X86::VSHUFPDZ256rmi , X86::VSHUFPDYrmi },
+ { X86::VSHUFPDZ256rri , X86::VSHUFPDYrri },
+ { X86::VSHUFPSZ256rmi , X86::VSHUFPSYrmi },
+ { X86::VSHUFPSZ256rri , X86::VSHUFPSYrri },
+ { X86::VSQRTPDZ256m , X86::VSQRTPDYm },
+ { X86::VSQRTPDZ256r , X86::VSQRTPDYr },
+ { X86::VSQRTPSZ256m , X86::VSQRTPSYm },
+ { X86::VSQRTPSZ256r , X86::VSQRTPSYr },
+ { X86::VSUBPDZ256rm , X86::VSUBPDYrm },
+ { X86::VSUBPDZ256rr , X86::VSUBPDYrr },
+ { X86::VSUBPSZ256rm , X86::VSUBPSYrm },
+ { X86::VSUBPSZ256rr , X86::VSUBPSYrr },
+ { X86::VUNPCKHPDZ256rm , X86::VUNPCKHPDYrm },
+ { X86::VUNPCKHPDZ256rr , X86::VUNPCKHPDYrr },
+ { X86::VUNPCKHPSZ256rm , X86::VUNPCKHPSYrm },
+ { X86::VUNPCKHPSZ256rr , X86::VUNPCKHPSYrr },
+ { X86::VUNPCKLPDZ256rm , X86::VUNPCKLPDYrm },
+ { X86::VUNPCKLPDZ256rr , X86::VUNPCKLPDYrr },
+ { X86::VUNPCKLPSZ256rm , X86::VUNPCKLPSYrm },
+ { X86::VUNPCKLPSZ256rr , X86::VUNPCKLPSYrr },
+ { X86::VXORPDZ256rm , X86::VXORPDYrm },
+ { X86::VXORPDZ256rr , X86::VXORPDYrr },
+ { X86::VXORPSZ256rm , X86::VXORPSYrm },
+ { X86::VXORPSZ256rr , X86::VXORPSYrr },
+};
+
+#endif
\ No newline at end of file
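
The two compression tables above pair each EVEX-encoded opcode with its VEX (or, in the
second table, VEX-256) counterpart, so a later pass can re-encode instructions that do not
need EVEX-only features into the shorter VEX form. The sketch below shows one way such a
table could be consulted. It is only an illustration: it assumes the entries stay sorted by
EVEX opcode and that X86EvexToVexCompressTableEntry exposes EvexOpcode/VexOpcode members,
neither of which is visible in this hunk.

    // Hypothetical lookup helper, not part of this change.
    #include <algorithm>

    static unsigned lookupVexOpcode(const X86EvexToVexCompressTableEntry *Begin,
                                    const X86EvexToVexCompressTableEntry *End,
                                    unsigned EvexOpc) {
      // Binary search on the (assumed sorted) EVEX opcode column.
      auto I = std::lower_bound(Begin, End, EvexOpc,
                                [](const X86EvexToVexCompressTableEntry &E,
                                   unsigned Opc) { return E.EvexOpcode < Opc; });
      if (I != End && I->EvexOpcode == EvexOpc)
        return I->VexOpcode; // A narrower VEX encoding exists.
      return 0;              // No entry: keep the EVEX encoding.
    }

A real consumer would also have to rewrite register operands and flags that differ between
the two encodings; the sketch only covers the opcode mapping itself.
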
diff --git a/contrib/llvm/lib/Target/X86/X86InstrXOP.td b/contrib/llvm/lib/Target/X86/X86InstrXOP.td
index f49917b80f36..2b296e1e5b85 100644
--- a/contrib/llvm/lib/Target/X86/X86InstrXOP.td
+++ b/contrib/llvm/lib/Target/X86/X86InstrXOP.td
@@ -85,12 +85,12 @@ let ExeDomain = SSEPackedDouble in {
multiclass xop3op<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType vt128> {
- def rr : IXOP<opc, MRMSrcReg, (outs VR128:$dst),
+ def rr : IXOP<opc, MRMSrcReg4VOp3, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2))))]>,
- XOP_4VOp3, Sched<[WriteVarVecShift]>;
+ XOP, Sched<[WriteVarVecShift]>;
def rm : IXOP<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
@@ -98,13 +98,20 @@ multiclass xop3op<bits<8> opc, string OpcodeStr, SDNode OpNode,
(vt128 (OpNode (vt128 VR128:$src1),
(vt128 (bitconvert (loadv2i64 addr:$src2))))))]>,
XOP_4V, VEX_W, Sched<[WriteVarVecShift, ReadAfterLd]>;
- def mr : IXOP<opc, MRMSrcMem, (outs VR128:$dst),
+ def mr : IXOP<opc, MRMSrcMem4VOp3, (outs VR128:$dst),
(ins i128mem:$src1, VR128:$src2),
!strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 (bitconvert (loadv2i64 addr:$src1))),
(vt128 VR128:$src2))))]>,
- XOP_4VOp3, Sched<[WriteVarVecShift, ReadAfterLd]>;
+ XOP, Sched<[WriteVarVecShift, ReadAfterLd]>;
+ // For disassembler
+ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
+ def rr_REV : IXOP<opc, MRMSrcReg, (outs VR128:$dst),
+ (ins VR128:$src1, VR128:$src2),
+ !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+ []>,
+ XOP_4V, VEX_W, Sched<[WriteVarVecShift]>;
}
let ExeDomain = SSEPackedInt in {
@@ -146,19 +153,19 @@ let ExeDomain = SSEPackedInt in {
// Instruction where second source can be memory, but third must be register
multiclass xop4opm2<bits<8> opc, string OpcodeStr, Intrinsic Int> {
let isCommutable = 1 in
- def rr : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
+ def rr : IXOPi8Reg<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
- (Int VR128:$src1, VR128:$src2, VR128:$src3))]>, XOP_4V, VEX_I8IMM;
- def rm : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
+ (Int VR128:$src1, VR128:$src2, VR128:$src3))]>, XOP_4V;
+ def rm : IXOPi8Reg<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(Int VR128:$src1, (bitconvert (loadv2i64 addr:$src2)),
- VR128:$src3))]>, XOP_4V, VEX_I8IMM;
+ VR128:$src3))]>, XOP_4V;
}
let ExeDomain = SSEPackedInt in {
@@ -224,37 +231,37 @@ let ExeDomain = SSEPackedInt in { // SSE integer instructions
multiclass xop4op<bits<8> opc, string OpcodeStr, SDNode OpNode,
ValueType vt128> {
- def rrr : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
+ def rrr : IXOPi8Reg<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
(vt128 VR128:$src3))))]>,
- XOP_4V, VEX_I8IMM;
- def rrm : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
+ XOP_4V;
+ def rrm : IXOPi8Reg<opc, MRMSrcMemOp4, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i128mem:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
(vt128 (bitconvert (loadv2i64 addr:$src3))))))]>,
- XOP_4V, VEX_I8IMM, VEX_W, MemOp4;
- def rmr : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
+ XOP_4V, VEX_W;
+ def rmr : IXOPi8Reg<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(v16i8 (OpNode (vt128 VR128:$src1), (vt128 (bitconvert (loadv2i64 addr:$src2))),
(vt128 VR128:$src3))))]>,
- XOP_4V, VEX_I8IMM;
+ XOP_4V;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
- def rrr_REV : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
+ def rrr_REV : IXOPi8Reg<opc, MRMSrcRegOp4, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, XOP_4V, VEX_I8IMM, VEX_W, MemOp4;
+ []>, XOP_4V, VEX_W;
}
let ExeDomain = SSEPackedInt in {
@@ -265,66 +272,66 @@ let ExeDomain = SSEPackedInt in {
multiclass xop4op_int<bits<8> opc, string OpcodeStr,
Intrinsic Int128, Intrinsic Int256> {
// 128-bit Instruction
- def rrr : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
+ def rrr : IXOPi8Reg<opc, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst, (Int128 VR128:$src1, VR128:$src2, VR128:$src3))]>,
- XOP_4V, VEX_I8IMM;
- def rrm : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
+ XOP_4V;
+ def rrm : IXOPi8Reg<opc, MRMSrcMemOp4, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i128mem:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(Int128 VR128:$src1, VR128:$src2,
(bitconvert (loadv2i64 addr:$src3))))]>,
- XOP_4V, VEX_I8IMM, VEX_W, MemOp4;
- def rmr : IXOPi8<opc, MRMSrcMem, (outs VR128:$dst),
+ XOP_4V, VEX_W;
+ def rmr : IXOPi8Reg<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR128:$dst,
(Int128 VR128:$src1, (bitconvert (loadv2i64 addr:$src2)),
VR128:$src3))]>,
- XOP_4V, VEX_I8IMM;
+ XOP_4V;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
- def rrr_REV : IXOPi8<opc, MRMSrcReg, (outs VR128:$dst),
+ def rrr_REV : IXOPi8Reg<opc, MRMSrcRegOp4, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, XOP_4V, VEX_I8IMM, VEX_W, MemOp4;
+ []>, XOP_4V, VEX_W;
// 256-bit Instruction
- def rrrY : IXOPi8<opc, MRMSrcReg, (outs VR256:$dst),
+ def rrrY : IXOPi8Reg<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, VR256:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR256:$dst, (Int256 VR256:$src1, VR256:$src2, VR256:$src3))]>,
- XOP_4V, VEX_I8IMM, VEX_L;
- def rrmY : IXOPi8<opc, MRMSrcMem, (outs VR256:$dst),
+ XOP_4V, VEX_L;
+ def rrmY : IXOPi8Reg<opc, MRMSrcMemOp4, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, i256mem:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR256:$dst,
(Int256 VR256:$src1, VR256:$src2,
(bitconvert (loadv4i64 addr:$src3))))]>,
- XOP_4V, VEX_I8IMM, VEX_W, MemOp4, VEX_L;
- def rmrY : IXOPi8<opc, MRMSrcMem, (outs VR256:$dst),
+ XOP_4V, VEX_W, VEX_L;
+ def rmrY : IXOPi8Reg<opc, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f256mem:$src2, VR256:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
[(set VR256:$dst,
(Int256 VR256:$src1, (bitconvert (loadv4i64 addr:$src2)),
VR256:$src3))]>,
- XOP_4V, VEX_I8IMM, VEX_L;
+ XOP_4V, VEX_L;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
- def rrrY_REV : IXOPi8<opc, MRMSrcReg, (outs VR256:$dst),
+ def rrrY_REV : IXOPi8Reg<opc, MRMSrcRegOp4, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, VR256:$src3),
!strconcat(OpcodeStr,
"\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
- []>, XOP_4V, VEX_I8IMM, VEX_W, MemOp4, VEX_L;
+ []>, XOP_4V, VEX_W, VEX_L;
}
let ExeDomain = SSEPackedInt in {
@@ -353,7 +360,7 @@ multiclass xop5op<bits<8> opc, string OpcodeStr, SDNode OpNode,
[(set VR128:$dst,
(vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
(id128 VR128:$src3), (i8 imm:$src4))))]>;
- def rm : IXOP5<opc, MRMSrcMem, (outs VR128:$dst),
+ def rm : IXOP5<opc, MRMSrcMemOp4, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, i128mem:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
@@ -361,7 +368,7 @@ multiclass xop5op<bits<8> opc, string OpcodeStr, SDNode OpNode,
(vt128 (OpNode (vt128 VR128:$src1), (vt128 VR128:$src2),
(id128 (bitconvert (loadv2i64 addr:$src3))),
(i8 imm:$src4))))]>,
- VEX_W, MemOp4;
+ VEX_W;
def mr : IXOP5<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, f128mem:$src2, VR128:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
@@ -372,11 +379,11 @@ multiclass xop5op<bits<8> opc, string OpcodeStr, SDNode OpNode,
(id128 VR128:$src3), (i8 imm:$src4))))]>;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
- def rr_REV : IXOP5<opc, MRMSrcReg, (outs VR128:$dst),
+ def rr_REV : IXOP5<opc, MRMSrcRegOp4, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2, VR128:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
- []>, VEX_W, MemOp4;
+ []>, VEX_W;
def rrY : IXOP5<opc, MRMSrcReg, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, VR256:$src3, u8imm:$src4),
@@ -385,14 +392,14 @@ multiclass xop5op<bits<8> opc, string OpcodeStr, SDNode OpNode,
[(set VR256:$dst,
(vt256 (OpNode (vt256 VR256:$src1), (vt256 VR256:$src2),
(id256 VR256:$src3), (i8 imm:$src4))))]>, VEX_L;
- def rmY : IXOP5<opc, MRMSrcMem, (outs VR256:$dst),
+ def rmY : IXOP5<opc, MRMSrcMemOp4, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, i256mem:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
[(set VR256:$dst,
(vt256 (OpNode (vt256 VR256:$src1), (vt256 VR256:$src2),
(id256 (bitconvert (loadv4i64 addr:$src3))),
- (i8 imm:$src4))))]>, VEX_W, MemOp4, VEX_L;
+ (i8 imm:$src4))))]>, VEX_W, VEX_L;
def mrY : IXOP5<opc, MRMSrcMem, (outs VR256:$dst),
(ins VR256:$src1, f256mem:$src2, VR256:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
@@ -403,11 +410,11 @@ multiclass xop5op<bits<8> opc, string OpcodeStr, SDNode OpNode,
(id256 VR256:$src3), (i8 imm:$src4))))]>, VEX_L;
// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
- def rrY_REV : IXOP5<opc, MRMSrcReg, (outs VR256:$dst),
+ def rrY_REV : IXOP5<opc, MRMSrcRegOp4, (outs VR256:$dst),
(ins VR256:$src1, VR256:$src2, VR256:$src3, u8imm:$src4),
!strconcat(OpcodeStr,
"\t{$src4, $src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3, $src4}"),
- []>, VEX_W, MemOp4, VEX_L;
+ []>, VEX_W, VEX_L;
}
let ExeDomain = SSEPackedDouble in
diff --git a/contrib/llvm/lib/Target/X86/X86InterleavedAccess.cpp b/contrib/llvm/lib/Target/X86/X86InterleavedAccess.cpp
new file mode 100644
index 000000000000..d9edf4676faf
--- /dev/null
+++ b/contrib/llvm/lib/Target/X86/X86InterleavedAccess.cpp
@@ -0,0 +1,221 @@
+//===--------- X86InterleavedAccess.cpp ----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===--------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the X86 implementation of the interleaved accesses
+/// optimization generating X86-specific instructions/intrinsics for
+/// interleaved access groups.
+///
+//===--------------------------------------------------------------------===//
+
+#include "X86ISelLowering.h"
+#include "X86TargetMachine.h"
+
+using namespace llvm;
+
+/// \brief This class holds necessary information to represent an interleaved
+/// access group and supports utilities to lower the group into
+/// X86-specific instructions/intrinsics.
+/// E.g. A group of interleaving access loads (Factor = 2; accessing every
+/// other element)
+/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
+/// %v0 = shuffle <8 x i32> %wide.vec, <8 x i32> undef, <0, 2, 4, 6>
+/// %v1 = shuffle <8 x i32> %wide.vec, <8 x i32> undef, <1, 3, 5, 7>
+
+class X86InterleavedAccessGroup {
+ /// \brief Reference to the wide-load instruction of an interleaved access
+ /// group.
+ Instruction *const Inst;
+
+ /// \brief Reference to the shuffle(s), consumer(s) of the (load) 'Inst'.
+ ArrayRef<ShuffleVectorInst *> Shuffles;
+
+ /// \brief Reference to the starting index of each user-shuffle.
+ ArrayRef<unsigned> Indices;
+
+ /// \brief Reference to the interleaving stride in terms of elements.
+ const unsigned Factor;
+
+ /// \brief Reference to the underlying target.
+ const X86Subtarget &Subtarget;
+
+ const DataLayout &DL;
+
+ IRBuilder<> &Builder;
+
+ /// \brief Breaks down a vector \p 'Inst' of N elements into \p NumSubVectors
+ /// sub vectors of type \p T. Returns true and the sub-vectors in
+ /// \p DecomposedVectors if it decomposes the Inst, returns false otherwise.
+ bool decompose(Instruction *Inst, unsigned NumSubVectors, VectorType *T,
+ SmallVectorImpl<Instruction *> &DecomposedVectors);
+
+ /// \brief Performs matrix transposition on a 4x4 matrix \p InputVectors and
+ /// returns the transposed-vectors in \p TransposedVectors.
+ /// E.g.
+ /// InputVectors:
+ /// In-V0 = p1, p2, p3, p4
+ /// In-V1 = q1, q2, q3, q4
+ /// In-V2 = r1, r2, r3, r4
+ /// In-V3 = s1, s2, s3, s4
+ /// OutputVectors:
+ /// Out-V0 = p1, q1, r1, s1
+ /// Out-V1 = p2, q2, r2, s2
+ /// Out-V2 = p3, q3, r3, s3
+/// Out-V3 = p4, q4, r4, s4
+ void transpose_4x4(ArrayRef<Instruction *> InputVectors,
+                     SmallVectorImpl<Value *> &TransposedVectors);
+
+public:
+ /// In order to form an interleaved access group X86InterleavedAccessGroup
+ /// requires a wide-load instruction \p 'I', a group of interleaved-vectors
+ /// \p Shuffs, reference to the first indices of each interleaved-vector
+ /// \p 'Ind' and the interleaving stride factor \p F. In order to generate
+ /// X86-specific instructions/intrinsics it also requires the underlying
+ /// target information \p STarget.
+ explicit X86InterleavedAccessGroup(Instruction *I,
+ ArrayRef<ShuffleVectorInst *> Shuffs,
+ ArrayRef<unsigned> Ind,
+ const unsigned F,
+ const X86Subtarget &STarget,
+ IRBuilder<> &B)
+ : Inst(I), Shuffles(Shuffs), Indices(Ind), Factor(F), Subtarget(STarget),
+ DL(Inst->getModule()->getDataLayout()), Builder(B) {}
+
+ /// \brief Returns true if this interleaved access group can be lowered into
+ /// x86-specific instructions/intrinsics, false otherwise.
+ bool isSupported() const;
+
+ /// \brief Lowers this interleaved access group into X86-specific
+ /// instructions/intrinsics.
+ bool lowerIntoOptimizedSequence();
+};
+
+bool X86InterleavedAccessGroup::isSupported() const {
+ VectorType *ShuffleVecTy = Shuffles[0]->getType();
+ uint64_t ShuffleVecSize = DL.getTypeSizeInBits(ShuffleVecTy);
+ Type *ShuffleEltTy = ShuffleVecTy->getVectorElementType();
+
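+ // The wide load must cover all Factor interleaved vectors; otherwise the
+ // group does not describe a complete interleaved access.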
+ if (DL.getTypeSizeInBits(Inst->getType()) < Factor * ShuffleVecSize)
+ return false;
+
+ // Currently, lowering is supported only for 256-bit shuffle vectors of
+ // 64-bit elements (Factor = 4) on AVX.
+ if (!Subtarget.hasAVX() || ShuffleVecSize != 256 ||
+ DL.getTypeSizeInBits(ShuffleEltTy) != 64 || Factor != 4)
+ return false;
+
+ return true;
+}
+
+bool X86InterleavedAccessGroup::decompose(
+ Instruction *VecInst, unsigned NumSubVectors, VectorType *SubVecTy,
+ SmallVectorImpl<Instruction *> &DecomposedVectors) {
+ Type *VecTy = VecInst->getType();
+ (void)VecTy;
+ assert(VecTy->isVectorTy() &&
+ DL.getTypeSizeInBits(VecTy) >=
+ DL.getTypeSizeInBits(SubVecTy) * NumSubVectors &&
+ "Invalid Inst-size!!!");
+ assert(VecTy->getVectorElementType() == SubVecTy->getVectorElementType() &&
+ "Element type mismatched!!!");
+
+ if (!isa<LoadInst>(VecInst))
+ return false;
+
+ LoadInst *LI = cast<LoadInst>(VecInst);
+ Type *VecBasePtrTy = SubVecTy->getPointerTo(LI->getPointerAddressSpace());
+
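+ // Reinterpret the wide-load pointer as a pointer to SubVecTy so that each
+ // GEP below advances it by one whole sub-vector.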
+ Value *VecBasePtr =
+ Builder.CreateBitCast(LI->getPointerOperand(), VecBasePtrTy);
+
+ // Generate NumSubVectors loads of SubVecTy type.
+ for (unsigned i = 0; i < NumSubVectors; i++) {
+ // TODO: Support inbounds GEP
+ Value *NewBasePtr = Builder.CreateGEP(VecBasePtr, Builder.getInt32(i));
+ Instruction *NewLoad =
+ Builder.CreateAlignedLoad(NewBasePtr, LI->getAlignment());
+ DecomposedVectors.push_back(NewLoad);
+ }
+
+ return true;
+}
+
+void X86InterleavedAccessGroup::transpose_4x4(
+ ArrayRef<Instruction *> Matrix,
+ SmallVectorImpl<Value *> &TransposedMatrix) {
+ assert(Matrix.size() == 4 && "Invalid matrix size");
+ TransposedMatrix.resize(4);
+
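+ // The transpose is done in two shuffle stages: first gather the low and
+ // high element pairs of rows 0/2 and 1/3, then pick alternating elements
+ // from those intermediate vectors to form the transposed rows.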
+ // dst = src1[0,1],src2[0,1]
+ uint32_t IntMask1[] = {0, 1, 4, 5};
+ ArrayRef<uint32_t> Mask = makeArrayRef(IntMask1, 4);
+ Value *IntrVec1 = Builder.CreateShuffleVector(Matrix[0], Matrix[2], Mask);
+ Value *IntrVec2 = Builder.CreateShuffleVector(Matrix[1], Matrix[3], Mask);
+
+ // dst = src1[2,3],src2[2,3]
+ uint32_t IntMask2[] = {2, 3, 6, 7};
+ Mask = makeArrayRef(IntMask2, 4);
+ Value *IntrVec3 = Builder.CreateShuffleVector(Matrix[0], Matrix[2], Mask);
+ Value *IntrVec4 = Builder.CreateShuffleVector(Matrix[1], Matrix[3], Mask);
+
+ // dst = src1[0],src2[0],src1[2],src2[2]
+ uint32_t IntMask3[] = {0, 4, 2, 6};
+ Mask = makeArrayRef(IntMask3, 4);
+ TransposedMatrix[0] = Builder.CreateShuffleVector(IntrVec1, IntrVec2, Mask);
+ TransposedMatrix[2] = Builder.CreateShuffleVector(IntrVec3, IntrVec4, Mask);
+
+ // dst = src1[1],src2[1],src1[3],src2[3]
+ uint32_t IntMask4[] = {1, 5, 3, 7};
+ Mask = makeArrayRef(IntMask4, 4);
+ TransposedMatrix[1] = Builder.CreateShuffleVector(IntrVec1, IntrVec2, Mask);
+ TransposedMatrix[3] = Builder.CreateShuffleVector(IntrVec3, IntrVec4, Mask);
+}
+
+// Lowers this interleaved access group into X86-specific
+// instructions/intrinsics.
+bool X86InterleavedAccessGroup::lowerIntoOptimizedSequence() {
+ SmallVector<Instruction *, 4> DecomposedVectors;
+ VectorType *VecTy = Shuffles[0]->getType();
+ // Decompose the wide load into target-register-sized sub-vector loads.
+ if (!decompose(Inst, Factor, VecTy, DecomposedVectors))
+ return false;
+
+ SmallVector<Value *, 4> TransposedVectors;
+ // Perform a matrix transposition on the decomposed sub-vectors to compute
+ // the interleaved results using target-specific shuffle instructions.
+ transpose_4x4(DecomposedVectors, TransposedVectors);
+
+ // Now replace the unoptimized interleaved vectors with the
+ // transposed ones.
+ for (unsigned i = 0; i < Shuffles.size(); i++)
+ Shuffles[i]->replaceAllUsesWith(TransposedVectors[Indices[i]]);
+
+ return true;
+}
+
+// Lower interleaved load(s) into target-specific instructions/intrinsics.
+// The lowering sequence varies depending on the vector types, factor,
+// number of shuffles and ISA.
+// Currently, lowering is supported for 4 x 64-bit elements with Factor = 4 on AVX.
+bool X86TargetLowering::lowerInterleavedLoad(
+ LoadInst *LI, ArrayRef<ShuffleVectorInst *> Shuffles,
+ ArrayRef<unsigned> Indices, unsigned Factor) const {
+ assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() &&
+ "Invalid interleave factor");
+ assert(!Shuffles.empty() && "Empty shufflevector input");
+ assert(Shuffles.size() == Indices.size() &&
+ "Unmatched number of shufflevectors and indices");
+
+ // Create an interleaved access group.
+ IRBuilder<> Builder(LI);
+ X86InterleavedAccessGroup Grp(LI, Shuffles, Indices, Factor, Subtarget,
+ Builder);
+
+ return Grp.isSupported() && Grp.lowerIntoOptimizedSequence();
+}
diff --git a/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index b647d11e3866..df47b4ad583d 100644
--- a/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -21,9 +21,10 @@ namespace llvm {
enum IntrinsicType : uint16_t {
INTR_NO_TYPE,
- GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, ADX, FPCLASS, FPCLASSS,
- INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_2OP_IMM8, INTR_TYPE_3OP, INTR_TYPE_4OP,
+ GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST, XGETBV, ADX, FPCLASS, FPCLASSS,
+ INTR_TYPE_1OP, INTR_TYPE_2OP, INTR_TYPE_3OP, INTR_TYPE_4OP,
CMP_MASK, CMP_MASK_CC,CMP_MASK_SCALAR_CC, VSHIFT, COMI, COMI_RM,
+ CVTPD2PS, CVTPD2PS_MASK,
INTR_TYPE_1OP_MASK, INTR_TYPE_1OP_MASK_RM,
INTR_TYPE_2OP_MASK, INTR_TYPE_2OP_MASK_RM, INTR_TYPE_2OP_IMM8_MASK,
INTR_TYPE_3OP_MASK, INTR_TYPE_3OP_MASK_RM, INTR_TYPE_3OP_IMM8_MASK,
@@ -184,6 +185,79 @@ static const IntrinsicData IntrinsicsWithChain[] = {
X86ISD::VTRUNC, 0),
X86_INTRINSIC_DATA(avx512_mask_pmov_wb_mem_512, TRUNCATE_TO_MEM_VI8,
X86ISD::VTRUNC, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_db_mem_128, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_db_mem_256, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_db_mem_512, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_mem_128, TRUNCATE_TO_MEM_VI16,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_mem_256, TRUNCATE_TO_MEM_VI16,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_dw_mem_512, TRUNCATE_TO_MEM_VI16,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_mem_128, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_mem_256, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qb_mem_512, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qd_mem_128, TRUNCATE_TO_MEM_VI32,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qd_mem_256, TRUNCATE_TO_MEM_VI32,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qd_mem_512, TRUNCATE_TO_MEM_VI32,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_mem_128, TRUNCATE_TO_MEM_VI16,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_mem_256, TRUNCATE_TO_MEM_VI16,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_qw_mem_512, TRUNCATE_TO_MEM_VI16,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_mem_128, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_mem_256, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_mem_512, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_db_mem_128, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_db_mem_256, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_db_mem_512, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_mem_128, TRUNCATE_TO_MEM_VI16,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_mem_256, TRUNCATE_TO_MEM_VI16,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_dw_mem_512, TRUNCATE_TO_MEM_VI16,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_mem_128, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_mem_256, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qb_mem_512, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qd_mem_128, TRUNCATE_TO_MEM_VI32,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qd_mem_256, TRUNCATE_TO_MEM_VI32,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qd_mem_512, TRUNCATE_TO_MEM_VI32,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_mem_128, TRUNCATE_TO_MEM_VI16,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_mem_256, TRUNCATE_TO_MEM_VI16,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_qw_mem_512, TRUNCATE_TO_MEM_VI16,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_mem_128, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_mem_256, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_mem_512, TRUNCATE_TO_MEM_VI8,
+ X86ISD::VTRUNCUS, 0),
+
X86_INTRINSIC_DATA(avx512_scatter_dpd_512, SCATTER, X86::VSCATTERDPDZmr, 0),
X86_INTRINSIC_DATA(avx512_scatter_dpi_512, SCATTER, X86::VPSCATTERDDZmr, 0),
X86_INTRINSIC_DATA(avx512_scatter_dpq_512, SCATTER, X86::VPSCATTERDQZmr, 0),
@@ -228,6 +302,7 @@ static const IntrinsicData IntrinsicsWithChain[] = {
X86_INTRINSIC_DATA(subborrow_u32, ADX, X86ISD::SBB, 0),
X86_INTRINSIC_DATA(subborrow_u64, ADX, X86ISD::SBB, 0),
+ X86_INTRINSIC_DATA(xgetbv, XGETBV, X86::XGETBV, 0),
X86_INTRINSIC_DATA(xtest, XTEST, X86ISD::XTEST, 0),
};
@@ -250,6 +325,11 @@ static const IntrinsicData* getIntrinsicWithChain(uint16_t IntNo) {
* the alphabetical order.
*/
static const IntrinsicData IntrinsicsWithoutChain[] = {
+ X86_INTRINSIC_DATA(avx_cvt_pd2_ps_256,CVTPD2PS, ISD::FP_ROUND, 0),
+ X86_INTRINSIC_DATA(avx_cvt_pd2dq_256, INTR_TYPE_1OP, X86ISD::CVTP2SI, 0),
+ X86_INTRINSIC_DATA(avx_cvtdq2_ps_256, INTR_TYPE_1OP, ISD::SINT_TO_FP, 0),
+ X86_INTRINSIC_DATA(avx_cvtt_pd2dq_256,INTR_TYPE_1OP, ISD::FP_TO_SINT, 0),
+ X86_INTRINSIC_DATA(avx_cvtt_ps2dq_256,INTR_TYPE_1OP, ISD::FP_TO_SINT, 0),
X86_INTRINSIC_DATA(avx_hadd_pd_256, INTR_TYPE_2OP, X86ISD::FHADD, 0),
X86_INTRINSIC_DATA(avx_hadd_ps_256, INTR_TYPE_2OP, X86ISD::FHADD, 0),
X86_INTRINSIC_DATA(avx_hsub_pd_256, INTR_TYPE_2OP, X86ISD::FHSUB, 0),
@@ -288,8 +368,11 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx2_phadd_w, INTR_TYPE_2OP, X86ISD::HADD, 0),
X86_INTRINSIC_DATA(avx2_phsub_d, INTR_TYPE_2OP, X86ISD::HSUB, 0),
X86_INTRINSIC_DATA(avx2_phsub_w, INTR_TYPE_2OP, X86ISD::HSUB, 0),
+ X86_INTRINSIC_DATA(avx2_pmadd_ub_sw, INTR_TYPE_2OP, X86ISD::VPMADDUBSW, 0),
+ X86_INTRINSIC_DATA(avx2_pmadd_wd, INTR_TYPE_2OP, X86ISD::VPMADDWD, 0),
X86_INTRINSIC_DATA(avx2_pmovmskb, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),
X86_INTRINSIC_DATA(avx2_pmul_dq, INTR_TYPE_2OP, X86ISD::PMULDQ, 0),
+ X86_INTRINSIC_DATA(avx2_pmul_hr_sw, INTR_TYPE_2OP, X86ISD::MULHRS, 0),
X86_INTRINSIC_DATA(avx2_pmulh_w, INTR_TYPE_2OP, ISD::MULHS, 0),
X86_INTRINSIC_DATA(avx2_pmulhu_w, INTR_TYPE_2OP, ISD::MULHU, 0),
X86_INTRINSIC_DATA(avx2_pmulu_dq, INTR_TYPE_2OP, X86ISD::PMULUDQ, 0),
@@ -353,21 +436,20 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_cvtq2mask_128, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
X86_INTRINSIC_DATA(avx512_cvtq2mask_256, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
X86_INTRINSIC_DATA(avx512_cvtq2mask_512, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
- X86_INTRINSIC_DATA(avx512_cvtsi2sd32, INTR_TYPE_3OP, X86ISD::SINT_TO_FP_RND, 0),
- X86_INTRINSIC_DATA(avx512_cvtsi2sd64, INTR_TYPE_3OP, X86ISD::SINT_TO_FP_RND, 0),
- X86_INTRINSIC_DATA(avx512_cvtsi2ss32, INTR_TYPE_3OP, X86ISD::SINT_TO_FP_RND, 0),
- X86_INTRINSIC_DATA(avx512_cvtsi2ss64, INTR_TYPE_3OP, X86ISD::SINT_TO_FP_RND, 0),
- X86_INTRINSIC_DATA(avx512_cvttsd2si, INTR_TYPE_2OP, X86ISD::FP_TO_SINT_RND, 0),
- X86_INTRINSIC_DATA(avx512_cvttsd2si64, INTR_TYPE_2OP, X86ISD::FP_TO_SINT_RND, 0),
- X86_INTRINSIC_DATA(avx512_cvttsd2usi, INTR_TYPE_2OP, X86ISD::FP_TO_UINT_RND, 0),
- X86_INTRINSIC_DATA(avx512_cvttsd2usi64, INTR_TYPE_2OP, X86ISD::FP_TO_UINT_RND, 0),
- X86_INTRINSIC_DATA(avx512_cvttss2si, INTR_TYPE_2OP, X86ISD::FP_TO_SINT_RND, 0),
- X86_INTRINSIC_DATA(avx512_cvttss2si64, INTR_TYPE_2OP, X86ISD::FP_TO_SINT_RND, 0),
- X86_INTRINSIC_DATA(avx512_cvttss2usi, INTR_TYPE_2OP, X86ISD::FP_TO_UINT_RND, 0),
- X86_INTRINSIC_DATA(avx512_cvttss2usi64, INTR_TYPE_2OP, X86ISD::FP_TO_UINT_RND, 0),
- X86_INTRINSIC_DATA(avx512_cvtusi2ss, INTR_TYPE_3OP, X86ISD::UINT_TO_FP_RND, 0),
- X86_INTRINSIC_DATA(avx512_cvtusi642sd, INTR_TYPE_3OP, X86ISD::UINT_TO_FP_RND, 0),
- X86_INTRINSIC_DATA(avx512_cvtusi642ss, INTR_TYPE_3OP, X86ISD::UINT_TO_FP_RND, 0),
+ X86_INTRINSIC_DATA(avx512_cvtsi2sd64, INTR_TYPE_3OP, X86ISD::SCALAR_SINT_TO_FP_RND, 0),
+ X86_INTRINSIC_DATA(avx512_cvtsi2ss32, INTR_TYPE_3OP, X86ISD::SCALAR_SINT_TO_FP_RND, 0),
+ X86_INTRINSIC_DATA(avx512_cvtsi2ss64, INTR_TYPE_3OP, X86ISD::SCALAR_SINT_TO_FP_RND, 0),
+ X86_INTRINSIC_DATA(avx512_cvttsd2si, INTR_TYPE_2OP, X86ISD::CVTTS2SI_RND, 0),
+ X86_INTRINSIC_DATA(avx512_cvttsd2si64, INTR_TYPE_2OP, X86ISD::CVTTS2SI_RND, 0),
+ X86_INTRINSIC_DATA(avx512_cvttsd2usi, INTR_TYPE_2OP, X86ISD::CVTTS2UI_RND, 0),
+ X86_INTRINSIC_DATA(avx512_cvttsd2usi64, INTR_TYPE_2OP, X86ISD::CVTTS2UI_RND, 0),
+ X86_INTRINSIC_DATA(avx512_cvttss2si, INTR_TYPE_2OP, X86ISD::CVTTS2SI_RND, 0),
+ X86_INTRINSIC_DATA(avx512_cvttss2si64, INTR_TYPE_2OP, X86ISD::CVTTS2SI_RND, 0),
+ X86_INTRINSIC_DATA(avx512_cvttss2usi, INTR_TYPE_2OP, X86ISD::CVTTS2UI_RND, 0),
+ X86_INTRINSIC_DATA(avx512_cvttss2usi64, INTR_TYPE_2OP, X86ISD::CVTTS2UI_RND, 0),
+ X86_INTRINSIC_DATA(avx512_cvtusi2ss, INTR_TYPE_3OP, X86ISD::SCALAR_UINT_TO_FP_RND, 0),
+ X86_INTRINSIC_DATA(avx512_cvtusi642sd, INTR_TYPE_3OP, X86ISD::SCALAR_UINT_TO_FP_RND, 0),
+ X86_INTRINSIC_DATA(avx512_cvtusi642ss, INTR_TYPE_3OP, X86ISD::SCALAR_UINT_TO_FP_RND, 0),
X86_INTRINSIC_DATA(avx512_cvtw2mask_128, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
X86_INTRINSIC_DATA(avx512_cvtw2mask_256, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
X86_INTRINSIC_DATA(avx512_cvtw2mask_512, CONVERT_TO_MASK, X86ISD::CVT2MASK, 0),
@@ -377,30 +459,14 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_kunpck_dq, KUNPCK, ISD::CONCAT_VECTORS, 0),
X86_INTRINSIC_DATA(avx512_kunpck_wd, KUNPCK, ISD::CONCAT_VECTORS, 0),
- X86_INTRINSIC_DATA(avx512_mask_add_pd_128, INTR_TYPE_2OP_MASK, ISD::FADD, 0),
- X86_INTRINSIC_DATA(avx512_mask_add_pd_256, INTR_TYPE_2OP_MASK, ISD::FADD, 0),
X86_INTRINSIC_DATA(avx512_mask_add_pd_512, INTR_TYPE_2OP_MASK, ISD::FADD,
X86ISD::FADD_RND),
- X86_INTRINSIC_DATA(avx512_mask_add_ps_128, INTR_TYPE_2OP_MASK, ISD::FADD, 0),
- X86_INTRINSIC_DATA(avx512_mask_add_ps_256, INTR_TYPE_2OP_MASK, ISD::FADD, 0),
X86_INTRINSIC_DATA(avx512_mask_add_ps_512, INTR_TYPE_2OP_MASK, ISD::FADD,
X86ISD::FADD_RND),
- X86_INTRINSIC_DATA(avx512_mask_add_sd_round, INTR_TYPE_SCALAR_MASK_RM, ISD::FADD,
- X86ISD::FADD_RND),
- X86_INTRINSIC_DATA(avx512_mask_add_ss_round, INTR_TYPE_SCALAR_MASK_RM, ISD::FADD,
- X86ISD::FADD_RND),
- X86_INTRINSIC_DATA(avx512_mask_and_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FAND, 0),
- X86_INTRINSIC_DATA(avx512_mask_and_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FAND, 0),
- X86_INTRINSIC_DATA(avx512_mask_and_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FAND, 0),
- X86_INTRINSIC_DATA(avx512_mask_and_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FAND, 0),
- X86_INTRINSIC_DATA(avx512_mask_and_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FAND, 0),
- X86_INTRINSIC_DATA(avx512_mask_and_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FAND, 0),
- X86_INTRINSIC_DATA(avx512_mask_andn_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FANDN, 0),
- X86_INTRINSIC_DATA(avx512_mask_andn_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FANDN, 0),
- X86_INTRINSIC_DATA(avx512_mask_andn_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FANDN, 0),
- X86_INTRINSIC_DATA(avx512_mask_andn_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FANDN, 0),
- X86_INTRINSIC_DATA(avx512_mask_andn_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FANDN, 0),
- X86_INTRINSIC_DATA(avx512_mask_andn_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FANDN, 0),
+ X86_INTRINSIC_DATA(avx512_mask_add_sd_round, INTR_TYPE_SCALAR_MASK_RM,
+ X86ISD::FADD_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_add_ss_round, INTR_TYPE_SCALAR_MASK_RM,
+ X86ISD::FADD_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_broadcastf32x2_256, BRCST32x2_TO_VEC,
X86ISD::VBROADCAST, 0),
X86_INTRINSIC_DATA(avx512_mask_broadcastf32x2_512, BRCST32x2_TO_VEC,
@@ -452,10 +518,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_cmp_q_128, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_q_256, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_q_512, CMP_MASK_CC, X86ISD::CMPM, 0),
- X86_INTRINSIC_DATA(avx512_mask_cmp_sd, CMP_MASK_SCALAR_CC, X86ISD::FSETCC,
- X86ISD::FSETCC),
- X86_INTRINSIC_DATA(avx512_mask_cmp_ss, CMP_MASK_SCALAR_CC, X86ISD::FSETCC,
- X86ISD::FSETCC),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_sd, CMP_MASK_SCALAR_CC,
+ X86ISD::FSETCCM, X86ISD::FSETCCM_RND),
+ X86_INTRINSIC_DATA(avx512_mask_cmp_ss, CMP_MASK_SCALAR_CC,
+ X86ISD::FSETCCM, X86ISD::FSETCCM_RND),
X86_INTRINSIC_DATA(avx512_mask_cmp_w_128, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_w_256, CMP_MASK_CC, X86ISD::CMPM, 0),
X86_INTRINSIC_DATA(avx512_mask_cmp_w_512, CMP_MASK_CC, X86ISD::CMPM, 0),
@@ -495,184 +561,168 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::CONFLICT, 0),
X86_INTRINSIC_DATA(avx512_mask_conflict_q_512, INTR_TYPE_1OP_MASK,
X86ISD::CONFLICT, 0),
- X86_INTRINSIC_DATA(avx512_mask_cvtdq2pd_128, INTR_TYPE_1OP_MASK,
- X86ISD::CVTDQ2PD, 0),
- X86_INTRINSIC_DATA(avx512_mask_cvtdq2pd_256, INTR_TYPE_1OP_MASK,
- ISD::SINT_TO_FP, 0),
- X86_INTRINSIC_DATA(avx512_mask_cvtdq2pd_512, INTR_TYPE_1OP_MASK,
- ISD::SINT_TO_FP, 0), // no rm
X86_INTRINSIC_DATA(avx512_mask_cvtdq2ps_128, INTR_TYPE_1OP_MASK,
ISD::SINT_TO_FP, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtdq2ps_256, INTR_TYPE_1OP_MASK,
ISD::SINT_TO_FP, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtdq2ps_512, INTR_TYPE_1OP_MASK,
- ISD::SINT_TO_FP, ISD::SINT_TO_FP), //er
+ ISD::SINT_TO_FP, X86ISD::SINT_TO_FP_RND), //er
X86_INTRINSIC_DATA(avx512_mask_cvtpd2dq_128, INTR_TYPE_1OP_MASK,
- X86ISD::FP_TO_SINT_RND, 0),
+ X86ISD::CVTP2SI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtpd2dq_256, INTR_TYPE_1OP_MASK,
- X86ISD::FP_TO_SINT_RND, 0),
+ X86ISD::CVTP2SI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtpd2dq_512, INTR_TYPE_1OP_MASK,
- X86ISD::FP_TO_SINT_RND, X86ISD::FP_TO_SINT_RND),
+ X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND),
X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps, INTR_TYPE_1OP_MASK,
X86ISD::VFPROUND, 0),
- X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps_256, INTR_TYPE_1OP_MASK_RM,
+ X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps_256, CVTPD2PS_MASK,
ISD::FP_ROUND, 0),
- X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps_512, INTR_TYPE_1OP_MASK_RM,
- ISD::FP_ROUND, X86ISD::VFPROUND),
+ X86_INTRINSIC_DATA(avx512_mask_cvtpd2ps_512, CVTPD2PS_MASK,
+ ISD::FP_ROUND, X86ISD::VFPROUND_RND),
X86_INTRINSIC_DATA(avx512_mask_cvtpd2qq_128, INTR_TYPE_1OP_MASK,
- X86ISD::FP_TO_SINT_RND, 0),
+ X86ISD::CVTP2SI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtpd2qq_256, INTR_TYPE_1OP_MASK,
- X86ISD::FP_TO_SINT_RND, 0),
+ X86ISD::CVTP2SI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtpd2qq_512, INTR_TYPE_1OP_MASK,
- X86ISD::FP_TO_SINT_RND, X86ISD::FP_TO_SINT_RND),
+ X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND),
X86_INTRINSIC_DATA(avx512_mask_cvtpd2udq_128, INTR_TYPE_1OP_MASK,
- X86ISD::FP_TO_UINT_RND, 0),
+ X86ISD::CVTP2UI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtpd2udq_256, INTR_TYPE_1OP_MASK,
- X86ISD::FP_TO_UINT_RND, 0),
+ X86ISD::CVTP2UI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtpd2udq_512, INTR_TYPE_1OP_MASK,
- X86ISD::FP_TO_UINT_RND, X86ISD::FP_TO_UINT_RND),
+ X86ISD::CVTP2UI, X86ISD::CVTP2UI_RND),
X86_INTRINSIC_DATA(avx512_mask_cvtpd2uqq_128, INTR_TYPE_1OP_MASK,
- X86ISD::FP_TO_UINT_RND, 0),
+ X86ISD::CVTP2UI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtpd2uqq_256, INTR_TYPE_1OP_MASK,
- X86ISD::FP_TO_UINT_RND, 0),
+ X86ISD::CVTP2UI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtpd2uqq_512, INTR_TYPE_1OP_MASK,
- X86ISD::FP_TO_UINT_RND, X86ISD::FP_TO_UINT_RND),
+ X86ISD::CVTP2UI, X86ISD::CVTP2UI_RND),
X86_INTRINSIC_DATA(avx512_mask_cvtps2dq_128, INTR_TYPE_1OP_MASK,
- X86ISD::FP_TO_SINT_RND, 0),
+ X86ISD::CVTP2SI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtps2dq_256, INTR_TYPE_1OP_MASK,
- X86ISD::FP_TO_SINT_RND, 0),
+ X86ISD::CVTP2SI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtps2dq_512, INTR_TYPE_1OP_MASK,
- X86ISD::FP_TO_SINT_RND, X86ISD::FP_TO_SINT_RND),
+ X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND),
X86_INTRINSIC_DATA(avx512_mask_cvtps2pd_128, INTR_TYPE_1OP_MASK,
X86ISD::VFPEXT, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtps2pd_256, INTR_TYPE_1OP_MASK,
ISD::FP_EXTEND, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtps2pd_512, INTR_TYPE_1OP_MASK,
- ISD::FP_EXTEND, X86ISD::VFPEXT),
+ ISD::FP_EXTEND, X86ISD::VFPEXT_RND),
X86_INTRINSIC_DATA(avx512_mask_cvtps2qq_128, INTR_TYPE_1OP_MASK,
- X86ISD::FP_TO_SINT_RND, 0),
+ X86ISD::CVTP2SI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtps2qq_256, INTR_TYPE_1OP_MASK,
- X86ISD::FP_TO_SINT_RND, 0),
+ X86ISD::CVTP2SI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtps2qq_512, INTR_TYPE_1OP_MASK,
- X86ISD::FP_TO_SINT_RND, X86ISD::FP_TO_SINT_RND),
+ X86ISD::CVTP2SI, X86ISD::CVTP2SI_RND),
X86_INTRINSIC_DATA(avx512_mask_cvtps2udq_128, INTR_TYPE_1OP_MASK,
- X86ISD::FP_TO_UINT_RND, 0),
+ X86ISD::CVTP2UI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtps2udq_256, INTR_TYPE_1OP_MASK,
- X86ISD::FP_TO_UINT_RND, 0),
+ X86ISD::CVTP2UI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtps2udq_512, INTR_TYPE_1OP_MASK,
- X86ISD::FP_TO_UINT_RND, X86ISD::FP_TO_UINT_RND),
+ X86ISD::CVTP2UI, X86ISD::CVTP2UI_RND),
X86_INTRINSIC_DATA(avx512_mask_cvtps2uqq_128, INTR_TYPE_1OP_MASK,
- X86ISD::FP_TO_UINT_RND, 0),
+ X86ISD::CVTP2UI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtps2uqq_256, INTR_TYPE_1OP_MASK,
- X86ISD::FP_TO_UINT_RND, 0),
+ X86ISD::CVTP2UI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtps2uqq_512, INTR_TYPE_1OP_MASK,
- X86ISD::FP_TO_UINT_RND, X86ISD::FP_TO_UINT_RND),
+ X86ISD::CVTP2UI, X86ISD::CVTP2UI_RND),
X86_INTRINSIC_DATA(avx512_mask_cvtqq2pd_128, INTR_TYPE_1OP_MASK,
ISD::SINT_TO_FP, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtqq2pd_256, INTR_TYPE_1OP_MASK,
ISD::SINT_TO_FP, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtqq2pd_512, INTR_TYPE_1OP_MASK,
- ISD::SINT_TO_FP, ISD::SINT_TO_FP),
+ ISD::SINT_TO_FP, X86ISD::SINT_TO_FP_RND),
X86_INTRINSIC_DATA(avx512_mask_cvtqq2ps_128, INTR_TYPE_1OP_MASK,
- ISD::SINT_TO_FP, 0),
+ X86ISD::CVTSI2P, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtqq2ps_256, INTR_TYPE_1OP_MASK,
ISD::SINT_TO_FP, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtqq2ps_512, INTR_TYPE_1OP_MASK,
- ISD::SINT_TO_FP, ISD::SINT_TO_FP),
+ ISD::SINT_TO_FP, X86ISD::SINT_TO_FP_RND),
X86_INTRINSIC_DATA(avx512_mask_cvtsd2ss_round, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::VFPROUND, 0),
+ X86ISD::VFPROUNDS_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtss2sd_round, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::VFPEXT, 0),
+ X86ISD::VFPEXTS_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_128, INTR_TYPE_1OP_MASK,
- ISD::FP_TO_SINT, 0),
+ X86ISD::CVTTP2SI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_256, INTR_TYPE_1OP_MASK,
ISD::FP_TO_SINT, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2dq_512, INTR_TYPE_1OP_MASK,
- ISD::FP_TO_SINT, ISD::FP_TO_SINT),
+ ISD::FP_TO_SINT, X86ISD::CVTTP2SI_RND),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2qq_128, INTR_TYPE_1OP_MASK,
ISD::FP_TO_SINT, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2qq_256, INTR_TYPE_1OP_MASK,
ISD::FP_TO_SINT, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2qq_512, INTR_TYPE_1OP_MASK,
- ISD::FP_TO_SINT, ISD::FP_TO_SINT),
+ ISD::FP_TO_SINT, X86ISD::CVTTP2SI_RND),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_128, INTR_TYPE_1OP_MASK,
- ISD::FP_TO_UINT, 0),
+ X86ISD::CVTTP2UI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_256, INTR_TYPE_1OP_MASK,
ISD::FP_TO_UINT, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2udq_512, INTR_TYPE_1OP_MASK,
- ISD::FP_TO_UINT, ISD::FP_TO_UINT),
+ ISD::FP_TO_UINT, X86ISD::CVTTP2UI_RND),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2uqq_128, INTR_TYPE_1OP_MASK,
ISD::FP_TO_UINT, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2uqq_256, INTR_TYPE_1OP_MASK,
ISD::FP_TO_UINT, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttpd2uqq_512, INTR_TYPE_1OP_MASK,
- ISD::FP_TO_UINT, ISD::FP_TO_UINT),
+ ISD::FP_TO_UINT, X86ISD::CVTTP2UI_RND),
X86_INTRINSIC_DATA(avx512_mask_cvttps2dq_128, INTR_TYPE_1OP_MASK,
ISD::FP_TO_SINT, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttps2dq_256, INTR_TYPE_1OP_MASK,
ISD::FP_TO_SINT, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttps2dq_512, INTR_TYPE_1OP_MASK,
- ISD::FP_TO_SINT, ISD::FP_TO_SINT),
+ ISD::FP_TO_SINT, X86ISD::CVTTP2SI_RND),
X86_INTRINSIC_DATA(avx512_mask_cvttps2qq_128, INTR_TYPE_1OP_MASK,
- ISD::FP_TO_SINT, 0),
+ X86ISD::CVTTP2SI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttps2qq_256, INTR_TYPE_1OP_MASK,
ISD::FP_TO_SINT, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttps2qq_512, INTR_TYPE_1OP_MASK,
- ISD::FP_TO_SINT, ISD::FP_TO_SINT),
+ ISD::FP_TO_SINT, X86ISD::CVTTP2SI_RND),
X86_INTRINSIC_DATA(avx512_mask_cvttps2udq_128, INTR_TYPE_1OP_MASK,
ISD::FP_TO_UINT, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttps2udq_256, INTR_TYPE_1OP_MASK,
ISD::FP_TO_UINT, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttps2udq_512, INTR_TYPE_1OP_MASK,
- ISD::FP_TO_UINT, ISD::FP_TO_UINT),
+ ISD::FP_TO_UINT, X86ISD::CVTTP2UI_RND),
X86_INTRINSIC_DATA(avx512_mask_cvttps2uqq_128, INTR_TYPE_1OP_MASK,
- ISD::FP_TO_UINT, 0),
+ X86ISD::CVTTP2UI, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttps2uqq_256, INTR_TYPE_1OP_MASK,
ISD::FP_TO_UINT, 0),
X86_INTRINSIC_DATA(avx512_mask_cvttps2uqq_512, INTR_TYPE_1OP_MASK,
- ISD::FP_TO_UINT, ISD::FP_TO_UINT),
- X86_INTRINSIC_DATA(avx512_mask_cvtudq2pd_128, INTR_TYPE_1OP_MASK,
- X86ISD::CVTUDQ2PD, 0),
- X86_INTRINSIC_DATA(avx512_mask_cvtudq2pd_256, INTR_TYPE_1OP_MASK,
- ISD::UINT_TO_FP, 0),
- X86_INTRINSIC_DATA(avx512_mask_cvtudq2pd_512, INTR_TYPE_1OP_MASK,
- ISD::UINT_TO_FP, 0), // no rm
+ ISD::FP_TO_UINT, X86ISD::CVTTP2UI_RND),
X86_INTRINSIC_DATA(avx512_mask_cvtudq2ps_128, INTR_TYPE_1OP_MASK,
ISD::UINT_TO_FP, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtudq2ps_256, INTR_TYPE_1OP_MASK,
ISD::UINT_TO_FP, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtudq2ps_512, INTR_TYPE_1OP_MASK,
- ISD::UINT_TO_FP, ISD::UINT_TO_FP),
+ ISD::UINT_TO_FP, X86ISD::UINT_TO_FP_RND),
X86_INTRINSIC_DATA(avx512_mask_cvtuqq2pd_128, INTR_TYPE_1OP_MASK,
ISD::UINT_TO_FP, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtuqq2pd_256, INTR_TYPE_1OP_MASK,
ISD::UINT_TO_FP, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtuqq2pd_512, INTR_TYPE_1OP_MASK,
- ISD::UINT_TO_FP, ISD::UINT_TO_FP),
+ ISD::UINT_TO_FP, X86ISD::UINT_TO_FP_RND),
X86_INTRINSIC_DATA(avx512_mask_cvtuqq2ps_128, INTR_TYPE_1OP_MASK,
- ISD::UINT_TO_FP, 0),
+ X86ISD::CVTUI2P, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtuqq2ps_256, INTR_TYPE_1OP_MASK,
ISD::UINT_TO_FP, 0),
X86_INTRINSIC_DATA(avx512_mask_cvtuqq2ps_512, INTR_TYPE_1OP_MASK,
- ISD::UINT_TO_FP, ISD::UINT_TO_FP),
+ ISD::UINT_TO_FP, X86ISD::UINT_TO_FP_RND),
X86_INTRINSIC_DATA(avx512_mask_dbpsadbw_128, INTR_TYPE_3OP_IMM8_MASK,
X86ISD::DBPSADBW, 0),
X86_INTRINSIC_DATA(avx512_mask_dbpsadbw_256, INTR_TYPE_3OP_IMM8_MASK,
X86ISD::DBPSADBW, 0),
X86_INTRINSIC_DATA(avx512_mask_dbpsadbw_512, INTR_TYPE_3OP_IMM8_MASK,
X86ISD::DBPSADBW, 0),
- X86_INTRINSIC_DATA(avx512_mask_div_pd_128, INTR_TYPE_2OP_MASK, ISD::FDIV, 0),
- X86_INTRINSIC_DATA(avx512_mask_div_pd_256, INTR_TYPE_2OP_MASK, ISD::FDIV, 0),
X86_INTRINSIC_DATA(avx512_mask_div_pd_512, INTR_TYPE_2OP_MASK, ISD::FDIV,
X86ISD::FDIV_RND),
- X86_INTRINSIC_DATA(avx512_mask_div_ps_128, INTR_TYPE_2OP_MASK, ISD::FDIV, 0),
- X86_INTRINSIC_DATA(avx512_mask_div_ps_256, INTR_TYPE_2OP_MASK, ISD::FDIV, 0),
X86_INTRINSIC_DATA(avx512_mask_div_ps_512, INTR_TYPE_2OP_MASK, ISD::FDIV,
X86ISD::FDIV_RND),
- X86_INTRINSIC_DATA(avx512_mask_div_sd_round, INTR_TYPE_SCALAR_MASK_RM, ISD::FDIV,
- X86ISD::FDIV_RND),
- X86_INTRINSIC_DATA(avx512_mask_div_ss_round, INTR_TYPE_SCALAR_MASK_RM, ISD::FDIV,
- X86ISD::FDIV_RND),
+ X86_INTRINSIC_DATA(avx512_mask_div_sd_round, INTR_TYPE_SCALAR_MASK_RM,
+ X86ISD::FDIV_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_div_ss_round, INTR_TYPE_SCALAR_MASK_RM,
+ X86ISD::FDIV_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_d_128, COMPRESS_EXPAND_IN_REG,
X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_d_256, COMPRESS_EXPAND_IN_REG,
@@ -726,9 +776,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_getexp_ps_512, INTR_TYPE_1OP_MASK_RM,
X86ISD::FGETEXP_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_getexp_sd, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::FGETEXP_RND, 0),
+ X86ISD::FGETEXPS_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_getexp_ss, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::FGETEXP_RND, 0),
+ X86ISD::FGETEXPS_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_getmant_pd_128, INTR_TYPE_2OP_MASK_RM,
X86ISD::VGETMANT, 0),
X86_INTRINSIC_DATA(avx512_mask_getmant_pd_256, INTR_TYPE_2OP_MASK_RM,
@@ -742,9 +792,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_getmant_ps_512, INTR_TYPE_2OP_MASK_RM,
X86ISD::VGETMANT, 0),
X86_INTRINSIC_DATA(avx512_mask_getmant_sd, INTR_TYPE_3OP_SCALAR_MASK_RM,
- X86ISD::VGETMANT, 0),
+ X86ISD::VGETMANTS, 0),
X86_INTRINSIC_DATA(avx512_mask_getmant_ss, INTR_TYPE_3OP_SCALAR_MASK_RM,
- X86ISD::VGETMANT, 0),
+ X86ISD::VGETMANTS, 0),
X86_INTRINSIC_DATA(avx512_mask_insertf32x4_256, INSERT_SUBVEC,
ISD::INSERT_SUBVECTOR, 0),
X86_INTRINSIC_DATA(avx512_mask_insertf32x4_512, INSERT_SUBVEC,
@@ -790,9 +840,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_max_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FMAX,
X86ISD::FMAX_RND),
X86_INTRINSIC_DATA(avx512_mask_max_sd_round, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::FMAX, X86ISD::FMAX_RND),
+ X86ISD::FMAX_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_max_ss_round, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::FMAX, X86ISD::FMAX_RND),
+ X86ISD::FMAX_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_min_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FMIN, 0),
X86_INTRINSIC_DATA(avx512_mask_min_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FMIN, 0),
X86_INTRINSIC_DATA(avx512_mask_min_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FMIN,
@@ -802,31 +852,17 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_min_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FMIN,
X86ISD::FMIN_RND),
X86_INTRINSIC_DATA(avx512_mask_min_sd_round, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::FMIN, X86ISD::FMIN_RND),
+ X86ISD::FMIN_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_min_ss_round, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::FMIN, X86ISD::FMIN_RND),
- X86_INTRINSIC_DATA(avx512_mask_move_sd, INTR_TYPE_SCALAR_MASK,
- X86ISD::MOVSD, 0),
- X86_INTRINSIC_DATA(avx512_mask_move_ss, INTR_TYPE_SCALAR_MASK,
- X86ISD::MOVSS, 0),
- X86_INTRINSIC_DATA(avx512_mask_mul_pd_128, INTR_TYPE_2OP_MASK, ISD::FMUL, 0),
- X86_INTRINSIC_DATA(avx512_mask_mul_pd_256, INTR_TYPE_2OP_MASK, ISD::FMUL, 0),
+ X86ISD::FMIN_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_mul_pd_512, INTR_TYPE_2OP_MASK, ISD::FMUL,
X86ISD::FMUL_RND),
- X86_INTRINSIC_DATA(avx512_mask_mul_ps_128, INTR_TYPE_2OP_MASK, ISD::FMUL, 0),
- X86_INTRINSIC_DATA(avx512_mask_mul_ps_256, INTR_TYPE_2OP_MASK, ISD::FMUL, 0),
X86_INTRINSIC_DATA(avx512_mask_mul_ps_512, INTR_TYPE_2OP_MASK, ISD::FMUL,
X86ISD::FMUL_RND),
- X86_INTRINSIC_DATA(avx512_mask_mul_sd_round, INTR_TYPE_SCALAR_MASK_RM, ISD::FMUL,
- X86ISD::FMUL_RND),
- X86_INTRINSIC_DATA(avx512_mask_mul_ss_round, INTR_TYPE_SCALAR_MASK_RM, ISD::FMUL,
- X86ISD::FMUL_RND),
- X86_INTRINSIC_DATA(avx512_mask_or_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0),
- X86_INTRINSIC_DATA(avx512_mask_or_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0),
- X86_INTRINSIC_DATA(avx512_mask_or_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0),
- X86_INTRINSIC_DATA(avx512_mask_or_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0),
- X86_INTRINSIC_DATA(avx512_mask_or_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0),
- X86_INTRINSIC_DATA(avx512_mask_or_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FOR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_mul_sd_round, INTR_TYPE_SCALAR_MASK_RM,
+ X86ISD::FMUL_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_mul_ss_round, INTR_TYPE_SCALAR_MASK_RM,
+ X86ISD::FMUL_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_pabs_b_128, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
X86_INTRINSIC_DATA(avx512_mask_pabs_b_256, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
X86_INTRINSIC_DATA(avx512_mask_pabs_b_512, INTR_TYPE_1OP_MASK, X86ISD::ABS, 0),
@@ -851,18 +887,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_packuswb_128, INTR_TYPE_2OP_MASK, X86ISD::PACKUS, 0),
X86_INTRINSIC_DATA(avx512_mask_packuswb_256, INTR_TYPE_2OP_MASK, X86ISD::PACKUS, 0),
X86_INTRINSIC_DATA(avx512_mask_packuswb_512, INTR_TYPE_2OP_MASK, X86ISD::PACKUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_padd_b_128, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
- X86_INTRINSIC_DATA(avx512_mask_padd_b_256, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
- X86_INTRINSIC_DATA(avx512_mask_padd_b_512, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
- X86_INTRINSIC_DATA(avx512_mask_padd_d_128, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
- X86_INTRINSIC_DATA(avx512_mask_padd_d_256, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
- X86_INTRINSIC_DATA(avx512_mask_padd_d_512, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
- X86_INTRINSIC_DATA(avx512_mask_padd_q_128, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
- X86_INTRINSIC_DATA(avx512_mask_padd_q_256, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
- X86_INTRINSIC_DATA(avx512_mask_padd_q_512, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
- X86_INTRINSIC_DATA(avx512_mask_padd_w_128, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
- X86_INTRINSIC_DATA(avx512_mask_padd_w_256, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
- X86_INTRINSIC_DATA(avx512_mask_padd_w_512, INTR_TYPE_2OP_MASK, ISD::ADD, 0),
X86_INTRINSIC_DATA(avx512_mask_padds_b_128, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
X86_INTRINSIC_DATA(avx512_mask_padds_b_256, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
X86_INTRINSIC_DATA(avx512_mask_padds_b_512, INTR_TYPE_2OP_MASK, X86ISD::ADDS, 0),
@@ -945,54 +969,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::VPMADDWD, 0),
X86_INTRINSIC_DATA(avx512_mask_pmaddw_d_512, INTR_TYPE_2OP_MASK,
X86ISD::VPMADDWD, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxs_b_128, INTR_TYPE_2OP_MASK, ISD::SMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxs_b_256, INTR_TYPE_2OP_MASK, ISD::SMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxs_b_512, INTR_TYPE_2OP_MASK, ISD::SMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxs_d_128, INTR_TYPE_2OP_MASK, ISD::SMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxs_d_256, INTR_TYPE_2OP_MASK, ISD::SMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxs_d_512, INTR_TYPE_2OP_MASK, ISD::SMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxs_q_128, INTR_TYPE_2OP_MASK, ISD::SMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxs_q_256, INTR_TYPE_2OP_MASK, ISD::SMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxs_q_512, INTR_TYPE_2OP_MASK, ISD::SMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxs_w_128, INTR_TYPE_2OP_MASK, ISD::SMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxs_w_256, INTR_TYPE_2OP_MASK, ISD::SMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxs_w_512, INTR_TYPE_2OP_MASK, ISD::SMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxu_b_128, INTR_TYPE_2OP_MASK, ISD::UMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxu_b_256, INTR_TYPE_2OP_MASK, ISD::UMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxu_b_512, INTR_TYPE_2OP_MASK, ISD::UMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxu_d_128, INTR_TYPE_2OP_MASK, ISD::UMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxu_d_256, INTR_TYPE_2OP_MASK, ISD::UMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxu_d_512, INTR_TYPE_2OP_MASK, ISD::UMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxu_q_128, INTR_TYPE_2OP_MASK, ISD::UMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxu_q_256, INTR_TYPE_2OP_MASK, ISD::UMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxu_q_512, INTR_TYPE_2OP_MASK, ISD::UMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxu_w_128, INTR_TYPE_2OP_MASK, ISD::UMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxu_w_256, INTR_TYPE_2OP_MASK, ISD::UMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmaxu_w_512, INTR_TYPE_2OP_MASK, ISD::UMAX, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmins_b_128, INTR_TYPE_2OP_MASK, ISD::SMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmins_b_256, INTR_TYPE_2OP_MASK, ISD::SMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmins_b_512, INTR_TYPE_2OP_MASK, ISD::SMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmins_d_128, INTR_TYPE_2OP_MASK, ISD::SMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmins_d_256, INTR_TYPE_2OP_MASK, ISD::SMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmins_d_512, INTR_TYPE_2OP_MASK, ISD::SMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmins_q_128, INTR_TYPE_2OP_MASK, ISD::SMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmins_q_256, INTR_TYPE_2OP_MASK, ISD::SMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmins_q_512, INTR_TYPE_2OP_MASK, ISD::SMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmins_w_128, INTR_TYPE_2OP_MASK, ISD::SMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmins_w_256, INTR_TYPE_2OP_MASK, ISD::SMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmins_w_512, INTR_TYPE_2OP_MASK, ISD::SMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pminu_b_128, INTR_TYPE_2OP_MASK, ISD::UMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pminu_b_256, INTR_TYPE_2OP_MASK, ISD::UMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pminu_b_512, INTR_TYPE_2OP_MASK, ISD::UMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pminu_d_128, INTR_TYPE_2OP_MASK, ISD::UMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pminu_d_256, INTR_TYPE_2OP_MASK, ISD::UMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pminu_d_512, INTR_TYPE_2OP_MASK, ISD::UMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pminu_q_128, INTR_TYPE_2OP_MASK, ISD::UMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pminu_q_256, INTR_TYPE_2OP_MASK, ISD::UMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pminu_q_512, INTR_TYPE_2OP_MASK, ISD::UMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pminu_w_128, INTR_TYPE_2OP_MASK, ISD::UMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pminu_w_256, INTR_TYPE_2OP_MASK, ISD::UMIN, 0),
- X86_INTRINSIC_DATA(avx512_mask_pminu_w_512, INTR_TYPE_2OP_MASK, ISD::UMIN, 0),
X86_INTRINSIC_DATA(avx512_mask_pmov_db_128, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNC, 0),
X86_INTRINSIC_DATA(avx512_mask_pmov_db_256, INTR_TYPE_1OP_MASK,
@@ -1065,42 +1041,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::VTRUNCS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_512, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovsxb_d_128, INTR_TYPE_1OP_MASK,
- X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovsxb_d_256, INTR_TYPE_1OP_MASK,
- X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovsxb_d_512, INTR_TYPE_1OP_MASK,
- X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovsxb_q_128, INTR_TYPE_1OP_MASK,
- X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovsxb_q_256, INTR_TYPE_1OP_MASK,
- X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovsxb_q_512, INTR_TYPE_1OP_MASK,
- X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovsxb_w_128, INTR_TYPE_1OP_MASK,
- X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovsxb_w_256, INTR_TYPE_1OP_MASK,
- X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovsxb_w_512, INTR_TYPE_1OP_MASK,
- X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovsxd_q_128, INTR_TYPE_1OP_MASK,
- X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovsxd_q_256, INTR_TYPE_1OP_MASK,
- X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovsxd_q_512, INTR_TYPE_1OP_MASK,
- X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovsxw_d_128, INTR_TYPE_1OP_MASK,
- X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovsxw_d_256, INTR_TYPE_1OP_MASK,
- X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovsxw_d_512, INTR_TYPE_1OP_MASK,
- X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovsxw_q_128, INTR_TYPE_1OP_MASK,
- X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovsxw_q_256, INTR_TYPE_1OP_MASK,
- X86ISD::VSEXT, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovsxw_q_512, INTR_TYPE_1OP_MASK,
- X86ISD::VSEXT, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovus_db_128, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCUS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovus_db_256, INTR_TYPE_1OP_MASK,
@@ -1137,48 +1077,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::VTRUNCUS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_512, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCUS, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovzxb_d_128, INTR_TYPE_1OP_MASK,
- X86ISD::VZEXT, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovzxb_d_256, INTR_TYPE_1OP_MASK,
- X86ISD::VZEXT, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovzxb_d_512, INTR_TYPE_1OP_MASK,
- X86ISD::VZEXT, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovzxb_q_128, INTR_TYPE_1OP_MASK,
- X86ISD::VZEXT, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovzxb_q_256, INTR_TYPE_1OP_MASK,
- X86ISD::VZEXT, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovzxb_q_512, INTR_TYPE_1OP_MASK,
- X86ISD::VZEXT, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovzxb_w_128, INTR_TYPE_1OP_MASK,
- X86ISD::VZEXT, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovzxb_w_256, INTR_TYPE_1OP_MASK,
- X86ISD::VZEXT, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovzxb_w_512, INTR_TYPE_1OP_MASK,
- X86ISD::VZEXT, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovzxd_q_128, INTR_TYPE_1OP_MASK,
- X86ISD::VZEXT, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovzxd_q_256, INTR_TYPE_1OP_MASK,
- X86ISD::VZEXT, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovzxd_q_512, INTR_TYPE_1OP_MASK,
- X86ISD::VZEXT, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovzxw_d_128, INTR_TYPE_1OP_MASK,
- X86ISD::VZEXT, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovzxw_d_256, INTR_TYPE_1OP_MASK,
- X86ISD::VZEXT, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovzxw_d_512, INTR_TYPE_1OP_MASK,
- X86ISD::VZEXT, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovzxw_q_128, INTR_TYPE_1OP_MASK,
- X86ISD::VZEXT, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovzxw_q_256, INTR_TYPE_1OP_MASK,
- X86ISD::VZEXT, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmovzxw_q_512, INTR_TYPE_1OP_MASK,
- X86ISD::VZEXT, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmul_dq_128, INTR_TYPE_2OP_MASK,
- X86ISD::PMULDQ, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmul_dq_256, INTR_TYPE_2OP_MASK,
- X86ISD::PMULDQ, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmul_dq_512, INTR_TYPE_2OP_MASK,
- X86ISD::PMULDQ, 0),
X86_INTRINSIC_DATA(avx512_mask_pmul_hr_sw_128, INTR_TYPE_2OP_MASK, X86ISD::MULHRS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmul_hr_sw_256, INTR_TYPE_2OP_MASK, X86ISD::MULHRS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmul_hr_sw_512, INTR_TYPE_2OP_MASK, X86ISD::MULHRS, 0),
@@ -1188,27 +1086,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_pmulhu_w_128, INTR_TYPE_2OP_MASK, ISD::MULHU, 0),
X86_INTRINSIC_DATA(avx512_mask_pmulhu_w_256, INTR_TYPE_2OP_MASK, ISD::MULHU, 0),
X86_INTRINSIC_DATA(avx512_mask_pmulhu_w_512, INTR_TYPE_2OP_MASK, ISD::MULHU, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmull_d_128, INTR_TYPE_2OP_MASK, ISD::MUL, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmull_d_256, INTR_TYPE_2OP_MASK, ISD::MUL, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmull_d_512, INTR_TYPE_2OP_MASK, ISD::MUL, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmull_q_128, INTR_TYPE_2OP_MASK, ISD::MUL, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmull_q_256, INTR_TYPE_2OP_MASK, ISD::MUL, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmull_q_512, INTR_TYPE_2OP_MASK, ISD::MUL, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmull_w_128, INTR_TYPE_2OP_MASK, ISD::MUL, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmull_w_256, INTR_TYPE_2OP_MASK, ISD::MUL, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmull_w_512, INTR_TYPE_2OP_MASK, ISD::MUL, 0),
X86_INTRINSIC_DATA(avx512_mask_pmultishift_qb_128, INTR_TYPE_2OP_MASK,
X86ISD::MULTISHIFT, 0),
X86_INTRINSIC_DATA(avx512_mask_pmultishift_qb_256, INTR_TYPE_2OP_MASK,
X86ISD::MULTISHIFT, 0),
X86_INTRINSIC_DATA(avx512_mask_pmultishift_qb_512, INTR_TYPE_2OP_MASK,
X86ISD::MULTISHIFT, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmulu_dq_128, INTR_TYPE_2OP_MASK,
- X86ISD::PMULUDQ, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmulu_dq_256, INTR_TYPE_2OP_MASK,
- X86ISD::PMULUDQ, 0),
- X86_INTRINSIC_DATA(avx512_mask_pmulu_dq_512, INTR_TYPE_2OP_MASK,
- X86ISD::PMULUDQ, 0),
X86_INTRINSIC_DATA(avx512_mask_prol_d_128, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VROTLI, 0),
X86_INTRINSIC_DATA(avx512_mask_prol_d_256, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VROTLI, 0),
X86_INTRINSIC_DATA(avx512_mask_prol_d_512, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VROTLI, 0),
@@ -1233,105 +1116,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_prorv_q_128, INTR_TYPE_2OP_MASK, ISD::ROTR, 0),
X86_INTRINSIC_DATA(avx512_mask_prorv_q_256, INTR_TYPE_2OP_MASK, ISD::ROTR, 0),
X86_INTRINSIC_DATA(avx512_mask_prorv_q_512, INTR_TYPE_2OP_MASK, ISD::ROTR, 0),
- X86_INTRINSIC_DATA(avx512_mask_pshuf_b_128, INTR_TYPE_2OP_MASK,
- X86ISD::PSHUFB, 0),
- X86_INTRINSIC_DATA(avx512_mask_pshuf_b_256, INTR_TYPE_2OP_MASK,
- X86ISD::PSHUFB, 0),
- X86_INTRINSIC_DATA(avx512_mask_pshuf_b_512, INTR_TYPE_2OP_MASK,
- X86ISD::PSHUFB, 0),
- X86_INTRINSIC_DATA(avx512_mask_psll_d, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
- X86_INTRINSIC_DATA(avx512_mask_psll_d_128, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
- X86_INTRINSIC_DATA(avx512_mask_psll_d_256, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
- X86_INTRINSIC_DATA(avx512_mask_psll_di_128, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSHLI, 0),
- X86_INTRINSIC_DATA(avx512_mask_psll_di_256, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSHLI, 0),
- X86_INTRINSIC_DATA(avx512_mask_psll_di_512, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSHLI, 0),
- X86_INTRINSIC_DATA(avx512_mask_psll_q, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
- X86_INTRINSIC_DATA(avx512_mask_psll_q_128, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
- X86_INTRINSIC_DATA(avx512_mask_psll_q_256, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
- X86_INTRINSIC_DATA(avx512_mask_psll_qi_128, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSHLI, 0),
- X86_INTRINSIC_DATA(avx512_mask_psll_qi_256, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSHLI, 0),
- X86_INTRINSIC_DATA(avx512_mask_psll_qi_512, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSHLI, 0),
- X86_INTRINSIC_DATA(avx512_mask_psll_w_128, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
- X86_INTRINSIC_DATA(avx512_mask_psll_w_256, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
- X86_INTRINSIC_DATA(avx512_mask_psll_w_512, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
- X86_INTRINSIC_DATA(avx512_mask_psll_wi_128, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSHLI, 0),
- X86_INTRINSIC_DATA(avx512_mask_psll_wi_256, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSHLI, 0),
- X86_INTRINSIC_DATA(avx512_mask_psll_wi_512, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSHLI, 0),
- X86_INTRINSIC_DATA(avx512_mask_psllv_d, INTR_TYPE_2OP_MASK, ISD::SHL, 0),
- X86_INTRINSIC_DATA(avx512_mask_psllv_q, INTR_TYPE_2OP_MASK, ISD::SHL, 0),
- X86_INTRINSIC_DATA(avx512_mask_psllv16_hi, INTR_TYPE_2OP_MASK, ISD::SHL, 0),
- X86_INTRINSIC_DATA(avx512_mask_psllv2_di, INTR_TYPE_2OP_MASK, ISD::SHL, 0),
- X86_INTRINSIC_DATA(avx512_mask_psllv32hi, INTR_TYPE_2OP_MASK, ISD::SHL, 0),
- X86_INTRINSIC_DATA(avx512_mask_psllv4_di, INTR_TYPE_2OP_MASK, ISD::SHL, 0),
- X86_INTRINSIC_DATA(avx512_mask_psllv4_si, INTR_TYPE_2OP_MASK, ISD::SHL, 0),
- X86_INTRINSIC_DATA(avx512_mask_psllv8_hi, INTR_TYPE_2OP_MASK, ISD::SHL, 0),
- X86_INTRINSIC_DATA(avx512_mask_psllv8_si, INTR_TYPE_2OP_MASK, ISD::SHL, 0),
- X86_INTRINSIC_DATA(avx512_mask_psra_d, INTR_TYPE_2OP_MASK, X86ISD::VSRA, 0),
- X86_INTRINSIC_DATA(avx512_mask_psra_d_128, INTR_TYPE_2OP_MASK, X86ISD::VSRA, 0),
- X86_INTRINSIC_DATA(avx512_mask_psra_d_256, INTR_TYPE_2OP_MASK, X86ISD::VSRA, 0),
- X86_INTRINSIC_DATA(avx512_mask_psra_di_128, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRAI, 0),
- X86_INTRINSIC_DATA(avx512_mask_psra_di_256, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRAI, 0),
- X86_INTRINSIC_DATA(avx512_mask_psra_di_512, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRAI, 0),
- X86_INTRINSIC_DATA(avx512_mask_psra_q, INTR_TYPE_2OP_MASK, X86ISD::VSRA, 0),
- X86_INTRINSIC_DATA(avx512_mask_psra_q_128, INTR_TYPE_2OP_MASK, X86ISD::VSRA, 0),
- X86_INTRINSIC_DATA(avx512_mask_psra_q_256, INTR_TYPE_2OP_MASK, X86ISD::VSRA, 0),
- X86_INTRINSIC_DATA(avx512_mask_psra_qi_128, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRAI, 0),
- X86_INTRINSIC_DATA(avx512_mask_psra_qi_256, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRAI, 0),
- X86_INTRINSIC_DATA(avx512_mask_psra_qi_512, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRAI, 0),
- X86_INTRINSIC_DATA(avx512_mask_psra_w_128, INTR_TYPE_2OP_MASK, X86ISD::VSRA, 0),
- X86_INTRINSIC_DATA(avx512_mask_psra_w_256, INTR_TYPE_2OP_MASK, X86ISD::VSRA, 0),
- X86_INTRINSIC_DATA(avx512_mask_psra_w_512, INTR_TYPE_2OP_MASK, X86ISD::VSRA, 0),
- X86_INTRINSIC_DATA(avx512_mask_psra_wi_128, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRAI, 0),
- X86_INTRINSIC_DATA(avx512_mask_psra_wi_256, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRAI, 0),
- X86_INTRINSIC_DATA(avx512_mask_psra_wi_512, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRAI, 0),
- X86_INTRINSIC_DATA(avx512_mask_psrav_d, INTR_TYPE_2OP_MASK, X86ISD::VSRAV, 0),
- X86_INTRINSIC_DATA(avx512_mask_psrav_q, INTR_TYPE_2OP_MASK, X86ISD::VSRAV, 0),
- X86_INTRINSIC_DATA(avx512_mask_psrav_q_128, INTR_TYPE_2OP_MASK, X86ISD::VSRAV, 0),
- X86_INTRINSIC_DATA(avx512_mask_psrav_q_256, INTR_TYPE_2OP_MASK, X86ISD::VSRAV, 0),
- X86_INTRINSIC_DATA(avx512_mask_psrav16_hi, INTR_TYPE_2OP_MASK, X86ISD::VSRAV, 0),
- X86_INTRINSIC_DATA(avx512_mask_psrav32_hi, INTR_TYPE_2OP_MASK, X86ISD::VSRAV, 0),
- X86_INTRINSIC_DATA(avx512_mask_psrav4_si, INTR_TYPE_2OP_MASK, X86ISD::VSRAV, 0),
- X86_INTRINSIC_DATA(avx512_mask_psrav8_hi, INTR_TYPE_2OP_MASK, X86ISD::VSRAV, 0),
- X86_INTRINSIC_DATA(avx512_mask_psrav8_si, INTR_TYPE_2OP_MASK, X86ISD::VSRAV, 0),
- X86_INTRINSIC_DATA(avx512_mask_psrl_d, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
- X86_INTRINSIC_DATA(avx512_mask_psrl_d_128, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
- X86_INTRINSIC_DATA(avx512_mask_psrl_d_256, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
- X86_INTRINSIC_DATA(avx512_mask_psrl_di_128, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRLI, 0),
- X86_INTRINSIC_DATA(avx512_mask_psrl_di_256, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRLI, 0),
- X86_INTRINSIC_DATA(avx512_mask_psrl_di_512, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRLI, 0),
- X86_INTRINSIC_DATA(avx512_mask_psrl_q, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
- X86_INTRINSIC_DATA(avx512_mask_psrl_q_128, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
- X86_INTRINSIC_DATA(avx512_mask_psrl_q_256, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
- X86_INTRINSIC_DATA(avx512_mask_psrl_qi_128, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRLI, 0),
- X86_INTRINSIC_DATA(avx512_mask_psrl_qi_256, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRLI, 0),
- X86_INTRINSIC_DATA(avx512_mask_psrl_qi_512, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRLI, 0),
- X86_INTRINSIC_DATA(avx512_mask_psrl_w_128, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
- X86_INTRINSIC_DATA(avx512_mask_psrl_w_256, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
- X86_INTRINSIC_DATA(avx512_mask_psrl_w_512, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
- X86_INTRINSIC_DATA(avx512_mask_psrl_wi_128, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRLI, 0),
- X86_INTRINSIC_DATA(avx512_mask_psrl_wi_256, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRLI, 0),
- X86_INTRINSIC_DATA(avx512_mask_psrl_wi_512, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VSRLI, 0),
- X86_INTRINSIC_DATA(avx512_mask_psrlv_d, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
- X86_INTRINSIC_DATA(avx512_mask_psrlv_q, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
- X86_INTRINSIC_DATA(avx512_mask_psrlv16_hi, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
- X86_INTRINSIC_DATA(avx512_mask_psrlv2_di, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
- X86_INTRINSIC_DATA(avx512_mask_psrlv32hi, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
- X86_INTRINSIC_DATA(avx512_mask_psrlv4_di, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
- X86_INTRINSIC_DATA(avx512_mask_psrlv4_si, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
- X86_INTRINSIC_DATA(avx512_mask_psrlv8_hi, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
- X86_INTRINSIC_DATA(avx512_mask_psrlv8_si, INTR_TYPE_2OP_MASK, ISD::SRL, 0),
- X86_INTRINSIC_DATA(avx512_mask_psub_b_128, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
- X86_INTRINSIC_DATA(avx512_mask_psub_b_256, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
- X86_INTRINSIC_DATA(avx512_mask_psub_b_512, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
- X86_INTRINSIC_DATA(avx512_mask_psub_d_128, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
- X86_INTRINSIC_DATA(avx512_mask_psub_d_256, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
- X86_INTRINSIC_DATA(avx512_mask_psub_d_512, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
- X86_INTRINSIC_DATA(avx512_mask_psub_q_128, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
- X86_INTRINSIC_DATA(avx512_mask_psub_q_256, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
- X86_INTRINSIC_DATA(avx512_mask_psub_q_512, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
- X86_INTRINSIC_DATA(avx512_mask_psub_w_128, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
- X86_INTRINSIC_DATA(avx512_mask_psub_w_256, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
- X86_INTRINSIC_DATA(avx512_mask_psub_w_512, INTR_TYPE_2OP_MASK, ISD::SUB, 0),
X86_INTRINSIC_DATA(avx512_mask_psubs_b_128, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),
X86_INTRINSIC_DATA(avx512_mask_psubs_b_256, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),
X86_INTRINSIC_DATA(avx512_mask_psubs_b_512, INTR_TYPE_2OP_MASK, X86ISD::SUBS, 0),
@@ -1370,8 +1154,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_reduce_ps_128, INTR_TYPE_2OP_MASK_RM, X86ISD::VREDUCE, 0),
X86_INTRINSIC_DATA(avx512_mask_reduce_ps_256, INTR_TYPE_2OP_MASK_RM, X86ISD::VREDUCE, 0),
X86_INTRINSIC_DATA(avx512_mask_reduce_ps_512, INTR_TYPE_2OP_MASK_RM, X86ISD::VREDUCE, 0),
- X86_INTRINSIC_DATA(avx512_mask_reduce_sd, INTR_TYPE_SCALAR_MASK_RM, X86ISD::VREDUCE, 0),
- X86_INTRINSIC_DATA(avx512_mask_reduce_ss, INTR_TYPE_SCALAR_MASK_RM, X86ISD::VREDUCE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_reduce_sd, INTR_TYPE_SCALAR_MASK_RM, X86ISD::VREDUCES, 0),
+ X86_INTRINSIC_DATA(avx512_mask_reduce_ss, INTR_TYPE_SCALAR_MASK_RM, X86ISD::VREDUCES, 0),
X86_INTRINSIC_DATA(avx512_mask_rndscale_pd_128, INTR_TYPE_2OP_MASK_RM, X86ISD::VRNDSCALE, 0),
X86_INTRINSIC_DATA(avx512_mask_rndscale_pd_256, INTR_TYPE_2OP_MASK_RM, X86ISD::VRNDSCALE, 0),
X86_INTRINSIC_DATA(avx512_mask_rndscale_pd_512, INTR_TYPE_2OP_MASK_RM, X86ISD::VRNDSCALE, 0),
@@ -1379,9 +1163,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_rndscale_ps_256, INTR_TYPE_2OP_MASK_RM, X86ISD::VRNDSCALE, 0),
X86_INTRINSIC_DATA(avx512_mask_rndscale_ps_512, INTR_TYPE_2OP_MASK_RM, X86ISD::VRNDSCALE, 0),
X86_INTRINSIC_DATA(avx512_mask_rndscale_sd, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::VRNDSCALE, 0),
+ X86ISD::VRNDSCALES, 0),
X86_INTRINSIC_DATA(avx512_mask_rndscale_ss, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::VRNDSCALE, 0),
+ X86ISD::VRNDSCALES, 0),
X86_INTRINSIC_DATA(avx512_mask_scalef_pd_128, INTR_TYPE_2OP_MASK_RM,
X86ISD::SCALEF, 0),
X86_INTRINSIC_DATA(avx512_mask_scalef_pd_256, INTR_TYPE_2OP_MASK_RM,
@@ -1414,42 +1198,26 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::SHUF128, 0),
X86_INTRINSIC_DATA(avx512_mask_shuf_i64x2_256, INTR_TYPE_3OP_IMM8_MASK,
X86ISD::SHUF128, 0),
- X86_INTRINSIC_DATA(avx512_mask_shuf_pd_128, INTR_TYPE_3OP_IMM8_MASK,
- X86ISD::SHUFP, 0),
- X86_INTRINSIC_DATA(avx512_mask_shuf_pd_256, INTR_TYPE_3OP_IMM8_MASK,
- X86ISD::SHUFP, 0),
- X86_INTRINSIC_DATA(avx512_mask_shuf_pd_512, INTR_TYPE_3OP_IMM8_MASK,
- X86ISD::SHUFP, 0),
- X86_INTRINSIC_DATA(avx512_mask_shuf_ps_128, INTR_TYPE_3OP_IMM8_MASK,
- X86ISD::SHUFP, 0),
- X86_INTRINSIC_DATA(avx512_mask_shuf_ps_256, INTR_TYPE_3OP_IMM8_MASK,
- X86ISD::SHUFP, 0),
- X86_INTRINSIC_DATA(avx512_mask_shuf_ps_512, INTR_TYPE_3OP_IMM8_MASK,
- X86ISD::SHUFP, 0),
X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_128, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0),
X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_256, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0),
- X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_512, INTR_TYPE_1OP_MASK_RM, ISD::FSQRT,
+ X86_INTRINSIC_DATA(avx512_mask_sqrt_pd_512, INTR_TYPE_1OP_MASK, ISD::FSQRT,
X86ISD::FSQRT_RND),
X86_INTRINSIC_DATA(avx512_mask_sqrt_ps_128, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0),
X86_INTRINSIC_DATA(avx512_mask_sqrt_ps_256, INTR_TYPE_1OP_MASK, ISD::FSQRT, 0),
- X86_INTRINSIC_DATA(avx512_mask_sqrt_ps_512, INTR_TYPE_1OP_MASK_RM, ISD::FSQRT,
+ X86_INTRINSIC_DATA(avx512_mask_sqrt_ps_512, INTR_TYPE_1OP_MASK, ISD::FSQRT,
X86ISD::FSQRT_RND),
X86_INTRINSIC_DATA(avx512_mask_sqrt_sd, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::FSQRT_RND, 0),
+ X86ISD::FSQRTS_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_sqrt_ss, INTR_TYPE_SCALAR_MASK_RM,
- X86ISD::FSQRT_RND, 0),
- X86_INTRINSIC_DATA(avx512_mask_sub_pd_128, INTR_TYPE_2OP_MASK, ISD::FSUB, 0),
- X86_INTRINSIC_DATA(avx512_mask_sub_pd_256, INTR_TYPE_2OP_MASK, ISD::FSUB, 0),
+ X86ISD::FSQRTS_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_sub_pd_512, INTR_TYPE_2OP_MASK, ISD::FSUB,
X86ISD::FSUB_RND),
- X86_INTRINSIC_DATA(avx512_mask_sub_ps_128, INTR_TYPE_2OP_MASK, ISD::FSUB, 0),
- X86_INTRINSIC_DATA(avx512_mask_sub_ps_256, INTR_TYPE_2OP_MASK, ISD::FSUB, 0),
X86_INTRINSIC_DATA(avx512_mask_sub_ps_512, INTR_TYPE_2OP_MASK, ISD::FSUB,
X86ISD::FSUB_RND),
- X86_INTRINSIC_DATA(avx512_mask_sub_sd_round, INTR_TYPE_SCALAR_MASK_RM, ISD::FSUB,
- X86ISD::FSUB_RND),
- X86_INTRINSIC_DATA(avx512_mask_sub_ss_round, INTR_TYPE_SCALAR_MASK_RM, ISD::FSUB,
- X86ISD::FSUB_RND),
+ X86_INTRINSIC_DATA(avx512_mask_sub_sd_round, INTR_TYPE_SCALAR_MASK_RM,
+ X86ISD::FSUB_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_sub_ss_round, INTR_TYPE_SCALAR_MASK_RM,
+ X86ISD::FSUB_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_ucmp_b_128, CMP_MASK_CC, X86ISD::CMPMU, 0),
X86_INTRINSIC_DATA(avx512_mask_ucmp_b_256, CMP_MASK_CC, X86ISD::CMPMU, 0),
X86_INTRINSIC_DATA(avx512_mask_ucmp_b_512, CMP_MASK_CC, X86ISD::CMPMU, 0),
@@ -1462,30 +1230,18 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_ucmp_w_128, CMP_MASK_CC, X86ISD::CMPMU, 0),
X86_INTRINSIC_DATA(avx512_mask_ucmp_w_256, CMP_MASK_CC, X86ISD::CMPMU, 0),
X86_INTRINSIC_DATA(avx512_mask_ucmp_w_512, CMP_MASK_CC, X86ISD::CMPMU, 0),
- X86_INTRINSIC_DATA(avx512_mask_valign_d_128, INTR_TYPE_3OP_IMM8_MASK,
- X86ISD::VALIGN, 0),
- X86_INTRINSIC_DATA(avx512_mask_valign_d_256, INTR_TYPE_3OP_IMM8_MASK,
- X86ISD::VALIGN, 0),
- X86_INTRINSIC_DATA(avx512_mask_valign_d_512, INTR_TYPE_3OP_IMM8_MASK,
- X86ISD::VALIGN, 0),
- X86_INTRINSIC_DATA(avx512_mask_valign_q_128, INTR_TYPE_3OP_IMM8_MASK,
- X86ISD::VALIGN, 0),
- X86_INTRINSIC_DATA(avx512_mask_valign_q_256, INTR_TYPE_3OP_IMM8_MASK,
- X86ISD::VALIGN, 0),
- X86_INTRINSIC_DATA(avx512_mask_valign_q_512, INTR_TYPE_3OP_IMM8_MASK,
- X86ISD::VALIGN, 0),
X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_128, INTR_TYPE_1OP_MASK_RM,
- ISD::FP16_TO_FP, 0),
+ X86ISD::CVTPH2PS, 0),
X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_256, INTR_TYPE_1OP_MASK_RM,
- ISD::FP16_TO_FP, 0),
+ X86ISD::CVTPH2PS, 0),
X86_INTRINSIC_DATA(avx512_mask_vcvtph2ps_512, INTR_TYPE_1OP_MASK_RM,
- ISD::FP16_TO_FP, 0),
- X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_128, INTR_TYPE_2OP_MASK_RM,
- ISD::FP_TO_FP16, 0),
- X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_256, INTR_TYPE_2OP_MASK_RM,
- ISD::FP_TO_FP16, 0),
- X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_512, INTR_TYPE_2OP_MASK_RM,
- ISD::FP_TO_FP16, 0),
+ X86ISD::CVTPH2PS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_128, INTR_TYPE_2OP_MASK,
+ X86ISD::CVTPS2PH, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_256, INTR_TYPE_2OP_MASK,
+ X86ISD::CVTPS2PH, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vcvtps2ph_512, INTR_TYPE_2OP_MASK,
+ X86ISD::CVTPS2PH, 0),
X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_128, FMA_OP_MASK, X86ISD::FMADD, 0),
X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_256, FMA_OP_MASK, X86ISD::FMADD, 0),
X86_INTRINSIC_DATA(avx512_mask_vfmadd_pd_512, FMA_OP_MASK, X86ISD::FMADD,
@@ -1495,8 +1251,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_vfmadd_ps_512, FMA_OP_MASK, X86ISD::FMADD,
X86ISD::FMADD_RND),
- X86_INTRINSIC_DATA(avx512_mask_vfmadd_sd, FMA_OP_SCALAR_MASK, X86ISD::FMADD_RND, 0),
- X86_INTRINSIC_DATA(avx512_mask_vfmadd_ss, FMA_OP_SCALAR_MASK, X86ISD::FMADD_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vfmadd_sd, FMA_OP_SCALAR_MASK, X86ISD::FMADDS1_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_vfmadd_ss, FMA_OP_SCALAR_MASK, X86ISD::FMADDS1_RND, 0),
X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_pd_128, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_pd_256, FMA_OP_MASK, X86ISD::FMADDSUB, 0),
X86_INTRINSIC_DATA(avx512_mask_vfmaddsub_pd_512, FMA_OP_MASK, X86ISD::FMADDSUB,
@@ -1555,23 +1311,11 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_q_512, VPERM_3OP_MASK,
X86ISD::VPERMIV3, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_qi_128, VPERM_3OP_MASK,
- X86ISD::VPERMV3, 0),
+ X86ISD::VPERMIV3, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_qi_256, VPERM_3OP_MASK,
- X86ISD::VPERMV3, 0),
+ X86ISD::VPERMIV3, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermi2var_qi_512, VPERM_3OP_MASK,
- X86ISD::VPERMV3, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermilvar_pd_128, INTR_TYPE_2OP_MASK,
- X86ISD::VPERMILPV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermilvar_pd_256, INTR_TYPE_2OP_MASK,
- X86ISD::VPERMILPV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermilvar_pd_512, INTR_TYPE_2OP_MASK,
- X86ISD::VPERMILPV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermilvar_ps_128, INTR_TYPE_2OP_MASK,
- X86ISD::VPERMILPV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermilvar_ps_256, INTR_TYPE_2OP_MASK,
- X86ISD::VPERMILPV, 0),
- X86_INTRINSIC_DATA(avx512_mask_vpermilvar_ps_512, INTR_TYPE_2OP_MASK,
- X86ISD::VPERMILPV, 0),
+ X86ISD::VPERMIV3, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_128, VPERM_3OP_MASK,
X86ISD::VPERMV3, 0),
X86_INTRINSIC_DATA(avx512_mask_vpermt2var_d_256, VPERM_3OP_MASK,
@@ -1620,12 +1364,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::VPMADD52L, 0),
X86_INTRINSIC_DATA(avx512_mask_vpmadd52l_uq_512 , FMA_OP_MASK,
X86ISD::VPMADD52L, 0),
- X86_INTRINSIC_DATA(avx512_mask_xor_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
- X86_INTRINSIC_DATA(avx512_mask_xor_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
- X86_INTRINSIC_DATA(avx512_mask_xor_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
- X86_INTRINSIC_DATA(avx512_mask_xor_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
- X86_INTRINSIC_DATA(avx512_mask_xor_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
- X86_INTRINSIC_DATA(avx512_mask_xor_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_128, FMA_OP_MASK3, X86ISD::FMADD, 0),
X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_256, FMA_OP_MASK3, X86ISD::FMADD, 0),
X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_512, FMA_OP_MASK3, X86ISD::FMADD,
@@ -1635,8 +1373,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask3_vfmadd_ps_512, FMA_OP_MASK3, X86ISD::FMADD,
X86ISD::FMADD_RND),
- X86_INTRINSIC_DATA(avx512_mask3_vfmadd_sd, FMA_OP_SCALAR_MASK3, X86ISD::FMADD_RND, 0),
- X86_INTRINSIC_DATA(avx512_mask3_vfmadd_ss, FMA_OP_SCALAR_MASK3, X86ISD::FMADD_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmadd_sd, FMA_OP_SCALAR_MASK3, X86ISD::FMADDS3_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmadd_ss, FMA_OP_SCALAR_MASK3, X86ISD::FMADDS3_RND, 0),
X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_pd_128, FMA_OP_MASK3, X86ISD::FMADDSUB, 0),
X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_pd_256, FMA_OP_MASK3, X86ISD::FMADDSUB, 0),
X86_INTRINSIC_DATA(avx512_mask3_vfmaddsub_pd_512, FMA_OP_MASK3, X86ISD::FMADDSUB,
@@ -1654,6 +1392,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask3_vfmsub_ps_256, FMA_OP_MASK3, X86ISD::FMSUB, 0),
X86_INTRINSIC_DATA(avx512_mask3_vfmsub_ps_512, FMA_OP_MASK3, X86ISD::FMSUB,
X86ISD::FMSUB_RND),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmsub_sd, FMA_OP_SCALAR_MASK3, X86ISD::FMSUBS3_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfmsub_ss, FMA_OP_SCALAR_MASK3, X86ISD::FMSUBS3_RND, 0),
X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_pd_128, FMA_OP_MASK3, X86ISD::FMSUBADD, 0),
X86_INTRINSIC_DATA(avx512_mask3_vfmsubadd_pd_256, FMA_OP_MASK3, X86ISD::FMSUBADD, 0),
@@ -1672,6 +1412,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_ps_256, FMA_OP_MASK3, X86ISD::FNMSUB, 0),
X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_ps_512, FMA_OP_MASK3, X86ISD::FNMSUB,
X86ISD::FNMSUB_RND),
+ X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_sd, FMA_OP_SCALAR_MASK3, X86ISD::FNMSUBS3_RND, 0),
+ X86_INTRINSIC_DATA(avx512_mask3_vfnmsub_ss, FMA_OP_SCALAR_MASK3, X86ISD::FNMSUBS3_RND, 0),
X86_INTRINSIC_DATA(avx512_maskz_fixupimm_pd_128, FIXUPIMM_MASKZ,
X86ISD::VFIXUPIMM, 0),
X86_INTRINSIC_DATA(avx512_maskz_fixupimm_pd_256, FIXUPIMM_MASKZ,
@@ -1709,8 +1451,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_maskz_vfmadd_ps_512, FMA_OP_MASKZ, X86ISD::FMADD,
X86ISD::FMADD_RND),
- X86_INTRINSIC_DATA(avx512_maskz_vfmadd_sd, FMA_OP_SCALAR_MASKZ, X86ISD::FMADD_RND, 0),
- X86_INTRINSIC_DATA(avx512_maskz_vfmadd_ss, FMA_OP_SCALAR_MASKZ, X86ISD::FMADD_RND, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vfmadd_sd, FMA_OP_SCALAR_MASKZ, X86ISD::FMADDS1_RND, 0),
+ X86_INTRINSIC_DATA(avx512_maskz_vfmadd_ss, FMA_OP_SCALAR_MASKZ, X86ISD::FMADDS1_RND, 0),
X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_pd_128, FMA_OP_MASKZ, X86ISD::FMADDSUB, 0),
X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_pd_256, FMA_OP_MASKZ, X86ISD::FMADDSUB, 0),
X86_INTRINSIC_DATA(avx512_maskz_vfmaddsub_pd_512, FMA_OP_MASKZ, X86ISD::FMADDSUB,
@@ -1768,7 +1510,49 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::VPMADD52L, 0),
X86_INTRINSIC_DATA(avx512_maskz_vpmadd52l_uq_512, FMA_OP_MASKZ,
X86ISD::VPMADD52L, 0),
+ X86_INTRINSIC_DATA(avx512_pmul_dq_512, INTR_TYPE_2OP, X86ISD::PMULDQ, 0),
+ X86_INTRINSIC_DATA(avx512_pmulu_dq_512, INTR_TYPE_2OP, X86ISD::PMULUDQ, 0),
X86_INTRINSIC_DATA(avx512_psad_bw_512, INTR_TYPE_2OP, X86ISD::PSADBW, 0),
+ X86_INTRINSIC_DATA(avx512_pshuf_b_512, INTR_TYPE_2OP, X86ISD::PSHUFB, 0),
+ X86_INTRINSIC_DATA(avx512_psll_d_512, INTR_TYPE_2OP, X86ISD::VSHL, 0),
+ X86_INTRINSIC_DATA(avx512_psll_q_512, INTR_TYPE_2OP, X86ISD::VSHL, 0),
+ X86_INTRINSIC_DATA(avx512_psll_w_512, INTR_TYPE_2OP, X86ISD::VSHL, 0),
+ X86_INTRINSIC_DATA(avx512_pslli_d_512, VSHIFT, X86ISD::VSHLI, 0),
+ X86_INTRINSIC_DATA(avx512_pslli_q_512, VSHIFT, X86ISD::VSHLI, 0),
+ X86_INTRINSIC_DATA(avx512_pslli_w_512, VSHIFT, X86ISD::VSHLI, 0),
+ X86_INTRINSIC_DATA(avx512_psllv_d_512, INTR_TYPE_2OP, ISD::SHL, 0),
+ X86_INTRINSIC_DATA(avx512_psllv_q_512, INTR_TYPE_2OP, ISD::SHL, 0),
+ X86_INTRINSIC_DATA(avx512_psllv_w_128, INTR_TYPE_2OP, ISD::SHL, 0),
+ X86_INTRINSIC_DATA(avx512_psllv_w_256, INTR_TYPE_2OP, ISD::SHL, 0),
+ X86_INTRINSIC_DATA(avx512_psllv_w_512, INTR_TYPE_2OP, ISD::SHL, 0),
+ X86_INTRINSIC_DATA(avx512_psra_d_512, INTR_TYPE_2OP, X86ISD::VSRA, 0),
+ X86_INTRINSIC_DATA(avx512_psra_q_128, INTR_TYPE_2OP, X86ISD::VSRA, 0),
+ X86_INTRINSIC_DATA(avx512_psra_q_256, INTR_TYPE_2OP, X86ISD::VSRA, 0),
+ X86_INTRINSIC_DATA(avx512_psra_q_512, INTR_TYPE_2OP, X86ISD::VSRA, 0),
+ X86_INTRINSIC_DATA(avx512_psra_w_512, INTR_TYPE_2OP, X86ISD::VSRA, 0),
+ X86_INTRINSIC_DATA(avx512_psrai_d_512, VSHIFT, X86ISD::VSRAI, 0),
+ X86_INTRINSIC_DATA(avx512_psrai_q_128, VSHIFT, X86ISD::VSRAI, 0),
+ X86_INTRINSIC_DATA(avx512_psrai_q_256, VSHIFT, X86ISD::VSRAI, 0),
+ X86_INTRINSIC_DATA(avx512_psrai_q_512, VSHIFT, X86ISD::VSRAI, 0),
+ X86_INTRINSIC_DATA(avx512_psrai_w_512, VSHIFT, X86ISD::VSRAI, 0),
+ X86_INTRINSIC_DATA(avx512_psrav_d_512, INTR_TYPE_2OP, X86ISD::VSRAV, 0),
+ X86_INTRINSIC_DATA(avx512_psrav_q_128, INTR_TYPE_2OP, X86ISD::VSRAV, 0),
+ X86_INTRINSIC_DATA(avx512_psrav_q_256, INTR_TYPE_2OP, X86ISD::VSRAV, 0),
+ X86_INTRINSIC_DATA(avx512_psrav_q_512, INTR_TYPE_2OP, X86ISD::VSRAV, 0),
+ X86_INTRINSIC_DATA(avx512_psrav_w_128, INTR_TYPE_2OP, X86ISD::VSRAV, 0),
+ X86_INTRINSIC_DATA(avx512_psrav_w_256, INTR_TYPE_2OP, X86ISD::VSRAV, 0),
+ X86_INTRINSIC_DATA(avx512_psrav_w_512, INTR_TYPE_2OP, X86ISD::VSRAV, 0),
+ X86_INTRINSIC_DATA(avx512_psrl_d_512, INTR_TYPE_2OP, X86ISD::VSRL, 0),
+ X86_INTRINSIC_DATA(avx512_psrl_q_512, INTR_TYPE_2OP, X86ISD::VSRL, 0),
+ X86_INTRINSIC_DATA(avx512_psrl_w_512, INTR_TYPE_2OP, X86ISD::VSRL, 0),
+ X86_INTRINSIC_DATA(avx512_psrli_d_512, VSHIFT, X86ISD::VSRLI, 0),
+ X86_INTRINSIC_DATA(avx512_psrli_q_512, VSHIFT, X86ISD::VSRLI, 0),
+ X86_INTRINSIC_DATA(avx512_psrli_w_512, VSHIFT, X86ISD::VSRLI, 0),
+ X86_INTRINSIC_DATA(avx512_psrlv_d_512, INTR_TYPE_2OP, ISD::SRL, 0),
+ X86_INTRINSIC_DATA(avx512_psrlv_q_512, INTR_TYPE_2OP, ISD::SRL, 0),
+ X86_INTRINSIC_DATA(avx512_psrlv_w_128, INTR_TYPE_2OP, ISD::SRL, 0),
+ X86_INTRINSIC_DATA(avx512_psrlv_w_256, INTR_TYPE_2OP, ISD::SRL, 0),
+ X86_INTRINSIC_DATA(avx512_psrlv_w_512, INTR_TYPE_2OP, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx512_ptestm_b_128, CMP_MASK, X86ISD::TESTM, 0),
X86_INTRINSIC_DATA(avx512_ptestm_b_256, CMP_MASK, X86ISD::TESTM, 0),
X86_INTRINSIC_DATA(avx512_ptestm_b_512, CMP_MASK, X86ISD::TESTM, 0),
@@ -1803,8 +1587,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_rcp14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::FRCPS, 0),
X86_INTRINSIC_DATA(avx512_rcp28_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::RCP28, 0),
X86_INTRINSIC_DATA(avx512_rcp28_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::RCP28, 0),
- X86_INTRINSIC_DATA(avx512_rcp28_sd, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RCP28, 0),
- X86_INTRINSIC_DATA(avx512_rcp28_ss, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RCP28, 0),
+ X86_INTRINSIC_DATA(avx512_rcp28_sd, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RCP28S, 0),
+ X86_INTRINSIC_DATA(avx512_rcp28_ss, INTR_TYPE_SCALAR_MASK_RM, X86ISD::RCP28S, 0),
X86_INTRINSIC_DATA(avx512_rsqrt14_pd_128, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0),
X86_INTRINSIC_DATA(avx512_rsqrt14_pd_256, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0),
X86_INTRINSIC_DATA(avx512_rsqrt14_pd_512, INTR_TYPE_1OP_MASK, X86ISD::FRSQRT, 0),
@@ -1815,26 +1599,20 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_rsqrt14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::FRSQRTS, 0),
X86_INTRINSIC_DATA(avx512_rsqrt28_pd, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0),
X86_INTRINSIC_DATA(avx512_rsqrt28_ps, INTR_TYPE_1OP_MASK_RM,X86ISD::RSQRT28, 0),
- X86_INTRINSIC_DATA(avx512_rsqrt28_sd, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RSQRT28, 0),
- X86_INTRINSIC_DATA(avx512_rsqrt28_ss, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RSQRT28, 0),
+ X86_INTRINSIC_DATA(avx512_rsqrt28_sd, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RSQRT28S, 0),
+ X86_INTRINSIC_DATA(avx512_rsqrt28_ss, INTR_TYPE_SCALAR_MASK_RM,X86ISD::RSQRT28S, 0),
X86_INTRINSIC_DATA(avx512_vcomi_sd, COMI_RM, X86ISD::COMI, X86ISD::UCOMI),
X86_INTRINSIC_DATA(avx512_vcomi_ss, COMI_RM, X86ISD::COMI, X86ISD::UCOMI),
- X86_INTRINSIC_DATA(avx512_vcvtsd2si32, INTR_TYPE_2OP,
- X86ISD::SCALAR_FP_TO_SINT_RND, 0),
- X86_INTRINSIC_DATA(avx512_vcvtsd2si64, INTR_TYPE_2OP,
- X86ISD::SCALAR_FP_TO_SINT_RND, 0),
- X86_INTRINSIC_DATA(avx512_vcvtsd2usi32, INTR_TYPE_2OP,
- X86ISD::SCALAR_FP_TO_UINT_RND, 0),
- X86_INTRINSIC_DATA(avx512_vcvtsd2usi64, INTR_TYPE_2OP,
- X86ISD::SCALAR_FP_TO_UINT_RND, 0),
- X86_INTRINSIC_DATA(avx512_vcvtss2si32, INTR_TYPE_2OP,
- X86ISD::SCALAR_FP_TO_SINT_RND, 0),
- X86_INTRINSIC_DATA(avx512_vcvtss2si64, INTR_TYPE_2OP,
- X86ISD::SCALAR_FP_TO_SINT_RND, 0),
- X86_INTRINSIC_DATA(avx512_vcvtss2usi32, INTR_TYPE_2OP,
- X86ISD::SCALAR_FP_TO_UINT_RND, 0),
- X86_INTRINSIC_DATA(avx512_vcvtss2usi64, INTR_TYPE_2OP,
- X86ISD::SCALAR_FP_TO_UINT_RND, 0),
+ X86_INTRINSIC_DATA(avx512_vcvtsd2si32, INTR_TYPE_2OP, X86ISD::CVTS2SI_RND, 0),
+ X86_INTRINSIC_DATA(avx512_vcvtsd2si64, INTR_TYPE_2OP, X86ISD::CVTS2SI_RND, 0),
+ X86_INTRINSIC_DATA(avx512_vcvtsd2usi32, INTR_TYPE_2OP, X86ISD::CVTS2UI_RND, 0),
+ X86_INTRINSIC_DATA(avx512_vcvtsd2usi64, INTR_TYPE_2OP, X86ISD::CVTS2UI_RND, 0),
+ X86_INTRINSIC_DATA(avx512_vcvtss2si32, INTR_TYPE_2OP, X86ISD::CVTS2SI_RND, 0),
+ X86_INTRINSIC_DATA(avx512_vcvtss2si64, INTR_TYPE_2OP, X86ISD::CVTS2SI_RND, 0),
+ X86_INTRINSIC_DATA(avx512_vcvtss2usi32, INTR_TYPE_2OP, X86ISD::CVTS2UI_RND, 0),
+ X86_INTRINSIC_DATA(avx512_vcvtss2usi64, INTR_TYPE_2OP, X86ISD::CVTS2UI_RND, 0),
+ X86_INTRINSIC_DATA(avx512_vpermilvar_pd_512, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0),
+ X86_INTRINSIC_DATA(avx512_vpermilvar_ps_512, INTR_TYPE_2OP, X86ISD::VPERMILPV, 0),
X86_INTRINSIC_DATA(fma_vfmadd_pd, INTR_TYPE_3OP, X86ISD::FMADD, 0),
X86_INTRINSIC_DATA(fma_vfmadd_pd_256, INTR_TYPE_3OP, X86ISD::FMADD, 0),
X86_INTRINSIC_DATA(fma_vfmadd_ps, INTR_TYPE_3OP, X86ISD::FMADD, 0),
@@ -1883,6 +1661,11 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse2_comile_sd, COMI, X86ISD::COMI, ISD::SETLE),
X86_INTRINSIC_DATA(sse2_comilt_sd, COMI, X86ISD::COMI, ISD::SETLT),
X86_INTRINSIC_DATA(sse2_comineq_sd, COMI, X86ISD::COMI, ISD::SETNE),
+ X86_INTRINSIC_DATA(sse2_cvtdq2ps, INTR_TYPE_1OP, ISD::SINT_TO_FP, 0),
+ X86_INTRINSIC_DATA(sse2_cvtpd2dq, INTR_TYPE_1OP, X86ISD::CVTP2SI, 0),
+ X86_INTRINSIC_DATA(sse2_cvtpd2ps, INTR_TYPE_1OP, X86ISD::VFPROUND, 0),
+ X86_INTRINSIC_DATA(sse2_cvttpd2dq, INTR_TYPE_1OP, X86ISD::CVTTP2SI, 0),
+ X86_INTRINSIC_DATA(sse2_cvttps2dq, INTR_TYPE_1OP, ISD::FP_TO_SINT, 0),
X86_INTRINSIC_DATA(sse2_max_pd, INTR_TYPE_2OP, X86ISD::FMAX, 0),
X86_INTRINSIC_DATA(sse2_min_pd, INTR_TYPE_2OP, X86ISD::FMIN, 0),
X86_INTRINSIC_DATA(sse2_movmsk_pd, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),
@@ -1895,6 +1678,7 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(sse2_paddus_w, INTR_TYPE_2OP, X86ISD::ADDUS, 0),
X86_INTRINSIC_DATA(sse2_pavg_b, INTR_TYPE_2OP, X86ISD::AVG, 0),
X86_INTRINSIC_DATA(sse2_pavg_w, INTR_TYPE_2OP, X86ISD::AVG, 0),
+ X86_INTRINSIC_DATA(sse2_pmadd_wd, INTR_TYPE_2OP, X86ISD::VPMADDWD, 0),
X86_INTRINSIC_DATA(sse2_pmovmskb_128, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),
X86_INTRINSIC_DATA(sse2_pmulh_w, INTR_TYPE_2OP, ISD::MULHS, 0),
X86_INTRINSIC_DATA(sse2_pmulhu_w, INTR_TYPE_2OP, ISD::MULHU, 0),
@@ -1943,6 +1727,8 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(ssse3_phadd_w_128, INTR_TYPE_2OP, X86ISD::HADD, 0),
X86_INTRINSIC_DATA(ssse3_phsub_d_128, INTR_TYPE_2OP, X86ISD::HSUB, 0),
X86_INTRINSIC_DATA(ssse3_phsub_w_128, INTR_TYPE_2OP, X86ISD::HSUB, 0),
+ X86_INTRINSIC_DATA(ssse3_pmadd_ub_sw_128, INTR_TYPE_2OP, X86ISD::VPMADDUBSW, 0),
+ X86_INTRINSIC_DATA(ssse3_pmul_hr_sw_128, INTR_TYPE_2OP, X86ISD::MULHRS, 0),
X86_INTRINSIC_DATA(ssse3_pshuf_b_128, INTR_TYPE_2OP, X86ISD::PSHUFB, 0),
X86_INTRINSIC_DATA(xop_vpcomb, INTR_TYPE_3OP, X86ISD::VPCOM, 0),
X86_INTRINSIC_DATA(xop_vpcomd, INTR_TYPE_3OP, X86ISD::VPCOM, 0),
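A minimal sketch of how a sorted intrinsic table like the one above is typically searched by intrinsic ID; the struct, field, and function names here are illustrative assumptions, not the backend's actual definitions:

#include <algorithm>
#include <cstddef>
#include <cstdint>

// Illustrative stand-in for one row of the table above: an intrinsic ID,
// a lowering kind (INTR_TYPE_2OP, INTR_TYPE_SCALAR_MASK_RM, ...), and the
// primary/secondary target opcodes (the second is 0 when unused).
struct IntrinsicDataEntry {
  uint16_t Id;
  uint16_t Type;
  uint16_t Opcode0;
  uint16_t Opcode1;
};

// Binary search over a table sorted by Id; returns nullptr when the
// intrinsic has no entry (i.e. no custom lowering is required).
const IntrinsicDataEntry *lookupIntrinsic(const IntrinsicDataEntry *Table,
                                          size_t Size, uint16_t Id) {
  const IntrinsicDataEntry *End = Table + Size;
  const IntrinsicDataEntry *It = std::lower_bound(
      Table, End, Id,
      [](const IntrinsicDataEntry &E, uint16_t V) { return E.Id < V; });
  return (It != End && It->Id == Id) ? It : nullptr;
}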
diff --git a/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp b/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp
index 906e3427b2ff..2f69df064e7f 100644
--- a/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp
+++ b/contrib/llvm/lib/Target/X86/X86MCInstLower.cpp
@@ -16,6 +16,7 @@
#include "X86RegisterInfo.h"
#include "X86ShuffleDecodeConstantPool.h"
#include "InstPrinter/X86ATTInstPrinter.h"
+#include "InstPrinter/X86InstComments.h"
#include "MCTargetDesc/X86BaseInfo.h"
#include "Utils/X86ShuffleDecode.h"
#include "llvm/ADT/Optional.h"
@@ -41,6 +42,7 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolELF.h"
#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSectionMachO.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/ELF.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
@@ -68,9 +70,6 @@ public:
private:
MachineModuleInfoMachO &getMachOMMI() const;
- Mangler *getMang() const {
- return AsmPrinter.Mang;
- }
};
} // end anonymous namespace
@@ -499,18 +498,18 @@ ReSimplify:
break;
}
- // TAILJMPd, TAILJMPd64 - Lower to the correct jump instructions.
- case X86::TAILJMPr:
+ // TAILJMPd, TAILJMPd64, TAILJMPd_CC - Lower to the correct jump instruction.
+ { unsigned Opcode;
+ case X86::TAILJMPr: Opcode = X86::JMP32r; goto SetTailJmpOpcode;
case X86::TAILJMPd:
- case X86::TAILJMPd64: {
- unsigned Opcode;
- switch (OutMI.getOpcode()) {
- default: llvm_unreachable("Invalid opcode");
- case X86::TAILJMPr: Opcode = X86::JMP32r; break;
- case X86::TAILJMPd:
- case X86::TAILJMPd64: Opcode = X86::JMP_1; break;
- }
-
+ case X86::TAILJMPd64: Opcode = X86::JMP_1; goto SetTailJmpOpcode;
+ case X86::TAILJMPd_CC:
+ case X86::TAILJMPd64_CC:
+ Opcode = X86::GetCondBranchFromCond(
+ static_cast<X86::CondCode>(MI->getOperand(1).getImm()));
+ goto SetTailJmpOpcode;
+
+ SetTailJmpOpcode:
MCOperand Saved = OutMI.getOperand(0);
OutMI = MCInst();
OutMI.setOpcode(Opcode);
@@ -979,8 +978,7 @@ void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
PatchPointOpers opers(&MI);
unsigned ScratchIdx = opers.getNextScratchIdx();
unsigned EncodedBytes = 0;
- const MachineOperand &CalleeMO =
- opers.getMetaOper(PatchPointOpers::TargetPos);
+ const MachineOperand &CalleeMO = opers.getCallTarget();
// Check for null target. If target is non-null (i.e. is non-zero or is
// symbolic) then emit a call.
@@ -1016,7 +1014,7 @@ void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
}
// Emit padding.
- unsigned NumBytes = opers.getMetaOper(PatchPointOpers::NBytesPos).getImm();
+ unsigned NumBytes = opers.getNumPatchBytes();
assert(NumBytes >= EncodedBytes &&
"Patchpoint can't request size less than the length of a call.");
@@ -1024,22 +1022,12 @@ void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI,
getSubtargetInfo());
}
-void X86AsmPrinter::recordSled(MCSymbol *Sled, const MachineInstr &MI,
- SledKind Kind) {
- auto Fn = MI.getParent()->getParent()->getFunction();
- auto Attr = Fn->getFnAttribute("function-instrument");
- bool AlwaysInstrument =
- Attr.isStringAttribute() && Attr.getValueAsString() == "xray-always";
- Sleds.emplace_back(
- XRayFunctionEntry{Sled, CurrentFnSym, Kind, AlwaysInstrument, Fn});
-}
-
void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
X86MCInstLower &MCIL) {
// We want to emit the following pattern:
//
+ // .p2align 1, ...
// .Lxray_sled_N:
- // .palign 2, ...
// jmp .tmpN
// # 9 bytes worth of noops
// .tmpN
@@ -1051,8 +1039,8 @@ void X86AsmPrinter::LowerPATCHABLE_FUNCTION_ENTER(const MachineInstr &MI,
// call <relative offset, 32-bits> // 5 bytes
//
auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
+ OutStreamer->EmitCodeAlignment(2);
OutStreamer->EmitLabel(CurSled);
- OutStreamer->EmitCodeAlignment(4);
auto Target = OutContext.createTempSymbol();
// Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
@@ -1074,12 +1062,14 @@ void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI,
//
// We should emit the RET followed by sleds.
//
+ // .p2align 1, ...
// .Lxray_sled_N:
// ret # or equivalent instruction
// # 10 bytes worth of noops
//
// This just makes sure that the alignment for the next instruction is 2.
auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
+ OutStreamer->EmitCodeAlignment(2);
OutStreamer->EmitLabel(CurSled);
unsigned OpCode = MI.getOperand(0).getImm();
MCInst Ret;
@@ -1092,28 +1082,86 @@ void X86AsmPrinter::LowerPATCHABLE_RET(const MachineInstr &MI,
recordSled(CurSled, MI, SledKind::FUNCTION_EXIT);
}
+void X86AsmPrinter::LowerPATCHABLE_TAIL_CALL(const MachineInstr &MI, X86MCInstLower &MCIL) {
+ // Like PATCHABLE_RET, we have the actual instruction in the operands to this
+ // instruction so we lower that particular instruction and its operands.
+ // Unlike PATCHABLE_RET though, we put the sled before the JMP, much like how
+ // we do it for PATCHABLE_FUNCTION_ENTER. The sled should be very similar to
+ // the PATCHABLE_FUNCTION_ENTER case, followed by the lowering of the actual
+ // tail call much like how we have it in PATCHABLE_RET.
+ auto CurSled = OutContext.createTempSymbol("xray_sled_", true);
+ OutStreamer->EmitCodeAlignment(2);
+ OutStreamer->EmitLabel(CurSled);
+ auto Target = OutContext.createTempSymbol();
+
+ // Use a two-byte `jmp`. This version of JMP takes an 8-bit relative offset as
+ // an operand (computed as an offset from the jmp instruction).
+ // FIXME: Find another less hacky way to force the relative jump.
+ OutStreamer->EmitBytes("\xeb\x09");
+ EmitNops(*OutStreamer, 9, Subtarget->is64Bit(), getSubtargetInfo());
+ OutStreamer->EmitLabel(Target);
+ recordSled(CurSled, MI, SledKind::TAIL_CALL);
+
+ unsigned OpCode = MI.getOperand(0).getImm();
+ MCInst TC;
+ TC.setOpcode(OpCode);
+
+ // Before emitting the instruction, add a comment to indicate that this is
+ // indeed a tail call.
+ OutStreamer->AddComment("TAILCALL");
+ for (auto &MO : make_range(MI.operands_begin() + 1, MI.operands_end()))
+ if (auto MaybeOperand = MCIL.LowerMachineOperand(&MI, MO))
+ TC.addOperand(MaybeOperand.getValue());
+ OutStreamer->EmitInstruction(TC, getSubtargetInfo());
+}
+
void X86AsmPrinter::EmitXRayTable() {
if (Sleds.empty())
return;
+
+ auto PrevSection = OutStreamer->getCurrentSectionOnly();
+ auto Fn = MF->getFunction();
+ MCSection *Section = nullptr;
if (Subtarget->isTargetELF()) {
- auto *Section = OutContext.getELFSection(
- "xray_instr_map", ELF::SHT_PROGBITS,
- ELF::SHF_ALLOC | ELF::SHF_GROUP | ELF::SHF_MERGE, 0,
- CurrentFnSym->getName());
- auto PrevSection = OutStreamer->getCurrentSectionOnly();
- OutStreamer->SwitchSection(Section);
- for (const auto &Sled : Sleds) {
- OutStreamer->EmitSymbolValue(Sled.Sled, 8);
- OutStreamer->EmitSymbolValue(CurrentFnSym, 8);
- auto Kind = static_cast<uint8_t>(Sled.Kind);
- OutStreamer->EmitBytes(
- StringRef(reinterpret_cast<const char *>(&Kind), 1));
- OutStreamer->EmitBytes(
- StringRef(reinterpret_cast<const char *>(&Sled.AlwaysInstrument), 1));
- OutStreamer->EmitZeros(14);
+ if (Fn->hasComdat()) {
+ Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_GROUP, 0,
+ Fn->getComdat()->getName());
+ } else {
+ Section = OutContext.getELFSection("xray_instr_map", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC);
}
- OutStreamer->SwitchSection(PrevSection);
+ } else if (Subtarget->isTargetMachO()) {
+ Section = OutContext.getMachOSection("__DATA", "xray_instr_map", 0,
+ SectionKind::getReadOnlyWithRel());
+ } else {
+ llvm_unreachable("Unsupported target");
}
+
+ // Before we switch over, we force a reference to a label inside the
+ // xray_instr_map section. Since EmitXRayTable() is always called just
+ // before the function's end, we assume that this is happening after the
+ // last return instruction.
+ //
+ // We then align the reference to 16 byte boundaries, which we determined
+ // experimentally to be beneficial to avoid causing decoder stalls.
+ MCSymbol *Tmp = OutContext.createTempSymbol("xray_synthetic_", true);
+ OutStreamer->EmitCodeAlignment(16);
+ OutStreamer->EmitSymbolValue(Tmp, 8, false);
+ OutStreamer->SwitchSection(Section);
+ OutStreamer->EmitLabel(Tmp);
+ for (const auto &Sled : Sleds) {
+ OutStreamer->EmitSymbolValue(Sled.Sled, 8);
+ OutStreamer->EmitSymbolValue(CurrentFnSym, 8);
+ auto Kind = static_cast<uint8_t>(Sled.Kind);
+ OutStreamer->EmitBytes(
+ StringRef(reinterpret_cast<const char *>(&Kind), 1));
+ OutStreamer->EmitBytes(
+ StringRef(reinterpret_cast<const char *>(&Sled.AlwaysInstrument), 1));
+ OutStreamer->EmitZeros(14);
+ }
+ OutStreamer->SwitchSection(PrevSection);
+
Sleds.clear();
}
@@ -1152,9 +1200,9 @@ static const Constant *getConstantFromPool(const MachineInstr &MI,
return C;
}
-static std::string getShuffleComment(const MachineOperand &DstOp,
- const MachineOperand &SrcOp1,
- const MachineOperand &SrcOp2,
+static std::string getShuffleComment(const MachineInstr *MI,
+ unsigned SrcOp1Idx,
+ unsigned SrcOp2Idx,
ArrayRef<int> Mask) {
std::string Comment;
@@ -1167,7 +1215,10 @@ static std::string getShuffleComment(const MachineOperand &DstOp,
return X86ATTInstPrinter::getRegisterName(RegNum);
};
- // TODO: Add support for specifying an AVX512 style mask register in the comment.
+ const MachineOperand &DstOp = MI->getOperand(0);
+ const MachineOperand &SrcOp1 = MI->getOperand(SrcOp1Idx);
+ const MachineOperand &SrcOp2 = MI->getOperand(SrcOp2Idx);
+
StringRef DstName = DstOp.isReg() ? GetRegisterName(DstOp.getReg()) : "mem";
StringRef Src1Name =
SrcOp1.isReg() ? GetRegisterName(SrcOp1.getReg()) : "mem";
@@ -1182,7 +1233,26 @@ static std::string getShuffleComment(const MachineOperand &DstOp,
ShuffleMask[i] -= e;
raw_string_ostream CS(Comment);
- CS << DstName << " = ";
+ CS << DstName;
+
+ // Handle AVX512 MASK/MASKZ write mask comments.
+ // MASK: zmmX {%kY}
+ // MASKZ: zmmX {%kY} {z}
+ if (SrcOp1Idx > 1) {
+ assert((SrcOp1Idx == 2 || SrcOp1Idx == 3) && "Unexpected writemask");
+
+ const MachineOperand &WriteMaskOp = MI->getOperand(SrcOp1Idx - 1);
+ if (WriteMaskOp.isReg()) {
+ CS << " {%" << GetRegisterName(WriteMaskOp.getReg()) << "}";
+
+ if (SrcOp1Idx == 2) {
+ CS << " {z}";
+ }
+ }
+ }
+
+ CS << " = ";
+
for (int i = 0, e = ShuffleMask.size(); i != e; ++i) {
if (i != 0)
CS << ",";
@@ -1221,6 +1291,13 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
X86MCInstLower MCInstLowering(*MF, *this);
const X86RegisterInfo *RI = MF->getSubtarget<X86Subtarget>().getRegisterInfo();
+ // Add a comment about EVEX-2-VEX compression for AVX-512 instrs that
+ // are compressed from EVEX encoding to VEX encoding.
+ if (TM.Options.MCOptions.ShowMCEncoding) {
+ if (MI->getAsmPrinterFlags() & AC_EVEX_2_VEX)
+ OutStreamer->AddComment("EVEX TO VEX Compression ", false);
+ }
+
switch (MI->getOpcode()) {
case TargetOpcode::DBG_VALUE:
llvm_unreachable("Should be handled target independently");
@@ -1254,12 +1331,13 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
case X86::TAILJMPr:
case X86::TAILJMPm:
case X86::TAILJMPd:
+ case X86::TAILJMPd_CC:
case X86::TAILJMPr64:
case X86::TAILJMPm64:
case X86::TAILJMPd64:
+ case X86::TAILJMPd64_CC:
case X86::TAILJMPr64_REX:
case X86::TAILJMPm64_REX:
- case X86::TAILJMPd64_REX:
// Lower these as normal, but add some comments.
OutStreamer->AddComment("TAILCALL");
break;
@@ -1364,6 +1442,9 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
case TargetOpcode::PATCHABLE_RET:
return LowerPATCHABLE_RET(*MI, MCInstLowering);
+ case TargetOpcode::PATCHABLE_TAIL_CALL:
+ return LowerPATCHABLE_TAIL_CALL(*MI, MCInstLowering);
+
case X86::MORESTACK_RET:
EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget)));
return;
@@ -1377,37 +1458,45 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
return;
case X86::SEH_PushReg:
+ assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
OutStreamer->EmitWinCFIPushReg(RI->getSEHRegNum(MI->getOperand(0).getImm()));
return;
case X86::SEH_SaveReg:
+ assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
OutStreamer->EmitWinCFISaveReg(RI->getSEHRegNum(MI->getOperand(0).getImm()),
MI->getOperand(1).getImm());
return;
case X86::SEH_SaveXMM:
+ assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
OutStreamer->EmitWinCFISaveXMM(RI->getSEHRegNum(MI->getOperand(0).getImm()),
MI->getOperand(1).getImm());
return;
case X86::SEH_StackAlloc:
+ assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
OutStreamer->EmitWinCFIAllocStack(MI->getOperand(0).getImm());
return;
case X86::SEH_SetFrame:
+ assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
OutStreamer->EmitWinCFISetFrame(RI->getSEHRegNum(MI->getOperand(0).getImm()),
MI->getOperand(1).getImm());
return;
case X86::SEH_PushFrame:
+ assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
OutStreamer->EmitWinCFIPushFrame(MI->getOperand(0).getImm());
return;
case X86::SEH_EndPrologue:
+ assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
OutStreamer->EmitWinCFIEndProlog();
return;
case X86::SEH_Epilogue: {
+ assert(MF->hasWinCFI() && "SEH_ instruction in function without WinCFI?");
MachineBasicBlock::const_iterator MBBI(MI);
// Check if preceded by a call and emit nop if so.
for (MBBI = PrevCrossBBInst(MBBI);
@@ -1463,59 +1552,84 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
assert(MI->getNumOperands() >= 6 &&
"We should always have at least 6 operands!");
- const MachineOperand &DstOp = MI->getOperand(0);
- const MachineOperand &SrcOp = MI->getOperand(SrcIdx);
- const MachineOperand &MaskOp = MI->getOperand(MaskIdx);
+ const MachineOperand &MaskOp = MI->getOperand(MaskIdx);
if (auto *C = getConstantFromPool(*MI, MaskOp)) {
- SmallVector<int, 16> Mask;
+ SmallVector<int, 64> Mask;
DecodePSHUFBMask(C, Mask);
if (!Mask.empty())
- OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp, SrcOp, Mask));
+ OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
}
break;
}
+ case X86::VPERMILPSrm:
+ case X86::VPERMILPSYrm:
+ case X86::VPERMILPSZ128rm:
+ case X86::VPERMILPSZ128rmk:
+ case X86::VPERMILPSZ128rmkz:
+ case X86::VPERMILPSZ256rm:
+ case X86::VPERMILPSZ256rmk:
+ case X86::VPERMILPSZ256rmkz:
+ case X86::VPERMILPSZrm:
+ case X86::VPERMILPSZrmk:
+ case X86::VPERMILPSZrmkz:
case X86::VPERMILPDrm:
case X86::VPERMILPDYrm:
case X86::VPERMILPDZ128rm:
+ case X86::VPERMILPDZ128rmk:
+ case X86::VPERMILPDZ128rmkz:
case X86::VPERMILPDZ256rm:
- case X86::VPERMILPDZrm: {
+ case X86::VPERMILPDZ256rmk:
+ case X86::VPERMILPDZ256rmkz:
+ case X86::VPERMILPDZrm:
+ case X86::VPERMILPDZrmk:
+ case X86::VPERMILPDZrmkz: {
if (!OutStreamer->isVerboseAsm())
break;
- assert(MI->getNumOperands() > 5 &&
- "We should always have at least 5 operands!");
- const MachineOperand &DstOp = MI->getOperand(0);
- const MachineOperand &SrcOp = MI->getOperand(1);
- const MachineOperand &MaskOp = MI->getOperand(5);
-
- if (auto *C = getConstantFromPool(*MI, MaskOp)) {
- SmallVector<int, 8> Mask;
- DecodeVPERMILPMask(C, 64, Mask);
- if (!Mask.empty())
- OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp, SrcOp, Mask));
+ unsigned SrcIdx, MaskIdx;
+ unsigned ElSize;
+ switch (MI->getOpcode()) {
+ default: llvm_unreachable("Invalid opcode");
+ case X86::VPERMILPSrm:
+ case X86::VPERMILPSYrm:
+ case X86::VPERMILPSZ128rm:
+ case X86::VPERMILPSZ256rm:
+ case X86::VPERMILPSZrm:
+ SrcIdx = 1; MaskIdx = 5; ElSize = 32; break;
+ case X86::VPERMILPSZ128rmkz:
+ case X86::VPERMILPSZ256rmkz:
+ case X86::VPERMILPSZrmkz:
+ SrcIdx = 2; MaskIdx = 6; ElSize = 32; break;
+ case X86::VPERMILPSZ128rmk:
+ case X86::VPERMILPSZ256rmk:
+ case X86::VPERMILPSZrmk:
+ SrcIdx = 3; MaskIdx = 7; ElSize = 32; break;
+ case X86::VPERMILPDrm:
+ case X86::VPERMILPDYrm:
+ case X86::VPERMILPDZ128rm:
+ case X86::VPERMILPDZ256rm:
+ case X86::VPERMILPDZrm:
+ SrcIdx = 1; MaskIdx = 5; ElSize = 64; break;
+ case X86::VPERMILPDZ128rmkz:
+ case X86::VPERMILPDZ256rmkz:
+ case X86::VPERMILPDZrmkz:
+ SrcIdx = 2; MaskIdx = 6; ElSize = 64; break;
+ case X86::VPERMILPDZ128rmk:
+ case X86::VPERMILPDZ256rmk:
+ case X86::VPERMILPDZrmk:
+ SrcIdx = 3; MaskIdx = 7; ElSize = 64; break;
}
- break;
- }
- case X86::VPERMILPSrm:
- case X86::VPERMILPSYrm:
- case X86::VPERMILPSZ128rm:
- case X86::VPERMILPSZ256rm:
- case X86::VPERMILPSZrm: {
- if (!OutStreamer->isVerboseAsm())
- break;
- assert(MI->getNumOperands() > 5 &&
- "We should always have at least 5 operands!");
- const MachineOperand &DstOp = MI->getOperand(0);
- const MachineOperand &SrcOp = MI->getOperand(1);
- const MachineOperand &MaskOp = MI->getOperand(5);
+ assert(MI->getNumOperands() >= 6 &&
+ "We should always have at least 6 operands!");
+ const MachineOperand &MaskOp = MI->getOperand(MaskIdx);
if (auto *C = getConstantFromPool(*MI, MaskOp)) {
SmallVector<int, 16> Mask;
- DecodeVPERMILPMask(C, 32, Mask);
+ DecodeVPERMILPMask(C, ElSize, Mask);
if (!Mask.empty())
- OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp, SrcOp, Mask));
+ OutStreamer->AddComment(getShuffleComment(MI, SrcIdx, SrcIdx, Mask));
}
break;
}
@@ -1526,14 +1640,10 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
case X86::VPERMIL2PSrmY: {
if (!OutStreamer->isVerboseAsm())
break;
- assert(MI->getNumOperands() > 7 &&
- "We should always have at least 7 operands!");
- const MachineOperand &DstOp = MI->getOperand(0);
- const MachineOperand &SrcOp1 = MI->getOperand(1);
- const MachineOperand &SrcOp2 = MI->getOperand(2);
- const MachineOperand &MaskOp = MI->getOperand(6);
- const MachineOperand &CtrlOp = MI->getOperand(MI->getNumOperands() - 1);
+ assert(MI->getNumOperands() >= 8 &&
+ "We should always have at least 8 operands!");
+ const MachineOperand &CtrlOp = MI->getOperand(MI->getNumOperands() - 1);
if (!CtrlOp.isImm())
break;
@@ -1544,11 +1654,12 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
case X86::VPERMIL2PDrm: case X86::VPERMIL2PDrmY: ElSize = 64; break;
}
+ const MachineOperand &MaskOp = MI->getOperand(6);
if (auto *C = getConstantFromPool(*MI, MaskOp)) {
SmallVector<int, 16> Mask;
DecodeVPERMIL2PMask(C, (unsigned)CtrlOp.getImm(), ElSize, Mask);
if (!Mask.empty())
- OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp1, SrcOp2, Mask));
+ OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask));
}
break;
}
@@ -1556,18 +1667,15 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
case X86::VPPERMrrm: {
if (!OutStreamer->isVerboseAsm())
break;
- assert(MI->getNumOperands() > 6 &&
- "We should always have at least 6 operands!");
- const MachineOperand &DstOp = MI->getOperand(0);
- const MachineOperand &SrcOp1 = MI->getOperand(1);
- const MachineOperand &SrcOp2 = MI->getOperand(2);
- const MachineOperand &MaskOp = MI->getOperand(6);
+ assert(MI->getNumOperands() >= 7 &&
+ "We should always have at least 7 operands!");
+ const MachineOperand &MaskOp = MI->getOperand(6);
if (auto *C = getConstantFromPool(*MI, MaskOp)) {
SmallVector<int, 16> Mask;
DecodeVPPERMMask(C, Mask);
if (!Mask.empty())
- OutStreamer->AddComment(getShuffleComment(DstOp, SrcOp1, SrcOp2, Mask));
+ OutStreamer->AddComment(getShuffleComment(MI, 1, 2, Mask));
}
break;
}
@@ -1605,7 +1713,8 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
CASE_ALL_MOV_RM()
if (!OutStreamer->isVerboseAsm())
break;
- if (MI->getNumOperands() > 4)
+ if (MI->getNumOperands() <= 4)
+ break;
if (auto *C = getConstantFromPool(*MI, MI->getOperand(4))) {
std::string Comment;
raw_string_ostream CS(Comment);
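Each sled record emitted by EmitXRayTable above is 32 bytes: an 8-byte sled address, an 8-byte function address, a one-byte sled kind, a one-byte always-instrument flag, and 14 zero bytes of padding. A hedged sketch of an equivalent in-memory layout a reader of xray_instr_map might use (field names are assumptions for illustration):

#include <cstdint>

// Mirrors the EmitSymbolValue/EmitBytes/EmitZeros sequence above:
// 8 + 8 + 1 + 1 + 14 = 32 bytes per record.
struct XRaySledEntry {
  uint64_t Address;          // address of the sled label (Sled.Sled)
  uint64_t Function;         // address of the containing function
  uint8_t Kind;              // SledKind: function enter, exit, tail call, ...
  uint8_t AlwaysInstrument;  // set when "function-instrument"="xray-always"
  uint8_t Padding[14];       // zero-filled to keep each record at 32 bytes
};

static_assert(sizeof(XRaySledEntry) == 32,
              "sled records are emitted as 32-byte entries");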
diff --git a/contrib/llvm/lib/Target/X86/X86OptimizeLEAs.cpp b/contrib/llvm/lib/Target/X86/X86OptimizeLEAs.cpp
index 4da0fddda395..e1447006cd18 100644
--- a/contrib/llvm/lib/Target/X86/X86OptimizeLEAs.cpp
+++ b/contrib/llvm/lib/Target/X86/X86OptimizeLEAs.cpp
@@ -44,12 +44,6 @@ static cl::opt<bool>
STATISTIC(NumSubstLEAs, "Number of LEA instruction substitutions");
STATISTIC(NumRedundantLEAs, "Number of redundant LEA instructions removed");
-class MemOpKey;
-
-/// \brief Returns a hash table key based on memory operands of \p MI. The
-/// number of the first memory operand of \p MI is specified through \p N.
-static inline MemOpKey getMemOpKey(const MachineInstr &MI, unsigned N);
-
/// \brief Returns true if two machine operands are identical and they are not
/// physical registers.
static inline bool isIdenticalOp(const MachineOperand &MO1,
@@ -63,6 +57,7 @@ static bool isSimilarDispOp(const MachineOperand &MO1,
/// \brief Returns true if the instruction is LEA.
static inline bool isLEA(const MachineInstr &MI);
+namespace {
/// A key based on instruction's memory operands.
class MemOpKey {
public:
@@ -95,6 +90,7 @@ public:
// Address' displacement operand.
const MachineOperand *Disp;
};
+} // end anonymous namespace
/// Provide DenseMapInfo for MemOpKey.
namespace llvm {
@@ -168,6 +164,8 @@ template <> struct DenseMapInfo<MemOpKey> {
};
}
+/// \brief Returns a hash table key based on memory operands of \p MI. The
+/// number of the first memory operand of \p MI is specified through \p N.
static inline MemOpKey getMemOpKey(const MachineInstr &MI, unsigned N) {
assert((isLEA(MI) || MI.mayLoadOrStore()) &&
"The instruction must be a LEA, a load or a store");
@@ -221,7 +219,7 @@ class OptimizeLEAPass : public MachineFunctionPass {
public:
OptimizeLEAPass() : MachineFunctionPass(ID) {}
- const char *getPassName() const override { return "X86 LEA Optimize"; }
+ StringRef getPassName() const override { return "X86 LEA Optimize"; }
/// \brief Loop over all of the basic blocks, replacing address
/// calculations in load and store instructions, if it's already
@@ -237,7 +235,7 @@ private:
/// \brief Choose the best \p LEA instruction from the \p List to replace
/// address calculation in \p MI instruction. Return the address displacement
- /// and the distance between \p MI and the choosen \p BestLEA in
+ /// and the distance between \p MI and the chosen \p BestLEA in
/// \p AddrDispShift and \p Dist.
bool chooseBestLEA(const SmallVectorImpl<MachineInstr *> &List,
const MachineInstr &MI, MachineInstr *&BestLEA,
@@ -551,10 +549,10 @@ bool OptimizeLEAPass::removeRedundantLEAs(MemOpMap &LEAs) {
MachineInstr &Last = **I2;
int64_t AddrDispShift;
- // LEAs should be in occurence order in the list, so we can freely
+ // LEAs should be in occurrence order in the list, so we can freely
// replace later LEAs with earlier ones.
assert(calcInstrDist(First, Last) > 0 &&
- "LEAs must be in occurence order in the list");
+ "LEAs must be in occurrence order in the list");
// Check that the Last LEA instruction can be replaced by the First.
if (!isReplaceable(First, Last, AddrDispShift)) {
diff --git a/contrib/llvm/lib/Target/X86/X86PadShortFunction.cpp b/contrib/llvm/lib/Target/X86/X86PadShortFunction.cpp
index 62a9aafc2cf3..3069d1fd3497 100644
--- a/contrib/llvm/lib/Target/X86/X86PadShortFunction.cpp
+++ b/contrib/llvm/lib/Target/X86/X86PadShortFunction.cpp
@@ -57,10 +57,10 @@ namespace {
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "X86 Atom pad short functions";
}
diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp b/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp
index 86750633aecc..65f438f94b04 100644
--- a/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -128,21 +128,44 @@ X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
if (RC == &X86::GR8_NOREXRegClass)
return RC;
+ const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
+
const TargetRegisterClass *Super = RC;
TargetRegisterClass::sc_iterator I = RC->getSuperClasses();
do {
switch (Super->getID()) {
+ case X86::FR32RegClassID:
+ case X86::FR64RegClassID:
+ // If AVX-512 isn't supported we should only inflate to these classes.
+ if (!Subtarget.hasAVX512() && Super->getSize() == RC->getSize())
+ return Super;
+ break;
+ case X86::VR128RegClassID:
+ case X86::VR256RegClassID:
+ // If VLX isn't supported we should only inflate to these classes.
+ if (!Subtarget.hasVLX() && Super->getSize() == RC->getSize())
+ return Super;
+ break;
+ case X86::VR128XRegClassID:
+ case X86::VR256XRegClassID:
+ // If VLX isn't supported we shouldn't inflate to these classes.
+ if (Subtarget.hasVLX() && Super->getSize() == RC->getSize())
+ return Super;
+ break;
+ case X86::FR32XRegClassID:
+ case X86::FR64XRegClassID:
+ // If AVX-512 isn't supported we shouldn't inflate to these classes.
+ if (Subtarget.hasAVX512() && Super->getSize() == RC->getSize())
+ return Super;
+ break;
case X86::GR8RegClassID:
case X86::GR16RegClassID:
case X86::GR32RegClassID:
case X86::GR64RegClassID:
- case X86::FR32RegClassID:
- case X86::FR64RegClassID:
case X86::RFP32RegClassID:
case X86::RFP64RegClassID:
case X86::RFP80RegClassID:
- case X86::VR128RegClassID:
- case X86::VR256RegClassID:
+ case X86::VR512RegClassID:
// Don't return a super-class that would shrink the spill size.
// That can happen with the vector and float classes.
if (Super->getSize() == RC->getSize())
@@ -241,13 +264,14 @@ X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
const MCPhysReg *
X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ assert(MF && "MachineFunction required");
+
const X86Subtarget &Subtarget = MF->getSubtarget<X86Subtarget>();
bool HasSSE = Subtarget.hasSSE1();
bool HasAVX = Subtarget.hasAVX();
bool HasAVX512 = Subtarget.hasAVX512();
- bool CallsEHReturn = MF->getMMI().callsEHReturn();
+ bool CallsEHReturn = MF->callsEHReturn();
- assert(MF && "MachineFunction required");
switch (MF->getFunction()->getCallingConv()) {
case CallingConv::GHC:
case CallingConv::HiPE:
@@ -282,11 +306,26 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
}
case CallingConv::HHVM:
return CSR_64_HHVM_SaveList;
+ case CallingConv::X86_RegCall:
+ if (Is64Bit) {
+ if (IsWin64) {
+ return (HasSSE ? CSR_Win64_RegCall_SaveList :
+ CSR_Win64_RegCall_NoSSE_SaveList);
+ } else {
+ return (HasSSE ? CSR_SysV64_RegCall_SaveList :
+ CSR_SysV64_RegCall_NoSSE_SaveList);
+ }
+ } else {
+ return (HasSSE ? CSR_32_RegCall_SaveList :
+ CSR_32_RegCall_NoSSE_SaveList);
+ }
case CallingConv::Cold:
if (Is64Bit)
return CSR_64_MostRegs_SaveList;
break;
case CallingConv::X86_64_Win64:
+ if (!HasSSE)
+ return CSR_Win64_NoSSE_SaveList;
return CSR_Win64_SaveList;
case CallingConv::X86_64_SysV:
if (CallsEHReturn)
@@ -313,8 +352,11 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
}
if (Is64Bit) {
- if (IsWin64)
+ if (IsWin64) {
+ if (!HasSSE)
+ return CSR_Win64_NoSSE_SaveList;
return CSR_Win64_SaveList;
+ }
if (CallsEHReturn)
return CSR_64EHRet_SaveList;
if (Subtarget.getTargetLowering()->supportSwiftError() &&
@@ -378,6 +420,19 @@ X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
}
case CallingConv::HHVM:
return CSR_64_HHVM_RegMask;
+ case CallingConv::X86_RegCall:
+ if (Is64Bit) {
+ if (IsWin64) {
+ return (HasSSE ? CSR_Win64_RegCall_RegMask :
+ CSR_Win64_RegCall_NoSSE_RegMask);
+ } else {
+ return (HasSSE ? CSR_SysV64_RegCall_RegMask :
+ CSR_SysV64_RegCall_NoSSE_RegMask);
+ }
+ } else {
+ return (HasSSE ? CSR_32_RegCall_RegMask :
+ CSR_32_RegCall_NoSSE_RegMask);
+ }
case CallingConv::Cold:
if (Is64Bit)
return CSR_64_MostRegs_RegMask;
@@ -503,6 +558,8 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
}
}
+ assert(checkAllSuperRegsMarked(Reserved,
+ {X86::SIL, X86::DIL, X86::BPL, X86::SPL}));
return Reserved;
}
@@ -526,12 +583,12 @@ void X86RegisterInfo::adjustStackMapLiveOutMask(uint32_t *Mask) const {
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//
-static bool CantUseSP(const MachineFrameInfo *MFI) {
- return MFI->hasVarSizedObjects() || MFI->hasOpaqueSPAdjustment();
+static bool CantUseSP(const MachineFrameInfo &MFI) {
+ return MFI.hasVarSizedObjects() || MFI.hasOpaqueSPAdjustment();
}
bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
- const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
if (!EnableBasePointer)
return false;
@@ -549,7 +606,7 @@ bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
if (!TargetRegisterInfo::canRealignStack(MF))
return false;
- const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
const MachineRegisterInfo *MRI = &MF.getRegInfo();
// Stack realignment requires a frame pointer. If we already started
@@ -571,6 +628,35 @@ bool X86RegisterInfo::hasReservedSpillSlot(const MachineFunction &MF,
llvm_unreachable("Unused function on X86. Otherwise need a test case.");
}
+// tryOptimizeLEAtoMOV - helper function that tries to replace a LEA instruction
+// of the form 'lea (%esp), %ebx' --> 'mov %esp, %ebx'.
+// TODO: In this case we should really be trying first to entirely eliminate
+// this instruction, which is a plain copy.
+static bool tryOptimizeLEAtoMOV(MachineBasicBlock::iterator II) {
+ MachineInstr &MI = *II;
+ unsigned Opc = II->getOpcode();
+ // Check if this is a LEA of the form 'lea (%esp), %ebx'
+ if ((Opc != X86::LEA32r && Opc != X86::LEA64r && Opc != X86::LEA64_32r) ||
+ MI.getOperand(2).getImm() != 1 ||
+ MI.getOperand(3).getReg() != X86::NoRegister ||
+ MI.getOperand(4).getImm() != 0 ||
+ MI.getOperand(5).getReg() != X86::NoRegister)
+ return false;
+ unsigned BasePtr = MI.getOperand(1).getReg();
+ // In X32 mode, ensure the base-pointer is a 32-bit operand, so the LEA will
+ // be replaced with a 32-bit operand MOV which will zero extend the upper
+ // 32-bits of the super register.
+ if (Opc == X86::LEA64_32r)
+ BasePtr = getX86SubSuperRegister(BasePtr, 32);
+ unsigned NewDestReg = MI.getOperand(0).getReg();
+ const X86InstrInfo *TII =
+ MI.getParent()->getParent()->getSubtarget<X86Subtarget>().getInstrInfo();
+ TII->copyPhysReg(*MI.getParent(), II, MI.getDebugLoc(), NewDestReg, BasePtr,
+ MI.getOperand(1).isKill());
+ MI.eraseFromParent();
+ return true;
+}
+
void
X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, unsigned FIOperandNum,
@@ -611,19 +697,21 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// For LEA64_32r when BasePtr is 32-bits (X32) we can use full-size 64-bit
// register as source operand, semantic is the same and destination is
// 32-bits. It saves one byte per lea in code since 0x67 prefix is avoided.
+ // Don't change BasePtr since it is used later for stack adjustment.
+ unsigned MachineBasePtr = BasePtr;
if (Opc == X86::LEA64_32r && X86::GR32RegClass.contains(BasePtr))
- BasePtr = getX86SubSuperRegister(BasePtr, 64);
+ MachineBasePtr = getX86SubSuperRegister(BasePtr, 64);
// This must be part of a four operand memory reference. Replace the
- // FrameIndex with base register with EBP. Add an offset to the offset.
- MI.getOperand(FIOperandNum).ChangeToRegister(BasePtr, false);
+ // FrameIndex with base register. Add an offset to the offset.
+ MI.getOperand(FIOperandNum).ChangeToRegister(MachineBasePtr, false);
// Now add the frame object offset to the offset from EBP.
int FIOffset;
if (AfterFPPop) {
// Tail call jmp happens after FP is popped.
- const MachineFrameInfo *MFI = MF.getFrameInfo();
- FIOffset = MFI->getObjectOffset(FrameIndex) - TFI->getOffsetOfLocalArea();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ FIOffset = MFI.getObjectOffset(FrameIndex) - TFI->getOffsetOfLocalArea();
} else
FIOffset = TFI->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
@@ -645,7 +733,8 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
int Offset = FIOffset + Imm;
assert((!Is64Bit || isInt<32>((long long)FIOffset + Imm)) &&
"Requesting 64-bit offset in 32-bit immediate!");
- MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset);
+ if (Offset != 0 || !tryOptimizeLEAtoMOV(II))
+ MI.getOperand(FIOperandNum + 3).ChangeToImmediate(Offset);
} else {
// Offset is symbolic. This is extremely rare.
uint64_t Offset = FIOffset +
@@ -667,13 +756,3 @@ X86RegisterInfo::getPtrSizedFrameRegister(const MachineFunction &MF) const {
FrameReg = getX86SubSuperRegister(FrameReg, 32);
return FrameReg;
}
-
-unsigned llvm::get512BitSuperRegister(unsigned Reg) {
- if (Reg >= X86::XMM0 && Reg <= X86::XMM31)
- return X86::ZMM0 + (Reg - X86::XMM0);
- if (Reg >= X86::YMM0 && Reg <= X86::YMM31)
- return X86::ZMM0 + (Reg - X86::YMM0);
- if (Reg >= X86::ZMM0 && Reg <= X86::ZMM31)
- return Reg;
- llvm_unreachable("Unexpected SIMD register");
-}
diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.h b/contrib/llvm/lib/Target/X86/X86RegisterInfo.h
index 8d0094cbf3d6..58fa31e94fba 100644
--- a/contrib/llvm/lib/Target/X86/X86RegisterInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.h
@@ -100,7 +100,7 @@ public:
const MCPhysReg *
getCalleeSavedRegs(const MachineFunction* MF) const override;
const MCPhysReg *
- getCalleeSavedRegsViaCopy(const MachineFunction *MF) const override;
+ getCalleeSavedRegsViaCopy(const MachineFunction *MF) const;
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID) const override;
const uint32_t *getNoPreservedMask() const override;
@@ -137,9 +137,6 @@ public:
unsigned getSlotSize() const { return SlotSize; }
};
-//get512BitRegister - X86 utility - returns 512-bit super register
-unsigned get512BitSuperRegister(unsigned Reg);
-
} // End llvm namespace
#endif
diff --git a/contrib/llvm/lib/Target/X86/X86RegisterInfo.td b/contrib/llvm/lib/Target/X86/X86RegisterInfo.td
index 373f9b4c65f2..372a15aff15a 100644
--- a/contrib/llvm/lib/Target/X86/X86RegisterInfo.td
+++ b/contrib/llvm/lib/Target/X86/X86RegisterInfo.td
@@ -345,6 +345,8 @@ def GR32 : RegisterClass<"X86", [i32], 32,
// GR64 - 64-bit GPRs. This oddly includes RIP, which isn't accurate, since
// RIP isn't really a register and it can't be used anywhere except in an
// address, but it doesn't cause trouble.
+// FIXME: it *does* cause trouble - CheckBaseRegAndIndexReg() has extra
+// tests because of the inclusion of RIP in this register class.
def GR64 : RegisterClass<"X86", [i64], 64,
(add RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
RBX, R14, R15, R12, R13, RBP, RSP, RIP)>;
diff --git a/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp b/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
index d02859b3dcbd..f031a281e5dd 100644
--- a/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.cpp
@@ -31,8 +31,8 @@ bool X86SelectionDAGInfo::isBaseRegConflictPossible(
// alignment requirements. Fall back to generic code if there are any
// dynamic stack adjustments (hopefully rare) and the base pointer would
// conflict if we had to use it.
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
- if (!MFI->hasVarSizedObjects() && !MFI->hasOpaqueSPAdjustment())
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ if (!MFI.hasVarSizedObjects() && !MFI.hasOpaqueSPAdjustment())
return false;
const X86RegisterInfo *TRI = static_cast<const X86RegisterInfo *>(
diff --git a/contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp b/contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
index 1adc92cfda63..11115524c810 100644
--- a/contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
+++ b/contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp
@@ -14,6 +14,7 @@
#include "X86ShuffleDecodeConstantPool.h"
#include "Utils/X86ShuffleDecode.h"
+#include "llvm/ADT/SmallBitVector.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/IR/Constants.h"
@@ -23,10 +24,12 @@
namespace llvm {
-void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
- Type *MaskTy = C->getType();
- // It is not an error for the PSHUFB mask to not be a vector of i8 because the
- // constant pool uniques constants by their bit representation.
+static bool extractConstantMask(const Constant *C, unsigned MaskEltSizeInBits,
+ SmallBitVector &UndefElts,
+ SmallVectorImpl<uint64_t> &RawMask) {
+ // It is not an error for shuffle masks to not be a vector of
+ // MaskEltSizeInBits because the constant pool uniques constants by their
+ // bit representation.
// e.g. the following take up the same space in the constant pool:
// i128 -170141183420855150465331762880109871104
//
@@ -34,165 +37,161 @@ void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
//
// <4 x i32> <i32 -2147483648, i32 -2147483648,
// i32 -2147483648, i32 -2147483648>
+ Type *CstTy = C->getType();
+ if (!CstTy->isVectorTy())
+ return false;
+
+ Type *CstEltTy = CstTy->getVectorElementType();
+ if (!CstEltTy->isIntegerTy())
+ return false;
+
+ unsigned CstSizeInBits = CstTy->getPrimitiveSizeInBits();
+ unsigned CstEltSizeInBits = CstTy->getScalarSizeInBits();
+ unsigned NumCstElts = CstTy->getVectorNumElements();
+
+ // Extract all the undef/constant element data and pack into single bitsets.
+ APInt UndefBits(CstSizeInBits, 0);
+ APInt MaskBits(CstSizeInBits, 0);
+ for (unsigned i = 0; i != NumCstElts; ++i) {
+ Constant *COp = C->getAggregateElement(i);
+ if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp)))
+ return false;
-#ifndef NDEBUG
- unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
- assert(MaskTySize == 128 || MaskTySize == 256 || MaskTySize == 512);
-#endif
+ if (isa<UndefValue>(COp)) {
+ APInt EltUndef = APInt::getLowBitsSet(CstSizeInBits, CstEltSizeInBits);
+ UndefBits |= EltUndef.shl(i * CstEltSizeInBits);
+ continue;
+ }
- if (!MaskTy->isVectorTy())
- return;
- int NumElts = MaskTy->getVectorNumElements();
+ APInt EltBits = cast<ConstantInt>(COp)->getValue();
+ EltBits = EltBits.zextOrTrunc(CstSizeInBits);
+ MaskBits |= EltBits.shl(i * CstEltSizeInBits);
+ }
- Type *EltTy = MaskTy->getVectorElementType();
- if (!EltTy->isIntegerTy())
- return;
+ // Now extract the undef/constant bit data into the raw shuffle masks.
+ assert((CstSizeInBits % MaskEltSizeInBits) == 0 &&
+ "Unaligned shuffle mask size");
- // The shuffle mask requires a byte vector - decode cases with
- // wider elements as well.
- unsigned BitWidth = cast<IntegerType>(EltTy)->getBitWidth();
- if ((BitWidth % 8) != 0)
+ unsigned NumMaskElts = CstSizeInBits / MaskEltSizeInBits;
+ UndefElts = SmallBitVector(NumMaskElts, false);
+ RawMask.resize(NumMaskElts, 0);
+
+ for (unsigned i = 0; i != NumMaskElts; ++i) {
+ APInt EltUndef = UndefBits.lshr(i * MaskEltSizeInBits);
+ EltUndef = EltUndef.zextOrTrunc(MaskEltSizeInBits);
+
+ // Only treat the element as UNDEF if all bits are UNDEF, otherwise
+ // treat it as zero.
+ if (EltUndef.isAllOnesValue()) {
+ UndefElts[i] = true;
+ RawMask[i] = 0;
+ continue;
+ }
+
+ APInt EltBits = MaskBits.lshr(i * MaskEltSizeInBits);
+ EltBits = EltBits.zextOrTrunc(MaskEltSizeInBits);
+ RawMask[i] = EltBits.getZExtValue();
+ }
+
+ return true;
+}
+
+void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
+ Type *MaskTy = C->getType();
+ unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
+ (void)MaskTySize;
+ assert((MaskTySize == 128 || MaskTySize == 256 || MaskTySize == 512) &&
+ "Unexpected vector size.");
+
+ // The shuffle mask requires a byte vector.
+ SmallBitVector UndefElts;
+ SmallVector<uint64_t, 32> RawMask;
+ if (!extractConstantMask(C, 8, UndefElts, RawMask))
return;
- int Scale = BitWidth / 8;
- int NumBytes = NumElts * Scale;
- ShuffleMask.reserve(NumBytes);
+ unsigned NumElts = RawMask.size();
+ assert((NumElts == 16 || NumElts == 32 || NumElts == 64) &&
+ "Unexpected number of vector elements.");
- for (int i = 0; i != NumElts; ++i) {
- Constant *COp = C->getAggregateElement(i);
- if (!COp) {
- ShuffleMask.clear();
- return;
- } else if (isa<UndefValue>(COp)) {
- ShuffleMask.append(Scale, SM_SentinelUndef);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (UndefElts[i]) {
+ ShuffleMask.push_back(SM_SentinelUndef);
continue;
}
- APInt APElt = cast<ConstantInt>(COp)->getValue();
- for (int j = 0; j != Scale; ++j) {
+ uint64_t Element = RawMask[i];
+ // If the high bit (7) of the byte is set, the element is zeroed.
+ if (Element & (1 << 7))
+ ShuffleMask.push_back(SM_SentinelZero);
+ else {
// For AVX vectors with 32 bytes the base of the shuffle is the 16-byte
// lane of the vector we're inside.
- int Base = ((i * Scale) + j) & ~0xf;
-
- uint64_t Element = APElt.getLoBits(8).getZExtValue();
- APElt = APElt.lshr(8);
-
- // If the high bit (7) of the byte is set, the element is zeroed.
- if (Element & (1 << 7))
- ShuffleMask.push_back(SM_SentinelZero);
- else {
- // Only the least significant 4 bits of the byte are used.
- int Index = Base + (Element & 0xf);
- ShuffleMask.push_back(Index);
- }
+ unsigned Base = i & ~0xf;
+
+ // Only the least significant 4 bits of the byte are used.
+ int Index = Base + (Element & 0xf);
+ ShuffleMask.push_back(Index);
}
}
-
- assert(NumBytes == (int)ShuffleMask.size() && "Unexpected shuffle mask size");
}
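
For reference, a minimal standalone sketch (intended to mirror the byte semantics decoded above; illustrative only, not LLVM code) of how one PSHUFB control byte at position i turns into a shuffle mask index:

#include <cstdint>
#include <cstdio>

// Decode one PSHUFB control byte at byte position i of the vector: if bit 7
// is set the result byte is zeroed, otherwise the low 4 bits index a byte
// within the 16-byte lane that contains position i.
static int decodePshufbByte(unsigned i, uint8_t Control) {
  if (Control & 0x80)
    return -1;                       // Zeroed (SM_SentinelZero analogue).
  unsigned Base = i & ~0xfu;         // Start of the enclosing 16-byte lane.
  return (int)(Base + (Control & 0xf));
}

int main() {
  std::printf("%d\n", decodePshufbByte(3, 0x85));  // -1: high bit -> zero.
  std::printf("%d\n", decodePshufbByte(3, 0x02));  //  2: lane 0, byte 2.
  std::printf("%d\n", decodePshufbByte(17, 0x02)); // 18: lane 1 base 16 + 2.
}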
void DecodeVPERMILPMask(const Constant *C, unsigned ElSize,
SmallVectorImpl<int> &ShuffleMask) {
Type *MaskTy = C->getType();
- // It is not an error for the PSHUFB mask to not be a vector of i8 because the
- // constant pool uniques constants by their bit representation.
- // e.g. the following take up the same space in the constant pool:
- // i128 -170141183420855150465331762880109871104
- //
- // <2 x i64> <i64 -9223372034707292160, i64 -9223372034707292160>
- //
- // <4 x i32> <i32 -2147483648, i32 -2147483648,
- // i32 -2147483648, i32 -2147483648>
-
- if (ElSize != 32 && ElSize != 64)
- return;
-
unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
- if (MaskTySize != 128 && MaskTySize != 256 && MaskTySize != 512)
- return;
-
- // Only support vector types.
- if (!MaskTy->isVectorTy())
+ (void)MaskTySize;
+ assert((MaskTySize == 128 || MaskTySize == 256 || MaskTySize == 512) &&
+ "Unexpected vector size.");
+ assert((ElSize == 32 || ElSize == 64) && "Unexpected vector element size.");
+
+ // The shuffle mask requires elements the same size as the target.
+ SmallBitVector UndefElts;
+ SmallVector<uint64_t, 8> RawMask;
+ if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
return;
- // Make sure its an integer type.
- Type *VecEltTy = MaskTy->getVectorElementType();
- if (!VecEltTy->isIntegerTy())
- return;
-
- // Support any element type from byte up to element size.
- // This is necessary primarily because 64-bit elements get split to 32-bit
- // in the constant pool on 32-bit target.
- unsigned EltTySize = VecEltTy->getIntegerBitWidth();
- if (EltTySize < 8 || EltTySize > ElSize)
- return;
-
- unsigned NumElements = MaskTySize / ElSize;
- assert((NumElements == 2 || NumElements == 4 || NumElements == 8 ||
- NumElements == 16) &&
+ unsigned NumElts = RawMask.size();
+ unsigned NumEltsPerLane = 128 / ElSize;
+ assert((NumElts == 2 || NumElts == 4 || NumElts == 8 || NumElts == 16) &&
"Unexpected number of vector elements.");
- ShuffleMask.reserve(NumElements);
- unsigned NumElementsPerLane = 128 / ElSize;
- unsigned Factor = ElSize / EltTySize;
- for (unsigned i = 0; i < NumElements; ++i) {
- Constant *COp = C->getAggregateElement(i * Factor);
- if (!COp) {
- ShuffleMask.clear();
- return;
- } else if (isa<UndefValue>(COp)) {
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (UndefElts[i]) {
ShuffleMask.push_back(SM_SentinelUndef);
continue;
}
- int Index = i & ~(NumElementsPerLane - 1);
- uint64_t Element = cast<ConstantInt>(COp)->getZExtValue();
+
+ int Index = i & ~(NumEltsPerLane - 1);
+ uint64_t Element = RawMask[i];
if (ElSize == 64)
Index += (Element >> 1) & 0x1;
else
Index += Element & 0x3;
+
ShuffleMask.push_back(Index);
}
-
- // TODO: Handle funny-looking vectors too.
}
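
A small standalone sketch of the per-lane index computation used above (assuming ElSize is 32 for VPERMILPS or 64 for VPERMILPD):

#include <cstdint>
#include <cstdio>

// Per-lane VPERMILPS/VPERMILPD index selection, as in the loop above.
static int decodeVpermilpElt(unsigned i, uint64_t Sel, unsigned ElSize) {
  unsigned NumEltsPerLane = 128 / ElSize;   // 4 for PS, 2 for PD.
  int Index = i & ~(NumEltsPerLane - 1);    // Base of the 128-bit lane.
  if (ElSize == 64)
    Index += (Sel >> 1) & 0x1;              // PD selector uses bit 1.
  else
    Index += Sel & 0x3;                     // PS selector uses bits [1:0].
  return Index;
}

int main() {
  std::printf("%d\n", decodeVpermilpElt(5, 0x2, 32)); // Lane base 4 + 2 = 6.
  std::printf("%d\n", decodeVpermilpElt(3, 0x2, 64)); // Lane base 2 + 1 = 3.
}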
void DecodeVPERMIL2PMask(const Constant *C, unsigned M2Z, unsigned ElSize,
SmallVectorImpl<int> &ShuffleMask) {
Type *MaskTy = C->getType();
-
unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
- if (MaskTySize != 128 && MaskTySize != 256)
- return;
+ (void)MaskTySize;
+ assert((MaskTySize == 128 || MaskTySize == 256) && "Unexpected vector size.");
- // Only support vector types.
- if (!MaskTy->isVectorTy())
+ // The shuffle mask requires elements the same size as the target.
+ SmallBitVector UndefElts;
+ SmallVector<uint64_t, 8> RawMask;
+ if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
return;
- // Make sure its an integer type.
- Type *VecEltTy = MaskTy->getVectorElementType();
- if (!VecEltTy->isIntegerTy())
- return;
-
- // Support any element type from byte up to element size.
- // This is necessary primarily because 64-bit elements get split to 32-bit
- // in the constant pool on 32-bit target.
- unsigned EltTySize = VecEltTy->getIntegerBitWidth();
- if (EltTySize < 8 || EltTySize > ElSize)
- return;
-
- unsigned NumElements = MaskTySize / ElSize;
- assert((NumElements == 2 || NumElements == 4 || NumElements == 8) &&
+ unsigned NumElts = RawMask.size();
+ unsigned NumEltsPerLane = 128 / ElSize;
+ assert((NumElts == 2 || NumElts == 4 || NumElts == 8) &&
"Unexpected number of vector elements.");
- ShuffleMask.reserve(NumElements);
- unsigned NumElementsPerLane = 128 / ElSize;
- unsigned Factor = ElSize / EltTySize;
- for (unsigned i = 0; i < NumElements; ++i) {
- Constant *COp = C->getAggregateElement(i * Factor);
- if (!COp) {
- ShuffleMask.clear();
- return;
- } else if (isa<UndefValue>(COp)) {
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (UndefElts[i]) {
ShuffleMask.push_back(SM_SentinelUndef);
continue;
}
@@ -201,7 +200,7 @@ void DecodeVPERMIL2PMask(const Constant *C, unsigned M2Z, unsigned ElSize,
// Bits[3] - Match Bit.
// Bits[2:1] - (Per Lane) PD Shuffle Mask.
// Bits[2:0] - (Per Lane) PS Shuffle Mask.
- uint64_t Selector = cast<ConstantInt>(COp)->getZExtValue();
+ uint64_t Selector = RawMask[i];
unsigned MatchBit = (Selector >> 3) & 0x1;
// M2Z[0:1] MatchBit
@@ -215,51 +214,34 @@ void DecodeVPERMIL2PMask(const Constant *C, unsigned M2Z, unsigned ElSize,
continue;
}
- int Index = i & ~(NumElementsPerLane - 1);
+ int Index = i & ~(NumEltsPerLane - 1);
if (ElSize == 64)
Index += (Selector >> 1) & 0x1;
else
Index += Selector & 0x3;
int Src = (Selector >> 2) & 0x1;
- Index += Src * NumElements;
+ Index += Src * NumElts;
ShuffleMask.push_back(Index);
}
-
- // TODO: Handle funny-looking vectors too.
}
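
Likewise, a standalone sketch of the two-source index arithmetic above for an element that is not zeroed by the match/M2Z test (illustrative only):

#include <cstdint>
#include <cstdio>

// VPERMIL2PS/PD index selection: lane base, per-lane element from the
// selector's low bits, then an offset of NumElts when the second source
// is chosen, exactly as computed in the loop above.
static int decodeVpermil2Elt(unsigned i, uint64_t Selector, unsigned ElSize,
                             unsigned NumElts) {
  unsigned NumEltsPerLane = 128 / ElSize;
  int Index = i & ~(NumEltsPerLane - 1);
  if (ElSize == 64)
    Index += (Selector >> 1) & 0x1;
  else
    Index += Selector & 0x3;
  int Src = (Selector >> 2) & 0x1;          // 0 = first source, 1 = second.
  return Index + Src * (int)NumElts;
}

int main() {
  // 4 x f32 (ElSize 32, NumElts 4): selector 0x6 -> second source, element 2.
  std::printf("%d\n", decodeVpermil2Elt(1, 0x6, 32, 4)); // 0 + 2 + 1*4 = 6.
}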
void DecodeVPPERMMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
- Type *MaskTy = C->getType();
- assert(MaskTy->getPrimitiveSizeInBits() == 128);
-
- // Only support vector types.
- if (!MaskTy->isVectorTy())
- return;
-
- // Make sure its an integer type.
- Type *VecEltTy = MaskTy->getVectorElementType();
- if (!VecEltTy->isIntegerTy())
- return;
+ assert(C->getType()->getPrimitiveSizeInBits() == 128 &&
+ "Unexpected vector size.");
- // The shuffle mask requires a byte vector - decode cases with
- // wider elements as well.
- unsigned BitWidth = cast<IntegerType>(VecEltTy)->getBitWidth();
- if ((BitWidth % 8) != 0)
+ // The shuffle mask requires a byte vector.
+ SmallBitVector UndefElts;
+ SmallVector<uint64_t, 32> RawMask;
+ if (!extractConstantMask(C, 8, UndefElts, RawMask))
return;
- int NumElts = MaskTy->getVectorNumElements();
- int Scale = BitWidth / 8;
- int NumBytes = NumElts * Scale;
- ShuffleMask.reserve(NumBytes);
+ unsigned NumElts = RawMask.size();
+ assert(NumElts == 16 && "Unexpected number of vector elements.");
- for (int i = 0; i != NumElts; ++i) {
- Constant *COp = C->getAggregateElement(i);
- if (!COp) {
- ShuffleMask.clear();
- return;
- } else if (isa<UndefValue>(COp)) {
- ShuffleMask.append(Scale, SM_SentinelUndef);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (UndefElts[i]) {
+ ShuffleMask.push_back(SM_SentinelUndef);
continue;
}
@@ -275,82 +257,77 @@ void DecodeVPPERMMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
// 4 - 00h (zero - fill).
// 5 - FFh (ones - fill).
// 6 - Most significant bit of source byte replicated in all bit positions.
- // 7 - Invert most significant bit of source byte and replicate in all bit positions.
- APInt MaskElt = cast<ConstantInt>(COp)->getValue();
- for (int j = 0; j != Scale; ++j) {
- APInt Index = MaskElt.getLoBits(5);
- APInt PermuteOp = MaskElt.lshr(5).getLoBits(3);
- MaskElt = MaskElt.lshr(8);
-
- if (PermuteOp == 4) {
- ShuffleMask.push_back(SM_SentinelZero);
- continue;
- }
- if (PermuteOp != 0) {
- ShuffleMask.clear();
- return;
- }
- ShuffleMask.push_back((int)Index.getZExtValue());
+ // 7 - Invert most significant bit of source byte and replicate in all bit
+ // positions.
+ uint64_t Element = RawMask[i];
+ uint64_t Index = Element & 0x1F;
+ uint64_t PermuteOp = (Element >> 5) & 0x7;
+
+ if (PermuteOp == 4) {
+ ShuffleMask.push_back(SM_SentinelZero);
+ continue;
+ }
+ if (PermuteOp != 0) {
+ ShuffleMask.clear();
+ return;
}
+ ShuffleMask.push_back((int)Index);
}
-
- assert(NumBytes == (int)ShuffleMask.size() && "Unexpected shuffle mask size");
}
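
A standalone sketch of the VPPERM control-byte decode described above (only the copy and zero-fill operations are representable in a shuffle mask):

#include <cstdint>
#include <cstdio>

// Decode one VPPERM control byte: bits [4:0] select one of 32 source bytes,
// bits [7:5] select the permute operation.  Op 0 copies the source byte and
// op 4 zero-fills; other ops cannot be expressed as a plain shuffle index.
static int decodeVppermByte(uint8_t Control) {
  unsigned Index = Control & 0x1F;
  unsigned PermuteOp = (Control >> 5) & 0x7;
  if (PermuteOp == 4)
    return -1;             // Zero fill (SM_SentinelZero analogue).
  if (PermuteOp != 0)
    return -2;             // Not expressible as a shuffle mask index.
  return (int)Index;
}

int main() {
  std::printf("%d\n", decodeVppermByte(0x93)); // op 4 -> zero fill.
  std::printf("%d\n", decodeVppermByte(0x11)); // op 0 -> source byte 17.
}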
-void DecodeVPERMVMask(const Constant *C, MVT VT,
+void DecodeVPERMVMask(const Constant *C, unsigned ElSize,
SmallVectorImpl<int> &ShuffleMask) {
Type *MaskTy = C->getType();
- if (MaskTy->isVectorTy()) {
- unsigned NumElements = MaskTy->getVectorNumElements();
- if (NumElements == VT.getVectorNumElements()) {
- unsigned EltMaskSize = Log2_64(NumElements);
- for (unsigned i = 0; i < NumElements; ++i) {
- Constant *COp = C->getAggregateElement(i);
- if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp))) {
- ShuffleMask.clear();
- return;
- }
- if (isa<UndefValue>(COp))
- ShuffleMask.push_back(SM_SentinelUndef);
- else {
- APInt Element = cast<ConstantInt>(COp)->getValue();
- Element = Element.getLoBits(EltMaskSize);
- ShuffleMask.push_back(Element.getZExtValue());
- }
- }
- }
+ unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
+ (void)MaskTySize;
+ assert((MaskTySize == 128 || MaskTySize == 256 || MaskTySize == 512) &&
+ "Unexpected vector size.");
+ assert((ElSize == 8 || ElSize == 16 || ElSize == 32 || ElSize == 64) &&
+ "Unexpected vector element size.");
+
+ // The shuffle mask requires elements the same size as the target.
+ SmallBitVector UndefElts;
+ SmallVector<uint64_t, 8> RawMask;
+ if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
return;
+
+ unsigned NumElts = RawMask.size();
+
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (UndefElts[i]) {
+ ShuffleMask.push_back(SM_SentinelUndef);
+ continue;
+ }
+ int Index = RawMask[i] & (NumElts - 1);
+ ShuffleMask.push_back(Index);
}
- // Scalar value; just broadcast it
- if (!isa<ConstantInt>(C))
- return;
- uint64_t Element = cast<ConstantInt>(C)->getZExtValue();
- int NumElements = VT.getVectorNumElements();
- Element &= (1 << NumElements) - 1;
- for (int i = 0; i < NumElements; ++i)
- ShuffleMask.push_back(Element);
}
-void DecodeVPERMV3Mask(const Constant *C, MVT VT,
+void DecodeVPERMV3Mask(const Constant *C, unsigned ElSize,
SmallVectorImpl<int> &ShuffleMask) {
Type *MaskTy = C->getType();
- unsigned NumElements = MaskTy->getVectorNumElements();
- if (NumElements == VT.getVectorNumElements()) {
- unsigned EltMaskSize = Log2_64(NumElements * 2);
- for (unsigned i = 0; i < NumElements; ++i) {
- Constant *COp = C->getAggregateElement(i);
- if (!COp) {
- ShuffleMask.clear();
- return;
- }
- if (isa<UndefValue>(COp))
- ShuffleMask.push_back(SM_SentinelUndef);
- else {
- APInt Element = cast<ConstantInt>(COp)->getValue();
- Element = Element.getLoBits(EltMaskSize);
- ShuffleMask.push_back(Element.getZExtValue());
- }
+ unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
+ (void)MaskTySize;
+ assert((MaskTySize == 128 || MaskTySize == 256 || MaskTySize == 512) &&
+ "Unexpected vector size.");
+ assert((ElSize == 8 || ElSize == 16 || ElSize == 32 || ElSize == 64) &&
+ "Unexpected vector element size.");
+
+ // The shuffle mask requires elements the same size as the target.
+ SmallBitVector UndefElts;
+ SmallVector<uint64_t, 8> RawMask;
+ if (!extractConstantMask(C, ElSize, UndefElts, RawMask))
+ return;
+
+ unsigned NumElts = RawMask.size();
+
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (UndefElts[i]) {
+ ShuffleMask.push_back(SM_SentinelUndef);
+ continue;
}
+ int Index = RawMask[i] & (NumElts*2 - 1);
+ ShuffleMask.push_back(Index);
}
}
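
A tiny standalone sketch of the index masking used in the two loops above (NumElts is assumed to be a power of two):

#include <cstdint>
#include <cstdio>

// VPERMV selects from a single source, so indices wrap modulo NumElts;
// VPERMT2/VPERMV3 select from two concatenated sources, so indices wrap
// modulo 2 * NumElts and values >= NumElts refer to the second source.
static int vpermvIndex(uint64_t Raw, unsigned NumElts) {
  return (int)(Raw & (NumElts - 1));
}
static int vpermv3Index(uint64_t Raw, unsigned NumElts) {
  return (int)(Raw & (NumElts * 2 - 1));
}

int main() {
  std::printf("%d\n", vpermvIndex(0x13, 16));  // 0x13 & 15 = 3.
  std::printf("%d\n", vpermv3Index(0x13, 16)); // 0x13 & 31 = 19 (2nd source).
}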
} // llvm namespace
diff --git a/contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.h b/contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.h
index d2565b849807..b703cbbd2b29 100644
--- a/contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.h
+++ b/contrib/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.h
@@ -40,11 +40,11 @@ void DecodeVPERMIL2PMask(const Constant *C, unsigned MatchImm, unsigned ElSize,
void DecodeVPPERMMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask);
/// Decode a VPERM W/D/Q/PS/PD mask from an IR-level vector constant.
-void DecodeVPERMVMask(const Constant *C, MVT VT,
+void DecodeVPERMVMask(const Constant *C, unsigned ElSize,
SmallVectorImpl<int> &ShuffleMask);
/// Decode a VPERMT2 W/D/Q/PS/PD mask from an IR-level vector constant.
-void DecodeVPERMV3Mask(const Constant *C, MVT VT,
+void DecodeVPERMV3Mask(const Constant *C, unsigned ElSize,
SmallVectorImpl<int> &ShuffleMask);
} // llvm namespace
diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp
index 8f77682d2276..727ff70c3ff6 100644
--- a/contrib/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/contrib/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -92,6 +92,10 @@ unsigned char X86Subtarget::classifyGlobalReference(const GlobalValue *GV,
if (TM.getCodeModel() == CodeModel::Large)
return X86II::MO_NO_FLAG;
+ // Absolute symbols can be referenced directly.
+ if (GV && GV->isAbsoluteSymbolRef())
+ return X86II::MO_NO_FLAG;
+
if (TM.shouldAssumeDSOLocal(M, GV))
return classifyLocalReference(GV);
@@ -228,6 +232,9 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
else if (isTargetDarwin() || isTargetLinux() || isTargetSolaris() ||
isTargetKFreeBSD() || In64BitMode)
stackAlignment = 16;
+
+ assert((!isPMULLDSlow() || hasSSE41()) &&
+ "Feature Slow PMULLD can only be set on a subtarget with SSE4.1");
}
void X86Subtarget::initializeEnvironment() {
@@ -275,6 +282,7 @@ void X86Subtarget::initializeEnvironment() {
HasMWAITX = false;
HasMPX = false;
IsBTMemSlow = false;
+ IsPMULLDSlow = false;
IsSHLDSlow = false;
IsUAMem16Slow = false;
IsUAMem32Slow = false;
@@ -282,6 +290,9 @@ void X86Subtarget::initializeEnvironment() {
HasCmpxchg16b = false;
UseLeaForSP = false;
HasFastPartialYMMWrite = false;
+ HasFastScalarFSQRT = false;
+ HasFastVectorFSQRT = false;
+ HasFastLZCNT = false;
HasSlowDivide32 = false;
HasSlowDivide64 = false;
PadShortFunctions = false;
@@ -328,6 +339,26 @@ X86Subtarget::X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
setPICStyle(PICStyles::GOT);
}
+const CallLowering *X86Subtarget::getCallLowering() const {
+ assert(GISel && "Access to GlobalISel APIs not set");
+ return GISel->getCallLowering();
+}
+
+const InstructionSelector *X86Subtarget::getInstructionSelector() const {
+ assert(GISel && "Access to GlobalISel APIs not set");
+ return GISel->getInstructionSelector();
+}
+
+const LegalizerInfo *X86Subtarget::getLegalizerInfo() const {
+ assert(GISel && "Access to GlobalISel APIs not set");
+ return GISel->getLegalizerInfo();
+}
+
+const RegisterBankInfo *X86Subtarget::getRegBankInfo() const {
+ assert(GISel && "Access to GlobalISel APIs not set");
+ return GISel->getRegBankInfo();
+}
+
bool X86Subtarget::enableEarlyIfConversion() const {
return hasCMov() && X86EarlyIfConv;
}
diff --git a/contrib/llvm/lib/Target/X86/X86Subtarget.h b/contrib/llvm/lib/Target/X86/X86Subtarget.h
index a274b797ca8f..92c16214aa4a 100644
--- a/contrib/llvm/lib/Target/X86/X86Subtarget.h
+++ b/contrib/llvm/lib/Target/X86/X86Subtarget.h
@@ -19,6 +19,7 @@
#include "X86InstrInfo.h"
#include "X86SelectionDAGInfo.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <string>
@@ -177,6 +178,10 @@ protected:
/// True if SHLD instructions are slow.
bool IsSHLDSlow;
+ /// True if the PMULLD instruction is slow compared to PMULLW/PMULHW and
+ /// PMULUDQ.
+ bool IsPMULLDSlow;
+
/// True if unaligned memory accesses of 16-bytes are slow.
bool IsUAMem16Slow;
@@ -199,6 +204,14 @@ protected:
/// of a YMM register without clearing the upper part.
bool HasFastPartialYMMWrite;
+ /// True if hardware SQRTSS instruction is at least as fast (latency) as
+ /// RSQRTSS followed by a Newton-Raphson iteration.
+ bool HasFastScalarFSQRT;
+
+ /// True if hardware SQRTPS/VSQRTPS instructions are at least as fast
+ /// (throughput) as RSQRTPS/VRSQRTPS followed by a Newton-Raphson iteration.
+ bool HasFastVectorFSQRT;
+
/// True if 8-bit divisions are significantly faster than
/// 32-bit divisions and should be used when possible.
bool HasSlowDivide32;
@@ -207,6 +220,9 @@ protected:
/// 64-bit divisions and should be used when possible.
bool HasSlowDivide64;
+ /// True if LZCNT instruction is fast.
+ bool HasFastLZCNT;
+
/// True if the short functions should be padded to prevent
/// a stall when returning too early.
bool PadShortFunctions;
@@ -287,6 +303,10 @@ protected:
/// Instruction itineraries for scheduling
InstrItineraryData InstrItins;
+ /// Gather the accessor points to GlobalISel-related APIs.
+ /// This is used to avoid ifndefs spreading around while GISel is
+ /// an optional library.
+ std::unique_ptr<GISelAccessor> GISel;
private:
/// Override the stack alignment.
@@ -315,6 +335,9 @@ public:
X86Subtarget(const Triple &TT, StringRef CPU, StringRef FS,
const X86TargetMachine &TM, unsigned StackAlignOverride);
+ /// This object will take ownership of \p GISelAccessor.
+ void setGISelAccessor(GISelAccessor &GISel) { this->GISel.reset(&GISel); }
+
const X86TargetLowering *getTargetLowering() const override {
return &TLInfo;
}
@@ -342,6 +365,11 @@ public:
/// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
+ /// Methods used by Global ISel
+ const CallLowering *getCallLowering() const override;
+ const InstructionSelector *getInstructionSelector() const override;
+ const LegalizerInfo *getLegalizerInfo() const override;
+ const RegisterBankInfo *getRegBankInfo() const override;
private:
/// Initialize the full set of dependencies so we can use an initializer
/// list for X86Subtarget.
@@ -428,12 +456,16 @@ public:
bool hasMWAITX() const { return HasMWAITX; }
bool isBTMemSlow() const { return IsBTMemSlow; }
bool isSHLDSlow() const { return IsSHLDSlow; }
+ bool isPMULLDSlow() const { return IsPMULLDSlow; }
bool isUnalignedMem16Slow() const { return IsUAMem16Slow; }
bool isUnalignedMem32Slow() const { return IsUAMem32Slow; }
bool hasSSEUnalignedMem() const { return HasSSEUnalignedMem; }
bool hasCmpxchg16b() const { return HasCmpxchg16b; }
bool useLeaForSP() const { return UseLeaForSP; }
bool hasFastPartialYMMWrite() const { return HasFastPartialYMMWrite; }
+ bool hasFastScalarFSQRT() const { return HasFastScalarFSQRT; }
+ bool hasFastVectorFSQRT() const { return HasFastVectorFSQRT; }
+ bool hasFastLZCNT() const { return HasFastLZCNT; }
bool hasSlowDivide32() const { return HasSlowDivide32; }
bool hasSlowDivide64() const { return HasSlowDivide64; }
bool padShortFunctions() const { return PadShortFunctions; }
@@ -450,6 +482,8 @@ public:
bool hasPKU() const { return HasPKU; }
bool hasMPX() const { return HasMPX; }
+ virtual bool isXRaySupported() const override { return is64Bit(); }
+
bool isAtom() const { return X86ProcFamily == IntelAtom; }
bool isSLM() const { return X86ProcFamily == IntelSLM; }
bool useSoftFloat() const { return UseSoftFloat; }
@@ -465,7 +499,7 @@ public:
bool isTargetFreeBSD() const { return TargetTriple.isOSFreeBSD(); }
bool isTargetDragonFly() const { return TargetTriple.isOSDragonFly(); }
bool isTargetSolaris() const { return TargetTriple.isOSSolaris(); }
- bool isTargetPS4() const { return TargetTriple.isPS4(); }
+ bool isTargetPS4() const { return TargetTriple.isPS4CPU(); }
bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); }
diff --git a/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp b/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp
index 50c9c25a27c0..aa5cfc64e9eb 100644
--- a/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp
+++ b/contrib/llvm/lib/Target/X86/X86TargetMachine.cpp
@@ -13,8 +13,12 @@
#include "X86TargetMachine.h"
#include "X86.h"
+#include "X86CallLowering.h"
#include "X86TargetObjectFile.h"
#include "X86TargetTransformInfo.h"
+#include "llvm/CodeGen/GlobalISel/GISelAccessor.h"
+#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
+#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Function.h"
@@ -35,12 +39,14 @@ void initializeWinEHStatePassPass(PassRegistry &);
extern "C" void LLVMInitializeX86Target() {
// Register the target.
- RegisterTargetMachine<X86TargetMachine> X(TheX86_32Target);
- RegisterTargetMachine<X86TargetMachine> Y(TheX86_64Target);
+ RegisterTargetMachine<X86TargetMachine> X(getTheX86_32Target());
+ RegisterTargetMachine<X86TargetMachine> Y(getTheX86_64Target());
PassRegistry &PR = *PassRegistry::getPassRegistry();
+ initializeGlobalISel(PR);
initializeWinEHStatePassPass(PR);
initializeFixupBWInstPassPass(PR);
+ initializeEvexToVexInstPassPass(PR);
}
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
@@ -50,8 +56,12 @@ static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
return make_unique<TargetLoweringObjectFileMachO>();
}
+ if (TT.isOSFreeBSD())
+ return make_unique<X86FreeBSDTargetObjectFile>();
if (TT.isOSLinux() || TT.isOSNaCl())
return make_unique<X86LinuxNaClTargetObjectFile>();
+ if (TT.isOSFuchsia())
+ return make_unique<X86FuchsiaTargetObjectFile>();
if (TT.isOSBinFormatELF())
return make_unique<X86ELFTargetObjectFile>();
if (TT.isKnownWindowsMSVCEnvironment() || TT.isWindowsCoreCLREnvironment())
@@ -151,32 +161,47 @@ X86TargetMachine::X86TargetMachine(const Target &T, const Triple &TT,
CodeModel::Model CM, CodeGenOpt::Level OL)
: LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options,
getEffectiveRelocModel(TT, RM), CM, OL),
- TLOF(createTLOF(getTargetTriple())),
- Subtarget(TT, CPU, FS, *this, Options.StackAlignmentOverride) {
+ TLOF(createTLOF(getTargetTriple())) {
// Windows stack unwinder gets confused when execution flow "falls through"
// after a call to 'noreturn' function.
// To prevent that, we emit a trap for 'unreachable' IR instructions.
// (which on X86, happens to be the 'ud2' instruction)
// On PS4, the "return address" of a 'noreturn' call must still be within
// the calling function, and TrapUnreachable is an easy way to get that.
- if (Subtarget.isTargetWin64() || Subtarget.isTargetPS4())
+ // The check here for 64-bit Windows is a bit icky, but as we're unlikely
+ // to ever want to mix 32- and 64-bit Windows code in a single module,
+ // this should be fine.
+ if ((TT.isOSWindows() && TT.getArch() == Triple::x86_64) || TT.isPS4())
this->Options.TrapUnreachable = true;
- // By default (and when -ffast-math is on), enable estimate codegen for
- // everything except scalar division. By default, use 1 refinement step for
- // all operations. Defaults may be overridden by using command-line options.
- // Scalar division estimates are disabled because they break too much
- // real-world code. These defaults match GCC behavior.
- this->Options.Reciprocals.setDefaults("sqrtf", true, 1);
- this->Options.Reciprocals.setDefaults("divf", false, 1);
- this->Options.Reciprocals.setDefaults("vec-sqrtf", true, 1);
- this->Options.Reciprocals.setDefaults("vec-divf", true, 1);
-
initAsmInfo();
}
X86TargetMachine::~X86TargetMachine() {}
+#ifdef LLVM_BUILD_GLOBAL_ISEL
+namespace {
+struct X86GISelActualAccessor : public GISelAccessor {
+ std::unique_ptr<CallLowering> CL;
+ X86GISelActualAccessor(CallLowering* CL): CL(CL) {}
+ const CallLowering *getCallLowering() const override {
+ return CL.get();
+ }
+ const InstructionSelector *getInstructionSelector() const override {
+ //TODO: Implement
+ return nullptr;
+ }
+ const LegalizerInfo *getLegalizerInfo() const override {
+ //TODO: Implement
+ return nullptr;
+ }
+ const RegisterBankInfo *getRegBankInfo() const override {
+ //TODO: Implement
+ return nullptr;
+ }
+};
+} // End anonymous namespace.
+#endif
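
For context, a hedged and much-simplified sketch of the accessor indirection used here (illustrative names only, not the real GISelAccessor API): the base accessor answers every query with null when GlobalISel is not built, and the concrete accessor owns the objects when it is.

#include <memory>

// Base accessor: used when GlobalISel is not compiled in; every query
// returns null so callers can query the subtarget unconditionally.
struct AccessorBase {
  virtual ~AccessorBase() = default;
  virtual const void *getCallLowering() const { return nullptr; }
};

// Concrete accessor: owns the GlobalISel objects when they are available.
struct AccessorImpl : AccessorBase {
  std::unique_ptr<int> CL;                    // Stand-in for CallLowering.
  explicit AccessorImpl(int *CL) : CL(CL) {}
  const void *getCallLowering() const override { return CL.get(); }
};

// Subtarget-like holder: takes ownership of a heap-allocated accessor and
// forwards queries to it, mirroring setGISelAccessor above.
struct SubtargetLike {
  std::unique_ptr<AccessorBase> GISel;
  void setAccessor(AccessorBase &A) { GISel.reset(&A); }
  const void *getCallLowering() const { return GISel->getCallLowering(); }
};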
const X86Subtarget *
X86TargetMachine::getSubtargetImpl(const Function &F) const {
Attribute CPUAttr = F.getFnAttribute("target-cpu");
@@ -216,6 +241,13 @@ X86TargetMachine::getSubtargetImpl(const Function &F) const {
resetTargetOptions(F);
I = llvm::make_unique<X86Subtarget>(TargetTriple, CPU, FS, *this,
Options.StackAlignmentOverride);
+#ifndef LLVM_BUILD_GLOBAL_ISEL
+ GISelAccessor *GISel = new GISelAccessor();
+#else
+ X86GISelActualAccessor *GISel = new X86GISelActualAccessor(
+ new X86CallLowering(*I->getTargetLowering()));
+#endif
+ I->setGISelAccessor(*GISel);
}
return I.get();
}
@@ -254,9 +286,22 @@ public:
return getTM<X86TargetMachine>();
}
+ ScheduleDAGInstrs *
+ createMachineScheduler(MachineSchedContext *C) const override {
+ ScheduleDAGMILive *DAG = createGenericSchedLive(C);
+ DAG->addMutation(createMacroFusionDAGMutation(DAG->TII));
+ return DAG;
+ }
+
void addIRPasses() override;
bool addInstSelector() override;
- bool addILPOpts() override;
+#ifdef LLVM_BUILD_GLOBAL_ISEL
+ bool addIRTranslator() override;
+ bool addLegalizeMachineIR() override;
+ bool addRegBankSelect() override;
+ bool addGlobalInstructionSelect() override;
+#endif
+ bool addILPOpts() override;
bool addPreISel() override;
void addPreRegAlloc() override;
void addPostRegAlloc() override;
@@ -273,6 +318,9 @@ void X86PassConfig::addIRPasses() {
addPass(createAtomicExpandPass(&getX86TargetMachine()));
TargetPassConfig::addIRPasses();
+
+ if (TM->getOptLevel() != CodeGenOpt::None)
+ addPass(createInterleavedAccessPass(TM));
}
bool X86PassConfig::addInstSelector() {
@@ -288,6 +336,28 @@ bool X86PassConfig::addInstSelector() {
return false;
}
+#ifdef LLVM_BUILD_GLOBAL_ISEL
+bool X86PassConfig::addIRTranslator() {
+ addPass(new IRTranslator());
+ return false;
+}
+
+bool X86PassConfig::addLegalizeMachineIR() {
+ //TODO: Implement
+ return false;
+}
+
+bool X86PassConfig::addRegBankSelect() {
+ //TODO: Implement
+ return false;
+}
+
+bool X86PassConfig::addGlobalInstructionSelect() {
+ //TODO: Implement
+ return false;
+}
+#endif
+
bool X86PassConfig::addILPOpts() {
addPass(&EarlyIfConverterID);
if (EnableMachineCombinerPass)
@@ -321,7 +391,7 @@ void X86PassConfig::addPreSched2() { addPass(createX86ExpandPseudoPass()); }
void X86PassConfig::addPreEmitPass() {
if (getOptLevel() != CodeGenOpt::None)
- addPass(createExecutionDependencyFixPass(&X86::VR128RegClass));
+ addPass(createExecutionDependencyFixPass(&X86::VR128XRegClass));
if (UseVZeroUpper)
addPass(createX86IssueVZeroUpperPass());
@@ -330,5 +400,6 @@ void X86PassConfig::addPreEmitPass() {
addPass(createX86FixupBWInsts());
addPass(createX86PadShortFunctions());
addPass(createX86FixupLEAs());
+ addPass(createX86EvexToVexInsts());
}
}
diff --git a/contrib/llvm/lib/Target/X86/X86TargetMachine.h b/contrib/llvm/lib/Target/X86/X86TargetMachine.h
index 4734a44315a9..d756d07926dd 100644
--- a/contrib/llvm/lib/Target/X86/X86TargetMachine.h
+++ b/contrib/llvm/lib/Target/X86/X86TargetMachine.h
@@ -24,8 +24,6 @@ class StringRef;
class X86TargetMachine final : public LLVMTargetMachine {
std::unique_ptr<TargetLoweringObjectFile> TLOF;
- X86Subtarget Subtarget;
-
mutable StringMap<std::unique_ptr<X86Subtarget>> SubtargetMap;
public:
diff --git a/contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp b/contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp
index d664cff5f2c1..7f70829cb6c6 100644
--- a/contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/X86/X86TargetObjectFile.cpp
@@ -24,14 +24,13 @@ using namespace llvm;
using namespace dwarf;
const MCExpr *X86_64MachoTargetObjectFile::getTTypeGlobalReference(
- const GlobalValue *GV, unsigned Encoding, Mangler &Mang,
- const TargetMachine &TM, MachineModuleInfo *MMI,
- MCStreamer &Streamer) const {
+ const GlobalValue *GV, unsigned Encoding, const TargetMachine &TM,
+ MachineModuleInfo *MMI, MCStreamer &Streamer) const {
// On Darwin/X86-64, we can reference dwarf symbols with foo@GOTPCREL+4, which
// is an indirect pc-relative reference.
if ((Encoding & DW_EH_PE_indirect) && (Encoding & DW_EH_PE_pcrel)) {
- const MCSymbol *Sym = TM.getSymbol(GV, Mang);
+ const MCSymbol *Sym = TM.getSymbol(GV);
const MCExpr *Res =
MCSymbolRefExpr::create(Sym, MCSymbolRefExpr::VK_GOTPCREL, getContext());
const MCExpr *Four = MCConstantExpr::create(4, getContext());
@@ -39,13 +38,13 @@ const MCExpr *X86_64MachoTargetObjectFile::getTTypeGlobalReference(
}
return TargetLoweringObjectFileMachO::getTTypeGlobalReference(
- GV, Encoding, Mang, TM, MMI, Streamer);
+ GV, Encoding, TM, MMI, Streamer);
}
MCSymbol *X86_64MachoTargetObjectFile::getCFIPersonalitySymbol(
- const GlobalValue *GV, Mangler &Mang, const TargetMachine &TM,
+ const GlobalValue *GV, const TargetMachine &TM,
MachineModuleInfo *MMI) const {
- return TM.getSymbol(GV, Mang);
+ return TM.getSymbol(GV);
}
const MCExpr *X86_64MachoTargetObjectFile::getIndirectSymViaGOTPCRel(
@@ -67,6 +66,20 @@ const MCExpr *X86ELFTargetObjectFile::getDebugThreadLocalSymbol(
}
void
+X86FreeBSDTargetObjectFile::Initialize(MCContext &Ctx,
+ const TargetMachine &TM) {
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+ InitializeELF(TM.Options.UseInitArray);
+}
+
+void
+X86FuchsiaTargetObjectFile::Initialize(MCContext &Ctx,
+ const TargetMachine &TM) {
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+ InitializeELF(TM.Options.UseInitArray);
+}
+
+void
X86LinuxNaClTargetObjectFile::Initialize(MCContext &Ctx,
const TargetMachine &TM) {
TargetLoweringObjectFileELF::Initialize(Ctx, TM);
@@ -74,7 +87,7 @@ X86LinuxNaClTargetObjectFile::Initialize(MCContext &Ctx,
}
const MCExpr *X86WindowsTargetObjectFile::lowerRelativeReference(
- const GlobalValue *LHS, const GlobalValue *RHS, Mangler &Mang,
+ const GlobalValue *LHS, const GlobalValue *RHS,
const TargetMachine &TM) const {
// Our symbols should exist in address space zero, cowardly no-op if
// otherwise.
@@ -95,8 +108,9 @@ const MCExpr *X86WindowsTargetObjectFile::lowerRelativeReference(
cast<GlobalVariable>(RHS)->hasInitializer() || RHS->hasSection())
return nullptr;
- return MCSymbolRefExpr::create(
- TM.getSymbol(LHS, Mang), MCSymbolRefExpr::VK_COFF_IMGREL32, getContext());
+ return MCSymbolRefExpr::create(TM.getSymbol(LHS),
+ MCSymbolRefExpr::VK_COFF_IMGREL32,
+ getContext());
}
static std::string APIntToHexString(const APInt &AI) {
diff --git a/contrib/llvm/lib/Target/X86/X86TargetObjectFile.h b/contrib/llvm/lib/Target/X86/X86TargetObjectFile.h
index 2e703f1494fa..39d2e84e5ed7 100644
--- a/contrib/llvm/lib/Target/X86/X86TargetObjectFile.h
+++ b/contrib/llvm/lib/Target/X86/X86TargetObjectFile.h
@@ -19,15 +19,15 @@ namespace llvm {
/// x86-64.
class X86_64MachoTargetObjectFile : public TargetLoweringObjectFileMachO {
public:
- const MCExpr *
- getTTypeGlobalReference(const GlobalValue *GV, unsigned Encoding,
- Mangler &Mang, const TargetMachine &TM,
- MachineModuleInfo *MMI,
- MCStreamer &Streamer) const override;
+ const MCExpr *getTTypeGlobalReference(const GlobalValue *GV,
+ unsigned Encoding,
+ const TargetMachine &TM,
+ MachineModuleInfo *MMI,
+ MCStreamer &Streamer) const override;
// getCFIPersonalitySymbol - The symbol that gets passed to
// .cfi_personality.
- MCSymbol *getCFIPersonalitySymbol(const GlobalValue *GV, Mangler &Mang,
+ MCSymbol *getCFIPersonalitySymbol(const GlobalValue *GV,
const TargetMachine &TM,
MachineModuleInfo *MMI) const override;
@@ -49,6 +49,17 @@ namespace llvm {
const MCExpr *getDebugThreadLocalSymbol(const MCSymbol *Sym) const override;
};
+ /// X86FreeBSDTargetObjectFile - This implementation is used for FreeBSD
+ /// on x86 and x86-64.
+ class X86FreeBSDTargetObjectFile : public X86ELFTargetObjectFile {
+ void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
+ };
+
+ /// \brief This implementation is used for Fuchsia on x86-64.
+ class X86FuchsiaTargetObjectFile : public X86ELFTargetObjectFile {
+ void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
+ };
+
/// X86LinuxNaClTargetObjectFile - This implementation is used for linux and
/// Native Client on x86 and x86-64.
class X86LinuxNaClTargetObjectFile : public X86ELFTargetObjectFile {
@@ -59,7 +70,6 @@ namespace llvm {
class X86WindowsTargetObjectFile : public TargetLoweringObjectFileCOFF {
const MCExpr *
lowerRelativeReference(const GlobalValue *LHS, const GlobalValue *RHS,
- Mangler &Mang,
const TargetMachine &TM) const override;
/// \brief Given a mergeable constant with the specified size and relocation
diff --git a/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index f44a8c662028..2b0e672d56f2 100644
--- a/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -13,6 +13,31 @@
/// independent and default TTI implementations handle the rest.
///
//===----------------------------------------------------------------------===//
+/// A note about the cost model numbers used below: they correspond to some
+/// "generic" X86 CPU rather than to a concrete CPU model. Usually the numbers
+/// correspond to the CPU where the feature first appeared. For example, if we
+/// do Subtarget.hasSSE42() in the lookups below, the cost is based on Nehalem
+/// as that was the first CPU to support that feature level and thus most
+/// likely has the worst case cost.
+/// Some examples of other technologies/CPUs:
+/// SSE 3 - Pentium4 / Athlon64
+/// SSE 4.1 - Penryn
+/// SSE 4.2 - Nehalem
+/// AVX - Sandy Bridge
+/// AVX2 - Haswell
+/// AVX-512 - Xeon Phi / Skylake
+/// And some examples of instruction target dependent costs (latency)
+///                     divss     sqrtss    rsqrtss
+///   AMD K7            11-16     19        3
+///   Piledriver         9-24     13-15     5
+///   Jaguar            14        16        2
+///   Pentium II,III    18        30        2
+///   Nehalem            7-14      7-18     3
+///   Haswell           10-13     11        5
+/// TODO: Develop and implement the target dependent cost model and
+/// specialize cost numbers for different Cost Model Targets such as throughput,
+/// code size, latency and uop count.
+//===----------------------------------------------------------------------===//
#include "X86TargetTransformInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -115,6 +140,30 @@ int X86TTIImpl::getArithmeticInstrCost(
return Cost;
}
+ static const CostTblEntry AVX512BWUniformConstCostTable[] = {
+ { ISD::SDIV, MVT::v32i16, 6 }, // vpmulhw sequence
+ { ISD::UDIV, MVT::v32i16, 6 }, // vpmulhuw sequence
+ };
+
+ if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
+ ST->hasBWI()) {
+ if (const auto *Entry = CostTableLookup(AVX512BWUniformConstCostTable, ISD,
+ LT.second))
+ return LT.first * Entry->Cost;
+ }
+
+ static const CostTblEntry AVX512UniformConstCostTable[] = {
+ { ISD::SDIV, MVT::v16i32, 15 }, // vpmuldq sequence
+ { ISD::UDIV, MVT::v16i32, 15 }, // vpmuludq sequence
+ };
+
+ if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
+ ST->hasAVX512()) {
+ if (const auto *Entry = CostTableLookup(AVX512UniformConstCostTable, ISD,
+ LT.second))
+ return LT.first * Entry->Cost;
+ }
+
static const CostTblEntry AVX2UniformConstCostTable[] = {
{ ISD::SRA, MVT::v4i64, 4 }, // 2 x psrad + shuffle.
@@ -131,13 +180,76 @@ int X86TTIImpl::getArithmeticInstrCost(
return LT.first * Entry->Cost;
}
+ static const CostTblEntry SSE2UniformConstCostTable[] = {
+ { ISD::SDIV, MVT::v16i16, 12 }, // pmulhw sequence
+ { ISD::SDIV, MVT::v8i16, 6 }, // pmulhw sequence
+ { ISD::UDIV, MVT::v16i16, 12 }, // pmulhuw sequence
+ { ISD::UDIV, MVT::v8i16, 6 }, // pmulhuw sequence
+ { ISD::SDIV, MVT::v8i32, 38 }, // pmuludq sequence
+ { ISD::SDIV, MVT::v4i32, 19 }, // pmuludq sequence
+ { ISD::UDIV, MVT::v8i32, 30 }, // pmuludq sequence
+ { ISD::UDIV, MVT::v4i32, 15 }, // pmuludq sequence
+ };
+
+ if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
+ ST->hasSSE2()) {
+ // pmuldq sequence.
+ if (ISD == ISD::SDIV && LT.second == MVT::v8i32 && ST->hasAVX())
+ return LT.first * 30;
+ if (ISD == ISD::SDIV && LT.second == MVT::v4i32 && ST->hasSSE41())
+ return LT.first * 15;
+
+ if (const auto *Entry = CostTableLookup(SSE2UniformConstCostTable, ISD,
+ LT.second))
+ return LT.first * Entry->Cost;
+ }
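
As a worked example of how these entries interact with type legalization (assumed legalization behavior, not taken from this patch): an SDIV of <16 x i16> by a uniform constant on a plain SSE2 target splits into two v8i16 operations, so the scaled cost is 2 * 6 = 12, consistent with the explicit v16i16 entry used when the wider type is itself legal.

// Assumed numbers for SDIV <16 x i16> by a uniform constant on plain SSE2:
// legalization splits the type into two v8i16 halves (LT.first == 2) and
// each half costs 6 (the pmulhw sequence above), for a total of 12.
int main() {
  const int LTFirst = 2;      // Number of legalized v8i16 operations.
  const int PerOpCost = 6;    // pmulhw sequence cost per v8i16 op.
  return (LTFirst * PerOpCost == 12) ? 0 : 1;
}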
+
+ static const CostTblEntry AVX512DQCostTable[] = {
+ { ISD::MUL, MVT::v2i64, 1 },
+ { ISD::MUL, MVT::v4i64, 1 },
+ { ISD::MUL, MVT::v8i64, 1 }
+ };
+
+ // Look for AVX512DQ lowering tricks for custom cases.
+ if (ST->hasDQI()) {
+ if (const auto *Entry = CostTableLookup(AVX512DQCostTable, ISD,
+ LT.second))
+ return LT.first * Entry->Cost;
+ }
+
+ static const CostTblEntry AVX512BWCostTable[] = {
+ { ISD::MUL, MVT::v64i8, 11 }, // extend/pmullw/trunc sequence.
+ { ISD::MUL, MVT::v32i8, 4 }, // extend/pmullw/trunc sequence.
+ { ISD::MUL, MVT::v16i8, 4 }, // extend/pmullw/trunc sequence.
+
+ // Vectorizing division is a bad idea. See the SSE2 table for more comments.
+ { ISD::SDIV, MVT::v64i8, 64*20 },
+ { ISD::SDIV, MVT::v32i16, 32*20 },
+ { ISD::SDIV, MVT::v16i32, 16*20 },
+ { ISD::SDIV, MVT::v8i64, 8*20 },
+ { ISD::UDIV, MVT::v64i8, 64*20 },
+ { ISD::UDIV, MVT::v32i16, 32*20 },
+ { ISD::UDIV, MVT::v16i32, 16*20 },
+ { ISD::UDIV, MVT::v8i64, 8*20 },
+ };
+
+ // Look for AVX512BW lowering tricks for custom cases.
+ if (ST->hasBWI()) {
+ if (const auto *Entry = CostTableLookup(AVX512BWCostTable, ISD,
+ LT.second))
+ return LT.first * Entry->Cost;
+ }
+
static const CostTblEntry AVX512CostTable[] = {
{ ISD::SHL, MVT::v16i32, 1 },
{ ISD::SRL, MVT::v16i32, 1 },
{ ISD::SRA, MVT::v16i32, 1 },
- { ISD::SHL, MVT::v8i64, 1 },
- { ISD::SRL, MVT::v8i64, 1 },
- { ISD::SRA, MVT::v8i64, 1 },
+ { ISD::SHL, MVT::v8i64, 1 },
+ { ISD::SRL, MVT::v8i64, 1 },
+ { ISD::SRA, MVT::v8i64, 1 },
+
+ { ISD::MUL, MVT::v32i8, 13 }, // extend/pmullw/trunc sequence.
+ { ISD::MUL, MVT::v16i8, 5 }, // extend/pmullw/trunc sequence.
};
if (ST->hasAVX512()) {
@@ -220,6 +332,34 @@ int X86TTIImpl::getArithmeticInstrCost(
{ ISD::SRA, MVT::v2i64, 4 }, // srl/xor/sub sequence.
{ ISD::SRA, MVT::v4i64, 4 }, // srl/xor/sub sequence.
+ { ISD::MUL, MVT::v32i8, 17 }, // extend/pmullw/trunc sequence.
+ { ISD::MUL, MVT::v16i8, 7 }, // extend/pmullw/trunc sequence.
+
+ { ISD::FDIV, MVT::f32, 7 }, // Haswell from http://www.agner.org/
+ { ISD::FDIV, MVT::v4f32, 7 }, // Haswell from http://www.agner.org/
+ { ISD::FDIV, MVT::v8f32, 14 }, // Haswell from http://www.agner.org/
+ { ISD::FDIV, MVT::f64, 14 }, // Haswell from http://www.agner.org/
+ { ISD::FDIV, MVT::v2f64, 14 }, // Haswell from http://www.agner.org/
+ { ISD::FDIV, MVT::v4f64, 28 }, // Haswell from http://www.agner.org/
+ };
+
+ // Look for AVX2 lowering tricks for custom cases.
+ if (ST->hasAVX2()) {
+ if (const auto *Entry = CostTableLookup(AVX2CustomCostTable, ISD,
+ LT.second))
+ return LT.first * Entry->Cost;
+ }
+
+ static const CostTblEntry AVXCustomCostTable[] = {
+ { ISD::MUL, MVT::v32i8, 26 }, // extend/pmullw/trunc sequence.
+
+ { ISD::FDIV, MVT::f32, 14 }, // SNB from http://www.agner.org/
+ { ISD::FDIV, MVT::v4f32, 14 }, // SNB from http://www.agner.org/
+ { ISD::FDIV, MVT::v8f32, 28 }, // SNB from http://www.agner.org/
+ { ISD::FDIV, MVT::f64, 22 }, // SNB from http://www.agner.org/
+ { ISD::FDIV, MVT::v2f64, 22 }, // SNB from http://www.agner.org/
+ { ISD::FDIV, MVT::v4f64, 44 }, // SNB from http://www.agner.org/
+
// Vectorizing division is a bad idea. See the SSE2 table for more comments.
{ ISD::SDIV, MVT::v32i8, 32*20 },
{ ISD::SDIV, MVT::v16i16, 16*20 },
@@ -232,17 +372,28 @@ int X86TTIImpl::getArithmeticInstrCost(
};
// Look for AVX2 lowering tricks for custom cases.
- if (ST->hasAVX2()) {
- if (const auto *Entry = CostTableLookup(AVX2CustomCostTable, ISD,
+ if (ST->hasAVX()) {
+ if (const auto *Entry = CostTableLookup(AVXCustomCostTable, ISD,
+ LT.second))
+ return LT.first * Entry->Cost;
+ }
+
+ static const CostTblEntry SSE42FloatCostTable[] = {
+ { ISD::FDIV, MVT::f32, 14 }, // Nehalem from http://www.agner.org/
+ { ISD::FDIV, MVT::v4f32, 14 }, // Nehalem from http://www.agner.org/
+ { ISD::FDIV, MVT::f64, 22 }, // Nehalem from http://www.agner.org/
+ { ISD::FDIV, MVT::v2f64, 22 }, // Nehalem from http://www.agner.org/
+ };
+
+ if (ST->hasSSE42()) {
+ if (const auto *Entry = CostTableLookup(SSE42FloatCostTable, ISD,
LT.second))
return LT.first * Entry->Cost;
}
static const CostTblEntry
- SSE2UniformConstCostTable[] = {
- // We don't correctly identify costs of casts because they are marked as
- // custom.
- // Constant splats are cheaper for the following instructions.
+ SSE2UniformCostTable[] = {
+ // Uniform splats are cheaper for the following instructions.
{ ISD::SHL, MVT::v16i8, 1 }, // psllw.
{ ISD::SHL, MVT::v32i8, 2 }, // psllw.
{ ISD::SHL, MVT::v8i16, 1 }, // psllw.
@@ -269,21 +420,13 @@ int X86TTIImpl::getArithmeticInstrCost(
{ ISD::SRA, MVT::v8i32, 2 }, // psrad.
{ ISD::SRA, MVT::v2i64, 4 }, // 2 x psrad + shuffle.
{ ISD::SRA, MVT::v4i64, 8 }, // 2 x psrad + shuffle.
-
- { ISD::SDIV, MVT::v8i16, 6 }, // pmulhw sequence
- { ISD::UDIV, MVT::v8i16, 6 }, // pmulhuw sequence
- { ISD::SDIV, MVT::v4i32, 19 }, // pmuludq sequence
- { ISD::UDIV, MVT::v4i32, 15 }, // pmuludq sequence
};
- if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
- ST->hasSSE2()) {
- // pmuldq sequence.
- if (ISD == ISD::SDIV && LT.second == MVT::v4i32 && ST->hasSSE41())
- return LT.first * 15;
-
- if (const auto *Entry = CostTableLookup(SSE2UniformConstCostTable, ISD,
- LT.second))
+ if (ST->hasSSE2() &&
+ ((Op2Info == TargetTransformInfo::OK_UniformConstantValue) ||
+ (Op2Info == TargetTransformInfo::OK_UniformValue))) {
+ if (const auto *Entry =
+ CostTableLookup(SSE2UniformCostTable, ISD, LT.second))
return LT.first * Entry->Cost;
}
@@ -309,15 +452,35 @@ int X86TTIImpl::getArithmeticInstrCost(
ISD = ISD::MUL;
}
+ static const CostTblEntry SSE41CostTable[] = {
+ { ISD::SHL, MVT::v16i8, 11 }, // pblendvb sequence.
+ { ISD::SHL, MVT::v32i8, 2*11 }, // pblendvb sequence.
+ { ISD::SHL, MVT::v8i16, 14 }, // pblendvb sequence.
+ { ISD::SHL, MVT::v16i16, 2*14 }, // pblendvb sequence.
+
+ { ISD::SRL, MVT::v16i8, 12 }, // pblendvb sequence.
+ { ISD::SRL, MVT::v32i8, 2*12 }, // pblendvb sequence.
+ { ISD::SRL, MVT::v8i16, 14 }, // pblendvb sequence.
+ { ISD::SRL, MVT::v16i16, 2*14 }, // pblendvb sequence.
+ { ISD::SRL, MVT::v4i32, 11 }, // Shift each lane + blend.
+ { ISD::SRL, MVT::v8i32, 2*11 }, // Shift each lane + blend.
+
+ { ISD::SRA, MVT::v16i8, 24 }, // pblendvb sequence.
+ { ISD::SRA, MVT::v32i8, 2*24 }, // pblendvb sequence.
+ { ISD::SRA, MVT::v8i16, 14 }, // pblendvb sequence.
+ { ISD::SRA, MVT::v16i16, 2*14 }, // pblendvb sequence.
+ { ISD::SRA, MVT::v4i32, 12 }, // Shift each lane + blend.
+ { ISD::SRA, MVT::v8i32, 2*12 }, // Shift each lane + blend.
+ };
+
+ if (ST->hasSSE41()) {
+ if (const auto *Entry = CostTableLookup(SSE41CostTable, ISD, LT.second))
+ return LT.first * Entry->Cost;
+ }
+
static const CostTblEntry SSE2CostTable[] = {
// We don't correctly identify costs of casts because they are marked as
// custom.
- // For some cases, where the shift amount is a scalar we would be able
- // to generate better code. Unfortunately, when this is the case the value
- // (the splat) will get hoisted out of the loop, thereby making it invisible
- // to ISel. The cost model must return worst case assumptions because it is
- // used for vectorization and we don't want to make vectorized code worse
- // than scalar code.
{ ISD::SHL, MVT::v16i8, 26 }, // cmpgtb sequence.
{ ISD::SHL, MVT::v32i8, 2*26 }, // cmpgtb sequence.
{ ISD::SHL, MVT::v8i16, 32 }, // cmpgtb sequence.
@@ -345,6 +508,13 @@ int X86TTIImpl::getArithmeticInstrCost(
{ ISD::SRA, MVT::v2i64, 12 }, // srl/xor/sub sequence.
{ ISD::SRA, MVT::v4i64, 2*12 }, // srl/xor/sub sequence.
+ { ISD::MUL, MVT::v16i8, 12 }, // extend/pmullw/trunc sequence.
+
+ { ISD::FDIV, MVT::f32, 23 }, // Pentium IV from http://www.agner.org/
+ { ISD::FDIV, MVT::v4f32, 39 }, // Pentium IV from http://www.agner.org/
+ { ISD::FDIV, MVT::f64, 38 }, // Pentium IV from http://www.agner.org/
+ { ISD::FDIV, MVT::v2f64, 69 }, // Pentium IV from http://www.agner.org/
+
// It is not a good idea to vectorize division. We have to scalarize it and
// in the process we will often end up having to spill regular
// registers. The overhead of division is going to dominate most kernels
@@ -372,16 +542,20 @@ int X86TTIImpl::getArithmeticInstrCost(
// Two ops + 1 extract + 1 insert = 4.
{ ISD::MUL, MVT::v16i16, 4 },
{ ISD::MUL, MVT::v8i32, 4 },
+ { ISD::SUB, MVT::v32i8, 4 },
+ { ISD::ADD, MVT::v32i8, 4 },
+ { ISD::SUB, MVT::v16i16, 4 },
+ { ISD::ADD, MVT::v16i16, 4 },
{ ISD::SUB, MVT::v8i32, 4 },
{ ISD::ADD, MVT::v8i32, 4 },
{ ISD::SUB, MVT::v4i64, 4 },
{ ISD::ADD, MVT::v4i64, 4 },
// A v4i64 multiply is custom lowered as two split v2i64 vectors that then
- // are lowered as a series of long multiplies(3), shifts(4) and adds(2)
+ // are lowered as a series of long multiplies(3), shifts(3) and adds(2)
// Because we believe v4i64 to be a legal type, we must also include the
- // split factor of two in the cost table. Therefore, the cost here is 18
- // instead of 9.
- { ISD::MUL, MVT::v4i64, 18 },
+ // split factor of two in the cost table. Therefore, the cost here is 16
+ // instead of 8.
+ { ISD::MUL, MVT::v4i64, 16 },
};
// Look for AVX1 lowering tricks.
@@ -395,9 +569,10 @@ int X86TTIImpl::getArithmeticInstrCost(
// Custom lowering of vectors.
static const CostTblEntry CustomLowered[] = {
// A v2i64/v4i64 and multiply is custom lowered as a series of long
- // multiplies(3), shifts(4) and adds(2).
- { ISD::MUL, MVT::v2i64, 9 },
- { ISD::MUL, MVT::v4i64, 9 },
+ // multiplies(3), shifts(3) and adds(2).
+ { ISD::MUL, MVT::v2i64, 8 },
+ { ISD::MUL, MVT::v4i64, 8 },
+ { ISD::MUL, MVT::v8i64, 8 }
};
if (const auto *Entry = CostTableLookup(CustomLowered, ISD, LT.second))
return LT.first * Entry->Cost;
@@ -408,27 +583,123 @@ int X86TTIImpl::getArithmeticInstrCost(
!ST->hasSSE41())
return LT.first * 6;
+ static const CostTblEntry SSE1FloatCostTable[] = {
+ { ISD::FDIV, MVT::f32, 17 }, // Pentium III from http://www.agner.org/
+ { ISD::FDIV, MVT::v4f32, 34 }, // Pentium III from http://www.agner.org/
+ };
+
+ if (ST->hasSSE1())
+ if (const auto *Entry = CostTableLookup(SSE1FloatCostTable, ISD,
+ LT.second))
+ return LT.first * Entry->Cost;
// Fallback to the default implementation.
return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info);
}
int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
Type *SubTp) {
- // We only estimate the cost of reverse and alternate shuffles.
- if (Kind != TTI::SK_Reverse && Kind != TTI::SK_Alternate)
- return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
if (Kind == TTI::SK_Reverse) {
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
- int Cost = 1;
- if (LT.second.getSizeInBits() > 128)
- Cost = 3; // Extract + insert + copy.
- // Multiple by the number of parts.
- return Cost * LT.first;
- }
+ static const CostTblEntry AVX512VBMIShuffleTbl[] = {
+ { ISD::VECTOR_SHUFFLE, MVT::v64i8, 1 }, // vpermb
+ { ISD::VECTOR_SHUFFLE, MVT::v32i8, 1 } // vpermb
+ };
+
+ if (ST->hasVBMI())
+ if (const auto *Entry = CostTableLookup(AVX512VBMIShuffleTbl,
+ ISD::VECTOR_SHUFFLE, LT.second))
+ return LT.first * Entry->Cost;
+
+ static const CostTblEntry AVX512BWShuffleTbl[] = {
+ { ISD::VECTOR_SHUFFLE, MVT::v32i16, 1 }, // vpermw
+ { ISD::VECTOR_SHUFFLE, MVT::v16i16, 1 }, // vpermw
+ { ISD::VECTOR_SHUFFLE, MVT::v64i8, 6 } // vextracti64x4 + 2*vperm2i128
+ // + 2*pshufb + vinserti64x4
+ };
+
+ if (ST->hasBWI())
+ if (const auto *Entry = CostTableLookup(AVX512BWShuffleTbl,
+ ISD::VECTOR_SHUFFLE, LT.second))
+ return LT.first * Entry->Cost;
+
+ static const CostTblEntry AVX512ShuffleTbl[] = {
+ { ISD::VECTOR_SHUFFLE, MVT::v8f64, 1 }, // vpermpd
+ { ISD::VECTOR_SHUFFLE, MVT::v16f32, 1 }, // vpermps
+ { ISD::VECTOR_SHUFFLE, MVT::v8i64, 1 }, // vpermq
+ { ISD::VECTOR_SHUFFLE, MVT::v16i32, 1 }, // vpermd
+ };
+
+ if (ST->hasAVX512())
+ if (const auto *Entry =
+ CostTableLookup(AVX512ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
+ return LT.first * Entry->Cost;
+
+ static const CostTblEntry AVX2ShuffleTbl[] = {
+ { ISD::VECTOR_SHUFFLE, MVT::v4f64, 1 }, // vpermpd
+ { ISD::VECTOR_SHUFFLE, MVT::v8f32, 1 }, // vpermps
+ { ISD::VECTOR_SHUFFLE, MVT::v4i64, 1 }, // vpermq
+ { ISD::VECTOR_SHUFFLE, MVT::v8i32, 1 }, // vpermd
+ { ISD::VECTOR_SHUFFLE, MVT::v16i16, 2 }, // vperm2i128 + pshufb
+ { ISD::VECTOR_SHUFFLE, MVT::v32i8, 2 } // vperm2i128 + pshufb
+ };
+
+ if (ST->hasAVX2())
+ if (const auto *Entry =
+ CostTableLookup(AVX2ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
+ return LT.first * Entry->Cost;
+
+ static const CostTblEntry AVX1ShuffleTbl[] = {
+ { ISD::VECTOR_SHUFFLE, MVT::v4f64, 2 }, // vperm2f128 + vpermilpd
+ { ISD::VECTOR_SHUFFLE, MVT::v8f32, 2 }, // vperm2f128 + vpermilps
+ { ISD::VECTOR_SHUFFLE, MVT::v4i64, 2 }, // vperm2f128 + vpermilpd
+ { ISD::VECTOR_SHUFFLE, MVT::v8i32, 2 }, // vperm2f128 + vpermilps
+ { ISD::VECTOR_SHUFFLE, MVT::v16i16, 4 }, // vextractf128 + 2*pshufb
+ // + vinsertf128
+ { ISD::VECTOR_SHUFFLE, MVT::v32i8, 4 } // vextractf128 + 2*pshufb
+ // + vinsertf128
+ };
+
+ if (ST->hasAVX())
+ if (const auto *Entry =
+ CostTableLookup(AVX1ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
+ return LT.first * Entry->Cost;
+
+ static const CostTblEntry SSSE3ShuffleTbl[] = {
+ { ISD::VECTOR_SHUFFLE, MVT::v8i16, 1 }, // pshufb
+ { ISD::VECTOR_SHUFFLE, MVT::v16i8, 1 } // pshufb
+ };
+
+ if (ST->hasSSSE3())
+ if (const auto *Entry =
+ CostTableLookup(SSSE3ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
+ return LT.first * Entry->Cost;
- if (Kind == TTI::SK_Alternate) {
+ static const CostTblEntry SSE2ShuffleTbl[] = {
+ { ISD::VECTOR_SHUFFLE, MVT::v2f64, 1 }, // shufpd
+ { ISD::VECTOR_SHUFFLE, MVT::v2i64, 1 }, // pshufd
+ { ISD::VECTOR_SHUFFLE, MVT::v4i32, 1 }, // pshufd
+ { ISD::VECTOR_SHUFFLE, MVT::v8i16, 3 }, // pshuflw + pshufhw + pshufd
+ { ISD::VECTOR_SHUFFLE, MVT::v16i8, 9 } // 2*pshuflw + 2*pshufhw
+ // + 2*pshufd + 2*unpck + packus
+ };
+
+ if (ST->hasSSE2())
+ if (const auto *Entry =
+ CostTableLookup(SSE2ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
+ return LT.first * Entry->Cost;
+
+ static const CostTblEntry SSE1ShuffleTbl[] = {
+ { ISD::VECTOR_SHUFFLE, MVT::v4f32, 1 }, // shufps
+ };
+
+ if (ST->hasSSE1())
+ if (const auto *Entry =
+ CostTableLookup(SSE1ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
+ return LT.first * Entry->Cost;
+
+ } else if (Kind == TTI::SK_Alternate) {
// 64-bit packed float vectors (v2f32) are widened to type v4f32.
// 64-bit packed integer vectors (v2i32) are promoted to type v2i64.
std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
@@ -518,7 +789,132 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
if (const auto *Entry = CostTableLookup(SSEAltShuffleTbl,
ISD::VECTOR_SHUFFLE, LT.second))
return LT.first * Entry->Cost;
- return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
+
+ } else if (Kind == TTI::SK_PermuteTwoSrc) {
+ // We assume that source and destination have the same vector type.
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+ int NumOfDests = LT.first;
+ int NumOfShufflesPerDest = LT.first * 2 - 1;
+ int NumOfShuffles = NumOfDests * NumOfShufflesPerDest;
+
+ static const CostTblEntry AVX512VBMIShuffleTbl[] = {
+ {ISD::VECTOR_SHUFFLE, MVT::v64i8, 1}, // vpermt2b
+ {ISD::VECTOR_SHUFFLE, MVT::v32i8, 1}, // vpermt2b
+ {ISD::VECTOR_SHUFFLE, MVT::v16i8, 1} // vpermt2b
+ };
+
+ if (ST->hasVBMI())
+ if (const auto *Entry = CostTableLookup(AVX512VBMIShuffleTbl,
+ ISD::VECTOR_SHUFFLE, LT.second))
+ return NumOfShuffles * Entry->Cost;
+
+ static const CostTblEntry AVX512BWShuffleTbl[] = {
+ {ISD::VECTOR_SHUFFLE, MVT::v32i16, 1}, // vpermt2w
+ {ISD::VECTOR_SHUFFLE, MVT::v16i16, 1}, // vpermt2w
+ {ISD::VECTOR_SHUFFLE, MVT::v8i16, 1}, // vpermt2w
+ {ISD::VECTOR_SHUFFLE, MVT::v32i8, 3}, // zext + vpermt2w + trunc
+ {ISD::VECTOR_SHUFFLE, MVT::v64i8, 19}, // 6 * v32i8 + 1
+ {ISD::VECTOR_SHUFFLE, MVT::v16i8, 3} // zext + vpermt2w + trunc
+ };
+
+ if (ST->hasBWI())
+ if (const auto *Entry = CostTableLookup(AVX512BWShuffleTbl,
+ ISD::VECTOR_SHUFFLE, LT.second))
+ return NumOfShuffles * Entry->Cost;
+
+ static const CostTblEntry AVX512ShuffleTbl[] = {
+ {ISD::VECTOR_SHUFFLE, MVT::v8f64, 1}, // vpermt2pd
+ {ISD::VECTOR_SHUFFLE, MVT::v16f32, 1}, // vpermt2ps
+ {ISD::VECTOR_SHUFFLE, MVT::v8i64, 1}, // vpermt2q
+ {ISD::VECTOR_SHUFFLE, MVT::v16i32, 1}, // vpermt2d
+ {ISD::VECTOR_SHUFFLE, MVT::v4f64, 1}, // vpermt2pd
+ {ISD::VECTOR_SHUFFLE, MVT::v8f32, 1}, // vpermt2ps
+ {ISD::VECTOR_SHUFFLE, MVT::v4i64, 1}, // vpermt2q
+ {ISD::VECTOR_SHUFFLE, MVT::v8i32, 1}, // vpermt2d
+ {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // vpermt2pd
+ {ISD::VECTOR_SHUFFLE, MVT::v4f32, 1}, // vpermt2ps
+ {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // vpermt2q
+ {ISD::VECTOR_SHUFFLE, MVT::v4i32, 1} // vpermt2d
+ };
+
+ if (ST->hasAVX512())
+ if (const auto *Entry =
+ CostTableLookup(AVX512ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
+ return NumOfShuffles * Entry->Cost;
+
+ } else if (Kind == TTI::SK_PermuteSingleSrc) {
+ std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
+ if (LT.first == 1) {
+
+ static const CostTblEntry AVX512VBMIShuffleTbl[] = {
+ {ISD::VECTOR_SHUFFLE, MVT::v64i8, 1}, // vpermb
+ {ISD::VECTOR_SHUFFLE, MVT::v32i8, 1} // vpermb
+ };
+
+ if (ST->hasVBMI())
+ if (const auto *Entry = CostTableLookup(AVX512VBMIShuffleTbl,
+ ISD::VECTOR_SHUFFLE, LT.second))
+ return Entry->Cost;
+
+ static const CostTblEntry AVX512BWShuffleTbl[] = {
+ {ISD::VECTOR_SHUFFLE, MVT::v32i16, 1}, // vpermw
+ {ISD::VECTOR_SHUFFLE, MVT::v16i16, 1}, // vpermw
+ {ISD::VECTOR_SHUFFLE, MVT::v8i16, 1}, // vpermw
+ {ISD::VECTOR_SHUFFLE, MVT::v64i8, 8}, // extend to v32i16
+ {ISD::VECTOR_SHUFFLE, MVT::v32i8, 3} // vpermw + zext/trunc
+ };
+
+ if (ST->hasBWI())
+ if (const auto *Entry = CostTableLookup(AVX512BWShuffleTbl,
+ ISD::VECTOR_SHUFFLE, LT.second))
+ return Entry->Cost;
+
+ static const CostTblEntry AVX512ShuffleTbl[] = {
+ {ISD::VECTOR_SHUFFLE, MVT::v8f64, 1}, // vpermpd
+ {ISD::VECTOR_SHUFFLE, MVT::v4f64, 1}, // vpermpd
+ {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // vpermpd
+ {ISD::VECTOR_SHUFFLE, MVT::v16f32, 1}, // vpermps
+ {ISD::VECTOR_SHUFFLE, MVT::v8f32, 1}, // vpermps
+ {ISD::VECTOR_SHUFFLE, MVT::v4f32, 1}, // vpermps
+ {ISD::VECTOR_SHUFFLE, MVT::v8i64, 1}, // vpermq
+ {ISD::VECTOR_SHUFFLE, MVT::v4i64, 1}, // vpermq
+ {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // vpermq
+ {ISD::VECTOR_SHUFFLE, MVT::v16i32, 1}, // vpermd
+ {ISD::VECTOR_SHUFFLE, MVT::v8i32, 1}, // vpermd
+ {ISD::VECTOR_SHUFFLE, MVT::v4i32, 1}, // vpermd
+ {ISD::VECTOR_SHUFFLE, MVT::v16i8, 1} // pshufb
+ };
+
+ if (ST->hasAVX512())
+ if (const auto *Entry =
+ CostTableLookup(AVX512ShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second))
+ return Entry->Cost;
+
+ } else {
+ // We are going to permute multiple sources and the result will be in
+ // multiple destinations. We provide an accurate cost only for splits
+ // where the element type remains the same.
+
+ MVT LegalVT = LT.second;
+ if (LegalVT.getVectorElementType().getSizeInBits() ==
+ Tp->getVectorElementType()->getPrimitiveSizeInBits() &&
+ LegalVT.getVectorNumElements() < Tp->getVectorNumElements()) {
+
+ unsigned VecTySize = DL.getTypeStoreSize(Tp);
+ unsigned LegalVTSize = LegalVT.getStoreSize();
+ // Number of source vectors after legalization:
+ unsigned NumOfSrcs = (VecTySize + LegalVTSize - 1) / LegalVTSize;
+ // Number of destination vectors after legalization:
+ unsigned NumOfDests = LT.first;
+
+ Type *SingleOpTy = VectorType::get(Tp->getVectorElementType(),
+ LegalVT.getVectorNumElements());
+
+ unsigned NumOfShuffles = (NumOfSrcs - 1) * NumOfDests;
+ return NumOfShuffles *
+ getShuffleCost(TTI::SK_PermuteTwoSrc, SingleOpTy, 0, nullptr);
+ }
+ }
}
return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
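
A small standalone sketch of the shuffle-count arithmetic used for SK_PermuteTwoSrc above (assuming the legalized type splits the vector into LT.first parts):

#include <cstdio>

// Number of target shuffles needed to permute two sources into a result
// that legalizes into NumParts registers: each destination register may
// need data from every legalized source part, as computed above.
static int permuteTwoSrcShuffles(int NumParts /* LT.first */) {
  int NumOfDests = NumParts;
  int NumOfShufflesPerDest = NumParts * 2 - 1;
  return NumOfDests * NumOfShufflesPerDest;
}

int main() {
  std::printf("%d\n", permuteTwoSrcShuffles(1)); // 1 shuffle, no split.
  std::printf("%d\n", permuteTwoSrcShuffles(2)); // 2 * 3 = 6 shuffles.
}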
@@ -532,6 +928,13 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
// potential massive combinations (elem_num x src_type x dst_type).
static const TypeConversionCostTblEntry AVX512DQConversionTbl[] = {
+ { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i64, 1 },
+ { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
+ { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i64, 1 },
+ { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i64, 1 },
+ { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i64, 1 },
+ { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i64, 1 },
+
{ ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 1 },
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i64, 1 },
@@ -539,12 +942,19 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i64, 1 },
{ ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 1 },
- { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 1 },
- { ISD::FP_TO_UINT, MVT::v4i64, MVT::v4f32, 1 },
- { ISD::FP_TO_UINT, MVT::v8i64, MVT::v8f32, 1 },
- { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 },
- { ISD::FP_TO_UINT, MVT::v4i64, MVT::v4f64, 1 },
- { ISD::FP_TO_UINT, MVT::v8i64, MVT::v8f64, 1 },
+ { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 1 },
+ { ISD::FP_TO_SINT, MVT::v4i64, MVT::v4f32, 1 },
+ { ISD::FP_TO_SINT, MVT::v8i64, MVT::v8f32, 1 },
+ { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 },
+ { ISD::FP_TO_SINT, MVT::v4i64, MVT::v4f64, 1 },
+ { ISD::FP_TO_SINT, MVT::v8i64, MVT::v8f64, 1 },
+
+ { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 1 },
+ { ISD::FP_TO_UINT, MVT::v4i64, MVT::v4f32, 1 },
+ { ISD::FP_TO_UINT, MVT::v8i64, MVT::v8f32, 1 },
+ { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 },
+ { ISD::FP_TO_UINT, MVT::v4i64, MVT::v4f64, 1 },
+ { ISD::FP_TO_UINT, MVT::v8i64, MVT::v8f64, 1 },
};
// TODO: For AVX512DQ + AVX512VL, we also have cheap casts for 128-bit and
@@ -779,6 +1189,8 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
{ ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 2*10 },
{ ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 15 },
+ { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 3 },
+
{ ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i8, 1 },
{ ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i8, 6 },
{ ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 2 },
@@ -945,6 +1357,12 @@ int X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) {
int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
ArrayRef<Type *> Tys, FastMathFlags FMF) {
+ // Costs should match the codegen from:
+ // BITREVERSE: llvm\test\CodeGen\X86\vector-bitreverse.ll
+ // BSWAP: llvm\test\CodeGen\X86\bswap-vector.ll
+ // CTLZ: llvm\test\CodeGen\X86\vector-lzcnt-*.ll
+ // CTPOP: llvm\test\CodeGen\X86\vector-popcnt-*.ll
+ // CTTZ: llvm\test\CodeGen\X86\vector-tzcnt-*.ll
static const CostTblEntry XOPCostTbl[] = {
{ ISD::BITREVERSE, MVT::v4i64, 4 },
{ ISD::BITREVERSE, MVT::v8i32, 4 },
@@ -966,7 +1384,25 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
{ ISD::BITREVERSE, MVT::v32i8, 5 },
{ ISD::BSWAP, MVT::v4i64, 1 },
{ ISD::BSWAP, MVT::v8i32, 1 },
- { ISD::BSWAP, MVT::v16i16, 1 }
+ { ISD::BSWAP, MVT::v16i16, 1 },
+ { ISD::CTLZ, MVT::v4i64, 23 },
+ { ISD::CTLZ, MVT::v8i32, 18 },
+ { ISD::CTLZ, MVT::v16i16, 14 },
+ { ISD::CTLZ, MVT::v32i8, 9 },
+ { ISD::CTPOP, MVT::v4i64, 7 },
+ { ISD::CTPOP, MVT::v8i32, 11 },
+ { ISD::CTPOP, MVT::v16i16, 9 },
+ { ISD::CTPOP, MVT::v32i8, 6 },
+ { ISD::CTTZ, MVT::v4i64, 10 },
+ { ISD::CTTZ, MVT::v8i32, 14 },
+ { ISD::CTTZ, MVT::v16i16, 12 },
+ { ISD::CTTZ, MVT::v32i8, 9 },
+ { ISD::FSQRT, MVT::f32, 7 }, // Haswell from http://www.agner.org/
+ { ISD::FSQRT, MVT::v4f32, 7 }, // Haswell from http://www.agner.org/
+ { ISD::FSQRT, MVT::v8f32, 14 }, // Haswell from http://www.agner.org/
+ { ISD::FSQRT, MVT::f64, 14 }, // Haswell from http://www.agner.org/
+ { ISD::FSQRT, MVT::v2f64, 14 }, // Haswell from http://www.agner.org/
+ { ISD::FSQRT, MVT::v4f64, 28 }, // Haswell from http://www.agner.org/
};
static const CostTblEntry AVX1CostTbl[] = {
{ ISD::BITREVERSE, MVT::v4i64, 10 },
@@ -975,7 +1411,29 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
{ ISD::BITREVERSE, MVT::v32i8, 10 },
{ ISD::BSWAP, MVT::v4i64, 4 },
{ ISD::BSWAP, MVT::v8i32, 4 },
- { ISD::BSWAP, MVT::v16i16, 4 }
+ { ISD::BSWAP, MVT::v16i16, 4 },
+ { ISD::CTLZ, MVT::v4i64, 46 },
+ { ISD::CTLZ, MVT::v8i32, 36 },
+ { ISD::CTLZ, MVT::v16i16, 28 },
+ { ISD::CTLZ, MVT::v32i8, 18 },
+ { ISD::CTPOP, MVT::v4i64, 14 },
+ { ISD::CTPOP, MVT::v8i32, 22 },
+ { ISD::CTPOP, MVT::v16i16, 18 },
+ { ISD::CTPOP, MVT::v32i8, 12 },
+ { ISD::CTTZ, MVT::v4i64, 20 },
+ { ISD::CTTZ, MVT::v8i32, 28 },
+ { ISD::CTTZ, MVT::v16i16, 24 },
+ { ISD::CTTZ, MVT::v32i8, 18 },
+ { ISD::FSQRT, MVT::f32, 14 }, // SNB from http://www.agner.org/
+ { ISD::FSQRT, MVT::v4f32, 14 }, // SNB from http://www.agner.org/
+ { ISD::FSQRT, MVT::v8f32, 28 }, // SNB from http://www.agner.org/
+ { ISD::FSQRT, MVT::f64, 21 }, // SNB from http://www.agner.org/
+ { ISD::FSQRT, MVT::v2f64, 21 }, // SNB from http://www.agner.org/
+ { ISD::FSQRT, MVT::v4f64, 43 }, // SNB from http://www.agner.org/
+ };
+ static const CostTblEntry SSE42CostTbl[] = {
+ { ISD::FSQRT, MVT::f32, 18 }, // Nehalem from http://www.agner.org/
+ { ISD::FSQRT, MVT::v4f32, 18 }, // Nehalem from http://www.agner.org/
};
static const CostTblEntry SSSE3CostTbl[] = {
{ ISD::BITREVERSE, MVT::v2i64, 5 },
@@ -984,12 +1442,42 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
{ ISD::BITREVERSE, MVT::v16i8, 5 },
{ ISD::BSWAP, MVT::v2i64, 1 },
{ ISD::BSWAP, MVT::v4i32, 1 },
- { ISD::BSWAP, MVT::v8i16, 1 }
+ { ISD::BSWAP, MVT::v8i16, 1 },
+ { ISD::CTLZ, MVT::v2i64, 23 },
+ { ISD::CTLZ, MVT::v4i32, 18 },
+ { ISD::CTLZ, MVT::v8i16, 14 },
+ { ISD::CTLZ, MVT::v16i8, 9 },
+ { ISD::CTPOP, MVT::v2i64, 7 },
+ { ISD::CTPOP, MVT::v4i32, 11 },
+ { ISD::CTPOP, MVT::v8i16, 9 },
+ { ISD::CTPOP, MVT::v16i8, 6 },
+ { ISD::CTTZ, MVT::v2i64, 10 },
+ { ISD::CTTZ, MVT::v4i32, 14 },
+ { ISD::CTTZ, MVT::v8i16, 12 },
+ { ISD::CTTZ, MVT::v16i8, 9 }
};
static const CostTblEntry SSE2CostTbl[] = {
{ ISD::BSWAP, MVT::v2i64, 7 },
{ ISD::BSWAP, MVT::v4i32, 7 },
- { ISD::BSWAP, MVT::v8i16, 7 }
+ { ISD::BSWAP, MVT::v8i16, 7 },
+ { ISD::CTLZ, MVT::v2i64, 25 },
+ { ISD::CTLZ, MVT::v4i32, 26 },
+ { ISD::CTLZ, MVT::v8i16, 20 },
+ { ISD::CTLZ, MVT::v16i8, 17 },
+ { ISD::CTPOP, MVT::v2i64, 12 },
+ { ISD::CTPOP, MVT::v4i32, 15 },
+ { ISD::CTPOP, MVT::v8i16, 13 },
+ { ISD::CTPOP, MVT::v16i8, 10 },
+ { ISD::CTTZ, MVT::v2i64, 14 },
+ { ISD::CTTZ, MVT::v4i32, 18 },
+ { ISD::CTTZ, MVT::v8i16, 16 },
+ { ISD::CTTZ, MVT::v16i8, 13 },
+ { ISD::FSQRT, MVT::f64, 32 }, // Nehalem from http://www.agner.org/
+ { ISD::FSQRT, MVT::v2f64, 32 }, // Nehalem from http://www.agner.org/
+ };
+ static const CostTblEntry SSE1CostTbl[] = {
+ { ISD::FSQRT, MVT::f32, 28 }, // Pentium III from http://www.agner.org/
+ { ISD::FSQRT, MVT::v4f32, 56 }, // Pentium III from http://www.agner.org/
};
unsigned ISD = ISD::DELETED_NODE;
@@ -1002,6 +1490,18 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
case Intrinsic::bswap:
ISD = ISD::BSWAP;
break;
+ case Intrinsic::ctlz:
+ ISD = ISD::CTLZ;
+ break;
+ case Intrinsic::ctpop:
+ ISD = ISD::CTPOP;
+ break;
+ case Intrinsic::cttz:
+ ISD = ISD::CTTZ;
+ break;
+ case Intrinsic::sqrt:
+ ISD = ISD::FSQRT;
+ break;
}
// Legalize the type.
@@ -1021,6 +1521,10 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
if (const auto *Entry = CostTableLookup(AVX1CostTbl, ISD, MTy))
return LT.first * Entry->Cost;
+ if (ST->hasSSE42())
+ if (const auto *Entry = CostTableLookup(SSE42CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
+
if (ST->hasSSSE3())
if (const auto *Entry = CostTableLookup(SSSE3CostTbl, ISD, MTy))
return LT.first * Entry->Cost;
@@ -1029,6 +1533,10 @@ int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy))
return LT.first * Entry->Cost;
+ if (ST->hasSSE1())
+ if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy))
+ return LT.first * Entry->Cost;
+
return BaseT::getIntrinsicInstrCost(IID, RetTy, Tys, FMF);
}
@@ -1352,7 +1860,7 @@ int X86TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
// immediates here as the normal path expects bit 31 to be sign extended.
if (Idx == 1 && Imm.getBitWidth() == 64 && isUInt<32>(Imm.getZExtValue()))
return TTI::TCC_Free;
- // Fallthrough
+ LLVM_FALLTHROUGH;
case Instruction::Add:
case Instruction::Sub:
case Instruction::Mul:
@@ -1556,13 +2064,14 @@ int X86TTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *SrcVTy,
// Vector-4 of gather/scatter instruction does not exist on KNL.
// We can extend it to 8 elements, but zeroing upper bits of
// the mask vector will add more instructions. Right now we give the scalar
- // cost of vector-4 for KNL. TODO: Check, maybe the gather/scatter instruction is
- // better in the VariableMask case.
+ // cost of vector-4 for KNL. TODO: Check, maybe the gather/scatter instruction
+ // is better in the VariableMask case.
if (VF == 2 || (VF == 4 && !ST->hasVLX()))
Scalarize = true;
if (Scalarize)
- return getGSScalarCost(Opcode, SrcVTy, VariableMask, Alignment, AddressSpace);
+ return getGSScalarCost(Opcode, SrcVTy, VariableMask, Alignment,
+ AddressSpace);
return getGSVectorCost(Opcode, SrcVTy, Ptr, Alignment, AddressSpace);
}
@@ -1572,8 +2081,8 @@ bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy) {
int DataWidth = isa<PointerType>(ScalarTy) ?
DL.getPointerSizeInBits() : ScalarTy->getPrimitiveSizeInBits();
- return (DataWidth >= 32 && ST->hasAVX()) ||
- (DataWidth >= 8 && ST->hasBWI());
+ return ((DataWidth == 32 || DataWidth == 64) && ST->hasAVX()) ||
+ ((DataWidth == 8 || DataWidth == 16) && ST->hasBWI());
}
bool X86TTIImpl::isLegalMaskedStore(Type *DataType) {
@@ -1598,7 +2107,7 @@ bool X86TTIImpl::isLegalMaskedGather(Type *DataTy) {
DL.getPointerSizeInBits() : ScalarTy->getPrimitiveSizeInBits();
// AVX-512 allows gather and scatter
- return DataWidth >= 32 && ST->hasAVX512();
+ return (DataWidth == 32 || DataWidth == 64) && ST->hasAVX512();
}
bool X86TTIImpl::isLegalMaskedScatter(Type *DataType) {
@@ -1620,3 +2129,122 @@ bool X86TTIImpl::areInlineCompatible(const Function *Caller,
// correct.
return (CallerBits & CalleeBits) == CalleeBits;
}
+
+bool X86TTIImpl::enableInterleavedAccessVectorization() {
+ // TODO: We expect this to be beneficial regardless of arch,
+ // but there are currently some unexplained performance artifacts on Atom.
+ // As a temporary solution, disable on Atom.
+ return !(ST->isAtom() || ST->isSLM());
+}
+
+// Get an estimated cost for interleaved load/store operations and strided
+// loads.
+// \p Indices contains the indices for a strided load.
+// \p Factor - the interleaving factor.
+// AVX-512 provides 3-src shuffles that significantly reduce the cost.
+int X86TTIImpl::getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy,
+ unsigned Factor,
+ ArrayRef<unsigned> Indices,
+ unsigned Alignment,
+ unsigned AddressSpace) {
+
+ // VecTy for interleave memop is <VF*Factor x Elt>.
+ // So, for VF=4, Interleave Factor = 3, Element type = i32 we have
+ // VecTy = <12 x i32>.
+
+ // Calculate the number of memory operations (NumOfMemOps), required
+ // for load/store the VecTy.
+ MVT LegalVT = getTLI()->getTypeLegalizationCost(DL, VecTy).second;
+ unsigned VecTySize = DL.getTypeStoreSize(VecTy);
+ unsigned LegalVTSize = LegalVT.getStoreSize();
+ unsigned NumOfMemOps = (VecTySize + LegalVTSize - 1) / LegalVTSize;
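+ // Continuing the example above (illustrative, assuming <12 x i32> is
+ // widened to v16i32 on AVX-512): VecTySize = 48, LegalVTSize = 64,
+ // so NumOfMemOps = 1.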
+
+ // Get the cost of one memory operation.
+ Type *SingleMemOpTy = VectorType::get(VecTy->getVectorElementType(),
+ LegalVT.getVectorNumElements());
+ unsigned MemOpCost =
+ getMemoryOpCost(Opcode, SingleMemOpTy, Alignment, AddressSpace);
+
+ if (Opcode == Instruction::Load) {
+ // The kind of shuffle depends on the number of loaded values.
+ // If we load the entire data in one register, we can use a 1-src shuffle.
+ // Otherwise, we'll merge 2 sources in each operation.
+ TTI::ShuffleKind ShuffleKind =
+ (NumOfMemOps > 1) ? TTI::SK_PermuteTwoSrc : TTI::SK_PermuteSingleSrc;
+
+ unsigned ShuffleCost =
+ getShuffleCost(ShuffleKind, SingleMemOpTy, 0, nullptr);
+
+ unsigned NumOfLoadsInInterleaveGrp =
+ Indices.size() ? Indices.size() : Factor;
+ Type *ResultTy = VectorType::get(VecTy->getVectorElementType(),
+ VecTy->getVectorNumElements() / Factor);
+ unsigned NumOfResults =
+ getTLI()->getTypeLegalizationCost(DL, ResultTy).first *
+ NumOfLoadsInInterleaveGrp;
+
+ // About half of the loads may be folded into shuffles when we have only
+ // one result. If we have more than one result, we do not fold loads at all.
+ unsigned NumOfUnfoldedLoads =
+ NumOfResults > 1 ? NumOfMemOps : NumOfMemOps / 2;
+
+ // Get the number of shuffle operations per result.
+ unsigned NumOfShufflesPerResult =
+ std::max((unsigned)1, (unsigned)(NumOfMemOps - 1));
+
+ // The SK_PermuteTwoSrc shuffle clobbers one of its source operands.
+ // When we have more than one destination, we need additional instructions
+ // to preserve the sources.
+ unsigned NumOfMoves = 0;
+ if (NumOfResults > 1 && ShuffleKind == TTI::SK_PermuteTwoSrc)
+ NumOfMoves = NumOfResults * NumOfShufflesPerResult / 2;
+
+ int Cost = NumOfResults * NumOfShufflesPerResult * ShuffleCost +
+ NumOfUnfoldedLoads * MemOpCost + NumOfMoves;
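+ // Illustrative numbers for the <12 x i32>, Factor = 3 load example above
+ // (assuming v16i32 legalization, all 3 indices requested and <4 x i32>
+ // already legal): NumOfMemOps = 1, so a single-source shuffle is used;
+ // NumOfResults = 3, NumOfShufflesPerResult = 1, NumOfUnfoldedLoads = 1
+ // and NumOfMoves = 0, giving Cost = 3 * ShuffleCost + MemOpCost.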
+
+ return Cost;
+ }
+
+ // Store.
+ assert(Opcode == Instruction::Store &&
+ "Expected Store Instruction at this point");
+
+ // There are no strided stores at the moment, and a store can't be folded
+ // into a shuffle.
+ unsigned NumOfSources = Factor; // The number of values to be merged.
+ unsigned ShuffleCost =
+ getShuffleCost(TTI::SK_PermuteTwoSrc, SingleMemOpTy, 0, nullptr);
+ unsigned NumOfShufflesPerStore = NumOfSources - 1;
+
+ // The SK_PermuteTwoSrc shuffle clobbers one of its source operands.
+ // We need additional instructions to preserve the sources.
+ unsigned NumOfMoves = NumOfMemOps * NumOfShufflesPerStore / 2;
+ int Cost = NumOfMemOps * (MemOpCost + NumOfShufflesPerStore * ShuffleCost) +
+ NumOfMoves;
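+ // Illustrative numbers for the same <12 x i32>, Factor = 3 store
+ // (assuming v16i32 legalization): NumOfMemOps = 1,
+ // NumOfShufflesPerStore = 2 and NumOfMoves = 1, giving
+ // Cost = MemOpCost + 2 * ShuffleCost + 1.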
+ return Cost;
+}
+
+int X86TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
+ unsigned Factor,
+ ArrayRef<unsigned> Indices,
+ unsigned Alignment,
+ unsigned AddressSpace) {
+ auto isSupportedOnAVX512 = [](Type *VecTy, bool &RequiresBW) {
+ RequiresBW = false;
+ Type *EltTy = VecTy->getVectorElementType();
+ if (EltTy->isFloatTy() || EltTy->isDoubleTy() || EltTy->isIntegerTy(64) ||
+ EltTy->isIntegerTy(32) || EltTy->isPointerTy())
+ return true;
+ if (EltTy->isIntegerTy(16) || EltTy->isIntegerTy(8)) {
+ RequiresBW = true;
+ return true;
+ }
+ return false;
+ };
+ bool RequiresBW;
+ bool HasAVX512Solution = isSupportedOnAVX512(VecTy, RequiresBW);
+ if (ST->hasAVX512() && HasAVX512Solution && (!RequiresBW || ST->hasBWI()))
+ return getInterleavedMemoryOpCostAVX512(Opcode, VecTy, Factor, Indices,
+ Alignment, AddressSpace);
+ return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
+ Alignment, AddressSpace);
+}
diff --git a/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.h b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.h
index ab8046bb9fd4..f6bcb9f569e4 100644
--- a/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -43,13 +43,6 @@ public:
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl(F)),
TLI(ST->getTargetLowering()) {}
- // Provide value semantics. MSVC requires that we spell all of these out.
- X86TTIImpl(const X86TTIImpl &Arg)
- : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {}
- X86TTIImpl(X86TTIImpl &&Arg)
- : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
- TLI(std::move(Arg.TLI)) {}
-
/// \name Scalar TTI Implementations
/// @{
TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth);
@@ -87,6 +80,13 @@ public:
int getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm);
+ int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
+ unsigned Factor, ArrayRef<unsigned> Indices,
+ unsigned Alignment, unsigned AddressSpace);
+ int getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy,
+ unsigned Factor, ArrayRef<unsigned> Indices,
+ unsigned Alignment, unsigned AddressSpace);
+
int getIntImmCost(int64_t);
int getIntImmCost(const APInt &Imm, Type *Ty);
@@ -100,6 +100,8 @@ public:
bool isLegalMaskedScatter(Type *DataType);
bool areInlineCompatible(const Function *Caller,
const Function *Callee) const;
+
+ bool enableInterleavedAccessVectorization();
private:
int getGSScalarCost(unsigned Opcode, Type *DataTy, bool VariableMask,
unsigned Alignment, unsigned AddressSpace);
diff --git a/contrib/llvm/lib/Target/X86/X86VZeroUpper.cpp b/contrib/llvm/lib/Target/X86/X86VZeroUpper.cpp
index 9320e1e2226f..9766b84be652 100644
--- a/contrib/llvm/lib/Target/X86/X86VZeroUpper.cpp
+++ b/contrib/llvm/lib/Target/X86/X86VZeroUpper.cpp
@@ -40,9 +40,9 @@ namespace {
bool runOnMachineFunction(MachineFunction &MF) override;
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
- const char *getPassName() const override {return "X86 vzeroupper inserter";}
+ StringRef getPassName() const override { return "X86 vzeroupper inserter"; }
private:
diff --git a/contrib/llvm/lib/Target/X86/X86WinAllocaExpander.cpp b/contrib/llvm/lib/Target/X86/X86WinAllocaExpander.cpp
index cc82074e685f..fc08f1582ad7 100644
--- a/contrib/llvm/lib/Target/X86/X86WinAllocaExpander.cpp
+++ b/contrib/llvm/lib/Target/X86/X86WinAllocaExpander.cpp
@@ -63,7 +63,7 @@ private:
unsigned SlotSize;
int64_t StackProbeSize;
- const char *getPassName() const override { return "X86 WinAlloca Expander"; }
+ StringRef getPassName() const override { return "X86 WinAlloca Expander"; }
static char ID;
};
@@ -225,6 +225,7 @@ void X86WinAllocaExpander::lower(MachineInstr* MI, Lowering L) {
break;
// Fall through to make any remaining adjustment.
+ LLVM_FALLTHROUGH;
case Sub:
assert(Amount > 0);
if (Amount == SlotSize) {
diff --git a/contrib/llvm/lib/Target/X86/X86WinEHState.cpp b/contrib/llvm/lib/Target/X86/X86WinEHState.cpp
index 99387edef99a..bc14630584e5 100644
--- a/contrib/llvm/lib/Target/X86/X86WinEHState.cpp
+++ b/contrib/llvm/lib/Target/X86/X86WinEHState.cpp
@@ -57,7 +57,7 @@ public:
void getAnalysisUsage(AnalysisUsage &AU) const override;
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "Windows 32-bit x86 EH state insertion";
}
diff --git a/contrib/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp b/contrib/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
index 2e8f762458a7..059b75ef482a 100644
--- a/contrib/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
+++ b/contrib/llvm/lib/Target/XCore/Disassembler/XCoreDisassembler.cpp
@@ -769,7 +769,7 @@ MCDisassembler::DecodeStatus XCoreDisassembler::getInstruction(
}
namespace llvm {
- extern Target TheXCoreTarget;
+ Target &getTheXCoreTarget();
}
static MCDisassembler *createXCoreDisassembler(const Target &T,
@@ -780,6 +780,6 @@ static MCDisassembler *createXCoreDisassembler(const Target &T,
extern "C" void LLVMInitializeXCoreDisassembler() {
// Register the disassembler.
- TargetRegistry::RegisterMCDisassembler(TheXCoreTarget,
+ TargetRegistry::RegisterMCDisassembler(getTheXCoreTarget(),
createXCoreDisassembler);
}
diff --git a/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
index 63ca1e7d4646..c5859b7786f7 100644
--- a/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.cpp
@@ -124,26 +124,28 @@ static MCTargetStreamer *createTargetAsmStreamer(MCStreamer &S,
// Force static initialization.
extern "C" void LLVMInitializeXCoreTargetMC() {
// Register the MC asm info.
- RegisterMCAsmInfoFn X(TheXCoreTarget, createXCoreMCAsmInfo);
+ RegisterMCAsmInfoFn X(getTheXCoreTarget(), createXCoreMCAsmInfo);
// Register the MC codegen info.
- TargetRegistry::registerMCAdjustCodeGenOpts(TheXCoreTarget,
+ TargetRegistry::registerMCAdjustCodeGenOpts(getTheXCoreTarget(),
adjustCodeGenOpts);
// Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(TheXCoreTarget, createXCoreMCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(getTheXCoreTarget(),
+ createXCoreMCInstrInfo);
// Register the MC register info.
- TargetRegistry::RegisterMCRegInfo(TheXCoreTarget, createXCoreMCRegisterInfo);
+ TargetRegistry::RegisterMCRegInfo(getTheXCoreTarget(),
+ createXCoreMCRegisterInfo);
// Register the MC subtarget info.
- TargetRegistry::RegisterMCSubtargetInfo(TheXCoreTarget,
+ TargetRegistry::RegisterMCSubtargetInfo(getTheXCoreTarget(),
createXCoreMCSubtargetInfo);
// Register the MCInstPrinter
- TargetRegistry::RegisterMCInstPrinter(TheXCoreTarget,
+ TargetRegistry::RegisterMCInstPrinter(getTheXCoreTarget(),
createXCoreMCInstPrinter);
- TargetRegistry::RegisterAsmTargetStreamer(TheXCoreTarget,
+ TargetRegistry::RegisterAsmTargetStreamer(getTheXCoreTarget(),
createTargetAsmStreamer);
}
diff --git a/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
index 28e0275c72db..ac0f3fefbae7 100644
--- a/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
+++ b/contrib/llvm/lib/Target/XCore/MCTargetDesc/XCoreMCTargetDesc.h
@@ -18,8 +18,7 @@
namespace llvm {
class Target;
-
-extern Target TheXCoreTarget;
+Target &getTheXCoreTarget();
} // End llvm namespace
diff --git a/contrib/llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp b/contrib/llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp
index c78cde9ac8a6..df5774c7e8ea 100644
--- a/contrib/llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp
+++ b/contrib/llvm/lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp
@@ -12,8 +12,11 @@
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
-Target llvm::TheXCoreTarget;
+Target &llvm::getTheXCoreTarget() {
+ static Target TheXCoreTarget;
+ return TheXCoreTarget;
+}
extern "C" void LLVMInitializeXCoreTargetInfo() {
- RegisterTarget<Triple::xcore> X(TheXCoreTarget, "xcore", "XCore");
+ RegisterTarget<Triple::xcore> X(getTheXCoreTarget(), "xcore", "XCore");
}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp b/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp
index be66e6cb8124..b35aa0b95821 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp
@@ -58,9 +58,7 @@ namespace {
std::unique_ptr<MCStreamer> Streamer)
: AsmPrinter(TM, std::move(Streamer)), MCInstLowering(*this) {}
- const char *getPassName() const override {
- return "XCore Assembly Printer";
- }
+ StringRef getPassName() const override { return "XCore Assembly Printer"; }
void printInlineJT(const MachineInstr *MI, int opNum, raw_ostream &O,
const std::string &directive = ".jmptable");
@@ -115,8 +113,7 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
return;
const DataLayout &DL = getDataLayout();
- OutStreamer->SwitchSection(
- getObjFileLowering().SectionForGlobal(GV, *Mang, TM));
+ OutStreamer->SwitchSection(getObjFileLowering().SectionForGlobal(GV, TM));
MCSymbol *GVSym = getSymbol(GV);
const Constant *C = GV->getInitializer();
@@ -140,7 +137,7 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
if (GV->hasWeakLinkage() || GV->hasLinkOnceLinkage() ||
GV->hasCommonLinkage())
OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Weak);
- // FALL THROUGH
+ LLVM_FALLTHROUGH;
case GlobalValue::InternalLinkage:
case GlobalValue::PrivateLinkage:
break;
@@ -156,8 +153,7 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
unsigned Size = DL.getTypeAllocSize(C->getType());
if (MAI->hasDotTypeDotSizeDirective()) {
OutStreamer->EmitSymbolAttribute(GVSym, MCSA_ELF_TypeObject);
- OutStreamer->emitELFSize(cast<MCSymbolELF>(GVSym),
- MCConstantExpr::create(Size, OutContext));
+ OutStreamer->emitELFSize(GVSym, MCConstantExpr::create(Size, OutContext));
}
OutStreamer->EmitLabel(GVSym);
@@ -172,7 +168,7 @@ void XCoreAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
}
void XCoreAsmPrinter::EmitFunctionBodyStart() {
- MCInstLowering.Initialize(Mang, &MF->getContext());
+ MCInstLowering.Initialize(&MF->getContext());
}
/// EmitFunctionBodyEnd - Targets can override this to emit stuff after
@@ -300,5 +296,5 @@ void XCoreAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// Force static initialization.
extern "C" void LLVMInitializeXCoreAsmPrinter() {
- RegisterAsmPrinter<XCoreAsmPrinter> X(TheXCoreTarget);
+ RegisterAsmPrinter<XCoreAsmPrinter> X(getTheXCoreTarget());
}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp
index 75a2eb0fdd2f..e0e2e0319964 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreFrameLowering.cpp
@@ -61,8 +61,8 @@ static bool CompareSSIOffset(const StackSlotInfo& a, const StackSlotInfo& b) {
static void EmitDefCfaRegister(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const DebugLoc &dl, const TargetInstrInfo &TII,
- MachineModuleInfo *MMI, unsigned DRegNum) {
- unsigned CFIIndex = MMI->addFrameInst(
+ MachineFunction &MF, unsigned DRegNum) {
+ unsigned CFIIndex = MF.addFrameInst(
MCCFIInstruction::createDefCfaRegister(nullptr, DRegNum));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
@@ -71,18 +71,20 @@ static void EmitDefCfaRegister(MachineBasicBlock &MBB,
static void EmitDefCfaOffset(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
const DebugLoc &dl, const TargetInstrInfo &TII,
- MachineModuleInfo *MMI, int Offset) {
+ int Offset) {
+ MachineFunction &MF = *MBB.getParent();
unsigned CFIIndex =
- MMI->addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -Offset));
+ MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, -Offset));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
}
static void EmitCfiOffset(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, const DebugLoc &dl,
- const TargetInstrInfo &TII, MachineModuleInfo *MMI,
- unsigned DRegNum, int Offset) {
- unsigned CFIIndex = MMI->addFrameInst(
+ const TargetInstrInfo &TII, unsigned DRegNum,
+ int Offset) {
+ MachineFunction &MF = *MBB.getParent();
+ unsigned CFIIndex = MF.addFrameInst(
MCCFIInstruction::createOffset(nullptr, DRegNum, Offset));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex);
@@ -96,9 +98,8 @@ static void EmitCfiOffset(MachineBasicBlock &MBB,
/// \param [in,out] Adjusted the current SP offset from the top of the frame.
static void IfNeededExtSP(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, const DebugLoc &dl,
- const TargetInstrInfo &TII, MachineModuleInfo *MMI,
- int OffsetFromTop, int &Adjusted, int FrameSize,
- bool emitFrameMoves) {
+ const TargetInstrInfo &TII, int OffsetFromTop,
+ int &Adjusted, int FrameSize, bool emitFrameMoves) {
while (OffsetFromTop > Adjusted) {
assert(Adjusted < FrameSize && "OffsetFromTop is beyond FrameSize");
int remaining = FrameSize - Adjusted;
@@ -107,7 +108,7 @@ static void IfNeededExtSP(MachineBasicBlock &MBB,
BuildMI(MBB, MBBI, dl, TII.get(Opcode)).addImm(OpImm);
Adjusted += OpImm;
if (emitFrameMoves)
- EmitDefCfaOffset(MBB, MBBI, dl, TII, MMI, Adjusted*4);
+ EmitDefCfaOffset(MBB, MBBI, dl, TII, Adjusted*4);
}
}
@@ -136,16 +137,16 @@ static void IfNeededLDAWSP(MachineBasicBlock &MBB,
/// Registers are ordered according to their frame offset.
/// As offsets are negative, the largest offsets will be first.
static void GetSpillList(SmallVectorImpl<StackSlotInfo> &SpillList,
- MachineFrameInfo *MFI, XCoreFunctionInfo *XFI,
+ MachineFrameInfo &MFI, XCoreFunctionInfo *XFI,
bool fetchLR, bool fetchFP) {
if (fetchLR) {
- int Offset = MFI->getObjectOffset(XFI->getLRSpillSlot());
+ int Offset = MFI.getObjectOffset(XFI->getLRSpillSlot());
SpillList.push_back(StackSlotInfo(XFI->getLRSpillSlot(),
Offset,
XCore::LR));
}
if (fetchFP) {
- int Offset = MFI->getObjectOffset(XFI->getFPSpillSlot());
+ int Offset = MFI.getObjectOffset(XFI->getFPSpillSlot());
SpillList.push_back(StackSlotInfo(XFI->getFPSpillSlot(),
Offset,
FramePtr));
@@ -158,16 +159,16 @@ static void GetSpillList(SmallVectorImpl<StackSlotInfo> &SpillList,
/// Registers are ordered according to their frame offset.
/// As offsets are negative, the largest offsets will be first.
static void GetEHSpillList(SmallVectorImpl<StackSlotInfo> &SpillList,
- MachineFrameInfo *MFI, XCoreFunctionInfo *XFI,
+ MachineFrameInfo &MFI, XCoreFunctionInfo *XFI,
const Constant *PersonalityFn,
const TargetLowering *TL) {
assert(XFI->hasEHSpillSlot() && "There are no EH register spill slots");
const int *EHSlot = XFI->getEHSpillSlot();
SpillList.push_back(
- StackSlotInfo(EHSlot[0], MFI->getObjectOffset(EHSlot[0]),
+ StackSlotInfo(EHSlot[0], MFI.getObjectOffset(EHSlot[0]),
TL->getExceptionPointerRegister(PersonalityFn)));
SpillList.push_back(
- StackSlotInfo(EHSlot[0], MFI->getObjectOffset(EHSlot[1]),
+ StackSlotInfo(EHSlot[0], MFI.getObjectOffset(EHSlot[1]),
TL->getExceptionSelectorRegister(PersonalityFn)));
std::sort(SpillList.begin(), SpillList.end(), CompareSSIOffset);
}
@@ -176,7 +177,7 @@ static MachineMemOperand *getFrameIndexMMO(MachineBasicBlock &MBB,
int FrameIndex,
MachineMemOperand::Flags flags) {
MachineFunction *MF = MBB.getParent();
- const MachineFrameInfo &MFI = *MF->getFrameInfo();
+ const MachineFrameInfo &MFI = MF->getFrameInfo();
MachineMemOperand *MMO = MF->getMachineMemOperand(
MachinePointerInfo::getFixedStack(*MF, FrameIndex), flags,
MFI.getObjectSize(FrameIndex), MFI.getObjectAlignment(FrameIndex));
@@ -217,14 +218,14 @@ XCoreFrameLowering::XCoreFrameLowering(const XCoreSubtarget &sti)
bool XCoreFrameLowering::hasFP(const MachineFunction &MF) const {
return MF.getTarget().Options.DisableFramePointerElim(MF) ||
- MF.getFrameInfo()->hasVarSizedObjects();
+ MF.getFrameInfo().hasVarSizedObjects();
}
void XCoreFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
MachineBasicBlock::iterator MBBI = MBB.begin();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
MachineModuleInfo *MMI = &MF.getMMI();
const MCRegisterInfo *MRI = MMI->getContext().getRegisterInfo();
const XCoreInstrInfo &TII = *MF.getSubtarget<XCoreSubtarget>().getInstrInfo();
@@ -233,9 +234,9 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF,
// to determine the end of the prologue.
DebugLoc dl;
- if (MFI->getMaxAlignment() > getStackAlignment())
+ if (MFI.getMaxAlignment() > getStackAlignment())
report_fatal_error("emitPrologue unsupported alignment: "
- + Twine(MFI->getMaxAlignment()));
+ + Twine(MFI.getMaxAlignment()));
const AttributeSet &PAL = MF.getFunction()->getAttributes();
if (PAL.hasAttrSomewhere(Attribute::Nest))
@@ -244,13 +245,13 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF,
// Work out frame sizes.
// We will adjust the SP in stages towards the final FrameSize.
- assert(MFI->getStackSize()%4 == 0 && "Misaligned frame size");
- const int FrameSize = MFI->getStackSize() / 4;
+ assert(MFI.getStackSize()%4 == 0 && "Misaligned frame size");
+ const int FrameSize = MFI.getStackSize() / 4;
int Adjusted = 0;
bool saveLR = XFI->hasLRSpillSlot();
bool UseENTSP = saveLR && FrameSize
- && (MFI->getObjectOffset(XFI->getLRSpillSlot()) == 0);
+ && (MFI.getObjectOffset(XFI->getLRSpillSlot()) == 0);
if (UseENTSP)
saveLR = false;
bool FP = hasFP(MF);
@@ -266,9 +267,9 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF,
MIB->addRegisterKilled(XCore::LR, MF.getSubtarget().getRegisterInfo(),
true);
if (emitFrameMoves) {
- EmitDefCfaOffset(MBB, MBBI, dl, TII, MMI, Adjusted*4);
+ EmitDefCfaOffset(MBB, MBBI, dl, TII, Adjusted*4);
unsigned DRegNum = MRI->getDwarfRegNum(XCore::LR, true);
- EmitCfiOffset(MBB, MBBI, dl, TII, MMI, DRegNum, 0);
+ EmitCfiOffset(MBB, MBBI, dl, TII, DRegNum, 0);
}
}
@@ -281,7 +282,7 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF,
assert(SpillList[i].Offset % 4 == 0 && "Misaligned stack offset");
assert(SpillList[i].Offset <= 0 && "Unexpected positive stack offset");
int OffsetFromTop = - SpillList[i].Offset/4;
- IfNeededExtSP(MBB, MBBI, dl, TII, MMI, OffsetFromTop, Adjusted, FrameSize,
+ IfNeededExtSP(MBB, MBBI, dl, TII, OffsetFromTop, Adjusted, FrameSize,
emitFrameMoves);
int Offset = Adjusted - OffsetFromTop;
int Opcode = isImmU6(Offset) ? XCore::STWSP_ru6 : XCore::STWSP_lru6;
@@ -293,12 +294,12 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF,
MachineMemOperand::MOStore));
if (emitFrameMoves) {
unsigned DRegNum = MRI->getDwarfRegNum(SpillList[i].Reg, true);
- EmitCfiOffset(MBB, MBBI, dl, TII, MMI, DRegNum, SpillList[i].Offset);
+ EmitCfiOffset(MBB, MBBI, dl, TII, DRegNum, SpillList[i].Offset);
}
}
// Complete any remaining Stack adjustment.
- IfNeededExtSP(MBB, MBBI, dl, TII, MMI, FrameSize, Adjusted, FrameSize,
+ IfNeededExtSP(MBB, MBBI, dl, TII, FrameSize, Adjusted, FrameSize,
emitFrameMoves);
assert(Adjusted==FrameSize && "IfNeededExtSP has not completed adjustment");
@@ -306,7 +307,7 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF,
// Set the FP from the SP.
BuildMI(MBB, MBBI, dl, TII.get(XCore::LDAWSP_ru6), FramePtr).addImm(0);
if (emitFrameMoves)
- EmitDefCfaRegister(MBB, MBBI, dl, TII, MMI,
+ EmitDefCfaRegister(MBB, MBBI, dl, TII, MF,
MRI->getDwarfRegNum(FramePtr, true));
}
@@ -316,9 +317,9 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock::iterator Pos = SpillLabel.first;
++Pos;
const CalleeSavedInfo &CSI = SpillLabel.second;
- int Offset = MFI->getObjectOffset(CSI.getFrameIdx());
+ int Offset = MFI.getObjectOffset(CSI.getFrameIdx());
unsigned DRegNum = MRI->getDwarfRegNum(CSI.getReg(), true);
- EmitCfiOffset(MBB, Pos, dl, TII, MMI, DRegNum, Offset);
+ EmitCfiOffset(MBB, Pos, dl, TII, DRegNum, Offset);
}
if (XFI->hasEHSpillSlot()) {
// The unwinder requires stack slot & CFI offsets for the exception info.
@@ -330,10 +331,10 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF,
GetEHSpillList(SpillList, MFI, XFI, PersonalityFn,
MF.getSubtarget().getTargetLowering());
assert(SpillList.size()==2 && "Unexpected SpillList size");
- EmitCfiOffset(MBB, MBBI, dl, TII, MMI,
+ EmitCfiOffset(MBB, MBBI, dl, TII,
MRI->getDwarfRegNum(SpillList[0].Reg, true),
SpillList[0].Offset);
- EmitCfiOffset(MBB, MBBI, dl, TII, MMI,
+ EmitCfiOffset(MBB, MBBI, dl, TII,
MRI->getDwarfRegNum(SpillList[1].Reg, true),
SpillList[1].Offset);
}
@@ -342,7 +343,7 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF,
void XCoreFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
const XCoreInstrInfo &TII = *MF.getSubtarget<XCoreSubtarget>().getInstrInfo();
XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
@@ -351,7 +352,7 @@ void XCoreFrameLowering::emitEpilogue(MachineFunction &MF,
// Work out frame sizes.
// We will adjust the SP in stages towards the final FrameSize.
- int RemainingAdj = MFI->getStackSize();
+ int RemainingAdj = MFI.getStackSize();
assert(RemainingAdj%4 == 0 && "Misaligned frame size");
RemainingAdj /= 4;
@@ -377,7 +378,7 @@ void XCoreFrameLowering::emitEpilogue(MachineFunction &MF,
bool restoreLR = XFI->hasLRSpillSlot();
bool UseRETSP = restoreLR && RemainingAdj
- && (MFI->getObjectOffset(XFI->getLRSpillSlot()) == 0);
+ && (MFI.getObjectOffset(XFI->getLRSpillSlot()) == 0);
if (UseRETSP)
restoreLR = false;
bool FP = hasFP(MF);
@@ -490,8 +491,8 @@ MachineBasicBlock::iterator XCoreFrameLowering::eliminateCallFramePseudoInstr(
if (!hasReservedCallFrame(MF)) {
// Turn the adjcallstackdown instruction into 'extsp <amt>' and the
// adjcallstackup instruction into 'ldaw sp, sp[<amt>]'
- MachineInstr *Old = I;
- uint64_t Amount = Old->getOperand(0).getImm();
+ MachineInstr &Old = *I;
+ uint64_t Amount = Old.getOperand(0).getImm();
if (Amount != 0) {
// We need to keep the stack aligned properly. To do this, we round the
// amount of space needed for the outgoing arguments up to the next
@@ -513,15 +514,14 @@ MachineBasicBlock::iterator XCoreFrameLowering::eliminateCallFramePseudoInstr(
}
MachineInstr *New;
- if (Old->getOpcode() == XCore::ADJCALLSTACKDOWN) {
+ if (Old.getOpcode() == XCore::ADJCALLSTACKDOWN) {
int Opcode = isU6 ? XCore::EXTSP_u6 : XCore::EXTSP_lu6;
- New=BuildMI(MF, Old->getDebugLoc(), TII.get(Opcode))
- .addImm(Amount);
+ New = BuildMI(MF, Old.getDebugLoc(), TII.get(Opcode)).addImm(Amount);
} else {
- assert(Old->getOpcode() == XCore::ADJCALLSTACKUP);
+ assert(Old.getOpcode() == XCore::ADJCALLSTACKUP);
int Opcode = isU6 ? XCore::LDAWSP_ru6 : XCore::LDAWSP_lru6;
- New=BuildMI(MF, Old->getDebugLoc(), TII.get(Opcode), XCore::SP)
- .addImm(Amount);
+ New = BuildMI(MF, Old.getDebugLoc(), TII.get(Opcode), XCore::SP)
+ .addImm(Amount);
}
// Replace the pseudo instruction with a new instruction...
@@ -543,12 +543,12 @@ void XCoreFrameLowering::determineCalleeSaves(MachineFunction &MF,
bool LRUsed = MRI.isPhysRegModified(XCore::LR);
if (!LRUsed && !MF.getFunction()->isVarArg() &&
- MF.getFrameInfo()->estimateStackSize(MF))
+ MF.getFrameInfo().estimateStackSize(MF))
// If we need to extend the stack it is more efficient to use entsp / retsp.
// We force the LR to be saved so these instructions are used.
LRUsed = true;
- if (MF.getMMI().callsUnwindInit() || MF.getMMI().callsEHReturn()) {
+ if (MF.callsUnwindInit() || MF.callsEHReturn()) {
// The unwinder expects to find spill slots for the exception info regs R0
// & R1. These are used during llvm.eh.return() to 'restore' the exception
// info. N.B. we do not spill or restore R0, R1 during normal operation.
@@ -574,7 +574,7 @@ void XCoreFrameLowering::
processFunctionBeforeFrameFinalized(MachineFunction &MF,
RegScavenger *RS) const {
assert(RS && "requiresRegisterScavenging failed");
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
const TargetRegisterClass *RC = &XCore::GRRegsRegClass;
XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
// Reserve slots close to SP or frame pointer for Scavenging spills.
@@ -582,11 +582,11 @@ processFunctionBeforeFrameFinalized(MachineFunction &MF,
// When using SP for large frames, we may need 2 scratch registers.
// When using FP, for large or small frames, we may need 1 scratch register.
if (XFI->isLargeFrame(MF) || hasFP(MF))
- RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
- RC->getAlignment(),
- false));
+ RS->addScavengingFrameIndex(MFI.CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
if (XFI->isLargeFrame(MF) && !hasFP(MF))
- RS->addScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
- RC->getAlignment(),
- false));
+ RS->addScavengingFrameIndex(MFI.CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp b/contrib/llvm/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp
index bd6baef3271e..4b10e71be03d 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreFrameToArgsOffsetElim.cpp
@@ -29,10 +29,10 @@ namespace {
bool runOnMachineFunction(MachineFunction &Fn) override;
MachineFunctionProperties getRequiredProperties() const override {
return MachineFunctionProperties().set(
- MachineFunctionProperties::Property::AllVRegsAllocated);
+ MachineFunctionProperties::Property::NoVRegs);
}
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "XCore FRAME_TO_ARGS_OFFSET Elimination";
}
};
@@ -48,17 +48,17 @@ FunctionPass *llvm::createXCoreFrameToArgsOffsetEliminationPass() {
bool XCoreFTAOElim::runOnMachineFunction(MachineFunction &MF) {
const XCoreInstrInfo &TII =
*static_cast<const XCoreInstrInfo *>(MF.getSubtarget().getInstrInfo());
- unsigned StackSize = MF.getFrameInfo()->getStackSize();
+ unsigned StackSize = MF.getFrameInfo().getStackSize();
for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E;
++MFI) {
MachineBasicBlock &MBB = *MFI;
for (MachineBasicBlock::iterator MBBI = MBB.begin(), EE = MBB.end();
MBBI != EE; ++MBBI) {
if (MBBI->getOpcode() == XCore::FRAME_TO_ARGS_OFFSET) {
- MachineInstr *OldInst = MBBI;
- unsigned Reg = OldInst->getOperand(0).getReg();
+ MachineInstr &OldInst = *MBBI;
+ unsigned Reg = OldInst.getOperand(0).getReg();
MBBI = TII.loadImmediate(MBB, MBBI, Reg, StackSize);
- OldInst->eraseFromParent();
+ OldInst.eraseFromParent();
}
}
}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp
index ce25cbcfd124..086d1d544f69 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -67,7 +67,7 @@ namespace {
bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
std::vector<SDValue> &OutOps) override;
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "XCore DAG->DAG Pattern Instruction Selection";
}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp
index 6f6ac3bd5f4c..9244d594460f 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreISelLowering.cpp
@@ -1260,7 +1260,7 @@ SDValue XCoreTargetLowering::LowerCCCArguments(
const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
MachineRegisterInfo &RegInfo = MF.getRegInfo();
XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>();
@@ -1324,9 +1324,9 @@ SDValue XCoreTargetLowering::LowerCCCArguments(
<< "\n";
}
// Create the frame index object for this incoming parameter...
- int FI = MFI->CreateFixedObject(ObjSize,
- LRSaveSize + VA.getLocMemOffset(),
- true);
+ int FI = MFI.CreateFixedObject(ObjSize,
+ LRSaveSize + VA.getLocMemOffset(),
+ true);
// Create the SelectionDAG nodes corresponding to a load
//from this parameter
@@ -1352,7 +1352,7 @@ SDValue XCoreTargetLowering::LowerCCCArguments(
// address
for (int i = array_lengthof(ArgRegs) - 1; i >= (int)FirstVAReg; --i) {
// Create a stack slot
- int FI = MFI->CreateFixedObject(4, offset, true);
+ int FI = MFI.CreateFixedObject(4, offset, true);
if (i == (int)FirstVAReg) {
XFI->setVarArgsFrameIndex(FI);
}
@@ -1371,8 +1371,8 @@ SDValue XCoreTargetLowering::LowerCCCArguments(
} else {
// This will point to the next argument passed via stack.
XFI->setVarArgsFrameIndex(
- MFI->CreateFixedObject(4, LRSaveSize + CCInfo.getNextStackOffset(),
- true));
+ MFI.CreateFixedObject(4, LRSaveSize + CCInfo.getNextStackOffset(),
+ true));
}
}
@@ -1391,7 +1391,7 @@ SDValue XCoreTargetLowering::LowerCCCArguments(
unsigned Size = ArgDI->Flags.getByValSize();
unsigned Align = std::max(StackSlotSize, ArgDI->Flags.getByValAlign());
// Create a new object on the stack and copy the pointee into it.
- int FI = MFI->CreateStackObject(Size, Align, false);
+ int FI = MFI.CreateStackObject(Size, Align, false);
SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
InVals.push_back(FIN);
MemOps.push_back(DAG.getMemcpy(Chain, dl, FIN, ArgDI->SDV,
@@ -1440,7 +1440,7 @@ XCoreTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
XCoreFunctionInfo *XFI =
DAG.getMachineFunction().getInfo<XCoreFunctionInfo>();
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
// CCValAssign - represent the assignment of
// the return value to a location
@@ -1476,7 +1476,7 @@ XCoreTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
int Offset = VA.getLocMemOffset();
unsigned ObjSize = VA.getLocVT().getSizeInBits() / 8;
// Create the frame index object for the memory location.
- int FI = MFI->CreateFixedObject(ObjSize, Offset, false);
+ int FI = MFI.CreateFixedObject(ObjSize, Offset, false);
// Create a SelectionDAG node corresponding to a store
// to this memory location.
diff --git a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
index e0b3e7153da9..7a9c6fc93f8a 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -184,7 +184,7 @@ static inline XCore::CondCode GetOppositeBranchCondition(XCore::CondCode CC)
/// operands can be passed to other TargetInstrInfo methods to create new
/// branches.
///
-/// Note that RemoveBranch and InsertBranch must be implemented to support
+/// Note that removeBranch and insertBranch must be implemented to support
/// cases where this method returns success.
///
bool XCoreInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
@@ -201,8 +201,8 @@ bool XCoreInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
return false;
// Get the last instruction in the block.
- MachineInstr *LastInst = I;
-
+ MachineInstr *LastInst = &*I;
+
// If there is only one terminator instruction, process it.
if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
if (IsBRU(LastInst->getOpcode())) {
@@ -224,7 +224,7 @@ bool XCoreInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
}
// Get the instruction before it if it's a terminator.
- MachineInstr *SecondLastInst = I;
+ MachineInstr *SecondLastInst = &*I;
// If there are three terminators, we don't know what sort of block this is.
if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
@@ -269,16 +269,18 @@ bool XCoreInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
return true;
}
-unsigned XCoreInstrInfo::InsertBranch(MachineBasicBlock &MBB,
+unsigned XCoreInstrInfo::insertBranch(MachineBasicBlock &MBB,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
ArrayRef<MachineOperand> Cond,
- const DebugLoc &DL) const {
+ const DebugLoc &DL,
+ int *BytesAdded) const {
// Shouldn't be a fall through.
- assert(TBB && "InsertBranch must not be told to insert a fallthrough");
+ assert(TBB && "insertBranch must not be told to insert a fallthrough");
assert((Cond.size() == 2 || Cond.size() == 0) &&
"Unexpected number of components!");
-
+ assert(!BytesAdded && "code size not handled");
+
if (!FBB) { // One way branch.
if (Cond.empty()) {
// Unconditional branch
@@ -302,7 +304,9 @@ unsigned XCoreInstrInfo::InsertBranch(MachineBasicBlock &MBB,
}
unsigned
-XCoreInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
+XCoreInstrInfo::removeBranch(MachineBasicBlock &MBB, int *BytesRemoved) const {
+ assert(!BytesRemoved && "code size not handled");
+
MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
if (I == MBB.end())
return 0;
@@ -363,7 +367,7 @@ void XCoreInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
if (I != MBB.end() && !I->isDebugValue())
DL = I->getDebugLoc();
MachineFunction *MF = MBB.getParent();
- const MachineFrameInfo &MFI = *MF->getFrameInfo();
+ const MachineFrameInfo &MFI = MF->getFrameInfo();
MachineMemOperand *MMO = MF->getMachineMemOperand(
MachinePointerInfo::getFixedStack(*MF, FrameIndex),
MachineMemOperand::MOStore, MFI.getObjectSize(FrameIndex),
@@ -385,7 +389,7 @@ void XCoreInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
if (I != MBB.end() && !I->isDebugValue())
DL = I->getDebugLoc();
MachineFunction *MF = MBB.getParent();
- const MachineFrameInfo &MFI = *MF->getFrameInfo();
+ const MachineFrameInfo &MFI = MF->getFrameInfo();
MachineMemOperand *MMO = MF->getMachineMemOperand(
MachinePointerInfo::getFixedStack(*MF, FrameIndex),
MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIndex),
@@ -396,11 +400,9 @@ void XCoreInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
.addMemOperand(MMO);
}
-/// ReverseBranchCondition - Return the inverse opcode of the
-/// specified Branch instruction.
bool XCoreInstrInfo::
-ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
- assert((Cond.size() == 2) &&
+reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
+ assert((Cond.size() == 2) &&
"Invalid XCore branch condition!");
Cond[0].setImm(GetOppositeBranchCondition((XCore::CondCode)Cond[0].getImm()));
return false;
diff --git a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.h b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.h
index 783bc6bab5d5..a377784caf4b 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.h
+++ b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.h
@@ -55,11 +55,13 @@ public:
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const override;
- unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
- const DebugLoc &DL) const override;
+ const DebugLoc &DL,
+ int *BytesAdded = nullptr) const override;
- unsigned RemoveBranch(MachineBasicBlock &MBB) const override;
+ unsigned removeBranch(MachineBasicBlock &MBB,
+ int *BytesRemoved = nullptr) const override;
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
const DebugLoc &DL, unsigned DestReg, unsigned SrcReg,
@@ -77,7 +79,7 @@ public:
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const override;
- bool ReverseBranchCondition(
+ bool reverseBranchCondition(
SmallVectorImpl<MachineOperand> &Cond) const override;
// Emit code before MBBI to load immediate value into physical register Reg.
diff --git a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td
index 8110b918c22b..f1d52d5a191f 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td
+++ b/contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td
@@ -146,18 +146,10 @@ def immU6 : PatLeaf<(imm), [{
return (uint32_t)N->getZExtValue() < (1 << 6);
}]>;
-def immU10 : PatLeaf<(imm), [{
- return (uint32_t)N->getZExtValue() < (1 << 10);
-}]>;
-
def immU16 : PatLeaf<(imm), [{
return (uint32_t)N->getZExtValue() < (1 << 16);
}]>;
-def immU20 : PatLeaf<(imm), [{
- return (uint32_t)N->getZExtValue() < (1 << 20);
-}]>;
-
def immMskBitp : PatLeaf<(imm), [{ return immMskBitp(N); }]>;
def immBitp : PatLeaf<(imm), [{
diff --git a/contrib/llvm/lib/Target/XCore/XCoreMCInstLower.cpp b/contrib/llvm/lib/Target/XCore/XCoreMCInstLower.cpp
index 03c5fa2e9c42..7763ccc8f4af 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreMCInstLower.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreMCInstLower.cpp
@@ -25,12 +25,9 @@
using namespace llvm;
XCoreMCInstLower::XCoreMCInstLower(class AsmPrinter &asmprinter)
-: Printer(asmprinter) {}
+ : Printer(asmprinter) {}
-void XCoreMCInstLower::Initialize(Mangler *M, MCContext *C) {
- Mang = M;
- Ctx = C;
-}
+void XCoreMCInstLower::Initialize(MCContext *C) { Ctx = C; }
MCOperand XCoreMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
MachineOperandType MOTy,
diff --git a/contrib/llvm/lib/Target/XCore/XCoreMCInstLower.h b/contrib/llvm/lib/Target/XCore/XCoreMCInstLower.h
index 569147872f23..8fb1593cc6e6 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreMCInstLower.h
+++ b/contrib/llvm/lib/Target/XCore/XCoreMCInstLower.h
@@ -25,11 +25,10 @@ namespace llvm {
class LLVM_LIBRARY_VISIBILITY XCoreMCInstLower {
typedef MachineOperand::MachineOperandType MachineOperandType;
MCContext *Ctx;
- Mangler *Mang;
AsmPrinter &Printer;
public:
XCoreMCInstLower(class AsmPrinter &asmprinter);
- void Initialize(Mangler *mang, MCContext *C);
+ void Initialize(MCContext *C);
void Lower(const MachineInstr *MI, MCInst &OutMI) const;
MCOperand LowerOperand(const MachineOperand& MO, unsigned offset = 0) const;
diff --git a/contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.cpp b/contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.cpp
index 6c770969e32e..e91536ca1e83 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreMachineFunctionInfo.cpp
@@ -17,7 +17,7 @@ void XCoreFunctionInfo::anchor() { }
bool XCoreFunctionInfo::isLargeFrame(const MachineFunction &MF) const {
if (CachedEStackSize == -1) {
- CachedEStackSize = MF.getFrameInfo()->estimateStackSize(MF);
+ CachedEStackSize = MF.getFrameInfo().estimateStackSize(MF);
}
// isLargeFrame() is used when deciding if spill slots should be added to
// allow eliminateFrameIndex() to scavenge registers.
@@ -36,12 +36,12 @@ int XCoreFunctionInfo::createLRSpillSlot(MachineFunction &MF) {
return LRSpillSlot;
}
const TargetRegisterClass *RC = &XCore::GRRegsRegClass;
- MachineFrameInfo *MFI = MF.getFrameInfo();
+ MachineFrameInfo &MFI = MF.getFrameInfo();
if (! MF.getFunction()->isVarArg()) {
// A fixed offset of 0 allows us to save / restore LR using entsp / retsp.
- LRSpillSlot = MFI->CreateFixedObject(RC->getSize(), 0, true);
+ LRSpillSlot = MFI.CreateFixedObject(RC->getSize(), 0, true);
} else {
- LRSpillSlot = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), true);
+ LRSpillSlot = MFI.CreateStackObject(RC->getSize(), RC->getAlignment(), true);
}
LRSpillSlotSet = true;
return LRSpillSlot;
@@ -52,8 +52,8 @@ int XCoreFunctionInfo::createFPSpillSlot(MachineFunction &MF) {
return FPSpillSlot;
}
const TargetRegisterClass *RC = &XCore::GRRegsRegClass;
- MachineFrameInfo *MFI = MF.getFrameInfo();
- FPSpillSlot = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), true);
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ FPSpillSlot = MFI.CreateStackObject(RC->getSize(), RC->getAlignment(), true);
FPSpillSlotSet = true;
return FPSpillSlot;
}
@@ -63,9 +63,9 @@ const int* XCoreFunctionInfo::createEHSpillSlot(MachineFunction &MF) {
return EHSpillSlot;
}
const TargetRegisterClass *RC = &XCore::GRRegsRegClass;
- MachineFrameInfo *MFI = MF.getFrameInfo();
- EHSpillSlot[0] = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), true);
- EHSpillSlot[1] = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(), true);
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ EHSpillSlot[0] = MFI.CreateStackObject(RC->getSize(), RC->getAlignment(), true);
+ EHSpillSlot[1] = MFI.CreateStackObject(RC->getSize(), RC->getAlignment(), true);
EHSpillSlotSet = true;
return EHSpillSlot;
}
diff --git a/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp
index 1cfb57dc3af3..d34e928b14f7 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -271,8 +271,8 @@ XCoreRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
*static_cast<const XCoreInstrInfo *>(MF.getSubtarget().getInstrInfo());
const XCoreFrameLowering *TFI = getFrameLowering(MF);
- int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
- int StackSize = MF.getFrameInfo()->getStackSize();
+ int Offset = MF.getFrameInfo().getObjectOffset(FrameIndex);
+ int StackSize = MF.getFrameInfo().getStackSize();
#ifndef NDEBUG
DEBUG(errs() << "\nFunction : "
diff --git a/contrib/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.cpp b/contrib/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
index 61fbf0dc3d2c..c03b0afceba3 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreSelectionDAGInfo.cpp
@@ -20,7 +20,7 @@ SDValue XCoreSelectionDAGInfo::EmitTargetCodeForMemcpy(
SelectionDAG &DAG, const SDLoc &dl, SDValue Chain, SDValue Dst, SDValue Src,
SDValue Size, unsigned Align, bool isVolatile, bool AlwaysInline,
MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
- unsigned SizeBitWidth = Size.getValueType().getSizeInBits();
+ unsigned SizeBitWidth = Size.getValueSizeInBits();
// Call __memcpy_4 if the src, dst and size are all 4 byte aligned.
if (!AlwaysInline && (Align & 3) == 0 &&
DAG.MaskedValueIsZero(Size, APInt(SizeBitWidth, 3))) {
diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
index c3eab802f815..bf3138f2164a 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -89,7 +89,7 @@ void XCorePassConfig::addPreEmitPass() {
// Force static initialization.
extern "C" void LLVMInitializeXCoreTarget() {
- RegisterTargetMachine<XCoreTargetMachine> X(TheXCoreTarget);
+ RegisterTargetMachine<XCoreTargetMachine> X(getTheXCoreTarget());
}
TargetIRAnalysis XCoreTargetMachine::getTargetIRAnalysis() {
diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp b/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp
index abe1deddc56e..ad8693fd325e 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp
+++ b/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.cpp
@@ -95,11 +95,9 @@ static unsigned getXCoreSectionFlags(SectionKind K, bool IsCPRel) {
return Flags;
}
-MCSection *
-XCoreTargetObjectFile::getExplicitSectionGlobal(const GlobalValue *GV,
- SectionKind Kind, Mangler &Mang,
- const TargetMachine &TM) const {
- StringRef SectionName = GV->getSection();
+MCSection *XCoreTargetObjectFile::getExplicitSectionGlobal(
+ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
+ StringRef SectionName = GO->getSection();
// Infer section flags from the section name if we can.
bool IsCPRel = SectionName.startswith(".cp.");
if (IsCPRel && !Kind.isReadOnly())
@@ -108,12 +106,10 @@ XCoreTargetObjectFile::getExplicitSectionGlobal(const GlobalValue *GV,
getXCoreSectionFlags(Kind, IsCPRel));
}
-MCSection *
-XCoreTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
- SectionKind Kind, Mangler &Mang,
- const TargetMachine &TM) const {
+MCSection *XCoreTargetObjectFile::SelectSectionForGlobal(
+ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
- bool UseCPRel = GV->isLocalLinkage(GV->getLinkage());
+ bool UseCPRel = GO->hasLocalLinkage();
if (Kind.isText()) return TextSection;
if (UseCPRel) {
@@ -122,8 +118,8 @@ XCoreTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
if (Kind.isMergeableConst8()) return MergeableConst8Section;
if (Kind.isMergeableConst16()) return MergeableConst16Section;
}
- Type *ObjType = GV->getValueType();
- auto &DL = GV->getParent()->getDataLayout();
+ Type *ObjType = GO->getValueType();
+ auto &DL = GO->getParent()->getDataLayout();
if (TM.getCodeModel() == CodeModel::Small || !ObjType->isSized() ||
DL.getTypeAllocSize(ObjType) < CodeModelLargeSize) {
if (Kind.isReadOnly()) return UseCPRel? ReadOnlySection
diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.h b/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.h
index c129d757dea3..5eb423a7435e 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.h
+++ b/contrib/llvm/lib/Target/XCore/XCoreTargetObjectFile.h
@@ -25,12 +25,10 @@ static const unsigned CodeModelLargeSize = 256;
public:
void Initialize(MCContext &Ctx, const TargetMachine &TM) override;
- MCSection *getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
- Mangler &Mang,
+ MCSection *getExplicitSectionGlobal(const GlobalObject *GO, SectionKind Kind,
const TargetMachine &TM) const override;
- MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
- Mangler &Mang,
+ MCSection *SelectSectionForGlobal(const GlobalObject *GO, SectionKind Kind,
const TargetMachine &TM) const override;
MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind,
diff --git a/contrib/llvm/lib/Target/XCore/XCoreTargetTransformInfo.h b/contrib/llvm/lib/Target/XCore/XCoreTargetTransformInfo.h
index b2cb889f1fc0..9617796f4861 100644
--- a/contrib/llvm/lib/Target/XCore/XCoreTargetTransformInfo.h
+++ b/contrib/llvm/lib/Target/XCore/XCoreTargetTransformInfo.h
@@ -41,13 +41,6 @@ public:
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl()),
TLI(ST->getTargetLowering()) {}
- // Provide value semantics. MSVC requires that we spell all of these out.
- XCoreTTIImpl(const XCoreTTIImpl &Arg)
- : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {}
- XCoreTTIImpl(XCoreTTIImpl &&Arg)
- : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
- TLI(std::move(Arg.TLI)) {}
-
unsigned getNumberOfRegisters(bool Vector) {
if (Vector) {
return 0;
diff --git a/contrib/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp b/contrib/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp
new file mode 100644
index 000000000000..a97db6fde454
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Coroutines/CoroCleanup.cpp
@@ -0,0 +1,134 @@
+//===- CoroCleanup.cpp - Coroutine Cleanup Pass ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This pass lowers all remaining coroutine intrinsics.
+//===----------------------------------------------------------------------===//
+
+#include "CoroInternal.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Scalar.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "coro-cleanup"
+
+namespace {
+// Created on demand if CoroCleanup pass has work to do.
+struct Lowerer : coro::LowererBase {
+ IRBuilder<> Builder;
+ Lowerer(Module &M) : LowererBase(M), Builder(Context) {}
+ bool lowerRemainingCoroIntrinsics(Function &F);
+};
+}
+
+static void simplifyCFG(Function &F) {
+ llvm::legacy::FunctionPassManager FPM(F.getParent());
+ FPM.add(createCFGSimplificationPass());
+
+ FPM.doInitialization();
+ FPM.run(F);
+ FPM.doFinalization();
+}
+
+static void lowerSubFn(IRBuilder<> &Builder, CoroSubFnInst *SubFn) {
+ Builder.SetInsertPoint(SubFn);
+ Value *FrameRaw = SubFn->getFrame();
+ int Index = SubFn->getIndex();
+
+ auto *FrameTy = StructType::get(
+ SubFn->getContext(), {Builder.getInt8PtrTy(), Builder.getInt8PtrTy()});
+ PointerType *FramePtrTy = FrameTy->getPointerTo();
+
+ Builder.SetInsertPoint(SubFn);
+ auto *FramePtr = Builder.CreateBitCast(FrameRaw, FramePtrTy);
+ auto *Gep = Builder.CreateConstInBoundsGEP2_32(FrameTy, FramePtr, 0, Index);
+ auto *Load = Builder.CreateLoad(Gep);
+
+ SubFn->replaceAllUsesWith(Load);
+}
+
+bool Lowerer::lowerRemainingCoroIntrinsics(Function &F) {
+ bool Changed = false;
+
+ for (auto IB = inst_begin(F), E = inst_end(F); IB != E;) {
+ Instruction &I = *IB++;
+ if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
+ switch (II->getIntrinsicID()) {
+ default:
+ continue;
+ case Intrinsic::coro_begin:
+ II->replaceAllUsesWith(II->getArgOperand(1));
+ break;
+ case Intrinsic::coro_free:
+ II->replaceAllUsesWith(II->getArgOperand(1));
+ break;
+ case Intrinsic::coro_alloc:
+ II->replaceAllUsesWith(ConstantInt::getTrue(Context));
+ break;
+ case Intrinsic::coro_id:
+ II->replaceAllUsesWith(ConstantTokenNone::get(Context));
+ break;
+ case Intrinsic::coro_subfn_addr:
+ lowerSubFn(Builder, cast<CoroSubFnInst>(II));
+ break;
+ }
+ II->eraseFromParent();
+ Changed = true;
+ }
+ }
+
+ if (Changed) {
+    // After replacements were made, we can clean up the function body a little.
+ simplifyCFG(F);
+ }
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// Top Level Driver
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+struct CoroCleanup : FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+
+ CoroCleanup() : FunctionPass(ID) {}
+
+ std::unique_ptr<Lowerer> L;
+
+ // This pass has work to do only if we find intrinsics we are going to lower
+ // in the module.
+ bool doInitialization(Module &M) override {
+ if (coro::declaresIntrinsics(M, {"llvm.coro.alloc", "llvm.coro.begin",
+ "llvm.coro.subfn.addr", "llvm.coro.free",
+ "llvm.coro.id"}))
+ L = llvm::make_unique<Lowerer>(M);
+ return false;
+ }
+
+ bool runOnFunction(Function &F) override {
+ if (L)
+ return L->lowerRemainingCoroIntrinsics(F);
+ return false;
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ if (!L)
+ AU.setPreservesAll();
+ }
+};
+}
+
+char CoroCleanup::ID = 0;
+INITIALIZE_PASS(CoroCleanup, "coro-cleanup",
+ "Lower all coroutine related intrinsics", false, false)
+
+Pass *llvm::createCoroCleanupPass() { return new CoroCleanup(); }
diff --git a/contrib/llvm/lib/Transforms/Coroutines/CoroEarly.cpp b/contrib/llvm/lib/Transforms/Coroutines/CoroEarly.cpp
new file mode 100644
index 000000000000..e8bb0ca99d8a
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Coroutines/CoroEarly.cpp
@@ -0,0 +1,218 @@
+//===- CoroEarly.cpp - Coroutine Early Function Pass ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This pass lowers coroutine intrinsics that hide the details of the exact
+// calling convention for coroutine resume and destroy functions and details of
+// the structure of the coroutine frame.
+//===----------------------------------------------------------------------===//
+
+#include "CoroInternal.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "coro-early"
+
+namespace {
+// Created on demand if CoroEarly pass has work to do.
+class Lowerer : public coro::LowererBase {
+ IRBuilder<> Builder;
+ PointerType *const AnyResumeFnPtrTy;
+
+ void lowerResumeOrDestroy(CallSite CS, CoroSubFnInst::ResumeKind);
+ void lowerCoroPromise(CoroPromiseInst *Intrin);
+ void lowerCoroDone(IntrinsicInst *II);
+
+public:
+ Lowerer(Module &M)
+ : LowererBase(M), Builder(Context),
+ AnyResumeFnPtrTy(FunctionType::get(Type::getVoidTy(Context), Int8Ptr,
+ /*isVarArg=*/false)
+ ->getPointerTo()) {}
+ bool lowerEarlyIntrinsics(Function &F);
+};
+}
+
+// Replace a direct call to coro.resume or coro.destroy with an indirect call to
+// an address returned by coro.subfn.addr intrinsic. This is done so that
+// CGPassManager recognizes devirtualization when CoroElide pass replaces a call
+// to coro.subfn.addr with an appropriate function address.
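+//
+// As an illustrative sketch only (not part of this change), a direct call
+//   call void @llvm.coro.resume(i8* %hdl)
+// becomes an indirect call through the resume slot, roughly:
+//   %addr = call i8* @llvm.coro.subfn.addr(i8* %hdl, i8 0)  ; 0 = ResumeIndex
+//   call fastcc void %addr(i8* %hdl)    ; modulo a bitcast to the right type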
+void Lowerer::lowerResumeOrDestroy(CallSite CS,
+ CoroSubFnInst::ResumeKind Index) {
+ Value *ResumeAddr =
+ makeSubFnCall(CS.getArgOperand(0), Index, CS.getInstruction());
+ CS.setCalledFunction(ResumeAddr);
+ CS.setCallingConv(CallingConv::Fast);
+}
+
+// The coroutine promise field is always at a fixed offset from the beginning
+// of the coroutine frame. The i8* coro.promise(i8*, i1 from) intrinsic adds an
+// offset to the passed pointer to move from the coroutine frame to the
+// coroutine promise and vice versa. Since we don't know exactly which
+// coroutine frame it is, we build a coroutine frame mock-up starting with two
+// function pointers, followed by a properly aligned coroutine promise field.
+// TODO: Handle the case when coroutine promise alloca has align override.
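+//
+// Illustrative sketch of the computation below: with a mock-up frame
+//   { void (i8*)* resume, void (i8*)* destroy, <promise> }
+// the offset is alignTo(offsetof(mock-up, promise), alignment); it is added
+// when going from the frame pointer to the promise, and subtracted (i1 from ==
+// true) when going from the promise back to the frame pointer.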
+void Lowerer::lowerCoroPromise(CoroPromiseInst *Intrin) {
+ Value *Operand = Intrin->getArgOperand(0);
+  unsigned Alignment = Intrin->getAlignment();
+ Type *Int8Ty = Builder.getInt8Ty();
+
+ auto *SampleStruct =
+ StructType::get(Context, {AnyResumeFnPtrTy, AnyResumeFnPtrTy, Int8Ty});
+ const DataLayout &DL = TheModule.getDataLayout();
+ int64_t Offset = alignTo(
+      DL.getStructLayout(SampleStruct)->getElementOffset(2), Alignment);
+ if (Intrin->isFromPromise())
+ Offset = -Offset;
+
+ Builder.SetInsertPoint(Intrin);
+ Value *Replacement =
+ Builder.CreateConstInBoundsGEP1_32(Int8Ty, Operand, Offset);
+
+ Intrin->replaceAllUsesWith(Replacement);
+ Intrin->eraseFromParent();
+}
+
+// When a coroutine reaches final suspend point, it zeros out ResumeFnAddr in
+// the coroutine frame (it is UB to resume from a final suspend point).
+// The llvm.coro.done intrinsic is used to check whether a coroutine is
+// suspended at the final suspend point or not.
+void Lowerer::lowerCoroDone(IntrinsicInst *II) {
+ Value *Operand = II->getArgOperand(0);
+
+ // ResumeFnAddr is the first pointer sized element of the coroutine frame.
+ auto *FrameTy = Int8Ptr;
+ PointerType *FramePtrTy = FrameTy->getPointerTo();
+
+ Builder.SetInsertPoint(II);
+ auto *BCI = Builder.CreateBitCast(Operand, FramePtrTy);
+ auto *Gep = Builder.CreateConstInBoundsGEP1_32(FrameTy, BCI, 0);
+ auto *Load = Builder.CreateLoad(Gep);
+ auto *Cond = Builder.CreateICmpEQ(Load, NullPtr);
+
+ II->replaceAllUsesWith(Cond);
+ II->eraseFromParent();
+}
+
+// Prior to CoroSplit, calls to coro.begin need to be marked as NoDuplicate,
+// as CoroSplit assumes there is exactly one coro.begin. After CoroSplit, the
+// NoDuplicate attribute will be removed from coro.begin; otherwise, it would
+// interfere with inlining.
+static void setCannotDuplicate(CoroIdInst *CoroId) {
+ for (User *U : CoroId->users())
+ if (auto *CB = dyn_cast<CoroBeginInst>(U))
+ CB->setCannotDuplicate();
+}
+
+bool Lowerer::lowerEarlyIntrinsics(Function &F) {
+ bool Changed = false;
+ CoroIdInst *CoroId = nullptr;
+ SmallVector<CoroFreeInst *, 4> CoroFrees;
+ for (auto IB = inst_begin(F), IE = inst_end(F); IB != IE;) {
+ Instruction &I = *IB++;
+ if (auto CS = CallSite(&I)) {
+ switch (CS.getIntrinsicID()) {
+ default:
+ continue;
+ case Intrinsic::coro_free:
+ CoroFrees.push_back(cast<CoroFreeInst>(&I));
+ break;
+ case Intrinsic::coro_suspend:
+ // Make sure that final suspend point is not duplicated as CoroSplit
+ // pass expects that there is at most one final suspend point.
+ if (cast<CoroSuspendInst>(&I)->isFinal())
+ CS.setCannotDuplicate();
+ break;
+ case Intrinsic::coro_end:
+ // Make sure that fallthrough coro.end is not duplicated as CoroSplit
+ // pass expects that there is at most one fallthrough coro.end.
+ if (cast<CoroEndInst>(&I)->isFallthrough())
+ CS.setCannotDuplicate();
+ break;
+ case Intrinsic::coro_id:
+ // Mark a function that comes out of the frontend that has a coro.id
+ // with a coroutine attribute.
+ if (auto *CII = cast<CoroIdInst>(&I)) {
+ if (CII->getInfo().isPreSplit()) {
+ F.addFnAttr(CORO_PRESPLIT_ATTR, UNPREPARED_FOR_SPLIT);
+ setCannotDuplicate(CII);
+ CII->setCoroutineSelf();
+ CoroId = cast<CoroIdInst>(&I);
+ }
+ }
+ break;
+ case Intrinsic::coro_resume:
+ lowerResumeOrDestroy(CS, CoroSubFnInst::ResumeIndex);
+ break;
+ case Intrinsic::coro_destroy:
+ lowerResumeOrDestroy(CS, CoroSubFnInst::DestroyIndex);
+ break;
+ case Intrinsic::coro_promise:
+ lowerCoroPromise(cast<CoroPromiseInst>(&I));
+ break;
+ case Intrinsic::coro_done:
+ lowerCoroDone(cast<IntrinsicInst>(&I));
+ break;
+ }
+ Changed = true;
+ }
+ }
+ // Make sure that all CoroFree reference the coro.id intrinsic.
+ // Token type is not exposed through coroutine C/C++ builtins to plain C, so
+ // we allow specifying none and fixing it up here.
+ if (CoroId)
+ for (CoroFreeInst *CF : CoroFrees)
+ CF->setArgOperand(0, CoroId);
+ return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// Top Level Driver
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+struct CoroEarly : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid.
+ CoroEarly() : FunctionPass(ID) {}
+
+ std::unique_ptr<Lowerer> L;
+
+ // This pass has work to do only if we find intrinsics we are going to lower
+ // in the module.
+ bool doInitialization(Module &M) override {
+ if (coro::declaresIntrinsics(M, {"llvm.coro.id", "llvm.coro.destroy",
+ "llvm.coro.done", "llvm.coro.end",
+ "llvm.coro.free", "llvm.coro.promise",
+ "llvm.coro.resume", "llvm.coro.suspend"}))
+ L = llvm::make_unique<Lowerer>(M);
+ return false;
+ }
+
+ bool runOnFunction(Function &F) override {
+ if (!L)
+ return false;
+
+ return L->lowerEarlyIntrinsics(F);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ }
+};
+}
+
+char CoroEarly::ID = 0;
+INITIALIZE_PASS(CoroEarly, "coro-early", "Lower early coroutine intrinsics",
+ false, false)
+
+Pass *llvm::createCoroEarlyPass() { return new CoroEarly(); }
diff --git a/contrib/llvm/lib/Transforms/Coroutines/CoroElide.cpp b/contrib/llvm/lib/Transforms/Coroutines/CoroElide.cpp
new file mode 100644
index 000000000000..99974d8da64c
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Coroutines/CoroElide.cpp
@@ -0,0 +1,317 @@
+//===- CoroElide.cpp - Coroutine Frame Allocation Elision Pass ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This pass replaces dynamic allocation of coroutine frame with alloca and
+// replaces calls to llvm.coro.resume and llvm.coro.destroy with direct calls
+// to coroutine sub-functions.
+//===----------------------------------------------------------------------===//
+
+#include "CoroInternal.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/ErrorHandling.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "coro-elide"
+
+namespace {
+// Created on demand if CoroElide pass has work to do.
+struct Lowerer : coro::LowererBase {
+ SmallVector<CoroIdInst *, 4> CoroIds;
+ SmallVector<CoroBeginInst *, 1> CoroBegins;
+ SmallVector<CoroAllocInst *, 1> CoroAllocs;
+ SmallVector<CoroSubFnInst *, 4> ResumeAddr;
+ SmallVector<CoroSubFnInst *, 4> DestroyAddr;
+ SmallVector<CoroFreeInst *, 1> CoroFrees;
+
+ Lowerer(Module &M) : LowererBase(M) {}
+
+ void elideHeapAllocations(Function *F, Type *FrameTy, AAResults &AA);
+ bool shouldElide() const;
+ bool processCoroId(CoroIdInst *, AAResults &AA);
+};
+} // end anonymous namespace
+
+// Go through the list of coro.subfn.addr intrinsics and replace them with the
+// provided constant.
+static void replaceWithConstant(Constant *Value,
+ SmallVectorImpl<CoroSubFnInst *> &Users) {
+ if (Users.empty())
+ return;
+
+ // See if we need to bitcast the constant to match the type of the intrinsic
+ // being replaced. Note: All coro.subfn.addr intrinsics return the same type,
+ // so we only need to examine the type of the first one in the list.
+ Type *IntrTy = Users.front()->getType();
+ Type *ValueTy = Value->getType();
+ if (ValueTy != IntrTy) {
+ // May need to tweak the function type to match the type expected at the
+ // use site.
+ assert(ValueTy->isPointerTy() && IntrTy->isPointerTy());
+ Value = ConstantExpr::getBitCast(Value, IntrTy);
+ }
+
+ // Now the value type matches the type of the intrinsic. Replace them all!
+ for (CoroSubFnInst *I : Users)
+ replaceAndRecursivelySimplify(I, Value);
+}
+
+// See if any operand of the call instruction references the coroutine frame.
+static bool operandReferences(CallInst *CI, AllocaInst *Frame, AAResults &AA) {
+ for (Value *Op : CI->operand_values())
+ if (AA.alias(Op, Frame) != NoAlias)
+ return true;
+ return false;
+}
+
+// Look for any tail calls referencing the coroutine frame and remove the tail
+// attribute from them: the coroutine frame now resides on the stack, and a
+// tail call implies that the call does not reference anything on the stack.
+static void removeTailCallAttribute(AllocaInst *Frame, AAResults &AA) {
+ Function &F = *Frame->getFunction();
+ MemoryLocation Mem(Frame);
+ for (Instruction &I : instructions(F))
+ if (auto *Call = dyn_cast<CallInst>(&I))
+ if (Call->isTailCall() && operandReferences(Call, Frame, AA)) {
+        // FIXME: If we ever hit this check, evaluate whether it is more
+        // appropriate to retain musttail and allow the code to compile.
+ if (Call->isMustTailCall())
+ report_fatal_error("Call referring to the coroutine frame cannot be "
+ "marked as musttail");
+ Call->setTailCall(false);
+ }
+}
+
+// Given a resume function @f.resume(%f.frame* %frame), returns %f.frame type.
+static Type *getFrameType(Function *Resume) {
+ auto *ArgType = Resume->getArgumentList().front().getType();
+ return cast<PointerType>(ArgType)->getElementType();
+}
+
+// Finds the first non-alloca instruction in the entry block of a function.
+static Instruction *getFirstNonAllocaInTheEntryBlock(Function *F) {
+ for (Instruction &I : F->getEntryBlock())
+ if (!isa<AllocaInst>(&I))
+ return &I;
+ llvm_unreachable("no terminator in the entry block");
+}
+
+// To elide heap allocations we need to suppress code blocks guarded by
+// llvm.coro.alloc and llvm.coro.free instructions.
+void Lowerer::elideHeapAllocations(Function *F, Type *FrameTy, AAResults &AA) {
+ LLVMContext &C = FrameTy->getContext();
+ auto *InsertPt =
+ getFirstNonAllocaInTheEntryBlock(CoroIds.front()->getFunction());
+
+  // Replacing llvm.coro.alloc with false suppresses dynamic allocation, since
+  // the frontend is expected to generate code that looks like:
+ // id = coro.id(...)
+ // mem = coro.alloc(id) ? malloc(coro.size()) : 0;
+ // coro.begin(id, mem)
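+  //
+  // After elision (illustrative sketch), coro.alloc folds to false so the
+  // malloc path becomes dead, and every coro.begin is replaced with a pointer
+  // to a frame allocated on the stack, roughly:
+  //   %Frame  = alloca %f.Frame
+  //   %vFrame = bitcast %f.Frame* %Frame to i8*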
+ auto *False = ConstantInt::getFalse(C);
+ for (auto *CA : CoroAllocs) {
+ CA->replaceAllUsesWith(False);
+ CA->eraseFromParent();
+ }
+
+ // FIXME: Design how to transmit alignment information for every alloca that
+ // is spilled into the coroutine frame and recreate the alignment information
+ // here. Possibly we will need to do a mini SROA here and break the coroutine
+ // frame into individual AllocaInst recreating the original alignment.
+ auto *Frame = new AllocaInst(FrameTy, "", InsertPt);
+ auto *FrameVoidPtr =
+ new BitCastInst(Frame, Type::getInt8PtrTy(C), "vFrame", InsertPt);
+
+ for (auto *CB : CoroBegins) {
+ CB->replaceAllUsesWith(FrameVoidPtr);
+ CB->eraseFromParent();
+ }
+
+  // Since the coroutine frame now lives on the stack, we need to make sure
+  // that any tail call referencing it is made a non-tail call.
+ removeTailCallAttribute(Frame, AA);
+}
+
+bool Lowerer::shouldElide() const {
+ // If no CoroAllocs, we cannot suppress allocation, so elision is not
+ // possible.
+ if (CoroAllocs.empty())
+ return false;
+
+ // Check that for every coro.begin there is a coro.destroy directly
+ // referencing the SSA value of that coro.begin. If the value escaped, then
+ // coro.destroy would have been referencing a memory location storing that
+ // value and not the virtual register.
+
+ SmallPtrSet<CoroBeginInst *, 8> ReferencedCoroBegins;
+
+ for (CoroSubFnInst *DA : DestroyAddr) {
+ if (auto *CB = dyn_cast<CoroBeginInst>(DA->getFrame()))
+ ReferencedCoroBegins.insert(CB);
+ else
+ return false;
+ }
+
+  // If the size of the set is the same as the total number of CoroBegins, it
+  // means we found a coro.free or coro.destroy mentioning every coro.begin,
+  // and we can perform heap elision.
+ return ReferencedCoroBegins.size() == CoroBegins.size();
+}
+
+bool Lowerer::processCoroId(CoroIdInst *CoroId, AAResults &AA) {
+ CoroBegins.clear();
+ CoroAllocs.clear();
+ CoroFrees.clear();
+ ResumeAddr.clear();
+ DestroyAddr.clear();
+
+  // Collect all coro.begins, coro.allocs and coro.frees associated with this
+  // coro.id.
+ for (User *U : CoroId->users()) {
+ if (auto *CB = dyn_cast<CoroBeginInst>(U))
+ CoroBegins.push_back(CB);
+ else if (auto *CA = dyn_cast<CoroAllocInst>(U))
+ CoroAllocs.push_back(CA);
+ else if (auto *CF = dyn_cast<CoroFreeInst>(U))
+ CoroFrees.push_back(CF);
+ }
+
+ // Collect all coro.subfn.addrs associated with coro.begin.
+ // Note, we only devirtualize the calls if their coro.subfn.addr refers to
+ // coro.begin directly. If we run into cases where this check is too
+ // conservative, we can consider relaxing the check.
+ for (CoroBeginInst *CB : CoroBegins) {
+ for (User *U : CB->users())
+ if (auto *II = dyn_cast<CoroSubFnInst>(U))
+ switch (II->getIndex()) {
+ case CoroSubFnInst::ResumeIndex:
+ ResumeAddr.push_back(II);
+ break;
+ case CoroSubFnInst::DestroyIndex:
+ DestroyAddr.push_back(II);
+ break;
+ default:
+ llvm_unreachable("unexpected coro.subfn.addr constant");
+ }
+ }
+
+ // PostSplit coro.id refers to an array of subfunctions in its Info
+ // argument.
+ ConstantArray *Resumers = CoroId->getInfo().Resumers;
+  assert(Resumers && "PostSplit coro.id Info argument must refer to an array "
+                     "of coroutine subfunctions");
+ auto *ResumeAddrConstant =
+ ConstantExpr::getExtractValue(Resumers, CoroSubFnInst::ResumeIndex);
+
+ replaceWithConstant(ResumeAddrConstant, ResumeAddr);
+
+ bool ShouldElide = shouldElide();
+
+ auto *DestroyAddrConstant = ConstantExpr::getExtractValue(
+ Resumers,
+ ShouldElide ? CoroSubFnInst::CleanupIndex : CoroSubFnInst::DestroyIndex);
+
+ replaceWithConstant(DestroyAddrConstant, DestroyAddr);
+
+ if (ShouldElide) {
+ auto *FrameTy = getFrameType(cast<Function>(ResumeAddrConstant));
+ elideHeapAllocations(CoroId->getFunction(), FrameTy, AA);
+ coro::replaceCoroFree(CoroId, /*Elide=*/true);
+ }
+
+ return true;
+}
+
+// See if there are any coro.subfn.addr instructions referring to the
+// coro.devirt trigger; if so, replace them with a direct call to the devirt
+// trigger function.
+static bool replaceDevirtTrigger(Function &F) {
+ SmallVector<CoroSubFnInst *, 1> DevirtAddr;
+ for (auto &I : instructions(F))
+ if (auto *SubFn = dyn_cast<CoroSubFnInst>(&I))
+ if (SubFn->getIndex() == CoroSubFnInst::RestartTrigger)
+ DevirtAddr.push_back(SubFn);
+
+ if (DevirtAddr.empty())
+ return false;
+
+ Module &M = *F.getParent();
+ Function *DevirtFn = M.getFunction(CORO_DEVIRT_TRIGGER_FN);
+ assert(DevirtFn && "coro.devirt.fn not found");
+ replaceWithConstant(DevirtFn, DevirtAddr);
+
+ return true;
+}
+
+//===----------------------------------------------------------------------===//
+// Top Level Driver
+//===----------------------------------------------------------------------===//
+
+namespace {
+struct CoroElide : FunctionPass {
+ static char ID;
+ CoroElide() : FunctionPass(ID) {}
+
+ std::unique_ptr<Lowerer> L;
+
+ bool doInitialization(Module &M) override {
+ if (coro::declaresIntrinsics(M, {"llvm.coro.id"}))
+ L = llvm::make_unique<Lowerer>(M);
+ return false;
+ }
+
+ bool runOnFunction(Function &F) override {
+ if (!L)
+ return false;
+
+ bool Changed = false;
+
+ if (F.hasFnAttribute(CORO_PRESPLIT_ATTR))
+ Changed = replaceDevirtTrigger(F);
+
+ L->CoroIds.clear();
+
+ // Collect all PostSplit coro.ids.
+ for (auto &I : instructions(F))
+ if (auto *CII = dyn_cast<CoroIdInst>(&I))
+ if (CII->getInfo().isPostSplit())
+ // If it is the coroutine itself, don't touch it.
+ if (CII->getCoroutine() != CII->getFunction())
+ L->CoroIds.push_back(CII);
+
+ // If we did not find any coro.id, there is nothing to do.
+ if (L->CoroIds.empty())
+ return Changed;
+
+ AAResults &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
+
+ for (auto *CII : L->CoroIds)
+ Changed |= L->processCoroId(CII, AA);
+
+ return Changed;
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AAResultsWrapperPass>();
+ }
+};
+}
+
+char CoroElide::ID = 0;
+INITIALIZE_PASS_BEGIN(
+ CoroElide, "coro-elide",
+ "Coroutine frame allocation elision and indirect calls replacement", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_END(
+ CoroElide, "coro-elide",
+ "Coroutine frame allocation elision and indirect calls replacement", false,
+ false)
+
+Pass *llvm::createCoroElidePass() { return new CoroElide(); }
diff --git a/contrib/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/contrib/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
new file mode 100644
index 000000000000..bb28558a29e2
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Coroutines/CoroFrame.cpp
@@ -0,0 +1,727 @@
+//===- CoroFrame.cpp - Builds and manipulates coroutine frame -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This file contains classes used to discover if for a particular value
+// there is a use separated from its definition by a suspend point.
+//
+// Using the information discovered we form a Coroutine Frame structure to
+// contain those values. All uses of those values are replaced with appropriate
+// GEP + load from the coroutine frame. At the point of the definition we spill
+// the value into the coroutine frame.
+//
+// TODO: pack values tightly using liveness info.
+//===----------------------------------------------------------------------===//
+
+#include "CoroInternal.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/circular_raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+
+using namespace llvm;
+
+// The "coro-suspend-crossing" flag is very noisy. There is another debug type,
+// "coro-frame", which results in leaner debug spew.
+#define DEBUG_TYPE "coro-suspend-crossing"
+
+enum { SmallVectorThreshold = 32 };
+
+// Provides a two-way mapping between blocks and numbers.
+namespace {
+class BlockToIndexMapping {
+ SmallVector<BasicBlock *, SmallVectorThreshold> V;
+
+public:
+ size_t size() const { return V.size(); }
+
+ BlockToIndexMapping(Function &F) {
+ for (BasicBlock &BB : F)
+ V.push_back(&BB);
+ std::sort(V.begin(), V.end());
+ }
+
+ size_t blockToIndex(BasicBlock *BB) const {
+ auto *I = std::lower_bound(V.begin(), V.end(), BB);
+    assert(I != V.end() && *I == BB && "BasicBlockNumbering: Unknown block");
+ return I - V.begin();
+ }
+
+ BasicBlock *indexToBlock(unsigned Index) const { return V[Index]; }
+};
+} // end anonymous namespace
+
+// The SuspendCrossingInfo maintains data that allows us to answer the question
+// of whether, given two BasicBlocks A and B, there is a path from A to B that
+// passes through a suspend point.
+//
+// For every basic block 'i' it maintains a BlockData that consists of:
+// Consumes: a bit vector which contains a set of indices of blocks that can
+// reach block 'i'
+// Kills: a bit vector which contains a set of indices of blocks that can
+//          reach block 'i', but at least one path crosses a suspend point
+// Suspend: a boolean indicating whether block 'i' contains a suspend point.
+// End: a boolean indicating whether block 'i' contains a coro.end intrinsic.
+//
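+// As a small illustrative example: for a chain entry -> susp -> use, where
+// 'susp' contains a suspend point, Block[use].Consumes covers entry, susp and
+// use, and Block[use].Kills contains entry, so a value defined in 'entry' and
+// used in 'use' crosses a suspend point and must be spilled to the coroutine
+// frame.
+//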
+namespace {
+struct SuspendCrossingInfo {
+ BlockToIndexMapping Mapping;
+
+ struct BlockData {
+ BitVector Consumes;
+ BitVector Kills;
+ bool Suspend = false;
+ bool End = false;
+ };
+ SmallVector<BlockData, SmallVectorThreshold> Block;
+
+ iterator_range<succ_iterator> successors(BlockData const &BD) const {
+ BasicBlock *BB = Mapping.indexToBlock(&BD - &Block[0]);
+ return llvm::successors(BB);
+ }
+
+ BlockData &getBlockData(BasicBlock *BB) {
+ return Block[Mapping.blockToIndex(BB)];
+ }
+
+ void dump() const;
+ void dump(StringRef Label, BitVector const &BV) const;
+
+ SuspendCrossingInfo(Function &F, coro::Shape &Shape);
+
+ bool hasPathCrossingSuspendPoint(BasicBlock *DefBB, BasicBlock *UseBB) const {
+ size_t const DefIndex = Mapping.blockToIndex(DefBB);
+ size_t const UseIndex = Mapping.blockToIndex(UseBB);
+
+ assert(Block[UseIndex].Consumes[DefIndex] && "use must consume def");
+ bool const Result = Block[UseIndex].Kills[DefIndex];
+ DEBUG(dbgs() << UseBB->getName() << " => " << DefBB->getName()
+ << " answer is " << Result << "\n");
+ return Result;
+ }
+
+ bool isDefinitionAcrossSuspend(BasicBlock *DefBB, User *U) const {
+ auto *I = cast<Instruction>(U);
+
+ // We rewrote PHINodes, so that only the ones with exactly one incoming
+ // value need to be analyzed.
+ if (auto *PN = dyn_cast<PHINode>(I))
+ if (PN->getNumIncomingValues() > 1)
+ return false;
+
+ BasicBlock *UseBB = I->getParent();
+ return hasPathCrossingSuspendPoint(DefBB, UseBB);
+ }
+
+ bool isDefinitionAcrossSuspend(Argument &A, User *U) const {
+ return isDefinitionAcrossSuspend(&A.getParent()->getEntryBlock(), U);
+ }
+
+ bool isDefinitionAcrossSuspend(Instruction &I, User *U) const {
+ return isDefinitionAcrossSuspend(I.getParent(), U);
+ }
+};
+} // end anonymous namespace
+
+LLVM_DUMP_METHOD void SuspendCrossingInfo::dump(StringRef Label,
+ BitVector const &BV) const {
+ dbgs() << Label << ":";
+ for (size_t I = 0, N = BV.size(); I < N; ++I)
+ if (BV[I])
+ dbgs() << " " << Mapping.indexToBlock(I)->getName();
+ dbgs() << "\n";
+}
+
+LLVM_DUMP_METHOD void SuspendCrossingInfo::dump() const {
+ for (size_t I = 0, N = Block.size(); I < N; ++I) {
+ BasicBlock *const B = Mapping.indexToBlock(I);
+ dbgs() << B->getName() << ":\n";
+ dump(" Consumes", Block[I].Consumes);
+ dump(" Kills", Block[I].Kills);
+ }
+ dbgs() << "\n";
+}
+
+SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape)
+ : Mapping(F) {
+ const size_t N = Mapping.size();
+ Block.resize(N);
+
+ // Initialize every block so that it consumes itself
+ for (size_t I = 0; I < N; ++I) {
+ auto &B = Block[I];
+ B.Consumes.resize(N);
+ B.Kills.resize(N);
+ B.Consumes.set(I);
+ }
+
+ // Mark all CoroEnd Blocks. We do not propagate Kills beyond coro.ends as
+ // the code beyond coro.end is reachable during initial invocation of the
+ // coroutine.
+ for (auto *CE : Shape.CoroEnds)
+ getBlockData(CE->getParent()).End = true;
+
+ // Mark all suspend blocks and indicate that they kill everything they
+  // consume. Note that crossing coro.save also requires a spill, as any code
+ // between coro.save and coro.suspend may resume the coroutine and all of the
+ // state needs to be saved by that time.
+ auto markSuspendBlock = [&](IntrinsicInst* BarrierInst) {
+ BasicBlock *SuspendBlock = BarrierInst->getParent();
+ auto &B = getBlockData(SuspendBlock);
+ B.Suspend = true;
+ B.Kills |= B.Consumes;
+ };
+ for (CoroSuspendInst *CSI : Shape.CoroSuspends) {
+ markSuspendBlock(CSI);
+ markSuspendBlock(CSI->getCoroSave());
+ }
+
+ // Iterate propagating consumes and kills until they stop changing.
+ int Iteration = 0;
+ (void)Iteration;
+
+ bool Changed;
+ do {
+ DEBUG(dbgs() << "iteration " << ++Iteration);
+ DEBUG(dbgs() << "==============\n");
+
+ Changed = false;
+ for (size_t I = 0; I < N; ++I) {
+ auto &B = Block[I];
+ for (BasicBlock *SI : successors(B)) {
+
+ auto SuccNo = Mapping.blockToIndex(SI);
+
+        // Save the Consumes and Kills bitsets so that it is easy to see
+ // if anything changed after propagation.
+ auto &S = Block[SuccNo];
+ auto SavedConsumes = S.Consumes;
+ auto SavedKills = S.Kills;
+
+ // Propagate Kills and Consumes from block B into its successor S.
+ S.Consumes |= B.Consumes;
+ S.Kills |= B.Kills;
+
+        // If block B is a suspend block, it should propagate kills into its
+        // successor for every block B consumes.
+ if (B.Suspend) {
+ S.Kills |= B.Consumes;
+ }
+ if (S.Suspend) {
+ // If block S is a suspend block, it should kill all of the blocks it
+ // consumes.
+ S.Kills |= S.Consumes;
+ } else if (S.End) {
+ // If block S is an end block, it should not propagate kills as the
+ // blocks following coro.end() are reached during initial invocation
+ // of the coroutine while all the data are still available on the
+ // stack or in the registers.
+ S.Kills.reset();
+ } else {
+          // This is reached when block S is neither a suspend block nor a
+          // coro.end block, and we need to make sure that it is not in the
+          // kill set.
+ S.Kills.reset(SuccNo);
+ }
+
+ // See if anything changed.
+ Changed |= (S.Kills != SavedKills) || (S.Consumes != SavedConsumes);
+
+ if (S.Kills != SavedKills) {
+ DEBUG(dbgs() << "\nblock " << I << " follower " << SI->getName()
+ << "\n");
+ DEBUG(dump("S.Kills", S.Kills));
+ DEBUG(dump("SavedKills", SavedKills));
+ }
+ if (S.Consumes != SavedConsumes) {
+ DEBUG(dbgs() << "\nblock " << I << " follower " << SI << "\n");
+ DEBUG(dump("S.Consume", S.Consumes));
+ DEBUG(dump("SavedCons", SavedConsumes));
+ }
+ }
+ }
+ } while (Changed);
+ DEBUG(dump());
+}
+
+#undef DEBUG_TYPE // "coro-suspend-crossing"
+#define DEBUG_TYPE "coro-frame"
+
+// We build up the list of spills for every case where a use is separated
+// from the definition by a suspend point.
+
+struct Spill : std::pair<Value *, Instruction *> {
+ using base = std::pair<Value *, Instruction *>;
+
+ Spill(Value *Def, User *U) : base(Def, cast<Instruction>(U)) {}
+
+ Value *def() const { return first; }
+ Instruction *user() const { return second; }
+ BasicBlock *userBlock() const { return second->getParent(); }
+
+ std::pair<Value *, BasicBlock *> getKey() const {
+ return {def(), userBlock()};
+ }
+
+ bool operator<(Spill const &rhs) const { return getKey() < rhs.getKey(); }
+};
+
+// Note that there may be more than one record with the same value of Def in
+// the SpillInfo vector.
+using SpillInfo = SmallVector<Spill, 8>;
+
+#ifndef NDEBUG
+static void dump(StringRef Title, SpillInfo const &Spills) {
+ dbgs() << "------------- " << Title << "--------------\n";
+ Value *CurrentValue = nullptr;
+ for (auto const &E : Spills) {
+ if (CurrentValue != E.def()) {
+ CurrentValue = E.def();
+ CurrentValue->dump();
+ }
+ dbgs() << " user: ";
+ E.user()->dump();
+ }
+}
+#endif
+
+// Build a struct that will keep state for an active coroutine.
+// struct f.frame {
+// ResumeFnTy ResumeFnAddr;
+// ResumeFnTy DestroyFnAddr;
+//     ... promise (if present) ...
+//     int ResumeIndex;
+// ... spills ...
+// };
+static StructType *buildFrameType(Function &F, coro::Shape &Shape,
+ SpillInfo &Spills) {
+ LLVMContext &C = F.getContext();
+ SmallString<32> Name(F.getName());
+ Name.append(".Frame");
+ StructType *FrameTy = StructType::create(C, Name);
+ auto *FramePtrTy = FrameTy->getPointerTo();
+ auto *FnTy = FunctionType::get(Type::getVoidTy(C), FramePtrTy,
+ /*IsVarArgs=*/false);
+ auto *FnPtrTy = FnTy->getPointerTo();
+
+  // Figure out how wide an integer type storing the suspend index should be.
+ unsigned IndexBits = std::max(1U, Log2_64_Ceil(Shape.CoroSuspends.size()));
+ Type *PromiseType = Shape.PromiseAlloca
+ ? Shape.PromiseAlloca->getType()->getElementType()
+ : Type::getInt1Ty(C);
+ SmallVector<Type *, 8> Types{FnPtrTy, FnPtrTy, PromiseType,
+ Type::getIntNTy(C, IndexBits)};
+ Value *CurrentDef = nullptr;
+
+ // Create an entry for every spilled value.
+ for (auto const &S : Spills) {
+ if (CurrentDef == S.def())
+ continue;
+
+ CurrentDef = S.def();
+ // PromiseAlloca was already added to Types array earlier.
+ if (CurrentDef == Shape.PromiseAlloca)
+ continue;
+
+ Type *Ty = nullptr;
+ if (auto *AI = dyn_cast<AllocaInst>(CurrentDef))
+ Ty = AI->getAllocatedType();
+ else
+ Ty = CurrentDef->getType();
+
+ Types.push_back(Ty);
+ }
+ FrameTy->setBody(Types);
+
+ return FrameTy;
+}
+
+// Replace all alloca and SSA values that are accessed across suspend points
+// with GetElementPointer from coroutine frame + loads and stores. Create an
+// AllocaSpillBB that will become the new entry block for the resume parts of
+// the coroutine:
+//
+// %hdl = coro.begin(...)
+// whatever
+//
+// becomes:
+//
+// %hdl = coro.begin(...)
+// %FramePtr = bitcast i8* hdl to %f.frame*
+// br label %AllocaSpillBB
+//
+// AllocaSpillBB:
+// ; geps corresponding to allocas that were moved to coroutine frame
+// br label PostSpill
+//
+// PostSpill:
+// whatever
+//
+//
+static Instruction *insertSpills(SpillInfo &Spills, coro::Shape &Shape) {
+ auto *CB = Shape.CoroBegin;
+ IRBuilder<> Builder(CB->getNextNode());
+ PointerType *FramePtrTy = Shape.FrameTy->getPointerTo();
+ auto *FramePtr =
+ cast<Instruction>(Builder.CreateBitCast(CB, FramePtrTy, "FramePtr"));
+ Type *FrameTy = FramePtrTy->getElementType();
+
+ Value *CurrentValue = nullptr;
+ BasicBlock *CurrentBlock = nullptr;
+ Value *CurrentReload = nullptr;
+ unsigned Index = coro::Shape::LastKnownField;
+
+  // We need to keep track of any allocas that need "spilling", since they
+  // will now live in the coroutine frame: all accesses to them need to be
+  // changed, not just the accesses across suspend points. We remember the
+  // allocas and their indices to be handled once we have processed all the
+  // spills.
+ SmallVector<std::pair<AllocaInst *, unsigned>, 4> Allocas;
+ // Promise alloca (if present) has a fixed field number (Shape::PromiseField)
+ if (Shape.PromiseAlloca)
+ Allocas.emplace_back(Shape.PromiseAlloca, coro::Shape::PromiseField);
+
+ // Create a load instruction to reload the spilled value from the coroutine
+ // frame.
+ auto CreateReload = [&](Instruction *InsertBefore) {
+ Builder.SetInsertPoint(InsertBefore);
+ auto *G = Builder.CreateConstInBoundsGEP2_32(FrameTy, FramePtr, 0, Index,
+ CurrentValue->getName() +
+ Twine(".reload.addr"));
+ return isa<AllocaInst>(CurrentValue)
+ ? G
+ : Builder.CreateLoad(G,
+ CurrentValue->getName() + Twine(".reload"));
+ };
+
+ for (auto const &E : Spills) {
+ // If we have not seen the value, generate a spill.
+ if (CurrentValue != E.def()) {
+ CurrentValue = E.def();
+ CurrentBlock = nullptr;
+ CurrentReload = nullptr;
+
+ ++Index;
+
+ if (auto *AI = dyn_cast<AllocaInst>(CurrentValue)) {
+        // A spilled AllocaInst will be replaced with a GEP from the coroutine
+        // frame, so no spill is required.
+ Allocas.emplace_back(AI, Index);
+ if (!AI->isStaticAlloca())
+ report_fatal_error("Coroutines cannot handle non static allocas yet");
+ } else {
+        // Otherwise, create a store instruction storing the value into the
+        // coroutine frame. For an argument, we place the store instruction
+        // right after the coroutine frame pointer instruction, i.e. the
+        // bitcast of coro.begin from i8* to %f.frame*. For all other values,
+        // the spill is placed immediately after the definition.
+ Builder.SetInsertPoint(
+ isa<Argument>(CurrentValue)
+ ? FramePtr->getNextNode()
+ : dyn_cast<Instruction>(E.def())->getNextNode());
+
+ auto *G = Builder.CreateConstInBoundsGEP2_32(
+ FrameTy, FramePtr, 0, Index,
+ CurrentValue->getName() + Twine(".spill.addr"));
+ Builder.CreateStore(CurrentValue, G);
+ }
+ }
+
+ // If we have not seen the use block, generate a reload in it.
+ if (CurrentBlock != E.userBlock()) {
+ CurrentBlock = E.userBlock();
+ CurrentReload = CreateReload(&*CurrentBlock->getFirstInsertionPt());
+ }
+
+ // If we have a single edge PHINode, remove it and replace it with a reload
+ // from the coroutine frame. (We already took care of multi edge PHINodes
+ // by rewriting them in the rewritePHIs function).
+ if (auto *PN = dyn_cast<PHINode>(E.user())) {
+ assert(PN->getNumIncomingValues() == 1 && "unexpected number of incoming "
+ "values in the PHINode");
+ PN->replaceAllUsesWith(CurrentReload);
+ PN->eraseFromParent();
+ continue;
+ }
+
+ // Replace all uses of CurrentValue in the current instruction with reload.
+ E.user()->replaceUsesOfWith(CurrentValue, CurrentReload);
+ }
+
+ BasicBlock *FramePtrBB = FramePtr->getParent();
+ Shape.AllocaSpillBlock =
+ FramePtrBB->splitBasicBlock(FramePtr->getNextNode(), "AllocaSpillBB");
+ Shape.AllocaSpillBlock->splitBasicBlock(&Shape.AllocaSpillBlock->front(),
+ "PostSpill");
+
+ Builder.SetInsertPoint(&Shape.AllocaSpillBlock->front());
+ // If we found any allocas, replace all of their remaining uses with Geps.
+ for (auto &P : Allocas) {
+ auto *G =
+ Builder.CreateConstInBoundsGEP2_32(FrameTy, FramePtr, 0, P.second);
+ // We are not using ReplaceInstWithInst(P.first, cast<Instruction>(G)) here,
+ // as we are changing location of the instruction.
+ G->takeName(P.first);
+ P.first->replaceAllUsesWith(G);
+ P.first->eraseFromParent();
+ }
+ return FramePtr;
+}
+
+static void rewritePHIs(BasicBlock &BB) {
+  // For every incoming edge we will create a block holding all
+  // incoming values in single-entry PHI nodes.
+ //
+ // loop:
+ // %n.val = phi i32[%n, %entry], [%inc, %loop]
+ //
+ // It will create:
+ //
+ // loop.from.entry:
+ // %n.loop.pre = phi i32 [%n, %entry]
+ // br %label loop
+ // loop.from.loop:
+ // %inc.loop.pre = phi i32 [%inc, %loop]
+ // br %label loop
+ //
+ // After this rewrite, further analysis will ignore any phi nodes with more
+ // than one incoming edge.
+
+ // TODO: Simplify PHINodes in the basic block to remove duplicate
+ // predecessors.
+
+ SmallVector<BasicBlock *, 8> Preds(pred_begin(&BB), pred_end(&BB));
+ for (BasicBlock *Pred : Preds) {
+ auto *IncomingBB = SplitEdge(Pred, &BB);
+ IncomingBB->setName(BB.getName() + Twine(".from.") + Pred->getName());
+ auto *PN = cast<PHINode>(&BB.front());
+ do {
+ int Index = PN->getBasicBlockIndex(IncomingBB);
+ Value *V = PN->getIncomingValue(Index);
+ PHINode *InputV = PHINode::Create(
+ V->getType(), 1, V->getName() + Twine(".") + BB.getName(),
+ &IncomingBB->front());
+ InputV->addIncoming(V, Pred);
+ PN->setIncomingValue(Index, InputV);
+ PN = dyn_cast<PHINode>(PN->getNextNode());
+ } while (PN);
+ }
+}
+
+static void rewritePHIs(Function &F) {
+ SmallVector<BasicBlock *, 8> WorkList;
+
+ for (BasicBlock &BB : F)
+ if (auto *PN = dyn_cast<PHINode>(&BB.front()))
+ if (PN->getNumIncomingValues() > 1)
+ WorkList.push_back(&BB);
+
+ for (BasicBlock *BB : WorkList)
+ rewritePHIs(*BB);
+}
+
+// Check for instructions that we can recreate on resume as opposed to spilling
+// the result into the coroutine frame.
+static bool materializable(Instruction &V) {
+ return isa<CastInst>(&V) || isa<GetElementPtrInst>(&V) ||
+ isa<BinaryOperator>(&V) || isa<CmpInst>(&V) || isa<SelectInst>(&V);
+}
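+
+// For instance (illustrative only), a getelementptr defined before a suspend
+// point and used after it is cloned right before its use by
+// rewriteMaterializableInstructions, instead of being stored in the coroutine
+// frame.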
+
+// Check for structural coroutine intrinsics that should not be spilled into
+// the coroutine frame.
+static bool isCoroutineStructureIntrinsic(Instruction &I) {
+ return isa<CoroIdInst>(&I) || isa<CoroBeginInst>(&I) ||
+ isa<CoroSaveInst>(&I) || isa<CoroSuspendInst>(&I);
+}
+
+// For every use of a value that is across a suspend point, recreate that value
+// after the suspend point.
+static void rewriteMaterializableInstructions(IRBuilder<> &IRB,
+ SpillInfo const &Spills) {
+ BasicBlock *CurrentBlock = nullptr;
+ Instruction *CurrentMaterialization = nullptr;
+ Instruction *CurrentDef = nullptr;
+
+ for (auto const &E : Spills) {
+ // If it is a new definition, update CurrentXXX variables.
+ if (CurrentDef != E.def()) {
+ CurrentDef = cast<Instruction>(E.def());
+ CurrentBlock = nullptr;
+ CurrentMaterialization = nullptr;
+ }
+
+ // If we have not seen this block, materialize the value.
+ if (CurrentBlock != E.userBlock()) {
+ CurrentBlock = E.userBlock();
+ CurrentMaterialization = cast<Instruction>(CurrentDef)->clone();
+ CurrentMaterialization->setName(CurrentDef->getName());
+ CurrentMaterialization->insertBefore(
+ &*CurrentBlock->getFirstInsertionPt());
+ }
+
+ if (auto *PN = dyn_cast<PHINode>(E.user())) {
+ assert(PN->getNumIncomingValues() == 1 && "unexpected number of incoming "
+ "values in the PHINode");
+ PN->replaceAllUsesWith(CurrentMaterialization);
+ PN->eraseFromParent();
+ continue;
+ }
+
+ // Replace all uses of CurrentDef in the current instruction with the
+ // CurrentMaterialization for the block.
+ E.user()->replaceUsesOfWith(CurrentDef, CurrentMaterialization);
+ }
+}
+
+// Move early uses of spilled variables after CoroBegin.
+// For example, if a parameter had its address taken, we may end up with code
+// like:
+// define @f(i32 %n) {
+// %n.addr = alloca i32
+// store %n, %n.addr
+// ...
+// call @coro.begin
+// we need to move the store after coro.begin
+static void moveSpillUsesAfterCoroBegin(Function &F, SpillInfo const &Spills,
+ CoroBeginInst *CoroBegin) {
+ DominatorTree DT(F);
+ SmallVector<Instruction *, 8> NeedsMoving;
+
+ Value *CurrentValue = nullptr;
+
+ for (auto const &E : Spills) {
+ if (CurrentValue == E.def())
+ continue;
+
+ CurrentValue = E.def();
+
+ for (User *U : CurrentValue->users()) {
+ Instruction *I = cast<Instruction>(U);
+ if (!DT.dominates(CoroBegin, I)) {
+        // TODO: Make this more robust. Currently, if we run into a situation
+        // where a simple instruction move won't work, we panic and call
+        // report_fatal_error.
+ for (User *UI : I->users()) {
+ if (!DT.dominates(CoroBegin, cast<Instruction>(UI)))
+ report_fatal_error("cannot move instruction since its users are not"
+ " dominated by CoroBegin");
+ }
+
+ DEBUG(dbgs() << "will move: " << *I << "\n");
+ NeedsMoving.push_back(I);
+ }
+ }
+ }
+
+ Instruction *InsertPt = CoroBegin->getNextNode();
+ for (Instruction *I : NeedsMoving)
+ I->moveBefore(InsertPt);
+}
+
+// Splits the block at a particular instruction unless it is the first
+// instruction in the block with a single predecessor.
+static BasicBlock *splitBlockIfNotFirst(Instruction *I, const Twine &Name) {
+ auto *BB = I->getParent();
+ if (&BB->front() == I) {
+ if (BB->getSinglePredecessor()) {
+ BB->setName(Name);
+ return BB;
+ }
+ }
+ return BB->splitBasicBlock(I, Name);
+}
+
+// Split above and below a particular instruction so that it
+// ends up alone in its own block.
+static void splitAround(Instruction *I, const Twine &Name) {
+ splitBlockIfNotFirst(I, Name);
+ splitBlockIfNotFirst(I->getNextNode(), "After" + Name);
+}
+
+void coro::buildCoroutineFrame(Function &F, Shape &Shape) {
+ // Lower coro.dbg.declare to coro.dbg.value, since we are going to rewrite
+ // access to local variables.
+ LowerDbgDeclare(F);
+
+ Shape.PromiseAlloca = Shape.CoroBegin->getId()->getPromise();
+ if (Shape.PromiseAlloca) {
+ Shape.CoroBegin->getId()->clearPromise();
+ }
+
+ // Make sure that all coro.save, coro.suspend and the fallthrough coro.end
+ // intrinsics are in their own blocks to simplify the logic of building up
+ // SuspendCrossing data.
+ for (CoroSuspendInst *CSI : Shape.CoroSuspends) {
+ splitAround(CSI->getCoroSave(), "CoroSave");
+ splitAround(CSI, "CoroSuspend");
+ }
+
+ // Put fallthrough CoroEnd into its own block. Note: Shape::buildFrom places
+ // the fallthrough coro.end as the first element of CoroEnds array.
+ splitAround(Shape.CoroEnds.front(), "CoroEnd");
+
+  // Transform multi-edge PHI nodes so that any value feeding into a PHI will
+  // never have its definition separated from the PHI by a suspend point.
+ rewritePHIs(F);
+
+ // Build suspend crossing info.
+ SuspendCrossingInfo Checker(F, Shape);
+
+ IRBuilder<> Builder(F.getContext());
+ SpillInfo Spills;
+
+ // See if there are materializable instructions across suspend points.
+ for (Instruction &I : instructions(F))
+ if (materializable(I))
+ for (User *U : I.users())
+ if (Checker.isDefinitionAcrossSuspend(I, U))
+ Spills.emplace_back(&I, U);
+
+ // Rewrite materializable instructions to be materialized at the use point.
+ std::sort(Spills.begin(), Spills.end());
+ DEBUG(dump("Materializations", Spills));
+ rewriteMaterializableInstructions(Builder, Spills);
+
+ // Collect the spills for arguments and other not-materializable values.
+ Spills.clear();
+ for (Argument &A : F.getArgumentList())
+ for (User *U : A.users())
+ if (Checker.isDefinitionAcrossSuspend(A, U))
+ Spills.emplace_back(&A, U);
+
+ for (Instruction &I : instructions(F)) {
+ // Values returned from coroutine structure intrinsics should not be part
+ // of the Coroutine Frame.
+ if (isCoroutineStructureIntrinsic(I))
+ continue;
+    // The coroutine promise is always included in the coroutine frame; no
+    // need to check for suspend crossing.
+ if (Shape.PromiseAlloca == &I)
+ continue;
+
+ for (User *U : I.users())
+ if (Checker.isDefinitionAcrossSuspend(I, U)) {
+ // We cannot spill a token.
+ if (I.getType()->isTokenTy())
+ report_fatal_error(
+ "token definition is separated from the use by a suspend point");
+ assert(!materializable(I) &&
+ "rewriteMaterializable did not do its job");
+ Spills.emplace_back(&I, U);
+ }
+ }
+ std::sort(Spills.begin(), Spills.end());
+ DEBUG(dump("Spills", Spills));
+ moveSpillUsesAfterCoroBegin(F, Spills, Shape.CoroBegin);
+ Shape.FrameTy = buildFrameType(F, Shape, Spills);
+ Shape.FramePtr = insertSpills(Spills, Shape);
+}
diff --git a/contrib/llvm/lib/Transforms/Coroutines/CoroInstr.h b/contrib/llvm/lib/Transforms/Coroutines/CoroInstr.h
new file mode 100644
index 000000000000..e03cef4bfc46
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Coroutines/CoroInstr.h
@@ -0,0 +1,318 @@
+//===-- CoroInstr.h - Coroutine Intrinsics Instruction Wrappers -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This file defines classes that make it really easy to deal with intrinsic
+// functions with the isa/dyncast family of functions. In particular, this
+// allows you to do things like:
+//
+// if (auto *SF = dyn_cast<CoroSubFnInst>(Inst))
+// ... SF->getFrame() ...
+//
+// All intrinsic function calls are instances of the call instruction, so these
+// are all subclasses of the CallInst class. Note that none of these classes
+// has state or virtual methods, which is an important part of this gross/neat
+// hack working.
+//
+// The helpful comment above is borrowed from llvm/IntrinsicInst.h, we keep
+// coroutine intrinsic wrappers here since they are only used by the passes in
+// the Coroutine library.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IntrinsicInst.h"
+
+namespace llvm {
+
+/// This class represents the llvm.coro.subfn.addr instruction.
+class LLVM_LIBRARY_VISIBILITY CoroSubFnInst : public IntrinsicInst {
+ enum { FrameArg, IndexArg };
+
+public:
+ enum ResumeKind {
+ RestartTrigger = -1,
+ ResumeIndex,
+ DestroyIndex,
+ CleanupIndex,
+ IndexLast,
+ IndexFirst = RestartTrigger
+ };
+
+ Value *getFrame() const { return getArgOperand(FrameArg); }
+ ResumeKind getIndex() const {
+ int64_t Index = getRawIndex()->getValue().getSExtValue();
+ assert(Index >= IndexFirst && Index < IndexLast &&
+ "unexpected CoroSubFnInst index argument");
+ return static_cast<ResumeKind>(Index);
+ }
+
+ ConstantInt *getRawIndex() const {
+ return cast<ConstantInt>(getArgOperand(IndexArg));
+ }
+
+ // Methods to support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const IntrinsicInst *I) {
+ return I->getIntrinsicID() == Intrinsic::coro_subfn_addr;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+};
+
+/// This represents the llvm.coro.alloc instruction.
+class LLVM_LIBRARY_VISIBILITY CoroAllocInst : public IntrinsicInst {
+public:
+ // Methods to support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const IntrinsicInst *I) {
+ return I->getIntrinsicID() == Intrinsic::coro_alloc;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+};
+
+/// This represents the llvm.coro.id instruction.
+class LLVM_LIBRARY_VISIBILITY CoroIdInst : public IntrinsicInst {
+ enum { AlignArg, PromiseArg, CoroutineArg, InfoArg };
+
+public:
+ CoroAllocInst *getCoroAlloc() {
+ for (User *U : users())
+ if (auto *CA = dyn_cast<CoroAllocInst>(U))
+ return CA;
+ return nullptr;
+ }
+
+ IntrinsicInst *getCoroBegin() {
+ for (User *U : users())
+ if (auto *II = dyn_cast<IntrinsicInst>(U))
+ if (II->getIntrinsicID() == Intrinsic::coro_begin)
+ return II;
+ llvm_unreachable("no coro.begin associated with coro.id");
+ }
+
+ AllocaInst *getPromise() const {
+ Value *Arg = getArgOperand(PromiseArg);
+ return isa<ConstantPointerNull>(Arg)
+ ? nullptr
+ : cast<AllocaInst>(Arg->stripPointerCasts());
+ }
+
+ void clearPromise() {
+ Value *Arg = getArgOperand(PromiseArg);
+ setArgOperand(PromiseArg,
+ ConstantPointerNull::get(Type::getInt8PtrTy(getContext())));
+ if (isa<AllocaInst>(Arg))
+ return;
+ assert((isa<BitCastInst>(Arg) || isa<GetElementPtrInst>(Arg)) &&
+ "unexpected instruction designating the promise");
+ // TODO: Add a check that any remaining users of Inst are after coro.begin
+ // or add code to move the users after coro.begin.
+ auto *Inst = cast<Instruction>(Arg);
+ if (Inst->use_empty()) {
+ Inst->eraseFromParent();
+ return;
+ }
+ Inst->moveBefore(getCoroBegin()->getNextNode());
+ }
+
+ // Info argument of coro.id is
+ // fresh out of the frontend: null ;
+ // outlined : {Init, Return, Susp1, Susp2, ...} ;
+ // postsplit : [resume, destroy, cleanup] ;
+ //
+  // If parts of the coroutine were outlined to protect against undesirable
+  // code motion, these functions will be stored in a struct literal referred
+  // to by the Info parameter. Note: this is only needed before the coroutine
+  // is split.
+  //
+  // After the coroutine is split, resume functions are stored in an array
+  // referred to by this parameter.
+
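+  // Illustrative sketch of a post-split Info constant (names made up here):
+  //   @f.resumers = constant [3 x void (%f.Frame*)*]
+  //     [void (%f.Frame*)* @f.resume, void (%f.Frame*)* @f.destroy,
+  //      void (%f.Frame*)* @f.cleanup]
+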
+ struct Info {
+ ConstantStruct *OutlinedParts = nullptr;
+ ConstantArray *Resumers = nullptr;
+
+ bool hasOutlinedParts() const { return OutlinedParts != nullptr; }
+ bool isPostSplit() const { return Resumers != nullptr; }
+ bool isPreSplit() const { return !isPostSplit(); }
+ };
+ Info getInfo() const {
+ Info Result;
+ auto *GV = dyn_cast<GlobalVariable>(getRawInfo());
+ if (!GV)
+ return Result;
+
+ assert(GV->isConstant() && GV->hasDefinitiveInitializer());
+ Constant *Initializer = GV->getInitializer();
+ if ((Result.OutlinedParts = dyn_cast<ConstantStruct>(Initializer)))
+ return Result;
+
+ Result.Resumers = cast<ConstantArray>(Initializer);
+ return Result;
+ }
+ Constant *getRawInfo() const {
+ return cast<Constant>(getArgOperand(InfoArg)->stripPointerCasts());
+ }
+
+ void setInfo(Constant *C) { setArgOperand(InfoArg, C); }
+
+ Function *getCoroutine() const {
+ return cast<Function>(getArgOperand(CoroutineArg)->stripPointerCasts());
+ }
+ void setCoroutineSelf() {
+ assert(isa<ConstantPointerNull>(getArgOperand(CoroutineArg)) &&
+ "Coroutine argument is already assigned");
+ auto *const Int8PtrTy = Type::getInt8PtrTy(getContext());
+ setArgOperand(CoroutineArg,
+ ConstantExpr::getBitCast(getFunction(), Int8PtrTy));
+ }
+
+ // Methods to support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const IntrinsicInst *I) {
+ return I->getIntrinsicID() == Intrinsic::coro_id;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+};
+
+/// This represents the llvm.coro.frame instruction.
+class LLVM_LIBRARY_VISIBILITY CoroFrameInst : public IntrinsicInst {
+public:
+ // Methods to support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const IntrinsicInst *I) {
+ return I->getIntrinsicID() == Intrinsic::coro_frame;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+};
+
+/// This represents the llvm.coro.free instruction.
+class LLVM_LIBRARY_VISIBILITY CoroFreeInst : public IntrinsicInst {
+ enum { IdArg, FrameArg };
+
+public:
+ Value *getFrame() const { return getArgOperand(FrameArg); }
+
+ // Methods to support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const IntrinsicInst *I) {
+ return I->getIntrinsicID() == Intrinsic::coro_free;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+};
+
+/// This class represents the llvm.coro.begin instruction.
+class LLVM_LIBRARY_VISIBILITY CoroBeginInst : public IntrinsicInst {
+ enum { IdArg, MemArg };
+
+public:
+ CoroIdInst *getId() const { return cast<CoroIdInst>(getArgOperand(IdArg)); }
+
+ Value *getMem() const { return getArgOperand(MemArg); }
+
+ // Methods to support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const IntrinsicInst *I) {
+ return I->getIntrinsicID() == Intrinsic::coro_begin;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+};
+
+/// This represents the llvm.coro.save instruction.
+class LLVM_LIBRARY_VISIBILITY CoroSaveInst : public IntrinsicInst {
+public:
+ // Methods to support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const IntrinsicInst *I) {
+ return I->getIntrinsicID() == Intrinsic::coro_save;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+};
+
+/// This represents the llvm.coro.promise instruction.
+class LLVM_LIBRARY_VISIBILITY CoroPromiseInst : public IntrinsicInst {
+ enum { FrameArg, AlignArg, FromArg };
+
+public:
+ bool isFromPromise() const {
+ return cast<Constant>(getArgOperand(FromArg))->isOneValue();
+ }
+ unsigned getAlignment() const {
+ return cast<ConstantInt>(getArgOperand(AlignArg))->getZExtValue();
+ }
+
+ // Methods to support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const IntrinsicInst *I) {
+ return I->getIntrinsicID() == Intrinsic::coro_promise;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+};
+
+/// This represents the llvm.coro.suspend instruction.
+class LLVM_LIBRARY_VISIBILITY CoroSuspendInst : public IntrinsicInst {
+ enum { SaveArg, FinalArg };
+
+public:
+ CoroSaveInst *getCoroSave() const {
+ Value *Arg = getArgOperand(SaveArg);
+ if (auto *SI = dyn_cast<CoroSaveInst>(Arg))
+ return SI;
+ assert(isa<ConstantTokenNone>(Arg));
+ return nullptr;
+ }
+ bool isFinal() const {
+ return cast<Constant>(getArgOperand(FinalArg))->isOneValue();
+ }
+
+ // Methods to support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const IntrinsicInst *I) {
+ return I->getIntrinsicID() == Intrinsic::coro_suspend;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+};
+
+/// This represents the llvm.coro.size instruction.
+class LLVM_LIBRARY_VISIBILITY CoroSizeInst : public IntrinsicInst {
+public:
+ // Methods to support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const IntrinsicInst *I) {
+ return I->getIntrinsicID() == Intrinsic::coro_size;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+};
+
+/// This represents the llvm.coro.end instruction.
+class LLVM_LIBRARY_VISIBILITY CoroEndInst : public IntrinsicInst {
+ enum { FrameArg, UnwindArg };
+
+public:
+ bool isFallthrough() const { return !isUnwind(); }
+ bool isUnwind() const {
+ return cast<Constant>(getArgOperand(UnwindArg))->isOneValue();
+ }
+
+ // Methods to support type inquiry through isa, cast, and dyn_cast:
+ static inline bool classof(const IntrinsicInst *I) {
+ return I->getIntrinsicID() == Intrinsic::coro_end;
+ }
+ static inline bool classof(const Value *V) {
+ return isa<IntrinsicInst>(V) && classof(cast<IntrinsicInst>(V));
+ }
+};
+
+} // End namespace llvm.
diff --git a/contrib/llvm/lib/Transforms/Coroutines/CoroInternal.h b/contrib/llvm/lib/Transforms/Coroutines/CoroInternal.h
new file mode 100644
index 000000000000..1eac88dbac3a
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Coroutines/CoroInternal.h
@@ -0,0 +1,107 @@
+//===- CoroInternal.h - Internal Coroutine interfaces ---------*- C++ -*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Common definitions/declarations used internally by coroutine lowering passes.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TRANSFORMS_COROUTINES_COROINTERNAL_H
+#define LLVM_LIB_TRANSFORMS_COROUTINES_COROINTERNAL_H
+
+#include "CoroInstr.h"
+#include "llvm/Transforms/Coroutines.h"
+
+namespace llvm {
+
+class CallGraph;
+class CallGraphSCC;
+class PassRegistry;
+
+void initializeCoroEarlyPass(PassRegistry &);
+void initializeCoroSplitPass(PassRegistry &);
+void initializeCoroElidePass(PassRegistry &);
+void initializeCoroCleanupPass(PassRegistry &);
+
+// The CoroEarly pass marks every function that has coro.begin with a string
+// attribute "coroutine.presplit"="0". The CoroSplit pass processes the
+// coroutine twice. First, it lets the coroutine go through the complete IPO
+// optimization pipeline as a single function. It forces a restart of the
+// pipeline by inserting an indirect call to an empty function
+// "coro.devirt.trigger"; when that call is devirtualized by the CoroElide
+// pass, CGPassManager restarts the pipeline. When the CoroSplit pass sees the
+// same coroutine the second time, it splits it up and adds the coroutine
+// subfunctions to the SCC to be processed by the IPO pipeline.
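+//
+// A rough sketch of the protocol, inferred from the passes below (illustrative
+// only, not normative):
+//   frontend emits coro.begin     -> CoroEarly: "coroutine.presplit"="0"
+//   CoroSplit, first visit        -> insert devirt trigger call, set "coroutine.presplit"="1"
+//   CoroElide devirtualizes call  -> CGPassManager restarts the SCC pipeline
+//   CoroSplit, second visit       -> drop the attribute, split into resume/destroy/cleanup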
+
+#define CORO_PRESPLIT_ATTR "coroutine.presplit"
+#define UNPREPARED_FOR_SPLIT "0"
+#define PREPARED_FOR_SPLIT "1"
+
+#define CORO_DEVIRT_TRIGGER_FN "coro.devirt.trigger"
+
+namespace coro {
+
+bool declaresIntrinsics(Module &M, std::initializer_list<StringRef>);
+void replaceAllCoroAllocs(CoroBeginInst *CB, bool Replacement);
+void replaceAllCoroFrees(CoroBeginInst *CB, Value *Replacement);
+void replaceCoroFree(CoroIdInst *CoroId, bool Elide);
+void updateCallGraph(Function &Caller, ArrayRef<Function *> Funcs,
+ CallGraph &CG, CallGraphSCC &SCC);
+
+// Keeps data and helper functions for lowering coroutine intrinsics.
+struct LowererBase {
+ Module &TheModule;
+ LLVMContext &Context;
+ PointerType *const Int8Ptr;
+ FunctionType *const ResumeFnType;
+ ConstantPointerNull *const NullPtr;
+
+ LowererBase(Module &M);
+ Value *makeSubFnCall(Value *Arg, int Index, Instruction *InsertPt);
+};
+
+// Holds structural Coroutine Intrinsics for a particular function and other
+// values used during CoroSplit pass.
+struct LLVM_LIBRARY_VISIBILITY Shape {
+ CoroBeginInst *CoroBegin;
+ SmallVector<CoroEndInst *, 4> CoroEnds;
+ SmallVector<CoroSizeInst *, 2> CoroSizes;
+ SmallVector<CoroSuspendInst *, 4> CoroSuspends;
+
+ // Field Indexes for known coroutine frame fields.
+ enum {
+ ResumeField,
+ DestroyField,
+ PromiseField,
+ IndexField,
+ LastKnownField = IndexField
+ };
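+
+ // A rough sketch of the frame layout implied by the field indexes above; the
+ // exact types (and any spilled values that follow) are determined by
+ // buildCoroutineFrame and are only illustrative here:
+ //   %f.Frame = type { void (%f.Frame*)*,  ; ResumeField
+ //                     void (%f.Frame*)*,  ; DestroyField
+ //                     <promise>,          ; PromiseField
+ //                     i32,                ; IndexField (some integer type)
+ //                     ... spills ... }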
+
+ StructType *FrameTy;
+ Instruction *FramePtr;
+ BasicBlock *AllocaSpillBlock;
+ SwitchInst *ResumeSwitch;
+ AllocaInst *PromiseAlloca;
+ bool HasFinalSuspend;
+
+ IntegerType *getIndexType() const {
+ assert(FrameTy && "frame type not assigned");
+ return cast<IntegerType>(FrameTy->getElementType(IndexField));
+ }
+ ConstantInt *getIndex(uint64_t Value) const {
+ return ConstantInt::get(getIndexType(), Value);
+ }
+
+ Shape() = default;
+ explicit Shape(Function &F) { buildFrom(F); }
+ void buildFrom(Function &F);
+};
+
+void buildCoroutineFrame(Function &F, Shape &Shape);
+
+} // End namespace coro.
+} // End namespace llvm
+
+#endif
diff --git a/contrib/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/contrib/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
new file mode 100644
index 000000000000..7a3f4f60bae9
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -0,0 +1,640 @@
+//===- CoroSplit.cpp - Converts a coroutine into a state machine ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This pass builds the coroutine frame and outlines resume and destroy parts
+// of the coroutine into separate functions.
+//
+// We present a coroutine to LLVM as an ordinary function with suspension
+// points marked up with intrinsics. We let the optimizer party on the coroutine
+// as a single function for as long as possible. Shortly before the coroutine is
+// eligible to be inlined into its callers, we split up the coroutine into parts
+// corresponding to the initial, resume and destroy invocations of the
+// coroutine, add them to the current SCC and restart the IPO pipeline to
+// optimize the coroutine subfunctions we extracted before proceeding to the
+// caller of the coroutine.
+//===----------------------------------------------------------------------===//
+
+#include "CoroInternal.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "coro-split"
+
+// Create an entry block for a resume function with a switch that will jump to
+// suspend points.
+static BasicBlock *createResumeEntryBlock(Function &F, coro::Shape &Shape) {
+ LLVMContext &C = F.getContext();
+
+ // resume.entry:
+ // %index.addr = getelementptr inbounds %f.Frame, %f.Frame* %FramePtr, i32 0,
+ // i32 2
+ // % index = load i32, i32* %index.addr
+ // switch i32 %index, label %unreachable [
+ // i32 0, label %resume.0
+ // i32 1, label %resume.1
+ // ...
+ // ]
+
+ auto *NewEntry = BasicBlock::Create(C, "resume.entry", &F);
+ auto *UnreachBB = BasicBlock::Create(C, "unreachable", &F);
+
+ IRBuilder<> Builder(NewEntry);
+ auto *FramePtr = Shape.FramePtr;
+ auto *FrameTy = Shape.FrameTy;
+ auto *GepIndex = Builder.CreateConstInBoundsGEP2_32(
+ FrameTy, FramePtr, 0, coro::Shape::IndexField, "index.addr");
+ auto *Index = Builder.CreateLoad(GepIndex, "index");
+ auto *Switch =
+ Builder.CreateSwitch(Index, UnreachBB, Shape.CoroSuspends.size());
+ Shape.ResumeSwitch = Switch;
+
+ size_t SuspendIndex = 0;
+ for (CoroSuspendInst *S : Shape.CoroSuspends) {
+ ConstantInt *IndexVal = Shape.getIndex(SuspendIndex);
+
+ // Replace CoroSave with a store to Index:
+ // %index.addr = getelementptr %f.frame... (index field number)
+ // store i32 0, i32* %index.addr1
+ auto *Save = S->getCoroSave();
+ Builder.SetInsertPoint(Save);
+ if (S->isFinal()) {
+ // Final suspend point is represented by storing zero in ResumeFnAddr.
+ auto *GepIndex = Builder.CreateConstInBoundsGEP2_32(FrameTy, FramePtr, 0,
+ 0, "ResumeFn.addr");
+ auto *NullPtr = ConstantPointerNull::get(cast<PointerType>(
+ cast<PointerType>(GepIndex->getType())->getElementType()));
+ Builder.CreateStore(NullPtr, GepIndex);
+ } else {
+ auto *GepIndex = Builder.CreateConstInBoundsGEP2_32(
+ FrameTy, FramePtr, 0, coro::Shape::IndexField, "index.addr");
+ Builder.CreateStore(IndexVal, GepIndex);
+ }
+ Save->replaceAllUsesWith(ConstantTokenNone::get(C));
+ Save->eraseFromParent();
+
+ // Split block before and after coro.suspend and add a jump from an entry
+ // switch:
+ //
+ // whateverBB:
+ // whatever
+ // %0 = call i8 @llvm.coro.suspend(token none, i1 false)
+ // switch i8 %0, label %suspend[i8 0, label %resume
+ // i8 1, label %cleanup]
+ // becomes:
+ //
+ // whateverBB:
+ // whatever
+ // br label %resume.0.landing
+ //
+ // resume.0: ; <--- jump from the switch in the resume.entry
+ // %0 = tail call i8 @llvm.coro.suspend(token none, i1 false)
+ // br label %resume.0.landing
+ //
+ // resume.0.landing:
+ // %1 = phi i8[-1, %whateverBB], [%0, %resume.0]
+ // switch i8 % 1, label %suspend [i8 0, label %resume
+ // i8 1, label %cleanup]
+
+ auto *SuspendBB = S->getParent();
+ auto *ResumeBB =
+ SuspendBB->splitBasicBlock(S, "resume." + Twine(SuspendIndex));
+ auto *LandingBB = ResumeBB->splitBasicBlock(
+ S->getNextNode(), ResumeBB->getName() + Twine(".landing"));
+ Switch->addCase(IndexVal, ResumeBB);
+
+ cast<BranchInst>(SuspendBB->getTerminator())->setSuccessor(0, LandingBB);
+ auto *PN = PHINode::Create(Builder.getInt8Ty(), 2, "", &LandingBB->front());
+ S->replaceAllUsesWith(PN);
+ PN->addIncoming(Builder.getInt8(-1), SuspendBB);
+ PN->addIncoming(S, ResumeBB);
+
+ ++SuspendIndex;
+ }
+
+ Builder.SetInsertPoint(UnreachBB);
+ Builder.CreateUnreachable();
+
+ return NewEntry;
+}
+
+// In Resumers, we replace fallthrough coro.end with ret void and delete the
+// rest of the block.
+static void replaceFallthroughCoroEnd(IntrinsicInst *End,
+ ValueToValueMapTy &VMap) {
+ auto *NewE = cast<IntrinsicInst>(VMap[End]);
+ ReturnInst::Create(NewE->getContext(), nullptr, NewE);
+
+ // Remove the rest of the block by splitting it into an unreachable block.
+ auto *BB = NewE->getParent();
+ BB->splitBasicBlock(NewE);
+ BB->getTerminator()->eraseFromParent();
+}
+
+// Rewrite final suspend point handling. We do not use a suspend index to
+// represent the final suspend point. Instead we zero out ResumeFnAddr in the
+// coroutine frame, since it is undefined behavior to resume a coroutine
+// suspended at the final suspend point. Thus, in the resume function, we can
+// simply remove the last case (when coro::Shape is built, the final suspend
+// point, if present, is always the last element of the CoroSuspends array).
+// In the destroy function, we add a code sequence to check if ResumeFnAddr
+// is null, and if so, jump to the appropriate label to handle cleanup from the
+// final suspend point.
+static void handleFinalSuspend(IRBuilder<> &Builder, Value *FramePtr,
+ coro::Shape &Shape, SwitchInst *Switch,
+ bool IsDestroy) {
+ assert(Shape.HasFinalSuspend);
+ auto FinalCase = --Switch->case_end();
+ BasicBlock *ResumeBB = FinalCase.getCaseSuccessor();
+ Switch->removeCase(FinalCase);
+ if (IsDestroy) {
+ BasicBlock *OldSwitchBB = Switch->getParent();
+ auto *NewSwitchBB = OldSwitchBB->splitBasicBlock(Switch, "Switch");
+ Builder.SetInsertPoint(OldSwitchBB->getTerminator());
+ auto *GepIndex = Builder.CreateConstInBoundsGEP2_32(Shape.FrameTy, FramePtr,
+ 0, 0, "ResumeFn.addr");
+ auto *Load = Builder.CreateLoad(GepIndex);
+ auto *NullPtr =
+ ConstantPointerNull::get(cast<PointerType>(Load->getType()));
+ auto *Cond = Builder.CreateICmpEQ(Load, NullPtr);
+ Builder.CreateCondBr(Cond, ResumeBB, NewSwitchBB);
+ OldSwitchBB->getTerminator()->eraseFromParent();
+ }
+}
+
+// Create a resume clone by cloning the body of the original function, setting
+// a new entry block and replacing coro.suspend with an appropriate value to
+// force resume or cleanup at every suspend point.
+static Function *createClone(Function &F, Twine Suffix, coro::Shape &Shape,
+ BasicBlock *ResumeEntry, int8_t FnIndex) {
+ Module *M = F.getParent();
+ auto *FrameTy = Shape.FrameTy;
+ auto *FnPtrTy = cast<PointerType>(FrameTy->getElementType(0));
+ auto *FnTy = cast<FunctionType>(FnPtrTy->getElementType());
+
+ Function *NewF =
+ Function::Create(FnTy, GlobalValue::LinkageTypes::InternalLinkage,
+ F.getName() + Suffix, M);
+ NewF->addAttribute(1, Attribute::NonNull);
+ NewF->addAttribute(1, Attribute::NoAlias);
+
+ ValueToValueMapTy VMap;
+ // Replace all args with undefs. The buildCoroutineFrame algorithm has
+ // already rewritten accesses to the args that occur after suspend points
+ // with loads and stores to/from the coroutine frame.
+ for (Argument &A : F.getArgumentList())
+ VMap[&A] = UndefValue::get(A.getType());
+
+ SmallVector<ReturnInst *, 4> Returns;
+
+ if (DISubprogram *SP = F.getSubprogram()) {
+ // If we have debug info, add mapping for the metadata nodes that should not
+ // be cloned by CloneFunctionInto.
+ auto &MD = VMap.MD();
+ MD[SP->getUnit()].reset(SP->getUnit());
+ MD[SP->getType()].reset(SP->getType());
+ MD[SP->getFile()].reset(SP->getFile());
+ }
+ CloneFunctionInto(NewF, &F, VMap, /*ModuleLevelChanges=*/true, Returns);
+
+ // Remove old returns.
+ for (ReturnInst *Return : Returns)
+ changeToUnreachable(Return, /*UseLLVMTrap=*/false);
+
+ // Remove old return attributes.
+ NewF->removeAttributes(
+ AttributeSet::ReturnIndex,
+ AttributeSet::get(
+ NewF->getContext(), AttributeSet::ReturnIndex,
+ AttributeFuncs::typeIncompatible(NewF->getReturnType())));
+
+ // Make AllocaSpillBlock the new entry block.
+ auto *SwitchBB = cast<BasicBlock>(VMap[ResumeEntry]);
+ auto *Entry = cast<BasicBlock>(VMap[Shape.AllocaSpillBlock]);
+ Entry->moveBefore(&NewF->getEntryBlock());
+ Entry->getTerminator()->eraseFromParent();
+ BranchInst::Create(SwitchBB, Entry);
+ Entry->setName("entry" + Suffix);
+
+ // Clear all predecessors of the new entry block.
+ auto *Switch = cast<SwitchInst>(VMap[Shape.ResumeSwitch]);
+ Entry->replaceAllUsesWith(Switch->getDefaultDest());
+
+ IRBuilder<> Builder(&NewF->getEntryBlock().front());
+
+ // Remap frame pointer.
+ Argument *NewFramePtr = &NewF->getArgumentList().front();
+ Value *OldFramePtr = cast<Value>(VMap[Shape.FramePtr]);
+ NewFramePtr->takeName(OldFramePtr);
+ OldFramePtr->replaceAllUsesWith(NewFramePtr);
+
+ // Remap vFrame pointer.
+ auto *NewVFrame = Builder.CreateBitCast(
+ NewFramePtr, Type::getInt8PtrTy(Builder.getContext()), "vFrame");
+ Value *OldVFrame = cast<Value>(VMap[Shape.CoroBegin]);
+ OldVFrame->replaceAllUsesWith(NewVFrame);
+
+ // Rewrite final suspend handling, as it is not done via the switch (this
+ // allows removing the final case from the switch, since it is undefined
+ // behavior to resume a coroutine suspended at the final suspend point).
+ if (Shape.HasFinalSuspend) {
+ auto *Switch = cast<SwitchInst>(VMap[Shape.ResumeSwitch]);
+ bool IsDestroy = FnIndex != 0;
+ handleFinalSuspend(Builder, NewFramePtr, Shape, Switch, IsDestroy);
+ }
+
+ // Replace coro suspend with the appropriate resume index.
+ // Replacing coro.suspend with (0) will result in control flow proceeding to
+ // a resume label associated with a suspend point, replacing it with (1) will
+ // result in control flow proceeding to a cleanup label associated with this
+ // suspend point.
+ auto *NewValue = Builder.getInt8(FnIndex ? 1 : 0);
+ for (CoroSuspendInst *CS : Shape.CoroSuspends) {
+ auto *MappedCS = cast<CoroSuspendInst>(VMap[CS]);
+ MappedCS->replaceAllUsesWith(NewValue);
+ MappedCS->eraseFromParent();
+ }
+
+ // Remove coro.end intrinsics.
+ replaceFallthroughCoroEnd(Shape.CoroEnds.front(), VMap);
+ // FIXME: coming in upcoming patches:
+ // replaceUnwindCoroEnds(Shape.CoroEnds, VMap);
+
+ // Eliminate coro.free from the clones, replacing it with 'null' in cleanup,
+ // to suppress deallocation code.
+ coro::replaceCoroFree(cast<CoroIdInst>(VMap[Shape.CoroBegin->getId()]),
+ /*Elide=*/FnIndex == 2);
+
+ NewF->setCallingConv(CallingConv::Fast);
+
+ return NewF;
+}
+
+static void removeCoroEnds(coro::Shape &Shape) {
+ for (CoroEndInst *CE : Shape.CoroEnds)
+ CE->eraseFromParent();
+}
+
+static void replaceFrameSize(coro::Shape &Shape) {
+ if (Shape.CoroSizes.empty())
+ return;
+
+ // In the same function all coro.sizes should have the same result type.
+ auto *SizeIntrin = Shape.CoroSizes.back();
+ Module *M = SizeIntrin->getModule();
+ const DataLayout &DL = M->getDataLayout();
+ auto Size = DL.getTypeAllocSize(Shape.FrameTy);
+ auto *SizeConstant = ConstantInt::get(SizeIntrin->getType(), Size);
+
+ for (CoroSizeInst *CS : Shape.CoroSizes) {
+ CS->replaceAllUsesWith(SizeConstant);
+ CS->eraseFromParent();
+ }
+}
+
+// Create a global constant array containing pointers to the functions provided
+// and set the Info parameter of CoroBegin's coro.id to point at this constant.
+// Example:
+//
+// @f.resumers = internal constant [2 x void(%f.frame*)*]
+// [void(%f.frame*)* @f.resume, void(%f.frame*)* @f.destroy]
+// define void @f() {
+// ...
+// call i8* @llvm.coro.begin(i8* null, i32 0, i8* null,
+// i8* bitcast([2 x void(%f.frame*)*] * @f.resumers to i8*))
+//
+// Assumes that all the functions have the same signature.
+static void setCoroInfo(Function &F, CoroBeginInst *CoroBegin,
+ std::initializer_list<Function *> Fns) {
+
+ SmallVector<Constant *, 4> Args(Fns.begin(), Fns.end());
+ assert(!Args.empty());
+ Function *Part = *Fns.begin();
+ Module *M = Part->getParent();
+ auto *ArrTy = ArrayType::get(Part->getType(), Args.size());
+
+ auto *ConstVal = ConstantArray::get(ArrTy, Args);
+ auto *GV = new GlobalVariable(*M, ConstVal->getType(), /*isConstant=*/true,
+ GlobalVariable::PrivateLinkage, ConstVal,
+ F.getName() + Twine(".resumers"));
+
+ // Update coro.begin instruction to refer to this constant.
+ LLVMContext &C = F.getContext();
+ auto *BC = ConstantExpr::getPointerCast(GV, Type::getInt8PtrTy(C));
+ CoroBegin->getId()->setInfo(BC);
+}
+
+// Store addresses of Resume/Destroy/Cleanup functions in the coroutine frame.
+static void updateCoroFrame(coro::Shape &Shape, Function *ResumeFn,
+ Function *DestroyFn, Function *CleanupFn) {
+
+ IRBuilder<> Builder(Shape.FramePtr->getNextNode());
+ auto *ResumeAddr = Builder.CreateConstInBoundsGEP2_32(
+ Shape.FrameTy, Shape.FramePtr, 0, coro::Shape::ResumeField,
+ "resume.addr");
+ Builder.CreateStore(ResumeFn, ResumeAddr);
+
+ Value *DestroyOrCleanupFn = DestroyFn;
+
+ CoroIdInst *CoroId = Shape.CoroBegin->getId();
+ if (CoroAllocInst *CA = CoroId->getCoroAlloc()) {
+ // If there is a CoroAlloc and it returns false (meaning we elided the
+ // allocation), use CleanupFn instead of DestroyFn.
+ DestroyOrCleanupFn = Builder.CreateSelect(CA, DestroyFn, CleanupFn);
+ }
+
+ auto *DestroyAddr = Builder.CreateConstInBoundsGEP2_32(
+ Shape.FrameTy, Shape.FramePtr, 0, coro::Shape::DestroyField,
+ "destroy.addr");
+ Builder.CreateStore(DestroyOrCleanupFn, DestroyAddr);
+}
+
+static void postSplitCleanup(Function &F) {
+ removeUnreachableBlocks(F);
+ llvm::legacy::FunctionPassManager FPM(F.getParent());
+
+ FPM.add(createVerifierPass());
+ FPM.add(createSCCPPass());
+ FPM.add(createCFGSimplificationPass());
+ FPM.add(createEarlyCSEPass());
+ FPM.add(createCFGSimplificationPass());
+
+ FPM.doInitialization();
+ FPM.run(F);
+ FPM.doFinalization();
+}
+
+// The coroutine has no suspend points. Remove the heap allocation for the
+// coroutine frame if possible.
+static void handleNoSuspendCoroutine(CoroBeginInst *CoroBegin, Type *FrameTy) {
+ auto *CoroId = CoroBegin->getId();
+ auto *AllocInst = CoroId->getCoroAlloc();
+ coro::replaceCoroFree(CoroId, /*Elide=*/AllocInst != nullptr);
+ if (AllocInst) {
+ IRBuilder<> Builder(AllocInst);
+ // FIXME: Need to handle overaligned members.
+ auto *Frame = Builder.CreateAlloca(FrameTy);
+ auto *VFrame = Builder.CreateBitCast(Frame, Builder.getInt8PtrTy());
+ AllocInst->replaceAllUsesWith(Builder.getFalse());
+ AllocInst->eraseFromParent();
+ CoroBegin->replaceAllUsesWith(VFrame);
+ } else {
+ CoroBegin->replaceAllUsesWith(CoroBegin->getMem());
+ }
+ CoroBegin->eraseFromParent();
+}
+
+// Look for a very simple pattern:
+//   coro.save
+//   no other calls
+//   resume or destroy call
+//   coro.suspend
+//
+// If there are other calls between coro.save and coro.suspend, they can
+// potentially resume or destroy the coroutine, so it is unsafe to eliminate a
+// suspend point.
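+//
+// An illustrative IR shape of the pattern (hypothetical values; at this point
+// the resume/destroy call appears as an indirect call through
+// llvm.coro.subfn.addr, i.e. a CoroSubFnInst, which is what we look for below):
+//   %save = call token @llvm.coro.save(i8* %hdl)
+//   %addr = call i8* @llvm.coro.subfn.addr(i8* %other.hdl, i8 0)
+//   %fn   = bitcast i8* %addr to void (i8*)*
+//   call void %fn(i8* %other.hdl)
+//   %s    = call i8 @llvm.coro.suspend(token %save, i1 false)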
+static bool simplifySuspendPoint(CoroSuspendInst *Suspend,
+ CoroBeginInst *CoroBegin) {
+ auto *Save = Suspend->getCoroSave();
+ auto *BB = Suspend->getParent();
+ if (BB != Save->getParent())
+ return false;
+
+ CallSite SingleCallSite;
+
+ // Check that we have only one CallSite.
+ for (Instruction *I = Save->getNextNode(); I != Suspend;
+ I = I->getNextNode()) {
+ if (isa<CoroFrameInst>(I))
+ continue;
+ if (isa<CoroSubFnInst>(I))
+ continue;
+ if (CallSite CS = CallSite(I)) {
+ if (SingleCallSite)
+ return false;
+ else
+ SingleCallSite = CS;
+ }
+ }
+ auto *CallInstr = SingleCallSite.getInstruction();
+ if (!CallInstr)
+ return false;
+
+ auto *Callee = SingleCallSite.getCalledValue()->stripPointerCasts();
+
+ // See if the callsite is for resumption or destruction of the coroutine.
+ auto *SubFn = dyn_cast<CoroSubFnInst>(Callee);
+ if (!SubFn)
+ return false;
+
+ // If the subfunction does not refer to the current coroutine, we cannot do
+ // anything with it.
+ if (SubFn->getFrame() != CoroBegin)
+ return false;
+
+ // Replace llvm.coro.suspend with the value that results in resumption over
+ // the resume or cleanup path.
+ Suspend->replaceAllUsesWith(SubFn->getRawIndex());
+ Suspend->eraseFromParent();
+ Save->eraseFromParent();
+
+ // No longer need a call to coro.resume or coro.destroy.
+ CallInstr->eraseFromParent();
+
+ if (SubFn->user_empty())
+ SubFn->eraseFromParent();
+
+ return true;
+}
+
+// Remove suspend points that are simplified.
+static void simplifySuspendPoints(coro::Shape &Shape) {
+ auto &S = Shape.CoroSuspends;
+ size_t I = 0, N = S.size();
+ if (N == 0)
+ return;
+ for (;;) {
+ if (simplifySuspendPoint(S[I], Shape.CoroBegin)) {
+ if (--N == I)
+ break;
+ std::swap(S[I], S[N]);
+ continue;
+ }
+ if (++I == N)
+ break;
+ }
+ S.resize(N);
+}
+
+static void splitCoroutine(Function &F, CallGraph &CG, CallGraphSCC &SCC) {
+ coro::Shape Shape(F);
+ if (!Shape.CoroBegin)
+ return;
+
+ simplifySuspendPoints(Shape);
+ buildCoroutineFrame(F, Shape);
+ replaceFrameSize(Shape);
+
+ // If there are no suspend points, no split is required; just remove
+ // the allocation and deallocation blocks, as they are not needed.
+ if (Shape.CoroSuspends.empty()) {
+ handleNoSuspendCoroutine(Shape.CoroBegin, Shape.FrameTy);
+ removeCoroEnds(Shape);
+ postSplitCleanup(F);
+ coro::updateCallGraph(F, {}, CG, SCC);
+ return;
+ }
+
+ auto *ResumeEntry = createResumeEntryBlock(F, Shape);
+ auto ResumeClone = createClone(F, ".resume", Shape, ResumeEntry, 0);
+ auto DestroyClone = createClone(F, ".destroy", Shape, ResumeEntry, 1);
+ auto CleanupClone = createClone(F, ".cleanup", Shape, ResumeEntry, 2);
+
+ // We no longer need coro.end in F.
+ removeCoroEnds(Shape);
+
+ postSplitCleanup(F);
+ postSplitCleanup(*ResumeClone);
+ postSplitCleanup(*DestroyClone);
+ postSplitCleanup(*CleanupClone);
+
+ // Store the addresses of the resume/destroy/cleanup functions in the
+ // coroutine frame.
+ updateCoroFrame(Shape, ResumeClone, DestroyClone, CleanupClone);
+
+ // Create a constant array referring to the resume/destroy/cleanup functions,
+ // pointed to by the info argument of coro.id, so that the CoroElide pass can
+ // determine the correct function to call.
+ setCoroInfo(F, Shape.CoroBegin, {ResumeClone, DestroyClone, CleanupClone});
+
+ // Update call graph and add the functions we created to the SCC.
+ coro::updateCallGraph(F, {ResumeClone, DestroyClone, CleanupClone}, CG, SCC);
+}
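+
+// For a coroutine @f, the split above produces, roughly, the following
+// artifacts (illustrative names; the actual names are derived from the
+// original function):
+//   @f.resume   - clone selected to resume at a suspend point      (FnIndex 0)
+//   @f.destroy  - clone selected to destroy a heap-allocated frame (FnIndex 1)
+//   @f.cleanup  - clone selected when the frame allocation was elided;
+//                 its coro.free is replaced with null              (FnIndex 2)
+//   @f.resumers - constant array {@f.resume, @f.destroy, @f.cleanup} referenced
+//                 by the info argument of coro.id (see setCoroInfo above)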
+
+// When we see the coroutine for the first time, we insert an indirect call to
+// a devirt trigger function and mark the coroutine as ready for split.
+static void prepareForSplit(Function &F, CallGraph &CG) {
+ Module &M = *F.getParent();
+#ifndef NDEBUG
+ Function *DevirtFn = M.getFunction(CORO_DEVIRT_TRIGGER_FN);
+ assert(DevirtFn && "coro.devirt.trigger function not found");
+#endif
+
+ F.addFnAttr(CORO_PRESPLIT_ATTR, PREPARED_FOR_SPLIT);
+
+ // Insert an indirect call sequence that will be devirtualized by CoroElide
+ // pass:
+ // %0 = call i8* @llvm.coro.subfn.addr(i8* null, i8 -1)
+ // %1 = bitcast i8* %0 to void(i8*)*
+ // call void %1(i8* null)
+ coro::LowererBase Lowerer(M);
+ Instruction *InsertPt = F.getEntryBlock().getTerminator();
+ auto *Null = ConstantPointerNull::get(Type::getInt8PtrTy(F.getContext()));
+ auto *DevirtFnAddr =
+ Lowerer.makeSubFnCall(Null, CoroSubFnInst::RestartTrigger, InsertPt);
+ auto *IndirectCall = CallInst::Create(DevirtFnAddr, Null, "", InsertPt);
+
+ // Update CG graph with an indirect call we just added.
+ CG[&F]->addCalledFunction(IndirectCall, CG.getCallsExternalNode());
+}
+
+// Make sure that there is a devirtualization trigger function that the
+// CoroSplit pass uses to force a restart of the CGSCC pipeline. If the devirt
+// trigger function is not found, we will create one and add it to the current
+// SCC.
+static void createDevirtTriggerFunc(CallGraph &CG, CallGraphSCC &SCC) {
+ Module &M = CG.getModule();
+ if (M.getFunction(CORO_DEVIRT_TRIGGER_FN))
+ return;
+
+ LLVMContext &C = M.getContext();
+ auto *FnTy = FunctionType::get(Type::getVoidTy(C), Type::getInt8PtrTy(C),
+ /*IsVarArgs=*/false);
+ Function *DevirtFn =
+ Function::Create(FnTy, GlobalValue::LinkageTypes::PrivateLinkage,
+ CORO_DEVIRT_TRIGGER_FN, &M);
+ DevirtFn->addFnAttr(Attribute::AlwaysInline);
+ auto *Entry = BasicBlock::Create(C, "entry", DevirtFn);
+ ReturnInst::Create(C, Entry);
+
+ auto *Node = CG.getOrInsertFunction(DevirtFn);
+
+ SmallVector<CallGraphNode *, 8> Nodes(SCC.begin(), SCC.end());
+ Nodes.push_back(Node);
+ SCC.initialize(Nodes);
+}
+
+//===----------------------------------------------------------------------===//
+// Top Level Driver
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+struct CoroSplit : public CallGraphSCCPass {
+ static char ID; // Pass identification, replacement for typeid
+ CoroSplit() : CallGraphSCCPass(ID) {}
+
+ bool Run = false;
+
+ // A coroutine is identified by the presence of the coro.begin intrinsic; if
+ // we don't have any, this pass has nothing to do.
+ bool doInitialization(CallGraph &CG) override {
+ Run = coro::declaresIntrinsics(CG.getModule(), {"llvm.coro.begin"});
+ return CallGraphSCCPass::doInitialization(CG);
+ }
+
+ bool runOnSCC(CallGraphSCC &SCC) override {
+ if (!Run)
+ return false;
+
+ // Find coroutines for processing.
+ SmallVector<Function *, 4> Coroutines;
+ for (CallGraphNode *CGN : SCC)
+ if (auto *F = CGN->getFunction())
+ if (F->hasFnAttribute(CORO_PRESPLIT_ATTR))
+ Coroutines.push_back(F);
+
+ if (Coroutines.empty())
+ return false;
+
+ CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
+ createDevirtTriggerFunc(CG, SCC);
+
+ for (Function *F : Coroutines) {
+ Attribute Attr = F->getFnAttribute(CORO_PRESPLIT_ATTR);
+ StringRef Value = Attr.getValueAsString();
+ DEBUG(dbgs() << "CoroSplit: Processing coroutine '" << F->getName()
+ << "' state: " << Value << "\n");
+ if (Value == UNPREPARED_FOR_SPLIT) {
+ prepareForSplit(*F, CG);
+ continue;
+ }
+ F->removeFnAttr(CORO_PRESPLIT_ATTR);
+ splitCoroutine(*F, CG, SCC);
+ }
+ return true;
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ CallGraphSCCPass::getAnalysisUsage(AU);
+ }
+};
+}
+
+char CoroSplit::ID = 0;
+INITIALIZE_PASS(
+ CoroSplit, "coro-split",
+ "Split coroutine into a set of functions driving its state machine", false,
+ false)
+
+Pass *llvm::createCoroSplitPass() { return new CoroSplit(); }
diff --git a/contrib/llvm/lib/Transforms/Coroutines/Coroutines.cpp b/contrib/llvm/lib/Transforms/Coroutines/Coroutines.cpp
new file mode 100644
index 000000000000..877ec34b4d3b
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Coroutines/Coroutines.cpp
@@ -0,0 +1,314 @@
+//===-- Coroutines.cpp ----------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This file implements the common infrastructure for Coroutine Passes.
+//===----------------------------------------------------------------------===//
+
+#include "CoroInternal.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+#include "llvm/Transforms/Utils/Local.h"
+
+using namespace llvm;
+
+void llvm::initializeCoroutines(PassRegistry &Registry) {
+ initializeCoroEarlyPass(Registry);
+ initializeCoroSplitPass(Registry);
+ initializeCoroElidePass(Registry);
+ initializeCoroCleanupPass(Registry);
+}
+
+static void addCoroutineOpt0Passes(const PassManagerBuilder &Builder,
+ legacy::PassManagerBase &PM) {
+ PM.add(createCoroSplitPass());
+ PM.add(createCoroElidePass());
+
+ PM.add(createBarrierNoopPass());
+ PM.add(createCoroCleanupPass());
+}
+
+static void addCoroutineEarlyPasses(const PassManagerBuilder &Builder,
+ legacy::PassManagerBase &PM) {
+ PM.add(createCoroEarlyPass());
+}
+
+static void addCoroutineScalarOptimizerPasses(const PassManagerBuilder &Builder,
+ legacy::PassManagerBase &PM) {
+ PM.add(createCoroElidePass());
+}
+
+static void addCoroutineSCCPasses(const PassManagerBuilder &Builder,
+ legacy::PassManagerBase &PM) {
+ PM.add(createCoroSplitPass());
+}
+
+static void addCoroutineOptimizerLastPasses(const PassManagerBuilder &Builder,
+ legacy::PassManagerBase &PM) {
+ PM.add(createCoroCleanupPass());
+}
+
+void llvm::addCoroutinePassesToExtensionPoints(PassManagerBuilder &Builder) {
+ Builder.addExtension(PassManagerBuilder::EP_EarlyAsPossible,
+ addCoroutineEarlyPasses);
+ Builder.addExtension(PassManagerBuilder::EP_EnabledOnOptLevel0,
+ addCoroutineOpt0Passes);
+ Builder.addExtension(PassManagerBuilder::EP_CGSCCOptimizerLate,
+ addCoroutineSCCPasses);
+ Builder.addExtension(PassManagerBuilder::EP_ScalarOptimizerLate,
+ addCoroutineScalarOptimizerPasses);
+ Builder.addExtension(PassManagerBuilder::EP_OptimizerLast,
+ addCoroutineOptimizerLastPasses);
+}
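+
+// Hypothetical usage sketch (not part of this file): a tool driving the legacy
+// pass manager would typically register these extensions before populating its
+// pass managers, e.g.
+//   PassManagerBuilder PMB;
+//   addCoroutinePassesToExtensionPoints(PMB);
+//   PMB.populateModulePassManager(MPM);
+// so that the coroutine passes are added at the extension points listed above.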
+
+// Construct the lowerer base class and initialize its members.
+coro::LowererBase::LowererBase(Module &M)
+ : TheModule(M), Context(M.getContext()),
+ Int8Ptr(Type::getInt8PtrTy(Context)),
+ ResumeFnType(FunctionType::get(Type::getVoidTy(Context), Int8Ptr,
+ /*isVarArg=*/false)),
+ NullPtr(ConstantPointerNull::get(Int8Ptr)) {}
+
+// Creates a sequence of instructions to obtain a resume function address using
+// llvm.coro.subfn.addr. It generates the following sequence:
+//
+// call i8* @llvm.coro.subfn.addr(i8* %Arg, i8 %index)
+// bitcast i8* %2 to void(i8*)*
+
+Value *coro::LowererBase::makeSubFnCall(Value *Arg, int Index,
+ Instruction *InsertPt) {
+ auto *IndexVal = ConstantInt::get(Type::getInt8Ty(Context), Index);
+ auto *Fn = Intrinsic::getDeclaration(&TheModule, Intrinsic::coro_subfn_addr);
+
+ assert(Index >= CoroSubFnInst::IndexFirst &&
+ Index < CoroSubFnInst::IndexLast &&
+ "makeSubFnCall: Index value out of range");
+ auto *Call = CallInst::Create(Fn, {Arg, IndexVal}, "", InsertPt);
+
+ auto *Bitcast =
+ new BitCastInst(Call, ResumeFnType->getPointerTo(), "", InsertPt);
+ return Bitcast;
+}
+
+#ifndef NDEBUG
+static bool isCoroutineIntrinsicName(StringRef Name) {
+ // NOTE: Must be sorted!
+ static const char *const CoroIntrinsics[] = {
+ "llvm.coro.alloc", "llvm.coro.begin", "llvm.coro.destroy",
+ "llvm.coro.done", "llvm.coro.end", "llvm.coro.frame",
+ "llvm.coro.free", "llvm.coro.id", "llvm.coro.param",
+ "llvm.coro.promise", "llvm.coro.resume", "llvm.coro.save",
+ "llvm.coro.size", "llvm.coro.subfn.addr", "llvm.coro.suspend",
+ };
+ return Intrinsic::lookupLLVMIntrinsicByName(CoroIntrinsics, Name) != -1;
+}
+#endif
+
+// Verifies if a module declares any of the named values listed. Also, in debug
+// mode, verifies that the names are coroutine intrinsic names.
+bool coro::declaresIntrinsics(Module &M,
+ std::initializer_list<StringRef> List) {
+
+ for (StringRef Name : List) {
+ assert(isCoroutineIntrinsicName(Name) && "not a coroutine intrinsic");
+ if (M.getNamedValue(Name))
+ return true;
+ }
+
+ return false;
+}
+
+// Replace all coro.frees associated with the provided CoroId, either with
+// 'null' if Elide is true, or with its frame parameter otherwise.
+void coro::replaceCoroFree(CoroIdInst *CoroId, bool Elide) {
+ SmallVector<CoroFreeInst *, 4> CoroFrees;
+ for (User *U : CoroId->users())
+ if (auto CF = dyn_cast<CoroFreeInst>(U))
+ CoroFrees.push_back(CF);
+
+ if (CoroFrees.empty())
+ return;
+
+ Value *Replacement =
+ Elide ? ConstantPointerNull::get(Type::getInt8PtrTy(CoroId->getContext()))
+ : CoroFrees.front()->getFrame();
+
+ for (CoroFreeInst *CF : CoroFrees) {
+ CF->replaceAllUsesWith(Replacement);
+ CF->eraseFromParent();
+ }
+}
+
+// FIXME: This code is stolen from CallGraph::addToCallGraph(Function *F), which
+// happens to be private. It would be better for this functionality to be
+// exposed by the CallGraph.
+static void buildCGN(CallGraph &CG, CallGraphNode *Node) {
+ Function *F = Node->getFunction();
+
+ // Look for calls by this function.
+ for (Instruction &I : instructions(F))
+ if (CallSite CS = CallSite(cast<Value>(&I))) {
+ const Function *Callee = CS.getCalledFunction();
+ if (!Callee || !Intrinsic::isLeaf(Callee->getIntrinsicID()))
+ // Indirect calls of intrinsics are not allowed so no need to check.
+ // We can be more precise here by using TargetArg returned by
+ // Intrinsic::isLeaf.
+ Node->addCalledFunction(CS, CG.getCallsExternalNode());
+ else if (!Callee->isIntrinsic())
+ Node->addCalledFunction(CS, CG.getOrInsertFunction(Callee));
+ }
+}
+
+// Rebuild CGN after we extracted parts of the code from ParentFunc into
+// NewFuncs. Builds CGNs for the NewFuncs and adds them to the current SCC.
+void coro::updateCallGraph(Function &ParentFunc, ArrayRef<Function *> NewFuncs,
+ CallGraph &CG, CallGraphSCC &SCC) {
+ // Rebuild CGN from scratch for the ParentFunc
+ auto *ParentNode = CG[&ParentFunc];
+ ParentNode->removeAllCalledFunctions();
+ buildCGN(CG, ParentNode);
+
+ SmallVector<CallGraphNode *, 8> Nodes(SCC.begin(), SCC.end());
+
+ for (Function *F : NewFuncs) {
+ CallGraphNode *Callee = CG.getOrInsertFunction(F);
+ Nodes.push_back(Callee);
+ buildCGN(CG, Callee);
+ }
+
+ SCC.initialize(Nodes);
+}
+
+static void clear(coro::Shape &Shape) {
+ Shape.CoroBegin = nullptr;
+ Shape.CoroEnds.clear();
+ Shape.CoroSizes.clear();
+ Shape.CoroSuspends.clear();
+
+ Shape.FrameTy = nullptr;
+ Shape.FramePtr = nullptr;
+ Shape.AllocaSpillBlock = nullptr;
+ Shape.ResumeSwitch = nullptr;
+ Shape.PromiseAlloca = nullptr;
+ Shape.HasFinalSuspend = false;
+}
+
+static CoroSaveInst *createCoroSave(CoroBeginInst *CoroBegin,
+ CoroSuspendInst *SuspendInst) {
+ Module *M = SuspendInst->getModule();
+ auto *Fn = Intrinsic::getDeclaration(M, Intrinsic::coro_save);
+ auto *SaveInst =
+ cast<CoroSaveInst>(CallInst::Create(Fn, CoroBegin, "", SuspendInst));
+ assert(!SuspendInst->getCoroSave());
+ SuspendInst->setArgOperand(0, SaveInst);
+ return SaveInst;
+}
+
+// Collect "interesting" coroutine intrinsics.
+void coro::Shape::buildFrom(Function &F) {
+ size_t FinalSuspendIndex = 0;
+ clear(*this);
+ SmallVector<CoroFrameInst *, 8> CoroFrames;
+ for (Instruction &I : instructions(F)) {
+ if (auto II = dyn_cast<IntrinsicInst>(&I)) {
+ switch (II->getIntrinsicID()) {
+ default:
+ continue;
+ case Intrinsic::coro_size:
+ CoroSizes.push_back(cast<CoroSizeInst>(II));
+ break;
+ case Intrinsic::coro_frame:
+ CoroFrames.push_back(cast<CoroFrameInst>(II));
+ break;
+ case Intrinsic::coro_suspend:
+ CoroSuspends.push_back(cast<CoroSuspendInst>(II));
+ if (CoroSuspends.back()->isFinal()) {
+ if (HasFinalSuspend)
+ report_fatal_error(
+ "Only one suspend point can be marked as final");
+ HasFinalSuspend = true;
+ FinalSuspendIndex = CoroSuspends.size() - 1;
+ }
+ break;
+ case Intrinsic::coro_begin: {
+ auto CB = cast<CoroBeginInst>(II);
+ if (CB->getId()->getInfo().isPreSplit()) {
+ if (CoroBegin)
+ report_fatal_error(
+ "coroutine should have exactly one defining @llvm.coro.begin");
+ CB->addAttribute(AttributeSet::ReturnIndex, Attribute::NonNull);
+ CB->addAttribute(AttributeSet::ReturnIndex, Attribute::NoAlias);
+ CB->removeAttribute(AttributeSet::FunctionIndex,
+ Attribute::NoDuplicate);
+ CoroBegin = CB;
+ }
+ break;
+ }
+ case Intrinsic::coro_end:
+ CoroEnds.push_back(cast<CoroEndInst>(II));
+ if (CoroEnds.back()->isFallthrough()) {
+ // Make sure that the fallthrough coro.end is the first element in the
+ // CoroEnds vector.
+ if (CoroEnds.size() > 1) {
+ if (CoroEnds.front()->isFallthrough())
+ report_fatal_error(
+ "Only one coro.end can be marked as fallthrough");
+ std::swap(CoroEnds.front(), CoroEnds.back());
+ }
+ }
+ break;
+ }
+ }
+ }
+
+ // If, for some reason, we were not able to find coro.begin, bail out.
+ if (!CoroBegin) {
+ // Replace coro.frame instructions, which are supposed to be lowered to the
+ // result of coro.begin, with undef.
+ auto *Undef = UndefValue::get(Type::getInt8PtrTy(F.getContext()));
+ for (CoroFrameInst *CF : CoroFrames) {
+ CF->replaceAllUsesWith(Undef);
+ CF->eraseFromParent();
+ }
+
+ // Replace all coro.suspend with undef and remove related coro.saves if
+ // present.
+ for (CoroSuspendInst *CS : CoroSuspends) {
+ CS->replaceAllUsesWith(UndefValue::get(CS->getType()));
+ CS->eraseFromParent();
+ if (auto *CoroSave = CS->getCoroSave())
+ CoroSave->eraseFromParent();
+ }
+
+ // Replace all coro.ends with unreachable instruction.
+ for (CoroEndInst *CE : CoroEnds)
+ changeToUnreachable(CE, /*UseLLVMTrap=*/false);
+
+ return;
+ }
+
+ // The coro.frame intrinsic is always lowered to the result of coro.begin.
+ for (CoroFrameInst *CF : CoroFrames) {
+ CF->replaceAllUsesWith(CoroBegin);
+ CF->eraseFromParent();
+ }
+
+ // Canonicalize coro.suspend by inserting a coro.save if needed.
+ for (CoroSuspendInst *CS : CoroSuspends)
+ if (!CS->getCoroSave())
+ createCoroSave(CoroBegin, CS);
+
+ // Move final suspend to be the last element in the CoroSuspends vector.
+ if (HasFinalSuspend &&
+ FinalSuspendIndex != CoroSuspends.size() - 1)
+ std::swap(CoroSuspends[FinalSuspendIndex], CoroSuspends.back());
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/AlwaysInliner.cpp b/contrib/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
new file mode 100644
index 000000000000..b7d96007c24a
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/AlwaysInliner.cpp
@@ -0,0 +1,158 @@
+//===- AlwaysInliner.cpp - Code to inline always_inline functions --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements a custom inliner that handles only functions that
+// are marked as "always inline".
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/AlwaysInliner.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/CallingConv.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/Inliner.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "inline"
+
+PreservedAnalyses AlwaysInlinerPass::run(Module &M, ModuleAnalysisManager &) {
+ InlineFunctionInfo IFI;
+ SmallSetVector<CallSite, 16> Calls;
+ bool Changed = false;
+ SmallVector<Function *, 16> InlinedFunctions;
+ for (Function &F : M)
+ if (!F.isDeclaration() && F.hasFnAttribute(Attribute::AlwaysInline) &&
+ isInlineViable(F)) {
+ Calls.clear();
+
+ for (User *U : F.users())
+ if (auto CS = CallSite(U))
+ if (CS.getCalledFunction() == &F)
+ Calls.insert(CS);
+
+ for (CallSite CS : Calls)
+ // FIXME: We really shouldn't be able to fail to inline at this point!
+ // We should do something to log or check the inline failures here.
+ Changed |= InlineFunction(CS, IFI);
+
+ // Remember to try and delete this function afterward. This both avoids
+ // re-walking the rest of the module and avoids dealing with any iterator
+ // invalidation issues while deleting functions.
+ InlinedFunctions.push_back(&F);
+ }
+
+ // Remove any live functions.
+ erase_if(InlinedFunctions, [&](Function *F) {
+ F->removeDeadConstantUsers();
+ return !F->isDefTriviallyDead();
+ });
+
+ // Delete the non-comdat ones from the module and also from our vector.
+ auto NonComdatBegin = partition(
+ InlinedFunctions, [&](Function *F) { return F->hasComdat(); });
+ for (Function *F : make_range(NonComdatBegin, InlinedFunctions.end()))
+ M.getFunctionList().erase(F);
+ InlinedFunctions.erase(NonComdatBegin, InlinedFunctions.end());
+
+ if (!InlinedFunctions.empty()) {
+ // Now we just have the comdat functions. Filter out the ones whose comdats
+ // are not actually dead.
+ filterDeadComdatFunctions(M, InlinedFunctions);
+ // The remaining functions are actually dead.
+ for (Function *F : InlinedFunctions)
+ M.getFunctionList().erase(F);
+ }
+
+ return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
+
+namespace {
+
+/// Inliner pass which only handles "always inline" functions.
+///
+/// Unlike the \c AlwaysInlinerPass, this uses the more heavyweight \c Inliner
+/// base class to provide several facilities such as array alloca merging.
+class AlwaysInlinerLegacyPass : public LegacyInlinerBase {
+
+public:
+ AlwaysInlinerLegacyPass() : LegacyInlinerBase(ID, /*InsertLifetime*/ true) {
+ initializeAlwaysInlinerLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ AlwaysInlinerLegacyPass(bool InsertLifetime)
+ : LegacyInlinerBase(ID, InsertLifetime) {
+ initializeAlwaysInlinerLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ /// Main run interface method. We override here to avoid calling skipSCC().
+ bool runOnSCC(CallGraphSCC &SCC) override { return inlineCalls(SCC); }
+
+ static char ID; // Pass identification, replacement for typeid
+
+ InlineCost getInlineCost(CallSite CS) override;
+
+ using llvm::Pass::doFinalization;
+ bool doFinalization(CallGraph &CG) override {
+ return removeDeadFunctions(CG, /*AlwaysInlineOnly=*/true);
+ }
+};
+}
+
+char AlwaysInlinerLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(AlwaysInlinerLegacyPass, "always-inline",
+ "Inliner for always_inline functions", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(AlwaysInlinerLegacyPass, "always-inline",
+ "Inliner for always_inline functions", false, false)
+
+Pass *llvm::createAlwaysInlinerLegacyPass(bool InsertLifetime) {
+ return new AlwaysInlinerLegacyPass(InsertLifetime);
+}
+
+/// \brief Get the inline cost for the always-inliner.
+///
+/// The always inliner *only* handles functions which are marked with the
+/// attribute to force inlining. As such, it is dramatically simpler and avoids
+/// using the powerful (but expensive) inline cost analysis. Instead it uses
+/// a very simple and boring direct walk of the instructions looking for
+/// impossible-to-inline constructs.
+///
+/// Note, it would be possible to go to some lengths to cache the information
+/// computed here, but as we only expect to do this for relatively few and
+/// small functions which have the explicit attribute to force inlining, it is
+/// likely not worth it in practice.
+InlineCost AlwaysInlinerLegacyPass::getInlineCost(CallSite CS) {
+ Function *Callee = CS.getCalledFunction();
+
+ // Only inline direct calls to functions with always-inline attributes
+ // that are viable for inlining. FIXME: We shouldn't even get here for
+ // declarations.
+ if (Callee && !Callee->isDeclaration() &&
+ CS.hasFnAttr(Attribute::AlwaysInline) && isInlineViable(*Callee))
+ return InlineCost::getAlways();
+
+ return InlineCost::getNever();
+}
diff --git a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index 0716a3a9cbe9..65b7bad3b1ed 100644
--- a/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -40,7 +40,6 @@
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
@@ -177,8 +176,7 @@ static bool isDenselyPacked(Type *type, const DataLayout &DL) {
// For homogenous sequential types, check for padding within members.
if (SequentialType *seqTy = dyn_cast<SequentialType>(type))
- return isa<PointerType>(seqTy) ||
- isDenselyPacked(seqTy->getElementType(), DL);
+ return isDenselyPacked(seqTy->getElementType(), DL);
// Check for padding within and between elements of a struct.
StructType *StructTy = cast<StructType>(type);
@@ -375,8 +373,8 @@ static bool AllCallersPassInValidPointerForArgument(Argument *Arg) {
unsigned ArgNo = Arg->getArgNo();
- // Look at all call sites of the function. At this pointer we know we only
- // have direct callees.
+ // Look at all call sites of the function. At this point we know we only have
+ // direct callees.
for (User *U : Callee->users()) {
CallSite CS(U);
assert(CS && "Should only have direct calls!");
@@ -600,7 +598,7 @@ static bool isSafeToPromoteArgument(Argument *Arg, bool isByValOrInAlloca,
// Because there could be several/many load instructions, remember which
// blocks we know to be transparent to the load.
- SmallPtrSet<BasicBlock*, 16> TranspBlocks;
+ df_iterator_default_set<BasicBlock*, 16> TranspBlocks;
for (LoadInst *Load : Loads) {
// Check to see if the load is invalidated from the start of the block to
@@ -836,7 +834,10 @@ DoPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
Type::getInt64Ty(F->getContext()));
Ops.push_back(ConstantInt::get(IdxTy, II));
// Keep track of the type we're currently indexing.
- ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(II);
+ if (auto *ElPTy = dyn_cast<PointerType>(ElTy))
+ ElTy = ElPTy->getElementType();
+ else
+ ElTy = cast<CompositeType>(ElTy)->getTypeAtIndex(II);
}
// And create a GEP to extract those indices.
V = GetElementPtrInst::Create(ArgIndex.first, V, Ops,
@@ -886,8 +887,8 @@ DoPromotion(Function *F, SmallPtrSetImpl<Argument *> &ArgsToPromote,
cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
cast<CallInst>(New)->setAttributes(AttributeSet::get(New->getContext(),
AttributesVec));
- if (cast<CallInst>(Call)->isTailCall())
- cast<CallInst>(New)->setTailCall();
+ cast<CallInst>(New)->setTailCallKind(
+ cast<CallInst>(Call)->getTailCallKind());
}
New->setDebugLoc(Call->getDebugLoc());
Args.clear();
diff --git a/contrib/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp b/contrib/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp
index 58731eaf6e30..ba2e60dee3bc 100644
--- a/contrib/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/CrossDSOCFI.cpp
@@ -155,7 +155,7 @@ bool CrossDSOCFI::runOnModule(Module &M) {
return true;
}
-PreservedAnalyses CrossDSOCFIPass::run(Module &M, AnalysisManager<Module> &AM) {
+PreservedAnalyses CrossDSOCFIPass::run(Module &M, ModuleAnalysisManager &AM) {
CrossDSOCFI Impl;
bool Changed = Impl.runOnModule(M);
if (!Changed)
diff --git a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
index c8c895b18796..1a5ed4692211 100644
--- a/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -190,8 +190,8 @@ bool DeadArgumentEliminationPass::DeleteDeadVarargs(Function &Fn) {
New = CallInst::Create(NF, Args, OpBundles, "", Call);
cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
cast<CallInst>(New)->setAttributes(PAL);
- if (cast<CallInst>(Call)->isTailCall())
- cast<CallInst>(New)->setTailCall();
+ cast<CallInst>(New)->setTailCallKind(
+ cast<CallInst>(Call)->getTailCallKind());
}
New->setDebugLoc(Call->getDebugLoc());
@@ -270,7 +270,7 @@ bool DeadArgumentEliminationPass::RemoveDeadArgumentsFromCallers(Function &Fn) {
SmallVector<unsigned, 8> UnusedArgs;
for (Argument &Arg : Fn.args()) {
- if (Arg.use_empty() && !Arg.hasByValOrInAllocaAttr())
+ if (!Arg.hasSwiftErrorAttr() && Arg.use_empty() && !Arg.hasByValOrInAllocaAttr())
UnusedArgs.push_back(Arg.getArgNo());
}
@@ -896,8 +896,8 @@ bool DeadArgumentEliminationPass::RemoveDeadStuffFromFunction(Function *F) {
New = CallInst::Create(NF, Args, OpBundles, "", Call);
cast<CallInst>(New)->setCallingConv(CS.getCallingConv());
cast<CallInst>(New)->setAttributes(NewCallPAL);
- if (cast<CallInst>(Call)->isTailCall())
- cast<CallInst>(New)->setTailCall();
+ cast<CallInst>(New)->setTailCallKind(
+ cast<CallInst>(Call)->getTailCallKind());
}
New->setDebugLoc(Call->getDebugLoc());
diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index 787f4342831d..402a66552c24 100644
--- a/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -42,6 +42,7 @@ using namespace llvm;
STATISTIC(NumReadNone, "Number of functions marked readnone");
STATISTIC(NumReadOnly, "Number of functions marked readonly");
STATISTIC(NumNoCapture, "Number of arguments marked nocapture");
+STATISTIC(NumReturned, "Number of arguments marked returned");
STATISTIC(NumReadNoneArg, "Number of arguments marked readnone");
STATISTIC(NumReadOnlyArg, "Number of arguments marked readonly");
STATISTIC(NumNoAlias, "Number of function returns marked noalias");
@@ -331,23 +332,16 @@ struct ArgumentUsesTracker : public CaptureTracker {
namespace llvm {
template <> struct GraphTraits<ArgumentGraphNode *> {
- typedef ArgumentGraphNode NodeType;
typedef ArgumentGraphNode *NodeRef;
typedef SmallVectorImpl<ArgumentGraphNode *>::iterator ChildIteratorType;
- static inline NodeType *getEntryNode(NodeType *A) { return A; }
- static inline ChildIteratorType child_begin(NodeType *N) {
- return N->Uses.begin();
- }
- static inline ChildIteratorType child_end(NodeType *N) {
- return N->Uses.end();
- }
+ static NodeRef getEntryNode(NodeRef A) { return A; }
+ static ChildIteratorType child_begin(NodeRef N) { return N->Uses.begin(); }
+ static ChildIteratorType child_end(NodeRef N) { return N->Uses.end(); }
};
template <>
struct GraphTraits<ArgumentGraph *> : public GraphTraits<ArgumentGraphNode *> {
- static NodeType *getEntryNode(ArgumentGraph *AG) {
- return AG->getEntryNode();
- }
+ static NodeRef getEntryNode(ArgumentGraph *AG) { return AG->getEntryNode(); }
static ChildIteratorType nodes_begin(ArgumentGraph *AG) {
return AG->begin();
}
@@ -447,8 +441,8 @@ determinePointerReadAttrs(Argument *A,
// to a operand bundle use, these cannot participate in the optimistic SCC
// analysis. Instead, we model the operand bundle uses as arguments in
// call to a function external to the SCC.
- if (!SCCNodes.count(&*std::next(F->arg_begin(), UseIndex)) ||
- IsOperandBundleUse) {
+ if (IsOperandBundleUse ||
+ !SCCNodes.count(&*std::next(F->arg_begin(), UseIndex))) {
// The accessors used on CallSite here do the right thing for calls and
// invokes with operand bundles.
@@ -484,6 +478,59 @@ determinePointerReadAttrs(Argument *A,
return IsRead ? Attribute::ReadOnly : Attribute::ReadNone;
}
+/// Deduce returned attributes for the SCC.
+static bool addArgumentReturnedAttrs(const SCCNodeSet &SCCNodes) {
+ bool Changed = false;
+
+ AttrBuilder B;
+ B.addAttribute(Attribute::Returned);
+
+ // Check each function in turn, determining if an argument is always returned.
+ for (Function *F : SCCNodes) {
+ // We can infer and propagate function attributes only when we know that the
+ // definition we'll get at link time is *exactly* the definition we see now.
+ // For more details, see GlobalValue::mayBeDerefined.
+ if (!F->hasExactDefinition())
+ continue;
+
+ if (F->getReturnType()->isVoidTy())
+ continue;
+
+ // There is nothing to do if an argument is already marked as 'returned'.
+ if (any_of(F->args(),
+ [](const Argument &Arg) { return Arg.hasReturnedAttr(); }))
+ continue;
+
+ auto FindRetArg = [&]() -> Value * {
+ Value *RetArg = nullptr;
+ for (BasicBlock &BB : *F)
+ if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator())) {
+ // Note that stripPointerCasts should look through functions with
+ // returned arguments.
+ Value *RetVal = Ret->getReturnValue()->stripPointerCasts();
+ if (!isa<Argument>(RetVal) || RetVal->getType() != F->getReturnType())
+ return nullptr;
+
+ if (!RetArg)
+ RetArg = RetVal;
+ else if (RetArg != RetVal)
+ return nullptr;
+ }
+
+ return RetArg;
+ };
+
+ if (Value *RetArg = FindRetArg()) {
+ auto *A = cast<Argument>(RetArg);
+ A->addAttr(AttributeSet::get(F->getContext(), A->getArgNo() + 1, B));
+ ++NumReturned;
+ Changed = true;
+ }
+ }
+
+ return Changed;
+}
+
/// Deduce nocapture attributes for the SCC.
static bool addArgumentAttrs(const SCCNodeSet &SCCNodes) {
bool Changed = false;
@@ -726,7 +773,8 @@ static bool isFunctionMallocLike(Function *F, const SCCNodeSet &SCCNodes) {
break;
if (CS.getCalledFunction() && SCCNodes.count(CS.getCalledFunction()))
break;
- } // fall-through
+ LLVM_FALLTHROUGH;
+ }
default:
return false; // Did not come from an allocation.
}
@@ -986,9 +1034,11 @@ static bool addNoRecurseAttrs(const SCCNodeSet &SCCNodes) {
}
PreservedAnalyses PostOrderFunctionAttrsPass::run(LazyCallGraph::SCC &C,
- CGSCCAnalysisManager &AM) {
+ CGSCCAnalysisManager &AM,
+ LazyCallGraph &CG,
+ CGSCCUpdateResult &) {
FunctionAnalysisManager &FAM =
- AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C).getManager();
+ AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
// We pass a lambda into functions to wire them up to the analysis manager
// for getting function analyses.
@@ -1025,6 +1075,7 @@ PreservedAnalyses PostOrderFunctionAttrsPass::run(LazyCallGraph::SCC &C,
}
bool Changed = false;
+ Changed |= addArgumentReturnedAttrs(SCCNodes);
Changed |= addReadAttrs(SCCNodes, AARGetter);
Changed |= addArgumentAttrs(SCCNodes);
@@ -1044,7 +1095,8 @@ namespace {
struct PostOrderFunctionAttrsLegacyPass : public CallGraphSCCPass {
static char ID; // Pass identification, replacement for typeid
PostOrderFunctionAttrsLegacyPass() : CallGraphSCCPass(ID) {
- initializePostOrderFunctionAttrsLegacyPassPass(*PassRegistry::getPassRegistry());
+ initializePostOrderFunctionAttrsLegacyPassPass(
+ *PassRegistry::getPassRegistry());
}
bool runOnSCC(CallGraphSCC &SCC) override;
@@ -1066,7 +1118,9 @@ INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
INITIALIZE_PASS_END(PostOrderFunctionAttrsLegacyPass, "functionattrs",
"Deduce function attributes", false, false)
-Pass *llvm::createPostOrderFunctionAttrsLegacyPass() { return new PostOrderFunctionAttrsLegacyPass(); }
+Pass *llvm::createPostOrderFunctionAttrsLegacyPass() {
+ return new PostOrderFunctionAttrsLegacyPass();
+}
template <typename AARGetterT>
static bool runImpl(CallGraphSCC &SCC, AARGetterT AARGetter) {
@@ -1090,6 +1144,7 @@ static bool runImpl(CallGraphSCC &SCC, AARGetterT AARGetter) {
SCCNodes.insert(F);
}
+ Changed |= addArgumentReturnedAttrs(SCCNodes);
Changed |= addReadAttrs(SCCNodes, AARGetter);
Changed |= addArgumentAttrs(SCCNodes);
@@ -1127,7 +1182,8 @@ namespace {
struct ReversePostOrderFunctionAttrsLegacyPass : public ModulePass {
static char ID; // Pass identification, replacement for typeid
ReversePostOrderFunctionAttrsLegacyPass() : ModulePass(ID) {
- initializeReversePostOrderFunctionAttrsLegacyPassPass(*PassRegistry::getPassRegistry());
+ initializeReversePostOrderFunctionAttrsLegacyPassPass(
+ *PassRegistry::getPassRegistry());
}
bool runOnModule(Module &M) override;
@@ -1216,10 +1272,17 @@ bool ReversePostOrderFunctionAttrsLegacyPass::runOnModule(Module &M) {
}
PreservedAnalyses
-ReversePostOrderFunctionAttrsPass::run(Module &M, AnalysisManager<Module> &AM) {
+ReversePostOrderFunctionAttrsPass::run(Module &M, ModuleAnalysisManager &AM) {
auto &CG = AM.getResult<CallGraphAnalysis>(M);
bool Changed = deduceFunctionAttributeInRPO(M, CG);
+
+ // CallGraphAnalysis holds AssertingVH and must be invalidated eagerly so
+ // that other passes don't delete stuff from under it.
+ // FIXME: We need to invalidate this to avoid PR28400. Is there a better
+ // solution?
+ AM.invalidate<CallGraphAnalysis>(M);
+
if (!Changed)
return PreservedAnalyses::all();
PreservedAnalyses PA;
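
The heart of addArgumentReturnedAttrs is the FindRetArg scan: an argument may be marked 'returned' only when every return instruction in the function yields that same argument (looking through pointer casts) and its type matches the function's return type. A standalone sketch of that check with a hypothetical helper name; the pass additionally requires hasExactDefinition() and skips functions whose arguments already carry the attribute:

#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
using namespace llvm;

// Return the unique Argument that every 'ret' in F hands back (through
// pointer casts), or nullptr if no such argument exists.
static Argument *findAlwaysReturnedArg(Function &F) {
  if (F.getReturnType()->isVoidTy())
    return nullptr;
  Argument *RetArg = nullptr;
  for (BasicBlock &BB : F)
    if (auto *Ret = dyn_cast<ReturnInst>(BB.getTerminator())) {
      Value *RetVal = Ret->getReturnValue()->stripPointerCasts();
      auto *A = dyn_cast<Argument>(RetVal);
      // Every return must yield the same argument, with a matching type.
      if (!A || A->getType() != F.getReturnType())
        return nullptr;
      if (!RetArg)
        RetArg = A;
      else if (RetArg != A)
        return nullptr;
    }
  return RetArg;
}
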
diff --git a/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp b/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp
index c9d075e76325..6dd95f8dcd55 100644
--- a/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/FunctionImport.cpp
@@ -21,6 +21,7 @@
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Verifier.h"
#include "llvm/IRReader/IRReader.h"
#include "llvm/Linker/Linker.h"
#include "llvm/Object/IRObjectFile.h"
@@ -49,6 +50,22 @@ static cl::opt<float>
"`import-instr-limit` threshold by this factor "
"before processing newly imported functions"));
+static cl::opt<float> ImportHotInstrFactor(
+ "import-hot-evolution-factor", cl::init(1.0), cl::Hidden,
+ cl::value_desc("x"),
+ cl::desc("As we import functions called from hot callsite, multiply the "
+ "`import-instr-limit` threshold by this factor "
+ "before processing newly imported functions"));
+
+static cl::opt<float> ImportHotMultiplier(
+ "import-hot-multiplier", cl::init(3.0), cl::Hidden, cl::value_desc("x"),
+ cl::desc("Multiply the `import-instr-limit` threshold for hot callsites"));
+
+// FIXME: This multiplier was not really tuned up.
+static cl::opt<float> ImportColdMultiplier(
+ "import-cold-multiplier", cl::init(0), cl::Hidden, cl::value_desc("N"),
+ cl::desc("Multiply the `import-instr-limit` threshold for cold callsites"));
+
static cl::opt<bool> PrintImports("print-imports", cl::init(false), cl::Hidden,
cl::desc("Print imported functions"));
@@ -95,8 +112,9 @@ static bool canBeExternallyReferenced(const GlobalValueSummary &Summary) {
if (!Summary.needsRenaming())
return true;
- if (Summary.hasSection())
- // Can't rename a global that needs renaming if has a section.
+ if (Summary.noRename())
+    // Can't externally reference a global that needs renaming if it has a
+    // section or is referenced from inline assembly, for example.
return false;
return true;
@@ -130,9 +148,17 @@ static bool eligibleForImport(const ModuleSummaryIndex &Index,
// FIXME: we may be able to import it by copying it without promotion.
return false;
+ // Don't import functions that are not viable to inline.
+ if (Summary.isNotViableToInline())
+ return false;
+
// Check references (and potential calls) in the same module. If the current
// value references a global that can't be externally referenced it is not
- // eligible for import.
+ // eligible for import. First check the flag set when we have possible
+ // opaque references (e.g. inline asm calls), then check the call and
+ // reference sets.
+ if (Summary.hasInlineAsmMaybeReferencingInternal())
+ return false;
bool AllRefsCanBeExternallyReferenced =
llvm::all_of(Summary.refs(), [&](const ValueInfo &VI) {
return canBeExternallyReferenced(Index, VI.getGUID());
@@ -210,63 +236,17 @@ static const GlobalValueSummary *selectCallee(GlobalValue::GUID GUID,
return selectCallee(Index, CalleeSummaryList->second, Threshold);
}
-/// Mark the global \p GUID as export by module \p ExportModulePath if found in
-/// this module. If it is a GlobalVariable, we also mark any referenced global
-/// in the current module as exported.
-static void exportGlobalInModule(const ModuleSummaryIndex &Index,
- StringRef ExportModulePath,
- GlobalValue::GUID GUID,
- FunctionImporter::ExportSetTy &ExportList) {
- auto FindGlobalSummaryInModule =
- [&](GlobalValue::GUID GUID) -> GlobalValueSummary *{
- auto SummaryList = Index.findGlobalValueSummaryList(GUID);
- if (SummaryList == Index.end())
- // This global does not have a summary, it is not part of the ThinLTO
- // process
- return nullptr;
- auto SummaryIter = llvm::find_if(
- SummaryList->second,
- [&](const std::unique_ptr<GlobalValueSummary> &Summary) {
- return Summary->modulePath() == ExportModulePath;
- });
- if (SummaryIter == SummaryList->second.end())
- return nullptr;
- return SummaryIter->get();
- };
-
- auto *Summary = FindGlobalSummaryInModule(GUID);
- if (!Summary)
- return;
- // We found it in the current module, mark as exported
- ExportList.insert(GUID);
-
- auto GVS = dyn_cast<GlobalVarSummary>(Summary);
- if (!GVS)
- return;
- // FunctionImportGlobalProcessing::doPromoteLocalToGlobal() will always
- // trigger importing the initializer for `constant unnamed addr` globals that
- // are referenced. We conservatively export all the referenced symbols for
- // every global to workaround this, so that the ExportList is accurate.
- // FIXME: with a "isConstant" flag in the summary we could be more targetted.
- for (auto &Ref : GVS->refs()) {
- auto GUID = Ref.getGUID();
- auto *RefSummary = FindGlobalSummaryInModule(GUID);
- if (RefSummary)
- // Found a ref in the current module, mark it as exported
- ExportList.insert(GUID);
- }
-}
-
-using EdgeInfo = std::pair<const FunctionSummary *, unsigned /* Threshold */>;
+using EdgeInfo = std::tuple<const FunctionSummary *, unsigned /* Threshold */,
+ GlobalValue::GUID>;
/// Compute the list of functions to import for a given caller. Mark these
/// imported functions and the symbols they reference in their source module as
/// exported from their source module.
static void computeImportForFunction(
const FunctionSummary &Summary, const ModuleSummaryIndex &Index,
- unsigned Threshold, const GVSummaryMapTy &DefinedGVSummaries,
+ const unsigned Threshold, const GVSummaryMapTy &DefinedGVSummaries,
SmallVectorImpl<EdgeInfo> &Worklist,
- FunctionImporter::ImportMapTy &ImportsForModule,
+ FunctionImporter::ImportMapTy &ImportList,
StringMap<FunctionImporter::ExportSetTy> *ExportLists = nullptr) {
for (auto &Edge : Summary.calls()) {
auto GUID = Edge.first.getGUID();
@@ -277,7 +257,18 @@ static void computeImportForFunction(
continue;
}
- auto *CalleeSummary = selectCallee(GUID, Threshold, Index);
+ auto GetBonusMultiplier = [](CalleeInfo::HotnessType Hotness) -> float {
+ if (Hotness == CalleeInfo::HotnessType::Hot)
+ return ImportHotMultiplier;
+ if (Hotness == CalleeInfo::HotnessType::Cold)
+ return ImportColdMultiplier;
+ return 1.0;
+ };
+
+ const auto NewThreshold =
+ Threshold * GetBonusMultiplier(Edge.second.Hotness);
+
+ auto *CalleeSummary = selectCallee(GUID, NewThreshold, Index);
if (!CalleeSummary) {
DEBUG(dbgs() << "ignored! No qualifying callee with summary found.\n");
continue;
@@ -293,40 +284,59 @@ static void computeImportForFunction(
} else
ResolvedCalleeSummary = cast<FunctionSummary>(CalleeSummary);
- assert(ResolvedCalleeSummary->instCount() <= Threshold &&
+ assert(ResolvedCalleeSummary->instCount() <= NewThreshold &&
"selectCallee() didn't honor the threshold");
+ auto GetAdjustedThreshold = [](unsigned Threshold, bool IsHotCallsite) {
+ // Adjust the threshold for next level of imported functions.
+ // The threshold is different for hot callsites because we can then
+ // inline chains of hot calls.
+ if (IsHotCallsite)
+ return Threshold * ImportHotInstrFactor;
+ return Threshold * ImportInstrFactor;
+ };
+
+ bool IsHotCallsite = Edge.second.Hotness == CalleeInfo::HotnessType::Hot;
+ const auto AdjThreshold = GetAdjustedThreshold(Threshold, IsHotCallsite);
+
auto ExportModulePath = ResolvedCalleeSummary->modulePath();
- auto &ProcessedThreshold = ImportsForModule[ExportModulePath][GUID];
+ auto &ProcessedThreshold = ImportList[ExportModulePath][GUID];
/// Since the traversal of the call graph is DFS, we can revisit a function
/// a second time with a higher threshold. In this case, it is added back to
/// the worklist with the new threshold.
- if (ProcessedThreshold && ProcessedThreshold >= Threshold) {
+ if (ProcessedThreshold && ProcessedThreshold >= AdjThreshold) {
DEBUG(dbgs() << "ignored! Target was already seen with Threshold "
<< ProcessedThreshold << "\n");
continue;
}
+ bool PreviouslyImported = ProcessedThreshold != 0;
// Mark this function as imported in this module, with the current Threshold
- ProcessedThreshold = Threshold;
+ ProcessedThreshold = AdjThreshold;
// Make exports in the source module.
if (ExportLists) {
auto &ExportList = (*ExportLists)[ExportModulePath];
ExportList.insert(GUID);
- // Mark all functions and globals referenced by this function as exported
- // to the outside if they are defined in the same source module.
- for (auto &Edge : ResolvedCalleeSummary->calls()) {
- auto CalleeGUID = Edge.first.getGUID();
- exportGlobalInModule(Index, ExportModulePath, CalleeGUID, ExportList);
- }
- for (auto &Ref : ResolvedCalleeSummary->refs()) {
- auto GUID = Ref.getGUID();
- exportGlobalInModule(Index, ExportModulePath, GUID, ExportList);
+ if (!PreviouslyImported) {
+ // This is the first time this function was exported from its source
+ // module, so mark all functions and globals it references as exported
+ // to the outside if they are defined in the same source module.
+ // For efficiency, we unconditionally add all the referenced GUIDs
+ // to the ExportList for this module, and will prune out any not
+ // defined in the module later in a single pass.
+ for (auto &Edge : ResolvedCalleeSummary->calls()) {
+ auto CalleeGUID = Edge.first.getGUID();
+ ExportList.insert(CalleeGUID);
+ }
+ for (auto &Ref : ResolvedCalleeSummary->refs()) {
+ auto GUID = Ref.getGUID();
+ ExportList.insert(GUID);
+ }
}
}
// Insert the newly imported function to the worklist.
- Worklist.push_back(std::make_pair(ResolvedCalleeSummary, Threshold));
+ Worklist.emplace_back(ResolvedCalleeSummary, AdjThreshold, GUID);
}
}
@@ -335,7 +345,7 @@ static void computeImportForFunction(
/// another module (that may require promotion).
static void ComputeImportForModule(
const GVSummaryMapTy &DefinedGVSummaries, const ModuleSummaryIndex &Index,
- FunctionImporter::ImportMapTy &ImportsForModule,
+ FunctionImporter::ImportMapTy &ImportList,
StringMap<FunctionImporter::ExportSetTy> *ExportLists = nullptr) {
// Worklist contains the list of function imported in this module, for which
// we will analyse the callees and may import further down the callgraph.
@@ -353,21 +363,26 @@ static void ComputeImportForModule(
continue;
DEBUG(dbgs() << "Initalize import for " << GVSummary.first << "\n");
computeImportForFunction(*FuncSummary, Index, ImportInstrLimit,
- DefinedGVSummaries, Worklist, ImportsForModule,
+ DefinedGVSummaries, Worklist, ImportList,
ExportLists);
}
+ // Process the newly imported functions and add callees to the worklist.
while (!Worklist.empty()) {
auto FuncInfo = Worklist.pop_back_val();
- auto *Summary = FuncInfo.first;
- auto Threshold = FuncInfo.second;
-
- // Process the newly imported functions and add callees to the worklist.
- // Adjust the threshold
- Threshold = Threshold * ImportInstrFactor;
+ auto *Summary = std::get<0>(FuncInfo);
+ auto Threshold = std::get<1>(FuncInfo);
+ auto GUID = std::get<2>(FuncInfo);
+
+ // Check if we later added this summary with a higher threshold.
+ // If so, skip this entry.
+ auto ExportModulePath = Summary->modulePath();
+ auto &LatestProcessedThreshold = ImportList[ExportModulePath][GUID];
+ if (LatestProcessedThreshold > Threshold)
+ continue;
computeImportForFunction(*Summary, Index, Threshold, DefinedGVSummaries,
- Worklist, ImportsForModule, ExportLists);
+ Worklist, ImportList, ExportLists);
}
}
@@ -381,13 +396,29 @@ void llvm::ComputeCrossModuleImport(
StringMap<FunctionImporter::ExportSetTy> &ExportLists) {
// For each module that has function defined, compute the import/export lists.
for (auto &DefinedGVSummaries : ModuleToDefinedGVSummaries) {
- auto &ImportsForModule = ImportLists[DefinedGVSummaries.first()];
+ auto &ImportList = ImportLists[DefinedGVSummaries.first()];
DEBUG(dbgs() << "Computing import for Module '"
<< DefinedGVSummaries.first() << "'\n");
- ComputeImportForModule(DefinedGVSummaries.second, Index, ImportsForModule,
+ ComputeImportForModule(DefinedGVSummaries.second, Index, ImportList,
&ExportLists);
}
+ // When computing imports we added all GUIDs referenced by anything
+ // imported from the module to its ExportList. Now we prune each ExportList
+ // of any not defined in that module. This is more efficient than checking
+ // while computing imports because some of the summary lists may be long
+ // due to linkonce (comdat) copies.
+ for (auto &ELI : ExportLists) {
+ const auto &DefinedGVSummaries =
+ ModuleToDefinedGVSummaries.lookup(ELI.first());
+ for (auto EI = ELI.second.begin(); EI != ELI.second.end();) {
+ if (!DefinedGVSummaries.count(*EI))
+ EI = ELI.second.erase(EI);
+ else
+ ++EI;
+ }
+ }
+
#ifndef NDEBUG
DEBUG(dbgs() << "Import/Export lists for " << ImportLists.size()
<< " modules:\n");
@@ -436,40 +467,35 @@ void llvm::ComputeCrossModuleImportForModule(
void llvm::gatherImportedSummariesForModule(
StringRef ModulePath,
const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
- const StringMap<FunctionImporter::ImportMapTy> &ImportLists,
+ const FunctionImporter::ImportMapTy &ImportList,
std::map<std::string, GVSummaryMapTy> &ModuleToSummariesForIndex) {
// Include all summaries from the importing module.
ModuleToSummariesForIndex[ModulePath] =
ModuleToDefinedGVSummaries.lookup(ModulePath);
- auto ModuleImports = ImportLists.find(ModulePath);
- if (ModuleImports != ImportLists.end()) {
- // Include summaries for imports.
- for (auto &ILI : ModuleImports->second) {
- auto &SummariesForIndex = ModuleToSummariesForIndex[ILI.first()];
- const auto &DefinedGVSummaries =
- ModuleToDefinedGVSummaries.lookup(ILI.first());
- for (auto &GI : ILI.second) {
- const auto &DS = DefinedGVSummaries.find(GI.first);
- assert(DS != DefinedGVSummaries.end() &&
- "Expected a defined summary for imported global value");
- SummariesForIndex[GI.first] = DS->second;
- }
+ // Include summaries for imports.
+ for (auto &ILI : ImportList) {
+ auto &SummariesForIndex = ModuleToSummariesForIndex[ILI.first()];
+ const auto &DefinedGVSummaries =
+ ModuleToDefinedGVSummaries.lookup(ILI.first());
+ for (auto &GI : ILI.second) {
+ const auto &DS = DefinedGVSummaries.find(GI.first);
+ assert(DS != DefinedGVSummaries.end() &&
+ "Expected a defined summary for imported global value");
+ SummariesForIndex[GI.first] = DS->second;
}
}
}
/// Emit the files \p ModulePath will import from into \p OutputFilename.
-std::error_code llvm::EmitImportsFiles(
- StringRef ModulePath, StringRef OutputFilename,
- const StringMap<FunctionImporter::ImportMapTy> &ImportLists) {
- auto ModuleImports = ImportLists.find(ModulePath);
+std::error_code
+llvm::EmitImportsFiles(StringRef ModulePath, StringRef OutputFilename,
+ const FunctionImporter::ImportMapTy &ModuleImports) {
std::error_code EC;
raw_fd_ostream ImportsOS(OutputFilename, EC, sys::fs::OpenFlags::F_None);
if (EC)
return EC;
- if (ModuleImports != ImportLists.end())
- for (auto &ILI : ModuleImports->second)
- ImportsOS << ILI.first() << "\n";
+ for (auto &ILI : ModuleImports)
+ ImportsOS << ILI.first() << "\n";
return std::error_code();
}
@@ -489,6 +515,15 @@ void llvm::thinLTOResolveWeakForLinkerModule(
DEBUG(dbgs() << "ODR fixing up linkage for `" << GV.getName() << "` from "
<< GV.getLinkage() << " to " << NewLinkage << "\n");
GV.setLinkage(NewLinkage);
+ // Remove functions converted to available_externally from comdats,
+ // as this is a declaration for the linker, and will be dropped eventually.
+ // It is illegal for comdats to contain declarations.
+ auto *GO = dyn_cast_or_null<GlobalObject>(&GV);
+ if (GO && GO->isDeclarationForLinker() && GO->hasComdat()) {
+ assert(GO->hasAvailableExternallyLinkage() &&
+ "Expected comdat on definition (possibly available external)");
+ GO->setComdat(nullptr);
+ }
};
// Process functions and global now
@@ -506,7 +541,7 @@ void llvm::thinLTOInternalizeModule(Module &TheModule,
// Parse inline ASM and collect the list of symbols that are not defined in
// the current module.
StringSet<> AsmUndefinedRefs;
- object::IRObjectFile::CollectAsmUndefinedRefs(
+ ModuleSymbolTable::CollectAsmSymbols(
Triple(TheModule.getTargetTriple()), TheModule.getModuleInlineAsm(),
[&AsmUndefinedRefs](StringRef Name, object::BasicSymbolRef::Flags Flags) {
if (Flags & object::BasicSymbolRef::SF_Undefined)
@@ -561,7 +596,7 @@ void llvm::thinLTOInternalizeModule(Module &TheModule,
// Automatically import functions in Module \p DestModule based on the summaries
// index.
//
-bool FunctionImporter::importFunctions(
+Expected<bool> FunctionImporter::importFunctions(
Module &DestModule, const FunctionImporter::ImportMapTy &ImportList,
bool ForceImportReferencedDiscardableSymbols) {
DEBUG(dbgs() << "Starting import for Module "
@@ -579,13 +614,17 @@ bool FunctionImporter::importFunctions(
// Get the module for the import
const auto &FunctionsToImportPerModule = ImportList.find(Name);
assert(FunctionsToImportPerModule != ImportList.end());
- std::unique_ptr<Module> SrcModule = ModuleLoader(Name);
+ Expected<std::unique_ptr<Module>> SrcModuleOrErr = ModuleLoader(Name);
+ if (!SrcModuleOrErr)
+ return SrcModuleOrErr.takeError();
+ std::unique_ptr<Module> SrcModule = std::move(*SrcModuleOrErr);
assert(&DestModule.getContext() == &SrcModule->getContext() &&
"Context mismatch");
// If modules were created with lazy metadata loading, materialize it
// now, before linking it (otherwise this will be a noop).
- SrcModule->materializeMetadata();
+ if (Error Err = SrcModule->materializeMetadata())
+ return std::move(Err);
UpgradeDebugInfo(*SrcModule);
auto &ImportGUIDs = FunctionsToImportPerModule->second;
@@ -600,7 +639,8 @@ bool FunctionImporter::importFunctions(
<< " " << F.getName() << " from "
<< SrcModule->getSourceFileName() << "\n");
if (Import) {
- F.materialize();
+ if (Error Err = F.materialize())
+ return std::move(Err);
if (EnableImportMetadata) {
// Add 'thinlto_src_module' metadata for statistics and debugging.
F.setMetadata(
@@ -622,7 +662,8 @@ bool FunctionImporter::importFunctions(
<< " " << GV.getName() << " from "
<< SrcModule->getSourceFileName() << "\n");
if (Import) {
- GV.materialize();
+ if (Error Err = GV.materialize())
+ return std::move(Err);
GlobalsToImport.insert(&GV);
}
}
@@ -648,9 +689,11 @@ bool FunctionImporter::importFunctions(
<< " " << GO->getName() << " from "
<< SrcModule->getSourceFileName() << "\n");
#endif
- GO->materialize();
+ if (Error Err = GO->materialize())
+ return std::move(Err);
GlobalsToImport.insert(GO);
- GA.materialize();
+ if (Error Err = GA.materialize())
+ return std::move(Err);
GlobalsToImport.insert(&GA);
}
}
@@ -689,106 +732,94 @@ static cl::opt<std::string>
SummaryFile("summary-file",
cl::desc("The summary file to use for function importing."));
-static void diagnosticHandler(const DiagnosticInfo &DI) {
- raw_ostream &OS = errs();
- DiagnosticPrinterRawOStream DP(OS);
- DI.print(DP);
- OS << '\n';
-}
+static bool doImportingForModule(Module &M) {
+ if (SummaryFile.empty())
+ report_fatal_error("error: -function-import requires -summary-file\n");
+ Expected<std::unique_ptr<ModuleSummaryIndex>> IndexPtrOrErr =
+ getModuleSummaryIndexForFile(SummaryFile);
+ if (!IndexPtrOrErr) {
+ logAllUnhandledErrors(IndexPtrOrErr.takeError(), errs(),
+ "Error loading file '" + SummaryFile + "': ");
+ return false;
+ }
+ std::unique_ptr<ModuleSummaryIndex> Index = std::move(*IndexPtrOrErr);
+
+ // First step is collecting the import list.
+ FunctionImporter::ImportMapTy ImportList;
+ ComputeCrossModuleImportForModule(M.getModuleIdentifier(), *Index,
+ ImportList);
+
+ // Conservatively mark all internal values as promoted. This interface is
+ // only used when doing importing via the function importing pass. The pass
+ // is only enabled when testing importing via the 'opt' tool, which does
+ // not do the ThinLink that would normally determine what values to promote.
+ for (auto &I : *Index) {
+ for (auto &S : I.second) {
+ if (GlobalValue::isLocalLinkage(S->linkage()))
+ S->setLinkage(GlobalValue::ExternalLinkage);
+ }
+ }
-/// Parse the summary index out of an IR file and return the summary
-/// index object if found, or nullptr if not.
-static std::unique_ptr<ModuleSummaryIndex> getModuleSummaryIndexForFile(
- StringRef Path, std::string &Error,
- const DiagnosticHandlerFunction &DiagnosticHandler) {
- std::unique_ptr<MemoryBuffer> Buffer;
- ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
- MemoryBuffer::getFile(Path);
- if (std::error_code EC = BufferOrErr.getError()) {
- Error = EC.message();
- return nullptr;
+ // Next we need to promote to global scope and rename any local values that
+ // are potentially exported to other modules.
+ if (renameModuleForThinLTO(M, *Index, nullptr)) {
+ errs() << "Error renaming module\n";
+ return false;
}
- Buffer = std::move(BufferOrErr.get());
- ErrorOr<std::unique_ptr<object::ModuleSummaryIndexObjectFile>> ObjOrErr =
- object::ModuleSummaryIndexObjectFile::create(Buffer->getMemBufferRef(),
- DiagnosticHandler);
- if (std::error_code EC = ObjOrErr.getError()) {
- Error = EC.message();
- return nullptr;
+
+ // Perform the import now.
+ auto ModuleLoader = [&M](StringRef Identifier) {
+ return loadFile(Identifier, M.getContext());
+ };
+ FunctionImporter Importer(*Index, ModuleLoader);
+ Expected<bool> Result = Importer.importFunctions(
+ M, ImportList, !DontForceImportReferencedDiscardableSymbols);
+
+ // FIXME: Probably need to propagate Errors through the pass manager.
+ if (!Result) {
+ logAllUnhandledErrors(Result.takeError(), errs(),
+ "Error importing module: ");
+ return false;
}
- return (*ObjOrErr)->takeIndex();
+
+ return *Result;
}
namespace {
/// Pass that performs cross-module function import provided a summary file.
-class FunctionImportPass : public ModulePass {
- /// Optional module summary index to use for importing, otherwise
- /// the summary-file option must be specified.
- const ModuleSummaryIndex *Index;
-
+class FunctionImportLegacyPass : public ModulePass {
public:
/// Pass identification, replacement for typeid
static char ID;
/// Specify pass name for debug output
- const char *getPassName() const override { return "Function Importing"; }
+ StringRef getPassName() const override { return "Function Importing"; }
- explicit FunctionImportPass(const ModuleSummaryIndex *Index = nullptr)
- : ModulePass(ID), Index(Index) {}
+ explicit FunctionImportLegacyPass() : ModulePass(ID) {}
bool runOnModule(Module &M) override {
if (skipModule(M))
return false;
- if (SummaryFile.empty() && !Index)
- report_fatal_error("error: -function-import requires -summary-file or "
- "file from frontend\n");
- std::unique_ptr<ModuleSummaryIndex> IndexPtr;
- if (!SummaryFile.empty()) {
- if (Index)
- report_fatal_error("error: -summary-file and index from frontend\n");
- std::string Error;
- IndexPtr =
- getModuleSummaryIndexForFile(SummaryFile, Error, diagnosticHandler);
- if (!IndexPtr) {
- errs() << "Error loading file '" << SummaryFile << "': " << Error
- << "\n";
- return false;
- }
- Index = IndexPtr.get();
- }
-
- // First step is collecting the import list.
- FunctionImporter::ImportMapTy ImportList;
- ComputeCrossModuleImportForModule(M.getModuleIdentifier(), *Index,
- ImportList);
-
- // Next we need to promote to global scope and rename any local values that
- // are potentially exported to other modules.
- if (renameModuleForThinLTO(M, *Index, nullptr)) {
- errs() << "Error renaming module\n";
- return false;
- }
-
- // Perform the import now.
- auto ModuleLoader = [&M](StringRef Identifier) {
- return loadFile(Identifier, M.getContext());
- };
- FunctionImporter Importer(*Index, ModuleLoader);
- return Importer.importFunctions(
- M, ImportList, !DontForceImportReferencedDiscardableSymbols);
+ return doImportingForModule(M);
}
};
} // anonymous namespace
-char FunctionImportPass::ID = 0;
-INITIALIZE_PASS_BEGIN(FunctionImportPass, "function-import",
- "Summary Based Function Import", false, false)
-INITIALIZE_PASS_END(FunctionImportPass, "function-import",
- "Summary Based Function Import", false, false)
+PreservedAnalyses FunctionImportPass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ if (!doImportingForModule(M))
+ return PreservedAnalyses::all();
+
+ return PreservedAnalyses::none();
+}
+
+char FunctionImportLegacyPass::ID = 0;
+INITIALIZE_PASS(FunctionImportLegacyPass, "function-import",
+ "Summary Based Function Import", false, false)
namespace llvm {
-Pass *createFunctionImportPass(const ModuleSummaryIndex *Index = nullptr) {
- return new FunctionImportPass(Index);
+Pass *createFunctionImportPass() {
+ return new FunctionImportLegacyPass();
}
}
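
Stripped of the cl::opt plumbing, the new profile-guided import heuristic is two scalar adjustments per call edge: the callee-selection threshold is scaled by a hotness bonus (3.0 for hot and 0 for cold call sites by default, per the flags above), and the threshold propagated to the callee's own callees is scaled by an evolution factor that differs for hot call sites. A simplified sketch of that arithmetic; the factors are taken as parameters rather than assuming any flag defaults not shown above:

// Hotness of a call edge as recorded in the summary (mirrors
// CalleeInfo::HotnessType in the patch).
enum class Hotness { Unknown, Cold, Hot };

// Bonus applied to the callee-selection threshold for this edge.
static float bonusMultiplier(Hotness H, float HotMultiplier /*3.0 default*/,
                             float ColdMultiplier /*0.0 default*/) {
  if (H == Hotness::Hot)
    return HotMultiplier;
  if (H == Hotness::Cold)
    return ColdMultiplier;
  return 1.0f;
}

// Threshold handed to the newly imported callee for the next DFS level.
static unsigned adjustedThreshold(unsigned Threshold, bool IsHotCallsite,
                                  float HotEvolutionFactor,
                                  float EvolutionFactor) {
  // Hot call chains keep (or grow) their budget so they can be imported and
  // inlined end to end; everything else evolves by the generic factor.
  return IsHotCallsite ? Threshold * HotEvolutionFactor
                       : Threshold * EvolutionFactor;
}
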
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
index 4c74698a1b61..7a04de3d12db 100644
--- a/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp
@@ -162,45 +162,29 @@ PreservedAnalyses GlobalDCEPass::run(Module &M, ModuleAnalysisManager &) {
GIF.setResolver(nullptr);
}
- if (!DeadFunctions.empty()) {
- // Now that all interferences have been dropped, delete the actual objects
- // themselves.
- for (Function *F : DeadFunctions) {
- RemoveUnusedGlobalValue(*F);
- M.getFunctionList().erase(F);
- }
- NumFunctions += DeadFunctions.size();
+ // Now that all interferences have been dropped, delete the actual objects
+ // themselves.
+ auto EraseUnusedGlobalValue = [&](GlobalValue *GV) {
+ RemoveUnusedGlobalValue(*GV);
+ GV->eraseFromParent();
Changed = true;
- }
+ };
- if (!DeadGlobalVars.empty()) {
- for (GlobalVariable *GV : DeadGlobalVars) {
- RemoveUnusedGlobalValue(*GV);
- M.getGlobalList().erase(GV);
- }
- NumVariables += DeadGlobalVars.size();
- Changed = true;
- }
+ NumFunctions += DeadFunctions.size();
+ for (Function *F : DeadFunctions)
+ EraseUnusedGlobalValue(F);
- // Now delete any dead aliases.
- if (!DeadAliases.empty()) {
- for (GlobalAlias *GA : DeadAliases) {
- RemoveUnusedGlobalValue(*GA);
- M.getAliasList().erase(GA);
- }
- NumAliases += DeadAliases.size();
- Changed = true;
- }
+ NumVariables += DeadGlobalVars.size();
+ for (GlobalVariable *GV : DeadGlobalVars)
+ EraseUnusedGlobalValue(GV);
- // Now delete any dead aliases.
- if (!DeadIFuncs.empty()) {
- for (GlobalIFunc *GIF : DeadIFuncs) {
- RemoveUnusedGlobalValue(*GIF);
- M.getIFuncList().erase(GIF);
- }
- NumIFuncs += DeadIFuncs.size();
- Changed = true;
- }
+ NumAliases += DeadAliases.size();
+ for (GlobalAlias *GA : DeadAliases)
+ EraseUnusedGlobalValue(GA);
+
+ NumIFuncs += DeadIFuncs.size();
+ for (GlobalIFunc *GIF : DeadIFuncs)
+ EraseUnusedGlobalValue(GIF);
// Make sure that all memory is released
AliveGlobals.clear();
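
The GlobalDCE change above folds four nearly identical deletion loops into one lambda that works for any GlobalValue, relying on eraseFromParent() instead of erasing from each per-kind module list. A minimal sketch of the same pattern as a hypothetical free function; the pass's RemoveUnusedGlobalValue additionally drops any remaining dead uses before erasing:

#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/GlobalValue.h"
using namespace llvm;

// Erase a batch of dead globals of any kind (functions, variables, aliases,
// ifuncs) through the common GlobalValue interface.
static unsigned eraseDeadGlobals(SmallVectorImpl<GlobalValue *> &Dead) {
  unsigned NumErased = 0;
  for (GlobalValue *GV : Dead) {
    GV->removeDeadConstantUsers(); // Drop dead constant users first.
    GV->eraseFromParent();         // Unlinks from the correct module list.
    ++NumErased;
  }
  return NumErased;
}
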
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index 99b12d4db0d0..5b0d5e3bc01e 100644
--- a/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -371,14 +371,14 @@ static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) {
++GEPI; // Skip over the pointer index.
// If this is a use of an array allocation, do a bit more checking for sanity.
- if (ArrayType *AT = dyn_cast<ArrayType>(*GEPI)) {
- uint64_t NumElements = AT->getNumElements();
+ if (GEPI.isSequential()) {
ConstantInt *Idx = cast<ConstantInt>(U->getOperand(2));
// Check to make sure that index falls within the array. If not,
// something funny is going on, so we won't do the optimization.
//
- if (Idx->getZExtValue() >= NumElements)
+ if (GEPI.isBoundedSequential() &&
+ Idx->getZExtValue() >= GEPI.getSequentialNumElements())
return false;
// We cannot scalar repl this level of the array unless any array
@@ -391,19 +391,13 @@ static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) {
for (++GEPI; // Skip array index.
GEPI != E;
++GEPI) {
- uint64_t NumElements;
- if (ArrayType *SubArrayTy = dyn_cast<ArrayType>(*GEPI))
- NumElements = SubArrayTy->getNumElements();
- else if (VectorType *SubVectorTy = dyn_cast<VectorType>(*GEPI))
- NumElements = SubVectorTy->getNumElements();
- else {
- assert((*GEPI)->isStructTy() &&
- "Indexed GEP type is not array, vector, or struct!");
+ if (GEPI.isStruct())
continue;
- }
ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPI.getOperand());
- if (!IdxVal || IdxVal->getZExtValue() >= NumElements)
+ if (!IdxVal ||
+ (GEPI.isBoundedSequential() &&
+ IdxVal->getZExtValue() >= GEPI.getSequentialNumElements()))
return false;
}
}
@@ -473,12 +467,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const DataLayout &DL) {
NGV->setAlignment(NewAlign);
}
} else if (SequentialType *STy = dyn_cast<SequentialType>(Ty)) {
- unsigned NumElements = 0;
- if (ArrayType *ATy = dyn_cast<ArrayType>(STy))
- NumElements = ATy->getNumElements();
- else
- NumElements = cast<VectorType>(STy)->getNumElements();
-
+ unsigned NumElements = STy->getNumElements();
if (NumElements > 16 && GV->hasNUsesOrMore(16))
return nullptr; // It's not worth it.
NewGlobals.reserve(NumElements);
@@ -1653,7 +1642,7 @@ static bool deleteIfDead(GlobalValue &GV,
SmallSet<const Comdat *, 8> &NotDiscardableComdats) {
GV.removeDeadConstantUsers();
- if (!GV.isDiscardableIfUnused())
+ if (!GV.isDiscardableIfUnused() && !GV.isDeclaration())
return false;
if (const Comdat *C = GV.getComdat())
@@ -1662,7 +1651,7 @@ static bool deleteIfDead(GlobalValue &GV,
bool Dead;
if (auto *F = dyn_cast<Function>(&GV))
- Dead = F->isDefTriviallyDead();
+ Dead = (F->isDeclaration() && F->use_empty()) || F->isDefTriviallyDead();
else
Dead = GV.use_empty();
if (!Dead)
@@ -1737,7 +1726,7 @@ static bool isPointerValueDeadOnEntryToFunction(
for (auto *L : Loads) {
auto *LTy = L->getType();
- if (!std::any_of(Stores.begin(), Stores.end(), [&](StoreInst *S) {
+ if (none_of(Stores, [&](const StoreInst *S) {
auto *STy = S->getValueOperand()->getType();
// The load is only dominated by the store if DomTree says so
// and the number of bits loaded in L is less than or equal to
@@ -2079,10 +2068,10 @@ OptimizeGlobalVars(Module &M, TargetLibraryInfo *TLI,
GV->setLinkage(GlobalValue::InternalLinkage);
// Simplify the initializer.
if (GV->hasInitializer())
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(GV->getInitializer())) {
+ if (auto *C = dyn_cast<Constant>(GV->getInitializer())) {
auto &DL = M.getDataLayout();
- Constant *New = ConstantFoldConstantExpression(CE, DL, TLI);
- if (New && New != CE)
+ Constant *New = ConstantFoldConstant(C, DL, TLI);
+ if (New && New != C)
GV->setInitializer(New);
}
@@ -2125,12 +2114,7 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo));
SequentialType *InitTy = cast<SequentialType>(Init->getType());
-
- uint64_t NumElts;
- if (ArrayType *ATy = dyn_cast<ArrayType>(InitTy))
- NumElts = ATy->getNumElements();
- else
- NumElts = InitTy->getVectorNumElements();
+ uint64_t NumElts = InitTy->getNumElements();
// Break up the array into elements.
for (uint64_t i = 0, e = NumElts; i != e; ++i)
@@ -2565,7 +2549,7 @@ static bool optimizeGlobalsInModule(
return Changed;
}
-PreservedAnalyses GlobalOptPass::run(Module &M, AnalysisManager<Module> &AM) {
+PreservedAnalyses GlobalOptPass::run(Module &M, ModuleAnalysisManager &AM) {
auto &DL = M.getDataLayout();
auto &TLI = AM.getResult<TargetLibraryAnalysis>(M);
auto &FAM =
diff --git a/contrib/llvm/lib/Transforms/IPO/GlobalSplit.cpp b/contrib/llvm/lib/Transforms/IPO/GlobalSplit.cpp
new file mode 100644
index 000000000000..bbbd096e89c0
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/GlobalSplit.cpp
@@ -0,0 +1,171 @@
+//===- GlobalSplit.cpp - global variable splitter -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass uses inrange annotations on GEP indices to split globals where
+// beneficial. Clang currently attaches these annotations to references to
+// virtual table globals under the Itanium ABI for the benefit of the
+// whole-program virtual call optimization and control flow integrity passes.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/GlobalSplit.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Operator.h"
+#include "llvm/Pass.h"
+
+#include <set>
+
+using namespace llvm;
+
+namespace {
+
+bool splitGlobal(GlobalVariable &GV) {
+ // If the address of the global is taken outside of the module, we cannot
+ // apply this transformation.
+ if (!GV.hasLocalLinkage())
+ return false;
+
+ // We currently only know how to split ConstantStructs.
+ auto *Init = dyn_cast_or_null<ConstantStruct>(GV.getInitializer());
+ if (!Init)
+ return false;
+
+ // Verify that each user of the global is an inrange getelementptr constant.
+ // From this it follows that any loads from or stores to that global must use
+ // a pointer derived from an inrange getelementptr constant, which is
+ // sufficient to allow us to apply the splitting transform.
+ for (User *U : GV.users()) {
+ if (!isa<Constant>(U))
+ return false;
+
+ auto *GEP = dyn_cast<GEPOperator>(U);
+ if (!GEP || !GEP->getInRangeIndex() || *GEP->getInRangeIndex() != 1 ||
+ !isa<ConstantInt>(GEP->getOperand(1)) ||
+ !cast<ConstantInt>(GEP->getOperand(1))->isZero() ||
+ !isa<ConstantInt>(GEP->getOperand(2)))
+ return false;
+ }
+
+ SmallVector<MDNode *, 2> Types;
+ GV.getMetadata(LLVMContext::MD_type, Types);
+
+ const DataLayout &DL = GV.getParent()->getDataLayout();
+ const StructLayout *SL = DL.getStructLayout(Init->getType());
+
+ IntegerType *Int32Ty = Type::getInt32Ty(GV.getContext());
+
+ std::vector<GlobalVariable *> SplitGlobals(Init->getNumOperands());
+ for (unsigned I = 0; I != Init->getNumOperands(); ++I) {
+ // Build a global representing this split piece.
+ auto *SplitGV =
+ new GlobalVariable(*GV.getParent(), Init->getOperand(I)->getType(),
+ GV.isConstant(), GlobalValue::PrivateLinkage,
+ Init->getOperand(I), GV.getName() + "." + utostr(I));
+ SplitGlobals[I] = SplitGV;
+
+ unsigned SplitBegin = SL->getElementOffset(I);
+ unsigned SplitEnd = (I == Init->getNumOperands() - 1)
+ ? SL->getSizeInBytes()
+ : SL->getElementOffset(I + 1);
+
+ // Rebuild type metadata, adjusting by the split offset.
+ // FIXME: See if we can use DW_OP_piece to preserve debug metadata here.
+ for (MDNode *Type : Types) {
+ uint64_t ByteOffset = cast<ConstantInt>(
+ cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
+ ->getZExtValue();
+ if (ByteOffset < SplitBegin || ByteOffset >= SplitEnd)
+ continue;
+ SplitGV->addMetadata(
+ LLVMContext::MD_type,
+ *MDNode::get(GV.getContext(),
+ {ConstantAsMetadata::get(
+ ConstantInt::get(Int32Ty, ByteOffset - SplitBegin)),
+ Type->getOperand(1)}));
+ }
+ }
+
+ for (User *U : GV.users()) {
+ auto *GEP = cast<GEPOperator>(U);
+ unsigned I = cast<ConstantInt>(GEP->getOperand(2))->getZExtValue();
+ if (I >= SplitGlobals.size())
+ continue;
+
+ SmallVector<Value *, 4> Ops;
+ Ops.push_back(ConstantInt::get(Int32Ty, 0));
+ for (unsigned I = 3; I != GEP->getNumOperands(); ++I)
+ Ops.push_back(GEP->getOperand(I));
+
+ auto *NewGEP = ConstantExpr::getGetElementPtr(
+ SplitGlobals[I]->getInitializer()->getType(), SplitGlobals[I], Ops,
+ GEP->isInBounds());
+ GEP->replaceAllUsesWith(NewGEP);
+ }
+
+ // Finally, remove the original global. Any remaining uses refer to invalid
+ // elements of the global, so replace with undef.
+ if (!GV.use_empty())
+ GV.replaceAllUsesWith(UndefValue::get(GV.getType()));
+ GV.eraseFromParent();
+ return true;
+}
+
+bool splitGlobals(Module &M) {
+ // First, see if the module uses either of the llvm.type.test or
+ // llvm.type.checked.load intrinsics, which indicates that splitting globals
+ // may be beneficial.
+ Function *TypeTestFunc =
+ M.getFunction(Intrinsic::getName(Intrinsic::type_test));
+ Function *TypeCheckedLoadFunc =
+ M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load));
+ if ((!TypeTestFunc || TypeTestFunc->use_empty()) &&
+ (!TypeCheckedLoadFunc || TypeCheckedLoadFunc->use_empty()))
+ return false;
+
+ bool Changed = false;
+ for (auto I = M.global_begin(); I != M.global_end();) {
+ GlobalVariable &GV = *I;
+ ++I;
+ Changed |= splitGlobal(GV);
+ }
+ return Changed;
+}
+
+struct GlobalSplit : public ModulePass {
+ static char ID;
+ GlobalSplit() : ModulePass(ID) {
+ initializeGlobalSplitPass(*PassRegistry::getPassRegistry());
+ }
+ bool runOnModule(Module &M) {
+ if (skipModule(M))
+ return false;
+
+ return splitGlobals(M);
+ }
+};
+
+}
+
+INITIALIZE_PASS(GlobalSplit, "globalsplit", "Global splitter", false, false)
+char GlobalSplit::ID = 0;
+
+ModulePass *llvm::createGlobalSplitPass() {
+ return new GlobalSplit;
+}
+
+PreservedAnalyses GlobalSplitPass::run(Module &M, ModuleAnalysisManager &AM) {
+ if (!splitGlobals(M))
+ return PreservedAnalyses::all();
+ return PreservedAnalyses::none();
+}
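
For each piece the splitter carves out of the struct initializer, it needs the byte range that piece occupied in the original global so that !type metadata offsets can be rebased onto the new, smaller global. The helper below is not part of the patch; it merely restates that offset arithmetic using StructLayout:

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include <utility>
using namespace llvm;

// Byte range [Begin, End) covered by element I of a struct-typed initializer.
static std::pair<uint64_t, uint64_t>
splitPieceRange(const DataLayout &DL, StructType *STy, unsigned I) {
  const StructLayout *SL = DL.getStructLayout(STy);
  uint64_t Begin = SL->getElementOffset(I);
  uint64_t End = (I == STy->getNumElements() - 1)
                     ? SL->getSizeInBytes()
                     : SL->getElementOffset(I + 1);
  return {Begin, End};
}
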
diff --git a/contrib/llvm/lib/Transforms/IPO/IPO.cpp b/contrib/llvm/lib/Transforms/IPO/IPO.cpp
index 3507eba81b2f..89518f3c5fae 100644
--- a/contrib/llvm/lib/Transforms/IPO/IPO.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/IPO.cpp
@@ -18,6 +18,7 @@
#include "llvm/InitializePasses.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/IPO/FunctionAttrs.h"
using namespace llvm;
@@ -31,8 +32,9 @@ void llvm::initializeIPO(PassRegistry &Registry) {
initializeForceFunctionAttrsLegacyPassPass(Registry);
initializeGlobalDCELegacyPassPass(Registry);
initializeGlobalOptLegacyPassPass(Registry);
+ initializeGlobalSplitPass(Registry);
initializeIPCPPass(Registry);
- initializeAlwaysInlinerPass(Registry);
+ initializeAlwaysInlinerLegacyPassPass(Registry);
initializeSimpleInlinerPass(Registry);
initializeInferFunctionAttrsLegacyPassPass(Registry);
initializeInternalizeLegacyPassPass(Registry);
@@ -53,7 +55,7 @@ void llvm::initializeIPO(PassRegistry &Registry) {
initializeBarrierNoopPass(Registry);
initializeEliminateAvailableExternallyLegacyPassPass(Registry);
initializeSampleProfileLoaderLegacyPassPass(Registry);
- initializeFunctionImportPassPass(Registry);
+ initializeFunctionImportLegacyPassPass(Registry);
initializeWholeProgramDevirtPass(Registry);
}
@@ -82,7 +84,7 @@ void LLVMAddFunctionInliningPass(LLVMPassManagerRef PM) {
}
void LLVMAddAlwaysInlinerPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(llvm::createAlwaysInlinerPass());
+ unwrap(PM)->add(llvm::createAlwaysInlinerLegacyPass());
}
void LLVMAddGlobalDCEPass(LLVMPassManagerRef PM) {
diff --git a/contrib/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp b/contrib/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp
index ab2d2bd8b02a..2ef299d9a2f0 100644
--- a/contrib/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/InferFunctionAttrs.cpp
@@ -34,7 +34,7 @@ static bool inferAllPrototypeAttributes(Module &M,
}
PreservedAnalyses InferFunctionAttrsPass::run(Module &M,
- AnalysisManager<Module> &AM) {
+ ModuleAnalysisManager &AM) {
auto &TLI = AM.getResult<TargetLibraryAnalysis>(M);
if (!inferAllPrototypeAttributes(M, TLI))
diff --git a/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp b/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp
deleted file mode 100644
index cb1ab95ec2af..000000000000
--- a/contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp
+++ /dev/null
@@ -1,103 +0,0 @@
-//===- InlineAlways.cpp - Code to inline always_inline functions ----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a custom inliner that handles only functions that
-// are marked as "always inline".
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/IPO.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/CallGraph.h"
-#include "llvm/Analysis/InlineCost.h"
-#include "llvm/Analysis/ProfileSummaryInfo.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/IR/CallSite.h"
-#include "llvm/IR/CallingConv.h"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
-#include "llvm/IR/Module.h"
-#include "llvm/IR/Type.h"
-#include "llvm/Transforms/IPO/InlinerPass.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "inline"
-
-namespace {
-
-/// \brief Inliner pass which only handles "always inline" functions.
-class AlwaysInliner : public Inliner {
-
-public:
- AlwaysInliner() : Inliner(ID, /*InsertLifetime*/ true) {
- initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
- }
-
- AlwaysInliner(bool InsertLifetime) : Inliner(ID, InsertLifetime) {
- initializeAlwaysInlinerPass(*PassRegistry::getPassRegistry());
- }
-
- /// Main run interface method. We override here to avoid calling skipSCC().
- bool runOnSCC(CallGraphSCC &SCC) override { return inlineCalls(SCC); }
-
- static char ID; // Pass identification, replacement for typeid
-
- InlineCost getInlineCost(CallSite CS) override;
-
- using llvm::Pass::doFinalization;
- bool doFinalization(CallGraph &CG) override {
- return removeDeadFunctions(CG, /*AlwaysInlineOnly=*/ true);
- }
-};
-
-}
-
-char AlwaysInliner::ID = 0;
-INITIALIZE_PASS_BEGIN(AlwaysInliner, "always-inline",
- "Inliner for always_inline functions", false, false)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_END(AlwaysInliner, "always-inline",
- "Inliner for always_inline functions", false, false)
-
-Pass *llvm::createAlwaysInlinerPass() { return new AlwaysInliner(); }
-
-Pass *llvm::createAlwaysInlinerPass(bool InsertLifetime) {
- return new AlwaysInliner(InsertLifetime);
-}
-
-/// \brief Get the inline cost for the always-inliner.
-///
-/// The always inliner *only* handles functions which are marked with the
-/// attribute to force inlining. As such, it is dramatically simpler and avoids
-/// using the powerful (but expensive) inline cost analysis. Instead it uses
-/// a very simple and boring direct walk of the instructions looking for
-/// impossible-to-inline constructs.
-///
-/// Note, it would be possible to go to some lengths to cache the information
-/// computed here, but as we only expect to do this for relatively few and
-/// small functions which have the explicit attribute to force inlining, it is
-/// likely not worth it in practice.
-InlineCost AlwaysInliner::getInlineCost(CallSite CS) {
- Function *Callee = CS.getCalledFunction();
-
- // Only inline direct calls to functions with always-inline attributes
- // that are viable for inlining. FIXME: We shouldn't even get here for
- // declarations.
- if (Callee && !Callee->isDeclaration() &&
- CS.hasFnAttr(Attribute::AlwaysInline) && isInlineViable(*Callee))
- return InlineCost::getAlways();
-
- return InlineCost::getNever();
-}
diff --git a/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp b/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp
index 2aa650bd219d..1770445b413f 100644
--- a/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp
@@ -25,7 +25,7 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Transforms/IPO.h"
-#include "llvm/Transforms/IPO/InlinerPass.h"
+#include "llvm/Transforms/IPO/Inliner.h"
using namespace llvm;
@@ -38,21 +38,17 @@ namespace {
/// The common implementation of the inlining logic is shared between this
/// inliner pass and the always inliner pass. The two passes use different cost
/// analyses to determine when to inline.
-class SimpleInliner : public Inliner {
- // This field is populated based on one of the following:
- // * optimization or size-optimization levels,
- // * the --inline-threshold flag, or
- // * a user specified value.
- int DefaultThreshold;
+class SimpleInliner : public LegacyInlinerBase {
+
+ InlineParams Params;
public:
- SimpleInliner()
- : Inliner(ID), DefaultThreshold(llvm::getDefaultInlineThreshold()) {
+ SimpleInliner() : LegacyInlinerBase(ID), Params(llvm::getInlineParams()) {
initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
}
- explicit SimpleInliner(int Threshold)
- : Inliner(ID), DefaultThreshold(Threshold) {
+ explicit SimpleInliner(InlineParams Params)
+ : LegacyInlinerBase(ID), Params(Params) {
initializeSimpleInlinerPass(*PassRegistry::getPassRegistry());
}
@@ -61,7 +57,11 @@ public:
InlineCost getInlineCost(CallSite CS) override {
Function *Callee = CS.getCalledFunction();
TargetTransformInfo &TTI = TTIWP->getTTI(*Callee);
- return llvm::getInlineCost(CS, DefaultThreshold, TTI, ACT, PSI);
+ std::function<AssumptionCache &(Function &)> GetAssumptionCache =
+ [&](Function &F) -> AssumptionCache & {
+ return ACT->getAssumptionCache(F);
+ };
+ return llvm::getInlineCost(CS, Params, TTI, GetAssumptionCache, PSI);
}
bool runOnSCC(CallGraphSCC &SCC) override;
@@ -69,39 +69,43 @@ public:
private:
TargetTransformInfoWrapperPass *TTIWP;
+
};
} // end anonymous namespace
char SimpleInliner::ID = 0;
-INITIALIZE_PASS_BEGIN(SimpleInliner, "inline",
- "Function Integration/Inlining", false, false)
+INITIALIZE_PASS_BEGIN(SimpleInliner, "inline", "Function Integration/Inlining",
+ false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_END(SimpleInliner, "inline",
- "Function Integration/Inlining", false, false)
+INITIALIZE_PASS_END(SimpleInliner, "inline", "Function Integration/Inlining",
+ false, false)
Pass *llvm::createFunctionInliningPass() { return new SimpleInliner(); }
Pass *llvm::createFunctionInliningPass(int Threshold) {
- return new SimpleInliner(Threshold);
+ return new SimpleInliner(llvm::getInlineParams(Threshold));
}
Pass *llvm::createFunctionInliningPass(unsigned OptLevel,
unsigned SizeOptLevel) {
- return new SimpleInliner(
- llvm::computeThresholdFromOptLevels(OptLevel, SizeOptLevel));
+ return new SimpleInliner(llvm::getInlineParams(OptLevel, SizeOptLevel));
+}
+
+Pass *llvm::createFunctionInliningPass(InlineParams &Params) {
+ return new SimpleInliner(Params);
}
bool SimpleInliner::runOnSCC(CallGraphSCC &SCC) {
TTIWP = &getAnalysis<TargetTransformInfoWrapperPass>();
- return Inliner::runOnSCC(SCC);
+ return LegacyInlinerBase::runOnSCC(SCC);
}
void SimpleInliner::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetTransformInfoWrapperPass>();
- Inliner::getAnalysisUsage(AU);
+ LegacyInlinerBase::getAnalysisUsage(AU);
}
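
With the inliner now parameterized by InlineParams instead of a single threshold, clients build the parameters first and hand them to the pass constructor. A hypothetical usage sketch, assuming the new createFunctionInliningPass(InlineParams&) and getInlineParams() overloads are declared in llvm/Transforms/IPO.h and llvm/Analysis/InlineCost.h alongside the existing ones:

#include "llvm/Analysis/InlineCost.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Transforms/IPO.h"
using namespace llvm;

// Build the legacy inliner from explicit parameters instead of a bare
// threshold; Params could equally come from getInlineParams(OptLevel,
// SizeOptLevel) when driving it from optimization levels.
static void addTunedInliner(legacy::PassManager &PM, int Threshold) {
  InlineParams Params = getInlineParams(Threshold);
  PM.add(createFunctionInliningPass(Params));
}
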
diff --git a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
index 79535ca49780..3f4731c937d1 100644
--- a/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/Inliner.cpp
@@ -13,6 +13,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/IPO/Inliner.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -20,19 +21,21 @@
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/IPO/InlinerPass.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
using namespace llvm;
#define DEBUG_TYPE "inline"
@@ -47,15 +50,44 @@ STATISTIC(NumMergedAllocas, "Number of allocas merged together");
// if those would be more profitable and blocked inline steps.
STATISTIC(NumCallerCallersAnalyzed, "Number of caller-callers analyzed");
-Inliner::Inliner(char &ID) : CallGraphSCCPass(ID), InsertLifetime(true) {}
-
-Inliner::Inliner(char &ID, bool InsertLifetime)
+/// Flag to disable manual alloca merging.
+///
+/// Merging of allocas was originally done as a stack-size saving technique
+/// prior to LLVM's code generator having support for stack coloring based on
+/// lifetime markers. It is now in the process of being removed. To experiment
+/// with disabling it and relying fully on lifetime marker based stack
+/// coloring, you can pass this flag to LLVM.
+static cl::opt<bool>
+ DisableInlinedAllocaMerging("disable-inlined-alloca-merging",
+ cl::init(false), cl::Hidden);
+
+namespace {
+enum class InlinerFunctionImportStatsOpts {
+ No = 0,
+ Basic = 1,
+ Verbose = 2,
+};
+
+cl::opt<InlinerFunctionImportStatsOpts> InlinerFunctionImportStats(
+ "inliner-function-import-stats",
+ cl::init(InlinerFunctionImportStatsOpts::No),
+ cl::values(clEnumValN(InlinerFunctionImportStatsOpts::Basic, "basic",
+ "basic statistics"),
+ clEnumValN(InlinerFunctionImportStatsOpts::Verbose, "verbose",
+ "printing of statistics for each inlined function")),
+ cl::Hidden, cl::desc("Enable inliner stats for imported functions"));
+} // namespace
+
+LegacyInlinerBase::LegacyInlinerBase(char &ID)
+ : CallGraphSCCPass(ID), InsertLifetime(true) {}
+
+LegacyInlinerBase::LegacyInlinerBase(char &ID, bool InsertLifetime)
: CallGraphSCCPass(ID), InsertLifetime(InsertLifetime) {}
/// For this class, we declare that we require and preserve the call graph.
/// If the derived class implements this method, it should
/// always explicitly call the implementation here.
-void Inliner::getAnalysisUsage(AnalysisUsage &AU) const {
+void LegacyInlinerBase::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<ProfileSummaryInfoWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
@@ -63,62 +95,33 @@ void Inliner::getAnalysisUsage(AnalysisUsage &AU) const {
CallGraphSCCPass::getAnalysisUsage(AU);
}
+typedef DenseMap<ArrayType *, std::vector<AllocaInst *>> InlinedArrayAllocasTy;
-typedef DenseMap<ArrayType*, std::vector<AllocaInst*> >
-InlinedArrayAllocasTy;
-
-/// If it is possible to inline the specified call site,
-/// do so and update the CallGraph for this operation.
+/// Look at all of the allocas that we inlined through this call site. If we
+/// have already inlined other allocas through other calls into this function,
+/// then we know that they have disjoint lifetimes and that we can merge them.
///
-/// This function also does some basic book-keeping to update the IR. The
-/// InlinedArrayAllocas map keeps track of any allocas that are already
-/// available from other functions inlined into the caller. If we are able to
-/// inline this call site we attempt to reuse already available allocas or add
-/// any new allocas to the set if not possible.
-static bool InlineCallIfPossible(Pass &P, CallSite CS, InlineFunctionInfo &IFI,
- InlinedArrayAllocasTy &InlinedArrayAllocas,
- int InlineHistory, bool InsertLifetime) {
- Function *Callee = CS.getCalledFunction();
- Function *Caller = CS.getCaller();
-
- // We need to manually construct BasicAA directly in order to disable
- // its use of other function analyses.
- BasicAAResult BAR(createLegacyPMBasicAAResult(P, *Callee));
-
- // Construct our own AA results for this function. We do this manually to
- // work around the limitations of the legacy pass manager.
- AAResults AAR(createLegacyPMAAResults(P, *Callee, BAR));
-
- // Try to inline the function. Get the list of static allocas that were
- // inlined.
- if (!InlineFunction(CS, IFI, &AAR, InsertLifetime))
- return false;
-
- AttributeFuncs::mergeAttributesForInlining(*Caller, *Callee);
+/// There are many heuristics possible for merging these allocas, and the
+/// different options have different tradeoffs. One thing that we *really*
+/// don't want to hurt is SRoA: once inlining happens, often allocas are no
+/// longer address taken and so they can be promoted.
+///
+/// Our "solution" for that is to only merge allocas whose outermost type is an
+/// array type. These are usually not promoted because someone is using a
+/// variable index into them. These are also often the most important ones to
+/// merge.
+///
+/// A better solution would be to have real memory lifetime markers in the IR
+/// and not have the inliner do any merging of allocas at all. This would
+/// allow the backend to do proper stack slot coloring of all allocas that
+/// *actually make it to the backend*, which is really what we want.
+///
+/// Because we don't have this information, we do this simple and useful hack.
+static void mergeInlinedArrayAllocas(
+ Function *Caller, InlineFunctionInfo &IFI,
+ InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory) {
+ SmallPtrSet<AllocaInst *, 16> UsedAllocas;
- // Look at all of the allocas that we inlined through this call site. If we
- // have already inlined other allocas through other calls into this function,
- // then we know that they have disjoint lifetimes and that we can merge them.
- //
- // There are many heuristics possible for merging these allocas, and the
- // different options have different tradeoffs. One thing that we *really*
- // don't want to hurt is SRoA: once inlining happens, often allocas are no
- // longer address taken and so they can be promoted.
- //
- // Our "solution" for that is to only merge allocas whose outermost type is an
- // array type. These are usually not promoted because someone is using a
- // variable index into them. These are also often the most important ones to
- // merge.
- //
- // A better solution would be to have real memory lifetime markers in the IR
- // and not have the inliner do any merging of allocas at all. This would
- // allow the backend to do proper stack slot coloring of all allocas that
- // *actually make it to the backend*, which is really what we want.
- //
- // Because we don't have this information, we do this simple and useful hack.
- //
- SmallPtrSet<AllocaInst*, 16> UsedAllocas;
-
// When processing our SCC, check to see if CS was inlined from some other
// call site. For example, if we're processing "A" in this code:
// A() { B() }
@@ -131,25 +134,25 @@ static bool InlineCallIfPossible(Pass &P, CallSite CS, InlineFunctionInfo &IFI,
// because their scopes are not disjoint. We could make this smarter by
// keeping track of the inline history for each alloca in the
// InlinedArrayAllocas but this isn't likely to be a significant win.
- if (InlineHistory != -1) // Only do merging for top-level call sites in SCC.
- return true;
-
+ if (InlineHistory != -1) // Only do merging for top-level call sites in SCC.
+ return;
+
// Loop over all the allocas we have so far and see if they can be merged with
// a previously inlined alloca. If not, remember that we had it.
- for (unsigned AllocaNo = 0, e = IFI.StaticAllocas.size();
- AllocaNo != e; ++AllocaNo) {
+ for (unsigned AllocaNo = 0, e = IFI.StaticAllocas.size(); AllocaNo != e;
+ ++AllocaNo) {
AllocaInst *AI = IFI.StaticAllocas[AllocaNo];
-
+
// Don't bother trying to merge array allocations (they will usually be
// canonicalized to be an allocation *of* an array), or allocations whose
// type is not itself an array (because we're afraid of pessimizing SRoA).
ArrayType *ATy = dyn_cast<ArrayType>(AI->getAllocatedType());
if (!ATy || AI->isArrayAllocation())
continue;
-
+
// Get the list of all available allocas for this array type.
- std::vector<AllocaInst*> &AllocasForType = InlinedArrayAllocas[ATy];
-
+ std::vector<AllocaInst *> &AllocasForType = InlinedArrayAllocas[ATy];
+
// Loop over the allocas in AllocasForType to see if we can reuse one. Note
// that we have to be careful not to reuse the same "available" alloca for
// multiple different allocas that we just inlined, we use the 'UsedAllocas'
@@ -160,24 +163,24 @@ static bool InlineCallIfPossible(Pass &P, CallSite CS, InlineFunctionInfo &IFI,
unsigned Align1 = AI->getAlignment(),
Align2 = AvailableAlloca->getAlignment();
-
+
// The available alloca has to be in the right function, not in some other
// function in this SCC.
if (AvailableAlloca->getParent() != AI->getParent())
continue;
-
+
// If the inlined function already uses this alloca then we can't reuse
// it.
if (!UsedAllocas.insert(AvailableAlloca).second)
continue;
-
+
// Otherwise, we *can* reuse it, RAUW AI into AvailableAlloca and declare
// success!
- DEBUG(dbgs() << " ***MERGED ALLOCA: " << *AI << "\n\t\tINTO: "
- << *AvailableAlloca << '\n');
-
+ DEBUG(dbgs() << " ***MERGED ALLOCA: " << *AI
+ << "\n\t\tINTO: " << *AvailableAlloca << '\n');
+
// Move affected dbg.declare calls immediately after the new alloca to
- // avoid the situation when a dbg.declare preceeds its alloca.
+ // avoid the situation when a dbg.declare precedes its alloca.
if (auto *L = LocalAsMetadata::getIfExists(AI))
if (auto *MDV = MetadataAsValue::getIfExists(AI->getContext(), L))
for (User *U : MDV->users())
@@ -209,7 +212,7 @@ static bool InlineCallIfPossible(Pass &P, CallSite CS, InlineFunctionInfo &IFI,
// If we already nuked the alloca, we're done with it.
if (MergedAwayAlloca)
continue;
-
+
// If we were unable to merge away the alloca either because there are no
// allocas of the right type available or because we reused them all
// already, remember that this alloca came from an inlined function and mark
@@ -218,19 +221,51 @@ static bool InlineCallIfPossible(Pass &P, CallSite CS, InlineFunctionInfo &IFI,
AllocasForType.push_back(AI);
UsedAllocas.insert(AI);
}
-
- return true;
}
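To make the heuristic described above concrete, here is a purely illustrative C++ source fragment (not from the patch): once both callees are inlined, their array locals have disjoint lifetimes, so the merging above (or, ideally, lifetime-marker-based stack coloring) can back them with a single stack slot.

    static int firstOf(const int *P) {
      int Buf[64];
      for (int I = 0; I != 64; ++I) Buf[I] = P[I];
      return Buf[0];
    }
    static int lastOf(const int *P) {
      int Buf[64];
      for (int I = 0; I != 64; ++I) Buf[I] = P[I];
      return Buf[63];
    }
    // After both calls are inlined, the two Buf arrays are never live at the
    // same time, so one [64 x i32] alloca can serve both of them.
    int caller(const int *P, bool First) { return First ? firstOf(P) : lastOf(P); }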
-static void emitAnalysis(CallSite CS, const Twine &Msg) {
+/// If it is possible to inline the specified call site,
+/// do so and update the CallGraph for this operation.
+///
+/// This function also does some basic book-keeping to update the IR. The
+/// InlinedArrayAllocas map keeps track of any allocas that are already
+/// available from other functions inlined into the caller. If we are able to
+/// inline this call site we attempt to reuse already available allocas or add
+/// any new allocas to the set if not possible.
+static bool InlineCallIfPossible(
+ CallSite CS, InlineFunctionInfo &IFI,
+ InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory,
+ bool InsertLifetime, function_ref<AAResults &(Function &)> &AARGetter,
+ ImportedFunctionsInliningStatistics &ImportedFunctionsStats) {
+ Function *Callee = CS.getCalledFunction();
Function *Caller = CS.getCaller();
- LLVMContext &Ctx = Caller->getContext();
- DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
- emitOptimizationRemarkAnalysis(Ctx, DEBUG_TYPE, *Caller, DLoc, Msg);
+
+ AAResults &AAR = AARGetter(*Callee);
+
+ // Try to inline the function. Get the list of static allocas that were
+ // inlined.
+ if (!InlineFunction(CS, IFI, &AAR, InsertLifetime))
+ return false;
+
+ if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
+ ImportedFunctionsStats.recordInline(*Caller, *Callee);
+
+ AttributeFuncs::mergeAttributesForInlining(*Caller, *Callee);
+
+ if (!DisableInlinedAllocaMerging)
+ mergeInlinedArrayAllocas(Caller, IFI, InlinedArrayAllocas, InlineHistory);
+
+ return true;
}
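The refactored helpers now receive their policy hooks as llvm::function_ref parameters. A small sketch of the semantics that matters here (illustrative names): function_ref is a non-owning view of a callable, so the underlying lambda must outlive every call made through it.

    #include "llvm/ADT/STLExtras.h"

    static int applyTwice(llvm::function_ref<int(int)> F, int X) { return F(F(X)); }

    static int demo() {
      auto AddOne = [](int V) { return V + 1; }; // must outlive the calls below
      return applyTwice(AddOne, 3);              // == 5
    }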
-bool Inliner::shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC,
- int &TotalSecondaryCost) {
+/// Return true if inlining of CS can block the caller from being
+/// inlined, when inlining the caller is known to be more beneficial. \p IC
+/// is the estimated inline cost associated with callsite \p CS.
+/// \p TotalSecondaryCost will be set to the estimated cost of inlining the
+/// caller if \p CS is suppressed for inlining.

+static bool
+shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC,
+ int &TotalSecondaryCost,
+ function_ref<InlineCost(CallSite CS)> GetInlineCost) {
// For now we only handle local or inline functions.
if (!Caller->hasLocalLinkage() && !Caller->hasLinkOnceODRLinkage())
@@ -269,7 +304,7 @@ bool Inliner::shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC,
continue;
}
- InlineCost IC2 = getInlineCost(CS2);
+ InlineCost IC2 = GetInlineCost(CS2);
++NumCallerCallersAnalyzed;
if (!IC2) {
callerWillBeRemoved = false;
@@ -278,7 +313,7 @@ bool Inliner::shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC,
if (IC2.isAlways())
continue;
- // See if inlining or original callsite would erase the cost delta of
+ // See if inlining of the original callsite would erase the cost delta of
// this callsite. We subtract off the penalty for the call instruction,
// which we would be deleting.
if (IC2.getCostDelta() <= CandidateCost) {
@@ -291,7 +326,7 @@ bool Inliner::shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC,
// be removed entirely. We did not account for this above unless there
// is only one caller of Caller.
if (callerWillBeRemoved && !Caller->use_empty())
- TotalSecondaryCost += InlineConstants::LastCallToStaticBonus;
+ TotalSecondaryCost -= InlineConstants::LastCallToStaticBonus;
if (inliningPreventsSomeOuterInline && TotalSecondaryCost < IC.getCost())
return true;
@@ -300,63 +335,73 @@ bool Inliner::shouldBeDeferred(Function *Caller, CallSite CS, InlineCost IC,
}
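A source-level illustration of the situation this deferral heuristic guards against (illustrative names, not from the patch): inlining a large callee into a small local wrapper can make the wrapper itself too expensive to inline into its own callers, which may be the bigger win.

    int big(int X); // assumed expensive to inline

    static int tinyWrapper(int X) { return big(X) + 1; } // cheap, local linkage

    int userA(int X) { return tinyWrapper(X); }
    int userB(int X) { return tinyWrapper(X) * 2; }
    // Inlining big() into tinyWrapper() first could block tinyWrapper() from
    // being inlined into userA()/userB(); shouldBeDeferred() estimates that
    // trade-off via TotalSecondaryCost.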
/// Return true if the inliner should attempt to inline at the given CallSite.
-bool Inliner::shouldInline(CallSite CS) {
- InlineCost IC = getInlineCost(CS);
-
+static bool shouldInline(CallSite CS,
+ function_ref<InlineCost(CallSite CS)> GetInlineCost,
+ OptimizationRemarkEmitter &ORE) {
+ using namespace ore;
+ InlineCost IC = GetInlineCost(CS);
+ Instruction *Call = CS.getInstruction();
+ Function *Callee = CS.getCalledFunction();
+
if (IC.isAlways()) {
DEBUG(dbgs() << " Inlining: cost=always"
- << ", Call: " << *CS.getInstruction() << "\n");
- emitAnalysis(CS, Twine(CS.getCalledFunction()->getName()) +
- " should always be inlined (cost=always)");
+ << ", Call: " << *CS.getInstruction() << "\n");
+ ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "AlwaysInline", Call)
+ << NV("Callee", Callee)
+ << " should always be inlined (cost=always)");
return true;
}
-
+
if (IC.isNever()) {
DEBUG(dbgs() << " NOT Inlining: cost=never"
- << ", Call: " << *CS.getInstruction() << "\n");
- emitAnalysis(CS, Twine(CS.getCalledFunction()->getName() +
- " should never be inlined (cost=never)"));
+ << ", Call: " << *CS.getInstruction() << "\n");
+ ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "NeverInline", Call)
+ << NV("Callee", Callee)
+ << " should never be inlined (cost=never)");
return false;
}
-
+
Function *Caller = CS.getCaller();
if (!IC) {
DEBUG(dbgs() << " NOT Inlining: cost=" << IC.getCost()
- << ", thres=" << (IC.getCostDelta() + IC.getCost())
- << ", Call: " << *CS.getInstruction() << "\n");
- emitAnalysis(CS, Twine(CS.getCalledFunction()->getName() +
- " too costly to inline (cost=") +
- Twine(IC.getCost()) + ", threshold=" +
- Twine(IC.getCostDelta() + IC.getCost()) + ")");
+ << ", thres=" << (IC.getCostDelta() + IC.getCost())
+ << ", Call: " << *CS.getInstruction() << "\n");
+ ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "TooCostly", Call)
+ << NV("Callee", Callee) << " too costly to inline (cost="
+ << NV("Cost", IC.getCost()) << ", threshold="
+ << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
return false;
}
int TotalSecondaryCost = 0;
- if (shouldBeDeferred(Caller, CS, IC, TotalSecondaryCost)) {
+ if (shouldBeDeferred(Caller, CS, IC, TotalSecondaryCost, GetInlineCost)) {
DEBUG(dbgs() << " NOT Inlining: " << *CS.getInstruction()
- << " Cost = " << IC.getCost()
- << ", outer Cost = " << TotalSecondaryCost << '\n');
- emitAnalysis(CS, Twine("Not inlining. Cost of inlining " +
- CS.getCalledFunction()->getName() +
- " increases the cost of inlining " +
- CS.getCaller()->getName() + " in other contexts"));
+ << " Cost = " << IC.getCost()
+ << ", outer Cost = " << TotalSecondaryCost << '\n');
+ ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE,
+ "IncreaseCostInOtherContexts", Call)
+ << "Not inlining. Cost of inlining " << NV("Callee", Callee)
+ << " increases the cost of inlining " << NV("Caller", Caller)
+ << " in other contexts");
return false;
}
DEBUG(dbgs() << " Inlining: cost=" << IC.getCost()
- << ", thres=" << (IC.getCostDelta() + IC.getCost())
- << ", Call: " << *CS.getInstruction() << '\n');
- emitAnalysis(
- CS, CS.getCalledFunction()->getName() + Twine(" can be inlined into ") +
- CS.getCaller()->getName() + " with cost=" + Twine(IC.getCost()) +
- " (threshold=" + Twine(IC.getCostDelta() + IC.getCost()) + ")");
+ << ", thres=" << (IC.getCostDelta() + IC.getCost())
+ << ", Call: " << *CS.getInstruction() << '\n');
+ ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "CanBeInlined", Call)
+ << NV("Callee", Callee) << " can be inlined into "
+ << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())
+ << " (threshold="
+ << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")");
return true;
}
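The remark-emission pattern above replaces the old emitOptimizationRemark* free functions. A compressed sketch of the pattern, assuming an OptimizationRemarkEmitter ORE, an Instruction *I, and a DEBUG_TYPE are already in scope (the remark name and payload are illustrative):

    using namespace ore;
    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, "ExampleRemark", I)
             << "considered " << NV("NumCandidates", 42) << " candidates");
    // The named values (NV) become structured fields when remarks are
    // serialized, e.g. as YAML optimization records.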
/// Return true if the specified inline history ID
/// indicates an inline history that includes the specified function.
-static bool InlineHistoryIncludes(Function *F, int InlineHistoryID,
- const SmallVectorImpl<std::pair<Function*, int> > &InlineHistory) {
+static bool InlineHistoryIncludes(
+ Function *F, int InlineHistoryID,
+ const SmallVectorImpl<std::pair<Function *, int>> &InlineHistory) {
while (InlineHistoryID != -1) {
assert(unsigned(InlineHistoryID) < InlineHistory.size() &&
"Invalid inline history ID");
@@ -367,23 +412,32 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID,
return false;
}
-bool Inliner::runOnSCC(CallGraphSCC &SCC) {
+bool LegacyInlinerBase::doInitialization(CallGraph &CG) {
+ if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
+ ImportedFunctionsStats.setModuleInfo(CG.getModule());
+ return false; // No changes to CallGraph.
+}
+
+bool LegacyInlinerBase::runOnSCC(CallGraphSCC &SCC) {
if (skipSCC(SCC))
return false;
return inlineCalls(SCC);
}
-bool Inliner::inlineCalls(CallGraphSCC &SCC) {
- CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
- ACT = &getAnalysis<AssumptionCacheTracker>();
- PSI = getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(CG.getModule());
- auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
-
- SmallPtrSet<Function*, 8> SCCFunctions;
+static bool
+inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG,
+ std::function<AssumptionCache &(Function &)> GetAssumptionCache,
+ ProfileSummaryInfo *PSI, TargetLibraryInfo &TLI,
+ bool InsertLifetime,
+ function_ref<InlineCost(CallSite CS)> GetInlineCost,
+ function_ref<AAResults &(Function &)> AARGetter,
+ ImportedFunctionsInliningStatistics &ImportedFunctionsStats) {
+ SmallPtrSet<Function *, 8> SCCFunctions;
DEBUG(dbgs() << "Inliner visiting SCC:");
for (CallGraphNode *Node : SCC) {
Function *F = Node->getFunction();
- if (F) SCCFunctions.insert(F);
+ if (F)
+ SCCFunctions.insert(F);
DEBUG(dbgs() << " " << (F ? F->getName() : "INDIRECTNODE"));
}
@@ -391,17 +445,19 @@ bool Inliner::inlineCalls(CallGraphSCC &SCC) {
// inline call sites in the original functions, not call sites that result
// from inlining other functions.
SmallVector<std::pair<CallSite, int>, 16> CallSites;
-
+
// When inlining a callee produces new call sites, we want to keep track of
// the fact that they were inlined from the callee. This allows us to avoid
// infinite inlining in some obscure cases. To represent this, we use an
// index into the InlineHistory vector.
- SmallVector<std::pair<Function*, int>, 8> InlineHistory;
+ SmallVector<std::pair<Function *, int>, 8> InlineHistory;
for (CallGraphNode *Node : SCC) {
Function *F = Node->getFunction();
- if (!F) continue;
-
+ if (!F || F->isDeclaration())
+ continue;
+
+ OptimizationRemarkEmitter ORE(F);
for (BasicBlock &BB : *F)
for (Instruction &I : BB) {
CallSite CS(cast<Value>(&I));
@@ -409,14 +465,21 @@ bool Inliner::inlineCalls(CallGraphSCC &SCC) {
// never be inlined.
if (!CS || isa<IntrinsicInst>(I))
continue;
-
+
// If this is a direct call to an external function, we can never inline
// it. If it is an indirect call, inlining may resolve it to be a
// direct call, so we keep it.
if (Function *Callee = CS.getCalledFunction())
- if (Callee->isDeclaration())
+ if (Callee->isDeclaration()) {
+ using namespace ore;
+ ORE.emit(OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I)
+ << NV("Callee", Callee) << " will not be inlined into "
+ << NV("Caller", CS.getCaller())
+ << " because its definition is unavailable"
+ << setIsVerbose());
continue;
-
+ }
+
CallSites.push_back(std::make_pair(CS, -1));
}
}
@@ -435,9 +498,8 @@ bool Inliner::inlineCalls(CallGraphSCC &SCC) {
if (SCCFunctions.count(F))
std::swap(CallSites[i--], CallSites[--FirstCallInSCC]);
-
InlinedArrayAllocasTy InlinedArrayAllocas;
- InlineFunctionInfo InlineInfo(&CG, ACT);
+ InlineFunctionInfo InlineInfo(&CG, &GetAssumptionCache);
// Now that we have all of the call sites, loop over them and inline them if
// it looks profitable to do so.
@@ -450,7 +512,7 @@ bool Inliner::inlineCalls(CallGraphSCC &SCC) {
// CallSites may be modified inside so ranged for loop can not be used.
for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) {
CallSite CS = CallSites[CSi].first;
-
+
Function *Caller = CS.getCaller();
Function *Callee = CS.getCalledFunction();
@@ -459,16 +521,17 @@ bool Inliner::inlineCalls(CallGraphSCC &SCC) {
// size. This happens because IPSCCP propagates the result out of the
// call and then we're left with the dead call.
if (isInstructionTriviallyDead(CS.getInstruction(), &TLI)) {
- DEBUG(dbgs() << " -> Deleting dead call: "
- << *CS.getInstruction() << "\n");
+ DEBUG(dbgs() << " -> Deleting dead call: " << *CS.getInstruction()
+ << "\n");
// Update the call graph by deleting the edge from Callee to Caller.
CG[Caller]->removeCallEdgeFor(CS);
CS.getInstruction()->eraseFromParent();
++NumCallsDeleted;
} else {
// We can only inline direct calls to non-declarations.
- if (!Callee || Callee->isDeclaration()) continue;
-
+ if (!Callee || Callee->isDeclaration())
+ continue;
+
// If this call site was obtained by inlining another function, verify
// that the include path for the function did not include the callee
// itself. If so, we'd be recursively inlining the same function,
@@ -478,37 +541,42 @@ bool Inliner::inlineCalls(CallGraphSCC &SCC) {
if (InlineHistoryID != -1 &&
InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory))
continue;
-
- LLVMContext &CallerCtx = Caller->getContext();
// Get DebugLoc to report. CS will be invalid after Inliner.
DebugLoc DLoc = CS.getInstruction()->getDebugLoc();
+ BasicBlock *Block = CS.getParent();
+ // FIXME for new PM: because of the old PM we currently generate ORE and
+ // in turn BFI on demand. With the new PM, the ORE dependency should
+ // just become a regular analysis dependency.
+ OptimizationRemarkEmitter ORE(Caller);
// If the policy determines that we should inline this function,
// try to do so.
- if (!shouldInline(CS)) {
- emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc,
- Twine(Callee->getName() +
- " will not be inlined into " +
- Caller->getName()));
+ using namespace ore;
+ if (!shouldInline(CS, GetInlineCost, ORE)) {
+ ORE.emit(
+ OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block)
+ << NV("Callee", Callee) << " will not be inlined into "
+ << NV("Caller", Caller));
continue;
}
// Attempt to inline the function.
- if (!InlineCallIfPossible(*this, CS, InlineInfo, InlinedArrayAllocas,
- InlineHistoryID, InsertLifetime)) {
- emitOptimizationRemarkMissed(CallerCtx, DEBUG_TYPE, *Caller, DLoc,
- Twine(Callee->getName() +
- " will not be inlined into " +
- Caller->getName()));
+ if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas,
+ InlineHistoryID, InsertLifetime, AARGetter,
+ ImportedFunctionsStats)) {
+ ORE.emit(
+ OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, Block)
+ << NV("Callee", Callee) << " will not be inlined into "
+ << NV("Caller", Caller));
continue;
}
++NumInlined;
// Report the inline decision.
- emitOptimizationRemark(
- CallerCtx, DEBUG_TYPE, *Caller, DLoc,
- Twine(Callee->getName() + " inlined into " + Caller->getName()));
+ ORE.emit(OptimizationRemark(DEBUG_TYPE, "Inlined", DLoc, Block)
+ << NV("Callee", Callee) << " inlined into "
+ << NV("Caller", Caller));
// If inlining this function gave us any new call sites, throw them
// onto our worklist to process. They are useful inline candidates.
@@ -522,30 +590,30 @@ bool Inliner::inlineCalls(CallGraphSCC &SCC) {
CallSites.push_back(std::make_pair(CallSite(Ptr), NewHistoryID));
}
}
-
+
// If we inlined or deleted the last possible call site to the function,
// delete the function body now.
if (Callee && Callee->use_empty() && Callee->hasLocalLinkage() &&
// TODO: Can remove if in SCC now.
!SCCFunctions.count(Callee) &&
-
+
// The function may be apparently dead, but if there are indirect
// callgraph references to the node, we cannot delete it yet, this
// could invalidate the CGSCC iterator.
CG[Callee]->getNumReferences() == 0) {
- DEBUG(dbgs() << " -> Deleting dead function: "
- << Callee->getName() << "\n");
+ DEBUG(dbgs() << " -> Deleting dead function: " << Callee->getName()
+ << "\n");
CallGraphNode *CalleeNode = CG[Callee];
// Remove any call graph edges from the callee to its callees.
CalleeNode->removeAllCalledFunctions();
-
+
// Removing the node for callee from the call graph and delete it.
delete CG.removeFunctionFromModule(CalleeNode);
++NumDeleted;
}
- // Remove this call site from the list. If possible, use
+ // Remove this call site from the list. If possible, use
// swap/pop_back for efficiency, but do not use it if doing so would
// move a call site to a function in this SCC before the
// 'FirstCallInSCC' barrier.
@@ -553,7 +621,7 @@ bool Inliner::inlineCalls(CallGraphSCC &SCC) {
CallSites[CSi] = CallSites.back();
CallSites.pop_back();
} else {
- CallSites.erase(CallSites.begin()+CSi);
+ CallSites.erase(CallSites.begin() + CSi);
}
--CSi;
@@ -565,17 +633,43 @@ bool Inliner::inlineCalls(CallGraphSCC &SCC) {
return Changed;
}
+bool LegacyInlinerBase::inlineCalls(CallGraphSCC &SCC) {
+ CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
+ ACT = &getAnalysis<AssumptionCacheTracker>();
+ PSI = getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ // We compute dedicated AA results for each function in the SCC as needed. We
+ // use a lambda referencing external objects so that they live long enough to
+ // be queried, but we re-use them each time.
+ Optional<BasicAAResult> BAR;
+ Optional<AAResults> AAR;
+ auto AARGetter = [&](Function &F) -> AAResults & {
+ BAR.emplace(createLegacyPMBasicAAResult(*this, F));
+ AAR.emplace(createLegacyPMAAResults(*this, F, *BAR));
+ return *AAR;
+ };
+ auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & {
+ return ACT->getAssumptionCache(F);
+ };
+ return inlineCallsImpl(SCC, CG, GetAssumptionCache, PSI, TLI, InsertLifetime,
+ [this](CallSite CS) { return getInlineCost(CS); },
+ AARGetter, ImportedFunctionsStats);
+}
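The AARGetter lambda above relies on a small lifetime trick: results are emplaced into Optionals owned by the enclosing scope, so the reference handed back stays valid until the next call rebuilds them. A generic sketch of that pattern (all names illustrative):

    #include "llvm/ADT/Optional.h"

    struct Result { int Value; };
    static Result computeFor(int Key) { return {Key * 2}; }

    static void demo() {
      llvm::Optional<Result> Cache; // outlives the lambda's invocations
      auto Getter = [&](int Key) -> Result & {
        Cache.emplace(computeFor(Key)); // rebuilt on every call, as in AARGetter
        return *Cache;
      };
      Result &R = Getter(21); // reference valid until the next Getter(...) call
      (void)R;
    }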
+
/// Remove now-dead linkonce functions at the end of
/// processing to avoid breaking the SCC traversal.
-bool Inliner::doFinalization(CallGraph &CG) {
+bool LegacyInlinerBase::doFinalization(CallGraph &CG) {
+ if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No)
+ ImportedFunctionsStats.dump(InlinerFunctionImportStats ==
+ InlinerFunctionImportStatsOpts::Verbose);
return removeDeadFunctions(CG);
}
/// Remove dead functions that are not included in DNR (Do Not Remove) list.
-bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) {
- SmallVector<CallGraphNode*, 16> FunctionsToRemove;
- SmallVector<CallGraphNode *, 16> DeadFunctionsInComdats;
- SmallDenseMap<const Comdat *, int, 16> ComdatEntriesAlive;
+bool LegacyInlinerBase::removeDeadFunctions(CallGraph &CG,
+ bool AlwaysInlineOnly) {
+ SmallVector<CallGraphNode *, 16> FunctionsToRemove;
+ SmallVector<Function *, 16> DeadFunctionsInComdats;
auto RemoveCGN = [&](CallGraphNode *CGN) {
// Remove any call graph edges from the function to its callees.
@@ -616,9 +710,8 @@ bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) {
// The inliner doesn't visit non-function entities which are in COMDAT
// groups so it is unsafe to do so *unless* the linkage is local.
if (!F->hasLocalLinkage()) {
- if (const Comdat *C = F->getComdat()) {
- --ComdatEntriesAlive[C];
- DeadFunctionsInComdats.push_back(CGN);
+ if (F->hasComdat()) {
+ DeadFunctionsInComdats.push_back(F);
continue;
}
}
@@ -626,32 +719,11 @@ bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) {
RemoveCGN(CGN);
}
if (!DeadFunctionsInComdats.empty()) {
- // Count up all the entities in COMDAT groups
- auto ComdatGroupReferenced = [&](const Comdat *C) {
- auto I = ComdatEntriesAlive.find(C);
- if (I != ComdatEntriesAlive.end())
- ++(I->getSecond());
- };
- for (const Function &F : CG.getModule())
- if (const Comdat *C = F.getComdat())
- ComdatGroupReferenced(C);
- for (const GlobalVariable &GV : CG.getModule().globals())
- if (const Comdat *C = GV.getComdat())
- ComdatGroupReferenced(C);
- for (const GlobalAlias &GA : CG.getModule().aliases())
- if (const Comdat *C = GA.getComdat())
- ComdatGroupReferenced(C);
- for (CallGraphNode *CGN : DeadFunctionsInComdats) {
- Function *F = CGN->getFunction();
- const Comdat *C = F->getComdat();
- int NumAlive = ComdatEntriesAlive[C];
- // We can remove functions in a COMDAT group if the entire group is dead.
- assert(NumAlive >= 0);
- if (NumAlive > 0)
- continue;
-
- RemoveCGN(CGN);
- }
+ // Filter out the functions whose comdats remain alive.
+ filterDeadComdatFunctions(CG.getModule(), DeadFunctionsInComdats);
+ // Remove the rest.
+ for (Function *F : DeadFunctionsInComdats)
+ RemoveCGN(CG[F]);
}
if (FunctionsToRemove.empty())
@@ -665,12 +737,201 @@ bool Inliner::removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly) {
// here to do this, it doesn't matter which order the functions are deleted
// in.
array_pod_sort(FunctionsToRemove.begin(), FunctionsToRemove.end());
- FunctionsToRemove.erase(std::unique(FunctionsToRemove.begin(),
- FunctionsToRemove.end()),
- FunctionsToRemove.end());
+ FunctionsToRemove.erase(
+ std::unique(FunctionsToRemove.begin(), FunctionsToRemove.end()),
+ FunctionsToRemove.end());
for (CallGraphNode *CGN : FunctionsToRemove) {
delete CG.removeFunctionFromModule(CGN);
++NumDeleted;
}
return true;
}
+
+PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC,
+ CGSCCAnalysisManager &AM, LazyCallGraph &CG,
+ CGSCCUpdateResult &UR) {
+ FunctionAnalysisManager &FAM =
+ AM.getResult<FunctionAnalysisManagerCGSCCProxy>(InitialC, CG)
+ .getManager();
+ const ModuleAnalysisManager &MAM =
+ AM.getResult<ModuleAnalysisManagerCGSCCProxy>(InitialC, CG).getManager();
+ bool Changed = false;
+
+ assert(InitialC.size() > 0 && "Cannot handle an empty SCC!");
+ Module &M = *InitialC.begin()->getFunction().getParent();
+ ProfileSummaryInfo *PSI = MAM.getCachedResult<ProfileSummaryAnalysis>(M);
+
+ std::function<AssumptionCache &(Function &)> GetAssumptionCache =
+ [&](Function &F) -> AssumptionCache & {
+ return FAM.getResult<AssumptionAnalysis>(F);
+ };
+
+ // Setup the data structure used to plumb customization into the
+ // `InlineFunction` routine.
+ InlineFunctionInfo IFI(/*cg=*/nullptr, &GetAssumptionCache);
+
+ auto GetInlineCost = [&](CallSite CS) {
+ Function &Callee = *CS.getCalledFunction();
+ auto &CalleeTTI = FAM.getResult<TargetIRAnalysis>(Callee);
+ return getInlineCost(CS, Params, CalleeTTI, GetAssumptionCache, PSI);
+ };
+
+ // We use a worklist of nodes to process so that we can handle if the SCC
+ // structure changes and some nodes are no longer part of the current SCC. We
+ // also need to use an updatable pointer for the SCC as a consequence.
+ SmallVector<LazyCallGraph::Node *, 16> Nodes;
+ for (auto &N : InitialC)
+ Nodes.push_back(&N);
+ auto *C = &InitialC;
+ auto *RC = &C->getOuterRefSCC();
+
+ // We also use a secondary worklist of call sites within a particular node to
+ // allow quickly continuing to inline through newly inlined call sites where
+ // possible.
+ SmallVector<std::pair<CallSite, int>, 16> Calls;
+
+ // When inlining a callee produces new call sites, we want to keep track of
+ // the fact that they were inlined from the callee. This allows us to avoid
+ // infinite inlining in some obscure cases. To represent this, we use an
+ // index into the InlineHistory vector.
+ SmallVector<std::pair<Function *, int>, 16> InlineHistory;
+
+ // Track a set vector of inlined callees so that we can augment the caller
+ // with all of their edges in the call graph before pruning out the ones that
+ // got simplified away.
+ SmallSetVector<Function *, 4> InlinedCallees;
+
+ // Track the dead functions to delete once finished with inlining calls. We
+ // defer deleting these to make it easier to handle the call graph updates.
+ SmallVector<Function *, 4> DeadFunctions;
+
+ do {
+ auto &N = *Nodes.pop_back_val();
+ if (CG.lookupSCC(N) != C)
+ continue;
+ Function &F = N.getFunction();
+ if (F.hasFnAttribute(Attribute::OptimizeNone))
+ continue;
+
+ // Get the remarks emission analysis for the caller.
+ auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+
+ // We want to generally process call sites top-down in order for
+ // simplifications stemming from replacing the call with the returned value
+ // after inlining to be visible to subsequent inlining decisions. So we
+ // walk the function backwards and then process the back of the vector.
+ // FIXME: Using reverse is a really bad way to do this. Instead we should
+ // do an actual PO walk of the function body.
+ for (Instruction &I : reverse(instructions(F)))
+ if (auto CS = CallSite(&I))
+ if (Function *Callee = CS.getCalledFunction())
+ if (!Callee->isDeclaration())
+ Calls.push_back({CS, -1});
+
+ bool DidInline = false;
+ while (!Calls.empty()) {
+ int InlineHistoryID;
+ CallSite CS;
+ std::tie(CS, InlineHistoryID) = Calls.pop_back_val();
+ Function &Callee = *CS.getCalledFunction();
+
+ if (InlineHistoryID != -1 &&
+ InlineHistoryIncludes(&Callee, InlineHistoryID, InlineHistory))
+ continue;
+
+ // Check whether we want to inline this callsite.
+ if (!shouldInline(CS, GetInlineCost, ORE))
+ continue;
+
+ if (!InlineFunction(CS, IFI))
+ continue;
+ DidInline = true;
+ InlinedCallees.insert(&Callee);
+
+ // Add any new callsites to defined functions to the worklist.
+ if (!IFI.InlinedCallSites.empty()) {
+ int NewHistoryID = InlineHistory.size();
+ InlineHistory.push_back({&Callee, InlineHistoryID});
+ for (CallSite &CS : reverse(IFI.InlinedCallSites))
+ if (Function *NewCallee = CS.getCalledFunction())
+ if (!NewCallee->isDeclaration())
+ Calls.push_back({CS, NewHistoryID});
+ }
+
+ // Merge the attributes based on the inlining.
+ AttributeFuncs::mergeAttributesForInlining(F, Callee);
+
+ // For local functions, check whether this makes the callee trivially
+ // dead. In that case, we can drop the body of the function eagerly
+ // which may reduce the number of callers of other functions to one,
+ // changing inline cost thresholds.
+ if (Callee.hasLocalLinkage()) {
+ // To check this we also need to nuke any dead constant uses (perhaps
+ // made dead by this operation on other functions).
+ Callee.removeDeadConstantUsers();
+ if (Callee.use_empty()) {
+ // Clear the body and queue the function itself for deletion when we
+ // finish inlining and call graph updates.
+ // Note that after this point, it is an error to do anything other
+ // than use the callee's address or delete it.
+ Callee.dropAllReferences();
+ assert(find(DeadFunctions, &Callee) == DeadFunctions.end() &&
+ "Cannot cause a function to become dead twice!");
+ DeadFunctions.push_back(&Callee);
+ }
+ }
+ }
+
+ if (!DidInline)
+ continue;
+ Changed = true;
+
+ // Add all the inlined callees' edges as ref edges to the caller. These are
+ // by definition trivial edges as we always have *some* transitive ref edge
+ // chain. While in some cases these edges are direct calls inside the
+ // callee, they have to be modeled in the inliner as reference edges as
+ // there may be a reference edge anywhere along the chain from the current
+ // caller to the callee that causes the whole thing to appear like
+ // a (transitive) reference edge that will require promotion to a call edge
+ // below.
+ for (Function *InlinedCallee : InlinedCallees) {
+ LazyCallGraph::Node &CalleeN = *CG.lookup(*InlinedCallee);
+ for (LazyCallGraph::Edge &E : CalleeN)
+ RC->insertTrivialRefEdge(N, *E.getNode());
+ }
+ InlinedCallees.clear();
+
+ // At this point, since we have made changes we have at least removed
+ // a call instruction. However, in the process we do some incremental
+ // simplification of the surrounding code. This simplification can
+ // essentially do all of the same things as a function pass and we can
+ // re-use the exact same logic for updating the call graph to reflect the
+ // change.
+ C = &updateCGAndAnalysisManagerForFunctionPass(CG, *C, N, AM, UR);
+ RC = &C->getOuterRefSCC();
+ } while (!Nodes.empty());
+
+ // Now that we've finished inlining all of the calls across this SCC, delete
+ // all of the trivially dead functions, updating the call graph and the CGSCC
+ // pass manager in the process.
+ //
+ // Note that this walks a pointer set which has non-deterministic order but
+ // that is OK as all we do is delete things and add pointers to unordered
+ // sets.
+ for (Function *DeadF : DeadFunctions) {
+ // Get the necessary information out of the call graph and nuke the
+ // function there.
+ auto &DeadC = *CG.lookupSCC(*CG.lookup(*DeadF));
+ auto &DeadRC = DeadC.getOuterRefSCC();
+ CG.removeDeadFunction(*DeadF);
+
+ // Mark the relevant parts of the call graph as invalid so we don't visit
+ // them.
+ UR.InvalidatedSCCs.insert(&DeadC);
+ UR.InvalidatedRefSCCs.insert(&DeadRC);
+
+ // And delete the actual function from the module.
+ M.getFunctionList().erase(DeadF);
+ }
+ return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
+}
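The inline-history mechanism used by both the legacy and new-PM drivers exists to stop runaway inlining through cycles that only become visible as inlining proceeds. A minimal source-level illustration (not from the patch):

    int pong(int X);
    int ping(int X) { return X <= 0 ? 0 : pong(X - 1); }
    int pong(int X) { return X <= 0 ? 1 : ping(X - 1); }
    // Inlining pong() into ping() exposes a fresh call back to ping(); without
    // the recorded history (the (Function, parent-history-id) pairs above), the
    // inliner could keep re-inlining this pair without bound.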
diff --git a/contrib/llvm/lib/Transforms/IPO/Internalize.cpp b/contrib/llvm/lib/Transforms/IPO/Internalize.cpp
index 8c5c6f77077c..26db1465bb26 100644
--- a/contrib/llvm/lib/Transforms/IPO/Internalize.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/Internalize.cpp
@@ -239,7 +239,7 @@ bool InternalizePass::internalizeModule(Module &M, CallGraph *CG) {
InternalizePass::InternalizePass() : MustPreserveGV(PreserveAPIList()) {}
-PreservedAnalyses InternalizePass::run(Module &M, AnalysisManager<Module> &AM) {
+PreservedAnalyses InternalizePass::run(Module &M, ModuleAnalysisManager &AM) {
if (!internalizeModule(M, AM.getCachedResult<CallGraphAnalysis>(M)))
return PreservedAnalyses::all();
diff --git a/contrib/llvm/lib/Transforms/IPO/LowerTypeTests.cpp b/contrib/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
index 36089f0a8801..2948878cffc4 100644
--- a/contrib/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -13,8 +13,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/LowerTypeTests.h"
-#include "llvm/Transforms/IPO.h"
#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Triple.h"
#include "llvm/IR/Constant.h"
@@ -23,14 +23,18 @@
#include "llvm/IR/GlobalObject.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/TrailingObjects.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/ModuleUtils.h"
using namespace llvm;
using namespace lowertypetests;
@@ -79,8 +83,7 @@ bool BitSetInfo::containsValue(
if (!Result)
return false;
COffset += APOffset.getZExtValue();
- return containsValue(DL, GlobalLayout, GEP->getPointerOperand(),
- COffset);
+ return containsValue(DL, GlobalLayout, GEP->getPointerOperand(), COffset);
}
if (auto Op = dyn_cast<Operator>(V)) {
@@ -204,56 +207,115 @@ struct ByteArrayInfo {
Constant *Mask;
};
-struct LowerTypeTests : public ModulePass {
- static char ID;
- LowerTypeTests() : ModulePass(ID) {
- initializeLowerTypeTestsPass(*PassRegistry::getPassRegistry());
+/// A POD-like structure that we use to store a global reference together with
+/// its metadata types. In this pass we frequently need to query the set of
+/// metadata types referenced by a global, which at the IR level is an expensive
+/// operation involving a map lookup; this data structure helps to reduce the
+/// number of times we need to do this lookup.
+class GlobalTypeMember final : TrailingObjects<GlobalTypeMember, MDNode *> {
+ GlobalObject *GO;
+ size_t NTypes;
+
+ friend TrailingObjects;
+ size_t numTrailingObjects(OverloadToken<MDNode *>) const { return NTypes; }
+
+public:
+ static GlobalTypeMember *create(BumpPtrAllocator &Alloc, GlobalObject *GO,
+ ArrayRef<MDNode *> Types) {
+ auto *GTM = static_cast<GlobalTypeMember *>(Alloc.Allocate(
+ totalSizeToAlloc<MDNode *>(Types.size()), alignof(GlobalTypeMember)));
+ GTM->GO = GO;
+ GTM->NTypes = Types.size();
+ std::uninitialized_copy(Types.begin(), Types.end(),
+ GTM->getTrailingObjects<MDNode *>());
+ return GTM;
}
+ GlobalObject *getGlobal() const {
+ return GO;
+ }
+ ArrayRef<MDNode *> types() const {
+ return makeArrayRef(getTrailingObjects<MDNode *>(), NTypes);
+ }
+};
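GlobalTypeMember uses the TrailingObjects idiom to co-allocate the variable-length list of type metadata nodes with the object itself. A condensed, generic sketch of the same idiom with illustrative names (it mirrors GlobalTypeMember::create: raw-allocate, then fill in the fields):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/Support/Allocator.h"
    #include "llvm/Support/TrailingObjects.h"
    #include <memory>

    class IntPack final : llvm::TrailingObjects<IntPack, int> {
      friend TrailingObjects;
      size_t N;
      size_t numTrailingObjects(OverloadToken<int>) const { return N; }

    public:
      static IntPack *create(llvm::BumpPtrAllocator &Alloc, llvm::ArrayRef<int> Vs) {
        auto *P = static_cast<IntPack *>(
            Alloc.Allocate(totalSizeToAlloc<int>(Vs.size()), alignof(IntPack)));
        P->N = Vs.size();
        std::uninitialized_copy(Vs.begin(), Vs.end(), P->getTrailingObjects<int>());
        return P;
      }
      llvm::ArrayRef<int> values() const {
        return llvm::makeArrayRef(getTrailingObjects<int>(), N);
      }
    };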
- Module *M;
+class LowerTypeTestsModule {
+ Module &M;
bool LinkerSubsectionsViaSymbols;
Triple::ArchType Arch;
+ Triple::OSType OS;
Triple::ObjectFormatType ObjectFormat;
- IntegerType *Int1Ty;
- IntegerType *Int8Ty;
- IntegerType *Int32Ty;
- Type *Int32PtrTy;
- IntegerType *Int64Ty;
- IntegerType *IntPtrTy;
+
+ IntegerType *Int1Ty = Type::getInt1Ty(M.getContext());
+ IntegerType *Int8Ty = Type::getInt8Ty(M.getContext());
+ IntegerType *Int32Ty = Type::getInt32Ty(M.getContext());
+ PointerType *Int32PtrTy = PointerType::getUnqual(Int32Ty);
+ IntegerType *Int64Ty = Type::getInt64Ty(M.getContext());
+ IntegerType *IntPtrTy = M.getDataLayout().getIntPtrType(M.getContext(), 0);
+
+ // Indirect function call index assignment counter for WebAssembly
+ uint64_t IndirectIndex = 1;
// Mapping from type identifiers to the call sites that test them.
DenseMap<Metadata *, std::vector<CallInst *>> TypeTestCallSites;
std::vector<ByteArrayInfo> ByteArrayInfos;
+ Function *WeakInitializerFn = nullptr;
+
BitSetInfo
buildBitSet(Metadata *TypeId,
- const DenseMap<GlobalObject *, uint64_t> &GlobalLayout);
+ const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout);
ByteArrayInfo *createByteArray(BitSetInfo &BSI);
void allocateByteArrays();
Value *createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI, ByteArrayInfo *&BAI,
Value *BitOffset);
- void
- lowerTypeTestCalls(ArrayRef<Metadata *> TypeIds, Constant *CombinedGlobalAddr,
- const DenseMap<GlobalObject *, uint64_t> &GlobalLayout);
+ void lowerTypeTestCalls(
+ ArrayRef<Metadata *> TypeIds, Constant *CombinedGlobalAddr,
+ const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout);
Value *
lowerBitSetCall(CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI,
Constant *CombinedGlobal,
const DenseMap<GlobalObject *, uint64_t> &GlobalLayout);
void buildBitSetsFromGlobalVariables(ArrayRef<Metadata *> TypeIds,
- ArrayRef<GlobalVariable *> Globals);
+ ArrayRef<GlobalTypeMember *> Globals);
unsigned getJumpTableEntrySize();
Type *getJumpTableEntryType();
- Constant *createJumpTableEntry(GlobalObject *Src, Function *Dest,
- unsigned Distance);
+ void createJumpTableEntry(raw_ostream &AsmOS, raw_ostream &ConstraintOS,
+ SmallVectorImpl<Value *> &AsmArgs, Function *Dest);
void verifyTypeMDNode(GlobalObject *GO, MDNode *Type);
void buildBitSetsFromFunctions(ArrayRef<Metadata *> TypeIds,
- ArrayRef<Function *> Functions);
+ ArrayRef<GlobalTypeMember *> Functions);
+ void buildBitSetsFromFunctionsNative(ArrayRef<Metadata *> TypeIds,
+ ArrayRef<GlobalTypeMember *> Functions);
+ void buildBitSetsFromFunctionsWASM(ArrayRef<Metadata *> TypeIds,
+ ArrayRef<GlobalTypeMember *> Functions);
void buildBitSetsFromDisjointSet(ArrayRef<Metadata *> TypeIds,
- ArrayRef<GlobalObject *> Globals);
+ ArrayRef<GlobalTypeMember *> Globals);
+
+ void replaceWeakDeclarationWithJumpTablePtr(Function *F, Constant *JT);
+ void moveInitializerToModuleConstructor(GlobalVariable *GV);
+ void findGlobalVariableUsersOf(Constant *C,
+ SmallSetVector<GlobalVariable *, 8> &Out);
+
+ void createJumpTable(Function *F, ArrayRef<GlobalTypeMember *> Functions);
+
+public:
+ LowerTypeTestsModule(Module &M);
bool lower();
- bool runOnModule(Module &M) override;
+};
+
+struct LowerTypeTests : public ModulePass {
+ static char ID;
+ LowerTypeTests() : ModulePass(ID) {
+ initializeLowerTypeTestsPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M) override {
+ if (skipModule(M))
+ return false;
+ return LowerTypeTestsModule(M).lower();
+ }
};
} // anonymous namespace
@@ -266,23 +328,21 @@ ModulePass *llvm::createLowerTypeTestsPass() { return new LowerTypeTests; }
/// Build a bit set for TypeId using the object layouts in
/// GlobalLayout.
-BitSetInfo LowerTypeTests::buildBitSet(
+BitSetInfo LowerTypeTestsModule::buildBitSet(
Metadata *TypeId,
- const DenseMap<GlobalObject *, uint64_t> &GlobalLayout) {
+ const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout) {
BitSetBuilder BSB;
// Compute the byte offset of each address associated with this type
// identifier.
- SmallVector<MDNode *, 2> Types;
for (auto &GlobalAndOffset : GlobalLayout) {
- Types.clear();
- GlobalAndOffset.first->getMetadata(LLVMContext::MD_type, Types);
- for (MDNode *Type : Types) {
+ for (MDNode *Type : GlobalAndOffset.first->types()) {
if (Type->getOperand(1) != TypeId)
continue;
uint64_t Offset =
- cast<ConstantInt>(cast<ConstantAsMetadata>(Type->getOperand(0))
- ->getValue())->getZExtValue();
+ cast<ConstantInt>(
+ cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
+ ->getZExtValue();
BSB.addOffset(GlobalAndOffset.second + Offset);
}
}
@@ -305,14 +365,14 @@ static Value *createMaskedBitTest(IRBuilder<> &B, Value *Bits,
return B.CreateICmpNE(MaskedBits, ConstantInt::get(BitsType, 0));
}
-ByteArrayInfo *LowerTypeTests::createByteArray(BitSetInfo &BSI) {
+ByteArrayInfo *LowerTypeTestsModule::createByteArray(BitSetInfo &BSI) {
// Create globals to stand in for byte arrays and masks. These never actually
// get initialized, we RAUW and erase them later in allocateByteArrays() once
// we know the offset and mask to use.
auto ByteArrayGlobal = new GlobalVariable(
- *M, Int8Ty, /*isConstant=*/true, GlobalValue::PrivateLinkage, nullptr);
- auto MaskGlobal = new GlobalVariable(
- *M, Int8Ty, /*isConstant=*/true, GlobalValue::PrivateLinkage, nullptr);
+ M, Int8Ty, /*isConstant=*/true, GlobalValue::PrivateLinkage, nullptr);
+ auto MaskGlobal = new GlobalVariable(M, Int8Ty, /*isConstant=*/true,
+ GlobalValue::PrivateLinkage, nullptr);
ByteArrayInfos.emplace_back();
ByteArrayInfo *BAI = &ByteArrayInfos.back();
@@ -324,7 +384,7 @@ ByteArrayInfo *LowerTypeTests::createByteArray(BitSetInfo &BSI) {
return BAI;
}
-void LowerTypeTests::allocateByteArrays() {
+void LowerTypeTestsModule::allocateByteArrays() {
std::stable_sort(ByteArrayInfos.begin(), ByteArrayInfos.end(),
[](const ByteArrayInfo &BAI1, const ByteArrayInfo &BAI2) {
return BAI1.BitSize > BAI2.BitSize;
@@ -343,9 +403,9 @@ void LowerTypeTests::allocateByteArrays() {
cast<GlobalVariable>(BAI->Mask->getOperand(0))->eraseFromParent();
}
- Constant *ByteArrayConst = ConstantDataArray::get(M->getContext(), BAB.Bytes);
+ Constant *ByteArrayConst = ConstantDataArray::get(M.getContext(), BAB.Bytes);
auto ByteArray =
- new GlobalVariable(*M, ByteArrayConst->getType(), /*isConstant=*/true,
+ new GlobalVariable(M, ByteArrayConst->getType(), /*isConstant=*/true,
GlobalValue::PrivateLinkage, ByteArrayConst);
for (unsigned I = 0; I != ByteArrayInfos.size(); ++I) {
@@ -363,7 +423,7 @@ void LowerTypeTests::allocateByteArrays() {
BAI->ByteArray->replaceAllUsesWith(GEP);
} else {
GlobalAlias *Alias = GlobalAlias::create(
- Int8Ty, 0, GlobalValue::PrivateLinkage, "bits", GEP, M);
+ Int8Ty, 0, GlobalValue::PrivateLinkage, "bits", GEP, &M);
BAI->ByteArray->replaceAllUsesWith(Alias);
}
BAI->ByteArray->eraseFromParent();
@@ -377,8 +437,9 @@ void LowerTypeTests::allocateByteArrays() {
/// Build a test that bit BitOffset is set in BSI, where
/// BitSetGlobal is a global containing the bits in BSI.
-Value *LowerTypeTests::createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI,
- ByteArrayInfo *&BAI, Value *BitOffset) {
+Value *LowerTypeTestsModule::createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI,
+ ByteArrayInfo *&BAI,
+ Value *BitOffset) {
if (BSI.BitSize <= 64) {
// If the bit set is sufficiently small, we can avoid a load by bit testing
// a constant.
@@ -407,7 +468,7 @@ Value *LowerTypeTests::createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI,
// improving the security of the CFI mechanism based on this pass.
ByteArray = GlobalAlias::create(BAI->ByteArray->getValueType(), 0,
GlobalValue::PrivateLinkage, "bits_use",
- ByteArray, M);
+ ByteArray, &M);
}
Value *ByteAddr = B.CreateGEP(Ty, ByteArray, BitOffset);
@@ -420,15 +481,15 @@ Value *LowerTypeTests::createBitSetTest(IRBuilder<> &B, BitSetInfo &BSI,
/// Lower a llvm.type.test call to its implementation. Returns the value to
/// replace the call with.
-Value *LowerTypeTests::lowerBitSetCall(
+Value *LowerTypeTestsModule::lowerBitSetCall(
CallInst *CI, BitSetInfo &BSI, ByteArrayInfo *&BAI,
Constant *CombinedGlobalIntAddr,
const DenseMap<GlobalObject *, uint64_t> &GlobalLayout) {
Value *Ptr = CI->getArgOperand(0);
- const DataLayout &DL = M->getDataLayout();
+ const DataLayout &DL = M.getDataLayout();
if (BSI.containsValue(DL, GlobalLayout, Ptr))
- return ConstantInt::getTrue(M->getContext());
+ return ConstantInt::getTrue(M.getContext());
Constant *OffsetedGlobalAsInt = ConstantExpr::getAdd(
CombinedGlobalIntAddr, ConstantInt::get(IntPtrTy, BSI.ByteOffset));
@@ -490,17 +551,18 @@ Value *LowerTypeTests::lowerBitSetCall(
/// Given a disjoint set of type identifiers and globals, lay out the globals,
/// build the bit sets and lower the llvm.type.test calls.
-void LowerTypeTests::buildBitSetsFromGlobalVariables(
- ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalVariable *> Globals) {
+void LowerTypeTestsModule::buildBitSetsFromGlobalVariables(
+ ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Globals) {
// Build a new global with the combined contents of the referenced globals.
// This global is a struct whose even-indexed elements contain the original
// contents of the referenced globals and whose odd-indexed elements contain
// any padding required to align the next element to the next power of 2.
std::vector<Constant *> GlobalInits;
- const DataLayout &DL = M->getDataLayout();
- for (GlobalVariable *G : Globals) {
- GlobalInits.push_back(G->getInitializer());
- uint64_t InitSize = DL.getTypeAllocSize(G->getValueType());
+ const DataLayout &DL = M.getDataLayout();
+ for (GlobalTypeMember *G : Globals) {
+ GlobalVariable *GV = cast<GlobalVariable>(G->getGlobal());
+ GlobalInits.push_back(GV->getInitializer());
+ uint64_t InitSize = DL.getTypeAllocSize(GV->getValueType());
// Compute the amount of padding required.
uint64_t Padding = NextPowerOf2(InitSize - 1) - InitSize;
@@ -515,16 +577,16 @@ void LowerTypeTests::buildBitSetsFromGlobalVariables(
}
if (!GlobalInits.empty())
GlobalInits.pop_back();
- Constant *NewInit = ConstantStruct::getAnon(M->getContext(), GlobalInits);
+ Constant *NewInit = ConstantStruct::getAnon(M.getContext(), GlobalInits);
auto *CombinedGlobal =
- new GlobalVariable(*M, NewInit->getType(), /*isConstant=*/true,
+ new GlobalVariable(M, NewInit->getType(), /*isConstant=*/true,
GlobalValue::PrivateLinkage, NewInit);
StructType *NewTy = cast<StructType>(NewInit->getType());
const StructLayout *CombinedGlobalLayout = DL.getStructLayout(NewTy);
// Compute the offsets of the original globals within the new global.
- DenseMap<GlobalObject *, uint64_t> GlobalLayout;
+ DenseMap<GlobalTypeMember *, uint64_t> GlobalLayout;
for (unsigned I = 0; I != Globals.size(); ++I)
// Multiply by 2 to account for padding elements.
GlobalLayout[Globals[I]] = CombinedGlobalLayout->getElementOffset(I * 2);
@@ -535,31 +597,36 @@ void LowerTypeTests::buildBitSetsFromGlobalVariables(
// global from which we built the combined global, and replace references
// to the original globals with references to the aliases.
for (unsigned I = 0; I != Globals.size(); ++I) {
+ GlobalVariable *GV = cast<GlobalVariable>(Globals[I]->getGlobal());
+
// Multiply by 2 to account for padding elements.
Constant *CombinedGlobalIdxs[] = {ConstantInt::get(Int32Ty, 0),
ConstantInt::get(Int32Ty, I * 2)};
Constant *CombinedGlobalElemPtr = ConstantExpr::getGetElementPtr(
NewInit->getType(), CombinedGlobal, CombinedGlobalIdxs);
if (LinkerSubsectionsViaSymbols) {
- Globals[I]->replaceAllUsesWith(CombinedGlobalElemPtr);
+ GV->replaceAllUsesWith(CombinedGlobalElemPtr);
} else {
- assert(Globals[I]->getType()->getAddressSpace() == 0);
+ assert(GV->getType()->getAddressSpace() == 0);
GlobalAlias *GAlias = GlobalAlias::create(NewTy->getElementType(I * 2), 0,
- Globals[I]->getLinkage(), "",
- CombinedGlobalElemPtr, M);
- GAlias->setVisibility(Globals[I]->getVisibility());
- GAlias->takeName(Globals[I]);
- Globals[I]->replaceAllUsesWith(GAlias);
+ GV->getLinkage(), "",
+ CombinedGlobalElemPtr, &M);
+ GAlias->setVisibility(GV->getVisibility());
+ GAlias->takeName(GV);
+ GV->replaceAllUsesWith(GAlias);
}
- Globals[I]->eraseFromParent();
+ GV->eraseFromParent();
}
}
-void LowerTypeTests::lowerTypeTestCalls(
+void LowerTypeTestsModule::lowerTypeTestCalls(
ArrayRef<Metadata *> TypeIds, Constant *CombinedGlobalAddr,
- const DenseMap<GlobalObject *, uint64_t> &GlobalLayout) {
+ const DenseMap<GlobalTypeMember *, uint64_t> &GlobalLayout) {
Constant *CombinedGlobalIntAddr =
ConstantExpr::getPtrToInt(CombinedGlobalAddr, IntPtrTy);
+ DenseMap<GlobalObject *, uint64_t> GlobalObjLayout;
+ for (auto &P : GlobalLayout)
+ GlobalObjLayout[P.first->getGlobal()] = P.second;
// For each type identifier in this disjoint set...
for (Metadata *TypeId : TypeIds) {
@@ -579,17 +646,16 @@ void LowerTypeTests::lowerTypeTestCalls(
for (CallInst *CI : TypeTestCallSites[TypeId]) {
++NumTypeTestCallsLowered;
Value *Lowered =
- lowerBitSetCall(CI, BSI, BAI, CombinedGlobalIntAddr, GlobalLayout);
+ lowerBitSetCall(CI, BSI, BAI, CombinedGlobalIntAddr, GlobalObjLayout);
CI->replaceAllUsesWith(Lowered);
CI->eraseFromParent();
}
}
}
-void LowerTypeTests::verifyTypeMDNode(GlobalObject *GO, MDNode *Type) {
+void LowerTypeTestsModule::verifyTypeMDNode(GlobalObject *GO, MDNode *Type) {
if (Type->getNumOperands() != 2)
- report_fatal_error(
- "All operands of type metadata must have 2 elements");
+ report_fatal_error("All operands of type metadata must have 2 elements");
if (GO->isThreadLocal())
report_fatal_error("Bit set element may not be thread-local");
@@ -610,60 +676,172 @@ void LowerTypeTests::verifyTypeMDNode(GlobalObject *GO, MDNode *Type) {
}
static const unsigned kX86JumpTableEntrySize = 8;
+static const unsigned kARMJumpTableEntrySize = 4;
+
+unsigned LowerTypeTestsModule::getJumpTableEntrySize() {
+ switch (Arch) {
+ case Triple::x86:
+ case Triple::x86_64:
+ return kX86JumpTableEntrySize;
+ case Triple::arm:
+ case Triple::thumb:
+ case Triple::aarch64:
+ return kARMJumpTableEntrySize;
+ default:
+ report_fatal_error("Unsupported architecture for jump tables");
+ }
+}
-unsigned LowerTypeTests::getJumpTableEntrySize() {
- if (Arch != Triple::x86 && Arch != Triple::x86_64)
+// Create a jump table entry for the target. This consists of an instruction
+// sequence containing a relative branch to Dest. Appends inline asm text,
+// constraints and arguments to AsmOS, ConstraintOS and AsmArgs.
+void LowerTypeTestsModule::createJumpTableEntry(
+ raw_ostream &AsmOS, raw_ostream &ConstraintOS,
+ SmallVectorImpl<Value *> &AsmArgs, Function *Dest) {
+ unsigned ArgIndex = AsmArgs.size();
+
+ if (Arch == Triple::x86 || Arch == Triple::x86_64) {
+ AsmOS << "jmp ${" << ArgIndex << ":c}@plt\n";
+ AsmOS << "int3\nint3\nint3\n";
+ } else if (Arch == Triple::arm || Arch == Triple::aarch64) {
+ AsmOS << "b $" << ArgIndex << "\n";
+ } else if (Arch == Triple::thumb) {
+ AsmOS << "b.w $" << ArgIndex << "\n";
+ } else {
report_fatal_error("Unsupported architecture for jump tables");
+ }
- return kX86JumpTableEntrySize;
+ ConstraintOS << (ArgIndex > 0 ? ",s" : "s");
+ AsmArgs.push_back(Dest);
}
-// Create a constant representing a jump table entry for the target. This
-// consists of an instruction sequence containing a relative branch to Dest. The
-// constant will be laid out at address Src+(Len*Distance) where Len is the
-// target-specific jump table entry size.
-Constant *LowerTypeTests::createJumpTableEntry(GlobalObject *Src,
- Function *Dest,
- unsigned Distance) {
- if (Arch != Triple::x86 && Arch != Triple::x86_64)
- report_fatal_error("Unsupported architecture for jump tables");
+Type *LowerTypeTestsModule::getJumpTableEntryType() {
+ return ArrayType::get(Int8Ty, getJumpTableEntrySize());
+}
- const unsigned kJmpPCRel32Code = 0xe9;
- const unsigned kInt3Code = 0xcc;
+/// Given a disjoint set of type identifiers and functions, build the bit sets
+/// and lower the llvm.type.test calls, architecture dependently.
+void LowerTypeTestsModule::buildBitSetsFromFunctions(
+ ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Functions) {
+ if (Arch == Triple::x86 || Arch == Triple::x86_64 || Arch == Triple::arm ||
+ Arch == Triple::thumb || Arch == Triple::aarch64)
+ buildBitSetsFromFunctionsNative(TypeIds, Functions);
+ else if (Arch == Triple::wasm32 || Arch == Triple::wasm64)
+ buildBitSetsFromFunctionsWASM(TypeIds, Functions);
+ else
+ report_fatal_error("Unsupported architecture for jump tables");
+}
- ConstantInt *Jmp = ConstantInt::get(Int8Ty, kJmpPCRel32Code);
+void LowerTypeTestsModule::moveInitializerToModuleConstructor(
+ GlobalVariable *GV) {
+ if (WeakInitializerFn == nullptr) {
+ WeakInitializerFn = Function::Create(
+ FunctionType::get(Type::getVoidTy(M.getContext()),
+ /* IsVarArg */ false),
+ GlobalValue::InternalLinkage, "__cfi_global_var_init", &M);
+ BasicBlock *BB =
+ BasicBlock::Create(M.getContext(), "entry", WeakInitializerFn);
+ ReturnInst::Create(M.getContext(), BB);
+ WeakInitializerFn->setSection(
+ ObjectFormat == Triple::MachO
+ ? "__TEXT,__StaticInit,regular,pure_instructions"
+ : ".text.startup");
+ // This code is equivalent to relocation application, and should run at the
+ // earliest possible time (i.e. with the highest priority).
+ appendToGlobalCtors(M, WeakInitializerFn, /* Priority */ 0);
+ }
- // Build a constant representing the displacement between the constant's
- // address and Dest. This will resolve to a PC32 relocation referring to Dest.
- Constant *DestInt = ConstantExpr::getPtrToInt(Dest, IntPtrTy);
- Constant *SrcInt = ConstantExpr::getPtrToInt(Src, IntPtrTy);
- Constant *Disp = ConstantExpr::getSub(DestInt, SrcInt);
- ConstantInt *DispOffset =
- ConstantInt::get(IntPtrTy, Distance * kX86JumpTableEntrySize + 5);
- Constant *OffsetedDisp = ConstantExpr::getSub(Disp, DispOffset);
- OffsetedDisp = ConstantExpr::getTruncOrBitCast(OffsetedDisp, Int32Ty);
+ IRBuilder<> IRB(WeakInitializerFn->getEntryBlock().getTerminator());
+ GV->setConstant(false);
+ IRB.CreateAlignedStore(GV->getInitializer(), GV, GV->getAlignment());
+ GV->setInitializer(Constant::getNullValue(GV->getValueType()));
+}
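Conceptually, moveInitializerToModuleConstructor rewrites a static initializer into a store performed by an early-running constructor. A rough source-level picture of the effect (illustrative; the real transform operates on IR, not C++ source):

    extern void weak_fn(void) __attribute__((weak));

    // Was: void (*Handler)(void) = weak_fn;  After CFI rewriting the
    // initializer would become a select expression no relocation can express,
    // so the global starts out null instead.
    void (*Handler)(void) = nullptr;

    static void cfi_init_sketch(void) { Handler = weak_fn; }
    // The pass appends the IR equivalent of cfi_init_sketch to
    // llvm.global_ctors at priority 0 via appendToGlobalCtors().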
- ConstantInt *Int3 = ConstantInt::get(Int8Ty, kInt3Code);
+void LowerTypeTestsModule::findGlobalVariableUsersOf(
+ Constant *C, SmallSetVector<GlobalVariable *, 8> &Out) {
+ for (auto *U : C->users()) {
+ if (auto *GV = dyn_cast<GlobalVariable>(U))
+ Out.insert(GV);
+ else if (auto *C2 = dyn_cast<Constant>(U))
+ findGlobalVariableUsersOf(C2, Out);
+ }
+}
- Constant *Fields[] = {
- Jmp, OffsetedDisp, Int3, Int3, Int3,
- };
- return ConstantStruct::getAnon(Fields, /*Packed=*/true);
+// Replace all uses of F with (F ? JT : 0).
+void LowerTypeTestsModule::replaceWeakDeclarationWithJumpTablePtr(
+ Function *F, Constant *JT) {
+ // The target expression can not appear in a constant initializer on most
+ // (all?) targets. Switch to a runtime initializer.
+ SmallSetVector<GlobalVariable *, 8> GlobalVarUsers;
+ findGlobalVariableUsersOf(F, GlobalVarUsers);
+ for (auto GV : GlobalVarUsers)
+ moveInitializerToModuleConstructor(GV);
+
+ // Can not RAUW F with an expression that uses F. Replace with a temporary
+ // placeholder first.
+ Function *PlaceholderFn =
+ Function::Create(cast<FunctionType>(F->getValueType()),
+ GlobalValue::ExternalWeakLinkage, "", &M);
+ F->replaceAllUsesWith(PlaceholderFn);
+
+ Constant *Target = ConstantExpr::getSelect(
+ ConstantExpr::getICmp(CmpInst::ICMP_NE, F,
+ Constant::getNullValue(F->getType())),
+ JT, Constant::getNullValue(F->getType()));
+ PlaceholderFn->replaceAllUsesWith(Target);
+ PlaceholderFn->eraseFromParent();
}
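A minimal source-level sketch of the (F ? JT : 0) rewrite performed above, assuming a weak external function and a hypothetical alias into the jump table; the pass builds the equivalent select as a constant expression:

    extern void weak_func() __attribute__((weak));
    extern void weak_func_jt_slot();       // assumed alias into .cfi.jumptable

    static void (*resolve_weak_func())() {
      // Uses of the weak declaration go through the jump table when the symbol
      // is defined, and still observe a null pointer when it is not.
      return &weak_func ? &weak_func_jt_slot : nullptr;   // (F ? JT : 0)
    }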
-Type *LowerTypeTests::getJumpTableEntryType() {
- if (Arch != Triple::x86 && Arch != Triple::x86_64)
- report_fatal_error("Unsupported architecture for jump tables");
+void LowerTypeTestsModule::createJumpTable(
+ Function *F, ArrayRef<GlobalTypeMember *> Functions) {
+ std::string AsmStr, ConstraintStr;
+ raw_string_ostream AsmOS(AsmStr), ConstraintOS(ConstraintStr);
+ SmallVector<Value *, 16> AsmArgs;
+ AsmArgs.reserve(Functions.size() * 2);
- return StructType::get(M->getContext(),
- {Int8Ty, Int32Ty, Int8Ty, Int8Ty, Int8Ty},
- /*Packed=*/true);
+ for (unsigned I = 0; I != Functions.size(); ++I)
+ createJumpTableEntry(AsmOS, ConstraintOS, AsmArgs,
+ cast<Function>(Functions[I]->getGlobal()));
+
+ // Try to emit the jump table at the end of the text segment.
+ // Jump table must come after __cfi_check in the cross-dso mode.
+ // FIXME: this magic section name seems to do the trick.
+ F->setSection(ObjectFormat == Triple::MachO
+ ? "__TEXT,__text,regular,pure_instructions"
+ : ".text.cfi");
+ // Align the whole table by entry size.
+ F->setAlignment(getJumpTableEntrySize());
+ // Skip prologue.
+ // Disabled on win32 due to https://llvm.org/bugs/show_bug.cgi?id=28641#c3.
+ // Luckily, this function does not get any prologue even without the
+ // attribute.
+ if (OS != Triple::Win32)
+ F->addFnAttr(llvm::Attribute::Naked);
+ // Thumb jump table assembly needs Thumb2. The following attribute is added by
+ // Clang for -march=armv7.
+ if (Arch == Triple::thumb)
+ F->addFnAttr("target-cpu", "cortex-a8");
+
+ BasicBlock *BB = BasicBlock::Create(M.getContext(), "entry", F);
+ IRBuilder<> IRB(BB);
+
+ SmallVector<Type *, 16> ArgTypes;
+ ArgTypes.reserve(AsmArgs.size());
+ for (const auto &Arg : AsmArgs)
+ ArgTypes.push_back(Arg->getType());
+ InlineAsm *JumpTableAsm =
+ InlineAsm::get(FunctionType::get(IRB.getVoidTy(), ArgTypes, false),
+ AsmOS.str(), ConstraintOS.str(),
+ /*hasSideEffects=*/true);
+
+ IRB.CreateCall(JumpTableAsm, AsmArgs);
+ IRB.CreateUnreachable();
}
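For reference, a standalone sketch (assumed x86 entry layout and illustrative function names; the real entries are produced by createJumpTableEntry, which is outside this hunk) of the kind of asm string createJumpTable hands to InlineAsm::get:

    #include <iostream>
    #include <string>
    #include <vector>

    // Each 8-byte entry is a 5-byte jmp to the renamed *.cfi body, padded with
    // int3, matching the conceptual layout shown in the comments below.
    static std::string sketchJumpTableAsm(const std::vector<std::string> &Fns) {
      std::string Asm;
      for (const std::string &F : Fns) {
        Asm += "jmp " + F + ".cfi\n";      // 5 bytes
        Asm += "int3\nint3\nint3\n";       // pad to the 8-byte entry size
      }
      return Asm;
    }

    int main() { std::cout << sketchJumpTableAsm({"f", "g", "h"}); }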
/// Given a disjoint set of type identifiers and functions, build a jump table
/// for the functions, build the bit sets and lower the llvm.type.test calls.
-void LowerTypeTests::buildBitSetsFromFunctions(ArrayRef<Metadata *> TypeIds,
- ArrayRef<Function *> Functions) {
+void LowerTypeTestsModule::buildBitSetsFromFunctionsNative(
+ ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Functions) {
// Unlike the global bitset builder, the function bitset builder cannot
// re-arrange functions in a particular order and base its calculations on the
// layout of the functions' entry points, as we have no idea how large a
@@ -697,39 +875,35 @@ void LowerTypeTests::buildBitSetsFromFunctions(ArrayRef<Metadata *> TypeIds,
// mov h, %ecx
// ret
//
- // To create a jump table for these functions, we instruct the LLVM code
- // generator to output a jump table in the .text section. This is done by
- // representing the instructions in the jump table as an LLVM constant and
- // placing them in a global variable in the .text section. The end result will
- // (conceptually) look like this:
+ // We output the jump table as a module-level inline asm string. The end
+ // result will (conceptually) look like this:
//
- // f:
- // jmp .Ltmp0 ; 5 bytes
+ // f = .cfi.jumptable
+ // g = .cfi.jumptable + 4
+ // h = .cfi.jumptable + 8
+ // .cfi.jumptable:
+ // jmp f.cfi ; 5 bytes
// int3 ; 1 byte
// int3 ; 1 byte
// int3 ; 1 byte
- //
- // g:
- // jmp .Ltmp1 ; 5 bytes
+ // jmp g.cfi ; 5 bytes
// int3 ; 1 byte
// int3 ; 1 byte
// int3 ; 1 byte
- //
- // h:
- // jmp .Ltmp2 ; 5 bytes
+ // jmp h.cfi ; 5 bytes
// int3 ; 1 byte
// int3 ; 1 byte
// int3 ; 1 byte
//
- // .Ltmp0:
+ // f.cfi:
// mov 0, %eax
// ret
//
- // .Ltmp1:
+ // g.cfi:
// mov 1, %eax
// ret
//
- // .Ltmp2:
+ // h.cfi:
// mov 2, %eax
// ret
//
@@ -743,60 +917,101 @@ void LowerTypeTests::buildBitSetsFromFunctions(ArrayRef<Metadata *> TypeIds,
// normal case the check can be carried out using the same kind of simple
// arithmetic that we normally use for globals.
+ // FIXME: find a better way to represent the jumptable in the IR.
+
assert(!Functions.empty());
// Build a simple layout based on the regular layout of jump tables.
- DenseMap<GlobalObject *, uint64_t> GlobalLayout;
+ DenseMap<GlobalTypeMember *, uint64_t> GlobalLayout;
unsigned EntrySize = getJumpTableEntrySize();
for (unsigned I = 0; I != Functions.size(); ++I)
GlobalLayout[Functions[I]] = I * EntrySize;
- // Create a constant to hold the jump table.
+ Function *JumpTableFn =
+ Function::Create(FunctionType::get(Type::getVoidTy(M.getContext()),
+ /* IsVarArg */ false),
+ GlobalValue::PrivateLinkage, ".cfi.jumptable", &M);
ArrayType *JumpTableType =
ArrayType::get(getJumpTableEntryType(), Functions.size());
- auto JumpTable = new GlobalVariable(*M, JumpTableType,
- /*isConstant=*/true,
- GlobalValue::PrivateLinkage, nullptr);
- JumpTable->setSection(ObjectFormat == Triple::MachO
- ? "__TEXT,__text,regular,pure_instructions"
- : ".text");
+ auto JumpTable =
+ ConstantExpr::getPointerCast(JumpTableFn, JumpTableType->getPointerTo(0));
+
lowerTypeTestCalls(TypeIds, JumpTable, GlobalLayout);
// Build aliases pointing to offsets into the jump table, and replace
// references to the original functions with references to the aliases.
for (unsigned I = 0; I != Functions.size(); ++I) {
+ Function *F = cast<Function>(Functions[I]->getGlobal());
+
Constant *CombinedGlobalElemPtr = ConstantExpr::getBitCast(
- ConstantExpr::getGetElementPtr(
+ ConstantExpr::getInBoundsGetElementPtr(
JumpTableType, JumpTable,
ArrayRef<Constant *>{ConstantInt::get(IntPtrTy, 0),
ConstantInt::get(IntPtrTy, I)}),
- Functions[I]->getType());
- if (LinkerSubsectionsViaSymbols || Functions[I]->isDeclarationForLinker()) {
- Functions[I]->replaceAllUsesWith(CombinedGlobalElemPtr);
+ F->getType());
+ if (LinkerSubsectionsViaSymbols || F->isDeclarationForLinker()) {
+
+ if (F->isWeakForLinker())
+ replaceWeakDeclarationWithJumpTablePtr(F, CombinedGlobalElemPtr);
+ else
+ F->replaceAllUsesWith(CombinedGlobalElemPtr);
} else {
- assert(Functions[I]->getType()->getAddressSpace() == 0);
- GlobalAlias *GAlias = GlobalAlias::create(Functions[I]->getValueType(), 0,
- Functions[I]->getLinkage(), "",
- CombinedGlobalElemPtr, M);
- GAlias->setVisibility(Functions[I]->getVisibility());
- GAlias->takeName(Functions[I]);
- Functions[I]->replaceAllUsesWith(GAlias);
+ assert(F->getType()->getAddressSpace() == 0);
+
+ GlobalAlias *FAlias = GlobalAlias::create(F->getValueType(), 0,
+ F->getLinkage(), "",
+ CombinedGlobalElemPtr, &M);
+ FAlias->setVisibility(F->getVisibility());
+ FAlias->takeName(F);
+ if (FAlias->hasName())
+ F->setName(FAlias->getName() + ".cfi");
+ F->replaceAllUsesWith(FAlias);
}
- if (!Functions[I]->isDeclarationForLinker())
- Functions[I]->setLinkage(GlobalValue::PrivateLinkage);
+ if (!F->isDeclarationForLinker())
+ F->setLinkage(GlobalValue::InternalLinkage);
}
- // Build and set the jump table's initializer.
- std::vector<Constant *> JumpTableEntries;
- for (unsigned I = 0; I != Functions.size(); ++I)
- JumpTableEntries.push_back(
- createJumpTableEntry(JumpTable, Functions[I], I));
- JumpTable->setInitializer(
- ConstantArray::get(JumpTableType, JumpTableEntries));
+ createJumpTable(JumpTableFn, Functions);
+}
+
+/// Assign a dummy layout using an incrementing counter, tag each function
+/// with its index represented as metadata, and lower each type test to an
+/// integer range comparison. During generation of the indirect function call
+/// table in the backend, it will assign the given indexes.
+/// Note: Dynamic linking is not supported, as the WebAssembly ABI has not yet
+/// been finalized.
+void LowerTypeTestsModule::buildBitSetsFromFunctionsWASM(
+ ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Functions) {
+ assert(!Functions.empty());
+
+ // Build consecutive monotonic integer ranges for each call target set
+ DenseMap<GlobalTypeMember *, uint64_t> GlobalLayout;
+
+ for (GlobalTypeMember *GTM : Functions) {
+ Function *F = cast<Function>(GTM->getGlobal());
+
+ // Skip functions whose address is not taken, to avoid bloating the table
+ if (!F->hasAddressTaken())
+ continue;
+
+ // Store metadata with the index for each function
+ MDNode *MD = MDNode::get(F->getContext(),
+ ArrayRef<Metadata *>(ConstantAsMetadata::get(
+ ConstantInt::get(Int64Ty, IndirectIndex))));
+ F->setMetadata("wasm.index", MD);
+
+ // Assign the counter value
+ GlobalLayout[GTM] = IndirectIndex++;
+ }
+
+ // The indirect function table index space starts at zero, so pass a NULL
+ // pointer as the subtracted "jump table" offset.
+ lowerTypeTestCalls(TypeIds, ConstantPointerNull::get(Int32PtrTy),
+ GlobalLayout);
}
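A hedged standalone model of the WebAssembly lowering above: with a null "jump table" base, a type test reduces conceptually to an unsigned range check on the index assigned to each address-taken function (the real check is emitted by lowerTypeTestCalls):

    #include <cstdint>

    static bool typeTestByIndex(uint64_t FnIndex, uint64_t SetStart,
                                uint64_t SetSize) {
      // Unsigned wrap-around lets a single comparison cover both bounds.
      return FnIndex - SetStart < SetSize;
    }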
-void LowerTypeTests::buildBitSetsFromDisjointSet(
- ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalObject *> Globals) {
+void LowerTypeTestsModule::buildBitSetsFromDisjointSet(
+ ArrayRef<Metadata *> TypeIds, ArrayRef<GlobalTypeMember *> Globals) {
llvm::DenseMap<Metadata *, uint64_t> TypeIdIndices;
for (unsigned I = 0; I != TypeIds.size(); ++I)
TypeIdIndices[TypeIds[I]] = I;
@@ -804,12 +1019,9 @@ void LowerTypeTests::buildBitSetsFromDisjointSet(
// For each type identifier, build a set of indices that refer to members of
// the type identifier.
std::vector<std::set<uint64_t>> TypeMembers(TypeIds.size());
- SmallVector<MDNode *, 2> Types;
unsigned GlobalIndex = 0;
- for (GlobalObject *GO : Globals) {
- Types.clear();
- GO->getMetadata(LLVMContext::MD_type, Types);
- for (MDNode *Type : Types) {
+ for (GlobalTypeMember *GTM : Globals) {
+ for (MDNode *Type : GTM->types()) {
// Type = { offset, type identifier }
unsigned TypeIdIndex = TypeIdIndices[Type->getOperand(1)];
TypeMembers[TypeIdIndex].insert(GlobalIndex);
@@ -833,32 +1045,32 @@ void LowerTypeTests::buildBitSetsFromDisjointSet(
GLB.addFragment(MemSet);
// Build the bitsets from this disjoint set.
- if (Globals.empty() || isa<GlobalVariable>(Globals[0])) {
+ if (Globals.empty() || isa<GlobalVariable>(Globals[0]->getGlobal())) {
// Build a vector of global variables with the computed layout.
- std::vector<GlobalVariable *> OrderedGVs(Globals.size());
+ std::vector<GlobalTypeMember *> OrderedGVs(Globals.size());
auto OGI = OrderedGVs.begin();
for (auto &&F : GLB.Fragments) {
for (auto &&Offset : F) {
- auto GV = dyn_cast<GlobalVariable>(Globals[Offset]);
+ auto GV = dyn_cast<GlobalVariable>(Globals[Offset]->getGlobal());
if (!GV)
report_fatal_error("Type identifier may not contain both global "
"variables and functions");
- *OGI++ = GV;
+ *OGI++ = Globals[Offset];
}
}
buildBitSetsFromGlobalVariables(TypeIds, OrderedGVs);
} else {
// Build a vector of functions with the computed layout.
- std::vector<Function *> OrderedFns(Globals.size());
+ std::vector<GlobalTypeMember *> OrderedFns(Globals.size());
auto OFI = OrderedFns.begin();
for (auto &&F : GLB.Fragments) {
for (auto &&Offset : F) {
- auto Fn = dyn_cast<Function>(Globals[Offset]);
+ auto Fn = dyn_cast<Function>(Globals[Offset]->getGlobal());
if (!Fn)
report_fatal_error("Type identifier may not contain both global "
"variables and functions");
- *OFI++ = Fn;
+ *OFI++ = Globals[Offset];
}
}
@@ -867,31 +1079,54 @@ void LowerTypeTests::buildBitSetsFromDisjointSet(
}
/// Lower all type tests in this module.
-bool LowerTypeTests::lower() {
+LowerTypeTestsModule::LowerTypeTestsModule(Module &M) : M(M) {
+ Triple TargetTriple(M.getTargetTriple());
+ LinkerSubsectionsViaSymbols = TargetTriple.isMacOSX();
+ Arch = TargetTriple.getArch();
+ OS = TargetTriple.getOS();
+ ObjectFormat = TargetTriple.getObjectFormat();
+}
+
+bool LowerTypeTestsModule::lower() {
Function *TypeTestFunc =
- M->getFunction(Intrinsic::getName(Intrinsic::type_test));
+ M.getFunction(Intrinsic::getName(Intrinsic::type_test));
if (!TypeTestFunc || TypeTestFunc->use_empty())
return false;
// Equivalence class set containing type identifiers and the globals that
// reference them. This is used to partition the set of type identifiers in
// the module into disjoint sets.
- typedef EquivalenceClasses<PointerUnion<GlobalObject *, Metadata *>>
+ typedef EquivalenceClasses<PointerUnion<GlobalTypeMember *, Metadata *>>
GlobalClassesTy;
GlobalClassesTy GlobalClasses;
- // Verify the type metadata and build a mapping from type identifiers to their
- // last observed index in the list of globals. This will be used later to
- // deterministically order the list of type identifiers.
- llvm::DenseMap<Metadata *, unsigned> TypeIdIndices;
+ // Verify the type metadata and build a few data structures to let us
+ // efficiently enumerate the type identifiers associated with a global:
+ // a list of GlobalTypeMembers (a GlobalObject stored alongside a vector
+ // of associated type metadata) and a mapping from type identifiers to their
+ // list of GlobalTypeMembers and last observed index in the list of globals.
+ // The indices will be used later to deterministically order the list of type
+ // identifiers.
+ BumpPtrAllocator Alloc;
+ struct TIInfo {
+ unsigned Index;
+ std::vector<GlobalTypeMember *> RefGlobals;
+ };
+ llvm::DenseMap<Metadata *, TIInfo> TypeIdInfo;
unsigned I = 0;
SmallVector<MDNode *, 2> Types;
- for (GlobalObject &GO : M->global_objects()) {
+ for (GlobalObject &GO : M.global_objects()) {
Types.clear();
GO.getMetadata(LLVMContext::MD_type, Types);
+ if (Types.empty())
+ continue;
+
+ auto *GTM = GlobalTypeMember::create(Alloc, &GO, Types);
for (MDNode *Type : Types) {
verifyTypeMDNode(&GO, Type);
- TypeIdIndices[cast<MDNode>(Type)->getOperand(1)] = ++I;
+ auto &Info = TypeIdInfo[cast<MDNode>(Type)->getOperand(1)];
+ Info.Index = ++I;
+ Info.RefGlobals.push_back(GTM);
}
}
@@ -900,8 +1135,7 @@ bool LowerTypeTests::lower() {
auto BitSetMDVal = dyn_cast<MetadataAsValue>(CI->getArgOperand(1));
if (!BitSetMDVal)
- report_fatal_error(
- "Second argument of llvm.type.test must be metadata");
+ report_fatal_error("Second argument of llvm.type.test must be metadata");
auto BitSet = BitSetMDVal->getMetadata();
// Add the call site to the list of call sites for this type identifier. We
@@ -920,14 +1154,9 @@ bool LowerTypeTests::lower() {
GlobalClassesTy::member_iterator CurSet = GlobalClasses.findLeader(GCI);
// Add the referenced globals to the type identifier's equivalence class.
- for (GlobalObject &GO : M->global_objects()) {
- Types.clear();
- GO.getMetadata(LLVMContext::MD_type, Types);
- for (MDNode *Type : Types)
- if (Type->getOperand(1) == BitSet)
- CurSet = GlobalClasses.unionSets(
- CurSet, GlobalClasses.findLeader(GlobalClasses.insert(&GO)));
- }
+ for (GlobalTypeMember *GTM : TypeIdInfo[BitSet].RefGlobals)
+ CurSet = GlobalClasses.unionSets(
+ CurSet, GlobalClasses.findLeader(GlobalClasses.insert(GTM)));
}
if (GlobalClasses.empty())
@@ -939,14 +1168,15 @@ bool LowerTypeTests::lower() {
for (GlobalClassesTy::iterator I = GlobalClasses.begin(),
E = GlobalClasses.end();
I != E; ++I) {
- if (!I->isLeader()) continue;
+ if (!I->isLeader())
+ continue;
++NumTypeIdDisjointSets;
unsigned MaxIndex = 0;
for (GlobalClassesTy::member_iterator MI = GlobalClasses.member_begin(I);
MI != GlobalClasses.member_end(); ++MI) {
if ((*MI).is<Metadata *>())
- MaxIndex = std::max(MaxIndex, TypeIdIndices[MI->get<Metadata *>()]);
+ MaxIndex = std::max(MaxIndex, TypeIdInfo[MI->get<Metadata *>()].Index);
}
Sets.emplace_back(I, MaxIndex);
}
@@ -960,20 +1190,20 @@ bool LowerTypeTests::lower() {
for (const auto &S : Sets) {
// Build the list of type identifiers in this disjoint set.
std::vector<Metadata *> TypeIds;
- std::vector<GlobalObject *> Globals;
+ std::vector<GlobalTypeMember *> Globals;
for (GlobalClassesTy::member_iterator MI =
GlobalClasses.member_begin(S.first);
MI != GlobalClasses.member_end(); ++MI) {
if ((*MI).is<Metadata *>())
TypeIds.push_back(MI->get<Metadata *>());
else
- Globals.push_back(MI->get<GlobalObject *>());
+ Globals.push_back(MI->get<GlobalTypeMember *>());
}
// Order type identifiers by global index for determinism. This ordering is
// stable as there is a one-to-one mapping between metadata and indices.
std::sort(TypeIds.begin(), TypeIds.end(), [&](Metadata *M1, Metadata *M2) {
- return TypeIdIndices[M1] < TypeIdIndices[M2];
+ return TypeIdInfo[M1].Index < TypeIdInfo[M2].Index;
});
// Build bitsets for this disjoint set.
@@ -985,35 +1215,9 @@ bool LowerTypeTests::lower() {
return true;
}
-// Initialization helper shared by the old and the new PM.
-static void init(LowerTypeTests *LTT, Module &M) {
- LTT->M = &M;
- const DataLayout &DL = M.getDataLayout();
- Triple TargetTriple(M.getTargetTriple());
- LTT->LinkerSubsectionsViaSymbols = TargetTriple.isMacOSX();
- LTT->Arch = TargetTriple.getArch();
- LTT->ObjectFormat = TargetTriple.getObjectFormat();
- LTT->Int1Ty = Type::getInt1Ty(M.getContext());
- LTT->Int8Ty = Type::getInt8Ty(M.getContext());
- LTT->Int32Ty = Type::getInt32Ty(M.getContext());
- LTT->Int32PtrTy = PointerType::getUnqual(LTT->Int32Ty);
- LTT->Int64Ty = Type::getInt64Ty(M.getContext());
- LTT->IntPtrTy = DL.getIntPtrType(M.getContext(), 0);
- LTT->TypeTestCallSites.clear();
-}
-
-bool LowerTypeTests::runOnModule(Module &M) {
- if (skipModule(M))
- return false;
- init(this, M);
- return lower();
-}
-
PreservedAnalyses LowerTypeTestsPass::run(Module &M,
- AnalysisManager<Module> &AM) {
- LowerTypeTests Impl;
- init(&Impl, M);
- bool Changed = Impl.lower();
+ ModuleAnalysisManager &AM) {
+ bool Changed = LowerTypeTestsModule(M).lower();
if (!Changed)
return PreservedAnalyses::all();
return PreservedAnalyses::none();
diff --git a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
index fe653a75ddb5..e0bb0eb42b59 100644
--- a/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp
@@ -97,11 +97,9 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
-#include "llvm/IR/Operator.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/IR/ValueMap.h"
#include "llvm/Pass.h"
@@ -110,6 +108,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/IPO.h"
+#include "llvm/Transforms/Utils/FunctionComparator.h"
#include <vector>
using namespace llvm;
@@ -130,328 +129,6 @@ static cl::opt<unsigned> NumFunctionsForSanityCheck(
namespace {
-/// GlobalNumberState assigns an integer to each global value in the program,
-/// which is used by the comparison routine to order references to globals. This
-/// state must be preserved throughout the pass, because Functions and other
-/// globals need to maintain their relative order. Globals are assigned a number
-/// when they are first visited. This order is deterministic, and so the
-/// assigned numbers are as well. When two functions are merged, neither number
-/// is updated. If the symbols are weak, this would be incorrect. If they are
-/// strong, then one will be replaced at all references to the other, and so
-/// direct callsites will now see one or the other symbol, and no update is
-/// necessary. Note that if we were guaranteed unique names, we could just
-/// compare those, but this would not work for stripped bitcodes or for those
-/// few symbols without a name.
-class GlobalNumberState {
- struct Config : ValueMapConfig<GlobalValue*> {
- enum { FollowRAUW = false };
- };
- // Each GlobalValue is mapped to an identifier. The Config ensures when RAUW
- // occurs, the mapping does not change. Tracking changes is unnecessary, and
- // also problematic for weak symbols (which may be overwritten).
- typedef ValueMap<GlobalValue *, uint64_t, Config> ValueNumberMap;
- ValueNumberMap GlobalNumbers;
- // The next unused serial number to assign to a global.
- uint64_t NextNumber;
- public:
- GlobalNumberState() : GlobalNumbers(), NextNumber(0) {}
- uint64_t getNumber(GlobalValue* Global) {
- ValueNumberMap::iterator MapIter;
- bool Inserted;
- std::tie(MapIter, Inserted) = GlobalNumbers.insert({Global, NextNumber});
- if (Inserted)
- NextNumber++;
- return MapIter->second;
- }
- void clear() {
- GlobalNumbers.clear();
- }
-};
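A compilable model of the numbering scheme described above, with std::map standing in for LLVM's ValueMap and plain pointers standing in for GlobalValues:

    #include <cassert>
    #include <cstdint>
    #include <map>

    struct GlobalNumberModel {
      std::map<const void *, uint64_t> Numbers;  // numbers handed out in first-visit order
      uint64_t Next = 0;
      uint64_t getNumber(const void *Global) {
        auto Ins = Numbers.insert({Global, Next});
        if (Ins.second)
          ++Next;
        return Ins.first->second;
      }
    };

    int main() {
      int F1, F2;                                // stand-ins for two globals
      GlobalNumberModel GN;
      assert(GN.getNumber(&F1) == 0);
      assert(GN.getNumber(&F2) == 1);
      assert(GN.getNumber(&F1) == 0);            // revisits are stable
    }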
-
-/// FunctionComparator - Compares two functions to determine whether or not
-/// they will generate machine code with the same behaviour. DataLayout is
-/// used if available. The comparator always fails conservatively (erring on the
-/// side of claiming that two functions are different).
-class FunctionComparator {
-public:
- FunctionComparator(const Function *F1, const Function *F2,
- GlobalNumberState* GN)
- : FnL(F1), FnR(F2), GlobalNumbers(GN) {}
-
- /// Test whether the two functions have equivalent behaviour.
- int compare();
- /// Hash a function. Equivalent functions will have the same hash, and unequal
- /// functions will have different hashes with high probability.
- typedef uint64_t FunctionHash;
- static FunctionHash functionHash(Function &);
-
-private:
- /// Test whether two basic blocks have equivalent behaviour.
- int cmpBasicBlocks(const BasicBlock *BBL, const BasicBlock *BBR) const;
-
- /// Constants comparison.
- /// It is analogous to a lexicographical comparison between hypothetical
- /// numbers of the following format:
- /// <bitcastability-trait><raw-bit-contents>
- ///
- /// 1. Bitcastability.
- /// Check whether L's type could be losslessly bitcasted to R's type.
- /// At this stage, if a lossless bitcast is not possible, the method
- /// returns -1 or 1, thus also defining which type is greater in the
- /// context of bitcastability.
- /// Stage 0: If types are equal in terms of cmpTypes, then we can go straight
- /// to the contents comparison.
- /// If types differ, remember the type comparison result and check
- /// whether we can still bitcast the types.
- /// Stage 1: Types that satisfy the isFirstClassType condition are always
- /// greater than others.
- /// Stage 2: A vector is greater than a non-vector.
- /// If both types are vectors, then the vector with the greater bitwidth
- /// is greater.
- /// If both types are vectors with the same bitwidth, then types
- /// are bitcastable, and we can skip other stages, and go to contents
- /// comparison.
- /// Stage 3: Pointer types are greater than non-pointers. If both types are
- /// pointers of the same address space - go to contents comparison.
- /// Different address spaces: pointer with greater address space is
- /// greater.
- /// Stage 4: Types are neither vectors, nor pointers. And they differ.
- /// We don't know how to bitcast them. So, we'd better not do it,
- /// and return the type comparison result (so it determines the
- /// relationship among constants we don't know how to bitcast).
- ///
- /// Just for clarity, let's see how the set of constants could look
- /// on a single-dimension axis:
- ///
- /// [NFCT], [FCT, "others"], [FCT, pointers], [FCT, vectors]
- /// Where: NFCT - Not a FirstClassType
- /// FCT - FirstClassType
- ///
- /// 2. Compare raw contents.
- /// It ignores types on this stage and only compares bits from L and R.
- /// Returns 0 if L and R have equivalent contents.
- /// -1 or 1 if values are different.
- /// Pretty trivial:
- /// 2.1. If contents are numbers, compare numbers.
- /// Ints with greater bitwidth are greater. Ints with same bitwidths
- /// compared by their contents.
- /// 2.2. "And so on". To avoid discrepancies with the comments, it is
- /// probably better to read the implementation itself.
- /// 3. And again about the overall picture. Let's look back at what the ordered
- /// set of constants will look like:
- /// [NFCT], [FCT, "others"], [FCT, pointers], [FCT, vectors]
- ///
- /// Now look, what could be inside [FCT, "others"], for example:
- /// [FCT, "others"] =
- /// [
- /// [double 0.1], [double 1.23],
- /// [i32 1], [i32 2],
- /// { double 1.0 }, ; StructTyID, NumElements = 1
- /// { i32 1 }, ; StructTyID, NumElements = 1
- /// { double 1, i32 1 }, ; StructTyID, NumElements = 2
- /// { i32 1, double 1 } ; StructTyID, NumElements = 2
- /// ]
- ///
- /// Let's explain the order. Float numbers will be less than integers, just
- /// because of cmpType terms: FloatTyID < IntegerTyID.
- /// Floats (with the same fltSemantics) are sorted according to their value.
- /// Then come the integers, which, like the floats, can easily be sorted
- /// among each other.
- /// The structures come next. They are grouped at the tail, again because
- /// of their TypeID: StructTyID > IntegerTyID > FloatTyID.
- /// Structures with a greater number of elements are greater. Structures whose
- /// greater elements come first are greater.
- /// The same logic applies to vectors, arrays and other possible complex types.
- ///
- /// Bitcastable constants.
- /// Let's assume that some constant belongs to some group of
- /// "so-called-equal" values with different types, and at the same time
- /// belongs to another group of constants with equal types
- /// and "really" equal values.
- ///
- /// Now, prove that this is impossible:
- ///
- /// If constant A with type TyA is bitcastable to B with type TyB, then:
- /// 1. All constants with equal types to TyA, are bitcastable to B. Since
- /// those should be vectors (if TyA is vector), pointers
- /// (if TyA is pointer), or else (if TyA equal to TyB), those types should
- /// be equal to TyB.
- /// 2. All constants with non-equal, but bitcastable types to TyA, are
- /// bitcastable to B.
- /// Once again, just because we allow it to vectors and pointers only.
- /// This statement could be expanded as below:
- /// 2.1. All vectors with a bitwidth equal to vector A's have a bitwidth equal
- /// to vector B's, and are thus bitcastable to B as well.
- /// 2.2. All pointers of the same address space, no matter what they point to,
- /// are bitcastable. So if C is a pointer, it can be bitcast to A and to B.
- /// So any constant equal or bitcastable to A is equal or bitcastable to B.
- /// QED.
- ///
- /// In other words, for pointers and vectors, we ignore the top-level type and
- /// look at their particular properties (bit-width for vectors, and
- /// address space for pointers).
- /// If these properties are equal - compare their contents.
- int cmpConstants(const Constant *L, const Constant *R) const;
-
- /// Compares two global values by number. Uses the GlobalNumberState to
- /// identify the same globals across function calls.
- int cmpGlobalValues(GlobalValue *L, GlobalValue *R) const;
-
- /// Assign or look up previously assigned numbers for the two values, and
- /// return whether the numbers are equal. Numbers are assigned in the order
- /// visited.
- /// Comparison order:
- /// Stage 0: A value that is the function itself is always greater than others.
- /// If left and right values are references to their functions, then
- /// they are equal.
- /// Stage 1: Constants are greater than non-constants.
- /// If both left and right are constants, then the result of
- /// cmpConstants is used as cmpValues result.
- /// Stage 2: InlineAsm instances are greater than others. If both left and
- /// right are InlineAsm instances, the InlineAsm* pointers are cast to
- /// integers and compared as numbers.
- /// Stage 3: For all other cases we compare the order in which we meet these
- /// values in their functions. If the right value was met first during
- /// scanning, then the left value is greater.
- /// In other words, we compare serial numbers; for more details
- /// see the comments for sn_mapL and sn_mapR.
- int cmpValues(const Value *L, const Value *R) const;
-
- /// Compare two Instructions for equivalence, similar to
- /// Instruction::isSameOperationAs.
- ///
- /// Stages are listed in "most significant stage first" order:
- /// On each stage below, we do comparison between some left and right
- /// operation parts. If parts are non-equal, we assign parts comparison
- /// result to the operation comparison result and exit from method.
- /// Otherwise we proceed to the next stage.
- /// Stages:
- /// 1. Operations opcodes. Compared as numbers.
- /// 2. Number of operands.
- /// 3. Operation types. Compared with cmpType method.
- /// 4. Compare operation subclass optional data as stream of bytes:
- /// just convert it to integers and call cmpNumbers.
- /// 5. Compare operand types of the operation with cmpType, in
- /// most-significant-operand-first order.
- /// 6. Last stage. Check operations for some specific attributes.
- /// For example, for Load it would be:
- /// 6.1.Load: volatile (as boolean flag)
- /// 6.2.Load: alignment (as integer numbers)
- /// 6.3.Load: ordering (as underlying enum class value)
- /// 6.4.Load: synch-scope (as integer numbers)
- /// 6.5.Load: range metadata (as integer ranges)
- /// At this stage it is better to read the code, since it is not more than
- /// 10-15 lines per particular instruction, and could change over time.
- int cmpOperations(const Instruction *L, const Instruction *R) const;
-
- /// Compare two GEPs for equivalent pointer arithmetic.
- /// Parts to be compared for each comparison stage,
- /// most significant stage first:
- /// 1. Address space. As numbers.
- /// 2. Constant offset, (using GEPOperator::accumulateConstantOffset method).
- /// 3. Pointer operand type (using cmpType method).
- /// 4. Number of operands.
- /// 5. Compare operands, using cmpValues method.
- int cmpGEPs(const GEPOperator *GEPL, const GEPOperator *GEPR) const;
- int cmpGEPs(const GetElementPtrInst *GEPL,
- const GetElementPtrInst *GEPR) const {
- return cmpGEPs(cast<GEPOperator>(GEPL), cast<GEPOperator>(GEPR));
- }
-
- /// cmpType - compares two types,
- /// defines total ordering among the types set.
- ///
- /// Return values:
- /// 0 if types are equal,
- /// -1 if Left is less than Right,
- /// +1 if Left is greater than Right.
- ///
- /// Description:
- /// Comparison is broken into stages. As in a lexicographical comparison,
- /// the stage coming first has higher priority.
- /// At each explanation stage, keep the total-ordering properties in mind.
- ///
- /// 0. Before comparison we coerce pointer types of 0 address space to
- /// integer.
- /// We also don't bother with same type at left and right, so
- /// just return 0 in this case.
- ///
- /// 1. If types are of different kind (different type IDs).
- /// Return result of type IDs comparison, treating them as numbers.
- /// 2. If types are integers, check that they have the same width. If they
- /// are vectors, check that they have the same count and subtype.
- /// 3. Types have the same ID, so check whether they are one of:
- /// * Void
- /// * Float
- /// * Double
- /// * X86_FP80
- /// * FP128
- /// * PPC_FP128
- /// * Label
- /// * Metadata
- /// We can treat these types as equal whenever their IDs are same.
- /// 4. If Left and Right are pointers, return result of address space
- /// comparison (numbers comparison). We can treat pointer types of same
- /// address space as equal.
- /// 5. If types are complex.
- /// Then both Left and Right are to be expanded and their element types will
- /// be checked in the same way. If we get Res != 0 at some stage, return it.
- /// Otherwise return 0.
- /// 6. For all other cases put llvm_unreachable.
- int cmpTypes(Type *TyL, Type *TyR) const;
-
- int cmpNumbers(uint64_t L, uint64_t R) const;
- int cmpOrderings(AtomicOrdering L, AtomicOrdering R) const;
- int cmpAPInts(const APInt &L, const APInt &R) const;
- int cmpAPFloats(const APFloat &L, const APFloat &R) const;
- int cmpInlineAsm(const InlineAsm *L, const InlineAsm *R) const;
- int cmpMem(StringRef L, StringRef R) const;
- int cmpAttrs(const AttributeSet L, const AttributeSet R) const;
- int cmpRangeMetadata(const MDNode *L, const MDNode *R) const;
- int cmpOperandBundlesSchema(const Instruction *L, const Instruction *R) const;
-
- // The two functions undergoing comparison.
- const Function *FnL, *FnR;
-
- /// Assign serial numbers to values from left function, and values from
- /// right function.
- /// Explanation:
- /// When comparing functions we need to compare the values we meet on the
- /// left and right sides.
- /// It's easy to sort things out for external values: it just has to be
- /// the same value on the left and on the right.
- /// But for local values (those introduced inside the function body)
- /// we have to ensure they were introduced at exactly the same place
- /// and play the same role.
- /// Let's assign a serial number to each value when we meet it for the first
- /// time. Values that were met at the same place will have the same serial
- /// numbers. At this point it is worth explaining a few points about values
- /// assigned to BBs and other implementation details (see below).
- ///
- /// 1. Safety of BB reordering.
- /// It's safe to change the order of BasicBlocks in function.
- /// Relationship with other functions and serial numbering will not be
- /// changed in this case.
- /// As follows from FunctionComparator::compare(), we do a CFG walk: we start
- /// from the entry, and then take each terminator. So it doesn't matter how
- /// the BBs are in fact ordered in the function. And since cmpValues is called
- /// during this walk, the numbering depends only on where the BBs are located
- /// inside the CFG. So the answer is: yes, we will get the same numbering.
- ///
- /// 2. Impossibility to use dominance properties of values.
- /// If we compare two instruction operands, where the first is a use of local
- /// variable AL from function FL and the second is a use of local variable AR
- /// from FR, we could compare their origins and check whether they are
- /// defined at the same place.
- /// But, we are still not able to compare operands of PHI nodes, since those
- /// could be operands from further BBs we didn't scan yet.
- /// So it's impossible to use dominance properties in general.
- mutable DenseMap<const Value*, int> sn_mapL, sn_mapR;
-
- // The global state we will use
- GlobalNumberState* GlobalNumbers;
-};
-
class FunctionNode {
mutable AssertingVH<Function> F;
FunctionComparator::FunctionHash Hash;
@@ -470,898 +147,6 @@ public:
void release() { F = nullptr; }
};
-} // end anonymous namespace
-
-int FunctionComparator::cmpNumbers(uint64_t L, uint64_t R) const {
- if (L < R) return -1;
- if (L > R) return 1;
- return 0;
-}
-
-int FunctionComparator::cmpOrderings(AtomicOrdering L, AtomicOrdering R) const {
- if ((int)L < (int)R) return -1;
- if ((int)L > (int)R) return 1;
- return 0;
-}
-
-int FunctionComparator::cmpAPInts(const APInt &L, const APInt &R) const {
- if (int Res = cmpNumbers(L.getBitWidth(), R.getBitWidth()))
- return Res;
- if (L.ugt(R)) return 1;
- if (R.ugt(L)) return -1;
- return 0;
-}
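All of these comparators share the three-way (-1/0/1) convention, which is what lets the later stages chain with "if (int Res = ...) return Res;". A minimal standalone illustration of that chaining, using hypothetical fields rather than LLVM types:

    #include <cstdint>

    static int cmpNumbersModel(uint64_t L, uint64_t R) {
      return L < R ? -1 : (L > R ? 1 : 0);
    }

    // Lexicographic comparison of (width, value) pairs, most significant first.
    static int cmpPairModel(uint64_t WidthL, uint64_t ValueL,
                            uint64_t WidthR, uint64_t ValueR) {
      if (int Res = cmpNumbersModel(WidthL, WidthR))
        return Res;                      // widths differ: they decide the order
      return cmpNumbersModel(ValueL, ValueR);
    }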
-
-int FunctionComparator::cmpAPFloats(const APFloat &L, const APFloat &R) const {
- // Floats are ordered first by semantics (i.e. float, double, half, etc.),
- // then by value interpreted as a bitstring (aka APInt).
- const fltSemantics &SL = L.getSemantics(), &SR = R.getSemantics();
- if (int Res = cmpNumbers(APFloat::semanticsPrecision(SL),
- APFloat::semanticsPrecision(SR)))
- return Res;
- if (int Res = cmpNumbers(APFloat::semanticsMaxExponent(SL),
- APFloat::semanticsMaxExponent(SR)))
- return Res;
- if (int Res = cmpNumbers(APFloat::semanticsMinExponent(SL),
- APFloat::semanticsMinExponent(SR)))
- return Res;
- if (int Res = cmpNumbers(APFloat::semanticsSizeInBits(SL),
- APFloat::semanticsSizeInBits(SR)))
- return Res;
- return cmpAPInts(L.bitcastToAPInt(), R.bitcastToAPInt());
-}
-
-int FunctionComparator::cmpMem(StringRef L, StringRef R) const {
- // Prevent heavy comparison, compare sizes first.
- if (int Res = cmpNumbers(L.size(), R.size()))
- return Res;
-
- // Compare strings lexicographically only when it is necessary: only when
- // strings are equal in size.
- return L.compare(R);
-}
-
-int FunctionComparator::cmpAttrs(const AttributeSet L,
- const AttributeSet R) const {
- if (int Res = cmpNumbers(L.getNumSlots(), R.getNumSlots()))
- return Res;
-
- for (unsigned i = 0, e = L.getNumSlots(); i != e; ++i) {
- AttributeSet::iterator LI = L.begin(i), LE = L.end(i), RI = R.begin(i),
- RE = R.end(i);
- for (; LI != LE && RI != RE; ++LI, ++RI) {
- Attribute LA = *LI;
- Attribute RA = *RI;
- if (LA < RA)
- return -1;
- if (RA < LA)
- return 1;
- }
- if (LI != LE)
- return 1;
- if (RI != RE)
- return -1;
- }
- return 0;
-}
-
-int FunctionComparator::cmpRangeMetadata(const MDNode *L,
- const MDNode *R) const {
- if (L == R)
- return 0;
- if (!L)
- return -1;
- if (!R)
- return 1;
- // Range metadata is a sequence of numbers. Make sure they are the same
- // sequence.
- // TODO: Note that as this is metadata, it is possible to drop and/or merge
- // this data when considering functions to merge. Thus this comparison would
- // return 0 (i.e. equivalent), but merging would become more complicated
- // because the ranges would need to be unioned. It is not likely that
- // functions differ ONLY in this metadata if they are actually the same
- // function semantically.
- if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands()))
- return Res;
- for (size_t I = 0; I < L->getNumOperands(); ++I) {
- ConstantInt *LLow = mdconst::extract<ConstantInt>(L->getOperand(I));
- ConstantInt *RLow = mdconst::extract<ConstantInt>(R->getOperand(I));
- if (int Res = cmpAPInts(LLow->getValue(), RLow->getValue()))
- return Res;
- }
- return 0;
-}
-
-int FunctionComparator::cmpOperandBundlesSchema(const Instruction *L,
- const Instruction *R) const {
- ImmutableCallSite LCS(L);
- ImmutableCallSite RCS(R);
-
- assert(LCS && RCS && "Must be calls or invokes!");
- assert(LCS.isCall() == RCS.isCall() && "Can't compare otherwise!");
-
- if (int Res =
- cmpNumbers(LCS.getNumOperandBundles(), RCS.getNumOperandBundles()))
- return Res;
-
- for (unsigned i = 0, e = LCS.getNumOperandBundles(); i != e; ++i) {
- auto OBL = LCS.getOperandBundleAt(i);
- auto OBR = RCS.getOperandBundleAt(i);
-
- if (int Res = OBL.getTagName().compare(OBR.getTagName()))
- return Res;
-
- if (int Res = cmpNumbers(OBL.Inputs.size(), OBR.Inputs.size()))
- return Res;
- }
-
- return 0;
-}
-
-/// Constants comparison:
-/// 1. Check whether type of L constant could be losslessly bitcasted to R
-/// type.
-/// 2. Compare constant contents.
-/// For more details see declaration comments.
-int FunctionComparator::cmpConstants(const Constant *L,
- const Constant *R) const {
-
- Type *TyL = L->getType();
- Type *TyR = R->getType();
-
- // Check whether the types are bitcastable. This part is just the re-factored
- // Type::canLosslesslyBitCastTo method, but instead of returning true/false,
- // we also pack into the result which type is "less" for us.
- int TypesRes = cmpTypes(TyL, TyR);
- if (TypesRes != 0) {
- // Types are different, but check whether we can bitcast them.
- if (!TyL->isFirstClassType()) {
- if (TyR->isFirstClassType())
- return -1;
- // Neither TyL nor TyR are values of first class type. Return the result
- // of comparing the types
- return TypesRes;
- }
- if (!TyR->isFirstClassType()) {
- if (TyL->isFirstClassType())
- return 1;
- return TypesRes;
- }
-
- // Vector -> Vector conversions are always lossless if the two vector types
- // have the same size, otherwise not.
- unsigned TyLWidth = 0;
- unsigned TyRWidth = 0;
-
- if (auto *VecTyL = dyn_cast<VectorType>(TyL))
- TyLWidth = VecTyL->getBitWidth();
- if (auto *VecTyR = dyn_cast<VectorType>(TyR))
- TyRWidth = VecTyR->getBitWidth();
-
- if (TyLWidth != TyRWidth)
- return cmpNumbers(TyLWidth, TyRWidth);
-
- // Zero bit-width means neither TyL nor TyR are vectors.
- if (!TyLWidth) {
- PointerType *PTyL = dyn_cast<PointerType>(TyL);
- PointerType *PTyR = dyn_cast<PointerType>(TyR);
- if (PTyL && PTyR) {
- unsigned AddrSpaceL = PTyL->getAddressSpace();
- unsigned AddrSpaceR = PTyR->getAddressSpace();
- if (int Res = cmpNumbers(AddrSpaceL, AddrSpaceR))
- return Res;
- }
- if (PTyL)
- return 1;
- if (PTyR)
- return -1;
-
- // TyL and TyR aren't vectors, nor pointers. We don't know how to
- // bitcast them.
- return TypesRes;
- }
- }
-
- // OK, types are bitcastable, now check constant contents.
-
- if (L->isNullValue() && R->isNullValue())
- return TypesRes;
- if (L->isNullValue() && !R->isNullValue())
- return 1;
- if (!L->isNullValue() && R->isNullValue())
- return -1;
-
- auto GlobalValueL = const_cast<GlobalValue*>(dyn_cast<GlobalValue>(L));
- auto GlobalValueR = const_cast<GlobalValue*>(dyn_cast<GlobalValue>(R));
- if (GlobalValueL && GlobalValueR) {
- return cmpGlobalValues(GlobalValueL, GlobalValueR);
- }
-
- if (int Res = cmpNumbers(L->getValueID(), R->getValueID()))
- return Res;
-
- if (const auto *SeqL = dyn_cast<ConstantDataSequential>(L)) {
- const auto *SeqR = cast<ConstantDataSequential>(R);
- // This handles ConstantDataArray and ConstantDataVector. Note that we
- // compare the two raw data arrays, which might differ depending on the host
- // endianness. This isn't a problem though, because the endianness of a module
- // will affect the order of the constants, but this order is the same
- // for a given input module and host platform.
- return cmpMem(SeqL->getRawDataValues(), SeqR->getRawDataValues());
- }
-
- switch (L->getValueID()) {
- case Value::UndefValueVal:
- case Value::ConstantTokenNoneVal:
- return TypesRes;
- case Value::ConstantIntVal: {
- const APInt &LInt = cast<ConstantInt>(L)->getValue();
- const APInt &RInt = cast<ConstantInt>(R)->getValue();
- return cmpAPInts(LInt, RInt);
- }
- case Value::ConstantFPVal: {
- const APFloat &LAPF = cast<ConstantFP>(L)->getValueAPF();
- const APFloat &RAPF = cast<ConstantFP>(R)->getValueAPF();
- return cmpAPFloats(LAPF, RAPF);
- }
- case Value::ConstantArrayVal: {
- const ConstantArray *LA = cast<ConstantArray>(L);
- const ConstantArray *RA = cast<ConstantArray>(R);
- uint64_t NumElementsL = cast<ArrayType>(TyL)->getNumElements();
- uint64_t NumElementsR = cast<ArrayType>(TyR)->getNumElements();
- if (int Res = cmpNumbers(NumElementsL, NumElementsR))
- return Res;
- for (uint64_t i = 0; i < NumElementsL; ++i) {
- if (int Res = cmpConstants(cast<Constant>(LA->getOperand(i)),
- cast<Constant>(RA->getOperand(i))))
- return Res;
- }
- return 0;
- }
- case Value::ConstantStructVal: {
- const ConstantStruct *LS = cast<ConstantStruct>(L);
- const ConstantStruct *RS = cast<ConstantStruct>(R);
- unsigned NumElementsL = cast<StructType>(TyL)->getNumElements();
- unsigned NumElementsR = cast<StructType>(TyR)->getNumElements();
- if (int Res = cmpNumbers(NumElementsL, NumElementsR))
- return Res;
- for (unsigned i = 0; i != NumElementsL; ++i) {
- if (int Res = cmpConstants(cast<Constant>(LS->getOperand(i)),
- cast<Constant>(RS->getOperand(i))))
- return Res;
- }
- return 0;
- }
- case Value::ConstantVectorVal: {
- const ConstantVector *LV = cast<ConstantVector>(L);
- const ConstantVector *RV = cast<ConstantVector>(R);
- unsigned NumElementsL = cast<VectorType>(TyL)->getNumElements();
- unsigned NumElementsR = cast<VectorType>(TyR)->getNumElements();
- if (int Res = cmpNumbers(NumElementsL, NumElementsR))
- return Res;
- for (uint64_t i = 0; i < NumElementsL; ++i) {
- if (int Res = cmpConstants(cast<Constant>(LV->getOperand(i)),
- cast<Constant>(RV->getOperand(i))))
- return Res;
- }
- return 0;
- }
- case Value::ConstantExprVal: {
- const ConstantExpr *LE = cast<ConstantExpr>(L);
- const ConstantExpr *RE = cast<ConstantExpr>(R);
- unsigned NumOperandsL = LE->getNumOperands();
- unsigned NumOperandsR = RE->getNumOperands();
- if (int Res = cmpNumbers(NumOperandsL, NumOperandsR))
- return Res;
- for (unsigned i = 0; i < NumOperandsL; ++i) {
- if (int Res = cmpConstants(cast<Constant>(LE->getOperand(i)),
- cast<Constant>(RE->getOperand(i))))
- return Res;
- }
- return 0;
- }
- case Value::BlockAddressVal: {
- const BlockAddress *LBA = cast<BlockAddress>(L);
- const BlockAddress *RBA = cast<BlockAddress>(R);
- if (int Res = cmpValues(LBA->getFunction(), RBA->getFunction()))
- return Res;
- if (LBA->getFunction() == RBA->getFunction()) {
- // They are BBs in the same function. Order by which comes first in the
- // BB order of the function. This order is deterministic.
- Function* F = LBA->getFunction();
- BasicBlock *LBB = LBA->getBasicBlock();
- BasicBlock *RBB = RBA->getBasicBlock();
- if (LBB == RBB)
- return 0;
- for(BasicBlock &BB : F->getBasicBlockList()) {
- if (&BB == LBB) {
- assert(&BB != RBB);
- return -1;
- }
- if (&BB == RBB)
- return 1;
- }
- llvm_unreachable("Basic Block Address does not point to a basic block in "
- "its function.");
- return -1;
- } else {
- // cmpValues said the functions are the same. So because they aren't
- // literally the same pointer, they must respectively be the left and
- // right functions.
- assert(LBA->getFunction() == FnL && RBA->getFunction() == FnR);
- // cmpValues will tell us if these are equivalent BasicBlocks, in the
- // context of their respective functions.
- return cmpValues(LBA->getBasicBlock(), RBA->getBasicBlock());
- }
- }
- default: // Unknown constant, abort.
- DEBUG(dbgs() << "Looking at valueID " << L->getValueID() << "\n");
- llvm_unreachable("Constant ValueID not recognized.");
- return -1;
- }
-}
-
-int FunctionComparator::cmpGlobalValues(GlobalValue *L, GlobalValue *R) const {
- return cmpNumbers(GlobalNumbers->getNumber(L), GlobalNumbers->getNumber(R));
-}
-
-/// cmpType - compares two types,
-/// defines total ordering among the types set.
-/// See method declaration comments for more details.
-int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const {
- PointerType *PTyL = dyn_cast<PointerType>(TyL);
- PointerType *PTyR = dyn_cast<PointerType>(TyR);
-
- const DataLayout &DL = FnL->getParent()->getDataLayout();
- if (PTyL && PTyL->getAddressSpace() == 0)
- TyL = DL.getIntPtrType(TyL);
- if (PTyR && PTyR->getAddressSpace() == 0)
- TyR = DL.getIntPtrType(TyR);
-
- if (TyL == TyR)
- return 0;
-
- if (int Res = cmpNumbers(TyL->getTypeID(), TyR->getTypeID()))
- return Res;
-
- switch (TyL->getTypeID()) {
- default:
- llvm_unreachable("Unknown type!");
- // Fall through in Release mode.
- case Type::IntegerTyID:
- return cmpNumbers(cast<IntegerType>(TyL)->getBitWidth(),
- cast<IntegerType>(TyR)->getBitWidth());
- case Type::VectorTyID: {
- VectorType *VTyL = cast<VectorType>(TyL), *VTyR = cast<VectorType>(TyR);
- if (int Res = cmpNumbers(VTyL->getNumElements(), VTyR->getNumElements()))
- return Res;
- return cmpTypes(VTyL->getElementType(), VTyR->getElementType());
- }
- // TyL == TyR would have returned true earlier, because types are uniqued.
- case Type::VoidTyID:
- case Type::FloatTyID:
- case Type::DoubleTyID:
- case Type::X86_FP80TyID:
- case Type::FP128TyID:
- case Type::PPC_FP128TyID:
- case Type::LabelTyID:
- case Type::MetadataTyID:
- case Type::TokenTyID:
- return 0;
-
- case Type::PointerTyID: {
- assert(PTyL && PTyR && "Both types must be pointers here.");
- return cmpNumbers(PTyL->getAddressSpace(), PTyR->getAddressSpace());
- }
-
- case Type::StructTyID: {
- StructType *STyL = cast<StructType>(TyL);
- StructType *STyR = cast<StructType>(TyR);
- if (STyL->getNumElements() != STyR->getNumElements())
- return cmpNumbers(STyL->getNumElements(), STyR->getNumElements());
-
- if (STyL->isPacked() != STyR->isPacked())
- return cmpNumbers(STyL->isPacked(), STyR->isPacked());
-
- for (unsigned i = 0, e = STyL->getNumElements(); i != e; ++i) {
- if (int Res = cmpTypes(STyL->getElementType(i), STyR->getElementType(i)))
- return Res;
- }
- return 0;
- }
-
- case Type::FunctionTyID: {
- FunctionType *FTyL = cast<FunctionType>(TyL);
- FunctionType *FTyR = cast<FunctionType>(TyR);
- if (FTyL->getNumParams() != FTyR->getNumParams())
- return cmpNumbers(FTyL->getNumParams(), FTyR->getNumParams());
-
- if (FTyL->isVarArg() != FTyR->isVarArg())
- return cmpNumbers(FTyL->isVarArg(), FTyR->isVarArg());
-
- if (int Res = cmpTypes(FTyL->getReturnType(), FTyR->getReturnType()))
- return Res;
-
- for (unsigned i = 0, e = FTyL->getNumParams(); i != e; ++i) {
- if (int Res = cmpTypes(FTyL->getParamType(i), FTyR->getParamType(i)))
- return Res;
- }
- return 0;
- }
-
- case Type::ArrayTyID: {
- ArrayType *ATyL = cast<ArrayType>(TyL);
- ArrayType *ATyR = cast<ArrayType>(TyR);
- if (ATyL->getNumElements() != ATyR->getNumElements())
- return cmpNumbers(ATyL->getNumElements(), ATyR->getNumElements());
- return cmpTypes(ATyL->getElementType(), ATyR->getElementType());
- }
- }
-}
-
-// Determine whether the two operations are the same except that pointer-to-A
-// and pointer-to-B are equivalent. This should be kept in sync with
-// Instruction::isSameOperationAs.
-// Read method declaration comments for more details.
-int FunctionComparator::cmpOperations(const Instruction *L,
- const Instruction *R) const {
- // Differences from Instruction::isSameOperationAs:
- // * replace type comparison with calls to cmpTypes.
- // * we test for I->getRawSubclassOptionalData (nuw/nsw/tail) at the top.
- // * because of the above, we don't test for the tail bit on calls later on.
- if (int Res = cmpNumbers(L->getOpcode(), R->getOpcode()))
- return Res;
-
- if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands()))
- return Res;
-
- if (int Res = cmpTypes(L->getType(), R->getType()))
- return Res;
-
- if (int Res = cmpNumbers(L->getRawSubclassOptionalData(),
- R->getRawSubclassOptionalData()))
- return Res;
-
- // We have two instructions of identical opcode and #operands. Check to see
- // if all operands are the same type
- for (unsigned i = 0, e = L->getNumOperands(); i != e; ++i) {
- if (int Res =
- cmpTypes(L->getOperand(i)->getType(), R->getOperand(i)->getType()))
- return Res;
- }
-
- // Check special state that is a part of some instructions.
- if (const AllocaInst *AI = dyn_cast<AllocaInst>(L)) {
- if (int Res = cmpTypes(AI->getAllocatedType(),
- cast<AllocaInst>(R)->getAllocatedType()))
- return Res;
- return cmpNumbers(AI->getAlignment(), cast<AllocaInst>(R)->getAlignment());
- }
- if (const LoadInst *LI = dyn_cast<LoadInst>(L)) {
- if (int Res = cmpNumbers(LI->isVolatile(), cast<LoadInst>(R)->isVolatile()))
- return Res;
- if (int Res =
- cmpNumbers(LI->getAlignment(), cast<LoadInst>(R)->getAlignment()))
- return Res;
- if (int Res =
- cmpOrderings(LI->getOrdering(), cast<LoadInst>(R)->getOrdering()))
- return Res;
- if (int Res =
- cmpNumbers(LI->getSynchScope(), cast<LoadInst>(R)->getSynchScope()))
- return Res;
- return cmpRangeMetadata(LI->getMetadata(LLVMContext::MD_range),
- cast<LoadInst>(R)->getMetadata(LLVMContext::MD_range));
- }
- if (const StoreInst *SI = dyn_cast<StoreInst>(L)) {
- if (int Res =
- cmpNumbers(SI->isVolatile(), cast<StoreInst>(R)->isVolatile()))
- return Res;
- if (int Res =
- cmpNumbers(SI->getAlignment(), cast<StoreInst>(R)->getAlignment()))
- return Res;
- if (int Res =
- cmpOrderings(SI->getOrdering(), cast<StoreInst>(R)->getOrdering()))
- return Res;
- return cmpNumbers(SI->getSynchScope(), cast<StoreInst>(R)->getSynchScope());
- }
- if (const CmpInst *CI = dyn_cast<CmpInst>(L))
- return cmpNumbers(CI->getPredicate(), cast<CmpInst>(R)->getPredicate());
- if (const CallInst *CI = dyn_cast<CallInst>(L)) {
- if (int Res = cmpNumbers(CI->getCallingConv(),
- cast<CallInst>(R)->getCallingConv()))
- return Res;
- if (int Res =
- cmpAttrs(CI->getAttributes(), cast<CallInst>(R)->getAttributes()))
- return Res;
- if (int Res = cmpOperandBundlesSchema(CI, R))
- return Res;
- return cmpRangeMetadata(
- CI->getMetadata(LLVMContext::MD_range),
- cast<CallInst>(R)->getMetadata(LLVMContext::MD_range));
- }
- if (const InvokeInst *II = dyn_cast<InvokeInst>(L)) {
- if (int Res = cmpNumbers(II->getCallingConv(),
- cast<InvokeInst>(R)->getCallingConv()))
- return Res;
- if (int Res =
- cmpAttrs(II->getAttributes(), cast<InvokeInst>(R)->getAttributes()))
- return Res;
- if (int Res = cmpOperandBundlesSchema(II, R))
- return Res;
- return cmpRangeMetadata(
- II->getMetadata(LLVMContext::MD_range),
- cast<InvokeInst>(R)->getMetadata(LLVMContext::MD_range));
- }
- if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(L)) {
- ArrayRef<unsigned> LIndices = IVI->getIndices();
- ArrayRef<unsigned> RIndices = cast<InsertValueInst>(R)->getIndices();
- if (int Res = cmpNumbers(LIndices.size(), RIndices.size()))
- return Res;
- for (size_t i = 0, e = LIndices.size(); i != e; ++i) {
- if (int Res = cmpNumbers(LIndices[i], RIndices[i]))
- return Res;
- }
- return 0;
- }
- if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(L)) {
- ArrayRef<unsigned> LIndices = EVI->getIndices();
- ArrayRef<unsigned> RIndices = cast<ExtractValueInst>(R)->getIndices();
- if (int Res = cmpNumbers(LIndices.size(), RIndices.size()))
- return Res;
- for (size_t i = 0, e = LIndices.size(); i != e; ++i) {
- if (int Res = cmpNumbers(LIndices[i], RIndices[i]))
- return Res;
- }
- }
- if (const FenceInst *FI = dyn_cast<FenceInst>(L)) {
- if (int Res =
- cmpOrderings(FI->getOrdering(), cast<FenceInst>(R)->getOrdering()))
- return Res;
- return cmpNumbers(FI->getSynchScope(), cast<FenceInst>(R)->getSynchScope());
- }
- if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(L)) {
- if (int Res = cmpNumbers(CXI->isVolatile(),
- cast<AtomicCmpXchgInst>(R)->isVolatile()))
- return Res;
- if (int Res = cmpNumbers(CXI->isWeak(),
- cast<AtomicCmpXchgInst>(R)->isWeak()))
- return Res;
- if (int Res =
- cmpOrderings(CXI->getSuccessOrdering(),
- cast<AtomicCmpXchgInst>(R)->getSuccessOrdering()))
- return Res;
- if (int Res =
- cmpOrderings(CXI->getFailureOrdering(),
- cast<AtomicCmpXchgInst>(R)->getFailureOrdering()))
- return Res;
- return cmpNumbers(CXI->getSynchScope(),
- cast<AtomicCmpXchgInst>(R)->getSynchScope());
- }
- if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(L)) {
- if (int Res = cmpNumbers(RMWI->getOperation(),
- cast<AtomicRMWInst>(R)->getOperation()))
- return Res;
- if (int Res = cmpNumbers(RMWI->isVolatile(),
- cast<AtomicRMWInst>(R)->isVolatile()))
- return Res;
- if (int Res = cmpOrderings(RMWI->getOrdering(),
- cast<AtomicRMWInst>(R)->getOrdering()))
- return Res;
- return cmpNumbers(RMWI->getSynchScope(),
- cast<AtomicRMWInst>(R)->getSynchScope());
- }
- if (const PHINode *PNL = dyn_cast<PHINode>(L)) {
- const PHINode *PNR = cast<PHINode>(R);
- // Ensure that in addition to the incoming values being identical
- // (checked by the caller of this function), the incoming blocks
- // are also identical.
- for (unsigned i = 0, e = PNL->getNumIncomingValues(); i != e; ++i) {
- if (int Res =
- cmpValues(PNL->getIncomingBlock(i), PNR->getIncomingBlock(i)))
- return Res;
- }
- }
- return 0;
-}
-
-// Determine whether two GEP operations perform the same underlying arithmetic.
-// Read method declaration comments for more details.
-int FunctionComparator::cmpGEPs(const GEPOperator *GEPL,
- const GEPOperator *GEPR) const {
-
- unsigned int ASL = GEPL->getPointerAddressSpace();
- unsigned int ASR = GEPR->getPointerAddressSpace();
-
- if (int Res = cmpNumbers(ASL, ASR))
- return Res;
-
- // When we have target data, we can reduce the GEP down to the value in bytes
- // added to the address.
- const DataLayout &DL = FnL->getParent()->getDataLayout();
- unsigned BitWidth = DL.getPointerSizeInBits(ASL);
- APInt OffsetL(BitWidth, 0), OffsetR(BitWidth, 0);
- if (GEPL->accumulateConstantOffset(DL, OffsetL) &&
- GEPR->accumulateConstantOffset(DL, OffsetR))
- return cmpAPInts(OffsetL, OffsetR);
- if (int Res = cmpTypes(GEPL->getSourceElementType(),
- GEPR->getSourceElementType()))
- return Res;
-
- if (int Res = cmpNumbers(GEPL->getNumOperands(), GEPR->getNumOperands()))
- return Res;
-
- for (unsigned i = 0, e = GEPL->getNumOperands(); i != e; ++i) {
- if (int Res = cmpValues(GEPL->getOperand(i), GEPR->getOperand(i)))
- return Res;
- }
-
- return 0;
-}
-
-int FunctionComparator::cmpInlineAsm(const InlineAsm *L,
- const InlineAsm *R) const {
- // InlineAsm instances are uniqued. If they are the same pointer, obviously
- // they are the same; otherwise compare the fields.
- if (L == R)
- return 0;
- if (int Res = cmpTypes(L->getFunctionType(), R->getFunctionType()))
- return Res;
- if (int Res = cmpMem(L->getAsmString(), R->getAsmString()))
- return Res;
- if (int Res = cmpMem(L->getConstraintString(), R->getConstraintString()))
- return Res;
- if (int Res = cmpNumbers(L->hasSideEffects(), R->hasSideEffects()))
- return Res;
- if (int Res = cmpNumbers(L->isAlignStack(), R->isAlignStack()))
- return Res;
- if (int Res = cmpNumbers(L->getDialect(), R->getDialect()))
- return Res;
- llvm_unreachable("InlineAsm blocks were not uniqued.");
- return 0;
-}
-
-/// Compare two values used by the two functions under pair-wise comparison. If
-/// this is the first time the values are seen, they're added to the mapping so
-/// that we will detect mismatches on next use.
-/// See comments in declaration for more details.
-int FunctionComparator::cmpValues(const Value *L, const Value *R) const {
- // Catch self-reference case.
- if (L == FnL) {
- if (R == FnR)
- return 0;
- return -1;
- }
- if (R == FnR) {
- if (L == FnL)
- return 0;
- return 1;
- }
-
- const Constant *ConstL = dyn_cast<Constant>(L);
- const Constant *ConstR = dyn_cast<Constant>(R);
- if (ConstL && ConstR) {
- if (L == R)
- return 0;
- return cmpConstants(ConstL, ConstR);
- }
-
- if (ConstL)
- return 1;
- if (ConstR)
- return -1;
-
- const InlineAsm *InlineAsmL = dyn_cast<InlineAsm>(L);
- const InlineAsm *InlineAsmR = dyn_cast<InlineAsm>(R);
-
- if (InlineAsmL && InlineAsmR)
- return cmpInlineAsm(InlineAsmL, InlineAsmR);
- if (InlineAsmL)
- return 1;
- if (InlineAsmR)
- return -1;
-
- auto LeftSN = sn_mapL.insert(std::make_pair(L, sn_mapL.size())),
- RightSN = sn_mapR.insert(std::make_pair(R, sn_mapR.size()));
-
- return cmpNumbers(LeftSN.first->second, RightSN.first->second);
-}
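
cmpValues above orders non-constant values by the position at which each function first uses them: both sides keep a map from value to a serial number assigned on first insertion, and two values compare equal only if their serial numbers match. A standalone sketch of that serial-numbering idea with toy types in place of the LLVM classes:

#include <cstddef>
#include <string>
#include <unordered_map>

// Assign each value a serial number on first use, one map per function.
class ValueNumberer {
  std::unordered_map<std::string, size_t> MapL, MapR;

  static size_t getOrAssign(std::unordered_map<std::string, size_t> &M,
                            const std::string &V) {
    // insert() keeps the existing number if V was already seen.
    return M.insert({V, M.size()}).first->second;
  }

public:
  // <0 / 0 / >0, mirroring the cmpValues contract.
  int cmpValues(const std::string &L, const std::string &R) {
    size_t SL = getOrAssign(MapL, L);
    size_t SR = getOrAssign(MapR, R);
    return SL < SR ? -1 : (SL > SR ? 1 : 0);
  }
};

With this scheme a function computing x + x and one computing y + y compare equal: each operand is the first value seen on its own side, so both get the same serial number.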
-// Test whether two basic blocks have equivalent behaviour.
-int FunctionComparator::cmpBasicBlocks(const BasicBlock *BBL,
- const BasicBlock *BBR) const {
- BasicBlock::const_iterator InstL = BBL->begin(), InstLE = BBL->end();
- BasicBlock::const_iterator InstR = BBR->begin(), InstRE = BBR->end();
-
- do {
- if (int Res = cmpValues(&*InstL, &*InstR))
- return Res;
-
- const GetElementPtrInst *GEPL = dyn_cast<GetElementPtrInst>(InstL);
- const GetElementPtrInst *GEPR = dyn_cast<GetElementPtrInst>(InstR);
-
- if (GEPL && !GEPR)
- return 1;
- if (GEPR && !GEPL)
- return -1;
-
- if (GEPL && GEPR) {
- if (int Res =
- cmpValues(GEPL->getPointerOperand(), GEPR->getPointerOperand()))
- return Res;
- if (int Res = cmpGEPs(GEPL, GEPR))
- return Res;
- } else {
- if (int Res = cmpOperations(&*InstL, &*InstR))
- return Res;
- assert(InstL->getNumOperands() == InstR->getNumOperands());
-
- for (unsigned i = 0, e = InstL->getNumOperands(); i != e; ++i) {
- Value *OpL = InstL->getOperand(i);
- Value *OpR = InstR->getOperand(i);
- if (int Res = cmpValues(OpL, OpR))
- return Res;
- // cmpValues should ensure this is true.
- assert(cmpTypes(OpL->getType(), OpR->getType()) == 0);
- }
- }
-
- ++InstL;
- ++InstR;
- } while (InstL != InstLE && InstR != InstRE);
-
- if (InstL != InstLE && InstR == InstRE)
- return 1;
- if (InstL == InstLE && InstR != InstRE)
- return -1;
- return 0;
-}
-
-// Test whether the two functions have equivalent behaviour.
-int FunctionComparator::compare() {
- sn_mapL.clear();
- sn_mapR.clear();
-
- if (int Res = cmpAttrs(FnL->getAttributes(), FnR->getAttributes()))
- return Res;
-
- if (int Res = cmpNumbers(FnL->hasGC(), FnR->hasGC()))
- return Res;
-
- if (FnL->hasGC()) {
- if (int Res = cmpMem(FnL->getGC(), FnR->getGC()))
- return Res;
- }
-
- if (int Res = cmpNumbers(FnL->hasSection(), FnR->hasSection()))
- return Res;
-
- if (FnL->hasSection()) {
- if (int Res = cmpMem(FnL->getSection(), FnR->getSection()))
- return Res;
- }
-
- if (int Res = cmpNumbers(FnL->isVarArg(), FnR->isVarArg()))
- return Res;
-
- // TODO: if it's internal and only used in direct calls, we could handle this
- // case too.
- if (int Res = cmpNumbers(FnL->getCallingConv(), FnR->getCallingConv()))
- return Res;
-
- if (int Res = cmpTypes(FnL->getFunctionType(), FnR->getFunctionType()))
- return Res;
-
- assert(FnL->arg_size() == FnR->arg_size() &&
- "Identically typed functions have different numbers of args!");
-
- // Visit the arguments so that they get enumerated in the order they're
- // passed in.
- for (Function::const_arg_iterator ArgLI = FnL->arg_begin(),
- ArgRI = FnR->arg_begin(),
- ArgLE = FnL->arg_end();
- ArgLI != ArgLE; ++ArgLI, ++ArgRI) {
- if (cmpValues(&*ArgLI, &*ArgRI) != 0)
- llvm_unreachable("Arguments repeat!");
- }
-
- // We do a CFG-ordered walk since the actual ordering of the blocks in the
- // linked list is immaterial. Our walk starts at the entry block for both
- // functions, then takes each block from each terminator in order. As an
- // artifact, this also means that unreachable blocks are ignored.
- SmallVector<const BasicBlock *, 8> FnLBBs, FnRBBs;
- SmallPtrSet<const BasicBlock *, 32> VisitedBBs; // in terms of F1.
-
- FnLBBs.push_back(&FnL->getEntryBlock());
- FnRBBs.push_back(&FnR->getEntryBlock());
-
- VisitedBBs.insert(FnLBBs[0]);
- while (!FnLBBs.empty()) {
- const BasicBlock *BBL = FnLBBs.pop_back_val();
- const BasicBlock *BBR = FnRBBs.pop_back_val();
-
- if (int Res = cmpValues(BBL, BBR))
- return Res;
-
- if (int Res = cmpBasicBlocks(BBL, BBR))
- return Res;
-
- const TerminatorInst *TermL = BBL->getTerminator();
- const TerminatorInst *TermR = BBR->getTerminator();
-
- assert(TermL->getNumSuccessors() == TermR->getNumSuccessors());
- for (unsigned i = 0, e = TermL->getNumSuccessors(); i != e; ++i) {
- if (!VisitedBBs.insert(TermL->getSuccessor(i)).second)
- continue;
-
- FnLBBs.push_back(TermL->getSuccessor(i));
- FnRBBs.push_back(TermR->getSuccessor(i));
- }
- }
- return 0;
-}
-
-namespace {
-// Accumulate the hash of a sequence of 64-bit integers. This is similar to a
-// hash of a sequence of 64bit ints, but the entire input does not need to be
-// available at once. This interface is necessary for functionHash because it
-// needs to accumulate the hash as the structure of the function is traversed
-// without saving these values to an intermediate buffer. This form of hashing
-// is not often needed, as usually the object to hash is just read from a
-// buffer.
-class HashAccumulator64 {
- uint64_t Hash;
-public:
- // Initialize to random constant, so the state isn't zero.
- HashAccumulator64() { Hash = 0x6acaa36bef8325c5ULL; }
- void add(uint64_t V) {
- Hash = llvm::hashing::detail::hash_16_bytes(Hash, V);
- }
- // No finishing is required, because the entire hash value is used.
- uint64_t getHash() { return Hash; }
-};
-} // end anonymous namespace
-
-// A function hash is calculated by considering only the number of arguments and
-// whether a function is varargs, the order of basic blocks (given by the
-// successors of each basic block in depth first order), and the order of
-// opcodes of each instruction within each of these basic blocks. This mirrors
-// the strategy compare() uses to compare functions by walking the BBs in depth
-// first order and comparing each instruction in sequence. Because this hash
-// does not look at the operands, it is insensitive to things such as the
-// target of calls and the constants used in the function, which makes it useful
-// when possibly merging functions which are the same modulo constants and call
-// targets.
-FunctionComparator::FunctionHash FunctionComparator::functionHash(Function &F) {
- HashAccumulator64 H;
- H.add(F.isVarArg());
- H.add(F.arg_size());
-
- SmallVector<const BasicBlock *, 8> BBs;
- SmallSet<const BasicBlock *, 16> VisitedBBs;
-
- // Walk the blocks in the same order as FunctionComparator::cmpBasicBlocks(),
- // accumulating the hash of the function "structure." (BB and opcode sequence)
- BBs.push_back(&F.getEntryBlock());
- VisitedBBs.insert(BBs[0]);
- while (!BBs.empty()) {
- const BasicBlock *BB = BBs.pop_back_val();
- // This random value acts as a block header, as otherwise the partition of
- // opcodes into BBs wouldn't affect the hash, only the order of the opcodes
- H.add(45798);
- for (auto &Inst : *BB) {
- H.add(Inst.getOpcode());
- }
- const TerminatorInst *Term = BB->getTerminator();
- for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
- if (!VisitedBBs.insert(Term->getSuccessor(i)).second)
- continue;
- BBs.push_back(Term->getSuccessor(i));
- }
- }
- return H.getHash();
-}
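
functionHash above walks the blocks in the same successor order cmpBasicBlocks uses and accumulates a per-block sentinel plus each instruction's opcode, so the hash reflects structure but not operands. A standalone sketch of that walk, with a toy block type and an illustrative mix function in place of hash_16_bytes:

#include <cstdint>
#include <unordered_set>
#include <vector>

struct ToyBlock {
  std::vector<int> Opcodes;            // instruction opcodes, in order
  std::vector<const ToyBlock *> Succs; // successor blocks
};

// Order-sensitive accumulator; the mix step here is only an illustration.
struct HashAcc {
  uint64_t H = 0x6acaa36bef8325c5ULL;  // non-zero seed, as in the pass
  void add(uint64_t V) { H = (H ^ V) * 0x100000001b3ULL; }
};

uint64_t structuralHash(const ToyBlock *Entry, bool IsVarArg, size_t NumArgs) {
  HashAcc Acc;
  Acc.add(IsVarArg);
  Acc.add(NumArgs);

  std::vector<const ToyBlock *> Work{Entry};
  std::unordered_set<const ToyBlock *> Visited{Entry};
  while (!Work.empty()) {
    const ToyBlock *BB = Work.back();
    Work.pop_back();
    Acc.add(45798);                    // block-header sentinel
    for (int Op : BB->Opcodes)
      Acc.add(static_cast<uint64_t>(Op));
    for (const ToyBlock *S : BB->Succs)
      if (Visited.insert(S).second)
        Work.push_back(S);
  }
  return Acc.H;
}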
-
-
-namespace {
/// MergeFunctions finds functions which will generate identical machine code,
/// by considering all pointer types to be equivalent. Once identified,
diff --git a/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
index 49c44173491e..7ef3fc1fc2a7 100644
--- a/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/PartialInlining.cpp
@@ -14,6 +14,9 @@
#include "llvm/Transforms/IPO/PartialInlining.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
@@ -29,161 +32,193 @@ using namespace llvm;
STATISTIC(NumPartialInlined, "Number of functions partially inlined");
namespace {
+struct PartialInlinerImpl {
+ PartialInlinerImpl(InlineFunctionInfo IFI) : IFI(IFI) {}
+ bool run(Module &M);
+ Function *unswitchFunction(Function *F);
+
+private:
+ InlineFunctionInfo IFI;
+};
struct PartialInlinerLegacyPass : public ModulePass {
static char ID; // Pass identification, replacement for typeid
PartialInlinerLegacyPass() : ModulePass(ID) {
initializePartialInlinerLegacyPassPass(*PassRegistry::getPassRegistry());
}
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionCacheTracker>();
+ }
bool runOnModule(Module &M) override {
if (skipModule(M))
return false;
- ModuleAnalysisManager DummyMAM;
- auto PA = Impl.run(M, DummyMAM);
- return !PA.areAllPreserved();
- }
-
-private:
- PartialInlinerPass Impl;
- };
-}
-
-char PartialInlinerLegacyPass::ID = 0;
-INITIALIZE_PASS(PartialInlinerLegacyPass, "partial-inliner", "Partial Inliner",
- false, false)
-ModulePass *llvm::createPartialInliningPass() {
- return new PartialInlinerLegacyPass();
+ AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>();
+ std::function<AssumptionCache &(Function &)> GetAssumptionCache =
+ [&ACT](Function &F) -> AssumptionCache & {
+ return ACT->getAssumptionCache(F);
+ };
+ InlineFunctionInfo IFI(nullptr, &GetAssumptionCache);
+ return PartialInlinerImpl(IFI).run(M);
+ }
+};
}
-Function *PartialInlinerPass::unswitchFunction(Function *F) {
+Function *PartialInlinerImpl::unswitchFunction(Function *F) {
// First, verify that this function is an unswitching candidate...
- BasicBlock *entryBlock = &F->front();
- BranchInst *BR = dyn_cast<BranchInst>(entryBlock->getTerminator());
+ BasicBlock *EntryBlock = &F->front();
+ BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator());
if (!BR || BR->isUnconditional())
return nullptr;
-
- BasicBlock* returnBlock = nullptr;
- BasicBlock* nonReturnBlock = nullptr;
- unsigned returnCount = 0;
- for (BasicBlock *BB : successors(entryBlock)) {
+
+ BasicBlock *ReturnBlock = nullptr;
+ BasicBlock *NonReturnBlock = nullptr;
+ unsigned ReturnCount = 0;
+ for (BasicBlock *BB : successors(EntryBlock)) {
if (isa<ReturnInst>(BB->getTerminator())) {
- returnBlock = BB;
- returnCount++;
+ ReturnBlock = BB;
+ ReturnCount++;
} else
- nonReturnBlock = BB;
+ NonReturnBlock = BB;
}
-
- if (returnCount != 1)
+
+ if (ReturnCount != 1)
return nullptr;
-
+
// Clone the function, so that we can hack away on it.
ValueToValueMapTy VMap;
- Function* duplicateFunction = CloneFunction(F, VMap);
- duplicateFunction->setLinkage(GlobalValue::InternalLinkage);
- BasicBlock* newEntryBlock = cast<BasicBlock>(VMap[entryBlock]);
- BasicBlock* newReturnBlock = cast<BasicBlock>(VMap[returnBlock]);
- BasicBlock* newNonReturnBlock = cast<BasicBlock>(VMap[nonReturnBlock]);
-
+ Function *DuplicateFunction = CloneFunction(F, VMap);
+ DuplicateFunction->setLinkage(GlobalValue::InternalLinkage);
+ BasicBlock *NewEntryBlock = cast<BasicBlock>(VMap[EntryBlock]);
+ BasicBlock *NewReturnBlock = cast<BasicBlock>(VMap[ReturnBlock]);
+ BasicBlock *NewNonReturnBlock = cast<BasicBlock>(VMap[NonReturnBlock]);
+
// Go ahead and update all uses to the duplicate, so that we can just
// use the inliner functionality when we're done hacking.
- F->replaceAllUsesWith(duplicateFunction);
-
+ F->replaceAllUsesWith(DuplicateFunction);
+
// Special hackery is needed with PHI nodes that have inputs from more than
// one extracted block. For simplicity, just split the PHIs into a two-level
// sequence of PHIs, some of which will go in the extracted region, and some
// of which will go outside.
- BasicBlock* preReturn = newReturnBlock;
- newReturnBlock = newReturnBlock->splitBasicBlock(
- newReturnBlock->getFirstNonPHI()->getIterator());
- BasicBlock::iterator I = preReturn->begin();
- Instruction *Ins = &newReturnBlock->front();
- while (I != preReturn->end()) {
- PHINode* OldPhi = dyn_cast<PHINode>(I);
- if (!OldPhi) break;
-
- PHINode *retPhi = PHINode::Create(OldPhi->getType(), 2, "", Ins);
- OldPhi->replaceAllUsesWith(retPhi);
- Ins = newReturnBlock->getFirstNonPHI();
-
- retPhi->addIncoming(&*I, preReturn);
- retPhi->addIncoming(OldPhi->getIncomingValueForBlock(newEntryBlock),
- newEntryBlock);
- OldPhi->removeIncomingValue(newEntryBlock);
-
+ BasicBlock *PreReturn = NewReturnBlock;
+ NewReturnBlock = NewReturnBlock->splitBasicBlock(
+ NewReturnBlock->getFirstNonPHI()->getIterator());
+ BasicBlock::iterator I = PreReturn->begin();
+ Instruction *Ins = &NewReturnBlock->front();
+ while (I != PreReturn->end()) {
+ PHINode *OldPhi = dyn_cast<PHINode>(I);
+ if (!OldPhi)
+ break;
+
+ PHINode *RetPhi = PHINode::Create(OldPhi->getType(), 2, "", Ins);
+ OldPhi->replaceAllUsesWith(RetPhi);
+ Ins = NewReturnBlock->getFirstNonPHI();
+
+ RetPhi->addIncoming(&*I, PreReturn);
+ RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(NewEntryBlock),
+ NewEntryBlock);
+ OldPhi->removeIncomingValue(NewEntryBlock);
+
++I;
}
- newEntryBlock->getTerminator()->replaceUsesOfWith(preReturn, newReturnBlock);
-
+ NewEntryBlock->getTerminator()->replaceUsesOfWith(PreReturn, NewReturnBlock);
+
// Gather up the blocks that we're going to extract.
- std::vector<BasicBlock*> toExtract;
- toExtract.push_back(newNonReturnBlock);
- for (BasicBlock &BB : *duplicateFunction)
- if (&BB != newEntryBlock && &BB != newReturnBlock &&
- &BB != newNonReturnBlock)
- toExtract.push_back(&BB);
+ std::vector<BasicBlock *> ToExtract;
+ ToExtract.push_back(NewNonReturnBlock);
+ for (BasicBlock &BB : *DuplicateFunction)
+ if (&BB != NewEntryBlock && &BB != NewReturnBlock &&
+ &BB != NewNonReturnBlock)
+ ToExtract.push_back(&BB);
// The CodeExtractor needs a dominator tree.
DominatorTree DT;
- DT.recalculate(*duplicateFunction);
+ DT.recalculate(*DuplicateFunction);
+
+ // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
+ LoopInfo LI(DT);
+ BranchProbabilityInfo BPI(*DuplicateFunction, LI);
+ BlockFrequencyInfo BFI(*DuplicateFunction, BPI, LI);
// Extract the body of the if.
- Function* extractedFunction
- = CodeExtractor(toExtract, &DT).extractCodeRegion();
-
- InlineFunctionInfo IFI;
-
+ Function *ExtractedFunction =
+ CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false, &BFI, &BPI)
+ .extractCodeRegion();
+
// Inline the top-level if test into all callers.
- std::vector<User *> Users(duplicateFunction->user_begin(),
- duplicateFunction->user_end());
+ std::vector<User *> Users(DuplicateFunction->user_begin(),
+ DuplicateFunction->user_end());
for (User *User : Users)
if (CallInst *CI = dyn_cast<CallInst>(User))
InlineFunction(CI, IFI);
else if (InvokeInst *II = dyn_cast<InvokeInst>(User))
InlineFunction(II, IFI);
-
+
// Ditch the duplicate, since we're done with it, and rewrite all remaining
// users (function pointers, etc.) back to the original function.
- duplicateFunction->replaceAllUsesWith(F);
- duplicateFunction->eraseFromParent();
-
+ DuplicateFunction->replaceAllUsesWith(F);
+ DuplicateFunction->eraseFromParent();
+
++NumPartialInlined;
-
- return extractedFunction;
+
+ return ExtractedFunction;
}
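
At the source level, unswitchFunction's effect is to leave callers with only the cheap entry test while the region behind it is outlined into a separate function. A hand-written sketch of that before/after shape (hypothetical functions, not output of the pass):

#include <vector>

// Before: the whole body is one function, so callers either inline all of it
// or none of it.
int processBefore(std::vector<int> &V) {
  if (V.empty())
    return 0;            // hot early exit
  int Sum = 0;           // cold, expensive part
  for (int X : V)
    Sum += X * X;
  return Sum;
}

// After partial inlining: the guard stays small enough to inline everywhere,
// and only the cold region lives in an outlined helper.
__attribute__((noinline)) static int processCold(std::vector<int> &V) {
  int Sum = 0;
  for (int X : V)
    Sum += X * X;
  return Sum;
}

inline int processAfter(std::vector<int> &V) {
  if (V.empty())
    return 0;            // this test now inlines into every caller
  return processCold(V); // extracted region, reached only when needed
}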
-PreservedAnalyses PartialInlinerPass::run(Module &M, ModuleAnalysisManager &) {
- std::vector<Function*> worklist;
- worklist.reserve(M.size());
+bool PartialInlinerImpl::run(Module &M) {
+ std::vector<Function *> Worklist;
+ Worklist.reserve(M.size());
for (Function &F : M)
if (!F.use_empty() && !F.isDeclaration())
- worklist.push_back(&F);
-
- bool changed = false;
- while (!worklist.empty()) {
- Function* currFunc = worklist.back();
- worklist.pop_back();
-
- if (currFunc->use_empty()) continue;
-
- bool recursive = false;
- for (User *U : currFunc->users())
- if (Instruction* I = dyn_cast<Instruction>(U))
- if (I->getParent()->getParent() == currFunc) {
- recursive = true;
+ Worklist.push_back(&F);
+
+ bool Changed = false;
+ while (!Worklist.empty()) {
+ Function *CurrFunc = Worklist.back();
+ Worklist.pop_back();
+
+ if (CurrFunc->use_empty())
+ continue;
+
+ bool Recursive = false;
+ for (User *U : CurrFunc->users())
+ if (Instruction *I = dyn_cast<Instruction>(U))
+ if (I->getParent()->getParent() == CurrFunc) {
+ Recursive = true;
break;
}
- if (recursive) continue;
-
-
- if (Function* newFunc = unswitchFunction(currFunc)) {
- worklist.push_back(newFunc);
- changed = true;
+ if (Recursive)
+ continue;
+
+ if (Function *NewFunc = unswitchFunction(CurrFunc)) {
+ Worklist.push_back(NewFunc);
+ Changed = true;
}
-
}
- if (changed)
+ return Changed;
+}
+
+char PartialInlinerLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(PartialInlinerLegacyPass, "partial-inliner",
+ "Partial Inliner", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_END(PartialInlinerLegacyPass, "partial-inliner",
+ "Partial Inliner", false, false)
+
+ModulePass *llvm::createPartialInliningPass() {
+ return new PartialInlinerLegacyPass();
+}
+
+PreservedAnalyses PartialInlinerPass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+ std::function<AssumptionCache &(Function &)> GetAssumptionCache =
+ [&FAM](Function &F) -> AssumptionCache & {
+ return FAM.getResult<AssumptionAnalysis>(F);
+ };
+ InlineFunctionInfo IFI(nullptr, &GetAssumptionCache);
+ if (PartialInlinerImpl(IFI).run(M))
return PreservedAnalyses::none();
return PreservedAnalyses::all();
}
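
PartialInlinerImpl::run above is a worklist driver: seed with every defined, used function, skip recursive candidates, and re-queue whatever unswitchFunction extracts so the new function can be considered in turn. A generic sketch of that driver shape, with the skip test and the transform passed in as callables:

#include <vector>

// Generic worklist loop: skip items the transform cannot handle, and
// re-queue anything the transform produces as a new candidate.
template <typename T, typename SkipFn, typename TransformFn>
bool runWorklist(std::vector<T> Worklist, SkipFn Skip, TransformFn Transform) {
  bool Changed = false;
  while (!Worklist.empty()) {
    T Item = Worklist.back();
    Worklist.pop_back();
    if (Skip(Item))
      continue;                       // e.g. recursive or unused functions
    std::vector<T> NewItems = Transform(Item);
    if (!NewItems.empty())
      Changed = true;
    // Newly created items (the extracted functions) are candidates too.
    Worklist.insert(Worklist.end(), NewItems.begin(), NewItems.end());
  }
  return Changed;
}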
diff --git a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
index df6a48e05d42..293ddf21a68f 100644
--- a/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -19,6 +19,7 @@
#include "llvm/Analysis/CFLAndersAliasAnalysis.h"
#include "llvm/Analysis/CFLSteensAliasAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/ScopedNoAliasAA.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -66,14 +67,13 @@ static cl::opt<bool>
RunLoopRerolling("reroll-loops", cl::Hidden,
cl::desc("Run the loop rerolling pass"));
-static cl::opt<bool>
-RunFloat2Int("float-to-int", cl::Hidden, cl::init(true),
- cl::desc("Run the float2int (float demotion) pass"));
-
static cl::opt<bool> RunLoadCombine("combine-loads", cl::init(false),
cl::Hidden,
cl::desc("Run the load combining pass"));
+static cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden,
+ cl::desc("Run the NewGVN pass"));
+
static cl::opt<bool>
RunSLPAfterLoopVectorization("run-slp-after-loop-vectorization",
cl::init(true), cl::Hidden,
@@ -91,8 +91,7 @@ static cl::opt<CFLAAType>
clEnumValN(CFLAAType::Andersen, "anders",
"Enable inclusion-based CFL-AA"),
clEnumValN(CFLAAType::Both, "both",
- "Enable both variants of CFL-aa"),
- clEnumValEnd));
+ "Enable both variants of CFL-AA")));
static cl::opt<bool>
EnableMLSM("mlsm", cl::init(true), cl::Hidden,
@@ -111,10 +110,17 @@ static cl::opt<bool> EnableLoopLoadElim(
"enable-loop-load-elim", cl::init(true), cl::Hidden,
cl::desc("Enable the LoopLoadElimination Pass"));
-static cl::opt<std::string> RunPGOInstrGen(
- "profile-generate", cl::init(""), cl::Hidden,
- cl::desc("Enable generation phase of PGO instrumentation and specify the "
- "path of profile data file"));
+static cl::opt<bool>
+ EnablePrepareForThinLTO("prepare-for-thinlto", cl::init(false), cl::Hidden,
+ cl::desc("Enable preparation for ThinLTO."));
+
+static cl::opt<bool> RunPGOInstrGen(
+ "profile-generate", cl::init(false), cl::Hidden,
+ cl::desc("Enable PGO instrumentation."));
+
+static cl::opt<std::string>
+ PGOOutputFile("profile-generate-file", cl::init(""), cl::Hidden,
+ cl::desc("Specify the path of profile data file."));
static cl::opt<std::string> RunPGOInstrUse(
"profile-use", cl::init(""), cl::Hidden, cl::value_desc("filename"),
@@ -135,15 +141,19 @@ static cl::opt<int> PreInlineThreshold(
"(default = 75)"));
static cl::opt<bool> EnableGVNHoist(
- "enable-gvn-hoist", cl::init(false), cl::Hidden,
- cl::desc("Enable the experimental GVN Hoisting pass"));
+ "enable-gvn-hoist", cl::init(true), cl::Hidden,
+ cl::desc("Enable the GVN hoisting pass (default = on)"));
+
+static cl::opt<bool>
+ DisableLibCallsShrinkWrap("disable-libcalls-shrinkwrap", cl::init(false),
+ cl::Hidden,
+ cl::desc("Disable shrink-wrap library calls"));
PassManagerBuilder::PassManagerBuilder() {
OptLevel = 2;
SizeLevel = 0;
LibraryInfo = nullptr;
Inliner = nullptr;
- ModuleSummary = nullptr;
DisableUnitAtATime = false;
DisableUnrollLoops = false;
BBVectorize = RunBBVectorization;
@@ -151,14 +161,16 @@ PassManagerBuilder::PassManagerBuilder() {
LoopVectorize = RunLoopVectorization;
RerollLoops = RunLoopRerolling;
LoadCombine = RunLoadCombine;
+ NewGVN = RunNewGVN;
DisableGVNLoadPRE = false;
VerifyInput = false;
VerifyOutput = false;
MergeFunctions = false;
PrepareForLTO = false;
- PGOInstrGen = RunPGOInstrGen;
+ EnablePGOInstrGen = RunPGOInstrGen;
+ PGOInstrGen = PGOOutputFile;
PGOInstrUse = RunPGOInstrUse;
- PrepareForThinLTO = false;
+ PrepareForThinLTO = EnablePrepareForThinLTO;
PerformThinLTO = false;
}
@@ -243,24 +255,34 @@ void PassManagerBuilder::populateFunctionPassManager(
// Do PGO instrumentation generation or use pass as the option specified.
void PassManagerBuilder::addPGOInstrPasses(legacy::PassManagerBase &MPM) {
- if (PGOInstrGen.empty() && PGOInstrUse.empty())
+ if (!EnablePGOInstrGen && PGOInstrUse.empty())
return;
// Perform the preinline and cleanup passes for O1 and above.
// And avoid doing them if optimizing for size.
if (OptLevel > 0 && SizeLevel == 0 && !DisablePreInliner) {
- // Create preinline pass.
- MPM.add(createFunctionInliningPass(PreInlineThreshold));
+ // Create preinline pass. We construct an InlineParams object and specify
+ // the threshold here to avoid the command line options of the regular
+ // inliner to influence pre-inlining. The only fields of InlineParams we
+ // care about are DefaultThreshold and HintThreshold.
+ InlineParams IP;
+ IP.DefaultThreshold = PreInlineThreshold;
+ // FIXME: The hint threshold has the same value used by the regular inliner.
+ // This should probably be lowered after performance testing.
+ IP.HintThreshold = 325;
+
+ MPM.add(createFunctionInliningPass(IP));
MPM.add(createSROAPass());
MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
MPM.add(createInstructionCombiningPass()); // Combine silly seq's
addExtensionsToPM(EP_Peephole, MPM);
}
- if (!PGOInstrGen.empty()) {
+ if (EnablePGOInstrGen) {
MPM.add(createPGOInstrumentationGenLegacyPass());
// Add the profile lowering pass.
InstrProfOptions Options;
- Options.InstrProfileOutput = PGOInstrGen;
+ if (!PGOInstrGen.empty())
+ Options.InstrProfileOutput = PGOInstrGen;
MPM.add(createInstrProfilingLegacyPass(Options));
}
if (!PGOInstrUse.empty())
@@ -279,6 +301,8 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
MPM.add(createCFGSimplificationPass()); // Merge & remove BBs
// Combine silly seq's
addInstructionCombiningPass(MPM);
+ if (SizeLevel == 0 && !DisableLibCallsShrinkWrap)
+ MPM.add(createLibCallsShrinkWrapPass());
addExtensionsToPM(EP_Peephole, MPM);
MPM.add(createTailCallEliminationPass()); // Eliminate tail calls
@@ -304,7 +328,8 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
if (OptLevel > 1) {
if (EnableMLSM)
MPM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds
- MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
+ MPM.add(NewGVN ? createNewGVNPass()
+ : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
}
MPM.add(createMemCpyOptPass()); // Remove memcpy / form memset
MPM.add(createSCCPPass()); // Constant prop with SCCP
@@ -336,7 +361,9 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
addInstructionCombiningPass(MPM);
addExtensionsToPM(EP_Peephole, MPM);
if (OptLevel > 1 && UseGVNAfterVectorization)
- MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
+ MPM.add(NewGVN
+ ? createNewGVNPass()
+ : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
else
MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
@@ -358,6 +385,11 @@ void PassManagerBuilder::addFunctionSimplificationPasses(
void PassManagerBuilder::populateModulePassManager(
legacy::PassManagerBase &MPM) {
+ if (!PGOSampleUse.empty()) {
+ MPM.add(createPruneEHPass());
+ MPM.add(createSampleProfileLoaderPass(PGOSampleUse));
+ }
+
// Allow forcing function attributes as a debugging and tuning aid.
MPM.add(createForceFunctionAttrsLegacyPass());
@@ -380,6 +412,10 @@ void PassManagerBuilder::populateModulePassManager(
else if (!GlobalExtensions->empty() || !Extensions.empty())
MPM.add(createBarrierNoopPass());
+ if (PrepareForThinLTO)
+ // Rename anon globals to be able to export them in the summary.
+ MPM.add(createNameAnonGlobalPass());
+
addExtensionsToPM(EP_EnabledOnOptLevel0, MPM);
return;
}
@@ -390,6 +426,16 @@ void PassManagerBuilder::populateModulePassManager(
addInitialAliasAnalysisPasses(MPM);
+ // For ThinLTO there are two passes of indirect call promotion. The
+ // first is during the compile phase when PerformThinLTO=false and
+ // intra-module indirect call targets are promoted. The second is during
+ // the ThinLTO backend when PerformThinLTO=true, when we promote imported
+ // inter-module indirect calls. For that we perform indirect call promotion
+ // earlier in the pass pipeline, here before globalopt. Otherwise imported
+ // available_externally functions look unreferenced and are removed.
+ if (PerformThinLTO)
+ MPM.add(createPGOIndirectCallPromotionLegacyPass(/*InLTO = */ true));
+
if (!DisableUnitAtATime) {
// Infer attributes about declarations if possible.
MPM.add(createInferFunctionAttrsLegacyPass());
@@ -412,11 +458,12 @@ void PassManagerBuilder::populateModulePassManager(
/// PGO instrumentation is added during the compile phase for ThinLTO, do
/// not run it a second time
addPGOInstrPasses(MPM);
+ // Indirect call promotion that promotes intra-module targets only.
+ // For ThinLTO this is done earlier due to interactions with globalopt
+ // for imported functions.
+ MPM.add(createPGOIndirectCallPromotionLegacyPass());
}
- // Indirect call promotion that promotes intra-module targets only.
- MPM.add(createPGOIndirectCallPromotionLegacyPass());
-
if (EnableNonLTOGlobalsModRef)
// We add a module alias analysis pass here. In part due to bugs in the
// analysis infrastructure this "works" in that the analysis stays alive
@@ -435,6 +482,7 @@ void PassManagerBuilder::populateModulePassManager(
if (OptLevel > 2)
MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args
+ addExtensionsToPM(EP_CGSCCOptimizerLate, MPM);
addFunctionSimplificationPasses(MPM);
// FIXME: This is a HACK! The inliner pass above implicitly creates a CGSCC
@@ -464,8 +512,8 @@ void PassManagerBuilder::populateModulePassManager(
if (PrepareForThinLTO) {
// Reduce the size of the IR as much as possible.
MPM.add(createGlobalOptimizerPass());
- // Rename anon function to be able to export them in the summary.
- MPM.add(createNameAnonFunctionPass());
+ // Rename anon globals to be able to export them in the summary.
+ MPM.add(createNameAnonGlobalPass());
return;
}
@@ -502,8 +550,7 @@ void PassManagerBuilder::populateModulePassManager(
// correct in the face of IR changes).
MPM.add(createGlobalsAAWrapperPass());
- if (RunFloat2Int)
- MPM.add(createFloat2IntPass());
+ MPM.add(createFloat2IntPass());
addExtensionsToPM(EP_VectorizerStart, MPM);
@@ -516,7 +563,7 @@ void PassManagerBuilder::populateModulePassManager(
// into separate loop that would otherwise inhibit vectorization. This is
// currently only performed for loops marked with the metadata
// llvm.loop.distribute=true or when -enable-loop-distribute is specified.
- MPM.add(createLoopDistributePass(/*ProcessAllLoopsByDefault=*/false));
+ MPM.add(createLoopDistributePass());
MPM.add(createLoopVectorizePass(DisableUnrollLoops, LoopVectorize));
@@ -560,7 +607,9 @@ void PassManagerBuilder::populateModulePassManager(
addInstructionCombiningPass(MPM);
addExtensionsToPM(EP_Peephole, MPM);
if (OptLevel > 1 && UseGVNAfterVectorization)
- MPM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
+ MPM.add(NewGVN
+ ? createNewGVNPass()
+ : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies
else
MPM.add(createEarlyCSEPass()); // Catch trivial redundancies
@@ -585,10 +634,7 @@ void PassManagerBuilder::populateModulePassManager(
// outer loop. LICM pass can help to promote the runtime check out if the
// checked value is loop invariant.
MPM.add(createLICMPass());
-
- // Get rid of LCSSA nodes.
- MPM.add(createInstructionSimplifierPass());
- }
+ }
// After vectorization and unrolling, assume intrinsics may tell us more
// about pointer alignments.
@@ -609,6 +655,13 @@ void PassManagerBuilder::populateModulePassManager(
if (MergeFunctions)
MPM.add(createMergeFunctionsPass());
+ // LoopSink pass sinks instructions hoisted by LICM, which serves as a
+ // canonicalization pass that enables other optimizations. As a result,
+ // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
+ // result too early.
+ MPM.add(createLoopSinkPass());
+ // Get rid of LCSSA nodes.
+ MPM.add(createInstructionSimplifierPass());
addExtensionsToPM(EP_OptimizerLast, MPM);
}
@@ -620,9 +673,6 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
// Provide AliasAnalysis services for optimizations.
addInitialAliasAnalysisPasses(PM);
- if (ModuleSummary)
- PM.add(createFunctionImportPass(ModuleSummary));
-
// Allow forcing function attributes as a debugging and tuning aid.
PM.add(createForceFunctionAttrsLegacyPass());
@@ -647,6 +697,11 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
PM.add(createPostOrderFunctionAttrsLegacyPass());
PM.add(createReversePostOrderFunctionAttrsPass());
+ // Split globals using inrange annotations on GEP indices. This can help
+ // improve the quality of generated code when virtual constant propagation or
+ // control flow integrity are enabled.
+ PM.add(createGlobalSplitPass());
+
// Apply whole-program devirtualization and virtual constant propagation.
PM.add(createWholeProgramDevirtPass());
@@ -706,7 +761,8 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
PM.add(createLICMPass()); // Hoist loop invariants.
if (EnableMLSM)
PM.add(createMergedLoadStoreMotionPass()); // Merge ld/st in diamonds.
- PM.add(createGVNPass(DisableGVNLoadPRE)); // Remove redundancies.
+ PM.add(NewGVN ? createNewGVNPass()
+ : createGVNPass(DisableGVNLoadPRE)); // Remove redundancies.
PM.add(createMemCpyOptPass()); // Remove dead memcpys.
// Nuke dead stores.
@@ -777,9 +833,6 @@ void PassManagerBuilder::populateThinLTOPassManager(
if (VerifyInput)
PM.add(createVerifierPass());
- if (ModuleSummary)
- PM.add(createFunctionImportPass(ModuleSummary));
-
populateModulePassManager(PM);
if (VerifyOutput)
diff --git a/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
index 2aa3fa55cefd..d9acb9b1a743 100644
--- a/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/PruneEH.cpp
@@ -90,10 +90,7 @@ static bool runImpl(CallGraphSCC &SCC, CallGraph &CG) {
if (!F) {
SCCMightUnwind = true;
SCCMightReturn = true;
- } else if (F->isDeclaration() || F->isInterposable()) {
- // Note: isInterposable (as opposed to hasExactDefinition) is fine above,
- // since we're not inferring new attributes here, but only using existing,
- // assumed to be correct, function attributes.
+ } else if (!F->hasExactDefinition()) {
SCCMightUnwind |= !F->doesNotThrow();
SCCMightReturn |= !F->doesNotReturn();
} else {
diff --git a/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp b/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 39de108edc06..6a43f8dbac48 100644
--- a/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -88,6 +88,52 @@ typedef DenseMap<Edge, uint64_t> EdgeWeightMap;
typedef DenseMap<const BasicBlock *, SmallVector<const BasicBlock *, 8>>
BlockEdgeMap;
+class SampleCoverageTracker {
+public:
+ SampleCoverageTracker() : SampleCoverage(), TotalUsedSamples(0) {}
+
+ bool markSamplesUsed(const FunctionSamples *FS, uint32_t LineOffset,
+ uint32_t Discriminator, uint64_t Samples);
+ unsigned computeCoverage(unsigned Used, unsigned Total) const;
+ unsigned countUsedRecords(const FunctionSamples *FS) const;
+ unsigned countBodyRecords(const FunctionSamples *FS) const;
+ uint64_t getTotalUsedSamples() const { return TotalUsedSamples; }
+ uint64_t countBodySamples(const FunctionSamples *FS) const;
+ void clear() {
+ SampleCoverage.clear();
+ TotalUsedSamples = 0;
+ }
+
+private:
+ typedef std::map<LineLocation, unsigned> BodySampleCoverageMap;
+ typedef DenseMap<const FunctionSamples *, BodySampleCoverageMap>
+ FunctionSamplesCoverageMap;
+
+ /// Coverage map for sampling records.
+ ///
+ /// This map keeps a record of sampling records that have been matched to
+ /// an IR instruction. This is used to detect some form of staleness in
+ /// profiles (see flag -sample-profile-check-coverage).
+ ///
+ /// Each entry in the map corresponds to a FunctionSamples instance. This is
+ /// another map that counts how many times the sample record at the
+ /// given location has been used.
+ FunctionSamplesCoverageMap SampleCoverage;
+
+ /// Number of samples used from the profile.
+ ///
+ /// When a sampling record is used for the first time, the samples from
+ /// that record are added to this accumulator. Coverage is later computed
+ /// based on the total number of samples available in this function and
+ /// its callsites.
+ ///
+ /// Note that this accumulator tracks samples used from a single function
+ /// and all the inlined callsites. Strictly, we should have a map of counters
+ /// keyed by FunctionSamples pointers, but these stats are cleared after
+ /// every function, so we just need to keep a single counter.
+ uint64_t TotalUsedSamples;
+};
+
/// \brief Sample profile pass.
///
/// This pass reads profile data from the file specified by
@@ -110,9 +156,9 @@ protected:
bool runOnFunction(Function &F);
unsigned getFunctionLoc(Function &F);
bool emitAnnotations(Function &F);
- ErrorOr<uint64_t> getInstWeight(const Instruction &I) const;
- ErrorOr<uint64_t> getBlockWeight(const BasicBlock *BB) const;
- const FunctionSamples *findCalleeFunctionSamples(const CallInst &I) const;
+ ErrorOr<uint64_t> getInstWeight(const Instruction &I);
+ ErrorOr<uint64_t> getBlockWeight(const BasicBlock *BB);
+ const FunctionSamples *findCalleeFunctionSamples(const Instruction &I) const;
const FunctionSamples *findFunctionSamples(const Instruction &I) const;
bool inlineHotFunctions(Function &F);
void printEdgeWeight(raw_ostream &OS, Edge E);
@@ -125,7 +171,7 @@ protected:
void propagateWeights(Function &F);
uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge);
void buildEdges(Function &F);
- bool propagateThroughEdges(Function &F);
+ bool propagateThroughEdges(Function &F, bool UpdateBlockCount);
void computeDominanceAndLoopInfo(Function &F);
unsigned getOffset(unsigned L, unsigned H) const;
void clearFunctionData();
@@ -169,6 +215,8 @@ protected:
/// \brief Successors for each basic block in the CFG.
BlockEdgeMap Successors;
+ SampleCoverageTracker CoverageTracker;
+
/// \brief Profile reader object.
std::unique_ptr<SampleProfileReader> Reader;
@@ -176,7 +224,7 @@ protected:
FunctionSamples *Samples;
/// \brief Name of the profile file to load.
- StringRef Filename;
+ std::string Filename;
/// \brief Flag indicating whether the profile input loaded successfully.
bool ProfileIsValid;
@@ -204,64 +252,17 @@ public:
bool doInitialization(Module &M) override {
return SampleLoader.doInitialization(M);
}
- const char *getPassName() const override { return "Sample profile pass"; }
+ StringRef getPassName() const override { return "Sample profile pass"; }
bool runOnModule(Module &M) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionCacheTracker>();
}
-private:
- SampleProfileLoader SampleLoader;
-};
-
-class SampleCoverageTracker {
-public:
- SampleCoverageTracker() : SampleCoverage(), TotalUsedSamples(0) {}
-
- bool markSamplesUsed(const FunctionSamples *FS, uint32_t LineOffset,
- uint32_t Discriminator, uint64_t Samples);
- unsigned computeCoverage(unsigned Used, unsigned Total) const;
- unsigned countUsedRecords(const FunctionSamples *FS) const;
- unsigned countBodyRecords(const FunctionSamples *FS) const;
- uint64_t getTotalUsedSamples() const { return TotalUsedSamples; }
- uint64_t countBodySamples(const FunctionSamples *FS) const;
- void clear() {
- SampleCoverage.clear();
- TotalUsedSamples = 0;
- }
private:
- typedef std::map<LineLocation, unsigned> BodySampleCoverageMap;
- typedef DenseMap<const FunctionSamples *, BodySampleCoverageMap>
- FunctionSamplesCoverageMap;
-
- /// Coverage map for sampling records.
- ///
- /// This map keeps a record of sampling records that have been matched to
- /// an IR instruction. This is used to detect some form of staleness in
- /// profiles (see flag -sample-profile-check-coverage).
- ///
- /// Each entry in the map corresponds to a FunctionSamples instance. This is
- /// another map that counts how many times the sample record at the
- /// given location has been used.
- FunctionSamplesCoverageMap SampleCoverage;
-
- /// Number of samples used from the profile.
- ///
- /// When a sampling record is used for the first time, the samples from
- /// that record are added to this accumulator. Coverage is later computed
- /// based on the total number of samples available in this function and
- /// its callsites.
- ///
- /// Note that this accumulator tracks samples used from a single function
- /// and all the inlined callsites. Strictly, we should have a map of counters
- /// keyed by FunctionSamples pointers, but these stats are cleared after
- /// every function, so we just need to keep a single counter.
- uint64_t TotalUsedSamples;
+ SampleProfileLoader SampleLoader;
};
-SampleCoverageTracker CoverageTracker;
-
/// Return true if the given callsite is hot wrt to its caller.
///
/// Functions that were inlined in the original binary will be represented
@@ -451,7 +452,7 @@ void SampleProfileLoader::printBlockWeight(raw_ostream &OS,
///
/// \returns the weight of \p Inst.
ErrorOr<uint64_t>
-SampleProfileLoader::getInstWeight(const Instruction &Inst) const {
+SampleProfileLoader::getInstWeight(const Instruction &Inst) {
const DebugLoc &DLoc = Inst.getDebugLoc();
if (!DLoc)
return std::error_code();
@@ -460,18 +461,28 @@ SampleProfileLoader::getInstWeight(const Instruction &Inst) const {
if (!FS)
return std::error_code();
- // Ignore all dbg_value intrinsics.
- const IntrinsicInst *II = dyn_cast<IntrinsicInst>(&Inst);
- if (II && II->getIntrinsicID() == Intrinsic::dbg_value)
+ // Ignore all intrinsics and branch instructions.
+ // Branch instruction usually contains debug info from sources outside of
+ // the residing basic block, thus we ignore them during annotation.
+ if (isa<BranchInst>(Inst) || isa<IntrinsicInst>(Inst))
return std::error_code();
+ // If a call/invoke instruction is inlined in profile, but not inlined here,
+ // it means that the inlined callsite has no sample, thus the call
+ // instruction should have 0 count.
+ bool IsCall = isa<CallInst>(Inst) || isa<InvokeInst>(Inst);
+ if (IsCall && findCalleeFunctionSamples(Inst))
+ return 0;
+
const DILocation *DIL = DLoc;
unsigned Lineno = DLoc.getLine();
unsigned HeaderLineno = DIL->getScope()->getSubprogram()->getLine();
uint32_t LineOffset = getOffset(Lineno, HeaderLineno);
uint32_t Discriminator = DIL->getDiscriminator();
- ErrorOr<uint64_t> R = FS->findSamplesAt(LineOffset, Discriminator);
+ ErrorOr<uint64_t> R = IsCall
+ ? FS->findCallSamplesAt(LineOffset, Discriminator)
+ : FS->findSamplesAt(LineOffset, Discriminator);
if (R) {
bool FirstMark =
CoverageTracker.markSamplesUsed(FS, LineOffset, Discriminator, R.get());
@@ -488,13 +499,6 @@ SampleProfileLoader::getInstWeight(const Instruction &Inst) const {
<< Inst << " (line offset: " << Lineno - HeaderLineno << "."
<< DIL->getDiscriminator() << " - weight: " << R.get()
<< ")\n");
- } else {
- // If a call instruction is inlined in profile, but not inlined here,
- // it means that the inlined callsite has no sample, thus the call
- // instruction should have 0 count.
- const CallInst *CI = dyn_cast<CallInst>(&Inst);
- if (CI && findCalleeFunctionSamples(*CI))
- R = 0;
}
return R;
}
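
The rewritten getInstWeight skips branches and intrinsics, gives weight 0 to a call whose callee the profile saw inlined but which was not inlined here, and otherwise looks samples up by (line offset, discriminator), using the call-site table for calls. A sketch of that decision order over a toy profile type:

#include <cstdint>
#include <map>
#include <optional>
#include <set>
#include <utility>

// Toy stand-in for FunctionSamples: body and call-site samples keyed by
// (line offset from the function header, discriminator).
struct ToyProfile {
  std::map<std::pair<uint32_t, uint32_t>, uint64_t> Body, CallSites;
  std::set<std::pair<uint32_t, uint32_t>> InlinedCallees;
};

std::optional<uint64_t> instWeight(const ToyProfile &FS, uint32_t LineOffset,
                                   uint32_t Discriminator, bool IsBranch,
                                   bool IsIntrinsic, bool IsCall) {
  if (IsBranch || IsIntrinsic)
    return std::nullopt;              // never annotated directly
  // A call the profile saw inlined, but which is not inlined here, has no
  // samples of its own: give it weight 0.
  if (IsCall && FS.InlinedCallees.count({LineOffset, Discriminator}))
    return 0;
  const auto &Table = IsCall ? FS.CallSites : FS.Body;
  auto It = Table.find({LineOffset, Discriminator});
  if (It == Table.end())
    return std::nullopt;
  return It->second;
}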
@@ -508,23 +512,17 @@ SampleProfileLoader::getInstWeight(const Instruction &Inst) const {
///
/// \returns the weight for \p BB.
ErrorOr<uint64_t>
-SampleProfileLoader::getBlockWeight(const BasicBlock *BB) const {
- DenseMap<uint64_t, uint64_t> CM;
+SampleProfileLoader::getBlockWeight(const BasicBlock *BB) {
+ uint64_t Max = 0;
+ bool HasWeight = false;
for (auto &I : BB->getInstList()) {
const ErrorOr<uint64_t> &R = getInstWeight(I);
- if (R) CM[R.get()]++;
- }
- if (CM.size() == 0) return std::error_code();
- uint64_t W = 0, C = 0;
- for (const auto &C_W : CM) {
- if (C_W.second == W) {
- C = std::max(C, C_W.first);
- } else if (C_W.second > W) {
- C = C_W.first;
- W = C_W.second;
+ if (R) {
+ Max = std::max(Max, R.get());
+ HasWeight = true;
}
}
- return C;
+ return HasWeight ? ErrorOr<uint64_t>(Max) : std::error_code();
}
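
The new getBlockWeight simply takes the maximum weight among the block's annotated instructions, rather than the most frequently occurring weight. A one-function sketch:

#include <algorithm>
#include <cstdint>
#include <optional>
#include <vector>

// Block weight = max over the weights of its annotated instructions;
// nullopt when no instruction in the block carried a weight.
std::optional<uint64_t>
blockWeight(const std::vector<std::optional<uint64_t>> &InstWeights) {
  std::optional<uint64_t> Max;
  for (const auto &W : InstWeights)
    if (W)
      Max = Max ? std::max(*Max, *W) : *W;
  return Max;
}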
/// \brief Compute and store the weights of every basic block.
@@ -551,18 +549,18 @@ bool SampleProfileLoader::computeBlockWeights(Function &F) {
/// \brief Get the FunctionSamples for a call instruction.
///
-/// The FunctionSamples of a call instruction \p Inst is the inlined
+/// The FunctionSamples of a call/invoke instruction \p Inst is the inlined
/// instance in which that call instruction is calling to. It contains
/// all samples that resides in the inlined instance. We first find the
/// inlined instance in which the call instruction is from, then we
/// traverse its children to find the callsite with the matching
-/// location and callee function name.
+/// location.
///
-/// \param Inst Call instruction to query.
+/// \param Inst Call/Invoke instruction to query.
///
/// \returns The FunctionSamples pointer to the inlined instance.
const FunctionSamples *
-SampleProfileLoader::findCalleeFunctionSamples(const CallInst &Inst) const {
+SampleProfileLoader::findCalleeFunctionSamples(const Instruction &Inst) const {
const DILocation *DIL = Inst.getDebugLoc();
if (!DIL) {
return nullptr;
@@ -611,7 +609,6 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
return FS;
}
-
/// \brief Iteratively inline hot callsites of a function.
///
/// Iteratively traverse all callsites of the function \p F, and find if
@@ -627,22 +624,36 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
bool SampleProfileLoader::inlineHotFunctions(Function &F) {
bool Changed = false;
LLVMContext &Ctx = F.getContext();
+ std::function<AssumptionCache &(Function &)> GetAssumptionCache = [&](
+ Function &F) -> AssumptionCache & { return ACT->getAssumptionCache(F); };
while (true) {
bool LocalChanged = false;
- SmallVector<CallInst *, 10> CIS;
+ SmallVector<Instruction *, 10> CIS;
for (auto &BB : F) {
+ bool Hot = false;
+ SmallVector<Instruction *, 10> Candidates;
for (auto &I : BB.getInstList()) {
- CallInst *CI = dyn_cast<CallInst>(&I);
- if (CI && callsiteIsHot(Samples, findCalleeFunctionSamples(*CI)))
- CIS.push_back(CI);
+ const FunctionSamples *FS = nullptr;
+ if ((isa<CallInst>(I) || isa<InvokeInst>(I)) &&
+ (FS = findCalleeFunctionSamples(I))) {
+ Candidates.push_back(&I);
+ if (callsiteIsHot(Samples, FS))
+ Hot = true;
+ }
+ }
+ if (Hot) {
+ CIS.insert(CIS.begin(), Candidates.begin(), Candidates.end());
}
}
- for (auto CI : CIS) {
- InlineFunctionInfo IFI(nullptr, ACT);
- Function *CalledFunction = CI->getCalledFunction();
- DebugLoc DLoc = CI->getDebugLoc();
- uint64_t NumSamples = findCalleeFunctionSamples(*CI)->getTotalSamples();
- if (InlineFunction(CI, IFI)) {
+ for (auto I : CIS) {
+ InlineFunctionInfo IFI(nullptr, ACT ? &GetAssumptionCache : nullptr);
+ CallSite CS(I);
+ Function *CalledFunction = CS.getCalledFunction();
+ if (!CalledFunction || !CalledFunction->getSubprogram())
+ continue;
+ DebugLoc DLoc = I->getDebugLoc();
+ uint64_t NumSamples = findCalleeFunctionSamples(*I)->getTotalSamples();
+ if (InlineFunction(CS, IFI)) {
LocalChanged = true;
emitOptimizationRemark(Ctx, DEBUG_TYPE, F, DLoc,
Twine("inlined hot callee '") +
@@ -693,6 +704,10 @@ void SampleProfileLoader::findEquivalencesFor(
bool IsInSameLoop = LI->getLoopFor(BB1) == LI->getLoopFor(BB2);
if (BB1 != BB2 && IsDomParent && IsInSameLoop) {
EquivalenceClass[BB2] = EC;
+ // If BB2 is visited, then the entire EC should be marked as visited.
+ if (VisitedBlocks.count(BB2)) {
+ VisitedBlocks.insert(EC);
+ }
// If BB2 is heavier than BB1, make BB2 have the same weight
// as BB1.
@@ -705,7 +720,11 @@ void SampleProfileLoader::findEquivalencesFor(
Weight = std::max(Weight, BlockWeights[BB2]);
}
}
- BlockWeights[EC] = Weight;
+ if (EC == &EC->getParent()->getEntryBlock()) {
+ BlockWeights[EC] = Samples->getHeadSamples() + 1;
+ } else {
+ BlockWeights[EC] = Weight;
+ }
}
/// \brief Find equivalence classes.
@@ -796,9 +815,12 @@ uint64_t SampleProfileLoader::visitEdge(Edge E, unsigned *NumUnknownEdges,
/// count of the basic block, if needed.
///
/// \param F Function to process.
+/// \param UpdateBlockCount Whether we should update basic block counts that
+/// has already been annotated.
///
/// \returns True if new weights were assigned to edges or blocks.
-bool SampleProfileLoader::propagateThroughEdges(Function &F) {
+bool SampleProfileLoader::propagateThroughEdges(Function &F,
+ bool UpdateBlockCount) {
bool Changed = false;
DEBUG(dbgs() << "\nPropagation through edges\n");
for (const auto &BI : F) {
@@ -890,11 +912,35 @@ bool SampleProfileLoader::propagateThroughEdges(Function &F) {
EdgeWeights[UnknownEdge] = BBWeight - TotalWeight;
else
EdgeWeights[UnknownEdge] = 0;
+ const BasicBlock *OtherEC;
+ if (i == 0)
+ OtherEC = EquivalenceClass[UnknownEdge.first];
+ else
+ OtherEC = EquivalenceClass[UnknownEdge.second];
+ // Edge weights should never exceed the BB weights it connects.
+ if (VisitedBlocks.count(OtherEC) &&
+ EdgeWeights[UnknownEdge] > BlockWeights[OtherEC])
+ EdgeWeights[UnknownEdge] = BlockWeights[OtherEC];
VisitedEdges.insert(UnknownEdge);
Changed = true;
DEBUG(dbgs() << "Set weight for edge: ";
printEdgeWeight(dbgs(), UnknownEdge));
}
+ } else if (VisitedBlocks.count(EC) && BlockWeights[EC] == 0) {
+ // If a block Weights 0, all its in/out edges should weight 0.
+ if (i == 0) {
+ for (auto *Pred : Predecessors[BB]) {
+ Edge E = std::make_pair(Pred, BB);
+ EdgeWeights[E] = 0;
+ VisitedEdges.insert(E);
+ }
+ } else {
+ for (auto *Succ : Successors[BB]) {
+ Edge E = std::make_pair(BB, Succ);
+ EdgeWeights[E] = 0;
+ VisitedEdges.insert(E);
+ }
+ }
} else if (SelfReferentialEdge.first && VisitedBlocks.count(EC)) {
uint64_t &BBWeight = BlockWeights[BB];
// We have a self-referential edge and the weight of BB is known.
@@ -907,6 +953,11 @@ bool SampleProfileLoader::propagateThroughEdges(Function &F) {
DEBUG(dbgs() << "Set self-referential edge weight to: ";
printEdgeWeight(dbgs(), SelfReferentialEdge));
}
+ if (UpdateBlockCount && !VisitedBlocks.count(EC) && TotalWeight > 0) {
+ BlockWeights[EC] = TotalWeight;
+ VisitedBlocks.insert(EC);
+ Changed = true;
+ }
}
}
@@ -966,7 +1017,21 @@ void SampleProfileLoader::propagateWeights(Function &F) {
// Add an entry count to the function using the samples gathered
// at the function entry.
- F.setEntryCount(Samples->getHeadSamples());
+ F.setEntryCount(Samples->getHeadSamples() + 1);
+
+ // If BB weight is larger than its corresponding loop's header BB weight,
+ // use the BB weight to replace the loop header BB weight.
+ for (auto &BI : F) {
+ BasicBlock *BB = &BI;
+ Loop *L = LI->getLoopFor(BB);
+ if (!L) {
+ continue;
+ }
+ BasicBlock *Header = L->getHeader();
+ if (Header && BlockWeights[BB] > BlockWeights[Header]) {
+ BlockWeights[Header] = BlockWeights[BB];
+ }
+ }
// Before propagation starts, build, for each block, a list of
// unique predecessors and successors. This is necessary to handle
@@ -977,7 +1042,23 @@ void SampleProfileLoader::propagateWeights(Function &F) {
// Propagate until we converge or we go past the iteration limit.
while (Changed && I++ < SampleProfileMaxPropagateIterations) {
- Changed = propagateThroughEdges(F);
+ Changed = propagateThroughEdges(F, false);
+ }
+
+ // The first propagation propagates BB counts from annotated BBs to unknown
+ // BBs. The 2nd propagation pass resets edges weights, and use all BB weights
+ // to propagate edge weights.
+ VisitedEdges.clear();
+ Changed = true;
+ while (Changed && I++ < SampleProfileMaxPropagateIterations) {
+ Changed = propagateThroughEdges(F, false);
+ }
+
+ // The 3rd propagation pass allows adjust annotated BB weights that are
+ // obviously wrong.
+ Changed = true;
+ while (Changed && I++ < SampleProfileMaxPropagateIterations) {
+ Changed = propagateThroughEdges(F, true);
}
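
Propagation is now split into three bounded fixed-point phases that share one iteration budget: two passes with UpdateBlockCount=false (visited edges are cleared between them) and a final pass that may adjust annotated block counts. A generic sketch of such a bounded driver; propagate() below is a placeholder for propagateThroughEdges:

#include <functional>

// Run Step until it reports no change or the shared iteration budget runs out.
void runToFixedPoint(const std::function<bool()> &Step, unsigned &Iter,
                     unsigned MaxIter) {
  bool Changed = true;
  while (Changed && Iter++ < MaxIter)
    Changed = Step();
}

// Shape of the three chained phases, all drawing from the same budget:
//   unsigned I = 0;
//   runToFixedPoint([&] { return propagate(/*UpdateBlockCount=*/false); }, I, N);
//   /* clear visited edges */
//   runToFixedPoint([&] { return propagate(/*UpdateBlockCount=*/false); }, I, N);
//   runToFixedPoint([&] { return propagate(/*UpdateBlockCount=*/true); }, I, N);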
// Generate MD_prof metadata for every branch instruction using the
@@ -994,7 +1075,7 @@ void SampleProfileLoader::propagateWeights(Function &F) {
if (!dyn_cast<IntrinsicInst>(&I)) {
SmallVector<uint32_t, 1> Weights;
Weights.push_back(BlockWeights[BB]);
- CI->setMetadata(LLVMContext::MD_prof,
+ CI->setMetadata(LLVMContext::MD_prof,
MDB.createBranchWeights(Weights));
}
}
@@ -1023,7 +1104,9 @@ void SampleProfileLoader::propagateWeights(Function &F) {
DEBUG(dbgs() << " (saturated due to uint32_t overflow)");
Weight = std::numeric_limits<uint32_t>::max();
}
- Weights.push_back(static_cast<uint32_t>(Weight));
+ // Weight is added by one to avoid propagation errors introduced by
+ // 0 weights.
+ Weights.push_back(static_cast<uint32_t>(Weight + 1));
if (Weight != 0) {
if (Weight > MaxWeight) {
MaxWeight = Weight;
@@ -1192,10 +1275,10 @@ bool SampleProfileLoader::emitAnnotations(Function &F) {
char SampleProfileLoaderLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(SampleProfileLoaderLegacyPass, "sample-profile",
- "Sample Profile loader", false, false)
+ "Sample Profile loader", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_END(SampleProfileLoaderLegacyPass, "sample-profile",
- "Sample Profile loader", false, false)
+ "Sample Profile loader", false, false)
bool SampleProfileLoader::doInitialization(Module &M) {
auto &Ctx = M.getContext();
@@ -1232,12 +1315,13 @@ bool SampleProfileLoader::runOnModule(Module &M) {
clearFunctionData();
retval |= runOnFunction(F);
}
- M.setProfileSummary(Reader->getSummary().getMD(M.getContext()));
+ if (M.getProfileSummary() == nullptr)
+ M.setProfileSummary(Reader->getSummary().getMD(M.getContext()));
return retval;
}
bool SampleProfileLoaderLegacyPass::runOnModule(Module &M) {
- // FIXME: pass in AssumptionCache correctly for the new pass manager.
+ // FIXME: pass in AssumptionCache correctly for the new pass manager.
SampleLoader.setACT(&getAnalysis<AssumptionCacheTracker>());
return SampleLoader.runOnModule(M);
}
@@ -1251,7 +1335,7 @@ bool SampleProfileLoader::runOnFunction(Function &F) {
}
PreservedAnalyses SampleProfileLoaderPass::run(Module &M,
- AnalysisManager<Module> &AM) {
+ ModuleAnalysisManager &AM) {
SampleProfileLoader SampleLoader(SampleProfileFile);
diff --git a/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp
index fd250366cef2..8f6f161428e8 100644
--- a/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp
@@ -219,7 +219,8 @@ static bool StripSymbolNames(Module &M, bool PreserveDbgInfo) {
if (I.hasLocalLinkage() && llvmUsedValues.count(&I) == 0)
if (!PreserveDbgInfo || !I.getName().startswith("llvm.dbg"))
I.setName(""); // Internal symbols can't participate in linkage
- StripSymtab(I.getValueSymbolTable(), PreserveDbgInfo);
+ if (auto *Symtab = I.getValueSymbolTable())
+ StripSymtab(*Symtab, PreserveDbgInfo);
}
// Remove all names from types.
@@ -312,26 +313,29 @@ bool StripDeadDebugInfo::runOnModule(Module &M) {
// replace the current list of potentially dead global variables/functions
// with the live list.
SmallVector<Metadata *, 64> LiveGlobalVariables;
- SmallVector<Metadata *, 64> LiveSubprograms;
- DenseSet<const MDNode *> VisitedSet;
-
- std::set<DISubprogram *> LiveSPs;
- for (Function &F : M) {
- if (DISubprogram *SP = F.getSubprogram())
- LiveSPs.insert(SP);
+ DenseSet<DIGlobalVariableExpression *> VisitedSet;
+
+ std::set<DIGlobalVariableExpression *> LiveGVs;
+ for (GlobalVariable &GV : M.globals()) {
+ SmallVector<DIGlobalVariableExpression *, 1> GVEs;
+ GV.getDebugInfo(GVEs);
+ for (auto *GVE : GVEs)
+ LiveGVs.insert(GVE);
}
for (DICompileUnit *DIC : F.compile_units()) {
// Create our live global variable list.
bool GlobalVariableChange = false;
- for (DIGlobalVariable *DIG : DIC->getGlobalVariables()) {
+ for (auto *DIG : DIC->getGlobalVariables()) {
+ if (DIG->getExpression() && DIG->getExpression()->isConstant())
+ LiveGVs.insert(DIG);
+
// Make sure we only visit each global variable only once.
if (!VisitedSet.insert(DIG).second)
continue;
- // If the global variable referenced by DIG is not null, the global
- // variable is live.
- if (DIG->getVariable())
+ // If a global variable references DIG, the global variable is live.
+ if (LiveGVs.count(DIG))
LiveGlobalVariables.push_back(DIG);
else
GlobalVariableChange = true;
@@ -345,7 +349,6 @@ bool StripDeadDebugInfo::runOnModule(Module &M) {
}
// Reset lists for the next iteration.
- LiveSubprograms.clear();
LiveGlobalVariables.clear();
}
diff --git a/contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
new file mode 100644
index 000000000000..3680cfc813a1
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp
@@ -0,0 +1,344 @@
+//===- ThinLTOBitcodeWriter.cpp - Bitcode writing pass for ThinLTO --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass prepares a module containing type metadata for ThinLTO by splitting
+// it into regular and thin LTO parts if possible, and writing both parts to
+// a multi-module bitcode file. Modules that do not contain type metadata are
+// written unmodified as a single module.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO.h"
+#include "llvm/Analysis/ModuleSummaryAnalysis.h"
+#include "llvm/Analysis/TypeMetadataUtils.h"
+#include "llvm/Bitcode/BitcodeWriter.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/ScopedPrinter.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+using namespace llvm;
+
+namespace {
+
+// Produce a unique identifier for this module by taking the MD5 sum of the
+// names of the module's strong external symbols. This identifier is
+// normally guaranteed to be unique, or the program would fail to link due to
+// multiply defined symbols.
+//
+// If the module has no strong external symbols (such a module may still have a
+// semantic effect if it performs global initialization), we cannot produce a
+// unique identifier for this module, so we return the empty string, which
+// causes the entire module to be written as a regular LTO module.
+std::string getModuleId(Module *M) {
+ MD5 Md5;
+ bool ExportsSymbols = false;
+ auto AddGlobal = [&](GlobalValue &GV) {
+ if (GV.isDeclaration() || GV.getName().startswith("llvm.") ||
+ !GV.hasExternalLinkage())
+ return;
+ ExportsSymbols = true;
+ Md5.update(GV.getName());
+ Md5.update(ArrayRef<uint8_t>{0});
+ };
+
+ for (auto &F : *M)
+ AddGlobal(F);
+ for (auto &GV : M->globals())
+ AddGlobal(GV);
+ for (auto &GA : M->aliases())
+ AddGlobal(GA);
+ for (auto &IF : M->ifuncs())
+ AddGlobal(IF);
+
+ if (!ExportsSymbols)
+ return "";
+
+ MD5::MD5Result R;
+ Md5.final(R);
+
+ SmallString<32> Str;
+ MD5::stringifyResult(R, Str);
+ return ("$" + Str).str();
+}
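// Illustrative sketch of the hashing above (editorial example; the symbol
// names are hypothetical): a module exporting the strong symbols "foo" and
// "bar" would be identified roughly as follows, using the same MD5 and
// SmallString APIs as the function above.
//   MD5 Hash;
//   for (StringRef Name : {"foo", "bar"}) {
//     Hash.update(Name);                   // hash the symbol name
//     Hash.update(ArrayRef<uint8_t>{0});   // NUL separator between names
//   }
//   MD5::MD5Result Result;
//   Hash.final(Result);
//   SmallString<32> Hex;
//   MD5::stringifyResult(Result, Hex);     // 32-character hex digest
//   std::string Id = ("$" + Hex).str();    // e.g. "$<md5-of-symbol-names>"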
+
+// Promote each local-linkage entity defined by ExportM and used by ImportM by
+// changing visibility and appending the given ModuleId.
+void promoteInternals(Module &ExportM, Module &ImportM, StringRef ModuleId) {
+ auto PromoteInternal = [&](GlobalValue &ExportGV) {
+ if (!ExportGV.hasLocalLinkage())
+ return;
+
+ GlobalValue *ImportGV = ImportM.getNamedValue(ExportGV.getName());
+ if (!ImportGV || ImportGV->use_empty())
+ return;
+
+ std::string NewName = (ExportGV.getName() + ModuleId).str();
+
+ ExportGV.setName(NewName);
+ ExportGV.setLinkage(GlobalValue::ExternalLinkage);
+ ExportGV.setVisibility(GlobalValue::HiddenVisibility);
+
+ ImportGV->setName(NewName);
+ ImportGV->setVisibility(GlobalValue::HiddenVisibility);
+ };
+
+ for (auto &F : ExportM)
+ PromoteInternal(F);
+ for (auto &GV : ExportM.globals())
+ PromoteInternal(GV);
+ for (auto &GA : ExportM.aliases())
+ PromoteInternal(GA);
+ for (auto &IF : ExportM.ifuncs())
+ PromoteInternal(IF);
+}
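// Illustrative example of the promotion (hypothetical names): if ExportM
// defines
//   define internal void @helper() { ... }
// and ImportM still references @helper after the split, then with
// ModuleId == "$<id>" both modules agree on the promoted symbol:
//   define hidden void @"helper$<id>"() { ... }   ; in ExportM
//   declare hidden void @"helper$<id>"()          ; in ImportM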
+
+// Promote all internal (i.e. distinct) type ids used by the module by replacing
+// them with external type ids formed using the module id.
+//
+// Note that this needs to be done before we clone the module because each clone
+// will receive its own set of distinct metadata nodes.
+void promoteTypeIds(Module &M, StringRef ModuleId) {
+ DenseMap<Metadata *, Metadata *> LocalToGlobal;
+ auto ExternalizeTypeId = [&](CallInst *CI, unsigned ArgNo) {
+ Metadata *MD =
+ cast<MetadataAsValue>(CI->getArgOperand(ArgNo))->getMetadata();
+
+ if (isa<MDNode>(MD) && cast<MDNode>(MD)->isDistinct()) {
+ Metadata *&GlobalMD = LocalToGlobal[MD];
+ if (!GlobalMD) {
+ std::string NewName =
+ (to_string(LocalToGlobal.size()) + ModuleId).str();
+ GlobalMD = MDString::get(M.getContext(), NewName);
+ }
+
+ CI->setArgOperand(ArgNo,
+ MetadataAsValue::get(M.getContext(), GlobalMD));
+ }
+ };
+
+ if (Function *TypeTestFunc =
+ M.getFunction(Intrinsic::getName(Intrinsic::type_test))) {
+ for (const Use &U : TypeTestFunc->uses()) {
+ auto CI = cast<CallInst>(U.getUser());
+ ExternalizeTypeId(CI, 1);
+ }
+ }
+
+ if (Function *TypeCheckedLoadFunc =
+ M.getFunction(Intrinsic::getName(Intrinsic::type_checked_load))) {
+ for (const Use &U : TypeCheckedLoadFunc->uses()) {
+ auto CI = cast<CallInst>(U.getUser());
+ ExternalizeTypeId(CI, 2);
+ }
+ }
+
+ for (GlobalObject &GO : M.global_objects()) {
+ SmallVector<MDNode *, 1> MDs;
+ GO.getMetadata(LLVMContext::MD_type, MDs);
+
+ GO.eraseMetadata(LLVMContext::MD_type);
+ for (auto MD : MDs) {
+ auto I = LocalToGlobal.find(MD->getOperand(1));
+ if (I == LocalToGlobal.end()) {
+ GO.addMetadata(LLVMContext::MD_type, *MD);
+ continue;
+ }
+ GO.addMetadata(
+ LLVMContext::MD_type,
+ *MDNode::get(M.getContext(),
+ ArrayRef<Metadata *>{MD->getOperand(0), I->second}));
+ }
+ }
+}
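// Illustrative example (hypothetical metadata): a distinct type id such as
//   !0 = distinct !{}
// referenced by calls like
//   call i1 @llvm.type.test(i8* %p, metadata !0)
// is rewritten to use a module-qualified string instead, e.g.
//   call i1 @llvm.type.test(i8* %p, metadata !"1$<id>")
// and the corresponding !type attachments on globals are updated to name the
// same string, so both halves of the split module keep referring to one id.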
+
+// Drop unused globals, and drop type information from function declarations.
+// FIXME: If we made functions typeless then there would be no need to do this.
+void simplifyExternals(Module &M) {
+ FunctionType *EmptyFT =
+ FunctionType::get(Type::getVoidTy(M.getContext()), false);
+
+ for (auto I = M.begin(), E = M.end(); I != E;) {
+ Function &F = *I++;
+ if (F.isDeclaration() && F.use_empty()) {
+ F.eraseFromParent();
+ continue;
+ }
+
+ if (!F.isDeclaration() || F.getFunctionType() == EmptyFT)
+ continue;
+
+ Function *NewF =
+ Function::Create(EmptyFT, GlobalValue::ExternalLinkage, "", &M);
+ NewF->setVisibility(F.getVisibility());
+ NewF->takeName(&F);
+ F.replaceAllUsesWith(ConstantExpr::getBitCast(NewF, F.getType()));
+ F.eraseFromParent();
+ }
+
+ for (auto I = M.global_begin(), E = M.global_end(); I != E;) {
+ GlobalVariable &GV = *I++;
+ if (GV.isDeclaration() && GV.use_empty()) {
+ GV.eraseFromParent();
+ continue;
+ }
+ }
+}
+
+void filterModule(
+ Module *M, std::function<bool(const GlobalValue *)> ShouldKeepDefinition) {
+ for (Function &F : *M) {
+ if (ShouldKeepDefinition(&F))
+ continue;
+
+ F.deleteBody();
+ F.clearMetadata();
+ }
+
+ for (GlobalVariable &GV : M->globals()) {
+ if (ShouldKeepDefinition(&GV))
+ continue;
+
+ GV.setInitializer(nullptr);
+ GV.setLinkage(GlobalValue::ExternalLinkage);
+ GV.clearMetadata();
+ }
+
+ for (Module::alias_iterator I = M->alias_begin(), E = M->alias_end();
+ I != E;) {
+ GlobalAlias *GA = &*I++;
+ if (ShouldKeepDefinition(GA))
+ continue;
+
+ GlobalObject *GO;
+ if (GA->getValueType()->isFunctionTy())
+ GO = Function::Create(cast<FunctionType>(GA->getValueType()),
+ GlobalValue::ExternalLinkage, "", M);
+ else
+ GO = new GlobalVariable(
+ *M, GA->getValueType(), false, GlobalValue::ExternalLinkage,
+ (Constant *)nullptr, "", (GlobalVariable *)nullptr,
+ GA->getThreadLocalMode(), GA->getType()->getAddressSpace());
+ GO->takeName(GA);
+ GA->replaceAllUsesWith(GO);
+ GA->eraseFromParent();
+ }
+}
+
+// If it's possible to split M into regular and thin LTO parts, do so and write
+// a multi-module bitcode file with the two parts to OS. Otherwise, write only a
+// regular LTO bitcode file to OS.
+void splitAndWriteThinLTOBitcode(raw_ostream &OS, Module &M) {
+ std::string ModuleId = getModuleId(&M);
+ if (ModuleId.empty()) {
+ // We couldn't generate a module ID for this module, just write it out as a
+ // regular LTO module.
+ WriteBitcodeToFile(&M, OS);
+ return;
+ }
+
+ promoteTypeIds(M, ModuleId);
+
+ auto IsInMergedM = [&](const GlobalValue *GV) {
+ auto *GVar = dyn_cast<GlobalVariable>(GV->getBaseObject());
+ if (!GVar)
+ return false;
+
+ SmallVector<MDNode *, 1> MDs;
+ GVar->getMetadata(LLVMContext::MD_type, MDs);
+ return !MDs.empty();
+ };
+
+ ValueToValueMapTy VMap;
+ std::unique_ptr<Module> MergedM(CloneModule(&M, VMap, IsInMergedM));
+
+ filterModule(&M, [&](const GlobalValue *GV) { return !IsInMergedM(GV); });
+
+ promoteInternals(*MergedM, M, ModuleId);
+ promoteInternals(M, *MergedM, ModuleId);
+
+ simplifyExternals(*MergedM);
+
+ SmallVector<char, 0> Buffer;
+ BitcodeWriter W(Buffer);
+
+ // FIXME: Try to re-use BSI and PFI from the original module here.
+ ModuleSummaryIndex Index = buildModuleSummaryIndex(M, nullptr, nullptr);
+ W.writeModule(&M, /*ShouldPreserveUseListOrder=*/false, &Index,
+ /*GenerateHash=*/true);
+
+ W.writeModule(MergedM.get());
+
+ OS << Buffer;
+}
+
+// Returns whether this module needs to be split because it uses type metadata.
+bool requiresSplit(Module &M) {
+ SmallVector<MDNode *, 1> MDs;
+ for (auto &GO : M.global_objects()) {
+ GO.getMetadata(LLVMContext::MD_type, MDs);
+ if (!MDs.empty())
+ return true;
+ }
+
+ return false;
+}
+
+void writeThinLTOBitcode(raw_ostream &OS, Module &M,
+ const ModuleSummaryIndex *Index) {
+ // See if this module has any type metadata. If so, we need to split it.
+ if (requiresSplit(M))
+ return splitAndWriteThinLTOBitcode(OS, M);
+
+ // Otherwise we can just write it out as a regular module.
+ WriteBitcodeToFile(&M, OS, /*ShouldPreserveUseListOrder=*/false, Index,
+ /*GenerateHash=*/true);
+}
+
+class WriteThinLTOBitcode : public ModulePass {
+ raw_ostream &OS; // raw_ostream to print on
+
+public:
+ static char ID; // Pass identification, replacement for typeid
+ WriteThinLTOBitcode() : ModulePass(ID), OS(dbgs()) {
+ initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry());
+ }
+
+ explicit WriteThinLTOBitcode(raw_ostream &o)
+ : ModulePass(ID), OS(o) {
+ initializeWriteThinLTOBitcodePass(*PassRegistry::getPassRegistry());
+ }
+
+ StringRef getPassName() const override { return "ThinLTO Bitcode Writer"; }
+
+ bool runOnModule(Module &M) override {
+ const ModuleSummaryIndex *Index =
+ &(getAnalysis<ModuleSummaryIndexWrapperPass>().getIndex());
+ writeThinLTOBitcode(OS, M, Index);
+ return true;
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ AU.addRequired<ModuleSummaryIndexWrapperPass>();
+ }
+};
+} // anonymous namespace
+
+char WriteThinLTOBitcode::ID = 0;
+INITIALIZE_PASS_BEGIN(WriteThinLTOBitcode, "write-thinlto-bitcode",
+ "Write ThinLTO Bitcode", false, true)
+INITIALIZE_PASS_DEPENDENCY(ModuleSummaryIndexWrapperPass)
+INITIALIZE_PASS_END(WriteThinLTOBitcode, "write-thinlto-bitcode",
+ "Write ThinLTO Bitcode", false, true)
+
+ModulePass *llvm::createWriteThinLTOBitcodePass(raw_ostream &Str) {
+ return new WriteThinLTOBitcode(Str);
+}
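// Illustrative usage sketch (editorial example, not part of the patch): the
// writer can be scheduled with the legacy pass manager; the output path is
// hypothetical.
//   std::error_code EC;
//   raw_fd_ostream OS("module.thinlto.bc", EC, sys::fs::F_None);
//   legacy::PassManager PM;
//   PM.add(createWriteThinLTOBitcodePass(OS));
//   PM.run(*M);   // writes two bitcode modules when M carries type metadata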
diff --git a/contrib/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/contrib/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
index 53eb4e2c9076..844cc0f70eed 100644
--- a/contrib/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
+++ b/contrib/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp
@@ -29,24 +29,43 @@
#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseMapInfo.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/TypeMetadataUtils.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalAlias.h"
+#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
-#include "llvm/Support/raw_ostream.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/PassSupport.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/Evaluator.h"
-#include "llvm/Transforms/Utils/Local.h"
-
+#include <algorithm>
+#include <cstddef>
+#include <map>
#include <set>
+#include <string>
using namespace llvm;
using namespace wholeprogramdevirt;
@@ -166,7 +185,7 @@ void wholeprogramdevirt::setAfterReturnValues(
VirtualCallTarget::VirtualCallTarget(Function *Fn, const TypeMemberInfo *TM)
: Fn(Fn), TM(TM),
- IsBigEndian(Fn->getParent()->getDataLayout().isBigEndian()) {}
+ IsBigEndian(Fn->getParent()->getDataLayout().isBigEndian()), WasDevirt(false) {}
namespace {
@@ -178,7 +197,7 @@ struct VTableSlot {
uint64_t ByteOffset;
};
-}
+} // end anonymous namespace
namespace llvm {
@@ -201,7 +220,7 @@ template <> struct DenseMapInfo<VTableSlot> {
}
};
-}
+} // end namespace llvm
namespace {
@@ -216,15 +235,18 @@ struct VirtualCallSite {
// of that field for details.
unsigned *NumUnsafeUses;
- void emitRemark() {
+ void emitRemark(const Twine &OptName, const Twine &TargetName) {
Function *F = CS.getCaller();
- emitOptimizationRemark(F->getContext(), DEBUG_TYPE, *F,
- CS.getInstruction()->getDebugLoc(),
- "devirtualized call");
+ emitOptimizationRemark(
+ F->getContext(), DEBUG_TYPE, *F,
+ CS.getInstruction()->getDebugLoc(),
+ OptName + ": devirtualized a call to " + TargetName);
}
- void replaceAndErase(Value *New) {
- emitRemark();
+ void replaceAndErase(const Twine &OptName, const Twine &TargetName,
+ bool RemarksEnabled, Value *New) {
+ if (RemarksEnabled)
+ emitRemark(OptName, TargetName);
CS->replaceAllUsesWith(New);
if (auto II = dyn_cast<InvokeInst>(CS.getInstruction())) {
BranchInst::Create(II->getNormalDest(), CS.getInstruction());
@@ -243,6 +265,8 @@ struct DevirtModule {
PointerType *Int8PtrTy;
IntegerType *Int32Ty;
+ bool RemarksEnabled;
+
MapVector<VTableSlot, std::vector<VirtualCallSite>> CallSlots;
// This map keeps track of the number of "unsafe" uses of a loaded function
@@ -258,7 +282,10 @@ struct DevirtModule {
DevirtModule(Module &M)
: M(M), Int8Ty(Type::getInt8Ty(M.getContext())),
Int8PtrTy(Type::getInt8PtrTy(M.getContext())),
- Int32Ty(Type::getInt32Ty(M.getContext())) {}
+ Int32Ty(Type::getInt32Ty(M.getContext())),
+ RemarksEnabled(areRemarksEnabled()) {}
+
+ bool areRemarksEnabled();
void scanTypeTestUsers(Function *TypeTestFunc, Function *AssumeFunc);
void scanTypeCheckedLoadUsers(Function *TypeCheckedLoadFunc);
@@ -266,20 +293,21 @@ struct DevirtModule {
void buildTypeIdentifierMap(
std::vector<VTableBits> &Bits,
DenseMap<Metadata *, std::set<TypeMemberInfo>> &TypeIdMap);
+ Constant *getPointerAtOffset(Constant *I, uint64_t Offset);
bool
tryFindVirtualCallTargets(std::vector<VirtualCallTarget> &TargetsForSlot,
const std::set<TypeMemberInfo> &TypeMemberInfos,
uint64_t ByteOffset);
- bool trySingleImplDevirt(ArrayRef<VirtualCallTarget> TargetsForSlot,
+ bool trySingleImplDevirt(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
MutableArrayRef<VirtualCallSite> CallSites);
bool tryEvaluateFunctionsWithArgs(
MutableArrayRef<VirtualCallTarget> TargetsForSlot,
ArrayRef<ConstantInt *> Args);
bool tryUniformRetValOpt(IntegerType *RetType,
- ArrayRef<VirtualCallTarget> TargetsForSlot,
+ MutableArrayRef<VirtualCallTarget> TargetsForSlot,
MutableArrayRef<VirtualCallSite> CallSites);
bool tryUniqueRetValOpt(unsigned BitWidth,
- ArrayRef<VirtualCallTarget> TargetsForSlot,
+ MutableArrayRef<VirtualCallTarget> TargetsForSlot,
MutableArrayRef<VirtualCallSite> CallSites);
bool tryVirtualConstProp(MutableArrayRef<VirtualCallTarget> TargetsForSlot,
ArrayRef<VirtualCallSite> CallSites);
@@ -291,10 +319,12 @@ struct DevirtModule {
struct WholeProgramDevirt : public ModulePass {
static char ID;
+
WholeProgramDevirt() : ModulePass(ID) {
initializeWholeProgramDevirtPass(*PassRegistry::getPassRegistry());
}
- bool runOnModule(Module &M) {
+
+ bool runOnModule(Module &M) override {
if (skipModule(M))
return false;
@@ -302,7 +332,7 @@ struct WholeProgramDevirt : public ModulePass {
}
};
-} // anonymous namespace
+} // end anonymous namespace
INITIALIZE_PASS(WholeProgramDevirt, "wholeprogramdevirt",
"Whole program devirtualization", false, false)
@@ -353,6 +383,38 @@ void DevirtModule::buildTypeIdentifierMap(
}
}
+Constant *DevirtModule::getPointerAtOffset(Constant *I, uint64_t Offset) {
+ if (I->getType()->isPointerTy()) {
+ if (Offset == 0)
+ return I;
+ return nullptr;
+ }
+
+ const DataLayout &DL = M.getDataLayout();
+
+ if (auto *C = dyn_cast<ConstantStruct>(I)) {
+ const StructLayout *SL = DL.getStructLayout(C->getType());
+ if (Offset >= SL->getSizeInBytes())
+ return nullptr;
+
+ unsigned Op = SL->getElementContainingOffset(Offset);
+ return getPointerAtOffset(cast<Constant>(I->getOperand(Op)),
+ Offset - SL->getElementOffset(Op));
+ }
+ if (auto *C = dyn_cast<ConstantArray>(I)) {
+ ArrayType *VTableTy = C->getType();
+ uint64_t ElemSize = DL.getTypeAllocSize(VTableTy->getElementType());
+
+ unsigned Op = Offset / ElemSize;
+ if (Op >= C->getNumOperands())
+ return nullptr;
+
+ return getPointerAtOffset(cast<Constant>(I->getOperand(Op)),
+ Offset % ElemSize);
+ }
+ return nullptr;
+}
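// Worked example for getPointerAtOffset (hypothetical vtable layout, 64-bit
// pointers): given an initializer
//   { [2 x i8*] [i8* @a, i8* @b], [3 x i8*] [i8* @c, i8* @d, i8* @e] }
// a query at Offset = 24 selects the second struct element (it starts at
// byte 16), recurses into the array with 24 - 16 = 8, picks element
// 8 / 8 = 1, and returns the pointer @d.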
+
bool DevirtModule::tryFindVirtualCallTargets(
std::vector<VirtualCallTarget> &TargetsForSlot,
const std::set<TypeMemberInfo> &TypeMemberInfos, uint64_t ByteOffset) {
@@ -360,22 +422,12 @@ bool DevirtModule::tryFindVirtualCallTargets(
if (!TM.Bits->GV->isConstant())
return false;
- auto Init = dyn_cast<ConstantArray>(TM.Bits->GV->getInitializer());
- if (!Init)
- return false;
- ArrayType *VTableTy = Init->getType();
-
- uint64_t ElemSize =
- M.getDataLayout().getTypeAllocSize(VTableTy->getElementType());
- uint64_t GlobalSlotOffset = TM.Offset + ByteOffset;
- if (GlobalSlotOffset % ElemSize != 0)
- return false;
-
- unsigned Op = GlobalSlotOffset / ElemSize;
- if (Op >= Init->getNumOperands())
+ Constant *Ptr = getPointerAtOffset(TM.Bits->GV->getInitializer(),
+ TM.Offset + ByteOffset);
+ if (!Ptr)
return false;
- auto Fn = dyn_cast<Function>(Init->getOperand(Op)->stripPointerCasts());
+ auto Fn = dyn_cast<Function>(Ptr->stripPointerCasts());
if (!Fn)
return false;
@@ -392,7 +444,7 @@ bool DevirtModule::tryFindVirtualCallTargets(
}
bool DevirtModule::trySingleImplDevirt(
- ArrayRef<VirtualCallTarget> TargetsForSlot,
+ MutableArrayRef<VirtualCallTarget> TargetsForSlot,
MutableArrayRef<VirtualCallSite> CallSites) {
// See if the program contains a single implementation of this virtual
// function.
@@ -401,9 +453,12 @@ bool DevirtModule::trySingleImplDevirt(
if (TheFn != Target.Fn)
return false;
+ if (RemarksEnabled)
+ TargetsForSlot[0].WasDevirt = true;
// If so, update each call site to call that implementation directly.
for (auto &&VCallSite : CallSites) {
- VCallSite.emitRemark();
+ if (RemarksEnabled)
+ VCallSite.emitRemark("single-impl", TheFn->getName());
VCallSite.CS.setCalledFunction(ConstantExpr::getBitCast(
TheFn, VCallSite.CS.getCalledValue()->getType()));
// This use is no longer unsafe.
@@ -441,7 +496,7 @@ bool DevirtModule::tryEvaluateFunctionsWithArgs(
}
bool DevirtModule::tryUniformRetValOpt(
- IntegerType *RetType, ArrayRef<VirtualCallTarget> TargetsForSlot,
+ IntegerType *RetType, MutableArrayRef<VirtualCallTarget> TargetsForSlot,
MutableArrayRef<VirtualCallSite> CallSites) {
// Uniform return value optimization. If all functions return the same
// constant, replace all calls with that constant.
@@ -452,16 +507,20 @@ bool DevirtModule::tryUniformRetValOpt(
auto TheRetValConst = ConstantInt::get(RetType, TheRetVal);
for (auto Call : CallSites)
- Call.replaceAndErase(TheRetValConst);
+ Call.replaceAndErase("uniform-ret-val", TargetsForSlot[0].Fn->getName(),
+ RemarksEnabled, TheRetValConst);
+ if (RemarksEnabled)
+ for (auto &&Target : TargetsForSlot)
+ Target.WasDevirt = true;
return true;
}
bool DevirtModule::tryUniqueRetValOpt(
- unsigned BitWidth, ArrayRef<VirtualCallTarget> TargetsForSlot,
+ unsigned BitWidth, MutableArrayRef<VirtualCallTarget> TargetsForSlot,
MutableArrayRef<VirtualCallSite> CallSites) {
// IsOne controls whether we look for a 0 or a 1.
auto tryUniqueRetValOptFor = [&](bool IsOne) {
- const TypeMemberInfo *UniqueMember = 0;
+ const TypeMemberInfo *UniqueMember = nullptr;
for (const VirtualCallTarget &Target : TargetsForSlot) {
if (Target.RetVal == (IsOne ? 1 : 0)) {
if (UniqueMember)
@@ -481,8 +540,14 @@ bool DevirtModule::tryUniqueRetValOpt(
OneAddr = B.CreateConstGEP1_64(OneAddr, UniqueMember->Offset);
Value *Cmp = B.CreateICmp(IsOne ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE,
Call.VTable, OneAddr);
- Call.replaceAndErase(Cmp);
+ Call.replaceAndErase("unique-ret-val", TargetsForSlot[0].Fn->getName(),
+ RemarksEnabled, Cmp);
}
+ // Update devirtualization statistics for targets.
+ if (RemarksEnabled)
+ for (auto &&Target : TargetsForSlot)
+ Target.WasDevirt = true;
+
return true;
};
@@ -590,6 +655,10 @@ bool DevirtModule::tryVirtualConstProp(
setAfterReturnValues(TargetsForSlot, AllocAfter, BitWidth, OffsetByte,
OffsetBit);
+ if (RemarksEnabled)
+ for (auto &&Target : TargetsForSlot)
+ Target.WasDevirt = true;
+
// Rewrite each call to a load from OffsetByte/OffsetBit.
for (auto Call : CSByConstantArg.second) {
IRBuilder<> B(Call.CS.getInstruction());
@@ -599,11 +668,15 @@ bool DevirtModule::tryVirtualConstProp(
Value *Bit = ConstantInt::get(Int8Ty, 1ULL << OffsetBit);
Value *BitsAndBit = B.CreateAnd(Bits, Bit);
auto IsBitSet = B.CreateICmpNE(BitsAndBit, ConstantInt::get(Int8Ty, 0));
- Call.replaceAndErase(IsBitSet);
+ Call.replaceAndErase("virtual-const-prop-1-bit",
+ TargetsForSlot[0].Fn->getName(),
+ RemarksEnabled, IsBitSet);
} else {
Value *ValAddr = B.CreateBitCast(Addr, RetType->getPointerTo());
Value *Val = B.CreateLoad(RetType, ValAddr);
- Call.replaceAndErase(Val);
+ Call.replaceAndErase("virtual-const-prop",
+ TargetsForSlot[0].Fn->getName(),
+ RemarksEnabled, Val);
}
}
}
@@ -655,6 +728,15 @@ void DevirtModule::rebuildGlobal(VTableBits &B) {
B.GV->eraseFromParent();
}
+bool DevirtModule::areRemarksEnabled() {
+ const auto &FL = M.getFunctionList();
+ if (FL.empty())
+ return false;
+ const Function &Fn = FL.front();
+ auto DI = OptimizationRemark(DEBUG_TYPE, Fn, DebugLoc(), "");
+ return DI.isEnabled();
+}
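// Usage note (editorial, hedged): remarks for this pass are keyed on
// DEBUG_TYPE ("wholeprogramdevirt"), so an invocation along the lines of
//   opt -wholeprogramdevirt -pass-remarks=wholeprogramdevirt ...
// should make areRemarksEnabled() return true and enable the per-call-site
// and per-function remarks emitted below.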
+
void DevirtModule::scanTypeTestUsers(Function *TypeTestFunc,
Function *AssumeFunc) {
// Find all virtual calls via a virtual table pointer %p under an assumption
@@ -806,6 +888,7 @@ bool DevirtModule::run() {
// For each (type, offset) pair:
bool DidVirtualConstProp = false;
+ std::map<std::string, Function*> DevirtTargets;
for (auto &S : CallSlots) {
// Search each of the members of the type identifier for the virtual
// function implementation at offset S.first.ByteOffset, and add to
@@ -815,10 +898,26 @@ bool DevirtModule::run() {
S.first.ByteOffset))
continue;
- if (trySingleImplDevirt(TargetsForSlot, S.second))
- continue;
+ if (!trySingleImplDevirt(TargetsForSlot, S.second) &&
+ tryVirtualConstProp(TargetsForSlot, S.second))
+ DidVirtualConstProp = true;
- DidVirtualConstProp |= tryVirtualConstProp(TargetsForSlot, S.second);
+ // Collect functions devirtualized for at least one call site, for stats.
+ if (RemarksEnabled)
+ for (const auto &T : TargetsForSlot)
+ if (T.WasDevirt)
+ DevirtTargets[T.Fn->getName()] = T.Fn;
+ }
+
+ if (RemarksEnabled) {
+ // Generate remarks for each devirtualized function.
+ for (const auto &DT : DevirtTargets) {
+ Function *F = DT.second;
+ DISubprogram *SP = F->getSubprogram();
+ DebugLoc DL = SP ? DebugLoc::get(SP->getScopeLine(), 0, SP) : DebugLoc();
+ emitOptimizationRemark(F->getContext(), DEBUG_TYPE, *F, DL,
+ Twine("devirtualized ") + F->getName());
+ }
}
// If we were able to eliminate all unsafe uses for a type checked load,
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 221a22007173..3bbc70ab21c6 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -1035,7 +1035,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
return replaceInstUsesWith(I, V);
if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(),
- I.hasNoUnsignedWrap(), DL, TLI, DT, AC))
+ I.hasNoUnsignedWrap(), DL, &TLI, &DT, &AC))
return replaceInstUsesWith(I, V);
// (A*B)+(A*C) -> A*(B+C) etc
@@ -1047,6 +1047,16 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
// X + (signbit) --> X ^ signbit
if (Val->isSignBit())
return BinaryOperator::CreateXor(LHS, RHS);
+
+ // Is this add the last step in a convoluted sext?
+ Value *X;
+ const APInt *C;
+ if (match(LHS, m_ZExt(m_Xor(m_Value(X), m_APInt(C)))) &&
+ C->isMinSignedValue() &&
+ C->sext(LHS->getType()->getScalarSizeInBits()) == *Val) {
+ // add(zext(xor i16 X, -32768), -32768) --> sext X
+ return CastInst::Create(Instruction::SExt, X, LHS->getType());
+ }
}
// FIXME: Use the match above instead of dyn_cast to allow these transforms
@@ -1144,7 +1154,7 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
return replaceInstUsesWith(I, V);
// A+B --> A|B iff A and B have no bits set in common.
- if (haveNoCommonBitsSet(LHS, RHS, DL, AC, &I, DT))
+ if (haveNoCommonBitsSet(LHS, RHS, DL, &AC, &I, &DT))
return BinaryOperator::CreateOr(LHS, RHS);
if (Constant *CRHS = dyn_cast<Constant>(RHS)) {
@@ -1216,15 +1226,16 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
if (SExtInst *LHSConv = dyn_cast<SExtInst>(LHS)) {
// (add (sext x), cst) --> (sext (add x, cst'))
if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) {
- Constant *CI =
- ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType());
- if (LHSConv->hasOneUse() &&
- ConstantExpr::getSExt(CI, I.getType()) == RHSC &&
- WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI, I)) {
- // Insert the new, smaller add.
- Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0),
- CI, "addconv");
- return new SExtInst(NewAdd, I.getType());
+ if (LHSConv->hasOneUse()) {
+ Constant *CI =
+ ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType());
+ if (ConstantExpr::getSExt(CI, I.getType()) == RHSC &&
+ WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI, I)) {
+ // Insert the new, smaller add.
+ Value *NewAdd =
+ Builder->CreateNSWAdd(LHSConv->getOperand(0), CI, "addconv");
+ return new SExtInst(NewAdd, I.getType());
+ }
}
}
@@ -1246,6 +1257,44 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) {
}
}
+ // Check for (add (zext x), y), see if we can merge this into an
+ // integer add followed by a zext.
+ if (auto *LHSConv = dyn_cast<ZExtInst>(LHS)) {
+ // (add (zext x), cst) --> (zext (add x, cst'))
+ if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) {
+ if (LHSConv->hasOneUse()) {
+ Constant *CI =
+ ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType());
+ if (ConstantExpr::getZExt(CI, I.getType()) == RHSC &&
+ computeOverflowForUnsignedAdd(LHSConv->getOperand(0), CI, &I) ==
+ OverflowResult::NeverOverflows) {
+ // Insert the new, smaller add.
+ Value *NewAdd =
+ Builder->CreateNUWAdd(LHSConv->getOperand(0), CI, "addconv");
+ return new ZExtInst(NewAdd, I.getType());
+ }
+ }
+ }
+
+ // (add (zext x), (zext y)) --> (zext (add int x, y))
+ if (auto *RHSConv = dyn_cast<ZExtInst>(RHS)) {
+ // Only do this if x/y have the same type, if at least one of them has a
+ // single use (so we don't increase the number of zexts), and if the
+ // integer add will not overflow.
+ if (LHSConv->getOperand(0)->getType() ==
+ RHSConv->getOperand(0)->getType() &&
+ (LHSConv->hasOneUse() || RHSConv->hasOneUse()) &&
+ computeOverflowForUnsignedAdd(LHSConv->getOperand(0),
+ RHSConv->getOperand(0),
+ &I) == OverflowResult::NeverOverflows) {
+ // Insert the new integer add.
+ Value *NewAdd = Builder->CreateNUWAdd(
+ LHSConv->getOperand(0), RHSConv->getOperand(0), "addconv");
+ return new ZExtInst(NewAdd, I.getType());
+ }
+ }
+ }
+
// (add (xor A, B) (and A, B)) --> (or A, B)
{
Value *A = nullptr, *B = nullptr;
@@ -1307,7 +1356,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
return replaceInstUsesWith(I, V);
if (Value *V =
- SimplifyFAddInst(LHS, RHS, I.getFastMathFlags(), DL, TLI, DT, AC))
+ SimplifyFAddInst(LHS, RHS, I.getFastMathFlags(), DL, &TLI, &DT, &AC))
return replaceInstUsesWith(I, V);
if (isa<Constant>(RHS)) {
@@ -1483,7 +1532,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
return replaceInstUsesWith(I, V);
if (Value *V = SimplifySubInst(Op0, Op1, I.hasNoSignedWrap(),
- I.hasNoUnsignedWrap(), DL, TLI, DT, AC))
+ I.hasNoUnsignedWrap(), DL, &TLI, &DT, &AC))
return replaceInstUsesWith(I, V);
// (A*B)-(A*C) -> A*(B-C) etc
@@ -1544,34 +1593,35 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
return CastInst::CreateZExtOrBitCast(X, Op1->getType());
}
- if (ConstantInt *C = dyn_cast<ConstantInt>(Op0)) {
+ const APInt *Op0C;
+ if (match(Op0, m_APInt(Op0C))) {
+ unsigned BitWidth = I.getType()->getScalarSizeInBits();
+
// -(X >>u 31) -> (X >>s 31)
// -(X >>s 31) -> (X >>u 31)
- if (C->isZero()) {
+ if (*Op0C == 0) {
Value *X;
- ConstantInt *CI;
- if (match(Op1, m_LShr(m_Value(X), m_ConstantInt(CI))) &&
- // Verify we are shifting out everything but the sign bit.
- CI->getValue() == I.getType()->getPrimitiveSizeInBits() - 1)
- return BinaryOperator::CreateAShr(X, CI);
-
- if (match(Op1, m_AShr(m_Value(X), m_ConstantInt(CI))) &&
- // Verify we are shifting out everything but the sign bit.
- CI->getValue() == I.getType()->getPrimitiveSizeInBits() - 1)
- return BinaryOperator::CreateLShr(X, CI);
+ const APInt *ShAmt;
+ if (match(Op1, m_LShr(m_Value(X), m_APInt(ShAmt))) &&
+ *ShAmt == BitWidth - 1) {
+ Value *ShAmtOp = cast<Instruction>(Op1)->getOperand(1);
+ return BinaryOperator::CreateAShr(X, ShAmtOp);
+ }
+ if (match(Op1, m_AShr(m_Value(X), m_APInt(ShAmt))) &&
+ *ShAmt == BitWidth - 1) {
+ Value *ShAmtOp = cast<Instruction>(Op1)->getOperand(1);
+ return BinaryOperator::CreateLShr(X, ShAmtOp);
+ }
}
// Turn this into a xor if LHS is 2^n-1 and the remaining bits are known
// zero.
- APInt IntVal = C->getValue();
- if ((IntVal + 1).isPowerOf2()) {
- unsigned BitWidth = I.getType()->getScalarSizeInBits();
+ if ((*Op0C + 1).isPowerOf2()) {
APInt KnownZero(BitWidth, 0);
APInt KnownOne(BitWidth, 0);
computeKnownBits(&I, KnownZero, KnownOne, 0, &I);
- if ((IntVal | KnownZero).isAllOnesValue()) {
- return BinaryOperator::CreateXor(Op1, C);
- }
+ if ((*Op0C | KnownZero).isAllOnesValue())
+ return BinaryOperator::CreateXor(Op1, Op0);
}
}
@@ -1632,6 +1682,17 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) {
if (Value *XNeg = dyn_castNegVal(X))
return BinaryOperator::CreateShl(XNeg, Y);
+ // Subtracting -1/0 is the same as adding 1/0:
+ // sub [nsw] Op0, sext(bool Y) -> add [nsw] Op0, zext(bool Y)
+ // 'nuw' is dropped in favor of the canonical form.
+ if (match(Op1, m_SExt(m_Value(Y))) &&
+ Y->getType()->getScalarSizeInBits() == 1) {
+ Value *Zext = Builder->CreateZExt(Y, I.getType());
+ BinaryOperator *Add = BinaryOperator::CreateAdd(Op0, Zext);
+ Add->setHasNoSignedWrap(I.hasNoSignedWrap());
+ return Add;
+ }
+
// X - A*-B -> X + A*B
// X - -A*B -> X + A*B
Value *A, *B;
@@ -1682,7 +1743,7 @@ Instruction *InstCombiner::visitFSub(BinaryOperator &I) {
return replaceInstUsesWith(I, V);
if (Value *V =
- SimplifyFSubInst(Op0, Op1, I.getFastMathFlags(), DL, TLI, DT, AC))
+ SimplifyFSubInst(Op0, Op1, I.getFastMathFlags(), DL, &TLI, &DT, &AC))
return replaceInstUsesWith(I, V);
// fsub nsz 0, X ==> fsub nsz -0.0, X
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 1a6459b3d689..a59b43d6af5f 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -98,12 +98,11 @@ Value *InstCombiner::SimplifyBSwap(BinaryOperator &I) {
IntegerType *ITy = dyn_cast<IntegerType>(I.getType());
// Can't do vectors.
- if (I.getType()->isVectorTy()) return nullptr;
+ if (I.getType()->isVectorTy())
+ return nullptr;
// Can only do bitwise ops.
- unsigned Op = I.getOpcode();
- if (Op != Instruction::And && Op != Instruction::Or &&
- Op != Instruction::Xor)
+ if (!I.isBitwiseLogicOp())
return nullptr;
Value *OldLHS = I.getOperand(0);
@@ -132,14 +131,7 @@ Value *InstCombiner::SimplifyBSwap(BinaryOperator &I) {
Value *NewRHS = IsBswapRHS ? IntrRHS->getOperand(0) :
Builder->getInt(ConstRHS->getValue().byteSwap());
- Value *BinOp = nullptr;
- if (Op == Instruction::And)
- BinOp = Builder->CreateAnd(NewLHS, NewRHS);
- else if (Op == Instruction::Or)
- BinOp = Builder->CreateOr(NewLHS, NewRHS);
- else //if (Op == Instruction::Xor)
- BinOp = Builder->CreateXor(NewLHS, NewRHS);
-
+ Value *BinOp = Builder->CreateBinOp(I.getOpcode(), NewLHS, NewRHS);
Function *F = Intrinsic::getDeclaration(I.getModule(), Intrinsic::bswap, ITy);
return Builder->CreateCall(F, BinOp);
}
@@ -283,51 +275,31 @@ Instruction *InstCombiner::OptAndOp(Instruction *Op,
}
/// Emit a computation of: (V >= Lo && V < Hi) if Inside is true, otherwise
-/// (V < Lo || V >= Hi). In practice, we emit the more efficient
-/// (V-Lo) \<u Hi-Lo. This method expects that Lo <= Hi. isSigned indicates
-/// whether to treat the V, Lo and HI as signed or not. IB is the location to
-/// insert new instructions.
-Value *InstCombiner::InsertRangeTest(Value *V, Constant *Lo, Constant *Hi,
+/// (V < Lo || V >= Hi). This method expects that Lo <= Hi. IsSigned indicates
+/// whether to treat V, Lo, and Hi as signed or not.
+Value *InstCombiner::insertRangeTest(Value *V, const APInt &Lo, const APInt &Hi,
bool isSigned, bool Inside) {
- assert(cast<ConstantInt>(ConstantExpr::getICmp((isSigned ?
- ICmpInst::ICMP_SLE:ICmpInst::ICMP_ULE), Lo, Hi))->getZExtValue() &&
+ assert((isSigned ? Lo.sle(Hi) : Lo.ule(Hi)) &&
"Lo is not <= Hi in range emission code!");
- if (Inside) {
- if (Lo == Hi) // Trivially false.
- return Builder->getFalse();
-
- // V >= Min && V < Hi --> V < Hi
- if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) {
- ICmpInst::Predicate pred = (isSigned ?
- ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT);
- return Builder->CreateICmp(pred, V, Hi);
- }
-
- // Emit V-Lo <u Hi-Lo
- Constant *NegLo = ConstantExpr::getNeg(Lo);
- Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off");
- Constant *UpperBound = ConstantExpr::getAdd(NegLo, Hi);
- return Builder->CreateICmpULT(Add, UpperBound);
- }
-
- if (Lo == Hi) // Trivially true.
- return Builder->getTrue();
+ Type *Ty = V->getType();
+ if (Lo == Hi)
+ return Inside ? ConstantInt::getFalse(Ty) : ConstantInt::getTrue(Ty);
- // V < Min || V >= Hi -> V > Hi-1
- Hi = SubOne(cast<ConstantInt>(Hi));
- if (cast<ConstantInt>(Lo)->isMinValue(isSigned)) {
- ICmpInst::Predicate pred = (isSigned ?
- ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT);
- return Builder->CreateICmp(pred, V, Hi);
+ // V >= Min && V < Hi --> V < Hi
+ // V < Min || V >= Hi --> V >= Hi
+ ICmpInst::Predicate Pred = Inside ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_UGE;
+ if (isSigned ? Lo.isMinSignedValue() : Lo.isMinValue()) {
+ Pred = isSigned ? ICmpInst::getSignedPredicate(Pred) : Pred;
+ return Builder->CreateICmp(Pred, V, ConstantInt::get(Ty, Hi));
}
- // Emit V-Lo >u Hi-1-Lo
- // Note that Hi has already had one subtracted from it, above.
- ConstantInt *NegLo = cast<ConstantInt>(ConstantExpr::getNeg(Lo));
- Value *Add = Builder->CreateAdd(V, NegLo, V->getName()+".off");
- Constant *LowerBound = ConstantExpr::getAdd(NegLo, Hi);
- return Builder->CreateICmpUGT(Add, LowerBound);
+ // V >= Lo && V < Hi --> V - Lo u< Hi - Lo
+ // V < Lo || V >= Hi --> V - Lo u>= Hi - Lo
+ Value *VMinusLo =
+ Builder->CreateSub(V, ConstantInt::get(Ty, Lo), V->getName() + ".off");
+ Constant *HiMinusLo = ConstantInt::get(Ty, Hi - Lo);
+ return Builder->CreateICmp(Pred, VMinusLo, HiMinusLo);
}
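// Worked example of the rewritten helper (unsigned, Inside == true):
// insertRangeTest(V, 5, 10, /*isSigned=*/false, /*Inside=*/true) emits
//   %V.off = sub i32 %V, 5
//   %cmp   = icmp ult i32 %V.off, 5      ; 5 == Hi - Lo
// which holds exactly when 5 <= V && V < 10, relying on unsigned wraparound
// for values below Lo.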
/// Returns true iff Val consists of one contiguous run of 1s with any number
@@ -524,53 +496,6 @@ static unsigned conjugateICmpMask(unsigned Mask) {
return NewMask;
}
-/// Decompose an icmp into the form ((X & Y) pred Z) if possible.
-/// The returned predicate is either == or !=. Returns false if
-/// decomposition fails.
-static bool decomposeBitTestICmp(const ICmpInst *I, ICmpInst::Predicate &Pred,
- Value *&X, Value *&Y, Value *&Z) {
- ConstantInt *C = dyn_cast<ConstantInt>(I->getOperand(1));
- if (!C)
- return false;
-
- switch (I->getPredicate()) {
- default:
- return false;
- case ICmpInst::ICMP_SLT:
- // X < 0 is equivalent to (X & SignBit) != 0.
- if (!C->isZero())
- return false;
- Y = ConstantInt::get(I->getContext(), APInt::getSignBit(C->getBitWidth()));
- Pred = ICmpInst::ICMP_NE;
- break;
- case ICmpInst::ICMP_SGT:
- // X > -1 is equivalent to (X & SignBit) == 0.
- if (!C->isAllOnesValue())
- return false;
- Y = ConstantInt::get(I->getContext(), APInt::getSignBit(C->getBitWidth()));
- Pred = ICmpInst::ICMP_EQ;
- break;
- case ICmpInst::ICMP_ULT:
- // X <u 2^n is equivalent to (X & ~(2^n-1)) == 0.
- if (!C->getValue().isPowerOf2())
- return false;
- Y = ConstantInt::get(I->getContext(), -C->getValue());
- Pred = ICmpInst::ICMP_EQ;
- break;
- case ICmpInst::ICMP_UGT:
- // X >u 2^n-1 is equivalent to (X & ~(2^n-1)) != 0.
- if (!(C->getValue() + 1).isPowerOf2())
- return false;
- Y = ConstantInt::get(I->getContext(), ~C->getValue());
- Pred = ICmpInst::ICMP_NE;
- break;
- }
-
- X = I->getOperand(0);
- Z = ConstantInt::getNullValue(C->getType());
- return true;
-}
-
/// Handle (icmp(A & B) ==/!= C) &/| (icmp(A & D) ==/!= E)
/// Return the set of pattern classes (from MaskedICmpType)
/// that both LHS and RHS satisfy.
@@ -1001,7 +926,8 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
if (LHSCst == SubOne(RHSCst)) // (X != 13 & X u< 14) -> X < 13
return Builder->CreateICmpULT(Val, LHSCst);
if (LHSCst->isNullValue()) // (X != 0 & X u< 14) -> X-1 u< 13
- return InsertRangeTest(Val, AddOne(LHSCst), RHSCst, false, true);
+ return insertRangeTest(Val, LHSCst->getValue() + 1, RHSCst->getValue(),
+ false, true);
break; // (X != 13 & X u< 15) -> no change
case ICmpInst::ICMP_SLT:
if (LHSCst == SubOne(RHSCst)) // (X != 13 & X s< 14) -> X < 13
@@ -1065,7 +991,8 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
return Builder->CreateICmp(LHSCC, Val, RHSCst);
break; // (X u> 13 & X != 15) -> no change
case ICmpInst::ICMP_ULT: // (X u> 13 & X u< 15) -> (X-14) <u 1
- return InsertRangeTest(Val, AddOne(LHSCst), RHSCst, false, true);
+ return insertRangeTest(Val, LHSCst->getValue() + 1, RHSCst->getValue(),
+ false, true);
case ICmpInst::ICMP_SLT: // (X u> 13 & X s< 15) -> no change
break;
}
@@ -1083,7 +1010,8 @@ Value *InstCombiner::FoldAndOfICmps(ICmpInst *LHS, ICmpInst *RHS) {
return Builder->CreateICmp(LHSCC, Val, RHSCst);
break; // (X s> 13 & X != 15) -> no change
case ICmpInst::ICMP_SLT: // (X s> 13 & X s< 15) -> (X-14) s< 1
- return InsertRangeTest(Val, AddOne(LHSCst), RHSCst, true, true);
+ return insertRangeTest(Val, LHSCst->getValue() + 1, RHSCst->getValue(),
+ true, true);
case ICmpInst::ICMP_ULT: // (X s> 13 & X u< 15) -> no change
break;
}
@@ -1170,34 +1098,73 @@ static Instruction *matchDeMorgansLaws(BinaryOperator &I,
return BinaryOperator::CreateNot(LogicOp);
}
- // De Morgan's Law in disguise:
- // (zext(bool A) ^ 1) & (zext(bool B) ^ 1) -> zext(~(A | B))
- // (zext(bool A) ^ 1) | (zext(bool B) ^ 1) -> zext(~(A & B))
- Value *A = nullptr;
- Value *B = nullptr;
- ConstantInt *C1 = nullptr;
- if (match(Op0, m_OneUse(m_Xor(m_ZExt(m_Value(A)), m_ConstantInt(C1)))) &&
- match(Op1, m_OneUse(m_Xor(m_ZExt(m_Value(B)), m_Specific(C1))))) {
- // TODO: This check could be loosened to handle different type sizes.
- // Alternatively, we could fix the definition of m_Not to recognize a not
- // operation hidden by a zext?
- if (A->getType()->isIntegerTy(1) && B->getType()->isIntegerTy(1) &&
- C1->isOne()) {
- Value *LogicOp = Builder->CreateBinOp(Opcode, A, B,
- I.getName() + ".demorgan");
- Value *Not = Builder->CreateNot(LogicOp);
- return CastInst::CreateZExtOrBitCast(Not, I.getType());
+ return nullptr;
+}
+
+bool InstCombiner::shouldOptimizeCast(CastInst *CI) {
+ Value *CastSrc = CI->getOperand(0);
+
+ // Noop casts and casts of constants should be eliminated trivially.
+ if (CI->getSrcTy() == CI->getDestTy() || isa<Constant>(CastSrc))
+ return false;
+
+ // If this cast is paired with another cast that can be eliminated, we prefer
+ // to have it eliminated.
+ if (const auto *PrecedingCI = dyn_cast<CastInst>(CastSrc))
+ if (isEliminableCastPair(PrecedingCI, CI))
+ return false;
+
+ // If this is a vector sext from a compare, then we don't want to break the
+ // idiom where each element of the extended vector is either zero or all ones.
+ if (CI->getOpcode() == Instruction::SExt &&
+ isa<CmpInst>(CastSrc) && CI->getDestTy()->isVectorTy())
+ return false;
+
+ return true;
+}
+
+/// Fold {and,or,xor} (cast X), C.
+static Instruction *foldLogicCastConstant(BinaryOperator &Logic, CastInst *Cast,
+ InstCombiner::BuilderTy *Builder) {
+ Constant *C;
+ if (!match(Logic.getOperand(1), m_Constant(C)))
+ return nullptr;
+
+ auto LogicOpc = Logic.getOpcode();
+ Type *DestTy = Logic.getType();
+ Type *SrcTy = Cast->getSrcTy();
+
+ // If the first operand is bitcast, move the logic operation ahead of the
+ // bitcast (do the logic operation in the original type). This can eliminate
+ // bitcasts and allow combines that would otherwise be impeded by the bitcast.
+ Value *X;
+ if (match(Cast, m_BitCast(m_Value(X)))) {
+ Value *NewConstant = ConstantExpr::getBitCast(C, SrcTy);
+ Value *NewOp = Builder->CreateBinOp(LogicOpc, X, NewConstant);
+ return CastInst::CreateBitOrPointerCast(NewOp, DestTy);
+ }
+
+ // Similarly, move the logic operation ahead of a zext if the constant is
+ // unchanged in the smaller source type. Performing the logic in a smaller
+ // type may provide more information to later folds, and the smaller logic
+ // instruction may be cheaper (particularly in the case of vectors).
+ if (match(Cast, m_OneUse(m_ZExt(m_Value(X))))) {
+ Constant *TruncC = ConstantExpr::getTrunc(C, SrcTy);
+ Constant *ZextTruncC = ConstantExpr::getZExt(TruncC, DestTy);
+ if (ZextTruncC == C) {
+ // LogicOpc (zext X), C --> zext (LogicOpc X, C)
+ Value *NewOp = Builder->CreateBinOp(LogicOpc, X, TruncC);
+ return new ZExtInst(NewOp, DestTy);
}
}
return nullptr;
}
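// Illustrative example of the zext case above (hypothetical values):
//   %z = zext i8 %x to i32
//   %r = and i32 %z, 7
// becomes
//   %n = and i8 %x, 7
//   %r = zext i8 %n to i32
// since truncating i32 7 to i8 and zero-extending it back yields 7 again
// (ZextTruncC == C), the fold is lossless and the logic now happens in the
// narrower type.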
+/// Fold {and,or,xor} (cast X), Y.
Instruction *InstCombiner::foldCastedBitwiseLogic(BinaryOperator &I) {
auto LogicOpc = I.getOpcode();
- assert((LogicOpc == Instruction::And || LogicOpc == Instruction::Or ||
- LogicOpc == Instruction::Xor) &&
- "Unexpected opcode for bitwise logic folding");
+ assert(I.isBitwiseLogicOp() && "Unexpected opcode for bitwise logic folding");
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
CastInst *Cast0 = dyn_cast<CastInst>(Op0);
@@ -1211,18 +1178,8 @@ Instruction *InstCombiner::foldCastedBitwiseLogic(BinaryOperator &I) {
if (!SrcTy->isIntOrIntVectorTy())
return nullptr;
- // If one operand is a bitcast and the other is a constant, move the logic
- // operation ahead of the bitcast. That is, do the logic operation in the
- // original type. This can eliminate useless bitcasts and allow normal
- // combines that would otherwise be impeded by the bitcast. Canonicalization
- // ensures that if there is a constant operand, it will be the second operand.
- Value *BC = nullptr;
- Constant *C = nullptr;
- if ((match(Op0, m_BitCast(m_Value(BC))) && match(Op1, m_Constant(C)))) {
- Value *NewConstant = ConstantExpr::getBitCast(C, SrcTy);
- Value *NewOp = Builder->CreateBinOp(LogicOpc, BC, NewConstant, I.getName());
- return CastInst::CreateBitOrPointerCast(NewOp, DestTy);
- }
+ if (Instruction *Ret = foldLogicCastConstant(I, Cast0, Builder))
+ return Ret;
CastInst *Cast1 = dyn_cast<CastInst>(Op1);
if (!Cast1)
@@ -1237,12 +1194,8 @@ Instruction *InstCombiner::foldCastedBitwiseLogic(BinaryOperator &I) {
Value *Cast0Src = Cast0->getOperand(0);
Value *Cast1Src = Cast1->getOperand(0);
- // fold (logic (cast A), (cast B)) -> (cast (logic A, B))
-
- // Only do this if the casts both really cause code to be generated.
- if ((!isa<ICmpInst>(Cast0Src) || !isa<ICmpInst>(Cast1Src)) &&
- ShouldOptimizeCast(CastOpcode, Cast0Src, DestTy) &&
- ShouldOptimizeCast(CastOpcode, Cast1Src, DestTy)) {
+ // fold logic(cast(A), cast(B)) -> cast(logic(A, B))
+ if (shouldOptimizeCast(Cast0) && shouldOptimizeCast(Cast1)) {
Value *NewOp = Builder->CreateBinOp(LogicOpc, Cast0Src, Cast1Src,
I.getName());
return CastInst::Create(CastOpcode, NewOp, DestTy);
@@ -1301,10 +1254,13 @@ static Instruction *foldBoolSextMaskToSelect(BinaryOperator &I) {
Value *Zero = Constant::getNullValue(Op0->getType());
return SelectInst::Create(X, Zero, Op1);
}
-
+
return nullptr;
}
+// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
+// here. We should standardize that construct where it is needed or choose some
+// other way to ensure that commutated variants of patterns are not missed.
Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
bool Changed = SimplifyAssociativeOrCommutative(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
@@ -1312,7 +1268,7 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
if (Value *V = SimplifyVectorOp(I))
return replaceInstUsesWith(I, V);
- if (Value *V = SimplifyAndInst(Op0, Op1, DL, TLI, DT, AC))
+ if (Value *V = SimplifyAndInst(Op0, Op1, DL, &TLI, &DT, &AC))
return replaceInstUsesWith(I, V);
// (A|B)&(A|C) -> A|(B&C) etc
@@ -1503,8 +1459,9 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
return BinaryOperator::CreateAnd(A, B);
// ((~A) ^ B) & (A | B) -> (A & B)
+ // ((~A) ^ B) & (B | A) -> (A & B)
if (match(Op0, m_Xor(m_Not(m_Value(A)), m_Value(B))) &&
- match(Op1, m_Or(m_Specific(A), m_Specific(B))))
+ match(Op1, m_c_Or(m_Specific(A), m_Specific(B))))
return BinaryOperator::CreateAnd(A, B);
}
@@ -1697,17 +1654,17 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
Value *Mask = nullptr;
Value *Masked = nullptr;
if (LAnd->getOperand(0) == RAnd->getOperand(0) &&
- isKnownToBeAPowerOfTwo(LAnd->getOperand(1), DL, false, 0, AC, CxtI,
- DT) &&
- isKnownToBeAPowerOfTwo(RAnd->getOperand(1), DL, false, 0, AC, CxtI,
- DT)) {
+ isKnownToBeAPowerOfTwo(LAnd->getOperand(1), DL, false, 0, &AC, CxtI,
+ &DT) &&
+ isKnownToBeAPowerOfTwo(RAnd->getOperand(1), DL, false, 0, &AC, CxtI,
+ &DT)) {
Mask = Builder->CreateOr(LAnd->getOperand(1), RAnd->getOperand(1));
Masked = Builder->CreateAnd(LAnd->getOperand(0), Mask);
} else if (LAnd->getOperand(1) == RAnd->getOperand(1) &&
- isKnownToBeAPowerOfTwo(LAnd->getOperand(0), DL, false, 0, AC,
- CxtI, DT) &&
- isKnownToBeAPowerOfTwo(RAnd->getOperand(0), DL, false, 0, AC,
- CxtI, DT)) {
+ isKnownToBeAPowerOfTwo(LAnd->getOperand(0), DL, false, 0, &AC,
+ CxtI, &DT) &&
+ isKnownToBeAPowerOfTwo(RAnd->getOperand(0), DL, false, 0, &AC,
+ CxtI, &DT)) {
Mask = Builder->CreateOr(LAnd->getOperand(0), RAnd->getOperand(0));
Masked = Builder->CreateAnd(LAnd->getOperand(1), Mask);
}
@@ -1825,7 +1782,7 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
// E.g. (icmp sgt x, n) | (icmp slt x, 0) --> icmp ugt x, n
if (Value *V = simplifyRangeCheck(RHS, LHS, /*Inverted=*/true))
return V;
-
+
// This only handles icmp of constants: (icmp1 A, C1) | (icmp2 B, C2).
if (!LHSCst || !RHSCst) return nullptr;
@@ -1943,7 +1900,8 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
// this can cause overflow.
if (RHSCst->isMaxValue(false))
return LHS;
- return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), false, false);
+ return insertRangeTest(Val, LHSCst->getValue(), RHSCst->getValue() + 1,
+ false, false);
case ICmpInst::ICMP_SGT: // (X u< 13 | X s> 15) -> no change
break;
case ICmpInst::ICMP_NE: // (X u< 13 | X != 15) -> X != 15
@@ -1963,7 +1921,8 @@ Value *InstCombiner::FoldOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
// this can cause overflow.
if (RHSCst->isMaxValue(true))
return LHS;
- return InsertRangeTest(Val, LHSCst, AddOne(RHSCst), true, false);
+ return insertRangeTest(Val, LHSCst->getValue(), RHSCst->getValue() + 1,
+ true, false);
case ICmpInst::ICMP_UGT: // (X s< 13 | X u> 15) -> no change
break;
case ICmpInst::ICMP_NE: // (X s< 13 | X != 15) -> X != 15
@@ -2119,6 +2078,9 @@ Instruction *InstCombiner::FoldXorWithConstants(BinaryOperator &I, Value *Op,
return nullptr;
}
+// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
+// here. We should standardize that construct where it is needed or choose some
+// other way to ensure that commutated variants of patterns are not missed.
Instruction *InstCombiner::visitOr(BinaryOperator &I) {
bool Changed = SimplifyAssociativeOrCommutative(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
@@ -2126,7 +2088,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
if (Value *V = SimplifyVectorOp(I))
return replaceInstUsesWith(I, V);
- if (Value *V = SimplifyOrInst(Op0, Op1, DL, TLI, DT, AC))
+ if (Value *V = SimplifyOrInst(Op0, Op1, DL, &TLI, &DT, &AC))
return replaceInstUsesWith(I, V);
// (A&B)|(A&C) -> A&(B|C) etc
@@ -2208,14 +2170,17 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
match(Op1, m_Not(m_Specific(A))))
return BinaryOperator::CreateOr(Builder->CreateNot(A), B);
- // (A & (~B)) | (A ^ B) -> (A ^ B)
- if (match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) &&
+ // (A & ~B) | (A ^ B) -> (A ^ B)
+ // (~B & A) | (A ^ B) -> (A ^ B)
+ if (match(Op0, m_c_And(m_Value(A), m_Not(m_Value(B)))) &&
match(Op1, m_Xor(m_Specific(A), m_Specific(B))))
return BinaryOperator::CreateXor(A, B);
- // (A ^ B) | ( A & (~B)) -> (A ^ B)
- if (match(Op0, m_Xor(m_Value(A), m_Value(B))) &&
- match(Op1, m_And(m_Specific(A), m_Not(m_Specific(B)))))
+ // Commute the 'or' operands.
+ // (A ^ B) | (A & ~B) -> (A ^ B)
+ // (A ^ B) | (~B & A) -> (A ^ B)
+ if (match(Op1, m_c_And(m_Value(A), m_Not(m_Value(B)))) &&
+ match(Op0, m_Xor(m_Specific(A), m_Specific(B))))
return BinaryOperator::CreateXor(A, B);
// (A & C)|(B & D)
@@ -2385,14 +2350,15 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
return BinaryOperator::CreateOr(Not, Op0);
}
- // (A & B) | ((~A) ^ B) -> (~A ^ B)
- if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
- match(Op1, m_Xor(m_Not(m_Specific(A)), m_Specific(B))))
- return BinaryOperator::CreateXor(Builder->CreateNot(A), B);
-
- // ((~A) ^ B) | (A & B) -> (~A ^ B)
- if (match(Op0, m_Xor(m_Not(m_Value(A)), m_Value(B))) &&
- match(Op1, m_And(m_Specific(A), m_Specific(B))))
+ // (A & B) | (~A ^ B) -> (~A ^ B)
+ // (A & B) | (B ^ ~A) -> (~A ^ B)
+ // (B & A) | (~A ^ B) -> (~A ^ B)
+ // (B & A) | (B ^ ~A) -> (~A ^ B)
+ // The match order is important: match the xor first because the 'not'
+ // operation defines 'A'. We do not need to match the xor as Op0 because the
+ // xor was canonicalized to Op1 above.
+ if (match(Op1, m_c_Xor(m_Not(m_Value(A)), m_Value(B))) &&
+ match(Op0, m_c_And(m_Specific(A), m_Specific(B))))
return BinaryOperator::CreateXor(Builder->CreateNot(A), B);
if (SwappedForXor)
@@ -2472,6 +2438,9 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
return Changed ? &I : nullptr;
}
+// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
+// here. We should standardize that construct where it is needed or choose some
+// other way to ensure that commutated variants of patterns are not missed.
Instruction *InstCombiner::visitXor(BinaryOperator &I) {
bool Changed = SimplifyAssociativeOrCommutative(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
@@ -2479,7 +2448,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
if (Value *V = SimplifyVectorOp(I))
return replaceInstUsesWith(I, V);
- if (Value *V = SimplifyXorInst(Op0, Op1, DL, TLI, DT, AC))
+ if (Value *V = SimplifyXorInst(Op0, Op1, DL, &TLI, &DT, &AC))
return replaceInstUsesWith(I, V);
// (A&B)^(A&C) -> A&(B^C) etc
@@ -2694,20 +2663,22 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
return BinaryOperator::CreateXor(A, B);
}
// (A | ~B) ^ (~A | B) -> A ^ B
- if (match(Op0I, m_Or(m_Value(A), m_Not(m_Value(B)))) &&
- match(Op1I, m_Or(m_Not(m_Specific(A)), m_Specific(B)))) {
+ // (~B | A) ^ (~A | B) -> A ^ B
+ if (match(Op0I, m_c_Or(m_Value(A), m_Not(m_Value(B)))) &&
+ match(Op1I, m_Or(m_Not(m_Specific(A)), m_Specific(B))))
return BinaryOperator::CreateXor(A, B);
- }
+
// (~A | B) ^ (A | ~B) -> A ^ B
if (match(Op0I, m_Or(m_Not(m_Value(A)), m_Value(B))) &&
match(Op1I, m_Or(m_Specific(A), m_Not(m_Specific(B))))) {
return BinaryOperator::CreateXor(A, B);
}
// (A & ~B) ^ (~A & B) -> A ^ B
- if (match(Op0I, m_And(m_Value(A), m_Not(m_Value(B)))) &&
- match(Op1I, m_And(m_Not(m_Specific(A)), m_Specific(B)))) {
+ // (~B & A) ^ (~A & B) -> A ^ B
+ if (match(Op0I, m_c_And(m_Value(A), m_Not(m_Value(B)))) &&
+ match(Op1I, m_And(m_Not(m_Specific(A)), m_Specific(B))))
return BinaryOperator::CreateXor(A, B);
- }
+
// (~A & B) ^ (A & ~B) -> A ^ B
if (match(Op0I, m_And(m_Not(m_Value(A)), m_Value(B))) &&
match(Op1I, m_And(m_Specific(A), m_Not(m_Specific(B))))) {
@@ -2743,9 +2714,10 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
return BinaryOperator::CreateOr(A, B);
}
- Value *A = nullptr, *B = nullptr;
- // (A & ~B) ^ (~A) -> ~(A & B)
- if (match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) &&
+ // (A & ~B) ^ ~A -> ~(A & B)
+ // (~B & A) ^ ~A -> ~(A & B)
+ Value *A, *B;
+ if (match(Op0, m_c_And(m_Value(A), m_Not(m_Value(B)))) &&
match(Op1, m_Not(m_Specific(A))))
return BinaryOperator::CreateNot(Builder->CreateAnd(A, B));
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 8acff91345d6..92369bd70b13 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -12,17 +12,47 @@
//===----------------------------------------------------------------------===//
#include "InstCombineInternal.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/None.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/InstructionSimplify.h"
-#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
-#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Statepoint.h"
-#include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/IR/ValueHandle.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SimplifyLibCalls.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <cstring>
+#include <vector>
+
using namespace llvm;
using namespace PatternMatch;
@@ -79,8 +109,8 @@ static Constant *getNegativeIsTrueBoolVec(ConstantDataVector *V) {
}
Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
- unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), DL, MI, AC, DT);
- unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL, MI, AC, DT);
+ unsigned DstAlign = getKnownAlignment(MI->getArgOperand(0), DL, MI, &AC, &DT);
+ unsigned SrcAlign = getKnownAlignment(MI->getArgOperand(1), DL, MI, &AC, &DT);
unsigned MinAlign = std::min(DstAlign, SrcAlign);
unsigned CopyAlign = MI->getAlignment();
@@ -162,10 +192,17 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
L->setAlignment(SrcAlign);
if (CopyMD)
L->setMetadata(LLVMContext::MD_tbaa, CopyMD);
+ MDNode *LoopMemParallelMD =
+ MI->getMetadata(LLVMContext::MD_mem_parallel_loop_access);
+ if (LoopMemParallelMD)
+ L->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
+
StoreInst *S = Builder->CreateStore(L, Dest, MI->isVolatile());
S->setAlignment(DstAlign);
if (CopyMD)
S->setMetadata(LLVMContext::MD_tbaa, CopyMD);
+ if (LoopMemParallelMD)
+ S->setMetadata(LLVMContext::MD_mem_parallel_loop_access, LoopMemParallelMD);
// Set the size of the copy to 0, it will be deleted on the next iteration.
MI->setArgOperand(2, Constant::getNullValue(MemOpLength->getType()));
@@ -173,7 +210,7 @@ Instruction *InstCombiner::SimplifyMemTransfer(MemIntrinsic *MI) {
}
Instruction *InstCombiner::SimplifyMemSet(MemSetInst *MI) {
- unsigned Alignment = getKnownAlignment(MI->getDest(), DL, MI, AC, DT);
+ unsigned Alignment = getKnownAlignment(MI->getDest(), DL, MI, &AC, &DT);
if (MI->getAlignment() < Alignment) {
MI->setAlignment(ConstantInt::get(MI->getAlignmentType(),
Alignment, false));
@@ -221,8 +258,7 @@ static Value *simplifyX86immShift(const IntrinsicInst &II,
bool ShiftLeft = false;
switch (II.getIntrinsicID()) {
- default:
- return nullptr;
+ default: llvm_unreachable("Unexpected intrinsic!");
case Intrinsic::x86_sse2_psra_d:
case Intrinsic::x86_sse2_psra_w:
case Intrinsic::x86_sse2_psrai_d:
@@ -231,6 +267,16 @@ static Value *simplifyX86immShift(const IntrinsicInst &II,
case Intrinsic::x86_avx2_psra_w:
case Intrinsic::x86_avx2_psrai_d:
case Intrinsic::x86_avx2_psrai_w:
+ case Intrinsic::x86_avx512_psra_q_128:
+ case Intrinsic::x86_avx512_psrai_q_128:
+ case Intrinsic::x86_avx512_psra_q_256:
+ case Intrinsic::x86_avx512_psrai_q_256:
+ case Intrinsic::x86_avx512_psra_d_512:
+ case Intrinsic::x86_avx512_psra_q_512:
+ case Intrinsic::x86_avx512_psra_w_512:
+ case Intrinsic::x86_avx512_psrai_d_512:
+ case Intrinsic::x86_avx512_psrai_q_512:
+ case Intrinsic::x86_avx512_psrai_w_512:
LogicalShift = false; ShiftLeft = false;
break;
case Intrinsic::x86_sse2_psrl_d:
@@ -245,6 +291,12 @@ static Value *simplifyX86immShift(const IntrinsicInst &II,
case Intrinsic::x86_avx2_psrli_d:
case Intrinsic::x86_avx2_psrli_q:
case Intrinsic::x86_avx2_psrli_w:
+ case Intrinsic::x86_avx512_psrl_d_512:
+ case Intrinsic::x86_avx512_psrl_q_512:
+ case Intrinsic::x86_avx512_psrl_w_512:
+ case Intrinsic::x86_avx512_psrli_d_512:
+ case Intrinsic::x86_avx512_psrli_q_512:
+ case Intrinsic::x86_avx512_psrli_w_512:
LogicalShift = true; ShiftLeft = false;
break;
case Intrinsic::x86_sse2_psll_d:
@@ -259,6 +311,12 @@ static Value *simplifyX86immShift(const IntrinsicInst &II,
case Intrinsic::x86_avx2_pslli_d:
case Intrinsic::x86_avx2_pslli_q:
case Intrinsic::x86_avx2_pslli_w:
+ case Intrinsic::x86_avx512_psll_d_512:
+ case Intrinsic::x86_avx512_psll_q_512:
+ case Intrinsic::x86_avx512_psll_w_512:
+ case Intrinsic::x86_avx512_pslli_d_512:
+ case Intrinsic::x86_avx512_pslli_q_512:
+ case Intrinsic::x86_avx512_pslli_w_512:
LogicalShift = true; ShiftLeft = true;
break;
}
@@ -334,10 +392,16 @@ static Value *simplifyX86varShift(const IntrinsicInst &II,
bool ShiftLeft = false;
switch (II.getIntrinsicID()) {
- default:
- return nullptr;
+ default: llvm_unreachable("Unexpected intrinsic!");
case Intrinsic::x86_avx2_psrav_d:
case Intrinsic::x86_avx2_psrav_d_256:
+ case Intrinsic::x86_avx512_psrav_q_128:
+ case Intrinsic::x86_avx512_psrav_q_256:
+ case Intrinsic::x86_avx512_psrav_d_512:
+ case Intrinsic::x86_avx512_psrav_q_512:
+ case Intrinsic::x86_avx512_psrav_w_128:
+ case Intrinsic::x86_avx512_psrav_w_256:
+ case Intrinsic::x86_avx512_psrav_w_512:
LogicalShift = false;
ShiftLeft = false;
break;
@@ -345,6 +409,11 @@ static Value *simplifyX86varShift(const IntrinsicInst &II,
case Intrinsic::x86_avx2_psrlv_d_256:
case Intrinsic::x86_avx2_psrlv_q:
case Intrinsic::x86_avx2_psrlv_q_256:
+ case Intrinsic::x86_avx512_psrlv_d_512:
+ case Intrinsic::x86_avx512_psrlv_q_512:
+ case Intrinsic::x86_avx512_psrlv_w_128:
+ case Intrinsic::x86_avx512_psrlv_w_256:
+ case Intrinsic::x86_avx512_psrlv_w_512:
LogicalShift = true;
ShiftLeft = false;
break;
@@ -352,6 +421,11 @@ static Value *simplifyX86varShift(const IntrinsicInst &II,
case Intrinsic::x86_avx2_psllv_d_256:
case Intrinsic::x86_avx2_psllv_q:
case Intrinsic::x86_avx2_psllv_q_256:
+ case Intrinsic::x86_avx512_psllv_d_512:
+ case Intrinsic::x86_avx512_psllv_q_512:
+ case Intrinsic::x86_avx512_psllv_w_128:
+ case Intrinsic::x86_avx512_psllv_w_256:
+ case Intrinsic::x86_avx512_psllv_w_512:
LogicalShift = true;
ShiftLeft = true;
break;
@@ -400,7 +474,7 @@ static Value *simplifyX86varShift(const IntrinsicInst &II,
// If all elements out of range or UNDEF, return vector of zeros/undefs.
// ArithmeticShift should only hit this if they are all UNDEF.
auto OutOfRange = [&](int Idx) { return (Idx < 0) || (BitWidth <= Idx); };
- if (llvm::all_of(ShiftAmts, OutOfRange)) {
+ if (all_of(ShiftAmts, OutOfRange)) {
SmallVector<Constant *, 8> ConstantVec;
for (int Idx : ShiftAmts) {
if (Idx < 0) {
@@ -547,7 +621,7 @@ static Value *simplifyX86extrq(IntrinsicInst &II, Value *Op0,
// See if we're dealing with constant values.
Constant *C0 = dyn_cast<Constant>(Op0);
ConstantInt *CI0 =
- C0 ? dyn_cast<ConstantInt>(C0->getAggregateElement((unsigned)0))
+ C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))
: nullptr;
// Attempt to constant fold.
@@ -630,7 +704,6 @@ static Value *simplifyX86extrq(IntrinsicInst &II, Value *Op0,
static Value *simplifyX86insertq(IntrinsicInst &II, Value *Op0, Value *Op1,
APInt APLength, APInt APIndex,
InstCombiner::BuilderTy &Builder) {
-
// From AMD documentation: "The bit index and field length are each six bits
// in length other bits of the field are ignored."
APIndex = APIndex.zextOrTrunc(6);
@@ -686,10 +759,10 @@ static Value *simplifyX86insertq(IntrinsicInst &II, Value *Op0, Value *Op1,
Constant *C0 = dyn_cast<Constant>(Op0);
Constant *C1 = dyn_cast<Constant>(Op1);
ConstantInt *CI00 =
- C0 ? dyn_cast<ConstantInt>(C0->getAggregateElement((unsigned)0))
+ C0 ? dyn_cast_or_null<ConstantInt>(C0->getAggregateElement((unsigned)0))
: nullptr;
ConstantInt *CI10 =
- C1 ? dyn_cast<ConstantInt>(C1->getAggregateElement((unsigned)0))
+ C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))
: nullptr;
// Constant Fold - insert bottom Length bits starting at the Index'th bit.
@@ -732,11 +805,11 @@ static Value *simplifyX86pshufb(const IntrinsicInst &II,
auto *VecTy = cast<VectorType>(II.getType());
auto *MaskEltTy = Type::getInt32Ty(II.getContext());
unsigned NumElts = VecTy->getNumElements();
- assert((NumElts == 16 || NumElts == 32) &&
+ assert((NumElts == 16 || NumElts == 32 || NumElts == 64) &&
"Unexpected number of elements in shuffle mask!");
// Construct a shuffle mask from constant integers or UNDEFs.
- Constant *Indexes[32] = {NULL};
+ Constant *Indexes[64] = {nullptr};
// Each byte in the shuffle control mask forms an index to permute the
// corresponding byte in the destination operand.
@@ -776,12 +849,15 @@ static Value *simplifyX86vpermilvar(const IntrinsicInst &II,
if (!V)
return nullptr;
+ auto *VecTy = cast<VectorType>(II.getType());
auto *MaskEltTy = Type::getInt32Ty(II.getContext());
- unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
- assert(NumElts == 8 || NumElts == 4 || NumElts == 2);
+ unsigned NumElts = VecTy->getVectorNumElements();
+ bool IsPD = VecTy->getScalarType()->isDoubleTy();
+ unsigned NumLaneElts = IsPD ? 2 : 4;
+ assert(NumElts == 16 || NumElts == 8 || NumElts == 4 || NumElts == 2);
// Construct a shuffle mask from constant integers or UNDEFs.
- Constant *Indexes[8] = {NULL};
+ Constant *Indexes[16] = {nullptr};
// The intrinsics only read one or two bits, clear the rest.
for (unsigned I = 0; I < NumElts; ++I) {
@@ -799,18 +875,13 @@ static Value *simplifyX86vpermilvar(const IntrinsicInst &II,
  // The PD variants use bit 1 to select the per-lane element index, so
// shift down to convert to generic shuffle mask index.
- if (II.getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd ||
- II.getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd_256)
+ if (IsPD)
Index = Index.lshr(1);
// The _256 variants are a bit trickier since the mask bits always index
// into the corresponding 128 half. In order to convert to a generic
// shuffle, we have to make that explicit.
- if ((II.getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_ps_256 ||
- II.getIntrinsicID() == Intrinsic::x86_avx_vpermilvar_pd_256) &&
- ((NumElts / 2) <= I)) {
- Index += APInt(32, NumElts / 2);
- }
+ Index += APInt(32, (I / NumLaneElts) * NumLaneElts);
Indexes[I] = ConstantInt::get(MaskEltTy, Index);
}
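
As an aside on the vpermilvar change above: the old code special-cased the 256-bit variants by adding NumElts / 2 for elements in the upper half, while the new Index += (I / NumLaneElts) * NumLaneElts rebases every element onto its own 128-bit lane, which also covers the new 512-bit cases. A standalone C++ sketch of that index conversion (illustrative names, not the LLVM API; the PD variants would shift the control right by one first, as the code above does):

#include <cassert>
#include <cstdio>
#include <vector>

// Convert per-lane VPERMILVAR control values into whole-vector shuffle
// indices, mirroring the loop in simplifyX86vpermilvar.
std::vector<unsigned> toShuffleMask(const std::vector<unsigned> &Ctl,
                                    unsigned NumLaneElts) {
  std::vector<unsigned> Mask(Ctl.size());
  for (unsigned I = 0; I < Ctl.size(); ++I) {
    unsigned Index = Ctl[I] % NumLaneElts;     // control selects within a lane
    Index += (I / NumLaneElts) * NumLaneElts;  // rebase onto this element's lane
    Mask[I] = Index;
  }
  return Mask;
}

int main() {
  // 256-bit PS case: 8 floats, 4 elements per 128-bit lane.
  std::vector<unsigned> Ctl = {3, 2, 1, 0, 3, 2, 1, 0};
  std::vector<unsigned> Mask = toShuffleMask(Ctl, 4);
  assert(Mask[4] == 7 && Mask[7] == 4);  // upper-half elements stay in lane 1
  for (unsigned M : Mask)
    std::printf("%u ", M);               // prints: 3 2 1 0 7 6 5 4
  std::printf("\n");
}
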
@@ -831,10 +902,11 @@ static Value *simplifyX86vpermv(const IntrinsicInst &II,
auto *VecTy = cast<VectorType>(II.getType());
auto *MaskEltTy = Type::getInt32Ty(II.getContext());
unsigned Size = VecTy->getNumElements();
- assert(Size == 8 && "Unexpected shuffle mask size");
+ assert((Size == 4 || Size == 8 || Size == 16 || Size == 32 || Size == 64) &&
+ "Unexpected shuffle mask size");
// Construct a shuffle mask from constant integers or UNDEFs.
- Constant *Indexes[8] = {NULL};
+ Constant *Indexes[64] = {nullptr};
for (unsigned I = 0; I < Size; ++I) {
Constant *COp = V->getAggregateElement(I);
@@ -846,8 +918,8 @@ static Value *simplifyX86vpermv(const IntrinsicInst &II,
continue;
}
- APInt Index = cast<ConstantInt>(COp)->getValue();
- Index = Index.zextOrTrunc(32).getLoBits(3);
+ uint32_t Index = cast<ConstantInt>(COp)->getZExtValue();
+ Index &= Size - 1;
Indexes[I] = ConstantInt::get(MaskEltTy, Index);
}
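
The vpermv change above replaces the APInt getLoBits(3) masking (which only handled 8-element shuffles) with Index &= Size - 1, i.e. the control value taken modulo the element count, which is valid because Size is always a power of two here. A quick check of that identity (plain C++, sketch only):

#include <cassert>
#include <cstdint>

int main() {
  // For a power-of-two Size, masking with (Size - 1) equals Index % Size.
  for (uint32_t Size : {4u, 8u, 16u, 32u, 64u})
    for (uint32_t Index = 0; Index < 200; ++Index)
      assert((Index & (Size - 1)) == Index % Size);
}
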
@@ -962,6 +1034,36 @@ static Value *simplifyX86vpcom(const IntrinsicInst &II,
return nullptr;
}
+// Emit a select instruction and appropriate bitcasts to help simplify
+// masked intrinsics.
+static Value *emitX86MaskSelect(Value *Mask, Value *Op0, Value *Op1,
+ InstCombiner::BuilderTy &Builder) {
+ unsigned VWidth = Op0->getType()->getVectorNumElements();
+
+ // If the mask is all ones, we don't need the select. But we need to check
+ // only the bits that will be used in case VWidth is less than 8.
+ if (auto *C = dyn_cast<ConstantInt>(Mask))
+ if (C->getValue().zextOrTrunc(VWidth).isAllOnesValue())
+ return Op0;
+
+ auto *MaskTy = VectorType::get(Builder.getInt1Ty(),
+ cast<IntegerType>(Mask->getType())->getBitWidth());
+ Mask = Builder.CreateBitCast(Mask, MaskTy);
+
+ // If we have less than 8 elements, then the starting mask was an i8 and
+ // we need to extract down to the right number of elements.
+ if (VWidth < 8) {
+ uint32_t Indices[4];
+ for (unsigned i = 0; i != VWidth; ++i)
+ Indices[i] = i;
+ Mask = Builder.CreateShuffleVector(Mask, Mask,
+ makeArrayRef(Indices, VWidth),
+ "extract");
+ }
+
+ return Builder.CreateSelect(Mask, Op0, Op1);
+}
+
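
For readers unfamiliar with the AVX-512 mask convention that emitX86MaskSelect models: bit i of the integer mask chooses between element i of Op0 and Op1, and only the low VWidth bits matter (hence the all-ones early-out and the shuffle that narrows the i1 vector when VWidth < 8). A scalar model of the semantics (plain C++ sketch, not LLVM code):

#include <cassert>
#include <cstdint>
#include <vector>

// result[i] = bit i of Mask ? Op0[i] : Op1[i]
std::vector<double> maskSelect(uint8_t Mask, const std::vector<double> &Op0,
                               const std::vector<double> &Op1) {
  std::vector<double> R(Op0.size());
  for (size_t I = 0; I < Op0.size(); ++I)
    R[I] = ((Mask >> I) & 1) ? Op0[I] : Op1[I];
  return R;
}

int main() {
  std::vector<double> A = {1, 2, 3, 4}, B = {9, 9, 9, 9};
  // VWidth == 4: only the low four mask bits are used, so 0xF and 0xFF both
  // act as "all ones" and simply return Op0 -- the early-out above.
  assert(maskSelect(0xF, A, B) == A);
  assert(maskSelect(0xFF, A, B) == A);
  assert((maskSelect(0x5, A, B) == std::vector<double>{1, 9, 3, 9}));
}
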
static Value *simplifyMinnumMaxnum(const IntrinsicInst &II) {
Value *Arg0 = II.getArgOperand(0);
Value *Arg1 = II.getArgOperand(1);
@@ -1104,6 +1206,50 @@ static Instruction *simplifyMaskedScatter(IntrinsicInst &II, InstCombiner &IC) {
return nullptr;
}
+static Instruction *foldCttzCtlz(IntrinsicInst &II, InstCombiner &IC) {
+ assert((II.getIntrinsicID() == Intrinsic::cttz ||
+ II.getIntrinsicID() == Intrinsic::ctlz) &&
+ "Expected cttz or ctlz intrinsic");
+ Value *Op0 = II.getArgOperand(0);
+ // FIXME: Try to simplify vectors of integers.
+ auto *IT = dyn_cast<IntegerType>(Op0->getType());
+ if (!IT)
+ return nullptr;
+
+ unsigned BitWidth = IT->getBitWidth();
+ APInt KnownZero(BitWidth, 0);
+ APInt KnownOne(BitWidth, 0);
+ IC.computeKnownBits(Op0, KnownZero, KnownOne, 0, &II);
+
+ // Create a mask for bits above (ctlz) or below (cttz) the first known one.
+ bool IsTZ = II.getIntrinsicID() == Intrinsic::cttz;
+ unsigned NumMaskBits = IsTZ ? KnownOne.countTrailingZeros()
+ : KnownOne.countLeadingZeros();
+ APInt Mask = IsTZ ? APInt::getLowBitsSet(BitWidth, NumMaskBits)
+ : APInt::getHighBitsSet(BitWidth, NumMaskBits);
+
+ // If all bits above (ctlz) or below (cttz) the first known one are known
+ // zero, this value is constant.
+ // FIXME: This should be in InstSimplify because we're replacing an
+ // instruction with a constant.
+ if ((Mask & KnownZero) == Mask) {
+ auto *C = ConstantInt::get(IT, APInt(BitWidth, NumMaskBits));
+ return IC.replaceInstUsesWith(II, C);
+ }
+
+ // If the input to cttz/ctlz is known to be non-zero,
+ // then change the 'ZeroIsUndef' parameter to 'true'
+ // because we know the zero behavior can't affect the result.
+ if (KnownOne != 0 || isKnownNonZero(Op0, IC.getDataLayout())) {
+ if (!match(II.getArgOperand(1), m_One())) {
+ II.setOperand(1, IC.Builder->getTrue());
+ return &II;
+ }
+ }
+
+ return nullptr;
+}
+
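
The reasoning in foldCttzCtlz is purely a known-bits argument: if every bit below (for cttz) or above (for ctlz) the first bit proven to be one is proven to be zero, the count is a compile-time constant. A numeric sketch of the cttz side (plain C++; __builtin_ctz stands in for APInt::countTrailingZeros and assumes a non-zero argument):

#include <cassert>
#include <cstdint>

int main() {
  // Suppose analysis proved bit 4 is one and bits 0..3 are zero, e.g. the
  // value is known to be of the form (x << 5) | 16.
  uint32_t KnownOne = 0b10000;
  uint32_t KnownZero = 0b01111;

  unsigned NumMaskBits = __builtin_ctz(KnownOne);  // 4
  uint32_t Mask = (1u << NumMaskBits) - 1;         // the bits below the known one
  assert((Mask & KnownZero) == Mask);              // ...are all known zero

  // Hence cttz(V) == 4 for every V consistent with those known bits.
  for (uint32_t Upper = 0; Upper < 16; ++Upper) {
    uint32_t V = (Upper << 5) | 0b10000;
    assert(__builtin_ctz(V) == 4);
  }
}
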
// TODO: If the x86 backend knew how to convert a bool vector mask back to an
// XMM register mask efficiently, we could transform all x86 masked intrinsics
// to LLVM masked intrinsics and remove the x86 masked intrinsic defs.
@@ -1243,16 +1389,15 @@ Instruction *InstCombiner::visitVACopyInst(VACopyInst &I) {
Instruction *InstCombiner::visitCallInst(CallInst &CI) {
auto Args = CI.arg_operands();
if (Value *V = SimplifyCall(CI.getCalledValue(), Args.begin(), Args.end(), DL,
- TLI, DT, AC))
+ &TLI, &DT, &AC))
return replaceInstUsesWith(CI, V);
- if (isFreeCall(&CI, TLI))
+ if (isFreeCall(&CI, &TLI))
return visitFree(CI);
// If the caller function is nounwind, mark the call as nounwind, even if the
// callee isn't.
- if (CI.getParent()->getParent()->doesNotThrow() &&
- !CI.doesNotThrow()) {
+ if (CI.getFunction()->doesNotThrow() && !CI.doesNotThrow()) {
CI.setDoesNotThrow();
return &CI;
}
@@ -1323,26 +1468,15 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
APInt DemandedElts = APInt::getLowBitsSet(Width, DemandedWidth);
return SimplifyDemandedVectorElts(Op, DemandedElts, UndefElts);
};
- auto SimplifyDemandedVectorEltsHigh = [this](Value *Op, unsigned Width,
- unsigned DemandedWidth) {
- APInt UndefElts(Width, 0);
- APInt DemandedElts = APInt::getHighBitsSet(Width, DemandedWidth);
- return SimplifyDemandedVectorElts(Op, DemandedElts, UndefElts);
- };
switch (II->getIntrinsicID()) {
default: break;
- case Intrinsic::objectsize: {
- uint64_t Size;
- if (getObjectSize(II->getArgOperand(0), Size, DL, TLI)) {
- APInt APSize(II->getType()->getIntegerBitWidth(), Size);
- // Equality check to be sure that `Size` can fit in a value of type
- // `II->getType()`
- if (APSize == Size)
- return replaceInstUsesWith(CI, ConstantInt::get(II->getType(), APSize));
- }
+ case Intrinsic::objectsize:
+ if (ConstantInt *N =
+ lowerObjectSizeCall(II, DL, &TLI, /*MustSucceed=*/false))
+ return replaceInstUsesWith(CI, N);
return nullptr;
- }
+
case Intrinsic::bswap: {
Value *IIOperand = II->getArgOperand(0);
Value *X = nullptr;
@@ -1397,41 +1531,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
II->getArgOperand(0));
}
break;
- case Intrinsic::cttz: {
- // If all bits below the first known one are known zero,
- // this value is constant.
- IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType());
- // FIXME: Try to simplify vectors of integers.
- if (!IT) break;
- uint32_t BitWidth = IT->getBitWidth();
- APInt KnownZero(BitWidth, 0);
- APInt KnownOne(BitWidth, 0);
- computeKnownBits(II->getArgOperand(0), KnownZero, KnownOne, 0, II);
- unsigned TrailingZeros = KnownOne.countTrailingZeros();
- APInt Mask(APInt::getLowBitsSet(BitWidth, TrailingZeros));
- if ((Mask & KnownZero) == Mask)
- return replaceInstUsesWith(CI, ConstantInt::get(IT,
- APInt(BitWidth, TrailingZeros)));
- }
- break;
- case Intrinsic::ctlz: {
- // If all bits above the first known one are known zero,
- // this value is constant.
- IntegerType *IT = dyn_cast<IntegerType>(II->getArgOperand(0)->getType());
- // FIXME: Try to simplify vectors of integers.
- if (!IT) break;
- uint32_t BitWidth = IT->getBitWidth();
- APInt KnownZero(BitWidth, 0);
- APInt KnownOne(BitWidth, 0);
- computeKnownBits(II->getArgOperand(0), KnownZero, KnownOne, 0, II);
- unsigned LeadingZeros = KnownOne.countLeadingZeros();
- APInt Mask(APInt::getHighBitsSet(BitWidth, LeadingZeros));
- if ((Mask & KnownZero) == Mask)
- return replaceInstUsesWith(CI, ConstantInt::get(IT,
- APInt(BitWidth, LeadingZeros)));
-
- }
+ case Intrinsic::cttz:
+ case Intrinsic::ctlz:
+ if (auto *I = foldCttzCtlz(*II, *this))
+ return I;
break;
case Intrinsic::uadd_with_overflow:
@@ -1446,7 +1550,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
II->setArgOperand(1, LHS);
return II;
}
- // fall through
+ LLVM_FALLTHROUGH;
case Intrinsic::usub_with_overflow:
case Intrinsic::ssub_with_overflow: {
@@ -1480,8 +1584,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::ppc_altivec_lvx:
case Intrinsic::ppc_altivec_lvxl:
// Turn PPC lvx -> load if the pointer is known aligned.
- if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, AC, DT) >=
- 16) {
+ if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, &AC,
+ &DT) >= 16) {
Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
PointerType::getUnqual(II->getType()));
return new LoadInst(Ptr);
@@ -1497,8 +1601,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::ppc_altivec_stvx:
case Intrinsic::ppc_altivec_stvxl:
// Turn stvx -> store if the pointer is known aligned.
- if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, AC, DT) >=
- 16) {
+ if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, &AC,
+ &DT) >= 16) {
Type *OpPtrTy =
PointerType::getUnqual(II->getArgOperand(0)->getType());
Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
@@ -1514,8 +1618,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
case Intrinsic::ppc_qpx_qvlfs:
// Turn PPC QPX qvlfs -> load if the pointer is known aligned.
- if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, AC, DT) >=
- 16) {
+ if (getOrEnforceKnownAlignment(II->getArgOperand(0), 16, DL, II, &AC,
+ &DT) >= 16) {
Type *VTy = VectorType::get(Builder->getFloatTy(),
II->getType()->getVectorNumElements());
Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
@@ -1526,8 +1630,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
case Intrinsic::ppc_qpx_qvlfd:
// Turn PPC QPX qvlfd -> load if the pointer is known aligned.
- if (getOrEnforceKnownAlignment(II->getArgOperand(0), 32, DL, II, AC, DT) >=
- 32) {
+ if (getOrEnforceKnownAlignment(II->getArgOperand(0), 32, DL, II, &AC,
+ &DT) >= 32) {
Value *Ptr = Builder->CreateBitCast(II->getArgOperand(0),
PointerType::getUnqual(II->getType()));
return new LoadInst(Ptr);
@@ -1535,8 +1639,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
case Intrinsic::ppc_qpx_qvstfs:
// Turn PPC QPX qvstfs -> store if the pointer is known aligned.
- if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, AC, DT) >=
- 16) {
+ if (getOrEnforceKnownAlignment(II->getArgOperand(1), 16, DL, II, &AC,
+ &DT) >= 16) {
Type *VTy = VectorType::get(Builder->getFloatTy(),
II->getArgOperand(0)->getType()->getVectorNumElements());
Value *TOp = Builder->CreateFPTrunc(II->getArgOperand(0), VTy);
@@ -1547,8 +1651,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
case Intrinsic::ppc_qpx_qvstfd:
// Turn PPC QPX qvstfd -> store if the pointer is known aligned.
- if (getOrEnforceKnownAlignment(II->getArgOperand(1), 32, DL, II, AC, DT) >=
- 32) {
+ if (getOrEnforceKnownAlignment(II->getArgOperand(1), 32, DL, II, &AC,
+ &DT) >= 32) {
Type *OpPtrTy =
PointerType::getUnqual(II->getArgOperand(0)->getType());
Value *Ptr = Builder->CreateBitCast(II->getArgOperand(1), OpPtrTy);
@@ -1607,7 +1711,23 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::x86_sse2_cvtsd2si:
case Intrinsic::x86_sse2_cvtsd2si64:
case Intrinsic::x86_sse2_cvttsd2si:
- case Intrinsic::x86_sse2_cvttsd2si64: {
+ case Intrinsic::x86_sse2_cvttsd2si64:
+ case Intrinsic::x86_avx512_vcvtss2si32:
+ case Intrinsic::x86_avx512_vcvtss2si64:
+ case Intrinsic::x86_avx512_vcvtss2usi32:
+ case Intrinsic::x86_avx512_vcvtss2usi64:
+ case Intrinsic::x86_avx512_vcvtsd2si32:
+ case Intrinsic::x86_avx512_vcvtsd2si64:
+ case Intrinsic::x86_avx512_vcvtsd2usi32:
+ case Intrinsic::x86_avx512_vcvtsd2usi64:
+ case Intrinsic::x86_avx512_cvttss2si:
+ case Intrinsic::x86_avx512_cvttss2si64:
+ case Intrinsic::x86_avx512_cvttss2usi:
+ case Intrinsic::x86_avx512_cvttss2usi64:
+ case Intrinsic::x86_avx512_cvttsd2si:
+ case Intrinsic::x86_avx512_cvttsd2si64:
+ case Intrinsic::x86_avx512_cvttsd2usi:
+ case Intrinsic::x86_avx512_cvttsd2usi64: {
// These intrinsics only demand the 0th element of their input vectors. If
// we can simplify the input based on that, do so now.
Value *Arg = II->getArgOperand(0);
@@ -1654,7 +1774,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::x86_sse2_ucomigt_sd:
case Intrinsic::x86_sse2_ucomile_sd:
case Intrinsic::x86_sse2_ucomilt_sd:
- case Intrinsic::x86_sse2_ucomineq_sd: {
+ case Intrinsic::x86_sse2_ucomineq_sd:
+ case Intrinsic::x86_avx512_vcomi_ss:
+ case Intrinsic::x86_avx512_vcomi_sd:
+ case Intrinsic::x86_avx512_mask_cmp_ss:
+ case Intrinsic::x86_avx512_mask_cmp_sd: {
// These intrinsics only demand the 0th element of their input vectors. If
// we can simplify the input based on that, do so now.
bool MadeChange = false;
@@ -1674,50 +1798,155 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
}
- case Intrinsic::x86_sse_add_ss:
- case Intrinsic::x86_sse_sub_ss:
- case Intrinsic::x86_sse_mul_ss:
- case Intrinsic::x86_sse_div_ss:
+ case Intrinsic::x86_avx512_mask_add_ps_512:
+ case Intrinsic::x86_avx512_mask_div_ps_512:
+ case Intrinsic::x86_avx512_mask_mul_ps_512:
+ case Intrinsic::x86_avx512_mask_sub_ps_512:
+ case Intrinsic::x86_avx512_mask_add_pd_512:
+ case Intrinsic::x86_avx512_mask_div_pd_512:
+ case Intrinsic::x86_avx512_mask_mul_pd_512:
+ case Intrinsic::x86_avx512_mask_sub_pd_512:
+ // If the rounding mode is CUR_DIRECTION(4) we can turn these into regular
+ // IR operations.
+ if (auto *R = dyn_cast<ConstantInt>(II->getArgOperand(4))) {
+ if (R->getValue() == 4) {
+ Value *Arg0 = II->getArgOperand(0);
+ Value *Arg1 = II->getArgOperand(1);
+
+ Value *V;
+ switch (II->getIntrinsicID()) {
+ default: llvm_unreachable("Case stmts out of sync!");
+ case Intrinsic::x86_avx512_mask_add_ps_512:
+ case Intrinsic::x86_avx512_mask_add_pd_512:
+ V = Builder->CreateFAdd(Arg0, Arg1);
+ break;
+ case Intrinsic::x86_avx512_mask_sub_ps_512:
+ case Intrinsic::x86_avx512_mask_sub_pd_512:
+ V = Builder->CreateFSub(Arg0, Arg1);
+ break;
+ case Intrinsic::x86_avx512_mask_mul_ps_512:
+ case Intrinsic::x86_avx512_mask_mul_pd_512:
+ V = Builder->CreateFMul(Arg0, Arg1);
+ break;
+ case Intrinsic::x86_avx512_mask_div_ps_512:
+ case Intrinsic::x86_avx512_mask_div_pd_512:
+ V = Builder->CreateFDiv(Arg0, Arg1);
+ break;
+ }
+
+ // Create a select for the masking.
+ V = emitX86MaskSelect(II->getArgOperand(3), V, II->getArgOperand(2),
+ *Builder);
+ return replaceInstUsesWith(*II, V);
+ }
+ }
+ break;
+
+ case Intrinsic::x86_avx512_mask_add_ss_round:
+ case Intrinsic::x86_avx512_mask_div_ss_round:
+ case Intrinsic::x86_avx512_mask_mul_ss_round:
+ case Intrinsic::x86_avx512_mask_sub_ss_round:
+ case Intrinsic::x86_avx512_mask_add_sd_round:
+ case Intrinsic::x86_avx512_mask_div_sd_round:
+ case Intrinsic::x86_avx512_mask_mul_sd_round:
+ case Intrinsic::x86_avx512_mask_sub_sd_round:
+ // If the rounding mode is CUR_DIRECTION(4) we can turn these into regular
+ // IR operations.
+ if (auto *R = dyn_cast<ConstantInt>(II->getArgOperand(4))) {
+ if (R->getValue() == 4) {
+ // Extract the element as scalars.
+ Value *Arg0 = II->getArgOperand(0);
+ Value *Arg1 = II->getArgOperand(1);
+ Value *LHS = Builder->CreateExtractElement(Arg0, (uint64_t)0);
+ Value *RHS = Builder->CreateExtractElement(Arg1, (uint64_t)0);
+
+ Value *V;
+ switch (II->getIntrinsicID()) {
+ default: llvm_unreachable("Case stmts out of sync!");
+ case Intrinsic::x86_avx512_mask_add_ss_round:
+ case Intrinsic::x86_avx512_mask_add_sd_round:
+ V = Builder->CreateFAdd(LHS, RHS);
+ break;
+ case Intrinsic::x86_avx512_mask_sub_ss_round:
+ case Intrinsic::x86_avx512_mask_sub_sd_round:
+ V = Builder->CreateFSub(LHS, RHS);
+ break;
+ case Intrinsic::x86_avx512_mask_mul_ss_round:
+ case Intrinsic::x86_avx512_mask_mul_sd_round:
+ V = Builder->CreateFMul(LHS, RHS);
+ break;
+ case Intrinsic::x86_avx512_mask_div_ss_round:
+ case Intrinsic::x86_avx512_mask_div_sd_round:
+ V = Builder->CreateFDiv(LHS, RHS);
+ break;
+ }
+
+ // Handle the masking aspect of the intrinsic.
+ Value *Mask = II->getArgOperand(3);
+ auto *C = dyn_cast<ConstantInt>(Mask);
+ // We don't need a select if we know the mask bit is a 1.
+ if (!C || !C->getValue()[0]) {
+ // Cast the mask to an i1 vector and then extract the lowest element.
+ auto *MaskTy = VectorType::get(Builder->getInt1Ty(),
+ cast<IntegerType>(Mask->getType())->getBitWidth());
+ Mask = Builder->CreateBitCast(Mask, MaskTy);
+ Mask = Builder->CreateExtractElement(Mask, (uint64_t)0);
+ // Extract the lowest element from the passthru operand.
+ Value *Passthru = Builder->CreateExtractElement(II->getArgOperand(2),
+ (uint64_t)0);
+ V = Builder->CreateSelect(Mask, V, Passthru);
+ }
+
+ // Insert the result back into the original argument 0.
+ V = Builder->CreateInsertElement(Arg0, V, (uint64_t)0);
+
+ return replaceInstUsesWith(*II, V);
+ }
+ }
+ LLVM_FALLTHROUGH;
+
+ // X86 scalar intrinsics simplified with SimplifyDemandedVectorElts.
+ case Intrinsic::x86_avx512_mask_max_ss_round:
+ case Intrinsic::x86_avx512_mask_min_ss_round:
+ case Intrinsic::x86_avx512_mask_max_sd_round:
+ case Intrinsic::x86_avx512_mask_min_sd_round:
+ case Intrinsic::x86_avx512_mask_vfmadd_ss:
+ case Intrinsic::x86_avx512_mask_vfmadd_sd:
+ case Intrinsic::x86_avx512_maskz_vfmadd_ss:
+ case Intrinsic::x86_avx512_maskz_vfmadd_sd:
+ case Intrinsic::x86_avx512_mask3_vfmadd_ss:
+ case Intrinsic::x86_avx512_mask3_vfmadd_sd:
+ case Intrinsic::x86_avx512_mask3_vfmsub_ss:
+ case Intrinsic::x86_avx512_mask3_vfmsub_sd:
+ case Intrinsic::x86_avx512_mask3_vfnmsub_ss:
+ case Intrinsic::x86_avx512_mask3_vfnmsub_sd:
+ case Intrinsic::x86_fma_vfmadd_ss:
+ case Intrinsic::x86_fma_vfmsub_ss:
+ case Intrinsic::x86_fma_vfnmadd_ss:
+ case Intrinsic::x86_fma_vfnmsub_ss:
+ case Intrinsic::x86_fma_vfmadd_sd:
+ case Intrinsic::x86_fma_vfmsub_sd:
+ case Intrinsic::x86_fma_vfnmadd_sd:
+ case Intrinsic::x86_fma_vfnmsub_sd:
+ case Intrinsic::x86_sse_cmp_ss:
case Intrinsic::x86_sse_min_ss:
case Intrinsic::x86_sse_max_ss:
- case Intrinsic::x86_sse_cmp_ss:
- case Intrinsic::x86_sse2_add_sd:
- case Intrinsic::x86_sse2_sub_sd:
- case Intrinsic::x86_sse2_mul_sd:
- case Intrinsic::x86_sse2_div_sd:
+ case Intrinsic::x86_sse2_cmp_sd:
case Intrinsic::x86_sse2_min_sd:
case Intrinsic::x86_sse2_max_sd:
- case Intrinsic::x86_sse2_cmp_sd: {
- // These intrinsics only demand the lowest element of the second input
- // vector.
- Value *Arg1 = II->getArgOperand(1);
- unsigned VWidth = Arg1->getType()->getVectorNumElements();
- if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, 1)) {
- II->setArgOperand(1, V);
- return II;
- }
- break;
- }
-
case Intrinsic::x86_sse41_round_ss:
- case Intrinsic::x86_sse41_round_sd: {
- // These intrinsics demand the upper elements of the first input vector and
- // the lowest element of the second input vector.
- bool MadeChange = false;
- Value *Arg0 = II->getArgOperand(0);
- Value *Arg1 = II->getArgOperand(1);
- unsigned VWidth = Arg0->getType()->getVectorNumElements();
- if (Value *V = SimplifyDemandedVectorEltsHigh(Arg0, VWidth, VWidth - 1)) {
- II->setArgOperand(0, V);
- MadeChange = true;
- }
- if (Value *V = SimplifyDemandedVectorEltsLow(Arg1, VWidth, 1)) {
- II->setArgOperand(1, V);
- MadeChange = true;
- }
- if (MadeChange)
- return II;
- break;
+ case Intrinsic::x86_sse41_round_sd:
+ case Intrinsic::x86_xop_vfrcz_ss:
+ case Intrinsic::x86_xop_vfrcz_sd: {
+ unsigned VWidth = II->getType()->getVectorNumElements();
+ APInt UndefElts(VWidth, 0);
+ APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
+ if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, UndefElts)) {
+ if (V != II)
+ return replaceInstUsesWith(*II, V);
+ return II;
+ }
+ break;
}
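
A note on the AVX-512 scalar arithmetic handling added above (the *_ss_round / *_sd_round cases): when the rounding-mode operand is CUR_DIRECTION (4), the intrinsic is just an ordinary FP operation on element 0, gated by bit 0 of the mask, with the remaining elements passed through from the first operand. A scalar model of that behaviour for the add case (plain C++ sketch; names are illustrative only):

#include <array>
#include <cassert>

using V4 = std::array<float, 4>;

V4 maskedAddSS(V4 A, V4 B, V4 Passthru, unsigned Mask) {
  V4 R = A;                              // upper elements come from operand A
  float Lo = A[0] + B[0];                // the actual FP operation
  R[0] = (Mask & 1) ? Lo : Passthru[0];  // bit 0 of the mask gates element 0
  return R;
}

int main() {
  V4 A = {1, 10, 20, 30}, B = {2, 0, 0, 0}, P = {-1, -1, -1, -1};
  assert((maskedAddSS(A, B, P, 1) == V4{3, 10, 20, 30}));
  assert((maskedAddSS(A, B, P, 0) == V4{-1, 10, 20, 30}));
}
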
// Constant fold ashr( <A x Bi>, Ci ).
@@ -1727,18 +1956,29 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::x86_sse2_psrai_w:
case Intrinsic::x86_avx2_psrai_d:
case Intrinsic::x86_avx2_psrai_w:
+ case Intrinsic::x86_avx512_psrai_q_128:
+ case Intrinsic::x86_avx512_psrai_q_256:
+ case Intrinsic::x86_avx512_psrai_d_512:
+ case Intrinsic::x86_avx512_psrai_q_512:
+ case Intrinsic::x86_avx512_psrai_w_512:
case Intrinsic::x86_sse2_psrli_d:
case Intrinsic::x86_sse2_psrli_q:
case Intrinsic::x86_sse2_psrli_w:
case Intrinsic::x86_avx2_psrli_d:
case Intrinsic::x86_avx2_psrli_q:
case Intrinsic::x86_avx2_psrli_w:
+ case Intrinsic::x86_avx512_psrli_d_512:
+ case Intrinsic::x86_avx512_psrli_q_512:
+ case Intrinsic::x86_avx512_psrli_w_512:
case Intrinsic::x86_sse2_pslli_d:
case Intrinsic::x86_sse2_pslli_q:
case Intrinsic::x86_sse2_pslli_w:
case Intrinsic::x86_avx2_pslli_d:
case Intrinsic::x86_avx2_pslli_q:
case Intrinsic::x86_avx2_pslli_w:
+ case Intrinsic::x86_avx512_pslli_d_512:
+ case Intrinsic::x86_avx512_pslli_q_512:
+ case Intrinsic::x86_avx512_pslli_w_512:
if (Value *V = simplifyX86immShift(*II, *Builder))
return replaceInstUsesWith(*II, V);
break;
@@ -1747,18 +1987,29 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::x86_sse2_psra_w:
case Intrinsic::x86_avx2_psra_d:
case Intrinsic::x86_avx2_psra_w:
+ case Intrinsic::x86_avx512_psra_q_128:
+ case Intrinsic::x86_avx512_psra_q_256:
+ case Intrinsic::x86_avx512_psra_d_512:
+ case Intrinsic::x86_avx512_psra_q_512:
+ case Intrinsic::x86_avx512_psra_w_512:
case Intrinsic::x86_sse2_psrl_d:
case Intrinsic::x86_sse2_psrl_q:
case Intrinsic::x86_sse2_psrl_w:
case Intrinsic::x86_avx2_psrl_d:
case Intrinsic::x86_avx2_psrl_q:
case Intrinsic::x86_avx2_psrl_w:
+ case Intrinsic::x86_avx512_psrl_d_512:
+ case Intrinsic::x86_avx512_psrl_q_512:
+ case Intrinsic::x86_avx512_psrl_w_512:
case Intrinsic::x86_sse2_psll_d:
case Intrinsic::x86_sse2_psll_q:
case Intrinsic::x86_sse2_psll_w:
case Intrinsic::x86_avx2_psll_d:
case Intrinsic::x86_avx2_psll_q:
- case Intrinsic::x86_avx2_psll_w: {
+ case Intrinsic::x86_avx2_psll_w:
+ case Intrinsic::x86_avx512_psll_d_512:
+ case Intrinsic::x86_avx512_psll_q_512:
+ case Intrinsic::x86_avx512_psll_w_512: {
if (Value *V = simplifyX86immShift(*II, *Builder))
return replaceInstUsesWith(*II, V);
@@ -1780,16 +2031,50 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::x86_avx2_psllv_d_256:
case Intrinsic::x86_avx2_psllv_q:
case Intrinsic::x86_avx2_psllv_q_256:
+ case Intrinsic::x86_avx512_psllv_d_512:
+ case Intrinsic::x86_avx512_psllv_q_512:
+ case Intrinsic::x86_avx512_psllv_w_128:
+ case Intrinsic::x86_avx512_psllv_w_256:
+ case Intrinsic::x86_avx512_psllv_w_512:
case Intrinsic::x86_avx2_psrav_d:
case Intrinsic::x86_avx2_psrav_d_256:
+ case Intrinsic::x86_avx512_psrav_q_128:
+ case Intrinsic::x86_avx512_psrav_q_256:
+ case Intrinsic::x86_avx512_psrav_d_512:
+ case Intrinsic::x86_avx512_psrav_q_512:
+ case Intrinsic::x86_avx512_psrav_w_128:
+ case Intrinsic::x86_avx512_psrav_w_256:
+ case Intrinsic::x86_avx512_psrav_w_512:
case Intrinsic::x86_avx2_psrlv_d:
case Intrinsic::x86_avx2_psrlv_d_256:
case Intrinsic::x86_avx2_psrlv_q:
case Intrinsic::x86_avx2_psrlv_q_256:
+ case Intrinsic::x86_avx512_psrlv_d_512:
+ case Intrinsic::x86_avx512_psrlv_q_512:
+ case Intrinsic::x86_avx512_psrlv_w_128:
+ case Intrinsic::x86_avx512_psrlv_w_256:
+ case Intrinsic::x86_avx512_psrlv_w_512:
if (Value *V = simplifyX86varShift(*II, *Builder))
return replaceInstUsesWith(*II, V);
break;
+ case Intrinsic::x86_sse2_pmulu_dq:
+ case Intrinsic::x86_sse41_pmuldq:
+ case Intrinsic::x86_avx2_pmul_dq:
+ case Intrinsic::x86_avx2_pmulu_dq:
+ case Intrinsic::x86_avx512_pmul_dq_512:
+ case Intrinsic::x86_avx512_pmulu_dq_512: {
+ unsigned VWidth = II->getType()->getVectorNumElements();
+ APInt UndefElts(VWidth, 0);
+ APInt DemandedElts = APInt::getAllOnesValue(VWidth);
+ if (Value *V = SimplifyDemandedVectorElts(II, DemandedElts, UndefElts)) {
+ if (V != II)
+ return replaceInstUsesWith(*II, V);
+ return II;
+ }
+ break;
+ }
+
case Intrinsic::x86_sse41_insertps:
if (Value *V = simplifyX86insertps(*II, *Builder))
return replaceInstUsesWith(*II, V);
@@ -1807,10 +2092,10 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// See if we're dealing with constant values.
Constant *C1 = dyn_cast<Constant>(Op1);
ConstantInt *CILength =
- C1 ? dyn_cast<ConstantInt>(C1->getAggregateElement((unsigned)0))
+ C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)0))
: nullptr;
ConstantInt *CIIndex =
- C1 ? dyn_cast<ConstantInt>(C1->getAggregateElement((unsigned)1))
+ C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))
: nullptr;
// Attempt to simplify to a constant, shuffle vector or EXTRQI call.
@@ -1870,7 +2155,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// See if we're dealing with constant values.
Constant *C1 = dyn_cast<Constant>(Op1);
ConstantInt *CI11 =
- C1 ? dyn_cast<ConstantInt>(C1->getAggregateElement((unsigned)1))
+ C1 ? dyn_cast_or_null<ConstantInt>(C1->getAggregateElement((unsigned)1))
: nullptr;
// Attempt to simplify to a constant, shuffle vector or INSERTQI call.
@@ -1964,14 +2249,17 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::x86_ssse3_pshuf_b_128:
case Intrinsic::x86_avx2_pshuf_b:
+ case Intrinsic::x86_avx512_pshuf_b_512:
if (Value *V = simplifyX86pshufb(*II, *Builder))
return replaceInstUsesWith(*II, V);
break;
case Intrinsic::x86_avx_vpermilvar_ps:
case Intrinsic::x86_avx_vpermilvar_ps_256:
+ case Intrinsic::x86_avx512_vpermilvar_ps_512:
case Intrinsic::x86_avx_vpermilvar_pd:
case Intrinsic::x86_avx_vpermilvar_pd_256:
+ case Intrinsic::x86_avx512_vpermilvar_pd_512:
if (Value *V = simplifyX86vpermilvar(*II, *Builder))
return replaceInstUsesWith(*II, V);
break;
@@ -1982,6 +2270,28 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return replaceInstUsesWith(*II, V);
break;
+ case Intrinsic::x86_avx512_mask_permvar_df_256:
+ case Intrinsic::x86_avx512_mask_permvar_df_512:
+ case Intrinsic::x86_avx512_mask_permvar_di_256:
+ case Intrinsic::x86_avx512_mask_permvar_di_512:
+ case Intrinsic::x86_avx512_mask_permvar_hi_128:
+ case Intrinsic::x86_avx512_mask_permvar_hi_256:
+ case Intrinsic::x86_avx512_mask_permvar_hi_512:
+ case Intrinsic::x86_avx512_mask_permvar_qi_128:
+ case Intrinsic::x86_avx512_mask_permvar_qi_256:
+ case Intrinsic::x86_avx512_mask_permvar_qi_512:
+ case Intrinsic::x86_avx512_mask_permvar_sf_256:
+ case Intrinsic::x86_avx512_mask_permvar_sf_512:
+ case Intrinsic::x86_avx512_mask_permvar_si_256:
+ case Intrinsic::x86_avx512_mask_permvar_si_512:
+ if (Value *V = simplifyX86vpermv(*II, *Builder)) {
+ // We simplified the permuting, now create a select for the masking.
+ V = emitX86MaskSelect(II->getArgOperand(3), V, II->getArgOperand(2),
+ *Builder);
+ return replaceInstUsesWith(*II, V);
+ }
+ break;
+
case Intrinsic::x86_avx_vperm2f128_pd_256:
case Intrinsic::x86_avx_vperm2f128_ps_256:
case Intrinsic::x86_avx_vperm2f128_si_256:
@@ -2104,7 +2414,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
case Intrinsic::arm_neon_vst2lane:
case Intrinsic::arm_neon_vst3lane:
case Intrinsic::arm_neon_vst4lane: {
- unsigned MemAlign = getKnownAlignment(II->getArgOperand(0), DL, II, AC, DT);
+ unsigned MemAlign =
+ getKnownAlignment(II->getArgOperand(0), DL, II, &AC, &DT);
unsigned AlignArg = II->getNumArgOperands() - 1;
ConstantInt *IntrAlign = dyn_cast<ConstantInt>(II->getArgOperand(AlignArg));
if (IntrAlign && IntrAlign->getZExtValue() < MemAlign) {
@@ -2194,6 +2505,85 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
}
+ case Intrinsic::amdgcn_class: {
+ enum {
+ S_NAN = 1 << 0, // Signaling NaN
+ Q_NAN = 1 << 1, // Quiet NaN
+ N_INFINITY = 1 << 2, // Negative infinity
+ N_NORMAL = 1 << 3, // Negative normal
+ N_SUBNORMAL = 1 << 4, // Negative subnormal
+ N_ZERO = 1 << 5, // Negative zero
+ P_ZERO = 1 << 6, // Positive zero
+ P_SUBNORMAL = 1 << 7, // Positive subnormal
+ P_NORMAL = 1 << 8, // Positive normal
+ P_INFINITY = 1 << 9 // Positive infinity
+ };
+
+ const uint32_t FullMask = S_NAN | Q_NAN | N_INFINITY | N_NORMAL |
+ N_SUBNORMAL | N_ZERO | P_ZERO | P_SUBNORMAL | P_NORMAL | P_INFINITY;
+
+ Value *Src0 = II->getArgOperand(0);
+ Value *Src1 = II->getArgOperand(1);
+ const ConstantInt *CMask = dyn_cast<ConstantInt>(Src1);
+ if (!CMask) {
+ if (isa<UndefValue>(Src0))
+ return replaceInstUsesWith(*II, UndefValue::get(II->getType()));
+
+ if (isa<UndefValue>(Src1))
+ return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), false));
+ break;
+ }
+
+ uint32_t Mask = CMask->getZExtValue();
+
+ // If every class is being tested for, the result is true regardless of the value.
+ if ((Mask & FullMask) == FullMask)
+ return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), true));
+
+ if ((Mask & FullMask) == 0)
+ return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), false));
+
+ if (Mask == (S_NAN | Q_NAN)) {
+ // Equivalent of isnan. Replace with standard fcmp.
+ Value *FCmp = Builder->CreateFCmpUNO(Src0, Src0);
+ FCmp->takeName(II);
+ return replaceInstUsesWith(*II, FCmp);
+ }
+
+ const ConstantFP *CVal = dyn_cast<ConstantFP>(Src0);
+ if (!CVal) {
+ if (isa<UndefValue>(Src0))
+ return replaceInstUsesWith(*II, UndefValue::get(II->getType()));
+
+ // Clamp mask to used bits
+ if ((Mask & FullMask) != Mask) {
+ CallInst *NewCall = Builder->CreateCall(II->getCalledFunction(),
+ { Src0, ConstantInt::get(Src1->getType(), Mask & FullMask) }
+ );
+
+ NewCall->takeName(II);
+ return replaceInstUsesWith(*II, NewCall);
+ }
+
+ break;
+ }
+
+ const APFloat &Val = CVal->getValueAPF();
+
+ bool Result =
+ ((Mask & S_NAN) && Val.isNaN() && Val.isSignaling()) ||
+ ((Mask & Q_NAN) && Val.isNaN() && !Val.isSignaling()) ||
+ ((Mask & N_INFINITY) && Val.isInfinity() && Val.isNegative()) ||
+ ((Mask & N_NORMAL) && Val.isNormal() && Val.isNegative()) ||
+ ((Mask & N_SUBNORMAL) && Val.isDenormal() && Val.isNegative()) ||
+ ((Mask & N_ZERO) && Val.isZero() && Val.isNegative()) ||
+ ((Mask & P_ZERO) && Val.isZero() && !Val.isNegative()) ||
+ ((Mask & P_SUBNORMAL) && Val.isDenormal() && !Val.isNegative()) ||
+ ((Mask & P_NORMAL) && Val.isNormal() && !Val.isNegative()) ||
+ ((Mask & P_INFINITY) && Val.isInfinity() && !Val.isNegative());
+
+ return replaceInstUsesWith(*II, ConstantInt::get(II->getType(), Result));
+ }
case Intrinsic::stackrestore: {
// If the save is right next to the restore, remove the restore. This can
// happen when variable allocas are DCE'd.
@@ -2243,6 +2633,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
}
case Intrinsic::lifetime_start:
+ // ASan needs to poison memory to detect invalid accesses, which is possible
+ // even for an empty lifetime range.
+ if (II->getFunction()->hasFnAttribute(Attribute::SanitizeAddress))
+ break;
+
if (removeTriviallyEmptyRange(*II, Intrinsic::lifetime_start,
Intrinsic::lifetime_end, *this))
return nullptr;
@@ -2283,7 +2678,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
RHS->getType()->isPointerTy() &&
cast<Constant>(RHS)->isNullValue()) {
LoadInst* LI = cast<LoadInst>(LHS);
- if (isValidAssumeForContext(II, LI, DT)) {
+ if (isValidAssumeForContext(II, LI, &DT)) {
MDNode *MD = MDNode::get(II->getContext(), None);
LI->setMetadata(LLVMContext::MD_nonnull, MD);
return eraseInstFromFunction(*II);
@@ -2329,7 +2724,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
return replaceInstUsesWith(*II, ConstantPointerNull::get(PT));
// isKnownNonNull -> nonnull attribute
- if (isKnownNonNullAt(DerivedPtr, II, DT))
+ if (isKnownNonNullAt(DerivedPtr, II, &DT))
II->addAttribute(AttributeSet::ReturnIndex, Attribute::NonNull);
}
@@ -2389,7 +2784,7 @@ Instruction *InstCombiner::tryOptimizeCall(CallInst *CI) {
auto InstCombineRAUW = [this](Instruction *From, Value *With) {
replaceInstUsesWith(*From, With);
};
- LibCallSimplifier Simplifier(DL, TLI, InstCombineRAUW);
+ LibCallSimplifier Simplifier(DL, &TLI, InstCombineRAUW);
if (Value *With = Simplifier.optimizeCall(CI)) {
++NumSimplified;
return CI->use_empty() ? CI : replaceInstUsesWith(*CI, With);
@@ -2477,8 +2872,7 @@ static IntrinsicInst *findInitTrampoline(Value *Callee) {
/// Improvements for call and invoke instructions.
Instruction *InstCombiner::visitCallSite(CallSite CS) {
-
- if (isAllocLikeFn(CS.getInstruction(), TLI))
+ if (isAllocLikeFn(CS.getInstruction(), &TLI))
return visitAllocSite(*CS.getInstruction());
bool Changed = false;
@@ -2492,7 +2886,7 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
for (Value *V : CS.args()) {
if (V->getType()->isPointerTy() &&
!CS.paramHasAttr(ArgNo + 1, Attribute::NonNull) &&
- isKnownNonNullAt(V, CS.getInstruction(), DT))
+ isKnownNonNullAt(V, CS.getInstruction(), &DT))
Indices.push_back(ArgNo + 1);
ArgNo++;
}
@@ -2613,14 +3007,14 @@ Instruction *InstCombiner::visitCallSite(CallSite CS) {
/// If the callee is a constexpr cast of a function, attempt to move the cast to
/// the arguments of the call/invoke.
bool InstCombiner::transformConstExprCastCall(CallSite CS) {
- Function *Callee =
- dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
+ auto *Callee = dyn_cast<Function>(CS.getCalledValue()->stripPointerCasts());
if (!Callee)
return false;
- // The prototype of thunks are a lie, don't try to directly call such
- // functions.
+
+ // The prototype of a thunk is a lie. Don't directly call such a function.
if (Callee->hasFnAttribute("thunk"))
return false;
+
Instruction *Caller = CS.getInstruction();
const AttributeSet &CallerPAL = CS.getAttributes();
@@ -2842,8 +3236,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
CallInst *CI = cast<CallInst>(Caller);
NC = Builder->CreateCall(Callee, Args, OpBundles);
NC->takeName(CI);
- if (CI->isTailCall())
- cast<CallInst>(NC)->setTailCall();
+ cast<CallInst>(NC)->setTailCallKind(CI->getTailCallKind());
cast<CallInst>(NC)->setCallingConv(CI->getCallingConv());
cast<CallInst>(NC)->setAttributes(NewCallerPAL);
}
@@ -2966,7 +3359,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
++Idx;
++I;
- } while (1);
+ } while (true);
}
// Add any function attributes.
@@ -3001,7 +3394,7 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
++Idx;
++I;
- } while (1);
+ } while (true);
}
// Replace the trampoline call with a direct call. Let the generic
@@ -3027,10 +3420,10 @@ InstCombiner::transformCallThroughTrampoline(CallSite CS,
cast<InvokeInst>(NewCaller)->setAttributes(NewPAL);
} else {
NewCaller = CallInst::Create(NewCallee, NewArgs, OpBundles);
- if (cast<CallInst>(Caller)->isTailCall())
- cast<CallInst>(NewCaller)->setTailCall();
- cast<CallInst>(NewCaller)->
- setCallingConv(cast<CallInst>(Caller)->getCallingConv());
+ cast<CallInst>(NewCaller)->setTailCallKind(
+ cast<CallInst>(Caller)->getTailCallKind());
+ cast<CallInst>(NewCaller)->setCallingConv(
+ cast<CallInst>(Caller)->getCallingConv());
cast<CallInst>(NewCaller)->setAttributes(NewPAL);
}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
index 20556157188f..e74b590e2b7c 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "InstCombineInternal.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/PatternMatch.h"
@@ -161,8 +162,8 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
if (Constant *C = dyn_cast<Constant>(V)) {
C = ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/);
// If we got a constantexpr back, try to simplify it with DL info.
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- C = ConstantFoldConstantExpression(CE, DL, TLI);
+ if (Constant *FoldedC = ConstantFoldConstant(C, DL, &TLI))
+ C = FoldedC;
return C;
}
@@ -227,20 +228,14 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, Type *Ty,
return InsertNewInstWith(Res, *I);
}
+Instruction::CastOps InstCombiner::isEliminableCastPair(const CastInst *CI1,
+ const CastInst *CI2) {
+ Type *SrcTy = CI1->getSrcTy();
+ Type *MidTy = CI1->getDestTy();
+ Type *DstTy = CI2->getDestTy();
-/// This function is a wrapper around CastInst::isEliminableCastPair. It
-/// simply extracts arguments and returns what that function returns.
-static Instruction::CastOps
-isEliminableCastPair(const CastInst *CI, ///< First cast instruction
- unsigned opcode, ///< Opcode for the second cast
- Type *DstTy, ///< Target type for the second cast
- const DataLayout &DL) {
- Type *SrcTy = CI->getOperand(0)->getType(); // A from above
- Type *MidTy = CI->getType(); // B from above
-
- // Get the opcodes of the two Cast instructions
- Instruction::CastOps firstOp = Instruction::CastOps(CI->getOpcode());
- Instruction::CastOps secondOp = Instruction::CastOps(opcode);
+ Instruction::CastOps firstOp = Instruction::CastOps(CI1->getOpcode());
+ Instruction::CastOps secondOp = Instruction::CastOps(CI2->getOpcode());
Type *SrcIntPtrTy =
SrcTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(SrcTy) : nullptr;
Type *MidIntPtrTy =
@@ -260,54 +255,28 @@ isEliminableCastPair(const CastInst *CI, ///< First cast instruction
return Instruction::CastOps(Res);
}
-/// Return true if the cast from "V to Ty" actually results in any code being
-/// generated and is interesting to optimize out.
-/// If the cast can be eliminated by some other simple transformation, we prefer
-/// to do the simplification first.
-bool InstCombiner::ShouldOptimizeCast(Instruction::CastOps opc, const Value *V,
- Type *Ty) {
- // Noop casts and casts of constants should be eliminated trivially.
- if (V->getType() == Ty || isa<Constant>(V)) return false;
-
- // If this is another cast that can be eliminated, we prefer to have it
- // eliminated.
- if (const CastInst *CI = dyn_cast<CastInst>(V))
- if (isEliminableCastPair(CI, opc, Ty, DL))
- return false;
-
- // If this is a vector sext from a compare, then we don't want to break the
- // idiom where each element of the extended vector is either zero or all ones.
- if (opc == Instruction::SExt && isa<CmpInst>(V) && Ty->isVectorTy())
- return false;
-
- return true;
-}
-
-
/// @brief Implement the transforms common to all CastInst visitors.
Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
Value *Src = CI.getOperand(0);
- // Many cases of "cast of a cast" are eliminable. If it's eliminable we just
- // eliminate it now.
- if (CastInst *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast
- if (Instruction::CastOps opc =
- isEliminableCastPair(CSrc, CI.getOpcode(), CI.getType(), DL)) {
+ // Try to eliminate a cast of a cast.
+ if (auto *CSrc = dyn_cast<CastInst>(Src)) { // A->B->C cast
+ if (Instruction::CastOps NewOpc = isEliminableCastPair(CSrc, &CI)) {
// The first cast (CSrc) is eliminable so we need to fix up or replace
// the second cast (CI). CSrc will then have a good chance of being dead.
- return CastInst::Create(opc, CSrc->getOperand(0), CI.getType());
+ return CastInst::Create(NewOpc, CSrc->getOperand(0), CI.getType());
}
}
- // If we are casting a select then fold the cast into the select
- if (SelectInst *SI = dyn_cast<SelectInst>(Src))
+ // If we are casting a select, then fold the cast into the select.
+ if (auto *SI = dyn_cast<SelectInst>(Src))
if (Instruction *NV = FoldOpIntoSelect(CI, SI))
return NV;
- // If we are casting a PHI then fold the cast into the PHI
+ // If we are casting a PHI, then fold the cast into the PHI.
if (isa<PHINode>(Src)) {
- // We don't do this if this would create a PHI node with an illegal type if
- // it is currently legal.
+ // Don't do this if it would create a PHI node with an illegal type from a
+ // legal type.
if (!Src->getType()->isIntegerTy() || !CI.getType()->isIntegerTy() ||
ShouldChangeType(CI.getType(), Src->getType()))
if (Instruction *NV = FoldOpIntoPhi(CI))
@@ -474,19 +443,39 @@ static Instruction *foldVecTruncToExtElt(TruncInst &Trunc, InstCombiner &IC,
return ExtractElementInst::Create(VecInput, IC.Builder->getInt32(Elt));
}
+/// Try to narrow the width of bitwise logic instructions with constants.
+Instruction *InstCombiner::shrinkBitwiseLogic(TruncInst &Trunc) {
+ Type *SrcTy = Trunc.getSrcTy();
+ Type *DestTy = Trunc.getType();
+ if (isa<IntegerType>(SrcTy) && !ShouldChangeType(SrcTy, DestTy))
+ return nullptr;
+
+ BinaryOperator *LogicOp;
+ Constant *C;
+ if (!match(Trunc.getOperand(0), m_OneUse(m_BinOp(LogicOp))) ||
+ !LogicOp->isBitwiseLogicOp() ||
+ !match(LogicOp->getOperand(1), m_Constant(C)))
+ return nullptr;
+
+ // trunc (logic X, C) --> logic (trunc X, C')
+ Constant *NarrowC = ConstantExpr::getTrunc(C, DestTy);
+ Value *NarrowOp0 = Builder->CreateTrunc(LogicOp->getOperand(0), DestTy);
+ return BinaryOperator::Create(LogicOp->getOpcode(), NarrowOp0, NarrowC);
+}
+
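
shrinkBitwiseLogic narrows "trunc (logic X, C)" to "logic (trunc X, C')", and the shl transform added to visitTrunc below does the analogous narrowing for left shifts whose shift amount fits in the destination width. Both rest on truncation commuting with the operation; a quick numeric check (plain C++ sketch):

#include <cassert>
#include <cstdint>

int main() {
  uint64_t X = 0x123456789abcdef0ULL;
  uint64_t C = 0x0f0f0f0ff0f0f0f0ULL;

  // trunc(X op C) == trunc(X) op trunc(C) for the bitwise ops.
  assert(uint32_t(X & C) == (uint32_t(X) & uint32_t(C)));
  assert(uint32_t(X | C) == (uint32_t(X) | uint32_t(C)));
  assert(uint32_t(X ^ C) == (uint32_t(X) ^ uint32_t(C)));

  // trunc(X << S) == trunc(X) << S, provided S < 32 (the destination width).
  unsigned S = 5;
  assert(uint32_t(X << S) == (uint32_t(X) << S));
}
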
Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
if (Instruction *Result = commonCastTransforms(CI))
return Result;
// Test if the trunc is the user of a select which is part of a
// minimum or maximum operation. If so, don't do any more simplification.
- // Even simplifying demanded bits can break the canonical form of a
+ // Even simplifying demanded bits can break the canonical form of a
// min/max.
Value *LHS, *RHS;
if (SelectInst *SI = dyn_cast<SelectInst>(CI.getOperand(0)))
if (matchSelectPattern(SI, LHS, RHS).Flavor != SPF_UNKNOWN)
return nullptr;
-
+
// See if we can simplify any instructions used by the input whose sole
// purpose is to compute bits we don't care about.
if (SimplifyDemandedInstructionBits(CI))
@@ -562,14 +551,26 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
}
}
- // Transform "trunc (and X, cst)" -> "and (trunc X), cst" so long as the dest
- // type isn't non-native.
+ if (Instruction *I = shrinkBitwiseLogic(CI))
+ return I;
+
if (Src->hasOneUse() && isa<IntegerType>(SrcTy) &&
- ShouldChangeType(SrcTy, DestTy) &&
- match(Src, m_And(m_Value(A), m_ConstantInt(Cst)))) {
- Value *NewTrunc = Builder->CreateTrunc(A, DestTy, A->getName() + ".tr");
- return BinaryOperator::CreateAnd(NewTrunc,
- ConstantExpr::getTrunc(Cst, DestTy));
+ ShouldChangeType(SrcTy, DestTy)) {
+ // Transform "trunc (shl X, cst)" -> "shl (trunc X), cst" so long as the
+ // dest type is native and cst < dest size.
+ if (match(Src, m_Shl(m_Value(A), m_ConstantInt(Cst))) &&
+ !match(A, m_Shr(m_Value(), m_Constant()))) {
+ // Skip when the shifted value is itself a right shift by a constant:
+ // transforming it would undo a combine in FoldShiftByConstant and it is
+ // the extend-in-register pattern.
+ const unsigned DestSize = DestTy->getScalarSizeInBits();
+ if (Cst->getValue().ult(DestSize)) {
+ Value *NewTrunc = Builder->CreateTrunc(A, DestTy, A->getName() + ".tr");
+
+ return BinaryOperator::Create(
+ Instruction::Shl, NewTrunc,
+ ConstantInt::get(DestTy, Cst->getValue().trunc(DestSize)));
+ }
+ }
}
if (Instruction *I = foldVecTruncToExtElt(CI, *this, DL))
@@ -578,10 +579,8 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
return nullptr;
}
-/// Transform (zext icmp) to bitwise / integer operations in order to eliminate
-/// the icmp.
-Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
- bool DoXform) {
+Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, ZExtInst &CI,
+ bool DoTransform) {
// If we are just checking for a icmp eq of a single bit and zext'ing it
// to an integer, then shift the bit to the appropriate place and then
// cast to integer to avoid the comparison.
@@ -592,7 +591,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
// zext (x >s -1) to i32 --> (x>>u31)^1 true if signbit clear.
if ((ICI->getPredicate() == ICmpInst::ICMP_SLT && Op1CV == 0) ||
(ICI->getPredicate() == ICmpInst::ICMP_SGT && Op1CV.isAllOnesValue())) {
- if (!DoXform) return ICI;
+ if (!DoTransform) return ICI;
Value *In = ICI->getOperand(0);
Value *Sh = ConstantInt::get(In->getType(),
@@ -627,7 +626,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
APInt KnownZeroMask(~KnownZero);
if (KnownZeroMask.isPowerOf2()) { // Exactly 1 possible 1?
- if (!DoXform) return ICI;
+ if (!DoTransform) return ICI;
bool isNE = ICI->getPredicate() == ICmpInst::ICMP_NE;
if (Op1CV != 0 && (Op1CV != KnownZeroMask)) {
@@ -655,7 +654,9 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
if (CI.getType() == In->getType())
return replaceInstUsesWith(CI, In);
- return CastInst::CreateIntegerCast(In, CI.getType(), false/*ZExt*/);
+
+ Value *IntCast = Builder->CreateIntCast(In, CI.getType(), false);
+ return replaceInstUsesWith(CI, IntCast);
}
}
}
@@ -678,7 +679,7 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
APInt KnownBits = KnownZeroLHS | KnownOneLHS;
APInt UnknownBit = ~KnownBits;
if (UnknownBit.countPopulation() == 1) {
- if (!DoXform) return ICI;
+ if (!DoTransform) return ICI;
Value *Result = Builder->CreateXor(LHS, RHS);
@@ -760,9 +761,7 @@ static bool canEvaluateZExtd(Value *V, Type *Ty, unsigned &BitsToClear,
// If the operation is an AND/OR/XOR and the bits to clear are zero in the
// other side, BitsToClear is ok.
- if (Tmp == 0 &&
- (Opc == Instruction::And || Opc == Instruction::Or ||
- Opc == Instruction::Xor)) {
+ if (Tmp == 0 && I->isBitwiseLogicOp()) {
// We use MaskedValueIsZero here for generality, but the case we care
// about the most is constant RHS.
unsigned VSize = V->getType()->getScalarSizeInBits();
@@ -922,16 +921,26 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
BinaryOperator *SrcI = dyn_cast<BinaryOperator>(Src);
if (SrcI && SrcI->getOpcode() == Instruction::Or) {
- // zext (or icmp, icmp) --> or (zext icmp), (zext icmp) if at least one
- // of the (zext icmp) will be transformed.
+ // zext (or icmp, icmp) -> or (zext icmp), (zext icmp) if at least one
+ // of the (zext icmp) operands can be eliminated. If so, immediately
+ // perform the corresponding elimination.
ICmpInst *LHS = dyn_cast<ICmpInst>(SrcI->getOperand(0));
ICmpInst *RHS = dyn_cast<ICmpInst>(SrcI->getOperand(1));
if (LHS && RHS && LHS->hasOneUse() && RHS->hasOneUse() &&
(transformZExtICmp(LHS, CI, false) ||
transformZExtICmp(RHS, CI, false))) {
+ // zext (or icmp, icmp) -> or (zext icmp), (zext icmp)
Value *LCast = Builder->CreateZExt(LHS, CI.getType(), LHS->getName());
Value *RCast = Builder->CreateZExt(RHS, CI.getType(), RHS->getName());
- return BinaryOperator::Create(Instruction::Or, LCast, RCast);
+ BinaryOperator *Or = BinaryOperator::Create(Instruction::Or, LCast, RCast);
+
+ // Perform the elimination.
+ if (auto *LZExt = dyn_cast<ZExtInst>(LCast))
+ transformZExtICmp(LHS, *LZExt);
+ if (auto *RZExt = dyn_cast<ZExtInst>(RCast))
+ transformZExtICmp(RHS, *RZExt);
+
+ return Or;
}
}
@@ -952,14 +961,6 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
return BinaryOperator::CreateXor(Builder->CreateAnd(X, ZC), ZC);
}
- // zext (xor i1 X, true) to i32 --> xor (zext i1 X to i32), 1
- if (SrcI && SrcI->hasOneUse() &&
- SrcI->getType()->getScalarType()->isIntegerTy(1) &&
- match(SrcI, m_Not(m_Value(X))) && (!X->hasOneUse() || !isa<CmpInst>(X))) {
- Value *New = Builder->CreateZExt(X, CI.getType());
- return BinaryOperator::CreateXor(New, ConstantInt::get(CI.getType(), 1));
- }
-
return nullptr;
}
@@ -1132,7 +1133,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
Type *SrcTy = Src->getType(), *DestTy = CI.getType();
// If we know that the value being extended is positive, we can use a zext
- // instead.
+ // instead.
bool KnownZero, KnownOne;
ComputeSignBit(Src, KnownZero, KnownOne, 0, &CI);
if (KnownZero) {
@@ -1238,14 +1239,14 @@ static Value *lookThroughFPExtensions(Value *V) {
if (CFP->getType() == Type::getPPC_FP128Ty(V->getContext()))
return V; // No constant folding of this.
// See if the value can be truncated to half and then reextended.
- if (Value *V = fitsInFPType(CFP, APFloat::IEEEhalf))
+ if (Value *V = fitsInFPType(CFP, APFloat::IEEEhalf()))
return V;
// See if the value can be truncated to float and then reextended.
- if (Value *V = fitsInFPType(CFP, APFloat::IEEEsingle))
+ if (Value *V = fitsInFPType(CFP, APFloat::IEEEsingle()))
return V;
if (CFP->getType()->isDoubleTy())
return V; // Won't shrink.
- if (Value *V = fitsInFPType(CFP, APFloat::IEEEdouble))
+ if (Value *V = fitsInFPType(CFP, APFloat::IEEEdouble()))
return V;
// Don't try to shrink to various long double types.
}
@@ -1789,6 +1790,205 @@ static Instruction *canonicalizeBitCastExtElt(BitCastInst &BitCast,
return ExtractElementInst::Create(NewBC, ExtElt->getIndexOperand());
}
+/// Change the type of a bitwise logic operation if we can eliminate a bitcast.
+static Instruction *foldBitCastBitwiseLogic(BitCastInst &BitCast,
+ InstCombiner::BuilderTy &Builder) {
+ Type *DestTy = BitCast.getType();
+ BinaryOperator *BO;
+ if (!DestTy->getScalarType()->isIntegerTy() ||
+ !match(BitCast.getOperand(0), m_OneUse(m_BinOp(BO))) ||
+ !BO->isBitwiseLogicOp())
+ return nullptr;
+
+ // FIXME: This transform is restricted to vector types to avoid backend
+ // problems caused by creating potentially illegal operations. If a fix-up is
+ // added to handle that situation, we can remove this check.
+ if (!DestTy->isVectorTy() || !BO->getType()->isVectorTy())
+ return nullptr;
+
+ Value *X;
+ if (match(BO->getOperand(0), m_OneUse(m_BitCast(m_Value(X)))) &&
+ X->getType() == DestTy && !isa<Constant>(X)) {
+ // bitcast(logic(bitcast(X), Y)) --> logic'(X, bitcast(Y))
+ Value *CastedOp1 = Builder.CreateBitCast(BO->getOperand(1), DestTy);
+ return BinaryOperator::Create(BO->getOpcode(), X, CastedOp1);
+ }
+
+ if (match(BO->getOperand(1), m_OneUse(m_BitCast(m_Value(X)))) &&
+ X->getType() == DestTy && !isa<Constant>(X)) {
+ // bitcast(logic(Y, bitcast(X))) --> logic'(bitcast(Y), X)
+ Value *CastedOp0 = Builder.CreateBitCast(BO->getOperand(0), DestTy);
+ return BinaryOperator::Create(BO->getOpcode(), CastedOp0, X);
+ }
+
+ return nullptr;
+}
+
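
foldBitCastBitwiseLogic re-types a bitwise logic operation so that a bitcast of one operand disappears; this is sound because a bitcast only reinterprets bits and the logic ops act bitwise, so the operation can be performed in either type. A C++20 sketch of that equivalence using std::bit_cast (assumes an 8-byte std::array with no padding; not LLVM code):

#include <array>
#include <bit>
#include <cassert>
#include <cstdint>

using V2i32 = std::array<uint32_t, 2>;

V2i32 andV(V2i32 A, V2i32 B) { return {A[0] & B[0], A[1] & B[1]}; }

int main() {
  uint64_t X = 0x0123456789abcdefULL;    // value of the "destination" type
  V2i32 Y = {0xffff0000u, 0x0000ffffu};  // other operand, "source" type

  // bitcast(and(bitcast(X), Y)) == and'(X, bitcast(Y))
  uint64_t LHS = std::bit_cast<uint64_t>(andV(std::bit_cast<V2i32>(X), Y));
  uint64_t RHS = X & std::bit_cast<uint64_t>(Y);
  assert(LHS == RHS);
}
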
+/// Change the type of a select if we can eliminate a bitcast.
+static Instruction *foldBitCastSelect(BitCastInst &BitCast,
+ InstCombiner::BuilderTy &Builder) {
+ Value *Cond, *TVal, *FVal;
+ if (!match(BitCast.getOperand(0),
+ m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
+ return nullptr;
+
+ // A vector select must maintain the same number of elements in its operands.
+ Type *CondTy = Cond->getType();
+ Type *DestTy = BitCast.getType();
+ if (CondTy->isVectorTy()) {
+ if (!DestTy->isVectorTy())
+ return nullptr;
+ if (DestTy->getVectorNumElements() != CondTy->getVectorNumElements())
+ return nullptr;
+ }
+
+ // FIXME: This transform is restricted from changing the select between
+ // scalars and vectors to avoid backend problems caused by creating
+ // potentially illegal operations. If a fix-up is added to handle that
+ // situation, we can remove this check.
+ if (DestTy->isVectorTy() != TVal->getType()->isVectorTy())
+ return nullptr;
+
+ auto *Sel = cast<Instruction>(BitCast.getOperand(0));
+ Value *X;
+ if (match(TVal, m_OneUse(m_BitCast(m_Value(X)))) && X->getType() == DestTy &&
+ !isa<Constant>(X)) {
+ // bitcast(select(Cond, bitcast(X), Y)) --> select'(Cond, X, bitcast(Y))
+ Value *CastedVal = Builder.CreateBitCast(FVal, DestTy);
+ return SelectInst::Create(Cond, X, CastedVal, "", nullptr, Sel);
+ }
+
+ if (match(FVal, m_OneUse(m_BitCast(m_Value(X)))) && X->getType() == DestTy &&
+ !isa<Constant>(X)) {
+ // bitcast(select(Cond, Y, bitcast(X))) --> select'(Cond, bitcast(Y), X)
+ Value *CastedVal = Builder.CreateBitCast(TVal, DestTy);
+ return SelectInst::Create(Cond, CastedVal, X, "", nullptr, Sel);
+ }
+
+ return nullptr;
+}
+
+/// Check if all users of CI are StoreInsts.
+static bool hasStoreUsersOnly(CastInst &CI) {
+ for (User *U : CI.users()) {
+ if (!isa<StoreInst>(U))
+ return false;
+ }
+ return true;
+}
+
+/// This function handles the following case:
+///
+/// A -> B cast
+/// PHI
+/// B -> A cast
+///
+/// All the related PHI nodes can be replaced by new PHI nodes with type A.
+/// The uses of \p CI can be changed to the new PHI node corresponding to \p PN.
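+/// An illustrative sketch (names and types are hypothetical, with A = i32
+/// and B = float):
+///   %b = bitcast i32 %x to float          ; A -> B cast
+///   %p = phi float [ %b, %bb1 ], [ %c, %bb2 ]
+///   %r = bitcast float %p to i32          ; B -> A cast
+/// Here the phi can be rewritten to operate on i32 values, making both
+/// bitcasts unnecessary.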
+Instruction *InstCombiner::optimizeBitCastFromPhi(CastInst &CI, PHINode *PN) {
+ // BitCast used by Store can be handled in InstCombineLoadStoreAlloca.cpp.
+ if (hasStoreUsersOnly(CI))
+ return nullptr;
+
+ Value *Src = CI.getOperand(0);
+ Type *SrcTy = Src->getType(); // Type B
+ Type *DestTy = CI.getType(); // Type A
+
+ SmallVector<PHINode *, 4> PhiWorklist;
+ SmallSetVector<PHINode *, 4> OldPhiNodes;
+
+ // Find all of the A->B casts and PHI nodes.
+ // We need to inspect all related PHI nodes, but PHIs can be cyclic, so
+ // OldPhiNodes is used to track all known PHI nodes. Before adding a new
+ // PHI to PhiWorklist, it is checked against and added to OldPhiNodes first.
+ PhiWorklist.push_back(PN);
+ OldPhiNodes.insert(PN);
+ while (!PhiWorklist.empty()) {
+ auto *OldPN = PhiWorklist.pop_back_val();
+ for (Value *IncValue : OldPN->incoming_values()) {
+ if (isa<Constant>(IncValue))
+ continue;
+
+ if (auto *LI = dyn_cast<LoadInst>(IncValue)) {
+ // If there is a sequence of one or more load instructions, where each
+ // loaded value is used as the address of a later load, a bitcast is
+ // necessary to change the value type, so don't optimize it. For
+ // simplicity we give up if the load address comes from another load.
+ Value *Addr = LI->getOperand(0);
+ if (Addr == &CI || isa<LoadInst>(Addr))
+ return nullptr;
+ if (LI->hasOneUse() && LI->isSimple())
+ continue;
+ // If a LoadInst has more than one use, changing the type of the loaded
+ // value may create another bitcast.
+ return nullptr;
+ }
+
+ if (auto *PNode = dyn_cast<PHINode>(IncValue)) {
+ if (OldPhiNodes.insert(PNode))
+ PhiWorklist.push_back(PNode);
+ continue;
+ }
+
+ auto *BCI = dyn_cast<BitCastInst>(IncValue);
+ // We can't handle other instructions.
+ if (!BCI)
+ return nullptr;
+
+ // Verify it's an A->B cast.
+ Type *TyA = BCI->getOperand(0)->getType();
+ Type *TyB = BCI->getType();
+ if (TyA != DestTy || TyB != SrcTy)
+ return nullptr;
+ }
+ }
+
+ // For each old PHI node, create a corresponding new PHI node with type A.
+ SmallDenseMap<PHINode *, PHINode *> NewPNodes;
+ for (auto *OldPN : OldPhiNodes) {
+ Builder->SetInsertPoint(OldPN);
+ PHINode *NewPN = Builder->CreatePHI(DestTy, OldPN->getNumOperands());
+ NewPNodes[OldPN] = NewPN;
+ }
+
+ // Fill in the operands of new PHI nodes.
+ for (auto *OldPN : OldPhiNodes) {
+ PHINode *NewPN = NewPNodes[OldPN];
+ for (unsigned j = 0, e = OldPN->getNumOperands(); j != e; ++j) {
+ Value *V = OldPN->getOperand(j);
+ Value *NewV = nullptr;
+ if (auto *C = dyn_cast<Constant>(V)) {
+ NewV = ConstantExpr::getBitCast(C, DestTy);
+ } else if (auto *LI = dyn_cast<LoadInst>(V)) {
+ Builder->SetInsertPoint(LI->getNextNode());
+ NewV = Builder->CreateBitCast(LI, DestTy);
+ Worklist.Add(LI);
+ } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
+ NewV = BCI->getOperand(0);
+ } else if (auto *PrevPN = dyn_cast<PHINode>(V)) {
+ NewV = NewPNodes[PrevPN];
+ }
+ assert(NewV);
+ NewPN->addIncoming(NewV, OldPN->getIncomingBlock(j));
+ }
+ }
+
+ // If there is a store with type B, change it to type A.
+ for (User *U : PN->users()) {
+ auto *SI = dyn_cast<StoreInst>(U);
+ if (SI && SI->isSimple() && SI->getOperand(0) == PN) {
+ Builder->SetInsertPoint(SI);
+ auto *NewBC =
+ cast<BitCastInst>(Builder->CreateBitCast(NewPNodes[PN], SrcTy));
+ SI->setOperand(0, NewBC);
+ Worklist.Add(SI);
+ assert(hasStoreUsersOnly(*NewBC));
+ }
+ }
+
+ return replaceInstUsesWith(CI, NewPNodes[PN]);
+}
+
Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
// If the operands are integer typed then apply the integer transforms,
// otherwise just apply the common ones.
@@ -1912,9 +2112,20 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
}
}
+ // Handle the A->B->A cast when there is an intervening PHI node.
+ if (PHINode *PN = dyn_cast<PHINode>(Src))
+ if (Instruction *I = optimizeBitCastFromPhi(CI, PN))
+ return I;
+
if (Instruction *I = canonicalizeBitCastExtElt(CI, *this, DL))
return I;
+ if (Instruction *I = foldBitCastBitwiseLogic(CI, *Builder))
+ return I;
+
+ if (Instruction *I = foldBitCastSelect(CI, *Builder))
+ return I;
+
if (SrcTy->isPointerTy())
return commonPointerCastTransforms(CI);
return commonCastTransforms(CI);
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 961497fe3c2d..012bfc7b4944 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -35,17 +35,12 @@ using namespace PatternMatch;
// How many times is a select replaced by one of its operands?
STATISTIC(NumSel, "Number of select opts");
-// Initialization Routines
-static ConstantInt *getOne(Constant *C) {
- return ConstantInt::get(cast<IntegerType>(C->getType()), 1);
-}
-
-static ConstantInt *ExtractElement(Constant *V, Constant *Idx) {
+static ConstantInt *extractElement(Constant *V, Constant *Idx) {
return cast<ConstantInt>(ConstantExpr::getExtractElement(V, Idx));
}
-static bool HasAddOverflow(ConstantInt *Result,
+static bool hasAddOverflow(ConstantInt *Result,
ConstantInt *In1, ConstantInt *In2,
bool IsSigned) {
if (!IsSigned)
@@ -58,28 +53,28 @@ static bool HasAddOverflow(ConstantInt *Result,
/// Compute Result = In1+In2, returning true if the result overflowed for this
/// type.
-static bool AddWithOverflow(Constant *&Result, Constant *In1,
+static bool addWithOverflow(Constant *&Result, Constant *In1,
Constant *In2, bool IsSigned = false) {
Result = ConstantExpr::getAdd(In1, In2);
if (VectorType *VTy = dyn_cast<VectorType>(In1->getType())) {
for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
Constant *Idx = ConstantInt::get(Type::getInt32Ty(In1->getContext()), i);
- if (HasAddOverflow(ExtractElement(Result, Idx),
- ExtractElement(In1, Idx),
- ExtractElement(In2, Idx),
+ if (hasAddOverflow(extractElement(Result, Idx),
+ extractElement(In1, Idx),
+ extractElement(In2, Idx),
IsSigned))
return true;
}
return false;
}
- return HasAddOverflow(cast<ConstantInt>(Result),
+ return hasAddOverflow(cast<ConstantInt>(Result),
cast<ConstantInt>(In1), cast<ConstantInt>(In2),
IsSigned);
}
-static bool HasSubOverflow(ConstantInt *Result,
+static bool hasSubOverflow(ConstantInt *Result,
ConstantInt *In1, ConstantInt *In2,
bool IsSigned) {
if (!IsSigned)
@@ -93,23 +88,23 @@ static bool HasSubOverflow(ConstantInt *Result,
/// Compute Result = In1-In2, returning true if the result overflowed for this
/// type.
-static bool SubWithOverflow(Constant *&Result, Constant *In1,
+static bool subWithOverflow(Constant *&Result, Constant *In1,
Constant *In2, bool IsSigned = false) {
Result = ConstantExpr::getSub(In1, In2);
if (VectorType *VTy = dyn_cast<VectorType>(In1->getType())) {
for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i) {
Constant *Idx = ConstantInt::get(Type::getInt32Ty(In1->getContext()), i);
- if (HasSubOverflow(ExtractElement(Result, Idx),
- ExtractElement(In1, Idx),
- ExtractElement(In2, Idx),
+ if (hasSubOverflow(extractElement(Result, Idx),
+ extractElement(In1, Idx),
+ extractElement(In2, Idx),
IsSigned))
return true;
}
return false;
}
- return HasSubOverflow(cast<ConstantInt>(Result),
+ return hasSubOverflow(cast<ConstantInt>(Result),
cast<ConstantInt>(In1), cast<ConstantInt>(In2),
IsSigned);
}
@@ -126,26 +121,26 @@ static bool isBranchOnSignBitCheck(ICmpInst &I, bool isSignBit) {
/// Given an exploded icmp instruction, return true if the comparison only
/// checks the sign bit. If it only checks the sign bit, set TrueIfSigned if the
/// result of the comparison is true when the input value is signed.
-static bool isSignBitCheck(ICmpInst::Predicate Pred, ConstantInt *RHS,
+static bool isSignBitCheck(ICmpInst::Predicate Pred, const APInt &RHS,
bool &TrueIfSigned) {
switch (Pred) {
case ICmpInst::ICMP_SLT: // True if LHS s< 0
TrueIfSigned = true;
- return RHS->isZero();
+ return RHS == 0;
case ICmpInst::ICMP_SLE: // True if LHS s<= RHS and RHS == -1
TrueIfSigned = true;
- return RHS->isAllOnesValue();
+ return RHS.isAllOnesValue();
case ICmpInst::ICMP_SGT: // True if LHS s> -1
TrueIfSigned = false;
- return RHS->isAllOnesValue();
+ return RHS.isAllOnesValue();
case ICmpInst::ICMP_UGT:
// True if LHS u> RHS and RHS == high-bit-mask - 1
TrueIfSigned = true;
- return RHS->isMaxValue(true);
+ return RHS.isMaxSignedValue();
case ICmpInst::ICMP_UGE:
// True if LHS u>= RHS and RHS == high-bit-mask (2^7, 2^15, 2^31, etc)
TrueIfSigned = true;
- return RHS->getValue().isSignBit();
+ return RHS.isSignBit();
default:
return false;
}
@@ -154,19 +149,20 @@ static bool isSignBitCheck(ICmpInst::Predicate Pred, ConstantInt *RHS,
/// Returns true if the exploded icmp can be expressed as a signed comparison
/// to zero and updates the predicate accordingly.
/// The signedness of the comparison is preserved.
-static bool isSignTest(ICmpInst::Predicate &Pred, const ConstantInt *RHS) {
+/// TODO: Refactor with decomposeBitTestICmp()?
+static bool isSignTest(ICmpInst::Predicate &Pred, const APInt &C) {
if (!ICmpInst::isSigned(Pred))
return false;
- if (RHS->isZero())
+ if (C == 0)
return ICmpInst::isRelational(Pred);
- if (RHS->isOne()) {
+ if (C == 1) {
if (Pred == ICmpInst::ICMP_SLT) {
Pred = ICmpInst::ICMP_SLE;
return true;
}
- } else if (RHS->isAllOnesValue()) {
+ } else if (C.isAllOnesValue()) {
if (Pred == ICmpInst::ICMP_SGT) {
Pred = ICmpInst::ICMP_SGE;
return true;
@@ -176,16 +172,10 @@ static bool isSignTest(ICmpInst::Predicate &Pred, const ConstantInt *RHS) {
return false;
}
-/// Return true if the constant is of the form 1+0+. This is the same as
-/// lowones(~X).
-static bool isHighOnes(const ConstantInt *CI) {
- return (~CI->getValue() + 1).isPowerOf2();
-}
-
/// Given a signed integer type and a set of known zero and one bits, compute
/// the maximum and minimum values that could have the specified known zero and
/// known one bits, returning them in Min/Max.
-static void ComputeSignedMinMaxValuesFromKnownBits(const APInt &KnownZero,
+static void computeSignedMinMaxValuesFromKnownBits(const APInt &KnownZero,
const APInt &KnownOne,
APInt &Min, APInt &Max) {
assert(KnownZero.getBitWidth() == KnownOne.getBitWidth() &&
@@ -208,7 +198,7 @@ static void ComputeSignedMinMaxValuesFromKnownBits(const APInt &KnownZero,
/// Given an unsigned integer type and a set of known zero and one bits, compute
/// the maximum and minimum values that could have the specified known zero and
/// known one bits, returning them in Min/Max.
-static void ComputeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero,
+static void computeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero,
const APInt &KnownOne,
APInt &Min, APInt &Max) {
assert(KnownZero.getBitWidth() == KnownOne.getBitWidth() &&
@@ -231,9 +221,10 @@ static void ComputeUnsignedMinMaxValuesFromKnownBits(const APInt &KnownZero,
///
/// If AndCst is non-null, then the loaded value is masked with that constant
/// before doing the comparison. This handles cases like "A[i]&4 == 0".
-Instruction *InstCombiner::
-FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
- CmpInst &ICI, ConstantInt *AndCst) {
+Instruction *InstCombiner::foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
+ GlobalVariable *GV,
+ CmpInst &ICI,
+ ConstantInt *AndCst) {
Constant *Init = GV->getInitializer();
if (!isa<ConstantArray>(Init) && !isa<ConstantDataArray>(Init))
return nullptr;
@@ -319,7 +310,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
// Find out if the comparison would be true or false for the i'th element.
Constant *C = ConstantFoldCompareInstOperands(ICI.getPredicate(), Elt,
- CompareRHS, DL, TLI);
+ CompareRHS, DL, &TLI);
// If the result is undef for this element, ignore it.
if (isa<UndefValue>(C)) {
// Extend range state machines to cover this element in case there is an
@@ -509,7 +500,7 @@ FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP, GlobalVariable *GV,
///
/// If we can't emit an optimized form for this expression, this returns null.
///
-static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC,
+static Value *evaluateGEPOffsetExpression(User *GEP, InstCombiner &IC,
const DataLayout &DL) {
gep_type_iterator GTI = gep_type_begin(GEP);
@@ -526,7 +517,7 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC,
if (CI->isZero()) continue;
// Handle a struct index, which adds its field offset to the pointer.
- if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ if (StructType *STy = GTI.getStructTypeOrNull()) {
Offset += DL.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
} else {
uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType());
@@ -556,7 +547,7 @@ static Value *EvaluateGEPOffsetExpression(User *GEP, InstCombiner &IC,
if (CI->isZero()) continue;
// Handle a struct index, which adds its field offset to the pointer.
- if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ if (StructType *STy = GTI.getStructTypeOrNull()) {
Offset += DL.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
} else {
uint64_t Size = DL.getTypeAllocSize(GTI.getIndexedType());
@@ -919,7 +910,7 @@ static Instruction *transformToIndexedCompare(GEPOperator *GEPLHS, Value *RHS,
/// Fold comparisons between a GEP instruction and something else. At this point
/// we know that the GEP is on the LHS of the comparison.
-Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
+Instruction *InstCombiner::foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
ICmpInst::Predicate Cond,
Instruction &I) {
// Don't transform signed compares of GEPs into index compares. Even if the
@@ -941,7 +932,7 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
// This transformation (ignoring the base and scales) is valid because we
// know pointers can't overflow since the gep is inbounds. See if we can
// output an optimized form.
- Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, *this, DL);
+ Value *Offset = evaluateGEPOffsetExpression(GEPLHS, *this, DL);
// If not, synthesize the offset the hard way.
if (!Offset)
@@ -1003,12 +994,12 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
// If one of the GEPs has all zero indices, recurse.
if (GEPLHS->hasAllZeroIndices())
- return FoldGEPICmp(GEPRHS, GEPLHS->getOperand(0),
+ return foldGEPICmp(GEPRHS, GEPLHS->getOperand(0),
ICmpInst::getSwappedPredicate(Cond), I);
// If the other GEP has all zero indices, recurse.
if (GEPRHS->hasAllZeroIndices())
- return FoldGEPICmp(GEPLHS, GEPRHS->getOperand(0), Cond, I);
+ return foldGEPICmp(GEPLHS, GEPRHS->getOperand(0), Cond, I);
bool GEPsInBounds = GEPLHS->isInBounds() && GEPRHS->isInBounds();
if (GEPLHS->getNumOperands() == GEPRHS->getNumOperands()) {
@@ -1056,8 +1047,9 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
return transformToIndexedCompare(GEPLHS, RHS, Cond, DL);
}
-Instruction *InstCombiner::FoldAllocaCmp(ICmpInst &ICI, AllocaInst *Alloca,
- Value *Other) {
+Instruction *InstCombiner::foldAllocaCmp(ICmpInst &ICI,
+ const AllocaInst *Alloca,
+ const Value *Other) {
assert(ICI.isEquality() && "Cannot fold non-equality comparison.");
// It would be tempting to fold away comparisons between allocas and any
@@ -1076,8 +1068,8 @@ Instruction *InstCombiner::FoldAllocaCmp(ICmpInst &ICI, AllocaInst *Alloca,
unsigned MaxIter = 32; // Break cycles and bound to constant-time.
- SmallVector<Use *, 32> Worklist;
- for (Use &U : Alloca->uses()) {
+ SmallVector<const Use *, 32> Worklist;
+ for (const Use &U : Alloca->uses()) {
if (Worklist.size() >= MaxIter)
return nullptr;
Worklist.push_back(&U);
@@ -1086,8 +1078,8 @@ Instruction *InstCombiner::FoldAllocaCmp(ICmpInst &ICI, AllocaInst *Alloca,
unsigned NumCmps = 0;
while (!Worklist.empty()) {
assert(Worklist.size() <= MaxIter);
- Use *U = Worklist.pop_back_val();
- Value *V = U->getUser();
+ const Use *U = Worklist.pop_back_val();
+ const Value *V = U->getUser();
--MaxIter;
if (isa<BitCastInst>(V) || isa<GetElementPtrInst>(V) || isa<PHINode>(V) ||
@@ -1096,7 +1088,7 @@ Instruction *InstCombiner::FoldAllocaCmp(ICmpInst &ICI, AllocaInst *Alloca,
} else if (isa<LoadInst>(V)) {
// Loading from the pointer doesn't escape it.
continue;
- } else if (auto *SI = dyn_cast<StoreInst>(V)) {
+ } else if (const auto *SI = dyn_cast<StoreInst>(V)) {
// Storing *to* the pointer is fine, but storing the pointer escapes it.
if (SI->getValueOperand() == U->get())
return nullptr;
@@ -1105,7 +1097,7 @@ Instruction *InstCombiner::FoldAllocaCmp(ICmpInst &ICI, AllocaInst *Alloca,
if (NumCmps++)
return nullptr; // Found more than one cmp.
continue;
- } else if (auto *Intrin = dyn_cast<IntrinsicInst>(V)) {
+ } else if (const auto *Intrin = dyn_cast<IntrinsicInst>(V)) {
switch (Intrin->getIntrinsicID()) {
// These intrinsics don't escape or compare the pointer. Memset is safe
// because we don't allow ptrtoint. Memcpy and memmove are safe because
@@ -1120,7 +1112,7 @@ Instruction *InstCombiner::FoldAllocaCmp(ICmpInst &ICI, AllocaInst *Alloca,
} else {
return nullptr;
}
- for (Use &U : V->uses()) {
+ for (const Use &U : V->uses()) {
if (Worklist.size() >= MaxIter)
return nullptr;
Worklist.push_back(&U);
@@ -1134,9 +1126,9 @@ Instruction *InstCombiner::FoldAllocaCmp(ICmpInst &ICI, AllocaInst *Alloca,
}
/// Fold "icmp pred (X+CI), X".
-Instruction *InstCombiner::FoldICmpAddOpCst(Instruction &ICI,
- Value *X, ConstantInt *CI,
- ICmpInst::Predicate Pred) {
+Instruction *InstCombiner::foldICmpAddOpConst(Instruction &ICI,
+ Value *X, ConstantInt *CI,
+ ICmpInst::Predicate Pred) {
// From this point on, we know that (X+C <= X) --> (X+C < X) because C != 0,
// so the values can never be equal. Similarly for all other "or equals"
// operators.
@@ -1181,52 +1173,995 @@ Instruction *InstCombiner::FoldICmpAddOpCst(Instruction &ICI,
return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantExpr::getSub(SMax, C));
}
-/// Fold "icmp pred, ([su]div X, DivRHS), CmpRHS" where DivRHS and CmpRHS are
-/// both known to be integer constants.
-Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
- ConstantInt *DivRHS) {
- ConstantInt *CmpRHS = cast<ConstantInt>(ICI.getOperand(1));
- const APInt &CmpRHSV = CmpRHS->getValue();
+/// Handle "(icmp eq/ne (ashr/lshr AP2, A), AP1)" ->
+/// (icmp eq/ne A, Log2(AP2/AP1)) ->
+/// (icmp eq/ne A, Log2(AP2) - Log2(AP1)).
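+/// For example (illustrative constants):
+///   (icmp eq (lshr i32 64, %A), 4) --> (icmp eq i32 %A, 4)
+/// because 64 u>> 4 == 4, i.e. Log2(64) - Log2(4) == 4.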
+Instruction *InstCombiner::foldICmpShrConstConst(ICmpInst &I, Value *A,
+ const APInt &AP1,
+ const APInt &AP2) {
+ assert(I.isEquality() && "Cannot fold icmp gt/lt");
+
+ auto getICmp = [&I](CmpInst::Predicate Pred, Value *LHS, Value *RHS) {
+ if (I.getPredicate() == I.ICMP_NE)
+ Pred = CmpInst::getInversePredicate(Pred);
+ return new ICmpInst(Pred, LHS, RHS);
+ };
+
+ // Don't bother doing any work for cases which InstSimplify handles.
+ if (AP2 == 0)
+ return nullptr;
+
+ bool IsAShr = isa<AShrOperator>(I.getOperand(0));
+ if (IsAShr) {
+ if (AP2.isAllOnesValue())
+ return nullptr;
+ if (AP2.isNegative() != AP1.isNegative())
+ return nullptr;
+ if (AP2.sgt(AP1))
+ return nullptr;
+ }
+
+ if (!AP1)
+ // 'A' must be large enough to shift out the highest set bit.
+ return getICmp(I.ICMP_UGT, A,
+ ConstantInt::get(A->getType(), AP2.logBase2()));
+
+ if (AP1 == AP2)
+ return getICmp(I.ICMP_EQ, A, ConstantInt::getNullValue(A->getType()));
+
+ int Shift;
+ if (IsAShr && AP1.isNegative())
+ Shift = AP1.countLeadingOnes() - AP2.countLeadingOnes();
+ else
+ Shift = AP1.countLeadingZeros() - AP2.countLeadingZeros();
+
+ if (Shift > 0) {
+ if (IsAShr && AP1 == AP2.ashr(Shift)) {
+ // There are multiple solutions if we are comparing against -1 and the LHS
+ // of the ashr is not a power of two.
+ if (AP1.isAllOnesValue() && !AP2.isPowerOf2())
+ return getICmp(I.ICMP_UGE, A, ConstantInt::get(A->getType(), Shift));
+ return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift));
+ } else if (AP1 == AP2.lshr(Shift)) {
+ return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift));
+ }
+ }
+
+ // Shifting const2 will never be equal to const1.
+ // FIXME: This should always be handled by InstSimplify?
+ auto *TorF = ConstantInt::get(I.getType(), I.getPredicate() == I.ICMP_NE);
+ return replaceInstUsesWith(I, TorF);
+}
+
+/// Handle "(icmp eq/ne (shl AP2, A), AP1)" ->
+/// (icmp eq/ne A, TrailingZeros(AP1) - TrailingZeros(AP2)).
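+/// For example (illustrative constants):
+///   (icmp eq (shl i32 4, %A), 32) --> (icmp eq i32 %A, 3)
+/// because TrailingZeros(32) - TrailingZeros(4) == 5 - 2 == 3.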
+Instruction *InstCombiner::foldICmpShlConstConst(ICmpInst &I, Value *A,
+ const APInt &AP1,
+ const APInt &AP2) {
+ assert(I.isEquality() && "Cannot fold icmp gt/lt");
+
+ auto getICmp = [&I](CmpInst::Predicate Pred, Value *LHS, Value *RHS) {
+ if (I.getPredicate() == I.ICMP_NE)
+ Pred = CmpInst::getInversePredicate(Pred);
+ return new ICmpInst(Pred, LHS, RHS);
+ };
+
+ // Don't bother doing any work for cases which InstSimplify handles.
+ if (AP2 == 0)
+ return nullptr;
+
+ unsigned AP2TrailingZeros = AP2.countTrailingZeros();
+
+ if (!AP1 && AP2TrailingZeros != 0)
+ return getICmp(
+ I.ICMP_UGE, A,
+ ConstantInt::get(A->getType(), AP2.getBitWidth() - AP2TrailingZeros));
+
+ if (AP1 == AP2)
+ return getICmp(I.ICMP_EQ, A, ConstantInt::getNullValue(A->getType()));
+
+ // Get the distance between the lowest bits that are set.
+ int Shift = AP1.countTrailingZeros() - AP2TrailingZeros;
+
+ if (Shift > 0 && AP2.shl(Shift) == AP1)
+ return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift));
+
+ // Shifting const2 will never be equal to const1.
+ // FIXME: This should always be handled by InstSimplify?
+ auto *TorF = ConstantInt::get(I.getType(), I.getPredicate() == I.ICMP_NE);
+ return replaceInstUsesWith(I, TorF);
+}
+
+/// The caller has matched a pattern of the form:
+/// I = icmp ugt (add (add A, B), CI2), CI1
+/// If this is of the form:
+/// sum = a + b
+/// if (sum+128 >u 255)
+/// Then replace it with llvm.sadd.with.overflow.i8.
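+/// An illustrative sketch (assuming %a and %b were sign-extended from i8):
+///   %sum = add i32 %a, %b
+///   %off = add i32 %sum, 128
+///   %cmp = icmp ugt i32 %off, 255
+/// Here %cmp is true exactly when %sum is outside [-128, 127], i.e. when the
+/// equivalent i8 addition would overflow.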
+///
+static Instruction *processUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
+ ConstantInt *CI2, ConstantInt *CI1,
+ InstCombiner &IC) {
+ // We're trying to transform this into an llvm.sadd.with.overflow. To do
+ // this, we have to replace the original add with a narrower add, and
+ // discard the add-with-constant that is part of the range check (if we
+ // can't eliminate it, this isn't profitable).
+
+ // In order to eliminate the add-with-constant, the compare must be its only
+ // use.
+ Instruction *AddWithCst = cast<Instruction>(I.getOperand(0));
+ if (!AddWithCst->hasOneUse())
+ return nullptr;
+
+ // If CI2 is 2^7, 2^15, 2^31, then it might be an sadd.with.overflow.
+ if (!CI2->getValue().isPowerOf2())
+ return nullptr;
+ unsigned NewWidth = CI2->getValue().countTrailingZeros();
+ if (NewWidth != 7 && NewWidth != 15 && NewWidth != 31)
+ return nullptr;
+
+ // The width of the new add formed is 1 more than the bias.
+ ++NewWidth;
+
+ // Check to see that CI1 is an all-ones value with NewWidth bits.
+ if (CI1->getBitWidth() == NewWidth ||
+ CI1->getValue() != APInt::getLowBitsSet(CI1->getBitWidth(), NewWidth))
+ return nullptr;
+
+ // This is only really a signed overflow check if the inputs have been
+ // sign-extended; check for that condition. For example, if CI2 is 2^31 and
+ // the operands of the add are 64 bits wide, we need at least 33 sign bits.
+ unsigned NeededSignBits = CI1->getBitWidth() - NewWidth + 1;
+ if (IC.ComputeNumSignBits(A, 0, &I) < NeededSignBits ||
+ IC.ComputeNumSignBits(B, 0, &I) < NeededSignBits)
+ return nullptr;
+
+ // In order to replace the original add with a narrower
+ // llvm.sadd.with.overflow, the only uses allowed are the add-with-constant
+ // and truncates that discard the high bits of the add. Verify that this is
+ // the case.
+ Instruction *OrigAdd = cast<Instruction>(AddWithCst->getOperand(0));
+ for (User *U : OrigAdd->users()) {
+ if (U == AddWithCst)
+ continue;
+
+ // Only accept truncates for now. We would really like a nice recursive
+ // predicate like SimplifyDemandedBits, but one which goes down the use-def
+ // chain to see which bits of a value are actually demanded. If the
+ // original add had another add which was then immediately truncated, we
+ // could still do the transformation.
+ TruncInst *TI = dyn_cast<TruncInst>(U);
+ if (!TI || TI->getType()->getPrimitiveSizeInBits() > NewWidth)
+ return nullptr;
+ }
+
+ // If the pattern matches, truncate the inputs to the narrower type and
+ // use the sadd_with_overflow intrinsic to efficiently compute both the
+ // result and the overflow bit.
+ Type *NewType = IntegerType::get(OrigAdd->getContext(), NewWidth);
+ Value *F = Intrinsic::getDeclaration(I.getModule(),
+ Intrinsic::sadd_with_overflow, NewType);
+
+ InstCombiner::BuilderTy *Builder = IC.Builder;
+
+ // Put the new code above the original add, in case there are any uses of the
+ // add between the add and the compare.
+ Builder->SetInsertPoint(OrigAdd);
+
+ Value *TruncA = Builder->CreateTrunc(A, NewType, A->getName() + ".trunc");
+ Value *TruncB = Builder->CreateTrunc(B, NewType, B->getName() + ".trunc");
+ CallInst *Call = Builder->CreateCall(F, {TruncA, TruncB}, "sadd");
+ Value *Add = Builder->CreateExtractValue(Call, 0, "sadd.result");
+ Value *ZExt = Builder->CreateZExt(Add, OrigAdd->getType());
+
+ // The inner add was the result of the narrow add, zero extended to the
+ // wider type. Replace it with the result computed by the intrinsic.
+ IC.replaceInstUsesWith(*OrigAdd, ZExt);
+
+ // The original icmp gets replaced with the overflow value.
+ return ExtractValueInst::Create(Call, 1, "sadd.overflow");
+}
+
+// Fold icmp Pred X, C.
+Instruction *InstCombiner::foldICmpWithConstant(ICmpInst &Cmp) {
+ CmpInst::Predicate Pred = Cmp.getPredicate();
+ Value *X = Cmp.getOperand(0);
+
+ const APInt *C;
+ if (!match(Cmp.getOperand(1), m_APInt(C)))
+ return nullptr;
+
+ Value *A = nullptr, *B = nullptr;
+
+ // Match the following pattern, which is a common idiom when writing
+ // in a wider type and explicitly checks for overflow using comparisons against
+ // in wider type and explicitly checks for overflow using comparisons against
+ // INT_MIN and INT_MAX. Simplify by using the sadd_with_overflow intrinsic.
+ //
+ // TODO: This could probably be generalized to handle other overflow-safe
+ // operations if we worked out the formulas to compute the appropriate magic
+ // constants.
+ //
+ // sum = a + b
+ // if (sum+128 >u 255) ... -> llvm.sadd.with.overflow.i8
+ {
+ ConstantInt *CI2; // I = icmp ugt (add (add A, B), CI2), CI
+ if (Pred == ICmpInst::ICMP_UGT &&
+ match(X, m_Add(m_Add(m_Value(A), m_Value(B)), m_ConstantInt(CI2))))
+ if (Instruction *Res = processUGT_ADDCST_ADD(
+ Cmp, A, B, CI2, cast<ConstantInt>(Cmp.getOperand(1)), *this))
+ return Res;
+ }
+
+ // (icmp sgt smin(PosA, B) 0) -> (icmp sgt B 0)
+ // (icmp sgt smin(PosA, B), 0) -> (icmp sgt B, 0)
+ SelectPatternResult SPR = matchSelectPattern(X, A, B);
+ if (SPR.Flavor == SPF_SMIN) {
+ if (isKnownPositive(A, DL))
+ return new ICmpInst(Pred, B, Cmp.getOperand(1));
+ if (isKnownPositive(B, DL))
+ return new ICmpInst(Pred, A, Cmp.getOperand(1));
+ }
+ }
+
+ // FIXME: Use m_APInt to allow folds for splat constants.
+ ConstantInt *CI = dyn_cast<ConstantInt>(Cmp.getOperand(1));
+ if (!CI)
+ return nullptr;
+
+ // Canonicalize icmp instructions based on dominating conditions.
+ BasicBlock *Parent = Cmp.getParent();
+ BasicBlock *Dom = Parent->getSinglePredecessor();
+ auto *BI = Dom ? dyn_cast<BranchInst>(Dom->getTerminator()) : nullptr;
+ ICmpInst::Predicate Pred2;
+ BasicBlock *TrueBB, *FalseBB;
+ ConstantInt *CI2;
+ if (BI && match(BI, m_Br(m_ICmp(Pred2, m_Specific(X), m_ConstantInt(CI2)),
+ TrueBB, FalseBB)) &&
+ TrueBB != FalseBB) {
+ ConstantRange CR =
+ ConstantRange::makeAllowedICmpRegion(Pred, CI->getValue());
+ ConstantRange DominatingCR =
+ (Parent == TrueBB)
+ ? ConstantRange::makeExactICmpRegion(Pred2, CI2->getValue())
+ : ConstantRange::makeExactICmpRegion(
+ CmpInst::getInversePredicate(Pred2), CI2->getValue());
+ ConstantRange Intersection = DominatingCR.intersectWith(CR);
+ ConstantRange Difference = DominatingCR.difference(CR);
+ if (Intersection.isEmptySet())
+ return replaceInstUsesWith(Cmp, Builder->getFalse());
+ if (Difference.isEmptySet())
+ return replaceInstUsesWith(Cmp, Builder->getTrue());
+
+ // If this is a normal comparison, it demands all bits. If it is a sign
+ // bit comparison, it only demands the sign bit.
+ bool UnusedBit;
+ bool IsSignBit = isSignBitCheck(Pred, CI->getValue(), UnusedBit);
+
+ // Canonicalizing a sign bit comparison that gets used in a branch
+ // pessimizes codegen by generating a branch-on-zero instruction instead
+ // of a test-and-branch. So we avoid canonicalizing in such situations
+ // because a test-and-branch instruction has better branch displacement
+ // than a compare-and-branch instruction.
+ if (!isBranchOnSignBitCheck(Cmp, IsSignBit) && !Cmp.isEquality()) {
+ if (auto *AI = Intersection.getSingleElement())
+ return new ICmpInst(ICmpInst::ICMP_EQ, X, Builder->getInt(*AI));
+ if (auto *AD = Difference.getSingleElement())
+ return new ICmpInst(ICmpInst::ICMP_NE, X, Builder->getInt(*AD));
+ }
+ }
+
+ return nullptr;
+}
+
+/// Fold icmp (trunc X, Y), C.
+Instruction *InstCombiner::foldICmpTruncConstant(ICmpInst &Cmp,
+ Instruction *Trunc,
+ const APInt *C) {
+ ICmpInst::Predicate Pred = Cmp.getPredicate();
+ Value *X = Trunc->getOperand(0);
+ if (*C == 1 && C->getBitWidth() > 1) {
+ // icmp slt trunc(signum(V)) 1 --> icmp slt V, 1
+ Value *V = nullptr;
+ if (Pred == ICmpInst::ICMP_SLT && match(X, m_Signum(m_Value(V))))
+ return new ICmpInst(ICmpInst::ICMP_SLT, V,
+ ConstantInt::get(V->getType(), 1));
+ }
+
+ if (Cmp.isEquality() && Trunc->hasOneUse()) {
+ // Simplify icmp eq (trunc x to i8), 42 -> icmp eq x, 42|highbits if all
+ // of the high bits truncated out of x are known.
+ unsigned DstBits = Trunc->getType()->getScalarSizeInBits(),
+ SrcBits = X->getType()->getScalarSizeInBits();
+ APInt KnownZero(SrcBits, 0), KnownOne(SrcBits, 0);
+ computeKnownBits(X, KnownZero, KnownOne, 0, &Cmp);
+
+ // If all the high bits are known, we can do this xform.
+ if ((KnownZero | KnownOne).countLeadingOnes() >= SrcBits - DstBits) {
+ // Pull in the high bits from known-ones set.
+ APInt NewRHS = C->zext(SrcBits);
+ NewRHS |= KnownOne & APInt::getHighBitsSet(SrcBits, SrcBits - DstBits);
+ return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), NewRHS));
+ }
+ }
+
+ return nullptr;
+}
+
+/// Fold icmp (xor X, Y), C.
+Instruction *InstCombiner::foldICmpXorConstant(ICmpInst &Cmp,
+ BinaryOperator *Xor,
+ const APInt *C) {
+ Value *X = Xor->getOperand(0);
+ Value *Y = Xor->getOperand(1);
+ const APInt *XorC;
+ if (!match(Y, m_APInt(XorC)))
+ return nullptr;
+
+ // If this is a comparison that tests the sign bit (X < 0) or (X > -1),
+ // fold the xor.
+ ICmpInst::Predicate Pred = Cmp.getPredicate();
+ if ((Pred == ICmpInst::ICMP_SLT && *C == 0) ||
+ (Pred == ICmpInst::ICMP_SGT && C->isAllOnesValue())) {
+
+ // If the sign bit of the xor constant is not set, there is no change to
+ // the operation; just stop using the xor.
+ if (!XorC->isNegative()) {
+ Cmp.setOperand(0, X);
+ Worklist.Add(Xor);
+ return &Cmp;
+ }
+
+ // Was the old condition true if the operand is positive?
+ bool isTrueIfPositive = Pred == ICmpInst::ICMP_SGT;
+
+ // If so, the new one isn't.
+ isTrueIfPositive ^= true;
+
+ Constant *CmpConstant = cast<Constant>(Cmp.getOperand(1));
+ if (isTrueIfPositive)
+ return new ICmpInst(ICmpInst::ICMP_SGT, X, SubOne(CmpConstant));
+ else
+ return new ICmpInst(ICmpInst::ICMP_SLT, X, AddOne(CmpConstant));
+ }
+
+ if (Xor->hasOneUse()) {
+ // (icmp u/s (xor X SignBit), C) -> (icmp s/u X, (xor C SignBit))
+ if (!Cmp.isEquality() && XorC->isSignBit()) {
+ Pred = Cmp.isSigned() ? Cmp.getUnsignedPredicate()
+ : Cmp.getSignedPredicate();
+ return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), *C ^ *XorC));
+ }
+
+ // (icmp u/s (xor X ~SignBit), C) -> (icmp s/u X, (xor C ~SignBit))
+ if (!Cmp.isEquality() && XorC->isMaxSignedValue()) {
+ Pred = Cmp.isSigned() ? Cmp.getUnsignedPredicate()
+ : Cmp.getSignedPredicate();
+ Pred = Cmp.getSwappedPredicate(Pred);
+ return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), *C ^ *XorC));
+ }
+ }
+
+ // (icmp ugt (xor X, C), ~C) -> (icmp ult X, C)
+ // iff -C is a power of 2
+ if (Pred == ICmpInst::ICMP_UGT && *XorC == ~(*C) && (*C + 1).isPowerOf2())
+ return new ICmpInst(ICmpInst::ICMP_ULT, X, Y);
+
+ // (icmp ult (xor X, C), -C) -> (icmp uge X, C)
+ // iff -C is a power of 2
+ if (Pred == ICmpInst::ICMP_ULT && *XorC == -(*C) && C->isPowerOf2())
+ return new ICmpInst(ICmpInst::ICMP_UGE, X, Y);
+
+ return nullptr;
+}
+
+/// Fold icmp (and (sh X, Y), C2), C1.
+Instruction *InstCombiner::foldICmpAndShift(ICmpInst &Cmp, BinaryOperator *And,
+ const APInt *C1, const APInt *C2) {
+ BinaryOperator *Shift = dyn_cast<BinaryOperator>(And->getOperand(0));
+ if (!Shift || !Shift->isShift())
+ return nullptr;
+
+ // If this is: (X >> C3) & C2 != C1 (where any shift and any compare could
+ // exist), turn it into (X & (C2 << C3)) != (C1 << C3). This happens a LOT in
+ // code produced by the clang front-end, for bitfield access.
+ // This seemingly simple opportunity to fold away a shift turns out to be
+ // rather complicated. See PR17827 for details.
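+ // For example (illustrative constants, unsigned/equality compare):
+ //   ((X >> 3) & 15) != 1  -->  (X & (15 << 3)) != (1 << 3)
+ //                         -->  (X & 120) != 8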
+ unsigned ShiftOpcode = Shift->getOpcode();
+ bool IsShl = ShiftOpcode == Instruction::Shl;
+ const APInt *C3;
+ if (match(Shift->getOperand(1), m_APInt(C3))) {
+ bool CanFold = false;
+ if (ShiftOpcode == Instruction::AShr) {
+ // There may be some constraints that make this possible, but nothing
+ // simple has been discovered yet.
+ CanFold = false;
+ } else if (ShiftOpcode == Instruction::Shl) {
+ // For a left shift, we can fold if the comparison is not signed. We can
+ // also fold a signed comparison if the mask value and comparison value
+ // are not negative. These constraints may not be obvious, but we can
+ // prove that they are correct using an SMT solver.
+ if (!Cmp.isSigned() || (!C2->isNegative() && !C1->isNegative()))
+ CanFold = true;
+ } else if (ShiftOpcode == Instruction::LShr) {
+ // For a logical right shift, we can fold if the comparison is not signed.
+ // We can also fold a signed comparison if the shifted mask value and the
+ // shifted comparison value are not negative. These constraints may not be
+ // obvious, but we can prove that they are correct using an SMT solver.
+ if (!Cmp.isSigned() ||
+ (!C2->shl(*C3).isNegative() && !C1->shl(*C3).isNegative()))
+ CanFold = true;
+ }
+
+ if (CanFold) {
+ APInt NewCst = IsShl ? C1->lshr(*C3) : C1->shl(*C3);
+ APInt SameAsC1 = IsShl ? NewCst.shl(*C3) : NewCst.lshr(*C3);
+ // Check to see if we are shifting out any of the bits being compared.
+ if (SameAsC1 != *C1) {
+ // If we shifted bits out, the fold is not going to work out. As a
+ // special case, check to see if this means that the result is always
+ // true or false now.
+ if (Cmp.getPredicate() == ICmpInst::ICMP_EQ)
+ return replaceInstUsesWith(Cmp, ConstantInt::getFalse(Cmp.getType()));
+ if (Cmp.getPredicate() == ICmpInst::ICMP_NE)
+ return replaceInstUsesWith(Cmp, ConstantInt::getTrue(Cmp.getType()));
+ } else {
+ Cmp.setOperand(1, ConstantInt::get(And->getType(), NewCst));
+ APInt NewAndCst = IsShl ? C2->lshr(*C3) : C2->shl(*C3);
+ And->setOperand(1, ConstantInt::get(And->getType(), NewAndCst));
+ And->setOperand(0, Shift->getOperand(0));
+ Worklist.Add(Shift); // Shift is dead.
+ return &Cmp;
+ }
+ }
+ }
+
+ // Turn ((X >> Y) & C2) == 0 into (X & (C2 << Y)) == 0. The latter is
+ // preferable because it allows the C2 << Y expression to be hoisted out of a
+ // loop if Y is invariant and X is not.
+ if (Shift->hasOneUse() && *C1 == 0 && Cmp.isEquality() &&
+ !Shift->isArithmeticShift() && !isa<Constant>(Shift->getOperand(0))) {
+ // Compute C2 << Y.
+ Value *NewShift =
+ IsShl ? Builder->CreateLShr(And->getOperand(1), Shift->getOperand(1))
+ : Builder->CreateShl(And->getOperand(1), Shift->getOperand(1));
+
+ // Compute X & (C2 << Y).
+ Value *NewAnd = Builder->CreateAnd(Shift->getOperand(0), NewShift);
+ Cmp.setOperand(0, NewAnd);
+ return &Cmp;
+ }
+
+ return nullptr;
+}
+
+/// Fold icmp (and X, C2), C1.
+Instruction *InstCombiner::foldICmpAndConstConst(ICmpInst &Cmp,
+ BinaryOperator *And,
+ const APInt *C1) {
+ const APInt *C2;
+ if (!match(And->getOperand(1), m_APInt(C2)))
+ return nullptr;
+
+ if (!And->hasOneUse() || !And->getOperand(0)->hasOneUse())
+ return nullptr;
+
+ // If the LHS is an 'and' of a truncate and we can widen the and/compare to
+ // the input width without changing the value produced, eliminate the cast:
+ //
+ // icmp (and (trunc W), C2), C1 -> icmp (and W, C2'), C1'
+ //
+ // We can do this transformation if the constants do not have their sign bits
+ // set or if it is an equality comparison. Extending a relational comparison
+ // when we're checking the sign bit would not work.
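+ // For example (illustrative; equality compare, so sign bits don't matter):
+ //   icmp eq (and (trunc i64 %W to i32), 255), 7
+ //     --> icmp eq (and i64 %W, 255), 7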
+ Value *W;
+ if (match(And->getOperand(0), m_Trunc(m_Value(W))) &&
+ (Cmp.isEquality() || (!C1->isNegative() && !C2->isNegative()))) {
+ // TODO: Is this a good transform for vectors? Wider types may reduce
+ // throughput. Should this transform be limited (even for scalars) by using
+ // ShouldChangeType()?
+ if (!Cmp.getType()->isVectorTy()) {
+ Type *WideType = W->getType();
+ unsigned WideScalarBits = WideType->getScalarSizeInBits();
+ Constant *ZextC1 = ConstantInt::get(WideType, C1->zext(WideScalarBits));
+ Constant *ZextC2 = ConstantInt::get(WideType, C2->zext(WideScalarBits));
+ Value *NewAnd = Builder->CreateAnd(W, ZextC2, And->getName());
+ return new ICmpInst(Cmp.getPredicate(), NewAnd, ZextC1);
+ }
+ }
+
+ if (Instruction *I = foldICmpAndShift(Cmp, And, C1, C2))
+ return I;
+
+ // (icmp pred (and (or (lshr A, B), A), 1), 0) -->
+ // (icmp pred (and A, (or (shl 1, B), 1)), 0)
+ //
+ // iff pred isn't signed
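+ // For example (illustrative, with B == 3 and an equality compare):
+ //   (((A >> 3) | A) & 1) == 0  -->  (A & ((1 << 3) | 1)) == 0
+ //                              -->  (A & 9) == 0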
+ if (!Cmp.isSigned() && *C1 == 0 && match(And->getOperand(1), m_One())) {
+ Constant *One = cast<Constant>(And->getOperand(1));
+ Value *Or = And->getOperand(0);
+ Value *A, *B, *LShr;
+ if (match(Or, m_Or(m_Value(LShr), m_Value(A))) &&
+ match(LShr, m_LShr(m_Specific(A), m_Value(B)))) {
+ unsigned UsesRemoved = 0;
+ if (And->hasOneUse())
+ ++UsesRemoved;
+ if (Or->hasOneUse())
+ ++UsesRemoved;
+ if (LShr->hasOneUse())
+ ++UsesRemoved;
+
+ // Compute A & ((1 << B) | 1)
+ Value *NewOr = nullptr;
+ if (auto *C = dyn_cast<Constant>(B)) {
+ if (UsesRemoved >= 1)
+ NewOr = ConstantExpr::getOr(ConstantExpr::getNUWShl(One, C), One);
+ } else {
+ if (UsesRemoved >= 3)
+ NewOr = Builder->CreateOr(Builder->CreateShl(One, B, LShr->getName(),
+ /*HasNUW=*/true),
+ One, Or->getName());
+ }
+ if (NewOr) {
+ Value *NewAnd = Builder->CreateAnd(A, NewOr, And->getName());
+ Cmp.setOperand(0, NewAnd);
+ return &Cmp;
+ }
+ }
+ }
+
+ // (X & C2) > C1 --> (X & C2) != 0, if any bit set in (X & C2) will produce a
+ // result greater than C1.
+ unsigned NumTZ = C2->countTrailingZeros();
+ if (Cmp.getPredicate() == ICmpInst::ICMP_UGT && NumTZ < C2->getBitWidth() &&
+ APInt::getOneBitSet(C2->getBitWidth(), NumTZ).ugt(*C1)) {
+ Constant *Zero = Constant::getNullValue(And->getType());
+ return new ICmpInst(ICmpInst::ICMP_NE, And, Zero);
+ }
+
+ return nullptr;
+}
+
+/// Fold icmp (and X, Y), C.
+Instruction *InstCombiner::foldICmpAndConstant(ICmpInst &Cmp,
+ BinaryOperator *And,
+ const APInt *C) {
+ if (Instruction *I = foldICmpAndConstConst(Cmp, And, C))
+ return I;
+
+ // TODO: These all require that Y is constant too, so refactor with the above.
+
+ // Try to optimize things like "A[i] & 42 == 0" to index computations.
+ Value *X = And->getOperand(0);
+ Value *Y = And->getOperand(1);
+ if (auto *LI = dyn_cast<LoadInst>(X))
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0)))
+ if (auto *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
+ if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
+ !LI->isVolatile() && isa<ConstantInt>(Y)) {
+ ConstantInt *C2 = cast<ConstantInt>(Y);
+ if (Instruction *Res = foldCmpLoadFromIndexedGlobal(GEP, GV, Cmp, C2))
+ return Res;
+ }
+
+ if (!Cmp.isEquality())
+ return nullptr;
+
+ // X & -C == -C -> X >u ~C
+ // X & -C != -C -> X <=u ~C
+ // iff C is a power of 2
+ if (Cmp.getOperand(1) == Y && (-(*C)).isPowerOf2()) {
+ auto NewPred = Cmp.getPredicate() == CmpInst::ICMP_EQ ? CmpInst::ICMP_UGT
+ : CmpInst::ICMP_ULE;
+ return new ICmpInst(NewPred, X, SubOne(cast<Constant>(Cmp.getOperand(1))));
+ }
+
+ // (X & C2) == 0 -> (trunc X) >= 0
+ // (X & C2) != 0 -> (trunc X) < 0
+ // iff C2 is a power of 2 and it masks the sign bit of a legal integer type.
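+ // For example (illustrative, assuming i8 is a legal integer type here):
+ //   (X & 128) == 0  -->  (trunc X to i8) >=s 0
+ // because bit 7 is the sign bit of i8.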
+ const APInt *C2;
+ if (And->hasOneUse() && *C == 0 && match(Y, m_APInt(C2))) {
+ int32_t ExactLogBase2 = C2->exactLogBase2();
+ if (ExactLogBase2 != -1 && DL.isLegalInteger(ExactLogBase2 + 1)) {
+ Type *NTy = IntegerType::get(Cmp.getContext(), ExactLogBase2 + 1);
+ if (And->getType()->isVectorTy())
+ NTy = VectorType::get(NTy, And->getType()->getVectorNumElements());
+ Value *Trunc = Builder->CreateTrunc(X, NTy);
+ auto NewPred = Cmp.getPredicate() == CmpInst::ICMP_EQ ? CmpInst::ICMP_SGE
+ : CmpInst::ICMP_SLT;
+ return new ICmpInst(NewPred, Trunc, Constant::getNullValue(NTy));
+ }
+ }
+
+ return nullptr;
+}
+
+/// Fold icmp (or X, Y), C.
+Instruction *InstCombiner::foldICmpOrConstant(ICmpInst &Cmp, BinaryOperator *Or,
+ const APInt *C) {
+ ICmpInst::Predicate Pred = Cmp.getPredicate();
+ if (*C == 1) {
+ // icmp slt signum(V) 1 --> icmp slt V, 1
+ Value *V = nullptr;
+ if (Pred == ICmpInst::ICMP_SLT && match(Or, m_Signum(m_Value(V))))
+ return new ICmpInst(ICmpInst::ICMP_SLT, V,
+ ConstantInt::get(V->getType(), 1));
+ }
+
+ if (!Cmp.isEquality() || *C != 0 || !Or->hasOneUse())
+ return nullptr;
+
+ Value *P, *Q;
+ if (match(Or, m_Or(m_PtrToInt(m_Value(P)), m_PtrToInt(m_Value(Q))))) {
+ // Simplify icmp eq (or (ptrtoint P), (ptrtoint Q)), 0
+ // -> and (icmp eq P, null), (icmp eq Q, null).
+ Value *CmpP =
+ Builder->CreateICmp(Pred, P, ConstantInt::getNullValue(P->getType()));
+ Value *CmpQ =
+ Builder->CreateICmp(Pred, Q, ConstantInt::getNullValue(Q->getType()));
+ auto LogicOpc = Pred == ICmpInst::Predicate::ICMP_EQ ? Instruction::And
+ : Instruction::Or;
+ return BinaryOperator::Create(LogicOpc, CmpP, CmpQ);
+ }
+
+ return nullptr;
+}
+
+/// Fold icmp (mul X, Y), C.
+Instruction *InstCombiner::foldICmpMulConstant(ICmpInst &Cmp,
+ BinaryOperator *Mul,
+ const APInt *C) {
+ const APInt *MulC;
+ if (!match(Mul->getOperand(1), m_APInt(MulC)))
+ return nullptr;
+
+ // If this is a test of the sign bit and the multiply is sign-preserving with
+ // a constant operand, use the multiply LHS operand instead.
+ ICmpInst::Predicate Pred = Cmp.getPredicate();
+ if (isSignTest(Pred, *C) && Mul->hasNoSignedWrap()) {
+ if (MulC->isNegative())
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ return new ICmpInst(Pred, Mul->getOperand(0),
+ Constant::getNullValue(Mul->getType()));
+ }
+
+ return nullptr;
+}
+
+/// Fold icmp (shl 1, Y), C.
+static Instruction *foldICmpShlOne(ICmpInst &Cmp, Instruction *Shl,
+ const APInt *C) {
+ Value *Y;
+ if (!match(Shl, m_Shl(m_One(), m_Value(Y))))
+ return nullptr;
+
+ Type *ShiftType = Shl->getType();
+ uint32_t TypeBits = C->getBitWidth();
+ bool CIsPowerOf2 = C->isPowerOf2();
+ ICmpInst::Predicate Pred = Cmp.getPredicate();
+ if (Cmp.isUnsigned()) {
+ // (1 << Y) pred C -> Y pred Log2(C)
+ if (!CIsPowerOf2) {
+ // (1 << Y) < 30 -> Y <= 4
+ // (1 << Y) <= 30 -> Y <= 4
+ // (1 << Y) >= 30 -> Y > 4
+ // (1 << Y) > 30 -> Y > 4
+ if (Pred == ICmpInst::ICMP_ULT)
+ Pred = ICmpInst::ICMP_ULE;
+ else if (Pred == ICmpInst::ICMP_UGE)
+ Pred = ICmpInst::ICMP_UGT;
+ }
+
+ // (1 << Y) >= 2147483648 -> Y >= 31 -> Y == 31
+ // (1 << Y) < 2147483648 -> Y < 31 -> Y != 31
+ unsigned CLog2 = C->logBase2();
+ if (CLog2 == TypeBits - 1) {
+ if (Pred == ICmpInst::ICMP_UGE)
+ Pred = ICmpInst::ICMP_EQ;
+ else if (Pred == ICmpInst::ICMP_ULT)
+ Pred = ICmpInst::ICMP_NE;
+ }
+ return new ICmpInst(Pred, Y, ConstantInt::get(ShiftType, CLog2));
+ } else if (Cmp.isSigned()) {
+ Constant *BitWidthMinusOne = ConstantInt::get(ShiftType, TypeBits - 1);
+ if (C->isAllOnesValue()) {
+ // (1 << Y) <= -1 -> Y == 31
+ if (Pred == ICmpInst::ICMP_SLE)
+ return new ICmpInst(ICmpInst::ICMP_EQ, Y, BitWidthMinusOne);
+
+ // (1 << Y) > -1 -> Y != 31
+ if (Pred == ICmpInst::ICMP_SGT)
+ return new ICmpInst(ICmpInst::ICMP_NE, Y, BitWidthMinusOne);
+ } else if (!(*C)) {
+ // (1 << Y) < 0 -> Y == 31
+ // (1 << Y) <= 0 -> Y == 31
+ if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE)
+ return new ICmpInst(ICmpInst::ICMP_EQ, Y, BitWidthMinusOne);
+
+ // (1 << Y) >= 0 -> Y != 31
+ // (1 << Y) > 0 -> Y != 31
+ if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE)
+ return new ICmpInst(ICmpInst::ICMP_NE, Y, BitWidthMinusOne);
+ }
+ } else if (Cmp.isEquality() && CIsPowerOf2) {
+ return new ICmpInst(Pred, Y, ConstantInt::get(ShiftType, C->logBase2()));
+ }
+
+ return nullptr;
+}
+
+/// Fold icmp (shl X, Y), C.
+Instruction *InstCombiner::foldICmpShlConstant(ICmpInst &Cmp,
+ BinaryOperator *Shl,
+ const APInt *C) {
+ const APInt *ShiftVal;
+ if (Cmp.isEquality() && match(Shl->getOperand(0), m_APInt(ShiftVal)))
+ return foldICmpShlConstConst(Cmp, Shl->getOperand(1), *C, *ShiftVal);
+
+ const APInt *ShiftAmt;
+ if (!match(Shl->getOperand(1), m_APInt(ShiftAmt)))
+ return foldICmpShlOne(Cmp, Shl, C);
+
+ // Check that the shift amount is in range. If not, don't perform undefined
+ // shifts. When the shift is visited it will be simplified.
+ unsigned TypeBits = C->getBitWidth();
+ if (ShiftAmt->uge(TypeBits))
+ return nullptr;
+
+ ICmpInst::Predicate Pred = Cmp.getPredicate();
+ Value *X = Shl->getOperand(0);
+ if (Cmp.isEquality()) {
+ // If the shift is NUW, then it is just shifting out zeros, no need for an
+ // AND.
+ Constant *LShrC = ConstantInt::get(Shl->getType(), C->lshr(*ShiftAmt));
+ if (Shl->hasNoUnsignedWrap())
+ return new ICmpInst(Pred, X, LShrC);
+
+ // If the shift is NSW and we compare to 0, then it is just shifting out
+ // sign bits, no need for an AND either.
+ if (Shl->hasNoSignedWrap() && *C == 0)
+ return new ICmpInst(Pred, X, LShrC);
+
+ if (Shl->hasOneUse()) {
+ // Otherwise strength reduce the shift into an and.
+ Constant *Mask = ConstantInt::get(Shl->getType(),
+ APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt->getZExtValue()));
+
+ Value *And = Builder->CreateAnd(X, Mask, Shl->getName() + ".mask");
+ return new ICmpInst(Pred, And, LShrC);
+ }
+ }
+
+ // If this is a signed comparison to 0 and the shift is sign preserving,
+ // use the shift LHS operand instead; isSignTest may change 'Pred', so only
+ // do that if we're sure to not continue on in this function.
+ if (Shl->hasNoSignedWrap() && isSignTest(Pred, *C))
+ return new ICmpInst(Pred, X, Constant::getNullValue(X->getType()));
+
+ // Otherwise, if this is a comparison of the sign bit, simplify to and/test.
+ bool TrueIfSigned = false;
+ if (Shl->hasOneUse() && isSignBitCheck(Pred, *C, TrueIfSigned)) {
+ // (X << 31) <s 0 --> (X & 1) != 0
+ Constant *Mask = ConstantInt::get(
+ X->getType(),
+ APInt::getOneBitSet(TypeBits, TypeBits - ShiftAmt->getZExtValue() - 1));
+ Value *And = Builder->CreateAnd(X, Mask, Shl->getName() + ".mask");
+ return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ,
+ And, Constant::getNullValue(And->getType()));
+ }
+
+ // When the shift is nuw and the pred is >u or <=u, the comparison only really
+ // happens in the pre-shifted bits. Since InstSimplify canonicalizes <=u into
+ // <u, the <=u case can be further converted to match <u (see below).
+ if (Shl->hasNoUnsignedWrap() &&
+ (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_ULT)) {
+ // Derivation for the ult case:
+ // (X << S) <=u C is equiv to X <=u (C >> S) for all C
+ // (X << S) <u (C + 1) is equiv to X <u (C >> S) + 1 if C <u ~0u
+ // (X << S) <u C is equiv to X <u ((C - 1) >> S) + 1 if C >u 0
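+ // For example (illustrative, with S == 2 and a nuw shift):
+ //   (X << 2) <u 20  -->  X <u ((20 - 1) >> 2) + 1  ==  X <u 5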
+ assert((Pred != ICmpInst::ICMP_ULT || C->ugt(0)) &&
+ "Encountered `ult 0` that should have been eliminated by "
+ "InstSimplify.");
+ APInt ShiftedC = Pred == ICmpInst::ICMP_ULT ? (*C - 1).lshr(*ShiftAmt) + 1
+ : C->lshr(*ShiftAmt);
+ return new ICmpInst(Pred, X, ConstantInt::get(X->getType(), ShiftedC));
+ }
+
+ // Transform (icmp pred iM (shl iM %v, N), C)
+ // -> (icmp pred i(M-N) (trunc %v iM to i(M-N)), (trunc (C>>N)))
+ // Transform the shl to a trunc if (trunc (C>>N)) loses no bits and i(M-N)
+ // is a legal integer type.
+ // This enables us to get rid of the shift in favor of a trunc which can be
+ // free on the target. It has the additional benefit of comparing to a
+ // smaller constant, which will be target friendly.
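+ // For example (illustrative, assuming i16 is legal and the shl has one use):
+ //   icmp slt (shl i32 %v, 16), 65536  -->  icmp slt (trunc i32 %v to i16), 1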
+ unsigned Amt = ShiftAmt->getLimitedValue(TypeBits - 1);
+ if (Shl->hasOneUse() && Amt != 0 && C->countTrailingZeros() >= Amt &&
+ DL.isLegalInteger(TypeBits - Amt)) {
+ Type *TruncTy = IntegerType::get(Cmp.getContext(), TypeBits - Amt);
+ if (X->getType()->isVectorTy())
+ TruncTy = VectorType::get(TruncTy, X->getType()->getVectorNumElements());
+ Constant *NewC =
+ ConstantInt::get(TruncTy, C->ashr(*ShiftAmt).trunc(TypeBits - Amt));
+ return new ICmpInst(Pred, Builder->CreateTrunc(X, TruncTy), NewC);
+ }
+
+ return nullptr;
+}
+
+/// Fold icmp ({al}shr X, Y), C.
+Instruction *InstCombiner::foldICmpShrConstant(ICmpInst &Cmp,
+ BinaryOperator *Shr,
+ const APInt *C) {
+ // An exact shr only shifts out zero bits, so:
+ // icmp eq/ne (shr X, Y), 0 --> icmp eq/ne X, 0
+ Value *X = Shr->getOperand(0);
+ CmpInst::Predicate Pred = Cmp.getPredicate();
+ if (Cmp.isEquality() && Shr->isExact() && Shr->hasOneUse() && *C == 0)
+ return new ICmpInst(Pred, X, Cmp.getOperand(1));
+
+ const APInt *ShiftVal;
+ if (Cmp.isEquality() && match(Shr->getOperand(0), m_APInt(ShiftVal)))
+ return foldICmpShrConstConst(Cmp, Shr->getOperand(1), *C, *ShiftVal);
+
+ const APInt *ShiftAmt;
+ if (!match(Shr->getOperand(1), m_APInt(ShiftAmt)))
+ return nullptr;
+
+ // Check that the shift amount is in range. If not, don't perform undefined
+ // shifts. When the shift is visited it will be simplified.
+ unsigned TypeBits = C->getBitWidth();
+ unsigned ShAmtVal = ShiftAmt->getLimitedValue(TypeBits);
+ if (ShAmtVal >= TypeBits || ShAmtVal == 0)
+ return nullptr;
+
+ bool IsAShr = Shr->getOpcode() == Instruction::AShr;
+ if (!Cmp.isEquality()) {
+ // If we have an unsigned comparison and an ashr, we can't simplify this.
+ // Similarly for signed comparisons with lshr.
+ if (Cmp.isSigned() != IsAShr)
+ return nullptr;
+
+ // Otherwise, all lshr and most exact ashr's are equivalent to a udiv/sdiv
+ // by a power of 2. Since we already have logic to simplify these,
+ // transform to div and then simplify the resultant comparison.
+ if (IsAShr && (!Shr->isExact() || ShAmtVal == TypeBits - 1))
+ return nullptr;
+
+ // Revisit the shift (to delete it).
+ Worklist.Add(Shr);
+
+ Constant *DivCst = ConstantInt::get(
+ Shr->getType(), APInt::getOneBitSet(TypeBits, ShAmtVal));
+
+ Value *Tmp = IsAShr ? Builder->CreateSDiv(X, DivCst, "", Shr->isExact())
+ : Builder->CreateUDiv(X, DivCst, "", Shr->isExact());
+
+ Cmp.setOperand(0, Tmp);
+
+ // If the builder folded the binop, just return it.
+ BinaryOperator *TheDiv = dyn_cast<BinaryOperator>(Tmp);
+ if (!TheDiv)
+ return &Cmp;
+
+ // Otherwise, fold this div/compare.
+ assert(TheDiv->getOpcode() == Instruction::SDiv ||
+ TheDiv->getOpcode() == Instruction::UDiv);
+
+ Instruction *Res = foldICmpDivConstant(Cmp, TheDiv, C);
+ assert(Res && "This div/cst should have folded!");
+ return Res;
+ }
+
+ // Handle equality comparisons of shift-by-constant.
+
+ // If the comparison constant changes with the shift, the comparison cannot
+ // succeed (bits of the comparison constant cannot match the shifted value).
+ // This should be known by InstSimplify and already be folded to true/false.
+ assert(((IsAShr && C->shl(ShAmtVal).ashr(ShAmtVal) == *C) ||
+ (!IsAShr && C->shl(ShAmtVal).lshr(ShAmtVal) == *C)) &&
+ "Expected icmp+shr simplify did not occur.");
+
+ // Check if the bits shifted out are known to be zero. If so, we can compare
+ // against the unshifted value:
+ // (X & 4) >> 1 == 2 --> (X & 4) == 4.
+ Constant *ShiftedCmpRHS = ConstantInt::get(Shr->getType(), *C << ShAmtVal);
+ if (Shr->hasOneUse()) {
+ if (Shr->isExact())
+ return new ICmpInst(Pred, X, ShiftedCmpRHS);
+
+ // Otherwise strength reduce the shift into an 'and'.
+ APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
+ Constant *Mask = ConstantInt::get(Shr->getType(), Val);
+ Value *And = Builder->CreateAnd(X, Mask, Shr->getName() + ".mask");
+ return new ICmpInst(Pred, And, ShiftedCmpRHS);
+ }
+
+ return nullptr;
+}
+
+/// Fold icmp (udiv X, Y), C.
+Instruction *InstCombiner::foldICmpUDivConstant(ICmpInst &Cmp,
+ BinaryOperator *UDiv,
+ const APInt *C) {
+ const APInt *C2;
+ if (!match(UDiv->getOperand(0), m_APInt(C2)))
+ return nullptr;
+
+ assert(*C2 != 0 && "udiv 0, X should have been simplified already.");
+
+ // (icmp ugt (udiv C2, Y), C) -> (icmp ule Y, C2/(C+1))
+ Value *Y = UDiv->getOperand(1);
+ if (Cmp.getPredicate() == ICmpInst::ICMP_UGT) {
+ assert(!C->isMaxValue() &&
+ "icmp ugt X, UINT_MAX should have been simplified already.");
+ return new ICmpInst(ICmpInst::ICMP_ULE, Y,
+ ConstantInt::get(Y->getType(), C2->udiv(*C + 1)));
+ }
+
+ // (icmp ult (udiv C2, Y), C) -> (icmp ugt Y, C2/C)
+ if (Cmp.getPredicate() == ICmpInst::ICMP_ULT) {
+ assert(*C != 0 && "icmp ult X, 0 should have been simplified already.");
+ return new ICmpInst(ICmpInst::ICMP_UGT, Y,
+ ConstantInt::get(Y->getType(), C2->udiv(*C)));
+ }
+
+ return nullptr;
+}
+
+/// Fold icmp ({su}div X, Y), C.
+Instruction *InstCombiner::foldICmpDivConstant(ICmpInst &Cmp,
+ BinaryOperator *Div,
+ const APInt *C) {
+ // Fold: icmp pred ([us]div X, C2), C -> range test
+ // Fold this div into the comparison, producing a range check.
+ // Determine, based on the divide type, what the range is being
+ // checked. If there is an overflow on the low or high side, remember
+ // it, otherwise compute the range [low, hi) bounding the new value.
+ // See: InsertRangeTest above for the kinds of replacements possible.
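+ // For example (illustrative): "X /u 5 == 3" holds exactly when X is in the
+ // interval [15, 20).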
+ const APInt *C2;
+ if (!match(Div->getOperand(1), m_APInt(C2)))
+ return nullptr;
// FIXME: If the operand types don't match the type of the divide
// then don't attempt this transform. The code below doesn't have the
// logic to deal with a signed divide and an unsigned compare (and
- // vice versa). This is because (x /s C1) <s C2 produces different
- // results than (x /s C1) <u C2 or (x /u C1) <s C2 or even
- // (x /u C1) <u C2. Simply casting the operands and result won't
+ // vice versa). This is because (x /s C2) <s C produces different
+ // results than (x /s C2) <u C or (x /u C2) <s C or even
+ // (x /u C2) <u C. Simply casting the operands and result won't
// work. :( The if statement below tests that condition and bails
// if it finds it.
- bool DivIsSigned = DivI->getOpcode() == Instruction::SDiv;
- if (!ICI.isEquality() && DivIsSigned != ICI.isSigned())
+ bool DivIsSigned = Div->getOpcode() == Instruction::SDiv;
+ if (!Cmp.isEquality() && DivIsSigned != Cmp.isSigned())
+ return nullptr;
+
+ // The ProdOV computation fails on divide by 0 and divide by -1. Cases with
+ // INT_MIN will also fail if the divisor is 1. Although folds of all these
+ // division-by-constant cases should be present, we can not assert that they
+ // have happened before we reach this icmp instruction.
+ if (*C2 == 0 || *C2 == 1 || (DivIsSigned && C2->isAllOnesValue()))
return nullptr;
- if (DivRHS->isZero())
- return nullptr; // The ProdOV computation fails on divide by zero.
- if (DivIsSigned && DivRHS->isAllOnesValue())
- return nullptr; // The overflow computation also screws up here
- if (DivRHS->isOne()) {
- // This eliminates some funny cases with INT_MIN.
- ICI.setOperand(0, DivI->getOperand(0)); // X/1 == X.
- return &ICI;
- }
-
- // Compute Prod = CI * DivRHS. We are essentially solving an equation
- // of form X/C1=C2. We solve for X by multiplying C1 (DivRHS) and
- // C2 (CI). By solving for X we can turn this into a range check
- // instead of computing a divide.
+
+ // TODO: We could do all of the computations below using APInt.
+ Constant *CmpRHS = cast<Constant>(Cmp.getOperand(1));
+ Constant *DivRHS = cast<Constant>(Div->getOperand(1));
+
+ // Compute Prod = CmpRHS * DivRHS. We are essentially solving an equation of
+ // form X / C2 = C. We solve for X by multiplying C2 (DivRHS) and C (CmpRHS).
+ // By solving for X, we can turn this into a range check instead of computing
+ // a divide.
Constant *Prod = ConstantExpr::getMul(CmpRHS, DivRHS);
- // Determine if the product overflows by seeing if the product is
- // not equal to the divide. Make sure we do the same kind of divide
- // as in the LHS instruction that we're folding.
- bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS) :
- ConstantExpr::getUDiv(Prod, DivRHS)) != CmpRHS;
+ // Determine if the product overflows by seeing if the product is not equal to
+ // the divide. Make sure we do the same kind of divide as in the LHS
+ // instruction that we're folding.
+ bool ProdOV = (DivIsSigned ? ConstantExpr::getSDiv(Prod, DivRHS)
+ : ConstantExpr::getUDiv(Prod, DivRHS)) != CmpRHS;
- // Get the ICmp opcode
- ICmpInst::Predicate Pred = ICI.getPredicate();
+ ICmpInst::Predicate Pred = Cmp.getPredicate();
// If the division is known to be exact, then there is no remainder from the
  // divide, so the covered range size is one; otherwise it is the divisor.
- ConstantInt *RangeSize = DivI->isExact() ? getOne(Prod) : DivRHS;
+ Constant *RangeSize =
+ Div->isExact() ? ConstantInt::get(Div->getType(), 1) : DivRHS;
// Figure out the interval that is being checked. For example, a comparison
// like "X /u 5 == 0" is really checking that X is in the interval [0, 5).
@@ -1245,1134 +2180,1094 @@ Instruction *InstCombiner::FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
if (!HiOverflow) {
// If this is not an exact divide, then many values in the range collapse
// to the same result value.
- HiOverflow = AddWithOverflow(HiBound, LoBound, RangeSize, false);
+ HiOverflow = addWithOverflow(HiBound, LoBound, RangeSize, false);
}
- } else if (DivRHS->getValue().isStrictlyPositive()) { // Divisor is > 0.
- if (CmpRHSV == 0) { // (X / pos) op 0
+ } else if (C2->isStrictlyPositive()) { // Divisor is > 0.
+ if (*C == 0) { // (X / pos) op 0
// Can't overflow. e.g. X/2 op 0 --> [-1, 2)
LoBound = ConstantExpr::getNeg(SubOne(RangeSize));
HiBound = RangeSize;
- } else if (CmpRHSV.isStrictlyPositive()) { // (X / pos) op pos
+ } else if (C->isStrictlyPositive()) { // (X / pos) op pos
LoBound = Prod; // e.g. X/5 op 3 --> [15, 20)
HiOverflow = LoOverflow = ProdOV;
if (!HiOverflow)
- HiOverflow = AddWithOverflow(HiBound, Prod, RangeSize, true);
+ HiOverflow = addWithOverflow(HiBound, Prod, RangeSize, true);
} else { // (X / pos) op neg
// e.g. X/5 op -3 --> [-15-4, -15+1) --> [-19, -14)
HiBound = AddOne(Prod);
LoOverflow = HiOverflow = ProdOV ? -1 : 0;
if (!LoOverflow) {
- ConstantInt *DivNeg =cast<ConstantInt>(ConstantExpr::getNeg(RangeSize));
- LoOverflow = AddWithOverflow(LoBound, HiBound, DivNeg, true) ? -1 : 0;
+ Constant *DivNeg = ConstantExpr::getNeg(RangeSize);
+ LoOverflow = addWithOverflow(LoBound, HiBound, DivNeg, true) ? -1 : 0;
}
}
- } else if (DivRHS->isNegative()) { // Divisor is < 0.
- if (DivI->isExact())
- RangeSize = cast<ConstantInt>(ConstantExpr::getNeg(RangeSize));
- if (CmpRHSV == 0) { // (X / neg) op 0
+ } else if (C2->isNegative()) { // Divisor is < 0.
+ if (Div->isExact())
+ RangeSize = ConstantExpr::getNeg(RangeSize);
+ if (*C == 0) { // (X / neg) op 0
// e.g. X/-5 op 0 --> [-4, 5)
LoBound = AddOne(RangeSize);
- HiBound = cast<ConstantInt>(ConstantExpr::getNeg(RangeSize));
+ HiBound = ConstantExpr::getNeg(RangeSize);
if (HiBound == DivRHS) { // -INTMIN = INTMIN
HiOverflow = 1; // [INTMIN+1, overflow)
HiBound = nullptr; // e.g. X/INTMIN = 0 --> X > INTMIN
}
- } else if (CmpRHSV.isStrictlyPositive()) { // (X / neg) op pos
+ } else if (C->isStrictlyPositive()) { // (X / neg) op pos
// e.g. X/-5 op 3 --> [-19, -14)
HiBound = AddOne(Prod);
HiOverflow = LoOverflow = ProdOV ? -1 : 0;
if (!LoOverflow)
- LoOverflow = AddWithOverflow(LoBound, HiBound, RangeSize, true) ? -1:0;
+ LoOverflow = addWithOverflow(LoBound, HiBound, RangeSize, true) ? -1:0;
} else { // (X / neg) op neg
LoBound = Prod; // e.g. X/-5 op -3 --> [15, 20)
LoOverflow = HiOverflow = ProdOV;
if (!HiOverflow)
- HiOverflow = SubWithOverflow(HiBound, Prod, RangeSize, true);
+ HiOverflow = subWithOverflow(HiBound, Prod, RangeSize, true);
}
// Dividing by a negative swaps the condition. LT <-> GT
Pred = ICmpInst::getSwappedPredicate(Pred);
}
- Value *X = DivI->getOperand(0);
+ Value *X = Div->getOperand(0);
switch (Pred) {
- default: llvm_unreachable("Unhandled icmp opcode!");
- case ICmpInst::ICMP_EQ:
- if (LoOverflow && HiOverflow)
- return replaceInstUsesWith(ICI, Builder->getFalse());
- if (HiOverflow)
- return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
- ICmpInst::ICMP_UGE, X, LoBound);
- if (LoOverflow)
- return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
- ICmpInst::ICMP_ULT, X, HiBound);
- return replaceInstUsesWith(ICI, InsertRangeTest(X, LoBound, HiBound,
- DivIsSigned, true));
- case ICmpInst::ICMP_NE:
- if (LoOverflow && HiOverflow)
- return replaceInstUsesWith(ICI, Builder->getTrue());
- if (HiOverflow)
- return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
- ICmpInst::ICMP_ULT, X, LoBound);
- if (LoOverflow)
- return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
- ICmpInst::ICMP_UGE, X, HiBound);
- return replaceInstUsesWith(ICI, InsertRangeTest(X, LoBound, HiBound,
- DivIsSigned, false));
- case ICmpInst::ICMP_ULT:
- case ICmpInst::ICMP_SLT:
- if (LoOverflow == +1) // Low bound is greater than input range.
- return replaceInstUsesWith(ICI, Builder->getTrue());
- if (LoOverflow == -1) // Low bound is less than input range.
- return replaceInstUsesWith(ICI, Builder->getFalse());
- return new ICmpInst(Pred, X, LoBound);
- case ICmpInst::ICMP_UGT:
- case ICmpInst::ICMP_SGT:
- if (HiOverflow == +1) // High bound greater than input range.
- return replaceInstUsesWith(ICI, Builder->getFalse());
- if (HiOverflow == -1) // High bound less than input range.
- return replaceInstUsesWith(ICI, Builder->getTrue());
- if (Pred == ICmpInst::ICMP_UGT)
- return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound);
- return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound);
+ default: llvm_unreachable("Unhandled icmp opcode!");
+ case ICmpInst::ICMP_EQ:
+ if (LoOverflow && HiOverflow)
+ return replaceInstUsesWith(Cmp, Builder->getFalse());
+ if (HiOverflow)
+ return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
+ ICmpInst::ICMP_UGE, X, LoBound);
+ if (LoOverflow)
+ return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
+ ICmpInst::ICMP_ULT, X, HiBound);
+ return replaceInstUsesWith(
+ Cmp, insertRangeTest(X, LoBound->getUniqueInteger(),
+ HiBound->getUniqueInteger(), DivIsSigned, true));
+ case ICmpInst::ICMP_NE:
+ if (LoOverflow && HiOverflow)
+ return replaceInstUsesWith(Cmp, Builder->getTrue());
+ if (HiOverflow)
+ return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SLT :
+ ICmpInst::ICMP_ULT, X, LoBound);
+ if (LoOverflow)
+ return new ICmpInst(DivIsSigned ? ICmpInst::ICMP_SGE :
+ ICmpInst::ICMP_UGE, X, HiBound);
+ return replaceInstUsesWith(Cmp,
+ insertRangeTest(X, LoBound->getUniqueInteger(),
+ HiBound->getUniqueInteger(),
+ DivIsSigned, false));
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_SLT:
+ if (LoOverflow == +1) // Low bound is greater than input range.
+ return replaceInstUsesWith(Cmp, Builder->getTrue());
+ if (LoOverflow == -1) // Low bound is less than input range.
+ return replaceInstUsesWith(Cmp, Builder->getFalse());
+ return new ICmpInst(Pred, X, LoBound);
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_SGT:
+ if (HiOverflow == +1) // High bound greater than input range.
+ return replaceInstUsesWith(Cmp, Builder->getFalse());
+ if (HiOverflow == -1) // High bound less than input range.
+ return replaceInstUsesWith(Cmp, Builder->getTrue());
+ if (Pred == ICmpInst::ICMP_UGT)
+ return new ICmpInst(ICmpInst::ICMP_UGE, X, HiBound);
+ return new ICmpInst(ICmpInst::ICMP_SGE, X, HiBound);
}
+
+ return nullptr;
}
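
The range check this produces can be spot-checked outside the compiler; for example, under ordinary C++ truncating signed division, "X /s 5 == 3" should be equivalent to X lying in [15, 20). A minimal brute-force illustration (not from the patch):

    #include <cassert>
    bool beforeFold(int x) { return x / 5 == 3; }        // icmp eq (sdiv x, 5), 3
    bool afterFold(int x)  { return x >= 15 && x < 20; } // range test [15, 20)
    int main() {
      for (int x = -1000; x <= 1000; ++x)
        assert(beforeFold(x) == afterFold(x));
    }
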
-/// Handle "icmp(([al]shr X, cst1), cst2)".
-Instruction *InstCombiner::FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *Shr,
- ConstantInt *ShAmt) {
- const APInt &CmpRHSV = cast<ConstantInt>(ICI.getOperand(1))->getValue();
+/// Fold icmp (sub X, Y), C.
+Instruction *InstCombiner::foldICmpSubConstant(ICmpInst &Cmp,
+ BinaryOperator *Sub,
+ const APInt *C) {
+ Value *X = Sub->getOperand(0), *Y = Sub->getOperand(1);
+ ICmpInst::Predicate Pred = Cmp.getPredicate();
- // Check that the shift amount is in range. If not, don't perform
- // undefined shifts. When the shift is visited it will be
- // simplified.
- uint32_t TypeBits = CmpRHSV.getBitWidth();
- uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
- if (ShAmtVal >= TypeBits || ShAmtVal == 0)
+ // The following transforms are only worth it if the only user of the subtract
+ // is the icmp.
+ if (!Sub->hasOneUse())
return nullptr;
- if (!ICI.isEquality()) {
- // If we have an unsigned comparison and an ashr, we can't simplify this.
- // Similarly for signed comparisons with lshr.
- if (ICI.isSigned() != (Shr->getOpcode() == Instruction::AShr))
- return nullptr;
-
- // Otherwise, all lshr and most exact ashr's are equivalent to a udiv/sdiv
- // by a power of 2. Since we already have logic to simplify these,
- // transform to div and then simplify the resultant comparison.
- if (Shr->getOpcode() == Instruction::AShr &&
- (!Shr->isExact() || ShAmtVal == TypeBits - 1))
- return nullptr;
-
- // Revisit the shift (to delete it).
- Worklist.Add(Shr);
+ if (Sub->hasNoSignedWrap()) {
+ // (icmp sgt (sub nsw X, Y), -1) -> (icmp sge X, Y)
+ if (Pred == ICmpInst::ICMP_SGT && C->isAllOnesValue())
+ return new ICmpInst(ICmpInst::ICMP_SGE, X, Y);
- Constant *DivCst =
- ConstantInt::get(Shr->getType(), APInt::getOneBitSet(TypeBits, ShAmtVal));
+ // (icmp sgt (sub nsw X, Y), 0) -> (icmp sgt X, Y)
+ if (Pred == ICmpInst::ICMP_SGT && *C == 0)
+ return new ICmpInst(ICmpInst::ICMP_SGT, X, Y);
- Value *Tmp =
- Shr->getOpcode() == Instruction::AShr ?
- Builder->CreateSDiv(Shr->getOperand(0), DivCst, "", Shr->isExact()) :
- Builder->CreateUDiv(Shr->getOperand(0), DivCst, "", Shr->isExact());
+ // (icmp slt (sub nsw X, Y), 0) -> (icmp slt X, Y)
+ if (Pred == ICmpInst::ICMP_SLT && *C == 0)
+ return new ICmpInst(ICmpInst::ICMP_SLT, X, Y);
- ICI.setOperand(0, Tmp);
-
- // If the builder folded the binop, just return it.
- BinaryOperator *TheDiv = dyn_cast<BinaryOperator>(Tmp);
- if (!TheDiv)
- return &ICI;
-
- // Otherwise, fold this div/compare.
- assert(TheDiv->getOpcode() == Instruction::SDiv ||
- TheDiv->getOpcode() == Instruction::UDiv);
-
- Instruction *Res = FoldICmpDivCst(ICI, TheDiv, cast<ConstantInt>(DivCst));
- assert(Res && "This div/cst should have folded!");
- return Res;
+ // (icmp slt (sub nsw X, Y), 1) -> (icmp sle X, Y)
+ if (Pred == ICmpInst::ICMP_SLT && *C == 1)
+ return new ICmpInst(ICmpInst::ICMP_SLE, X, Y);
}
- // If we are comparing against bits always shifted out, the
- // comparison cannot succeed.
- APInt Comp = CmpRHSV << ShAmtVal;
- ConstantInt *ShiftedCmpRHS = Builder->getInt(Comp);
- if (Shr->getOpcode() == Instruction::LShr)
- Comp = Comp.lshr(ShAmtVal);
- else
- Comp = Comp.ashr(ShAmtVal);
+ const APInt *C2;
+ if (!match(X, m_APInt(C2)))
+ return nullptr;
- if (Comp != CmpRHSV) { // Comparing against a bit that we know is zero.
- bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
- Constant *Cst = Builder->getInt1(IsICMP_NE);
- return replaceInstUsesWith(ICI, Cst);
- }
+ // C2 - Y <u C -> (Y | (C - 1)) == C2
+ // iff (C2 & (C - 1)) == C - 1 and C is a power of 2
+ if (Pred == ICmpInst::ICMP_ULT && C->isPowerOf2() &&
+ (*C2 & (*C - 1)) == (*C - 1))
+ return new ICmpInst(ICmpInst::ICMP_EQ, Builder->CreateOr(Y, *C - 1), X);
- // Otherwise, check to see if the bits shifted out are known to be zero.
- // If so, we can compare against the unshifted value:
- // (X & 4) >> 1 == 2 --> (X & 4) == 4.
- if (Shr->hasOneUse() && Shr->isExact())
- return new ICmpInst(ICI.getPredicate(), Shr->getOperand(0), ShiftedCmpRHS);
+ // C2 - Y >u C -> (Y | C) != C2
+ // iff C2 & C == C and C + 1 is a power of 2
+ if (Pred == ICmpInst::ICMP_UGT && (*C + 1).isPowerOf2() && (*C2 & *C) == *C)
+ return new ICmpInst(ICmpInst::ICMP_NE, Builder->CreateOr(Y, *C), X);
- if (Shr->hasOneUse()) {
- // Otherwise strength reduce the shift into an and.
- APInt Val(APInt::getHighBitsSet(TypeBits, TypeBits - ShAmtVal));
- Constant *Mask = Builder->getInt(Val);
-
- Value *And = Builder->CreateAnd(Shr->getOperand(0),
- Mask, Shr->getName()+".mask");
- return new ICmpInst(ICI.getPredicate(), And, ShiftedCmpRHS);
- }
return nullptr;
}
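
To see why the unsigned-subtract fold "C2 - Y <u C -> (Y | (C - 1)) == C2" above is valid, take C2 = 15 and C = 4 (C is a power of 2 and the low two bits of C2 are all ones, so the side condition holds). A small brute-force check assuming 32-bit unsigned wraparound; illustration only:

    #include <cassert>
    #include <cstdint>
    bool beforeFold(uint32_t y) { return 15u - y < 4u; }    // icmp ult (sub 15, y), 4
    bool afterFold(uint32_t y)  { return (y | 3u) == 15u; } // icmp eq (or y, 3), 15
    int main() {
      for (uint32_t y = 0; y < 64; ++y)
        assert(beforeFold(y) == afterFold(y));
    }
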
-/// Handle "(icmp eq/ne (ashr/lshr const2, A), const1)" ->
-/// (icmp eq/ne A, Log2(const2/const1)) ->
-/// (icmp eq/ne A, Log2(const2) - Log2(const1)).
-Instruction *InstCombiner::FoldICmpCstShrCst(ICmpInst &I, Value *Op, Value *A,
- ConstantInt *CI1,
- ConstantInt *CI2) {
- assert(I.isEquality() && "Cannot fold icmp gt/lt");
-
- auto getConstant = [&I, this](bool IsTrue) {
- if (I.getPredicate() == I.ICMP_NE)
- IsTrue = !IsTrue;
- return replaceInstUsesWith(I, ConstantInt::get(I.getType(), IsTrue));
- };
-
- auto getICmp = [&I](CmpInst::Predicate Pred, Value *LHS, Value *RHS) {
- if (I.getPredicate() == I.ICMP_NE)
- Pred = CmpInst::getInversePredicate(Pred);
- return new ICmpInst(Pred, LHS, RHS);
- };
+/// Fold icmp (add X, Y), C.
+Instruction *InstCombiner::foldICmpAddConstant(ICmpInst &Cmp,
+ BinaryOperator *Add,
+ const APInt *C) {
+ Value *Y = Add->getOperand(1);
+ const APInt *C2;
+ if (Cmp.isEquality() || !match(Y, m_APInt(C2)))
+ return nullptr;
- const APInt &AP1 = CI1->getValue();
- const APInt &AP2 = CI2->getValue();
+ // Fold icmp pred (add X, C2), C.
+ Value *X = Add->getOperand(0);
+ Type *Ty = Add->getType();
+ auto CR =
+ ConstantRange::makeExactICmpRegion(Cmp.getPredicate(), *C).subtract(*C2);
+ const APInt &Upper = CR.getUpper();
+ const APInt &Lower = CR.getLower();
+ if (Cmp.isSigned()) {
+ if (Lower.isSignBit())
+ return new ICmpInst(ICmpInst::ICMP_SLT, X, ConstantInt::get(Ty, Upper));
+ if (Upper.isSignBit())
+ return new ICmpInst(ICmpInst::ICMP_SGE, X, ConstantInt::get(Ty, Lower));
+ } else {
+ if (Lower.isMinValue())
+ return new ICmpInst(ICmpInst::ICMP_ULT, X, ConstantInt::get(Ty, Upper));
+ if (Upper.isMinValue())
+ return new ICmpInst(ICmpInst::ICMP_UGE, X, ConstantInt::get(Ty, Lower));
+ }
- // Don't bother doing any work for cases which InstSimplify handles.
- if (AP2 == 0)
+ if (!Add->hasOneUse())
return nullptr;
- bool IsAShr = isa<AShrOperator>(Op);
- if (IsAShr) {
- if (AP2.isAllOnesValue())
- return nullptr;
- if (AP2.isNegative() != AP1.isNegative())
- return nullptr;
- if (AP2.sgt(AP1))
- return nullptr;
- }
- if (!AP1)
- // 'A' must be large enough to shift out the highest set bit.
- return getICmp(I.ICMP_UGT, A,
- ConstantInt::get(A->getType(), AP2.logBase2()));
+ // X+C2 <u C -> (X & -C) == -C2
+ // iff C2 & (C-1) == 0 and C is a power of 2
+ if (Cmp.getPredicate() == ICmpInst::ICMP_ULT && C->isPowerOf2() &&
+ (*C2 & (*C - 1)) == 0)
+ return new ICmpInst(ICmpInst::ICMP_EQ, Builder->CreateAnd(X, -(*C)),
+ ConstantExpr::getNeg(cast<Constant>(Y)));
+
+ // X+C2 >u C -> (X & ~C) != -C2
+ // iff C2 & C == 0 and C+1 is a power of 2
+ if (Cmp.getPredicate() == ICmpInst::ICMP_UGT && (*C + 1).isPowerOf2() &&
+ (*C2 & *C) == 0)
+ return new ICmpInst(ICmpInst::ICMP_NE, Builder->CreateAnd(X, ~(*C)),
+ ConstantExpr::getNeg(cast<Constant>(Y)));
- if (AP1 == AP2)
- return getICmp(I.ICMP_EQ, A, ConstantInt::getNullValue(A->getType()));
+ return nullptr;
+}
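
The unsigned fold "X+C2 <u C -> (X & -C) == -C2" above can be checked numerically with C2 = 8 and C = 4 (so C is a power of 2 and C2 & (C-1) == 0), assuming 32-bit unsigned wraparound. A brute-force sketch over values around the wrap boundary; illustration only:

    #include <cassert>
    #include <cstdint>
    bool beforeFold(uint32_t x) { return x + 8u < 4u; }      // icmp ult (add x, 8), 4
    bool afterFold(uint32_t x)  { return (x & -4u) == -8u; } // icmp eq (and x, -4), -8
    int main() {
      uint32_t samples[] = {0u, 3u, 4u, 0xFFFFFFF4u, 0xFFFFFFF7u, 0xFFFFFFF8u,
                            0xFFFFFFFBu, 0xFFFFFFFCu, 0xFFFFFFFFu};
      for (uint32_t x : samples)
        assert(beforeFold(x) == afterFold(x));
    }
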
- int Shift;
- if (IsAShr && AP1.isNegative())
- Shift = AP1.countLeadingOnes() - AP2.countLeadingOnes();
- else
- Shift = AP1.countLeadingZeros() - AP2.countLeadingZeros();
+/// Try to fold integer comparisons with a constant operand: icmp Pred X, C
+/// where X is some kind of instruction.
+Instruction *InstCombiner::foldICmpInstWithConstant(ICmpInst &Cmp) {
+ const APInt *C;
+ if (!match(Cmp.getOperand(1), m_APInt(C)))
+ return nullptr;
- if (Shift > 0) {
- if (IsAShr && AP1 == AP2.ashr(Shift)) {
- // There are multiple solutions if we are comparing against -1 and the LHS
- // of the ashr is not a power of two.
- if (AP1.isAllOnesValue() && !AP2.isPowerOf2())
- return getICmp(I.ICMP_UGE, A, ConstantInt::get(A->getType(), Shift));
- return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift));
- } else if (AP1 == AP2.lshr(Shift)) {
- return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift));
+ BinaryOperator *BO;
+ if (match(Cmp.getOperand(0), m_BinOp(BO))) {
+ switch (BO->getOpcode()) {
+ case Instruction::Xor:
+ if (Instruction *I = foldICmpXorConstant(Cmp, BO, C))
+ return I;
+ break;
+ case Instruction::And:
+ if (Instruction *I = foldICmpAndConstant(Cmp, BO, C))
+ return I;
+ break;
+ case Instruction::Or:
+ if (Instruction *I = foldICmpOrConstant(Cmp, BO, C))
+ return I;
+ break;
+ case Instruction::Mul:
+ if (Instruction *I = foldICmpMulConstant(Cmp, BO, C))
+ return I;
+ break;
+ case Instruction::Shl:
+ if (Instruction *I = foldICmpShlConstant(Cmp, BO, C))
+ return I;
+ break;
+ case Instruction::LShr:
+ case Instruction::AShr:
+ if (Instruction *I = foldICmpShrConstant(Cmp, BO, C))
+ return I;
+ break;
+ case Instruction::UDiv:
+ if (Instruction *I = foldICmpUDivConstant(Cmp, BO, C))
+ return I;
+ LLVM_FALLTHROUGH;
+ case Instruction::SDiv:
+ if (Instruction *I = foldICmpDivConstant(Cmp, BO, C))
+ return I;
+ break;
+ case Instruction::Sub:
+ if (Instruction *I = foldICmpSubConstant(Cmp, BO, C))
+ return I;
+ break;
+ case Instruction::Add:
+ if (Instruction *I = foldICmpAddConstant(Cmp, BO, C))
+ return I;
+ break;
+ default:
+ break;
}
+ // TODO: These folds could be refactored to be part of the above calls.
+ if (Instruction *I = foldICmpBinOpEqualityWithConstant(Cmp, BO, C))
+ return I;
}
- // Shifting const2 will never be equal to const1.
- return getConstant(false);
-}
-/// Handle "(icmp eq/ne (shl const2, A), const1)" ->
-/// (icmp eq/ne A, TrailingZeros(const1) - TrailingZeros(const2)).
-Instruction *InstCombiner::FoldICmpCstShlCst(ICmpInst &I, Value *Op, Value *A,
- ConstantInt *CI1,
- ConstantInt *CI2) {
- assert(I.isEquality() && "Cannot fold icmp gt/lt");
+ Instruction *LHSI;
+ if (match(Cmp.getOperand(0), m_Instruction(LHSI)) &&
+ LHSI->getOpcode() == Instruction::Trunc)
+ if (Instruction *I = foldICmpTruncConstant(Cmp, LHSI, C))
+ return I;
- auto getConstant = [&I, this](bool IsTrue) {
- if (I.getPredicate() == I.ICMP_NE)
- IsTrue = !IsTrue;
- return replaceInstUsesWith(I, ConstantInt::get(I.getType(), IsTrue));
- };
+ if (Instruction *I = foldICmpIntrinsicWithConstant(Cmp, C))
+ return I;
- auto getICmp = [&I](CmpInst::Predicate Pred, Value *LHS, Value *RHS) {
- if (I.getPredicate() == I.ICMP_NE)
- Pred = CmpInst::getInversePredicate(Pred);
- return new ICmpInst(Pred, LHS, RHS);
- };
-
- const APInt &AP1 = CI1->getValue();
- const APInt &AP2 = CI2->getValue();
+ return nullptr;
+}
- // Don't bother doing any work for cases which InstSimplify handles.
- if (AP2 == 0)
+/// Fold an icmp equality instruction with binary operator LHS and constant RHS:
+/// icmp eq/ne BO, C.
+Instruction *InstCombiner::foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp,
+ BinaryOperator *BO,
+ const APInt *C) {
+ // TODO: Some of these folds could work with arbitrary constants, but this
+ // function is limited to scalar and vector splat constants.
+ if (!Cmp.isEquality())
return nullptr;
- unsigned AP2TrailingZeros = AP2.countTrailingZeros();
-
- if (!AP1 && AP2TrailingZeros != 0)
- return getICmp(I.ICMP_UGE, A,
- ConstantInt::get(A->getType(), AP2.getBitWidth() - AP2TrailingZeros));
+ ICmpInst::Predicate Pred = Cmp.getPredicate();
+ bool isICMP_NE = Pred == ICmpInst::ICMP_NE;
+ Constant *RHS = cast<Constant>(Cmp.getOperand(1));
+ Value *BOp0 = BO->getOperand(0), *BOp1 = BO->getOperand(1);
+
+ switch (BO->getOpcode()) {
+ case Instruction::SRem:
+ // If we have a signed (X % (2^c)) == 0, turn it into an unsigned one.
+ if (*C == 0 && BO->hasOneUse()) {
+ const APInt *BOC;
+ if (match(BOp1, m_APInt(BOC)) && BOC->sgt(1) && BOC->isPowerOf2()) {
+ Value *NewRem = Builder->CreateURem(BOp0, BOp1, BO->getName());
+ return new ICmpInst(Pred, NewRem,
+ Constant::getNullValue(BO->getType()));
+ }
+ }
+ break;
+ case Instruction::Add: {
+ // Replace ((add A, B) != C) with (A != C-B) if B and C are constants.
+ const APInt *BOC;
+ if (match(BOp1, m_APInt(BOC))) {
+ if (BO->hasOneUse()) {
+ Constant *SubC = ConstantExpr::getSub(RHS, cast<Constant>(BOp1));
+ return new ICmpInst(Pred, BOp0, SubC);
+ }
+ } else if (*C == 0) {
+ // Replace ((add A, B) != 0) with (A != -B) if A or B is
+ // efficiently invertible, or if the add has just this one use.
+ if (Value *NegVal = dyn_castNegVal(BOp1))
+ return new ICmpInst(Pred, BOp0, NegVal);
+ if (Value *NegVal = dyn_castNegVal(BOp0))
+ return new ICmpInst(Pred, NegVal, BOp1);
+ if (BO->hasOneUse()) {
+ Value *Neg = Builder->CreateNeg(BOp1);
+ Neg->takeName(BO);
+ return new ICmpInst(Pred, BOp0, Neg);
+ }
+ }
+ break;
+ }
+ case Instruction::Xor:
+ if (BO->hasOneUse()) {
+ if (Constant *BOC = dyn_cast<Constant>(BOp1)) {
+ // For the xor case, we can xor two constants together, eliminating
+ // the explicit xor.
+ return new ICmpInst(Pred, BOp0, ConstantExpr::getXor(RHS, BOC));
+ } else if (*C == 0) {
+ // Replace ((xor A, B) != 0) with (A != B)
+ return new ICmpInst(Pred, BOp0, BOp1);
+ }
+ }
+ break;
+ case Instruction::Sub:
+ if (BO->hasOneUse()) {
+ const APInt *BOC;
+ if (match(BOp0, m_APInt(BOC))) {
+ // Replace ((sub BOC, B) != C) with (B != BOC-C).
+ Constant *SubC = ConstantExpr::getSub(cast<Constant>(BOp0), RHS);
+ return new ICmpInst(Pred, BOp1, SubC);
+ } else if (*C == 0) {
+ // Replace ((sub A, B) != 0) with (A != B).
+ return new ICmpInst(Pred, BOp0, BOp1);
+ }
+ }
+ break;
+ case Instruction::Or: {
+ const APInt *BOC;
+ if (match(BOp1, m_APInt(BOC)) && BO->hasOneUse() && RHS->isAllOnesValue()) {
+ // Comparing if all bits outside of a constant mask are set?
+ // Replace (X | C) == -1 with (X & ~C) == ~C.
+ // This removes the -1 constant.
+ Constant *NotBOC = ConstantExpr::getNot(cast<Constant>(BOp1));
+ Value *And = Builder->CreateAnd(BOp0, NotBOC);
+ return new ICmpInst(Pred, And, NotBOC);
+ }
+ break;
+ }
+ case Instruction::And: {
+ const APInt *BOC;
+ if (match(BOp1, m_APInt(BOC))) {
+ // If we have ((X & C) == C), turn it into ((X & C) != 0).
+ if (C == BOC && C->isPowerOf2())
+ return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE,
+ BO, Constant::getNullValue(RHS->getType()));
+
+ // Don't perform the following transforms if the AND has multiple uses
+ if (!BO->hasOneUse())
+ break;
- if (AP1 == AP2)
- return getICmp(I.ICMP_EQ, A, ConstantInt::getNullValue(A->getType()));
+ // Replace ((and X, (1 << size(X)-1)) != 0) with (X s< 0).
+ if (BOC->isSignBit()) {
+ Constant *Zero = Constant::getNullValue(BOp0->getType());
+ auto NewPred = isICMP_NE ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE;
+ return new ICmpInst(NewPred, BOp0, Zero);
+ }
- // Get the distance between the lowest bits that are set.
- int Shift = AP1.countTrailingZeros() - AP2TrailingZeros;
+ // ((X & ~7) == 0) --> X < 8
+ if (*C == 0 && (~(*BOC) + 1).isPowerOf2()) {
+ Constant *NegBOC = ConstantExpr::getNeg(cast<Constant>(BOp1));
+ auto NewPred = isICMP_NE ? ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
+ return new ICmpInst(NewPred, BOp0, NegBOC);
+ }
+ }
+ break;
+ }
+ case Instruction::Mul:
+ if (*C == 0 && BO->hasNoSignedWrap()) {
+ const APInt *BOC;
+ if (match(BOp1, m_APInt(BOC)) && *BOC != 0) {
+ // The trivial case (mul X, 0) is handled by InstSimplify.
+ // General case : (mul X, C) != 0 iff X != 0
+ // (mul X, C) == 0 iff X == 0
+ return new ICmpInst(Pred, BOp0, Constant::getNullValue(RHS->getType()));
+ }
+ }
+ break;
+ case Instruction::UDiv:
+ if (*C == 0) {
+ // (icmp eq/ne (udiv A, B), 0) -> (icmp ugt/ule i32 B, A)
+ auto NewPred = isICMP_NE ? ICmpInst::ICMP_ULE : ICmpInst::ICMP_UGT;
+ return new ICmpInst(NewPred, BOp1, BOp0);
+ }
+ break;
+ default:
+ break;
+ }
+ return nullptr;
+}
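
One of the And folds above, "(X & ~7) == 0 --> X u< 8", is easy to confirm by hand: clearing the low three bits yields zero exactly when no higher bit is set. A short brute-force illustration (not from the patch):

    #include <cassert>
    #include <cstdint>
    bool beforeFold(uint32_t x) { return (x & ~7u) == 0u; } // icmp eq (and x, -8), 0
    bool afterFold(uint32_t x)  { return x < 8u; }          // icmp ult x, 8
    int main() {
      for (uint32_t x = 0; x < 1024; ++x)
        assert(beforeFold(x) == afterFold(x));
    }
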
- if (Shift > 0 && AP2.shl(Shift) == AP1)
- return getICmp(I.ICMP_EQ, A, ConstantInt::get(A->getType(), Shift));
+/// Fold an icmp with LLVM intrinsic and constant operand: icmp Pred II, C.
+Instruction *InstCombiner::foldICmpIntrinsicWithConstant(ICmpInst &Cmp,
+ const APInt *C) {
+ IntrinsicInst *II = dyn_cast<IntrinsicInst>(Cmp.getOperand(0));
+ if (!II || !Cmp.isEquality())
+ return nullptr;
- // Shifting const2 will never be equal to const1.
- return getConstant(false);
+ // Handle icmp {eq|ne} <intrinsic>, intcst.
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::bswap:
+ Worklist.Add(II);
+ Cmp.setOperand(0, II->getArgOperand(0));
+ Cmp.setOperand(1, Builder->getInt(C->byteSwap()));
+ return &Cmp;
+ case Intrinsic::ctlz:
+ case Intrinsic::cttz:
+ // ctz(A) == bitwidth(A) -> A == 0 and likewise for !=
+ if (*C == C->getBitWidth()) {
+ Worklist.Add(II);
+ Cmp.setOperand(0, II->getArgOperand(0));
+ Cmp.setOperand(1, ConstantInt::getNullValue(II->getType()));
+ return &Cmp;
+ }
+ break;
+ case Intrinsic::ctpop: {
+ // popcount(A) == 0 -> A == 0 and likewise for !=
+ // popcount(A) == bitwidth(A) -> A == -1 and likewise for !=
+ bool IsZero = *C == 0;
+ if (IsZero || *C == C->getBitWidth()) {
+ Worklist.Add(II);
+ Cmp.setOperand(0, II->getArgOperand(0));
+ auto *NewOp = IsZero ? Constant::getNullValue(II->getType())
+ : Constant::getAllOnesValue(II->getType());
+ Cmp.setOperand(1, NewOp);
+ return &Cmp;
+ }
+ break;
+ }
+ default:
+ break;
+ }
+ return nullptr;
}
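
The ctpop case mirrors the comments above: a population count equal to the bit width means every bit is set. A rough analogue using GCC/Clang's __builtin_popcount as a stand-in (an assumption of this sketch, not an API used by the patch):

    #include <cassert>
    #include <cstdint>
    bool beforeFold(uint32_t a) { return __builtin_popcount(a) == 32; } // icmp eq (ctpop a), 32
    bool afterFold(uint32_t a)  { return a == 0xFFFFFFFFu; }            // icmp eq a, -1
    int main() {
      uint32_t samples[] = {0u, 1u, 0x7FFFFFFFu, 0xFFFFFFFEu, 0xFFFFFFFFu};
      for (uint32_t a : samples)
        assert(beforeFold(a) == afterFold(a));
    }
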
-/// Handle "icmp (instr, intcst)".
-Instruction *InstCombiner::visitICmpInstWithInstAndIntCst(ICmpInst &ICI,
- Instruction *LHSI,
- ConstantInt *RHS) {
- const APInt &RHSV = RHS->getValue();
+/// Handle icmp with constant (but not simple integer constant) RHS.
+Instruction *InstCombiner::foldICmpInstWithConstantNotInt(ICmpInst &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ Constant *RHSC = dyn_cast<Constant>(Op1);
+ Instruction *LHSI = dyn_cast<Instruction>(Op0);
+ if (!RHSC || !LHSI)
+ return nullptr;
switch (LHSI->getOpcode()) {
- case Instruction::Trunc:
- if (RHS->isOne() && RHSV.getBitWidth() > 1) {
- // icmp slt trunc(signum(V)) 1 --> icmp slt V, 1
- Value *V = nullptr;
- if (ICI.getPredicate() == ICmpInst::ICMP_SLT &&
- match(LHSI->getOperand(0), m_Signum(m_Value(V))))
- return new ICmpInst(ICmpInst::ICMP_SLT, V,
- ConstantInt::get(V->getType(), 1));
+ case Instruction::GetElementPtr:
+ // icmp pred GEP (P, int 0, int 0, int 0), null -> icmp pred P, null
+ if (RHSC->isNullValue() &&
+ cast<GetElementPtrInst>(LHSI)->hasAllZeroIndices())
+ return new ICmpInst(
+ I.getPredicate(), LHSI->getOperand(0),
+ Constant::getNullValue(LHSI->getOperand(0)->getType()));
+ break;
+ case Instruction::PHI:
+ // Only fold icmp into the PHI if the phi and icmp are in the same
+ // block. If in the same block, we're encouraging jump threading. If
+ // not, we are just pessimizing the code by making an i1 phi.
+ if (LHSI->getParent() == I.getParent())
+ if (Instruction *NV = FoldOpIntoPhi(I))
+ return NV;
+ break;
+ case Instruction::Select: {
+ // If either operand of the select is a constant, we can fold the
+ // comparison into the select arms, which will cause one to be
+ // constant folded and the select turned into a bitwise or.
+ Value *Op1 = nullptr, *Op2 = nullptr;
+ ConstantInt *CI = nullptr;
+ if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1))) {
+ Op1 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
+ CI = dyn_cast<ConstantInt>(Op1);
}
- if (ICI.isEquality() && LHSI->hasOneUse()) {
- // Simplify icmp eq (trunc x to i8), 42 -> icmp eq x, 42|highbits if all
- // of the high bits truncated out of x are known.
- unsigned DstBits = LHSI->getType()->getPrimitiveSizeInBits(),
- SrcBits = LHSI->getOperand(0)->getType()->getPrimitiveSizeInBits();
- APInt KnownZero(SrcBits, 0), KnownOne(SrcBits, 0);
- computeKnownBits(LHSI->getOperand(0), KnownZero, KnownOne, 0, &ICI);
-
- // If all the high bits are known, we can do this xform.
- if ((KnownZero|KnownOne).countLeadingOnes() >= SrcBits-DstBits) {
- // Pull in the high bits from known-ones set.
- APInt NewRHS = RHS->getValue().zext(SrcBits);
- NewRHS |= KnownOne & APInt::getHighBitsSet(SrcBits, SrcBits-DstBits);
- return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
- Builder->getInt(NewRHS));
- }
+ if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2))) {
+ Op2 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
+ CI = dyn_cast<ConstantInt>(Op2);
}
+
+ // We only want to perform this transformation if it will not lead to
+ // additional code. This is true if either both sides of the select
+ // fold to a constant (in which case the icmp is replaced with a select
+ // which will usually simplify) or this is the only user of the
+ // select (in which case we are trading a select+icmp for a simpler
+ // select+icmp) or all uses of the select can be replaced based on
+ // dominance information ("Global cases").
+ bool Transform = false;
+ if (Op1 && Op2)
+ Transform = true;
+ else if (Op1 || Op2) {
+ // Local case
+ if (LHSI->hasOneUse())
+ Transform = true;
+ // Global cases
+ else if (CI && !CI->isZero())
+ // When Op1 is constant try replacing select with second operand.
+ // Otherwise Op2 is constant and try replacing select with first
+ // operand.
+ Transform =
+ replacedSelectWithOperand(cast<SelectInst>(LHSI), &I, Op1 ? 2 : 1);
+ }
+ if (Transform) {
+ if (!Op1)
+ Op1 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(1), RHSC,
+ I.getName());
+ if (!Op2)
+ Op2 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(2), RHSC,
+ I.getName());
+ return SelectInst::Create(LHSI->getOperand(0), Op1, Op2);
+ }
+ break;
+ }
+ case Instruction::IntToPtr:
+ // icmp pred inttoptr(X), null -> icmp pred X, 0
+ if (RHSC->isNullValue() &&
+ DL.getIntPtrType(RHSC->getType()) == LHSI->getOperand(0)->getType())
+ return new ICmpInst(
+ I.getPredicate(), LHSI->getOperand(0),
+ Constant::getNullValue(LHSI->getOperand(0)->getType()));
break;
- case Instruction::Xor: // (icmp pred (xor X, XorCst), CI)
- if (ConstantInt *XorCst = dyn_cast<ConstantInt>(LHSI->getOperand(1))) {
- // If this is a comparison that tests the signbit (X < 0) or (x > -1),
- // fold the xor.
- if ((ICI.getPredicate() == ICmpInst::ICMP_SLT && RHSV == 0) ||
- (ICI.getPredicate() == ICmpInst::ICMP_SGT && RHSV.isAllOnesValue())) {
- Value *CompareVal = LHSI->getOperand(0);
-
- // If the sign bit of the XorCst is not set, there is no change to
- // the operation, just stop using the Xor.
- if (!XorCst->isNegative()) {
- ICI.setOperand(0, CompareVal);
- Worklist.Add(LHSI);
- return &ICI;
- }
+ case Instruction::Load:
+ // Try to optimize things like "A[i] > 4" to index computations.
+ if (GetElementPtrInst *GEP =
+ dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) {
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
+ if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
+ !cast<LoadInst>(LHSI)->isVolatile())
+ if (Instruction *Res = foldCmpLoadFromIndexedGlobal(GEP, GV, I))
+ return Res;
+ }
+ break;
+ }
- // Was the old condition true if the operand is positive?
- bool isTrueIfPositive = ICI.getPredicate() == ICmpInst::ICMP_SGT;
+ return nullptr;
+}
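
The Select case is easiest to see with one constant arm: comparing the select result is the same as selecting between the two compared arms, one of which constant-folds. A tiny illustration in plain C++ (not part of the patch):

    #include <cassert>
    bool beforeFold(bool c, int x) { return (c ? 7 : x) > 5; }    // icmp sgt (select c, 7, x), 5
    bool afterFold(bool c, int x)  { return c ? true : (x > 5); } // select c, true, (icmp sgt x, 5)
    int main() {
      for (int x = -10; x <= 10; ++x) {
        assert(beforeFold(true, x) == afterFold(true, x));
        assert(beforeFold(false, x) == afterFold(false, x));
      }
    }
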
- // If so, the new one isn't.
- isTrueIfPositive ^= true;
+/// Try to fold icmp (binop), X or icmp X, (binop).
+Instruction *InstCombiner::foldICmpBinOp(ICmpInst &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- if (isTrueIfPositive)
- return new ICmpInst(ICmpInst::ICMP_SGT, CompareVal,
- SubOne(RHS));
- else
- return new ICmpInst(ICmpInst::ICMP_SLT, CompareVal,
- AddOne(RHS));
- }
+ // Special logic for binary operators.
+ BinaryOperator *BO0 = dyn_cast<BinaryOperator>(Op0);
+ BinaryOperator *BO1 = dyn_cast<BinaryOperator>(Op1);
+ if (!BO0 && !BO1)
+ return nullptr;
- if (LHSI->hasOneUse()) {
- // (icmp u/s (xor A SignBit), C) -> (icmp s/u A, (xor C SignBit))
- if (!ICI.isEquality() && XorCst->getValue().isSignBit()) {
- const APInt &SignBit = XorCst->getValue();
- ICmpInst::Predicate Pred = ICI.isSigned()
- ? ICI.getUnsignedPredicate()
- : ICI.getSignedPredicate();
- return new ICmpInst(Pred, LHSI->getOperand(0),
- Builder->getInt(RHSV ^ SignBit));
- }
+ CmpInst::Predicate Pred = I.getPredicate();
+ bool NoOp0WrapProblem = false, NoOp1WrapProblem = false;
+ if (BO0 && isa<OverflowingBinaryOperator>(BO0))
+ NoOp0WrapProblem =
+ ICmpInst::isEquality(Pred) ||
+ (CmpInst::isUnsigned(Pred) && BO0->hasNoUnsignedWrap()) ||
+ (CmpInst::isSigned(Pred) && BO0->hasNoSignedWrap());
+ if (BO1 && isa<OverflowingBinaryOperator>(BO1))
+ NoOp1WrapProblem =
+ ICmpInst::isEquality(Pred) ||
+ (CmpInst::isUnsigned(Pred) && BO1->hasNoUnsignedWrap()) ||
+ (CmpInst::isSigned(Pred) && BO1->hasNoSignedWrap());
- // (icmp u/s (xor A ~SignBit), C) -> (icmp s/u (xor C ~SignBit), A)
- if (!ICI.isEquality() && XorCst->isMaxValue(true)) {
- const APInt &NotSignBit = XorCst->getValue();
- ICmpInst::Predicate Pred = ICI.isSigned()
- ? ICI.getUnsignedPredicate()
- : ICI.getSignedPredicate();
- Pred = ICI.getSwappedPredicate(Pred);
- return new ICmpInst(Pred, LHSI->getOperand(0),
- Builder->getInt(RHSV ^ NotSignBit));
- }
- }
+ // Analyze the case when either Op0 or Op1 is an add instruction.
+ // Op0 = A + B (or A and B are null); Op1 = C + D (or C and D are null).
+ Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr;
+ if (BO0 && BO0->getOpcode() == Instruction::Add) {
+ A = BO0->getOperand(0);
+ B = BO0->getOperand(1);
+ }
+ if (BO1 && BO1->getOpcode() == Instruction::Add) {
+ C = BO1->getOperand(0);
+ D = BO1->getOperand(1);
+ }
- // (icmp ugt (xor X, C), ~C) -> (icmp ult X, C)
- // iff -C is a power of 2
- if (ICI.getPredicate() == ICmpInst::ICMP_UGT &&
- XorCst->getValue() == ~RHSV && (RHSV + 1).isPowerOf2())
- return new ICmpInst(ICmpInst::ICMP_ULT, LHSI->getOperand(0), XorCst);
-
- // (icmp ult (xor X, C), -C) -> (icmp uge X, C)
- // iff -C is a power of 2
- if (ICI.getPredicate() == ICmpInst::ICMP_ULT &&
- XorCst->getValue() == -RHSV && RHSV.isPowerOf2())
- return new ICmpInst(ICmpInst::ICMP_UGE, LHSI->getOperand(0), XorCst);
+ // icmp (X+cst) < 0 --> X < -cst
+ if (NoOp0WrapProblem && ICmpInst::isSigned(Pred) && match(Op1, m_Zero()))
+ if (ConstantInt *RHSC = dyn_cast_or_null<ConstantInt>(B))
+ if (!RHSC->isMinValue(/*isSigned=*/true))
+ return new ICmpInst(Pred, A, ConstantExpr::getNeg(RHSC));
+
+ // icmp (X+Y), X -> icmp Y, 0 for equalities or if there is no overflow.
+ if ((A == Op1 || B == Op1) && NoOp0WrapProblem)
+ return new ICmpInst(Pred, A == Op1 ? B : A,
+ Constant::getNullValue(Op1->getType()));
+
+ // icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow.
+ if ((C == Op0 || D == Op0) && NoOp1WrapProblem)
+ return new ICmpInst(Pred, Constant::getNullValue(Op0->getType()),
+ C == Op0 ? D : C);
+
+ // icmp (X+Y), (X+Z) -> icmp Y, Z for equalities or if there is no overflow.
+ if (A && C && (A == C || A == D || B == C || B == D) && NoOp0WrapProblem &&
+ NoOp1WrapProblem &&
+ // Try not to increase register pressure.
+ BO0->hasOneUse() && BO1->hasOneUse()) {
+ // Determine Y and Z in the form icmp (X+Y), (X+Z).
+ Value *Y, *Z;
+ if (A == C) {
+ // C + B == C + D -> B == D
+ Y = B;
+ Z = D;
+ } else if (A == D) {
+ // D + B == C + D -> B == C
+ Y = B;
+ Z = C;
+ } else if (B == C) {
+ // A + C == C + D -> A == D
+ Y = A;
+ Z = D;
+ } else {
+ assert(B == D);
+ // A + D == C + D -> A == C
+ Y = A;
+ Z = C;
}
- break;
- case Instruction::And: // (icmp pred (and X, AndCst), RHS)
- if (LHSI->hasOneUse() && isa<ConstantInt>(LHSI->getOperand(1)) &&
- LHSI->getOperand(0)->hasOneUse()) {
- ConstantInt *AndCst = cast<ConstantInt>(LHSI->getOperand(1));
-
- // If the LHS is an AND of a truncating cast, we can widen the
- // and/compare to be the input width without changing the value
- // produced, eliminating a cast.
- if (TruncInst *Cast = dyn_cast<TruncInst>(LHSI->getOperand(0))) {
- // We can do this transformation if either the AND constant does not
- // have its sign bit set or if it is an equality comparison.
- // Extending a relational comparison when we're checking the sign
- // bit would not work.
- if (ICI.isEquality() ||
- (!AndCst->isNegative() && RHSV.isNonNegative())) {
- Value *NewAnd =
- Builder->CreateAnd(Cast->getOperand(0),
- ConstantExpr::getZExt(AndCst, Cast->getSrcTy()));
- NewAnd->takeName(LHSI);
- return new ICmpInst(ICI.getPredicate(), NewAnd,
- ConstantExpr::getZExt(RHS, Cast->getSrcTy()));
- }
- }
-
- // If the LHS is an AND of a zext, and we have an equality compare, we can
- // shrink the and/compare to the smaller type, eliminating the cast.
- if (ZExtInst *Cast = dyn_cast<ZExtInst>(LHSI->getOperand(0))) {
- IntegerType *Ty = cast<IntegerType>(Cast->getSrcTy());
- // Make sure we don't compare the upper bits, SimplifyDemandedBits
- // should fold the icmp to true/false in that case.
- if (ICI.isEquality() && RHSV.getActiveBits() <= Ty->getBitWidth()) {
- Value *NewAnd =
- Builder->CreateAnd(Cast->getOperand(0),
- ConstantExpr::getTrunc(AndCst, Ty));
- NewAnd->takeName(LHSI);
- return new ICmpInst(ICI.getPredicate(), NewAnd,
- ConstantExpr::getTrunc(RHS, Ty));
- }
- }
-
- // If this is: (X >> C1) & C2 != C3 (where any shift and any compare
- // could exist), turn it into (X & (C2 << C1)) != (C3 << C1). This
- // happens a LOT in code produced by the C front-end, for bitfield
- // access.
- BinaryOperator *Shift = dyn_cast<BinaryOperator>(LHSI->getOperand(0));
- if (Shift && !Shift->isShift())
- Shift = nullptr;
-
- ConstantInt *ShAmt;
- ShAmt = Shift ? dyn_cast<ConstantInt>(Shift->getOperand(1)) : nullptr;
-
- // This seemingly simple opportunity to fold away a shift turns out to
- // be rather complicated. See PR17827
- // ( http://llvm.org/bugs/show_bug.cgi?id=17827 ) for details.
- if (ShAmt) {
- bool CanFold = false;
- unsigned ShiftOpcode = Shift->getOpcode();
- if (ShiftOpcode == Instruction::AShr) {
- // There may be some constraints that make this possible,
- // but nothing simple has been discovered yet.
- CanFold = false;
- } else if (ShiftOpcode == Instruction::Shl) {
- // For a left shift, we can fold if the comparison is not signed.
- // We can also fold a signed comparison if the mask value and
- // comparison value are not negative. These constraints may not be
- // obvious, but we can prove that they are correct using an SMT
- // solver.
- if (!ICI.isSigned() || (!AndCst->isNegative() && !RHS->isNegative()))
- CanFold = true;
- } else if (ShiftOpcode == Instruction::LShr) {
- // For a logical right shift, we can fold if the comparison is not
- // signed. We can also fold a signed comparison if the shifted mask
- // value and the shifted comparison value are not negative.
- // These constraints may not be obvious, but we can prove that they
- // are correct using an SMT solver.
- if (!ICI.isSigned())
- CanFold = true;
- else {
- ConstantInt *ShiftedAndCst =
- cast<ConstantInt>(ConstantExpr::getShl(AndCst, ShAmt));
- ConstantInt *ShiftedRHSCst =
- cast<ConstantInt>(ConstantExpr::getShl(RHS, ShAmt));
-
- if (!ShiftedAndCst->isNegative() && !ShiftedRHSCst->isNegative())
- CanFold = true;
- }
- }
+ return new ICmpInst(Pred, Y, Z);
+ }
- if (CanFold) {
- Constant *NewCst;
- if (ShiftOpcode == Instruction::Shl)
- NewCst = ConstantExpr::getLShr(RHS, ShAmt);
- else
- NewCst = ConstantExpr::getShl(RHS, ShAmt);
-
- // Check to see if we are shifting out any of the bits being
- // compared.
- if (ConstantExpr::get(ShiftOpcode, NewCst, ShAmt) != RHS) {
- // If we shifted bits out, the fold is not going to work out.
- // As a special case, check to see if this means that the
- // result is always true or false now.
- if (ICI.getPredicate() == ICmpInst::ICMP_EQ)
- return replaceInstUsesWith(ICI, Builder->getFalse());
- if (ICI.getPredicate() == ICmpInst::ICMP_NE)
- return replaceInstUsesWith(ICI, Builder->getTrue());
+ // icmp slt (X + -1), Y -> icmp sle X, Y
+ if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLT &&
+ match(B, m_AllOnes()))
+ return new ICmpInst(CmpInst::ICMP_SLE, A, Op1);
+
+ // icmp sge (X + -1), Y -> icmp sgt X, Y
+ if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGE &&
+ match(B, m_AllOnes()))
+ return new ICmpInst(CmpInst::ICMP_SGT, A, Op1);
+
+ // icmp sle (X + 1), Y -> icmp slt X, Y
+ if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLE && match(B, m_One()))
+ return new ICmpInst(CmpInst::ICMP_SLT, A, Op1);
+
+ // icmp sgt (X + 1), Y -> icmp sge X, Y
+ if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGT && match(B, m_One()))
+ return new ICmpInst(CmpInst::ICMP_SGE, A, Op1);
+
+ // icmp sgt X, (Y + -1) -> icmp sge X, Y
+ if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SGT &&
+ match(D, m_AllOnes()))
+ return new ICmpInst(CmpInst::ICMP_SGE, Op0, C);
+
+ // icmp sle X, (Y + -1) -> icmp slt X, Y
+ if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SLE &&
+ match(D, m_AllOnes()))
+ return new ICmpInst(CmpInst::ICMP_SLT, Op0, C);
+
+ // icmp sge X, (Y + 1) -> icmp sgt X, Y
+ if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SGE && match(D, m_One()))
+ return new ICmpInst(CmpInst::ICMP_SGT, Op0, C);
+
+ // icmp slt X, (Y + 1) -> icmp sle X, Y
+ if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SLT && match(D, m_One()))
+ return new ICmpInst(CmpInst::ICMP_SLE, Op0, C);
+
+ // if C1 has greater magnitude than C2:
+ // icmp (X + C1), (Y + C2) -> icmp (X + C3), Y
+ // s.t. C3 = C1 - C2
+ //
+ // if C2 has greater magnitude than C1:
+ // icmp (X + C1), (Y + C2) -> icmp X, (Y + C3)
+ // s.t. C3 = C2 - C1
+ if (A && C && NoOp0WrapProblem && NoOp1WrapProblem &&
+ (BO0->hasOneUse() || BO1->hasOneUse()) && !I.isUnsigned())
+ if (ConstantInt *C1 = dyn_cast<ConstantInt>(B))
+ if (ConstantInt *C2 = dyn_cast<ConstantInt>(D)) {
+ const APInt &AP1 = C1->getValue();
+ const APInt &AP2 = C2->getValue();
+ if (AP1.isNegative() == AP2.isNegative()) {
+ APInt AP1Abs = C1->getValue().abs();
+ APInt AP2Abs = C2->getValue().abs();
+ if (AP1Abs.uge(AP2Abs)) {
+ ConstantInt *C3 = Builder->getInt(AP1 - AP2);
+ Value *NewAdd = Builder->CreateNSWAdd(A, C3);
+ return new ICmpInst(Pred, NewAdd, C);
} else {
- ICI.setOperand(1, NewCst);
- Constant *NewAndCst;
- if (ShiftOpcode == Instruction::Shl)
- NewAndCst = ConstantExpr::getLShr(AndCst, ShAmt);
- else
- NewAndCst = ConstantExpr::getShl(AndCst, ShAmt);
- LHSI->setOperand(1, NewAndCst);
- LHSI->setOperand(0, Shift->getOperand(0));
- Worklist.Add(Shift); // Shift is dead.
- return &ICI;
+ ConstantInt *C3 = Builder->getInt(AP2 - AP1);
+ Value *NewAdd = Builder->CreateNSWAdd(C, C3);
+ return new ICmpInst(Pred, A, NewAdd);
}
}
}
- // Turn ((X >> Y) & C) == 0 into (X & (C << Y)) == 0. The later is
- // preferable because it allows the C<<Y expression to be hoisted out
- // of a loop if Y is invariant and X is not.
- if (Shift && Shift->hasOneUse() && RHSV == 0 &&
- ICI.isEquality() && !Shift->isArithmeticShift() &&
- !isa<Constant>(Shift->getOperand(0))) {
- // Compute C << Y.
- Value *NS;
- if (Shift->getOpcode() == Instruction::LShr) {
- NS = Builder->CreateShl(AndCst, Shift->getOperand(1));
- } else {
- // Insert a logical shift.
- NS = Builder->CreateLShr(AndCst, Shift->getOperand(1));
- }
+ // Analyze the case when either Op0 or Op1 is a sub instruction.
+ // Op0 = A - B (or A and B are null); Op1 = C - D (or C and D are null).
+ A = nullptr;
+ B = nullptr;
+ C = nullptr;
+ D = nullptr;
+ if (BO0 && BO0->getOpcode() == Instruction::Sub) {
+ A = BO0->getOperand(0);
+ B = BO0->getOperand(1);
+ }
+ if (BO1 && BO1->getOpcode() == Instruction::Sub) {
+ C = BO1->getOperand(0);
+ D = BO1->getOperand(1);
+ }
- // Compute X & (C << Y).
- Value *NewAnd =
- Builder->CreateAnd(Shift->getOperand(0), NS, LHSI->getName());
+ // icmp (X-Y), X -> icmp 0, Y for equalities or if there is no overflow.
+ if (A == Op1 && NoOp0WrapProblem)
+ return new ICmpInst(Pred, Constant::getNullValue(Op1->getType()), B);
+
+ // icmp X, (X-Y) -> icmp Y, 0 for equalities or if there is no overflow.
+ if (C == Op0 && NoOp1WrapProblem)
+ return new ICmpInst(Pred, D, Constant::getNullValue(Op0->getType()));
+
+ // icmp (Y-X), (Z-X) -> icmp Y, Z for equalities or if there is no overflow.
+ if (B && D && B == D && NoOp0WrapProblem && NoOp1WrapProblem &&
+ // Try not to increase register pressure.
+ BO0->hasOneUse() && BO1->hasOneUse())
+ return new ICmpInst(Pred, A, C);
+
+ // icmp (X-Y), (X-Z) -> icmp Z, Y for equalities or if there is no overflow.
+ if (A && C && A == C && NoOp0WrapProblem && NoOp1WrapProblem &&
+ // Try not to increase register pressure.
+ BO0->hasOneUse() && BO1->hasOneUse())
+ return new ICmpInst(Pred, D, B);
+
+ // icmp (0-X) < cst --> X > -cst
+ if (NoOp0WrapProblem && ICmpInst::isSigned(Pred)) {
+ Value *X;
+ if (match(BO0, m_Neg(m_Value(X))))
+ if (ConstantInt *RHSC = dyn_cast<ConstantInt>(Op1))
+ if (!RHSC->isMinValue(/*isSigned=*/true))
+ return new ICmpInst(I.getSwappedPredicate(), X,
+ ConstantExpr::getNeg(RHSC));
+ }
- ICI.setOperand(0, NewAnd);
- return &ICI;
- }
+ BinaryOperator *SRem = nullptr;
+ // icmp (srem X, Y), Y
+ if (BO0 && BO0->getOpcode() == Instruction::SRem && Op1 == BO0->getOperand(1))
+ SRem = BO0;
+ // icmp Y, (srem X, Y)
+ else if (BO1 && BO1->getOpcode() == Instruction::SRem &&
+ Op0 == BO1->getOperand(1))
+ SRem = BO1;
+ if (SRem) {
+ // We don't check hasOneUse to avoid increasing register pressure because
+ // the value we use is the same value this instruction was already using.
+ switch (SRem == BO0 ? ICmpInst::getSwappedPredicate(Pred) : Pred) {
+ default:
+ break;
+ case ICmpInst::ICMP_EQ:
+ return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+ case ICmpInst::ICMP_NE:
+ return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+ case ICmpInst::ICMP_SGT:
+ case ICmpInst::ICMP_SGE:
+ return new ICmpInst(ICmpInst::ICMP_SGT, SRem->getOperand(1),
+ Constant::getAllOnesValue(SRem->getType()));
+ case ICmpInst::ICMP_SLT:
+ case ICmpInst::ICMP_SLE:
+ return new ICmpInst(ICmpInst::ICMP_SLT, SRem->getOperand(1),
+ Constant::getNullValue(SRem->getType()));
+ }
+ }
- // (icmp pred (and (or (lshr X, Y), X), 1), 0) -->
- // (icmp pred (and X, (or (shl 1, Y), 1), 0))
- //
- // iff pred isn't signed
- {
- Value *X, *Y, *LShr;
- if (!ICI.isSigned() && RHSV == 0) {
- if (match(LHSI->getOperand(1), m_One())) {
- Constant *One = cast<Constant>(LHSI->getOperand(1));
- Value *Or = LHSI->getOperand(0);
- if (match(Or, m_Or(m_Value(LShr), m_Value(X))) &&
- match(LShr, m_LShr(m_Specific(X), m_Value(Y)))) {
- unsigned UsesRemoved = 0;
- if (LHSI->hasOneUse())
- ++UsesRemoved;
- if (Or->hasOneUse())
- ++UsesRemoved;
- if (LShr->hasOneUse())
- ++UsesRemoved;
- Value *NewOr = nullptr;
- // Compute X & ((1 << Y) | 1)
- if (auto *C = dyn_cast<Constant>(Y)) {
- if (UsesRemoved >= 1)
- NewOr =
- ConstantExpr::getOr(ConstantExpr::getNUWShl(One, C), One);
- } else {
- if (UsesRemoved >= 3)
- NewOr = Builder->CreateOr(Builder->CreateShl(One, Y,
- LShr->getName(),
- /*HasNUW=*/true),
- One, Or->getName());
- }
- if (NewOr) {
- Value *NewAnd = Builder->CreateAnd(X, NewOr, LHSI->getName());
- ICI.setOperand(0, NewAnd);
- return &ICI;
- }
- }
- }
+ if (BO0 && BO1 && BO0->getOpcode() == BO1->getOpcode() && BO0->hasOneUse() &&
+ BO1->hasOneUse() && BO0->getOperand(1) == BO1->getOperand(1)) {
+ switch (BO0->getOpcode()) {
+ default:
+ break;
+ case Instruction::Add:
+ case Instruction::Sub:
+ case Instruction::Xor:
+ if (I.isEquality()) // a+x icmp eq/ne b+x --> a icmp b
+ return new ICmpInst(I.getPredicate(), BO0->getOperand(0),
+ BO1->getOperand(0));
+ // icmp u/s (a ^ signbit), (b ^ signbit) --> icmp s/u a, b
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(BO0->getOperand(1))) {
+ if (CI->getValue().isSignBit()) {
+ ICmpInst::Predicate Pred =
+ I.isSigned() ? I.getUnsignedPredicate() : I.getSignedPredicate();
+ return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
}
- }
- // Replace ((X & AndCst) > RHSV) with ((X & AndCst) != 0), if any
- // bit set in (X & AndCst) will produce a result greater than RHSV.
- if (ICI.getPredicate() == ICmpInst::ICMP_UGT) {
- unsigned NTZ = AndCst->getValue().countTrailingZeros();
- if ((NTZ < AndCst->getBitWidth()) &&
- APInt::getOneBitSet(AndCst->getBitWidth(), NTZ).ugt(RHSV))
- return new ICmpInst(ICmpInst::ICMP_NE, LHSI,
- Constant::getNullValue(RHS->getType()));
+ if (BO0->getOpcode() == Instruction::Xor && CI->isMaxValue(true)) {
+ ICmpInst::Predicate Pred =
+ I.isSigned() ? I.getUnsignedPredicate() : I.getSignedPredicate();
+ Pred = I.getSwappedPredicate(Pred);
+ return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
+ }
}
- }
-
- // Try to optimize things like "A[i]&42 == 0" to index computations.
- if (LoadInst *LI = dyn_cast<LoadInst>(LHSI->getOperand(0))) {
- if (GetElementPtrInst *GEP =
- dyn_cast<GetElementPtrInst>(LI->getOperand(0)))
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
- if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
- !LI->isVolatile() && isa<ConstantInt>(LHSI->getOperand(1))) {
- ConstantInt *C = cast<ConstantInt>(LHSI->getOperand(1));
- if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV,ICI, C))
- return Res;
- }
- }
+ break;
+ case Instruction::Mul:
+ if (!I.isEquality())
+ break;
- // X & -C == -C -> X > u ~C
- // X & -C != -C -> X <= u ~C
- // iff C is a power of 2
- if (ICI.isEquality() && RHS == LHSI->getOperand(1) && (-RHSV).isPowerOf2())
- return new ICmpInst(
- ICI.getPredicate() == ICmpInst::ICMP_EQ ? ICmpInst::ICMP_UGT
- : ICmpInst::ICMP_ULE,
- LHSI->getOperand(0), SubOne(RHS));
-
- // (icmp eq (and %A, C), 0) -> (icmp sgt (trunc %A), -1)
- // iff C is a power of 2
- if (ICI.isEquality() && LHSI->hasOneUse() && match(RHS, m_Zero())) {
- if (auto *CI = dyn_cast<ConstantInt>(LHSI->getOperand(1))) {
- const APInt &AI = CI->getValue();
- int32_t ExactLogBase2 = AI.exactLogBase2();
- if (ExactLogBase2 != -1 && DL.isLegalInteger(ExactLogBase2 + 1)) {
- Type *NTy = IntegerType::get(ICI.getContext(), ExactLogBase2 + 1);
- Value *Trunc = Builder->CreateTrunc(LHSI->getOperand(0), NTy);
- return new ICmpInst(ICI.getPredicate() == ICmpInst::ICMP_EQ
- ? ICmpInst::ICMP_SGE
- : ICmpInst::ICMP_SLT,
- Trunc, Constant::getNullValue(NTy));
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(BO0->getOperand(1))) {
+ // a * Cst icmp eq/ne b * Cst --> a & Mask icmp b & Mask
+ // Mask = -1 >> count-trailing-zeros(Cst).
+ if (!CI->isZero() && !CI->isOne()) {
+ const APInt &AP = CI->getValue();
+ ConstantInt *Mask = ConstantInt::get(
+ I.getContext(),
+ APInt::getLowBitsSet(AP.getBitWidth(),
+ AP.getBitWidth() - AP.countTrailingZeros()));
+ Value *And1 = Builder->CreateAnd(BO0->getOperand(0), Mask);
+ Value *And2 = Builder->CreateAnd(BO1->getOperand(0), Mask);
+ return new ICmpInst(I.getPredicate(), And1, And2);
}
}
+ break;
+ case Instruction::UDiv:
+ case Instruction::LShr:
+ if (I.isSigned())
+ break;
+ LLVM_FALLTHROUGH;
+ case Instruction::SDiv:
+ case Instruction::AShr:
+ if (!BO0->isExact() || !BO1->isExact())
+ break;
+ return new ICmpInst(I.getPredicate(), BO0->getOperand(0),
+ BO1->getOperand(0));
+ case Instruction::Shl: {
+ bool NUW = BO0->hasNoUnsignedWrap() && BO1->hasNoUnsignedWrap();
+ bool NSW = BO0->hasNoSignedWrap() && BO1->hasNoSignedWrap();
+ if (!NUW && !NSW)
+ break;
+ if (!NSW && I.isSigned())
+ break;
+ return new ICmpInst(I.getPredicate(), BO0->getOperand(0),
+ BO1->getOperand(0));
}
- break;
-
- case Instruction::Or: {
- if (RHS->isOne()) {
- // icmp slt signum(V) 1 --> icmp slt V, 1
- Value *V = nullptr;
- if (ICI.getPredicate() == ICmpInst::ICMP_SLT &&
- match(LHSI, m_Signum(m_Value(V))))
- return new ICmpInst(ICmpInst::ICMP_SLT, V,
- ConstantInt::get(V->getType(), 1));
}
+ }
- if (!ICI.isEquality() || !RHS->isNullValue() || !LHSI->hasOneUse())
- break;
- Value *P, *Q;
- if (match(LHSI, m_Or(m_PtrToInt(m_Value(P)), m_PtrToInt(m_Value(Q))))) {
- // Simplify icmp eq (or (ptrtoint P), (ptrtoint Q)), 0
- // -> and (icmp eq P, null), (icmp eq Q, null).
- Value *ICIP = Builder->CreateICmp(ICI.getPredicate(), P,
- Constant::getNullValue(P->getType()));
- Value *ICIQ = Builder->CreateICmp(ICI.getPredicate(), Q,
- Constant::getNullValue(Q->getType()));
- Instruction *Op;
- if (ICI.getPredicate() == ICmpInst::ICMP_EQ)
- Op = BinaryOperator::CreateAnd(ICIP, ICIQ);
- else
- Op = BinaryOperator::CreateOr(ICIP, ICIQ);
- return Op;
+ if (BO0) {
+ // Transform A & (L - 1) `ult` L --> L != 0
+ auto LSubOne = m_Add(m_Specific(Op1), m_AllOnes());
+ auto BitwiseAnd =
+ m_CombineOr(m_And(m_Value(), LSubOne), m_And(LSubOne, m_Value()));
+
+ if (match(BO0, BitwiseAnd) && I.getPredicate() == ICmpInst::ICMP_ULT) {
+ auto *Zero = Constant::getNullValue(BO0->getType());
+ return new ICmpInst(ICmpInst::ICMP_NE, Op1, Zero);
}
- break;
}
- case Instruction::Mul: { // (icmp pred (mul X, Val), CI)
- ConstantInt *Val = dyn_cast<ConstantInt>(LHSI->getOperand(1));
- if (!Val) break;
+ return nullptr;
+}
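
The final transform in this function, "A & (L - 1) u< L --> L != 0", holds because for nonzero L the masked value is at most L - 1, while for L == 0 the comparison degenerates to "A u< 0", which is always false. A brute-force sketch, illustration only:

    #include <cassert>
    #include <cstdint>
    bool beforeFold(uint32_t a, uint32_t l) { return (a & (l - 1u)) < l; } // icmp ult (and a, l-1), l
    bool afterFold(uint32_t a, uint32_t l)  { return l != 0u; }            // icmp ne l, 0
    int main() {
      for (uint32_t a = 0; a < 64; ++a)
        for (uint32_t l = 0; l < 64; ++l)
          assert(beforeFold(a, l) == afterFold(a, l));
    }
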
- // If this is a signed comparison to 0 and the mul is sign preserving,
- // use the mul LHS operand instead.
- ICmpInst::Predicate pred = ICI.getPredicate();
- if (isSignTest(pred, RHS) && !Val->isZero() &&
- cast<BinaryOperator>(LHSI)->hasNoSignedWrap())
- return new ICmpInst(Val->isNegative() ?
- ICmpInst::getSwappedPredicate(pred) : pred,
- LHSI->getOperand(0),
- Constant::getNullValue(RHS->getType()));
+/// Fold icmp Pred min|max(X, Y), X.
+static Instruction *foldICmpWithMinMax(ICmpInst &Cmp) {
+ ICmpInst::Predicate Pred = Cmp.getPredicate();
+ Value *Op0 = Cmp.getOperand(0);
+ Value *X = Cmp.getOperand(1);
+
+ // Canonicalize minimum or maximum operand to LHS of the icmp.
+ if (match(X, m_c_SMin(m_Specific(Op0), m_Value())) ||
+ match(X, m_c_SMax(m_Specific(Op0), m_Value())) ||
+ match(X, m_c_UMin(m_Specific(Op0), m_Value())) ||
+ match(X, m_c_UMax(m_Specific(Op0), m_Value()))) {
+ std::swap(Op0, X);
+ Pred = Cmp.getSwappedPredicate();
+ }
- break;
+ Value *Y;
+ if (match(Op0, m_c_SMin(m_Specific(X), m_Value(Y)))) {
+ // smin(X, Y) == X --> X s<= Y
+ // smin(X, Y) s>= X --> X s<= Y
+ if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_SGE)
+ return new ICmpInst(ICmpInst::ICMP_SLE, X, Y);
+
+ // smin(X, Y) != X --> X s> Y
+ // smin(X, Y) s< X --> X s> Y
+ if (Pred == CmpInst::ICMP_NE || Pred == CmpInst::ICMP_SLT)
+ return new ICmpInst(ICmpInst::ICMP_SGT, X, Y);
+
+ // These cases should be handled in InstSimplify:
+ // smin(X, Y) s<= X --> true
+ // smin(X, Y) s> X --> false
+ return nullptr;
}
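
The smin rules above also have a direct scalar analogue; a minimal check with std::min standing in for the smin matcher (illustration only):

    #include <algorithm>
    #include <cassert>
    bool beforeFold(int x, int y) { return std::min(x, y) == x; } // icmp eq (smin x, y), x
    bool afterFold(int x, int y)  { return x <= y; }              // icmp sle x, y
    int main() {
      for (int x = -5; x <= 5; ++x)
        for (int y = -5; y <= 5; ++y)
          assert(beforeFold(x, y) == afterFold(x, y));
    }
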
- case Instruction::Shl: { // (icmp pred (shl X, ShAmt), CI)
- uint32_t TypeBits = RHSV.getBitWidth();
- ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1));
- if (!ShAmt) {
- Value *X;
- // (1 << X) pred P2 -> X pred Log2(P2)
- if (match(LHSI, m_Shl(m_One(), m_Value(X)))) {
- bool RHSVIsPowerOf2 = RHSV.isPowerOf2();
- ICmpInst::Predicate Pred = ICI.getPredicate();
- if (ICI.isUnsigned()) {
- if (!RHSVIsPowerOf2) {
- // (1 << X) < 30 -> X <= 4
- // (1 << X) <= 30 -> X <= 4
- // (1 << X) >= 30 -> X > 4
- // (1 << X) > 30 -> X > 4
- if (Pred == ICmpInst::ICMP_ULT)
- Pred = ICmpInst::ICMP_ULE;
- else if (Pred == ICmpInst::ICMP_UGE)
- Pred = ICmpInst::ICMP_UGT;
- }
- unsigned RHSLog2 = RHSV.logBase2();
-
- // (1 << X) >= 2147483648 -> X >= 31 -> X == 31
- // (1 << X) < 2147483648 -> X < 31 -> X != 31
- if (RHSLog2 == TypeBits-1) {
- if (Pred == ICmpInst::ICMP_UGE)
- Pred = ICmpInst::ICMP_EQ;
- else if (Pred == ICmpInst::ICMP_ULT)
- Pred = ICmpInst::ICMP_NE;
- }
+ if (match(Op0, m_c_SMax(m_Specific(X), m_Value(Y)))) {
+ // smax(X, Y) == X --> X s>= Y
+ // smax(X, Y) s<= X --> X s>= Y
+ if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_SLE)
+ return new ICmpInst(ICmpInst::ICMP_SGE, X, Y);
- return new ICmpInst(Pred, X,
- ConstantInt::get(RHS->getType(), RHSLog2));
- } else if (ICI.isSigned()) {
- if (RHSV.isAllOnesValue()) {
- // (1 << X) <= -1 -> X == 31
- if (Pred == ICmpInst::ICMP_SLE)
- return new ICmpInst(ICmpInst::ICMP_EQ, X,
- ConstantInt::get(RHS->getType(), TypeBits-1));
-
- // (1 << X) > -1 -> X != 31
- if (Pred == ICmpInst::ICMP_SGT)
- return new ICmpInst(ICmpInst::ICMP_NE, X,
- ConstantInt::get(RHS->getType(), TypeBits-1));
- } else if (!RHSV) {
- // (1 << X) < 0 -> X == 31
- // (1 << X) <= 0 -> X == 31
- if (Pred == ICmpInst::ICMP_SLT || Pred == ICmpInst::ICMP_SLE)
- return new ICmpInst(ICmpInst::ICMP_EQ, X,
- ConstantInt::get(RHS->getType(), TypeBits-1));
-
- // (1 << X) >= 0 -> X != 31
- // (1 << X) > 0 -> X != 31
- if (Pred == ICmpInst::ICMP_SGT || Pred == ICmpInst::ICMP_SGE)
- return new ICmpInst(ICmpInst::ICMP_NE, X,
- ConstantInt::get(RHS->getType(), TypeBits-1));
- }
- } else if (ICI.isEquality()) {
- if (RHSVIsPowerOf2)
- return new ICmpInst(
- Pred, X, ConstantInt::get(RHS->getType(), RHSV.logBase2()));
- }
- }
- break;
- }
+ // smax(X, Y) != X --> X s< Y
+ // smax(X, Y) s> X --> X s< Y
+ if (Pred == CmpInst::ICMP_NE || Pred == CmpInst::ICMP_SGT)
+ return new ICmpInst(ICmpInst::ICMP_SLT, X, Y);
- // Check that the shift amount is in range. If not, don't perform
- // undefined shifts. When the shift is visited it will be
- // simplified.
- if (ShAmt->uge(TypeBits))
- break;
+ // These cases should be handled in InstSimplify:
+ // smax(X, Y) s>= X --> true
+ // smax(X, Y) s< X --> false
+ return nullptr;
+ }
- if (ICI.isEquality()) {
- // If we are comparing against bits always shifted out, the
- // comparison cannot succeed.
- Constant *Comp =
- ConstantExpr::getShl(ConstantExpr::getLShr(RHS, ShAmt),
- ShAmt);
- if (Comp != RHS) {// Comparing against a bit that we know is zero.
- bool IsICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
- Constant *Cst = Builder->getInt1(IsICMP_NE);
- return replaceInstUsesWith(ICI, Cst);
- }
+ if (match(Op0, m_c_UMin(m_Specific(X), m_Value(Y)))) {
+ // umin(X, Y) == X --> X u<= Y
+ // umin(X, Y) u>= X --> X u<= Y
+ if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_UGE)
+ return new ICmpInst(ICmpInst::ICMP_ULE, X, Y);
- // If the shift is NUW, then it is just shifting out zeros, no need for an
- // AND.
- if (cast<BinaryOperator>(LHSI)->hasNoUnsignedWrap())
- return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
- ConstantExpr::getLShr(RHS, ShAmt));
-
- // If the shift is NSW and we compare to 0, then it is just shifting out
- // sign bits, no need for an AND either.
- if (cast<BinaryOperator>(LHSI)->hasNoSignedWrap() && RHSV == 0)
- return new ICmpInst(ICI.getPredicate(), LHSI->getOperand(0),
- ConstantExpr::getLShr(RHS, ShAmt));
-
- if (LHSI->hasOneUse()) {
- // Otherwise strength reduce the shift into an and.
- uint32_t ShAmtVal = (uint32_t)ShAmt->getLimitedValue(TypeBits);
- Constant *Mask = Builder->getInt(APInt::getLowBitsSet(TypeBits,
- TypeBits - ShAmtVal));
-
- Value *And =
- Builder->CreateAnd(LHSI->getOperand(0),Mask, LHSI->getName()+".mask");
- return new ICmpInst(ICI.getPredicate(), And,
- ConstantExpr::getLShr(RHS, ShAmt));
- }
- }
+ // umin(X, Y) != X --> X u> Y
+ // umin(X, Y) u< X --> X u> Y
+ if (Pred == CmpInst::ICMP_NE || Pred == CmpInst::ICMP_ULT)
+ return new ICmpInst(ICmpInst::ICMP_UGT, X, Y);
- // If this is a signed comparison to 0 and the shift is sign preserving,
- // use the shift LHS operand instead.
- ICmpInst::Predicate pred = ICI.getPredicate();
- if (isSignTest(pred, RHS) &&
- cast<BinaryOperator>(LHSI)->hasNoSignedWrap())
- return new ICmpInst(pred,
- LHSI->getOperand(0),
- Constant::getNullValue(RHS->getType()));
-
- // Otherwise, if this is a comparison of the sign bit, simplify to and/test.
- bool TrueIfSigned = false;
- if (LHSI->hasOneUse() &&
- isSignBitCheck(ICI.getPredicate(), RHS, TrueIfSigned)) {
- // (X << 31) <s 0 --> (X&1) != 0
- Constant *Mask = ConstantInt::get(LHSI->getOperand(0)->getType(),
- APInt::getOneBitSet(TypeBits,
- TypeBits-ShAmt->getZExtValue()-1));
- Value *And =
- Builder->CreateAnd(LHSI->getOperand(0), Mask, LHSI->getName()+".mask");
- return new ICmpInst(TrueIfSigned ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ,
- And, Constant::getNullValue(And->getType()));
- }
+ // These cases should be handled in InstSimplify:
+ // umin(X, Y) u<= X --> true
+ // umin(X, Y) u> X --> false
+ return nullptr;
+ }
- // Transform (icmp pred iM (shl iM %v, N), CI)
- // -> (icmp pred i(M-N) (trunc %v iM to i(M-N)), (trunc (CI>>N))
- // Transform the shl to a trunc if (trunc (CI>>N)) has no loss and M-N.
- // This enables to get rid of the shift in favor of a trunc which can be
- // free on the target. It has the additional benefit of comparing to a
- // smaller constant, which will be target friendly.
- unsigned Amt = ShAmt->getLimitedValue(TypeBits-1);
- if (LHSI->hasOneUse() &&
- Amt != 0 && RHSV.countTrailingZeros() >= Amt) {
- Type *NTy = IntegerType::get(ICI.getContext(), TypeBits - Amt);
- Constant *NCI = ConstantExpr::getTrunc(
- ConstantExpr::getAShr(RHS,
- ConstantInt::get(RHS->getType(), Amt)),
- NTy);
- return new ICmpInst(ICI.getPredicate(),
- Builder->CreateTrunc(LHSI->getOperand(0), NTy),
- NCI);
- }
+ if (match(Op0, m_c_UMax(m_Specific(X), m_Value(Y)))) {
+ // umax(X, Y) == X --> X u>= Y
+ // umax(X, Y) u<= X --> X u>= Y
+ if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_ULE)
+ return new ICmpInst(ICmpInst::ICMP_UGE, X, Y);
- break;
+ // umax(X, Y) != X --> X u< Y
+ // umax(X, Y) u> X --> X u< Y
+ if (Pred == CmpInst::ICMP_NE || Pred == CmpInst::ICMP_UGT)
+ return new ICmpInst(ICmpInst::ICMP_ULT, X, Y);
+
+ // These cases should be handled in InstSimplify:
+ // umax(X, Y) u>= X --> true
+ // umax(X, Y) u< X --> false
+ return nullptr;
}
- case Instruction::LShr: // (icmp pred (shr X, ShAmt), CI)
- case Instruction::AShr: {
- // Handle equality comparisons of shift-by-constant.
- BinaryOperator *BO = cast<BinaryOperator>(LHSI);
- if (ConstantInt *ShAmt = dyn_cast<ConstantInt>(LHSI->getOperand(1))) {
- if (Instruction *Res = FoldICmpShrCst(ICI, BO, ShAmt))
- return Res;
- }
+ return nullptr;
+}
- // Handle exact shr's.
- if (ICI.isEquality() && BO->isExact() && BO->hasOneUse()) {
- if (RHSV.isMinValue())
- return new ICmpInst(ICI.getPredicate(), BO->getOperand(0), RHS);
+Instruction *InstCombiner::foldICmpEquality(ICmpInst &I) {
+ if (!I.isEquality())
+ return nullptr;
+
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ Value *A, *B, *C, *D;
+ if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) {
+ if (A == Op1 || B == Op1) { // (A^B) == A -> B == 0
+ Value *OtherVal = A == Op1 ? B : A;
+ return new ICmpInst(I.getPredicate(), OtherVal,
+ Constant::getNullValue(A->getType()));
}
- break;
- }
- case Instruction::UDiv:
- if (ConstantInt *DivLHS = dyn_cast<ConstantInt>(LHSI->getOperand(0))) {
- Value *X = LHSI->getOperand(1);
- const APInt &C1 = RHS->getValue();
- const APInt &C2 = DivLHS->getValue();
- assert(C2 != 0 && "udiv 0, X should have been simplified already.");
- // (icmp ugt (udiv C2, X), C1) -> (icmp ule X, C2/(C1+1))
- if (ICI.getPredicate() == ICmpInst::ICMP_UGT) {
- assert(!C1.isMaxValue() &&
- "icmp ugt X, UINT_MAX should have been simplified already.");
- return new ICmpInst(ICmpInst::ICMP_ULE, X,
- ConstantInt::get(X->getType(), C2.udiv(C1 + 1)));
- }
- // (icmp ult (udiv C2, X), C1) -> (icmp ugt X, C2/C1)
- if (ICI.getPredicate() == ICmpInst::ICMP_ULT) {
- assert(C1 != 0 && "icmp ult X, 0 should have been simplified already.");
- return new ICmpInst(ICmpInst::ICMP_UGT, X,
- ConstantInt::get(X->getType(), C2.udiv(C1)));
+ if (match(Op1, m_Xor(m_Value(C), m_Value(D)))) {
+ // A^c1 == C^c2 --> A == C^(c1^c2)
+ ConstantInt *C1, *C2;
+ if (match(B, m_ConstantInt(C1)) && match(D, m_ConstantInt(C2)) &&
+ Op1->hasOneUse()) {
+ Constant *NC = Builder->getInt(C1->getValue() ^ C2->getValue());
+ Value *Xor = Builder->CreateXor(C, NC);
+ return new ICmpInst(I.getPredicate(), A, Xor);
}
+
+ // A^B == A^D -> B == D
+ if (A == C)
+ return new ICmpInst(I.getPredicate(), B, D);
+ if (A == D)
+ return new ICmpInst(I.getPredicate(), B, C);
+ if (B == C)
+ return new ICmpInst(I.getPredicate(), A, D);
+ if (B == D)
+ return new ICmpInst(I.getPredicate(), A, C);
}
- // fall-through
- case Instruction::SDiv:
- // Fold: icmp pred ([us]div X, C1), C2 -> range test
- // Fold this div into the comparison, producing a range check.
- // Determine, based on the divide type, what the range is being
- // checked. If there is an overflow on the low or high side, remember
- // it, otherwise compute the range [low, hi) bounding the new value.
- // See: InsertRangeTest above for the kinds of replacements possible.
- if (ConstantInt *DivRHS = dyn_cast<ConstantInt>(LHSI->getOperand(1)))
- if (Instruction *R = FoldICmpDivCst(ICI, cast<BinaryOperator>(LHSI),
- DivRHS))
- return R;
- break;
+ }
- case Instruction::Sub: {
- ConstantInt *LHSC = dyn_cast<ConstantInt>(LHSI->getOperand(0));
- if (!LHSC) break;
- const APInt &LHSV = LHSC->getValue();
-
- // C1-X <u C2 -> (X|(C2-1)) == C1
- // iff C1 & (C2-1) == C2-1
- // C2 is a power of 2
- if (ICI.getPredicate() == ICmpInst::ICMP_ULT && LHSI->hasOneUse() &&
- RHSV.isPowerOf2() && (LHSV & (RHSV - 1)) == (RHSV - 1))
- return new ICmpInst(ICmpInst::ICMP_EQ,
- Builder->CreateOr(LHSI->getOperand(1), RHSV - 1),
- LHSC);
-
- // C1-X >u C2 -> (X|C2) != C1
- // iff C1 & C2 == C2
- // C2+1 is a power of 2
- if (ICI.getPredicate() == ICmpInst::ICMP_UGT && LHSI->hasOneUse() &&
- (RHSV + 1).isPowerOf2() && (LHSV & RHSV) == RHSV)
- return new ICmpInst(ICmpInst::ICMP_NE,
- Builder->CreateOr(LHSI->getOperand(1), RHSV), LHSC);
- break;
+ if (match(Op1, m_Xor(m_Value(A), m_Value(B))) && (A == Op0 || B == Op0)) {
+ // A == (A^B) -> B == 0
+ Value *OtherVal = A == Op0 ? B : A;
+ return new ICmpInst(I.getPredicate(), OtherVal,
+ Constant::getNullValue(A->getType()));
}
- case Instruction::Add:
- // Fold: icmp pred (add X, C1), C2
- if (!ICI.isEquality()) {
- ConstantInt *LHSC = dyn_cast<ConstantInt>(LHSI->getOperand(1));
- if (!LHSC) break;
- const APInt &LHSV = LHSC->getValue();
+ // (X&Z) == (Y&Z) -> (X^Y) & Z == 0
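+ // E.g., with Z == 0xff: (X & 0xff) == (Y & 0xff) --> ((X ^ Y) & 0xff) == 0.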
+ if (match(Op0, m_OneUse(m_And(m_Value(A), m_Value(B)))) &&
+ match(Op1, m_OneUse(m_And(m_Value(C), m_Value(D))))) {
+ Value *X = nullptr, *Y = nullptr, *Z = nullptr;
+
+ if (A == C) {
+ X = B;
+ Y = D;
+ Z = A;
+ } else if (A == D) {
+ X = B;
+ Y = C;
+ Z = A;
+ } else if (B == C) {
+ X = A;
+ Y = D;
+ Z = B;
+ } else if (B == D) {
+ X = A;
+ Y = C;
+ Z = B;
+ }
- ConstantRange CR = ICI.makeConstantRange(ICI.getPredicate(), RHSV)
- .subtract(LHSV);
+ if (X) { // Build (X^Y) & Z
+ Op1 = Builder->CreateXor(X, Y);
+ Op1 = Builder->CreateAnd(Op1, Z);
+ I.setOperand(0, Op1);
+ I.setOperand(1, Constant::getNullValue(Op1->getType()));
+ return &I;
+ }
+ }
- if (ICI.isSigned()) {
- if (CR.getLower().isSignBit()) {
- return new ICmpInst(ICmpInst::ICMP_SLT, LHSI->getOperand(0),
- Builder->getInt(CR.getUpper()));
- } else if (CR.getUpper().isSignBit()) {
- return new ICmpInst(ICmpInst::ICMP_SGE, LHSI->getOperand(0),
- Builder->getInt(CR.getLower()));
- }
- } else {
- if (CR.getLower().isMinValue()) {
- return new ICmpInst(ICmpInst::ICMP_ULT, LHSI->getOperand(0),
- Builder->getInt(CR.getUpper()));
- } else if (CR.getUpper().isMinValue()) {
- return new ICmpInst(ICmpInst::ICMP_UGE, LHSI->getOperand(0),
- Builder->getInt(CR.getLower()));
- }
- }
+ // Transform (zext A) == (B & (1<<X)-1) --> A == (trunc B)
+ // and (B & (1<<X)-1) == (zext A) --> A == (trunc B)
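+ // E.g., for i8 %a and i32 %b: (zext %a to i32) == (%b & 255) --> %a == (trunc %b to i8).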
+ ConstantInt *Cst1;
+ if ((Op0->hasOneUse() && match(Op0, m_ZExt(m_Value(A))) &&
+ match(Op1, m_And(m_Value(B), m_ConstantInt(Cst1)))) ||
+ (Op1->hasOneUse() && match(Op0, m_And(m_Value(B), m_ConstantInt(Cst1))) &&
+ match(Op1, m_ZExt(m_Value(A))))) {
+ APInt Pow2 = Cst1->getValue() + 1;
+ if (Pow2.isPowerOf2() && isa<IntegerType>(A->getType()) &&
+ Pow2.logBase2() == cast<IntegerType>(A->getType())->getBitWidth())
+ return new ICmpInst(I.getPredicate(), A,
+ Builder->CreateTrunc(B, A->getType()));
+ }
- // X-C1 <u C2 -> (X & -C2) == C1
- // iff C1 & (C2-1) == 0
- // C2 is a power of 2
- if (ICI.getPredicate() == ICmpInst::ICMP_ULT && LHSI->hasOneUse() &&
- RHSV.isPowerOf2() && (LHSV & (RHSV - 1)) == 0)
- return new ICmpInst(ICmpInst::ICMP_EQ,
- Builder->CreateAnd(LHSI->getOperand(0), -RHSV),
- ConstantExpr::getNeg(LHSC));
-
- // X-C1 >u C2 -> (X & ~C2) != C1
- // iff C1 & C2 == 0
- // C2+1 is a power of 2
- if (ICI.getPredicate() == ICmpInst::ICMP_UGT && LHSI->hasOneUse() &&
- (RHSV + 1).isPowerOf2() && (LHSV & RHSV) == 0)
- return new ICmpInst(ICmpInst::ICMP_NE,
- Builder->CreateAnd(LHSI->getOperand(0), ~RHSV),
- ConstantExpr::getNeg(LHSC));
+ // (A >> C) == (B >> C) --> (A^B) u< (1 << C)
+ // For lshr and ashr pairs.
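+ // E.g., on i32 with C == 4: (A >> 4) == (B >> 4) --> (A ^ B) u< 16.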
+ if ((match(Op0, m_OneUse(m_LShr(m_Value(A), m_ConstantInt(Cst1)))) &&
+ match(Op1, m_OneUse(m_LShr(m_Value(B), m_Specific(Cst1))))) ||
+ (match(Op0, m_OneUse(m_AShr(m_Value(A), m_ConstantInt(Cst1)))) &&
+ match(Op1, m_OneUse(m_AShr(m_Value(B), m_Specific(Cst1)))))) {
+ unsigned TypeBits = Cst1->getBitWidth();
+ unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits);
+ if (ShAmt < TypeBits && ShAmt != 0) {
+ ICmpInst::Predicate Pred = I.getPredicate() == ICmpInst::ICMP_NE
+ ? ICmpInst::ICMP_UGE
+ : ICmpInst::ICMP_ULT;
+ Value *Xor = Builder->CreateXor(A, B, I.getName() + ".unshifted");
+ APInt CmpVal = APInt::getOneBitSet(TypeBits, ShAmt);
+ return new ICmpInst(Pred, Xor, Builder->getInt(CmpVal));
}
- break;
}
- // Simplify icmp_eq and icmp_ne instructions with integer constant RHS.
- if (ICI.isEquality()) {
- bool isICMP_NE = ICI.getPredicate() == ICmpInst::ICMP_NE;
-
- // If the first operand is (add|sub|and|or|xor|rem) with a constant, and
- // the second operand is a constant, simplify a bit.
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(LHSI)) {
- switch (BO->getOpcode()) {
- case Instruction::SRem:
- // If we have a signed (X % (2^c)) == 0, turn it into an unsigned one.
- if (RHSV == 0 && isa<ConstantInt>(BO->getOperand(1)) &&BO->hasOneUse()){
- const APInt &V = cast<ConstantInt>(BO->getOperand(1))->getValue();
- if (V.sgt(1) && V.isPowerOf2()) {
- Value *NewRem =
- Builder->CreateURem(BO->getOperand(0), BO->getOperand(1),
- BO->getName());
- return new ICmpInst(ICI.getPredicate(), NewRem,
- Constant::getNullValue(BO->getType()));
- }
- }
- break;
- case Instruction::Add:
- // Replace ((add A, B) != C) with (A != C-B) if B & C are constants.
- if (ConstantInt *BOp1C = dyn_cast<ConstantInt>(BO->getOperand(1))) {
- if (BO->hasOneUse())
- return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
- ConstantExpr::getSub(RHS, BOp1C));
- } else if (RHSV == 0) {
- // Replace ((add A, B) != 0) with (A != -B) if A or B is
- // efficiently invertible, or if the add has just this one use.
- Value *BOp0 = BO->getOperand(0), *BOp1 = BO->getOperand(1);
-
- if (Value *NegVal = dyn_castNegVal(BOp1))
- return new ICmpInst(ICI.getPredicate(), BOp0, NegVal);
- if (Value *NegVal = dyn_castNegVal(BOp0))
- return new ICmpInst(ICI.getPredicate(), NegVal, BOp1);
- if (BO->hasOneUse()) {
- Value *Neg = Builder->CreateNeg(BOp1);
- Neg->takeName(BO);
- return new ICmpInst(ICI.getPredicate(), BOp0, Neg);
- }
- }
- break;
- case Instruction::Xor:
- if (BO->hasOneUse()) {
- if (Constant *BOC = dyn_cast<Constant>(BO->getOperand(1))) {
- // For the xor case, we can xor two constants together, eliminating
- // the explicit xor.
- return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
- ConstantExpr::getXor(RHS, BOC));
- } else if (RHSV == 0) {
- // Replace ((xor A, B) != 0) with (A != B)
- return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
- BO->getOperand(1));
- }
- }
- break;
- case Instruction::Sub:
- if (BO->hasOneUse()) {
- if (ConstantInt *BOp0C = dyn_cast<ConstantInt>(BO->getOperand(0))) {
- // Replace ((sub A, B) != C) with (B != A-C) if A & C are constants.
- return new ICmpInst(ICI.getPredicate(), BO->getOperand(1),
- ConstantExpr::getSub(BOp0C, RHS));
- } else if (RHSV == 0) {
- // Replace ((sub A, B) != 0) with (A != B)
- return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
- BO->getOperand(1));
- }
- }
- break;
- case Instruction::Or:
- // If bits are being or'd in that are not present in the constant we
- // are comparing against, then the comparison could never succeed!
- if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) {
- Constant *NotCI = ConstantExpr::getNot(RHS);
- if (!ConstantExpr::getAnd(BOC, NotCI)->isNullValue())
- return replaceInstUsesWith(ICI, Builder->getInt1(isICMP_NE));
-
- // Comparing if all bits outside of a constant mask are set?
- // Replace (X | C) == -1 with (X & ~C) == ~C.
- // This removes the -1 constant.
- if (BO->hasOneUse() && RHS->isAllOnesValue()) {
- Constant *NotBOC = ConstantExpr::getNot(BOC);
- Value *And = Builder->CreateAnd(BO->getOperand(0), NotBOC);
- return new ICmpInst(ICI.getPredicate(), And, NotBOC);
- }
- }
- break;
+ // (A << C) == (B << C) --> ((A^B) & (~0U >> C)) == 0
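+ // E.g., on i32 with C == 4: (A << 4) == (B << 4) --> ((A ^ B) & 0x0fffffff) == 0.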
+ if (match(Op0, m_OneUse(m_Shl(m_Value(A), m_ConstantInt(Cst1)))) &&
+ match(Op1, m_OneUse(m_Shl(m_Value(B), m_Specific(Cst1))))) {
+ unsigned TypeBits = Cst1->getBitWidth();
+ unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits);
+ if (ShAmt < TypeBits && ShAmt != 0) {
+ Value *Xor = Builder->CreateXor(A, B, I.getName() + ".unshifted");
+ APInt AndVal = APInt::getLowBitsSet(TypeBits, TypeBits - ShAmt);
+ Value *And = Builder->CreateAnd(Xor, Builder->getInt(AndVal),
+ I.getName() + ".mask");
+ return new ICmpInst(I.getPredicate(), And,
+ Constant::getNullValue(Cst1->getType()));
+ }
+ }
- case Instruction::And:
- if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) {
- // If bits are being compared against that are and'd out, then the
- // comparison can never succeed!
- if ((RHSV & ~BOC->getValue()) != 0)
- return replaceInstUsesWith(ICI, Builder->getInt1(isICMP_NE));
-
- // If we have ((X & C) == C), turn it into ((X & C) != 0).
- if (RHS == BOC && RHSV.isPowerOf2())
- return new ICmpInst(isICMP_NE ? ICmpInst::ICMP_EQ :
- ICmpInst::ICMP_NE, LHSI,
- Constant::getNullValue(RHS->getType()));
-
- // Don't perform the following transforms if the AND has multiple uses
- if (!BO->hasOneUse())
- break;
+ // Transform "icmp eq (trunc (lshr(X, cst1)), cst" to
+ // "icmp (and X, mask), cst"
+ uint64_t ShAmt = 0;
+ if (Op0->hasOneUse() &&
+ match(Op0, m_Trunc(m_OneUse(m_LShr(m_Value(A), m_ConstantInt(ShAmt))))) &&
+ match(Op1, m_ConstantInt(Cst1)) &&
+ // Only do this when A has multiple uses. This is most important to do
+ // when it exposes other optimizations.
+ !A->hasOneUse()) {
+ unsigned ASize = cast<IntegerType>(A->getType())->getPrimitiveSizeInBits();
+
+ if (ShAmt < ASize) {
+ APInt MaskV =
+ APInt::getLowBitsSet(ASize, Op0->getType()->getPrimitiveSizeInBits());
+ MaskV <<= ShAmt;
- // Replace (and X, (1 << size(X)-1) != 0) with x s< 0
- if (BOC->getValue().isSignBit()) {
- Value *X = BO->getOperand(0);
- Constant *Zero = Constant::getNullValue(X->getType());
- ICmpInst::Predicate pred = isICMP_NE ?
- ICmpInst::ICMP_SLT : ICmpInst::ICMP_SGE;
- return new ICmpInst(pred, X, Zero);
- }
+ APInt CmpV = Cst1->getValue().zext(ASize);
+ CmpV <<= ShAmt;
- // ((X & ~7) == 0) --> X < 8
- if (RHSV == 0 && isHighOnes(BOC)) {
- Value *X = BO->getOperand(0);
- Constant *NegX = ConstantExpr::getNeg(BOC);
- ICmpInst::Predicate pred = isICMP_NE ?
- ICmpInst::ICMP_UGE : ICmpInst::ICMP_ULT;
- return new ICmpInst(pred, X, NegX);
- }
- }
- break;
- case Instruction::Mul:
- if (RHSV == 0 && BO->hasNoSignedWrap()) {
- if (ConstantInt *BOC = dyn_cast<ConstantInt>(BO->getOperand(1))) {
- // The trivial case (mul X, 0) is handled by InstSimplify
- // General case : (mul X, C) != 0 iff X != 0
- // (mul X, C) == 0 iff X == 0
- if (!BOC->isZero())
- return new ICmpInst(ICI.getPredicate(), BO->getOperand(0),
- Constant::getNullValue(RHS->getType()));
- }
- }
- break;
- default: break;
- }
- } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(LHSI)) {
- // Handle icmp {eq|ne} <intrinsic>, intcst.
- switch (II->getIntrinsicID()) {
- case Intrinsic::bswap:
- Worklist.Add(II);
- ICI.setOperand(0, II->getArgOperand(0));
- ICI.setOperand(1, Builder->getInt(RHSV.byteSwap()));
- return &ICI;
- case Intrinsic::ctlz:
- case Intrinsic::cttz:
- // ctz(A) == bitwidth(a) -> A == 0 and likewise for !=
- if (RHSV == RHS->getType()->getBitWidth()) {
- Worklist.Add(II);
- ICI.setOperand(0, II->getArgOperand(0));
- ICI.setOperand(1, ConstantInt::get(RHS->getType(), 0));
- return &ICI;
- }
- break;
- case Intrinsic::ctpop:
- // popcount(A) == 0 -> A == 0 and likewise for !=
- if (RHS->isZero()) {
- Worklist.Add(II);
- ICI.setOperand(0, II->getArgOperand(0));
- ICI.setOperand(1, RHS);
- return &ICI;
- }
- break;
- default:
- break;
- }
+ Value *Mask = Builder->CreateAnd(A, Builder->getInt(MaskV));
+ return new ICmpInst(I.getPredicate(), Mask, Builder->getInt(CmpV));
}
}
+
return nullptr;
}
/// Handle icmp (cast x to y), (cast/cst). We only handle extending casts so
/// far.
-Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICmp) {
+Instruction *InstCombiner::foldICmpWithCastAndCast(ICmpInst &ICmp) {
const CastInst *LHSCI = cast<CastInst>(ICmp.getOperand(0));
Value *LHSCIOp = LHSCI->getOperand(0);
Type *SrcTy = LHSCIOp->getType();
@@ -2485,92 +3380,6 @@ Instruction *InstCombiner::visitICmpInstWithCastAndCast(ICmpInst &ICmp) {
return BinaryOperator::CreateNot(Result);
}
-/// The caller has matched a pattern of the form:
-/// I = icmp ugt (add (add A, B), CI2), CI1
-/// If this is of the form:
-/// sum = a + b
-/// if (sum+128 >u 255)
-/// Then replace it with llvm.sadd.with.overflow.i8.
-///
-static Instruction *ProcessUGT_ADDCST_ADD(ICmpInst &I, Value *A, Value *B,
- ConstantInt *CI2, ConstantInt *CI1,
- InstCombiner &IC) {
- // The transformation we're trying to do here is to transform this into an
- // llvm.sadd.with.overflow. To do this, we have to replace the original add
- // with a narrower add, and discard the add-with-constant that is part of the
- // range check (if we can't eliminate it, this isn't profitable).
-
- // In order to eliminate the add-with-constant, the compare can be its only
- // use.
- Instruction *AddWithCst = cast<Instruction>(I.getOperand(0));
- if (!AddWithCst->hasOneUse()) return nullptr;
-
- // If CI2 is 2^7, 2^15, 2^31, then it might be an sadd.with.overflow.
- if (!CI2->getValue().isPowerOf2()) return nullptr;
- unsigned NewWidth = CI2->getValue().countTrailingZeros();
- if (NewWidth != 7 && NewWidth != 15 && NewWidth != 31) return nullptr;
-
- // The width of the new add formed is 1 more than the bias.
- ++NewWidth;
-
- // Check to see that CI1 is an all-ones value with NewWidth bits.
- if (CI1->getBitWidth() == NewWidth ||
- CI1->getValue() != APInt::getLowBitsSet(CI1->getBitWidth(), NewWidth))
- return nullptr;
-
- // This is only really a signed overflow check if the inputs have been
- // sign-extended; check for that condition. For example, if CI2 is 2^31 and
- // the operands of the add are 64 bits wide, we need at least 33 sign bits.
- unsigned NeededSignBits = CI1->getBitWidth() - NewWidth + 1;
- if (IC.ComputeNumSignBits(A, 0, &I) < NeededSignBits ||
- IC.ComputeNumSignBits(B, 0, &I) < NeededSignBits)
- return nullptr;
-
- // In order to replace the original add with a narrower
- // llvm.sadd.with.overflow, the only uses allowed are the add-with-constant
- // and truncates that discard the high bits of the add. Verify that this is
- // the case.
- Instruction *OrigAdd = cast<Instruction>(AddWithCst->getOperand(0));
- for (User *U : OrigAdd->users()) {
- if (U == AddWithCst) continue;
-
- // Only accept truncates for now. We would really like a nice recursive
- // predicate like SimplifyDemandedBits, but which goes downwards the use-def
- // chain to see which bits of a value are actually demanded. If the
- // original add had another add which was then immediately truncated, we
- // could still do the transformation.
- TruncInst *TI = dyn_cast<TruncInst>(U);
- if (!TI || TI->getType()->getPrimitiveSizeInBits() > NewWidth)
- return nullptr;
- }
-
- // If the pattern matches, truncate the inputs to the narrower type and
- // use the sadd_with_overflow intrinsic to efficiently compute both the
- // result and the overflow bit.
- Type *NewType = IntegerType::get(OrigAdd->getContext(), NewWidth);
- Value *F = Intrinsic::getDeclaration(I.getModule(),
- Intrinsic::sadd_with_overflow, NewType);
-
- InstCombiner::BuilderTy *Builder = IC.Builder;
-
- // Put the new code above the original add, in case there are any uses of the
- // add between the add and the compare.
- Builder->SetInsertPoint(OrigAdd);
-
- Value *TruncA = Builder->CreateTrunc(A, NewType, A->getName()+".trunc");
- Value *TruncB = Builder->CreateTrunc(B, NewType, B->getName()+".trunc");
- CallInst *Call = Builder->CreateCall(F, {TruncA, TruncB}, "sadd");
- Value *Add = Builder->CreateExtractValue(Call, 0, "sadd.result");
- Value *ZExt = Builder->CreateZExt(Add, OrigAdd->getType());
-
- // The inner add was the result of the narrow add, zero extended to the
- // wider type. Replace it with the result computed by the intrinsic.
- IC.replaceInstUsesWith(*OrigAdd, ZExt);
-
- // The original icmp gets replaced with the overflow value.
- return ExtractValueInst::Create(Call, 1, "sadd.overflow");
-}
-
bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS,
Value *RHS, Instruction &OrigI,
Value *&Result, Constant *&Overflow) {
@@ -2603,8 +3412,10 @@ bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS,
if (OR == OverflowResult::AlwaysOverflows)
return SetResult(Builder->CreateAdd(LHS, RHS), Builder->getTrue(), true);
+
+ // Fall through from uadd into sadd.
+ LLVM_FALLTHROUGH;
}
- // FALL THROUGH uadd into sadd
case OCF_SIGNED_ADD: {
// X + 0 -> {X, false}
if (match(RHS, m_Zero()))
@@ -2644,7 +3455,8 @@ bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS,
true);
if (OR == OverflowResult::AlwaysOverflows)
return SetResult(Builder->CreateMul(LHS, RHS), Builder->getTrue(), true);
- } // FALL THROUGH
+ LLVM_FALLTHROUGH;
+ }
case OCF_SIGNED_MUL:
// X * undef -> undef
if (isa<UndefValue>(RHS))
@@ -2682,7 +3494,7 @@ bool InstCombiner::OptimizeOverflowCheck(OverflowCheckFlavor OCF, Value *LHS,
/// \param OtherVal The other argument of compare instruction.
/// \returns Instruction which must replace the compare instruction, NULL if no
/// replacement required.
-static Instruction *ProcessUMulZExtIdiom(ICmpInst &I, Value *MulVal,
+static Instruction *processUMulZExtIdiom(ICmpInst &I, Value *MulVal,
Value *OtherVal, InstCombiner &IC) {
// Don't bother doing this transformation for pointers, don't do it for
// vectors.
@@ -2906,8 +3718,8 @@ static Instruction *ProcessUMulZExtIdiom(ICmpInst &I, Value *MulVal,
/// When performing a comparison against a constant, it is possible that not all
/// the bits in the LHS are demanded. This helper method computes the mask that
/// IS demanded.
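/// For example, a sign bit check such as "icmp slt i32 %x, 0" only demands the
/// sign bit of %x.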
-static APInt DemandedBitsLHSMask(ICmpInst &I,
- unsigned BitWidth, bool isSignCheck) {
+static APInt getDemandedBitsLHSMask(ICmpInst &I, unsigned BitWidth,
+ bool isSignCheck) {
if (isSignCheck)
return APInt::getSignBit(BitWidth);
@@ -2981,7 +3793,7 @@ static bool swapMayExposeCSEOpportunities(const Value * Op0,
}
/// \brief Check that one use is in the same block as the definition and all
-/// other uses are in blocks dominated by a given block
+/// other uses are in blocks dominated by a given block.
///
/// \param DI Definition
/// \param UI Use
@@ -2994,21 +3806,18 @@ bool InstCombiner::dominatesAllUses(const Instruction *DI,
const Instruction *UI,
const BasicBlock *DB) const {
assert(DI && UI && "Instruction not defined\n");
- // ignore incomplete definitions
+ // Ignore incomplete definitions.
if (!DI->getParent())
return false;
- // DI and UI must be in the same block
+ // DI and UI must be in the same block.
if (DI->getParent() != UI->getParent())
return false;
- // Protect from self-referencing blocks
+ // Protect from self-referencing blocks.
if (DI->getParent() == DB)
return false;
- // DominatorTree available?
- if (!DT)
- return false;
for (const User *U : DI->users()) {
auto *Usr = cast<Instruction>(U);
- if (Usr != UI && !DT->dominates(DB, Usr->getParent()))
+ if (Usr != UI && !DT.dominates(DB, Usr->getParent()))
return false;
}
return true;
@@ -3067,8 +3876,7 @@ static bool isChainSelectCmpBranch(const SelectInst *SI) {
/// are equal, the optimization can work only for EQ predicates. This is not a
/// major restriction since a NE compare should be 'normalized' to an equal
/// compare, which usually happens in the combiner and test case
-/// select-cmp-br.ll
-/// checks for it.
+/// select-cmp-br.ll checks for it.
bool InstCombiner::replacedSelectWithOperand(SelectInst *SI,
const ICmpInst *Icmp,
const unsigned SIOpd) {
@@ -3076,7 +3884,7 @@ bool InstCombiner::replacedSelectWithOperand(SelectInst *SI,
if (isChainSelectCmpBranch(SI) && Icmp->getPredicate() == ICmpInst::ICMP_EQ) {
BasicBlock *Succ = SI->getParent()->getTerminator()->getSuccessor(1);
// The check for the unique predecessor is not the best that can be
- // done. But it protects efficiently against cases like when SI's
+ // done. But it protects efficiently against cases like when SI's
// home block has two successors, Succ and Succ1, and Succ1 is a predecessor
// of Succ. Then SI can't be replaced by SIOpd because the use that gets
// replaced can be reached on either path. So the uniqueness check
@@ -3093,6 +3901,239 @@ bool InstCombiner::replacedSelectWithOperand(SelectInst *SI,
return false;
}
+/// Try to fold the comparison based on range information we can get by checking
+/// whether bits are known to be zero or one in the inputs.
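+/// For example, if the top 24 bits of %x are known to be zero, then
+/// "icmp ugt i32 %x, 255" can be folded to false.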
+Instruction *InstCombiner::foldICmpUsingKnownBits(ICmpInst &I) {
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ Type *Ty = Op0->getType();
+ ICmpInst::Predicate Pred = I.getPredicate();
+
+ // Get scalar or pointer size.
+ unsigned BitWidth = Ty->isIntOrIntVectorTy()
+ ? Ty->getScalarSizeInBits()
+ : DL.getTypeSizeInBits(Ty->getScalarType());
+
+ if (!BitWidth)
+ return nullptr;
+
+ // If this is a normal comparison, it demands all bits. If it is a sign bit
+ // comparison, it only demands the sign bit.
+ bool IsSignBit = false;
+ const APInt *CmpC;
+ if (match(Op1, m_APInt(CmpC))) {
+ bool UnusedBit;
+ IsSignBit = isSignBitCheck(Pred, *CmpC, UnusedBit);
+ }
+
+ APInt Op0KnownZero(BitWidth, 0), Op0KnownOne(BitWidth, 0);
+ APInt Op1KnownZero(BitWidth, 0), Op1KnownOne(BitWidth, 0);
+
+ if (SimplifyDemandedBits(I.getOperandUse(0),
+ getDemandedBitsLHSMask(I, BitWidth, IsSignBit),
+ Op0KnownZero, Op0KnownOne, 0))
+ return &I;
+
+ if (SimplifyDemandedBits(I.getOperandUse(1), APInt::getAllOnesValue(BitWidth),
+ Op1KnownZero, Op1KnownOne, 0))
+ return &I;
+
+ // Given the known and unknown bits, compute a range that the LHS could be
+ // in. Compute the Min, Max and RHS values based on the known bits. For the
+ // EQ and NE we use unsigned values.
+ APInt Op0Min(BitWidth, 0), Op0Max(BitWidth, 0);
+ APInt Op1Min(BitWidth, 0), Op1Max(BitWidth, 0);
+ if (I.isSigned()) {
+ computeSignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne, Op0Min,
+ Op0Max);
+ computeSignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne, Op1Min,
+ Op1Max);
+ } else {
+ computeUnsignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne, Op0Min,
+ Op0Max);
+ computeUnsignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne, Op1Min,
+ Op1Max);
+ }
+
+ // If Min and Max are known to be the same, then SimplifyDemandedBits
+ // figured out that the LHS is a constant. Constant fold this now, so that
+ // code below can assume that Min != Max.
+ if (!isa<Constant>(Op0) && Op0Min == Op0Max)
+ return new ICmpInst(Pred, ConstantInt::get(Op0->getType(), Op0Min), Op1);
+ if (!isa<Constant>(Op1) && Op1Min == Op1Max)
+ return new ICmpInst(Pred, Op0, ConstantInt::get(Op1->getType(), Op1Min));
+
+ // Based on the range information we know about the LHS, see if we can
+ // simplify this comparison. For example, (x&4) < 8 is always true.
+ switch (Pred) {
+ default:
+ llvm_unreachable("Unknown icmp opcode!");
+ case ICmpInst::ICMP_EQ:
+ case ICmpInst::ICMP_NE: {
+ if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max)) {
+ return Pred == CmpInst::ICMP_EQ
+ ? replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()))
+ : replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+ }
+
+ // If all bits are known zero except for one, then we know at most one bit
+ // is set. If the comparison is against zero, then this is a check to see if
+ // *that* bit is set.
+ APInt Op0KnownZeroInverted = ~Op0KnownZero;
+ if (~Op1KnownZero == 0) {
+ // If the LHS is an AND with the same constant, look through it.
+ Value *LHS = nullptr;
+ const APInt *LHSC;
+ if (!match(Op0, m_And(m_Value(LHS), m_APInt(LHSC))) ||
+ *LHSC != Op0KnownZeroInverted)
+ LHS = Op0;
+
+ Value *X;
+ if (match(LHS, m_Shl(m_One(), m_Value(X)))) {
+ APInt ValToCheck = Op0KnownZeroInverted;
+ Type *XTy = X->getType();
+ if (ValToCheck.isPowerOf2()) {
+ // ((1 << X) & 8) == 0 -> X != 3
+ // ((1 << X) & 8) != 0 -> X == 3
+ auto *CmpC = ConstantInt::get(XTy, ValToCheck.countTrailingZeros());
+ auto NewPred = ICmpInst::getInversePredicate(Pred);
+ return new ICmpInst(NewPred, X, CmpC);
+ } else if ((++ValToCheck).isPowerOf2()) {
+ // ((1 << X) & 7) == 0 -> X >= 3
+ // ((1 << X) & 7) != 0 -> X < 3
+ auto *CmpC = ConstantInt::get(XTy, ValToCheck.countTrailingZeros());
+ auto NewPred =
+ Pred == CmpInst::ICMP_EQ ? CmpInst::ICMP_UGE : CmpInst::ICMP_ULT;
+ return new ICmpInst(NewPred, X, CmpC);
+ }
+ }
+
+ // Check if the LHS is 8 >>u x and the result is a power of 2 like 1.
+ const APInt *CI;
+ if (Op0KnownZeroInverted == 1 &&
+ match(LHS, m_LShr(m_Power2(CI), m_Value(X)))) {
+ // ((8 >>u X) & 1) == 0 -> X != 3
+ // ((8 >>u X) & 1) != 0 -> X == 3
+ unsigned CmpVal = CI->countTrailingZeros();
+ auto NewPred = ICmpInst::getInversePredicate(Pred);
+ return new ICmpInst(NewPred, X, ConstantInt::get(X->getType(), CmpVal));
+ }
+ }
+ break;
+ }
+ case ICmpInst::ICMP_ULT: {
+ if (Op0Max.ult(Op1Min)) // A <u B -> true if max(A) < min(B)
+ return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+ if (Op0Min.uge(Op1Max)) // A <u B -> false if min(A) >= max(B)
+ return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+ if (Op1Min == Op0Max) // A <u B -> A != B if max(A) == min(B)
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+
+ const APInt *CmpC;
+ if (match(Op1, m_APInt(CmpC))) {
+ // A <u C -> A == C-1 if min(A)+1 == C
+ if (Op1Max == Op0Min + 1) {
+ Constant *CMinus1 = ConstantInt::get(Op0->getType(), *CmpC - 1);
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0, CMinus1);
+ }
+ // (x <u 2147483648) -> (x >s -1) -> true if sign bit clear
+ if (CmpC->isMinSignedValue()) {
+ Constant *AllOnes = Constant::getAllOnesValue(Op0->getType());
+ return new ICmpInst(ICmpInst::ICMP_SGT, Op0, AllOnes);
+ }
+ }
+ break;
+ }
+ case ICmpInst::ICMP_UGT: {
+ if (Op0Min.ugt(Op1Max)) // A >u B -> true if min(A) > max(B)
+ return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+
+ if (Op0Max.ule(Op1Min)) // A >u B -> false if max(A) <= min(B)
+ return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+
+ if (Op1Max == Op0Min) // A >u B -> A != B if min(A) == max(B)
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+
+ const APInt *CmpC;
+ if (match(Op1, m_APInt(CmpC))) {
+ // A >u C -> A == C+1 if max(A)-1 == C
+ if (*CmpC == Op0Max - 1)
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+ ConstantInt::get(Op1->getType(), *CmpC + 1));
+
+ // (x >u 2147483647) -> (x <s 0) -> true if sign bit set
+ if (CmpC->isMaxSignedValue())
+ return new ICmpInst(ICmpInst::ICMP_SLT, Op0,
+ Constant::getNullValue(Op0->getType()));
+ }
+ break;
+ }
+ case ICmpInst::ICMP_SLT:
+ if (Op0Max.slt(Op1Min)) // A <s B -> true if max(A) < min(B)
+ return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+ if (Op0Min.sge(Op1Max)) // A <s B -> false if min(A) >= max(B)
+ return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+ if (Op1Min == Op0Max) // A <s B -> A != B if max(A) == min(B)
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+ if (Op1Max == Op0Min + 1) // A <s C -> A == C-1 if min(A)+1 == C
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+ Builder->getInt(CI->getValue() - 1));
+ }
+ break;
+ case ICmpInst::ICMP_SGT:
+ if (Op0Min.sgt(Op1Max)) // A >s B -> true if min(A) > max(B)
+ return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+ if (Op0Max.sle(Op1Min)) // A >s B -> false if max(A) <= min(B)
+ return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+
+ if (Op1Max == Op0Min) // A >s B -> A != B if min(A) == max(B)
+ return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
+ if (Op1Min == Op0Max - 1) // A >s C -> A == C+1 if max(A)-1 == C
+ return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
+ Builder->getInt(CI->getValue() + 1));
+ }
+ break;
+ case ICmpInst::ICMP_SGE:
+ assert(!isa<ConstantInt>(Op1) && "ICMP_SGE with ConstantInt not folded!");
+ if (Op0Min.sge(Op1Max)) // A >=s B -> true if min(A) >= max(B)
+ return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+ if (Op0Max.slt(Op1Min)) // A >=s B -> false if max(A) < min(B)
+ return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+ break;
+ case ICmpInst::ICMP_SLE:
+ assert(!isa<ConstantInt>(Op1) && "ICMP_SLE with ConstantInt not folded!");
+ if (Op0Max.sle(Op1Min)) // A <=s B -> true if max(A) <= min(B)
+ return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+ if (Op0Min.sgt(Op1Max)) // A <=s B -> false if min(A) > max(B)
+ return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+ break;
+ case ICmpInst::ICMP_UGE:
+ assert(!isa<ConstantInt>(Op1) && "ICMP_UGE with ConstantInt not folded!");
+ if (Op0Min.uge(Op1Max)) // A >=u B -> true if min(A) >= max(B)
+ return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+ if (Op0Max.ult(Op1Min)) // A >=u B -> false if max(A) < min(B)
+ return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+ break;
+ case ICmpInst::ICMP_ULE:
+ assert(!isa<ConstantInt>(Op1) && "ICMP_ULE with ConstantInt not folded!");
+ if (Op0Max.ule(Op1Min)) // A <=u B -> true if max(A) <= min(B)
+ return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
+ if (Op0Min.ugt(Op1Max)) // A <=u B -> false if min(A) > max(B)
+ return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
+ break;
+ }
+
+ // Turn a signed comparison into an unsigned one if both operands are known to
+ // have the same sign.
+ if (I.isSigned() &&
+ ((Op0KnownZero.isNegative() && Op1KnownZero.isNegative()) ||
+ (Op0KnownOne.isNegative() && Op1KnownOne.isNegative())))
+ return new ICmpInst(I.getUnsignedPredicate(), Op0, Op1);
+
+ return nullptr;
+}
+
/// If we have an icmp le or icmp ge instruction with a constant operand, turn
/// it into the appropriate icmp lt or icmp gt instruction. This transform
/// allows them to be folded in visitICmpInst.
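/// For example, "icmp sle i32 %x, 5" becomes "icmp slt i32 %x, 6".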
@@ -3131,6 +4172,7 @@ static ICmpInst *canonicalizeCmpWithConstant(ICmpInst &I) {
if (isa<UndefValue>(Elt))
continue;
+
// Bail out if we can't determine if this constant is min/max or if we
// know that this constant is min/max.
auto *CI = dyn_cast<ConstantInt>(Elt);
@@ -3167,7 +4209,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
}
if (Value *V =
- SimplifyICmpInst(I.getPredicate(), Op0, Op1, DL, TLI, DT, AC, &I))
+ SimplifyICmpInst(I.getPredicate(), Op0, Op1, DL, &TLI, &DT, &AC, &I))
return replaceInstUsesWith(I, V);
// comparing -val or val with non-zero is the same as just comparing val
@@ -3202,28 +4244,28 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
case ICmpInst::ICMP_UGT:
std::swap(Op0, Op1); // Change icmp ugt -> icmp ult
- // FALL THROUGH
+ LLVM_FALLTHROUGH;
case ICmpInst::ICMP_ULT:{ // icmp ult i1 A, B -> ~A & B
Value *Not = Builder->CreateNot(Op0, I.getName() + "tmp");
return BinaryOperator::CreateAnd(Not, Op1);
}
case ICmpInst::ICMP_SGT:
std::swap(Op0, Op1); // Change icmp sgt -> icmp slt
- // FALL THROUGH
+ LLVM_FALLTHROUGH;
case ICmpInst::ICMP_SLT: { // icmp slt i1 A, B -> A & ~B
Value *Not = Builder->CreateNot(Op1, I.getName() + "tmp");
return BinaryOperator::CreateAnd(Not, Op0);
}
case ICmpInst::ICMP_UGE:
std::swap(Op0, Op1); // Change icmp uge -> icmp ule
- // FALL THROUGH
+ LLVM_FALLTHROUGH;
case ICmpInst::ICMP_ULE: { // icmp ule i1 A, B -> ~A | B
Value *Not = Builder->CreateNot(Op0, I.getName() + "tmp");
return BinaryOperator::CreateOr(Not, Op1);
}
case ICmpInst::ICMP_SGE:
std::swap(Op0, Op1); // Change icmp sge -> icmp sle
- // FALL THROUGH
+ LLVM_FALLTHROUGH;
case ICmpInst::ICMP_SLE: { // icmp sle i1 A, B -> A | ~B
Value *Not = Builder->CreateNot(Op1, I.getName() + "tmp");
return BinaryOperator::CreateOr(Not, Op0);
@@ -3234,372 +4276,11 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
if (ICmpInst *NewICmp = canonicalizeCmpWithConstant(I))
return NewICmp;
- unsigned BitWidth = 0;
- if (Ty->isIntOrIntVectorTy())
- BitWidth = Ty->getScalarSizeInBits();
- else // Get pointer size.
- BitWidth = DL.getTypeSizeInBits(Ty->getScalarType());
-
- bool isSignBit = false;
-
- // See if we are doing a comparison with a constant.
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
- Value *A = nullptr, *B = nullptr;
-
- // Match the following pattern, which is a common idiom when writing
- // overflow-safe integer arithmetic function. The source performs an
- // addition in wider type, and explicitly checks for overflow using
- // comparisons against INT_MIN and INT_MAX. Simplify this by using the
- // sadd_with_overflow intrinsic.
- //
- // TODO: This could probably be generalized to handle other overflow-safe
- // operations if we worked out the formulas to compute the appropriate
- // magic constants.
- //
- // sum = a + b
- // if (sum+128 >u 255) ... -> llvm.sadd.with.overflow.i8
- {
- ConstantInt *CI2; // I = icmp ugt (add (add A, B), CI2), CI
- if (I.getPredicate() == ICmpInst::ICMP_UGT &&
- match(Op0, m_Add(m_Add(m_Value(A), m_Value(B)), m_ConstantInt(CI2))))
- if (Instruction *Res = ProcessUGT_ADDCST_ADD(I, A, B, CI2, CI, *this))
- return Res;
- }
-
- // (icmp sgt smin(PosA, B) 0) -> (icmp sgt B 0)
- if (CI->isZero() && I.getPredicate() == ICmpInst::ICMP_SGT)
- if (auto *SI = dyn_cast<SelectInst>(Op0)) {
- SelectPatternResult SPR = matchSelectPattern(SI, A, B);
- if (SPR.Flavor == SPF_SMIN) {
- if (isKnownPositive(A, DL))
- return new ICmpInst(I.getPredicate(), B, CI);
- if (isKnownPositive(B, DL))
- return new ICmpInst(I.getPredicate(), A, CI);
- }
- }
-
-
- // The following transforms are only 'worth it' if the only user of the
- // subtraction is the icmp.
- if (Op0->hasOneUse()) {
- // (icmp ne/eq (sub A B) 0) -> (icmp ne/eq A, B)
- if (I.isEquality() && CI->isZero() &&
- match(Op0, m_Sub(m_Value(A), m_Value(B))))
- return new ICmpInst(I.getPredicate(), A, B);
-
- // (icmp sgt (sub nsw A B), -1) -> (icmp sge A, B)
- if (I.getPredicate() == ICmpInst::ICMP_SGT && CI->isAllOnesValue() &&
- match(Op0, m_NSWSub(m_Value(A), m_Value(B))))
- return new ICmpInst(ICmpInst::ICMP_SGE, A, B);
-
- // (icmp sgt (sub nsw A B), 0) -> (icmp sgt A, B)
- if (I.getPredicate() == ICmpInst::ICMP_SGT && CI->isZero() &&
- match(Op0, m_NSWSub(m_Value(A), m_Value(B))))
- return new ICmpInst(ICmpInst::ICMP_SGT, A, B);
-
- // (icmp slt (sub nsw A B), 0) -> (icmp slt A, B)
- if (I.getPredicate() == ICmpInst::ICMP_SLT && CI->isZero() &&
- match(Op0, m_NSWSub(m_Value(A), m_Value(B))))
- return new ICmpInst(ICmpInst::ICMP_SLT, A, B);
-
- // (icmp slt (sub nsw A B), 1) -> (icmp sle A, B)
- if (I.getPredicate() == ICmpInst::ICMP_SLT && CI->isOne() &&
- match(Op0, m_NSWSub(m_Value(A), m_Value(B))))
- return new ICmpInst(ICmpInst::ICMP_SLE, A, B);
- }
-
- if (I.isEquality()) {
- ConstantInt *CI2;
- if (match(Op0, m_AShr(m_ConstantInt(CI2), m_Value(A))) ||
- match(Op0, m_LShr(m_ConstantInt(CI2), m_Value(A)))) {
- // (icmp eq/ne (ashr/lshr const2, A), const1)
- if (Instruction *Inst = FoldICmpCstShrCst(I, Op0, A, CI, CI2))
- return Inst;
- }
- if (match(Op0, m_Shl(m_ConstantInt(CI2), m_Value(A)))) {
- // (icmp eq/ne (shl const2, A), const1)
- if (Instruction *Inst = FoldICmpCstShlCst(I, Op0, A, CI, CI2))
- return Inst;
- }
- }
-
- // If this comparison is a normal comparison, it demands all
- // bits, if it is a sign bit comparison, it only demands the sign bit.
- bool UnusedBit;
- isSignBit = isSignBitCheck(I.getPredicate(), CI, UnusedBit);
-
- // Canonicalize icmp instructions based on dominating conditions.
- BasicBlock *Parent = I.getParent();
- BasicBlock *Dom = Parent->getSinglePredecessor();
- auto *BI = Dom ? dyn_cast<BranchInst>(Dom->getTerminator()) : nullptr;
- ICmpInst::Predicate Pred;
- BasicBlock *TrueBB, *FalseBB;
- ConstantInt *CI2;
- if (BI && match(BI, m_Br(m_ICmp(Pred, m_Specific(Op0), m_ConstantInt(CI2)),
- TrueBB, FalseBB)) &&
- TrueBB != FalseBB) {
- ConstantRange CR = ConstantRange::makeAllowedICmpRegion(I.getPredicate(),
- CI->getValue());
- ConstantRange DominatingCR =
- (Parent == TrueBB)
- ? ConstantRange::makeExactICmpRegion(Pred, CI2->getValue())
- : ConstantRange::makeExactICmpRegion(
- CmpInst::getInversePredicate(Pred), CI2->getValue());
- ConstantRange Intersection = DominatingCR.intersectWith(CR);
- ConstantRange Difference = DominatingCR.difference(CR);
- if (Intersection.isEmptySet())
- return replaceInstUsesWith(I, Builder->getFalse());
- if (Difference.isEmptySet())
- return replaceInstUsesWith(I, Builder->getTrue());
- // Canonicalizing a sign bit comparison that gets used in a branch,
- // pessimizes codegen by generating branch on zero instruction instead
- // of a test and branch. So we avoid canonicalizing in such situations
- // because test and branch instruction has better branch displacement
- // than compare and branch instruction.
- if (!isBranchOnSignBitCheck(I, isSignBit) && !I.isEquality()) {
- if (auto *AI = Intersection.getSingleElement())
- return new ICmpInst(ICmpInst::ICMP_EQ, Op0, Builder->getInt(*AI));
- if (auto *AD = Difference.getSingleElement())
- return new ICmpInst(ICmpInst::ICMP_NE, Op0, Builder->getInt(*AD));
- }
- }
- }
-
- // See if we can fold the comparison based on range information we can get
- // by checking whether bits are known to be zero or one in the input.
- if (BitWidth != 0) {
- APInt Op0KnownZero(BitWidth, 0), Op0KnownOne(BitWidth, 0);
- APInt Op1KnownZero(BitWidth, 0), Op1KnownOne(BitWidth, 0);
-
- if (SimplifyDemandedBits(I.getOperandUse(0),
- DemandedBitsLHSMask(I, BitWidth, isSignBit),
- Op0KnownZero, Op0KnownOne, 0))
- return &I;
- if (SimplifyDemandedBits(I.getOperandUse(1),
- APInt::getAllOnesValue(BitWidth), Op1KnownZero,
- Op1KnownOne, 0))
- return &I;
-
- // Given the known and unknown bits, compute a range that the LHS could be
- // in. Compute the Min, Max and RHS values based on the known bits. For the
- // EQ and NE we use unsigned values.
- APInt Op0Min(BitWidth, 0), Op0Max(BitWidth, 0);
- APInt Op1Min(BitWidth, 0), Op1Max(BitWidth, 0);
- if (I.isSigned()) {
- ComputeSignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne,
- Op0Min, Op0Max);
- ComputeSignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne,
- Op1Min, Op1Max);
- } else {
- ComputeUnsignedMinMaxValuesFromKnownBits(Op0KnownZero, Op0KnownOne,
- Op0Min, Op0Max);
- ComputeUnsignedMinMaxValuesFromKnownBits(Op1KnownZero, Op1KnownOne,
- Op1Min, Op1Max);
- }
-
- // If Min and Max are known to be the same, then SimplifyDemandedBits
- // figured out that the LHS is a constant. Just constant fold this now so
- // that code below can assume that Min != Max.
- if (!isa<Constant>(Op0) && Op0Min == Op0Max)
- return new ICmpInst(I.getPredicate(),
- ConstantInt::get(Op0->getType(), Op0Min), Op1);
- if (!isa<Constant>(Op1) && Op1Min == Op1Max)
- return new ICmpInst(I.getPredicate(), Op0,
- ConstantInt::get(Op1->getType(), Op1Min));
-
- // Based on the range information we know about the LHS, see if we can
- // simplify this comparison. For example, (x&4) < 8 is always true.
- switch (I.getPredicate()) {
- default: llvm_unreachable("Unknown icmp opcode!");
- case ICmpInst::ICMP_EQ: {
- if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max))
- return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
-
- // If all bits are known zero except for one, then we know at most one
- // bit is set. If the comparison is against zero, then this is a check
- // to see if *that* bit is set.
- APInt Op0KnownZeroInverted = ~Op0KnownZero;
- if (~Op1KnownZero == 0) {
- // If the LHS is an AND with the same constant, look through it.
- Value *LHS = nullptr;
- ConstantInt *LHSC = nullptr;
- if (!match(Op0, m_And(m_Value(LHS), m_ConstantInt(LHSC))) ||
- LHSC->getValue() != Op0KnownZeroInverted)
- LHS = Op0;
-
- // If the LHS is 1 << x, and we know the result is a power of 2 like 8,
- // then turn "((1 << x)&8) == 0" into "x != 3".
- // or turn "((1 << x)&7) == 0" into "x > 2".
- Value *X = nullptr;
- if (match(LHS, m_Shl(m_One(), m_Value(X)))) {
- APInt ValToCheck = Op0KnownZeroInverted;
- if (ValToCheck.isPowerOf2()) {
- unsigned CmpVal = ValToCheck.countTrailingZeros();
- return new ICmpInst(ICmpInst::ICMP_NE, X,
- ConstantInt::get(X->getType(), CmpVal));
- } else if ((++ValToCheck).isPowerOf2()) {
- unsigned CmpVal = ValToCheck.countTrailingZeros() - 1;
- return new ICmpInst(ICmpInst::ICMP_UGT, X,
- ConstantInt::get(X->getType(), CmpVal));
- }
- }
-
- // If the LHS is 8 >>u x, and we know the result is a power of 2 like 1,
- // then turn "((8 >>u x)&1) == 0" into "x != 3".
- const APInt *CI;
- if (Op0KnownZeroInverted == 1 &&
- match(LHS, m_LShr(m_Power2(CI), m_Value(X))))
- return new ICmpInst(ICmpInst::ICMP_NE, X,
- ConstantInt::get(X->getType(),
- CI->countTrailingZeros()));
- }
- break;
- }
- case ICmpInst::ICMP_NE: {
- if (Op0Max.ult(Op1Min) || Op0Min.ugt(Op1Max))
- return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
-
- // If all bits are known zero except for one, then we know at most one
- // bit is set. If the comparison is against zero, then this is a check
- // to see if *that* bit is set.
- APInt Op0KnownZeroInverted = ~Op0KnownZero;
- if (~Op1KnownZero == 0) {
- // If the LHS is an AND with the same constant, look through it.
- Value *LHS = nullptr;
- ConstantInt *LHSC = nullptr;
- if (!match(Op0, m_And(m_Value(LHS), m_ConstantInt(LHSC))) ||
- LHSC->getValue() != Op0KnownZeroInverted)
- LHS = Op0;
-
- // If the LHS is 1 << x, and we know the result is a power of 2 like 8,
- // then turn "((1 << x)&8) != 0" into "x == 3".
- // or turn "((1 << x)&7) != 0" into "x < 3".
- Value *X = nullptr;
- if (match(LHS, m_Shl(m_One(), m_Value(X)))) {
- APInt ValToCheck = Op0KnownZeroInverted;
- if (ValToCheck.isPowerOf2()) {
- unsigned CmpVal = ValToCheck.countTrailingZeros();
- return new ICmpInst(ICmpInst::ICMP_EQ, X,
- ConstantInt::get(X->getType(), CmpVal));
- } else if ((++ValToCheck).isPowerOf2()) {
- unsigned CmpVal = ValToCheck.countTrailingZeros();
- return new ICmpInst(ICmpInst::ICMP_ULT, X,
- ConstantInt::get(X->getType(), CmpVal));
- }
- }
-
- // If the LHS is 8 >>u x, and we know the result is a power of 2 like 1,
- // then turn "((8 >>u x)&1) != 0" into "x == 3".
- const APInt *CI;
- if (Op0KnownZeroInverted == 1 &&
- match(LHS, m_LShr(m_Power2(CI), m_Value(X))))
- return new ICmpInst(ICmpInst::ICMP_EQ, X,
- ConstantInt::get(X->getType(),
- CI->countTrailingZeros()));
- }
- break;
- }
- case ICmpInst::ICMP_ULT:
- if (Op0Max.ult(Op1Min)) // A <u B -> true if max(A) < min(B)
- return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
- if (Op0Min.uge(Op1Max)) // A <u B -> false if min(A) >= max(B)
- return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
- if (Op1Min == Op0Max) // A <u B -> A != B if max(A) == min(B)
- return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
- if (Op1Max == Op0Min+1) // A <u C -> A == C-1 if min(A)+1 == C
- return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
- Builder->getInt(CI->getValue()-1));
-
- // (x <u 2147483648) -> (x >s -1) -> true if sign bit clear
- if (CI->isMinValue(true))
- return new ICmpInst(ICmpInst::ICMP_SGT, Op0,
- Constant::getAllOnesValue(Op0->getType()));
- }
- break;
- case ICmpInst::ICMP_UGT:
- if (Op0Min.ugt(Op1Max)) // A >u B -> true if min(A) > max(B)
- return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
- if (Op0Max.ule(Op1Min)) // A >u B -> false if max(A) <= max(B)
- return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
-
- if (Op1Max == Op0Min) // A >u B -> A != B if min(A) == max(B)
- return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
- if (Op1Min == Op0Max-1) // A >u C -> A == C+1 if max(a)-1 == C
- return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
- Builder->getInt(CI->getValue()+1));
-
- // (x >u 2147483647) -> (x <s 0) -> true if sign bit set
- if (CI->isMaxValue(true))
- return new ICmpInst(ICmpInst::ICMP_SLT, Op0,
- Constant::getNullValue(Op0->getType()));
- }
- break;
- case ICmpInst::ICMP_SLT:
- if (Op0Max.slt(Op1Min)) // A <s B -> true if max(A) < min(C)
- return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
- if (Op0Min.sge(Op1Max)) // A <s B -> false if min(A) >= max(C)
- return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
- if (Op1Min == Op0Max) // A <s B -> A != B if max(A) == min(B)
- return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
- if (Op1Max == Op0Min+1) // A <s C -> A == C-1 if min(A)+1 == C
- return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
- Builder->getInt(CI->getValue()-1));
- }
- break;
- case ICmpInst::ICMP_SGT:
- if (Op0Min.sgt(Op1Max)) // A >s B -> true if min(A) > max(B)
- return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
- if (Op0Max.sle(Op1Min)) // A >s B -> false if max(A) <= min(B)
- return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
-
- if (Op1Max == Op0Min) // A >s B -> A != B if min(A) == max(B)
- return new ICmpInst(ICmpInst::ICMP_NE, Op0, Op1);
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
- if (Op1Min == Op0Max-1) // A >s C -> A == C+1 if max(A)-1 == C
- return new ICmpInst(ICmpInst::ICMP_EQ, Op0,
- Builder->getInt(CI->getValue()+1));
- }
- break;
- case ICmpInst::ICMP_SGE:
- assert(!isa<ConstantInt>(Op1) && "ICMP_SGE with ConstantInt not folded!");
- if (Op0Min.sge(Op1Max)) // A >=s B -> true if min(A) >= max(B)
- return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
- if (Op0Max.slt(Op1Min)) // A >=s B -> false if max(A) < min(B)
- return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
- break;
- case ICmpInst::ICMP_SLE:
- assert(!isa<ConstantInt>(Op1) && "ICMP_SLE with ConstantInt not folded!");
- if (Op0Max.sle(Op1Min)) // A <=s B -> true if max(A) <= min(B)
- return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
- if (Op0Min.sgt(Op1Max)) // A <=s B -> false if min(A) > max(B)
- return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
- break;
- case ICmpInst::ICMP_UGE:
- assert(!isa<ConstantInt>(Op1) && "ICMP_UGE with ConstantInt not folded!");
- if (Op0Min.uge(Op1Max)) // A >=u B -> true if min(A) >= max(B)
- return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
- if (Op0Max.ult(Op1Min)) // A >=u B -> false if max(A) < min(B)
- return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
- break;
- case ICmpInst::ICMP_ULE:
- assert(!isa<ConstantInt>(Op1) && "ICMP_ULE with ConstantInt not folded!");
- if (Op0Max.ule(Op1Min)) // A <=u B -> true if max(A) <= min(B)
- return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
- if (Op0Min.ugt(Op1Max)) // A <=u B -> false if min(A) > max(B)
- return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
- break;
- }
+ if (Instruction *Res = foldICmpWithConstant(I))
+ return Res;
- // Turn a signed comparison into an unsigned one if both operands
- // are known to have the same sign.
- if (I.isSigned() &&
- ((Op0KnownZero.isNegative() && Op1KnownZero.isNegative()) ||
- (Op0KnownOne.isNegative() && Op1KnownOne.isNegative())))
- return new ICmpInst(I.getUnsignedPredicate(), Op0, Op1);
- }
+ if (Instruction *Res = foldICmpUsingKnownBits(I))
+ return Res;
// Test if the ICmpInst instruction is used exclusively by a select as
// part of a minimum or maximum operation. If so, refrain from doing
@@ -3614,122 +4295,18 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
(SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1))
return nullptr;
- // See if we are doing a comparison between a constant and an instruction that
- // can be folded into the comparison.
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Op1)) {
- Value *A = nullptr, *B = nullptr;
- // Since the RHS is a ConstantInt (CI), if the left hand side is an
- // instruction, see if that instruction also has constants so that the
- // instruction can be folded into the icmp
- if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
- if (Instruction *Res = visitICmpInstWithInstAndIntCst(I, LHSI, CI))
- return Res;
-
- // (icmp eq/ne (udiv A, B), 0) -> (icmp ugt/ule i32 B, A)
- if (I.isEquality() && CI->isZero() &&
- match(Op0, m_UDiv(m_Value(A), m_Value(B)))) {
- ICmpInst::Predicate Pred = I.getPredicate() == ICmpInst::ICMP_EQ
- ? ICmpInst::ICMP_UGT
- : ICmpInst::ICMP_ULE;
- return new ICmpInst(Pred, B, A);
- }
- }
-
- // Handle icmp with constant (but not simple integer constant) RHS
- if (Constant *RHSC = dyn_cast<Constant>(Op1)) {
- if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
- switch (LHSI->getOpcode()) {
- case Instruction::GetElementPtr:
- // icmp pred GEP (P, int 0, int 0, int 0), null -> icmp pred P, null
- if (RHSC->isNullValue() &&
- cast<GetElementPtrInst>(LHSI)->hasAllZeroIndices())
- return new ICmpInst(I.getPredicate(), LHSI->getOperand(0),
- Constant::getNullValue(LHSI->getOperand(0)->getType()));
- break;
- case Instruction::PHI:
- // Only fold icmp into the PHI if the phi and icmp are in the same
- // block. If in the same block, we're encouraging jump threading. If
- // not, we are just pessimizing the code by making an i1 phi.
- if (LHSI->getParent() == I.getParent())
- if (Instruction *NV = FoldOpIntoPhi(I))
- return NV;
- break;
- case Instruction::Select: {
- // If either operand of the select is a constant, we can fold the
- // comparison into the select arms, which will cause one to be
- // constant folded and the select turned into a bitwise or.
- Value *Op1 = nullptr, *Op2 = nullptr;
- ConstantInt *CI = nullptr;
- if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(1))) {
- Op1 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
- CI = dyn_cast<ConstantInt>(Op1);
- }
- if (Constant *C = dyn_cast<Constant>(LHSI->getOperand(2))) {
- Op2 = ConstantExpr::getICmp(I.getPredicate(), C, RHSC);
- CI = dyn_cast<ConstantInt>(Op2);
- }
-
- // We only want to perform this transformation if it will not lead to
- // additional code. This is true if either both sides of the select
- // fold to a constant (in which case the icmp is replaced with a select
- // which will usually simplify) or this is the only user of the
- // select (in which case we are trading a select+icmp for a simpler
- // select+icmp) or all uses of the select can be replaced based on
- // dominance information ("Global cases").
- bool Transform = false;
- if (Op1 && Op2)
- Transform = true;
- else if (Op1 || Op2) {
- // Local case
- if (LHSI->hasOneUse())
- Transform = true;
- // Global cases
- else if (CI && !CI->isZero())
- // When Op1 is constant try replacing select with second operand.
- // Otherwise Op2 is constant and try replacing select with first
- // operand.
- Transform = replacedSelectWithOperand(cast<SelectInst>(LHSI), &I,
- Op1 ? 2 : 1);
- }
- if (Transform) {
- if (!Op1)
- Op1 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(1),
- RHSC, I.getName());
- if (!Op2)
- Op2 = Builder->CreateICmp(I.getPredicate(), LHSI->getOperand(2),
- RHSC, I.getName());
- return SelectInst::Create(LHSI->getOperand(0), Op1, Op2);
- }
- break;
- }
- case Instruction::IntToPtr:
- // icmp pred inttoptr(X), null -> icmp pred X, 0
- if (RHSC->isNullValue() &&
- DL.getIntPtrType(RHSC->getType()) == LHSI->getOperand(0)->getType())
- return new ICmpInst(I.getPredicate(), LHSI->getOperand(0),
- Constant::getNullValue(LHSI->getOperand(0)->getType()));
- break;
+ if (Instruction *Res = foldICmpInstWithConstant(I))
+ return Res;
- case Instruction::Load:
- // Try to optimize things like "A[i] > 4" to index computations.
- if (GetElementPtrInst *GEP =
- dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) {
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
- if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
- !cast<LoadInst>(LHSI)->isVolatile())
- if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV, I))
- return Res;
- }
- break;
- }
- }
+ if (Instruction *Res = foldICmpInstWithConstantNotInt(I))
+ return Res;
// If we can optimize a 'icmp GEP, P' or 'icmp P, GEP', do so now.
if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op0))
- if (Instruction *NI = FoldGEPICmp(GEP, Op1, I.getPredicate(), I))
+ if (Instruction *NI = foldGEPICmp(GEP, Op1, I.getPredicate(), I))
return NI;
if (GEPOperator *GEP = dyn_cast<GEPOperator>(Op1))
- if (Instruction *NI = FoldGEPICmp(GEP, Op0,
+ if (Instruction *NI = foldGEPICmp(GEP, Op0,
ICmpInst::getSwappedPredicate(I.getPredicate()), I))
return NI;
@@ -3737,10 +4314,10 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
if (Op0->getType()->isPointerTy() && I.isEquality()) {
assert(Op1->getType()->isPointerTy() && "Comparing pointer with non-pointer?");
if (auto *Alloca = dyn_cast<AllocaInst>(GetUnderlyingObject(Op0, DL)))
- if (Instruction *New = FoldAllocaCmp(I, Alloca, Op1))
+ if (Instruction *New = foldAllocaCmp(I, Alloca, Op1))
return New;
if (auto *Alloca = dyn_cast<AllocaInst>(GetUnderlyingObject(Op1, DL)))
- if (Instruction *New = FoldAllocaCmp(I, Alloca, Op0))
+ if (Instruction *New = foldAllocaCmp(I, Alloca, Op0))
return New;
}
@@ -3780,318 +4357,24 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// For generality, we handle any zero-extension of any operand comparison
// with a constant or another cast from the same type.
if (isa<Constant>(Op1) || isa<CastInst>(Op1))
- if (Instruction *R = visitICmpInstWithCastAndCast(I))
+ if (Instruction *R = foldICmpWithCastAndCast(I))
return R;
}
- // Special logic for binary operators.
- BinaryOperator *BO0 = dyn_cast<BinaryOperator>(Op0);
- BinaryOperator *BO1 = dyn_cast<BinaryOperator>(Op1);
- if (BO0 || BO1) {
- CmpInst::Predicate Pred = I.getPredicate();
- bool NoOp0WrapProblem = false, NoOp1WrapProblem = false;
- if (BO0 && isa<OverflowingBinaryOperator>(BO0))
- NoOp0WrapProblem = ICmpInst::isEquality(Pred) ||
- (CmpInst::isUnsigned(Pred) && BO0->hasNoUnsignedWrap()) ||
- (CmpInst::isSigned(Pred) && BO0->hasNoSignedWrap());
- if (BO1 && isa<OverflowingBinaryOperator>(BO1))
- NoOp1WrapProblem = ICmpInst::isEquality(Pred) ||
- (CmpInst::isUnsigned(Pred) && BO1->hasNoUnsignedWrap()) ||
- (CmpInst::isSigned(Pred) && BO1->hasNoSignedWrap());
-
- // Analyze the case when either Op0 or Op1 is an add instruction.
- // Op0 = A + B (or A and B are null); Op1 = C + D (or C and D are null).
- Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr;
- if (BO0 && BO0->getOpcode() == Instruction::Add) {
- A = BO0->getOperand(0);
- B = BO0->getOperand(1);
- }
- if (BO1 && BO1->getOpcode() == Instruction::Add) {
- C = BO1->getOperand(0);
- D = BO1->getOperand(1);
- }
-
- // icmp (X+cst) < 0 --> X < -cst
- if (NoOp0WrapProblem && ICmpInst::isSigned(Pred) && match(Op1, m_Zero()))
- if (ConstantInt *RHSC = dyn_cast_or_null<ConstantInt>(B))
- if (!RHSC->isMinValue(/*isSigned=*/true))
- return new ICmpInst(Pred, A, ConstantExpr::getNeg(RHSC));
-
- // icmp (X+Y), X -> icmp Y, 0 for equalities or if there is no overflow.
- if ((A == Op1 || B == Op1) && NoOp0WrapProblem)
- return new ICmpInst(Pred, A == Op1 ? B : A,
- Constant::getNullValue(Op1->getType()));
-
- // icmp X, (X+Y) -> icmp 0, Y for equalities or if there is no overflow.
- if ((C == Op0 || D == Op0) && NoOp1WrapProblem)
- return new ICmpInst(Pred, Constant::getNullValue(Op0->getType()),
- C == Op0 ? D : C);
-
- // icmp (X+Y), (X+Z) -> icmp Y, Z for equalities or if there is no overflow.
- if (A && C && (A == C || A == D || B == C || B == D) &&
- NoOp0WrapProblem && NoOp1WrapProblem &&
- // Try not to increase register pressure.
- BO0->hasOneUse() && BO1->hasOneUse()) {
- // Determine Y and Z in the form icmp (X+Y), (X+Z).
- Value *Y, *Z;
- if (A == C) {
- // C + B == C + D -> B == D
- Y = B;
- Z = D;
- } else if (A == D) {
- // D + B == C + D -> B == C
- Y = B;
- Z = C;
- } else if (B == C) {
- // A + C == C + D -> A == D
- Y = A;
- Z = D;
- } else {
- assert(B == D);
- // A + D == C + D -> A == C
- Y = A;
- Z = C;
- }
- return new ICmpInst(Pred, Y, Z);
- }
-
- // icmp slt (X + -1), Y -> icmp sle X, Y
- if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLT &&
- match(B, m_AllOnes()))
- return new ICmpInst(CmpInst::ICMP_SLE, A, Op1);
-
- // icmp sge (X + -1), Y -> icmp sgt X, Y
- if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGE &&
- match(B, m_AllOnes()))
- return new ICmpInst(CmpInst::ICMP_SGT, A, Op1);
-
- // icmp sle (X + 1), Y -> icmp slt X, Y
- if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SLE &&
- match(B, m_One()))
- return new ICmpInst(CmpInst::ICMP_SLT, A, Op1);
-
- // icmp sgt (X + 1), Y -> icmp sge X, Y
- if (A && NoOp0WrapProblem && Pred == CmpInst::ICMP_SGT &&
- match(B, m_One()))
- return new ICmpInst(CmpInst::ICMP_SGE, A, Op1);
-
- // icmp sgt X, (Y + -1) -> icmp sge X, Y
- if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SGT &&
- match(D, m_AllOnes()))
- return new ICmpInst(CmpInst::ICMP_SGE, Op0, C);
-
- // icmp sle X, (Y + -1) -> icmp slt X, Y
- if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SLE &&
- match(D, m_AllOnes()))
- return new ICmpInst(CmpInst::ICMP_SLT, Op0, C);
-
- // icmp sge X, (Y + 1) -> icmp sgt X, Y
- if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SGE &&
- match(D, m_One()))
- return new ICmpInst(CmpInst::ICMP_SGT, Op0, C);
-
- // icmp slt X, (Y + 1) -> icmp sle X, Y
- if (C && NoOp1WrapProblem && Pred == CmpInst::ICMP_SLT &&
- match(D, m_One()))
- return new ICmpInst(CmpInst::ICMP_SLE, Op0, C);
-
- // if C1 has greater magnitude than C2:
- // icmp (X + C1), (Y + C2) -> icmp (X + C3), Y
- // s.t. C3 = C1 - C2
- //
- // if C2 has greater magnitude than C1:
- // icmp (X + C1), (Y + C2) -> icmp X, (Y + C3)
- // s.t. C3 = C2 - C1
- if (A && C && NoOp0WrapProblem && NoOp1WrapProblem &&
- (BO0->hasOneUse() || BO1->hasOneUse()) && !I.isUnsigned())
- if (ConstantInt *C1 = dyn_cast<ConstantInt>(B))
- if (ConstantInt *C2 = dyn_cast<ConstantInt>(D)) {
- const APInt &AP1 = C1->getValue();
- const APInt &AP2 = C2->getValue();
- if (AP1.isNegative() == AP2.isNegative()) {
- APInt AP1Abs = C1->getValue().abs();
- APInt AP2Abs = C2->getValue().abs();
- if (AP1Abs.uge(AP2Abs)) {
- ConstantInt *C3 = Builder->getInt(AP1 - AP2);
- Value *NewAdd = Builder->CreateNSWAdd(A, C3);
- return new ICmpInst(Pred, NewAdd, C);
- } else {
- ConstantInt *C3 = Builder->getInt(AP2 - AP1);
- Value *NewAdd = Builder->CreateNSWAdd(C, C3);
- return new ICmpInst(Pred, A, NewAdd);
- }
- }
- }
-
-
- // Analyze the case when either Op0 or Op1 is a sub instruction.
- // Op0 = A - B (or A and B are null); Op1 = C - D (or C and D are null).
- A = nullptr;
- B = nullptr;
- C = nullptr;
- D = nullptr;
- if (BO0 && BO0->getOpcode() == Instruction::Sub) {
- A = BO0->getOperand(0);
- B = BO0->getOperand(1);
- }
- if (BO1 && BO1->getOpcode() == Instruction::Sub) {
- C = BO1->getOperand(0);
- D = BO1->getOperand(1);
- }
-
- // icmp (X-Y), X -> icmp 0, Y for equalities or if there is no overflow.
- if (A == Op1 && NoOp0WrapProblem)
- return new ICmpInst(Pred, Constant::getNullValue(Op1->getType()), B);
-
- // icmp X, (X-Y) -> icmp Y, 0 for equalities or if there is no overflow.
- if (C == Op0 && NoOp1WrapProblem)
- return new ICmpInst(Pred, D, Constant::getNullValue(Op0->getType()));
-
- // icmp (Y-X), (Z-X) -> icmp Y, Z for equalities or if there is no overflow.
- if (B && D && B == D && NoOp0WrapProblem && NoOp1WrapProblem &&
- // Try not to increase register pressure.
- BO0->hasOneUse() && BO1->hasOneUse())
- return new ICmpInst(Pred, A, C);
-
- // icmp (X-Y), (X-Z) -> icmp Z, Y for equalities or if there is no overflow.
- if (A && C && A == C && NoOp0WrapProblem && NoOp1WrapProblem &&
- // Try not to increase register pressure.
- BO0->hasOneUse() && BO1->hasOneUse())
- return new ICmpInst(Pred, D, B);
-
- // icmp (0-X) < cst --> x > -cst
- if (NoOp0WrapProblem && ICmpInst::isSigned(Pred)) {
- Value *X;
- if (match(BO0, m_Neg(m_Value(X))))
- if (ConstantInt *RHSC = dyn_cast<ConstantInt>(Op1))
- if (!RHSC->isMinValue(/*isSigned=*/true))
- return new ICmpInst(I.getSwappedPredicate(), X,
- ConstantExpr::getNeg(RHSC));
- }
-
- BinaryOperator *SRem = nullptr;
- // icmp (srem X, Y), Y
- if (BO0 && BO0->getOpcode() == Instruction::SRem &&
- Op1 == BO0->getOperand(1))
- SRem = BO0;
- // icmp Y, (srem X, Y)
- else if (BO1 && BO1->getOpcode() == Instruction::SRem &&
- Op0 == BO1->getOperand(1))
- SRem = BO1;
- if (SRem) {
- // We don't check hasOneUse to avoid increasing register pressure because
- // the value we use is the same value this instruction was already using.
- switch (SRem == BO0 ? ICmpInst::getSwappedPredicate(Pred) : Pred) {
- default: break;
- case ICmpInst::ICMP_EQ:
- return replaceInstUsesWith(I, ConstantInt::getFalse(I.getType()));
- case ICmpInst::ICMP_NE:
- return replaceInstUsesWith(I, ConstantInt::getTrue(I.getType()));
- case ICmpInst::ICMP_SGT:
- case ICmpInst::ICMP_SGE:
- return new ICmpInst(ICmpInst::ICMP_SGT, SRem->getOperand(1),
- Constant::getAllOnesValue(SRem->getType()));
- case ICmpInst::ICMP_SLT:
- case ICmpInst::ICMP_SLE:
- return new ICmpInst(ICmpInst::ICMP_SLT, SRem->getOperand(1),
- Constant::getNullValue(SRem->getType()));
- }
- }
-
- if (BO0 && BO1 && BO0->getOpcode() == BO1->getOpcode() &&
- BO0->hasOneUse() && BO1->hasOneUse() &&
- BO0->getOperand(1) == BO1->getOperand(1)) {
- switch (BO0->getOpcode()) {
- default: break;
- case Instruction::Add:
- case Instruction::Sub:
- case Instruction::Xor:
- if (I.isEquality()) // a+x icmp eq/ne b+x --> a icmp b
- return new ICmpInst(I.getPredicate(), BO0->getOperand(0),
- BO1->getOperand(0));
- // icmp u/s (a ^ signbit), (b ^ signbit) --> icmp s/u a, b
- if (ConstantInt *CI = dyn_cast<ConstantInt>(BO0->getOperand(1))) {
- if (CI->getValue().isSignBit()) {
- ICmpInst::Predicate Pred = I.isSigned()
- ? I.getUnsignedPredicate()
- : I.getSignedPredicate();
- return new ICmpInst(Pred, BO0->getOperand(0),
- BO1->getOperand(0));
- }
-
- if (BO0->getOpcode() == Instruction::Xor && CI->isMaxValue(true)) {
- ICmpInst::Predicate Pred = I.isSigned()
- ? I.getUnsignedPredicate()
- : I.getSignedPredicate();
- Pred = I.getSwappedPredicate(Pred);
- return new ICmpInst(Pred, BO0->getOperand(0),
- BO1->getOperand(0));
- }
- }
- break;
- case Instruction::Mul:
- if (!I.isEquality())
- break;
-
- if (ConstantInt *CI = dyn_cast<ConstantInt>(BO0->getOperand(1))) {
- // a * Cst icmp eq/ne b * Cst --> a & Mask icmp b & Mask
- // Mask = -1 >> count-trailing-zeros(Cst).
- if (!CI->isZero() && !CI->isOne()) {
- const APInt &AP = CI->getValue();
- ConstantInt *Mask = ConstantInt::get(I.getContext(),
- APInt::getLowBitsSet(AP.getBitWidth(),
- AP.getBitWidth() -
- AP.countTrailingZeros()));
- Value *And1 = Builder->CreateAnd(BO0->getOperand(0), Mask);
- Value *And2 = Builder->CreateAnd(BO1->getOperand(0), Mask);
- return new ICmpInst(I.getPredicate(), And1, And2);
- }
- }
- break;
- case Instruction::UDiv:
- case Instruction::LShr:
- if (I.isSigned())
- break;
- // fall-through
- case Instruction::SDiv:
- case Instruction::AShr:
- if (!BO0->isExact() || !BO1->isExact())
- break;
- return new ICmpInst(I.getPredicate(), BO0->getOperand(0),
- BO1->getOperand(0));
- case Instruction::Shl: {
- bool NUW = BO0->hasNoUnsignedWrap() && BO1->hasNoUnsignedWrap();
- bool NSW = BO0->hasNoSignedWrap() && BO1->hasNoSignedWrap();
- if (!NUW && !NSW)
- break;
- if (!NSW && I.isSigned())
- break;
- return new ICmpInst(I.getPredicate(), BO0->getOperand(0),
- BO1->getOperand(0));
- }
- }
- }
-
- if (BO0) {
- // Transform A & (L - 1) `ult` L --> L != 0
- auto LSubOne = m_Add(m_Specific(Op1), m_AllOnes());
- auto BitwiseAnd =
- m_CombineOr(m_And(m_Value(), LSubOne), m_And(LSubOne, m_Value()));
+ if (Instruction *Res = foldICmpBinOp(I))
+ return Res;
- if (match(BO0, BitwiseAnd) && I.getPredicate() == ICmpInst::ICMP_ULT) {
- auto *Zero = Constant::getNullValue(BO0->getType());
- return new ICmpInst(ICmpInst::ICMP_NE, Op1, Zero);
- }
- }
- }
+ if (Instruction *Res = foldICmpWithMinMax(I))
+ return Res;
- { Value *A, *B;
+ {
+ Value *A, *B;
// Transform (A & ~B) == 0 --> (A & B) != 0
// and (A & ~B) != 0 --> (A & B) == 0
// if A is a power of 2.
if (match(Op0, m_And(m_Value(A), m_Not(m_Value(B)))) &&
match(Op1, m_Zero()) &&
- isKnownToBeAPowerOfTwo(A, DL, false, 0, AC, &I, DT) && I.isEquality())
+ isKnownToBeAPowerOfTwo(A, DL, false, 0, &AC, &I, &DT) && I.isEquality())
return new ICmpInst(I.getInversePredicate(),
Builder->CreateAnd(A, B),
Op1);
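
A quick check of the identity behind the transform above (a minimal C++ sketch, not taken from the patch): when A is a nonzero power of two, (A & ~B) == 0 holds exactly when (A & B) != 0, so only the predicate needs to flip.

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t A = 16;  // a nonzero power of two
  const uint32_t Bs[] = {0u, 15u, 16u, 17u, 0xFFFFFFFFu};
  for (uint32_t B : Bs) {
    bool Original    = (A & ~B) == 0;  // (A & ~B) == 0
    bool Transformed = (A & B) != 0;   // (A & B) != 0
    assert(Original == Transformed);
  }
  return 0;
}
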
@@ -4120,149 +4403,17 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// (zext a) * (zext b) --> llvm.umul.with.overflow.
if (match(Op0, m_Mul(m_ZExt(m_Value(A)), m_ZExt(m_Value(B))))) {
- if (Instruction *R = ProcessUMulZExtIdiom(I, Op0, Op1, *this))
+ if (Instruction *R = processUMulZExtIdiom(I, Op0, Op1, *this))
return R;
}
if (match(Op1, m_Mul(m_ZExt(m_Value(A)), m_ZExt(m_Value(B))))) {
- if (Instruction *R = ProcessUMulZExtIdiom(I, Op1, Op0, *this))
+ if (Instruction *R = processUMulZExtIdiom(I, Op1, Op0, *this))
return R;
}
}
- if (I.isEquality()) {
- Value *A, *B, *C, *D;
-
- if (match(Op0, m_Xor(m_Value(A), m_Value(B)))) {
- if (A == Op1 || B == Op1) { // (A^B) == A -> B == 0
- Value *OtherVal = A == Op1 ? B : A;
- return new ICmpInst(I.getPredicate(), OtherVal,
- Constant::getNullValue(A->getType()));
- }
-
- if (match(Op1, m_Xor(m_Value(C), m_Value(D)))) {
- // A^c1 == C^c2 --> A == C^(c1^c2)
- ConstantInt *C1, *C2;
- if (match(B, m_ConstantInt(C1)) &&
- match(D, m_ConstantInt(C2)) && Op1->hasOneUse()) {
- Constant *NC = Builder->getInt(C1->getValue() ^ C2->getValue());
- Value *Xor = Builder->CreateXor(C, NC);
- return new ICmpInst(I.getPredicate(), A, Xor);
- }
-
- // A^B == A^D -> B == D
- if (A == C) return new ICmpInst(I.getPredicate(), B, D);
- if (A == D) return new ICmpInst(I.getPredicate(), B, C);
- if (B == C) return new ICmpInst(I.getPredicate(), A, D);
- if (B == D) return new ICmpInst(I.getPredicate(), A, C);
- }
- }
-
- if (match(Op1, m_Xor(m_Value(A), m_Value(B))) &&
- (A == Op0 || B == Op0)) {
- // A == (A^B) -> B == 0
- Value *OtherVal = A == Op0 ? B : A;
- return new ICmpInst(I.getPredicate(), OtherVal,
- Constant::getNullValue(A->getType()));
- }
-
- // (X&Z) == (Y&Z) -> (X^Y) & Z == 0
- if (match(Op0, m_OneUse(m_And(m_Value(A), m_Value(B)))) &&
- match(Op1, m_OneUse(m_And(m_Value(C), m_Value(D))))) {
- Value *X = nullptr, *Y = nullptr, *Z = nullptr;
-
- if (A == C) {
- X = B; Y = D; Z = A;
- } else if (A == D) {
- X = B; Y = C; Z = A;
- } else if (B == C) {
- X = A; Y = D; Z = B;
- } else if (B == D) {
- X = A; Y = C; Z = B;
- }
-
- if (X) { // Build (X^Y) & Z
- Op1 = Builder->CreateXor(X, Y);
- Op1 = Builder->CreateAnd(Op1, Z);
- I.setOperand(0, Op1);
- I.setOperand(1, Constant::getNullValue(Op1->getType()));
- return &I;
- }
- }
-
- // Transform (zext A) == (B & (1<<X)-1) --> A == (trunc B)
- // and (B & (1<<X)-1) == (zext A) --> A == (trunc B)
- ConstantInt *Cst1;
- if ((Op0->hasOneUse() &&
- match(Op0, m_ZExt(m_Value(A))) &&
- match(Op1, m_And(m_Value(B), m_ConstantInt(Cst1)))) ||
- (Op1->hasOneUse() &&
- match(Op0, m_And(m_Value(B), m_ConstantInt(Cst1))) &&
- match(Op1, m_ZExt(m_Value(A))))) {
- APInt Pow2 = Cst1->getValue() + 1;
- if (Pow2.isPowerOf2() && isa<IntegerType>(A->getType()) &&
- Pow2.logBase2() == cast<IntegerType>(A->getType())->getBitWidth())
- return new ICmpInst(I.getPredicate(), A,
- Builder->CreateTrunc(B, A->getType()));
- }
-
- // (A >> C) == (B >> C) --> (A^B) u< (1 << C)
- // For lshr and ashr pairs.
- if ((match(Op0, m_OneUse(m_LShr(m_Value(A), m_ConstantInt(Cst1)))) &&
- match(Op1, m_OneUse(m_LShr(m_Value(B), m_Specific(Cst1))))) ||
- (match(Op0, m_OneUse(m_AShr(m_Value(A), m_ConstantInt(Cst1)))) &&
- match(Op1, m_OneUse(m_AShr(m_Value(B), m_Specific(Cst1)))))) {
- unsigned TypeBits = Cst1->getBitWidth();
- unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits);
- if (ShAmt < TypeBits && ShAmt != 0) {
- ICmpInst::Predicate Pred = I.getPredicate() == ICmpInst::ICMP_NE
- ? ICmpInst::ICMP_UGE
- : ICmpInst::ICMP_ULT;
- Value *Xor = Builder->CreateXor(A, B, I.getName() + ".unshifted");
- APInt CmpVal = APInt::getOneBitSet(TypeBits, ShAmt);
- return new ICmpInst(Pred, Xor, Builder->getInt(CmpVal));
- }
- }
-
- // (A << C) == (B << C) --> ((A^B) & (~0U >> C)) == 0
- if (match(Op0, m_OneUse(m_Shl(m_Value(A), m_ConstantInt(Cst1)))) &&
- match(Op1, m_OneUse(m_Shl(m_Value(B), m_Specific(Cst1))))) {
- unsigned TypeBits = Cst1->getBitWidth();
- unsigned ShAmt = (unsigned)Cst1->getLimitedValue(TypeBits);
- if (ShAmt < TypeBits && ShAmt != 0) {
- Value *Xor = Builder->CreateXor(A, B, I.getName() + ".unshifted");
- APInt AndVal = APInt::getLowBitsSet(TypeBits, TypeBits - ShAmt);
- Value *And = Builder->CreateAnd(Xor, Builder->getInt(AndVal),
- I.getName() + ".mask");
- return new ICmpInst(I.getPredicate(), And,
- Constant::getNullValue(Cst1->getType()));
- }
- }
-
- // Transform "icmp eq (trunc (lshr(X, cst1)), cst" to
- // "icmp (and X, mask), cst"
- uint64_t ShAmt = 0;
- if (Op0->hasOneUse() &&
- match(Op0, m_Trunc(m_OneUse(m_LShr(m_Value(A),
- m_ConstantInt(ShAmt))))) &&
- match(Op1, m_ConstantInt(Cst1)) &&
- // Only do this when A has multiple uses. This is most important to do
- // when it exposes other optimizations.
- !A->hasOneUse()) {
- unsigned ASize =cast<IntegerType>(A->getType())->getPrimitiveSizeInBits();
-
- if (ShAmt < ASize) {
- APInt MaskV =
- APInt::getLowBitsSet(ASize, Op0->getType()->getPrimitiveSizeInBits());
- MaskV <<= ShAmt;
-
- APInt CmpV = Cst1->getValue().zext(ASize);
- CmpV <<= ShAmt;
-
- Value *Mask = Builder->CreateAnd(A, Builder->getInt(MaskV));
- return new ICmpInst(I.getPredicate(), Mask, Builder->getInt(CmpV));
- }
- }
- }
+ if (Instruction *Res = foldICmpEquality(I))
+ return Res;
// The 'cmpxchg' instruction returns an aggregate containing the old value and
// an i1 which indicates whether or not we successfully did the swap.
@@ -4284,18 +4435,17 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
Value *X; ConstantInt *Cst;
// icmp X+Cst, X
if (match(Op0, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op1 == X)
- return FoldICmpAddOpCst(I, X, Cst, I.getPredicate());
+ return foldICmpAddOpConst(I, X, Cst, I.getPredicate());
// icmp X, X+Cst
if (match(Op1, m_Add(m_Value(X), m_ConstantInt(Cst))) && Op0 == X)
- return FoldICmpAddOpCst(I, X, Cst, I.getSwappedPredicate());
+ return foldICmpAddOpConst(I, X, Cst, I.getSwappedPredicate());
}
return Changed ? &I : nullptr;
}
/// Fold fcmp ([us]itofp x, cst) if possible.
-Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
- Instruction *LHSI,
+Instruction *InstCombiner::foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI,
Constant *RHSC) {
if (!isa<ConstantFP>(RHSC)) return nullptr;
const APFloat &RHS = cast<ConstantFP>(RHSC)->getValueAPF();
@@ -4339,21 +4489,21 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
// This would allow us to handle (fptosi (x >>s 62) to float) if x is i64 f.e.
unsigned InputSize = IntTy->getScalarSizeInBits();
- // Following test does NOT adjust InputSize downwards for signed inputs,
- // because the most negative value still requires all the mantissa bits
+ // Following test does NOT adjust InputSize downwards for signed inputs,
+ // because the most negative value still requires all the mantissa bits
// to distinguish it from one less than that value.
if ((int)InputSize > MantissaWidth) {
// Conversion would lose accuracy. Check if loss can impact comparison.
int Exp = ilogb(RHS);
if (Exp == APFloat::IEK_Inf) {
int MaxExponent = ilogb(APFloat::getLargest(RHS.getSemantics()));
- if (MaxExponent < (int)InputSize - !LHSUnsigned)
+ if (MaxExponent < (int)InputSize - !LHSUnsigned)
// Conversion could create infinity.
return nullptr;
} else {
- // Note that if RHS is zero or NaN, then Exp is negative
+ // Note that if RHS is zero or NaN, then Exp is negative
// and first condition is trivially false.
- if (MantissaWidth <= Exp && Exp <= (int)InputSize - !LHSUnsigned)
+ if (MantissaWidth <= Exp && Exp <= (int)InputSize - !LHSUnsigned)
// Conversion could affect comparison.
return nullptr;
}
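
To see why the mantissa-width check above matters, here is a minimal C++ sketch (the constants are illustrative, not from the patch): once the integer type is wider than the significand, distinct integers convert to the same floating-point value, so the comparison cannot be moved blindly to the integer side.

#include <cassert>

int main() {
  // float has a 24-bit significand, so 2^24 and 2^24 + 1 convert to the same
  // value; an fcmp against such a constant is not equivalent to a plain icmp.
  int a = 16777216;  // 2^24
  int b = 16777217;  // 2^24 + 1
  assert(static_cast<float>(a) == static_cast<float>(b));
  return 0;
}
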
@@ -4547,7 +4697,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
if (Value *V = SimplifyFCmpInst(I.getPredicate(), Op0, Op1,
- I.getFastMathFlags(), DL, TLI, DT, AC, &I))
+ I.getFastMathFlags(), DL, &TLI, &DT, &AC, &I))
return replaceInstUsesWith(I, V);
// Simplify 'fcmp pred X, X'
@@ -4601,17 +4751,17 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
const fltSemantics *Sem;
// FIXME: This shouldn't be here.
if (LHSExt->getSrcTy()->isHalfTy())
- Sem = &APFloat::IEEEhalf;
+ Sem = &APFloat::IEEEhalf();
else if (LHSExt->getSrcTy()->isFloatTy())
- Sem = &APFloat::IEEEsingle;
+ Sem = &APFloat::IEEEsingle();
else if (LHSExt->getSrcTy()->isDoubleTy())
- Sem = &APFloat::IEEEdouble;
+ Sem = &APFloat::IEEEdouble();
else if (LHSExt->getSrcTy()->isFP128Ty())
- Sem = &APFloat::IEEEquad;
+ Sem = &APFloat::IEEEquad();
else if (LHSExt->getSrcTy()->isX86_FP80Ty())
- Sem = &APFloat::x87DoubleExtended;
+ Sem = &APFloat::x87DoubleExtended();
else if (LHSExt->getSrcTy()->isPPC_FP128Ty())
- Sem = &APFloat::PPCDoubleDouble;
+ Sem = &APFloat::PPCDoubleDouble();
else
break;
@@ -4641,7 +4791,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
break;
case Instruction::SIToFP:
case Instruction::UIToFP:
- if (Instruction *NV = FoldFCmp_IntToFP_Cst(I, LHSI, RHSC))
+ if (Instruction *NV = foldFCmpIntToFPConst(I, LHSI, RHSC))
return NV;
break;
case Instruction::FSub: {
@@ -4658,7 +4808,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
!cast<LoadInst>(LHSI)->isVolatile())
- if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV, I))
+ if (Instruction *Res = foldCmpLoadFromIndexedGlobal(GEP, GV, I))
return Res;
}
break;
@@ -4667,7 +4817,7 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
break;
CallInst *CI = cast<CallInst>(LHSI);
- Intrinsic::ID IID = getIntrinsicForCallSite(CI, TLI);
+ Intrinsic::ID IID = getIntrinsicForCallSite(CI, &TLI);
if (IID != Intrinsic::fabs)
break;
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index aa421ff594fb..3cefe715e567 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -84,6 +84,24 @@ static inline bool IsFreeToInvert(Value *V, bool WillInvertAllUses) {
if (isa<ConstantInt>(V))
return true;
+ // A vector of constant integers can be inverted easily.
+ Constant *CV;
+ if (V->getType()->isVectorTy() && match(V, PatternMatch::m_Constant(CV))) {
+ unsigned NumElts = V->getType()->getVectorNumElements();
+ for (unsigned i = 0; i != NumElts; ++i) {
+ Constant *Elt = CV->getAggregateElement(i);
+ if (!Elt)
+ return false;
+
+ if (isa<UndefValue>(Elt))
+ continue;
+
+ if (!isa<ConstantInt>(Elt))
+ return false;
+ }
+ return true;
+ }
+
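
The reason a vector of constant integers counts as "free to invert" is that its inverse is just another constant, computable lane by lane; a minimal C++ sketch (names and values are illustrative only):

#include <array>
#include <cassert>
#include <cstdint>

int main() {
  // Lane-wise bitwise-not of a constant vector such as <i32 1, 2, 3, 4> is
  // itself a constant, so no extra instruction is needed to materialize ~V.
  std::array<uint32_t, 4> V = {1, 2, 3, 4};
  std::array<uint32_t, 4> NotV{};
  for (int i = 0; i < 4; ++i)
    NotV[i] = ~V[i];
  assert(NotV[0] == 0xFFFFFFFEu && NotV[3] == 0xFFFFFFFBu);
  return 0;
}
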
// Compares can be inverted if all of their uses are being modified to use the
// ~V.
if (isa<CmpInst>(V))
@@ -135,33 +153,10 @@ IntrinsicIDToOverflowCheckFlavor(unsigned ID) {
}
}
-/// \brief An IRBuilder inserter that adds new instructions to the instcombine
-/// worklist.
-class LLVM_LIBRARY_VISIBILITY InstCombineIRInserter
- : public IRBuilderDefaultInserter {
- InstCombineWorklist &Worklist;
- AssumptionCache *AC;
-
-public:
- InstCombineIRInserter(InstCombineWorklist &WL, AssumptionCache *AC)
- : Worklist(WL), AC(AC) {}
-
- void InsertHelper(Instruction *I, const Twine &Name, BasicBlock *BB,
- BasicBlock::iterator InsertPt) const {
- IRBuilderDefaultInserter::InsertHelper(I, Name, BB, InsertPt);
- Worklist.Add(I);
-
- using namespace llvm::PatternMatch;
- if (match(I, m_Intrinsic<Intrinsic::assume>()))
- AC->registerAssumption(cast<CallInst>(I));
- }
-};
-
/// \brief The core instruction combiner logic.
///
/// This class provides both the logic to recursively visit instructions and
-/// combine them, as well as the pass infrastructure for running this as part
-/// of the LLVM pass pipeline.
+/// combine them.
class LLVM_LIBRARY_VISIBILITY InstCombiner
: public InstVisitor<InstCombiner, Instruction *> {
// FIXME: These members shouldn't be public.
@@ -171,7 +166,7 @@ public:
/// \brief An IRBuilder that automatically inserts new instructions into the
/// worklist.
- typedef IRBuilder<TargetFolder, InstCombineIRInserter> BuilderTy;
+ typedef IRBuilder<TargetFolder, IRBuilderCallbackInserter> BuilderTy;
BuilderTy *Builder;
private:
@@ -183,10 +178,9 @@ private:
AliasAnalysis *AA;
// Required analyses.
- // FIXME: These can never be null and should be references.
- AssumptionCache *AC;
- TargetLibraryInfo *TLI;
- DominatorTree *DT;
+ AssumptionCache &AC;
+ TargetLibraryInfo &TLI;
+ DominatorTree &DT;
const DataLayout &DL;
// Optional analyses. When non-null, these can both be used to do better
@@ -198,8 +192,8 @@ private:
public:
InstCombiner(InstCombineWorklist &Worklist, BuilderTy *Builder,
bool MinimizeSize, bool ExpensiveCombines, AliasAnalysis *AA,
- AssumptionCache *AC, TargetLibraryInfo *TLI,
- DominatorTree *DT, const DataLayout &DL, LoopInfo *LI)
+ AssumptionCache &AC, TargetLibraryInfo &TLI,
+ DominatorTree &DT, const DataLayout &DL, LoopInfo *LI)
: Worklist(Worklist), Builder(Builder), MinimizeSize(MinimizeSize),
ExpensiveCombines(ExpensiveCombines), AA(AA), AC(AC), TLI(TLI), DT(DT),
DL(DL), LI(LI), MadeIRChange(false) {}
@@ -209,15 +203,15 @@ public:
/// \returns true if the IR is changed.
bool run();
- AssumptionCache *getAssumptionCache() const { return AC; }
+ AssumptionCache &getAssumptionCache() const { return AC; }
const DataLayout &getDataLayout() const { return DL; }
- DominatorTree *getDominatorTree() const { return DT; }
+ DominatorTree &getDominatorTree() const { return DT; }
LoopInfo *getLoopInfo() const { return LI; }
- TargetLibraryInfo *getTargetLibraryInfo() const { return TLI; }
+ TargetLibraryInfo &getTargetLibraryInfo() const { return TLI; }
// Visitation implementation - Implement instruction combining for different
// instruction types. The semantics are as follows:
@@ -262,29 +256,8 @@ public:
Instruction *visitAShr(BinaryOperator &I);
Instruction *visitLShr(BinaryOperator &I);
Instruction *commonShiftTransforms(BinaryOperator &I);
- Instruction *FoldFCmp_IntToFP_Cst(FCmpInst &I, Instruction *LHSI,
- Constant *RHSC);
- Instruction *FoldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
- GlobalVariable *GV, CmpInst &ICI,
- ConstantInt *AndCst = nullptr);
Instruction *visitFCmpInst(FCmpInst &I);
Instruction *visitICmpInst(ICmpInst &I);
- Instruction *visitICmpInstWithCastAndCast(ICmpInst &ICI);
- Instruction *visitICmpInstWithInstAndIntCst(ICmpInst &ICI, Instruction *LHS,
- ConstantInt *RHS);
- Instruction *FoldICmpDivCst(ICmpInst &ICI, BinaryOperator *DivI,
- ConstantInt *DivRHS);
- Instruction *FoldICmpShrCst(ICmpInst &ICI, BinaryOperator *DivI,
- ConstantInt *DivRHS);
- Instruction *FoldICmpCstShrCst(ICmpInst &I, Value *Op, Value *A,
- ConstantInt *CI1, ConstantInt *CI2);
- Instruction *FoldICmpCstShlCst(ICmpInst &I, Value *Op, Value *A,
- ConstantInt *CI1, ConstantInt *CI2);
- Instruction *FoldICmpAddOpCst(Instruction &ICI, Value *X, ConstantInt *CI,
- ICmpInst::Predicate Pred);
- Instruction *FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
- ICmpInst::Predicate Cond, Instruction &I);
- Instruction *FoldAllocaCmp(ICmpInst &ICI, AllocaInst *Alloca, Value *Other);
Instruction *FoldShiftByConstant(Value *Op0, Constant *Op1,
BinaryOperator &I);
Instruction *commonCastTransforms(CastInst &CI);
@@ -302,14 +275,8 @@ public:
Instruction *visitIntToPtr(IntToPtrInst &CI);
Instruction *visitBitCast(BitCastInst &CI);
Instruction *visitAddrSpaceCast(AddrSpaceCastInst &CI);
- Instruction *FoldSelectOpOp(SelectInst &SI, Instruction *TI, Instruction *FI);
- Instruction *FoldSelectIntoOp(SelectInst &SI, Value *, Value *);
- Instruction *FoldSPFofSPF(Instruction *Inner, SelectPatternFlavor SPF1,
- Value *A, Value *B, Instruction &Outer,
- SelectPatternFlavor SPF2, Value *C);
Instruction *FoldItoFPtoI(Instruction &FI);
Instruction *visitSelectInst(SelectInst &SI);
- Instruction *visitSelectInstWithICmp(SelectInst &SI, ICmpInst *ICI);
Instruction *visitCallInst(CallInst &CI);
Instruction *visitInvokeInst(InvokeInst &II);
@@ -333,16 +300,16 @@ public:
Instruction *visitVAStartInst(VAStartInst &I);
Instruction *visitVACopyInst(VACopyInst &I);
- // visitInstruction - Specify what to return for unhandled instructions...
+ /// Specify what to return for unhandled instructions.
Instruction *visitInstruction(Instruction &I) { return nullptr; }
- // True when DB dominates all uses of DI execpt UI.
- // UI must be in the same block as DI.
- // The routine checks that the DI parent and DB are different.
+ /// True when DB dominates all uses of DI except UI.
+ /// UI must be in the same block as DI.
+ /// The routine checks that the DI parent and DB are different.
bool dominatesAllUses(const Instruction *DI, const Instruction *UI,
const BasicBlock *DB) const;
- // Replace select with select operand SIOpd in SI-ICmp sequence when possible
+ /// Try to replace select with select operand SIOpd in SI-ICmp sequence.
bool replacedSelectWithOperand(SelectInst *SI, const ICmpInst *Icmp,
const unsigned SIOpd);
@@ -355,14 +322,16 @@ private:
SmallVectorImpl<Value *> &NewIndices);
Instruction *FoldOpIntoSelect(Instruction &Op, SelectInst *SI);
- /// \brief Classify whether a cast is worth optimizing.
+ /// Classify whether a cast is worth optimizing.
///
- /// Returns true if the cast from "V to Ty" actually results in any code
- /// being generated and is interesting to optimize out. If the cast can be
- /// eliminated by some other simple transformation, we prefer to do the
- /// simplification first.
- bool ShouldOptimizeCast(Instruction::CastOps opcode, const Value *V,
- Type *Ty);
+ /// This is a helper to decide whether the simplification of
+ /// logic(cast(A), cast(B)) to cast(logic(A, B)) should be performed.
+ ///
+ /// \param CI The cast we are interested in.
+ ///
+ /// \return true if this cast actually results in any code being generated and
+ /// if it cannot already be eliminated by some other transformation.
+ bool shouldOptimizeCast(CastInst *CI);
/// \brief Try to optimize a sequence of instructions checking if an operation
/// on LHS and RHS overflows.
@@ -385,8 +354,22 @@ private:
bool transformConstExprCastCall(CallSite CS);
Instruction *transformCallThroughTrampoline(CallSite CS,
IntrinsicInst *Tramp);
- Instruction *transformZExtICmp(ICmpInst *ICI, Instruction &CI,
- bool DoXform = true);
+
+ /// Transform (zext icmp) to bitwise / integer operations in order to
+ /// eliminate it.
+ ///
+ /// \param ICI The icmp of the (zext icmp) pair we are interested in.
+  /// \param CI The zext of the (zext icmp) pair we are interested in.
+ /// \param DoTransform Pass false to just test whether the given (zext icmp)
+ /// would be transformed. Pass true to actually perform the transformation.
+ ///
+ /// \return null if the transformation cannot be performed. If the
+ /// transformation can be performed the new instruction that replaces the
+ /// (zext icmp) pair will be returned (if \p DoTransform is false the
+ /// unmodified \p ICI will be returned in this case).
+ Instruction *transformZExtICmp(ICmpInst *ICI, ZExtInst &CI,
+ bool DoTransform = true);
+
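
One common instance of the rewrite described above, as a minimal C++ sketch (the particular bit and names are illustrative): a zext of an icmp against a single bit can be expressed with a mask and a shift alone, which is what lets the zext/icmp pair disappear.

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Xs[] = {0u, 7u, 8u, 0xFFu, 0xFFFFFFFFu};
  for (uint32_t X : Xs) {
    uint32_t ZextOfCmp = ((X & 8u) != 0u) ? 1u : 0u;  // zext(icmp ne (X & 8), 0)
    uint32_t Bitwise   = (X & 8u) >> 3;               // same value, no compare left
    assert(ZextOfCmp == Bitwise);
  }
  return 0;
}
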
Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI);
bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS, Instruction &CxtI);
bool WillNotOverflowSignedSub(Value *LHS, Value *RHS, Instruction &CxtI);
@@ -396,6 +379,21 @@ private:
Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN);
Value *EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask);
Instruction *foldCastedBitwiseLogic(BinaryOperator &I);
+ Instruction *shrinkBitwiseLogic(TruncInst &Trunc);
+ Instruction *optimizeBitCastFromPhi(CastInst &CI, PHINode *PN);
+
+ /// Determine if a pair of casts can be replaced by a single cast.
+ ///
+ /// \param CI1 The first of a pair of casts.
+ /// \param CI2 The second of a pair of casts.
+ ///
+ /// \return 0 if the cast pair cannot be eliminated, otherwise returns an
+ /// Instruction::CastOps value for a cast that can replace the pair, casting
+ /// CI1->getSrcTy() to CI2->getDstTy().
+ ///
+ /// \see CastInst::isEliminableCastPair
+ Instruction::CastOps isEliminableCastPair(const CastInst *CI1,
+ const CastInst *CI2);
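
A minimal sketch of what an "eliminable cast pair" means here (editorial C++ illustration, assuming ordinary zero-extension semantics): two widening zero-extensions collapse into a single extension from the original source type to the final destination type.

#include <cassert>
#include <cstdint>

int main() {
  uint8_t X = 0xAB;
  uint32_t TwoCasts = static_cast<uint32_t>(static_cast<uint16_t>(X));  // zext i8->i16, then i16->i32
  uint32_t OneCast  = static_cast<uint32_t>(X);                         // single zext i8->i32
  assert(TwoCasts == OneCast);
  return 0;
}
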
public:
/// \brief Inserts an instruction \p New before instruction \p Old
@@ -476,30 +474,30 @@ public:
void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
unsigned Depth, Instruction *CxtI) const {
- return llvm::computeKnownBits(V, KnownZero, KnownOne, DL, Depth, AC, CxtI,
- DT);
+ return llvm::computeKnownBits(V, KnownZero, KnownOne, DL, Depth, &AC, CxtI,
+ &DT);
}
bool MaskedValueIsZero(Value *V, const APInt &Mask, unsigned Depth = 0,
Instruction *CxtI = nullptr) const {
- return llvm::MaskedValueIsZero(V, Mask, DL, Depth, AC, CxtI, DT);
+ return llvm::MaskedValueIsZero(V, Mask, DL, Depth, &AC, CxtI, &DT);
}
unsigned ComputeNumSignBits(Value *Op, unsigned Depth = 0,
Instruction *CxtI = nullptr) const {
- return llvm::ComputeNumSignBits(Op, DL, Depth, AC, CxtI, DT);
+ return llvm::ComputeNumSignBits(Op, DL, Depth, &AC, CxtI, &DT);
}
void ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne,
unsigned Depth = 0, Instruction *CxtI = nullptr) const {
- return llvm::ComputeSignBit(V, KnownZero, KnownOne, DL, Depth, AC, CxtI,
- DT);
+ return llvm::ComputeSignBit(V, KnownZero, KnownOne, DL, Depth, &AC, CxtI,
+ &DT);
}
OverflowResult computeOverflowForUnsignedMul(Value *LHS, Value *RHS,
const Instruction *CxtI) {
- return llvm::computeOverflowForUnsignedMul(LHS, RHS, DL, AC, CxtI, DT);
+ return llvm::computeOverflowForUnsignedMul(LHS, RHS, DL, &AC, CxtI, &DT);
}
OverflowResult computeOverflowForUnsignedAdd(Value *LHS, Value *RHS,
const Instruction *CxtI) {
- return llvm::computeOverflowForUnsignedAdd(LHS, RHS, DL, AC, CxtI, DT);
+ return llvm::computeOverflowForUnsignedAdd(LHS, RHS, DL, &AC, CxtI, &DT);
}
private:
@@ -554,13 +552,82 @@ private:
Instruction *FoldPHIArgLoadIntoPHI(PHINode &PN);
Instruction *FoldPHIArgZextsIntoPHI(PHINode &PN);
+ /// Helper function for FoldPHIArgXIntoPHI() to get debug location for the
+ /// folded operation.
+ DebugLoc PHIArgMergedDebugLoc(PHINode &PN);
+
+ Instruction *foldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
+ ICmpInst::Predicate Cond, Instruction &I);
+ Instruction *foldAllocaCmp(ICmpInst &ICI, const AllocaInst *Alloca,
+ const Value *Other);
+ Instruction *foldCmpLoadFromIndexedGlobal(GetElementPtrInst *GEP,
+ GlobalVariable *GV, CmpInst &ICI,
+ ConstantInt *AndCst = nullptr);
+ Instruction *foldFCmpIntToFPConst(FCmpInst &I, Instruction *LHSI,
+ Constant *RHSC);
+ Instruction *foldICmpAddOpConst(Instruction &ICI, Value *X, ConstantInt *CI,
+ ICmpInst::Predicate Pred);
+ Instruction *foldICmpWithCastAndCast(ICmpInst &ICI);
+
+ Instruction *foldICmpUsingKnownBits(ICmpInst &Cmp);
+ Instruction *foldICmpWithConstant(ICmpInst &Cmp);
+ Instruction *foldICmpInstWithConstant(ICmpInst &Cmp);
+ Instruction *foldICmpInstWithConstantNotInt(ICmpInst &Cmp);
+ Instruction *foldICmpBinOp(ICmpInst &Cmp);
+ Instruction *foldICmpEquality(ICmpInst &Cmp);
+
+ Instruction *foldICmpTruncConstant(ICmpInst &Cmp, Instruction *Trunc,
+ const APInt *C);
+ Instruction *foldICmpAndConstant(ICmpInst &Cmp, BinaryOperator *And,
+ const APInt *C);
+ Instruction *foldICmpXorConstant(ICmpInst &Cmp, BinaryOperator *Xor,
+ const APInt *C);
+ Instruction *foldICmpOrConstant(ICmpInst &Cmp, BinaryOperator *Or,
+ const APInt *C);
+ Instruction *foldICmpMulConstant(ICmpInst &Cmp, BinaryOperator *Mul,
+ const APInt *C);
+ Instruction *foldICmpShlConstant(ICmpInst &Cmp, BinaryOperator *Shl,
+ const APInt *C);
+ Instruction *foldICmpShrConstant(ICmpInst &Cmp, BinaryOperator *Shr,
+ const APInt *C);
+ Instruction *foldICmpUDivConstant(ICmpInst &Cmp, BinaryOperator *UDiv,
+ const APInt *C);
+ Instruction *foldICmpDivConstant(ICmpInst &Cmp, BinaryOperator *Div,
+ const APInt *C);
+ Instruction *foldICmpSubConstant(ICmpInst &Cmp, BinaryOperator *Sub,
+ const APInt *C);
+ Instruction *foldICmpAddConstant(ICmpInst &Cmp, BinaryOperator *Add,
+ const APInt *C);
+ Instruction *foldICmpAndConstConst(ICmpInst &Cmp, BinaryOperator *And,
+ const APInt *C1);
+ Instruction *foldICmpAndShift(ICmpInst &Cmp, BinaryOperator *And,
+ const APInt *C1, const APInt *C2);
+ Instruction *foldICmpShrConstConst(ICmpInst &I, Value *ShAmt, const APInt &C1,
+ const APInt &C2);
+ Instruction *foldICmpShlConstConst(ICmpInst &I, Value *ShAmt, const APInt &C1,
+ const APInt &C2);
+
+ Instruction *foldICmpBinOpEqualityWithConstant(ICmpInst &Cmp,
+ BinaryOperator *BO,
+ const APInt *C);
+ Instruction *foldICmpIntrinsicWithConstant(ICmpInst &ICI, const APInt *C);
+
+ // Helpers of visitSelectInst().
+ Instruction *foldSelectExtConst(SelectInst &Sel);
+ Instruction *foldSelectOpOp(SelectInst &SI, Instruction *TI, Instruction *FI);
+ Instruction *foldSelectIntoOp(SelectInst &SI, Value *, Value *);
+ Instruction *foldSPFofSPF(Instruction *Inner, SelectPatternFlavor SPF1,
+ Value *A, Value *B, Instruction &Outer,
+ SelectPatternFlavor SPF2, Value *C);
+ Instruction *foldSelectInstWithICmp(SelectInst &SI, ICmpInst *ICI);
+
Instruction *OptAndOp(Instruction *Op, ConstantInt *OpRHS,
ConstantInt *AndRHS, BinaryOperator &TheAnd);
Value *FoldLogicalPlusAnd(Value *LHS, Value *RHS, ConstantInt *Mask,
bool isSub, Instruction &I);
- Value *InsertRangeTest(Value *V, Constant *Lo, Constant *Hi, bool isSigned,
- bool Inside);
+ Value *insertRangeTest(Value *V, const APInt &Lo, const APInt &Hi,
+ bool isSigned, bool Inside);
Instruction *PromoteCastOfAllocation(BitCastInst &CI, AllocaInst &AI);
Instruction *MatchBSwap(BinaryOperator &I);
bool SimplifyStoreAtEndOfBlock(StoreInst &SI);
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index d88456ee4adc..5276bee4e0a2 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Loads.h"
+#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -59,14 +60,14 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
// eliminate the markers.
SmallVector<std::pair<Value *, bool>, 35> ValuesToInspect;
- ValuesToInspect.push_back(std::make_pair(V, false));
+ ValuesToInspect.emplace_back(V, false);
while (!ValuesToInspect.empty()) {
auto ValuePair = ValuesToInspect.pop_back_val();
const bool IsOffset = ValuePair.second;
for (auto &U : ValuePair.first->uses()) {
- Instruction *I = cast<Instruction>(U.getUser());
+ auto *I = cast<Instruction>(U.getUser());
- if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
+ if (auto *LI = dyn_cast<LoadInst>(I)) {
// Ignore non-volatile loads, they are always ok.
if (!LI->isSimple()) return false;
continue;
@@ -74,14 +75,13 @@ isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy,
if (isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I)) {
// If uses of the bitcast are ok, we are ok.
- ValuesToInspect.push_back(std::make_pair(I, IsOffset));
+ ValuesToInspect.emplace_back(I, IsOffset);
continue;
}
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
// If the GEP has all zero indices, it doesn't offset the pointer. If it
// doesn't, it does.
- ValuesToInspect.push_back(
- std::make_pair(I, IsOffset || !GEP->hasAllZeroIndices()));
+ ValuesToInspect.emplace_back(I, IsOffset || !GEP->hasAllZeroIndices());
continue;
}
@@ -286,7 +286,7 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
SmallVector<Instruction *, 4> ToDelete;
if (MemTransferInst *Copy = isOnlyCopiedFromConstantGlobal(&AI, ToDelete)) {
unsigned SourceAlign = getOrEnforceKnownAlignment(
- Copy->getSource(), AI.getAlignment(), DL, &AI, AC, DT);
+ Copy->getSource(), AI.getAlignment(), DL, &AI, &AC, &DT);
if (AI.getAlignment() <= SourceAlign) {
DEBUG(dbgs() << "Found alloca equal to global: " << AI << '\n');
DEBUG(dbgs() << " memcpy = " << *Copy << '\n');
@@ -308,6 +308,11 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
return visitAllocSite(AI);
}
+// Are we allowed to form an atomic load or store of this type?
+static bool isSupportedAtomicType(Type *Ty) {
+ return Ty->isIntegerTy() || Ty->isPointerTy() || Ty->isFloatingPointTy();
+}
+
/// \brief Helper to combine a load to a new type.
///
/// This just does the work of combining a load to a new type. It handles
@@ -319,6 +324,9 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
/// point the \c InstCombiner currently is using.
static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewTy,
const Twine &Suffix = "") {
+ assert((!LI.isAtomic() || isSupportedAtomicType(NewTy)) &&
+ "can't fold an atomic load to requested type");
+
Value *Ptr = LI.getPointerOperand();
unsigned AS = LI.getPointerAddressSpace();
SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
@@ -380,8 +388,16 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT
break;
case LLVMContext::MD_range:
// FIXME: It would be nice to propagate this in some way, but the type
- // conversions make it hard. If the new type is a pointer, we could
- // translate it to !nonnull metadata.
+ // conversions make it hard.
+
+ // If it's a pointer now and the range does not contain 0, make it !nonnull.
+ if (NewTy->isPointerTy()) {
+ unsigned BitWidth = IC.getDataLayout().getTypeSizeInBits(NewTy);
+ if (!getConstantRangeFromMetadata(*N).contains(APInt(BitWidth, 0))) {
+ MDNode *NN = MDNode::get(LI.getContext(), None);
+ NewLoad->setMetadata(LLVMContext::MD_nonnull, NN);
+ }
+ }
break;
}
}
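
A minimal sketch of the containment test used above (the helper below is hypothetical; LLVM's ConstantRange implements the real thing): a half-open !range [Lo, Hi) lets the load be marked !nonnull exactly when the range excludes zero.

#include <cassert>
#include <cstdint>

// Hypothetical stand-in for ConstantRange::contains(0) on a half-open 32-bit
// range [Lo, Hi) that is never empty or full, as !range metadata guarantees.
static bool containsZero(uint32_t Lo, uint32_t Hi) {
  if (Lo < Hi)
    return Lo == 0;  // ordinary range [Lo, Hi)
  return Hi != 0;    // wrapped range [Lo, 2^32) U [0, Hi)
}

int main() {
  assert(!containsZero(1, 100));          // !range {1, 100}: pointer is non-null
  assert(containsZero(0, 100));           // range includes 0: no !nonnull
  assert(containsZero(0xFFFFFFF0u, 8));   // wrapped range also includes 0
  return 0;
}
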
@@ -392,6 +408,9 @@ static LoadInst *combineLoadToNewType(InstCombiner &IC, LoadInst &LI, Type *NewT
///
/// Returns the newly created store instruction.
static StoreInst *combineStoreToNewValue(InstCombiner &IC, StoreInst &SI, Value *V) {
+ assert((!SI.isAtomic() || isSupportedAtomicType(V->getType())) &&
+ "can't fold an atomic store of requested type");
+
Value *Ptr = SI.getPointerOperand();
unsigned AS = SI.getPointerAddressSpace();
SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
@@ -466,6 +485,10 @@ static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
if (LI.use_empty())
return nullptr;
+ // swifterror values can't be bitcasted.
+ if (LI.getPointerOperand()->isSwiftError())
+ return nullptr;
+
Type *Ty = LI.getType();
const DataLayout &DL = IC.getDataLayout();
@@ -475,8 +498,9 @@ static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
// size is a legal integer type.
if (!Ty->isIntegerTy() && Ty->isSized() &&
DL.isLegalInteger(DL.getTypeStoreSizeInBits(Ty)) &&
- DL.getTypeStoreSizeInBits(Ty) == DL.getTypeSizeInBits(Ty)) {
- if (std::all_of(LI.user_begin(), LI.user_end(), [&LI](User *U) {
+ DL.getTypeStoreSizeInBits(Ty) == DL.getTypeSizeInBits(Ty) &&
+ !DL.isNonIntegralPointerType(Ty)) {
+ if (all_of(LI.users(), [&LI](User *U) {
auto *SI = dyn_cast<StoreInst>(U);
return SI && SI->getPointerOperand() != &LI;
})) {
@@ -501,14 +525,14 @@ static Instruction *combineLoadToOperationType(InstCombiner &IC, LoadInst &LI) {
// as long as those are noops (i.e., the source or dest type have the same
// bitwidth as the target's pointers).
if (LI.hasOneUse())
- if (auto* CI = dyn_cast<CastInst>(LI.user_back())) {
- if (CI->isNoopCast(DL)) {
- LoadInst *NewLoad = combineLoadToNewType(IC, LI, CI->getDestTy());
- CI->replaceAllUsesWith(NewLoad);
- IC.eraseInstFromFunction(*CI);
- return &LI;
- }
- }
+ if (auto* CI = dyn_cast<CastInst>(LI.user_back()))
+ if (CI->isNoopCast(DL))
+ if (!LI.isAtomic() || isSupportedAtomicType(CI->getDestTy())) {
+ LoadInst *NewLoad = combineLoadToNewType(IC, LI, CI->getDestTy());
+ CI->replaceAllUsesWith(NewLoad);
+ IC.eraseInstFromFunction(*CI);
+ return &LI;
+ }
// FIXME: We should also canonicalize loads of vectors when their elements are
// cast to other types.
@@ -802,7 +826,7 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
// Attempt to improve the alignment.
unsigned KnownAlign = getOrEnforceKnownAlignment(
- Op, DL.getPrefTypeAlignment(LI.getType()), DL, &LI, AC, DT);
+ Op, DL.getPrefTypeAlignment(LI.getType()), DL, &LI, &AC, &DT);
unsigned LoadAlign = LI.getAlignment();
unsigned EffectiveLoadAlign =
LoadAlign != 0 ? LoadAlign : DL.getABITypeAlignment(LI.getType());
@@ -825,11 +849,10 @@ Instruction *InstCombiner::visitLoadInst(LoadInst &LI) {
// where there are several consecutive memory accesses to the same location,
// separated by a few arithmetic operations.
BasicBlock::iterator BBI(LI);
- AAMDNodes AATags;
bool IsLoadCSE = false;
if (Value *AvailableVal =
FindAvailableLoadedValue(&LI, LI.getParent(), BBI,
- DefMaxInstsToScan, AA, &AATags, &IsLoadCSE)) {
+ DefMaxInstsToScan, AA, &IsLoadCSE)) {
if (IsLoadCSE) {
LoadInst *NLI = cast<LoadInst>(AvailableVal);
unsigned KnownIDs[] = {
@@ -1005,19 +1028,26 @@ static bool combineStoreToValueType(InstCombiner &IC, StoreInst &SI) {
if (!SI.isUnordered())
return false;
+ // swifterror values can't be bitcasted.
+ if (SI.getPointerOperand()->isSwiftError())
+ return false;
+
Value *V = SI.getValueOperand();
// Fold away bit casts of the stored value by storing the original type.
if (auto *BC = dyn_cast<BitCastInst>(V)) {
V = BC->getOperand(0);
- combineStoreToNewValue(IC, SI, V);
- return true;
+ if (!SI.isAtomic() || isSupportedAtomicType(V->getType())) {
+ combineStoreToNewValue(IC, SI, V);
+ return true;
+ }
}
- if (Value *U = likeBitCastFromVector(IC, V)) {
- combineStoreToNewValue(IC, SI, U);
- return true;
- }
+ if (Value *U = likeBitCastFromVector(IC, V))
+ if (!SI.isAtomic() || isSupportedAtomicType(U->getType())) {
+ combineStoreToNewValue(IC, SI, U);
+ return true;
+ }
// FIXME: We should also canonicalize stores of vectors when their elements
// are cast to other types.
@@ -1169,7 +1199,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
// Attempt to improve the alignment.
unsigned KnownAlign = getOrEnforceKnownAlignment(
- Ptr, DL.getPrefTypeAlignment(Val->getType()), DL, &SI, AC, DT);
+ Ptr, DL.getPrefTypeAlignment(Val->getType()), DL, &SI, &AC, &DT);
unsigned StoreAlign = SI.getAlignment();
unsigned EffectiveStoreAlign =
StoreAlign != 0 ? StoreAlign : DL.getABITypeAlignment(Val->getType());
@@ -1293,7 +1323,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
   assert(SI.isUnordered() &&
          "this code has not been audited for volatile or ordered store case");
-
+
BasicBlock *StoreBB = SI.getParent();
// Check to see if the successor block has exactly two incoming edges. If
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 788097f33f12..ac64671725f3 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -48,8 +48,8 @@ static Value *simplifyValueKnownNonZero(Value *V, InstCombiner &IC,
BinaryOperator *I = dyn_cast<BinaryOperator>(V);
if (I && I->isLogicalShift() &&
isKnownToBeAPowerOfTwo(I->getOperand(0), IC.getDataLayout(), false, 0,
- IC.getAssumptionCache(), &CxtI,
- IC.getDominatorTree())) {
+ &IC.getAssumptionCache(), &CxtI,
+ &IC.getDominatorTree())) {
// We know that this is an exact/nuw shift and that the input is a
// non-zero context as well.
if (Value *V2 = simplifyValueKnownNonZero(I->getOperand(0), IC, CxtI)) {
@@ -179,7 +179,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
if (Value *V = SimplifyVectorOp(I))
return replaceInstUsesWith(I, V);
- if (Value *V = SimplifyMulInst(Op0, Op1, DL, TLI, DT, AC))
+ if (Value *V = SimplifyMulInst(Op0, Op1, DL, &TLI, &DT, &AC))
return replaceInstUsesWith(I, V);
if (Value *V = SimplifyUsingDistributiveLaws(I))
@@ -389,6 +389,80 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
}
}
+ // Check for (mul (sext x), y), see if we can merge this into an
+ // integer mul followed by a sext.
+ if (SExtInst *Op0Conv = dyn_cast<SExtInst>(Op0)) {
+ // (mul (sext x), cst) --> (sext (mul x, cst'))
+ if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
+ if (Op0Conv->hasOneUse()) {
+ Constant *CI =
+ ConstantExpr::getTrunc(Op1C, Op0Conv->getOperand(0)->getType());
+ if (ConstantExpr::getSExt(CI, I.getType()) == Op1C &&
+ WillNotOverflowSignedMul(Op0Conv->getOperand(0), CI, I)) {
+ // Insert the new, smaller mul.
+ Value *NewMul =
+ Builder->CreateNSWMul(Op0Conv->getOperand(0), CI, "mulconv");
+ return new SExtInst(NewMul, I.getType());
+ }
+ }
+ }
+
+ // (mul (sext x), (sext y)) --> (sext (mul int x, y))
+ if (SExtInst *Op1Conv = dyn_cast<SExtInst>(Op1)) {
+      // Only do this if x/y have the same type, if at least one of them has a
+ // single use (so we don't increase the number of sexts), and if the
+ // integer mul will not overflow.
+ if (Op0Conv->getOperand(0)->getType() ==
+ Op1Conv->getOperand(0)->getType() &&
+ (Op0Conv->hasOneUse() || Op1Conv->hasOneUse()) &&
+ WillNotOverflowSignedMul(Op0Conv->getOperand(0),
+ Op1Conv->getOperand(0), I)) {
+ // Insert the new integer mul.
+ Value *NewMul = Builder->CreateNSWMul(
+ Op0Conv->getOperand(0), Op1Conv->getOperand(0), "mulconv");
+ return new SExtInst(NewMul, I.getType());
+ }
+ }
+ }
+
+ // Check for (mul (zext x), y), see if we can merge this into an
+ // integer mul followed by a zext.
+ if (auto *Op0Conv = dyn_cast<ZExtInst>(Op0)) {
+ // (mul (zext x), cst) --> (zext (mul x, cst'))
+ if (ConstantInt *Op1C = dyn_cast<ConstantInt>(Op1)) {
+ if (Op0Conv->hasOneUse()) {
+ Constant *CI =
+ ConstantExpr::getTrunc(Op1C, Op0Conv->getOperand(0)->getType());
+ if (ConstantExpr::getZExt(CI, I.getType()) == Op1C &&
+ computeOverflowForUnsignedMul(Op0Conv->getOperand(0), CI, &I) ==
+ OverflowResult::NeverOverflows) {
+ // Insert the new, smaller mul.
+ Value *NewMul =
+ Builder->CreateNUWMul(Op0Conv->getOperand(0), CI, "mulconv");
+ return new ZExtInst(NewMul, I.getType());
+ }
+ }
+ }
+
+ // (mul (zext x), (zext y)) --> (zext (mul int x, y))
+ if (auto *Op1Conv = dyn_cast<ZExtInst>(Op1)) {
+      // Only do this if x/y have the same type, if at least one of them has a
+ // single use (so we don't increase the number of zexts), and if the
+ // integer mul will not overflow.
+ if (Op0Conv->getOperand(0)->getType() ==
+ Op1Conv->getOperand(0)->getType() &&
+ (Op0Conv->hasOneUse() || Op1Conv->hasOneUse()) &&
+ computeOverflowForUnsignedMul(Op0Conv->getOperand(0),
+ Op1Conv->getOperand(0),
+ &I) == OverflowResult::NeverOverflows) {
+ // Insert the new integer mul.
+ Value *NewMul = Builder->CreateNUWMul(
+ Op0Conv->getOperand(0), Op1Conv->getOperand(0), "mulconv");
+ return new ZExtInst(NewMul, I.getType());
+ }
+ }
+ }
+
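
The narrowing idea added above rests on a simple identity, shown here as a minimal C++ sketch (types and values are illustrative): when the narrow multiply provably cannot wrap, multiplying the extended operands equals extending the narrow product, for both the signed (sext/nsw) and unsigned (zext/nuw) variants.

#include <cassert>
#include <cstdint>

int main() {
  // Signed case: -12 * 45 = -540 fits in i16, so an nsw i16 multiply followed
  // by sext matches the i32 multiply of the sign-extended operands.
  int16_t sa = -12, sb = 45;
  int32_t SWide   = static_cast<int32_t>(sa) * static_cast<int32_t>(sb);
  int16_t SNarrow = static_cast<int16_t>(sa * sb);
  assert(SWide == static_cast<int32_t>(SNarrow));

  // Unsigned case: 200 * 300 = 60000 fits in u16, so an nuw i16 multiply
  // followed by zext matches the i32 multiply of the zero-extended operands.
  uint16_t ua = 200, ub = 300;
  uint32_t UWide   = static_cast<uint32_t>(ua) * static_cast<uint32_t>(ub);
  uint16_t UNarrow = static_cast<uint16_t>(ua * ub);
  assert(UWide == static_cast<uint32_t>(UNarrow));
  return 0;
}
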
if (!I.hasNoSignedWrap() && WillNotOverflowSignedMul(Op0, Op1, I)) {
Changed = true;
I.setHasNoSignedWrap(true);
@@ -545,7 +619,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
std::swap(Op0, Op1);
if (Value *V =
- SimplifyFMulInst(Op0, Op1, I.getFastMathFlags(), DL, TLI, DT, AC))
+ SimplifyFMulInst(Op0, Op1, I.getFastMathFlags(), DL, &TLI, &DT, &AC))
return replaceInstUsesWith(I, V);
bool AllowReassociate = I.hasUnsafeAlgebra();
@@ -709,7 +783,6 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
BuilderTy::FastMathFlagGuard Guard(*Builder);
Builder->setFastMathFlags(I.getFastMathFlags());
Value *T = Builder->CreateFMul(Opnd1, Opnd1);
-
Value *R = Builder->CreateFMul(T, Y);
R->takeName(&I);
return replaceInstUsesWith(I, R);
@@ -991,19 +1064,22 @@ static Instruction *foldUDivNegCst(Value *Op0, Value *Op1,
}
// X udiv (C1 << N), where C1 is "1<<C2" --> X >> (N+C2)
+// X udiv (zext (C1 << N)), where C1 is "1<<C2" --> X >> (N+C2)
static Instruction *foldUDivShl(Value *Op0, Value *Op1, const BinaryOperator &I,
InstCombiner &IC) {
- Instruction *ShiftLeft = cast<Instruction>(Op1);
- if (isa<ZExtInst>(ShiftLeft))
- ShiftLeft = cast<Instruction>(ShiftLeft->getOperand(0));
-
- const APInt &CI =
- cast<Constant>(ShiftLeft->getOperand(0))->getUniqueInteger();
- Value *N = ShiftLeft->getOperand(1);
- if (CI != 1)
- N = IC.Builder->CreateAdd(N, ConstantInt::get(N->getType(), CI.logBase2()));
- if (ZExtInst *Z = dyn_cast<ZExtInst>(Op1))
- N = IC.Builder->CreateZExt(N, Z->getDestTy());
+ Value *ShiftLeft;
+ if (!match(Op1, m_ZExt(m_Value(ShiftLeft))))
+ ShiftLeft = Op1;
+
+ const APInt *CI;
+ Value *N;
+ if (!match(ShiftLeft, m_Shl(m_APInt(CI), m_Value(N))))
+ llvm_unreachable("match should never fail here!");
+ if (*CI != 1)
+ N = IC.Builder->CreateAdd(N,
+ ConstantInt::get(N->getType(), CI->logBase2()));
+ if (Op1 != ShiftLeft)
+ N = IC.Builder->CreateZExt(N, Op1->getType());
BinaryOperator *LShr = BinaryOperator::CreateLShr(Op0, N);
if (I.isExact())
LShr->setIsExact();
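
The rewritten foldUDivShl still relies on the identity stated in the comment above; a minimal C++ check (constants are illustrative): dividing by (C1 << N) with C1 == 1 << C2 is the same as shifting right by N + C2.

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t C1 = 4;  // 1 << 2, so C2 == 2
  const uint32_t Xs[] = {0u, 1u, 12345u, 0xDEADBEEFu};
  for (uint32_t N = 0; N <= 8; ++N)
    for (uint32_t X : Xs)
      assert(X / (C1 << N) == X >> (N + 2));
  return 0;
}
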
@@ -1059,7 +1135,7 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
if (Value *V = SimplifyVectorOp(I))
return replaceInstUsesWith(I, V);
- if (Value *V = SimplifyUDivInst(Op0, Op1, DL, TLI, DT, AC))
+ if (Value *V = SimplifyUDivInst(Op0, Op1, DL, &TLI, &DT, &AC))
return replaceInstUsesWith(I, V);
// Handle the integer div common cases
@@ -1132,7 +1208,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
if (Value *V = SimplifyVectorOp(I))
return replaceInstUsesWith(I, V);
- if (Value *V = SimplifySDivInst(Op0, Op1, DL, TLI, DT, AC))
+ if (Value *V = SimplifySDivInst(Op0, Op1, DL, &TLI, &DT, &AC))
return replaceInstUsesWith(I, V);
// Handle the integer div common cases
@@ -1195,7 +1271,7 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) {
return BO;
}
- if (isKnownToBeAPowerOfTwo(Op1, DL, /*OrZero*/ true, 0, AC, &I, DT)) {
+ if (isKnownToBeAPowerOfTwo(Op1, DL, /*OrZero*/ true, 0, &AC, &I, &DT)) {
// X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y)
// Safe because the only negative value (1 << Y) can take on is
// INT_MIN, and X sdiv INT_MIN == X udiv INT_MIN == 0 if X doesn't have
@@ -1247,7 +1323,7 @@ Instruction *InstCombiner::visitFDiv(BinaryOperator &I) {
return replaceInstUsesWith(I, V);
if (Value *V = SimplifyFDivInst(Op0, Op1, I.getFastMathFlags(),
- DL, TLI, DT, AC))
+ DL, &TLI, &DT, &AC))
return replaceInstUsesWith(I, V);
if (isa<Constant>(Op0))
@@ -1421,7 +1497,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
if (Value *V = SimplifyVectorOp(I))
return replaceInstUsesWith(I, V);
- if (Value *V = SimplifyURemInst(Op0, Op1, DL, TLI, DT, AC))
+ if (Value *V = SimplifyURemInst(Op0, Op1, DL, &TLI, &DT, &AC))
return replaceInstUsesWith(I, V);
if (Instruction *common = commonIRemTransforms(I))
@@ -1434,7 +1510,7 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
I.getType());
// X urem Y -> X and Y-1, where Y is a power of 2,
- if (isKnownToBeAPowerOfTwo(Op1, DL, /*OrZero*/ true, 0, AC, &I, DT)) {
+ if (isKnownToBeAPowerOfTwo(Op1, DL, /*OrZero*/ true, 0, &AC, &I, &DT)) {
Constant *N1 = Constant::getAllOnesValue(I.getType());
Value *Add = Builder->CreateAdd(Op1, N1);
return BinaryOperator::CreateAnd(Op0, Add);
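The power-of-two case a few lines up is the usual mask trick: when Y is a power of two, X urem Y equals X & (Y - 1). A quick C++ spot-check, illustrative only and not part of the patch:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t Shift = 0; Shift < 32; ++Shift) {
        uint32_t Y = 1u << Shift;               // power-of-two divisor
        for (uint32_t X : {0u, 1u, 255u, 0xDEADBEEFu, 0xFFFFFFFFu})
          assert(X % Y == (X & (Y - 1)));
      }
      return 0;
    }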
@@ -1447,6 +1523,14 @@ Instruction *InstCombiner::visitURem(BinaryOperator &I) {
return replaceInstUsesWith(I, Ext);
}
+ // X urem C -> X < C ? X : X - C, where C >= signbit.
+ const APInt *DivisorC;
+ if (match(Op1, m_APInt(DivisorC)) && DivisorC->isNegative()) {
+ Value *Cmp = Builder->CreateICmpULT(Op0, Op1);
+ Value *Sub = Builder->CreateSub(Op0, Op1);
+ return SelectInst::Create(Cmp, Op0, Sub);
+ }
+
return nullptr;
}
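The new transform above uses the fact that when the divisor C has its sign bit set (C >= 0x80000000 for i32), the unsigned quotient X / C can only be 0 or 1, so the remainder is either X itself or X - C. A small C++ sketch of that identity, illustrative only and not part of the patch:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t Divisors[] = {0x80000000u, 0x80000001u, 0xFFFFFFFFu};
      const uint32_t Values[] = {0u, 1u, 0x7FFFFFFFu, 0x80000000u,
                                 0xFFFFFFFEu, 0xFFFFFFFFu};
      for (uint32_t C : Divisors)
        for (uint32_t X : Values)
          assert(X % C == (X < C ? X : X - C));  // quotient is 0 or 1
      return 0;
    }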
@@ -1456,7 +1540,7 @@ Instruction *InstCombiner::visitSRem(BinaryOperator &I) {
if (Value *V = SimplifyVectorOp(I))
return replaceInstUsesWith(I, V);
- if (Value *V = SimplifySRemInst(Op0, Op1, DL, TLI, DT, AC))
+ if (Value *V = SimplifySRemInst(Op0, Op1, DL, &TLI, &DT, &AC))
return replaceInstUsesWith(I, V);
// Handle the integer rem common cases
@@ -1532,7 +1616,7 @@ Instruction *InstCombiner::visitFRem(BinaryOperator &I) {
return replaceInstUsesWith(I, V);
if (Value *V = SimplifyFRemInst(Op0, Op1, I.getFastMathFlags(),
- DL, TLI, DT, AC))
+ DL, &TLI, &DT, &AC))
return replaceInstUsesWith(I, V);
// Handle cases involving: rem X, (select Cond, Y, Z)
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
index 79a4912332ff..184897f751fe 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -18,11 +18,27 @@
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/IR/DebugInfo.h"
using namespace llvm;
using namespace llvm::PatternMatch;
#define DEBUG_TYPE "instcombine"
+/// The PHI arguments will be folded into a single operation with a PHI node
+/// as input. The debug location of the single operation will be the merged
+/// location of the original PHI node arguments.
+DebugLoc InstCombiner::PHIArgMergedDebugLoc(PHINode &PN) {
+ auto *FirstInst = cast<Instruction>(PN.getIncomingValue(0));
+ DILocation *Loc = FirstInst->getDebugLoc();
+
+ for (unsigned i = 1; i != PN.getNumIncomingValues(); ++i) {
+ auto *I = cast<Instruction>(PN.getIncomingValue(i));
+ Loc = DILocation::getMergedLocation(Loc, I->getDebugLoc());
+ }
+
+ return Loc;
+}
+
/// If we have something like phi [add (a,b), add(a,c)] and if a/b/c and the
/// adds all have a single use, turn this into a phi and a single binop.
Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
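A source-level analogue of the fold this function performs, illustrative only and not part of the patch: two single-use adds feeding a phi become one phi (modeled here with a select) feeding a single add.

    #include <cassert>

    // Before: if (cond) r = a + b; else r = a + c;   // phi [a+b, a+c]
    // After:  r = a + (cond ? b : c);                // phi [b, c] and one add
    int folded(bool cond, int a, int b, int c) {
      int t = cond ? b : c;
      return a + t;
    }

    int main() {
      assert(folded(true, 1, 2, 3) == 1 + 2);
      assert(folded(false, 1, 2, 3) == 1 + 3);
      return 0;
    }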
@@ -101,7 +117,7 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
if (CmpInst *CIOp = dyn_cast<CmpInst>(FirstInst)) {
CmpInst *NewCI = CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
LHSVal, RHSVal);
- NewCI->setDebugLoc(FirstInst->getDebugLoc());
+ NewCI->setDebugLoc(PHIArgMergedDebugLoc(PN));
return NewCI;
}
@@ -114,7 +130,7 @@ Instruction *InstCombiner::FoldPHIArgBinOpIntoPHI(PHINode &PN) {
for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i)
NewBinOp->andIRFlags(PN.getIncomingValue(i));
- NewBinOp->setDebugLoc(FirstInst->getDebugLoc());
+ NewBinOp->setDebugLoc(PHIArgMergedDebugLoc(PN));
return NewBinOp;
}
@@ -223,7 +239,7 @@ Instruction *InstCombiner::FoldPHIArgGEPIntoPHI(PHINode &PN) {
GetElementPtrInst::Create(FirstInst->getSourceElementType(), Base,
makeArrayRef(FixedOperands).slice(1));
if (AllInBounds) NewGEP->setIsInBounds();
- NewGEP->setDebugLoc(FirstInst->getDebugLoc());
+ NewGEP->setDebugLoc(PHIArgMergedDebugLoc(PN));
return NewGEP;
}
@@ -383,7 +399,7 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
for (Value *IncValue : PN.incoming_values())
cast<LoadInst>(IncValue)->setVolatile(false);
- NewLI->setDebugLoc(FirstLI->getDebugLoc());
+ NewLI->setDebugLoc(PHIArgMergedDebugLoc(PN));
return NewLI;
}
@@ -549,7 +565,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
if (CastInst *FirstCI = dyn_cast<CastInst>(FirstInst)) {
CastInst *NewCI = CastInst::Create(FirstCI->getOpcode(), PhiVal,
PN.getType());
- NewCI->setDebugLoc(FirstInst->getDebugLoc());
+ NewCI->setDebugLoc(PHIArgMergedDebugLoc(PN));
return NewCI;
}
@@ -560,14 +576,14 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
for (unsigned i = 1, e = PN.getNumIncomingValues(); i != e; ++i)
BinOp->andIRFlags(PN.getIncomingValue(i));
- BinOp->setDebugLoc(FirstInst->getDebugLoc());
+ BinOp->setDebugLoc(PHIArgMergedDebugLoc(PN));
return BinOp;
}
CmpInst *CIOp = cast<CmpInst>(FirstInst);
CmpInst *NewCI = CmpInst::Create(CIOp->getOpcode(), CIOp->getPredicate(),
PhiVal, ConstantOp);
- NewCI->setDebugLoc(FirstInst->getDebugLoc());
+ NewCI->setDebugLoc(PHIArgMergedDebugLoc(PN));
return NewCI;
}
@@ -835,8 +851,8 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
// needed piece.
if (PHINode *OldInVal = dyn_cast<PHINode>(PN->getIncomingValue(i)))
if (PHIsInspected.count(OldInVal)) {
- unsigned RefPHIId = std::find(PHIsToSlice.begin(),PHIsToSlice.end(),
- OldInVal)-PHIsToSlice.begin();
+ unsigned RefPHIId =
+ find(PHIsToSlice, OldInVal) - PHIsToSlice.begin();
PHIUsers.push_back(PHIUsageRecord(RefPHIId, Offset,
cast<Instruction>(Res)));
++UserE;
@@ -864,7 +880,7 @@ Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
// PHINode simplification
//
Instruction *InstCombiner::visitPHINode(PHINode &PN) {
- if (Value *V = SimplifyInstruction(&PN, DL, TLI, DT, AC))
+ if (Value *V = SimplifyInstruction(&PN, DL, &TLI, &DT, &AC))
return replaceInstUsesWith(PN, V);
if (Instruction *Result = FoldPHIArgZextsIntoPHI(PN))
@@ -921,7 +937,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) {
Instruction *CtxI = PN.getIncomingBlock(i)->getTerminator();
Value *VA = PN.getIncomingValue(i);
- if (isKnownNonZero(VA, DL, 0, AC, CtxI, DT)) {
+ if (isKnownNonZero(VA, DL, 0, &AC, CtxI, &DT)) {
if (!NonZeroConst)
NonZeroConst = GetAnyNonZeroConstInt(PN);
PN.setIncomingValue(i, NonZeroConst);
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 8f1ff8ac0e66..36644845352e 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -15,6 +15,7 @@
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace PatternMatch;
@@ -78,7 +79,7 @@ static Value *generateMinMaxSelectPattern(InstCombiner::BuilderTy *Builder,
/// a bitmask indicating which operands of this instruction are foldable if they
/// equal the other incoming value of the select.
///
-static unsigned GetSelectFoldableOperands(Instruction *I) {
+static unsigned getSelectFoldableOperands(Instruction *I) {
switch (I->getOpcode()) {
case Instruction::Add:
case Instruction::Mul:
@@ -98,7 +99,7 @@ static unsigned GetSelectFoldableOperands(Instruction *I) {
/// For the same transformation as the previous function, return the identity
/// constant that goes into the select.
-static Constant *GetSelectFoldableConstant(Instruction *I) {
+static Constant *getSelectFoldableConstant(Instruction *I) {
switch (I->getOpcode()) {
default: llvm_unreachable("This cannot happen!");
case Instruction::Add:
@@ -117,7 +118,7 @@ static Constant *GetSelectFoldableConstant(Instruction *I) {
}
/// We have (select c, TI, FI), and we know that TI and FI have the same opcode.
-Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI,
+Instruction *InstCombiner::foldSelectOpOp(SelectInst &SI, Instruction *TI,
Instruction *FI) {
// If this is a cast from the same type, merge.
if (TI->getNumOperands() == 1 && TI->isCast()) {
@@ -154,19 +155,19 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI,
}
// Fold this by inserting a select from the input values.
- Value *NewSI = Builder->CreateSelect(SI.getCondition(), TI->getOperand(0),
- FI->getOperand(0), SI.getName()+".v");
+ Value *NewSI =
+ Builder->CreateSelect(SI.getCondition(), TI->getOperand(0),
+ FI->getOperand(0), SI.getName() + ".v", &SI);
return CastInst::Create(Instruction::CastOps(TI->getOpcode()), NewSI,
TI->getType());
}
- // TODO: This function ends awkwardly in unreachable - fix to be more normal.
-
// Only handle binary operators with one-use here. As with the cast case
// above, it may be possible to relax the one-use constraint, but that needs
// be examined carefully since it may not reduce the total number of
// instructions.
- if (!isa<BinaryOperator>(TI) || !TI->hasOneUse() || !FI->hasOneUse())
+ BinaryOperator *BO = dyn_cast<BinaryOperator>(TI);
+ if (!BO || !TI->hasOneUse() || !FI->hasOneUse())
return nullptr;
// Figure out if the operations have any operands in common.
@@ -199,16 +200,11 @@ Instruction *InstCombiner::FoldSelectOpOp(SelectInst &SI, Instruction *TI,
}
// If we reach here, they do have operations in common.
- Value *NewSI = Builder->CreateSelect(SI.getCondition(), OtherOpT,
- OtherOpF, SI.getName()+".v");
-
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(TI)) {
- if (MatchIsOpZero)
- return BinaryOperator::Create(BO->getOpcode(), MatchOp, NewSI);
- else
- return BinaryOperator::Create(BO->getOpcode(), NewSI, MatchOp);
- }
- llvm_unreachable("Shouldn't get here");
+ Value *NewSI = Builder->CreateSelect(SI.getCondition(), OtherOpT, OtherOpF,
+ SI.getName() + ".v", &SI);
+ Value *Op0 = MatchIsOpZero ? MatchOp : NewSI;
+ Value *Op1 = MatchIsOpZero ? NewSI : MatchOp;
+ return BinaryOperator::Create(BO->getOpcode(), Op0, Op1);
}
static bool isSelect01(Constant *C1, Constant *C2) {
@@ -226,14 +222,14 @@ static bool isSelect01(Constant *C1, Constant *C2) {
/// Try to fold the select into one of the operands to allow further
/// optimization.
-Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
+Instruction *InstCombiner::foldSelectIntoOp(SelectInst &SI, Value *TrueVal,
Value *FalseVal) {
// See the comment above GetSelectFoldableOperands for a description of the
// transformation we are doing here.
if (Instruction *TVI = dyn_cast<Instruction>(TrueVal)) {
if (TVI->hasOneUse() && TVI->getNumOperands() == 2 &&
!isa<Constant>(FalseVal)) {
- if (unsigned SFO = GetSelectFoldableOperands(TVI)) {
+ if (unsigned SFO = getSelectFoldableOperands(TVI)) {
unsigned OpToFold = 0;
if ((SFO & 1) && FalseVal == TVI->getOperand(0)) {
OpToFold = 1;
@@ -242,7 +238,7 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
}
if (OpToFold) {
- Constant *C = GetSelectFoldableConstant(TVI);
+ Constant *C = getSelectFoldableConstant(TVI);
Value *OOp = TVI->getOperand(2-OpToFold);
// Avoid creating select between 2 constants unless it's selecting
// between 0, 1 and -1.
@@ -263,7 +259,7 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
if (Instruction *FVI = dyn_cast<Instruction>(FalseVal)) {
if (FVI->hasOneUse() && FVI->getNumOperands() == 2 &&
!isa<Constant>(TrueVal)) {
- if (unsigned SFO = GetSelectFoldableOperands(FVI)) {
+ if (unsigned SFO = getSelectFoldableOperands(FVI)) {
unsigned OpToFold = 0;
if ((SFO & 1) && TrueVal == FVI->getOperand(0)) {
OpToFold = 1;
@@ -272,7 +268,7 @@ Instruction *InstCombiner::FoldSelectIntoOp(SelectInst &SI, Value *TrueVal,
}
if (OpToFold) {
- Constant *C = GetSelectFoldableConstant(FVI);
+ Constant *C = getSelectFoldableConstant(FVI);
Value *OOp = FVI->getOperand(2-OpToFold);
// Avoid creating select between 2 constants unless it's selecting
// between 0, 1 and -1.
@@ -411,103 +407,151 @@ static Value *foldSelectCttzCtlz(ICmpInst *ICI, Value *TrueVal, Value *FalseVal,
return nullptr;
}
+/// Return true if we find and adjust an icmp+select pattern where the compare
+/// is with a constant that can be incremented or decremented to match the
+/// minimum or maximum idiom.
+static bool adjustMinMax(SelectInst &Sel, ICmpInst &Cmp) {
+ ICmpInst::Predicate Pred = Cmp.getPredicate();
+ Value *CmpLHS = Cmp.getOperand(0);
+ Value *CmpRHS = Cmp.getOperand(1);
+ Value *TrueVal = Sel.getTrueValue();
+ Value *FalseVal = Sel.getFalseValue();
+
+ // We may move or edit the compare, so make sure the select is the only user.
+ const APInt *CmpC;
+ if (!Cmp.hasOneUse() || !match(CmpRHS, m_APInt(CmpC)))
+ return false;
+
+ // These transforms only work for selects of integers or vector selects of
+ // integer vectors.
+ Type *SelTy = Sel.getType();
+ auto *SelEltTy = dyn_cast<IntegerType>(SelTy->getScalarType());
+ if (!SelEltTy || SelTy->isVectorTy() != Cmp.getType()->isVectorTy())
+ return false;
+
+ Constant *AdjustedRHS;
+ if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_SGT)
+ AdjustedRHS = ConstantInt::get(CmpRHS->getType(), *CmpC + 1);
+ else if (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_SLT)
+ AdjustedRHS = ConstantInt::get(CmpRHS->getType(), *CmpC - 1);
+ else
+ return false;
+
+ // X > C ? X : C+1 --> X < C+1 ? C+1 : X
+ // X < C ? X : C-1 --> X > C-1 ? C-1 : X
+ if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) ||
+ (CmpLHS == FalseVal && AdjustedRHS == TrueVal)) {
+ ; // Nothing to do here. Values match without any sign/zero extension.
+ }
+ // Types do not match. Instead of calculating this with mixed types, promote
+ // all to the larger type. This enables scalar evolution to analyze this
+ // expression.
+ else if (CmpRHS->getType()->getScalarSizeInBits() < SelEltTy->getBitWidth()) {
+ Constant *SextRHS = ConstantExpr::getSExt(AdjustedRHS, SelTy);
+
+ // X = sext x; x >s c ? X : C+1 --> X = sext x; X <s C+1 ? C+1 : X
+ // X = sext x; x <s c ? X : C-1 --> X = sext x; X >s C-1 ? C-1 : X
+ // X = sext x; x >u c ? X : C+1 --> X = sext x; X <u C+1 ? C+1 : X
+ // X = sext x; x <u c ? X : C-1 --> X = sext x; X >u C-1 ? C-1 : X
+ if (match(TrueVal, m_SExt(m_Specific(CmpLHS))) && SextRHS == FalseVal) {
+ CmpLHS = TrueVal;
+ AdjustedRHS = SextRHS;
+ } else if (match(FalseVal, m_SExt(m_Specific(CmpLHS))) &&
+ SextRHS == TrueVal) {
+ CmpLHS = FalseVal;
+ AdjustedRHS = SextRHS;
+ } else if (Cmp.isUnsigned()) {
+ Constant *ZextRHS = ConstantExpr::getZExt(AdjustedRHS, SelTy);
+ // X = zext x; x >u c ? X : C+1 --> X = zext x; X <u C+1 ? C+1 : X
+ // X = zext x; x <u c ? X : C-1 --> X = zext x; X >u C-1 ? C-1 : X
+ // zext + signed compare cannot be changed:
+ // 0xff <s 0x00, but 0x00ff >s 0x0000
+ if (match(TrueVal, m_ZExt(m_Specific(CmpLHS))) && ZextRHS == FalseVal) {
+ CmpLHS = TrueVal;
+ AdjustedRHS = ZextRHS;
+ } else if (match(FalseVal, m_ZExt(m_Specific(CmpLHS))) &&
+ ZextRHS == TrueVal) {
+ CmpLHS = FalseVal;
+ AdjustedRHS = ZextRHS;
+ } else {
+ return false;
+ }
+ } else {
+ return false;
+ }
+ } else {
+ return false;
+ }
+
+ Pred = ICmpInst::getSwappedPredicate(Pred);
+ CmpRHS = AdjustedRHS;
+ std::swap(FalseVal, TrueVal);
+ Cmp.setPredicate(Pred);
+ Cmp.setOperand(0, CmpLHS);
+ Cmp.setOperand(1, CmpRHS);
+ Sel.setOperand(1, TrueVal);
+ Sel.setOperand(2, FalseVal);
+ Sel.swapProfMetadata();
+
+ // Move the compare instruction right before the select instruction. Otherwise
+ // the sext/zext value may be defined after the compare instruction uses it.
+ Cmp.moveBefore(&Sel);
+
+ return true;
+}
+
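The adjustMinMax helper above nudges the compare constant so the select matches a canonical min/max. For an unsigned compare, and assuming C + 1 does not wrap, both the original and the adjusted form compute max(X, C + 1). A C++ spot-check of that assumption, illustrative only and not part of the patch:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t C = 41;                       // hypothetical compare constant
      for (uint32_t X : {0u, 40u, 41u, 42u, 43u, 0xFFFFFFFFu}) {
        uint32_t Before = X > C ? X : C + 1;       // X > C ? X : C+1
        uint32_t After = X < C + 1 ? C + 1 : X;    // X < C+1 ? C+1 : X
        assert(Before == After);                   // both are max(X, C+1)
      }
      return 0;
    }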
+/// If this is an integer min/max where the select's 'true' operand is a
+/// constant, canonicalize that constant to the 'false' operand:
+/// select (icmp Pred X, C), C, X --> select (icmp Pred' X, C), X, C
+static Instruction *
+canonicalizeMinMaxWithConstant(SelectInst &Sel, ICmpInst &Cmp,
+ InstCombiner::BuilderTy &Builder) {
+ // TODO: We should also canonicalize min/max when the select has a different
+ // constant value than the cmp constant, but we need to fix the backend first.
+ if (!Cmp.hasOneUse() || !isa<Constant>(Cmp.getOperand(1)) ||
+ !isa<Constant>(Sel.getTrueValue()) ||
+ isa<Constant>(Sel.getFalseValue()) ||
+ Cmp.getOperand(1) != Sel.getTrueValue())
+ return nullptr;
+
+ // Canonicalize the compare predicate based on whether we have min or max.
+ Value *LHS, *RHS;
+ ICmpInst::Predicate NewPred;
+ SelectPatternResult SPR = matchSelectPattern(&Sel, LHS, RHS);
+ switch (SPR.Flavor) {
+ case SPF_SMIN: NewPred = ICmpInst::ICMP_SLT; break;
+ case SPF_UMIN: NewPred = ICmpInst::ICMP_ULT; break;
+ case SPF_SMAX: NewPred = ICmpInst::ICMP_SGT; break;
+ case SPF_UMAX: NewPred = ICmpInst::ICMP_UGT; break;
+ default: return nullptr;
+ }
+
+ // Canonicalize the constant to the right side.
+ if (isa<Constant>(LHS))
+ std::swap(LHS, RHS);
+
+ Value *NewCmp = Builder.CreateICmp(NewPred, LHS, RHS);
+ SelectInst *NewSel = SelectInst::Create(NewCmp, LHS, RHS, "", nullptr, &Sel);
+
+ // We swapped the select operands, so swap the metadata too.
+ NewSel->swapProfMetadata();
+ return NewSel;
+}
+
/// Visit a SelectInst that has an ICmpInst as its first operand.
-Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
- ICmpInst *ICI) {
- bool Changed = false;
+Instruction *InstCombiner::foldSelectInstWithICmp(SelectInst &SI,
+ ICmpInst *ICI) {
+ if (Instruction *NewSel = canonicalizeMinMaxWithConstant(SI, *ICI, *Builder))
+ return NewSel;
+
+ bool Changed = adjustMinMax(SI, *ICI);
+
ICmpInst::Predicate Pred = ICI->getPredicate();
Value *CmpLHS = ICI->getOperand(0);
Value *CmpRHS = ICI->getOperand(1);
Value *TrueVal = SI.getTrueValue();
Value *FalseVal = SI.getFalseValue();
- // Check cases where the comparison is with a constant that
- // can be adjusted to fit the min/max idiom. We may move or edit ICI
- // here, so make sure the select is the only user.
- if (ICI->hasOneUse())
- if (ConstantInt *CI = dyn_cast<ConstantInt>(CmpRHS)) {
- switch (Pred) {
- default: break;
- case ICmpInst::ICMP_ULT:
- case ICmpInst::ICMP_SLT:
- case ICmpInst::ICMP_UGT:
- case ICmpInst::ICMP_SGT: {
- // These transformations only work for selects over integers.
- IntegerType *SelectTy = dyn_cast<IntegerType>(SI.getType());
- if (!SelectTy)
- break;
-
- Constant *AdjustedRHS;
- if (Pred == ICmpInst::ICMP_UGT || Pred == ICmpInst::ICMP_SGT)
- AdjustedRHS = ConstantInt::get(CI->getContext(), CI->getValue() + 1);
- else // (Pred == ICmpInst::ICMP_ULT || Pred == ICmpInst::ICMP_SLT)
- AdjustedRHS = ConstantInt::get(CI->getContext(), CI->getValue() - 1);
-
- // X > C ? X : C+1 --> X < C+1 ? C+1 : X
- // X < C ? X : C-1 --> X > C-1 ? C-1 : X
- if ((CmpLHS == TrueVal && AdjustedRHS == FalseVal) ||
- (CmpLHS == FalseVal && AdjustedRHS == TrueVal))
- ; // Nothing to do here. Values match without any sign/zero extension.
-
- // Types do not match. Instead of calculating this with mixed types
- // promote all to the larger type. This enables scalar evolution to
- // analyze this expression.
- else if (CmpRHS->getType()->getScalarSizeInBits()
- < SelectTy->getBitWidth()) {
- Constant *sextRHS = ConstantExpr::getSExt(AdjustedRHS, SelectTy);
-
- // X = sext x; x >s c ? X : C+1 --> X = sext x; X <s C+1 ? C+1 : X
- // X = sext x; x <s c ? X : C-1 --> X = sext x; X >s C-1 ? C-1 : X
- // X = sext x; x >u c ? X : C+1 --> X = sext x; X <u C+1 ? C+1 : X
- // X = sext x; x <u c ? X : C-1 --> X = sext x; X >u C-1 ? C-1 : X
- if (match(TrueVal, m_SExt(m_Specific(CmpLHS))) &&
- sextRHS == FalseVal) {
- CmpLHS = TrueVal;
- AdjustedRHS = sextRHS;
- } else if (match(FalseVal, m_SExt(m_Specific(CmpLHS))) &&
- sextRHS == TrueVal) {
- CmpLHS = FalseVal;
- AdjustedRHS = sextRHS;
- } else if (ICI->isUnsigned()) {
- Constant *zextRHS = ConstantExpr::getZExt(AdjustedRHS, SelectTy);
- // X = zext x; x >u c ? X : C+1 --> X = zext x; X <u C+1 ? C+1 : X
- // X = zext x; x <u c ? X : C-1 --> X = zext x; X >u C-1 ? C-1 : X
- // zext + signed compare cannot be changed:
- // 0xff <s 0x00, but 0x00ff >s 0x0000
- if (match(TrueVal, m_ZExt(m_Specific(CmpLHS))) &&
- zextRHS == FalseVal) {
- CmpLHS = TrueVal;
- AdjustedRHS = zextRHS;
- } else if (match(FalseVal, m_ZExt(m_Specific(CmpLHS))) &&
- zextRHS == TrueVal) {
- CmpLHS = FalseVal;
- AdjustedRHS = zextRHS;
- } else
- break;
- } else
- break;
- } else
- break;
-
- Pred = ICmpInst::getSwappedPredicate(Pred);
- CmpRHS = AdjustedRHS;
- std::swap(FalseVal, TrueVal);
- ICI->setPredicate(Pred);
- ICI->setOperand(0, CmpLHS);
- ICI->setOperand(1, CmpRHS);
- SI.setOperand(1, TrueVal);
- SI.setOperand(2, FalseVal);
-
- // Move ICI instruction right before the select instruction. Otherwise
- // the sext/zext value may be defined after the ICI instruction uses it.
- ICI->moveBefore(&SI);
-
- Changed = true;
- break;
- }
- }
- }
-
// Transform (X >s -1) ? C1 : C2 --> ((X >>s 31) & (C2 - C1)) + C1
// and (X <s 0) ? C2 : C1 --> ((X >>s 31) & (C2 - C1)) + C1
// FIXME: Type and constness constraints could be lifted, but we have to
@@ -623,7 +667,7 @@ Instruction *InstCombiner::visitSelectInstWithICmp(SelectInst &SI,
///
/// because Y is not live in BB1/BB2.
///
-static bool CanSelectOperandBeMappingIntoPredBlock(const Value *V,
+static bool canSelectOperandBeMappingIntoPredBlock(const Value *V,
const SelectInst &SI) {
// If the value is a non-instruction value like a constant or argument, it
// can always be mapped.
@@ -651,7 +695,7 @@ static bool CanSelectOperandBeMappingIntoPredBlock(const Value *V,
/// We have an SPF (e.g. a min or max) of an SPF of the form:
/// SPF2(SPF1(A, B), C)
-Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner,
+Instruction *InstCombiner::foldSPFofSPF(Instruction *Inner,
SelectPatternFlavor SPF1,
Value *A, Value *B,
Instruction &Outer,
@@ -675,28 +719,24 @@ Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner,
}
if (SPF1 == SPF2) {
- if (ConstantInt *CB = dyn_cast<ConstantInt>(B)) {
- if (ConstantInt *CC = dyn_cast<ConstantInt>(C)) {
- const APInt &ACB = CB->getValue();
- const APInt &ACC = CC->getValue();
-
- // MIN(MIN(A, 23), 97) -> MIN(A, 23)
- // MAX(MAX(A, 97), 23) -> MAX(A, 97)
- if ((SPF1 == SPF_UMIN && ACB.ule(ACC)) ||
- (SPF1 == SPF_SMIN && ACB.sle(ACC)) ||
- (SPF1 == SPF_UMAX && ACB.uge(ACC)) ||
- (SPF1 == SPF_SMAX && ACB.sge(ACC)))
- return replaceInstUsesWith(Outer, Inner);
-
- // MIN(MIN(A, 97), 23) -> MIN(A, 23)
- // MAX(MAX(A, 23), 97) -> MAX(A, 97)
- if ((SPF1 == SPF_UMIN && ACB.ugt(ACC)) ||
- (SPF1 == SPF_SMIN && ACB.sgt(ACC)) ||
- (SPF1 == SPF_UMAX && ACB.ult(ACC)) ||
- (SPF1 == SPF_SMAX && ACB.slt(ACC))) {
- Outer.replaceUsesOfWith(Inner, A);
- return &Outer;
- }
+ const APInt *CB, *CC;
+ if (match(B, m_APInt(CB)) && match(C, m_APInt(CC))) {
+ // MIN(MIN(A, 23), 97) -> MIN(A, 23)
+ // MAX(MAX(A, 97), 23) -> MAX(A, 97)
+ if ((SPF1 == SPF_UMIN && CB->ule(*CC)) ||
+ (SPF1 == SPF_SMIN && CB->sle(*CC)) ||
+ (SPF1 == SPF_UMAX && CB->uge(*CC)) ||
+ (SPF1 == SPF_SMAX && CB->sge(*CC)))
+ return replaceInstUsesWith(Outer, Inner);
+
+ // MIN(MIN(A, 97), 23) -> MIN(A, 23)
+ // MAX(MAX(A, 23), 97) -> MAX(A, 97)
+ if ((SPF1 == SPF_UMIN && CB->ugt(*CC)) ||
+ (SPF1 == SPF_SMIN && CB->sgt(*CC)) ||
+ (SPF1 == SPF_UMAX && CB->ult(*CC)) ||
+ (SPF1 == SPF_SMAX && CB->slt(*CC))) {
+ Outer.replaceUsesOfWith(Inner, A);
+ return &Outer;
}
}
}
@@ -712,8 +752,9 @@ Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner,
if ((SPF1 == SPF_ABS && SPF2 == SPF_NABS) ||
(SPF1 == SPF_NABS && SPF2 == SPF_ABS)) {
SelectInst *SI = cast<SelectInst>(Inner);
- Value *NewSI = Builder->CreateSelect(
- SI->getCondition(), SI->getFalseValue(), SI->getTrueValue());
+ Value *NewSI =
+ Builder->CreateSelect(SI->getCondition(), SI->getFalseValue(),
+ SI->getTrueValue(), SI->getName(), SI);
return replaceInstUsesWith(Outer, NewSI);
}
@@ -895,7 +936,7 @@ static Instruction *foldAddSubSelect(SelectInst &SI,
if (AddOp != TI)
std::swap(NewTrueOp, NewFalseOp);
Value *NewSel = Builder.CreateSelect(CondVal, NewTrueOp, NewFalseOp,
- SI.getName() + ".p");
+ SI.getName() + ".p", &SI);
if (SI.getType()->isFPOrFPVectorTy()) {
Instruction *RI =
@@ -912,6 +953,147 @@ static Instruction *foldAddSubSelect(SelectInst &SI,
return nullptr;
}
+Instruction *InstCombiner::foldSelectExtConst(SelectInst &Sel) {
+ Instruction *ExtInst;
+ if (!match(Sel.getTrueValue(), m_Instruction(ExtInst)) &&
+ !match(Sel.getFalseValue(), m_Instruction(ExtInst)))
+ return nullptr;
+
+ auto ExtOpcode = ExtInst->getOpcode();
+ if (ExtOpcode != Instruction::ZExt && ExtOpcode != Instruction::SExt)
+ return nullptr;
+
+ // TODO: Handle larger types? That requires adjusting FoldOpIntoSelect too.
+ Value *X = ExtInst->getOperand(0);
+ Type *SmallType = X->getType();
+ if (!SmallType->getScalarType()->isIntegerTy(1))
+ return nullptr;
+
+ Constant *C;
+ if (!match(Sel.getTrueValue(), m_Constant(C)) &&
+ !match(Sel.getFalseValue(), m_Constant(C)))
+ return nullptr;
+
+ // If the constant is the same after truncation to the smaller type and
+ // extension to the original type, we can narrow the select.
+ Value *Cond = Sel.getCondition();
+ Type *SelType = Sel.getType();
+ Constant *TruncC = ConstantExpr::getTrunc(C, SmallType);
+ Constant *ExtC = ConstantExpr::getCast(ExtOpcode, TruncC, SelType);
+ if (ExtC == C) {
+ Value *TruncCVal = cast<Value>(TruncC);
+ if (ExtInst == Sel.getFalseValue())
+ std::swap(X, TruncCVal);
+
+ // select Cond, (ext X), C --> ext(select Cond, X, C')
+ // select Cond, C, (ext X) --> ext(select Cond, C', X)
+ Value *NewSel = Builder->CreateSelect(Cond, X, TruncCVal, "narrow", &Sel);
+ return CastInst::Create(Instruction::CastOps(ExtOpcode), NewSel, SelType);
+ }
+
+ // If one arm of the select is the extend of the condition, replace that arm
+ // with the extension of the appropriate known bool value.
+ if (Cond == X) {
+ if (ExtInst == Sel.getTrueValue()) {
+ // select X, (sext X), C --> select X, -1, C
+ // select X, (zext X), C --> select X, 1, C
+ Constant *One = ConstantInt::getTrue(SmallType);
+ Constant *AllOnesOrOne = ConstantExpr::getCast(ExtOpcode, One, SelType);
+ return SelectInst::Create(Cond, AllOnesOrOne, C, "", nullptr, &Sel);
+ } else {
+ // select X, C, (sext X) --> select X, C, 0
+ // select X, C, (zext X) --> select X, C, 0
+ Constant *Zero = ConstantInt::getNullValue(SelType);
+ return SelectInst::Create(Cond, C, Zero, "", nullptr, &Sel);
+ }
+ }
+
+ return nullptr;
+}
+
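foldSelectExtConst above narrows a select around an extended i1 when the constant survives the round trip through the narrow type. For a zext and C == 1, for example, select Cond, (zext B), 1 becomes zext (select Cond, B, true). A C++ sketch of that equivalence, illustrative only and not part of the patch:

    #include <cassert>

    int main() {
      for (bool Cond : {false, true})
        for (bool B : {false, true}) {
          int Wide = Cond ? static_cast<int>(B) : 1;        // select Cond, (zext B), 1
          int Narrowed = static_cast<int>(Cond ? B : true); // zext (select Cond, B, true)
          assert(Wide == Narrowed);
        }
      return 0;
    }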
+/// Try to transform a vector select with a constant condition vector into a
+/// shuffle for easier combining with other shuffles and insert/extract.
+static Instruction *canonicalizeSelectToShuffle(SelectInst &SI) {
+ Value *CondVal = SI.getCondition();
+ Constant *CondC;
+ if (!CondVal->getType()->isVectorTy() || !match(CondVal, m_Constant(CondC)))
+ return nullptr;
+
+ unsigned NumElts = CondVal->getType()->getVectorNumElements();
+ SmallVector<Constant *, 16> Mask;
+ Mask.reserve(NumElts);
+ Type *Int32Ty = Type::getInt32Ty(CondVal->getContext());
+ for (unsigned i = 0; i != NumElts; ++i) {
+ Constant *Elt = CondC->getAggregateElement(i);
+ if (!Elt)
+ return nullptr;
+
+ if (Elt->isOneValue()) {
+ // If the select condition element is true, choose from the 1st vector.
+ Mask.push_back(ConstantInt::get(Int32Ty, i));
+ } else if (Elt->isNullValue()) {
+ // If the select condition element is false, choose from the 2nd vector.
+ Mask.push_back(ConstantInt::get(Int32Ty, i + NumElts));
+ } else if (isa<UndefValue>(Elt)) {
+ // If the select condition element is undef, the shuffle mask is undef.
+ Mask.push_back(UndefValue::get(Int32Ty));
+ } else {
+ // Bail out on a constant expression.
+ return nullptr;
+ }
+ }
+
+ return new ShuffleVectorInst(SI.getTrueValue(), SI.getFalseValue(),
+ ConstantVector::get(Mask));
+}
+
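canonicalizeSelectToShuffle maps each true lane of the constant condition to element i of the first operand and each false lane to element i + NumElts of the second operand (undef lanes become undef mask elements, omitted here). A small C++ sketch of the mask construction, illustrative only and not part of the patch:

    #include <cassert>
    #include <vector>

    // Build a shufflevector mask from a constant boolean select condition.
    std::vector<int> selectMask(const std::vector<bool> &Cond) {
      std::vector<int> Mask;
      const int NumElts = static_cast<int>(Cond.size());
      for (int i = 0; i != NumElts; ++i)
        Mask.push_back(Cond[i] ? i : i + NumElts);
      return Mask;
    }

    int main() {
      // select <i1 true, i1 false, i1 true, i1 false>, A, B
      //   --> shufflevector A, B, <0, 5, 2, 7>
      std::vector<bool> Cond = {true, false, true, false};
      std::vector<int> Expected = {0, 5, 2, 7};
      assert(selectMask(Cond) == Expected);
      return 0;
    }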
+/// Reuse bitcasted operands between a compare and select:
+/// select (cmp (bitcast C), (bitcast D)), (bitcast' C), (bitcast' D) -->
+/// bitcast (select (cmp (bitcast C), (bitcast D)), (bitcast C), (bitcast D))
+static Instruction *foldSelectCmpBitcasts(SelectInst &Sel,
+ InstCombiner::BuilderTy &Builder) {
+ Value *Cond = Sel.getCondition();
+ Value *TVal = Sel.getTrueValue();
+ Value *FVal = Sel.getFalseValue();
+
+ CmpInst::Predicate Pred;
+ Value *A, *B;
+ if (!match(Cond, m_Cmp(Pred, m_Value(A), m_Value(B))))
+ return nullptr;
+
+ // The select condition is a compare instruction. If the select's true/false
+ // values are already the same as the compare operands, there's nothing to do.
+ if (TVal == A || TVal == B || FVal == A || FVal == B)
+ return nullptr;
+
+ Value *C, *D;
+ if (!match(A, m_BitCast(m_Value(C))) || !match(B, m_BitCast(m_Value(D))))
+ return nullptr;
+
+ // select (cmp (bitcast C), (bitcast D)), (bitcast TSrc), (bitcast FSrc)
+ Value *TSrc, *FSrc;
+ if (!match(TVal, m_BitCast(m_Value(TSrc))) ||
+ !match(FVal, m_BitCast(m_Value(FSrc))))
+ return nullptr;
+
+ // If the select true/false values are *different bitcasts* of the same source
+ // operands, make the select operands the same as the compare operands and
+ // cast the result. This is the canonical select form for min/max.
+ Value *NewSel;
+ if (TSrc == C && FSrc == D) {
+ // select (cmp (bitcast C), (bitcast D)), (bitcast' C), (bitcast' D) -->
+ // bitcast (select (cmp A, B), A, B)
+ NewSel = Builder.CreateSelect(Cond, A, B, "", &Sel);
+ } else if (TSrc == D && FSrc == C) {
+ // select (cmp (bitcast C), (bitcast D)), (bitcast' D), (bitcast' C) -->
+ // bitcast (select (cmp A, B), B, A)
+ NewSel = Builder.CreateSelect(Cond, B, A, "", &Sel);
+ } else {
+ return nullptr;
+ }
+ return CastInst::CreateBitOrPointerCast(NewSel, Sel.getType());
+}
+
Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
Value *CondVal = SI.getCondition();
Value *TrueVal = SI.getTrueValue();
@@ -919,9 +1101,12 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
Type *SelType = SI.getType();
if (Value *V =
- SimplifySelectInst(CondVal, TrueVal, FalseVal, DL, TLI, DT, AC))
+ SimplifySelectInst(CondVal, TrueVal, FalseVal, DL, &TLI, &DT, &AC))
return replaceInstUsesWith(SI, V);
+ if (Instruction *I = canonicalizeSelectToShuffle(SI))
+ return I;
+
if (SelType->getScalarType()->isIntegerTy(1) &&
TrueVal->getType() == CondVal->getType()) {
if (match(TrueVal, m_One())) {
@@ -1085,7 +1270,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
// See if we are selecting two values based on a comparison of the two values.
if (ICmpInst *ICI = dyn_cast<ICmpInst>(CondVal))
- if (Instruction *Result = visitSelectInstWithICmp(SI, ICI))
+ if (Instruction *Result = foldSelectInstWithICmp(SI, ICI))
return Result;
if (Instruction *Add = foldAddSubSelect(SI, *Builder))
@@ -1095,12 +1280,15 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
auto *TI = dyn_cast<Instruction>(TrueVal);
auto *FI = dyn_cast<Instruction>(FalseVal);
if (TI && FI && TI->getOpcode() == FI->getOpcode())
- if (Instruction *IV = FoldSelectOpOp(SI, TI, FI))
+ if (Instruction *IV = foldSelectOpOp(SI, TI, FI))
return IV;
+ if (Instruction *I = foldSelectExtConst(SI))
+ return I;
+
// See if we can fold the select into one of our operands.
if (SelType->isIntOrIntVectorTy() || SelType->isFPOrFPVectorTy()) {
- if (Instruction *FoldI = FoldSelectIntoOp(SI, TrueVal, FalseVal))
+ if (Instruction *FoldI = foldSelectIntoOp(SI, TrueVal, FalseVal))
return FoldI;
Value *LHS, *RHS, *LHS2, *RHS2;
@@ -1124,9 +1312,9 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
Cmp = Builder->CreateFCmp(Pred, LHS, RHS);
}
- Value *NewSI = Builder->CreateCast(CastOp,
- Builder->CreateSelect(Cmp, LHS, RHS),
- SelType);
+ Value *NewSI = Builder->CreateCast(
+ CastOp, Builder->CreateSelect(Cmp, LHS, RHS, SI.getName(), &SI),
+ SelType);
return replaceInstUsesWith(SI, NewSI);
}
}
@@ -1139,39 +1327,35 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
// ABS(ABS(a)) -> ABS(a)
// NABS(NABS(a)) -> NABS(a)
if (SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2).Flavor)
- if (Instruction *R = FoldSPFofSPF(cast<Instruction>(LHS),SPF2,LHS2,RHS2,
+ if (Instruction *R = foldSPFofSPF(cast<Instruction>(LHS),SPF2,LHS2,RHS2,
SI, SPF, RHS))
return R;
if (SelectPatternFlavor SPF2 = matchSelectPattern(RHS, LHS2, RHS2).Flavor)
- if (Instruction *R = FoldSPFofSPF(cast<Instruction>(RHS),SPF2,LHS2,RHS2,
+ if (Instruction *R = foldSPFofSPF(cast<Instruction>(RHS),SPF2,LHS2,RHS2,
SI, SPF, LHS))
return R;
}
// MAX(~a, ~b) -> ~MIN(a, b)
- if (SPF == SPF_SMAX || SPF == SPF_UMAX) {
- if (IsFreeToInvert(LHS, LHS->hasNUses(2)) &&
- IsFreeToInvert(RHS, RHS->hasNUses(2))) {
-
- // This transform adds a xor operation and that extra cost needs to be
- // justified. We look for simplifications that will result from
- // applying this rule:
-
- bool Profitable =
- (LHS->hasNUses(2) && match(LHS, m_Not(m_Value()))) ||
- (RHS->hasNUses(2) && match(RHS, m_Not(m_Value()))) ||
- (SI.hasOneUse() && match(*SI.user_begin(), m_Not(m_Value())));
-
- if (Profitable) {
- Value *NewLHS = Builder->CreateNot(LHS);
- Value *NewRHS = Builder->CreateNot(RHS);
- Value *NewCmp = SPF == SPF_SMAX
- ? Builder->CreateICmpSLT(NewLHS, NewRHS)
- : Builder->CreateICmpULT(NewLHS, NewRHS);
- Value *NewSI =
- Builder->CreateNot(Builder->CreateSelect(NewCmp, NewLHS, NewRHS));
- return replaceInstUsesWith(SI, NewSI);
- }
+ if ((SPF == SPF_SMAX || SPF == SPF_UMAX) &&
+ IsFreeToInvert(LHS, LHS->hasNUses(2)) &&
+ IsFreeToInvert(RHS, RHS->hasNUses(2))) {
+ // For this transform to be profitable, we need to eliminate at least two
+ // 'not' instructions if we're going to add one 'not' instruction.
+ int NumberOfNots =
+ (LHS->hasNUses(2) && match(LHS, m_Not(m_Value()))) +
+ (RHS->hasNUses(2) && match(RHS, m_Not(m_Value()))) +
+ (SI.hasOneUse() && match(*SI.user_begin(), m_Not(m_Value())));
+
+ if (NumberOfNots >= 2) {
+ Value *NewLHS = Builder->CreateNot(LHS);
+ Value *NewRHS = Builder->CreateNot(RHS);
+ Value *NewCmp = SPF == SPF_SMAX
+ ? Builder->CreateICmpSLT(NewLHS, NewRHS)
+ : Builder->CreateICmpULT(NewLHS, NewRHS);
+ Value *NewSI =
+ Builder->CreateNot(Builder->CreateSelect(NewCmp, NewLHS, NewRHS));
+ return replaceInstUsesWith(SI, NewSI);
}
}
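The max-of-nots fold relies on bitwise-not being order-reversing, so max(~a, ~b) == ~min(a, b) for both signed and unsigned comparisons. A C++ spot-check, illustrative only and not part of the patch:

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    int main() {
      const int32_t Vals[] = {-2147483647 - 1, -5, -1, 0, 1, 7, 2147483647};
      for (int32_t A : Vals)
        for (int32_t B : Vals)
          assert(std::max(~A, ~B) == ~std::min(A, B));
      return 0;
    }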
@@ -1182,8 +1366,8 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
// See if we can fold the select into a phi node if the condition is a select.
if (isa<PHINode>(SI.getCondition()))
// The true/false values have to be live in the PHI predecessor's blocks.
- if (CanSelectOperandBeMappingIntoPredBlock(TrueVal, SI) &&
- CanSelectOperandBeMappingIntoPredBlock(FalseVal, SI))
+ if (canSelectOperandBeMappingIntoPredBlock(TrueVal, SI) &&
+ canSelectOperandBeMappingIntoPredBlock(FalseVal, SI))
if (Instruction *NV = FoldOpIntoPhi(SI))
return NV;
@@ -1233,7 +1417,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
return &SI;
}
- if (VectorType* VecTy = dyn_cast<VectorType>(SelType)) {
+ if (VectorType *VecTy = dyn_cast<VectorType>(SelType)) {
unsigned VWidth = VecTy->getNumElements();
APInt UndefElts(VWidth, 0);
APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
@@ -1266,5 +1450,8 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
}
}
+ if (Instruction *BitCastSel = foldSelectCmpBitcasts(SI, *Builder))
+ return BitCastSel;
+
return nullptr;
}
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
index 08e16a7ee1af..bc38c4aca348 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineShifts.cpp
@@ -39,10 +39,19 @@ Instruction *InstCombiner::commonShiftTransforms(BinaryOperator &I) {
if (Instruction *Res = FoldShiftByConstant(Op0, CUI, I))
return Res;
+ // (C1 shift (A add C2)) -> ((C1 shift C2) shift A)
+ // iff A and C2 are both positive.
+ Value *A;
+ Constant *C;
+ if (match(Op0, m_Constant()) && match(Op1, m_Add(m_Value(A), m_Constant(C))))
+ if (isKnownNonNegative(A, DL) && isKnownNonNegative(C, DL))
+ return BinaryOperator::Create(
+ I.getOpcode(), Builder->CreateBinOp(I.getOpcode(), Op0, C), A);
+
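The new transform splits a shift by a sum into two shifts. It is only valid when A and C2 are known non-negative and the total shift amount stays in range, which is what the isKnownNonNegative guards check. A C++ spot-check for the shl case, illustrative only and not part of the patch:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t C1 = 0x9u, C2 = 3u;           // hypothetical constants
      for (uint32_t A = 0; A + C2 < 32; ++A)
        assert((C1 << (A + C2)) == ((C1 << C2) << A));
      return 0;
    }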
// X shift (A srem B) -> X shift (A and B-1) iff B is a power of 2.
// Because shifts by negative values (which could occur if A were negative)
// are undefined.
- Value *A; const APInt *B;
+ const APInt *B;
if (Op1->hasOneUse() && match(Op1, m_SRem(m_Value(A), m_Power2(B)))) {
// FIXME: Should this get moved into SimplifyDemandedBits by saying we don't
// demand the sign bit (and many others) here??
@@ -194,8 +203,10 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
else
V = IC.Builder->CreateLShr(C, NumBits);
// If we got a constantexpr back, try to simplify it with TD info.
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
- V = ConstantFoldConstantExpression(CE, DL, IC.getTargetLibraryInfo());
+ if (auto *C = dyn_cast<Constant>(V))
+ if (auto *FoldedC =
+ ConstantFoldConstant(C, DL, &IC.getTargetLibraryInfo()))
+ V = FoldedC;
return V;
}
@@ -317,7 +328,167 @@ static Value *GetShiftedValue(Value *V, unsigned NumBits, bool isLeftShift,
}
}
+/// Try to fold (X << C1) << C2, where the shifts are some combination of
+/// shl/ashr/lshr.
+static Instruction *
+foldShiftByConstOfShiftByConst(BinaryOperator &I, ConstantInt *COp1,
+ InstCombiner::BuilderTy *Builder) {
+ Value *Op0 = I.getOperand(0);
+ uint32_t TypeBits = Op0->getType()->getScalarSizeInBits();
+
+ // Find out if this is a shift of a shift by a constant.
+ BinaryOperator *ShiftOp = dyn_cast<BinaryOperator>(Op0);
+ if (ShiftOp && !ShiftOp->isShift())
+ ShiftOp = nullptr;
+
+ if (ShiftOp && isa<ConstantInt>(ShiftOp->getOperand(1))) {
+
+ // This is a constant shift of a constant shift. Be careful about hiding
+ // shl instructions behind bit masks. They are used to represent multiplies
+ // by a constant, and it is important that simple arithmetic expressions
+ // are still recognizable by scalar evolution.
+ //
+ // The transforms applied to shl are very similar to the transforms applied
+ // to mul by constant. We can be more aggressive about optimizing right
+ // shifts.
+ //
+ // Combinations of right and left shifts will still be optimized in
+ // DAGCombine where scalar evolution no longer applies.
+
+ ConstantInt *ShiftAmt1C = cast<ConstantInt>(ShiftOp->getOperand(1));
+ uint32_t ShiftAmt1 = ShiftAmt1C->getLimitedValue(TypeBits);
+ uint32_t ShiftAmt2 = COp1->getLimitedValue(TypeBits);
+ assert(ShiftAmt2 != 0 && "Should have been simplified earlier");
+ if (ShiftAmt1 == 0)
+ return nullptr; // Will be simplified in the future.
+ Value *X = ShiftOp->getOperand(0);
+
+ IntegerType *Ty = cast<IntegerType>(I.getType());
+
+ // Check for (X << c1) << c2 and (X >> c1) >> c2
+ if (I.getOpcode() == ShiftOp->getOpcode()) {
+ uint32_t AmtSum = ShiftAmt1 + ShiftAmt2; // Fold into one big shift.
+ // If this is an oversized composite shift, then unsigned shifts become
+ // zero (handled in InstSimplify) and ashr saturates.
+ if (AmtSum >= TypeBits) {
+ if (I.getOpcode() != Instruction::AShr)
+ return nullptr;
+ AmtSum = TypeBits - 1; // Saturate to 31 for i32 ashr.
+ }
+
+ return BinaryOperator::Create(I.getOpcode(), X,
+ ConstantInt::get(Ty, AmtSum));
+ }
+
+ if (ShiftAmt1 == ShiftAmt2) {
+ // If we have ((X << C) >>u C), turn this into X & (-1 >>u C).
+ if (I.getOpcode() == Instruction::LShr &&
+ ShiftOp->getOpcode() == Instruction::Shl) {
+ APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt1));
+ return BinaryOperator::CreateAnd(
+ X, ConstantInt::get(I.getContext(), Mask));
+ }
+ } else if (ShiftAmt1 < ShiftAmt2) {
+ uint32_t ShiftDiff = ShiftAmt2 - ShiftAmt1;
+
+ // (X >>?,exact C1) << C2 --> X << (C2-C1)
+ // The inexact version is deferred to DAGCombine so we don't hide shl
+ // behind a bit mask.
+ if (I.getOpcode() == Instruction::Shl &&
+ ShiftOp->getOpcode() != Instruction::Shl && ShiftOp->isExact()) {
+ assert(ShiftOp->getOpcode() == Instruction::LShr ||
+ ShiftOp->getOpcode() == Instruction::AShr);
+ ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
+ BinaryOperator *NewShl =
+ BinaryOperator::Create(Instruction::Shl, X, ShiftDiffCst);
+ NewShl->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
+ NewShl->setHasNoSignedWrap(I.hasNoSignedWrap());
+ return NewShl;
+ }
+
+ // (X << C1) >>u C2 --> X >>u (C2-C1) & (-1 >> C2)
+ if (I.getOpcode() == Instruction::LShr &&
+ ShiftOp->getOpcode() == Instruction::Shl) {
+ ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
+ // (X <<nuw C1) >>u C2 --> X >>u (C2-C1)
+ if (ShiftOp->hasNoUnsignedWrap()) {
+ BinaryOperator *NewLShr =
+ BinaryOperator::Create(Instruction::LShr, X, ShiftDiffCst);
+ NewLShr->setIsExact(I.isExact());
+ return NewLShr;
+ }
+ Value *Shift = Builder->CreateLShr(X, ShiftDiffCst);
+
+ APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
+ return BinaryOperator::CreateAnd(
+ Shift, ConstantInt::get(I.getContext(), Mask));
+ }
+
+ // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in. However,
+ // we can handle (X <<nsw C1) >>s C2 since it only shifts in sign bits.
+ if (I.getOpcode() == Instruction::AShr &&
+ ShiftOp->getOpcode() == Instruction::Shl) {
+ if (ShiftOp->hasNoSignedWrap()) {
+ // (X <<nsw C1) >>s C2 --> X >>s (C2-C1)
+ ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
+ BinaryOperator *NewAShr =
+ BinaryOperator::Create(Instruction::AShr, X, ShiftDiffCst);
+ NewAShr->setIsExact(I.isExact());
+ return NewAShr;
+ }
+ }
+ } else {
+ assert(ShiftAmt2 < ShiftAmt1);
+ uint32_t ShiftDiff = ShiftAmt1 - ShiftAmt2;
+
+ // (X >>?exact C1) << C2 --> X >>?exact (C1-C2)
+ // The inexact version is deferred to DAGCombine so we don't hide shl
+ // behind a bit mask.
+ if (I.getOpcode() == Instruction::Shl &&
+ ShiftOp->getOpcode() != Instruction::Shl && ShiftOp->isExact()) {
+ ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
+ BinaryOperator *NewShr =
+ BinaryOperator::Create(ShiftOp->getOpcode(), X, ShiftDiffCst);
+ NewShr->setIsExact(true);
+ return NewShr;
+ }
+
+ // (X << C1) >>u C2 --> X << (C1-C2) & (-1 >> C2)
+ if (I.getOpcode() == Instruction::LShr &&
+ ShiftOp->getOpcode() == Instruction::Shl) {
+ ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
+ if (ShiftOp->hasNoUnsignedWrap()) {
+ // (X <<nuw C1) >>u C2 --> X <<nuw (C1-C2)
+ BinaryOperator *NewShl =
+ BinaryOperator::Create(Instruction::Shl, X, ShiftDiffCst);
+ NewShl->setHasNoUnsignedWrap(true);
+ return NewShl;
+ }
+ Value *Shift = Builder->CreateShl(X, ShiftDiffCst);
+ APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
+ return BinaryOperator::CreateAnd(
+ Shift, ConstantInt::get(I.getContext(), Mask));
+ }
+
+ // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in. However,
+ // we can handle (X <<nsw C1) >>s C2 since it only shifts in sign bits.
+ if (I.getOpcode() == Instruction::AShr &&
+ ShiftOp->getOpcode() == Instruction::Shl) {
+ if (ShiftOp->hasNoSignedWrap()) {
+ // (X <<nsw C1) >>s C2 --> X <<nsw (C1-C2)
+ ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
+ BinaryOperator *NewShl =
+ BinaryOperator::Create(Instruction::Shl, X, ShiftDiffCst);
+ NewShl->setHasNoSignedWrap(true);
+ return NewShl;
+ }
+ }
+ }
+ }
+
+ return nullptr;
+}
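foldShiftByConstOfShiftByConst mostly moves existing logic into a helper, but two of the identities it folds are easy to spot-check: same-direction shifts add their amounts, and a shl/lshr round trip by the same amount is a mask. Illustrative C++ only, not part of the patch:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t X : {0u, 1u, 0x12345678u, 0xFFFFFFFFu}) {
        // (X << C1) << C2 == X << (C1 + C2) when the sum stays in range.
        for (uint32_t C1 = 0; C1 < 16; ++C1)
          for (uint32_t C2 = 0; C1 + C2 < 32; ++C2)
            assert(((X << C1) << C2) == (X << (C1 + C2)));
        // ((X << C) >>u C) == X & (-1 >>u C).
        for (uint32_t C = 0; C < 32; ++C)
          assert(((X << C) >> C) == (X & (0xFFFFFFFFu >> C)));
      }
      return 0;
    }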
Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
BinaryOperator &I) {
@@ -455,9 +626,9 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
V1->getName()+".mask");
return BinaryOperator::Create(Op0BO->getOpcode(), YS, XM);
}
+ LLVM_FALLTHROUGH;
}
- // FALL THROUGH.
case Instruction::Sub: {
// Turn ((X >> C) + Y) << C -> (X + (Y << C)) & (~0 << C)
if (isLeftShift && Op0BO->getOperand(0)->hasOneUse() &&
@@ -539,157 +710,9 @@ Instruction *InstCombiner::FoldShiftByConstant(Value *Op0, Constant *Op1,
}
}
- // Find out if this is a shift of a shift by a constant.
- BinaryOperator *ShiftOp = dyn_cast<BinaryOperator>(Op0);
- if (ShiftOp && !ShiftOp->isShift())
- ShiftOp = nullptr;
-
- if (ShiftOp && isa<ConstantInt>(ShiftOp->getOperand(1))) {
-
- // This is a constant shift of a constant shift. Be careful about hiding
- // shl instructions behind bit masks. They are used to represent multiplies
- // by a constant, and it is important that simple arithmetic expressions
- // are still recognizable by scalar evolution.
- //
- // The transforms applied to shl are very similar to the transforms applied
- // to mul by constant. We can be more aggressive about optimizing right
- // shifts.
- //
- // Combinations of right and left shifts will still be optimized in
- // DAGCombine where scalar evolution no longer applies.
-
- ConstantInt *ShiftAmt1C = cast<ConstantInt>(ShiftOp->getOperand(1));
- uint32_t ShiftAmt1 = ShiftAmt1C->getLimitedValue(TypeBits);
- uint32_t ShiftAmt2 = COp1->getLimitedValue(TypeBits);
- assert(ShiftAmt2 != 0 && "Should have been simplified earlier");
- if (ShiftAmt1 == 0) return nullptr; // Will be simplified in the future.
- Value *X = ShiftOp->getOperand(0);
-
- IntegerType *Ty = cast<IntegerType>(I.getType());
-
- // Check for (X << c1) << c2 and (X >> c1) >> c2
- if (I.getOpcode() == ShiftOp->getOpcode()) {
- uint32_t AmtSum = ShiftAmt1+ShiftAmt2; // Fold into one big shift.
- // If this is oversized composite shift, then unsigned shifts get 0, ashr
- // saturates.
- if (AmtSum >= TypeBits) {
- if (I.getOpcode() != Instruction::AShr)
- return replaceInstUsesWith(I, Constant::getNullValue(I.getType()));
- AmtSum = TypeBits-1; // Saturate to 31 for i32 ashr.
- }
-
- return BinaryOperator::Create(I.getOpcode(), X,
- ConstantInt::get(Ty, AmtSum));
- }
-
- if (ShiftAmt1 == ShiftAmt2) {
- // If we have ((X << C) >>u C), turn this into X & (-1 >>u C).
- if (I.getOpcode() == Instruction::LShr &&
- ShiftOp->getOpcode() == Instruction::Shl) {
- APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt1));
- return BinaryOperator::CreateAnd(X,
- ConstantInt::get(I.getContext(), Mask));
- }
- } else if (ShiftAmt1 < ShiftAmt2) {
- uint32_t ShiftDiff = ShiftAmt2-ShiftAmt1;
+ if (Instruction *Folded = foldShiftByConstOfShiftByConst(I, COp1, Builder))
+ return Folded;
- // (X >>?,exact C1) << C2 --> X << (C2-C1)
- // The inexact version is deferred to DAGCombine so we don't hide shl
- // behind a bit mask.
- if (I.getOpcode() == Instruction::Shl &&
- ShiftOp->getOpcode() != Instruction::Shl &&
- ShiftOp->isExact()) {
- assert(ShiftOp->getOpcode() == Instruction::LShr ||
- ShiftOp->getOpcode() == Instruction::AShr);
- ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
- BinaryOperator *NewShl = BinaryOperator::Create(Instruction::Shl,
- X, ShiftDiffCst);
- NewShl->setHasNoUnsignedWrap(I.hasNoUnsignedWrap());
- NewShl->setHasNoSignedWrap(I.hasNoSignedWrap());
- return NewShl;
- }
-
- // (X << C1) >>u C2 --> X >>u (C2-C1) & (-1 >> C2)
- if (I.getOpcode() == Instruction::LShr &&
- ShiftOp->getOpcode() == Instruction::Shl) {
- ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
- // (X <<nuw C1) >>u C2 --> X >>u (C2-C1)
- if (ShiftOp->hasNoUnsignedWrap()) {
- BinaryOperator *NewLShr = BinaryOperator::Create(Instruction::LShr,
- X, ShiftDiffCst);
- NewLShr->setIsExact(I.isExact());
- return NewLShr;
- }
- Value *Shift = Builder->CreateLShr(X, ShiftDiffCst);
-
- APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
- return BinaryOperator::CreateAnd(Shift,
- ConstantInt::get(I.getContext(),Mask));
- }
-
- // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in. However,
- // we can handle (X <<nsw C1) >>s C2 since it only shifts in sign bits.
- if (I.getOpcode() == Instruction::AShr &&
- ShiftOp->getOpcode() == Instruction::Shl) {
- if (ShiftOp->hasNoSignedWrap()) {
- // (X <<nsw C1) >>s C2 --> X >>s (C2-C1)
- ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
- BinaryOperator *NewAShr = BinaryOperator::Create(Instruction::AShr,
- X, ShiftDiffCst);
- NewAShr->setIsExact(I.isExact());
- return NewAShr;
- }
- }
- } else {
- assert(ShiftAmt2 < ShiftAmt1);
- uint32_t ShiftDiff = ShiftAmt1-ShiftAmt2;
-
- // (X >>?exact C1) << C2 --> X >>?exact (C1-C2)
- // The inexact version is deferred to DAGCombine so we don't hide shl
- // behind a bit mask.
- if (I.getOpcode() == Instruction::Shl &&
- ShiftOp->getOpcode() != Instruction::Shl &&
- ShiftOp->isExact()) {
- ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
- BinaryOperator *NewShr = BinaryOperator::Create(ShiftOp->getOpcode(),
- X, ShiftDiffCst);
- NewShr->setIsExact(true);
- return NewShr;
- }
-
- // (X << C1) >>u C2 --> X << (C1-C2) & (-1 >> C2)
- if (I.getOpcode() == Instruction::LShr &&
- ShiftOp->getOpcode() == Instruction::Shl) {
- ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
- if (ShiftOp->hasNoUnsignedWrap()) {
- // (X <<nuw C1) >>u C2 --> X <<nuw (C1-C2)
- BinaryOperator *NewShl = BinaryOperator::Create(Instruction::Shl,
- X, ShiftDiffCst);
- NewShl->setHasNoUnsignedWrap(true);
- return NewShl;
- }
- Value *Shift = Builder->CreateShl(X, ShiftDiffCst);
-
- APInt Mask(APInt::getLowBitsSet(TypeBits, TypeBits - ShiftAmt2));
- return BinaryOperator::CreateAnd(Shift,
- ConstantInt::get(I.getContext(),Mask));
- }
-
- // We can't handle (X << C1) >>s C2, it shifts arbitrary bits in. However,
- // we can handle (X <<nsw C1) >>s C2 since it only shifts in sign bits.
- if (I.getOpcode() == Instruction::AShr &&
- ShiftOp->getOpcode() == Instruction::Shl) {
- if (ShiftOp->hasNoSignedWrap()) {
- // (X <<nsw C1) >>s C2 --> X <<nsw (C1-C2)
- ConstantInt *ShiftDiffCst = ConstantInt::get(Ty, ShiftDiff);
- BinaryOperator *NewShl = BinaryOperator::Create(Instruction::Shl,
- X, ShiftDiffCst);
- NewShl->setHasNoSignedWrap(true);
- return NewShl;
- }
- }
- }
- }
return nullptr;
}
@@ -699,7 +722,7 @@ Instruction *InstCombiner::visitShl(BinaryOperator &I) {
if (Value *V =
SimplifyShlInst(I.getOperand(0), I.getOperand(1), I.hasNoSignedWrap(),
- I.hasNoUnsignedWrap(), DL, TLI, DT, AC))
+ I.hasNoUnsignedWrap(), DL, &TLI, &DT, &AC))
return replaceInstUsesWith(I, V);
if (Instruction *V = commonShiftTransforms(I))
@@ -740,7 +763,7 @@ Instruction *InstCombiner::visitLShr(BinaryOperator &I) {
return replaceInstUsesWith(I, V);
if (Value *V = SimplifyLShrInst(I.getOperand(0), I.getOperand(1), I.isExact(),
- DL, TLI, DT, AC))
+ DL, &TLI, &DT, &AC))
return replaceInstUsesWith(I, V);
if (Instruction *R = commonShiftTransforms(I))
@@ -784,7 +807,7 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) {
return replaceInstUsesWith(I, V);
if (Value *V = SimplifyAShrInst(I.getOperand(0), I.getOperand(1), I.isExact(),
- DL, TLI, DT, AC))
+ DL, &TLI, &DT, &AC))
return replaceInstUsesWith(I, V);
if (Instruction *R = commonShiftTransforms(I))
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index f3268d2c3471..8b930bd95dfe 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -981,6 +981,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
bool MadeChange = false;
APInt UndefElts2(VWidth, 0);
+ APInt UndefElts3(VWidth, 0);
Value *TmpV;
switch (I->getOpcode()) {
default: break;
@@ -1020,8 +1021,8 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
}
case Instruction::ShuffleVector: {
ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
- uint64_t LHSVWidth =
- cast<VectorType>(Shuffle->getOperand(0)->getType())->getNumElements();
+ unsigned LHSVWidth =
+ Shuffle->getOperand(0)->getType()->getVectorNumElements();
APInt LeftDemanded(LHSVWidth, 0), RightDemanded(LHSVWidth, 0);
for (unsigned i = 0; i < VWidth; i++) {
if (DemandedElts[i]) {
@@ -1037,17 +1038,21 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
}
}
- APInt UndefElts4(LHSVWidth, 0);
+ APInt LHSUndefElts(LHSVWidth, 0);
TmpV = SimplifyDemandedVectorElts(I->getOperand(0), LeftDemanded,
- UndefElts4, Depth + 1);
+ LHSUndefElts, Depth + 1);
if (TmpV) { I->setOperand(0, TmpV); MadeChange = true; }
- APInt UndefElts3(LHSVWidth, 0);
+ APInt RHSUndefElts(LHSVWidth, 0);
TmpV = SimplifyDemandedVectorElts(I->getOperand(1), RightDemanded,
- UndefElts3, Depth + 1);
+ RHSUndefElts, Depth + 1);
if (TmpV) { I->setOperand(1, TmpV); MadeChange = true; }
bool NewUndefElts = false;
+ unsigned LHSIdx = -1u, LHSValIdx = -1u;
+ unsigned RHSIdx = -1u, RHSValIdx = -1u;
+ bool LHSUniform = true;
+ bool RHSUniform = true;
for (unsigned i = 0; i < VWidth; i++) {
unsigned MaskVal = Shuffle->getMaskValue(i);
if (MaskVal == -1u) {
@@ -1056,18 +1061,59 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
NewUndefElts = true;
UndefElts.setBit(i);
} else if (MaskVal < LHSVWidth) {
- if (UndefElts4[MaskVal]) {
+ if (LHSUndefElts[MaskVal]) {
NewUndefElts = true;
UndefElts.setBit(i);
+ } else {
+ LHSIdx = LHSIdx == -1u ? i : LHSVWidth;
+ LHSValIdx = LHSValIdx == -1u ? MaskVal : LHSVWidth;
+ LHSUniform = LHSUniform && (MaskVal == i);
}
} else {
- if (UndefElts3[MaskVal - LHSVWidth]) {
+ if (RHSUndefElts[MaskVal - LHSVWidth]) {
NewUndefElts = true;
UndefElts.setBit(i);
+ } else {
+ RHSIdx = RHSIdx == -1u ? i : LHSVWidth;
+ RHSValIdx = RHSValIdx == -1u ? MaskVal - LHSVWidth : LHSVWidth;
+ RHSUniform = RHSUniform && (MaskVal - LHSVWidth == i);
}
}
}
+ // Try to transform a shuffle with a constant vector and a single element
+ // from that constant vector into a single insertelement instruction.
+ // shufflevector V, C, <v1, v2, .., ci, .., vm> ->
+ // insertelement V, C[ci], ci-n
+ if (LHSVWidth == Shuffle->getType()->getNumElements()) {
+ Value *Op = nullptr;
+ Constant *Value = nullptr;
+ unsigned Idx = -1u;
+
+ // Find constant vector with the single element in shuffle (LHS or RHS).
+ if (LHSIdx < LHSVWidth && RHSUniform) {
+ if (auto *CV = dyn_cast<ConstantVector>(Shuffle->getOperand(0))) {
+ Op = Shuffle->getOperand(1);
+ Value = CV->getOperand(LHSValIdx);
+ Idx = LHSIdx;
+ }
+ }
+ if (RHSIdx < LHSVWidth && LHSUniform) {
+ if (auto *CV = dyn_cast<ConstantVector>(Shuffle->getOperand(1))) {
+ Op = Shuffle->getOperand(0);
+ Value = CV->getOperand(RHSValIdx);
+ Idx = RHSIdx;
+ }
+ }
+ // Found constant vector with single element - convert to insertelement.
+ if (Op && Value) {
+ Instruction *New = InsertElementInst::Create(
+ Op, Value, ConstantInt::get(Type::getInt32Ty(I->getContext()), Idx),
+ Shuffle->getName());
+ InsertNewInstWith(New, *Shuffle);
+ return New;
+ }
+ }
if (NewUndefElts) {
// Add additional discovered undefs.
SmallVector<Constant*, 16> Elts;
@@ -1209,114 +1255,223 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
switch (II->getIntrinsicID()) {
default: break;
+ case Intrinsic::x86_xop_vfrcz_ss:
+ case Intrinsic::x86_xop_vfrcz_sd:
+ // The instructions for these intrinsics are specified to zero the upper
+ // bits rather than pass them through like other scalar intrinsics, so we
+ // shouldn't just use Arg0 if DemandedElts[0] is clear like we do for other
+ // intrinsics. Instead we should return a zero vector.
+ if (!DemandedElts[0]) {
+ Worklist.Add(II);
+ return ConstantAggregateZero::get(II->getType());
+ }
+
+ // Only the lower element is used.
+ DemandedElts = 1;
+ TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
+ UndefElts, Depth + 1);
+ if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
+
+ // Only the lower element is undefined. The high elements are zero.
+ UndefElts = UndefElts[0];
+ break;
+
// Unary scalar-as-vector operations that work column-wise.
case Intrinsic::x86_sse_rcp_ss:
case Intrinsic::x86_sse_rsqrt_ss:
case Intrinsic::x86_sse_sqrt_ss:
case Intrinsic::x86_sse2_sqrt_sd:
- case Intrinsic::x86_xop_vfrcz_ss:
- case Intrinsic::x86_xop_vfrcz_sd:
TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
UndefElts, Depth + 1);
if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
// If lowest element of a scalar op isn't used then use Arg0.
- if (DemandedElts.getLoBits(1) != 1)
+ if (!DemandedElts[0]) {
+ Worklist.Add(II);
return II->getArgOperand(0);
+ }
// TODO: If only low elt lower SQRT to FSQRT (with rounding/exceptions
// checks).
break;
- // Binary scalar-as-vector operations that work column-wise. A dest element
- // is a function of the corresponding input elements from the two inputs.
- case Intrinsic::x86_sse_add_ss:
- case Intrinsic::x86_sse_sub_ss:
- case Intrinsic::x86_sse_mul_ss:
- case Intrinsic::x86_sse_div_ss:
+ // Binary scalar-as-vector operations that work column-wise. The high
+ // elements come from operand 0. The low element is a function of both
+ // operands.
case Intrinsic::x86_sse_min_ss:
case Intrinsic::x86_sse_max_ss:
case Intrinsic::x86_sse_cmp_ss:
- case Intrinsic::x86_sse2_add_sd:
- case Intrinsic::x86_sse2_sub_sd:
- case Intrinsic::x86_sse2_mul_sd:
- case Intrinsic::x86_sse2_div_sd:
case Intrinsic::x86_sse2_min_sd:
case Intrinsic::x86_sse2_max_sd:
- case Intrinsic::x86_sse2_cmp_sd:
- case Intrinsic::x86_sse41_round_ss:
- case Intrinsic::x86_sse41_round_sd:
+ case Intrinsic::x86_sse2_cmp_sd: {
TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
UndefElts, Depth + 1);
if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
+
+ // If lowest element of a scalar op isn't used then use Arg0.
+ if (!DemandedElts[0]) {
+ Worklist.Add(II);
+ return II->getArgOperand(0);
+ }
+
+ // Only lower element is used for operand 1.
+ DemandedElts = 1;
TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
UndefElts2, Depth + 1);
if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
- // If only the low elt is demanded and this is a scalarizable intrinsic,
- // scalarize it now.
- if (DemandedElts == 1) {
- switch (II->getIntrinsicID()) {
- default: break;
- case Intrinsic::x86_sse_add_ss:
- case Intrinsic::x86_sse_sub_ss:
- case Intrinsic::x86_sse_mul_ss:
- case Intrinsic::x86_sse_div_ss:
- case Intrinsic::x86_sse2_add_sd:
- case Intrinsic::x86_sse2_sub_sd:
- case Intrinsic::x86_sse2_mul_sd:
- case Intrinsic::x86_sse2_div_sd:
- // TODO: Lower MIN/MAX/etc.
- Value *LHS = II->getArgOperand(0);
- Value *RHS = II->getArgOperand(1);
- // Extract the element as scalars.
- LHS = InsertNewInstWith(ExtractElementInst::Create(LHS,
- ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II);
- RHS = InsertNewInstWith(ExtractElementInst::Create(RHS,
- ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U)), *II);
-
- switch (II->getIntrinsicID()) {
- default: llvm_unreachable("Case stmts out of sync!");
- case Intrinsic::x86_sse_add_ss:
- case Intrinsic::x86_sse2_add_sd:
- TmpV = InsertNewInstWith(BinaryOperator::CreateFAdd(LHS, RHS,
- II->getName()), *II);
- break;
- case Intrinsic::x86_sse_sub_ss:
- case Intrinsic::x86_sse2_sub_sd:
- TmpV = InsertNewInstWith(BinaryOperator::CreateFSub(LHS, RHS,
- II->getName()), *II);
- break;
- case Intrinsic::x86_sse_mul_ss:
- case Intrinsic::x86_sse2_mul_sd:
- TmpV = InsertNewInstWith(BinaryOperator::CreateFMul(LHS, RHS,
- II->getName()), *II);
- break;
- case Intrinsic::x86_sse_div_ss:
- case Intrinsic::x86_sse2_div_sd:
- TmpV = InsertNewInstWith(BinaryOperator::CreateFDiv(LHS, RHS,
- II->getName()), *II);
- break;
- }
-
- Instruction *New =
- InsertElementInst::Create(
- UndefValue::get(II->getType()), TmpV,
- ConstantInt::get(Type::getInt32Ty(I->getContext()), 0U, false),
- II->getName());
- InsertNewInstWith(New, *II);
- return New;
- }
+ // Lower element is undefined if both lower elements are undefined.
+ // Consider things like undef&0. The result is known zero, not undef.
+ if (!UndefElts2[0])
+ UndefElts.clearBit(0);
+
+ break;
+ }
+
+ // Binary scalar-as-vector operations that work column-wise. The high
+ // elements come from operand 0 and the low element comes from operand 1.
+ case Intrinsic::x86_sse41_round_ss:
+ case Intrinsic::x86_sse41_round_sd: {
+ // Don't use the low element of operand 0.
+ APInt DemandedElts2 = DemandedElts;
+ DemandedElts2.clearBit(0);
+ TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts2,
+ UndefElts, Depth + 1);
+ if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
+
+ // If lowest element of a scalar op isn't used then use Arg0.
+ if (!DemandedElts[0]) {
+ Worklist.Add(II);
+ return II->getArgOperand(0);
}
+ // Only lower element is used for operand 1.
+ DemandedElts = 1;
+ TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
+ UndefElts2, Depth + 1);
+ if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
+
+ // Take the high undef elements from operand 0 and take the lower element
+ // from operand 1.
+ UndefElts.clearBit(0);
+ UndefElts |= UndefElts2[0];
+ break;
+ }
+
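For the ROUNDSS/ROUNDSD case above, the undef bookkeeping boils down to: high lanes track operand 0, lane 0 tracks operand 1. A small sketch with a plain 64-bit mask standing in for APInt (assuming at most 64 lanes; the helper name is made up):

#include <cstdint>

uint64_t combineRoundssUndef(uint64_t UndefFromOp0, uint64_t UndefFromOp1) {
  uint64_t Undef = UndefFromOp0 & ~uint64_t(1); // high lanes come from operand 0
  Undef |= (UndefFromOp1 & 1);                  // lane 0 comes from operand 1
  return Undef;
}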
+ // Three input scalar-as-vector operations that work column-wise. The high
+ // elements come from operand 0 and the low element is a function of all
+ // three inputs.
+ case Intrinsic::x86_avx512_mask_add_ss_round:
+ case Intrinsic::x86_avx512_mask_div_ss_round:
+ case Intrinsic::x86_avx512_mask_mul_ss_round:
+ case Intrinsic::x86_avx512_mask_sub_ss_round:
+ case Intrinsic::x86_avx512_mask_max_ss_round:
+ case Intrinsic::x86_avx512_mask_min_ss_round:
+ case Intrinsic::x86_avx512_mask_add_sd_round:
+ case Intrinsic::x86_avx512_mask_div_sd_round:
+ case Intrinsic::x86_avx512_mask_mul_sd_round:
+ case Intrinsic::x86_avx512_mask_sub_sd_round:
+ case Intrinsic::x86_avx512_mask_max_sd_round:
+ case Intrinsic::x86_avx512_mask_min_sd_round:
+ case Intrinsic::x86_fma_vfmadd_ss:
+ case Intrinsic::x86_fma_vfmsub_ss:
+ case Intrinsic::x86_fma_vfnmadd_ss:
+ case Intrinsic::x86_fma_vfnmsub_ss:
+ case Intrinsic::x86_fma_vfmadd_sd:
+ case Intrinsic::x86_fma_vfmsub_sd:
+ case Intrinsic::x86_fma_vfnmadd_sd:
+ case Intrinsic::x86_fma_vfnmsub_sd:
+ case Intrinsic::x86_avx512_mask_vfmadd_ss:
+ case Intrinsic::x86_avx512_mask_vfmadd_sd:
+ case Intrinsic::x86_avx512_maskz_vfmadd_ss:
+ case Intrinsic::x86_avx512_maskz_vfmadd_sd:
+ TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
+ UndefElts, Depth + 1);
+ if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
+
// If lowest element of a scalar op isn't used then use Arg0.
- if (DemandedElts.getLoBits(1) != 1)
+ if (!DemandedElts[0]) {
+ Worklist.Add(II);
return II->getArgOperand(0);
+ }
+
+ // Only lower element is used for operand 1 and 2.
+ DemandedElts = 1;
+ TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
+ UndefElts2, Depth + 1);
+ if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
+ TmpV = SimplifyDemandedVectorElts(II->getArgOperand(2), DemandedElts,
+ UndefElts3, Depth + 1);
+ if (TmpV) { II->setArgOperand(2, TmpV); MadeChange = true; }
+
+ // Lower element is undefined if all three lower elements are undefined.
+ // Consider things like undef&0. The result is known zero, not undef.
+ if (!UndefElts2[0] || !UndefElts3[0])
+ UndefElts.clearBit(0);
- // Output elements are undefined if both are undefined. Consider things
- // like undef&0. The result is known zero, not undef.
- UndefElts &= UndefElts2;
break;
+ case Intrinsic::x86_avx512_mask3_vfmadd_ss:
+ case Intrinsic::x86_avx512_mask3_vfmadd_sd:
+ case Intrinsic::x86_avx512_mask3_vfmsub_ss:
+ case Intrinsic::x86_avx512_mask3_vfmsub_sd:
+ case Intrinsic::x86_avx512_mask3_vfnmsub_ss:
+ case Intrinsic::x86_avx512_mask3_vfnmsub_sd:
+ // These intrinsics get the passthru bits from operand 2.
+ TmpV = SimplifyDemandedVectorElts(II->getArgOperand(2), DemandedElts,
+ UndefElts, Depth + 1);
+ if (TmpV) { II->setArgOperand(2, TmpV); MadeChange = true; }
+
+ // If lowest element of a scalar op isn't used then use Arg2.
+ if (!DemandedElts[0]) {
+ Worklist.Add(II);
+ return II->getArgOperand(2);
+ }
+
+ // Only lower element is used for operand 0 and 1.
+ DemandedElts = 1;
+ TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
+ UndefElts2, Depth + 1);
+ if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
+ TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
+ UndefElts3, Depth + 1);
+ if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
+
+ // Lower element is undefined if all three lower elements are undefined.
+ // Consider things like undef&0. The result is known zero, not undef.
+ if (!UndefElts2[0] || !UndefElts3[0])
+ UndefElts.clearBit(0);
+
+ break;
+
+ case Intrinsic::x86_sse2_pmulu_dq:
+ case Intrinsic::x86_sse41_pmuldq:
+ case Intrinsic::x86_avx2_pmul_dq:
+ case Intrinsic::x86_avx2_pmulu_dq:
+ case Intrinsic::x86_avx512_pmul_dq_512:
+ case Intrinsic::x86_avx512_pmulu_dq_512: {
+ Value *Op0 = II->getArgOperand(0);
+ Value *Op1 = II->getArgOperand(1);
+ unsigned InnerVWidth = Op0->getType()->getVectorNumElements();
+ assert((VWidth * 2) == InnerVWidth && "Unexpected input size");
+
+ APInt InnerDemandedElts(InnerVWidth, 0);
+ for (unsigned i = 0; i != VWidth; ++i)
+ if (DemandedElts[i])
+ InnerDemandedElts.setBit(i * 2);
+
+ UndefElts2 = APInt(InnerVWidth, 0);
+ TmpV = SimplifyDemandedVectorElts(Op0, InnerDemandedElts, UndefElts2,
+ Depth + 1);
+ if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
+
+ UndefElts3 = APInt(InnerVWidth, 0);
+ TmpV = SimplifyDemandedVectorElts(Op1, InnerDemandedElts, UndefElts3,
+ Depth + 1);
+ if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
+
+ break;
+ }
+
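The PMULDQ/PMULUDQ case above works because output lane i only reads input lane 2*i of each twice-as-wide operand. A sketch of the demanded-element widening with plain masks in place of APInt (assuming at most 32 output lanes; the function name is illustrative):

#include <cstdint>

uint64_t widenDemandedForPMULDQ(uint64_t DemandedOut, unsigned OutWidth) {
  uint64_t DemandedIn = 0;
  for (unsigned I = 0; I != OutWidth; ++I)
    if (DemandedOut & (uint64_t(1) << I))
      DemandedIn |= uint64_t(1) << (2 * I); // output lane I reads input lane 2*I
  return DemandedIn;
}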
// SSE4A instructions leave the upper 64-bits of the 128-bit result
// in an undefined state.
case Intrinsic::x86_sse4a_extrq:
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index a76138756148..b2477f6c8633 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -145,7 +145,7 @@ Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
if (Value *V = SimplifyExtractElementInst(
- EI.getVectorOperand(), EI.getIndexOperand(), DL, TLI, DT, AC))
+ EI.getVectorOperand(), EI.getIndexOperand(), DL, &TLI, &DT, &AC))
return replaceInstUsesWith(EI, V);
// If vector val is constant with all elements the same, replace EI with
@@ -413,6 +413,14 @@ static void replaceExtractElements(InsertElementInst *InsElt,
if (InsertionBlock != InsElt->getParent())
return;
+ // TODO: This restriction matches the check in visitInsertElementInst() and
+ // prevents an infinite loop caused by not turning the extract/insert pair
+ // into a shuffle. We really should not need either check, but we're lacking
+ // folds for shufflevectors because we're afraid to generate shuffle masks
+ // that the backend can't handle.
+ if (InsElt->hasOneUse() && isa<InsertElementInst>(InsElt->user_back()))
+ return;
+
auto *WideVec = new ShuffleVectorInst(ExtVecOp, UndefValue::get(ExtVecType),
ConstantVector::get(ExtendMask));
@@ -452,7 +460,7 @@ static ShuffleOps collectShuffleElements(Value *V,
Value *PermittedRHS,
InstCombiner &IC) {
assert(V->getType()->isVectorTy() && "Invalid shuffle!");
- unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
+ unsigned NumElts = V->getType()->getVectorNumElements();
if (isa<UndefValue>(V)) {
Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
@@ -566,6 +574,176 @@ Instruction *InstCombiner::visitInsertValueInst(InsertValueInst &I) {
return nullptr;
}
+static bool isShuffleEquivalentToSelect(ShuffleVectorInst &Shuf) {
+ int MaskSize = Shuf.getMask()->getType()->getVectorNumElements();
+ int VecSize = Shuf.getOperand(0)->getType()->getVectorNumElements();
+
+ // A vector select does not change the size of the operands.
+ if (MaskSize != VecSize)
+ return false;
+
+ // Each mask element must be undefined or choose a vector element from one of
+ // the source operands without crossing vector lanes.
+ for (int i = 0; i != MaskSize; ++i) {
+ int Elt = Shuf.getMaskValue(i);
+ if (Elt != -1 && Elt != i && Elt != i + VecSize)
+ return false;
+ }
+
+ return true;
+}
+
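A standalone restatement of the select-equivalence test above, on plain vectors rather than IR types (-1 encodes an undef mask element; the helper name is invented for this sketch):

#include <vector>

bool isSelectLikeMask(const std::vector<int> &Mask, int VecSize) {
  if (static_cast<int>(Mask.size()) != VecSize)
    return false; // a vector select never changes the element count
  for (int I = 0; I != VecSize; ++I) {
    int Elt = Mask[I];
    if (Elt != -1 && Elt != I && Elt != I + VecSize)
      return false; // lane I must stay in lane I of one of the two inputs
  }
  return true;
}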
+// Turn a chain of inserts that splats a value into a canonical insert + shuffle
+// splat. That is:
+// insertelt(insertelt(insertelt(insertelt X, %k, 0), %k, 1), %k, 2) ... ->
+// shufflevector(insertelt(X, %k, 0), undef, zero)
+static Instruction *foldInsSequenceIntoBroadcast(InsertElementInst &InsElt) {
+ // We are interested in the last insert in a chain. So, if this insert
+ // has a single user, and that user is an insert, bail.
+ if (InsElt.hasOneUse() && isa<InsertElementInst>(InsElt.user_back()))
+ return nullptr;
+
+ VectorType *VT = cast<VectorType>(InsElt.getType());
+ int NumElements = VT->getNumElements();
+
+ // Do not try to do this for a one-element vector, since that's a nop,
+ // and will cause an inf-loop.
+ if (NumElements == 1)
+ return nullptr;
+
+ Value *SplatVal = InsElt.getOperand(1);
+ InsertElementInst *CurrIE = &InsElt;
+ SmallVector<bool, 16> ElementPresent(NumElements, false);
+
+ // Walk the chain backwards, keeping track of which indices we inserted into,
+ // until we hit something that isn't an insert of the splatted value.
+ while (CurrIE) {
+ ConstantInt *Idx = dyn_cast<ConstantInt>(CurrIE->getOperand(2));
+ if (!Idx || CurrIE->getOperand(1) != SplatVal)
+ return nullptr;
+
+ // Check none of the intermediate steps have any additional uses.
+ if ((CurrIE != &InsElt) && !CurrIE->hasOneUse())
+ return nullptr;
+
+ ElementPresent[Idx->getZExtValue()] = true;
+ CurrIE = dyn_cast<InsertElementInst>(CurrIE->getOperand(0));
+ }
+
+ // Make sure we've seen an insert into every element.
+ if (llvm::any_of(ElementPresent, [](bool Present) { return !Present; }))
+ return nullptr;
+
+ // All right, create the insert + shuffle.
+ Instruction *InsertFirst = InsertElementInst::Create(
+ UndefValue::get(VT), SplatVal,
+ ConstantInt::get(Type::getInt32Ty(InsElt.getContext()), 0), "", &InsElt);
+
+ Constant *ZeroMask = ConstantAggregateZero::get(
+ VectorType::get(Type::getInt32Ty(InsElt.getContext()), NumElements));
+
+ return new ShuffleVectorInst(InsertFirst, UndefValue::get(VT), ZeroMask);
+}
+
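The broadcast fold above only fires when walking the insert chain shows every lane written exactly by the same splatted scalar. A simplified model of that walk is sketched below; the InsertStep struct abstracts the real checks (constant index, single use, same scalar operand) into precomputed fields, so it is illustrative only.

#include <vector>

struct InsertStep {
  unsigned Index;         // constant insertion index of this insertelement
  bool InsertsSplatValue; // true if this step inserts the same scalar %k
};

bool isSplatInsertChain(const std::vector<InsertStep> &Chain, unsigned NumElts) {
  if (NumElts <= 1)
    return false; // a one-element vector is already a "splat"; nothing to do
  std::vector<bool> Present(NumElts, false);
  for (const InsertStep &S : Chain) {
    if (!S.InsertsSplatValue || S.Index >= NumElts)
      return false;
    Present[S.Index] = true;
  }
  for (bool P : Present)
    if (!P)
      return false; // some lane was never written: not a full broadcast
  return true;
}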
+/// insertelt (shufflevector X, CVec, Mask|insertelt X, C1, CIndex1), C, CIndex
+/// --> shufflevector X, CVec', Mask'
+static Instruction *foldConstantInsEltIntoShuffle(InsertElementInst &InsElt) {
+ auto *Inst = dyn_cast<Instruction>(InsElt.getOperand(0));
+ // Bail out if the parent has more than one use. In that case, we'd be
+ // replacing the insertelt with a shuffle, and that's not a clear win.
+ if (!Inst || !Inst->hasOneUse())
+ return nullptr;
+ if (auto *Shuf = dyn_cast<ShuffleVectorInst>(InsElt.getOperand(0))) {
+ // The shuffle must have a constant vector operand. The insertelt must have
+ // a constant scalar being inserted at a constant position in the vector.
+ Constant *ShufConstVec, *InsEltScalar;
+ uint64_t InsEltIndex;
+ if (!match(Shuf->getOperand(1), m_Constant(ShufConstVec)) ||
+ !match(InsElt.getOperand(1), m_Constant(InsEltScalar)) ||
+ !match(InsElt.getOperand(2), m_ConstantInt(InsEltIndex)))
+ return nullptr;
+
+ // Adding an element to an arbitrary shuffle could be expensive, but a
+ // shuffle that selects elements from vectors without crossing lanes is
+ // assumed cheap.
+ // If we're just adding a constant into that shuffle, it will still be
+ // cheap.
+ if (!isShuffleEquivalentToSelect(*Shuf))
+ return nullptr;
+
+ // From the above 'select' check, we know that the mask has the same number
+ // of elements as the vector input operands. We also know that each constant
+ // input element is used in its lane and cannot be used more than once by
+ // the shuffle. Therefore, replace the constant in the shuffle's constant
+ // vector with the insertelt constant. Replace the constant in the shuffle's
+ // mask vector with the insertelt index plus the length of the vector
+ // (because the constant vector operand of a shuffle is always the 2nd
+ // operand).
+ Constant *Mask = Shuf->getMask();
+ unsigned NumElts = Mask->getType()->getVectorNumElements();
+ SmallVector<Constant *, 16> NewShufElts(NumElts);
+ SmallVector<Constant *, 16> NewMaskElts(NumElts);
+ for (unsigned I = 0; I != NumElts; ++I) {
+ if (I == InsEltIndex) {
+ NewShufElts[I] = InsEltScalar;
+ Type *Int32Ty = Type::getInt32Ty(Shuf->getContext());
+ NewMaskElts[I] = ConstantInt::get(Int32Ty, InsEltIndex + NumElts);
+ } else {
+ // Copy over the existing values.
+ NewShufElts[I] = ShufConstVec->getAggregateElement(I);
+ NewMaskElts[I] = Mask->getAggregateElement(I);
+ }
+ }
+
+ // Create new operands for a shuffle that includes the constant of the
+ // original insertelt. The old shuffle will be dead now.
+ return new ShuffleVectorInst(Shuf->getOperand(0),
+ ConstantVector::get(NewShufElts),
+ ConstantVector::get(NewMaskElts));
+ } else if (auto *IEI = dyn_cast<InsertElementInst>(Inst)) {
+ // Transform sequences of insertelements ops with constant data/indexes into
+ // a single shuffle op.
+ unsigned NumElts = InsElt.getType()->getNumElements();
+
+ uint64_t InsertIdx[2];
+ Constant *Val[2];
+ if (!match(InsElt.getOperand(2), m_ConstantInt(InsertIdx[0])) ||
+ !match(InsElt.getOperand(1), m_Constant(Val[0])) ||
+ !match(IEI->getOperand(2), m_ConstantInt(InsertIdx[1])) ||
+ !match(IEI->getOperand(1), m_Constant(Val[1])))
+ return nullptr;
+ SmallVector<Constant *, 16> Values(NumElts);
+ SmallVector<Constant *, 16> Mask(NumElts);
+ auto ValI = std::begin(Val);
+ // Generate new constant vector and mask.
+ // We have 2 values/masks from the insertelements instructions. Insert them
+ // into new value/mask vectors.
+ for (uint64_t I : InsertIdx) {
+ if (!Values[I]) {
+ assert(!Mask[I]);
+ Values[I] = *ValI;
+ Mask[I] = ConstantInt::get(Type::getInt32Ty(InsElt.getContext()),
+ NumElts + I);
+ }
+ ++ValI;
+ }
+ // Remaining values are filled with 'undef' values.
+ for (unsigned I = 0; I < NumElts; ++I) {
+ if (!Values[I]) {
+ assert(!Mask[I]);
+ Values[I] = UndefValue::get(InsElt.getType()->getElementType());
+ Mask[I] = ConstantInt::get(Type::getInt32Ty(InsElt.getContext()), I);
+ }
+ }
+ // Create new operands for a shuffle that includes the constant of the
+ // original insertelt.
+ return new ShuffleVectorInst(IEI->getOperand(0),
+ ConstantVector::get(Values),
+ ConstantVector::get(Mask));
+ }
+ return nullptr;
+}
+
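The shuffle branch of the fold above ultimately performs one lane update: the inserted scalar replaces the corresponding lane of the shuffle's constant operand, and the mask lane is redirected to that operand. A tiny model with plain ints standing in for Constants (names are invented for the sketch):

#include <vector>

void mergeConstantIntoShuffle(std::vector<int> &ShufConstVec,
                              std::vector<int> &Mask, unsigned InsEltIndex,
                              int InsEltScalar) {
  unsigned NumElts = static_cast<unsigned>(Mask.size());
  ShufConstVec[InsEltIndex] = InsEltScalar;                    // new constant lane
  Mask[InsEltIndex] = static_cast<int>(InsEltIndex + NumElts); // pick it from op1
}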
Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
Value *VecOp = IE.getOperand(0);
Value *ScalarOp = IE.getOperand(1);
@@ -616,7 +794,7 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
}
}
- unsigned VWidth = cast<VectorType>(VecOp->getType())->getNumElements();
+ unsigned VWidth = VecOp->getType()->getVectorNumElements();
APInt UndefElts(VWidth, 0);
APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) {
@@ -625,6 +803,14 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
return &IE;
}
+ if (Instruction *Shuf = foldConstantInsEltIntoShuffle(IE))
+ return Shuf;
+
+ // Turn a sequence of inserts that broadcasts a scalar into a single
+ // insert + shufflevector.
+ if (Instruction *Broadcast = foldInsSequenceIntoBroadcast(IE))
+ return Broadcast;
+
return nullptr;
}
@@ -903,8 +1089,7 @@ static void recognizeIdentityMask(const SmallVectorImpl<int> &Mask,
// +--+--+--+--+
static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI,
SmallVector<int, 16> &Mask) {
- unsigned LHSElems =
- cast<VectorType>(SVI.getOperand(0)->getType())->getNumElements();
+ unsigned LHSElems = SVI.getOperand(0)->getType()->getVectorNumElements();
unsigned MaskElems = Mask.size();
unsigned BegIdx = Mask.front();
unsigned EndIdx = Mask.back();
@@ -928,7 +1113,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
if (isa<UndefValue>(SVI.getOperand(2)))
return replaceInstUsesWith(SVI, UndefValue::get(SVI.getType()));
- unsigned VWidth = cast<VectorType>(SVI.getType())->getNumElements();
+ unsigned VWidth = SVI.getType()->getVectorNumElements();
APInt UndefElts(VWidth, 0);
APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
@@ -940,7 +1125,7 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
MadeChange = true;
}
- unsigned LHSWidth = cast<VectorType>(LHS->getType())->getNumElements();
+ unsigned LHSWidth = LHS->getType()->getVectorNumElements();
// Canonicalize shuffle(x ,x,mask) -> shuffle(x, undef,mask')
// Canonicalize shuffle(undef,x,mask) -> shuffle(x, undef,mask').
@@ -1143,11 +1328,11 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
if (LHSShuffle) {
LHSOp0 = LHSShuffle->getOperand(0);
LHSOp1 = LHSShuffle->getOperand(1);
- LHSOp0Width = cast<VectorType>(LHSOp0->getType())->getNumElements();
+ LHSOp0Width = LHSOp0->getType()->getVectorNumElements();
}
if (RHSShuffle) {
RHSOp0 = RHSShuffle->getOperand(0);
- RHSOp0Width = cast<VectorType>(RHSOp0->getType())->getNumElements();
+ RHSOp0Width = RHSOp0->getType()->getVectorNumElements();
}
Value* newLHS = LHS;
Value* newRHS = RHS;
diff --git a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index 377ccb9c37f7..9a52874c4c21 100644
--- a/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -177,11 +177,10 @@ static bool simplifyAssocCastAssoc(BinaryOperator *BinOp1) {
return false;
// TODO: Enhance logic for other BinOps and remove this check.
- auto AssocOpcode = BinOp1->getOpcode();
- if (AssocOpcode != Instruction::Xor && AssocOpcode != Instruction::And &&
- AssocOpcode != Instruction::Or)
+ if (!BinOp1->isBitwiseLogicOp())
return false;
+ auto AssocOpcode = BinOp1->getOpcode();
auto *BinOp2 = dyn_cast<BinaryOperator>(Cast->getOperand(0));
if (!BinOp2 || !BinOp2->hasOneUse() || BinOp2->getOpcode() != AssocOpcode)
return false;
@@ -684,14 +683,14 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) {
if (SI0->getCondition() == SI1->getCondition()) {
Value *SI = nullptr;
if (Value *V = SimplifyBinOp(TopLevelOpcode, SI0->getFalseValue(),
- SI1->getFalseValue(), DL, TLI, DT, AC))
+ SI1->getFalseValue(), DL, &TLI, &DT, &AC))
SI = Builder->CreateSelect(SI0->getCondition(),
Builder->CreateBinOp(TopLevelOpcode,
SI0->getTrueValue(),
SI1->getTrueValue()),
V);
if (Value *V = SimplifyBinOp(TopLevelOpcode, SI0->getTrueValue(),
- SI1->getTrueValue(), DL, TLI, DT, AC))
+ SI1->getTrueValue(), DL, &TLI, &DT, &AC))
SI = Builder->CreateSelect(
SI0->getCondition(), V,
Builder->CreateBinOp(TopLevelOpcode, SI0->getFalseValue(),
@@ -741,17 +740,18 @@ Value *InstCombiner::dyn_castFNegVal(Value *V, bool IgnoreZeroSign) const {
return nullptr;
}
-static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO,
+static Value *foldOperationIntoSelectOperand(Instruction &I, Value *SO,
InstCombiner *IC) {
- if (CastInst *CI = dyn_cast<CastInst>(&I)) {
- return IC->Builder->CreateCast(CI->getOpcode(), SO, I.getType());
- }
+ if (auto *Cast = dyn_cast<CastInst>(&I))
+ return IC->Builder->CreateCast(Cast->getOpcode(), SO, I.getType());
+
+ assert(I.isBinaryOp() && "Unexpected opcode for select folding");
// Figure out if the constant is the left or the right argument.
bool ConstIsRHS = isa<Constant>(I.getOperand(1));
Constant *ConstOperand = cast<Constant>(I.getOperand(ConstIsRHS));
- if (Constant *SOC = dyn_cast<Constant>(SO)) {
+ if (auto *SOC = dyn_cast<Constant>(SO)) {
if (ConstIsRHS)
return ConstantExpr::get(I.getOpcode(), SOC, ConstOperand);
return ConstantExpr::get(I.getOpcode(), ConstOperand, SOC);
@@ -761,21 +761,13 @@ static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO,
if (!ConstIsRHS)
std::swap(Op0, Op1);
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&I)) {
- Value *RI = IC->Builder->CreateBinOp(BO->getOpcode(), Op0, Op1,
- SO->getName()+".op");
- Instruction *FPInst = dyn_cast<Instruction>(RI);
- if (FPInst && isa<FPMathOperator>(FPInst))
- FPInst->copyFastMathFlags(BO);
- return RI;
- }
- if (ICmpInst *CI = dyn_cast<ICmpInst>(&I))
- return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1,
- SO->getName()+".cmp");
- if (FCmpInst *CI = dyn_cast<FCmpInst>(&I))
- return IC->Builder->CreateICmp(CI->getPredicate(), Op0, Op1,
- SO->getName()+".cmp");
- llvm_unreachable("Unknown binary instruction type!");
+ auto *BO = cast<BinaryOperator>(&I);
+ Value *RI = IC->Builder->CreateBinOp(BO->getOpcode(), Op0, Op1,
+ SO->getName() + ".op");
+ auto *FPInst = dyn_cast<Instruction>(RI);
+ if (FPInst && isa<FPMathOperator>(FPInst))
+ FPInst->copyFastMathFlags(BO);
+ return RI;
}
/// Given an instruction with a select as one operand and a constant as the
@@ -783,51 +775,53 @@ static Value *FoldOperationIntoSelectOperand(Instruction &I, Value *SO,
/// This also works for Cast instructions, which obviously do not have a second
/// operand.
Instruction *InstCombiner::FoldOpIntoSelect(Instruction &Op, SelectInst *SI) {
- // Don't modify shared select instructions
- if (!SI->hasOneUse()) return nullptr;
- Value *TV = SI->getOperand(1);
- Value *FV = SI->getOperand(2);
-
- if (isa<Constant>(TV) || isa<Constant>(FV)) {
- // Bool selects with constant operands can be folded to logical ops.
- if (SI->getType()->isIntegerTy(1)) return nullptr;
-
- // If it's a bitcast involving vectors, make sure it has the same number of
- // elements on both sides.
- if (BitCastInst *BC = dyn_cast<BitCastInst>(&Op)) {
- VectorType *DestTy = dyn_cast<VectorType>(BC->getDestTy());
- VectorType *SrcTy = dyn_cast<VectorType>(BC->getSrcTy());
-
- // Verify that either both or neither are vectors.
- if ((SrcTy == nullptr) != (DestTy == nullptr)) return nullptr;
- // If vectors, verify that they have the same number of elements.
- if (SrcTy && SrcTy->getNumElements() != DestTy->getNumElements())
- return nullptr;
- }
+ // Don't modify shared select instructions.
+ if (!SI->hasOneUse())
+ return nullptr;
- // Test if a CmpInst instruction is used exclusively by a select as
- // part of a minimum or maximum operation. If so, refrain from doing
- // any other folding. This helps out other analyses which understand
- // non-obfuscated minimum and maximum idioms, such as ScalarEvolution
- // and CodeGen. And in this case, at least one of the comparison
- // operands has at least one user besides the compare (the select),
- // which would often largely negate the benefit of folding anyway.
- if (auto *CI = dyn_cast<CmpInst>(SI->getCondition())) {
- if (CI->hasOneUse()) {
- Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
- if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) ||
- (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1))
- return nullptr;
- }
- }
+ Value *TV = SI->getTrueValue();
+ Value *FV = SI->getFalseValue();
+ if (!(isa<Constant>(TV) || isa<Constant>(FV)))
+ return nullptr;
- Value *SelectTrueVal = FoldOperationIntoSelectOperand(Op, TV, this);
- Value *SelectFalseVal = FoldOperationIntoSelectOperand(Op, FV, this);
+ // Bool selects with constant operands can be folded to logical ops.
+ if (SI->getType()->getScalarType()->isIntegerTy(1))
+ return nullptr;
+
+ // If it's a bitcast involving vectors, make sure it has the same number of
+ // elements on both sides.
+ if (auto *BC = dyn_cast<BitCastInst>(&Op)) {
+ VectorType *DestTy = dyn_cast<VectorType>(BC->getDestTy());
+ VectorType *SrcTy = dyn_cast<VectorType>(BC->getSrcTy());
+
+ // Verify that either both or neither are vectors.
+ if ((SrcTy == nullptr) != (DestTy == nullptr))
+ return nullptr;
- return SelectInst::Create(SI->getCondition(),
- SelectTrueVal, SelectFalseVal);
+ // If vectors, verify that they have the same number of elements.
+ if (SrcTy && SrcTy->getNumElements() != DestTy->getNumElements())
+ return nullptr;
}
- return nullptr;
+
+ // Test if a CmpInst instruction is used exclusively by a select as
+ // part of a minimum or maximum operation. If so, refrain from doing
+ // any other folding. This helps out other analyses which understand
+ // non-obfuscated minimum and maximum idioms, such as ScalarEvolution
+ // and CodeGen. And in this case, at least one of the comparison
+ // operands has at least one user besides the compare (the select),
+ // which would often largely negate the benefit of folding anyway.
+ if (auto *CI = dyn_cast<CmpInst>(SI->getCondition())) {
+ if (CI->hasOneUse()) {
+ Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1);
+ if ((SI->getOperand(1) == Op0 && SI->getOperand(2) == Op1) ||
+ (SI->getOperand(2) == Op0 && SI->getOperand(1) == Op1))
+ return nullptr;
+ }
+ }
+
+ Value *NewTV = foldOperationIntoSelectOperand(Op, TV, this);
+ Value *NewFV = foldOperationIntoSelectOperand(Op, FV, this);
+ return SelectInst::Create(SI->getCondition(), NewTV, NewFV, "", nullptr, SI);
}
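The min/max guard retained above refuses the fold when a one-use compare feeds a select over the same two values, so later passes still see the canonical idiom. A structural sketch of that test (the struct and pointer-identity encoding are invented; in the real code these are Value pointers):

struct SelectShape {
  const void *CmpOp0, *CmpOp1;    // operands of the compare feeding the select
  const void *TrueVal, *FalseVal; // the select's two arms
  bool CmpHasOneUse;
};

bool looksLikeMinMaxIdiom(const SelectShape &S) {
  if (!S.CmpHasOneUse)
    return false;
  return (S.TrueVal == S.CmpOp0 && S.FalseVal == S.CmpOp1) ||
         (S.TrueVal == S.CmpOp1 && S.FalseVal == S.CmpOp0);
}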
/// Given a binary operator, cast instruction, or select which has a PHI node as
@@ -877,7 +871,7 @@ Instruction *InstCombiner::FoldOpIntoPhi(Instruction &I) {
// If the incoming non-constant value is in I's block, we will remove one
// instruction, but insert another equivalent one, leading to infinite
// instcombine.
- if (isPotentiallyReachable(I.getParent(), NonConstBB, DT, LI))
+ if (isPotentiallyReachable(I.getParent(), NonConstBB, &DT, LI))
return nullptr;
}
@@ -1379,7 +1373,8 @@ Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) {
Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
SmallVector<Value*, 8> Ops(GEP.op_begin(), GEP.op_end());
- if (Value *V = SimplifyGEPInst(GEP.getSourceElementType(), Ops, DL, TLI, DT, AC))
+ if (Value *V =
+ SimplifyGEPInst(GEP.getSourceElementType(), Ops, DL, &TLI, &DT, &AC))
return replaceInstUsesWith(GEP, V);
Value *PtrOp = GEP.getOperand(0);
@@ -1394,7 +1389,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
for (User::op_iterator I = GEP.op_begin() + 1, E = GEP.op_end(); I != E;
++I, ++GTI) {
// Skip indices into struct types.
- if (isa<StructType>(*GTI))
+ if (GTI.isStruct())
continue;
// Index type should have the same width as IntPtr
@@ -1551,7 +1546,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
bool EndsWithSequential = false;
for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src);
I != E; ++I)
- EndsWithSequential = !(*I)->isStructTy();
+ EndsWithSequential = I.isSequential();
// Can we combine the two pointer arithmetics offsets?
if (EndsWithSequential) {
@@ -1860,7 +1855,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
if (!Offset) {
// If the bitcast is of an allocation, and the allocation will be
// converted to match the type of the cast, don't touch this.
- if (isa<AllocaInst>(Operand) || isAllocationFn(Operand, TLI)) {
+ if (isa<AllocaInst>(Operand) || isAllocationFn(Operand, &TLI)) {
// See if the bitcast simplifies, if so, don't nuke this GEP yet.
if (Instruction *I = visitBitCast(*BCI)) {
if (I != BCI) {
@@ -1898,6 +1893,25 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
}
}
+ if (!GEP.isInBounds()) {
+ unsigned PtrWidth =
+ DL.getPointerSizeInBits(PtrOp->getType()->getPointerAddressSpace());
+ APInt BasePtrOffset(PtrWidth, 0);
+ Value *UnderlyingPtrOp =
+ PtrOp->stripAndAccumulateInBoundsConstantOffsets(DL,
+ BasePtrOffset);
+ if (auto *AI = dyn_cast<AllocaInst>(UnderlyingPtrOp)) {
+ if (GEP.accumulateConstantOffset(DL, BasePtrOffset) &&
+ BasePtrOffset.isNonNegative()) {
+ APInt AllocSize(PtrWidth, DL.getTypeAllocSize(AI->getAllocatedType()));
+ if (BasePtrOffset.ule(AllocSize)) {
+ return GetElementPtrInst::CreateInBounds(
+ PtrOp, makeArrayRef(Ops).slice(1), GEP.getName());
+ }
+ }
+ }
+ }
+
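The new inbounds promotion above reduces to a bounds check on the accumulated constant byte offset against the alloca's size. A minimal sketch of that condition (function name invented; the real code computes the offset with accumulateConstantOffset and APInt):

#include <cstdint>

bool gepStaysInBounds(int64_t ConstantByteOffset, uint64_t AllocSizeInBytes) {
  // Non-negative and not past the end of the allocation.
  return ConstantByteOffset >= 0 &&
         static_cast<uint64_t>(ConstantByteOffset) <= AllocSizeInBytes;
}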
return nullptr;
}
@@ -1963,8 +1977,8 @@ isAllocSiteRemovable(Instruction *AI, SmallVectorImpl<WeakVH> &Users,
MemIntrinsic *MI = cast<MemIntrinsic>(II);
if (MI->isVolatile() || MI->getRawDest() != PI)
return false;
+ LLVM_FALLTHROUGH;
}
- // fall through
case Intrinsic::dbg_declare:
case Intrinsic::dbg_value:
case Intrinsic::invariant_start:
@@ -2002,7 +2016,7 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) {
// to null and free calls, delete the calls and replace the comparisons with
// true or false as appropriate.
SmallVector<WeakVH, 64> Users;
- if (isAllocSiteRemovable(&MI, Users, TLI)) {
+ if (isAllocSiteRemovable(&MI, Users, &TLI)) {
for (unsigned i = 0, e = Users.size(); i != e; ++i) {
// Lowering all @llvm.objectsize calls first because they may
// use a bitcast/GEP of the alloca we are removing.
@@ -2013,12 +2027,9 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) {
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
if (II->getIntrinsicID() == Intrinsic::objectsize) {
- uint64_t Size;
- if (!getObjectSize(II->getArgOperand(0), Size, DL, TLI)) {
- ConstantInt *CI = cast<ConstantInt>(II->getArgOperand(1));
- Size = CI->isZero() ? -1ULL : 0;
- }
- replaceInstUsesWith(*I, ConstantInt::get(I->getType(), Size));
+ ConstantInt *Result = lowerObjectSizeCall(II, DL, &TLI,
+ /*MustSucceed=*/true);
+ replaceInstUsesWith(*I, Result);
eraseInstFromFunction(*I);
Users[i] = nullptr; // Skip examining in the next loop.
}
@@ -2218,6 +2229,20 @@ Instruction *InstCombiner::visitBranchInst(BranchInst &BI) {
Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
Value *Cond = SI.getCondition();
+ Value *Op0;
+ ConstantInt *AddRHS;
+ if (match(Cond, m_Add(m_Value(Op0), m_ConstantInt(AddRHS)))) {
+ // Change 'switch (X+4) case 1:' into 'switch (X) case -3'.
+ for (SwitchInst::CaseIt CaseIter : SI.cases()) {
+ Constant *NewCase = ConstantExpr::getSub(CaseIter.getCaseValue(), AddRHS);
+ assert(isa<ConstantInt>(NewCase) &&
+ "Result of expression should be constant");
+ CaseIter.setValue(cast<ConstantInt>(NewCase));
+ }
+ SI.setCondition(Op0);
+ return &SI;
+ }
+
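The hoisted transform above rewrites 'switch (X + Add) case K' into 'switch (X) case K - Add'. A sketch of the case rewrite with plain integers standing in for ConstantInt (wrapping subtraction matches ConstantExpr::getSub):

#include <cstdint>
#include <vector>

void shiftSwitchCases(std::vector<uint64_t> &CaseValues, uint64_t AddRHS) {
  for (uint64_t &Case : CaseValues)
    Case -= AddRHS; // modular subtraction, same value the IR constant fold produces
}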
unsigned BitWidth = cast<IntegerType>(Cond->getType())->getBitWidth();
APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0);
computeKnownBits(Cond, KnownZero, KnownOne, 0, &SI);
@@ -2238,43 +2263,20 @@ Instruction *InstCombiner::visitSwitchInst(SwitchInst &SI) {
// Shrink the condition operand if the new type is smaller than the old type.
// This may produce a non-standard type for the switch, but that's ok because
// the backend should extend back to a legal type for the target.
- bool TruncCond = false;
if (NewWidth > 0 && NewWidth < BitWidth) {
- TruncCond = true;
IntegerType *Ty = IntegerType::get(SI.getContext(), NewWidth);
Builder->SetInsertPoint(&SI);
Value *NewCond = Builder->CreateTrunc(Cond, Ty, "trunc");
SI.setCondition(NewCond);
- for (auto &C : SI.cases())
- static_cast<SwitchInst::CaseIt *>(&C)->setValue(ConstantInt::get(
- SI.getContext(), C.getCaseValue()->getValue().trunc(NewWidth)));
- }
-
- ConstantInt *AddRHS = nullptr;
- if (match(Cond, m_Add(m_Value(), m_ConstantInt(AddRHS)))) {
- Instruction *I = cast<Instruction>(Cond);
- // Change 'switch (X+4) case 1:' into 'switch (X) case -3'.
- for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end(); i != e;
- ++i) {
- ConstantInt *CaseVal = i.getCaseValue();
- Constant *LHS = CaseVal;
- if (TruncCond) {
- LHS = LeadingKnownZeros
- ? ConstantExpr::getZExt(CaseVal, Cond->getType())
- : ConstantExpr::getSExt(CaseVal, Cond->getType());
- }
- Constant *NewCaseVal = ConstantExpr::getSub(LHS, AddRHS);
- assert(isa<ConstantInt>(NewCaseVal) &&
- "Result of expression should be constant");
- i.setValue(cast<ConstantInt>(NewCaseVal));
+ for (SwitchInst::CaseIt CaseIter : SI.cases()) {
+ APInt TruncatedCase = CaseIter.getCaseValue()->getValue().trunc(NewWidth);
+ CaseIter.setValue(ConstantInt::get(SI.getContext(), TruncatedCase));
}
- SI.setCondition(I->getOperand(0));
- Worklist.Add(I);
return &SI;
}
- return TruncCond ? &SI : nullptr;
+ return nullptr;
}
Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
@@ -2284,7 +2286,7 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
return replaceInstUsesWith(EV, Agg);
if (Value *V =
- SimplifyExtractValueInst(Agg, EV.getIndices(), DL, TLI, DT, AC))
+ SimplifyExtractValueInst(Agg, EV.getIndices(), DL, &TLI, &DT, &AC))
return replaceInstUsesWith(EV, V);
if (InsertValueInst *IV = dyn_cast<InsertValueInst>(Agg)) {
@@ -2560,7 +2562,7 @@ Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) {
// remove it from the filter. An unexpected type handler may be
// set up for a call site which throws an exception of the same
// type caught. In order for the exception thrown by the unexpected
- // handler to propogate correctly, the filter must be correctly
+ // handler to propagate correctly, the filter must be correctly
// described for the call site.
//
// Example:
@@ -2813,7 +2815,7 @@ bool InstCombiner::run() {
if (I == nullptr) continue; // skip null values.
// Check to see if we can DCE the instruction.
- if (isInstructionTriviallyDead(I, TLI)) {
+ if (isInstructionTriviallyDead(I, &TLI)) {
DEBUG(dbgs() << "IC: DCE: " << *I << '\n');
eraseInstFromFunction(*I);
++NumDeadInst;
@@ -2824,13 +2826,13 @@ bool InstCombiner::run() {
// Instruction isn't dead, see if we can constant propagate it.
if (!I->use_empty() &&
(I->getNumOperands() == 0 || isa<Constant>(I->getOperand(0)))) {
- if (Constant *C = ConstantFoldInstruction(I, DL, TLI)) {
+ if (Constant *C = ConstantFoldInstruction(I, DL, &TLI)) {
DEBUG(dbgs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n');
// Add operands to the worklist.
replaceInstUsesWith(*I, C);
++NumConstProp;
- if (isInstructionTriviallyDead(I, TLI))
+ if (isInstructionTriviallyDead(I, &TLI))
eraseInstFromFunction(*I);
MadeIRChange = true;
continue;
@@ -2839,20 +2841,21 @@ bool InstCombiner::run() {
// In general, it is possible for computeKnownBits to determine all bits in
// a value even when the operands are not all constants.
- if (ExpensiveCombines && !I->use_empty() && I->getType()->isIntegerTy()) {
- unsigned BitWidth = I->getType()->getScalarSizeInBits();
+ Type *Ty = I->getType();
+ if (ExpensiveCombines && !I->use_empty() && Ty->isIntOrIntVectorTy()) {
+ unsigned BitWidth = Ty->getScalarSizeInBits();
APInt KnownZero(BitWidth, 0);
APInt KnownOne(BitWidth, 0);
computeKnownBits(I, KnownZero, KnownOne, /*Depth*/0, I);
if ((KnownZero | KnownOne).isAllOnesValue()) {
- Constant *C = ConstantInt::get(I->getContext(), KnownOne);
+ Constant *C = ConstantInt::get(Ty, KnownOne);
DEBUG(dbgs() << "IC: ConstFold (all bits known) to: " << *C <<
" from: " << *I << '\n');
// Add operands to the worklist.
replaceInstUsesWith(*I, C);
++NumConstProp;
- if (isInstructionTriviallyDead(I, TLI))
+ if (isInstructionTriviallyDead(I, &TLI))
eraseInstFromFunction(*I);
MadeIRChange = true;
continue;
@@ -2883,7 +2886,7 @@ bool InstCombiner::run() {
// If the user is one of our immediate successors, and if that successor
// only has us as a predecessors (we'd have to split the critical edge
// otherwise), we can keep going.
- if (UserIsSuccessor && UserParent->getSinglePredecessor()) {
+ if (UserIsSuccessor && UserParent->getUniquePredecessor()) {
// Okay, the CFG is simple enough, try to sink this instruction.
if (TryToSinkInstruction(I, UserParent)) {
DEBUG(dbgs() << "IC: Sink: " << *I << '\n');
@@ -2941,14 +2944,12 @@ bool InstCombiner::run() {
eraseInstFromFunction(*I);
} else {
-#ifndef NDEBUG
DEBUG(dbgs() << "IC: Mod = " << OrigI << '\n'
<< " New = " << *I << '\n');
-#endif
// If the instruction was modified, it's possible that it is now dead.
// if so, remove it.
- if (isInstructionTriviallyDead(I, TLI)) {
+ if (isInstructionTriviallyDead(I, &TLI)) {
eraseInstFromFunction(*I);
} else {
Worklist.Add(I);
@@ -2981,7 +2982,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
Worklist.push_back(BB);
SmallVector<Instruction*, 128> InstrsForInstCombineWorklist;
- DenseMap<ConstantExpr*, Constant*> FoldedConstants;
+ DenseMap<Constant *, Constant *> FoldedConstants;
do {
BB = Worklist.pop_back_val();
@@ -3017,17 +3018,17 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB, const DataLayout &DL,
// See if we can constant fold its operands.
for (User::op_iterator i = Inst->op_begin(), e = Inst->op_end(); i != e;
++i) {
- ConstantExpr *CE = dyn_cast<ConstantExpr>(i);
- if (CE == nullptr)
+ if (!isa<ConstantVector>(i) && !isa<ConstantExpr>(i))
continue;
- Constant *&FoldRes = FoldedConstants[CE];
+ auto *C = cast<Constant>(i);
+ Constant *&FoldRes = FoldedConstants[C];
if (!FoldRes)
- FoldRes = ConstantFoldConstantExpression(CE, DL, TLI);
+ FoldRes = ConstantFoldConstant(C, DL, TLI);
if (!FoldRes)
- FoldRes = CE;
+ FoldRes = C;
- if (FoldRes != CE) {
+ if (FoldRes != C) {
*i = FoldRes;
MadeIRChange = true;
}
@@ -3120,8 +3121,15 @@ combineInstructionsOverFunction(Function &F, InstCombineWorklist &Worklist,
/// Builder - This is an IRBuilder that automatically inserts new
/// instructions into the worklist when they are created.
- IRBuilder<TargetFolder, InstCombineIRInserter> Builder(
- F.getContext(), TargetFolder(DL), InstCombineIRInserter(Worklist, &AC));
+ IRBuilder<TargetFolder, IRBuilderCallbackInserter> Builder(
+ F.getContext(), TargetFolder(DL),
+ IRBuilderCallbackInserter([&Worklist, &AC](Instruction *I) {
+ Worklist.Add(I);
+
+ using namespace llvm::PatternMatch;
+ if (match(I, m_Intrinsic<Intrinsic::assume>()))
+ AC.registerAssumption(cast<CallInst>(I));
+ }));
// Lower dbg.declare intrinsics otherwise their value may be clobbered
// by instcombiner.
@@ -3137,7 +3145,7 @@ combineInstructionsOverFunction(Function &F, InstCombineWorklist &Worklist,
bool Changed = prepareICWorklistFromFunction(F, DL, &TLI, Worklist);
InstCombiner IC(Worklist, &Builder, F.optForMinSize(), ExpensiveCombines,
- AA, &AC, &TLI, &DT, DL, LI);
+ AA, AC, TLI, DT, DL, LI);
Changed |= IC.run();
if (!Changed)
@@ -3148,7 +3156,7 @@ combineInstructionsOverFunction(Function &F, InstCombineWorklist &Worklist,
}
PreservedAnalyses InstCombinePass::run(Function &F,
- AnalysisManager<Function> &AM) {
+ FunctionAnalysisManager &AM) {
auto &AC = AM.getResult<AssumptionAnalysis>(F);
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 43d1b377f858..6a7cb0e45c63 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -54,6 +54,9 @@
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <algorithm>
+#include <iomanip>
+#include <limits>
+#include <sstream>
#include <string>
#include <system_error>
@@ -64,8 +67,8 @@ using namespace llvm;
static const uint64_t kDefaultShadowScale = 3;
static const uint64_t kDefaultShadowOffset32 = 1ULL << 29;
static const uint64_t kDefaultShadowOffset64 = 1ULL << 44;
+static const uint64_t kDynamicShadowSentinel = ~(uint64_t)0;
static const uint64_t kIOSShadowOffset32 = 1ULL << 30;
-static const uint64_t kIOSShadowOffset64 = 0x120200000;
static const uint64_t kIOSSimShadowOffset32 = 1ULL << 30;
static const uint64_t kIOSSimShadowOffset64 = kDefaultShadowOffset64;
static const uint64_t kSmallX86_64ShadowOffset = 0x7FFF8000; // < 2G.
@@ -78,8 +81,8 @@ static const uint64_t kAArch64_ShadowOffset64 = 1ULL << 36;
static const uint64_t kFreeBSD_ShadowOffset32 = 1ULL << 30;
static const uint64_t kFreeBSD_ShadowOffset64 = 1ULL << 46;
static const uint64_t kWindowsShadowOffset32 = 3ULL << 28;
-// TODO(wwchrome): Experimental for asan Win64, may change.
-static const uint64_t kWindowsShadowOffset64 = 0x1ULL << 45; // 32TB.
+// The shadow memory space is dynamically allocated.
+static const uint64_t kWindowsShadowOffset64 = kDynamicShadowSentinel;
static const size_t kMinStackMallocSize = 1 << 6; // 64B
static const size_t kMaxStackMallocSize = 1 << 16; // 64K
@@ -111,6 +114,7 @@ static const char *const kAsanStackFreeNameTemplate = "__asan_stack_free_";
static const char *const kAsanGenPrefix = "__asan_gen_";
static const char *const kODRGenPrefix = "__odr_asan_gen_";
static const char *const kSanCovGenPrefix = "__sancov_gen_";
+static const char *const kAsanSetShadowPrefix = "__asan_set_shadow_";
static const char *const kAsanPoisonStackMemoryName =
"__asan_poison_stack_memory";
static const char *const kAsanUnpoisonStackMemoryName =
@@ -121,6 +125,9 @@ static const char *const kAsanGlobalsRegisteredFlagName =
static const char *const kAsanOptionDetectUseAfterReturn =
"__asan_option_detect_stack_use_after_return";
+static const char *const kAsanShadowMemoryDynamicAddress =
+ "__asan_shadow_memory_dynamic_address";
+
static const char *const kAsanAllocaPoison = "__asan_alloca_poison";
static const char *const kAsanAllocasUnpoison = "__asan_allocas_unpoison";
@@ -153,6 +160,11 @@ static cl::opt<bool> ClAlwaysSlowPath(
"asan-always-slow-path",
cl::desc("use instrumentation with slow path for all accesses"), cl::Hidden,
cl::init(false));
+static cl::opt<bool> ClForceDynamicShadow(
+ "asan-force-dynamic-shadow",
+ cl::desc("Load shadow address into a local variable for each function"),
+ cl::Hidden, cl::init(false));
+
// This flag limits the number of instructions to be instrumented
// in any given BB. Normally, this should be set to unlimited (INT_MAX),
// but due to http://llvm.org/bugs/show_bug.cgi?id=12652 we temporary
@@ -164,6 +176,11 @@ static cl::opt<int> ClMaxInsnsToInstrumentPerBB(
// This flag may need to be replaced with -f[no]asan-stack.
static cl::opt<bool> ClStack("asan-stack", cl::desc("Handle stack memory"),
cl::Hidden, cl::init(true));
+static cl::opt<uint32_t> ClMaxInlinePoisoningSize(
+ "asan-max-inline-poisoning-size",
+ cl::desc(
+ "Inline shadow poisoning for blocks up to the given size in bytes."),
+ cl::Hidden, cl::init(64));
static cl::opt<bool> ClUseAfterReturn("asan-use-after-return",
cl::desc("Check stack-use-after-return"),
cl::Hidden, cl::init(true));
@@ -196,9 +213,10 @@ static cl::opt<std::string> ClMemoryAccessCallbackPrefix(
"asan-memory-access-callback-prefix",
cl::desc("Prefix for memory access callbacks"), cl::Hidden,
cl::init("__asan_"));
-static cl::opt<bool> ClInstrumentAllocas("asan-instrument-allocas",
- cl::desc("instrument dynamic allocas"),
- cl::Hidden, cl::init(true));
+static cl::opt<bool>
+ ClInstrumentDynamicAllocas("asan-instrument-dynamic-allocas",
+ cl::desc("instrument dynamic allocas"),
+ cl::Hidden, cl::init(true));
static cl::opt<bool> ClSkipPromotableAllocas(
"asan-skip-promotable-allocas",
cl::desc("Do not instrument promotable allocas"), cl::Hidden,
@@ -250,7 +268,7 @@ static cl::opt<bool>
cl::desc("Use linker features to support dead "
"code stripping of globals "
"(Mach-O only)"),
- cl::Hidden, cl::init(false));
+ cl::Hidden, cl::init(true));
// Debug flags.
static cl::opt<int> ClDebug("asan-debug", cl::desc("debug"), cl::Hidden,
@@ -261,7 +279,7 @@ static cl::opt<std::string> ClDebugFunc("asan-debug-func", cl::Hidden,
cl::desc("Debug func"));
static cl::opt<int> ClDebugMin("asan-debug-min", cl::desc("Debug min inst"),
cl::Hidden, cl::init(-1));
-static cl::opt<int> ClDebugMax("asan-debug-max", cl::desc("Debug man inst"),
+static cl::opt<int> ClDebugMax("asan-debug-max", cl::desc("Debug max inst"),
cl::Hidden, cl::init(-1));
STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
@@ -411,13 +429,19 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize,
Mapping.Offset = kMIPS64_ShadowOffset64;
else if (IsIOS)
// If we're targeting iOS and x86, the binary is built for iOS simulator.
- Mapping.Offset = IsX86_64 ? kIOSSimShadowOffset64 : kIOSShadowOffset64;
+ // We are using dynamic shadow offset on the 64-bit devices.
+ Mapping.Offset =
+ IsX86_64 ? kIOSSimShadowOffset64 : kDynamicShadowSentinel;
else if (IsAArch64)
Mapping.Offset = kAArch64_ShadowOffset64;
else
Mapping.Offset = kDefaultShadowOffset64;
}
+ if (ClForceDynamicShadow) {
+ Mapping.Offset = kDynamicShadowSentinel;
+ }
+
Mapping.Scale = kDefaultShadowScale;
if (ClMappingScale.getNumOccurrences() > 0) {
Mapping.Scale = ClMappingScale;
@@ -433,7 +457,8 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize,
// we could OR the constant in a single instruction, but it's more
// efficient to load it once and use indexed addressing.
Mapping.OrShadowOffset = !IsAArch64 && !IsPPC64 && !IsSystemZ
- && !(Mapping.Offset & (Mapping.Offset - 1));
+ && !(Mapping.Offset & (Mapping.Offset - 1))
+ && Mapping.Offset != kDynamicShadowSentinel;
return Mapping;
}
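The OrShadowOffset condition above (ignoring the per-target exclusions) accepts only power-of-two offsets and rejects the dynamic-shadow sentinel: a single-bit offset is assumed to sit above any shifted address, so OR and ADD produce the same result. A sketch of just that arithmetic test (names invented for illustration):

#include <cstdint>

bool canUseOrForShadowOffset(uint64_t Offset, uint64_t DynamicSentinel) {
  // Power-of-two (or zero) offsets pass the x & (x - 1) test.
  return (Offset & (Offset - 1)) == 0 && Offset != DynamicSentinel;
}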
@@ -450,35 +475,39 @@ struct AddressSanitizer : public FunctionPass {
bool UseAfterScope = false)
: FunctionPass(ID), CompileKernel(CompileKernel || ClEnableKasan),
Recover(Recover || ClRecover),
- UseAfterScope(UseAfterScope || ClUseAfterScope) {
+ UseAfterScope(UseAfterScope || ClUseAfterScope),
+ LocalDynamicShadow(nullptr) {
initializeAddressSanitizerPass(*PassRegistry::getPassRegistry());
}
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "AddressSanitizerFunctionPass";
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
}
- uint64_t getAllocaSizeInBytes(AllocaInst *AI) const {
+ uint64_t getAllocaSizeInBytes(const AllocaInst &AI) const {
uint64_t ArraySize = 1;
- if (AI->isArrayAllocation()) {
- ConstantInt *CI = dyn_cast<ConstantInt>(AI->getArraySize());
+ if (AI.isArrayAllocation()) {
+ const ConstantInt *CI = dyn_cast<ConstantInt>(AI.getArraySize());
assert(CI && "non-constant array size");
ArraySize = CI->getZExtValue();
}
- Type *Ty = AI->getAllocatedType();
+ Type *Ty = AI.getAllocatedType();
uint64_t SizeInBytes =
- AI->getModule()->getDataLayout().getTypeAllocSize(Ty);
+ AI.getModule()->getDataLayout().getTypeAllocSize(Ty);
return SizeInBytes * ArraySize;
}
/// Check if we want (and can) handle this alloca.
- bool isInterestingAlloca(AllocaInst &AI);
+ bool isInterestingAlloca(const AllocaInst &AI);
/// If it is an interesting memory access, return the PointerOperand
/// and set IsWrite/Alignment. Otherwise return nullptr.
+ /// MaybeMask is an output parameter for the mask Value, if we're looking at a
+ /// masked load/store.
Value *isInterestingMemoryAccess(Instruction *I, bool *IsWrite,
- uint64_t *TypeSize, unsigned *Alignment);
+ uint64_t *TypeSize, unsigned *Alignment,
+ Value **MaybeMask = nullptr);
void instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis, Instruction *I,
bool UseCalls, const DataLayout &DL);
void instrumentPointerComparisonOrSubtraction(Instruction *I);
@@ -498,6 +527,7 @@ struct AddressSanitizer : public FunctionPass {
Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);
bool runOnFunction(Function &F) override;
bool maybeInsertAsanInitAtFunctionEntry(Function &F);
+ void maybeInsertDynamicShadowAtFunctionEntry(Function &F);
void markEscapedLocalAllocas(Function &F);
bool doInitialization(Module &M) override;
bool doFinalization(Module &M) override;
@@ -519,8 +549,12 @@ struct AddressSanitizer : public FunctionPass {
FunctionStateRAII(AddressSanitizer *Pass) : Pass(Pass) {
assert(Pass->ProcessedAllocas.empty() &&
"last pass forgot to clear cache");
+ assert(!Pass->LocalDynamicShadow);
+ }
+ ~FunctionStateRAII() {
+ Pass->LocalDynamicShadow = nullptr;
+ Pass->ProcessedAllocas.clear();
}
- ~FunctionStateRAII() { Pass->ProcessedAllocas.clear(); }
};
LLVMContext *C;
@@ -544,8 +578,9 @@ struct AddressSanitizer : public FunctionPass {
Function *AsanMemoryAccessCallbackSized[2][2];
Function *AsanMemmove, *AsanMemcpy, *AsanMemset;
InlineAsm *EmptyAsm;
+ Value *LocalDynamicShadow;
GlobalsMetadata GlobalsMD;
- DenseMap<AllocaInst *, bool> ProcessedAllocas;
+ DenseMap<const AllocaInst *, bool> ProcessedAllocas;
friend struct FunctionStackPoisoner;
};
@@ -558,14 +593,15 @@ class AddressSanitizerModule : public ModulePass {
Recover(Recover || ClRecover) {}
bool runOnModule(Module &M) override;
static char ID; // Pass identification, replacement for typeid
- const char *getPassName() const override { return "AddressSanitizerModule"; }
+ StringRef getPassName() const override { return "AddressSanitizerModule"; }
- private:
+private:
void initializeCallbacks(Module &M);
bool InstrumentGlobals(IRBuilder<> &IRB, Module &M);
bool ShouldInstrumentGlobal(GlobalVariable *G);
bool ShouldUseMachOGlobalsSection() const;
+ StringRef getGlobalMetadataSection() const;
void poisonOneInitializer(Function &GlobalInit, GlobalValue *ModuleName);
void createInitializerPoisonCalls(Module &M, GlobalValue *ModuleName);
size_t MinRedzoneSizeForGlobal() const {
@@ -606,12 +642,13 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
ShadowMapping Mapping;
SmallVector<AllocaInst *, 16> AllocaVec;
- SmallSetVector<AllocaInst *, 16> NonInstrumentedStaticAllocaVec;
+ SmallVector<AllocaInst *, 16> StaticAllocasToMoveUp;
SmallVector<Instruction *, 8> RetVec;
unsigned StackAlignment;
Function *AsanStackMallocFunc[kMaxAsanStackMallocSizeClass + 1],
*AsanStackFreeFunc[kMaxAsanStackMallocSizeClass + 1];
+ Function *AsanSetShadowFunc[0x100] = {};
Function *AsanPoisonStackMemoryFunc, *AsanUnpoisonStackMemoryFunc;
Function *AsanAllocaPoisonFunc, *AsanAllocasUnpoisonFunc;
@@ -622,7 +659,8 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
uint64_t Size;
bool DoPoison;
};
- SmallVector<AllocaPoisonCall, 8> AllocaPoisonCallVec;
+ SmallVector<AllocaPoisonCall, 8> DynamicAllocaPoisonCallVec;
+ SmallVector<AllocaPoisonCall, 8> StaticAllocaPoisonCallVec;
SmallVector<AllocaInst *, 1> DynamicAllocaVec;
SmallVector<IntrinsicInst *, 1> StackRestoreVec;
@@ -657,7 +695,8 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
initializeCallbacks(*F.getParent());
- poisonStack();
+ processDynamicAllocas();
+ processStaticAllocas();
if (ClDebugStack) {
DEBUG(dbgs() << F);
@@ -668,7 +707,8 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
// Finds all Alloca instructions and puts
// poisoned red zones around all of them.
// Then unpoison everything back before the function returns.
- void poisonStack();
+ void processStaticAllocas();
+ void processDynamicAllocas();
void createDynamicAllocasInitStorage();
@@ -676,6 +716,12 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
/// \brief Collect all Ret instructions.
void visitReturnInst(ReturnInst &RI) { RetVec.push_back(&RI); }
+ /// \brief Collect all Resume instructions.
+ void visitResumeInst(ResumeInst &RI) { RetVec.push_back(&RI); }
+
+ /// \brief Collect all CleanupReturnInst instructions.
+ void visitCleanupReturnInst(CleanupReturnInst &CRI) { RetVec.push_back(&CRI); }
+
void unpoisonDynamicAllocasBeforeInst(Instruction *InstBefore,
Value *SavedStack) {
IRBuilder<> IRB(InstBefore);
@@ -724,7 +770,14 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
/// \brief Collect Alloca instructions we want (and can) handle.
void visitAllocaInst(AllocaInst &AI) {
if (!ASan.isInterestingAlloca(AI)) {
- if (AI.isStaticAlloca()) NonInstrumentedStaticAllocaVec.insert(&AI);
+ if (AI.isStaticAlloca()) {
+ // Skip over allocas that are present *before* the first instrumented
+ // alloca; we don't want to move those around.
+ if (AllocaVec.empty())
+ return;
+
+ StaticAllocasToMoveUp.push_back(&AI);
+ }
return;
}
@@ -761,7 +814,10 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
return;
bool DoPoison = (ID == Intrinsic::lifetime_end);
AllocaPoisonCall APC = {&II, AI, SizeValue, DoPoison};
- AllocaPoisonCallVec.push_back(APC);
+ if (AI->isStaticAlloca())
+ StaticAllocaPoisonCallVec.push_back(APC);
+ else if (ClInstrumentDynamicAllocas)
+ DynamicAllocaPoisonCallVec.push_back(APC);
}
void visitCallSite(CallSite CS) {
@@ -785,12 +841,21 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> {
/// Finds alloca where the value comes from.
AllocaInst *findAllocaForValue(Value *V);
- void poisonRedZones(ArrayRef<uint8_t> ShadowBytes, IRBuilder<> &IRB,
- Value *ShadowBase, bool DoPoison);
+
+ // Copies bytes from ShadowBytes into shadow memory for indexes where
+ // ShadowMask is not zero. If ShadowMask[i] is zero, we assume that
+ // ShadowBytes[i] is constantly zero and doesn't need to be overwritten.
+ void copyToShadow(ArrayRef<uint8_t> ShadowMask, ArrayRef<uint8_t> ShadowBytes,
+ IRBuilder<> &IRB, Value *ShadowBase);
+ void copyToShadow(ArrayRef<uint8_t> ShadowMask, ArrayRef<uint8_t> ShadowBytes,
+ size_t Begin, size_t End, IRBuilder<> &IRB,
+ Value *ShadowBase);
+ void copyToShadowInline(ArrayRef<uint8_t> ShadowMask,
+ ArrayRef<uint8_t> ShadowBytes, size_t Begin,
+ size_t End, IRBuilder<> &IRB, Value *ShadowBase);
+
void poisonAlloca(Value *V, uint64_t Size, IRBuilder<> &IRB, bool DoPoison);
- void SetShadowToStackAfterReturnInlined(IRBuilder<> &IRB, Value *ShadowBase,
- int Size);
Value *createAllocaForLayout(IRBuilder<> &IRB, const ASanStackFrameLayout &L,
bool Dynamic);
PHINode *createPHI(IRBuilder<> &IRB, Value *Cond, Value *ValueIfTrue,
@@ -885,10 +950,15 @@ Value *AddressSanitizer::memToShadow(Value *Shadow, IRBuilder<> &IRB) {
Shadow = IRB.CreateLShr(Shadow, Mapping.Scale);
if (Mapping.Offset == 0) return Shadow;
// (Shadow >> scale) | offset
+ Value *ShadowBase;
+ if (LocalDynamicShadow)
+ ShadowBase = LocalDynamicShadow;
+ else
+ ShadowBase = ConstantInt::get(IntptrTy, Mapping.Offset);
if (Mapping.OrShadowOffset)
- return IRB.CreateOr(Shadow, ConstantInt::get(IntptrTy, Mapping.Offset));
+ return IRB.CreateOr(Shadow, ShadowBase);
else
- return IRB.CreateAdd(Shadow, ConstantInt::get(IntptrTy, Mapping.Offset));
+ return IRB.CreateAdd(Shadow, ShadowBase);
}
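With the dynamic-shadow change above, memToShadow still computes (addr >> scale) combined with a shadow base, but the base may now come from a per-function load instead of a constant. A sketch of the mapping arithmetic on plain integers (the function name is invented; the zero-offset early-return of the real code is omitted):

#include <cstdint>

uint64_t memToShadowSketch(uint64_t Addr, unsigned Scale, uint64_t ShadowBase,
                           bool OrShadowOffset) {
  uint64_t Shadow = Addr >> Scale;
  return OrShadowOffset ? (Shadow | ShadowBase) : (Shadow + ShadowBase);
}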
// Instrument memset/memmove/memcpy
@@ -911,7 +981,7 @@ void AddressSanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
}
/// Check if we want (and can) handle this alloca.
-bool AddressSanitizer::isInterestingAlloca(AllocaInst &AI) {
+bool AddressSanitizer::isInterestingAlloca(const AllocaInst &AI) {
auto PreviouslySeenAllocaInfo = ProcessedAllocas.find(&AI);
if (PreviouslySeenAllocaInfo != ProcessedAllocas.end())
@@ -920,7 +990,7 @@ bool AddressSanitizer::isInterestingAlloca(AllocaInst &AI) {
bool IsInteresting =
(AI.getAllocatedType()->isSized() &&
// alloca() may be called with 0 size, ignore it.
- ((!AI.isStaticAlloca()) || getAllocaSizeInBytes(&AI) > 0) &&
+ ((!AI.isStaticAlloca()) || getAllocaSizeInBytes(AI) > 0) &&
// We are only interested in allocas not promotable to registers.
// Promotable allocas are common under -O0.
(!ClSkipPromotableAllocas || !isAllocaPromotable(&AI)) &&
@@ -932,15 +1002,18 @@ bool AddressSanitizer::isInterestingAlloca(AllocaInst &AI) {
return IsInteresting;
}
-/// If I is an interesting memory access, return the PointerOperand
-/// and set IsWrite/Alignment. Otherwise return nullptr.
Value *AddressSanitizer::isInterestingMemoryAccess(Instruction *I,
bool *IsWrite,
uint64_t *TypeSize,
- unsigned *Alignment) {
+ unsigned *Alignment,
+ Value **MaybeMask) {
// Skip memory accesses inserted by another instrumentation.
if (I->getMetadata("nosanitize")) return nullptr;
+ // Do not instrument the load fetching the dynamic shadow address.
+ if (LocalDynamicShadow == I)
+ return nullptr;
+
Value *PtrOperand = nullptr;
const DataLayout &DL = I->getModule()->getDataLayout();
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
@@ -967,6 +1040,37 @@ Value *AddressSanitizer::isInterestingMemoryAccess(Instruction *I,
*TypeSize = DL.getTypeStoreSizeInBits(XCHG->getCompareOperand()->getType());
*Alignment = 0;
PtrOperand = XCHG->getPointerOperand();
+ } else if (auto CI = dyn_cast<CallInst>(I)) {
+ auto *F = dyn_cast<Function>(CI->getCalledValue());
+ if (F && (F->getName().startswith("llvm.masked.load.") ||
+ F->getName().startswith("llvm.masked.store."))) {
+ unsigned OpOffset = 0;
+ if (F->getName().startswith("llvm.masked.store.")) {
+ if (!ClInstrumentWrites)
+ return nullptr;
+ // Masked store has an initial operand for the value.
+ OpOffset = 1;
+ *IsWrite = true;
+ } else {
+ if (!ClInstrumentReads)
+ return nullptr;
+ *IsWrite = false;
+ }
+ // Only instrument if the mask is constant for now.
+ if (isa<ConstantVector>(CI->getOperand(2 + OpOffset))) {
+ auto BasePtr = CI->getOperand(0 + OpOffset);
+ auto Ty = cast<PointerType>(BasePtr->getType())->getElementType();
+ *TypeSize = DL.getTypeStoreSizeInBits(Ty);
+ if (auto AlignmentConstant =
+ dyn_cast<ConstantInt>(CI->getOperand(1 + OpOffset)))
+ *Alignment = (unsigned)AlignmentConstant->getZExtValue();
+ else
+ *Alignment = 1; // No alignment guarantees. We probably got Undef
+ if (MaybeMask)
+ *MaybeMask = CI->getOperand(2 + OpOffset);
+ PtrOperand = BasePtr;
+ }
+ }
}
// Do not instrument accesses from different address spaces; we cannot deal
@@ -1025,13 +1129,55 @@ void AddressSanitizer::instrumentPointerComparisonOrSubtraction(
IRB.CreateCall(F, Param);
}
+static void doInstrumentAddress(AddressSanitizer *Pass, Instruction *I,
+ Value *Addr, unsigned Alignment,
+ unsigned Granularity, uint32_t TypeSize,
+ bool IsWrite, Value *SizeArgument,
+ bool UseCalls, uint32_t Exp) {
+ // Instrument a 1-, 2-, 4-, 8-, or 16- byte access with one check
+ // if the data is properly aligned.
+ if ((TypeSize == 8 || TypeSize == 16 || TypeSize == 32 || TypeSize == 64 ||
+ TypeSize == 128) &&
+ (Alignment >= Granularity || Alignment == 0 || Alignment >= TypeSize / 8))
+ return Pass->instrumentAddress(I, I, Addr, TypeSize, IsWrite, nullptr,
+ UseCalls, Exp);
+ Pass->instrumentUnusualSizeOrAlignment(I, Addr, TypeSize, IsWrite, nullptr,
+ UseCalls, Exp);
+}
+
+static void instrumentMaskedLoadOrStore(AddressSanitizer *Pass,
+ const DataLayout &DL, Type *IntptrTy,
+ ConstantVector *Mask, Instruction *I,
+ Value *Addr, unsigned Alignment,
+ unsigned Granularity, uint32_t TypeSize,
+ bool IsWrite, Value *SizeArgument,
+ bool UseCalls, uint32_t Exp) {
+ auto *VTy = cast<PointerType>(Addr->getType())->getElementType();
+ uint64_t ElemTypeSize = DL.getTypeStoreSizeInBits(VTy->getScalarType());
+ unsigned Num = VTy->getVectorNumElements();
+ auto Zero = ConstantInt::get(IntptrTy, 0);
+ for (unsigned Idx = 0; Idx < Num; ++Idx) {
+ // dyn_cast as we might get UndefValue
+ auto Masked = dyn_cast<ConstantInt>(Mask->getOperand(Idx));
+ if (Masked && Masked->isAllOnesValue()) {
+ IRBuilder<> IRB(I);
+ auto InstrumentedAddress =
+ IRB.CreateGEP(Addr, {Zero, ConstantInt::get(IntptrTy, Idx)});
+ doInstrumentAddress(Pass, I, InstrumentedAddress, Alignment, Granularity,
+ ElemTypeSize, IsWrite, SizeArgument, UseCalls, Exp);
+ }
+ }
+}
+
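A compact restatement of the two masked-access pieces above, as a hedged sketch rather than the pass's own helpers: the operand offsets assume the llvm.masked.load(ptr, align, mask, passthru) and llvm.masked.store(value, ptr, align, mask) shapes implied by OpOffset, and only lanes whose mask bit is a constant true receive an element-sized check.

#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"

// Illustrative only; mirrors the operand arithmetic and the per-lane loop.
void sketchMaskedAccess(llvm::CallInst *CI, bool IsStore) {
  unsigned OpOffset = IsStore ? 1 : 0;           // masked.store carries the value first
  llvm::Value *Ptr  = CI->getOperand(0 + OpOffset);
  llvm::Value *Mask = CI->getOperand(2 + OpOffset);
  if (auto *CV = llvm::dyn_cast<llvm::ConstantVector>(Mask))
    for (unsigned Idx = 0, N = CV->getNumOperands(); Idx != N; ++Idx)
      if (auto *Lane = llvm::dyn_cast<llvm::ConstantInt>(CV->getOperand(Idx)))
        if (Lane->isAllOnesValue())
          (void)Ptr; // lane Idx gets one check at GEP(Ptr, {0, Idx}), element-sized
}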
void AddressSanitizer::instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis,
Instruction *I, bool UseCalls,
const DataLayout &DL) {
bool IsWrite = false;
unsigned Alignment = 0;
uint64_t TypeSize = 0;
- Value *Addr = isInterestingMemoryAccess(I, &IsWrite, &TypeSize, &Alignment);
+ Value *MaybeMask = nullptr;
+ Value *Addr =
+ isInterestingMemoryAccess(I, &IsWrite, &TypeSize, &Alignment, &MaybeMask);
assert(Addr);
// Optimization experiments.
@@ -1073,15 +1219,15 @@ void AddressSanitizer::instrumentMop(ObjectSizeOffsetVisitor &ObjSizeVis,
NumInstrumentedReads++;
unsigned Granularity = 1 << Mapping.Scale;
- // Instrument a 1-, 2-, 4-, 8-, or 16- byte access with one check
- // if the data is properly aligned.
- if ((TypeSize == 8 || TypeSize == 16 || TypeSize == 32 || TypeSize == 64 ||
- TypeSize == 128) &&
- (Alignment >= Granularity || Alignment == 0 || Alignment >= TypeSize / 8))
- return instrumentAddress(I, I, Addr, TypeSize, IsWrite, nullptr, UseCalls,
- Exp);
- instrumentUnusualSizeOrAlignment(I, Addr, TypeSize, IsWrite, nullptr,
- UseCalls, Exp);
+ if (MaybeMask) {
+ auto Mask = cast<ConstantVector>(MaybeMask);
+ instrumentMaskedLoadOrStore(this, DL, IntptrTy, Mask, I, Addr, Alignment,
+ Granularity, TypeSize, IsWrite, nullptr,
+ UseCalls, Exp);
+ } else {
+ doInstrumentAddress(this, I, Addr, Alignment, Granularity, TypeSize,
+ IsWrite, nullptr, UseCalls, Exp);
+ }
}
Instruction *AddressSanitizer::generateCrashCode(Instruction *InsertBefore,
@@ -1361,6 +1507,16 @@ bool AddressSanitizerModule::ShouldUseMachOGlobalsSection() const {
return false;
}
+StringRef AddressSanitizerModule::getGlobalMetadataSection() const {
+ switch (TargetTriple.getObjectFormat()) {
+ case Triple::COFF: return ".ASAN$GL";
+ case Triple::ELF: return "asan_globals";
+ case Triple::MachO: return "__DATA,__asan_globals,regular";
+ default: break;
+ }
+ llvm_unreachable("unsupported object format");
+}
+
void AddressSanitizerModule::initializeCallbacks(Module &M) {
IRBuilder<> IRB(*C);
@@ -1409,6 +1565,11 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
size_t n = GlobalsToChange.size();
if (n == 0) return false;
+ auto &DL = M.getDataLayout();
+ bool UseComdatMetadata = TargetTriple.isOSBinFormatCOFF();
+ bool UseMachOGlobalsSection = ShouldUseMachOGlobalsSection();
+ bool UseMetadataArray = !(UseComdatMetadata || UseMachOGlobalsSection);
+
// A global is described by a structure
// size_t beg;
// size_t size;
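The rest of that descriptive comment is cut off by the diff context. For orientation, the eight IntptrTy slots of GlobalStructTy built just below correspond to the runtime's per-global record; a C-level sketch follows, with field names taken from the ASan runtime's __asan_global and best treated as illustrative here:

#include <cstdint>

struct AsanGlobalMetadataSketch {
  uintptr_t Beg;              // address of the instrumented global (or its local alias)
  uintptr_t Size;             // payload size in bytes
  uintptr_t SizeWithRedzone;  // payload plus the right redzone
  uintptr_t Name;             // pointer to the global's name string
  uintptr_t ModuleName;       // pointer to the module-name string
  uintptr_t HasDynamicInit;   // non-zero for dynamically initialized globals
  uintptr_t SourceLocation;   // pointer to source-location data, or null
  uintptr_t OdrIndicator;     // ODR indicator address, or null
};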
@@ -1422,7 +1583,20 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
StructType *GlobalStructTy =
StructType::get(IntptrTy, IntptrTy, IntptrTy, IntptrTy, IntptrTy,
IntptrTy, IntptrTy, IntptrTy, nullptr);
- SmallVector<Constant *, 16> Initializers(n);
+ unsigned SizeOfGlobalStruct = DL.getTypeAllocSize(GlobalStructTy);
+ assert((isPowerOf2_32(SizeOfGlobalStruct) ||
+ !TargetTriple.isOSBinFormatCOFF()) &&
+ "global metadata will not be padded appropriately");
+ SmallVector<Constant *, 16> Initializers(UseMetadataArray ? n : 0);
+
+ // On recent Mach-O platforms, use a structure which binds the liveness of
+ // the global variable to the metadata struct. Keep the list of "Liveness"
+ // GVs created so they can be added to llvm.compiler.used.
+ StructType *LivenessTy = nullptr;
+ if (UseMachOGlobalsSection)
+ LivenessTy = StructType::get(IntptrTy, IntptrTy, nullptr);
+ SmallVector<GlobalValue *, 16> LivenessGlobals(
+ UseMachOGlobalsSection ? n : 0);
bool HasDynamicallyInitializedGlobals = false;
@@ -1431,7 +1605,6 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
GlobalVariable *ModuleName = createPrivateGlobalForString(
M, M.getModuleIdentifier(), /*AllowMerging*/ false);
- auto &DL = M.getDataLayout();
for (size_t i = 0; i < n; i++) {
static const uint64_t kMaxGlobalRedzone = 1 << 18;
GlobalVariable *G = GlobalsToChange[i];
@@ -1472,6 +1645,21 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
NewGlobal->copyAttributesFrom(G);
NewGlobal->setAlignment(MinRZ);
+ // Move null-terminated C strings to "__asan_cstring" section on Darwin.
+ if (TargetTriple.isOSBinFormatMachO() && !G->hasSection() &&
+ G->isConstant()) {
+ auto Seq = dyn_cast<ConstantDataSequential>(G->getInitializer());
+ if (Seq && Seq->isCString())
+ NewGlobal->setSection("__TEXT,__asan_cstring,regular");
+ }
+
+ // Transfer the debug info. The payload starts at offset zero so we can
+ // copy the debug info over as is.
+ SmallVector<DIGlobalVariableExpression *, 1> GVs;
+ G->getDebugInfo(GVs);
+ for (auto *GV : GVs)
+ NewGlobal->addDebugInfo(GV);
+
Value *Indices2[2];
Indices2[0] = IRB.getInt32(0);
Indices2[1] = IRB.getInt32(0);
@@ -1480,6 +1668,25 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
ConstantExpr::getGetElementPtr(NewTy, NewGlobal, Indices2, true));
NewGlobal->takeName(G);
G->eraseFromParent();
+ G = NewGlobal;
+
+ if (UseComdatMetadata) {
+ // Get or create a COMDAT for G so that we can use it with our metadata.
+ Comdat *C = G->getComdat();
+ if (!C) {
+ if (!G->hasName()) {
+ // If G is unnamed, it must be internal. Give it an artificial name
+ // so we can put it in a comdat.
+ assert(G->hasLocalLinkage());
+ G->setName(Twine(kAsanGenPrefix) + "_anon_global");
+ }
+ C = M.getOrInsertComdat(G->getName());
+ // Make this IMAGE_COMDAT_SELECT_NODUPLICATES on COFF.
+ if (TargetTriple.isOSBinFormatCOFF())
+ C->setSelectionKind(Comdat::NoDuplicates);
+ G->setComdat(C);
+ }
+ }
Constant *SourceLoc;
if (!MD.SourceLoc.empty()) {
@@ -1492,7 +1699,8 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
Constant *ODRIndicator = ConstantExpr::getNullValue(IRB.getInt8PtrTy());
GlobalValue *InstrumentedGlobal = NewGlobal;
- bool CanUsePrivateAliases = TargetTriple.isOSBinFormatELF();
+ bool CanUsePrivateAliases =
+ TargetTriple.isOSBinFormatELF() || TargetTriple.isOSBinFormatMachO();
if (CanUsePrivateAliases && ClUsePrivateAliasForGlobals) {
// Create local alias for NewGlobal to avoid crash on ODR between
// instrumented and non-instrumented libraries.
@@ -1515,7 +1723,7 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
InstrumentedGlobal = GA;
}
- Initializers[i] = ConstantStruct::get(
+ Constant *Initializer = ConstantStruct::get(
GlobalStructTy,
ConstantExpr::getPointerCast(InstrumentedGlobal, IntptrTy),
ConstantInt::get(IntptrTy, SizeInBytes),
@@ -1528,61 +1736,92 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
if (ClInitializers && MD.IsDynInit) HasDynamicallyInitializedGlobals = true;
DEBUG(dbgs() << "NEW GLOBAL: " << *NewGlobal << "\n");
+
+ // If we aren't using separate metadata globals, add it to the initializer
+ // list and continue.
+ if (UseMetadataArray) {
+ Initializers[i] = Initializer;
+ continue;
+ }
+
+ // Create a separate metadata global and put it in the appropriate ASan
+ // global registration section.
+ GlobalVariable *Metadata = new GlobalVariable(
+ M, GlobalStructTy, false, GlobalVariable::InternalLinkage,
+ Initializer, Twine("__asan_global_") +
+ GlobalValue::getRealLinkageName(G->getName()));
+ Metadata->setSection(getGlobalMetadataSection());
+
+ // The MSVC linker always inserts padding when linking incrementally. We
+ // cope with that by aligning each struct to its size, which must be a power
+ // of two.
+ if (TargetTriple.isOSBinFormatCOFF())
+ Metadata->setAlignment(SizeOfGlobalStruct);
+ else
+ Metadata->setAlignment(1); // Don't leave padding in between.
+
+ // On platforms that support comdats, put the metadata and the
+ // instrumented global in the same group. This ensures that the metadata
+ // is discarded if the instrumented global is discarded.
+ if (UseComdatMetadata) {
+ assert(G->hasComdat());
+ Metadata->setComdat(G->getComdat());
+ continue;
+ }
+ assert(UseMachOGlobalsSection);
+
+ // On recent Mach-O platforms, we emit the global metadata in a way that
+ // allows the linker to properly strip dead globals.
+ auto LivenessBinder = ConstantStruct::get(
+ LivenessTy, Initializer->getAggregateElement(0u),
+ ConstantExpr::getPointerCast(Metadata, IntptrTy), nullptr);
+ GlobalVariable *Liveness = new GlobalVariable(
+ M, LivenessTy, false, GlobalVariable::InternalLinkage, LivenessBinder,
+ Twine("__asan_binder_") + G->getName());
+ Liveness->setSection("__DATA,__asan_liveness,regular,live_support");
+ LivenessGlobals[i] = Liveness;
}
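In C terms, each Mach-O liveness binder emitted above pairs the instrumented global's address with the address of its metadata struct; a sketch with an illustrative struct name:

#include <cstdint>

struct AsanLivenessBinderSketch {
  uintptr_t InstrumentedGlobal;  // element 0 of the metadata initializer
  uintptr_t MetadataStruct;      // the __asan_global_* metadata variable
}; // emitted into __DATA,__asan_liveness,regular,live_support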
+ // Create calls for poisoning before initializers run and unpoisoning after.
+ if (HasDynamicallyInitializedGlobals)
+ createInitializerPoisonCalls(M, ModuleName);
+
+ // Platforms with a dedicated metadata section don't need to emit any more
+ // code.
+ if (UseComdatMetadata)
+ return true;
GlobalVariable *AllGlobals = nullptr;
GlobalVariable *RegisteredFlag = nullptr;
- // On recent Mach-O platforms, we emit the global metadata in a way that
- // allows the linker to properly strip dead globals.
- if (ShouldUseMachOGlobalsSection()) {
+ if (UseMachOGlobalsSection) {
// RegisteredFlag serves two purposes. First, we can pass it to dladdr()
// to look up the loaded image that contains it. Second, we can store in it
// whether registration has already occurred, to prevent duplicate
// registration.
//
- // Common linkage allows us to coalesce needles defined in each object
- // file so that there's only one per shared library.
+ // Common linkage ensures that there is only one such global per shared library.
RegisteredFlag = new GlobalVariable(
M, IntptrTy, false, GlobalVariable::CommonLinkage,
ConstantInt::get(IntptrTy, 0), kAsanGlobalsRegisteredFlagName);
- // We also emit a structure which binds the liveness of the global
- // variable to the metadata struct.
- StructType *LivenessTy = StructType::get(IntptrTy, IntptrTy, nullptr);
-
- for (size_t i = 0; i < n; i++) {
- GlobalVariable *Metadata = new GlobalVariable(
- M, GlobalStructTy, false, GlobalVariable::InternalLinkage,
- Initializers[i], "");
- Metadata->setSection("__DATA,__asan_globals,regular");
- Metadata->setAlignment(1); // don't leave padding in between
-
- auto LivenessBinder = ConstantStruct::get(LivenessTy,
- Initializers[i]->getAggregateElement(0u),
- ConstantExpr::getPointerCast(Metadata, IntptrTy),
- nullptr);
- GlobalVariable *Liveness = new GlobalVariable(
- M, LivenessTy, false, GlobalVariable::InternalLinkage,
- LivenessBinder, "");
- Liveness->setSection("__DATA,__asan_liveness,regular,live_support");
- }
- } else {
- // On all other platfoms, we just emit an array of global metadata
- // structures.
+ // Update llvm.compiler.used, adding the new liveness globals. This is
+ // needed so that these variables stay alive during LTO. The alternative
+ // would be to have the linker handle the LTO symbols, but libLTO's
+ // current API does not expose access to the section for each symbol.
+ if (!LivenessGlobals.empty())
+ appendToCompilerUsed(M, LivenessGlobals);
+ } else if (UseMetadataArray) {
+ // On platforms that don't have a custom metadata section, we emit an array
+ // of global metadata structures.
ArrayType *ArrayOfGlobalStructTy = ArrayType::get(GlobalStructTy, n);
AllGlobals = new GlobalVariable(
M, ArrayOfGlobalStructTy, false, GlobalVariable::InternalLinkage,
ConstantArray::get(ArrayOfGlobalStructTy, Initializers), "");
}
- // Create calls for poisoning before initializers run and unpoisoning after.
- if (HasDynamicallyInitializedGlobals)
- createInitializerPoisonCalls(M, ModuleName);
-
// Create a call to register the globals with the runtime.
- if (ShouldUseMachOGlobalsSection()) {
+ if (UseMachOGlobalsSection) {
IRB.CreateCall(AsanRegisterImageGlobals,
{IRB.CreatePointerCast(RegisteredFlag, IntptrTy)});
} else {
@@ -1599,7 +1838,7 @@ bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) {
BasicBlock *AsanDtorBB = BasicBlock::Create(*C, "", AsanDtorFunction);
IRBuilder<> IRB_Dtor(ReturnInst::Create(*C, AsanDtorBB));
- if (ShouldUseMachOGlobalsSection()) {
+ if (UseMachOGlobalsSection) {
IRB_Dtor.CreateCall(AsanUnregisterImageGlobals,
{IRB.CreatePointerCast(RegisteredFlag, IntptrTy)});
} else {
@@ -1737,6 +1976,17 @@ bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) {
return false;
}
+void AddressSanitizer::maybeInsertDynamicShadowAtFunctionEntry(Function &F) {
+ // Generate code only when dynamic addressing is needed.
+ if (Mapping.Offset != kDynamicShadowSentinel)
+ return;
+
+ IRBuilder<> IRB(&F.front().front());
+ Value *GlobalDynamicAddress = F.getParent()->getOrInsertGlobal(
+ kAsanShadowMemoryDynamicAddress, IntptrTy);
+ LocalDynamicShadow = IRB.CreateLoad(GlobalDynamicAddress);
+}
+
void AddressSanitizer::markEscapedLocalAllocas(Function &F) {
// Find the one possible call to llvm.localescape and pre-mark allocas passed
// to it as uninteresting. This assumes we haven't started processing allocas
@@ -1768,20 +2018,29 @@ void AddressSanitizer::markEscapedLocalAllocas(Function &F) {
bool AddressSanitizer::runOnFunction(Function &F) {
if (&F == AsanCtorFunction) return false;
if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) return false;
- DEBUG(dbgs() << "ASAN instrumenting:\n" << F << "\n");
- initializeCallbacks(*F.getParent());
+ if (!ClDebugFunc.empty() && ClDebugFunc == F.getName()) return false;
+ if (F.getName().startswith("__asan_")) return false;
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ bool FunctionModified = false;
// If needed, insert __asan_init before checking for SanitizeAddress attr.
- maybeInsertAsanInitAtFunctionEntry(F);
+ // This function needs to be called even if the function body is not
+ // instrumented.
+ if (maybeInsertAsanInitAtFunctionEntry(F))
+ FunctionModified = true;
+
+ // Leave if the function doesn't need instrumentation.
+ if (!F.hasFnAttribute(Attribute::SanitizeAddress)) return FunctionModified;
- if (!F.hasFnAttribute(Attribute::SanitizeAddress)) return false;
+ DEBUG(dbgs() << "ASAN instrumenting:\n" << F << "\n");
- if (!ClDebugFunc.empty() && ClDebugFunc != F.getName()) return false;
+ initializeCallbacks(*F.getParent());
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
FunctionStateRAII CleanupObj(this);
+ maybeInsertDynamicShadowAtFunctionEntry(F);
+
// We can't instrument allocas used with llvm.localescape. Only static allocas
// can be passed to that intrinsic.
markEscapedLocalAllocas(F);
@@ -1807,11 +2066,20 @@ bool AddressSanitizer::runOnFunction(Function &F) {
int NumInsnsPerBB = 0;
for (auto &Inst : BB) {
if (LooksLikeCodeInBug11395(&Inst)) return false;
+ Value *MaybeMask = nullptr;
if (Value *Addr = isInterestingMemoryAccess(&Inst, &IsWrite, &TypeSize,
- &Alignment)) {
+ &Alignment, &MaybeMask)) {
if (ClOpt && ClOptSameTemp) {
- if (!TempsToInstrument.insert(Addr).second)
- continue; // We've seen this temp in the current BB.
+ // If we have a mask, skip instrumentation if we've already
+ // instrumented the full object. But don't add to TempsToInstrument
+ // because we might get another load/store with a different mask.
+ if (MaybeMask) {
+ if (TempsToInstrument.count(Addr))
+ continue; // We've seen this (whole) temp in the current BB.
+ } else {
+ if (!TempsToInstrument.insert(Addr).second)
+ continue; // We've seen this temp in the current BB.
+ }
}
} else if (ClInvalidPointerPairs &&
isInterestingPointerComparisonOrSubtraction(&Inst)) {
@@ -1874,11 +2142,13 @@ bool AddressSanitizer::runOnFunction(Function &F) {
NumInstrumented++;
}
- bool res = NumInstrumented > 0 || ChangedStack || !NoReturnCalls.empty();
+ if (NumInstrumented > 0 || ChangedStack || !NoReturnCalls.empty())
+ FunctionModified = true;
- DEBUG(dbgs() << "ASAN done instrumenting: " << res << " " << F << "\n");
+ DEBUG(dbgs() << "ASAN done instrumenting: " << FunctionModified << " "
+ << F << "\n");
- return res;
+ return FunctionModified;
}
// Workaround for bug 11395: we don't want to instrument stack in functions
@@ -1913,6 +2183,15 @@ void FunctionStackPoisoner::initializeCallbacks(Module &M) {
IntptrTy, IntptrTy, nullptr));
}
+ for (size_t Val : {0x00, 0xf1, 0xf2, 0xf3, 0xf5, 0xf8}) {
+ std::ostringstream Name;
+ Name << kAsanSetShadowPrefix;
+ Name << std::setw(2) << std::setfill('0') << std::hex << Val;
+ AsanSetShadowFunc[Val] =
+ checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ Name.str(), IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr));
+ }
+
AsanAllocaPoisonFunc = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
kAsanAllocaPoison, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr));
AsanAllocasUnpoisonFunc =
@@ -1920,33 +2199,95 @@ void FunctionStackPoisoner::initializeCallbacks(Module &M) {
kAsanAllocasUnpoison, IRB.getVoidTy(), IntptrTy, IntptrTy, nullptr));
}
-void FunctionStackPoisoner::poisonRedZones(ArrayRef<uint8_t> ShadowBytes,
- IRBuilder<> &IRB, Value *ShadowBase,
- bool DoPoison) {
- size_t n = ShadowBytes.size();
- size_t i = 0;
- // We need to (un)poison n bytes of stack shadow. Poison as many as we can
- // using 64-bit stores (if we are on 64-bit arch), then poison the rest
- // with 32-bit stores, then with 16-byte stores, then with 8-byte stores.
- for (size_t LargeStoreSizeInBytes = ASan.LongSize / 8;
- LargeStoreSizeInBytes != 0; LargeStoreSizeInBytes /= 2) {
- for (; i + LargeStoreSizeInBytes - 1 < n; i += LargeStoreSizeInBytes) {
- uint64_t Val = 0;
- for (size_t j = 0; j < LargeStoreSizeInBytes; j++) {
- if (F.getParent()->getDataLayout().isLittleEndian())
- Val |= (uint64_t)ShadowBytes[i + j] << (8 * j);
- else
- Val = (Val << 8) | ShadowBytes[i + j];
- }
- if (!Val) continue;
- Value *Ptr = IRB.CreateAdd(ShadowBase, ConstantInt::get(IntptrTy, i));
- Type *StoreTy = Type::getIntNTy(*C, LargeStoreSizeInBytes * 8);
- Value *Poison = ConstantInt::get(StoreTy, DoPoison ? Val : 0);
- IRB.CreateStore(Poison, IRB.CreateIntToPtr(Ptr, StoreTy->getPointerTo()));
+void FunctionStackPoisoner::copyToShadowInline(ArrayRef<uint8_t> ShadowMask,
+ ArrayRef<uint8_t> ShadowBytes,
+ size_t Begin, size_t End,
+ IRBuilder<> &IRB,
+ Value *ShadowBase) {
+ if (Begin >= End)
+ return;
+
+ const size_t LargestStoreSizeInBytes =
+ std::min<size_t>(sizeof(uint64_t), ASan.LongSize / 8);
+
+ const bool IsLittleEndian = F.getParent()->getDataLayout().isLittleEndian();
+
+ // Poison the given range in shadow using the largest store size, skipping
+ // leading and trailing zeros in ShadowMask. Zeros never change, so they need
+ // neither poisoning nor unpoisoning. Still, we don't mind if some of them end
+ // up in the middle of a store.
+ for (size_t i = Begin; i < End;) {
+ if (!ShadowMask[i]) {
+ assert(!ShadowBytes[i]);
+ ++i;
+ continue;
+ }
+
+ size_t StoreSizeInBytes = LargestStoreSizeInBytes;
+ // Fit store size into the range.
+ while (StoreSizeInBytes > End - i)
+ StoreSizeInBytes /= 2;
+
+ // Minimize store size by trimming trailing zeros.
+ for (size_t j = StoreSizeInBytes - 1; j && !ShadowMask[i + j]; --j) {
+ while (j <= StoreSizeInBytes / 2)
+ StoreSizeInBytes /= 2;
+ }
+
+ uint64_t Val = 0;
+ for (size_t j = 0; j < StoreSizeInBytes; j++) {
+ if (IsLittleEndian)
+ Val |= (uint64_t)ShadowBytes[i + j] << (8 * j);
+ else
+ Val = (Val << 8) | ShadowBytes[i + j];
}
+
+ Value *Ptr = IRB.CreateAdd(ShadowBase, ConstantInt::get(IntptrTy, i));
+ Value *Poison = IRB.getIntN(StoreSizeInBytes * 8, Val);
+ IRB.CreateAlignedStore(
+ Poison, IRB.CreateIntToPtr(Ptr, Poison->getType()->getPointerTo()), 1);
+
+ i += StoreSizeInBytes;
}
}
+void FunctionStackPoisoner::copyToShadow(ArrayRef<uint8_t> ShadowMask,
+ ArrayRef<uint8_t> ShadowBytes,
+ IRBuilder<> &IRB, Value *ShadowBase) {
+ copyToShadow(ShadowMask, ShadowBytes, 0, ShadowMask.size(), IRB, ShadowBase);
+}
+
+void FunctionStackPoisoner::copyToShadow(ArrayRef<uint8_t> ShadowMask,
+ ArrayRef<uint8_t> ShadowBytes,
+ size_t Begin, size_t End,
+ IRBuilder<> &IRB, Value *ShadowBase) {
+ assert(ShadowMask.size() == ShadowBytes.size());
+ size_t Done = Begin;
+ for (size_t i = Begin, j = Begin + 1; i < End; i = j++) {
+ if (!ShadowMask[i]) {
+ assert(!ShadowBytes[i]);
+ continue;
+ }
+ uint8_t Val = ShadowBytes[i];
+ if (!AsanSetShadowFunc[Val])
+ continue;
+
+ // Skip same values.
+ for (; j < End && ShadowMask[j] && Val == ShadowBytes[j]; ++j) {
+ }
+
+ if (j - i >= ClMaxInlinePoisoningSize) {
+ copyToShadowInline(ShadowMask, ShadowBytes, Done, i, IRB, ShadowBase);
+ IRB.CreateCall(AsanSetShadowFunc[Val],
+ {IRB.CreateAdd(ShadowBase, ConstantInt::get(IntptrTy, i)),
+ ConstantInt::get(IntptrTy, j - i)});
+ Done = j;
+ }
+ }
+
+ copyToShadowInline(ShadowMask, ShadowBytes, Done, End, IRB, ShadowBase);
+}
+
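Taken together, the two routines above split shadow (un)poisoning as follows: copyToShadow scans for runs of one non-zero shadow value and, when a run is at least ClMaxInlinePoisoningSize bytes long and a matching __asan_set_shadow_XX callback exists, emits a runtime call for it; everything in between falls to copyToShadowInline, which uses plain stores of up to 8 bytes. A hedged sketch of that strategy (illustrative helper, not the pass's code):

#include <cstddef>
#include <cstdint>

void copyToShadowSketch(const uint8_t *Mask, const uint8_t *Bytes, size_t N,
                        size_t MaxInlineRun /* ClMaxInlinePoisoningSize */) {
  size_t Done = 0;
  for (size_t i = 0; i < N;) {
    size_t j = i + 1;
    while (j < N && Mask[j] && Bytes[j] == Bytes[i])
      ++j;                                    // extend the run of equal values
    if (Mask[i] && j - i >= MaxInlineRun) {   // and a __asan_set_shadow_XX exists
      // flush [Done, i) with inline stores, then call the runtime for [i, j)
      Done = j;
    }
    i = j;
  }
  // flush the remaining [Done, N) with inline stores
}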
// Fake stack allocator (asan_fake_stack.h) has 11 size classes
// for every power of 2 from kMinStackMallocSize to kMaxAsanStackMallocSizeClass
static int StackMallocSizeClass(uint64_t LocalStackSize) {
@@ -1957,26 +2298,6 @@ static int StackMallocSizeClass(uint64_t LocalStackSize) {
llvm_unreachable("impossible LocalStackSize");
}
-// Set Size bytes starting from ShadowBase to kAsanStackAfterReturnMagic.
-// We can not use MemSet intrinsic because it may end up calling the actual
-// memset. Size is a multiple of 8.
-// Currently this generates 8-byte stores on x86_64; it may be better to
-// generate wider stores.
-void FunctionStackPoisoner::SetShadowToStackAfterReturnInlined(
- IRBuilder<> &IRB, Value *ShadowBase, int Size) {
- assert(!(Size % 8));
-
- // kAsanStackAfterReturnMagic is 0xf5.
- const uint64_t kAsanStackAfterReturnMagic64 = 0xf5f5f5f5f5f5f5f5ULL;
-
- for (int i = 0; i < Size; i += 8) {
- Value *p = IRB.CreateAdd(ShadowBase, ConstantInt::get(IntptrTy, i));
- IRB.CreateStore(
- ConstantInt::get(IRB.getInt64Ty(), kAsanStackAfterReturnMagic64),
- IRB.CreateIntToPtr(p, IRB.getInt64Ty()->getPointerTo()));
- }
-}
-
PHINode *FunctionStackPoisoner::createPHI(IRBuilder<> &IRB, Value *Cond,
Value *ValueIfTrue,
Instruction *ThenTerm,
@@ -2015,37 +2336,39 @@ void FunctionStackPoisoner::createDynamicAllocasInitStorage() {
DynamicAllocaLayout->setAlignment(32);
}
-void FunctionStackPoisoner::poisonStack() {
- assert(AllocaVec.size() > 0 || DynamicAllocaVec.size() > 0);
+void FunctionStackPoisoner::processDynamicAllocas() {
+ if (!ClInstrumentDynamicAllocas || DynamicAllocaVec.empty()) {
+ assert(DynamicAllocaPoisonCallVec.empty());
+ return;
+ }
- // Insert poison calls for lifetime intrinsics for alloca.
- bool HavePoisonedStaticAllocas = false;
- for (const auto &APC : AllocaPoisonCallVec) {
+ // Insert poison calls for lifetime intrinsics for dynamic allocas.
+ for (const auto &APC : DynamicAllocaPoisonCallVec) {
assert(APC.InsBefore);
assert(APC.AI);
assert(ASan.isInterestingAlloca(*APC.AI));
- bool IsDynamicAlloca = !(*APC.AI).isStaticAlloca();
- if (!ClInstrumentAllocas && IsDynamicAlloca)
- continue;
+ assert(!APC.AI->isStaticAlloca());
IRBuilder<> IRB(APC.InsBefore);
poisonAlloca(APC.AI, APC.Size, IRB, APC.DoPoison);
// Dynamic allocas will be unpoisoned unconditionally below in
// unpoisonDynamicAllocas.
// Flag that we need unpoison static allocas.
- HavePoisonedStaticAllocas |= (APC.DoPoison && !IsDynamicAlloca);
}
- if (ClInstrumentAllocas && DynamicAllocaVec.size() > 0) {
- // Handle dynamic allocas.
- createDynamicAllocasInitStorage();
- for (auto &AI : DynamicAllocaVec) handleDynamicAllocaCall(AI);
+ // Handle dynamic allocas.
+ createDynamicAllocasInitStorage();
+ for (auto &AI : DynamicAllocaVec)
+ handleDynamicAllocaCall(AI);
+ unpoisonDynamicAllocas();
+}
- unpoisonDynamicAllocas();
+void FunctionStackPoisoner::processStaticAllocas() {
+ if (AllocaVec.empty()) {
+ assert(StaticAllocaPoisonCallVec.empty());
+ return;
}
- if (AllocaVec.empty()) return;
-
int StackMallocIdx = -1;
DebugLoc EntryDebugLocation;
if (auto SP = F.getSubprogram())
@@ -2060,10 +2383,9 @@ void FunctionStackPoisoner::poisonStack() {
// regular stack slots.
auto InsBeforeB = InsBefore->getParent();
assert(InsBeforeB == &F.getEntryBlock());
- for (BasicBlock::iterator I(InsBefore); I != InsBeforeB->end(); ++I)
- if (auto *AI = dyn_cast<AllocaInst>(I))
- if (NonInstrumentedStaticAllocaVec.count(AI) > 0)
- AI->moveBefore(InsBefore);
+ for (auto *AI : StaticAllocasToMoveUp)
+ if (AI->getParent() == InsBeforeB)
+ AI->moveBefore(InsBefore);
// If we have a call to llvm.localescape, keep it in the entry block.
if (LocalEscapeCall) LocalEscapeCall->moveBefore(InsBefore);
@@ -2072,16 +2394,46 @@ void FunctionStackPoisoner::poisonStack() {
SVD.reserve(AllocaVec.size());
for (AllocaInst *AI : AllocaVec) {
ASanStackVariableDescription D = {AI->getName().data(),
- ASan.getAllocaSizeInBytes(AI),
- AI->getAlignment(), AI, 0};
+ ASan.getAllocaSizeInBytes(*AI),
+ 0,
+ AI->getAlignment(),
+ AI,
+ 0,
+ 0};
SVD.push_back(D);
}
+
// Minimal header size (left redzone) is 4 pointers,
// i.e. 32 bytes on 64-bit platforms and 16 bytes in 32-bit platforms.
size_t MinHeaderSize = ASan.LongSize / 2;
- ASanStackFrameLayout L;
- ComputeASanStackFrameLayout(SVD, 1ULL << Mapping.Scale, MinHeaderSize, &L);
- DEBUG(dbgs() << L.DescriptionString << " --- " << L.FrameSize << "\n");
+ const ASanStackFrameLayout &L =
+ ComputeASanStackFrameLayout(SVD, 1ULL << Mapping.Scale, MinHeaderSize);
+
+ // Build AllocaToSVDMap for ASanStackVariableDescription lookup.
+ DenseMap<const AllocaInst *, ASanStackVariableDescription *> AllocaToSVDMap;
+ for (auto &Desc : SVD)
+ AllocaToSVDMap[Desc.AI] = &Desc;
+
+ // Update SVD with information from lifetime intrinsics.
+ for (const auto &APC : StaticAllocaPoisonCallVec) {
+ assert(APC.InsBefore);
+ assert(APC.AI);
+ assert(ASan.isInterestingAlloca(*APC.AI));
+ assert(APC.AI->isStaticAlloca());
+
+ ASanStackVariableDescription &Desc = *AllocaToSVDMap[APC.AI];
+ Desc.LifetimeSize = Desc.Size;
+ if (const DILocation *FnLoc = EntryDebugLocation.get()) {
+ if (const DILocation *LifetimeLoc = APC.InsBefore->getDebugLoc().get()) {
+ if (LifetimeLoc->getFile() == FnLoc->getFile())
+ if (unsigned Line = LifetimeLoc->getLine())
+ Desc.Line = std::min(Desc.Line ? Desc.Line : Line, Line);
+ }
+ }
+ }
+
+ auto DescriptionString = ComputeASanStackFrameDescription(SVD);
+ DEBUG(dbgs() << DescriptionString << " --- " << L.FrameSize << "\n");
uint64_t LocalStackSize = L.FrameSize;
bool DoStackMalloc = ClUseAfterReturn && !ASan.CompileKernel &&
LocalStackSize <= kMaxStackMallocSize;
@@ -2164,7 +2516,7 @@ void FunctionStackPoisoner::poisonStack() {
ConstantInt::get(IntptrTy, ASan.LongSize / 8)),
IntptrPtrTy);
GlobalVariable *StackDescriptionGlobal =
- createPrivateGlobalForString(*F.getParent(), L.DescriptionString,
+ createPrivateGlobalForString(*F.getParent(), DescriptionString,
/*AllowMerging*/ true);
Value *Description = IRB.CreatePointerCast(StackDescriptionGlobal, IntptrTy);
IRB.CreateStore(Description, BasePlus1);
@@ -2175,19 +2527,33 @@ void FunctionStackPoisoner::poisonStack() {
IntptrPtrTy);
IRB.CreateStore(IRB.CreatePointerCast(&F, IntptrTy), BasePlus2);
- // Poison the stack redzones at the entry.
- Value *ShadowBase = ASan.memToShadow(LocalStackBase, IRB);
- poisonRedZones(L.ShadowBytes, IRB, ShadowBase, true);
+ const auto &ShadowAfterScope = GetShadowBytesAfterScope(SVD, L);
- auto UnpoisonStack = [&](IRBuilder<> &IRB) {
- if (HavePoisonedStaticAllocas) {
- // If we poisoned some allocas in llvm.lifetime analysis,
- // unpoison whole stack frame now.
- poisonAlloca(LocalStackBase, LocalStackSize, IRB, false);
- } else {
- poisonRedZones(L.ShadowBytes, IRB, ShadowBase, false);
+ // Poison the stack red zones at the entry.
+ Value *ShadowBase = ASan.memToShadow(LocalStackBase, IRB);
+ // As the mask we must use the most poisoned case: red zones plus
+ // after-scope bytes. As the bytes we can use either the same values or just
+ // the red zones.
+ copyToShadow(ShadowAfterScope, ShadowAfterScope, IRB, ShadowBase);
+
+ if (!StaticAllocaPoisonCallVec.empty()) {
+ const auto &ShadowInScope = GetShadowBytes(SVD, L);
+
+ // Poison static allocas near lifetime intrinsics.
+ for (const auto &APC : StaticAllocaPoisonCallVec) {
+ const ASanStackVariableDescription &Desc = *AllocaToSVDMap[APC.AI];
+ assert(Desc.Offset % L.Granularity == 0);
+ size_t Begin = Desc.Offset / L.Granularity;
+ size_t End = Begin + (APC.Size + L.Granularity - 1) / L.Granularity;
+
+ IRBuilder<> IRB(APC.InsBefore);
+ copyToShadow(ShadowAfterScope,
+ APC.DoPoison ? ShadowAfterScope : ShadowInScope, Begin, End,
+ IRB, ShadowBase);
}
- };
+ }
+
+ SmallVector<uint8_t, 64> ShadowClean(ShadowAfterScope.size(), 0);
+ SmallVector<uint8_t, 64> ShadowAfterReturn;
// (Un)poison the stack before all ret instructions.
for (auto Ret : RetVec) {
@@ -2215,8 +2581,10 @@ void FunctionStackPoisoner::poisonStack() {
IRBuilder<> IRBPoison(ThenTerm);
if (StackMallocIdx <= 4) {
int ClassSize = kMinStackMallocSize << StackMallocIdx;
- SetShadowToStackAfterReturnInlined(IRBPoison, ShadowBase,
- ClassSize >> Mapping.Scale);
+ ShadowAfterReturn.resize(ClassSize / L.Granularity,
+ kAsanStackUseAfterReturnMagic);
+ copyToShadow(ShadowAfterReturn, ShadowAfterReturn, IRBPoison,
+ ShadowBase);
Value *SavedFlagPtrPtr = IRBPoison.CreateAdd(
FakeStack,
ConstantInt::get(IntptrTy, ClassSize - ASan.LongSize / 8));
@@ -2233,9 +2601,9 @@ void FunctionStackPoisoner::poisonStack() {
}
IRBuilder<> IRBElse(ElseTerm);
- UnpoisonStack(IRBElse);
+ copyToShadow(ShadowAfterScope, ShadowClean, IRBElse, ShadowBase);
} else {
- UnpoisonStack(IRBRet);
+ copyToShadow(ShadowAfterScope, ShadowClean, IRBRet, ShadowBase);
}
}
@@ -2264,7 +2632,7 @@ void FunctionStackPoisoner::poisonAlloca(Value *V, uint64_t Size,
AllocaInst *FunctionStackPoisoner::findAllocaForValue(Value *V) {
if (AllocaInst *AI = dyn_cast<AllocaInst>(V))
- // We're intested only in allocas we can handle.
+ // We're interested only in allocas we can handle.
return ASan.isInterestingAlloca(*AI) ? AI : nullptr;
// See if we've already calculated (or started to calculate) alloca for a
// given value.
@@ -2286,6 +2654,10 @@ AllocaInst *FunctionStackPoisoner::findAllocaForValue(Value *V) {
return nullptr;
Res = IncValueAI;
}
+ } else if (GetElementPtrInst *EP = dyn_cast<GetElementPtrInst>(V)) {
+ Res = findAllocaForValue(EP->getPointerOperand());
+ } else {
+ DEBUG(dbgs() << "Alloca search canceled on unknown instruction: " << *V << "\n");
}
if (Res) AllocaForValue[V] = Res;
return Res;
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/CFGMST.h b/contrib/llvm/lib/Transforms/Instrumentation/CFGMST.h
index 3cd7351cad62..3802f9fbf7db 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/CFGMST.h
+++ b/contrib/llvm/lib/Transforms/Instrumentation/CFGMST.h
@@ -78,6 +78,14 @@ public:
return *It->second.get();
}
+ // Given a BB, return the auxiliary information if it's available.
+ BBInfo *findBBInfo(const BasicBlock *BB) const {
+ auto It = BBInfos.find(BB);
+ if (It == BBInfos.end())
+ return nullptr;
+ return It->second.get();
+ }
+
// Traverse the CFG using a stack. Find all the edges and assign the weight.
// Edges with large weight will be put into MST first so they are less likely
// to be instrumented.
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp
index fb80f87369f9..05eba6c4dc69 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/EfficiencySanitizer.cpp
@@ -99,12 +99,23 @@ static const char *const EsanWhichToolName = "__esan_which_tool";
// FIXME: Try to place these shadow constants, the names of the __esan_*
// interface functions, and the ToolType enum into a header shared between
// llvm and compiler-rt.
-static const uint64_t ShadowMask = 0x00000fffffffffffull;
-static const uint64_t ShadowOffs[3] = { // Indexed by scale
- 0x0000130000000000ull,
- 0x0000220000000000ull,
- 0x0000440000000000ull,
+struct ShadowMemoryParams {
+ uint64_t ShadowMask;
+ uint64_t ShadowOffs[3];
};
+
+static const ShadowMemoryParams ShadowParams47 = {
+ 0x00000fffffffffffull,
+ {
+ 0x0000130000000000ull, 0x0000220000000000ull, 0x0000440000000000ull,
+ }};
+
+static const ShadowMemoryParams ShadowParams40 = {
+ 0x0fffffffffull,
+ {
+ 0x1300000000ull, 0x2200000000ull, 0x4400000000ull,
+ }};
+
// This array is indexed by the ToolType enum.
static const int ShadowScale[] = {
0, // ESAN_None.
@@ -154,7 +165,7 @@ public:
EfficiencySanitizer(
const EfficiencySanitizerOptions &Opts = EfficiencySanitizerOptions())
: ModulePass(ID), Options(OverrideOptionsFromCL(Opts)) {}
- const char *getPassName() const override;
+ StringRef getPassName() const override;
void getAnalysisUsage(AnalysisUsage &AU) const override;
bool runOnModule(Module &M) override;
static char ID;
@@ -219,6 +230,7 @@ private:
// Remember the counter variable for each struct type to avoid
// recomputing the variable name later during instrumentation.
std::map<Type *, GlobalVariable *> StructTyMap;
+ ShadowMemoryParams ShadowParams;
};
} // namespace
@@ -231,7 +243,7 @@ INITIALIZE_PASS_END(
EfficiencySanitizer, "esan",
"EfficiencySanitizer: finds performance issues.", false, false)
-const char *EfficiencySanitizer::getPassName() const {
+StringRef EfficiencySanitizer::getPassName() const {
return "EfficiencySanitizer";
}
@@ -301,21 +313,21 @@ void EfficiencySanitizer::createStructCounterName(
else
NameStr += "struct.anon";
// We allow the actual size of the StructCounterName to be larger than
- // MaxStructCounterNameSize and append #NumFields and at least one
+ // MaxStructCounterNameSize and append $NumFields and at least one
// field type id.
- // Append #NumFields.
- NameStr += "#";
+ // Append $NumFields.
+ NameStr += "$";
Twine(StructTy->getNumElements()).toVector(NameStr);
// Append struct field type ids in the reverse order.
for (int i = StructTy->getNumElements() - 1; i >= 0; --i) {
- NameStr += "#";
+ NameStr += "$";
Twine(StructTy->getElementType(i)->getTypeID()).toVector(NameStr);
if (NameStr.size() >= MaxStructCounterNameSize)
break;
}
if (StructTy->isLiteral()) {
- // End with # for literal struct.
- NameStr += "#";
+ // End with $ for literal struct.
+ NameStr += "$";
}
}
@@ -528,6 +540,13 @@ void EfficiencySanitizer::createDestructor(Module &M, Constant *ToolInfoArg) {
}
bool EfficiencySanitizer::initOnModule(Module &M) {
+
+ Triple TargetTriple(M.getTargetTriple());
+ if (TargetTriple.getArch() == Triple::mips64 || TargetTriple.getArch() == Triple::mips64el)
+ ShadowParams = ShadowParams40;
+ else
+ ShadowParams = ShadowParams47;
+
Ctx = &M.getContext();
const DataLayout &DL = M.getDataLayout();
IRBuilder<> IRB(M.getContext());
@@ -559,13 +578,13 @@ bool EfficiencySanitizer::initOnModule(Module &M) {
Value *EfficiencySanitizer::appToShadow(Value *Shadow, IRBuilder<> &IRB) {
// Shadow = ((App & Mask) + Offs) >> Scale
- Shadow = IRB.CreateAnd(Shadow, ConstantInt::get(IntptrTy, ShadowMask));
+ Shadow = IRB.CreateAnd(Shadow, ConstantInt::get(IntptrTy, ShadowParams.ShadowMask));
uint64_t Offs;
int Scale = ShadowScale[Options.ToolType];
if (Scale <= 2)
- Offs = ShadowOffs[Scale];
+ Offs = ShadowParams.ShadowOffs[Scale];
else
- Offs = ShadowOffs[0] << Scale;
+ Offs = ShadowParams.ShadowOffs[0] << Scale;
Shadow = IRB.CreateAdd(Shadow, ConstantInt::get(IntptrTy, Offs));
if (Scale > 0)
Shadow = IRB.CreateLShr(Shadow, Scale);
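Substituting the 47-bit parameters into the formula above gives the following worked form; the helper name is illustrative, and Offs is ShadowOffs[Scale] for Scale <= 2, otherwise ShadowOffs[0] << Scale, exactly as in the code:

#include <cstdint>

uint64_t esanAppToShadow47(uint64_t App, int Scale, uint64_t Offs) {
  uint64_t Shadow = App & 0x00000fffffffffffULL;  // ShadowParams47.ShadowMask
  Shadow += Offs;                                  // e.g. 0x0000130000000000 at Scale 0
  return Scale > 0 ? (Shadow >> Scale) : Shadow;
}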
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
index b4070b602768..56d0f5e983ca 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp
@@ -118,7 +118,8 @@ private:
Function *insertFlush(ArrayRef<std::pair<GlobalVariable *, MDNode *>>);
void insertIndirectCounterIncrement();
- std::string mangleName(const DICompileUnit *CU, const char *NewStem);
+ enum class GCovFileType { GCNO, GCDA };
+ std::string mangleName(const DICompileUnit *CU, GCovFileType FileType);
GCOVOptions Options;
@@ -141,7 +142,7 @@ public:
: ModulePass(ID), Profiler(Opts) {
initializeGCOVProfilerLegacyPassPass(*PassRegistry::getPassRegistry());
}
- const char *getPassName() const override { return "GCOV Profiler"; }
+ StringRef getPassName() const override { return "GCOV Profiler"; }
bool runOnModule(Module &M) override { return Profiler.runOnModule(M); }
@@ -251,11 +252,7 @@ namespace {
class GCOVBlock : public GCOVRecord {
public:
GCOVLines &getFile(StringRef Filename) {
- GCOVLines *&Lines = LinesByFile[Filename];
- if (!Lines) {
- Lines = new GCOVLines(Filename, os);
- }
- return *Lines;
+ return LinesByFile.try_emplace(Filename, Filename, os).first->second;
}
void addEdge(GCOVBlock &Successor) {
@@ -264,9 +261,9 @@ namespace {
void writeOut() {
uint32_t Len = 3;
- SmallVector<StringMapEntry<GCOVLines *> *, 32> SortedLinesByFile;
+ SmallVector<StringMapEntry<GCOVLines> *, 32> SortedLinesByFile;
for (auto &I : LinesByFile) {
- Len += I.second->length();
+ Len += I.second.length();
SortedLinesByFile.push_back(&I);
}
@@ -274,21 +271,17 @@ namespace {
write(Len);
write(Number);
- std::sort(SortedLinesByFile.begin(), SortedLinesByFile.end(),
- [](StringMapEntry<GCOVLines *> *LHS,
- StringMapEntry<GCOVLines *> *RHS) {
- return LHS->getKey() < RHS->getKey();
- });
+ std::sort(
+ SortedLinesByFile.begin(), SortedLinesByFile.end(),
+ [](StringMapEntry<GCOVLines> *LHS, StringMapEntry<GCOVLines> *RHS) {
+ return LHS->getKey() < RHS->getKey();
+ });
for (auto &I : SortedLinesByFile)
- I->getValue()->writeOut();
+ I->getValue().writeOut();
write(0);
write(0);
}
- ~GCOVBlock() {
- DeleteContainerSeconds(LinesByFile);
- }
-
GCOVBlock(const GCOVBlock &RHS) : GCOVRecord(RHS), Number(RHS.Number) {
// Only allow copy before edges and lines have been added. After that,
// there are inter-block pointers (eg: edges) that won't take kindly to
@@ -306,7 +299,7 @@ namespace {
}
uint32_t Number;
- StringMap<GCOVLines *> LinesByFile;
+ StringMap<GCOVLines> LinesByFile;
SmallVector<GCOVBlock *, 4> OutEdges;
};
@@ -426,24 +419,40 @@ namespace {
}
std::string GCOVProfiler::mangleName(const DICompileUnit *CU,
- const char *NewStem) {
+ GCovFileType OutputType) {
+ bool Notes = OutputType == GCovFileType::GCNO;
+
if (NamedMDNode *GCov = M->getNamedMetadata("llvm.gcov")) {
for (int i = 0, e = GCov->getNumOperands(); i != e; ++i) {
MDNode *N = GCov->getOperand(i);
- if (N->getNumOperands() != 2) continue;
- MDString *GCovFile = dyn_cast<MDString>(N->getOperand(0));
- MDNode *CompileUnit = dyn_cast<MDNode>(N->getOperand(1));
- if (!GCovFile || !CompileUnit) continue;
- if (CompileUnit == CU) {
- SmallString<128> Filename = GCovFile->getString();
- sys::path::replace_extension(Filename, NewStem);
- return Filename.str();
+ bool ThreeElement = N->getNumOperands() == 3;
+ if (!ThreeElement && N->getNumOperands() != 2)
+ continue;
+ if (dyn_cast<MDNode>(N->getOperand(ThreeElement ? 2 : 1)) != CU)
+ continue;
+
+ if (ThreeElement) {
+ // These nodes have no mangling to apply, it's stored mangled in the
+ // bitcode.
+ MDString *NotesFile = dyn_cast<MDString>(N->getOperand(0));
+ MDString *DataFile = dyn_cast<MDString>(N->getOperand(1));
+ if (!NotesFile || !DataFile)
+ continue;
+ return Notes ? NotesFile->getString() : DataFile->getString();
}
+
+ MDString *GCovFile = dyn_cast<MDString>(N->getOperand(0));
+ if (!GCovFile)
+ continue;
+
+ SmallString<128> Filename = GCovFile->getString();
+ sys::path::replace_extension(Filename, Notes ? "gcno" : "gcda");
+ return Filename.str();
}
}
SmallString<128> Filename = CU->getFilename();
- sys::path::replace_extension(Filename, NewStem);
+ sys::path::replace_extension(Filename, Notes ? "gcno" : "gcda");
StringRef FName = sys::path::filename(Filename);
SmallString<128> CurPath;
if (sys::fs::current_path(CurPath)) return FName;
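The selection logic above distinguishes the new three-operand !llvm.gcov nodes, which carry pre-mangled notes and data file names plus the compile unit, from the legacy two-operand form, which carries a stem whose extension is swapped for .gcno or .gcda; with no matching metadata the compile unit's filename is used. A condensed sketch (illustrative helper, with the extension handling simplified to appending):

#include <string>

std::string gcovNameSketch(bool Notes, bool ThreeElement,
                           const std::string &NotesFile, const std::string &DataFile,
                           const std::string &Stem) {
  if (ThreeElement)
    return Notes ? NotesFile : DataFile;   // stored fully mangled in the bitcode
  return Stem + (Notes ? ".gcno" : ".gcda");
}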
@@ -461,7 +470,7 @@ bool GCOVProfiler::runOnModule(Module &M) {
}
PreservedAnalyses GCOVProfilerPass::run(Module &M,
- AnalysisManager<Module> &AM) {
+ ModuleAnalysisManager &AM) {
GCOVProfiler Profiler(GCOVOpts);
@@ -509,7 +518,7 @@ void GCOVProfiler::emitProfileNotes() {
continue;
std::error_code EC;
- raw_fd_ostream out(mangleName(CU, "gcno"), EC, sys::fs::F_None);
+ raw_fd_ostream out(mangleName(CU, GCovFileType::GCNO), EC, sys::fs::F_None);
std::string EdgeDestinations;
unsigned FunctionIdent = 0;
@@ -857,7 +866,7 @@ Function *GCOVProfiler::insertCounterWriteout(
if (CU->getDWOId())
continue;
- std::string FilenameGcda = mangleName(CU, "gcda");
+ std::string FilenameGcda = mangleName(CU, GCovFileType::GCDA);
uint32_t CfgChecksum = FileChecksums.empty() ? 0 : FileChecksums[i];
Builder.CreateCall(StartFile,
{Builder.CreateGlobalStringPtr(FilenameGcda),
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp b/contrib/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
index 202b94b19c4c..1ba13bdfe05a 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/IndirectCallPromotion.cpp
@@ -13,29 +13,38 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/Triple.h"
-#include "llvm/Analysis/CFG.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/IndirectCallPromotionAnalysis.h"
#include "llvm/Analysis/IndirectCallSiteVisitor.h"
+#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/InstIterator.h"
-#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
-#include "llvm/IR/Module.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/Type.h"
#include "llvm/Pass.h"
-#include "llvm/ProfileData/InstrProfReader.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/PassSupport.h"
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/PGOInstrumentation.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include <string>
-#include <utility>
+#include <cassert>
+#include <cstdint>
#include <vector>
using namespace llvm;
@@ -102,9 +111,7 @@ public:
*PassRegistry::getPassRegistry());
}
- const char *getPassName() const override {
- return "PGOIndirectCallPromotion";
- }
+ StringRef getPassName() const override { return "PGOIndirectCallPromotion"; }
private:
bool runOnModule(Module &M) override;
@@ -208,6 +215,7 @@ public:
ICallPromotionFunc(Function &Func, Module *Modu, InstrProfSymtab *Symtab)
: F(Func), M(Modu), Symtab(Symtab) {
}
+
bool processFunction();
};
} // end anonymous namespace
@@ -474,7 +482,7 @@ static Instruction *createDirectCallInst(const Instruction *Inst,
NewInst);
// Clear the value profile data.
- NewInst->setMetadata(LLVMContext::MD_prof, 0);
+ NewInst->setMetadata(LLVMContext::MD_prof, nullptr);
CallSite NewCS(NewInst);
FunctionType *DirectCalleeType = DirectCallee->getFunctionType();
unsigned ParamNum = DirectCalleeType->getFunctionNumParams();
@@ -610,7 +618,7 @@ bool ICallPromotionFunc::processFunction() {
Changed = true;
// Adjust the MD.prof metadata. First delete the old one.
- I->setMetadata(LLVMContext::MD_prof, 0);
+ I->setMetadata(LLVMContext::MD_prof, nullptr);
// If all promoted, we don't need the MD.prof metadata.
if (TotalCount == 0 || NumPromoted == NumVals)
continue;
@@ -653,7 +661,7 @@ bool PGOIndirectCallPromotionLegacyPass::runOnModule(Module &M) {
return promoteIndirectCalls(M, InLTO | ICPLTOMode);
}
-PreservedAnalyses PGOIndirectCallPromotion::run(Module &M, AnalysisManager<Module> &AM) {
+PreservedAnalyses PGOIndirectCallPromotion::run(Module &M, ModuleAnalysisManager &AM) {
if (!promoteIndirectCalls(M, InLTO | ICPLTOMode))
return PreservedAnalyses::all();
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp b/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
index b11c6be696f3..8da3e31200f3 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -15,6 +15,7 @@
#include "llvm/Transforms/InstrProfiling.h"
#include "llvm/ADT/Triple.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
@@ -53,30 +54,38 @@ public:
InstrProfilingLegacyPass() : ModulePass(ID), InstrProf() {}
InstrProfilingLegacyPass(const InstrProfOptions &Options)
: ModulePass(ID), InstrProf(Options) {}
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "Frontend instrumentation-based coverage lowering";
}
- bool runOnModule(Module &M) override { return InstrProf.run(M); }
+ bool runOnModule(Module &M) override {
+ return InstrProf.run(M, getAnalysis<TargetLibraryInfoWrapperPass>().getTLI());
+ }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
}
};
} // anonymous namespace
-PreservedAnalyses InstrProfiling::run(Module &M, AnalysisManager<Module> &AM) {
- if (!run(M))
+PreservedAnalyses InstrProfiling::run(Module &M, ModuleAnalysisManager &AM) {
+ auto &TLI = AM.getResult<TargetLibraryAnalysis>(M);
+ if (!run(M, TLI))
return PreservedAnalyses::all();
return PreservedAnalyses::none();
}
char InstrProfilingLegacyPass::ID = 0;
-INITIALIZE_PASS(InstrProfilingLegacyPass, "instrprof",
- "Frontend instrumentation-based coverage lowering.", false,
- false)
+INITIALIZE_PASS_BEGIN(
+ InstrProfilingLegacyPass, "instrprof",
+ "Frontend instrumentation-based coverage lowering.", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(
+ InstrProfilingLegacyPass, "instrprof",
+ "Frontend instrumentation-based coverage lowering.", false, false)
ModulePass *
llvm::createInstrProfilingLegacyPass(const InstrProfOptions &Options) {
@@ -107,10 +116,18 @@ StringRef InstrProfiling::getCoverageSection() const {
return getInstrProfCoverageSectionName(isMachO());
}
-bool InstrProfiling::run(Module &M) {
+static InstrProfIncrementInst *castToIncrementInst(Instruction *Instr) {
+ InstrProfIncrementInst *Inc = dyn_cast<InstrProfIncrementInstStep>(Instr);
+ if (Inc)
+ return Inc;
+ return dyn_cast<InstrProfIncrementInst>(Instr);
+}
+
+bool InstrProfiling::run(Module &M, const TargetLibraryInfo &TLI) {
bool MadeChange = false;
this->M = &M;
+ this->TLI = &TLI;
NamesVar = nullptr;
NamesSize = 0;
ProfileDataMap.clear();
@@ -138,7 +155,8 @@ bool InstrProfiling::run(Module &M) {
for (BasicBlock &BB : F)
for (auto I = BB.begin(), E = BB.end(); I != E;) {
auto Instr = I++;
- if (auto *Inc = dyn_cast<InstrProfIncrementInst>(Instr)) {
+ InstrProfIncrementInst *Inc = castToIncrementInst(&*Instr);
+ if (Inc) {
lowerIncrement(Inc);
MadeChange = true;
} else if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(Instr)) {
@@ -165,7 +183,8 @@ bool InstrProfiling::run(Module &M) {
return true;
}
-static Constant *getOrInsertValueProfilingCall(Module &M) {
+static Constant *getOrInsertValueProfilingCall(Module &M,
+ const TargetLibraryInfo &TLI) {
LLVMContext &Ctx = M.getContext();
auto *ReturnTy = Type::getVoidTy(M.getContext());
Type *ParamTypes[] = {
@@ -174,8 +193,13 @@ static Constant *getOrInsertValueProfilingCall(Module &M) {
};
auto *ValueProfilingCallTy =
FunctionType::get(ReturnTy, makeArrayRef(ParamTypes), false);
- return M.getOrInsertFunction(getInstrProfValueProfFuncName(),
- ValueProfilingCallTy);
+ Constant *Res = M.getOrInsertFunction(getInstrProfValueProfFuncName(),
+ ValueProfilingCallTy);
+ if (Function *FunRes = dyn_cast<Function>(Res)) {
+ if (auto AK = TLI.getExtAttrForI32Param(false))
+ FunRes->addAttribute(3, AK);
+ }
+ return Res;
}
void InstrProfiling::computeNumValueSiteCounts(InstrProfValueProfileInst *Ind) {
@@ -209,8 +233,11 @@ void InstrProfiling::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {
Value *Args[3] = {Ind->getTargetValue(),
Builder.CreateBitCast(DataVar, Builder.getInt8PtrTy()),
Builder.getInt32(Index)};
- Ind->replaceAllUsesWith(
- Builder.CreateCall(getOrInsertValueProfilingCall(*M), Args));
+ CallInst *Call = Builder.CreateCall(getOrInsertValueProfilingCall(*M, *TLI),
+ Args);
+ if (auto AK = TLI->getExtAttrForI32Param(false))
+ Call->addAttribute(3, AK);
+ Ind->replaceAllUsesWith(Call);
Ind->eraseFromParent();
}
@@ -221,7 +248,7 @@ void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {
uint64_t Index = Inc->getIndex()->getZExtValue();
Value *Addr = Builder.CreateConstInBoundsGEP2_64(Counters, 0, Index);
Value *Count = Builder.CreateLoad(Addr, "pgocount");
- Count = Builder.CreateAdd(Count, Builder.getInt64(1));
+ Count = Builder.CreateAdd(Count, Inc->getStep());
Inc->replaceAllUsesWith(Builder.CreateStore(Count, Addr));
Inc->eraseFromParent();
}
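In plain C terms, the lowering above now bumps the counter slot by the intrinsic's step operand rather than a hard-coded 1; a minimal sketch with illustrative names:

#include <cstdint>

void lowerIncrementSketch(uint64_t *CounterSlot, uint64_t Step) {
  *CounterSlot += Step;   // previously always '+= 1'
}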
@@ -268,33 +295,6 @@ static inline bool shouldRecordFunctionAddr(Function *F) {
return F->hasAddressTaken() || F->hasLinkOnceLinkage();
}
-static inline bool needsComdatForCounter(Function &F, Module &M) {
-
- if (F.hasComdat())
- return true;
-
- Triple TT(M.getTargetTriple());
- if (!TT.isOSBinFormatELF())
- return false;
-
- // See createPGOFuncNameVar for more details. To avoid link errors, profile
- // counters for function with available_externally linkage needs to be changed
- // to linkonce linkage. On ELF based systems, this leads to weak symbols to be
- // created. Without using comdat, duplicate entries won't be removed by the
- // linker leading to increased data segement size and raw profile size. Even
- // worse, since the referenced counter from profile per-function data object
- // will be resolved to the common strong definition, the profile counts for
- // available_externally functions will end up being duplicated in raw profile
- // data. This can result in distorted profile as the counts of those dups
- // will be accumulated by the profile merger.
- GlobalValue::LinkageTypes Linkage = F.getLinkage();
- if (Linkage != GlobalValue::ExternalWeakLinkage &&
- Linkage != GlobalValue::AvailableExternallyLinkage)
- return false;
-
- return true;
-}
-
static inline Comdat *getOrCreateProfileComdat(Module &M, Function &F,
InstrProfIncrementInst *Inc) {
if (!needsComdatForCounter(F, M))
@@ -572,38 +572,30 @@ void InstrProfiling::emitRuntimeHook() {
}
void InstrProfiling::emitUses() {
- if (UsedVars.empty())
- return;
-
- GlobalVariable *LLVMUsed = M->getGlobalVariable("llvm.used");
- std::vector<Constant *> MergedVars;
- if (LLVMUsed) {
- // Collect the existing members of llvm.used.
- ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer());
- for (unsigned I = 0, E = Inits->getNumOperands(); I != E; ++I)
- MergedVars.push_back(Inits->getOperand(I));
- LLVMUsed->eraseFromParent();
- }
-
- Type *i8PTy = Type::getInt8PtrTy(M->getContext());
- // Add uses for our data.
- for (auto *Value : UsedVars)
- MergedVars.push_back(
- ConstantExpr::getBitCast(cast<Constant>(Value), i8PTy));
-
- // Recreate llvm.used.
- ArrayType *ATy = ArrayType::get(i8PTy, MergedVars.size());
- LLVMUsed =
- new GlobalVariable(*M, ATy, false, GlobalValue::AppendingLinkage,
- ConstantArray::get(ATy, MergedVars), "llvm.used");
- LLVMUsed->setSection("llvm.metadata");
+ if (!UsedVars.empty())
+ appendToUsed(*M, UsedVars);
}
void InstrProfiling::emitInitialization() {
- std::string InstrProfileOutput = Options.InstrProfileOutput;
+ StringRef InstrProfileOutput = Options.InstrProfileOutput;
+
+ if (!InstrProfileOutput.empty()) {
+ // Create variable for profile name.
+ Constant *ProfileNameConst =
+ ConstantDataArray::getString(M->getContext(), InstrProfileOutput, true);
+ GlobalVariable *ProfileNameVar = new GlobalVariable(
+ *M, ProfileNameConst->getType(), true, GlobalValue::WeakAnyLinkage,
+ ProfileNameConst, INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_NAME_VAR));
+ Triple TT(M->getTargetTriple());
+ if (TT.supportsCOMDAT()) {
+ ProfileNameVar->setLinkage(GlobalValue::ExternalLinkage);
+ ProfileNameVar->setComdat(M->getOrInsertComdat(
+ StringRef(INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_NAME_VAR))));
+ }
+ }
Constant *RegisterF = M->getFunction(getInstrProfRegFuncsName());
- if (!RegisterF && InstrProfileOutput.empty())
+ if (!RegisterF)
return;
// Create the initialization function.
@@ -620,21 +612,6 @@ void InstrProfiling::emitInitialization() {
IRBuilder<> IRB(BasicBlock::Create(M->getContext(), "", F));
if (RegisterF)
IRB.CreateCall(RegisterF, {});
- if (!InstrProfileOutput.empty()) {
- auto *Int8PtrTy = Type::getInt8PtrTy(M->getContext());
- auto *SetNameTy = FunctionType::get(VoidTy, Int8PtrTy, false);
- auto *SetNameF = Function::Create(SetNameTy, GlobalValue::ExternalLinkage,
- getInstrProfFileOverriderFuncName(), M);
-
- // Create variable for profile name.
- Constant *ProfileNameConst =
- ConstantDataArray::getString(M->getContext(), InstrProfileOutput, true);
- GlobalVariable *ProfileName =
- new GlobalVariable(*M, ProfileNameConst->getType(), true,
- GlobalValue::PrivateLinkage, ProfileNameConst);
-
- IRB.CreateCall(SetNameF, IRB.CreatePointerCast(ProfileName, Int8PtrTy));
- }
IRB.CreateRetVoid();
appendToGlobalCtors(*M, F, 0);
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 970f9ab86e82..fafb0fcbd017 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -242,8 +242,8 @@ static const MemoryMapParams Linux_X86_64_MemoryMapParams = {
// mips64 Linux
static const MemoryMapParams Linux_MIPS64_MemoryMapParams = {
- 0x004000000000, // AndMask
- 0, // XorMask (not used)
+ 0, // AndMask (not used)
+ 0x008000000000, // XorMask
0, // ShadowBase (not used)
0x002000000000, // OriginBase
};
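The mips64 mapping switches from an AndMask scheme to an XorMask scheme. Roughly, MSan clears the AndMask bits and XORs in the XorMask bits when deriving a shadow address; a standalone illustration with the constants from the table above (function names invented):

    #include <cstdint>
    #include <cstdio>

    // Old mips64 scheme: clear the masked bit to land in shadow memory.
    static uint64_t shadowAndMask(uint64_t Addr) { return Addr & ~0x004000000000ULL; }
    // New mips64 scheme: flip the masked bit instead.
    static uint64_t shadowXorMask(uint64_t Addr) { return Addr ^ 0x008000000000ULL; }

    int main() {
      uint64_t Addr = 0x00aabbccdd00ULL;
      std::printf("and-mask shadow: %#llx, xor-mask shadow: %#llx\n",
                  (unsigned long long)shadowAndMask(Addr),
                  (unsigned long long)shadowXorMask(Addr));
    }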
@@ -312,11 +312,12 @@ static const PlatformMemoryMapParams FreeBSD_X86_MemoryMapParams = {
/// uninitialized reads.
class MemorySanitizer : public FunctionPass {
public:
- MemorySanitizer(int TrackOrigins = 0)
+ MemorySanitizer(int TrackOrigins = 0, bool Recover = false)
: FunctionPass(ID),
TrackOrigins(std::max(TrackOrigins, (int)ClTrackOrigins)),
+ Recover(Recover || ClKeepGoing),
WarningFn(nullptr) {}
- const char *getPassName() const override { return "MemorySanitizer"; }
+ StringRef getPassName() const override { return "MemorySanitizer"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetLibraryInfoWrapperPass>();
}
@@ -329,6 +330,7 @@ class MemorySanitizer : public FunctionPass {
/// \brief Track origins (allocation points) of uninitialized values.
int TrackOrigins;
+ bool Recover;
LLVMContext *C;
Type *IntptrTy;
@@ -395,8 +397,8 @@ INITIALIZE_PASS_END(
MemorySanitizer, "msan",
"MemorySanitizer: detects uninitialized reads.", false, false)
-FunctionPass *llvm::createMemorySanitizerPass(int TrackOrigins) {
- return new MemorySanitizer(TrackOrigins);
+FunctionPass *llvm::createMemorySanitizerPass(int TrackOrigins, bool Recover) {
+ return new MemorySanitizer(TrackOrigins, Recover);
}
/// \brief Create a non-const global initialized with the given string.
@@ -421,8 +423,8 @@ void MemorySanitizer::initializeCallbacks(Module &M) {
// Create the callback.
// FIXME: this function should have "Cold" calling conv,
// which is not yet implemented.
- StringRef WarningFnName = ClKeepGoing ? "__msan_warning"
- : "__msan_warning_noreturn";
+ StringRef WarningFnName = Recover ? "__msan_warning"
+ : "__msan_warning_noreturn";
WarningFn = M.getOrInsertFunction(WarningFnName, IRB.getVoidTy(), nullptr);
for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
@@ -566,9 +568,9 @@ bool MemorySanitizer::doInitialization(Module &M) {
new GlobalVariable(M, IRB.getInt32Ty(), true, GlobalValue::WeakODRLinkage,
IRB.getInt32(TrackOrigins), "__msan_track_origins");
- if (ClKeepGoing)
+ if (Recover)
new GlobalVariable(M, IRB.getInt32Ty(), true, GlobalValue::WeakODRLinkage,
- IRB.getInt32(ClKeepGoing), "__msan_keep_going");
+ IRB.getInt32(Recover), "__msan_keep_going");
return true;
}
@@ -792,7 +794,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
}
IRB.CreateCall(MS.WarningFn, {});
IRB.CreateCall(MS.EmptyAsm, {});
- // FIXME: Insert UnreachableInst if !ClKeepGoing?
+ // FIXME: Insert UnreachableInst if !MS.Recover?
// This may invalidate some of the following checks and needs to be done
// at the very end.
}
@@ -815,7 +817,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
getCleanShadow(ConvertedShadow), "_mscmp");
Instruction *CheckTerm = SplitBlockAndInsertIfThen(
Cmp, OrigIns,
- /* Unreachable */ !ClKeepGoing, MS.ColdCallWeights);
+ /* Unreachable */ !MS.Recover, MS.ColdCallWeights);
IRB.SetInsertPoint(CheckTerm);
if (MS.TrackOrigins) {
@@ -2360,6 +2362,29 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
case llvm::Intrinsic::x86_sse_cvttps2pi:
handleVectorConvertIntrinsic(I, 2);
break;
+
+ case llvm::Intrinsic::x86_avx512_psll_w_512:
+ case llvm::Intrinsic::x86_avx512_psll_d_512:
+ case llvm::Intrinsic::x86_avx512_psll_q_512:
+ case llvm::Intrinsic::x86_avx512_pslli_w_512:
+ case llvm::Intrinsic::x86_avx512_pslli_d_512:
+ case llvm::Intrinsic::x86_avx512_pslli_q_512:
+ case llvm::Intrinsic::x86_avx512_psrl_w_512:
+ case llvm::Intrinsic::x86_avx512_psrl_d_512:
+ case llvm::Intrinsic::x86_avx512_psrl_q_512:
+ case llvm::Intrinsic::x86_avx512_psra_w_512:
+ case llvm::Intrinsic::x86_avx512_psra_d_512:
+ case llvm::Intrinsic::x86_avx512_psra_q_512:
+ case llvm::Intrinsic::x86_avx512_psrli_w_512:
+ case llvm::Intrinsic::x86_avx512_psrli_d_512:
+ case llvm::Intrinsic::x86_avx512_psrli_q_512:
+ case llvm::Intrinsic::x86_avx512_psrai_w_512:
+ case llvm::Intrinsic::x86_avx512_psrai_d_512:
+ case llvm::Intrinsic::x86_avx512_psrai_q_512:
+ case llvm::Intrinsic::x86_avx512_psra_q_256:
+ case llvm::Intrinsic::x86_avx512_psra_q_128:
+ case llvm::Intrinsic::x86_avx512_psrai_q_256:
+ case llvm::Intrinsic::x86_avx512_psrai_q_128:
case llvm::Intrinsic::x86_avx2_psll_w:
case llvm::Intrinsic::x86_avx2_psll_d:
case llvm::Intrinsic::x86_avx2_psll_q:
@@ -2412,14 +2437,22 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
break;
case llvm::Intrinsic::x86_avx2_psllv_d:
case llvm::Intrinsic::x86_avx2_psllv_d_256:
+ case llvm::Intrinsic::x86_avx512_psllv_d_512:
case llvm::Intrinsic::x86_avx2_psllv_q:
case llvm::Intrinsic::x86_avx2_psllv_q_256:
+ case llvm::Intrinsic::x86_avx512_psllv_q_512:
case llvm::Intrinsic::x86_avx2_psrlv_d:
case llvm::Intrinsic::x86_avx2_psrlv_d_256:
+ case llvm::Intrinsic::x86_avx512_psrlv_d_512:
case llvm::Intrinsic::x86_avx2_psrlv_q:
case llvm::Intrinsic::x86_avx2_psrlv_q_256:
+ case llvm::Intrinsic::x86_avx512_psrlv_q_512:
case llvm::Intrinsic::x86_avx2_psrav_d:
case llvm::Intrinsic::x86_avx2_psrav_d_256:
+ case llvm::Intrinsic::x86_avx512_psrav_d_512:
+ case llvm::Intrinsic::x86_avx512_psrav_q_128:
+ case llvm::Intrinsic::x86_avx512_psrav_q_256:
+ case llvm::Intrinsic::x86_avx512_psrav_q_512:
handleVectorShiftIntrinsic(I, /* Variable */ true);
break;
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/contrib/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index f54d8ad48146..28f4f7ea1455 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -51,6 +51,7 @@
#include "llvm/Transforms/PGOInstrumentation.h"
#include "CFGMST.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
@@ -59,6 +60,7 @@
#include "llvm/Analysis/IndirectCallSiteVisitor.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
@@ -75,6 +77,7 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include <algorithm>
#include <string>
+#include <unordered_map>
#include <utility>
#include <vector>
@@ -83,6 +86,7 @@ using namespace llvm;
#define DEBUG_TYPE "pgo-instrumentation"
STATISTIC(NumOfPGOInstrument, "Number of edges instrumented.");
STATISTIC(NumOfPGOSelectInsts, "Number of select instructions instrumented.");
STATISTIC(NumOfPGOEdge, "Number of edges.");
STATISTIC(NumOfPGOBB, "Number of basic-blocks.");
STATISTIC(NumOfPGOSplit, "Number of critical edge splits.");
@@ -112,17 +116,83 @@ static cl::opt<unsigned> MaxNumAnnotations(
cl::desc("Max number of annotations for a single indirect "
"call callsite"));
+// Command line option to control appending FunctionHash to the name of a COMDAT
+// function. This is to avoid the hash mismatch caused by the preinliner.
+static cl::opt<bool> DoComdatRenaming(
+ "do-comdat-renaming", cl::init(true), cl::Hidden,
+ cl::desc("Append function hash to the name of COMDAT function to avoid "
+ "function hash mismatch due to the preinliner"));
+
// Command line option to enable/disable the warning about missing profile
// information.
-static cl::opt<bool> NoPGOWarnMissing("no-pgo-warn-missing", cl::init(false),
- cl::Hidden);
+static cl::opt<bool> PGOWarnMissing("pgo-warn-missing-function",
+ cl::init(false),
+ cl::Hidden);
// Command line option to enable/disable the warning about a hash mismatch in
// the profile data.
static cl::opt<bool> NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false),
cl::Hidden);
+// Command line option to enable/disable select instruction instrumentation.
+static cl::opt<bool> PGOInstrSelect("pgo-instr-select", cl::init(true),
+ cl::Hidden);
namespace {
+
+/// The select instruction visitor plays three roles specified
+/// by the mode. In \c VM_counting mode, it simply counts the number of
+/// select instructions. In \c VM_instrument mode, it inserts code to count
+/// the number of times the TrueValue of a select is taken. In \c VM_annotate
+/// mode, it reads the profile data and annotates the select instruction with
+/// metadata.
+enum VisitMode { VM_counting, VM_instrument, VM_annotate };
+class PGOUseFunc;
+
+/// Instruction Visitor class to visit select instructions.
+struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> {
+ Function &F;
+ unsigned NSIs = 0; // Number of select instructions instrumented.
+ VisitMode Mode = VM_counting; // Visiting mode.
+ unsigned *CurCtrIdx = nullptr; // Pointer to current counter index.
+ unsigned TotalNumCtrs = 0; // Total number of counters
+ GlobalVariable *FuncNameVar = nullptr;
+ uint64_t FuncHash = 0;
+ PGOUseFunc *UseFunc = nullptr;
+
+ SelectInstVisitor(Function &Func) : F(Func) {}
+
+ void countSelects(Function &Func) {
+ Mode = VM_counting;
+ visit(Func);
+ }
+ // Visit the IR stream and instrument all select instructions. \p
+ // Ind is a pointer to the counter index variable; \p TotalNC
+ // is the total number of counters; \p FNV is the pointer to the
+ // PGO function name var; \p FHash is the function hash.
+ void instrumentSelects(Function &Func, unsigned *Ind, unsigned TotalNC,
+ GlobalVariable *FNV, uint64_t FHash) {
+ Mode = VM_instrument;
+ CurCtrIdx = Ind;
+ TotalNumCtrs = TotalNC;
+ FuncHash = FHash;
+ FuncNameVar = FNV;
+ visit(Func);
+ }
+
+ // Visit the IR stream and annotate all select instructions.
+ void annotateSelects(Function &Func, PGOUseFunc *UF, unsigned *Ind) {
+ Mode = VM_annotate;
+ UseFunc = UF;
+ CurCtrIdx = Ind;
+ visit(Func);
+ }
+
+ void instrumentOneSelectInst(SelectInst &SI);
+ void annotateOneSelectInst(SelectInst &SI);
+ // Visit \p SI instruction and perform tasks according to visit mode.
+ void visitSelectInst(SelectInst &SI);
+ unsigned getNumOfSelectInsts() const { return NSIs; }
+};
+
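For orientation, the visitor above is run over the same function once per phase. A hedged sketch of the call sequence, mirroring the call sites later in this patch; every name other than SelectInstVisitor's own members is a placeholder, and instrumentation vs. annotation happen in different compilations in practice:

    static void sketchSelectInstrumentation(llvm::Function &F,
                                            unsigned NumEdgeCounters,
                                            unsigned NumCounters,
                                            llvm::GlobalVariable *FuncNameVar,
                                            uint64_t FunctionHash) {
      SelectInstVisitor SIVisitor(F);
      SIVisitor.countSelects(F);            // VM_counting: size the counter array
      unsigned CurIdx = NumEdgeCounters;    // select counters follow edge counters
      SIVisitor.instrumentSelects(F, &CurIdx, NumCounters, FuncNameVar,
                                  FunctionHash);              // VM_instrument
      // At profile-use time the same visitor runs in VM_annotate mode via
      // annotateSelects(F, UseFunc, &CurIdx).
    }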
class PGOInstrumentationGenLegacyPass : public ModulePass {
public:
static char ID;
@@ -132,9 +202,7 @@ public:
*PassRegistry::getPassRegistry());
}
- const char *getPassName() const override {
- return "PGOInstrumentationGenPass";
- }
+ StringRef getPassName() const override { return "PGOInstrumentationGenPass"; }
private:
bool runOnModule(Module &M) override;
@@ -157,9 +225,7 @@ public:
*PassRegistry::getPassRegistry());
}
- const char *getPassName() const override {
- return "PGOInstrumentationUsePass";
- }
+ StringRef getPassName() const override { return "PGOInstrumentationUsePass"; }
private:
std::string ProfileFileName;
@@ -169,6 +235,7 @@ private:
AU.addRequired<BlockFrequencyInfoWrapperPass>();
}
};
+
} // end anonymous namespace
char PGOInstrumentationGenLegacyPass::ID = 0;
@@ -238,8 +305,13 @@ template <class Edge, class BBInfo> class FuncPGOInstrumentation {
private:
Function &F;
void computeCFGHash();
+ void renameComdatFunction();
+ // A map from each Comdat group to its members in the module containing F.
+ std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
public:
+ std::vector<Instruction *> IndirectCallSites;
+ SelectInstVisitor SIVisitor;
std::string FuncName;
GlobalVariable *FuncNameVar;
// CFG hash value for this function.
@@ -255,18 +327,32 @@ public:
// Return the auxiliary BB information.
BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); }
+ // Return the auxiliary BB information if available.
+ BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); }
+
// Dump edges and BB information.
void dumpInfo(std::string Str = "") const {
MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName + " Hash: " +
Twine(FunctionHash) + "\t" + Str);
}
- FuncPGOInstrumentation(Function &Func, bool CreateGlobalVar = false,
- BranchProbabilityInfo *BPI = nullptr,
- BlockFrequencyInfo *BFI = nullptr)
- : F(Func), FunctionHash(0), MST(F, BPI, BFI) {
+ FuncPGOInstrumentation(
+ Function &Func,
+ std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
+ bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
+ BlockFrequencyInfo *BFI = nullptr)
+ : F(Func), ComdatMembers(ComdatMembers), SIVisitor(Func), FunctionHash(0),
+ MST(F, BPI, BFI) {
+
+ // This should be done before CFG hash computation.
+ SIVisitor.countSelects(Func);
+ NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
+ IndirectCallSites = findIndirectCallSites(Func);
+
FuncName = getPGOFuncName(F);
computeCFGHash();
+ if (ComdatMembers.size())
+ renameComdatFunction();
DEBUG(dumpInfo("after CFGMST"));
NumOfPGOBB += MST.BBInfos.size();
@@ -281,6 +367,16 @@ public:
if (CreateGlobalVar)
FuncNameVar = createPGOFuncNameVar(F, FuncName);
}
+
+ // Return the number of profile counters needed for the function.
+ unsigned getNumCounters() {
+ unsigned NumCounters = 0;
+ for (auto &E : this->MST.AllEdges) {
+ if (!E->InMST && !E->Removed)
+ NumCounters++;
+ }
+ return NumCounters + SIVisitor.getNumOfSelectInsts();
+ }
};
// Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index
@@ -293,13 +389,102 @@ void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
const TerminatorInst *TI = BB.getTerminator();
for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
BasicBlock *Succ = TI->getSuccessor(I);
- uint32_t Index = getBBInfo(Succ).Index;
+ auto BI = findBBInfo(Succ);
+ if (BI == nullptr)
+ continue;
+ uint32_t Index = BI->Index;
for (int J = 0; J < 4; J++)
Indexes.push_back((char)(Index >> (J * 8)));
}
}
JC.update(Indexes);
- FunctionHash = (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC();
+ FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 |
+ (uint64_t)IndirectCallSites.size() << 48 |
+ (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC();
+}
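The function hash now folds the select and indirect-call counts into the top bits. A standalone illustration of the packing, with field widths taken from the shift amounts above (helper name and sample values invented):

    #include <cstdint>

    static uint64_t packFunctionHash(uint64_t NumSelects, uint64_t NumIndirectCalls,
                                     uint64_t NumEdges, uint32_t CRC) {
      return (NumSelects << 56) | (NumIndirectCalls << 48) |
             (NumEdges << 32) | CRC;
    }
    // packFunctionHash(2, 3, 17, 0xdeadbeef) == 0x02030011deadbeef: selects in
    // bits 56-63, indirect calls in bits 48-55, edges in bits 32-47, CRC below.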
+
+// Check if we can safely rename this Comdat function.
+static bool canRenameComdat(
+ Function &F,
+ std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
+ if (F.getName().empty())
+ return false;
+ if (!needsComdatForCounter(F, *(F.getParent())))
+ return false;
+ // Only safe to do this if the function may be discarded when it is not
+ // used in the compilation unit.
+ if (!GlobalValue::isDiscardableIfUnused(F.getLinkage()))
+ return false;
+
+ // For AvailableExternallyLinkage functions.
+ if (!F.hasComdat()) {
+ assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage);
+ return true;
+ }
+
+ // FIXME: Currently we only handle Comdat groups that contain a single
+ // function plus function aliases.
+ // (1) For a Comdat group containing multiple functions, we need a unique
+ // postfix based on the hash of each function. Doing this efficiently requires
+ // a non-trivial code refactoring.
+ // (2) Variables cannot be renamed, so we cannot rename a Comdat function in a
+ // group that includes global variables.
+ Comdat *C = F.getComdat();
+ for (auto &&CM : make_range(ComdatMembers.equal_range(C))) {
+ if (dyn_cast<GlobalAlias>(CM.second))
+ continue;
+ Function *FM = dyn_cast<Function>(CM.second);
+ if (FM != &F)
+ return false;
+ }
+ return true;
+}
+
+// Append the CFGHash to the Comdat function name.
+template <class Edge, class BBInfo>
+void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {
+ if (!canRenameComdat(F, ComdatMembers))
+ return;
+ std::string OrigName = F.getName().str();
+ std::string NewFuncName =
+ Twine(F.getName() + "." + Twine(FunctionHash)).str();
+ F.setName(Twine(NewFuncName));
+ GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigName, &F);
+ FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str();
+ Comdat *NewComdat;
+ Module *M = F.getParent();
+ // For AvailableExternallyLinkage functions, change the linkage to
+ // LinkOnceODR and put them into a comdat. This is because, after renaming,
+ // there is no external backup copy of the function available.
+ if (!F.hasComdat()) {
+ assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage);
+ NewComdat = M->getOrInsertComdat(StringRef(NewFuncName));
+ F.setLinkage(GlobalValue::LinkOnceODRLinkage);
+ F.setComdat(NewComdat);
+ return;
+ }
+
+ // This function belongs to a single function Comdat group.
+ Comdat *OrigComdat = F.getComdat();
+ std::string NewComdatName =
+ Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str();
+ NewComdat = M->getOrInsertComdat(StringRef(NewComdatName));
+ NewComdat->setSelectionKind(OrigComdat->getSelectionKind());
+
+ for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) {
+ if (GlobalAlias *GA = dyn_cast<GlobalAlias>(CM.second)) {
+ // For aliases, change the name directly.
+ assert(dyn_cast<Function>(GA->getAliasee()->stripPointerCasts()) == &F);
+ std::string OrigGAName = GA->getName().str();
+ GA->setName(Twine(GA->getName() + "." + Twine(FunctionHash)));
+ GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigGAName, GA);
+ continue;
+ }
+ // Must be a function.
+ Function *CF = dyn_cast<Function>(CM.second);
+ assert(CF);
+ CF->setComdat(NewComdat);
+ }
}
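To make the renaming concrete, a standalone sketch of the naming scheme; the hash is appended in decimal (via Twine), and "foo" and the hash value below are hypothetical:

    #include <cstdint>
    #include <string>

    static std::string renamedSymbol(const std::string &Orig, uint64_t FunctionHash) {
      return Orig + "." + std::to_string(FunctionHash);  // "foo", 0x1234 -> "foo.4660"
    }
    // The original name survives as a weak alias ("foo" -> "foo.4660"), and the
    // comdat key is renamed the same way so the group stays self-consistent.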
// Given a CFG E to be instrumented, find which BB to place the instrumented
@@ -340,15 +525,12 @@ BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
// Visit all edges and instrument the edges not in the MST, and do value profiling.
// Critical edges will be split.
-static void instrumentOneFunc(Function &F, Module *M,
- BranchProbabilityInfo *BPI,
- BlockFrequencyInfo *BFI) {
- unsigned NumCounters = 0;
- FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(F, true, BPI, BFI);
- for (auto &E : FuncInfo.MST.AllEdges) {
- if (!E->InMST && !E->Removed)
- NumCounters++;
- }
+static void instrumentOneFunc(
+ Function &F, Module *M, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFI,
+ std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
+ FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(F, ComdatMembers, true, BPI,
+ BFI);
+ unsigned NumCounters = FuncInfo.getNumCounters();
uint32_t I = 0;
Type *I8PtrTy = Type::getInt8PtrTy(M->getContext());
@@ -367,11 +549,16 @@ static void instrumentOneFunc(Function &F, Module *M,
Builder.getInt32(I++)});
}
+ // Now instrument select instructions:
+ FuncInfo.SIVisitor.instrumentSelects(F, &I, NumCounters, FuncInfo.FuncNameVar,
+ FuncInfo.FunctionHash);
+ assert(I == NumCounters);
+
if (DisableValueProfiling)
return;
unsigned NumIndirectCallSites = 0;
- for (auto &I : findIndirectCallSites(F)) {
+ for (auto &I : FuncInfo.IndirectCallSites) {
CallSite CS(I);
Value *Callee = CS.getCalledValue();
DEBUG(dbgs() << "Instrument one indirect call: CallSite Index = "
@@ -456,10 +643,12 @@ static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) {
class PGOUseFunc {
public:
- PGOUseFunc(Function &Func, Module *Modu, BranchProbabilityInfo *BPI = nullptr,
+ PGOUseFunc(Function &Func, Module *Modu,
+ std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
+ BranchProbabilityInfo *BPI = nullptr,
BlockFrequencyInfo *BFI = nullptr)
- : F(Func), M(Modu), FuncInfo(Func, false, BPI, BFI),
- FreqAttr(FFA_Normal) {}
+ : F(Func), M(Modu), FuncInfo(Func, ComdatMembers, false, BPI, BFI),
+ CountPosition(0), ProfileCountSize(0), FreqAttr(FFA_Normal) {}
// Read counts for the instrumented BB from profile.
bool readCounters(IndexedInstrProfReader *PGOReader);
@@ -479,24 +668,37 @@ public:
// Return the function hotness from the profile.
FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; }
+ // Return the function hash.
+ uint64_t getFuncHash() const { return FuncInfo.FunctionHash; }
// Return the profile record for this function;
InstrProfRecord &getProfileRecord() { return ProfileRecord; }
+ // Return the auxiliary BB information.
+ UseBBInfo &getBBInfo(const BasicBlock *BB) const {
+ return FuncInfo.getBBInfo(BB);
+ }
+
+ // Return the auxiliary BB information if available.
+ UseBBInfo *findBBInfo(const BasicBlock *BB) const {
+ return FuncInfo.findBBInfo(BB);
+ }
+
private:
Function &F;
Module *M;
// This member stores the shared information with class PGOGenFunc.
FuncPGOInstrumentation<PGOUseEdge, UseBBInfo> FuncInfo;
- // Return the auxiliary BB information.
- UseBBInfo &getBBInfo(const BasicBlock *BB) const {
- return FuncInfo.getBBInfo(BB);
- }
-
// The maximum count value in the profile. This is only used in PGO use
// compilation.
uint64_t ProgramMaxCount;
+ // Position of the next counter to be read.
+ uint32_t CountPosition;
+
+ // Total number of profile counters for this function.
+ uint32_t ProfileCountSize;
+
// ProfileRecord for this function.
InstrProfRecord ProfileRecord;
@@ -535,6 +737,7 @@ private:
void PGOUseFunc::setInstrumentedCounts(
const std::vector<uint64_t> &CountFromProfile) {
+ assert(FuncInfo.getNumCounters() == CountFromProfile.size());
// Use a worklist as we will update the vector during the iteration.
std::vector<PGOUseEdge *> WorkList;
for (auto &E : FuncInfo.MST.AllEdges)
@@ -564,6 +767,8 @@ void PGOUseFunc::setInstrumentedCounts(
NewEdge1.InMST = true;
getBBInfo(InstrBB).setBBInfoCount(CountValue);
}
+ ProfileCountSize = CountFromProfile.size();
+ CountPosition = I;
}
// Set the count value for the unknown edge. There should be one and only one
@@ -594,7 +799,7 @@ bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader) {
bool SkipWarning = false;
if (Err == instrprof_error::unknown_function) {
NumOfPGOMissing++;
- SkipWarning = NoPGOWarnMissing;
+ SkipWarning = !PGOWarnMissing;
} else if (Err == instrprof_error::hash_mismatch ||
Err == instrprof_error::malformed) {
NumOfPGOMismatch++;
@@ -663,27 +868,38 @@ void PGOUseFunc::populateCounters() {
// For efficient traversal, it's better to start from the end as most
// of the instrumented edges are at the end.
for (auto &BB : reverse(F)) {
- UseBBInfo &Count = getBBInfo(&BB);
- if (!Count.CountValid) {
- if (Count.UnknownCountOutEdge == 0) {
- Count.CountValue = sumEdgeCount(Count.OutEdges);
- Count.CountValid = true;
+ UseBBInfo *Count = findBBInfo(&BB);
+ if (Count == nullptr)
+ continue;
+ if (!Count->CountValid) {
+ if (Count->UnknownCountOutEdge == 0) {
+ Count->CountValue = sumEdgeCount(Count->OutEdges);
+ Count->CountValid = true;
Changes = true;
- } else if (Count.UnknownCountInEdge == 0) {
- Count.CountValue = sumEdgeCount(Count.InEdges);
- Count.CountValid = true;
+ } else if (Count->UnknownCountInEdge == 0) {
+ Count->CountValue = sumEdgeCount(Count->InEdges);
+ Count->CountValid = true;
Changes = true;
}
}
- if (Count.CountValid) {
- if (Count.UnknownCountOutEdge == 1) {
- uint64_t Total = Count.CountValue - sumEdgeCount(Count.OutEdges);
- setEdgeCount(Count.OutEdges, Total);
+ if (Count->CountValid) {
+ if (Count->UnknownCountOutEdge == 1) {
+ uint64_t Total = 0;
+ uint64_t OutSum = sumEdgeCount(Count->OutEdges);
+ // If one of the successor blocks can terminate early (no-return), we can
+ // end up with a situation where the out-edge sum count is larger, as the
+ // source BB's count is collected by a post-dominated block.
+ if (Count->CountValue > OutSum)
+ Total = Count->CountValue - OutSum;
+ setEdgeCount(Count->OutEdges, Total);
Changes = true;
}
- if (Count.UnknownCountInEdge == 1) {
- uint64_t Total = Count.CountValue - sumEdgeCount(Count.InEdges);
- setEdgeCount(Count.InEdges, Total);
+ if (Count->UnknownCountInEdge == 1) {
+ uint64_t Total = 0;
+ uint64_t InSum = sumEdgeCount(Count->InEdges);
+ if (Count->CountValue > InSum)
+ Total = Count->CountValue - InSum;
+ setEdgeCount(Count->InEdges, Total);
Changes = true;
}
}
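A tiny numeric illustration of the clamp introduced above, with hypothetical counts (standalone C++):

    #include <cstdint>

    // A block executed 100 times whose known successors sum to 130 (possible
    // when a post-dominated block collected the count and one successor never
    // returns) must give the remaining unknown edge 0, not an unsigned
    // wrap-around.
    static uint64_t remainingEdgeCount(uint64_t BBCount, uint64_t KnownSum) {
      return BBCount > KnownSum ? BBCount - KnownSum : 0;
    }
    // remainingEdgeCount(100, 130) == 0; remainingEdgeCount(100, 70) == 30.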
@@ -693,24 +909,50 @@ void PGOUseFunc::populateCounters() {
DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n");
#ifndef NDEBUG
// Assert every BB has a valid counter.
- for (auto &BB : F)
- assert(getBBInfo(&BB).CountValid && "BB count is not valid");
+ for (auto &BB : F) {
+ auto BI = findBBInfo(&BB);
+ if (BI == nullptr)
+ continue;
+ assert(BI->CountValid && "BB count is not valid");
+ }
#endif
uint64_t FuncEntryCount = getBBInfo(&*F.begin()).CountValue;
F.setEntryCount(FuncEntryCount);
uint64_t FuncMaxCount = FuncEntryCount;
- for (auto &BB : F)
- FuncMaxCount = std::max(FuncMaxCount, getBBInfo(&BB).CountValue);
+ for (auto &BB : F) {
+ auto BI = findBBInfo(&BB);
+ if (BI == nullptr)
+ continue;
+ FuncMaxCount = std::max(FuncMaxCount, BI->CountValue);
+ }
markFunctionAttributes(FuncEntryCount, FuncMaxCount);
+ // Now annotate select instructions
+ FuncInfo.SIVisitor.annotateSelects(F, this, &CountPosition);
+ assert(CountPosition == ProfileCountSize);
+
DEBUG(FuncInfo.dumpInfo("after reading profile."));
}
+static void setProfMetadata(Module *M, Instruction *TI,
+ ArrayRef<uint64_t> EdgeCounts, uint64_t MaxCount) {
+ MDBuilder MDB(M->getContext());
+ assert(MaxCount > 0 && "Bad max count");
+ uint64_t Scale = calculateCountScale(MaxCount);
+ SmallVector<unsigned, 4> Weights;
+ for (const auto &ECI : EdgeCounts)
+ Weights.push_back(scaleBranchCount(ECI, Scale));
+
+ DEBUG(dbgs() << "Weight is: ";
+ for (const auto &W : Weights) { dbgs() << W << " "; }
+ dbgs() << "\n";);
+ TI->setMetadata(llvm::LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
+}
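setProfMetadata relies on calculateCountScale and scaleBranchCount to squeeze 64-bit edge counts into 32-bit branch weights. A hedged, standalone approximation of that behaviour; the real helpers live in the InstrProf headers and may differ in detail:

    #include <cstdint>
    #include <limits>

    static uint64_t approxCountScale(uint64_t MaxCount) {
      const uint64_t Limit = std::numeric_limits<uint32_t>::max();
      return MaxCount <= Limit ? 1 : MaxCount / Limit + 1;
    }
    static uint32_t approxScaleBranchCount(uint64_t Count, uint64_t Scale) {
      return static_cast<uint32_t>(Count / Scale);  // becomes an MD_prof weight
    }
    // With counts {10'000'000'000, 1'000}, the scale is 3 and the weights
    // become {3'333'333'333, 333}, both of which fit in 32 bits.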
+
// Assign the scaled count values to the BB with multiple out edges.
void PGOUseFunc::setBranchWeights() {
// Generate MD_prof metadata for every branch instruction.
DEBUG(dbgs() << "\nSetting branch weights.\n");
- MDBuilder MDB(M->getContext());
for (auto &BB : F) {
TerminatorInst *TI = BB.getTerminator();
if (TI->getNumSuccessors() < 2)
@@ -723,7 +965,7 @@ void PGOUseFunc::setBranchWeights() {
// We have a non-zero Branch BB.
const UseBBInfo &BBCountInfo = getBBInfo(&BB);
unsigned Size = BBCountInfo.OutEdges.size();
- SmallVector<unsigned, 2> EdgeCounts(Size, 0);
+ SmallVector<uint64_t, 2> EdgeCounts(Size, 0);
uint64_t MaxCount = 0;
for (unsigned s = 0; s < Size; s++) {
const PGOUseEdge *E = BBCountInfo.OutEdges[s];
@@ -737,18 +979,62 @@ void PGOUseFunc::setBranchWeights() {
MaxCount = EdgeCount;
EdgeCounts[SuccNum] = EdgeCount;
}
- assert(MaxCount > 0 && "Bad max count");
- uint64_t Scale = calculateCountScale(MaxCount);
- SmallVector<unsigned, 4> Weights;
- for (const auto &ECI : EdgeCounts)
- Weights.push_back(scaleBranchCount(ECI, Scale));
-
- TI->setMetadata(llvm::LLVMContext::MD_prof,
- MDB.createBranchWeights(Weights));
- DEBUG(dbgs() << "Weight is: ";
- for (const auto &W : Weights) { dbgs() << W << " "; }
- dbgs() << "\n";);
+ setProfMetadata(M, TI, EdgeCounts, MaxCount);
+ }
+}
+
+void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {
+ Module *M = F.getParent();
+ IRBuilder<> Builder(&SI);
+ Type *Int64Ty = Builder.getInt64Ty();
+ Type *I8PtrTy = Builder.getInt8PtrTy();
+ auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty);
+ Builder.CreateCall(
+ Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step),
+ {llvm::ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
+ Builder.getInt64(FuncHash),
+ Builder.getInt32(TotalNumCtrs), Builder.getInt32(*CurCtrIdx), Step});
+ ++(*CurCtrIdx);
+}
+
+void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) {
+ std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts;
+ assert(*CurCtrIdx < CountFromProfile.size() &&
+ "Out of bound access of counters");
+ uint64_t SCounts[2];
+ SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count
+ ++(*CurCtrIdx);
+ uint64_t TotalCount = 0;
+ auto BI = UseFunc->findBBInfo(SI.getParent());
+ if (BI != nullptr)
+ TotalCount = BI->CountValue;
+ // False Count
+ SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0);
+ uint64_t MaxCount = std::max(SCounts[0], SCounts[1]);
+ if (MaxCount)
+ setProfMetadata(F.getParent(), &SI, SCounts, MaxCount);
+}
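The true/false weights for a select come from one profile counter plus the parent block's count. A standalone arithmetic sketch of the derivation above, with invented sample numbers:

    #include <cstdint>
    #include <utility>

    // Returns {true-count, false-count} for a select whose parent block ran
    // TotalCount times and whose instrumented counter recorded TrueCount.
    static std::pair<uint64_t, uint64_t> selectCounts(uint64_t TotalCount,
                                                      uint64_t TrueCount) {
      uint64_t FalseCount = TotalCount > TrueCount ? TotalCount - TrueCount : 0;
      return {TrueCount, FalseCount};
    }
    // selectCounts(1000, 900) == {900, 100}: the branch-weight metadata then
    // marks the true arm as roughly 9x hotter than the false arm.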
+
+void SelectInstVisitor::visitSelectInst(SelectInst &SI) {
+ if (!PGOInstrSelect)
+ return;
+ // FIXME: vector-typed select conditions are not handled yet.
+ if (SI.getCondition()->getType()->isVectorTy())
+ return;
+
+ NSIs++;
+ switch (Mode) {
+ case VM_counting:
+ return;
+ case VM_instrument:
+ instrumentOneSelectInst(SI);
+ return;
+ case VM_annotate:
+ annotateOneSelectInst(SI);
+ return;
}
+
+ llvm_unreachable("Unknown visiting mode");
}
// Traverse all the indirect callsites and annotate the instructions.
@@ -760,7 +1046,7 @@ void PGOUseFunc::annotateIndirectCallSites() {
createPGOFuncNameMetadata(F, FuncInfo.FuncName);
unsigned IndirectCallSiteIndex = 0;
- auto IndirectCallSites = findIndirectCallSites(F);
+ auto &IndirectCallSites = FuncInfo.IndirectCallSites;
unsigned NumValueSites =
ProfileRecord.getNumValueSites(IPVK_IndirectCallTarget);
if (NumValueSites != IndirectCallSites.size()) {
@@ -784,7 +1070,7 @@ void PGOUseFunc::annotateIndirectCallSites() {
}
} // end anonymous namespace
-// Create a COMDAT variable IR_LEVEL_PROF_VARNAME to make the runtime
+// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
// aware this is an ir_level profile so it can set the version flag.
static void createIRLevelProfileFlagVariable(Module &M) {
Type *IntTy64 = Type::getInt64Ty(M.getContext());
@@ -792,26 +1078,47 @@ static void createIRLevelProfileFlagVariable(Module &M) {
auto IRLevelVersionVariable = new GlobalVariable(
M, IntTy64, true, GlobalVariable::ExternalLinkage,
Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)),
- INSTR_PROF_QUOTE(IR_LEVEL_PROF_VERSION_VAR));
+ INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR));
IRLevelVersionVariable->setVisibility(GlobalValue::DefaultVisibility);
Triple TT(M.getTargetTriple());
if (!TT.supportsCOMDAT())
IRLevelVersionVariable->setLinkage(GlobalValue::WeakAnyLinkage);
else
IRLevelVersionVariable->setComdat(M.getOrInsertComdat(
- StringRef(INSTR_PROF_QUOTE(IR_LEVEL_PROF_VERSION_VAR))));
+ StringRef(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR))));
+}
+
+// Collect the set of members for each Comdat in module M and store
+// in ComdatMembers.
+static void collectComdatMembers(
+ Module &M,
+ std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
+ if (!DoComdatRenaming)
+ return;
+ for (Function &F : M)
+ if (Comdat *C = F.getComdat())
+ ComdatMembers.insert(std::make_pair(C, &F));
+ for (GlobalVariable &GV : M.globals())
+ if (Comdat *C = GV.getComdat())
+ ComdatMembers.insert(std::make_pair(C, &GV));
+ for (GlobalAlias &GA : M.aliases())
+ if (Comdat *C = GA.getComdat())
+ ComdatMembers.insert(std::make_pair(C, &GA));
}
static bool InstrumentAllFunctions(
Module &M, function_ref<BranchProbabilityInfo *(Function &)> LookupBPI,
function_ref<BlockFrequencyInfo *(Function &)> LookupBFI) {
createIRLevelProfileFlagVariable(M);
+ std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
+ collectComdatMembers(M, ComdatMembers);
+
for (auto &F : M) {
if (F.isDeclaration())
continue;
auto *BPI = LookupBPI(F);
auto *BFI = LookupBFI(F);
- instrumentOneFunc(F, &M, BPI, BFI);
+ instrumentOneFunc(F, &M, BPI, BFI, ComdatMembers);
}
return true;
}
@@ -830,7 +1137,7 @@ bool PGOInstrumentationGenLegacyPass::runOnModule(Module &M) {
}
PreservedAnalyses PGOInstrumentationGen::run(Module &M,
- AnalysisManager<Module> &AM) {
+ ModuleAnalysisManager &AM) {
auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
auto LookupBPI = [&FAM](Function &F) {
@@ -877,6 +1184,8 @@ static bool annotateAllFunctions(
return false;
}
+ std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
+ collectComdatMembers(M, ComdatMembers);
std::vector<Function *> HotFunctions;
std::vector<Function *> ColdFunctions;
for (auto &F : M) {
@@ -884,7 +1193,7 @@ static bool annotateAllFunctions(
continue;
auto *BPI = LookupBPI(F);
auto *BFI = LookupBFI(F);
- PGOUseFunc Func(F, &M, BPI, BFI);
+ PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI);
if (!Func.readCounters(PGOReader.get()))
continue;
Func.populateCounters();
@@ -910,7 +1219,6 @@ static bool annotateAllFunctions(
F->addFnAttr(llvm::Attribute::Cold);
DEBUG(dbgs() << "Set cold attribute to function: " << F->getName() << "\n");
}
-
return true;
}
@@ -921,7 +1229,7 @@ PGOInstrumentationUse::PGOInstrumentationUse(std::string Filename)
}
PreservedAnalyses PGOInstrumentationUse::run(Module &M,
- AnalysisManager<Module> &AM) {
+ ModuleAnalysisManager &AM) {
auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
auto LookupBPI = [&FAM](Function &F) {
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
index 7d404473655d..5b4b1fb77134 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
@@ -67,11 +67,23 @@ static const char *const SanCovTraceEnterName =
static const char *const SanCovTraceBBName =
"__sanitizer_cov_trace_basic_block";
static const char *const SanCovTracePCName = "__sanitizer_cov_trace_pc";
-static const char *const SanCovTraceCmpName = "__sanitizer_cov_trace_cmp";
+static const char *const SanCovTraceCmp1 = "__sanitizer_cov_trace_cmp1";
+static const char *const SanCovTraceCmp2 = "__sanitizer_cov_trace_cmp2";
+static const char *const SanCovTraceCmp4 = "__sanitizer_cov_trace_cmp4";
+static const char *const SanCovTraceCmp8 = "__sanitizer_cov_trace_cmp8";
+static const char *const SanCovTraceDiv4 = "__sanitizer_cov_trace_div4";
+static const char *const SanCovTraceDiv8 = "__sanitizer_cov_trace_div8";
+static const char *const SanCovTraceGep = "__sanitizer_cov_trace_gep";
static const char *const SanCovTraceSwitchName = "__sanitizer_cov_trace_switch";
static const char *const SanCovModuleCtorName = "sancov.module_ctor";
static const uint64_t SanCtorAndDtorPriority = 2;
+static const char *const SanCovTracePCGuardSection = "__sancov_guards";
+static const char *const SanCovTracePCGuardName =
+ "__sanitizer_cov_trace_pc_guard";
+static const char *const SanCovTracePCGuardInitName =
+ "__sanitizer_cov_trace_pc_guard_init";
+
static cl::opt<int> ClCoverageLevel(
"sanitizer-coverage-level",
cl::desc("Sanitizer Coverage. 0: none, 1: entry block, 2: all blocks, "
@@ -95,11 +107,22 @@ static cl::opt<bool> ClExperimentalTracePC("sanitizer-coverage-trace-pc",
cl::desc("Experimental pc tracing"),
cl::Hidden, cl::init(false));
+static cl::opt<bool> ClTracePCGuard("sanitizer-coverage-trace-pc-guard",
+ cl::desc("pc tracing with a guard"),
+ cl::Hidden, cl::init(false));
+
static cl::opt<bool>
- ClExperimentalCMPTracing("sanitizer-coverage-experimental-trace-compares",
- cl::desc("Experimental tracing of CMP and similar "
- "instructions"),
- cl::Hidden, cl::init(false));
+ ClCMPTracing("sanitizer-coverage-trace-compares",
+ cl::desc("Tracing of CMP and similar instructions"),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<bool> ClDIVTracing("sanitizer-coverage-trace-divs",
+ cl::desc("Tracing of DIV instructions"),
+ cl::Hidden, cl::init(false));
+
+static cl::opt<bool> ClGEPTracing("sanitizer-coverage-trace-geps",
+ cl::desc("Tracing of GEP instructions"),
+ cl::Hidden, cl::init(false));
static cl::opt<bool>
ClPruneBlocks("sanitizer-coverage-prune-blocks",
@@ -147,9 +170,12 @@ SanitizerCoverageOptions OverrideFromCL(SanitizerCoverageOptions Options) {
Options.CoverageType = std::max(Options.CoverageType, CLOpts.CoverageType);
Options.IndirectCalls |= CLOpts.IndirectCalls;
Options.TraceBB |= ClExperimentalTracing;
- Options.TraceCmp |= ClExperimentalCMPTracing;
+ Options.TraceCmp |= ClCMPTracing;
+ Options.TraceDiv |= ClDIVTracing;
+ Options.TraceGep |= ClGEPTracing;
Options.Use8bitCounters |= ClUse8bitCounters;
Options.TracePC |= ClExperimentalTracePC;
+ Options.TracePCGuard |= ClTracePCGuard;
return Options;
}
@@ -163,7 +189,7 @@ public:
bool runOnModule(Module &M) override;
bool runOnFunction(Function &F);
static char ID; // Pass identification, replacement for typeid
- const char *getPassName() const override { return "SanitizerCoverageModule"; }
+ StringRef getPassName() const override { return "SanitizerCoverageModule"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<DominatorTreeWrapperPass>();
@@ -174,11 +200,17 @@ private:
void InjectCoverageForIndirectCalls(Function &F,
ArrayRef<Instruction *> IndirCalls);
void InjectTraceForCmp(Function &F, ArrayRef<Instruction *> CmpTraceTargets);
+ void InjectTraceForDiv(Function &F,
+ ArrayRef<BinaryOperator *> DivTraceTargets);
+ void InjectTraceForGep(Function &F,
+ ArrayRef<GetElementPtrInst *> GepTraceTargets);
void InjectTraceForSwitch(Function &F,
ArrayRef<Instruction *> SwitchTraceTargets);
bool InjectCoverage(Function &F, ArrayRef<BasicBlock *> AllBlocks);
+ void CreateFunctionGuardArray(size_t NumGuards, Function &F);
void SetNoSanitizeMetadata(Instruction *I);
- void InjectCoverageAtBlock(Function &F, BasicBlock &BB, bool UseCalls);
+ void InjectCoverageAtBlock(Function &F, BasicBlock &BB, size_t Idx,
+ bool UseCalls);
unsigned NumberOfInstrumentedBlocks() {
return SanCovFunction->getNumUses() +
SanCovWithCheckFunction->getNumUses() + SanCovTraceBB->getNumUses() +
@@ -187,17 +219,21 @@ private:
Function *SanCovFunction;
Function *SanCovWithCheckFunction;
Function *SanCovIndirCallFunction, *SanCovTracePCIndir;
- Function *SanCovTraceEnter, *SanCovTraceBB, *SanCovTracePC;
- Function *SanCovTraceCmpFunction;
+ Function *SanCovTraceEnter, *SanCovTraceBB, *SanCovTracePC, *SanCovTracePCGuard;
+ Function *SanCovTraceCmpFunction[4];
+ Function *SanCovTraceDivFunction[2];
+ Function *SanCovTraceGepFunction;
Function *SanCovTraceSwitchFunction;
InlineAsm *EmptyAsm;
- Type *IntptrTy, *Int64Ty, *Int64PtrTy;
+ Type *IntptrTy, *IntptrPtrTy, *Int64Ty, *Int64PtrTy, *Int32Ty, *Int32PtrTy;
Module *CurModule;
LLVMContext *C;
const DataLayout *DL;
GlobalVariable *GuardArray;
+ GlobalVariable *FunctionGuardArray; // for trace-pc-guard.
GlobalVariable *EightBitCounterArray;
+ bool HasSancovGuardsSection;
SanitizerCoverageOptions Options;
};
@@ -210,13 +246,16 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {
C = &(M.getContext());
DL = &M.getDataLayout();
CurModule = &M;
+ HasSancovGuardsSection = false;
IntptrTy = Type::getIntNTy(*C, DL->getPointerSizeInBits());
+ IntptrPtrTy = PointerType::getUnqual(IntptrTy);
Type *VoidTy = Type::getVoidTy(*C);
IRBuilder<> IRB(*C);
Type *Int8PtrTy = PointerType::getUnqual(IRB.getInt8Ty());
- Type *Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty());
Int64PtrTy = PointerType::getUnqual(IRB.getInt64Ty());
+ Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty());
Int64Ty = IRB.getInt64Ty();
+ Int32Ty = IRB.getInt32Ty();
SanCovFunction = checkSanitizerInterfaceFunction(
M.getOrInsertFunction(SanCovName, VoidTy, Int32PtrTy, nullptr));
@@ -227,9 +266,28 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {
SanCovIndirCallFunction =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
SanCovIndirCallName, VoidTy, IntptrTy, IntptrTy, nullptr));
- SanCovTraceCmpFunction =
+ SanCovTraceCmpFunction[0] =
+ checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ SanCovTraceCmp1, VoidTy, IRB.getInt8Ty(), IRB.getInt8Ty(), nullptr));
+ SanCovTraceCmpFunction[1] = checkSanitizerInterfaceFunction(
+ M.getOrInsertFunction(SanCovTraceCmp2, VoidTy, IRB.getInt16Ty(),
+ IRB.getInt16Ty(), nullptr));
+ SanCovTraceCmpFunction[2] = checkSanitizerInterfaceFunction(
+ M.getOrInsertFunction(SanCovTraceCmp4, VoidTy, IRB.getInt32Ty(),
+ IRB.getInt32Ty(), nullptr));
+ SanCovTraceCmpFunction[3] =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- SanCovTraceCmpName, VoidTy, Int64Ty, Int64Ty, Int64Ty, nullptr));
+ SanCovTraceCmp8, VoidTy, Int64Ty, Int64Ty, nullptr));
+
+ SanCovTraceDivFunction[0] =
+ checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ SanCovTraceDiv4, VoidTy, IRB.getInt32Ty(), nullptr));
+ SanCovTraceDivFunction[1] =
+ checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ SanCovTraceDiv8, VoidTy, Int64Ty, nullptr));
+ SanCovTraceGepFunction =
+ checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ SanCovTraceGep, VoidTy, IntptrTy, nullptr));
SanCovTraceSwitchFunction =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
SanCovTraceSwitchName, VoidTy, Int64Ty, Int64PtrTy, nullptr));
@@ -241,6 +299,8 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {
SanCovTracePC = checkSanitizerInterfaceFunction(
M.getOrInsertFunction(SanCovTracePCName, VoidTy, nullptr));
+ SanCovTracePCGuard = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ SanCovTracePCGuardName, VoidTy, Int32PtrTy, nullptr));
SanCovTraceEnter = checkSanitizerInterfaceFunction(
M.getOrInsertFunction(SanCovTraceEnterName, VoidTy, Int32PtrTy, nullptr));
SanCovTraceBB = checkSanitizerInterfaceFunction(
@@ -251,9 +311,10 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {
Type *Int32Ty = IRB.getInt32Ty();
Type *Int8Ty = IRB.getInt8Ty();
- GuardArray =
- new GlobalVariable(M, Int32Ty, false, GlobalValue::ExternalLinkage,
- nullptr, "__sancov_gen_cov_tmp");
+ if (!Options.TracePCGuard)
+ GuardArray =
+ new GlobalVariable(M, Int32Ty, false, GlobalValue::ExternalLinkage,
+ nullptr, "__sancov_gen_cov_tmp");
if (Options.Use8bitCounters)
EightBitCounterArray =
new GlobalVariable(M, Int8Ty, false, GlobalVariable::ExternalLinkage,
@@ -264,17 +325,20 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {
auto N = NumberOfInstrumentedBlocks();
- // Now we know how many elements we need. Create an array of guards
- // with one extra element at the beginning for the size.
- Type *Int32ArrayNTy = ArrayType::get(Int32Ty, N + 1);
- GlobalVariable *RealGuardArray = new GlobalVariable(
- M, Int32ArrayNTy, false, GlobalValue::PrivateLinkage,
- Constant::getNullValue(Int32ArrayNTy), "__sancov_gen_cov");
-
- // Replace the dummy array with the real one.
- GuardArray->replaceAllUsesWith(
- IRB.CreatePointerCast(RealGuardArray, Int32PtrTy));
- GuardArray->eraseFromParent();
+ GlobalVariable *RealGuardArray = nullptr;
+ if (!Options.TracePCGuard) {
+ // Now we know how many elements we need. Create an array of guards
+ // with one extra element at the beginning for the size.
+ Type *Int32ArrayNTy = ArrayType::get(Int32Ty, N + 1);
+ RealGuardArray = new GlobalVariable(
+ M, Int32ArrayNTy, false, GlobalValue::PrivateLinkage,
+ Constant::getNullValue(Int32ArrayNTy), "__sancov_gen_cov");
+
+ // Replace the dummy array with the real one.
+ GuardArray->replaceAllUsesWith(
+ IRB.CreatePointerCast(RealGuardArray, Int32PtrTy));
+ GuardArray->eraseFromParent();
+ }
GlobalVariable *RealEightBitCounterArray;
if (Options.Use8bitCounters) {
@@ -293,11 +357,30 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {
// Create variable for module (compilation unit) name
Constant *ModNameStrConst =
ConstantDataArray::getString(M.getContext(), M.getName(), true);
- GlobalVariable *ModuleName =
- new GlobalVariable(M, ModNameStrConst->getType(), true,
- GlobalValue::PrivateLinkage, ModNameStrConst);
+ GlobalVariable *ModuleName = new GlobalVariable(
+ M, ModNameStrConst->getType(), true, GlobalValue::PrivateLinkage,
+ ModNameStrConst, "__sancov_gen_modname");
+ if (Options.TracePCGuard) {
+ if (HasSancovGuardsSection) {
+ Function *CtorFunc;
+ std::string SectionName(SanCovTracePCGuardSection);
+ GlobalVariable *Bounds[2];
+ const char *Prefix[2] = {"__start_", "__stop_"};
+ for (int i = 0; i < 2; i++) {
+ Bounds[i] = new GlobalVariable(M, Int32PtrTy, false,
+ GlobalVariable::ExternalLinkage, nullptr,
+ Prefix[i] + SectionName);
+ Bounds[i]->setVisibility(GlobalValue::HiddenVisibility);
+ }
+ std::tie(CtorFunc, std::ignore) = createSanitizerCtorAndInitFunctions(
+ M, SanCovModuleCtorName, SanCovTracePCGuardInitName,
+ {Int32PtrTy, Int32PtrTy},
+ {IRB.CreatePointerCast(Bounds[0], Int32PtrTy),
+ IRB.CreatePointerCast(Bounds[1], Int32PtrTy)});
- if (!Options.TracePC) {
+ appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority);
+ }
+ } else if (!Options.TracePC) {
Function *CtorFunc;
std::tie(CtorFunc, std::ignore) = createSanitizerCtorAndInitFunctions(
M, SanCovModuleCtorName, SanCovModuleInitName,
@@ -344,6 +427,14 @@ static bool isFullPostDominator(const BasicBlock *BB,
static bool shouldInstrumentBlock(const Function& F, const BasicBlock *BB, const DominatorTree *DT,
const PostDominatorTree *PDT) {
+ // Don't insert coverage for unreachable blocks: we will never call
+ // __sanitizer_cov() for them, so counting them in
+ // NumberOfInstrumentedBlocks() might complicate calculation of code coverage
+ // percentage. Also, unreachable instructions frequently have no debug
+ // locations.
+ if (isa<UnreachableInst>(BB->getTerminator()))
+ return false;
+
if (!ClPruneBlocks || &F.getEntryBlock() == BB)
return true;
@@ -355,6 +446,13 @@ bool SanitizerCoverageModule::runOnFunction(Function &F) {
return false;
if (F.getName().find(".module_ctor") != std::string::npos)
return false; // Should not instrument sanitizer init functions.
+ if (F.getName().startswith("__sanitizer_"))
+ return false; // Don't instrument __sanitizer_* callbacks.
+ // Don't instrument MSVC CRT configuration helpers. They may run before normal
+ // initialization.
+ if (F.getName() == "__local_stdio_printf_options" ||
+ F.getName() == "__local_stdio_scanf_options")
+ return false;
// Don't instrument functions using SEH for now. Splitting basic blocks like
// we do for coverage breaks WinEHPrepare.
// FIXME: Remove this when SEH no longer uses landingpad pattern matching.
@@ -367,6 +465,8 @@ bool SanitizerCoverageModule::runOnFunction(Function &F) {
SmallVector<BasicBlock *, 16> BlocksToInstrument;
SmallVector<Instruction *, 8> CmpTraceTargets;
SmallVector<Instruction *, 8> SwitchTraceTargets;
+ SmallVector<BinaryOperator *, 8> DivTraceTargets;
+ SmallVector<GetElementPtrInst *, 8> GepTraceTargets;
const DominatorTree *DT =
&getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
@@ -388,28 +488,53 @@ bool SanitizerCoverageModule::runOnFunction(Function &F) {
if (isa<SwitchInst>(&Inst))
SwitchTraceTargets.push_back(&Inst);
}
- }
+ if (Options.TraceDiv)
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(&Inst))
+ if (BO->getOpcode() == Instruction::SDiv ||
+ BO->getOpcode() == Instruction::UDiv)
+ DivTraceTargets.push_back(BO);
+ if (Options.TraceGep)
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&Inst))
+ GepTraceTargets.push_back(GEP);
+ }
}
InjectCoverage(F, BlocksToInstrument);
InjectCoverageForIndirectCalls(F, IndirCalls);
InjectTraceForCmp(F, CmpTraceTargets);
InjectTraceForSwitch(F, SwitchTraceTargets);
+ InjectTraceForDiv(F, DivTraceTargets);
+ InjectTraceForGep(F, GepTraceTargets);
return true;
}
+void SanitizerCoverageModule::CreateFunctionGuardArray(size_t NumGuards,
+ Function &F) {
+ if (!Options.TracePCGuard) return;
+ HasSancovGuardsSection = true;
+ ArrayType *ArrayOfInt32Ty = ArrayType::get(Int32Ty, NumGuards);
+ FunctionGuardArray = new GlobalVariable(
+ *CurModule, ArrayOfInt32Ty, false, GlobalVariable::PrivateLinkage,
+ Constant::getNullValue(ArrayOfInt32Ty), "__sancov_gen_");
+ if (auto Comdat = F.getComdat())
+ FunctionGuardArray->setComdat(Comdat);
+ FunctionGuardArray->setSection(SanCovTracePCGuardSection);
+}
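Placing every function's guard array in the __sancov_guards section lets the linker-provided __start_/__stop_ symbols bound all guards at once, and those bounds are what the generated module ctor passes to __sanitizer_cov_trace_pc_guard_init. A hedged sketch of what the runtime side of that contract could look like; this is an illustration, not the actual compiler-rt code:

    #include <cstdint>

    // Called with the [Start, Stop) range of all 32-bit guard slots in the
    // __sancov_guards section; assign each fresh slot a nonzero id so the
    // inlined non-zero check reaches the callback, which can later zero the
    // guard to stop reporting that block.
    extern "C" void __sanitizer_cov_trace_pc_guard_init(uint32_t *Start,
                                                        uint32_t *Stop) {
      static uint32_t NextId = 1;
      for (uint32_t *Guard = Start; Guard != Stop; ++Guard)
        if (*Guard == 0)
          *Guard = NextId++;
    }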
bool SanitizerCoverageModule::InjectCoverage(Function &F,
ArrayRef<BasicBlock *> AllBlocks) {
+ if (AllBlocks.empty()) return false;
switch (Options.CoverageType) {
case SanitizerCoverageOptions::SCK_None:
return false;
case SanitizerCoverageOptions::SCK_Function:
- InjectCoverageAtBlock(F, F.getEntryBlock(), false);
+ CreateFunctionGuardArray(1, F);
+ InjectCoverageAtBlock(F, F.getEntryBlock(), 0, false);
return true;
default: {
bool UseCalls = ClCoverageBlockThreshold < AllBlocks.size();
- for (auto BB : AllBlocks)
- InjectCoverageAtBlock(F, *BB, UseCalls);
+ CreateFunctionGuardArray(AllBlocks.size(), F);
+ for (size_t i = 0, N = AllBlocks.size(); i < N; i++)
+ InjectCoverageAtBlock(F, *AllBlocks[i], i, UseCalls);
return true;
}
}
@@ -439,7 +564,7 @@ void SanitizerCoverageModule::InjectCoverageForIndirectCalls(
*F.getParent(), Ty, false, GlobalValue::PrivateLinkage,
Constant::getNullValue(Ty), "__sancov_gen_callee_cache");
CalleeCache->setAlignment(CacheAlignment);
- if (Options.TracePC)
+ if (Options.TracePC || Options.TracePCGuard)
IRB.CreateCall(SanCovTracePCIndir,
IRB.CreatePointerCast(Callee, IntptrTy));
else
@@ -476,6 +601,11 @@ void SanitizerCoverageModule::InjectTraceForSwitch(
C = ConstantExpr::getCast(CastInst::ZExt, It.getCaseValue(), Int64Ty);
Initializers.push_back(C);
}
+ std::sort(Initializers.begin() + 2, Initializers.end(),
+ [](const Constant *A, const Constant *B) {
+ return cast<ConstantInt>(A)->getLimitedValue() <
+ cast<ConstantInt>(B)->getLimitedValue();
+ });
ArrayType *ArrayOfInt64Ty = ArrayType::get(Int64Ty, Initializers.size());
GlobalVariable *GV = new GlobalVariable(
*CurModule, ArrayOfInt64Ty, false, GlobalVariable::InternalLinkage,
@@ -487,6 +617,35 @@ void SanitizerCoverageModule::InjectTraceForSwitch(
}
}
+void SanitizerCoverageModule::InjectTraceForDiv(
+ Function &, ArrayRef<BinaryOperator *> DivTraceTargets) {
+ for (auto BO : DivTraceTargets) {
+ IRBuilder<> IRB(BO);
+ Value *A1 = BO->getOperand(1);
+ if (isa<ConstantInt>(A1)) continue;
+ if (!A1->getType()->isIntegerTy())
+ continue;
+ uint64_t TypeSize = DL->getTypeStoreSizeInBits(A1->getType());
+ int CallbackIdx = TypeSize == 32 ? 0 :
+ TypeSize == 64 ? 1 : -1;
+ if (CallbackIdx < 0) continue;
+ auto Ty = Type::getIntNTy(*C, TypeSize);
+ IRB.CreateCall(SanCovTraceDivFunction[CallbackIdx],
+ {IRB.CreateIntCast(A1, Ty, true)});
+ }
+}
+
+void SanitizerCoverageModule::InjectTraceForGep(
+ Function &, ArrayRef<GetElementPtrInst *> GepTraceTargets) {
+ for (auto GEP : GepTraceTargets) {
+ IRBuilder<> IRB(GEP);
+ for (auto I = GEP->idx_begin(); I != GEP->idx_end(); ++I)
+ if (!isa<ConstantInt>(*I) && (*I)->getType()->isIntegerTy())
+ IRB.CreateCall(SanCovTraceGepFunction,
+ {IRB.CreateIntCast(*I, IntptrTy, true)});
+ }
+}
+
void SanitizerCoverageModule::InjectTraceForCmp(
Function &, ArrayRef<Instruction *> CmpTraceTargets) {
for (auto I : CmpTraceTargets) {
@@ -497,12 +656,16 @@ void SanitizerCoverageModule::InjectTraceForCmp(
if (!A0->getType()->isIntegerTy())
continue;
uint64_t TypeSize = DL->getTypeStoreSizeInBits(A0->getType());
+ int CallbackIdx = TypeSize == 8 ? 0 :
+ TypeSize == 16 ? 1 :
+ TypeSize == 32 ? 2 :
+ TypeSize == 64 ? 3 : -1;
+ if (CallbackIdx < 0) continue;
// __sanitizer_cov_trace_cmp((type_size << 32) | predicate, A0, A1);
+ auto Ty = Type::getIntNTy(*C, TypeSize);
IRB.CreateCall(
- SanCovTraceCmpFunction,
- {ConstantInt::get(Int64Ty, (TypeSize << 32) | ICMP->getPredicate()),
- IRB.CreateIntCast(A0, Int64Ty, true),
- IRB.CreateIntCast(A1, Int64Ty, true)});
+ SanCovTraceCmpFunction[CallbackIdx],
+ {IRB.CreateIntCast(A0, Ty, true), IRB.CreateIntCast(A1, Ty, true)});
}
}
}
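The single packed (size << 32 | predicate) callback is replaced by per-width entry points. A standalone sketch of the width-to-callback dispatch used above; the names in the comments match the new __sanitizer_cov_trace_cmpN symbols:

    #include <cstdint>

    static int cmpCallbackIndex(uint64_t TypeSizeInBits) {
      switch (TypeSizeInBits) {
      case 8:  return 0;   // __sanitizer_cov_trace_cmp1
      case 16: return 1;   // __sanitizer_cov_trace_cmp2
      case 32: return 2;   // __sanitizer_cov_trace_cmp4
      case 64: return 3;   // __sanitizer_cov_trace_cmp8
      default: return -1;  // other widths: the compare is left uninstrumented
      }
    }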
@@ -513,16 +676,8 @@ void SanitizerCoverageModule::SetNoSanitizeMetadata(Instruction *I) {
}
void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
- bool UseCalls) {
- // Don't insert coverage for unreachable blocks: we will never call
- // __sanitizer_cov() for them, so counting them in
- // NumberOfInstrumentedBlocks() might complicate calculation of code coverage
- // percentage. Also, unreachable instructions frequently have no debug
- // locations.
- if (isa<UnreachableInst>(BB.getTerminator()))
- return;
+ size_t Idx, bool UseCalls) {
BasicBlock::iterator IP = BB.getFirstInsertionPt();
-
bool IsEntryBB = &BB == &F.getEntryBlock();
DebugLoc EntryLoc;
if (IsEntryBB) {
@@ -538,32 +693,52 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
IRBuilder<> IRB(&*IP);
IRB.SetCurrentDebugLocation(EntryLoc);
- Value *GuardP = IRB.CreateAdd(
- IRB.CreatePointerCast(GuardArray, IntptrTy),
- ConstantInt::get(IntptrTy, (1 + NumberOfInstrumentedBlocks()) * 4));
- Type *Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty());
- GuardP = IRB.CreateIntToPtr(GuardP, Int32PtrTy);
if (Options.TracePC) {
IRB.CreateCall(SanCovTracePC); // gets the PC using GET_CALLER_PC.
IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge.
- } else if (Options.TraceBB) {
- IRB.CreateCall(IsEntryBB ? SanCovTraceEnter : SanCovTraceBB, GuardP);
- } else if (UseCalls) {
- IRB.CreateCall(SanCovWithCheckFunction, GuardP);
- } else {
- LoadInst *Load = IRB.CreateLoad(GuardP);
- Load->setAtomic(AtomicOrdering::Monotonic);
- Load->setAlignment(4);
- SetNoSanitizeMetadata(Load);
- Value *Cmp =
- IRB.CreateICmpSGE(Constant::getNullValue(Load->getType()), Load);
- Instruction *Ins = SplitBlockAndInsertIfThen(
- Cmp, &*IP, false, MDBuilder(*C).createBranchWeights(1, 100000));
- IRB.SetInsertPoint(Ins);
- IRB.SetCurrentDebugLocation(EntryLoc);
- // __sanitizer_cov gets the PC of the instruction using GET_CALLER_PC.
- IRB.CreateCall(SanCovFunction, GuardP);
+ } else if (Options.TracePCGuard) {
+ auto GuardPtr = IRB.CreateIntToPtr(
+ IRB.CreateAdd(IRB.CreatePointerCast(FunctionGuardArray, IntptrTy),
+ ConstantInt::get(IntptrTy, Idx * 4)),
+ Int32PtrTy);
+ if (!UseCalls) {
+ auto GuardLoad = IRB.CreateLoad(GuardPtr);
+ GuardLoad->setAtomic(AtomicOrdering::Monotonic);
+ GuardLoad->setAlignment(8);
+ SetNoSanitizeMetadata(GuardLoad); // Don't instrument with e.g. asan.
+ auto Cmp = IRB.CreateICmpNE(
+ GuardLoad, Constant::getNullValue(GuardLoad->getType()));
+ auto Ins = SplitBlockAndInsertIfThen(
+ Cmp, &*IP, false, MDBuilder(*C).createBranchWeights(1, 100000));
+ IRB.SetInsertPoint(Ins);
+ IRB.SetCurrentDebugLocation(EntryLoc);
+ }
+ IRB.CreateCall(SanCovTracePCGuard, GuardPtr);
IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge.
+ } else {
+ Value *GuardP = IRB.CreateAdd(
+ IRB.CreatePointerCast(GuardArray, IntptrTy),
+ ConstantInt::get(IntptrTy, (1 + NumberOfInstrumentedBlocks()) * 4));
+ GuardP = IRB.CreateIntToPtr(GuardP, Int32PtrTy);
+ if (Options.TraceBB) {
+ IRB.CreateCall(IsEntryBB ? SanCovTraceEnter : SanCovTraceBB, GuardP);
+ } else if (UseCalls) {
+ IRB.CreateCall(SanCovWithCheckFunction, GuardP);
+ } else {
+ LoadInst *Load = IRB.CreateLoad(GuardP);
+ Load->setAtomic(AtomicOrdering::Monotonic);
+ Load->setAlignment(4);
+ SetNoSanitizeMetadata(Load);
+ Value *Cmp =
+ IRB.CreateICmpSGE(Constant::getNullValue(Load->getType()), Load);
+ Instruction *Ins = SplitBlockAndInsertIfThen(
+ Cmp, &*IP, false, MDBuilder(*C).createBranchWeights(1, 100000));
+ IRB.SetInsertPoint(Ins);
+ IRB.SetCurrentDebugLocation(EntryLoc);
+ // __sanitizer_cov gets the PC of the instruction using GET_CALLER_PC.
+ IRB.CreateCall(SanCovFunction, GuardP);
+ IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge.
+ }
}
if (Options.Use8bitCounters) {
diff --git a/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index 41041c78db97..d9659694da46 100644
--- a/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/contrib/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -43,6 +43,7 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/EscapeEnumerator.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
@@ -56,6 +57,10 @@ static cl::opt<bool> ClInstrumentMemoryAccesses(
static cl::opt<bool> ClInstrumentFuncEntryExit(
"tsan-instrument-func-entry-exit", cl::init(true),
cl::desc("Instrument function entry and exit"), cl::Hidden);
+static cl::opt<bool> ClHandleCxxExceptions(
+ "tsan-handle-cxx-exceptions", cl::init(true),
+ cl::desc("Handle C++ exceptions (insert cleanup blocks for unwinding)"),
+ cl::Hidden);
static cl::opt<bool> ClInstrumentAtomics(
"tsan-instrument-atomics", cl::init(true),
cl::desc("Instrument atomics"), cl::Hidden);
@@ -83,7 +88,7 @@ namespace {
/// ThreadSanitizer: instrument the code in module to find races.
struct ThreadSanitizer : public FunctionPass {
ThreadSanitizer() : FunctionPass(ID) {}
- const char *getPassName() const override;
+ StringRef getPassName() const override;
void getAnalysisUsage(AnalysisUsage &AU) const override;
bool runOnFunction(Function &F) override;
bool doInitialization(Module &M) override;
@@ -99,12 +104,15 @@ struct ThreadSanitizer : public FunctionPass {
const DataLayout &DL);
bool addrPointsToConstantData(Value *Addr);
int getMemoryAccessFuncIndex(Value *Addr, const DataLayout &DL);
+ void InsertRuntimeIgnores(Function &F);
Type *IntptrTy;
IntegerType *OrdTy;
// Callbacks to run-time library are computed in doInitialization.
Function *TsanFuncEntry;
Function *TsanFuncExit;
+ Function *TsanIgnoreBegin;
+ Function *TsanIgnoreEnd;
// Accesses sizes are powers of two: 1, 2, 4, 8, 16.
static const size_t kNumberOfAccessSizes = 5;
Function *TsanRead[kNumberOfAccessSizes];
@@ -135,9 +143,7 @@ INITIALIZE_PASS_END(
"ThreadSanitizer: detects data races.",
false, false)
-const char *ThreadSanitizer::getPassName() const {
- return "ThreadSanitizer";
-}
+StringRef ThreadSanitizer::getPassName() const { return "ThreadSanitizer"; }
void ThreadSanitizer::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetLibraryInfoWrapperPass>();
@@ -149,11 +155,17 @@ FunctionPass *llvm::createThreadSanitizerPass() {
void ThreadSanitizer::initializeCallbacks(Module &M) {
IRBuilder<> IRB(M.getContext());
+ AttributeSet Attr;
+ Attr = Attr.addAttribute(M.getContext(), AttributeSet::FunctionIndex, Attribute::NoUnwind);
// Initialize the callbacks.
TsanFuncEntry = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- "__tsan_func_entry", IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
+ "__tsan_func_entry", Attr, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
TsanFuncExit = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction("__tsan_func_exit", IRB.getVoidTy(), nullptr));
+ M.getOrInsertFunction("__tsan_func_exit", Attr, IRB.getVoidTy(), nullptr));
+ TsanIgnoreBegin = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ "__tsan_ignore_thread_begin", Attr, IRB.getVoidTy(), nullptr));
+ TsanIgnoreEnd = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
+ "__tsan_ignore_thread_end", Attr, IRB.getVoidTy(), nullptr));
OrdTy = IRB.getInt32Ty();
for (size_t i = 0; i < kNumberOfAccessSizes; ++i) {
const unsigned ByteSize = 1U << i;
@@ -162,31 +174,31 @@ void ThreadSanitizer::initializeCallbacks(Module &M) {
std::string BitSizeStr = utostr(BitSize);
SmallString<32> ReadName("__tsan_read" + ByteSizeStr);
TsanRead[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- ReadName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
+ ReadName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
SmallString<32> WriteName("__tsan_write" + ByteSizeStr);
TsanWrite[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- WriteName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
+ WriteName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
SmallString<64> UnalignedReadName("__tsan_unaligned_read" + ByteSizeStr);
TsanUnalignedRead[i] =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- UnalignedReadName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
+ UnalignedReadName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
SmallString<64> UnalignedWriteName("__tsan_unaligned_write" + ByteSizeStr);
TsanUnalignedWrite[i] =
checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- UnalignedWriteName, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
+ UnalignedWriteName, Attr, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
Type *Ty = Type::getIntNTy(M.getContext(), BitSize);
Type *PtrTy = Ty->getPointerTo();
SmallString<32> AtomicLoadName("__tsan_atomic" + BitSizeStr + "_load");
TsanAtomicLoad[i] = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction(AtomicLoadName, Ty, PtrTy, OrdTy, nullptr));
+ M.getOrInsertFunction(AtomicLoadName, Attr, Ty, PtrTy, OrdTy, nullptr));
SmallString<32> AtomicStoreName("__tsan_atomic" + BitSizeStr + "_store");
TsanAtomicStore[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- AtomicStoreName, IRB.getVoidTy(), PtrTy, Ty, OrdTy, nullptr));
+ AtomicStoreName, Attr, IRB.getVoidTy(), PtrTy, Ty, OrdTy, nullptr));
for (int op = AtomicRMWInst::FIRST_BINOP;
op <= AtomicRMWInst::LAST_BINOP; ++op) {
@@ -210,32 +222,32 @@ void ThreadSanitizer::initializeCallbacks(Module &M) {
continue;
SmallString<32> RMWName("__tsan_atomic" + itostr(BitSize) + NamePart);
TsanAtomicRMW[op][i] = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction(RMWName, Ty, PtrTy, Ty, OrdTy, nullptr));
+ M.getOrInsertFunction(RMWName, Attr, Ty, PtrTy, Ty, OrdTy, nullptr));
}
SmallString<32> AtomicCASName("__tsan_atomic" + BitSizeStr +
"_compare_exchange_val");
TsanAtomicCAS[i] = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- AtomicCASName, Ty, PtrTy, Ty, Ty, OrdTy, OrdTy, nullptr));
+ AtomicCASName, Attr, Ty, PtrTy, Ty, Ty, OrdTy, OrdTy, nullptr));
}
TsanVptrUpdate = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction("__tsan_vptr_update", IRB.getVoidTy(),
+ M.getOrInsertFunction("__tsan_vptr_update", Attr, IRB.getVoidTy(),
IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), nullptr));
TsanVptrLoad = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- "__tsan_vptr_read", IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
+ "__tsan_vptr_read", Attr, IRB.getVoidTy(), IRB.getInt8PtrTy(), nullptr));
TsanAtomicThreadFence = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- "__tsan_atomic_thread_fence", IRB.getVoidTy(), OrdTy, nullptr));
+ "__tsan_atomic_thread_fence", Attr, IRB.getVoidTy(), OrdTy, nullptr));
TsanAtomicSignalFence = checkSanitizerInterfaceFunction(M.getOrInsertFunction(
- "__tsan_atomic_signal_fence", IRB.getVoidTy(), OrdTy, nullptr));
+ "__tsan_atomic_signal_fence", Attr, IRB.getVoidTy(), OrdTy, nullptr));
MemmoveFn = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction("memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+ M.getOrInsertFunction("memmove", Attr, IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
IRB.getInt8PtrTy(), IntptrTy, nullptr));
MemcpyFn = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction("memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+ M.getOrInsertFunction("memcpy", Attr, IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
IRB.getInt8PtrTy(), IntptrTy, nullptr));
MemsetFn = checkSanitizerInterfaceFunction(
- M.getOrInsertFunction("memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
+ M.getOrInsertFunction("memset", Attr, IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
IRB.getInt32Ty(), IntptrTy, nullptr));
}
@@ -378,13 +390,21 @@ static bool isAtomic(Instruction *I) {
return false;
}
+void ThreadSanitizer::InsertRuntimeIgnores(Function &F) {
+ IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI());
+ IRB.CreateCall(TsanIgnoreBegin);
+ EscapeEnumerator EE(F, "tsan_ignore_cleanup", ClHandleCxxExceptions);
+ while (IRBuilder<> *AtExit = EE.Next()) {
+ AtExit->CreateCall(TsanIgnoreEnd);
+ }
+}
+
bool ThreadSanitizer::runOnFunction(Function &F) {
// This is required to prevent instrumenting call to __tsan_init from within
// the module constructor.
if (&F == TsanCtorFunction)
return false;
initializeCallbacks(*F.getParent());
- SmallVector<Instruction*, 8> RetVec;
SmallVector<Instruction*, 8> AllLoadsAndStores;
SmallVector<Instruction*, 8> LocalLoadsAndStores;
SmallVector<Instruction*, 8> AtomicAccesses;
@@ -403,8 +423,6 @@ bool ThreadSanitizer::runOnFunction(Function &F) {
AtomicAccesses.push_back(&Inst);
else if (isa<LoadInst>(Inst) || isa<StoreInst>(Inst))
LocalLoadsAndStores.push_back(&Inst);
- else if (isa<ReturnInst>(Inst))
- RetVec.push_back(&Inst);
else if (isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) {
if (CallInst *CI = dyn_cast<CallInst>(&Inst))
maybeMarkSanitizerLibraryCallNoBuiltin(CI, TLI);
@@ -440,6 +458,12 @@ bool ThreadSanitizer::runOnFunction(Function &F) {
Res |= instrumentMemIntrinsic(Inst);
}
+ if (F.hasFnAttribute("sanitize_thread_no_checking_at_run_time")) {
+ assert(!F.hasFnAttribute(Attribute::SanitizeThread));
+ if (HasCalls)
+ InsertRuntimeIgnores(F);
+ }
+
// Instrument function entry/exit points if there were instrumented accesses.
if ((Res || HasCalls) && ClInstrumentFuncEntryExit) {
IRBuilder<> IRB(F.getEntryBlock().getFirstNonPHI());
@@ -447,9 +471,10 @@ bool ThreadSanitizer::runOnFunction(Function &F) {
Intrinsic::getDeclaration(F.getParent(), Intrinsic::returnaddress),
IRB.getInt32(0));
IRB.CreateCall(TsanFuncEntry, ReturnAddress);
- for (auto RetInst : RetVec) {
- IRBuilder<> IRBRet(RetInst);
- IRBRet.CreateCall(TsanFuncExit, {});
+
+ EscapeEnumerator EE(F, "tsan_cleanup", ClHandleCxxExceptions);
+ while (IRBuilder<> *AtExit = EE.Next()) {
+ AtExit->CreateCall(TsanFuncExit, {});
}
Res = true;
}
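
The old RetVec collection of plain returns is gone: EscapeEnumerator hands back an IRBuilder positioned before every exit of the function, including the unwind cleanups it synthesizes when -tsan-handle-cxx-exceptions is on, so __tsan_func_exit (and, in InsertRuntimeIgnores above, __tsan_ignore_thread_end) also runs when an exception unwinds out of the function. A minimal sketch of that pattern, with an illustrative helper name:

    #include "llvm/IR/Function.h"
    #include "llvm/IR/IRBuilder.h"
    #include "llvm/Transforms/Utils/EscapeEnumerator.h"
    using namespace llvm;

    // Emit a call to ExitHook on every path out of F (returns, plus resume /
    // cleanup paths when HandleExceptions is true).
    static void insertExitCalls(Function &F, Function *ExitHook,
                                bool HandleExceptions) {
      EscapeEnumerator EE(F, "cleanup", HandleExceptions);
      while (IRBuilder<> *AtExit = EE.Next())
        AtExit->CreateCall(ExitHook, {});
    }
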
@@ -511,7 +536,7 @@ static ConstantInt *createOrdering(IRBuilder<> *IRB, AtomicOrdering ord) {
switch (ord) {
case AtomicOrdering::NotAtomic:
llvm_unreachable("unexpected atomic ordering!");
- case AtomicOrdering::Unordered: // Fall-through.
+ case AtomicOrdering::Unordered: LLVM_FALLTHROUGH;
case AtomicOrdering::Monotonic: v = 0; break;
// Not specified yet:
// case AtomicOrdering::Consume: v = 1; break;
@@ -551,11 +576,6 @@ bool ThreadSanitizer::instrumentMemIntrinsic(Instruction *I) {
return false;
}
-static Value *createIntOrPtrToIntCast(Value *V, Type* Ty, IRBuilder<> &IRB) {
- return isa<PointerType>(V->getType()) ?
- IRB.CreatePtrToInt(V, Ty) : IRB.CreateIntCast(V, Ty, false);
-}
-
// Both llvm and ThreadSanitizer atomic operations are based on C++11/C1x
// standards. For background see C++11 standard. A slightly older, publicly
// available draft of the standard (not entirely up-to-date, but close enough
@@ -578,15 +598,9 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) {
Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy),
createOrdering(&IRB, LI->getOrdering())};
Type *OrigTy = cast<PointerType>(Addr->getType())->getElementType();
- if (Ty == OrigTy) {
- Instruction *C = CallInst::Create(TsanAtomicLoad[Idx], Args);
- ReplaceInstWithInst(I, C);
- } else {
- // We are loading a pointer, so we need to cast the return value.
- Value *C = IRB.CreateCall(TsanAtomicLoad[Idx], Args);
- Instruction *Cast = CastInst::Create(Instruction::IntToPtr, C, OrigTy);
- ReplaceInstWithInst(I, Cast);
- }
+ Value *C = IRB.CreateCall(TsanAtomicLoad[Idx], Args);
+ Value *Cast = IRB.CreateBitOrPointerCast(C, OrigTy);
+ I->replaceAllUsesWith(Cast);
} else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
Value *Addr = SI->getPointerOperand();
int Idx = getMemoryAccessFuncIndex(Addr, DL);
@@ -597,7 +611,7 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) {
Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize);
Type *PtrTy = Ty->getPointerTo();
Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy),
- createIntOrPtrToIntCast(SI->getValueOperand(), Ty, IRB),
+ IRB.CreateBitOrPointerCast(SI->getValueOperand(), Ty),
createOrdering(&IRB, SI->getOrdering())};
CallInst *C = CallInst::Create(TsanAtomicStore[Idx], Args);
ReplaceInstWithInst(I, C);
@@ -628,9 +642,9 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I, const DataLayout &DL) {
Type *Ty = Type::getIntNTy(IRB.getContext(), BitSize);
Type *PtrTy = Ty->getPointerTo();
Value *CmpOperand =
- createIntOrPtrToIntCast(CASI->getCompareOperand(), Ty, IRB);
+ IRB.CreateBitOrPointerCast(CASI->getCompareOperand(), Ty);
Value *NewOperand =
- createIntOrPtrToIntCast(CASI->getNewValOperand(), Ty, IRB);
+ IRB.CreateBitOrPointerCast(CASI->getNewValOperand(), Ty);
Value *Args[] = {IRB.CreatePointerCast(Addr, PtrTy),
CmpOperand,
NewOperand,
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h b/contrib/llvm/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
index d4fef10d96f0..c74827210364 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
@@ -121,8 +121,7 @@ private:
/// Declaration for objc_retainAutoreleaseReturnValue().
Constant *RetainAutoreleaseRV;
- Constant *getVoidRetI8XEntryPoint(Constant *&Decl,
- const char *Name) {
+ Constant *getVoidRetI8XEntryPoint(Constant *&Decl, StringRef Name) {
if (Decl)
return Decl;
@@ -136,8 +135,7 @@ private:
return Decl = TheModule->getOrInsertFunction(Name, Fty, Attr);
}
- Constant *getI8XRetI8XEntryPoint(Constant *& Decl,
- const char *Name,
+ Constant *getI8XRetI8XEntryPoint(Constant *&Decl, StringRef Name,
bool NoUnwind = false) {
if (Decl)
return Decl;
@@ -155,8 +153,7 @@ private:
return Decl = TheModule->getOrInsertFunction(Name, Fty, Attr);
}
- Constant *getI8XRetI8XXI8XEntryPoint(Constant *&Decl,
- const char *Name) {
+ Constant *getI8XRetI8XXI8XEntryPoint(Constant *&Decl, StringRef Name) {
if (Decl)
return Decl;
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
index 11e2d03e17d9..23c1f5990ba5 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCContract.cpp
@@ -423,7 +423,7 @@ bool ObjCARCContract::tryToPeepholeInstruction(
if (!optimizeRetainCall(F, Inst))
return false;
// If we succeed in our optimization, fall through.
- // FALLTHROUGH
+ LLVM_FALLTHROUGH;
case ARCInstKind::RetainRV:
case ARCInstKind::ClaimRV: {
// If we're compiling for a target which needs a special inline-asm
@@ -547,13 +547,13 @@ bool ObjCARCContract::runOnFunction(Function &F) {
// Don't use GetArgRCIdentityRoot because we don't want to look through bitcasts
// and such; to do the replacement, the argument must have type i8*.
- Value *Arg = cast<CallInst>(Inst)->getArgOperand(0);
- // TODO: Change this to a do-while.
- for (;;) {
+ // Function for replacing uses of Arg dominated by Inst.
+ auto ReplaceArgUses = [Inst, this](Value *Arg) {
// If we're compiling bugpointed code, don't get in trouble.
if (!isa<Instruction>(Arg) && !isa<Argument>(Arg))
- break;
+ return;
+
// Look through the uses of the pointer.
for (Value::use_iterator UI = Arg->use_begin(), UE = Arg->use_end();
UI != UE; ) {
@@ -598,6 +598,15 @@ bool ObjCARCContract::runOnFunction(Function &F) {
}
}
}
+ };
+
+
+ Value *Arg = cast<CallInst>(Inst)->getArgOperand(0);
+ Value *OrigArg = Arg;
+
+ // TODO: Change this to a do-while.
+ for (;;) {
+ ReplaceArgUses(Arg);
// If Arg is a no-op casted pointer, strip one level of casts and iterate.
if (const BitCastInst *BI = dyn_cast<BitCastInst>(Arg))
@@ -611,6 +620,24 @@ bool ObjCARCContract::runOnFunction(Function &F) {
else
break;
}
+
+ // Replace bitcast users of Arg that are dominated by Inst.
+ SmallVector<BitCastInst *, 2> BitCastUsers;
+
+ // Add all bitcast users of the function argument first.
+ for (User *U : OrigArg->users())
+ if (auto *BC = dyn_cast<BitCastInst>(U))
+ BitCastUsers.push_back(BC);
+
+ // Replace the bitcasts with the call return. Iterate until list is empty.
+ while (!BitCastUsers.empty()) {
+ auto *BC = BitCastUsers.pop_back_val();
+ for (User *U : BC->users())
+ if (auto *B = dyn_cast<BitCastInst>(U))
+ BitCastUsers.push_back(B);
+
+ ReplaceArgUses(BC);
+ }
}
// If this function has no escaping allocas or suspicious vararg usage,
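
The rewritten loop above factors the use replacement into the ReplaceArgUses closure and then drives it twice: once along the bitcast chain of Arg itself, and once over every BitCastInst transitively reachable from the original argument via a small worklist. Isolated as a hypothetical helper (the name, and the absence of a visited set, which relies on bitcast chains being acyclic, are illustrative), the worklist walk looks like this:

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // Visit every BitCastInst reachable from Root through chains of bitcasts.
    template <typename Fn>
    static void forEachTransitiveBitCastUser(Value *Root, Fn Visit) {
      SmallVector<BitCastInst *, 2> Worklist;
      for (User *U : Root->users())
        if (auto *BC = dyn_cast<BitCastInst>(U))
          Worklist.push_back(BC);

      while (!Worklist.empty()) {
        BitCastInst *BC = Worklist.pop_back_val();
        for (User *U : BC->users())
          if (auto *Inner = dyn_cast<BitCastInst>(U))
            Worklist.push_back(Inner);
        Visit(BC);
      }
    }
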
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
index a6907b56cf45..136d54a6cb75 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp
@@ -53,6 +53,11 @@ using namespace llvm::objcarc;
/// \brief This is similar to GetRCIdentityRoot but it stops as soon
/// as it finds a value with multiple uses.
static const Value *FindSingleUseIdentifiedObject(const Value *Arg) {
+ // ConstantData (like ConstantPointerNull and UndefValue) is used across
+ // modules. It's never a single-use value.
+ if (isa<ConstantData>(Arg))
+ return nullptr;
+
if (Arg->hasOneUse()) {
if (const BitCastInst *BC = dyn_cast<BitCastInst>(Arg))
return FindSingleUseIdentifiedObject(BC->getOperand(0));
@@ -644,6 +649,12 @@ void ObjCARCOpt::OptimizeAutoreleaseRVCall(Function &F,
ARCInstKind &Class) {
// Check for a return of the pointer value.
const Value *Ptr = GetArgRCIdentityRoot(AutoreleaseRV);
+
+ // If the argument is ConstantPointerNull or UndefValue, its other users
+ // aren't actually interesting to look at.
+ if (isa<ConstantData>(Ptr))
+ return;
+
SmallVector<const Value *, 2> Users;
Users.push_back(Ptr);
do {
@@ -2075,12 +2086,11 @@ void ObjCARCOpt::OptimizeReturns(Function &F) {
SmallPtrSet<const BasicBlock *, 4> Visited;
for (BasicBlock &BB: F) {
ReturnInst *Ret = dyn_cast<ReturnInst>(&BB.back());
-
- DEBUG(dbgs() << "Visiting: " << *Ret << "\n");
-
if (!Ret)
continue;
+ DEBUG(dbgs() << "Visiting: " << *Ret << "\n");
+
const Value *Arg = GetRCIdentityRoot(Ret->getOperand(0));
// Look for an ``autorelease'' instruction that is a predecessor of Ret and
diff --git a/contrib/llvm/lib/Transforms/ObjCARC/PtrState.cpp b/contrib/llvm/lib/Transforms/ObjCARC/PtrState.cpp
index df64fa32f3f8..a5afc8ad977c 100644
--- a/contrib/llvm/lib/Transforms/ObjCARC/PtrState.cpp
+++ b/contrib/llvm/lib/Transforms/ObjCARC/PtrState.cpp
@@ -201,7 +201,7 @@ bool BottomUpPtrState::MatchWithRetain() {
// imprecise release, clear our reverse insertion points.
if (OldSeq != S_Use || IsTrackingImpreciseReleases())
ClearReverseInsertPts();
- // FALL THROUGH
+ LLVM_FALLTHROUGH;
case S_CanRelease:
return true;
case S_None:
@@ -332,7 +332,7 @@ bool TopDownPtrState::MatchWithRelease(ARCMDKindCache &Cache,
case S_CanRelease:
if (OldSeq == S_Retain || ReleaseMetadata != nullptr)
ClearReverseInsertPts();
- // FALL THROUGH
+ LLVM_FALLTHROUGH;
case S_Use:
SetReleaseMetadata(ReleaseMetadata);
SetTailCallRelease(cast<CallInst>(Release)->isTailCall());
diff --git a/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp b/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp
index 0eed0240c741..adc903cab31b 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ADCE.cpp
@@ -15,14 +15,19 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar/ADCE.h"
+
#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/IteratedDominanceFrontier.h"
+#include "llvm/Analysis/PostDominators.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -34,9 +39,372 @@ using namespace llvm;
#define DEBUG_TYPE "adce"
STATISTIC(NumRemoved, "Number of instructions removed");
+STATISTIC(NumBranchesRemoved, "Number of branch instructions removed");
+
+// This is a temporary option until we change the interface
+// to this pass based on optimization level.
+static cl::opt<bool> RemoveControlFlowFlag("adce-remove-control-flow",
+ cl::init(true), cl::Hidden);
+
+// This option enables removal of may-be-infinite loops which have no other
+// effect.
+static cl::opt<bool> RemoveLoops("adce-remove-loops", cl::init(false),
+ cl::Hidden);
+
+namespace {
+/// Information about Instructions
+struct InstInfoType {
+ /// True if the associated instruction is live.
+ bool Live = false;
+ /// Quick access to information for block containing associated Instruction.
+ struct BlockInfoType *Block = nullptr;
+};
+
+/// Information about basic blocks relevant to dead code elimination.
+struct BlockInfoType {
+ /// True when this block contains a live instruction.
+ bool Live = false;
+ /// True when this block ends in an unconditional branch.
+ bool UnconditionalBranch = false;
+ /// True when this block is known to have live PHI nodes.
+ bool HasLivePhiNodes = false;
+ /// Control dependence sources need to be live for this block.
+ bool CFLive = false;
+
+ /// Quick access to the LiveInfo for the terminator,
+ /// holds the value &InstInfo[Terminator]
+ InstInfoType *TerminatorLiveInfo = nullptr;
+
+ bool terminatorIsLive() const { return TerminatorLiveInfo->Live; }
+
+ /// Corresponding BasicBlock.
+ BasicBlock *BB = nullptr;
+
+ /// Cache of BB->getTerminator().
+ TerminatorInst *Terminator = nullptr;
+
+ /// Post-order numbering of reverse control flow graph.
+ unsigned PostOrder;
+};
+
+class AggressiveDeadCodeElimination {
+ Function &F;
+ PostDominatorTree &PDT;
+
+ /// Mapping of blocks to associated information, an element in BlockInfoVec.
+ DenseMap<BasicBlock *, BlockInfoType> BlockInfo;
+ bool isLive(BasicBlock *BB) { return BlockInfo[BB].Live; }
+
+ /// Mapping of instructions to associated information.
+ DenseMap<Instruction *, InstInfoType> InstInfo;
+ bool isLive(Instruction *I) { return InstInfo[I].Live; }
+
+ /// Instructions known to be live where we need to mark
+ /// reaching definitions as live.
+ SmallVector<Instruction *, 128> Worklist;
+ /// Debug info scopes around a live instruction.
+ SmallPtrSet<const Metadata *, 32> AliveScopes;
+
+ /// Set of blocks not known to have live terminators.
+ SmallPtrSet<BasicBlock *, 16> BlocksWithDeadTerminators;
+
+ /// The set of blocks for which we have determined that their control
+ /// dependence sources must be live and which have not yet had
+ /// those dependences analyzed.
+ SmallPtrSet<BasicBlock *, 16> NewLiveBlocks;
+
+ /// Set up auxiliary data structures for Instructions and BasicBlocks and
+ /// initialize the Worklist to the set of must-be-live Instructions.
+ void initialize();
+ /// Return true for operations which are always treated as live.
+ bool isAlwaysLive(Instruction &I);
+ /// Return true for instrumentation instructions for value profiling.
+ bool isInstrumentsConstant(Instruction &I);
+
+ /// Propagate liveness to reaching definitions.
+ void markLiveInstructions();
+ /// Mark an instruction as live.
+ void markLive(Instruction *I);
+ /// Mark a block as live.
+ void markLive(BlockInfoType &BB);
+ void markLive(BasicBlock *BB) { markLive(BlockInfo[BB]); }
+
+ /// Mark terminators of control predecessors of a PHI node live.
+ void markPhiLive(PHINode *PN);
+
+ /// Record the Debug Scopes which surround live debug information.
+ void collectLiveScopes(const DILocalScope &LS);
+ void collectLiveScopes(const DILocation &DL);
+
+ /// Analyze dead branches to find those whose branches are the sources
+ /// of control dependences impacting a live block. Those branches are
+ /// marked live.
+ void markLiveBranchesFromControlDependences();
+
+ /// Remove instructions not marked live; return whether any instruction
+ /// was removed.
+ bool removeDeadInstructions();
+
+ /// Identify connected sections of the control flow graph which have
+ /// dead terminators and rewrite the control flow graph to remove them.
+ void updateDeadRegions();
+
+ /// Set the BlockInfo::PostOrder field based on a post-order
+ /// numbering of the reverse control flow graph.
+ void computeReversePostOrder();
+
+ /// Make the terminator of this block an unconditional branch to \p Target.
+ void makeUnconditional(BasicBlock *BB, BasicBlock *Target);
+
+public:
+ AggressiveDeadCodeElimination(Function &F, PostDominatorTree &PDT)
+ : F(F), PDT(PDT) {}
+ bool performDeadCodeElimination();
+};
+}
+
+bool AggressiveDeadCodeElimination::performDeadCodeElimination() {
+ initialize();
+ markLiveInstructions();
+ return removeDeadInstructions();
+}
+
+static bool isUnconditionalBranch(TerminatorInst *Term) {
+ auto *BR = dyn_cast<BranchInst>(Term);
+ return BR && BR->isUnconditional();
+}
+
+void AggressiveDeadCodeElimination::initialize() {
+
+ auto NumBlocks = F.size();
+
+ // We will have an entry in the map for each block so we grow the
+ // structure to twice that size to keep the load factor low in the hash table.
+ BlockInfo.reserve(NumBlocks);
+ size_t NumInsts = 0;
+
+ // Iterate over blocks and initialize BlockInfoVec entries, count
+ // instructions to size the InstInfo hash table.
+ for (auto &BB : F) {
+ NumInsts += BB.size();
+ auto &Info = BlockInfo[&BB];
+ Info.BB = &BB;
+ Info.Terminator = BB.getTerminator();
+ Info.UnconditionalBranch = isUnconditionalBranch(Info.Terminator);
+ }
+
+ // Initialize instruction map and set pointers to block info.
+ InstInfo.reserve(NumInsts);
+ for (auto &BBInfo : BlockInfo)
+ for (Instruction &I : *BBInfo.second.BB)
+ InstInfo[&I].Block = &BBInfo.second;
+
+ // Since BlockInfoVec holds pointers into InstInfo and vice-versa, we may not
+ // add any more elements to either after this point.
+ for (auto &BBInfo : BlockInfo)
+ BBInfo.second.TerminatorLiveInfo = &InstInfo[BBInfo.second.Terminator];
+
+ // Collect the set of "root" instructions that are known live.
+ for (Instruction &I : instructions(F))
+ if (isAlwaysLive(I))
+ markLive(&I);
+
+ if (!RemoveControlFlowFlag)
+ return;
+
+ if (!RemoveLoops) {
+ // This stores state for the depth-first iterator. In addition
+ // to recording which nodes have been visited we also record whether
+ // a node is currently on the "stack" of active ancestors of the current
+ // node.
+ typedef DenseMap<BasicBlock *, bool> StatusMap;
+ class DFState : public StatusMap {
+ public:
+ std::pair<StatusMap::iterator, bool> insert(BasicBlock *BB) {
+ return StatusMap::insert(std::make_pair(BB, true));
+ }
+
+ // Invoked after we have visited all children of a node.
+ void completed(BasicBlock *BB) { (*this)[BB] = false; }
+
+ // Return true if \p BB is currently on the active stack
+ // of ancestors.
+ bool onStack(BasicBlock *BB) {
+ auto Iter = find(BB);
+ return Iter != end() && Iter->second;
+ }
+ } State;
+
+ State.reserve(F.size());
+ // Iterate over blocks in depth-first pre-order and
+ // treat edges to a block still on the DFS stack as loop back edges
+ // and mark the branch live if there is a back edge.
+ for (auto *BB: depth_first_ext(&F.getEntryBlock(), State)) {
+ TerminatorInst *Term = BB->getTerminator();
+ if (isLive(Term))
+ continue;
+
+ for (auto *Succ : successors(BB))
+ if (State.onStack(Succ)) {
+ // back edge....
+ markLive(Term);
+ break;
+ }
+ }
+ }
+
+ // Mark blocks live if there is no path from the block to the
+ // return of the function or a successor for which this is true.
+ // This protects IDFCalculator which cannot handle such blocks.
+ for (auto &BBInfoPair : BlockInfo) {
+ auto &BBInfo = BBInfoPair.second;
+ if (BBInfo.terminatorIsLive())
+ continue;
+ auto *BB = BBInfo.BB;
+ if (!PDT.getNode(BB)) {
+ DEBUG(dbgs() << "Not post-dominated by return: " << BB->getName()
+ << '\n';);
+ markLive(BBInfo.Terminator);
+ continue;
+ }
+ for (auto *Succ : successors(BB))
+ if (!PDT.getNode(Succ)) {
+ DEBUG(dbgs() << "Successor not post-dominated by return: "
+ << BB->getName() << '\n';);
+ markLive(BBInfo.Terminator);
+ break;
+ }
+ }
+
+ // Treat the entry block as always live
+ auto *BB = &F.getEntryBlock();
+ auto &EntryInfo = BlockInfo[BB];
+ EntryInfo.Live = true;
+ if (EntryInfo.UnconditionalBranch)
+ markLive(EntryInfo.Terminator);
+
+ // Build initial collection of blocks with dead terminators
+ for (auto &BBInfo : BlockInfo)
+ if (!BBInfo.second.terminatorIsLive())
+ BlocksWithDeadTerminators.insert(BBInfo.second.BB);
+}
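
The back-edge marking in initialize() is what keeps possibly non-terminating loops alive by default (RemoveLoops is off). A small, purely illustrative source example of the kind of code it protects: nothing computed in the loop below is used afterwards and the body has no side effects, so no instruction in it is a liveness root; only the live back edge keeps the branch, and with -adce-remove-loops the whole loop could be deleted even though it never terminates for n == 0.

    // Illustrative only; not part of the pass.
    unsigned busyWait(unsigned n) {
      unsigned x = n;
      while (x != 1)
        x = (x & 1) ? 3 * x + 1 : x / 2;  // result never used below
      return 0;
    }
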
+
+bool AggressiveDeadCodeElimination::isAlwaysLive(Instruction &I) {
+ // TODO -- use llvm::isInstructionTriviallyDead
+ if (I.isEHPad() || I.mayHaveSideEffects()) {
+ // Skip any value profile instrumentation calls if they are
+ // instrumenting constants.
+ if (isInstrumentsConstant(I))
+ return false;
+ return true;
+ }
+ if (!isa<TerminatorInst>(I))
+ return false;
+ if (RemoveControlFlowFlag && (isa<BranchInst>(I) || isa<SwitchInst>(I)))
+ return false;
+ return true;
+}
+
+// Check if this instruction is a runtime call for value profiling and
+// if it's instrumenting a constant.
+bool AggressiveDeadCodeElimination::isInstrumentsConstant(Instruction &I) {
+ // TODO -- move this test into llvm::isInstructionTriviallyDead
+ if (CallInst *CI = dyn_cast<CallInst>(&I))
+ if (Function *Callee = CI->getCalledFunction())
+ if (Callee->getName().equals(getInstrProfValueProfFuncName()))
+ if (isa<Constant>(CI->getArgOperand(0)))
+ return true;
+ return false;
+}
+
+void AggressiveDeadCodeElimination::markLiveInstructions() {
+
+ // Propagate liveness backwards to operands.
+ do {
+ // Worklist holds newly discovered live instructions
+ // where we need to mark the inputs as live.
+ while (!Worklist.empty()) {
+ Instruction *LiveInst = Worklist.pop_back_val();
+ DEBUG(dbgs() << "work live: "; LiveInst->dump(););
+
+ for (Use &OI : LiveInst->operands())
+ if (Instruction *Inst = dyn_cast<Instruction>(OI))
+ markLive(Inst);
+
+ if (auto *PN = dyn_cast<PHINode>(LiveInst))
+ markPhiLive(PN);
+ }
-static void collectLiveScopes(const DILocalScope &LS,
- SmallPtrSetImpl<const Metadata *> &AliveScopes) {
+ // After data flow liveness has been identified, examine which branch
+ // decisions are required to ensure that live instructions are executed.
+ markLiveBranchesFromControlDependences();
+
+ } while (!Worklist.empty());
+}
+
+void AggressiveDeadCodeElimination::markLive(Instruction *I) {
+
+ auto &Info = InstInfo[I];
+ if (Info.Live)
+ return;
+
+ DEBUG(dbgs() << "mark live: "; I->dump());
+ Info.Live = true;
+ Worklist.push_back(I);
+
+ // Collect the live debug info scopes attached to this instruction.
+ if (const DILocation *DL = I->getDebugLoc())
+ collectLiveScopes(*DL);
+
+ // Mark the containing block live
+ auto &BBInfo = *Info.Block;
+ if (BBInfo.Terminator == I) {
+ BlocksWithDeadTerminators.erase(BBInfo.BB);
+ // For live terminators, mark destination blocks
+ // live to preserve these control flow edges.
+ if (!BBInfo.UnconditionalBranch)
+ for (auto *BB : successors(I->getParent()))
+ markLive(BB);
+ }
+ markLive(BBInfo);
+}
+
+void AggressiveDeadCodeElimination::markLive(BlockInfoType &BBInfo) {
+ if (BBInfo.Live)
+ return;
+ DEBUG(dbgs() << "mark block live: " << BBInfo.BB->getName() << '\n');
+ BBInfo.Live = true;
+ if (!BBInfo.CFLive) {
+ BBInfo.CFLive = true;
+ NewLiveBlocks.insert(BBInfo.BB);
+ }
+
+ // Mark unconditional branches at the end of live
+ // blocks as live since there is no work to do for them later
+ if (BBInfo.UnconditionalBranch)
+ markLive(BBInfo.Terminator);
+}
+
+void AggressiveDeadCodeElimination::collectLiveScopes(const DILocalScope &LS) {
if (!AliveScopes.insert(&LS).second)
return;
@@ -44,75 +412,115 @@ static void collectLiveScopes(const DILocalScope &LS,
return;
// Tail-recurse through the scope chain.
- collectLiveScopes(cast<DILocalScope>(*LS.getScope()), AliveScopes);
+ collectLiveScopes(cast<DILocalScope>(*LS.getScope()));
}
-static void collectLiveScopes(const DILocation &DL,
- SmallPtrSetImpl<const Metadata *> &AliveScopes) {
+void AggressiveDeadCodeElimination::collectLiveScopes(const DILocation &DL) {
// Even though DILocations are not scopes, shove them into AliveScopes so we
// don't revisit them.
if (!AliveScopes.insert(&DL).second)
return;
// Collect live scopes from the scope chain.
- collectLiveScopes(*DL.getScope(), AliveScopes);
+ collectLiveScopes(*DL.getScope());
// Tail-recurse through the inlined-at chain.
if (const DILocation *IA = DL.getInlinedAt())
- collectLiveScopes(*IA, AliveScopes);
+ collectLiveScopes(*IA);
}
-// Check if this instruction is a runtime call for value profiling and
-// if it's instrumenting a constant.
-static bool isInstrumentsConstant(Instruction &I) {
- if (CallInst *CI = dyn_cast<CallInst>(&I))
- if (Function *Callee = CI->getCalledFunction())
- if (Callee->getName().equals(getInstrProfValueProfFuncName()))
- if (isa<Constant>(CI->getArgOperand(0)))
- return true;
- return false;
+void AggressiveDeadCodeElimination::markPhiLive(PHINode *PN) {
+ auto &Info = BlockInfo[PN->getParent()];
+ // Only need to check this once per block.
+ if (Info.HasLivePhiNodes)
+ return;
+ Info.HasLivePhiNodes = true;
+
+ // If a predecessor block is not live, mark it as control-flow live
+ // which will trigger marking live branches upon which
+ // that block is control dependent.
+ for (auto *PredBB : predecessors(Info.BB)) {
+ auto &Info = BlockInfo[PredBB];
+ if (!Info.CFLive) {
+ Info.CFLive = true;
+ NewLiveBlocks.insert(PredBB);
+ }
+ }
}
-static bool aggressiveDCE(Function& F) {
- SmallPtrSet<Instruction*, 32> Alive;
- SmallVector<Instruction*, 128> Worklist;
+void AggressiveDeadCodeElimination::markLiveBranchesFromControlDependences() {
- // Collect the set of "root" instructions that are known live.
- for (Instruction &I : instructions(F)) {
- if (isa<TerminatorInst>(I) || I.isEHPad() || I.mayHaveSideEffects()) {
- // Skip any value profile instrumentation calls if they are
- // instrumenting constants.
- if (isInstrumentsConstant(I))
- continue;
- Alive.insert(&I);
- Worklist.push_back(&I);
- }
+ if (BlocksWithDeadTerminators.empty())
+ return;
+
+ DEBUG({
+ dbgs() << "new live blocks:\n";
+ for (auto *BB : NewLiveBlocks)
+ dbgs() << "\t" << BB->getName() << '\n';
+ dbgs() << "dead terminator blocks:\n";
+ for (auto *BB : BlocksWithDeadTerminators)
+ dbgs() << "\t" << BB->getName() << '\n';
+ });
+
+ // The dominance frontier of a live block X in the reverse
+ // control graph is the set of blocks upon which X is control
+ // dependent. The following sequence computes the set of blocks
+ // which currently have dead terminators that are control
+ // dependence sources of a block which is in NewLiveBlocks.
+
+ SmallVector<BasicBlock *, 32> IDFBlocks;
+ ReverseIDFCalculator IDFs(PDT);
+ IDFs.setDefiningBlocks(NewLiveBlocks);
+ IDFs.setLiveInBlocks(BlocksWithDeadTerminators);
+ IDFs.calculate(IDFBlocks);
+ NewLiveBlocks.clear();
+
+ // Dead terminators which control live blocks are now marked live.
+ for (auto *BB : IDFBlocks) {
+ DEBUG(dbgs() << "live control in: " << BB->getName() << '\n');
+ markLive(BB->getTerminator());
}
+}
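
Control dependence here is dominance-frontier membership in the reverse CFG: a block B is control dependent on the branch in P exactly when P lies in B's reverse dominance frontier. The code above therefore feeds the newly live blocks to the IDF calculator over the post-dominator tree and restricts the answer to blocks whose terminators are still dead. Restated as a standalone helper (the name is illustrative; the calls mirror the ones above):

    #include "llvm/ADT/SmallPtrSet.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/Analysis/IteratedDominanceFrontier.h"
    #include "llvm/Analysis/PostDominators.h"
    using namespace llvm;

    // Return, in Result, the members of CandidateBlocks holding branches that
    // some block in LiveBlocks is control dependent on.
    static void findControllingBranches(PostDominatorTree &PDT,
                                        const SmallPtrSetImpl<BasicBlock *> &LiveBlocks,
                                        const SmallPtrSetImpl<BasicBlock *> &CandidateBlocks,
                                        SmallVectorImpl<BasicBlock *> &Result) {
      ReverseIDFCalculator IDFs(PDT);
      IDFs.setDefiningBlocks(LiveBlocks);
      IDFs.setLiveInBlocks(CandidateBlocks);
      IDFs.calculate(Result);
    }
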
- // Propagate liveness backwards to operands. Keep track of live debug info
- // scopes.
- SmallPtrSet<const Metadata *, 32> AliveScopes;
- while (!Worklist.empty()) {
- Instruction *Curr = Worklist.pop_back_val();
+//===----------------------------------------------------------------------===//
+//
+// Routines to update the CFG and SSA information before removing dead code.
+//
+//===----------------------------------------------------------------------===//
+bool AggressiveDeadCodeElimination::removeDeadInstructions() {
- // Collect the live debug info scopes attached to this instruction.
- if (const DILocation *DL = Curr->getDebugLoc())
- collectLiveScopes(*DL, AliveScopes);
+ // Updates control and dataflow around dead blocks
+ updateDeadRegions();
- for (Use &OI : Curr->operands()) {
- if (Instruction *Inst = dyn_cast<Instruction>(OI))
- if (Alive.insert(Inst).second)
- Worklist.push_back(Inst);
+ DEBUG({
+ for (Instruction &I : instructions(F)) {
+ // Check if the instruction is alive.
+ if (isLive(&I))
+ continue;
+
+ if (auto *DII = dyn_cast<DbgInfoIntrinsic>(&I)) {
+ // Check if the scope of this variable location is alive.
+ if (AliveScopes.count(DII->getDebugLoc()->getScope()))
+ continue;
+
+ // If intrinsic is pointing at a live SSA value, there may be an
+ // earlier optimization bug: if we know the location of the variable,
+ // why isn't the scope of the location alive?
+ if (Value *V = DII->getVariableLocation())
+ if (Instruction *II = dyn_cast<Instruction>(V))
+ if (isLive(II))
+ dbgs() << "Dropping debug info for " << *DII << "\n";
+ }
}
- }
+ });
// The inverse of the live set is the dead set. These are those instructions
- // which have no side effects and do not influence the control flow or return
+ // that have no side effects and do not influence the control flow or return
// value of the function, and may therefore be deleted safely.
// NOTE: We reuse the Worklist vector here for memory efficiency.
for (Instruction &I : instructions(F)) {
// Check if the instruction is alive.
- if (Alive.count(&I))
+ if (isLive(&I))
continue;
if (auto *DII = dyn_cast<DbgInfoIntrinsic>(&I)) {
@@ -121,15 +529,6 @@ static bool aggressiveDCE(Function& F) {
continue;
// Fallthrough and drop the intrinsic.
- DEBUG({
- // If intrinsic is pointing at a live SSA value, there may be an
- // earlier optimization bug: if we know the location of the variable,
- // why isn't the scope of the location alive?
- if (Value *V = DII->getVariableLocation())
- if (Instruction *II = dyn_cast<Instruction>(V))
- if (Alive.count(II))
- dbgs() << "Dropping debug info for " << *DII << "\n";
- });
}
// Prepare to delete.
@@ -145,8 +544,104 @@ static bool aggressiveDCE(Function& F) {
return !Worklist.empty();
}
-PreservedAnalyses ADCEPass::run(Function &F, FunctionAnalysisManager &) {
- if (!aggressiveDCE(F))
+// A dead region is the set of dead blocks with a common live post-dominator.
+void AggressiveDeadCodeElimination::updateDeadRegions() {
+
+ DEBUG({
+ dbgs() << "final dead terminator blocks: " << '\n';
+ for (auto *BB : BlocksWithDeadTerminators)
+ dbgs() << '\t' << BB->getName()
+ << (BlockInfo[BB].Live ? " LIVE\n" : "\n");
+ });
+
+ // Don't compute the post ordering unless we need it.
+ bool HavePostOrder = false;
+
+ for (auto *BB : BlocksWithDeadTerminators) {
+ auto &Info = BlockInfo[BB];
+ if (Info.UnconditionalBranch) {
+ InstInfo[Info.Terminator].Live = true;
+ continue;
+ }
+
+ if (!HavePostOrder) {
+ computeReversePostOrder();
+ HavePostOrder = true;
+ }
+
+ // Add an unconditional branch to the successor closest to the
+ // end of the function which ensures a path to the exit for each
+ // live edge.
+ BlockInfoType *PreferredSucc = nullptr;
+ for (auto *Succ : successors(BB)) {
+ auto *Info = &BlockInfo[Succ];
+ if (!PreferredSucc || PreferredSucc->PostOrder < Info->PostOrder)
+ PreferredSucc = Info;
+ }
+ assert((PreferredSucc && PreferredSucc->PostOrder > 0) &&
+ "Failed to find safe successor for dead branc");
+ bool First = true;
+ for (auto *Succ : successors(BB)) {
+ if (!First || Succ != PreferredSucc->BB)
+ Succ->removePredecessor(BB);
+ else
+ First = false;
+ }
+ makeUnconditional(BB, PreferredSucc->BB);
+ NumBranchesRemoved += 1;
+ }
+}
+
+// reverse top-sort order
+void AggressiveDeadCodeElimination::computeReversePostOrder() {
+
+ // This provides a post-order numbering of the reverse control flow graph.
+ // Note that it is incomplete in the presence of infinite loops, but we don't
+ // need to number blocks which don't reach the end of the function since
+ // all branches in those blocks are forced live.
+
+ // For each block without successors, extend the DFS from the block
+ // backward through the graph.
+ SmallPtrSet<BasicBlock*, 16> Visited;
+ unsigned PostOrder = 0;
+ for (auto &BB : F) {
+ if (succ_begin(&BB) != succ_end(&BB))
+ continue;
+ for (BasicBlock *Block : inverse_post_order_ext(&BB, Visited))
+ BlockInfo[Block].PostOrder = PostOrder++;
+ }
+}
+
+void AggressiveDeadCodeElimination::makeUnconditional(BasicBlock *BB,
+ BasicBlock *Target) {
+ TerminatorInst *PredTerm = BB->getTerminator();
+ // Collect the live debug info scopes attached to this instruction.
+ if (const DILocation *DL = PredTerm->getDebugLoc())
+ collectLiveScopes(*DL);
+
+ // Just mark an existing unconditional branch live.
+ if (isUnconditionalBranch(PredTerm)) {
+ PredTerm->setSuccessor(0, Target);
+ InstInfo[PredTerm].Live = true;
+ return;
+ }
+ DEBUG(dbgs() << "making unconditional " << BB->getName() << '\n');
+ NumBranchesRemoved += 1;
+ IRBuilder<> Builder(PredTerm);
+ auto *NewTerm = Builder.CreateBr(Target);
+ InstInfo[NewTerm].Live = true;
+ if (const DILocation *DL = PredTerm->getDebugLoc())
+ NewTerm->setDebugLoc(DL);
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Pass Manager integration code
+//
+//===----------------------------------------------------------------------===//
+PreservedAnalyses ADCEPass::run(Function &F, FunctionAnalysisManager &FAM) {
+ auto &PDT = FAM.getResult<PostDominatorTreeAnalysis>(F);
+ if (!AggressiveDeadCodeElimination(F, PDT).performDeadCodeElimination())
return PreservedAnalyses::all();
// FIXME: This should also 'preserve the CFG'.
@@ -162,21 +657,27 @@ struct ADCELegacyPass : public FunctionPass {
initializeADCELegacyPassPass(*PassRegistry::getPassRegistry());
}
- bool runOnFunction(Function& F) override {
+ bool runOnFunction(Function &F) override {
if (skipFunction(F))
return false;
- return aggressiveDCE(F);
+ auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
+ return AggressiveDeadCodeElimination(F, PDT).performDeadCodeElimination();
}
- void getAnalysisUsage(AnalysisUsage& AU) const override {
- AU.setPreservesCFG();
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<PostDominatorTreeWrapperPass>();
+ if (!RemoveControlFlowFlag)
+ AU.setPreservesCFG();
AU.addPreserved<GlobalsAAWrapperPass>();
}
};
}
char ADCELegacyPass::ID = 0;
-INITIALIZE_PASS(ADCELegacyPass, "adce", "Aggressive Dead Code Elimination",
- false, false)
+INITIALIZE_PASS_BEGIN(ADCELegacyPass, "adce",
+ "Aggressive Dead Code Elimination", false, false)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
+INITIALIZE_PASS_END(ADCELegacyPass, "adce", "Aggressive Dead Code Elimination",
+ false, false)
FunctionPass *llvm::createAggressiveDCEPass() { return new ADCELegacyPass(); }
diff --git a/contrib/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/contrib/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
index 7f8b8ce91e79..c1df3173c0fc 100644
--- a/contrib/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp
@@ -297,6 +297,11 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall) {
if (!extractAlignmentInfo(ACall, AAPtr, AlignSCEV, OffSCEV))
return false;
+ // Skip ConstantPointerNull and UndefValue. Assumptions on these shouldn't
+ // affect other users.
+ if (isa<ConstantData>(AAPtr))
+ return false;
+
const SCEV *AASCEV = SE->getSCEV(AAPtr);
// Apply the assumption to all other users of the specified pointer.
@@ -434,6 +439,11 @@ AlignmentFromAssumptionsPass::run(Function &F, FunctionAnalysisManager &AM) {
ScalarEvolution &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
bool Changed = runImpl(F, AC, &SE, &DT);
+
+ // FIXME: We need to invalidate this to avoid PR28400. Is there a better
+ // solution?
+ AM.invalidate<ScalarEvolutionAnalysis>(F);
+
if (!Changed)
return PreservedAnalyses::all();
PreservedAnalyses PA;
diff --git a/contrib/llvm/lib/Transforms/Scalar/BDCE.cpp b/contrib/llvm/lib/Transforms/Scalar/BDCE.cpp
index 4f6225f4c7b0..251b38707769 100644
--- a/contrib/llvm/lib/Transforms/Scalar/BDCE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/BDCE.cpp
@@ -39,6 +39,12 @@ static bool bitTrackingDCE(Function &F, DemandedBits &DB) {
SmallVector<Instruction*, 128> Worklist;
bool Changed = false;
for (Instruction &I : instructions(F)) {
+ // If the instruction has side effects and no non-dbg uses,
+ // skip it. This way we avoid computing known bits on an instruction
+ // that will not help us.
+ if (I.mayHaveSideEffects() && I.use_empty())
+ continue;
+
if (I.getType()->isIntegerTy() &&
!DB.getDemandedBits(&I).getBoolValue()) {
// For live instructions that have all dead bits, first make them dead by
@@ -50,7 +56,7 @@ static bool bitTrackingDCE(Function &F, DemandedBits &DB) {
// undef, poison, etc.
Value *Zero = ConstantInt::get(I.getType(), 0);
++NumSimplified;
- I.replaceAllUsesWith(Zero);
+ I.replaceNonMetadataUsesWith(Zero);
Changed = true;
}
if (!DB.isInstructionDead(&I))
diff --git a/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp b/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
index 913e939c2bd4..38262514c9ec 100644
--- a/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/ConstantHoisting.cpp
@@ -64,7 +64,7 @@ public:
bool runOnFunction(Function &Fn) override;
- const char *getPassName() const override { return "Constant Hoisting"; }
+ StringRef getPassName() const override { return "Constant Hoisting"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
@@ -444,7 +444,7 @@ void ConstantHoistingPass::findBaseConstants() {
/// \brief Updates the operand at Idx in instruction Inst with the result of
/// instruction Mat. If the instruction is a PHI node then special
-/// handling for duplicate values form the same incomming basic block is
+/// handling for duplicate values from the same incoming basic block is
/// required.
 /// \return The update will always succeed, but the return value indicates if
/// Mat was used for the update or not.
diff --git a/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index c0fed0533392..141e99b92cdc 100644
--- a/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -18,6 +18,7 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/IR/CFG.h"
+#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
@@ -37,6 +38,7 @@ STATISTIC(NumCmps, "Number of comparisons propagated");
STATISTIC(NumReturns, "Number of return values propagated");
STATISTIC(NumDeadCases, "Number of switch cases removed");
STATISTIC(NumSDivs, "Number of sdiv converted to udiv");
+STATISTIC(NumAShrs, "Number of ashr converted to lshr");
STATISTIC(NumSRems, "Number of srem converted to urem");
namespace {
@@ -381,6 +383,78 @@ static bool processSDiv(BinaryOperator *SDI, LazyValueInfo *LVI) {
return true;
}
+static bool processAShr(BinaryOperator *SDI, LazyValueInfo *LVI) {
+ if (SDI->getType()->isVectorTy() || hasLocalDefs(SDI))
+ return false;
+
+ Constant *Zero = ConstantInt::get(SDI->getType(), 0);
+ if (LVI->getPredicateAt(ICmpInst::ICMP_SGE, SDI->getOperand(0), Zero, SDI) !=
+ LazyValueInfo::True)
+ return false;
+
+ ++NumAShrs;
+ auto *BO = BinaryOperator::CreateLShr(SDI->getOperand(0), SDI->getOperand(1),
+ SDI->getName(), SDI);
+ BO->setIsExact(SDI->isExact());
+ SDI->replaceAllUsesWith(BO);
+ SDI->eraseFromParent();
+
+ return true;
+}
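
A source-level picture of what processAShr buys: once LazyValueInfo proves the shifted value is non-negative at the shift, an arithmetic and a logical right shift agree, so the ashr can become an lshr (keeping the exact flag). The guard below is just one hypothetical way such a fact becomes known:

    // Illustrative only.
    int halveNonNegative(int x) {
      if (x < 0)
        return 0;    // on the path below, LVI knows x >= 0
      return x >> 1; // ashr in IR; CVP can rewrite it to lshr here
    }
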
+
+static bool processAdd(BinaryOperator *AddOp, LazyValueInfo *LVI) {
+ typedef OverflowingBinaryOperator OBO;
+
+ if (AddOp->getType()->isVectorTy() || hasLocalDefs(AddOp))
+ return false;
+
+ bool NSW = AddOp->hasNoSignedWrap();
+ bool NUW = AddOp->hasNoUnsignedWrap();
+ if (NSW && NUW)
+ return false;
+
+ BasicBlock *BB = AddOp->getParent();
+
+ Value *LHS = AddOp->getOperand(0);
+ Value *RHS = AddOp->getOperand(1);
+
+ ConstantRange LRange = LVI->getConstantRange(LHS, BB, AddOp);
+
+ // Initialize RRange only if we need it. If we know that the guaranteed
+ // no-wrap range for the given LHS range is empty, don't spend time
+ // calculating the range for the RHS.
+ Optional<ConstantRange> RRange;
+ auto LazyRRange = [&] () {
+ if (!RRange)
+ RRange = LVI->getConstantRange(RHS, BB, AddOp);
+ return RRange.getValue();
+ };
+
+ bool Changed = false;
+ if (!NUW) {
+ ConstantRange NUWRange =
+ LRange.makeGuaranteedNoWrapRegion(BinaryOperator::Add, LRange,
+ OBO::NoUnsignedWrap);
+ if (!NUWRange.isEmptySet()) {
+ bool NewNUW = NUWRange.contains(LazyRRange());
+ AddOp->setHasNoUnsignedWrap(NewNUW);
+ Changed |= NewNUW;
+ }
+ }
+ if (!NSW) {
+ ConstantRange NSWRange =
+ LRange.makeGuaranteedNoWrapRegion(BinaryOperator::Add, LRange,
+ OBO::NoSignedWrap);
+ if (!NSWRange.isEmptySet()) {
+ bool NewNSW = NSWRange.contains(LazyRRange());
+ AddOp->setHasNoSignedWrap(NewNSW);
+ Changed |= NewNSW;
+ }
+ }
+
+ return Changed;
+}
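
processAdd asks ConstantRange for the region of values that can be added to everything in the LHS range without wrapping, and tags the add nuw/nsw when the RHS range sits inside it; the RHS range is only computed lazily because that region is often empty. A worked sketch with assumed i8 ranges (standalone illustration, not pass code):

    #include "llvm/ADT/APInt.h"
    #include "llvm/IR/ConstantRange.h"
    #include "llvm/IR/Instruction.h"
    #include "llvm/IR/Operator.h"
    using namespace llvm;

    // Suppose LVI reports LHS in [0, 100) and RHS in [0, 20) for an i8 add.
    // Adding anything in [0, 20) to a value below 100 cannot exceed 255, so
    // the RHS range lies inside the guaranteed no-unsigned-wrap region and the
    // add may be tagged nuw.  The nsw check is analogous.
    static bool exampleAddCanBeNUW() {
      ConstantRange LHS(APInt(8, 0), APInt(8, 100));
      ConstantRange RHS(APInt(8, 0), APInt(8, 20));
      ConstantRange NUW = ConstantRange::makeGuaranteedNoWrapRegion(
          Instruction::Add, LHS, OverflowingBinaryOperator::NoUnsignedWrap);
      return NUW.contains(RHS); // expected true for these ranges
    }
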
+
static Constant *getConstantAt(Value *V, Instruction *At, LazyValueInfo *LVI) {
if (Constant *C = LVI->getConstant(V, At->getParent(), At))
return C;
@@ -407,9 +481,14 @@ static Constant *getConstantAt(Value *V, Instruction *At, LazyValueInfo *LVI) {
static bool runImpl(Function &F, LazyValueInfo *LVI) {
bool FnChanged = false;
- for (BasicBlock &BB : F) {
+ // Visiting in a pre-order depth-first traversal causes us to simplify early
+ // blocks before querying later blocks (which require us to analyze early
+ // blocks). Eagerly simplifying shallow blocks means there is strictly less
+ // work to do for deep blocks. This also means we don't visit unreachable
+ // blocks.
+ for (BasicBlock *BB : depth_first(&F.getEntryBlock())) {
bool BBChanged = false;
- for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
+ for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
Instruction *II = &*BI++;
switch (II->getOpcode()) {
case Instruction::Select:
@@ -436,10 +515,16 @@ static bool runImpl(Function &F, LazyValueInfo *LVI) {
case Instruction::SDiv:
BBChanged |= processSDiv(cast<BinaryOperator>(II), LVI);
break;
+ case Instruction::AShr:
+ BBChanged |= processAShr(cast<BinaryOperator>(II), LVI);
+ break;
+ case Instruction::Add:
+ BBChanged |= processAdd(cast<BinaryOperator>(II), LVI);
+ break;
}
}
- Instruction *Term = BB.getTerminator();
+ Instruction *Term = BB->getTerminator();
switch (Term->getOpcode()) {
case Instruction::Switch:
BBChanged |= processSwitch(cast<SwitchInst>(Term), LVI);
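
The traversal change in runImpl above matters because simplifying early blocks first leaves strictly less work for the deeper blocks that query them, and unreachable blocks are never visited at all. The iteration pattern itself, as a minimal standalone sketch:

    #include "llvm/ADT/DepthFirstIterator.h"
    #include "llvm/IR/CFG.h"
    #include "llvm/IR/Function.h"
    using namespace llvm;

    // Pre-order DFS over the CFG from the entry block; blocks not reachable
    // from the entry are never visited.
    static void visitReachableBlocks(Function &F) {
      for (BasicBlock *BB : depth_first(&F.getEntryBlock())) {
        (void)BB; // process *BB here
      }
    }
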
diff --git a/contrib/llvm/lib/Transforms/Scalar/DCE.cpp b/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
index f73809d9f045..cc2a3cfaf9d1 100644
--- a/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/DCE.cpp
@@ -123,7 +123,7 @@ static bool eliminateDeadCode(Function &F, TargetLibraryInfo *TLI) {
return MadeChange;
}
-PreservedAnalyses DCEPass::run(Function &F, AnalysisManager<Function> &AM) {
+PreservedAnalyses DCEPass::run(Function &F, FunctionAnalysisManager &AM) {
if (eliminateDeadCode(F, AM.getCachedResult<TargetLibraryAnalysis>(F)))
return PreservedAnalyses::none();
return PreservedAnalyses::all();
diff --git a/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
index ed58a87ae1a8..4d4c3baef3f5 100644
--- a/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -59,6 +59,8 @@ EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking",
//===----------------------------------------------------------------------===//
// Helper functions
//===----------------------------------------------------------------------===//
+typedef std::map<int64_t, int64_t> OverlapIntervalsTy;
+typedef DenseMap<Instruction *, OverlapIntervalsTy> InstOverlapIntervalsTy;
/// Delete this instruction. Before we do, go through and zero out all the
/// operands of this instruction. If any of them become dead, delete them and
@@ -67,6 +69,8 @@ EnablePartialOverwriteTracking("enable-dse-partial-overwrite-tracking",
static void
deleteDeadInstruction(Instruction *I, BasicBlock::iterator *BBI,
MemoryDependenceResults &MD, const TargetLibraryInfo &TLI,
+ InstOverlapIntervalsTy &IOL,
+ DenseMap<Instruction*, size_t> *InstrOrdering,
SmallSetVector<Value *, 16> *ValueSet = nullptr) {
SmallVector<Instruction*, 32> NowDeadInsts;
@@ -99,13 +103,14 @@ deleteDeadInstruction(Instruction *I, BasicBlock::iterator *BBI,
NowDeadInsts.push_back(OpI);
}
+ if (ValueSet) ValueSet->remove(DeadInst);
+ InstrOrdering->erase(DeadInst);
+ IOL.erase(DeadInst);
if (NewIter == DeadInst->getIterator())
NewIter = DeadInst->eraseFromParent();
else
DeadInst->eraseFromParent();
-
- if (ValueSet) ValueSet->remove(DeadInst);
} while (!NowDeadInsts.empty());
*BBI = NewIter;
}
@@ -290,9 +295,6 @@ enum OverwriteResult {
};
}
-typedef DenseMap<Instruction *,
- std::map<int64_t, int64_t>> InstOverlapIntervalsTy;
-
/// Return 'OverwriteComplete' if a store to the 'Later' location completely
/// overwrites a store to the 'Earlier' location, 'OverwriteEnd' if the end of
/// the 'Earlier' location is completely overwritten by 'Later',
@@ -438,9 +440,9 @@ static OverwriteResult isOverwrite(const MemoryLocation &Later,
//
// In this case we may want to trim the size of earlier to avoid generating
// writes to addresses which will definitely be overwritten later
- if (LaterOff > EarlierOff &&
- LaterOff < int64_t(EarlierOff + Earlier.Size) &&
- int64_t(LaterOff + Later.Size) >= int64_t(EarlierOff + Earlier.Size))
+ if (!EnablePartialOverwriteTracking &&
+ (LaterOff > EarlierOff && LaterOff < int64_t(EarlierOff + Earlier.Size) &&
+ int64_t(LaterOff + Later.Size) >= int64_t(EarlierOff + Earlier.Size)))
return OverwriteEnd;
// Finally, we also need to check if the later store overwrites the beginning
@@ -452,9 +454,11 @@ static OverwriteResult isOverwrite(const MemoryLocation &Later,
// In this case we may want to move the destination address and trim the size
// of earlier to avoid generating writes to addresses which will definitely
// be overwritten later.
- if (LaterOff <= EarlierOff && int64_t(LaterOff + Later.Size) > EarlierOff) {
- assert (int64_t(LaterOff + Later.Size) < int64_t(EarlierOff + Earlier.Size)
- && "Expect to be handled as OverwriteComplete" );
+ if (!EnablePartialOverwriteTracking &&
+ (LaterOff <= EarlierOff && int64_t(LaterOff + Later.Size) > EarlierOff)) {
+ assert(int64_t(LaterOff + Later.Size) <
+ int64_t(EarlierOff + Earlier.Size) &&
+ "Expect to be handled as OverwriteComplete");
return OverwriteBegin;
}
// Otherwise, they don't completely overlap.
@@ -505,7 +509,6 @@ static bool isPossibleSelfRead(Instruction *Inst,
return true;
}
-
/// Returns true if the memory which is accessed by the second instruction is not
/// modified between the first and the second instruction.
/// Precondition: Second instruction must be dominated by the first
@@ -585,7 +588,9 @@ static void findUnconditionalPreds(SmallVectorImpl<BasicBlock *> &Blocks,
/// to a field of that structure.
static bool handleFree(CallInst *F, AliasAnalysis *AA,
MemoryDependenceResults *MD, DominatorTree *DT,
- const TargetLibraryInfo *TLI) {
+ const TargetLibraryInfo *TLI,
+ InstOverlapIntervalsTy &IOL,
+ DenseMap<Instruction*, size_t> *InstrOrdering) {
bool MadeChange = false;
MemoryLocation Loc = MemoryLocation(F->getOperand(0));
@@ -612,9 +617,12 @@ static bool handleFree(CallInst *F, AliasAnalysis *AA,
if (!AA->isMustAlias(F->getArgOperand(0), DepPointer))
break;
+ DEBUG(dbgs() << "DSE: Dead Store to soon to be freed memory:\n DEAD: "
+ << *Dependency << '\n');
+
// DCE instructions only used to calculate that store.
BasicBlock::iterator BBI(Dependency);
- deleteDeadInstruction(Dependency, &BBI, *MD, *TLI);
+ deleteDeadInstruction(Dependency, &BBI, *MD, *TLI, IOL, InstrOrdering);
++NumFastStores;
MadeChange = true;
@@ -669,7 +677,9 @@ static void removeAccessedObjects(const MemoryLocation &LoadedLoc,
/// ret void
static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA,
MemoryDependenceResults *MD,
- const TargetLibraryInfo *TLI) {
+ const TargetLibraryInfo *TLI,
+ InstOverlapIntervalsTy &IOL,
+ DenseMap<Instruction*, size_t> *InstrOrdering) {
bool MadeChange = false;
// Keep track of all of the stack objects that are dead at the end of the
@@ -728,7 +738,7 @@ static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA,
dbgs() << '\n');
// DCE instructions only used to calculate that store.
- deleteDeadInstruction(Dead, &BBI, *MD, *TLI, &DeadStackObjects);
+ deleteDeadInstruction(Dead, &BBI, *MD, *TLI, IOL, InstrOrdering, &DeadStackObjects);
++NumFastStores;
MadeChange = true;
continue;
@@ -737,7 +747,9 @@ static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA,
// Remove any dead non-memory-mutating instructions.
if (isInstructionTriviallyDead(&*BBI, TLI)) {
- deleteDeadInstruction(&*BBI, &BBI, *MD, *TLI, &DeadStackObjects);
+ DEBUG(dbgs() << "DSE: Removing trivially dead instruction:\n DEAD: "
+ << *&*BBI << '\n');
+ deleteDeadInstruction(&*BBI, &BBI, *MD, *TLI, IOL, InstrOrdering, &DeadStackObjects);
++NumFastOther;
MadeChange = true;
continue;
@@ -819,10 +831,125 @@ static bool handleEndBlock(BasicBlock &BB, AliasAnalysis *AA,
return MadeChange;
}
+static bool tryToShorten(Instruction *EarlierWrite, int64_t &EarlierOffset,
+ int64_t &EarlierSize, int64_t LaterOffset,
+ int64_t LaterSize, bool IsOverwriteEnd) {
+ // TODO: base this on the target vector size so that if the earlier
+ // store was too small to get vector writes anyway then its likely
+ // a good idea to shorten it
+ // Power of 2 vector writes are probably always a bad idea to optimize
+ // as any store/memset/memcpy is likely using vector instructions so
+ // shortening it to not vector size is likely to be slower
+ MemIntrinsic *EarlierIntrinsic = cast<MemIntrinsic>(EarlierWrite);
+ unsigned EarlierWriteAlign = EarlierIntrinsic->getAlignment();
+ if (!IsOverwriteEnd)
+ LaterOffset = int64_t(LaterOffset + LaterSize);
+
+ if (!(llvm::isPowerOf2_64(LaterOffset) && EarlierWriteAlign <= LaterOffset) &&
+ !((EarlierWriteAlign != 0) && LaterOffset % EarlierWriteAlign == 0))
+ return false;
+
+ DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW "
+ << (IsOverwriteEnd ? "END" : "BEGIN") << ": " << *EarlierWrite
+ << "\n KILLER (offset " << LaterOffset << ", " << EarlierSize
+ << ")\n");
+
+ int64_t NewLength = IsOverwriteEnd
+ ? LaterOffset - EarlierOffset
+ : EarlierSize - (LaterOffset - EarlierOffset);
+
+ Value *EarlierWriteLength = EarlierIntrinsic->getLength();
+ Value *TrimmedLength =
+ ConstantInt::get(EarlierWriteLength->getType(), NewLength);
+ EarlierIntrinsic->setLength(TrimmedLength);
+
+ EarlierSize = NewLength;
+ if (!IsOverwriteEnd) {
+ int64_t OffsetMoved = (LaterOffset - EarlierOffset);
+ Value *Indices[1] = {
+ ConstantInt::get(EarlierWriteLength->getType(), OffsetMoved)};
+ GetElementPtrInst *NewDestGEP = GetElementPtrInst::CreateInBounds(
+ EarlierIntrinsic->getRawDest(), Indices, "", EarlierWrite);
+ EarlierIntrinsic->setDest(NewDestGEP);
+ EarlierOffset = EarlierOffset + OffsetMoved;
+ }
+ return true;
+}
+
+static bool tryToShortenEnd(Instruction *EarlierWrite,
+ OverlapIntervalsTy &IntervalMap,
+ int64_t &EarlierStart, int64_t &EarlierSize) {
+ if (IntervalMap.empty() || !isShortenableAtTheEnd(EarlierWrite))
+ return false;
+
+ OverlapIntervalsTy::iterator OII = --IntervalMap.end();
+ int64_t LaterStart = OII->second;
+ int64_t LaterSize = OII->first - LaterStart;
+
+ if (LaterStart > EarlierStart && LaterStart < EarlierStart + EarlierSize &&
+ LaterStart + LaterSize >= EarlierStart + EarlierSize) {
+ if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart,
+ LaterSize, true)) {
+ IntervalMap.erase(OII);
+ return true;
+ }
+ }
+ return false;
+}
+
+static bool tryToShortenBegin(Instruction *EarlierWrite,
+ OverlapIntervalsTy &IntervalMap,
+ int64_t &EarlierStart, int64_t &EarlierSize) {
+ if (IntervalMap.empty() || !isShortenableAtTheBeginning(EarlierWrite))
+ return false;
+
+ OverlapIntervalsTy::iterator OII = IntervalMap.begin();
+ int64_t LaterStart = OII->second;
+ int64_t LaterSize = OII->first - LaterStart;
+
+ if (LaterStart <= EarlierStart && LaterStart + LaterSize > EarlierStart) {
+ assert(LaterStart + LaterSize < EarlierStart + EarlierSize &&
+ "Should have been handled as OverwriteComplete");
+ if (tryToShorten(EarlierWrite, EarlierStart, EarlierSize, LaterStart,
+ LaterSize, false)) {
+ IntervalMap.erase(OII);
+ return true;
+ }
+ }
+ return false;
+}
+
+static bool removePartiallyOverlappedStores(AliasAnalysis *AA,
+ const DataLayout &DL,
+ InstOverlapIntervalsTy &IOL) {
+ bool Changed = false;
+ for (auto OI : IOL) {
+ Instruction *EarlierWrite = OI.first;
+ MemoryLocation Loc = getLocForWrite(EarlierWrite, *AA);
+ assert(isRemovable(EarlierWrite) && "Expect only removable instruction");
+ assert(Loc.Size != MemoryLocation::UnknownSize && "Unexpected mem loc");
+
+ const Value *Ptr = Loc.Ptr->stripPointerCasts();
+ int64_t EarlierStart = 0;
+ int64_t EarlierSize = int64_t(Loc.Size);
+ GetPointerBaseWithConstantOffset(Ptr, EarlierStart, DL);
+ OverlapIntervalsTy &IntervalMap = OI.second;
+ Changed |=
+ tryToShortenEnd(EarlierWrite, IntervalMap, EarlierStart, EarlierSize);
+ if (IntervalMap.empty())
+ continue;
+ Changed |=
+ tryToShortenBegin(EarlierWrite, IntervalMap, EarlierStart, EarlierSize);
+ }
+ return Changed;
+}
+
static bool eliminateNoopStore(Instruction *Inst, BasicBlock::iterator &BBI,
AliasAnalysis *AA, MemoryDependenceResults *MD,
const DataLayout &DL,
- const TargetLibraryInfo *TLI) {
+ const TargetLibraryInfo *TLI,
+ InstOverlapIntervalsTy &IOL,
+ DenseMap<Instruction*, size_t> *InstrOrdering) {
// Must be a store instruction.
StoreInst *SI = dyn_cast<StoreInst>(Inst);
if (!SI)
@@ -837,7 +964,7 @@ static bool eliminateNoopStore(Instruction *Inst, BasicBlock::iterator &BBI,
DEBUG(dbgs() << "DSE: Remove Store Of Load from same pointer:\n LOAD: "
<< *DepLoad << "\n STORE: " << *SI << '\n');
- deleteDeadInstruction(SI, &BBI, *MD, *TLI);
+ deleteDeadInstruction(SI, &BBI, *MD, *TLI, IOL, InstrOrdering);
++NumRedundantStores;
return true;
}
@@ -855,7 +982,7 @@ static bool eliminateNoopStore(Instruction *Inst, BasicBlock::iterator &BBI,
dbgs() << "DSE: Remove null store to the calloc'ed object:\n DEAD: "
<< *Inst << "\n OBJECT: " << *UnderlyingPointer << '\n');
- deleteDeadInstruction(SI, &BBI, *MD, *TLI);
+ deleteDeadInstruction(SI, &BBI, *MD, *TLI, IOL, InstrOrdering);
++NumRedundantStores;
return true;
}
@@ -869,6 +996,12 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
const DataLayout &DL = BB.getModule()->getDataLayout();
bool MadeChange = false;
+ // FIXME: Maybe change this to use some abstraction like OrderedBasicBlock?
+ // The current OrderedBasicBlock can't deal with mutation at the moment.
+ size_t LastThrowingInstIndex = 0;
+ DenseMap<Instruction*, size_t> InstrOrdering;
+ size_t InstrIndex = 1;
+
// A map of interval maps representing partially-overwritten value parts.
InstOverlapIntervalsTy IOL;
@@ -876,7 +1009,7 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) {
// Handle 'free' calls specially.
if (CallInst *F = isFreeCall(&*BBI, TLI)) {
- MadeChange |= handleFree(F, AA, MD, DT, TLI);
+ MadeChange |= handleFree(F, AA, MD, DT, TLI, IOL, &InstrOrdering);
// Increment BBI after handleFree has potentially deleted instructions.
// This ensures we maintain a valid iterator.
++BBI;
@@ -885,12 +1018,19 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
Instruction *Inst = &*BBI++;
+ size_t CurInstNumber = InstrIndex++;
+ InstrOrdering.insert(std::make_pair(Inst, CurInstNumber));
+ if (Inst->mayThrow()) {
+ LastThrowingInstIndex = CurInstNumber;
+ continue;
+ }
+
// Check to see if Inst writes to memory. If not, continue.
if (!hasMemoryWrite(Inst, *TLI))
continue;
// eliminateNoopStore will update in iterator, if necessary.
- if (eliminateNoopStore(Inst, BBI, AA, MD, DL, TLI)) {
+ if (eliminateNoopStore(Inst, BBI, AA, MD, DL, TLI, IOL, &InstrOrdering)) {
MadeChange = true;
continue;
}
@@ -910,6 +1050,13 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
if (!Loc.Ptr)
continue;
+ // Loop until we find a store we can eliminate or a load that
+ // invalidates the analysis. Without an upper bound on the number of
+ // instructions examined, this analysis can become very time-consuming.
+ // However, the potential gain diminishes as we process more instructions
+ // without eliminating any of them. Therefore, we limit the number of
+ // instructions we look at.
+ auto Limit = MD->getDefaultBlockScanLimit();
while (InstDep.isDef() || InstDep.isClobber()) {
// Get the memory clobbered by the instruction we depend on. MemDep will
// skip any instructions that 'Loc' clearly doesn't interact with. If we
@@ -924,6 +1071,31 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
if (!DepLoc.Ptr)
break;
+ // Make sure we don't look past a call which might throw. This is an
+ // issue because MemoryDependenceAnalysis works in the wrong direction:
+ // it finds instructions which dominate the current instruction, rather than
+ // instructions which are post-dominated by the current instruction.
+ //
+ // If the underlying object is a non-escaping memory allocation, any store
+ // to it is dead along the unwind edge. Otherwise, we need to preserve
+ // the store.
+ size_t DepIndex = InstrOrdering.lookup(DepWrite);
+ assert(DepIndex && "Unexpected instruction");
+ if (DepIndex <= LastThrowingInstIndex) {
+ const Value* Underlying = GetUnderlyingObject(DepLoc.Ptr, DL);
+ bool IsStoreDeadOnUnwind = isa<AllocaInst>(Underlying);
+ if (!IsStoreDeadOnUnwind) {
+ // We're looking for a call to an allocation function
+ // where the allocation doesn't escape before the last
+ // throwing instruction; PointerMayBeCaptured is a
+ // reasonably fast approximation.
+ IsStoreDeadOnUnwind = isAllocLikeFn(Underlying, TLI) &&
+ !PointerMayBeCaptured(Underlying, false, true);
+ }
+ if (!IsStoreDeadOnUnwind)
+ break;
+ }
+
// If we find a write that is a) removable (i.e., non-volatile), b) is
// completely obliterated by the store to 'Loc', and c) which we know that
// 'Inst' doesn't load from, then we can remove it.
@@ -938,7 +1110,7 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
<< *DepWrite << "\n KILLER: " << *Inst << '\n');
// Delete the store and now-dead instructions that feed it.
- deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI);
+ deleteDeadInstruction(DepWrite, &BBI, *MD, *TLI, IOL, &InstrOrdering);
++NumFastStores;
MadeChange = true;
@@ -948,48 +1120,14 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
} else if ((OR == OverwriteEnd && isShortenableAtTheEnd(DepWrite)) ||
((OR == OverwriteBegin &&
isShortenableAtTheBeginning(DepWrite)))) {
- // TODO: base this on the target vector size so that if the earlier
- // store was too small to get vector writes anyway then its likely
- // a good idea to shorten it
- // Power of 2 vector writes are probably always a bad idea to optimize
- // as any store/memset/memcpy is likely using vector instructions so
- // shortening it to not vector size is likely to be slower
- MemIntrinsic *DepIntrinsic = cast<MemIntrinsic>(DepWrite);
- unsigned DepWriteAlign = DepIntrinsic->getAlignment();
+ assert(!EnablePartialOverwriteTracking && "Do not expect to perform "
+ "when partial-overwrite "
+ "tracking is enabled");
+ int64_t EarlierSize = DepLoc.Size;
+ int64_t LaterSize = Loc.Size;
bool IsOverwriteEnd = (OR == OverwriteEnd);
- if (!IsOverwriteEnd)
- InstWriteOffset = int64_t(InstWriteOffset + Loc.Size);
-
- if ((llvm::isPowerOf2_64(InstWriteOffset) &&
- DepWriteAlign <= InstWriteOffset) ||
- ((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign == 0)) {
-
- DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW "
- << (IsOverwriteEnd ? "END" : "BEGIN") << ": "
- << *DepWrite << "\n KILLER (offset "
- << InstWriteOffset << ", " << DepLoc.Size << ")"
- << *Inst << '\n');
-
- int64_t NewLength =
- IsOverwriteEnd
- ? InstWriteOffset - DepWriteOffset
- : DepLoc.Size - (InstWriteOffset - DepWriteOffset);
-
- Value *DepWriteLength = DepIntrinsic->getLength();
- Value *TrimmedLength =
- ConstantInt::get(DepWriteLength->getType(), NewLength);
- DepIntrinsic->setLength(TrimmedLength);
-
- if (!IsOverwriteEnd) {
- int64_t OffsetMoved = (InstWriteOffset - DepWriteOffset);
- Value *Indices[1] = {
- ConstantInt::get(DepWriteLength->getType(), OffsetMoved)};
- GetElementPtrInst *NewDestGEP = GetElementPtrInst::CreateInBounds(
- DepIntrinsic->getRawDest(), Indices, "", DepWrite);
- DepIntrinsic->setDest(NewDestGEP);
- }
- MadeChange = true;
- }
+ MadeChange |= tryToShorten(DepWrite, DepWriteOffset, EarlierSize,
+ InstWriteOffset, LaterSize, IsOverwriteEnd);
}
}
@@ -1007,15 +1145,19 @@ static bool eliminateDeadStores(BasicBlock &BB, AliasAnalysis *AA,
if (AA->getModRefInfo(DepWrite, Loc) & MRI_Ref)
break;
- InstDep = MD->getPointerDependencyFrom(Loc, false,
- DepWrite->getIterator(), &BB);
+ InstDep = MD->getPointerDependencyFrom(Loc, /*isLoad=*/ false,
+ DepWrite->getIterator(), &BB,
+ /*QueryInst=*/ nullptr, &Limit);
}
}
+ if (EnablePartialOverwriteTracking)
+ MadeChange |= removePartiallyOverlappedStores(AA, DL, IOL);
+
// If this block ends in a return, unwind, or unreachable, all allocas are
// dead at its end, which means stores to them are also dead.
if (BB.getTerminator()->getNumSuccessors() == 0)
- MadeChange |= handleEndBlock(BB, AA, MD, TLI);
+ MadeChange |= handleEndBlock(BB, AA, MD, TLI, IOL, &InstrOrdering);
return MadeChange;
}
@@ -1029,6 +1171,7 @@ static bool eliminateDeadStores(Function &F, AliasAnalysis *AA,
// cycles that will confuse alias analysis.
if (DT->isReachableFromEntry(&BB))
MadeChange |= eliminateDeadStores(BB, AA, MD, DT, TLI);
+
return MadeChange;
}
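The new tryToShorten/tryToShortenEnd/tryToShortenBegin helpers above trim an earlier memset/memcpy when the recorded intervals of later writes cover its head or tail. A minimal self-contained sketch of the end-trimming step, assuming a std::map keyed by interval end offset (mirroring how OverlapIntervalsTy is indexed above) rather than LLVM's types:

#include <cstdint>
#include <cstdio>
#include <map>

using OverlapIntervals = std::map<int64_t, int64_t>; // end offset -> start offset

// Shrink [EarlierStart, EarlierStart + EarlierSize) when the highest-ending
// later interval covers its tail; returns true if a trim happened.
static bool shortenEnd(OverlapIntervals &IM, int64_t &EarlierStart,
                       int64_t &EarlierSize) {
  if (IM.empty())
    return false;
  auto OII = --IM.end();
  int64_t LaterStart = OII->second;
  int64_t LaterEnd = OII->first;
  if (LaterStart > EarlierStart && LaterStart < EarlierStart + EarlierSize &&
      LaterEnd >= EarlierStart + EarlierSize) {
    EarlierSize = LaterStart - EarlierStart; // keep only the untouched prefix
    IM.erase(OII);
    return true;
  }
  return false;
}

int main() {
  OverlapIntervals IM;
  IM[16] = 8; // a later write covered bytes [8, 16)
  int64_t Start = 0, Size = 16;
  shortenEnd(IM, Start, Size);
  std::printf("start=%lld size=%lld\n", (long long)Start, (long long)Size); // 0 8
}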
diff --git a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
index 0b16e2703dc4..9bf638dcbae3 100644
--- a/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -32,6 +32,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/MemorySSA.h"
#include <deque>
using namespace llvm;
using namespace llvm::PatternMatch;
@@ -251,6 +252,7 @@ public:
const TargetTransformInfo &TTI;
DominatorTree &DT;
AssumptionCache &AC;
+ MemorySSA *MSSA;
typedef RecyclingAllocator<
BumpPtrAllocator, ScopedHashTableVal<SimpleValue, Value *>> AllocatorTy;
typedef ScopedHashTable<SimpleValue, Value *, DenseMapInfo<SimpleValue>,
@@ -312,8 +314,8 @@ public:
/// \brief Set up the EarlyCSE runner for a particular function.
EarlyCSE(const TargetLibraryInfo &TLI, const TargetTransformInfo &TTI,
- DominatorTree &DT, AssumptionCache &AC)
- : TLI(TLI), TTI(TTI), DT(DT), AC(AC), CurrentGeneration(0) {}
+ DominatorTree &DT, AssumptionCache &AC, MemorySSA *MSSA)
+ : TLI(TLI), TTI(TTI), DT(DT), AC(AC), MSSA(MSSA), CurrentGeneration(0) {}
bool run();
@@ -338,7 +340,7 @@ private:
};
// Contains all the needed information to create a stack for doing a depth
- // first tranversal of the tree. This includes scopes for values, loads, and
+ // first traversal of the tree. This includes scopes for values, loads, and
// calls as well as the generation. There is a child iterator so that the
// children do not need to be store separately.
class StackNode {
@@ -487,9 +489,85 @@ private:
return TTI.getOrCreateResultFromMemIntrinsic(cast<IntrinsicInst>(Inst),
ExpectedType);
}
+
+ bool isSameMemGeneration(unsigned EarlierGeneration, unsigned LaterGeneration,
+ Instruction *EarlierInst, Instruction *LaterInst);
+
+ void removeMSSA(Instruction *Inst) {
+ if (!MSSA)
+ return;
+ // Removing a store here can leave MemorySSA in an unoptimized state by
+ // creating MemoryPhis that have identical arguments and by creating
+ // MemoryUses whose defining access is not an actual clobber. We handle the
+ // phi case eagerly here. The non-optimized MemoryUse case is lazily
+ // updated by MemorySSA getClobberingMemoryAccess.
+ if (MemoryAccess *MA = MSSA->getMemoryAccess(Inst)) {
+ // Optimize MemoryPhi nodes that may become redundant by having all the
+ // same input values once MA is removed.
+ SmallVector<MemoryPhi *, 4> PhisToCheck;
+ SmallVector<MemoryAccess *, 8> WorkQueue;
+ WorkQueue.push_back(MA);
+ // Process MemoryPhi nodes in FIFO order using an ever-growing vector since
+ // we shouldn't be processing that many phis and this will avoid an
+ // allocation in almost all cases.
+ for (unsigned I = 0; I < WorkQueue.size(); ++I) {
+ MemoryAccess *WI = WorkQueue[I];
+
+ for (auto *U : WI->users())
+ if (MemoryPhi *MP = dyn_cast<MemoryPhi>(U))
+ PhisToCheck.push_back(MP);
+
+ MSSA->removeMemoryAccess(WI);
+
+ for (MemoryPhi *MP : PhisToCheck) {
+ MemoryAccess *FirstIn = MP->getIncomingValue(0);
+ if (all_of(MP->incoming_values(),
+ [=](Use &In) { return In == FirstIn; }))
+ WorkQueue.push_back(MP);
+ }
+ PhisToCheck.clear();
+ }
+ }
+ }
};
}
+/// Determine if the memory referenced by LaterInst is from the same heap
+/// version as EarlierInst.
+/// This is currently called in two scenarios:
+///
+/// load p
+/// ...
+/// load p
+///
+/// and
+///
+/// x = load p
+/// ...
+/// store x, p
+///
+/// in both cases we want to verify that there are no possible writes to the
+/// memory referenced by p between the earlier and later instruction.
+bool EarlyCSE::isSameMemGeneration(unsigned EarlierGeneration,
+ unsigned LaterGeneration,
+ Instruction *EarlierInst,
+ Instruction *LaterInst) {
+ // Check the simple memory generation tracking first.
+ if (EarlierGeneration == LaterGeneration)
+ return true;
+
+ if (!MSSA)
+ return false;
+
+ // Since we know LaterDef dominates LaterInst and EarlierInst dominates
+ // LaterInst, if LaterDef dominates EarlierInst then it can't occur between
+ // EarlierInst and LaterInst and neither can any other write that potentially
+ // clobbers LaterInst.
+ MemoryAccess *LaterDef =
+ MSSA->getWalker()->getClobberingMemoryAccess(LaterInst);
+ return MSSA->dominates(LaterDef, MSSA->getMemoryAccess(EarlierInst));
+}
+
bool EarlyCSE::processNode(DomTreeNode *Node) {
bool Changed = false;
BasicBlock *BB = Node->getBlock();
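isSameMemGeneration() above falls back to MemorySSA only when the cheap generation check fails. A simplified, self-contained model of that generation check (toy Op records instead of IR; not EarlyCSE's real data structures) shows why the fallback matters: any intervening store bumps the generation and blocks reuse even when it writes an unrelated pointer, which is exactly the case the getClobberingMemoryAccess/dominates query can rescue:

#include <cstdio>
#include <map>
#include <string>
#include <vector>

struct Op { char Kind; std::string Addr; }; // 'L' = load from Addr, 'S' = store to Addr

int main() {
  // load p; load p; store q; load p
  std::vector<Op> Prog = {{'L', "p"}, {'L', "p"}, {'S', "q"}, {'L', "p"}};
  unsigned CurGen = 0;
  std::map<std::string, unsigned> AvailGen; // pointer -> generation of last load
  for (const Op &O : Prog) {
    if (O.Kind == 'S') { // any write may clobber memory: start a new generation
      ++CurGen;
      continue;
    }
    auto It = AvailGen.find(O.Addr);
    if (It != AvailGen.end() && It->second == CurGen)
      std::printf("load of %s reuses the earlier load\n", O.Addr.c_str());
    else
      std::printf("load of %s cannot be CSE'd by generation alone\n",
                  O.Addr.c_str());
    AvailGen[O.Addr] = CurGen; // record availability in the current generation
  }
}

The third load of p is rejected by generations alone because the store to q bumped the counter; with MemorySSA the pass can prove that store is not a clobber of p and keep the reuse.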
@@ -547,6 +625,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// Dead instructions should just be removed.
if (isInstructionTriviallyDead(Inst, &TLI)) {
DEBUG(dbgs() << "EarlyCSE DCE: " << *Inst << '\n');
+ removeMSSA(Inst);
Inst->eraseFromParent();
Changed = true;
++NumSimplify;
@@ -562,6 +641,19 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
continue;
}
+ // Skip invariant.start intrinsics since they only read memory, and we can
+ // forward values across it. Also, we don't need to consume the last store
+ // since the semantics of invariant.start allow us to perform DSE of the
+ // last store, if there was a store following invariant.start. Consider:
+ //
+ // store 30, i8* p
+ // invariant.start(p)
+ // store 40, i8* p
+ // We can DSE the store to 30, since the store 40 to invariant location p
+ // causes undefined behaviour.
+ if (match(Inst, m_Intrinsic<Intrinsic::invariant_start>()))
+ continue;
+
if (match(Inst, m_Intrinsic<Intrinsic::experimental_guard>())) {
if (auto *CondI =
dyn_cast<Instruction>(cast<CallInst>(Inst)->getArgOperand(0))) {
@@ -588,6 +680,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
Changed = true;
}
if (isInstructionTriviallyDead(Inst, &TLI)) {
+ removeMSSA(Inst);
Inst->eraseFromParent();
Changed = true;
Killed = true;
@@ -606,6 +699,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
if (auto *I = dyn_cast<Instruction>(V))
I->andIRFlags(Inst);
Inst->replaceAllUsesWith(V);
+ removeMSSA(Inst);
Inst->eraseFromParent();
Changed = true;
++NumCSE;
@@ -631,24 +725,26 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// generation or the load is known to be from an invariant location,
// replace this instruction.
//
- // A dominating invariant load implies that the location loaded from is
- // unchanging beginning at the point of the invariant load, so the load
- // we're CSE'ing _away_ does not need to be invariant, only the available
- // load we're CSE'ing _to_ does.
+ // If either the dominating load or the current load are invariant, then
+ // we can assume the current load loads the same value as the dominating
+ // load.
LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
if (InVal.DefInst != nullptr &&
- (InVal.Generation == CurrentGeneration || InVal.IsInvariant) &&
InVal.MatchingId == MemInst.getMatchingId() &&
// We don't yet handle removing loads with ordering of any kind.
!MemInst.isVolatile() && MemInst.isUnordered() &&
// We can't replace an atomic load with one which isn't also atomic.
- InVal.IsAtomic >= MemInst.isAtomic()) {
+ InVal.IsAtomic >= MemInst.isAtomic() &&
+ (InVal.IsInvariant || MemInst.isInvariantLoad() ||
+ isSameMemGeneration(InVal.Generation, CurrentGeneration,
+ InVal.DefInst, Inst))) {
Value *Op = getOrCreateResult(InVal.DefInst, Inst->getType());
if (Op != nullptr) {
DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst
<< " to: " << *InVal.DefInst << '\n');
if (!Inst->use_empty())
Inst->replaceAllUsesWith(Op);
+ removeMSSA(Inst);
Inst->eraseFromParent();
Changed = true;
++NumCSELoad;
@@ -679,11 +775,14 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// If we have an available version of this call, and if it is the right
// generation, replace this instruction.
std::pair<Instruction *, unsigned> InVal = AvailableCalls.lookup(Inst);
- if (InVal.first != nullptr && InVal.second == CurrentGeneration) {
+ if (InVal.first != nullptr &&
+ isSameMemGeneration(InVal.second, CurrentGeneration, InVal.first,
+ Inst)) {
DEBUG(dbgs() << "EarlyCSE CSE CALL: " << *Inst
<< " to: " << *InVal.first << '\n');
if (!Inst->use_empty())
Inst->replaceAllUsesWith(InVal.first);
+ removeMSSA(Inst);
Inst->eraseFromParent();
Changed = true;
++NumCSECall;
@@ -716,15 +815,22 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
if (InVal.DefInst &&
InVal.DefInst == getOrCreateResult(Inst, InVal.DefInst->getType()) &&
- InVal.Generation == CurrentGeneration &&
InVal.MatchingId == MemInst.getMatchingId() &&
// We don't yet handle removing stores with ordering of any kind.
- !MemInst.isVolatile() && MemInst.isUnordered()) {
+ !MemInst.isVolatile() && MemInst.isUnordered() &&
+ isSameMemGeneration(InVal.Generation, CurrentGeneration,
+ InVal.DefInst, Inst)) {
+ // It is okay to have a LastStore to a different pointer here if MemorySSA
+ // tells us that the load and store are from the same memory generation.
+ // In that case, LastStore should keep its present value since we're
+ // removing the current store.
assert((!LastStore ||
ParseMemoryInst(LastStore, TTI).getPointerOperand() ==
- MemInst.getPointerOperand()) &&
- "can't have an intervening store!");
+ MemInst.getPointerOperand() ||
+ MSSA) &&
+ "can't have an intervening store if not using MemorySSA!");
DEBUG(dbgs() << "EarlyCSE DSE (writeback): " << *Inst << '\n');
+ removeMSSA(Inst);
Inst->eraseFromParent();
Changed = true;
++NumDSE;
@@ -756,6 +862,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
if (LastStoreMemInst.isMatchingMemLoc(MemInst)) {
DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore
<< " due to: " << *Inst << '\n');
+ removeMSSA(LastStore);
LastStore->eraseFromParent();
Changed = true;
++NumDSE;
@@ -847,13 +954,15 @@ bool EarlyCSE::run() {
}
PreservedAnalyses EarlyCSEPass::run(Function &F,
- AnalysisManager<Function> &AM) {
+ FunctionAnalysisManager &AM) {
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
auto &TTI = AM.getResult<TargetIRAnalysis>(F);
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &AC = AM.getResult<AssumptionAnalysis>(F);
+ auto *MSSA =
+ UseMemorySSA ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA() : nullptr;
- EarlyCSE CSE(TLI, TTI, DT, AC);
+ EarlyCSE CSE(TLI, TTI, DT, AC, MSSA);
if (!CSE.run())
return PreservedAnalyses::all();
@@ -863,6 +972,8 @@ PreservedAnalyses EarlyCSEPass::run(Function &F,
PreservedAnalyses PA;
PA.preserve<DominatorTreeAnalysis>();
PA.preserve<GlobalsAA>();
+ if (UseMemorySSA)
+ PA.preserve<MemorySSAAnalysis>();
return PA;
}
@@ -874,12 +985,16 @@ namespace {
/// canonicalize things as it goes. It is intended to be fast and catch obvious
/// cases so that instcombine and other passes are more effective. It is
/// expected that a later pass of GVN will catch the interesting/hard cases.
-class EarlyCSELegacyPass : public FunctionPass {
+template<bool UseMemorySSA>
+class EarlyCSELegacyCommonPass : public FunctionPass {
public:
static char ID;
- EarlyCSELegacyPass() : FunctionPass(ID) {
- initializeEarlyCSELegacyPassPass(*PassRegistry::getPassRegistry());
+ EarlyCSELegacyCommonPass() : FunctionPass(ID) {
+ if (UseMemorySSA)
+ initializeEarlyCSEMemSSALegacyPassPass(*PassRegistry::getPassRegistry());
+ else
+ initializeEarlyCSELegacyPassPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override {
@@ -890,8 +1005,10 @@ public:
auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ auto *MSSA =
+ UseMemorySSA ? &getAnalysis<MemorySSAWrapperPass>().getMSSA() : nullptr;
- EarlyCSE CSE(TLI, TTI, DT, AC);
+ EarlyCSE CSE(TLI, TTI, DT, AC, MSSA);
return CSE.run();
}
@@ -901,15 +1018,20 @@ public:
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
+ if (UseMemorySSA) {
+ AU.addRequired<MemorySSAWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
+ }
AU.addPreserved<GlobalsAAWrapperPass>();
AU.setPreservesCFG();
}
};
}
-char EarlyCSELegacyPass::ID = 0;
+using EarlyCSELegacyPass = EarlyCSELegacyCommonPass</*UseMemorySSA=*/false>;
-FunctionPass *llvm::createEarlyCSEPass() { return new EarlyCSELegacyPass(); }
+template<>
+char EarlyCSELegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(EarlyCSELegacyPass, "early-cse", "Early CSE", false,
false)
@@ -918,3 +1040,26 @@ INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(EarlyCSELegacyPass, "early-cse", "Early CSE", false, false)
+
+using EarlyCSEMemSSALegacyPass =
+ EarlyCSELegacyCommonPass</*UseMemorySSA=*/true>;
+
+template<>
+char EarlyCSEMemSSALegacyPass::ID = 0;
+
+FunctionPass *llvm::createEarlyCSEPass(bool UseMemorySSA) {
+ if (UseMemorySSA)
+ return new EarlyCSEMemSSALegacyPass();
+ else
+ return new EarlyCSELegacyPass();
+}
+
+INITIALIZE_PASS_BEGIN(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
+ "Early CSE w/ MemorySSA", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
+INITIALIZE_PASS_END(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
+ "Early CSE w/ MemorySSA", false, false)
diff --git a/contrib/llvm/lib/Transforms/Scalar/Float2Int.cpp b/contrib/llvm/lib/Transforms/Scalar/Float2Int.cpp
index 7aa6dc6992b6..545036d724ef 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Float2Int.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Float2Int.cpp
@@ -190,21 +190,14 @@ void Float2IntPass::walkBackwards(const SmallPtrSetImpl<Instruction*> &Roots) {
seen(I, badRange());
break;
- case Instruction::UIToFP: {
- // Path terminated cleanly.
- unsigned BW = I->getOperand(0)->getType()->getPrimitiveSizeInBits();
- APInt Min = APInt::getMinValue(BW).zextOrSelf(MaxIntegerBW+1);
- APInt Max = APInt::getMaxValue(BW).zextOrSelf(MaxIntegerBW+1);
- seen(I, validateRange(ConstantRange(Min, Max)));
- continue;
- }
-
+ case Instruction::UIToFP:
case Instruction::SIToFP: {
- // Path terminated cleanly.
+ // Path terminated cleanly - use the type of the integer input to seed
+ // the analysis.
unsigned BW = I->getOperand(0)->getType()->getPrimitiveSizeInBits();
- APInt SMin = APInt::getSignedMinValue(BW).sextOrSelf(MaxIntegerBW+1);
- APInt SMax = APInt::getSignedMaxValue(BW).sextOrSelf(MaxIntegerBW+1);
- seen(I, validateRange(ConstantRange(SMin, SMax)));
+ auto Input = ConstantRange(BW, true);
+ auto CastOp = (Instruction::CastOps)I->getOpcode();
+ seen(I, validateRange(Input.castOp(CastOp, MaxIntegerBW+1)));
continue;
}
@@ -249,23 +242,12 @@ void Float2IntPass::walkForwards() {
llvm_unreachable("Should have been handled in walkForwards!");
case Instruction::FAdd:
- Op = [](ArrayRef<ConstantRange> Ops) {
- assert(Ops.size() == 2 && "FAdd is a binary operator!");
- return Ops[0].add(Ops[1]);
- };
- break;
-
case Instruction::FSub:
- Op = [](ArrayRef<ConstantRange> Ops) {
- assert(Ops.size() == 2 && "FSub is a binary operator!");
- return Ops[0].sub(Ops[1]);
- };
- break;
-
case Instruction::FMul:
- Op = [](ArrayRef<ConstantRange> Ops) {
- assert(Ops.size() == 2 && "FMul is a binary operator!");
- return Ops[0].multiply(Ops[1]);
+ Op = [I](ArrayRef<ConstantRange> Ops) {
+ assert(Ops.size() == 2 && "its a binary operator!");
+ auto BinOp = (Instruction::BinaryOps) I->getOpcode();
+ return Ops[0].binaryOp(BinOp, Ops[1]);
};
break;
@@ -275,9 +257,12 @@ void Float2IntPass::walkForwards() {
//
case Instruction::FPToUI:
case Instruction::FPToSI:
- Op = [](ArrayRef<ConstantRange> Ops) {
+ Op = [I](ArrayRef<ConstantRange> Ops) {
assert(Ops.size() == 1 && "FPTo[US]I is a unary operator!");
- return Ops[0];
+ // Note: We're ignoring the cast's output size here as that's what the
+ // caller expects.
+ auto CastOp = (Instruction::CastOps)I->getOpcode();
+ return Ops[0].castOp(CastOp, MaxIntegerBW+1);
};
break;
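The Float2Int changes above replace the per-opcode lambdas with ConstantRange::binaryOp/castOp, i.e. ordinary interval arithmetic on the operand ranges. A small self-contained sketch of that idea for add and mul, using a plain Range struct with assumed non-overflowing bounds rather than llvm::ConstantRange:

#include <algorithm>
#include <cstdint>
#include <cstdio>

struct Range { int64_t Lo, Hi; }; // inclusive bounds

static Range add(Range A, Range B) { return {A.Lo + B.Lo, A.Hi + B.Hi}; }

static Range mul(Range A, Range B) {
  // The extremes of a product lie among the four corner products.
  int64_t P[] = {A.Lo * B.Lo, A.Lo * B.Hi, A.Hi * B.Lo, A.Hi * B.Hi};
  return {*std::min_element(std::begin(P), std::end(P)),
          *std::max_element(std::begin(P), std::end(P))};
}

int main() {
  Range X{-3, 4}, Y{2, 5};
  Range S = add(X, Y), M = mul(X, Y);
  std::printf("add: [%lld, %lld]  mul: [%lld, %lld]\n", (long long)S.Lo,
              (long long)S.Hi, (long long)M.Lo, (long long)M.Hi); // [-1,9] [-15,20]
}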
diff --git a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
index a35a1062cbcd..9485bfd7c296 100644
--- a/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/GVN.cpp
@@ -33,6 +33,7 @@
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -338,16 +339,9 @@ GVN::Expression GVN::ValueTable::createExtractvalueExpr(ExtractValueInst *EI) {
//===----------------------------------------------------------------------===//
GVN::ValueTable::ValueTable() : nextValueNumber(1) {}
-GVN::ValueTable::ValueTable(const ValueTable &Arg)
- : valueNumbering(Arg.valueNumbering),
- expressionNumbering(Arg.expressionNumbering), AA(Arg.AA), MD(Arg.MD),
- DT(Arg.DT), nextValueNumber(Arg.nextValueNumber) {}
-GVN::ValueTable::ValueTable(ValueTable &&Arg)
- : valueNumbering(std::move(Arg.valueNumbering)),
- expressionNumbering(std::move(Arg.expressionNumbering)),
- AA(std::move(Arg.AA)), MD(std::move(Arg.MD)), DT(std::move(Arg.DT)),
- nextValueNumber(std::move(Arg.nextValueNumber)) {}
-GVN::ValueTable::~ValueTable() {}
+GVN::ValueTable::ValueTable(const ValueTable &) = default;
+GVN::ValueTable::ValueTable(ValueTable &&) = default;
+GVN::ValueTable::~ValueTable() = default;
/// add - Insert a value into the table with a specified value number.
void GVN::ValueTable::add(Value *V, uint32_t num) {
@@ -583,7 +577,7 @@ void GVN::ValueTable::verifyRemoved(const Value *V) const {
// GVN Pass
//===----------------------------------------------------------------------===//
-PreservedAnalyses GVN::run(Function &F, AnalysisManager<Function> &AM) {
+PreservedAnalyses GVN::run(Function &F, FunctionAnalysisManager &AM) {
// FIXME: The order of evaluation of these 'getResult' calls is very
// significant! Re-ordering these variables will cause GVN when run alone to
// be less effective! We should fix memdep and basic-aa to not exhibit this
@@ -593,7 +587,9 @@ PreservedAnalyses GVN::run(Function &F, AnalysisManager<Function> &AM) {
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
auto &AA = AM.getResult<AAManager>(F);
auto &MemDep = AM.getResult<MemoryDependenceAnalysis>(F);
- bool Changed = runImpl(F, AC, DT, TLI, AA, &MemDep);
+ auto *LI = AM.getCachedResult<LoopAnalysis>(F);
+ auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+ bool Changed = runImpl(F, AC, DT, TLI, AA, &MemDep, LI, &ORE);
if (!Changed)
return PreservedAnalyses::all();
PreservedAnalyses PA;
@@ -725,8 +721,9 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal, Type *LoadedTy,
assert(CanCoerceMustAliasedValueToLoad(StoredVal, LoadedTy, DL) &&
"precondition violation - materialization can't fail");
- if (auto *CExpr = dyn_cast<ConstantExpr>(StoredVal))
- StoredVal = ConstantFoldConstantExpression(CExpr, DL);
+ if (auto *C = dyn_cast<Constant>(StoredVal))
+ if (auto *FoldedStoredVal = ConstantFoldConstant(C, DL))
+ StoredVal = FoldedStoredVal;
// If this is already the right type, just return it.
Type *StoredValTy = StoredVal->getType();
@@ -759,8 +756,9 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal, Type *LoadedTy,
StoredVal = IRB.CreateIntToPtr(StoredVal, LoadedTy);
}
- if (auto *CExpr = dyn_cast<ConstantExpr>(StoredVal))
- StoredVal = ConstantFoldConstantExpression(CExpr, DL);
+ if (auto *C = dyn_cast<ConstantExpr>(StoredVal))
+ if (auto *FoldedStoredVal = ConstantFoldConstant(C, DL))
+ StoredVal = FoldedStoredVal;
return StoredVal;
}
@@ -804,8 +802,9 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal, Type *LoadedTy,
StoredVal = IRB.CreateBitCast(StoredVal, LoadedTy, "bitcast");
}
- if (auto *CExpr = dyn_cast<ConstantExpr>(StoredVal))
- StoredVal = ConstantFoldConstantExpression(CExpr, DL);
+ if (auto *C = dyn_cast<Constant>(StoredVal))
+ if (auto *FoldedStoredVal = ConstantFoldConstant(C, DL))
+ StoredVal = FoldedStoredVal;
return StoredVal;
}
@@ -838,16 +837,6 @@ static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
// a must alias. AA must have gotten confused.
// FIXME: Study to see if/when this happens. One case is forwarding a memset
// to a load from the base of the memset.
-#if 0
- if (LoadOffset == StoreOffset) {
- dbgs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n"
- << "Base = " << *StoreBase << "\n"
- << "Store Ptr = " << *WritePtr << "\n"
- << "Store Offs = " << StoreOffset << "\n"
- << "Load Ptr = " << *LoadPtr << "\n";
- abort();
- }
-#endif
// If the load and store don't overlap at all, the store doesn't provide
// anything to the load. In this case, they really don't alias at all, AA
@@ -856,8 +845,8 @@ static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
if ((WriteSizeInBits & 7) | (LoadSize & 7))
return -1;
- uint64_t StoreSize = WriteSizeInBits >> 3; // Convert to bytes.
- LoadSize >>= 3;
+ uint64_t StoreSize = WriteSizeInBits / 8; // Convert to bytes.
+ LoadSize /= 8;
bool isAAFailure = false;
@@ -866,17 +855,8 @@ static int AnalyzeLoadFromClobberingWrite(Type *LoadTy, Value *LoadPtr,
else
isAAFailure = LoadOffset+int64_t(LoadSize) <= StoreOffset;
- if (isAAFailure) {
-#if 0
- dbgs() << "STORE LOAD DEP WITH COMMON BASE:\n"
- << "Base = " << *StoreBase << "\n"
- << "Store Ptr = " << *WritePtr << "\n"
- << "Store Offs = " << StoreOffset << "\n"
- << "Load Ptr = " << *LoadPtr << "\n";
- abort();
-#endif
+ if (isAAFailure)
return -1;
- }
// If the Load isn't completely contained within the stored bits, we don't
// have all the bits to feed it. We could do something crazy in the future
@@ -1229,6 +1209,38 @@ static bool isLifetimeStart(const Instruction *Inst) {
return false;
}
+/// \brief Try to locate the three instructions involved in a missed
+/// load-elimination case that is due to an intervening store.
+static void reportMayClobberedLoad(LoadInst *LI, MemDepResult DepInfo,
+ DominatorTree *DT,
+ OptimizationRemarkEmitter *ORE) {
+ using namespace ore;
+ User *OtherAccess = nullptr;
+
+ OptimizationRemarkMissed R(DEBUG_TYPE, "LoadClobbered", LI);
+ R << "load of type " << NV("Type", LI->getType()) << " not eliminated"
+ << setExtraArgs();
+
+ for (auto *U : LI->getPointerOperand()->users())
+ if (U != LI && (isa<LoadInst>(U) || isa<StoreInst>(U)) &&
+ DT->dominates(cast<Instruction>(U), LI)) {
+ // FIXME: for now give up if there are multiple memory accesses that
+ // dominate the load. We need further analysis to decide which one
+ // we're forwarding from.
+ if (OtherAccess)
+ OtherAccess = nullptr;
+ else
+ OtherAccess = U;
+ }
+
+ if (OtherAccess)
+ R << " in favor of " << NV("OtherAccess", OtherAccess);
+
+ R << " because it is clobbered by " << NV("ClobberedBy", DepInfo.getInst());
+
+ ORE->emit(R);
+}
+
bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo,
Value *Address, AvailableValue &Res) {
@@ -1293,6 +1305,10 @@ bool GVN::AnalyzeLoadAvailability(LoadInst *LI, MemDepResult DepInfo,
Instruction *I = DepInfo.getInst();
dbgs() << " is clobbered by " << *I << '\n';
);
+
+ if (ORE->allowExtraAnalysis())
+ reportMayClobberedLoad(LI, DepInfo, DT, ORE);
+
return false;
}
assert(DepInfo.isDef() && "follows from above");
@@ -1605,10 +1621,21 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock,
if (V->getType()->getScalarType()->isPointerTy())
MD->invalidateCachedPointerInfo(V);
markInstructionForDeletion(LI);
+ ORE->emit(OptimizationRemark(DEBUG_TYPE, "LoadPRE", LI)
+ << "load eliminated by PRE");
++NumPRELoad;
return true;
}
+static void reportLoadElim(LoadInst *LI, Value *AvailableValue,
+ OptimizationRemarkEmitter *ORE) {
+ using namespace ore;
+ ORE->emit(OptimizationRemark(DEBUG_TYPE, "LoadElim", LI)
+ << "load of type " << NV("Type", LI->getType()) << " eliminated"
+ << setExtraArgs() << " in favor of "
+ << NV("InfavorOfValue", AvailableValue));
+}
+
/// Attempt to eliminate a load whose dependencies are
/// non-local by performing PHI construction.
bool GVN::processNonLocalLoad(LoadInst *LI) {
@@ -1673,12 +1700,16 @@ bool GVN::processNonLocalLoad(LoadInst *LI) {
if (isa<PHINode>(V))
V->takeName(LI);
if (Instruction *I = dyn_cast<Instruction>(V))
- if (LI->getDebugLoc())
+ // If instruction I has debug info, then we should not update it.
+ // Also, if I has a null DebugLoc, then it is still potentially incorrect
+ // to propagate LI's DebugLoc because LI may not post-dominate I.
+ if (LI->getDebugLoc() && ValuesPerBlock.size() != 1)
I->setDebugLoc(LI->getDebugLoc());
if (V->getType()->getScalarType()->isPointerTy())
MD->invalidateCachedPointerInfo(V);
markInstructionForDeletion(LI);
++NumGVNLoad;
+ reportLoadElim(LI, V, ORE);
return true;
}
@@ -1754,7 +1785,12 @@ static void patchReplacementInstruction(Instruction *I, Value *Repl) {
// Patch the replacement so that it is not more restrictive than the value
// being replaced.
- ReplInst->andIRFlags(I);
+ // Note that if 'I' is a load being replaced by some operation,
+ // for example, by an arithmetic operation, then andIRFlags()
+ // would just erase all math flags from the original arithmetic
+ // operation, which is clearly not wanted and not needed.
+ if (!isa<LoadInst>(I))
+ ReplInst->andIRFlags(I);
// FIXME: If both the original and replacement value are part of the
// same control-flow region (meaning that the execution of one
@@ -1820,6 +1856,7 @@ bool GVN::processLoad(LoadInst *L) {
patchAndReplaceAllUsesWith(L, AvailableValue);
markInstructionForDeletion(L);
++NumGVNLoad;
+ reportLoadElim(L, AvailableValue, ORE);
// Tell MDA to reexamine the reused pointer since we might have more
// information after forwarding it.
if (MD && AvailableValue->getType()->getScalarType()->isPointerTy())
@@ -2197,7 +2234,8 @@ bool GVN::processInstruction(Instruction *I) {
/// runOnFunction - This is the main transformation entry point for a function.
bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
const TargetLibraryInfo &RunTLI, AAResults &RunAA,
- MemoryDependenceResults *RunMD) {
+ MemoryDependenceResults *RunMD, LoopInfo *LI,
+ OptimizationRemarkEmitter *RunORE) {
AC = &RunAC;
DT = &RunDT;
VN.setDomTree(DT);
@@ -2205,6 +2243,7 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
VN.setAliasAnalysis(&RunAA);
MD = RunMD;
VN.setMemDep(MD);
+ ORE = RunORE;
bool Changed = false;
bool ShouldContinue = true;
@@ -2214,9 +2253,9 @@ bool GVN::runImpl(Function &F, AssumptionCache &RunAC, DominatorTree &RunDT,
for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ) {
BasicBlock *BB = &*FI++;
- bool removedBlock =
- MergeBlockIntoPredecessor(BB, DT, /* LoopInfo */ nullptr, MD);
- if (removedBlock) ++NumGVNBlocks;
+ bool removedBlock = MergeBlockIntoPredecessor(BB, DT, LI, MD);
+ if (removedBlock)
+ ++NumGVNBlocks;
Changed |= removedBlock;
}
@@ -2711,13 +2750,17 @@ public:
if (skipFunction(F))
return false;
+ auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
+
return Impl.runImpl(
F, getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F),
getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
getAnalysis<AAResultsWrapperPass>().getAAResults(),
NoLoads ? nullptr
- : &getAnalysis<MemoryDependenceWrapperPass>().getMemDep());
+ : &getAnalysis<MemoryDependenceWrapperPass>().getMemDep(),
+ LIWP ? &LIWP->getLoopInfo() : nullptr,
+ &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE());
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -2730,6 +2773,7 @@ public:
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
}
private:
@@ -2751,4 +2795,5 @@ INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_END(GVNLegacyPass, "gvn", "Global Value Numbering", false, false)
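Part of the GVN hunk above switches AnalyzeLoadFromClobberingWrite from bit shifts to byte division while keeping the containment requirement: a load can only be forwarded from a store that fully covers it. A standalone sketch of that offset check (hypothetical analyzeLoadFromWrite() helper; the real routine also diagnoses the pure non-overlap case separately):

#include <cstdint>
#include <cstdio>

static int analyzeLoadFromWrite(int64_t StoreOff, uint64_t StoreSizeBits,
                                int64_t LoadOff, uint64_t LoadSizeBits) {
  if ((StoreSizeBits & 7) | (LoadSizeBits & 7))
    return -1;                      // only byte-sized accesses are handled
  int64_t StoreSize = StoreSizeBits / 8; // convert to bytes
  int64_t LoadSize = LoadSizeBits / 8;
  if (StoreOff > LoadOff || StoreOff + StoreSize < LoadOff + LoadSize)
    return -1;                      // load not fully covered by the store
  return int(LoadOff - StoreOff);   // byte offset of the load inside the store
}

int main() {
  // A 4-byte load at offset 4 from an 8-byte store at offset 0 -> offset 4.
  std::printf("%d\n", analyzeLoadFromWrite(0, 64, 4, 32));
}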
diff --git a/contrib/llvm/lib/Transforms/Scalar/GVNHoist.cpp b/contrib/llvm/lib/Transforms/Scalar/GVNHoist.cpp
index cce1db3874b7..90c26e13db78 100644
--- a/contrib/llvm/lib/Transforms/Scalar/GVNHoist.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/GVNHoist.cpp
@@ -9,20 +9,23 @@
//
// This pass hoists expressions from branches to a common dominator. It uses
// GVN (global value numbering) to discover expressions computing the same
-// values. The primary goal is to reduce the code size, and in some
-// cases reduce critical path (by exposing more ILP).
+// values. The primary goals of code-hoisting are:
+// 1. To reduce the code size.
+// 2. In some cases reduce critical path (by exposing more ILP).
+//
// Hoisting may affect the performance in some cases. To mitigate that, hoisting
// is disabled in the following cases.
// 1. Scalars across calls.
// 2. geps when corresponding load/store cannot be hoisted.
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Transforms/Scalar.h"
-#include "llvm/Transforms/Scalar/GVN.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/MemorySSA.h"
using namespace llvm;
@@ -47,15 +50,25 @@ static cl::opt<int> MaxNumberOfBBSInPath(
cl::desc("Max number of basic blocks on the path between "
"hoisting locations (default = 4, unlimited = -1)"));
+static cl::opt<int> MaxDepthInBB(
+ "gvn-hoist-max-depth", cl::Hidden, cl::init(100),
+ cl::desc("Hoist instructions from the beginning of the BB up to the "
+ "maximum specified depth (default = 100, unlimited = -1)"));
+
+static cl::opt<int>
+ MaxChainLength("gvn-hoist-max-chain-length", cl::Hidden, cl::init(10),
+ cl::desc("Maximum length of dependent chains to hoist "
+ "(default = 10, unlimited = -1)"));
+
namespace {
// Provides a sorting function based on the execution order of two instructions.
struct SortByDFSIn {
private:
- DenseMap<const BasicBlock *, unsigned> &DFSNumber;
+ DenseMap<const Value *, unsigned> &DFSNumber;
public:
- SortByDFSIn(DenseMap<const BasicBlock *, unsigned> &D) : DFSNumber(D) {}
+ SortByDFSIn(DenseMap<const Value *, unsigned> &D) : DFSNumber(D) {}
// Returns true when A executes before B.
bool operator()(const Instruction *A, const Instruction *B) const {
@@ -68,16 +81,16 @@ public:
const BasicBlock *BA = A->getParent();
const BasicBlock *BB = B->getParent();
- unsigned NA = DFSNumber[BA];
- unsigned NB = DFSNumber[BB];
- if (NA < NB)
- return true;
- if (NA == NB) {
- // Sort them in the order they occur in the same basic block.
- BasicBlock::const_iterator AI(A), BI(B);
- return std::distance(AI, BI) < 0;
+ unsigned ADFS, BDFS;
+ if (BA == BB) {
+ ADFS = DFSNumber.lookup(A);
+ BDFS = DFSNumber.lookup(B);
+ } else {
+ ADFS = DFSNumber.lookup(BA);
+ BDFS = DFSNumber.lookup(BB);
}
- return false;
+ assert(ADFS && BDFS);
+ return ADFS < BDFS;
}
};
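The comparator change above works because every basic block and every instruction receives a DFS number in a single pass, turning "does A execute before B?" into two map lookups instead of a linear scan of the block. A self-contained sketch of that numbering scheme, with strings standing in for blocks and instructions (not GVNHoist's actual types):

#include <cstdio>
#include <map>
#include <string>
#include <utility>
#include <vector>

int main() {
  // Blocks listed in depth-first order; each block lists its instructions.
  std::vector<std::pair<std::string, std::vector<std::string>>> Blocks = {
      {"entry", {"a", "b"}}, {"then", {"c"}}, {"else", {"d", "e"}}};

  std::map<std::string, unsigned> DFSNumber;
  unsigned BBI = 0;
  for (auto &BB : Blocks) {
    DFSNumber[BB.first] = ++BBI; // number the block itself
    unsigned I = 0;
    for (auto &Inst : BB.second)
      DFSNumber[Inst] = ++I;     // number instructions within the block
  }

  // Same block: compare instruction numbers; different blocks: compare the
  // block numbers (mirrors the updated SortByDFSIn comparator).
  auto Before = [&](std::pair<std::string, std::string> A,
                    std::pair<std::string, std::string> B) {
    return A.first == B.first ? DFSNumber[A.second] < DFSNumber[B.second]
                              : DFSNumber[A.first] < DFSNumber[B.first];
  };
  std::printf("%d %d\n", Before({"entry", "a"}, {"entry", "b"}),
              Before({"else", "d"}, {"then", "c"})); // prints "1 0"
}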
@@ -172,27 +185,80 @@ typedef DenseMap<const BasicBlock *, bool> BBSideEffectsSet;
typedef SmallVector<Instruction *, 4> SmallVecInsn;
typedef SmallVectorImpl<Instruction *> SmallVecImplInsn;
+static void combineKnownMetadata(Instruction *ReplInst, Instruction *I) {
+ static const unsigned KnownIDs[] = {
+ LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
+ LLVMContext::MD_noalias, LLVMContext::MD_range,
+ LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load,
+ LLVMContext::MD_invariant_group};
+ combineMetadata(ReplInst, I, KnownIDs);
+}
+
// This pass hoists common computations across branches sharing common
// dominator. The primary goal is to reduce the code size, and in some
// cases reduce critical path (by exposing more ILP).
class GVNHoist {
public:
+ GVNHoist(DominatorTree *DT, AliasAnalysis *AA, MemoryDependenceResults *MD,
+ MemorySSA *MSSA, bool OptForMinSize)
+ : DT(DT), AA(AA), MD(MD), MSSA(MSSA), OptForMinSize(OptForMinSize),
+ HoistingGeps(OptForMinSize), HoistedCtr(0) {
+ // Hoist as far as possible when optimizing for code-size.
+ if (OptForMinSize)
+ MaxNumberOfBBSInPath = -1;
+ }
+
+ bool run(Function &F) {
+ VN.setDomTree(DT);
+ VN.setAliasAnalysis(AA);
+ VN.setMemDep(MD);
+ bool Res = false;
+ // Perform DFS Numbering of instructions.
+ unsigned BBI = 0;
+ for (const BasicBlock *BB : depth_first(&F.getEntryBlock())) {
+ DFSNumber[BB] = ++BBI;
+ unsigned I = 0;
+ for (auto &Inst : *BB)
+ DFSNumber[&Inst] = ++I;
+ }
+
+ int ChainLength = 0;
+
+ // FIXME: use lazy evaluation of VN to avoid the fix-point computation.
+ while (1) {
+ if (MaxChainLength != -1 && ++ChainLength >= MaxChainLength)
+ return Res;
+
+ auto HoistStat = hoistExpressions(F);
+ if (HoistStat.first + HoistStat.second == 0)
+ return Res;
+
+ if (HoistStat.second > 0)
+ // To address a limitation of the current GVN, we need to rerun the
+ // hoisting after we hoisted loads or stores in order to be able to
+ // hoist all scalars dependent on the hoisted ld/st.
+ VN.clear();
+
+ Res = true;
+ }
+
+ return Res;
+ }
+
+private:
GVN::ValueTable VN;
DominatorTree *DT;
AliasAnalysis *AA;
MemoryDependenceResults *MD;
+ MemorySSA *MSSA;
const bool OptForMinSize;
- DenseMap<const BasicBlock *, unsigned> DFSNumber;
+ const bool HoistingGeps;
+ DenseMap<const Value *, unsigned> DFSNumber;
BBSideEffectsSet BBSideEffects;
- MemorySSA *MSSA;
int HoistedCtr;
enum InsKind { Unknown, Scalar, Load, Store };
- GVNHoist(DominatorTree *Dt, AliasAnalysis *Aa, MemoryDependenceResults *Md,
- bool OptForMinSize)
- : DT(Dt), AA(Aa), MD(Md), OptForMinSize(OptForMinSize), HoistedCtr(0) {}
-
// Return true when there are exception handling in BB.
bool hasEH(const BasicBlock *BB) {
auto It = BBSideEffects.find(BB);
@@ -213,24 +279,32 @@ public:
return false;
}
- // Return true when all paths from A to the end of the function pass through
- // either B or C.
- bool hoistingFromAllPaths(const BasicBlock *A, const BasicBlock *B,
- const BasicBlock *C) {
- // We fully copy the WL in order to be able to remove items from it.
- SmallPtrSet<const BasicBlock *, 2> WL;
- WL.insert(B);
- WL.insert(C);
-
- for (auto It = df_begin(A), E = df_end(A); It != E;) {
- // There exists a path from A to the exit of the function if we are still
- // iterating in DF traversal and we removed all instructions from the work
- // list.
- if (WL.empty())
+ // Return true when a successor of BB dominates A.
+ bool successorDominate(const BasicBlock *BB, const BasicBlock *A) {
+ for (const BasicBlock *Succ : BB->getTerminator()->successors())
+ if (DT->dominates(Succ, A))
+ return true;
+
+ return false;
+ }
+
+ // Return true when all paths from HoistBB to the end of the function pass
+ // through one of the blocks in WL.
+ bool hoistingFromAllPaths(const BasicBlock *HoistBB,
+ SmallPtrSetImpl<const BasicBlock *> &WL) {
+
+ // Copy WL as the loop will remove elements from it.
+ SmallPtrSet<const BasicBlock *, 2> WorkList(WL.begin(), WL.end());
+
+ for (auto It = df_begin(HoistBB), E = df_end(HoistBB); It != E;) {
+ // There exists a path from HoistBB to the exit of the function if we are
+ // still iterating in DF traversal and we removed all instructions from
+ // the work list.
+ if (WorkList.empty())
return false;
const BasicBlock *BB = *It;
- if (WL.erase(BB)) {
+ if (WorkList.erase(BB)) {
// Stop DFS traversal when BB is in the work list.
It.skipChildren();
continue;
@@ -240,6 +314,11 @@ public:
if (!isGuaranteedToTransferExecutionToSuccessor(BB->getTerminator()))
return false;
+ // When reaching the back-edge of a loop, there may be a path through the
+ // loop that does not pass through B or C before exiting the loop.
+ if (successorDominate(BB, HoistBB))
+ return false;
+
// Increment DFS traversal when not skipping children.
++It;
}
@@ -248,40 +327,43 @@ public:
}
/* Return true when I1 appears before I2 in the instructions of BB. */
- bool firstInBB(BasicBlock *BB, const Instruction *I1, const Instruction *I2) {
- for (Instruction &I : *BB) {
- if (&I == I1)
- return true;
- if (&I == I2)
- return false;
- }
-
- llvm_unreachable("I1 and I2 not found in BB");
+ bool firstInBB(const Instruction *I1, const Instruction *I2) {
+ assert(I1->getParent() == I2->getParent());
+ unsigned I1DFS = DFSNumber.lookup(I1);
+ unsigned I2DFS = DFSNumber.lookup(I2);
+ assert(I1DFS && I2DFS);
+ return I1DFS < I2DFS;
}
- // Return true when there are users of Def in BB.
- bool hasMemoryUseOnPath(MemoryAccess *Def, const BasicBlock *BB,
- const Instruction *OldPt) {
- const BasicBlock *DefBB = Def->getBlock();
- const BasicBlock *OldBB = OldPt->getParent();
- for (User *U : Def->users())
- if (auto *MU = dyn_cast<MemoryUse>(U)) {
- BasicBlock *UBB = MU->getBlock();
- // Only analyze uses in BB.
- if (BB != UBB)
- continue;
+ // Return true when there are memory uses of Def in BB.
+ bool hasMemoryUse(const Instruction *NewPt, MemoryDef *Def,
+ const BasicBlock *BB) {
+ const MemorySSA::AccessList *Acc = MSSA->getBlockAccesses(BB);
+ if (!Acc)
+ return false;
- // A use in the same block as the Def is on the path.
- if (UBB == DefBB) {
- assert(MSSA->locallyDominates(Def, MU) && "def not dominating use");
- return true;
- }
+ Instruction *OldPt = Def->getMemoryInst();
+ const BasicBlock *OldBB = OldPt->getParent();
+ const BasicBlock *NewBB = NewPt->getParent();
+ bool ReachedNewPt = false;
- if (UBB != OldBB)
- return true;
+ for (const MemoryAccess &MA : *Acc)
+ if (const MemoryUse *MU = dyn_cast<MemoryUse>(&MA)) {
+ Instruction *Insn = MU->getMemoryInst();
+
+ // Do not check whether MU aliases Def when MU occurs after OldPt.
+ if (BB == OldBB && firstInBB(OldPt, Insn))
+ break;
- // It is only harmful to hoist when the use is before OldPt.
- if (firstInBB(UBB, MU->getMemoryInst(), OldPt))
+ // Do not check whether MU aliases Def when MU occurs before NewPt.
+ if (BB == NewBB) {
+ if (!ReachedNewPt) {
+ if (firstInBB(Insn, NewPt))
+ continue;
+ ReachedNewPt = true;
+ }
+ }
+ if (defClobbersUseOrDef(Def, MU, *AA))
return true;
}
@@ -289,17 +371,18 @@ public:
}
// Return true when there are exception handling or loads of memory Def
- // between OldPt and NewPt.
+ // between Def and NewPt. This function is only called for stores: Def is
+ // the MemoryDef of the store to be hoisted.
// Decrement by 1 NBBsOnAllPaths for each block between HoistPt and BB, and
// return true when the counter NBBsOnAllPaths reaches 0, except when it is
// initialized to -1 which is unlimited.
- bool hasEHOrLoadsOnPath(const Instruction *NewPt, const Instruction *OldPt,
- MemoryAccess *Def, int &NBBsOnAllPaths) {
+ bool hasEHOrLoadsOnPath(const Instruction *NewPt, MemoryDef *Def,
+ int &NBBsOnAllPaths) {
const BasicBlock *NewBB = NewPt->getParent();
- const BasicBlock *OldBB = OldPt->getParent();
+ const BasicBlock *OldBB = Def->getBlock();
assert(DT->dominates(NewBB, OldBB) && "invalid path");
- assert(DT->dominates(Def->getBlock(), NewBB) &&
+ assert(DT->dominates(Def->getDefiningAccess()->getBlock(), NewBB) &&
"def does not dominate new hoisting point");
// Walk all basic blocks reachable in depth-first iteration on the inverse
@@ -313,16 +396,16 @@ public:
continue;
}
+ // Stop walk once the limit is reached.
+ if (NBBsOnAllPaths == 0)
+ return true;
+
// Impossible to hoist with exceptions on the path.
if (hasEH(*I))
return true;
// Check that we do not move a store past loads.
- if (hasMemoryUseOnPath(Def, *I, OldPt))
- return true;
-
- // Stop walk once the limit is reached.
- if (NBBsOnAllPaths == 0)
+ if (hasMemoryUse(NewPt, Def, *I))
return true;
// -1 is unlimited number of blocks on all paths.
@@ -355,14 +438,14 @@ public:
continue;
}
- // Impossible to hoist with exceptions on the path.
- if (hasEH(*I))
- return true;
-
// Stop walk once the limit is reached.
if (NBBsOnAllPaths == 0)
return true;
+ // Impossible to hoist with exceptions on the path.
+ if (hasEH(*I))
+ return true;
+
// -1 is unlimited number of blocks on all paths.
if (NBBsOnAllPaths != -1)
--NBBsOnAllPaths;
@@ -395,13 +478,13 @@ public:
if (NewBB == DBB && !MSSA->isLiveOnEntryDef(D))
if (auto *UD = dyn_cast<MemoryUseOrDef>(D))
- if (firstInBB(DBB, NewPt, UD->getMemoryInst()))
+ if (firstInBB(NewPt, UD->getMemoryInst()))
// Cannot move the load or store to NewPt above its definition in D.
return false;
// Check for unsafe hoistings due to side effects.
if (K == InsKind::Store) {
- if (hasEHOrLoadsOnPath(NewPt, OldPt, D, NBBsOnAllPaths))
+ if (hasEHOrLoadsOnPath(NewPt, dyn_cast<MemoryDef>(U), NBBsOnAllPaths))
return false;
} else if (hasEHOnPath(NewBB, OldBB, NBBsOnAllPaths))
return false;
@@ -417,23 +500,24 @@ public:
return true;
}
- // Return true when it is safe to hoist scalar instructions from BB1 and BB2
- // to HoistBB.
- bool safeToHoistScalar(const BasicBlock *HoistBB, const BasicBlock *BB1,
- const BasicBlock *BB2, int &NBBsOnAllPaths) {
- // Check that the hoisted expression is needed on all paths. When HoistBB
- // already contains an instruction to be hoisted, the expression is needed
- // on all paths. Enable scalar hoisting at -Oz as it is safe to hoist
- // scalars to a place where they are partially needed.
- if (!OptForMinSize && BB1 != HoistBB &&
- !hoistingFromAllPaths(HoistBB, BB1, BB2))
- return false;
+ // Return true when it is safe to hoist scalar instructions from all blocks in
+ // WL to HoistBB.
+ bool safeToHoistScalar(const BasicBlock *HoistBB,
+ SmallPtrSetImpl<const BasicBlock *> &WL,
+ int &NBBsOnAllPaths) {
+ // Enable scalar hoisting at -Oz as it is safe to hoist scalars to a place
+ // where they are partially needed.
+ if (OptForMinSize)
+ return true;
- if (hasEHOnPath(HoistBB, BB1, NBBsOnAllPaths) ||
- hasEHOnPath(HoistBB, BB2, NBBsOnAllPaths))
+ // Check that the hoisted expression is needed on all paths.
+ if (!hoistingFromAllPaths(HoistBB, WL))
return false;
- // Safe to hoist scalars from BB1 and BB2 to HoistBB.
+ for (const BasicBlock *BB : WL)
+ if (hasEHOnPath(HoistBB, BB, NBBsOnAllPaths))
+ return false;
+
return true;
}
@@ -454,7 +538,7 @@ public:
std::sort(InstructionsToHoist.begin(), InstructionsToHoist.end(), Pred);
}
- int NBBsOnAllPaths = MaxNumberOfBBSInPath;
+ int NumBBsOnAllPaths = MaxNumberOfBBSInPath;
SmallVecImplInsn::iterator II = InstructionsToHoist.begin();
SmallVecImplInsn::iterator Start = II;
@@ -462,7 +546,7 @@ public:
BasicBlock *HoistBB = HoistPt->getParent();
MemoryUseOrDef *UD;
if (K != InsKind::Scalar)
- UD = cast<MemoryUseOrDef>(MSSA->getMemoryAccess(HoistPt));
+ UD = MSSA->getMemoryAccess(HoistPt);
for (++II; II != InstructionsToHoist.end(); ++II) {
Instruction *Insn = *II;
@@ -470,10 +554,12 @@ public:
BasicBlock *NewHoistBB;
Instruction *NewHoistPt;
- if (BB == HoistBB) {
+ if (BB == HoistBB) { // Both are in the same Basic Block.
NewHoistBB = HoistBB;
- NewHoistPt = firstInBB(BB, Insn, HoistPt) ? Insn : HoistPt;
+ NewHoistPt = firstInBB(Insn, HoistPt) ? Insn : HoistPt;
} else {
+ // If the hoisting point contains one of the instructions,
+ // then hoist there, otherwise hoist before the terminator.
NewHoistBB = DT->findNearestCommonDominator(HoistBB, BB);
if (NewHoistBB == BB)
NewHoistPt = Insn;
@@ -483,8 +569,12 @@ public:
NewHoistPt = NewHoistBB->getTerminator();
}
+ SmallPtrSet<const BasicBlock *, 2> WL;
+ WL.insert(HoistBB);
+ WL.insert(BB);
+
if (K == InsKind::Scalar) {
- if (safeToHoistScalar(NewHoistBB, HoistBB, BB, NBBsOnAllPaths)) {
+ if (safeToHoistScalar(NewHoistBB, WL, NumBBsOnAllPaths)) {
// Extend HoistPt to NewHoistPt.
HoistPt = NewHoistPt;
HoistBB = NewHoistBB;
@@ -498,13 +588,12 @@ public:
// loading from the same address: for instance there may be a branch on
// which the address of the load may not be initialized.
if ((HoistBB == NewHoistBB || BB == NewHoistBB ||
- hoistingFromAllPaths(NewHoistBB, HoistBB, BB)) &&
+ hoistingFromAllPaths(NewHoistBB, WL)) &&
// Also check that it is safe to move the load or store from HoistPt
// to NewHoistPt, and from Insn to NewHoistPt.
- safeToHoistLdSt(NewHoistPt, HoistPt, UD, K, NBBsOnAllPaths) &&
- safeToHoistLdSt(NewHoistPt, Insn,
- cast<MemoryUseOrDef>(MSSA->getMemoryAccess(Insn)),
- K, NBBsOnAllPaths)) {
+ safeToHoistLdSt(NewHoistPt, HoistPt, UD, K, NumBBsOnAllPaths) &&
+ safeToHoistLdSt(NewHoistPt, Insn, MSSA->getMemoryAccess(Insn),
+ K, NumBBsOnAllPaths)) {
// Extend HoistPt to NewHoistPt.
HoistPt = NewHoistPt;
HoistBB = NewHoistBB;
@@ -520,10 +609,10 @@ public:
// Start over from BB.
Start = II;
if (K != InsKind::Scalar)
- UD = cast<MemoryUseOrDef>(MSSA->getMemoryAccess(*Start));
+ UD = MSSA->getMemoryAccess(*Start);
HoistPt = Insn;
HoistBB = BB;
- NBBsOnAllPaths = MaxNumberOfBBSInPath;
+ NumBBsOnAllPaths = MaxNumberOfBBSInPath;
}
// Save the last partition.
@@ -567,40 +656,88 @@ public:
return true;
}
- Instruction *firstOfTwo(Instruction *I, Instruction *J) const {
- for (Instruction &I1 : *I->getParent())
- if (&I1 == I || &I1 == J)
- return &I1;
- llvm_unreachable("Both I and J must be from same BB");
+ // Same as allOperandsAvailable with recursive check for GEP operands.
+ bool allGepOperandsAvailable(const Instruction *I,
+ const BasicBlock *HoistPt) const {
+ for (const Use &Op : I->operands())
+ if (const auto *Inst = dyn_cast<Instruction>(&Op))
+ if (!DT->dominates(Inst->getParent(), HoistPt)) {
+ if (const GetElementPtrInst *GepOp =
+ dyn_cast<GetElementPtrInst>(Inst)) {
+ if (!allGepOperandsAvailable(GepOp, HoistPt))
+ return false;
+ // Gep is available if all operands of GepOp are available.
+ } else {
+ // Gep is not available if it has operands other than GEPs that are
+ // defined in blocks not dominating HoistPt.
+ return false;
+ }
+ }
+ return true;
}
- // Replace the use of From with To in Insn.
- void replaceUseWith(Instruction *Insn, Value *From, Value *To) const {
- for (Value::use_iterator UI = From->use_begin(), UE = From->use_end();
- UI != UE;) {
- Use &U = *UI++;
- if (U.getUser() == Insn) {
- U.set(To);
- return;
+ // Make all operands of the GEP available.
+ void makeGepsAvailable(Instruction *Repl, BasicBlock *HoistPt,
+ const SmallVecInsn &InstructionsToHoist,
+ Instruction *Gep) const {
+ assert(allGepOperandsAvailable(Gep, HoistPt) &&
+ "GEP operands not available");
+
+ Instruction *ClonedGep = Gep->clone();
+ for (unsigned i = 0, e = Gep->getNumOperands(); i != e; ++i)
+ if (Instruction *Op = dyn_cast<Instruction>(Gep->getOperand(i))) {
+
+ // Check whether the operand is already available.
+ if (DT->dominates(Op->getParent(), HoistPt))
+ continue;
+
+ // As a GEP can refer to other GEPs, recursively make all the operands
+ // of this GEP available at HoistPt.
+ if (GetElementPtrInst *GepOp = dyn_cast<GetElementPtrInst>(Op))
+ makeGepsAvailable(ClonedGep, HoistPt, InstructionsToHoist, GepOp);
}
+
+ // Copy Gep and replace its uses in Repl with ClonedGep.
+ ClonedGep->insertBefore(HoistPt->getTerminator());
+
+ // Conservatively discard any optimization hints; they may differ on the
+ // other paths.
+ ClonedGep->dropUnknownNonDebugMetadata();
+
+ // If we have optimization hints which agree with each other along different
+ // paths, preserve them.
+ for (const Instruction *OtherInst : InstructionsToHoist) {
+ const GetElementPtrInst *OtherGep;
+ if (auto *OtherLd = dyn_cast<LoadInst>(OtherInst))
+ OtherGep = cast<GetElementPtrInst>(OtherLd->getPointerOperand());
+ else
+ OtherGep = cast<GetElementPtrInst>(
+ cast<StoreInst>(OtherInst)->getPointerOperand());
+ ClonedGep->andIRFlags(OtherGep);
}
- llvm_unreachable("should replace exactly once");
+
+ // Replace uses of Gep with ClonedGep in Repl.
+ Repl->replaceUsesOfWith(Gep, ClonedGep);
}
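// A minimal sketch, in plain C++, of the flag-merging idea behind
// ClonedGep->andIRFlags(OtherGep) above: a hint survives on the hoisted copy
// only if every merged instruction carried it. The function name and the
// bitmask representation are stand-ins, not the LLVM API.
#include <cstdint>
#include <vector>

uint32_t intersectHints(const std::vector<uint32_t> &HintsPerPath) {
  uint32_t Common = ~0u;          // start with every hint assumed present
  for (uint32_t H : HintsPerPath)
    Common &= H;                  // drop any hint some path does not guarantee
  return Common;
}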
- bool makeOperandsAvailable(Instruction *Repl, BasicBlock *HoistPt) const {
+ // When Repl is a load or a store, we make all of its GEPs available:
+ // GEPs are not hoisted by default, to avoid hoisting the address
+ // computations without the associated load or store.
+ bool makeGepOperandsAvailable(Instruction *Repl, BasicBlock *HoistPt,
+ const SmallVecInsn &InstructionsToHoist) const {
// Check whether the GEP of a ld/st can be synthesized at HoistPt.
GetElementPtrInst *Gep = nullptr;
Instruction *Val = nullptr;
- if (auto *Ld = dyn_cast<LoadInst>(Repl))
+ if (auto *Ld = dyn_cast<LoadInst>(Repl)) {
Gep = dyn_cast<GetElementPtrInst>(Ld->getPointerOperand());
- if (auto *St = dyn_cast<StoreInst>(Repl)) {
+ } else if (auto *St = dyn_cast<StoreInst>(Repl)) {
Gep = dyn_cast<GetElementPtrInst>(St->getPointerOperand());
Val = dyn_cast<Instruction>(St->getValueOperand());
// Check that the stored value is available.
if (Val) {
if (isa<GetElementPtrInst>(Val)) {
// Check whether we can compute the GEP at HoistPt.
- if (!allOperandsAvailable(Val, HoistPt))
+ if (!allGepOperandsAvailable(Val, HoistPt))
return false;
} else if (!DT->dominates(Val->getParent(), HoistPt))
return false;
@@ -608,20 +745,13 @@ public:
}
// Check whether we can compute the Gep at HoistPt.
- if (!Gep || !allOperandsAvailable(Gep, HoistPt))
+ if (!Gep || !allGepOperandsAvailable(Gep, HoistPt))
return false;
- // Copy the gep before moving the ld/st.
- Instruction *ClonedGep = Gep->clone();
- ClonedGep->insertBefore(HoistPt->getTerminator());
- replaceUseWith(Repl, Gep, ClonedGep);
+ makeGepsAvailable(Repl, HoistPt, InstructionsToHoist, Gep);
- // Also copy Val when it is a GEP.
- if (Val && isa<GetElementPtrInst>(Val)) {
- Instruction *ClonedVal = Val->clone();
- ClonedVal->insertBefore(HoistPt->getTerminator());
- replaceUseWith(Repl, Val, ClonedVal);
- }
+ if (Val && isa<GetElementPtrInst>(Val))
+ makeGepsAvailable(Repl, HoistPt, InstructionsToHoist, Val);
return true;
}
@@ -635,17 +765,21 @@ public:
const SmallVecInsn &InstructionsToHoist = HP.second;
Instruction *Repl = nullptr;
for (Instruction *I : InstructionsToHoist)
- if (I->getParent() == HoistPt) {
+ if (I->getParent() == HoistPt)
// If there are two instructions in HoistPt to be hoisted in place:
// update Repl to be the first one, such that we can rename the uses
// of the second based on the first.
- Repl = !Repl ? I : firstOfTwo(Repl, I);
- }
+ if (!Repl || firstInBB(I, Repl))
+ Repl = I;
+ // Keep track of whether we moved the instruction so we know whether we
+ // should move the MemoryAccess.
+ bool MoveAccess = true;
if (Repl) {
// Repl is already in HoistPt: it remains in place.
assert(allOperandsAvailable(Repl, HoistPt) &&
"instruction depends on operands that are not available");
+ MoveAccess = false;
} else {
// When we do not find Repl in HoistPt, select the first in the list
// and move it to HoistPt.
@@ -654,10 +788,39 @@ public:
// We can move Repl in HoistPt only when all operands are available.
// The order in which hoistings are done may influence the availability
// of operands.
- if (!allOperandsAvailable(Repl, HoistPt) &&
- !makeOperandsAvailable(Repl, HoistPt))
- continue;
- Repl->moveBefore(HoistPt->getTerminator());
+ if (!allOperandsAvailable(Repl, HoistPt)) {
+
+ // When HoistingGeps there is nothing more we can do to make the
+ // operands available: just continue.
+ if (HoistingGeps)
+ continue;
+
+ // When not HoistingGeps we need to copy the GEPs.
+ if (!makeGepOperandsAvailable(Repl, HoistPt, InstructionsToHoist))
+ continue;
+ }
+
+ // Move the instruction to the end of HoistPt.
+ Instruction *Last = HoistPt->getTerminator();
+ MD->removeInstruction(Repl);
+ Repl->moveBefore(Last);
+
+ DFSNumber[Repl] = DFSNumber[Last]++;
+ }
+
+ MemoryAccess *NewMemAcc = MSSA->getMemoryAccess(Repl);
+
+ if (MoveAccess) {
+ if (MemoryUseOrDef *OldMemAcc =
+ dyn_cast_or_null<MemoryUseOrDef>(NewMemAcc)) {
+ // The definition of this ld/st will not change: ld/st hoisting is
+ // legal when the ld/st is not moved past its current definition.
+ MemoryAccess *Def = OldMemAcc->getDefiningAccess();
+ NewMemAcc =
+ MSSA->createMemoryAccessInBB(Repl, Def, HoistPt, MemorySSA::End);
+ OldMemAcc->replaceAllUsesWith(NewMemAcc);
+ MSSA->removeMemoryAccess(OldMemAcc);
+ }
}
if (isa<LoadInst>(Repl))
@@ -673,15 +836,54 @@ public:
for (Instruction *I : InstructionsToHoist)
if (I != Repl) {
++NR;
- if (isa<LoadInst>(Repl))
+ if (auto *ReplacementLoad = dyn_cast<LoadInst>(Repl)) {
+ ReplacementLoad->setAlignment(
+ std::min(ReplacementLoad->getAlignment(),
+ cast<LoadInst>(I)->getAlignment()));
++NumLoadsRemoved;
- else if (isa<StoreInst>(Repl))
+ } else if (auto *ReplacementStore = dyn_cast<StoreInst>(Repl)) {
+ ReplacementStore->setAlignment(
+ std::min(ReplacementStore->getAlignment(),
+ cast<StoreInst>(I)->getAlignment()));
++NumStoresRemoved;
- else if (isa<CallInst>(Repl))
+ } else if (auto *ReplacementAlloca = dyn_cast<AllocaInst>(Repl)) {
+ ReplacementAlloca->setAlignment(
+ std::max(ReplacementAlloca->getAlignment(),
+ cast<AllocaInst>(I)->getAlignment()));
+ } else if (isa<CallInst>(Repl)) {
++NumCallsRemoved;
+ }
+
+ if (NewMemAcc) {
+ // Update the uses of the old MSSA access with NewMemAcc.
+ MemoryAccess *OldMA = MSSA->getMemoryAccess(I);
+ OldMA->replaceAllUsesWith(NewMemAcc);
+ MSSA->removeMemoryAccess(OldMA);
+ }
+
+ Repl->andIRFlags(I);
+ combineKnownMetadata(Repl, I);
I->replaceAllUsesWith(Repl);
+ // Also invalidate the Alias Analysis cache.
+ MD->removeInstruction(I);
I->eraseFromParent();
}
+
+ // Remove MemorySSA phi nodes with the same arguments.
+ if (NewMemAcc) {
+ SmallPtrSet<MemoryPhi *, 4> UsePhis;
+ for (User *U : NewMemAcc->users())
+ if (MemoryPhi *Phi = dyn_cast<MemoryPhi>(U))
+ UsePhis.insert(Phi);
+
+ for (auto *Phi : UsePhis) {
+ auto In = Phi->incoming_values();
+ if (all_of(In, [&](Use &U) { return U == NewMemAcc; })) {
+ Phi->replaceAllUsesWith(NewMemAcc);
+ MSSA->removeMemoryAccess(Phi);
+ }
+ }
+ }
}
NumHoisted += NL + NS + NC + NI;
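// A small numeric sketch of the alignment merging above (plain C++, not the
// LLVM API; function names are illustrative): a load that replaces several
// loads may only claim the smallest alignment among them, while a merged
// alloca keeps the largest requested alignment.
#include <algorithm>

unsigned mergedLoadAlignment(unsigned ReplAlign, unsigned OtherAlign) {
  return std::min(ReplAlign, OtherAlign);  // e.g. min(8, 4) == 4
}

unsigned mergedAllocaAlignment(unsigned ReplAlign, unsigned OtherAlign) {
  return std::max(ReplAlign, OtherAlign);  // e.g. max(8, 4) == 8
}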
@@ -700,7 +902,17 @@ public:
StoreInfo SI;
CallInfo CI;
for (BasicBlock *BB : depth_first(&F.getEntryBlock())) {
+ int InstructionNb = 0;
for (Instruction &I1 : *BB) {
+ // Only hoist the first MaxDepthInBB instructions in BB: hoisting deeper
+ // may increase register pressure and compilation time.
+ if (MaxDepthInBB != -1 && InstructionNb++ >= MaxDepthInBB)
+ break;
+
+ // Do not value number terminator instructions.
+ if (isa<TerminatorInst>(&I1))
+ break;
+
if (auto *Load = dyn_cast<LoadInst>(&I1))
LI.insert(Load, VN);
else if (auto *Store = dyn_cast<StoreInst>(&I1))
@@ -718,8 +930,12 @@ public:
if (Call->mayThrow())
break;
}
+
+ if (Call->isConvergent())
+ break;
+
CI.insert(Call, VN);
- } else if (OptForMinSize || !isa<GetElementPtrInst>(&I1))
+ } else if (HoistingGeps || !isa<GetElementPtrInst>(&I1))
// Do not hoist scalars past calls that may write to memory because
// that could result in spills later. GEPs are handled separately.
// TODO: We can relax this for targets like AArch64 as they have more
@@ -737,39 +953,6 @@ public:
computeInsertionPoints(CI.getStoreVNTable(), HPL, InsKind::Store);
return hoist(HPL);
}
-
- bool run(Function &F) {
- VN.setDomTree(DT);
- VN.setAliasAnalysis(AA);
- VN.setMemDep(MD);
- bool Res = false;
-
- unsigned I = 0;
- for (const BasicBlock *BB : depth_first(&F.getEntryBlock()))
- DFSNumber.insert({BB, ++I});
-
- // FIXME: use lazy evaluation of VN to avoid the fix-point computation.
- while (1) {
- // FIXME: only compute MemorySSA once. We need to update the analysis in
- // the same time as transforming the code.
- MemorySSA M(F, AA, DT);
- MSSA = &M;
-
- auto HoistStat = hoistExpressions(F);
- if (HoistStat.first + HoistStat.second == 0) {
- return Res;
- }
- if (HoistStat.second > 0) {
- // To address a limitation of the current GVN, we need to rerun the
- // hoisting after we hoisted loads in order to be able to hoist all
- // scalars dependent on the hoisted loads. Same for stores.
- VN.clear();
- }
- Res = true;
- }
-
- return Res;
- }
};
class GVNHoistLegacyPass : public FunctionPass {
@@ -781,11 +964,14 @@ public:
}
bool runOnFunction(Function &F) override {
+ if (skipFunction(F))
+ return false;
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
auto &MD = getAnalysis<MemoryDependenceWrapperPass>().getMemDep();
+ auto &MSSA = getAnalysis<MemorySSAWrapperPass>().getMSSA();
- GVNHoist G(&DT, &AA, &MD, F.optForMinSize());
+ GVNHoist G(&DT, &AA, &MD, &MSSA, F.optForMinSize());
return G.run(F);
}
@@ -793,23 +979,25 @@ public:
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<MemoryDependenceWrapperPass>();
+ AU.addRequired<MemorySSAWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<MemorySSAWrapperPass>();
}
};
} // namespace
-PreservedAnalyses GVNHoistPass::run(Function &F,
- AnalysisManager<Function> &AM) {
+PreservedAnalyses GVNHoistPass::run(Function &F, FunctionAnalysisManager &AM) {
DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
AliasAnalysis &AA = AM.getResult<AAManager>(F);
MemoryDependenceResults &MD = AM.getResult<MemoryDependenceAnalysis>(F);
-
- GVNHoist G(&DT, &AA, &MD, F.optForMinSize());
+ MemorySSA &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
+ GVNHoist G(&DT, &AA, &MD, &MSSA, F.optForMinSize());
if (!G.run(F))
return PreservedAnalyses::all();
PreservedAnalyses PA;
PA.preserve<DominatorTreeAnalysis>();
+ PA.preserve<MemorySSAAnalysis>();
return PA;
}
@@ -817,6 +1005,7 @@ char GVNHoistLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(GVNHoistLegacyPass, "gvn-hoist",
"Early GVN Hoisting of Expressions", false, false)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(GVNHoistLegacyPass, "gvn-hoist",
diff --git a/contrib/llvm/lib/Transforms/Scalar/GuardWidening.cpp b/contrib/llvm/lib/Transforms/Scalar/GuardWidening.cpp
index 7686e65efed9..b05ef002a456 100644
--- a/contrib/llvm/lib/Transforms/Scalar/GuardWidening.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/GuardWidening.cpp
@@ -46,6 +46,7 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
@@ -653,7 +654,7 @@ bool GuardWideningImpl::combineRangeChecks(
}
PreservedAnalyses GuardWideningPass::run(Function &F,
- AnalysisManager<Function> &AM) {
+ FunctionAnalysisManager &AM) {
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &LI = AM.getResult<LoopAnalysis>(F);
auto &PDT = AM.getResult<PostDominatorTreeAnalysis>(F);
diff --git a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index e958563e2d10..6aeb5237ffe3 100644
--- a/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -79,8 +79,12 @@ static cl::opt<ReplaceExitVal> ReplaceExitValue(
clEnumValN(OnlyCheapRepl, "cheap",
"only replace exit value when the cost is cheap"),
clEnumValN(AlwaysRepl, "always",
- "always replace exit value whenever possible"),
- clEnumValEnd));
+ "always replace exit value whenever possible")));
+
+static cl::opt<bool> UsePostIncrementRanges(
+ "indvars-post-increment-ranges", cl::Hidden,
+ cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
+ cl::init(true));
namespace {
struct RewritePhi;
@@ -481,7 +485,7 @@ Value *IndVarSimplify::expandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S,
Type *ResultTy) {
// Before expanding S into an expensive LLVM expression, see if we can use an
// already existing value as the expansion for S.
- if (Value *ExistingValue = Rewriter.findExistingExpansion(S, InsertPt, L))
+ if (Value *ExistingValue = Rewriter.getExactExistingExpansion(S, InsertPt, L))
if (ExistingValue->getType() == ResultTy)
return ExistingValue;
@@ -506,7 +510,8 @@ Value *IndVarSimplify::expandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S,
/// constant operands at the beginning of the loop.
void IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) {
// Check a pre-condition.
- assert(L->isRecursivelyLCSSAForm(*DT) && "Indvars did not preserve LCSSA!");
+ assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
+ "Indvars did not preserve LCSSA!");
SmallVector<BasicBlock*, 8> ExitBlocks;
L->getUniqueExitBlocks(ExitBlocks);
@@ -880,7 +885,6 @@ class WidenIV {
// Parameters
PHINode *OrigPhi;
Type *WideType;
- bool IsSigned;
// Context
LoopInfo *LI;
@@ -888,31 +892,70 @@ class WidenIV {
ScalarEvolution *SE;
DominatorTree *DT;
+ // Does the module have any calls to the llvm.experimental.guard intrinsic
+ // at all? If not we can avoid scanning instructions looking for guards.
+ bool HasGuards;
+
// Result
PHINode *WidePhi;
Instruction *WideInc;
const SCEV *WideIncExpr;
SmallVectorImpl<WeakVH> &DeadInsts;
- SmallPtrSet<Instruction*,16> Widened;
+ SmallPtrSet<Instruction *,16> Widened;
SmallVector<NarrowIVDefUse, 8> NarrowIVUsers;
+ enum ExtendKind { ZeroExtended, SignExtended, Unknown };
+ // A map tracking the kind of extension used to widen each narrow IV
+ // and narrow IV user.
+ // Key: pointer to a narrow IV or IV user.
+ // Value: the kind of extension used to widen this Instruction.
+ DenseMap<AssertingVH<Instruction>, ExtendKind> ExtendKindMap;
+
+ typedef std::pair<AssertingVH<Value>, AssertingVH<Instruction>> DefUserPair;
+ // A map with control-dependent ranges for post-increment IV uses. The key is
+ // a pair of an IV def and a use of that def, which denotes the context. The
+ // value is a ConstantRange representing the possible values of the def in
+ // the given context.
+ DenseMap<DefUserPair, ConstantRange> PostIncRangeInfos;
+
+ Optional<ConstantRange> getPostIncRangeInfo(Value *Def,
+ Instruction *UseI) {
+ DefUserPair Key(Def, UseI);
+ auto It = PostIncRangeInfos.find(Key);
+ return It == PostIncRangeInfos.end()
+ ? Optional<ConstantRange>(None)
+ : Optional<ConstantRange>(It->second);
+ }
+
+ void calculatePostIncRanges(PHINode *OrigPhi);
+ void calculatePostIncRange(Instruction *NarrowDef, Instruction *NarrowUser);
+ void updatePostIncRangeInfo(Value *Def, Instruction *UseI, ConstantRange R) {
+ DefUserPair Key(Def, UseI);
+ auto It = PostIncRangeInfos.find(Key);
+ if (It == PostIncRangeInfos.end())
+ PostIncRangeInfos.insert({Key, R});
+ else
+ It->second = R.intersectWith(It->second);
+ }
+
public:
WidenIV(const WideIVInfo &WI, LoopInfo *LInfo,
ScalarEvolution *SEv, DominatorTree *DTree,
- SmallVectorImpl<WeakVH> &DI) :
+ SmallVectorImpl<WeakVH> &DI, bool HasGuards) :
OrigPhi(WI.NarrowIV),
WideType(WI.WidestNativeType),
- IsSigned(WI.IsSigned),
LI(LInfo),
L(LI->getLoopFor(OrigPhi->getParent())),
SE(SEv),
DT(DTree),
+ HasGuards(HasGuards),
WidePhi(nullptr),
WideInc(nullptr),
WideIncExpr(nullptr),
DeadInsts(DI) {
assert(L->getHeader() == OrigPhi->getParent() && "Phi must be an IV");
+ ExtendKindMap[OrigPhi] = WI.IsSigned ? SignExtended : ZeroExtended;
}
PHINode *createWideIV(SCEVExpander &Rewriter);
@@ -926,9 +969,13 @@ protected:
const SCEVAddRecExpr *WideAR);
Instruction *cloneBitwiseIVUser(NarrowIVDefUse DU);
- const SCEVAddRecExpr *getWideRecurrence(Instruction *NarrowUse);
+ ExtendKind getExtendKind(Instruction *I);
+
+ typedef std::pair<const SCEVAddRecExpr *, ExtendKind> WidenedRecTy;
- const SCEVAddRecExpr* getExtendedOperandRecurrence(NarrowIVDefUse DU);
+ WidenedRecTy getWideRecurrence(NarrowIVDefUse DU);
+
+ WidenedRecTy getExtendedOperandRecurrence(NarrowIVDefUse DU);
const SCEV *getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
unsigned OpCode) const;
@@ -1002,6 +1049,7 @@ Instruction *WidenIV::cloneBitwiseIVUser(NarrowIVDefUse DU) {
// about the narrow operand yet so must insert a [sz]ext. It is probably loop
// invariant and will be folded or hoisted. If it actually comes from a
// widened IV, it should be removed during a future call to widenIVUse.
+ bool IsSigned = getExtendKind(NarrowDef) == SignExtended;
Value *LHS = (NarrowUse->getOperand(0) == NarrowDef)
? WideDef
: createExtendInst(NarrowUse->getOperand(0), WideType,
@@ -1086,7 +1134,7 @@ Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
return WideUse == WideAR;
};
- bool SignExtend = IsSigned;
+ bool SignExtend = getExtendKind(NarrowDef) == SignExtended;
if (!GuessNonIVOperand(SignExtend)) {
SignExtend = !SignExtend;
if (!GuessNonIVOperand(SignExtend))
@@ -1112,6 +1160,12 @@ Instruction *WidenIV::cloneArithmeticIVUser(NarrowIVDefUse DU,
return WideBO;
}
+WidenIV::ExtendKind WidenIV::getExtendKind(Instruction *I) {
+ auto It = ExtendKindMap.find(I);
+ assert(It != ExtendKindMap.end() && "Instruction not yet extended!");
+ return It->second;
+}
+
const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
unsigned OpCode) const {
if (OpCode == Instruction::Add)
@@ -1127,15 +1181,16 @@ const SCEV *WidenIV::getSCEVByOpCode(const SCEV *LHS, const SCEV *RHS,
/// No-wrap operations can transfer sign extension of their result to their
/// operands. Generate the SCEV value for the widened operation without
/// actually modifying the IR yet. If the expression after extending the
-/// operands is an AddRec for this loop, return it.
-const SCEVAddRecExpr* WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
+/// operands is an AddRec for this loop, return the AddRec and the kind of
+/// extension used.
+WidenIV::WidenedRecTy WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
// Handle the common case of add<nsw/nuw>
const unsigned OpCode = DU.NarrowUse->getOpcode();
// Only Add/Sub/Mul instructions supported yet.
if (OpCode != Instruction::Add && OpCode != Instruction::Sub &&
OpCode != Instruction::Mul)
- return nullptr;
+ return {nullptr, Unknown};
// One operand (NarrowDef) has already been extended to WideDef. Now determine
// if extending the other will lead to a recurrence.
@@ -1146,14 +1201,15 @@ const SCEVAddRecExpr* WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
const SCEV *ExtendOperExpr = nullptr;
const OverflowingBinaryOperator *OBO =
cast<OverflowingBinaryOperator>(DU.NarrowUse);
- if (IsSigned && OBO->hasNoSignedWrap())
+ ExtendKind ExtKind = getExtendKind(DU.NarrowDef);
+ if (ExtKind == SignExtended && OBO->hasNoSignedWrap())
ExtendOperExpr = SE->getSignExtendExpr(
SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
- else if(!IsSigned && OBO->hasNoUnsignedWrap())
+ else if(ExtKind == ZeroExtended && OBO->hasNoUnsignedWrap())
ExtendOperExpr = SE->getZeroExtendExpr(
SE->getSCEV(DU.NarrowUse->getOperand(ExtendOperIdx)), WideType);
else
- return nullptr;
+ return {nullptr, Unknown};
// When creating this SCEV expr, don't apply the current operations NSW or NUW
// flags. This instruction may be guarded by control flow that the no-wrap
@@ -1171,33 +1227,49 @@ const SCEVAddRecExpr* WidenIV::getExtendedOperandRecurrence(NarrowIVDefUse DU) {
dyn_cast<SCEVAddRecExpr>(getSCEVByOpCode(lhs, rhs, OpCode));
if (!AddRec || AddRec->getLoop() != L)
- return nullptr;
- return AddRec;
+ return {nullptr, Unknown};
+
+ return {AddRec, ExtKind};
}
/// Is this instruction potentially interesting for further simplification after
/// widening its type? In other words, can the extend be safely hoisted out of
/// the loop with SCEV reducing the value to a recurrence on the same loop. If
-/// so, return the sign or zero extended recurrence. Otherwise return NULL.
-const SCEVAddRecExpr *WidenIV::getWideRecurrence(Instruction *NarrowUse) {
- if (!SE->isSCEVable(NarrowUse->getType()))
- return nullptr;
-
- const SCEV *NarrowExpr = SE->getSCEV(NarrowUse);
- if (SE->getTypeSizeInBits(NarrowExpr->getType())
- >= SE->getTypeSizeInBits(WideType)) {
+/// so, return the extended recurrence and the kind of extension used. Otherwise
+/// return {nullptr, Unknown}.
+WidenIV::WidenedRecTy WidenIV::getWideRecurrence(NarrowIVDefUse DU) {
+ if (!SE->isSCEVable(DU.NarrowUse->getType()))
+ return {nullptr, Unknown};
+
+ const SCEV *NarrowExpr = SE->getSCEV(DU.NarrowUse);
+ if (SE->getTypeSizeInBits(NarrowExpr->getType()) >=
+ SE->getTypeSizeInBits(WideType)) {
// NarrowUse implicitly widens its operand. e.g. a gep with a narrow
// index. So don't follow this use.
- return nullptr;
+ return {nullptr, Unknown};
}
- const SCEV *WideExpr = IsSigned ?
- SE->getSignExtendExpr(NarrowExpr, WideType) :
- SE->getZeroExtendExpr(NarrowExpr, WideType);
+ const SCEV *WideExpr;
+ ExtendKind ExtKind;
+ if (DU.NeverNegative) {
+ WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
+ if (isa<SCEVAddRecExpr>(WideExpr))
+ ExtKind = SignExtended;
+ else {
+ WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
+ ExtKind = ZeroExtended;
+ }
+ } else if (getExtendKind(DU.NarrowDef) == SignExtended) {
+ WideExpr = SE->getSignExtendExpr(NarrowExpr, WideType);
+ ExtKind = SignExtended;
+ } else {
+ WideExpr = SE->getZeroExtendExpr(NarrowExpr, WideType);
+ ExtKind = ZeroExtended;
+ }
const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(WideExpr);
if (!AddRec || AddRec->getLoop() != L)
- return nullptr;
- return AddRec;
+ return {nullptr, Unknown};
+ return {AddRec, ExtKind};
}
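// A standalone C++ illustration (names are illustrative only) of the
// DU.NeverNegative case handled above: for a non-negative narrow value, sign
// extension and zero extension produce the same wide value, so either
// extension kind can be chosen for the widened recurrence.
#include <cstdint>

bool extensionsAgree(int32_t Narrow) {
  if (Narrow < 0)
    return true;  // the claim is only about non-negative values
  int64_t Sext = static_cast<int64_t>(Narrow);                         // sign extend
  int64_t Zext = static_cast<int64_t>(static_cast<uint32_t>(Narrow));  // zero extend
  return Sext == Zext;  // always true when Narrow >= 0
}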
/// This IV user cannot be widened. Replace this use of the original narrow IV
@@ -1233,7 +1305,7 @@ bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
//
// (A) == icmp slt i32 sext(%narrow), sext(%val)
// == icmp slt i32 zext(%narrow), sext(%val)
-
+ bool IsSigned = getExtendKind(DU.NarrowDef) == SignExtended;
if (!(DU.NeverNegative || IsSigned == Cmp->isSigned()))
return false;
@@ -1258,6 +1330,8 @@ bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) {
/// Determine whether an individual user of the narrow IV can be widened. If so,
/// return the wide clone of the user.
Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
+ assert(ExtendKindMap.count(DU.NarrowDef) &&
+ "Should already know the kind of extension used to widen NarrowDef");
// Stop traversing the def-use chain at inner-loop phis or post-loop phis.
if (PHINode *UsePhi = dyn_cast<PHINode>(DU.NarrowUse)) {
@@ -1288,8 +1362,19 @@ Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
return nullptr;
}
}
+
+ // This narrow use can be widened by a sext if it's non-negative or its narrow
+ // def was widened by a sext. Same for zext.
+ auto canWidenBySExt = [&]() {
+ return DU.NeverNegative || getExtendKind(DU.NarrowDef) == SignExtended;
+ };
+ auto canWidenByZExt = [&]() {
+ return DU.NeverNegative || getExtendKind(DU.NarrowDef) == ZeroExtended;
+ };
+
// Our raison d'etre! Eliminate sign and zero extension.
- if (IsSigned ? isa<SExtInst>(DU.NarrowUse) : isa<ZExtInst>(DU.NarrowUse)) {
+ if ((isa<SExtInst>(DU.NarrowUse) && canWidenBySExt()) ||
+ (isa<ZExtInst>(DU.NarrowUse) && canWidenByZExt())) {
Value *NewDef = DU.WideDef;
if (DU.NarrowUse->getType() != WideType) {
unsigned CastWidth = SE->getTypeSizeInBits(DU.NarrowUse->getType());
@@ -1327,11 +1412,12 @@ Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
}
// Does this user itself evaluate to a recurrence after widening?
- const SCEVAddRecExpr *WideAddRec = getWideRecurrence(DU.NarrowUse);
- if (!WideAddRec)
- WideAddRec = getExtendedOperandRecurrence(DU);
+ WidenedRecTy WideAddRec = getExtendedOperandRecurrence(DU);
+ if (!WideAddRec.first)
+ WideAddRec = getWideRecurrence(DU);
- if (!WideAddRec) {
+ assert((WideAddRec.first == nullptr) == (WideAddRec.second == Unknown));
+ if (!WideAddRec.first) {
// If use is a loop condition, try to promote the condition instead of
// truncating the IV first.
if (widenLoopCompare(DU))
@@ -1351,10 +1437,11 @@ Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
// Reuse the IV increment that SCEVExpander created as long as it dominates
// NarrowUse.
Instruction *WideUse = nullptr;
- if (WideAddRec == WideIncExpr && Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
+ if (WideAddRec.first == WideIncExpr &&
+ Rewriter.hoistIVInc(WideInc, DU.NarrowUse))
WideUse = WideInc;
else {
- WideUse = cloneIVUser(DU, WideAddRec);
+ WideUse = cloneIVUser(DU, WideAddRec.first);
if (!WideUse)
return nullptr;
}
@@ -1363,13 +1450,14 @@ Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
// evaluates to the same expression as the extended narrow use, but doesn't
// absolutely guarantee it. Hence the following failsafe check. In rare cases
// where it fails, we simply throw away the newly created wide use.
- if (WideAddRec != SE->getSCEV(WideUse)) {
+ if (WideAddRec.first != SE->getSCEV(WideUse)) {
DEBUG(dbgs() << "Wide use expression mismatch: " << *WideUse
- << ": " << *SE->getSCEV(WideUse) << " != " << *WideAddRec << "\n");
+ << ": " << *SE->getSCEV(WideUse) << " != " << *WideAddRec.first << "\n");
DeadInsts.emplace_back(WideUse);
return nullptr;
}
+ ExtendKindMap[DU.NarrowUse] = WideAddRec.second;
// Returning WideUse pushes it on the worklist.
return WideUse;
}
@@ -1378,7 +1466,7 @@ Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) {
///
void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
const SCEV *NarrowSCEV = SE->getSCEV(NarrowDef);
- bool NeverNegative =
+ bool NonNegativeDef =
SE->isKnownPredicate(ICmpInst::ICMP_SGE, NarrowSCEV,
SE->getConstant(NarrowSCEV->getType(), 0));
for (User *U : NarrowDef->users()) {
@@ -1388,7 +1476,15 @@ void WidenIV::pushNarrowIVUsers(Instruction *NarrowDef, Instruction *WideDef) {
if (!Widened.insert(NarrowUser).second)
continue;
- NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef, NeverNegative);
+ bool NonNegativeUse = false;
+ if (!NonNegativeDef) {
+ // We might have control-dependent range information for this context.
+ if (auto RangeInfo = getPostIncRangeInfo(NarrowDef, NarrowUser))
+ NonNegativeUse = RangeInfo->getSignedMin().isNonNegative();
+ }
+
+ NarrowIVUsers.emplace_back(NarrowDef, NarrowUser, WideDef,
+ NonNegativeDef || NonNegativeUse);
}
}
@@ -1408,9 +1504,9 @@ PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
return nullptr;
// Widen the induction variable expression.
- const SCEV *WideIVExpr = IsSigned ?
- SE->getSignExtendExpr(AddRec, WideType) :
- SE->getZeroExtendExpr(AddRec, WideType);
+ const SCEV *WideIVExpr = getExtendKind(OrigPhi) == SignExtended
+ ? SE->getSignExtendExpr(AddRec, WideType)
+ : SE->getZeroExtendExpr(AddRec, WideType);
assert(SE->getEffectiveSCEVType(WideIVExpr->getType()) == WideType &&
"Expect the new IV expression to preserve its type");
@@ -1428,6 +1524,19 @@ PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
SE->properlyDominates(AddRec->getStepRecurrence(*SE), L->getHeader()) &&
"Loop header phi recurrence inputs do not dominate the loop");
+ // Iterate over IV uses (including transitive ones) looking for IV increments
+ // of the form 'add nsw %iv, <const>'. For each increment and each use of
+ // the increment, calculate control-dependent range information based on
+ // dominating conditions inside of the loop (e.g. a range check inside of the
+ // loop). Calculated ranges are stored in PostIncRangeInfos map.
+ //
+ // Control-dependent range information is later used to prove that a narrow
+ // definition is not negative (see pushNarrowIVUsers). It's difficult to do
+ // this on demand because when pushNarrowIVUsers needs this information some
+ // of the dominating conditions might be already widened.
+ if (UsePostIncrementRanges)
+ calculatePostIncRanges(OrigPhi);
+
// The rewriter provides a value for the desired IV expression. This may
// either find an existing phi or materialize a new one. Either way, we
// expect a well-formed cyclic phi-with-increments. i.e. any operand not part
@@ -1443,6 +1552,11 @@ PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
WideInc =
cast<Instruction>(WidePhi->getIncomingValueForBlock(LatchBlock));
WideIncExpr = SE->getSCEV(WideInc);
+ // Propagate the debug location associated with the original loop increment
+ // to the new (widened) increment.
+ auto *OrigInc =
+ cast<Instruction>(OrigPhi->getIncomingValueForBlock(LatchBlock));
+ WideInc->setDebugLoc(OrigInc->getDebugLoc());
}
DEBUG(dbgs() << "Wide IV: " << *WidePhi << "\n");
@@ -1472,6 +1586,114 @@ PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) {
return WidePhi;
}
+/// Calculates the control-dependent range for the given def at the given
+/// context by looking at dominating conditions inside the loop.
+void WidenIV::calculatePostIncRange(Instruction *NarrowDef,
+ Instruction *NarrowUser) {
+ using namespace llvm::PatternMatch;
+
+ Value *NarrowDefLHS;
+ const APInt *NarrowDefRHS;
+ if (!match(NarrowDef, m_NSWAdd(m_Value(NarrowDefLHS),
+ m_APInt(NarrowDefRHS))) ||
+ !NarrowDefRHS->isNonNegative())
+ return;
+
+ auto UpdateRangeFromCondition = [&] (Value *Condition,
+ bool TrueDest) {
+ CmpInst::Predicate Pred;
+ Value *CmpRHS;
+ if (!match(Condition, m_ICmp(Pred, m_Specific(NarrowDefLHS),
+ m_Value(CmpRHS))))
+ return;
+
+ CmpInst::Predicate P =
+ TrueDest ? Pred : CmpInst::getInversePredicate(Pred);
+
+ auto CmpRHSRange = SE->getSignedRange(SE->getSCEV(CmpRHS));
+ auto CmpConstrainedLHSRange =
+ ConstantRange::makeAllowedICmpRegion(P, CmpRHSRange);
+ auto NarrowDefRange =
+ CmpConstrainedLHSRange.addWithNoSignedWrap(*NarrowDefRHS);
+
+ updatePostIncRangeInfo(NarrowDef, NarrowUser, NarrowDefRange);
+ };
+
+ auto UpdateRangeFromGuards = [&](Instruction *Ctx) {
+ if (!HasGuards)
+ return;
+
+ for (Instruction &I : make_range(Ctx->getIterator().getReverse(),
+ Ctx->getParent()->rend())) {
+ Value *C = nullptr;
+ if (match(&I, m_Intrinsic<Intrinsic::experimental_guard>(m_Value(C))))
+ UpdateRangeFromCondition(C, /*TrueDest=*/true);
+ }
+ };
+
+ UpdateRangeFromGuards(NarrowUser);
+
+ BasicBlock *NarrowUserBB = NarrowUser->getParent();
+ // If NarrowUserBB is statically unreachable, asking dominator queries may
+ // yield surprising results (e.g. the block may not have a dom tree node).
+ if (!DT->isReachableFromEntry(NarrowUserBB))
+ return;
+
+ for (auto *DTB = (*DT)[NarrowUserBB]->getIDom();
+ L->contains(DTB->getBlock());
+ DTB = DTB->getIDom()) {
+ auto *BB = DTB->getBlock();
+ auto *TI = BB->getTerminator();
+ UpdateRangeFromGuards(TI);
+
+ auto *BI = dyn_cast<BranchInst>(TI);
+ if (!BI || !BI->isConditional())
+ continue;
+
+ auto *TrueSuccessor = BI->getSuccessor(0);
+ auto *FalseSuccessor = BI->getSuccessor(1);
+
+ auto DominatesNarrowUser = [this, NarrowUser] (BasicBlockEdge BBE) {
+ return BBE.isSingleEdge() &&
+ DT->dominates(BBE, NarrowUser->getParent());
+ };
+
+ if (DominatesNarrowUser(BasicBlockEdge(BB, TrueSuccessor)))
+ UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/true);
+
+ if (DominatesNarrowUser(BasicBlockEdge(BB, FalseSuccessor)))
+ UpdateRangeFromCondition(BI->getCondition(), /*TrueDest=*/false);
+ }
+}
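// A hypothetical source pattern (illustration only; names are made up) for the
// control-dependent range computation above: inside the guarded block, the
// dominating condition constrains I, so the post-increment value I + 1 is
// provably non-negative in that context even when it cannot be shown for
// I + 1 in general.
void guardedStore(int *A, int Len, int Start) {
  for (int I = Start; I != Len; ++I) {
    if (I >= 0)        // dominating condition on the add's LHS
      A[I + 1] = 0;    // in this context, I + 1 is at least 1
  }
}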
+
+/// Calculates PostIncRangeInfos map for the given IV
+void WidenIV::calculatePostIncRanges(PHINode *OrigPhi) {
+ SmallPtrSet<Instruction *, 16> Visited;
+ SmallVector<Instruction *, 6> Worklist;
+ Worklist.push_back(OrigPhi);
+ Visited.insert(OrigPhi);
+
+ while (!Worklist.empty()) {
+ Instruction *NarrowDef = Worklist.pop_back_val();
+
+ for (Use &U : NarrowDef->uses()) {
+ auto *NarrowUser = cast<Instruction>(U.getUser());
+
+ // Don't go looking outside the current loop.
+ auto *NarrowUserLoop = (*LI)[NarrowUser->getParent()];
+ if (!NarrowUserLoop || !L->contains(NarrowUserLoop))
+ continue;
+
+ if (!Visited.insert(NarrowUser).second)
+ continue;
+
+ Worklist.push_back(NarrowUser);
+
+ calculatePostIncRange(NarrowDef, NarrowUser);
+ }
+ }
+}
+
//===----------------------------------------------------------------------===//
// Live IV Reduction - Minimize IVs live across the loop.
//===----------------------------------------------------------------------===//
@@ -1514,6 +1736,10 @@ void IndVarSimplify::simplifyAndExtend(Loop *L,
LoopInfo *LI) {
SmallVector<WideIVInfo, 8> WideIVs;
+ auto *GuardDecl = L->getBlocks()[0]->getModule()->getFunction(
+ Intrinsic::getName(Intrinsic::experimental_guard));
+ bool HasGuards = GuardDecl && !GuardDecl->use_empty();
+
SmallVector<PHINode*, 8> LoopPhis;
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I) {
LoopPhis.push_back(cast<PHINode>(I));
@@ -1543,7 +1769,7 @@ void IndVarSimplify::simplifyAndExtend(Loop *L,
} while(!LoopPhis.empty());
for (; !WideIVs.empty(); WideIVs.pop_back()) {
- WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts);
+ WidenIV Widener(WideIVs.back(), LI, SE, DT, DeadInsts, HasGuards);
if (PHINode *WidePhi = Widener.createWideIV(Rewriter)) {
Changed = true;
LoopPhis.push_back(WidePhi);
@@ -1963,6 +2189,11 @@ linearFunctionTestReplace(Loop *L,
IRBuilder<> Builder(BI);
+ // The new loop exit condition should reuse the debug location of the
+ // original loop exit condition.
+ if (auto *Cond = dyn_cast<Instruction>(BI->getCondition()))
+ Builder.SetCurrentDebugLocation(Cond->getDebugLoc());
+
// LFTR can ignore IV overflow and truncate to the width of
// BECount. This avoids materializing the add(zext(add)) expression.
unsigned CmpIndVarSize = SE->getTypeSizeInBits(CmpIndVar->getType());
@@ -1992,8 +2223,36 @@ linearFunctionTestReplace(Loop *L,
DEBUG(dbgs() << " Widen RHS:\t" << *ExitCnt << "\n");
} else {
- CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
- "lftr.wideiv");
+ // We try to extend the trip count first. If that doesn't work, we truncate
+ // the IV. Zext(trunc(IV)) == IV implies that the following two are
+ // equivalent: Trunc(IV) == ExitCnt and IV == zext(ExitCnt). Similarly for
+ // sext. If either of the two holds, extend the trip count; otherwise
+ // truncate the IV.
+ bool Extended = false;
+ const SCEV *IV = SE->getSCEV(CmpIndVar);
+ const SCEV *ZExtTrunc =
+ SE->getZeroExtendExpr(SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
+ ExitCnt->getType()),
+ CmpIndVar->getType());
+
+ if (ZExtTrunc == IV) {
+ Extended = true;
+ ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
+ "wide.trip.count");
+ } else {
+ const SCEV *SExtTrunc =
+ SE->getSignExtendExpr(SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
+ ExitCnt->getType()),
+ CmpIndVar->getType());
+ if (SExtTrunc == IV) {
+ Extended = true;
+ ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
+ "wide.trip.count");
+ }
+ }
+
+ if (!Extended)
+ CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
+ "lftr.wideiv");
}
}
Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond");
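// A small numeric sketch (plain C++, not SCEV; names are illustrative) of the
// equivalence the comment above relies on: once zext(trunc(IV)) == IV holds
// for every value IV takes, comparing trunc(IV) with ExitCnt gives the same
// answer as comparing IV with zext(ExitCnt), so the trip count can be widened
// instead of truncating the IV.
#include <cstdint>

bool exitTestsAgree(uint64_t IV, uint32_t ExitCnt) {
  if (static_cast<uint64_t>(static_cast<uint32_t>(IV)) != IV)
    return true;  // precondition does not hold; the transform would not fire
  bool Narrow = static_cast<uint32_t>(IV) == ExitCnt;  // trunc(IV) == ExitCnt
  bool Wide = IV == static_cast<uint64_t>(ExitCnt);    // IV == zext(ExitCnt)
  return Narrow == Wide;                               // always true here
}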
@@ -2025,7 +2284,7 @@ void IndVarSimplify::sinkUnusedInvariants(Loop *L) {
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) return;
- Instruction *InsertPt = &*ExitBlock->getFirstInsertionPt();
+ BasicBlock::iterator InsertPt = ExitBlock->getFirstInsertionPt();
BasicBlock::iterator I(Preheader->getTerminator());
while (I != Preheader->begin()) {
--I;
@@ -2094,9 +2353,9 @@ void IndVarSimplify::sinkUnusedInvariants(Loop *L) {
Done = true;
}
- ToMove->moveBefore(InsertPt);
+ ToMove->moveBefore(*ExitBlock, InsertPt);
if (Done) break;
- InsertPt = ToMove;
+ InsertPt = ToMove->getIterator();
}
}
@@ -2106,7 +2365,8 @@ void IndVarSimplify::sinkUnusedInvariants(Loop *L) {
bool IndVarSimplify::run(Loop *L) {
// We need (and expect!) the incoming loop to be in LCSSA.
- assert(L->isRecursivelyLCSSAForm(*DT) && "LCSSA required to run indvars!");
+ assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
+ "LCSSA required to run indvars!");
// If LoopSimplify form is not available, stay out of trouble. Some notes:
// - LSR currently only supports LoopSimplify-form loops. Indvars'
@@ -2199,7 +2459,8 @@ bool IndVarSimplify::run(Loop *L) {
Changed |= DeleteDeadPHIs(L->getHeader(), TLI);
// Check a post-condition.
- assert(L->isRecursivelyLCSSAForm(*DT) && "Indvars did not preserve LCSSA!");
+ assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
+ "Indvars did not preserve LCSSA!");
// Verify that LFTR, and any other change have not interfered with SCEV's
// ability to compute trip count.
@@ -2221,7 +2482,7 @@ bool IndVarSimplify::run(Loop *L) {
return Changed;
}
-PreservedAnalyses IndVarSimplifyPass::run(Loop &L, AnalysisManager<Loop> &AM) {
+PreservedAnalyses IndVarSimplifyPass::run(Loop &L, LoopAnalysisManager &AM) {
auto &FAM = AM.getResult<FunctionAnalysisManagerLoopProxy>(L).getManager();
Function *F = L.getHeader()->getParent();
const DataLayout &DL = F->getParent()->getDataLayout();
diff --git a/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
index ec7f09a2d598..8e81541c2337 100644
--- a/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/InductiveRangeCheckElimination.cpp
@@ -43,21 +43,16 @@
#include "llvm/ADT/Optional.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
-#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
-#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
-#include "llvm/IR/ValueHandle.h"
-#include "llvm/IR/Verifier.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -65,8 +60,7 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
-#include "llvm/Transforms/Utils/SimplifyIndVar.h"
-#include "llvm/Transforms/Utils/UnrollLoop.h"
+#include "llvm/Transforms/Utils/LoopSimplify.h"
using namespace llvm;
@@ -82,6 +76,11 @@ static cl::opt<bool> PrintRangeChecks("irce-print-range-checks", cl::Hidden,
static cl::opt<int> MaxExitProbReciprocal("irce-max-exit-prob-reciprocal",
cl::Hidden, cl::init(10));
+static cl::opt<bool> SkipProfitabilityChecks("irce-skip-profitability-checks",
+ cl::Hidden, cl::init(false));
+
+static const char *ClonedLoopTag = "irce.loop.clone";
+
#define DEBUG_TYPE "irce"
namespace {
@@ -152,11 +151,10 @@ public:
OS << " Operand: " << getCheckUse()->getOperandNo() << "\n";
}
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ LLVM_DUMP_METHOD
void dump() {
print(dbgs());
}
-#endif
Use *getCheckUse() const { return CheckUse; }
@@ -276,7 +274,7 @@ InductiveRangeCheck::parseRangeCheckICmp(Loop *L, ICmpInst *ICI,
case ICmpInst::ICMP_SLE:
std::swap(LHS, RHS);
- // fallthrough
+ LLVM_FALLTHROUGH;
case ICmpInst::ICMP_SGE:
if (match(RHS, m_ConstantInt<0>())) {
Index = LHS;
@@ -286,7 +284,7 @@ InductiveRangeCheck::parseRangeCheckICmp(Loop *L, ICmpInst *ICI,
case ICmpInst::ICMP_SLT:
std::swap(LHS, RHS);
- // fallthrough
+ LLVM_FALLTHROUGH;
case ICmpInst::ICMP_SGT:
if (match(RHS, m_ConstantInt<-1>())) {
Index = LHS;
@@ -302,7 +300,7 @@ InductiveRangeCheck::parseRangeCheckICmp(Loop *L, ICmpInst *ICI,
case ICmpInst::ICMP_ULT:
std::swap(LHS, RHS);
- // fallthrough
+ LLVM_FALLTHROUGH;
case ICmpInst::ICMP_UGT:
if (IsNonNegativeAndNotLoopVarying(LHS)) {
Index = RHS;
@@ -392,7 +390,8 @@ void InductiveRangeCheck::extractRangeChecksFromBranch(
BranchProbability LikelyTaken(15, 16);
- if (BPI.getEdgeProbability(BI->getParent(), (unsigned)0) < LikelyTaken)
+ if (!SkipProfitabilityChecks &&
+ BPI.getEdgeProbability(BI->getParent(), (unsigned)0) < LikelyTaken)
return;
SmallPtrSet<Value *, 8> Visited;
@@ -400,6 +399,34 @@ void InductiveRangeCheck::extractRangeChecksFromBranch(
Checks, Visited);
}
+// Add metadata to the loop L to disable loop optimizations. Callers need to
+// confirm that optimizing loop L is not beneficial.
+static void DisableAllLoopOptsOnLoop(Loop &L) {
+ // We do not care about any existing loopID related metadata for L, since we
+ // are setting all loop metadata to false.
+ LLVMContext &Context = L.getHeader()->getContext();
+ // Reserve first location for self reference to the LoopID metadata node.
+ MDNode *Dummy = MDNode::get(Context, {});
+ MDNode *DisableUnroll = MDNode::get(
+ Context, {MDString::get(Context, "llvm.loop.unroll.disable")});
+ Metadata *FalseVal =
+ ConstantAsMetadata::get(ConstantInt::get(Type::getInt1Ty(Context), 0));
+ MDNode *DisableVectorize = MDNode::get(
+ Context,
+ {MDString::get(Context, "llvm.loop.vectorize.enable"), FalseVal});
+ MDNode *DisableLICMVersioning = MDNode::get(
+ Context, {MDString::get(Context, "llvm.loop.licm_versioning.disable")});
+ MDNode *DisableDistribution = MDNode::get(
+ Context,
+ {MDString::get(Context, "llvm.loop.distribute.enable"), FalseVal});
+ MDNode *NewLoopID =
+ MDNode::get(Context, {Dummy, DisableUnroll, DisableVectorize,
+ DisableLICMVersioning, DisableDistribution});
+ // Set operand 0 to refer to the loop id itself.
+ NewLoopID->replaceOperandWith(0, NewLoopID);
+ L.setLoopID(NewLoopID);
+}
+
namespace {
// Keeps track of the structure of a loop. This is similar to llvm::Loop,
@@ -515,6 +542,11 @@ class LoopConstrainer {
//
void cloneLoop(ClonedLoop &CLResult, const char *Tag) const;
+ // Create the appropriate loop structure needed to describe a cloned copy of
+ // `Original`. The clone is described by `VM`.
+ Loop *createClonedLoopStructure(Loop *Original, Loop *Parent,
+ ValueToValueMapTy &VM);
+
// Rewrite the iteration space of the loop denoted by (LS, Preheader). The
// iteration space of the rewritten loop ends at ExitLoopAt. The start of the
// iteration space is not changed. `ExitLoopAt' is assumed to be slt
@@ -566,10 +598,12 @@ class LoopConstrainer {
Function &F;
LLVMContext &Ctx;
ScalarEvolution &SE;
+ DominatorTree &DT;
+ LPPassManager &LPM;
+ LoopInfo &LI;
// Information about the original loop we started out with.
Loop &OriginalLoop;
- LoopInfo &OriginalLoopInfo;
const SCEV *LatchTakenCount;
BasicBlock *OriginalPreheader;
@@ -585,12 +619,13 @@ class LoopConstrainer {
LoopStructure MainLoopStructure;
public:
- LoopConstrainer(Loop &L, LoopInfo &LI, const LoopStructure &LS,
- ScalarEvolution &SE, InductiveRangeCheck::Range R)
+ LoopConstrainer(Loop &L, LoopInfo &LI, LPPassManager &LPM,
+ const LoopStructure &LS, ScalarEvolution &SE,
+ DominatorTree &DT, InductiveRangeCheck::Range R)
: F(*L.getHeader()->getParent()), Ctx(L.getHeader()->getContext()),
- SE(SE), OriginalLoop(L), OriginalLoopInfo(LI), LatchTakenCount(nullptr),
- OriginalPreheader(nullptr), MainLoopPreheader(nullptr), Range(R),
- MainLoopStructure(LS) {}
+ SE(SE), DT(DT), LPM(LPM), LI(LI), OriginalLoop(L),
+ LatchTakenCount(nullptr), OriginalPreheader(nullptr),
+ MainLoopPreheader(nullptr), Range(R), MainLoopStructure(LS) {}
// Entry point for the algorithm. Returns true on success.
bool run();
@@ -622,9 +657,19 @@ static bool CanBeSMin(ScalarEvolution &SE, const SCEV *S) {
Optional<LoopStructure>
LoopStructure::parseLoopStructure(ScalarEvolution &SE, BranchProbabilityInfo &BPI,
Loop &L, const char *&FailureReason) {
- assert(L.isLoopSimplifyForm() && "should follow from addRequired<>");
+ if (!L.isLoopSimplifyForm()) {
+ FailureReason = "loop not in LoopSimplify form";
+ return None;
+ }
BasicBlock *Latch = L.getLoopLatch();
+ assert(Latch && "Simplified loops only have one latch!");
+
+ if (Latch->getTerminator()->getMetadata(ClonedLoopTag)) {
+ FailureReason = "loop has already been cloned";
+ return None;
+ }
+
if (!L.isLoopExiting(Latch)) {
FailureReason = "no loop latch";
return None;
@@ -648,7 +693,8 @@ LoopStructure::parseLoopStructure(ScalarEvolution &SE, BranchProbabilityInfo &BP
BranchProbability ExitProbability =
BPI.getEdgeProbability(LatchBr->getParent(), LatchBrExitIdx);
- if (ExitProbability > BranchProbability(1, MaxExitProbReciprocal)) {
+ if (!SkipProfitabilityChecks &&
+ ExitProbability > BranchProbability(1, MaxExitProbReciprocal)) {
FailureReason = "short running loop, not profitable";
return None;
}
@@ -907,6 +953,11 @@ void LoopConstrainer::cloneLoop(LoopConstrainer::ClonedLoop &Result,
return static_cast<Value *>(It->second);
};
+ auto *ClonedLatch =
+ cast<BasicBlock>(GetClonedValue(OriginalLoop.getLoopLatch()));
+ ClonedLatch->getTerminator()->setMetadata(ClonedLoopTag,
+ MDNode::get(Ctx, {}));
+
Result.Structure = MainLoopStructure.map(GetClonedValue);
Result.Structure.Tag = Tag;
@@ -924,17 +975,15 @@ void LoopConstrainer::cloneLoop(LoopConstrainer::ClonedLoop &Result,
// to be edited to reflect that. No phi nodes need to be introduced because
// the loop is in LCSSA.
- for (auto SBBI = succ_begin(OriginalBB), SBBE = succ_end(OriginalBB);
- SBBI != SBBE; ++SBBI) {
-
- if (OriginalLoop.contains(*SBBI))
+ for (auto *SBB : successors(OriginalBB)) {
+ if (OriginalLoop.contains(SBB))
continue; // not an exit block
- for (Instruction &I : **SBBI) {
- if (!isa<PHINode>(&I))
+ for (Instruction &I : *SBB) {
+ auto *PN = dyn_cast<PHINode>(&I);
+ if (!PN)
break;
- PHINode *PN = cast<PHINode>(&I);
Value *OldIncoming = PN->getIncomingValueForBlock(OriginalBB);
PN->addIncoming(GetClonedValue(OldIncoming), ClonedBB);
}
@@ -1020,11 +1069,11 @@ LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd(
RewrittenRangeInfo RRI;
- auto BBInsertLocation = std::next(Function::iterator(LS.Latch));
+ BasicBlock *BBInsertLocation = LS.Latch->getNextNode();
RRI.ExitSelector = BasicBlock::Create(Ctx, Twine(LS.Tag) + ".exit.selector",
- &F, &*BBInsertLocation);
+ &F, BBInsertLocation);
RRI.PseudoExit = BasicBlock::Create(Ctx, Twine(LS.Tag) + ".pseudo.exit", &F,
- &*BBInsertLocation);
+ BBInsertLocation);
BranchInst *PreheaderJump = cast<BranchInst>(Preheader->getTerminator());
bool Increasing = LS.IndVarIncreasing;
@@ -1067,11 +1116,10 @@ LoopConstrainer::RewrittenRangeInfo LoopConstrainer::changeIterationSpaceEnd(
// each of the PHI nodes in the loop header. This feeds into the initial
// value of the same PHI nodes if/when we continue execution.
for (Instruction &I : *LS.Header) {
- if (!isa<PHINode>(&I))
+ auto *PN = dyn_cast<PHINode>(&I);
+ if (!PN)
break;
- PHINode *PN = cast<PHINode>(&I);
-
PHINode *NewPHI = PHINode::Create(PN->getType(), 2, PN->getName() + ".copy",
BranchToContinuation);
@@ -1104,11 +1152,10 @@ void LoopConstrainer::rewriteIncomingValuesForPHIs(
unsigned PHIIndex = 0;
for (Instruction &I : *LS.Header) {
- if (!isa<PHINode>(&I))
+ auto *PN = dyn_cast<PHINode>(&I);
+ if (!PN)
break;
- PHINode *PN = cast<PHINode>(&I);
-
for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i)
if (PN->getIncomingBlock(i) == ContinuationBlock)
PN->setIncomingValue(i, RRI.PHIValuesAtPseudoExit[PHIIndex++]);
@@ -1125,10 +1172,10 @@ BasicBlock *LoopConstrainer::createPreheader(const LoopStructure &LS,
BranchInst::Create(LS.Header, Preheader);
for (Instruction &I : *LS.Header) {
- if (!isa<PHINode>(&I))
+ auto *PN = dyn_cast<PHINode>(&I);
+ if (!PN)
break;
- PHINode *PN = cast<PHINode>(&I);
for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i)
replacePHIBlock(PN, OldPreheader, Preheader);
}
@@ -1142,7 +1189,23 @@ void LoopConstrainer::addToParentLoopIfNeeded(ArrayRef<BasicBlock *> BBs) {
return;
for (BasicBlock *BB : BBs)
- ParentLoop->addBasicBlockToLoop(BB, OriginalLoopInfo);
+ ParentLoop->addBasicBlockToLoop(BB, LI);
+}
+
+Loop *LoopConstrainer::createClonedLoopStructure(Loop *Original, Loop *Parent,
+ ValueToValueMapTy &VM) {
+ Loop &New = LPM.addLoop(Parent);
+
+ // Add all of the blocks in Original to the new loop.
+ for (auto *BB : Original->blocks())
+ if (LI.getLoopFor(BB) == Original)
+ New.addBasicBlockToLoop(cast<BasicBlock>(VM[BB]), LI);
+
+ // Add all of the subloops to the new loop.
+ for (Loop *SubLoop : *Original)
+ createClonedLoopStructure(SubLoop, &New, VM);
+
+ return &New;
}
bool LoopConstrainer::run() {
@@ -1266,8 +1329,31 @@ bool LoopConstrainer::run() {
std::remove(std::begin(NewBlocks), std::end(NewBlocks), nullptr);
addToParentLoopIfNeeded(makeArrayRef(std::begin(NewBlocks), NewBlocksEnd));
- addToParentLoopIfNeeded(PreLoop.Blocks);
- addToParentLoopIfNeeded(PostLoop.Blocks);
+
+ DT.recalculate(F);
+
+ if (!PreLoop.Blocks.empty()) {
+ auto *L = createClonedLoopStructure(
+ &OriginalLoop, OriginalLoop.getParentLoop(), PreLoop.Map);
+ formLCSSARecursively(*L, DT, &LI, &SE);
+ simplifyLoop(L, &DT, &LI, &SE, nullptr, true);
+ // Pre loops are slow paths; we do not need to perform any loop
+ // optimizations on them.
+ DisableAllLoopOptsOnLoop(*L);
+ }
+
+ if (!PostLoop.Blocks.empty()) {
+ auto *L = createClonedLoopStructure(
+ &OriginalLoop, OriginalLoop.getParentLoop(), PostLoop.Map);
+ formLCSSARecursively(*L, DT, &LI, &SE);
+ simplifyLoop(L, &DT, &LI, &SE, nullptr, true);
+ // Post loops are slow paths; we do not need to perform any loop
+ // optimizations on them.
+ DisableAllLoopOptsOnLoop(*L);
+ }
+
+ formLCSSARecursively(OriginalLoop, DT, &LI, &SE);
+ simplifyLoop(&OriginalLoop, &DT, &LI, &SE, nullptr, true);
return true;
}
@@ -1439,8 +1525,9 @@ bool InductiveRangeCheckElimination::runOnLoop(Loop *L, LPPassManager &LPM) {
if (!SafeIterRange.hasValue())
return false;
- LoopConstrainer LC(*L, getAnalysis<LoopInfoWrapperPass>().getLoopInfo(), LS,
- SE, SafeIterRange.getValue());
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ LoopConstrainer LC(*L, getAnalysis<LoopInfoWrapperPass>().getLoopInfo(), LPM,
+ LS, SE, DT, SafeIterRange.getValue());
bool Changed = LC.run();
if (Changed) {
diff --git a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
index 55ffc23e1308..1870c3deb4f3 100644
--- a/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/JumpThreading.cpp
@@ -134,7 +134,7 @@ bool JumpThreading::runOnFunction(Function &F) {
}
PreservedAnalyses JumpThreadingPass::run(Function &F,
- AnalysisManager<Function> &AM) {
+ FunctionAnalysisManager &AM) {
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
auto &LVI = AM.getResult<LazyValueAnalysis>(F);
@@ -951,12 +951,17 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// Scan a few instructions up from the load, to see if it is obviously live at
// the entry to its block.
BasicBlock::iterator BBIt(LI);
-
+ bool IsLoadCSE;
if (Value *AvailableVal =
- FindAvailableLoadedValue(LI, LoadBB, BBIt, DefMaxInstsToScan)) {
+ FindAvailableLoadedValue(LI, LoadBB, BBIt, DefMaxInstsToScan, nullptr, &IsLoadCSE)) {
// If the value of the load is locally available within the block, just use
// it. This frequently occurs for reg2mem'd allocas.
+ if (IsLoadCSE) {
+ LoadInst *NLI = cast<LoadInst>(AvailableVal);
+ combineMetadataForCSE(NLI, LI);
+ }
+
// If the returned value is the load itself, replace with an undef. This can
// only happen in dead loops.
if (AvailableVal == LI) AvailableVal = UndefValue::get(LI->getType());
@@ -983,6 +988,7 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
typedef SmallVector<std::pair<BasicBlock*, Value*>, 8> AvailablePredsTy;
AvailablePredsTy AvailablePreds;
BasicBlock *OneUnavailablePred = nullptr;
+ SmallVector<LoadInst*, 8> CSELoads;
// If we got here, the loaded value is transparent through to the start of the
// block. Check to see if it is available in any of the predecessor blocks.
@@ -993,17 +999,17 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// Scan the predecessor to see if the value is available in the pred.
BBIt = PredBB->end();
- AAMDNodes ThisAATags;
Value *PredAvailable = FindAvailableLoadedValue(LI, PredBB, BBIt,
DefMaxInstsToScan,
- nullptr, &ThisAATags);
+ nullptr,
+ &IsLoadCSE);
if (!PredAvailable) {
OneUnavailablePred = PredBB;
continue;
}
- // If AA tags disagree or are not present, forget about them.
- if (AATags != ThisAATags) AATags = AAMDNodes();
+ if (IsLoadCSE)
+ CSELoads.push_back(cast<LoadInst>(PredAvailable));
// If so, this load is partially redundant. Remember this info so that we
// can create a PHI node.
@@ -1101,6 +1107,10 @@ bool JumpThreadingPass::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
PN->addIncoming(PredV, I->first);
}
+ for (LoadInst *PredLI : CSELoads) {
+ combineMetadataForCSE(PredLI, LI);
+ }
+
LI->replaceAllUsesWith(PN);
LI->eraseFromParent();
@@ -1157,8 +1167,7 @@ FindMostPopularDest(BasicBlock *BB,
for (unsigned i = 0; ; ++i) {
assert(i != TI->getNumSuccessors() && "Didn't find any successor!");
- if (std::find(SamePopularity.begin(), SamePopularity.end(),
- TI->getSuccessor(i)) == SamePopularity.end())
+ if (!is_contained(SamePopularity, TI->getSuccessor(i)))
continue;
MostPopularDest = TI->getSuccessor(i);
@@ -1594,7 +1603,7 @@ bool JumpThreadingPass::ThreadEdge(BasicBlock *BB,
}
/// Create a new basic block that will be the predecessor of BB and successor of
-/// all blocks in Preds. When profile data is availble, update the frequency of
+/// all blocks in Preds. When profile data is available, update the frequency of
/// this new block.
BasicBlock *JumpThreadingPass::SplitBlockPreds(BasicBlock *BB,
ArrayRef<BasicBlock *> Preds,
@@ -1615,6 +1624,23 @@ BasicBlock *JumpThreadingPass::SplitBlockPreds(BasicBlock *BB,
return PredBB;
}
+bool JumpThreadingPass::doesBlockHaveProfileData(BasicBlock *BB) {
+ const TerminatorInst *TI = BB->getTerminator();
+ assert(TI->getNumSuccessors() > 1 && "not a split");
+
+ MDNode *WeightsNode = TI->getMetadata(LLVMContext::MD_prof);
+ if (!WeightsNode)
+ return false;
+
+ MDString *MDName = cast<MDString>(WeightsNode->getOperand(0));
+ if (MDName->getString() != "branch_weights")
+ return false;
+
+ // Ensure there are weights for all of the successors. Note that the first
+ // operand to the metadata node is a name, not a weight.
+ return WeightsNode->getNumOperands() == TI->getNumSuccessors() + 1;
+}
+
/// Update the block frequency of BB and branch weight and the metadata on the
/// edge BB->SuccBB. This is done by scaling the weight of BB->SuccBB by 1 -
/// Freq(PredBB->BB) / Freq(BB->SuccBB).
@@ -1665,7 +1691,41 @@ void JumpThreadingPass::UpdateBlockFreqAndEdgeWeight(BasicBlock *PredBB,
for (int I = 0, E = BBSuccProbs.size(); I < E; I++)
BPI->setEdgeProbability(BB, I, BBSuccProbs[I]);
- if (BBSuccProbs.size() >= 2) {
+ // Update the profile metadata as well.
+ //
+ // Don't do this if the profile of the transformed blocks was statically
+ // estimated. (This could occur despite the function having an entry
+ // frequency in completely cold parts of the CFG.)
+ //
+ // In this case we don't want to suggest to subsequent passes that the
+ // calculated weights are fully consistent. Consider this graph:
+ //
+ // check_1
+ // 50% / |
+ // eq_1 | 50%
+ // \ |
+ // check_2
+ // 50% / |
+ // eq_2 | 50%
+ // \ |
+ // check_3
+ // 50% / |
+ // eq_3 | 50%
+ // \ |
+ //
+ // Assuming the blocks check_* all compare the same value against 1, 2 and 3,
+ // the overall probabilities are inconsistent; the total probability that the
+ // value is either 1, 2 or 3 is 150%.
+ //
+ // As a consequence if we thread eq_1 -> check_2 to check_3, check_2->check_3
+ // becomes 0%. This is even worse if the edge whose probability becomes 0% is
+ // the loop exit edge. Then based solely on static estimation we would assume
+ // the loop was extremely hot.
+ //
+  // FIXME: Handle this locally as well so that BPI and BFI are consistent. We
+ // shouldn't make edges extremely likely or unlikely based solely on static
+ // estimation.
+ if (BBSuccProbs.size() >= 2 && doesBlockHaveProfileData(BB)) {
SmallVector<uint32_t, 4> Weights;
for (auto Prob : BBSuccProbs)
Weights.push_back(Prob.getNumerator());
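// Illustrative sketch (not from this patch): the !prof metadata shape that
// doesBlockHaveProfileData expects and that the updated Weights vector is
// written back as. Operand 0 of the node is the string "branch_weights" and
// operands 1..N hold one weight per successor, hence the
// getNumOperands() == getNumSuccessors() + 1 check above. MDBuilder is the
// usual way to build such a node; the helper name below is made up.
#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"

using namespace llvm;

static void setWeights(TerminatorInst *TI, ArrayRef<uint32_t> Weights) {
  assert(Weights.size() == TI->getNumSuccessors() && "one weight per edge");
  MDBuilder MDB(TI->getContext());
  // Yields, e.g., !{!"branch_weights", i32 70, i32 30} for two successors.
  TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
}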
diff --git a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
index cdd17fc516a8..1cc5c8f0da84 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -100,10 +100,6 @@ static Instruction *
CloneInstructionInExitBlock(Instruction &I, BasicBlock &ExitBlock, PHINode &PN,
const LoopInfo *LI,
const LoopSafetyInfo *SafetyInfo);
-static bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA,
- DominatorTree *DT, TargetLibraryInfo *TLI,
- Loop *CurLoop, AliasSetTracker *CurAST,
- LoopSafetyInfo *SafetyInfo);
namespace {
struct LoopInvariantCodeMotion {
@@ -128,8 +124,15 @@ struct LegacyLICMPass : public LoopPass {
}
bool runOnLoop(Loop *L, LPPassManager &LPM) override {
- if (skipLoop(L))
+ if (skipLoop(L)) {
+ // If we have run LICM on a previous loop but now we are skipping
+ // (because we've hit the opt-bisect limit), we need to clear the
+ // loop alias information.
+ for (auto &LTAS : LICM.getLoopToAliasSetMap())
+ delete LTAS.second;
+ LICM.getLoopToAliasSetMap().clear();
return false;
+ }
auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
return LICM.runOnLoop(L,
@@ -173,7 +176,7 @@ private:
};
}
-PreservedAnalyses LICMPass::run(Loop &L, AnalysisManager<Loop> &AM) {
+PreservedAnalyses LICMPass::run(Loop &L, LoopAnalysisManager &AM) {
const auto &FAM =
AM.getResult<FunctionAnalysisManagerLoopProxy>(L).getManager();
Function *F = L.getHeader()->getParent();
@@ -247,24 +250,47 @@ bool LoopInvariantCodeMotion::runOnLoop(Loop *L, AliasAnalysis *AA,
// Now that all loop invariants have been removed from the loop, promote any
// memory references to scalars that we can.
- if (!DisablePromotion && (Preheader || L->hasDedicatedExits())) {
+ // Don't sink stores from loops without dedicated block exits. Exits
+ // containing indirect branches are not transformed by loop simplify,
+ // make sure we catch that. An additional load may be generated in the
+ // preheader for SSA updater, so also avoid sinking when no preheader
+ // is available.
+ if (!DisablePromotion && Preheader && L->hasDedicatedExits()) {
+ // Figure out the loop exits and their insertion points
SmallVector<BasicBlock *, 8> ExitBlocks;
- SmallVector<Instruction *, 8> InsertPts;
- PredIteratorCache PIC;
-
- // Loop over all of the alias sets in the tracker object.
- for (AliasSet &AS : *CurAST)
- Changed |= promoteLoopAccessesToScalars(
- AS, ExitBlocks, InsertPts, PIC, LI, DT, TLI, L, CurAST, &SafetyInfo);
-
- // Once we have promoted values across the loop body we have to recursively
- // reform LCSSA as any nested loop may now have values defined within the
- // loop used in the outer loop.
- // FIXME: This is really heavy handed. It would be a bit better to use an
- // SSAUpdater strategy during promotion that was LCSSA aware and reformed
- // it as it went.
- if (Changed) {
- formLCSSARecursively(*L, *DT, LI, SE);
+ L->getUniqueExitBlocks(ExitBlocks);
+
+ // We can't insert into a catchswitch.
+ bool HasCatchSwitch = llvm::any_of(ExitBlocks, [](BasicBlock *Exit) {
+ return isa<CatchSwitchInst>(Exit->getTerminator());
+ });
+
+ if (!HasCatchSwitch) {
+ SmallVector<Instruction *, 8> InsertPts;
+ InsertPts.reserve(ExitBlocks.size());
+ for (BasicBlock *ExitBlock : ExitBlocks)
+ InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
+
+ PredIteratorCache PIC;
+
+ bool Promoted = false;
+
+ // Loop over all of the alias sets in the tracker object.
+ for (AliasSet &AS : *CurAST)
+ Promoted |=
+ promoteLoopAccessesToScalars(AS, ExitBlocks, InsertPts, PIC, LI, DT,
+ TLI, L, CurAST, &SafetyInfo);
+
+ // Once we have promoted values across the loop body we have to
+ // recursively reform LCSSA as any nested loop may now have values defined
+ // within the loop used in the outer loop.
+ // FIXME: This is really heavy handed. It would be a bit better to use an
+ // SSAUpdater strategy during promotion that was LCSSA aware and reformed
+ // it as it went.
+ if (Promoted)
+ formLCSSARecursively(*L, *DT, LI, SE);
+
+ Changed |= Promoted;
}
}
@@ -337,7 +363,7 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
// operands of the instruction are loop invariant.
//
if (isNotUsedInLoop(I, CurLoop, SafetyInfo) &&
- canSinkOrHoistInst(I, AA, DT, TLI, CurLoop, CurAST, SafetyInfo)) {
+ canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo)) {
++II;
Changed |= sink(I, LI, DT, CurLoop, CurAST, SafetyInfo);
}
@@ -390,7 +416,7 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
// is safe to hoist the instruction.
//
if (CurLoop->hasLoopInvariantOperands(&I) &&
- canSinkOrHoistInst(I, AA, DT, TLI, CurLoop, CurAST, SafetyInfo) &&
+ canSinkOrHoistInst(I, AA, DT, CurLoop, CurAST, SafetyInfo) &&
isSafeToExecuteUnconditionally(
I, DT, CurLoop, SafetyInfo,
CurLoop->getLoopPreheader()->getTerminator()))
@@ -436,12 +462,9 @@ void llvm::computeLoopSafetyInfo(LoopSafetyInfo *SafetyInfo, Loop *CurLoop) {
SafetyInfo->BlockColors = colorEHFunclets(*Fn);
}
-/// canSinkOrHoistInst - Return true if the hoister and sinker can handle this
-/// instruction.
-///
-bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, DominatorTree *DT,
- TargetLibraryInfo *TLI, Loop *CurLoop,
- AliasSetTracker *CurAST, LoopSafetyInfo *SafetyInfo) {
+bool llvm::canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
+ Loop *CurLoop, AliasSetTracker *CurAST,
+ LoopSafetyInfo *SafetyInfo) {
// Loads have extra constraints we have to verify before we can hoist them.
if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
if (!LI->isUnordered())
@@ -515,6 +538,11 @@ bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, DominatorTree *DT,
!isa<InsertValueInst>(I))
return false;
+ // SafetyInfo is nullptr if we are checking for sinking from preheader to
+  // loop body. It will always be safe as there is no speculative execution.
+ if (!SafetyInfo)
+ return true;
+
// TODO: Plumb the context instruction through to make hoisting and sinking
// more powerful. Hoisting of loads already works due to the special casing
// above.
@@ -876,23 +904,24 @@ bool llvm::promoteLoopAccessesToScalars(
// is not safe, because *P may only be valid to access if 'c' is true.
//
// The safety property divides into two parts:
- // 1) The memory may not be dereferenceable on entry to the loop. In this
+ // p1) The memory may not be dereferenceable on entry to the loop. In this
// case, we can't insert the required load in the preheader.
- // 2) The memory model does not allow us to insert a store along any dynamic
+ // p2) The memory model does not allow us to insert a store along any dynamic
// path which did not originally have one.
//
- // It is safe to promote P if all uses are direct load/stores and if at
- // least one is guaranteed to be executed.
- bool GuaranteedToExecute = false;
-
- // It is also safe to promote P if we can prove that speculating a load into
- // the preheader is safe (i.e. proving dereferenceability on all
- // paths through the loop), and that the memory can be proven thread local
- // (so that the memory model requirement doesn't apply.) We first establish
- // the former, and then run a capture analysis below to establish the later.
- // We can use any access within the alias set to prove dereferenceability
+ // If at least one store is guaranteed to execute, both properties are
+ // satisfied, and promotion is legal.
+ // This, however, is not a necessary condition. Even if no store/load is
+ // guaranteed to execute, we can still establish these properties:
+ // (p1) by proving that hoisting the load into the preheader is
+ // safe (i.e. proving dereferenceability on all paths through the loop). We
+ // can use any access within the alias set to prove dereferenceability,
// since they're all must alias.
- bool CanSpeculateLoad = false;
+ // (p2) by proving the memory is thread-local, so the memory model
+ // requirement does not apply, and stores are safe to insert.
+
+ bool DereferenceableInPH = false;
+ bool SafeToInsertStore = false;
SmallVector<Instruction *, 64> LoopUses;
SmallPtrSet<Value *, 4> PointerMustAliases;
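// Illustrative sketch (not from this patch): the kind of loop that properties
// p1 and p2 above guard against; the function and names are made up. The
// store to *P below is conditional, so it is not guaranteed to execute.
// Promoting *P to a register and writing it back unconditionally after the
// loop would introduce a store on the C == false path, which another thread
// could observe (p2). Promotion is still possible if P is provably
// dereferenceable at the preheader (p1) and provably thread-local, e.g. a
// non-escaping allocation.
void conditionalStore(int *P, bool C, int N) {
  for (int I = 0; I < N; ++I)
    if (C)
      *P += I; // conditional store: not guaranteed to execute
}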
@@ -901,15 +930,6 @@ bool llvm::promoteLoopAccessesToScalars(
// us to prove better alignment.
unsigned Alignment = 1;
AAMDNodes AATags;
- bool HasDedicatedExits = CurLoop->hasDedicatedExits();
-
- // Don't sink stores from loops without dedicated block exits. Exits
- // containing indirect branches are not transformed by loop simplify,
- // make sure we catch that. An additional load may be generated in the
- // preheader for SSA updater, so also avoid sinking when no preheader
- // is available.
- if (!HasDedicatedExits || !Preheader)
- return false;
const DataLayout &MDL = Preheader->getModule()->getDataLayout();
@@ -926,7 +946,6 @@ bool llvm::promoteLoopAccessesToScalars(
// Check that all of the pointers in the alias set have the same type. We
// cannot (yet) promote a memory location that is loaded and stored in
// different sizes. While we are at it, collect alignment and AA info.
- bool Changed = false;
for (const auto &ASI : AS) {
Value *ASIV = ASI.getValue();
PointerMustAliases.insert(ASIV);
@@ -935,7 +954,7 @@ bool llvm::promoteLoopAccessesToScalars(
// cannot (yet) promote a memory location that is loaded and stored in
// different sizes.
if (SomePtr->getType() != ASIV->getType())
- return Changed;
+ return false;
for (User *U : ASIV->users()) {
// Ignore instructions that are outside the loop.
@@ -948,10 +967,10 @@ bool llvm::promoteLoopAccessesToScalars(
if (const LoadInst *Load = dyn_cast<LoadInst>(UI)) {
assert(!Load->isVolatile() && "AST broken");
if (!Load->isSimple())
- return Changed;
+ return false;
- if (!GuaranteedToExecute && !CanSpeculateLoad)
- CanSpeculateLoad = isSafeToExecuteUnconditionally(
+ if (!DereferenceableInPH)
+ DereferenceableInPH = isSafeToExecuteUnconditionally(
*Load, DT, CurLoop, SafetyInfo, Preheader->getTerminator());
} else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) {
// Stores *of* the pointer are not interesting, only stores *to* the
@@ -960,35 +979,36 @@ bool llvm::promoteLoopAccessesToScalars(
continue;
assert(!Store->isVolatile() && "AST broken");
if (!Store->isSimple())
- return Changed;
-
- // Note that we only check GuaranteedToExecute inside the store case
- // so that we do not introduce stores where they did not exist before
- // (which would break the LLVM concurrency model).
+ return false;
- // If the alignment of this instruction allows us to specify a more
- // restrictive (and performant) alignment and if we are sure this
- // instruction will be executed, update the alignment.
- // Larger is better, with the exception of 0 being the best alignment.
+ // If the store is guaranteed to execute, both properties are satisfied.
+ // We may want to check if a store is guaranteed to execute even if we
+ // already know that promotion is safe, since it may have higher
+ // alignment than any other guaranteed stores, in which case we can
+ // raise the alignment on the promoted store.
unsigned InstAlignment = Store->getAlignment();
- if ((InstAlignment > Alignment || InstAlignment == 0) &&
- Alignment != 0) {
+ if (!InstAlignment)
+ InstAlignment =
+ MDL.getABITypeAlignment(Store->getValueOperand()->getType());
+
+ if (!DereferenceableInPH || !SafeToInsertStore ||
+ (InstAlignment > Alignment)) {
if (isGuaranteedToExecute(*UI, DT, CurLoop, SafetyInfo)) {
- GuaranteedToExecute = true;
- Alignment = InstAlignment;
+ DereferenceableInPH = true;
+ SafeToInsertStore = true;
+ Alignment = std::max(Alignment, InstAlignment);
}
- } else if (!GuaranteedToExecute) {
- GuaranteedToExecute =
- isGuaranteedToExecute(*UI, DT, CurLoop, SafetyInfo);
}
- if (!GuaranteedToExecute && !CanSpeculateLoad) {
- CanSpeculateLoad = isDereferenceableAndAlignedPointer(
+ // If the store is not guaranteed to execute, we may still get
+ // deref info through it.
+ if (!DereferenceableInPH) {
+ DereferenceableInPH = isDereferenceableAndAlignedPointer(
Store->getPointerOperand(), Store->getAlignment(), MDL,
Preheader->getTerminator(), DT);
}
} else
- return Changed; // Not a load or store.
+ return false; // Not a load or store.
// Merge the AA tags.
if (LoopUses.empty()) {
@@ -1002,38 +1022,29 @@ bool llvm::promoteLoopAccessesToScalars(
}
}
- // Check legality per comment above. Otherwise, we can't promote.
- bool PromotionIsLegal = GuaranteedToExecute;
- if (!PromotionIsLegal && CanSpeculateLoad) {
- // If this is a thread local location, then we can insert stores along
- // paths which originally didn't have them without violating the memory
- // model.
- Value *Object = GetUnderlyingObject(SomePtr, MDL);
- PromotionIsLegal =
- isAllocLikeFn(Object, TLI) && !PointerMayBeCaptured(Object, true, true);
- }
- if (!PromotionIsLegal)
- return Changed;
- // Figure out the loop exits and their insertion points, if this is the
- // first promotion.
- if (ExitBlocks.empty()) {
- CurLoop->getUniqueExitBlocks(ExitBlocks);
- InsertPts.clear();
- InsertPts.reserve(ExitBlocks.size());
- for (BasicBlock *ExitBlock : ExitBlocks)
- InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());
+ // If we couldn't prove we can hoist the load, bail.
+ if (!DereferenceableInPH)
+ return false;
+
+ // We know we can hoist the load, but don't have a guaranteed store.
+ // Check whether the location is thread-local. If it is, then we can insert
+ // stores along paths which originally didn't have them without violating the
+ // memory model.
+ if (!SafeToInsertStore) {
+ Value *Object = GetUnderlyingObject(SomePtr, MDL);
+ SafeToInsertStore =
+ (isAllocLikeFn(Object, TLI) || isa<AllocaInst>(Object)) &&
+ !PointerMayBeCaptured(Object, true, true);
}
- // Can't insert into a catchswitch.
- for (BasicBlock *ExitBlock : ExitBlocks)
- if (isa<CatchSwitchInst>(ExitBlock->getTerminator()))
- return Changed;
+ // If we've still failed to prove we can sink the store, give up.
+ if (!SafeToInsertStore)
+ return false;
// Otherwise, this is safe to promote, lets do it!
DEBUG(dbgs() << "LICM: Promoting value stored to in loop: " << *SomePtr
<< '\n');
- Changed = true;
++NumPromoted;
// Grab a debug location for the inserted loads/stores; given that the
@@ -1066,13 +1077,13 @@ bool llvm::promoteLoopAccessesToScalars(
if (PreheaderLoad->use_empty())
PreheaderLoad->eraseFromParent();
- return Changed;
+ return true;
}
/// Returns an owning pointer to an alias set which incorporates aliasing info
/// from L and all subloops of L.
-/// FIXME: In new pass manager, there is no helper functions to handle loop
-/// analysis such as cloneBasicBlockAnalysis. So the AST needs to be recompute
+/// FIXME: In new pass manager, there is no helper function to handle loop
+/// analysis such as cloneBasicBlockAnalysis, so the AST needs to be recomputed
/// from scratch for every loop. Hook up with the helper functions when
/// available in the new pass manager to avoid redundant computation.
AliasSetTracker *
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoadCombine.cpp b/contrib/llvm/lib/Transforms/Scalar/LoadCombine.cpp
index dfe51a4ce44c..389f1c595aa4 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoadCombine.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoadCombine.cpp
@@ -44,9 +44,6 @@ struct PointerOffsetPair {
};
struct LoadPOPPair {
- LoadPOPPair() = default;
- LoadPOPPair(LoadInst *L, PointerOffsetPair P, unsigned O)
- : Load(L), POP(P), InsertOrder(O) {}
LoadInst *Load;
PointerOffsetPair POP;
/// \brief The new load needs to be created before the first load in IR order.
@@ -71,7 +68,7 @@ public:
AU.addPreserved<GlobalsAAWrapperPass>();
}
- const char *getPassName() const override { return LDCOMBINE_NAME; }
+ StringRef getPassName() const override { return LDCOMBINE_NAME; }
static char ID;
typedef IRBuilder<TargetFolder> BuilderTy;
@@ -264,7 +261,7 @@ bool LoadCombine::runOnBasicBlock(BasicBlock &BB) {
auto POP = getPointerOffsetPair(*LI);
if (!POP.Pointer)
continue;
- LoadMap[POP.Pointer].push_back(LoadPOPPair(LI, POP, Index++));
+ LoadMap[POP.Pointer].push_back({LI, std::move(POP), Index++});
AST.add(LI);
}
if (combineLoads(LoadMap))
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
index 66b59d27dfde..d09af32a99fd 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopDataPrefetch.cpp
@@ -11,14 +11,16 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Scalar/LoopDataPrefetch.h"
+
#define DEBUG_TYPE "loop-data-prefetch"
-#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
@@ -26,13 +28,13 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
-#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
@@ -59,77 +61,89 @@ static cl::opt<unsigned> MaxPrefetchIterationsAhead(
STATISTIC(NumPrefetches, "Number of prefetches inserted");
-namespace llvm {
- void initializeLoopDataPrefetchPass(PassRegistry&);
-}
-
namespace {
- class LoopDataPrefetch : public FunctionPass {
- public:
- static char ID; // Pass ID, replacement for typeid
- LoopDataPrefetch() : FunctionPass(ID) {
- initializeLoopDataPrefetchPass(*PassRegistry::getPassRegistry());
- }
+/// Loop prefetch implementation class.
+class LoopDataPrefetch {
+public:
+ LoopDataPrefetch(AssumptionCache *AC, LoopInfo *LI, ScalarEvolution *SE,
+ const TargetTransformInfo *TTI,
+ OptimizationRemarkEmitter *ORE)
+ : AC(AC), LI(LI), SE(SE), TTI(TTI), ORE(ORE) {}
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AssumptionCacheTracker>();
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addPreserved<LoopInfoWrapperPass>();
- AU.addRequired<ScalarEvolutionWrapperPass>();
- // FIXME: For some reason, preserving SE here breaks LSR (even if
- // this pass changes nothing).
- // AU.addPreserved<ScalarEvolutionWrapperPass>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- }
+ bool run();
- bool runOnFunction(Function &F) override;
+private:
+ bool runOnLoop(Loop *L);
- private:
- bool runOnLoop(Loop *L);
+  /// \brief Check if the stride of the accesses is large enough to
+ /// warrant a prefetch.
+ bool isStrideLargeEnough(const SCEVAddRecExpr *AR);
- /// \brief Check if the the stride of the accesses is large enough to
- /// warrant a prefetch.
- bool isStrideLargeEnough(const SCEVAddRecExpr *AR);
+ unsigned getMinPrefetchStride() {
+ if (MinPrefetchStride.getNumOccurrences() > 0)
+ return MinPrefetchStride;
+ return TTI->getMinPrefetchStride();
+ }
- unsigned getMinPrefetchStride() {
- if (MinPrefetchStride.getNumOccurrences() > 0)
- return MinPrefetchStride;
- return TTI->getMinPrefetchStride();
- }
+ unsigned getPrefetchDistance() {
+ if (PrefetchDistance.getNumOccurrences() > 0)
+ return PrefetchDistance;
+ return TTI->getPrefetchDistance();
+ }
- unsigned getPrefetchDistance() {
- if (PrefetchDistance.getNumOccurrences() > 0)
- return PrefetchDistance;
- return TTI->getPrefetchDistance();
- }
+ unsigned getMaxPrefetchIterationsAhead() {
+ if (MaxPrefetchIterationsAhead.getNumOccurrences() > 0)
+ return MaxPrefetchIterationsAhead;
+ return TTI->getMaxPrefetchIterationsAhead();
+ }
- unsigned getMaxPrefetchIterationsAhead() {
- if (MaxPrefetchIterationsAhead.getNumOccurrences() > 0)
- return MaxPrefetchIterationsAhead;
- return TTI->getMaxPrefetchIterationsAhead();
- }
+ AssumptionCache *AC;
+ LoopInfo *LI;
+ ScalarEvolution *SE;
+ const TargetTransformInfo *TTI;
+ OptimizationRemarkEmitter *ORE;
+};
+
+/// Legacy class for inserting loop data prefetches.
+class LoopDataPrefetchLegacyPass : public FunctionPass {
+public:
+ static char ID; // Pass ID, replacement for typeid
+ LoopDataPrefetchLegacyPass() : FunctionPass(ID) {
+ initializeLoopDataPrefetchLegacyPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ // FIXME: For some reason, preserving SE here breaks LSR (even if
+ // this pass changes nothing).
+ // AU.addPreserved<ScalarEvolutionWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ }
- AssumptionCache *AC;
- LoopInfo *LI;
- ScalarEvolution *SE;
- const TargetTransformInfo *TTI;
- const DataLayout *DL;
+ bool runOnFunction(Function &F) override;
};
}
-char LoopDataPrefetch::ID = 0;
-INITIALIZE_PASS_BEGIN(LoopDataPrefetch, "loop-data-prefetch",
+char LoopDataPrefetchLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopDataPrefetchLegacyPass, "loop-data-prefetch",
"Loop Data Prefetch", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
-INITIALIZE_PASS_END(LoopDataPrefetch, "loop-data-prefetch",
+INITIALIZE_PASS_END(LoopDataPrefetchLegacyPass, "loop-data-prefetch",
"Loop Data Prefetch", false, false)
-FunctionPass *llvm::createLoopDataPrefetchPass() { return new LoopDataPrefetch(); }
+FunctionPass *llvm::createLoopDataPrefetchPass() {
+ return new LoopDataPrefetchLegacyPass();
+}
bool LoopDataPrefetch::isStrideLargeEnough(const SCEVAddRecExpr *AR) {
unsigned TargetMinStride = getMinPrefetchStride();
@@ -147,16 +161,46 @@ bool LoopDataPrefetch::isStrideLargeEnough(const SCEVAddRecExpr *AR) {
return TargetMinStride <= AbsStride;
}
-bool LoopDataPrefetch::runOnFunction(Function &F) {
+PreservedAnalyses LoopDataPrefetchPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ LoopInfo *LI = &AM.getResult<LoopAnalysis>(F);
+ ScalarEvolution *SE = &AM.getResult<ScalarEvolutionAnalysis>(F);
+ AssumptionCache *AC = &AM.getResult<AssumptionAnalysis>(F);
+ OptimizationRemarkEmitter *ORE =
+ &AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+ const TargetTransformInfo *TTI = &AM.getResult<TargetIRAnalysis>(F);
+
+ LoopDataPrefetch LDP(AC, LI, SE, TTI, ORE);
+ bool Changed = LDP.run();
+
+ if (Changed) {
+ PreservedAnalyses PA;
+ PA.preserve<DominatorTreeAnalysis>();
+ PA.preserve<LoopAnalysis>();
+ return PA;
+ }
+
+ return PreservedAnalyses::all();
+}
+
+bool LoopDataPrefetchLegacyPass::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
- LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- DL = &F.getParent()->getDataLayout();
- AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ AssumptionCache *AC =
+ &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ OptimizationRemarkEmitter *ORE =
+ &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
+ const TargetTransformInfo *TTI =
+ &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+
+ LoopDataPrefetch LDP(AC, LI, SE, TTI, ORE);
+ return LDP.run();
+}
+bool LoopDataPrefetch::run() {
// If PrefetchDistance is not set, don't run the pass. This gives an
// opportunity for targets to run this pass for selected subtargets only
// (whose TTI sets PrefetchDistance).
@@ -185,19 +229,16 @@ bool LoopDataPrefetch::runOnLoop(Loop *L) {
// Calculate the number of iterations ahead to prefetch
CodeMetrics Metrics;
- for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
- I != IE; ++I) {
-
+ for (const auto BB : L->blocks()) {
// If the loop already has prefetches, then assume that the user knows
// what they are doing and don't add any more.
- for (BasicBlock::iterator J = (*I)->begin(), JE = (*I)->end();
- J != JE; ++J)
- if (CallInst *CI = dyn_cast<CallInst>(J))
+ for (auto &I : *BB)
+ if (CallInst *CI = dyn_cast<CallInst>(&I))
if (Function *F = CI->getCalledFunction())
if (F->getIntrinsicID() == Intrinsic::prefetch)
return MadeChange;
- Metrics.analyzeBasicBlock(*I, *TTI, EphValues);
+ Metrics.analyzeBasicBlock(BB, *TTI, EphValues);
}
unsigned LoopSize = Metrics.NumInsts;
if (!LoopSize)
@@ -210,23 +251,20 @@ bool LoopDataPrefetch::runOnLoop(Loop *L) {
if (ItersAhead > getMaxPrefetchIterationsAhead())
return MadeChange;
- Function *F = L->getHeader()->getParent();
DEBUG(dbgs() << "Prefetching " << ItersAhead
<< " iterations ahead (loop size: " << LoopSize << ") in "
- << F->getName() << ": " << *L);
+ << L->getHeader()->getParent()->getName() << ": " << *L);
SmallVector<std::pair<Instruction *, const SCEVAddRecExpr *>, 16> PrefLoads;
- for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
- I != IE; ++I) {
- for (BasicBlock::iterator J = (*I)->begin(), JE = (*I)->end();
- J != JE; ++J) {
+ for (const auto BB : L->blocks()) {
+ for (auto &I : *BB) {
Value *PtrValue;
Instruction *MemI;
- if (LoadInst *LMemI = dyn_cast<LoadInst>(J)) {
+ if (LoadInst *LMemI = dyn_cast<LoadInst>(&I)) {
MemI = LMemI;
PtrValue = LMemI->getPointerOperand();
- } else if (StoreInst *SMemI = dyn_cast<StoreInst>(J)) {
+ } else if (StoreInst *SMemI = dyn_cast<StoreInst>(&I)) {
if (!PrefetchWrites) continue;
MemI = SMemI;
PtrValue = SMemI->getPointerOperand();
@@ -275,13 +313,13 @@ bool LoopDataPrefetch::runOnLoop(Loop *L) {
PrefLoads.push_back(std::make_pair(MemI, LSCEVAddRec));
- Type *I8Ptr = Type::getInt8PtrTy((*I)->getContext(), PtrAddrSpace);
- SCEVExpander SCEVE(*SE, J->getModule()->getDataLayout(), "prefaddr");
+ Type *I8Ptr = Type::getInt8PtrTy(BB->getContext(), PtrAddrSpace);
+ SCEVExpander SCEVE(*SE, I.getModule()->getDataLayout(), "prefaddr");
Value *PrefPtrValue = SCEVE.expandCodeFor(NextLSCEV, I8Ptr, MemI);
IRBuilder<> Builder(MemI);
- Module *M = (*I)->getParent()->getParent();
- Type *I32 = Type::getInt32Ty((*I)->getContext());
+ Module *M = BB->getParent()->getParent();
+ Type *I32 = Type::getInt32Ty(BB->getContext());
Value *PrefetchFunc = Intrinsic::getDeclaration(M, Intrinsic::prefetch);
Builder.CreateCall(
PrefetchFunc,
@@ -291,9 +329,8 @@ bool LoopDataPrefetch::runOnLoop(Loop *L) {
++NumPrefetches;
DEBUG(dbgs() << " Access: " << *PtrValue << ", SCEV: " << *LSCEV
<< "\n");
- emitOptimizationRemark(F->getContext(), DEBUG_TYPE, *F,
- MemI->getDebugLoc(), "prefetched memory access");
-
+ ORE->emit(OptimizationRemark(DEBUG_TYPE, "Prefetched", MemI)
+ << "prefetched memory access");
MadeChange = true;
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
index 19b2f89555c2..187e6e3073c7 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -215,7 +215,7 @@ bool LoopDeletionPass::runImpl(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
return Changed;
}
-PreservedAnalyses LoopDeletionPass::run(Loop &L, AnalysisManager<Loop> &AM) {
+PreservedAnalyses LoopDeletionPass::run(Loop &L, LoopAnalysisManager &AM) {
auto &FAM = AM.getResult<FunctionAnalysisManagerLoopProxy>(L).getManager();
Function *F = L.getHeader()->getParent();
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopDistribute.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
index 7eca28ed2bb7..b2b2f72aa83d 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
@@ -28,6 +28,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPassManager.h"
@@ -72,11 +73,10 @@ static cl::opt<unsigned> PragmaDistributeSCEVCheckThreshold(
"The maximum number of SCEV checks allowed for Loop "
"Distribution for loop marked with #pragma loop distribute(enable)"));
-// Note that the initial value for this depends on whether the pass is invoked
-// directly or from the optimization pipeline.
static cl::opt<bool> EnableLoopDistribute(
"enable-loop-distribute", cl::Hidden,
- cl::desc("Enable the new, experimental LoopDistribution Pass"));
+ cl::desc("Enable the new, experimental LoopDistribution Pass"),
+ cl::init(false));
STATISTIC(NumLoopsDistributed, "Number of loops distributed");
@@ -605,11 +605,13 @@ public:
DEBUG(dbgs() << "\nLDist: In \"" << L->getHeader()->getParent()->getName()
<< "\" checking " << *L << "\n");
- BasicBlock *PH = L->getLoopPreheader();
- if (!PH)
- return fail("no preheader");
if (!L->getExitBlock())
- return fail("multiple exit blocks");
+ return fail("MultipleExitBlocks", "multiple exit blocks");
+ if (!L->isLoopSimplifyForm())
+ return fail("NotLoopSimplifyForm",
+ "loop is not in loop-simplify form");
+
+ BasicBlock *PH = L->getLoopPreheader();
// LAA will check that we only have a single exiting block.
LAI = &GetLAA(*L);
@@ -617,11 +619,12 @@ public:
// Currently, we only distribute to isolate the part of the loop with
// dependence cycles to enable partial vectorization.
if (LAI->canVectorizeMemory())
- return fail("memory operations are safe for vectorization");
+ return fail("MemOpsCanBeVectorized",
+ "memory operations are safe for vectorization");
auto *Dependences = LAI->getDepChecker().getDependences();
if (!Dependences || Dependences->empty())
- return fail("no unsafe dependences to isolate");
+ return fail("NoUnsafeDeps", "no unsafe dependences to isolate");
InstPartitionContainer Partitions(L, LI, DT);
@@ -674,14 +677,16 @@ public:
DEBUG(dbgs() << "Seeded partitions:\n" << Partitions);
if (Partitions.getSize() < 2)
- return fail("cannot isolate unsafe dependencies");
+ return fail("CantIsolateUnsafeDeps",
+ "cannot isolate unsafe dependencies");
// Run the merge heuristics: Merge non-cyclic adjacent partitions since we
// should be able to vectorize these together.
Partitions.mergeBeforePopulating();
DEBUG(dbgs() << "\nMerged partitions:\n" << Partitions);
if (Partitions.getSize() < 2)
- return fail("cannot isolate unsafe dependencies");
+ return fail("CantIsolateUnsafeDeps",
+ "cannot isolate unsafe dependencies");
// Now, populate the partitions with non-memory operations.
Partitions.populateUsedSet();
@@ -693,7 +698,8 @@ public:
DEBUG(dbgs() << "\nPartitions merged to ensure unique loads:\n"
<< Partitions);
if (Partitions.getSize() < 2)
- return fail("cannot isolate unsafe dependencies");
+ return fail("CantIsolateUnsafeDeps",
+ "cannot isolate unsafe dependencies");
}
// Don't distribute the loop if we need too many SCEV run-time checks.
@@ -701,7 +707,8 @@ public:
if (Pred.getComplexity() > (IsForced.getValueOr(false)
? PragmaDistributeSCEVCheckThreshold
: DistributeSCEVCheckThreshold))
- return fail("too many SCEV run-time checks needed.\n");
+ return fail("TooManySCEVRuntimeChecks",
+ "too many SCEV run-time checks needed.\n");
DEBUG(dbgs() << "\nDistributing loop: " << *L << "\n");
// We're done forming the partitions set up the reverse mapping from
@@ -742,36 +749,38 @@ public:
DEBUG(Partitions.printBlocks());
if (LDistVerify) {
- LI->verify();
+ LI->verify(*DT);
DT->verifyDomTree();
}
++NumLoopsDistributed;
// Report the success.
- emitOptimizationRemark(F->getContext(), LDIST_NAME, *F, L->getStartLoc(),
- "distributed loop");
+ ORE->emit(OptimizationRemark(LDIST_NAME, "Distribute", L->getStartLoc(),
+ L->getHeader())
+ << "distributed loop");
return true;
}
/// \brief Provide diagnostics then \return with false.
- bool fail(llvm::StringRef Message) {
+ bool fail(StringRef RemarkName, StringRef Message) {
LLVMContext &Ctx = F->getContext();
bool Forced = isForced().getValueOr(false);
DEBUG(dbgs() << "Skipping; " << Message << "\n");
// With Rpass-missed report that distribution failed.
- ORE->emitOptimizationRemarkMissed(
- LDIST_NAME, L,
- "loop not distributed: use -Rpass-analysis=loop-distribute for more "
- "info");
+ ORE->emit(
+ OptimizationRemarkMissed(LDIST_NAME, "NotDistributed", L->getStartLoc(),
+ L->getHeader())
+ << "loop not distributed: use -Rpass-analysis=loop-distribute for more "
+ "info");
// With Rpass-analysis report why. This is on by default if distribution
// was requested explicitly.
- emitOptimizationRemarkAnalysis(
- Ctx, Forced ? DiagnosticInfoOptimizationRemarkAnalysis::AlwaysPrint
- : LDIST_NAME,
- *F, L->getStartLoc(), Twine("loop not distributed: ") + Message);
+ ORE->emit(OptimizationRemarkAnalysis(
+ Forced ? OptimizationRemarkAnalysis::AlwaysPrint : LDIST_NAME,
+ RemarkName, L->getStartLoc(), L->getHeader())
+ << "loop not distributed: " << Message);
// Also issue a warning if distribution was requested explicitly but it
// failed.
@@ -865,8 +874,7 @@ private:
/// Shared implementation between new and old PMs.
static bool runImpl(Function &F, LoopInfo *LI, DominatorTree *DT,
ScalarEvolution *SE, OptimizationRemarkEmitter *ORE,
- std::function<const LoopAccessInfo &(Loop &)> &GetLAA,
- bool ProcessAllLoops) {
+ std::function<const LoopAccessInfo &(Loop &)> &GetLAA) {
// Build up a worklist of inner-loops to vectorize. This is necessary as the
// act of distributing a loop creates new loops and can invalidate iterators
// across the loops.
@@ -885,7 +893,7 @@ static bool runImpl(Function &F, LoopInfo *LI, DominatorTree *DT,
// If distribution was forced for the specific loop to be
// enabled/disabled, follow that. Otherwise use the global flag.
- if (LDL.isForced().getValueOr(ProcessAllLoops))
+ if (LDL.isForced().getValueOr(EnableLoopDistribute))
Changed |= LDL.processLoop(GetLAA);
}
@@ -896,15 +904,8 @@ static bool runImpl(Function &F, LoopInfo *LI, DominatorTree *DT,
/// \brief The pass class.
class LoopDistributeLegacy : public FunctionPass {
public:
- /// \p ProcessAllLoopsByDefault specifies whether loop distribution should be
- /// performed by default. Pass -enable-loop-distribute={0,1} overrides this
- /// default. We use this to keep LoopDistribution off by default when invoked
- /// from the optimization pipeline but on when invoked explicitly from opt.
- LoopDistributeLegacy(bool ProcessAllLoopsByDefault = true)
- : FunctionPass(ID), ProcessAllLoops(ProcessAllLoopsByDefault) {
+ LoopDistributeLegacy() : FunctionPass(ID) {
// The default is set by the caller.
- if (EnableLoopDistribute.getNumOccurrences() > 0)
- ProcessAllLoops = EnableLoopDistribute;
initializeLoopDistributeLegacyPass(*PassRegistry::getPassRegistry());
}
@@ -920,7 +921,7 @@ public:
std::function<const LoopAccessInfo &(Loop &)> GetLAA =
[&](Loop &L) -> const LoopAccessInfo & { return LAA->getInfo(&L); };
- return runImpl(F, LI, DT, SE, ORE, GetLAA, ProcessAllLoops);
+ return runImpl(F, LI, DT, SE, ORE, GetLAA);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -931,26 +932,15 @@ public:
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
}
static char ID;
-
-private:
- /// \brief Whether distribution should be on in this function. The per-loop
- /// pragma can override this.
- bool ProcessAllLoops;
};
} // anonymous namespace
PreservedAnalyses LoopDistributePass::run(Function &F,
FunctionAnalysisManager &AM) {
- // FIXME: This does not currently match the behavior from the old PM.
- // ProcessAllLoops with the old PM defaults to true when invoked from opt and
- // false when invoked from the optimization pipeline.
- bool ProcessAllLoops = false;
- if (EnableLoopDistribute.getNumOccurrences() > 0)
- ProcessAllLoops = EnableLoopDistribute;
-
auto &LI = AM.getResult<LoopAnalysis>(F);
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
@@ -962,17 +952,18 @@ PreservedAnalyses LoopDistributePass::run(Function &F,
return LAM.getResult<LoopAccessAnalysis>(L);
};
- bool Changed = runImpl(F, &LI, &DT, &SE, &ORE, GetLAA, ProcessAllLoops);
+ bool Changed = runImpl(F, &LI, &DT, &SE, &ORE, GetLAA);
if (!Changed)
return PreservedAnalyses::all();
PreservedAnalyses PA;
PA.preserve<LoopAnalysis>();
PA.preserve<DominatorTreeAnalysis>();
+ PA.preserve<GlobalsAA>();
return PA;
}
char LoopDistributeLegacy::ID;
-static const char ldist_name[] = "Loop Distribition";
+static const char ldist_name[] = "Loop Distribution";
INITIALIZE_PASS_BEGIN(LoopDistributeLegacy, LDIST_NAME, ldist_name, false,
false)
@@ -984,7 +975,5 @@ INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_END(LoopDistributeLegacy, LDIST_NAME, ldist_name, false, false)
namespace llvm {
-FunctionPass *createLoopDistributePass(bool ProcessAllLoopsByDefault) {
- return new LoopDistributeLegacy(ProcessAllLoopsByDefault);
-}
+FunctionPass *createLoopDistributePass() { return new LoopDistributeLegacy(); }
}
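// Illustrative sketch (not from this patch): the OptimizationRemarkEmitter
// style of reporting that replaces emitOptimizationRemark in the hunks above.
// A remark now carries the pass name, a remark identifier and a location, and
// the message is streamed into it. The function name below is made up.
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/IR/DiagnosticInfo.h"

using namespace llvm;

static void reportDistributed(OptimizationRemarkEmitter &ORE, Loop *L) {
  ORE.emit(OptimizationRemark("loop-distribute", "Distribute",
                              L->getStartLoc(), L->getHeader())
           << "distributed loop");
}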
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
index 1468676a3543..fd167db11789 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopIdiomRecognize.cpp
@@ -11,6 +11,12 @@
// non-loop form. In cases that this kicks in, it can be a significant
// performance win.
//
+// If compiling for code size we avoid idiom recognition if the resulting
+// code could be larger than the code for the original loop. One way this could
+// happen is if the loop is not removable after idiom recognition due to the
+// presence of non-idiom instructions. The initial implementation of the
+// heuristics applies to idioms in multi-block loops.
+//
//===----------------------------------------------------------------------===//
//
// TODO List:
@@ -65,6 +71,12 @@ using namespace llvm;
STATISTIC(NumMemSet, "Number of memset's formed from loop stores");
STATISTIC(NumMemCpy, "Number of memcpy's formed from loop load+stores");
+static cl::opt<bool> UseLIRCodeSizeHeurs(
+ "use-lir-code-size-heurs",
+ cl::desc("Use loop idiom recognition code size heuristics when compiling"
+ "with -Os/-Oz"),
+ cl::init(true), cl::Hidden);
+
namespace {
class LoopIdiomRecognize {
@@ -76,6 +88,7 @@ class LoopIdiomRecognize {
TargetLibraryInfo *TLI;
const TargetTransformInfo *TTI;
const DataLayout *DL;
+ bool ApplyCodeSizeHeuristics;
public:
explicit LoopIdiomRecognize(AliasAnalysis *AA, DominatorTree *DT,
@@ -117,8 +130,10 @@ private:
Instruction *TheStore,
SmallPtrSetImpl<Instruction *> &Stores,
const SCEVAddRecExpr *Ev, const SCEV *BECount,
- bool NegStride);
+ bool NegStride, bool IsLoopMemset = false);
bool processLoopStoreOfLoopLoad(StoreInst *SI, const SCEV *BECount);
+ bool avoidLIRForMultiBlockLoop(bool IsMemset = false,
+ bool IsLoopMemset = false);
/// @}
/// \name Noncountable Loop Idiom Handling
@@ -172,7 +187,7 @@ public:
} // End anonymous namespace.
PreservedAnalyses LoopIdiomRecognizePass::run(Loop &L,
- AnalysisManager<Loop> &AM) {
+ LoopAnalysisManager &AM) {
const auto &FAM =
AM.getResult<FunctionAnalysisManagerLoopProxy>(L).getManager();
Function *F = L.getHeader()->getParent();
@@ -229,6 +244,10 @@ bool LoopIdiomRecognize::runOnLoop(Loop *L) {
if (Name == "memset" || Name == "memcpy")
return false;
+ // Determine if code size heuristics need to be applied.
+ ApplyCodeSizeHeuristics =
+ L->getHeader()->getParent()->optForSize() && UseLIRCodeSizeHeurs;
+
HasMemset = TLI->has(LibFunc::memset);
HasMemsetPattern = TLI->has(LibFunc::memset_pattern16);
HasMemcpy = TLI->has(LibFunc::memcpy);
@@ -689,7 +708,7 @@ bool LoopIdiomRecognize::processLoopMemSet(MemSetInst *MSI,
bool NegStride = SizeInBytes == -Stride;
return processLoopStridedStore(Pointer, (unsigned)SizeInBytes,
MSI->getAlignment(), SplatValue, MSI, MSIs, Ev,
- BECount, NegStride);
+ BECount, NegStride, /*IsLoopMemset=*/true);
}
/// mayLoopAccessLocation - Return true if the specified loop might access the
@@ -745,7 +764,7 @@ bool LoopIdiomRecognize::processLoopStridedStore(
Value *DestPtr, unsigned StoreSize, unsigned StoreAlignment,
Value *StoredVal, Instruction *TheStore,
SmallPtrSetImpl<Instruction *> &Stores, const SCEVAddRecExpr *Ev,
- const SCEV *BECount, bool NegStride) {
+ const SCEV *BECount, bool NegStride, bool IsLoopMemset) {
Value *SplatValue = isBytewiseValue(StoredVal);
Constant *PatternValue = nullptr;
@@ -786,6 +805,9 @@ bool LoopIdiomRecognize::processLoopStridedStore(
return false;
}
+ if (avoidLIRForMultiBlockLoop(/*IsMemset=*/true, IsLoopMemset))
+ return false;
+
// Okay, everything looks good, insert the memset.
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
@@ -917,6 +939,9 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
return false;
}
+ if (avoidLIRForMultiBlockLoop())
+ return false;
+
// Okay, everything is safe, we can transform this!
// The # stored bytes is (BECount+1)*Size. Expand the trip count out to
@@ -948,6 +973,23 @@ bool LoopIdiomRecognize::processLoopStoreOfLoopLoad(StoreInst *SI,
return true;
}
+// When compiling for code size we avoid idiom recognition for a multi-block loop
+// unless it is a loop_memset idiom or a memset/memcpy idiom in a nested loop.
+//
+bool LoopIdiomRecognize::avoidLIRForMultiBlockLoop(bool IsMemset,
+ bool IsLoopMemset) {
+ if (ApplyCodeSizeHeuristics && CurLoop->getNumBlocks() > 1) {
+ if (!CurLoop->getParentLoop() && (!IsMemset || !IsLoopMemset)) {
+ DEBUG(dbgs() << " " << CurLoop->getHeader()->getParent()->getName()
+ << " : LIR " << (IsMemset ? "Memset" : "Memcpy")
+ << " avoided: multi-block top-level loop\n");
+ return true;
+ }
+ }
+
+ return false;
+}
+
bool LoopIdiomRecognize::runOnNoncountableLoop() {
return recognizePopcount();
}
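// Illustrative sketch (not from this patch): the situation the heuristic above
// avoids; everything here is made up for illustration. The stores below form
// a recognizable memset, but the call keeps the loop alive, so at -Os/-Oz the
// result would be the original loop plus an extra memset call, i.e. larger
// code than before.
extern void record(int);

void zeroAndRecord(char *Buf, int N) {
  for (int I = 0; I < N; ++I) {
    Buf[I] = 0;  // recognizable as memset(Buf, 0, N)
    record(I);   // non-idiom side effect: the loop is not removable
  }
}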
@@ -1139,9 +1181,7 @@ bool LoopIdiomRecognize::recognizePopcount() {
// It should have a preheader containing nothing but an unconditional branch.
BasicBlock *PH = CurLoop->getLoopPreheader();
- if (!PH)
- return false;
- if (&PH->front() != PH->getTerminator())
+ if (!PH || &PH->front() != PH->getTerminator())
return false;
auto *EntryBI = dyn_cast<BranchInst>(PH->getTerminator());
if (!EntryBI || EntryBI->isConditional())
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
index 629cb87d7a91..f6620ad1ade5 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopInstSimplify.cpp
@@ -184,7 +184,7 @@ public:
}
PreservedAnalyses LoopInstSimplifyPass::run(Loop &L,
- AnalysisManager<Loop> &AM) {
+ LoopAnalysisManager &AM) {
const auto &FAM =
AM.getResult<FunctionAnalysisManagerLoopProxy>(L).getManager();
Function *F = L.getHeader()->getParent();
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopInterchange.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
index 9241ec365277..e9f84edd1cbf 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopInterchange.cpp
@@ -44,6 +44,10 @@ using namespace llvm;
#define DEBUG_TYPE "loop-interchange"
+static cl::opt<int> LoopInterchangeCostThreshold(
+ "loop-interchange-threshold", cl::init(0), cl::Hidden,
+ cl::desc("Interchange if you gain more than this number"));
+
namespace {
typedef SmallVector<Loop *, 8> LoopVector;
@@ -75,30 +79,23 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
typedef SmallVector<Value *, 16> ValueVector;
ValueVector MemInstr;
- if (Level > MaxLoopNestDepth) {
- DEBUG(dbgs() << "Cannot handle loops of depth greater than "
- << MaxLoopNestDepth << "\n");
- return false;
- }
-
// For each block.
for (Loop::block_iterator BB = L->block_begin(), BE = L->block_end();
BB != BE; ++BB) {
// Scan the BB and collect legal loads and stores.
for (BasicBlock::iterator I = (*BB)->begin(), E = (*BB)->end(); I != E;
++I) {
- Instruction *Ins = dyn_cast<Instruction>(I);
- if (!Ins)
- return false;
- LoadInst *Ld = dyn_cast<LoadInst>(I);
- StoreInst *St = dyn_cast<StoreInst>(I);
- if (!St && !Ld)
- continue;
- if (Ld && !Ld->isSimple())
- return false;
- if (St && !St->isSimple())
+ if (!isa<Instruction>(I))
return false;
- MemInstr.push_back(&*I);
+ if (LoadInst *Ld = dyn_cast<LoadInst>(I)) {
+ if (!Ld->isSimple())
+ return false;
+ MemInstr.push_back(&*I);
+ } else if (StoreInst *St = dyn_cast<StoreInst>(I)) {
+ if (!St->isSimple())
+ return false;
+ MemInstr.push_back(&*I);
+ }
}
}
@@ -110,66 +107,63 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
for (I = MemInstr.begin(), IE = MemInstr.end(); I != IE; ++I) {
for (J = I, JE = MemInstr.end(); J != JE; ++J) {
std::vector<char> Dep;
- Instruction *Src = dyn_cast<Instruction>(*I);
- Instruction *Des = dyn_cast<Instruction>(*J);
- if (Src == Des)
+ Instruction *Src = cast<Instruction>(*I);
+ Instruction *Dst = cast<Instruction>(*J);
+ if (Src == Dst)
continue;
- if (isa<LoadInst>(Src) && isa<LoadInst>(Des))
+ // Ignore Input dependencies.
+ if (isa<LoadInst>(Src) && isa<LoadInst>(Dst))
continue;
- if (auto D = DI->depends(Src, Des, true)) {
- DEBUG(dbgs() << "Found Dependency between Src=" << Src << " Des=" << Des
- << "\n");
- if (D->isFlow()) {
- // TODO: Handle Flow dependence.Check if it is sufficient to populate
- // the Dependence Matrix with the direction reversed.
- DEBUG(dbgs() << "Flow dependence not handled");
- return false;
- }
- if (D->isAnti()) {
- DEBUG(dbgs() << "Found Anti dependence \n");
- unsigned Levels = D->getLevels();
- char Direction;
- for (unsigned II = 1; II <= Levels; ++II) {
- const SCEV *Distance = D->getDistance(II);
- const SCEVConstant *SCEVConst =
- dyn_cast_or_null<SCEVConstant>(Distance);
- if (SCEVConst) {
- const ConstantInt *CI = SCEVConst->getValue();
- if (CI->isNegative())
- Direction = '<';
- else if (CI->isZero())
- Direction = '=';
- else
- Direction = '>';
- Dep.push_back(Direction);
- } else if (D->isScalar(II)) {
- Direction = 'S';
- Dep.push_back(Direction);
- } else {
- unsigned Dir = D->getDirection(II);
- if (Dir == Dependence::DVEntry::LT ||
- Dir == Dependence::DVEntry::LE)
- Direction = '<';
- else if (Dir == Dependence::DVEntry::GT ||
- Dir == Dependence::DVEntry::GE)
- Direction = '>';
- else if (Dir == Dependence::DVEntry::EQ)
- Direction = '=';
- else
- Direction = '*';
- Dep.push_back(Direction);
- }
- }
- while (Dep.size() != Level) {
- Dep.push_back('I');
+ // Track Output, Flow, and Anti dependencies.
+ if (auto D = DI->depends(Src, Dst, true)) {
+ assert(D->isOrdered() && "Expected an output, flow or anti dep.");
+ DEBUG(StringRef DepType =
+ D->isFlow() ? "flow" : D->isAnti() ? "anti" : "output";
+ dbgs() << "Found " << DepType
+ << " dependency between Src and Dst\n"
+ << " Src:" << *Src << "\n Dst:" << *Dst << '\n');
+ unsigned Levels = D->getLevels();
+ char Direction;
+ for (unsigned II = 1; II <= Levels; ++II) {
+ const SCEV *Distance = D->getDistance(II);
+ const SCEVConstant *SCEVConst =
+ dyn_cast_or_null<SCEVConstant>(Distance);
+ if (SCEVConst) {
+ const ConstantInt *CI = SCEVConst->getValue();
+ if (CI->isNegative())
+ Direction = '<';
+ else if (CI->isZero())
+ Direction = '=';
+ else
+ Direction = '>';
+ Dep.push_back(Direction);
+ } else if (D->isScalar(II)) {
+ Direction = 'S';
+ Dep.push_back(Direction);
+ } else {
+ unsigned Dir = D->getDirection(II);
+ if (Dir == Dependence::DVEntry::LT ||
+ Dir == Dependence::DVEntry::LE)
+ Direction = '<';
+ else if (Dir == Dependence::DVEntry::GT ||
+ Dir == Dependence::DVEntry::GE)
+ Direction = '>';
+ else if (Dir == Dependence::DVEntry::EQ)
+ Direction = '=';
+ else
+ Direction = '*';
+ Dep.push_back(Direction);
}
+ }
+ while (Dep.size() != Level) {
+ Dep.push_back('I');
+ }
- DepMatrix.push_back(Dep);
- if (DepMatrix.size() > MaxMemInstrCount) {
- DEBUG(dbgs() << "Cannot handle more than " << MaxMemInstrCount
- << " dependencies inside loop\n");
- return false;
- }
+ DepMatrix.push_back(Dep);
+ if (DepMatrix.size() > MaxMemInstrCount) {
+ DEBUG(dbgs() << "Cannot handle more than " << MaxMemInstrCount
+ << " dependencies inside loop\n");
+ return false;
}
}
}
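// Illustrative sketch (not from this patch): what a dependency-matrix row
// describes. Each row has one direction character per loop level: '=' (same
// iteration), '<' / '>' (carried from an earlier / later iteration at that
// level), 'S' (scalar), 'I' (padding for levels the dependence does not
// constrain) and '*' (unknown). In the made-up nest below, A[i][j] depends on
// A[i][j-1]: the dependence is loop-independent at the i level and carried at
// distance 1 by the j loop, so its row constrains only the inner level.
// Interchange is rejected if, after permuting the columns, any row has '>' as
// its leftmost non-'=' direction.
void example(int A[100][100]) {
  for (int i = 0; i < 100; ++i)     // outer level
    for (int j = 1; j < 100; ++j)   // inner level, carries the dependence
      A[i][j] = A[i][j - 1] + 1;
}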
@@ -183,8 +177,8 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level,
// A loop is moved from index 'from' to an index 'to'. Update the Dependence
// matrix by exchanging the two columns.
-static void interChangeDepedencies(CharMatrix &DepMatrix, unsigned FromIndx,
- unsigned ToIndx) {
+static void interChangeDependencies(CharMatrix &DepMatrix, unsigned FromIndx,
+ unsigned ToIndx) {
unsigned numRows = DepMatrix.size();
for (unsigned i = 0; i < numRows; ++i) {
char TmpVal = DepMatrix[i][ToIndx];
@@ -211,7 +205,7 @@ static bool isOuterMostDepPositive(CharMatrix &DepMatrix, unsigned Row,
static bool containsNoDependence(CharMatrix &DepMatrix, unsigned Row,
unsigned Column) {
for (unsigned i = 0; i < Column; ++i) {
- if (DepMatrix[Row][i] != '=' || DepMatrix[Row][i] != 'S' ||
+ if (DepMatrix[Row][i] != '=' && DepMatrix[Row][i] != 'S' &&
DepMatrix[Row][i] != 'I')
return false;
}
@@ -255,9 +249,8 @@ static bool validDepInterchange(CharMatrix &DepMatrix, unsigned Row,
// Checks if it is legal to interchange 2 loops.
// [Theorem] A permutation of the loops in a perfect nest is legal if and only
-// if
-// the direction matrix, after the same permutation is applied to its columns,
-// has no ">" direction as the leftmost non-"=" direction in any row.
+// if the direction matrix, after the same permutation is applied to its
+// columns, has no ">" direction as the leftmost non-"=" direction in any row.
static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix,
unsigned InnerLoopId,
unsigned OuterLoopId) {
@@ -269,8 +262,7 @@ static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix,
char OuterDep = DepMatrix[Row][OuterLoopId];
if (InnerDep == '*' || OuterDep == '*')
return false;
- else if (!validDepInterchange(DepMatrix, Row, OuterLoopId, InnerDep,
- OuterDep))
+ if (!validDepInterchange(DepMatrix, Row, OuterLoopId, InnerDep, OuterDep))
return false;
}
return true;
@@ -278,7 +270,9 @@ static bool isLegalToInterChangeLoops(CharMatrix &DepMatrix,
static void populateWorklist(Loop &L, SmallVector<LoopVector, 8> &V) {
- DEBUG(dbgs() << "Calling populateWorklist called\n");
+ DEBUG(dbgs() << "Calling populateWorklist on Func: "
+ << L.getHeader()->getParent()->getName() << " Loop: %"
+ << L.getHeader()->getName() << '\n');
LoopVector LoopList;
Loop *CurrentLoop = &L;
const std::vector<Loop *> *Vec = &CurrentLoop->getSubLoops();
@@ -315,8 +309,7 @@ static PHINode *getInductionVariable(Loop *L, ScalarEvolution *SE) {
if (!AddRec || !AddRec->isAffine())
continue;
const SCEV *Step = AddRec->getStepRecurrence(*SE);
- const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
- if (!C)
+ if (!isa<SCEVConstant>(Step))
continue;
// Found the induction variable.
// FIXME: Handle loops with more than one induction variable. Note that,
@@ -474,7 +467,7 @@ struct LoopInterchange : public FunctionPass {
for (Loop *L : LoopList) {
const SCEV *ExitCountOuter = SE->getBackedgeTakenCount(L);
if (ExitCountOuter == SE->getCouldNotCompute()) {
- DEBUG(dbgs() << "Couldn't compute Backedge count\n");
+ DEBUG(dbgs() << "Couldn't compute backedge count\n");
return false;
}
if (L->getNumBackEdges() != 1) {
@@ -482,7 +475,7 @@ struct LoopInterchange : public FunctionPass {
return false;
}
if (!L->getExitingBlock()) {
- DEBUG(dbgs() << "Loop Doesn't have unique exit block\n");
+ DEBUG(dbgs() << "Loop doesn't have unique exit block\n");
return false;
}
}
@@ -498,27 +491,32 @@ struct LoopInterchange : public FunctionPass {
bool processLoopList(LoopVector LoopList, Function &F) {
bool Changed = false;
- CharMatrix DependencyMatrix;
- if (LoopList.size() < 2) {
+ unsigned LoopNestDepth = LoopList.size();
+ if (LoopNestDepth < 2) {
DEBUG(dbgs() << "Loop doesn't contain minimum nesting level.\n");
return false;
}
+ if (LoopNestDepth > MaxLoopNestDepth) {
+ DEBUG(dbgs() << "Cannot handle loops of depth greater than "
+ << MaxLoopNestDepth << "\n");
+ return false;
+ }
if (!isComputableLoopNest(LoopList)) {
- DEBUG(dbgs() << "Not vaild loop candidate for interchange\n");
+ DEBUG(dbgs() << "Not valid loop candidate for interchange\n");
return false;
}
- Loop *OuterMostLoop = *(LoopList.begin());
- DEBUG(dbgs() << "Processing LoopList of size = " << LoopList.size()
- << "\n");
+ DEBUG(dbgs() << "Processing LoopList of size = " << LoopNestDepth << "\n");
- if (!populateDependencyMatrix(DependencyMatrix, LoopList.size(),
+ CharMatrix DependencyMatrix;
+ Loop *OuterMostLoop = *(LoopList.begin());
+ if (!populateDependencyMatrix(DependencyMatrix, LoopNestDepth,
OuterMostLoop, DI)) {
- DEBUG(dbgs() << "Populating Dependency matrix failed\n");
+ DEBUG(dbgs() << "Populating dependency matrix failed\n");
return false;
}
#ifdef DUMP_DEP_MATRICIES
- DEBUG(dbgs() << "Dependence before inter change \n");
+ DEBUG(dbgs() << "Dependence before interchange\n");
printDepMatrix(DependencyMatrix);
#endif
@@ -556,10 +554,10 @@ struct LoopInterchange : public FunctionPass {
std::swap(LoopList[i - 1], LoopList[i]);
// Update the DependencyMatrix
- interChangeDepedencies(DependencyMatrix, i, i - 1);
+ interChangeDependencies(DependencyMatrix, i, i - 1);
DT->recalculate(F);
#ifdef DUMP_DEP_MATRICIES
- DEBUG(dbgs() << "Dependence after inter change \n");
+ DEBUG(dbgs() << "Dependence after interchange\n");
printDepMatrix(DependencyMatrix);
#endif
Changed |= Interchanged;
@@ -571,7 +569,7 @@ struct LoopInterchange : public FunctionPass {
unsigned OuterLoopId, BasicBlock *LoopNestExit,
std::vector<std::vector<char>> &DependencyMatrix) {
- DEBUG(dbgs() << "Processing Innder Loop Id = " << InnerLoopId
+ DEBUG(dbgs() << "Processing Inner Loop Id = " << InnerLoopId
<< " and OuterLoopId = " << OuterLoopId << "\n");
Loop *InnerLoop = LoopList[InnerLoopId];
Loop *OuterLoop = LoopList[OuterLoopId];
@@ -585,7 +583,7 @@ struct LoopInterchange : public FunctionPass {
DEBUG(dbgs() << "Loops are legal to interchange\n");
LoopInterchangeProfitability LIP(OuterLoop, InnerLoop, SE);
if (!LIP.isProfitable(InnerLoopId, OuterLoopId, DependencyMatrix)) {
- DEBUG(dbgs() << "Interchanging Loops not profitable\n");
+ DEBUG(dbgs() << "Interchanging loops not profitable\n");
return false;
}
@@ -599,8 +597,8 @@ struct LoopInterchange : public FunctionPass {
} // end of namespace
bool LoopInterchangeLegality::areAllUsesReductions(Instruction *Ins, Loop *L) {
- return !std::any_of(Ins->user_begin(), Ins->user_end(), [=](User *U) -> bool {
- PHINode *UserIns = dyn_cast<PHINode>(U);
+ return none_of(Ins->users(), [=](User *U) -> bool {
+ auto *UserIns = dyn_cast<PHINode>(U);
RecurrenceDescriptor RD;
return !UserIns || !RecurrenceDescriptor::isReductionPHI(UserIns, L, RD);
});
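Several hunks in this change swap std:: algorithm calls for the range helpers declared in llvm/ADT/STLExtras.h (none_of, all_of, is_contained, find, transform). A minimal sketch of how those helpers read, using an invented std::vector:

#include "llvm/ADT/STLExtras.h"

#include <vector>

bool demoRangeHelpers() {
  std::vector<int> Values = {1, 2, 3, 4};

  // Range form of std::none_of: no begin()/end() boilerplate.
  bool NoneNegative = llvm::none_of(Values, [](int V) { return V < 0; });

  // Replaces the std::find(...) != end() idiom.
  bool HasThree = llvm::is_contained(Values, 3);

  return NoneNegative && HasThree;
}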
@@ -626,8 +624,7 @@ bool LoopInterchangeLegality::containsUnsafeInstructionsInLatch(
// Stores corresponding to reductions are considered safe when deciding whether
// the loops are tightly nested.
if (StoreInst *L = dyn_cast<StoreInst>(I)) {
- PHINode *PHI = dyn_cast<PHINode>(L->getOperand(0));
- if (!PHI)
+ if (!isa<PHINode>(L->getOperand(0)))
return true;
} else if (I->mayHaveSideEffects() || I->mayReadFromMemory())
return true;
@@ -640,30 +637,30 @@ bool LoopInterchangeLegality::tightlyNested(Loop *OuterLoop, Loop *InnerLoop) {
BasicBlock *InnerLoopPreHeader = InnerLoop->getLoopPreheader();
BasicBlock *OuterLoopLatch = OuterLoop->getLoopLatch();
- DEBUG(dbgs() << "Checking if Loops are Tightly Nested\n");
+ DEBUG(dbgs() << "Checking if loops are tightly nested\n");
// A perfectly nested loop will not have any branch in between the outer and
// inner block, i.e. the outer header will branch only to either the inner
// preheader or the outer loop latch.
- BranchInst *outerLoopHeaderBI =
+ BranchInst *OuterLoopHeaderBI =
dyn_cast<BranchInst>(OuterLoopHeader->getTerminator());
- if (!outerLoopHeaderBI)
+ if (!OuterLoopHeaderBI)
return false;
- unsigned num = outerLoopHeaderBI->getNumSuccessors();
- for (unsigned i = 0; i < num; i++) {
- if (outerLoopHeaderBI->getSuccessor(i) != InnerLoopPreHeader &&
- outerLoopHeaderBI->getSuccessor(i) != OuterLoopLatch)
+
+ for (unsigned i = 0, e = OuterLoopHeaderBI->getNumSuccessors(); i < e; ++i) {
+ if (OuterLoopHeaderBI->getSuccessor(i) != InnerLoopPreHeader &&
+ OuterLoopHeaderBI->getSuccessor(i) != OuterLoopLatch)
return false;
}
- DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch \n");
+ DEBUG(dbgs() << "Checking instructions in Loop header and Loop latch\n");
// We do not have any basic block in between; now make sure the outer header
// and the outer loop latch don't contain any unsafe instructions.
if (containsUnsafeInstructionsInHeader(OuterLoopHeader) ||
containsUnsafeInstructionsInLatch(OuterLoopLatch))
return false;
- DEBUG(dbgs() << "Loops are perfectly nested \n");
+ DEBUG(dbgs() << "Loops are perfectly nested\n");
// We have a perfect loop nest.
return true;
}
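For reference, a sketch of the source-level shapes tightlyNested() distinguishes; the arrays and bounds here are invented for illustration:

// Tightly (perfectly) nested: the outer header branches straight to the inner
// preheader, and only the induction-variable update sits between the inner
// loop and the outer latch.
void perfectNest(int A[100][100]) {
  for (int i = 0; i < 100; ++i)
    for (int j = 0; j < 100; ++j)
      A[i][j] = i + j;
}

// Not tightly nested: the statement between the two loops puts extra work into
// the outer header, so the interchange legality check rejects it.
void imperfectNest(int A[100][100], int B[100]) {
  for (int i = 0; i < 100; ++i) {
    B[i] = 0; // code between the outer and inner loop
    for (int j = 0; j < 100; ++j)
      A[i][j] = i + j;
  }
}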
@@ -703,7 +700,7 @@ bool LoopInterchangeLegality::findInductionAndReductions(
RecurrenceDescriptor RD;
InductionDescriptor ID;
PHINode *PHI = cast<PHINode>(I);
- if (InductionDescriptor::isInductionPHI(PHI, SE, ID))
+ if (InductionDescriptor::isInductionPHI(PHI, L, SE, ID))
Inductions.push_back(PHI);
else if (RecurrenceDescriptor::isReductionPHI(PHI, L, RD))
Reductions.push_back(PHI);
@@ -852,8 +849,8 @@ bool LoopInterchangeLegality::canInterchangeLoops(unsigned InnerLoopId,
if (!isLegalToInterChangeLoops(DepMatrix, InnerLoopId, OuterLoopId)) {
DEBUG(dbgs() << "Failed interchange InnerLoopId = " << InnerLoopId
- << "and OuterLoopId = " << OuterLoopId
- << "due to dependence\n");
+ << " and OuterLoopId = " << OuterLoopId
+ << " due to dependence\n");
return false;
}
@@ -946,9 +943,9 @@ int LoopInterchangeProfitability::getInstrOrderCost() {
return GoodOrder - BadOrder;
}
-static bool isProfitabileForVectorization(unsigned InnerLoopId,
- unsigned OuterLoopId,
- CharMatrix &DepMatrix) {
+static bool isProfitableForVectorization(unsigned InnerLoopId,
+ unsigned OuterLoopId,
+ CharMatrix &DepMatrix) {
// TODO: Improve this heuristic to catch more cases.
// If the inner loop is loop independent or doesn't carry any dependency it is
// profitable to move this to outer position.
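A hypothetical illustration of that heuristic: when only the inner loop carries the dependence, moving it outward leaves a dependence-free (and, for this access pattern, contiguous) inner loop:

// Before interchange: the inner j loop carries the dependence through
// A[j][i] += A[j - 1][i], so it cannot be vectorized.
void before(int A[256][256]) {
  for (int i = 0; i < 256; ++i)
    for (int j = 1; j < 256; ++j)
      A[j][i] += A[j - 1][i];
}

// After interchange: the dependence is carried by the outer loop and the new
// inner loop over i is independent, with row-wise (unit-stride) accesses.
void after(int A[256][256]) {
  for (int j = 1; j < 256; ++j)
    for (int i = 0; i < 256; ++i)
      A[j][i] += A[j - 1][i];
}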
@@ -977,16 +974,15 @@ bool LoopInterchangeProfitability::isProfitable(unsigned InnerLoopId,
// This is a rough cost estimation algorithm. It counts the good and bad orders
// of induction variables in the instructions and allows reordering if the
// number of bad orders is more than the number of good ones.
- int Cost = 0;
- Cost += getInstrOrderCost();
+ int Cost = getInstrOrderCost();
DEBUG(dbgs() << "Cost = " << Cost << "\n");
- if (Cost < 0)
+ if (Cost < -LoopInterchangeCostThreshold)
return true;
// It is not profitable as per current cache profitability model. But check if
// we can move this loop outside to improve parallelism.
bool ImprovesPar =
- isProfitabileForVectorization(InnerLoopId, OuterLoopId, DepMatrix);
+ isProfitableForVectorization(InnerLoopId, OuterLoopId, DepMatrix);
return ImprovesPar;
}
@@ -1022,8 +1018,6 @@ void LoopInterchangeTransform::restructureLoops(Loop *InnerLoop,
}
bool LoopInterchangeTransform::transform() {
-
- DEBUG(dbgs() << "transform\n");
bool Transformed = false;
Instruction *InnerIndexVar;
@@ -1046,16 +1040,16 @@ bool LoopInterchangeTransform::transform() {
// incremented/decremented.
// TODO: This splitting logic may not work always. Fix this.
splitInnerLoopLatch(InnerIndexVar);
- DEBUG(dbgs() << "splitInnerLoopLatch Done\n");
+ DEBUG(dbgs() << "splitInnerLoopLatch done\n");
// Splits the inner loops phi nodes out into a separate basic block.
splitInnerLoopHeader();
- DEBUG(dbgs() << "splitInnerLoopHeader Done\n");
+ DEBUG(dbgs() << "splitInnerLoopHeader done\n");
}
Transformed |= adjustLoopLinks();
if (!Transformed) {
- DEBUG(dbgs() << "adjustLoopLinks Failed\n");
+ DEBUG(dbgs() << "adjustLoopLinks failed\n");
return false;
}
@@ -1099,7 +1093,7 @@ void LoopInterchangeTransform::splitInnerLoopHeader() {
}
DEBUG(dbgs() << "Output of splitInnerLoopHeader InnerLoopHeaderSucc & "
- "InnerLoopHeader \n");
+ "InnerLoopHeader\n");
}
/// \brief Move all instructions except the terminator from FromBB right before
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
index f29228c7659e..08e7acdaaf72 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopLoadElimination.cpp
@@ -20,17 +20,37 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/LoopVersioning.h"
#include <forward_list>
+#include <cassert>
+#include <algorithm>
+#include <set>
+#include <tuple>
+#include <utility>
#define LLE_OPTION "loop-load-elim"
#define DEBUG_TYPE LLE_OPTION
@@ -47,7 +67,6 @@ static cl::opt<unsigned> LoadElimSCEVCheckThreshold(
cl::desc("The maximum number of SCEV checks allowed for Loop "
"Load Elimination"));
-
STATISTIC(NumLoopLoadEliminted, "Number of loads eliminated by LLE");
namespace {
@@ -113,10 +132,9 @@ bool doesStoreDominatesAllLatches(BasicBlock *StoreBlock, Loop *L,
DominatorTree *DT) {
SmallVector<BasicBlock *, 8> Latches;
L->getLoopLatches(Latches);
- return std::all_of(Latches.begin(), Latches.end(),
- [&](const BasicBlock *Latch) {
- return DT->dominates(StoreBlock, Latch);
- });
+ return llvm::all_of(Latches, [&](const BasicBlock *Latch) {
+ return DT->dominates(StoreBlock, Latch);
+ });
}
/// \brief Return true if the load is not executed on all paths in the loop.
@@ -348,7 +366,7 @@ public:
// Collect the pointers of the candidate loads.
// FIXME: SmallSet does not work with std::inserter.
std::set<Value *> CandLoadPtrs;
- std::transform(Candidates.begin(), Candidates.end(),
+ transform(Candidates,
std::inserter(CandLoadPtrs, CandLoadPtrs.begin()),
std::mem_fn(&StoreToLoadForwardingCandidate::getLoadPtr));
@@ -499,6 +517,11 @@ public:
return false;
}
+ if (!L->isLoopSimplifyForm()) {
+ DEBUG(dbgs() << "Loop is not is loop-simplify form");
+ return false;
+ }
+
// Point of no-return, start the transformation. First, version the loop
// if necessary.
@@ -581,11 +604,13 @@ public:
AU.addRequired<ScalarEvolutionWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
}
static char ID;
};
-}
+
+} // end anonymous namespace
char LoopLoadElimination::ID;
static const char LLE_name[] = "Loop Load Elimination";
@@ -599,7 +624,9 @@ INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_END(LoopLoadElimination, LLE_OPTION, LLE_name, false, false)
namespace llvm {
+
FunctionPass *createLoopLoadEliminationPass() {
return new LoopLoadElimination();
}
-}
+
+} // end namespace llvm
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
index d2f1b66076a6..86058fe0b1aa 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
@@ -371,11 +371,12 @@ namespace {
protected:
typedef MapVector<Instruction*, BitVector> UsesTy;
- bool findRootsRecursive(Instruction *IVU,
+ void findRootsRecursive(Instruction *IVU,
SmallInstructionSet SubsumedInsts);
bool findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts);
bool collectPossibleRoots(Instruction *Base,
std::map<int64_t,Instruction*> &Roots);
+ bool validateRootSet(DAGRootSet &DRS);
bool collectUsedInstructions(SmallInstructionSet &PossibleRedSet);
void collectInLoopUserSet(const SmallInstructionVector &Roots,
@@ -739,11 +740,11 @@ void LoopReroll::DAGRootTracker::collectInLoopUserSet(
collectInLoopUserSet(Root, Exclude, Final, Users);
}
-static bool isSimpleLoadStore(Instruction *I) {
+static bool isUnorderedLoadStore(Instruction *I) {
if (LoadInst *LI = dyn_cast<LoadInst>(I))
- return LI->isSimple();
+ return LI->isUnordered();
if (StoreInst *SI = dyn_cast<StoreInst>(I))
- return SI->isSimple();
+ return SI->isUnordered();
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
return !MI->isVolatile();
return false;
@@ -827,7 +828,8 @@ collectPossibleRoots(Instruction *Base, std::map<int64_t,Instruction*> &Roots) {
Roots[V] = cast<Instruction>(I);
}
- if (Roots.empty())
+ // Make sure we have at least two roots.
+ if (Roots.empty() || (Roots.size() == 1 && BaseUsers.empty()))
return false;
// If we found non-loop-inc, non-root users of Base, assume they are
@@ -861,40 +863,61 @@ collectPossibleRoots(Instruction *Base, std::map<int64_t,Instruction*> &Roots) {
return true;
}
-bool LoopReroll::DAGRootTracker::
+void LoopReroll::DAGRootTracker::
findRootsRecursive(Instruction *I, SmallInstructionSet SubsumedInsts) {
// Does the user look like it could be part of a root set?
// All its users must be simple arithmetic ops.
if (I->getNumUses() > IL_MaxRerollIterations)
- return false;
+ return;
- if ((I->getOpcode() == Instruction::Mul ||
- I->getOpcode() == Instruction::PHI) &&
- I != IV &&
- findRootsBase(I, SubsumedInsts))
- return true;
+ if (I != IV && findRootsBase(I, SubsumedInsts))
+ return;
SubsumedInsts.insert(I);
for (User *V : I->users()) {
- Instruction *I = dyn_cast<Instruction>(V);
- if (std::find(LoopIncs.begin(), LoopIncs.end(), I) != LoopIncs.end())
+ Instruction *I = cast<Instruction>(V);
+ if (is_contained(LoopIncs, I))
continue;
- if (!I || !isSimpleArithmeticOp(I) ||
- !findRootsRecursive(I, SubsumedInsts))
- return false;
+ if (!isSimpleArithmeticOp(I))
+ continue;
+
+ // The recursive call makes a copy of SubsumedInsts.
+ findRootsRecursive(I, SubsumedInsts);
}
+}
+
+bool LoopReroll::DAGRootTracker::validateRootSet(DAGRootSet &DRS) {
+ if (DRS.Roots.empty())
+ return false;
+
+ // Consider a DAGRootSet with N-1 roots (so N different values including
+ // BaseInst).
+ // Define d = Roots[0] - BaseInst, which should be the same as
+ // Roots[I] - Roots[I-1] for all I in [1..N).
+ // Define D = BaseInst@J - BaseInst@J-1, where "@J" means the value at the
+ // loop iteration J.
+ //
+ // Now, for the loop iterations to be consecutive:
+ // D = d * N
+ const auto *ADR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(DRS.BaseInst));
+ if (!ADR)
+ return false;
+ unsigned N = DRS.Roots.size() + 1;
+ const SCEV *StepSCEV = SE->getMinusSCEV(SE->getSCEV(DRS.Roots[0]), ADR);
+ const SCEV *ScaleSCEV = SE->getConstant(StepSCEV->getType(), N);
+ if (ADR->getStepRecurrence(*SE) != SE->getMulExpr(StepSCEV, ScaleSCEV))
+ return false;
+
return true;
}
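A worked instance of the D = d * N condition above, with an invented rerollable loop:

// For a loop rerollable by N = 3:
//   for (int i = 0; i < n; i += 3) {
//     x[i]     += 1;   // BaseInst address
//     x[i + 1] += 1;   // Roots[0], spacing d = 1
//     x[i + 2] += 1;   // Roots[1]
//   }
// BaseInst advances by D = 3 per iteration and 3 == 1 * 3, so the values of
// consecutive rerolled iterations line up.
bool stepsAreConsecutive(long D, long d, unsigned N) {
  return D == d * static_cast<long>(N); // e.g. stepsAreConsecutive(3, 1, 3)
}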
bool LoopReroll::DAGRootTracker::
findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts) {
-
- // The base instruction needs to be a multiply so
- // that we can erase it.
- if (IVU->getOpcode() != Instruction::Mul &&
- IVU->getOpcode() != Instruction::PHI)
+ // The base of a RootSet must be an AddRec, so it can be erased.
+ const auto *IVU_ADR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IVU));
+ if (!IVU_ADR || IVU_ADR->getLoop() != L)
return false;
std::map<int64_t, Instruction*> V;
@@ -910,6 +933,8 @@ findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts) {
DAGRootSet DRS;
DRS.BaseInst = nullptr;
+ SmallVector<DAGRootSet, 16> PotentialRootSets;
+
for (auto &KV : V) {
if (!DRS.BaseInst) {
DRS.BaseInst = KV.second;
@@ -920,13 +945,22 @@ findRootsBase(Instruction *IVU, SmallInstructionSet SubsumedInsts) {
DRS.Roots.push_back(KV.second);
} else {
// Linear sequence terminated.
- RootSets.push_back(DRS);
+ if (!validateRootSet(DRS))
+ return false;
+
+ // Construct a new DAGRootSet with the next sequence.
+ PotentialRootSets.push_back(DRS);
DRS.BaseInst = KV.second;
- DRS.SubsumedInsts = SubsumedInsts;
DRS.Roots.clear();
}
}
- RootSets.push_back(DRS);
+
+ if (!validateRootSet(DRS))
+ return false;
+
+ PotentialRootSets.push_back(DRS);
+
+ RootSets.append(PotentialRootSets.begin(), PotentialRootSets.end());
return true;
}
@@ -940,8 +974,7 @@ bool LoopReroll::DAGRootTracker::findRoots() {
if (isLoopIncrement(IVU, IV))
LoopIncs.push_back(cast<Instruction>(IVU));
}
- if (!findRootsRecursive(IV, SmallInstructionSet()))
- return false;
+ findRootsRecursive(IV, SmallInstructionSet());
LoopIncs.push_back(IV);
} else {
if (!findRootsBase(IV, SmallInstructionSet()))
@@ -961,31 +994,6 @@ bool LoopReroll::DAGRootTracker::findRoots() {
}
}
- // And ensure all loop iterations are consecutive. We rely on std::map
- // providing ordered traversal.
- for (auto &V : RootSets) {
- const auto *ADR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(V.BaseInst));
- if (!ADR)
- return false;
-
- // Consider a DAGRootSet with N-1 roots (so N different values including
- // BaseInst).
- // Define d = Roots[0] - BaseInst, which should be the same as
- // Roots[I] - Roots[I-1] for all I in [1..N).
- // Define D = BaseInst@J - BaseInst@J-1, where "@J" means the value at the
- // loop iteration J.
- //
- // Now, For the loop iterations to be consecutive:
- // D = d * N
-
- unsigned N = V.Roots.size() + 1;
- const SCEV *StepSCEV = SE->getMinusSCEV(SE->getSCEV(V.Roots[0]), ADR);
- const SCEV *ScaleSCEV = SE->getConstant(StepSCEV->getType(), N);
- if (ADR->getStepRecurrence(*SE) != SE->getMulExpr(StepSCEV, ScaleSCEV)) {
- DEBUG(dbgs() << "LRR: Aborting because iterations are not consecutive\n");
- return false;
- }
- }
Scale = RootSets[0].Roots.size() + 1;
if (Scale > IL_MaxRerollIterations) {
@@ -1088,7 +1096,7 @@ bool LoopReroll::DAGRootTracker::isBaseInst(Instruction *I) {
bool LoopReroll::DAGRootTracker::isRootInst(Instruction *I) {
for (auto &DRS : RootSets) {
- if (std::find(DRS.Roots.begin(), DRS.Roots.end(), I) != DRS.Roots.end())
+ if (is_contained(DRS.Roots, I))
return true;
}
return false;
@@ -1283,7 +1291,7 @@ bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) {
// which while a valid (somewhat arbitrary) micro-optimization, is
// needed because otherwise isSafeToSpeculativelyExecute returns
// false on PHI nodes.
- if (!isa<PHINode>(I) && !isSimpleLoadStore(I) &&
+ if (!isa<PHINode>(I) && !isUnorderedLoadStore(I) &&
!isSafeToSpeculativelyExecute(I))
// Intervening instructions cause side effects.
FutureSideEffects = true;
@@ -1313,10 +1321,10 @@ bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) {
// If we've passed an instruction from a future iteration that may have
// side effects, and this instruction might also, then we can't reorder
// them, and this matching fails. As an exception, we allow the alias
- // set tracker to handle regular (simple) load/store dependencies.
- if (FutureSideEffects && ((!isSimpleLoadStore(BaseInst) &&
+ // set tracker to handle regular (unordered) load/store dependencies.
+ if (FutureSideEffects && ((!isUnorderedLoadStore(BaseInst) &&
!isSafeToSpeculativelyExecute(BaseInst)) ||
- (!isSimpleLoadStore(RootInst) &&
+ (!isUnorderedLoadStore(RootInst) &&
!isSafeToSpeculativelyExecute(RootInst)))) {
DEBUG(dbgs() << "LRR: iteration root match failed at " << *BaseInst <<
" vs. " << *RootInst <<
@@ -1412,13 +1420,12 @@ bool LoopReroll::DAGRootTracker::validate(ReductionTracker &Reductions) {
void LoopReroll::DAGRootTracker::replace(const SCEV *IterCount) {
BasicBlock *Header = L->getHeader();
// Remove instructions associated with non-base iterations.
- for (BasicBlock::reverse_iterator J = Header->rbegin();
- J != Header->rend();) {
+ for (BasicBlock::reverse_iterator J = Header->rbegin(), JE = Header->rend();
+ J != JE;) {
unsigned I = Uses[&*J].find_first();
if (I > 0 && I < IL_All) {
- Instruction *D = &*J;
- DEBUG(dbgs() << "LRR: removing: " << *D << "\n");
- D->eraseFromParent();
+ DEBUG(dbgs() << "LRR: removing: " << *J << "\n");
+ J++->eraseFromParent();
continue;
}
@@ -1499,8 +1506,8 @@ void LoopReroll::DAGRootTracker::replaceIV(Instruction *Inst,
{ // Limit the lifetime of SCEVExpander.
const DataLayout &DL = Header->getModule()->getDataLayout();
SCEVExpander Expander(*SE, DL, "reroll");
- Value *NewIV =
- Expander.expandCodeFor(NewIVSCEV, InstIV->getType(), &Header->front());
+ Value *NewIV = Expander.expandCodeFor(NewIVSCEV, Inst->getType(),
+ Header->getFirstNonPHIOrDbg());
for (auto &KV : Uses)
if (KV.second.find_first() == 0)
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
index 7a06a25a7073..0225cc325700 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopRotation.cpp
@@ -326,6 +326,10 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// Otherwise, stick the new instruction into the new block!
C->setName(Inst->getName());
C->insertBefore(LoopEntryBranch);
+
+ if (auto *II = dyn_cast<IntrinsicInst>(C))
+ if (II->getIntrinsicID() == Intrinsic::assume)
+ AC->registerAssumption(II);
}
}
@@ -501,7 +505,8 @@ static bool shouldSpeculateInstrs(BasicBlock::iterator Begin,
// GEPs are cheap if all indices are constant.
if (!cast<GEPOperator>(I)->hasAllConstantIndices())
return false;
- // fall-thru to increment case
+ // fall-thru to increment case
+ LLVM_FALLTHROUGH;
case Instruction::Add:
case Instruction::Sub:
case Instruction::And:
@@ -617,9 +622,10 @@ bool LoopRotate::processLoop(Loop *L) {
return MadeChange;
}
-LoopRotatePass::LoopRotatePass() {}
+LoopRotatePass::LoopRotatePass(bool EnableHeaderDuplication)
+ : EnableHeaderDuplication(EnableHeaderDuplication) {}
-PreservedAnalyses LoopRotatePass::run(Loop &L, AnalysisManager<Loop> &AM) {
+PreservedAnalyses LoopRotatePass::run(Loop &L, LoopAnalysisManager &AM) {
auto &FAM = AM.getResult<FunctionAnalysisManagerLoopProxy>(L).getManager();
Function *F = L.getHeader()->getParent();
@@ -631,7 +637,8 @@ PreservedAnalyses LoopRotatePass::run(Loop &L, AnalysisManager<Loop> &AM) {
// Optional analyses.
auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(*F);
auto *SE = FAM.getCachedResult<ScalarEvolutionAnalysis>(*F);
- LoopRotate LR(DefaultRotationThreshold, LI, TTI, AC, DT, SE);
+ int Threshold = EnableHeaderDuplication ? DefaultRotationThreshold : 0;
+ LoopRotate LR(Threshold, LI, TTI, AC, DT, SE);
bool Changed = LR.processLoop(&L);
if (!Changed)
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
index ec227932c09e..d37339fc5fee 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
@@ -64,7 +64,7 @@ static bool simplifyLoopCFG(Loop &L, DominatorTree &DT, LoopInfo &LI) {
return Changed;
}
-PreservedAnalyses LoopSimplifyCFGPass::run(Loop &L, AnalysisManager<Loop> &AM) {
+PreservedAnalyses LoopSimplifyCFGPass::run(Loop &L, LoopAnalysisManager &AM) {
const auto &FAM =
AM.getResult<FunctionAnalysisManagerLoopProxy>(L).getManager();
Function *F = L.getHeader()->getParent();
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopSink.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopSink.cpp
new file mode 100644
index 000000000000..90309d7ebba6
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopSink.cpp
@@ -0,0 +1,333 @@
+//===-- LoopSink.cpp - Loop Sink Pass ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass does the inverse transformation of what LICM does.
+// It traverses all of the instructions in the loop's preheader and sinks
+// them into the parts of the loop body whose frequency is lower than that of
+// the loop's preheader.
+// This pass is a reverse-transformation of LICM. It differs from the Sink
+// pass in the following ways:
+//
+// * It only handles sinking of instructions from the loop's preheader to the
+// loop's body
+// * It uses alias set tracker to get more accurate alias info
+// * It uses block frequency info to find the optimal sinking locations
+//
+// Overall algorithm:
+//
+// For I in Preheader:
+// InsertBBs = BBs that uses I
+// For BB in sorted(LoopBBs):
+// DomBBs = BBs in InsertBBs that are dominated by BB
+// if freq(DomBBs) > freq(BB)
+// InsertBBs = UseBBs - DomBBs + BB
+// For BB in InsertBBs:
+// Insert I at BB's beginning
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AliasSetTracker.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/LoopPassManager.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "loopsink"
+
+STATISTIC(NumLoopSunk, "Number of instructions sunk into loop");
+STATISTIC(NumLoopSunkCloned, "Number of cloned instructions sunk into loop");
+
+static cl::opt<unsigned> SinkFrequencyPercentThreshold(
+ "sink-freq-percent-threshold", cl::Hidden, cl::init(90),
+ cl::desc("Do not sink instructions that require cloning unless they "
+ "execute less than this percent of the time."));
+
+static cl::opt<unsigned> MaxNumberOfUseBBsForSinking(
+ "max-uses-for-sinking", cl::Hidden, cl::init(30),
+ cl::desc("Do not sink instructions that have too many uses."));
+
+/// Return adjusted total frequency of \p BBs.
+///
+/// * If there is only one BB, sinking the instruction will not introduce any
+/// code size increase. Thus there is no need to adjust the frequency.
+/// * If there is more than one BB, sinking would lead to code size increase.
+/// In this case, we add some "tax" to the total frequency to make it harder
+/// to sink. E.g.
+/// Freq(Preheader) = 100
+/// Freq(BBs) = sum(50, 49) = 99
+/// Even if Freq(BBs) < Freq(Preheader), we will not sink from Preheader to
+/// BBs as the difference is too small to justify the code size increase.
+/// To model this, the adjusted Freq(BBs) will be:
+/// AdjustedFreq(BBs) = 99 / SinkFrequencyPercentThreshold%
+static BlockFrequency adjustedSumFreq(SmallPtrSetImpl<BasicBlock *> &BBs,
+ BlockFrequencyInfo &BFI) {
+ BlockFrequency T = 0;
+ for (BasicBlock *B : BBs)
+ T += BFI.getBlockFreq(B);
+ if (BBs.size() > 1)
+ T /= BranchProbability(SinkFrequencyPercentThreshold, 100);
+ return T;
+}
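Plugging the default 90% threshold into that formula, a plain-arithmetic restatement (a sketch, not the BlockFrequency implementation):

#include <vector>

// Freq(Preheader) = 100, Freq(BBs) = 50 + 49 = 99,
// AdjustedFreq(BBs) = 99 / 0.90 ~= 110 > 100, so sinking is rejected.
double adjustedSumFreqSketch(const std::vector<double> &Freqs,
                             double ThresholdPercent = 90.0) {
  double Sum = 0.0;
  for (double F : Freqs)
    Sum += F;
  if (Freqs.size() > 1)             // the "tax" only applies when cloning
    Sum /= ThresholdPercent / 100.0;
  return Sum;
}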
+
+/// Return a set of basic blocks into which to insert the sunk instructions.
+///
+/// The returned set of basic blocks (BBsToSinkInto) should satisfy:
+///
+/// * Inside the loop \p L
+/// * For each UseBB in \p UseBBs, there is at least one BB in BBsToSinkInto
+/// that dominates the UseBB
+/// * Has minimum total frequency that is no greater than preheader frequency
+///
+/// The purpose of the function is to find the optimal sinking points to
+/// minimize execution cost, which is defined as "sum of frequency of
+/// BBsToSinkInto".
+/// As a result, the returned BBsToSinkInto needs to have minimum total
+/// frequency.
+/// Additionally, if the total frequency of BBsToSinkInto exceeds preheader
+/// frequency, the optimal solution is not sinking (return empty set).
+///
+/// \p ColdLoopBBs is used to help find the optimal sinking locations.
+/// It stores a list of BBs that is:
+///
+/// * Inside the loop \p L
+/// * Has a frequency no larger than the loop's preheader
+/// * Sorted by BB frequency
+///
+/// The complexity of the function is O(UseBBs.size() * ColdLoopBBs.size()).
+/// To avoid expensive computation, we cap the maximum UseBBs.size() in its
+/// caller.
+static SmallPtrSet<BasicBlock *, 2>
+findBBsToSinkInto(const Loop &L, const SmallPtrSetImpl<BasicBlock *> &UseBBs,
+ const SmallVectorImpl<BasicBlock *> &ColdLoopBBs,
+ DominatorTree &DT, BlockFrequencyInfo &BFI) {
+ SmallPtrSet<BasicBlock *, 2> BBsToSinkInto;
+ if (UseBBs.size() == 0)
+ return BBsToSinkInto;
+
+ BBsToSinkInto.insert(UseBBs.begin(), UseBBs.end());
+ SmallPtrSet<BasicBlock *, 2> BBsDominatedByColdestBB;
+
+ // For every iteration:
+ // * Pick the ColdestBB from ColdLoopBBs
+ // * Find the set BBsDominatedByColdestBB that satisfy:
+ // - BBsDominatedByColdestBB is a subset of BBsToSinkInto
+ // - Every BB in BBsDominatedByColdestBB is dominated by ColdestBB
+ // * If Freq(ColdestBB) < Freq(BBsDominatedByColdestBB), remove
+ // BBsDominatedByColdestBB from BBsToSinkInto, add ColdestBB to
+ // BBsToSinkInto
+ for (BasicBlock *ColdestBB : ColdLoopBBs) {
+ BBsDominatedByColdestBB.clear();
+ for (BasicBlock *SinkedBB : BBsToSinkInto)
+ if (DT.dominates(ColdestBB, SinkedBB))
+ BBsDominatedByColdestBB.insert(SinkedBB);
+ if (BBsDominatedByColdestBB.size() == 0)
+ continue;
+ if (adjustedSumFreq(BBsDominatedByColdestBB, BFI) >
+ BFI.getBlockFreq(ColdestBB)) {
+ for (BasicBlock *DominatedBB : BBsDominatedByColdestBB) {
+ BBsToSinkInto.erase(DominatedBB);
+ }
+ BBsToSinkInto.insert(ColdestBB);
+ }
+ }
+
+ // If the total frequency of BBsToSinkInto is larger than preheader frequency,
+ // do not sink.
+ if (adjustedSumFreq(BBsToSinkInto, BFI) >
+ BFI.getBlockFreq(L.getLoopPreheader()))
+ BBsToSinkInto.clear();
+ return BBsToSinkInto;
+}
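A small worked example of the greedy selection above, with invented frequencies: preheader 100, use blocks UseBB1 = 60 and UseBB2 = 10, both dominated by a colder block DomBB = 40.

// Mirrors one pass of the loop over ColdLoopBBs using plain doubles.
bool exampleSinksIntoDomBB() {
  const double PreheaderFreq = 100.0, DomBBFreq = 40.0;
  // adjustedSumFreq({UseBB1, UseBB2}) = (60 + 10) / 0.9 ~= 77.8.
  const double AdjustedUseFreq = (60.0 + 10.0) / 0.9;
  // 77.8 > 40, so both use blocks are replaced by DomBB.
  const double FinalFreq =
      AdjustedUseFreq > DomBBFreq ? DomBBFreq : AdjustedUseFreq;
  // 40 <= 100: the final set is cheaper than the preheader, so we sink.
  return FinalFreq <= PreheaderFreq;
}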
+
+// Sinks \p I from the loop \p L's preheader to its uses. Returns true if
+// sinking is successful.
+// \p LoopBlockNumber is used to sort the insertion blocks to ensure
+// determinism.
+static bool sinkInstruction(Loop &L, Instruction &I,
+ const SmallVectorImpl<BasicBlock *> &ColdLoopBBs,
+ const SmallDenseMap<BasicBlock *, int, 16> &LoopBlockNumber,
+ LoopInfo &LI, DominatorTree &DT,
+ BlockFrequencyInfo &BFI) {
+ // Compute the set of blocks in loop L which contain a use of I.
+ SmallPtrSet<BasicBlock *, 2> BBs;
+ for (auto &U : I.uses()) {
+ Instruction *UI = cast<Instruction>(U.getUser());
+ // We cannot sink I to PHI-uses.
+ if (dyn_cast<PHINode>(UI))
+ return false;
+ // We cannot sink I if it has uses outside of the loop.
+ if (!L.contains(LI.getLoopFor(UI->getParent())))
+ return false;
+ BBs.insert(UI->getParent());
+ }
+
+ // findBBsToSinkInto is O(BBs.size() * ColdLoopBBs.size()). We cap the max
+ // BBs.size() to avoid expensive computation.
+ // FIXME: Handle code size growth for min_size and opt_size.
+ if (BBs.size() > MaxNumberOfUseBBsForSinking)
+ return false;
+
+ // Find the set of BBs that we should insert a copy of I.
+ SmallPtrSet<BasicBlock *, 2> BBsToSinkInto =
+ findBBsToSinkInto(L, BBs, ColdLoopBBs, DT, BFI);
+ if (BBsToSinkInto.empty())
+ return false;
+
+ // Copy the final BBs into a vector and sort them using the total ordering
+ // of the loop block numbers as iterating the set doesn't give a useful
+ // order. No need to stable sort as the block numbers are a total ordering.
+ SmallVector<BasicBlock *, 2> SortedBBsToSinkInto;
+ SortedBBsToSinkInto.insert(SortedBBsToSinkInto.begin(), BBsToSinkInto.begin(),
+ BBsToSinkInto.end());
+ std::sort(SortedBBsToSinkInto.begin(), SortedBBsToSinkInto.end(),
+ [&](BasicBlock *A, BasicBlock *B) {
+ return *LoopBlockNumber.find(A) < *LoopBlockNumber.find(B);
+ });
+
+ BasicBlock *MoveBB = *SortedBBsToSinkInto.begin();
+ // FIXME: Optimize the efficiency for cloned value replacement. The current
+ // implementation is O(SortedBBsToSinkInto.size() * I.num_uses()).
+ for (BasicBlock *N : SortedBBsToSinkInto) {
+ if (N == MoveBB)
+ continue;
+ // Clone I and replace its uses.
+ Instruction *IC = I.clone();
+ IC->setName(I.getName());
+ IC->insertBefore(&*N->getFirstInsertionPt());
+ // Replaces uses of I with IC in N
+ for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); UI != UE;) {
+ Use &U = *UI++;
+ auto *I = cast<Instruction>(U.getUser());
+ if (I->getParent() == N)
+ U.set(IC);
+ }
+ // Replaces uses of I with IC in blocks dominated by N
+ replaceDominatedUsesWith(&I, IC, DT, N);
+ DEBUG(dbgs() << "Sinking a clone of " << I << " To: " << N->getName()
+ << '\n');
+ NumLoopSunkCloned++;
+ }
+ DEBUG(dbgs() << "Sinking " << I << " To: " << MoveBB->getName() << '\n');
+ NumLoopSunk++;
+ I.moveBefore(&*MoveBB->getFirstInsertionPt());
+
+ return true;
+}
+
+/// Sinks instructions from the loop's preheader to the loop body if the total
+/// frequency of the inserted copies is smaller than the preheader's frequency.
+static bool sinkLoopInvariantInstructions(Loop &L, AAResults &AA, LoopInfo &LI,
+ DominatorTree &DT,
+ BlockFrequencyInfo &BFI,
+ ScalarEvolution *SE) {
+ BasicBlock *Preheader = L.getLoopPreheader();
+ if (!Preheader)
+ return false;
+
+ // Enable LoopSink only when runtime profile is available.
+ // With static profile, the sinking decision may be sub-optimal.
+ if (!Preheader->getParent()->getEntryCount())
+ return false;
+
+ const BlockFrequency PreheaderFreq = BFI.getBlockFreq(Preheader);
+ // If there are no basic blocks with lower frequency than the preheader then
+ // we can avoid the detailed analysis as we will never find profitable sinking
+ // opportunities.
+ if (all_of(L.blocks(), [&](const BasicBlock *BB) {
+ return BFI.getBlockFreq(BB) > PreheaderFreq;
+ }))
+ return false;
+
+ bool Changed = false;
+ AliasSetTracker CurAST(AA);
+
+ // Compute alias set.
+ for (BasicBlock *BB : L.blocks())
+ CurAST.add(*BB);
+
+ // Sort loop's basic blocks by frequency
+ SmallVector<BasicBlock *, 10> ColdLoopBBs;
+ SmallDenseMap<BasicBlock *, int, 16> LoopBlockNumber;
+ int i = 0;
+ for (BasicBlock *B : L.blocks())
+ if (BFI.getBlockFreq(B) < BFI.getBlockFreq(L.getLoopPreheader())) {
+ ColdLoopBBs.push_back(B);
+ LoopBlockNumber[B] = ++i;
+ }
+ std::stable_sort(ColdLoopBBs.begin(), ColdLoopBBs.end(),
+ [&](BasicBlock *A, BasicBlock *B) {
+ return BFI.getBlockFreq(A) < BFI.getBlockFreq(B);
+ });
+
+ // Traverse the preheader's instructions in reverse order because if A depends
+ // on B (A appears after B), A needs to be sunk first before B can be
+ // sunk.
+ for (auto II = Preheader->rbegin(), E = Preheader->rend(); II != E;) {
+ Instruction *I = &*II++;
+ if (!L.hasLoopInvariantOperands(I) ||
+ !canSinkOrHoistInst(*I, &AA, &DT, &L, &CurAST, nullptr))
+ continue;
+ if (sinkInstruction(L, *I, ColdLoopBBs, LoopBlockNumber, LI, DT, BFI))
+ Changed = true;
+ }
+
+ if (Changed && SE)
+ SE->forgetLoopDispositions(&L);
+ return Changed;
+}
+
+namespace {
+struct LegacyLoopSinkPass : public LoopPass {
+ static char ID;
+ LegacyLoopSinkPass() : LoopPass(ID) {
+ initializeLegacyLoopSinkPassPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnLoop(Loop *L, LPPassManager &LPM) override {
+ if (skipLoop(L))
+ return false;
+
+ auto *SE = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
+ return sinkLoopInvariantInstructions(
+ *L, getAnalysis<AAResultsWrapperPass>().getAAResults(),
+ getAnalysis<LoopInfoWrapperPass>().getLoopInfo(),
+ getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
+ getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI(),
+ SE ? &SE->getSE() : nullptr);
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<BlockFrequencyInfoWrapperPass>();
+ getLoopAnalysisUsage(AU);
+ }
+};
+}
+
+char LegacyLoopSinkPass::ID = 0;
+INITIALIZE_PASS_BEGIN(LegacyLoopSinkPass, "loop-sink", "Loop Sink", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(LoopPass)
+INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
+INITIALIZE_PASS_END(LegacyLoopSinkPass, "loop-sink", "Loop Sink", false, false)
+
+Pass *llvm::createLoopSinkPass() { return new LegacyLoopSinkPass(); }
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 70bd9d3cca95..a61f646042ae 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -53,29 +53,64 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/LoopStrengthReduce.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Hashing.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/IVUsers.h"
+#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/LoopPassManager.h"
+#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/ScalarEvolutionExpressions.h"
+#include "llvm/Analysis/ScalarEvolutionNormalization.h"
#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/OperandTraits.h"
+#include "llvm/IR/Operator.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <iterator>
+#include <map>
+#include <tuple>
+#include <utility>
+
using namespace llvm;
#define DEBUG_TYPE "loop-reduce"
@@ -139,7 +174,7 @@ public:
void dump() const;
};
-}
+} // end anonymous namespace
void RegSortData::print(raw_ostream &OS) const {
OS << "[NumUses=" << UsedByIndices.count() << ']';
@@ -178,7 +213,7 @@ public:
const_iterator end() const { return RegSequence.end(); }
};
-}
+} // end anonymous namespace
void
RegUseTracker::countRegister(const SCEV *Reg, size_t LUIdx) {
@@ -210,7 +245,7 @@ RegUseTracker::swapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
SmallBitVector &UsedByIndices = Pair.second.UsedByIndices;
if (LUIdx < UsedByIndices.size())
UsedByIndices[LUIdx] =
- LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : 0;
+ LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : false;
UsedByIndices.resize(std::min(UsedByIndices.size(), LastLUIdx));
}
}
@@ -301,7 +336,7 @@ struct Formula {
void dump() const;
};
-}
+} // end anonymous namespace
/// Recursion helper for initialMatch.
static void DoInitialMatch(const SCEV *S, Loop *L,
@@ -323,7 +358,7 @@ static void DoInitialMatch(const SCEV *S, Loop *L,
// Look at addrec operands.
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
- if (!AR->getStart()->isZero()) {
+ if (!AR->getStart()->isZero() && AR->isAffine()) {
DoInitialMatch(AR->getStart(), L, Good, Bad, SE);
DoInitialMatch(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
AR->getStepRecurrence(SE),
@@ -446,8 +481,7 @@ void Formula::deleteBaseReg(const SCEV *&S) {
/// Test if this formula references the given register.
bool Formula::referencesReg(const SCEV *S) const {
- return S == ScaledReg ||
- std::find(BaseRegs.begin(), BaseRegs.end(), S) != BaseRegs.end();
+ return S == ScaledReg || is_contained(BaseRegs, S);
}
/// Test whether this formula uses registers which are used by uses other than
@@ -567,7 +601,7 @@ static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
// Distribute the sdiv over addrec operands, if the addrec doesn't overflow.
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) {
- if (IgnoreSignificantBits || isAddRecSExtable(AR, SE)) {
+ if ((IgnoreSignificantBits || isAddRecSExtable(AR, SE)) && AR->isAffine()) {
const SCEV *Step = getExactSDiv(AR->getStepRecurrence(SE), RHS, SE,
IgnoreSignificantBits);
if (!Step) return nullptr;
@@ -822,8 +856,10 @@ DeleteTriviallyDeadInstructions(SmallVectorImpl<WeakVH> &DeadInsts) {
}
namespace {
+
class LSRUse;
-}
+
+} // end anonymous namespace
/// \brief Check if the addressing mode defined by \p F is completely
/// folded in \p LU at isel time.
@@ -883,7 +919,6 @@ public:
SmallPtrSetImpl<const SCEV *> &Regs,
const DenseSet<const SCEV *> &VisitedRegs,
const Loop *L,
- const SmallVectorImpl<int64_t> &Offsets,
ScalarEvolution &SE, DominatorTree &DT,
const LSRUse &LU,
SmallPtrSetImpl<const SCEV *> *LoserRegs = nullptr);
@@ -902,8 +937,144 @@ private:
ScalarEvolution &SE, DominatorTree &DT,
SmallPtrSetImpl<const SCEV *> *LoserRegs);
};
+
+/// An operand value in an instruction which is to be replaced with some
+/// equivalent, possibly strength-reduced, replacement.
+struct LSRFixup {
+ /// The instruction which will be updated.
+ Instruction *UserInst;
-}
+ /// The operand of the instruction which will be replaced. The operand may be
+ /// used more than once; every instance will be replaced.
+ Value *OperandValToReplace;
+
+ /// If this user is to use the post-incremented value of an induction
+ /// variable, this variable is non-null and holds the loop associated with the
+ /// induction variable.
+ PostIncLoopSet PostIncLoops;
+
+ /// A constant offset to be added to the LSRUse expression. This allows
+ /// multiple fixups to share the same LSRUse with different offsets, for
+ /// example in an unrolled loop.
+ int64_t Offset;
+
+ bool isUseFullyOutsideLoop(const Loop *L) const;
+
+ LSRFixup();
+
+ void print(raw_ostream &OS) const;
+ void dump() const;
+};
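A hypothetical source-level case in which several fixups share one LSRUse with different offsets, as the Offset comment above describes:

void unrolledByTwo(int *P, int N, int V) {
  for (int i = 0; i < N; i += 2) {
    P[i] = V;     // fixup with Offset = 0
    P[i + 1] = V; // fixup with Offset = 1; same base expression, same LSRUse
  }
}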
+
+/// A DenseMapInfo implementation for holding DenseMaps and DenseSets of sorted
+/// SmallVectors of const SCEV*.
+struct UniquifierDenseMapInfo {
+ static SmallVector<const SCEV *, 4> getEmptyKey() {
+ SmallVector<const SCEV *, 4> V;
+ V.push_back(reinterpret_cast<const SCEV *>(-1));
+ return V;
+ }
+
+ static SmallVector<const SCEV *, 4> getTombstoneKey() {
+ SmallVector<const SCEV *, 4> V;
+ V.push_back(reinterpret_cast<const SCEV *>(-2));
+ return V;
+ }
+
+ static unsigned getHashValue(const SmallVector<const SCEV *, 4> &V) {
+ return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
+ }
+
+ static bool isEqual(const SmallVector<const SCEV *, 4> &LHS,
+ const SmallVector<const SCEV *, 4> &RHS) {
+ return LHS == RHS;
+ }
+};
+
+/// This class holds the state that LSR keeps for each use in IVUsers, as well
+/// as uses invented by LSR itself. It includes information about what kinds of
+/// things can be folded into the user, information about the user itself, and
+/// information about how the use may be satisfied. TODO: Represent multiple
+/// users of the same expression in common?
+class LSRUse {
+ DenseSet<SmallVector<const SCEV *, 4>, UniquifierDenseMapInfo> Uniquifier;
+
+public:
+ /// An enum for a kind of use, indicating what types of scaled and immediate
+ /// operands it might support.
+ enum KindType {
+ Basic, ///< A normal use, with no folding.
+ Special, ///< A special case of basic, allowing -1 scales.
+ Address, ///< An address use; folding according to TargetLowering
+ ICmpZero ///< An equality icmp with both operands folded into one.
+ // TODO: Add a generic icmp too?
+ };
+
+ typedef PointerIntPair<const SCEV *, 2, KindType> SCEVUseKindPair;
+
+ KindType Kind;
+ MemAccessTy AccessTy;
+
+ /// The list of operands which are to be replaced.
+ SmallVector<LSRFixup, 8> Fixups;
+
+ /// Keep track of the min and max offsets of the fixups.
+ int64_t MinOffset;
+ int64_t MaxOffset;
+
+ /// This records whether all of the fixups using this LSRUse are outside of
+ /// the loop, in which case some special-case heuristics may be used.
+ bool AllFixupsOutsideLoop;
+
+ /// RigidFormula is set to true to guarantee that this use will be associated
+ /// with a single formula--the one that initially matched. Some SCEV
+ /// expressions cannot be expanded. This allows LSR to consider the registers
+ /// used by those expressions without the need to expand them later after
+ /// changing the formula.
+ bool RigidFormula;
+
+ /// This records the widest use type for any fixup using this
+ /// LSRUse. FindUseWithSimilarFormula can't consider uses with different max
+ /// fixup widths to be equivalent, because the narrower one may be relying on
+ /// the implicit truncation to truncate away bogus bits.
+ Type *WidestFixupType;
+
+ /// A list of ways to build a value that can satisfy this user. After the
+ /// list is populated, one of these is selected heuristically and used to
+ /// formulate a replacement for OperandValToReplace in UserInst.
+ SmallVector<Formula, 12> Formulae;
+
+ /// The set of register candidates used by all formulae in this LSRUse.
+ SmallPtrSet<const SCEV *, 4> Regs;
+
+ LSRUse(KindType K, MemAccessTy AT)
+ : Kind(K), AccessTy(AT), MinOffset(INT64_MAX), MaxOffset(INT64_MIN),
+ AllFixupsOutsideLoop(true), RigidFormula(false),
+ WidestFixupType(nullptr) {}
+
+ LSRFixup &getNewFixup() {
+ Fixups.push_back(LSRFixup());
+ return Fixups.back();
+ }
+
+ void pushFixup(LSRFixup &f) {
+ Fixups.push_back(f);
+ if (f.Offset > MaxOffset)
+ MaxOffset = f.Offset;
+ if (f.Offset < MinOffset)
+ MinOffset = f.Offset;
+ }
+
+ bool HasFormulaWithSameRegs(const Formula &F) const;
+ bool InsertFormula(const Formula &F);
+ void DeleteFormula(Formula &F);
+ void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses);
+
+ void print(raw_ostream &OS) const;
+ void dump() const;
+};
+
+} // end anonymous namespace
/// Tally up interesting quantities from the given register.
void Cost::RateRegister(const SCEV *Reg,
@@ -975,7 +1146,6 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
SmallPtrSetImpl<const SCEV *> &Regs,
const DenseSet<const SCEV *> &VisitedRegs,
const Loop *L,
- const SmallVectorImpl<int64_t> &Offsets,
ScalarEvolution &SE, DominatorTree &DT,
const LSRUse &LU,
SmallPtrSetImpl<const SCEV *> *LoserRegs) {
@@ -1013,13 +1183,20 @@ void Cost::RateFormula(const TargetTransformInfo &TTI,
ScaleCost += getScalingFactorCost(TTI, LU, F);
// Tally up the non-zero immediates.
- for (int64_t O : Offsets) {
+ for (const LSRFixup &Fixup : LU.Fixups) {
+ int64_t O = Fixup.Offset;
int64_t Offset = (uint64_t)O + F.BaseOffset;
if (F.BaseGV)
ImmCost += 64; // Handle symbolic values conservatively.
// TODO: This should probably be the pointer size.
else if (Offset != 0)
ImmCost += APInt(64, Offset, true).getMinSignedBits();
+
+ // Check with target if this offset with this instruction is
+ // specifically not supported.
+ if ((isa<LoadInst>(Fixup.UserInst) || isa<StoreInst>(Fixup.UserInst)) &&
+ !TTI.isFoldableMemAccessOffset(Fixup.UserInst, Offset))
+ NumBaseAdds++;
}
assert(isValid() && "invalid cost");
}
@@ -1066,44 +1243,8 @@ void Cost::dump() const {
print(errs()); errs() << '\n';
}
-namespace {
-
-/// An operand value in an instruction which is to be replaced with some
-/// equivalent, possibly strength-reduced, replacement.
-struct LSRFixup {
- /// The instruction which will be updated.
- Instruction *UserInst;
-
- /// The operand of the instruction which will be replaced. The operand may be
- /// used more than once; every instance will be replaced.
- Value *OperandValToReplace;
-
- /// If this user is to use the post-incremented value of an induction
- /// variable, this variable is non-null and holds the loop associated with the
- /// induction variable.
- PostIncLoopSet PostIncLoops;
-
- /// The index of the LSRUse describing the expression which this fixup needs,
- /// minus an offset (below).
- size_t LUIdx;
-
- /// A constant offset to be added to the LSRUse expression. This allows
- /// multiple fixups to share the same LSRUse with different offsets, for
- /// example in an unrolled loop.
- int64_t Offset;
-
- bool isUseFullyOutsideLoop(const Loop *L) const;
-
- LSRFixup();
-
- void print(raw_ostream &OS) const;
- void dump() const;
-};
-
-}
-
LSRFixup::LSRFixup()
- : UserInst(nullptr), OperandValToReplace(nullptr), LUIdx(~size_t(0)),
+ : UserInst(nullptr), OperandValToReplace(nullptr),
Offset(0) {}
/// Test whether this fixup always uses its value outside of the given loop.
@@ -1139,9 +1280,6 @@ void LSRFixup::print(raw_ostream &OS) const {
PIL->getHeader()->printAsOperand(OS, /*PrintType=*/false);
}
- if (LUIdx != ~size_t(0))
- OS << ", LUIdx=" << LUIdx;
-
if (Offset != 0)
OS << ", Offset=" << Offset;
}
@@ -1151,102 +1289,6 @@ void LSRFixup::dump() const {
print(errs()); errs() << '\n';
}
-namespace {
-
-/// A DenseMapInfo implementation for holding DenseMaps and DenseSets of sorted
-/// SmallVectors of const SCEV*.
-struct UniquifierDenseMapInfo {
- static SmallVector<const SCEV *, 4> getEmptyKey() {
- SmallVector<const SCEV *, 4> V;
- V.push_back(reinterpret_cast<const SCEV *>(-1));
- return V;
- }
-
- static SmallVector<const SCEV *, 4> getTombstoneKey() {
- SmallVector<const SCEV *, 4> V;
- V.push_back(reinterpret_cast<const SCEV *>(-2));
- return V;
- }
-
- static unsigned getHashValue(const SmallVector<const SCEV *, 4> &V) {
- return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
- }
-
- static bool isEqual(const SmallVector<const SCEV *, 4> &LHS,
- const SmallVector<const SCEV *, 4> &RHS) {
- return LHS == RHS;
- }
-};
-
-/// This class holds the state that LSR keeps for each use in IVUsers, as well
-/// as uses invented by LSR itself. It includes information about what kinds of
-/// things can be folded into the user, information about the user itself, and
-/// information about how the use may be satisfied. TODO: Represent multiple
-/// users of the same expression in common?
-class LSRUse {
- DenseSet<SmallVector<const SCEV *, 4>, UniquifierDenseMapInfo> Uniquifier;
-
-public:
- /// An enum for a kind of use, indicating what types of scaled and immediate
- /// operands it might support.
- enum KindType {
- Basic, ///< A normal use, with no folding.
- Special, ///< A special case of basic, allowing -1 scales.
- Address, ///< An address use; folding according to TargetLowering
- ICmpZero ///< An equality icmp with both operands folded into one.
- // TODO: Add a generic icmp too?
- };
-
- typedef PointerIntPair<const SCEV *, 2, KindType> SCEVUseKindPair;
-
- KindType Kind;
- MemAccessTy AccessTy;
-
- SmallVector<int64_t, 8> Offsets;
- int64_t MinOffset;
- int64_t MaxOffset;
-
- /// This records whether all of the fixups using this LSRUse are outside of
- /// the loop, in which case some special-case heuristics may be used.
- bool AllFixupsOutsideLoop;
-
- /// RigidFormula is set to true to guarantee that this use will be associated
- /// with a single formula--the one that initially matched. Some SCEV
- /// expressions cannot be expanded. This allows LSR to consider the registers
- /// used by those expressions without the need to expand them later after
- /// changing the formula.
- bool RigidFormula;
-
- /// This records the widest use type for any fixup using this
- /// LSRUse. FindUseWithSimilarFormula can't consider uses with different max
- /// fixup widths to be equivalent, because the narrower one may be relying on
- /// the implicit truncation to truncate away bogus bits.
- Type *WidestFixupType;
-
- /// A list of ways to build a value that can satisfy this user. After the
- /// list is populated, one of these is selected heuristically and used to
- /// formulate a replacement for OperandValToReplace in UserInst.
- SmallVector<Formula, 12> Formulae;
-
- /// The set of register candidates used by all formulae in this LSRUse.
- SmallPtrSet<const SCEV *, 4> Regs;
-
- LSRUse(KindType K, MemAccessTy AT)
- : Kind(K), AccessTy(AT), MinOffset(INT64_MAX), MaxOffset(INT64_MIN),
- AllFixupsOutsideLoop(true), RigidFormula(false),
- WidestFixupType(nullptr) {}
-
- bool HasFormulaWithSameRegs(const Formula &F) const;
- bool InsertFormula(const Formula &F);
- void DeleteFormula(Formula &F);
- void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses);
-
- void print(raw_ostream &OS) const;
- void dump() const;
-};
-
-}
-
/// Test whether this use has a formula with the same registers as the given
/// formula.
bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
@@ -1334,9 +1376,9 @@ void LSRUse::print(raw_ostream &OS) const {
OS << ", Offsets={";
bool NeedComma = false;
- for (int64_t O : Offsets) {
+ for (const LSRFixup &Fixup : Fixups) {
if (NeedComma) OS << ',';
- OS << O;
+ OS << Fixup.Offset;
NeedComma = true;
}
OS << '}';
@@ -1638,14 +1680,16 @@ class LSRInstance {
Instruction *IVIncInsertPos;
/// Interesting factors between use strides.
- SmallSetVector<int64_t, 8> Factors;
+ ///
+ /// We explicitly use a SetVector which contains a SmallSet, instead of the
+ /// default, a SmallDenseSet, because we need to use the full range of
+ /// int64_ts, and there's currently no good way of doing that with
+ /// SmallDenseSet.
+ SetVector<int64_t, SmallVector<int64_t, 8>, SmallSet<int64_t, 8>> Factors;
/// Interesting use types, to facilitate truncation reuse.
SmallSetVector<Type *, 4> Types;
- /// The list of operands which are to be replaced.
- SmallVector<LSRFixup, 16> Fixups;
-
/// The list of interesting uses.
SmallVector<LSRUse, 16> Uses;
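On the Factors change: DenseSet-backed containers reserve sentinel keys near the top of the int64_t range (this reading of DenseMapInfo<int64_t> is an assumption, not stated in the patch), which is why membership is tracked with a SmallSet here. The same declaration in isolation, as a sketch:

#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"

#include <cstdint>

// Insertion-ordered stride set that can hold any int64_t value, including the
// extremes a DenseSet-based SetVector could not use as keys.
using StrideSet = llvm::SetVector<int64_t, llvm::SmallVector<int64_t, 8>,
                                  llvm::SmallSet<int64_t, 8>>;

void demoStrideSet() {
  StrideSet Factors;
  Factors.insert(INT64_MAX); // fine here; would clash with DenseMap sentinels
  Factors.insert(-1);
}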
@@ -1678,11 +1722,6 @@ class LSRInstance {
void CollectInterestingTypesAndFactors();
void CollectFixupsAndInitialFormulae();
- LSRFixup &getNewFixup() {
- Fixups.push_back(LSRFixup());
- return Fixups.back();
- }
-
// Support for sharing of LSRUses between LSRFixups.
typedef DenseMap<LSRUse::SCEVUseKindPair, size_t> UseMapTy;
UseMapTy UseMap;
@@ -1752,16 +1791,16 @@ class LSRInstance {
const LSRUse &LU,
SCEVExpander &Rewriter) const;
- Value *Expand(const LSRFixup &LF,
+ Value *Expand(const LSRUse &LU, const LSRFixup &LF,
const Formula &F,
BasicBlock::iterator IP,
SCEVExpander &Rewriter,
SmallVectorImpl<WeakVH> &DeadInsts) const;
- void RewriteForPHI(PHINode *PN, const LSRFixup &LF,
+ void RewriteForPHI(PHINode *PN, const LSRUse &LU, const LSRFixup &LF,
const Formula &F,
SCEVExpander &Rewriter,
SmallVectorImpl<WeakVH> &DeadInsts) const;
- void Rewrite(const LSRFixup &LF,
+ void Rewrite(const LSRUse &LU, const LSRFixup &LF,
const Formula &F,
SCEVExpander &Rewriter,
SmallVectorImpl<WeakVH> &DeadInsts) const;
@@ -1780,7 +1819,7 @@ public:
void dump() const;
};
-}
+} // end anonymous namespace
/// If IV is used in a int-to-float cast inside the loop then try to eliminate
/// the cast operation.
@@ -2068,10 +2107,30 @@ void
LSRInstance::OptimizeLoopTermCond() {
SmallPtrSet<Instruction *, 4> PostIncs;
+ // We need a different set of heuristics for rotated and non-rotated loops.
+ // If a loop is rotated then the latch is also the backedge, so inserting
+ // post-inc expressions just before the latch is ideal. To reduce live ranges
+ // it also makes sense to rewrite terminating conditions to use post-inc
+ // expressions.
+ //
+ // If the loop is not rotated then the latch is not a backedge; the latch
+ // check is done in the loop head. Adding post-inc expressions before the
+ // latch will cause overlapping live-ranges of pre-inc and post-inc expressions
+ // in the loop body. In this case we do *not* want to use post-inc expressions
+ // in the latch check, and we want to insert post-inc expressions before
+ // the backedge.
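+ //
+ // Illustrative loop shapes (assumed for this note, not taken from the patch):
+ //
+ //   do { body(i); ++i; } while (i < n);   // rotated: the latch is the
+ //                                         // backedge and also exits the loop
+ //
+ //   while (i < n) { body(i); ++i; }       // head-tested: the latch only
+ //                                         // jumps back to the header, so no
+ //                                         // exiting block is the latch
+ //
+ // The all_of() check below detects the head-tested shape and pins
+ // IVIncInsertPos to the latch terminator instead of using post-inc forms.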
BasicBlock *LatchBlock = L->getLoopLatch();
SmallVector<BasicBlock*, 8> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
+ if (llvm::all_of(ExitingBlocks, [&LatchBlock](const BasicBlock *BB) {
+ return LatchBlock != BB;
+ })) {
+ // The backedge doesn't exit the loop; treat this as a head-tested loop.
+ IVIncInsertPos = LatchBlock->getTerminator();
+ return;
+ }
+ // Otherwise treat this as a rotated loop.
for (BasicBlock *ExitingBlock : ExitingBlocks) {
// Get the terminating condition for the loop if possible. If we
@@ -2241,8 +2300,6 @@ bool LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset,
LU.MinOffset = NewMinOffset;
LU.MaxOffset = NewMaxOffset;
LU.AccessTy = NewAccessTy;
- if (NewOffset != LU.Offsets.back())
- LU.Offsets.push_back(NewOffset);
return true;
}
@@ -2279,11 +2336,6 @@ std::pair<size_t, int64_t> LSRInstance::getUse(const SCEV *&Expr,
Uses.push_back(LSRUse(Kind, AccessTy));
LSRUse &LU = Uses[LUIdx];
- // We don't need to track redundant offsets, but we don't need to go out
- // of our way here to avoid them.
- if (LU.Offsets.empty() || Offset != LU.Offsets.back())
- LU.Offsets.push_back(Offset);
-
LU.MinOffset = Offset;
LU.MaxOffset = Offset;
return std::make_pair(LUIdx, Offset);
@@ -2500,7 +2552,7 @@ bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
if (!isa<SCEVConstant>(IncExpr)) {
const SCEV *HeadExpr = SE.getSCEV(getWideOperand(Incs[0].IVOperand));
if (isa<SCEVConstant>(SE.getMinusSCEV(OperExpr, HeadExpr)))
- return 0;
+ return false;
}
SmallPtrSet<const SCEV*, 8> Processed;
@@ -2797,9 +2849,8 @@ void LSRInstance::FinalizeChain(IVChain &Chain) {
DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");
for (const IVInc &Inc : Chain) {
- DEBUG(dbgs() << " Inc: " << Inc.UserInst << "\n");
- auto UseI = std::find(Inc.UserInst->op_begin(), Inc.UserInst->op_end(),
- Inc.IVOperand);
+ DEBUG(dbgs() << " Inc: " << *Inc.UserInst << "\n");
+ auto UseI = find(Inc.UserInst->operands(), Inc.IVOperand);
assert(UseI != Inc.UserInst->op_end() && "cannot find IV operand");
IVIncSet.insert(UseI);
}
@@ -2932,39 +2983,34 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
for (const IVStrideUse &U : IU) {
Instruction *UserInst = U.getUser();
// Skip IV users that are part of profitable IV Chains.
- User::op_iterator UseI = std::find(UserInst->op_begin(), UserInst->op_end(),
- U.getOperandValToReplace());
+ User::op_iterator UseI =
+ find(UserInst->operands(), U.getOperandValToReplace());
assert(UseI != UserInst->op_end() && "cannot find IV operand");
if (IVIncSet.count(UseI))
continue;
- // Record the uses.
- LSRFixup &LF = getNewFixup();
- LF.UserInst = UserInst;
- LF.OperandValToReplace = U.getOperandValToReplace();
- LF.PostIncLoops = U.getPostIncLoops();
-
LSRUse::KindType Kind = LSRUse::Basic;
MemAccessTy AccessTy;
- if (isAddressUse(LF.UserInst, LF.OperandValToReplace)) {
+ if (isAddressUse(UserInst, U.getOperandValToReplace())) {
Kind = LSRUse::Address;
- AccessTy = getAccessType(LF.UserInst);
+ AccessTy = getAccessType(UserInst);
}
const SCEV *S = IU.getExpr(U);
-
+ PostIncLoopSet TmpPostIncLoops = U.getPostIncLoops();
+
// Equality (== and !=) ICmps are special. We can rewrite (i == N) as
// (N - i == 0), and this allows (N - i) to be the expression that we work
// with rather than just N or i, so we can consider the register
// requirements for both N and i at the same time. Limiting this code to
// equality icmps is not a problem because all interesting loops use
// equality icmps, thanks to IndVarSimplify.
- if (ICmpInst *CI = dyn_cast<ICmpInst>(LF.UserInst))
+ if (ICmpInst *CI = dyn_cast<ICmpInst>(UserInst))
if (CI->isEquality()) {
// Swap the operands if needed to put the OperandValToReplace on the
// left, for consistency.
Value *NV = CI->getOperand(1);
- if (NV == LF.OperandValToReplace) {
+ if (NV == U.getOperandValToReplace()) {
CI->setOperand(1, CI->getOperand(0));
CI->setOperand(0, NV);
NV = CI->getOperand(1);
@@ -2977,7 +3023,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
// S is normalized, so normalize N before folding it into S
// to keep the result normalized.
N = TransformForPostIncUse(Normalize, N, CI, nullptr,
- LF.PostIncLoops, SE, DT);
+ TmpPostIncLoops, SE, DT);
Kind = LSRUse::ICmpZero;
S = SE.getMinusSCEV(N, S);
}
@@ -2990,12 +3036,20 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
Factors.insert(-1);
}
- // Set up the initial formula for this use.
+ // Get or create an LSRUse.
std::pair<size_t, int64_t> P = getUse(S, Kind, AccessTy);
- LF.LUIdx = P.first;
- LF.Offset = P.second;
- LSRUse &LU = Uses[LF.LUIdx];
+ size_t LUIdx = P.first;
+ int64_t Offset = P.second;
+ LSRUse &LU = Uses[LUIdx];
+
+ // Record the fixup.
+ LSRFixup &LF = LU.getNewFixup();
+ LF.UserInst = UserInst;
+ LF.OperandValToReplace = U.getOperandValToReplace();
+ LF.PostIncLoops = TmpPostIncLoops;
+ LF.Offset = Offset;
LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
+
if (!LU.WidestFixupType ||
SE.getTypeSizeInBits(LU.WidestFixupType) <
SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
@@ -3003,8 +3057,8 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
// If this is the first use of this LSRUse, give it a formula.
if (LU.Formulae.empty()) {
- InsertInitialFormula(S, LU, LF.LUIdx);
- CountRegisters(LU.Formulae.back(), LF.LUIdx);
+ InsertInitialFormula(S, LU, LUIdx);
+ CountRegisters(LU.Formulae.back(), LUIdx);
}
}
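Taken together with the hunks above, the shape of this refactor (as read from the patch itself) is:

// Before: LSRInstance kept one flat Fixups list; each LSRFixup carried a LUIdx
//         and every LSRUse maintained a parallel Offsets vector.
// After:  each LSRUse owns its fixups (LU.getNewFixup(), LU.Fixups, pushFixup),
//         a fixup records only its Offset, and the use's MinOffset/MaxOffset
//         replace the Offsets list; that is why the RateFormula calls later in
//         the patch drop the LU.Offsets argument.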
@@ -3130,20 +3184,21 @@ LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
continue;
}
- LSRFixup &LF = getNewFixup();
- LF.UserInst = const_cast<Instruction *>(UserInst);
- LF.OperandValToReplace = U;
std::pair<size_t, int64_t> P = getUse(
S, LSRUse::Basic, MemAccessTy());
- LF.LUIdx = P.first;
- LF.Offset = P.second;
- LSRUse &LU = Uses[LF.LUIdx];
+ size_t LUIdx = P.first;
+ int64_t Offset = P.second;
+ LSRUse &LU = Uses[LUIdx];
+ LSRFixup &LF = LU.getNewFixup();
+ LF.UserInst = const_cast<Instruction *>(UserInst);
+ LF.OperandValToReplace = U;
+ LF.Offset = Offset;
LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
if (!LU.WidestFixupType ||
SE.getTypeSizeInBits(LU.WidestFixupType) <
SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
LU.WidestFixupType = LF.OperandValToReplace->getType();
- InsertSupplementalFormula(US, LU, LF.LUIdx);
+ InsertSupplementalFormula(US, LU, LUIdx);
CountRegisters(LU.Formulae.back(), Uses.size() - 1);
break;
}
@@ -3175,7 +3230,7 @@ static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C,
return nullptr;
} else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
// Split a non-zero base out of an addrec.
- if (AR->getStart()->isZero())
+ if (AR->getStart()->isZero() || !AR->isAffine())
return S;
const SCEV *Remainder = CollectSubexprs(AR->getStart(),
@@ -3629,7 +3684,7 @@ struct WorkItem {
void dump() const;
};
-}
+} // end anonymous namespace
void WorkItem::print(raw_ostream &OS) const {
OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx
@@ -3872,8 +3927,7 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
// the corresponding bad register from the Regs set.
Cost CostF;
Regs.clear();
- CostF.RateFormula(TTI, F, Regs, VisitedRegs, L, LU.Offsets, SE, DT, LU,
- &LoserRegs);
+ CostF.RateFormula(TTI, F, Regs, VisitedRegs, L, SE, DT, LU, &LoserRegs);
if (CostF.isLoser()) {
// During initial formula generation, undesirable formulae are generated
// by uses within other loops that have some non-trivial address mode or
@@ -3906,8 +3960,7 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
Cost CostBest;
Regs.clear();
- CostBest.RateFormula(TTI, Best, Regs, VisitedRegs, L, LU.Offsets, SE,
- DT, LU);
+ CostBest.RateFormula(TTI, Best, Regs, VisitedRegs, L, SE, DT, LU);
if (CostF < CostBest)
std::swap(F, Best);
DEBUG(dbgs() << " Filtering out formula "; F.print(dbgs());
@@ -4053,25 +4106,13 @@ void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;
- // Update the relocs to reference the new use.
- for (LSRFixup &Fixup : Fixups) {
- if (Fixup.LUIdx == LUIdx) {
- Fixup.LUIdx = LUThatHas - &Uses.front();
- Fixup.Offset += F.BaseOffset;
- // Add the new offset to LUThatHas' offset list.
- if (LUThatHas->Offsets.back() != Fixup.Offset) {
- LUThatHas->Offsets.push_back(Fixup.Offset);
- if (Fixup.Offset > LUThatHas->MaxOffset)
- LUThatHas->MaxOffset = Fixup.Offset;
- if (Fixup.Offset < LUThatHas->MinOffset)
- LUThatHas->MinOffset = Fixup.Offset;
- }
- DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << '\n');
- }
- if (Fixup.LUIdx == NumUses-1)
- Fixup.LUIdx = LUIdx;
+ // Transfer the fixups of LU to LUThatHas.
+ for (LSRFixup &Fixup : LU.Fixups) {
+ Fixup.Offset += F.BaseOffset;
+ LUThatHas->pushFixup(Fixup);
+ DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << '\n');
}
-
+
// Delete formulae from the new use which are no longer legal.
bool Any = false;
for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
@@ -4137,9 +4178,10 @@ void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
for (const SCEV *Reg : RegUses) {
if (Taken.count(Reg))
continue;
- if (!Best)
+ if (!Best) {
Best = Reg;
- else {
+ BestNum = RegUses.getUsedByIndices(Reg).count();
+ } else {
unsigned Count = RegUses.getUsedByIndices(Reg).count();
if (Count > BestNum) {
Best = Reg;
@@ -4229,8 +4271,7 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
int NumReqRegsToFind = std::min(F.getNumRegs(), ReqRegs.size());
for (const SCEV *Reg : ReqRegs) {
if ((F.ScaledReg && F.ScaledReg == Reg) ||
- std::find(F.BaseRegs.begin(), F.BaseRegs.end(), Reg) !=
- F.BaseRegs.end()) {
+ is_contained(F.BaseRegs, Reg)) {
--NumReqRegsToFind;
if (NumReqRegsToFind == 0)
break;
@@ -4246,8 +4287,7 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
// the current best, prune the search at that point.
NewCost = CurCost;
NewRegs = CurRegs;
- NewCost.RateFormula(TTI, F, NewRegs, VisitedRegs, L, LU.Offsets, SE, DT,
- LU);
+ NewCost.RateFormula(TTI, F, NewRegs, VisitedRegs, L, SE, DT, LU);
if (NewCost < SolutionCost) {
Workspace.push_back(&F);
if (Workspace.size() != Uses.size()) {
@@ -4313,7 +4353,7 @@ LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
const SmallVectorImpl<Instruction *> &Inputs)
const {
Instruction *Tentative = &*IP;
- for (;;) {
+ while (true) {
bool AllDominate = true;
Instruction *BetterPos = nullptr;
// Don't bother attempting to insert before a catchswitch, their basic block
@@ -4430,12 +4470,12 @@ LSRInstance::AdjustInsertPositionForExpand(BasicBlock::iterator LowestIP,
/// Emit instructions for the leading candidate expression for this LSRUse (this
/// is called "expanding").
-Value *LSRInstance::Expand(const LSRFixup &LF,
+Value *LSRInstance::Expand(const LSRUse &LU,
+ const LSRFixup &LF,
const Formula &F,
BasicBlock::iterator IP,
SCEVExpander &Rewriter,
SmallVectorImpl<WeakVH> &DeadInsts) const {
- const LSRUse &LU = Uses[LF.LUIdx];
if (LU.RigidFormula)
return LF.OperandValToReplace;
@@ -4617,6 +4657,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
/// effectively happens in their predecessor blocks, so the expression may need
/// to be expanded in multiple places.
void LSRInstance::RewriteForPHI(PHINode *PN,
+ const LSRUse &LU,
const LSRFixup &LF,
const Formula &F,
SCEVExpander &Rewriter,
@@ -4670,7 +4711,7 @@ void LSRInstance::RewriteForPHI(PHINode *PN,
if (!Pair.second)
PN->setIncomingValue(i, Pair.first->second);
else {
- Value *FullV = Expand(LF, F, BB->getTerminator()->getIterator(),
+ Value *FullV = Expand(LU, LF, F, BB->getTerminator()->getIterator(),
Rewriter, DeadInsts);
// If this is reuse-by-noop-cast, insert the noop cast.
@@ -4691,17 +4732,18 @@ void LSRInstance::RewriteForPHI(PHINode *PN,
/// Emit instructions for the leading candidate expression for this LSRUse (this
/// is called "expanding"), and update the UserInst to reference the newly
/// expanded value.
-void LSRInstance::Rewrite(const LSRFixup &LF,
+void LSRInstance::Rewrite(const LSRUse &LU,
+ const LSRFixup &LF,
const Formula &F,
SCEVExpander &Rewriter,
SmallVectorImpl<WeakVH> &DeadInsts) const {
// First, find an insertion point that dominates UserInst. For PHI nodes,
// find the nearest block which dominates all the relevant uses.
if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {
- RewriteForPHI(PN, LF, F, Rewriter, DeadInsts);
+ RewriteForPHI(PN, LU, LF, F, Rewriter, DeadInsts);
} else {
Value *FullV =
- Expand(LF, F, LF.UserInst->getIterator(), Rewriter, DeadInsts);
+ Expand(LU, LF, F, LF.UserInst->getIterator(), Rewriter, DeadInsts);
// If this is reuse-by-noop-cast, insert the noop cast.
Type *OpTy = LF.OperandValToReplace->getType();
@@ -4717,7 +4759,7 @@ void LSRInstance::Rewrite(const LSRFixup &LF,
// its new value may happen to be equal to LF.OperandValToReplace, in
// which case doing replaceUsesOfWith leads to replacing both operands
// with the same value. TODO: Reorganize this.
- if (Uses[LF.LUIdx].Kind == LSRUse::ICmpZero)
+ if (LU.Kind == LSRUse::ICmpZero)
LF.UserInst->setOperand(0, FullV);
else
LF.UserInst->replaceUsesOfWith(LF.OperandValToReplace, FullV);
@@ -4750,11 +4792,11 @@ void LSRInstance::ImplementSolution(
}
// Expand the new value definitions and update the users.
- for (const LSRFixup &Fixup : Fixups) {
- Rewrite(Fixup, *Solution[Fixup.LUIdx], Rewriter, DeadInsts);
-
- Changed = true;
- }
+ for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx)
+ for (const LSRFixup &Fixup : Uses[LUIdx].Fixups) {
+ Rewrite(Uses[LUIdx], Fixup, *Solution[LUIdx], Rewriter, DeadInsts);
+ Changed = true;
+ }
for (const IVChain &Chain : IVChainVec) {
GenerateIVChain(Chain, Rewriter, DeadInsts);
@@ -4898,11 +4940,12 @@ void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
void LSRInstance::print_fixups(raw_ostream &OS) const {
OS << "LSR is examining the following fixup sites:\n";
- for (const LSRFixup &LF : Fixups) {
- dbgs() << " ";
- LF.print(OS);
- OS << '\n';
- }
+ for (const LSRUse &LU : Uses)
+ for (const LSRFixup &LF : LU.Fixups) {
+ dbgs() << " ";
+ LF.print(OS);
+ OS << '\n';
+ }
}
void LSRInstance::print_uses(raw_ostream &OS) const {
@@ -4935,6 +4978,7 @@ namespace {
class LoopStrengthReduce : public LoopPass {
public:
static char ID; // Pass ID, replacement for typeid
+
LoopStrengthReduce();
private:
@@ -4942,24 +4986,7 @@ private:
void getAnalysisUsage(AnalysisUsage &AU) const override;
};
-}
-
-char LoopStrengthReduce::ID = 0;
-INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
- "Loop Strength Reduction", false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(IVUsersWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
-INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce",
- "Loop Strength Reduction", false, false)
-
-
-Pass *llvm::createLoopStrengthReducePass() {
- return new LoopStrengthReduce();
-}
+} // end anonymous namespace
LoopStrengthReduce::LoopStrengthReduce() : LoopPass(ID) {
initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
@@ -4985,16 +5012,9 @@ void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetTransformInfoWrapperPass>();
}
-bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
- if (skipLoop(L))
- return false;
-
- auto &IU = getAnalysis<IVUsersWrapperPass>().getIU();
- auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
- *L->getHeader()->getParent());
+static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
+ DominatorTree &DT, LoopInfo &LI,
+ const TargetTransformInfo &TTI) {
bool Changed = false;
// Run the main LSR transformation.
@@ -5005,15 +5025,11 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
if (EnablePhiElim && L->isLoopSimplifyForm()) {
SmallVector<WeakVH, 16> DeadInsts;
const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
- SCEVExpander Rewriter(getAnalysis<ScalarEvolutionWrapperPass>().getSE(), DL,
- "lsr");
+ SCEVExpander Rewriter(SE, DL, "lsr");
#ifndef NDEBUG
Rewriter.setDebugType(DEBUG_TYPE);
#endif
- unsigned numFolded = Rewriter.replaceCongruentIVs(
- L, &getAnalysis<DominatorTreeWrapperPass>().getDomTree(), DeadInsts,
- &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
- *L->getHeader()->getParent()));
+ unsigned numFolded = Rewriter.replaceCongruentIVs(L, &DT, DeadInsts, &TTI);
if (numFolded) {
Changed = true;
DeleteTriviallyDeadInstructions(DeadInsts);
@@ -5022,3 +5038,50 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
}
return Changed;
}
+
+bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
+ if (skipLoop(L))
+ return false;
+
+ auto &IU = getAnalysis<IVUsersWrapperPass>().getIU();
+ auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
+ *L->getHeader()->getParent());
+ return ReduceLoopStrength(L, IU, SE, DT, LI, TTI);
+}
+
+PreservedAnalyses LoopStrengthReducePass::run(Loop &L,
+ LoopAnalysisManager &AM) {
+ const auto &FAM =
+ AM.getResult<FunctionAnalysisManagerLoopProxy>(L).getManager();
+ Function *F = L.getHeader()->getParent();
+
+ auto &IU = AM.getResult<IVUsersAnalysis>(L);
+ auto *SE = FAM.getCachedResult<ScalarEvolutionAnalysis>(*F);
+ auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(*F);
+ auto *LI = FAM.getCachedResult<LoopAnalysis>(*F);
+ auto *TTI = FAM.getCachedResult<TargetIRAnalysis>(*F);
+ assert((SE && DT && LI && TTI) &&
+ "Analyses for Loop Strength Reduce not available");
+
+ if (!ReduceLoopStrength(&L, IU, *SE, *DT, *LI, *TTI))
+ return PreservedAnalyses::all();
+
+ return getLoopPassPreservedAnalyses();
+}
+
+char LoopStrengthReduce::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
+ "Loop Strength Reduction", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(IVUsersWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce",
+ "Loop Strength Reduction", false, false)
+
+Pass *llvm::createLoopStrengthReducePass() { return new LoopStrengthReduce(); }
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 91af4a1922ce..f66369b30369 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -12,18 +12,19 @@
// counts of loops easily.
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Scalar/LoopUnrollPass.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/LoopPassManager.h"
#include "llvm/Analysis/LoopUnrollAnalyzer.h"
+#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -45,16 +46,14 @@ static cl::opt<unsigned>
UnrollThreshold("unroll-threshold", cl::Hidden,
cl::desc("The baseline cost threshold for loop unrolling"));
-static cl::opt<unsigned> UnrollPercentDynamicCostSavedThreshold(
- "unroll-percent-dynamic-cost-saved-threshold", cl::init(50), cl::Hidden,
- cl::desc("The percentage of estimated dynamic cost which must be saved by "
- "unrolling to allow unrolling up to the max threshold."));
-
-static cl::opt<unsigned> UnrollDynamicCostSavingsDiscount(
- "unroll-dynamic-cost-savings-discount", cl::init(100), cl::Hidden,
- cl::desc("This is the amount discounted from the total unroll cost when "
- "the unrolled form has a high dynamic cost savings (triggered by "
- "the '-unroll-perecent-dynamic-cost-saved-threshold' flag)."));
+static cl::opt<unsigned> UnrollMaxPercentThresholdBoost(
+ "unroll-max-percent-threshold-boost", cl::init(400), cl::Hidden,
+ cl::desc("The maximum 'boost' (represented as a percentage >= 100) applied "
+ "to the threshold when aggressively unrolling a loop due to the "
+ "dynamic cost savings. If completely unrolling a loop will reduce "
+ "the total runtime from X to Y, we boost the loop unroll "
+ "threshold to DefaultThreshold*std::min(MaxPercentThresholdBoost, "
+ "X/Y). This limit avoids excessive code bloat."));
static cl::opt<unsigned> UnrollMaxIterationsCountToAnalyze(
"unroll-max-iteration-count-to-analyze", cl::init(10), cl::Hidden,
@@ -90,43 +89,59 @@ static cl::opt<bool>
UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden,
cl::desc("Unroll loops with run-time trip counts"));
+static cl::opt<unsigned> UnrollMaxUpperBound(
+ "unroll-max-upperbound", cl::init(8), cl::Hidden,
+ cl::desc(
+ "The max of trip count upper bound that is considered in unrolling"));
+
static cl::opt<unsigned> PragmaUnrollThreshold(
"pragma-unroll-threshold", cl::init(16 * 1024), cl::Hidden,
cl::desc("Unrolled size limit for loops with an unroll(full) or "
"unroll_count pragma."));
+static cl::opt<unsigned> FlatLoopTripCountThreshold(
+ "flat-loop-tripcount-threshold", cl::init(5), cl::Hidden,
+ cl::desc("If the runtime tripcount for the loop is lower than the "
+ "threshold, the loop is considered as flat and will be less "
+ "aggressively unrolled."));
+
+static cl::opt<bool>
+ UnrollAllowPeeling("unroll-allow-peeling", cl::Hidden,
+ cl::desc("Allows loops to be peeled when the dynamic "
+ "trip count is known to be low."));
+
/// A magic value for use with the Threshold parameter to indicate
/// that the loop unroll should be performed regardless of how much
/// code expansion would result.
static const unsigned NoThreshold = UINT_MAX;
-/// Default unroll count for loops with run-time trip count if
-/// -unroll-count is not set
-static const unsigned DefaultUnrollRuntimeCount = 8;
-
/// Gather the various unrolling parameters based on the defaults, compiler
/// flags, TTI overrides and user specified parameters.
static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
Loop *L, const TargetTransformInfo &TTI, Optional<unsigned> UserThreshold,
Optional<unsigned> UserCount, Optional<bool> UserAllowPartial,
- Optional<bool> UserRuntime) {
+ Optional<bool> UserRuntime, Optional<bool> UserUpperBound) {
TargetTransformInfo::UnrollingPreferences UP;
// Set up the defaults
UP.Threshold = 150;
- UP.PercentDynamicCostSavedThreshold = 50;
- UP.DynamicCostSavingsDiscount = 100;
+ UP.MaxPercentThresholdBoost = 400;
UP.OptSizeThreshold = 0;
UP.PartialThreshold = UP.Threshold;
UP.PartialOptSizeThreshold = 0;
UP.Count = 0;
+ UP.PeelCount = 0;
+ UP.DefaultUnrollRuntimeCount = 8;
UP.MaxCount = UINT_MAX;
UP.FullUnrollMaxCount = UINT_MAX;
+ UP.BEInsns = 2;
UP.Partial = false;
UP.Runtime = false;
UP.AllowRemainder = true;
UP.AllowExpensiveTripCount = false;
UP.Force = false;
+ UP.UpperBound = false;
+ UP.AllowPeeling = false;
// Override with any target specific settings
TTI.getUnrollingPreferences(L, UP);
@@ -142,11 +157,8 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
UP.Threshold = UnrollThreshold;
UP.PartialThreshold = UnrollThreshold;
}
- if (UnrollPercentDynamicCostSavedThreshold.getNumOccurrences() > 0)
- UP.PercentDynamicCostSavedThreshold =
- UnrollPercentDynamicCostSavedThreshold;
- if (UnrollDynamicCostSavingsDiscount.getNumOccurrences() > 0)
- UP.DynamicCostSavingsDiscount = UnrollDynamicCostSavingsDiscount;
+ if (UnrollMaxPercentThresholdBoost.getNumOccurrences() > 0)
+ UP.MaxPercentThresholdBoost = UnrollMaxPercentThresholdBoost;
if (UnrollMaxCount.getNumOccurrences() > 0)
UP.MaxCount = UnrollMaxCount;
if (UnrollFullMaxCount.getNumOccurrences() > 0)
@@ -157,6 +169,10 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
UP.AllowRemainder = UnrollAllowRemainder;
if (UnrollRuntime.getNumOccurrences() > 0)
UP.Runtime = UnrollRuntime;
+ if (UnrollMaxUpperBound == 0)
+ UP.UpperBound = false;
+ if (UnrollAllowPeeling.getNumOccurrences() > 0)
+ UP.AllowPeeling = UnrollAllowPeeling;
// Apply user values provided by argument
if (UserThreshold.hasValue()) {
@@ -169,6 +185,8 @@ static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
UP.Partial = *UserAllowPartial;
if (UserRuntime.hasValue())
UP.Runtime = *UserRuntime;
+ if (UserUpperBound.hasValue())
+ UP.UpperBound = *UserUpperBound;
return UP;
}
@@ -210,11 +228,11 @@ struct UnrolledInstStateKeyInfo {
namespace {
struct EstimatedUnrollCost {
/// \brief The estimated cost after unrolling.
- int UnrolledCost;
+ unsigned UnrolledCost;
/// \brief The estimated dynamic cost of executing the instructions in the
/// rolled form.
- int RolledDynamicCost;
+ unsigned RolledDynamicCost;
};
}
@@ -234,7 +252,7 @@ struct EstimatedUnrollCost {
static Optional<EstimatedUnrollCost>
analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,
ScalarEvolution &SE, const TargetTransformInfo &TTI,
- int MaxUnrolledLoopSize) {
+ unsigned MaxUnrolledLoopSize) {
// We want to be able to scale offsets by the trip count and add more offsets
// to them without checking for overflows, and we already don't want to
// analyze *massive* trip counts, so we force the max to be reasonably small.
@@ -258,14 +276,14 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,
// The estimated cost of the unrolled form of the loop. We try to estimate
// this by simplifying as much as we can while computing the estimate.
- int UnrolledCost = 0;
+ unsigned UnrolledCost = 0;
// We also track the estimated dynamic (that is, actually executed) cost in
// the rolled form. This helps identify cases when the savings from unrolling
// aren't just exposing dead control flows, but actual reduced dynamic
// instructions due to the simplifications which we expect to occur after
// unrolling.
- int RolledDynamicCost = 0;
+ unsigned RolledDynamicCost = 0;
// We track the simplification of each instruction in each iteration. We use
// this to recursively merge costs into the unrolled cost on-demand so that
@@ -412,6 +430,9 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,
// it. We don't change the actual IR, just count optimization
// opportunities.
for (Instruction &I : *BB) {
+ if (isa<DbgInfoIntrinsic>(I))
+ continue;
+
// Track this instruction's expected baseline cost when executing the
// rolled loop form.
RolledDynamicCost += TTI.getUserCost(&I);
@@ -429,16 +450,16 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,
if (IsFree)
continue;
- // If the instruction might have a side-effect recursively account for
- // the cost of it and all the instructions leading up to it.
- if (I.mayHaveSideEffects())
- AddCostRecursively(I, Iteration);
-
// Can't properly model a cost of a call.
// FIXME: With a proper cost model we should be able to do it.
if(isa<CallInst>(&I))
return None;
+ // If the instruction might have a side-effect recursively account for
+ // the cost of it and all the instructions leading up to it.
+ if (I.mayHaveSideEffects())
+ AddCostRecursively(I, Iteration);
+
// If unrolled body turns out to be too big, bail out.
if (UnrolledCost > MaxUnrolledLoopSize) {
DEBUG(dbgs() << " Exceeded threshold.. exiting.\n"
@@ -529,7 +550,7 @@ analyzeLoopUnrollCost(const Loop *L, unsigned TripCount, DominatorTree &DT,
static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
bool &NotDuplicatable, bool &Convergent,
const TargetTransformInfo &TTI,
- AssumptionCache *AC) {
+ AssumptionCache *AC, unsigned BEInsns) {
SmallPtrSet<const Value *, 32> EphValues;
CodeMetrics::collectEphemeralValues(L, AC, EphValues);
@@ -548,7 +569,7 @@ static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
// that each loop has at least three instructions (likely a conditional
// branch, a comparison feeding that branch, and some kind of loop increment
// feeding that comparison instruction).
- LoopSize = std::max(LoopSize, 3u);
+ LoopSize = std::max(LoopSize, BEInsns + 1);
return LoopSize;
}
@@ -635,70 +656,38 @@ static void SetLoopAlreadyUnrolled(Loop *L) {
L->setLoopID(NewLoopID);
}
-static bool canUnrollCompletely(Loop *L, unsigned Threshold,
- unsigned PercentDynamicCostSavedThreshold,
- unsigned DynamicCostSavingsDiscount,
- uint64_t UnrolledCost,
- uint64_t RolledDynamicCost) {
- if (Threshold == NoThreshold) {
- DEBUG(dbgs() << " Can fully unroll, because no threshold is set.\n");
- return true;
- }
-
- if (UnrolledCost <= Threshold) {
- DEBUG(dbgs() << " Can fully unroll, because unrolled cost: "
- << UnrolledCost << "<" << Threshold << "\n");
- return true;
- }
-
- assert(UnrolledCost && "UnrolledCost can't be 0 at this point.");
- assert(RolledDynamicCost >= UnrolledCost &&
- "Cannot have a higher unrolled cost than a rolled cost!");
-
- // Compute the percentage of the dynamic cost in the rolled form that is
- // saved when unrolled. If unrolling dramatically reduces the estimated
- // dynamic cost of the loop, we use a higher threshold to allow more
- // unrolling.
- unsigned PercentDynamicCostSaved =
- (uint64_t)(RolledDynamicCost - UnrolledCost) * 100ull / RolledDynamicCost;
-
- if (PercentDynamicCostSaved >= PercentDynamicCostSavedThreshold &&
- (int64_t)UnrolledCost - (int64_t)DynamicCostSavingsDiscount <=
- (int64_t)Threshold) {
- DEBUG(dbgs() << " Can fully unroll, because unrolling will reduce the "
- "expected dynamic cost by "
- << PercentDynamicCostSaved << "% (threshold: "
- << PercentDynamicCostSavedThreshold << "%)\n"
- << " and the unrolled cost (" << UnrolledCost
- << ") is less than the max threshold ("
- << DynamicCostSavingsDiscount << ").\n");
- return true;
- }
+// Computes the boosting factor for complete unrolling.
+// If fully unrolling the loop would save a lot of RolledDynamicCost, it would
+// be beneficial to fully unroll the loop even if UnrolledCost is large. We
+// use (RolledDynamicCost / UnrolledCost) to model the unroll benefits to adjust
+// the unroll threshold.
+static unsigned getFullUnrollBoostingFactor(const EstimatedUnrollCost &Cost,
+ unsigned MaxPercentThresholdBoost) {
+ if (Cost.RolledDynamicCost >= UINT_MAX / 100)
+ return 100;
+ else if (Cost.UnrolledCost != 0)
+ // The boosting factor is RolledDynamicCost / UnrolledCost
+ return std::min(100 * Cost.RolledDynamicCost / Cost.UnrolledCost,
+ MaxPercentThresholdBoost);
+ else
+ return MaxPercentThresholdBoost;
+}
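A minimal editorial sketch (hypothetical numbers, not part of the patch) restating that formula, since the later full-unroll check is Cost->UnrolledCost < UP.Threshold * Boost / 100:

#include <climits>

// Mirrors getFullUnrollBoostingFactor above, written out for a worked example.
constexpr unsigned boostPct(unsigned Rolled, unsigned Unrolled, unsigned MaxBoost) {
  return Rolled >= UINT_MAX / 100 ? 100u
       : Unrolled == 0            ? MaxBoost
       : (100 * Rolled / Unrolled < MaxBoost ? 100 * Rolled / Unrolled
                                             : MaxBoost);
}
// With the default Threshold = 150 and MaxPercentThresholdBoost = 400:
static_assert(boostPct(900, 300, 400) == 300,
              "3x dynamic savings raise the effective limit to 150 * 300 / 100 = 450");
static_assert(boostPct(900, 100, 400) == 400,
              "larger savings are capped by the 400% boost");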
- DEBUG(dbgs() << " Too large to fully unroll:\n");
- DEBUG(dbgs() << " Threshold: " << Threshold << "\n");
- DEBUG(dbgs() << " Max threshold: " << DynamicCostSavingsDiscount << "\n");
- DEBUG(dbgs() << " Percent cost saved threshold: "
- << PercentDynamicCostSavedThreshold << "%\n");
- DEBUG(dbgs() << " Unrolled cost: " << UnrolledCost << "\n");
- DEBUG(dbgs() << " Rolled dynamic cost: " << RolledDynamicCost << "\n");
- DEBUG(dbgs() << " Percent cost saved: " << PercentDynamicCostSaved
- << "\n");
- return false;
+// Returns loop size estimation for unrolled loop.
+static uint64_t getUnrolledLoopSize(
+ unsigned LoopSize,
+ TargetTransformInfo::UnrollingPreferences &UP) {
+ assert(LoopSize >= UP.BEInsns && "LoopSize should not be less than BEInsns!");
+ return (uint64_t)(LoopSize - UP.BEInsns) * UP.Count + UP.BEInsns;
}
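A quick worked instance of the size formula (hypothetical sizes, using the default UP.BEInsns of 2 set earlier in this patch):

// LoopSize = 12, UP.BEInsns = 2 (backedge compare + branch), UP.Count = 4.
// Only the 10 non-backedge instructions are replicated per iteration:
static_assert((12u - 2u) * 4u + 2u == 42u,
              "backedge instructions are counted once, not per copy");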
// Returns true if unroll count was set explicitly.
// Calculates unroll count and writes it to UP.Count.
-static bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
- DominatorTree &DT, LoopInfo *LI,
- ScalarEvolution *SE, unsigned TripCount,
- unsigned TripMultiple, unsigned LoopSize,
- TargetTransformInfo::UnrollingPreferences &UP) {
- // BEInsns represents number of instructions optimized when "back edge"
- // becomes "fall through" in unrolled loop.
- // For now we count a conditional branch on a backedge and a comparison
- // feeding it.
- unsigned BEInsns = 2;
+static bool computeUnrollCount(
+ Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI,
+ ScalarEvolution *SE, OptimizationRemarkEmitter *ORE, unsigned &TripCount,
+ unsigned MaxTripCount, unsigned &TripMultiple, unsigned LoopSize,
+ TargetTransformInfo::UnrollingPreferences &UP, bool &UseUpperBound) {
// Check for explicit Count.
// 1st priority is unroll count set by "unroll-count" option.
bool UserUnrollCount = UnrollCount.getNumOccurrences() > 0;
@@ -706,8 +695,7 @@ static bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
UP.Count = UnrollCount;
UP.AllowExpensiveTripCount = true;
UP.Force = true;
- if (UP.AllowRemainder &&
- (LoopSize - BEInsns) * UP.Count + BEInsns < UP.Threshold)
+ if (UP.AllowRemainder && getUnrolledLoopSize(LoopSize, UP) < UP.Threshold)
return true;
}
@@ -719,13 +707,13 @@ static bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
UP.AllowExpensiveTripCount = true;
UP.Force = true;
if (UP.AllowRemainder &&
- (LoopSize - BEInsns) * UP.Count + BEInsns < PragmaUnrollThreshold)
+ getUnrolledLoopSize(LoopSize, UP) < PragmaUnrollThreshold)
return true;
}
bool PragmaFullUnroll = HasUnrollFullPragma(L);
if (PragmaFullUnroll && TripCount != 0) {
UP.Count = TripCount;
- if ((LoopSize - BEInsns) * UP.Count + BEInsns < PragmaUnrollThreshold)
+ if (getUnrolledLoopSize(LoopSize, UP) < PragmaUnrollThreshold)
return false;
}
@@ -733,11 +721,6 @@ static bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
bool ExplicitUnroll = PragmaCount > 0 || PragmaFullUnroll ||
PragmaEnableUnroll || UserUnrollCount;
- uint64_t UnrolledSize;
- DebugLoc LoopLoc = L->getStartLoc();
- Function *F = L->getHeader()->getParent();
- LLVMContext &Ctx = F->getContext();
-
if (ExplicitUnroll && TripCount != 0) {
// If the loop has an unrolling pragma, we want to be more aggressive with
// unrolling limits. Set thresholds to at least the PragmaThreshold value
@@ -748,38 +731,48 @@ static bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
}
// 3rd priority is full unroll count.
- // Full unroll make sense only when TripCount could be staticaly calculated.
+ // Full unroll makes sense only when TripCount or its upper bound could be
+ // statically calculated.
// Also we need to check if we exceed FullUnrollMaxCount.
- if (TripCount && TripCount <= UP.FullUnrollMaxCount) {
+ // If using the upper bound to unroll, TripMultiple should be set to 1 because
+  // we do not know when the loop may exit.
+  // MaxTripCount and ExactTripCount cannot both be non-zero since we only
+ // compute the former when the latter is zero.
+ unsigned ExactTripCount = TripCount;
+ assert((ExactTripCount == 0 || MaxTripCount == 0) &&
+         "ExactTripCount and MaxTripCount cannot both be non-zero.");
+ unsigned FullUnrollTripCount = ExactTripCount ? ExactTripCount : MaxTripCount;
+ UP.Count = FullUnrollTripCount;
+ if (FullUnrollTripCount && FullUnrollTripCount <= UP.FullUnrollMaxCount) {
// When computing the unrolled size, note that BEInsns are not replicated
// like the rest of the loop body.
- UnrolledSize = (uint64_t)(LoopSize - BEInsns) * TripCount + BEInsns;
- if (canUnrollCompletely(L, UP.Threshold, 100, UP.DynamicCostSavingsDiscount,
- UnrolledSize, UnrolledSize)) {
- UP.Count = TripCount;
+ if (getUnrolledLoopSize(LoopSize, UP) < UP.Threshold) {
+ UseUpperBound = (MaxTripCount == FullUnrollTripCount);
+ TripCount = FullUnrollTripCount;
+ TripMultiple = UP.UpperBound ? 1 : TripMultiple;
return ExplicitUnroll;
} else {
// The loop isn't that small, but we still can fully unroll it if that
// helps to remove a significant number of instructions.
// To check that, run additional analysis on the loop.
if (Optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost(
- L, TripCount, DT, *SE, TTI,
- UP.Threshold + UP.DynamicCostSavingsDiscount))
- if (canUnrollCompletely(L, UP.Threshold,
- UP.PercentDynamicCostSavedThreshold,
- UP.DynamicCostSavingsDiscount,
- Cost->UnrolledCost, Cost->RolledDynamicCost)) {
- UP.Count = TripCount;
+ L, FullUnrollTripCount, DT, *SE, TTI,
+ UP.Threshold * UP.MaxPercentThresholdBoost / 100)) {
+ unsigned Boost =
+ getFullUnrollBoostingFactor(*Cost, UP.MaxPercentThresholdBoost);
+ if (Cost->UnrolledCost < UP.Threshold * Boost / 100) {
+ UseUpperBound = (MaxTripCount == FullUnrollTripCount);
+ TripCount = FullUnrollTripCount;
+ TripMultiple = UP.UpperBound ? 1 : TripMultiple;
return ExplicitUnroll;
}
+ }
}
}
// 4th priority is partial unrolling.
// Try partial unroll only when TripCount could be statically calculated.
if (TripCount) {
- if (UP.Count == 0)
- UP.Count = TripCount;
UP.Partial |= ExplicitUnroll;
if (!UP.Partial) {
DEBUG(dbgs() << " will not try to unroll partially because "
@@ -787,12 +780,14 @@ static bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
UP.Count = 0;
return false;
}
+ if (UP.Count == 0)
+ UP.Count = TripCount;
if (UP.PartialThreshold != NoThreshold) {
// Reduce unroll count to be modulo of TripCount for partial unrolling.
- UnrolledSize = (uint64_t)(LoopSize - BEInsns) * UP.Count + BEInsns;
- if (UnrolledSize > UP.PartialThreshold)
- UP.Count = (std::max(UP.PartialThreshold, 3u) - BEInsns) /
- (LoopSize - BEInsns);
+ if (getUnrolledLoopSize(LoopSize, UP) > UP.PartialThreshold)
+ UP.Count =
+ (std::max(UP.PartialThreshold, UP.BEInsns + 1) - UP.BEInsns) /
+ (LoopSize - UP.BEInsns);
if (UP.Count > UP.MaxCount)
UP.Count = UP.MaxCount;
while (UP.Count != 0 && TripCount % UP.Count != 0)
@@ -802,19 +797,18 @@ static bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
// largest power-of-two factor that satisfies the threshold limit.
// As we'll create fixup loop, do the type of unrolling only if
// remainder loop is allowed.
- UP.Count = DefaultUnrollRuntimeCount;
- UnrolledSize = (LoopSize - BEInsns) * UP.Count + BEInsns;
- while (UP.Count != 0 && UnrolledSize > UP.PartialThreshold) {
+ UP.Count = UP.DefaultUnrollRuntimeCount;
+ while (UP.Count != 0 &&
+ getUnrolledLoopSize(LoopSize, UP) > UP.PartialThreshold)
UP.Count >>= 1;
- UnrolledSize = (LoopSize - BEInsns) * UP.Count + BEInsns;
- }
}
if (UP.Count < 2) {
if (PragmaEnableUnroll)
- emitOptimizationRemarkMissed(
- Ctx, DEBUG_TYPE, *F, LoopLoc,
- "Unable to unroll loop as directed by unroll(enable) pragma "
- "because unrolled size is too large.");
+ ORE->emit(
+ OptimizationRemarkMissed(DEBUG_TYPE, "UnrollAsDirectedTooLarge",
+ L->getStartLoc(), L->getHeader())
+ << "Unable to unroll loop as directed by unroll(enable) pragma "
+ "because unrolled size is too large.");
UP.Count = 0;
}
} else {
@@ -822,26 +816,48 @@ static bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
}
if ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount &&
UP.Count != TripCount)
- emitOptimizationRemarkMissed(
- Ctx, DEBUG_TYPE, *F, LoopLoc,
- "Unable to fully unroll loop as directed by unroll pragma because "
- "unrolled size is too large.");
+ ORE->emit(
+ OptimizationRemarkMissed(DEBUG_TYPE, "FullUnrollAsDirectedTooLarge",
+ L->getStartLoc(), L->getHeader())
+ << "Unable to fully unroll loop as directed by unroll pragma because "
+ "unrolled size is too large.");
return ExplicitUnroll;
}
assert(TripCount == 0 &&
"All cases when TripCount is constant should be covered here.");
if (PragmaFullUnroll)
- emitOptimizationRemarkMissed(
- Ctx, DEBUG_TYPE, *F, LoopLoc,
- "Unable to fully unroll loop as directed by unroll(full) pragma "
- "because loop has a runtime trip count.");
+ ORE->emit(
+ OptimizationRemarkMissed(DEBUG_TYPE,
+ "CantFullUnrollAsDirectedRuntimeTripCount",
+ L->getStartLoc(), L->getHeader())
+ << "Unable to fully unroll loop as directed by unroll(full) pragma "
+ "because loop has a runtime trip count.");
+
+ // 5th priority is loop peeling
+ computePeelCount(L, LoopSize, UP);
+ if (UP.PeelCount) {
+ UP.Runtime = false;
+ UP.Count = 1;
+ return ExplicitUnroll;
+ }
- // 5th priority is runtime unrolling.
+ // 6th priority is runtime unrolling.
// Don't unroll a runtime trip count loop when it is disabled.
if (HasRuntimeUnrollDisablePragma(L)) {
UP.Count = 0;
return false;
}
+
+ // Check if the runtime trip count is too small when profile is available.
+ if (L->getHeader()->getParent()->getEntryCount()) {
+ if (auto ProfileTripCount = getLoopEstimatedTripCount(L)) {
+ if (*ProfileTripCount < FlatLoopTripCountThreshold)
+ return false;
+ else
+ UP.AllowExpensiveTripCount = true;
+ }
+ }
+
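A hedged illustration of how this profile check behaves (numbers hypothetical; getLoopEstimatedTripCount derives its estimate from the latch branch's profile weights):

// Entry count present, estimated trip count = 3: 3 < FlatLoopTripCountThreshold
// (default 5), so the loop is treated as flat and runtime unrolling is skipped.
// Estimated trip count = 50: the loop is considered hot enough that the check
// sets AllowExpensiveTripCount instead and runtime unrolling proceeds.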
// Reduce count based on the type of unrolling and the threshold values.
UP.Runtime |= PragmaEnableUnroll || PragmaCount > 0 || UserUnrollCount;
if (!UP.Runtime) {
@@ -851,15 +867,13 @@ static bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
return false;
}
if (UP.Count == 0)
- UP.Count = DefaultUnrollRuntimeCount;
- UnrolledSize = (LoopSize - BEInsns) * UP.Count + BEInsns;
+ UP.Count = UP.DefaultUnrollRuntimeCount;
// Reduce unroll count to be the largest power-of-two factor of
// the original count which satisfies the threshold limit.
- while (UP.Count != 0 && UnrolledSize > UP.PartialThreshold) {
+ while (UP.Count != 0 &&
+ getUnrolledLoopSize(LoopSize, UP) > UP.PartialThreshold)
UP.Count >>= 1;
- UnrolledSize = (LoopSize - BEInsns) * UP.Count + BEInsns;
- }
#ifndef NDEBUG
unsigned OrigCount = UP.Count;
@@ -874,16 +888,19 @@ static bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
"multiple, "
<< TripMultiple << ". Reducing unroll count from "
<< OrigCount << " to " << UP.Count << ".\n");
+ using namespace ore;
if (PragmaCount > 0 && !UP.AllowRemainder)
- emitOptimizationRemarkMissed(
- Ctx, DEBUG_TYPE, *F, LoopLoc,
- Twine("Unable to unroll loop the number of times directed by "
- "unroll_count pragma because remainder loop is restricted "
- "(that could architecture specific or because the loop "
- "contains a convergent instruction) and so must have an unroll "
- "count that divides the loop trip multiple of ") +
- Twine(TripMultiple) + ". Unrolling instead " + Twine(UP.Count) +
- " time(s).");
+ ORE->emit(
+ OptimizationRemarkMissed(DEBUG_TYPE,
+ "DifferentUnrollCountFromDirected",
+ L->getStartLoc(), L->getHeader())
+ << "Unable to unroll loop the number of times directed by "
+ "unroll_count pragma because remainder loop is restricted "
+             "(that could be architecture specific or because the loop "
+ "contains a convergent instruction) and so must have an unroll "
+ "count that divides the loop trip multiple of "
+ << NV("TripMultiple", TripMultiple) << ". Unrolling instead "
+ << NV("UnrollCount", UP.Count) << " time(s).");
}
if (UP.Count > UP.MaxCount)
@@ -896,22 +913,34 @@ static bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI,
static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
ScalarEvolution *SE, const TargetTransformInfo &TTI,
- AssumptionCache &AC, bool PreserveLCSSA,
+ AssumptionCache &AC, OptimizationRemarkEmitter &ORE,
+ bool PreserveLCSSA,
Optional<unsigned> ProvidedCount,
Optional<unsigned> ProvidedThreshold,
Optional<bool> ProvidedAllowPartial,
- Optional<bool> ProvidedRuntime) {
+ Optional<bool> ProvidedRuntime,
+ Optional<bool> ProvidedUpperBound) {
DEBUG(dbgs() << "Loop Unroll: F[" << L->getHeader()->getParent()->getName()
<< "] Loop %" << L->getHeader()->getName() << "\n");
- if (HasUnrollDisablePragma(L)) {
+ if (HasUnrollDisablePragma(L))
+ return false;
+ if (!L->isLoopSimplifyForm()) {
+ DEBUG(
+ dbgs() << " Not unrolling loop which is not in loop-simplify form.\n");
return false;
}
unsigned NumInlineCandidates;
bool NotDuplicatable;
bool Convergent;
+ TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
+ L, TTI, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial,
+ ProvidedRuntime, ProvidedUpperBound);
+ // Exit early if unrolling is disabled.
+ if (UP.Threshold == 0 && (!UP.Partial || UP.PartialThreshold == 0))
+ return false;
unsigned LoopSize = ApproximateLoopSize(
- L, NumInlineCandidates, NotDuplicatable, Convergent, TTI, &AC);
+ L, NumInlineCandidates, NotDuplicatable, Convergent, TTI, &AC, UP.BEInsns);
DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
if (NotDuplicatable) {
DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable"
@@ -922,14 +951,10 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
return false;
}
- if (!L->isLoopSimplifyForm()) {
- DEBUG(
- dbgs() << " Not unrolling loop which is not in loop-simplify form.\n");
- return false;
- }
// Find trip count and trip multiple if count is not available
unsigned TripCount = 0;
+ unsigned MaxTripCount = 0;
unsigned TripMultiple = 1;
// If there are multiple exiting blocks but one of them is the latch, use the
// latch for the trip count estimation. Otherwise insist on a single exiting
@@ -942,10 +967,6 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
}
- TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
- L, TTI, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial,
- ProvidedRuntime);
-
// If the loop contains a convergent operation, the prelude we'd add
// to do the first few instructions before we hit the unrolled loop
// is unsafe -- it adds a control-flow dependency to the convergent
@@ -961,8 +982,31 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
if (Convergent)
UP.AllowRemainder = false;
- bool IsCountSetExplicitly = computeUnrollCount(L, TTI, DT, LI, SE, TripCount,
- TripMultiple, LoopSize, UP);
+ // Try to find the trip count upper bound if we cannot find the exact trip
+ // count.
+ bool MaxOrZero = false;
+ if (!TripCount) {
+ MaxTripCount = SE->getSmallConstantMaxTripCount(L);
+ MaxOrZero = SE->isBackedgeTakenCountMaxOrZero(L);
+ // We can unroll by the upper bound amount if it's generally allowed or if
+ // we know that the loop is executed either the upper bound or zero times.
+ // (MaxOrZero unrolling keeps only the first loop test, so the number of
+ // loop tests remains the same compared to the non-unrolled version, whereas
+    // the generic upper bound unrolling keeps all but the last loop test, so the
+    // number of loop tests goes up, which may end up being worse on targets with
+    // constrained branch predictor resources and so is controlled by an option.)
+ // In addition we only unroll small upper bounds.
+ if (!(UP.UpperBound || MaxOrZero) || MaxTripCount > UnrollMaxUpperBound) {
+ MaxTripCount = 0;
+ }
+ }
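For illustration, a hypothetical loop where only an upper bound on the trip count is available:

// SCEV cannot compute an exact trip count here (it depends on x), but it can
// bound it: the body runs at most 7 times, which fits under the default
// -unroll-max-upperbound of 8. Whether that bound is then used depends on
// UP.UpperBound (a target/user setting) or on the loop provably running either
// the maximum or zero times (MaxOrZero).
void clearPrefix(int *A, unsigned x) {
  unsigned n = x & 7;
  for (unsigned i = 0; i < n; ++i)
    A[i] = 0;
}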
+
+ // computeUnrollCount() decides whether it is beneficial to use upper bound to
+ // fully unroll the loop.
+ bool UseUpperBound = false;
+ bool IsCountSetExplicitly =
+ computeUnrollCount(L, TTI, DT, LI, SE, &ORE, TripCount, MaxTripCount,
+ TripMultiple, LoopSize, UP, UseUpperBound);
if (!UP.Count)
return false;
// Unroll factor (Count) must be less or equal to TripCount.
@@ -971,14 +1015,18 @@ static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
// Unroll the loop.
if (!UnrollLoop(L, UP.Count, TripCount, UP.Force, UP.Runtime,
- UP.AllowExpensiveTripCount, TripMultiple, LI, SE, &DT, &AC,
+ UP.AllowExpensiveTripCount, UseUpperBound, MaxOrZero,
+ TripMultiple, UP.PeelCount, LI, SE, &DT, &AC, &ORE,
PreserveLCSSA))
return false;
// If loop has an unroll count pragma or unrolled by explicitly set count
// mark loop as unrolled to prevent unrolling beyond that requested.
- if (IsCountSetExplicitly)
+ // If the loop was peeled, we already "used up" the profile information
+ // we had, so we don't want to unroll or peel again.
+ if (IsCountSetExplicitly || UP.PeelCount)
SetLoopAlreadyUnrolled(L);
+
return true;
}
@@ -988,10 +1036,11 @@ public:
static char ID; // Pass ID, replacement for typeid
LoopUnroll(Optional<unsigned> Threshold = None,
Optional<unsigned> Count = None,
- Optional<bool> AllowPartial = None, Optional<bool> Runtime = None)
+ Optional<bool> AllowPartial = None, Optional<bool> Runtime = None,
+ Optional<bool> UpperBound = None)
: LoopPass(ID), ProvidedCount(std::move(Count)),
ProvidedThreshold(Threshold), ProvidedAllowPartial(AllowPartial),
- ProvidedRuntime(Runtime) {
+ ProvidedRuntime(Runtime), ProvidedUpperBound(UpperBound) {
initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
}
@@ -999,6 +1048,7 @@ public:
Optional<unsigned> ProvidedThreshold;
Optional<bool> ProvidedAllowPartial;
Optional<bool> ProvidedRuntime;
+ Optional<bool> ProvidedUpperBound;
bool runOnLoop(Loop *L, LPPassManager &) override {
if (skipLoop(L))
@@ -1012,11 +1062,16 @@ public:
const TargetTransformInfo &TTI =
getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ // For the old PM, we can't use OptimizationRemarkEmitter as an analysis
+ // pass. Function analyses need to be preserved across loop transformations
+ // but ORE cannot be preserved (see comment before the pass definition).
+ OptimizationRemarkEmitter ORE(&F);
bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
- return tryToUnrollLoop(L, DT, LI, SE, TTI, AC, PreserveLCSSA, ProvidedCount,
- ProvidedThreshold, ProvidedAllowPartial,
- ProvidedRuntime);
+ return tryToUnrollLoop(L, DT, LI, SE, TTI, AC, ORE, PreserveLCSSA,
+ ProvidedCount, ProvidedThreshold,
+ ProvidedAllowPartial, ProvidedRuntime,
+ ProvidedUpperBound);
}
/// This transformation requires natural loop information & requires that
@@ -1040,7 +1095,7 @@ INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial,
- int Runtime) {
+ int Runtime, int UpperBound) {
// TODO: It would make more sense for this function to take the optionals
// directly, but that's dangerous since it would silently break out of tree
// callers.
@@ -1048,9 +1103,51 @@ Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial,
Count == -1 ? None : Optional<unsigned>(Count),
AllowPartial == -1 ? None
: Optional<bool>(AllowPartial),
- Runtime == -1 ? None : Optional<bool>(Runtime));
+ Runtime == -1 ? None : Optional<bool>(Runtime),
+ UpperBound == -1 ? None : Optional<bool>(UpperBound));
}
Pass *llvm::createSimpleLoopUnrollPass() {
- return llvm::createLoopUnrollPass(-1, -1, 0, 0);
+ return llvm::createLoopUnrollPass(-1, -1, 0, 0, 0);
+}
+
+PreservedAnalyses LoopUnrollPass::run(Loop &L, LoopAnalysisManager &AM) {
+ const auto &FAM =
+ AM.getResult<FunctionAnalysisManagerLoopProxy>(L).getManager();
+ Function *F = L.getHeader()->getParent();
+
+ DominatorTree *DT = FAM.getCachedResult<DominatorTreeAnalysis>(*F);
+ LoopInfo *LI = FAM.getCachedResult<LoopAnalysis>(*F);
+ ScalarEvolution *SE = FAM.getCachedResult<ScalarEvolutionAnalysis>(*F);
+ auto *TTI = FAM.getCachedResult<TargetIRAnalysis>(*F);
+ auto *AC = FAM.getCachedResult<AssumptionAnalysis>(*F);
+ auto *ORE = FAM.getCachedResult<OptimizationRemarkEmitterAnalysis>(*F);
+ if (!DT)
+ report_fatal_error(
+ "LoopUnrollPass: DominatorTreeAnalysis not cached at a higher level");
+ if (!LI)
+ report_fatal_error(
+ "LoopUnrollPass: LoopAnalysis not cached at a higher level");
+ if (!SE)
+ report_fatal_error(
+ "LoopUnrollPass: ScalarEvolutionAnalysis not cached at a higher level");
+ if (!TTI)
+ report_fatal_error(
+ "LoopUnrollPass: TargetIRAnalysis not cached at a higher level");
+ if (!AC)
+ report_fatal_error(
+ "LoopUnrollPass: AssumptionAnalysis not cached at a higher level");
+ if (!ORE)
+ report_fatal_error("LoopUnrollPass: OptimizationRemarkEmitterAnalysis not "
+ "cached at a higher level");
+
+ bool Changed =
+ tryToUnrollLoop(&L, *DT, LI, SE, *TTI, *AC, *ORE, /*PreserveLCSSA*/ true,
+ ProvidedCount, ProvidedThreshold, ProvidedAllowPartial,
+ ProvidedRuntime, ProvidedUpperBound);
+
+ if (!Changed)
+ return PreservedAnalyses::all();
+ return getLoopPassPreservedAnalyses();
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
index 71980e85e8ca..6f7682c96cef 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -210,7 +210,7 @@ namespace {
bool runOnLoop(Loop *L, LPPassManager &LPM) override;
bool processCurrentLoop();
-
+ bool isUnreachableDueToPreviousUnswitching(BasicBlock *);
/// This transformation requires natural loop information & requires that
/// loop preheaders be inserted into the CFG.
///
@@ -483,6 +483,35 @@ bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPM_Ref) {
return Changed;
}
+// Return true if the BasicBlock BB is unreachable from the loop header.
+// Return false otherwise.
+bool LoopUnswitch::isUnreachableDueToPreviousUnswitching(BasicBlock *BB) {
+ auto *Node = DT->getNode(BB)->getIDom();
+ BasicBlock *DomBB = Node->getBlock();
+ while (currentLoop->contains(DomBB)) {
+ BranchInst *BInst = dyn_cast<BranchInst>(DomBB->getTerminator());
+
+ Node = DT->getNode(DomBB)->getIDom();
+ DomBB = Node->getBlock();
+
+ if (!BInst || !BInst->isConditional())
+ continue;
+
+ Value *Cond = BInst->getCondition();
+ if (!isa<ConstantInt>(Cond))
+ continue;
+
+ BasicBlock *UnreachableSucc =
+ Cond == ConstantInt::getTrue(Cond->getContext())
+ ? BInst->getSuccessor(1)
+ : BInst->getSuccessor(0);
+
+ if (DT->dominates(UnreachableSucc, BB))
+ return true;
+ }
+ return false;
+}
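A sketch of the situation this guards against (hypothetical control flow, described in comments):

// A previous unswitch left a branch in the loop with a constant condition,
// conceptually:  if (true) { live path } else { dead path, still in the loop }
// Every block dominated by the dead successor can never execute. Walking up the
// immediate-dominator chain from BB, finding such a constant branch, and seeing
// that its untaken successor dominates BB therefore proves BB is unreachable,
// so processCurrentLoop() skips unswitching conditions found in it.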
+
/// Do actual work and unswitch loop if possible and profitable.
bool LoopUnswitch::processCurrentLoop() {
bool Changed = false;
@@ -593,6 +622,12 @@ bool LoopUnswitch::processCurrentLoop() {
continue;
if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ // Some branches may be rendered unreachable because of previous
+ // unswitching.
+ // Unswitch only those branches that are reachable.
+ if (isUnreachableDueToPreviousUnswitching(*I))
+ continue;
+
// If this isn't branching on an invariant condition, we can't unswitch
// it.
if (BI->isConditional()) {
@@ -742,42 +777,6 @@ static Loop *CloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM,
return &New;
}
-static void copyMetadata(Instruction *DstInst, const Instruction *SrcInst,
- bool Swapped) {
- if (!SrcInst || !SrcInst->hasMetadata())
- return;
-
- SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
- SrcInst->getAllMetadata(MDs);
- for (auto &MD : MDs) {
- switch (MD.first) {
- default:
- break;
- case LLVMContext::MD_prof:
- if (Swapped && MD.second->getNumOperands() == 3 &&
- isa<MDString>(MD.second->getOperand(0))) {
- MDString *MDName = cast<MDString>(MD.second->getOperand(0));
- if (MDName->getString() == "branch_weights") {
- auto *ValT = cast_or_null<ConstantAsMetadata>(
- MD.second->getOperand(1))->getValue();
- auto *ValF = cast_or_null<ConstantAsMetadata>(
- MD.second->getOperand(2))->getValue();
- assert(ValT && ValF && "Invalid Operands of branch_weights");
- auto NewMD =
- MDBuilder(DstInst->getParent()->getContext())
- .createBranchWeights(cast<ConstantInt>(ValF)->getZExtValue(),
- cast<ConstantInt>(ValT)->getZExtValue());
- MD.second = NewMD;
- }
- }
- // fallthrough.
- case LLVMContext::MD_make_implicit:
- case LLVMContext::MD_dbg:
- DstInst->setMetadata(MD.first, MD.second);
- }
- }
-}
-
/// Emit a conditional branch on two values if LIC == Val, branch to TrueDst,
/// otherwise branch to FalseDest. Insert the code immediately before InsertPt.
void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
@@ -799,8 +798,10 @@ void LoopUnswitch::EmitPreheaderBranchOnCondition(Value *LIC, Constant *Val,
}
// Insert the new branch.
- BranchInst *BI = BranchInst::Create(TrueDest, FalseDest, BranchVal, InsertPt);
- copyMetadata(BI, TI, Swapped);
+ BranchInst *BI =
+ IRBuilder<>(InsertPt).CreateCondBr(BranchVal, TrueDest, FalseDest, TI);
+ if (Swapped)
+ BI->swapProfMetadata();
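For context, a hedged sketch of what the swap does to the profile metadata (hypothetical weights):

// The source branch carried, say, !prof !{!"branch_weights", i32 1000, i32 3}.
// When the successors were emitted in swapped order, swapProfMetadata()
// exchanges the two weight operands so the hot edge keeps its weight; the
// removed copyMetadata helper achieved the same by rebuilding the node with
// createBranchWeights(ValF, ValT).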
// If either edge is critical, split it. This helps preserve LoopSimplify
// form for enclosing loops.
@@ -1078,10 +1079,6 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
F->getBasicBlockList(),
NewBlocks[0]->getIterator(), F->end());
- // FIXME: We could register any cloned assumptions instead of clearing the
- // whole function's cache.
- AC->clear();
-
// Now we create the new Loop object for the versioned loop.
Loop *NewLoop = CloneLoop(L, L->getParentLoop(), VMap, LI, LPM);
@@ -1131,10 +1128,15 @@ void LoopUnswitch::UnswitchNontrivialCondition(Value *LIC, Constant *Val,
}
// Rewrite the code to refer to itself.
- for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i)
- for (Instruction &I : *NewBlocks[i])
+ for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) {
+ for (Instruction &I : *NewBlocks[i]) {
RemapInstruction(&I, VMap,
RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
+ if (auto *II = dyn_cast<IntrinsicInst>(&I))
+ if (II->getIntrinsicID() == Intrinsic::assume)
+ AC->registerAssumption(II);
+ }
+ }
// Rewrite the original preheader to select between versions of the loop.
BranchInst *OldBR = cast<BranchInst>(loopPreheader->getTerminator());
diff --git a/contrib/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp b/contrib/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
index 0ccf0af7165b..c23d891b6504 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LoopVersioningLICM.cpp
@@ -92,8 +92,7 @@
#include "llvm/Transforms/Utils/ValueMapper.h"
#define DEBUG_TYPE "loop-versioning-licm"
-static const char* LICMVersioningMetaData =
- "llvm.loop.licm_versioning.disable";
+static const char *LICMVersioningMetaData = "llvm.loop.licm_versioning.disable";
using namespace llvm;
@@ -158,34 +157,48 @@ struct LoopVersioningLICM : public LoopPass {
AU.addRequired<LoopInfoWrapperPass>();
AU.addRequiredID(LoopSimplifyID);
AU.addRequired<ScalarEvolutionWrapperPass>();
- AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addPreserved<AAResultsWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
}
LoopVersioningLICM()
- : LoopPass(ID), AA(nullptr), SE(nullptr), LI(nullptr), DT(nullptr),
- TLI(nullptr), LAA(nullptr), LAI(nullptr), Changed(false),
- Preheader(nullptr), CurLoop(nullptr), CurAST(nullptr),
- LoopDepthThreshold(LVLoopDepthThreshold),
+ : LoopPass(ID), AA(nullptr), SE(nullptr), LAA(nullptr), LAI(nullptr),
+ CurLoop(nullptr), LoopDepthThreshold(LVLoopDepthThreshold),
InvariantThreshold(LVInvarThreshold), LoadAndStoreCounter(0),
InvariantCounter(0), IsReadOnlyLoop(true) {
initializeLoopVersioningLICMPass(*PassRegistry::getPassRegistry());
}
+ StringRef getPassName() const override { return "Loop Versioning for LICM"; }
- AliasAnalysis *AA; // Current AliasAnalysis information
- ScalarEvolution *SE; // Current ScalarEvolution
- LoopInfo *LI; // Current LoopInfo
- DominatorTree *DT; // Dominator Tree for the current Loop.
- TargetLibraryInfo *TLI; // TargetLibraryInfo for constant folding.
- LoopAccessLegacyAnalysis *LAA; // Current LoopAccessAnalysis
- const LoopAccessInfo *LAI; // Current Loop's LoopAccessInfo
+ void reset() {
+ AA = nullptr;
+ SE = nullptr;
+ LAA = nullptr;
+ CurLoop = nullptr;
+ LoadAndStoreCounter = 0;
+ InvariantCounter = 0;
+ IsReadOnlyLoop = true;
+ CurAST.reset();
+ }
+
+ class AutoResetter {
+ public:
+ AutoResetter(LoopVersioningLICM &LVLICM) : LVLICM(LVLICM) {}
+ ~AutoResetter() { LVLICM.reset(); }
+
+ private:
+ LoopVersioningLICM &LVLICM;
+ };
- bool Changed; // Set to true when we change anything.
- BasicBlock *Preheader; // The preheader block of the current loop.
- Loop *CurLoop; // The current loop we are working on.
- AliasSetTracker *CurAST; // AliasSet information for the current loop.
- ValueToValueMap Strides;
+private:
+ AliasAnalysis *AA; // Current AliasAnalysis information
+ ScalarEvolution *SE; // Current ScalarEvolution
+ LoopAccessLegacyAnalysis *LAA; // Current LoopAccessAnalysis
+ const LoopAccessInfo *LAI; // Current Loop's LoopAccessInfo
+
+ Loop *CurLoop; // The current loop we are working on.
+ std::unique_ptr<AliasSetTracker>
+ CurAST; // AliasSet information for the current loop.
unsigned LoopDepthThreshold; // Maximum loop nest threshold
float InvariantThreshold; // Minimum invariant threshold
@@ -200,15 +213,15 @@ struct LoopVersioningLICM : public LoopPass {
bool isLoopAlreadyVisited();
void setNoAliasToLoop(Loop *);
bool instructionSafeForVersioning(Instruction *);
- const char *getPassName() const override { return "Loop Versioning"; }
};
}
/// \brief Check loop structure and confirm it's good for LoopVersioningLICM.
bool LoopVersioningLICM::legalLoopStructure() {
- // Loop must have a preheader, if not return false.
- if (!CurLoop->getLoopPreheader()) {
- DEBUG(dbgs() << " loop preheader is missing\n");
+ // Loop must be in loop simplify form.
+ if (!CurLoop->isLoopSimplifyForm()) {
+ DEBUG(
+ dbgs() << " loop is not in loop-simplify form.\n");
return false;
}
// Loop should be innermost loop, if not return false.
@@ -244,11 +257,6 @@ bool LoopVersioningLICM::legalLoopStructure() {
DEBUG(dbgs() << " loop depth is more than threshold\n");
return false;
}
- // Loop should have a dedicated exit block, if not return false.
- if (!CurLoop->hasDedicatedExits()) {
- DEBUG(dbgs() << " loop does not has dedicated exit blocks\n");
- return false;
- }
// We need to be able to compute the loop trip count in order
// to generate the bound checks.
const SCEV *ExitCount = SE->getBackedgeTakenCount(CurLoop);
@@ -505,29 +513,30 @@ void LoopVersioningLICM::setNoAliasToLoop(Loop *VerLoop) {
}
bool LoopVersioningLICM::runOnLoop(Loop *L, LPPassManager &LPM) {
+ // This will automatically release all resources held by the current
+ // LoopVersioningLICM object.
+ AutoResetter Resetter(*this);
+
if (skipLoop(L))
return false;
- Changed = false;
// Get Analysis information.
- LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
LAA = &getAnalysis<LoopAccessLegacyAnalysis>();
LAI = nullptr;
// Set Current Loop
CurLoop = L;
- // Get the preheader block.
- Preheader = L->getLoopPreheader();
- // Initial allocation
- CurAST = new AliasSetTracker(*AA);
+ CurAST.reset(new AliasSetTracker(*AA));
// Loop over the body of this loop, construct AST.
+ LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
for (auto *Block : L->getBlocks()) {
if (LI->getLoopFor(Block) == L) // Ignore blocks in subloop.
CurAST->add(*Block); // Incorporate the specified basic block
}
+
+ bool Changed = false;
+
// Check feasibility of LoopVersioningLICM.
// If versioning is found to be feasible and beneficial then proceed,
// else simply return after cleaning up memory.
@@ -535,6 +544,7 @@ bool LoopVersioningLICM::runOnLoop(Loop *L, LPPassManager &LPM) {
// Do loop versioning.
// Create memcheck for memory accessed inside loop.
// Clone original loop, and set blocks properly.
+ DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
LoopVersioning LVer(*LAI, CurLoop, LI, DT, SE, true);
LVer.versionLoop();
// Set Loop Versioning metaData for original loop.
@@ -548,8 +558,6 @@ bool LoopVersioningLICM::runOnLoop(Loop *L, LPPassManager &LPM) {
setNoAliasToLoop(LVer.getVersionedLoop());
Changed = true;
}
- // Delete allocated memory.
- delete CurAST;
return Changed;
}
@@ -564,7 +572,6 @@ INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(LoopVersioningLICM, "loop-versioning-licm",
"Loop Versioning For LICM", false, false)
diff --git a/contrib/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp b/contrib/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
index 79f0db1163a4..52975ef35153 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
@@ -83,9 +83,8 @@ static bool handleSwitchExpect(SwitchInst &SI) {
return true;
}
-static bool handleBranchExpect(BranchInst &BI) {
- if (BI.isUnconditional())
- return false;
+// Handle both BranchInst and SelectInst.
+template <class BrSelInst> static bool handleBrSelExpect(BrSelInst &BSI) {
// Handle non-optimized IR code like:
// %expval = call i64 @llvm.expect.i64(i64 %conv1, i64 1)
@@ -98,9 +97,9 @@ static bool handleBranchExpect(BranchInst &BI) {
CallInst *CI;
- ICmpInst *CmpI = dyn_cast<ICmpInst>(BI.getCondition());
+ ICmpInst *CmpI = dyn_cast<ICmpInst>(BSI.getCondition());
if (!CmpI) {
- CI = dyn_cast<CallInst>(BI.getCondition());
+ CI = dyn_cast<CallInst>(BSI.getCondition());
} else {
if (CmpI->getPredicate() != CmpInst::ICMP_NE)
return false;
@@ -129,15 +128,22 @@ static bool handleBranchExpect(BranchInst &BI) {
else
Node = MDB.createBranchWeights(UnlikelyBranchWeight, LikelyBranchWeight);
- BI.setMetadata(LLVMContext::MD_prof, Node);
+ BSI.setMetadata(LLVMContext::MD_prof, Node);
if (CmpI)
CmpI->setOperand(0, ArgValue);
else
- BI.setCondition(ArgValue);
+ BSI.setCondition(ArgValue);
return true;
}
+static bool handleBranchExpect(BranchInst &BI) {
+ if (BI.isUnconditional())
+ return false;
+
+ return handleBrSelExpect<BranchInst>(BI);
+}
+
static bool lowerExpectIntrinsic(Function &F) {
bool Changed = false;
@@ -151,11 +157,19 @@ static bool lowerExpectIntrinsic(Function &F) {
ExpectIntrinsicsHandled++;
}
- // Remove llvm.expect intrinsics.
- for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); BI != BE;) {
- CallInst *CI = dyn_cast<CallInst>(BI++);
- if (!CI)
+ // Remove llvm.expect intrinsics. Iterate backwards in order
+ // to process select instructions before the intrinsic gets
+ // removed.
+ for (auto BI = BB.rbegin(), BE = BB.rend(); BI != BE;) {
+ Instruction *Inst = &*BI++;
+ CallInst *CI = dyn_cast<CallInst>(Inst);
+ if (!CI) {
+ if (SelectInst *SI = dyn_cast<SelectInst>(Inst)) {
+ if (handleBrSelExpect(*SI))
+ ExpectIntrinsicsHandled++;
+ }
continue;
+ }
Function *Fn = CI->getCalledFunction();
if (Fn && Fn->getIntrinsicID() == Intrinsic::expect) {
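
The LowerExpectIntrinsic hunks above turn handleBranchExpect into a template, handleBrSelExpect, that works for both BranchInst and SelectInst because the two classes expose the same getCondition/setCondition/setMetadata members. A toy sketch of that duck-typed template technique, using hypothetical Branch and Select classes rather than the LLVM types:

    #include <iostream>

    // Two unrelated classes that happen to share the same member names.
    struct Branch {
      int Cond = 0;
      int getCondition() const { return Cond; }
      void setCondition(int C) { Cond = C; }
    };
    struct Select {
      int Cond = 1;
      int getCondition() const { return Cond; }
      void setCondition(int C) { Cond = C; }
    };

    // One template body handles both, just as handleBrSelExpect does for
    // BranchInst and SelectInst.
    template <class BrSel> bool handleExpect(BrSel &I) {
      if (I.getCondition() == 0)
        return false;       // nothing to rewrite
      I.setCondition(42);   // stand-in for replacing the expect call's operand
      return true;
    }

    int main() {
      Branch B;
      Select S;
      std::cout << handleExpect(B) << handleExpect(S) << "\n"; // prints "01"
    }
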
diff --git a/contrib/llvm/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp b/contrib/llvm/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp
index 57491007d014..4f413715ffe6 100644
--- a/contrib/llvm/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LowerGuardIntrinsic.cpp
@@ -13,7 +13,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/LowerGuardIntrinsic.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Function.h"
@@ -24,6 +24,7 @@
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;
@@ -34,10 +35,11 @@ static cl::opt<uint32_t> PredicatePassBranchWeight(
"reciprocal of this value (default = 1 << 20)"));
namespace {
-struct LowerGuardIntrinsic : public FunctionPass {
+struct LowerGuardIntrinsicLegacyPass : public FunctionPass {
static char ID;
- LowerGuardIntrinsic() : FunctionPass(ID) {
- initializeLowerGuardIntrinsicPass(*PassRegistry::getPassRegistry());
+ LowerGuardIntrinsicLegacyPass() : FunctionPass(ID) {
+ initializeLowerGuardIntrinsicLegacyPassPass(
+ *PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override;
@@ -83,7 +85,7 @@ static void MakeGuardControlFlowExplicit(Function *DeoptIntrinsic,
DeoptBlockTerm->eraseFromParent();
}
-bool LowerGuardIntrinsic::runOnFunction(Function &F) {
+static bool lowerGuardIntrinsic(Function &F) {
// Check if we can cheaply rule out the possibility of not having any work to
// do.
auto *GuardDecl = F.getParent()->getFunction(
@@ -113,11 +115,23 @@ bool LowerGuardIntrinsic::runOnFunction(Function &F) {
return true;
}
-char LowerGuardIntrinsic::ID = 0;
-INITIALIZE_PASS(LowerGuardIntrinsic, "lower-guard-intrinsic",
+bool LowerGuardIntrinsicLegacyPass::runOnFunction(Function &F) {
+ return lowerGuardIntrinsic(F);
+}
+
+char LowerGuardIntrinsicLegacyPass::ID = 0;
+INITIALIZE_PASS(LowerGuardIntrinsicLegacyPass, "lower-guard-intrinsic",
"Lower the guard intrinsic to normal control flow", false,
false)
Pass *llvm::createLowerGuardIntrinsicPass() {
- return new LowerGuardIntrinsic();
+ return new LowerGuardIntrinsicLegacyPass();
+}
+
+PreservedAnalyses LowerGuardIntrinsicPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ if (lowerGuardIntrinsic(F))
+ return PreservedAnalyses::none();
+
+ return PreservedAnalyses::all();
}
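
The LowerGuardIntrinsic hunks above follow the usual new-pass-manager porting recipe: the transform body becomes a free function, and both the legacy pass and the new LowerGuardIntrinsicPass are thin adapters around it. A schematic sketch of that structure, with hypothetical stand-ins for Function and the preserved-analyses result:

    #include <iostream>

    struct Function { bool HasGuards = false; }; // stand-in for llvm::Function

    // Shared implementation; returns true if the IR changed.
    static bool lowerGuards(Function &F) {
      bool Changed = F.HasGuards;
      F.HasGuards = false;
      return Changed;
    }

    // Thin legacy-pass-manager style adapter.
    struct LegacyPass {
      bool runOnFunction(Function &F) { return lowerGuards(F); }
    };

    // Thin new-pass-manager style adapter: report which analyses survive.
    enum class Preserved { All, None };
    struct NewPMPass {
      Preserved run(Function &F) {
        return lowerGuards(F) ? Preserved::None : Preserved::All;
      }
    };

    int main() {
      Function F{true};
      LegacyPass LP;
      NewPMPass NP;
      bool ChangedFirst = LP.runOnFunction(F);            // lowers the guard
      bool NothingLeft = NP.run(F) == Preserved::All;     // second run changes nothing
      std::cout << ChangedFirst << " " << NothingLeft << "\n"; // prints "1 1"
    }
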
diff --git a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index d64c658f8436..1b590140f70a 100644
--- a/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -52,7 +52,7 @@ static int64_t GetOffsetFromIndex(const GEPOperator *GEP, unsigned Idx,
if (OpC->isZero()) continue; // No offset.
// Handle struct indices, which add their field offset to the pointer.
- if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ if (StructType *STy = GTI.getStructTypeOrNull()) {
Offset += DL.getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
continue;
}
@@ -489,7 +489,8 @@ static unsigned findCommonAlignment(const DataLayout &DL, const StoreInst *SI,
// It will lift the store and its argument as well as anything that
// may alias with these.
// The method returns true if it was successful.
-static bool moveUp(AliasAnalysis &AA, StoreInst *SI, Instruction *P) {
+static bool moveUp(AliasAnalysis &AA, StoreInst *SI, Instruction *P,
+ const LoadInst *LI) {
// If the store alias this position, early bail out.
MemoryLocation StoreLoc = MemoryLocation::get(SI);
if (AA.getModRefInfo(P, StoreLoc) != MRI_NoModRef)
@@ -506,12 +507,13 @@ static bool moveUp(AliasAnalysis &AA, StoreInst *SI, Instruction *P) {
SmallVector<Instruction*, 8> ToLift;
// Memory locations of lifted instructions.
- SmallVector<MemoryLocation, 8> MemLocs;
- MemLocs.push_back(StoreLoc);
+ SmallVector<MemoryLocation, 8> MemLocs{StoreLoc};
// Lifted callsites.
SmallVector<ImmutableCallSite, 8> CallSites;
+ const MemoryLocation LoadLoc = MemoryLocation::get(LI);
+
for (auto I = --SI->getIterator(), E = P->getIterator(); I != E; --I) {
auto *C = &*I;
@@ -521,23 +523,25 @@ static bool moveUp(AliasAnalysis &AA, StoreInst *SI, Instruction *P) {
if (Args.erase(C))
NeedLift = true;
else if (MayAlias) {
- NeedLift = std::any_of(MemLocs.begin(), MemLocs.end(),
- [C, &AA](const MemoryLocation &ML) {
- return AA.getModRefInfo(C, ML);
- });
+ NeedLift = any_of(MemLocs, [C, &AA](const MemoryLocation &ML) {
+ return AA.getModRefInfo(C, ML);
+ });
if (!NeedLift)
- NeedLift = std::any_of(CallSites.begin(), CallSites.end(),
- [C, &AA](const ImmutableCallSite &CS) {
- return AA.getModRefInfo(C, CS);
- });
+ NeedLift = any_of(CallSites, [C, &AA](const ImmutableCallSite &CS) {
+ return AA.getModRefInfo(C, CS);
+ });
}
if (!NeedLift)
continue;
if (MayAlias) {
- if (auto CS = ImmutableCallSite(C)) {
+ // Since LI is implicitly moved downwards past the lifted instructions,
+ // none of them may modify its source.
+ if (AA.getModRefInfo(C, LoadLoc) & MRI_Mod)
+ return false;
+ else if (auto CS = ImmutableCallSite(C)) {
// If we can't lift this before P, it's game over.
if (AA.getModRefInfo(P, CS) != MRI_NoModRef)
return false;
@@ -612,7 +616,7 @@ bool MemCpyOptPass::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
// position if nothing alias the store memory after this and the store
// destination is not in the range.
if (P && P != SI) {
- if (!moveUp(AA, SI, P))
+ if (!moveUp(AA, SI, P, LI))
P = nullptr;
}
@@ -1082,10 +1086,10 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
DestSize = Builder.CreateZExt(DestSize, SrcSize->getType());
}
- Value *MemsetLen =
- Builder.CreateSelect(Builder.CreateICmpULE(DestSize, SrcSize),
- ConstantInt::getNullValue(DestSize->getType()),
- Builder.CreateSub(DestSize, SrcSize));
+ Value *Ule = Builder.CreateICmpULE(DestSize, SrcSize);
+ Value *SizeDiff = Builder.CreateSub(DestSize, SrcSize);
+ Value *MemsetLen = Builder.CreateSelect(
+ Ule, ConstantInt::getNullValue(DestSize->getType()), SizeDiff);
Builder.CreateMemSet(Builder.CreateGEP(Dest, SrcSize), MemSet->getOperand(1),
MemsetLen, Align);
@@ -1110,8 +1114,11 @@ bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
/// The \p MemCpy must have a Constant length.
bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
MemSetInst *MemSet) {
- // This only makes sense on memcpy(..., memset(...), ...).
- if (MemSet->getRawDest() != MemCpy->getRawSource())
+ AliasAnalysis &AA = LookupAliasAnalysis();
+
+ // Make sure this is a memcpy(..., memset(...), ...), that is, we are memsetting and
+ // memcpying from the same address. Otherwise it is hard to reason about.
+ if (!AA.isMustAlias(MemSet->getRawDest(), MemCpy->getRawSource()))
return false;
ConstantInt *CopySize = cast<ConstantInt>(MemCpy->getLength());
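
Among the MemCpyOptimizer changes above, moveUp switches from std::any_of(begin, end, pred) to the range form any_of(range, pred) from LLVM's STLExtras. A small standard-C++ sketch of such a range helper (any_of_range below is a hypothetical name, not the LLVM one):

    #include <algorithm>
    #include <iostream>
    #include <iterator>
    #include <vector>

    // Range-based wrapper, analogous in spirit to llvm::any_of.
    template <typename Range, typename Pred>
    bool any_of_range(const Range &R, Pred P) {
      return std::any_of(std::begin(R), std::end(R), P);
    }

    int main() {
      std::vector<int> MemLocs{3, 7, 12};
      // Does any "location" conflict (here: is divisible by 4)?
      bool Conflict = any_of_range(MemLocs, [](int L) { return L % 4 == 0; });
      std::cout << Conflict << "\n"; // prints 1
    }
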
diff --git a/contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp b/contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
index 30261b755001..6a64c6b3619c 100644
--- a/contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/MergedLoadStoreMotion.cpp
@@ -260,7 +260,7 @@ void MergedLoadStoreMotion::hoistInstruction(BasicBlock *BB,
assert(HoistCand->getParent() != BB);
// Intersect optional metadata.
- HoistCand->intersectOptionalDataWith(ElseInst);
+ HoistCand->andIRFlags(ElseInst);
HoistCand->dropUnknownNonDebugMetadata();
// Prepend point for instruction insert
@@ -434,7 +434,7 @@ bool MergedLoadStoreMotion::sinkStore(BasicBlock *BB, StoreInst *S0,
// Hoist the instruction.
BasicBlock::iterator InsertPt = BB->getFirstInsertionPt();
// Intersect optional metadata.
- S0->intersectOptionalDataWith(S1);
+ S0->andIRFlags(S1);
S0->dropUnknownNonDebugMetadata();
// Create the new store to be inserted at the join point.
@@ -563,7 +563,6 @@ public:
}
private:
- // This transformation requires dominator postdominator info
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<AAResultsWrapperPass>();
@@ -590,7 +589,7 @@ INITIALIZE_PASS_END(MergedLoadStoreMotionLegacyPass, "mldst-motion",
"MergedLoadStoreMotion", false, false)
PreservedAnalyses
-MergedLoadStoreMotionPass::run(Function &F, AnalysisManager<Function> &AM) {
+MergedLoadStoreMotionPass::run(Function &F, FunctionAnalysisManager &AM) {
MergedLoadStoreMotion Impl;
auto *MD = AM.getCachedResult<MemoryDependenceAnalysis>(F);
auto &AA = AM.getResult<AAManager>(F);
diff --git a/contrib/llvm/lib/Transforms/Scalar/NaryReassociate.cpp b/contrib/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
index ed754fa71025..0a3bf7b4c31b 100644
--- a/contrib/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/NaryReassociate.cpp
@@ -76,12 +76,8 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Analysis/AssumptionCache.h"
-#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/Analysis/TargetLibraryInfo.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Transforms/Scalar/NaryReassociate.h"
#include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/Dominators.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/Debug.h"
@@ -94,16 +90,15 @@ using namespace PatternMatch;
#define DEBUG_TYPE "nary-reassociate"
namespace {
-class NaryReassociate : public FunctionPass {
+class NaryReassociateLegacyPass : public FunctionPass {
public:
static char ID;
- NaryReassociate(): FunctionPass(ID) {
- initializeNaryReassociatePass(*PassRegistry::getPassRegistry());
+ NaryReassociateLegacyPass() : FunctionPass(ID) {
+ initializeNaryReassociateLegacyPassPass(*PassRegistry::getPassRegistry());
}
bool doInitialization(Module &M) override {
- DL = &M.getDataLayout();
return false;
}
bool runOnFunction(Function &F) override;
@@ -121,101 +116,73 @@ public:
}
private:
- // Runs only one iteration of the dominator-based algorithm. See the header
- // comments for why we need multiple iterations.
- bool doOneIteration(Function &F);
-
- // Reassociates I for better CSE.
- Instruction *tryReassociate(Instruction *I);
-
- // Reassociate GEP for better CSE.
- Instruction *tryReassociateGEP(GetElementPtrInst *GEP);
- // Try splitting GEP at the I-th index and see whether either part can be
- // CSE'ed. This is a helper function for tryReassociateGEP.
- //
- // \p IndexedType The element type indexed by GEP's I-th index. This is
- // equivalent to
- // GEP->getIndexedType(GEP->getPointerOperand(), 0-th index,
- // ..., i-th index).
- GetElementPtrInst *tryReassociateGEPAtIndex(GetElementPtrInst *GEP,
- unsigned I, Type *IndexedType);
- // Given GEP's I-th index = LHS + RHS, see whether &Base[..][LHS][..] or
- // &Base[..][RHS][..] can be CSE'ed and rewrite GEP accordingly.
- GetElementPtrInst *tryReassociateGEPAtIndex(GetElementPtrInst *GEP,
- unsigned I, Value *LHS,
- Value *RHS, Type *IndexedType);
-
- // Reassociate binary operators for better CSE.
- Instruction *tryReassociateBinaryOp(BinaryOperator *I);
-
- // A helper function for tryReassociateBinaryOp. LHS and RHS are explicitly
- // passed.
- Instruction *tryReassociateBinaryOp(Value *LHS, Value *RHS,
- BinaryOperator *I);
- // Rewrites I to (LHS op RHS) if LHS is computed already.
- Instruction *tryReassociatedBinaryOp(const SCEV *LHS, Value *RHS,
- BinaryOperator *I);
-
- // Tries to match Op1 and Op2 by using V.
- bool matchTernaryOp(BinaryOperator *I, Value *V, Value *&Op1, Value *&Op2);
-
- // Gets SCEV for (LHS op RHS).
- const SCEV *getBinarySCEV(BinaryOperator *I, const SCEV *LHS,
- const SCEV *RHS);
-
- // Returns the closest dominator of \c Dominatee that computes
- // \c CandidateExpr. Returns null if not found.
- Instruction *findClosestMatchingDominator(const SCEV *CandidateExpr,
- Instruction *Dominatee);
- // GetElementPtrInst implicitly sign-extends an index if the index is shorter
- // than the pointer size. This function returns whether Index is shorter than
- // GEP's pointer size, i.e., whether Index needs to be sign-extended in order
- // to be an index of GEP.
- bool requiresSignExtension(Value *Index, GetElementPtrInst *GEP);
-
- AssumptionCache *AC;
- const DataLayout *DL;
- DominatorTree *DT;
- ScalarEvolution *SE;
- TargetLibraryInfo *TLI;
- TargetTransformInfo *TTI;
- // A lookup table quickly telling which instructions compute the given SCEV.
- // Note that there can be multiple instructions at different locations
- // computing to the same SCEV, so we map a SCEV to an instruction list. For
- // example,
- //
- // if (p1)
- // foo(a + b);
- // if (p2)
- // bar(a + b);
- DenseMap<const SCEV *, SmallVector<WeakVH, 2>> SeenExprs;
+ NaryReassociatePass Impl;
};
} // anonymous namespace
-char NaryReassociate::ID = 0;
-INITIALIZE_PASS_BEGIN(NaryReassociate, "nary-reassociate", "Nary reassociation",
- false, false)
+char NaryReassociateLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(NaryReassociateLegacyPass, "nary-reassociate",
+ "Nary reassociation", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_END(NaryReassociate, "nary-reassociate", "Nary reassociation",
- false, false)
+INITIALIZE_PASS_END(NaryReassociateLegacyPass, "nary-reassociate",
+ "Nary reassociation", false, false)
FunctionPass *llvm::createNaryReassociatePass() {
- return new NaryReassociate();
+ return new NaryReassociateLegacyPass();
}
-bool NaryReassociate::runOnFunction(Function &F) {
+bool NaryReassociateLegacyPass::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
- AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
- TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ auto *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+
+ return Impl.runImpl(F, AC, DT, SE, TLI, TTI);
+}
+
+PreservedAnalyses NaryReassociatePass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ auto *AC = &AM.getResult<AssumptionAnalysis>(F);
+ auto *DT = &AM.getResult<DominatorTreeAnalysis>(F);
+ auto *SE = &AM.getResult<ScalarEvolutionAnalysis>(F);
+ auto *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
+ auto *TTI = &AM.getResult<TargetIRAnalysis>(F);
+
+ bool Changed = runImpl(F, AC, DT, SE, TLI, TTI);
+
+ // FIXME: We need to invalidate this to avoid PR28400. Is there a better
+ // solution?
+ AM.invalidate<ScalarEvolutionAnalysis>(F);
+
+ if (!Changed)
+ return PreservedAnalyses::all();
+
+ // FIXME: This should also 'preserve the CFG'.
+ PreservedAnalyses PA;
+ PA.preserve<DominatorTreeAnalysis>();
+ PA.preserve<ScalarEvolutionAnalysis>();
+ PA.preserve<TargetLibraryAnalysis>();
+ return PA;
+}
+
+bool NaryReassociatePass::runImpl(Function &F, AssumptionCache *AC_,
+ DominatorTree *DT_, ScalarEvolution *SE_,
+ TargetLibraryInfo *TLI_,
+ TargetTransformInfo *TTI_) {
+ AC = AC_;
+ DT = DT_;
+ SE = SE_;
+ TLI = TLI_;
+ TTI = TTI_;
+ DL = &F.getParent()->getDataLayout();
bool Changed = false, ChangedInThisIteration;
do {
@@ -237,13 +204,13 @@ static bool isPotentiallyNaryReassociable(Instruction *I) {
}
}
-bool NaryReassociate::doOneIteration(Function &F) {
+bool NaryReassociatePass::doOneIteration(Function &F) {
bool Changed = false;
SeenExprs.clear();
- // Process the basic blocks in pre-order of the dominator tree. This order
- // ensures that all bases of a candidate are in Candidates when we process it.
- for (auto Node = GraphTraits<DominatorTree *>::nodes_begin(DT);
- Node != GraphTraits<DominatorTree *>::nodes_end(DT); ++Node) {
+ // Process the basic blocks in a depth first traversal of the dominator
+ // tree. This order ensures that all bases of a candidate are in Candidates
+ // when we process it.
+ for (const auto Node : depth_first(DT)) {
BasicBlock *BB = Node->getBlock();
for (auto I = BB->begin(); I != BB->end(); ++I) {
if (SE->isSCEVable(I->getType()) && isPotentiallyNaryReassociable(&*I)) {
@@ -287,7 +254,7 @@ bool NaryReassociate::doOneIteration(Function &F) {
return Changed;
}
-Instruction *NaryReassociate::tryReassociate(Instruction *I) {
+Instruction *NaryReassociatePass::tryReassociate(Instruction *I) {
switch (I->getOpcode()) {
case Instruction::Add:
case Instruction::Mul:
@@ -308,15 +275,16 @@ static bool isGEPFoldable(GetElementPtrInst *GEP,
Indices) == TargetTransformInfo::TCC_Free;
}
-Instruction *NaryReassociate::tryReassociateGEP(GetElementPtrInst *GEP) {
+Instruction *NaryReassociatePass::tryReassociateGEP(GetElementPtrInst *GEP) {
// Not worth reassociating GEP if it is foldable.
if (isGEPFoldable(GEP, TTI))
return nullptr;
gep_type_iterator GTI = gep_type_begin(*GEP);
- for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I) {
- if (isa<SequentialType>(*GTI++)) {
- if (auto *NewGEP = tryReassociateGEPAtIndex(GEP, I - 1, *GTI)) {
+ for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
+ if (GTI.isSequential()) {
+ if (auto *NewGEP = tryReassociateGEPAtIndex(GEP, I - 1,
+ GTI.getIndexedType())) {
return NewGEP;
}
}
@@ -324,16 +292,16 @@ Instruction *NaryReassociate::tryReassociateGEP(GetElementPtrInst *GEP) {
return nullptr;
}
-bool NaryReassociate::requiresSignExtension(Value *Index,
- GetElementPtrInst *GEP) {
+bool NaryReassociatePass::requiresSignExtension(Value *Index,
+ GetElementPtrInst *GEP) {
unsigned PointerSizeInBits =
DL->getPointerSizeInBits(GEP->getType()->getPointerAddressSpace());
return cast<IntegerType>(Index->getType())->getBitWidth() < PointerSizeInBits;
}
GetElementPtrInst *
-NaryReassociate::tryReassociateGEPAtIndex(GetElementPtrInst *GEP, unsigned I,
- Type *IndexedType) {
+NaryReassociatePass::tryReassociateGEPAtIndex(GetElementPtrInst *GEP,
+ unsigned I, Type *IndexedType) {
Value *IndexToSplit = GEP->getOperand(I + 1);
if (SExtInst *SExt = dyn_cast<SExtInst>(IndexToSplit)) {
IndexToSplit = SExt->getOperand(0);
@@ -366,9 +334,10 @@ NaryReassociate::tryReassociateGEPAtIndex(GetElementPtrInst *GEP, unsigned I,
return nullptr;
}
-GetElementPtrInst *NaryReassociate::tryReassociateGEPAtIndex(
- GetElementPtrInst *GEP, unsigned I, Value *LHS, Value *RHS,
- Type *IndexedType) {
+GetElementPtrInst *
+NaryReassociatePass::tryReassociateGEPAtIndex(GetElementPtrInst *GEP,
+ unsigned I, Value *LHS,
+ Value *RHS, Type *IndexedType) {
// Look for GEP's closest dominator that has the same SCEV as GEP except that
// the I-th index is replaced with LHS.
SmallVector<const SCEV *, 4> IndexExprs;
@@ -386,9 +355,8 @@ GetElementPtrInst *NaryReassociate::tryReassociateGEPAtIndex(
IndexExprs[I] =
SE->getZeroExtendExpr(IndexExprs[I], GEP->getOperand(I)->getType());
}
- const SCEV *CandidateExpr = SE->getGEPExpr(
- GEP->getSourceElementType(), SE->getSCEV(GEP->getPointerOperand()),
- IndexExprs, GEP->isInBounds());
+ const SCEV *CandidateExpr = SE->getGEPExpr(cast<GEPOperator>(GEP),
+ IndexExprs);
Value *Candidate = findClosestMatchingDominator(CandidateExpr, GEP);
if (Candidate == nullptr)
@@ -437,7 +405,7 @@ GetElementPtrInst *NaryReassociate::tryReassociateGEPAtIndex(
return NewGEP;
}
-Instruction *NaryReassociate::tryReassociateBinaryOp(BinaryOperator *I) {
+Instruction *NaryReassociatePass::tryReassociateBinaryOp(BinaryOperator *I) {
Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
if (auto *NewI = tryReassociateBinaryOp(LHS, RHS, I))
return NewI;
@@ -446,8 +414,8 @@ Instruction *NaryReassociate::tryReassociateBinaryOp(BinaryOperator *I) {
return nullptr;
}
-Instruction *NaryReassociate::tryReassociateBinaryOp(Value *LHS, Value *RHS,
- BinaryOperator *I) {
+Instruction *NaryReassociatePass::tryReassociateBinaryOp(Value *LHS, Value *RHS,
+ BinaryOperator *I) {
Value *A = nullptr, *B = nullptr;
// To be conservative, we reassociate I only when it is the only user of (A op
// B).
@@ -470,9 +438,9 @@ Instruction *NaryReassociate::tryReassociateBinaryOp(Value *LHS, Value *RHS,
return nullptr;
}
-Instruction *NaryReassociate::tryReassociatedBinaryOp(const SCEV *LHSExpr,
- Value *RHS,
- BinaryOperator *I) {
+Instruction *NaryReassociatePass::tryReassociatedBinaryOp(const SCEV *LHSExpr,
+ Value *RHS,
+ BinaryOperator *I) {
// Look for the closest dominator LHS of I that computes LHSExpr, and replace
// I with LHS op RHS.
auto *LHS = findClosestMatchingDominator(LHSExpr, I);
@@ -494,8 +462,8 @@ Instruction *NaryReassociate::tryReassociatedBinaryOp(const SCEV *LHSExpr,
return NewI;
}
-bool NaryReassociate::matchTernaryOp(BinaryOperator *I, Value *V, Value *&Op1,
- Value *&Op2) {
+bool NaryReassociatePass::matchTernaryOp(BinaryOperator *I, Value *V,
+ Value *&Op1, Value *&Op2) {
switch (I->getOpcode()) {
case Instruction::Add:
return match(V, m_Add(m_Value(Op1), m_Value(Op2)));
@@ -507,8 +475,9 @@ bool NaryReassociate::matchTernaryOp(BinaryOperator *I, Value *V, Value *&Op1,
return false;
}
-const SCEV *NaryReassociate::getBinarySCEV(BinaryOperator *I, const SCEV *LHS,
- const SCEV *RHS) {
+const SCEV *NaryReassociatePass::getBinarySCEV(BinaryOperator *I,
+ const SCEV *LHS,
+ const SCEV *RHS) {
switch (I->getOpcode()) {
case Instruction::Add:
return SE->getAddExpr(LHS, RHS);
@@ -521,8 +490,8 @@ const SCEV *NaryReassociate::getBinarySCEV(BinaryOperator *I, const SCEV *LHS,
}
Instruction *
-NaryReassociate::findClosestMatchingDominator(const SCEV *CandidateExpr,
- Instruction *Dominatee) {
+NaryReassociatePass::findClosestMatchingDominator(const SCEV *CandidateExpr,
+ Instruction *Dominatee) {
auto Pos = SeenExprs.find(CandidateExpr);
if (Pos == SeenExprs.end())
return nullptr;
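
doOneIteration above now walks the dominator tree with depth_first(DT) instead of spelling out GraphTraits nodes_begin/nodes_end, keeping the property that a block is only visited after all of its dominators. A self-contained sketch of a pre-order depth-first walk over a toy tree (llvm::depth_first provides the equivalent iteration for real dominator trees):

    #include <iostream>
    #include <vector>

    struct Node {
      int Block;                    // stands in for the BasicBlock
      std::vector<Node *> Children; // dominated nodes
    };

    // Pre-order DFS: every node is emitted after all of its ancestors,
    // which is why NaryReassociate sees candidate bases before their users.
    void depthFirst(Node *N, std::vector<int> &Order) {
      Order.push_back(N->Block);
      for (Node *C : N->Children)
        depthFirst(C, Order);
    }

    int main() {
      Node C{2, {}}, D{3, {}}, B{1, {&C, &D}}, Root{0, {&B}};
      std::vector<int> Order;
      depthFirst(&Root, Order);
      for (int Blk : Order)
        std::cout << Blk << " ";    // prints 0 1 2 3
      std::cout << "\n";
    }
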
diff --git a/contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp b/contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp
new file mode 100644
index 000000000000..dee61b77412e
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Scalar/NewGVN.cpp
@@ -0,0 +1,2092 @@
+//===---- NewGVN.cpp - Global Value Numbering Pass --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements LLVM's new Global Value Numbering pass.
+/// GVN partitions values computed by a function into congruence classes.
+/// Values ending up in the same congruence class are guaranteed to be the same
+/// for every execution of the program. In that respect, congruency is a
+/// compile-time approximation of equivalence of values at runtime.
+/// The algorithm implemented here uses a sparse formulation and is based
+/// on the ideas described in the paper:
+/// "A Sparse Algorithm for Predicated Global Value Numbering" from
+/// Karthik Gargi.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/NewGVN.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/TinyPtrVector.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/CFGPrinter.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/Loads.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/PHITransAddr.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/GlobalVariable.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/PredIteratorCache.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/GVNExpression.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/MemorySSA.h"
+#include "llvm/Transforms/Utils/SSAUpdater.h"
+#include <unordered_map>
+#include <utility>
+#include <vector>
+using namespace llvm;
+using namespace PatternMatch;
+using namespace llvm::GVNExpression;
+
+#define DEBUG_TYPE "newgvn"
+
+STATISTIC(NumGVNInstrDeleted, "Number of instructions deleted");
+STATISTIC(NumGVNBlocksDeleted, "Number of blocks deleted");
+STATISTIC(NumGVNOpsSimplified, "Number of Expressions simplified");
+STATISTIC(NumGVNPhisAllSame, "Number of PHIs whose arguments are all the same");
+
+//===----------------------------------------------------------------------===//
+// GVN Pass
+//===----------------------------------------------------------------------===//
+
+// Anchor methods.
+namespace llvm {
+namespace GVNExpression {
+Expression::~Expression() = default;
+BasicExpression::~BasicExpression() = default;
+CallExpression::~CallExpression() = default;
+LoadExpression::~LoadExpression() = default;
+StoreExpression::~StoreExpression() = default;
+AggregateValueExpression::~AggregateValueExpression() = default;
+PHIExpression::~PHIExpression() = default;
+}
+}
+
+// Congruence classes represent the set of expressions/instructions
+// that are all the same *during some scope in the function*.
+// That is, because of the way we perform equality propagation, and
+// because of memory value numbering, it is not correct to assume
+// you can willy-nilly replace any member with any other at any
+// point in the function.
+//
+// For any Value in the Member set, it is valid to replace any dominated member
+// with that Value.
+//
+// Every congruence class has a leader, and the leader is used to
+// symbolize instructions in a canonical way (IE every operand of an
+// instruction that is a member of the same congruence class will
+// always be replaced with the leader during symbolization).
+// To simplify symbolization, we keep the leader as a constant if the class
+// can be proved to be a constant value.
+// Otherwise, the leader is a randomly chosen member of the value set; it does
+// not matter which one is chosen.
+// Each congruence class also has a defining expression,
+// though the expression may be null. If it exists, it can be used for forward
+// propagation and reassociation of values.
+//
+struct CongruenceClass {
+ using MemberSet = SmallPtrSet<Value *, 4>;
+ unsigned ID;
+ // Representative leader.
+ Value *RepLeader = nullptr;
+ // Defining Expression.
+ const Expression *DefiningExpr = nullptr;
+ // Actual members of this class.
+ MemberSet Members;
+
+ // True if this class has no members left. This is mainly used for assertion
+ // purposes, and for skipping empty classes.
+ bool Dead = false;
+
+ explicit CongruenceClass(unsigned ID) : ID(ID) {}
+ CongruenceClass(unsigned ID, Value *Leader, const Expression *E)
+ : ID(ID), RepLeader(Leader), DefiningExpr(E) {}
+};
+
+namespace llvm {
+template <> struct DenseMapInfo<const Expression *> {
+ static const Expression *getEmptyKey() {
+ auto Val = static_cast<uintptr_t>(-1);
+ Val <<= PointerLikeTypeTraits<const Expression *>::NumLowBitsAvailable;
+ return reinterpret_cast<const Expression *>(Val);
+ }
+ static const Expression *getTombstoneKey() {
+ auto Val = static_cast<uintptr_t>(~1U);
+ Val <<= PointerLikeTypeTraits<const Expression *>::NumLowBitsAvailable;
+ return reinterpret_cast<const Expression *>(Val);
+ }
+ static unsigned getHashValue(const Expression *V) {
+ return static_cast<unsigned>(V->getHashValue());
+ }
+ static bool isEqual(const Expression *LHS, const Expression *RHS) {
+ if (LHS == RHS)
+ return true;
+ if (LHS == getTombstoneKey() || RHS == getTombstoneKey() ||
+ LHS == getEmptyKey() || RHS == getEmptyKey())
+ return false;
+ return *LHS == *RHS;
+ }
+};
+} // end namespace llvm
+
+class NewGVN : public FunctionPass {
+ DominatorTree *DT;
+ const DataLayout *DL;
+ const TargetLibraryInfo *TLI;
+ AssumptionCache *AC;
+ AliasAnalysis *AA;
+ MemorySSA *MSSA;
+ MemorySSAWalker *MSSAWalker;
+ BumpPtrAllocator ExpressionAllocator;
+ ArrayRecycler<Value *> ArgRecycler;
+
+ // Congruence class info.
+ CongruenceClass *InitialClass;
+ std::vector<CongruenceClass *> CongruenceClasses;
+ unsigned NextCongruenceNum;
+
+ // Value Mappings.
+ DenseMap<Value *, CongruenceClass *> ValueToClass;
+ DenseMap<Value *, const Expression *> ValueToExpression;
+
+ // A table storing which memorydefs/phis represent a memory state provably
+ // equivalent to another memory state.
+ // We could use the congruence class machinery, but the MemoryAccess's are
+ // abstract memory states, so they can only ever be equivalent to each other,
+ // and not to constants, etc.
+ DenseMap<const MemoryAccess *, MemoryAccess *> MemoryAccessEquiv;
+
+ // Expression to class mapping.
+ using ExpressionClassMap = DenseMap<const Expression *, CongruenceClass *>;
+ ExpressionClassMap ExpressionToClass;
+
+ // Which values have changed as a result of leader changes.
+ SmallPtrSet<Value *, 8> ChangedValues;
+
+ // Reachability info.
+ using BlockEdge = BasicBlockEdge;
+ DenseSet<BlockEdge> ReachableEdges;
+ SmallPtrSet<const BasicBlock *, 8> ReachableBlocks;
+
+ // This is a bitvector because, on larger functions, we may have
+ // thousands of touched instructions at once (entire blocks,
+ // instructions with hundreds of uses, etc). Even with optimization
+ // for when we mark whole blocks as touched, when this was a
+ // SmallPtrSet or DenseSet, for some functions, we spent >20% of all
+ // the time in GVN just managing this list. The bitvector, on the
+ // other hand, efficiently supports test/set/clear of both
+ // individual and ranges, as well as "find next element" This
+ // enables us to use it as a worklist with essentially 0 cost.
+ BitVector TouchedInstructions;
+
+ DenseMap<const BasicBlock *, std::pair<unsigned, unsigned>> BlockInstRange;
+ DenseMap<const DomTreeNode *, std::pair<unsigned, unsigned>>
+ DominatedInstRange;
+
+#ifndef NDEBUG
+ // Debugging for how many times each block and instruction got processed.
+ DenseMap<const Value *, unsigned> ProcessedCount;
+#endif
+
+ // DFS info.
+ DenseMap<const BasicBlock *, std::pair<int, int>> DFSDomMap;
+ DenseMap<const Value *, unsigned> InstrDFS;
+ SmallVector<Value *, 32> DFSToInstr;
+
+ // Deletion info.
+ SmallPtrSet<Instruction *, 8> InstructionsToErase;
+
+public:
+ static char ID; // Pass identification, replacement for typeid.
+ NewGVN() : FunctionPass(ID) {
+ initializeNewGVNPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override;
+ bool runGVN(Function &F, DominatorTree *DT, AssumptionCache *AC,
+ TargetLibraryInfo *TLI, AliasAnalysis *AA, MemorySSA *MSSA);
+
+private:
+ // This transformation requires dominator and postdominator info.
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+ AU.addRequired<MemorySSAWrapperPass>();
+ AU.addRequired<AAResultsWrapperPass>();
+
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ }
+
+ // Expression handling.
+ const Expression *createExpression(Instruction *, const BasicBlock *);
+ const Expression *createBinaryExpression(unsigned, Type *, Value *, Value *,
+ const BasicBlock *);
+ PHIExpression *createPHIExpression(Instruction *);
+ const VariableExpression *createVariableExpression(Value *);
+ const ConstantExpression *createConstantExpression(Constant *);
+ const Expression *createVariableOrConstant(Value *V, const BasicBlock *B);
+ const UnknownExpression *createUnknownExpression(Instruction *);
+ const StoreExpression *createStoreExpression(StoreInst *, MemoryAccess *,
+ const BasicBlock *);
+ LoadExpression *createLoadExpression(Type *, Value *, LoadInst *,
+ MemoryAccess *, const BasicBlock *);
+
+ const CallExpression *createCallExpression(CallInst *, MemoryAccess *,
+ const BasicBlock *);
+ const AggregateValueExpression *
+ createAggregateValueExpression(Instruction *, const BasicBlock *);
+ bool setBasicExpressionInfo(Instruction *, BasicExpression *,
+ const BasicBlock *);
+
+ // Congruence class handling.
+ CongruenceClass *createCongruenceClass(Value *Leader, const Expression *E) {
+ auto *result = new CongruenceClass(NextCongruenceNum++, Leader, E);
+ CongruenceClasses.emplace_back(result);
+ return result;
+ }
+
+ CongruenceClass *createSingletonCongruenceClass(Value *Member) {
+ CongruenceClass *CClass = createCongruenceClass(Member, nullptr);
+ CClass->Members.insert(Member);
+ ValueToClass[Member] = CClass;
+ return CClass;
+ }
+ void initializeCongruenceClasses(Function &F);
+
+ // Value number an Instruction or MemoryPhi.
+ void valueNumberMemoryPhi(MemoryPhi *);
+ void valueNumberInstruction(Instruction *);
+
+ // Symbolic evaluation.
+ const Expression *checkSimplificationResults(Expression *, Instruction *,
+ Value *);
+ const Expression *performSymbolicEvaluation(Value *, const BasicBlock *);
+ const Expression *performSymbolicLoadEvaluation(Instruction *,
+ const BasicBlock *);
+ const Expression *performSymbolicStoreEvaluation(Instruction *,
+ const BasicBlock *);
+ const Expression *performSymbolicCallEvaluation(Instruction *,
+ const BasicBlock *);
+ const Expression *performSymbolicPHIEvaluation(Instruction *,
+ const BasicBlock *);
+ bool setMemoryAccessEquivTo(MemoryAccess *From, MemoryAccess *To);
+ const Expression *performSymbolicAggrValueEvaluation(Instruction *,
+ const BasicBlock *);
+
+ // Congruence finding.
+ // Templated to allow them to work both on BB's and BB-edges.
+ template <class T>
+ Value *lookupOperandLeader(Value *, const User *, const T &) const;
+ void performCongruenceFinding(Value *, const Expression *);
+
+ // Reachability handling.
+ void updateReachableEdge(BasicBlock *, BasicBlock *);
+ void processOutgoingEdges(TerminatorInst *, BasicBlock *);
+ bool isOnlyReachableViaThisEdge(const BasicBlockEdge &) const;
+ Value *findConditionEquivalence(Value *, BasicBlock *) const;
+ MemoryAccess *lookupMemoryAccessEquiv(MemoryAccess *) const;
+
+ // Elimination.
+ struct ValueDFS;
+ void convertDenseToDFSOrdered(CongruenceClass::MemberSet &,
+ std::vector<ValueDFS> &);
+
+ bool eliminateInstructions(Function &);
+ void replaceInstruction(Instruction *, Value *);
+ void markInstructionForDeletion(Instruction *);
+ void deleteInstructionsInBlock(BasicBlock *);
+
+ // New instruction creation.
+ void handleNewInstruction(Instruction *){};
+ void markUsersTouched(Value *);
+ void markMemoryUsersTouched(MemoryAccess *);
+
+ // Utilities.
+ void cleanupTables();
+ std::pair<unsigned, unsigned> assignDFSNumbers(BasicBlock *, unsigned);
+ void updateProcessedCount(Value *V);
+ void verifyMemoryCongruency();
+};
+
+char NewGVN::ID = 0;
+
+// createGVNPass - The public interface to this file.
+FunctionPass *llvm::createNewGVNPass() { return new NewGVN(); }
+
+template <typename T>
+static bool equalsLoadStoreHelper(const T &LHS, const Expression &RHS) {
+ if ((!isa<LoadExpression>(RHS) && !isa<StoreExpression>(RHS)) ||
+ !LHS.BasicExpression::equals(RHS)) {
+ return false;
+ } else if (const auto *L = dyn_cast<LoadExpression>(&RHS)) {
+ if (LHS.getDefiningAccess() != L->getDefiningAccess())
+ return false;
+ } else if (const auto *S = dyn_cast<StoreExpression>(&RHS)) {
+ if (LHS.getDefiningAccess() != S->getDefiningAccess())
+ return false;
+ }
+ return true;
+}
+
+bool LoadExpression::equals(const Expression &Other) const {
+ return equalsLoadStoreHelper(*this, Other);
+}
+
+bool StoreExpression::equals(const Expression &Other) const {
+ return equalsLoadStoreHelper(*this, Other);
+}
+
+#ifndef NDEBUG
+static std::string getBlockName(const BasicBlock *B) {
+ return DOTGraphTraits<const Function *>::getSimpleNodeLabel(B, nullptr);
+}
+#endif
+
+INITIALIZE_PASS_BEGIN(NewGVN, "newgvn", "Global Value Numbering", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
+INITIALIZE_PASS_END(NewGVN, "newgvn", "Global Value Numbering", false, false)
+
+PHIExpression *NewGVN::createPHIExpression(Instruction *I) {
+ BasicBlock *PhiBlock = I->getParent();
+ auto *PN = cast<PHINode>(I);
+ auto *E = new (ExpressionAllocator)
+ PHIExpression(PN->getNumOperands(), I->getParent());
+
+ E->allocateOperands(ArgRecycler, ExpressionAllocator);
+ E->setType(I->getType());
+ E->setOpcode(I->getOpcode());
+
+ auto ReachablePhiArg = [&](const Use &U) {
+ return ReachableBlocks.count(PN->getIncomingBlock(U));
+ };
+
+ // Filter out unreachable operands
+ auto Filtered = make_filter_range(PN->operands(), ReachablePhiArg);
+
+ std::transform(Filtered.begin(), Filtered.end(), op_inserter(E),
+ [&](const Use &U) -> Value * {
+ // Don't try to transform self-defined phis
+ if (U == PN)
+ return PN;
+ const BasicBlockEdge BBE(PN->getIncomingBlock(U), PhiBlock);
+ return lookupOperandLeader(U, I, BBE);
+ });
+ return E;
+}
+
+// Set basic expression info (Arguments, type, opcode) for Expression
+// E from Instruction I in block B.
+bool NewGVN::setBasicExpressionInfo(Instruction *I, BasicExpression *E,
+ const BasicBlock *B) {
+ bool AllConstant = true;
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(I))
+ E->setType(GEP->getSourceElementType());
+ else
+ E->setType(I->getType());
+ E->setOpcode(I->getOpcode());
+ E->allocateOperands(ArgRecycler, ExpressionAllocator);
+
+ // Transform the operand array into an operand leader array, and keep track of
+ // whether all members are constant.
+ std::transform(I->op_begin(), I->op_end(), op_inserter(E), [&](Value *O) {
+ auto Operand = lookupOperandLeader(O, I, B);
+ AllConstant &= isa<Constant>(Operand);
+ return Operand;
+ });
+
+ return AllConstant;
+}
+
+const Expression *NewGVN::createBinaryExpression(unsigned Opcode, Type *T,
+ Value *Arg1, Value *Arg2,
+ const BasicBlock *B) {
+ auto *E = new (ExpressionAllocator) BasicExpression(2);
+
+ E->setType(T);
+ E->setOpcode(Opcode);
+ E->allocateOperands(ArgRecycler, ExpressionAllocator);
+ if (Instruction::isCommutative(Opcode)) {
+ // Ensure that commutative instructions that only differ by a permutation
+ // of their operands get the same value number by sorting the operand value
+ // numbers. Since all commutative instructions have two operands it is more
+ // efficient to sort by hand rather than using, say, std::sort.
+ if (Arg1 > Arg2)
+ std::swap(Arg1, Arg2);
+ }
+ E->op_push_back(lookupOperandLeader(Arg1, nullptr, B));
+ E->op_push_back(lookupOperandLeader(Arg2, nullptr, B));
+
+ Value *V = SimplifyBinOp(Opcode, E->getOperand(0), E->getOperand(1), *DL, TLI,
+ DT, AC);
+ if (const Expression *SimplifiedE = checkSimplificationResults(E, nullptr, V))
+ return SimplifiedE;
+ return E;
+}
+
+// Take a Value returned by simplification of Expression E/Instruction
+// I, and see if it resulted in a simpler expression. If so, return
+// that expression.
+// TODO: Once finished, this should not take an Instruction, we only
+// use it for printing.
+const Expression *NewGVN::checkSimplificationResults(Expression *E,
+ Instruction *I, Value *V) {
+ if (!V)
+ return nullptr;
+ if (auto *C = dyn_cast<Constant>(V)) {
+ if (I)
+ DEBUG(dbgs() << "Simplified " << *I << " to "
+ << " constant " << *C << "\n");
+ NumGVNOpsSimplified++;
+ assert(isa<BasicExpression>(E) &&
+ "We should always have had a basic expression here");
+
+ cast<BasicExpression>(E)->deallocateOperands(ArgRecycler);
+ ExpressionAllocator.Deallocate(E);
+ return createConstantExpression(C);
+ } else if (isa<Argument>(V) || isa<GlobalVariable>(V)) {
+ if (I)
+ DEBUG(dbgs() << "Simplified " << *I << " to "
+ << " variable " << *V << "\n");
+ cast<BasicExpression>(E)->deallocateOperands(ArgRecycler);
+ ExpressionAllocator.Deallocate(E);
+ return createVariableExpression(V);
+ }
+
+ CongruenceClass *CC = ValueToClass.lookup(V);
+ if (CC && CC->DefiningExpr) {
+ if (I)
+ DEBUG(dbgs() << "Simplified " << *I << " to "
+ << " expression " << *V << "\n");
+ NumGVNOpsSimplified++;
+ assert(isa<BasicExpression>(E) &&
+ "We should always have had a basic expression here");
+ cast<BasicExpression>(E)->deallocateOperands(ArgRecycler);
+ ExpressionAllocator.Deallocate(E);
+ return CC->DefiningExpr;
+ }
+ return nullptr;
+}
+
+const Expression *NewGVN::createExpression(Instruction *I,
+ const BasicBlock *B) {
+
+ auto *E = new (ExpressionAllocator) BasicExpression(I->getNumOperands());
+
+ bool AllConstant = setBasicExpressionInfo(I, E, B);
+
+ if (I->isCommutative()) {
+ // Ensure that commutative instructions that only differ by a permutation
+ // of their operands get the same value number by sorting the operand value
+ // numbers. Since all commutative instructions have two operands it is more
+ // efficient to sort by hand rather than using, say, std::sort.
+ assert(I->getNumOperands() == 2 && "Unsupported commutative instruction!");
+ if (E->getOperand(0) > E->getOperand(1))
+ E->swapOperands(0, 1);
+ }
+
+ // Perform simplification
+ // TODO: Right now we only check to see if we get a constant result.
+ // We may get a less than constant, but still better, result for
+ // some operations.
+ // IE
+ // add 0, x -> x
+ // and x, x -> x
+ // We should handle this by simply rewriting the expression.
+ if (auto *CI = dyn_cast<CmpInst>(I)) {
+ // Sort the operand value numbers so x<y and y>x get the same value
+ // number.
+ CmpInst::Predicate Predicate = CI->getPredicate();
+ if (E->getOperand(0) > E->getOperand(1)) {
+ E->swapOperands(0, 1);
+ Predicate = CmpInst::getSwappedPredicate(Predicate);
+ }
+ E->setOpcode((CI->getOpcode() << 8) | Predicate);
+ // TODO: 25% of our time is spent in SimplifyCmpInst with pointer operands
+ // TODO: Since we noop bitcasts, we may need to check types before
+ // simplifying, so that we don't end up simplifying based on a wrong
+ // type assumption. We should clean this up so we can use constants of the
+ // wrong type.
+
+ assert(I->getOperand(0)->getType() == I->getOperand(1)->getType() &&
+ "Wrong types on cmp instruction");
+ if ((E->getOperand(0)->getType() == I->getOperand(0)->getType() &&
+ E->getOperand(1)->getType() == I->getOperand(1)->getType())) {
+ Value *V = SimplifyCmpInst(Predicate, E->getOperand(0), E->getOperand(1),
+ *DL, TLI, DT, AC);
+ if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V))
+ return SimplifiedE;
+ }
+ } else if (isa<SelectInst>(I)) {
+ if (isa<Constant>(E->getOperand(0)) ||
+ (E->getOperand(1)->getType() == I->getOperand(1)->getType() &&
+ E->getOperand(2)->getType() == I->getOperand(2)->getType())) {
+ Value *V = SimplifySelectInst(E->getOperand(0), E->getOperand(1),
+ E->getOperand(2), *DL, TLI, DT, AC);
+ if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V))
+ return SimplifiedE;
+ }
+ } else if (I->isBinaryOp()) {
+ Value *V = SimplifyBinOp(E->getOpcode(), E->getOperand(0), E->getOperand(1),
+ *DL, TLI, DT, AC);
+ if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V))
+ return SimplifiedE;
+ } else if (auto *BI = dyn_cast<BitCastInst>(I)) {
+ Value *V = SimplifyInstruction(BI, *DL, TLI, DT, AC);
+ if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V))
+ return SimplifiedE;
+ } else if (isa<GetElementPtrInst>(I)) {
+ Value *V = SimplifyGEPInst(E->getType(),
+ ArrayRef<Value *>(E->op_begin(), E->op_end()),
+ *DL, TLI, DT, AC);
+ if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V))
+ return SimplifiedE;
+ } else if (AllConstant) {
+ // We don't bother trying to simplify unless all of the operands
+ // were constant.
+ // TODO: There are a lot of Simplify*'s we could call here, if we
+ // wanted to. The original motivating case for this code was a
+ // zext i1 false to i8, which we don't have an interface to
+ // simplify (IE there is no SimplifyZExt).
+
+ SmallVector<Constant *, 8> C;
+ for (Value *Arg : E->operands())
+ C.emplace_back(cast<Constant>(Arg));
+
+ if (Value *V = ConstantFoldInstOperands(I, C, *DL, TLI))
+ if (const Expression *SimplifiedE = checkSimplificationResults(E, I, V))
+ return SimplifiedE;
+ }
+ return E;
+}
+
+const AggregateValueExpression *
+NewGVN::createAggregateValueExpression(Instruction *I, const BasicBlock *B) {
+ if (auto *II = dyn_cast<InsertValueInst>(I)) {
+ auto *E = new (ExpressionAllocator)
+ AggregateValueExpression(I->getNumOperands(), II->getNumIndices());
+ setBasicExpressionInfo(I, E, B);
+ E->allocateIntOperands(ExpressionAllocator);
+ std::copy(II->idx_begin(), II->idx_end(), int_op_inserter(E));
+ return E;
+ } else if (auto *EI = dyn_cast<ExtractValueInst>(I)) {
+ auto *E = new (ExpressionAllocator)
+ AggregateValueExpression(I->getNumOperands(), EI->getNumIndices());
+ setBasicExpressionInfo(EI, E, B);
+ E->allocateIntOperands(ExpressionAllocator);
+ std::copy(EI->idx_begin(), EI->idx_end(), int_op_inserter(E));
+ return E;
+ }
+ llvm_unreachable("Unhandled type of aggregate value operation");
+}
+
+const VariableExpression *NewGVN::createVariableExpression(Value *V) {
+ auto *E = new (ExpressionAllocator) VariableExpression(V);
+ E->setOpcode(V->getValueID());
+ return E;
+}
+
+const Expression *NewGVN::createVariableOrConstant(Value *V,
+ const BasicBlock *B) {
+ auto Leader = lookupOperandLeader(V, nullptr, B);
+ if (auto *C = dyn_cast<Constant>(Leader))
+ return createConstantExpression(C);
+ return createVariableExpression(Leader);
+}
+
+const ConstantExpression *NewGVN::createConstantExpression(Constant *C) {
+ auto *E = new (ExpressionAllocator) ConstantExpression(C);
+ E->setOpcode(C->getValueID());
+ return E;
+}
+
+const UnknownExpression *NewGVN::createUnknownExpression(Instruction *I) {
+ auto *E = new (ExpressionAllocator) UnknownExpression(I);
+ E->setOpcode(I->getOpcode());
+ return E;
+}
+
+const CallExpression *NewGVN::createCallExpression(CallInst *CI,
+ MemoryAccess *HV,
+ const BasicBlock *B) {
+ // FIXME: Add operand bundles for calls.
+ auto *E =
+ new (ExpressionAllocator) CallExpression(CI->getNumOperands(), CI, HV);
+ setBasicExpressionInfo(CI, E, B);
+ return E;
+}
+
+// See if we have a congruence class and leader for this operand, and if so,
+// return it. Otherwise, return the operand itself.
+template <class T>
+Value *NewGVN::lookupOperandLeader(Value *V, const User *U, const T &B) const {
+ CongruenceClass *CC = ValueToClass.lookup(V);
+ if (CC && (CC != InitialClass))
+ return CC->RepLeader;
+ return V;
+}
+
+MemoryAccess *NewGVN::lookupMemoryAccessEquiv(MemoryAccess *MA) const {
+ MemoryAccess *Result = MemoryAccessEquiv.lookup(MA);
+ return Result ? Result : MA;
+}
+
+LoadExpression *NewGVN::createLoadExpression(Type *LoadType, Value *PointerOp,
+ LoadInst *LI, MemoryAccess *DA,
+ const BasicBlock *B) {
+ auto *E = new (ExpressionAllocator) LoadExpression(1, LI, DA);
+ E->allocateOperands(ArgRecycler, ExpressionAllocator);
+ E->setType(LoadType);
+
+ // Give stores and loads the same opcode so they value number together.
+ E->setOpcode(0);
+ E->op_push_back(lookupOperandLeader(PointerOp, LI, B));
+ if (LI)
+ E->setAlignment(LI->getAlignment());
+
+ // TODO: Value number heap versions. We may be able to discover
+ // things alias analysis can't on its own (IE that a store and a
+ // load have the same value, and thus, it isn't clobbering the load).
+ return E;
+}
+
+const StoreExpression *NewGVN::createStoreExpression(StoreInst *SI,
+ MemoryAccess *DA,
+ const BasicBlock *B) {
+ auto *E =
+ new (ExpressionAllocator) StoreExpression(SI->getNumOperands(), SI, DA);
+ E->allocateOperands(ArgRecycler, ExpressionAllocator);
+ E->setType(SI->getValueOperand()->getType());
+
+ // Give stores and loads the same opcode so they value number together.
+ E->setOpcode(0);
+ E->op_push_back(lookupOperandLeader(SI->getPointerOperand(), SI, B));
+
+ // TODO: Value number heap versions. We may be able to discover
+ // things alias analysis can't on its own (IE that a store and a
+ // load have the same value, and thus, it isn't clobbering the load).
+ return E;
+}
+
+const Expression *NewGVN::performSymbolicStoreEvaluation(Instruction *I,
+ const BasicBlock *B) {
+ // Unlike loads, we never try to eliminate stores, so we do not check if they
+ // are simple and avoid value numbering them.
+ auto *SI = cast<StoreInst>(I);
+ // If this store's memorydef stores the same value as the last store, the
+ // memory accesses are equivalent.
+ // Get the expression, if any, for the RHS of the MemoryDef.
+ MemoryAccess *StoreAccess = MSSA->getMemoryAccess(SI);
+ MemoryAccess *StoreRHS = lookupMemoryAccessEquiv(
+ cast<MemoryDef>(StoreAccess)->getDefiningAccess());
+ const Expression *OldStore = createStoreExpression(SI, StoreRHS, B);
+ // See if this store expression already has a value, and it's the same as our
+ // current store. FIXME: Right now, we only do this for simple stores.
+ if (SI->isSimple()) {
+ CongruenceClass *CC = ExpressionToClass.lookup(OldStore);
+ if (CC && CC->DefiningExpr && isa<StoreExpression>(CC->DefiningExpr) &&
+ CC->RepLeader == lookupOperandLeader(SI->getValueOperand(), SI, B))
+ return createStoreExpression(SI, StoreRHS, B);
+ }
+
+ return createStoreExpression(SI, StoreAccess, B);
+}
+
+const Expression *NewGVN::performSymbolicLoadEvaluation(Instruction *I,
+ const BasicBlock *B) {
+ auto *LI = cast<LoadInst>(I);
+
+ // We can eliminate in favor of non-simple loads, but we won't be able to
+ // eliminate the loads themselves.
+ if (!LI->isSimple())
+ return nullptr;
+
+ Value *LoadAddressLeader = lookupOperandLeader(LI->getPointerOperand(), I, B);
+ // Load of undef is undef.
+ if (isa<UndefValue>(LoadAddressLeader))
+ return createConstantExpression(UndefValue::get(LI->getType()));
+
+ MemoryAccess *DefiningAccess = MSSAWalker->getClobberingMemoryAccess(I);
+
+ if (!MSSA->isLiveOnEntryDef(DefiningAccess)) {
+ if (auto *MD = dyn_cast<MemoryDef>(DefiningAccess)) {
+ Instruction *DefiningInst = MD->getMemoryInst();
+ // If the defining instruction is not reachable, replace with undef.
+ if (!ReachableBlocks.count(DefiningInst->getParent()))
+ return createConstantExpression(UndefValue::get(LI->getType()));
+ }
+ }
+
+ const Expression *E =
+ createLoadExpression(LI->getType(), LI->getPointerOperand(), LI,
+ lookupMemoryAccessEquiv(DefiningAccess), B);
+ return E;
+}
+
+// Evaluate read only and pure calls, and create an expression result.
+const Expression *NewGVN::performSymbolicCallEvaluation(Instruction *I,
+ const BasicBlock *B) {
+ auto *CI = cast<CallInst>(I);
+ if (AA->doesNotAccessMemory(CI))
+ return createCallExpression(CI, nullptr, B);
+ if (AA->onlyReadsMemory(CI)) {
+ MemoryAccess *DefiningAccess = MSSAWalker->getClobberingMemoryAccess(CI);
+ return createCallExpression(CI, lookupMemoryAccessEquiv(DefiningAccess), B);
+ }
+ return nullptr;
+}
+
+// Update the memory access equivalence table to say that From is equal to To,
+// and return true if this is different from what already existed in the table.
+bool NewGVN::setMemoryAccessEquivTo(MemoryAccess *From, MemoryAccess *To) {
+ DEBUG(dbgs() << "Setting " << *From << " equivalent to ");
+ if (!To)
+ DEBUG(dbgs() << "itself");
+ else
+ DEBUG(dbgs() << *To);
+ DEBUG(dbgs() << "\n");
+ auto LookupResult = MemoryAccessEquiv.find(From);
+ bool Changed = false;
+ // If it's already in the table, see if the value changed.
+ if (LookupResult != MemoryAccessEquiv.end()) {
+ if (To && LookupResult->second != To) {
+ // It wasn't equivalent before, and now it is.
+ LookupResult->second = To;
+ Changed = true;
+ } else if (!To) {
+ // It used to be equivalent to something, and now it's not.
+ MemoryAccessEquiv.erase(LookupResult);
+ Changed = true;
+ }
+ } else {
+ assert(!To &&
+ "Memory equivalence should never change from nothing to something");
+ }
+
+ return Changed;
+}
+// Evaluate PHI nodes symbolically, and create an expression result.
+const Expression *NewGVN::performSymbolicPHIEvaluation(Instruction *I,
+ const BasicBlock *B) {
+ auto *E = cast<PHIExpression>(createPHIExpression(I));
+ if (E->op_empty()) {
+ DEBUG(dbgs() << "Simplified PHI node " << *I << " to undef"
+ << "\n");
+ E->deallocateOperands(ArgRecycler);
+ ExpressionAllocator.Deallocate(E);
+ return createConstantExpression(UndefValue::get(I->getType()));
+ }
+
+ Value *AllSameValue = E->getOperand(0);
+
+ // See if all arguments are the same, ignoring undef arguments, because we can
+ // choose a value that is the same for them.
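+ // (E.g. a phi whose operands are (a, a, undef) can be treated as a.)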
+ for (const Value *Arg : E->operands())
+ if (Arg != AllSameValue && !isa<UndefValue>(Arg)) {
+ AllSameValue = nullptr;
+ break;
+ }
+
+ if (AllSameValue) {
+ // It's possible to have phi nodes with cycles (IE dependent on
+ // other phis that are .... dependent on the original phi node),
+ // especially in weird CFGs where some arguments are unreachable, or
+ // uninitialized along certain paths.
+ // This can cause infinite loops during evaluation (even if you disable
+ // the recursion below, you will simply ping-pong between congruence
+ // classes). If a phi node symbolically evaluates to another phi node,
+ // just leave it alone. If they are really the same, we will still
+ // eliminate them in favor of each other.
+ if (isa<PHINode>(AllSameValue))
+ return E;
+ NumGVNPhisAllSame++;
+ DEBUG(dbgs() << "Simplified PHI node " << *I << " to " << *AllSameValue
+ << "\n");
+ E->deallocateOperands(ArgRecycler);
+ ExpressionAllocator.Deallocate(E);
+ if (auto *C = dyn_cast<Constant>(AllSameValue))
+ return createConstantExpression(C);
+ return createVariableExpression(AllSameValue);
+ }
+ return E;
+}
+
+const Expression *
+NewGVN::performSymbolicAggrValueEvaluation(Instruction *I,
+ const BasicBlock *B) {
+ if (auto *EI = dyn_cast<ExtractValueInst>(I)) {
+ auto *II = dyn_cast<IntrinsicInst>(EI->getAggregateOperand());
+ if (II && EI->getNumIndices() == 1 && *EI->idx_begin() == 0) {
+ unsigned Opcode = 0;
+ // EI might be an extract from one of our recognised intrinsics. If it
+ // is, we'll synthesize a semantically equivalent expression instead of
+ // an extract value expression.
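+ // (The *.with.overflow intrinsics return a {result, overflow bit} pair;
+ // index 0 is the arithmetic result, which is what gets re-expressed as a
+ // plain add/sub/mul here.)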
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::uadd_with_overflow:
+ Opcode = Instruction::Add;
+ break;
+ case Intrinsic::ssub_with_overflow:
+ case Intrinsic::usub_with_overflow:
+ Opcode = Instruction::Sub;
+ break;
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::umul_with_overflow:
+ Opcode = Instruction::Mul;
+ break;
+ default:
+ break;
+ }
+
+ if (Opcode != 0) {
+ // Intrinsic recognized. Grab its args to finish building the
+ // expression.
+ assert(II->getNumArgOperands() == 2 &&
+ "Expect two args for recognised intrinsics.");
+ return createBinaryExpression(Opcode, EI->getType(),
+ II->getArgOperand(0),
+ II->getArgOperand(1), B);
+ }
+ }
+ }
+
+ return createAggregateValueExpression(I, B);
+}
+
+// Substitute and symbolize the value before value numbering.
+const Expression *NewGVN::performSymbolicEvaluation(Value *V,
+ const BasicBlock *B) {
+ const Expression *E = nullptr;
+ if (auto *C = dyn_cast<Constant>(V))
+ E = createConstantExpression(C);
+ else if (isa<Argument>(V) || isa<GlobalVariable>(V)) {
+ E = createVariableExpression(V);
+ } else {
+ // TODO: memory intrinsics.
+ // TODO: Some day, we should do the forward propagation and reassociation
+ // parts of the algorithm.
+ auto *I = cast<Instruction>(V);
+ switch (I->getOpcode()) {
+ case Instruction::ExtractValue:
+ case Instruction::InsertValue:
+ E = performSymbolicAggrValueEvaluation(I, B);
+ break;
+ case Instruction::PHI:
+ E = performSymbolicPHIEvaluation(I, B);
+ break;
+ case Instruction::Call:
+ E = performSymbolicCallEvaluation(I, B);
+ break;
+ case Instruction::Store:
+ E = performSymbolicStoreEvaluation(I, B);
+ break;
+ case Instruction::Load:
+ E = performSymbolicLoadEvaluation(I, B);
+ break;
+ case Instruction::BitCast: {
+ E = createExpression(I, B);
+ } break;
+
+ case Instruction::Add:
+ case Instruction::FAdd:
+ case Instruction::Sub:
+ case Instruction::FSub:
+ case Instruction::Mul:
+ case Instruction::FMul:
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::FDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ case Instruction::FRem:
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ case Instruction::And:
+ case Instruction::Or:
+ case Instruction::Xor:
+ case Instruction::ICmp:
+ case Instruction::FCmp:
+ case Instruction::Trunc:
+ case Instruction::ZExt:
+ case Instruction::SExt:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPTrunc:
+ case Instruction::FPExt:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::Select:
+ case Instruction::ExtractElement:
+ case Instruction::InsertElement:
+ case Instruction::ShuffleVector:
+ case Instruction::GetElementPtr:
+ E = createExpression(I, B);
+ break;
+ default:
+ return nullptr;
+ }
+ }
+ return E;
+}
+
+// There is an edge from 'Src' to 'Dst'. Return true if every path from
+// the entry block to 'Dst' passes via this edge. In particular 'Dst'
+// must not be reachable via another edge from 'Src'.
+bool NewGVN::isOnlyReachableViaThisEdge(const BasicBlockEdge &E) const {
+
+ // While in theory it is interesting to consider the case in which Dst has
+ // more than one predecessor, because Dst might be part of a loop which is
+ // only reachable from Src, in practice it is pointless since at the time
+ // GVN runs all such loops have preheaders, which means that Dst will have
+ // been changed to have only one predecessor, namely Src.
+ const BasicBlock *Pred = E.getEnd()->getSinglePredecessor();
+ const BasicBlock *Src = E.getStart();
+ assert((!Pred || Pred == Src) && "No edge between these basic blocks!");
+ (void)Src;
+ return Pred != nullptr;
+}
+
+void NewGVN::markUsersTouched(Value *V) {
+ // Now mark the users as touched.
+ for (auto *User : V->users()) {
+ assert(isa<Instruction>(User) && "Use of value not within an instruction?");
+ TouchedInstructions.set(InstrDFS[User]);
+ }
+}
+
+void NewGVN::markMemoryUsersTouched(MemoryAccess *MA) {
+ for (auto U : MA->users()) {
+ if (auto *MUD = dyn_cast<MemoryUseOrDef>(U))
+ TouchedInstructions.set(InstrDFS[MUD->getMemoryInst()]);
+ else
+ TouchedInstructions.set(InstrDFS[U]);
+ }
+}
+
+// Perform congruence finding on a given value numbering expression.
+void NewGVN::performCongruenceFinding(Value *V, const Expression *E) {
+
+ ValueToExpression[V] = E;
+ // This is guaranteed to return something, since it will at least find
+ // INITIAL.
+ CongruenceClass *VClass = ValueToClass[V];
+ assert(VClass && "Should have found a vclass");
+ // Dead classes should have been eliminated from the mapping.
+ assert(!VClass->Dead && "Found a dead class");
+
+ CongruenceClass *EClass;
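+ // A variable expression is congruent to whatever class its underlying
+ // value already belongs to; everything else goes through the
+ // expression-to-class table.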
+ if (const auto *VE = dyn_cast<VariableExpression>(E)) {
+ EClass = ValueToClass[VE->getVariableValue()];
+ } else {
+ auto lookupResult = ExpressionToClass.insert({E, nullptr});
+
+ // If it's not in the value table, create a new congruence class.
+ if (lookupResult.second) {
+ CongruenceClass *NewClass = createCongruenceClass(nullptr, E);
+ auto place = lookupResult.first;
+ place->second = NewClass;
+
+ // Constants and variables should always be made the leader.
+ if (const auto *CE = dyn_cast<ConstantExpression>(E))
+ NewClass->RepLeader = CE->getConstantValue();
+ else if (const auto *VE = dyn_cast<VariableExpression>(E))
+ NewClass->RepLeader = VE->getVariableValue();
+ else if (const auto *SE = dyn_cast<StoreExpression>(E))
+ NewClass->RepLeader = SE->getStoreInst()->getValueOperand();
+ else
+ NewClass->RepLeader = V;
+
+ EClass = NewClass;
+ DEBUG(dbgs() << "Created new congruence class for " << *V
+ << " using expression " << *E << " at " << NewClass->ID
+ << " and leader " << *(NewClass->RepLeader) << "\n");
+ DEBUG(dbgs() << "Hash value was " << E->getHashValue() << "\n");
+ } else {
+ EClass = lookupResult.first->second;
+ if (isa<ConstantExpression>(E))
+ assert(isa<Constant>(EClass->RepLeader) &&
+ "Any class with a constant expression should have a "
+ "constant leader");
+
+ assert(EClass && "Somehow don't have an eclass");
+
+ assert(!EClass->Dead && "We accidentally looked up a dead class");
+ }
+ }
+ bool WasInChanged = ChangedValues.erase(V);
+ if (VClass != EClass || WasInChanged) {
+ DEBUG(dbgs() << "Found class " << EClass->ID << " for expression " << E
+ << "\n");
+
+ if (VClass != EClass) {
+ DEBUG(dbgs() << "New congruence class for " << V << " is " << EClass->ID
+ << "\n");
+
+ VClass->Members.erase(V);
+ EClass->Members.insert(V);
+ ValueToClass[V] = EClass;
+ // See if we destroyed the class or need to swap leaders.
+ if (VClass->Members.empty() && VClass != InitialClass) {
+ if (VClass->DefiningExpr) {
+ VClass->Dead = true;
+ DEBUG(dbgs() << "Erasing expression " << *E << " from table\n");
+ ExpressionToClass.erase(VClass->DefiningExpr);
+ }
+ } else if (VClass->RepLeader == V) {
+ // FIXME: When the leader changes, the value numbering of
+ // everything may change, so we need to reprocess.
+ VClass->RepLeader = *(VClass->Members.begin());
+ for (auto M : VClass->Members) {
+ if (auto *I = dyn_cast<Instruction>(M))
+ TouchedInstructions.set(InstrDFS[I]);
+ ChangedValues.insert(M);
+ }
+ }
+ }
+
+ markUsersTouched(V);
+ if (auto *I = dyn_cast<Instruction>(V)) {
+ if (MemoryAccess *MA = MSSA->getMemoryAccess(I)) {
+ // If this is a MemoryDef, we need to update the equivalence table. If
+ // we determined the expression is congruent to a different memory
+ // state, use that different memory state. If we determined it didn't,
+ // we update that as well. Note that currently, we do not guarantee the
+ // "different" memory state dominates us. The goal is to make things
+ // that are congruent look congruent, not ensure we can eliminate one in
+ // favor of the other.
+ // Right now, the only way they can be equivalent is for store
+ // expressions.
+ if (!isa<MemoryUse>(MA)) {
+ if (E && isa<StoreExpression>(E) && EClass->Members.size() != 1) {
+ auto *DefAccess = cast<StoreExpression>(E)->getDefiningAccess();
+ setMemoryAccessEquivTo(MA, DefAccess != MA ? DefAccess : nullptr);
+ } else {
+ setMemoryAccessEquivTo(MA, nullptr);
+ }
+ }
+ markMemoryUsersTouched(MA);
+ }
+ }
+ }
+}
+
+// Process the fact that Edge (from, to) is reachable, including marking
+// any newly reachable blocks and instructions for processing.
+void NewGVN::updateReachableEdge(BasicBlock *From, BasicBlock *To) {
+ // Check if the Edge was reachable before.
+ if (ReachableEdges.insert({From, To}).second) {
+ // If this block wasn't reachable before, all instructions are touched.
+ if (ReachableBlocks.insert(To).second) {
+ DEBUG(dbgs() << "Block " << getBlockName(To) << " marked reachable\n");
+ const auto &InstRange = BlockInstRange.lookup(To);
+ TouchedInstructions.set(InstRange.first, InstRange.second);
+ } else {
+ DEBUG(dbgs() << "Block " << getBlockName(To)
+ << " was reachable, but new edge {" << getBlockName(From)
+ << "," << getBlockName(To) << "} to it found\n");
+
+ // We've made an edge reachable to an existing block, which may
+ // impact predicates. Otherwise, only mark the phi nodes as touched, as
+ // they are the only things that depend on new edges. Anything using their
+ // values will get propagated to if necessary.
+ if (MemoryAccess *MemPhi = MSSA->getMemoryAccess(To))
+ TouchedInstructions.set(InstrDFS[MemPhi]);
+
+ auto BI = To->begin();
+ while (isa<PHINode>(BI)) {
+ TouchedInstructions.set(InstrDFS[&*BI]);
+ ++BI;
+ }
+ }
+ }
+}
+
+// Given a predicate condition (from a switch, cmp, or whatever) and a block,
+// see if we know some constant value for it already.
+Value *NewGVN::findConditionEquivalence(Value *Cond, BasicBlock *B) const {
+ auto Result = lookupOperandLeader(Cond, nullptr, B);
+ if (isa<Constant>(Result))
+ return Result;
+ return nullptr;
+}
+
+// Process the outgoing edges of a block for reachability.
+void NewGVN::processOutgoingEdges(TerminatorInst *TI, BasicBlock *B) {
+ // Evaluate reachability of terminator instruction.
+ BranchInst *BR;
+ if ((BR = dyn_cast<BranchInst>(TI)) && BR->isConditional()) {
+ Value *Cond = BR->getCondition();
+ Value *CondEvaluated = findConditionEquivalence(Cond, B);
+ if (!CondEvaluated) {
+ if (auto *I = dyn_cast<Instruction>(Cond)) {
+ const Expression *E = createExpression(I, B);
+ if (const auto *CE = dyn_cast<ConstantExpression>(E)) {
+ CondEvaluated = CE->getConstantValue();
+ }
+ } else if (isa<ConstantInt>(Cond)) {
+ CondEvaluated = Cond;
+ }
+ }
+ ConstantInt *CI;
+ BasicBlock *TrueSucc = BR->getSuccessor(0);
+ BasicBlock *FalseSucc = BR->getSuccessor(1);
+ if (CondEvaluated && (CI = dyn_cast<ConstantInt>(CondEvaluated))) {
+ if (CI->isOne()) {
+ DEBUG(dbgs() << "Condition for Terminator " << *TI
+ << " evaluated to true\n");
+ updateReachableEdge(B, TrueSucc);
+ } else if (CI->isZero()) {
+ DEBUG(dbgs() << "Condition for Terminator " << *TI
+ << " evaluated to false\n");
+ updateReachableEdge(B, FalseSucc);
+ }
+ } else {
+ updateReachableEdge(B, TrueSucc);
+ updateReachableEdge(B, FalseSucc);
+ }
+ } else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
+ // For switches, propagate the case values into the case
+ // destinations.
+
+ // Remember how many outgoing edges there are to every successor.
+ SmallDenseMap<BasicBlock *, unsigned, 16> SwitchEdges;
+
+ Value *SwitchCond = SI->getCondition();
+ Value *CondEvaluated = findConditionEquivalence(SwitchCond, B);
+ // See if we were able to turn this switch statement into a constant.
+ if (CondEvaluated && isa<ConstantInt>(CondEvaluated)) {
+ auto *CondVal = cast<ConstantInt>(CondEvaluated);
+ // We should be able to get case value for this.
+ auto CaseVal = SI->findCaseValue(CondVal);
+ if (CaseVal.getCaseSuccessor() == SI->getDefaultDest()) {
+ // We proved the value is outside of the range of the case.
+ // We can't do anything other than mark the default dest as reachable,
+ // and go home.
+ updateReachableEdge(B, SI->getDefaultDest());
+ return;
+ }
+ // Now get where it goes and mark it reachable.
+ BasicBlock *TargetBlock = CaseVal.getCaseSuccessor();
+ updateReachableEdge(B, TargetBlock);
+ } else {
+ for (unsigned i = 0, e = SI->getNumSuccessors(); i != e; ++i) {
+ BasicBlock *TargetBlock = SI->getSuccessor(i);
+ ++SwitchEdges[TargetBlock];
+ updateReachableEdge(B, TargetBlock);
+ }
+ }
+ } else {
+ // Otherwise this is either unconditional, or a type we have no
+ // idea about. Just mark successors as reachable.
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) {
+ BasicBlock *TargetBlock = TI->getSuccessor(i);
+ updateReachableEdge(B, TargetBlock);
+ }
+
+ // This also may be a memory defining terminator, in which case, set it
+ // equivalent to nothing.
+ if (MemoryAccess *MA = MSSA->getMemoryAccess(TI))
+ setMemoryAccessEquivTo(MA, nullptr);
+ }
+}
+
+// The algorithm initially places the values of the routine in the INITIAL
+// congruence class. The leader of INITIAL is the undetermined value `TOP`.
+// When the algorithm has finished, values still in INITIAL are unreachable.
+void NewGVN::initializeCongruenceClasses(Function &F) {
+ // FIXME: Now I can't remember why this is 2.
+ NextCongruenceNum = 2;
+ // Initialize all other instructions to be in INITIAL class.
+ CongruenceClass::MemberSet InitialValues;
+ InitialClass = createCongruenceClass(nullptr, nullptr);
+ for (auto &B : F) {
+ if (auto *MP = MSSA->getMemoryAccess(&B))
+ MemoryAccessEquiv.insert({MP, MSSA->getLiveOnEntryDef()});
+
+ for (auto &I : B) {
+ InitialValues.insert(&I);
+ ValueToClass[&I] = InitialClass;
+ // All memory accesses are equivalent to live on entry to start. They must
+ // be initialized to something so that initial changes are noticed. For
+ // the maximal answer, we initialize them all to be the same as
+ // liveOnEntry. Note that to save time, we only initialize the
+ // MemoryDef's for stores and all MemoryPhis to be equal. Right now, no
+ // other expression can generate a memory equivalence. If we start
+ // handling memcpy/etc, we can expand this.
+ if (isa<StoreInst>(&I))
+ MemoryAccessEquiv.insert(
+ {MSSA->getMemoryAccess(&I), MSSA->getLiveOnEntryDef()});
+ }
+ }
+ InitialClass->Members.swap(InitialValues);
+
+ // Initialize arguments to be in their own unique congruence classes
+ for (auto &FA : F.args())
+ createSingletonCongruenceClass(&FA);
+}
+
+void NewGVN::cleanupTables() {
+ for (unsigned i = 0, e = CongruenceClasses.size(); i != e; ++i) {
+ DEBUG(dbgs() << "Congruence class " << CongruenceClasses[i]->ID << " has "
+ << CongruenceClasses[i]->Members.size() << " members\n");
+ // Make sure we delete the congruence class (probably worth switching to
+ // a unique_ptr at some point).
+ delete CongruenceClasses[i];
+ CongruenceClasses[i] = nullptr;
+ }
+
+ ValueToClass.clear();
+ ArgRecycler.clear(ExpressionAllocator);
+ ExpressionAllocator.Reset();
+ CongruenceClasses.clear();
+ ExpressionToClass.clear();
+ ValueToExpression.clear();
+ ReachableBlocks.clear();
+ ReachableEdges.clear();
+#ifndef NDEBUG
+ ProcessedCount.clear();
+#endif
+ DFSDomMap.clear();
+ InstrDFS.clear();
+ InstructionsToErase.clear();
+
+ DFSToInstr.clear();
+ BlockInstRange.clear();
+ TouchedInstructions.clear();
+ DominatedInstRange.clear();
+ MemoryAccessEquiv.clear();
+}
+
+std::pair<unsigned, unsigned> NewGVN::assignDFSNumbers(BasicBlock *B,
+ unsigned Start) {
+ unsigned End = Start;
+ if (MemoryAccess *MemPhi = MSSA->getMemoryAccess(B)) {
+ InstrDFS[MemPhi] = End++;
+ DFSToInstr.emplace_back(MemPhi);
+ }
+
+ for (auto &I : *B) {
+ InstrDFS[&I] = End++;
+ DFSToInstr.emplace_back(&I);
+ }
+
+ // All of the range functions take half-open ranges (open on the end side).
+ // So we do not subtract one from count, because at this point it is one
+ // greater than the last instruction.
+ return std::make_pair(Start, End);
+}
+
+void NewGVN::updateProcessedCount(Value *V) {
+#ifndef NDEBUG
+ if (ProcessedCount.count(V) == 0) {
+ ProcessedCount.insert({V, 1});
+ } else {
+ ProcessedCount[V] += 1;
+ assert(ProcessedCount[V] < 100 &&
+ "Seem to have processed the same Value a lot");
+ }
+#endif
+}
+// Evaluate MemoryPhi nodes symbolically, just like PHI nodes
+void NewGVN::valueNumberMemoryPhi(MemoryPhi *MP) {
+ // If all the arguments are the same, the MemoryPhi has the same value as the
+ // argument.
+ // Filter out unreachable blocks from our operands.
+ auto Filtered = make_filter_range(MP->operands(), [&](const Use &U) {
+ return ReachableBlocks.count(MP->getIncomingBlock(U));
+ });
+
+ assert(Filtered.begin() != Filtered.end() &&
+ "We should not be processing a MemoryPhi in a completely "
+ "unreachable block");
+
+ // Transform the remaining operands into operand leaders.
+ // FIXME: mapped_iterator should have a range version.
+ auto LookupFunc = [&](const Use &U) {
+ return lookupMemoryAccessEquiv(cast<MemoryAccess>(U));
+ };
+ auto MappedBegin = map_iterator(Filtered.begin(), LookupFunc);
+ auto MappedEnd = map_iterator(Filtered.end(), LookupFunc);
+
+ // And now check if all the elements are equal.
+ // Sadly, we can't use std::equal since these are random access iterators.
+ MemoryAccess *AllSameValue = *MappedBegin;
+ ++MappedBegin;
+ bool AllEqual = std::all_of(
+ MappedBegin, MappedEnd,
+ [&AllSameValue](const MemoryAccess *V) { return V == AllSameValue; });
+
+ if (AllEqual)
+ DEBUG(dbgs() << "Memory Phi value numbered to " << *AllSameValue << "\n");
+ else
+ DEBUG(dbgs() << "Memory Phi value numbered to itself\n");
+
+ if (setMemoryAccessEquivTo(MP, AllEqual ? AllSameValue : nullptr))
+ markMemoryUsersTouched(MP);
+}
+
+// Value number a single instruction, symbolically evaluating, performing
+// congruence finding, and updating mappings.
+void NewGVN::valueNumberInstruction(Instruction *I) {
+ DEBUG(dbgs() << "Processing instruction " << *I << "\n");
+ if (isInstructionTriviallyDead(I, TLI)) {
+ DEBUG(dbgs() << "Skipping unused instruction\n");
+ markInstructionForDeletion(I);
+ return;
+ }
+ if (!I->isTerminator()) {
+ const auto *Symbolized = performSymbolicEvaluation(I, I->getParent());
+ // If we couldn't come up with a symbolic expression, use the unknown
+ // expression
+ if (Symbolized == nullptr)
+ Symbolized = createUnknownExpression(I);
+ performCongruenceFinding(I, Symbolized);
+ } else {
+ // Handle terminators that return values. All of them produce values we
+ // don't currently understand.
+ if (!I->getType()->isVoidTy()) {
+ auto *Symbolized = createUnknownExpression(I);
+ performCongruenceFinding(I, Symbolized);
+ }
+ processOutgoingEdges(dyn_cast<TerminatorInst>(I), I->getParent());
+ }
+}
+
+// Verify that the memory equivalence table makes sense relative to the
+// congruence classes.
+void NewGVN::verifyMemoryCongruency() {
+ // Anything equivalent in the memory access table should be in the same
+ // congruence class.
+
+ // Filter out the unreachable and trivially dead entries, because they may
+ // never have been updated if the instructions were not processed.
+ auto ReachableAccessPred =
+ [&](const std::pair<const MemoryAccess *, MemoryAccess *> Pair) {
+ bool Result = ReachableBlocks.count(Pair.first->getBlock());
+ if (!Result)
+ return false;
+ if (auto *MemDef = dyn_cast<MemoryDef>(Pair.first))
+ return !isInstructionTriviallyDead(MemDef->getMemoryInst());
+ return true;
+ };
+
+ auto Filtered = make_filter_range(MemoryAccessEquiv, ReachableAccessPred);
+ for (auto KV : Filtered) {
+ assert(KV.first != KV.second &&
+ "We added a useless equivalence to the memory equivalence table");
+ // Unreachable instructions may not have changed because we never process
+ // them.
+ if (!ReachableBlocks.count(KV.first->getBlock()))
+ continue;
+ if (auto *FirstMUD = dyn_cast<MemoryUseOrDef>(KV.first)) {
+ auto *SecondMUD = dyn_cast<MemoryUseOrDef>(KV.second);
+ if (FirstMUD && SecondMUD) {
+ auto *FirstInst = FirstMUD->getMemoryInst();
+ auto *SecondInst = SecondMUD->getMemoryInst();
+ assert(
+ ValueToClass.lookup(FirstInst) == ValueToClass.lookup(SecondInst) &&
+ "The instructions for these memory operations should have been in "
+ "the same congruence class");
+ }
+ } else if (auto *FirstMP = dyn_cast<MemoryPhi>(KV.first)) {
+
+ // We can only sanely verify that MemoryDefs in the operand list all have
+ // the same class.
+ auto ReachableOperandPred = [&](const Use &U) {
+ return ReachableBlocks.count(FirstMP->getIncomingBlock(U)) &&
+ isa<MemoryDef>(U);
+
+ };
+ // All arguments should be in the same class, ignoring unreachable arguments.
+ auto FilteredPhiArgs =
+ make_filter_range(FirstMP->operands(), ReachableOperandPred);
+ SmallVector<const CongruenceClass *, 16> PhiOpClasses;
+ std::transform(FilteredPhiArgs.begin(), FilteredPhiArgs.end(),
+ std::back_inserter(PhiOpClasses), [&](const Use &U) {
+ const MemoryDef *MD = cast<MemoryDef>(U);
+ return ValueToClass.lookup(MD->getMemoryInst());
+ });
+ assert(std::equal(PhiOpClasses.begin(), PhiOpClasses.end(),
+ PhiOpClasses.begin()) &&
+ "All MemoryPhi arguments should be in the same class");
+ }
+ }
+}
+
+// This is the main transformation entry point.
+bool NewGVN::runGVN(Function &F, DominatorTree *_DT, AssumptionCache *_AC,
+ TargetLibraryInfo *_TLI, AliasAnalysis *_AA,
+ MemorySSA *_MSSA) {
+ bool Changed = false;
+ DT = _DT;
+ AC = _AC;
+ TLI = _TLI;
+ AA = _AA;
+ MSSA = _MSSA;
+ DL = &F.getParent()->getDataLayout();
+ MSSAWalker = MSSA->getWalker();
+
+ // Count number of instructions for sizing of hash tables, and come
+ // up with a global dfs numbering for instructions.
+ unsigned ICount = 1;
+ // Add an empty instruction to account for the fact that we start at 1
+ DFSToInstr.emplace_back(nullptr);
+ // Note: We want RPO traversal of the blocks, which is not quite the same as
+ // dominator tree order, particularly with regard to whether backedges get
+ // visited first or second, given a block with multiple successors.
+ // If we visit in the wrong order, we will end up performing N times as many
+ // iterations.
+ // The dominator tree does guarantee that, for a given dom tree node, its
+ // parent must occur before it in the RPO ordering. Thus, we only need to sort
+ // the siblings.
+ DenseMap<const DomTreeNode *, unsigned> RPOOrdering;
+ ReversePostOrderTraversal<Function *> RPOT(&F);
+ unsigned Counter = 0;
+ for (auto &B : RPOT) {
+ auto *Node = DT->getNode(B);
+ assert(Node && "RPO and Dominator tree should have same reachability");
+ RPOOrdering[Node] = ++Counter;
+ }
+ // Sort dominator tree children arrays into RPO.
+ for (auto &B : RPOT) {
+ auto *Node = DT->getNode(B);
+ if (Node->getChildren().size() > 1)
+ std::sort(Node->begin(), Node->end(),
+ [&RPOOrdering](const DomTreeNode *A, const DomTreeNode *B) {
+ return RPOOrdering[A] < RPOOrdering[B];
+ });
+ }
+
+ // Now a standard depth first ordering of the domtree is equivalent to RPO.
+ auto DFI = df_begin(DT->getRootNode());
+ for (auto DFE = df_end(DT->getRootNode()); DFI != DFE; ++DFI) {
+ BasicBlock *B = DFI->getBlock();
+ const auto &BlockRange = assignDFSNumbers(B, ICount);
+ BlockInstRange.insert({B, BlockRange});
+ ICount += BlockRange.second - BlockRange.first;
+ }
+
+ // Handle forward unreachable blocks and figure out which blocks
+ // have single preds.
+ for (auto &B : F) {
+ // Assign numbers to unreachable blocks.
+ if (!DFI.nodeVisited(DT->getNode(&B))) {
+ const auto &BlockRange = assignDFSNumbers(&B, ICount);
+ BlockInstRange.insert({&B, BlockRange});
+ ICount += BlockRange.second - BlockRange.first;
+ }
+ }
+
+ TouchedInstructions.resize(ICount);
+ DominatedInstRange.reserve(F.size());
+ // Ensure we don't end up resizing the expressionToClass map, as
+ // that can be quite expensive. At most, we have one expression per
+ // instruction.
+ ExpressionToClass.reserve(ICount);
+
+ // Initialize the touched instructions to include the entry block.
+ const auto &InstRange = BlockInstRange.lookup(&F.getEntryBlock());
+ TouchedInstructions.set(InstRange.first, InstRange.second);
+ ReachableBlocks.insert(&F.getEntryBlock());
+
+ initializeCongruenceClasses(F);
+
+ // We start out in the entry block.
+ BasicBlock *LastBlock = &F.getEntryBlock();
+ while (TouchedInstructions.any()) {
+ // Walk through all the instructions in all the blocks in RPO.
+ for (int InstrNum = TouchedInstructions.find_first(); InstrNum != -1;
+ InstrNum = TouchedInstructions.find_next(InstrNum)) {
+ assert(InstrNum != 0 && "Bit 0 should never be set, something touched an "
+ "instruction not in the lookup table");
+ Value *V = DFSToInstr[InstrNum];
+ BasicBlock *CurrBlock = nullptr;
+
+ if (auto *I = dyn_cast<Instruction>(V))
+ CurrBlock = I->getParent();
+ else if (auto *MP = dyn_cast<MemoryPhi>(V))
+ CurrBlock = MP->getBlock();
+ else
+ llvm_unreachable("DFSToInstr gave us an unknown type of instruction");
+
+ // If we hit a new block, do reachability processing.
+ if (CurrBlock != LastBlock) {
+ LastBlock = CurrBlock;
+ bool BlockReachable = ReachableBlocks.count(CurrBlock);
+ const auto &CurrInstRange = BlockInstRange.lookup(CurrBlock);
+
+ // If it's not reachable, erase any touched instructions and move on.
+ if (!BlockReachable) {
+ TouchedInstructions.reset(CurrInstRange.first, CurrInstRange.second);
+ DEBUG(dbgs() << "Skipping instructions in block "
+ << getBlockName(CurrBlock)
+ << " because it is unreachable\n");
+ continue;
+ }
+ updateProcessedCount(CurrBlock);
+ }
+
+ if (auto *MP = dyn_cast<MemoryPhi>(V)) {
+ DEBUG(dbgs() << "Processing MemoryPhi " << *MP << "\n");
+ valueNumberMemoryPhi(MP);
+ } else if (auto *I = dyn_cast<Instruction>(V)) {
+ valueNumberInstruction(I);
+ } else {
+ llvm_unreachable("Should have been a MemoryPhi or Instruction");
+ }
+ updateProcessedCount(V);
+ // Reset after processing (because we may mark ourselves as touched when
+ // we propagate equalities).
+ TouchedInstructions.reset(InstrNum);
+ }
+ }
+
+// FIXME: Move this to expensive checks when we are satisfied with NewGVN
+#ifndef NDEBUG
+ verifyMemoryCongruency();
+#endif
+ Changed |= eliminateInstructions(F);
+
+ // Delete all instructions marked for deletion.
+ for (Instruction *ToErase : InstructionsToErase) {
+ if (!ToErase->use_empty())
+ ToErase->replaceAllUsesWith(UndefValue::get(ToErase->getType()));
+
+ ToErase->eraseFromParent();
+ }
+
+ // Delete all unreachable blocks.
+ auto UnreachableBlockPred = [&](const BasicBlock &BB) {
+ return !ReachableBlocks.count(&BB);
+ };
+
+ for (auto &BB : make_filter_range(F, UnreachableBlockPred)) {
+ DEBUG(dbgs() << "We believe block " << getBlockName(&BB)
+ << " is unreachable\n");
+ deleteInstructionsInBlock(&BB);
+ Changed = true;
+ }
+
+ cleanupTables();
+ return Changed;
+}
+
+bool NewGVN::runOnFunction(Function &F) {
+ if (skipFunction(F))
+ return false;
+ return runGVN(F, &getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
+ &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F),
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(),
+ &getAnalysis<AAResultsWrapperPass>().getAAResults(),
+ &getAnalysis<MemorySSAWrapperPass>().getMSSA());
+}
+
+PreservedAnalyses NewGVNPass::run(Function &F, AnalysisManager<Function> &AM) {
+ NewGVN Impl;
+
+ // Apparently the order in which we get these results matters for
+ // the old GVN (see Chandler's comment in GVN.cpp). I'll keep
+ // the same order here, just in case.
+ auto &AC = AM.getResult<AssumptionAnalysis>(F);
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
+ auto &AA = AM.getResult<AAManager>(F);
+ auto &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
+ bool Changed = Impl.runGVN(F, &DT, &AC, &TLI, &AA, &MSSA);
+ if (!Changed)
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserve<DominatorTreeAnalysis>();
+ PA.preserve<GlobalsAA>();
+ return PA;
+}
+
+// Return true if V is a value that will always be available (IE can
+// be placed anywhere) in the function. We don't do globals here
+// because they are often worse to put in place.
+// TODO: Separate cost from availability
+static bool alwaysAvailable(Value *V) {
+ return isa<Constant>(V) || isa<Argument>(V);
+}
+
+// Get the basic block from an instruction/value.
+static BasicBlock *getBlockForValue(Value *V) {
+ if (auto *I = dyn_cast<Instruction>(V))
+ return I->getParent();
+ return nullptr;
+}
+
+struct NewGVN::ValueDFS {
+ int DFSIn = 0;
+ int DFSOut = 0;
+ int LocalNum = 0;
+ // Only one of these will be set.
+ Value *Val = nullptr;
+ Use *U = nullptr;
+
+ bool operator<(const ValueDFS &Other) const {
+ // It's not enough that any given field be less than - we have sets
+ // of fields that need to be evaluated together to give a proper ordering.
+ // For example, if you have:
+ // DFS (1, 3)
+ // Val 0
+ // DFS (1, 2)
+ // Val 50
+ // We want the second to be less than the first, but if we just go field
+ // by field, we will get to Val 0 < Val 50 and say the first is less than
+ // the second. We only want it to be less than if the DFS orders are equal.
+ //
+ // Each LLVM instruction only produces one value, and thus the lowest-level
+ // differentiator that really matters for the stack (and what we use as a
+ // replacement) is the local dfs number.
+ // Everything else in the structure is instruction level, and only affects
+ // the order in which we will replace operands of a given instruction.
+ //
+ // For a given instruction (IE things with equal dfsin, dfsout, localnum),
+ // the order of replacement of uses does not matter.
+ // IE given,
+ // a = 5
+ // b = a + a
+ // When you hit b, you will have two valuedfs with the same dfsin, out, and
+ // localnum.
+ // The .val will be the same as well.
+ // The .u's will be different.
+ // You will replace both, and it does not matter what order you replace them
+ // in (IE whether you replace operand 2, then operand 1, or operand 1, then
+ // operand 2).
+ // Similarly for the case of same dfsin, dfsout, localnum, but different
+ // .val's
+ // a = 5
+ // b = 6
+ // c = a + b
+ // in c, we will have a valuedfs for a, and one for b, with everything the same
+ // but .val and .u.
+ // It does not matter what order we replace these operands in.
+ // You will always end up with the same IR, and this is guaranteed.
+ return std::tie(DFSIn, DFSOut, LocalNum, Val, U) <
+ std::tie(Other.DFSIn, Other.DFSOut, Other.LocalNum, Other.Val,
+ Other.U);
+ }
+};
+
+void NewGVN::convertDenseToDFSOrdered(CongruenceClass::MemberSet &Dense,
+ std::vector<ValueDFS> &DFSOrderedSet) {
+ for (auto D : Dense) {
+ // First add the value.
+ BasicBlock *BB = getBlockForValue(D);
+ // Constants are handled prior to ever calling this function, so
+ // we should only be left with instructions as members.
+ assert(BB && "Should have figured out a basic block for value");
+ ValueDFS VD;
+
+ std::pair<int, int> DFSPair = DFSDomMap[BB];
+ assert(DFSPair.first != -1 && DFSPair.second != -1 && "Invalid DFS Pair");
+ VD.DFSIn = DFSPair.first;
+ VD.DFSOut = DFSPair.second;
+ VD.Val = D;
+ // If it's an instruction, use the real local dfs number.
+ if (auto *I = dyn_cast<Instruction>(D))
+ VD.LocalNum = InstrDFS[I];
+ else
+ llvm_unreachable("Should have been an instruction");
+
+ DFSOrderedSet.emplace_back(VD);
+
+ // Now add the users.
+ for (auto &U : D->uses()) {
+ if (auto *I = dyn_cast<Instruction>(U.getUser())) {
+ ValueDFS VD;
+ // Put the phi node uses in the incoming block.
+ BasicBlock *IBlock;
+ if (auto *P = dyn_cast<PHINode>(I)) {
+ IBlock = P->getIncomingBlock(U);
+ // Make phi node users appear last in the incoming block
+ // they are from.
+ VD.LocalNum = InstrDFS.size() + 1;
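+ // (InstrDFS.size() + 1 is larger than any assigned DFS number, so
+ // these uses sort after every instruction in the block.)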
+ } else {
+ IBlock = I->getParent();
+ VD.LocalNum = InstrDFS[I];
+ }
+ std::pair<int, int> DFSPair = DFSDomMap[IBlock];
+ VD.DFSIn = DFSPair.first;
+ VD.DFSOut = DFSPair.second;
+ VD.U = &U;
+ DFSOrderedSet.emplace_back(VD);
+ }
+ }
+ }
+}
+
+static void patchReplacementInstruction(Instruction *I, Value *Repl) {
+ // Patch the replacement so that it is not more restrictive than the value
+ // being replaced.
+ auto *Op = dyn_cast<BinaryOperator>(I);
+ auto *ReplOp = dyn_cast<BinaryOperator>(Repl);
+
+ if (Op && ReplOp)
+ ReplOp->andIRFlags(Op);
+
+ if (auto *ReplInst = dyn_cast<Instruction>(Repl)) {
+ // FIXME: If both the original and replacement value are part of the
+ // same control-flow region (meaning that the execution of one
+ // guarantees the execution of the other), then we can combine the
+ // noalias scopes here and do better than the general conservative
+ // answer used in combineMetadata().
+
+ // In general, GVN unifies expressions over different control-flow
+ // regions, and so we need a conservative combination of the noalias
+ // scopes.
+ unsigned KnownIDs[] = {
+ LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
+ LLVMContext::MD_noalias, LLVMContext::MD_range,
+ LLVMContext::MD_fpmath, LLVMContext::MD_invariant_load,
+ LLVMContext::MD_invariant_group};
+ combineMetadata(ReplInst, I, KnownIDs);
+ }
+}
+
+static void patchAndReplaceAllUsesWith(Instruction *I, Value *Repl) {
+ patchReplacementInstruction(I, Repl);
+ I->replaceAllUsesWith(Repl);
+}
+
+void NewGVN::deleteInstructionsInBlock(BasicBlock *BB) {
+ DEBUG(dbgs() << " BasicBlock Dead:" << *BB);
+ ++NumGVNBlocksDeleted;
+
+ // Check to see if there are non-terminating instructions to delete.
+ if (isa<TerminatorInst>(BB->begin()))
+ return;
+
+ // Delete the instructions backwards, as it has a reduced likelihood of having
+ // to update as many def-use and use-def chains. Start after the terminator.
+ auto StartPoint = BB->rbegin();
+ ++StartPoint;
+ // Note that we explicitly recalculate BB->rend() on each iteration,
+ // as it may change when we remove the first instruction.
+ for (BasicBlock::reverse_iterator I(StartPoint); I != BB->rend();) {
+ Instruction &Inst = *I++;
+ if (!Inst.use_empty())
+ Inst.replaceAllUsesWith(UndefValue::get(Inst.getType()));
+ if (isa<LandingPadInst>(Inst))
+ continue;
+
+ Inst.eraseFromParent();
+ ++NumGVNInstrDeleted;
+ }
+}
+
+void NewGVN::markInstructionForDeletion(Instruction *I) {
+ DEBUG(dbgs() << "Marking " << *I << " for deletion\n");
+ InstructionsToErase.insert(I);
+}
+
+void NewGVN::replaceInstruction(Instruction *I, Value *V) {
+
+ DEBUG(dbgs() << "Replacing " << *I << " with " << *V << "\n");
+ patchAndReplaceAllUsesWith(I, V);
+ // We save the actual erasing to avoid invalidating memory
+ // dependencies until we are done with everything.
+ markInstructionForDeletion(I);
+}
+
+namespace {
+
+// This is a stack that contains both the value and dfs info of where
+// that value is valid.
+class ValueDFSStack {
+public:
+ Value *back() const { return ValueStack.back(); }
+ std::pair<int, int> dfs_back() const { return DFSStack.back(); }
+
+ void push_back(Value *V, int DFSIn, int DFSOut) {
+ ValueStack.emplace_back(V);
+ DFSStack.emplace_back(DFSIn, DFSOut);
+ }
+ bool empty() const { return DFSStack.empty(); }
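+ // DFS in/out numbers come from a DFS of the dominator tree, so intervals
+ // nest: lying inside the interval on top of the stack means the value on
+ // top of the stack dominates the queried position.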
+ bool isInScope(int DFSIn, int DFSOut) const {
+ if (empty())
+ return false;
+ return DFSIn >= DFSStack.back().first && DFSOut <= DFSStack.back().second;
+ }
+
+ void popUntilDFSScope(int DFSIn, int DFSOut) {
+
+ // These two should always be in sync at this point.
+ assert(ValueStack.size() == DFSStack.size() &&
+ "Mismatch between ValueStack and DFSStack");
+ while (
+ !DFSStack.empty() &&
+ !(DFSIn >= DFSStack.back().first && DFSOut <= DFSStack.back().second)) {
+ DFSStack.pop_back();
+ ValueStack.pop_back();
+ }
+ }
+
+private:
+ SmallVector<Value *, 8> ValueStack;
+ SmallVector<std::pair<int, int>, 8> DFSStack;
+};
+}
+
+bool NewGVN::eliminateInstructions(Function &F) {
+ // This is a non-standard eliminator. The normal way to eliminate is
+ // to walk the dominator tree in order, keeping track of available
+ // values, and eliminating them. However, this is mildly
+ // pointless. It requires doing lookups on every instruction,
+ // regardless of whether we will ever eliminate it. For
+ // instructions part of most singleton congruence classes, we know we
+ // will never eliminate them.
+
+ // Instead, this eliminator looks at the congruence classes directly, sorts
+ // them into a DFS ordering of the dominator tree, and then we just
+ // perform elimination straight on the sets by walking the congruence
+ // class member uses in order, and eliminate the ones dominated by the
+ // last member. This is worst case O(E log E) where E = number of
+ // instructions in a single congruence class. In theory, this is all
+ // instructions. In practice, it is much faster, as most instructions are
+ // either in singleton congruence classes or can't possibly be eliminated
+ // anyway (if there are no overlapping DFS ranges in class).
+ // When we find something not dominated, it becomes the new leader
+ // for elimination purposes.
+ // TODO: If we wanted to be faster, we could remove any members with no
+ // overlapping ranges while sorting, as we will never eliminate anything
+ // with those members, as they don't dominate anything else in our set.
+
+ bool AnythingReplaced = false;
+
+ // Since we are going to walk the domtree anyway, and we can't guarantee the
+ // DFS numbers are updated, we compute some ourselves.
+ DT->updateDFSNumbers();
+
+ for (auto &B : F) {
+ if (!ReachableBlocks.count(&B)) {
+ for (const auto S : successors(&B)) {
+ for (auto II = S->begin(); isa<PHINode>(II); ++II) {
+ auto &Phi = cast<PHINode>(*II);
+ DEBUG(dbgs() << "Replacing incoming value of " << *II << " for block "
+ << getBlockName(&B)
+ << " with undef due to it being unreachable\n");
+ for (auto &Operand : Phi.incoming_values())
+ if (Phi.getIncomingBlock(Operand) == &B)
+ Operand.set(UndefValue::get(Phi.getType()));
+ }
+ }
+ }
+ DomTreeNode *Node = DT->getNode(&B);
+ if (Node)
+ DFSDomMap[&B] = {Node->getDFSNumIn(), Node->getDFSNumOut()};
+ }
+
+ for (CongruenceClass *CC : CongruenceClasses) {
+ // FIXME: We should eventually be able to replace everything still
+ // in the initial class with undef, as they should be unreachable.
+ // Right now, initial still contains some things we skip value
+ // numbering of (UNREACHABLE's, for example).
+ if (CC == InitialClass || CC->Dead)
+ continue;
+ assert(CC->RepLeader && "We should have had a leader");
+
+ // If this is a leader that is always available, and it's a
+ // constant or has no equivalences, just replace everything with
+ // it. We then update the congruence class with whatever members
+ // are left.
+ if (alwaysAvailable(CC->RepLeader)) {
+ SmallPtrSet<Value *, 4> MembersLeft;
+ for (auto M : CC->Members) {
+
+ Value *Member = M;
+
+ // Void things have no uses we can replace.
+ if (Member == CC->RepLeader || Member->getType()->isVoidTy()) {
+ MembersLeft.insert(Member);
+ continue;
+ }
+
+ DEBUG(dbgs() << "Found replacement " << *(CC->RepLeader) << " for "
+ << *Member << "\n");
+ // Due to equality propagation, these may not always be
+ // instructions; they may be real values. We don't really
+ // care about trying to replace the non-instructions.
+ if (auto *I = dyn_cast<Instruction>(Member)) {
+ assert(CC->RepLeader != I &&
+ "About to accidentally remove our leader");
+ replaceInstruction(I, CC->RepLeader);
+ AnythingReplaced = true;
+
+ continue;
+ } else {
+ MembersLeft.insert(Member);
+ }
+ }
+ CC->Members.swap(MembersLeft);
+
+ } else {
+ DEBUG(dbgs() << "Eliminating in congruence class " << CC->ID << "\n");
+ // If this is a singleton, we can skip it.
+ if (CC->Members.size() != 1) {
+
+ // This is a stack because equality replacement/etc may place
+ // constants in the middle of the member list, and we want to use
+ // those constant values in preference to the current leader, over
+ // the scope of those constants.
+ ValueDFSStack EliminationStack;
+
+ // Convert the members to DFS ordered sets and then merge them.
+ std::vector<ValueDFS> DFSOrderedSet;
+ convertDenseToDFSOrdered(CC->Members, DFSOrderedSet);
+
+ // Sort the whole thing.
+ sort(DFSOrderedSet.begin(), DFSOrderedSet.end());
+
+ for (auto &C : DFSOrderedSet) {
+ int MemberDFSIn = C.DFSIn;
+ int MemberDFSOut = C.DFSOut;
+ Value *Member = C.Val;
+ Use *MemberUse = C.U;
+
+ // We ignore void things because we can't get a value from them.
+ if (Member && Member->getType()->isVoidTy())
+ continue;
+
+ if (EliminationStack.empty()) {
+ DEBUG(dbgs() << "Elimination Stack is empty\n");
+ } else {
+ DEBUG(dbgs() << "Elimination Stack Top DFS numbers are ("
+ << EliminationStack.dfs_back().first << ","
+ << EliminationStack.dfs_back().second << ")\n");
+ }
+ if (Member && isa<Constant>(Member))
+ assert(isa<Constant>(CC->RepLeader));
+
+ DEBUG(dbgs() << "Current DFS numbers are (" << MemberDFSIn << ","
+ << MemberDFSOut << ")\n");
+ // First, we see if we are out of scope or empty. If so,
+ // and there are equivalences, we try to replace the top of
+ // stack with equivalences (if it's on the stack, it must
+ // not have been eliminated yet).
+ // Then we synchronize to our current scope, by
+ // popping until we are back within a DFS scope that
+ // dominates the current member.
+ // Then, what happens depends on a few factors
+ // If the stack is now empty, we need to push
+ // If we have a constant or a local equivalence we want to
+ // start using, we also push.
+ // Otherwise, we walk along, processing members who are
+ // dominated by this scope, and eliminate them.
+ bool ShouldPush =
+ Member && (EliminationStack.empty() || isa<Constant>(Member));
+ bool OutOfScope =
+ !EliminationStack.isInScope(MemberDFSIn, MemberDFSOut);
+
+ if (OutOfScope || ShouldPush) {
+ // Sync to our current scope.
+ EliminationStack.popUntilDFSScope(MemberDFSIn, MemberDFSOut);
+ ShouldPush |= Member && EliminationStack.empty();
+ if (ShouldPush) {
+ EliminationStack.push_back(Member, MemberDFSIn, MemberDFSOut);
+ }
+ }
+
+ // If we get to this point, and the stack is empty we must have a use
+ // with nothing we can use to eliminate it, just skip it.
+ if (EliminationStack.empty())
+ continue;
+
+ // Skip the Values; we only want to eliminate on their uses.
+ if (Member)
+ continue;
+ Value *Result = EliminationStack.back();
+
+ // Don't replace our existing users with ourselves, and don't replace
+ // phi node arguments with the result of the same phi node.
+ // IE tmp = phi(tmp11, undef); tmp11 = foo -> tmp = phi(tmp, undef)
+ if (MemberUse->get() == Result ||
+ (isa<PHINode>(Result) && MemberUse->getUser() == Result))
+ continue;
+
+ DEBUG(dbgs() << "Found replacement " << *Result << " for "
+ << *MemberUse->get() << " in " << *(MemberUse->getUser())
+ << "\n");
+
+ // If we replaced something in an instruction, handle the patching of
+ // metadata.
+ if (auto *ReplacedInst = dyn_cast<Instruction>(MemberUse->get()))
+ patchReplacementInstruction(ReplacedInst, Result);
+
+ assert(isa<Instruction>(MemberUse->getUser()));
+ MemberUse->set(Result);
+ AnythingReplaced = true;
+ }
+ }
+ }
+
+ // Cleanup the congruence class.
+ SmallPtrSet<Value *, 4> MembersLeft;
+ for (Value * Member : CC->Members) {
+ if (Member->getType()->isVoidTy()) {
+ MembersLeft.insert(Member);
+ continue;
+ }
+
+ if (auto *MemberInst = dyn_cast<Instruction>(Member)) {
+ if (isInstructionTriviallyDead(MemberInst)) {
+ // TODO: Don't mark loads of undefs.
+ markInstructionForDeletion(MemberInst);
+ continue;
+ }
+ }
+ MembersLeft.insert(Member);
+ }
+ CC->Members.swap(MembersLeft);
+ }
+
+ return AnythingReplaced;
+}
diff --git a/contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp b/contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
index c4b3e3464f40..1a7ddc9585ba 100644
--- a/contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
@@ -123,7 +123,7 @@ static bool runPartiallyInlineLibCalls(Function &F, TargetLibraryInfo *TLI,
}
PreservedAnalyses
-PartiallyInlineLibCallsPass::run(Function &F, AnalysisManager<Function> &AM) {
+PartiallyInlineLibCallsPass::run(Function &F, FunctionAnalysisManager &AM) {
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
auto &TTI = AM.getResult<TargetIRAnalysis>(F);
if (!runPartiallyInlineLibCalls(F, &TLI, &TTI))
diff --git a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
index e42e2c6d8aa4..181a324861e7 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp
@@ -145,7 +145,8 @@ static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode1,
return nullptr;
}
-void ReassociatePass::BuildRankMap(Function &F) {
+void ReassociatePass::BuildRankMap(Function &F,
+ ReversePostOrderTraversal<Function*> &RPOT) {
unsigned i = 2;
// Assign distinct ranks to function arguments.
@@ -154,7 +155,7 @@ void ReassociatePass::BuildRankMap(Function &F) {
DEBUG(dbgs() << "Calculated Rank[" << I->getName() << "] = " << i << "\n");
}
- ReversePostOrderTraversal<Function *> RPOT(&F);
+ // Traverse basic blocks in ReversePostOrder
for (BasicBlock *BB : RPOT) {
unsigned BBRank = RankMap[BB] = ++i << 16;
@@ -507,9 +508,10 @@ static bool LinearizeExprTree(BinaryOperator *I,
continue;
}
// No uses outside the expression, try morphing it.
- } else if (It != Leaves.end()) {
+ } else {
// Already in the leaf map.
- assert(Visited.count(Op) && "In leaf map but not visited!");
+ assert(It != Leaves.end() && Visited.count(Op) &&
+ "In leaf map but not visited!");
// Update the number of paths to the leaf.
IncorporateWeight(It->second, Weight, Opcode);
@@ -1776,6 +1778,12 @@ Value *ReassociatePass::OptimizeMul(BinaryOperator *I,
return nullptr; // All distinct factors, so nothing left for us to do.
IRBuilder<> Builder(I);
+ // The reassociate transformation for FP operations is performed only
+ // if unsafe algebra is permitted by FastMathFlags. Propagate those flags
+ // to the newly generated operations.
+ if (auto FPI = dyn_cast<FPMathOperator>(I))
+ Builder.setFastMathFlags(FPI->getFastMathFlags());
+
Value *V = buildMinimalMultiplyDAG(Builder, Factors);
if (Ops.empty())
return V;
@@ -1863,6 +1871,8 @@ void ReassociatePass::RecursivelyEraseDeadInsts(
/// Zap the given instruction, adding interesting operands to the work list.
void ReassociatePass::EraseInst(Instruction *I) {
assert(isInstructionTriviallyDead(I) && "Trivially dead instructions only!");
+ DEBUG(dbgs() << "Erasing dead inst: "; I->dump());
+
SmallVector<Value*, 8> Ops(I->op_begin(), I->op_end());
// Erase the dead instruction.
ValueRankMap.erase(I);
@@ -2172,11 +2182,19 @@ void ReassociatePass::ReassociateExpression(BinaryOperator *I) {
}
PreservedAnalyses ReassociatePass::run(Function &F, FunctionAnalysisManager &) {
+ // Get the function's basic blocks in Reverse Post Order. This order is used by
+ // BuildRankMap to pre-calculate ranks correctly. It also excludes dead basic
+ // blocks (it has been seen that the analysis in this pass could hang when
+ // analysing dead basic blocks).
+ ReversePostOrderTraversal<Function *> RPOT(&F);
+
// Calculate the rank map for F.
- BuildRankMap(F);
+ BuildRankMap(F, RPOT);
MadeChange = false;
- for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
+ // Traverse the same blocks that were analysed by BuildRankMap.
+ for (BasicBlock *BI : RPOT) {
+ assert(RankMap.count(&*BI) && "BB should be ranked.");
// Optimize every instruction in the basic block.
for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;)
if (isInstructionTriviallyDead(&*II)) {
@@ -2196,8 +2214,10 @@ PreservedAnalyses ReassociatePass::run(Function &F, FunctionAnalysisManager &) {
// trivially dead instructions have been removed.
while (!ToRedo.empty()) {
Instruction *I = ToRedo.pop_back_val();
- if (isInstructionTriviallyDead(I))
+ if (isInstructionTriviallyDead(I)) {
RecursivelyEraseDeadInsts(I, ToRedo);
+ MadeChange = true;
+ }
}
// Now that we have removed dead instructions, we can reoptimize the
diff --git a/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index bab39a32677f..1de742050cb3 100644
--- a/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -453,7 +453,7 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I) {
if (isa<CallInst>(I) || isa<InvokeInst>(I))
return BaseDefiningValueResult(I, true);
- // I have absolutely no idea how to implement this part yet. It's not
+ // TODO: I have absolutely no idea how to implement this part yet. It's not
// necessarily hard, I just haven't really looked at it yet.
assert(!isa<LandingPadInst>(I) && "Landing Pad is unimplemented");
@@ -676,7 +676,8 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
#ifndef NDEBUG
auto isExpectedBDVType = [](Value *BDV) {
return isa<PHINode>(BDV) || isa<SelectInst>(BDV) ||
- isa<ExtractElementInst>(BDV) || isa<InsertElementInst>(BDV);
+ isa<ExtractElementInst>(BDV) || isa<InsertElementInst>(BDV) ||
+ isa<ShuffleVectorInst>(BDV);
};
#endif
@@ -719,9 +720,11 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
} else if (auto *IE = dyn_cast<InsertElementInst>(Current)) {
visitIncomingValue(IE->getOperand(0)); // vector operand
visitIncomingValue(IE->getOperand(1)); // scalar operand
- } else {
- // There is one known class of instructions we know we don't handle.
- assert(isa<ShuffleVectorInst>(Current));
+ } else if (auto *SV = dyn_cast<ShuffleVectorInst>(Current)) {
+ visitIncomingValue(SV->getOperand(0));
+ visitIncomingValue(SV->getOperand(1));
+ }
+ else {
llvm_unreachable("Unimplemented instruction case");
}
}
@@ -778,12 +781,17 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
// useful in that it drives us to conflict if our input is.
NewState =
meetBDVState(NewState, getStateForInput(EE->getVectorOperand()));
- } else {
+ } else if (auto *IE = dyn_cast<InsertElementInst>(BDV)) {
// Given there's a inherent type mismatch between the operands, will
// *always* produce Conflict.
- auto *IE = cast<InsertElementInst>(BDV);
NewState = meetBDVState(NewState, getStateForInput(IE->getOperand(0)));
NewState = meetBDVState(NewState, getStateForInput(IE->getOperand(1)));
+ } else {
+ // The only instance this does not return a Conflict is when both the
+ // vector operands are the same vector.
+ auto *SV = cast<ShuffleVectorInst>(BDV);
+ NewState = meetBDVState(NewState, getStateForInput(SV->getOperand(0)));
+ NewState = meetBDVState(NewState, getStateForInput(SV->getOperand(1)));
}
BDVState OldState = States[BDV];
@@ -855,13 +863,18 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
std::string Name = suffixed_name_or(I, ".base", "base_ee");
return ExtractElementInst::Create(Undef, EE->getIndexOperand(), Name,
EE);
- } else {
- auto *IE = cast<InsertElementInst>(I);
+ } else if (auto *IE = dyn_cast<InsertElementInst>(I)) {
UndefValue *VecUndef = UndefValue::get(IE->getOperand(0)->getType());
UndefValue *ScalarUndef = UndefValue::get(IE->getOperand(1)->getType());
std::string Name = suffixed_name_or(I, ".base", "base_ie");
return InsertElementInst::Create(VecUndef, ScalarUndef,
IE->getOperand(2), Name, IE);
+ } else {
+ auto *SV = cast<ShuffleVectorInst>(I);
+ UndefValue *VecUndef = UndefValue::get(SV->getOperand(0)->getType());
+ std::string Name = suffixed_name_or(I, ".base", "base_sv");
+ return new ShuffleVectorInst(VecUndef, VecUndef, SV->getOperand(2),
+ Name, SV);
}
};
Instruction *BaseInst = MakeBaseInstPlaceholder(I);
@@ -963,8 +976,7 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
// Find the instruction which produces the base for each input. We may
// need to insert a bitcast.
BaseEE->setOperand(0, getBaseForInput(InVal, BaseEE));
- } else {
- auto *BaseIE = cast<InsertElementInst>(State.getBaseValue());
+ } else if (auto *BaseIE = dyn_cast<InsertElementInst>(State.getBaseValue())){
auto *BdvIE = cast<InsertElementInst>(BDV);
auto UpdateOperand = [&](int OperandIdx) {
Value *InVal = BdvIE->getOperand(OperandIdx);
@@ -973,6 +985,16 @@ static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
};
UpdateOperand(0); // vector operand
UpdateOperand(1); // scalar operand
+ } else {
+ auto *BaseSV = cast<ShuffleVectorInst>(State.getBaseValue());
+ auto *BdvSV = cast<ShuffleVectorInst>(BDV);
+ auto UpdateOperand = [&](int OperandIdx) {
+ Value *InVal = BdvSV->getOperand(OperandIdx);
+ Value *Base = getBaseForInput(InVal, BaseSV);
+ BaseSV->setOperand(OperandIdx, Base);
+ };
+ UpdateOperand(0); // vector operand
+ UpdateOperand(1); // vector operand
}
}
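Taken together, the shufflevector hunks extend the base-pointer computation so that the base of a shuffle of gc pointers is, conceptually, the shuffle of its operands' bases. A hedged illustration of the intent, with invented IR names:

// Illustration only (hypothetical values):
//   %sv      = shufflevector <2 x i8 addrspace(1)*> %a, <2 x i8 addrspace(1)*> %b,
//              <2 x i32> <i32 0, i32 3>
//   %sv.base = shufflevector <2 x i8 addrspace(1)*> %a.base, <2 x i8 addrspace(1)*> %b.base,
//              <2 x i32> <i32 0, i32 3>
// The placeholder is first created with undef vector operands ("base_sv") and
// the operand bases are patched in later by UpdateOperand(0)/UpdateOperand(1).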
@@ -1154,7 +1176,7 @@ static void CreateGCRelocates(ArrayRef<Value *> LiveVariables,
return;
auto FindIndex = [](ArrayRef<Value *> LiveVec, Value *Val) {
- auto ValIt = std::find(LiveVec.begin(), LiveVec.end(), Val);
+ auto ValIt = find(LiveVec, Val);
assert(ValIt != LiveVec.end() && "Val not found in LiveVec!");
size_t Index = std::distance(LiveVec.begin(), ValIt);
assert(Index < LiveVec.size() && "Bug in std::find?");
@@ -1273,6 +1295,24 @@ public:
};
}
+static StringRef getDeoptLowering(CallSite CS) {
+ const char *DeoptLowering = "deopt-lowering";
+ if (CS.hasFnAttr(DeoptLowering)) {
+ // FIXME: CallSite has a *really* confusing interface around attributes
+ // with values.
+ const AttributeSet &CSAS = CS.getAttributes();
+ if (CSAS.hasAttribute(AttributeSet::FunctionIndex,
+ DeoptLowering))
+ return CSAS.getAttribute(AttributeSet::FunctionIndex,
+ DeoptLowering).getValueAsString();
+ Function *F = CS.getCalledFunction();
+ assert(F && F->hasFnAttribute(DeoptLowering));
+ return F->getFnAttribute(DeoptLowering).getValueAsString();
+ }
+ return "live-through";
+}
+
+
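The helper accepts the attribute on either the call site or the callee and defaults to "live-through". A minimal sketch of how client code might request the alternative lowering (the wrapper name is invented for illustration):

#include "llvm/IR/Function.h"

// Mark a callee so its deopt state is lowered as live-in rather than the
// default live-through; string attributes are plain key/value pairs.
static void requestLiveInDeopt(llvm::Function &Callee) {
  Callee.addFnAttr("deopt-lowering", "live-in");
}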
static void
makeStatepointExplicitImpl(const CallSite CS, /* to replace */
const SmallVectorImpl<Value *> &BasePtrs,
@@ -1314,6 +1354,14 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */
if (SD.StatepointID)
StatepointID = *SD.StatepointID;
+ // Pass through the requested lowering if any. The default is live-through.
+ StringRef DeoptLowering = getDeoptLowering(CS);
+ if (DeoptLowering.equals("live-in"))
+ Flags |= uint32_t(StatepointFlags::DeoptLiveIn);
+ else {
+ assert(DeoptLowering.equals("live-through") && "Unsupported value!");
+ }
+
Value *CallTarget = CS.getCalledValue();
if (Function *F = dyn_cast<Function>(CallTarget)) {
if (F->getIntrinsicID() == Intrinsic::experimental_deoptimize) {
@@ -1347,7 +1395,7 @@ makeStatepointExplicitImpl(const CallSite CS, /* to replace */
StatepointID, NumPatchBytes, CallTarget, Flags, CallArgs,
TransitionArgs, DeoptArgs, GCArgs, "safepoint_token");
- Call->setTailCall(ToReplace->isTailCall());
+ Call->setTailCallKind(ToReplace->getTailCallKind());
Call->setCallingConv(ToReplace->getCallingConv());
// Currently we will fail on parameter attributes and on certain
@@ -1740,9 +1788,8 @@ static void relocationViaAlloca(
/// tests in ways which make them less useful in testing fused safepoints.
template <typename T> static void unique_unsorted(SmallVectorImpl<T> &Vec) {
SmallSet<T, 8> Seen;
- Vec.erase(std::remove_if(Vec.begin(), Vec.end(), [&](const T &V) {
- return !Seen.insert(V).second;
- }), Vec.end());
+ Vec.erase(remove_if(Vec, [&](const T &V) { return !Seen.insert(V).second; }),
+ Vec.end());
}
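The range form used here, and in many hunks below, comes from llvm/ADT/STLExtras.h and simply forwards to the iterator-pair overload. A small standalone sketch of the same erase/remove idiom:

#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"

// Drop negative entries in place; equivalent to spelling out
// std::remove_if(Vec.begin(), Vec.end(), ...).
static void dropNegatives(llvm::SmallVectorImpl<int> &Vec) {
  Vec.erase(llvm::remove_if(Vec, [](int V) { return V < 0; }), Vec.end());
}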
/// Insert holders so that each Value is obviously live through the entire
@@ -1784,38 +1831,33 @@ static void findLiveReferences(
}
// Helper function for the "rematerializeLiveValues". It walks use chain
-// starting from the "CurrentValue" until it meets "BaseValue". Only "simple"
-// values are visited (currently it is GEP's and casts). Returns true if it
-// successfully reached "BaseValue" and false otherwise.
-// Fills "ChainToBase" array with all visited values. "BaseValue" is not
-// recorded.
-static bool findRematerializableChainToBasePointer(
+// starting from the "CurrentValue" until it reaches the root of the chain, i.e.
+// the base or a value it cannot process. Only "simple" values are processed
+// (currently it is GEP's and casts). The returned root is examined by the
+// callers of findRematerializableChainToBasePointer. Fills "ChainToBase" array
+// with all visited values.
+static Value* findRematerializableChainToBasePointer(
SmallVectorImpl<Instruction*> &ChainToBase,
- Value *CurrentValue, Value *BaseValue) {
-
- // We have found a base value
- if (CurrentValue == BaseValue) {
- return true;
- }
+ Value *CurrentValue) {
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurrentValue)) {
ChainToBase.push_back(GEP);
return findRematerializableChainToBasePointer(ChainToBase,
- GEP->getPointerOperand(),
- BaseValue);
+ GEP->getPointerOperand());
}
if (CastInst *CI = dyn_cast<CastInst>(CurrentValue)) {
if (!CI->isNoopCast(CI->getModule()->getDataLayout()))
- return false;
+ return CI;
ChainToBase.push_back(CI);
return findRematerializableChainToBasePointer(ChainToBase,
- CI->getOperand(0), BaseValue);
+ CI->getOperand(0));
}
- // Not supported instruction in the chain
- return false;
+ // We have reached the root of the chain, which is either equal to the base or
+ // is the first unsupported value along the use chain.
+ return CurrentValue;
}
// Helper function for the "rematerializeLiveValues". Compute cost of the use
@@ -1852,6 +1894,34 @@ chainToBasePointerCost(SmallVectorImpl<Instruction*> &Chain,
return Cost;
}
+static bool AreEquivalentPhiNodes(PHINode &OrigRootPhi,
+                                  PHINode &AlternateRootPhi) {
+ unsigned PhiNum = OrigRootPhi.getNumIncomingValues();
+ if (PhiNum != AlternateRootPhi.getNumIncomingValues() ||
+ OrigRootPhi.getParent() != AlternateRootPhi.getParent())
+ return false;
+ // Map of incoming values and their corresponding basic blocks of
+ // OrigRootPhi.
+ SmallDenseMap<Value *, BasicBlock *, 8> CurrentIncomingValues;
+ for (unsigned i = 0; i < PhiNum; i++)
+ CurrentIncomingValues[OrigRootPhi.getIncomingValue(i)] =
+ OrigRootPhi.getIncomingBlock(i);
+
+ // Both current and base PHIs should have same incoming values and
+ // the same basic blocks corresponding to the incoming values.
+ for (unsigned i = 0; i < PhiNum; i++) {
+ auto CIVI =
+ CurrentIncomingValues.find(AlternateRootPhi.getIncomingValue(i));
+ if (CIVI == CurrentIncomingValues.end())
+ return false;
+ BasicBlock *CurrentIncomingBB = CIVI->second;
+ if (CurrentIncomingBB != AlternateRootPhi.getIncomingBlock(i))
+ return false;
+ }
+ return true;
+}
+
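The check is order-insensitive: it compares the set of (incoming value, incoming block) pairs rather than operand positions. A hypothetical pair of phis it would accept:

// IR shown as comments, names invented; both phis live in the same block and
// carry the same incoming pairs, merely listed in a different order:
//   %p      = phi i8 addrspace(1)* [ %x, %left ], [ %y, %right ]
//   %p.base = phi i8 addrspace(1)* [ %y, %right ], [ %x, %left ]
// AreEquivalentPhiNodes would return true for this pair.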
// From the statepoint live set pick values that are cheaper to recompute than
// to relocate. Remove these values from the live set, rematerialize them after
// statepoint and record them in "Info" structure. Note that similar to
@@ -1869,16 +1939,38 @@ static void rematerializeLiveValues(CallSite CS,
// For each live pointer find it's defining chain
SmallVector<Instruction *, 3> ChainToBase;
assert(Info.PointerToBase.count(LiveValue));
- bool FoundChain =
+ Value *RootOfChain =
findRematerializableChainToBasePointer(ChainToBase,
- LiveValue,
- Info.PointerToBase[LiveValue]);
+ LiveValue);
+
// Nothing to do, or chain is too long
- if (!FoundChain ||
- ChainToBase.size() == 0 ||
+ if (ChainToBase.size() == 0 ||
ChainToBase.size() > ChainLengthThreshold)
continue;
+ // Handle the scenario where the RootOfChain is not equal to the
+ // Base Value, but they are essentially the same phi values.
+ if (RootOfChain != Info.PointerToBase[LiveValue]) {
+ PHINode *OrigRootPhi = dyn_cast<PHINode>(RootOfChain);
+ PHINode *AlternateRootPhi = dyn_cast<PHINode>(Info.PointerToBase[LiveValue]);
+ if (!OrigRootPhi || !AlternateRootPhi)
+ continue;
+ // PHI nodes that have the same incoming values and belong to the same
+ // basic block are essentially the same SSA value. When the original phi
+ // has incoming values with different base pointers, the original phi is
+ // marked as conflict, and an additional `AlternateRootPhi` with the same
+ // incoming values gets generated by the findBasePointer function. We need
+ // to identify that the newly generated AlternateRootPhi (.base version of
+ // the phi) and RootOfChain (the original phi node itself) are the same, so
+ // that we can rematerialize the gep and casts. This is a workaround for the
+ // deficiency in the findBasePointer algorithm.
+ if (!AreEquivalentPhiNodes(*OrigRootPhi, *AlternateRootPhi))
+ continue;
+ // Now that the phi nodes are proved to be the same, assert that
+ // findBasePointer's newly generated AlternateRootPhi is present in the
+ // liveset of the call.
+ assert(Info.LiveSet.count(AlternateRootPhi));
+ }
// Compute cost of this chain
unsigned Cost = chainToBasePointerCost(ChainToBase, TTI);
// TODO: We can also account for cases when we will be able to remove some
@@ -1906,7 +1998,8 @@ static void rematerializeLiveValues(CallSite CS,
// Utility function which clones all instructions from "ChainToBase"
// and inserts them before "InsertBefore". Returns rematerialized value
// which should be used after statepoint.
- auto rematerializeChain = [&ChainToBase](Instruction *InsertBefore) {
+ auto rematerializeChain = [&ChainToBase](
+ Instruction *InsertBefore, Value *RootOfChain, Value *AlternateLiveBase) {
Instruction *LastClonedValue = nullptr;
Instruction *LastValue = nullptr;
for (Instruction *Instr: ChainToBase) {
@@ -1926,14 +2019,24 @@ static void rematerializeLiveValues(CallSite CS,
assert(LastValue);
ClonedValue->replaceUsesOfWith(LastValue, LastClonedValue);
#ifndef NDEBUG
- // Assert that cloned instruction does not use any instructions from
- // this chain other than LastClonedValue
for (auto OpValue : ClonedValue->operand_values()) {
- assert(std::find(ChainToBase.begin(), ChainToBase.end(), OpValue) ==
- ChainToBase.end() &&
+ // Assert that cloned instruction does not use any instructions from
+ // this chain other than LastClonedValue
+ assert(!is_contained(ChainToBase, OpValue) &&
"incorrect use in rematerialization chain");
+ // Assert that the cloned instruction does not use the RootOfChain
+ // or the AlternateLiveBase.
+ assert(OpValue != RootOfChain && OpValue != AlternateLiveBase);
}
#endif
+ } else {
+ // For the first instruction, replace the use of unrelocated base i.e.
+ // RootOfChain/OrigRootPhi, with the corresponding PHI present in the
+ // live set. They have been proved to be the same PHI nodes. Note
+ // that the *only* use of the RootOfChain in the ChainToBase list is
+ // the first Value in the list.
+ if (RootOfChain != AlternateLiveBase)
+ ClonedValue->replaceUsesOfWith(RootOfChain, AlternateLiveBase);
}
LastClonedValue = ClonedValue;
@@ -1948,7 +2051,8 @@ static void rematerializeLiveValues(CallSite CS,
if (CS.isCall()) {
Instruction *InsertBefore = CS.getInstruction()->getNextNode();
assert(InsertBefore);
- Instruction *RematerializedValue = rematerializeChain(InsertBefore);
+ Instruction *RematerializedValue = rematerializeChain(
+ InsertBefore, RootOfChain, Info.PointerToBase[LiveValue]);
Info.RematerializedValues[RematerializedValue] = LiveValue;
} else {
InvokeInst *Invoke = cast<InvokeInst>(CS.getInstruction());
@@ -1958,10 +2062,10 @@ static void rematerializeLiveValues(CallSite CS,
Instruction *UnwindInsertBefore =
&*Invoke->getUnwindDest()->getFirstInsertionPt();
- Instruction *NormalRematerializedValue =
- rematerializeChain(NormalInsertBefore);
- Instruction *UnwindRematerializedValue =
- rematerializeChain(UnwindInsertBefore);
+ Instruction *NormalRematerializedValue = rematerializeChain(
+ NormalInsertBefore, RootOfChain, Info.PointerToBase[LiveValue]);
+ Instruction *UnwindRematerializedValue = rematerializeChain(
+ UnwindInsertBefore, RootOfChain, Info.PointerToBase[LiveValue]);
Info.RematerializedValues[NormalRematerializedValue] = LiveValue;
Info.RematerializedValues[UnwindRematerializedValue] = LiveValue;
@@ -2268,8 +2372,7 @@ static bool shouldRewriteStatepointsIn(Function &F) {
void RewriteStatepointsForGC::stripNonValidAttributes(Module &M) {
#ifndef NDEBUG
- assert(std::any_of(M.begin(), M.end(), shouldRewriteStatepointsIn) &&
- "precondition!");
+ assert(any_of(M, shouldRewriteStatepointsIn) && "precondition!");
#endif
for (Function &F : M)
@@ -2546,8 +2649,8 @@ static void findLiveSetAtInst(Instruction *Inst, GCPtrLivenessData &Data,
// call result is not live (normal), nor are it's arguments
// (unless they're used again later). This adjustment is
// specifically what we need to relocate
- BasicBlock::reverse_iterator rend(Inst->getIterator());
- computeLiveInValues(BB->rbegin(), rend, LiveOut);
+ computeLiveInValues(BB->rbegin(), ++Inst->getIterator().getReverse(),
+ LiveOut);
LiveOut.remove(Inst);
Out.insert(LiveOut.begin(), LiveOut.end());
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
index f74f28abf85a..8a6be97d08c7 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SCCP.cpp
@@ -242,7 +242,7 @@ public:
/// this method must be called.
void AddTrackedFunction(Function *F) {
// Add an entry, F -> undef.
- if (StructType *STy = dyn_cast<StructType>(F->getReturnType())) {
+ if (auto *STy = dyn_cast<StructType>(F->getReturnType())) {
MRVFunctionsTracked.insert(F);
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
TrackedMultipleRetVals.insert(std::make_pair(std::make_pair(F, i),
@@ -272,7 +272,7 @@ public:
std::vector<LatticeVal> getStructLatticeValueFor(Value *V) const {
std::vector<LatticeVal> StructValues;
- StructType *STy = dyn_cast<StructType>(V->getType());
+ auto *STy = dyn_cast<StructType>(V->getType());
assert(STy && "getStructLatticeValueFor() can be called only on structs");
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
auto I = StructValueState.find(std::make_pair(V, i));
@@ -300,23 +300,44 @@ public:
return TrackedGlobals;
}
+ /// getMRVFunctionsTracked - Get the set of functions which return multiple
+ /// values tracked by the pass.
+ const SmallPtrSet<Function *, 16> getMRVFunctionsTracked() {
+ return MRVFunctionsTracked;
+ }
+
void markOverdefined(Value *V) {
- assert(!V->getType()->isStructTy() && "Should use other method");
+ assert(!V->getType()->isStructTy() &&
+ "structs should use markAnythingOverdefined");
markOverdefined(ValueState[V], V);
}
/// markAnythingOverdefined - Mark the specified value overdefined. This
/// works with both scalars and structs.
void markAnythingOverdefined(Value *V) {
- if (StructType *STy = dyn_cast<StructType>(V->getType()))
+ if (auto *STy = dyn_cast<StructType>(V->getType()))
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
markOverdefined(getStructValueState(V, i), V);
else
markOverdefined(V);
}
+ // isStructLatticeConstant - Return true if all the lattice values
+ // corresponding to elements of the structure are not overdefined,
+ // false otherwise.
+ bool isStructLatticeConstant(Function *F, StructType *STy) {
+ for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
+ const auto &It = TrackedMultipleRetVals.find(std::make_pair(F, i));
+ assert(It != TrackedMultipleRetVals.end());
+ LatticeVal LV = It->second;
+ if (LV.isOverdefined())
+ return false;
+ }
+ return true;
+ }
+
private:
- // pushToWorkList - Helper for markConstant/markForcedConstant
+ // pushToWorkList - Helper for markConstant/markForcedConstant/markOverdefined
void pushToWorkList(LatticeVal &IV, Value *V) {
if (IV.isOverdefined())
return OverdefinedInstWorkList.push_back(V);
@@ -334,12 +355,12 @@ private:
}
void markConstant(Value *V, Constant *C) {
- assert(!V->getType()->isStructTy() && "Should use other method");
+ assert(!V->getType()->isStructTy() && "structs should use mergeInValue");
markConstant(ValueState[V], V, C);
}
void markForcedConstant(Value *V, Constant *C) {
- assert(!V->getType()->isStructTy() && "Should use other method");
+ assert(!V->getType()->isStructTy() && "structs should use mergeInValue");
LatticeVal &IV = ValueState[V];
IV.markForcedConstant(C);
DEBUG(dbgs() << "markForcedConstant: " << *C << ": " << *V << '\n');
@@ -354,12 +375,12 @@ private:
if (!IV.markOverdefined()) return;
DEBUG(dbgs() << "markOverdefined: ";
- if (Function *F = dyn_cast<Function>(V))
+ if (auto *F = dyn_cast<Function>(V))
dbgs() << "Function '" << F->getName() << "'\n";
else
dbgs() << *V << '\n');
// Only instructions go on the work list
- OverdefinedInstWorkList.push_back(V);
+ pushToWorkList(IV, V);
}
void mergeInValue(LatticeVal &IV, Value *V, LatticeVal MergeWithV) {
@@ -374,7 +395,8 @@ private:
}
void mergeInValue(Value *V, LatticeVal MergeWithV) {
- assert(!V->getType()->isStructTy() && "Should use other method");
+ assert(!V->getType()->isStructTy() &&
+ "non-structs should use markConstant");
mergeInValue(ValueState[V], V, MergeWithV);
}
@@ -392,7 +414,7 @@ private:
if (!I.second)
return LV; // Common case, already in the map.
- if (Constant *C = dyn_cast<Constant>(V)) {
+ if (auto *C = dyn_cast<Constant>(V)) {
// Undef values remain unknown.
if (!isa<UndefValue>(V))
LV.markConstant(C); // Constants are constant
@@ -418,7 +440,7 @@ private:
if (!I.second)
return LV; // Common case, already in the map.
- if (Constant *C = dyn_cast<Constant>(V)) {
+ if (auto *C = dyn_cast<Constant>(V)) {
Constant *Elt = C->getAggregateElement(i);
if (!Elt)
@@ -527,7 +549,7 @@ private:
void visitInstruction(Instruction &I) {
// If a new instruction is added to LLVM that we don't handle.
- dbgs() << "SCCP: Don't know how to handle: " << I << '\n';
+ DEBUG(dbgs() << "SCCP: Don't know how to handle: " << I << '\n');
markAnythingOverdefined(&I); // Just in case
}
};
@@ -541,7 +563,7 @@ private:
void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI,
SmallVectorImpl<bool> &Succs) {
Succs.resize(TI.getNumSuccessors());
- if (BranchInst *BI = dyn_cast<BranchInst>(&TI)) {
+ if (auto *BI = dyn_cast<BranchInst>(&TI)) {
if (BI->isUnconditional()) {
Succs[0] = true;
return;
@@ -568,7 +590,7 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI,
return;
}
- if (SwitchInst *SI = dyn_cast<SwitchInst>(&TI)) {
+ if (auto *SI = dyn_cast<SwitchInst>(&TI)) {
if (!SI->getNumCases()) {
Succs[0] = true;
return;
@@ -594,9 +616,7 @@ void SCCPSolver::getFeasibleSuccessors(TerminatorInst &TI,
return;
}
-#ifndef NDEBUG
- dbgs() << "Unknown terminator instruction: " << TI << '\n';
-#endif
+ DEBUG(dbgs() << "Unknown terminator instruction: " << TI << '\n');
llvm_unreachable("SCCP: Don't know how to handle this terminator!");
}
@@ -612,7 +632,7 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
// Check to make sure this edge itself is actually feasible now.
TerminatorInst *TI = From->getTerminator();
- if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (auto *BI = dyn_cast<BranchInst>(TI)) {
if (BI->isUnconditional())
return true;
@@ -632,7 +652,7 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
if (TI->isExceptional())
return true;
- if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ if (auto *SI = dyn_cast<SwitchInst>(TI)) {
if (SI->getNumCases() < 1)
return true;
@@ -650,9 +670,7 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
if (isa<IndirectBrInst>(TI))
return true;
-#ifndef NDEBUG
- dbgs() << "Unknown terminator instruction: " << *TI << '\n';
-#endif
+ DEBUG(dbgs() << "Unknown terminator instruction: " << *TI << '\n');
llvm_unreachable("SCCP: Don't know how to handle this terminator!");
}
@@ -747,7 +765,7 @@ void SCCPSolver::visitReturnInst(ReturnInst &I) {
// Handle functions that return multiple values.
if (!TrackedMultipleRetVals.empty()) {
- if (StructType *STy = dyn_cast<StructType>(ResultOp->getType()))
+ if (auto *STy = dyn_cast<StructType>(ResultOp->getType()))
if (MRVFunctionsTracked.count(F))
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i)
mergeInValue(TrackedMultipleRetVals[std::make_pair(F, i)], F,
@@ -806,7 +824,7 @@ void SCCPSolver::visitExtractValueInst(ExtractValueInst &EVI) {
}
void SCCPSolver::visitInsertValueInst(InsertValueInst &IVI) {
- StructType *STy = dyn_cast<StructType>(IVI.getType());
+ auto *STy = dyn_cast<StructType>(IVI.getType());
if (!STy)
return markOverdefined(&IVI);
@@ -898,7 +916,8 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) {
// If this is an AND or OR with 0 or -1, it doesn't matter that the other
// operand is overdefined.
- if (I.getOpcode() == Instruction::And || I.getOpcode() == Instruction::Or) {
+ if (I.getOpcode() == Instruction::And || I.getOpcode() == Instruction::Mul ||
+ I.getOpcode() == Instruction::Or) {
LatticeVal *NonOverdefVal = nullptr;
if (!V1State.isOverdefined())
NonOverdefVal = &V1State;
@@ -906,25 +925,19 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) {
NonOverdefVal = &V2State;
if (NonOverdefVal) {
- if (NonOverdefVal->isUnknown()) {
- // Could annihilate value.
- if (I.getOpcode() == Instruction::And)
- markConstant(IV, &I, Constant::getNullValue(I.getType()));
- else if (VectorType *PT = dyn_cast<VectorType>(I.getType()))
- markConstant(IV, &I, Constant::getAllOnesValue(PT));
- else
- markConstant(IV, &I,
- Constant::getAllOnesValue(I.getType()));
+ if (NonOverdefVal->isUnknown())
return;
- }
- if (I.getOpcode() == Instruction::And) {
+ if (I.getOpcode() == Instruction::And ||
+ I.getOpcode() == Instruction::Mul) {
// X and 0 = 0
+ // X * 0 = 0
if (NonOverdefVal->getConstant()->isNullValue())
return markConstant(IV, &I, NonOverdefVal->getConstant());
} else {
+ // X or -1 = -1
if (ConstantInt *CI = NonOverdefVal->getConstantInt())
- if (CI->isAllOnesValue()) // X or -1 = -1
+ if (CI->isAllOnesValue())
return markConstant(IV, &I, NonOverdefVal->getConstant());
}
}
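A short worked example of the extended shortcut in terms of SCCP lattice states:

// Suppose the solver currently knows
//   %x : overdefined
//   %y : constant 0
// For %z = mul i32 %x, %y the non-overdefined operand is the constant 0, so the
// branch above folds %z to constant 0 without waiting on %x, exactly as it
// already did for 'and'; for 'or' the absorbing constant is -1 instead.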
@@ -1044,7 +1057,7 @@ void SCCPSolver::visitLoadInst(LoadInst &I) {
return;
// Transform load (constant global) into the value loaded.
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
+ if (auto *GV = dyn_cast<GlobalVariable>(Ptr)) {
if (!TrackedGlobals.empty()) {
// If we are tracking this global, merge in the known value for it.
DenseMap<GlobalVariable*, LatticeVal>::iterator It =
@@ -1132,7 +1145,7 @@ CallOverdefined:
continue;
}
- if (StructType *STy = dyn_cast<StructType>(AI->getType())) {
+ if (auto *STy = dyn_cast<StructType>(AI->getType())) {
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
LatticeVal CallArg = getStructValueState(*CAI, i);
mergeInValue(getStructValueState(&*AI, i), &*AI, CallArg);
@@ -1144,7 +1157,7 @@ CallOverdefined:
}
// If this is a single/zero retval case, see if we're tracking the function.
- if (StructType *STy = dyn_cast<StructType>(F->getReturnType())) {
+ if (auto *STy = dyn_cast<StructType>(F->getReturnType())) {
if (!MRVFunctionsTracked.count(F))
goto CallOverdefined; // Not tracking this callee.
@@ -1182,7 +1195,7 @@ void SCCPSolver::Solve() {
// Update all of the users of this instruction's value.
//
for (User *U : I->users())
- if (Instruction *UI = dyn_cast<Instruction>(U))
+ if (auto *UI = dyn_cast<Instruction>(U))
OperandChangedState(UI);
}
@@ -1201,7 +1214,7 @@ void SCCPSolver::Solve() {
//
if (I->getType()->isStructTy() || !getValueState(I).isOverdefined())
for (User *U : I->users())
- if (Instruction *UI = dyn_cast<Instruction>(U))
+ if (auto *UI = dyn_cast<Instruction>(U))
OperandChangedState(UI);
}
@@ -1246,7 +1259,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// Look for instructions which produce undef values.
if (I.getType()->isVoidTy()) continue;
- if (StructType *STy = dyn_cast<StructType>(I.getType())) {
+ if (auto *STy = dyn_cast<StructType>(I.getType())) {
// Only a few things that can be structs matter for undef.
// Tracked calls must never be marked overdefined in ResolvedUndefsIn.
@@ -1386,8 +1399,8 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
break;
}
- // undef >>a X -> all ones
- markForcedConstant(&I, Constant::getAllOnesValue(ITy));
+ // undef >>a X -> 0
+ markForcedConstant(&I, Constant::getNullValue(ITy));
return true;
case Instruction::LShr:
case Instruction::Shl:
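A quick sanity check of the new result:

// Resolving the undef shift operand to 0 gives (0 >>a X) == 0 for any in-range
// X, so folding 'undef >>a X' to 0 is consistent with a concrete choice of the
// undef; the previous all-ones result corresponded to resolving the undef to -1.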
@@ -1467,7 +1480,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// we force the branch to go one way or the other to make the successor
// values live. It doesn't really matter which way we force it.
TerminatorInst *TI = BB.getTerminator();
- if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
+ if (auto *BI = dyn_cast<BranchInst>(TI)) {
if (!BI->isConditional()) continue;
if (!getValueState(BI->getCondition()).isUnknown())
continue;
@@ -1488,7 +1501,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
return true;
}
- if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
+ if (auto *SI = dyn_cast<SwitchInst>(TI)) {
if (!SI->getNumCases() || !getValueState(SI->getCondition()).isUnknown())
continue;
@@ -1512,11 +1525,10 @@ static bool tryToReplaceWithConstant(SCCPSolver &Solver, Value *V) {
Constant *Const = nullptr;
if (V->getType()->isStructTy()) {
std::vector<LatticeVal> IVs = Solver.getStructLatticeValueFor(V);
- if (std::any_of(IVs.begin(), IVs.end(),
- [](LatticeVal &LV) { return LV.isOverdefined(); }))
+ if (any_of(IVs, [](const LatticeVal &LV) { return LV.isOverdefined(); }))
return false;
std::vector<Constant *> ConstVals;
- StructType *ST = dyn_cast<StructType>(V->getType());
+ auto *ST = dyn_cast<StructType>(V->getType());
for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i) {
LatticeVal V = IVs[i];
ConstVals.push_back(V.isConstant()
@@ -1599,7 +1611,7 @@ static bool runSCCP(Function &F, const DataLayout &DL,
return MadeChanges;
}
-PreservedAnalyses SCCPPass::run(Function &F, AnalysisManager<Function> &AM) {
+PreservedAnalyses SCCPPass::run(Function &F, FunctionAnalysisManager &AM) {
const DataLayout &DL = F.getParent()->getDataLayout();
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
if (!runSCCP(F, DL, &TLI))
@@ -1657,7 +1669,7 @@ static bool AddressIsTaken(const GlobalValue *GV) {
for (const Use &U : GV->uses()) {
const User *UR = U.getUser();
- if (const StoreInst *SI = dyn_cast<StoreInst>(UR)) {
+ if (const auto *SI = dyn_cast<StoreInst>(UR)) {
if (SI->getOperand(0) == GV || SI->isVolatile())
return true; // Storing addr of GV.
} else if (isa<InvokeInst>(UR) || isa<CallInst>(UR)) {
@@ -1665,7 +1677,7 @@ static bool AddressIsTaken(const GlobalValue *GV) {
ImmutableCallSite CS(cast<Instruction>(UR));
if (!CS.isCallee(&U))
return true;
- } else if (const LoadInst *LI = dyn_cast<LoadInst>(UR)) {
+ } else if (const auto *LI = dyn_cast<LoadInst>(UR)) {
if (LI->isVolatile())
return true;
} else if (isa<BlockAddress>(UR)) {
@@ -1678,6 +1690,19 @@ static bool AddressIsTaken(const GlobalValue *GV) {
return false;
}
+static void findReturnsToZap(Function &F,
+ SmallPtrSet<Function *, 32> &AddressTakenFunctions,
+ SmallVector<ReturnInst *, 8> &ReturnsToZap) {
+ // We can only do this if we know that nothing else can call the function.
+ if (!F.hasLocalLinkage() || AddressTakenFunctions.count(&F))
+ return;
+
+ for (BasicBlock &BB : F)
+ if (auto *RI = dyn_cast<ReturnInst>(BB.getTerminator()))
+ if (!isa<UndefValue>(RI->getOperand(0)))
+ ReturnsToZap.push_back(RI);
+}
+
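The zap itself happens later and replaces the operand of each collected return with undef. A hypothetical before/after for an internal function whose return value the solver proved constant:

//   before:  define internal i32 @f() { ...; ret i32 %computed }
//   after:   define internal i32 @f() { ...; ret i32 undef }
// This is only legal because hasLocalLinkage() holds and the function is not in
// AddressTakenFunctions, so every call site is visible to the solver.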
static bool runIPSCCP(Module &M, const DataLayout &DL,
const TargetLibraryInfo *TLI) {
SCCPSolver Solver(DL, TLI);
@@ -1800,7 +1825,7 @@ static bool runIPSCCP(Module &M, const DataLayout &DL,
UI != UE;) {
// Grab the user and then increment the iterator early, as the user
// will be deleted. Step past all adjacent uses from the same user.
- Instruction *I = dyn_cast<Instruction>(*UI);
+ auto *I = dyn_cast<Instruction>(*UI);
do { ++UI; } while (UI != UE && *UI == I);
// Ignore blockaddress users; BasicBlock's dtor will handle them.
@@ -1812,10 +1837,10 @@ static bool runIPSCCP(Module &M, const DataLayout &DL,
// if this is a branch or switch on undef. Fold it manually as a
// branch to the first successor.
#ifndef NDEBUG
- if (BranchInst *BI = dyn_cast<BranchInst>(I)) {
+ if (auto *BI = dyn_cast<BranchInst>(I)) {
assert(BI->isConditional() && isa<UndefValue>(BI->getCondition()) &&
"Branch should be foldable!");
- } else if (SwitchInst *SI = dyn_cast<SwitchInst>(I)) {
+ } else if (auto *SI = dyn_cast<SwitchInst>(I)) {
assert(isa<UndefValue>(SI->getCondition()) && "Switch should fold");
} else {
llvm_unreachable("Didn't fold away reference to block!");
@@ -1853,21 +1878,20 @@ static bool runIPSCCP(Module &M, const DataLayout &DL,
// whether other functions are optimizable.
SmallVector<ReturnInst*, 8> ReturnsToZap;
- // TODO: Process multiple value ret instructions also.
const DenseMap<Function*, LatticeVal> &RV = Solver.getTrackedRetVals();
for (const auto &I : RV) {
Function *F = I.first;
if (I.second.isOverdefined() || F->getReturnType()->isVoidTy())
continue;
+ findReturnsToZap(*F, AddressTakenFunctions, ReturnsToZap);
+ }
- // We can only do this if we know that nothing else can call the function.
- if (!F->hasLocalLinkage() || AddressTakenFunctions.count(F))
- continue;
-
- for (BasicBlock &BB : *F)
- if (ReturnInst *RI = dyn_cast<ReturnInst>(BB.getTerminator()))
- if (!isa<UndefValue>(RI->getOperand(0)))
- ReturnsToZap.push_back(RI);
+ for (const auto &F : Solver.getMRVFunctionsTracked()) {
+ assert(F->getReturnType()->isStructTy() &&
+ "The return type should be a struct");
+ StructType *STy = cast<StructType>(F->getReturnType());
+ if (Solver.isStructLatticeConstant(F, STy))
+ findReturnsToZap(*F, AddressTakenFunctions, ReturnsToZap);
}
// Zap all returns which we've identified as zap to change.
@@ -1896,7 +1920,7 @@ static bool runIPSCCP(Module &M, const DataLayout &DL,
return MadeChanges;
}
-PreservedAnalyses IPSCCPPass::run(Module &M, AnalysisManager<Module> &AM) {
+PreservedAnalyses IPSCCPPass::run(Module &M, ModuleAnalysisManager &AM) {
const DataLayout &DL = M.getDataLayout();
auto &TLI = AM.getResult<TargetLibraryAnalysis>(M);
if (!runIPSCCP(M, DL, &TLI))
diff --git a/contrib/llvm/lib/Transforms/Scalar/SROA.cpp b/contrib/llvm/lib/Transforms/Scalar/SROA.cpp
index 7d33259c030b..bfcb15530ef5 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SROA.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SROA.cpp
@@ -44,12 +44,12 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Operator.h"
#include "llvm/Pass.h"
+#include "llvm/Support/Chrono.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Support/TimeValue.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
@@ -432,19 +432,18 @@ class AllocaSlices::partition_iterator
// cannot change the max split slice end because we just checked that
// the prior partition ended prior to that max.
P.SplitTails.erase(
- std::remove_if(
- P.SplitTails.begin(), P.SplitTails.end(),
- [&](Slice *S) { return S->endOffset() <= P.EndOffset; }),
+ remove_if(P.SplitTails,
+ [&](Slice *S) { return S->endOffset() <= P.EndOffset; }),
P.SplitTails.end());
- assert(std::any_of(P.SplitTails.begin(), P.SplitTails.end(),
- [&](Slice *S) {
- return S->endOffset() == MaxSplitSliceEndOffset;
- }) &&
+ assert(any_of(P.SplitTails,
+ [&](Slice *S) {
+ return S->endOffset() == MaxSplitSliceEndOffset;
+ }) &&
"Could not find the current max split slice offset!");
- assert(std::all_of(P.SplitTails.begin(), P.SplitTails.end(),
- [&](Slice *S) {
- return S->endOffset() <= MaxSplitSliceEndOffset;
- }) &&
+ assert(all_of(P.SplitTails,
+ [&](Slice *S) {
+ return S->endOffset() <= MaxSplitSliceEndOffset;
+ }) &&
"Max split slice end offset is not actually the max!");
}
}
@@ -693,7 +692,7 @@ private:
break;
// Handle a struct index, which adds its field offset to the pointer.
- if (StructType *STy = dyn_cast<StructType>(*GTI)) {
+ if (StructType *STy = GTI.getStructTypeOrNull()) {
unsigned ElementIdx = OpC->getZExtValue();
const StructLayout *SL = DL.getStructLayout(STy);
GEPOffset +=
@@ -996,15 +995,13 @@ AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
return;
}
- Slices.erase(std::remove_if(Slices.begin(), Slices.end(),
- [](const Slice &S) {
- return S.isDead();
- }),
+ Slices.erase(remove_if(Slices, [](const Slice &S) { return S.isDead(); }),
Slices.end());
#ifndef NDEBUG
if (SROARandomShuffleSlices) {
- std::mt19937 MT(static_cast<unsigned>(sys::TimeValue::now().msec()));
+ std::mt19937 MT(static_cast<unsigned>(
+ std::chrono::system_clock::now().time_since_epoch().count()));
std::shuffle(Slices.begin(), Slices.end(), MT);
}
#endif
@@ -1815,10 +1812,10 @@ static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL) {
// do that until all the backends are known to produce good code for all
// integer vector types.
if (!HaveCommonEltTy) {
- CandidateTys.erase(std::remove_if(CandidateTys.begin(), CandidateTys.end(),
- [](VectorType *VTy) {
- return !VTy->getElementType()->isIntegerTy();
- }),
+ CandidateTys.erase(remove_if(CandidateTys,
+ [](VectorType *VTy) {
+ return !VTy->getElementType()->isIntegerTy();
+ }),
CandidateTys.end());
// If there were no integer vector types, give up.
@@ -2486,8 +2483,8 @@ private:
}
V = convertValue(DL, IRB, V, NewAllocaTy);
StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlignment());
+ Store->copyMetadata(SI, LLVMContext::MD_mem_parallel_loop_access);
Pass.DeadInsts.insert(&SI);
- (void)Store;
DEBUG(dbgs() << " to: " << *Store << "\n");
return true;
}
@@ -2549,6 +2546,7 @@ private:
NewSI = IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(V->getType()),
SI.isVolatile());
}
+ NewSI->copyMetadata(SI, LLVMContext::MD_mem_parallel_loop_access);
if (SI.isVolatile())
NewSI->setAtomic(SI.getOrdering(), SI.getSynchScope());
Pass.DeadInsts.insert(&SI);
@@ -2878,6 +2876,17 @@ private:
// Record this instruction for deletion.
Pass.DeadInsts.insert(&II);
+ // Lifetime intrinsics are only promotable if they cover the whole alloca.
+ // Therefore, we drop lifetime intrinsics which don't cover the whole
+ // alloca.
+ // (In theory, intrinsics which partially cover an alloca could be
+ // promoted, but PromoteMemToReg doesn't handle that case.)
+ // FIXME: Check whether the alloca is promotable before dropping the
+ // lifetime intrinsics?
+ if (NewBeginOffset != NewAllocaBeginOffset ||
+ NewEndOffset != NewAllocaEndOffset)
+ return true;
+
ConstantInt *Size =
ConstantInt::get(cast<IntegerType>(II.getArgOperand(0)->getType()),
NewEndOffset - NewBeginOffset);
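A concrete, made-up case the early return handles:

// While splitting %a = alloca [16 x i8], suppose the current partition covers
// bytes [4, 8) and a llvm.lifetime.start marker covers only bytes [0, 6) of the
// original alloca. Within this partition the marker covers [4, 6), which is not
// the whole new alloca, so the intrinsic is dropped (it was already added to
// DeadInsts above) rather than rewritten with a smaller size.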
@@ -2890,6 +2899,7 @@ private:
(void)New;
DEBUG(dbgs() << " to: " << *New << "\n");
+
return true;
}
@@ -3209,20 +3219,11 @@ static Type *getTypePartition(const DataLayout &DL, Type *Ty, uint64_t Offset,
return nullptr;
if (SequentialType *SeqTy = dyn_cast<SequentialType>(Ty)) {
- // We can't partition pointers...
- if (SeqTy->isPointerTy())
- return nullptr;
-
Type *ElementTy = SeqTy->getElementType();
uint64_t ElementSize = DL.getTypeAllocSize(ElementTy);
uint64_t NumSkippedElements = Offset / ElementSize;
- if (ArrayType *ArrTy = dyn_cast<ArrayType>(SeqTy)) {
- if (NumSkippedElements >= ArrTy->getNumElements())
- return nullptr;
- } else if (VectorType *VecTy = dyn_cast<VectorType>(SeqTy)) {
- if (NumSkippedElements >= VecTy->getNumElements())
- return nullptr;
- }
+ if (NumSkippedElements >= SeqTy->getNumElements())
+ return nullptr;
Offset -= NumSkippedElements * ElementSize;
// First check if we need to recurse.
@@ -3456,63 +3457,60 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
// match relative to their starting offset. We have to verify this prior to
// any rewriting.
Stores.erase(
- std::remove_if(Stores.begin(), Stores.end(),
- [&UnsplittableLoads, &SplitOffsetsMap](StoreInst *SI) {
- // Lookup the load we are storing in our map of split
- // offsets.
- auto *LI = cast<LoadInst>(SI->getValueOperand());
- // If it was completely unsplittable, then we're done,
- // and this store can't be pre-split.
- if (UnsplittableLoads.count(LI))
- return true;
-
- auto LoadOffsetsI = SplitOffsetsMap.find(LI);
- if (LoadOffsetsI == SplitOffsetsMap.end())
- return false; // Unrelated loads are definitely safe.
- auto &LoadOffsets = LoadOffsetsI->second;
-
- // Now lookup the store's offsets.
- auto &StoreOffsets = SplitOffsetsMap[SI];
-
- // If the relative offsets of each split in the load and
- // store match exactly, then we can split them and we
- // don't need to remove them here.
- if (LoadOffsets.Splits == StoreOffsets.Splits)
- return false;
-
- DEBUG(dbgs()
- << " Mismatched splits for load and store:\n"
- << " " << *LI << "\n"
- << " " << *SI << "\n");
-
- // We've found a store and load that we need to split
- // with mismatched relative splits. Just give up on them
- // and remove both instructions from our list of
- // candidates.
- UnsplittableLoads.insert(LI);
- return true;
- }),
+ remove_if(Stores,
+ [&UnsplittableLoads, &SplitOffsetsMap](StoreInst *SI) {
+ // Lookup the load we are storing in our map of split
+ // offsets.
+ auto *LI = cast<LoadInst>(SI->getValueOperand());
+ // If it was completely unsplittable, then we're done,
+ // and this store can't be pre-split.
+ if (UnsplittableLoads.count(LI))
+ return true;
+
+ auto LoadOffsetsI = SplitOffsetsMap.find(LI);
+ if (LoadOffsetsI == SplitOffsetsMap.end())
+ return false; // Unrelated loads are definitely safe.
+ auto &LoadOffsets = LoadOffsetsI->second;
+
+ // Now lookup the store's offsets.
+ auto &StoreOffsets = SplitOffsetsMap[SI];
+
+ // If the relative offsets of each split in the load and
+ // store match exactly, then we can split them and we
+ // don't need to remove them here.
+ if (LoadOffsets.Splits == StoreOffsets.Splits)
+ return false;
+
+ DEBUG(dbgs() << " Mismatched splits for load and store:\n"
+ << " " << *LI << "\n"
+ << " " << *SI << "\n");
+
+ // We've found a store and load that we need to split
+ // with mismatched relative splits. Just give up on them
+ // and remove both instructions from our list of
+ // candidates.
+ UnsplittableLoads.insert(LI);
+ return true;
+ }),
Stores.end());
// Now we have to go *back* through all the stores, because a later store may
// have caused an earlier store's load to become unsplittable and if it is
// unsplittable for the later store, then we can't rely on it being split in
// the earlier store either.
- Stores.erase(std::remove_if(Stores.begin(), Stores.end(),
- [&UnsplittableLoads](StoreInst *SI) {
- auto *LI =
- cast<LoadInst>(SI->getValueOperand());
- return UnsplittableLoads.count(LI);
- }),
+ Stores.erase(remove_if(Stores,
+ [&UnsplittableLoads](StoreInst *SI) {
+ auto *LI = cast<LoadInst>(SI->getValueOperand());
+ return UnsplittableLoads.count(LI);
+ }),
Stores.end());
// Once we've established all the loads that can't be split for some reason,
// filter any that made it into our list out.
- Loads.erase(std::remove_if(Loads.begin(), Loads.end(),
- [&UnsplittableLoads](LoadInst *LI) {
- return UnsplittableLoads.count(LI);
- }),
+ Loads.erase(remove_if(Loads,
+ [&UnsplittableLoads](LoadInst *LI) {
+ return UnsplittableLoads.count(LI);
+ }),
Loads.end());
-
// If no loads or stores are left, there is no pre-splitting to be done for
// this alloca.
if (Loads.empty() && Stores.empty())
@@ -3570,6 +3568,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
PartPtrTy, BasePtr->getName() + "."),
getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false,
LI->getName());
+ PLoad->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access);
// Append this load onto the list of split loads so we can find it later
// to rewrite the stores.
@@ -3622,7 +3621,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
APInt(DL.getPointerSizeInBits(), PartOffset),
PartPtrTy, StoreBasePtr->getName() + "."),
getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false);
- (void)PStore;
+ PStore->copyMetadata(*LI, LLVMContext::MD_mem_parallel_loop_access);
DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n");
}
@@ -3770,9 +3769,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
}
// Remove the killed slices that have been pre-split.
- AS.erase(std::remove_if(AS.begin(), AS.end(), [](const Slice &S) {
- return S.isDead();
- }), AS.end());
+ AS.erase(remove_if(AS, [](const Slice &S) { return S.isDead(); }), AS.end());
// Insert our new slices. This will sort and merge them into the sorted
// sequence.
@@ -3787,8 +3784,8 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
// Finally, don't try to promote any allocas that now require re-splitting.
// They have already been added to the worklist above.
PromotableAllocas.erase(
- std::remove_if(
- PromotableAllocas.begin(), PromotableAllocas.end(),
+ remove_if(
+ PromotableAllocas,
[&](AllocaInst *AI) { return ResplitPromotableAllocas.count(AI); }),
PromotableAllocas.end());
@@ -3985,16 +3982,16 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
if (!IsSorted)
std::sort(AS.begin(), AS.end());
- /// \brief Describes the allocas introduced by rewritePartition
- /// in order to migrate the debug info.
- struct Piece {
+ /// Describes the allocas introduced by rewritePartition in order to migrate
+ /// the debug info.
+ struct Fragment {
AllocaInst *Alloca;
uint64_t Offset;
uint64_t Size;
- Piece(AllocaInst *AI, uint64_t O, uint64_t S)
+ Fragment(AllocaInst *AI, uint64_t O, uint64_t S)
: Alloca(AI), Offset(O), Size(S) {}
};
- SmallVector<Piece, 4> Pieces;
+ SmallVector<Fragment, 4> Fragments;
// Rewrite each partition.
for (auto &P : AS.partitions()) {
@@ -4005,7 +4002,7 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
uint64_t AllocaSize = DL.getTypeSizeInBits(NewAI->getAllocatedType());
// Don't include any padding.
uint64_t Size = std::min(AllocaSize, P.size() * SizeOfByte);
- Pieces.push_back(Piece(NewAI, P.beginOffset() * SizeOfByte, Size));
+ Fragments.push_back(Fragment(NewAI, P.beginOffset() * SizeOfByte, Size));
}
}
++NumPartitions;
@@ -4022,35 +4019,34 @@ bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
auto *Expr = DbgDecl->getExpression();
DIBuilder DIB(*AI.getModule(), /*AllowUnresolved*/ false);
uint64_t AllocaSize = DL.getTypeSizeInBits(AI.getAllocatedType());
- for (auto Piece : Pieces) {
- // Create a piece expression describing the new partition or reuse AI's
+ for (auto Fragment : Fragments) {
+ // Create a fragment expression describing the new partition or reuse AI's
// expression if there is only one partition.
- auto *PieceExpr = Expr;
- if (Piece.Size < AllocaSize || Expr->isBitPiece()) {
+ auto *FragmentExpr = Expr;
+ if (Fragment.Size < AllocaSize || Expr->isFragment()) {
// If this alloca is already a scalar replacement of a larger aggregate,
- // Piece.Offset describes the offset inside the scalar.
- uint64_t Offset = Expr->isBitPiece() ? Expr->getBitPieceOffset() : 0;
- uint64_t Start = Offset + Piece.Offset;
- uint64_t Size = Piece.Size;
- if (Expr->isBitPiece()) {
- uint64_t AbsEnd = Expr->getBitPieceOffset() + Expr->getBitPieceSize();
+ // Fragment.Offset describes the offset inside the scalar.
+ auto ExprFragment = Expr->getFragmentInfo();
+ uint64_t Offset = ExprFragment ? ExprFragment->OffsetInBits : 0;
+ uint64_t Start = Offset + Fragment.Offset;
+ uint64_t Size = Fragment.Size;
+ if (ExprFragment) {
+ uint64_t AbsEnd =
+ ExprFragment->OffsetInBits + ExprFragment->SizeInBits;
if (Start >= AbsEnd)
// No need to describe a SROAed padding.
continue;
Size = std::min(Size, AbsEnd - Start);
}
- PieceExpr = DIB.createBitPieceExpression(Start, Size);
- } else {
- assert(Pieces.size() == 1 &&
- "partition is as large as original alloca");
+ FragmentExpr = DIB.createFragmentExpression(Start, Size);
}
// Remove any existing dbg.declare intrinsic describing the same alloca.
- if (DbgDeclareInst *OldDDI = FindAllocaDbgDeclare(Piece.Alloca))
+ if (DbgDeclareInst *OldDDI = FindAllocaDbgDeclare(Fragment.Alloca))
OldDDI->eraseFromParent();
- DIB.insertDeclare(Piece.Alloca, Var, PieceExpr, DbgDecl->getDebugLoc(),
- &AI);
+ DIB.insertDeclare(Fragment.Alloca, Var, FragmentExpr,
+ DbgDecl->getDebugLoc(), &AI);
}
}
return Changed;
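A worked example of the offset arithmetic above, with invented numbers:

// Say AI already describes bits [64, 128) of a 128-bit variable, so
// ExprFragment->OffsetInBits == 64 and ExprFragment->SizeInBits == 64. For a
// new partition with Fragment.Offset == 32 and Fragment.Size == 64:
//   Start  = 64 + 32           = 96
//   AbsEnd = 64 + 64           = 128
//   Size   = min(64, 128 - 96) = 32
// so DIB.createFragmentExpression(96, 32) describes the surviving bits, and a
// partition whose Start reached AbsEnd would be skipped as SROAed padding.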
@@ -4223,9 +4219,7 @@ PreservedAnalyses SROA::runImpl(Function &F, DominatorTree &RunDT,
auto IsInSet = [&](AllocaInst *AI) { return DeletedAllocas.count(AI); };
Worklist.remove_if(IsInSet);
PostPromotionWorklist.remove_if(IsInSet);
- PromotableAllocas.erase(std::remove_if(PromotableAllocas.begin(),
- PromotableAllocas.end(),
- IsInSet),
+ PromotableAllocas.erase(remove_if(PromotableAllocas, IsInSet),
PromotableAllocas.end());
DeletedAllocas.clear();
}
@@ -4247,7 +4241,7 @@ PreservedAnalyses SROA::runImpl(Function &F, DominatorTree &RunDT,
return PA;
}
-PreservedAnalyses SROA::run(Function &F, AnalysisManager<Function> &AM) {
+PreservedAnalyses SROA::run(Function &F, FunctionAnalysisManager &AM) {
return runImpl(F, AM.getResult<DominatorTreeAnalysis>(F),
AM.getResult<AssumptionAnalysis>(F));
}
@@ -4280,7 +4274,7 @@ public:
AU.setPreservesCFG();
}
- const char *getPassName() const override { return "SROA"; }
+ StringRef getPassName() const override { return "SROA"; }
static char ID;
};
diff --git a/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
index f235b12e49cc..afe7483006ae 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Scalar.cpp
@@ -43,14 +43,17 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeDSELegacyPassPass(Registry);
initializeGuardWideningLegacyPassPass(Registry);
initializeGVNLegacyPassPass(Registry);
+ initializeNewGVNPass(Registry);
initializeEarlyCSELegacyPassPass(Registry);
+ initializeEarlyCSEMemSSALegacyPassPass(Registry);
initializeGVNHoistLegacyPassPass(Registry);
initializeFlattenCFGPassPass(Registry);
initializeInductiveRangeCheckEliminationPass(Registry);
initializeIndVarSimplifyLegacyPassPass(Registry);
initializeJumpThreadingPass(Registry);
initializeLegacyLICMPassPass(Registry);
- initializeLoopDataPrefetchPass(Registry);
+ initializeLegacyLoopSinkPassPass(Registry);
+ initializeLoopDataPrefetchLegacyPassPass(Registry);
initializeLoopDeletionLegacyPassPass(Registry);
initializeLoopAccessLegacyAnalysisPass(Registry);
initializeLoopInstSimplifyLegacyPassPass(Registry);
@@ -64,10 +67,10 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeLoopIdiomRecognizeLegacyPassPass(Registry);
initializeLowerAtomicLegacyPassPass(Registry);
initializeLowerExpectIntrinsicPass(Registry);
- initializeLowerGuardIntrinsicPass(Registry);
+ initializeLowerGuardIntrinsicLegacyPassPass(Registry);
initializeMemCpyOptLegacyPassPass(Registry);
initializeMergedLoadStoreMotionLegacyPassPass(Registry);
- initializeNaryReassociatePass(Registry);
+ initializeNaryReassociateLegacyPassPass(Registry);
initializePartiallyInlineLibCallsLegacyPassPass(Registry);
initializeReassociateLegacyPassPass(Registry);
initializeRegToMemPass(Registry);
@@ -80,7 +83,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeSinkingLegacyPassPass(Registry);
initializeTailCallElimPass(Registry);
initializeSeparateConstOffsetFromGEPPass(Registry);
- initializeSpeculativeExecutionPass(Registry);
+ initializeSpeculativeExecutionLegacyPassPass(Registry);
initializeStraightLineStrengthReducePass(Registry);
initializeLoadCombinePass(Registry);
initializePlaceBackedgeSafepointsImplPass(Registry);
@@ -124,6 +127,10 @@ void LLVMAddGVNPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createGVNPass());
}
+void LLVMAddNewGVNPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createNewGVNPass());
+}
+
void LLVMAddMergedLoadStoreMotionPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createMergedLoadStoreMotionPass());
}
@@ -140,6 +147,10 @@ void LLVMAddJumpThreadingPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createJumpThreadingPass());
}
+void LLVMAddLoopSinkPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createLoopSinkPass());
+}
+
void LLVMAddLICMPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createLICMPass());
}
@@ -234,7 +245,11 @@ void LLVMAddCorrelatedValuePropagationPass(LLVMPassManagerRef PM) {
}
void LLVMAddEarlyCSEPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createEarlyCSEPass());
+ unwrap(PM)->add(createEarlyCSEPass(false/*=UseMemorySSA*/));
+}
+
+void LLVMAddEarlyCSEMemSSAPass(LLVMPassManagerRef PM) {
+ unwrap(PM)->add(createEarlyCSEPass(true/*=UseMemorySSA*/));
}
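A minimal sketch of a C API client picking up the new entry points (the wrapper name is invented; the declarations are assumed to live alongside the existing ones in llvm-c/Transforms/Scalar.h):

#include "llvm-c/Transforms/Scalar.h"

// Populate a legacy pass manager with the newly exposed scalar passes.
static void addNewScalarPasses(LLVMPassManagerRef PM) {
  LLVMAddNewGVNPass(PM);          // GVN rewrite
  LLVMAddEarlyCSEMemSSAPass(PM);  // EarlyCSE backed by MemorySSA
  LLVMAddLoopSinkPass(PM);        // loop sinking
}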
void LLVMAddGVNHoistLegacyPass(LLVMPassManagerRef PM) {
diff --git a/contrib/llvm/lib/Transforms/Scalar/Scalarizer.cpp b/contrib/llvm/lib/Transforms/Scalar/Scalarizer.cpp
index aed4a4ad4d26..39969e27367f 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Scalarizer.cpp
@@ -16,6 +16,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/Pass.h"
@@ -148,6 +149,7 @@ public:
bool visitPHINode(PHINode &);
bool visitLoadInst(LoadInst &);
bool visitStoreInst(StoreInst &);
+ bool visitCallInst(CallInst &I);
static void registerOptions() {
// This is disabled by default because having separate loads and stores
@@ -169,6 +171,8 @@ private:
template<typename T> bool splitBinary(Instruction &, const T &);
+ bool splitCall(CallInst &CI);
+
ScatterMap Scattered;
GatherList Gathered;
unsigned ParallelLoopAccessMDKind;
@@ -394,6 +398,77 @@ bool Scalarizer::splitBinary(Instruction &I, const Splitter &Split) {
return true;
}
+static bool isTriviallyScalariable(Intrinsic::ID ID) {
+ return isTriviallyVectorizable(ID);
+}
+
+// All of the current scalarizable intrinsics only have one mangled type.
+static Function *getScalarIntrinsicDeclaration(Module *M,
+ Intrinsic::ID ID,
+ VectorType *Ty) {
+ return Intrinsic::getDeclaration(M, ID, { Ty->getScalarType() });
+}
+
+/// If this is a call to a vector-typed intrinsic function, split it into one
+/// scalar call per element when the intrinsic supports it.
+bool Scalarizer::splitCall(CallInst &CI) {
+ VectorType *VT = dyn_cast<VectorType>(CI.getType());
+ if (!VT)
+ return false;
+
+ Function *F = CI.getCalledFunction();
+ if (!F)
+ return false;
+
+ Intrinsic::ID ID = F->getIntrinsicID();
+ if (ID == Intrinsic::not_intrinsic || !isTriviallyScalariable(ID))
+ return false;
+
+ unsigned NumElems = VT->getNumElements();
+ unsigned NumArgs = CI.getNumArgOperands();
+
+ ValueVector ScalarOperands(NumArgs);
+ SmallVector<Scatterer, 8> Scattered(NumArgs);
+
+ // Assumes that any vector type has the same number of elements as the return
+ // vector type, which is true for all current intrinsics.
+ for (unsigned I = 0; I != NumArgs; ++I) {
+ Value *OpI = CI.getOperand(I);
+ if (OpI->getType()->isVectorTy()) {
+ Scattered[I] = scatter(&CI, OpI);
+ assert(Scattered[I].size() == NumElems && "mismatched call operands");
+ } else {
+ ScalarOperands[I] = OpI;
+ }
+ }
+
+ ValueVector Res(NumElems);
+ ValueVector ScalarCallOps(NumArgs);
+
+ Function *NewIntrin = getScalarIntrinsicDeclaration(F->getParent(), ID, VT);
+ IRBuilder<> Builder(&CI);
+
+ // Perform actual scalarization, taking care to preserve any scalar operands.
+ for (unsigned Elem = 0; Elem < NumElems; ++Elem) {
+ ScalarCallOps.clear();
+
+ for (unsigned J = 0; J != NumArgs; ++J) {
+ if (hasVectorInstrinsicScalarOpd(ID, J))
+ ScalarCallOps.push_back(ScalarOperands[J]);
+ else
+ ScalarCallOps.push_back(Scattered[J][Elem]);
+ }
+
+ Res[Elem] = Builder.CreateCall(NewIntrin, ScalarCallOps,
+ CI.getName() + ".i" + Twine(Elem));
+ }
+
+ gather(&CI, Res);
+ return true;
+}
+
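The effect on IR, sketched with invented names for a trivially vectorizable intrinsic:

// Before:
//   %r = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %v)
// After splitCall, one scalar call per lane (lanes 1-3 analogous):
//   %v.i0 = extractelement <4 x float> %v, i32 0
//   %r.i0 = call float @llvm.sqrt.f32(float %v.i0)
// gather(&CI, Res) records the per-lane results and finish() rebuilds a full
// vector with insertelement only if one is still needed. Operands flagged by
// hasVectorInstrinsicScalarOpd, such as the exponent of llvm.powi, are passed
// through unscattered.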
bool Scalarizer::visitSelectInst(SelectInst &SI) {
VectorType *VT = dyn_cast<VectorType>(SI.getType());
if (!VT)
@@ -642,6 +717,10 @@ bool Scalarizer::visitStoreInst(StoreInst &SI) {
return true;
}
+bool Scalarizer::visitCallInst(CallInst &CI) {
+ return splitCall(CI);
+}
+
// Delete the instructions that we scalarized. If a full vector result
// is still needed, recreate it using InsertElements.
bool Scalarizer::finish() {
diff --git a/contrib/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/contrib/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
index d6ae186698c7..4d594532c365 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp
@@ -722,7 +722,7 @@ bool SeparateConstOffsetFromGEP::canonicalizeArrayIndicesToPointerSize(
for (User::op_iterator I = GEP->op_begin() + 1, E = GEP->op_end();
I != E; ++I, ++GTI) {
// Skip struct member indices which must be i32.
- if (isa<SequentialType>(*GTI)) {
+ if (GTI.isSequential()) {
if ((*I)->getType() != IntPtrTy) {
*I = CastInst::CreateIntegerCast(*I, IntPtrTy, true, "idxprom", GEP);
Changed = true;
@@ -739,7 +739,7 @@ SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP,
int64_t AccumulativeByteOffset = 0;
gep_type_iterator GTI = gep_type_begin(*GEP);
for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
- if (isa<SequentialType>(*GTI)) {
+ if (GTI.isSequential()) {
// Tries to extract a constant offset from this GEP index.
int64_t ConstantOffset =
ConstantOffsetExtractor::Find(GEP->getOperand(I), GEP, DT);
@@ -752,7 +752,7 @@ SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP,
ConstantOffset * DL->getTypeAllocSize(GTI.getIndexedType());
}
} else if (LowerGEP) {
- StructType *StTy = cast<StructType>(*GTI);
+ StructType *StTy = GTI.getStructType();
uint64_t Field = cast<ConstantInt>(GEP->getOperand(I))->getZExtValue();
// Skip field 0 as the offset is always 0.
if (Field != 0) {
@@ -787,7 +787,7 @@ void SeparateConstOffsetFromGEP::lowerToSingleIndexGEPs(
// Create an ugly GEP for each sequential index. We don't create GEPs for
// structure indices, as they are accumulated in the constant offset index.
for (unsigned I = 1, E = Variadic->getNumOperands(); I != E; ++I, ++GTI) {
- if (isa<SequentialType>(*GTI)) {
+ if (GTI.isSequential()) {
Value *Idx = Variadic->getOperand(I);
// Skip zero indices.
if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx))
@@ -848,7 +848,7 @@ SeparateConstOffsetFromGEP::lowerToArithmetics(GetElementPtrInst *Variadic,
// don't create arithmetics for structure indices, as they are accumulated
// in the constant offset index.
for (unsigned I = 1, E = Variadic->getNumOperands(); I != E; ++I, ++GTI) {
- if (isa<SequentialType>(*GTI)) {
+ if (GTI.isSequential()) {
Value *Idx = Variadic->getOperand(I);
// Skip zero indices.
if (ConstantInt *CI = dyn_cast<ConstantInt>(Idx))
@@ -928,7 +928,7 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
// handle the constant offset and won't need a new structure index.
gep_type_iterator GTI = gep_type_begin(*GEP);
for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
- if (isa<SequentialType>(*GTI)) {
+ if (GTI.isSequential()) {
// Splits this GEP index into a variadic part and a constant offset, and
// uses the variadic part as the new index.
Value *OldIdx = GEP->getOperand(I);
@@ -1150,8 +1150,7 @@ bool SeparateConstOffsetFromGEP::reuniteExts(Instruction *I) {
bool SeparateConstOffsetFromGEP::reuniteExts(Function &F) {
bool Changed = false;
DominatingExprs.clear();
- for (auto Node = GraphTraits<DominatorTree *>::nodes_begin(DT);
- Node != GraphTraits<DominatorTree *>::nodes_end(DT); ++Node) {
+ for (const auto Node : depth_first(DT)) {
BasicBlock *BB = Node->getBlock();
for (auto I = BB->begin(); I != BB->end(); ) {
Instruction *Cur = &*I++;
diff --git a/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
index 2d0a21d2c518..f2723bd7af82 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -187,7 +187,7 @@ SimplifyCFGPass::SimplifyCFGPass(int BonusInstThreshold)
: BonusInstThreshold(BonusInstThreshold) {}
PreservedAnalyses SimplifyCFGPass::run(Function &F,
- AnalysisManager<Function> &AM) {
+ FunctionAnalysisManager &AM) {
auto &TTI = AM.getResult<TargetIRAnalysis>(F);
auto &AC = AM.getResult<AssumptionAnalysis>(F);
diff --git a/contrib/llvm/lib/Transforms/Scalar/Sink.cpp b/contrib/llvm/lib/Transforms/Scalar/Sink.cpp
index d9a296c63122..c3f14a0f4b1e 100644
--- a/contrib/llvm/lib/Transforms/Scalar/Sink.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/Sink.cpp
@@ -254,7 +254,7 @@ static bool iterativelySinkInstructions(Function &F, DominatorTree &DT,
return EverMadeChange;
}
-PreservedAnalyses SinkingPass::run(Function &F, AnalysisManager<Function> &AM) {
+PreservedAnalyses SinkingPass::run(Function &F, FunctionAnalysisManager &AM) {
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &LI = AM.getResult<LoopAnalysis>(F);
auto &AA = AM.getResult<AAManager>(F);
diff --git a/contrib/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp b/contrib/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
index 9bf2d6206819..a7c308b59877 100644
--- a/contrib/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/SpeculativeExecution.cpp
@@ -61,9 +61,9 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Scalar/SpeculativeExecution.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/GlobalsModRef.h"
-#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
@@ -101,58 +101,62 @@ static cl::opt<bool> SpecExecOnlyIfDivergentTarget(
namespace {
-class SpeculativeExecution : public FunctionPass {
- public:
- static char ID;
- explicit SpeculativeExecution(bool OnlyIfDivergentTarget = false)
- : FunctionPass(ID),
- OnlyIfDivergentTarget(OnlyIfDivergentTarget ||
- SpecExecOnlyIfDivergentTarget) {}
-
- void getAnalysisUsage(AnalysisUsage &AU) const override;
- bool runOnFunction(Function &F) override;
-
- const char *getPassName() const override {
- if (OnlyIfDivergentTarget)
- return "Speculatively execute instructions if target has divergent "
- "branches";
- return "Speculatively execute instructions";
- }
-
- private:
- bool runOnBasicBlock(BasicBlock &B);
- bool considerHoistingFromTo(BasicBlock &FromBlock, BasicBlock &ToBlock);
-
- // If true, this pass is a nop unless the target architecture has branch
- // divergence.
+class SpeculativeExecutionLegacyPass : public FunctionPass {
+public:
+ static char ID;
+ explicit SpeculativeExecutionLegacyPass(bool OnlyIfDivergentTarget = false)
+ : FunctionPass(ID), OnlyIfDivergentTarget(OnlyIfDivergentTarget ||
+ SpecExecOnlyIfDivergentTarget),
+ Impl(OnlyIfDivergentTarget) {}
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnFunction(Function &F) override;
+
+ StringRef getPassName() const override {
+ if (OnlyIfDivergentTarget)
+ return "Speculatively execute instructions if target has divergent "
+ "branches";
+ return "Speculatively execute instructions";
+ }
+
+private:
+ // Variable preserved purely for correct name printing.
const bool OnlyIfDivergentTarget;
- const TargetTransformInfo *TTI = nullptr;
+
+ SpeculativeExecutionPass Impl;
};
} // namespace
-char SpeculativeExecution::ID = 0;
-INITIALIZE_PASS_BEGIN(SpeculativeExecution, "speculative-execution",
+char SpeculativeExecutionLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(SpeculativeExecutionLegacyPass, "speculative-execution",
"Speculatively execute instructions", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_END(SpeculativeExecution, "speculative-execution",
+INITIALIZE_PASS_END(SpeculativeExecutionLegacyPass, "speculative-execution",
"Speculatively execute instructions", false, false)
-void SpeculativeExecution::getAnalysisUsage(AnalysisUsage &AU) const {
+void SpeculativeExecutionLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addPreserved<GlobalsAAWrapperPass>();
}
-bool SpeculativeExecution::runOnFunction(Function &F) {
+bool SpeculativeExecutionLegacyPass::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
- TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ return Impl.runImpl(F, TTI);
+}
+
+namespace llvm {
+
+bool SpeculativeExecutionPass::runImpl(Function &F, TargetTransformInfo *TTI) {
if (OnlyIfDivergentTarget && !TTI->hasBranchDivergence()) {
DEBUG(dbgs() << "Not running SpeculativeExecution because "
"TTI->hasBranchDivergence() is false.\n");
return false;
}
+ this->TTI = TTI;
bool Changed = false;
for (auto& B : F) {
Changed |= runOnBasicBlock(B);
@@ -160,7 +164,7 @@ bool SpeculativeExecution::runOnFunction(Function &F) {
return Changed;
}
-bool SpeculativeExecution::runOnBasicBlock(BasicBlock &B) {
+bool SpeculativeExecutionPass::runOnBasicBlock(BasicBlock &B) {
BranchInst *BI = dyn_cast<BranchInst>(B.getTerminator());
if (BI == nullptr)
return false;
@@ -220,6 +224,24 @@ static unsigned ComputeSpeculationCost(const Instruction *I,
case Instruction::Xor:
case Instruction::ZExt:
case Instruction::SExt:
+ case Instruction::Call:
+ case Instruction::BitCast:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::AddrSpaceCast:
+ case Instruction::FPToUI:
+ case Instruction::FPToSI:
+ case Instruction::UIToFP:
+ case Instruction::SIToFP:
+ case Instruction::FPExt:
+ case Instruction::FPTrunc:
+ case Instruction::FAdd:
+ case Instruction::FSub:
+ case Instruction::FMul:
+ case Instruction::FDiv:
+ case Instruction::FRem:
+ case Instruction::ICmp:
+ case Instruction::FCmp:
return TTI.getUserCost(I);
default:
@@ -227,8 +249,8 @@ static unsigned ComputeSpeculationCost(const Instruction *I,
}
}
-bool SpeculativeExecution::considerHoistingFromTo(BasicBlock &FromBlock,
- BasicBlock &ToBlock) {
+bool SpeculativeExecutionPass::considerHoistingFromTo(
+ BasicBlock &FromBlock, BasicBlock &ToBlock) {
SmallSet<const Instruction *, 8> NotHoisted;
const auto AllPrecedingUsesFromBlockHoisted = [&NotHoisted](User *U) {
for (Value* V : U->operand_values()) {
@@ -270,14 +292,28 @@ bool SpeculativeExecution::considerHoistingFromTo(BasicBlock &FromBlock,
return true;
}
-namespace llvm {
-
FunctionPass *createSpeculativeExecutionPass() {
- return new SpeculativeExecution();
+ return new SpeculativeExecutionLegacyPass();
}
FunctionPass *createSpeculativeExecutionIfHasBranchDivergencePass() {
- return new SpeculativeExecution(/* OnlyIfDivergentTarget = */ true);
+ return new SpeculativeExecutionLegacyPass(/* OnlyIfDivergentTarget = */ true);
}
+SpeculativeExecutionPass::SpeculativeExecutionPass(bool OnlyIfDivergentTarget)
+ : OnlyIfDivergentTarget(OnlyIfDivergentTarget ||
+ SpecExecOnlyIfDivergentTarget) {}
+
+PreservedAnalyses SpeculativeExecutionPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ auto *TTI = &AM.getResult<TargetIRAnalysis>(F);
+
+ bool Changed = runImpl(F, TTI);
+
+ if (!Changed)
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserve<GlobalsAA>();
+ return PA;
+}
} // namespace llvm
diff --git a/contrib/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp b/contrib/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
index 292d0400a516..2be3f5c533b9 100644
--- a/contrib/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/StraightLineStrengthReduce.cpp
@@ -55,8 +55,6 @@
//
// - When (i' - i) is constant but i and i' are not, we could still perform
// SLSR.
-#include <vector>
-
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -68,6 +66,8 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
+#include <list>
+#include <vector>
using namespace llvm;
using namespace PatternMatch;
@@ -80,7 +80,7 @@ class StraightLineStrengthReduce : public FunctionPass {
public:
// SLSR candidate. Such a candidate must be in one of the forms described in
// the header comments.
- struct Candidate : public ilist_node<Candidate> {
+ struct Candidate {
enum Kind {
Invalid, // reserved for the default constructor
Add, // B + i * S
@@ -200,7 +200,7 @@ private:
DominatorTree *DT;
ScalarEvolution *SE;
TargetTransformInfo *TTI;
- ilist<Candidate> Candidates;
+ std::list<Candidate> Candidates;
// Temporarily holds all instructions that are unlinked (but not deleted) by
// rewriteCandidateWithBasis. These instructions will be actually removed
// after all rewriting finishes.
@@ -490,8 +490,8 @@ void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForGEP(
IndexExprs.push_back(SE->getSCEV(*I));
gep_type_iterator GTI = gep_type_begin(GEP);
- for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I) {
- if (!isa<SequentialType>(*GTI++))
+ for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) {
+ if (GTI.isStruct())
continue;
const SCEV *OrigIndexExpr = IndexExprs[I - 1];
@@ -499,11 +499,9 @@ void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForGEP(
// The base of this candidate is GEP's base plus the offsets of all
// indices except this current one.
- const SCEV *BaseExpr = SE->getGEPExpr(GEP->getSourceElementType(),
- SE->getSCEV(GEP->getPointerOperand()),
- IndexExprs, GEP->isInBounds());
+ const SCEV *BaseExpr = SE->getGEPExpr(cast<GEPOperator>(GEP), IndexExprs);
Value *ArrayIdx = GEP->getOperand(I);
- uint64_t ElementSize = DL->getTypeAllocSize(*GTI);
+ uint64_t ElementSize = DL->getTypeAllocSize(GTI.getIndexedType());
if (ArrayIdx->getType()->getIntegerBitWidth() <=
DL->getPointerSizeInBits(GEP->getAddressSpace())) {
// Skip factoring if ArrayIdx is wider than the pointer size, because
@@ -674,11 +672,9 @@ bool StraightLineStrengthReduce::runOnFunction(Function &F) {
SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
// Traverse the dominator tree in the depth-first order. This order makes sure
// all bases of a candidate are in Candidates when we process it.
- for (auto node = GraphTraits<DominatorTree *>::nodes_begin(DT);
- node != GraphTraits<DominatorTree *>::nodes_end(DT); ++node) {
- for (auto &I : *node->getBlock())
+ for (const auto Node : depth_first(DT))
+ for (auto &I : *(Node->getBlock()))
allocateCandidatesAndFindBasis(&I);
- }
// Rewrite candidates in the reverse depth-first order. This order makes sure
// a candidate being rewritten is not a basis for any other candidate.
diff --git a/contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
index e9ac39beae5a..fa2235e8439a 100644
--- a/contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
@@ -43,77 +43,58 @@ typedef SmallPtrSet<BasicBlock *, 8> BBSet;
typedef MapVector<PHINode *, BBValueVector> PhiMap;
typedef MapVector<BasicBlock *, BBVector> BB2BBVecMap;
-typedef DenseMap<DomTreeNode *, unsigned> DTN2UnsignedMap;
typedef DenseMap<BasicBlock *, PhiMap> BBPhiMap;
typedef DenseMap<BasicBlock *, Value *> BBPredicates;
typedef DenseMap<BasicBlock *, BBPredicates> PredMap;
typedef DenseMap<BasicBlock *, BasicBlock*> BB2BBMap;
// The name for newly created blocks.
-
static const char *const FlowBlockName = "Flow";
-/// @brief Find the nearest common dominator for multiple BasicBlocks
+/// Finds the nearest common dominator of a set of BasicBlocks.
///
-/// Helper class for StructurizeCFG
-/// TODO: Maybe move into common code
+/// For every BB you add to the set, you can specify whether we "remember" the
+/// block. When you get the common dominator, you can also ask whether it's one
+/// of the blocks we remembered.
class NearestCommonDominator {
DominatorTree *DT;
+ BasicBlock *Result = nullptr;
+ bool ResultIsRemembered = false;
- DTN2UnsignedMap IndexMap;
-
- BasicBlock *Result;
- unsigned ResultIndex;
- bool ExplicitMentioned;
-
-public:
- /// \brief Start a new query
- NearestCommonDominator(DominatorTree *DomTree) {
- DT = DomTree;
- Result = nullptr;
- }
-
- /// \brief Add BB to the resulting dominator
- void addBlock(BasicBlock *BB, bool Remember = true) {
- DomTreeNode *Node = DT->getNode(BB);
-
+ /// Add BB to the resulting dominator.
+ void addBlock(BasicBlock *BB, bool Remember) {
if (!Result) {
- unsigned Numbering = 0;
- for (;Node;Node = Node->getIDom())
- IndexMap[Node] = ++Numbering;
Result = BB;
- ResultIndex = 1;
- ExplicitMentioned = Remember;
+ ResultIsRemembered = Remember;
return;
}
- for (;Node;Node = Node->getIDom())
- if (IndexMap.count(Node))
- break;
- else
- IndexMap[Node] = 0;
+ BasicBlock *NewResult = DT->findNearestCommonDominator(Result, BB);
+ if (NewResult != Result)
+ ResultIsRemembered = false;
+ if (NewResult == BB)
+ ResultIsRemembered |= Remember;
+ Result = NewResult;
+ }
- assert(Node && "Dominator tree invalid!");
+public:
+ explicit NearestCommonDominator(DominatorTree *DomTree) : DT(DomTree) {}
- unsigned Numbering = IndexMap[Node];
- if (Numbering > ResultIndex) {
- Result = Node->getBlock();
- ResultIndex = Numbering;
- ExplicitMentioned = Remember && (Result == BB);
- } else if (Numbering == ResultIndex) {
- ExplicitMentioned |= Remember;
- }
+ void addBlock(BasicBlock *BB) {
+ addBlock(BB, /* Remember = */ false);
}
- /// \brief Is "Result" one of the BBs added with "Remember" = True?
- bool wasResultExplicitMentioned() {
- return ExplicitMentioned;
+ void addAndRememberBlock(BasicBlock *BB) {
+ addBlock(BB, /* Remember = */ true);
}
- /// \brief Get the query result
- BasicBlock *getResult() {
- return Result;
- }
+ /// Get the nearest common dominator of all the BBs added via addBlock() and
+ /// addAndRememberBlock().
+ BasicBlock *result() { return Result; }
+
+ /// Is the BB returned by result() one of the blocks we added to the set
+ /// with addAndRememberBlock()?
+ bool resultIsRememberedBlock() { return ResultIsRemembered; }
};
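The rewritten helper keeps only two pieces of state: the running nearest common dominator and whether that result is still one of the explicitly remembered blocks. Below is a self-contained toy model of that bookkeeping over a hand-built idom chain; the Node type and the naive walk-up search are illustrative stand-ins, not LLVM's DomTreeNode.

#include <cassert>
#include <set>

struct Node { Node *IDom = nullptr; };

static Node *nearestCommonDominator(Node *A, Node *B) {
  // Naive walk-up search: collect A's ancestors, then climb from B.
  std::set<Node *> Anc;
  for (Node *N = A; N; N = N->IDom)
    Anc.insert(N);
  for (Node *N = B; N; N = N->IDom)
    if (Anc.count(N))
      return N;
  return nullptr;
}

struct NCDModel {
  Node *Result = nullptr;
  bool ResultIsRemembered = false;
  void add(Node *N, bool Remember) {
    if (!Result) {
      Result = N;
      ResultIsRemembered = Remember;
      return;
    }
    Node *New = nearestCommonDominator(Result, N);
    if (New != Result)
      ResultIsRemembered = false;      // result climbed above the old one
    if (New == N)
      ResultIsRemembered |= Remember;  // the added block itself is the result
    Result = New;
  }
};

int main() {
  // Idom chain: Entry dominates L and R; L dominates LL.
  Node Entry, L, R, LL;
  L.IDom = &Entry;
  R.IDom = &Entry;
  LL.IDom = &L;

  NCDModel M;
  M.add(&LL, /*Remember=*/true);
  M.add(&L, /*Remember=*/true);    // moves to L, which was just remembered
  assert(M.Result == &L && M.ResultIsRemembered);
  M.add(&R, /*Remember=*/false);   // moves to Entry, which nobody remembered
  assert(M.Result == &Entry && !M.ResultIsRemembered);
  return 0;
}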
/// @brief Transforms the control flow graph on one single entry/exit region
@@ -141,7 +122,7 @@ public:
/// Control flow is expressed as a branch where the true exit goes into the
/// "Then"/"Else" region, while the false exit skips the region
/// The condition for the optional "Else" region is expressed as a PHI node.
-/// The incomming values of the PHI node are true for the "If" edge and false
+/// The incoming values of the PHI node are true for the "If" edge and false
/// for the "Then" edge.
///
/// Additionally to that even complicated loops look like this:
@@ -163,7 +144,6 @@ public:
/// breaks and the false values expresses continue states.
class StructurizeCFG : public RegionPass {
bool SkipUniformRegions;
- DivergenceAnalysis *DA;
Type *Boolean;
ConstantInt *BoolTrue;
@@ -176,7 +156,7 @@ class StructurizeCFG : public RegionPass {
DominatorTree *DT;
LoopInfo *LI;
- RNVector Order;
+ SmallVector<RegionNode *, 8> Order;
BBSet Visited;
BBPhiMap DeletedPhis;
@@ -236,29 +216,19 @@ class StructurizeCFG : public RegionPass {
void rebuildSSA();
- bool hasOnlyUniformBranches(const Region *R);
-
public:
static char ID;
- StructurizeCFG() :
- RegionPass(ID), SkipUniformRegions(false) {
- initializeStructurizeCFGPass(*PassRegistry::getPassRegistry());
- }
-
- StructurizeCFG(bool SkipUniformRegions) :
- RegionPass(ID), SkipUniformRegions(SkipUniformRegions) {
+ explicit StructurizeCFG(bool SkipUniformRegions = false)
+ : RegionPass(ID), SkipUniformRegions(SkipUniformRegions) {
initializeStructurizeCFGPass(*PassRegistry::getPassRegistry());
}
- using Pass::doInitialization;
bool doInitialization(Region *R, RGPassManager &RGM) override;
bool runOnRegion(Region *R, RGPassManager &RGM) override;
- const char *getPassName() const override {
- return "Structurize control flow";
- }
+ StringRef getPassName() const override { return "Structurize control flow"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
if (SkipUniformRegions)
@@ -266,6 +236,7 @@ public:
AU.addRequiredID(LowerSwitchID);
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfoWrapperPass>();
+
AU.addPreserved<DominatorTreeWrapperPass>();
RegionPass::getAnalysisUsage(AU);
}
@@ -298,17 +269,13 @@ bool StructurizeCFG::doInitialization(Region *R, RGPassManager &RGM) {
/// \brief Build up the general order of nodes
void StructurizeCFG::orderNodes() {
- RNVector TempOrder;
ReversePostOrderTraversal<Region*> RPOT(ParentRegion);
- TempOrder.append(RPOT.begin(), RPOT.end());
-
- std::map<Loop*, unsigned> LoopBlocks;
-
+ SmallDenseMap<Loop*, unsigned, 8> LoopBlocks;
// The reverse post-order traversal of the list gives us an ordering close
// to what we want. The only problem with it is that sometimes backedges
// for outer loops will be visited before backedges for inner loops.
- for (RegionNode *RN : TempOrder) {
+ for (RegionNode *RN : RPOT) {
BasicBlock *BB = RN->getEntry();
Loop *Loop = LI->getLoopFor(BB);
++LoopBlocks[Loop];
@@ -316,19 +283,18 @@ void StructurizeCFG::orderNodes() {
unsigned CurrentLoopDepth = 0;
Loop *CurrentLoop = nullptr;
- BBSet TempVisited;
- for (RNVector::iterator I = TempOrder.begin(), E = TempOrder.end(); I != E; ++I) {
+ for (auto I = RPOT.begin(), E = RPOT.end(); I != E; ++I) {
BasicBlock *BB = (*I)->getEntry();
unsigned LoopDepth = LI->getLoopDepth(BB);
- if (std::find(Order.begin(), Order.end(), *I) != Order.end())
+ if (is_contained(Order, *I))
continue;
if (LoopDepth < CurrentLoopDepth) {
// Make sure we have visited all blocks in this loop before moving back to
// the outer loop.
- RNVector::iterator LoopI = I;
+ auto LoopI = I;
while (unsigned &BlockCount = LoopBlocks[CurrentLoop]) {
LoopI++;
BasicBlock *LoopBB = (*LoopI)->getEntry();
@@ -340,9 +306,8 @@ void StructurizeCFG::orderNodes() {
}
CurrentLoop = LI->getLoopFor(BB);
- if (CurrentLoop) {
+ if (CurrentLoop)
LoopBlocks[CurrentLoop]--;
- }
CurrentLoopDepth = LoopDepth;
Order.push_back(*I);
@@ -426,46 +391,40 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) {
BBPredicates &Pred = Predicates[BB];
BBPredicates &LPred = LoopPreds[BB];
- for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
- PI != PE; ++PI) {
-
+ for (BasicBlock *P : predecessors(BB)) {
// Ignore it if it's a branch from outside into our region entry
- if (!ParentRegion->contains(*PI))
+ if (!ParentRegion->contains(P))
continue;
- Region *R = RI->getRegionFor(*PI);
+ Region *R = RI->getRegionFor(P);
if (R == ParentRegion) {
-
// It's a top level block in our region
- BranchInst *Term = cast<BranchInst>((*PI)->getTerminator());
+ BranchInst *Term = cast<BranchInst>(P->getTerminator());
for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
BasicBlock *Succ = Term->getSuccessor(i);
if (Succ != BB)
continue;
- if (Visited.count(*PI)) {
+ if (Visited.count(P)) {
// Normal forward edge
if (Term->isConditional()) {
// Try to treat it like an ELSE block
BasicBlock *Other = Term->getSuccessor(!i);
if (Visited.count(Other) && !Loops.count(Other) &&
- !Pred.count(Other) && !Pred.count(*PI)) {
+ !Pred.count(Other) && !Pred.count(P)) {
Pred[Other] = BoolFalse;
- Pred[*PI] = BoolTrue;
+ Pred[P] = BoolTrue;
continue;
}
}
- Pred[*PI] = buildCondition(Term, i, false);
-
+ Pred[P] = buildCondition(Term, i, false);
} else {
// Back edge
- LPred[*PI] = buildCondition(Term, i, true);
+ LPred[P] = buildCondition(Term, i, true);
}
}
-
} else {
-
// It's an exit from a sub region
while (R->getParent() != ParentRegion)
R = R->getParent();
@@ -496,7 +455,6 @@ void StructurizeCFG::collectInfos() {
Visited.clear();
for (RegionNode *RN : reverse(Order)) {
-
DEBUG(dbgs() << "Visiting: "
<< (RN->isSubRegion() ? "SubRegion with entry: " : "")
<< RN->getEntry()->getName() << " Loop Depth: "
@@ -533,25 +491,26 @@ void StructurizeCFG::insertConditions(bool Loops) {
BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue];
NearestCommonDominator Dominator(DT);
- Dominator.addBlock(Parent, false);
+ Dominator.addBlock(Parent);
Value *ParentValue = nullptr;
- for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end();
- PI != PE; ++PI) {
+ for (std::pair<BasicBlock *, Value *> BBAndPred : Preds) {
+ BasicBlock *BB = BBAndPred.first;
+ Value *Pred = BBAndPred.second;
- if (PI->first == Parent) {
- ParentValue = PI->second;
+ if (BB == Parent) {
+ ParentValue = Pred;
break;
}
- PhiInserter.AddAvailableValue(PI->first, PI->second);
- Dominator.addBlock(PI->first);
+ PhiInserter.AddAvailableValue(BB, Pred);
+ Dominator.addAndRememberBlock(BB);
}
if (ParentValue) {
Term->setCondition(ParentValue);
} else {
- if (!Dominator.wasResultExplicitMentioned())
- PhiInserter.AddAvailableValue(Dominator.getResult(), Default);
+ if (!Dominator.resultIsRememberedBlock())
+ PhiInserter.AddAvailableValue(Dominator.result(), Default);
Term->setCondition(PhiInserter.GetValueInMiddleOfBlock(Parent));
}
@@ -562,10 +521,10 @@ void StructurizeCFG::insertConditions(bool Loops) {
/// them in DeletedPhis
void StructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {
PhiMap &Map = DeletedPhis[To];
- for (BasicBlock::iterator I = To->begin(), E = To->end();
- I != E && isa<PHINode>(*I);) {
-
- PHINode &Phi = cast<PHINode>(*I++);
+ for (Instruction &I : *To) {
+ if (!isa<PHINode>(I))
+ break;
+ PHINode &Phi = cast<PHINode>(I);
while (Phi.getBasicBlockIndex(From) != -1) {
Value *Deleted = Phi.removeIncomingValue(From, false);
Map[&Phi].push_back(std::make_pair(From, Deleted));
@@ -575,10 +534,10 @@ void StructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {
/// \brief Add a dummy PHI value as soon as we knew the new predecessor
void StructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) {
- for (BasicBlock::iterator I = To->begin(), E = To->end();
- I != E && isa<PHINode>(*I);) {
-
- PHINode &Phi = cast<PHINode>(*I++);
+ for (Instruction &I : *To) {
+ if (!isa<PHINode>(I))
+ break;
+ PHINode &Phi = cast<PHINode>(I);
Value *Undef = UndefValue::get(Phi.getType());
Phi.addIncoming(Undef, From);
}
@@ -589,7 +548,6 @@ void StructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) {
void StructurizeCFG::setPhiValues() {
SSAUpdater Updater;
for (const auto &AddedPhi : AddedPhis) {
-
BasicBlock *To = AddedPhi.first;
const BBVector &From = AddedPhi.second;
@@ -598,7 +556,6 @@ void StructurizeCFG::setPhiValues() {
PhiMap &Map = DeletedPhis[To];
for (const auto &PI : Map) {
-
PHINode *Phi = PI.first;
Value *Undef = UndefValue::get(Phi->getType());
Updater.Initialize(Phi->getType(), "");
@@ -606,18 +563,16 @@ void StructurizeCFG::setPhiValues() {
Updater.AddAvailableValue(To, Undef);
NearestCommonDominator Dominator(DT);
- Dominator.addBlock(To, false);
+ Dominator.addBlock(To);
for (const auto &VI : PI.second) {
-
Updater.AddAvailableValue(VI.first, VI.second);
- Dominator.addBlock(VI.first);
+ Dominator.addAndRememberBlock(VI.first);
}
- if (!Dominator.wasResultExplicitMentioned())
- Updater.AddAvailableValue(Dominator.getResult(), Undef);
+ if (!Dominator.resultIsRememberedBlock())
+ Updater.AddAvailableValue(Dominator.result(), Undef);
for (BasicBlock *FI : From) {
-
int Idx = Phi->getBasicBlockIndex(FI);
assert(Idx != -1);
Phi->setIncomingValue(Idx, Updater.GetValueAtEndOfBlock(FI));
@@ -636,10 +591,8 @@ void StructurizeCFG::killTerminator(BasicBlock *BB) {
return;
for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB);
- SI != SE; ++SI) {
-
+ SI != SE; ++SI)
delPhiValues(BB, *SI);
- }
Term->eraseFromParent();
}
@@ -653,10 +606,10 @@ void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit,
BasicBlock *Dominator = nullptr;
// Find all the edges from the sub region to the exit
- for (pred_iterator I = pred_begin(OldExit), E = pred_end(OldExit);
- I != E;) {
+ for (auto BBI = pred_begin(OldExit), E = pred_end(OldExit); BBI != E;) {
+ // Increment BBI before mucking with BB's terminator.
+ BasicBlock *BB = *BBI++;
- BasicBlock *BB = *I++;
if (!SubRegion->contains(BB))
continue;
@@ -680,7 +633,6 @@ void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit,
// Update the region info
SubRegion->replaceExit(NewExit);
-
} else {
BasicBlock *BB = Node->getNodeAs<BasicBlock>();
killTerminator(BB);
@@ -711,7 +663,6 @@ BasicBlock *StructurizeCFG::needPrefix(bool NeedEmpty) {
killTerminator(Entry);
if (!NeedEmpty || Entry->getFirstInsertionPt() == Entry->end())
return Entry;
-
}
// create a new flow node
@@ -726,13 +677,13 @@ BasicBlock *StructurizeCFG::needPrefix(bool NeedEmpty) {
/// \brief Returns the region exit if possible, otherwise just a new flow node
BasicBlock *StructurizeCFG::needPostfix(BasicBlock *Flow,
bool ExitUseAllowed) {
- if (Order.empty() && ExitUseAllowed) {
- BasicBlock *Exit = ParentRegion->getExit();
- DT->changeImmediateDominator(Exit, Flow);
- addPhiValues(Flow, Exit);
- return Exit;
- }
- return getNextFlow(Flow);
+ if (!Order.empty() || !ExitUseAllowed)
+ return getNextFlow(Flow);
+
+ BasicBlock *Exit = ParentRegion->getExit();
+ DT->changeImmediateDominator(Exit, Flow);
+ addPhiValues(Flow, Exit);
+ return Exit;
}
/// \brief Set the previous node
@@ -741,16 +692,12 @@ void StructurizeCFG::setPrevNode(BasicBlock *BB) {
: nullptr;
}
-/// \brief Does BB dominate all the predicates of Node ?
+/// \brief Does BB dominate all the predicates of Node?
bool StructurizeCFG::dominatesPredicates(BasicBlock *BB, RegionNode *Node) {
BBPredicates &Preds = Predicates[Node->getEntry()];
- for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end();
- PI != PE; ++PI) {
-
- if (!DT->dominates(BB, PI->first))
- return false;
- }
- return true;
+ return llvm::all_of(Preds, [&](std::pair<BasicBlock *, Value *> Pred) {
+ return DT->dominates(BB, Pred.first);
+ });
}
/// \brief Can we predict that this node will always be called?
@@ -762,13 +709,14 @@ bool StructurizeCFG::isPredictableTrue(RegionNode *Node) {
if (!PrevNode)
return true;
- for (BBPredicates::iterator I = Preds.begin(), E = Preds.end();
- I != E; ++I) {
+ for (std::pair<BasicBlock*, Value*> Pred : Preds) {
+ BasicBlock *BB = Pred.first;
+ Value *V = Pred.second;
- if (I->second != BoolTrue)
+ if (V != BoolTrue)
return false;
- if (!Dominated && DT->dominates(I->first, PrevNode->getEntry()))
+ if (!Dominated && DT->dominates(BB, PrevNode->getEntry()))
Dominated = true;
}
@@ -883,30 +831,29 @@ void StructurizeCFG::createFlow() {
/// no longer dominate all their uses. Not sure if this is really necessary
void StructurizeCFG::rebuildSSA() {
SSAUpdater Updater;
- for (auto *BB : ParentRegion->blocks())
- for (BasicBlock::iterator II = BB->begin(), IE = BB->end();
- II != IE; ++II) {
-
+ for (BasicBlock *BB : ParentRegion->blocks())
+ for (Instruction &I : *BB) {
bool Initialized = false;
- for (auto I = II->use_begin(), E = II->use_end(); I != E;) {
- Use &U = *I++;
+ // We may modify the use list as we iterate over it, so be careful to
+ // compute the next element in the use list at the top of the loop.
+ for (auto UI = I.use_begin(), E = I.use_end(); UI != E;) {
+ Use &U = *UI++;
Instruction *User = cast<Instruction>(U.getUser());
if (User->getParent() == BB) {
continue;
-
} else if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
if (UserPN->getIncomingBlock(U) == BB)
continue;
}
- if (DT->dominates(&*II, User))
+ if (DT->dominates(&I, User))
continue;
if (!Initialized) {
- Value *Undef = UndefValue::get(II->getType());
- Updater.Initialize(II->getType(), "");
+ Value *Undef = UndefValue::get(I.getType());
+ Updater.Initialize(I.getType(), "");
Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
- Updater.AddAvailableValue(BB, &*II);
+ Updater.AddAvailableValue(BB, &I);
Initialized = true;
}
Updater.RewriteUseAfterInsertions(U);
@@ -914,13 +861,14 @@ void StructurizeCFG::rebuildSSA() {
}
}
-bool StructurizeCFG::hasOnlyUniformBranches(const Region *R) {
+static bool hasOnlyUniformBranches(const Region *R,
+ const DivergenceAnalysis &DA) {
for (const BasicBlock *BB : R->blocks()) {
const BranchInst *Br = dyn_cast<BranchInst>(BB->getTerminator());
if (!Br || !Br->isConditional())
continue;
- if (!DA->isUniform(Br->getCondition()))
+ if (!DA.isUniform(Br->getCondition()))
return false;
DEBUG(dbgs() << "BB: " << BB->getName() << " has uniform terminator\n");
}
@@ -933,9 +881,9 @@ bool StructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) {
return false;
if (SkipUniformRegions) {
- DA = &getAnalysis<DivergenceAnalysis>();
// TODO: We could probably be smarter here with how we handle sub-regions.
- if (hasOnlyUniformBranches(R)) {
+ auto &DA = getAnalysis<DivergenceAnalysis>();
+ if (hasOnlyUniformBranches(R, DA)) {
DEBUG(dbgs() << "Skipping region with uniform control flow: " << *R << '\n');
// Mark all direct child block terminators as having been treated as
@@ -943,12 +891,11 @@ bool StructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) {
// sub-regions are treated more cleverly, indirect children are not
// marked as uniform.
MDNode *MD = MDNode::get(R->getEntry()->getParent()->getContext(), {});
- Region::element_iterator E = R->element_end();
- for (Region::element_iterator I = R->element_begin(); I != E; ++I) {
- if (I->isSubRegion())
+ for (RegionNode *E : R->elements()) {
+ if (E->isSubRegion())
continue;
- if (Instruction *Term = I->getEntry()->getTerminator())
+ if (Instruction *Term = E->getEntry()->getTerminator())
Term->setMetadata("structurizecfg.uniform", MD);
}
diff --git a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
index d5ff99750370..a6b9fee1d8ac 100644
--- a/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -236,7 +236,7 @@ static bool markTails(Function &F, bool &AllCallsAreTailCalls) {
if (!CI || CI->isTailCall())
continue;
- bool IsNoTail = CI->isNoTailCall();
+ bool IsNoTail = CI->isNoTailCall() || CI->hasOperandBundles();
if (!IsNoTail && CI->doesNotAccessMemory()) {
// A call to a readnone function whose arguments are all things computed
@@ -347,7 +347,7 @@ static bool canMoveAboveCall(Instruction *I, CallInst *CI) {
// return value of the call, it must only use things that are defined before
// the call, or movable instructions between the call and the instruction
// itself.
- return std::find(I->op_begin(), I->op_end(), CI) == I->op_end();
+ return !is_contained(I->operands(), CI);
}
/// Return true if the specified value is the same when the return would exit
diff --git a/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp b/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp
index 7e50d4bb447e..df9d5da9e26e 100644
--- a/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/ASanStackFrameLayout.cpp
@@ -12,7 +12,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Utils/ASanStackFrameLayout.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -47,64 +49,102 @@ static size_t VarAndRedzoneSize(size_t Size, size_t Alignment) {
return alignTo(Res, Alignment);
}
-void
+ASanStackFrameLayout
ComputeASanStackFrameLayout(SmallVectorImpl<ASanStackVariableDescription> &Vars,
- size_t Granularity, size_t MinHeaderSize,
- ASanStackFrameLayout *Layout) {
+ size_t Granularity, size_t MinHeaderSize) {
assert(Granularity >= 8 && Granularity <= 64 &&
(Granularity & (Granularity - 1)) == 0);
assert(MinHeaderSize >= 16 && (MinHeaderSize & (MinHeaderSize - 1)) == 0 &&
MinHeaderSize >= Granularity);
- size_t NumVars = Vars.size();
+ const size_t NumVars = Vars.size();
assert(NumVars > 0);
for (size_t i = 0; i < NumVars; i++)
Vars[i].Alignment = std::max(Vars[i].Alignment, kMinAlignment);
std::stable_sort(Vars.begin(), Vars.end(), CompareVars);
- SmallString<2048> StackDescriptionStorage;
- raw_svector_ostream StackDescription(StackDescriptionStorage);
- StackDescription << NumVars;
- Layout->FrameAlignment = std::max(Granularity, Vars[0].Alignment);
- SmallVector<uint8_t, 64> &SB(Layout->ShadowBytes);
- SB.clear();
+
+ ASanStackFrameLayout Layout;
+ Layout.Granularity = Granularity;
+ Layout.FrameAlignment = std::max(Granularity, Vars[0].Alignment);
size_t Offset = std::max(std::max(MinHeaderSize, Granularity),
Vars[0].Alignment);
assert((Offset % Granularity) == 0);
- SB.insert(SB.end(), Offset / Granularity, kAsanStackLeftRedzoneMagic);
for (size_t i = 0; i < NumVars; i++) {
bool IsLast = i == NumVars - 1;
size_t Alignment = std::max(Granularity, Vars[i].Alignment);
(void)Alignment; // Used only in asserts.
size_t Size = Vars[i].Size;
- const char *Name = Vars[i].Name;
assert((Alignment & (Alignment - 1)) == 0);
- assert(Layout->FrameAlignment >= Alignment);
+ assert(Layout.FrameAlignment >= Alignment);
assert((Offset % Alignment) == 0);
assert(Size > 0);
- StackDescription << " " << Offset << " " << Size << " " << strlen(Name)
- << " " << Name;
size_t NextAlignment = IsLast ? Granularity
: std::max(Granularity, Vars[i + 1].Alignment);
- size_t SizeWithRedzone = VarAndRedzoneSize(Vars[i].Size, NextAlignment);
- SB.insert(SB.end(), Size / Granularity, 0);
- if (Size % Granularity)
- SB.insert(SB.end(), Size % Granularity);
- SB.insert(SB.end(), (SizeWithRedzone - Size) / Granularity,
- IsLast ? kAsanStackRightRedzoneMagic
- : kAsanStackMidRedzoneMagic);
+ size_t SizeWithRedzone = VarAndRedzoneSize(Size, NextAlignment);
Vars[i].Offset = Offset;
Offset += SizeWithRedzone;
}
if (Offset % MinHeaderSize) {
- size_t ExtraRedzone = MinHeaderSize - (Offset % MinHeaderSize);
- SB.insert(SB.end(), ExtraRedzone / Granularity,
- kAsanStackRightRedzoneMagic);
- Offset += ExtraRedzone;
+ Offset += MinHeaderSize - (Offset % MinHeaderSize);
+ }
+ Layout.FrameSize = Offset;
+ assert((Layout.FrameSize % MinHeaderSize) == 0);
+ return Layout;
+}
+
+SmallString<64> ComputeASanStackFrameDescription(
+ const SmallVectorImpl<ASanStackVariableDescription> &Vars) {
+ SmallString<2048> StackDescriptionStorage;
+ raw_svector_ostream StackDescription(StackDescriptionStorage);
+ StackDescription << Vars.size();
+
+ for (const auto &Var : Vars) {
+ std::string Name = Var.Name;
+ if (Var.Line) {
+ Name += ":";
+ Name += to_string(Var.Line);
+ }
+ StackDescription << " " << Var.Offset << " " << Var.Size << " "
+ << Name.size() << " " << Name;
}
- Layout->DescriptionString = StackDescription.str();
- Layout->FrameSize = Offset;
- assert((Layout->FrameSize % MinHeaderSize) == 0);
- assert(Layout->FrameSize / Granularity == Layout->ShadowBytes.size());
+ return StackDescription.str();
+}
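The description string built here is a flat, space-separated record: the variable count, then offset, size, name length, and name (with an optional ":line" suffix) per variable. A standalone sketch of the same encoding with two hypothetical variables:

#include <iostream>
#include <sstream>
#include <string>
#include <vector>

struct Var { std::string Name; unsigned Line, Offset, Size; };

int main() {
  // Hypothetical variables and offsets, just to show the record layout.
  std::vector<Var> Vars = {{"buf", 0, 32, 10}, {"x", 7, 48, 4}};
  std::ostringstream OS;
  OS << Vars.size();
  for (const Var &V : Vars) {
    std::string Name = V.Name;
    if (V.Line)
      Name += ":" + std::to_string(V.Line);
    OS << " " << V.Offset << " " << V.Size << " " << Name.size() << " " << Name;
  }
  std::cout << OS.str() << "\n";  // prints: 2 32 10 3 buf 48 4 3 x:7
  return 0;
}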
+
+SmallVector<uint8_t, 64>
+GetShadowBytes(const SmallVectorImpl<ASanStackVariableDescription> &Vars,
+ const ASanStackFrameLayout &Layout) {
+ assert(Vars.size() > 0);
+ SmallVector<uint8_t, 64> SB;
+ SB.clear();
+ const size_t Granularity = Layout.Granularity;
+ SB.resize(Vars[0].Offset / Granularity, kAsanStackLeftRedzoneMagic);
+ for (const auto &Var : Vars) {
+ SB.resize(Var.Offset / Granularity, kAsanStackMidRedzoneMagic);
+
+ SB.resize(SB.size() + Var.Size / Granularity, 0);
+ if (Var.Size % Granularity)
+ SB.push_back(Var.Size % Granularity);
+ }
+ SB.resize(Layout.FrameSize / Granularity, kAsanStackRightRedzoneMagic);
+ return SB;
+}
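GetShadowBytes emits one shadow byte per granule: left-redzone magic up to the first variable, mid-redzone magic in the gaps, 0 for fully addressable granules, the residue count for a partial last granule, and right-redzone magic out to the frame size. A standalone sketch with a hypothetical two-variable layout; the 0xf1/0xf2/0xf3 constants are the left/mid/right redzone magic values ASan uses:

#include <cstdint>
#include <cstdio>
#include <vector>

int main() {
  const size_t Granularity = 8;
  const uint8_t kLeft = 0xf1, kMid = 0xf2, kRight = 0xf3;
  struct Var { size_t Offset, Size; };
  // Hypothetical frame: 32-byte header, a 10-byte var at offset 32,
  // a 4-byte var at offset 64, total frame size 96.
  std::vector<Var> Vars = {{32, 10}, {64, 4}};
  const size_t FrameSize = 96;

  std::vector<uint8_t> SB;
  SB.resize(Vars[0].Offset / Granularity, kLeft);       // left redzone
  for (const Var &V : Vars) {
    SB.resize(V.Offset / Granularity, kMid);            // mid redzone gap
    SB.resize(SB.size() + V.Size / Granularity, 0);     // fully addressable
    if (V.Size % Granularity)
      SB.push_back(V.Size % Granularity);               // partial last granule
  }
  SB.resize(FrameSize / Granularity, kRight);           // right redzone
  for (uint8_t B : SB)
    std::printf("%02x ", (unsigned)B);
  std::printf("\n");  // f1 f1 f1 f1 00 02 f2 f2 04 f3 f3 f3
  return 0;
}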
+
+SmallVector<uint8_t, 64> GetShadowBytesAfterScope(
+ const SmallVectorImpl<ASanStackVariableDescription> &Vars,
+ const ASanStackFrameLayout &Layout) {
+ SmallVector<uint8_t, 64> SB = GetShadowBytes(Vars, Layout);
+ const size_t Granularity = Layout.Granularity;
+
+ for (const auto &Var : Vars) {
+ assert(Var.LifetimeSize <= Var.Size);
+ const size_t LifetimeShadowSize =
+ (Var.LifetimeSize + Granularity - 1) / Granularity;
+ const size_t Offset = Var.Offset / Granularity;
+ std::fill(SB.begin() + Offset, SB.begin() + Offset + LifetimeShadowSize,
+ kAsanStackUseAfterScopeMagic);
+ }
+
+ return SB;
}
} // llvm namespace
diff --git a/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp b/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
index d034905b6572..2e95926c0b3f 100644
--- a/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/AddDiscriminators.cpp
@@ -57,12 +57,10 @@
#include "llvm/ADT/DenseSet.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
-#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
-#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -159,20 +157,14 @@ static bool addDiscriminators(Function &F) {
// If the function has debug information, but the user has disabled
// discriminators, do nothing.
// Similarly, if the function has no debug info, do nothing.
- // Finally, if this module is built with dwarf versions earlier than 4,
- // do nothing (discriminator support is a DWARF 4 feature).
- if (NoDiscriminators || !F.getSubprogram() ||
- F.getParent()->getDwarfVersion() < 4)
+ if (NoDiscriminators || !F.getSubprogram())
return false;
bool Changed = false;
- Module *M = F.getParent();
- LLVMContext &Ctx = M->getContext();
- DIBuilder Builder(*M, /*AllowUnresolved*/ false);
typedef std::pair<StringRef, unsigned> Location;
- typedef DenseMap<const BasicBlock *, Metadata *> BBScopeMap;
- typedef DenseMap<Location, BBScopeMap> LocationBBMap;
+ typedef DenseSet<const BasicBlock *> BBSet;
+ typedef DenseMap<Location, BBSet> LocationBBMap;
typedef DenseMap<Location, unsigned> LocationDiscriminatorMap;
typedef DenseSet<Location> LocationSet;
@@ -184,32 +176,25 @@ static bool addDiscriminators(Function &F) {
// discriminator for this instruction.
for (BasicBlock &B : F) {
for (auto &I : B.getInstList()) {
- if (isa<DbgInfoIntrinsic>(&I))
+ if (isa<IntrinsicInst>(&I))
continue;
const DILocation *DIL = I.getDebugLoc();
if (!DIL)
continue;
Location L = std::make_pair(DIL->getFilename(), DIL->getLine());
auto &BBMap = LBM[L];
- auto R = BBMap.insert(std::make_pair(&B, (Metadata *)nullptr));
+ auto R = BBMap.insert(&B);
if (BBMap.size() == 1)
continue;
- bool InsertSuccess = R.second;
- Metadata *&NewScope = R.first->second;
- // If we could insert a different block in the same location, a
+ // If we could insert more than one block with the same line+file, a
// discriminator is needed to distinguish both instructions.
- if (InsertSuccess) {
- auto *Scope = DIL->getScope();
- auto *File =
- Builder.createFile(DIL->getFilename(), Scope->getDirectory());
- NewScope = Builder.createLexicalBlockFile(Scope, File, ++LDM[L]);
- }
- I.setDebugLoc(DILocation::get(Ctx, DIL->getLine(), DIL->getColumn(),
- NewScope, DIL->getInlinedAt()));
+ // Only the lowest 7 bits are used to represent a discriminator to fit
+ // it in 1 byte ULEB128 representation.
+ unsigned Discriminator = (R.second ? ++LDM[L] : LDM[L]) & 0x7f;
+ I.setDebugLoc(DIL->cloneWithDiscriminator(Discriminator));
DEBUG(dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":"
- << DIL->getColumn() << ":"
- << dyn_cast<DILexicalBlockFile>(NewScope)->getDiscriminator()
- << I << "\n");
+ << DIL->getColumn() << ":" << Discriminator << " " << I
+ << "\n");
Changed = true;
}
}
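The "& 0x7f" above is what keeps each discriminator inside a single ULEB128 byte: ULEB128 packs 7 payload bits per byte and uses the top bit as a continuation flag, so any value in [0, 127] encodes as one byte. A small standalone check; the encoder below is an illustrative reimplementation, not LLVM's:

#include <cstdint>
#include <cstdio>
#include <vector>

static std::vector<uint8_t> encodeULEB128(uint64_t V) {
  std::vector<uint8_t> Out;
  do {
    uint8_t Byte = V & 0x7f;
    V >>= 7;
    if (V)
      Byte |= 0x80;  // more bytes follow
    Out.push_back(Byte);
  } while (V);
  return Out;
}

int main() {
  unsigned Counter = 130;                    // hypothetical per-location counter
  unsigned Discriminator = Counter & 0x7f;   // wraps to 2, still one byte
  std::printf("%u -> %zu byte(s), %u -> %zu byte(s)\n",
              Counter, encodeULEB128(Counter).size(),
              Discriminator, encodeULEB128(Discriminator).size());
  // prints: 130 -> 2 byte(s), 2 -> 1 byte(s)
  return 0;
}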
@@ -222,7 +207,7 @@ static bool addDiscriminators(Function &F) {
LocationSet CallLocations;
for (auto &I : B.getInstList()) {
CallInst *Current = dyn_cast<CallInst>(&I);
- if (!Current || isa<DbgInfoIntrinsic>(&I))
+ if (!Current || isa<IntrinsicInst>(&I))
continue;
DILocation *CurrentDIL = Current->getDebugLoc();
@@ -231,13 +216,8 @@ static bool addDiscriminators(Function &F) {
Location L =
std::make_pair(CurrentDIL->getFilename(), CurrentDIL->getLine());
if (!CallLocations.insert(L).second) {
- auto *Scope = CurrentDIL->getScope();
- auto *File = Builder.createFile(CurrentDIL->getFilename(),
- Scope->getDirectory());
- auto *NewScope = Builder.createLexicalBlockFile(Scope, File, ++LDM[L]);
- Current->setDebugLoc(DILocation::get(Ctx, CurrentDIL->getLine(),
- CurrentDIL->getColumn(), NewScope,
- CurrentDIL->getInlinedAt()));
+ Current->setDebugLoc(
+ CurrentDIL->cloneWithDiscriminator((++LDM[L]) & 0x7f));
Changed = true;
}
}
@@ -249,7 +229,7 @@ bool AddDiscriminatorsLegacyPass::runOnFunction(Function &F) {
return addDiscriminators(F);
}
PreservedAnalyses AddDiscriminatorsPass::run(Function &F,
- AnalysisManager<Function> &AM) {
+ FunctionAnalysisManager &AM) {
if (!addDiscriminators(F))
return PreservedAnalyses::all();
diff --git a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
index 49b646a041f5..175cbd2ce0df 100644
--- a/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -15,7 +15,7 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BreakCriticalEdges.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -23,10 +23,10 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Dominators.h"
-#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;
@@ -72,6 +72,20 @@ FunctionPass *llvm::createBreakCriticalEdgesPass() {
return new BreakCriticalEdges();
}
+PreservedAnalyses BreakCriticalEdgesPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
+ auto *LI = AM.getCachedResult<LoopAnalysis>(F);
+ unsigned N = SplitAllCriticalEdges(F, CriticalEdgeSplittingOptions(DT, LI));
+ NumBroken += N;
+ if (N == 0)
+ return PreservedAnalyses::all();
+ PreservedAnalyses PA;
+ PA.preserve<DominatorTreeAnalysis>();
+ PA.preserve<LoopAnalysis>();
+ return PA;
+}
+
//===----------------------------------------------------------------------===//
// Implementation of the external critical edge manipulation functions
//===----------------------------------------------------------------------===//
diff --git a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
index f4260a9ff980..e61b04fbdd57 100644
--- a/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BuildLibCalls.cpp
@@ -250,6 +250,7 @@ bool llvm::inferLibFuncAttributes(Function &F, const TargetLibraryInfo &TLI) {
Changed |= setDoesNotCapture(F, 2);
return Changed;
case LibFunc::memcpy:
+ case LibFunc::mempcpy:
case LibFunc::memccpy:
case LibFunc::memmove:
Changed |= setDoesNotThrow(F);
diff --git a/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp b/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
index 42287d3bb2e8..bc2cef26edcb 100644
--- a/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -20,6 +20,7 @@
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/Transforms/Utils/Local.h"
using namespace llvm;
@@ -82,13 +83,17 @@ static bool insertFastDiv(Instruction *I, IntegerType *BypassType,
Value *Dividend = I->getOperand(0);
Value *Divisor = I->getOperand(1);
- if (isa<ConstantInt>(Divisor) ||
- (isa<ConstantInt>(Dividend) && isa<ConstantInt>(Divisor))) {
- // Operations with immediate values should have
- // been solved and replaced during compile time.
+ if (isa<ConstantInt>(Divisor)) {
+ // Division by a constant should have been solved and replaced earlier
+ // in the pipeline.
return false;
}
+ // If the numerator is a constant, bail if it doesn't fit into BypassType.
+ if (ConstantInt *ConstDividend = dyn_cast<ConstantInt>(Dividend))
+ if (ConstDividend->getValue().getActiveBits() > BypassType->getBitWidth())
+ return false;
+
// Basic Block is split before divide
BasicBlock *MainBB = &*I->getParent();
BasicBlock *SuccessorBB = MainBB->splitBasicBlock(I);
@@ -120,8 +125,7 @@ static bool insertFastDiv(Instruction *I, IntegerType *BypassType,
BypassType);
// udiv/urem because optimization only handles positive numbers
- Value *ShortQuotientV = FastBuilder.CreateExactUDiv(ShortDividendV,
- ShortDivisorV);
+ Value *ShortQuotientV = FastBuilder.CreateUDiv(ShortDividendV, ShortDivisorV);
Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV,
ShortDivisorV);
Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt,
@@ -151,7 +155,17 @@ static bool insertFastDiv(Instruction *I, IntegerType *BypassType,
// Combine operands into a single value with OR for value testing below
MainBB->getInstList().back().eraseFromParent();
IRBuilder<> MainBuilder(MainBB, MainBB->end());
- Value *OrV = MainBuilder.CreateOr(Dividend, Divisor);
+
+ // We should have bailed out above if the divisor is a constant, but the
+ // dividend may still be a constant. Set OrV to our non-constant operands
+ // OR'ed together.
+ assert(!isa<ConstantInt>(Divisor));
+
+ Value *OrV;
+ if (!isa<ConstantInt>(Dividend))
+ OrV = MainBuilder.CreateOr(Dividend, Divisor);
+ else
+ OrV = Divisor;
// BitMask is inverted to check if the operands are
// larger than the bypass type
@@ -247,5 +261,12 @@ bool llvm::bypassSlowDivision(
MadeChange |= reuseOrInsertFastDiv(I, BT, UseDivOp, UseSignedOp, DivCache);
}
+ // Above we eagerly create divs and rems, as pairs, so that we can efficiently
+ // create divrem machine instructions. Now erase any unused divs / rems so we
+ // don't leave extra instructions sitting around.
+ for (auto &KV : DivCache)
+ for (Instruction *Phi : {KV.second.Quotient, KV.second.Remainder})
+ RecursivelyDeleteTriviallyDeadInstructions(Phi);
+
return MadeChange;
}
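The OR of the operands feeds a single mask test against the bits above the bypass width: if neither operand uses those bits, the narrow unsigned division gives the same result. A standalone sketch of that fast path for a 64-bit division bypassed through 32 bits (constant folding and the IR plumbing are omitted):

#include <cassert>
#include <cstdint>

static uint64_t divMaybeBypassed(uint64_t Dividend, uint64_t Divisor) {
  const uint64_t BitMask = ~uint64_t(0xffffffffu);  // bits above the bypass type
  if (((Dividend | Divisor) & BitMask) == 0) {
    // Fast path: both operands fit in 32 bits, so a 32-bit udiv is exact.
    return uint64_t(uint32_t(Dividend) / uint32_t(Divisor));
  }
  return Dividend / Divisor;  // slow 64-bit path
}

int main() {
  assert(divMaybeBypassed(100, 7) == 100 / 7);
  assert(divMaybeBypassed(uint64_t(1) << 40, 3) == (uint64_t(1) << 40) / 3);
  return 0;
}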
diff --git a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp
index 17e34c4ffa0f..7ebeb615d248 100644
--- a/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CloneModule.cpp
@@ -119,6 +119,11 @@ std::unique_ptr<Module> llvm::CloneModule(
}
if (I->hasInitializer())
GV->setInitializer(MapValue(I->getInitializer(), VMap));
+
+ SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
+ I->getAllMetadata(MDs);
+ for (auto MD : MDs)
+ GV->addMetadata(MD.first, *MapMetadata(MD.second, VMap));
}
// Similarly, copy over function bodies now...
diff --git a/contrib/llvm/lib/Transforms/Utils/CmpInstAnalysis.cpp b/contrib/llvm/lib/Transforms/Utils/CmpInstAnalysis.cpp
index 3b15a0a3e60a..60ae3745c835 100644
--- a/contrib/llvm/lib/Transforms/Utils/CmpInstAnalysis.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CmpInstAnalysis.cpp
@@ -18,29 +18,6 @@
using namespace llvm;
-/// getICmpCode - Encode a icmp predicate into a three bit mask. These bits
-/// are carefully arranged to allow folding of expressions such as:
-///
-/// (A < B) | (A > B) --> (A != B)
-///
-/// Note that this is only valid if the first and second predicates have the
-/// same sign. Is illegal to do: (A u< B) | (A s> B)
-///
-/// Three bits are used to represent the condition, as follows:
-/// 0 A > B
-/// 1 A == B
-/// 2 A < B
-///
-/// <=> Value Definition
-/// 000 0 Always false
-/// 001 1 A > B
-/// 010 2 A == B
-/// 011 3 A >= B
-/// 100 4 A < B
-/// 101 5 A != B
-/// 110 6 A <= B
-/// 111 7 Always true
-///
unsigned llvm::getICmpCode(const ICmpInst *ICI, bool InvertPred) {
ICmpInst::Predicate Pred = InvertPred ? ICI->getInversePredicate()
: ICI->getPredicate();
@@ -62,13 +39,6 @@ unsigned llvm::getICmpCode(const ICmpInst *ICI, bool InvertPred) {
}
}
-/// getICmpValue - This is the complement of getICmpCode, which turns an
-/// opcode and two operands into either a constant true or false, or the
-/// predicate for a new ICmp instruction. The sign is passed in to determine
-/// which kind of predicate to use in the new icmp instruction.
-/// Non-NULL return value will be a true or false constant.
-/// NULL return means a new ICmp is needed. The predicate for which is
-/// output in NewICmpPred.
Value *llvm::getICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS,
CmpInst::Predicate &NewICmpPred) {
switch (Code) {
@@ -87,10 +57,52 @@ Value *llvm::getICmpValue(bool Sign, unsigned Code, Value *LHS, Value *RHS,
return nullptr;
}
-/// PredicatesFoldable - Return true if both predicates match sign or if at
-/// least one of them is an equality comparison (which is signless).
bool llvm::PredicatesFoldable(ICmpInst::Predicate p1, ICmpInst::Predicate p2) {
return (CmpInst::isSigned(p1) == CmpInst::isSigned(p2)) ||
(CmpInst::isSigned(p1) && ICmpInst::isEquality(p2)) ||
(CmpInst::isSigned(p2) && ICmpInst::isEquality(p1));
}
+
+bool llvm::decomposeBitTestICmp(const ICmpInst *I, CmpInst::Predicate &Pred,
+ Value *&X, Value *&Y, Value *&Z) {
+ ConstantInt *C = dyn_cast<ConstantInt>(I->getOperand(1));
+ if (!C)
+ return false;
+
+ switch (I->getPredicate()) {
+ default:
+ return false;
+ case ICmpInst::ICMP_SLT:
+ // X < 0 is equivalent to (X & SignBit) != 0.
+ if (!C->isZero())
+ return false;
+ Y = ConstantInt::get(I->getContext(), APInt::getSignBit(C->getBitWidth()));
+ Pred = ICmpInst::ICMP_NE;
+ break;
+ case ICmpInst::ICMP_SGT:
+ // X > -1 is equivalent to (X & SignBit) == 0.
+ if (!C->isAllOnesValue())
+ return false;
+ Y = ConstantInt::get(I->getContext(), APInt::getSignBit(C->getBitWidth()));
+ Pred = ICmpInst::ICMP_EQ;
+ break;
+ case ICmpInst::ICMP_ULT:
+ // X <u 2^n is equivalent to (X & ~(2^n-1)) == 0.
+ if (!C->getValue().isPowerOf2())
+ return false;
+ Y = ConstantInt::get(I->getContext(), -C->getValue());
+ Pred = ICmpInst::ICMP_EQ;
+ break;
+ case ICmpInst::ICMP_UGT:
+ // X >u 2^n-1 is equivalent to (X & ~(2^n-1)) != 0.
+ if (!(C->getValue() + 1).isPowerOf2())
+ return false;
+ Y = ConstantInt::get(I->getContext(), ~C->getValue());
+ Pred = ICmpInst::ICMP_NE;
+ break;
+ }
+
+ X = I->getOperand(0);
+ Z = ConstantInt::getNullValue(C->getType());
+ return true;
+}
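Each case above rewrites a comparison against a constant into a mask-and-compare against zero; for instance the ICMP_ULT case relies on -C == ~(C-1) when C is a power of two. A standalone exhaustive check of those identities over 8-bit values:

#include <cassert>
#include <cstdint>

int main() {
  const uint8_t C = 8;  // 2^3, a power-of-two constant
  for (unsigned X = 0; X < 256; ++X) {
    // "X <u C" matches "(X & -C) == 0".
    bool Orig = (uint8_t)X < C;
    bool Decomposed = ((uint8_t)X & (uint8_t)-C) == 0;
    assert(Orig == Decomposed);
    // Likewise "X >u C-1" matches "(X & ~(C-1)) != 0".
    assert(((uint8_t)X > (uint8_t)(C - 1)) ==
           (((uint8_t)X & (uint8_t)~(C - 1)) != 0));
  }
  return 0;
}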
diff --git a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
index 9f2181f87cee..c514c9c9cd4a 100644
--- a/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CodeExtractor.cpp
@@ -17,6 +17,9 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/RegionIterator.h"
@@ -26,9 +29,11 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Pass.h"
+#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -49,7 +54,7 @@ AggregateArgsOpt("aggregate-extracted-args", cl::Hidden,
cl::desc("Aggregate arguments to code-extracted functions"));
/// \brief Test whether a block is valid for extraction.
-static bool isBlockValidForExtraction(const BasicBlock &BB) {
+bool CodeExtractor::isBlockValidForExtraction(const BasicBlock &BB) {
// Landing pads must be in the function where they were inserted for cleanup.
if (BB.isEHPad())
return false;
@@ -81,7 +86,7 @@ static SetVector<BasicBlock *> buildExtractionBlockSet(IteratorT BBBegin,
if (!Result.insert(*BBBegin))
llvm_unreachable("Repeated basic blocks in extraction input");
- if (!isBlockValidForExtraction(**BBBegin)) {
+ if (!CodeExtractor::isBlockValidForExtraction(**BBBegin)) {
Result.clear();
return Result;
}
@@ -119,23 +124,30 @@ buildExtractionBlockSet(const RegionNode &RN) {
return buildExtractionBlockSet(R.block_begin(), R.block_end());
}
-CodeExtractor::CodeExtractor(BasicBlock *BB, bool AggregateArgs)
- : DT(nullptr), AggregateArgs(AggregateArgs||AggregateArgsOpt),
- Blocks(buildExtractionBlockSet(BB)), NumExitBlocks(~0U) {}
+CodeExtractor::CodeExtractor(BasicBlock *BB, bool AggregateArgs,
+ BlockFrequencyInfo *BFI,
+ BranchProbabilityInfo *BPI)
+ : DT(nullptr), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
+ BPI(BPI), Blocks(buildExtractionBlockSet(BB)), NumExitBlocks(~0U) {}
CodeExtractor::CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT,
- bool AggregateArgs)
- : DT(DT), AggregateArgs(AggregateArgs||AggregateArgsOpt),
- Blocks(buildExtractionBlockSet(BBs)), NumExitBlocks(~0U) {}
-
-CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs)
- : DT(&DT), AggregateArgs(AggregateArgs||AggregateArgsOpt),
- Blocks(buildExtractionBlockSet(L.getBlocks())), NumExitBlocks(~0U) {}
+ bool AggregateArgs, BlockFrequencyInfo *BFI,
+ BranchProbabilityInfo *BPI)
+ : DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
+ BPI(BPI), Blocks(buildExtractionBlockSet(BBs)), NumExitBlocks(~0U) {}
+
+CodeExtractor::CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs,
+ BlockFrequencyInfo *BFI,
+ BranchProbabilityInfo *BPI)
+ : DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
+ BPI(BPI), Blocks(buildExtractionBlockSet(L.getBlocks())),
+ NumExitBlocks(~0U) {}
CodeExtractor::CodeExtractor(DominatorTree &DT, const RegionNode &RN,
- bool AggregateArgs)
- : DT(&DT), AggregateArgs(AggregateArgs||AggregateArgsOpt),
- Blocks(buildExtractionBlockSet(RN)), NumExitBlocks(~0U) {}
+ bool AggregateArgs, BlockFrequencyInfo *BFI,
+ BranchProbabilityInfo *BPI)
+ : DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI),
+ BPI(BPI), Blocks(buildExtractionBlockSet(RN)), NumExitBlocks(~0U) {}
/// definedInRegion - Return true if the specified value is defined in the
/// extracted region.
@@ -339,7 +351,22 @@ Function *CodeExtractor::constructFunction(const ValueSet &inputs,
// If the old function is no-throw, so is the new one.
if (oldFunction->doesNotThrow())
newFunction->setDoesNotThrow();
-
+
+ // Inherit the uwtable attribute if we need to.
+ if (oldFunction->hasUWTable())
+ newFunction->setHasUWTable();
+
+ // Inherit all of the target dependent attributes.
+ // (e.g., if the extracted region contains a call to an x86.sse
+ // instruction, we need to make sure that the extracted region has the
+ // "target-features" attribute allowing it to be lowered.)
+ // FIXME: This should be changed to check to see if a specific
+ // attribute cannot be inherited.
+ AttributeSet OldFnAttrs = oldFunction->getAttributes().getFnAttributes();
+ AttrBuilder AB(OldFnAttrs, AttributeSet::FunctionIndex);
+ for (auto Attr : AB.td_attrs())
+ newFunction->addFnAttr(Attr.first, Attr.second);
+
newFunction->getBasicBlockList().push_back(newRootNode);
// Create an iterator to name all of the arguments we inserted.
@@ -672,6 +699,51 @@ void CodeExtractor::moveCodeToFunction(Function *newFunction) {
}
}
+void CodeExtractor::calculateNewCallTerminatorWeights(
+ BasicBlock *CodeReplacer,
+ DenseMap<BasicBlock *, BlockFrequency> &ExitWeights,
+ BranchProbabilityInfo *BPI) {
+ typedef BlockFrequencyInfoImplBase::Distribution Distribution;
+ typedef BlockFrequencyInfoImplBase::BlockNode BlockNode;
+
+ // Update the branch weights for the exit block.
+ TerminatorInst *TI = CodeReplacer->getTerminator();
+ SmallVector<unsigned, 8> BranchWeights(TI->getNumSuccessors(), 0);
+
+ // Block Frequency distribution with dummy node.
+ Distribution BranchDist;
+
+ // Add each of the frequencies of the successors.
+ for (unsigned i = 0, e = TI->getNumSuccessors(); i < e; ++i) {
+ BlockNode ExitNode(i);
+ uint64_t ExitFreq = ExitWeights[TI->getSuccessor(i)].getFrequency();
+ if (ExitFreq != 0)
+ BranchDist.addExit(ExitNode, ExitFreq);
+ else
+ BPI->setEdgeProbability(CodeReplacer, i, BranchProbability::getZero());
+ }
+
+ // Check for no total weight.
+ if (BranchDist.Total == 0)
+ return;
+
+ // Normalize the distribution so that they can fit in unsigned.
+ BranchDist.normalize();
+
+ // Create normalized branch weights and set the metadata.
+ for (unsigned I = 0, E = BranchDist.Weights.size(); I < E; ++I) {
+ const auto &Weight = BranchDist.Weights[I];
+
+ // Get the weight and update the current BFI.
+ BranchWeights[Weight.TargetNode.Index] = Weight.Amount;
+ BranchProbability BP(Weight.Amount, BranchDist.Total);
+ BPI->setEdgeProbability(CodeReplacer, Weight.TargetNode.Index, BP);
+ }
+ TI->setMetadata(
+ LLVMContext::MD_prof,
+ MDBuilder(TI->getContext()).createBranchWeights(BranchWeights));
+}
+
Function *CodeExtractor::extractCodeRegion() {
if (!isEligible())
return nullptr;
@@ -682,6 +754,19 @@ Function *CodeExtractor::extractCodeRegion() {
// block in the region.
BasicBlock *header = *Blocks.begin();
+ // Calculate the entry frequency of the new function before we change the root
+ // block.
+ BlockFrequency EntryFreq;
+ if (BFI) {
+ assert(BPI && "Both BPI and BFI are required to preserve profile info");
+ for (BasicBlock *Pred : predecessors(header)) {
+ if (Blocks.count(Pred))
+ continue;
+ EntryFreq +=
+ BFI->getBlockFreq(Pred) * BPI->getEdgeProbability(Pred, header);
+ }
+ }
+
// If we have to split PHI nodes or the entry block, do so now.
severSplitPHINodes(header);
@@ -705,12 +790,23 @@ Function *CodeExtractor::extractCodeRegion() {
// Find inputs to, outputs from the code region.
findInputsOutputs(inputs, outputs);
+ // Calculate the exit blocks for the extracted region and the total exit
+ // weights for each of those blocks.
+ DenseMap<BasicBlock *, BlockFrequency> ExitWeights;
SmallPtrSet<BasicBlock *, 1> ExitBlocks;
- for (BasicBlock *Block : Blocks)
+ for (BasicBlock *Block : Blocks) {
for (succ_iterator SI = succ_begin(Block), SE = succ_end(Block); SI != SE;
- ++SI)
- if (!Blocks.count(*SI))
+ ++SI) {
+ if (!Blocks.count(*SI)) {
+ // Update the branch weight for this successor.
+ if (BFI) {
+ BlockFrequency &BF = ExitWeights[*SI];
+ BF += BFI->getBlockFreq(Block) * BPI->getEdgeProbability(Block, *SI);
+ }
ExitBlocks.insert(*SI);
+ }
+ }
+ }
NumExitBlocks = ExitBlocks.size();
// Construct new function based on inputs/outputs & add allocas for all defs.
@@ -719,10 +815,23 @@ Function *CodeExtractor::extractCodeRegion() {
codeReplacer, oldFunction,
oldFunction->getParent());
+ // Update the entry count of the function.
+ if (BFI) {
+ Optional<uint64_t> EntryCount =
+ BFI->getProfileCountFromFreq(EntryFreq.getFrequency());
+ if (EntryCount.hasValue())
+ newFunction->setEntryCount(EntryCount.getValue());
+ BFI->setBlockFreq(codeReplacer, EntryFreq.getFrequency());
+ }
+
emitCallAndSwitchStatement(newFunction, codeReplacer, inputs, outputs);
moveCodeToFunction(newFunction);
+ // Update the branch weights for the exit block.
+ if (BFI && NumExitBlocks > 1)
+ calculateNewCallTerminatorWeights(codeReplacer, ExitWeights, BPI);
+
// Loop over all of the PHI nodes in the header block, and change any
// references to the old incoming edge to be the new incoming edge.
for (BasicBlock::iterator I = header->begin(); isa<PHINode>(I); ++I) {
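
The weight update in calculateNewCallTerminatorWeights above amounts to scaling the raw exit-block frequencies into 32-bit branch weights and matching probabilities. A rough standalone sketch of that normalization, using plain integers instead of the BlockFrequencyInfoImplBase Distribution machinery (the helper name is illustrative, not LLVM API):

#include <cstdint>
#include <vector>

// Scale raw 64-bit exit frequencies into 32-bit branch weights whose ratios
// approximate Freq[i] / Total, in the spirit of the Distribution::normalize()
// call used above.
static std::vector<uint32_t>
normalizeExitWeights(const std::vector<uint64_t> &Freqs) {
  uint64_t Total = 0;
  for (uint64_t F : Freqs)
    Total += F;
  std::vector<uint32_t> Weights(Freqs.size(), 0);
  if (Total == 0)
    return Weights; // no profile information: leave every weight at zero
  for (size_t I = 0; I < Freqs.size(); ++I)
    Weights[I] = static_cast<uint32_t>(
        (static_cast<double>(Freqs[I]) / static_cast<double>(Total)) *
        UINT32_MAX);
  return Weights;
}
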
diff --git a/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp b/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp
index b56ff684e8a8..6642a97a29c2 100644
--- a/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/CtorUtils.cpp
@@ -71,8 +71,8 @@ std::vector<Function *> parseGlobalCtors(GlobalVariable *GV) {
ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
std::vector<Function *> Result;
Result.reserve(CA->getNumOperands());
- for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) {
- ConstantStruct *CS = cast<ConstantStruct>(*i);
+ for (auto &V : CA->operands()) {
+ ConstantStruct *CS = cast<ConstantStruct>(V);
Result.push_back(dyn_cast<Function>(CS->getOperand(1)));
}
return Result;
@@ -94,10 +94,10 @@ GlobalVariable *findGlobalCtors(Module &M) {
return GV;
ConstantArray *CA = cast<ConstantArray>(GV->getInitializer());
- for (User::op_iterator i = CA->op_begin(), e = CA->op_end(); i != e; ++i) {
- if (isa<ConstantAggregateZero>(*i))
+ for (auto &V : CA->operands()) {
+ if (isa<ConstantAggregateZero>(V))
continue;
- ConstantStruct *CS = cast<ConstantStruct>(*i);
+ ConstantStruct *CS = cast<ConstantStruct>(V);
if (isa<ConstantPointerNull>(CS->getOperand(1)))
continue;
diff --git a/contrib/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp b/contrib/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp
new file mode 100644
index 000000000000..8c2386554da5
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/EscapeEnumerator.cpp
@@ -0,0 +1,96 @@
+//===- EscapeEnumerator.cpp -----------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines a helper class that enumerates all possible exits from a function,
+// including exception handling.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/EscapeEnumerator.h"
+#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Transforms/Utils/Local.h"
+using namespace llvm;
+
+static Constant *getDefaultPersonalityFn(Module *M) {
+ LLVMContext &C = M->getContext();
+ Triple T(M->getTargetTriple());
+ EHPersonality Pers = getDefaultEHPersonality(T);
+ return M->getOrInsertFunction(getEHPersonalityName(Pers),
+ FunctionType::get(Type::getInt32Ty(C), true));
+}
+
+IRBuilder<> *EscapeEnumerator::Next() {
+ if (Done)
+ return nullptr;
+
+ // Find all 'return', 'resume', and 'unwind' instructions.
+ while (StateBB != StateE) {
+ BasicBlock *CurBB = &*StateBB++;
+
+ // Branches and invokes do not escape, only unwind, resume, and return
+ // do.
+ TerminatorInst *TI = CurBB->getTerminator();
+ if (!isa<ReturnInst>(TI) && !isa<ResumeInst>(TI))
+ continue;
+
+ Builder.SetInsertPoint(TI);
+ return &Builder;
+ }
+
+ Done = true;
+
+ if (!HandleExceptions)
+ return nullptr;
+
+ if (F.doesNotThrow())
+ return nullptr;
+
+ // Find all 'call' instructions that may throw.
+ SmallVector<Instruction *, 16> Calls;
+ for (BasicBlock &BB : F)
+ for (Instruction &II : BB)
+ if (CallInst *CI = dyn_cast<CallInst>(&II))
+ if (!CI->doesNotThrow())
+ Calls.push_back(CI);
+
+ if (Calls.empty())
+ return nullptr;
+
+ // Create a cleanup block.
+ LLVMContext &C = F.getContext();
+ BasicBlock *CleanupBB = BasicBlock::Create(C, CleanupBBName, &F);
+ Type *ExnTy =
+ StructType::get(Type::getInt8PtrTy(C), Type::getInt32Ty(C), nullptr);
+ if (!F.hasPersonalityFn()) {
+ Constant *PersFn = getDefaultPersonalityFn(F.getParent());
+ F.setPersonalityFn(PersFn);
+ }
+
+ if (isFuncletEHPersonality(classifyEHPersonality(F.getPersonalityFn()))) {
+ report_fatal_error("Funclet EH not supported");
+ }
+
+ LandingPadInst *LPad =
+ LandingPadInst::Create(ExnTy, 1, "cleanup.lpad", CleanupBB);
+ LPad->setCleanup(true);
+ ResumeInst *RI = ResumeInst::Create(LPad, CleanupBB);
+
+ // Transform the 'call' instructions into 'invoke's branching to the
+ // cleanup block. Go in reverse order to make prettier BB names.
+ SmallVector<Value *, 16> Args;
+ for (unsigned I = Calls.size(); I != 0;) {
+ CallInst *CI = cast<CallInst>(Calls[--I]);
+ changeToInvokeAndSplitBasicBlock(CI, CleanupBB);
+ }
+
+ Builder.SetInsertPoint(RI);
+ return &Builder;
+}
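
A rough usage sketch of the new helper (hypothetical caller; the constructor shape is assumed from the members used above, F, CleanupBBName and HandleExceptions): a pass that wants to run a hook on every function exit can drive Next() in a loop.

#include "llvm/IR/IRBuilder.h"
#include "llvm/Transforms/Utils/EscapeEnumerator.h"
using namespace llvm;

// Insert a call to ExitHook before every return/resume, and before the
// resume in the synthesized cleanup landing pad that covers throwing calls.
static void instrumentExits(Function &F, Value *ExitHook) {
  EscapeEnumerator EE(F, "exit_cleanup", /*HandleExceptions=*/true);
  while (IRBuilder<> *Builder = EE.Next())
    Builder->CreateCall(ExitHook);
}
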
diff --git a/contrib/llvm/lib/Transforms/Utils/Evaluator.cpp b/contrib/llvm/lib/Transforms/Utils/Evaluator.cpp
index cd130abf4519..4adf1754253d 100644
--- a/contrib/llvm/lib/Transforms/Utils/Evaluator.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/Evaluator.cpp
@@ -203,9 +203,9 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
return false; // no volatile/atomic accesses.
}
Constant *Ptr = getVal(SI->getOperand(1));
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
+ if (auto *FoldedPtr = ConstantFoldConstant(Ptr, DL, TLI)) {
DEBUG(dbgs() << "Folding constant ptr expression: " << *Ptr);
- Ptr = ConstantFoldConstantExpression(CE, DL, TLI);
+ Ptr = FoldedPtr;
DEBUG(dbgs() << "; To: " << *Ptr << "\n");
}
if (!isSimpleEnoughPointerToCommit(Ptr)) {
@@ -249,8 +249,8 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
Constant * const IdxList[] = {IdxZero, IdxZero};
Ptr = ConstantExpr::getGetElementPtr(nullptr, Ptr, IdxList);
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr))
- Ptr = ConstantFoldConstantExpression(CE, DL, TLI);
+ if (auto *FoldedPtr = ConstantFoldConstant(Ptr, DL, TLI))
+ Ptr = FoldedPtr;
// If we can't improve the situation by introspecting NewTy,
// we have to give up.
@@ -324,8 +324,8 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
}
Constant *Ptr = getVal(LI->getOperand(0));
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ptr)) {
- Ptr = ConstantFoldConstantExpression(CE, DL, TLI);
+ if (auto *FoldedPtr = ConstantFoldConstant(Ptr, DL, TLI)) {
+ Ptr = FoldedPtr;
DEBUG(dbgs() << "Found a constant pointer expression, constant "
"folding: " << *Ptr << "\n");
}
@@ -512,8 +512,8 @@ bool Evaluator::EvaluateBlock(BasicBlock::iterator CurInst,
}
if (!CurInst->use_empty()) {
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(InstResult))
- InstResult = ConstantFoldConstantExpression(CE, DL, TLI);
+ if (auto *FoldedInstResult = ConstantFoldConstant(InstResult, DL, TLI))
+ InstResult = FoldedInstResult;
setVal(&*CurInst, InstResult);
}
@@ -537,7 +537,7 @@ bool Evaluator::EvaluateFunction(Function *F, Constant *&RetVal,
const SmallVectorImpl<Constant*> &ActualArgs) {
// Check to see if this function is already executing (recursion). If so,
// bail out. TODO: we might want to accept limited recursion.
- if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end())
+ if (is_contained(CallStack, F))
return false;
CallStack.push_back(F);
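
The repeated pattern introduced above, trying ConstantFoldConstant and keeping the original constant when it returns null, could be captured in a small helper along these lines (a sketch only, not something this change adds):

#include "llvm/Analysis/ConstantFolding.h"
using namespace llvm;

// ConstantFoldConstant, as used in the hunks above, returns a non-null
// constant only when it found something better; otherwise keep the original.
static Constant *foldOrKeep(Constant *C, const DataLayout &DL,
                            const TargetLibraryInfo *TLI) {
  if (Constant *Folded = ConstantFoldConstant(C, DL, TLI))
    return Folded;
  return C;
}
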
diff --git a/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp b/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp
index 492ae9f69a65..7b96fbb11a14 100644
--- a/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp
@@ -463,19 +463,14 @@ bool FlattenCFGOpt::MergeIfRegion(BasicBlock *BB, IRBuilder<> &Builder) {
}
bool FlattenCFGOpt::run(BasicBlock *BB) {
- bool Changed = false;
assert(BB && BB->getParent() && "Block not embedded in function!");
assert(BB->getTerminator() && "Degenerate basic block encountered!");
IRBuilder<> Builder(BB);
- if (FlattenParallelAndOr(BB, Builder))
+ if (FlattenParallelAndOr(BB, Builder) || MergeIfRegion(BB, Builder))
return true;
-
- if (MergeIfRegion(BB, Builder))
- return true;
-
- return Changed;
+ return false;
}
/// FlattenCFG - This function is used to flatten a CFG. For
diff --git a/contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp b/contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp
new file mode 100644
index 000000000000..81a7c4ceffab
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/FunctionComparator.cpp
@@ -0,0 +1,919 @@
+//===- FunctionComparator.cpp - Function Comparator ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the FunctionComparator and GlobalNumberState classes
+// which are used by the MergeFunctions pass for comparing functions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/FunctionComparator.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/IR/CallSite.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "functioncomparator"
+
+int FunctionComparator::cmpNumbers(uint64_t L, uint64_t R) const {
+ if (L < R) return -1;
+ if (L > R) return 1;
+ return 0;
+}
+
+int FunctionComparator::cmpOrderings(AtomicOrdering L, AtomicOrdering R) const {
+ if ((int)L < (int)R) return -1;
+ if ((int)L > (int)R) return 1;
+ return 0;
+}
+
+int FunctionComparator::cmpAPInts(const APInt &L, const APInt &R) const {
+ if (int Res = cmpNumbers(L.getBitWidth(), R.getBitWidth()))
+ return Res;
+ if (L.ugt(R)) return 1;
+ if (R.ugt(L)) return -1;
+ return 0;
+}
+
+int FunctionComparator::cmpAPFloats(const APFloat &L, const APFloat &R) const {
+ // Floats are ordered first by semantics (i.e. float, double, half, etc.),
+ // then by value interpreted as a bitstring (aka APInt).
+ const fltSemantics &SL = L.getSemantics(), &SR = R.getSemantics();
+ if (int Res = cmpNumbers(APFloat::semanticsPrecision(SL),
+ APFloat::semanticsPrecision(SR)))
+ return Res;
+ if (int Res = cmpNumbers(APFloat::semanticsMaxExponent(SL),
+ APFloat::semanticsMaxExponent(SR)))
+ return Res;
+ if (int Res = cmpNumbers(APFloat::semanticsMinExponent(SL),
+ APFloat::semanticsMinExponent(SR)))
+ return Res;
+ if (int Res = cmpNumbers(APFloat::semanticsSizeInBits(SL),
+ APFloat::semanticsSizeInBits(SR)))
+ return Res;
+ return cmpAPInts(L.bitcastToAPInt(), R.bitcastToAPInt());
+}
+
+int FunctionComparator::cmpMem(StringRef L, StringRef R) const {
+ // Prevent heavy comparison, compare sizes first.
+ if (int Res = cmpNumbers(L.size(), R.size()))
+ return Res;
+
+ // Compare strings lexicographically only when it is necessary: only when
+ // strings are equal in size.
+ return L.compare(R);
+}
+
+int FunctionComparator::cmpAttrs(const AttributeSet L,
+ const AttributeSet R) const {
+ if (int Res = cmpNumbers(L.getNumSlots(), R.getNumSlots()))
+ return Res;
+
+ for (unsigned i = 0, e = L.getNumSlots(); i != e; ++i) {
+ AttributeSet::iterator LI = L.begin(i), LE = L.end(i), RI = R.begin(i),
+ RE = R.end(i);
+ for (; LI != LE && RI != RE; ++LI, ++RI) {
+ Attribute LA = *LI;
+ Attribute RA = *RI;
+ if (LA < RA)
+ return -1;
+ if (RA < LA)
+ return 1;
+ }
+ if (LI != LE)
+ return 1;
+ if (RI != RE)
+ return -1;
+ }
+ return 0;
+}
+
+int FunctionComparator::cmpRangeMetadata(const MDNode *L,
+ const MDNode *R) const {
+ if (L == R)
+ return 0;
+ if (!L)
+ return -1;
+ if (!R)
+ return 1;
+ // Range metadata is a sequence of numbers. Make sure they are the same
+ // sequence.
+ // TODO: Note that as this is metadata, it is possible to drop and/or merge
+ // this data when considering functions to merge. Thus this comparison would
+ // return 0 (i.e. equivalent), but merging would become more complicated
+ // because the ranges would need to be unioned. It is not likely that
+ // functions differ ONLY in this metadata if they are actually the same
+ // function semantically.
+ if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands()))
+ return Res;
+ for (size_t I = 0; I < L->getNumOperands(); ++I) {
+ ConstantInt *LLow = mdconst::extract<ConstantInt>(L->getOperand(I));
+ ConstantInt *RLow = mdconst::extract<ConstantInt>(R->getOperand(I));
+ if (int Res = cmpAPInts(LLow->getValue(), RLow->getValue()))
+ return Res;
+ }
+ return 0;
+}
+
+int FunctionComparator::cmpOperandBundlesSchema(const Instruction *L,
+ const Instruction *R) const {
+ ImmutableCallSite LCS(L);
+ ImmutableCallSite RCS(R);
+
+ assert(LCS && RCS && "Must be calls or invokes!");
+ assert(LCS.isCall() == RCS.isCall() && "Can't compare otherwise!");
+
+ if (int Res =
+ cmpNumbers(LCS.getNumOperandBundles(), RCS.getNumOperandBundles()))
+ return Res;
+
+ for (unsigned i = 0, e = LCS.getNumOperandBundles(); i != e; ++i) {
+ auto OBL = LCS.getOperandBundleAt(i);
+ auto OBR = RCS.getOperandBundleAt(i);
+
+ if (int Res = OBL.getTagName().compare(OBR.getTagName()))
+ return Res;
+
+ if (int Res = cmpNumbers(OBL.Inputs.size(), OBR.Inputs.size()))
+ return Res;
+ }
+
+ return 0;
+}
+
+/// Constants comparison:
+/// 1. Check whether the type of the L constant can be losslessly bitcast to
+///    the R type.
+/// 2. Compare constant contents.
+/// For more details see declaration comments.
+int FunctionComparator::cmpConstants(const Constant *L,
+ const Constant *R) const {
+
+ Type *TyL = L->getType();
+ Type *TyR = R->getType();
+
+  // Check whether the types are bitcastable. This part essentially re-factors
+  // the Type::canLosslesslyBitCastTo method, but instead of returning
+  // true/false, it also packs into the result which type is "less" for us.
+ int TypesRes = cmpTypes(TyL, TyR);
+ if (TypesRes != 0) {
+ // Types are different, but check whether we can bitcast them.
+ if (!TyL->isFirstClassType()) {
+ if (TyR->isFirstClassType())
+ return -1;
+ // Neither TyL nor TyR are values of first class type. Return the result
+ // of comparing the types
+ return TypesRes;
+ }
+ if (!TyR->isFirstClassType()) {
+ if (TyL->isFirstClassType())
+ return 1;
+ return TypesRes;
+ }
+
+ // Vector -> Vector conversions are always lossless if the two vector types
+ // have the same size, otherwise not.
+ unsigned TyLWidth = 0;
+ unsigned TyRWidth = 0;
+
+ if (auto *VecTyL = dyn_cast<VectorType>(TyL))
+ TyLWidth = VecTyL->getBitWidth();
+ if (auto *VecTyR = dyn_cast<VectorType>(TyR))
+ TyRWidth = VecTyR->getBitWidth();
+
+ if (TyLWidth != TyRWidth)
+ return cmpNumbers(TyLWidth, TyRWidth);
+
+ // Zero bit-width means neither TyL nor TyR are vectors.
+ if (!TyLWidth) {
+ PointerType *PTyL = dyn_cast<PointerType>(TyL);
+ PointerType *PTyR = dyn_cast<PointerType>(TyR);
+ if (PTyL && PTyR) {
+ unsigned AddrSpaceL = PTyL->getAddressSpace();
+ unsigned AddrSpaceR = PTyR->getAddressSpace();
+ if (int Res = cmpNumbers(AddrSpaceL, AddrSpaceR))
+ return Res;
+ }
+ if (PTyL)
+ return 1;
+ if (PTyR)
+ return -1;
+
+ // TyL and TyR aren't vectors, nor pointers. We don't know how to
+ // bitcast them.
+ return TypesRes;
+ }
+ }
+
+ // OK, types are bitcastable, now check constant contents.
+
+ if (L->isNullValue() && R->isNullValue())
+ return TypesRes;
+ if (L->isNullValue() && !R->isNullValue())
+ return 1;
+ if (!L->isNullValue() && R->isNullValue())
+ return -1;
+
+ auto GlobalValueL = const_cast<GlobalValue*>(dyn_cast<GlobalValue>(L));
+ auto GlobalValueR = const_cast<GlobalValue*>(dyn_cast<GlobalValue>(R));
+ if (GlobalValueL && GlobalValueR) {
+ return cmpGlobalValues(GlobalValueL, GlobalValueR);
+ }
+
+ if (int Res = cmpNumbers(L->getValueID(), R->getValueID()))
+ return Res;
+
+ if (const auto *SeqL = dyn_cast<ConstantDataSequential>(L)) {
+ const auto *SeqR = cast<ConstantDataSequential>(R);
+ // This handles ConstantDataArray and ConstantDataVector. Note that we
+ // compare the two raw data arrays, which might differ depending on the host
+    // endianness. This isn't a problem though, because the endianness of a module
+ // will affect the order of the constants, but this order is the same
+ // for a given input module and host platform.
+ return cmpMem(SeqL->getRawDataValues(), SeqR->getRawDataValues());
+ }
+
+ switch (L->getValueID()) {
+ case Value::UndefValueVal:
+ case Value::ConstantTokenNoneVal:
+ return TypesRes;
+ case Value::ConstantIntVal: {
+ const APInt &LInt = cast<ConstantInt>(L)->getValue();
+ const APInt &RInt = cast<ConstantInt>(R)->getValue();
+ return cmpAPInts(LInt, RInt);
+ }
+ case Value::ConstantFPVal: {
+ const APFloat &LAPF = cast<ConstantFP>(L)->getValueAPF();
+ const APFloat &RAPF = cast<ConstantFP>(R)->getValueAPF();
+ return cmpAPFloats(LAPF, RAPF);
+ }
+ case Value::ConstantArrayVal: {
+ const ConstantArray *LA = cast<ConstantArray>(L);
+ const ConstantArray *RA = cast<ConstantArray>(R);
+ uint64_t NumElementsL = cast<ArrayType>(TyL)->getNumElements();
+ uint64_t NumElementsR = cast<ArrayType>(TyR)->getNumElements();
+ if (int Res = cmpNumbers(NumElementsL, NumElementsR))
+ return Res;
+ for (uint64_t i = 0; i < NumElementsL; ++i) {
+ if (int Res = cmpConstants(cast<Constant>(LA->getOperand(i)),
+ cast<Constant>(RA->getOperand(i))))
+ return Res;
+ }
+ return 0;
+ }
+ case Value::ConstantStructVal: {
+ const ConstantStruct *LS = cast<ConstantStruct>(L);
+ const ConstantStruct *RS = cast<ConstantStruct>(R);
+ unsigned NumElementsL = cast<StructType>(TyL)->getNumElements();
+ unsigned NumElementsR = cast<StructType>(TyR)->getNumElements();
+ if (int Res = cmpNumbers(NumElementsL, NumElementsR))
+ return Res;
+ for (unsigned i = 0; i != NumElementsL; ++i) {
+ if (int Res = cmpConstants(cast<Constant>(LS->getOperand(i)),
+ cast<Constant>(RS->getOperand(i))))
+ return Res;
+ }
+ return 0;
+ }
+ case Value::ConstantVectorVal: {
+ const ConstantVector *LV = cast<ConstantVector>(L);
+ const ConstantVector *RV = cast<ConstantVector>(R);
+ unsigned NumElementsL = cast<VectorType>(TyL)->getNumElements();
+ unsigned NumElementsR = cast<VectorType>(TyR)->getNumElements();
+ if (int Res = cmpNumbers(NumElementsL, NumElementsR))
+ return Res;
+ for (uint64_t i = 0; i < NumElementsL; ++i) {
+ if (int Res = cmpConstants(cast<Constant>(LV->getOperand(i)),
+ cast<Constant>(RV->getOperand(i))))
+ return Res;
+ }
+ return 0;
+ }
+ case Value::ConstantExprVal: {
+ const ConstantExpr *LE = cast<ConstantExpr>(L);
+ const ConstantExpr *RE = cast<ConstantExpr>(R);
+ unsigned NumOperandsL = LE->getNumOperands();
+ unsigned NumOperandsR = RE->getNumOperands();
+ if (int Res = cmpNumbers(NumOperandsL, NumOperandsR))
+ return Res;
+ for (unsigned i = 0; i < NumOperandsL; ++i) {
+ if (int Res = cmpConstants(cast<Constant>(LE->getOperand(i)),
+ cast<Constant>(RE->getOperand(i))))
+ return Res;
+ }
+ return 0;
+ }
+ case Value::BlockAddressVal: {
+ const BlockAddress *LBA = cast<BlockAddress>(L);
+ const BlockAddress *RBA = cast<BlockAddress>(R);
+ if (int Res = cmpValues(LBA->getFunction(), RBA->getFunction()))
+ return Res;
+ if (LBA->getFunction() == RBA->getFunction()) {
+ // They are BBs in the same function. Order by which comes first in the
+ // BB order of the function. This order is deterministic.
+ Function* F = LBA->getFunction();
+ BasicBlock *LBB = LBA->getBasicBlock();
+ BasicBlock *RBB = RBA->getBasicBlock();
+ if (LBB == RBB)
+ return 0;
+ for(BasicBlock &BB : F->getBasicBlockList()) {
+ if (&BB == LBB) {
+ assert(&BB != RBB);
+ return -1;
+ }
+ if (&BB == RBB)
+ return 1;
+ }
+ llvm_unreachable("Basic Block Address does not point to a basic block in "
+ "its function.");
+ return -1;
+ } else {
+ // cmpValues said the functions are the same. So because they aren't
+ // literally the same pointer, they must respectively be the left and
+ // right functions.
+ assert(LBA->getFunction() == FnL && RBA->getFunction() == FnR);
+ // cmpValues will tell us if these are equivalent BasicBlocks, in the
+ // context of their respective functions.
+ return cmpValues(LBA->getBasicBlock(), RBA->getBasicBlock());
+ }
+ }
+ default: // Unknown constant, abort.
+ DEBUG(dbgs() << "Looking at valueID " << L->getValueID() << "\n");
+ llvm_unreachable("Constant ValueID not recognized.");
+ return -1;
+ }
+}
+
+int FunctionComparator::cmpGlobalValues(GlobalValue *L, GlobalValue *R) const {
+ uint64_t LNumber = GlobalNumbers->getNumber(L);
+ uint64_t RNumber = GlobalNumbers->getNumber(R);
+ return cmpNumbers(LNumber, RNumber);
+}
+
+/// cmpTypes - compares two types and defines a total ordering over the set
+/// of types.
+/// See method declaration comments for more details.
+int FunctionComparator::cmpTypes(Type *TyL, Type *TyR) const {
+ PointerType *PTyL = dyn_cast<PointerType>(TyL);
+ PointerType *PTyR = dyn_cast<PointerType>(TyR);
+
+ const DataLayout &DL = FnL->getParent()->getDataLayout();
+ if (PTyL && PTyL->getAddressSpace() == 0)
+ TyL = DL.getIntPtrType(TyL);
+ if (PTyR && PTyR->getAddressSpace() == 0)
+ TyR = DL.getIntPtrType(TyR);
+
+ if (TyL == TyR)
+ return 0;
+
+ if (int Res = cmpNumbers(TyL->getTypeID(), TyR->getTypeID()))
+ return Res;
+
+ switch (TyL->getTypeID()) {
+ default:
+ llvm_unreachable("Unknown type!");
+ // Fall through in Release mode.
+ LLVM_FALLTHROUGH;
+ case Type::IntegerTyID:
+ return cmpNumbers(cast<IntegerType>(TyL)->getBitWidth(),
+ cast<IntegerType>(TyR)->getBitWidth());
+  // TyL == TyR would have returned 0 earlier, because types are uniqued.
+ case Type::VoidTyID:
+ case Type::FloatTyID:
+ case Type::DoubleTyID:
+ case Type::X86_FP80TyID:
+ case Type::FP128TyID:
+ case Type::PPC_FP128TyID:
+ case Type::LabelTyID:
+ case Type::MetadataTyID:
+ case Type::TokenTyID:
+ return 0;
+
+ case Type::PointerTyID: {
+ assert(PTyL && PTyR && "Both types must be pointers here.");
+ return cmpNumbers(PTyL->getAddressSpace(), PTyR->getAddressSpace());
+ }
+
+ case Type::StructTyID: {
+ StructType *STyL = cast<StructType>(TyL);
+ StructType *STyR = cast<StructType>(TyR);
+ if (STyL->getNumElements() != STyR->getNumElements())
+ return cmpNumbers(STyL->getNumElements(), STyR->getNumElements());
+
+ if (STyL->isPacked() != STyR->isPacked())
+ return cmpNumbers(STyL->isPacked(), STyR->isPacked());
+
+ for (unsigned i = 0, e = STyL->getNumElements(); i != e; ++i) {
+ if (int Res = cmpTypes(STyL->getElementType(i), STyR->getElementType(i)))
+ return Res;
+ }
+ return 0;
+ }
+
+ case Type::FunctionTyID: {
+ FunctionType *FTyL = cast<FunctionType>(TyL);
+ FunctionType *FTyR = cast<FunctionType>(TyR);
+ if (FTyL->getNumParams() != FTyR->getNumParams())
+ return cmpNumbers(FTyL->getNumParams(), FTyR->getNumParams());
+
+ if (FTyL->isVarArg() != FTyR->isVarArg())
+ return cmpNumbers(FTyL->isVarArg(), FTyR->isVarArg());
+
+ if (int Res = cmpTypes(FTyL->getReturnType(), FTyR->getReturnType()))
+ return Res;
+
+ for (unsigned i = 0, e = FTyL->getNumParams(); i != e; ++i) {
+ if (int Res = cmpTypes(FTyL->getParamType(i), FTyR->getParamType(i)))
+ return Res;
+ }
+ return 0;
+ }
+
+ case Type::ArrayTyID:
+ case Type::VectorTyID: {
+ auto *STyL = cast<SequentialType>(TyL);
+ auto *STyR = cast<SequentialType>(TyR);
+ if (STyL->getNumElements() != STyR->getNumElements())
+ return cmpNumbers(STyL->getNumElements(), STyR->getNumElements());
+ return cmpTypes(STyL->getElementType(), STyR->getElementType());
+ }
+ }
+}
+
+// Determine whether the two operations are the same except that pointer-to-A
+// and pointer-to-B are equivalent. This should be kept in sync with
+// Instruction::isSameOperationAs.
+// Read method declaration comments for more details.
+int FunctionComparator::cmpOperations(const Instruction *L,
+ const Instruction *R,
+ bool &needToCmpOperands) const {
+ needToCmpOperands = true;
+ if (int Res = cmpValues(L, R))
+ return Res;
+
+ // Differences from Instruction::isSameOperationAs:
+ // * replace type comparison with calls to cmpTypes.
+ // * we test for I->getRawSubclassOptionalData (nuw/nsw/tail) at the top.
+ // * because of the above, we don't test for the tail bit on calls later on.
+ if (int Res = cmpNumbers(L->getOpcode(), R->getOpcode()))
+ return Res;
+
+ if (const GetElementPtrInst *GEPL = dyn_cast<GetElementPtrInst>(L)) {
+ needToCmpOperands = false;
+ const GetElementPtrInst *GEPR = cast<GetElementPtrInst>(R);
+ if (int Res =
+ cmpValues(GEPL->getPointerOperand(), GEPR->getPointerOperand()))
+ return Res;
+ return cmpGEPs(GEPL, GEPR);
+ }
+
+ if (int Res = cmpNumbers(L->getNumOperands(), R->getNumOperands()))
+ return Res;
+
+ if (int Res = cmpTypes(L->getType(), R->getType()))
+ return Res;
+
+ if (int Res = cmpNumbers(L->getRawSubclassOptionalData(),
+ R->getRawSubclassOptionalData()))
+ return Res;
+
+ // We have two instructions of identical opcode and #operands. Check to see
+ // if all operands are the same type
+ for (unsigned i = 0, e = L->getNumOperands(); i != e; ++i) {
+ if (int Res =
+ cmpTypes(L->getOperand(i)->getType(), R->getOperand(i)->getType()))
+ return Res;
+ }
+
+ // Check special state that is a part of some instructions.
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(L)) {
+ if (int Res = cmpTypes(AI->getAllocatedType(),
+ cast<AllocaInst>(R)->getAllocatedType()))
+ return Res;
+ return cmpNumbers(AI->getAlignment(), cast<AllocaInst>(R)->getAlignment());
+ }
+ if (const LoadInst *LI = dyn_cast<LoadInst>(L)) {
+ if (int Res = cmpNumbers(LI->isVolatile(), cast<LoadInst>(R)->isVolatile()))
+ return Res;
+ if (int Res =
+ cmpNumbers(LI->getAlignment(), cast<LoadInst>(R)->getAlignment()))
+ return Res;
+ if (int Res =
+ cmpOrderings(LI->getOrdering(), cast<LoadInst>(R)->getOrdering()))
+ return Res;
+ if (int Res =
+ cmpNumbers(LI->getSynchScope(), cast<LoadInst>(R)->getSynchScope()))
+ return Res;
+ return cmpRangeMetadata(LI->getMetadata(LLVMContext::MD_range),
+ cast<LoadInst>(R)->getMetadata(LLVMContext::MD_range));
+ }
+ if (const StoreInst *SI = dyn_cast<StoreInst>(L)) {
+ if (int Res =
+ cmpNumbers(SI->isVolatile(), cast<StoreInst>(R)->isVolatile()))
+ return Res;
+ if (int Res =
+ cmpNumbers(SI->getAlignment(), cast<StoreInst>(R)->getAlignment()))
+ return Res;
+ if (int Res =
+ cmpOrderings(SI->getOrdering(), cast<StoreInst>(R)->getOrdering()))
+ return Res;
+ return cmpNumbers(SI->getSynchScope(), cast<StoreInst>(R)->getSynchScope());
+ }
+ if (const CmpInst *CI = dyn_cast<CmpInst>(L))
+ return cmpNumbers(CI->getPredicate(), cast<CmpInst>(R)->getPredicate());
+ if (const CallInst *CI = dyn_cast<CallInst>(L)) {
+ if (int Res = cmpNumbers(CI->getCallingConv(),
+ cast<CallInst>(R)->getCallingConv()))
+ return Res;
+ if (int Res =
+ cmpAttrs(CI->getAttributes(), cast<CallInst>(R)->getAttributes()))
+ return Res;
+ if (int Res = cmpOperandBundlesSchema(CI, R))
+ return Res;
+ return cmpRangeMetadata(
+ CI->getMetadata(LLVMContext::MD_range),
+ cast<CallInst>(R)->getMetadata(LLVMContext::MD_range));
+ }
+ if (const InvokeInst *II = dyn_cast<InvokeInst>(L)) {
+ if (int Res = cmpNumbers(II->getCallingConv(),
+ cast<InvokeInst>(R)->getCallingConv()))
+ return Res;
+ if (int Res =
+ cmpAttrs(II->getAttributes(), cast<InvokeInst>(R)->getAttributes()))
+ return Res;
+ if (int Res = cmpOperandBundlesSchema(II, R))
+ return Res;
+ return cmpRangeMetadata(
+ II->getMetadata(LLVMContext::MD_range),
+ cast<InvokeInst>(R)->getMetadata(LLVMContext::MD_range));
+ }
+ if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(L)) {
+ ArrayRef<unsigned> LIndices = IVI->getIndices();
+ ArrayRef<unsigned> RIndices = cast<InsertValueInst>(R)->getIndices();
+ if (int Res = cmpNumbers(LIndices.size(), RIndices.size()))
+ return Res;
+ for (size_t i = 0, e = LIndices.size(); i != e; ++i) {
+ if (int Res = cmpNumbers(LIndices[i], RIndices[i]))
+ return Res;
+ }
+ return 0;
+ }
+ if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(L)) {
+ ArrayRef<unsigned> LIndices = EVI->getIndices();
+ ArrayRef<unsigned> RIndices = cast<ExtractValueInst>(R)->getIndices();
+ if (int Res = cmpNumbers(LIndices.size(), RIndices.size()))
+ return Res;
+ for (size_t i = 0, e = LIndices.size(); i != e; ++i) {
+ if (int Res = cmpNumbers(LIndices[i], RIndices[i]))
+ return Res;
+ }
+ }
+ if (const FenceInst *FI = dyn_cast<FenceInst>(L)) {
+ if (int Res =
+ cmpOrderings(FI->getOrdering(), cast<FenceInst>(R)->getOrdering()))
+ return Res;
+ return cmpNumbers(FI->getSynchScope(), cast<FenceInst>(R)->getSynchScope());
+ }
+ if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(L)) {
+ if (int Res = cmpNumbers(CXI->isVolatile(),
+ cast<AtomicCmpXchgInst>(R)->isVolatile()))
+ return Res;
+ if (int Res = cmpNumbers(CXI->isWeak(),
+ cast<AtomicCmpXchgInst>(R)->isWeak()))
+ return Res;
+ if (int Res =
+ cmpOrderings(CXI->getSuccessOrdering(),
+ cast<AtomicCmpXchgInst>(R)->getSuccessOrdering()))
+ return Res;
+ if (int Res =
+ cmpOrderings(CXI->getFailureOrdering(),
+ cast<AtomicCmpXchgInst>(R)->getFailureOrdering()))
+ return Res;
+ return cmpNumbers(CXI->getSynchScope(),
+ cast<AtomicCmpXchgInst>(R)->getSynchScope());
+ }
+ if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(L)) {
+ if (int Res = cmpNumbers(RMWI->getOperation(),
+ cast<AtomicRMWInst>(R)->getOperation()))
+ return Res;
+ if (int Res = cmpNumbers(RMWI->isVolatile(),
+ cast<AtomicRMWInst>(R)->isVolatile()))
+ return Res;
+ if (int Res = cmpOrderings(RMWI->getOrdering(),
+ cast<AtomicRMWInst>(R)->getOrdering()))
+ return Res;
+ return cmpNumbers(RMWI->getSynchScope(),
+ cast<AtomicRMWInst>(R)->getSynchScope());
+ }
+ if (const PHINode *PNL = dyn_cast<PHINode>(L)) {
+ const PHINode *PNR = cast<PHINode>(R);
+ // Ensure that in addition to the incoming values being identical
+ // (checked by the caller of this function), the incoming blocks
+ // are also identical.
+ for (unsigned i = 0, e = PNL->getNumIncomingValues(); i != e; ++i) {
+ if (int Res =
+ cmpValues(PNL->getIncomingBlock(i), PNR->getIncomingBlock(i)))
+ return Res;
+ }
+ }
+ return 0;
+}
+
+// Determine whether two GEP operations perform the same underlying arithmetic.
+// Read method declaration comments for more details.
+int FunctionComparator::cmpGEPs(const GEPOperator *GEPL,
+ const GEPOperator *GEPR) const {
+
+ unsigned int ASL = GEPL->getPointerAddressSpace();
+ unsigned int ASR = GEPR->getPointerAddressSpace();
+
+ if (int Res = cmpNumbers(ASL, ASR))
+ return Res;
+
+ // When we have target data, we can reduce the GEP down to the value in bytes
+ // added to the address.
+ const DataLayout &DL = FnL->getParent()->getDataLayout();
+ unsigned BitWidth = DL.getPointerSizeInBits(ASL);
+ APInt OffsetL(BitWidth, 0), OffsetR(BitWidth, 0);
+ if (GEPL->accumulateConstantOffset(DL, OffsetL) &&
+ GEPR->accumulateConstantOffset(DL, OffsetR))
+ return cmpAPInts(OffsetL, OffsetR);
+ if (int Res = cmpTypes(GEPL->getSourceElementType(),
+ GEPR->getSourceElementType()))
+ return Res;
+
+ if (int Res = cmpNumbers(GEPL->getNumOperands(), GEPR->getNumOperands()))
+ return Res;
+
+ for (unsigned i = 0, e = GEPL->getNumOperands(); i != e; ++i) {
+ if (int Res = cmpValues(GEPL->getOperand(i), GEPR->getOperand(i)))
+ return Res;
+ }
+
+ return 0;
+}
+
+int FunctionComparator::cmpInlineAsm(const InlineAsm *L,
+ const InlineAsm *R) const {
+ // InlineAsm's are uniqued. If they are the same pointer, obviously they are
+ // the same, otherwise compare the fields.
+ if (L == R)
+ return 0;
+ if (int Res = cmpTypes(L->getFunctionType(), R->getFunctionType()))
+ return Res;
+ if (int Res = cmpMem(L->getAsmString(), R->getAsmString()))
+ return Res;
+ if (int Res = cmpMem(L->getConstraintString(), R->getConstraintString()))
+ return Res;
+ if (int Res = cmpNumbers(L->hasSideEffects(), R->hasSideEffects()))
+ return Res;
+ if (int Res = cmpNumbers(L->isAlignStack(), R->isAlignStack()))
+ return Res;
+ if (int Res = cmpNumbers(L->getDialect(), R->getDialect()))
+ return Res;
+ llvm_unreachable("InlineAsm blocks were not uniqued.");
+ return 0;
+}
+
+/// Compare two values used by the two functions under pair-wise comparison. If
+/// this is the first time the values are seen, they're added to the mapping so
+/// that we will detect mismatches on next use.
+/// See comments in declaration for more details.
+int FunctionComparator::cmpValues(const Value *L, const Value *R) const {
+ // Catch self-reference case.
+ if (L == FnL) {
+ if (R == FnR)
+ return 0;
+ return -1;
+ }
+ if (R == FnR) {
+ if (L == FnL)
+ return 0;
+ return 1;
+ }
+
+ const Constant *ConstL = dyn_cast<Constant>(L);
+ const Constant *ConstR = dyn_cast<Constant>(R);
+ if (ConstL && ConstR) {
+ if (L == R)
+ return 0;
+ return cmpConstants(ConstL, ConstR);
+ }
+
+ if (ConstL)
+ return 1;
+ if (ConstR)
+ return -1;
+
+ const InlineAsm *InlineAsmL = dyn_cast<InlineAsm>(L);
+ const InlineAsm *InlineAsmR = dyn_cast<InlineAsm>(R);
+
+ if (InlineAsmL && InlineAsmR)
+ return cmpInlineAsm(InlineAsmL, InlineAsmR);
+ if (InlineAsmL)
+ return 1;
+ if (InlineAsmR)
+ return -1;
+
+ auto LeftSN = sn_mapL.insert(std::make_pair(L, sn_mapL.size())),
+ RightSN = sn_mapR.insert(std::make_pair(R, sn_mapR.size()));
+
+ return cmpNumbers(LeftSN.first->second, RightSN.first->second);
+}
+
+// Test whether two basic blocks have equivalent behaviour.
+int FunctionComparator::cmpBasicBlocks(const BasicBlock *BBL,
+ const BasicBlock *BBR) const {
+ BasicBlock::const_iterator InstL = BBL->begin(), InstLE = BBL->end();
+ BasicBlock::const_iterator InstR = BBR->begin(), InstRE = BBR->end();
+
+ do {
+ bool needToCmpOperands = true;
+ if (int Res = cmpOperations(&*InstL, &*InstR, needToCmpOperands))
+ return Res;
+ if (needToCmpOperands) {
+ assert(InstL->getNumOperands() == InstR->getNumOperands());
+
+ for (unsigned i = 0, e = InstL->getNumOperands(); i != e; ++i) {
+ Value *OpL = InstL->getOperand(i);
+ Value *OpR = InstR->getOperand(i);
+ if (int Res = cmpValues(OpL, OpR))
+ return Res;
+ // cmpValues should ensure this is true.
+ assert(cmpTypes(OpL->getType(), OpR->getType()) == 0);
+ }
+ }
+
+ ++InstL;
+ ++InstR;
+ } while (InstL != InstLE && InstR != InstRE);
+
+ if (InstL != InstLE && InstR == InstRE)
+ return 1;
+ if (InstL == InstLE && InstR != InstRE)
+ return -1;
+ return 0;
+}
+
+int FunctionComparator::compareSignature() const {
+ if (int Res = cmpAttrs(FnL->getAttributes(), FnR->getAttributes()))
+ return Res;
+
+ if (int Res = cmpNumbers(FnL->hasGC(), FnR->hasGC()))
+ return Res;
+
+ if (FnL->hasGC()) {
+ if (int Res = cmpMem(FnL->getGC(), FnR->getGC()))
+ return Res;
+ }
+
+ if (int Res = cmpNumbers(FnL->hasSection(), FnR->hasSection()))
+ return Res;
+
+ if (FnL->hasSection()) {
+ if (int Res = cmpMem(FnL->getSection(), FnR->getSection()))
+ return Res;
+ }
+
+ if (int Res = cmpNumbers(FnL->isVarArg(), FnR->isVarArg()))
+ return Res;
+
+ // TODO: if it's internal and only used in direct calls, we could handle this
+ // case too.
+ if (int Res = cmpNumbers(FnL->getCallingConv(), FnR->getCallingConv()))
+ return Res;
+
+ if (int Res = cmpTypes(FnL->getFunctionType(), FnR->getFunctionType()))
+ return Res;
+
+ assert(FnL->arg_size() == FnR->arg_size() &&
+ "Identically typed functions have different numbers of args!");
+
+ // Visit the arguments so that they get enumerated in the order they're
+ // passed in.
+ for (Function::const_arg_iterator ArgLI = FnL->arg_begin(),
+ ArgRI = FnR->arg_begin(),
+ ArgLE = FnL->arg_end();
+ ArgLI != ArgLE; ++ArgLI, ++ArgRI) {
+ if (cmpValues(&*ArgLI, &*ArgRI) != 0)
+ llvm_unreachable("Arguments repeat!");
+ }
+ return 0;
+}
+
+// Test whether the two functions have equivalent behaviour.
+int FunctionComparator::compare() {
+ beginCompare();
+
+ if (int Res = compareSignature())
+ return Res;
+
+ // We do a CFG-ordered walk since the actual ordering of the blocks in the
+ // linked list is immaterial. Our walk starts at the entry block for both
+ // functions, then takes each block from each terminator in order. As an
+ // artifact, this also means that unreachable blocks are ignored.
+ SmallVector<const BasicBlock *, 8> FnLBBs, FnRBBs;
+ SmallPtrSet<const BasicBlock *, 32> VisitedBBs; // in terms of F1.
+
+ FnLBBs.push_back(&FnL->getEntryBlock());
+ FnRBBs.push_back(&FnR->getEntryBlock());
+
+ VisitedBBs.insert(FnLBBs[0]);
+ while (!FnLBBs.empty()) {
+ const BasicBlock *BBL = FnLBBs.pop_back_val();
+ const BasicBlock *BBR = FnRBBs.pop_back_val();
+
+ if (int Res = cmpValues(BBL, BBR))
+ return Res;
+
+ if (int Res = cmpBasicBlocks(BBL, BBR))
+ return Res;
+
+ const TerminatorInst *TermL = BBL->getTerminator();
+ const TerminatorInst *TermR = BBR->getTerminator();
+
+ assert(TermL->getNumSuccessors() == TermR->getNumSuccessors());
+ for (unsigned i = 0, e = TermL->getNumSuccessors(); i != e; ++i) {
+ if (!VisitedBBs.insert(TermL->getSuccessor(i)).second)
+ continue;
+
+ FnLBBs.push_back(TermL->getSuccessor(i));
+ FnRBBs.push_back(TermR->getSuccessor(i));
+ }
+ }
+ return 0;
+}
+
+namespace {
+
+// Accumulate the hash of a sequence of 64-bit integers. This is similar to a
+// hash of a sequence of 64-bit ints, but the entire input does not need to be
+// available at once. This interface is necessary for functionHash because it
+// needs to accumulate the hash as the structure of the function is traversed
+// without saving these values to an intermediate buffer. This form of hashing
+// is not often needed, as usually the object to hash is just read from a
+// buffer.
+class HashAccumulator64 {
+ uint64_t Hash;
+public:
+ // Initialize to random constant, so the state isn't zero.
+ HashAccumulator64() { Hash = 0x6acaa36bef8325c5ULL; }
+ void add(uint64_t V) {
+ Hash = llvm::hashing::detail::hash_16_bytes(Hash, V);
+ }
+ // No finishing is required, because the entire hash value is used.
+ uint64_t getHash() { return Hash; }
+};
+} // end anonymous namespace
+
+// A function hash is calculated by considering only the number of arguments and
+// whether a function is varargs, the order of basic blocks (given by the
+// successors of each basic block in depth first order), and the order of
+// opcodes of each instruction within each of these basic blocks. This mirrors
+// the strategy compare() uses to compare functions by walking the BBs in depth
+// first order and comparing each instruction in sequence. Because this hash
+// does not look at the operands, it is insensitive to things such as the
+// target of calls and the constants used in the function, which makes it useful
+// when possibly merging functions which are the same modulo constants and call
+// targets.
+FunctionComparator::FunctionHash FunctionComparator::functionHash(Function &F) {
+ HashAccumulator64 H;
+ H.add(F.isVarArg());
+ H.add(F.arg_size());
+
+ SmallVector<const BasicBlock *, 8> BBs;
+ SmallSet<const BasicBlock *, 16> VisitedBBs;
+
+ // Walk the blocks in the same order as FunctionComparator::cmpBasicBlocks(),
+  // accumulating the hash of the function "structure" (BB and opcode sequence).
+ BBs.push_back(&F.getEntryBlock());
+ VisitedBBs.insert(BBs[0]);
+ while (!BBs.empty()) {
+ const BasicBlock *BB = BBs.pop_back_val();
+ // This random value acts as a block header, as otherwise the partition of
+ // opcodes into BBs wouldn't affect the hash, only the order of the opcodes
+ H.add(45798);
+ for (auto &Inst : *BB) {
+ H.add(Inst.getOpcode());
+ }
+ const TerminatorInst *Term = BB->getTerminator();
+ for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
+ if (!VisitedBBs.insert(Term->getSuccessor(i)).second)
+ continue;
+ BBs.push_back(Term->getSuccessor(i));
+ }
+ }
+ return H.getHash();
+}
+
+
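
An illustrative sketch of how a MergeFunctions-style client is expected to combine the two entry points defined above: bucket candidates by the cheap functionHash first, then run the full total-order compare() only within a bucket. The FunctionComparator constructor shape is assumed from its use of FnL, FnR and GlobalNumbers.

#include "llvm/Transforms/Utils/FunctionComparator.h"
using namespace llvm;

// Cheap pre-filter plus expensive comparison; only hash-equal functions are
// worth handing to the pairwise comparator.
static bool mayBeMergeable(Function &L, Function &R, GlobalNumberState &GN) {
  if (FunctionComparator::functionHash(L) !=
      FunctionComparator::functionHash(R))
    return false;               // structurally different for sure
  FunctionComparator FC(&L, &R, &GN);
  return FC.compare() == 0;     // full comparison over blocks and operands
}
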
diff --git a/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp b/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
index fcb25baf3216..440e36767edf 100644
--- a/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/FunctionImportUtils.cpp
@@ -48,7 +48,7 @@ bool FunctionImportGlobalProcessing::doImportAsDefinition(
GlobalsToImport);
}
-bool FunctionImportGlobalProcessing::doPromoteLocalToGlobal(
+bool FunctionImportGlobalProcessing::shouldPromoteLocalToGlobal(
const GlobalValue *SGV) {
assert(SGV->hasLocalLinkage());
// Both the imported references and the original local variable must
@@ -56,36 +56,42 @@ bool FunctionImportGlobalProcessing::doPromoteLocalToGlobal(
if (!isPerformingImport() && !isModuleExporting())
return false;
- // Local const variables never need to be promoted unless they are address
- // taken. The imported uses can simply use the clone created in this module.
- // For now we are conservative in determining which variables are not
- // address taken by checking the unnamed addr flag. To be more aggressive,
- // the address taken information must be checked earlier during parsing
- // of the module and recorded in the summary index for use when importing
- // from that module.
- auto *GVar = dyn_cast<GlobalVariable>(SGV);
- if (GVar && GVar->isConstant() && GVar->hasGlobalUnnamedAddr())
- return false;
+ // If we are exporting, we need to see whether this value is marked
+ // as NoRename in the summary. If we are importing, we may not have
+ // a summary in the distributed backend case (only summaries for values
+  // imported as defs, not references, are included in the index passed
+ // to the distributed backends).
+ if (isPerformingImport()) {
+ // We don't know for sure yet if we are importing this value (as either
+ // a reference or a def), since we are simply walking all values in the
+ // module. But by necessity if we end up importing it and it is local,
+ // it must be promoted, so unconditionally promote all values in the
+ // importing module.
+ return true;
+ }
- if (GVar && GVar->hasSection())
- // Some sections like "__DATA,__cfstring" are "magic" and promotion is not
- // allowed. Just disable promotion on any GVar with sections right now.
- return false;
+ // When exporting, consult the index.
+ auto Summaries = ImportIndex.findGlobalValueSummaryList(SGV->getGUID());
+ assert(Summaries != ImportIndex.end() &&
+ "Missing summary for global value when exporting");
+ assert(Summaries->second.size() == 1 && "Local has more than one summary");
+ auto Linkage = Summaries->second.front()->linkage();
+ if (!GlobalValue::isLocalLinkage(Linkage)) {
+ assert(!Summaries->second.front()->noRename());
+ return true;
+ }
- // Eventually we only need to promote functions in the exporting module that
- // are referenced by a potentially exported function (i.e. one that is in the
- // summary index).
- return true;
+ return false;
}
-std::string FunctionImportGlobalProcessing::getName(const GlobalValue *SGV) {
+std::string FunctionImportGlobalProcessing::getName(const GlobalValue *SGV,
+ bool DoPromote) {
// For locals that must be promoted to global scope, ensure that
// the promoted name uniquely identifies the copy in the original module,
// using the ID assigned during combined index creation. When importing,
// we rename all locals (not just those that are promoted) in order to
// avoid naming conflicts between locals imported from different modules.
- if (SGV->hasLocalLinkage() &&
- (doPromoteLocalToGlobal(SGV) || isPerformingImport()))
+ if (SGV->hasLocalLinkage() && (DoPromote || isPerformingImport()))
return ModuleSummaryIndex::getGlobalNameForLocal(
SGV->getName(),
ImportIndex.getModuleHash(SGV->getParent()->getModuleIdentifier()));
@@ -93,13 +99,14 @@ std::string FunctionImportGlobalProcessing::getName(const GlobalValue *SGV) {
}
GlobalValue::LinkageTypes
-FunctionImportGlobalProcessing::getLinkage(const GlobalValue *SGV) {
+FunctionImportGlobalProcessing::getLinkage(const GlobalValue *SGV,
+ bool DoPromote) {
// Any local variable that is referenced by an exported function needs
// to be promoted to global scope. Since we don't currently know which
// functions reference which local variables/functions, we must treat
// all as potentially exported if this module is exporting anything.
if (isModuleExporting()) {
- if (SGV->hasLocalLinkage() && doPromoteLocalToGlobal(SGV))
+ if (SGV->hasLocalLinkage() && DoPromote)
return GlobalValue::ExternalLinkage;
return SGV->getLinkage();
}
@@ -164,7 +171,7 @@ FunctionImportGlobalProcessing::getLinkage(const GlobalValue *SGV) {
case GlobalValue::PrivateLinkage:
// If we are promoting the local to global scope, it is handled
// similarly to a normal externally visible global.
- if (doPromoteLocalToGlobal(SGV)) {
+ if (DoPromote) {
if (doImportAsDefinition(SGV) && !dyn_cast<GlobalAlias>(SGV))
return GlobalValue::AvailableExternallyLinkage;
else
@@ -190,14 +197,19 @@ FunctionImportGlobalProcessing::getLinkage(const GlobalValue *SGV) {
}
void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) {
+ bool DoPromote = false;
if (GV.hasLocalLinkage() &&
- (doPromoteLocalToGlobal(&GV) || isPerformingImport())) {
- GV.setName(getName(&GV));
- GV.setLinkage(getLinkage(&GV));
+ ((DoPromote = shouldPromoteLocalToGlobal(&GV)) || isPerformingImport())) {
+    // Once we change the name or linkage it is difficult to determine
+    // again whether we should promote, since shouldPromoteLocalToGlobal needs
+    // to locate the summary (based on the GUID computed from the name and
+    // linkage). Therefore, use the DoPromote result saved above.
+ GV.setName(getName(&GV, DoPromote));
+ GV.setLinkage(getLinkage(&GV, DoPromote));
if (!GV.hasLocalLinkage())
GV.setVisibility(GlobalValue::HiddenVisibility);
} else
- GV.setLinkage(getLinkage(&GV));
+ GV.setLinkage(getLinkage(&GV, /* DoPromote */ false));
// Remove functions imported as available externally defs from comdats,
// as this is a declaration for the linker, and will be dropped eventually.
@@ -214,14 +226,6 @@ void FunctionImportGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) {
}
void FunctionImportGlobalProcessing::processGlobalsForThinLTO() {
- if (!moduleCanBeRenamedForThinLTO(M)) {
- // We would have blocked importing from this module by suppressing index
- // generation. We still may be able to import into this module though.
- assert(!isPerformingImport() &&
- "Should have blocked importing from module with local used in ASM");
- return;
- }
-
for (GlobalVariable &GV : M.globals())
processGlobalForThinLTO(GV);
for (Function &SF : M)
diff --git a/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp b/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp
index 266be41fbead..74ebcda8355c 100644
--- a/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp
@@ -20,9 +20,8 @@ using namespace llvm;
/// and release, then return AcquireRelease.
///
static AtomicOrdering strongerOrdering(AtomicOrdering X, AtomicOrdering Y) {
- if (X == AtomicOrdering::Acquire && Y == AtomicOrdering::Release)
- return AtomicOrdering::AcquireRelease;
- if (Y == AtomicOrdering::Acquire && X == AtomicOrdering::Release)
+ if ((X == AtomicOrdering::Acquire && Y == AtomicOrdering::Release) ||
+ (Y == AtomicOrdering::Acquire && X == AtomicOrdering::Release))
return AtomicOrdering::AcquireRelease;
return (AtomicOrdering)std::max((unsigned)X, (unsigned)Y);
}
@@ -35,7 +34,7 @@ bool llvm::isSafeToDestroyConstant(const Constant *C) {
if (isa<GlobalValue>(C))
return false;
- if (isa<ConstantInt>(C) || isa<ConstantFP>(C))
+ if (isa<ConstantData>(C))
return false;
for (const User *U : C->users())
diff --git a/contrib/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp b/contrib/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp
new file mode 100644
index 000000000000..ed018bb73107
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/ImportedFunctionsInliningStatistics.cpp
@@ -0,0 +1,203 @@
+//===-- ImportedFunctionsInliningStatistics.cpp -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// Generating inliner statistics for imported functions, mostly useful for
+// ThinLTO.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <algorithm>
+#include <iomanip>
+#include <sstream>
+using namespace llvm;
+
+ImportedFunctionsInliningStatistics::InlineGraphNode &
+ImportedFunctionsInliningStatistics::createInlineGraphNode(const Function &F) {
+
+ auto &ValueLookup = NodesMap[F.getName()];
+ if (!ValueLookup) {
+ ValueLookup = llvm::make_unique<InlineGraphNode>();
+ ValueLookup->Imported = F.getMetadata("thinlto_src_module") != nullptr;
+ }
+ return *ValueLookup;
+}
+
+void ImportedFunctionsInliningStatistics::recordInline(const Function &Caller,
+ const Function &Callee) {
+
+ InlineGraphNode &CallerNode = createInlineGraphNode(Caller);
+ InlineGraphNode &CalleeNode = createInlineGraphNode(Callee);
+ CalleeNode.NumberOfInlines++;
+
+ if (!CallerNode.Imported && !CalleeNode.Imported) {
+ // Direct inline from not imported callee to not imported caller, so we
+    // Direct inline from a non-imported callee to a non-imported caller, so we
+    // don't have to add this to the graph. It can still be very helpful to
+    // gather inliner statistics in a compile step where there are no imported
+    // functions; in this case the graph would be empty.
+ return;
+ }
+
+ CallerNode.InlinedCallees.push_back(&CalleeNode);
+ if (!CallerNode.Imported) {
+    // We could avoid a second lookup, but it would make the code much uglier.
+ auto It = NodesMap.find(Caller.getName());
+ assert(It != NodesMap.end() && "The node should be already there.");
+    // Save Caller as a starting node for traversal. The string has to be the
+    // one from the map because Caller can disappear (and its name with it).
+ NonImportedCallers.push_back(It->first());
+ }
+}
+
+void ImportedFunctionsInliningStatistics::setModuleInfo(const Module &M) {
+ ModuleName = M.getName();
+ for (const auto &F : M.functions()) {
+ AllFunctions++;
+ ImportedFunctions += int(F.getMetadata("thinlto_src_module") != nullptr);
+ }
+}
+static std::string getStatString(const char *Msg, int32_t Fraction, int32_t All,
+ const char *PercentageOfMsg,
+ bool LineEnd = true) {
+ double Result = 0;
+ if (All != 0)
+ Result = 100 * static_cast<double>(Fraction) / All;
+
+ std::stringstream Str;
+ Str << std::setprecision(4) << Msg << ": " << Fraction << " [" << Result
+ << "% of " << PercentageOfMsg << "]";
+ if (LineEnd)
+ Str << "\n";
+ return Str.str();
+}
+
+void ImportedFunctionsInliningStatistics::dump(const bool Verbose) {
+ calculateRealInlines();
+ NonImportedCallers.clear();
+
+ int32_t InlinedImportedFunctionsCount = 0;
+ int32_t InlinedNotImportedFunctionsCount = 0;
+
+ int32_t InlinedImportedFunctionsToImportingModuleCount = 0;
+ int32_t InlinedNotImportedFunctionsToImportingModuleCount = 0;
+
+ const auto SortedNodes = getSortedNodes();
+ std::string Out;
+ Out.reserve(5000);
+ raw_string_ostream Ostream(Out);
+
+ Ostream << "------- Dumping inliner stats for [" << ModuleName
+ << "] -------\n";
+
+ if (Verbose)
+ Ostream << "-- List of inlined functions:\n";
+
+ for (const auto &Node : SortedNodes) {
+ assert(Node->second->NumberOfInlines >= Node->second->NumberOfRealInlines);
+ if (Node->second->NumberOfInlines == 0)
+ continue;
+
+ if (Node->second->Imported) {
+ InlinedImportedFunctionsCount++;
+ InlinedImportedFunctionsToImportingModuleCount +=
+ int(Node->second->NumberOfRealInlines > 0);
+ } else {
+ InlinedNotImportedFunctionsCount++;
+ InlinedNotImportedFunctionsToImportingModuleCount +=
+ int(Node->second->NumberOfRealInlines > 0);
+ }
+
+ if (Verbose)
+ Ostream << "Inlined "
+ << (Node->second->Imported ? "imported " : "not imported ")
+ << "function [" << Node->first() << "]"
+ << ": #inlines = " << Node->second->NumberOfInlines
+ << ", #inlines_to_importing_module = "
+ << Node->second->NumberOfRealInlines << "\n";
+ }
+
+ auto InlinedFunctionsCount =
+ InlinedImportedFunctionsCount + InlinedNotImportedFunctionsCount;
+ auto NotImportedFuncCount = AllFunctions - ImportedFunctions;
+ auto ImportedNotInlinedIntoModule =
+ ImportedFunctions - InlinedImportedFunctionsToImportingModuleCount;
+
+ Ostream << "-- Summary:\n"
+ << "All functions: " << AllFunctions
+ << ", imported functions: " << ImportedFunctions << "\n"
+ << getStatString("inlined functions", InlinedFunctionsCount,
+ AllFunctions, "all functions")
+ << getStatString("imported functions inlined anywhere",
+ InlinedImportedFunctionsCount, ImportedFunctions,
+ "imported functions")
+ << getStatString("imported functions inlined into importing module",
+ InlinedImportedFunctionsToImportingModuleCount,
+ ImportedFunctions, "imported functions",
+ /*LineEnd=*/false)
+ << getStatString(", remaining", ImportedNotInlinedIntoModule,
+ ImportedFunctions, "imported functions")
+ << getStatString("non-imported functions inlined anywhere",
+ InlinedNotImportedFunctionsCount,
+ NotImportedFuncCount, "non-imported functions")
+ << getStatString(
+ "non-imported functions inlined into importing module",
+ InlinedNotImportedFunctionsToImportingModuleCount,
+ NotImportedFuncCount, "non-imported functions");
+ Ostream.flush();
+ dbgs() << Out;
+}
+
+void ImportedFunctionsInliningStatistics::calculateRealInlines() {
+ // Removing duplicated Callers.
+ std::sort(NonImportedCallers.begin(), NonImportedCallers.end());
+ NonImportedCallers.erase(
+ std::unique(NonImportedCallers.begin(), NonImportedCallers.end()),
+ NonImportedCallers.end());
+
+ for (const auto &Name : NonImportedCallers) {
+ auto &Node = *NodesMap[Name];
+ if (!Node.Visited)
+ dfs(Node);
+ }
+}
+
+void ImportedFunctionsInliningStatistics::dfs(InlineGraphNode &GraphNode) {
+ assert(!GraphNode.Visited);
+ GraphNode.Visited = true;
+ for (auto *const InlinedFunctionNode : GraphNode.InlinedCallees) {
+ InlinedFunctionNode->NumberOfRealInlines++;
+ if (!InlinedFunctionNode->Visited)
+ dfs(*InlinedFunctionNode);
+ }
+}
+
+ImportedFunctionsInliningStatistics::SortedNodesTy
+ImportedFunctionsInliningStatistics::getSortedNodes() {
+ SortedNodesTy SortedNodes;
+ SortedNodes.reserve(NodesMap.size());
+ for (const NodesMapTy::value_type& Node : NodesMap)
+ SortedNodes.push_back(&Node);
+
+ std::sort(
+ SortedNodes.begin(), SortedNodes.end(),
+ [&](const SortedNodesTy::value_type &Lhs,
+ const SortedNodesTy::value_type &Rhs) {
+ if (Lhs->second->NumberOfInlines != Rhs->second->NumberOfInlines)
+ return Lhs->second->NumberOfInlines > Rhs->second->NumberOfInlines;
+ if (Lhs->second->NumberOfRealInlines != Rhs->second->NumberOfRealInlines)
+ return Lhs->second->NumberOfRealInlines >
+ Rhs->second->NumberOfRealInlines;
+ return Lhs->first() < Rhs->first();
+ });
+ return SortedNodes;
+}
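
A rough sketch of the intended driver for this class (hypothetical; the real hook lives in the inliner): record the module once, record each inlining decision as it happens, then dump the report at the end.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/Transforms/Utils/ImportedFunctionsInliningStatistics.h"
#include <utility>
using namespace llvm;

static void reportThinLTOInlineStats(
    Module &M, ArrayRef<std::pair<Function *, Function *>> InlinedEdges) {
  ImportedFunctionsInliningStatistics Stats;
  Stats.setModuleInfo(M);                          // counts all/imported functions
  for (const auto &Edge : InlinedEdges)
    Stats.recordInline(*Edge.first, *Edge.second); // caller, callee
  Stats.dump(/*Verbose=*/true);                    // per-function detail + summary
}
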
diff --git a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
index e82c07fd7b59..a2ceded106b4 100644
--- a/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -14,6 +14,7 @@
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
@@ -228,7 +229,7 @@ static Value *getUnwindDestTokenHelper(Instruction *EHPad,
Instruction *ChildPad = cast<Instruction>(Child);
auto Memo = MemoMap.find(ChildPad);
if (Memo == MemoMap.end()) {
- // Haven't figure out this child pad yet; queue it.
+ // Haven't figured out this child pad yet; queue it.
Worklist.push_back(ChildPad);
continue;
}
@@ -366,6 +367,10 @@ static Value *getUnwindDestToken(Instruction *EHPad,
// search up the chain to try to find a funclet with information. Put
// null entries in the memo map to avoid re-processing as we go up.
MemoMap[EHPad] = nullptr;
+#ifndef NDEBUG
+ SmallPtrSet<Instruction *, 4> TempMemos;
+ TempMemos.insert(EHPad);
+#endif
Instruction *LastUselessPad = EHPad;
Value *AncestorToken;
for (AncestorToken = getParentPad(EHPad);
@@ -374,6 +379,13 @@ static Value *getUnwindDestToken(Instruction *EHPad,
// Skip over catchpads since they just follow their catchswitches.
if (isa<CatchPadInst>(AncestorPad))
continue;
+ // If the MemoMap had an entry mapping AncestorPad to nullptr, since we
+ // haven't yet called getUnwindDestTokenHelper for AncestorPad in this
+ // call to getUnwindDestToken, that would mean that AncestorPad had no
+ // information in itself, its descendants, or its ancestors. If that
+ // were the case, then we should also have recorded the lack of information
+ // for the descendant that we're coming from. So assert that we don't
+ // find a null entry in the MemoMap for AncestorPad.
assert(!MemoMap.count(AncestorPad) || MemoMap[AncestorPad]);
auto AncestorMemo = MemoMap.find(AncestorPad);
if (AncestorMemo == MemoMap.end()) {
@@ -384,25 +396,85 @@ static Value *getUnwindDestToken(Instruction *EHPad,
if (UnwindDestToken)
break;
LastUselessPad = AncestorPad;
+ MemoMap[LastUselessPad] = nullptr;
+#ifndef NDEBUG
+ TempMemos.insert(LastUselessPad);
+#endif
}
- // Since the whole tree under LastUselessPad has no information, it all must
- // match UnwindDestToken; record that to avoid repeating the search.
+ // We know that getUnwindDestTokenHelper was called on LastUselessPad and
+ // returned nullptr (and likewise for EHPad and any of its ancestors up to
+ // LastUselessPad), so LastUselessPad has no information from below. Since
+ // getUnwindDestTokenHelper must investigate all downward paths through
+ // no-information nodes to prove that a node has no information like this,
+ // and since any time it finds information it records it in the MemoMap for
+ // not just the immediately-containing funclet but also any ancestors also
+ // exited, it must be the case that, walking downward from LastUselessPad,
+ // visiting just those nodes which have not been mapped to an unwind dest
+ // by getUnwindDestTokenHelper (the nullptr TempMemos notwithstanding, since
+ // they are just used to keep getUnwindDestTokenHelper from repeating work),
+ // any node visited must have been exhaustively searched with no information
+ // for it found.
SmallVector<Instruction *, 8> Worklist(1, LastUselessPad);
while (!Worklist.empty()) {
Instruction *UselessPad = Worklist.pop_back_val();
- assert(!MemoMap.count(UselessPad) || MemoMap[UselessPad] == nullptr);
+ auto Memo = MemoMap.find(UselessPad);
+ if (Memo != MemoMap.end() && Memo->second) {
+ // Here the name 'UselessPad' is a bit of a misnomer, because we've found
+ // that it is a funclet that does have information about unwinding to
+ // a particular destination; its parent was a useless pad.
+ // Since its parent has no information, the unwind edge must not escape
+ // the parent, and must target a sibling of this pad. This local unwind
+ // gives us no information about EHPad. Leave it and the subtree rooted
+ // at it alone.
+ assert(getParentPad(Memo->second) == getParentPad(UselessPad));
+ continue;
+ }
+    // We know we don't have information for UselessPad. If it has an entry in
+ // the MemoMap (mapping it to nullptr), it must be one of the TempMemos
+ // added on this invocation of getUnwindDestToken; if a previous invocation
+ // recorded nullptr, it would have had to prove that the ancestors of
+ // UselessPad, which include LastUselessPad, had no information, and that
+ // in turn would have required proving that the descendants of
+    // LastUselessPad, which include EHPad, have no information about
+ // LastUselessPad, which would imply that EHPad was mapped to nullptr in
+ // the MemoMap on that invocation, which isn't the case if we got here.
+ assert(!MemoMap.count(UselessPad) || TempMemos.count(UselessPad));
+ // Assert as we enumerate users that 'UselessPad' doesn't have any unwind
+ // information that we'd be contradicting by making a map entry for it
+ // (which is something that getUnwindDestTokenHelper must have proved for
+    // us to get here). Just assert on its direct users here; the checks in
+ // this downward walk at its descendants will verify that they don't have
+ // any unwind edges that exit 'UselessPad' either (i.e. they either have no
+ // unwind edges or unwind to a sibling).
MemoMap[UselessPad] = UnwindDestToken;
if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(UselessPad)) {
- for (BasicBlock *HandlerBlock : CatchSwitch->handlers())
- for (User *U : HandlerBlock->getFirstNonPHI()->users())
+ assert(CatchSwitch->getUnwindDest() == nullptr && "Expected useless pad");
+ for (BasicBlock *HandlerBlock : CatchSwitch->handlers()) {
+ auto *CatchPad = HandlerBlock->getFirstNonPHI();
+ for (User *U : CatchPad->users()) {
+ assert(
+ (!isa<InvokeInst>(U) ||
+ (getParentPad(
+ cast<InvokeInst>(U)->getUnwindDest()->getFirstNonPHI()) ==
+ CatchPad)) &&
+ "Expected useless pad");
if (isa<CatchSwitchInst>(U) || isa<CleanupPadInst>(U))
Worklist.push_back(cast<Instruction>(U));
+ }
+ }
} else {
assert(isa<CleanupPadInst>(UselessPad));
- for (User *U : UselessPad->users())
+ for (User *U : UselessPad->users()) {
+ assert(!isa<CleanupReturnInst>(U) && "Expected useless pad");
+ assert((!isa<InvokeInst>(U) ||
+ (getParentPad(
+ cast<InvokeInst>(U)->getUnwindDest()->getFirstNonPHI()) ==
+ UselessPad)) &&
+ "Expected useless pad");
if (isa<CatchSwitchInst>(U) || isa<CleanupPadInst>(U))
Worklist.push_back(cast<Instruction>(U));
+ }
}
}
@@ -463,37 +535,7 @@ static BasicBlock *HandleCallsInBlockInlinedThroughInvoke(
#endif // NDEBUG
}
- // Convert this function call into an invoke instruction. First, split the
- // basic block.
- BasicBlock *Split =
- BB->splitBasicBlock(CI->getIterator(), CI->getName() + ".noexc");
-
- // Delete the unconditional branch inserted by splitBasicBlock
- BB->getInstList().pop_back();
-
- // Create the new invoke instruction.
- SmallVector<Value*, 8> InvokeArgs(CI->arg_begin(), CI->arg_end());
- SmallVector<OperandBundleDef, 1> OpBundles;
-
- CI->getOperandBundlesAsDefs(OpBundles);
-
- // Note: we're round tripping operand bundles through memory here, and that
- // can potentially be avoided with a cleverer API design that we do not have
- // as of this time.
-
- InvokeInst *II =
- InvokeInst::Create(CI->getCalledValue(), Split, UnwindEdge, InvokeArgs,
- OpBundles, CI->getName(), BB);
- II->setDebugLoc(CI->getDebugLoc());
- II->setCallingConv(CI->getCallingConv());
- II->setAttributes(CI->getAttributes());
-
- // Make sure that anything using the call now uses the invoke! This also
- // updates the CallGraph if present, because it uses a WeakVH.
- CI->replaceAllUsesWith(II);
-
- // Delete the original call
- Split->getInstList().pop_front();
+ changeToInvokeAndSplitBasicBlock(CI, UnwindEdge);
return BB;
}
return nullptr;
@@ -1053,8 +1095,10 @@ static void AddAliasScopeMetadata(CallSite CS, ValueToValueMapTy &VMap,
/// If the inlined function has non-byval align arguments, then
/// add @llvm.assume-based alignment assumptions to preserve this information.
static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) {
- if (!PreserveAlignmentAssumptions)
+ if (!PreserveAlignmentAssumptions || !IFI.GetAssumptionCache)
return;
+
+ AssumptionCache *AC = &(*IFI.GetAssumptionCache)(*CS.getCaller());
auto &DL = CS.getCaller()->getParent()->getDataLayout();
// To avoid inserting redundant assumptions, we should check for assumptions
@@ -1077,13 +1121,12 @@ static void AddAlignmentAssumptions(CallSite CS, InlineFunctionInfo &IFI) {
// If we can already prove the asserted alignment in the context of the
// caller, then don't bother inserting the assumption.
Value *Arg = CS.getArgument(I->getArgNo());
- if (getKnownAlignment(Arg, DL, CS.getInstruction(),
- &IFI.ACT->getAssumptionCache(*CS.getCaller()),
- &DT) >= Align)
+ if (getKnownAlignment(Arg, DL, CS.getInstruction(), AC, &DT) >= Align)
continue;
- IRBuilder<>(CS.getInstruction())
- .CreateAlignmentAssumption(DL, Arg, Align);
+ CallInst *NewAssumption = IRBuilder<>(CS.getInstruction())
+ .CreateAlignmentAssumption(DL, Arg, Align);
+ AC->registerAssumption(NewAssumption);
}
}
}
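Not part of the diff: a consolidated sketch of the pattern the hunk above switches to, assuming IFI.GetAssumptionCache is the optional callback this patch threads through InlineFunctionInfo (CS, DL, Arg, and Align are the locals of the surrounding function).

  // Sketch only: fetch the caller's AssumptionCache lazily and register the
  // freshly emitted llvm.assume, instead of clearing the whole cache later.
  AssumptionCache *AC = &(*IFI.GetAssumptionCache)(*CS.getCaller());
  CallInst *NewAssumption = IRBuilder<>(CS.getInstruction())
                                .CreateAlignmentAssumption(DL, Arg, Align);
  AC->registerAssumption(NewAssumption);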
@@ -1194,12 +1237,13 @@ static Value *HandleByValArgument(Value *Arg, Instruction *TheCall,
if (ByValAlignment <= 1) // 0 = unspecified, 1 = no particular alignment.
return Arg;
+ AssumptionCache *AC =
+ IFI.GetAssumptionCache ? &(*IFI.GetAssumptionCache)(*Caller) : nullptr;
const DataLayout &DL = Caller->getParent()->getDataLayout();
// If the pointer is already known to be sufficiently aligned, or if we can
// round it up to a larger alignment, then we don't need a temporary.
- if (getOrEnforceKnownAlignment(Arg, ByValAlignment, DL, TheCall,
- &IFI.ACT->getAssumptionCache(*Caller)) >=
+ if (getOrEnforceKnownAlignment(Arg, ByValAlignment, DL, TheCall, AC) >=
ByValAlignment)
return Arg;
@@ -1304,7 +1348,7 @@ static bool allocaWouldBeStaticInEntry(const AllocaInst *AI ) {
/// Update inlined instructions' line numbers to
/// to encode location where these instructions are inlined.
static void fixupLineNumbers(Function *Fn, Function::iterator FI,
- Instruction *TheCall) {
+ Instruction *TheCall, bool CalleeHasDebugInfo) {
const DebugLoc &TheCallDL = TheCall->getDebugLoc();
if (!TheCallDL)
return;
@@ -1326,22 +1370,26 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI,
for (; FI != Fn->end(); ++FI) {
for (BasicBlock::iterator BI = FI->begin(), BE = FI->end();
BI != BE; ++BI) {
- DebugLoc DL = BI->getDebugLoc();
- if (!DL) {
- // If the inlined instruction has no line number, make it look as if it
- // originates from the call location. This is important for
- // ((__always_inline__, __nodebug__)) functions which must use caller
- // location for all instructions in their function body.
-
- // Don't update static allocas, as they may get moved later.
- if (auto *AI = dyn_cast<AllocaInst>(BI))
- if (allocaWouldBeStaticInEntry(AI))
- continue;
-
- BI->setDebugLoc(TheCallDL);
- } else {
- BI->setDebugLoc(updateInlinedAtInfo(DL, InlinedAtNode, BI->getContext(), IANodes));
+ if (DebugLoc DL = BI->getDebugLoc()) {
+ BI->setDebugLoc(
+ updateInlinedAtInfo(DL, InlinedAtNode, BI->getContext(), IANodes));
+ continue;
}
+
+ if (CalleeHasDebugInfo)
+ continue;
+
+ // If the inlined instruction has no line number, make it look as if it
+ // originates from the call location. This is important for
+ // ((__always_inline__, __nodebug__)) functions which must use caller
+ // location for all instructions in their function body.
+
+ // Don't update static allocas, as they may get moved later.
+ if (auto *AI = dyn_cast<AllocaInst>(BI))
+ if (allocaWouldBeStaticInEntry(AI))
+ continue;
+
+ BI->setDebugLoc(TheCallDL);
}
}
}
@@ -1597,8 +1645,11 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
if (IFI.CG)
UpdateCallGraphAfterInlining(CS, FirstNewBlock, VMap, IFI);
- // Update inlined instructions' line number information.
- fixupLineNumbers(Caller, FirstNewBlock, TheCall);
+ // For 'nodebug' functions, the associated DISubprogram is always null.
+ // Conservatively avoid propagating the callsite debug location to
+ // instructions inlined from a function whose DISubprogram is not null.
+ fixupLineNumbers(Caller, FirstNewBlock, TheCall,
+ CalledFunc->getSubprogram() != nullptr);
// Clone existing noalias metadata if necessary.
CloneAliasScopeMetadata(CS, VMap);
@@ -1609,10 +1660,15 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// Propagate llvm.mem.parallel_loop_access if necessary.
PropagateParallelLoopAccessMetadata(CS, VMap);
- // FIXME: We could register any cloned assumptions instead of clearing the
- // whole function's cache.
- if (IFI.ACT)
- IFI.ACT->getAssumptionCache(*Caller).clear();
+ // Register any cloned assumptions.
+ if (IFI.GetAssumptionCache)
+ for (BasicBlock &NewBlock :
+ make_range(FirstNewBlock->getIterator(), Caller->end()))
+ for (Instruction &I : NewBlock) {
+ if (auto *II = dyn_cast<IntrinsicInst>(&I))
+ if (II->getIntrinsicID() == Intrinsic::assume)
+ (*IFI.GetAssumptionCache)(*Caller).registerAssumption(II);
+ }
}
// If there are any alloca instructions in the block that used to be the entry
@@ -1708,6 +1764,9 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
IRBuilder<> builder(&FirstNewBlock->front());
for (unsigned ai = 0, ae = IFI.StaticAllocas.size(); ai != ae; ++ai) {
AllocaInst *AI = IFI.StaticAllocas[ai];
+ // Don't mark swifterror allocas. They can't have bitcast uses.
+ if (AI->isSwiftError())
+ continue;
// If the alloca is already scoped to something smaller than the whole
// function then there's no need to add redundant, less accurate markers.
@@ -1949,6 +2008,20 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
std::swap(Returns, NormalReturns);
}
+ // Now that all of the transforms on the inlined code have taken place but
+ // before we splice the inlined code into the CFG and lose track of which
+ // blocks were actually inlined, collect the call sites. We only do this if
+ // call graph updates weren't requested, as those provide value handle based
+ // tracking of inlined call sites instead.
+ if (InlinedFunctionInfo.ContainsCalls && !IFI.CG) {
+ // Otherwise just collect the raw call sites that were inlined.
+ for (BasicBlock &NewBB :
+ make_range(FirstNewBlock->getIterator(), Caller->end()))
+ for (Instruction &I : NewBB)
+ if (auto CS = CallSite(&I))
+ IFI.InlinedCallSites.push_back(CS);
+ }
+
// If we cloned in _exactly one_ basic block, and if that block ends in a
// return instruction, we splice the body of the inlined callee directly into
// the calling basic block.
@@ -2130,9 +2203,10 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI,
// the entries are the same or undef). If so, remove the PHI so it doesn't
// block other optimizations.
if (PHI) {
+ AssumptionCache *AC =
+ IFI.GetAssumptionCache ? &(*IFI.GetAssumptionCache)(*Caller) : nullptr;
auto &DL = Caller->getParent()->getDataLayout();
- if (Value *V = SimplifyInstruction(PHI, DL, nullptr, nullptr,
- &IFI.ACT->getAssumptionCache(*Caller))) {
+ if (Value *V = SimplifyInstruction(PHI, DL, nullptr, nullptr, AC)) {
PHI->replaceAllUsesWith(V);
PHI->eraseFromParent();
}
diff --git a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
index 0d5a25b8ebc5..68c6b74d5e5b 100644
--- a/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LCSSA.cpp
@@ -51,10 +51,19 @@ using namespace llvm;
STATISTIC(NumLCSSA, "Number of live out of a loop variables");
+#ifdef EXPENSIVE_CHECKS
+static bool VerifyLoopLCSSA = true;
+#else
+static bool VerifyLoopLCSSA = false;
+#endif
+static cl::opt<bool,true>
+VerifyLoopLCSSAFlag("verify-loop-lcssa", cl::location(VerifyLoopLCSSA),
+ cl::desc("Verify loop lcssa form (time consuming)"));
+
/// Return true if the specified block is in the list.
static bool isExitBlock(BasicBlock *BB,
const SmallVectorImpl<BasicBlock *> &ExitBlocks) {
- return find(ExitBlocks, BB) != ExitBlocks.end();
+ return is_contained(ExitBlocks, BB);
}
/// For every instruction from the worklist, check to see if it has any uses
@@ -63,19 +72,25 @@ static bool isExitBlock(BasicBlock *BB,
bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
DominatorTree &DT, LoopInfo &LI) {
SmallVector<Use *, 16> UsesToRewrite;
- SmallVector<BasicBlock *, 8> ExitBlocks;
SmallSetVector<PHINode *, 16> PHIsToRemove;
PredIteratorCache PredCache;
bool Changed = false;
+  // Cache the Loop ExitBlocks across this loop. We expect to get a lot of
+  // instructions within the same loops; computing the exit blocks is
+  // expensive, and we're not mutating the loop structure.
+ SmallDenseMap<Loop*, SmallVector<BasicBlock *,1>> LoopExitBlocks;
+
while (!Worklist.empty()) {
UsesToRewrite.clear();
- ExitBlocks.clear();
Instruction *I = Worklist.pop_back_val();
BasicBlock *InstBB = I->getParent();
Loop *L = LI.getLoopFor(InstBB);
- L->getExitBlocks(ExitBlocks);
+ if (!LoopExitBlocks.count(L))
+ L->getExitBlocks(LoopExitBlocks[L]);
+ assert(LoopExitBlocks.count(L));
+ const SmallVectorImpl<BasicBlock *> &ExitBlocks = LoopExitBlocks[L];
if (ExitBlocks.empty())
continue;
@@ -186,14 +201,14 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl<Instruction *> &Worklist,
// Otherwise, do full PHI insertion.
SSAUpdate.RewriteUse(*UseToRewrite);
+ }
- // SSAUpdater might have inserted phi-nodes inside other loops. We'll need
- // to post-process them to keep LCSSA form.
- for (PHINode *InsertedPN : InsertedPHIs) {
- if (auto *OtherLoop = LI.getLoopFor(InsertedPN->getParent()))
- if (!L->contains(OtherLoop))
- PostProcessPHIs.push_back(InsertedPN);
- }
+ // SSAUpdater might have inserted phi-nodes inside other loops. We'll need
+ // to post-process them to keep LCSSA form.
+ for (PHINode *InsertedPN : InsertedPHIs) {
+ if (auto *OtherLoop = LI.getLoopFor(InsertedPN->getParent()))
+ if (!L->contains(OtherLoop))
+ PostProcessPHIs.push_back(InsertedPN);
}
// Post process PHI instructions that were inserted into another disjoint
@@ -229,7 +244,7 @@ blockDominatesAnExit(BasicBlock *BB,
DominatorTree &DT,
const SmallVectorImpl<BasicBlock *> &ExitBlocks) {
DomTreeNode *DomNode = DT.getNode(BB);
- return llvm::any_of(ExitBlocks, [&](BasicBlock * EB) {
+ return any_of(ExitBlocks, [&](BasicBlock *EB) {
return DT.dominates(DomNode, DT.getNode(EB));
});
}
@@ -315,6 +330,19 @@ struct LCSSAWrapperPass : public FunctionPass {
ScalarEvolution *SE;
bool runOnFunction(Function &F) override;
+ void verifyAnalysis() const override {
+    // This check is very expensive. On loop-intensive compiles it may cause
+    // up to a 10x slowdown. Currently it's disabled by default. LPPassManager
+    // always does a limited form of the LCSSA verification. Similar reasoning
+    // was used for the LoopInfo verifier.
+ if (VerifyLoopLCSSA) {
+ assert(all_of(*LI,
+ [&](Loop *L) {
+ return L->isRecursivelyLCSSAForm(*DT, *LI);
+ }) &&
+ "LCSSA form is broken!");
+ }
+ };
/// This transformation requires natural loop information & requires that
/// loop preheaders be inserted into the CFG. It maintains both of these,
@@ -330,6 +358,10 @@ struct LCSSAWrapperPass : public FunctionPass {
AU.addPreserved<GlobalsAAWrapperPass>();
AU.addPreserved<ScalarEvolutionWrapperPass>();
AU.addPreserved<SCEVAAWrapperPass>();
+
+ // This is needed to perform LCSSA verification inside LPPassManager
+ AU.addRequired<LCSSAVerificationPass>();
+ AU.addPreserved<LCSSAVerificationPass>();
}
};
}
@@ -339,6 +371,7 @@ INITIALIZE_PASS_BEGIN(LCSSAWrapperPass, "lcssa", "Loop-Closed SSA Form Pass",
false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LCSSAVerificationPass)
INITIALIZE_PASS_END(LCSSAWrapperPass, "lcssa", "Loop-Closed SSA Form Pass",
false, false)
@@ -355,7 +388,7 @@ bool LCSSAWrapperPass::runOnFunction(Function &F) {
return formLCSSAOnAllLoops(LI, *DT, SE);
}
-PreservedAnalyses LCSSAPass::run(Function &F, AnalysisManager<Function> &AM) {
+PreservedAnalyses LCSSAPass::run(Function &F, FunctionAnalysisManager &AM) {
auto &LI = AM.getResult<LoopAnalysis>(F);
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto *SE = AM.getCachedResult<ScalarEvolutionAnalysis>(F);
diff --git a/contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp b/contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
new file mode 100644
index 000000000000..d97cd7582eaa
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/LibCallsShrinkWrap.cpp
@@ -0,0 +1,571 @@
+//===-- LibCallsShrinkWrap.cpp ----------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass shrink-wraps a call to a function if the result is not used.
+// The call can set errno but is otherwise side effect free. For example:
+// sqrt(val);
+// is transformed to
+// if (val < 0)
+// sqrt(val);
+// Even if the result of a library call is not used, the compiler cannot
+// safely delete the call because the function can set errno on error
+// conditions.
+// Note that in many functions, the error condition depends solely on the
+// incoming parameter. In this optimization, we generate the condition that
+// leads to errno being set and use it to shrink-wrap the call. Since the
+// chances of hitting the error condition are low, the runtime call is
+// effectively eliminated.
+//
+// These partially dead calls are usually results of C++ abstraction penalty
+// exposed by inlining.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/LibCallsShrinkWrap.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstVisitor.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "libcalls-shrinkwrap"
+
+STATISTIC(NumWrappedOneCond, "Number of One-Condition Wrappers Inserted");
+STATISTIC(NumWrappedTwoCond, "Number of Two-Condition Wrappers Inserted");
+
+static cl::opt<bool> LibCallsShrinkWrapDoDomainError(
+ "libcalls-shrinkwrap-domain-error", cl::init(true), cl::Hidden,
+ cl::desc("Perform shrink-wrap on lib calls with domain errors"));
+static cl::opt<bool> LibCallsShrinkWrapDoRangeError(
+ "libcalls-shrinkwrap-range-error", cl::init(true), cl::Hidden,
+ cl::desc("Perform shrink-wrap on lib calls with range errors"));
+static cl::opt<bool> LibCallsShrinkWrapDoPoleError(
+ "libcalls-shrinkwrap-pole-error", cl::init(true), cl::Hidden,
+ cl::desc("Perform shrink-wrap on lib calls with pole errors"));
+
+namespace {
+class LibCallsShrinkWrapLegacyPass : public FunctionPass {
+public:
+ static char ID; // Pass identification, replacement for typeid
+ explicit LibCallsShrinkWrapLegacyPass() : FunctionPass(ID) {
+ initializeLibCallsShrinkWrapLegacyPassPass(
+ *PassRegistry::getPassRegistry());
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnFunction(Function &F) override;
+};
+}
+
+char LibCallsShrinkWrapLegacyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(LibCallsShrinkWrapLegacyPass, "libcalls-shrinkwrap",
+ "Conditionally eliminate dead library calls", false,
+ false)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(LibCallsShrinkWrapLegacyPass, "libcalls-shrinkwrap",
+ "Conditionally eliminate dead library calls", false, false)
+
+namespace {
+class LibCallsShrinkWrap : public InstVisitor<LibCallsShrinkWrap> {
+public:
+ LibCallsShrinkWrap(const TargetLibraryInfo &TLI) : TLI(TLI), Changed(false){};
+ bool isChanged() const { return Changed; }
+ void visitCallInst(CallInst &CI) { checkCandidate(CI); }
+ void perform() {
+ for (auto &CI : WorkList) {
+ DEBUG(dbgs() << "CDCE calls: " << CI->getCalledFunction()->getName()
+ << "\n");
+ if (perform(CI)) {
+ Changed = true;
+ DEBUG(dbgs() << "Transformed\n");
+ }
+ }
+ }
+
+private:
+ bool perform(CallInst *CI);
+ void checkCandidate(CallInst &CI);
+ void shrinkWrapCI(CallInst *CI, Value *Cond);
+ bool performCallDomainErrorOnly(CallInst *CI, const LibFunc::Func &Func);
+ bool performCallErrors(CallInst *CI, const LibFunc::Func &Func);
+ bool performCallRangeErrorOnly(CallInst *CI, const LibFunc::Func &Func);
+ Value *generateOneRangeCond(CallInst *CI, const LibFunc::Func &Func);
+ Value *generateTwoRangeCond(CallInst *CI, const LibFunc::Func &Func);
+ Value *generateCondForPow(CallInst *CI, const LibFunc::Func &Func);
+
+ // Create an OR of two conditions.
+ Value *createOrCond(CallInst *CI, CmpInst::Predicate Cmp, float Val,
+ CmpInst::Predicate Cmp2, float Val2) {
+ IRBuilder<> BBBuilder(CI);
+ Value *Arg = CI->getArgOperand(0);
+ auto Cond2 = createCond(BBBuilder, Arg, Cmp2, Val2);
+ auto Cond1 = createCond(BBBuilder, Arg, Cmp, Val);
+ return BBBuilder.CreateOr(Cond1, Cond2);
+ }
+
+ // Create a single condition using IRBuilder.
+ Value *createCond(IRBuilder<> &BBBuilder, Value *Arg, CmpInst::Predicate Cmp,
+ float Val) {
+ Constant *V = ConstantFP::get(BBBuilder.getContext(), APFloat(Val));
+ if (!Arg->getType()->isFloatTy())
+ V = ConstantExpr::getFPExtend(V, Arg->getType());
+ return BBBuilder.CreateFCmp(Cmp, Arg, V);
+ }
+
+ // Create a single condition.
+ Value *createCond(CallInst *CI, CmpInst::Predicate Cmp, float Val) {
+ IRBuilder<> BBBuilder(CI);
+ Value *Arg = CI->getArgOperand(0);
+ return createCond(BBBuilder, Arg, Cmp, Val);
+ }
+
+ const TargetLibraryInfo &TLI;
+ SmallVector<CallInst *, 16> WorkList;
+ bool Changed;
+};
+} // end anonymous namespace
+
+// Perform the transformation to calls with errno set by domain error.
+bool LibCallsShrinkWrap::performCallDomainErrorOnly(CallInst *CI,
+ const LibFunc::Func &Func) {
+ Value *Cond = nullptr;
+
+ switch (Func) {
+ case LibFunc::acos: // DomainError: (x < -1 || x > 1)
+ case LibFunc::acosf: // Same as acos
+ case LibFunc::acosl: // Same as acos
+ case LibFunc::asin: // DomainError: (x < -1 || x > 1)
+ case LibFunc::asinf: // Same as asin
+ case LibFunc::asinl: // Same as asin
+ {
+ ++NumWrappedTwoCond;
+ Cond = createOrCond(CI, CmpInst::FCMP_OLT, -1.0f, CmpInst::FCMP_OGT, 1.0f);
+ break;
+ }
+ case LibFunc::cos: // DomainError: (x == +inf || x == -inf)
+ case LibFunc::cosf: // Same as cos
+ case LibFunc::cosl: // Same as cos
+ case LibFunc::sin: // DomainError: (x == +inf || x == -inf)
+ case LibFunc::sinf: // Same as sin
+ case LibFunc::sinl: // Same as sin
+ {
+ ++NumWrappedTwoCond;
+ Cond = createOrCond(CI, CmpInst::FCMP_OEQ, INFINITY, CmpInst::FCMP_OEQ,
+ -INFINITY);
+ break;
+ }
+ case LibFunc::acosh: // DomainError: (x < 1)
+ case LibFunc::acoshf: // Same as acosh
+ case LibFunc::acoshl: // Same as acosh
+ {
+ ++NumWrappedOneCond;
+ Cond = createCond(CI, CmpInst::FCMP_OLT, 1.0f);
+ break;
+ }
+ case LibFunc::sqrt: // DomainError: (x < 0)
+ case LibFunc::sqrtf: // Same as sqrt
+ case LibFunc::sqrtl: // Same as sqrt
+ {
+ ++NumWrappedOneCond;
+ Cond = createCond(CI, CmpInst::FCMP_OLT, 0.0f);
+ break;
+ }
+ default:
+ return false;
+ }
+ shrinkWrapCI(CI, Cond);
+ return true;
+}
+
+// Perform the transformation to calls with errno set by range error.
+bool LibCallsShrinkWrap::performCallRangeErrorOnly(CallInst *CI,
+ const LibFunc::Func &Func) {
+ Value *Cond = nullptr;
+
+ switch (Func) {
+ case LibFunc::cosh:
+ case LibFunc::coshf:
+ case LibFunc::coshl:
+ case LibFunc::exp:
+ case LibFunc::expf:
+ case LibFunc::expl:
+ case LibFunc::exp10:
+ case LibFunc::exp10f:
+ case LibFunc::exp10l:
+ case LibFunc::exp2:
+ case LibFunc::exp2f:
+ case LibFunc::exp2l:
+ case LibFunc::sinh:
+ case LibFunc::sinhf:
+ case LibFunc::sinhl: {
+ Cond = generateTwoRangeCond(CI, Func);
+ break;
+ }
+ case LibFunc::expm1: // RangeError: (709, inf)
+ case LibFunc::expm1f: // RangeError: (88, inf)
+ case LibFunc::expm1l: // RangeError: (11356, inf)
+ {
+ Cond = generateOneRangeCond(CI, Func);
+ break;
+ }
+ default:
+ return false;
+ }
+ shrinkWrapCI(CI, Cond);
+ return true;
+}
+
+// Perform the transformation to calls with errno set by combination of errors.
+bool LibCallsShrinkWrap::performCallErrors(CallInst *CI,
+ const LibFunc::Func &Func) {
+ Value *Cond = nullptr;
+
+ switch (Func) {
+ case LibFunc::atanh: // DomainError: (x < -1 || x > 1)
+ // PoleError: (x == -1 || x == 1)
+ // Overall Cond: (x <= -1 || x >= 1)
+ case LibFunc::atanhf: // Same as atanh
+ case LibFunc::atanhl: // Same as atanh
+ {
+ if (!LibCallsShrinkWrapDoDomainError || !LibCallsShrinkWrapDoPoleError)
+ return false;
+ ++NumWrappedTwoCond;
+ Cond = createOrCond(CI, CmpInst::FCMP_OLE, -1.0f, CmpInst::FCMP_OGE, 1.0f);
+ break;
+ }
+ case LibFunc::log: // DomainError: (x < 0)
+ // PoleError: (x == 0)
+ // Overall Cond: (x <= 0)
+ case LibFunc::logf: // Same as log
+ case LibFunc::logl: // Same as log
+ case LibFunc::log10: // Same as log
+ case LibFunc::log10f: // Same as log
+ case LibFunc::log10l: // Same as log
+ case LibFunc::log2: // Same as log
+ case LibFunc::log2f: // Same as log
+ case LibFunc::log2l: // Same as log
+ case LibFunc::logb: // Same as log
+ case LibFunc::logbf: // Same as log
+ case LibFunc::logbl: // Same as log
+ {
+ if (!LibCallsShrinkWrapDoDomainError || !LibCallsShrinkWrapDoPoleError)
+ return false;
+ ++NumWrappedOneCond;
+ Cond = createCond(CI, CmpInst::FCMP_OLE, 0.0f);
+ break;
+ }
+ case LibFunc::log1p: // DomainError: (x < -1)
+ // PoleError: (x == -1)
+ // Overall Cond: (x <= -1)
+ case LibFunc::log1pf: // Same as log1p
+ case LibFunc::log1pl: // Same as log1p
+ {
+ if (!LibCallsShrinkWrapDoDomainError || !LibCallsShrinkWrapDoPoleError)
+ return false;
+ ++NumWrappedOneCond;
+ Cond = createCond(CI, CmpInst::FCMP_OLE, -1.0f);
+ break;
+ }
+ case LibFunc::pow: // DomainError: x < 0 and y is noninteger
+ // PoleError: x == 0 and y < 0
+ // RangeError: overflow or underflow
+ case LibFunc::powf:
+ case LibFunc::powl: {
+ if (!LibCallsShrinkWrapDoDomainError || !LibCallsShrinkWrapDoPoleError ||
+ !LibCallsShrinkWrapDoRangeError)
+ return false;
+ Cond = generateCondForPow(CI, Func);
+ if (Cond == nullptr)
+ return false;
+ break;
+ }
+ default:
+ return false;
+ }
+ assert(Cond && "performCallErrors should not see an empty condition");
+ shrinkWrapCI(CI, Cond);
+ return true;
+}
+
+// Checks if CI is a candidate for shrink-wrapping and, if so, puts it into the
+// work list.
+void LibCallsShrinkWrap::checkCandidate(CallInst &CI) {
+ if (CI.isNoBuiltin())
+ return;
+ // A possible improvement is to handle the calls with the return value being
+ // used. If there is API for fast libcall implementation without setting
+ // errno, we can use the same framework to direct/wrap the call to the fast
+ // API in the error free path, and leave the original call in the slow path.
+ if (!CI.use_empty())
+ return;
+
+ LibFunc::Func Func;
+ Function *Callee = CI.getCalledFunction();
+ if (!Callee)
+ return;
+ if (!TLI.getLibFunc(*Callee, Func) || !TLI.has(Func))
+ return;
+
+ if (CI.getNumArgOperands() == 0)
+ return;
+ // TODO: Handle long double in other formats.
+ Type *ArgType = CI.getArgOperand(0)->getType();
+ if (!(ArgType->isFloatTy() || ArgType->isDoubleTy() ||
+ ArgType->isX86_FP80Ty()))
+ return;
+
+ WorkList.push_back(&CI);
+}
+
+// Generate the upper bound condition for RangeError.
+Value *LibCallsShrinkWrap::generateOneRangeCond(CallInst *CI,
+ const LibFunc::Func &Func) {
+ float UpperBound;
+ switch (Func) {
+ case LibFunc::expm1: // RangeError: (709, inf)
+ UpperBound = 709.0f;
+ break;
+ case LibFunc::expm1f: // RangeError: (88, inf)
+ UpperBound = 88.0f;
+ break;
+ case LibFunc::expm1l: // RangeError: (11356, inf)
+ UpperBound = 11356.0f;
+ break;
+ default:
+    llvm_unreachable("Should not reach here");
+ }
+
+ ++NumWrappedOneCond;
+ return createCond(CI, CmpInst::FCMP_OGT, UpperBound);
+}
+
+// Generate the lower and upper bound condition for RangeError.
+Value *LibCallsShrinkWrap::generateTwoRangeCond(CallInst *CI,
+ const LibFunc::Func &Func) {
+ float UpperBound, LowerBound;
+ switch (Func) {
+ case LibFunc::cosh: // RangeError: (x < -710 || x > 710)
+ case LibFunc::sinh: // Same as cosh
+ LowerBound = -710.0f;
+ UpperBound = 710.0f;
+ break;
+ case LibFunc::coshf: // RangeError: (x < -89 || x > 89)
+ case LibFunc::sinhf: // Same as coshf
+ LowerBound = -89.0f;
+ UpperBound = 89.0f;
+ break;
+ case LibFunc::coshl: // RangeError: (x < -11357 || x > 11357)
+ case LibFunc::sinhl: // Same as coshl
+ LowerBound = -11357.0f;
+ UpperBound = 11357.0f;
+ break;
+ case LibFunc::exp: // RangeError: (x < -745 || x > 709)
+ LowerBound = -745.0f;
+ UpperBound = 709.0f;
+ break;
+ case LibFunc::expf: // RangeError: (x < -103 || x > 88)
+ LowerBound = -103.0f;
+ UpperBound = 88.0f;
+ break;
+ case LibFunc::expl: // RangeError: (x < -11399 || x > 11356)
+ LowerBound = -11399.0f;
+ UpperBound = 11356.0f;
+ break;
+ case LibFunc::exp10: // RangeError: (x < -323 || x > 308)
+ LowerBound = -323.0f;
+ UpperBound = 308.0f;
+ break;
+ case LibFunc::exp10f: // RangeError: (x < -45 || x > 38)
+ LowerBound = -45.0f;
+ UpperBound = 38.0f;
+ break;
+ case LibFunc::exp10l: // RangeError: (x < -4950 || x > 4932)
+ LowerBound = -4950.0f;
+ UpperBound = 4932.0f;
+ break;
+ case LibFunc::exp2: // RangeError: (x < -1074 || x > 1023)
+ LowerBound = -1074.0f;
+ UpperBound = 1023.0f;
+ break;
+ case LibFunc::exp2f: // RangeError: (x < -149 || x > 127)
+ LowerBound = -149.0f;
+ UpperBound = 127.0f;
+ break;
+ case LibFunc::exp2l: // RangeError: (x < -16445 || x > 11383)
+ LowerBound = -16445.0f;
+ UpperBound = 11383.0f;
+ break;
+ default:
+    llvm_unreachable("Should not reach here");
+ }
+
+ ++NumWrappedTwoCond;
+ return createOrCond(CI, CmpInst::FCMP_OGT, UpperBound, CmpInst::FCMP_OLT,
+ LowerBound);
+}
+
+// For pow(x,y), we only handle the following cases:
+// (1) x is a constant && (x >= 1) && (x < MaxUInt8)
+// Cond is: (y > 127)
+// (2) x is a value coming from an integer type.
+// (2.1) if x's bit_size == 8
+// Cond: (x <= 0 || y > 128)
+// (2.2) if x's bit_size is 16
+// Cond: (x <= 0 || y > 64)
+// (2.3) if x's bit_size is 32
+// Cond: (x <= 0 || y > 32)
+// Support for powl(x,y) and powf(x,y) is TBD.
+//
+// Note that the condition can be more conservative than the actual condition
+// (i.e., we might invoke calls that will not set errno).
+//
+Value *LibCallsShrinkWrap::generateCondForPow(CallInst *CI,
+ const LibFunc::Func &Func) {
+ // FIXME: LibFunc::powf and powl TBD.
+ if (Func != LibFunc::pow) {
+ DEBUG(dbgs() << "Not handled powf() and powl()\n");
+ return nullptr;
+ }
+
+ Value *Base = CI->getArgOperand(0);
+ Value *Exp = CI->getArgOperand(1);
+ IRBuilder<> BBBuilder(CI);
+
+ // Constant Base case.
+ if (ConstantFP *CF = dyn_cast<ConstantFP>(Base)) {
+ double D = CF->getValueAPF().convertToDouble();
+ if (D < 1.0f || D > APInt::getMaxValue(8).getZExtValue()) {
+ DEBUG(dbgs() << "Not handled pow(): constant base out of range\n");
+ return nullptr;
+ }
+
+ ++NumWrappedOneCond;
+ Constant *V = ConstantFP::get(CI->getContext(), APFloat(127.0f));
+ if (!Exp->getType()->isFloatTy())
+ V = ConstantExpr::getFPExtend(V, Exp->getType());
+ return BBBuilder.CreateFCmp(CmpInst::FCMP_OGT, Exp, V);
+ }
+
+  // If the Base value is coming from an integer type.
+ Instruction *I = dyn_cast<Instruction>(Base);
+ if (!I) {
+ DEBUG(dbgs() << "Not handled pow(): FP type base\n");
+ return nullptr;
+ }
+ unsigned Opcode = I->getOpcode();
+ if (Opcode == Instruction::UIToFP || Opcode == Instruction::SIToFP) {
+ unsigned BW = I->getOperand(0)->getType()->getPrimitiveSizeInBits();
+ float UpperV = 0.0f;
+ if (BW == 8)
+ UpperV = 128.0f;
+ else if (BW == 16)
+ UpperV = 64.0f;
+ else if (BW == 32)
+ UpperV = 32.0f;
+ else {
+ DEBUG(dbgs() << "Not handled pow(): type too wide\n");
+ return nullptr;
+ }
+
+ ++NumWrappedTwoCond;
+ Constant *V = ConstantFP::get(CI->getContext(), APFloat(UpperV));
+ Constant *V0 = ConstantFP::get(CI->getContext(), APFloat(0.0f));
+ if (!Exp->getType()->isFloatTy())
+ V = ConstantExpr::getFPExtend(V, Exp->getType());
+ if (!Base->getType()->isFloatTy())
+ V0 = ConstantExpr::getFPExtend(V0, Exp->getType());
+
+ Value *Cond = BBBuilder.CreateFCmp(CmpInst::FCMP_OGT, Exp, V);
+ Value *Cond0 = BBBuilder.CreateFCmp(CmpInst::FCMP_OLE, Base, V0);
+ return BBBuilder.CreateOr(Cond0, Cond);
+ }
+ DEBUG(dbgs() << "Not handled pow(): base not from integer convert\n");
+ return nullptr;
+}
+
+// Wrap the library call in a guard built from the conditions that can
+// potentially set errno.
+void LibCallsShrinkWrap::shrinkWrapCI(CallInst *CI, Value *Cond) {
+  assert(Cond != nullptr && "shrinkWrapCI is not expecting an empty condition");
+ MDNode *BranchWeights =
+ MDBuilder(CI->getContext()).createBranchWeights(1, 2000);
+ TerminatorInst *NewInst =
+ SplitBlockAndInsertIfThen(Cond, CI, false, BranchWeights);
+ BasicBlock *CallBB = NewInst->getParent();
+ CallBB->setName("cdce.call");
+ CallBB->getSingleSuccessor()->setName("cdce.end");
+ CI->removeFromParent();
+ CallBB->getInstList().insert(CallBB->getFirstInsertionPt(), CI);
+ DEBUG(dbgs() << "== Basic Block After ==");
+ DEBUG(dbgs() << *CallBB->getSinglePredecessor() << *CallBB
+ << *CallBB->getSingleSuccessor() << "\n");
+}
+
+// Perform the transformation to a single candidate.
+bool LibCallsShrinkWrap::perform(CallInst *CI) {
+ LibFunc::Func Func;
+ Function *Callee = CI->getCalledFunction();
+ assert(Callee && "perform() should apply to a non-empty callee");
+ TLI.getLibFunc(*Callee, Func);
+ assert(Func && "perform() is not expecting an empty function");
+
+ if (LibCallsShrinkWrapDoDomainError && performCallDomainErrorOnly(CI, Func))
+ return true;
+
+ if (LibCallsShrinkWrapDoRangeError && performCallRangeErrorOnly(CI, Func))
+ return true;
+
+ return performCallErrors(CI, Func);
+}
+
+void LibCallsShrinkWrapLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ AU.addRequired<TargetLibraryInfoWrapperPass>();
+}
+
+static bool runImpl(Function &F, const TargetLibraryInfo &TLI) {
+ if (F.hasFnAttribute(Attribute::OptimizeForSize))
+ return false;
+ LibCallsShrinkWrap CCDCE(TLI);
+ CCDCE.visit(F);
+ CCDCE.perform();
+ return CCDCE.isChanged();
+}
+
+bool LibCallsShrinkWrapLegacyPass::runOnFunction(Function &F) {
+ auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ return runImpl(F, TLI);
+}
+
+namespace llvm {
+char &LibCallsShrinkWrapPassID = LibCallsShrinkWrapLegacyPass::ID;
+
+// Public interface to LibCallsShrinkWrap pass.
+FunctionPass *createLibCallsShrinkWrapPass() {
+ return new LibCallsShrinkWrapLegacyPass();
+}
+
+PreservedAnalyses LibCallsShrinkWrapPass::run(Function &F,
+ FunctionAnalysisManager &FAM) {
+ auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
+ bool Changed = runImpl(F, TLI);
+ if (!Changed)
+ return PreservedAnalyses::all();
+ auto PA = PreservedAnalyses();
+ PA.preserve<GlobalsAA>();
+ return PA;
+}
+}
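Not part of the diff: a hedged usage sketch for the new pass via the legacy pass manager, assuming a caller already holds a Module M; createLibCallsShrinkWrapPass() is the factory added above, and the required TargetLibraryInfoWrapperPass is scheduled automatically through getAnalysisUsage.

  // Sketch only: run libcalls-shrinkwrap on every function in M.
  legacy::FunctionPassManager FPM(&M);
  FPM.add(createLibCallsShrinkWrapPass());
  FPM.doInitialization();
  for (Function &F : M)
    FPM.run(F);
  FPM.doFinalization();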
diff --git a/contrib/llvm/lib/Transforms/Utils/Local.cpp b/contrib/llvm/lib/Transforms/Utils/Local.cpp
index f1838d891466..6e4174aa0cda 100644
--- a/contrib/llvm/lib/Transforms/Utils/Local.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/Local.cpp
@@ -340,6 +340,10 @@ bool llvm::isInstructionTriviallyDead(Instruction *I,
if (Constant *C = dyn_cast<Constant>(CI->getArgOperand(0)))
return C->isNullValue() || isa<UndefValue>(C);
+ if (CallSite CS = CallSite(I))
+ if (isMathLibCallNoop(CS, TLI))
+ return true;
+
return false;
}
@@ -886,6 +890,17 @@ bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) {
}
}
+ // If the unconditional branch we replaced contains llvm.loop metadata, we
+ // add the metadata to the branch instructions in the predecessors.
+ unsigned LoopMDKind = BB->getContext().getMDKindID("llvm.loop");
+ Instruction *TI = BB->getTerminator();
+ if (TI)
+ if (MDNode *LoopMD = TI->getMetadata(LoopMDKind))
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ BasicBlock *Pred = *PI;
+ Pred->getTerminator()->setMetadata(LoopMDKind, LoopMD);
+ }
+
// Everything that jumped to BB now goes to Succ.
BB->replaceAllUsesWith(Succ);
if (!Succ->hasName()) Succ->takeName(BB);
@@ -1001,10 +1016,6 @@ static unsigned enforceKnownAlignment(Value *V, unsigned Align,
return Align;
}
-/// getOrEnforceKnownAlignment - If the specified pointer has an alignment that
-/// we can determine, return it, otherwise return 0. If PrefAlign is specified,
-/// and it is more than the alignment of the ultimate object, see if we can
-/// increase the alignment of the ultimate object, making this check succeed.
unsigned llvm::getOrEnforceKnownAlignment(Value *V, unsigned PrefAlign,
const DataLayout &DL,
const Instruction *CxtI,
@@ -1057,9 +1068,27 @@ static bool LdStHasDebugValue(DILocalVariable *DIVar, DIExpression *DIExpr,
return false;
}
+/// See if there is a dbg.value intrinsic for DIVar for the PHI node.
+static bool PhiHasDebugValue(DILocalVariable *DIVar,
+ DIExpression *DIExpr,
+ PHINode *APN) {
+  // Since we can't guarantee that the original dbg.declare intrinsic
+ // is removed by LowerDbgDeclare(), we need to make sure that we are
+ // not inserting the same dbg.value intrinsic over and over.
+ DbgValueList DbgValues;
+ FindAllocaDbgValues(DbgValues, APN);
+ for (auto DVI : DbgValues) {
+ assert (DVI->getValue() == APN);
+ assert (DVI->getOffset() == 0);
+ if ((DVI->getVariable() == DIVar) && (DVI->getExpression() == DIExpr))
+ return true;
+ }
+ return false;
+}
+
/// Inserts a llvm.dbg.value intrinsic before a store to an alloca'd value
/// that has an associated llvm.dbg.decl intrinsic.
-bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
+void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
StoreInst *SI, DIBuilder &Builder) {
auto *DIVar = DDI->getVariable();
auto *DIExpr = DDI->getExpression();
@@ -1073,26 +1102,27 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0));
if (ExtendedArg) {
- // We're now only describing a subset of the variable. The piece we're
+ // We're now only describing a subset of the variable. The fragment we're
// describing will always be smaller than the variable size, because
// VariableSize == Size of Alloca described by DDI. Since SI stores
// to the alloca described by DDI, if it's first operand is an extend,
// we're guaranteed that before extension, the value was narrower than
// the size of the alloca, hence the size of the described variable.
SmallVector<uint64_t, 3> Ops;
- unsigned PieceOffset = 0;
- // If this already is a bit piece, we drop the bit piece from the expression
- // and record the offset.
- if (DIExpr->isBitPiece()) {
+ unsigned FragmentOffset = 0;
+ // If this already is a bit fragment, we drop the bit fragment from the
+ // expression and record the offset.
+ auto Fragment = DIExpr->getFragmentInfo();
+ if (Fragment) {
Ops.append(DIExpr->elements_begin(), DIExpr->elements_end()-3);
- PieceOffset = DIExpr->getBitPieceOffset();
+ FragmentOffset = Fragment->OffsetInBits;
} else {
Ops.append(DIExpr->elements_begin(), DIExpr->elements_end());
}
- Ops.push_back(dwarf::DW_OP_bit_piece);
- Ops.push_back(PieceOffset); // Offset
+ Ops.push_back(dwarf::DW_OP_LLVM_fragment);
+ Ops.push_back(FragmentOffset);
const DataLayout &DL = DDI->getModule()->getDataLayout();
- Ops.push_back(DL.getTypeSizeInBits(ExtendedArg->getType())); // Size
+ Ops.push_back(DL.getTypeSizeInBits(ExtendedArg->getType()));
auto NewDIExpr = Builder.createExpression(Ops);
if (!LdStHasDebugValue(DIVar, NewDIExpr, SI))
Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, NewDIExpr,
@@ -1100,19 +1130,18 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
} else if (!LdStHasDebugValue(DIVar, DIExpr, SI))
Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar, DIExpr,
DDI->getDebugLoc(), SI);
- return true;
}
/// Inserts a llvm.dbg.value intrinsic before a load of an alloca'd value
/// that has an associated llvm.dbg.decl intrinsic.
-bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
+void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
LoadInst *LI, DIBuilder &Builder) {
auto *DIVar = DDI->getVariable();
auto *DIExpr = DDI->getExpression();
assert(DIVar && "Missing variable");
if (LdStHasDebugValue(DIVar, DIExpr, LI))
- return true;
+ return;
// We are now tracking the loaded value instead of the address. In the
// future if multi-location support is added to the IR, it might be
@@ -1121,7 +1150,28 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
Instruction *DbgValue = Builder.insertDbgValueIntrinsic(
LI, 0, DIVar, DIExpr, DDI->getDebugLoc(), (Instruction *)nullptr);
DbgValue->insertAfter(LI);
- return true;
+}
+
+/// Inserts a llvm.dbg.value intrinsic after a phi
+/// that has an associated llvm.dbg.decl intrinsic.
+void llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
+ PHINode *APN, DIBuilder &Builder) {
+ auto *DIVar = DDI->getVariable();
+ auto *DIExpr = DDI->getExpression();
+ assert(DIVar && "Missing variable");
+
+ if (PhiHasDebugValue(DIVar, DIExpr, APN))
+ return;
+
+ BasicBlock *BB = APN->getParent();
+ auto InsertionPt = BB->getFirstInsertionPt();
+
+ // The block may be a catchswitch block, which does not have a valid
+ // insertion point.
+ // FIXME: Insert dbg.value markers in the successors when appropriate.
+ if (InsertionPt != BB->end())
+ Builder.insertDbgValueIntrinsic(APN, 0, DIVar, DIExpr, DDI->getDebugLoc(),
+ &*InsertionPt);
}
/// Determine whether this alloca is either a VLA or an array.
@@ -1191,6 +1241,16 @@ DbgDeclareInst *llvm::FindAllocaDbgDeclare(Value *V) {
return nullptr;
}
+/// FindAllocaDbgValues - Finds the llvm.dbg.value intrinsics describing the
+/// alloca 'V', if any.
+void llvm::FindAllocaDbgValues(DbgValueList &DbgValues, Value *V) {
+ if (auto *L = LocalAsMetadata::getIfExists(V))
+ if (auto *MDV = MetadataAsValue::getIfExists(V->getContext(), L))
+ for (User *U : MDV->users())
+ if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(U))
+ DbgValues.push_back(DVI);
+}
+
static void DIExprAddDeref(SmallVectorImpl<uint64_t> &Expr) {
Expr.push_back(dwarf::DW_OP_deref);
}
@@ -1310,12 +1370,13 @@ unsigned llvm::removeAllNonTerminatorAndEHPadInstructions(BasicBlock *BB) {
return NumDeadInst;
}
-unsigned llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap) {
+unsigned llvm::changeToUnreachable(Instruction *I, bool UseLLVMTrap,
+ bool PreserveLCSSA) {
BasicBlock *BB = I->getParent();
// Loop over all of the successors, removing BB's entry from any PHI
// nodes.
for (BasicBlock *Successor : successors(BB))
- Successor->removePredecessor(BB);
+ Successor->removePredecessor(BB, PreserveLCSSA);
// Insert a call to llvm.trap right before this. This turns the undefined
// behavior into a hard fail instead of falling through into random code.
@@ -1360,6 +1421,43 @@ static void changeToCall(InvokeInst *II) {
II->eraseFromParent();
}
+BasicBlock *llvm::changeToInvokeAndSplitBasicBlock(CallInst *CI,
+ BasicBlock *UnwindEdge) {
+ BasicBlock *BB = CI->getParent();
+
+ // Convert this function call into an invoke instruction. First, split the
+ // basic block.
+ BasicBlock *Split =
+ BB->splitBasicBlock(CI->getIterator(), CI->getName() + ".noexc");
+
+ // Delete the unconditional branch inserted by splitBasicBlock
+ BB->getInstList().pop_back();
+
+ // Create the new invoke instruction.
+ SmallVector<Value *, 8> InvokeArgs(CI->arg_begin(), CI->arg_end());
+ SmallVector<OperandBundleDef, 1> OpBundles;
+
+ CI->getOperandBundlesAsDefs(OpBundles);
+
+ // Note: we're round tripping operand bundles through memory here, and that
+ // can potentially be avoided with a cleverer API design that we do not have
+ // as of this time.
+
+ InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), Split, UnwindEdge,
+ InvokeArgs, OpBundles, CI->getName(), BB);
+ II->setDebugLoc(CI->getDebugLoc());
+ II->setCallingConv(CI->getCallingConv());
+ II->setAttributes(CI->getAttributes());
+
+ // Make sure that anything using the call now uses the invoke! This also
+ // updates the CallGraph if present, because it uses a WeakVH.
+ CI->replaceAllUsesWith(II);
+
+ // Delete the original call
+ Split->getInstList().pop_front();
+ return Split;
+}
+
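Not part of the diff: the caller-side shape this helper enables, mirroring the InlineFunction.cpp hunk earlier in this patch; CI and UnwindEdge are assumed to already be in scope at the call site.

  // Sketch only: CI is rewritten into an invoke that unwinds to UnwindEdge;
  // the helper returns the split-off block holding the code that followed the
  // original call (the call itself has been erased from it).
  BasicBlock *Split = changeToInvokeAndSplitBasicBlock(CI, UnwindEdge);
  (void)Split;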
static bool markAliveBlocks(Function &F,
SmallPtrSetImpl<BasicBlock*> &Reachable) {
@@ -1586,10 +1684,10 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J,
SmallVector<std::pair<unsigned, MDNode *>, 4> Metadata;
K->dropUnknownNonDebugMetadata(KnownIDs);
K->getAllMetadataOtherThanDebugLoc(Metadata);
- for (unsigned i = 0, n = Metadata.size(); i < n; ++i) {
- unsigned Kind = Metadata[i].first;
+ for (const auto &MD : Metadata) {
+ unsigned Kind = MD.first;
MDNode *JMD = J->getMetadata(Kind);
- MDNode *KMD = Metadata[i].second;
+ MDNode *KMD = MD.second;
switch (Kind) {
default:
@@ -1646,6 +1744,17 @@ void llvm::combineMetadata(Instruction *K, const Instruction *J,
K->setMetadata(LLVMContext::MD_invariant_group, JMD);
}
+void llvm::combineMetadataForCSE(Instruction *K, const Instruction *J) {
+ unsigned KnownIDs[] = {
+ LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
+ LLVMContext::MD_noalias, LLVMContext::MD_range,
+ LLVMContext::MD_invariant_load, LLVMContext::MD_nonnull,
+ LLVMContext::MD_invariant_group, LLVMContext::MD_align,
+ LLVMContext::MD_dereferenceable,
+ LLVMContext::MD_dereferenceable_or_null};
+ combineMetadata(K, J, KnownIDs);
+}
+
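Not part of the diff: a hedged sketch of the intended call pattern for the new wrapper, assuming K is the instruction being kept and J the duplicate being removed by a CSE-style transform.

  // Sketch only: merge the metadata kinds that remain valid when J is folded
  // into K, then retire the duplicate instruction.
  combineMetadataForCSE(K, J);
  J->replaceAllUsesWith(K);
  J->eraseFromParent();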
unsigned llvm::replaceDominatedUsesWith(Value *From, Value *To,
DominatorTree &DT,
const BasicBlockEdge &Root) {
@@ -1703,6 +1812,7 @@ bool llvm::callsGCLeafFunction(ImmutableCallSite CS) {
return false;
}
+namespace {
/// A potential constituent of a bitreverse or bswap expression. See
/// collectBitParts for a fuller explanation.
struct BitPart {
@@ -1718,6 +1828,7 @@ struct BitPart {
enum { Unset = -1 };
};
+} // end anonymous namespace
/// Analyze the specified subexpression and see if it is capable of providing
/// pieces of a bswap or bitreverse. The subexpression provides a potential
@@ -1954,23 +2065,12 @@ bool llvm::recognizeBSwapOrBitReverseIdiom(
// in ASan/MSan/TSan/DFSan, and thus make us miss some memory accesses,
// we mark affected calls as NoBuiltin, which will disable optimization
// in CodeGen.
-void llvm::maybeMarkSanitizerLibraryCallNoBuiltin(CallInst *CI,
- const TargetLibraryInfo *TLI) {
+void llvm::maybeMarkSanitizerLibraryCallNoBuiltin(
+ CallInst *CI, const TargetLibraryInfo *TLI) {
Function *F = CI->getCalledFunction();
LibFunc::Func Func;
- if (!F || F->hasLocalLinkage() || !F->hasName() ||
- !TLI->getLibFunc(F->getName(), Func))
- return;
- switch (Func) {
- default: break;
- case LibFunc::memcmp:
- case LibFunc::memchr:
- case LibFunc::strcpy:
- case LibFunc::stpcpy:
- case LibFunc::strcmp:
- case LibFunc::strlen:
- case LibFunc::strnlen:
- CI->addAttribute(AttributeSet::FunctionIndex, Attribute::NoBuiltin);
- break;
- }
+ if (F && !F->hasLocalLinkage() && F->hasName() &&
+ TLI->getLibFunc(F->getName(), Func) && TLI->hasOptimizedCodeGen(Func) &&
+ !F->doesNotAccessMemory())
+ CI->addAttribute(AttributeSet::FunctionIndex, Attribute::NoBuiltin);
}
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
index 2846e8f235b7..00cda2af00c6 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp
@@ -361,25 +361,12 @@ static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
// Fix LCSSA form for L. Some values, which previously were only used inside
// L, can now be used in NewOuter loop. We need to insert phi-nodes for them
// in corresponding exit blocks.
+  // We don't need to form LCSSA recursively, because any use inside the newly
+  // created loop of a def from an inner loop would already go through an LCSSA
+  // phi node.
+ formLCSSA(*L, *DT, LI, SE);
- // Go through all instructions in OuterLoopBlocks and check if they are
- // using operands from the inner loop. In this case we'll need to fix LCSSA
- // for these instructions.
- SmallSetVector<Instruction *, 8> WorklistSet;
- for (BasicBlock *OuterBB: OuterLoopBlocks) {
- for (Instruction &I : *OuterBB) {
- for (Value *Op : I.operands()) {
- Instruction *OpI = dyn_cast<Instruction>(Op);
- if (!OpI || !L->contains(OpI))
- continue;
- WorklistSet.insert(OpI);
- }
- }
- }
- SmallVector<Instruction *, 8> Worklist(WorklistSet.begin(),
- WorklistSet.end());
- formLCSSAForInstructions(Worklist, *DT, *LI);
- assert(NewOuter->isRecursivelyLCSSAForm(*DT) &&
+ assert(NewOuter->isRecursivelyLCSSAForm(*DT, *LI) &&
"LCSSA is broken after separating nested loops!");
}
@@ -483,13 +470,21 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
}
// Now that all of the PHI nodes have been inserted and adjusted, modify the
- // backedge blocks to just to the BEBlock instead of the header.
+ // backedge blocks to jump to the BEBlock instead of the header.
+ // If one of the backedges has llvm.loop metadata attached, we remove
+ // it from the backedge and add it to BEBlock.
+ unsigned LoopMDKind = BEBlock->getContext().getMDKindID("llvm.loop");
+ MDNode *LoopMD = nullptr;
for (unsigned i = 0, e = BackedgeBlocks.size(); i != e; ++i) {
TerminatorInst *TI = BackedgeBlocks[i]->getTerminator();
+ if (!LoopMD)
+ LoopMD = TI->getMetadata(LoopMDKind);
+ TI->setMetadata(LoopMDKind, nullptr);
for (unsigned Op = 0, e = TI->getNumSuccessors(); Op != e; ++Op)
if (TI->getSuccessor(Op) == Header)
TI->setSuccessor(Op, BEBlock);
}
+ BEBlock->getTerminator()->setMetadata(LoopMDKind, LoopMD);
//===--- Update all analyses which we must preserve now -----------------===//
@@ -535,7 +530,7 @@ ReprocessLoop:
// Zap the dead pred's terminator and replace it with unreachable.
TerminatorInst *TI = P->getTerminator();
- changeToUnreachable(TI, /*UseLLVMTrap=*/false);
+ changeToUnreachable(TI, /*UseLLVMTrap=*/false, PreserveLCSSA);
Changed = true;
}
}
@@ -635,8 +630,10 @@ ReprocessLoop:
(PN = dyn_cast<PHINode>(I++)); )
if (Value *V = SimplifyInstruction(PN, DL, nullptr, DT, AC)) {
if (SE) SE->forgetValue(PN);
- PN->replaceAllUsesWith(V);
- PN->eraseFromParent();
+ if (!PreserveLCSSA || LI->replacementPreservesLCSSAForm(PN, V)) {
+ PN->replaceAllUsesWith(V);
+ PN->eraseFromParent();
+ }
}
// If this loop has multiple exits and the exits all go to the same
@@ -821,8 +818,8 @@ bool LoopSimplify::runOnFunction(Function &F) {
if (PreserveLCSSA) {
assert(DT && "DT not available.");
assert(LI && "LI not available.");
- bool InLCSSA =
- all_of(*LI, [&](Loop *L) { return L->isRecursivelyLCSSAForm(*DT); });
+ bool InLCSSA = all_of(
+ *LI, [&](Loop *L) { return L->isRecursivelyLCSSAForm(*DT, *LI); });
assert(InLCSSA && "Requested to preserve LCSSA, but it's already broken.");
}
#endif
@@ -833,8 +830,8 @@ bool LoopSimplify::runOnFunction(Function &F) {
#ifndef NDEBUG
if (PreserveLCSSA) {
- bool InLCSSA =
- all_of(*LI, [&](Loop *L) { return L->isRecursivelyLCSSAForm(*DT); });
+ bool InLCSSA = all_of(
+ *LI, [&](Loop *L) { return L->isRecursivelyLCSSAForm(*DT, *LI); });
assert(InLCSSA && "LCSSA is broken after loop-simplify.");
}
#endif
@@ -842,7 +839,7 @@ bool LoopSimplify::runOnFunction(Function &F) {
}
PreservedAnalyses LoopSimplifyPass::run(Function &F,
- AnalysisManager<Function> &AM) {
+ FunctionAnalysisManager &AM) {
bool Changed = false;
LoopInfo *LI = &AM.getResult<LoopAnalysis>(F);
DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
@@ -854,6 +851,10 @@ PreservedAnalyses LoopSimplifyPass::run(Function &F,
for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
Changed |= simplifyLoop(*I, DT, LI, SE, AC, true /* PreserveLCSSA */);
+ // FIXME: We need to invalidate this to avoid PR28400. Is there a better
+ // solution?
+ AM.invalidate<ScalarEvolutionAnalysis>(F);
+
if (!Changed)
return PreservedAnalyses::all();
PreservedAnalyses PA;
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index 7f1f78fa8b41..e551e4b47ac1 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -23,11 +23,12 @@
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopIterator.h"
#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -46,7 +47,7 @@ STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled");
STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)");
static cl::opt<bool>
-UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(true), cl::Hidden,
+UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(false), cl::Hidden,
cl::desc("Allow runtime unrolled loops to be unrolled "
"with epilog instead of prolog."));
@@ -177,14 +178,19 @@ static bool needToInsertPhisForLCSSA(Loop *L, std::vector<BasicBlock *> Blocks,
/// branch instruction. However, if the trip count (and multiple) are not known,
/// loop unrolling will mostly produce more code that is no faster.
///
-/// TripCount is generally defined as the number of times the loop header
-/// executes. UnrollLoop relaxes the definition to permit early exits: here
-/// TripCount is the iteration on which control exits LatchBlock if no early
-/// exits were taken. Note that UnrollLoop assumes that the loop counter test
-/// terminates LatchBlock in order to remove unnecesssary instances of the
-/// test. In other words, control may exit the loop prior to TripCount
-/// iterations via an early branch, but control may not exit the loop from the
-/// LatchBlock's terminator prior to TripCount iterations.
+/// TripCount is the upper bound of the iteration on which control exits
+/// LatchBlock. Control may exit the loop prior to TripCount iterations either
+/// via an early branch in another loop block or via the LatchBlock terminator.
+/// This is relaxed from the general definition of trip count, which is the
+/// number of times the loop header executes. Note that UnrollLoop assumes that
+/// the loop counter test is in LatchBlock in order to remove unnecessary
+/// instances of the test. If control can exit the loop from the LatchBlock's
+/// terminator prior to TripCount iterations, the PreserveCondBr flag needs to
+/// be set.
+///
+/// PreserveCondBr indicates whether the conditional branch of the LatchBlock
+/// needs to be preserved. It is needed when we use trip count upper bound to
+/// fully unroll the loop. If PreserveOnlyFirst is also set then only the first
+/// conditional branch needs to be preserved.
///
/// Similarly, TripMultiple divides the number of times that the LatchBlock may
/// execute without exiting the loop.
@@ -196,15 +202,21 @@ static bool needToInsertPhisForLCSSA(Loop *L, std::vector<BasicBlock *> Blocks,
/// runtime-unroll the loop if computing RuntimeTripCount will be expensive and
/// AllowExpensiveTripCount is false.
///
+/// If we want to perform PGO-based loop peeling, PeelCount is set to the
+/// number of iterations we want to peel off.
+///
/// The LoopInfo Analysis that is passed will be kept consistent.
///
/// This utility preserves LoopInfo. It will also preserve ScalarEvolution and
/// DominatorTree if they are non-null.
bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
bool AllowRuntime, bool AllowExpensiveTripCount,
- unsigned TripMultiple, LoopInfo *LI, ScalarEvolution *SE,
- DominatorTree *DT, AssumptionCache *AC,
+ bool PreserveCondBr, bool PreserveOnlyFirst,
+ unsigned TripMultiple, unsigned PeelCount, LoopInfo *LI,
+ ScalarEvolution *SE, DominatorTree *DT,
+ AssumptionCache *AC, OptimizationRemarkEmitter *ORE,
bool PreserveLCSSA) {
+
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) {
DEBUG(dbgs() << " Can't unroll; loop preheader-insertion failed.\n");
@@ -250,9 +262,8 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
if (TripCount != 0 && Count > TripCount)
Count = TripCount;
- // Don't enter the unroll code if there is nothing to do. This way we don't
- // need to support "partial unrolling by 1".
- if (TripCount == 0 && Count < 2)
+ // Don't enter the unroll code if there is nothing to do.
+ if (TripCount == 0 && Count < 2 && PeelCount == 0)
return false;
assert(Count > 0);
@@ -272,14 +283,22 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
// now we just recompute LCSSA for the outer loop, but it should be possible
// to fix it in-place.
bool NeedToFixLCSSA = PreserveLCSSA && CompletelyUnroll &&
- std::any_of(ExitBlocks.begin(), ExitBlocks.end(),
- [&](BasicBlock *BB) { return isa<PHINode>(BB->begin()); });
+ any_of(ExitBlocks, [](const BasicBlock *BB) {
+ return isa<PHINode>(BB->begin());
+ });
// We assume a run-time trip count if the compiler cannot
// figure out the loop trip count and the unroll-runtime
// flag is specified.
bool RuntimeTripCount = (TripCount == 0 && Count > 0 && AllowRuntime);
+ assert((!RuntimeTripCount || !PeelCount) &&
+ "Did not expect runtime trip-count unrolling "
+ "and peeling for the same loop");
+
+ if (PeelCount)
+ peelLoop(L, PeelCount, LI, SE, DT, PreserveLCSSA);
+
// Loops containing convergent instructions must have a count that divides
// their TripMultiple.
DEBUG(
@@ -293,9 +312,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
"Unroll count must divide trip multiple if loop contains a "
"convergent operation.");
});
- // Don't output the runtime loop remainder if Count is a multiple of
- // TripMultiple. Such a remainder is never needed, and is unsafe if the loop
- // contains a convergent instruction.
+
if (RuntimeTripCount && TripMultiple % Count != 0 &&
!UnrollRuntimeLoopRemainder(L, Count, AllowExpensiveTripCount,
UnrollRuntimeEpilog, LI, SE, DT,
@@ -322,35 +339,40 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
(unsigned)GreatestCommonDivisor64(Count, TripMultiple);
}
+ using namespace ore;
// Report the unrolling decision.
- DebugLoc LoopLoc = L->getStartLoc();
- Function *F = Header->getParent();
- LLVMContext &Ctx = F->getContext();
-
if (CompletelyUnroll) {
DEBUG(dbgs() << "COMPLETELY UNROLLING loop %" << Header->getName()
<< " with trip count " << TripCount << "!\n");
- emitOptimizationRemark(Ctx, DEBUG_TYPE, *F, LoopLoc,
- Twine("completely unrolled loop with ") +
- Twine(TripCount) + " iterations");
+ ORE->emit(OptimizationRemark(DEBUG_TYPE, "FullyUnrolled", L->getStartLoc(),
+ L->getHeader())
+ << "completely unrolled loop with "
+ << NV("UnrollCount", TripCount) << " iterations");
+ } else if (PeelCount) {
+ DEBUG(dbgs() << "PEELING loop %" << Header->getName()
+ << " with iteration count " << PeelCount << "!\n");
+ ORE->emit(OptimizationRemark(DEBUG_TYPE, "Peeled", L->getStartLoc(),
+ L->getHeader())
+ << " peeled loop by " << NV("PeelCount", PeelCount)
+ << " iterations");
} else {
- auto EmitDiag = [&](const Twine &T) {
- emitOptimizationRemark(Ctx, DEBUG_TYPE, *F, LoopLoc,
- "unrolled loop by a factor of " + Twine(Count) +
- T);
- };
+ OptimizationRemark Diag(DEBUG_TYPE, "PartialUnrolled", L->getStartLoc(),
+ L->getHeader());
+ Diag << "unrolled loop by a factor of " << NV("UnrollCount", Count);
DEBUG(dbgs() << "UNROLLING loop %" << Header->getName()
<< " by " << Count);
if (TripMultiple == 0 || BreakoutTrip != TripMultiple) {
DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip);
- EmitDiag(" with a breakout at trip " + Twine(BreakoutTrip));
+ ORE->emit(Diag << " with a breakout at trip "
+ << NV("BreakoutTrip", BreakoutTrip));
} else if (TripMultiple != 1) {
DEBUG(dbgs() << " with " << TripMultiple << " trips per branch");
- EmitDiag(" with " + Twine(TripMultiple) + " trips per branch");
+ ORE->emit(Diag << " with " << NV("TripMultiple", TripMultiple)
+ << " trips per branch");
} else if (RuntimeTripCount) {
DEBUG(dbgs() << " with run-time trip count");
- EmitDiag(" with run-time trip count");
+ ORE->emit(Diag << " with run-time trip count");
}
DEBUG(dbgs() << "!\n");
}
@@ -382,6 +404,15 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
LoopBlocksDFS::RPOIterator BlockEnd = DFS.endRPO();
std::vector<BasicBlock*> UnrolledLoopBlocks = L->getBlocks();
+
+ // Loop Unrolling might create new loops. While we do preserve LoopInfo, we
+ // might break loop-simplified form for these loops (as they, e.g., would
+ // share the same exit blocks). We'll keep track of loops for which we can
+ // break this so that later we can re-simplify them.
+ SmallSetVector<Loop *, 4> LoopsToSimplify;
+ for (Loop *SubLoop : *L)
+ LoopsToSimplify.insert(SubLoop);
+
for (unsigned It = 1; It != Count; ++It) {
std::vector<BasicBlock*> NewBlocks;
SmallDenseMap<const Loop *, Loop *, 4> NewLoops;
@@ -412,6 +443,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
"Expected parent loop before sub-loop in RPO");
NewLoop = new Loop;
NewLoopParent->addChildLoop(NewLoop);
+ LoopsToSimplify.insert(NewLoop);
// Forget the old loop, since its inputs may have changed.
if (SE)
@@ -480,9 +512,14 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
}
// Remap all instructions in the most recent iteration
- for (BasicBlock *NewBlock : NewBlocks)
- for (Instruction &I : *NewBlock)
+ for (BasicBlock *NewBlock : NewBlocks) {
+ for (Instruction &I : *NewBlock) {
::remapInstruction(&I, LastValueMap);
+ if (auto *II = dyn_cast<IntrinsicInst>(&I))
+ if (II->getIntrinsicID() == Intrinsic::assume)
+ AC->registerAssumption(II);
+ }
+ }
}
// Loop over the PHI nodes in the original block, setting incoming values.
@@ -524,12 +561,16 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
if (CompletelyUnroll) {
if (j == 0)
Dest = LoopExit;
- NeedConditional = false;
- }
-
- // If we know the trip count or a multiple of it, we can safely use an
- // unconditional branch for some iterations.
- if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) {
+ // If using trip count upper bound to completely unroll, we need to keep
+ // the conditional branch except the last one because the loop may exit
+ // after any iteration.
+ assert(NeedConditional &&
+ "NeedCondition cannot be modified by both complete "
+ "unrolling and runtime unrolling");
+ NeedConditional = (PreserveCondBr && j && !(PreserveOnlyFirst && i != 0));
+ } else if (j != BreakoutTrip && (TripMultiple == 0 || j % TripMultiple != 0)) {
+ // If we know the trip count or a multiple of it, we can safely use an
+ // unconditional branch for some iterations.
NeedConditional = false;
}
@@ -595,10 +636,6 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
}
}
- // FIXME: We could register any cloned assumptions instead of clearing the
- // whole function's cache.
- AC->clear();
-
// FIXME: We only preserve DT info for complete unrolling now. Incrementally
// updating domtree after partial loop unrolling should also be easy.
if (DT && !CompletelyUnroll)
@@ -607,7 +644,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
DEBUG(DT->verifyDomTree());
// Simplify any new induction variables in the partially unrolled loop.
- if (SE && !CompletelyUnroll) {
+ if (SE && !CompletelyUnroll && Count > 1) {
SmallVector<WeakVH, 16> DeadInsts;
simplifyLoopIVs(L, SE, DT, LI, DeadInsts);
@@ -636,6 +673,11 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
}
}
+ // TODO: after peeling or unrolling, previously loop-variant conditions are
+ // likely to fold to constants; eagerly propagating those here will require
+ // fewer cleanup passes to be run. Alternatively, a LoopEarlyCSE might be
+ // appropriate.
+
NumCompletelyUnrolled += CompletelyUnroll;
++NumUnrolled;
@@ -663,6 +705,11 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
if (!OuterL && !CompletelyUnroll)
OuterL = L;
if (OuterL) {
+ // OuterL includes all loops for which we can break loop-simplify, so
+ // it's sufficient to simplify only it (it'll recursively simplify inner
+ // loops too).
+ // TODO: That potentially might be compile-time expensive. We should try
+ // to fix the loop-simplified form incrementally.
simplifyLoop(OuterL, DT, LI, SE, AC, PreserveLCSSA);
// LCSSA must be performed on the outermost affected loop. The unrolled
@@ -678,6 +725,10 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool Force,
else
assert(OuterL->isLCSSAForm(*DT) &&
"Loops should be in LCSSA form after loop-unroll.");
+ } else {
+ // Simplify loops for which we might've broken loop-simplify form.
+ for (Loop *SubLoop : LoopsToSimplify)
+ simplifyLoop(SubLoop, DT, LI, SE, AC, PreserveLCSSA);
}
}
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
new file mode 100644
index 000000000000..dc526a20c903
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUnrollPeel.cpp
@@ -0,0 +1,405 @@
+//===-- LoopUnrollPeel.cpp - Loop peeling utilities -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements some loop unrolling utilities for peeling loops
+// with dynamically inferred (from PGO) trip counts. See LoopUnroll.cpp for
+// unrolling loops with compile-time constant trip counts.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Analysis/LoopPass.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+#include "llvm/Transforms/Utils/UnrollLoop.h"
+#include <algorithm>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "loop-unroll"
+STATISTIC(NumPeeled, "Number of loops peeled");
+
+static cl::opt<unsigned> UnrollPeelMaxCount(
+ "unroll-peel-max-count", cl::init(7), cl::Hidden,
+ cl::desc("Max average trip count which will cause loop peeling."));
+
+static cl::opt<unsigned> UnrollForcePeelCount(
+ "unroll-force-peel-count", cl::init(0), cl::Hidden,
+ cl::desc("Force a peel count regardless of profiling information."));
+
+// Check whether we are capable of peeling this loop.
+static bool canPeel(Loop *L) {
+ // Make sure the loop is in simplified form
+ if (!L->isLoopSimplifyForm())
+ return false;
+
+ // Only peel loops that contain a single exit
+ if (!L->getExitingBlock() || !L->getUniqueExitBlock())
+ return false;
+
+ return true;
+}
+
+// Compute the number of iterations we want to peel off and store it in
+// UP.PeelCount.
+void llvm::computePeelCount(Loop *L, unsigned LoopSize,
+ TargetTransformInfo::UnrollingPreferences &UP) {
+ UP.PeelCount = 0;
+ if (!canPeel(L))
+ return;
+
+ // Only try to peel innermost loops.
+ if (!L->empty())
+ return;
+
+ // If the user provided a peel count, use that.
+ bool UserPeelCount = UnrollForcePeelCount.getNumOccurrences() > 0;
+ if (UserPeelCount) {
+ DEBUG(dbgs() << "Force-peeling first " << UnrollForcePeelCount
+ << " iterations.\n");
+ UP.PeelCount = UnrollForcePeelCount;
+ return;
+ }
+
+ // If we don't know the trip count, but have reason to believe the average
+ // trip count is low, peeling should be beneficial, since we will usually
+ // hit the peeled section.
+ // We only do this in the presence of profile information, since otherwise
+ // our estimates of the trip count are not reliable enough.
+ if (UP.AllowPeeling && L->getHeader()->getParent()->getEntryCount()) {
+ Optional<unsigned> PeelCount = getLoopEstimatedTripCount(L);
+ if (!PeelCount)
+ return;
+
+ DEBUG(dbgs() << "Profile-based estimated trip count is " << *PeelCount
+ << "\n");
+
+ if (*PeelCount) {
+ if ((*PeelCount <= UnrollPeelMaxCount) &&
+ (LoopSize * (*PeelCount + 1) <= UP.Threshold)) {
+ DEBUG(dbgs() << "Peeling first " << *PeelCount << " iterations.\n");
+ UP.PeelCount = *PeelCount;
+ return;
+ }
+ DEBUG(dbgs() << "Requested peel count: " << *PeelCount << "\n");
+ DEBUG(dbgs() << "Max peel count: " << UnrollPeelMaxCount << "\n");
+ DEBUG(dbgs() << "Peel cost: " << LoopSize * (*PeelCount + 1) << "\n");
+ DEBUG(dbgs() << "Max peel cost: " << UP.Threshold << "\n");
+ }
+ }
+
+ return;
+}
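+// Illustrative numbers (hypothetical, not from a real profile): with a
+// profile-estimated trip count of 3, a LoopSize of 10, and the default
+// unroll-peel-max-count of 7, the loop is peeled only if the estimated cost
+// of 10 * (3 + 1) = 40 does not exceed UP.Threshold.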
+
+/// \brief Update the branch weights of the latch of a peeled-off loop
+/// iteration.
+/// This sets the branch weights for the latch of the recently peeled off loop
+/// iteration correctly.
+/// Our goal is to make sure that:
+/// a) The total weight of all the copies of the loop body is preserved.
+/// b) The total weight of the loop exit is preserved.
+/// c) The body weight is reasonably distributed between the peeled iterations.
+///
+/// \param Header The copy of the header block that belongs to the next iteration.
+/// \param LatchBR The copy of the latch branch that belongs to this iteration.
+/// \param IterNumber The serial number of the iteration that was just
+/// peeled off.
+/// \param AvgIters The average number of iterations we expect the loop to have.
+/// \param[in,out] PeeledHeaderWeight The total number of dynamic loop
+/// iterations that are unaccounted for. As an input, it represents the number
+/// of times we expect to enter the header of the iteration currently being
+/// peeled off. The output is the number of times we expect to enter the
+/// header of the next iteration.
+static void updateBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
+ unsigned IterNumber, unsigned AvgIters,
+ uint64_t &PeeledHeaderWeight) {
+
+ // FIXME: Pick a more realistic distribution.
+ // Currently the proportion of weight we assign to the fall-through
+ // side of the branch drops linearly with the iteration number, and we use
+ // a 0.9 fudge factor to make the drop-off less sharp...
+ if (PeeledHeaderWeight) {
+ uint64_t FallThruWeight =
+ PeeledHeaderWeight * ((float)(AvgIters - IterNumber) / AvgIters * 0.9);
+ uint64_t ExitWeight = PeeledHeaderWeight - FallThruWeight;
+ PeeledHeaderWeight -= ExitWeight;
+
+ unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);
+ MDBuilder MDB(LatchBR->getContext());
+ MDNode *WeightNode =
+ HeaderIdx ? MDB.createBranchWeights(ExitWeight, FallThruWeight)
+ : MDB.createBranchWeights(FallThruWeight, ExitWeight);
+ LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
+ }
+}
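+// For illustration (hypothetical weights): with AvgIters = 4 and an incoming
+// PeeledHeaderWeight of 100, iteration 0 gets FallThruWeight = 100 * (4/4 *
+// 0.9) = 90 and ExitWeight = 10, leaving 90 for iteration 1, which in turn
+// gets FallThruWeight = 90 * (3/4 * 0.9) = 60 (truncated from 60.75) and
+// ExitWeight = 30.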
+
+/// \brief Clones the body of the loop L, putting it between \p InsertTop and \p
+/// InsertBot.
+/// \param IterNumber The serial number of the iteration currently being
+/// peeled off.
+/// \param Exit The exit block of the original loop.
+/// \param[out] NewBlocks A list of the blocks in the newly created clone
+/// \param[out] VMap The value map between the loop and the new clone.
+/// \param LoopBlocks A helper for DFS-traversal of the loop.
+/// \param LVMap A value-map that maps instructions from the original loop to
+/// instructions in the last peeled-off iteration.
+static void cloneLoopBlocks(Loop *L, unsigned IterNumber, BasicBlock *InsertTop,
+ BasicBlock *InsertBot, BasicBlock *Exit,
+ SmallVectorImpl<BasicBlock *> &NewBlocks,
+ LoopBlocksDFS &LoopBlocks, ValueToValueMapTy &VMap,
+ ValueToValueMapTy &LVMap, LoopInfo *LI) {
+
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *Latch = L->getLoopLatch();
+ BasicBlock *PreHeader = L->getLoopPreheader();
+
+ Function *F = Header->getParent();
+ LoopBlocksDFS::RPOIterator BlockBegin = LoopBlocks.beginRPO();
+ LoopBlocksDFS::RPOIterator BlockEnd = LoopBlocks.endRPO();
+ Loop *ParentLoop = L->getParentLoop();
+
+ // For each block in the original loop, create a new copy,
+ // and update the value map with the newly created values.
+ for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
+ BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, ".peel", F);
+ NewBlocks.push_back(NewBB);
+
+ if (ParentLoop)
+ ParentLoop->addBasicBlockToLoop(NewBB, *LI);
+
+ VMap[*BB] = NewBB;
+ }
+
+ // Hook up the control flow for the newly inserted blocks.
+ // The new header is hooked up directly to the "top", which is either
+ // the original loop preheader (for the first iteration) or the previous
+ // iteration's exiting block (for every other iteration)
+ InsertTop->getTerminator()->setSuccessor(0, cast<BasicBlock>(VMap[Header]));
+
+ // Similarly, for the latch:
+ // The original exiting edge is still hooked up to the loop exit.
+ // The backedge now goes to the "bottom", which is either the loop's real
+ // header (for the last peeled iteration) or the copied header of the next
+ // iteration (for every other iteration)
+ BranchInst *LatchBR =
+ cast<BranchInst>(cast<BasicBlock>(VMap[Latch])->getTerminator());
+ unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);
+ LatchBR->setSuccessor(HeaderIdx, InsertBot);
+ LatchBR->setSuccessor(1 - HeaderIdx, Exit);
+
+ // The new copy of the loop body starts with a bunch of PHI nodes
+ // that pick an incoming value from either the preheader, or the previous
+ // loop iteration. Since this copy is no longer part of the loop, we
+ // resolve this statically:
+ // For the first iteration, we use the value from the preheader directly.
+ // For any other iteration, we replace the phi with the value generated by
+ // the immediately preceding clone of the loop body (which represents
+ // the previous iteration).
+ for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
+ PHINode *NewPHI = cast<PHINode>(VMap[&*I]);
+ if (IterNumber == 0) {
+ VMap[&*I] = NewPHI->getIncomingValueForBlock(PreHeader);
+ } else {
+ Value *LatchVal = NewPHI->getIncomingValueForBlock(Latch);
+ Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
+ if (LatchInst && L->contains(LatchInst))
+ VMap[&*I] = LVMap[LatchInst];
+ else
+ VMap[&*I] = LatchVal;
+ }
+ cast<BasicBlock>(VMap[Header])->getInstList().erase(NewPHI);
+ }
+
+ // Fix up the outgoing values - we need to add a value for the iteration
+ // we've just created. Note that this must happen *after* the incoming
+ // values are adjusted, since the value going out of the latch may also be
+ // a value coming into the header.
+ for (BasicBlock::iterator I = Exit->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PHI = cast<PHINode>(I);
+ Value *LatchVal = PHI->getIncomingValueForBlock(Latch);
+ Instruction *LatchInst = dyn_cast<Instruction>(LatchVal);
+ if (LatchInst && L->contains(LatchInst))
+ LatchVal = VMap[LatchVal];
+ PHI->addIncoming(LatchVal, cast<BasicBlock>(VMap[Latch]));
+ }
+
+ // LastValueMap is updated with the values for the current loop
+ // which are used the next time this function is called.
+ for (const auto &KV : VMap)
+ LVMap[KV.first] = KV.second;
+}
+
+/// \brief Peel off the first \p PeelCount iterations of loop \p L.
+///
+/// Note that this does not peel them off as a single straight-line block.
+/// Rather, each iteration is peeled off separately, and needs to check the
+/// exit condition.
+/// For loops that dynamically execute \p PeelCount iterations or less
+/// this provides a benefit, since the peeled off iterations, which account
+/// for the bulk of dynamic execution, can be further simplified by scalar
+/// optimizations.
+bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
+ ScalarEvolution *SE, DominatorTree *DT,
+ bool PreserveLCSSA) {
+ if (!canPeel(L))
+ return false;
+
+ LoopBlocksDFS LoopBlocks(L);
+ LoopBlocks.perform(LI);
+
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *PreHeader = L->getLoopPreheader();
+ BasicBlock *Latch = L->getLoopLatch();
+ BasicBlock *Exit = L->getUniqueExitBlock();
+
+ Function *F = Header->getParent();
+
+ // Set up all the necessary basic blocks. It is convenient to split the
+ // preheader into 3 parts - two blocks to anchor the peeled copy of the loop
+ // body, and a new preheader for the "real" loop.
+
+ // Peeling the first iteration transforms this:
+ //
+ // PreHeader:
+ // ...
+ // Header:
+ // LoopBody
+ // If (cond) goto Header
+ // Exit:
+ //
+ // into
+ //
+ // InsertTop:
+ // LoopBody
+ // If (!cond) goto Exit
+ // InsertBot:
+ // NewPreHeader:
+ // ...
+ // Header:
+ // LoopBody
+ // If (cond) goto Header
+ // Exit:
+ //
+ // Each following iteration will split the current bottom anchor in two,
+ // and put the new copy of the loop body between these two blocks. That is,
+ // after peeling another iteration from the example above, we'll split
+ // InsertBot, and get:
+ //
+ // InsertTop:
+ // LoopBody
+ // If (!cond) goto Exit
+ // InsertBot:
+ // LoopBody
+ // If (!cond) goto Exit
+ // InsertBot.next:
+ // NewPreHeader:
+ // ...
+ // Header:
+ // LoopBody
+ // If (cond) goto Header
+ // Exit:
+
+ BasicBlock *InsertTop = SplitEdge(PreHeader, Header, DT, LI);
+ BasicBlock *InsertBot =
+ SplitBlock(InsertTop, InsertTop->getTerminator(), DT, LI);
+ BasicBlock *NewPreHeader =
+ SplitBlock(InsertBot, InsertBot->getTerminator(), DT, LI);
+
+ InsertTop->setName(Header->getName() + ".peel.begin");
+ InsertBot->setName(Header->getName() + ".peel.next");
+ NewPreHeader->setName(PreHeader->getName() + ".peel.newph");
+
+ ValueToValueMapTy LVMap;
+
+ // If we have branch weight information, we'll want to update it for the
+ // newly created branches.
+ BranchInst *LatchBR =
+ cast<BranchInst>(cast<BasicBlock>(Latch)->getTerminator());
+ unsigned HeaderIdx = (LatchBR->getSuccessor(0) == Header ? 0 : 1);
+
+ uint64_t TrueWeight, FalseWeight;
+ uint64_t ExitWeight = 0, BackEdgeWeight = 0;
+ if (LatchBR->extractProfMetadata(TrueWeight, FalseWeight)) {
+ ExitWeight = HeaderIdx ? TrueWeight : FalseWeight;
+ BackEdgeWeight = HeaderIdx ? FalseWeight : TrueWeight;
+ }
+
+ // For each peeled-off iteration, make a copy of the loop.
+ for (unsigned Iter = 0; Iter < PeelCount; ++Iter) {
+ SmallVector<BasicBlock *, 8> NewBlocks;
+ ValueToValueMapTy VMap;
+
+ // The exit weight of the previous iteration is the header entry weight
+ // of the current iteration. So this is exactly how many dynamic iterations
+ // the current peeled-off static iteration uses up.
+ // FIXME: due to the way the distribution is constructed, we need a
+ // guard here to make sure we don't end up with non-positive weights.
+ if (ExitWeight < BackEdgeWeight)
+ BackEdgeWeight -= ExitWeight;
+ else
+ BackEdgeWeight = 1;
+
+ cloneLoopBlocks(L, Iter, InsertTop, InsertBot, Exit,
+ NewBlocks, LoopBlocks, VMap, LVMap, LI);
+ updateBranchWeights(InsertBot, cast<BranchInst>(VMap[LatchBR]), Iter,
+ PeelCount, ExitWeight);
+
+ InsertTop = InsertBot;
+ InsertBot = SplitBlock(InsertBot, InsertBot->getTerminator(), DT, LI);
+ InsertBot->setName(Header->getName() + ".peel.next");
+
+ F->getBasicBlockList().splice(InsertTop->getIterator(),
+ F->getBasicBlockList(),
+ NewBlocks[0]->getIterator(), F->end());
+
+ // Remap to use values from the current iteration instead of the
+ // previous one.
+ remapInstructionsInBlocks(NewBlocks, VMap);
+ }
+
+ // Now adjust the phi nodes in the loop header to get their initial values
+ // from the last peeled-off iteration instead of the preheader.
+ for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PHI = cast<PHINode>(I);
+ Value *NewVal = PHI->getIncomingValueForBlock(Latch);
+ Instruction *LatchInst = dyn_cast<Instruction>(NewVal);
+ if (LatchInst && L->contains(LatchInst))
+ NewVal = LVMap[LatchInst];
+
+ PHI->setIncomingValue(PHI->getBasicBlockIndex(NewPreHeader), NewVal);
+ }
+
+ // Adjust the branch weights on the loop exit.
+ if (ExitWeight) {
+ MDBuilder MDB(LatchBR->getContext());
+ MDNode *WeightNode =
+ HeaderIdx ? MDB.createBranchWeights(ExitWeight, BackEdgeWeight)
+ : MDB.createBranchWeights(BackEdgeWeight, ExitWeight);
+ LatchBR->setMetadata(LLVMContext::MD_prof, WeightNode);
+ }
+
+ // If the loop is nested, we changed the parent loop; update SE.
+ if (Loop *ParentLoop = L->getParentLoop())
+ SE->forgetLoop(ParentLoop);
+
+ NumPeeled++;
+
+ return true;
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 861a50cf354d..5758a415f12b 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -112,6 +112,18 @@ static void ConnectProlog(Loop *L, Value *BECount, unsigned Count,
}
}
+ // Make sure that the created prolog loop is in simplified form.
+ SmallVector<BasicBlock *, 4> PrologExitPreds;
+ Loop *PrologLoop = LI->getLoopFor(PrologLatch);
+ if (PrologLoop) {
+ for (BasicBlock *PredBB : predecessors(PrologExit))
+ if (PrologLoop->contains(PredBB))
+ PrologExitPreds.push_back(PredBB);
+
+ SplitBlockPredecessors(PrologExit, PrologExitPreds, ".unr-lcssa", DT, LI,
+ PreserveLCSSA);
+ }
+
// Create a branch around the original loop, which is taken if there are no
// iterations remaining to be executed after running the prologue.
Instruction *InsertPt = PrologExit->getTerminator();
@@ -479,11 +491,6 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
if (Log2_32(Count) > BEWidth)
return false;
- // If this loop is nested, then the loop unroller changes the code in the
- // parent loop, so the Scalar Evolution pass needs to be run again.
- if (Loop *ParentLoop = L->getParentLoop())
- SE->forgetLoop(ParentLoop);
-
BasicBlock *Latch = L->getLoopLatch();
// Loop structure is the following:
@@ -673,6 +680,12 @@ bool llvm::UnrollRuntimeLoopRemainder(Loop *L, unsigned Count,
ConnectProlog(L, BECount, Count, PrologExit, PreHeader, NewPreHeader,
VMap, DT, LI, PreserveLCSSA);
}
+
+ // If this loop is nested, then the loop unroller changes the code in the
+ // parent loop, so the Scalar Evolution pass needs to be run again.
+ if (Loop *ParentLoop = L->getParentLoop())
+ SE->forgetLoop(ParentLoop);
+
NumRuntimeUnrolled++;
return true;
}
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 3902c67c6a01..09e9f1ddc7fe 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -11,14 +11,17 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
-#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
-#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
@@ -26,7 +29,6 @@
#include "llvm/IR/ValueHandle.h"
#include "llvm/Pass.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Transforms/Utils/LoopUtils.h"
using namespace llvm;
using namespace llvm::PatternMatch;
@@ -305,7 +307,7 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
// The instruction used by an outside user must be the last instruction
// before we feed back to the reduction phi. Otherwise, we lose VF-1
// operations on the value.
- if (std::find(Phi->op_begin(), Phi->op_end(), Cur) == Phi->op_end())
+ if (!is_contained(Phi->operands(), Cur))
return false;
ExitInstruction = Cur;
@@ -654,8 +656,8 @@ Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder,
}
InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
- const SCEV *Step)
- : StartValue(Start), IK(K), Step(Step) {
+ const SCEV *Step, BinaryOperator *BOp)
+ : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) {
assert(IK != IK_NoInduction && "Not an induction");
// Start value type should match the induction kind and the value
@@ -672,7 +674,15 @@ InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
assert((IK != IK_PtrInduction || getConstIntStepValue()) &&
"Step value should be constant for pointer induction");
- assert(Step->getType()->isIntegerTy() && "StepValue is not an integer");
+ assert((IK == IK_FpInduction || Step->getType()->isIntegerTy()) &&
+ "StepValue is not an integer");
+
+ assert((IK != IK_FpInduction || Step->getType()->isFloatingPointTy()) &&
+ "StepValue is not FP for FpInduction");
+ assert((IK != IK_FpInduction || (InductionBinOp &&
+ (InductionBinOp->getOpcode() == Instruction::FAdd ||
+ InductionBinOp->getOpcode() == Instruction::FSub))) &&
+ "Binary opcode should be specified for FP induction");
}
int InductionDescriptor::getConsecutiveDirection() const {
@@ -693,6 +703,8 @@ Value *InductionDescriptor::transform(IRBuilder<> &B, Value *Index,
const DataLayout& DL) const {
SCEVExpander Exp(*SE, DL, "induction");
+ assert(Index->getType() == Step->getType() &&
+ "Index type does not match StepValue type");
switch (IK) {
case IK_IntInduction: {
assert(Index->getType() == StartValue->getType() &&
@@ -717,29 +729,113 @@ Value *InductionDescriptor::transform(IRBuilder<> &B, Value *Index,
return Exp.expandCodeFor(S, StartValue->getType(), &*B.GetInsertPoint());
}
case IK_PtrInduction: {
- assert(Index->getType() == Step->getType() &&
- "Index type does not match StepValue type");
assert(isa<SCEVConstant>(Step) &&
"Expected constant step for pointer induction");
const SCEV *S = SE->getMulExpr(SE->getSCEV(Index), Step);
Index = Exp.expandCodeFor(S, Index->getType(), &*B.GetInsertPoint());
return B.CreateGEP(nullptr, StartValue, Index);
}
+ case IK_FpInduction: {
+ assert(Step->getType()->isFloatingPointTy() && "Expected FP Step value");
+ assert(InductionBinOp &&
+ (InductionBinOp->getOpcode() == Instruction::FAdd ||
+ InductionBinOp->getOpcode() == Instruction::FSub) &&
+ "Original bin op should be defined for FP induction");
+
+ Value *StepValue = cast<SCEVUnknown>(Step)->getValue();
+
+ // Floating point operations had to be 'fast' to enable the induction.
+ FastMathFlags Flags;
+ Flags.setUnsafeAlgebra();
+
+ Value *MulExp = B.CreateFMul(StepValue, Index);
+ if (isa<Instruction>(MulExp))
+ // We have to check here because MulExp may be a constant.
+ cast<Instruction>(MulExp)->setFastMathFlags(Flags);
+
+ Value *BOp = B.CreateBinOp(InductionBinOp->getOpcode() , StartValue,
+ MulExp, "induction");
+ if (isa<Instruction>(BOp))
+ cast<Instruction>(BOp)->setFastMathFlags(Flags);
+
+ return BOp;
+ }
case IK_NoInduction:
return nullptr;
}
llvm_unreachable("invalid enum");
}
-bool InductionDescriptor::isInductionPHI(PHINode *Phi,
+bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop,
+ ScalarEvolution *SE,
+ InductionDescriptor &D) {
+
+ // Here we only handle FP induction variables.
+ assert(Phi->getType()->isFloatingPointTy() && "Unexpected Phi type");
+
+ if (TheLoop->getHeader() != Phi->getParent())
+ return false;
+
+ // The loop may have multiple entrances or multiple exits; we can analyze
+ // this phi if it has a unique entry value and a unique backedge value.
+ if (Phi->getNumIncomingValues() != 2)
+ return false;
+ Value *BEValue = nullptr, *StartValue = nullptr;
+ if (TheLoop->contains(Phi->getIncomingBlock(0))) {
+ BEValue = Phi->getIncomingValue(0);
+ StartValue = Phi->getIncomingValue(1);
+ } else {
+ assert(TheLoop->contains(Phi->getIncomingBlock(1)) &&
+ "Unexpected Phi node in the loop");
+ BEValue = Phi->getIncomingValue(1);
+ StartValue = Phi->getIncomingValue(0);
+ }
+
+ BinaryOperator *BOp = dyn_cast<BinaryOperator>(BEValue);
+ if (!BOp)
+ return false;
+
+ Value *Addend = nullptr;
+ if (BOp->getOpcode() == Instruction::FAdd) {
+ if (BOp->getOperand(0) == Phi)
+ Addend = BOp->getOperand(1);
+ else if (BOp->getOperand(1) == Phi)
+ Addend = BOp->getOperand(0);
+ } else if (BOp->getOpcode() == Instruction::FSub)
+ if (BOp->getOperand(0) == Phi)
+ Addend = BOp->getOperand(1);
+
+ if (!Addend)
+ return false;
+
+ // The addend should be loop invariant
+ if (auto *I = dyn_cast<Instruction>(Addend))
+ if (TheLoop->contains(I))
+ return false;
+
+ // FP Step has unknown SCEV
+ const SCEV *Step = SE->getUnknown(Addend);
+ D = InductionDescriptor(StartValue, IK_FpInduction, Step, BOp);
+ return true;
+}
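+// Illustrative IR shape this recognizes (names and constants hypothetical):
+//   %p = phi float [ 1.000000e+00, %preheader ], [ %p.next, %latch ]
+//   %p.next = fadd float %p, 5.000000e-01
+// yielding StartValue = 1.0, Step = the SCEVUnknown of 0.5, and
+// InductionBinOp = the fadd.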
+
+bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
PredicatedScalarEvolution &PSE,
InductionDescriptor &D,
bool Assume) {
Type *PhiTy = Phi->getType();
- // We only handle integer and pointer inductions variables.
- if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
+
+ // Handle integer and pointer induction variables.
+ // We now also handle FP induction, but without trying to make a
+ // recurrent expression from the PHI node in-place.
+
+ if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy() &&
+ !PhiTy->isFloatTy() && !PhiTy->isDoubleTy() && !PhiTy->isHalfTy())
return false;
+ if (PhiTy->isFloatingPointTy())
+ return isFPInductionPHI(Phi, TheLoop, PSE.getSE(), D);
+
const SCEV *PhiScev = PSE.getSCEV(Phi);
const auto *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
@@ -752,10 +848,10 @@ bool InductionDescriptor::isInductionPHI(PHINode *Phi,
return false;
}
- return isInductionPHI(Phi, PSE.getSE(), D, AR);
+ return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR);
}
-bool InductionDescriptor::isInductionPHI(PHINode *Phi,
+bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
ScalarEvolution *SE,
InductionDescriptor &D,
const SCEV *Expr) {
@@ -773,7 +869,7 @@ bool InductionDescriptor::isInductionPHI(PHINode *Phi,
return false;
}
- assert(AR->getLoop()->getHeader() == Phi->getParent() &&
+ assert(TheLoop->getHeader() == Phi->getParent() &&
"PHI is an AddRec for a different loop?!");
Value *StartValue =
Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader());
@@ -781,7 +877,7 @@ bool InductionDescriptor::isInductionPHI(PHINode *Phi,
// Calculate the pointer stride and check if it is consecutive.
// The stride may be a constant or a loop invariant integer value.
const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step);
- if (!ConstStep && !SE->isLoopInvariant(Step, AR->getLoop()))
+ if (!ConstStep && !SE->isLoopInvariant(Step, TheLoop))
return false;
if (PhiTy->isIntegerTy()) {
@@ -824,7 +920,7 @@ SmallVector<Instruction *, 8> llvm::findDefsUsedOutsideOfLoop(Loop *L) {
// be adapted into a pointer.
for (auto &Inst : *Block) {
auto Users = Inst.users();
- if (std::any_of(Users.begin(), Users.end(), [&](User *U) {
+ if (any_of(Users, [&](User *U) {
auto *Use = cast<Instruction>(U);
return !L->contains(Use->getParent());
}))
@@ -851,6 +947,10 @@ void llvm::getLoopAnalysisUsage(AnalysisUsage &AU) {
AU.addPreservedID(LoopSimplifyID);
AU.addRequiredID(LCSSAID);
AU.addPreservedID(LCSSAID);
+ // This is used in the LPPassManager to perform LCSSA verification on passes
+ // which preserve LCSSA form.
+ AU.addRequired<LCSSAVerificationPass>();
+ AU.addPreserved<LCSSAVerificationPass>();
// Loop passes are designed to run inside of a loop pass manager which means
// that any function analyses they require must be required by the first loop
@@ -967,3 +1067,36 @@ bool llvm::isGuaranteedToExecute(const Instruction &Inst,
// just a special case of this.)
return true;
}
+
+Optional<unsigned> llvm::getLoopEstimatedTripCount(Loop *L) {
+ // Only support loops with a unique exiting block, and a latch.
+ if (!L->getExitingBlock())
+ return None;
+
+ // Get the branch weights for the loop's backedge.
+ BranchInst *LatchBR =
+ dyn_cast<BranchInst>(L->getLoopLatch()->getTerminator());
+ if (!LatchBR || LatchBR->getNumSuccessors() != 2)
+ return None;
+
+ assert((LatchBR->getSuccessor(0) == L->getHeader() ||
+ LatchBR->getSuccessor(1) == L->getHeader()) &&
+ "At least one edge out of the latch must go to the header");
+
+ // To estimate the number of times the loop body was executed, we want to
+ // know the number of times the backedge was taken, vs. the number of times
+ // we exited the loop.
+ uint64_t TrueVal, FalseVal;
+ if (!LatchBR->extractProfMetadata(TrueVal, FalseVal))
+ return None;
+
+ if (!TrueVal || !FalseVal)
+ return 0;
+
+ // Divide the count of the backedge by the count of the edge exiting the loop,
+ // rounding to nearest.
+ if (LatchBR->getSuccessor(0) == L->getHeader())
+ return (TrueVal + (FalseVal / 2)) / FalseVal;
+ else
+ return (FalseVal + (TrueVal / 2)) / TrueVal;
+}
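+// A worked example with hypothetical profile counts: if the latch branch's
+// !prof metadata gives a backedge weight of 97 and an exit weight of 3, the
+// estimate returned is (97 + 3/2) / 3 = 32, i.e. roughly 32 backedge takings
+// per loop exit.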
diff --git a/contrib/llvm/lib/Transforms/Utils/LoopVersioning.cpp b/contrib/llvm/lib/Transforms/Utils/LoopVersioning.cpp
index b3c61691da30..29756d9dab7f 100644
--- a/contrib/llvm/lib/Transforms/Utils/LoopVersioning.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopVersioning.cpp
@@ -36,7 +36,7 @@ LoopVersioning::LoopVersioning(const LoopAccessInfo &LAI, Loop *L, LoopInfo *LI,
: VersionedLoop(L), NonVersionedLoop(nullptr), LAI(LAI), LI(LI), DT(DT),
SE(SE) {
assert(L->getExitBlock() && "No single exit block");
- assert(L->getLoopPreheader() && "No preheader");
+ assert(L->isLoopSimplifyForm() && "Loop is not in loop-simplify form");
if (UseLAIChecks) {
setAliasChecks(LAI.getRuntimePointerChecking()->getChecks());
setSCEVChecks(LAI.getPSE().getUnionPredicate());
@@ -278,8 +278,8 @@ public:
bool Changed = false;
for (Loop *L : Worklist) {
const LoopAccessInfo &LAI = LAA->getInfo(L);
- if (LAI.getNumRuntimePointerChecks() ||
- !LAI.getPSE().getUnionPredicate().isAlwaysTrue()) {
+ if (L->isLoopSimplifyForm() && (LAI.getNumRuntimePointerChecks() ||
+ !LAI.getPSE().getUnionPredicate().isAlwaysTrue())) {
LoopVersioning LVer(LAI, L, LI, DT, SE);
LVer.versionLoop();
LVer.annotateLoopWithNoAlias();
diff --git a/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp
index 1b31c5ae580a..ee84541e526d 100644
--- a/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp
@@ -14,6 +14,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Utils/LowerInvoke.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/IR/Instructions.h"
@@ -28,36 +29,29 @@ using namespace llvm;
STATISTIC(NumInvokes, "Number of invokes replaced");
namespace {
- class LowerInvoke : public FunctionPass {
+ class LowerInvokeLegacyPass : public FunctionPass {
public:
static char ID; // Pass identification, replacement for typeid
- explicit LowerInvoke() : FunctionPass(ID) {
- initializeLowerInvokePass(*PassRegistry::getPassRegistry());
+ explicit LowerInvokeLegacyPass() : FunctionPass(ID) {
+ initializeLowerInvokeLegacyPassPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override;
};
}
-char LowerInvoke::ID = 0;
-INITIALIZE_PASS(LowerInvoke, "lowerinvoke",
+char LowerInvokeLegacyPass::ID = 0;
+INITIALIZE_PASS(LowerInvokeLegacyPass, "lowerinvoke",
"Lower invoke and unwind, for unwindless code generators",
false, false)
-char &llvm::LowerInvokePassID = LowerInvoke::ID;
-
-// Public Interface To the LowerInvoke pass.
-FunctionPass *llvm::createLowerInvokePass() {
- return new LowerInvoke();
-}
-
-bool LowerInvoke::runOnFunction(Function &F) {
+static bool runImpl(Function &F) {
bool Changed = false;
for (BasicBlock &BB : F)
if (InvokeInst *II = dyn_cast<InvokeInst>(BB.getTerminator())) {
- SmallVector<Value*,16> CallArgs(II->op_begin(), II->op_end() - 3);
+ SmallVector<Value *, 16> CallArgs(II->op_begin(), II->op_end() - 3);
// Insert a normal call instruction...
- CallInst *NewCall = CallInst::Create(II->getCalledValue(),
- CallArgs, "", II);
+ CallInst *NewCall =
+ CallInst::Create(II->getCalledValue(), CallArgs, "", II);
NewCall->takeName(II);
NewCall->setCallingConv(II->getCallingConv());
NewCall->setAttributes(II->getAttributes());
@@ -73,7 +67,28 @@ bool LowerInvoke::runOnFunction(Function &F) {
// Remove the invoke instruction now.
BB.getInstList().erase(II);
- ++NumInvokes; Changed = true;
+ ++NumInvokes;
+ Changed = true;
}
return Changed;
}
+
+bool LowerInvokeLegacyPass::runOnFunction(Function &F) {
+ return runImpl(F);
+}
+
+namespace llvm {
+char &LowerInvokePassID = LowerInvokeLegacyPass::ID;
+
+// Public Interface To the LowerInvoke pass.
+FunctionPass *createLowerInvokePass() { return new LowerInvokeLegacyPass(); }
+
+PreservedAnalyses LowerInvokePass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ bool Changed = runImpl(F);
+ if (!Changed)
+ return PreservedAnalyses::all();
+
+ return PreservedAnalyses::none();
+}
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
index 5c07469869ff..75cd3bc8b2bf 100644
--- a/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp
@@ -478,10 +478,10 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI,
// cases.
assert(MaxPop > 0 && PopSucc);
Default = PopSucc;
- Cases.erase(std::remove_if(
- Cases.begin(), Cases.end(),
- [PopSucc](const CaseRange &R) { return R.BB == PopSucc; }),
- Cases.end());
+ Cases.erase(
+ remove_if(Cases,
+ [PopSucc](const CaseRange &R) { return R.BB == PopSucc; }),
+ Cases.end());
// If there are no cases left, just branch.
if (Cases.empty()) {
diff --git a/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp b/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp
index 1419254bcb4f..24b3b12930ac 100644
--- a/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/Mem2Reg.cpp
@@ -53,7 +53,7 @@ static bool promoteMemoryToRegister(Function &F, DominatorTree &DT,
return Changed;
}
-PreservedAnalyses PromotePass::run(Function &F, AnalysisManager<Function> &AM) {
+PreservedAnalyses PromotePass::run(Function &F, FunctionAnalysisManager &AM) {
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &AC = AM.getResult<AssumptionAnalysis>(F);
if (!promoteMemoryToRegister(F, DT, AC))
diff --git a/contrib/llvm/lib/Transforms/Utils/MemorySSA.cpp b/contrib/llvm/lib/Transforms/Utils/MemorySSA.cpp
index 8ba3cae43b18..1ce4225f09cc 100644
--- a/contrib/llvm/lib/Transforms/Utils/MemorySSA.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/MemorySSA.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
@@ -60,6 +61,11 @@ INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_END(MemorySSAPrinterLegacyPass, "print-memoryssa",
"Memory SSA Printer", false, false)
+static cl::opt<unsigned> MaxCheckLimit(
+ "memssa-check-limit", cl::Hidden, cl::init(100),
+ cl::desc("The maximum number of stores/phis MemorySSA"
+ "will consider trying to walk past (default = 100)"));
+
static cl::opt<bool>
VerifyMemorySSA("verify-memoryssa", cl::init(false), cl::Hidden,
cl::desc("Verify MemorySSA in legacy printer pass."));
@@ -86,7 +92,963 @@ public:
OS << "; " << *MA << "\n";
}
};
+}
+
+namespace {
+/// Our current alias analysis API differentiates heavily between calls and
+/// non-calls, and functions called on one usually assert on the other.
+/// This class encapsulates the distinction to simplify other code that wants
+/// "Memory affecting instructions and related data" to use as a key.
+/// For example, this class is used as a densemap key in the use optimizer.
+class MemoryLocOrCall {
+public:
+ MemoryLocOrCall() : IsCall(false) {}
+ MemoryLocOrCall(MemoryUseOrDef *MUD)
+ : MemoryLocOrCall(MUD->getMemoryInst()) {}
+ MemoryLocOrCall(const MemoryUseOrDef *MUD)
+ : MemoryLocOrCall(MUD->getMemoryInst()) {}
+
+ MemoryLocOrCall(Instruction *Inst) {
+ if (ImmutableCallSite(Inst)) {
+ IsCall = true;
+ CS = ImmutableCallSite(Inst);
+ } else {
+ IsCall = false;
+ // There is no such thing as a MemoryLocation for a fence instruction, and it is
+ // unique in that regard.
+ if (!isa<FenceInst>(Inst))
+ Loc = MemoryLocation::get(Inst);
+ }
+ }
+
+ explicit MemoryLocOrCall(const MemoryLocation &Loc)
+ : IsCall(false), Loc(Loc) {}
+
+ bool IsCall;
+ ImmutableCallSite getCS() const {
+ assert(IsCall);
+ return CS;
+ }
+ MemoryLocation getLoc() const {
+ assert(!IsCall);
+ return Loc;
+ }
+
+ bool operator==(const MemoryLocOrCall &Other) const {
+ if (IsCall != Other.IsCall)
+ return false;
+
+ if (IsCall)
+ return CS.getCalledValue() == Other.CS.getCalledValue();
+ return Loc == Other.Loc;
+ }
+
+private:
+ union {
+ ImmutableCallSite CS;
+ MemoryLocation Loc;
+ };
+};
+}
+
+namespace llvm {
+template <> struct DenseMapInfo<MemoryLocOrCall> {
+ static inline MemoryLocOrCall getEmptyKey() {
+ return MemoryLocOrCall(DenseMapInfo<MemoryLocation>::getEmptyKey());
+ }
+ static inline MemoryLocOrCall getTombstoneKey() {
+ return MemoryLocOrCall(DenseMapInfo<MemoryLocation>::getTombstoneKey());
+ }
+ static unsigned getHashValue(const MemoryLocOrCall &MLOC) {
+ if (MLOC.IsCall)
+ return hash_combine(MLOC.IsCall,
+ DenseMapInfo<const Value *>::getHashValue(
+ MLOC.getCS().getCalledValue()));
+ return hash_combine(
+ MLOC.IsCall, DenseMapInfo<MemoryLocation>::getHashValue(MLOC.getLoc()));
+ }
+ static bool isEqual(const MemoryLocOrCall &LHS, const MemoryLocOrCall &RHS) {
+ return LHS == RHS;
+ }
+};
+
+enum class Reorderability { Always, IfNoAlias, Never };
+
+/// This does one-way checks to see if Use could theoretically be hoisted above
+/// MayClobber. This will not check the other way around.
+///
+/// This assumes that, for the purposes of MemorySSA, Use comes directly after
+/// MayClobber, with no potentially clobbering operations in between them.
+/// (Where potentially clobbering ops are memory barriers, aliased stores, etc.)
+static Reorderability getLoadReorderability(const LoadInst *Use,
+ const LoadInst *MayClobber) {
+ bool VolatileUse = Use->isVolatile();
+ bool VolatileClobber = MayClobber->isVolatile();
+ // Volatile operations may never be reordered with other volatile operations.
+ if (VolatileUse && VolatileClobber)
+ return Reorderability::Never;
+
+ // The lang ref allows reordering of volatile and non-volatile operations.
+ // Whether an aliasing nonvolatile load and volatile load can be reordered,
+ // though, is ambiguous. Because it may not be best to exploit this ambiguity,
+ // we only allow volatile/non-volatile reordering if the volatile and
+ // non-volatile operations don't alias.
+ Reorderability Result = VolatileUse || VolatileClobber
+ ? Reorderability::IfNoAlias
+ : Reorderability::Always;
+
+ // If a load is seq_cst, it cannot be moved above other loads. If its ordering
+ // is weaker, it can be moved above other loads. We just need to be sure that
+ // MayClobber isn't an acquire load, because loads can't be moved above
+ // acquire loads.
+ //
+ // Note that this explicitly *does* allow the free reordering of monotonic (or
+ // weaker) loads of the same address.
+ bool SeqCstUse = Use->getOrdering() == AtomicOrdering::SequentiallyConsistent;
+ bool MayClobberIsAcquire = isAtLeastOrStrongerThan(MayClobber->getOrdering(),
+ AtomicOrdering::Acquire);
+ if (SeqCstUse || MayClobberIsAcquire)
+ return Reorderability::Never;
+ return Result;
+}
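+// Illustrative outcomes (hypothetical pairs of loads): two non-volatile loads
+// with monotonic-or-weaker ordering yield Always; pairing a volatile load with
+// a non-volatile one (absent seq_cst/acquire orderings) yields IfNoAlias, so
+// it only counts as a clobber when the two locations may alias; a seq_cst Use,
+// or a MayClobber that is at least acquire, yields Never.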
+
+static bool instructionClobbersQuery(MemoryDef *MD,
+ const MemoryLocation &UseLoc,
+ const Instruction *UseInst,
+ AliasAnalysis &AA) {
+ Instruction *DefInst = MD->getMemoryInst();
+ assert(DefInst && "Defining instruction not actually an instruction");
+
+ if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(DefInst)) {
+ // These intrinsics will show up as affecting memory, but they are just
+ // markers.
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ case Intrinsic::invariant_start:
+ case Intrinsic::invariant_end:
+ case Intrinsic::assume:
+ return false;
+ default:
+ break;
+ }
+ }
+
+ ImmutableCallSite UseCS(UseInst);
+ if (UseCS) {
+ ModRefInfo I = AA.getModRefInfo(DefInst, UseCS);
+ return I != MRI_NoModRef;
+ }
+
+ if (auto *DefLoad = dyn_cast<LoadInst>(DefInst)) {
+ if (auto *UseLoad = dyn_cast<LoadInst>(UseInst)) {
+ switch (getLoadReorderability(UseLoad, DefLoad)) {
+ case Reorderability::Always:
+ return false;
+ case Reorderability::Never:
+ return true;
+ case Reorderability::IfNoAlias:
+ return !AA.isNoAlias(UseLoc, MemoryLocation::get(DefLoad));
+ }
+ }
+ }
+
+ return AA.getModRefInfo(DefInst, UseLoc) & MRI_Mod;
+}
+
+static bool instructionClobbersQuery(MemoryDef *MD, const MemoryUseOrDef *MU,
+ const MemoryLocOrCall &UseMLOC,
+ AliasAnalysis &AA) {
+ // FIXME: This is a temporary hack to allow a single instructionClobbersQuery
+ // to exist while MemoryLocOrCall is pushed through places.
+ if (UseMLOC.IsCall)
+ return instructionClobbersQuery(MD, MemoryLocation(), MU->getMemoryInst(),
+ AA);
+ return instructionClobbersQuery(MD, UseMLOC.getLoc(), MU->getMemoryInst(),
+ AA);
+}
+
+// Return true when MD may alias MU, and false otherwise.
+bool defClobbersUseOrDef(MemoryDef *MD, const MemoryUseOrDef *MU,
+ AliasAnalysis &AA) {
+ return instructionClobbersQuery(MD, MU, MemoryLocOrCall(MU), AA);
+}
+}
+
+namespace {
+struct UpwardsMemoryQuery {
+ // True if our original query started off as a call
+ bool IsCall;
+ // The pointer location we started the query with. This will be empty if
+ // IsCall is true.
+ MemoryLocation StartingLoc;
+ // This is the instruction we were querying about.
+ const Instruction *Inst;
+ // The MemoryAccess we actually got called with, used to test local domination
+ const MemoryAccess *OriginalAccess;
+
+ UpwardsMemoryQuery()
+ : IsCall(false), Inst(nullptr), OriginalAccess(nullptr) {}
+
+ UpwardsMemoryQuery(const Instruction *Inst, const MemoryAccess *Access)
+ : IsCall(ImmutableCallSite(Inst)), Inst(Inst), OriginalAccess(Access) {
+ if (!IsCall)
+ StartingLoc = MemoryLocation::get(Inst);
+ }
+};
+
+static bool lifetimeEndsAt(MemoryDef *MD, const MemoryLocation &Loc,
+ AliasAnalysis &AA) {
+ Instruction *Inst = MD->getMemoryInst();
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ return AA.isMustAlias(MemoryLocation(II->getArgOperand(1)), Loc);
+ default:
+ return false;
+ }
+ }
+ return false;
+}
+
+static bool isUseTriviallyOptimizableToLiveOnEntry(AliasAnalysis &AA,
+ const Instruction *I) {
+ // If the memory can't be changed, then loads of the memory can't be
+ // clobbered.
+ //
+ // FIXME: We should handle invariant groups, as well. It's a bit harder,
+ // because we need to pay close attention to invariant group barriers.
+ return isa<LoadInst>(I) && (I->getMetadata(LLVMContext::MD_invariant_load) ||
+ AA.pointsToConstantMemory(I));
+}
+
+/// Cache for our caching MemorySSA walker.
+class WalkerCache {
+ DenseMap<ConstMemoryAccessPair, MemoryAccess *> Accesses;
+ DenseMap<const MemoryAccess *, MemoryAccess *> Calls;
+
+public:
+ MemoryAccess *lookup(const MemoryAccess *MA, const MemoryLocation &Loc,
+ bool IsCall) const {
+ ++NumClobberCacheLookups;
+ MemoryAccess *R = IsCall ? Calls.lookup(MA) : Accesses.lookup({MA, Loc});
+ if (R)
+ ++NumClobberCacheHits;
+ return R;
+ }
+
+ bool insert(const MemoryAccess *MA, MemoryAccess *To,
+ const MemoryLocation &Loc, bool IsCall) {
+ // This is fine for Phis, since there are times where we can't optimize
+ // them. Making a def its own clobber is never correct, though.
+ assert((MA != To || isa<MemoryPhi>(MA)) &&
+ "Something can't clobber itself!");
+
+ ++NumClobberCacheInserts;
+ bool Inserted;
+ if (IsCall)
+ Inserted = Calls.insert({MA, To}).second;
+ else
+ Inserted = Accesses.insert({{MA, Loc}, To}).second;
+
+ return Inserted;
+ }
+
+ bool remove(const MemoryAccess *MA, const MemoryLocation &Loc, bool IsCall) {
+ return IsCall ? Calls.erase(MA) : Accesses.erase({MA, Loc});
+ }
+
+ void clear() {
+ Accesses.clear();
+ Calls.clear();
+ }
+
+ bool contains(const MemoryAccess *MA) const {
+ for (auto &P : Accesses)
+ if (P.first.first == MA || P.second == MA)
+ return true;
+ for (auto &P : Calls)
+ if (P.first == MA || P.second == MA)
+ return true;
+ return false;
+ }
+};
+
+/// Walks the defining uses of MemoryDefs. Stops after we hit something that has
+/// no defining use (e.g. a MemoryPhi or liveOnEntry). Note that, when comparing
+/// against a null def_chain_iterator, this will compare equal only after
+/// walking said Phi/liveOnEntry.
+struct def_chain_iterator
+ : public iterator_facade_base<def_chain_iterator, std::forward_iterator_tag,
+ MemoryAccess *> {
+ def_chain_iterator() : MA(nullptr) {}
+ def_chain_iterator(MemoryAccess *MA) : MA(MA) {}
+
+ MemoryAccess *operator*() const { return MA; }
+
+ def_chain_iterator &operator++() {
+ // N.B. liveOnEntry has a null defining access.
+ if (auto *MUD = dyn_cast<MemoryUseOrDef>(MA))
+ MA = MUD->getDefiningAccess();
+ else
+ MA = nullptr;
+ return *this;
+ }
+
+ bool operator==(const def_chain_iterator &O) const { return MA == O.MA; }
+
+private:
+ MemoryAccess *MA;
+};
+
+static iterator_range<def_chain_iterator>
+def_chain(MemoryAccess *MA, MemoryAccess *UpTo = nullptr) {
+#ifdef EXPENSIVE_CHECKS
+ assert((!UpTo || find(def_chain(MA), UpTo) != def_chain_iterator()) &&
+ "UpTo isn't in the def chain!");
+#endif
+ return make_range(def_chain_iterator(MA), def_chain_iterator(UpTo));
+}
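+// Usage sketch (illustrative; visit() is a hypothetical callback): iterate
+// over Start and every access along its defining chain, ending with the
+// MemoryPhi or liveOnEntry def that terminates it:
+//   for (MemoryAccess *MA : def_chain(Start))
+//     visit(MA);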
+
+/// Verifies that `Start` is clobbered by `ClobberAt`, and that nothing
+/// in between `Start` and `ClobberAt` can clobber `Start`.
+///
+/// This is meant to be as simple and self-contained as possible. Because it
+/// uses no cache, etc., it can be relatively expensive.
+///
+/// \param Start The MemoryAccess that we want to walk from.
+/// \param ClobberAt A clobber for Start.
+/// \param StartLoc The MemoryLocation for Start.
+/// \param MSSA The MemorySSA instance that Start and ClobberAt belong to.
+/// \param Query The UpwardsMemoryQuery we used for our search.
+/// \param AA The AliasAnalysis we used for our search.
+static void LLVM_ATTRIBUTE_UNUSED
+checkClobberSanity(MemoryAccess *Start, MemoryAccess *ClobberAt,
+ const MemoryLocation &StartLoc, const MemorySSA &MSSA,
+ const UpwardsMemoryQuery &Query, AliasAnalysis &AA) {
+ assert(MSSA.dominates(ClobberAt, Start) && "Clobber doesn't dominate start?");
+
+ if (MSSA.isLiveOnEntryDef(Start)) {
+ assert(MSSA.isLiveOnEntryDef(ClobberAt) &&
+ "liveOnEntry must clobber itself");
+ return;
+ }
+
+ bool FoundClobber = false;
+ DenseSet<MemoryAccessPair> VisitedPhis;
+ SmallVector<MemoryAccessPair, 8> Worklist;
+ Worklist.emplace_back(Start, StartLoc);
+ // Walk all paths from Start to ClobberAt, while looking for clobbers. If one
+ // is found, complain.
+ while (!Worklist.empty()) {
+ MemoryAccessPair MAP = Worklist.pop_back_val();
+ // All we care about is that nothing from Start to ClobberAt clobbers Start.
+ // We learn nothing from revisiting nodes.
+ if (!VisitedPhis.insert(MAP).second)
+ continue;
+
+ for (MemoryAccess *MA : def_chain(MAP.first)) {
+ if (MA == ClobberAt) {
+ if (auto *MD = dyn_cast<MemoryDef>(MA)) {
+ // instructionClobbersQuery isn't essentially free, so don't use `|=`,
+ // since it won't let us short-circuit.
+ //
+ // Also, note that this can't be hoisted out of the `Worklist` loop,
+ // since MD may only act as a clobber for 1 of N MemoryLocations.
+ FoundClobber =
+ FoundClobber || MSSA.isLiveOnEntryDef(MD) ||
+ instructionClobbersQuery(MD, MAP.second, Query.Inst, AA);
+ }
+ break;
+ }
+
+ // We should never hit liveOnEntry, unless it's the clobber.
+ assert(!MSSA.isLiveOnEntryDef(MA) && "Hit liveOnEntry before clobber?");
+
+ if (auto *MD = dyn_cast<MemoryDef>(MA)) {
+ (void)MD;
+ assert(!instructionClobbersQuery(MD, MAP.second, Query.Inst, AA) &&
+ "Found clobber before reaching ClobberAt!");
+ continue;
+ }
+
+ assert(isa<MemoryPhi>(MA));
+ Worklist.append(upward_defs_begin({MA, MAP.second}), upward_defs_end());
+ }
+ }
+
+ // If ClobberAt is a MemoryPhi, we can assume something above it acted as a
+ // clobber. Otherwise, `ClobberAt` should've acted as a clobber at some point.
+ assert((isa<MemoryPhi>(ClobberAt) || FoundClobber) &&
+ "ClobberAt never acted as a clobber");
+}
+
+/// Our algorithm for walking (and trying to optimize) clobbers, all wrapped up
+/// in one class.
+class ClobberWalker {
+ /// Save a few bytes by using unsigned instead of size_t.
+ using ListIndex = unsigned;
+
+ /// Represents a span of contiguous MemoryDefs, potentially ending in a
+ /// MemoryPhi.
+ struct DefPath {
+ MemoryLocation Loc;
+ // Note that, because we always walk in reverse, Last will always dominate
+ // First. Also note that First and Last are inclusive.
+ MemoryAccess *First;
+ MemoryAccess *Last;
+ Optional<ListIndex> Previous;
+
+ DefPath(const MemoryLocation &Loc, MemoryAccess *First, MemoryAccess *Last,
+ Optional<ListIndex> Previous)
+ : Loc(Loc), First(First), Last(Last), Previous(Previous) {}
+
+ DefPath(const MemoryLocation &Loc, MemoryAccess *Init,
+ Optional<ListIndex> Previous)
+ : DefPath(Loc, Init, Init, Previous) {}
+ };
+
+ const MemorySSA &MSSA;
+ AliasAnalysis &AA;
+ DominatorTree &DT;
+ WalkerCache &WC;
+ UpwardsMemoryQuery *Query;
+ bool UseCache;
+
+ // Phi optimization bookkeeping
+ SmallVector<DefPath, 32> Paths;
+ DenseSet<ConstMemoryAccessPair> VisitedPhis;
+ DenseMap<const BasicBlock *, MemoryAccess *> WalkTargetCache;
+
+ void setUseCache(bool Use) { UseCache = Use; }
+ bool shouldIgnoreCache() const {
+ // UseCache will only be false when we're debugging, or when expensive
+ // checks are enabled. In either case, we don't care deeply about speed.
+ return LLVM_UNLIKELY(!UseCache);
+ }
+
+ void addCacheEntry(const MemoryAccess *What, MemoryAccess *To,
+ const MemoryLocation &Loc) const {
+// EXPENSIVE_CHECKS because most of these queries are redundant.
+#ifdef EXPENSIVE_CHECKS
+ assert(MSSA.dominates(To, What));
+#endif
+ if (shouldIgnoreCache())
+ return;
+ WC.insert(What, To, Loc, Query->IsCall);
+ }
+
+ MemoryAccess *lookupCache(const MemoryAccess *MA, const MemoryLocation &Loc) {
+ return shouldIgnoreCache() ? nullptr : WC.lookup(MA, Loc, Query->IsCall);
+ }
+
+ void cacheDefPath(const DefPath &DN, MemoryAccess *Target) const {
+ if (shouldIgnoreCache())
+ return;
+
+ for (MemoryAccess *MA : def_chain(DN.First, DN.Last))
+ addCacheEntry(MA, Target, DN.Loc);
+
+ // DefPaths only express the path we walked. So, DN.Last could either be a
+ // thing we want to cache, or not.
+ if (DN.Last != Target)
+ addCacheEntry(DN.Last, Target, DN.Loc);
+ }
+
+ /// Find the nearest def or phi that `From` can legally be optimized to.
+ ///
+ /// FIXME: Deduplicate this with MSSA::findDominatingDef. Ideally, MSSA should
+ /// keep track of this information for us, and allow us O(1) lookups of this
+ /// info.
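+ ///
+ /// Sketch of the behavior (just a reading of the loop below, not new logic):
+ /// starting from the phi's block we walk up the strict dominators and return
+ /// the last non-MemoryUse access in the first dominator block that has one;
+ /// if no dominator has any, the walk target is liveOnEntry.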
+ MemoryAccess *getWalkTarget(const MemoryPhi *From) {
+ assert(From->getNumOperands() && "Phi with no operands?");
+
+ BasicBlock *BB = From->getBlock();
+ auto At = WalkTargetCache.find(BB);
+ if (At != WalkTargetCache.end())
+ return At->second;
+
+ SmallVector<const BasicBlock *, 8> ToCache;
+ ToCache.push_back(BB);
+
+ MemoryAccess *Result = MSSA.getLiveOnEntryDef();
+ DomTreeNode *Node = DT.getNode(BB);
+ while ((Node = Node->getIDom())) {
+ auto At = WalkTargetCache.find(BB);
+ if (At != WalkTargetCache.end()) {
+ Result = At->second;
+ break;
+ }
+
+ auto *Accesses = MSSA.getBlockAccesses(Node->getBlock());
+ if (Accesses) {
+ auto Iter = find_if(reverse(*Accesses), [](const MemoryAccess &MA) {
+ return !isa<MemoryUse>(MA);
+ });
+ if (Iter != Accesses->rend()) {
+ Result = const_cast<MemoryAccess *>(&*Iter);
+ break;
+ }
+ }
+
+ ToCache.push_back(Node->getBlock());
+ }
+
+ for (const BasicBlock *BB : ToCache)
+ WalkTargetCache.insert({BB, Result});
+ return Result;
+ }
+
+ /// Result of calling walkToPhiOrClobber.
+ struct UpwardsWalkResult {
+ /// The "Result" of the walk. Either a clobber, the last thing we walked, or
+ /// both.
+ MemoryAccess *Result;
+ bool IsKnownClobber;
+ bool FromCache;
+ };
+
+ /// Walk to the next Phi or Clobber in the def chain starting at Desc.Last.
+ /// This will update Desc.Last as it walks. It will (optionally) also stop at
+ /// StopAt.
+ ///
+ /// This does not test whether StopAt is a clobber.
+ UpwardsWalkResult walkToPhiOrClobber(DefPath &Desc,
+ MemoryAccess *StopAt = nullptr) {
+ assert(!isa<MemoryUse>(Desc.Last) && "Uses don't exist in my world");
+
+ for (MemoryAccess *Current : def_chain(Desc.Last)) {
+ Desc.Last = Current;
+ if (Current == StopAt)
+ return {Current, false, false};
+
+ if (auto *MD = dyn_cast<MemoryDef>(Current))
+ if (MSSA.isLiveOnEntryDef(MD) ||
+ instructionClobbersQuery(MD, Desc.Loc, Query->Inst, AA))
+ return {MD, true, false};
+
+ // Cache checks must be done last, because if Current is a clobber, the
+ // cache will contain the clobber for Current.
+ if (MemoryAccess *MA = lookupCache(Current, Desc.Loc))
+ return {MA, true, true};
+ }
+
+ assert(isa<MemoryPhi>(Desc.Last) &&
+ "Ended at a non-clobber that's not a phi?");
+ return {Desc.Last, false, false};
+ }
+
+ void addSearches(MemoryPhi *Phi, SmallVectorImpl<ListIndex> &PausedSearches,
+ ListIndex PriorNode) {
+ auto UpwardDefs = make_range(upward_defs_begin({Phi, Paths[PriorNode].Loc}),
+ upward_defs_end());
+ for (const MemoryAccessPair &P : UpwardDefs) {
+ PausedSearches.push_back(Paths.size());
+ Paths.emplace_back(P.second, P.first, PriorNode);
+ }
+ }
+
+ /// Represents a search that terminated after finding a clobber. This clobber
+ /// may or may not be present in the path of defs from LastNode..SearchStart,
+ /// since it may have been retrieved from cache.
+ struct TerminatedPath {
+ MemoryAccess *Clobber;
+ ListIndex LastNode;
+ };
+
+ /// Get an access that keeps us from optimizing to the given phi.
+ ///
+ /// PausedSearches is an array of indices into the Paths array. Its incoming
+ /// value is the indices of searches that stopped at the last phi optimization
+ /// target. It's left in an unspecified state.
+ ///
+ /// If this returns None, NewPaused is a vector of searches that terminated
+ /// at StopWhere. Otherwise, NewPaused is left in an unspecified state.
+ Optional<TerminatedPath>
+ getBlockingAccess(MemoryAccess *StopWhere,
+ SmallVectorImpl<ListIndex> &PausedSearches,
+ SmallVectorImpl<ListIndex> &NewPaused,
+ SmallVectorImpl<TerminatedPath> &Terminated) {
+ assert(!PausedSearches.empty() && "No searches to continue?");
+
+ // BFS vs DFS really doesn't make a difference here, so just do a DFS with
+ // PausedSearches as our stack.
+ while (!PausedSearches.empty()) {
+ ListIndex PathIndex = PausedSearches.pop_back_val();
+ DefPath &Node = Paths[PathIndex];
+
+ // If we've already visited this path with this MemoryLocation, we don't
+ // need to do so again.
+ //
+ // NOTE: That we just drop these paths on the ground makes caching
+ // behavior sporadic. e.g. given a diamond:
+ //    A
+ //   B C
+ //    D
+ //
+ // ...If we walk D, B, A, C, we'll only cache the result of phi
+ // optimization for A, B, and D; C will be skipped because it dies here.
+ // This arguably isn't the worst thing ever, since:
+ // - We generally query things in a top-down order, so if we got below D
+ // without needing cache entries for {C, MemLoc}, then chances are
+ // that those cache entries would end up ultimately unused.
+ // - We still cache things for A, so C only needs to walk up a bit.
+ // If this behavior becomes problematic, we can fix without a ton of extra
+ // work.
+ if (!VisitedPhis.insert({Node.Last, Node.Loc}).second)
+ continue;
+
+ UpwardsWalkResult Res = walkToPhiOrClobber(Node, /*StopAt=*/StopWhere);
+ if (Res.IsKnownClobber) {
+ assert(Res.Result != StopWhere || Res.FromCache);
+ // If this wasn't a cache hit, we hit a clobber when walking. That's a
+ // failure.
+ TerminatedPath Term{Res.Result, PathIndex};
+ if (!Res.FromCache || !MSSA.dominates(Res.Result, StopWhere))
+ return Term;
+
+ // Otherwise, it's a valid thing to potentially optimize to.
+ Terminated.push_back(Term);
+ continue;
+ }
+
+ if (Res.Result == StopWhere) {
+ // We've hit our target. Save this path off for if we want to continue
+ // walking.
+ NewPaused.push_back(PathIndex);
+ continue;
+ }
+
+ assert(!MSSA.isLiveOnEntryDef(Res.Result) && "liveOnEntry is a clobber");
+ addSearches(cast<MemoryPhi>(Res.Result), PausedSearches, PathIndex);
+ }
+
+ return None;
+ }
+
+ template <typename T, typename Walker>
+ struct generic_def_path_iterator
+ : public iterator_facade_base<generic_def_path_iterator<T, Walker>,
+ std::forward_iterator_tag, T *> {
+ generic_def_path_iterator() : W(nullptr), N(None) {}
+ generic_def_path_iterator(Walker *W, ListIndex N) : W(W), N(N) {}
+
+ T &operator*() const { return curNode(); }
+
+ generic_def_path_iterator &operator++() {
+ N = curNode().Previous;
+ return *this;
+ }
+
+ bool operator==(const generic_def_path_iterator &O) const {
+ if (N.hasValue() != O.N.hasValue())
+ return false;
+ return !N.hasValue() || *N == *O.N;
+ }
+
+ private:
+ T &curNode() const { return W->Paths[*N]; }
+
+ Walker *W;
+ Optional<ListIndex> N;
+ };
+
+ using def_path_iterator = generic_def_path_iterator<DefPath, ClobberWalker>;
+ using const_def_path_iterator =
+ generic_def_path_iterator<const DefPath, const ClobberWalker>;
+
+ iterator_range<def_path_iterator> def_path(ListIndex From) {
+ return make_range(def_path_iterator(this, From), def_path_iterator());
+ }
+
+ iterator_range<const_def_path_iterator> const_def_path(ListIndex From) const {
+ return make_range(const_def_path_iterator(this, From),
+ const_def_path_iterator());
+ }
+
+ struct OptznResult {
+ /// The path that contains our result.
+ TerminatedPath PrimaryClobber;
+ /// The paths that we can legally cache back from, but that aren't
+ /// necessarily the result of the Phi optimization.
+ SmallVector<TerminatedPath, 4> OtherClobbers;
+ };
+
+ ListIndex defPathIndex(const DefPath &N) const {
+ // The assert looks nicer if we don't need to do &N
+ const DefPath *NP = &N;
+ assert(!Paths.empty() && NP >= &Paths.front() && NP <= &Paths.back() &&
+ "Out of bounds DefPath!");
+ return NP - &Paths.front();
+ }
+
+ /// Try to optimize a phi as best as we can. Returns a SmallVector of Paths
+ /// that act as legal clobbers. Note that this won't return *all* clobbers.
+ ///
+ /// Phi optimization algorithm tl;dr:
+ /// - Find the earliest def/phi, A, we can optimize to
+ /// - Find if all paths from the starting memory access ultimately reach A
+ /// - If not, optimization isn't possible.
+ /// - Otherwise, walk from A to another clobber or phi, A'.
+ /// - If A' is a def, we're done.
+ /// - If A' is a phi, try to optimize it.
+ ///
+ /// A path is a series of {MemoryAccess, MemoryLocation} pairs. A path
+ /// terminates when a MemoryAccess that clobbers said MemoryLocation is found.
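+ ///
+ /// A hypothetical example (not from the source): for a diamond CFG where
+ /// neither branch contains a MemoryDef that clobbers Loc, every path from
+ /// the bottom MemoryPhi reaches the walk target above the diamond, so the
+ /// walk simply continues from that target; if some branch does clobber Loc,
+ /// the optimization is blocked and the phi itself ends up as the answer.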
+ OptznResult tryOptimizePhi(MemoryPhi *Phi, MemoryAccess *Start,
+ const MemoryLocation &Loc) {
+ assert(Paths.empty() && VisitedPhis.empty() &&
+ "Reset the optimization state.");
+
+ Paths.emplace_back(Loc, Start, Phi, None);
+ // Stores how many "valid" optimization nodes we had prior to calling
+ // addSearches/getBlockingAccess. Necessary for caching if we had a blocker.
+ auto PriorPathsSize = Paths.size();
+
+ SmallVector<ListIndex, 16> PausedSearches;
+ SmallVector<ListIndex, 8> NewPaused;
+ SmallVector<TerminatedPath, 4> TerminatedPaths;
+
+ addSearches(Phi, PausedSearches, 0);
+
+ // Moves the TerminatedPath with the "most dominated" Clobber to the end of
+ // Paths.
+ auto MoveDominatedPathToEnd = [&](SmallVectorImpl<TerminatedPath> &Paths) {
+ assert(!Paths.empty() && "Need a path to move");
+ auto Dom = Paths.begin();
+ for (auto I = std::next(Dom), E = Paths.end(); I != E; ++I)
+ if (!MSSA.dominates(I->Clobber, Dom->Clobber))
+ Dom = I;
+ auto Last = Paths.end() - 1;
+ if (Last != Dom)
+ std::iter_swap(Last, Dom);
+ };
+
+ MemoryPhi *Current = Phi;
+ while (1) {
+ assert(!MSSA.isLiveOnEntryDef(Current) &&
+ "liveOnEntry wasn't treated as a clobber?");
+
+ MemoryAccess *Target = getWalkTarget(Current);
+ // If a TerminatedPath doesn't dominate Target, then it wasn't a legal
+ // optimization for the prior phi.
+ assert(all_of(TerminatedPaths, [&](const TerminatedPath &P) {
+ return MSSA.dominates(P.Clobber, Target);
+ }));
+
+ // FIXME: This is broken, because the Blocker may be reported to be
+ // liveOnEntry, and we'll happily wait for that to disappear (read: never).
+ // For the moment, this is fine, since we do nothing with blocker info.
+ if (Optional<TerminatedPath> Blocker = getBlockingAccess(
+ Target, PausedSearches, NewPaused, TerminatedPaths)) {
+ // Cache our work on the blocking node, since we know that's correct.
+ cacheDefPath(Paths[Blocker->LastNode], Blocker->Clobber);
+
+ // Find the node we started at. We can't search based on N->Last, since
+ // we may have gone around a loop with a different MemoryLocation.
+ auto Iter = find_if(def_path(Blocker->LastNode), [&](const DefPath &N) {
+ return defPathIndex(N) < PriorPathsSize;
+ });
+ assert(Iter != def_path_iterator());
+
+ DefPath &CurNode = *Iter;
+ assert(CurNode.Last == Current);
+
+ // Two things:
+ // A. We can't reliably cache all of NewPaused back. Consider a case
+ // where we have two paths in NewPaused; one of which can't optimize
+ // above this phi, whereas the other can. If we cache the second path
+ // back, we'll end up with suboptimal cache entries. We can handle
+ // cases like this a bit better when we either try to find all
+ // clobbers that block phi optimization, or when our cache starts
+ // supporting unfinished searches.
+ // B. We can't reliably cache TerminatedPaths back here without doing
+ // extra checks; consider a case like:
+ //     T
+ //    / \
+ //   D   C
+ //    \ /
+ //     S
+ // Where T is our target, C is a node with a clobber on it, D is a
+ // diamond (with a clobber *only* on the left or right node, N), and
+ // S is our start. Say we walk to D, through the node opposite N
+ // (read: ignoring the clobber), and see a cache entry in the top
+ // node of D. That cache entry gets put into TerminatedPaths. We then
+ // walk up to C (N is later in our worklist), find the clobber, and
+ // quit. If we append TerminatedPaths to OtherClobbers, we'll cache
+ // the bottom part of D to the cached clobber, ignoring the clobber
+ // in N. Again, this problem goes away if we start tracking all
+ // blockers for a given phi optimization.
+ TerminatedPath Result{CurNode.Last, defPathIndex(CurNode)};
+ return {Result, {}};
+ }
+
+ // If there's nothing left to search, then all paths led to valid clobbers
+ // that we got from our cache; pick the nearest to the start, and allow
+ // the rest to be cached back.
+ if (NewPaused.empty()) {
+ MoveDominatedPathToEnd(TerminatedPaths);
+ TerminatedPath Result = TerminatedPaths.pop_back_val();
+ return {Result, std::move(TerminatedPaths)};
+ }
+
+ MemoryAccess *DefChainEnd = nullptr;
+ SmallVector<TerminatedPath, 4> Clobbers;
+ for (ListIndex Paused : NewPaused) {
+ UpwardsWalkResult WR = walkToPhiOrClobber(Paths[Paused]);
+ if (WR.IsKnownClobber)
+ Clobbers.push_back({WR.Result, Paused});
+ else
+ // Micro-opt: If we hit the end of the chain, save it.
+ DefChainEnd = WR.Result;
+ }
+
+ if (!TerminatedPaths.empty()) {
+ // If we couldn't find the dominating phi/liveOnEntry in the above loop,
+ // do it now.
+ if (!DefChainEnd)
+ for (MemoryAccess *MA : def_chain(Target))
+ DefChainEnd = MA;
+
+ // If any of the terminated paths don't dominate the phi we'll try to
+ // optimize, we need to figure out what they are and quit.
+ const BasicBlock *ChainBB = DefChainEnd->getBlock();
+ for (const TerminatedPath &TP : TerminatedPaths) {
+ // Because we know that DefChainEnd is as "high" as we can go, we
+ // don't need local dominance checks; BB dominance is sufficient.
+ if (DT.dominates(ChainBB, TP.Clobber->getBlock()))
+ Clobbers.push_back(TP);
+ }
+ }
+
+ // If we have clobbers in the def chain, find the one closest to Current
+ // and quit.
+ if (!Clobbers.empty()) {
+ MoveDominatedPathToEnd(Clobbers);
+ TerminatedPath Result = Clobbers.pop_back_val();
+ return {Result, std::move(Clobbers)};
+ }
+
+ assert(all_of(NewPaused,
+ [&](ListIndex I) { return Paths[I].Last == DefChainEnd; }));
+
+ // Because liveOnEntry is a clobber, this must be a phi.
+ auto *DefChainPhi = cast<MemoryPhi>(DefChainEnd);
+
+ PriorPathsSize = Paths.size();
+ PausedSearches.clear();
+ for (ListIndex I : NewPaused)
+ addSearches(DefChainPhi, PausedSearches, I);
+ NewPaused.clear();
+
+ Current = DefChainPhi;
+ }
+ }
+
+ /// Caches everything in an OptznResult.
+ void cacheOptResult(const OptznResult &R) {
+ if (R.OtherClobbers.empty()) {
+ // If we're not going to be caching OtherClobbers, don't bother with
+ // marking visited/etc.
+ for (const DefPath &N : const_def_path(R.PrimaryClobber.LastNode))
+ cacheDefPath(N, R.PrimaryClobber.Clobber);
+ return;
+ }
+
+ // PrimaryClobber is our answer. If we can cache anything back, we need to
+ // stop caching when we visit PrimaryClobber.
+ SmallBitVector Visited(Paths.size());
+ for (const DefPath &N : const_def_path(R.PrimaryClobber.LastNode)) {
+ Visited[defPathIndex(N)] = true;
+ cacheDefPath(N, R.PrimaryClobber.Clobber);
+ }
+
+ for (const TerminatedPath &P : R.OtherClobbers) {
+ for (const DefPath &N : const_def_path(P.LastNode)) {
+ ListIndex NIndex = defPathIndex(N);
+ if (Visited[NIndex])
+ break;
+ Visited[NIndex] = true;
+ cacheDefPath(N, P.Clobber);
+ }
+ }
+ }
+
+ void verifyOptResult(const OptznResult &R) const {
+ assert(all_of(R.OtherClobbers, [&](const TerminatedPath &P) {
+ return MSSA.dominates(P.Clobber, R.PrimaryClobber.Clobber);
+ }));
+ }
+
+ void resetPhiOptznState() {
+ Paths.clear();
+ VisitedPhis.clear();
+ }
+
+public:
+ ClobberWalker(const MemorySSA &MSSA, AliasAnalysis &AA, DominatorTree &DT,
+ WalkerCache &WC)
+ : MSSA(MSSA), AA(AA), DT(DT), WC(WC), UseCache(true) {}
+
+ void reset() { WalkTargetCache.clear(); }
+
+ /// Finds the nearest clobber for the given query, optimizing phis if
+ /// possible.
+ MemoryAccess *findClobber(MemoryAccess *Start, UpwardsMemoryQuery &Q,
+ bool UseWalkerCache = true) {
+ setUseCache(UseWalkerCache);
+ Query = &Q;
+
+ MemoryAccess *Current = Start;
+ // This walker pretends uses don't exist. If we're handed one, silently grab
+ // its def. (This has the nice side-effect of ensuring we never cache uses)
+ if (auto *MU = dyn_cast<MemoryUse>(Start))
+ Current = MU->getDefiningAccess();
+
+ DefPath FirstDesc(Q.StartingLoc, Current, Current, None);
+ // Fast path for the overly-common case (no crazy phi optimization
+ // necessary)
+ UpwardsWalkResult WalkResult = walkToPhiOrClobber(FirstDesc);
+ MemoryAccess *Result;
+ if (WalkResult.IsKnownClobber) {
+ cacheDefPath(FirstDesc, WalkResult.Result);
+ Result = WalkResult.Result;
+ } else {
+ OptznResult OptRes = tryOptimizePhi(cast<MemoryPhi>(FirstDesc.Last),
+ Current, Q.StartingLoc);
+ verifyOptResult(OptRes);
+ cacheOptResult(OptRes);
+ resetPhiOptznState();
+ Result = OptRes.PrimaryClobber.Clobber;
+ }
+
+#ifdef EXPENSIVE_CHECKS
+ checkClobberSanity(Current, Result, Q.StartingLoc, MSSA, Q, AA);
+#endif
+ return Result;
+ }
+
+ void verify(const MemorySSA *MSSA) { assert(MSSA == &this->MSSA); }
+};
+
+struct RenamePassData {
+ DomTreeNode *DTN;
+ DomTreeNode::const_iterator ChildIt;
+ MemoryAccess *IncomingVal;
+
+ RenamePassData(DomTreeNode *D, DomTreeNode::const_iterator It,
+ MemoryAccess *M)
+ : DTN(D), ChildIt(It), IncomingVal(M) {}
+ void swap(RenamePassData &RHS) {
+ std::swap(DTN, RHS.DTN);
+ std::swap(ChildIt, RHS.ChildIt);
+ std::swap(IncomingVal, RHS.IncomingVal);
+ }
+};
+} // anonymous namespace
+namespace llvm {
/// \brief A MemorySSAWalker that does AA walks and caching of lookups to
/// disambiguate accesses.
///
@@ -121,59 +1083,39 @@ public:
/// ret i32 %r
/// }
class MemorySSA::CachingWalker final : public MemorySSAWalker {
+ WalkerCache Cache;
+ ClobberWalker Walker;
+ bool AutoResetWalker;
+
+ MemoryAccess *getClobberingMemoryAccess(MemoryAccess *, UpwardsMemoryQuery &);
+ void verifyRemoved(MemoryAccess *);
+
public:
CachingWalker(MemorySSA *, AliasAnalysis *, DominatorTree *);
~CachingWalker() override;
- MemoryAccess *getClobberingMemoryAccess(const Instruction *) override;
+ using MemorySSAWalker::getClobberingMemoryAccess;
+ MemoryAccess *getClobberingMemoryAccess(MemoryAccess *) override;
MemoryAccess *getClobberingMemoryAccess(MemoryAccess *,
- MemoryLocation &) override;
+ const MemoryLocation &) override;
void invalidateInfo(MemoryAccess *) override;
-protected:
- struct UpwardsMemoryQuery;
- MemoryAccess *doCacheLookup(const MemoryAccess *, const UpwardsMemoryQuery &,
- const MemoryLocation &);
-
- void doCacheInsert(const MemoryAccess *, MemoryAccess *,
- const UpwardsMemoryQuery &, const MemoryLocation &);
+ /// Whether we call resetClobberWalker() after each time we *actually* walk to
+ /// answer a clobber query.
+ void setAutoResetWalker(bool AutoReset) { AutoResetWalker = AutoReset; }
- void doCacheRemove(const MemoryAccess *, const UpwardsMemoryQuery &,
- const MemoryLocation &);
+ /// Drop the walker's persistent data structures. At the moment, this means
+ /// "drop the walker's cache of BasicBlocks ->
+ /// earliest-MemoryAccess-we-can-optimize-to". This is necessary if we're
+ /// going to have DT updates, if we remove MemoryAccesses, etc.
+ void resetClobberWalker() { Walker.reset(); }
-private:
- MemoryAccessPair UpwardsDFSWalk(MemoryAccess *, const MemoryLocation &,
- UpwardsMemoryQuery &, bool);
- MemoryAccess *getClobberingMemoryAccess(MemoryAccess *, UpwardsMemoryQuery &);
- bool instructionClobbersQuery(const MemoryDef *, UpwardsMemoryQuery &,
- const MemoryLocation &Loc) const;
- void verifyRemoved(MemoryAccess *);
- SmallDenseMap<ConstMemoryAccessPair, MemoryAccess *>
- CachedUpwardsClobberingAccess;
- DenseMap<const MemoryAccess *, MemoryAccess *> CachedUpwardsClobberingCall;
- AliasAnalysis *AA;
- DominatorTree *DT;
-};
-}
-
-namespace {
-struct RenamePassData {
- DomTreeNode *DTN;
- DomTreeNode::const_iterator ChildIt;
- MemoryAccess *IncomingVal;
-
- RenamePassData(DomTreeNode *D, DomTreeNode::const_iterator It,
- MemoryAccess *M)
- : DTN(D), ChildIt(It), IncomingVal(M) {}
- void swap(RenamePassData &RHS) {
- std::swap(DTN, RHS.DTN);
- std::swap(ChildIt, RHS.ChildIt);
- std::swap(IncomingVal, RHS.IncomingVal);
+ void verify(const MemorySSA *MSSA) override {
+ MemorySSAWalker::verify(MSSA);
+ Walker.verify(MSSA);
}
};
-}
-namespace llvm {
/// \brief Rename a single basic block into MemorySSA form.
/// Uses the standard SSA renaming algorithm.
/// \returns The new incoming value.
@@ -184,21 +1126,13 @@ MemoryAccess *MemorySSA::renameBlock(BasicBlock *BB,
if (It != PerBlockAccesses.end()) {
AccessList *Accesses = It->second.get();
for (MemoryAccess &L : *Accesses) {
- switch (L.getValueID()) {
- case Value::MemoryUseVal:
- cast<MemoryUse>(&L)->setDefiningAccess(IncomingVal);
- break;
- case Value::MemoryDefVal:
- // We can't legally optimize defs, because we only allow single
- // memory phis/uses on operations, and if we optimize these, we can
- // end up with multiple reaching defs. Uses do not have this
- // problem, since they do not produce a value
- cast<MemoryDef>(&L)->setDefiningAccess(IncomingVal);
+ if (MemoryUseOrDef *MUD = dyn_cast<MemoryUseOrDef>(&L)) {
+ if (MUD->getDefiningAccess() == nullptr)
+ MUD->setDefiningAccess(IncomingVal);
+ if (isa<MemoryDef>(&L))
+ IncomingVal = &L;
+ } else {
IncomingVal = &L;
- break;
- case Value::MemoryPhiVal:
- IncomingVal = &L;
- break;
}
}
}
@@ -295,21 +1229,10 @@ void MemorySSA::markUnreachableAsLiveOnEntry(BasicBlock *BB) {
MemorySSA::MemorySSA(Function &Func, AliasAnalysis *AA, DominatorTree *DT)
: AA(AA), DT(DT), F(Func), LiveOnEntryDef(nullptr), Walker(nullptr),
- NextID(0) {
+ NextID(INVALID_MEMORYACCESS_ID) {
buildMemorySSA();
}
-MemorySSA::MemorySSA(MemorySSA &&MSSA)
- : AA(MSSA.AA), DT(MSSA.DT), F(MSSA.F),
- ValueToMemoryAccess(std::move(MSSA.ValueToMemoryAccess)),
- PerBlockAccesses(std::move(MSSA.PerBlockAccesses)),
- LiveOnEntryDef(std::move(MSSA.LiveOnEntryDef)),
- Walker(std::move(MSSA.Walker)), NextID(MSSA.NextID) {
- // Update the Walker MSSA pointer so it doesn't point to the moved-from MSSA
- // object any more.
- Walker->MSSA = this;
-}
-
MemorySSA::~MemorySSA() {
// Drop all our references
for (const auto &Pair : PerBlockAccesses)
@@ -325,6 +1248,245 @@ MemorySSA::AccessList *MemorySSA::getOrCreateAccessList(const BasicBlock *BB) {
return Res.first->second.get();
}
+/// This class is a batch walker of all MemoryUse's in the program, and points
+/// their defining access at the thing that actually clobbers them. Because it
+/// is a batch walker that touches everything, it does not operate like the
+/// other walkers. This walker is basically performing a top-down SSA renaming
+/// pass, where the version stack is used as the cache. This enables it to be
+/// significantly more time and memory efficient than using the regular walker,
+/// which is walking bottom-up.
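+///
+/// For illustration (hypothetical accesses, not from the source): in
+///   1 = MemoryDef(liveOnEntry)  ; store to %a
+///   2 = MemoryDef(1)            ; store to %b
+///   MemoryUse(2)                ; load of %a
+/// the load's defining access starts out as 2, the nearest dominating def;
+/// if the store to %b does not alias %a, this pass re-points the use at 1,
+/// the def that actually clobbers it.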
+class MemorySSA::OptimizeUses {
+public:
+ OptimizeUses(MemorySSA *MSSA, MemorySSAWalker *Walker, AliasAnalysis *AA,
+ DominatorTree *DT)
+ : MSSA(MSSA), Walker(Walker), AA(AA), DT(DT) {
+ Walker = MSSA->getWalker();
+ }
+
+ void optimizeUses();
+
+private:
+ /// This represents where a given MemoryLocation is in the stack.
+ struct MemlocStackInfo {
+ // This essentially is keeping track of versions of the stack. Whenever
+ // the stack changes due to pushes or pops, these versions increase.
+ unsigned long StackEpoch;
+ unsigned long PopEpoch;
+ // This is the lower bound of places on the stack to check. It is equal to
+ // the place the last stack walk ended.
+ // Note: Correctness depends on this being initialized to 0, which DenseMap
+ // does.
+ unsigned long LowerBound;
+ const BasicBlock *LowerBoundBlock;
+ // This is where the last walk for this memory location ended.
+ unsigned long LastKill;
+ bool LastKillValid;
+ };
+ void optimizeUsesInBlock(const BasicBlock *, unsigned long &, unsigned long &,
+ SmallVectorImpl<MemoryAccess *> &,
+ DenseMap<MemoryLocOrCall, MemlocStackInfo> &);
+ MemorySSA *MSSA;
+ MemorySSAWalker *Walker;
+ AliasAnalysis *AA;
+ DominatorTree *DT;
+};
+
+/// Optimize the uses in a given block. This is basically the SSA renaming
+/// algorithm, with one caveat: We are able to use a single stack for all
+/// MemoryUses. This is because the set of *possible* reaching MemoryDefs is
+/// the same for every MemoryUse. The *actual* clobbering MemoryDef is just
+/// going to be some position in that stack of possible ones.
+///
+/// For each MemoryLocation we track the stack positions it still needs to
+/// check and where its last walk ended, because we only want to check the
+/// things that changed since last time. The same MemoryLocation should get
+/// clobbered by the same store (getModRefInfo does not use invariantness or
+/// similar properties; if it ever starts to, we can extend MemoryLocOrCall to
+/// include the relevant data).
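+///
+/// A rough sketch of the bookkeeping (illustrative numbers): if the version
+/// stack currently holds [liveOnEntry, 1, 2, 3] and a location's LowerBound
+/// sits at entry 1, only the entries pushed since then (2 and 3) need to be
+/// re-checked; if neither clobbers the location, the use is pointed at the
+/// previously recorded LastKill entry instead of rescanning the whole stack.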
+void MemorySSA::OptimizeUses::optimizeUsesInBlock(
+ const BasicBlock *BB, unsigned long &StackEpoch, unsigned long &PopEpoch,
+ SmallVectorImpl<MemoryAccess *> &VersionStack,
+ DenseMap<MemoryLocOrCall, MemlocStackInfo> &LocStackInfo) {
+
+ /// If no accesses, nothing to do.
+ MemorySSA::AccessList *Accesses = MSSA->getWritableBlockAccesses(BB);
+ if (Accesses == nullptr)
+ return;
+
+ // Pop everything that doesn't dominate the current block off the stack,
+ // and increment the PopEpoch to account for this.
+ while (!VersionStack.empty()) {
+ BasicBlock *BackBlock = VersionStack.back()->getBlock();
+ if (DT->dominates(BackBlock, BB))
+ break;
+ while (VersionStack.back()->getBlock() == BackBlock)
+ VersionStack.pop_back();
+ ++PopEpoch;
+ }
+ for (MemoryAccess &MA : *Accesses) {
+ auto *MU = dyn_cast<MemoryUse>(&MA);
+ if (!MU) {
+ VersionStack.push_back(&MA);
+ ++StackEpoch;
+ continue;
+ }
+
+ if (isUseTriviallyOptimizableToLiveOnEntry(*AA, MU->getMemoryInst())) {
+ MU->setDefiningAccess(MSSA->getLiveOnEntryDef(), true);
+ continue;
+ }
+
+ MemoryLocOrCall UseMLOC(MU);
+ auto &LocInfo = LocStackInfo[UseMLOC];
+ // If the pop epoch changed, it means we've removed stuff from top of
+ // stack due to changing blocks. We may have to reset the lower bound or
+ // last kill info.
+ if (LocInfo.PopEpoch != PopEpoch) {
+ LocInfo.PopEpoch = PopEpoch;
+ LocInfo.StackEpoch = StackEpoch;
+ // If the lower bound was in something that no longer dominates us, we
+ // have to reset it.
+ // We can't simply track stack size, because the stack may have had
+ // pushes/pops in the meantime.
+ // XXX: This is non-optimal, but is only slower in cases with heavily
+ // branching dominator trees. Getting the optimal number of queries would
+ // require making LowerBound and LastKill a per-loc stack, and popping it
+ // until the top of that stack dominates us. This does not seem worth it ATM.
+ // A much cheaper optimization would be to always explore the deepest
+ // branch of the dominator tree first. This will guarantee this resets on
+ // the smallest set of blocks.
+ if (LocInfo.LowerBoundBlock && LocInfo.LowerBoundBlock != BB &&
+ !DT->dominates(LocInfo.LowerBoundBlock, BB)) {
+ // Reset the lower bound of things to check.
+ // TODO: Some day we should be able to reset to last kill, rather than
+ // 0.
+ LocInfo.LowerBound = 0;
+ LocInfo.LowerBoundBlock = VersionStack[0]->getBlock();
+ LocInfo.LastKillValid = false;
+ }
+ } else if (LocInfo.StackEpoch != StackEpoch) {
+ // If all that has changed is the StackEpoch, we only have to check the
+ // new things on the stack, because we've checked everything before. In
+ // this case, the lower bound of things to check remains the same.
+ LocInfo.PopEpoch = PopEpoch;
+ LocInfo.StackEpoch = StackEpoch;
+ }
+ if (!LocInfo.LastKillValid) {
+ LocInfo.LastKill = VersionStack.size() - 1;
+ LocInfo.LastKillValid = true;
+ }
+
+ // At this point, we should have corrected last kill and LowerBound to be
+ // in bounds.
+ assert(LocInfo.LowerBound < VersionStack.size() &&
+ "Lower bound out of range");
+ assert(LocInfo.LastKill < VersionStack.size() &&
+ "Last kill info out of range");
+ // In any case, the new upper bound is the top of the stack.
+ unsigned long UpperBound = VersionStack.size() - 1;
+
+ if (UpperBound - LocInfo.LowerBound > MaxCheckLimit) {
+ DEBUG(dbgs() << "MemorySSA skipping optimization of " << *MU << " ("
+ << *(MU->getMemoryInst()) << ")"
+ << " because there are " << UpperBound - LocInfo.LowerBound
+ << " stores to disambiguate\n");
+ // Because we did not walk, LastKill is no longer valid, as this may
+ // have been a kill.
+ LocInfo.LastKillValid = false;
+ continue;
+ }
+ bool FoundClobberResult = false;
+ while (UpperBound > LocInfo.LowerBound) {
+ if (isa<MemoryPhi>(VersionStack[UpperBound])) {
+ // For phis, use the walker, see where we ended up, go there
+ Instruction *UseInst = MU->getMemoryInst();
+ MemoryAccess *Result = Walker->getClobberingMemoryAccess(UseInst);
+ // We are guaranteed to find it or something is wrong
+ while (VersionStack[UpperBound] != Result) {
+ assert(UpperBound != 0);
+ --UpperBound;
+ }
+ FoundClobberResult = true;
+ break;
+ }
+
+ MemoryDef *MD = cast<MemoryDef>(VersionStack[UpperBound]);
+ // If the lifetime of the pointer ends at this instruction, it's live on
+ // entry.
+ if (!UseMLOC.IsCall && lifetimeEndsAt(MD, UseMLOC.getLoc(), *AA)) {
+ // Reset UpperBound to liveOnEntryDef's place in the stack
+ UpperBound = 0;
+ FoundClobberResult = true;
+ break;
+ }
+ if (instructionClobbersQuery(MD, MU, UseMLOC, *AA)) {
+ FoundClobberResult = true;
+ break;
+ }
+ --UpperBound;
+ }
+ // At the end of this loop, UpperBound is either a clobber, or the lower bound.
+ // PHI walking may cause it to be < LowerBound, and in fact, < LastKill.
+ if (FoundClobberResult || UpperBound < LocInfo.LastKill) {
+ MU->setDefiningAccess(VersionStack[UpperBound], true);
+ // The last kill is now wherever the walk ended up.
+ LocInfo.LastKill = UpperBound;
+ } else {
+ // Otherwise, we checked all the new ones, and now we know we can get to
+ // LastKill.
+ MU->setDefiningAccess(VersionStack[LocInfo.LastKill], true);
+ }
+ LocInfo.LowerBound = VersionStack.size() - 1;
+ LocInfo.LowerBoundBlock = BB;
+ }
+}
+
+/// Optimize uses to point to their actual clobbering definitions.
+void MemorySSA::OptimizeUses::optimizeUses() {
+
+ // We perform a non-recursive top-down dominator tree walk
+ struct StackInfo {
+ const DomTreeNode *Node;
+ DomTreeNode::const_iterator Iter;
+ };
+
+ SmallVector<MemoryAccess *, 16> VersionStack;
+ SmallVector<StackInfo, 16> DomTreeWorklist;
+ DenseMap<MemoryLocOrCall, MemlocStackInfo> LocStackInfo;
+ VersionStack.push_back(MSSA->getLiveOnEntryDef());
+
+ unsigned long StackEpoch = 1;
+ unsigned long PopEpoch = 1;
+ for (const auto *DomNode : depth_first(DT->getRootNode()))
+ optimizeUsesInBlock(DomNode->getBlock(), StackEpoch, PopEpoch, VersionStack,
+ LocStackInfo);
+}
+
+void MemorySSA::placePHINodes(
+ const SmallPtrSetImpl<BasicBlock *> &DefiningBlocks,
+ const DenseMap<const BasicBlock *, unsigned int> &BBNumbers) {
+ // Determine where our MemoryPhi's should go
+ ForwardIDFCalculator IDFs(*DT);
+ IDFs.setDefiningBlocks(DefiningBlocks);
+ SmallVector<BasicBlock *, 32> IDFBlocks;
+ IDFs.calculate(IDFBlocks);
+
+ std::sort(IDFBlocks.begin(), IDFBlocks.end(),
+ [&BBNumbers](const BasicBlock *A, const BasicBlock *B) {
+ return BBNumbers.lookup(A) < BBNumbers.lookup(B);
+ });
+
+ // Now place MemoryPhi nodes.
+ for (auto &BB : IDFBlocks) {
+ // Insert phi node
+ AccessList *Accesses = getOrCreateAccessList(BB);
+ MemoryPhi *Phi = new MemoryPhi(BB->getContext(), BB, NextID++);
+ ValueToMemoryAccess[BB] = Phi;
+ // Phi's always are placed at the front of the block.
+ Accesses->push_front(Phi);
+ }
+}
+
void MemorySSA::buildMemorySSA() {
// We create an access to represent "live on entry", for things like
// arguments or users of globals, where the memory they use is defined before
@@ -335,6 +1497,8 @@ void MemorySSA::buildMemorySSA() {
BasicBlock &StartingPoint = F.getEntryBlock();
LiveOnEntryDef = make_unique<MemoryDef>(F.getContext(), nullptr, nullptr,
&StartingPoint, NextID++);
+ DenseMap<const BasicBlock *, unsigned int> BBNumbers;
+ unsigned NextBBNum = 0;
// We maintain lists of memory accesses per-block, trading memory for time. We
// could just look up the memory access for every possible instruction in the
@@ -344,6 +1508,7 @@ void MemorySSA::buildMemorySSA() {
// Go through each block, figure out where defs occur, and chain together all
// the accesses.
for (BasicBlock &B : F) {
+ BBNumbers[&B] = NextBBNum++;
bool InsertIntoDef = false;
AccessList *Accesses = nullptr;
for (Instruction &I : B) {
@@ -361,81 +1526,20 @@ void MemorySSA::buildMemorySSA() {
if (Accesses)
DefUseBlocks.insert(&B);
}
-
- // Compute live-in.
- // Live in is normally defined as "all the blocks on the path from each def to
- // each of it's uses".
- // MemoryDef's are implicit uses of previous state, so they are also uses.
- // This means we don't really have def-only instructions. The only
- // MemoryDef's that are not really uses are those that are of the LiveOnEntry
- // variable (because LiveOnEntry can reach anywhere, and every def is a
- // must-kill of LiveOnEntry).
- // In theory, you could precisely compute live-in by using alias-analysis to
- // disambiguate defs and uses to see which really pair up with which.
- // In practice, this would be really expensive and difficult. So we simply
- // assume all defs are also uses that need to be kept live.
- // Because of this, the end result of this live-in computation will be "the
- // entire set of basic blocks that reach any use".
-
- SmallPtrSet<BasicBlock *, 32> LiveInBlocks;
- SmallVector<BasicBlock *, 64> LiveInBlockWorklist(DefUseBlocks.begin(),
- DefUseBlocks.end());
- // Now that we have a set of blocks where a value is live-in, recursively add
- // predecessors until we find the full region the value is live.
- while (!LiveInBlockWorklist.empty()) {
- BasicBlock *BB = LiveInBlockWorklist.pop_back_val();
-
- // The block really is live in here, insert it into the set. If already in
- // the set, then it has already been processed.
- if (!LiveInBlocks.insert(BB).second)
- continue;
-
- // Since the value is live into BB, it is either defined in a predecessor or
- // live into it to.
- LiveInBlockWorklist.append(pred_begin(BB), pred_end(BB));
- }
-
- // Determine where our MemoryPhi's should go
- ForwardIDFCalculator IDFs(*DT);
- IDFs.setDefiningBlocks(DefiningBlocks);
- IDFs.setLiveInBlocks(LiveInBlocks);
- SmallVector<BasicBlock *, 32> IDFBlocks;
- IDFs.calculate(IDFBlocks);
-
- // Now place MemoryPhi nodes.
- for (auto &BB : IDFBlocks) {
- // Insert phi node
- AccessList *Accesses = getOrCreateAccessList(BB);
- MemoryPhi *Phi = new MemoryPhi(BB->getContext(), BB, NextID++);
- ValueToMemoryAccess.insert(std::make_pair(BB, Phi));
- // Phi's always are placed at the front of the block.
- Accesses->push_front(Phi);
- }
+ placePHINodes(DefiningBlocks, BBNumbers);
// Now do regular SSA renaming on the MemoryDef/MemoryUse. Visited will get
// filled in with all blocks.
SmallPtrSet<BasicBlock *, 16> Visited;
renamePass(DT->getRootNode(), LiveOnEntryDef.get(), Visited);
- MemorySSAWalker *Walker = getWalker();
+ CachingWalker *Walker = getWalkerImpl();
- // Now optimize the MemoryUse's defining access to point to the nearest
- // dominating clobbering def.
- // This ensures that MemoryUse's that are killed by the same store are
- // immediate users of that store, one of the invariants we guarantee.
- for (auto DomNode : depth_first(DT)) {
- BasicBlock *BB = DomNode->getBlock();
- auto AI = PerBlockAccesses.find(BB);
- if (AI == PerBlockAccesses.end())
- continue;
- AccessList *Accesses = AI->second.get();
- for (auto &MA : *Accesses) {
- if (auto *MU = dyn_cast<MemoryUse>(&MA)) {
- Instruction *Inst = MU->getMemoryInst();
- MU->setDefiningAccess(Walker->getClobberingMemoryAccess(Inst));
- }
- }
- }
+ // We're doing a batch of updates; don't drop useful caches between them.
+ Walker->setAutoResetWalker(false);
+ OptimizeUses(this, Walker, AA, DT).optimizeUses();
+ Walker->setAutoResetWalker(true);
+ Walker->resetClobberWalker();
// Mark the uses in unreachable blocks as live on entry, so that they go
// somewhere.
@@ -444,7 +1548,9 @@ void MemorySSA::buildMemorySSA() {
markUnreachableAsLiveOnEntry(&BB);
}
-MemorySSAWalker *MemorySSA::getWalker() {
+MemorySSAWalker *MemorySSA::getWalker() { return getWalkerImpl(); }
+
+MemorySSA::CachingWalker *MemorySSA::getWalkerImpl() {
if (Walker)
return Walker.get();
@@ -456,9 +1562,10 @@ MemoryPhi *MemorySSA::createMemoryPhi(BasicBlock *BB) {
assert(!getMemoryAccess(BB) && "MemoryPhi already exists for this BB");
AccessList *Accesses = getOrCreateAccessList(BB);
MemoryPhi *Phi = new MemoryPhi(BB->getContext(), BB, NextID++);
- ValueToMemoryAccess.insert(std::make_pair(BB, Phi));
+ ValueToMemoryAccess[BB] = Phi;
// Phi's always are placed at the front of the block.
Accesses->push_front(Phi);
+ BlockNumberingValid.erase(BB);
return Phi;
}
@@ -481,39 +1588,64 @@ MemoryAccess *MemorySSA::createMemoryAccessInBB(Instruction *I,
auto *Accesses = getOrCreateAccessList(BB);
if (Point == Beginning) {
// It goes after any phi nodes
- auto AI = std::find_if(
- Accesses->begin(), Accesses->end(),
- [](const MemoryAccess &MA) { return !isa<MemoryPhi>(MA); });
+ auto AI = find_if(
+ *Accesses, [](const MemoryAccess &MA) { return !isa<MemoryPhi>(MA); });
Accesses->insert(AI, NewAccess);
} else {
Accesses->push_back(NewAccess);
}
-
+ BlockNumberingValid.erase(BB);
return NewAccess;
}
-MemoryAccess *MemorySSA::createMemoryAccessBefore(Instruction *I,
- MemoryAccess *Definition,
- MemoryAccess *InsertPt) {
+
+MemoryUseOrDef *MemorySSA::createMemoryAccessBefore(Instruction *I,
+ MemoryAccess *Definition,
+ MemoryUseOrDef *InsertPt) {
assert(I->getParent() == InsertPt->getBlock() &&
"New and old access must be in the same block");
MemoryUseOrDef *NewAccess = createDefinedAccess(I, Definition);
auto *Accesses = getOrCreateAccessList(InsertPt->getBlock());
Accesses->insert(AccessList::iterator(InsertPt), NewAccess);
+ BlockNumberingValid.erase(InsertPt->getBlock());
return NewAccess;
}
-MemoryAccess *MemorySSA::createMemoryAccessAfter(Instruction *I,
- MemoryAccess *Definition,
- MemoryAccess *InsertPt) {
+MemoryUseOrDef *MemorySSA::createMemoryAccessAfter(Instruction *I,
+ MemoryAccess *Definition,
+ MemoryAccess *InsertPt) {
assert(I->getParent() == InsertPt->getBlock() &&
"New and old access must be in the same block");
MemoryUseOrDef *NewAccess = createDefinedAccess(I, Definition);
auto *Accesses = getOrCreateAccessList(InsertPt->getBlock());
Accesses->insertAfter(AccessList::iterator(InsertPt), NewAccess);
+ BlockNumberingValid.erase(InsertPt->getBlock());
return NewAccess;
}
+void MemorySSA::spliceMemoryAccessAbove(MemoryDef *Where,
+ MemoryUseOrDef *What) {
+ assert(What != getLiveOnEntryDef() &&
+ Where != getLiveOnEntryDef() && "Can't splice (above) LOE.");
+ assert(dominates(Where, What) && "Only upwards splices are permitted.");
+
+ if (Where == What)
+ return;
+ if (isa<MemoryDef>(What)) {
+ // TODO: possibly use removeMemoryAccess' more efficient RAUW
+ What->replaceAllUsesWith(What->getDefiningAccess());
+ What->setDefiningAccess(Where->getDefiningAccess());
+ Where->setDefiningAccess(What);
+ }
+ AccessList *Src = getWritableBlockAccesses(What->getBlock());
+ AccessList *Dest = getWritableBlockAccesses(Where->getBlock());
+ Dest->splice(AccessList::iterator(Where), *Src, What);
+
+ BlockNumberingValid.erase(What->getBlock());
+ if (What->getBlock() != Where->getBlock())
+ BlockNumberingValid.erase(Where->getBlock());
+}
+
/// \brief Helper function to create new memory accesses
MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I) {
// The assume intrinsic has a control dependency which we model by claiming
@@ -542,7 +1674,7 @@ MemoryUseOrDef *MemorySSA::createNewAccess(Instruction *I) {
MUD = new MemoryDef(I->getContext(), nullptr, I, I->getParent(), NextID++);
else
MUD = new MemoryUse(I->getContext(), nullptr, I, I->getParent());
- ValueToMemoryAccess.insert(std::make_pair(I, MUD));
+ ValueToMemoryAccess[I] = MUD;
return MUD;
}
@@ -611,6 +1743,7 @@ static MemoryAccess *onlySingleValue(MemoryPhi *MP) {
void MemorySSA::removeFromLookups(MemoryAccess *MA) {
assert(MA->use_empty() &&
"Trying to remove memory access that still has uses");
+ BlockNumbering.erase(MA);
if (MemoryUseOrDef *MUD = dyn_cast<MemoryUseOrDef>(MA))
MUD->setDefiningAccess(nullptr);
// Invalidate our walker's cache if necessary
@@ -624,7 +1757,9 @@ void MemorySSA::removeFromLookups(MemoryAccess *MA) {
} else {
MemoryInst = MA->getBlock();
}
- ValueToMemoryAccess.erase(MemoryInst);
+ auto VMA = ValueToMemoryAccess.find(MemoryInst);
+ if (VMA->second == MA)
+ ValueToMemoryAccess.erase(VMA);
auto AccessIt = PerBlockAccesses.find(MA->getBlock());
std::unique_ptr<AccessList> &Accesses = AccessIt->second;
@@ -652,8 +1787,27 @@ void MemorySSA::removeMemoryAccess(MemoryAccess *MA) {
}
// Re-point the uses at our defining access
- if (!MA->use_empty())
- MA->replaceAllUsesWith(NewDefTarget);
+ if (!MA->use_empty()) {
+ // Reset optimized on users of this store, and reset the uses.
+ // A few notes:
+ // 1. This is a slightly modified version of RAUW to avoid walking the
+ // uses twice here.
+ // 2. If we wanted to be complete, we would have to reset the optimized
+ // flags on users of phi nodes if doing the below makes a phi node have all
+ // the same arguments. Instead, we prefer that users call removeMemoryAccess
+ // on those phi nodes, because doing it here would be N^3.
+ if (MA->hasValueHandle())
+ ValueHandleBase::ValueIsRAUWd(MA, NewDefTarget);
+ // Note: We assume MemorySSA is not used in metadata since it's not really
+ // part of the IR.
+
+ while (!MA->use_empty()) {
+ Use &U = *MA->use_begin();
+ if (MemoryUse *MU = dyn_cast<MemoryUse>(U.getUser()))
+ MU->resetOptimized();
+ U.set(NewDefTarget);
+ }
+ }
// The call below to erase will destroy MA, so we can't change the order we
// are doing things here
@@ -674,6 +1828,7 @@ void MemorySSA::verifyMemorySSA() const {
verifyDefUses(F);
verifyDomination(F);
verifyOrdering(F);
+ Walker->verify(this);
}
/// \brief Verify that the order and existence of MemoryAccesses matches the
@@ -717,70 +1872,38 @@ void MemorySSA::verifyOrdering(Function &F) const {
/// \brief Verify the domination properties of MemorySSA by checking that each
/// definition dominates all of its uses.
void MemorySSA::verifyDomination(Function &F) const {
+#ifndef NDEBUG
for (BasicBlock &B : F) {
// Phi nodes are attached to basic blocks
- if (MemoryPhi *MP = getMemoryAccess(&B)) {
- for (User *U : MP->users()) {
- BasicBlock *UseBlock;
- // Phi operands are used on edges, we simulate the right domination by
- // acting as if the use occurred at the end of the predecessor block.
- if (MemoryPhi *P = dyn_cast<MemoryPhi>(U)) {
- for (const auto &Arg : P->operands()) {
- if (Arg == MP) {
- UseBlock = P->getIncomingBlock(Arg);
- break;
- }
- }
- } else {
- UseBlock = cast<MemoryAccess>(U)->getBlock();
- }
- (void)UseBlock;
- assert(DT->dominates(MP->getBlock(), UseBlock) &&
- "Memory PHI does not dominate it's uses");
- }
- }
+ if (MemoryPhi *MP = getMemoryAccess(&B))
+ for (const Use &U : MP->uses())
+ assert(dominates(MP, U) && "Memory PHI does not dominate its uses");
for (Instruction &I : B) {
MemoryAccess *MD = dyn_cast_or_null<MemoryDef>(getMemoryAccess(&I));
if (!MD)
continue;
- for (User *U : MD->users()) {
- BasicBlock *UseBlock;
- (void)UseBlock;
- // Things are allowed to flow to phi nodes over their predecessor edge.
- if (auto *P = dyn_cast<MemoryPhi>(U)) {
- for (const auto &Arg : P->operands()) {
- if (Arg == MD) {
- UseBlock = P->getIncomingBlock(Arg);
- break;
- }
- }
- } else {
- UseBlock = cast<MemoryAccess>(U)->getBlock();
- }
- assert(DT->dominates(MD->getBlock(), UseBlock) &&
- "Memory Def does not dominate it's uses");
- }
+ for (const Use &U : MD->uses())
+ assert(dominates(MD, U) && "Memory Def does not dominate its uses");
}
}
+#endif
}
/// \brief Verify the def-use lists in MemorySSA, by verifying that \p Use
/// appears in the use list of \p Def.
-///
-/// llvm_unreachable is used instead of asserts because this may be called in
-/// a build without asserts. In that case, we don't want this to turn into a
-/// nop.
+
void MemorySSA::verifyUseInDefs(MemoryAccess *Def, MemoryAccess *Use) const {
+#ifndef NDEBUG
// The live on entry use may cause us to get a NULL def here
- if (!Def) {
- if (!isLiveOnEntryDef(Use))
- llvm_unreachable("Null def but use not point to live on entry def");
- } else if (std::find(Def->user_begin(), Def->user_end(), Use) ==
- Def->user_end()) {
- llvm_unreachable("Did not find use in def's use list");
- }
+ if (!Def)
+ assert(isLiveOnEntryDef(Use) &&
+ "Null def but use not point to live on entry def");
+ else
+ assert(is_contained(Def->users(), Use) &&
+ "Did not find use in def's use list");
+#endif
}
/// \brief Verify the immediate use information, by walking all the memory
@@ -798,21 +1921,35 @@ void MemorySSA::verifyDefUses(Function &F) const {
}
for (Instruction &I : B) {
- if (MemoryAccess *MA = getMemoryAccess(&I)) {
- assert(isa<MemoryUseOrDef>(MA) &&
- "Found a phi node not attached to a bb");
- verifyUseInDefs(cast<MemoryUseOrDef>(MA)->getDefiningAccess(), MA);
+ if (MemoryUseOrDef *MA = getMemoryAccess(&I)) {
+ verifyUseInDefs(MA->getDefiningAccess(), MA);
}
}
}
}
-MemoryAccess *MemorySSA::getMemoryAccess(const Value *I) const {
- return ValueToMemoryAccess.lookup(I);
+MemoryUseOrDef *MemorySSA::getMemoryAccess(const Instruction *I) const {
+ return cast_or_null<MemoryUseOrDef>(ValueToMemoryAccess.lookup(I));
}
MemoryPhi *MemorySSA::getMemoryAccess(const BasicBlock *BB) const {
- return cast_or_null<MemoryPhi>(getMemoryAccess((const Value *)BB));
+ return cast_or_null<MemoryPhi>(ValueToMemoryAccess.lookup(cast<Value>(BB)));
+}
+
+/// Perform a local numbering on blocks so that instruction ordering can be
+/// determined in constant time.
+/// TODO: We currently just number in order. If we numbered by N, we could
+/// allow at least N-1 sequences of insertBefore or insertAfter (and at least
+/// log2(N) sequences of mixed before and after) without needing to invalidate
+/// the numbering.
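+///
+/// For example (illustrative): a block whose access list is
+/// [MemoryPhi, MemoryDef, MemoryUse] is numbered {1, 2, 3}, so
+/// locallyDominates() reduces to an integer comparison.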
+void MemorySSA::renumberBlock(const BasicBlock *B) const {
+ // The pre-increment ensures the numbers really start at 1.
+ unsigned long CurrentNumber = 0;
+ const AccessList *AL = getBlockAccesses(B);
+ assert(AL != nullptr && "Asking to renumber an empty block");
+ for (const auto &I : *AL)
+ BlockNumbering[&I] = ++CurrentNumber;
+ BlockNumberingValid.insert(B);
}
/// \brief Determine, for two memory accesses in the same block,
@@ -821,9 +1958,10 @@ MemoryPhi *MemorySSA::getMemoryAccess(const BasicBlock *BB) const {
bool MemorySSA::locallyDominates(const MemoryAccess *Dominator,
const MemoryAccess *Dominatee) const {
- assert((Dominator->getBlock() == Dominatee->getBlock()) &&
- "Asking for local domination when accesses are in different blocks!");
+ const BasicBlock *DominatorBlock = Dominator->getBlock();
+ assert((DominatorBlock == Dominatee->getBlock()) &&
+ "Asking for local domination when accesses are in different blocks!");
// A node dominates itself.
if (Dominatee == Dominator)
return true;
@@ -838,14 +1976,42 @@ bool MemorySSA::locallyDominates(const MemoryAccess *Dominator,
if (isLiveOnEntryDef(Dominator))
return true;
- // Get the access list for the block
- const AccessList *AccessList = getBlockAccesses(Dominator->getBlock());
- AccessList::const_reverse_iterator It(Dominator->getIterator());
+ if (!BlockNumberingValid.count(DominatorBlock))
+ renumberBlock(DominatorBlock);
+
+ unsigned long DominatorNum = BlockNumbering.lookup(Dominator);
+ // All numbers start with 1
+ assert(DominatorNum != 0 && "Block was not numbered properly");
+ unsigned long DominateeNum = BlockNumbering.lookup(Dominatee);
+ assert(DominateeNum != 0 && "Block was not numbered properly");
+ return DominatorNum < DominateeNum;
+}
+
+bool MemorySSA::dominates(const MemoryAccess *Dominator,
+ const MemoryAccess *Dominatee) const {
+ if (Dominator == Dominatee)
+ return true;
+
+ if (isLiveOnEntryDef(Dominatee))
+ return false;
+
+ if (Dominator->getBlock() != Dominatee->getBlock())
+ return DT->dominates(Dominator->getBlock(), Dominatee->getBlock());
+ return locallyDominates(Dominator, Dominatee);
+}
- // If we hit the beginning of the access list before we hit dominatee, we must
- // dominate it
- return std::none_of(It, AccessList->rend(),
- [&](const MemoryAccess &MA) { return &MA == Dominatee; });
+bool MemorySSA::dominates(const MemoryAccess *Dominator,
+ const Use &Dominatee) const {
+ if (MemoryPhi *MP = dyn_cast<MemoryPhi>(Dominatee.getUser())) {
+ BasicBlock *UseBB = MP->getIncomingBlock(Dominatee);
+ // The def must dominate the incoming block of the phi.
+ if (UseBB != Dominator->getBlock())
+ return DT->dominates(Dominator->getBlock(), UseBB);
+ // If the UseBB and the DefBB are the same, compare locally.
+ return locallyDominates(Dominator, cast<MemoryAccess>(Dominatee));
+ }
+ // If it's not a PHI node use, the normal dominates can already handle it.
+ return dominates(Dominator, cast<MemoryAccess>(Dominatee.getUser()));
}
const static char LiveOnEntryStr[] = "liveOnEntry";
@@ -924,25 +2090,26 @@ bool MemorySSAPrinterLegacyPass::runOnFunction(Function &F) {
return false;
}
-char MemorySSAAnalysis::PassID;
+AnalysisKey MemorySSAAnalysis::Key;
-MemorySSA MemorySSAAnalysis::run(Function &F, AnalysisManager<Function> &AM) {
+MemorySSAAnalysis::Result MemorySSAAnalysis::run(Function &F,
+ FunctionAnalysisManager &AM) {
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &AA = AM.getResult<AAManager>(F);
- return MemorySSA(F, &AA, &DT);
+ return MemorySSAAnalysis::Result(make_unique<MemorySSA>(F, &AA, &DT));
}
PreservedAnalyses MemorySSAPrinterPass::run(Function &F,
FunctionAnalysisManager &AM) {
OS << "MemorySSA for function: " << F.getName() << "\n";
- AM.getResult<MemorySSAAnalysis>(F).print(OS);
+ AM.getResult<MemorySSAAnalysis>(F).getMSSA().print(OS);
return PreservedAnalyses::all();
}
PreservedAnalyses MemorySSAVerifierPass::run(Function &F,
FunctionAnalysisManager &AM) {
- AM.getResult<MemorySSAAnalysis>(F).verifyMemorySSA();
+ AM.getResult<MemorySSAAnalysis>(F).getMSSA().verifyMemorySSA();
return PreservedAnalyses::all();
}
@@ -978,41 +2145,11 @@ MemorySSAWalker::MemorySSAWalker(MemorySSA *M) : MSSA(M) {}
MemorySSA::CachingWalker::CachingWalker(MemorySSA *M, AliasAnalysis *A,
DominatorTree *D)
- : MemorySSAWalker(M), AA(A), DT(D) {}
+ : MemorySSAWalker(M), Walker(*M, *A, *D, Cache), AutoResetWalker(true) {}
MemorySSA::CachingWalker::~CachingWalker() {}
-struct MemorySSA::CachingWalker::UpwardsMemoryQuery {
- // True if we saw a phi whose predecessor was a backedge
- bool SawBackedgePhi;
- // True if our original query started off as a call
- bool IsCall;
- // The pointer location we started the query with. This will be empty if
- // IsCall is true.
- MemoryLocation StartingLoc;
- // This is the instruction we were querying about.
- const Instruction *Inst;
- // Set of visited Instructions for this query.
- DenseSet<MemoryAccessPair> Visited;
- // Vector of visited call accesses for this query. This is separated out
- // because you can always cache and lookup the result of call queries (IE when
- // IsCall == true) for every call in the chain. The calls have no AA location
- // associated with them with them, and thus, no context dependence.
- SmallVector<const MemoryAccess *, 32> VisitedCalls;
- // The MemoryAccess we actually got called with, used to test local domination
- const MemoryAccess *OriginalAccess;
-
- UpwardsMemoryQuery()
- : SawBackedgePhi(false), IsCall(false), Inst(nullptr),
- OriginalAccess(nullptr) {}
-
- UpwardsMemoryQuery(const Instruction *Inst, const MemoryAccess *Access)
- : SawBackedgePhi(false), IsCall(ImmutableCallSite(Inst)), Inst(Inst),
- OriginalAccess(Access) {}
-};
-
void MemorySSA::CachingWalker::invalidateInfo(MemoryAccess *MA) {
-
// TODO: We can do much better cache invalidation with differently stored
// caches. For now, for MemoryUses, we simply remove them
// from the cache, and kill the entire call/non-call cache for everything
@@ -1026,220 +2163,38 @@ void MemorySSA::CachingWalker::invalidateInfo(MemoryAccess *MA) {
// itself.
if (MemoryUse *MU = dyn_cast<MemoryUse>(MA)) {
- UpwardsMemoryQuery Q;
- Instruction *I = MU->getMemoryInst();
- Q.IsCall = bool(ImmutableCallSite(I));
- Q.Inst = I;
- if (!Q.IsCall)
- Q.StartingLoc = MemoryLocation::get(I);
- doCacheRemove(MA, Q, Q.StartingLoc);
+ UpwardsMemoryQuery Q(MU->getMemoryInst(), MU);
+ Cache.remove(MU, Q.StartingLoc, Q.IsCall);
+ MU->resetOptimized();
} else {
// If it is not a use, the best we can do right now is destroy the cache.
- CachedUpwardsClobberingCall.clear();
- CachedUpwardsClobberingAccess.clear();
+ Cache.clear();
}
#ifdef EXPENSIVE_CHECKS
- // Run this only when expensive checks are enabled.
verifyRemoved(MA);
#endif
}
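A brief sketch of the caller-side contract (editor's note; MSSA and MU are illustrative names): a transform that changes what a load or store may observe can invalidate the cached answer and re-query, which also clears the use's optimized flag per the code above:

MemorySSAWalker *Walker = MSSA.getWalker();
Walker->invalidateInfo(MU);                       // drop cached clobber, resetOptimized()
MemoryAccess *Clobber = Walker->getClobberingMemoryAccess(MU); // recomputed by a fresh walk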
-void MemorySSA::CachingWalker::doCacheRemove(const MemoryAccess *M,
- const UpwardsMemoryQuery &Q,
- const MemoryLocation &Loc) {
- if (Q.IsCall)
- CachedUpwardsClobberingCall.erase(M);
- else
- CachedUpwardsClobberingAccess.erase({M, Loc});
-}
-
-void MemorySSA::CachingWalker::doCacheInsert(const MemoryAccess *M,
- MemoryAccess *Result,
- const UpwardsMemoryQuery &Q,
- const MemoryLocation &Loc) {
- // This is fine for Phis, since there are times where we can't optimize them.
- // Making a def its own clobber is never correct, though.
- assert((Result != M || isa<MemoryPhi>(M)) &&
- "Something can't clobber itself!");
- ++NumClobberCacheInserts;
- if (Q.IsCall)
- CachedUpwardsClobberingCall[M] = Result;
- else
- CachedUpwardsClobberingAccess[{M, Loc}] = Result;
-}
-
-MemoryAccess *
-MemorySSA::CachingWalker::doCacheLookup(const MemoryAccess *M,
- const UpwardsMemoryQuery &Q,
- const MemoryLocation &Loc) {
- ++NumClobberCacheLookups;
- MemoryAccess *Result;
-
- if (Q.IsCall)
- Result = CachedUpwardsClobberingCall.lookup(M);
- else
- Result = CachedUpwardsClobberingAccess.lookup({M, Loc});
-
- if (Result)
- ++NumClobberCacheHits;
- return Result;
-}
-
-bool MemorySSA::CachingWalker::instructionClobbersQuery(
- const MemoryDef *MD, UpwardsMemoryQuery &Q,
- const MemoryLocation &Loc) const {
- Instruction *DefMemoryInst = MD->getMemoryInst();
- assert(DefMemoryInst && "Defining instruction not actually an instruction");
-
- if (!Q.IsCall)
- return AA->getModRefInfo(DefMemoryInst, Loc) & MRI_Mod;
-
- // If this is a call, mark it for caching
- if (ImmutableCallSite(DefMemoryInst))
- Q.VisitedCalls.push_back(MD);
- ModRefInfo I = AA->getModRefInfo(DefMemoryInst, ImmutableCallSite(Q.Inst));
- return I != MRI_NoModRef;
-}
-
-MemoryAccessPair MemorySSA::CachingWalker::UpwardsDFSWalk(
- MemoryAccess *StartingAccess, const MemoryLocation &Loc,
- UpwardsMemoryQuery &Q, bool FollowingBackedge) {
- MemoryAccess *ModifyingAccess = nullptr;
-
- auto DFI = df_begin(StartingAccess);
- for (auto DFE = df_end(StartingAccess); DFI != DFE;) {
- MemoryAccess *CurrAccess = *DFI;
- if (MSSA->isLiveOnEntryDef(CurrAccess))
- return {CurrAccess, Loc};
- // If this is a MemoryDef, check whether it clobbers our current query. This
- // needs to be done before consulting the cache, because the cache reports
- // the clobber for CurrAccess. If CurrAccess is a clobber for this query,
- // and we ask the cache for information first, then we might skip this
- // clobber, which is bad.
- if (auto *MD = dyn_cast<MemoryDef>(CurrAccess)) {
- // If we hit the top, stop following this path.
- // While we can do lookups, we can't sanely do inserts here unless we were
- // to track everything we saw along the way, since we don't know where we
- // will stop.
- if (instructionClobbersQuery(MD, Q, Loc)) {
- ModifyingAccess = CurrAccess;
- break;
- }
- }
- if (auto CacheResult = doCacheLookup(CurrAccess, Q, Loc))
- return {CacheResult, Loc};
-
- // We need to know whether it is a phi so we can track backedges.
- // Otherwise, walk all upward defs.
- if (!isa<MemoryPhi>(CurrAccess)) {
- ++DFI;
- continue;
- }
-
-#ifndef NDEBUG
- // The loop below visits the phi's children for us. Because phis are the
- // only things with multiple edges, skipping the children should always lead
- // us to the end of the loop.
- //
- // Use a copy of DFI because skipChildren would kill our search stack, which
- // would make caching anything on the way back impossible.
- auto DFICopy = DFI;
- assert(DFICopy.skipChildren() == DFE &&
- "Skipping phi's children doesn't end the DFS?");
-#endif
-
- const MemoryAccessPair PHIPair(CurrAccess, Loc);
-
- // Don't try to optimize this phi again if we've already tried to do so.
- if (!Q.Visited.insert(PHIPair).second) {
- ModifyingAccess = CurrAccess;
- break;
- }
-
- std::size_t InitialVisitedCallSize = Q.VisitedCalls.size();
-
- // Recurse on PHI nodes, since we need to change locations.
- // TODO: Allow graphtraits on pairs, which would turn this whole function
- // into a normal single depth first walk.
- MemoryAccess *FirstDef = nullptr;
- for (auto MPI = upward_defs_begin(PHIPair), MPE = upward_defs_end();
- MPI != MPE; ++MPI) {
- bool Backedge =
- !FollowingBackedge &&
- DT->dominates(CurrAccess->getBlock(), MPI.getPhiArgBlock());
-
- MemoryAccessPair CurrentPair =
- UpwardsDFSWalk(MPI->first, MPI->second, Q, Backedge);
- // All the phi arguments should reach the same point if we can bypass
- // this phi. The alternative is that they hit this phi node, which
- // means we can skip this argument.
- if (FirstDef && CurrentPair.first != PHIPair.first &&
- CurrentPair.first != FirstDef) {
- ModifyingAccess = CurrAccess;
- break;
- }
-
- if (!FirstDef)
- FirstDef = CurrentPair.first;
- }
-
- // If we exited the loop early, go with the result it gave us.
- if (!ModifyingAccess) {
- assert(FirstDef && "Found a Phi with no upward defs?");
- ModifyingAccess = FirstDef;
- } else {
- // If we can't optimize this Phi, then we can't safely cache any of the
- // calls we visited when trying to optimize it. Wipe them out now.
- Q.VisitedCalls.resize(InitialVisitedCallSize);
- }
- break;
- }
-
- if (!ModifyingAccess)
- return {MSSA->getLiveOnEntryDef(), Q.StartingLoc};
-
- const BasicBlock *OriginalBlock = StartingAccess->getBlock();
- assert(DFI.getPathLength() > 0 && "We dropped our path?");
- unsigned N = DFI.getPathLength();
- // If we found a clobbering def, the last element in the path will be our
- // clobber, so we don't want to cache that to itself. OTOH, if we optimized a
- // phi, we can add the last thing in the path to the cache, since that won't
- // be the result.
- if (DFI.getPath(N - 1) == ModifyingAccess)
- --N;
- for (; N > 1; --N) {
- MemoryAccess *CacheAccess = DFI.getPath(N - 1);
- BasicBlock *CurrBlock = CacheAccess->getBlock();
- if (!FollowingBackedge)
- doCacheInsert(CacheAccess, ModifyingAccess, Q, Loc);
- if (DT->dominates(CurrBlock, OriginalBlock) &&
- (CurrBlock != OriginalBlock || !FollowingBackedge ||
- MSSA->locallyDominates(CacheAccess, StartingAccess)))
- break;
- }
-
- // Cache everything else on the way back. The caller should cache
- // StartingAccess for us.
- for (; N > 1; --N) {
- MemoryAccess *CacheAccess = DFI.getPath(N - 1);
- doCacheInsert(CacheAccess, ModifyingAccess, Q, Loc);
- }
-
- return {ModifyingAccess, Loc};
-}
-
/// \brief Walk the use-def chains starting at \p MA and find
/// the MemoryAccess that actually clobbers Loc.
///
/// \returns our clobbering memory access
MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess(
MemoryAccess *StartingAccess, UpwardsMemoryQuery &Q) {
- return UpwardsDFSWalk(StartingAccess, Q.StartingLoc, Q, false).first;
+ MemoryAccess *New = Walker.findClobber(StartingAccess, Q);
+#ifdef EXPENSIVE_CHECKS
+ MemoryAccess *NewNoCache =
+ Walker.findClobber(StartingAccess, Q, /*UseWalkerCache=*/false);
+ assert(NewNoCache == New && "Cache made us hand back a different result?");
+#endif
+ if (AutoResetWalker)
+ resetClobberWalker();
+ return New;
}
MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess(
- MemoryAccess *StartingAccess, MemoryLocation &Loc) {
+ MemoryAccess *StartingAccess, const MemoryLocation &Loc) {
if (isa<MemoryPhi>(StartingAccess))
return StartingAccess;
@@ -1257,10 +2212,10 @@ MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess(
UpwardsMemoryQuery Q;
Q.OriginalAccess = StartingUseOrDef;
Q.StartingLoc = Loc;
- Q.Inst = StartingUseOrDef->getMemoryInst();
+ Q.Inst = I;
Q.IsCall = false;
- if (auto CacheResult = doCacheLookup(StartingUseOrDef, Q, Q.StartingLoc))
+ if (auto *CacheResult = Cache.lookup(StartingUseOrDef, Loc, Q.IsCall))
return CacheResult;
// Unlike the other function, do not walk to the def of a def, because we are
@@ -1270,9 +2225,6 @@ MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess(
: StartingUseOrDef;
MemoryAccess *Clobber = getClobberingMemoryAccess(DefiningAccess, Q);
- // Only cache this if it wouldn't make Clobber point to itself.
- if (Clobber != StartingAccess)
- doCacheInsert(Q.OriginalAccess, Clobber, Q, Q.StartingLoc);
DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is ");
DEBUG(dbgs() << *StartingUseOrDef << "\n");
DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is ");
@@ -1281,28 +2233,38 @@ MemoryAccess *MemorySSA::CachingWalker::getClobberingMemoryAccess(
}
MemoryAccess *
-MemorySSA::CachingWalker::getClobberingMemoryAccess(const Instruction *I) {
- // There should be no way to lookup an instruction and get a phi as the
- // access, since we only map BB's to PHI's. So, this must be a use or def.
- auto *StartingAccess = cast<MemoryUseOrDef>(MSSA->getMemoryAccess(I));
-
- bool IsCall = bool(ImmutableCallSite(I));
-
+MemorySSA::CachingWalker::getClobberingMemoryAccess(MemoryAccess *MA) {
+ auto *StartingAccess = dyn_cast<MemoryUseOrDef>(MA);
+ // If this is a MemoryPhi, we can't do anything.
+ if (!StartingAccess)
+ return MA;
+
+ // If this is an already optimized use or def, return the optimized result.
+ // Note: Currently, we do not store the optimized def result because we'd need
+ // a separate field, since we can't use it as the defining access.
+ if (MemoryUse *MU = dyn_cast<MemoryUse>(StartingAccess))
+ if (MU->isOptimized())
+ return MU->getDefiningAccess();
+
+ const Instruction *I = StartingAccess->getMemoryInst();
+ UpwardsMemoryQuery Q(I, StartingAccess);
// We can't sanely do anything with fences; they conservatively
// clobber all memory, and have no locations to get pointers from to
// try to disambiguate.
- if (!IsCall && I->isFenceLike())
+ if (!Q.IsCall && I->isFenceLike())
return StartingAccess;
- UpwardsMemoryQuery Q;
- Q.OriginalAccess = StartingAccess;
- Q.IsCall = IsCall;
- if (!Q.IsCall)
- Q.StartingLoc = MemoryLocation::get(I);
- Q.Inst = I;
- if (auto CacheResult = doCacheLookup(StartingAccess, Q, Q.StartingLoc))
+ if (auto *CacheResult = Cache.lookup(StartingAccess, Q.StartingLoc, Q.IsCall))
return CacheResult;
+ if (isUseTriviallyOptimizableToLiveOnEntry(*MSSA->AA, I)) {
+ MemoryAccess *LiveOnEntry = MSSA->getLiveOnEntryDef();
+ Cache.insert(StartingAccess, LiveOnEntry, Q.StartingLoc, Q.IsCall);
+ if (MemoryUse *MU = dyn_cast<MemoryUse>(StartingAccess))
+ MU->setDefiningAccess(LiveOnEntry, true);
+ return LiveOnEntry;
+ }
+
// Start with the thing we already think clobbers this location
MemoryAccess *DefiningAccess = StartingAccess->getDefiningAccess();
@@ -1312,50 +2274,32 @@ MemorySSA::CachingWalker::getClobberingMemoryAccess(const Instruction *I) {
return DefiningAccess;
MemoryAccess *Result = getClobberingMemoryAccess(DefiningAccess, Q);
- // DFS won't cache a result for DefiningAccess. So, if DefiningAccess isn't
- // our clobber, be sure that it gets a cache entry, too.
- if (Result != DefiningAccess)
- doCacheInsert(DefiningAccess, Result, Q, Q.StartingLoc);
- doCacheInsert(Q.OriginalAccess, Result, Q, Q.StartingLoc);
- // TODO: When this implementation is more mature, we may want to figure out
- // what this additional caching buys us. It's most likely A Good Thing.
- if (Q.IsCall)
- for (const MemoryAccess *MA : Q.VisitedCalls)
- if (MA != Result)
- doCacheInsert(MA, Result, Q, Q.StartingLoc);
-
DEBUG(dbgs() << "Starting Memory SSA clobber for " << *I << " is ");
DEBUG(dbgs() << *DefiningAccess << "\n");
DEBUG(dbgs() << "Final Memory SSA clobber for " << *I << " is ");
DEBUG(dbgs() << *Result << "\n");
+ if (MemoryUse *MU = dyn_cast<MemoryUse>(StartingAccess))
+ MU->setDefiningAccess(Result, true);
return Result;
}
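To illustrate the effect of the setDefiningAccess(Result, true) caching above (an editor's sketch; Load, MU and MSSA are illustrative), a second clobber query on the same MemoryUse is answered from the isOptimized() fast path without another walk:

MemoryUse *MU = cast<MemoryUse>(MSSA.getMemoryAccess(&Load)); // Load: a load with an access
MemorySSAWalker *Walker = MSSA.getWalker();
MemoryAccess *C1 = Walker->getClobberingMemoryAccess(MU); // performs the upward walk
MemoryAccess *C2 = Walker->getClobberingMemoryAccess(MU); // returns MU->getDefiningAccess()
assert(C1 == C2 && "optimized result should be stable until invalidated");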
// Verify that MA doesn't exist in any of the caches.
void MemorySSA::CachingWalker::verifyRemoved(MemoryAccess *MA) {
-#ifndef NDEBUG
- for (auto &P : CachedUpwardsClobberingAccess)
- assert(P.first.first != MA && P.second != MA &&
- "Found removed MemoryAccess in cache.");
- for (auto &P : CachedUpwardsClobberingCall)
- assert(P.first != MA && P.second != MA &&
- "Found removed MemoryAccess in cache.");
-#endif // !NDEBUG
+ assert(!Cache.contains(MA) && "Found removed MemoryAccess in cache.");
}
MemoryAccess *
-DoNothingMemorySSAWalker::getClobberingMemoryAccess(const Instruction *I) {
- MemoryAccess *MA = MSSA->getMemoryAccess(I);
+DoNothingMemorySSAWalker::getClobberingMemoryAccess(MemoryAccess *MA) {
if (auto *Use = dyn_cast<MemoryUseOrDef>(MA))
return Use->getDefiningAccess();
return MA;
}
MemoryAccess *DoNothingMemorySSAWalker::getClobberingMemoryAccess(
- MemoryAccess *StartingAccess, MemoryLocation &) {
+ MemoryAccess *StartingAccess, const MemoryLocation &) {
if (auto *Use = dyn_cast<MemoryUseOrDef>(StartingAccess))
return Use->getDefiningAccess();
return StartingAccess;
}
-}
+} // namespace llvm
diff --git a/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp b/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp
index eb9188518624..0d623df77a67 100644
--- a/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp
@@ -89,6 +89,44 @@ void llvm::appendToGlobalDtors(Module &M, Function *F, int Priority, Constant *D
appendToGlobalArray("llvm.global_dtors", M, F, Priority, Data);
}
+static void appendToUsedList(Module &M, StringRef Name, ArrayRef<GlobalValue *> Values) {
+ GlobalVariable *GV = M.getGlobalVariable(Name);
+ SmallPtrSet<Constant *, 16> InitAsSet;
+ SmallVector<Constant *, 16> Init;
+ if (GV) {
+ ConstantArray *CA = dyn_cast<ConstantArray>(GV->getInitializer());
+ for (auto &Op : CA->operands()) {
+ Constant *C = cast_or_null<Constant>(Op);
+ if (InitAsSet.insert(C).second)
+ Init.push_back(C);
+ }
+ GV->eraseFromParent();
+ }
+
+ Type *Int8PtrTy = llvm::Type::getInt8PtrTy(M.getContext());
+ for (auto *V : Values) {
+ Constant *C = ConstantExpr::getBitCast(V, Int8PtrTy);
+ if (InitAsSet.insert(C).second)
+ Init.push_back(C);
+ }
+
+ if (Init.empty())
+ return;
+
+ ArrayType *ATy = ArrayType::get(Int8PtrTy, Init.size());
+ GV = new llvm::GlobalVariable(M, ATy, false, GlobalValue::AppendingLinkage,
+ ConstantArray::get(ATy, Init), Name);
+ GV->setSection("llvm.metadata");
+}
+
+void llvm::appendToUsed(Module &M, ArrayRef<GlobalValue *> Values) {
+ appendToUsedList(M, "llvm.used", Values);
+}
+
+void llvm::appendToCompilerUsed(Module &M, ArrayRef<GlobalValue *> Values) {
+ appendToUsedList(M, "llvm.compiler.used", Values);
+}
+
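A short usage sketch (editor's note; Ctor stands for any GlobalValue the caller wants preserved): callers hand the new helpers a list of globals, and the helpers deduplicate and rebuild the appending arrays:

SmallVector<GlobalValue *, 2> MustKeep = {Ctor};
appendToUsed(M, MustKeep);          // @llvm.used: also protects against linker-level stripping
appendToCompilerUsed(M, MustKeep);  // @llvm.compiler.used: protects against compiler DCE only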
Function *llvm::checkSanitizerInterfaceFunction(Constant *FuncOrBitcast) {
if (isa<Function>(FuncOrBitcast))
return cast<Function>(FuncOrBitcast);
@@ -104,7 +142,7 @@ std::pair<Function *, Function *> llvm::createSanitizerCtorAndInitFunctions(
ArrayRef<Type *> InitArgTypes, ArrayRef<Value *> InitArgs,
StringRef VersionCheckName) {
assert(!InitName.empty() && "Expected init function name");
- assert(InitArgTypes.size() == InitArgTypes.size() &&
+ assert(InitArgs.size() == InitArgTypes.size() &&
"Sanitizer's init function expects different number of arguments");
Function *Ctor = Function::Create(
FunctionType::get(Type::getVoidTy(M.getContext()), false),
@@ -126,3 +164,67 @@ std::pair<Function *, Function *> llvm::createSanitizerCtorAndInitFunctions(
}
return std::make_pair(Ctor, InitFunction);
}
+
+void llvm::filterDeadComdatFunctions(
+ Module &M, SmallVectorImpl<Function *> &DeadComdatFunctions) {
+ // Build a map from the comdat to the number of entries in that comdat we
+ // think are dead. If this fully covers the comdat group, then the entire
+ // group is dead. If we find another entry in the comdat group though, we'll
+ // have to preserve the whole group.
+ SmallDenseMap<Comdat *, int, 16> ComdatEntriesCovered;
+ for (Function *F : DeadComdatFunctions) {
+ Comdat *C = F->getComdat();
+ assert(C && "Expected all input GVs to be in a comdat!");
+ ComdatEntriesCovered[C] += 1;
+ }
+
+ auto CheckComdat = [&](Comdat &C) {
+ auto CI = ComdatEntriesCovered.find(&C);
+ if (CI == ComdatEntriesCovered.end())
+ return;
+
+ // If this could have been covered by a dead entry, just subtract one to
+ // account for it.
+ if (CI->second > 0) {
+ CI->second -= 1;
+ return;
+ }
+
+ // If we've already accounted for all the entries that were dead, the
+ // entire comdat is alive so remove it from the map.
+ ComdatEntriesCovered.erase(CI);
+ };
+
+ auto CheckAllComdats = [&] {
+ for (Function &F : M.functions())
+ if (Comdat *C = F.getComdat()) {
+ CheckComdat(*C);
+ if (ComdatEntriesCovered.empty())
+ return;
+ }
+ for (GlobalVariable &GV : M.globals())
+ if (Comdat *C = GV.getComdat()) {
+ CheckComdat(*C);
+ if (ComdatEntriesCovered.empty())
+ return;
+ }
+ for (GlobalAlias &GA : M.aliases())
+ if (Comdat *C = GA.getComdat()) {
+ CheckComdat(*C);
+ if (ComdatEntriesCovered.empty())
+ return;
+ }
+ };
+ CheckAllComdats();
+
+ if (ComdatEntriesCovered.empty()) {
+ DeadComdatFunctions.clear();
+ return;
+ }
+
+ // Remove the entries that were not covering.
+ erase_if(DeadComdatFunctions, [&](GlobalValue *GV) {
+ return ComdatEntriesCovered.find(GV->getComdat()) ==
+ ComdatEntriesCovered.end();
+ });
+}
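Intended calling pattern, sketched by the editor (the candidate-collection helper is hypothetical): gather comdat functions believed dead, let the filter drop any whose group still has live members, then erase the rest:

SmallVector<Function *, 8> DeadFns = collectPresumedDeadComdatFunctions(M); // hypothetical helper
filterDeadComdatFunctions(M, DeadFns);
// Only functions whose entire comdat group is dead survive the filter, so
// erasing them cannot leave a partially-deleted group behind.
for (Function *F : DeadFns)
  F->eraseFromParent();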
diff --git a/contrib/llvm/lib/Transforms/Utils/NameAnonFunctions.cpp b/contrib/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp
index c4f3839d8482..34dc1cccdd5b 100644
--- a/contrib/llvm/lib/Transforms/Utils/NameAnonFunctions.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/NameAnonGlobals.cpp
@@ -1,4 +1,4 @@
-//===- NameAnonFunctions.cpp - ThinLTO Summary-based Function Import ------===//
+//===- NameAnonGlobals.cpp - ThinLTO Support: Name Unnamed Globals --------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,11 +7,13 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements naming anonymous function to make sure they can be
-// refered to by ThinLTO.
+// This file implements naming anonymous globals to make sure they can be
+// referred to by ThinLTO.
//
//===----------------------------------------------------------------------===//
+#include "llvm/Transforms/Utils/NameAnonGlobals.h"
+
#include "llvm/ADT/SmallString.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/MD5.h"
@@ -19,8 +21,9 @@
using namespace llvm;
+namespace {
// Compute a "unique" hash for the module based on the name of the public
-// functions.
+// globals.
class ModuleHasher {
Module &TheModule;
std::string TheHash;
@@ -57,46 +60,62 @@ public:
return TheHash;
}
};
+} // end anonymous namespace
-// Rename all the anon functions in the module
-bool llvm::nameUnamedFunctions(Module &M) {
+// Rename all the anon globals in the module
+bool llvm::nameUnamedGlobals(Module &M) {
bool Changed = false;
ModuleHasher ModuleHash(M);
int count = 0;
- for (auto &F : M) {
- if (F.hasName())
- continue;
- F.setName(Twine("anon.") + ModuleHash.get() + "." + Twine(count++));
+ auto RenameIfNeed = [&](GlobalValue &GV) {
+ if (GV.hasName())
+ return;
+ GV.setName(Twine("anon.") + ModuleHash.get() + "." + Twine(count++));
Changed = true;
- }
+ };
+ for (auto &GO : M.global_objects())
+ RenameIfNeed(GO);
+ for (auto &GA : M.aliases())
+ RenameIfNeed(GA);
+
return Changed;
}
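For concreteness (editor's note): a typical ThinLTO pre-link caller only needs the boolean result to report whether the module changed; the naming scheme itself is fixed by the code above:

// nameUnamedGlobals() is the function above; callers such as runOnModule
// simply forward its "changed" result.
bool Changed = nameUnamedGlobals(M);
// Every previously unnamed global object or alias now has a name of the form
// "anon.<module hash>.<counter>", so ThinLTO summaries can refer to it.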
namespace {
-// Simple pass that provides a name to every anon function.
-class NameAnonFunction : public ModulePass {
+// Legacy pass that provides a name to every anon global.
+class NameAnonGlobalLegacyPass : public ModulePass {
public:
/// Pass identification, replacement for typeid
static char ID;
/// Specify pass name for debug output
- const char *getPassName() const override { return "Name Anon Functions"; }
+ StringRef getPassName() const override { return "Name Anon Globals"; }
- explicit NameAnonFunction() : ModulePass(ID) {}
+ explicit NameAnonGlobalLegacyPass() : ModulePass(ID) {}
- bool runOnModule(Module &M) override { return nameUnamedFunctions(M); }
+ bool runOnModule(Module &M) override { return nameUnamedGlobals(M); }
};
-char NameAnonFunction::ID = 0;
+char NameAnonGlobalLegacyPass::ID = 0;
} // anonymous namespace
-INITIALIZE_PASS_BEGIN(NameAnonFunction, "name-anon-functions",
- "Provide a name to nameless functions", false, false)
-INITIALIZE_PASS_END(NameAnonFunction, "name-anon-functions",
- "Provide a name to nameless functions", false, false)
+PreservedAnalyses NameAnonGlobalPass::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ if (!nameUnamedGlobals(M))
+ return PreservedAnalyses::all();
+
+ return PreservedAnalyses::none();
+}
+
+INITIALIZE_PASS_BEGIN(NameAnonGlobalLegacyPass, "name-anon-globals",
+ "Provide a name to nameless globals", false, false)
+INITIALIZE_PASS_END(NameAnonGlobalLegacyPass, "name-anon-globals",
+ "Provide a name to nameless globals", false, false)
namespace llvm {
-ModulePass *createNameAnonFunctionPass() { return new NameAnonFunction(); }
+ModulePass *createNameAnonGlobalPass() {
+ return new NameAnonGlobalLegacyPass();
+}
}
diff --git a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index cbf385d56339..35faa6f65efd 100644
--- a/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -907,6 +907,8 @@ NextIteration:
// The currently active variable for this block is now the PHI.
IncomingVals[AllocaNo] = APN;
+ if (DbgDeclareInst *DDI = AllocaDbgDeclares[AllocaNo])
+ ConvertDebugDeclareToDebugValue(DDI, APN, DIB);
// Get the next phi node.
++PNI;
diff --git a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp
index 88b39dd7f664..8e93ee757a15 100644
--- a/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp
@@ -482,5 +482,5 @@ bool
LoadAndStorePromoter::isInstInList(Instruction *I,
const SmallVectorImpl<Instruction*> &Insts)
const {
- return std::find(Insts.begin(), Insts.end(), I) != Insts.end();
+ return is_contained(Insts, I);
}
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
index c197317ac771..3846b21c502e 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -11,27 +11,39 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CallSite.h"
#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
@@ -40,15 +52,29 @@
#include "llvm/IR/Operator.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/IR/Type.h"
+#include "llvm/IR/User.h"
+#include "llvm/IR/Value.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
+#include <cassert>
+#include <climits>
+#include <cstddef>
+#include <cstdint>
+#include <iterator>
#include <map>
#include <set>
+#include <utility>
+#include <vector>
+
using namespace llvm;
using namespace PatternMatch;
@@ -110,6 +136,7 @@ STATISTIC(NumSinkCommons,
STATISTIC(NumSpeculations, "Number of speculative executed instructions");
namespace {
+
// The first field contains the value that the switch produces when a certain
// case group is selected, and the second field is a vector containing the
// cases composing the case group.
@@ -168,13 +195,17 @@ public:
SmallPtrSetImpl<BasicBlock *> *LoopHeaders)
: TTI(TTI), DL(DL), BonusInstThreshold(BonusInstThreshold), AC(AC),
LoopHeaders(LoopHeaders) {}
+
bool run(BasicBlock *BB);
};
-}
+
+} // end anonymous namespace
/// Return true if it is safe to merge these two
/// terminator instructions together.
-static bool SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2) {
+static bool
+SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2,
+ SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) {
if (SI1 == SI2)
return false; // Can't merge with self!
@@ -183,18 +214,22 @@ static bool SafeToMergeTerminators(TerminatorInst *SI1, TerminatorInst *SI2) {
// conflicting incoming values from the two switch blocks.
BasicBlock *SI1BB = SI1->getParent();
BasicBlock *SI2BB = SI2->getParent();
- SmallPtrSet<BasicBlock *, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB));
+ SmallPtrSet<BasicBlock *, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB));
+ bool Fail = false;
for (BasicBlock *Succ : successors(SI2BB))
if (SI1Succs.count(Succ))
for (BasicBlock::iterator BBI = Succ->begin(); isa<PHINode>(BBI); ++BBI) {
PHINode *PN = cast<PHINode>(BBI);
if (PN->getIncomingValueForBlock(SI1BB) !=
- PN->getIncomingValueForBlock(SI2BB))
- return false;
+ PN->getIncomingValueForBlock(SI2BB)) {
+ if (FailBlocks)
+ FailBlocks->insert(Succ);
+ Fail = true;
+ }
}
- return true;
+ return !Fail;
}
/// Return true if it is safe and profitable to merge these two terminator
@@ -621,7 +656,8 @@ private:
}
}
};
-}
+
+} // end anonymous namespace
static void EraseTerminatorInstAndDCECond(TerminatorInst *TI) {
Instruction *Cond = nullptr;
@@ -706,7 +742,7 @@ static bool ValuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
if (V1->size() > V2->size())
std::swap(V1, V2);
- if (V1->size() == 0)
+ if (V1->empty())
return false;
if (V1->size() == 1) {
// Just scan V2.
@@ -874,6 +910,7 @@ bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(
}
namespace {
+
/// This class implements a stable ordering of constant
/// integers that does not depend on their address. This is important for
/// applications that sort ConstantInt's to ensure uniqueness.
@@ -882,7 +919,8 @@ struct ConstantIntOrdering {
return LHS->getValue().ult(RHS->getValue());
}
};
-}
+
+} // end anonymous namespace
static int ConstantIntSortPredicate(ConstantInt *const *P1,
ConstantInt *const *P2) {
@@ -954,7 +992,16 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI,
TerminatorInst *PTI = Pred->getTerminator();
Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
- if (PCV == CV && SafeToMergeTerminators(TI, PTI)) {
+ if (PCV == CV && TI != PTI) {
+ SmallSetVector<BasicBlock*, 4> FailBlocks;
+ if (!SafeToMergeTerminators(TI, PTI, &FailBlocks)) {
+ for (auto *Succ : FailBlocks) {
+ std::vector<BasicBlock*> Blocks = { TI->getParent() };
+ if (!SplitBlockPredecessors(Succ, Blocks, ".fold.split"))
+ return false;
+ }
+ }
+
// Figure out which 'cases' to copy from SI to PSI.
std::vector<ValueEqualityComparisonCase> BBCases;
BasicBlock *BBDefault = GetValueEqualityComparisonCases(TI, BBCases);
@@ -1215,7 +1262,7 @@ static bool HoistThenElseCodeToIf(BranchInst *BI,
BIParent->getInstList().splice(BI->getIterator(), BB1->getInstList(), I1);
if (!I2->use_empty())
I2->replaceAllUsesWith(I1);
- I1->intersectOptionalDataWith(I2);
+ I1->andIRFlags(I2);
unsigned KnownIDs[] = {LLVMContext::MD_tbaa,
LLVMContext::MD_range,
LLVMContext::MD_fpmath,
@@ -1227,6 +1274,14 @@ static bool HoistThenElseCodeToIf(BranchInst *BI,
LLVMContext::MD_dereferenceable_or_null,
LLVMContext::MD_mem_parallel_loop_access};
combineMetadata(I1, I2, KnownIDs);
+
+ // If the debug loc for I1 and I2 are different, as we are combining them
+ // into one instruction, we do not want to select debug loc randomly from
+ // I1 or I2.
+ if (!isa<CallInst>(I1) && I1->getDebugLoc() != I2->getDebugLoc())
+ I1->setDebugLoc(
+ DILocation::getMergedLocation(I1->getDebugLoc(), I2->getDebugLoc()));
+
I2->eraseFromParent();
Changed = true;
@@ -1319,172 +1374,446 @@ HoistTerminator:
return true;
}
-/// Given an unconditional branch that goes to BBEnd,
-/// check whether BBEnd has only two predecessors and the other predecessor
-/// ends with an unconditional branch. If it is true, sink any common code
-/// in the two predecessors to BBEnd.
-static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
- assert(BI1->isUnconditional());
- BasicBlock *BB1 = BI1->getParent();
- BasicBlock *BBEnd = BI1->getSuccessor(0);
-
- // Check that BBEnd has two predecessors and the other predecessor ends with
- // an unconditional branch.
- pred_iterator PI = pred_begin(BBEnd), PE = pred_end(BBEnd);
- BasicBlock *Pred0 = *PI++;
- if (PI == PE) // Only one predecessor.
+// Is it legal to place a variable in operand \c OpIdx of \c I?
+// FIXME: This should be promoted to Instruction.
+static bool canReplaceOperandWithVariable(const Instruction *I,
+ unsigned OpIdx) {
+ // We can't have a PHI with a metadata type.
+ if (I->getOperand(OpIdx)->getType()->isMetadataTy())
return false;
- BasicBlock *Pred1 = *PI++;
- if (PI != PE) // More than two predecessors.
- return false;
- BasicBlock *BB2 = (Pred0 == BB1) ? Pred1 : Pred0;
- BranchInst *BI2 = dyn_cast<BranchInst>(BB2->getTerminator());
- if (!BI2 || !BI2->isUnconditional())
+
+ // Early exit.
+ if (!isa<Constant>(I->getOperand(OpIdx)))
+ return true;
+
+ switch (I->getOpcode()) {
+ default:
+ return true;
+ case Instruction::Call:
+ case Instruction::Invoke:
+ // FIXME: many arithmetic intrinsics have no issue taking a
+ // variable; however, it's hard to distinguish these from
+ // specials such as @llvm.frameaddress that require a constant.
+ if (isa<IntrinsicInst>(I))
+ return false;
+
+ // Constant bundle operands may need to retain their constant-ness for
+ // correctness.
+ if (ImmutableCallSite(I).isBundleOperand(OpIdx))
+ return false;
+
+ return true;
+
+ case Instruction::ShuffleVector:
+ // Shufflevector masks are constant.
+ return OpIdx != 2;
+ case Instruction::ExtractValue:
+ case Instruction::InsertValue:
+ // All operands apart from the first are constant.
+ return OpIdx == 0;
+ case Instruction::Alloca:
return false;
+ case Instruction::GetElementPtr:
+ if (OpIdx == 0)
+ return true;
+ gep_type_iterator It = std::next(gep_type_begin(I), OpIdx - 1);
+ return It.isSequential();
+ }
+}
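An editor's illustration of the per-opcode rules above (SomeInst stands for an instruction reachable from the caller): the vector inputs of a shufflevector may be PHI-fed, its mask may not:

if (auto *SV = dyn_cast<ShuffleVectorInst>(SomeInst)) {
  assert(canReplaceOperandWithVariable(SV, 0) &&  // input vectors can vary
         "input vector operand should be replaceable");
  assert(!canReplaceOperandWithVariable(SV, 2) && // the mask must stay constant
         "shufflevector mask must not become a PHI");
}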
- // Gather the PHI nodes in BBEnd.
- SmallDenseMap<std::pair<Value *, Value *>, PHINode *> JointValueMap;
- Instruction *FirstNonPhiInBBEnd = nullptr;
- for (BasicBlock::iterator I = BBEnd->begin(), E = BBEnd->end(); I != E; ++I) {
- if (PHINode *PN = dyn_cast<PHINode>(I)) {
- Value *BB1V = PN->getIncomingValueForBlock(BB1);
- Value *BB2V = PN->getIncomingValueForBlock(BB2);
- JointValueMap[std::make_pair(BB1V, BB2V)] = PN;
- } else {
- FirstNonPhiInBBEnd = &*I;
- break;
- }
+// All instructions in Insts belong to different blocks that all unconditionally
+// branch to a common successor. Analyze each instruction and return true if it
+// would be possible to sink them into their successor, creating one common
+// instruction instead. For every value that would be required to be provided by
+// a PHI node (because an operand varies in each input block), add to PHIOperands.
+static bool canSinkInstructions(
+ ArrayRef<Instruction *> Insts,
+ DenseMap<Instruction *, SmallVector<Value *, 4>> &PHIOperands) {
+ // Prune out obviously bad instructions to move. Any non-store instruction
+ // must have exactly one use, and we check later that use is by a single,
+ // common PHI instruction in the successor.
+ for (auto *I : Insts) {
+ // These instructions may change or break semantics if moved.
+ if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
+ I->getType()->isTokenTy())
+ return false;
+ // Everything must have only one use too, apart from stores which
+ // have no uses.
+ if (!isa<StoreInst>(I) && !I->hasOneUse())
+ return false;
}
- if (!FirstNonPhiInBBEnd)
- return false;
- // This does very trivial matching, with limited scanning, to find identical
- // instructions in the two blocks. We scan backward for obviously identical
- // instructions in an identical order.
- BasicBlock::InstListType::reverse_iterator RI1 = BB1->getInstList().rbegin(),
- RE1 = BB1->getInstList().rend(),
- RI2 = BB2->getInstList().rbegin(),
- RE2 = BB2->getInstList().rend();
- // Skip debug info.
- while (RI1 != RE1 && isa<DbgInfoIntrinsic>(&*RI1))
- ++RI1;
- if (RI1 == RE1)
- return false;
- while (RI2 != RE2 && isa<DbgInfoIntrinsic>(&*RI2))
- ++RI2;
- if (RI2 == RE2)
- return false;
- // Skip the unconditional branches.
- ++RI1;
- ++RI2;
+ const Instruction *I0 = Insts.front();
+ for (auto *I : Insts)
+ if (!I->isSameOperationAs(I0))
+ return false;
- bool Changed = false;
- while (RI1 != RE1 && RI2 != RE2) {
- // Skip debug info.
- while (RI1 != RE1 && isa<DbgInfoIntrinsic>(&*RI1))
- ++RI1;
- if (RI1 == RE1)
- return Changed;
- while (RI2 != RE2 && isa<DbgInfoIntrinsic>(&*RI2))
- ++RI2;
- if (RI2 == RE2)
- return Changed;
+ // All instructions in Insts are known to be the same opcode. If they aren't
+ // stores, check the only user of each is a PHI or in the same block as the
+ // instruction, because if a user is in the same block as an instruction
+ // we're contemplating sinking, it must already be determined to be sinkable.
+ if (!isa<StoreInst>(I0)) {
+ auto *PNUse = dyn_cast<PHINode>(*I0->user_begin());
+ auto *Succ = I0->getParent()->getTerminator()->getSuccessor(0);
+ if (!all_of(Insts, [&PNUse,&Succ](const Instruction *I) -> bool {
+ auto *U = cast<Instruction>(*I->user_begin());
+ return (PNUse &&
+ PNUse->getParent() == Succ &&
+ PNUse->getIncomingValueForBlock(I->getParent()) == I) ||
+ U->getParent() == I->getParent();
+ }))
+ return false;
+ }
- Instruction *I1 = &*RI1, *I2 = &*RI2;
- auto InstPair = std::make_pair(I1, I2);
- // I1 and I2 should have a single use in the same PHI node, and they
- // perform the same operation.
- // Cannot move control-flow-involving, volatile loads, vaarg, etc.
- if (isa<PHINode>(I1) || isa<PHINode>(I2) || isa<TerminatorInst>(I1) ||
- isa<TerminatorInst>(I2) || I1->isEHPad() || I2->isEHPad() ||
- isa<AllocaInst>(I1) || isa<AllocaInst>(I2) ||
- I1->mayHaveSideEffects() || I2->mayHaveSideEffects() ||
- I1->mayReadOrWriteMemory() || I2->mayReadOrWriteMemory() ||
- !I1->hasOneUse() || !I2->hasOneUse() || !JointValueMap.count(InstPair))
- return Changed;
+ for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
+ if (I0->getOperand(OI)->getType()->isTokenTy())
+ // Don't touch any operand of token type.
+ return false;
+
+ // Because SROA can't handle speculating stores of selects, try not
+ // to sink loads or stores of allocas when we'd have to create a PHI for
+ // the address operand. Also, because it is likely that loads or stores
+ // of allocas will disappear when Mem2Reg/SROA is run, don't sink them.
+ // This can cause code churn which can have unintended consequences down
+ // the line - see https://llvm.org/bugs/show_bug.cgi?id=30244.
+ // FIXME: This is a workaround for a deficiency in SROA - see
+ // https://llvm.org/bugs/show_bug.cgi?id=30188
+ if (OI == 1 && isa<StoreInst>(I0) &&
+ any_of(Insts, [](const Instruction *I) {
+ return isa<AllocaInst>(I->getOperand(1));
+ }))
+ return false;
+ if (OI == 0 && isa<LoadInst>(I0) && any_of(Insts, [](const Instruction *I) {
+ return isa<AllocaInst>(I->getOperand(0));
+ }))
+ return false;
- // Check whether we should swap the operands of ICmpInst.
- // TODO: Add support of communativity.
- ICmpInst *ICmp1 = dyn_cast<ICmpInst>(I1), *ICmp2 = dyn_cast<ICmpInst>(I2);
- bool SwapOpnds = false;
- if (ICmp1 && ICmp2 && ICmp1->getOperand(0) != ICmp2->getOperand(0) &&
- ICmp1->getOperand(1) != ICmp2->getOperand(1) &&
- (ICmp1->getOperand(0) == ICmp2->getOperand(1) ||
- ICmp1->getOperand(1) == ICmp2->getOperand(0))) {
- ICmp2->swapOperands();
- SwapOpnds = true;
+ auto SameAsI0 = [&I0, OI](const Instruction *I) {
+ assert(I->getNumOperands() == I0->getNumOperands());
+ return I->getOperand(OI) == I0->getOperand(OI);
+ };
+ if (!all_of(Insts, SameAsI0)) {
+ if (!canReplaceOperandWithVariable(I0, OI))
+ // We can't create a PHI from this GEP.
+ return false;
+ // Don't create indirect calls! The called value is the final operand.
+ if ((isa<CallInst>(I0) || isa<InvokeInst>(I0)) && OI == OE - 1) {
+ // FIXME: if the call was *already* indirect, we should do this.
+ return false;
+ }
+ for (auto *I : Insts)
+ PHIOperands[I].push_back(I->getOperand(OI));
}
- if (!I1->isSameOperationAs(I2)) {
- if (SwapOpnds)
- ICmp2->swapOperands();
- return Changed;
+ }
+ return true;
+}
+
+// Assuming canSinkLastInstruction(Blocks) has returned true, sink the last
+// instruction of every block in Blocks to their common successor, commoning
+// into one instruction.
+static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
+ auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0);
+
+ // canSinkLastInstruction returning true guarantees that every block has at
+ // least one non-terminator instruction.
+ SmallVector<Instruction*,4> Insts;
+ for (auto *BB : Blocks) {
+ Instruction *I = BB->getTerminator();
+ do {
+ I = I->getPrevNode();
+ } while (isa<DbgInfoIntrinsic>(I) && I != &BB->front());
+ if (!isa<DbgInfoIntrinsic>(I))
+ Insts.push_back(I);
+ }
+
+ // The only checking we need to do now is that all users of all instructions
+ // are the same PHI node. canSinkLastInstruction should have checked this but
+ // it is slightly over-aggressive - it gets confused by commutative instructions
+ // so double-check it here.
+ Instruction *I0 = Insts.front();
+ if (!isa<StoreInst>(I0)) {
+ auto *PNUse = dyn_cast<PHINode>(*I0->user_begin());
+ if (!all_of(Insts, [&PNUse](const Instruction *I) -> bool {
+ auto *U = cast<Instruction>(*I->user_begin());
+ return U == PNUse;
+ }))
+ return false;
+ }
+
+ // We don't need to do any more checking here; canSinkLastInstruction should
+ // have done it all for us.
+ SmallVector<Value*, 4> NewOperands;
+ for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
+ // This check is different to that in canSinkLastInstruction. There, we
+ // cared about the global view once simplifycfg (and instcombine) have
+ // completed - it takes into account PHIs that become trivially
+ // simplifiable. However here we need a more local view; if an operand
+ // differs we create a PHI and rely on instcombine to clean up the very
+ // small mess we may make.
+ bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) {
+ return I->getOperand(O) != I0->getOperand(O);
+ });
+ if (!NeedPHI) {
+ NewOperands.push_back(I0->getOperand(O));
+ continue;
}
- // The operands should be either the same or they need to be generated
- // with a PHI node after sinking. We only handle the case where there is
- // a single pair of different operands.
- Value *DifferentOp1 = nullptr, *DifferentOp2 = nullptr;
- unsigned Op1Idx = ~0U;
- for (unsigned I = 0, E = I1->getNumOperands(); I != E; ++I) {
- if (I1->getOperand(I) == I2->getOperand(I))
- continue;
- // Early exit if we have more-than one pair of different operands or if
- // we need a PHI node to replace a constant.
- if (Op1Idx != ~0U || isa<Constant>(I1->getOperand(I)) ||
- isa<Constant>(I2->getOperand(I))) {
- // If we can't sink the instructions, undo the swapping.
- if (SwapOpnds)
- ICmp2->swapOperands();
- return Changed;
+ // Create a new PHI in the successor block and populate it.
+ auto *Op = I0->getOperand(O);
+ assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
+ auto *PN = PHINode::Create(Op->getType(), Insts.size(),
+ Op->getName() + ".sink", &BBEnd->front());
+ for (auto *I : Insts)
+ PN->addIncoming(I->getOperand(O), I->getParent());
+ NewOperands.push_back(PN);
+ }
+
+ // Arbitrarily use I0 as the new "common" instruction; remap its operands
+ // and move it to the start of the successor block.
+ for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
+ I0->getOperandUse(O).set(NewOperands[O]);
+ I0->moveBefore(&*BBEnd->getFirstInsertionPt());
+
+ // Update metadata and IR flags.
+ for (auto *I : Insts)
+ if (I != I0) {
+ combineMetadataForCSE(I0, I);
+ I0->andIRFlags(I);
+ }
+
+ if (!isa<StoreInst>(I0)) {
+ // canSinkLastInstruction checked that all instructions were used by
+ // one and only one PHI node. Find that now, RAUW it to our common
+ // instruction and nuke it.
+ assert(I0->hasOneUse());
+ auto *PN = cast<PHINode>(*I0->user_begin());
+ PN->replaceAllUsesWith(I0);
+ PN->eraseFromParent();
+ }
+
+ // Finally nuke all instructions apart from the common instruction.
+ for (auto *I : Insts)
+ if (I != I0)
+ I->eraseFromParent();
+
+ return true;
+}
+
+namespace {
+
+ // LockstepReverseIterator - Iterates through instructions
+ // in a set of blocks in reverse order from the first non-terminator.
+ // For example (assume all blocks have size n):
+ // LockstepReverseIterator I([B1, B2, B3]);
+ // *I-- = [B1[n], B2[n], B3[n]];
+ // *I-- = [B1[n-1], B2[n-1], B3[n-1]];
+ // *I-- = [B1[n-2], B2[n-2], B3[n-2]];
+ // ...
+ class LockstepReverseIterator {
+ ArrayRef<BasicBlock*> Blocks;
+ SmallVector<Instruction*,4> Insts;
+ bool Fail;
+ public:
+ LockstepReverseIterator(ArrayRef<BasicBlock*> Blocks) :
+ Blocks(Blocks) {
+ reset();
+ }
+
+ void reset() {
+ Fail = false;
+ Insts.clear();
+ for (auto *BB : Blocks) {
+ Instruction *Inst = BB->getTerminator();
+ for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
+ Inst = Inst->getPrevNode();
+ if (!Inst) {
+ // Block wasn't big enough.
+ Fail = true;
+ return;
+ }
+ Insts.push_back(Inst);
}
- DifferentOp1 = I1->getOperand(I);
- Op1Idx = I;
- DifferentOp2 = I2->getOperand(I);
}
- DEBUG(dbgs() << "SINK common instructions " << *I1 << "\n");
- DEBUG(dbgs() << " " << *I2 << "\n");
-
- // We insert the pair of different operands to JointValueMap and
- // remove (I1, I2) from JointValueMap.
- if (Op1Idx != ~0U) {
- auto &NewPN = JointValueMap[std::make_pair(DifferentOp1, DifferentOp2)];
- if (!NewPN) {
- NewPN =
- PHINode::Create(DifferentOp1->getType(), 2,
- DifferentOp1->getName() + ".sink", &BBEnd->front());
- NewPN->addIncoming(DifferentOp1, BB1);
- NewPN->addIncoming(DifferentOp2, BB2);
- DEBUG(dbgs() << "Create PHI node " << *NewPN << "\n";);
+ bool isValid() const {
+ return !Fail;
+ }
+
+ void operator -- () {
+ if (Fail)
+ return;
+ for (auto *&Inst : Insts) {
+ for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
+ Inst = Inst->getPrevNode();
+ // Already at beginning of block.
+ if (!Inst) {
+ Fail = true;
+ return;
+ }
}
- // I1 should use NewPN instead of DifferentOp1.
- I1->setOperand(Op1Idx, NewPN);
}
- PHINode *OldPN = JointValueMap[InstPair];
- JointValueMap.erase(InstPair);
-
- // We need to update RE1 and RE2 if we are going to sink the first
- // instruction in the basic block down.
- bool UpdateRE1 = (I1 == &BB1->front()), UpdateRE2 = (I2 == &BB2->front());
- // Sink the instruction.
- BBEnd->getInstList().splice(FirstNonPhiInBBEnd->getIterator(),
- BB1->getInstList(), I1);
- if (!OldPN->use_empty())
- OldPN->replaceAllUsesWith(I1);
- OldPN->eraseFromParent();
- if (!I2->use_empty())
- I2->replaceAllUsesWith(I1);
- I1->intersectOptionalDataWith(I2);
- // TODO: Use combineMetadata here to preserve what metadata we can
- // (analogous to the hoisting case above).
- I2->eraseFromParent();
+ ArrayRef<Instruction*> operator * () const {
+ return Insts;
+ }
+ };
+
+} // end anonymous namespace
+
+/// Given an unconditional branch that goes to BBEnd,
+/// check whether BBEnd has only two predecessors and the other predecessor
+/// ends with an unconditional branch. If it is true, sink any common code
+/// in the two predecessors to BBEnd.
+static bool SinkThenElseCodeToEnd(BranchInst *BI1) {
+ assert(BI1->isUnconditional());
+ BasicBlock *BBEnd = BI1->getSuccessor(0);
+
+ // We support two situations:
+ // (1) all incoming arcs are unconditional
+ // (2) one incoming arc is conditional
+ //
+ // (2) is very common in switch defaults and
+ // else-if patterns;
+ //
+ // if (a) f(1);
+ // else if (b) f(2);
+ //
+ // produces:
+ //
+ // [if]
+ // / \
+ // [f(1)] [if]
+ // | | \
+ // | | \
+ // | [f(2)]|
+ // \ | /
+ // [ end ]
+ //
+ // [end] has two unconditional predecessor arcs and one conditional. The
+ // conditional refers to the implicit empty 'else' arc. This conditional
+ // arc can also be caused by an empty default block in a switch.
+ //
+ // In this case, we attempt to sink code from all *unconditional* arcs.
+ // If we can sink instructions from these arcs (determined during the scan
+ // phase below) we insert a common successor for all unconditional arcs and
+ // connect that to [end], to enable sinking:
+ //
+ // [if]
+ // / \
+ // [x(1)] [if]
+ // | | \
+ // | | \
+ // | [x(2)] |
+ // \ / |
+ // [sink.split] |
+ // \ /
+ // [ end ]
+ //
+ SmallVector<BasicBlock*,4> UnconditionalPreds;
+ Instruction *Cond = nullptr;
+ for (auto *B : predecessors(BBEnd)) {
+ auto *T = B->getTerminator();
+ if (isa<BranchInst>(T) && cast<BranchInst>(T)->isUnconditional())
+ UnconditionalPreds.push_back(B);
+ else if ((isa<BranchInst>(T) || isa<SwitchInst>(T)) && !Cond)
+ Cond = T;
+ else
+ return false;
+ }
+ if (UnconditionalPreds.size() < 2)
+ return false;
+
+ bool Changed = false;
+ // We take a two-step approach to tail sinking. First we scan from the end of
+ // each block upwards in lockstep. If the n'th instruction from the end of each
+ // block can be sunk, those instructions are added to ValuesToSink and we
+ // carry on. If we can sink an instruction but need to PHI-merge some operands
+ // (because they're not identical in each instruction) we add these to
+ // PHIOperands.
+ unsigned ScanIdx = 0;
+ SmallPtrSet<Value*,4> InstructionsToSink;
+ DenseMap<Instruction*, SmallVector<Value*,4>> PHIOperands;
+ LockstepReverseIterator LRI(UnconditionalPreds);
+ while (LRI.isValid() &&
+ canSinkInstructions(*LRI, PHIOperands)) {
+ DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0] << "\n");
+ InstructionsToSink.insert((*LRI).begin(), (*LRI).end());
+ ++ScanIdx;
+ --LRI;
+ }
+
+ auto ProfitableToSinkInstruction = [&](LockstepReverseIterator &LRI) {
+ unsigned NumPHIdValues = 0;
+ for (auto *I : *LRI)
+ for (auto *V : PHIOperands[I])
+ if (InstructionsToSink.count(V) == 0)
+ ++NumPHIdValues;
+ DEBUG(dbgs() << "SINK: #phid values: " << NumPHIdValues << "\n");
+ unsigned NumPHIInsts = NumPHIdValues / UnconditionalPreds.size();
+ if ((NumPHIdValues % UnconditionalPreds.size()) != 0)
+ NumPHIInsts++;
+
+ return NumPHIInsts <= 1;
+ };
- if (UpdateRE1)
- RE1 = BB1->getInstList().rend();
- if (UpdateRE2)
- RE2 = BB2->getInstList().rend();
- FirstNonPhiInBBEnd = &*I1;
+ if (ScanIdx > 0 && Cond) {
+ // Check if we would actually sink anything first! This mutates the CFG and
+ // adds an extra block. The goal in doing this is to allow instructions that
+ // couldn't be sunk before to be sunk - obviously, speculatable instructions
+ // (such as trunc, add) can be sunk and predicated already. So we check that
+ // we're going to sink at least one non-speculatable instruction.
+ LRI.reset();
+ unsigned Idx = 0;
+ bool Profitable = false;
+ while (ProfitableToSinkInstruction(LRI) && Idx < ScanIdx) {
+ if (!isSafeToSpeculativelyExecute((*LRI)[0])) {
+ Profitable = true;
+ break;
+ }
+ --LRI;
+ ++Idx;
+ }
+ if (!Profitable)
+ return false;
+
+ DEBUG(dbgs() << "SINK: Splitting edge\n");
+ // We have a conditional edge and we're going to sink some instructions.
+ // Insert a new block postdominating all blocks we're going to sink from.
+ if (!SplitBlockPredecessors(BI1->getSuccessor(0), UnconditionalPreds,
+ ".sink.split"))
+ // Edges couldn't be split.
+ return false;
+ Changed = true;
+ }
+
+ // Now that we've analyzed all potential sinking candidates, perform the
+ // actual sink. We iteratively sink the last non-terminator of the source
+ // blocks into their common successor unless doing so would require too
+ // many PHI instructions to be generated (currently only one PHI is allowed
+ // per sunk instruction).
+ //
+ // We can use InstructionsToSink to discount values needing PHI-merging that will
+ // actually be sunk in a later iteration. This allows us to be more
+ // aggressive in what we sink. This does allow a false positive where we
+ // sink presuming a later value will also be sunk, but stop half way through
+ // and never actually sink it which means we produce more PHIs than intended.
+ // This is unlikely in practice though.
+ for (unsigned SinkIdx = 0; SinkIdx != ScanIdx; ++SinkIdx) {
+ DEBUG(dbgs() << "SINK: Sink: "
+ << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
+ << "\n");
+
+ // Because we've sunk every instruction in turn, the current instruction to
+ // sink is always at index 0.
+ LRI.reset();
+ if (!ProfitableToSinkInstruction(LRI)) {
+ // Too many PHIs would be created.
+ DEBUG(dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
+ break;
+ }
+
+ if (!sinkLastInstruction(UnconditionalPreds))
+ return Changed;
NumSinkCommons++;
Changed = true;
}
@@ -1539,7 +1868,7 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
continue;
--MaxNumInstToLookAt;
- // Could be calling an instruction that effects memory like free().
+ // Could be calling an instruction that affects memory like free().
if (CurI.mayHaveSideEffects() && !isa<StoreInst>(CurI))
return nullptr;
@@ -1822,7 +2151,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI, const DataLayout &DL) {
return false;
// Can't fold blocks that contain noduplicate or convergent calls.
- if (llvm::any_of(*BB, [](const Instruction &I) {
+ if (any_of(*BB, [](const Instruction &I) {
const CallInst *CI = dyn_cast<CallInst>(&I);
return CI && (CI->cannotDuplicate() || CI->isConvergent());
}))
@@ -2464,6 +2793,11 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, unsigned BonusInstThreshold) {
PBI = New_PBI;
}
+ // If BI was a loop latch, it may have had associated loop metadata.
+ // We need to copy it to the new latch, that is, PBI.
+ if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop))
+ PBI->setMetadata(LLVMContext::MD_loop, LoopMD);
+
// TODO: If BB is reachable from all paths through PredBlock, then we
// could replace PBI's branch probabilities with BI's.
@@ -4150,18 +4484,28 @@ static bool ForwardSwitchConditionToPHI(SwitchInst *SI) {
/// Return true if the backend will be able to handle
/// initializing an array of constants like C.
-static bool ValidLookupTableConstant(Constant *C) {
+static bool ValidLookupTableConstant(Constant *C, const TargetTransformInfo &TTI) {
if (C->isThreadDependent())
return false;
if (C->isDLLImportDependent())
return false;
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C))
- return CE->isGEPWithNoNotionalOverIndexing();
+ if (!isa<ConstantFP>(C) && !isa<ConstantInt>(C) &&
+ !isa<ConstantPointerNull>(C) && !isa<GlobalValue>(C) &&
+ !isa<UndefValue>(C) && !isa<ConstantExpr>(C))
+ return false;
+
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
+ if (!CE->isGEPWithNoNotionalOverIndexing())
+ return false;
+ if (!ValidLookupTableConstant(CE->getOperand(0), TTI))
+ return false;
+ }
+
+ if (!TTI.shouldBuildLookupTablesForConstant(C))
+ return false;
- return isa<ConstantFP>(C) || isa<ConstantInt>(C) ||
- isa<ConstantPointerNull>(C) || isa<GlobalValue>(C) ||
- isa<UndefValue>(C);
+ return true;
}
/// If V is a Constant, return it. Otherwise, try to look up
@@ -4216,7 +4560,7 @@ static bool
GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
BasicBlock **CommonDest,
SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
- const DataLayout &DL) {
+ const DataLayout &DL, const TargetTransformInfo &TTI) {
// The block from which we enter the common destination.
BasicBlock *Pred = SI->getParent();
@@ -4228,7 +4572,7 @@ GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
++I) {
if (TerminatorInst *T = dyn_cast<TerminatorInst>(I)) {
// If the terminator is a simple branch, continue to the next block.
- if (T->getNumSuccessors() != 1)
+ if (T->getNumSuccessors() != 1 || T->isExceptional())
return false;
Pred = CaseDest;
CaseDest = T->getSuccessor(0);
@@ -4278,7 +4622,7 @@ GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
return false;
// Be conservative about which kinds of constants we support.
- if (!ValidLookupTableConstant(ConstVal))
+ if (!ValidLookupTableConstant(ConstVal, TTI))
return false;
Res.push_back(std::make_pair(PHI, ConstVal));
@@ -4310,14 +4654,15 @@ static bool InitializeUniqueCases(SwitchInst *SI, PHINode *&PHI,
BasicBlock *&CommonDest,
SwitchCaseResultVectorTy &UniqueResults,
Constant *&DefaultResult,
- const DataLayout &DL) {
+ const DataLayout &DL,
+ const TargetTransformInfo &TTI) {
for (auto &I : SI->cases()) {
ConstantInt *CaseVal = I.getCaseValue();
// Resulting value at phi nodes for this case value.
SwitchCaseResultsTy Results;
if (!GetCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results,
- DL))
+ DL, TTI))
return false;
// Only one value per case is permitted
@@ -4335,7 +4680,7 @@ static bool InitializeUniqueCases(SwitchInst *SI, PHINode *&PHI,
SmallVector<std::pair<PHINode *, Constant *>, 1> DefaultResults;
BasicBlock *DefaultDest = SI->getDefaultDest();
GetCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults,
- DL);
+ DL, TTI);
// If the default value is not found abort unless the default destination
// is unreachable.
DefaultResult =
@@ -4414,7 +4759,8 @@ static void RemoveSwitchAfterSelectConversion(SwitchInst *SI, PHINode *PHI,
/// phi nodes in a common successor block with only two different
/// constant values, replace the switch with select.
static bool SwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
- AssumptionCache *AC, const DataLayout &DL) {
+ AssumptionCache *AC, const DataLayout &DL,
+ const TargetTransformInfo &TTI) {
Value *const Cond = SI->getCondition();
PHINode *PHI = nullptr;
BasicBlock *CommonDest = nullptr;
@@ -4422,7 +4768,7 @@ static bool SwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
SwitchCaseResultVectorTy UniqueResults;
// Collect all the cases that will deliver the same value from the switch.
if (!InitializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
- DL))
+ DL, TTI))
return false;
// Selects choose between maximum two values.
if (UniqueResults.size() != 2)
@@ -4441,6 +4787,7 @@ static bool SwitchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
}
namespace {
+
/// This class represents a lookup table that can be used to replace a switch.
class SwitchLookupTable {
public:
@@ -4497,7 +4844,8 @@ private:
// For ArrayKind, this is the array.
GlobalVariable *Array;
};
-}
+
+} // end anonymous namespace
SwitchLookupTable::SwitchLookupTable(
Module &M, uint64_t TableSize, ConstantInt *Offset,
@@ -4860,7 +5208,7 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
typedef SmallVector<std::pair<PHINode *, Constant *>, 4> ResultsTy;
ResultsTy Results;
if (!GetCaseResults(SI, CaseVal, CI.getCaseSuccessor(), &CommonDest,
- Results, DL))
+ Results, DL, TTI))
return false;
// Append the result from this case to the list for each phi.
@@ -4886,8 +5234,9 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
// If the table has holes, we need a constant result for the default case
// or a bitmask that fits in a register.
SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
- bool HasDefaultResults = GetCaseResults(SI, nullptr, SI->getDefaultDest(),
- &CommonDest, DefaultResultsList, DL);
+ bool HasDefaultResults =
+ GetCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest,
+ DefaultResultsList, DL, TTI);
bool NeedMask = (TableHasHoles && !HasDefaultResults);
if (NeedMask) {
@@ -5044,6 +5393,111 @@ static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
return true;
}
+static bool isSwitchDense(ArrayRef<int64_t> Values) {
+ // See also SelectionDAGBuilder::isDense(), which this function was based on.
+ uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
+ uint64_t Range = Diff + 1;
+ uint64_t NumCases = Values.size();
+ // 40% is the default density for building a jump table in optsize/minsize mode.
+ uint64_t MinDensity = 40;
+
+ return NumCases * 100 >= Range * MinDensity;
+}
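For reference, the density heuristic above can be exercised in isolation. The following is a plain C++ sketch, illustration only and not part of the patch; isSwitchDenseRef is a made-up name that mirrors the arithmetic of isSwitchDense(), and the example values anticipate the switch used in the comment below.

// Minimal re-implementation of the isSwitchDense() heuristic, for a quick
// sanity check outside of LLVM. Values must be sorted ascending.
#include <cassert>
#include <cstdint>
#include <iostream>
#include <vector>

static bool isSwitchDenseRef(const std::vector<int64_t> &Values) {
  uint64_t Diff = (uint64_t)Values.back() - (uint64_t)Values.front();
  uint64_t Range = Diff + 1;
  uint64_t NumCases = Values.size();
  uint64_t MinDensity = 40; // 40% threshold, as in the pass.
  return NumCases * 100 >= Range * MinDensity;
}

int main() {
  // The sparse switch from the comment below: {5, 9, 13, 17}.
  // Range = 13, so 4 * 100 = 400 < 13 * 40 = 520 -> not dense.
  assert(!isSwitchDenseRef({5, 9, 13, 17}));
  // After rebasing to zero and dividing by the GCD (4): {0, 1, 2, 3}.
  // Range = 4, so 400 >= 4 * 40 = 160 -> dense.
  assert(isSwitchDenseRef({0, 1, 2, 3}));
  std::cout << "density heuristic behaves as expected\n";
}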
+
+// Try to transform a switch that has "holes" in it to a contiguous sequence
+// of cases.
+//
+// A switch such as: switch(i) {case 5: case 9: case 13: case 17:} can be
+// range-reduced to: switch ((i-5) / 4) {case 0: case 1: case 2: case 3:}.
+//
+// This converts a sparse switch into a dense switch which allows better
+// lowering and could also allow transforming into a lookup table.
+static bool ReduceSwitchRange(SwitchInst *SI, IRBuilder<> &Builder,
+ const DataLayout &DL,
+ const TargetTransformInfo &TTI) {
+ auto *CondTy = cast<IntegerType>(SI->getCondition()->getType());
+ if (CondTy->getIntegerBitWidth() > 64 ||
+ !DL.fitsInLegalInteger(CondTy->getIntegerBitWidth()))
+ return false;
+ // Only bother with this optimization if there are more than 3 switch cases;
+ // SDAG will only bother creating jump tables for 4 or more cases.
+ if (SI->getNumCases() < 4)
+ return false;
+
+ // This transform is agnostic to the signedness of the input or case values. We
+ // can treat the case values as signed or unsigned. We can optimize more common
+ // cases such as a sequence crossing zero {-4,0,4,8} if we interpret case values
+ // as signed.
+ SmallVector<int64_t,4> Values;
+ for (auto &C : SI->cases())
+ Values.push_back(C.getCaseValue()->getValue().getSExtValue());
+ std::sort(Values.begin(), Values.end());
+
+ // If the switch is already dense, there's nothing useful to do here.
+ if (isSwitchDense(Values))
+ return false;
+
+ // First, transform the values such that they start at zero and ascend.
+ int64_t Base = Values[0];
+ for (auto &V : Values)
+ V -= Base;
+
+ // Now we have signed numbers that have been shifted so that, given enough
+ // precision, there are no negative values. Since the rest of the transform
+ // is bitwise only, we switch now to an unsigned representation.
+ uint64_t GCD = 0;
+ for (auto &V : Values)
+ GCD = GreatestCommonDivisor64(GCD, (uint64_t)V);
+
+ // This transform can be done speculatively because it is so cheap - it results
+ // in a single rotate operation being inserted. This can only happen if the
+ // factor extracted is a power of 2.
+ // FIXME: If the GCD is an odd number we can multiply by the multiplicative
+ // inverse of GCD and then perform this transform.
+ // FIXME: It's possible that optimizing a switch on powers of two might also
+ // be beneficial - flag values are often powers of two and we could use a CLZ
+ // as the key function.
+ if (GCD <= 1 || !isPowerOf2_64(GCD))
+ // No common divisor found or too expensive to compute key function.
+ return false;
+
+ unsigned Shift = Log2_64(GCD);
+ for (auto &V : Values)
+ V = (int64_t)((uint64_t)V >> Shift);
+
+ if (!isSwitchDense(Values))
+ // Transform didn't create a dense switch.
+ return false;
+
+ // The obvious transform is to shift the switch condition right and emit a
+ // check that the condition actually cleanly divided by GCD, i.e.
+ // C & (1 << Shift - 1) == 0
+ // inserting a new CFG edge to handle the case where it didn't divide cleanly.
+ //
+ // A cheaper way of doing this is a simple ROTR(C, Shift). This performs the
+ // shift and puts the shifted-off bits in the uppermost bits. If any of these
+ // are nonzero then the switch condition will be very large and will hit the
+ // default case.
+
+ auto *Ty = cast<IntegerType>(SI->getCondition()->getType());
+ Builder.SetInsertPoint(SI);
+ auto *ShiftC = ConstantInt::get(Ty, Shift);
+ auto *Sub = Builder.CreateSub(SI->getCondition(), ConstantInt::get(Ty, Base));
+ auto *LShr = Builder.CreateLShr(Sub, ShiftC);
+ auto *Shl = Builder.CreateShl(Sub, Ty->getBitWidth() - Shift);
+ auto *Rot = Builder.CreateOr(LShr, Shl);
+ SI->replaceUsesOfWith(SI->getCondition(), Rot);
+
+ for (SwitchInst::CaseIt C = SI->case_begin(), E = SI->case_end(); C != E;
+ ++C) {
+ auto *Orig = C.getCaseValue();
+ auto Sub = Orig->getValue() - APInt(Ty->getBitWidth(), Base);
+ C.setValue(
+ cast<ConstantInt>(ConstantInt::get(Ty, Sub.lshr(ShiftC->getValue()))));
+ }
+ return true;
+}
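To make the rotate trick concrete, here is a standalone sketch (plain C++, illustration only; switchKey is a made-up helper that assumes 0 < Shift < 32). It computes ROTR(i - Base, Shift) for the example switch from the comment above and shows that values which do not divide cleanly by the GCD end up far outside the dense case range, so they fall through to the default destination.

// Sketch of the key function emitted by ReduceSwitchRange() above: rotate
// (i - Base) right by Shift. Values that are not a multiple of 1 << Shift
// get their low bits rotated into the high bits, producing a huge key that
// misses every case and hits the default destination.
#include <cstdint>
#include <iostream>

static uint32_t switchKey(uint32_t i, uint32_t Base, uint32_t Shift) {
  uint32_t Sub = i - Base;
  return (Sub >> Shift) | (Sub << (32 - Shift)); // ROTR(Sub, Shift)
}

int main() {
  // switch (i) { case 5: case 9: case 13: case 17: }
  // Base = 5, GCD = 4, Shift = log2(4) = 2.
  for (uint32_t i : {5u, 9u, 13u, 17u, 6u, 100u})
    std::cout << i << " -> " << switchKey(i, 5, 2) << '\n';
  // 5, 9, 13, 17 map to 0, 1, 2, 3; 6 and 100 map to keys >= 2^30.
}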
+
bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
BasicBlock *BB = SI->getParent();
@@ -5078,7 +5532,7 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
if (EliminateDeadSwitchCases(SI, AC, DL))
return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
- if (SwitchToSelect(SI, Builder, AC, DL))
+ if (SwitchToSelect(SI, Builder, AC, DL, TTI))
return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
if (ForwardSwitchConditionToPHI(SI))
@@ -5087,6 +5541,9 @@ bool SimplifyCFGOpt::SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder) {
if (SwitchToLookupTable(SI, Builder, DL, TTI))
return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+ if (ReduceSwitchRange(SI, Builder, DL, TTI))
+ return SimplifyCFG(BB, TTI, BonusInstThreshold, AC) | true;
+
return false;
}
@@ -5397,7 +5854,10 @@ static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I) {
// Now make sure that there are no instructions in between that can alter
// control flow (eg. calls)
- for (BasicBlock::iterator i = ++BasicBlock::iterator(I); &*i != Use; ++i)
+ for (BasicBlock::iterator
+ i = ++BasicBlock::iterator(I),
+ UI = BasicBlock::iterator(dyn_cast<Instruction>(Use));
+ i != UI; ++i)
if (i == I->getParent()->end() || i->mayHaveSideEffects())
return false;
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp
index df299067094f..1220490123ce 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyInstructions.cpp
@@ -34,14 +34,14 @@ using namespace llvm;
STATISTIC(NumSimplified, "Number of redundant instructions removed");
-static bool runImpl(Function &F, const DominatorTree *DT, const TargetLibraryInfo *TLI,
- AssumptionCache *AC) {
+static bool runImpl(Function &F, const DominatorTree *DT,
+ const TargetLibraryInfo *TLI, AssumptionCache *AC) {
const DataLayout &DL = F.getParent()->getDataLayout();
- SmallPtrSet<const Instruction*, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;
+ SmallPtrSet<const Instruction *, 8> S1, S2, *ToSimplify = &S1, *Next = &S2;
bool Changed = false;
do {
- for (BasicBlock *BB : depth_first(&F.getEntryBlock()))
+ for (BasicBlock *BB : depth_first(&F.getEntryBlock())) {
// Here be subtlety: the iterator must be incremented before the loop
// body (not sure why), so a range-for loop won't work here.
for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); BI != BE;) {
@@ -51,8 +51,9 @@ static bool runImpl(Function &F, const DominatorTree *DT, const TargetLibraryInf
// empty and we only bother simplifying instructions that are in it.
if (!ToSimplify->empty() && !ToSimplify->count(I))
continue;
+
// Don't waste time simplifying unused instructions.
- if (!I->use_empty())
+ if (!I->use_empty()) {
if (Value *V = SimplifyInstruction(I, DL, TLI, DT, AC)) {
// Mark all uses for resimplification next time round the loop.
for (User *U : I->users())
@@ -61,16 +62,17 @@ static bool runImpl(Function &F, const DominatorTree *DT, const TargetLibraryInf
++NumSimplified;
Changed = true;
}
- bool res = RecursivelyDeleteTriviallyDeadInstructions(I, TLI);
- if (res) {
- // RecursivelyDeleteTriviallyDeadInstruction can remove
- // more than one instruction, so simply incrementing the
- // iterator does not work. When instructions get deleted
- // re-iterate instead.
- BI = BB->begin(); BE = BB->end();
- Changed |= res;
+ }
+ if (RecursivelyDeleteTriviallyDeadInstructions(I, TLI)) {
+ // RecursivelyDeleteTriviallyDeadInstruction can remove more than one
+ // instruction, so simply incrementing the iterator does not work.
+ // When instructions get deleted re-iterate instead.
+ BI = BB->begin();
+ BE = BB->end();
+ Changed = true;
}
}
+ }
// Place the list of instructions to simplify on the next loop iteration
// into ToSimplify.
@@ -90,6 +92,7 @@ namespace {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
+ AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
}
@@ -99,9 +102,8 @@ namespace {
if (skipFunction(F))
return false;
- const DominatorTreeWrapperPass *DTWP =
- getAnalysisIfAvailable<DominatorTreeWrapperPass>();
- const DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
+ const DominatorTree *DT =
+ &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
const TargetLibraryInfo *TLI =
&getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
AssumptionCache *AC =
@@ -115,6 +117,7 @@ char InstSimplifier::ID = 0;
INITIALIZE_PASS_BEGIN(InstSimplifier, "instsimplify",
"Remove redundant instructions", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(InstSimplifier, "instsimplify",
"Remove redundant instructions", false, false)
@@ -126,11 +129,11 @@ FunctionPass *llvm::createInstructionSimplifierPass() {
}
PreservedAnalyses InstSimplifierPass::run(Function &F,
- AnalysisManager<Function> &AM) {
- auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
+ FunctionAnalysisManager &AM) {
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
auto &AC = AM.getResult<AssumptionAnalysis>(F);
- bool Changed = runImpl(F, DT, &TLI, &AC);
+ bool Changed = runImpl(F, &DT, &TLI, &AC);
if (!Changed)
return PreservedAnalyses::all();
// FIXME: This should also 'preserve the CFG'.
diff --git a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
index c2986951e48f..c8f030f7eb83 100644
--- a/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -56,6 +56,38 @@ static bool ignoreCallingConv(LibFunc::Func Func) {
Func == LibFunc::llabs || Func == LibFunc::strlen;
}
+static bool isCallingConvCCompatible(CallInst *CI) {
+ switch(CI->getCallingConv()) {
+ default:
+ return false;
+ case llvm::CallingConv::C:
+ return true;
+ case llvm::CallingConv::ARM_APCS:
+ case llvm::CallingConv::ARM_AAPCS:
+ case llvm::CallingConv::ARM_AAPCS_VFP: {
+
+ // The iOS ABI diverges from the standard in some cases, so for now don't
+ // try to simplify those calls.
+ if (Triple(CI->getModule()->getTargetTriple()).isiOS())
+ return false;
+
+ auto *FuncTy = CI->getFunctionType();
+
+ if (!FuncTy->getReturnType()->isPointerTy() &&
+ !FuncTy->getReturnType()->isIntegerTy() &&
+ !FuncTy->getReturnType()->isVoidTy())
+ return false;
+
+ for (auto Param : FuncTy->params()) {
+ if (!Param->isPointerTy() && !Param->isIntegerTy())
+ return false;
+ }
+ return true;
+ }
+ }
+ return false;
+}
+
/// Return true if it only matters that the value is equal or not-equal to zero.
static bool isOnlyUsedInZeroEqualityComparison(Value *V) {
for (User *U : V->users()) {
@@ -83,7 +115,7 @@ static bool isOnlyUsedInEqualityComparison(Value *V, Value *With) {
}
static bool callHasFloatingPointArgument(const CallInst *CI) {
- return std::any_of(CI->op_begin(), CI->op_end(), [](const Use &OI) {
+ return any_of(CI->operands(), [](const Use &OI) {
return OI->getType()->isFloatingPointTy();
});
}
@@ -868,7 +900,7 @@ static Value *valueHasFloatPrecision(Value *Val) {
if (ConstantFP *Const = dyn_cast<ConstantFP>(Val)) {
APFloat F = Const->getValueAPF();
bool losesInfo;
- (void)F.convert(APFloat::IEEEsingle, APFloat::rmNearestTiesToEven,
+ (void)F.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
&losesInfo);
if (!losesInfo)
return ConstantFP::get(Const->getContext(), F);
@@ -993,16 +1025,20 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
Ret = optimizeUnaryDoubleFP(CI, B, true);
Value *Op1 = CI->getArgOperand(0), *Op2 = CI->getArgOperand(1);
+
+ // pow(1.0, x) -> 1.0
+ if (match(Op1, m_SpecificFP(1.0)))
+ return Op1;
+ // pow(2.0, x) -> llvm.exp2(x)
+ if (match(Op1, m_SpecificFP(2.0))) {
+ Value *Exp2 = Intrinsic::getDeclaration(CI->getModule(), Intrinsic::exp2,
+ CI->getType());
+ return B.CreateCall(Exp2, Op2, "exp2");
+ }
+
+  // There's no llvm.exp10 intrinsic yet, but maybe some day there will
+  // be one.
if (ConstantFP *Op1C = dyn_cast<ConstantFP>(Op1)) {
- // pow(1.0, x) -> 1.0
- if (Op1C->isExactlyValue(1.0))
- return Op1C;
- // pow(2.0, x) -> exp2(x)
- if (Op1C->isExactlyValue(2.0) &&
- hasUnaryFloatFn(TLI, Op1->getType(), LibFunc::exp2, LibFunc::exp2f,
- LibFunc::exp2l))
- return emitUnaryFloatFnCall(Op2, TLI->getName(LibFunc::exp2), B,
- Callee->getAttributes());
// pow(10.0, x) -> exp10(x)
if (Op1C->isExactlyValue(10.0) &&
hasUnaryFloatFn(TLI, Op1->getType(), LibFunc::exp10, LibFunc::exp10f,
@@ -1048,6 +1084,9 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
if (CI->hasUnsafeAlgebra()) {
IRBuilder<>::FastMathFlagGuard Guard(B);
B.setFastMathFlags(CI->getFastMathFlags());
+
+    // Unlike other math intrinsics, sqrt has different semantics
+ // from the libc function. See LangRef for details.
return emitUnaryFloatFnCall(Op1, TLI->getName(LibFunc::sqrt), B,
Callee->getAttributes());
}
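As a side note, the pow() identities introduced in this function can be sanity-checked numerically. The snippet below is a plain C++ illustration only; it does not model the fast-math or calling-convention conditions the pass actually checks before rewriting the call.

// Numeric check of the identities used above:
//   pow(1.0, x) -> 1.0
//   pow(2.0, x) -> exp2(x)
#include <cassert>
#include <cmath>

int main() {
  for (double x : {-3.5, -1.0, 0.0, 0.5, 2.0, 10.25}) {
    assert(std::pow(1.0, x) == 1.0);            // exact: pow(1, y) == 1
    double a = std::pow(2.0, x), b = std::exp2(x);
    assert(std::fabs(a - b) <= 1e-12 * std::fabs(b)); // allow libm rounding
  }
}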
@@ -1090,7 +1129,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
// We cannot readily convert a non-double type (like float) to a double.
// So we first convert V to something which could be converted to double.
bool ignored;
- V.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &ignored);
+ V.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &ignored);
// TODO: Should the new instructions propagate the 'fast' flag of the pow()?
Value *FMul = getPow(InnerChain, V.convertToDouble(), B);
@@ -1428,6 +1467,12 @@ Value *LibCallSimplifier::optimizeSinCosPi(CallInst *CI, IRBuilder<> &B) {
Value *Sin, *Cos, *SinCos;
insertSinCosCall(B, CI->getCalledFunction(), Arg, IsFloat, Sin, Cos, SinCos);
+ auto replaceTrigInsts = [this](SmallVectorImpl<CallInst *> &Calls,
+ Value *Res) {
+ for (CallInst *C : Calls)
+ replaceAllUsesWith(C, Res);
+ };
+
replaceTrigInsts(SinCalls, Sin);
replaceTrigInsts(CosCalls, Cos);
replaceTrigInsts(SinCosCalls, SinCos);
@@ -1472,32 +1517,16 @@ void LibCallSimplifier::classifyArgUse(
}
}
-void LibCallSimplifier::replaceTrigInsts(SmallVectorImpl<CallInst *> &Calls,
- Value *Res) {
- for (CallInst *C : Calls)
- replaceAllUsesWith(C, Res);
-}
-
//===----------------------------------------------------------------------===//
// Integer Library Call Optimizations
//===----------------------------------------------------------------------===//
Value *LibCallSimplifier::optimizeFFS(CallInst *CI, IRBuilder<> &B) {
- Function *Callee = CI->getCalledFunction();
- Value *Op = CI->getArgOperand(0);
-
- // Constant fold.
- if (ConstantInt *CI = dyn_cast<ConstantInt>(Op)) {
- if (CI->isZero()) // ffs(0) -> 0.
- return B.getInt32(0);
- // ffs(c) -> cttz(c)+1
- return B.getInt32(CI->getValue().countTrailingZeros() + 1);
- }
-
// ffs(x) -> x != 0 ? (i32)llvm.cttz(x)+1 : 0
+ Value *Op = CI->getArgOperand(0);
Type *ArgType = Op->getType();
- Value *F =
- Intrinsic::getDeclaration(Callee->getParent(), Intrinsic::cttz, ArgType);
+ Value *F = Intrinsic::getDeclaration(CI->getCalledFunction()->getParent(),
+ Intrinsic::cttz, ArgType);
Value *V = B.CreateCall(F, {Op, B.getTrue()}, "cttz");
V = B.CreateAdd(V, ConstantInt::get(V->getType(), 1));
V = B.CreateIntCast(V, B.getInt32Ty(), false);
@@ -1506,6 +1535,18 @@ Value *LibCallSimplifier::optimizeFFS(CallInst *CI, IRBuilder<> &B) {
return B.CreateSelect(Cond, V, B.getInt32(0));
}
+Value *LibCallSimplifier::optimizeFls(CallInst *CI, IRBuilder<> &B) {
+ // fls(x) -> (i32)(sizeInBits(x) - llvm.ctlz(x, false))
+ Value *Op = CI->getArgOperand(0);
+ Type *ArgType = Op->getType();
+ Value *F = Intrinsic::getDeclaration(CI->getCalledFunction()->getParent(),
+ Intrinsic::ctlz, ArgType);
+ Value *V = B.CreateCall(F, {Op, B.getFalse()}, "ctlz");
+ V = B.CreateSub(ConstantInt::get(V->getType(), ArgType->getIntegerBitWidth()),
+ V);
+ return B.CreateIntCast(V, CI->getType(), false);
+}
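The ffs/fls rewrites above rest on two bit-counting identities. The following standalone sketch (plain C++20, illustration only; ffs_ref and fls_ref are made-up reference implementations) checks them with std::countr_zero and std::countl_zero, which, like llvm.cttz/llvm.ctlz with a false is-zero-undef flag, are defined for a zero input.

// Reference check of:
//   ffs(x) -> x != 0 ? cttz(x) + 1 : 0
//   fls(x) -> bitwidth(x) - ctlz(x)   (with ctlz(0) defined as bitwidth)
#include <bit>
#include <cassert>
#include <cstdint>

static int ffs_ref(uint32_t x) { // 1-based index of least significant set bit
  for (int i = 0; i < 32; ++i)
    if (x & (1u << i))
      return i + 1;
  return 0;
}

static int fls_ref(uint32_t x) { // 1-based index of most significant set bit
  int r = 0;
  while (x) { ++r; x >>= 1; }
  return r;
}

int main() {
  for (uint32_t x : {0u, 1u, 2u, 6u, 0x80000000u, 0xFFFFFFFFu}) {
    assert(ffs_ref(x) == (x ? std::countr_zero(x) + 1 : 0));
    assert(fls_ref(x) == 32 - std::countl_zero(x));
  }
}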
+
Value *LibCallSimplifier::optimizeAbs(CallInst *CI, IRBuilder<> &B) {
// abs(x) -> x >s -1 ? x : -x
Value *Op = CI->getArgOperand(0);
@@ -1891,7 +1932,7 @@ Value *LibCallSimplifier::optimizeStringMemoryLibCall(CallInst *CI,
if (TLI->getLibFunc(*Callee, Func) && TLI->has(Func)) {
// Make sure we never change the calling convention.
assert((ignoreCallingConv(Func) ||
- CI->getCallingConv() == llvm::CallingConv::C) &&
+ isCallingConvCCompatible(CI)) &&
"Optimizing string/memory libcall would change the calling convention");
switch (Func) {
case LibFunc::strcat:
@@ -1958,7 +1999,7 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
SmallVector<OperandBundleDef, 2> OpBundles;
CI->getOperandBundlesAsDefs(OpBundles);
IRBuilder<> Builder(CI, /*FPMathTag=*/nullptr, OpBundles);
- bool isCallingConvC = CI->getCallingConv() == llvm::CallingConv::C;
+ bool isCallingConvC = isCallingConvCCompatible(CI);
// Command-line parameter overrides instruction attribute.
if (EnableUnsafeFPShrink.getNumOccurrences() > 0)
@@ -2042,6 +2083,10 @@ Value *LibCallSimplifier::optimizeCall(CallInst *CI) {
case LibFunc::ffsl:
case LibFunc::ffsll:
return optimizeFFS(CI, Builder);
+ case LibFunc::fls:
+ case LibFunc::flsl:
+ case LibFunc::flsll:
+ return optimizeFls(CI, Builder);
case LibFunc::abs:
case LibFunc::labs:
case LibFunc::llabs:
@@ -2314,7 +2359,7 @@ Value *FortifiedLibCallSimplifier::optimizeCall(CallInst *CI) {
SmallVector<OperandBundleDef, 2> OpBundles;
CI->getOperandBundlesAsDefs(OpBundles);
IRBuilder<> Builder(CI, /*FPMathTag=*/nullptr, OpBundles);
- bool isCallingConvC = CI->getCallingConv() == llvm::CallingConv::C;
+ bool isCallingConvC = isCallingConvCCompatible(CI);
// First, check that this is a known library functions and that the prototype
// is correct.
diff --git a/contrib/llvm/lib/Transforms/Utils/StripGCRelocates.cpp b/contrib/llvm/lib/Transforms/Utils/StripGCRelocates.cpp
new file mode 100644
index 000000000000..f3d3fadb51e9
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/StripGCRelocates.cpp
@@ -0,0 +1,80 @@
+//===- StripGCRelocates.cpp - Remove gc.relocates inserted by RewriteStatePoints===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This is a little utility pass that removes the gc.relocates inserted by
+// RewriteStatepointsForGC. Note that the generated IR is incorrect,
+// but this is useful as a single pass in itself, for analysis of IR, without
+// the GC.relocates. The statepoint and gc.result intrinsics would still be
+// present.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Statepoint.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Pass.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+namespace {
+struct StripGCRelocates : public FunctionPass {
+ static char ID; // Pass identification, replacement for typeid
+ StripGCRelocates() : FunctionPass(ID) {
+ initializeStripGCRelocatesPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &Info) const override {}
+
+ bool runOnFunction(Function &F) override;
+
+};
+char StripGCRelocates::ID = 0;
+}
+
+bool StripGCRelocates::runOnFunction(Function &F) {
+ // Nothing to do for declarations.
+ if (F.isDeclaration())
+ return false;
+ SmallVector<GCRelocateInst *, 20> GCRelocates;
+ // TODO: We currently do not handle gc.relocates that are in landing pads,
+ // i.e. not bound to a single statepoint token.
+ for (Instruction &I : instructions(F)) {
+ if (auto *GCR = dyn_cast<GCRelocateInst>(&I))
+ if (isStatepoint(GCR->getOperand(0)))
+ GCRelocates.push_back(GCR);
+ }
+ // All gc.relocates are bound to a single statepoint token. The order of
+ // visiting gc.relocates for deletion does not matter.
+ for (GCRelocateInst *GCRel : GCRelocates) {
+ Value *OrigPtr = GCRel->getDerivedPtr();
+ Value *ReplaceGCRel = OrigPtr;
+
+    // All gc_relocates are i8 addrspace(1)* typed, so we need a bitcast from i8
+    // addrspace(1)* to the type of the OrigPtr, if they are not the same.
+ if (GCRel->getType() != OrigPtr->getType())
+ ReplaceGCRel = new BitCastInst(OrigPtr, GCRel->getType(), "cast", GCRel);
+
+    // Replace all uses of gc.relocate and delete the gc.relocate.
+    // There may be unnecessary bitcasts back to the OrigPtr type; an instcombine
+ // pass would clear this up.
+ GCRel->replaceAllUsesWith(ReplaceGCRel);
+ GCRel->eraseFromParent();
+ }
+ return !GCRelocates.empty();
+}
+
+INITIALIZE_PASS(StripGCRelocates, "strip-gc-relocates",
+ "Strip gc.relocates inserted through RewriteStatepointsForGC",
+ true, false)
+FunctionPass *llvm::createStripGCRelocatesPass() {
+ return new StripGCRelocates();
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp b/contrib/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp
new file mode 100644
index 000000000000..66dbf335cb95
--- /dev/null
+++ b/contrib/llvm/lib/Transforms/Utils/StripNonLineTableDebugInfo.cpp
@@ -0,0 +1,42 @@
+//===- StripNonLineTableDebugInfo.cpp -- Strip parts of Debug Info --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/Pass.h"
+using namespace llvm;
+
+namespace {
+
+/// This pass strips all debug info that is not related to line tables.
+/// The result will be the same as if the program were compiled with
+/// -gline-tables-only.
+struct StripNonLineTableDebugInfo : public ModulePass {
+ static char ID; // Pass identification, replacement for typeid
+ StripNonLineTableDebugInfo() : ModulePass(ID) {
+ initializeStripNonLineTableDebugInfoPass(*PassRegistry::getPassRegistry());
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesAll();
+ }
+
+ bool runOnModule(Module &M) override {
+ return llvm::stripNonLineTableDebugInfo(M);
+ }
+};
+}
+
+char StripNonLineTableDebugInfo::ID = 0;
+INITIALIZE_PASS(StripNonLineTableDebugInfo, "strip-nonlinetable-debuginfo",
+ "Strip all debug info except linetables", false, false)
+
+ModulePass *llvm::createStripNonLineTableDebugInfoPass() {
+ return new StripNonLineTableDebugInfo();
+}
diff --git a/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp b/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
index 7523ca527b68..6d136636ce70 100644
--- a/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/SymbolRewriter.cpp
@@ -58,6 +58,7 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "symbol-rewriter"
+#include "llvm/Transforms/Utils/SymbolRewriter.h"
#include "llvm/Pass.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/IR/LegacyPassManager.h"
@@ -68,7 +69,6 @@
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/YAMLParser.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/SymbolRewriter.h"
using namespace llvm;
using namespace SymbolRewriter;
@@ -361,9 +361,11 @@ parseRewriteFunctionDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
// TODO see if there is a more elegant solution to selecting the rewrite
// descriptor type
if (!Target.empty())
- DL->push_back(new ExplicitRewriteFunctionDescriptor(Source, Target, Naked));
+ DL->push_back(llvm::make_unique<ExplicitRewriteFunctionDescriptor>(
+ Source, Target, Naked));
else
- DL->push_back(new PatternRewriteFunctionDescriptor(Source, Transform));
+ DL->push_back(
+ llvm::make_unique<PatternRewriteFunctionDescriptor>(Source, Transform));
return true;
}
@@ -421,11 +423,12 @@ parseRewriteGlobalVariableDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
}
if (!Target.empty())
- DL->push_back(new ExplicitRewriteGlobalVariableDescriptor(Source, Target,
- /*Naked*/false));
+ DL->push_back(llvm::make_unique<ExplicitRewriteGlobalVariableDescriptor>(
+ Source, Target,
+ /*Naked*/ false));
else
- DL->push_back(new PatternRewriteGlobalVariableDescriptor(Source,
- Transform));
+ DL->push_back(llvm::make_unique<PatternRewriteGlobalVariableDescriptor>(
+ Source, Transform));
return true;
}
@@ -483,67 +486,80 @@ parseRewriteGlobalAliasDescriptor(yaml::Stream &YS, yaml::ScalarNode *K,
}
if (!Target.empty())
- DL->push_back(new ExplicitRewriteNamedAliasDescriptor(Source, Target,
- /*Naked*/false));
+ DL->push_back(llvm::make_unique<ExplicitRewriteNamedAliasDescriptor>(
+ Source, Target,
+ /*Naked*/ false));
else
- DL->push_back(new PatternRewriteNamedAliasDescriptor(Source, Transform));
+ DL->push_back(llvm::make_unique<PatternRewriteNamedAliasDescriptor>(
+ Source, Transform));
return true;
}
namespace {
-class RewriteSymbols : public ModulePass {
+class RewriteSymbolsLegacyPass : public ModulePass {
public:
static char ID; // Pass identification, replacement for typeid
- RewriteSymbols();
- RewriteSymbols(SymbolRewriter::RewriteDescriptorList &DL);
+ RewriteSymbolsLegacyPass();
+ RewriteSymbolsLegacyPass(SymbolRewriter::RewriteDescriptorList &DL);
bool runOnModule(Module &M) override;
private:
- void loadAndParseMapFiles();
-
- SymbolRewriter::RewriteDescriptorList Descriptors;
+ RewriteSymbolPass Impl;
};
-char RewriteSymbols::ID = 0;
+char RewriteSymbolsLegacyPass::ID = 0;
-RewriteSymbols::RewriteSymbols() : ModulePass(ID) {
- initializeRewriteSymbolsPass(*PassRegistry::getPassRegistry());
- loadAndParseMapFiles();
+RewriteSymbolsLegacyPass::RewriteSymbolsLegacyPass() : ModulePass(ID), Impl() {
+ initializeRewriteSymbolsLegacyPassPass(*PassRegistry::getPassRegistry());
}
-RewriteSymbols::RewriteSymbols(SymbolRewriter::RewriteDescriptorList &DL)
- : ModulePass(ID) {
- Descriptors.splice(Descriptors.begin(), DL);
+RewriteSymbolsLegacyPass::RewriteSymbolsLegacyPass(
+ SymbolRewriter::RewriteDescriptorList &DL)
+ : ModulePass(ID), Impl(DL) {}
+
+bool RewriteSymbolsLegacyPass::runOnModule(Module &M) {
+ return Impl.runImpl(M);
+}
}
-bool RewriteSymbols::runOnModule(Module &M) {
+namespace llvm {
+PreservedAnalyses RewriteSymbolPass::run(Module &M, ModuleAnalysisManager &AM) {
+ if (!runImpl(M))
+ return PreservedAnalyses::all();
+
+ return PreservedAnalyses::none();
+}
+
+bool RewriteSymbolPass::runImpl(Module &M) {
bool Changed;
Changed = false;
for (auto &Descriptor : Descriptors)
- Changed |= Descriptor.performOnModule(M);
+ Changed |= Descriptor->performOnModule(M);
return Changed;
}
-void RewriteSymbols::loadAndParseMapFiles() {
+void RewriteSymbolPass::loadAndParseMapFiles() {
const std::vector<std::string> MapFiles(RewriteMapFiles);
- SymbolRewriter::RewriteMapParser parser;
+ SymbolRewriter::RewriteMapParser Parser;
for (const auto &MapFile : MapFiles)
- parser.parse(MapFile, &Descriptors);
+ Parser.parse(MapFile, &Descriptors);
}
}
-INITIALIZE_PASS(RewriteSymbols, "rewrite-symbols", "Rewrite Symbols", false,
- false)
+INITIALIZE_PASS(RewriteSymbolsLegacyPass, "rewrite-symbols", "Rewrite Symbols",
+ false, false)
-ModulePass *llvm::createRewriteSymbolsPass() { return new RewriteSymbols(); }
+ModulePass *llvm::createRewriteSymbolsPass() {
+ return new RewriteSymbolsLegacyPass();
+}
ModulePass *
llvm::createRewriteSymbolsPass(SymbolRewriter::RewriteDescriptorList &DL) {
- return new RewriteSymbols(DL);
+ return new RewriteSymbolsLegacyPass(DL);
}
diff --git a/contrib/llvm/lib/Transforms/Utils/Utils.cpp b/contrib/llvm/lib/Transforms/Utils/Utils.cpp
index 8f85f19efe38..7b9de2eadc61 100644
--- a/contrib/llvm/lib/Transforms/Utils/Utils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/Utils.cpp
@@ -25,16 +25,19 @@ void llvm::initializeTransformUtils(PassRegistry &Registry) {
initializeBreakCriticalEdgesPass(Registry);
initializeInstNamerPass(Registry);
initializeLCSSAWrapperPassPass(Registry);
+ initializeLibCallsShrinkWrapLegacyPassPass(Registry);
initializeLoopSimplifyPass(Registry);
- initializeLowerInvokePass(Registry);
+ initializeLowerInvokeLegacyPassPass(Registry);
initializeLowerSwitchPass(Registry);
- initializeNameAnonFunctionPass(Registry);
+ initializeNameAnonGlobalLegacyPassPass(Registry);
initializePromoteLegacyPassPass(Registry);
+ initializeStripNonLineTableDebugInfoPass(Registry);
initializeUnifyFunctionExitNodesPass(Registry);
initializeInstSimplifierPass(Registry);
initializeMetaRenamerPass(Registry);
initializeMemorySSAWrapperPassPass(Registry);
initializeMemorySSAPrinterLegacyPassPass(Registry);
+ initializeStripGCRelocatesPass(Registry);
}
/// LLVMInitializeTransformUtils - C binding for initializeTransformUtilsPasses.
diff --git a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
index 2eade8cbe8ef..0e9baaf8649d 100644
--- a/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp
@@ -38,15 +38,6 @@ struct DelayedBasicBlock {
BasicBlock *OldBB;
std::unique_ptr<BasicBlock> TempBB;
- // Explicit move for MSVC.
- DelayedBasicBlock(DelayedBasicBlock &&X)
- : OldBB(std::move(X.OldBB)), TempBB(std::move(X.TempBB)) {}
- DelayedBasicBlock &operator=(DelayedBasicBlock &&X) {
- OldBB = std::move(X.OldBB);
- TempBB = std::move(X.TempBB);
- return *this;
- }
-
DelayedBasicBlock(const BlockAddress &Old)
: OldBB(Old.getBasicBlock()),
TempBB(BasicBlock::Create(Old.getContext())) {}
@@ -184,17 +175,6 @@ class MDNodeMapper {
bool HasChanged = false;
unsigned ID = ~0u;
TempMDNode Placeholder;
-
- Data() {}
- Data(Data &&X)
- : HasChanged(std::move(X.HasChanged)), ID(std::move(X.ID)),
- Placeholder(std::move(X.Placeholder)) {}
- Data &operator=(Data &&X) {
- HasChanged = std::move(X.HasChanged);
- ID = std::move(X.ID);
- Placeholder = std::move(X.Placeholder);
- return *this;
- }
};
/// A graph of uniqued nodes.
@@ -671,7 +651,7 @@ void MDNodeMapper::UniquedGraph::propagateChanges() {
if (D.HasChanged)
continue;
- if (!llvm::any_of(N->operands(), [&](const Metadata *Op) {
+ if (none_of(N->operands(), [&](const Metadata *Op) {
auto Where = Info.find(Op);
return Where != Info.end() && Where->second.HasChanged;
}))
diff --git a/contrib/llvm/lib/Transforms/Vectorize/BBVectorize.cpp b/contrib/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
index af594cb751aa..c01740b27d59 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/BBVectorize.cpp
@@ -3148,7 +3148,7 @@ namespace {
LLVMContext::MD_noalias, LLVMContext::MD_fpmath,
LLVMContext::MD_invariant_group};
combineMetadata(K, H, KnownIDs);
- K->intersectOptionalDataWith(H);
+ K->andIRFlags(H);
for (unsigned o = 0; o < NumOperands; ++o)
K->setOperand(o, ReplacedOperands[o]);
diff --git a/contrib/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp b/contrib/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
index c8906bde15e0..c44a393cf846 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/LoadStoreVectorizer.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/OrderedBasicBlock.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -30,6 +31,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Vectorize.h"
using namespace llvm;
@@ -40,13 +42,12 @@ STATISTIC(NumScalarsVectorized, "Number of scalar accesses vectorized");
namespace {
-// TODO: Remove this
-static const unsigned TargetBaseAlign = 4;
+// FIXME: Assuming stack alignment of 4 is always good enough
+static const unsigned StackAdjustedAlignment = 4;
+typedef SmallVector<Instruction *, 8> InstrList;
+typedef MapVector<Value *, InstrList> InstrListMap;
class Vectorizer {
- typedef SmallVector<Value *, 8> ValueList;
- typedef MapVector<Value *, ValueList> ValueListMap;
-
Function &F;
AliasAnalysis &AA;
DominatorTree &DT;
@@ -54,8 +55,6 @@ class Vectorizer {
TargetTransformInfo &TTI;
const DataLayout &DL;
IRBuilder<> Builder;
- ValueListMap StoreRefs;
- ValueListMap LoadRefs;
public:
Vectorizer(Function &F, AliasAnalysis &AA, DominatorTree &DT,
@@ -94,45 +93,47 @@ private:
/// Returns the first and the last instructions in Chain.
std::pair<BasicBlock::iterator, BasicBlock::iterator>
- getBoundaryInstrs(ArrayRef<Value *> Chain);
+ getBoundaryInstrs(ArrayRef<Instruction *> Chain);
/// Erases the original instructions after vectorizing.
- void eraseInstructions(ArrayRef<Value *> Chain);
+ void eraseInstructions(ArrayRef<Instruction *> Chain);
/// "Legalize" the vector type that would be produced by combining \p
/// ElementSizeBits elements in \p Chain. Break into two pieces such that the
/// total size of each piece is 1, 2 or a multiple of 4 bytes. \p Chain is
/// expected to have more than 4 elements.
- std::pair<ArrayRef<Value *>, ArrayRef<Value *>>
- splitOddVectorElts(ArrayRef<Value *> Chain, unsigned ElementSizeBits);
+ std::pair<ArrayRef<Instruction *>, ArrayRef<Instruction *>>
+ splitOddVectorElts(ArrayRef<Instruction *> Chain, unsigned ElementSizeBits);
- /// Checks for instructions which may affect the memory accessed
- /// in the chain between \p From and \p To. Returns Index, where
- /// \p Chain[0, Index) is the largest vectorizable chain prefix.
- /// The elements of \p Chain should be all loads or all stores.
- unsigned getVectorizablePrefixEndIdx(ArrayRef<Value *> Chain,
- BasicBlock::iterator From,
- BasicBlock::iterator To);
+ /// Finds the largest prefix of Chain that's vectorizable, checking for
+ /// intervening instructions which may affect the memory accessed by the
+ /// instructions within Chain.
+ ///
+ /// The elements of \p Chain must be all loads or all stores and must be in
+ /// address order.
+ ArrayRef<Instruction *> getVectorizablePrefix(ArrayRef<Instruction *> Chain);
/// Collects load and store instructions to vectorize.
- void collectInstructions(BasicBlock *BB);
+ std::pair<InstrListMap, InstrListMap> collectInstructions(BasicBlock *BB);
- /// Processes the collected instructions, the \p Map. The elements of \p Map
+ /// Processes the collected instructions, the \p Map. The values of \p Map
/// should be all loads or all stores.
- bool vectorizeChains(ValueListMap &Map);
+ bool vectorizeChains(InstrListMap &Map);
/// Finds the load/stores to consecutive memory addresses and vectorizes them.
- bool vectorizeInstructions(ArrayRef<Value *> Instrs);
+ bool vectorizeInstructions(ArrayRef<Instruction *> Instrs);
/// Vectorizes the load instructions in Chain.
- bool vectorizeLoadChain(ArrayRef<Value *> Chain,
- SmallPtrSet<Value *, 16> *InstructionsProcessed);
+ bool
+ vectorizeLoadChain(ArrayRef<Instruction *> Chain,
+ SmallPtrSet<Instruction *, 16> *InstructionsProcessed);
/// Vectorizes the store instructions in Chain.
- bool vectorizeStoreChain(ArrayRef<Value *> Chain,
- SmallPtrSet<Value *, 16> *InstructionsProcessed);
+ bool
+ vectorizeStoreChain(ArrayRef<Instruction *> Chain,
+ SmallPtrSet<Instruction *, 16> *InstructionsProcessed);
- /// Check if this load/store access is misaligned accesses
+  /// Check if this load/store access is a misaligned access.
bool accessIsMisaligned(unsigned SzInBytes, unsigned AddressSpace,
unsigned Alignment);
};
@@ -147,7 +148,7 @@ public:
bool runOnFunction(Function &F) override;
- const char *getPassName() const override {
+ StringRef getPassName() const override {
return "GPU Load and Store Vectorizer";
}
@@ -177,6 +178,13 @@ Pass *llvm::createLoadStoreVectorizerPass() {
return new LoadStoreVectorizer();
}
+// The real propagateMetadata expects a SmallVector<Value*>, but we deal in
+// vectors of Instructions.
+static void propagateMetadata(Instruction *I, ArrayRef<Instruction *> IL) {
+ SmallVector<Value *, 8> VL(IL.begin(), IL.end());
+ propagateMetadata(I, VL);
+}
+
bool LoadStoreVectorizer::runOnFunction(Function &F) {
// Don't vectorize when the attribute NoImplicitFloat is used.
if (skipFunction(F) || F.hasFnAttribute(Attribute::NoImplicitFloat))
@@ -198,7 +206,8 @@ bool Vectorizer::run() {
// Scan the blocks in the function in post order.
for (BasicBlock *BB : post_order(&F)) {
- collectInstructions(BB);
+ InstrListMap LoadRefs, StoreRefs;
+ std::tie(LoadRefs, StoreRefs) = collectInstructions(BB);
Changed |= vectorizeChains(LoadRefs);
Changed |= vectorizeChains(StoreRefs);
}
@@ -338,6 +347,7 @@ bool Vectorizer::isConsecutiveAccess(Value *A, Value *B) {
}
void Vectorizer::reorder(Instruction *I) {
+ OrderedBasicBlock OBB(I->getParent());
SmallPtrSet<Instruction *, 16> InstructionsToMove;
SmallVector<Instruction *, 16> Worklist;
@@ -350,11 +360,14 @@ void Vectorizer::reorder(Instruction *I) {
if (!IM || IM->getOpcode() == Instruction::PHI)
continue;
- if (!DT.dominates(IM, I)) {
+ // If IM is in another BB, no need to move it, because this pass only
+ // vectorizes instructions within one BB.
+ if (IM->getParent() != I->getParent())
+ continue;
+
+ if (!OBB.dominates(IM, I)) {
InstructionsToMove.insert(IM);
Worklist.push_back(IM);
- assert(IM->getParent() == IW->getParent() &&
- "Instructions to move should be in the same basic block");
}
}
}
@@ -362,7 +375,7 @@ void Vectorizer::reorder(Instruction *I) {
// All instructions to move should follow I. Start from I, not from begin().
for (auto BBI = I->getIterator(), E = I->getParent()->end(); BBI != E;
++BBI) {
- if (!is_contained(InstructionsToMove, &*BBI))
+ if (!InstructionsToMove.count(&*BBI))
continue;
Instruction *IM = &*BBI;
--BBI;
@@ -372,8 +385,8 @@ void Vectorizer::reorder(Instruction *I) {
}
std::pair<BasicBlock::iterator, BasicBlock::iterator>
-Vectorizer::getBoundaryInstrs(ArrayRef<Value *> Chain) {
- Instruction *C0 = cast<Instruction>(Chain[0]);
+Vectorizer::getBoundaryInstrs(ArrayRef<Instruction *> Chain) {
+ Instruction *C0 = Chain[0];
BasicBlock::iterator FirstInstr = C0->getIterator();
BasicBlock::iterator LastInstr = C0->getIterator();
@@ -397,105 +410,152 @@ Vectorizer::getBoundaryInstrs(ArrayRef<Value *> Chain) {
return std::make_pair(FirstInstr, ++LastInstr);
}
-void Vectorizer::eraseInstructions(ArrayRef<Value *> Chain) {
+void Vectorizer::eraseInstructions(ArrayRef<Instruction *> Chain) {
SmallVector<Instruction *, 16> Instrs;
- for (Value *V : Chain) {
- Value *PtrOperand = getPointerOperand(V);
+ for (Instruction *I : Chain) {
+ Value *PtrOperand = getPointerOperand(I);
assert(PtrOperand && "Instruction must have a pointer operand.");
- Instrs.push_back(cast<Instruction>(V));
+ Instrs.push_back(I);
if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(PtrOperand))
Instrs.push_back(GEP);
}
// Erase instructions.
- for (Value *V : Instrs) {
- Instruction *Instr = cast<Instruction>(V);
- if (Instr->use_empty())
- Instr->eraseFromParent();
- }
+ for (Instruction *I : Instrs)
+ if (I->use_empty())
+ I->eraseFromParent();
}
-std::pair<ArrayRef<Value *>, ArrayRef<Value *>>
-Vectorizer::splitOddVectorElts(ArrayRef<Value *> Chain,
+std::pair<ArrayRef<Instruction *>, ArrayRef<Instruction *>>
+Vectorizer::splitOddVectorElts(ArrayRef<Instruction *> Chain,
unsigned ElementSizeBits) {
- unsigned ElemSizeInBytes = ElementSizeBits / 8;
- unsigned SizeInBytes = ElemSizeInBytes * Chain.size();
- unsigned NumRight = (SizeInBytes % 4) / ElemSizeInBytes;
- unsigned NumLeft = Chain.size() - NumRight;
+ unsigned ElementSizeBytes = ElementSizeBits / 8;
+ unsigned SizeBytes = ElementSizeBytes * Chain.size();
+ unsigned NumLeft = (SizeBytes - (SizeBytes % 4)) / ElementSizeBytes;
+ if (NumLeft == Chain.size())
+ --NumLeft;
+ else if (NumLeft == 0)
+ NumLeft = 1;
return std::make_pair(Chain.slice(0, NumLeft), Chain.slice(NumLeft));
}
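A short worked example of the new split computation in splitOddVectorElts() above (plain C++, illustration only; numLeft is a made-up helper that mirrors the arithmetic): keep the largest 4-byte-aligned prefix, but always split off at least one element from either side.

#include <cstdio>

static unsigned numLeft(unsigned NumElts, unsigned ElementSizeBytes) {
  unsigned SizeBytes = ElementSizeBytes * NumElts;
  unsigned NumLeft = (SizeBytes - (SizeBytes % 4)) / ElementSizeBytes;
  if (NumLeft == NumElts)
    --NumLeft;   // the whole chain was already 4-byte sized; still force a split
  else if (NumLeft == 0)
    NumLeft = 1; // keep at least one element on the left
  return NumLeft;
}

int main() {
  std::printf("%u\n", numLeft(7, 2)); // 14 bytes -> 6 + 1 elements
  std::printf("%u\n", numLeft(3, 1)); //  3 bytes -> 1 + 2 elements
  std::printf("%u\n", numLeft(4, 2)); //  8 bytes -> 3 + 1 elements (forced)
}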
-unsigned Vectorizer::getVectorizablePrefixEndIdx(ArrayRef<Value *> Chain,
- BasicBlock::iterator From,
- BasicBlock::iterator To) {
- SmallVector<std::pair<Value *, unsigned>, 16> MemoryInstrs;
- SmallVector<std::pair<Value *, unsigned>, 16> ChainInstrs;
+ArrayRef<Instruction *>
+Vectorizer::getVectorizablePrefix(ArrayRef<Instruction *> Chain) {
+ // These are in BB order, unlike Chain, which is in address order.
+ SmallVector<Instruction *, 16> MemoryInstrs;
+ SmallVector<Instruction *, 16> ChainInstrs;
+
+ bool IsLoadChain = isa<LoadInst>(Chain[0]);
+ DEBUG({
+ for (Instruction *I : Chain) {
+ if (IsLoadChain)
+ assert(isa<LoadInst>(I) &&
+ "All elements of Chain must be loads, or all must be stores.");
+ else
+ assert(isa<StoreInst>(I) &&
+ "All elements of Chain must be loads, or all must be stores.");
+ }
+ });
- unsigned InstrIdx = 0;
- for (auto I = From; I != To; ++I, ++InstrIdx) {
+ for (Instruction &I : make_range(getBoundaryInstrs(Chain))) {
if (isa<LoadInst>(I) || isa<StoreInst>(I)) {
- if (!is_contained(Chain, &*I))
- MemoryInstrs.push_back({&*I, InstrIdx});
+ if (!is_contained(Chain, &I))
+ MemoryInstrs.push_back(&I);
else
- ChainInstrs.push_back({&*I, InstrIdx});
- } else if (I->mayHaveSideEffects()) {
- DEBUG(dbgs() << "LSV: Found side-effecting operation: " << *I << '\n');
- return 0;
+ ChainInstrs.push_back(&I);
+ } else if (IsLoadChain && (I.mayWriteToMemory() || I.mayThrow())) {
+ DEBUG(dbgs() << "LSV: Found may-write/throw operation: " << I << '\n');
+ break;
+ } else if (!IsLoadChain && (I.mayReadOrWriteMemory() || I.mayThrow())) {
+ DEBUG(dbgs() << "LSV: Found may-read/write/throw operation: " << I
+ << '\n');
+ break;
}
}
- assert(Chain.size() == ChainInstrs.size() &&
- "All instructions in the Chain must exist in [From, To).");
+ OrderedBasicBlock OBB(Chain[0]->getParent());
- unsigned ChainIdx = 0;
- for (auto EntryChain : ChainInstrs) {
- Value *ChainInstrValue = EntryChain.first;
- unsigned ChainInstrIdx = EntryChain.second;
- for (auto EntryMem : MemoryInstrs) {
- Value *MemInstrValue = EntryMem.first;
- unsigned MemInstrIdx = EntryMem.second;
- if (isa<LoadInst>(MemInstrValue) && isa<LoadInst>(ChainInstrValue))
+ // Loop until we find an instruction in ChainInstrs that we can't vectorize.
+ unsigned ChainInstrIdx = 0;
+ Instruction *BarrierMemoryInstr = nullptr;
+
+ for (unsigned E = ChainInstrs.size(); ChainInstrIdx < E; ++ChainInstrIdx) {
+ Instruction *ChainInstr = ChainInstrs[ChainInstrIdx];
+
+ // If a barrier memory instruction was found, chain instructions that follow
+ // will not be added to the valid prefix.
+ if (BarrierMemoryInstr && OBB.dominates(BarrierMemoryInstr, ChainInstr))
+ break;
+
+ // Check (in BB order) if any instruction prevents ChainInstr from being
+ // vectorized. Find and store the first such "conflicting" instruction.
+ for (Instruction *MemInstr : MemoryInstrs) {
+ // If a barrier memory instruction was found, do not check past it.
+ if (BarrierMemoryInstr && OBB.dominates(BarrierMemoryInstr, MemInstr))
+ break;
+
+ if (isa<LoadInst>(MemInstr) && isa<LoadInst>(ChainInstr))
continue;
// We can ignore the alias as long as the load comes before the store,
// because that means we won't be moving the load past the store to
// vectorize it (the vectorized load is inserted at the location of the
// first load in the chain).
- if (isa<StoreInst>(MemInstrValue) && isa<LoadInst>(ChainInstrValue) &&
- ChainInstrIdx < MemInstrIdx)
+ if (isa<StoreInst>(MemInstr) && isa<LoadInst>(ChainInstr) &&
+ OBB.dominates(ChainInstr, MemInstr))
continue;
// Same case, but in reverse.
- if (isa<LoadInst>(MemInstrValue) && isa<StoreInst>(ChainInstrValue) &&
- ChainInstrIdx > MemInstrIdx)
+ if (isa<LoadInst>(MemInstr) && isa<StoreInst>(ChainInstr) &&
+ OBB.dominates(MemInstr, ChainInstr))
continue;
- Instruction *M0 = cast<Instruction>(MemInstrValue);
- Instruction *M1 = cast<Instruction>(ChainInstrValue);
-
- if (!AA.isNoAlias(MemoryLocation::get(M0), MemoryLocation::get(M1))) {
+ if (!AA.isNoAlias(MemoryLocation::get(MemInstr),
+ MemoryLocation::get(ChainInstr))) {
DEBUG({
- Value *Ptr0 = getPointerOperand(M0);
- Value *Ptr1 = getPointerOperand(M1);
-
- dbgs() << "LSV: Found alias.\n"
- " Aliasing instruction and pointer:\n"
- << *MemInstrValue << " aliases " << *Ptr0 << '\n'
- << " Aliased instruction and pointer:\n"
- << *ChainInstrValue << " aliases " << *Ptr1 << '\n';
+ dbgs() << "LSV: Found alias:\n"
+ " Aliasing instruction and pointer:\n"
+ << " " << *MemInstr << '\n'
+ << " " << *getPointerOperand(MemInstr) << '\n'
+ << " Aliased instruction and pointer:\n"
+ << " " << *ChainInstr << '\n'
+ << " " << *getPointerOperand(ChainInstr) << '\n';
});
-
- return ChainIdx;
+ // Save this aliasing memory instruction as a barrier, but allow other
+ // instructions that precede the barrier to be vectorized with this one.
+ BarrierMemoryInstr = MemInstr;
+ break;
}
}
- ChainIdx++;
+ // Continue the search only for store chains, since vectorizing stores that
+ // precede an aliasing load is valid. Conversely, vectorizing loads is valid
+ // up to an aliasing store, but should not pull loads from further down in
+ // the basic block.
+ if (IsLoadChain && BarrierMemoryInstr) {
+ // The BarrierMemoryInstr is a store that precedes ChainInstr.
+ assert(OBB.dominates(BarrierMemoryInstr, ChainInstr));
+ break;
+ }
}
- return Chain.size();
+
+ // Find the largest prefix of Chain whose elements are all in
+ // ChainInstrs[0, ChainInstrIdx). This is the largest vectorizable prefix of
+ // Chain. (Recall that Chain is in address order, but ChainInstrs is in BB
+ // order.)
+ SmallPtrSet<Instruction *, 8> VectorizableChainInstrs(
+ ChainInstrs.begin(), ChainInstrs.begin() + ChainInstrIdx);
+ unsigned ChainIdx = 0;
+ for (unsigned ChainLen = Chain.size(); ChainIdx < ChainLen; ++ChainIdx) {
+ if (!VectorizableChainInstrs.count(Chain[ChainIdx]))
+ break;
+ }
+ return Chain.slice(0, ChainIdx);
}
-void Vectorizer::collectInstructions(BasicBlock *BB) {
- LoadRefs.clear();
- StoreRefs.clear();
+std::pair<InstrListMap, InstrListMap>
+Vectorizer::collectInstructions(BasicBlock *BB) {
+ InstrListMap LoadRefs;
+ InstrListMap StoreRefs;
for (Instruction &I : *BB) {
if (!I.mayReadOrWriteMemory())
@@ -505,6 +565,10 @@ void Vectorizer::collectInstructions(BasicBlock *BB) {
if (!LI->isSimple())
continue;
+ // Skip if it's not legal.
+ if (!TTI.isLegalToVectorizeLoad(LI))
+ continue;
+
Type *Ty = LI->getType();
if (!VectorType::isValidElementType(Ty->getScalarType()))
continue;
@@ -525,14 +589,11 @@ void Vectorizer::collectInstructions(BasicBlock *BB) {
// Make sure all the users of a vector are constant-index extracts.
if (isa<VectorType>(Ty) && !all_of(LI->users(), [LI](const User *U) {
- const Instruction *UI = cast<Instruction>(U);
- return isa<ExtractElementInst>(UI) &&
- isa<ConstantInt>(UI->getOperand(1));
+ const ExtractElementInst *EEI = dyn_cast<ExtractElementInst>(U);
+ return EEI && isa<ConstantInt>(EEI->getOperand(1));
}))
continue;
- // TODO: Target hook to filter types.
-
// Save the load locations.
Value *ObjPtr = GetUnderlyingObject(Ptr, DL);
LoadRefs[ObjPtr].push_back(LI);
@@ -541,6 +602,10 @@ void Vectorizer::collectInstructions(BasicBlock *BB) {
if (!SI->isSimple())
continue;
+ // Skip if it's not legal.
+ if (!TTI.isLegalToVectorizeStore(SI))
+ continue;
+
Type *Ty = SI->getValueOperand()->getType();
if (!VectorType::isValidElementType(Ty->getScalarType()))
continue;
@@ -558,9 +623,8 @@ void Vectorizer::collectInstructions(BasicBlock *BB) {
continue;
if (isa<VectorType>(Ty) && !all_of(SI->users(), [SI](const User *U) {
- const Instruction *UI = cast<Instruction>(U);
- return isa<ExtractElementInst>(UI) &&
- isa<ConstantInt>(UI->getOperand(1));
+ const ExtractElementInst *EEI = dyn_cast<ExtractElementInst>(U);
+ return EEI && isa<ConstantInt>(EEI->getOperand(1));
}))
continue;
@@ -569,12 +633,14 @@ void Vectorizer::collectInstructions(BasicBlock *BB) {
StoreRefs[ObjPtr].push_back(SI);
}
}
+
+ return {LoadRefs, StoreRefs};
}
-bool Vectorizer::vectorizeChains(ValueListMap &Map) {
+bool Vectorizer::vectorizeChains(InstrListMap &Map) {
bool Changed = false;
- for (const std::pair<Value *, ValueList> &Chain : Map) {
+ for (const std::pair<Value *, InstrList> &Chain : Map) {
unsigned Size = Chain.second.size();
if (Size < 2)
continue;
@@ -584,7 +650,7 @@ bool Vectorizer::vectorizeChains(ValueListMap &Map) {
// Process the stores in chunks of 64.
for (unsigned CI = 0, CE = Size; CI < CE; CI += 64) {
unsigned Len = std::min<unsigned>(CE - CI, 64);
- ArrayRef<Value *> Chunk(&Chain.second[CI], Len);
+ ArrayRef<Instruction *> Chunk(&Chain.second[CI], Len);
Changed |= vectorizeInstructions(Chunk);
}
}
@@ -592,9 +658,9 @@ bool Vectorizer::vectorizeChains(ValueListMap &Map) {
return Changed;
}
-bool Vectorizer::vectorizeInstructions(ArrayRef<Value *> Instrs) {
+bool Vectorizer::vectorizeInstructions(ArrayRef<Instruction *> Instrs) {
DEBUG(dbgs() << "LSV: Vectorizing " << Instrs.size() << " instructions.\n");
- SmallSetVector<int, 16> Heads, Tails;
+ SmallVector<int, 16> Heads, Tails;
int ConsecutiveChain[64];
// Do a quadratic search on all of the given stores and find all of the pairs
@@ -613,34 +679,34 @@ bool Vectorizer::vectorizeInstructions(ArrayRef<Value *> Instrs) {
continue; // Should not insert.
}
- Tails.insert(j);
- Heads.insert(i);
+ Tails.push_back(j);
+ Heads.push_back(i);
ConsecutiveChain[i] = j;
}
}
}
bool Changed = false;
- SmallPtrSet<Value *, 16> InstructionsProcessed;
+ SmallPtrSet<Instruction *, 16> InstructionsProcessed;
for (int Head : Heads) {
if (InstructionsProcessed.count(Instrs[Head]))
continue;
- bool longerChainExists = false;
+ bool LongerChainExists = false;
for (unsigned TIt = 0; TIt < Tails.size(); TIt++)
if (Head == Tails[TIt] &&
!InstructionsProcessed.count(Instrs[Heads[TIt]])) {
- longerChainExists = true;
+ LongerChainExists = true;
break;
}
- if (longerChainExists)
+ if (LongerChainExists)
continue;
// We found an instr that starts a chain. Now follow the chain and try to
// vectorize it.
- SmallVector<Value *, 16> Operands;
+ SmallVector<Instruction *, 16> Operands;
int I = Head;
- while (I != -1 && (Tails.count(I) || Heads.count(I))) {
+ while (I != -1 && (is_contained(Tails, I) || is_contained(Heads, I))) {
if (InstructionsProcessed.count(Instrs[I]))
break;
@@ -661,13 +727,14 @@ bool Vectorizer::vectorizeInstructions(ArrayRef<Value *> Instrs) {
}
bool Vectorizer::vectorizeStoreChain(
- ArrayRef<Value *> Chain, SmallPtrSet<Value *, 16> *InstructionsProcessed) {
+ ArrayRef<Instruction *> Chain,
+ SmallPtrSet<Instruction *, 16> *InstructionsProcessed) {
StoreInst *S0 = cast<StoreInst>(Chain[0]);
// If the vector has an int element, default to int for the whole load.
Type *StoreTy;
- for (const auto &V : Chain) {
- StoreTy = cast<StoreInst>(V)->getValueOperand()->getType();
+ for (Instruction *I : Chain) {
+ StoreTy = cast<StoreInst>(I)->getValueOperand()->getType();
if (StoreTy->isIntOrIntVectorTy())
break;
@@ -683,40 +750,34 @@ bool Vectorizer::vectorizeStoreChain(
unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
unsigned VF = VecRegSize / Sz;
unsigned ChainSize = Chain.size();
+ unsigned Alignment = getAlignment(S0);
if (!isPowerOf2_32(Sz) || VF < 2 || ChainSize < 2) {
InstructionsProcessed->insert(Chain.begin(), Chain.end());
return false;
}
- BasicBlock::iterator First, Last;
- std::tie(First, Last) = getBoundaryInstrs(Chain);
- unsigned StopChain = getVectorizablePrefixEndIdx(Chain, First, Last);
- if (StopChain == 0) {
- // There exists a side effect instruction, no vectorization possible.
+ ArrayRef<Instruction *> NewChain = getVectorizablePrefix(Chain);
+ if (NewChain.empty()) {
+ // No vectorization possible.
InstructionsProcessed->insert(Chain.begin(), Chain.end());
return false;
}
- if (StopChain == 1) {
+ if (NewChain.size() == 1) {
// Failed after the first instruction. Discard it and try the smaller chain.
- InstructionsProcessed->insert(Chain.front());
+ InstructionsProcessed->insert(NewChain.front());
return false;
}
// Update Chain to the valid vectorizable subchain.
- Chain = Chain.slice(0, StopChain);
+ Chain = NewChain;
ChainSize = Chain.size();
- // Store size should be 1B, 2B or multiple of 4B.
- // TODO: Target hook for size constraint?
- unsigned SzInBytes = (Sz / 8) * ChainSize;
- if (SzInBytes > 2 && SzInBytes % 4 != 0) {
- DEBUG(dbgs() << "LSV: Size should be 1B, 2B "
- "or multiple of 4B. Splitting.\n");
- if (SzInBytes == 3)
- return vectorizeStoreChain(Chain.slice(0, ChainSize - 1),
- InstructionsProcessed);
-
+ // Check if it's legal to vectorize this chain. If not, split the chain and
+ // try again.
+ unsigned EltSzInBytes = Sz / 8;
+ unsigned SzInBytes = EltSzInBytes * ChainSize;
+ if (!TTI.isLegalToVectorizeStoreChain(SzInBytes, Alignment, AS)) {
auto Chains = splitOddVectorElts(Chain, Sz);
return vectorizeStoreChain(Chains.first, InstructionsProcessed) |
vectorizeStoreChain(Chains.second, InstructionsProcessed);
@@ -730,45 +791,41 @@ bool Vectorizer::vectorizeStoreChain(
else
VecTy = VectorType::get(StoreTy, Chain.size());
- // If it's more than the max vector size, break it into two pieces.
- // TODO: Target hook to control types to split to.
- if (ChainSize > VF) {
- DEBUG(dbgs() << "LSV: Vector factor is too big."
+ // If it's more than the max vector size or the target has a better
+ // vector factor, break it into two pieces.
+ unsigned TargetVF = TTI.getStoreVectorFactor(VF, Sz, SzInBytes, VecTy);
+ if (ChainSize > VF || (VF != TargetVF && TargetVF < ChainSize)) {
+ DEBUG(dbgs() << "LSV: Chain doesn't match with the vector factor."
" Creating two separate arrays.\n");
- return vectorizeStoreChain(Chain.slice(0, VF), InstructionsProcessed) |
- vectorizeStoreChain(Chain.slice(VF), InstructionsProcessed);
+ return vectorizeStoreChain(Chain.slice(0, TargetVF),
+ InstructionsProcessed) |
+ vectorizeStoreChain(Chain.slice(TargetVF), InstructionsProcessed);
}
DEBUG({
dbgs() << "LSV: Stores to vectorize:\n";
- for (Value *V : Chain)
- V->dump();
+ for (Instruction *I : Chain)
+ dbgs() << " " << *I << "\n";
});
// We won't try again to vectorize the elements of the chain, regardless of
// whether we succeed below.
InstructionsProcessed->insert(Chain.begin(), Chain.end());
- // Check alignment restrictions.
- unsigned Alignment = getAlignment(S0);
-
// If the store is going to be misaligned, don't vectorize it.
if (accessIsMisaligned(SzInBytes, AS, Alignment)) {
if (S0->getPointerAddressSpace() != 0)
return false;
- // If we're storing to an object on the stack, we control its alignment,
- // so we can cheat and change it!
- Value *V = GetUnderlyingObject(S0->getPointerOperand(), DL);
- if (AllocaInst *AI = dyn_cast_or_null<AllocaInst>(V)) {
- AI->setAlignment(TargetBaseAlign);
- Alignment = TargetBaseAlign;
- } else {
+ unsigned NewAlign = getOrEnforceKnownAlignment(S0->getPointerOperand(),
+ StackAdjustedAlignment,
+ DL, S0, nullptr, &DT);
+ if (NewAlign < StackAdjustedAlignment)
return false;
- }
}
- // Set insert point.
+ BasicBlock::iterator First, Last;
+ std::tie(First, Last) = getBoundaryInstrs(Chain);
Builder.SetInsertPoint(&*Last);
Value *Vec = UndefValue::get(VecTy);
@@ -803,9 +860,11 @@ bool Vectorizer::vectorizeStoreChain(
}
}
- Value *Bitcast =
- Builder.CreateBitCast(S0->getPointerOperand(), VecTy->getPointerTo(AS));
- StoreInst *SI = cast<StoreInst>(Builder.CreateStore(Vec, Bitcast));
+ // This cast is safe because Builder.CreateStore() always creates a bona fide
+ // StoreInst.
+ StoreInst *SI = cast<StoreInst>(
+ Builder.CreateStore(Vec, Builder.CreateBitCast(S0->getPointerOperand(),
+ VecTy->getPointerTo(AS))));
propagateMetadata(SI, Chain);
SI->setAlignment(Alignment);
@@ -816,7 +875,8 @@ bool Vectorizer::vectorizeStoreChain(
}
bool Vectorizer::vectorizeLoadChain(
- ArrayRef<Value *> Chain, SmallPtrSet<Value *, 16> *InstructionsProcessed) {
+ ArrayRef<Instruction *> Chain,
+ SmallPtrSet<Instruction *, 16> *InstructionsProcessed) {
LoadInst *L0 = cast<LoadInst>(Chain[0]);
// If the vector has an int element, default to int for the whole load.
@@ -838,39 +898,34 @@ bool Vectorizer::vectorizeLoadChain(
unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
unsigned VF = VecRegSize / Sz;
unsigned ChainSize = Chain.size();
+ unsigned Alignment = getAlignment(L0);
if (!isPowerOf2_32(Sz) || VF < 2 || ChainSize < 2) {
InstructionsProcessed->insert(Chain.begin(), Chain.end());
return false;
}
- BasicBlock::iterator First, Last;
- std::tie(First, Last) = getBoundaryInstrs(Chain);
- unsigned StopChain = getVectorizablePrefixEndIdx(Chain, First, Last);
- if (StopChain == 0) {
- // There exists a side effect instruction, no vectorization possible.
+ ArrayRef<Instruction *> NewChain = getVectorizablePrefix(Chain);
+ if (NewChain.empty()) {
+ // No vectorization possible.
InstructionsProcessed->insert(Chain.begin(), Chain.end());
return false;
}
- if (StopChain == 1) {
+ if (NewChain.size() == 1) {
// Failed after the first instruction. Discard it and try the smaller chain.
- InstructionsProcessed->insert(Chain.front());
+ InstructionsProcessed->insert(NewChain.front());
return false;
}
// Update Chain to the valid vectorizable subchain.
- Chain = Chain.slice(0, StopChain);
+ Chain = NewChain;
ChainSize = Chain.size();
- // Load size should be 1B, 2B or multiple of 4B.
- // TODO: Should size constraint be a target hook?
- unsigned SzInBytes = (Sz / 8) * ChainSize;
- if (SzInBytes > 2 && SzInBytes % 4 != 0) {
- DEBUG(dbgs() << "LSV: Size should be 1B, 2B "
- "or multiple of 4B. Splitting.\n");
- if (SzInBytes == 3)
- return vectorizeLoadChain(Chain.slice(0, ChainSize - 1),
- InstructionsProcessed);
+ // Check if it's legal to vectorize this chain. If not, split the chain and
+ // try again.
+ unsigned EltSzInBytes = Sz / 8;
+ unsigned SzInBytes = EltSzInBytes * ChainSize;
+ if (!TTI.isLegalToVectorizeLoadChain(SzInBytes, Alignment, AS)) {
auto Chains = splitOddVectorElts(Chain, Sz);
return vectorizeLoadChain(Chains.first, InstructionsProcessed) |
vectorizeLoadChain(Chains.second, InstructionsProcessed);
@@ -884,101 +939,99 @@ bool Vectorizer::vectorizeLoadChain(
else
VecTy = VectorType::get(LoadTy, Chain.size());
- // If it's more than the max vector size, break it into two pieces.
- // TODO: Target hook to control types to split to.
- if (ChainSize > VF) {
- DEBUG(dbgs() << "LSV: Vector factor is too big. "
- "Creating two separate arrays.\n");
- return vectorizeLoadChain(Chain.slice(0, VF), InstructionsProcessed) |
- vectorizeLoadChain(Chain.slice(VF), InstructionsProcessed);
+ // If it's more than the max vector size or the target has a better
+ // vector factor, break it into two pieces.
+ unsigned TargetVF = TTI.getLoadVectorFactor(VF, Sz, SzInBytes, VecTy);
+ if (ChainSize > VF || (VF != TargetVF && TargetVF < ChainSize)) {
+ DEBUG(dbgs() << "LSV: Chain doesn't match with the vector factor."
+ " Creating two separate arrays.\n");
+ return vectorizeLoadChain(Chain.slice(0, TargetVF), InstructionsProcessed) |
+ vectorizeLoadChain(Chain.slice(TargetVF), InstructionsProcessed);
}
// We won't try again to vectorize the elements of the chain, regardless of
// whether we succeed below.
InstructionsProcessed->insert(Chain.begin(), Chain.end());
- // Check alignment restrictions.
- unsigned Alignment = getAlignment(L0);
-
// If the load is going to be misaligned, don't vectorize it.
if (accessIsMisaligned(SzInBytes, AS, Alignment)) {
if (L0->getPointerAddressSpace() != 0)
return false;
- // If we're loading from an object on the stack, we control its alignment,
- // so we can cheat and change it!
- Value *V = GetUnderlyingObject(L0->getPointerOperand(), DL);
- if (AllocaInst *AI = dyn_cast_or_null<AllocaInst>(V)) {
- AI->setAlignment(TargetBaseAlign);
- Alignment = TargetBaseAlign;
- } else {
+ unsigned NewAlign = getOrEnforceKnownAlignment(L0->getPointerOperand(),
+ StackAdjustedAlignment,
+ DL, L0, nullptr, &DT);
+ if (NewAlign < StackAdjustedAlignment)
return false;
- }
+
+ Alignment = NewAlign;
}
DEBUG({
dbgs() << "LSV: Loads to vectorize:\n";
- for (Value *V : Chain)
- V->dump();
+ for (Instruction *I : Chain)
+ I->dump();
});
- // Set insert point.
+ // getVectorizablePrefix already computed getBoundaryInstrs. The value of
+ // Last may have changed since then, but the value of First won't have. If it
+ // matters, we could compute getBoundaryInstrs only once and reuse it here.
+ BasicBlock::iterator First, Last;
+ std::tie(First, Last) = getBoundaryInstrs(Chain);
Builder.SetInsertPoint(&*First);
Value *Bitcast =
Builder.CreateBitCast(L0->getPointerOperand(), VecTy->getPointerTo(AS));
-
+ // This cast is safe because Builder.CreateLoad always creates a bona fide
+ // LoadInst.
LoadInst *LI = cast<LoadInst>(Builder.CreateLoad(Bitcast));
propagateMetadata(LI, Chain);
LI->setAlignment(Alignment);
if (VecLoadTy) {
SmallVector<Instruction *, 16> InstrsToErase;
- SmallVector<Instruction *, 16> InstrsToReorder;
- InstrsToReorder.push_back(cast<Instruction>(Bitcast));
unsigned VecWidth = VecLoadTy->getNumElements();
for (unsigned I = 0, E = Chain.size(); I != E; ++I) {
for (auto Use : Chain[I]->users()) {
+ // All users of vector loads are ExtractElement instructions with
+ // constant indices; otherwise we would have bailed before now.
Instruction *UI = cast<Instruction>(Use);
unsigned Idx = cast<ConstantInt>(UI->getOperand(1))->getZExtValue();
unsigned NewIdx = Idx + I * VecWidth;
- Value *V = Builder.CreateExtractElement(LI, Builder.getInt32(NewIdx));
- Instruction *Extracted = cast<Instruction>(V);
- if (Extracted->getType() != UI->getType())
- Extracted = cast<Instruction>(
- Builder.CreateBitCast(Extracted, UI->getType()));
+ Value *V = Builder.CreateExtractElement(LI, Builder.getInt32(NewIdx),
+ UI->getName());
+ if (V->getType() != UI->getType())
+ V = Builder.CreateBitCast(V, UI->getType());
// Replace the old instruction.
- UI->replaceAllUsesWith(Extracted);
+ UI->replaceAllUsesWith(V);
InstrsToErase.push_back(UI);
}
}
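Spelled out with assumed shapes: if each original load produced a <4 x i32> (so VecWidth is 4) and the chain has two such loads, lane Idx of chain element I is re-read from lane Idx + I * VecWidth of the merged load.

// Illustrative only: two <4 x i32> loads merged into one <8 x i32> load.
constexpr unsigned VecWidth = 4;
constexpr unsigned I = 1, Idx = 2;             // lane 2 of the second load
constexpr unsigned NewIdx = Idx + I * VecWidth;
static_assert(NewIdx == 6, "lane 6 of the merged load");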
- for (Instruction *ModUser : InstrsToReorder)
- reorder(ModUser);
+ // Bitcast might not be an Instruction, if the value being loaded is a
+ // constant. In that case, no need to reorder anything.
+ if (Instruction *BitcastInst = dyn_cast<Instruction>(Bitcast))
+ reorder(BitcastInst);
for (auto I : InstrsToErase)
I->eraseFromParent();
} else {
- SmallVector<Instruction *, 16> InstrsToReorder;
- InstrsToReorder.push_back(cast<Instruction>(Bitcast));
-
for (unsigned I = 0, E = Chain.size(); I != E; ++I) {
- Value *V = Builder.CreateExtractElement(LI, Builder.getInt32(I));
- Instruction *Extracted = cast<Instruction>(V);
- Instruction *UI = cast<Instruction>(Chain[I]);
- if (Extracted->getType() != UI->getType()) {
- Extracted = cast<Instruction>(
- Builder.CreateBitOrPointerCast(Extracted, UI->getType()));
+ Value *CV = Chain[I];
+ Value *V =
+ Builder.CreateExtractElement(LI, Builder.getInt32(I), CV->getName());
+ if (V->getType() != CV->getType()) {
+ V = Builder.CreateBitOrPointerCast(V, CV->getType());
}
// Replace the old instruction.
- UI->replaceAllUsesWith(Extracted);
+ CV->replaceAllUsesWith(V);
}
- for (Instruction *ModUser : InstrsToReorder)
- reorder(ModUser);
+ if (Instruction *BitcastInst = dyn_cast<Instruction>(Bitcast))
+ reorder(BitcastInst);
}
eraseInstructions(Chain);
@@ -990,10 +1043,14 @@ bool Vectorizer::vectorizeLoadChain(
bool Vectorizer::accessIsMisaligned(unsigned SzInBytes, unsigned AddressSpace,
unsigned Alignment) {
+ if (Alignment % SzInBytes == 0)
+ return false;
+
bool Fast = false;
- bool Allows = TTI.allowsMisalignedMemoryAccesses(SzInBytes * 8, AddressSpace,
+ bool Allows = TTI.allowsMisalignedMemoryAccesses(F.getParent()->getContext(),
+ SzInBytes * 8, AddressSpace,
Alignment, &Fast);
- // TODO: Remove TargetBaseAlign
- return !(Allows && Fast) && (Alignment % SzInBytes) != 0 &&
- (Alignment % TargetBaseAlign) != 0;
+ DEBUG(dbgs() << "LSV: Target said misaligned is allowed? " << Allows
+ << " and fast? " << Fast << "\n";);
+ return !Allows || !Fast;
}
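Restated: the rewritten check treats an access whose alignment is a multiple of its size as aligned up front, and only otherwise asks the target whether a misaligned access is both allowed and fast. A minimal standalone restatement, with the TTI query stubbed out as two assumed booleans:

// Sketch only; TargetAllows and Fast stand in for the TTI query above.
static bool accessIsMisalignedSketch(unsigned SzInBytes, unsigned Alignment,
                                     bool TargetAllows, bool Fast) {
  if (Alignment % SzInBytes == 0)   // naturally aligned: never misaligned
    return false;
  return !TargetAllows || !Fast;    // misaligned unless allowed *and* fast
}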
diff --git a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index ee5733d20f4f..8cde0c4cd607 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -191,7 +191,7 @@ static cl::opt<bool> EnableIndVarRegisterHeur(
cl::desc("Count the induction variable only once when interleaving"));
static cl::opt<bool> EnableCondStoresVectorization(
- "enable-cond-stores-vec", cl::init(false), cl::Hidden,
+ "enable-cond-stores-vec", cl::init(true), cl::Hidden,
cl::desc("Enable if predication of stores during vectorization."));
static cl::opt<unsigned> MaxNestedScalarReductionIC(
@@ -213,6 +213,32 @@ static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold(
cl::desc("The maximum number of SCEV checks allowed with a "
"vectorize(enable) pragma"));
+/// Create an analysis remark that explains why vectorization failed
+///
+/// \p PassName is the name of the pass (e.g. can be AlwaysPrint). \p
+/// RemarkName is the identifier for the remark. If \p I is passed it is an
+/// instruction that prevents vectorization. Otherwise \p TheLoop is used for
+/// the location of the remark. \return the remark object that can be
+/// streamed to.
+static OptimizationRemarkAnalysis
+createMissedAnalysis(const char *PassName, StringRef RemarkName, Loop *TheLoop,
+ Instruction *I = nullptr) {
+ Value *CodeRegion = TheLoop->getHeader();
+ DebugLoc DL = TheLoop->getStartLoc();
+
+ if (I) {
+ CodeRegion = I->getParent();
+ // If there is no debug location attached to the instruction, revert back to
+ // using the loop's.
+ if (I->getDebugLoc())
+ DL = I->getDebugLoc();
+ }
+
+ OptimizationRemarkAnalysis R(PassName, RemarkName, DL, CodeRegion);
+ R << "loop not vectorized: ";
+ return R;
+}
+
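Based on how remarks are emitted elsewhere in this patch, the returned object is meant to have the specific reason streamed onto it and then be handed to the remark emitter; a hedged usage sketch (the remark name and message below are invented for illustration):

// Hypothetical call site, following the streaming pattern used in this file.
ORE->emit(createMissedAnalysis(LV_NAME, "ExampleReason", TheLoop, I)
          << "example reason the transformation was blocked");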
namespace {
// Forward declarations.
@@ -221,70 +247,13 @@ class LoopVectorizationLegality;
class LoopVectorizationCostModel;
class LoopVectorizationRequirements;
-// A traits type that is intended to be used in graph algorithms. The graph it
-// models starts at the loop header, and traverses the BasicBlocks that are in
-// the loop body, but not the loop header. Since the loop header is skipped,
-// the back edges are excluded.
-struct LoopBodyTraits {
- using NodeRef = std::pair<const Loop *, BasicBlock *>;
-
- // This wraps a const Loop * into the iterator, so we know which edges to
- // filter out.
- class WrappedSuccIterator
- : public iterator_adaptor_base<
- WrappedSuccIterator, succ_iterator,
- typename std::iterator_traits<succ_iterator>::iterator_category,
- NodeRef, std::ptrdiff_t, NodeRef *, NodeRef> {
- using BaseT = iterator_adaptor_base<
- WrappedSuccIterator, succ_iterator,
- typename std::iterator_traits<succ_iterator>::iterator_category,
- NodeRef, std::ptrdiff_t, NodeRef *, NodeRef>;
-
- const Loop *L;
-
- public:
- WrappedSuccIterator(succ_iterator Begin, const Loop *L)
- : BaseT(Begin), L(L) {}
-
- NodeRef operator*() const { return {L, *I}; }
- };
-
- struct LoopBodyFilter {
- bool operator()(NodeRef N) const {
- const Loop *L = N.first;
- return N.second != L->getHeader() && L->contains(N.second);
- }
- };
-
- using ChildIteratorType =
- filter_iterator<WrappedSuccIterator, LoopBodyFilter>;
-
- static NodeRef getEntryNode(const Loop &G) { return {&G, G.getHeader()}; }
-
- static ChildIteratorType child_begin(NodeRef Node) {
- return make_filter_range(make_range<WrappedSuccIterator>(
- {succ_begin(Node.second), Node.first},
- {succ_end(Node.second), Node.first}),
- LoopBodyFilter{})
- .begin();
- }
-
- static ChildIteratorType child_end(NodeRef Node) {
- return make_filter_range(make_range<WrappedSuccIterator>(
- {succ_begin(Node.second), Node.first},
- {succ_end(Node.second), Node.first}),
- LoopBodyFilter{})
- .end();
- }
-};
-
/// Returns true if the given loop body has a cycle, excluding the loop
/// itself.
static bool hasCyclesInLoopBody(const Loop &L) {
if (!L.empty())
return true;
- for (const auto SCC :
+ for (const auto &SCC :
make_range(scc_iterator<Loop, LoopBodyTraits>::begin(L),
scc_iterator<Loop, LoopBodyTraits>::end(L))) {
if (SCC.size() > 1) {
@@ -346,6 +315,41 @@ static GetElementPtrInst *getGEPInstruction(Value *Ptr) {
return nullptr;
}
+/// A helper function that returns the pointer operand of a load or store
+/// instruction.
+static Value *getPointerOperand(Value *I) {
+ if (auto *LI = dyn_cast<LoadInst>(I))
+ return LI->getPointerOperand();
+ if (auto *SI = dyn_cast<StoreInst>(I))
+ return SI->getPointerOperand();
+ return nullptr;
+}
+
+/// A helper function that returns true if the given type is irregular. The
+/// type is irregular if its allocated size doesn't equal the store size of an
+/// element of the corresponding vector type at the given vectorization factor.
+static bool hasIrregularType(Type *Ty, const DataLayout &DL, unsigned VF) {
+
+ // Determine if an array of VF elements of type Ty is "bitcast compatible"
+ // with a <VF x Ty> vector.
+ if (VF > 1) {
+ auto *VectorTy = VectorType::get(Ty, VF);
+ return VF * DL.getTypeAllocSize(Ty) != DL.getTypeStoreSize(VectorTy);
+ }
+
+ // If the vectorization factor is one, we just check if an array of type Ty
+ // requires padding between elements.
+ return DL.getTypeAllocSizeInBits(Ty) != DL.getTypeSizeInBits(Ty);
+}
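With sizes assumed from a typical data layout, the check above flags i1 at VF = 8 as irregular (8 lanes at 1 allocated byte each, but <8 x i1> stores in a single byte), while i32 at VF = 4 is regular (4 * 4 bytes equals the 16-byte store size of <4 x i32>). The arithmetic itself is just:

// Sketch of the VF > 1 case; the sizes are assumed, not queried from a real
// DataLayout.
static bool hasIrregularTypeSketch(unsigned VF, unsigned EltAllocBytes,
                                   unsigned VecStoreBytes) {
  return VF * EltAllocBytes != VecStoreBytes;
}
// hasIrregularTypeSketch(8, 1, 1)  -> true  (the i1 example above)
// hasIrregularTypeSketch(4, 4, 16) -> false (the i32 example above)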
+
+/// A helper function that returns the reciprocal of the block probability of
+/// predicated blocks. If we return X, we are assuming the predicated block
+/// will execute once for every X iterations of the loop header.
+///
+/// TODO: We should use actual block probability here, if available. Currently,
+/// we always assume predicated blocks have a 50% chance of executing.
+static unsigned getReciprocalPredBlockProb() { return 2; }
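The reciprocal is presumably used as a divisor when costing instructions that land in a predicated block, so with the fixed return value of 2 such a block is charged as if it executed on half of the header iterations:

// Illustrative only: discount a predicated block's per-iteration cost by the
// assumed execution probability (1 / getReciprocalPredBlockProb()).
unsigned scalarBlockCost = 10;
unsigned reciprocal = 2;                          // from the helper above
unsigned charged = scalarBlockCost / reciprocal;  // 5 per header iteration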
+
/// InnerLoopVectorizer vectorizes loops which contain only one basic
/// block to a specified vectorization factor (VF).
/// This class performs the widening of scalars into vectors, or multiple
@@ -366,29 +370,21 @@ public:
LoopInfo *LI, DominatorTree *DT,
const TargetLibraryInfo *TLI,
const TargetTransformInfo *TTI, AssumptionCache *AC,
- unsigned VecWidth, unsigned UnrollFactor)
+ OptimizationRemarkEmitter *ORE, unsigned VecWidth,
+ unsigned UnrollFactor, LoopVectorizationLegality *LVL,
+ LoopVectorizationCostModel *CM)
: OrigLoop(OrigLoop), PSE(PSE), LI(LI), DT(DT), TLI(TLI), TTI(TTI),
- AC(AC), VF(VecWidth), UF(UnrollFactor),
+ AC(AC), ORE(ORE), VF(VecWidth), UF(UnrollFactor),
Builder(PSE.getSE()->getContext()), Induction(nullptr),
- OldInduction(nullptr), WidenMap(UnrollFactor), TripCount(nullptr),
- VectorTripCount(nullptr), Legal(nullptr), AddedSafetyChecks(false) {}
+ OldInduction(nullptr), VectorLoopValueMap(UnrollFactor, VecWidth),
+ TripCount(nullptr), VectorTripCount(nullptr), Legal(LVL), Cost(CM),
+ AddedSafetyChecks(false) {}
// Perform the actual loop widening (vectorization).
- // MinimumBitWidths maps scalar integer values to the smallest bitwidth they
- // can be validly truncated to. The cost model has assumed this truncation
- // will happen when vectorizing. VecValuesToIgnore contains scalar values
- // that the cost model has chosen to ignore because they will not be
- // vectorized.
- void vectorize(LoopVectorizationLegality *L,
- const MapVector<Instruction *, uint64_t> &MinimumBitWidths,
- SmallPtrSetImpl<const Value *> &VecValuesToIgnore) {
- MinBWs = &MinimumBitWidths;
- ValuesNotWidened = &VecValuesToIgnore;
- Legal = L;
+ void vectorize() {
// Create a new empty loop. Unlink the old loop and connect the new one.
createEmptyLoop();
// Widen each instruction in the old loop to a new one in the new loop.
- // Use the Legality module to find the induction and reduction variables.
vectorizeLoop();
}
@@ -400,11 +396,18 @@ public:
protected:
/// A small list of PHINodes.
typedef SmallVector<PHINode *, 4> PhiVector;
- /// When we unroll loops we have multiple vector values for each scalar.
- /// This data structure holds the unrolled and vectorized values that
- /// originated from one scalar instruction.
+
+ /// A type for vectorized values in the new loop. Each value from the
+ /// original loop, when vectorized, is represented by UF vector values in the
+ /// new unrolled loop, where UF is the unroll factor.
typedef SmallVector<Value *, 2> VectorParts;
+ /// A type for scalarized values in the new loop. Each value from the
+ /// original loop, when scalarized, is represented by UF x VF scalar values
+ /// in the new unrolled loop, where UF is the unroll factor and VF is the
+ /// vectorization factor.
+ typedef SmallVector<SmallVector<Value *, 4>, 2> ScalarParts;
+
// When we if-convert we need to create edge masks. We have to cache values
// so that we don't end up with exponential recursion/IR.
typedef DenseMap<std::pair<BasicBlock *, BasicBlock *>, VectorParts>
@@ -434,7 +437,20 @@ protected:
/// See PR14725.
void fixLCSSAPHIs();
- /// Shrinks vector element sizes based on information in "MinBWs".
+ /// Iteratively sink the scalarized operands of a predicated instruction into
+ /// the block that was created for it.
+ void sinkScalarOperands(Instruction *PredInst);
+
+ /// Predicate conditional instructions that require predication on their
+ /// respective conditions.
+ void predicateInstructions();
+
+ /// Collect the instructions from the original loop that would be trivially
+ /// dead in the vectorized loop if generated.
+ void collectTriviallyDeadInstructions();
+
+ /// Shrinks vector element sizes to the smallest bitwidth they can be legally
+ /// represented as.
void truncateToMinimalBitwidths();
/// A helper function that computes the predicate of the block BB, assuming
@@ -451,19 +467,19 @@ protected:
/// Vectorize a single PHINode in a block. This method handles the induction
/// variable canonicalization. It supports both VF = 1 for unrolled loops and
/// arbitrary length vectors.
- void widenPHIInstruction(Instruction *PN, VectorParts &Entry, unsigned UF,
- unsigned VF, PhiVector *PV);
+ void widenPHIInstruction(Instruction *PN, unsigned UF, unsigned VF,
+ PhiVector *PV);
/// Insert the new loop to the loop hierarchy and pass manager
/// and update the analysis passes.
void updateAnalysis();
/// This instruction is un-vectorizable. Implement it as a sequence
- /// of scalars. If \p IfPredicateStore is true we need to 'hide' each
+ /// of scalars. If \p IfPredicateInstr is true we need to 'hide' each
/// scalarized instruction behind an if block predicated on the control
/// dependence of the instruction.
virtual void scalarizeInstruction(Instruction *Instr,
- bool IfPredicateStore = false);
+ bool IfPredicateInstr = false);
/// Vectorize Load and Store instructions,
virtual void vectorizeMemoryInstruction(Instruction *Instr);
@@ -477,7 +493,10 @@ protected:
/// This function adds (StartIdx, StartIdx + Step, StartIdx + 2*Step, ...)
/// to each vector element of Val. The sequence starts at StartIndex.
- virtual Value *getStepVector(Value *Val, int StartIdx, Value *Step);
+ /// \p Opcode is relevant for FP induction variable.
+ virtual Value *getStepVector(Value *Val, int StartIdx, Value *Step,
+ Instruction::BinaryOps Opcode =
+ Instruction::BinaryOpsEnd);
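Following the comment above: with Step = 1 and StartIdx = 4, lane i of a 4-lane splat of x becomes x + 4 + i, i.e. <x+4, x+5, x+6, x+7>; the new Opcode parameter selects the arithmetic (integer add vs. FP add) for floating-point inductions. The lane values, as a scalar sketch:

// Per-lane values of getStepVector(splat(x), StartIdx, Step), following the
// description above (integer case, illustrative numbers).
constexpr int x = 100, StartIdx = 4, Step = 1;
constexpr int lane0 = x + StartIdx + 0 * Step;   // 104
constexpr int lane3 = x + StartIdx + 3 * Step;   // 107
static_assert(lane0 == 104 && lane3 == 107, "lanes are 104..107");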
/// Compute scalar induction steps. \p ScalarIV is the scalar induction
/// variable on which to base the steps, \p Step is the size of the step, and
@@ -488,23 +507,39 @@ protected:
/// Create a vector induction phi node based on an existing scalar one. This
/// currently only works for integer induction variables with a constant
- /// step. If \p TruncType is non-null, instead of widening the original IV,
- /// we widen a version of the IV truncated to \p TruncType.
+ /// step. \p EntryVal is the value from the original loop that maps to the
+ /// vector phi node. If \p EntryVal is a truncate instruction, instead of
+ /// widening the original IV, we widen a version of the IV truncated to \p
+ /// EntryVal's type.
void createVectorIntInductionPHI(const InductionDescriptor &II,
- VectorParts &Entry, IntegerType *TruncType);
+ Instruction *EntryVal);
/// Widen an integer induction variable \p IV. If \p Trunc is provided, the
- /// induction variable will first be truncated to the corresponding type. The
- /// widened values are placed in \p Entry.
- void widenIntInduction(PHINode *IV, VectorParts &Entry,
- TruncInst *Trunc = nullptr);
-
- /// When we go over instructions in the basic block we rely on previous
- /// values within the current basic block or on loop invariant values.
- /// When we widen (vectorize) values we place them in the map. If the values
- /// are not within the map, they have to be loop invariant, so we simply
- /// broadcast them into a vector.
- VectorParts &getVectorValue(Value *V);
+ /// induction variable will first be truncated to the corresponding type.
+ void widenIntInduction(PHINode *IV, TruncInst *Trunc = nullptr);
+
+ /// Returns true if an instruction \p I should be scalarized instead of
+ /// vectorized for the chosen vectorization factor.
+ bool shouldScalarizeInstruction(Instruction *I) const;
+
+ /// Returns true if we should generate a scalar version of \p IV.
+ bool needsScalarInduction(Instruction *IV) const;
+
+ /// Return a constant reference to the VectorParts corresponding to \p V from
+ /// the original loop. If the value has already been vectorized, the
+ /// corresponding vector entry in VectorLoopValueMap is returned. If,
+ /// however, the value has a scalar entry in VectorLoopValueMap, we construct
+ /// new vector values on-demand by inserting the scalar values into vectors
+ /// with an insertelement sequence. If the value has been neither vectorized
+ /// nor scalarized, it must be loop invariant, so we simply broadcast the
+ /// value into vectors.
+ const VectorParts &getVectorValue(Value *V);
+
+ /// Return a value in the new loop corresponding to \p V from the original
+ /// loop at unroll index \p Part and vector index \p Lane. If the value has
+ /// been vectorized but not scalarized, the necessary extractelement
+ /// instruction will be generated.
+ Value *getScalarValue(Value *V, unsigned Part, unsigned Lane);
/// Try to vectorize the interleaved access group that \p Instr belongs to.
void vectorizeInterleaveGroup(Instruction *Instr);
@@ -547,44 +582,112 @@ protected:
/// vector of instructions.
void addMetadata(ArrayRef<Value *> To, Instruction *From);
- /// This is a helper class that holds the vectorizer state. It maps scalar
- /// instructions to vector instructions. When the code is 'unrolled' then
- /// then a single scalar value is mapped to multiple vector parts. The parts
- /// are stored in the VectorPart type.
+ /// This is a helper class for maintaining vectorization state. It's used for
+ /// mapping values from the original loop to their corresponding values in
+ /// the new loop. Two mappings are maintained: one for vectorized values and
+ /// one for scalarized values. Vectorized values are represented with UF
+ /// vector values in the new loop, and scalarized values are represented with
+ /// UF x VF scalar values in the new loop. UF and VF are the unroll and
+ /// vectorization factors, respectively.
+ ///
+ /// Entries can be added to either map with initVector and initScalar, which
+ /// initialize and return a constant reference to the new entry. If a
+ /// non-constant reference to a vector entry is required, getVector can be
+ /// used to retrieve a mutable entry. We currently directly modify the mapped
+ /// values during "fix-up" operations that occur once the first phase of
+ /// widening is complete. These operations include type truncation and the
+ /// second phase of recurrence widening.
+ ///
+ /// Otherwise, entries from either map should be accessed using the
+ /// getVectorValue or getScalarValue functions from InnerLoopVectorizer.
+ /// getVectorValue and getScalarValue coordinate to generate a vector or
+ /// scalar value on-demand if one is not yet available. When vectorizing a
+ /// loop, we visit the definition of an instruction before its uses. When
+ /// visiting the definition, we either vectorize or scalarize the
+ /// instruction, creating an entry for it in the corresponding map. (In some
+ /// cases, such as induction variables, we will create both vector and scalar
+ /// entries.) Then, as we encounter uses of the definition, we derive values
+ /// for each scalar or vector use unless such a value is already available.
+ /// For example, if we scalarize a definition and one of its uses is vector,
+ /// we build the required vector on-demand with an insertelement sequence
+ /// when visiting the use. Otherwise, if the use is scalar, we can use the
+ /// existing scalar definition.
struct ValueMap {
- /// C'tor. UnrollFactor controls the number of vectors ('parts') that
- /// are mapped.
- ValueMap(unsigned UnrollFactor) : UF(UnrollFactor) {}
-
- /// \return True if 'Key' is saved in the Value Map.
- bool has(Value *Key) const { return MapStorage.count(Key); }
-
- /// Initializes a new entry in the map. Sets all of the vector parts to the
- /// save value in 'Val'.
- /// \return A reference to a vector with splat values.
- VectorParts &splat(Value *Key, Value *Val) {
- VectorParts &Entry = MapStorage[Key];
- Entry.assign(UF, Val);
- return Entry;
+
+ /// Construct an empty map with the given unroll and vectorization factors.
+ ValueMap(unsigned UnrollFactor, unsigned VecWidth)
+ : UF(UnrollFactor), VF(VecWidth) {
+ // The unroll and vectorization factors are only used in asserts builds
+ // to verify map entries are sized appropriately.
+ (void)UF;
+ (void)VF;
+ }
+
+ /// \return True if the map has a vector entry for \p Key.
+ bool hasVector(Value *Key) const { return VectorMapStorage.count(Key); }
+
+ /// \return True if the map has a scalar entry for \p Key.
+ bool hasScalar(Value *Key) const { return ScalarMapStorage.count(Key); }
+
+ /// \brief Map \p Key to the given VectorParts \p Entry, and return a
+ /// constant reference to the new vector map entry. The given key should
+ /// not already be in the map, and the given VectorParts should be
+ /// correctly sized for the current unroll factor.
+ const VectorParts &initVector(Value *Key, const VectorParts &Entry) {
+ assert(!hasVector(Key) && "Vector entry already initialized");
+ assert(Entry.size() == UF && "VectorParts has wrong dimensions");
+ VectorMapStorage[Key] = Entry;
+ return VectorMapStorage[Key];
+ }
+
+ /// \brief Map \p Key to the given ScalarParts \p Entry, and return a
+ /// constant reference to the new scalar map entry. The given key should
+ /// not already be in the map, and the given ScalarParts should be
+ /// correctly sized for the current unroll and vectorization factors.
+ const ScalarParts &initScalar(Value *Key, const ScalarParts &Entry) {
+ assert(!hasScalar(Key) && "Scalar entry already initialized");
+ assert(Entry.size() == UF &&
+ all_of(make_range(Entry.begin(), Entry.end()),
+ [&](const SmallVectorImpl<Value *> &Values) -> bool {
+ return Values.size() == VF;
+ }) &&
+ "ScalarParts has wrong dimensions");
+ ScalarMapStorage[Key] = Entry;
+ return ScalarMapStorage[Key];
}
- ///\return A reference to the value that is stored at 'Key'.
- VectorParts &get(Value *Key) {
- VectorParts &Entry = MapStorage[Key];
- if (Entry.empty())
- Entry.resize(UF);
- assert(Entry.size() == UF);
- return Entry;
+ /// \return A reference to the vector map entry corresponding to \p Key.
+ /// The key should already be in the map. This function should only be used
+ /// when it's necessary to update values that have already been vectorized.
+ /// This is the case for "fix-up" operations including type truncation and
+ /// the second phase of recurrence vectorization. If a non-const reference
+ /// isn't required, getVectorValue should be used instead.
+ VectorParts &getVector(Value *Key) {
+ assert(hasVector(Key) && "Vector entry not initialized");
+ return VectorMapStorage.find(Key)->second;
}
+ /// Retrieve an entry from the vector or scalar maps. The preferred way to
+ /// access an existing mapped entry is with getVectorValue or
+ /// getScalarValue from InnerLoopVectorizer. Until those functions can be
+ /// moved inside ValueMap, we have to declare them as friends.
+ friend const VectorParts &InnerLoopVectorizer::getVectorValue(Value *V);
+ friend Value *InnerLoopVectorizer::getScalarValue(Value *V, unsigned Part,
+ unsigned Lane);
+
private:
- /// The unroll factor. Each entry in the map stores this number of vector
- /// elements.
+ /// The unroll factor. Each entry in the vector map contains UF vector
+ /// values.
unsigned UF;
- /// Map storage. We use std::map and not DenseMap because insertions to a
- /// dense map invalidates its iterators.
- std::map<Value *, VectorParts> MapStorage;
+ /// The vectorization factor. Each entry in the scalar map contains UF x VF
+ /// scalar values.
+ unsigned VF;
+
+ /// The vector and scalar map storage. We use std::map and not DenseMap
+ /// because insertions to DenseMap invalidate its iterators.
+ std::map<Value *, VectorParts> VectorMapStorage;
+ std::map<Value *, ScalarParts> ScalarMapStorage;
};
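To make the documented two-map layout concrete, here is a much-reduced standalone analogue (std::string keys stand in for Value*, plain ints for IR values; the real class holds VectorParts/ScalarParts and also checks per-entry dimensions against UF and VF):

#include <cassert>
#include <map>
#include <string>
#include <vector>

// Reduced sketch: one entry per original value, UF vector parts per vector
// entry, UF x VF scalar lanes per scalar entry.
struct MiniValueMap {
  unsigned UF, VF;
  std::map<std::string, std::vector<int>> VectorMap;
  std::map<std::string, std::vector<std::vector<int>>> ScalarMap;

  bool hasVector(const std::string &Key) const { return VectorMap.count(Key); }

  const std::vector<int> &initVector(const std::string &Key,
                                     const std::vector<int> &Parts) {
    assert(!hasVector(Key) && Parts.size() == UF);
    return VectorMap[Key] = Parts;
  }
};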
/// The original loop.
@@ -605,6 +708,8 @@ protected:
const TargetTransformInfo *TTI;
/// Assumption Cache.
AssumptionCache *AC;
+ /// Interface to emit optimization remarks.
+ OptimizationRemarkEmitter *ORE;
/// \brief LoopVersioning. It's only set up (non-null) if memchecks were
/// used.
@@ -646,41 +751,38 @@ protected:
PHINode *Induction;
/// The induction variable of the old basic block.
PHINode *OldInduction;
- /// Maps scalars to widened vectors.
- ValueMap WidenMap;
-
- /// A map of induction variables from the original loop to their
- /// corresponding VF * UF scalarized values in the vectorized loop. The
- /// purpose of ScalarIVMap is similar to that of WidenMap. Whereas WidenMap
- /// maps original loop values to their vector versions in the new loop,
- /// ScalarIVMap maps induction variables from the original loop that are not
- /// vectorized to their scalar equivalents in the vector loop. Maintaining a
- /// separate map for scalarized induction variables allows us to avoid
- /// unnecessary scalar-to-vector-to-scalar conversions.
- DenseMap<Value *, SmallVector<Value *, 8>> ScalarIVMap;
+
+ /// Maps values from the original loop to their corresponding values in the
+ /// vectorized loop. A key value can map to either vector values, scalar
+ /// values or both kinds of values, depending on whether the key was
+ /// vectorized and scalarized.
+ ValueMap VectorLoopValueMap;
/// Store instructions that should be predicated, as a pair
/// <StoreInst, Predicate>
- SmallVector<std::pair<StoreInst *, Value *>, 4> PredicatedStores;
+ SmallVector<std::pair<Instruction *, Value *>, 4> PredicatedInstructions;
EdgeMaskCache MaskCache;
/// Trip count of the original loop.
Value *TripCount;
/// Trip count of the widened loop (TripCount - TripCount % (VF*UF))
Value *VectorTripCount;
- /// Map of scalar integer values to the smallest bitwidth they can be legally
- /// represented as. The vector equivalents of these values should be truncated
- /// to this type.
- const MapVector<Instruction *, uint64_t> *MinBWs;
-
- /// A set of values that should not be widened. This is taken from
- /// VecValuesToIgnore in the cost model.
- SmallPtrSetImpl<const Value *> *ValuesNotWidened;
-
+ /// The legality analysis.
LoopVectorizationLegality *Legal;
+ /// The profitability analysis.
+ LoopVectorizationCostModel *Cost;
+
// Record whether runtime checks are added.
bool AddedSafetyChecks;
+
+ // Holds instructions from the original loop whose counterparts in the
+ // vectorized loop would be trivially dead if generated. For example,
+ // original induction update instructions can become dead because we
+ // separately emit induction "steps" when generating code for the new loop.
+ // Similarly, we create a new latch condition when setting up the structure
+ // of the new loop, so the old one can become dead.
+ SmallPtrSet<Instruction *, 4> DeadInstructions;
};
class InnerLoopUnroller : public InnerLoopVectorizer {
@@ -689,16 +791,20 @@ public:
LoopInfo *LI, DominatorTree *DT,
const TargetLibraryInfo *TLI,
const TargetTransformInfo *TTI, AssumptionCache *AC,
- unsigned UnrollFactor)
- : InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, 1,
- UnrollFactor) {}
+ OptimizationRemarkEmitter *ORE, unsigned UnrollFactor,
+ LoopVectorizationLegality *LVL,
+ LoopVectorizationCostModel *CM)
+ : InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE, 1,
+ UnrollFactor, LVL, CM) {}
private:
void scalarizeInstruction(Instruction *Instr,
- bool IfPredicateStore = false) override;
+ bool IfPredicateInstr = false) override;
void vectorizeMemoryInstruction(Instruction *Instr) override;
Value *getBroadcastInstrs(Value *V) override;
- Value *getStepVector(Value *Val, int StartIdx, Value *Step) override;
+ Value *getStepVector(Value *Val, int StartIdx, Value *Step,
+ Instruction::BinaryOps Opcode =
+ Instruction::BinaryOpsEnd) override;
Value *reverseVector(Value *Vec) override;
};
@@ -1149,12 +1255,13 @@ public:
FK_Enabled = 1, ///< Forcing enabled.
};
- LoopVectorizeHints(const Loop *L, bool DisableInterleaving)
+ LoopVectorizeHints(const Loop *L, bool DisableInterleaving,
+ OptimizationRemarkEmitter &ORE)
: Width("vectorize.width", VectorizerParams::VectorizationFactor,
HK_WIDTH),
Interleave("interleave.count", DisableInterleaving, HK_UNROLL),
Force("vectorize.enable", FK_Undefined, HK_FORCE),
- PotentiallyUnsafe(false), TheLoop(L) {
+ PotentiallyUnsafe(false), TheLoop(L), ORE(ORE) {
// Populate values with existing loop metadata.
getHintsFromMetadata();
@@ -1176,17 +1283,13 @@ public:
bool allowVectorization(Function *F, Loop *L, bool AlwaysVectorize) const {
if (getForce() == LoopVectorizeHints::FK_Disabled) {
DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
- emitOptimizationRemarkAnalysis(F->getContext(),
- vectorizeAnalysisPassName(), *F,
- L->getStartLoc(), emitRemark());
+ emitRemarkWithHints();
return false;
}
if (!AlwaysVectorize && getForce() != LoopVectorizeHints::FK_Enabled) {
DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
- emitOptimizationRemarkAnalysis(F->getContext(),
- vectorizeAnalysisPassName(), *F,
- L->getStartLoc(), emitRemark());
+ emitRemarkWithHints();
return false;
}
@@ -1197,11 +1300,12 @@ public:
// FIXME: Add interleave.disable metadata. This will allow
// vectorize.disable to be used without disabling the pass and errors
// to differentiate between disabled vectorization and a width of 1.
- emitOptimizationRemarkAnalysis(
- F->getContext(), vectorizeAnalysisPassName(), *F, L->getStartLoc(),
- "loop not vectorized: vectorization and interleaving are explicitly "
- "disabled, or vectorize width and interleave count are both set to "
- "1");
+ ORE.emit(OptimizationRemarkAnalysis(vectorizeAnalysisPassName(),
+ "AllDisabled", L->getStartLoc(),
+ L->getHeader())
+ << "loop not vectorized: vectorization and interleaving are "
+ "explicitly disabled, or vectorize width and interleave "
+ "count are both set to 1");
return false;
}
@@ -1209,23 +1313,27 @@ public:
}
/// Dumps all the hint information.
- std::string emitRemark() const {
- VectorizationReport R;
+ void emitRemarkWithHints() const {
+ using namespace ore;
if (Force.Value == LoopVectorizeHints::FK_Disabled)
- R << "vectorization is explicitly disabled";
+ ORE.emit(OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
+ TheLoop->getStartLoc(),
+ TheLoop->getHeader())
+ << "loop not vectorized: vectorization is explicitly disabled");
else {
- R << "use -Rpass-analysis=loop-vectorize for more info";
+ OptimizationRemarkMissed R(LV_NAME, "MissedDetails",
+ TheLoop->getStartLoc(), TheLoop->getHeader());
+ R << "loop not vectorized";
if (Force.Value == LoopVectorizeHints::FK_Enabled) {
- R << " (Force=true";
+ R << " (Force=" << NV("Force", true);
if (Width.Value != 0)
- R << ", Vector Width=" << Width.Value;
+ R << ", Vector Width=" << NV("VectorWidth", Width.Value);
if (Interleave.Value != 0)
- R << ", Interleave Count=" << Interleave.Value;
+ R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value);
R << ")";
}
+ ORE.emit(R);
}
-
- return R.str();
}
unsigned getWidth() const { return Width.Value; }
@@ -1241,7 +1349,7 @@ public:
return LV_NAME;
if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth() == 0)
return LV_NAME;
- return DiagnosticInfoOptimizationRemarkAnalysis::AlwaysPrint;
+ return OptimizationRemarkAnalysis::AlwaysPrint;
}
bool allowReordering() const {
@@ -1379,19 +1487,23 @@ private:
/// The loop these hints belong to.
const Loop *TheLoop;
+
+ /// Interface to emit optimization remarks.
+ OptimizationRemarkEmitter &ORE;
};
-static void emitAnalysisDiag(const Function *TheFunction, const Loop *TheLoop,
+static void emitAnalysisDiag(const Loop *TheLoop,
const LoopVectorizeHints &Hints,
+ OptimizationRemarkEmitter &ORE,
const LoopAccessReport &Message) {
const char *Name = Hints.vectorizeAnalysisPassName();
- LoopAccessReport::emitAnalysis(Message, TheFunction, TheLoop, Name);
+ LoopAccessReport::emitAnalysis(Message, TheLoop, Name, ORE);
}
static void emitMissedWarning(Function *F, Loop *L,
- const LoopVectorizeHints &LH) {
- emitOptimizationRemarkMissed(F->getContext(), LV_NAME, *F, L->getStartLoc(),
- LH.emitRemark());
+ const LoopVectorizeHints &LH,
+ OptimizationRemarkEmitter *ORE) {
+ LH.emitRemarkWithHints();
if (LH.getForce() == LoopVectorizeHints::FK_Enabled) {
if (LH.getWidth() != 1)
@@ -1425,12 +1537,12 @@ public:
TargetLibraryInfo *TLI, AliasAnalysis *AA, Function *F,
const TargetTransformInfo *TTI,
std::function<const LoopAccessInfo &(Loop &)> *GetLAA, LoopInfo *LI,
- LoopVectorizationRequirements *R, LoopVectorizeHints *H)
- : NumPredStores(0), TheLoop(L), PSE(PSE), TLI(TLI), TheFunction(F),
- TTI(TTI), DT(DT), GetLAA(GetLAA), LAI(nullptr),
- InterleaveInfo(PSE, L, DT, LI), Induction(nullptr),
- WidestIndTy(nullptr), HasFunNoNaNAttr(false), Requirements(R),
- Hints(H) {}
+ OptimizationRemarkEmitter *ORE, LoopVectorizationRequirements *R,
+ LoopVectorizeHints *H)
+ : NumPredStores(0), TheLoop(L), PSE(PSE), TLI(TLI), TTI(TTI), DT(DT),
+ GetLAA(GetLAA), LAI(nullptr), ORE(ORE), InterleaveInfo(PSE, L, DT, LI),
+ Induction(nullptr), WidestIndTy(nullptr), HasFunNoNaNAttr(false),
+ Requirements(R), Hints(H) {}
/// ReductionList contains the reduction descriptors for all
/// of the reductions that were found in the loop.
@@ -1490,9 +1602,12 @@ public:
/// Returns true if the value V is uniform within the loop.
bool isUniform(Value *V);
- /// Returns true if this instruction will remain scalar after vectorization.
+ /// Returns true if \p I is known to be uniform after vectorization.
bool isUniformAfterVectorization(Instruction *I) { return Uniforms.count(I); }
+ /// Returns true if \p I is known to be scalar after vectorization.
+ bool isScalarAfterVectorization(Instruction *I) { return Scalars.count(I); }
+
/// Returns the information that we collected about runtime memory check.
const RuntimePointerChecking *getRuntimePointerChecking() const {
return LAI->getRuntimePointerChecking();
@@ -1545,6 +1660,17 @@ public:
bool isLegalMaskedGather(Type *DataType) {
return TTI->isLegalMaskedGather(DataType);
}
+ /// Returns true if the target machine can represent \p V as a masked gather
+ /// or scatter operation.
+ bool isLegalGatherOrScatter(Value *V) {
+ auto *LI = dyn_cast<LoadInst>(V);
+ auto *SI = dyn_cast<StoreInst>(V);
+ if (!LI && !SI)
+ return false;
+ auto *Ptr = getPointerOperand(V);
+ auto *Ty = cast<PointerType>(Ptr->getType())->getElementType();
+ return (LI && isLegalMaskedGather(Ty)) || (SI && isLegalMaskedScatter(Ty));
+ }
/// Returns true if vector representation of the instruction \p I
/// requires mask.
@@ -1553,6 +1679,21 @@ public:
unsigned getNumLoads() const { return LAI->getNumLoads(); }
unsigned getNumPredStores() const { return NumPredStores; }
+ /// Returns true if \p I is an instruction that will be scalarized with
+ /// predication. Such instructions include conditional stores and
+ /// instructions that may divide by zero.
+ bool isScalarWithPredication(Instruction *I);
+
+ /// Returns true if \p I is a memory instruction that has a consecutive or
+ /// consecutive-like pointer operand. Consecutive-like pointers are pointers
+ /// that are treated like consecutive pointers during vectorization. The
+ /// pointer operands of interleaved accesses are an example.
+ bool hasConsecutiveLikePtrOperand(Instruction *I);
+
+ /// Returns true if \p I is a memory instruction that must be scalarized
+ /// during vectorization.
+ bool memoryInstructionMustBeScalarized(Instruction *I, unsigned VF = 1);
+
private:
/// Check if a single basic block loop is vectorizable.
/// At this point we know that this is a loop with a constant trip count
@@ -1569,9 +1710,24 @@ private:
/// transformation.
bool canVectorizeWithIfConvert();
- /// Collect the variables that need to stay uniform after vectorization.
+ /// Collect the instructions that are uniform after vectorization. An
+ /// instruction is uniform if we represent it with a single scalar value in
+ /// the vectorized loop corresponding to each vector iteration. Examples of
+ /// uniform instructions include pointer operands of consecutive or
+ /// interleaved memory accesses. Note that although uniformity implies an
+ /// instruction will be scalar, the reverse is not true. In general, a
+ /// scalarized instruction will be represented by VF scalar values in the
+ /// vectorized loop, each corresponding to an iteration of the original
+ /// scalar loop.
void collectLoopUniforms();
+ /// Collect the instructions that are scalar after vectorization. An
+ /// instruction is scalar if it is known to be uniform or will be scalarized
+ /// during vectorization. Non-uniform scalarized instructions will be
+ /// represented by VF values in the vectorized loop, each corresponding to an
+ /// iteration of the original scalar loop.
+ void collectLoopScalars();
+
/// Return true if all of the instructions in the block can be speculatively
/// executed. \p SafePtrs is a list of addresses that are known to be legal
/// and we know that we can read from them without segfault.
@@ -1588,7 +1744,19 @@ private:
/// VectorizationReport because the << operator of VectorizationReport returns
/// LoopAccessReport.
void emitAnalysis(const LoopAccessReport &Message) const {
- emitAnalysisDiag(TheFunction, TheLoop, *Hints, Message);
+ emitAnalysisDiag(TheLoop, *Hints, *ORE, Message);
+ }
+
+ /// Create an analysis remark that explains why vectorization failed
+ ///
+ /// \p RemarkName is the identifier for the remark. If \p I is passed it is
+ /// an instruction that prevents vectorization. Otherwise the loop is used
+ /// for the location of the remark. \return the remark object that can be
+ /// streamed to.
+ OptimizationRemarkAnalysis
+ createMissedAnalysis(StringRef RemarkName, Instruction *I = nullptr) const {
+ return ::createMissedAnalysis(Hints->vectorizeAnalysisPassName(),
+ RemarkName, TheLoop, I);
}
/// \brief If an access has a symbolic strides, this maps the pointer value to
@@ -1613,8 +1781,6 @@ private:
PredicatedScalarEvolution &PSE;
/// Target Library Info.
TargetLibraryInfo *TLI;
- /// Parent function
- Function *TheFunction;
/// Target Transform Info
const TargetTransformInfo *TTI;
/// Dominator Tree.
@@ -1624,6 +1790,8 @@ private:
// And the loop-accesses info corresponding to this loop. This pointer is
// null until canVectorizeMemory sets it up.
const LoopAccessInfo *LAI;
+ /// Interface to emit optimization remarks.
+ OptimizationRemarkEmitter *ORE;
/// The interleave access information contains groups of interleaved accesses
/// with the same stride and close to each other.
@@ -1648,10 +1816,13 @@ private:
/// Allowed outside users. This holds the induction and reduction
/// vars which can be accessed from outside the loop.
SmallPtrSet<Value *, 4> AllowedExit;
- /// This set holds the variables which are known to be uniform after
- /// vectorization.
+
+ /// Holds the instructions known to be uniform after vectorization.
SmallPtrSet<Instruction *, 4> Uniforms;
+ /// Holds the instructions known to be scalar after vectorization.
+ SmallPtrSet<Instruction *, 4> Scalars;
+
/// Can we assume the absence of NaNs.
bool HasFunNoNaNAttr;
@@ -1679,10 +1850,11 @@ public:
LoopInfo *LI, LoopVectorizationLegality *Legal,
const TargetTransformInfo &TTI,
const TargetLibraryInfo *TLI, DemandedBits *DB,
- AssumptionCache *AC, const Function *F,
+ AssumptionCache *AC,
+ OptimizationRemarkEmitter *ORE, const Function *F,
const LoopVectorizeHints *Hints)
: TheLoop(L), PSE(PSE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI), DB(DB),
- AC(AC), TheFunction(F), Hints(Hints) {}
+ AC(AC), ORE(ORE), TheFunction(F), Hints(Hints) {}
/// Information about vectorization costs
struct VectorizationFactor {
@@ -1732,6 +1904,29 @@ public:
/// Collect values we want to ignore in the cost model.
void collectValuesToIgnore();
+ /// \returns The smallest bitwidth each instruction can be represented with.
+ /// The vector equivalents of these instructions should be truncated to this
+ /// type.
+ const MapVector<Instruction *, uint64_t> &getMinimalBitwidths() const {
+ return MinBWs;
+ }
+
+ /// \returns True if it is more profitable to scalarize instruction \p I for
+ /// vectorization factor \p VF.
+ bool isProfitableToScalarize(Instruction *I, unsigned VF) const {
+ auto Scalars = InstsToScalarize.find(VF);
+ assert(Scalars != InstsToScalarize.end() &&
+ "VF not yet analyzed for scalarization profitability");
+ return Scalars->second.count(I);
+ }
+
+ /// \returns True if instruction \p I can be truncated to a smaller bitwidth
+ /// for vectorization factor \p VF.
+ bool canTruncateToMinimalBitwidth(Instruction *I, unsigned VF) const {
+ return VF > 1 && MinBWs.count(I) && !isProfitableToScalarize(I, VF) &&
+ !Legal->isScalarAfterVectorization(I);
+ }
+
private:
/// The vectorization cost is a combination of the cost itself and a boolean
/// indicating whether any of the contributing operations will actually
@@ -1760,20 +1955,44 @@ private:
/// as a vector operation.
bool isConsecutiveLoadOrStore(Instruction *I);
- /// Report an analysis message to assist the user in diagnosing loops that are
- /// not vectorized. These are handled as LoopAccessReport rather than
- /// VectorizationReport because the << operator of VectorizationReport returns
- /// LoopAccessReport.
- void emitAnalysis(const LoopAccessReport &Message) const {
- emitAnalysisDiag(TheFunction, TheLoop, *Hints, Message);
+ /// Create an analysis remark that explains why vectorization failed
+ ///
+ /// \p RemarkName is the identifier for the remark. \return the remark object
+ /// that can be streamed to.
+ OptimizationRemarkAnalysis createMissedAnalysis(StringRef RemarkName) {
+ return ::createMissedAnalysis(Hints->vectorizeAnalysisPassName(),
+ RemarkName, TheLoop);
}
-public:
/// Map of scalar integer values to the smallest bitwidth they can be legally
/// represented as. The vector equivalents of these values should be truncated
/// to this type.
MapVector<Instruction *, uint64_t> MinBWs;
+ /// A type representing the costs for instructions if they were to be
+ /// scalarized rather than vectorized. The entries are Instruction-Cost
+ /// pairs.
+ typedef DenseMap<Instruction *, unsigned> ScalarCostsTy;
+
+ /// A map holding scalar costs for different vectorization factors. The
+ /// presence of a cost for an instruction in the mapping indicates that the
+ /// instruction will be scalarized when vectorizing with the associated
+ /// vectorization factor. The entries are VF-ScalarCostTy pairs.
+ DenseMap<unsigned, ScalarCostsTy> InstsToScalarize;
+
+ /// Returns the expected difference in cost from scalarizing the expression
+ /// feeding a predicated instruction \p PredInst. The instructions to
+ /// scalarize and their scalar costs are collected in \p ScalarCosts. A
+ /// non-negative return value implies the expression will be scalarized.
+ /// Currently, only single-use chains are considered for scalarization.
+ int computePredInstDiscount(Instruction *PredInst, ScalarCostsTy &ScalarCosts,
+ unsigned VF);
+
+ /// Collects the instructions to scalarize for each predicated instruction in
+ /// the loop.
+ void collectInstsToScalarize(unsigned VF);
+
+public:
/// The loop that we evaluate.
Loop *TheLoop;
/// Predicated scalar evolution analysis.
@@ -1790,6 +2009,9 @@ public:
DemandedBits *DB;
/// Assumption cache.
AssumptionCache *AC;
+ /// Interface to emit optimization remarks.
+ OptimizationRemarkEmitter *ORE;
+
const Function *TheFunction;
/// Loop Vectorize Hint.
const LoopVectorizeHints *Hints;
@@ -1813,8 +2035,8 @@ public:
/// followed by a non-expert user.
class LoopVectorizationRequirements {
public:
- LoopVectorizationRequirements()
- : NumRuntimePointerChecks(0), UnsafeAlgebraInst(nullptr) {}
+ LoopVectorizationRequirements(OptimizationRemarkEmitter &ORE)
+ : NumRuntimePointerChecks(0), UnsafeAlgebraInst(nullptr), ORE(ORE) {}
void addUnsafeAlgebraInst(Instruction *I) {
// First unsafe algebra instruction.
@@ -1825,13 +2047,15 @@ public:
void addRuntimePointerChecks(unsigned Num) { NumRuntimePointerChecks = Num; }
bool doesNotMeet(Function *F, Loop *L, const LoopVectorizeHints &Hints) {
- const char *Name = Hints.vectorizeAnalysisPassName();
+ const char *PassName = Hints.vectorizeAnalysisPassName();
bool Failed = false;
if (UnsafeAlgebraInst && !Hints.allowReordering()) {
- emitOptimizationRemarkAnalysisFPCommute(
- F->getContext(), Name, *F, UnsafeAlgebraInst->getDebugLoc(),
- VectorizationReport() << "cannot prove it is safe to reorder "
- "floating-point operations");
+ ORE.emit(
+ OptimizationRemarkAnalysisFPCommute(PassName, "CantReorderFPOps",
+ UnsafeAlgebraInst->getDebugLoc(),
+ UnsafeAlgebraInst->getParent())
+ << "loop not vectorized: cannot prove it is safe to reorder "
+ "floating-point operations");
Failed = true;
}
@@ -1842,10 +2066,11 @@ public:
NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold;
if ((ThresholdReached && !Hints.allowReordering()) ||
PragmaThresholdReached) {
- emitOptimizationRemarkAnalysisAliasing(
- F->getContext(), Name, *F, L->getStartLoc(),
- VectorizationReport()
- << "cannot prove it is safe to reorder memory operations");
+ ORE.emit(OptimizationRemarkAnalysisAliasing(PassName, "CantReorderMemOps",
+ L->getStartLoc(),
+ L->getHeader())
+ << "loop not vectorized: cannot prove it is safe to reorder "
+ "memory operations");
DEBUG(dbgs() << "LV: Too many memory checks needed.\n");
Failed = true;
}
@@ -1856,6 +2081,9 @@ public:
private:
unsigned NumRuntimePointerChecks;
Instruction *UnsafeAlgebraInst;
+
+ /// Interface to emit optimization remarks.
+ OptimizationRemarkEmitter &ORE;
};
static void addAcyclicInnerLoop(Loop &L, SmallVectorImpl<Loop *> &V) {
@@ -1897,12 +2125,13 @@ struct LoopVectorize : public FunctionPass {
auto *AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
auto *LAA = &getAnalysis<LoopAccessLegacyAnalysis>();
auto *DB = &getAnalysis<DemandedBitsWrapperPass>().getDemandedBits();
+ auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
std::function<const LoopAccessInfo &(Loop &)> GetLAA =
[&](Loop &L) -> const LoopAccessInfo & { return LAA->getInfo(&L); };
return Impl.runImpl(F, *SE, *LI, *TTI, *DT, *BFI, TLI, *DB, *AA, *AC,
- GetLAA);
+ GetLAA, *ORE);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -1917,6 +2146,7 @@ struct LoopVectorize : public FunctionPass {
AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<LoopAccessLegacyAnalysis>();
AU.addRequired<DemandedBitsWrapperPass>();
+ AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
AU.addPreserved<LoopInfoWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<BasicAAWrapperPass>();
@@ -1949,7 +2179,7 @@ Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) {
}
void InnerLoopVectorizer::createVectorIntInductionPHI(
- const InductionDescriptor &II, VectorParts &Entry, IntegerType *TruncType) {
+ const InductionDescriptor &II, Instruction *EntryVal) {
Value *Start = II.getStartValue();
ConstantInt *Step = II.getConstIntStepValue();
assert(Step && "Can not widen an IV with a non-constant step");
@@ -1957,7 +2187,8 @@ void InnerLoopVectorizer::createVectorIntInductionPHI(
// Construct the initial value of the vector IV in the vector loop preheader
auto CurrIP = Builder.saveIP();
Builder.SetInsertPoint(LoopVectorPreHeader->getTerminator());
- if (TruncType) {
+ if (isa<TruncInst>(EntryVal)) {
+ auto *TruncType = cast<IntegerType>(EntryVal->getType());
Step = ConstantInt::getSigned(TruncType, Step->getSExtValue());
Start = Builder.CreateCast(Instruction::Trunc, Start, TruncType);
}
@@ -1972,18 +2203,45 @@ void InnerLoopVectorizer::createVectorIntInductionPHI(
// factor. The last of those goes into the PHI.
PHINode *VecInd = PHINode::Create(SteppedStart->getType(), 2, "vec.ind",
&*LoopVectorBody->getFirstInsertionPt());
- Value *LastInduction = VecInd;
+ Instruction *LastInduction = VecInd;
+ VectorParts Entry(UF);
for (unsigned Part = 0; Part < UF; ++Part) {
Entry[Part] = LastInduction;
- LastInduction = Builder.CreateAdd(LastInduction, SplatVF, "step.add");
+ LastInduction = cast<Instruction>(
+ Builder.CreateAdd(LastInduction, SplatVF, "step.add"));
}
+ VectorLoopValueMap.initVector(EntryVal, Entry);
+ if (isa<TruncInst>(EntryVal))
+ addMetadata(Entry, EntryVal);
+
+ // Move the last step to the end of the latch block. This ensures consistent
+ // placement of all induction updates.
+ auto *LoopVectorLatch = LI->getLoopFor(LoopVectorBody)->getLoopLatch();
+ auto *Br = cast<BranchInst>(LoopVectorLatch->getTerminator());
+ auto *ICmp = cast<Instruction>(Br->getCondition());
+ LastInduction->moveBefore(ICmp);
+ LastInduction->setName("vec.ind.next");
VecInd->addIncoming(SteppedStart, LoopVectorPreHeader);
- VecInd->addIncoming(LastInduction, LoopVectorBody);
+ VecInd->addIncoming(LastInduction, LoopVectorLatch);
}
-void InnerLoopVectorizer::widenIntInduction(PHINode *IV, VectorParts &Entry,
- TruncInst *Trunc) {
+bool InnerLoopVectorizer::shouldScalarizeInstruction(Instruction *I) const {
+ return Legal->isScalarAfterVectorization(I) ||
+ Cost->isProfitableToScalarize(I, VF);
+}
+
+bool InnerLoopVectorizer::needsScalarInduction(Instruction *IV) const {
+ if (shouldScalarizeInstruction(IV))
+ return true;
+ auto isScalarInst = [&](User *U) -> bool {
+ auto *I = cast<Instruction>(U);
+ return (OrigLoop->contains(I) && shouldScalarizeInstruction(I));
+ };
+ return any_of(IV->users(), isScalarInst);
+}
+
+void InnerLoopVectorizer::widenIntInduction(PHINode *IV, TruncInst *Trunc) {
auto II = Legal->getInductionVars()->find(IV);
assert(II != Legal->getInductionVars()->end() && "IV is not an induction");
@@ -1991,12 +2249,25 @@ void InnerLoopVectorizer::widenIntInduction(PHINode *IV, VectorParts &Entry,
auto ID = II->second;
assert(IV->getType() == ID.getStartValue()->getType() && "Types must match");
- // If a truncate instruction was provided, get the smaller type.
- auto *TruncType = Trunc ? cast<IntegerType>(Trunc->getType()) : nullptr;
+ // The scalar value to broadcast. This will be derived from the canonical
+ // induction variable.
+ Value *ScalarIV = nullptr;
// The step of the induction.
Value *Step = nullptr;
+ // The value from the original loop to which we are mapping the new induction
+ // variable.
+ Instruction *EntryVal = Trunc ? cast<Instruction>(Trunc) : IV;
+
+ // True if we have vectorized the induction variable.
+ auto VectorizedIV = false;
+
+ // Determine if we want a scalar version of the induction variable. This is
+ // true if the induction variable itself is not widened, or if it has at
+ // least one user in the loop that is not widened.
+ auto NeedsScalarIV = VF > 1 && needsScalarInduction(EntryVal);
+
// If the induction variable has a constant integer step value, go ahead and
// get it now.
if (ID.getConstIntStepValue())
@@ -2006,40 +2277,50 @@ void InnerLoopVectorizer::widenIntInduction(PHINode *IV, VectorParts &Entry,
// create the phi node, we will splat the scalar induction variable in each
// loop iteration.
if (VF > 1 && IV->getType() == Induction->getType() && Step &&
- !ValuesNotWidened->count(IV))
- return createVectorIntInductionPHI(ID, Entry, TruncType);
-
- // The scalar value to broadcast. This will be derived from the canonical
- // induction variable.
- Value *ScalarIV = nullptr;
-
- // Define the scalar induction variable and step values. If we were given a
- // truncation type, truncate the canonical induction variable and constant
- // step. Otherwise, derive these values from the induction descriptor.
- if (TruncType) {
- assert(Step && "Truncation requires constant integer step");
- auto StepInt = cast<ConstantInt>(Step)->getSExtValue();
- ScalarIV = Builder.CreateCast(Instruction::Trunc, Induction, TruncType);
- Step = ConstantInt::getSigned(TruncType, StepInt);
- } else {
- ScalarIV = Induction;
- auto &DL = OrigLoop->getHeader()->getModule()->getDataLayout();
- if (IV != OldInduction) {
- ScalarIV = Builder.CreateSExtOrTrunc(ScalarIV, IV->getType());
- ScalarIV = ID.transform(Builder, ScalarIV, PSE.getSE(), DL);
- ScalarIV->setName("offset.idx");
- }
- if (!Step) {
- SCEVExpander Exp(*PSE.getSE(), DL, "induction");
- Step = Exp.expandCodeFor(ID.getStep(), ID.getStep()->getType(),
- &*Builder.GetInsertPoint());
+ !shouldScalarizeInstruction(EntryVal)) {
+ createVectorIntInductionPHI(ID, EntryVal);
+ VectorizedIV = true;
+ }
+
+ // If we haven't yet vectorized the induction variable, or if we will create
+ // a scalar one, we need to define the scalar induction variable and step
+ // values. If we were given a truncation type, truncate the canonical
+ // induction variable and constant step. Otherwise, derive these values from
+ // the induction descriptor.
+ if (!VectorizedIV || NeedsScalarIV) {
+ if (Trunc) {
+ auto *TruncType = cast<IntegerType>(Trunc->getType());
+ assert(Step && "Truncation requires constant integer step");
+ auto StepInt = cast<ConstantInt>(Step)->getSExtValue();
+ ScalarIV = Builder.CreateCast(Instruction::Trunc, Induction, TruncType);
+ Step = ConstantInt::getSigned(TruncType, StepInt);
+ } else {
+ ScalarIV = Induction;
+ auto &DL = OrigLoop->getHeader()->getModule()->getDataLayout();
+ if (IV != OldInduction) {
+ ScalarIV = Builder.CreateSExtOrTrunc(ScalarIV, IV->getType());
+ ScalarIV = ID.transform(Builder, ScalarIV, PSE.getSE(), DL);
+ ScalarIV->setName("offset.idx");
+ }
+ if (!Step) {
+ SCEVExpander Exp(*PSE.getSE(), DL, "induction");
+ Step = Exp.expandCodeFor(ID.getStep(), ID.getStep()->getType(),
+ &*Builder.GetInsertPoint());
+ }
}
}
- // Splat the scalar induction variable, and build the necessary step vectors.
- Value *Broadcasted = getBroadcastInstrs(ScalarIV);
- for (unsigned Part = 0; Part < UF; ++Part)
- Entry[Part] = getStepVector(Broadcasted, VF * Part, Step);
+ // If we haven't yet vectorized the induction variable, splat the scalar
+ // induction variable, and build the necessary step vectors.
+ if (!VectorizedIV) {
+ Value *Broadcasted = getBroadcastInstrs(ScalarIV);
+ VectorParts Entry(UF);
+ for (unsigned Part = 0; Part < UF; ++Part)
+ Entry[Part] = getStepVector(Broadcasted, VF * Part, Step);
+ VectorLoopValueMap.initVector(EntryVal, Entry);
+ if (Trunc)
+ addMetadata(Entry, Trunc);
+ }
// If an induction variable is only used for counting loop iterations or
// calculating addresses, it doesn't need to be widened. Create scalar steps
@@ -2047,38 +2328,64 @@ void InnerLoopVectorizer::widenIntInduction(PHINode *IV, VectorParts &Entry,
// addition of the scalar steps will not increase the number of instructions
// in the loop in the common case prior to InstCombine. We will be trading
// one vector extract for each scalar step.
- if (VF > 1 && ValuesNotWidened->count(IV)) {
- auto *EntryVal = Trunc ? cast<Value>(Trunc) : IV;
+ if (NeedsScalarIV)
buildScalarSteps(ScalarIV, Step, EntryVal);
- }
}
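+// Editor's note: an illustrative sketch (assumed shapes, not actual output)
+// of the two forms widenIntInduction can produce for VF = 4, UF = 2, step 1:
+//
+//   vector.body:
+//     %vec.ind = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %ph ],
+//                              [ %vec.ind.next, %latch ]
+//     %step.add = add <4 x i64> %vec.ind, <i64 4, i64 4, i64 4, i64 4>
+//     ...
+//     %vec.ind.next = add <4 x i64> %step.add, <i64 4, i64 4, i64 4, i64 4>
+//
+// and, when NeedsScalarIV also holds, buildScalarSteps additionally emits the
+// per-lane scalars ScalarIV + k * Step for k = 0 .. VF * UF - 1.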
-Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx,
- Value *Step) {
+Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx, Value *Step,
+ Instruction::BinaryOps BinOp) {
+ // Create and check the types.
assert(Val->getType()->isVectorTy() && "Must be a vector");
- assert(Val->getType()->getScalarType()->isIntegerTy() &&
- "Elem must be an integer");
- assert(Step->getType() == Val->getType()->getScalarType() &&
- "Step has wrong type");
- // Create the types.
- Type *ITy = Val->getType()->getScalarType();
- VectorType *Ty = cast<VectorType>(Val->getType());
- int VLen = Ty->getNumElements();
+ int VLen = Val->getType()->getVectorNumElements();
+
+ Type *STy = Val->getType()->getScalarType();
+ assert((STy->isIntegerTy() || STy->isFloatingPointTy()) &&
+ "Induction Step must be an integer or FP");
+ assert(Step->getType() == STy && "Step has wrong type");
+
SmallVector<Constant *, 8> Indices;
+ if (STy->isIntegerTy()) {
+ // Create a vector of consecutive numbers from zero to VF.
+ for (int i = 0; i < VLen; ++i)
+ Indices.push_back(ConstantInt::get(STy, StartIdx + i));
+
+ // Add the consecutive indices to the vector value.
+ Constant *Cv = ConstantVector::get(Indices);
+ assert(Cv->getType() == Val->getType() && "Invalid consecutive vec");
+ Step = Builder.CreateVectorSplat(VLen, Step);
+ assert(Step->getType() == Val->getType() && "Invalid step vec");
+ // FIXME: The newly created binary instructions should contain nsw/nuw flags,
+ // which can be found from the original scalar operations.
+ Step = Builder.CreateMul(Cv, Step);
+ return Builder.CreateAdd(Val, Step, "induction");
+ }
+
+ // Floating point induction.
+ assert((BinOp == Instruction::FAdd || BinOp == Instruction::FSub) &&
+ "Binary Opcode should be specified for FP induction");
// Create a vector of consecutive numbers from zero to VF.
for (int i = 0; i < VLen; ++i)
- Indices.push_back(ConstantInt::get(ITy, StartIdx + i));
+ Indices.push_back(ConstantFP::get(STy, (double)(StartIdx + i)));
// Add the consecutive indices to the vector value.
Constant *Cv = ConstantVector::get(Indices);
- assert(Cv->getType() == Val->getType() && "Invalid consecutive vec");
+
Step = Builder.CreateVectorSplat(VLen, Step);
- assert(Step->getType() == Val->getType() && "Invalid step vec");
- // FIXME: The newly created binary instructions should contain nsw/nuw flags,
- // which can be found from the original scalar operations.
- Step = Builder.CreateMul(Cv, Step);
- return Builder.CreateAdd(Val, Step, "induction");
+
+ // Floating point operations had to be 'fast' to enable the induction.
+ FastMathFlags Flags;
+ Flags.setUnsafeAlgebra();
+
+ Value *MulOp = Builder.CreateFMul(Cv, Step);
+ if (isa<Instruction>(MulOp))
+ // Have to check: MulOp may be a constant rather than an instruction.
+ cast<Instruction>(MulOp)->setFastMathFlags(Flags);
+
+ Value *BOp = Builder.CreateBinOp(BinOp, Val, MulOp, "induction");
+ if (isa<Instruction>(BOp))
+ cast<Instruction>(BOp)->setFastMathFlags(Flags);
+ return BOp;
}
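+// Editor's note: a rough illustration (assumed values) of the step vectors
+// built by getStepVector. For an integer induction with VF = 4, StartIdx = 4
+// and a runtime step %s, the result is
+//
+//   %induction = Val + <i64 4, i64 5, i64 6, i64 7> * splat(%s)
+//
+// while an FP induction with BinOp = fadd emits the multiply and add as
+// fmul/fadd carrying unsafe-algebra fast-math flags.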
void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step,
@@ -2092,98 +2399,34 @@ void InnerLoopVectorizer::buildScalarSteps(Value *ScalarIV, Value *Step,
assert(ScalarIVTy->isIntegerTy() && ScalarIVTy == Step->getType() &&
"Val and Step should have the same integer type");
- // Compute the scalar steps and save the results in ScalarIVMap.
- for (unsigned Part = 0; Part < UF; ++Part)
- for (unsigned I = 0; I < VF; ++I) {
- auto *StartIdx = ConstantInt::get(ScalarIVTy, VF * Part + I);
+ // Determine the number of scalars we need to generate for each unroll
+ // iteration. If EntryVal is uniform, we only need to generate the first
+ // lane. Otherwise, we generate all VF values.
+ unsigned Lanes =
+ Legal->isUniformAfterVectorization(cast<Instruction>(EntryVal)) ? 1 : VF;
+
+ // Compute the scalar steps and save the results in VectorLoopValueMap.
+ ScalarParts Entry(UF);
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ Entry[Part].resize(VF);
+ for (unsigned Lane = 0; Lane < Lanes; ++Lane) {
+ auto *StartIdx = ConstantInt::get(ScalarIVTy, VF * Part + Lane);
auto *Mul = Builder.CreateMul(StartIdx, Step);
auto *Add = Builder.CreateAdd(ScalarIV, Mul);
- ScalarIVMap[EntryVal].push_back(Add);
+ Entry[Part][Lane] = Add;
}
+ }
+ VectorLoopValueMap.initScalar(EntryVal, Entry);
}
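+// Editor's note: a hedged example of the scalar steps recorded above. With
+// VF = 4, UF = 2 and a non-uniform EntryVal, the map receives the values
+// ScalarIV + (VF * Part + Lane) * Step for Part = 0..1 and Lane = 0..3; a
+// uniform EntryVal only gets lane 0 of each unroll part.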
int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
- assert(Ptr->getType()->isPointerTy() && "Unexpected non-ptr");
- auto *SE = PSE.getSE();
- // Make sure that the pointer does not point to structs.
- if (Ptr->getType()->getPointerElementType()->isAggregateType())
- return 0;
-
- // If this value is a pointer induction variable, we know it is consecutive.
- PHINode *Phi = dyn_cast_or_null<PHINode>(Ptr);
- if (Phi && Inductions.count(Phi)) {
- InductionDescriptor II = Inductions[Phi];
- return II.getConsecutiveDirection();
- }
-
- GetElementPtrInst *Gep = getGEPInstruction(Ptr);
- if (!Gep)
- return 0;
-
- unsigned NumOperands = Gep->getNumOperands();
- Value *GpPtr = Gep->getPointerOperand();
- // If this GEP value is a consecutive pointer induction variable and all of
- // the indices are constant, then we know it is consecutive.
- Phi = dyn_cast<PHINode>(GpPtr);
- if (Phi && Inductions.count(Phi)) {
-
- // Make sure that the pointer does not point to structs.
- PointerType *GepPtrType = cast<PointerType>(GpPtr->getType());
- if (GepPtrType->getElementType()->isAggregateType())
- return 0;
-
- // Make sure that all of the index operands are loop invariant.
- for (unsigned i = 1; i < NumOperands; ++i)
- if (!SE->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)), TheLoop))
- return 0;
-
- InductionDescriptor II = Inductions[Phi];
- return II.getConsecutiveDirection();
- }
-
- unsigned InductionOperand = getGEPInductionOperand(Gep);
-
- // Check that all of the gep indices are uniform except for our induction
- // operand.
- for (unsigned i = 0; i != NumOperands; ++i)
- if (i != InductionOperand &&
- !SE->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)), TheLoop))
- return 0;
- // We can emit wide load/stores only if the last non-zero index is the
- // induction variable.
- const SCEV *Last = nullptr;
- if (!getSymbolicStrides() || !getSymbolicStrides()->count(Gep))
- Last = PSE.getSCEV(Gep->getOperand(InductionOperand));
- else {
- // Because of the multiplication by a stride we can have a s/zext cast.
- // We are going to replace this stride by 1 so the cast is safe to ignore.
- //
- // %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
- // %0 = trunc i64 %indvars.iv to i32
- // %mul = mul i32 %0, %Stride1
- // %idxprom = zext i32 %mul to i64 << Safe cast.
- // %arrayidx = getelementptr inbounds i32* %B, i64 %idxprom
- //
- Last = replaceSymbolicStrideSCEV(PSE, *getSymbolicStrides(),
- Gep->getOperand(InductionOperand), Gep);
- if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(Last))
- Last =
- (C->getSCEVType() == scSignExtend || C->getSCEVType() == scZeroExtend)
- ? C->getOperand()
- : Last;
- }
- if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Last)) {
- const SCEV *Step = AR->getStepRecurrence(*SE);
-
- // The memory is consecutive because the last index is consecutive
- // and all other indices are loop invariant.
- if (Step->isOne())
- return 1;
- if (Step->isAllOnesValue())
- return -1;
- }
+ const ValueToValueMap &Strides =
+     getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap();
+ int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, true, false);
+ if (Stride == 1 || Stride == -1)
+ return Stride;
return 0;
}
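+// Editor's note: illustrative only. With the stride computed by getPtrStride,
+// an access like A[i] yields 1, an access walking backwards such as A[n - i]
+// yields -1, and anything else (e.g. A[2 * i] or an unknown stride) falls
+// through to the final return, i.e. "not consecutive".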
@@ -2191,23 +2434,112 @@ bool LoopVectorizationLegality::isUniform(Value *V) {
return LAI->isUniform(V);
}
-InnerLoopVectorizer::VectorParts &
+const InnerLoopVectorizer::VectorParts &
InnerLoopVectorizer::getVectorValue(Value *V) {
assert(V != Induction && "The new induction variable should not be used.");
assert(!V->getType()->isVectorTy() && "Can't widen a vector");
+ assert(!V->getType()->isVoidTy() && "Type does not produce a value");
// If we have a stride that is replaced by one, do it here.
if (Legal->hasStride(V))
V = ConstantInt::get(V->getType(), 1);
// If we have this scalar in the map, return it.
- if (WidenMap.has(V))
- return WidenMap.get(V);
+ if (VectorLoopValueMap.hasVector(V))
+ return VectorLoopValueMap.VectorMapStorage[V];
+
+ // If the value has not been vectorized, check if it has been scalarized
+ // instead. If it has been scalarized, and we actually need the value in
+ // vector form, we will construct the vector values on demand.
+ if (VectorLoopValueMap.hasScalar(V)) {
+
+ // Initialize a new vector map entry.
+ VectorParts Entry(UF);
+
+ // If we've scalarized a value, that value should be an instruction.
+ auto *I = cast<Instruction>(V);
+
+ // If we aren't vectorizing, we can just copy the scalar map values over to
+ // the vector map.
+ if (VF == 1) {
+ for (unsigned Part = 0; Part < UF; ++Part)
+ Entry[Part] = getScalarValue(V, Part, 0);
+ return VectorLoopValueMap.initVector(V, Entry);
+ }
+
+ // Get the last scalar instruction we generated for V. If the value is
+ // known to be uniform after vectorization, this corresponds to lane zero
+ // of the last unroll iteration. Otherwise, the last instruction is the one
+ // we created for the last vector lane of the last unroll iteration.
+ unsigned LastLane = Legal->isUniformAfterVectorization(I) ? 0 : VF - 1;
+ auto *LastInst = cast<Instruction>(getScalarValue(V, UF - 1, LastLane));
+
+ // Set the insert point after the last scalarized instruction. This ensures
+ // the insertelement sequence will directly follow the scalar definitions.
+ auto OldIP = Builder.saveIP();
+ auto NewIP = std::next(BasicBlock::iterator(LastInst));
+ Builder.SetInsertPoint(&*NewIP);
+
+ // However, if we are vectorizing, we need to construct the vector values.
+ // If the value is known to be uniform after vectorization, we can just
+ // broadcast the scalar value corresponding to lane zero for each unroll
+ // iteration. Otherwise, we construct the vector values using insertelement
+ // instructions. Since the resulting vectors are stored in
+ // VectorLoopValueMap, we will only generate the insertelements once.
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ Value *VectorValue = nullptr;
+ if (Legal->isUniformAfterVectorization(I)) {
+ VectorValue = getBroadcastInstrs(getScalarValue(V, Part, 0));
+ } else {
+ VectorValue = UndefValue::get(VectorType::get(V->getType(), VF));
+ for (unsigned Lane = 0; Lane < VF; ++Lane)
+ VectorValue = Builder.CreateInsertElement(
+ VectorValue, getScalarValue(V, Part, Lane),
+ Builder.getInt32(Lane));
+ }
+ Entry[Part] = VectorValue;
+ }
+ Builder.restoreIP(OldIP);
+ return VectorLoopValueMap.initVector(V, Entry);
+ }
// If this scalar is unknown, assume that it is a constant or that it is
// loop invariant. Broadcast V and save the value for future uses.
Value *B = getBroadcastInstrs(V);
- return WidenMap.splat(V, B);
+ return VectorLoopValueMap.initVector(V, VectorParts(UF, B));
+}
+
+Value *InnerLoopVectorizer::getScalarValue(Value *V, unsigned Part,
+ unsigned Lane) {
+
+ // If the value is not an instruction contained in the loop, it should
+ // already be scalar.
+ if (OrigLoop->isLoopInvariant(V))
+ return V;
+
+ assert(Lane > 0 ? !Legal->isUniformAfterVectorization(cast<Instruction>(V))
+ : true && "Uniform values only have lane zero");
+
+ // If the value from the original loop has not been vectorized, it is
+ // represented by UF x VF scalar values in the new loop. Return the requested
+ // scalar value.
+ if (VectorLoopValueMap.hasScalar(V))
+ return VectorLoopValueMap.ScalarMapStorage[V][Part][Lane];
+
+ // If the value has not been scalarized, get its entry in VectorLoopValueMap
+ // for the given unroll part. If this entry is not a vector type (i.e., the
+ // vectorization factor is one), there is no need to generate an
+ // extractelement instruction.
+ auto *U = getVectorValue(V)[Part];
+ if (!U->getType()->isVectorTy()) {
+ assert(VF == 1 && "Value not scalarized has non-vector type");
+ return U;
+ }
+
+ // Otherwise, the value from the original loop has been vectorized and is
+ // represented by UF vector values. Extract and return the requested scalar
+ // value from the appropriate vector lane.
+ return Builder.CreateExtractElement(U, Builder.getInt32(Lane));
}
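+// Editor's note: a sketch (assumed IR, not actual output) of the on-demand
+// extraction performed by getScalarValue. If %v was vectorized as a
+// <4 x i32> value %v.vec for unroll part 0, then getScalarValue(%v, 0, 2) is
+// expected to materialize
+//
+//   %lane = extractelement <4 x i32> %v.vec, i32 2
+//
+// whereas a value that was scalarized simply returns the cached
+// ScalarMapStorage entry for the requested part and lane without emitting any
+// new instructions.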
Value *InnerLoopVectorizer::reverseVector(Value *Vec) {
@@ -2355,7 +2687,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) {
LoadInst *LI = dyn_cast<LoadInst>(Instr);
StoreInst *SI = dyn_cast<StoreInst>(Instr);
- Value *Ptr = LI ? LI->getPointerOperand() : SI->getPointerOperand();
+ Value *Ptr = getPointerOperand(Instr);
// Prepare for the vector type of the interleaved load/store.
Type *ScalarTy = LI ? LI->getType() : SI->getValueOperand()->getType();
@@ -2365,15 +2697,20 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) {
// Prepare for the new pointers.
setDebugLocFromInst(Builder, Ptr);
- VectorParts &PtrParts = getVectorValue(Ptr);
SmallVector<Value *, 2> NewPtrs;
unsigned Index = Group->getIndex(Instr);
+
+ // If the group is reverse, adjust the index to refer to the last vector lane
+ // instead of the first. We adjust the index from the first vector lane,
+ // rather than directly getting the pointer for lane VF - 1, because the
+ // pointer operand of the interleaved access is supposed to be uniform. For
+ // uniform instructions, we're only required to generate a value for the
+ // first vector lane in each unroll iteration.
+ if (Group->isReverse())
+ Index += (VF - 1) * Group->getFactor();
+
for (unsigned Part = 0; Part < UF; Part++) {
- // Extract the pointer for current instruction from the pointer vector. A
- // reverse access uses the pointer in the last lane.
- Value *NewPtr = Builder.CreateExtractElement(
- PtrParts[Part],
- Group->isReverse() ? Builder.getInt32(VF - 1) : Builder.getInt32(0));
+ Value *NewPtr = getScalarValue(Ptr, Part, 0);
// Note that the current instruction could be any member of the group. We need
// to adjust the address to that of the member at index 0.
@@ -2397,20 +2734,30 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) {
// Vectorize the interleaved load group.
if (LI) {
+
+ // For each unroll part, create a wide load for the group.
+ SmallVector<Value *, 2> NewLoads;
for (unsigned Part = 0; Part < UF; Part++) {
- Instruction *NewLoadInstr = Builder.CreateAlignedLoad(
+ auto *NewLoad = Builder.CreateAlignedLoad(
NewPtrs[Part], Group->getAlignment(), "wide.vec");
+ addMetadata(NewLoad, Instr);
+ NewLoads.push_back(NewLoad);
+ }
- for (unsigned i = 0; i < InterleaveFactor; i++) {
- Instruction *Member = Group->getMember(i);
+ // For each member in the group, shuffle out the appropriate data from the
+ // wide loads.
+ for (unsigned I = 0; I < InterleaveFactor; ++I) {
+ Instruction *Member = Group->getMember(I);
- // Skip the gaps in the group.
- if (!Member)
- continue;
+ // Skip the gaps in the group.
+ if (!Member)
+ continue;
- Constant *StrideMask = getStridedMask(Builder, i, InterleaveFactor, VF);
+ VectorParts Entry(UF);
+ Constant *StrideMask = getStridedMask(Builder, I, InterleaveFactor, VF);
+ for (unsigned Part = 0; Part < UF; Part++) {
Value *StridedVec = Builder.CreateShuffleVector(
- NewLoadInstr, UndefVec, StrideMask, "strided.vec");
+ NewLoads[Part], UndefVec, StrideMask, "strided.vec");
// If this member has different type, cast the result type.
if (Member->getType() != ScalarTy) {
@@ -2418,12 +2765,10 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup(Instruction *Instr) {
StridedVec = Builder.CreateBitOrPointerCast(StridedVec, OtherVTy);
}
- VectorParts &Entry = WidenMap.get(Member);
Entry[Part] =
Group->isReverse() ? reverseVector(StridedVec) : StridedVec;
}
-
- addMetadata(NewLoadInstr, Instr);
+ VectorLoopValueMap.initVector(Member, Entry);
}
return;
}
@@ -2479,7 +2824,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
Type *ScalarDataTy = LI ? LI->getType() : SI->getValueOperand()->getType();
Type *DataTy = VectorType::get(ScalarDataTy, VF);
- Value *Ptr = LI ? LI->getPointerOperand() : SI->getPointerOperand();
+ Value *Ptr = getPointerOperand(Instr);
unsigned Alignment = LI ? LI->getAlignment() : SI->getAlignment();
// An alignment of 0 means target abi alignment. We need to use the scalar's
// target abi alignment in such a case.
@@ -2487,93 +2832,57 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
if (!Alignment)
Alignment = DL.getABITypeAlignment(ScalarDataTy);
unsigned AddressSpace = Ptr->getType()->getPointerAddressSpace();
- uint64_t ScalarAllocatedSize = DL.getTypeAllocSize(ScalarDataTy);
- uint64_t VectorElementSize = DL.getTypeStoreSize(DataTy) / VF;
-
- if (SI && Legal->blockNeedsPredication(SI->getParent()) &&
- !Legal->isMaskRequired(SI))
- return scalarizeInstruction(Instr, true);
- if (ScalarAllocatedSize != VectorElementSize)
- return scalarizeInstruction(Instr);
+ // Scalarize the memory instruction if necessary.
+ if (Legal->memoryInstructionMustBeScalarized(Instr, VF))
+ return scalarizeInstruction(Instr, Legal->isScalarWithPredication(Instr));
- // If the pointer is loop invariant scalarize the load.
- if (LI && Legal->isUniform(Ptr))
- return scalarizeInstruction(Instr);
-
- // If the pointer is non-consecutive and gather/scatter is not supported
- // scalarize the instruction.
+ // Determine if the pointer operand of the access is either consecutive or
+ // reverse consecutive.
int ConsecutiveStride = Legal->isConsecutivePtr(Ptr);
bool Reverse = ConsecutiveStride < 0;
- bool CreateGatherScatter =
- !ConsecutiveStride && ((LI && Legal->isLegalMaskedGather(ScalarDataTy)) ||
- (SI && Legal->isLegalMaskedScatter(ScalarDataTy)));
- if (!ConsecutiveStride && !CreateGatherScatter)
- return scalarizeInstruction(Instr);
+ // Determine if either a gather or scatter operation is legal.
+ bool CreateGatherScatter =
+ !ConsecutiveStride && Legal->isLegalGatherOrScatter(Instr);
- Constant *Zero = Builder.getInt32(0);
- VectorParts &Entry = WidenMap.get(Instr);
VectorParts VectorGep;
// Handle consecutive loads/stores.
GetElementPtrInst *Gep = getGEPInstruction(Ptr);
if (ConsecutiveStride) {
- if (Gep && Legal->isInductionVariable(Gep->getPointerOperand())) {
- setDebugLocFromInst(Builder, Gep);
- Value *PtrOperand = Gep->getPointerOperand();
- Value *FirstBasePtr = getVectorValue(PtrOperand)[0];
- FirstBasePtr = Builder.CreateExtractElement(FirstBasePtr, Zero);
-
- // Create the new GEP with the new induction variable.
- GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
- Gep2->setOperand(0, FirstBasePtr);
- Gep2->setName("gep.indvar.base");
- Ptr = Builder.Insert(Gep2);
- } else if (Gep) {
- setDebugLocFromInst(Builder, Gep);
- assert(PSE.getSE()->isLoopInvariant(PSE.getSCEV(Gep->getPointerOperand()),
- OrigLoop) &&
- "Base ptr must be invariant");
- // The last index does not have to be the induction. It can be
- // consecutive and be a function of the index. For example A[I+1];
+ if (Gep) {
unsigned NumOperands = Gep->getNumOperands();
- unsigned InductionOperand = getGEPInductionOperand(Gep);
- // Create the new GEP with the new induction variable.
+#ifndef NDEBUG
+ // The original GEP that was identified as a consecutive memory access
+ // should have only one loop-variant operand.
+ unsigned NumOfLoopVariantOps = 0;
+ for (unsigned i = 0; i < NumOperands; ++i)
+ if (!PSE.getSE()->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)),
+ OrigLoop))
+ NumOfLoopVariantOps++;
+ assert(NumOfLoopVariantOps == 1 &&
+ "Consecutive GEP should have only one loop-variant operand");
+#endif
GetElementPtrInst *Gep2 = cast<GetElementPtrInst>(Gep->clone());
-
- for (unsigned i = 0; i < NumOperands; ++i) {
- Value *GepOperand = Gep->getOperand(i);
- Instruction *GepOperandInst = dyn_cast<Instruction>(GepOperand);
-
- // Update last index or loop invariant instruction anchored in loop.
- if (i == InductionOperand ||
- (GepOperandInst && OrigLoop->contains(GepOperandInst))) {
- assert((i == InductionOperand ||
- PSE.getSE()->isLoopInvariant(PSE.getSCEV(GepOperandInst),
- OrigLoop)) &&
- "Must be last index or loop invariant");
-
- VectorParts &GEPParts = getVectorValue(GepOperand);
-
- // If GepOperand is an induction variable, and there's a scalarized
- // version of it available, use it. Otherwise, we will need to create
- // an extractelement instruction.
- Value *Index = ScalarIVMap.count(GepOperand)
- ? ScalarIVMap[GepOperand][0]
- : Builder.CreateExtractElement(GEPParts[0], Zero);
-
- Gep2->setOperand(i, Index);
- Gep2->setName("gep.indvar.idx");
- }
- }
+ Gep2->setName("gep.indvar");
+
+ // A new GEP is created for a 0-lane value of the first unroll iteration.
+ // The GEPs for the rest of the unroll iterations are computed below as an
+ // offset from this GEP.
+ for (unsigned i = 0; i < NumOperands; ++i)
+ // We can apply getScalarValue() to all GEP indices. It returns the
+ // original value for a loop-invariant operand and the 0-lane value for a
+ // consecutive operand.
+ Gep2->setOperand(i, getScalarValue(Gep->getOperand(i),
+ 0, /* First unroll iteration */
+ 0 /* 0-lane of the vector */ ));
+ setDebugLocFromInst(Builder, Gep);
Ptr = Builder.Insert(Gep2);
+
} else { // No GEP
- // Use the induction element ptr.
- assert(isa<PHINode>(Ptr) && "Invalid induction ptr");
setDebugLocFromInst(Builder, Ptr);
- VectorParts &PtrVal = getVectorValue(Ptr);
- Ptr = Builder.CreateExtractElement(PtrVal[0], Zero);
+ Ptr = getScalarValue(Ptr, 0, 0);
}
} else {
// At this point we should have the vector version of the GEP for the gather
// or scatter.
@@ -2660,6 +2969,7 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
// Handle loads.
assert(LI && "Must have a load instruction");
setDebugLocFromInst(Builder, LI);
+ VectorParts Entry(UF);
for (unsigned Part = 0; Part < UF; ++Part) {
Instruction *NewLI;
if (CreateGatherScatter) {
@@ -2692,70 +3002,45 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) {
}
addMetadata(NewLI, LI);
}
+ VectorLoopValueMap.initVector(Instr, Entry);
}
void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr,
- bool IfPredicateStore) {
+ bool IfPredicateInstr) {
assert(!Instr->getType()->isAggregateType() && "Can't handle vectors");
+ DEBUG(dbgs() << "LV: Scalarizing"
+ << (IfPredicateInstr ? " and predicating:" : ":") << *Instr
+ << '\n');
// Holds vector parameters or scalars, in case of uniform vals.
SmallVector<VectorParts, 4> Params;
setDebugLocFromInst(Builder, Instr);
- // Find all of the vectorized parameters.
- for (Value *SrcOp : Instr->operands()) {
- // If we are accessing the old induction variable, use the new one.
- if (SrcOp == OldInduction) {
- Params.push_back(getVectorValue(SrcOp));
- continue;
- }
-
- // Try using previously calculated values.
- auto *SrcInst = dyn_cast<Instruction>(SrcOp);
-
- // If the src is an instruction that appeared earlier in the basic block,
- // then it should already be vectorized.
- if (SrcInst && OrigLoop->contains(SrcInst)) {
- assert(WidenMap.has(SrcInst) && "Source operand is unavailable");
- // The parameter is a vector value from earlier.
- Params.push_back(WidenMap.get(SrcInst));
- } else {
- // The parameter is a scalar from outside the loop. Maybe even a constant.
- VectorParts Scalars;
- Scalars.append(UF, SrcOp);
- Params.push_back(Scalars);
- }
- }
-
- assert(Params.size() == Instr->getNumOperands() &&
- "Invalid number of operands");
-
// Does this instruction return a value ?
bool IsVoidRetTy = Instr->getType()->isVoidTy();
- Value *UndefVec =
- IsVoidRetTy ? nullptr
- : UndefValue::get(VectorType::get(Instr->getType(), VF));
- // Create a new entry in the WidenMap and initialize it to Undef or Null.
- VectorParts &VecResults = WidenMap.splat(Instr, UndefVec);
+ // Initialize a new scalar map entry.
+ ScalarParts Entry(UF);
VectorParts Cond;
- if (IfPredicateStore) {
- assert(Instr->getParent()->getSinglePredecessor() &&
- "Only support single predecessor blocks");
- Cond = createEdgeMask(Instr->getParent()->getSinglePredecessor(),
- Instr->getParent());
- }
+ if (IfPredicateInstr)
+ Cond = createBlockInMask(Instr->getParent());
+
+ // Determine the number of scalars we need to generate for each unroll
+ // iteration. If the instruction is uniform, we only need to generate the
+ // first lane. Otherwise, we generate all VF values.
+ unsigned Lanes = Legal->isUniformAfterVectorization(Instr) ? 1 : VF;
// For each vector unroll 'part':
for (unsigned Part = 0; Part < UF; ++Part) {
+ Entry[Part].resize(VF);
// For each scalar that we create:
- for (unsigned Width = 0; Width < VF; ++Width) {
+ for (unsigned Lane = 0; Lane < Lanes; ++Lane) {
// Start if-block.
Value *Cmp = nullptr;
- if (IfPredicateStore) {
- Cmp = Builder.CreateExtractElement(Cond[Part], Builder.getInt32(Width));
+ if (IfPredicateInstr) {
+ Cmp = Builder.CreateExtractElement(Cond[Part], Builder.getInt32(Lane));
Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Cmp,
ConstantInt::get(Cmp->getType(), 1));
}
@@ -2763,18 +3048,11 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr,
Instruction *Cloned = Instr->clone();
if (!IsVoidRetTy)
Cloned->setName(Instr->getName() + ".cloned");
- // Replace the operands of the cloned instructions with extracted scalars.
- for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) {
- // If the operand is an induction variable, and there's a scalarized
- // version of it available, use it. Otherwise, we will need to create
- // an extractelement instruction if vectorizing.
- auto *NewOp = Params[op][Part];
- auto *ScalarOp = Instr->getOperand(op);
- if (ScalarIVMap.count(ScalarOp))
- NewOp = ScalarIVMap[ScalarOp][VF * Part + Width];
- else if (NewOp->getType()->isVectorTy())
- NewOp = Builder.CreateExtractElement(NewOp, Builder.getInt32(Width));
+ // Replace the operands of the cloned instructions with their scalar
+ // equivalents in the new loop.
+ for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) {
+ auto *NewOp = getScalarValue(Instr->getOperand(op), Part, Lane);
Cloned->setOperand(op, NewOp);
}
addNewMetadata(Cloned, Instr);
@@ -2782,22 +3060,20 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr,
// Place the cloned scalar in the new loop.
Builder.Insert(Cloned);
+ // Add the cloned scalar to the scalar map entry.
+ Entry[Part][Lane] = Cloned;
+
// If we just cloned a new assumption, add it the assumption cache.
if (auto *II = dyn_cast<IntrinsicInst>(Cloned))
if (II->getIntrinsicID() == Intrinsic::assume)
AC->registerAssumption(II);
- // If the original scalar returns a value we need to place it in a vector
- // so that future users will be able to use it.
- if (!IsVoidRetTy)
- VecResults[Part] = Builder.CreateInsertElement(VecResults[Part], Cloned,
- Builder.getInt32(Width));
// End if-block.
- if (IfPredicateStore)
- PredicatedStores.push_back(
- std::make_pair(cast<StoreInst>(Cloned), Cmp));
+ if (IfPredicateInstr)
+ PredicatedInstructions.push_back(std::make_pair(Cloned, Cmp));
}
}
+ VectorLoopValueMap.initScalar(Instr, Entry);
}
PHINode *InnerLoopVectorizer::createInductionVariable(Loop *L, Value *Start,
@@ -2811,10 +3087,12 @@ PHINode *InnerLoopVectorizer::createInductionVariable(Loop *L, Value *Start,
Latch = Header;
IRBuilder<> Builder(&*Header->getFirstInsertionPt());
- setDebugLocFromInst(Builder, getDebugLocFromInstOrOperands(OldInduction));
+ Instruction *OldInst = getDebugLocFromInstOrOperands(OldInduction);
+ setDebugLocFromInst(Builder, OldInst);
auto *Induction = Builder.CreatePHI(Start->getType(), 2, "index");
Builder.SetInsertPoint(Latch->getTerminator());
+ setDebugLocFromInst(Builder, OldInst);
// Create i+1 and fill the PHINode.
Value *Next = Builder.CreateAdd(Induction, Step, "index.next");
@@ -3152,8 +3430,10 @@ void InnerLoopVectorizer::createEmptyLoop() {
EndValue = CountRoundDown;
} else {
IRBuilder<> B(LoopBypassBlocks.back()->getTerminator());
- Value *CRD = B.CreateSExtOrTrunc(CountRoundDown,
- II.getStep()->getType(), "cast.crd");
+ Type *StepType = II.getStep()->getType();
+ Instruction::CastOps CastOp =
+ CastInst::getCastOpcode(CountRoundDown, true, StepType, true);
+ Value *CRD = B.CreateCast(CastOp, CountRoundDown, StepType, "cast.crd");
const DataLayout &DL = OrigLoop->getHeader()->getModule()->getDataLayout();
EndValue = II.transform(B, CRD, PSE.getSE(), DL);
EndValue->setName("ind.end");
@@ -3201,7 +3481,7 @@ void InnerLoopVectorizer::createEmptyLoop() {
if (MDNode *LID = OrigLoop->getLoopID())
Lp->setLoopID(LID);
- LoopVectorizeHints Hints(Lp, true);
+ LoopVectorizeHints Hints(Lp, true, *ORE);
Hints.setAlreadyVectorized();
}
@@ -3324,8 +3604,9 @@ static Value *addFastMathFlag(Value *V) {
return V;
}
-/// Estimate the overhead of scalarizing a value. Insert and Extract are set if
-/// the result needs to be inserted and/or extracted from vectors.
+/// \brief Estimate the overhead of scalarizing a value based on its type.
+/// Insert and Extract are set if the result needs to be inserted and/or
+/// extracted from vectors.
static unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract,
const TargetTransformInfo &TTI) {
if (Ty->isVoidTy())
@@ -3335,15 +3616,46 @@ static unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract,
unsigned Cost = 0;
for (unsigned I = 0, E = Ty->getVectorNumElements(); I < E; ++I) {
- if (Insert)
- Cost += TTI.getVectorInstrCost(Instruction::InsertElement, Ty, I);
if (Extract)
Cost += TTI.getVectorInstrCost(Instruction::ExtractElement, Ty, I);
+ if (Insert)
+ Cost += TTI.getVectorInstrCost(Instruction::InsertElement, Ty, I);
}
return Cost;
}
+/// \brief Estimate the overhead of scalarizing an Instruction based on the
+/// types of its operands and return value.
+static unsigned getScalarizationOverhead(SmallVectorImpl<Type *> &OpTys,
+ Type *RetTy,
+ const TargetTransformInfo &TTI) {
+ unsigned ScalarizationCost =
+ getScalarizationOverhead(RetTy, true, false, TTI);
+
+ for (Type *Ty : OpTys)
+ ScalarizationCost += getScalarizationOverhead(Ty, false, true, TTI);
+
+ return ScalarizationCost;
+}
+
+/// \brief Estimate the overhead of scalarizing an instruction. This is a
+/// convenience wrapper for the type-based getScalarizationOverhead API.
+static unsigned getScalarizationOverhead(Instruction *I, unsigned VF,
+ const TargetTransformInfo &TTI) {
+ if (VF == 1)
+ return 0;
+
+ Type *RetTy = ToVectorTy(I->getType(), VF);
+
+ SmallVector<Type *, 4> OpTys;
+ unsigned OperandsNum = I->getNumOperands();
+ for (unsigned OpInd = 0; OpInd < OperandsNum; ++OpInd)
+ OpTys.push_back(ToVectorTy(I->getOperand(OpInd)->getType(), VF));
+
+ return getScalarizationOverhead(OpTys, RetTy, TTI);
+}
+
// Estimate cost of a call instruction CI if it were vectorized with factor VF.
// Return the cost of the instruction, including scalarization overhead if it's
// needed. The flag NeedToScalarize shows if the call needs to be scalarized -
@@ -3374,10 +3686,7 @@ static unsigned getVectorCallCost(CallInst *CI, unsigned VF,
// Compute costs of unpacking argument values for the scalar calls and
// packing the return values to a vector.
- unsigned ScalarizationCost =
- getScalarizationOverhead(RetTy, true, false, TTI);
- for (Type *Ty : Tys)
- ScalarizationCost += getScalarizationOverhead(Ty, false, true, TTI);
+ unsigned ScalarizationCost = getScalarizationOverhead(Tys, RetTy, TTI);
unsigned Cost = ScalarCallCost * VF + ScalarizationCost;
@@ -3434,8 +3743,13 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() {
// later and will remove any ext/trunc pairs.
//
SmallPtrSet<Value *, 4> Erased;
- for (const auto &KV : *MinBWs) {
- VectorParts &Parts = WidenMap.get(KV.first);
+ for (const auto &KV : Cost->getMinimalBitwidths()) {
+ // If the value wasn't vectorized, we must maintain the original scalar
+ // type. The absence of the value from VectorLoopValueMap indicates that it
+ // wasn't vectorized.
+ if (!VectorLoopValueMap.hasVector(KV.first))
+ continue;
+ VectorParts &Parts = VectorLoopValueMap.getVector(KV.first);
for (Value *&I : Parts) {
if (Erased.count(I) || I->use_empty() || !isa<Instruction>(I))
continue;
@@ -3526,8 +3840,13 @@ void InnerLoopVectorizer::truncateToMinimalBitwidths() {
}
// We'll have created a bunch of ZExts that are now parentless. Clean up.
- for (const auto &KV : *MinBWs) {
- VectorParts &Parts = WidenMap.get(KV.first);
+ for (const auto &KV : Cost->getMinimalBitwidths()) {
+ // If the value wasn't vectorized, we must maintain the original scalar
+ // type. The absence of the value from VectorLoopValueMap indicates that it
+ // wasn't vectorized.
+ if (!VectorLoopValueMap.hasVector(KV.first))
+ continue;
+ VectorParts &Parts = VectorLoopValueMap.getVector(KV.first);
for (Value *&I : Parts) {
ZExtInst *Inst = dyn_cast<ZExtInst>(I);
if (Inst && Inst->use_empty()) {
@@ -3558,6 +3877,11 @@ void InnerLoopVectorizer::vectorizeLoop() {
// are vectorized, so we can use them to construct the PHI.
PhiVector PHIsToFix;
+ // Collect instructions from the original loop that will become trivially
+ // dead in the vectorized loop. We don't need to vectorize these
+ // instructions.
+ collectTriviallyDeadInstructions();
+
// Scan the loop in a topological order to ensure that defs are vectorized
// before users.
LoopBlocksDFS DFS(OrigLoop);
@@ -3605,7 +3929,7 @@ void InnerLoopVectorizer::vectorizeLoop() {
Builder.SetInsertPoint(LoopBypassBlocks[1]->getTerminator());
// This is the vector-clone of the value that leaves the loop.
- VectorParts &VectorExit = getVectorValue(LoopExitInst);
+ const VectorParts &VectorExit = getVectorValue(LoopExitInst);
Type *VecTy = VectorExit[0]->getType();
// Find the reduction identity variable. Zero for addition, or, xor,
@@ -3644,10 +3968,10 @@ void InnerLoopVectorizer::vectorizeLoop() {
// Reductions do not have to start at zero. They can start with
// any loop invariant values.
- VectorParts &VecRdxPhi = WidenMap.get(Phi);
+ const VectorParts &VecRdxPhi = getVectorValue(Phi);
BasicBlock *Latch = OrigLoop->getLoopLatch();
Value *LoopVal = Phi->getIncomingValueForBlock(Latch);
- VectorParts &Val = getVectorValue(LoopVal);
+ const VectorParts &Val = getVectorValue(LoopVal);
for (unsigned part = 0; part < UF; ++part) {
// Make sure to add the reduction stat value only to the
// first unroll part.
@@ -3664,7 +3988,7 @@ void InnerLoopVectorizer::vectorizeLoop() {
// instructions.
Builder.SetInsertPoint(&*LoopMiddleBlock->getFirstInsertionPt());
- VectorParts RdxParts = getVectorValue(LoopExitInst);
+ VectorParts &RdxParts = VectorLoopValueMap.getVector(LoopExitInst);
setDebugLocFromInst(Builder, LoopExitInst);
// If the vector reduction can be performed in a smaller type, we truncate
@@ -3797,17 +4121,8 @@ void InnerLoopVectorizer::vectorizeLoop() {
// Make sure DomTree is updated.
updateAnalysis();
- // Predicate any stores.
- for (auto KV : PredicatedStores) {
- BasicBlock::iterator I(KV.first);
- auto *BB = SplitBlock(I->getParent(), &*std::next(I), DT, LI);
- auto *T = SplitBlockAndInsertIfThen(KV.second, &*I, /*Unreachable=*/false,
- /*BranchWeights=*/nullptr, DT, LI);
- I->moveBefore(T);
- I->getParent()->setName("pred.store.if");
- BB->setName("pred.store.continue");
- }
- DEBUG(DT->verifyDomTree());
+ predicateInstructions();
+
// Remove redundant induction instructions.
cse(LoopVectorBody);
}
@@ -3882,7 +4197,7 @@ void InnerLoopVectorizer::fixFirstOrderRecurrence(PHINode *Phi) {
// We constructed a temporary phi node in the first phase of vectorization.
// This phi node will eventually be deleted.
- auto &PhiParts = getVectorValue(Phi);
+ VectorParts &PhiParts = VectorLoopValueMap.getVector(Phi);
Builder.SetInsertPoint(cast<Instruction>(PhiParts[0]));
// Create a phi node for the new recurrence. The current value will either be
@@ -3974,10 +4289,217 @@ void InnerLoopVectorizer::fixLCSSAPHIs() {
}
}
+void InnerLoopVectorizer::collectTriviallyDeadInstructions() {
+ BasicBlock *Latch = OrigLoop->getLoopLatch();
+
+ // We create new control-flow for the vectorized loop, so the original
+ // condition will be dead after vectorization if it's only used by the
+ // branch.
+ auto *Cmp = dyn_cast<Instruction>(Latch->getTerminator()->getOperand(0));
+ if (Cmp && Cmp->hasOneUse())
+ DeadInstructions.insert(Cmp);
+
+ // We create new "steps" for induction variable updates to which the original
+ // induction variables map. An original update instruction will be dead if
+ // all its users except the induction variable are dead.
+ for (auto &Induction : *Legal->getInductionVars()) {
+ PHINode *Ind = Induction.first;
+ auto *IndUpdate = cast<Instruction>(Ind->getIncomingValueForBlock(Latch));
+ if (all_of(IndUpdate->users(), [&](User *U) -> bool {
+ return U == Ind || DeadInstructions.count(cast<Instruction>(U));
+ }))
+ DeadInstructions.insert(IndUpdate);
+ }
+}
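+// Editor's note: a hedged example of what ends up in DeadInstructions. For a
+// latch of the form
+//
+//   %i.next = add nuw nsw i64 %i, 1
+//   %exitcond = icmp eq i64 %i.next, %n
+//   br i1 %exitcond, label %exit, label %body
+//
+// the compare %exitcond is dead once the vector loop has its own trip-count
+// check, and %i.next is dead as well when its only users are the %i phi and
+// the already-dead compare.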
+
+void InnerLoopVectorizer::sinkScalarOperands(Instruction *PredInst) {
+
+ // The basic block and loop containing the predicated instruction.
+ auto *PredBB = PredInst->getParent();
+ auto *VectorLoop = LI->getLoopFor(PredBB);
+
+ // Initialize a worklist with the operands of the predicated instruction.
+ SetVector<Value *> Worklist(PredInst->op_begin(), PredInst->op_end());
+
+ // Holds instructions that we need to analyze again. An instruction may be
+ // reanalyzed if we don't yet know if we can sink it or not.
+ SmallVector<Instruction *, 8> InstsToReanalyze;
+
+ // Returns true if a given use occurs in the predicated block. Phi nodes use
+ // their operands in their corresponding predecessor blocks.
+ auto isBlockOfUsePredicated = [&](Use &U) -> bool {
+ auto *I = cast<Instruction>(U.getUser());
+ BasicBlock *BB = I->getParent();
+ if (auto *Phi = dyn_cast<PHINode>(I))
+ BB = Phi->getIncomingBlock(
+ PHINode::getIncomingValueNumForOperand(U.getOperandNo()));
+ return BB == PredBB;
+ };
+
+ // Iteratively sink the scalarized operands of the predicated instruction
+ // into the block we created for it. When an instruction is sunk, its
+ // operands are added to the worklist. The algorithm ends once a full pass
+ // over the worklist sinks no instructions.
+ bool Changed;
+ do {
+
+ // Add the instructions that need to be reanalyzed to the worklist, and
+ // reset the changed indicator.
+ Worklist.insert(InstsToReanalyze.begin(), InstsToReanalyze.end());
+ InstsToReanalyze.clear();
+ Changed = false;
+
+ while (!Worklist.empty()) {
+ auto *I = dyn_cast<Instruction>(Worklist.pop_back_val());
+
+ // We can't sink an instruction if it is a phi node, is already in the
+ // predicated block, is not in the loop, or may have side effects.
+ if (!I || isa<PHINode>(I) || I->getParent() == PredBB ||
+ !VectorLoop->contains(I) || I->mayHaveSideEffects())
+ continue;
+
+ // It's legal to sink the instruction if all its uses occur in the
+ // predicated block. Otherwise, there's nothing to do yet, and we may
+ // need to reanalyze the instruction.
+ if (!all_of(I->uses(), isBlockOfUsePredicated)) {
+ InstsToReanalyze.push_back(I);
+ continue;
+ }
+
+ // Move the instruction to the beginning of the predicated block, and add
+ // its operands to the worklist.
+ I->moveBefore(&*PredBB->getFirstInsertionPt());
+ Worklist.insert(I->op_begin(), I->op_end());
+
+ // The sinking may have enabled other instructions to be sunk, so we will
+ // need to iterate.
+ Changed = true;
+ }
+ } while (Changed);
+}
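+// Editor's note: an illustrative case (assumed IR). If a predicated sdiv in
+// pred.sdiv.if is the only user of
+//
+//   %e = extractelement <2 x i32> %wide.load, i32 0
+//
+// defined in vector.body, sinkScalarOperands moves the extract into
+// pred.sdiv.if so it only executes when the predicate holds; the extract's
+// own operands are then reconsidered for sinking.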
+
+void InnerLoopVectorizer::predicateInstructions() {
+
+ // For each instruction I marked for predication on value C, split I into its
+ // own basic block to form an if-then construct over C. Since I may be fed by
+ // an extractelement instruction or other scalar operand, we try to
+ // iteratively sink its scalar operands into the predicated block. If I feeds
+ // an insertelement instruction, we try to move this instruction into the
+ // predicated block as well. For non-void types, a phi node will be created
+ // for the resulting value (either vector or scalar).
+ //
+ // So for some predicated instruction, e.g. the conditional sdiv in:
+ //
+ // for.body:
+ // ...
+ // %add = add nsw i32 %mul, %0
+ // %cmp5 = icmp sgt i32 %2, 7
+ // br i1 %cmp5, label %if.then, label %if.end
+ //
+ // if.then:
+ // %div = sdiv i32 %0, %1
+ // br label %if.end
+ //
+ // if.end:
+ // %x.0 = phi i32 [ %div, %if.then ], [ %add, %for.body ]
+ //
+ // the sdiv at this point is scalarized and if-converted using a select.
+ // The inactive elements in the vector are not used, but the predicated
+ // instruction is still executed for all vector elements, essentially:
+ //
+ // vector.body:
+ // ...
+ // %17 = add nsw <2 x i32> %16, %wide.load
+ // %29 = extractelement <2 x i32> %wide.load, i32 0
+ // %30 = extractelement <2 x i32> %wide.load51, i32 0
+ // %31 = sdiv i32 %29, %30
+ // %32 = insertelement <2 x i32> undef, i32 %31, i32 0
+ // %35 = extractelement <2 x i32> %wide.load, i32 1
+ // %36 = extractelement <2 x i32> %wide.load51, i32 1
+ // %37 = sdiv i32 %35, %36
+ // %38 = insertelement <2 x i32> %32, i32 %37, i32 1
+ // %predphi = select <2 x i1> %26, <2 x i32> %38, <2 x i32> %17
+ //
+ // Predication will now re-introduce the original control flow to avoid false
+ // side-effects by the sdiv instructions on the inactive elements, yielding
+ // (after cleanup):
+ //
+ // vector.body:
+ // ...
+ // %5 = add nsw <2 x i32> %4, %wide.load
+ // %8 = icmp sgt <2 x i32> %wide.load52, <i32 7, i32 7>
+ // %9 = extractelement <2 x i1> %8, i32 0
+ // br i1 %9, label %pred.sdiv.if, label %pred.sdiv.continue
+ //
+ // pred.sdiv.if:
+ // %10 = extractelement <2 x i32> %wide.load, i32 0
+ // %11 = extractelement <2 x i32> %wide.load51, i32 0
+ // %12 = sdiv i32 %10, %11
+ // %13 = insertelement <2 x i32> undef, i32 %12, i32 0
+ // br label %pred.sdiv.continue
+ //
+ // pred.sdiv.continue:
+ // %14 = phi <2 x i32> [ undef, %vector.body ], [ %13, %pred.sdiv.if ]
+ // %15 = extractelement <2 x i1> %8, i32 1
+ // br i1 %15, label %pred.sdiv.if54, label %pred.sdiv.continue55
+ //
+ // pred.sdiv.if54:
+ // %16 = extractelement <2 x i32> %wide.load, i32 1
+ // %17 = extractelement <2 x i32> %wide.load51, i32 1
+ // %18 = sdiv i32 %16, %17
+ // %19 = insertelement <2 x i32> %14, i32 %18, i32 1
+ // br label %pred.sdiv.continue55
+ //
+ // pred.sdiv.continue55:
+ // %20 = phi <2 x i32> [ %14, %pred.sdiv.continue ], [ %19, %pred.sdiv.if54 ]
+ // %predphi = select <2 x i1> %8, <2 x i32> %20, <2 x i32> %5
+
+ for (auto KV : PredicatedInstructions) {
+ BasicBlock::iterator I(KV.first);
+ BasicBlock *Head = I->getParent();
+ auto *BB = SplitBlock(Head, &*std::next(I), DT, LI);
+ auto *T = SplitBlockAndInsertIfThen(KV.second, &*I, /*Unreachable=*/false,
+ /*BranchWeights=*/nullptr, DT, LI);
+ I->moveBefore(T);
+ sinkScalarOperands(&*I);
+
+ I->getParent()->setName(Twine("pred.") + I->getOpcodeName() + ".if");
+ BB->setName(Twine("pred.") + I->getOpcodeName() + ".continue");
+
+ // If the instruction is non-void, create a phi node at the reconvergence point.
+ if (!I->getType()->isVoidTy()) {
+ Value *IncomingTrue = nullptr;
+ Value *IncomingFalse = nullptr;
+
+ if (I->hasOneUse() && isa<InsertElementInst>(*I->user_begin())) {
+ // If the predicated instruction is feeding an insert-element, move it
+ // into the Then block; Phi node will be created for the vector.
+ InsertElementInst *IEI = cast<InsertElementInst>(*I->user_begin());
+ IEI->moveBefore(T);
+ IncomingTrue = IEI; // the new vector with the inserted element.
+ IncomingFalse = IEI->getOperand(0); // the unmodified vector
+ } else {
+ // Phi node will be created for the scalar predicated instruction.
+ IncomingTrue = &*I;
+ IncomingFalse = UndefValue::get(I->getType());
+ }
+
+ BasicBlock *PostDom = I->getParent()->getSingleSuccessor();
+ assert(PostDom && "Then block has multiple successors");
+ PHINode *Phi =
+ PHINode::Create(IncomingTrue->getType(), 2, "", &PostDom->front());
+ IncomingTrue->replaceAllUsesWith(Phi);
+ Phi->addIncoming(IncomingFalse, Head);
+ Phi->addIncoming(IncomingTrue, I->getParent());
+ }
+ }
+
+ DEBUG(DT->verifyDomTree());
+}
+
InnerLoopVectorizer::VectorParts
InnerLoopVectorizer::createEdgeMask(BasicBlock *Src, BasicBlock *Dst) {
- assert(std::find(pred_begin(Dst), pred_end(Dst), Src) != pred_end(Dst) &&
- "Invalid edge");
+ assert(is_contained(predecessors(Dst), Src) && "Invalid edge");
// Look for cached value.
std::pair<BasicBlock *, BasicBlock *> Edge(Src, Dst);
@@ -4033,12 +4555,12 @@ InnerLoopVectorizer::createBlockInMask(BasicBlock *BB) {
return BlockMask;
}
-void InnerLoopVectorizer::widenPHIInstruction(
- Instruction *PN, InnerLoopVectorizer::VectorParts &Entry, unsigned UF,
- unsigned VF, PhiVector *PV) {
+void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN, unsigned UF,
+ unsigned VF, PhiVector *PV) {
PHINode *P = cast<PHINode>(PN);
// Handle recurrences.
if (Legal->isReductionVariable(P) || Legal->isFirstOrderRecurrence(P)) {
+ VectorParts Entry(UF);
for (unsigned part = 0; part < UF; ++part) {
// This is phase one of vectorizing PHIs.
Type *VecTy =
@@ -4046,6 +4568,7 @@ void InnerLoopVectorizer::widenPHIInstruction(
Entry[part] = PHINode::Create(
VecTy, 2, "vec.phi", &*LoopVectorBody->getFirstInsertionPt());
}
+ VectorLoopValueMap.initVector(P, Entry);
PV->push_back(P);
return;
}
@@ -4066,10 +4589,11 @@ void InnerLoopVectorizer::widenPHIInstruction(
// SELECT(Mask3, In3,
// SELECT(Mask2, In2,
// ( ...)))
+ VectorParts Entry(UF);
for (unsigned In = 0; In < NumIncoming; In++) {
VectorParts Cond =
createEdgeMask(P->getIncomingBlock(In), P->getParent());
- VectorParts &In0 = getVectorValue(P->getIncomingValue(In));
+ const VectorParts &In0 = getVectorValue(P->getIncomingValue(In));
for (unsigned part = 0; part < UF; ++part) {
// We might have single edge PHIs (blocks) - use an identity
@@ -4083,6 +4607,7 @@ void InnerLoopVectorizer::widenPHIInstruction(
"predphi");
}
}
+ VectorLoopValueMap.initVector(P, Entry);
return;
}
@@ -4099,46 +4624,95 @@ void InnerLoopVectorizer::widenPHIInstruction(
case InductionDescriptor::IK_NoInduction:
llvm_unreachable("Unknown induction");
case InductionDescriptor::IK_IntInduction:
- return widenIntInduction(P, Entry);
- case InductionDescriptor::IK_PtrInduction:
+ return widenIntInduction(P);
+ case InductionDescriptor::IK_PtrInduction: {
// Handle the pointer induction variable case.
assert(P->getType()->isPointerTy() && "Unexpected type.");
// This is the normalized GEP that starts counting at zero.
Value *PtrInd = Induction;
PtrInd = Builder.CreateSExtOrTrunc(PtrInd, II.getStep()->getType());
- // This is the vector of results. Notice that we don't generate
- // vector geps because scalar geps result in better code.
- for (unsigned part = 0; part < UF; ++part) {
- if (VF == 1) {
- int EltIndex = part;
- Constant *Idx = ConstantInt::get(PtrInd->getType(), EltIndex);
- Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx);
- Value *SclrGep = II.transform(Builder, GlobalIdx, PSE.getSE(), DL);
- SclrGep->setName("next.gep");
- Entry[part] = SclrGep;
- continue;
- }
-
- Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF));
- for (unsigned int i = 0; i < VF; ++i) {
- int EltIndex = i + part * VF;
- Constant *Idx = ConstantInt::get(PtrInd->getType(), EltIndex);
+ // Determine the number of scalars we need to generate for each unroll
+ // iteration. If the instruction is uniform, we only need to generate the
+ // first lane. Otherwise, we generate all VF values.
+ unsigned Lanes = Legal->isUniformAfterVectorization(P) ? 1 : VF;
+ // These are the scalar results. Notice that we don't generate vector GEPs
+ // because scalar GEPs result in better code.
+ ScalarParts Entry(UF);
+ for (unsigned Part = 0; Part < UF; ++Part) {
+ Entry[Part].resize(VF);
+ for (unsigned Lane = 0; Lane < Lanes; ++Lane) {
+ Constant *Idx = ConstantInt::get(PtrInd->getType(), Lane + Part * VF);
Value *GlobalIdx = Builder.CreateAdd(PtrInd, Idx);
Value *SclrGep = II.transform(Builder, GlobalIdx, PSE.getSE(), DL);
SclrGep->setName("next.gep");
- VecVal = Builder.CreateInsertElement(VecVal, SclrGep,
- Builder.getInt32(i), "insert.gep");
+ Entry[Part][Lane] = SclrGep;
}
- Entry[part] = VecVal;
}
+ VectorLoopValueMap.initScalar(P, Entry);
return;
}
+ case InductionDescriptor::IK_FpInduction: {
+ assert(P->getType() == II.getStartValue()->getType() &&
+ "Types must match");
+ // Handle other induction variables that are now based on the
+ // canonical one.
+ assert(P != OldInduction && "Primary induction can be integer only");
+
+ Value *V = Builder.CreateCast(Instruction::SIToFP, Induction, P->getType());
+ V = II.transform(Builder, V, PSE.getSE(), DL);
+ V->setName("fp.offset.idx");
+
+ // Now we have scalar op: %fp.offset.idx = StartVal +/- Induction*StepVal
+
+ Value *Broadcasted = getBroadcastInstrs(V);
+ // After broadcasting the induction variable we need to make the vector
+ // consecutive by adding StepVal*0, StepVal*1, StepVal*2, etc.
+ Value *StepVal = cast<SCEVUnknown>(II.getStep())->getValue();
+ VectorParts Entry(UF);
+ for (unsigned part = 0; part < UF; ++part)
+ Entry[part] = getStepVector(Broadcasted, VF * part, StepVal,
+ II.getInductionOpcode());
+ VectorLoopValueMap.initVector(P, Entry);
+ return;
+ }
+ }
+}
+
+/// A helper function for checking whether an integer division-related
+/// instruction may divide by zero (in which case it must be predicated if
+/// executed conditionally in the scalar code).
+/// TODO: It may be worthwhile to generalize and check isKnownNonZero().
+/// Non-zero divisors that are not compile-time constants will not be
+/// converted into multiplication, so we will still end up scalarizing
+/// the division, but can do so w/o predication.
+static bool mayDivideByZero(Instruction &I) {
+ assert((I.getOpcode() == Instruction::UDiv ||
+ I.getOpcode() == Instruction::SDiv ||
+ I.getOpcode() == Instruction::URem ||
+ I.getOpcode() == Instruction::SRem) &&
+ "Unexpected instruction");
+ Value *Divisor = I.getOperand(1);
+ auto *CInt = dyn_cast<ConstantInt>(Divisor);
+ return !CInt || CInt->isZero();
}
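+// Editor's note: a couple of hedged examples for the predicate above.
+// mayDivideByZero returns false for "sdiv i32 %x, 7" (non-zero constant
+// divisor) but true for "udiv i32 %x, %y" (divisor unknown at compile time)
+// and for "urem i32 %x, 0", so only the latter two need predication when they
+// execute conditionally.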
void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
// For each instruction in the old loop.
for (Instruction &I : *BB) {
- VectorParts &Entry = WidenMap.get(&I);
+
+ // If the instruction will become trivially dead when vectorized, we don't
+ // need to generate it.
+ if (DeadInstructions.count(&I))
+ continue;
+
+ // Scalarize instructions that should remain scalar after vectorization.
+ if (VF > 1 &&
+ !(isa<BranchInst>(&I) || isa<PHINode>(&I) ||
+ isa<DbgInfoIntrinsic>(&I)) &&
+ shouldScalarizeInstruction(&I)) {
+ scalarizeInstruction(&I, Legal->isScalarWithPredication(&I));
+ continue;
+ }
switch (I.getOpcode()) {
case Instruction::Br:
@@ -4147,21 +4721,27 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
continue;
case Instruction::PHI: {
// Vectorize PHINodes.
- widenPHIInstruction(&I, Entry, UF, VF, PV);
+ widenPHIInstruction(&I, UF, VF, PV);
continue;
} // End of PHI.
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::SRem:
+ case Instruction::URem:
+ // Scalarize with predication if this instruction may divide by zero and
+ // its block's execution is conditional; otherwise fall through.
+ if (Legal->isScalarWithPredication(&I)) {
+ scalarizeInstruction(&I, true);
+ continue;
+ }
case Instruction::Add:
case Instruction::FAdd:
case Instruction::Sub:
case Instruction::FSub:
case Instruction::Mul:
case Instruction::FMul:
- case Instruction::UDiv:
- case Instruction::SDiv:
case Instruction::FDiv:
- case Instruction::URem:
- case Instruction::SRem:
case Instruction::FRem:
case Instruction::Shl:
case Instruction::LShr:
@@ -4172,10 +4752,11 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
// Just widen binops.
auto *BinOp = cast<BinaryOperator>(&I);
setDebugLocFromInst(Builder, BinOp);
- VectorParts &A = getVectorValue(BinOp->getOperand(0));
- VectorParts &B = getVectorValue(BinOp->getOperand(1));
+ const VectorParts &A = getVectorValue(BinOp->getOperand(0));
+ const VectorParts &B = getVectorValue(BinOp->getOperand(1));
// Use this vector value for all users of the original instruction.
+ VectorParts Entry(UF);
for (unsigned Part = 0; Part < UF; ++Part) {
Value *V = Builder.CreateBinOp(BinOp->getOpcode(), A[Part], B[Part]);
@@ -4185,6 +4766,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
Entry[Part] = V;
}
+ VectorLoopValueMap.initVector(&I, Entry);
addMetadata(Entry, BinOp);
break;
}
@@ -4201,20 +4783,19 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
// loop. This means that we can't just use the original 'cond' value.
// We have to take the 'vectorized' value and pick the first lane.
// Instcombine will make this a no-op.
- VectorParts &Cond = getVectorValue(I.getOperand(0));
- VectorParts &Op0 = getVectorValue(I.getOperand(1));
- VectorParts &Op1 = getVectorValue(I.getOperand(2));
+ const VectorParts &Cond = getVectorValue(I.getOperand(0));
+ const VectorParts &Op0 = getVectorValue(I.getOperand(1));
+ const VectorParts &Op1 = getVectorValue(I.getOperand(2));
- Value *ScalarCond =
- (VF == 1)
- ? Cond[0]
- : Builder.CreateExtractElement(Cond[0], Builder.getInt32(0));
+ auto *ScalarCond = getScalarValue(I.getOperand(0), 0, 0);
+ VectorParts Entry(UF);
for (unsigned Part = 0; Part < UF; ++Part) {
Entry[Part] = Builder.CreateSelect(
InvariantCond ? ScalarCond : Cond[Part], Op0[Part], Op1[Part]);
}
+ VectorLoopValueMap.initVector(&I, Entry);
addMetadata(Entry, &I);
break;
}
@@ -4225,8 +4806,9 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
bool FCmp = (I.getOpcode() == Instruction::FCmp);
auto *Cmp = dyn_cast<CmpInst>(&I);
setDebugLocFromInst(Builder, Cmp);
- VectorParts &A = getVectorValue(Cmp->getOperand(0));
- VectorParts &B = getVectorValue(Cmp->getOperand(1));
+ const VectorParts &A = getVectorValue(Cmp->getOperand(0));
+ const VectorParts &B = getVectorValue(Cmp->getOperand(1));
+ VectorParts Entry(UF);
for (unsigned Part = 0; Part < UF; ++Part) {
Value *C = nullptr;
if (FCmp) {
@@ -4238,6 +4820,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
Entry[Part] = C;
}
+ VectorLoopValueMap.initVector(&I, Entry);
addMetadata(Entry, &I);
break;
}
@@ -4268,8 +4851,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
auto ID = Legal->getInductionVars()->lookup(OldInduction);
if (isa<TruncInst>(CI) && CI->getOperand(0) == OldInduction &&
ID.getConstIntStepValue()) {
- widenIntInduction(OldInduction, Entry, cast<TruncInst>(CI));
- addMetadata(Entry, &I);
+ widenIntInduction(OldInduction, cast<TruncInst>(CI));
break;
}
@@ -4277,9 +4859,11 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
Type *DestTy =
(VF == 1) ? CI->getType() : VectorType::get(CI->getType(), VF);
- VectorParts &A = getVectorValue(CI->getOperand(0));
+ const VectorParts &A = getVectorValue(CI->getOperand(0));
+ VectorParts Entry(UF);
for (unsigned Part = 0; Part < UF; ++Part)
Entry[Part] = Builder.CreateCast(CI->getOpcode(), A[Part], DestTy);
+ VectorLoopValueMap.initVector(&I, Entry);
addMetadata(Entry, &I);
break;
}
@@ -4318,6 +4902,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
break;
}
+ VectorParts Entry(UF);
for (unsigned Part = 0; Part < UF; ++Part) {
SmallVector<Value *, 4> Args;
for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) {
@@ -4325,7 +4910,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
// Some intrinsics have a scalar argument - don't replace it with a
// vector.
if (!UseVectorIntrinsic || !hasVectorInstrinsicScalarOpd(ID, i)) {
- VectorParts &VectorArg = getVectorValue(CI->getArgOperand(i));
+ const VectorParts &VectorArg = getVectorValue(CI->getArgOperand(i));
Arg = VectorArg[Part];
}
Args.push_back(Arg);
@@ -4363,6 +4948,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
Entry[Part] = V;
}
+ VectorLoopValueMap.initVector(&I, Entry);
addMetadata(Entry, &I);
break;
}
@@ -4414,7 +5000,8 @@ static bool canIfConvertPHINodes(BasicBlock *BB) {
bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
if (!EnableIfConversion) {
- emitAnalysis(VectorizationReport() << "if-conversion is disabled");
+ ORE->emit(createMissedAnalysis("IfConversionDisabled")
+ << "if-conversion is disabled");
return false;
}
@@ -4428,12 +5015,9 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
if (blockNeedsPredication(BB))
continue;
- for (Instruction &I : *BB) {
- if (auto *LI = dyn_cast<LoadInst>(&I))
- SafePointes.insert(LI->getPointerOperand());
- else if (auto *SI = dyn_cast<StoreInst>(&I))
- SafePointes.insert(SI->getPointerOperand());
- }
+ for (Instruction &I : *BB)
+ if (auto *Ptr = getPointerOperand(&I))
+ SafePointes.insert(Ptr);
}
// Collect the blocks that need predication.
@@ -4441,21 +5025,21 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
for (BasicBlock *BB : TheLoop->blocks()) {
// We don't support switch statements inside loops.
if (!isa<BranchInst>(BB->getTerminator())) {
- emitAnalysis(VectorizationReport(BB->getTerminator())
- << "loop contains a switch statement");
+ ORE->emit(createMissedAnalysis("LoopContainsSwitch", BB->getTerminator())
+ << "loop contains a switch statement");
return false;
}
// We must be able to predicate all blocks that need to be predicated.
if (blockNeedsPredication(BB)) {
if (!blockCanBePredicated(BB, SafePointes)) {
- emitAnalysis(VectorizationReport(BB->getTerminator())
- << "control flow cannot be substituted for a select");
+ ORE->emit(createMissedAnalysis("NoCFGForSelect", BB->getTerminator())
+ << "control flow cannot be substituted for a select");
return false;
}
} else if (BB != Header && !canIfConvertPHINodes(BB)) {
- emitAnalysis(VectorizationReport(BB->getTerminator())
- << "control flow cannot be substituted for a select");
+ ORE->emit(createMissedAnalysis("NoCFGForSelect", BB->getTerminator())
+ << "control flow cannot be substituted for a select");
return false;
}
}
@@ -4468,8 +5052,8 @@ bool LoopVectorizationLegality::canVectorize() {
// We must have a loop in canonical form. Loops with indirectbr in them cannot
// be canonicalized.
if (!TheLoop->getLoopPreheader()) {
- emitAnalysis(VectorizationReport()
- << "loop control flow is not understood by vectorizer");
+ ORE->emit(createMissedAnalysis("CFGNotUnderstood")
+ << "loop control flow is not understood by vectorizer");
return false;
}
@@ -4478,21 +5062,22 @@ bool LoopVectorizationLegality::canVectorize() {
//
// We can only vectorize innermost loops.
if (!TheLoop->empty()) {
- emitAnalysis(VectorizationReport() << "loop is not the innermost loop");
+ ORE->emit(createMissedAnalysis("NotInnermostLoop")
+ << "loop is not the innermost loop");
return false;
}
// We must have a single backedge.
if (TheLoop->getNumBackEdges() != 1) {
- emitAnalysis(VectorizationReport()
- << "loop control flow is not understood by vectorizer");
+ ORE->emit(createMissedAnalysis("CFGNotUnderstood")
+ << "loop control flow is not understood by vectorizer");
return false;
}
// We must have a single exiting block.
if (!TheLoop->getExitingBlock()) {
- emitAnalysis(VectorizationReport()
- << "loop control flow is not understood by vectorizer");
+ ORE->emit(createMissedAnalysis("CFGNotUnderstood")
+ << "loop control flow is not understood by vectorizer");
return false;
}
@@ -4500,8 +5085,8 @@ bool LoopVectorizationLegality::canVectorize() {
// checked at the end of each iteration. With that we can assume that all
// instructions in the loop are executed the same number of times.
if (TheLoop->getExitingBlock() != TheLoop->getLoopLatch()) {
- emitAnalysis(VectorizationReport()
- << "loop control flow is not understood by vectorizer");
+ ORE->emit(createMissedAnalysis("CFGNotUnderstood")
+ << "loop control flow is not understood by vectorizer");
return false;
}
@@ -4519,8 +5104,8 @@ bool LoopVectorizationLegality::canVectorize() {
// ScalarEvolution needs to be able to find the exit count.
const SCEV *ExitCount = PSE.getBackedgeTakenCount();
if (ExitCount == PSE.getSE()->getCouldNotCompute()) {
- emitAnalysis(VectorizationReport()
- << "could not determine number of loop iterations");
+ ORE->emit(createMissedAnalysis("CantComputeNumberOfIterations")
+ << "could not determine number of loop iterations");
DEBUG(dbgs() << "LV: SCEV could not compute the loop exit count.\n");
return false;
}
@@ -4537,9 +5122,6 @@ bool LoopVectorizationLegality::canVectorize() {
return false;
}
- // Collect all of the variables that remain uniform after vectorization.
- collectLoopUniforms();
-
DEBUG(dbgs() << "LV: We can vectorize this loop"
<< (LAI->getRuntimePointerChecking()->Need
? " (with a runtime bound check)"
@@ -4556,14 +5138,20 @@ bool LoopVectorizationLegality::canVectorize() {
if (UseInterleaved)
InterleaveInfo.analyzeInterleaving(*getSymbolicStrides());
+ // Collect all instructions that are known to be uniform after vectorization.
+ collectLoopUniforms();
+
+ // Collect all instructions that are known to be scalar after vectorization.
+ collectLoopScalars();
+
unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) {
- emitAnalysis(VectorizationReport()
- << "Too many SCEV assumptions need to be made and checked "
- << "at runtime");
+ ORE->emit(createMissedAnalysis("TooManySCEVRunTimeChecks")
+ << "Too many SCEV assumptions need to be made and checked "
+ << "at runtime");
DEBUG(dbgs() << "LV: Too many SCEV checks needed.\n");
return false;
}
@@ -4621,10 +5209,12 @@ void LoopVectorizationLegality::addInductionPhi(
const DataLayout &DL = Phi->getModule()->getDataLayout();
// Get the widest type.
- if (!WidestIndTy)
- WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
- else
- WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
+ if (!PhiTy->isFloatingPointTy()) {
+ if (!WidestIndTy)
+ WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
+ else
+ WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
+ }
// Int inductions are special because we only allow one IV.
if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
@@ -4667,8 +5257,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// Check that this PHI type is allowed.
if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
!PhiTy->isPointerTy()) {
- emitAnalysis(VectorizationReport(Phi)
- << "loop control flow is not understood by vectorizer");
+ ORE->emit(createMissedAnalysis("CFGNotUnderstood", Phi)
+ << "loop control flow is not understood by vectorizer");
DEBUG(dbgs() << "LV: Found an non-int non-pointer PHI.\n");
return false;
}
@@ -4681,16 +5271,16 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// identified reduction value with an outside user.
if (!hasOutsideLoopUser(TheLoop, Phi, AllowedExit))
continue;
- emitAnalysis(VectorizationReport(Phi)
- << "value could not be identified as "
- "an induction or reduction variable");
+ ORE->emit(createMissedAnalysis("NeitherInductionNorReduction", Phi)
+ << "value could not be identified as "
+ "an induction or reduction variable");
return false;
}
// We only allow if-converted PHIs with exactly two incoming values.
if (Phi->getNumIncomingValues() != 2) {
- emitAnalysis(VectorizationReport(Phi)
- << "control flow not understood by vectorizer");
+ ORE->emit(createMissedAnalysis("CFGNotUnderstood", Phi)
+ << "control flow not understood by vectorizer");
DEBUG(dbgs() << "LV: Found an invalid PHI.\n");
return false;
}
@@ -4705,8 +5295,10 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
}
InductionDescriptor ID;
- if (InductionDescriptor::isInductionPHI(Phi, PSE, ID)) {
+ if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) {
addInductionPhi(Phi, ID, AllowedExit);
+ if (ID.hasUnsafeAlgebra() && !HasFunNoNaNAttr)
+ Requirements->addUnsafeAlgebraInst(ID.getUnsafeAlgebraInst());
continue;
}
@@ -4717,14 +5309,14 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// As a last resort, coerce the PHI to an AddRec expression
// and re-try classifying it as an induction PHI.
- if (InductionDescriptor::isInductionPHI(Phi, PSE, ID, true)) {
+ if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) {
addInductionPhi(Phi, ID, AllowedExit);
continue;
}
- emitAnalysis(VectorizationReport(Phi)
- << "value that could not be identified as "
- "reduction is used outside the loop");
+ ORE->emit(createMissedAnalysis("NonReductionValueUsedOutsideLoop", Phi)
+ << "value that could not be identified as "
+ "reduction is used outside the loop");
DEBUG(dbgs() << "LV: Found an unidentified PHI." << *Phi << "\n");
return false;
} // end of PHI handling
@@ -4738,8 +5330,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
!isa<DbgInfoIntrinsic>(CI) &&
!(CI->getCalledFunction() && TLI &&
TLI->isFunctionVectorizable(CI->getCalledFunction()->getName()))) {
- emitAnalysis(VectorizationReport(CI)
- << "call instruction cannot be vectorized");
+ ORE->emit(createMissedAnalysis("CantVectorizeCall", CI)
+ << "call instruction cannot be vectorized");
DEBUG(dbgs() << "LV: Found a non-intrinsic, non-libfunc callsite.\n");
return false;
}
@@ -4750,8 +5342,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
getVectorIntrinsicIDForCall(CI, TLI), 1)) {
auto *SE = PSE.getSE();
if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(1)), TheLoop)) {
- emitAnalysis(VectorizationReport(CI)
- << "intrinsic instruction cannot be vectorized");
+ ORE->emit(createMissedAnalysis("CantVectorizeIntrinsic", CI)
+ << "intrinsic instruction cannot be vectorized");
DEBUG(dbgs() << "LV: Found unvectorizable intrinsic " << *CI << "\n");
return false;
}
@@ -4762,8 +5354,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if ((!VectorType::isValidElementType(I.getType()) &&
!I.getType()->isVoidTy()) ||
isa<ExtractElementInst>(I)) {
- emitAnalysis(VectorizationReport(&I)
- << "instruction return type cannot be vectorized");
+ ORE->emit(createMissedAnalysis("CantVectorizeInstructionReturnType", &I)
+ << "instruction return type cannot be vectorized");
DEBUG(dbgs() << "LV: Found unvectorizable type.\n");
return false;
}
@@ -4772,8 +5364,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (auto *ST = dyn_cast<StoreInst>(&I)) {
Type *T = ST->getValueOperand()->getType();
if (!VectorType::isValidElementType(T)) {
- emitAnalysis(VectorizationReport(ST)
- << "store instruction cannot be vectorized");
+ ORE->emit(createMissedAnalysis("CantVectorizeStore", ST)
+ << "store instruction cannot be vectorized");
return false;
}
@@ -4791,8 +5383,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
// Reduction instructions are allowed to have exit users.
// All other instructions must not have external users.
if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
- emitAnalysis(VectorizationReport(&I)
- << "value cannot be used outside the loop");
+ ORE->emit(createMissedAnalysis("ValueUsedOutsideLoop", &I)
+ << "value cannot be used outside the loop");
return false;
}
@@ -4802,8 +5394,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
if (!Induction) {
DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
if (Inductions.empty()) {
- emitAnalysis(VectorizationReport()
- << "loop induction variable could not be identified");
+ ORE->emit(createMissedAnalysis("NoInductionVariable")
+ << "loop induction variable could not be identified");
return false;
}
}
@@ -4817,12 +5409,132 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
return true;
}
+void LoopVectorizationLegality::collectLoopScalars() {
+
+ // If an instruction is uniform after vectorization, it will remain scalar.
+ Scalars.insert(Uniforms.begin(), Uniforms.end());
+
+ // Collect the getelementptr instructions that will not be vectorized. A
+ // getelementptr instruction is only vectorized if it is used for a legal
+ // gather or scatter operation.
+ for (auto *BB : TheLoop->blocks())
+ for (auto &I : *BB) {
+ if (auto *GEP = dyn_cast<GetElementPtrInst>(&I)) {
+ Scalars.insert(GEP);
+ continue;
+ }
+ auto *Ptr = getPointerOperand(&I);
+ if (!Ptr)
+ continue;
+ auto *GEP = getGEPInstruction(Ptr);
+ if (GEP && isLegalGatherOrScatter(&I))
+ Scalars.erase(GEP);
+ }
+
+ // An induction variable will remain scalar if all users of the induction
+ // variable and induction variable update remain scalar.
+ auto *Latch = TheLoop->getLoopLatch();
+ for (auto &Induction : *getInductionVars()) {
+ auto *Ind = Induction.first;
+ auto *IndUpdate = cast<Instruction>(Ind->getIncomingValueForBlock(Latch));
+
+ // Determine if all users of the induction variable are scalar after
+ // vectorization.
+ auto ScalarInd = all_of(Ind->users(), [&](User *U) -> bool {
+ auto *I = cast<Instruction>(U);
+ return I == IndUpdate || !TheLoop->contains(I) || Scalars.count(I);
+ });
+ if (!ScalarInd)
+ continue;
+
+ // Determine if all users of the induction variable update instruction are
+ // scalar after vectorization.
+ auto ScalarIndUpdate = all_of(IndUpdate->users(), [&](User *U) -> bool {
+ auto *I = cast<Instruction>(U);
+ return I == Ind || !TheLoop->contains(I) || Scalars.count(I);
+ });
+ if (!ScalarIndUpdate)
+ continue;
+
+ // The induction variable and its update instruction will remain scalar.
+ Scalars.insert(Ind);
+ Scalars.insert(IndUpdate);
+ }
+}
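// For example (hypothetical uses): a getelementptr that feeds only a legal
// masked gather is erased from Scalars again and will be widened, while a
// getelementptr whose memory access must be scalarized stays in the set.
// Likewise, an induction phi and its update instruction remain scalar when
// their only in-loop users are such scalar address computations.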
+
+bool LoopVectorizationLegality::hasConsecutiveLikePtrOperand(Instruction *I) {
+ if (isAccessInterleaved(I))
+ return true;
+ if (auto *Ptr = getPointerOperand(I))
+ return isConsecutivePtr(Ptr);
+ return false;
+}
+
+bool LoopVectorizationLegality::isScalarWithPredication(Instruction *I) {
+ if (!blockNeedsPredication(I->getParent()))
+ return false;
+ switch(I->getOpcode()) {
+ default:
+ break;
+ case Instruction::Store:
+ return !isMaskRequired(I);
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::SRem:
+ case Instruction::URem:
+ return mayDivideByZero(*I);
+ }
+ return false;
+}
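// For example (hypothetical source loop, sketched only to illustrate the
// check above):
//
//   for (int i = 0; i < n; ++i)
//     if (b[i] != 0)
//       a[i] = a[i] / b[i];
//
// After if-conversion the SDiv/UDiv would otherwise execute for every vector
// lane, including lanes where b[i] == 0, and could trap. Reporting the divide
// as scalar-with-predication makes the vectorizer clone it inside a per-lane
// predicated block, preserving the original trap semantics.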
+
+bool LoopVectorizationLegality::memoryInstructionMustBeScalarized(
+ Instruction *I, unsigned VF) {
+
+ // If the memory instruction is in an interleaved group, it will be
+ // vectorized and its pointer will remain uniform.
+ if (isAccessInterleaved(I))
+ return false;
+
+ // Get and ensure we have a valid memory instruction.
+ LoadInst *LI = dyn_cast<LoadInst>(I);
+ StoreInst *SI = dyn_cast<StoreInst>(I);
+ assert((LI || SI) && "Invalid memory instruction");
+
+ // If the pointer operand is uniform (loop invariant), the memory instruction
+ // will be scalarized.
+ auto *Ptr = getPointerOperand(I);
+ if (LI && isUniform(Ptr))
+ return true;
+
+ // If the pointer operand is non-consecutive and neither a gather nor a
+ // scatter operation is legal, the memory instruction will be scalarized.
+ if (!isConsecutivePtr(Ptr) && !isLegalGatherOrScatter(I))
+ return true;
+
+ // If the instruction is a store located in a predicated block, it will be
+ // scalarized.
+ if (isScalarWithPredication(I))
+ return true;
+
+ // If the instruction's allocated size doesn't equal its type size, it
+ // requires padding and will be scalarized.
+ auto &DL = I->getModule()->getDataLayout();
+ auto *ScalarTy = LI ? LI->getType() : SI->getValueOperand()->getType();
+ if (hasIrregularType(ScalarTy, DL, VF))
+ return true;
+
+ // Otherwise, the memory instruction should be vectorized if the rest of the
+ // loop is.
+ return false;
+}
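// For example (hypothetical access): a load of a[3 * i] has a non-consecutive,
// stride-3 pointer, so if the target has no legal masked gather for the
// element type, the second check above fires and the load is broken into VF
// scalar loads. A unit-stride load of a[i] passes every check and is widened
// into a single vector load instead.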
+
void LoopVectorizationLegality::collectLoopUniforms() {
// We now know that the loop is vectorizable!
- // Collect variables that will remain uniform after vectorization.
+ // Collect instructions inside the loop that will remain uniform after
+ // vectorization.
- // If V is not an instruction inside the current loop, it is a Value
- // outside of the scope which we are interesting in.
+ // Global values, parameters, and instructions outside of the current loop
+ // are out of scope.
auto isOutOfScope = [&](Value *V) -> bool {
Instruction *I = dyn_cast<Instruction>(V);
return (!I || !TheLoop->contains(I));
@@ -4830,30 +5542,75 @@ void LoopVectorizationLegality::collectLoopUniforms() {
SetVector<Instruction *> Worklist;
BasicBlock *Latch = TheLoop->getLoopLatch();
- // Start with the conditional branch.
- if (!isOutOfScope(Latch->getTerminator()->getOperand(0))) {
- Instruction *Cmp = cast<Instruction>(Latch->getTerminator()->getOperand(0));
+
+ // Start with the conditional branch. If the branch condition is an
+ // instruction contained in the loop that is only used by the branch, it is
+ // uniform.
+ auto *Cmp = dyn_cast<Instruction>(Latch->getTerminator()->getOperand(0));
+ if (Cmp && TheLoop->contains(Cmp) && Cmp->hasOneUse()) {
Worklist.insert(Cmp);
DEBUG(dbgs() << "LV: Found uniform instruction: " << *Cmp << "\n");
}
- // Also add all consecutive pointer values; these values will be uniform
- // after vectorization (and subsequent cleanup).
- for (auto *BB : TheLoop->blocks()) {
+ // Holds consecutive and consecutive-like pointers. Consecutive-like pointers
+ // are pointers that are treated like consecutive pointers during
+ // vectorization. The pointer operands of interleaved accesses are an
+ // example.
+ SmallSetVector<Instruction *, 8> ConsecutiveLikePtrs;
+
+ // Holds pointer operands of instructions that are possibly non-uniform.
+ SmallPtrSet<Instruction *, 8> PossibleNonUniformPtrs;
+
+ // Iterate over the instructions in the loop, and collect all
+ // consecutive-like pointer operands in ConsecutiveLikePtrs. If it's possible
+ // that a consecutive-like pointer operand will be scalarized, we collect it
+ // in PossibleNonUniformPtrs instead. We use two sets here because a single
+ // getelementptr instruction can be used by both vectorized and scalarized
+ // memory instructions. For example, if a loop loads and stores from the same
+ // location, but the store is conditional, the store will be scalarized, and
+ // the getelementptr won't remain uniform.
+ for (auto *BB : TheLoop->blocks())
for (auto &I : *BB) {
- if (I.getType()->isPointerTy() && isConsecutivePtr(&I)) {
- Worklist.insert(&I);
- DEBUG(dbgs() << "LV: Found uniform instruction: " << I << "\n");
- }
+
+ // If there's no pointer operand, there's nothing to do.
+ auto *Ptr = dyn_cast_or_null<Instruction>(getPointerOperand(&I));
+ if (!Ptr)
+ continue;
+
+ // True if all users of Ptr are memory accesses that have Ptr as their
+ // pointer operand.
+ auto UsersAreMemAccesses = all_of(Ptr->users(), [&](User *U) -> bool {
+ return getPointerOperand(U) == Ptr;
+ });
+
+ // Ensure the memory instruction will not be scalarized, making its
+ // pointer operand non-uniform. If the pointer operand is used by some
+ // instruction other than a memory access, we're not going to check if
+ // that other instruction may be scalarized here. Thus, conservatively
+ // assume the pointer operand may be non-uniform.
+ if (!UsersAreMemAccesses || memoryInstructionMustBeScalarized(&I))
+ PossibleNonUniformPtrs.insert(Ptr);
+
+ // If the memory instruction will be vectorized and its pointer operand
+ // is consecutive-like, the pointer operand should remain uniform.
+ else if (hasConsecutiveLikePtrOperand(&I))
+ ConsecutiveLikePtrs.insert(Ptr);
+ }
+
+ // Add to the Worklist all consecutive and consecutive-like pointers that
+ // aren't also identified as possibly non-uniform.
+ for (auto *V : ConsecutiveLikePtrs)
+ if (!PossibleNonUniformPtrs.count(V)) {
+ DEBUG(dbgs() << "LV: Found uniform instruction: " << *V << "\n");
+ Worklist.insert(V);
}
- }
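// For example (hypothetical IR), when the same address feeds an unconditional
// load and a conditional store:
//
//   %gep = getelementptr inbounds i32, i32* %a, i64 %i
//   %v   = load i32, i32* %gep          ; consecutive, will be widened
//   store i32 %x, i32* %gep             ; predicated, may be scalarized
//
// the load puts %gep into ConsecutiveLikePtrs while the store puts it into
// PossibleNonUniformPtrs, so %gep is kept out of the Worklist and is not
// treated as uniform.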
// Expand Worklist in topological order: whenever a new instruction
// is added, its users should be either already inside Worklist or
// out of scope. This ensures a uniform instruction will only be used
// by uniform instructions or out of scope instructions.
unsigned idx = 0;
- do {
+ while (idx != Worklist.size()) {
Instruction *I = Worklist[idx++];
for (auto OV : I->operand_values()) {
@@ -4867,32 +5624,49 @@ void LoopVectorizationLegality::collectLoopUniforms() {
DEBUG(dbgs() << "LV: Found uniform instruction: " << *OI << "\n");
}
}
- } while (idx != Worklist.size());
+ }
+
+ // Returns true if Ptr is the pointer operand of a memory access instruction
+ // I, and I is known to not require scalarization.
+ auto isVectorizedMemAccessUse = [&](Instruction *I, Value *Ptr) -> bool {
+ return getPointerOperand(I) == Ptr && !memoryInstructionMustBeScalarized(I);
+ };
// For an instruction to be added into Worklist above, all its users inside
- // the current loop should be already added into Worklist. This condition
- // cannot be true for phi instructions which is always in a dependence loop.
- // Because any instruction in the dependence cycle always depends on others
- // in the cycle to be added into Worklist first, the result is no ones in
- // the cycle will be added into Worklist in the end.
- // That is why we process PHI separately.
- for (auto &Induction : *getInductionVars()) {
- auto *PN = Induction.first;
- auto *UpdateV = PN->getIncomingValueForBlock(TheLoop->getLoopLatch());
- if (all_of(PN->users(),
- [&](User *U) -> bool {
- return U == UpdateV || isOutOfScope(U) ||
- Worklist.count(cast<Instruction>(U));
- }) &&
- all_of(UpdateV->users(), [&](User *U) -> bool {
- return U == PN || isOutOfScope(U) ||
- Worklist.count(cast<Instruction>(U));
- })) {
- Worklist.insert(cast<Instruction>(PN));
- Worklist.insert(cast<Instruction>(UpdateV));
- DEBUG(dbgs() << "LV: Found uniform instruction: " << *PN << "\n");
- DEBUG(dbgs() << "LV: Found uniform instruction: " << *UpdateV << "\n");
- }
+ // the loop should also be in Worklist. However, this condition cannot be
+ // true for phi nodes that form a cyclic dependence. We must process phi
+ // nodes separately. An induction variable will remain uniform if all users
+ // of the induction variable and induction variable update remain uniform.
+ // The code below handles both pointer and non-pointer induction variables.
+ for (auto &Induction : Inductions) {
+ auto *Ind = Induction.first;
+ auto *IndUpdate = cast<Instruction>(Ind->getIncomingValueForBlock(Latch));
+
+ // Determine if all users of the induction variable are uniform after
+ // vectorization.
+ auto UniformInd = all_of(Ind->users(), [&](User *U) -> bool {
+ auto *I = cast<Instruction>(U);
+ return I == IndUpdate || !TheLoop->contains(I) || Worklist.count(I) ||
+ isVectorizedMemAccessUse(I, Ind);
+ });
+ if (!UniformInd)
+ continue;
+
+ // Determine if all users of the induction variable update instruction are
+ // uniform after vectorization.
+ auto UniformIndUpdate = all_of(IndUpdate->users(), [&](User *U) -> bool {
+ auto *I = cast<Instruction>(U);
+ return I == Ind || !TheLoop->contains(I) || Worklist.count(I) ||
+ isVectorizedMemAccessUse(I, IndUpdate);
+ });
+ if (!UniformIndUpdate)
+ continue;
+
+ // The induction variable and its update instruction will remain uniform.
+ Worklist.insert(Ind);
+ Worklist.insert(IndUpdate);
+ DEBUG(dbgs() << "LV: Found uniform instruction: " << *Ind << "\n");
+ DEBUG(dbgs() << "LV: Found uniform instruction: " << *IndUpdate << "\n");
}
Uniforms.insert(Worklist.begin(), Worklist.end());
@@ -4901,16 +5675,18 @@ void LoopVectorizationLegality::collectLoopUniforms() {
bool LoopVectorizationLegality::canVectorizeMemory() {
LAI = &(*GetLAA)(*TheLoop);
InterleaveInfo.setLAI(LAI);
- auto &OptionalReport = LAI->getReport();
- if (OptionalReport)
- emitAnalysis(VectorizationReport(*OptionalReport));
+ const OptimizationRemarkAnalysis *LAR = LAI->getReport();
+ if (LAR) {
+ OptimizationRemarkAnalysis VR(Hints->vectorizeAnalysisPassName(),
+ "loop not vectorized: ", *LAR);
+ ORE->emit(VR);
+ }
if (!LAI->canVectorizeMemory())
return false;
if (LAI->hasStoreToLoopInvariantAddress()) {
- emitAnalysis(
- VectorizationReport()
- << "write to a loop invariant address could not be vectorized");
+ ORE->emit(createMissedAnalysis("CantVectorizeStoreToLoopInvariantAddress")
+ << "write to a loop invariant address could not be vectorized");
DEBUG(dbgs() << "LV: We don't allow storing to uniform addresses\n");
return false;
}
@@ -4967,7 +5743,6 @@ bool LoopVectorizationLegality::blockCanBePredicated(
}
}
- // We don't predicate stores at the moment.
if (I.mayWriteToMemory()) {
auto *SI = dyn_cast<StoreInst>(&I);
// We only support predication of stores in basic blocks with one
@@ -4992,17 +5767,6 @@ bool LoopVectorizationLegality::blockCanBePredicated(
}
if (I.mayThrow())
return false;
-
- // The instructions below can trap.
- switch (I.getOpcode()) {
- default:
- continue;
- case Instruction::UDiv:
- case Instruction::SDiv:
- case Instruction::URem:
- case Instruction::SRem:
- return false;
- }
}
return true;
@@ -5029,8 +5793,16 @@ void InterleavedAccessInfo::collectConstStrideAccesses(
if (!LI && !SI)
continue;
- Value *Ptr = LI ? LI->getPointerOperand() : SI->getPointerOperand();
- int64_t Stride = getPtrStride(PSE, Ptr, TheLoop, Strides);
+ Value *Ptr = getPointerOperand(&I);
+ // We don't check wrapping here because we don't know yet if Ptr will be
+ // part of a full group or a group with gaps. Checking wrapping for all
+ // pointers (even those that end up in groups with no gaps) will be overly
+ // conservative. For full groups, wrapping should be ok since if we would
+ // wrap around the address space we would do a memory access at nullptr
+ // even without the transformation. The wrapping checks are therefore
+ // deferred until after we've formed the interleaved groups.
+ int64_t Stride = getPtrStride(PSE, Ptr, TheLoop, Strides,
+ /*Assume=*/true, /*ShouldCheckWrap=*/false);
const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
@@ -5234,20 +6006,66 @@ void InterleavedAccessInfo::analyzeInterleaving(
if (Group->getNumMembers() != Group->getFactor())
releaseGroup(Group);
- // If there is a non-reversed interleaved load group with gaps, we will need
- // to execute at least one scalar epilogue iteration. This will ensure that
- // we don't speculatively access memory out-of-bounds. Note that we only need
- // to look for a member at index factor - 1, since every group must have a
- // member at index zero.
- for (InterleaveGroup *Group : LoadGroups)
- if (!Group->getMember(Group->getFactor() - 1)) {
+ // Remove interleaved groups with gaps (currently only loads) whose memory
+ // accesses may wrap around. We have to revisit the getPtrStride analysis,
+ // this time with ShouldCheckWrap=true, since collectConstStrideAccesses does
+ // not check wrapping (see documentation there).
+ // FORNOW we use Assume=false;
+ // TODO: Change to Assume=true but making sure we don't exceed the threshold
+ // of runtime SCEV assumptions checks (thereby potentially failing to
+ // vectorize altogether).
+ // Additional optional optimizations:
+ // TODO: If we are peeling the loop and we know that the first pointer doesn't
+ // wrap, then we can deduce that all pointers in the group don't wrap.
+ // This means that we can forcefully peel the loop in order to only have to
+ // check the first pointer for no-wrap. When we change to use Assume=true,
+ // we'll only need at most one runtime check per interleaved group.
+ //
+ for (InterleaveGroup *Group : LoadGroups) {
+
+ // Case 1: A full group. We can skip the checks; for full groups, if the wide
+ // load would wrap around the address space, we would do a memory access at
+ // nullptr even without the transformation.
+ if (Group->getNumMembers() == Group->getFactor())
+ continue;
+
+ // Case 2: If the first and last members of the group don't wrap, this implies
+ // that all the pointers in the group don't wrap.
+ // So we check only group member 0 (which is always guaranteed to exist),
+ // and group member Factor - 1; if the latter doesn't exist, we rely on
+ // peeling (if it is a non-reversed access -- see Case 3).
+ Value *FirstMemberPtr = getPointerOperand(Group->getMember(0));
+ if (!getPtrStride(PSE, FirstMemberPtr, TheLoop, Strides, /*Assume=*/false,
+ /*ShouldCheckWrap=*/true)) {
+ DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to "
+ "first group member potentially pointer-wrapping.\n");
+ releaseGroup(Group);
+ continue;
+ }
+ Instruction *LastMember = Group->getMember(Group->getFactor() - 1);
+ if (LastMember) {
+ Value *LastMemberPtr = getPointerOperand(LastMember);
+ if (!getPtrStride(PSE, LastMemberPtr, TheLoop, Strides, /*Assume=*/false,
+ /*ShouldCheckWrap=*/true)) {
+ DEBUG(dbgs() << "LV: Invalidate candidate interleaved group due to "
+ "last group member potentially pointer-wrapping.\n");
+ releaseGroup(Group);
+ }
+ } else {
+ // Case 3: A non-reversed interleaved load group with gaps: We need
+ // to execute at least one scalar epilogue iteration. This will ensure
+ // we don't speculatively access memory out-of-bounds. We only need
+ // to look for a member at index factor - 1, since every group must have
+ // a member at index zero.
if (Group->isReverse()) {
releaseGroup(Group);
- } else {
- DEBUG(dbgs() << "LV: Interleaved group requires epilogue iteration.\n");
- RequiresScalarEpilogue = true;
+ continue;
}
+ DEBUG(dbgs() << "LV: Interleaved group requires epilogue iteration.\n");
+ RequiresScalarEpilogue = true;
}
+ }
}
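// For example (hypothetical factor-2 load group over a[2*i] and a[2*i+1]): if
// both members are present, the group is full and needs no wrap check (Case
// 1). If only a[2*i] is present, the stride of member 0 (and of member
// Factor - 1 when it exists) is re-evaluated with ShouldCheckWrap=true (Case
// 2), and a non-reversed group with the remaining gap also forces a scalar
// epilogue iteration so the wide load cannot speculatively read out of bounds
// (Case 3).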
LoopVectorizationCostModel::VectorizationFactor
@@ -5255,28 +6073,22 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
// Width 1 means no vectorize
VectorizationFactor Factor = {1U, 0U};
if (OptForSize && Legal->getRuntimePointerChecking()->Need) {
- emitAnalysis(
- VectorizationReport()
- << "runtime pointer checks needed. Enable vectorization of this "
- "loop with '#pragma clang loop vectorize(enable)' when "
- "compiling with -Os/-Oz");
+ ORE->emit(createMissedAnalysis("CantVersionLoopWithOptForSize")
+ << "runtime pointer checks needed. Enable vectorization of this "
+ "loop with '#pragma clang loop vectorize(enable)' when "
+ "compiling with -Os/-Oz");
DEBUG(dbgs()
<< "LV: Aborting. Runtime ptr check is required with -Os/-Oz.\n");
return Factor;
}
if (!EnableCondStoresVectorization && Legal->getNumPredStores()) {
- emitAnalysis(
- VectorizationReport()
- << "store that is conditionally executed prevents vectorization");
+ ORE->emit(createMissedAnalysis("ConditionalStore")
+ << "store that is conditionally executed prevents vectorization");
DEBUG(dbgs() << "LV: No vectorization. There are conditional stores.\n");
return Factor;
}
- // Find the trip count.
- unsigned TC = PSE.getSE()->getSmallConstantTripCount(TheLoop);
- DEBUG(dbgs() << "LV: Found trip count: " << TC << '\n');
-
MinBWs = computeMinimumValueSizes(TheLoop->getBlocks(), *DB, &TTI);
unsigned SmallestType, WidestType;
std::tie(SmallestType, WidestType) = getSmallestAndWidestTypes();
@@ -5334,10 +6146,13 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
// If we optimize the program for size, avoid creating the tail loop.
if (OptForSize) {
- // If we are unable to calculate the trip count then don't try to vectorize.
+ unsigned TC = PSE.getSE()->getSmallConstantTripCount(TheLoop);
+ DEBUG(dbgs() << "LV: Found trip count: " << TC << '\n');
+
+ // If we don't know the precise trip count, don't try to vectorize.
if (TC < 2) {
- emitAnalysis(
- VectorizationReport()
+ ORE->emit(
+ createMissedAnalysis("UnknownLoopCountComplexCFG")
<< "unable to calculate the loop count due to complex control flow");
DEBUG(dbgs() << "LV: Aborting. A tail loop is required with -Os/-Oz.\n");
return Factor;
@@ -5351,11 +6166,11 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
else {
// If the trip count that we found modulo the vectorization factor is not
// zero then we require a tail.
- emitAnalysis(VectorizationReport()
- << "cannot optimize for size and vectorize at the "
- "same time. Enable vectorization of this loop "
- "with '#pragma clang loop vectorize(enable)' "
- "when compiling with -Os/-Oz");
+ ORE->emit(createMissedAnalysis("NoTailLoopWithOptForSize")
+ << "cannot optimize for size and vectorize at the "
+ "same time. Enable vectorization of this loop "
+ "with '#pragma clang loop vectorize(enable)' "
+ "when compiling with -Os/-Oz");
DEBUG(dbgs() << "LV: Aborting. A tail loop is required with -Os/-Oz.\n");
return Factor;
}
@@ -5367,6 +6182,7 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) {
DEBUG(dbgs() << "LV: Using user VF " << UserVF << ".\n");
Factor.Width = UserVF;
+ collectInstsToScalarize(UserVF);
return Factor;
}
@@ -5712,15 +6528,16 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<unsigned> VFs) {
for (unsigned int i = 0; i < Index; ++i) {
Instruction *I = IdxToInstr[i];
- // Ignore instructions that are never used within the loop.
- if (!Ends.count(I))
- continue;
// Remove all of the instructions that end at this location.
InstrList &List = TransposeEnds[i];
for (Instruction *ToRemove : List)
OpenIntervals.erase(ToRemove);
+ // Ignore instructions that are never used within the loop.
+ if (!Ends.count(I))
+ continue;
+
// Skip ignored values.
if (ValuesToIgnore.count(I))
continue;
@@ -5772,10 +6589,160 @@ LoopVectorizationCostModel::calculateRegisterUsage(ArrayRef<unsigned> VFs) {
return RUs;
}
+void LoopVectorizationCostModel::collectInstsToScalarize(unsigned VF) {
+
+ // If we aren't vectorizing the loop, or if we've already collected the
+ // instructions to scalarize, there's nothing to do. Collection may already
+ // have occurred if we have a user-selected VF and are now computing the
+ // expected cost for interleaving.
+ if (VF < 2 || InstsToScalarize.count(VF))
+ return;
+
+ // Initialize a mapping for VF in InstsToScalarize. If we find that it's
+ // not profitable to scalarize any instructions, the presence of VF in the
+ // map will indicate that we've analyzed it already.
+ ScalarCostsTy &ScalarCostsVF = InstsToScalarize[VF];
+
+ // Find all the instructions that are scalar with predication in the loop and
+ // determine if it would be better to not if-convert the blocks they are in.
+ // If so, we also record the instructions to scalarize.
+ for (BasicBlock *BB : TheLoop->blocks()) {
+ if (!Legal->blockNeedsPredication(BB))
+ continue;
+ for (Instruction &I : *BB)
+ if (Legal->isScalarWithPredication(&I)) {
+ ScalarCostsTy ScalarCosts;
+ if (computePredInstDiscount(&I, ScalarCosts, VF) >= 0)
+ ScalarCostsVF.insert(ScalarCosts.begin(), ScalarCosts.end());
+ }
+ }
+}
+
+int LoopVectorizationCostModel::computePredInstDiscount(
+ Instruction *PredInst, DenseMap<Instruction *, unsigned> &ScalarCosts,
+ unsigned VF) {
+
+ assert(!Legal->isUniformAfterVectorization(PredInst) &&
+ "Instruction marked uniform-after-vectorization will be predicated");
+
+ // Initialize the discount to zero, meaning that the scalar version and the
+ // vector version cost the same.
+ int Discount = 0;
+
+ // Holds instructions to analyze. The instructions we visit are mapped in
+ // ScalarCosts. Those instructions are the ones that would be scalarized if
+ // we find that the scalar version costs less.
+ SmallVector<Instruction *, 8> Worklist;
+
+ // Returns true if the given instruction can be scalarized.
+ auto canBeScalarized = [&](Instruction *I) -> bool {
+
+ // We only attempt to scalarize instructions forming a single-use chain
+ // from the original predicated block that would otherwise be vectorized.
+ // Although not strictly necessary, we give up on instructions we know will
+ // already be scalar to avoid traversing chains that are unlikely to be
+ // beneficial.
+ if (!I->hasOneUse() || PredInst->getParent() != I->getParent() ||
+ Legal->isScalarAfterVectorization(I))
+ return false;
+
+ // If the instruction is scalar with predication, it will be analyzed
+ // separately. We ignore it within the context of PredInst.
+ if (Legal->isScalarWithPredication(I))
+ return false;
+
+ // If any of the instruction's operands are uniform after vectorization,
+ // the instruction cannot be scalarized. This prevents, for example, a
+ // masked load from being scalarized.
+ //
+ // We assume we will only emit a value for lane zero of an instruction
+ // marked uniform after vectorization, rather than VF identical values.
+ // Thus, if we scalarize an instruction that uses a uniform, we would
+ // create uses of values corresponding to the lanes we aren't emitting code
+ // for. This behavior can be changed by allowing getScalarValue to clone
+ // the lane zero values for uniforms rather than asserting.
+ for (Use &U : I->operands())
+ if (auto *J = dyn_cast<Instruction>(U.get()))
+ if (Legal->isUniformAfterVectorization(J))
+ return false;
+
+ // Otherwise, we can scalarize the instruction.
+ return true;
+ };
+
+ // Returns true if an operand that cannot be scalarized must be extracted
+ // from a vector. We will account for this scalarization overhead below. Note
+ // that the non-void predicated instructions are placed in their own blocks,
+ // and their return values are inserted into vectors. Thus, an extract would
+ // still be required.
+ auto needsExtract = [&](Instruction *I) -> bool {
+ return TheLoop->contains(I) && !Legal->isScalarAfterVectorization(I);
+ };
+
+ // Compute the expected cost discount from scalarizing the entire expression
+ // feeding the predicated instruction. We currently only consider expressions
+ // that are single-use instruction chains.
+ Worklist.push_back(PredInst);
+ while (!Worklist.empty()) {
+ Instruction *I = Worklist.pop_back_val();
+
+ // If we've already analyzed the instruction, there's nothing to do.
+ if (ScalarCosts.count(I))
+ continue;
+
+ // Compute the cost of the vector instruction. Note that this cost already
+ // includes the scalarization overhead of the predicated instruction.
+ unsigned VectorCost = getInstructionCost(I, VF).first;
+
+ // Compute the cost of the scalarized instruction. This cost is the cost of
+ // the instruction as if it wasn't if-converted and instead remained in the
+ // predicated block. We will scale this cost by block probability after
+ // computing the scalarization overhead.
+ unsigned ScalarCost = VF * getInstructionCost(I, 1).first;
+
+ // Compute the scalarization overhead of needed insertelement instructions
+ // and phi nodes.
+ if (Legal->isScalarWithPredication(I) && !I->getType()->isVoidTy()) {
+ ScalarCost += getScalarizationOverhead(ToVectorTy(I->getType(), VF), true,
+ false, TTI);
+ ScalarCost += VF * TTI.getCFInstrCost(Instruction::PHI);
+ }
+
+ // Compute the scalarization overhead of needed extractelement
+ // instructions. For each of the instruction's operands, if the operand can
+ // be scalarized, add it to the worklist; otherwise, account for the
+ // overhead.
+ for (Use &U : I->operands())
+ if (auto *J = dyn_cast<Instruction>(U.get())) {
+ assert(VectorType::isValidElementType(J->getType()) &&
+ "Instruction has non-scalar type");
+ if (canBeScalarized(J))
+ Worklist.push_back(J);
+ else if (needsExtract(J))
+ ScalarCost += getScalarizationOverhead(ToVectorTy(J->getType(), VF),
+ false, true, TTI);
+ }
+
+ // Scale the total scalar cost by block probability.
+ ScalarCost /= getReciprocalPredBlockProb();
+
+ // Compute the discount. A non-negative discount means the vector version
+ // of the instruction costs more, and scalarizing would be beneficial.
+ Discount += VectorCost - ScalarCost;
+ ScalarCosts[I] = ScalarCost;
+ }
+
+ return Discount;
+}
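// A small worked example (the costs are invented for illustration): with
// VF = 4, suppose the if-converted vector form of a divide costs 12,
// including its scalarization overhead, and one scalar copy costs 2. The
// scalar estimate is 4 * 2 = 8, which is then divided by
// getReciprocalPredBlockProb() -- assumed here to model a 50% chance of
// reaching the predicated block -- giving 4. The discount is 12 - 4 = 8 >= 0,
// so the chain is recorded in ScalarCosts and later scalarized by the cost
// model.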
+
LoopVectorizationCostModel::VectorizationCostTy
LoopVectorizationCostModel::expectedCost(unsigned VF) {
VectorizationCostTy Cost;
+ // Collect the instructions (and their associated costs) that will be more
+ // profitable to scalarize.
+ collectInstsToScalarize(VF);
+
// For each block.
for (BasicBlock *BB : TheLoop->blocks()) {
VectorizationCostTy BlockCost;
@@ -5802,11 +6769,14 @@ LoopVectorizationCostModel::expectedCost(unsigned VF) {
<< VF << " For instruction: " << I << '\n');
}
- // We assume that if-converted blocks have a 50% chance of being executed.
- // When the code is scalar then some of the blocks are avoided due to CF.
- // When the code is vectorized we execute all code paths.
+ // If we are vectorizing a predicated block, it will have been
+ // if-converted. This means that the block's instructions (aside from
+ // stores and instructions that may divide by zero) will now be
+ // unconditionally executed. For the scalar case, we may not always execute
+ // the predicated block. Thus, scale the block's cost by the probability of
+ // executing it.
if (VF == 1 && Legal->blockNeedsPredication(BB))
- BlockCost.first /= 2;
+ BlockCost.first /= getReciprocalPredBlockProb();
Cost.first += BlockCost.first;
Cost.second |= BlockCost.second;
@@ -5815,19 +6785,6 @@ LoopVectorizationCostModel::expectedCost(unsigned VF) {
return Cost;
}
-/// \brief Check if the load/store instruction \p I may be translated into
-/// gather/scatter during vectorization.
-///
-/// Pointer \p Ptr specifies address in memory for the given scalar memory
-/// instruction. We need it to retrieve data type.
-/// Using gather/scatter is possible when it is supported by target.
-static bool isGatherOrScatterLegal(Instruction *I, Value *Ptr,
- LoopVectorizationLegality *Legal) {
- auto *DataTy = cast<PointerType>(Ptr->getType())->getElementType();
- return (isa<LoadInst>(I) && Legal->isLegalMaskedGather(DataTy)) ||
- (isa<StoreInst>(I) && Legal->isLegalMaskedScatter(DataTy));
-}
-
/// \brief Check whether the address computation for a non-consecutive memory
/// access looks like an unlikely candidate for being merged into the indexing
/// mode.
@@ -5893,6 +6850,9 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I, unsigned VF) {
if (Legal->isUniformAfterVectorization(I))
VF = 1;
+ if (VF > 1 && isProfitableToScalarize(I, VF))
+ return VectorizationCostTy(InstsToScalarize[VF][I], false);
+
Type *VectorTy;
unsigned C = getInstructionCost(I, VF, VectorTy);
@@ -5905,7 +6865,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
unsigned VF,
Type *&VectorTy) {
Type *RetTy = I->getType();
- if (VF > 1 && MinBWs.count(I))
+ if (canTruncateToMinimalBitwidth(I, VF))
RetTy = IntegerType::get(RetTy->getContext(), MinBWs[I]);
VectorTy = ToVectorTy(RetTy, VF);
auto SE = PSE.getSE();
@@ -5932,17 +6892,42 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
// TODO: IF-converted IFs become selects.
return 0;
}
+ case Instruction::UDiv:
+ case Instruction::SDiv:
+ case Instruction::URem:
+ case Instruction::SRem:
+ // If we have a predicated instruction, it may not be executed for each
+ // vector lane. Get the scalarization cost and scale this amount by the
+ // probability of executing the predicated block. If the instruction is not
+ // predicated, we fall through to the next case.
+ if (VF > 1 && Legal->isScalarWithPredication(I)) {
+ unsigned Cost = 0;
+
+ // These instructions have a non-void type, so account for the phi nodes
+ // that we will create. This cost is likely to be zero. The phi node
+ // cost, if any, should be scaled by the block probability because it
+ // models a copy at the end of each predicated block.
+ Cost += VF * TTI.getCFInstrCost(Instruction::PHI);
+
+ // The cost of the non-predicated instruction.
+ Cost += VF * TTI.getArithmeticInstrCost(I->getOpcode(), RetTy);
+
+ // The cost of insertelement and extractelement instructions needed for
+ // scalarization.
+ Cost += getScalarizationOverhead(I, VF, TTI);
+
+ // Scale the cost by the probability of executing the predicated blocks.
+ // This assumes the predicated block for each vector lane is equally
+ // likely.
+ return Cost / getReciprocalPredBlockProb();
+ }
case Instruction::Add:
case Instruction::FAdd:
case Instruction::Sub:
case Instruction::FSub:
case Instruction::Mul:
case Instruction::FMul:
- case Instruction::UDiv:
- case Instruction::SDiv:
case Instruction::FDiv:
- case Instruction::URem:
- case Instruction::SRem:
case Instruction::FRem:
case Instruction::Shl:
case Instruction::LShr:
@@ -5965,7 +6950,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
TargetTransformInfo::OP_None;
Value *Op2 = I->getOperand(1);
- // Check for a splat of a constant or for a non uniform vector of constants.
+ // Check for a splat or for a non-uniform vector of constants.
if (isa<ConstantInt>(Op2)) {
ConstantInt *CInt = cast<ConstantInt>(Op2);
if (CInt && CInt->getValue().isPowerOf2())
@@ -5980,6 +6965,8 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
Op2VP = TargetTransformInfo::OP_PowerOf2;
Op2VK = TargetTransformInfo::OK_UniformConstantValue;
}
+ } else if (Legal->isUniform(Op2)) {
+ Op2VK = TargetTransformInfo::OK_UniformValue;
}
return TTI.getArithmeticInstrCost(I->getOpcode(), VectorTy, Op1VK, Op2VK,
@@ -5999,9 +6986,8 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
case Instruction::FCmp: {
Type *ValTy = I->getOperand(0)->getType();
Instruction *Op0AsInstruction = dyn_cast<Instruction>(I->getOperand(0));
- auto It = MinBWs.find(Op0AsInstruction);
- if (VF > 1 && It != MinBWs.end())
- ValTy = IntegerType::get(ValTy->getContext(), It->second);
+ if (canTruncateToMinimalBitwidth(Op0AsInstruction, VF))
+ ValTy = IntegerType::get(ValTy->getContext(), MinBWs[Op0AsInstruction]);
VectorTy = ToVectorTy(ValTy, VF);
return TTI.getCmpSelInstrCost(I->getOpcode(), VectorTy);
}
@@ -6015,7 +7001,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
unsigned Alignment = SI ? SI->getAlignment() : LI->getAlignment();
unsigned AS =
SI ? SI->getPointerAddressSpace() : LI->getPointerAddressSpace();
- Value *Ptr = SI ? SI->getPointerOperand() : LI->getPointerOperand();
+ Value *Ptr = getPointerOperand(I);
// We add the cost of address computation here instead of with the gep
// instruction because only here we know whether the operation is
// scalarized.
@@ -6072,41 +7058,43 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
return Cost;
}
- // Scalarized loads/stores.
- int ConsecutiveStride = Legal->isConsecutivePtr(Ptr);
- bool UseGatherOrScatter =
- (ConsecutiveStride == 0) && isGatherOrScatterLegal(I, Ptr, Legal);
+ // Check if the memory instruction will be scalarized.
+ if (Legal->memoryInstructionMustBeScalarized(I, VF)) {
+ unsigned Cost = 0;
+ Type *PtrTy = ToVectorTy(Ptr->getType(), VF);
- bool Reverse = ConsecutiveStride < 0;
- const DataLayout &DL = I->getModule()->getDataLayout();
- uint64_t ScalarAllocatedSize = DL.getTypeAllocSize(ValTy);
- uint64_t VectorElementSize = DL.getTypeStoreSize(VectorTy) / VF;
- if ((!ConsecutiveStride && !UseGatherOrScatter) ||
- ScalarAllocatedSize != VectorElementSize) {
+ // True if the memory instruction's address computation is complex.
bool IsComplexComputation =
isLikelyComplexAddressComputation(Ptr, Legal, SE, TheLoop);
- unsigned Cost = 0;
- // The cost of extracting from the value vector and pointer vector.
- Type *PtrTy = ToVectorTy(Ptr->getType(), VF);
- for (unsigned i = 0; i < VF; ++i) {
- // The cost of extracting the pointer operand.
- Cost += TTI.getVectorInstrCost(Instruction::ExtractElement, PtrTy, i);
- // In case of STORE, the cost of ExtractElement from the vector.
- // In case of LOAD, the cost of InsertElement into the returned
- // vector.
- Cost += TTI.getVectorInstrCost(SI ? Instruction::ExtractElement
- : Instruction::InsertElement,
- VectorTy, i);
- }
- // The cost of the scalar loads/stores.
+ // Get the cost of the scalar memory instruction and address computation.
Cost += VF * TTI.getAddressComputationCost(PtrTy, IsComplexComputation);
Cost += VF *
TTI.getMemoryOpCost(I->getOpcode(), ValTy->getScalarType(),
Alignment, AS);
+
+ // Get the overhead of the extractelement and insertelement instructions
+ // we might create due to scalarization.
+ Cost += getScalarizationOverhead(I, VF, TTI);
+
+ // If we have a predicated store, it may not be executed for each vector
+ // lane. Scale the cost by the probability of executing the predicated
+ // block.
+ if (Legal->isScalarWithPredication(I))
+ Cost /= getReciprocalPredBlockProb();
+
return Cost;
}
+ // Determine if the pointer operand of the access is either consecutive or
+ // reverse consecutive.
+ int ConsecutiveStride = Legal->isConsecutivePtr(Ptr);
+ bool Reverse = ConsecutiveStride < 0;
+
+ // Determine if either a gather or scatter operation is legal.
+ bool UseGatherOrScatter =
+ !ConsecutiveStride && Legal->isLegalGatherOrScatter(I);
+
unsigned Cost = TTI.getAddressComputationCost(VectorTy);
if (UseGatherOrScatter) {
assert(ConsecutiveStride == 0 &&
@@ -6147,7 +7135,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
Type *SrcScalarTy = I->getOperand(0)->getType();
Type *SrcVecTy = ToVectorTy(SrcScalarTy, VF);
- if (VF > 1 && MinBWs.count(I)) {
+ if (canTruncateToMinimalBitwidth(I, VF)) {
// This cast is going to be shrunk. This may remove the cast or it might
// turn it into slightly different cast. For example, if MinBW == 16,
// "zext i8 %1 to i32" becomes "zext i8 %1 to i16".
@@ -6176,28 +7164,11 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
return std::min(CallCost, getVectorIntrinsicCost(CI, VF, TTI, TLI));
return CallCost;
}
- default: {
- // We are scalarizing the instruction. Return the cost of the scalar
- // instruction, plus the cost of insert and extract into vector
- // elements, times the vector width.
- unsigned Cost = 0;
-
- if (!RetTy->isVoidTy() && VF != 1) {
- unsigned InsCost =
- TTI.getVectorInstrCost(Instruction::InsertElement, VectorTy);
- unsigned ExtCost =
- TTI.getVectorInstrCost(Instruction::ExtractElement, VectorTy);
-
- // The cost of inserting the results plus extracting each one of the
- // operands.
- Cost += VF * (InsCost + ExtCost * I->getNumOperands());
- }
-
+ default:
// The cost of executing VF copies of the scalar instruction. This opcode
// is unknown. Assume that it is the same as 'mul'.
- Cost += VF * TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy);
- return Cost;
- }
+ return VF * TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy) +
+ getScalarizationOverhead(I, VF, TTI);
} // end of switch.
}
@@ -6217,6 +7188,7 @@ INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LoopAccessLegacyAnalysis)
INITIALIZE_PASS_DEPENDENCY(DemandedBitsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
INITIALIZE_PASS_END(LoopVectorize, LV_NAME, lv_name, false, false)
namespace llvm {
@@ -6226,14 +7198,11 @@ Pass *createLoopVectorizePass(bool NoUnrolling, bool AlwaysVectorize) {
}
bool LoopVectorizationCostModel::isConsecutiveLoadOrStore(Instruction *Inst) {
- // Check for a store.
- if (auto *ST = dyn_cast<StoreInst>(Inst))
- return Legal->isConsecutivePtr(ST->getPointerOperand()) != 0;
-
- // Check for a load.
- if (auto *LI = dyn_cast<LoadInst>(Inst))
- return Legal->isConsecutivePtr(LI->getPointerOperand()) != 0;
+ // Check if the pointer operand of a load or store instruction is
+ // consecutive.
+ if (auto *Ptr = getPointerOperand(Inst))
+ return Legal->isConsecutivePtr(Ptr);
return false;
}
@@ -6249,123 +7218,46 @@ void LoopVectorizationCostModel::collectValuesToIgnore() {
VecValuesToIgnore.insert(Casts.begin(), Casts.end());
}
- // Ignore induction phis that are only used in either GetElementPtr or ICmp
- // instruction to exit loop. Induction variables usually have large types and
- // can have big impact when estimating register usage.
- // This is for when VF > 1.
- for (auto &Induction : *Legal->getInductionVars()) {
- auto *PN = Induction.first;
- auto *UpdateV = PN->getIncomingValueForBlock(TheLoop->getLoopLatch());
-
- // Check that the PHI is only used by the induction increment (UpdateV) or
- // by GEPs. Then check that UpdateV is only used by a compare instruction,
- // the loop header PHI, or by GEPs.
- // FIXME: Need precise def-use analysis to determine if this instruction
- // variable will be vectorized.
- if (all_of(PN->users(),
- [&](const User *U) -> bool {
- return U == UpdateV || isa<GetElementPtrInst>(U);
- }) &&
- all_of(UpdateV->users(), [&](const User *U) -> bool {
- return U == PN || isa<ICmpInst>(U) || isa<GetElementPtrInst>(U);
- })) {
- VecValuesToIgnore.insert(PN);
- VecValuesToIgnore.insert(UpdateV);
- }
- }
-
- // Ignore instructions that will not be vectorized.
- // This is for when VF > 1.
- for (BasicBlock *BB : TheLoop->blocks()) {
- for (auto &Inst : *BB) {
- switch (Inst.getOpcode())
- case Instruction::GetElementPtr: {
- // Ignore GEP if its last operand is an induction variable so that it is
- // a consecutive load/store and won't be vectorized as scatter/gather
- // pattern.
-
- GetElementPtrInst *Gep = cast<GetElementPtrInst>(&Inst);
- unsigned NumOperands = Gep->getNumOperands();
- unsigned InductionOperand = getGEPInductionOperand(Gep);
- bool GepToIgnore = true;
-
- // Check that all of the gep indices are uniform except for the
- // induction operand.
- for (unsigned i = 0; i != NumOperands; ++i) {
- if (i != InductionOperand &&
- !PSE.getSE()->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)),
- TheLoop)) {
- GepToIgnore = false;
- break;
- }
- }
-
- if (GepToIgnore)
- VecValuesToIgnore.insert(&Inst);
- break;
- }
- }
- }
+ // Insert values known to be scalar into VecValuesToIgnore. This is a
+ // conservative estimation of the values that will later be scalarized.
+ //
+ // FIXME: Even though an instruction is not scalar-after-vectorization, it may
+ // still be scalarized. For example, we may find an instruction to be
+ // more profitable for a given vectorization factor if it were to be
+ // scalarized. But at this point, we haven't yet computed the
+ // vectorization factor.
+ for (auto *BB : TheLoop->getBlocks())
+ for (auto &I : *BB)
+ if (Legal->isScalarAfterVectorization(&I))
+ VecValuesToIgnore.insert(&I);
}
void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr,
- bool IfPredicateStore) {
+ bool IfPredicateInstr) {
assert(!Instr->getType()->isAggregateType() && "Can't handle vectors");
// Holds vector parameters or scalars, in case of uniform vals.
SmallVector<VectorParts, 4> Params;
setDebugLocFromInst(Builder, Instr);
- // Find all of the vectorized parameters.
- for (Value *SrcOp : Instr->operands()) {
- // If we are accessing the old induction variable, use the new one.
- if (SrcOp == OldInduction) {
- Params.push_back(getVectorValue(SrcOp));
- continue;
- }
-
- // Try using previously calculated values.
- Instruction *SrcInst = dyn_cast<Instruction>(SrcOp);
-
- // If the src is an instruction that appeared earlier in the basic block
- // then it should already be vectorized.
- if (SrcInst && OrigLoop->contains(SrcInst)) {
- assert(WidenMap.has(SrcInst) && "Source operand is unavailable");
- // The parameter is a vector value from earlier.
- Params.push_back(WidenMap.get(SrcInst));
- } else {
- // The parameter is a scalar from outside the loop. Maybe even a constant.
- VectorParts Scalars;
- Scalars.append(UF, SrcOp);
- Params.push_back(Scalars);
- }
- }
-
- assert(Params.size() == Instr->getNumOperands() &&
- "Invalid number of operands");
-
// Does this instruction return a value?
bool IsVoidRetTy = Instr->getType()->isVoidTy();
- Value *UndefVec = IsVoidRetTy ? nullptr : UndefValue::get(Instr->getType());
- // Create a new entry in the WidenMap and initialize it to Undef or Null.
- VectorParts &VecResults = WidenMap.splat(Instr, UndefVec);
+ // Initialize a new scalar map entry.
+ ScalarParts Entry(UF);
VectorParts Cond;
- if (IfPredicateStore) {
- assert(Instr->getParent()->getSinglePredecessor() &&
- "Only support single predecessor blocks");
- Cond = createEdgeMask(Instr->getParent()->getSinglePredecessor(),
- Instr->getParent());
- }
+ if (IfPredicateInstr)
+ Cond = createBlockInMask(Instr->getParent());
// For each vector unroll 'part':
for (unsigned Part = 0; Part < UF; ++Part) {
+ Entry[Part].resize(1);
// For each scalar that we create:
// Start an "if (pred) a[i] = ..." block.
Value *Cmp = nullptr;
- if (IfPredicateStore) {
+ if (IfPredicateInstr) {
if (Cond[Part]->getType()->isVectorTy())
Cond[Part] =
Builder.CreateExtractElement(Cond[Part], Builder.getInt32(0));
@@ -6376,47 +7268,57 @@ void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr,
Instruction *Cloned = Instr->clone();
if (!IsVoidRetTy)
Cloned->setName(Instr->getName() + ".cloned");
- // Replace the operands of the cloned instructions with extracted scalars.
+
+ // Replace the operands of the cloned instructions with their scalar
+ // equivalents in the new loop.
for (unsigned op = 0, e = Instr->getNumOperands(); op != e; ++op) {
- Value *Op = Params[op][Part];
- Cloned->setOperand(op, Op);
+ auto *NewOp = getScalarValue(Instr->getOperand(op), Part, 0);
+ Cloned->setOperand(op, NewOp);
}
// Place the cloned scalar in the new loop.
Builder.Insert(Cloned);
+ // Add the cloned scalar to the scalar map entry.
+ Entry[Part][0] = Cloned;
+
// If we just cloned a new assumption, add it the assumption cache.
if (auto *II = dyn_cast<IntrinsicInst>(Cloned))
if (II->getIntrinsicID() == Intrinsic::assume)
AC->registerAssumption(II);
- // If the original scalar returns a value we need to place it in a vector
- // so that future users will be able to use it.
- if (!IsVoidRetTy)
- VecResults[Part] = Cloned;
-
// End if-block.
- if (IfPredicateStore)
- PredicatedStores.push_back(std::make_pair(cast<StoreInst>(Cloned), Cmp));
+ if (IfPredicateInstr)
+ PredicatedInstructions.push_back(std::make_pair(Cloned, Cmp));
}
+ VectorLoopValueMap.initScalar(Instr, Entry);
}
void InnerLoopUnroller::vectorizeMemoryInstruction(Instruction *Instr) {
auto *SI = dyn_cast<StoreInst>(Instr);
- bool IfPredicateStore = (SI && Legal->blockNeedsPredication(SI->getParent()));
+ bool IfPredicateInstr = (SI && Legal->blockNeedsPredication(SI->getParent()));
- return scalarizeInstruction(Instr, IfPredicateStore);
+ return scalarizeInstruction(Instr, IfPredicateInstr);
}
Value *InnerLoopUnroller::reverseVector(Value *Vec) { return Vec; }
Value *InnerLoopUnroller::getBroadcastInstrs(Value *V) { return V; }
-Value *InnerLoopUnroller::getStepVector(Value *Val, int StartIdx, Value *Step) {
+Value *InnerLoopUnroller::getStepVector(Value *Val, int StartIdx, Value *Step,
+ Instruction::BinaryOps BinOp) {
// When unrolling and the VF is 1, we only need to add a simple scalar.
- Type *ITy = Val->getType();
- assert(!ITy->isVectorTy() && "Val must be a scalar");
- Constant *C = ConstantInt::get(ITy, StartIdx);
+ Type *Ty = Val->getType();
+ assert(!Ty->isVectorTy() && "Val must be a scalar");
+
+ if (Ty->isFloatingPointTy()) {
+ Constant *C = ConstantFP::get(Ty, (double)StartIdx);
+
+ // Floating point operations had to be 'fast' to enable the unrolling.
+ Value *MulOp = addFastMathFlag(Builder.CreateFMul(C, Step));
+ return addFastMathFlag(Builder.CreateBinOp(BinOp, Val, MulOp));
+ }
+ Constant *C = ConstantInt::get(Ty, StartIdx);
return Builder.CreateAdd(Val, Builder.CreateMul(C, Step), "induction");
}
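
As a rough standalone illustration of the scalar step above (assumed semantics only, not the IRBuilder calls): for unroll part StartIdx the unroller produces Val + StartIdx * Step for integer inductions, and Val binop (StartIdx * Step) for floating-point inductions, where the binary operation comes from the induction descriptor.

    #include <functional>

    // Integer induction step for an unrolled (VF = 1) loop part.
    long long stepScalarInt(long long Val, int StartIdx, long long Step) {
      return Val + static_cast<long long>(StartIdx) * Step;
    }

    // Floating-point induction step; the binary operation (e.g. fadd or fsub)
    // is supplied by the caller, mirroring the BinOp parameter added above.
    double stepScalarFP(double Val, int StartIdx, double Step,
                        const std::function<double(double, double)> &BinOp) {
      return BinOp(Val, static_cast<double>(StartIdx) * Step);
    }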
@@ -6465,7 +7367,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
<< L->getHeader()->getParent()->getName() << "\" from "
<< DebugLocStr << "\n");
- LoopVectorizeHints Hints(L, DisableUnrolling);
+ LoopVectorizeHints Hints(L, DisableUnrolling, *ORE);
DEBUG(dbgs() << "LV: Loop hints:"
<< " force="
@@ -6483,8 +7385,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// Looking at the diagnostic output is the only way to determine if a loop
// was vectorized (other than looking at the IR or machine code), so it
// is important to generate an optimization remark for each loop. Most of
- // these messages are generated by emitOptimizationRemarkAnalysis. Remarks
- // generated by emitOptimizationRemark and emitOptimizationRemarkMissed are
+ // these messages are generated as OptimizationRemarkAnalysis. Remarks
+ // generated as OptimizationRemark and OptimizationRemarkMissed are
// less verbose reporting vectorized loops and unvectorized loops that may
// benefit from vectorization, respectively.
@@ -6495,17 +7397,18 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// Check the loop for a trip count threshold:
// do not vectorize loops with a tiny trip count.
- const unsigned TC = SE->getSmallConstantTripCount(L);
- if (TC > 0u && TC < TinyTripCountVectorThreshold) {
+ const unsigned MaxTC = SE->getSmallConstantMaxTripCount(L);
+ if (MaxTC > 0u && MaxTC < TinyTripCountVectorThreshold) {
DEBUG(dbgs() << "LV: Found a loop with a very small trip count. "
<< "This loop is not worth vectorizing.");
if (Hints.getForce() == LoopVectorizeHints::FK_Enabled)
DEBUG(dbgs() << " But vectorizing was explicitly forced.\n");
else {
DEBUG(dbgs() << "\n");
- emitAnalysisDiag(F, L, Hints, VectorizationReport()
- << "vectorization is not beneficial "
- "and is not explicitly forced");
+ ORE->emit(createMissedAnalysis(Hints.vectorizeAnalysisPassName(),
+ "NotBeneficial", L)
+ << "vectorization is not beneficial "
+ "and is not explicitly forced");
return false;
}
}
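
A tiny sketch of the gate above, with hypothetical parameter names: a loop whose known maximum trip count is positive but below the tiny-trip-count threshold is skipped unless vectorization was explicitly forced.

    // Return true when vectorization should be skipped purely because the loop
    // is too short; the threshold and force flag are illustrative stand-ins for
    // the command-line option and the loop hints.
    bool tooSmallToVectorize(unsigned MaxTripCount, unsigned TinyThreshold,
                             bool ForceEnabled) {
      return MaxTripCount > 0 && MaxTripCount < TinyThreshold && !ForceEnabled;
    }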
@@ -6513,17 +7416,17 @@ bool LoopVectorizePass::processLoop(Loop *L) {
PredicatedScalarEvolution PSE(*SE, *L);
// Check if it is legal to vectorize the loop.
- LoopVectorizationRequirements Requirements;
- LoopVectorizationLegality LVL(L, PSE, DT, TLI, AA, F, TTI, GetLAA, LI,
+ LoopVectorizationRequirements Requirements(*ORE);
+ LoopVectorizationLegality LVL(L, PSE, DT, TLI, AA, F, TTI, GetLAA, LI, ORE,
&Requirements, &Hints);
if (!LVL.canVectorize()) {
DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n");
- emitMissedWarning(F, L, Hints);
+ emitMissedWarning(F, L, Hints, ORE);
return false;
}
// Use the cost model.
- LoopVectorizationCostModel CM(L, PSE, LI, &LVL, *TTI, TLI, DB, AC, F,
+ LoopVectorizationCostModel CM(L, PSE, LI, &LVL, *TTI, TLI, DB, AC, ORE, F,
&Hints);
CM.collectValuesToIgnore();
@@ -6551,11 +7454,10 @@ bool LoopVectorizePass::processLoop(Loop *L) {
if (F->hasFnAttribute(Attribute::NoImplicitFloat)) {
DEBUG(dbgs() << "LV: Can't vectorize when the NoImplicitFloat"
"attribute is used.\n");
- emitAnalysisDiag(
- F, L, Hints,
- VectorizationReport()
- << "loop not vectorized due to NoImplicitFloat attribute");
- emitMissedWarning(F, L, Hints);
+ ORE->emit(createMissedAnalysis(Hints.vectorizeAnalysisPassName(),
+ "NoImplicitFloat", L)
+ << "loop not vectorized due to NoImplicitFloat attribute");
+ emitMissedWarning(F, L, Hints, ORE);
return false;
}
@@ -6566,10 +7468,10 @@ bool LoopVectorizePass::processLoop(Loop *L) {
if (Hints.isPotentiallyUnsafe() &&
TTI->isFPVectorizationPotentiallyUnsafe()) {
DEBUG(dbgs() << "LV: Potentially unsafe FP op prevents vectorization.\n");
- emitAnalysisDiag(F, L, Hints,
- VectorizationReport()
- << "loop not vectorized due to unsafe FP support.");
- emitMissedWarning(F, L, Hints);
+ ORE->emit(
+ createMissedAnalysis(Hints.vectorizeAnalysisPassName(), "UnsafeFP", L)
+ << "loop not vectorized due to unsafe FP support.");
+ emitMissedWarning(F, L, Hints, ORE);
return false;
}
@@ -6584,38 +7486,43 @@ bool LoopVectorizePass::processLoop(Loop *L) {
unsigned UserIC = Hints.getInterleave();
// Identify the diagnostic messages that should be produced.
- std::string VecDiagMsg, IntDiagMsg;
+ std::pair<StringRef, std::string> VecDiagMsg, IntDiagMsg;
bool VectorizeLoop = true, InterleaveLoop = true;
-
if (Requirements.doesNotMeet(F, L, Hints)) {
DEBUG(dbgs() << "LV: Not vectorizing: loop did not meet vectorization "
"requirements.\n");
- emitMissedWarning(F, L, Hints);
+ emitMissedWarning(F, L, Hints, ORE);
return false;
}
if (VF.Width == 1) {
DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n");
- VecDiagMsg =
- "the cost-model indicates that vectorization is not beneficial";
+ VecDiagMsg = std::make_pair(
+ "VectorizationNotBeneficial",
+ "the cost-model indicates that vectorization is not beneficial");
VectorizeLoop = false;
}
if (IC == 1 && UserIC <= 1) {
// Tell the user interleaving is not beneficial.
DEBUG(dbgs() << "LV: Interleaving is not beneficial.\n");
- IntDiagMsg =
- "the cost-model indicates that interleaving is not beneficial";
+ IntDiagMsg = std::make_pair(
+ "InterleavingNotBeneficial",
+ "the cost-model indicates that interleaving is not beneficial");
InterleaveLoop = false;
- if (UserIC == 1)
- IntDiagMsg +=
+ if (UserIC == 1) {
+ IntDiagMsg.first = "InterleavingNotBeneficialAndDisabled";
+ IntDiagMsg.second +=
" and is explicitly disabled or interleave count is set to 1";
+ }
} else if (IC > 1 && UserIC == 1) {
// Tell the user interleaving is beneficial, but it is explicitly disabled.
DEBUG(dbgs()
<< "LV: Interleaving is beneficial but is explicitly disabled.");
- IntDiagMsg = "the cost-model indicates that interleaving is beneficial "
- "but is explicitly disabled or interleave count is set to 1";
+ IntDiagMsg = std::make_pair(
+ "InterleavingBeneficialButDisabled",
+ "the cost-model indicates that interleaving is beneficial "
+ "but is explicitly disabled or interleave count is set to 1");
InterleaveLoop = false;
}
@@ -6626,40 +7533,48 @@ bool LoopVectorizePass::processLoop(Loop *L) {
const char *VAPassName = Hints.vectorizeAnalysisPassName();
if (!VectorizeLoop && !InterleaveLoop) {
// Do not vectorize or interleave the loop.
- emitOptimizationRemarkAnalysis(F->getContext(), VAPassName, *F,
- L->getStartLoc(), VecDiagMsg);
- emitOptimizationRemarkAnalysis(F->getContext(), LV_NAME, *F,
- L->getStartLoc(), IntDiagMsg);
+ ORE->emit(OptimizationRemarkAnalysis(VAPassName, VecDiagMsg.first,
+ L->getStartLoc(), L->getHeader())
+ << VecDiagMsg.second);
+ ORE->emit(OptimizationRemarkAnalysis(LV_NAME, IntDiagMsg.first,
+ L->getStartLoc(), L->getHeader())
+ << IntDiagMsg.second);
return false;
} else if (!VectorizeLoop && InterleaveLoop) {
DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n');
- emitOptimizationRemarkAnalysis(F->getContext(), VAPassName, *F,
- L->getStartLoc(), VecDiagMsg);
+ ORE->emit(OptimizationRemarkAnalysis(VAPassName, VecDiagMsg.first,
+ L->getStartLoc(), L->getHeader())
+ << VecDiagMsg.second);
} else if (VectorizeLoop && !InterleaveLoop) {
DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width << ") in "
<< DebugLocStr << '\n');
- emitOptimizationRemarkAnalysis(F->getContext(), LV_NAME, *F,
- L->getStartLoc(), IntDiagMsg);
+ ORE->emit(OptimizationRemarkAnalysis(LV_NAME, IntDiagMsg.first,
+ L->getStartLoc(), L->getHeader())
+ << IntDiagMsg.second);
} else if (VectorizeLoop && InterleaveLoop) {
DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width << ") in "
<< DebugLocStr << '\n');
DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n');
}
+ using namespace ore;
if (!VectorizeLoop) {
assert(IC > 1 && "interleave count should not be 1 or 0");
// If we decided that it is not legal to vectorize the loop, then
// interleave it.
- InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, IC);
- Unroller.vectorize(&LVL, CM.MinBWs, CM.VecValuesToIgnore);
-
- emitOptimizationRemark(F->getContext(), LV_NAME, *F, L->getStartLoc(),
- Twine("interleaved loop (interleaved count: ") +
- Twine(IC) + ")");
+ InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, ORE, IC, &LVL,
+ &CM);
+ Unroller.vectorize();
+
+ ORE->emit(OptimizationRemark(LV_NAME, "Interleaved", L->getStartLoc(),
+ L->getHeader())
+ << "interleaved loop (interleaved count: "
+ << NV("InterleaveCount", IC) << ")");
} else {
// If we decided that it is *legal* to vectorize the loop, then do it.
- InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, VF.Width, IC);
- LB.vectorize(&LVL, CM.MinBWs, CM.VecValuesToIgnore);
+ InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, IC,
+ &LVL, &CM);
+ LB.vectorize();
++LoopsVectorized;
// Add metadata to disable runtime unrolling a scalar loop when there are
@@ -6669,10 +7584,11 @@ bool LoopVectorizePass::processLoop(Loop *L) {
AddRuntimeUnrollDisableMetaData(L);
// Report the vectorization decision.
- emitOptimizationRemark(F->getContext(), LV_NAME, *F, L->getStartLoc(),
- Twine("vectorized loop (vectorization width: ") +
- Twine(VF.Width) + ", interleaved count: " +
- Twine(IC) + ")");
+ ORE->emit(OptimizationRemark(LV_NAME, "Vectorized", L->getStartLoc(),
+ L->getHeader())
+ << "vectorized loop (vectorization width: "
+ << NV("VectorizationFactor", VF.Width)
+ << ", interleaved count: " << NV("InterleaveCount", IC) << ")");
}
// Mark the loop as already vectorized to avoid vectorizing again.
@@ -6686,7 +7602,8 @@ bool LoopVectorizePass::runImpl(
Function &F, ScalarEvolution &SE_, LoopInfo &LI_, TargetTransformInfo &TTI_,
DominatorTree &DT_, BlockFrequencyInfo &BFI_, TargetLibraryInfo *TLI_,
DemandedBits &DB_, AliasAnalysis &AA_, AssumptionCache &AC_,
- std::function<const LoopAccessInfo &(Loop &)> &GetLAA_) {
+ std::function<const LoopAccessInfo &(Loop &)> &GetLAA_,
+ OptimizationRemarkEmitter &ORE_) {
SE = &SE_;
LI = &LI_;
@@ -6698,6 +7615,7 @@ bool LoopVectorizePass::runImpl(
AC = &AC_;
GetLAA = &GetLAA_;
DB = &DB_;
+ ORE = &ORE_;
// Compute some weights outside of the loop over the loops. Compute this
// using a BranchProbability to re-use its scaling math.
@@ -6746,13 +7664,15 @@ PreservedAnalyses LoopVectorizePass::run(Function &F,
auto &AA = AM.getResult<AAManager>(F);
auto &AC = AM.getResult<AssumptionAnalysis>(F);
auto &DB = AM.getResult<DemandedBitsAnalysis>(F);
+ auto &ORE = AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
auto &LAM = AM.getResult<LoopAnalysisManagerFunctionProxy>(F).getManager();
std::function<const LoopAccessInfo &(Loop &)> GetLAA =
[&](Loop &L) -> const LoopAccessInfo & {
return LAM.getResult<LoopAccessAnalysis>(L);
};
- bool Changed = runImpl(F, SE, LI, TTI, DT, BFI, TLI, DB, AA, AC, GetLAA);
+ bool Changed =
+ runImpl(F, SE, LI, TTI, DT, BFI, TLI, DB, AA, AC, GetLAA, ORE);
if (!Changed)
return PreservedAnalyses::all();
PreservedAnalyses PA;
diff --git a/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index fb94f7924ee0..bcaa8439cffa 100644
--- a/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -115,22 +115,22 @@ static bool isValidElementType(Type *Ty) {
!Ty->isPPC_FP128Ty();
}
-/// \returns the parent basic block if all of the instructions in \p VL
-/// are in the same block or null otherwise.
-static BasicBlock *getSameBlock(ArrayRef<Value *> VL) {
+/// \returns true if all of the instructions in \p VL are in the same block or
+/// false otherwise.
+static bool allSameBlock(ArrayRef<Value *> VL) {
Instruction *I0 = dyn_cast<Instruction>(VL[0]);
if (!I0)
- return nullptr;
+ return false;
BasicBlock *BB = I0->getParent();
for (int i = 1, e = VL.size(); i < e; i++) {
Instruction *I = dyn_cast<Instruction>(VL[i]);
if (!I)
- return nullptr;
+ return false;
if (BB != I->getParent())
- return nullptr;
+ return false;
}
- return BB;
+ return true;
}
/// \returns True if all of the values in \p VL are constants.
@@ -211,12 +211,12 @@ static unsigned getSameOpcode(ArrayRef<Value *> VL) {
/// of each scalar operation (VL) that will be converted into a vector (I).
/// Flag set: NSW, NUW, exact, and all of fast-math.
static void propagateIRFlags(Value *I, ArrayRef<Value *> VL) {
- if (auto *VecOp = dyn_cast<BinaryOperator>(I)) {
- if (auto *Intersection = dyn_cast<BinaryOperator>(VL[0])) {
+ if (auto *VecOp = dyn_cast<Instruction>(I)) {
+ if (auto *Intersection = dyn_cast<Instruction>(VL[0])) {
// Intersection is initialized to the 0th scalar,
// so start counting from index '1'.
for (int i = 1, e = VL.size(); i < e; ++i) {
- if (auto *Scalar = dyn_cast<BinaryOperator>(VL[i]))
+ if (auto *Scalar = dyn_cast<Instruction>(VL[i]))
Intersection->andIRFlags(Scalar);
}
VecOp->copyIRFlags(Intersection);
@@ -224,15 +224,15 @@ static void propagateIRFlags(Value *I, ArrayRef<Value *> VL) {
}
}
-/// \returns The type that all of the values in \p VL have or null if there
-/// are different types.
-static Type* getSameType(ArrayRef<Value *> VL) {
+/// \returns true if all of the values in \p VL have the same type or false
+/// otherwise.
+static bool allSameType(ArrayRef<Value *> VL) {
Type *Ty = VL[0]->getType();
for (int i = 1, e = VL.size(); i < e; i++)
if (VL[i]->getType() != Ty)
- return nullptr;
+ return false;
- return Ty;
+ return true;
}
/// \returns True if Extract{Value,Element} instruction extracts element Idx.
@@ -393,6 +393,10 @@ public:
/// \returns number of elements in vector if isomorphism exists, 0 otherwise.
unsigned canMapToVector(Type *T, const DataLayout &DL) const;
+ /// \returns True if the VectorizableTree is both tiny and not fully
+ /// vectorizable. We do not vectorize such trees.
+ bool isTreeTinyAndNotFullyVectorizable();
+
private:
struct TreeEntry;
@@ -883,10 +887,10 @@ private:
/// List of users to ignore during scheduling and that don't need extracting.
ArrayRef<Value *> UserIgnoreList;
- // Number of load-bundles, which contain consecutive loads.
+ // Number of load bundles that contain consecutive loads.
int NumLoadsWantToKeepOrder;
- // Number of load-bundles of size 2, which are consecutive loads if reversed.
+ // Number of load bundles that contain consecutive loads in reversed order.
int NumLoadsWantToChangeOrder;
// Analysis and block reference.
@@ -906,8 +910,11 @@ private:
IRBuilder<> Builder;
/// A map of scalar integer values to the smallest bit width with which they
- /// can legally be represented.
- MapVector<Value *, uint64_t> MinBWs;
+ /// can legally be represented. The values map to (width, signed) pairs,
+ /// where "width" indicates the minimum bit width and "signed" is True if the
+ /// value must be sign-extended, rather than zero-extended, back to its
+ /// original width.
+ MapVector<Value *, std::pair<uint64_t, bool>> MinBWs;
};
} // end namespace llvm
@@ -917,7 +924,7 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
ArrayRef<Value *> UserIgnoreLst) {
deleteTree();
UserIgnoreList = UserIgnoreLst;
- if (!getSameType(Roots))
+ if (!allSameType(Roots))
return;
buildTree_rec(Roots, 0);
@@ -958,8 +965,7 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
}
// Ignore users in the user ignore list.
- if (std::find(UserIgnoreList.begin(), UserIgnoreList.end(), UserInst) !=
- UserIgnoreList.end())
+ if (is_contained(UserIgnoreList, UserInst))
continue;
DEBUG(dbgs() << "SLP: Need to extract:" << *U << " from lane " <<
@@ -972,9 +978,8 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
- bool SameTy = allConstant(VL) || getSameType(VL); (void)SameTy;
bool isAltShuffle = false;
- assert(SameTy && "Invalid types!");
+ assert((allConstant(VL) || allSameType(VL)) && "Invalid types!");
if (Depth == RecursionMaxDepth) {
DEBUG(dbgs() << "SLP: Gathering due to max recursion depth.\n");
@@ -1007,7 +1012,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
}
// If all of the operands are identical or constant we have a simple solution.
- if (allConstant(VL) || isSplat(VL) || !getSameBlock(VL) || !Opcode) {
+ if (allConstant(VL) || isSplat(VL) || !allSameBlock(VL) || !Opcode) {
DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O. \n");
newTreeEntry(VL, false);
return;
@@ -1159,7 +1164,9 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
DEBUG(dbgs() << "SLP: Gathering loads of non-packed type.\n");
return;
}
- // Check if the loads are consecutive or of we need to swizzle them.
+
+ // Make sure all loads in the bundle are simple - we can't vectorize
+ // atomic or volatile loads.
for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) {
LoadInst *L = cast<LoadInst>(VL[i]);
if (!L->isSimple()) {
@@ -1168,20 +1175,47 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
DEBUG(dbgs() << "SLP: Gathering non-simple loads.\n");
return;
}
+ }
+ // Check if the loads are consecutive, reversed, or neither.
+ // TODO: What we really want is to sort the loads, but for now, check
+ // the two likely directions.
+ bool Consecutive = true;
+ bool ReverseConsecutive = true;
+ for (unsigned i = 0, e = VL.size() - 1; i < e; ++i) {
if (!isConsecutiveAccess(VL[i], VL[i + 1], *DL, *SE)) {
- if (VL.size() == 2 && isConsecutiveAccess(VL[1], VL[0], *DL, *SE)) {
- ++NumLoadsWantToChangeOrder;
- }
- BS.cancelScheduling(VL);
- newTreeEntry(VL, false);
- DEBUG(dbgs() << "SLP: Gathering non-consecutive loads.\n");
- return;
+ Consecutive = false;
+ break;
+ } else {
+ ReverseConsecutive = false;
}
}
- ++NumLoadsWantToKeepOrder;
- newTreeEntry(VL, true);
- DEBUG(dbgs() << "SLP: added a vector of loads.\n");
+
+ if (Consecutive) {
+ ++NumLoadsWantToKeepOrder;
+ newTreeEntry(VL, true);
+ DEBUG(dbgs() << "SLP: added a vector of loads.\n");
+ return;
+ }
+
+ // If none of the load pairs were consecutive when checked in order,
+ // check the reverse order.
+ if (ReverseConsecutive)
+ for (unsigned i = VL.size() - 1; i > 0; --i)
+ if (!isConsecutiveAccess(VL[i], VL[i - 1], *DL, *SE)) {
+ ReverseConsecutive = false;
+ break;
+ }
+
+ BS.cancelScheduling(VL);
+ newTreeEntry(VL, false);
+
+ if (ReverseConsecutive) {
+ ++NumLoadsWantToChangeOrder;
+ DEBUG(dbgs() << "SLP: Gathering reversed loads.\n");
+ } else {
+ DEBUG(dbgs() << "SLP: Gathering non-consecutive loads.\n");
+ }
return;
}
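
A standalone sketch of the new load-bundle classification, using raw addresses and a fixed element size instead of isConsecutiveAccess (names are illustrative): a bundle is kept in order when each address follows the previous one by one element, and counted as wanting the reverse order when the addresses are consecutive only when walked backwards.

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    enum class LoadOrder { Consecutive, ReverseConsecutive, Scattered };

    // Classify the addresses of a load bundle, assuming a uniform element size.
    LoadOrder classifyLoads(const std::vector<std::uintptr_t> &Addrs,
                            std::uintptr_t ElemSize) {
      if (Addrs.size() < 2)
        return LoadOrder::Consecutive;
      bool Consecutive = true, ReverseConsecutive = true;
      for (std::size_t I = 0; I + 1 < Addrs.size(); ++I) {
        if (Addrs[I + 1] != Addrs[I] + ElemSize)
          Consecutive = false;
        else
          ReverseConsecutive = false; // a forward pair matched, so not reversed
      }
      if (Consecutive)
        return LoadOrder::Consecutive;
      // Second pass: only when no forward pair was consecutive, verify that
      // every reversed pair is.
      if (ReverseConsecutive)
        for (std::size_t I = Addrs.size() - 1; I > 0; --I)
          if (Addrs[I - 1] != Addrs[I] + ElemSize) {
            ReverseConsecutive = false;
            break;
          }
      return ReverseConsecutive ? LoadOrder::ReverseConsecutive
                                : LoadOrder::Scattered;
    }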
case Instruction::ZExt:
@@ -1541,8 +1575,8 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
// If we have computed a smaller type for the expression, update VecTy so
// that the costs will be accurate.
if (MinBWs.count(VL[0]))
- VecTy = VectorType::get(IntegerType::get(F->getContext(), MinBWs[VL[0]]),
- VL.size());
+ VecTy = VectorType::get(
+ IntegerType::get(F->getContext(), MinBWs[VL[0]].first), VL.size());
if (E->NeedToGather) {
if (allConstant(VL))
@@ -1553,7 +1587,7 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
return getGatherCost(E->Scalars);
}
unsigned Opcode = getSameOpcode(VL);
- assert(Opcode && getSameType(VL) && getSameBlock(VL) && "Invalid VL");
+ assert(Opcode && allSameType(VL) && allSameBlock(VL) && "Invalid VL");
Instruction *VL0 = cast<Instruction>(VL[0]);
switch (Opcode) {
case Instruction::PHI: {
@@ -1762,7 +1796,10 @@ bool BoUpSLP::isFullyVectorizableTinyTree() {
DEBUG(dbgs() << "SLP: Check whether the tree with height " <<
VectorizableTree.size() << " is fully vectorizable .\n");
- // We only handle trees of height 2.
+ // We only handle trees of heights 1 and 2.
+ if (VectorizableTree.size() == 1 && !VectorizableTree[0].NeedToGather)
+ return true;
+
if (VectorizableTree.size() != 2)
return false;
@@ -1779,6 +1816,27 @@ bool BoUpSLP::isFullyVectorizableTinyTree() {
return true;
}
+bool BoUpSLP::isTreeTinyAndNotFullyVectorizable() {
+
+ // We can vectorize the tree if its size is greater than or equal to the
+ // minimum size specified by the MinTreeSize command line option.
+ if (VectorizableTree.size() >= MinTreeSize)
+ return false;
+
+ // If we have a tiny tree (a tree whose size is less than MinTreeSize), we
+ // can vectorize it if we can prove it fully vectorizable.
+ if (isFullyVectorizableTinyTree())
+ return false;
+
+ assert(VectorizableTree.empty()
+ ? ExternalUses.empty()
+ : true && "We shouldn't have any external users");
+
+ // Otherwise, we can't vectorize the tree. It is both tiny and not fully
+ // vectorizable.
+ return true;
+}
+
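
A boolean sketch of the decision isTreeTinyAndNotFullyVectorizable encodes (hypothetical parameter names): trees at or above the minimum size are always candidates; smaller trees survive only if they can be proven fully vectorizable.

    #include <cstddef>

    // Decide whether an SLP tree is too small to be worth vectorizing.
    bool treeTinyAndNotFullyVectorizable(std::size_t TreeSize,
                                         std::size_t MinTreeSize,
                                         bool FullyVectorizableTinyTree) {
      if (TreeSize >= MinTreeSize)
        return false;                     // large enough, never rejected here
      return !FullyVectorizableTinyTree;  // tiny: reject unless provably viable
    }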
int BoUpSLP::getSpillCost() {
// Walk from the bottom of the tree to the top, tracking which values are
// live. When we see a call instruction that is not part of our tree,
@@ -1816,9 +1874,9 @@ int BoUpSLP::getSpillCost() {
);
// Now find the sequence of instructions between PrevInst and Inst.
- BasicBlock::reverse_iterator InstIt(Inst->getIterator()),
- PrevInstIt(PrevInst->getIterator());
- --PrevInstIt;
+ BasicBlock::reverse_iterator InstIt = ++Inst->getIterator().getReverse(),
+ PrevInstIt =
+ PrevInst->getIterator().getReverse();
while (InstIt != PrevInstIt) {
if (PrevInstIt == PrevInst->getParent()->rend()) {
PrevInstIt = Inst->getParent()->rbegin();
@@ -1846,14 +1904,6 @@ int BoUpSLP::getTreeCost() {
DEBUG(dbgs() << "SLP: Calculating cost for tree of size " <<
VectorizableTree.size() << ".\n");
- // We only vectorize tiny trees if it is fully vectorizable.
- if (VectorizableTree.size() < MinTreeSize && !isFullyVectorizableTinyTree()) {
- if (VectorizableTree.empty()) {
- assert(!ExternalUses.size() && "We should not have any external users");
- }
- return INT_MAX;
- }
-
unsigned BundleWidth = VectorizableTree[0].Scalars.size();
for (TreeEntry &TE : VectorizableTree) {
@@ -1882,10 +1932,12 @@ int BoUpSLP::getTreeCost() {
auto *VecTy = VectorType::get(EU.Scalar->getType(), BundleWidth);
auto *ScalarRoot = VectorizableTree[0].Scalars[0];
if (MinBWs.count(ScalarRoot)) {
- auto *MinTy = IntegerType::get(F->getContext(), MinBWs[ScalarRoot]);
+ auto *MinTy = IntegerType::get(F->getContext(), MinBWs[ScalarRoot].first);
+ auto Extend =
+ MinBWs[ScalarRoot].second ? Instruction::SExt : Instruction::ZExt;
VecTy = VectorType::get(MinTy, BundleWidth);
- ExtractCost += TTI->getExtractWithExtendCost(
- Instruction::SExt, EU.Scalar->getType(), VecTy, EU.Lane);
+ ExtractCost += TTI->getExtractWithExtendCost(Extend, EU.Scalar->getType(),
+ VecTy, EU.Lane);
} else {
ExtractCost +=
TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, EU.Lane);
@@ -2182,7 +2234,7 @@ void BoUpSLP::setInsertPointAfterBundle(ArrayRef<Value *> VL) {
// Set the insertion point after the last instruction in the bundle. Set the
// debug location to Front.
- Builder.SetInsertPoint(BB, next(BasicBlock::iterator(LastInst)));
+ Builder.SetInsertPoint(BB, ++LastInst->getIterator());
Builder.SetCurrentDebugLocation(Front->getDebugLoc());
}
@@ -2383,6 +2435,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
V = Builder.CreateICmp(P0, L, R);
E->VectorizedValue = V;
+ propagateIRFlags(E->VectorizedValue, E->Scalars);
++NumVectorInstructions;
return V;
}
@@ -2593,6 +2646,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
ExternalUses.push_back(ExternalUser(ScalarArg, cast<User>(V), 0));
E->VectorizedValue = V;
+ propagateIRFlags(E->VectorizedValue, E->Scalars);
++NumVectorInstructions;
return V;
}
@@ -2669,7 +2723,7 @@ Value *BoUpSLP::vectorizeTree() {
if (auto *I = dyn_cast<Instruction>(VectorRoot))
Builder.SetInsertPoint(&*++BasicBlock::iterator(I));
auto BundleWidth = VectorizableTree[0].Scalars.size();
- auto *MinTy = IntegerType::get(F->getContext(), MinBWs[ScalarRoot]);
+ auto *MinTy = IntegerType::get(F->getContext(), MinBWs[ScalarRoot].first);
auto *VecTy = VectorType::get(MinTy, BundleWidth);
auto *Trunc = Builder.CreateTrunc(VectorRoot, VecTy);
VectorizableTree[0].VectorizedValue = Trunc;
@@ -2677,6 +2731,16 @@ Value *BoUpSLP::vectorizeTree() {
DEBUG(dbgs() << "SLP: Extracting " << ExternalUses.size() << " values .\n");
+ // If necessary, sign-extend or zero-extend ScalarRoot to the larger type
+ // specified by ScalarType.
+ auto extend = [&](Value *ScalarRoot, Value *Ex, Type *ScalarType) {
+ if (!MinBWs.count(ScalarRoot))
+ return Ex;
+ if (MinBWs[ScalarRoot].second)
+ return Builder.CreateSExt(Ex, ScalarType);
+ return Builder.CreateZExt(Ex, ScalarType);
+ };
+
// Extract all of the elements with the external uses.
for (const auto &ExternalUse : ExternalUses) {
Value *Scalar = ExternalUse.Scalar;
@@ -2684,8 +2748,7 @@ Value *BoUpSLP::vectorizeTree() {
// Skip users that we already RAUW. This happens when one instruction
// has multiple uses of the same value.
- if (std::find(Scalar->user_begin(), Scalar->user_end(), User) ==
- Scalar->user_end())
+ if (!is_contained(Scalar->users(), User))
continue;
assert(ScalarToTreeEntry.count(Scalar) && "Invalid scalar");
@@ -2712,8 +2775,7 @@ Value *BoUpSLP::vectorizeTree() {
Builder.SetInsertPoint(PH->getIncomingBlock(i)->getTerminator());
}
Value *Ex = Builder.CreateExtractElement(Vec, Lane);
- if (MinBWs.count(ScalarRoot))
- Ex = Builder.CreateSExt(Ex, Scalar->getType());
+ Ex = extend(ScalarRoot, Ex, Scalar->getType());
CSEBlocks.insert(PH->getIncomingBlock(i));
PH->setOperand(i, Ex);
}
@@ -2721,16 +2783,14 @@ Value *BoUpSLP::vectorizeTree() {
} else {
Builder.SetInsertPoint(cast<Instruction>(User));
Value *Ex = Builder.CreateExtractElement(Vec, Lane);
- if (MinBWs.count(ScalarRoot))
- Ex = Builder.CreateSExt(Ex, Scalar->getType());
+ Ex = extend(ScalarRoot, Ex, Scalar->getType());
CSEBlocks.insert(cast<Instruction>(User)->getParent());
User->replaceUsesOfWith(Scalar, Ex);
}
} else {
Builder.SetInsertPoint(&F->getEntryBlock().front());
Value *Ex = Builder.CreateExtractElement(Vec, Lane);
- if (MinBWs.count(ScalarRoot))
- Ex = Builder.CreateSExt(Ex, Scalar->getType());
+ Ex = extend(ScalarRoot, Ex, Scalar->getType());
CSEBlocks.insert(&F->getEntryBlock());
User->replaceUsesOfWith(Scalar, Ex);
}
@@ -2759,8 +2819,7 @@ Value *BoUpSLP::vectorizeTree() {
assert((ScalarToTreeEntry.count(U) ||
// It is legal to replace users in the ignorelist by undef.
- (std::find(UserIgnoreList.begin(), UserIgnoreList.end(), U) !=
- UserIgnoreList.end())) &&
+ is_contained(UserIgnoreList, U)) &&
"Replacing out-of-tree value with undef");
}
#endif
@@ -2853,7 +2912,7 @@ void BoUpSLP::optimizeGatherSequence() {
}
}
if (In) {
- assert(std::find(Visited.begin(), Visited.end(), In) == Visited.end());
+ assert(!is_contained(Visited, In));
Visited.push_back(In);
}
}
@@ -2994,9 +3053,10 @@ bool BoUpSLP::BlockScheduling::extendSchedulingRegion(Value *V) {
}
// Search up and down at the same time, because we don't know if the new
// instruction is above or below the existing scheduling region.
- BasicBlock::reverse_iterator UpIter(ScheduleStart->getIterator());
+ BasicBlock::reverse_iterator UpIter =
+ ++ScheduleStart->getIterator().getReverse();
BasicBlock::reverse_iterator UpperEnd = BB->rend();
- BasicBlock::iterator DownIter(ScheduleEnd);
+ BasicBlock::iterator DownIter = ScheduleEnd->getIterator();
BasicBlock::iterator LowerEnd = BB->end();
for (;;) {
if (++ScheduleRegionSize > ScheduleRegionSizeLimit) {
@@ -3451,6 +3511,11 @@ void BoUpSLP::computeMinimumValueSizes() {
Mask.getBitWidth() - Mask.countLeadingZeros(), MaxBitWidth);
}
+ // True if the roots can be zero-extended back to their original type, rather
+ // than sign-extended. We know that if the leading bits are not demanded, we
+ // can safely zero-extend. So we initialize IsKnownPositive to True.
+ bool IsKnownPositive = true;
+
// If all the bits of the roots are demanded, we can try a little harder to
// compute a narrower type. This can happen, for example, if the roots are
// getelementptr indices. InstCombine promotes these indices to the pointer
@@ -3462,11 +3527,41 @@ void BoUpSLP::computeMinimumValueSizes() {
// compute the number of high-order bits we can truncate.
if (MaxBitWidth == DL->getTypeSizeInBits(TreeRoot[0]->getType())) {
MaxBitWidth = 8u;
+
+ // Determine if the sign bit of all the roots is known to be zero. If not,
+ // IsKnownPositive is set to False.
+ IsKnownPositive = all_of(TreeRoot, [&](Value *R) {
+ bool KnownZero = false;
+ bool KnownOne = false;
+ ComputeSignBit(R, KnownZero, KnownOne, *DL);
+ return KnownZero;
+ });
+
+ // Determine the maximum number of bits required to store the scalar
+ // values.
for (auto *Scalar : ToDemote) {
auto NumSignBits = ComputeNumSignBits(Scalar, *DL, 0, AC, 0, DT);
auto NumTypeBits = DL->getTypeSizeInBits(Scalar->getType());
MaxBitWidth = std::max<unsigned>(NumTypeBits - NumSignBits, MaxBitWidth);
}
+
+ // If we can't prove that the sign bit is zero, we must add one to the
+ // maximum bit width to account for the unknown sign bit. This preserves
+ // the existing sign bit so we can safely sign-extend the root back to the
+ // original type. Otherwise, if we know the sign bit is zero, we will
+ // zero-extend the root instead.
+ //
+ // FIXME: This is somewhat suboptimal, as there will be cases where adding
+ // one to the maximum bit width will yield a larger-than-necessary
+ // type. In general, we need to add an extra bit only if we can't
+ // prove that the upper bit of the original type is equal to the
+ // upper bit of the proposed smaller type. If these two bits are the
+ // same (either zero or one) we know that sign-extending from the
+ // smaller type will result in the same value. Here, since we can't
+ // yet prove this, we are just making the proposed smaller type
+ // larger to ensure correctness.
+ if (!IsKnownPositive)
+ ++MaxBitWidth;
}
// Round MaxBitWidth up to the next power-of-two.
@@ -3486,7 +3581,7 @@ void BoUpSLP::computeMinimumValueSizes() {
// Finally, map the values we can demote to the maximum bit with we computed.
for (auto *Scalar : ToDemote)
- MinBWs[Scalar] = MaxBitWidth;
+ MinBWs[Scalar] = std::make_pair(MaxBitWidth, !IsKnownPositive);
}
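
The comments above reason about when the narrowed roots may be zero- rather than sign-extended; a compact standalone sketch of the arithmetic (plain C++, hypothetical helper): take the widest demanded-bit count over the scalars, add one bit when the sign bit cannot be proven zero, then round up to the next power of two.

    #include <algorithm>
    #include <utility>
    #include <vector>

    // Each scalar is described by (TypeBits, SignBits), as ComputeNumSignBits
    // would report. Returns {Width, MustSignExtend}.
    std::pair<unsigned, bool>
    minimumValueWidth(const std::vector<std::pair<unsigned, unsigned>> &Scalars,
                      bool IsKnownPositive) {
      unsigned MaxBitWidth = 8;
      for (const auto &S : Scalars)
        MaxBitWidth = std::max(MaxBitWidth, S.first - S.second);
      // If the sign bit might be set, keep one extra bit so that sign-extending
      // the narrowed value reproduces the original.
      if (!IsKnownPositive)
        ++MaxBitWidth;
      // Round up to the next power of two.
      unsigned Width = 1;
      while (Width < MaxBitWidth)
        Width *= 2;
      return {Width, /*MustSignExtend=*/!IsKnownPositive};
    }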
namespace {
@@ -3642,8 +3737,7 @@ static bool hasValueBeenRAUWed(ArrayRef<Value *> VL, ArrayRef<WeakVH> VH,
return !std::equal(VL.begin(), VL.end(), VH.begin());
}
-bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain,
- int CostThreshold, BoUpSLP &R,
+bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain, BoUpSLP &R,
unsigned VecRegSize) {
unsigned ChainLen = Chain.size();
DEBUG(dbgs() << "SLP: Analyzing a store chain of length " << ChainLen
@@ -3672,12 +3766,15 @@ bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain,
ArrayRef<Value *> Operands = Chain.slice(i, VF);
R.buildTree(Operands);
+ if (R.isTreeTinyAndNotFullyVectorizable())
+ continue;
+
R.computeMinimumValueSizes();
int Cost = R.getTreeCost();
DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF << "\n");
- if (Cost < CostThreshold) {
+ if (Cost < -SLPCostThreshold) {
DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n");
R.vectorizeTree();
@@ -3691,7 +3788,7 @@ bool SLPVectorizerPass::vectorizeStoreChain(ArrayRef<Value *> Chain,
}
bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
- int costThreshold, BoUpSLP &R) {
+ BoUpSLP &R) {
SetVector<StoreInst *> Heads, Tails;
SmallDenseMap<StoreInst *, StoreInst *> ConsecutiveChain;
@@ -3746,8 +3843,9 @@ bool SLPVectorizerPass::vectorizeStores(ArrayRef<StoreInst *> Stores,
// FIXME: Is division-by-2 the correct step? Should we assert that the
// register size is a power-of-2?
- for (unsigned Size = R.getMaxVecRegSize(); Size >= R.getMinVecRegSize(); Size /= 2) {
- if (vectorizeStoreChain(Operands, costThreshold, R, Size)) {
+ for (unsigned Size = R.getMaxVecRegSize(); Size >= R.getMinVecRegSize();
+ Size /= 2) {
+ if (vectorizeStoreChain(Operands, R, Size)) {
// Mark the vectorized stores so that we don't vectorize them again.
VectorizedStores.insert(Operands.begin(), Operands.end());
Changed = true;
@@ -3805,11 +3903,12 @@ bool SLPVectorizerPass::tryToVectorizePair(Value *A, Value *B, BoUpSLP &R) {
bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
ArrayRef<Value *> BuildVector,
- bool allowReorder) {
+ bool AllowReorder) {
if (VL.size() < 2)
return false;
- DEBUG(dbgs() << "SLP: Vectorizing a list of length = " << VL.size() << ".\n");
+ DEBUG(dbgs() << "SLP: Trying to vectorize a list of length = " << VL.size()
+ << ".\n");
// Check that all of the parts are scalar instructions of the same type.
Instruction *I0 = dyn_cast<Instruction>(VL[0]);
@@ -3818,10 +3917,11 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
unsigned Opcode0 = I0->getOpcode();
- // FIXME: Register size should be a parameter to this function, so we can
- // try different vectorization factors.
unsigned Sz = R.getVectorElementSize(I0);
- unsigned VF = R.getMinVecRegSize() / Sz;
+ unsigned MinVF = std::max(2U, R.getMinVecRegSize() / Sz);
+ unsigned MaxVF = std::max<unsigned>(PowerOf2Floor(VL.size()), MinVF);
+ if (MaxVF < 2)
+ return false;
for (Value *V : VL) {
Type *Ty = V->getType();
@@ -3837,70 +3937,89 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
// Keep track of values that were deleted by vectorizing in the loop below.
SmallVector<WeakVH, 8> TrackValues(VL.begin(), VL.end());
- for (unsigned i = 0, e = VL.size(); i < e; ++i) {
- unsigned OpsWidth = 0;
+ unsigned NextInst = 0, MaxInst = VL.size();
+ for (unsigned VF = MaxVF; NextInst + 1 < MaxInst && VF >= MinVF;
+ VF /= 2) {
+ // No actual vectorization should happen if the number of parts is the same
+ // as the provided vectorization factor (i.e. the scalar type is used for
+ // vector code during codegen).
+ auto *VecTy = VectorType::get(VL[0]->getType(), VF);
+ if (TTI->getNumberOfParts(VecTy) == VF)
+ continue;
+ for (unsigned I = NextInst; I < MaxInst; ++I) {
+ unsigned OpsWidth = 0;
- if (i + VF > e)
- OpsWidth = e - i;
- else
- OpsWidth = VF;
+ if (I + VF > MaxInst)
+ OpsWidth = MaxInst - I;
+ else
+ OpsWidth = VF;
- if (!isPowerOf2_32(OpsWidth) || OpsWidth < 2)
- break;
+ if (!isPowerOf2_32(OpsWidth) || OpsWidth < 2)
+ break;
- // Check that a previous iteration of this loop did not delete the Value.
- if (hasValueBeenRAUWed(VL, TrackValues, i, OpsWidth))
- continue;
+ // Check that a previous iteration of this loop did not delete the Value.
+ if (hasValueBeenRAUWed(VL, TrackValues, I, OpsWidth))
+ continue;
- DEBUG(dbgs() << "SLP: Analyzing " << OpsWidth << " operations "
- << "\n");
- ArrayRef<Value *> Ops = VL.slice(i, OpsWidth);
-
- ArrayRef<Value *> BuildVectorSlice;
- if (!BuildVector.empty())
- BuildVectorSlice = BuildVector.slice(i, OpsWidth);
-
- R.buildTree(Ops, BuildVectorSlice);
- // TODO: check if we can allow reordering also for other cases than
- // tryToVectorizePair()
- if (allowReorder && R.shouldReorder()) {
- assert(Ops.size() == 2);
- assert(BuildVectorSlice.empty());
- Value *ReorderedOps[] = { Ops[1], Ops[0] };
- R.buildTree(ReorderedOps, None);
- }
- R.computeMinimumValueSizes();
- int Cost = R.getTreeCost();
+ DEBUG(dbgs() << "SLP: Analyzing " << OpsWidth << " operations "
+ << "\n");
+ ArrayRef<Value *> Ops = VL.slice(I, OpsWidth);
+
+ ArrayRef<Value *> BuildVectorSlice;
+ if (!BuildVector.empty())
+ BuildVectorSlice = BuildVector.slice(I, OpsWidth);
+
+ R.buildTree(Ops, BuildVectorSlice);
+ // TODO: check if we can allow reordering for more cases.
+ if (AllowReorder && R.shouldReorder()) {
+ // Conceptually, there is nothing actually preventing us from trying to
+ // reorder a larger list. In fact, we do exactly this when vectorizing
+ // reductions. However, at this point, we only expect to get here from
+ // tryToVectorizePair().
+ assert(Ops.size() == 2);
+ assert(BuildVectorSlice.empty());
+ Value *ReorderedOps[] = {Ops[1], Ops[0]};
+ R.buildTree(ReorderedOps, None);
+ }
+ if (R.isTreeTinyAndNotFullyVectorizable())
+ continue;
- if (Cost < -SLPCostThreshold) {
- DEBUG(dbgs() << "SLP: Vectorizing list at cost:" << Cost << ".\n");
- Value *VectorizedRoot = R.vectorizeTree();
-
- // Reconstruct the build vector by extracting the vectorized root. This
- // way we handle the case where some elements of the vector are undefined.
- // (return (inserelt <4 xi32> (insertelt undef (opd0) 0) (opd1) 2))
- if (!BuildVectorSlice.empty()) {
- // The insert point is the last build vector instruction. The vectorized
- // root will precede it. This guarantees that we get an instruction. The
- // vectorized tree could have been constant folded.
- Instruction *InsertAfter = cast<Instruction>(BuildVectorSlice.back());
- unsigned VecIdx = 0;
- for (auto &V : BuildVectorSlice) {
- IRBuilder<NoFolder> Builder(InsertAfter->getParent(),
- ++BasicBlock::iterator(InsertAfter));
- Instruction *I = cast<Instruction>(V);
- assert(isa<InsertElementInst>(I) || isa<InsertValueInst>(I));
- Instruction *Extract = cast<Instruction>(Builder.CreateExtractElement(
- VectorizedRoot, Builder.getInt32(VecIdx++)));
- I->setOperand(1, Extract);
- I->removeFromParent();
- I->insertAfter(Extract);
- InsertAfter = I;
+ R.computeMinimumValueSizes();
+ int Cost = R.getTreeCost();
+
+ if (Cost < -SLPCostThreshold) {
+ DEBUG(dbgs() << "SLP: Vectorizing list at cost:" << Cost << ".\n");
+ Value *VectorizedRoot = R.vectorizeTree();
+
+ // Reconstruct the build vector by extracting the vectorized root. This
+ // way we handle the case where some elements of the vector are
+ // undefined.
+ // (return (insertelement <4 x i32> (insertelement undef (opd0) 0) (opd1) 2))
+ if (!BuildVectorSlice.empty()) {
+ // The insert point is the last build vector instruction. The
+ // vectorized root will precede it. This guarantees that we get an
+ // instruction. The vectorized tree could have been constant folded.
+ Instruction *InsertAfter = cast<Instruction>(BuildVectorSlice.back());
+ unsigned VecIdx = 0;
+ for (auto &V : BuildVectorSlice) {
+ IRBuilder<NoFolder> Builder(InsertAfter->getParent(),
+ ++BasicBlock::iterator(InsertAfter));
+ Instruction *I = cast<Instruction>(V);
+ assert(isa<InsertElementInst>(I) || isa<InsertValueInst>(I));
+ Instruction *Extract =
+ cast<Instruction>(Builder.CreateExtractElement(
+ VectorizedRoot, Builder.getInt32(VecIdx++)));
+ I->setOperand(1, Extract);
+ I->removeFromParent();
+ I->insertAfter(Extract);
+ InsertAfter = I;
+ }
}
+ // Move to the next bundle.
+ I += VF - 1;
+ NextInst = I + 1;
+ Changed = true;
}
- // Move to the next bundle.
- i += VF - 1;
- Changed = true;
}
}
@@ -3911,36 +4030,40 @@ bool SLPVectorizerPass::tryToVectorize(BinaryOperator *V, BoUpSLP &R) {
if (!V)
return false;
+ Value *P = V->getParent();
+
+ // Vectorize in current basic block only.
+ auto *Op0 = dyn_cast<Instruction>(V->getOperand(0));
+ auto *Op1 = dyn_cast<Instruction>(V->getOperand(1));
+ if (!Op0 || !Op1 || Op0->getParent() != P || Op1->getParent() != P)
+ return false;
+
// Try to vectorize V.
- if (tryToVectorizePair(V->getOperand(0), V->getOperand(1), R))
+ if (tryToVectorizePair(Op0, Op1, R))
return true;
- BinaryOperator *A = dyn_cast<BinaryOperator>(V->getOperand(0));
- BinaryOperator *B = dyn_cast<BinaryOperator>(V->getOperand(1));
+ auto *A = dyn_cast<BinaryOperator>(Op0);
+ auto *B = dyn_cast<BinaryOperator>(Op1);
// Try to skip B.
if (B && B->hasOneUse()) {
- BinaryOperator *B0 = dyn_cast<BinaryOperator>(B->getOperand(0));
- BinaryOperator *B1 = dyn_cast<BinaryOperator>(B->getOperand(1));
- if (tryToVectorizePair(A, B0, R)) {
+ auto *B0 = dyn_cast<BinaryOperator>(B->getOperand(0));
+ auto *B1 = dyn_cast<BinaryOperator>(B->getOperand(1));
+ if (B0 && B0->getParent() == P && tryToVectorizePair(A, B0, R))
return true;
- }
- if (tryToVectorizePair(A, B1, R)) {
+ if (B1 && B1->getParent() == P && tryToVectorizePair(A, B1, R))
return true;
- }
}
// Try to skip A.
if (A && A->hasOneUse()) {
- BinaryOperator *A0 = dyn_cast<BinaryOperator>(A->getOperand(0));
- BinaryOperator *A1 = dyn_cast<BinaryOperator>(A->getOperand(1));
- if (tryToVectorizePair(A0, B, R)) {
+ auto *A0 = dyn_cast<BinaryOperator>(A->getOperand(0));
+ auto *A1 = dyn_cast<BinaryOperator>(A->getOperand(1));
+ if (A0 && A0->getParent() == P && tryToVectorizePair(A0, B, R))
return true;
- }
- if (tryToVectorizePair(A1, B, R)) {
+ if (A1 && A1->getParent() == P && tryToVectorizePair(A1, B, R))
return true;
- }
}
- return 0;
+ return false;
}
/// \brief Generate a shuffle mask to be used in a reduction tree.
@@ -3973,7 +4096,7 @@ static Value *createRdxShuffleMask(unsigned VecLen, unsigned NumEltsToRdx,
return ConstantVector::get(ShuffleMask);
}
-
+namespace {
/// Model horizontal reductions.
///
/// A horizontal reduction is a tree of reduction operations (currently add and
@@ -4006,7 +4129,14 @@ class HorizontalReduction {
SmallVector<Value *, 32> ReducedVals;
BinaryOperator *ReductionRoot;
- PHINode *ReductionPHI;
+ // After a successful horizontal reduction vectorization attempt for a PHI
+ // node, the vectorizer tries to update the root binary op by combining the
+ // vectorized tree and the ReductionPHI node. But during vectorization this
+ // ReductionPHI can itself be vectorized and replaced by an undef value,
+ // while the instruction itself is marked for deletion. This 'marked for
+ // deletion' PHI node can then be used in a new binary operation, causing a
+ // "Use still stuck around after Def is destroyed" crash upon PHI node
+ // deletion.
+ WeakVH ReductionPHI;
/// The opcode of the reduction.
unsigned ReductionOpcode;
@@ -4025,14 +4155,13 @@ public:
unsigned MinVecRegSize;
HorizontalReduction(unsigned MinVecRegSize)
- : ReductionRoot(nullptr), ReductionPHI(nullptr), ReductionOpcode(0),
- ReducedValueOpcode(0), IsPairwiseReduction(false), ReduxWidth(0),
+ : ReductionRoot(nullptr), ReductionOpcode(0), ReducedValueOpcode(0),
+ IsPairwiseReduction(false), ReduxWidth(0),
MinVecRegSize(MinVecRegSize) {}
/// \brief Try to find a reduction tree.
bool matchAssociativeReduction(PHINode *Phi, BinaryOperator *B) {
- assert((!Phi ||
- std::find(Phi->op_begin(), Phi->op_end(), B) != Phi->op_end()) &&
+ assert((!Phi || is_contained(Phi->operands(), B)) &&
"Thi phi needs to use the binary operator");
// We could have an initial reduction that is not an add.
@@ -4113,12 +4242,21 @@ public:
// Visit left or right.
Value *NextV = TreeN->getOperand(EdgeToVist);
- // We currently only allow BinaryOperator's and SelectInst's as reduction
- // values in our tree.
- if (isa<BinaryOperator>(NextV) || isa<SelectInst>(NextV))
- Stack.push_back(std::make_pair(cast<Instruction>(NextV), 0));
- else if (NextV != Phi)
+ if (NextV != Phi) {
+ auto *I = dyn_cast<Instruction>(NextV);
+ // Continue analysis if the next operand is a reduction operation or
+ // (possibly) a reduced value. If the reduced value opcode is not set,
+ // the first operation encountered that is not the reduction operation is
+ // taken as the reduced value class.
+ if (I && (!ReducedValueOpcode || I->getOpcode() == ReducedValueOpcode ||
+ I->getOpcode() == ReductionOpcode)) {
+ if (!ReducedValueOpcode && I->getOpcode() != ReductionOpcode)
+ ReducedValueOpcode = I->getOpcode();
+ Stack.push_back(std::make_pair(I, 0));
+ continue;
+ }
return false;
+ }
}
return true;
}
@@ -4141,7 +4279,15 @@ public:
unsigned i = 0;
for (; i < NumReducedVals - ReduxWidth + 1; i += ReduxWidth) {
- V.buildTree(makeArrayRef(&ReducedVals[i], ReduxWidth), ReductionOps);
+ auto VL = makeArrayRef(&ReducedVals[i], ReduxWidth);
+ V.buildTree(VL, ReductionOps);
+ if (V.shouldReorder()) {
+ SmallVector<Value *, 8> Reversed(VL.rbegin(), VL.rend());
+ V.buildTree(Reversed, ReductionOps);
+ }
+ if (V.isTreeTinyAndNotFullyVectorizable())
+ continue;
+
V.computeMinimumValueSizes();
// Estimate cost.
@@ -4175,7 +4321,7 @@ public:
ReducedVals[i]);
}
// Update users.
- if (ReductionPHI) {
+ if (ReductionPHI && !isa<UndefValue>(ReductionPHI)) {
assert(ReductionRoot && "Need a reduction operation");
ReductionRoot->setOperand(0, VectorizedTree);
ReductionRoot->setOperand(1, ReductionPHI);
@@ -4202,7 +4348,8 @@ private:
int VecReduxCost = IsPairwiseReduction ? PairwiseRdxCost : SplittingRdxCost;
int ScalarReduxCost =
- ReduxWidth * TTI->getArithmeticInstrCost(ReductionOpcode, VecTy);
+ (ReduxWidth - 1) *
+ TTI->getArithmeticInstrCost(ReductionOpcode, ScalarTy);
DEBUG(dbgs() << "SLP: Adding cost " << VecReduxCost - ScalarReduxCost
<< " for reduction that starts with " << *FirstReducedVal
@@ -4254,6 +4401,7 @@ private:
return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
}
};
+} // end anonymous namespace
/// \brief Recognize construction of vectors like
/// %ra = insertelement <4 x float> undef, float %s0, i32 0
@@ -4354,7 +4502,7 @@ static Value *getReductionValue(const DominatorTree *DT, PHINode *P,
return nullptr;
// There is a loop latch, return the incoming value if it comes from
- // that. This reduction pattern occassionaly turns up.
+ // that. This reduction pattern occasionally turns up.
if (P->getIncomingBlock(0) == BBLatch) {
Rdx = P->getIncomingValue(0);
} else if (P->getIncomingBlock(1) == BBLatch) {
@@ -4367,29 +4515,143 @@ static Value *getReductionValue(const DominatorTree *DT, PHINode *P,
return nullptr;
}
+namespace {
+/// Tracks an instruction and its children.
+class WeakVHWithLevel final : public CallbackVH {
+ /// Operand index of the instruction currently being analyzed.
+ unsigned Level = 0;
+ /// Is this the instruction that should be vectorized, or are we now
+ /// processing children (i.e. operands of this instruction) for potential
+ /// vectorization?
+ bool IsInitial = true;
+
+public:
+ explicit WeakVHWithLevel() = default;
+ WeakVHWithLevel(Value *V) : CallbackVH(V){};
+ /// Restart the children analysis each time the value is replaced by a new instruction.
+ void allUsesReplacedWith(Value *New) override {
+ setValPtr(New);
+ Level = 0;
+ IsInitial = true;
+ }
+ /// Check if the instruction was not deleted during vectorization.
+ bool isValid() const { return !getValPtr(); }
+ /// Must the instruction itself be vectorized?
+ bool isInitial() const { return IsInitial; }
+ /// Try to vectorize children.
+ void clearInitial() { IsInitial = false; }
+ /// Are all children processed already?
+ bool isFinal() const {
+ assert(getValPtr() &&
+ (isa<Instruction>(getValPtr()) &&
+ cast<Instruction>(getValPtr())->getNumOperands() >= Level));
+ return getValPtr() &&
+ cast<Instruction>(getValPtr())->getNumOperands() == Level;
+ }
+ /// Get next child operation.
+ Value *nextOperand() {
+ assert(getValPtr() && isa<Instruction>(getValPtr()) &&
+ cast<Instruction>(getValPtr())->getNumOperands() > Level);
+ return cast<Instruction>(getValPtr())->getOperand(Level++);
+ }
+ virtual ~WeakVHWithLevel() = default;
+};
+} // namespace
+
/// \brief Attempt to reduce a horizontal reduction.
/// If it is legal to match a horizontal reduction feeding
-/// the phi node P with reduction operators BI, then check if it
-/// can be done.
+/// the phi node P with reduction operators Root in a basic block BB, then check
+/// if it can be done.
/// \returns true if a horizontal reduction was matched and reduced.
/// \returns false if a horizontal reduction was not matched.
-static bool canMatchHorizontalReduction(PHINode *P, BinaryOperator *BI,
- BoUpSLP &R, TargetTransformInfo *TTI,
- unsigned MinRegSize) {
+static bool canBeVectorized(
+ PHINode *P, Instruction *Root, BasicBlock *BB, BoUpSLP &R,
+ TargetTransformInfo *TTI,
+ const function_ref<bool(BinaryOperator *, BoUpSLP &)> Vectorize) {
if (!ShouldVectorizeHor)
return false;
- HorizontalReduction HorRdx(MinRegSize);
- if (!HorRdx.matchAssociativeReduction(P, BI))
+ if (!Root)
return false;
- // If there is a sufficient number of reduction values, reduce
- // to a nearby power-of-2. Can safely generate oversized
- // vectors and rely on the backend to split them to legal sizes.
- HorRdx.ReduxWidth =
- std::max((uint64_t)4, PowerOf2Floor(HorRdx.numReductionValues()));
+ if (Root->getParent() != BB)
+ return false;
+ SmallVector<WeakVHWithLevel, 8> Stack(1, Root);
+ SmallSet<Value *, 8> VisitedInstrs;
+ bool Res = false;
+ while (!Stack.empty()) {
+ Value *V = Stack.back();
+ if (!V) {
+ Stack.pop_back();
+ continue;
+ }
+ auto *Inst = dyn_cast<Instruction>(V);
+ if (!Inst || isa<PHINode>(Inst)) {
+ Stack.pop_back();
+ continue;
+ }
+ if (Stack.back().isInitial()) {
+ Stack.back().clearInitial();
+ if (auto *BI = dyn_cast<BinaryOperator>(Inst)) {
+ HorizontalReduction HorRdx(R.getMinVecRegSize());
+ if (HorRdx.matchAssociativeReduction(P, BI)) {
+ // If there is a sufficient number of reduction values, reduce
+ // to a nearby power-of-2. Can safely generate oversized
+ // vectors and rely on the backend to split them to legal sizes.
+ HorRdx.ReduxWidth =
+ std::max((uint64_t)4, PowerOf2Floor(HorRdx.numReductionValues()));
+
+ if (HorRdx.tryToReduce(R, TTI)) {
+ Res = true;
+ P = nullptr;
+ continue;
+ }
+ }
+ if (P) {
+ Inst = dyn_cast<Instruction>(BI->getOperand(0));
+ if (Inst == P)
+ Inst = dyn_cast<Instruction>(BI->getOperand(1));
+ if (!Inst) {
+ P = nullptr;
+ continue;
+ }
+ }
+ }
+ P = nullptr;
+ if (Vectorize(dyn_cast<BinaryOperator>(Inst), R)) {
+ Res = true;
+ continue;
+ }
+ }
+ if (Stack.back().isFinal()) {
+ Stack.pop_back();
+ continue;
+ }
- return HorRdx.tryToReduce(R, TTI);
+ if (auto *NextV = dyn_cast<Instruction>(Stack.back().nextOperand()))
+ if (NextV->getParent() == BB && VisitedInstrs.insert(NextV).second &&
+ Stack.size() < RecursionMaxDepth)
+ Stack.push_back(NextV);
+ }
+ return Res;
+}
+
+bool SLPVectorizerPass::vectorizeRootInstruction(PHINode *P, Value *V,
+ BasicBlock *BB, BoUpSLP &R,
+ TargetTransformInfo *TTI) {
+ if (!V)
+ return false;
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I)
+ return false;
+
+ if (!isa<BinaryOperator>(I))
+ P = nullptr;
+ // Try to match and vectorize a horizontal reduction.
+ return canBeVectorized(P, I, BB, R, TTI,
+ [this](BinaryOperator *BI, BoUpSLP &R) -> bool {
+ return tryToVectorize(BI, R);
+ });
}
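
For the reduction width chosen inside canBeVectorized above ("reduce to a nearby power-of-2", floored at 4), a small arithmetic sketch: 13 reduction values give a width of 8, while 3 values are still attempted with width 4, relying on the backend to legalize oversized vectors.

    #include <algorithm>
    #include <cstdint>

    // Largest power of two not exceeding N (PowerOf2Floor-style), clamped to 4.
    std::uint64_t chooseReduxWidth(std::uint64_t NumReductionValues) {
      std::uint64_t FloorPow2 = 1;
      while (FloorPow2 * 2 <= NumReductionValues)
        FloorPow2 *= 2;
      return std::max<std::uint64_t>(4, FloorPow2);
    }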
bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
@@ -4459,65 +4721,42 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
if (P->getNumIncomingValues() != 2)
return Changed;
- Value *Rdx = getReductionValue(DT, P, BB, LI);
-
- // Check if this is a Binary Operator.
- BinaryOperator *BI = dyn_cast_or_null<BinaryOperator>(Rdx);
- if (!BI)
- continue;
-
// Try to match and vectorize a horizontal reduction.
- if (canMatchHorizontalReduction(P, BI, R, TTI, R.getMinVecRegSize())) {
- Changed = true;
- it = BB->begin();
- e = BB->end();
- continue;
- }
-
- Value *Inst = BI->getOperand(0);
- if (Inst == P)
- Inst = BI->getOperand(1);
-
- if (tryToVectorize(dyn_cast<BinaryOperator>(Inst), R)) {
- // We would like to start over since some instructions are deleted
- // and the iterator may become invalid value.
+ if (vectorizeRootInstruction(P, getReductionValue(DT, P, BB, LI), BB, R,
+ TTI)) {
Changed = true;
it = BB->begin();
e = BB->end();
continue;
}
-
continue;
}
- if (ShouldStartVectorizeHorAtStore)
- if (StoreInst *SI = dyn_cast<StoreInst>(it))
- if (BinaryOperator *BinOp =
- dyn_cast<BinaryOperator>(SI->getValueOperand())) {
- if (canMatchHorizontalReduction(nullptr, BinOp, R, TTI,
- R.getMinVecRegSize()) ||
- tryToVectorize(BinOp, R)) {
- Changed = true;
- it = BB->begin();
- e = BB->end();
- continue;
- }
+ if (ShouldStartVectorizeHorAtStore) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(it)) {
+ // Try to match and vectorize a horizontal reduction.
+ if (vectorizeRootInstruction(nullptr, SI->getValueOperand(), BB, R,
+ TTI)) {
+ Changed = true;
+ it = BB->begin();
+ e = BB->end();
+ continue;
}
+ }
+ }
// Try to vectorize horizontal reductions feeding into a return.
- if (ReturnInst *RI = dyn_cast<ReturnInst>(it))
- if (RI->getNumOperands() != 0)
- if (BinaryOperator *BinOp =
- dyn_cast<BinaryOperator>(RI->getOperand(0))) {
- DEBUG(dbgs() << "SLP: Found a return to vectorize.\n");
- if (tryToVectorizePair(BinOp->getOperand(0),
- BinOp->getOperand(1), R)) {
- Changed = true;
- it = BB->begin();
- e = BB->end();
- continue;
- }
+ if (ReturnInst *RI = dyn_cast<ReturnInst>(it)) {
+ if (RI->getNumOperands() != 0) {
+ // Try to match and vectorize a horizontal reduction.
+ if (vectorizeRootInstruction(nullptr, RI->getOperand(0), BB, R, TTI)) {
+ Changed = true;
+ it = BB->begin();
+ e = BB->end();
+ continue;
}
+ }
+ }
// Try to vectorize trees that start at compare instructions.
if (CmpInst *CI = dyn_cast<CmpInst>(it)) {
@@ -4530,16 +4769,14 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
continue;
}
- for (int i = 0; i < 2; ++i) {
- if (BinaryOperator *BI = dyn_cast<BinaryOperator>(CI->getOperand(i))) {
- if (tryToVectorizePair(BI->getOperand(0), BI->getOperand(1), R)) {
- Changed = true;
- // We would like to start over since some instructions are deleted
- // and the iterator may become invalid value.
- it = BB->begin();
- e = BB->end();
- break;
- }
+ for (int I = 0; I < 2; ++I) {
+ if (vectorizeRootInstruction(nullptr, CI->getOperand(I), BB, R, TTI)) {
+ Changed = true;
+ // We would like to start over since some instructions are deleted
+ // and the iterator may become invalid value.
+ it = BB->begin();
+ e = BB->end();
+ break;
}
}
continue;
@@ -4690,8 +4927,7 @@ bool SLPVectorizerPass::vectorizeStoreChains(BoUpSLP &R) {
// may cause a significant compile-time increase.
for (unsigned CI = 0, CE = it->second.size(); CI < CE; CI+=16) {
unsigned Len = std::min<unsigned>(CE - CI, 16);
- Changed |= vectorizeStores(makeArrayRef(&it->second[CI], Len),
- -SLPCostThreshold, R);
+ Changed |= vectorizeStores(makeArrayRef(&it->second[CI], Len), R);
}
}
return Changed;
diff --git a/contrib/llvm/tools/bugpoint/BugDriver.cpp b/contrib/llvm/tools/bugpoint/BugDriver.cpp
index 030749fa9ea0..78f35293d2a5 100644
--- a/contrib/llvm/tools/bugpoint/BugDriver.cpp
+++ b/contrib/llvm/tools/bugpoint/BugDriver.cpp
@@ -29,20 +29,20 @@
using namespace llvm;
namespace llvm {
- Triple TargetTriple;
+Triple TargetTriple;
}
// Anonymous namespace to define command line options for debugging.
//
namespace {
- // Output - The user can specify a file containing the expected output of the
- // program. If this filename is set, it is used as the reference diff source,
- // otherwise the raw input run through an interpreter is used as the reference
- // source.
- //
- cl::opt<std::string>
- OutputFile("output", cl::desc("Specify a reference program output "
- "(for miscompilation detection)"));
+// Output - The user can specify a file containing the expected output of the
+// program. If this filename is set, it is used as the reference diff source,
+// otherwise the raw input run through an interpreter is used as the reference
+// source.
+//
+cl::opt<std::string> OutputFile("output",
+ cl::desc("Specify a reference program output "
+ "(for miscompilation detection)"));
}
/// setNewProgram - If we reduce or update the program somehow, call this method
@@ -53,27 +53,26 @@ void BugDriver::setNewProgram(Module *M) {
Program = M;
}
-
/// getPassesString - Turn a list of passes into a string which indicates the
/// command line options that must be passed to add the passes.
///
std::string llvm::getPassesString(const std::vector<std::string> &Passes) {
std::string Result;
for (unsigned i = 0, e = Passes.size(); i != e; ++i) {
- if (i) Result += " ";
+ if (i)
+ Result += " ";
Result += "-";
Result += Passes[i];
}
return Result;
}
-BugDriver::BugDriver(const char *toolname, bool find_bugs,
- unsigned timeout, unsigned memlimit, bool use_valgrind,
- LLVMContext& ctxt)
- : Context(ctxt), ToolName(toolname), ReferenceOutputFile(OutputFile),
- Program(nullptr), Interpreter(nullptr), SafeInterpreter(nullptr),
- cc(nullptr), run_find_bugs(find_bugs), Timeout(timeout),
- MemoryLimit(memlimit), UseValgrind(use_valgrind) {}
+BugDriver::BugDriver(const char *toolname, bool find_bugs, unsigned timeout,
+ unsigned memlimit, bool use_valgrind, LLVMContext &ctxt)
+ : Context(ctxt), ToolName(toolname), ReferenceOutputFile(OutputFile),
+ Program(nullptr), Interpreter(nullptr), SafeInterpreter(nullptr),
+ cc(nullptr), run_find_bugs(find_bugs), Timeout(timeout),
+ MemoryLimit(memlimit), UseValgrind(use_valgrind) {}
BugDriver::~BugDriver() {
delete Program;
@@ -123,13 +122,15 @@ bool BugDriver::addSources(const std::vector<std::string> &Filenames) {
// Load the first input file.
Program = parseInputFile(Filenames[0], Context).release();
- if (!Program) return true;
+ if (!Program)
+ return true;
outs() << "Read input file : '" << Filenames[0] << "'\n";
for (unsigned i = 1, e = Filenames.size(); i != e; ++i) {
std::unique_ptr<Module> M = parseInputFile(Filenames[i], Context);
- if (!M.get()) return true;
+ if (!M.get())
+ return true;
outs() << "Linking in input file: '" << Filenames[i] << "'\n";
if (Linker::linkModules(*Program, std::move(M)))
@@ -142,16 +143,14 @@ bool BugDriver::addSources(const std::vector<std::string> &Filenames) {
return false;
}
-
-
/// run - The top level method that is invoked after all of the instance
/// variables are set up from command line arguments.
///
-bool BugDriver::run(std::string &ErrMsg) {
+Error BugDriver::run() {
if (run_find_bugs) {
// Rearrange the passes and apply them to the program. Repeat this process
// until the user kills the program or we find a bug.
- return runManyPasses(PassesToRun, ErrMsg);
+ return runManyPasses(PassesToRun);
}
// If we're not running as a child, the first thing that we must do is
@@ -168,15 +167,14 @@ bool BugDriver::run(std::string &ErrMsg) {
}
// Set up the execution environment, selecting a method to run LLVM bitcode.
- if (initializeExecutionEnvironment()) return true;
+ if (Error E = initializeExecutionEnvironment())
+ return E;
// Test to see if we have a code generator crash.
outs() << "Running the code generator to test for a crash: ";
- std::string Error;
- compileProgram(Program, &Error);
- if (!Error.empty()) {
- outs() << Error;
- return debugCodeGeneratorCrash(ErrMsg);
+ if (Error E = compileProgram(Program)) {
+ outs() << toString(std::move(E));
+ return debugCodeGeneratorCrash();
}
outs() << '\n';
@@ -187,8 +185,9 @@ bool BugDriver::run(std::string &ErrMsg) {
bool CreatedOutput = false;
if (ReferenceOutputFile.empty()) {
outs() << "Generating reference output from raw program: ";
- if (!createReferenceFile(Program)) {
- return debugCodeGeneratorCrash(ErrMsg);
+ if (Error E = createReferenceFile(Program)) {
+ errs() << toString(std::move(E));
+ return debugCodeGeneratorCrash();
}
CreatedOutput = true;
}
@@ -202,34 +201,33 @@ bool BugDriver::run(std::string &ErrMsg) {
// matches, then we assume there is a miscompilation bug and try to
// diagnose it.
outs() << "*** Checking the code generator...\n";
- bool Diff = diffProgram(Program, "", "", false, &Error);
- if (!Error.empty()) {
- errs() << Error;
- return debugCodeGeneratorCrash(ErrMsg);
+ Expected<bool> Diff = diffProgram(Program, "", "", false);
+ if (Error E = Diff.takeError()) {
+ errs() << toString(std::move(E));
+ return debugCodeGeneratorCrash();
}
- if (!Diff) {
+ if (!*Diff) {
outs() << "\n*** Output matches: Debugging miscompilation!\n";
- debugMiscompilation(&Error);
- if (!Error.empty()) {
- errs() << Error;
- return debugCodeGeneratorCrash(ErrMsg);
+ if (Error E = debugMiscompilation()) {
+ errs() << toString(std::move(E));
+ return debugCodeGeneratorCrash();
}
- return false;
+ return Error::success();
}
outs() << "\n*** Input program does not match reference diff!\n";
outs() << "Debugging code generator problem!\n";
- bool Failure = debugCodeGenerator(&Error);
- if (!Error.empty()) {
- errs() << Error;
- return debugCodeGeneratorCrash(ErrMsg);
+ if (Error E = debugCodeGenerator()) {
+ errs() << toString(std::move(E));
+ return debugCodeGeneratorCrash();
}
- return Failure;
+ return Error::success();
}
-void llvm::PrintFunctionList(const std::vector<Function*> &Funcs) {
+void llvm::PrintFunctionList(const std::vector<Function *> &Funcs) {
unsigned NumPrint = Funcs.size();
- if (NumPrint > 10) NumPrint = 10;
+ if (NumPrint > 10)
+ NumPrint = 10;
for (unsigned i = 0; i != NumPrint; ++i)
outs() << " " << Funcs[i]->getName();
if (NumPrint < Funcs.size())
@@ -237,9 +235,10 @@ void llvm::PrintFunctionList(const std::vector<Function*> &Funcs) {
outs().flush();
}
-void llvm::PrintGlobalVariableList(const std::vector<GlobalVariable*> &GVs) {
+void llvm::PrintGlobalVariableList(const std::vector<GlobalVariable *> &GVs) {
unsigned NumPrint = GVs.size();
- if (NumPrint > 10) NumPrint = 10;
+ if (NumPrint > 10)
+ NumPrint = 10;
for (unsigned i = 0; i != NumPrint; ++i)
outs() << " " << GVs[i]->getName();
if (NumPrint < GVs.size())
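The BugDriver.cpp changes above all follow one conversion: failure no longer comes back through a std::string out-parameter, it travels in an llvm::Error or llvm::Expected<T> return value that the caller must consume. A minimal, self-contained sketch of that idiom follows; the function names are invented for the example and only the Error/Expected API itself comes from llvm/Support/Error.h.

#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Invented stand-in for something like diffProgram(): the result and any
// failure now travel in the same return value.
static Expected<bool> diffSomething(bool ToolCrashes) {
  if (ToolCrashes)
    return make_error<StringError>("tool crashed", inconvertibleErrorCode());
  return false; // success: no difference observed
}

// Invented stand-in for run(): errors are propagated, not stringified.
static Error runSketch(bool ToolCrashes) {
  Expected<bool> Diff = diffSomething(ToolCrashes);
  if (Error E = Diff.takeError())
    return E;      // same check-and-propagate shape as the converted run()
  if (!*Diff)      // note the dereference to reach the wrapped bool
    outs() << "outputs match\n";
  return Error::success();
}

int main() {
  if (Error E = runSketch(false)) { // some caller must consume the Error
    errs() << toString(std::move(E)) << '\n';
    return 1;
  }
  return 0;
}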
diff --git a/contrib/llvm/tools/bugpoint/BugDriver.h b/contrib/llvm/tools/bugpoint/BugDriver.h
index 52ec2c005649..af80e8b2b77d 100644
--- a/contrib/llvm/tools/bugpoint/BugDriver.h
+++ b/contrib/llvm/tools/bugpoint/BugDriver.h
@@ -17,6 +17,7 @@
#define LLVM_TOOLS_BUGPOINT_BUGDRIVER_H
#include "llvm/IR/ValueMap.h"
+#include "llvm/Support/Error.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <memory>
#include <string>
@@ -45,13 +46,13 @@ extern bool DisableSimplifyCFG;
extern bool BugpointIsInterrupted;
class BugDriver {
- LLVMContext& Context;
+ LLVMContext &Context;
const char *ToolName; // argv[0] of bugpoint
std::string ReferenceOutputFile; // Name of `good' output file
- Module *Program; // The raw program, linked together
+ Module *Program; // The raw program, linked together
std::vector<std::string> PassesToRun;
- AbstractInterpreter *Interpreter; // How to run the program
- AbstractInterpreter *SafeInterpreter; // To generate reference output, etc.
+ AbstractInterpreter *Interpreter; // How to run the program
+ AbstractInterpreter *SafeInterpreter; // To generate reference output, etc.
CC *cc;
bool run_find_bugs;
unsigned Timeout;
@@ -63,14 +64,13 @@ class BugDriver {
friend class ReduceMisCodegenFunctions;
public:
- BugDriver(const char *toolname, bool find_bugs,
- unsigned timeout, unsigned memlimit, bool use_valgrind,
- LLVMContext& ctxt);
+ BugDriver(const char *toolname, bool find_bugs, unsigned timeout,
+ unsigned memlimit, bool use_valgrind, LLVMContext &ctxt);
~BugDriver();
const char *getToolName() const { return ToolName; }
- LLVMContext& getContext() const { return Context; }
+ LLVMContext &getContext() const { return Context; }
// Set up methods... these methods are used to copy information about the
// command line arguments into instance variables of BugDriver.
@@ -80,31 +80,29 @@ public:
void setPassesToRun(const std::vector<std::string> &PTR) {
PassesToRun = PTR;
}
- const std::vector<std::string> &getPassesToRun() const {
- return PassesToRun;
- }
+ const std::vector<std::string> &getPassesToRun() const { return PassesToRun; }
/// run - The top level method that is invoked after all of the instance
/// variables are set up from command line arguments. The \p as_child argument
/// indicates whether the driver is to run in parent mode or child mode.
///
- bool run(std::string &ErrMsg);
+ Error run();
/// debugOptimizerCrash - This method is called when some optimizer pass
/// crashes on input. It attempts to prune down the testcase to something
/// reasonable, and figure out exactly which pass is crashing.
///
- bool debugOptimizerCrash(const std::string &ID = "passes");
+ Error debugOptimizerCrash(const std::string &ID = "passes");
/// debugCodeGeneratorCrash - This method is called when the code generator
/// crashes on an input. It attempts to reduce the input as much as possible
/// while still causing the code generator to crash.
- bool debugCodeGeneratorCrash(std::string &Error);
+ Error debugCodeGeneratorCrash();
/// debugMiscompilation - This method is used when the passes selected are not
/// crashing, but the generated output is semantically different from the
/// input.
- void debugMiscompilation(std::string *Error);
+ Error debugMiscompilation();
/// debugPassMiscompilation - This method is called when the specified pass
/// miscompiles Program as input. It tries to reduce the testcase to
@@ -118,13 +116,12 @@ public:
/// compileSharedObject - This method creates a SharedObject from a given
/// BitcodeFile for debugging a code generator.
///
- std::string compileSharedObject(const std::string &BitcodeFile,
- std::string &Error);
+ Expected<std::string> compileSharedObject(const std::string &BitcodeFile);
/// debugCodeGenerator - This method narrows down a module to a function or
/// set of functions, using the CBE as a ``safe'' code generator for other
/// functions that are not under consideration.
- bool debugCodeGenerator(std::string *Error);
+ Error debugCodeGenerator();
/// isExecutingJIT - Returns true if bugpoint is currently testing the JIT
///
@@ -142,62 +139,56 @@ public:
AbstractInterpreter *switchToSafeInterpreter() {
AbstractInterpreter *Old = Interpreter;
- Interpreter = (AbstractInterpreter*)SafeInterpreter;
+ Interpreter = (AbstractInterpreter *)SafeInterpreter;
return Old;
}
- void switchToInterpreter(AbstractInterpreter *AI) {
- Interpreter = AI;
- }
+ void switchToInterpreter(AbstractInterpreter *AI) { Interpreter = AI; }
/// setNewProgram - If we reduce or update the program somehow, call this
/// method to update bugdriver with it. This deletes the old module and sets
/// the specified one as the current program.
void setNewProgram(Module *M);
- /// compileProgram - Try to compile the specified module, returning false and
- /// setting Error if an error occurs. This is used for code generation
+ /// Try to compile the specified module. This is used for code generation
/// crash testing.
- ///
- void compileProgram(Module *M, std::string *Error) const;
+ Error compileProgram(Module *M) const;
/// executeProgram - This method runs "Program", capturing the output of the
/// program to a file. A recommended filename may be optionally specified.
///
- std::string executeProgram(const Module *Program,
- std::string OutputFilename,
- std::string Bitcode,
- const std::string &SharedObjects,
- AbstractInterpreter *AI,
- std::string *Error) const;
+ Expected<std::string> executeProgram(const Module *Program,
+ std::string OutputFilename,
+ std::string Bitcode,
+ const std::string &SharedObjects,
+ AbstractInterpreter *AI) const;
/// executeProgramSafely - Used to create reference output with the "safe"
/// backend, if reference output is not provided. If there is a problem with
/// the code generator (e.g., llc crashes), this will return false and set
/// Error.
///
- std::string executeProgramSafely(const Module *Program,
- const std::string &OutputFile,
- std::string *Error) const;
+ Expected<std::string>
+ executeProgramSafely(const Module *Program,
+ const std::string &OutputFile) const;
/// createReferenceFile - calls compileProgram and then records the output
- /// into ReferenceOutputFile. Returns true if reference file created, false
+ /// into ReferenceOutputFile. Returns true if reference file created, false
/// otherwise. Note: initializeExecutionEnvironment should be called BEFORE
/// this function.
///
- bool createReferenceFile(Module *M, const std::string &Filename
- = "bugpoint.reference.out-%%%%%%%");
+ Error createReferenceFile(Module *M, const std::string &Filename =
+ "bugpoint.reference.out-%%%%%%%");
/// diffProgram - This method executes the specified module and diffs the
/// output against the file specified by ReferenceOutputFile. If the output
/// is different, 1 is returned. If there is a problem with the code
/// generator (e.g., llc crashes), this will return -1 and set Error.
///
- bool diffProgram(const Module *Program,
- const std::string &BitcodeFile = "",
- const std::string &SharedObj = "",
- bool RemoveBitcode = false,
- std::string *Error = nullptr) const;
+ Expected<bool> diffProgram(const Module *Program,
+ const std::string &BitcodeFile = "",
+ const std::string &SharedObj = "",
+ bool RemoveBitcode = false) const;
/// EmitProgressBitcode - This function is used to output M to a file named
/// "bugpoint-ID.bc".
@@ -251,31 +242,27 @@ public:
/// or failed, unless Quiet is set. ExtraArgs specifies additional arguments
/// to pass to the child bugpoint instance.
///
- bool runPasses(Module *Program,
- const std::vector<std::string> &PassesToRun,
+ bool runPasses(Module *Program, const std::vector<std::string> &PassesToRun,
std::string &OutputFilename, bool DeleteOutput = false,
bool Quiet = false, unsigned NumExtraArgs = 0,
- const char * const *ExtraArgs = nullptr) const;
+ const char *const *ExtraArgs = nullptr) const;
/// runPasses - Just like the method above, but this just returns true or
/// false indicating whether or not the optimizer crashed on the specified
/// input (true = crashed). Does not produce any output.
///
- bool runPasses(Module *M,
- const std::vector<std::string> &PassesToRun) const {
+ bool runPasses(Module *M, const std::vector<std::string> &PassesToRun) const {
std::string Filename;
return runPasses(M, PassesToRun, Filename, true);
}
-
- /// runManyPasses - Take the specified pass list and create different
- /// combinations of passes to compile the program with. Compile the program with
- /// each set and mark test to see if it compiled correctly. If the passes
- /// compiled correctly output nothing and rearrange the passes into a new order.
- /// If the passes did not compile correctly, output the command required to
- /// recreate the failure. This returns true if a compiler error is found.
- ///
- bool runManyPasses(const std::vector<std::string> &AllPasses,
- std::string &ErrMsg);
+
+ /// Take the specified pass list and create different combinations of passes
+ /// to compile the program with. Compile the program with each set and mark
+ /// test to see if it compiled correctly. If the passes compiled correctly
+ /// output nothing and rearrange the passes into a new order. If the passes
+ /// did not compile correctly, output the command required to recreate the
+ /// failure.
+ Error runManyPasses(const std::vector<std::string> &AllPasses);
/// writeProgramToFile - This writes the current "Program" to the named
/// bitcode file. If an error occurs, true is returned.
@@ -288,7 +275,7 @@ private:
/// initializeExecutionEnvironment - This method is used to set up the
/// environment for executing LLVM programs.
///
- bool initializeExecutionEnvironment();
+ Error initializeExecutionEnvironment();
};
/// Given a bitcode or assembly input filename, parse and return it, or return
@@ -304,11 +291,11 @@ std::string getPassesString(const std::vector<std::string> &Passes);
/// PrintFunctionList - prints out list of problematic functions
///
-void PrintFunctionList(const std::vector<Function*> &Funcs);
+void PrintFunctionList(const std::vector<Function *> &Funcs);
/// PrintGlobalVariableList - prints out list of problematic global variables
///
-void PrintGlobalVariableList(const std::vector<GlobalVariable*> &GVs);
+void PrintGlobalVariableList(const std::vector<GlobalVariable *> &GVs);
// DeleteGlobalInitializer - "Remove" the global variable by deleting its
// initializer, making it external.
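With the declarations above now returning Error and Expected<T>, a caller has to consume every failure explicitly or the checked-error machinery will assert in debugging builds. One conventional way to do that at a tool's top level is llvm::ExitOnError; the snippet below is only an illustration against the header above (bugpoint's real main() may do its own handling, and the timeout and memory-limit numbers are arbitrary).

#include "BugDriver.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/Error.h"
using namespace llvm;

int main(int argc, char **argv) {
  // ExitOnError prints the message and exits non-zero when handed a failing
  // Error or Expected, keeping the driver free of per-call boilerplate.
  ExitOnError ExitOnErr("bugpoint-sketch: ");

  LLVMContext Context;
  BugDriver D(argv[0], /*find_bugs=*/false, /*timeout=*/300,
              /*memlimit=*/400, /*use_valgrind=*/false, Context);
  // D.addSources(...) and D.setPassesToRun(...) would be filled in here.
  ExitOnErr(D.run()); // the Error-returning entry point declared above
  return 0;
}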
diff --git a/contrib/llvm/tools/bugpoint/CrashDebugger.cpp b/contrib/llvm/tools/bugpoint/CrashDebugger.cpp
index b25e6ecec19b..0cae0669477f 100644
--- a/contrib/llvm/tools/bugpoint/CrashDebugger.cpp
+++ b/contrib/llvm/tools/bugpoint/CrashDebugger.cpp
@@ -16,8 +16,10 @@
#include "ToolRunner.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/StringSet.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
+#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LegacyPassManager.h"
@@ -28,57 +30,60 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileUtilities.h"
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <set>
using namespace llvm;
namespace {
- cl::opt<bool>
- KeepMain("keep-main",
- cl::desc("Force function reduction to keep main"),
- cl::init(false));
- cl::opt<bool>
- NoGlobalRM ("disable-global-remove",
- cl::desc("Do not remove global variables"),
- cl::init(false));
-
- cl::opt<bool>
- ReplaceFuncsWithNull("replace-funcs-with-null",
- cl::desc("When stubbing functions, replace all uses will null"),
- cl::init(false));
- cl::opt<bool>
- DontReducePassList("disable-pass-list-reduction",
- cl::desc("Skip pass list reduction steps"),
- cl::init(false));
-
- cl::opt<bool> NoNamedMDRM("disable-namedmd-remove",
- cl::desc("Do not remove global named metadata"),
- cl::init(false));
- cl::opt<bool> VerboseErrors("verbose-errors",
+cl::opt<bool> KeepMain("keep-main",
+ cl::desc("Force function reduction to keep main"),
+ cl::init(false));
+cl::opt<bool> NoGlobalRM("disable-global-remove",
+ cl::desc("Do not remove global variables"),
+ cl::init(false));
+
+cl::opt<bool> ReplaceFuncsWithNull(
+ "replace-funcs-with-null",
+ cl::desc("When stubbing functions, replace all uses will null"),
+ cl::init(false));
+cl::opt<bool> DontReducePassList("disable-pass-list-reduction",
+ cl::desc("Skip pass list reduction steps"),
+ cl::init(false));
+
+cl::opt<bool> NoNamedMDRM("disable-namedmd-remove",
+ cl::desc("Do not remove global named metadata"),
+ cl::init(false));
+cl::opt<bool> NoStripDebugInfo("disable-strip-debuginfo",
+ cl::desc("Do not strip debug info metadata"),
+ cl::init(false));
+cl::opt<bool> NoStripDebugTypeInfo("disable-strip-debug-types",
+ cl::desc("Do not strip debug type info metadata"),
+ cl::init(false));
+cl::opt<bool> VerboseErrors("verbose-errors",
cl::desc("Print the output of crashing program"),
cl::init(false));
}
namespace llvm {
- class ReducePassList : public ListReducer<std::string> {
- BugDriver &BD;
- public:
- ReducePassList(BugDriver &bd) : BD(bd) {}
-
- // doTest - Return true iff running the "removed" passes succeeds, and
- // running the "Kept" passes fail when run on the output of the "removed"
- // passes. If we return true, we update the current module of bugpoint.
- //
- TestResult doTest(std::vector<std::string> &Removed,
- std::vector<std::string> &Kept,
- std::string &Error) override;
- };
+class ReducePassList : public ListReducer<std::string> {
+ BugDriver &BD;
+
+public:
+ ReducePassList(BugDriver &bd) : BD(bd) {}
+
+ // Return true iff running the "removed" passes succeeds, and running the
+ // "Kept" passes fail when run on the output of the "removed" passes. If we
+ // return true, we update the current module of bugpoint.
+ Expected<TestResult> doTest(std::vector<std::string> &Removed,
+ std::vector<std::string> &Kept) override;
+};
}
-ReducePassList::TestResult
+Expected<ReducePassList::TestResult>
ReducePassList::doTest(std::vector<std::string> &Prefix,
- std::vector<std::string> &Suffix,
- std::string &Error) {
+ std::vector<std::string> &Suffix) {
std::string PrefixOutput;
Module *OrigProgram = nullptr;
if (!Prefix.empty()) {
@@ -98,11 +103,11 @@ ReducePassList::doTest(std::vector<std::string> &Prefix,
sys::fs::remove(PrefixOutput);
}
- outs() << "Checking to see if these passes crash: "
- << getPassesString(Suffix) << ": ";
+ outs() << "Checking to see if these passes crash: " << getPassesString(Suffix)
+ << ": ";
if (BD.runPasses(BD.getProgram(), Suffix)) {
- delete OrigProgram; // The suffix crashes alone...
+ delete OrigProgram; // The suffix crashes alone...
return KeepSuffix;
}
@@ -115,44 +120,43 @@ ReducePassList::doTest(std::vector<std::string> &Prefix,
}
namespace {
- /// ReduceCrashingGlobalVariables - This works by removing the global
- /// variable's initializer and seeing if the program still crashes. If it
- /// does, then we keep that program and try again.
- ///
- class ReduceCrashingGlobalVariables : public ListReducer<GlobalVariable*> {
- BugDriver &BD;
- bool (*TestFn)(const BugDriver &, Module *);
- public:
- ReduceCrashingGlobalVariables(BugDriver &bd,
- bool (*testFn)(const BugDriver &, Module *))
+/// ReduceCrashingGlobalVariables - This works by removing the global
+/// variable's initializer and seeing if the program still crashes. If it
+/// does, then we keep that program and try again.
+///
+class ReduceCrashingGlobalVariables : public ListReducer<GlobalVariable *> {
+ BugDriver &BD;
+ bool (*TestFn)(const BugDriver &, Module *);
+
+public:
+ ReduceCrashingGlobalVariables(BugDriver &bd,
+ bool (*testFn)(const BugDriver &, Module *))
: BD(bd), TestFn(testFn) {}
- TestResult doTest(std::vector<GlobalVariable*> &Prefix,
- std::vector<GlobalVariable*> &Kept,
- std::string &Error) override {
- if (!Kept.empty() && TestGlobalVariables(Kept))
- return KeepSuffix;
- if (!Prefix.empty() && TestGlobalVariables(Prefix))
- return KeepPrefix;
- return NoFailure;
- }
+ Expected<TestResult> doTest(std::vector<GlobalVariable *> &Prefix,
+ std::vector<GlobalVariable *> &Kept) override {
+ if (!Kept.empty() && TestGlobalVariables(Kept))
+ return KeepSuffix;
+ if (!Prefix.empty() && TestGlobalVariables(Prefix))
+ return KeepPrefix;
+ return NoFailure;
+ }
- bool TestGlobalVariables(std::vector<GlobalVariable*> &GVs);
- };
+ bool TestGlobalVariables(std::vector<GlobalVariable *> &GVs);
+};
}
-bool
-ReduceCrashingGlobalVariables::TestGlobalVariables(
- std::vector<GlobalVariable*> &GVs) {
+bool ReduceCrashingGlobalVariables::TestGlobalVariables(
+ std::vector<GlobalVariable *> &GVs) {
// Clone the program to try hacking it apart...
ValueToValueMapTy VMap;
Module *M = CloneModule(BD.getProgram(), VMap).release();
// Convert list to set for fast lookup...
- std::set<GlobalVariable*> GVSet;
+ std::set<GlobalVariable *> GVSet;
for (unsigned i = 0, e = GVs.size(); i != e; ++i) {
- GlobalVariable* CMGV = cast<GlobalVariable>(VMap[GVs[i]]);
+ GlobalVariable *CMGV = cast<GlobalVariable>(VMap[GVs[i]]);
assert(CMGV && "Global Variable not in module?!");
GVSet.insert(CMGV);
}
@@ -172,7 +176,7 @@ ReduceCrashingGlobalVariables::TestGlobalVariables(
// Try running the hacked up program...
if (TestFn(BD, M)) {
- BD.setNewProgram(M); // It crashed, keep the trimmed version...
+ BD.setNewProgram(M); // It crashed, keep the trimmed version...
// Make sure to use global variable pointers that point into the now-current
// module.
@@ -185,45 +189,46 @@ ReduceCrashingGlobalVariables::TestGlobalVariables(
}
namespace {
- /// ReduceCrashingFunctions reducer - This works by removing functions and
- /// seeing if the program still crashes. If it does, then keep the newer,
- /// smaller program.
- ///
- class ReduceCrashingFunctions : public ListReducer<Function*> {
- BugDriver &BD;
- bool (*TestFn)(const BugDriver &, Module *);
- public:
- ReduceCrashingFunctions(BugDriver &bd,
- bool (*testFn)(const BugDriver &, Module *))
+/// ReduceCrashingFunctions reducer - This works by removing functions and
+/// seeing if the program still crashes. If it does, then keep the newer,
+/// smaller program.
+///
+class ReduceCrashingFunctions : public ListReducer<Function *> {
+ BugDriver &BD;
+ bool (*TestFn)(const BugDriver &, Module *);
+
+public:
+ ReduceCrashingFunctions(BugDriver &bd,
+ bool (*testFn)(const BugDriver &, Module *))
: BD(bd), TestFn(testFn) {}
- TestResult doTest(std::vector<Function*> &Prefix,
- std::vector<Function*> &Kept,
- std::string &Error) override {
- if (!Kept.empty() && TestFuncs(Kept))
- return KeepSuffix;
- if (!Prefix.empty() && TestFuncs(Prefix))
- return KeepPrefix;
- return NoFailure;
- }
+ Expected<TestResult> doTest(std::vector<Function *> &Prefix,
+ std::vector<Function *> &Kept) override {
+ if (!Kept.empty() && TestFuncs(Kept))
+ return KeepSuffix;
+ if (!Prefix.empty() && TestFuncs(Prefix))
+ return KeepPrefix;
+ return NoFailure;
+ }
- bool TestFuncs(std::vector<Function*> &Prefix);
- };
+ bool TestFuncs(std::vector<Function *> &Prefix);
+};
}
-static void RemoveFunctionReferences(Module *M, const char* Name) {
+static void RemoveFunctionReferences(Module *M, const char *Name) {
auto *UsedVar = M->getGlobalVariable(Name, true);
- if (!UsedVar || !UsedVar->hasInitializer()) return;
+ if (!UsedVar || !UsedVar->hasInitializer())
+ return;
if (isa<ConstantAggregateZero>(UsedVar->getInitializer())) {
assert(UsedVar->use_empty());
UsedVar->eraseFromParent();
return;
}
auto *OldUsedVal = cast<ConstantArray>(UsedVar->getInitializer());
- std::vector<Constant*> Used;
- for(Value *V : OldUsedVal->operand_values()) {
+ std::vector<Constant *> Used;
+ for (Value *V : OldUsedVal->operand_values()) {
Constant *Op = cast<Constant>(V->stripPointerCasts());
- if(!Op->isNullValue()) {
+ if (!Op->isNullValue()) {
Used.push_back(cast<Constant>(V));
}
}
@@ -234,11 +239,9 @@ static void RemoveFunctionReferences(Module *M, const char* Name) {
UsedVar->setInitializer(NewUsedVal);
}
-bool ReduceCrashingFunctions::TestFuncs(std::vector<Function*> &Funcs) {
+bool ReduceCrashingFunctions::TestFuncs(std::vector<Function *> &Funcs) {
// If main isn't present, claim there is no problem.
- if (KeepMain && std::find(Funcs.begin(), Funcs.end(),
- BD.getProgram()->getFunction("main")) ==
- Funcs.end())
+ if (KeepMain && !is_contained(Funcs, BD.getProgram()->getFunction("main")))
return false;
// Clone the program to try hacking it apart...
@@ -246,7 +249,7 @@ bool ReduceCrashingFunctions::TestFuncs(std::vector<Function*> &Funcs) {
Module *M = CloneModule(BD.getProgram(), VMap).release();
// Convert list to set for fast lookup...
- std::set<Function*> Functions;
+ std::set<Function *> Functions;
for (unsigned i = 0, e = Funcs.size(); i != e; ++i) {
Function *CMF = cast<Function>(VMap[Funcs[i]]);
assert(CMF && "Function not in module?!");
@@ -265,7 +268,7 @@ bool ReduceCrashingFunctions::TestFuncs(std::vector<Function*> &Funcs) {
if (!I.isDeclaration() && !Functions.count(&I))
DeleteFunctionBody(&I);
} else {
- std::vector<GlobalValue*> ToRemove;
+ std::vector<GlobalValue *> ToRemove;
// First, remove aliases to functions we're about to purge.
for (GlobalAlias &Alias : M->aliases()) {
GlobalObject *Root = Alias.getBaseObject();
@@ -307,7 +310,7 @@ bool ReduceCrashingFunctions::TestFuncs(std::vector<Function*> &Funcs) {
}
// Try running the hacked up program...
if (TestFn(BD, M)) {
- BD.setNewProgram(M); // It crashed, keep the trimmed version...
+ BD.setNewProgram(M); // It crashed, keep the trimmed version...
// Make sure to use function pointers that point into the now-current
// module.
@@ -318,48 +321,87 @@ bool ReduceCrashingFunctions::TestFuncs(std::vector<Function*> &Funcs) {
return false;
}
-
namespace {
- /// ReduceCrashingBlocks reducer - This works by setting the terminators of
- /// all terminators except the specified basic blocks to a 'ret' instruction,
- /// then running the simplify-cfg pass. This has the effect of chopping up
- /// the CFG really fast which can reduce large functions quickly.
- ///
- class ReduceCrashingBlocks : public ListReducer<const BasicBlock*> {
- BugDriver &BD;
- bool (*TestFn)(const BugDriver &, Module *);
- public:
- ReduceCrashingBlocks(BugDriver &bd,
- bool (*testFn)(const BugDriver &, Module *))
- : BD(bd), TestFn(testFn) {}
+/// Simplify the CFG without completely destroying it.
+/// This is not well defined, but basically comes down to "try to eliminate
+/// unreachable blocks and constant fold terminators without deciding that
+/// certain undefined behavior cuts off the program at the legs".
+void simpleSimplifyCfg(Function &F, SmallVectorImpl<BasicBlock *> &BBs) {
+ if (F.empty())
+ return;
- TestResult doTest(std::vector<const BasicBlock*> &Prefix,
- std::vector<const BasicBlock*> &Kept,
- std::string &Error) override {
- if (!Kept.empty() && TestBlocks(Kept))
- return KeepSuffix;
- if (!Prefix.empty() && TestBlocks(Prefix))
- return KeepPrefix;
- return NoFailure;
- }
+ for (auto *BB : BBs) {
+ ConstantFoldTerminator(BB);
+ MergeBlockIntoPredecessor(BB);
+ }
- bool TestBlocks(std::vector<const BasicBlock*> &Prefix);
- };
+ // Remove unreachable blocks
+ // removeUnreachableBlocks can't be used here, it will turn various
+ // undefined behavior into unreachables, but bugpoint was the thing that
+ // generated the undefined behavior, and we don't want it to kill the entire
+ // program.
+ SmallPtrSet<BasicBlock *, 16> Visited;
+ for (auto *BB : depth_first(&F.getEntryBlock()))
+ Visited.insert(BB);
+
+ SmallVector<BasicBlock *, 16> Unreachable;
+ for (auto &BB : F)
+ if (!Visited.count(&BB))
+ Unreachable.push_back(&BB);
+
+ // The dead BB's may be in a dead cycle or otherwise have references to each
+ // other. Because of this, we have to drop all references first, then delete
+ // them all at once.
+ for (auto *BB : Unreachable) {
+ for (BasicBlock *Successor : successors(&*BB))
+ if (Visited.count(Successor))
+ Successor->removePredecessor(&*BB);
+ BB->dropAllReferences();
+ }
+ for (auto *BB : Unreachable)
+ BB->eraseFromParent();
+}
+/// ReduceCrashingBlocks reducer - This works by setting the terminators of
+/// all basic blocks except the specified ones to a 'ret' instruction,
+/// then running the simplify-cfg pass. This has the effect of chopping up
+/// the CFG really fast which can reduce large functions quickly.
+///
+class ReduceCrashingBlocks : public ListReducer<const BasicBlock *> {
+ BugDriver &BD;
+ bool (*TestFn)(const BugDriver &, Module *);
+
+public:
+ ReduceCrashingBlocks(BugDriver &BD,
+ bool (*testFn)(const BugDriver &, Module *))
+ : BD(BD), TestFn(testFn) {}
+
+ Expected<TestResult> doTest(std::vector<const BasicBlock *> &Prefix,
+ std::vector<const BasicBlock *> &Kept) override {
+ if (!Kept.empty() && TestBlocks(Kept))
+ return KeepSuffix;
+ if (!Prefix.empty() && TestBlocks(Prefix))
+ return KeepPrefix;
+ return NoFailure;
+ }
+
+ bool TestBlocks(std::vector<const BasicBlock *> &Prefix);
+};
}
-bool ReduceCrashingBlocks::TestBlocks(std::vector<const BasicBlock*> &BBs) {
+bool ReduceCrashingBlocks::TestBlocks(std::vector<const BasicBlock *> &BBs) {
// Clone the program to try hacking it apart...
ValueToValueMapTy VMap;
Module *M = CloneModule(BD.getProgram(), VMap).release();
// Convert list to set for fast lookup...
- SmallPtrSet<BasicBlock*, 8> Blocks;
+ SmallPtrSet<BasicBlock *, 8> Blocks;
for (unsigned i = 0, e = BBs.size(); i != e; ++i)
Blocks.insert(cast<BasicBlock>(VMap[BBs[i]]));
outs() << "Checking for crash with only these blocks:";
unsigned NumPrint = Blocks.size();
- if (NumPrint > 10) NumPrint = 10;
+ if (NumPrint > 10)
+ NumPrint = 10;
for (unsigned i = 0, e = NumPrint; i != e; ++i)
outs() << " " << BBs[i]->getName();
if (NumPrint < Blocks.size())
@@ -392,26 +434,33 @@ bool ReduceCrashingBlocks::TestBlocks(std::vector<const BasicBlock*> &BBs) {
// a "persistent mapping" by turning basic blocks into <function, name> pairs.
// This won't work well if blocks are unnamed, but that is just the risk we
// have to take.
- std::vector<std::pair<std::string, std::string> > BlockInfo;
+ std::vector<std::pair<std::string, std::string>> BlockInfo;
for (BasicBlock *BB : Blocks)
BlockInfo.emplace_back(BB->getParent()->getName(), BB->getName());
- // Now run the CFG simplify pass on the function...
+ SmallVector<BasicBlock *, 16> ToProcess;
+ for (auto &F : *M) {
+ for (auto &BB : F)
+ if (!Blocks.count(&BB))
+ ToProcess.push_back(&BB);
+ simpleSimplifyCfg(F, ToProcess);
+ ToProcess.clear();
+ }
+ // Verify we didn't break anything
std::vector<std::string> Passes;
- Passes.push_back("simplifycfg");
Passes.push_back("verify");
std::unique_ptr<Module> New = BD.runPassesOn(M, Passes);
delete M;
if (!New) {
- errs() << "simplifycfg failed!\n";
+ errs() << "verify failed!\n";
exit(1);
}
M = New.release();
// Try running on the hacked up program...
if (TestFn(BD, M)) {
- BD.setNewProgram(M); // It crashed, keep the trimmed version...
+ BD.setNewProgram(M); // It crashed, keep the trimmed version...
// Make sure to use basic block pointers that point into the now-current
// module, and that they don't include any deleted blocks.
@@ -419,51 +468,253 @@ bool ReduceCrashingBlocks::TestBlocks(std::vector<const BasicBlock*> &BBs) {
const ValueSymbolTable &GST = M->getValueSymbolTable();
for (unsigned i = 0, e = BlockInfo.size(); i != e; ++i) {
Function *F = cast<Function>(GST.lookup(BlockInfo[i].first));
- ValueSymbolTable &ST = F->getValueSymbolTable();
- Value* V = ST.lookup(BlockInfo[i].second);
+ Value *V = F->getValueSymbolTable()->lookup(BlockInfo[i].second);
if (V && V->getType() == Type::getLabelTy(V->getContext()))
BBs.push_back(cast<BasicBlock>(V));
}
return true;
}
- delete M; // It didn't crash, try something else.
+ delete M; // It didn't crash, try something else.
return false;
}
namespace {
- /// ReduceCrashingInstructions reducer - This works by removing the specified
- /// non-terminator instructions and replacing them with undef.
- ///
- class ReduceCrashingInstructions : public ListReducer<const Instruction*> {
- BugDriver &BD;
- bool (*TestFn)(const BugDriver &, Module *);
- public:
- ReduceCrashingInstructions(BugDriver &bd,
- bool (*testFn)(const BugDriver &, Module *))
- : BD(bd), TestFn(testFn) {}
+/// ReduceCrashingConditionals reducer - This works by changing
+/// conditional branches to unconditional ones, then simplifying the CFG
+/// This has the effect of chopping up the CFG really fast which can reduce
+/// large functions quickly.
+///
+class ReduceCrashingConditionals : public ListReducer<const BasicBlock *> {
+ BugDriver &BD;
+ bool (*TestFn)(const BugDriver &, Module *);
+ bool Direction;
+
+public:
+ ReduceCrashingConditionals(BugDriver &bd,
+ bool (*testFn)(const BugDriver &, Module *),
+ bool Direction)
+ : BD(bd), TestFn(testFn), Direction(Direction) {}
+
+ Expected<TestResult> doTest(std::vector<const BasicBlock *> &Prefix,
+ std::vector<const BasicBlock *> &Kept) override {
+ if (!Kept.empty() && TestBlocks(Kept))
+ return KeepSuffix;
+ if (!Prefix.empty() && TestBlocks(Prefix))
+ return KeepPrefix;
+ return NoFailure;
+ }
+
+ bool TestBlocks(std::vector<const BasicBlock *> &Prefix);
+};
+}
+
+bool ReduceCrashingConditionals::TestBlocks(
+ std::vector<const BasicBlock *> &BBs) {
+ // Clone the program to try hacking it apart...
+ ValueToValueMapTy VMap;
+ Module *M = CloneModule(BD.getProgram(), VMap).release();
+
+ // Convert list to set for fast lookup...
+ SmallPtrSet<const BasicBlock *, 8> Blocks;
+ for (const auto *BB : BBs)
+ Blocks.insert(cast<BasicBlock>(VMap[BB]));
+
+ outs() << "Checking for crash with changing conditionals to always jump to "
+ << (Direction ? "true" : "false") << ":";
+ unsigned NumPrint = Blocks.size();
+ if (NumPrint > 10)
+ NumPrint = 10;
+ for (unsigned i = 0, e = NumPrint; i != e; ++i)
+ outs() << " " << BBs[i]->getName();
+ if (NumPrint < Blocks.size())
+ outs() << "... <" << Blocks.size() << " total>";
+ outs() << ": ";
+
+ // Loop over and hack up any blocks that are not listed...
+ for (auto &F : *M)
+ for (auto &BB : F)
+ if (!Blocks.count(&BB)) {
+ auto *BR = dyn_cast<BranchInst>(BB.getTerminator());
+ if (!BR || !BR->isConditional())
+ continue;
+ if (Direction)
+ BR->setCondition(ConstantInt::getTrue(BR->getContext()));
+ else
+ BR->setCondition(ConstantInt::getFalse(BR->getContext()));
+ }
+
+ // The following may destroy some blocks, so we save them first
+ std::vector<std::pair<std::string, std::string>> BlockInfo;
+
+ for (const BasicBlock *BB : Blocks)
+ BlockInfo.emplace_back(BB->getParent()->getName(), BB->getName());
+
+ SmallVector<BasicBlock *, 16> ToProcess;
+ for (auto &F : *M) {
+ for (auto &BB : F)
+ if (!Blocks.count(&BB))
+ ToProcess.push_back(&BB);
+ simpleSimplifyCfg(F, ToProcess);
+ ToProcess.clear();
+ }
+ // Verify we didn't break anything
+ std::vector<std::string> Passes;
+ Passes.push_back("verify");
+ std::unique_ptr<Module> New = BD.runPassesOn(M, Passes);
+ delete M;
+ if (!New) {
+ errs() << "verify failed!\n";
+ exit(1);
+ }
+ M = New.release();
+
+ // Try running on the hacked up program...
+ if (TestFn(BD, M)) {
+ BD.setNewProgram(M); // It crashed, keep the trimmed version...
+
+ // Make sure to use basic block pointers that point into the now-current
+ // module, and that they don't include any deleted blocks.
+ BBs.clear();
+ const ValueSymbolTable &GST = M->getValueSymbolTable();
+ for (auto &BI : BlockInfo) {
+ auto *F = cast<Function>(GST.lookup(BI.first));
+ Value *V = F->getValueSymbolTable()->lookup(BI.second);
+ if (V && V->getType() == Type::getLabelTy(V->getContext()))
+ BBs.push_back(cast<BasicBlock>(V));
+ }
+ return true;
+ }
+ delete M; // It didn't crash, try something else.
+ return false;
+}
+
+namespace {
+/// SimplifyCFG reducer - This works by calling SimplifyCFG on each basic block
+/// in the program.
+
+class ReduceSimplifyCFG : public ListReducer<const BasicBlock *> {
+ BugDriver &BD;
+ bool (*TestFn)(const BugDriver &, Module *);
+ TargetTransformInfo TTI;
+
+public:
+ ReduceSimplifyCFG(BugDriver &bd, bool (*testFn)(const BugDriver &, Module *))
+ : BD(bd), TestFn(testFn), TTI(bd.getProgram()->getDataLayout()) {}
+
+ Expected<TestResult> doTest(std::vector<const BasicBlock *> &Prefix,
+ std::vector<const BasicBlock *> &Kept) override {
+ if (!Kept.empty() && TestBlocks(Kept))
+ return KeepSuffix;
+ if (!Prefix.empty() && TestBlocks(Prefix))
+ return KeepPrefix;
+ return NoFailure;
+ }
+
+ bool TestBlocks(std::vector<const BasicBlock *> &Prefix);
+};
+}
+
+bool ReduceSimplifyCFG::TestBlocks(std::vector<const BasicBlock *> &BBs) {
+ // Clone the program to try hacking it apart...
+ ValueToValueMapTy VMap;
+ Module *M = CloneModule(BD.getProgram(), VMap).release();
+
+ // Convert list to set for fast lookup...
+ SmallPtrSet<const BasicBlock *, 8> Blocks;
+ for (const auto *BB : BBs)
+ Blocks.insert(cast<BasicBlock>(VMap[BB]));
+
+ outs() << "Checking for crash with CFG simplifying:";
+ unsigned NumPrint = Blocks.size();
+ if (NumPrint > 10)
+ NumPrint = 10;
+ for (unsigned i = 0, e = NumPrint; i != e; ++i)
+ outs() << " " << BBs[i]->getName();
+ if (NumPrint < Blocks.size())
+ outs() << "... <" << Blocks.size() << " total>";
+ outs() << ": ";
+
+ // The following may destroy some blocks, so we save them first
+ std::vector<std::pair<std::string, std::string>> BlockInfo;
+
+ for (const BasicBlock *BB : Blocks)
+ BlockInfo.emplace_back(BB->getParent()->getName(), BB->getName());
- TestResult doTest(std::vector<const Instruction*> &Prefix,
- std::vector<const Instruction*> &Kept,
- std::string &Error) override {
- if (!Kept.empty() && TestInsts(Kept))
- return KeepSuffix;
- if (!Prefix.empty() && TestInsts(Prefix))
- return KeepPrefix;
- return NoFailure;
+ // Loop over the functions, running SimplifyCFG on each listed block...
+ for (auto &F : *M)
+ // Walk the basic blocks, skipping any that are not in the kept set.
+ for (Function::iterator BBIt = F.begin(); BBIt != F.end();) {
+ if (!Blocks.count(&*BBIt)) {
+ ++BBIt;
+ continue;
+ }
+ SimplifyCFG(&*BBIt++, TTI, 1);
}
+ // Verify we didn't break anything
+ std::vector<std::string> Passes;
+ Passes.push_back("verify");
+ std::unique_ptr<Module> New = BD.runPassesOn(M, Passes);
+ delete M;
+ if (!New) {
+ errs() << "verify failed!\n";
+ exit(1);
+ }
+ M = New.release();
- bool TestInsts(std::vector<const Instruction*> &Prefix);
- };
+ // Try running on the hacked up program...
+ if (TestFn(BD, M)) {
+ BD.setNewProgram(M); // It crashed, keep the trimmed version...
+
+ // Make sure to use basic block pointers that point into the now-current
+ // module, and that they don't include any deleted blocks.
+ BBs.clear();
+ const ValueSymbolTable &GST = M->getValueSymbolTable();
+ for (auto &BI : BlockInfo) {
+ auto *F = cast<Function>(GST.lookup(BI.first));
+ Value *V = F->getValueSymbolTable()->lookup(BI.second);
+ if (V && V->getType() == Type::getLabelTy(V->getContext()))
+ BBs.push_back(cast<BasicBlock>(V));
+ }
+ return true;
+ }
+ delete M; // It didn't crash, try something else.
+ return false;
+}
+
+namespace {
+/// ReduceCrashingInstructions reducer - This works by removing the specified
+/// non-terminator instructions and replacing them with undef.
+///
+class ReduceCrashingInstructions : public ListReducer<const Instruction *> {
+ BugDriver &BD;
+ bool (*TestFn)(const BugDriver &, Module *);
+
+public:
+ ReduceCrashingInstructions(BugDriver &bd,
+ bool (*testFn)(const BugDriver &, Module *))
+ : BD(bd), TestFn(testFn) {}
+
+ Expected<TestResult> doTest(std::vector<const Instruction *> &Prefix,
+ std::vector<const Instruction *> &Kept) override {
+ if (!Kept.empty() && TestInsts(Kept))
+ return KeepSuffix;
+ if (!Prefix.empty() && TestInsts(Prefix))
+ return KeepPrefix;
+ return NoFailure;
+ }
+
+ bool TestInsts(std::vector<const Instruction *> &Prefix);
+};
}
-bool ReduceCrashingInstructions::TestInsts(std::vector<const Instruction*>
- &Insts) {
+bool ReduceCrashingInstructions::TestInsts(
+ std::vector<const Instruction *> &Insts) {
// Clone the program to try hacking it apart...
ValueToValueMapTy VMap;
Module *M = CloneModule(BD.getProgram(), VMap).release();
// Convert list to set for fast lookup...
- SmallPtrSet<Instruction*, 32> Instructions;
+ SmallPtrSet<Instruction *, 32> Instructions;
for (unsigned i = 0, e = Insts.size(); i != e; ++i) {
assert(!isa<TerminatorInst>(Insts[i]));
Instructions.insert(cast<Instruction>(VMap[Insts[i]]));
@@ -489,12 +740,12 @@ bool ReduceCrashingInstructions::TestInsts(std::vector<const Instruction*>
// Verify that this is still valid.
legacy::PassManager Passes;
- Passes.add(createVerifierPass());
+ Passes.add(createVerifierPass(/*FatalErrors=*/false));
Passes.run(*M);
// Try running on the hacked up program...
if (TestFn(BD, M)) {
- BD.setNewProgram(M); // It crashed, keep the trimmed version...
+ BD.setNewProgram(M); // It crashed, keep the trimmed version...
// Make sure to use instruction pointers that point into the now-current
// module, and that they don't include any deleted blocks.
@@ -503,7 +754,7 @@ bool ReduceCrashingInstructions::TestInsts(std::vector<const Instruction*>
Insts.push_back(Inst);
return true;
}
- delete M; // It didn't crash, try something else.
+ delete M; // It didn't crash, try something else.
return false;
}
@@ -519,9 +770,8 @@ public:
bool (*testFn)(const BugDriver &, Module *))
: BD(bd), TestFn(testFn) {}
- TestResult doTest(std::vector<std::string> &Prefix,
- std::vector<std::string> &Kept,
- std::string &Error) override {
+ Expected<TestResult> doTest(std::vector<std::string> &Prefix,
+ std::vector<std::string> &Kept) override {
if (!Kept.empty() && TestNamedMDs(Kept))
return KeepSuffix;
if (!Prefix.empty() && TestNamedMDs(Prefix))
@@ -566,7 +816,7 @@ bool ReduceCrashingNamedMD::TestNamedMDs(std::vector<std::string> &NamedMDs) {
// Verify that this is still valid.
legacy::PassManager Passes;
- Passes.add(createVerifierPass());
+ Passes.add(createVerifierPass(/*FatalErrors=*/false));
Passes.run(*M);
// Try running on the hacked up program...
@@ -589,9 +839,8 @@ public:
bool (*testFn)(const BugDriver &, Module *))
: BD(bd), TestFn(testFn) {}
- TestResult doTest(std::vector<const MDNode *> &Prefix,
- std::vector<const MDNode *> &Kept,
- std::string &Error) override {
+ Expected<TestResult> doTest(std::vector<const MDNode *> &Prefix,
+ std::vector<const MDNode *> &Kept) override {
if (!Kept.empty() && TestNamedMDOps(Kept))
return KeepSuffix;
if (!Prefix.empty() && TestNamedMDOps(Prefix))
@@ -634,7 +883,7 @@ bool ReduceCrashingNamedMDOps::TestNamedMDOps(
// Verify that this is still valid.
legacy::PassManager Passes;
- Passes.add(createVerifierPass());
+ Passes.add(createVerifierPass(/*FatalErrors=*/false));
Passes.run(*M);
// Try running on the hacked up program...
@@ -652,9 +901,9 @@ bool ReduceCrashingNamedMDOps::TestNamedMDOps(
return false;
}
-static void ReduceGlobalInitializers(BugDriver &BD,
- bool (*TestFn)(const BugDriver &, Module *),
- std::string &Error) {
+static Error ReduceGlobalInitializers(BugDriver &BD,
+ bool (*TestFn)(const BugDriver &,
+ Module *)) {
if (BD.getProgram()->global_begin() != BD.getProgram()->global_end()) {
// Now try to reduce the number of global variable initializers in the
// module to something small.
@@ -671,32 +920,35 @@ static void ReduceGlobalInitializers(BugDriver &BD,
}
if (!DeletedInit) {
- delete M; // No change made...
+ delete M; // No change made...
} else {
// See if the program still causes a crash...
outs() << "\nChecking to see if we can delete global inits: ";
- if (TestFn(BD, M)) { // Still crashes?
+ if (TestFn(BD, M)) { // Still crashes?
BD.setNewProgram(M);
outs() << "\n*** Able to remove all global initializers!\n";
- } else { // No longer crashes?
+ } else { // No longer crashes?
outs() << " - Removing all global inits hides problem!\n";
delete M;
- std::vector<GlobalVariable*> GVs;
+ std::vector<GlobalVariable *> GVs;
for (Module::global_iterator I = BD.getProgram()->global_begin(),
- E = BD.getProgram()->global_end(); I != E; ++I)
+ E = BD.getProgram()->global_end();
+ I != E; ++I)
if (I->hasInitializer())
GVs.push_back(&*I);
if (GVs.size() > 1 && !BugpointIsInterrupted) {
outs() << "\n*** Attempting to reduce the number of global "
- << "variables in the testcase\n";
+ << "variables in the testcase\n";
unsigned OldSize = GVs.size();
- ReduceCrashingGlobalVariables(BD, TestFn).reduceList(GVs, Error);
- assert(!Error.empty());
+ Expected<bool> Result =
+ ReduceCrashingGlobalVariables(BD, TestFn).reduceList(GVs);
+ if (Error E = Result.takeError())
+ return E;
if (GVs.size() < OldSize)
BD.EmitProgressBitcode(BD.getProgram(), "reduced-global-variables");
@@ -704,28 +956,32 @@ static void ReduceGlobalInitializers(BugDriver &BD,
}
}
}
+ return Error::success();
}
-static void ReduceInsts(BugDriver &BD,
- bool (*TestFn)(const BugDriver &, Module *),
- std::string &Error) {
+static Error ReduceInsts(BugDriver &BD,
+ bool (*TestFn)(const BugDriver &, Module *)) {
// Attempt to delete instructions using bisection. This should help out nasty
// cases with large basic blocks where the problem is at one end.
if (!BugpointIsInterrupted) {
- std::vector<const Instruction*> Insts;
+ std::vector<const Instruction *> Insts;
for (const Function &F : *BD.getProgram())
for (const BasicBlock &BB : F)
for (const Instruction &I : BB)
if (!isa<TerminatorInst>(&I))
Insts.push_back(&I);
- ReduceCrashingInstructions(BD, TestFn).reduceList(Insts, Error);
+ Expected<bool> Result =
+ ReduceCrashingInstructions(BD, TestFn).reduceList(Insts);
+ if (Error E = Result.takeError())
+ return E;
}
unsigned Simplification = 2;
do {
if (BugpointIsInterrupted)
- return;
+ // TODO: Should we distinguish this with an "interrupted error"?
+ return Error::success();
--Simplification;
outs() << "\n*** Attempting to reduce testcase by deleting instruc"
<< "tions: Simplification Level #" << Simplification << '\n';
@@ -745,7 +1001,8 @@ static void ReduceInsts(BugDriver &BD,
// function, attempting to delete them.
unsigned CurInstructionNum = 0;
for (Module::const_iterator FI = BD.getProgram()->begin(),
- E = BD.getProgram()->end(); FI != E; ++FI)
+ E = BD.getProgram()->end();
+ FI != E; ++FI)
if (!FI->isDeclaration())
for (Function::const_iterator BI = FI->begin(), E = FI->end(); BI != E;
++BI)
@@ -755,7 +1012,8 @@ static void ReduceInsts(BugDriver &BD,
--InstructionsToSkipBeforeDeleting;
} else {
if (BugpointIsInterrupted)
- return;
+ // TODO: Should this be some kind of interrupted error?
+ return Error::success();
if (I->isEHPad() || I->getType()->isTokenTy())
continue;
@@ -770,7 +1028,7 @@ static void ReduceInsts(BugDriver &BD,
// to reduce the testcase...
BD.setNewProgram(M.release());
InstructionsToSkipBeforeDeleting = CurInstructionNum;
- goto TryAgain; // I wish I had a multi-level break here!
+ goto TryAgain; // I wish I had a multi-level break here!
}
}
}
@@ -782,57 +1040,111 @@ static void ReduceInsts(BugDriver &BD,
} while (Simplification);
BD.EmitProgressBitcode(BD.getProgram(), "reduced-instructions");
+ return Error::success();
}
-
/// DebugACrash - Given a predicate that determines whether a component crashes
/// on a program, try to destructively reduce the program while still keeping
/// the predicate true.
-static bool DebugACrash(BugDriver &BD,
- bool (*TestFn)(const BugDriver &, Module *),
- std::string &Error) {
+static Error DebugACrash(BugDriver &BD,
+ bool (*TestFn)(const BugDriver &, Module *)) {
// See if we can get away with nuking some of the global variable initializers
// in the program...
if (!NoGlobalRM)
- ReduceGlobalInitializers(BD, TestFn, Error);
+ if (Error E = ReduceGlobalInitializers(BD, TestFn))
+ return E;
// Now try to reduce the number of functions in the module to something small.
- std::vector<Function*> Functions;
+ std::vector<Function *> Functions;
for (Function &F : *BD.getProgram())
if (!F.isDeclaration())
Functions.push_back(&F);
if (Functions.size() > 1 && !BugpointIsInterrupted) {
outs() << "\n*** Attempting to reduce the number of functions "
- "in the testcase\n";
+ "in the testcase\n";
unsigned OldSize = Functions.size();
- ReduceCrashingFunctions(BD, TestFn).reduceList(Functions, Error);
+ Expected<bool> Result =
+ ReduceCrashingFunctions(BD, TestFn).reduceList(Functions);
+ if (Error E = Result.takeError())
+ return E;
if (Functions.size() < OldSize)
BD.EmitProgressBitcode(BD.getProgram(), "reduced-function");
}
+ // Attempt to change conditional branches into unconditional branches to
+ // eliminate blocks.
+ if (!DisableSimplifyCFG && !BugpointIsInterrupted) {
+ std::vector<const BasicBlock *> Blocks;
+ for (Function &F : *BD.getProgram())
+ for (BasicBlock &BB : F)
+ Blocks.push_back(&BB);
+ unsigned OldSize = Blocks.size();
+ Expected<bool> Result =
+ ReduceCrashingConditionals(BD, TestFn, true).reduceList(Blocks);
+ if (Error E = Result.takeError())
+ return E;
+ Result = ReduceCrashingConditionals(BD, TestFn, false).reduceList(Blocks);
+ if (Error E = Result.takeError())
+ return E;
+ if (Blocks.size() < OldSize)
+ BD.EmitProgressBitcode(BD.getProgram(), "reduced-conditionals");
+ }
+
// Attempt to delete entire basic blocks at a time to speed up
// convergence... this actually works by setting the terminator of the blocks
// to a return instruction then running simplifycfg, which can potentially
// shrinks the code dramatically quickly
//
if (!DisableSimplifyCFG && !BugpointIsInterrupted) {
- std::vector<const BasicBlock*> Blocks;
+ std::vector<const BasicBlock *> Blocks;
for (Function &F : *BD.getProgram())
for (BasicBlock &BB : F)
Blocks.push_back(&BB);
unsigned OldSize = Blocks.size();
- ReduceCrashingBlocks(BD, TestFn).reduceList(Blocks, Error);
+ Expected<bool> Result = ReduceCrashingBlocks(BD, TestFn).reduceList(Blocks);
+ if (Error E = Result.takeError())
+ return E;
if (Blocks.size() < OldSize)
BD.EmitProgressBitcode(BD.getProgram(), "reduced-blocks");
}
+ if (!DisableSimplifyCFG & !BugpointIsInterrupted) {
+ std::vector<const BasicBlock *> Blocks;
+ for (Function &F : *BD.getProgram())
+ for (BasicBlock &BB : F)
+ Blocks.push_back(&BB);
+ unsigned OldSize = Blocks.size();
+ Expected<bool> Result = ReduceSimplifyCFG(BD, TestFn).reduceList(Blocks);
+ if (Error E = Result.takeError())
+ return E;
+ if (Blocks.size() < OldSize)
+ BD.EmitProgressBitcode(BD.getProgram(), "reduced-simplifycfg");
+ }
+
// Attempt to delete instructions using bisection. This should help out nasty
// cases with large basic blocks where the problem is at one end.
if (!BugpointIsInterrupted)
- ReduceInsts(BD, TestFn, Error);
+ if (Error E = ReduceInsts(BD, TestFn))
+ return E;
+
+ // Attempt to strip debug info metadata.
+ auto stripMetadata = [&](std::function<bool(Module &)> strip) {
+ std::unique_ptr<Module> M = CloneModule(BD.getProgram());
+ strip(*M);
+ if (TestFn(BD, M.get()))
+ BD.setNewProgram(M.release());
+ };
+ if (!NoStripDebugInfo && !BugpointIsInterrupted) {
+ outs() << "\n*** Attempting to strip the debug info: ";
+ stripMetadata(StripDebugInfo);
+ }
+ if (!NoStripDebugTypeInfo && !BugpointIsInterrupted) {
+ outs() << "\n*** Attempting to strip the debug type info: ";
+ stripMetadata(stripNonLineTableDebugInfo);
+ }
if (!NoNamedMDRM) {
if (!BugpointIsInterrupted) {
@@ -842,7 +1154,10 @@ static bool DebugACrash(BugDriver &BD,
std::vector<std::string> NamedMDNames;
for (auto &NamedMD : BD.getProgram()->named_metadata())
NamedMDNames.push_back(NamedMD.getName().str());
- ReduceCrashingNamedMD(BD, TestFn).reduceList(NamedMDNames, Error);
+ Expected<bool> Result =
+ ReduceCrashingNamedMD(BD, TestFn).reduceList(NamedMDNames);
+ if (Error E = Result.takeError())
+ return E;
}
if (!BugpointIsInterrupted) {
@@ -852,7 +1167,10 @@ static bool DebugACrash(BugDriver &BD,
for (auto &NamedMD : BD.getProgram()->named_metadata())
for (auto op : NamedMD.operands())
NamedMDOps.push_back(op);
- ReduceCrashingNamedMDOps(BD, TestFn).reduceList(NamedMDOps, Error);
+ Expected<bool> Result =
+ ReduceCrashingNamedMDOps(BD, TestFn).reduceList(NamedMDOps);
+ if (Error E = Result.takeError())
+ return E;
}
BD.EmitProgressBitcode(BD.getProgram(), "reduced-named-md");
}
@@ -865,7 +1183,7 @@ static bool DebugACrash(BugDriver &BD,
// Find out if the pass still crashes on the cleaned up program...
if (TestFn(BD, M)) {
- BD.setNewProgram(M); // Yup, it does, keep the reduced version...
+ BD.setNewProgram(M); // Yup, it does, keep the reduced version...
} else {
delete M;
}
@@ -873,7 +1191,7 @@ static bool DebugACrash(BugDriver &BD,
BD.EmitProgressBitcode(BD.getProgram(), "reduced-simplified");
- return false;
+ return Error::success();
}
static bool TestForOptimizerCrash(const BugDriver &BD, Module *M) {
@@ -884,14 +1202,15 @@ static bool TestForOptimizerCrash(const BugDriver &BD, Module *M) {
/// It attempts to prune down the testcase to something reasonable, and figure
/// out exactly which pass is crashing.
///
-bool BugDriver::debugOptimizerCrash(const std::string &ID) {
+Error BugDriver::debugOptimizerCrash(const std::string &ID) {
outs() << "\n*** Debugging optimizer crash!\n";
- std::string Error;
// Reduce the list of passes which causes the optimizer to crash...
- if (!BugpointIsInterrupted && !DontReducePassList)
- ReducePassList(*this).reduceList(PassesToRun, Error);
- assert(Error.empty());
+ if (!BugpointIsInterrupted && !DontReducePassList) {
+ Expected<bool> Result = ReducePassList(*this).reduceList(PassesToRun);
+ if (Error E = Result.takeError())
+ return E;
+ }
outs() << "\n*** Found crashing pass"
<< (PassesToRun.size() == 1 ? ": " : "es: ")
@@ -899,20 +1218,18 @@ bool BugDriver::debugOptimizerCrash(const std::string &ID) {
EmitProgressBitcode(Program, ID);
- bool Success = DebugACrash(*this, TestForOptimizerCrash, Error);
- assert(Error.empty());
- return Success;
+ return DebugACrash(*this, TestForOptimizerCrash);
}
static bool TestForCodeGenCrash(const BugDriver &BD, Module *M) {
- std::string Error;
- BD.compileProgram(M, &Error);
- if (!Error.empty()) {
+ if (Error E = BD.compileProgram(M)) {
if (VerboseErrors)
- errs() << Error << "\n";
- else
+ errs() << toString(std::move(E)) << "\n";
+ else {
+ consumeError(std::move(E));
errs() << "<crash>\n";
- return true; // Tool is still crashing.
+ }
+ return true; // Tool is still crashing.
}
errs() << '\n';
return false;
@@ -921,8 +1238,8 @@ static bool TestForCodeGenCrash(const BugDriver &BD, Module *M) {
/// debugCodeGeneratorCrash - This method is called when the code generator
/// crashes on an input. It attempts to reduce the input as much as possible
/// while still causing the code generator to crash.
-bool BugDriver::debugCodeGeneratorCrash(std::string &Error) {
+Error BugDriver::debugCodeGeneratorCrash() {
errs() << "*** Debugging code generator crash!\n";
- return DebugACrash(*this, TestForCodeGenCrash, Error);
+ return DebugACrash(*this, TestForCodeGenCrash);
}
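
The hunks above convert CrashDebugger.cpp from out-parameter std::string error reporting to llvm::Error and llvm::Expected. As a quick orientation, here is a minimal sketch of that idiom, assuming only llvm/Support/Error.h and raw_ostream.h; the helper names are invented for illustration and are not part of the patch.

    #include "llvm/Support/Error.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    // A fallible operation that reports failure by returning llvm::Error.
    static Error mayFail(bool Bad) {
      if (Bad)
        return make_error<StringError>("something went wrong",
                                       inconvertibleErrorCode());
      return Error::success();
    }

    // A fallible operation that also produces a value on success.
    static Expected<int> mayProduce(bool Bad) {
      if (Error E = mayFail(Bad))
        return std::move(E); // propagate the failure upward
      return 42;
    }

    static Error driver() {
      Expected<int> V = mayProduce(false);
      if (Error E = V.takeError()) // an Expected must be checked before use
        return E;
      outs() << "got " << *V << "\n";
      return Error::success();
    }

This is the same pattern the rewritten DebugACrash and ReduceCrashingNamedMD* call sites follow: takeError() splits the failure out of an Expected so it can be returned, and the value is only dereferenced on the success path.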
diff --git a/contrib/llvm/tools/bugpoint/ExecutionDriver.cpp b/contrib/llvm/tools/bugpoint/ExecutionDriver.cpp
index ab9c05fa924a..b26ba93cd616 100644
--- a/contrib/llvm/tools/bugpoint/ExecutionDriver.cpp
+++ b/contrib/llvm/tools/bugpoint/ExecutionDriver.cpp
@@ -25,112 +25,111 @@
using namespace llvm;
namespace {
- // OutputType - Allow the user to specify the way code should be run, to test
- // for miscompilation.
- //
- enum OutputType {
- AutoPick, RunLLI, RunJIT, RunLLC, RunLLCIA, LLC_Safe, CompileCustom, Custom
- };
-
- cl::opt<double>
- AbsTolerance("abs-tolerance", cl::desc("Absolute error tolerated"),
- cl::init(0.0));
- cl::opt<double>
- RelTolerance("rel-tolerance", cl::desc("Relative error tolerated"),
- cl::init(0.0));
-
- cl::opt<OutputType>
- InterpreterSel(cl::desc("Specify the \"test\" i.e. suspect back-end:"),
- cl::values(clEnumValN(AutoPick, "auto", "Use best guess"),
- clEnumValN(RunLLI, "run-int",
- "Execute with the interpreter"),
- clEnumValN(RunJIT, "run-jit", "Execute with JIT"),
- clEnumValN(RunLLC, "run-llc", "Compile with LLC"),
- clEnumValN(RunLLCIA, "run-llc-ia",
- "Compile with LLC with integrated assembler"),
- clEnumValN(LLC_Safe, "llc-safe", "Use LLC for all"),
- clEnumValN(CompileCustom, "compile-custom",
- "Use -compile-command to define a command to "
- "compile the bitcode. Useful to avoid linking."),
- clEnumValN(Custom, "run-custom",
- "Use -exec-command to define a command to execute "
- "the bitcode. Useful for cross-compilation."),
- clEnumValEnd),
- cl::init(AutoPick));
-
- cl::opt<OutputType>
- SafeInterpreterSel(cl::desc("Specify \"safe\" i.e. known-good backend:"),
- cl::values(clEnumValN(AutoPick, "safe-auto", "Use best guess"),
- clEnumValN(RunLLC, "safe-run-llc", "Compile with LLC"),
- clEnumValN(Custom, "safe-run-custom",
- "Use -exec-command to define a command to execute "
- "the bitcode. Useful for cross-compilation."),
- clEnumValEnd),
- cl::init(AutoPick));
-
- cl::opt<std::string>
- SafeInterpreterPath("safe-path",
- cl::desc("Specify the path to the \"safe\" backend program"),
- cl::init(""));
-
- cl::opt<bool>
- AppendProgramExitCode("append-exit-code",
- cl::desc("Append the exit code to the output so it gets diff'd too"),
- cl::init(false));
-
- cl::opt<std::string>
- InputFile("input", cl::init("/dev/null"),
- cl::desc("Filename to pipe in as stdin (default: /dev/null)"));
-
- cl::list<std::string>
- AdditionalSOs("additional-so",
- cl::desc("Additional shared objects to load "
- "into executing programs"));
-
- cl::list<std::string>
- AdditionalLinkerArgs("Xlinker",
- cl::desc("Additional arguments to pass to the linker"));
-
- cl::opt<std::string>
- CustomCompileCommand("compile-command", cl::init("llc"),
- cl::desc("Command to compile the bitcode (use with -compile-custom) "
- "(default: llc)"));
-
- cl::opt<std::string>
- CustomExecCommand("exec-command", cl::init("simulate"),
- cl::desc("Command to execute the bitcode (use with -run-custom) "
- "(default: simulate)"));
+// OutputType - Allow the user to specify the way code should be run, to test
+// for miscompilation.
+//
+enum OutputType {
+ AutoPick,
+ RunLLI,
+ RunJIT,
+ RunLLC,
+ RunLLCIA,
+ LLC_Safe,
+ CompileCustom,
+ Custom
+};
+
+cl::opt<double> AbsTolerance("abs-tolerance",
+ cl::desc("Absolute error tolerated"),
+ cl::init(0.0));
+cl::opt<double> RelTolerance("rel-tolerance",
+ cl::desc("Relative error tolerated"),
+ cl::init(0.0));
+
+cl::opt<OutputType> InterpreterSel(
+ cl::desc("Specify the \"test\" i.e. suspect back-end:"),
+ cl::values(clEnumValN(AutoPick, "auto", "Use best guess"),
+ clEnumValN(RunLLI, "run-int", "Execute with the interpreter"),
+ clEnumValN(RunJIT, "run-jit", "Execute with JIT"),
+ clEnumValN(RunLLC, "run-llc", "Compile with LLC"),
+ clEnumValN(RunLLCIA, "run-llc-ia",
+ "Compile with LLC with integrated assembler"),
+ clEnumValN(LLC_Safe, "llc-safe", "Use LLC for all"),
+ clEnumValN(CompileCustom, "compile-custom",
+ "Use -compile-command to define a command to "
+ "compile the bitcode. Useful to avoid linking."),
+ clEnumValN(Custom, "run-custom",
+ "Use -exec-command to define a command to execute "
+ "the bitcode. Useful for cross-compilation.")),
+ cl::init(AutoPick));
+
+cl::opt<OutputType> SafeInterpreterSel(
+ cl::desc("Specify \"safe\" i.e. known-good backend:"),
+ cl::values(clEnumValN(AutoPick, "safe-auto", "Use best guess"),
+ clEnumValN(RunLLC, "safe-run-llc", "Compile with LLC"),
+ clEnumValN(Custom, "safe-run-custom",
+ "Use -exec-command to define a command to execute "
+ "the bitcode. Useful for cross-compilation.")),
+ cl::init(AutoPick));
+
+cl::opt<std::string> SafeInterpreterPath(
+ "safe-path", cl::desc("Specify the path to the \"safe\" backend program"),
+ cl::init(""));
+
+cl::opt<bool> AppendProgramExitCode(
+ "append-exit-code",
+ cl::desc("Append the exit code to the output so it gets diff'd too"),
+ cl::init(false));
+
+cl::opt<std::string>
+ InputFile("input", cl::init("/dev/null"),
+ cl::desc("Filename to pipe in as stdin (default: /dev/null)"));
+
+cl::list<std::string>
+ AdditionalSOs("additional-so", cl::desc("Additional shared objects to load "
+ "into executing programs"));
+
+cl::list<std::string> AdditionalLinkerArgs(
+ "Xlinker", cl::desc("Additional arguments to pass to the linker"));
+
+cl::opt<std::string> CustomCompileCommand(
+ "compile-command", cl::init("llc"),
+ cl::desc("Command to compile the bitcode (use with -compile-custom) "
+ "(default: llc)"));
+
+cl::opt<std::string> CustomExecCommand(
+ "exec-command", cl::init("simulate"),
+ cl::desc("Command to execute the bitcode (use with -run-custom) "
+ "(default: simulate)"));
}
namespace llvm {
- // Anything specified after the --args option are taken as arguments to the
- // program being debugged.
- cl::list<std::string>
- InputArgv("args", cl::Positional, cl::desc("<program arguments>..."),
- cl::ZeroOrMore, cl::PositionalEatsArgs);
-
- cl::opt<std::string>
- OutputPrefix("output-prefix", cl::init("bugpoint"),
- cl::desc("Prefix to use for outputs (default: 'bugpoint')"));
+// Everything specified after the --args option is taken as arguments to the
+// program being debugged.
+cl::list<std::string> InputArgv("args", cl::Positional,
+ cl::desc("<program arguments>..."),
+ cl::ZeroOrMore, cl::PositionalEatsArgs);
+
+cl::opt<std::string>
+ OutputPrefix("output-prefix", cl::init("bugpoint"),
+ cl::desc("Prefix to use for outputs (default: 'bugpoint')"));
}
namespace {
- cl::list<std::string>
- ToolArgv("tool-args", cl::Positional, cl::desc("<tool arguments>..."),
- cl::ZeroOrMore, cl::PositionalEatsArgs);
-
- cl::list<std::string>
- SafeToolArgv("safe-tool-args", cl::Positional,
- cl::desc("<safe-tool arguments>..."),
- cl::ZeroOrMore, cl::PositionalEatsArgs);
-
- cl::opt<std::string>
- CCBinary("gcc", cl::init(""), cl::desc("The gcc binary to use."));
-
- cl::list<std::string>
- CCToolArgv("gcc-tool-args", cl::Positional,
- cl::desc("<gcc-tool arguments>..."),
- cl::ZeroOrMore, cl::PositionalEatsArgs);
+cl::list<std::string> ToolArgv("tool-args", cl::Positional,
+ cl::desc("<tool arguments>..."), cl::ZeroOrMore,
+ cl::PositionalEatsArgs);
+
+cl::list<std::string> SafeToolArgv("safe-tool-args", cl::Positional,
+ cl::desc("<safe-tool arguments>..."),
+ cl::ZeroOrMore, cl::PositionalEatsArgs);
+
+cl::opt<std::string> CCBinary("gcc", cl::init(""),
+ cl::desc("The gcc binary to use."));
+
+cl::list<std::string> CCToolArgv("gcc-tool-args", cl::Positional,
+ cl::desc("<gcc-tool arguments>..."),
+ cl::ZeroOrMore, cl::PositionalEatsArgs);
}
//===----------------------------------------------------------------------===//
@@ -140,7 +139,7 @@ namespace {
/// initializeExecutionEnvironment - This method is used to set up the
/// environment for executing LLVM programs.
///
-bool BugDriver::initializeExecutionEnvironment() {
+Error BugDriver::initializeExecutionEnvironment() {
outs() << "Initializing execution environment: ";
// Create an instance of the AbstractInterpreter interface as specified on
@@ -159,19 +158,18 @@ bool BugDriver::initializeExecutionEnvironment() {
case AutoPick:
if (!Interpreter) {
InterpreterSel = RunJIT;
- Interpreter = AbstractInterpreter::createJIT(getToolName(), Message,
- &ToolArgv);
+ Interpreter =
+ AbstractInterpreter::createJIT(getToolName(), Message, &ToolArgv);
}
if (!Interpreter) {
InterpreterSel = RunLLC;
- Interpreter = AbstractInterpreter::createLLC(getToolName(), Message,
- CCBinary, &ToolArgv,
- &CCToolArgv);
+ Interpreter = AbstractInterpreter::createLLC(
+ getToolName(), Message, CCBinary, &ToolArgv, &CCToolArgv);
}
if (!Interpreter) {
InterpreterSel = RunLLI;
- Interpreter = AbstractInterpreter::createLLI(getToolName(), Message,
- &ToolArgv);
+ Interpreter =
+ AbstractInterpreter::createLLI(getToolName(), Message, &ToolArgv);
}
if (!Interpreter) {
InterpreterSel = AutoPick;
@@ -179,28 +177,27 @@ bool BugDriver::initializeExecutionEnvironment() {
}
break;
case RunLLI:
- Interpreter = AbstractInterpreter::createLLI(getToolName(), Message,
- &ToolArgv);
+ Interpreter =
+ AbstractInterpreter::createLLI(getToolName(), Message, &ToolArgv);
break;
case RunLLC:
case RunLLCIA:
case LLC_Safe:
- Interpreter = AbstractInterpreter::createLLC(getToolName(), Message,
- CCBinary, &ToolArgv,
- &CCToolArgv,
- InterpreterSel == RunLLCIA);
+ Interpreter = AbstractInterpreter::createLLC(
+ getToolName(), Message, CCBinary, &ToolArgv, &CCToolArgv,
+ InterpreterSel == RunLLCIA);
break;
case RunJIT:
- Interpreter = AbstractInterpreter::createJIT(getToolName(), Message,
- &ToolArgv);
+ Interpreter =
+ AbstractInterpreter::createJIT(getToolName(), Message, &ToolArgv);
break;
case CompileCustom:
- Interpreter =
- AbstractInterpreter::createCustomCompiler(Message, CustomCompileCommand);
+ Interpreter = AbstractInterpreter::createCustomCompiler(
+ Message, CustomCompileCommand);
break;
case Custom:
Interpreter =
- AbstractInterpreter::createCustomExecutor(Message, CustomExecCommand);
+ AbstractInterpreter::createCustomExecutor(Message, CustomExecCommand);
break;
}
if (!Interpreter)
@@ -215,25 +212,19 @@ bool BugDriver::initializeExecutionEnvironment() {
switch (SafeInterpreterSel) {
case AutoPick:
// In "llc-safe" mode, default to using LLC as the "safe" backend.
- if (!SafeInterpreter &&
- InterpreterSel == LLC_Safe) {
+ if (!SafeInterpreter && InterpreterSel == LLC_Safe) {
SafeInterpreterSel = RunLLC;
SafeToolArgs.push_back("--relocation-model=pic");
- SafeInterpreter = AbstractInterpreter::createLLC(Path.c_str(), Message,
- CCBinary,
- &SafeToolArgs,
- &CCToolArgv);
+ SafeInterpreter = AbstractInterpreter::createLLC(
+ Path.c_str(), Message, CCBinary, &SafeToolArgs, &CCToolArgv);
}
- if (!SafeInterpreter &&
- InterpreterSel != RunLLC &&
+ if (!SafeInterpreter && InterpreterSel != RunLLC &&
InterpreterSel != RunJIT) {
SafeInterpreterSel = RunLLC;
SafeToolArgs.push_back("--relocation-model=pic");
- SafeInterpreter = AbstractInterpreter::createLLC(Path.c_str(), Message,
- CCBinary,
- &SafeToolArgs,
- &CCToolArgv);
+ SafeInterpreter = AbstractInterpreter::createLLC(
+ Path.c_str(), Message, CCBinary, &SafeToolArgs, &CCToolArgv);
}
if (!SafeInterpreter) {
SafeInterpreterSel = AutoPick;
@@ -243,34 +234,42 @@ bool BugDriver::initializeExecutionEnvironment() {
case RunLLC:
case RunLLCIA:
SafeToolArgs.push_back("--relocation-model=pic");
- SafeInterpreter = AbstractInterpreter::createLLC(Path.c_str(), Message,
- CCBinary, &SafeToolArgs,
- &CCToolArgv,
- SafeInterpreterSel == RunLLCIA);
+ SafeInterpreter = AbstractInterpreter::createLLC(
+ Path.c_str(), Message, CCBinary, &SafeToolArgs, &CCToolArgv,
+ SafeInterpreterSel == RunLLCIA);
break;
case Custom:
SafeInterpreter =
- AbstractInterpreter::createCustomExecutor(Message, CustomExecCommand);
+ AbstractInterpreter::createCustomExecutor(Message, CustomExecCommand);
break;
default:
Message = "Sorry, this back-end is not supported by bugpoint as the "
"\"safe\" backend right now!\n";
break;
}
- if (!SafeInterpreter) { outs() << Message << "\nExiting.\n"; exit(1); }
+ if (!SafeInterpreter) {
+ outs() << Message << "\nExiting.\n";
+ exit(1);
+ }
cc = CC::create(Message, CCBinary, &CCToolArgv);
- if (!cc) { outs() << Message << "\nExiting.\n"; exit(1); }
+ if (!cc) {
+ outs() << Message << "\nExiting.\n";
+ exit(1);
+ }
// If there was an error creating the selected interpreter, quit with error.
- return Interpreter == nullptr;
+ if (Interpreter == nullptr)
+ return make_error<StringError>("Failed to init execution environment",
+ inconvertibleErrorCode());
+ return Error::success();
}
/// compileProgram - Try to compile the specified module, returning false and
/// setting Error if an error occurs. This is used for code generation
/// crash testing.
///
-void BugDriver::compileProgram(Module *M, std::string *Error) const {
+Error BugDriver::compileProgram(Module *M) const {
// Emit the program to a bitcode file...
SmallString<128> BitcodeFile;
int BitcodeFD;
@@ -291,21 +290,20 @@ void BugDriver::compileProgram(Module *M, std::string *Error) const {
FileRemover BitcodeFileRemover(BitcodeFile.str(), !SaveTemps);
// Actually compile the program!
- Interpreter->compileProgram(BitcodeFile.str(), Error, Timeout, MemoryLimit);
+ return Interpreter->compileProgram(BitcodeFile.str(), Timeout, MemoryLimit);
}
-
/// executeProgram - This method runs "Program", capturing the output of the
/// program to a file, returning the filename of the file. A recommended
/// filename may be optionally specified.
///
-std::string BugDriver::executeProgram(const Module *Program,
- std::string OutputFile,
- std::string BitcodeFile,
- const std::string &SharedObj,
- AbstractInterpreter *AI,
- std::string *Error) const {
- if (!AI) AI = Interpreter;
+Expected<std::string> BugDriver::executeProgram(const Module *Program,
+ std::string OutputFile,
+ std::string BitcodeFile,
+ const std::string &SharedObj,
+ AbstractInterpreter *AI) const {
+ if (!AI)
+ AI = Interpreter;
assert(AI && "Interpreter should have been created already!");
bool CreatedBitcode = false;
if (BitcodeFile.empty()) {
@@ -315,15 +313,15 @@ std::string BugDriver::executeProgram(const Module *Program,
std::error_code EC = sys::fs::createUniqueFile(
OutputPrefix + "-test-program-%%%%%%%.bc", UniqueFD, UniqueFilename);
if (EC) {
- errs() << ToolName << ": Error making unique filename: "
- << EC.message() << "!\n";
+ errs() << ToolName << ": Error making unique filename: " << EC.message()
+ << "!\n";
exit(1);
}
BitcodeFile = UniqueFilename.str();
if (writeProgramToFile(BitcodeFile, UniqueFD, Program)) {
- errs() << ToolName << ": Error emitting bitcode to file '"
- << BitcodeFile << "'!\n";
+ errs() << ToolName << ": Error emitting bitcode to file '" << BitcodeFile
+ << "'!\n";
exit(1);
}
CreatedBitcode = true;
@@ -331,17 +329,17 @@ std::string BugDriver::executeProgram(const Module *Program,
// Remove the temporary bitcode file when we are done.
std::string BitcodePath(BitcodeFile);
- FileRemover BitcodeFileRemover(BitcodePath,
- CreatedBitcode && !SaveTemps);
+ FileRemover BitcodeFileRemover(BitcodePath, CreatedBitcode && !SaveTemps);
- if (OutputFile.empty()) OutputFile = OutputPrefix + "-execution-output-%%%%%%%";
+ if (OutputFile.empty())
+ OutputFile = OutputPrefix + "-execution-output-%%%%%%%";
// Check to see if this is a valid output filename...
SmallString<128> UniqueFile;
std::error_code EC = sys::fs::createUniqueFile(OutputFile, UniqueFile);
if (EC) {
- errs() << ToolName << ": Error making unique filename: "
- << EC.message() << "\n";
+ errs() << ToolName << ": Error making unique filename: " << EC.message()
+ << "\n";
exit(1);
}
OutputFile = UniqueFile.str();
@@ -351,28 +349,32 @@ std::string BugDriver::executeProgram(const Module *Program,
if (!SharedObj.empty())
SharedObjs.push_back(SharedObj);
- int RetVal = AI->ExecuteProgram(BitcodeFile, InputArgv, InputFile, OutputFile,
- Error, AdditionalLinkerArgs, SharedObjs,
- Timeout, MemoryLimit);
- if (!Error->empty())
- return OutputFile;
+ Expected<int> RetVal = AI->ExecuteProgram(BitcodeFile, InputArgv, InputFile,
+ OutputFile, AdditionalLinkerArgs,
+ SharedObjs, Timeout, MemoryLimit);
+ if (Error E = RetVal.takeError())
+ return std::move(E);
- if (RetVal == -1) {
+ if (*RetVal == -1) {
errs() << "<timeout>";
static bool FirstTimeout = true;
if (FirstTimeout) {
- outs() << "\n"
- "*** Program execution timed out! This mechanism is designed to handle\n"
- " programs stuck in infinite loops gracefully. The -timeout option\n"
- " can be used to change the timeout threshold or disable it completely\n"
- " (with -timeout=0). This message is only displayed once.\n";
+ outs()
+ << "\n"
+ "*** Program execution timed out! This mechanism is designed to "
+ "handle\n"
+ " programs stuck in infinite loops gracefully. The -timeout "
+ "option\n"
+ " can be used to change the timeout threshold or disable it "
+ "completely\n"
+ " (with -timeout=0). This message is only displayed once.\n";
FirstTimeout = false;
}
}
if (AppendProgramExitCode) {
std::ofstream outFile(OutputFile.c_str(), std::ios_base::app);
- outFile << "exit " << RetVal << '\n';
+ outFile << "exit " << *RetVal << '\n';
outFile.close();
}
@@ -383,30 +385,27 @@ std::string BugDriver::executeProgram(const Module *Program,
/// executeProgramSafely - Used to create reference output with the "safe"
/// backend, if reference output is not provided.
///
-std::string BugDriver::executeProgramSafely(const Module *Program,
- const std::string &OutputFile,
- std::string *Error) const {
- return executeProgram(Program, OutputFile, "", "", SafeInterpreter, Error);
+Expected<std::string>
+BugDriver::executeProgramSafely(const Module *Program,
+ const std::string &OutputFile) const {
+ return executeProgram(Program, OutputFile, "", "", SafeInterpreter);
}
-std::string BugDriver::compileSharedObject(const std::string &BitcodeFile,
- std::string &Error) {
+Expected<std::string>
+BugDriver::compileSharedObject(const std::string &BitcodeFile) {
assert(Interpreter && "Interpreter should have been created already!");
std::string OutputFile;
// Using the known-good backend.
- CC::FileType FT = SafeInterpreter->OutputCode(BitcodeFile, OutputFile,
- Error);
- if (!Error.empty())
- return "";
+ Expected<CC::FileType> FT =
+ SafeInterpreter->OutputCode(BitcodeFile, OutputFile);
+ if (Error E = FT.takeError())
+ return std::move(E);
std::string SharedObjectFile;
- bool Failure = cc->MakeSharedObject(OutputFile, FT, SharedObjectFile,
- AdditionalLinkerArgs, Error);
- if (!Error.empty())
- return "";
- if (Failure)
- exit(1);
+ if (Error E = cc->MakeSharedObject(OutputFile, *FT, SharedObjectFile,
+ AdditionalLinkerArgs))
+ return std::move(E);
// Remove the intermediate C file
sys::fs::remove(OutputFile);
@@ -419,25 +418,27 @@ std::string BugDriver::compileSharedObject(const std::string &BitcodeFile,
/// otherwise. Note: initializeExecutionEnvironment should be called BEFORE
/// this function.
///
-bool BugDriver::createReferenceFile(Module *M, const std::string &Filename) {
- std::string Error;
- compileProgram(Program, &Error);
- if (!Error.empty())
- return false;
+Error BugDriver::createReferenceFile(Module *M, const std::string &Filename) {
+ if (Error E = compileProgram(Program))
+ return E;
- ReferenceOutputFile = executeProgramSafely(Program, Filename, &Error);
- if (!Error.empty()) {
- errs() << Error;
+ Expected<std::string> Result = executeProgramSafely(Program, Filename);
+ if (Error E = Result.takeError()) {
if (Interpreter != SafeInterpreter) {
- errs() << "*** There is a bug running the \"safe\" backend. Either"
- << " debug it (for example with the -run-jit bugpoint option,"
- << " if JIT is being used as the \"safe\" backend), or fix the"
- << " error some other way.\n";
+ E = joinErrors(
+ std::move(E),
+ make_error<StringError>(
+ "*** There is a bug running the \"safe\" backend. Either"
+ " debug it (for example with the -run-jit bugpoint option,"
+ " if JIT is being used as the \"safe\" backend), or fix the"
+ " error some other way.\n",
+ inconvertibleErrorCode()));
}
- return false;
+ return E;
}
+ ReferenceOutputFile = *Result;
outs() << "\nReference output is: " << ReferenceOutputFile << "\n\n";
- return true;
+ return Error::success();
}
/// diffProgram - This method executes the specified module and diffs the
@@ -445,31 +446,28 @@ bool BugDriver::createReferenceFile(Module *M, const std::string &Filename) {
/// is different, 1 is returned. If there is a problem with the code
/// generator (e.g., llc crashes), this will set ErrMsg.
///
-bool BugDriver::diffProgram(const Module *Program,
- const std::string &BitcodeFile,
- const std::string &SharedObject,
- bool RemoveBitcode,
- std::string *ErrMsg) const {
+Expected<bool> BugDriver::diffProgram(const Module *Program,
+ const std::string &BitcodeFile,
+ const std::string &SharedObject,
+ bool RemoveBitcode) const {
// Execute the program, generating an output file...
- std::string Output(
- executeProgram(Program, "", BitcodeFile, SharedObject, nullptr, ErrMsg));
- if (!ErrMsg->empty())
- return false;
+ Expected<std::string> Output =
+ executeProgram(Program, "", BitcodeFile, SharedObject, nullptr);
+ if (Error E = Output.takeError())
+ return std::move(E);
std::string Error;
bool FilesDifferent = false;
- if (int Diff = DiffFilesWithTolerance(ReferenceOutputFile,
- Output,
+ if (int Diff = DiffFilesWithTolerance(ReferenceOutputFile, *Output,
AbsTolerance, RelTolerance, &Error)) {
if (Diff == 2) {
errs() << "While diffing output: " << Error << '\n';
exit(1);
}
FilesDifferent = true;
- }
- else {
+ } else {
// Remove the generated output if there are no differences.
- sys::fs::remove(Output);
+ sys::fs::remove(*Output);
}
// Remove the bitcode file if we are supposed to.
@@ -478,7 +476,4 @@ bool BugDriver::diffProgram(const Module *Program,
return FilesDifferent;
}
-bool BugDriver::isExecutingJIT() {
- return InterpreterSel == RunJIT;
-}
-
+bool BugDriver::isExecutingJIT() { return InterpreterSel == RunJIT; }
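
Much of the churn above is clang-format style reflowing of the cl::opt and cl::list declarations. For readers unfamiliar with LLVM's CommandLine library, here is a self-contained sketch of such declarations; the option names are invented for the example and are not taken from bugpoint.

    #include "llvm/Support/CommandLine.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    static cl::opt<std::string>
        ExampleInput("example-input", cl::init("/dev/null"),
                     cl::desc("Filename to pipe in as stdin"));

    static cl::list<std::string>
        ExampleArgs("example-args", cl::Positional,
                    cl::desc("<arguments>..."), cl::ZeroOrMore);

    int main(int argc, char **argv) {
      // Populates ExampleInput and ExampleArgs from the command line.
      cl::ParseCommandLineOptions(argc, argv, "CommandLine sketch\n");
      for (const std::string &A : ExampleArgs)
        outs() << A << "\n";
      return 0;
    }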
diff --git a/contrib/llvm/tools/bugpoint/ExtractFunction.cpp b/contrib/llvm/tools/bugpoint/ExtractFunction.cpp
index de596a57d83c..d57613ec5e37 100644
--- a/contrib/llvm/tools/bugpoint/ExtractFunction.cpp
+++ b/contrib/llvm/tools/bugpoint/ExtractFunction.cpp
@@ -37,50 +37,49 @@ using namespace llvm;
#define DEBUG_TYPE "bugpoint"
namespace llvm {
- bool DisableSimplifyCFG = false;
- extern cl::opt<std::string> OutputPrefix;
+bool DisableSimplifyCFG = false;
+extern cl::opt<std::string> OutputPrefix;
} // End llvm namespace
namespace {
- cl::opt<bool>
- NoDCE ("disable-dce",
- cl::desc("Do not use the -dce pass to reduce testcases"));
- cl::opt<bool, true>
- NoSCFG("disable-simplifycfg", cl::location(DisableSimplifyCFG),
- cl::desc("Do not use the -simplifycfg pass to reduce testcases"));
-
- Function* globalInitUsesExternalBA(GlobalVariable* GV) {
- if (!GV->hasInitializer())
- return nullptr;
-
- Constant *I = GV->getInitializer();
-
- // walk the values used by the initializer
- // (and recurse into things like ConstantExpr)
- std::vector<Constant*> Todo;
- std::set<Constant*> Done;
- Todo.push_back(I);
-
- while (!Todo.empty()) {
- Constant* V = Todo.back();
- Todo.pop_back();
- Done.insert(V);
-
- if (BlockAddress *BA = dyn_cast<BlockAddress>(V)) {
- Function *F = BA->getFunction();
- if (F->isDeclaration())
- return F;
- }
+cl::opt<bool> NoDCE("disable-dce",
+ cl::desc("Do not use the -dce pass to reduce testcases"));
+cl::opt<bool, true>
+ NoSCFG("disable-simplifycfg", cl::location(DisableSimplifyCFG),
+ cl::desc("Do not use the -simplifycfg pass to reduce testcases"));
+
+Function *globalInitUsesExternalBA(GlobalVariable *GV) {
+ if (!GV->hasInitializer())
+ return nullptr;
- for (User::op_iterator i = V->op_begin(), e = V->op_end(); i != e; ++i) {
- Constant *C = dyn_cast<Constant>(*i);
- if (C && !isa<GlobalValue>(C) && !Done.count(C))
- Todo.push_back(C);
- }
+ Constant *I = GV->getInitializer();
+
+ // walk the values used by the initializer
+ // (and recurse into things like ConstantExpr)
+ std::vector<Constant *> Todo;
+ std::set<Constant *> Done;
+ Todo.push_back(I);
+
+ while (!Todo.empty()) {
+ Constant *V = Todo.back();
+ Todo.pop_back();
+ Done.insert(V);
+
+ if (BlockAddress *BA = dyn_cast<BlockAddress>(V)) {
+ Function *F = BA->getFunction();
+ if (F->isDeclaration())
+ return F;
+ }
+
+ for (User::op_iterator i = V->op_begin(), e = V->op_end(); i != e; ++i) {
+ Constant *C = dyn_cast<Constant>(*i);
+ if (C && !isa<GlobalValue>(C) && !Done.count(C))
+ Todo.push_back(C);
}
- return nullptr;
}
-} // end anonymous namespace
+ return nullptr;
+}
+} // end anonymous namespace
std::unique_ptr<Module>
BugDriver::deleteInstructionFromProgram(const Instruction *I,
@@ -92,10 +91,10 @@ BugDriver::deleteInstructionFromProgram(const Instruction *I,
const Function *PF = PBB->getParent();
Module::iterator RFI = Clone->begin(); // Get iterator to corresponding fn
- std::advance(RFI, std::distance(PF->getParent()->begin(),
- Module::const_iterator(PF)));
+ std::advance(
+ RFI, std::distance(PF->getParent()->begin(), Module::const_iterator(PF)));
- Function::iterator RBI = RFI->begin(); // Get iterator to corresponding BB
+ Function::iterator RBI = RFI->begin(); // Get iterator to corresponding BB
std::advance(RBI, std::distance(PF->begin(), Function::const_iterator(PBB)));
BasicBlock::iterator RI = RBI->begin(); // Get iterator to corresponding inst
@@ -116,7 +115,7 @@ BugDriver::deleteInstructionFromProgram(const Instruction *I,
if (Simplification > 1 && !NoDCE)
Passes.push_back("dce");
if (Simplification && !DisableSimplifyCFG)
- Passes.push_back("simplifycfg"); // Delete dead control flow
+ Passes.push_back("simplifycfg"); // Delete dead control flow
Passes.push_back("verify");
std::unique_ptr<Module> New = runPassesOn(Clone, Passes);
@@ -184,15 +183,16 @@ static void eliminateAliases(GlobalValue *GV) {
// GlobalAlias MAY NOT reference declarations.
for (;;) {
// 1. Find aliases
- SmallVector<GlobalAlias*,1> aliases;
+ SmallVector<GlobalAlias *, 1> aliases;
Module *M = GV->getParent();
- for (Module::alias_iterator I=M->alias_begin(), E=M->alias_end(); I!=E; ++I)
+ for (Module::alias_iterator I = M->alias_begin(), E = M->alias_end();
+ I != E; ++I)
if (I->getAliasee()->stripPointerCasts() == GV)
aliases.push_back(&*I);
if (aliases.empty())
break;
// 2. Resolve aliases
- for (unsigned i=0, e=aliases.size(); i<e; ++i) {
+ for (unsigned i = 0, e = aliases.size(); i < e; ++i) {
aliases[i]->replaceAllUsesWith(aliases[i]->getAliasee());
aliases[i]->eraseFromParent();
}
@@ -202,7 +202,8 @@ static void eliminateAliases(GlobalValue *GV) {
}
//
-// DeleteGlobalInitializer - "Remove" the global variable by deleting its initializer,
+// DeleteGlobalInitializer - "Remove" the global variable by deleting its
+// initializer,
// making it external.
//
void llvm::DeleteGlobalInitializer(GlobalVariable *GV) {
@@ -225,23 +226,20 @@ void llvm::DeleteFunctionBody(Function *F) {
/// GetTorInit - Given a list of entries for static ctors/dtors, return them
/// as a constant array.
-static Constant *GetTorInit(std::vector<std::pair<Function*, int> > &TorList) {
+static Constant *GetTorInit(std::vector<std::pair<Function *, int>> &TorList) {
assert(!TorList.empty() && "Don't create empty tor list!");
- std::vector<Constant*> ArrayElts;
+ std::vector<Constant *> ArrayElts;
Type *Int32Ty = Type::getInt32Ty(TorList[0].first->getContext());
StructType *STy =
StructType::get(Int32Ty, TorList[0].first->getType(), nullptr);
for (unsigned i = 0, e = TorList.size(); i != e; ++i) {
- Constant *Elts[] = {
- ConstantInt::get(Int32Ty, TorList[i].second),
- TorList[i].first
- };
+ Constant *Elts[] = {ConstantInt::get(Int32Ty, TorList[i].second),
+ TorList[i].first};
ArrayElts.push_back(ConstantStruct::get(STy, Elts));
}
- return ConstantArray::get(ArrayType::get(ArrayElts[0]->getType(),
- ArrayElts.size()),
- ArrayElts);
+ return ConstantArray::get(
+ ArrayType::get(ArrayElts[0]->getType(), ArrayElts.size()), ArrayElts);
}
/// SplitStaticCtorDtor - A module was recently split into two parts, M1/M2, and
@@ -251,23 +249,26 @@ static Constant *GetTorInit(std::vector<std::pair<Function*, int> > &TorList) {
static void SplitStaticCtorDtor(const char *GlobalName, Module *M1, Module *M2,
ValueToValueMapTy &VMap) {
GlobalVariable *GV = M1->getNamedGlobal(GlobalName);
- if (!GV || GV->isDeclaration() || GV->hasLocalLinkage() ||
- !GV->use_empty()) return;
-
- std::vector<std::pair<Function*, int> > M1Tors, M2Tors;
+ if (!GV || GV->isDeclaration() || GV->hasLocalLinkage() || !GV->use_empty())
+ return;
+
+ std::vector<std::pair<Function *, int>> M1Tors, M2Tors;
ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer());
- if (!InitList) return;
-
+ if (!InitList)
+ return;
+
for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
- if (ConstantStruct *CS = dyn_cast<ConstantStruct>(InitList->getOperand(i))){
- if (CS->getNumOperands() != 2) return; // Not array of 2-element structs.
-
+ if (ConstantStruct *CS =
+ dyn_cast<ConstantStruct>(InitList->getOperand(i))) {
+ if (CS->getNumOperands() != 2)
+ return; // Not array of 2-element structs.
+
if (CS->getOperand(1)->isNullValue())
- break; // Found a null terminator, stop here.
-
+ break; // Found a null terminator, stop here.
+
ConstantInt *CI = dyn_cast<ConstantInt>(CS->getOperand(0));
int Priority = CI ? CI->getSExtValue() : 0;
-
+
Constant *FP = CS->getOperand(1);
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(FP))
if (CE->isCast())
@@ -283,13 +284,12 @@ static void SplitStaticCtorDtor(const char *GlobalName, Module *M1, Module *M2,
}
}
}
-
+
GV->eraseFromParent();
if (!M1Tors.empty()) {
Constant *M1Init = GetTorInit(M1Tors);
new GlobalVariable(*M1, M1Init->getType(), false,
- GlobalValue::AppendingLinkage,
- M1Init, GlobalName);
+ GlobalValue::AppendingLinkage, M1Init, GlobalName);
}
GV = M2->getNamedGlobal(GlobalName);
@@ -300,8 +300,7 @@ static void SplitStaticCtorDtor(const char *GlobalName, Module *M1, Module *M2,
if (!M2Tors.empty()) {
Constant *M2Init = GetTorInit(M2Tors);
new GlobalVariable(*M2, M2Init->getType(), false,
- GlobalValue::AppendingLinkage,
- M2Init, GlobalName);
+ GlobalValue::AppendingLinkage, M2Init, GlobalName);
}
}
@@ -330,10 +329,9 @@ llvm::SplitFunctionsOutOfModule(Module *M, const std::vector<Function *> &F,
DEBUG(TNOF->printAsOperand(errs(), false));
DEBUG(errs() << "\n");
TestFunctions.insert(cast<Function>(NewVMap[TNOF]));
- DeleteFunctionBody(TNOF); // Function is now external in this module!
+ DeleteFunctionBody(TNOF); // Function is now external in this module!
}
-
// Remove the Safe functions from the Test module
for (Function &I : *New)
if (!TestFunctions.count(&I))
@@ -348,8 +346,9 @@ llvm::SplitFunctionsOutOfModule(Module *M, const std::vector<Function *> &F,
"the global '";
GV->printAsOperand(errs(), false);
errs() << "' with an initializer that references blockaddresses "
- "from safe function '" << SafeFn->getName()
- << "' and from test function '" << TestFn->getName() << "'.\n";
+ "from safe function '"
+ << SafeFn->getName() << "' and from test function '"
+ << TestFn->getName() << "'.\n";
exit(1);
}
DeleteGlobalInitializer(&I); // Delete the initializer to make it external
@@ -387,12 +386,13 @@ BugDriver::extractMappedBlocksFromModule(const std::vector<BasicBlock *> &BBs,
sys::RemoveFileOnSignal(Filename);
tool_output_file BlocksToNotExtractFile(Filename.c_str(), FD);
- for (std::vector<BasicBlock*>::const_iterator I = BBs.begin(), E = BBs.end();
+ for (std::vector<BasicBlock *>::const_iterator I = BBs.begin(), E = BBs.end();
I != E; ++I) {
BasicBlock *BB = *I;
// If the BB doesn't have a name, give it one so we have something to key
// off of.
- if (!BB->hasName()) BB->setName("tmpbb");
+ if (!BB->hasName())
+ BB->setName("tmpbb");
BlocksToNotExtractFile.os() << BB->getParent()->getName() << " "
<< BB->getName() << "\n";
}
diff --git a/contrib/llvm/tools/bugpoint/FindBugs.cpp b/contrib/llvm/tools/bugpoint/FindBugs.cpp
index a0c859b7f33e..156f4d0d78fe 100644
--- a/contrib/llvm/tools/bugpoint/FindBugs.cpp
+++ b/contrib/llvm/tools/bugpoint/FindBugs.cpp
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// This file defines an interface that allows bugpoint to choose different
-// combinations of optimizations to run on the selected input. Bugpoint will
+// This file defines an interface that allows bugpoint to choose different
+// combinations of optimizations to run on the selected input. Bugpoint will
// run these optimizations and record the success/failure of each. This way
// we can hopefully spot bugs in the optimizations.
//
@@ -23,92 +23,82 @@
#include <ctime>
using namespace llvm;
-/// runManyPasses - Take the specified pass list and create different
-/// combinations of passes to compile the program with. Compile the program with
-/// each set and mark test to see if it compiled correctly. If the passes
-/// compiled correctly output nothing and rearrange the passes into a new order.
-/// If the passes did not compile correctly, output the command required to
-/// recreate the failure. This returns true if a compiler error is found.
-///
-bool BugDriver::runManyPasses(const std::vector<std::string> &AllPasses,
- std::string &ErrMsg) {
+Error
+BugDriver::runManyPasses(const std::vector<std::string> &AllPasses) {
setPassesToRun(AllPasses);
outs() << "Starting bug finding procedure...\n\n";
-
+
// Creating a reference output if necessary
- if (initializeExecutionEnvironment()) return false;
-
+ if (Error E = initializeExecutionEnvironment())
+ return E;
+
outs() << "\n";
if (ReferenceOutputFile.empty()) {
outs() << "Generating reference output from raw program: \n";
- if (!createReferenceFile(Program))
- return false;
+ if (Error E = createReferenceFile(Program))
+ return E;
}
-
+
srand(time(nullptr));
-
+
unsigned num = 1;
- while(1) {
+ while (1) {
//
// Step 1: Randomize the order of the optimizer passes.
//
std::random_shuffle(PassesToRun.begin(), PassesToRun.end());
-
+
//
// Step 2: Run optimizer passes on the program and check for success.
//
outs() << "Running selected passes on program to test for crash: ";
- for(int i = 0, e = PassesToRun.size(); i != e; i++) {
+ for (int i = 0, e = PassesToRun.size(); i != e; i++) {
outs() << "-" << PassesToRun[i] << " ";
}
-
+
std::string Filename;
- if(runPasses(Program, PassesToRun, Filename, false)) {
+ if (runPasses(Program, PassesToRun, Filename, false)) {
outs() << "\n";
outs() << "Optimizer passes caused failure!\n\n";
- debugOptimizerCrash();
- return true;
+ return debugOptimizerCrash();
} else {
outs() << "Combination " << num << " optimized successfully!\n";
}
-
+
//
// Step 3: Compile the optimized code.
//
outs() << "Running the code generator to test for a crash: ";
- std::string Error;
- compileProgram(Program, &Error);
- if (!Error.empty()) {
+ if (Error E = compileProgram(Program)) {
outs() << "\n*** compileProgram threw an exception: ";
- outs() << Error;
- return debugCodeGeneratorCrash(ErrMsg);
+ outs() << toString(std::move(E));
+ return debugCodeGeneratorCrash();
}
outs() << '\n';
-
+
//
- // Step 4: Run the program and compare its output to the reference
+ // Step 4: Run the program and compare its output to the reference
// output (created above).
//
outs() << "*** Checking if passes caused miscompliation:\n";
- bool Diff = diffProgram(Program, Filename, "", false, &Error);
- if (Error.empty() && Diff) {
- outs() << "\n*** diffProgram returned true!\n";
- debugMiscompilation(&Error);
- if (Error.empty())
- return true;
+ Expected<bool> Diff = diffProgram(Program, Filename, "", false);
+ if (Error E = Diff.takeError()) {
+ errs() << toString(std::move(E));
+ return debugCodeGeneratorCrash();
}
- if (!Error.empty()) {
- errs() << Error;
- debugCodeGeneratorCrash(ErrMsg);
- return true;
+ if (*Diff) {
+ outs() << "\n*** diffProgram returned true!\n";
+ Error E = debugMiscompilation();
+ if (!E)
+ return Error::success();
}
outs() << "\n*** diff'd output matches!\n";
-
+
sys::fs::remove(Filename);
-
+
outs() << "\n\n";
num++;
- } //end while
-
+ } // end while
+
// Unreachable.
}
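
runManyPasses now reports failure through its returned llvm::Error rather than an ErrMsg out-parameter. A hypothetical top-level caller (the wrapper below is illustrative only, not code from this patch) would drain it like so:

    #include "BugDriver.h"
    #include "llvm/Support/Error.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    // Illustrative wrapper: run the bug-finding loop and turn any Error into
    // a diagnostic on stderr plus a non-zero exit code.
    static int runAndReport(BugDriver &D,
                            const std::vector<std::string> &AllPasses) {
      if (Error E = D.runManyPasses(AllPasses)) {
        errs() << "bugpoint: " << toString(std::move(E)) << "\n";
        return 1;
      }
      return 0;
    }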
diff --git a/contrib/llvm/tools/bugpoint/ListReducer.h b/contrib/llvm/tools/bugpoint/ListReducer.h
index f08bc97a9988..dcfa11d06927 100644
--- a/contrib/llvm/tools/bugpoint/ListReducer.h
+++ b/contrib/llvm/tools/bugpoint/ListReducer.h
@@ -15,44 +15,42 @@
#ifndef LLVM_TOOLS_BUGPOINT_LISTREDUCER_H
#define LLVM_TOOLS_BUGPOINT_LISTREDUCER_H
-#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cstdlib>
#include <vector>
namespace llvm {
-
- extern bool BugpointIsInterrupted;
-template<typename ElTy>
-struct ListReducer {
+extern bool BugpointIsInterrupted;
+
+template <typename ElTy> struct ListReducer {
enum TestResult {
- NoFailure, // No failure of the predicate was detected
- KeepSuffix, // The suffix alone satisfies the predicate
- KeepPrefix, // The prefix alone satisfies the predicate
- InternalError // Encountered an error trying to run the predicate
+ NoFailure, // No failure of the predicate was detected
+ KeepSuffix, // The suffix alone satisfies the predicate
+ KeepPrefix // The prefix alone satisfies the predicate
};
virtual ~ListReducer() {}
- // doTest - This virtual function should be overriden by subclasses to
- // implement the test desired. The testcase is only required to test to see
- // if the Kept list still satisfies the property, but if it is going to check
- // the prefix anyway, it can.
- //
- virtual TestResult doTest(std::vector<ElTy> &Prefix,
- std::vector<ElTy> &Kept,
- std::string &Error) = 0;
-
- // reduceList - This function attempts to reduce the length of the specified
- // list while still maintaining the "test" property. This is the core of the
- // "work" that bugpoint does.
- //
- bool reduceList(std::vector<ElTy> &TheList, std::string &Error) {
+ /// This virtual function should be overriden by subclasses to implement the
+ /// test desired. The testcase is only required to test to see if the Kept
+ /// list still satisfies the property, but if it is going to check the prefix
+ /// anyway, it can.
+ virtual Expected<TestResult> doTest(std::vector<ElTy> &Prefix,
+ std::vector<ElTy> &Kept) = 0;
+
+ /// This function attempts to reduce the length of the specified list while
+ /// still maintaining the "test" property. This is the core of the "work"
+ /// that bugpoint does.
+ Expected<bool> reduceList(std::vector<ElTy> &TheList) {
std::vector<ElTy> empty;
std::srand(0x6e5ea738); // Seed the random number generator
- switch (doTest(TheList, empty, Error)) {
+ Expected<TestResult> Result = doTest(TheList, empty);
+ if (Error E = Result.takeError())
+ return std::move(E);
+ switch (*Result) {
case KeepPrefix:
if (TheList.size() == 1) // we are done, it's the base case and it fails
return true;
@@ -66,10 +64,6 @@ struct ListReducer {
case NoFailure:
return false; // there is no failure with the full set of passes/funcs!
-
- case InternalError:
- assert(!Error.empty());
- return true;
}
// Maximal number of allowed splitting iterations,
@@ -77,12 +71,12 @@ struct ListReducer {
const unsigned MaxIterationsWithoutProgress = 3;
// Maximal number of allowed single-element trim iterations. We add a
- // threshhold here as single-element reductions may otherwise take a
+ // threshold here as single-element reductions may otherwise take a
// very long time to complete.
const unsigned MaxTrimIterationsWithoutBackJump = 3;
bool ShufflingEnabled = true;
-Backjump:
+ Backjump:
unsigned MidTop = TheList.size();
unsigned MaxIterations = MaxIterationsWithoutProgress;
unsigned NumOfIterationsWithoutProgress = 0;
@@ -96,18 +90,23 @@ Backjump:
// If the loop doesn't make satisfying progress, try shuffling.
// The purpose of shuffling is to avoid the heavy tails of the
// distribution (improving the speed of convergence).
- if (ShufflingEnabled &&
- NumOfIterationsWithoutProgress > MaxIterations) {
+ if (ShufflingEnabled && NumOfIterationsWithoutProgress > MaxIterations) {
std::vector<ElTy> ShuffledList(TheList);
std::random_shuffle(ShuffledList.begin(), ShuffledList.end());
errs() << "\n\n*** Testing shuffled set...\n\n";
- // Check that random shuffle doesn't loose the bug
- if (doTest(ShuffledList, empty, Error) == KeepPrefix) {
+ // Check that random shuffle doesn't lose the bug
+ Expected<TestResult> Result = doTest(ShuffledList, empty);
+ // TODO: Previously, this error was ignored and we treated it as if
+ // shuffling hid the bug. This should really either call consumeError, if
+ // that behaviour was sensible, or propagate the error.
+ assert(!Result.takeError() && "Shuffling caused internal error?");
+
+ if (*Result == KeepPrefix) {
// If the bug is still here, use the shuffled list.
TheList.swap(ShuffledList);
MidTop = TheList.size();
- // Must increase the shuffling treshold to avoid the small
- // probability of inifinite looping without making progress.
+ // Must increase the shuffling threshold to avoid the small
+ // probability of infinite looping without making progress.
MaxIterations += 2;
errs() << "\n\n*** Shuffling does not hide the bug...\n\n";
} else {
@@ -116,12 +115,15 @@ Backjump:
}
NumOfIterationsWithoutProgress = 0;
}
-
+
unsigned Mid = MidTop / 2;
- std::vector<ElTy> Prefix(TheList.begin(), TheList.begin()+Mid);
- std::vector<ElTy> Suffix(TheList.begin()+Mid, TheList.end());
+ std::vector<ElTy> Prefix(TheList.begin(), TheList.begin() + Mid);
+ std::vector<ElTy> Suffix(TheList.begin() + Mid, TheList.end());
- switch (doTest(Prefix, Suffix, Error)) {
+ Expected<TestResult> Result = doTest(Prefix, Suffix);
+ if (Error E = Result.takeError())
+ return std::move(E);
+ switch (*Result) {
case KeepSuffix:
// The property still holds. We can just drop the prefix elements, and
// shorten the list to the "kept" elements.
@@ -145,10 +147,7 @@ Backjump:
MidTop = Mid;
NumOfIterationsWithoutProgress++;
break;
- case InternalError:
- return true; // Error was set by doTest.
}
- assert(Error.empty() && "doTest did not return InternalError for error");
}
// Probability of backjumping from the trimming loop back to the binary
@@ -156,41 +155,43 @@ Backjump:
const int BackjumpProbability = 10;
// Okay, we trimmed as much off the top and the bottom of the list as we
- // could. If there is more than two elements in the list, try deleting
+ // could. If there is more than two elements in the list, try deleting
// interior elements and testing that.
//
if (TheList.size() > 2) {
bool Changed = true;
std::vector<ElTy> EmptyList;
unsigned TrimIterations = 0;
- while (Changed) { // Trimming loop.
+ while (Changed) { // Trimming loop.
Changed = false;
-
+
// If the binary split reduction loop made an unfortunate sequence of
// splits, the trimming loop might be left off with a huge number of
// remaining elements (large search space). Backjumping out of that
- // search space and attempting a different split can significantly
+ // search space and attempting a different split can significantly
// improve the convergence speed.
if (std::rand() % 100 < BackjumpProbability)
goto Backjump;
-
- for (unsigned i = 1; i < TheList.size()-1; ++i) { // Check interior elts
+
+ for (unsigned i = 1; i < TheList.size() - 1; ++i) {
+ // Check interior elts
if (BugpointIsInterrupted) {
errs() << "\n\n*** Reduction Interrupted, cleaning up...\n\n";
return true;
}
-
+
std::vector<ElTy> TestList(TheList);
- TestList.erase(TestList.begin()+i);
+ TestList.erase(TestList.begin() + i);
- if (doTest(EmptyList, TestList, Error) == KeepSuffix) {
+ Expected<TestResult> Result = doTest(EmptyList, TestList);
+ if (Error E = Result.takeError())
+ return std::move(E);
+ if (*Result == KeepSuffix) {
// We can trim down the list!
TheList.swap(TestList);
- --i; // Don't skip an element of the list
+ --i; // Don't skip an element of the list
Changed = true;
}
- if (!Error.empty())
- return true;
}
if (TrimIterations >= MaxTrimIterationsWithoutBackJump)
break;
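
The reworked ListReducer interface makes doTest return Expected<TestResult> and reduceList return Expected<bool>. A minimal illustrative subclass follows; the integer predicate is invented purely to show the shape of the API and stands in for "the test case still triggers the bug".

    #include "ListReducer.h"
    using namespace llvm;

    // Shrink a list of ints for as long as it still contains a negative value.
    struct ReduceInts : public ListReducer<int> {
      Expected<TestResult> doTest(std::vector<int> &Prefix,
                                  std::vector<int> &Kept) override {
        auto HasNegative = [](const std::vector<int> &V) {
          for (int X : V)
            if (X < 0)
              return true;
          return false;
        };
        if (HasNegative(Kept))
          return KeepSuffix; // the kept suffix alone still reproduces it
        if (HasNegative(Prefix))
          return KeepPrefix; // only the prefix reproduces it
        return NoFailure;
      }
    };

    // Usage sketch:
    //   std::vector<int> Vals = {3, -7, 12, 5};
    //   Expected<bool> Reduced = ReduceInts().reduceList(Vals);
    //   if (Error E = Reduced.takeError()) { /* report or propagate */ }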
diff --git a/contrib/llvm/tools/bugpoint/Miscompilation.cpp b/contrib/llvm/tools/bugpoint/Miscompilation.cpp
index 40955ed3caa9..792fab07bf11 100644
--- a/contrib/llvm/tools/bugpoint/Miscompilation.cpp
+++ b/contrib/llvm/tools/bugpoint/Miscompilation.cpp
@@ -15,7 +15,7 @@
#include "BugDriver.h"
#include "ListReducer.h"
#include "ToolRunner.h"
-#include "llvm/Config/config.h" // for HAVE_LINK_R
+#include "llvm/Config/config.h" // for HAVE_LINK_R
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instructions.h"
@@ -30,70 +30,73 @@
using namespace llvm;
namespace llvm {
- extern cl::opt<std::string> OutputPrefix;
- extern cl::list<std::string> InputArgv;
+extern cl::opt<std::string> OutputPrefix;
+extern cl::list<std::string> InputArgv;
} // end namespace llvm
namespace {
- static llvm::cl::opt<bool>
- DisableLoopExtraction("disable-loop-extraction",
- cl::desc("Don't extract loops when searching for miscompilations"),
- cl::init(false));
- static llvm::cl::opt<bool>
- DisableBlockExtraction("disable-block-extraction",
- cl::desc("Don't extract blocks when searching for miscompilations"),
- cl::init(false));
-
- class ReduceMiscompilingPasses : public ListReducer<std::string> {
- BugDriver &BD;
- public:
- ReduceMiscompilingPasses(BugDriver &bd) : BD(bd) {}
-
- TestResult doTest(std::vector<std::string> &Prefix,
- std::vector<std::string> &Suffix,
- std::string &Error) override;
- };
+static llvm::cl::opt<bool> DisableLoopExtraction(
+ "disable-loop-extraction",
+ cl::desc("Don't extract loops when searching for miscompilations"),
+ cl::init(false));
+static llvm::cl::opt<bool> DisableBlockExtraction(
+ "disable-block-extraction",
+ cl::desc("Don't extract blocks when searching for miscompilations"),
+ cl::init(false));
+
+class ReduceMiscompilingPasses : public ListReducer<std::string> {
+ BugDriver &BD;
+
+public:
+ ReduceMiscompilingPasses(BugDriver &bd) : BD(bd) {}
+
+ Expected<TestResult> doTest(std::vector<std::string> &Prefix,
+ std::vector<std::string> &Suffix) override;
+};
} // end anonymous namespace
/// TestResult - After passes have been split into a test group and a control
/// group, see if they still break the program.
///
-ReduceMiscompilingPasses::TestResult
+Expected<ReduceMiscompilingPasses::TestResult>
ReduceMiscompilingPasses::doTest(std::vector<std::string> &Prefix,
- std::vector<std::string> &Suffix,
- std::string &Error) {
+ std::vector<std::string> &Suffix) {
// First, run the program with just the Suffix passes. If it is still broken
// with JUST the kept passes, discard the prefix passes.
outs() << "Checking to see if '" << getPassesString(Suffix)
<< "' compiles correctly: ";
std::string BitcodeResult;
- if (BD.runPasses(BD.getProgram(), Suffix, BitcodeResult, false/*delete*/,
- true/*quiet*/)) {
+ if (BD.runPasses(BD.getProgram(), Suffix, BitcodeResult, false /*delete*/,
+ true /*quiet*/)) {
errs() << " Error running this sequence of passes"
<< " on the input program!\n";
BD.setPassesToRun(Suffix);
- BD.EmitProgressBitcode(BD.getProgram(), "pass-error", false);
- exit(BD.debugOptimizerCrash());
+ BD.EmitProgressBitcode(BD.getProgram(), "pass-error", false);
+ // TODO: This should propagate the error instead of exiting.
+ if (Error E = BD.debugOptimizerCrash())
+ exit(1);
+ exit(0);
}
// Check to see if the finished program matches the reference output...
- bool Diff = BD.diffProgram(BD.getProgram(), BitcodeResult, "",
- true /*delete bitcode*/, &Error);
- if (!Error.empty())
- return InternalError;
- if (Diff) {
+ Expected<bool> Diff = BD.diffProgram(BD.getProgram(), BitcodeResult, "",
+ true /*delete bitcode*/);
+ if (Error E = Diff.takeError())
+ return std::move(E);
+ if (*Diff) {
outs() << " nope.\n";
if (Suffix.empty()) {
errs() << BD.getToolName() << ": I'm confused: the test fails when "
<< "no passes are run, nondeterministic program?\n";
exit(1);
}
- return KeepSuffix; // Miscompilation detected!
+ return KeepSuffix; // Miscompilation detected!
}
- outs() << " yup.\n"; // No miscompilation!
+ outs() << " yup.\n"; // No miscompilation!
- if (Prefix.empty()) return NoFailure;
+ if (Prefix.empty())
+ return NoFailure;
// Next, see if the program is broken if we run the "prefix" passes first,
// then separately run the "kept" passes.
@@ -106,25 +109,28 @@ ReduceMiscompilingPasses::doTest(std::vector<std::string> &Prefix,
// kept passes, we can update our bitcode file to include the result of the
// prefix passes, then discard the prefix passes.
//
- if (BD.runPasses(BD.getProgram(), Prefix, BitcodeResult, false/*delete*/,
- true/*quiet*/)) {
+ if (BD.runPasses(BD.getProgram(), Prefix, BitcodeResult, false /*delete*/,
+ true /*quiet*/)) {
errs() << " Error running this sequence of passes"
<< " on the input program!\n";
BD.setPassesToRun(Prefix);
- BD.EmitProgressBitcode(BD.getProgram(), "pass-error", false);
- exit(BD.debugOptimizerCrash());
+ BD.EmitProgressBitcode(BD.getProgram(), "pass-error", false);
+ // TODO: This should propagate the error instead of exiting.
+ if (Error E = BD.debugOptimizerCrash())
+ exit(1);
+ exit(0);
}
// If the prefix maintains the predicate by itself, only keep the prefix!
- Diff = BD.diffProgram(BD.getProgram(), BitcodeResult, "", false, &Error);
- if (!Error.empty())
- return InternalError;
- if (Diff) {
+ Diff = BD.diffProgram(BD.getProgram(), BitcodeResult, "", false);
+ if (Error E = Diff.takeError())
+ return std::move(E);
+ if (*Diff) {
outs() << " nope.\n";
sys::fs::remove(BitcodeResult);
return KeepPrefix;
}
- outs() << " yup.\n"; // No miscompilation!
+ outs() << " yup.\n"; // No miscompilation!
// Ok, so now we know that the prefix passes work, try running the suffix
// passes on the result of the prefix passes.
@@ -143,72 +149,74 @@ ReduceMiscompilingPasses::doTest(std::vector<std::string> &Prefix,
return NoFailure;
outs() << "Checking to see if '" << getPassesString(Suffix)
- << "' passes compile correctly after the '"
- << getPassesString(Prefix) << "' passes: ";
+ << "' passes compile correctly after the '" << getPassesString(Prefix)
+ << "' passes: ";
std::unique_ptr<Module> OriginalInput(
BD.swapProgramIn(PrefixOutput.release()));
- if (BD.runPasses(BD.getProgram(), Suffix, BitcodeResult, false/*delete*/,
- true/*quiet*/)) {
+ if (BD.runPasses(BD.getProgram(), Suffix, BitcodeResult, false /*delete*/,
+ true /*quiet*/)) {
errs() << " Error running this sequence of passes"
<< " on the input program!\n";
BD.setPassesToRun(Suffix);
- BD.EmitProgressBitcode(BD.getProgram(), "pass-error", false);
- exit(BD.debugOptimizerCrash());
+ BD.EmitProgressBitcode(BD.getProgram(), "pass-error", false);
+ // TODO: This should propagate the error instead of exiting.
+ if (Error E = BD.debugOptimizerCrash())
+ exit(1);
+ exit(0);
}
// Run the result...
Diff = BD.diffProgram(BD.getProgram(), BitcodeResult, "",
- true /*delete bitcode*/, &Error);
- if (!Error.empty())
- return InternalError;
- if (Diff) {
+ true /*delete bitcode*/);
+ if (Error E = Diff.takeError())
+ return std::move(E);
+ if (*Diff) {
outs() << " nope.\n";
return KeepSuffix;
}
// Otherwise, we must not be running the bad pass anymore.
- outs() << " yup.\n"; // No miscompilation!
+ outs() << " yup.\n"; // No miscompilation!
// Restore orig program & free test.
delete BD.swapProgramIn(OriginalInput.release());
return NoFailure;
}
namespace {
- class ReduceMiscompilingFunctions : public ListReducer<Function*> {
- BugDriver &BD;
- bool (*TestFn)(BugDriver &, std::unique_ptr<Module>,
- std::unique_ptr<Module>, std::string &);
-
- public:
- ReduceMiscompilingFunctions(BugDriver &bd,
- bool (*F)(BugDriver &, std::unique_ptr<Module>,
- std::unique_ptr<Module>,
- std::string &))
- : BD(bd), TestFn(F) {}
-
- TestResult doTest(std::vector<Function*> &Prefix,
- std::vector<Function*> &Suffix,
- std::string &Error) override {
- if (!Suffix.empty()) {
- bool Ret = TestFuncs(Suffix, Error);
- if (!Error.empty())
- return InternalError;
- if (Ret)
- return KeepSuffix;
- }
- if (!Prefix.empty()) {
- bool Ret = TestFuncs(Prefix, Error);
- if (!Error.empty())
- return InternalError;
- if (Ret)
- return KeepPrefix;
- }
- return NoFailure;
+class ReduceMiscompilingFunctions : public ListReducer<Function *> {
+ BugDriver &BD;
+ Expected<bool> (*TestFn)(BugDriver &, std::unique_ptr<Module>,
+ std::unique_ptr<Module>);
+
+public:
+ ReduceMiscompilingFunctions(BugDriver &bd,
+ Expected<bool> (*F)(BugDriver &,
+ std::unique_ptr<Module>,
+ std::unique_ptr<Module>))
+ : BD(bd), TestFn(F) {}
+
+ Expected<TestResult> doTest(std::vector<Function *> &Prefix,
+ std::vector<Function *> &Suffix) override {
+ if (!Suffix.empty()) {
+ Expected<bool> Ret = TestFuncs(Suffix);
+ if (Error E = Ret.takeError())
+ return std::move(E);
+ if (*Ret)
+ return KeepSuffix;
+ }
+ if (!Prefix.empty()) {
+ Expected<bool> Ret = TestFuncs(Prefix);
+ if (Error E = Ret.takeError())
+ return std::move(E);
+ if (*Ret)
+ return KeepPrefix;
}
+ return NoFailure;
+ }
- bool TestFuncs(const std::vector<Function*> &Prefix, std::string &Error);
- };
+ Expected<bool> TestFuncs(const std::vector<Function *> &Prefix);
+};
} // end anonymous namespace
/// Given two modules, link them together and run the program, checking to see
@@ -217,31 +225,31 @@ namespace {
/// output is different. If the DeleteInputs argument is set to true then this
/// function deletes both input modules before it returns.
///
-static std::unique_ptr<Module> testMergedProgram(const BugDriver &BD,
- std::unique_ptr<Module> M1,
- std::unique_ptr<Module> M2,
- std::string &Error,
- bool &Broken) {
+static Expected<std::unique_ptr<Module>>
+testMergedProgram(const BugDriver &BD, std::unique_ptr<Module> M1,
+ std::unique_ptr<Module> M2, bool &Broken) {
if (Linker::linkModules(*M1, std::move(M2)))
+ // TODO: Shouldn't we thread the error up instead of exiting?
exit(1);
// Execute the program.
- Broken = BD.diffProgram(M1.get(), "", "", false, &Error);
- if (!Error.empty())
- return nullptr;
- return M1;
+ Expected<bool> Diff = BD.diffProgram(M1.get(), "", "", false);
+ if (Error E = Diff.takeError())
+ return std::move(E);
+ Broken = *Diff;
+ return std::move(M1);
}
/// TestFuncs - split functions in a Module into two groups: those that are
/// under consideration for miscompilation vs. those that are not, and test
/// accordingly. Each group of functions becomes a separate Module.
///
-bool ReduceMiscompilingFunctions::TestFuncs(const std::vector<Function*> &Funcs,
- std::string &Error) {
+Expected<bool>
+ReduceMiscompilingFunctions::TestFuncs(const std::vector<Function *> &Funcs) {
// Test to see if the function is misoptimized if we ONLY run it on the
// functions listed in Funcs.
outs() << "Checking to see if the program is misoptimized when "
- << (Funcs.size()==1 ? "this function is" : "these functions are")
+ << (Funcs.size() == 1 ? "this function is" : "these functions are")
<< " run through the pass"
<< (BD.getPassesToRun().size() == 1 ? "" : "es") << ":";
PrintFunctionList(Funcs);
@@ -258,7 +266,7 @@ bool ReduceMiscompilingFunctions::TestFuncs(const std::vector<Function*> &Funcs,
Module *Clone = CloneModule(BD.getProgram(), VMap).release();
Module *Orig = BD.swapProgramIn(Clone);
- std::vector<Function*> FuncsOnClone;
+ std::vector<Function *> FuncsOnClone;
for (unsigned i = 0, e = Funcs.size(); i != e; ++i) {
Function *F = cast<Function>(VMap[Funcs[i]]);
FuncsOnClone.push_back(F);
@@ -270,8 +278,8 @@ bool ReduceMiscompilingFunctions::TestFuncs(const std::vector<Function*> &Funcs,
std::unique_ptr<Module> ToOptimize =
SplitFunctionsOutOfModule(ToNotOptimize.get(), FuncsOnClone, VMap);
- bool Broken =
- TestFn(BD, std::move(ToOptimize), std::move(ToNotOptimize), Error);
+ Expected<bool> Broken =
+ TestFn(BD, std::move(ToOptimize), std::move(ToNotOptimize));
delete BD.swapProgramIn(Orig);
@@ -294,14 +302,15 @@ static void DisambiguateGlobalSymbols(Module *M) {
/// if we can extract the loops in the region without obscuring the bug. If so,
/// it reduces the amount of code identified.
///
-static bool ExtractLoops(BugDriver &BD,
- bool (*TestFn)(BugDriver &, std::unique_ptr<Module>,
- std::unique_ptr<Module>, std::string &),
- std::vector<Function *> &MiscompiledFunctions,
- std::string &Error) {
+static Expected<bool>
+ExtractLoops(BugDriver &BD,
+ Expected<bool> (*TestFn)(BugDriver &, std::unique_ptr<Module>,
+ std::unique_ptr<Module>),
+ std::vector<Function *> &MiscompiledFunctions) {
bool MadeChange = false;
while (1) {
- if (BugpointIsInterrupted) return MadeChange;
+ if (BugpointIsInterrupted)
+ return MadeChange;
ValueToValueMapTy VMap;
std::unique_ptr<Module> ToNotOptimize = CloneModule(BD.getProgram(), VMap);
@@ -326,14 +335,16 @@ static bool ExtractLoops(BugDriver &BD,
// extraction.
AbstractInterpreter *AI = BD.switchToSafeInterpreter();
bool Failure;
- std::unique_ptr<Module> New =
+ Expected<std::unique_ptr<Module>> New =
testMergedProgram(BD, std::move(ToOptimizeLoopExtracted),
- std::move(ToNotOptimize), Error, Failure);
- if (!New)
+ std::move(ToNotOptimize), Failure);
+ if (Error E = New.takeError())
+ return std::move(E);
+ if (!*New)
return false;
// Delete the original and set the new program.
- Module *Old = BD.swapProgramIn(New.release());
+ Module *Old = BD.swapProgramIn(New->release());
for (unsigned i = 0, e = MiscompiledFunctions.size(); i != e; ++i)
MiscompiledFunctions[i] = cast<Function>(VMap[MiscompiledFunctions[i]]);
delete Old;
@@ -353,8 +364,8 @@ static bool ExtractLoops(BugDriver &BD,
BD.writeProgramToFile(OutputPrefix + "-loop-extract-fail-to-le.bc",
ToOptimizeLoopExtracted.get());
- errs() << "Please submit the "
- << OutputPrefix << "-loop-extract-fail-*.bc files.\n";
+ errs() << "Please submit the " << OutputPrefix
+ << "-loop-extract-fail-*.bc files.\n";
delete ToOptimize;
return MadeChange;
}
@@ -370,20 +381,20 @@ static bool ExtractLoops(BugDriver &BD,
for (unsigned i = 0, e = MiscompiledFunctions.size(); i != e; ++i)
MiscompiledFunctions[i] = cast<Function>(VMap[MiscompiledFunctions[i]]);
- Failure = TestFn(BD, std::move(ToOptimizeLoopExtracted),
- std::move(ToNotOptimize), Error);
- if (!Error.empty())
- return false;
+ Expected<bool> Result = TestFn(BD, std::move(ToOptimizeLoopExtracted),
+ std::move(ToNotOptimize));
+ if (Error E = Result.takeError())
+ return std::move(E);
ToOptimizeLoopExtracted = std::move(TOLEBackup);
ToNotOptimize = std::move(TNOBackup);
- if (!Failure) {
+ if (!*Result) {
outs() << "*** Loop extraction masked the problem. Undoing.\n";
// If the program is not still broken, then loop extraction did something
// that masked the error. Stop loop extraction now.
- std::vector<std::pair<std::string, FunctionType*> > MisCompFunctions;
+ std::vector<std::pair<std::string, FunctionType *>> MisCompFunctions;
for (Function *F : MiscompiledFunctions) {
MisCompFunctions.emplace_back(F->getName(), F->getFunctionType());
}
@@ -406,9 +417,10 @@ static bool ExtractLoops(BugDriver &BD,
outs() << "*** Loop extraction successful!\n";
- std::vector<std::pair<std::string, FunctionType*> > MisCompFunctions;
+ std::vector<std::pair<std::string, FunctionType *>> MisCompFunctions;
for (Module::iterator I = ToOptimizeLoopExtracted->begin(),
- E = ToOptimizeLoopExtracted->end(); I != E; ++I)
+ E = ToOptimizeLoopExtracted->end();
+ I != E; ++I)
if (!I->isDeclaration())
MisCompFunctions.emplace_back(I->getName(), I->getFunctionType());
@@ -436,47 +448,48 @@ static bool ExtractLoops(BugDriver &BD,
}
namespace {
- class ReduceMiscompiledBlocks : public ListReducer<BasicBlock*> {
- BugDriver &BD;
- bool (*TestFn)(BugDriver &, std::unique_ptr<Module>,
- std::unique_ptr<Module>, std::string &);
- std::vector<Function*> FunctionsBeingTested;
- public:
- ReduceMiscompiledBlocks(BugDriver &bd,
- bool (*F)(BugDriver &, std::unique_ptr<Module>,
- std::unique_ptr<Module>, std::string &),
- const std::vector<Function *> &Fns)
- : BD(bd), TestFn(F), FunctionsBeingTested(Fns) {}
-
- TestResult doTest(std::vector<BasicBlock*> &Prefix,
- std::vector<BasicBlock*> &Suffix,
- std::string &Error) override {
- if (!Suffix.empty()) {
- bool Ret = TestFuncs(Suffix, Error);
- if (!Error.empty())
- return InternalError;
- if (Ret)
- return KeepSuffix;
- }
- if (!Prefix.empty()) {
- bool Ret = TestFuncs(Prefix, Error);
- if (!Error.empty())
- return InternalError;
- if (Ret)
- return KeepPrefix;
- }
- return NoFailure;
+class ReduceMiscompiledBlocks : public ListReducer<BasicBlock *> {
+ BugDriver &BD;
+ Expected<bool> (*TestFn)(BugDriver &, std::unique_ptr<Module>,
+ std::unique_ptr<Module>);
+ std::vector<Function *> FunctionsBeingTested;
+
+public:
+ ReduceMiscompiledBlocks(BugDriver &bd,
+ Expected<bool> (*F)(BugDriver &,
+ std::unique_ptr<Module>,
+ std::unique_ptr<Module>),
+ const std::vector<Function *> &Fns)
+ : BD(bd), TestFn(F), FunctionsBeingTested(Fns) {}
+
+ Expected<TestResult> doTest(std::vector<BasicBlock *> &Prefix,
+ std::vector<BasicBlock *> &Suffix) override {
+ if (!Suffix.empty()) {
+ Expected<bool> Ret = TestFuncs(Suffix);
+ if (Error E = Ret.takeError())
+ return std::move(E);
+ if (*Ret)
+ return KeepSuffix;
}
+ if (!Prefix.empty()) {
+ Expected<bool> Ret = TestFuncs(Prefix);
+ if (Error E = Ret.takeError())
+ return std::move(E);
+ if (*Ret)
+ return KeepPrefix;
+ }
+ return NoFailure;
+ }
- bool TestFuncs(const std::vector<BasicBlock*> &BBs, std::string &Error);
- };
+ Expected<bool> TestFuncs(const std::vector<BasicBlock *> &BBs);
+};
} // end anonymous namespace
/// TestFuncs - Extract all blocks for the miscompiled functions except for the
/// specified blocks. If the problem still exists, return true.
///
-bool ReduceMiscompiledBlocks::TestFuncs(const std::vector<BasicBlock*> &BBs,
- std::string &Error) {
+Expected<bool>
+ReduceMiscompiledBlocks::TestFuncs(const std::vector<BasicBlock *> &BBs) {
// Test to see if the function is misoptimized if we ONLY run it on the
// functions listed in Funcs.
outs() << "Checking to see if the program is misoptimized when all ";
@@ -484,7 +497,8 @@ bool ReduceMiscompiledBlocks::TestFuncs(const std::vector<BasicBlock*> &BBs,
outs() << "but these " << BBs.size() << " blocks are extracted: ";
for (unsigned i = 0, e = BBs.size() < 10 ? BBs.size() : 10; i != e; ++i)
outs() << BBs[i]->getName() << " ";
- if (BBs.size() > 10) outs() << "...";
+ if (BBs.size() > 10)
+ outs() << "...";
} else {
outs() << "blocks are extracted.";
}
@@ -494,8 +508,8 @@ bool ReduceMiscompiledBlocks::TestFuncs(const std::vector<BasicBlock*> &BBs,
ValueToValueMapTy VMap;
Module *Clone = CloneModule(BD.getProgram(), VMap).release();
Module *Orig = BD.swapProgramIn(Clone);
- std::vector<Function*> FuncsOnClone;
- std::vector<BasicBlock*> BBsOnClone;
+ std::vector<Function *> FuncsOnClone;
+ std::vector<BasicBlock *> BBsOnClone;
for (unsigned i = 0, e = FunctionsBeingTested.size(); i != e; ++i) {
Function *F = cast<Function>(VMap[FunctionsBeingTested[i]]);
FuncsOnClone.push_back(F);
@@ -514,7 +528,7 @@ bool ReduceMiscompiledBlocks::TestFuncs(const std::vector<BasicBlock*> &BBs,
// or something, in which case bugpoint can't chase down this possibility.
if (std::unique_ptr<Module> New =
BD.extractMappedBlocksFromModule(BBsOnClone, ToOptimize.get())) {
- bool Ret = TestFn(BD, std::move(New), std::move(ToNotOptimize), Error);
+ Expected<bool> Ret = TestFn(BD, std::move(New), std::move(ToNotOptimize));
delete BD.swapProgramIn(Orig);
return Ret;
}
@@ -525,15 +539,15 @@ bool ReduceMiscompiledBlocks::TestFuncs(const std::vector<BasicBlock*> &BBs,
/// Given a reduced list of functions that still expose the bug, extract as many
/// basic blocks from the region as possible without obscuring the bug.
///
-static bool ExtractBlocks(BugDriver &BD,
- bool (*TestFn)(BugDriver &, std::unique_ptr<Module>,
- std::unique_ptr<Module>,
- std::string &),
- std::vector<Function *> &MiscompiledFunctions,
- std::string &Error) {
- if (BugpointIsInterrupted) return false;
-
- std::vector<BasicBlock*> Blocks;
+static Expected<bool>
+ExtractBlocks(BugDriver &BD,
+ Expected<bool> (*TestFn)(BugDriver &, std::unique_ptr<Module>,
+ std::unique_ptr<Module>),
+ std::vector<Function *> &MiscompiledFunctions) {
+ if (BugpointIsInterrupted)
+ return false;
+
+ std::vector<BasicBlock *> Blocks;
for (unsigned i = 0, e = MiscompiledFunctions.size(); i != e; ++i)
for (BasicBlock &BB : *MiscompiledFunctions[i])
Blocks.push_back(&BB);
@@ -544,17 +558,18 @@ static bool ExtractBlocks(BugDriver &BD,
unsigned OldSize = Blocks.size();
// Check to see if all blocks are extractible first.
- bool Ret = ReduceMiscompiledBlocks(BD, TestFn, MiscompiledFunctions)
- .TestFuncs(std::vector<BasicBlock*>(), Error);
- if (!Error.empty())
- return false;
- if (Ret) {
+ Expected<bool> Ret = ReduceMiscompiledBlocks(BD, TestFn, MiscompiledFunctions)
+ .TestFuncs(std::vector<BasicBlock *>());
+ if (Error E = Ret.takeError())
+ return std::move(E);
+ if (*Ret) {
Blocks.clear();
} else {
- ReduceMiscompiledBlocks(BD, TestFn,
- MiscompiledFunctions).reduceList(Blocks, Error);
- if (!Error.empty())
- return false;
+ Expected<bool> Ret =
+ ReduceMiscompiledBlocks(BD, TestFn, MiscompiledFunctions)
+ .reduceList(Blocks);
+ if (Error E = Ret.takeError())
+ return std::move(E);
if (Blocks.size() == OldSize)
return false;
}
@@ -578,9 +593,9 @@ static bool ExtractBlocks(BugDriver &BD,
// together.
delete ToExtract;
- std::vector<std::pair<std::string, FunctionType*> > MisCompFunctions;
- for (Module::iterator I = Extracted->begin(), E = Extracted->end();
- I != E; ++I)
+ std::vector<std::pair<std::string, FunctionType *>> MisCompFunctions;
+ for (Module::iterator I = Extracted->begin(), E = Extracted->end(); I != E;
+ ++I)
if (!I->isDeclaration())
MisCompFunctions.emplace_back(I->getName(), I->getFunctionType());
@@ -605,28 +620,28 @@ static bool ExtractBlocks(BugDriver &BD,
/// This is a generic driver to narrow down miscompilations, either in an
/// optimization or a code generator.
///
-static std::vector<Function *>
-DebugAMiscompilation(BugDriver &BD,
- bool (*TestFn)(BugDriver &, std::unique_ptr<Module>,
- std::unique_ptr<Module>, std::string &),
- std::string &Error) {
+static Expected<std::vector<Function *>> DebugAMiscompilation(
+ BugDriver &BD,
+ Expected<bool> (*TestFn)(BugDriver &, std::unique_ptr<Module>,
+ std::unique_ptr<Module>)) {
// Okay, now that we have reduced the list of passes which are causing the
// failure, see if we can pin down which functions are being
// miscompiled... first build a list of all of the non-external functions in
// the program.
- std::vector<Function*> MiscompiledFunctions;
+ std::vector<Function *> MiscompiledFunctions;
Module *Prog = BD.getProgram();
for (Function &F : *Prog)
if (!F.isDeclaration())
MiscompiledFunctions.push_back(&F);
// Do the reduction...
- if (!BugpointIsInterrupted)
- ReduceMiscompilingFunctions(BD, TestFn).reduceList(MiscompiledFunctions,
- Error);
- if (!Error.empty()) {
- errs() << "\n***Cannot reduce functions: ";
- return MiscompiledFunctions;
+ if (!BugpointIsInterrupted) {
+ Expected<bool> Ret = ReduceMiscompilingFunctions(BD, TestFn)
+ .reduceList(MiscompiledFunctions);
+ if (Error E = Ret.takeError()) {
+ errs() << "\n***Cannot reduce functions: ";
+ return std::move(E);
+ }
}
outs() << "\n*** The following function"
<< (MiscompiledFunctions.size() == 1 ? " is" : "s are")
@@ -638,20 +653,20 @@ DebugAMiscompilation(BugDriver &BD,
// trigger the problem.
if (!BugpointIsInterrupted && !DisableLoopExtraction) {
- bool Ret = ExtractLoops(BD, TestFn, MiscompiledFunctions, Error);
- if (!Error.empty())
- return MiscompiledFunctions;
- if (Ret) {
+ Expected<bool> Ret = ExtractLoops(BD, TestFn, MiscompiledFunctions);
+ if (Error E = Ret.takeError())
+ return std::move(E);
+ if (*Ret) {
// Okay, we extracted some loops and the problem still appears. See if
// we can eliminate some of the created functions from being candidates.
DisambiguateGlobalSymbols(BD.getProgram());
// Do the reduction...
if (!BugpointIsInterrupted)
- ReduceMiscompilingFunctions(BD, TestFn).reduceList(MiscompiledFunctions,
- Error);
- if (!Error.empty())
- return MiscompiledFunctions;
+ Ret = ReduceMiscompilingFunctions(BD, TestFn)
+ .reduceList(MiscompiledFunctions);
+ if (Error E = Ret.takeError())
+ return std::move(E);
outs() << "\n*** The following function"
<< (MiscompiledFunctions.size() == 1 ? " is" : "s are")
@@ -662,19 +677,19 @@ DebugAMiscompilation(BugDriver &BD,
}
if (!BugpointIsInterrupted && !DisableBlockExtraction) {
- bool Ret = ExtractBlocks(BD, TestFn, MiscompiledFunctions, Error);
- if (!Error.empty())
- return MiscompiledFunctions;
- if (Ret) {
+ Expected<bool> Ret = ExtractBlocks(BD, TestFn, MiscompiledFunctions);
+ if (Error E = Ret.takeError())
+ return std::move(E);
+ if (*Ret) {
// Okay, we extracted some blocks and the problem still appears. See if
// we can eliminate some of the created functions from being candidates.
DisambiguateGlobalSymbols(BD.getProgram());
// Do the reduction...
- ReduceMiscompilingFunctions(BD, TestFn).reduceList(MiscompiledFunctions,
- Error);
- if (!Error.empty())
- return MiscompiledFunctions;
+ Ret = ReduceMiscompilingFunctions(BD, TestFn)
+ .reduceList(MiscompiledFunctions);
+ if (Error E = Ret.takeError())
+ return std::move(E);
outs() << "\n*** The following function"
<< (MiscompiledFunctions.size() == 1 ? " is" : "s are")
@@ -691,8 +706,8 @@ DebugAMiscompilation(BugDriver &BD,
/// the program is misoptimized. If so, return true. In any case, both module
/// arguments are deleted.
///
-static bool TestOptimizer(BugDriver &BD, std::unique_ptr<Module> Test,
- std::unique_ptr<Module> Safe, std::string &Error) {
+static Expected<bool> TestOptimizer(BugDriver &BD, std::unique_ptr<Module> Test,
+ std::unique_ptr<Module> Safe) {
// Run the optimization passes on ToOptimize, producing a transformed version
// of the functions being tested.
outs() << " Optimizing functions being tested: ";
@@ -702,16 +717,20 @@ static bool TestOptimizer(BugDriver &BD, std::unique_ptr<Module> Test,
errs() << " Error running this sequence of passes"
<< " on the input program!\n";
delete BD.swapProgramIn(Test.get());
- BD.EmitProgressBitcode(Test.get(), "pass-error", false);
- return BD.debugOptimizerCrash();
+ BD.EmitProgressBitcode(Test.get(), "pass-error", false);
+ if (Error E = BD.debugOptimizerCrash())
+ return std::move(E);
+ return false;
}
outs() << "done.\n";
outs() << " Checking to see if the merged program executes correctly: ";
bool Broken;
- std::unique_ptr<Module> New = testMergedProgram(
- BD, std::move(Optimized), std::move(Safe), Error, Broken);
- if (New) {
+ auto Result =
+ testMergedProgram(BD, std::move(Optimized), std::move(Safe), Broken);
+ if (Error E = Result.takeError())
+ return std::move(E);
+ if (auto New = std::move(*Result)) {
outs() << (Broken ? " nope.\n" : " yup.\n");
// Delete the original and set the new program.
delete BD.swapProgramIn(New.release());
@@ -723,41 +742,47 @@ static bool TestOptimizer(BugDriver &BD, std::unique_ptr<Module> Test,
/// crashing, but the generated output is semantically different from the
/// input.
///
-void BugDriver::debugMiscompilation(std::string *Error) {
+Error BugDriver::debugMiscompilation() {
// Make sure something was miscompiled...
- if (!BugpointIsInterrupted)
- if (!ReduceMiscompilingPasses(*this).reduceList(PassesToRun, *Error)) {
- if (Error->empty())
- errs() << "*** Optimized program matches reference output! No problem"
- << " detected...\nbugpoint can't help you with your problem!\n";
- return;
- }
+ if (!BugpointIsInterrupted) {
+ Expected<bool> Result =
+ ReduceMiscompilingPasses(*this).reduceList(PassesToRun);
+ if (Error E = Result.takeError())
+ return E;
+ if (!*Result)
+ return make_error<StringError>(
+ "*** Optimized program matches reference output! No problem"
+ " detected...\nbugpoint can't help you with your problem!\n",
+ inconvertibleErrorCode());
+ }
outs() << "\n*** Found miscompiling pass"
<< (getPassesToRun().size() == 1 ? "" : "es") << ": "
<< getPassesString(getPassesToRun()) << '\n';
EmitProgressBitcode(Program, "passinput");
- std::vector<Function *> MiscompiledFunctions =
- DebugAMiscompilation(*this, TestOptimizer, *Error);
- if (!Error->empty())
- return;
+ Expected<std::vector<Function *>> MiscompiledFunctions =
+ DebugAMiscompilation(*this, TestOptimizer);
+ if (Error E = MiscompiledFunctions.takeError())
+ return E;
// Output a bunch of bitcode files for the user...
outs() << "Outputting reduced bitcode files which expose the problem:\n";
ValueToValueMapTy VMap;
Module *ToNotOptimize = CloneModule(getProgram(), VMap).release();
Module *ToOptimize =
- SplitFunctionsOutOfModule(ToNotOptimize, MiscompiledFunctions, VMap)
+ SplitFunctionsOutOfModule(ToNotOptimize, *MiscompiledFunctions, VMap)
.release();
outs() << " Non-optimized portion: ";
EmitProgressBitcode(ToNotOptimize, "tonotoptimize", true);
- delete ToNotOptimize; // Delete hacked module.
+ delete ToNotOptimize; // Delete hacked module.
outs() << " Portion that is input to optimizer: ";
EmitProgressBitcode(ToOptimize, "tooptimize");
- delete ToOptimize; // Delete hacked module.
+ delete ToOptimize; // Delete hacked module.
+
+ return Error::success();
}
/// Get the specified modules ready for code generator testing.
@@ -769,7 +794,8 @@ static void CleanupAndPrepareModules(BugDriver &BD,
Test = BD.performFinalCleanups(Test.get());
// If we are executing the JIT, we have several nasty issues to take care of.
- if (!BD.isExecutingJIT()) return;
+ if (!BD.isExecutingJIT())
+ return;
// First, if the main function is in the Safe module, we must add a stub to
// the Test module to call into it. Thus, we create a new function `main'
@@ -788,11 +814,12 @@ static void CleanupAndPrepareModules(BugDriver &BD,
GlobalValue::ExternalLinkage,
oldMain->getName(), Test.get());
// Set up and remember the argument list for the main function.
- std::vector<Value*> args;
- for (Function::arg_iterator
- I = newMain->arg_begin(), E = newMain->arg_end(),
- OI = oldMain->arg_begin(); I != E; ++I, ++OI) {
- I->setName(OI->getName()); // Copy argument names from oldMain
+ std::vector<Value *> args;
+ for (Function::arg_iterator I = newMain->arg_begin(),
+ E = newMain->arg_end(),
+ OI = oldMain->arg_begin();
+ I != E; ++I, ++OI) {
+ I->setName(OI->getName()); // Copy argument names from oldMain
args.push_back(&*I);
}
@@ -811,11 +838,9 @@ static void CleanupAndPrepareModules(BugDriver &BD,
// Add the resolver to the Safe module.
// Prototype: void *getPointerToNamedFunction(const char* Name)
- Constant *resolverFunc =
- Safe->getOrInsertFunction("getPointerToNamedFunction",
- Type::getInt8PtrTy(Safe->getContext()),
- Type::getInt8PtrTy(Safe->getContext()),
- (Type *)nullptr);
+ Constant *resolverFunc = Safe->getOrInsertFunction(
+ "getPointerToNamedFunction", Type::getInt8PtrTy(Safe->getContext()),
+ Type::getInt8PtrTy(Safe->getContext()), (Type *)nullptr);
// Use the function we just added to get addresses of functions we need.
for (Module::iterator F = Safe->begin(), E = Safe->end(); F != E; ++F) {
@@ -827,21 +852,20 @@ static void CleanupAndPrepareModules(BugDriver &BD,
if (TestFn && !TestFn->isDeclaration()) {
// 1. Add a string constant with its name to the global file
Constant *InitArray =
- ConstantDataArray::getString(F->getContext(), F->getName());
- GlobalVariable *funcName =
- new GlobalVariable(*Safe, InitArray->getType(), true /*isConstant*/,
- GlobalValue::InternalLinkage, InitArray,
- F->getName() + "_name");
+ ConstantDataArray::getString(F->getContext(), F->getName());
+ GlobalVariable *funcName = new GlobalVariable(
+ *Safe, InitArray->getType(), true /*isConstant*/,
+ GlobalValue::InternalLinkage, InitArray, F->getName() + "_name");
// 2. Use `GetElementPtr *funcName, 0, 0' to convert the string to an
// sbyte* so it matches the signature of the resolver function.
// GetElementPtr *funcName, ulong 0, ulong 0
- std::vector<Constant*> GEPargs(2,
- Constant::getNullValue(Type::getInt32Ty(F->getContext())));
+ std::vector<Constant *> GEPargs(
+ 2, Constant::getNullValue(Type::getInt32Ty(F->getContext())));
Value *GEP = ConstantExpr::getGetElementPtr(InitArray->getType(),
funcName, GEPargs);
- std::vector<Value*> ResolverArgs;
+ std::vector<Value *> ResolverArgs;
ResolverArgs.push_back(GEP);
// Rewrite uses of F in global initializers, etc. to uses of a wrapper
@@ -849,23 +873,21 @@ static void CleanupAndPrepareModules(BugDriver &BD,
if (!F->use_empty()) {
// Create a new global to hold the cached function pointer.
Constant *NullPtr = ConstantPointerNull::get(F->getType());
- GlobalVariable *Cache =
- new GlobalVariable(*F->getParent(), F->getType(),
- false, GlobalValue::InternalLinkage,
- NullPtr,F->getName()+".fpcache");
+ GlobalVariable *Cache = new GlobalVariable(
+ *F->getParent(), F->getType(), false,
+ GlobalValue::InternalLinkage, NullPtr, F->getName() + ".fpcache");
// Construct a new stub function that will re-route calls to F
FunctionType *FuncTy = F->getFunctionType();
- Function *FuncWrapper = Function::Create(FuncTy,
- GlobalValue::InternalLinkage,
- F->getName() + "_wrapper",
- F->getParent());
- BasicBlock *EntryBB = BasicBlock::Create(F->getContext(),
- "entry", FuncWrapper);
- BasicBlock *DoCallBB = BasicBlock::Create(F->getContext(),
- "usecache", FuncWrapper);
- BasicBlock *LookupBB = BasicBlock::Create(F->getContext(),
- "lookupfp", FuncWrapper);
+ Function *FuncWrapper =
+ Function::Create(FuncTy, GlobalValue::InternalLinkage,
+ F->getName() + "_wrapper", F->getParent());
+ BasicBlock *EntryBB =
+ BasicBlock::Create(F->getContext(), "entry", FuncWrapper);
+ BasicBlock *DoCallBB =
+ BasicBlock::Create(F->getContext(), "usecache", FuncWrapper);
+ BasicBlock *LookupBB =
+ BasicBlock::Create(F->getContext(), "lookupfp", FuncWrapper);
// Check to see if we already looked up the value.
Value *CachedVal = new LoadInst(Cache, "fpcache", EntryBB);
@@ -876,26 +898,25 @@ static void CleanupAndPrepareModules(BugDriver &BD,
// Resolve the call to function F via the JIT API:
//
// call resolver(GetElementPtr...)
- CallInst *Resolver =
- CallInst::Create(resolverFunc, ResolverArgs, "resolver", LookupBB);
+ CallInst *Resolver = CallInst::Create(resolverFunc, ResolverArgs,
+ "resolver", LookupBB);
// Cast the result from the resolver to correctly-typed function.
- CastInst *CastedResolver =
- new BitCastInst(Resolver,
- PointerType::getUnqual(F->getFunctionType()),
- "resolverCast", LookupBB);
+ CastInst *CastedResolver = new BitCastInst(
+ Resolver, PointerType::getUnqual(F->getFunctionType()),
+ "resolverCast", LookupBB);
// Save the value in our cache.
new StoreInst(CastedResolver, Cache, LookupBB);
BranchInst::Create(DoCallBB, LookupBB);
- PHINode *FuncPtr = PHINode::Create(NullPtr->getType(), 2,
- "fp", DoCallBB);
+ PHINode *FuncPtr =
+ PHINode::Create(NullPtr->getType(), 2, "fp", DoCallBB);
FuncPtr->addIncoming(CastedResolver, LookupBB);
FuncPtr->addIncoming(CachedVal, EntryBB);
// Save the argument list.
- std::vector<Value*> Args;
+ std::vector<Value *> Args;
for (Argument &A : FuncWrapper->args())
Args.push_back(&A);
@@ -904,9 +925,9 @@ static void CleanupAndPrepareModules(BugDriver &BD,
CallInst::Create(FuncPtr, Args, "", DoCallBB);
ReturnInst::Create(F->getContext(), DoCallBB);
} else {
- CallInst *Call = CallInst::Create(FuncPtr, Args,
- "retval", DoCallBB);
- ReturnInst::Create(F->getContext(),Call, DoCallBB);
+ CallInst *Call =
+ CallInst::Create(FuncPtr, Args, "retval", DoCallBB);
+ ReturnInst::Create(F->getContext(), Call, DoCallBB);
}
// Use the wrapper function instead of the old function
@@ -926,9 +947,9 @@ static void CleanupAndPrepareModules(BugDriver &BD,
/// the program is miscompiled by the code generator under test. If so, return
/// true. In any case, both module arguments are deleted.
///
-static bool TestCodeGenerator(BugDriver &BD, std::unique_ptr<Module> Test,
- std::unique_ptr<Module> Safe,
- std::string &Error) {
+static Expected<bool> TestCodeGenerator(BugDriver &BD,
+ std::unique_ptr<Module> Test,
+ std::unique_ptr<Module> Safe) {
CleanupAndPrepareModules(BD, Test, Safe.get());
SmallString<128> TestModuleBC;
@@ -936,8 +957,8 @@ static bool TestCodeGenerator(BugDriver &BD, std::unique_ptr<Module> Test,
std::error_code EC = sys::fs::createTemporaryFile("bugpoint.test", "bc",
TestModuleFD, TestModuleBC);
if (EC) {
- errs() << BD.getToolName() << "Error making unique filename: "
- << EC.message() << "\n";
+ errs() << BD.getToolName()
+ << "Error making unique filename: " << EC.message() << "\n";
exit(1);
}
if (BD.writeProgramToFile(TestModuleBC.str(), TestModuleFD, Test.get())) {
@@ -954,33 +975,33 @@ static bool TestCodeGenerator(BugDriver &BD, std::unique_ptr<Module> Test,
EC = sys::fs::createTemporaryFile("bugpoint.safe", "bc", SafeModuleFD,
SafeModuleBC);
if (EC) {
- errs() << BD.getToolName() << "Error making unique filename: "
- << EC.message() << "\n";
+ errs() << BD.getToolName()
+ << "Error making unique filename: " << EC.message() << "\n";
exit(1);
}
if (BD.writeProgramToFile(SafeModuleBC.str(), SafeModuleFD, Safe.get())) {
- errs() << "Error writing bitcode to `" << SafeModuleBC
- << "'\nExiting.";
+ errs() << "Error writing bitcode to `" << SafeModuleBC << "'\nExiting.";
exit(1);
}
FileRemover SafeModuleBCRemover(SafeModuleBC.str(), !SaveTemps);
- std::string SharedObject = BD.compileSharedObject(SafeModuleBC.str(), Error);
- if (!Error.empty())
- return false;
+ Expected<std::string> SharedObject =
+ BD.compileSharedObject(SafeModuleBC.str());
+ if (Error E = SharedObject.takeError())
+ return std::move(E);
- FileRemover SharedObjectRemover(SharedObject, !SaveTemps);
+ FileRemover SharedObjectRemover(*SharedObject, !SaveTemps);
// Run the code generator on the `Test' code, loading the shared library.
// The function returns whether or not the new output differs from reference.
- bool Result = BD.diffProgram(BD.getProgram(), TestModuleBC.str(),
- SharedObject, false, &Error);
- if (!Error.empty())
- return false;
+ Expected<bool> Result =
+ BD.diffProgram(BD.getProgram(), TestModuleBC.str(), *SharedObject, false);
+ if (Error E = Result.takeError())
+ return std::move(E);
- if (Result)
+ if (*Result)
errs() << ": still failing!\n";
else
errs() << ": didn't fail.\n";
@@ -990,35 +1011,34 @@ static bool TestCodeGenerator(BugDriver &BD, std::unique_ptr<Module> Test,
/// debugCodeGenerator - debug errors in LLC, LLI, or CBE.
///
-bool BugDriver::debugCodeGenerator(std::string *Error) {
- if ((void*)SafeInterpreter == (void*)Interpreter) {
- std::string Result = executeProgramSafely(Program, "bugpoint.safe.out",
- Error);
- if (Error->empty()) {
+Error BugDriver::debugCodeGenerator() {
+ if ((void *)SafeInterpreter == (void *)Interpreter) {
+ Expected<std::string> Result =
+ executeProgramSafely(Program, "bugpoint.safe.out");
+ if (Result) {
outs() << "\n*** The \"safe\" i.e. 'known good' backend cannot match "
<< "the reference diff. This may be due to a\n front-end "
<< "bug or a bug in the original program, but this can also "
<< "happen if bugpoint isn't running the program with the "
<< "right flags or input.\n I left the result of executing "
<< "the program with the \"safe\" backend in this file for "
- << "you: '"
- << Result << "'.\n";
+ << "you: '" << *Result << "'.\n";
}
- return true;
+ return Error::success();
}
DisambiguateGlobalSymbols(Program);
- std::vector<Function*> Funcs = DebugAMiscompilation(*this, TestCodeGenerator,
- *Error);
- if (!Error->empty())
- return true;
+ Expected<std::vector<Function *>> Funcs =
+ DebugAMiscompilation(*this, TestCodeGenerator);
+ if (Error E = Funcs.takeError())
+ return E;
// Split the module into the two halves of the program we want.
ValueToValueMapTy VMap;
std::unique_ptr<Module> ToNotCodeGen = CloneModule(getProgram(), VMap);
std::unique_ptr<Module> ToCodeGen =
- SplitFunctionsOutOfModule(ToNotCodeGen.get(), Funcs, VMap);
+ SplitFunctionsOutOfModule(ToNotCodeGen.get(), *Funcs, VMap);
// Condition the modules
CleanupAndPrepareModules(*this, ToCodeGen, ToNotCodeGen.get());
@@ -1028,14 +1048,13 @@ bool BugDriver::debugCodeGenerator(std::string *Error) {
std::error_code EC = sys::fs::createTemporaryFile("bugpoint.test", "bc",
TestModuleFD, TestModuleBC);
if (EC) {
- errs() << getToolName() << "Error making unique filename: "
- << EC.message() << "\n";
+ errs() << getToolName() << "Error making unique filename: " << EC.message()
+ << "\n";
exit(1);
}
if (writeProgramToFile(TestModuleBC.str(), TestModuleFD, ToCodeGen.get())) {
- errs() << "Error writing bitcode to `" << TestModuleBC
- << "'\nExiting.";
+ errs() << "Error writing bitcode to `" << TestModuleBC << "'\nExiting.";
exit(1);
}
@@ -1045,47 +1064,41 @@ bool BugDriver::debugCodeGenerator(std::string *Error) {
EC = sys::fs::createTemporaryFile("bugpoint.safe", "bc", SafeModuleFD,
SafeModuleBC);
if (EC) {
- errs() << getToolName() << "Error making unique filename: "
- << EC.message() << "\n";
+ errs() << getToolName() << "Error making unique filename: " << EC.message()
+ << "\n";
exit(1);
}
if (writeProgramToFile(SafeModuleBC.str(), SafeModuleFD,
ToNotCodeGen.get())) {
- errs() << "Error writing bitcode to `" << SafeModuleBC
- << "'\nExiting.";
+ errs() << "Error writing bitcode to `" << SafeModuleBC << "'\nExiting.";
exit(1);
}
- std::string SharedObject = compileSharedObject(SafeModuleBC.str(), *Error);
- if (!Error->empty())
- return true;
+ Expected<std::string> SharedObject = compileSharedObject(SafeModuleBC.str());
+ if (Error E = SharedObject.takeError())
+ return E;
outs() << "You can reproduce the problem with the command line: \n";
if (isExecutingJIT()) {
- outs() << " lli -load " << SharedObject << " " << TestModuleBC;
+ outs() << " lli -load " << *SharedObject << " " << TestModuleBC;
} else {
- outs() << " llc " << TestModuleBC << " -o " << TestModuleBC
- << ".s\n";
- outs() << " cc " << SharedObject << " " << TestModuleBC.str()
- << ".s -o " << TestModuleBC << ".exe";
-#if defined (HAVE_LINK_R)
- outs() << " -Wl,-R.";
-#endif
- outs() << "\n";
- outs() << " " << TestModuleBC << ".exe";
+ outs() << " llc " << TestModuleBC << " -o " << TestModuleBC << ".s\n";
+ outs() << " cc " << *SharedObject << " " << TestModuleBC.str() << ".s -o "
+ << TestModuleBC << ".exe\n";
+ outs() << " ./" << TestModuleBC << ".exe";
}
for (unsigned i = 0, e = InputArgv.size(); i != e; ++i)
outs() << " " << InputArgv[i];
outs() << '\n';
outs() << "The shared object was created with:\n llc -march=c "
<< SafeModuleBC.str() << " -o temporary.c\n"
- << " cc -xc temporary.c -O2 -o " << SharedObject;
+ << " cc -xc temporary.c -O2 -o " << *SharedObject;
if (TargetTriple.getArch() == Triple::sparc)
- outs() << " -G"; // Compile a shared library, `-G' for Sparc
+ outs() << " -G"; // Compile a shared library, `-G' for Sparc
else
- outs() << " -fPIC -shared"; // `-shared' for Linux/X86, maybe others
+ outs() << " -fPIC -shared"; // `-shared' for Linux/X86, maybe others
outs() << " -fno-strict-aliasing\n";
- return false;
+ return Error::success();
}
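With this hunk both debugMiscompilation() and debugCodeGenerator() report failure through a returned llvm::Error instead of a bool plus std::string* out-parameter: the success path is Error::success(), and hard failures are built with make_error<StringError>(..., inconvertibleErrorCode()). Whoever ultimately calls these drivers has to consume that Error; a hedged sketch of such a boundary (runDriver stands in for the real caller in bugpoint.cpp, which is not part of this diff):

#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Illustrative stand-in for a driver such as debugMiscompilation().
static Error runDriver(bool Fail) {
  if (Fail)
    return make_error<StringError>("nothing left to reduce",
                                   inconvertibleErrorCode());
  return Error::success();
}

int main() {
  if (Error E = runDriver(/*Fail=*/true)) {
    // toString() consumes the error and flattens its message chain to text.
    errs() << toString(std::move(E)) << "\n";
    return 1;
  }
  return 0;
}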
diff --git a/contrib/llvm/tools/bugpoint/OptimizerDriver.cpp b/contrib/llvm/tools/bugpoint/OptimizerDriver.cpp
index 2cc2f4471a58..246580c8bdbe 100644
--- a/contrib/llvm/tools/bugpoint/OptimizerDriver.cpp
+++ b/contrib/llvm/tools/bugpoint/OptimizerDriver.cpp
@@ -16,7 +16,7 @@
//===----------------------------------------------------------------------===//
#include "BugDriver.h"
-#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
@@ -39,7 +39,7 @@ using namespace llvm;
#define DEBUG_TYPE "bugpoint"
namespace llvm {
- extern cl::opt<std::string> OutputPrefix;
+extern cl::opt<std::string> OutputPrefix;
}
static cl::opt<bool> PreserveBitcodeUseListOrder(
@@ -48,12 +48,12 @@ static cl::opt<bool> PreserveBitcodeUseListOrder(
cl::init(true), cl::Hidden);
namespace {
- // ChildOutput - This option captures the name of the child output file that
- // is set up by the parent bugpoint process
- cl::opt<std::string> ChildOutput("child-output", cl::ReallyHidden);
- cl::opt<std::string> OptCmd("opt-command", cl::init(""),
- cl::desc("Path to opt. (default: search path "
- "for 'opt'.)"));
+// ChildOutput - This option captures the name of the child output file that
+// is set up by the parent bugpoint process
+cl::opt<std::string> ChildOutput("child-output", cl::ReallyHidden);
+cl::opt<std::string> OptCmd("opt-command", cl::init(""),
+ cl::desc("Path to opt. (default: search path "
+ "for 'opt'.)"));
}
/// writeProgramToFile - This writes the current "Program" to the named bitcode
@@ -84,26 +84,26 @@ bool BugDriver::writeProgramToFile(const std::string &Filename,
return true;
}
-
/// EmitProgressBitcode - This function is used to output the current Program
/// to a file named "bugpoint-ID.bc".
///
-void BugDriver::EmitProgressBitcode(const Module *M,
- const std::string &ID,
- bool NoFlyer) const {
+void BugDriver::EmitProgressBitcode(const Module *M, const std::string &ID,
+ bool NoFlyer) const {
// Output the input to the current pass to a bitcode file, emit a message
// telling the user how to reproduce it: opt -foo blah.bc
//
std::string Filename = OutputPrefix + "-" + ID + ".bc";
if (writeProgramToFile(Filename, M)) {
- errs() << "Error opening file '" << Filename << "' for writing!\n";
+ errs() << "Error opening file '" << Filename << "' for writing!\n";
return;
}
outs() << "Emitted bitcode to '" << Filename << "'\n";
- if (NoFlyer || PassesToRun.empty()) return;
+ if (NoFlyer || PassesToRun.empty())
+ return;
outs() << "\n*** You can reproduce the problem with: ";
- if (UseValgrind) outs() << "valgrind ";
+ if (UseValgrind)
+ outs() << "valgrind ";
outs() << "opt " << Filename;
for (unsigned i = 0, e = PluginLoader::getNumPlugins(); i != e; ++i) {
outs() << " -load " << PluginLoader::getPlugin(i);
@@ -111,8 +111,9 @@ void BugDriver::EmitProgressBitcode(const Module *M,
outs() << " " << getPassesString(PassesToRun) << "\n";
}
-cl::opt<bool> SilencePasses("silence-passes",
- cl::desc("Suppress output of running passes (both stdout and stderr)"));
+cl::opt<bool> SilencePasses(
+ "silence-passes",
+ cl::desc("Suppress output of running passes (both stdout and stderr)"));
static cl::list<std::string> OptArgs("opt-args", cl::Positional,
cl::desc("<opt arguments>..."),
@@ -130,15 +131,15 @@ bool BugDriver::runPasses(Module *Program,
const std::vector<std::string> &Passes,
std::string &OutputFilename, bool DeleteOutput,
bool Quiet, unsigned NumExtraArgs,
- const char * const *ExtraArgs) const {
+ const char *const *ExtraArgs) const {
// setup the output file name
outs().flush();
SmallString<128> UniqueFilename;
std::error_code EC = sys::fs::createUniqueFile(
OutputPrefix + "-output-%%%%%%%.bc", UniqueFilename);
if (EC) {
- errs() << getToolName() << ": Error making unique filename: "
- << EC.message() << "\n";
+ errs() << getToolName()
+ << ": Error making unique filename: " << EC.message() << "\n";
return 1;
}
OutputFilename = UniqueFilename.str();
@@ -149,8 +150,8 @@ bool BugDriver::runPasses(Module *Program,
EC = sys::fs::createUniqueFile(OutputPrefix + "-input-%%%%%%%.bc", InputFD,
InputFilename);
if (EC) {
- errs() << getToolName() << ": Error making unique filename: "
- << EC.message() << "\n";
+ errs() << getToolName()
+ << ": Error making unique filename: " << EC.message() << "\n";
return 1;
}
@@ -193,7 +194,7 @@ bool BugDriver::runPasses(Module *Program,
InFile.keep();
// setup the child process' arguments
- SmallVector<const char*, 8> Args;
+ SmallVector<const char *, 8> Args;
if (UseValgrind) {
Args.push_back("valgrind");
Args.push_back("--error-exitcode=1");
@@ -208,14 +209,16 @@ bool BugDriver::runPasses(Module *Program,
Args.push_back(OptArgs[i].c_str());
std::vector<std::string> pass_args;
for (unsigned i = 0, e = PluginLoader::getNumPlugins(); i != e; ++i) {
- pass_args.push_back( std::string("-load"));
- pass_args.push_back( PluginLoader::getPlugin(i));
+ pass_args.push_back(std::string("-load"));
+ pass_args.push_back(PluginLoader::getPlugin(i));
}
for (std::vector<std::string>::const_iterator I = Passes.begin(),
- E = Passes.end(); I != E; ++I )
- pass_args.push_back( std::string("-") + (*I) );
+ E = Passes.end();
+ I != E; ++I)
+ pass_args.push_back(std::string("-") + (*I));
for (std::vector<std::string>::const_iterator I = pass_args.begin(),
- E = pass_args.end(); I != E; ++I )
+ E = pass_args.end();
+ I != E; ++I)
Args.push_back(I->c_str());
Args.push_back(InputFilename.c_str());
for (unsigned i = 0; i < NumExtraArgs; ++i)
@@ -223,10 +226,9 @@ bool BugDriver::runPasses(Module *Program,
Args.push_back(nullptr);
DEBUG(errs() << "\nAbout to run:\t";
- for (unsigned i = 0, e = Args.size()-1; i != e; ++i)
- errs() << " " << Args[i];
- errs() << "\n";
- );
+ for (unsigned i = 0, e = Args.size() - 1; i != e; ++i) errs()
+ << " " << Args[i];
+ errs() << "\n";);
// Redirect stdout and stderr to nowhere if SilencePasses is given
StringRef Nowhere;
@@ -264,21 +266,19 @@ bool BugDriver::runPasses(Module *Program,
return result != 0;
}
-
std::unique_ptr<Module>
BugDriver::runPassesOn(Module *M, const std::vector<std::string> &Passes,
- unsigned NumExtraArgs,
- const char *const *ExtraArgs) {
+ unsigned NumExtraArgs, const char *const *ExtraArgs) {
std::string BitcodeResult;
- if (runPasses(M, Passes, BitcodeResult, false/*delete*/, true/*quiet*/,
+ if (runPasses(M, Passes, BitcodeResult, false /*delete*/, true /*quiet*/,
NumExtraArgs, ExtraArgs)) {
return nullptr;
}
std::unique_ptr<Module> Ret = parseInputFile(BitcodeResult, Context);
if (!Ret) {
- errs() << getToolName() << ": Error reading bitcode file '"
- << BitcodeResult << "'!\n";
+ errs() << getToolName() << ": Error reading bitcode file '" << BitcodeResult
+ << "'!\n";
exit(1);
}
sys::fs::remove(BitcodeResult);
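Aside from llvm/Bitcode/BitcodeWriter.h replacing the retired ReaderWriter.h umbrella header, the OptimizerDriver.cpp hunks are clang-format churn; the oddest-looking result is the rewrapped DEBUG(...) block, which is still a single macro invocation containing several statements. A small sketch of that macro as this file uses it (traceArgs is an illustrative name; the output only appears in assert-enabled builds run with -debug or -debug-only=bugpoint):

#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#define DEBUG_TYPE "bugpoint"
using namespace llvm;

// Illustrative helper mirroring the "About to run" trace in runPasses().
static void traceArgs(ArrayRef<const char *> Args) {
  DEBUG(errs() << "\nAbout to run:\t";
        for (const char *A : Args) errs() << " " << A;
        errs() << "\n";);
}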
diff --git a/contrib/llvm/tools/bugpoint/ToolRunner.cpp b/contrib/llvm/tools/bugpoint/ToolRunner.cpp
index 4af4b10c4d78..4633d6437336 100644
--- a/contrib/llvm/tools/bugpoint/ToolRunner.cpp
+++ b/contrib/llvm/tools/bugpoint/ToolRunner.cpp
@@ -12,7 +12,7 @@
//===----------------------------------------------------------------------===//
#include "ToolRunner.h"
-#include "llvm/Config/config.h" // for HAVE_LINK_R
+#include "llvm/Config/config.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FileSystem.h"
@@ -27,46 +27,40 @@ using namespace llvm;
#define DEBUG_TYPE "toolrunner"
namespace llvm {
- cl::opt<bool>
- SaveTemps("save-temps", cl::init(false), cl::desc("Save temporary files"));
+cl::opt<bool> SaveTemps("save-temps", cl::init(false),
+ cl::desc("Save temporary files"));
}
namespace {
- cl::opt<std::string>
- RemoteClient("remote-client",
- cl::desc("Remote execution client (rsh/ssh)"));
+cl::opt<std::string>
+ RemoteClient("remote-client",
+ cl::desc("Remote execution client (rsh/ssh)"));
- cl::opt<std::string>
- RemoteHost("remote-host",
- cl::desc("Remote execution (rsh/ssh) host"));
+cl::opt<std::string> RemoteHost("remote-host",
+ cl::desc("Remote execution (rsh/ssh) host"));
- cl::opt<std::string>
- RemotePort("remote-port",
- cl::desc("Remote execution (rsh/ssh) port"));
+cl::opt<std::string> RemotePort("remote-port",
+ cl::desc("Remote execution (rsh/ssh) port"));
- cl::opt<std::string>
- RemoteUser("remote-user",
- cl::desc("Remote execution (rsh/ssh) user id"));
+cl::opt<std::string> RemoteUser("remote-user",
+ cl::desc("Remote execution (rsh/ssh) user id"));
- cl::opt<std::string>
- RemoteExtra("remote-extra-options",
- cl::desc("Remote execution (rsh/ssh) extra options"));
+cl::opt<std::string>
+ RemoteExtra("remote-extra-options",
+ cl::desc("Remote execution (rsh/ssh) extra options"));
}
/// RunProgramWithTimeout - This function provides an alternate interface
/// to the sys::Program::ExecuteAndWait interface.
/// @see sys::Program::ExecuteAndWait
-static int RunProgramWithTimeout(StringRef ProgramPath,
- const char **Args,
- StringRef StdInFile,
- StringRef StdOutFile,
- StringRef StdErrFile,
- unsigned NumSeconds = 0,
+static int RunProgramWithTimeout(StringRef ProgramPath, const char **Args,
+ StringRef StdInFile, StringRef StdOutFile,
+ StringRef StdErrFile, unsigned NumSeconds = 0,
unsigned MemoryLimit = 0,
std::string *ErrMsg = nullptr) {
- const StringRef *Redirects[3] = { &StdInFile, &StdOutFile, &StdErrFile };
- return sys::ExecuteAndWait(ProgramPath, Args, nullptr, Redirects,
- NumSeconds, MemoryLimit, ErrMsg);
+ const StringRef *Redirects[3] = {&StdInFile, &StdOutFile, &StdErrFile};
+ return sys::ExecuteAndWait(ProgramPath, Args, nullptr, Redirects, NumSeconds,
+ MemoryLimit, ErrMsg);
}
/// RunProgramRemotelyWithTimeout - This function runs the given program
@@ -76,13 +70,12 @@ static int RunProgramWithTimeout(StringRef ProgramPath,
/// code otherwise.
/// @see sys::Program::ExecuteAndWait
static int RunProgramRemotelyWithTimeout(StringRef RemoteClientPath,
- const char **Args,
- StringRef StdInFile,
+ const char **Args, StringRef StdInFile,
StringRef StdOutFile,
StringRef StdErrFile,
unsigned NumSeconds = 0,
unsigned MemoryLimit = 0) {
- const StringRef *Redirects[3] = { &StdInFile, &StdOutFile, &StdErrFile };
+ const StringRef *Redirects[3] = {&StdInFile, &StdOutFile, &StdErrFile};
// Run the program remotely with the remote client
int ReturnCode = sys::ExecuteAndWait(RemoteClientPath, Args, nullptr,
@@ -112,9 +105,8 @@ static int RunProgramRemotelyWithTimeout(StringRef RemoteClientPath,
return ReturnCode;
}
-static std::string ProcessFailure(StringRef ProgPath, const char** Args,
- unsigned Timeout = 0,
- unsigned MemoryLimit = 0) {
+static Error ProcessFailure(StringRef ProgPath, const char **Args,
+ unsigned Timeout = 0, unsigned MemoryLimit = 0) {
std::ostringstream OS;
OS << "\nError running tool:\n ";
for (const char **Arg = Args; *Arg; ++Arg)
@@ -144,51 +136,48 @@ static std::string ProcessFailure(StringRef ProgPath, const char** Args,
}
sys::fs::remove(ErrorFilename.c_str());
- return OS.str();
+ return make_error<StringError>(OS.str(), inconvertibleErrorCode());
}
//===---------------------------------------------------------------------===//
// LLI Implementation of AbstractIntepreter interface
//
namespace {
- class LLI : public AbstractInterpreter {
- std::string LLIPath; // The path to the LLI executable
- std::vector<std::string> ToolArgs; // Args to pass to LLI
- public:
- LLI(const std::string &Path, const std::vector<std::string> *Args)
+class LLI : public AbstractInterpreter {
+ std::string LLIPath; // The path to the LLI executable
+ std::vector<std::string> ToolArgs; // Args to pass to LLI
+public:
+ LLI(const std::string &Path, const std::vector<std::string> *Args)
: LLIPath(Path) {
- ToolArgs.clear ();
- if (Args) { ToolArgs = *Args; }
+ ToolArgs.clear();
+ if (Args) {
+ ToolArgs = *Args;
}
+ }
- int ExecuteProgram(const std::string &Bitcode,
- const std::vector<std::string> &Args,
- const std::string &InputFile,
- const std::string &OutputFile,
- std::string *Error,
- const std::vector<std::string> &CCArgs,
- const std::vector<std::string> &SharedLibs =
- std::vector<std::string>(),
- unsigned Timeout = 0,
- unsigned MemoryLimit = 0) override;
- };
+ Expected<int> ExecuteProgram(
+ const std::string &Bitcode, const std::vector<std::string> &Args,
+ const std::string &InputFile, const std::string &OutputFile,
+ const std::vector<std::string> &CCArgs,
+ const std::vector<std::string> &SharedLibs = std::vector<std::string>(),
+ unsigned Timeout = 0, unsigned MemoryLimit = 0) override;
+};
}
-int LLI::ExecuteProgram(const std::string &Bitcode,
- const std::vector<std::string> &Args,
- const std::string &InputFile,
- const std::string &OutputFile,
- std::string *Error,
- const std::vector<std::string> &CCArgs,
- const std::vector<std::string> &SharedLibs,
- unsigned Timeout,
- unsigned MemoryLimit) {
- std::vector<const char*> LLIArgs;
+Expected<int> LLI::ExecuteProgram(const std::string &Bitcode,
+ const std::vector<std::string> &Args,
+ const std::string &InputFile,
+ const std::string &OutputFile,
+ const std::vector<std::string> &CCArgs,
+ const std::vector<std::string> &SharedLibs,
+ unsigned Timeout, unsigned MemoryLimit) {
+ std::vector<const char *> LLIArgs;
LLIArgs.push_back(LLIPath.c_str());
LLIArgs.push_back("-force-interpreter=true");
for (std::vector<std::string>::const_iterator i = SharedLibs.begin(),
- e = SharedLibs.end(); i != e; ++i) {
+ e = SharedLibs.end();
+ i != e; ++i) {
LLIArgs.push_back("-load");
LLIArgs.push_back((*i).c_str());
}
@@ -199,26 +188,25 @@ int LLI::ExecuteProgram(const std::string &Bitcode,
LLIArgs.push_back(Bitcode.c_str());
// Add optional parameters to the running program from Argv
- for (unsigned i=0, e = Args.size(); i != e; ++i)
+ for (unsigned i = 0, e = Args.size(); i != e; ++i)
LLIArgs.push_back(Args[i].c_str());
LLIArgs.push_back(nullptr);
- outs() << "<lli>"; outs().flush();
+ outs() << "<lli>";
+ outs().flush();
DEBUG(errs() << "\nAbout to run:\t";
- for (unsigned i=0, e = LLIArgs.size()-1; i != e; ++i)
- errs() << " " << LLIArgs[i];
- errs() << "\n";
- );
- return RunProgramWithTimeout(LLIPath, &LLIArgs[0],
- InputFile, OutputFile, OutputFile,
- Timeout, MemoryLimit, Error);
+ for (unsigned i = 0, e = LLIArgs.size() - 1; i != e; ++i) errs()
+ << " " << LLIArgs[i];
+ errs() << "\n";);
+ return RunProgramWithTimeout(LLIPath, &LLIArgs[0], InputFile, OutputFile,
+ OutputFile, Timeout, MemoryLimit);
}
-void AbstractInterpreter::anchor() { }
+void AbstractInterpreter::anchor() {}
#if defined(LLVM_ON_UNIX)
const char EXESuffix[] = "";
-#elif defined (LLVM_ON_WIN32)
+#elif defined(LLVM_ON_WIN32)
const char EXESuffix[] = "exe";
#endif
@@ -248,11 +236,11 @@ static std::string PrependMainExecutablePath(const std::string &ExeName,
}
// LLI create method - Try to find the LLI executable
-AbstractInterpreter *AbstractInterpreter::createLLI(const char *Argv0,
- std::string &Message,
- const std::vector<std::string> *ToolArgs) {
+AbstractInterpreter *
+AbstractInterpreter::createLLI(const char *Argv0, std::string &Message,
+ const std::vector<std::string> *ToolArgs) {
std::string LLIPath =
- PrependMainExecutablePath("lli", Argv0, (void *)(intptr_t) & createLLI);
+ PrependMainExecutablePath("lli", Argv0, (void *)(intptr_t)&createLLI);
if (!LLIPath.empty()) {
Message = "Found lli: " + LLIPath + "\n";
return new LLI(LLIPath, ToolArgs);
@@ -269,42 +257,35 @@ AbstractInterpreter *AbstractInterpreter::createLLI(const char *Argv0,
// example, to compile a bitcode fragment without linking or executing, then
// using a custom wrapper script to check for compiler errors.
namespace {
- class CustomCompiler : public AbstractInterpreter {
- std::string CompilerCommand;
- std::vector<std::string> CompilerArgs;
- public:
- CustomCompiler(const std::string &CompilerCmd,
- std::vector<std::string> CompArgs)
- : CompilerCommand(CompilerCmd), CompilerArgs(std::move(CompArgs)) {}
-
- void compileProgram(const std::string &Bitcode,
- std::string *Error,
- unsigned Timeout = 0,
- unsigned MemoryLimit = 0) override;
-
- int ExecuteProgram(const std::string &Bitcode,
- const std::vector<std::string> &Args,
- const std::string &InputFile,
- const std::string &OutputFile,
- std::string *Error,
- const std::vector<std::string> &CCArgs =
- std::vector<std::string>(),
- const std::vector<std::string> &SharedLibs =
- std::vector<std::string>(),
- unsigned Timeout = 0,
- unsigned MemoryLimit = 0) override {
- *Error = "Execution not supported with -compile-custom";
- return -1;
- }
- };
+class CustomCompiler : public AbstractInterpreter {
+ std::string CompilerCommand;
+ std::vector<std::string> CompilerArgs;
+
+public:
+ CustomCompiler(const std::string &CompilerCmd,
+ std::vector<std::string> CompArgs)
+ : CompilerCommand(CompilerCmd), CompilerArgs(std::move(CompArgs)) {}
+
+ Error compileProgram(const std::string &Bitcode, unsigned Timeout = 0,
+ unsigned MemoryLimit = 0) override;
+
+ Expected<int> ExecuteProgram(
+ const std::string &Bitcode, const std::vector<std::string> &Args,
+ const std::string &InputFile, const std::string &OutputFile,
+ const std::vector<std::string> &CCArgs = std::vector<std::string>(),
+ const std::vector<std::string> &SharedLibs = std::vector<std::string>(),
+ unsigned Timeout = 0, unsigned MemoryLimit = 0) override {
+ return make_error<StringError>(
+ "Execution not supported with -compile-custom",
+ inconvertibleErrorCode());
+ }
+};
}
-void CustomCompiler::compileProgram(const std::string &Bitcode,
- std::string *Error,
- unsigned Timeout,
- unsigned MemoryLimit) {
+Error CustomCompiler::compileProgram(const std::string &Bitcode,
+ unsigned Timeout, unsigned MemoryLimit) {
- std::vector<const char*> ProgramArgs;
+ std::vector<const char *> ProgramArgs;
ProgramArgs.push_back(CompilerCommand.c_str());
for (std::size_t i = 0; i < CompilerArgs.size(); ++i)
@@ -316,11 +297,11 @@ void CustomCompiler::compileProgram(const std::string &Bitcode,
for (unsigned i = 0, e = CompilerArgs.size(); i != e; ++i)
ProgramArgs.push_back(CompilerArgs[i].c_str());
- if (RunProgramWithTimeout(CompilerCommand, &ProgramArgs[0],
- "", "", "",
- Timeout, MemoryLimit, Error))
- *Error = ProcessFailure(CompilerCommand, &ProgramArgs[0],
- Timeout, MemoryLimit);
+ if (RunProgramWithTimeout(CompilerCommand, &ProgramArgs[0], "", "", "",
+ Timeout, MemoryLimit))
+ return ProcessFailure(CompilerCommand, &ProgramArgs[0], Timeout,
+ MemoryLimit);
+ return Error::success();
}
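ProcessFailure() now wraps the captured tool output in a StringError rather than returning a std::string, so compileProgram() above can simply forward it. When a caller wants to inspect such an error instead of propagating it, the usual tool is handleAllErrors(); a short sketch under the assumption that the error really does carry StringErrors (which is all ProcessFailure produces) — wasToolFailure is an illustrative name, not a bugpoint function:

#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Illustrative: examine an Error assumed to carry StringError payloads.
static bool wasToolFailure(Error E) {
  bool Seen = false;
  handleAllErrors(std::move(E), [&](const StringError &SE) {
    errs() << "tool failed:" << SE.getMessage() << "\n";
    Seen = true;
  });
  return Seen;
}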
//===---------------------------------------------------------------------===//
@@ -330,38 +311,32 @@ void CustomCompiler::compileProgram(const std::string &Bitcode,
// for example, to invoke a cross compiler for code generation followed by
// a simulator that executes the generated binary.
namespace {
- class CustomExecutor : public AbstractInterpreter {
- std::string ExecutionCommand;
- std::vector<std::string> ExecutorArgs;
- public:
- CustomExecutor(const std::string &ExecutionCmd,
- std::vector<std::string> ExecArgs)
- : ExecutionCommand(ExecutionCmd), ExecutorArgs(std::move(ExecArgs)) {}
-
- int ExecuteProgram(const std::string &Bitcode,
- const std::vector<std::string> &Args,
- const std::string &InputFile,
- const std::string &OutputFile,
- std::string *Error,
- const std::vector<std::string> &CCArgs,
- const std::vector<std::string> &SharedLibs =
- std::vector<std::string>(),
- unsigned Timeout = 0,
- unsigned MemoryLimit = 0) override;
- };
+class CustomExecutor : public AbstractInterpreter {
+ std::string ExecutionCommand;
+ std::vector<std::string> ExecutorArgs;
+
+public:
+ CustomExecutor(const std::string &ExecutionCmd,
+ std::vector<std::string> ExecArgs)
+ : ExecutionCommand(ExecutionCmd), ExecutorArgs(std::move(ExecArgs)) {}
+
+ Expected<int> ExecuteProgram(
+ const std::string &Bitcode, const std::vector<std::string> &Args,
+ const std::string &InputFile, const std::string &OutputFile,
+ const std::vector<std::string> &CCArgs,
+ const std::vector<std::string> &SharedLibs = std::vector<std::string>(),
+ unsigned Timeout = 0, unsigned MemoryLimit = 0) override;
+};
}
-int CustomExecutor::ExecuteProgram(const std::string &Bitcode,
- const std::vector<std::string> &Args,
- const std::string &InputFile,
- const std::string &OutputFile,
- std::string *Error,
- const std::vector<std::string> &CCArgs,
- const std::vector<std::string> &SharedLibs,
- unsigned Timeout,
- unsigned MemoryLimit) {
-
- std::vector<const char*> ProgramArgs;
+Expected<int> CustomExecutor::ExecuteProgram(
+ const std::string &Bitcode, const std::vector<std::string> &Args,
+ const std::string &InputFile, const std::string &OutputFile,
+ const std::vector<std::string> &CCArgs,
+ const std::vector<std::string> &SharedLibs, unsigned Timeout,
+ unsigned MemoryLimit) {
+
+ std::vector<const char *> ProgramArgs;
ProgramArgs.push_back(ExecutionCommand.c_str());
for (std::size_t i = 0; i < ExecutorArgs.size(); ++i)
@@ -373,10 +348,8 @@ int CustomExecutor::ExecuteProgram(const std::string &Bitcode,
for (unsigned i = 0, e = Args.size(); i != e; ++i)
ProgramArgs.push_back(Args[i].c_str());
- return RunProgramWithTimeout(
- ExecutionCommand,
- &ProgramArgs[0], InputFile, OutputFile,
- OutputFile, Timeout, MemoryLimit, Error);
+ return RunProgramWithTimeout(ExecutionCommand, &ProgramArgs[0], InputFile,
+ OutputFile, OutputFile, Timeout, MemoryLimit);
}
// Tokenize the CommandLine to the command and the args to allow
@@ -400,9 +373,9 @@ static void lexCommand(std::string &Message, const std::string &CommandLine,
while (std::string::npos != pos || std::string::npos != lastPos) {
std::string token = CommandLine.substr(lastPos, pos - lastPos);
if (Command == "")
- Command = token;
+ Command = token;
else
- Args.push_back(token);
+ Args.push_back(token);
// Skip delimiters. Note the "not_of"
lastPos = CommandLine.find_first_not_of(delimiters, pos);
// Find next "non-delimiter"
@@ -411,9 +384,8 @@ static void lexCommand(std::string &Message, const std::string &CommandLine,
auto Path = sys::findProgramByName(Command);
if (!Path) {
- Message =
- std::string("Cannot find '") + Command +
- "' in PATH: " + Path.getError().message() + "\n";
+ Message = std::string("Cannot find '") + Command + "' in PATH: " +
+ Path.getError().message() + "\n";
return;
}
CmdPath = *Path;
@@ -424,8 +396,7 @@ static void lexCommand(std::string &Message, const std::string &CommandLine,
// Custom execution environment create method, takes the execution command
// as arguments
AbstractInterpreter *AbstractInterpreter::createCustomCompiler(
- std::string &Message,
- const std::string &CompileCommandLine) {
+ std::string &Message, const std::string &CompileCommandLine) {
std::string CmdPath;
std::vector<std::string> Args;
@@ -438,10 +409,9 @@ AbstractInterpreter *AbstractInterpreter::createCustomCompiler(
// Custom execution environment create method, takes the execution command
// as arguments
-AbstractInterpreter *AbstractInterpreter::createCustomExecutor(
- std::string &Message,
- const std::string &ExecCommandLine) {
-
+AbstractInterpreter *
+AbstractInterpreter::createCustomExecutor(std::string &Message,
+ const std::string &ExecCommandLine) {
std::string CmdPath;
std::vector<std::string> Args;
@@ -455,9 +425,9 @@ AbstractInterpreter *AbstractInterpreter::createCustomExecutor(
//===----------------------------------------------------------------------===//
// LLC Implementation of AbstractIntepreter interface
//
-CC::FileType LLC::OutputCode(const std::string &Bitcode,
- std::string &OutputAsmFile, std::string &Error,
- unsigned Timeout, unsigned MemoryLimit) {
+Expected<CC::FileType> LLC::OutputCode(const std::string &Bitcode,
+ std::string &OutputAsmFile,
+ unsigned Timeout, unsigned MemoryLimit) {
const char *Suffix = (UseIntegratedAssembler ? ".llc.o" : ".llc.s");
SmallString<128> UniqueFile;
@@ -477,69 +447,68 @@ CC::FileType LLC::OutputCode(const std::string &Bitcode,
LLCArgs.push_back("-o");
LLCArgs.push_back(OutputAsmFile.c_str()); // Output to the Asm file
- LLCArgs.push_back(Bitcode.c_str()); // This is the input bitcode
+ LLCArgs.push_back(Bitcode.c_str()); // This is the input bitcode
if (UseIntegratedAssembler)
LLCArgs.push_back("-filetype=obj");
- LLCArgs.push_back (nullptr);
+ LLCArgs.push_back(nullptr);
outs() << (UseIntegratedAssembler ? "<llc-ia>" : "<llc>");
outs().flush();
DEBUG(errs() << "\nAbout to run:\t";
- for (unsigned i = 0, e = LLCArgs.size()-1; i != e; ++i)
- errs() << " " << LLCArgs[i];
- errs() << "\n";
- );
- if (RunProgramWithTimeout(LLCPath, &LLCArgs[0],
- "", "", "",
- Timeout, MemoryLimit))
- Error = ProcessFailure(LLCPath, &LLCArgs[0],
- Timeout, MemoryLimit);
+ for (unsigned i = 0, e = LLCArgs.size() - 1; i != e; ++i) errs()
+ << " " << LLCArgs[i];
+ errs() << "\n";);
+ if (RunProgramWithTimeout(LLCPath, &LLCArgs[0], "", "", "", Timeout,
+ MemoryLimit))
+ return ProcessFailure(LLCPath, &LLCArgs[0], Timeout, MemoryLimit);
return UseIntegratedAssembler ? CC::ObjectFile : CC::AsmFile;
}
-void LLC::compileProgram(const std::string &Bitcode, std::string *Error,
- unsigned Timeout, unsigned MemoryLimit) {
+Error LLC::compileProgram(const std::string &Bitcode, unsigned Timeout,
+ unsigned MemoryLimit) {
std::string OutputAsmFile;
- OutputCode(Bitcode, OutputAsmFile, *Error, Timeout, MemoryLimit);
+ Expected<CC::FileType> Result =
+ OutputCode(Bitcode, OutputAsmFile, Timeout, MemoryLimit);
sys::fs::remove(OutputAsmFile);
+ if (Error E = Result.takeError())
+ return E;
+ return Error::success();
}
-int LLC::ExecuteProgram(const std::string &Bitcode,
- const std::vector<std::string> &Args,
- const std::string &InputFile,
- const std::string &OutputFile,
- std::string *Error,
- const std::vector<std::string> &ArgsForCC,
- const std::vector<std::string> &SharedLibs,
- unsigned Timeout,
- unsigned MemoryLimit) {
+Expected<int> LLC::ExecuteProgram(const std::string &Bitcode,
+ const std::vector<std::string> &Args,
+ const std::string &InputFile,
+ const std::string &OutputFile,
+ const std::vector<std::string> &ArgsForCC,
+ const std::vector<std::string> &SharedLibs,
+ unsigned Timeout, unsigned MemoryLimit) {
std::string OutputAsmFile;
- CC::FileType FileKind = OutputCode(Bitcode, OutputAsmFile, *Error, Timeout,
- MemoryLimit);
+ Expected<CC::FileType> FileKind =
+ OutputCode(Bitcode, OutputAsmFile, Timeout, MemoryLimit);
FileRemover OutFileRemover(OutputAsmFile, !SaveTemps);
+ if (Error E = FileKind.takeError())
+ return std::move(E);
std::vector<std::string> CCArgs(ArgsForCC);
CCArgs.insert(CCArgs.end(), SharedLibs.begin(), SharedLibs.end());
// Assuming LLC worked, compile the result with CC and run it.
- return cc->ExecuteProgram(OutputAsmFile, Args, FileKind,
- InputFile, OutputFile, Error, CCArgs,
- Timeout, MemoryLimit);
+ return cc->ExecuteProgram(OutputAsmFile, Args, *FileKind, InputFile,
+ OutputFile, CCArgs, Timeout, MemoryLimit);
}
/// createLLC - Try to find the LLC executable
///
-LLC *AbstractInterpreter::createLLC(const char *Argv0,
- std::string &Message,
+LLC *AbstractInterpreter::createLLC(const char *Argv0, std::string &Message,
const std::string &CCBinary,
const std::vector<std::string> *Args,
const std::vector<std::string> *CCArgs,
bool UseIntegratedAssembler) {
std::string LLCPath =
- PrependMainExecutablePath("llc", Argv0, (void *)(intptr_t) & createLLC);
+ PrependMainExecutablePath("llc", Argv0, (void *)(intptr_t)&createLLC);
if (LLCPath.empty()) {
Message = "Cannot find `llc' in executable directory!\n";
return nullptr;
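A minimal sketch of the llvm::Expected/llvm::Error propagation pattern the
converted LLC methods above adopt; runStep and driveStep are illustrative
names, not part of this change.

#include "llvm/Support/Error.h"
using namespace llvm;

// Producer: yields a value on success or an llvm::Error describing the failure.
static Expected<int> runStep(bool Ok) {
  if (!Ok)
    return make_error<StringError>("step failed", inconvertibleErrorCode());
  return 42;
}

// Consumer: forwards the failure to its own caller instead of filling an
// out-parameter std::string, as the new compileProgram/OutputCode pair does.
static Error driveStep() {
  Expected<int> R = runStep(true);
  if (Error E = R.takeError())
    return E;               // propagate the failure
  (void)*R;                 // use the value on the success path
  return Error::success();
}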
@@ -558,41 +527,36 @@ LLC *AbstractInterpreter::createLLC(const char *Argv0,
// JIT Implementation of AbstractInterpreter interface
//
namespace {
- class JIT : public AbstractInterpreter {
- std::string LLIPath; // The path to the LLI executable
- std::vector<std::string> ToolArgs; // Args to pass to LLI
- public:
- JIT(const std::string &Path, const std::vector<std::string> *Args)
+class JIT : public AbstractInterpreter {
+ std::string LLIPath; // The path to the LLI executable
+ std::vector<std::string> ToolArgs; // Args to pass to LLI
+public:
+ JIT(const std::string &Path, const std::vector<std::string> *Args)
: LLIPath(Path) {
- ToolArgs.clear ();
- if (Args) { ToolArgs = *Args; }
+ ToolArgs.clear();
+ if (Args) {
+ ToolArgs = *Args;
}
+ }
- int ExecuteProgram(const std::string &Bitcode,
- const std::vector<std::string> &Args,
- const std::string &InputFile,
- const std::string &OutputFile,
- std::string *Error,
- const std::vector<std::string> &CCArgs =
- std::vector<std::string>(),
- const std::vector<std::string> &SharedLibs =
- std::vector<std::string>(),
- unsigned Timeout = 0,
- unsigned MemoryLimit = 0) override;
- };
+ Expected<int> ExecuteProgram(
+ const std::string &Bitcode, const std::vector<std::string> &Args,
+ const std::string &InputFile, const std::string &OutputFile,
+ const std::vector<std::string> &CCArgs = std::vector<std::string>(),
+ const std::vector<std::string> &SharedLibs = std::vector<std::string>(),
+ unsigned Timeout = 0, unsigned MemoryLimit = 0) override;
+};
}
-int JIT::ExecuteProgram(const std::string &Bitcode,
- const std::vector<std::string> &Args,
- const std::string &InputFile,
- const std::string &OutputFile,
- std::string *Error,
- const std::vector<std::string> &CCArgs,
- const std::vector<std::string> &SharedLibs,
- unsigned Timeout,
- unsigned MemoryLimit) {
+Expected<int> JIT::ExecuteProgram(const std::string &Bitcode,
+ const std::vector<std::string> &Args,
+ const std::string &InputFile,
+ const std::string &OutputFile,
+ const std::vector<std::string> &CCArgs,
+ const std::vector<std::string> &SharedLibs,
+ unsigned Timeout, unsigned MemoryLimit) {
// Construct a vector of parameters, incorporating those from the command-line
- std::vector<const char*> JITArgs;
+ std::vector<const char *> JITArgs;
JITArgs.push_back(LLIPath.c_str());
JITArgs.push_back("-force-interpreter=false");
@@ -606,28 +570,28 @@ int JIT::ExecuteProgram(const std::string &Bitcode,
}
JITArgs.push_back(Bitcode.c_str());
// Add optional parameters to the running program from Argv
- for (unsigned i=0, e = Args.size(); i != e; ++i)
+ for (unsigned i = 0, e = Args.size(); i != e; ++i)
JITArgs.push_back(Args[i].c_str());
JITArgs.push_back(nullptr);
- outs() << "<jit>"; outs().flush();
+ outs() << "<jit>";
+ outs().flush();
DEBUG(errs() << "\nAbout to run:\t";
- for (unsigned i=0, e = JITArgs.size()-1; i != e; ++i)
- errs() << " " << JITArgs[i];
- errs() << "\n";
- );
+ for (unsigned i = 0, e = JITArgs.size() - 1; i != e; ++i) errs()
+ << " " << JITArgs[i];
+ errs() << "\n";);
DEBUG(errs() << "\nSending output to " << OutputFile << "\n");
- return RunProgramWithTimeout(LLIPath, &JITArgs[0],
- InputFile, OutputFile, OutputFile,
- Timeout, MemoryLimit, Error);
+ return RunProgramWithTimeout(LLIPath, &JITArgs[0], InputFile, OutputFile,
+ OutputFile, Timeout, MemoryLimit);
}
/// createJIT - Try to find the LLI executable
///
-AbstractInterpreter *AbstractInterpreter::createJIT(const char *Argv0,
- std::string &Message, const std::vector<std::string> *Args) {
+AbstractInterpreter *
+AbstractInterpreter::createJIT(const char *Argv0, std::string &Message,
+ const std::vector<std::string> *Args) {
std::string LLIPath =
- PrependMainExecutablePath("lli", Argv0, (void *)(intptr_t) & createJIT);
+ PrependMainExecutablePath("lli", Argv0, (void *)(intptr_t)&createJIT);
if (!LLIPath.empty()) {
Message = "Found lli: " + LLIPath + "\n";
return new JIT(LLIPath, Args);
@@ -641,9 +605,10 @@ AbstractInterpreter *AbstractInterpreter::createJIT(const char *Argv0,
// CC abstraction
//
-static bool IsARMArchitecture(std::vector<const char*> Args) {
- for (std::vector<const char*>::const_iterator
- I = Args.begin(), E = Args.end(); I != E; ++I) {
+static bool IsARMArchitecture(std::vector<const char *> Args) {
+ for (std::vector<const char *>::const_iterator I = Args.begin(),
+ E = Args.end();
+ I != E; ++I) {
if (StringRef(*I).equals_lower("-arch")) {
++I;
if (I != E && StringRef(*I).startswith_lower("arm"))
@@ -654,24 +619,23 @@ static bool IsARMArchitecture(std::vector<const char*> Args) {
return false;
}
-int CC::ExecuteProgram(const std::string &ProgramFile,
- const std::vector<std::string> &Args,
- FileType fileType,
- const std::string &InputFile,
- const std::string &OutputFile,
- std::string *Error,
- const std::vector<std::string> &ArgsForCC,
- unsigned Timeout,
- unsigned MemoryLimit) {
- std::vector<const char*> CCArgs;
+Expected<int> CC::ExecuteProgram(const std::string &ProgramFile,
+ const std::vector<std::string> &Args,
+ FileType fileType,
+ const std::string &InputFile,
+ const std::string &OutputFile,
+ const std::vector<std::string> &ArgsForCC,
+ unsigned Timeout, unsigned MemoryLimit) {
+ std::vector<const char *> CCArgs;
CCArgs.push_back(CCPath.c_str());
if (TargetTriple.getArch() == Triple::x86)
CCArgs.push_back("-m32");
- for (std::vector<std::string>::const_iterator
- I = ccArgs.begin(), E = ccArgs.end(); I != E; ++I)
+ for (std::vector<std::string>::const_iterator I = ccArgs.begin(),
+ E = ccArgs.end();
+ I != E; ++I)
CCArgs.push_back(I->c_str());
// Specify -x explicitly in case the extension is wonky
@@ -691,7 +655,7 @@ int CC::ExecuteProgram(const std::string &ProgramFile,
}
}
- CCArgs.push_back(ProgramFile.c_str()); // Specify the input filename.
+ CCArgs.push_back(ProgramFile.c_str()); // Specify the input filename.
CCArgs.push_back("-x");
CCArgs.push_back("none");
@@ -713,27 +677,22 @@ int CC::ExecuteProgram(const std::string &ProgramFile,
for (unsigned i = 0, e = ArgsForCC.size(); i != e; ++i)
CCArgs.push_back(ArgsForCC[i].c_str());
- CCArgs.push_back("-lm"); // Hard-code the math library...
- CCArgs.push_back("-O2"); // Optimize the program a bit...
-#if defined (HAVE_LINK_R)
- CCArgs.push_back("-Wl,-R."); // Search this dir for .so files
-#endif
+ CCArgs.push_back("-lm"); // Hard-code the math library...
+ CCArgs.push_back("-O2"); // Optimize the program a bit...
if (TargetTriple.getArch() == Triple::sparc)
CCArgs.push_back("-mcpu=v9");
- CCArgs.push_back(nullptr); // NULL terminator
+ CCArgs.push_back(nullptr); // NULL terminator
- outs() << "<CC>"; outs().flush();
+ outs() << "<CC>";
+ outs().flush();
DEBUG(errs() << "\nAbout to run:\t";
- for (unsigned i = 0, e = CCArgs.size()-1; i != e; ++i)
- errs() << " " << CCArgs[i];
- errs() << "\n";
- );
- if (RunProgramWithTimeout(CCPath, &CCArgs[0], "", "", "")) {
- *Error = ProcessFailure(CCPath, &CCArgs[0]);
- return -1;
- }
+ for (unsigned i = 0, e = CCArgs.size() - 1; i != e; ++i) errs()
+ << " " << CCArgs[i];
+ errs() << "\n";);
+ if (RunProgramWithTimeout(CCPath, &CCArgs[0], "", "", ""))
+ return ProcessFailure(CCPath, &CCArgs[0]);
- std::vector<const char*> ProgramArgs;
+ std::vector<const char *> ProgramArgs;
// Declared here so that the destructor only runs after
// ProgramArgs is used.
@@ -758,7 +717,7 @@ int CC::ExecuteProgram(const std::string &ProgramFile,
// Full path to the binary. We need to cd to the exec directory because
// there is a dylib there that the exec expects to find in the CWD
- char* env_pwd = getenv("PWD");
+ char *env_pwd = getenv("PWD");
Exec = "cd ";
Exec += env_pwd;
Exec += "; ./";
@@ -769,44 +728,44 @@ int CC::ExecuteProgram(const std::string &ProgramFile,
// Add optional parameters to the running program from Argv
for (unsigned i = 0, e = Args.size(); i != e; ++i)
ProgramArgs.push_back(Args[i].c_str());
- ProgramArgs.push_back(nullptr); // NULL terminator
+ ProgramArgs.push_back(nullptr); // NULL terminator
// Now that we have a binary, run it!
- outs() << "<program>"; outs().flush();
+ outs() << "<program>";
+ outs().flush();
DEBUG(errs() << "\nAbout to run:\t";
- for (unsigned i = 0, e = ProgramArgs.size()-1; i != e; ++i)
- errs() << " " << ProgramArgs[i];
- errs() << "\n";
- );
+ for (unsigned i = 0, e = ProgramArgs.size() - 1; i != e; ++i) errs()
+ << " " << ProgramArgs[i];
+ errs() << "\n";);
FileRemover OutputBinaryRemover(OutputBinary.str(), !SaveTemps);
if (RemoteClientPath.empty()) {
DEBUG(errs() << "<run locally>");
+ std::string Error;
int ExitCode = RunProgramWithTimeout(OutputBinary.str(), &ProgramArgs[0],
InputFile, OutputFile, OutputFile,
- Timeout, MemoryLimit, Error);
+ Timeout, MemoryLimit, &Error);
// Treat a signal (usually SIGSEGV) or timeout as part of the program output
// so that crash-causing miscompilation is handled seamlessly.
if (ExitCode < -1) {
std::ofstream outFile(OutputFile.c_str(), std::ios_base::app);
- outFile << *Error << '\n';
+ outFile << Error << '\n';
outFile.close();
- Error->clear();
}
return ExitCode;
} else {
- outs() << "<run remotely>"; outs().flush();
- return RunProgramRemotelyWithTimeout(RemoteClientPath,
- &ProgramArgs[0], InputFile, OutputFile,
- OutputFile, Timeout, MemoryLimit);
+ outs() << "<run remotely>";
+ outs().flush();
+ return RunProgramRemotelyWithTimeout(RemoteClientPath, &ProgramArgs[0],
+ InputFile, OutputFile, OutputFile,
+ Timeout, MemoryLimit);
}
}
-int CC::MakeSharedObject(const std::string &InputFile, FileType fileType,
- std::string &OutputFile,
- const std::vector<std::string> &ArgsForCC,
- std::string &Error) {
+Error CC::MakeSharedObject(const std::string &InputFile, FileType fileType,
+ std::string &OutputFile,
+ const std::vector<std::string> &ArgsForCC) {
SmallString<128> UniqueFilename;
std::error_code EC = sys::fs::createUniqueFile(
InputFile + "-%%%%%%%" + LTDL_SHLIB_EXT, UniqueFilename);
@@ -816,15 +775,16 @@ int CC::MakeSharedObject(const std::string &InputFile, FileType fileType,
}
OutputFile = UniqueFilename.str();
- std::vector<const char*> CCArgs;
+ std::vector<const char *> CCArgs;
CCArgs.push_back(CCPath.c_str());
if (TargetTriple.getArch() == Triple::x86)
CCArgs.push_back("-m32");
- for (std::vector<std::string>::const_iterator
- I = ccArgs.begin(), E = ccArgs.end(); I != E; ++I)
+ for (std::vector<std::string>::const_iterator I = ccArgs.begin(),
+ E = ccArgs.end();
+ I != E; ++I)
CCArgs.push_back(I->c_str());
// Compile the C/asm file into a shared object
@@ -833,25 +793,25 @@ int CC::MakeSharedObject(const std::string &InputFile, FileType fileType,
CCArgs.push_back(fileType == AsmFile ? "assembler" : "c");
}
CCArgs.push_back("-fno-strict-aliasing");
- CCArgs.push_back(InputFile.c_str()); // Specify the input filename.
+ CCArgs.push_back(InputFile.c_str()); // Specify the input filename.
CCArgs.push_back("-x");
CCArgs.push_back("none");
if (TargetTriple.getArch() == Triple::sparc)
- CCArgs.push_back("-G"); // Compile a shared library, `-G' for Sparc
+ CCArgs.push_back("-G"); // Compile a shared library, `-G' for Sparc
else if (TargetTriple.isOSDarwin()) {
// link all source files into a single module in data segment, rather than
// generating blocks. dynamic_lookup requires that you set
// MACOSX_DEPLOYMENT_TARGET=10.3 in your env. FIXME: it would be better for
// bugpoint to just pass that in the environment of CC.
CCArgs.push_back("-single_module");
- CCArgs.push_back("-dynamiclib"); // `-dynamiclib' for MacOS X/PowerPC
+ CCArgs.push_back("-dynamiclib"); // `-dynamiclib' for MacOS X/PowerPC
CCArgs.push_back("-undefined");
CCArgs.push_back("dynamic_lookup");
} else
- CCArgs.push_back("-shared"); // `-shared' for Linux/X86, maybe others
+ CCArgs.push_back("-shared"); // `-shared' for Linux/X86, maybe others
if (TargetTriple.getArch() == Triple::x86_64)
- CCArgs.push_back("-fPIC"); // Requires shared objs to contain PIC
+ CCArgs.push_back("-fPIC"); // Requires shared objs to contain PIC
if (TargetTriple.getArch() == Triple::sparc)
CCArgs.push_back("-mcpu=v9");
@@ -860,36 +820,29 @@ int CC::MakeSharedObject(const std::string &InputFile, FileType fileType,
CCArgs.push_back(OutputFile.c_str()); // Output to the right filename.
CCArgs.push_back("-O2"); // Optimize the program a bit.
-
-
// Add any arguments intended for CC. We locate them here because this is
// most likely -L and -l options that need to come before other libraries but
// after the source. Other options won't be sensitive to placement on the
// command line, so this should be safe.
for (unsigned i = 0, e = ArgsForCC.size(); i != e; ++i)
CCArgs.push_back(ArgsForCC[i].c_str());
- CCArgs.push_back(nullptr); // NULL terminator
-
+ CCArgs.push_back(nullptr); // NULL terminator
-
- outs() << "<CC>"; outs().flush();
+ outs() << "<CC>";
+ outs().flush();
DEBUG(errs() << "\nAbout to run:\t";
- for (unsigned i = 0, e = CCArgs.size()-1; i != e; ++i)
- errs() << " " << CCArgs[i];
- errs() << "\n";
- );
- if (RunProgramWithTimeout(CCPath, &CCArgs[0], "", "", "")) {
- Error = ProcessFailure(CCPath, &CCArgs[0]);
- return 1;
- }
- return 0;
+ for (unsigned i = 0, e = CCArgs.size() - 1; i != e; ++i) errs()
+ << " " << CCArgs[i];
+ errs() << "\n";);
+ if (RunProgramWithTimeout(CCPath, &CCArgs[0], "", "", ""))
+ return ProcessFailure(CCPath, &CCArgs[0]);
+  return Error::success();
}
/// create - Try to find the CC executable
///
-CC *CC::create(std::string &Message,
- const std::string &CCBinary,
- const std::vector<std::string> *Args) {
+CC *CC::create(std::string &Message, const std::string &CCBinary,
+ const std::vector<std::string> *Args) {
auto CCPath = sys::findProgramByName(CCBinary);
if (!CCPath) {
Message = "Cannot find `" + CCBinary + "' in PATH: " +
diff --git a/contrib/llvm/tools/bugpoint/ToolRunner.h b/contrib/llvm/tools/bugpoint/ToolRunner.h
index 3accd70ed4ce..f218ad534ee9 100644
--- a/contrib/llvm/tools/bugpoint/ToolRunner.h
+++ b/contrib/llvm/tools/bugpoint/ToolRunner.h
@@ -19,7 +19,7 @@
#include "llvm/ADT/Triple.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Error.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/SystemUtils.h"
#include <exception>
@@ -36,20 +36,21 @@ class LLC;
// CC abstraction
//
class CC {
- std::string CCPath; // The path to the cc executable.
- std::string RemoteClientPath; // The path to the rsh / ssh executable.
+ std::string CCPath; // The path to the cc executable.
+ std::string RemoteClientPath; // The path to the rsh / ssh executable.
std::vector<std::string> ccArgs; // CC-specific arguments.
CC(StringRef ccPath, StringRef RemotePath,
- const std::vector<std::string> *CCArgs)
- : CCPath(ccPath), RemoteClientPath(RemotePath) {
- if (CCArgs) ccArgs = *CCArgs;
+ const std::vector<std::string> *CCArgs)
+ : CCPath(ccPath), RemoteClientPath(RemotePath) {
+ if (CCArgs)
+ ccArgs = *CCArgs;
}
+
public:
enum FileType { AsmFile, ObjectFile, CFile };
- static CC *create(std::string &Message,
- const std::string &CCBinary,
- const std::vector<std::string> *Args);
+ static CC *create(std::string &Message, const std::string &CCBinary,
+ const std::vector<std::string> *Args);
/// ExecuteProgram - Execute the program specified by "ProgramFile" (which is
/// either a .s file, or a .c file, specified by FileType), with the specified
@@ -58,27 +59,21 @@ public:
/// option specifies optional native shared objects that can be loaded into
/// the program for execution.
///
- int ExecuteProgram(const std::string &ProgramFile,
- const std::vector<std::string> &Args,
- FileType fileType,
- const std::string &InputFile,
- const std::string &OutputFile,
- std::string *Error = nullptr,
- const std::vector<std::string> &CCArgs =
- std::vector<std::string>(),
- unsigned Timeout = 0,
- unsigned MemoryLimit = 0);
+ Expected<int> ExecuteProgram(
+ const std::string &ProgramFile, const std::vector<std::string> &Args,
+ FileType fileType, const std::string &InputFile,
+ const std::string &OutputFile,
+ const std::vector<std::string> &CCArgs = std::vector<std::string>(),
+ unsigned Timeout = 0, unsigned MemoryLimit = 0);
/// MakeSharedObject - This compiles the specified file (which is either a .c
/// file or a .s file) into a shared object.
///
- int MakeSharedObject(const std::string &InputFile, FileType fileType,
- std::string &OutputFile,
- const std::vector<std::string> &ArgsForCC,
- std::string &Error);
+ Error MakeSharedObject(const std::string &InputFile, FileType fileType,
+ std::string &OutputFile,
+ const std::vector<std::string> &ArgsForCC);
};
-
//===---------------------------------------------------------------------===//
/// AbstractInterpreter Class - Subclasses of this class are used to execute
/// LLVM bitcode in a variety of ways. This abstract interface hides this
@@ -86,66 +81,63 @@ public:
///
class AbstractInterpreter {
virtual void anchor();
+
public:
static LLC *createLLC(const char *Argv0, std::string &Message,
- const std::string &CCBinary,
+ const std::string &CCBinary,
const std::vector<std::string> *Args = nullptr,
const std::vector<std::string> *CCArgs = nullptr,
bool UseIntegratedAssembler = false);
- static AbstractInterpreter*
+ static AbstractInterpreter *
createLLI(const char *Argv0, std::string &Message,
const std::vector<std::string> *Args = nullptr);
- static AbstractInterpreter*
+ static AbstractInterpreter *
createJIT(const char *Argv0, std::string &Message,
const std::vector<std::string> *Args = nullptr);
- static AbstractInterpreter*
+ static AbstractInterpreter *
createCustomCompiler(std::string &Message,
const std::string &CompileCommandLine);
- static AbstractInterpreter*
+ static AbstractInterpreter *
createCustomExecutor(std::string &Message,
const std::string &ExecCommandLine);
-
virtual ~AbstractInterpreter() {}
/// compileProgram - Compile the specified program from bitcode to executable
/// code. This does not produce any output, it is only used when debugging
 /// the code generator. It returns an Error if the code generator fails.
- virtual void compileProgram(const std::string &Bitcode, std::string *Error,
- unsigned Timeout = 0, unsigned MemoryLimit = 0) {}
-
- /// OutputCode - Compile the specified program from bitcode to code
- /// understood by the CC driver (either C or asm). If the code generator
- /// fails, it sets Error, otherwise, this function returns the type of code
- /// emitted.
- virtual CC::FileType OutputCode(const std::string &Bitcode,
- std::string &OutFile, std::string &Error,
- unsigned Timeout = 0,
- unsigned MemoryLimit = 0) {
- Error = "OutputCode not supported by this AbstractInterpreter!";
- return CC::AsmFile;
+ virtual Error compileProgram(const std::string &Bitcode, unsigned Timeout = 0,
+ unsigned MemoryLimit = 0) {
+ return Error::success();
+ }
+
+ /// Compile the specified program from bitcode to code understood by the CC
+  /// driver (either C or asm). Returns an error if the code generator fails;
+  /// otherwise, returns the type of code emitted.
+ virtual Expected<CC::FileType> OutputCode(const std::string &Bitcode,
+ std::string &OutFile,
+ unsigned Timeout = 0,
+ unsigned MemoryLimit = 0) {
+ return make_error<StringError>(
+ "OutputCode not supported by this AbstractInterpreter!",
+ inconvertibleErrorCode());
}
/// ExecuteProgram - Run the specified bitcode file, emitting output to the
/// specified filename. This sets RetVal to the exit code of the program or
- /// returns false if a problem was encountered that prevented execution of
+ /// returns an Error if a problem was encountered that prevented execution of
/// the program.
///
- virtual int ExecuteProgram(const std::string &Bitcode,
- const std::vector<std::string> &Args,
- const std::string &InputFile,
- const std::string &OutputFile,
- std::string *Error,
- const std::vector<std::string> &CCArgs =
- std::vector<std::string>(),
- const std::vector<std::string> &SharedLibs =
- std::vector<std::string>(),
- unsigned Timeout = 0,
- unsigned MemoryLimit = 0) = 0;
+ virtual Expected<int> ExecuteProgram(
+ const std::string &Bitcode, const std::vector<std::string> &Args,
+ const std::string &InputFile, const std::string &OutputFile,
+ const std::vector<std::string> &CCArgs = std::vector<std::string>(),
+ const std::vector<std::string> &SharedLibs = std::vector<std::string>(),
+ unsigned Timeout = 0, unsigned MemoryLimit = 0) = 0;
};
//===---------------------------------------------------------------------===//
@@ -156,43 +148,34 @@ class LLC : public AbstractInterpreter {
std::vector<std::string> ToolArgs; // Extra args to pass to LLC.
CC *cc;
bool UseIntegratedAssembler;
+
public:
- LLC(const std::string &llcPath, CC *cc,
- const std::vector<std::string> *Args,
+ LLC(const std::string &llcPath, CC *cc, const std::vector<std::string> *Args,
bool useIntegratedAssembler)
- : LLCPath(llcPath), cc(cc),
- UseIntegratedAssembler(useIntegratedAssembler) {
+ : LLCPath(llcPath), cc(cc),
+ UseIntegratedAssembler(useIntegratedAssembler) {
ToolArgs.clear();
- if (Args) ToolArgs = *Args;
+ if (Args)
+ ToolArgs = *Args;
}
~LLC() override { delete cc; }
/// compileProgram - Compile the specified program from bitcode to executable
/// code. This does not produce any output, it is only used when debugging
 /// the code generator. Returns an Error if the code generator fails.
- void compileProgram(const std::string &Bitcode, std::string *Error,
- unsigned Timeout = 0, unsigned MemoryLimit = 0) override;
-
- int ExecuteProgram(const std::string &Bitcode,
- const std::vector<std::string> &Args,
- const std::string &InputFile,
- const std::string &OutputFile,
- std::string *Error,
- const std::vector<std::string> &CCArgs =
- std::vector<std::string>(),
- const std::vector<std::string> &SharedLibs =
- std::vector<std::string>(),
- unsigned Timeout = 0,
- unsigned MemoryLimit = 0) override;
-
- /// OutputCode - Compile the specified program from bitcode to code
- /// understood by the CC driver (either C or asm). If the code generator
- /// fails, it sets Error, otherwise, this function returns the type of code
- /// emitted.
- CC::FileType OutputCode(const std::string &Bitcode,
- std::string &OutFile, std::string &Error,
- unsigned Timeout = 0,
- unsigned MemoryLimit = 0) override;
+ Error compileProgram(const std::string &Bitcode, unsigned Timeout = 0,
+ unsigned MemoryLimit = 0) override;
+
+ Expected<int> ExecuteProgram(
+ const std::string &Bitcode, const std::vector<std::string> &Args,
+ const std::string &InputFile, const std::string &OutputFile,
+ const std::vector<std::string> &CCArgs = std::vector<std::string>(),
+ const std::vector<std::string> &SharedLibs = std::vector<std::string>(),
+ unsigned Timeout = 0, unsigned MemoryLimit = 0) override;
+
+ Expected<CC::FileType> OutputCode(const std::string &Bitcode,
+ std::string &OutFile, unsigned Timeout = 0,
+ unsigned MemoryLimit = 0) override;
};
} // End llvm namespace
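With ExecuteProgram now declared as Expected<int>, a caller typically branches
on the result and renders the failure once; a small sketch under that
assumption (reportOrUse is an illustrative name, not part of this change):

#include "llvm/Support/Error.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Either return the executed program's exit code or print the diagnostic.
static int reportOrUse(Expected<int> RetValOrErr) {
  if (!RetValOrErr) {
    errs() << toString(RetValOrErr.takeError()) << "\n";
    return 1;
  }
  return *RetValOrErr;
}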
diff --git a/contrib/llvm/tools/bugpoint/bugpoint.cpp b/contrib/llvm/tools/bugpoint/bugpoint.cpp
index 28565f1daac3..a5de953b2b75 100644
--- a/contrib/llvm/tools/bugpoint/bugpoint.cpp
+++ b/contrib/llvm/tools/bugpoint/bugpoint.cpp
@@ -27,82 +27,85 @@
#include "llvm/Support/Process.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/Valgrind.h"
+#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
-//Enable this macro to debug bugpoint itself.
+// Enable this macro to debug bugpoint itself.
//#define DEBUG_BUGPOINT 1
using namespace llvm;
static cl::opt<bool>
-FindBugs("find-bugs", cl::desc("Run many different optimization sequences "
- "on program to find bugs"), cl::init(false));
+ FindBugs("find-bugs", cl::desc("Run many different optimization sequences "
+ "on program to find bugs"),
+ cl::init(false));
static cl::list<std::string>
-InputFilenames(cl::Positional, cl::OneOrMore,
- cl::desc("<input llvm ll/bc files>"));
+ InputFilenames(cl::Positional, cl::OneOrMore,
+ cl::desc("<input llvm ll/bc files>"));
-static cl::opt<unsigned>
-TimeoutValue("timeout", cl::init(300), cl::value_desc("seconds"),
- cl::desc("Number of seconds program is allowed to run before it "
- "is killed (default is 300s), 0 disables timeout"));
+static cl::opt<unsigned> TimeoutValue(
+ "timeout", cl::init(300), cl::value_desc("seconds"),
+ cl::desc("Number of seconds program is allowed to run before it "
+ "is killed (default is 300s), 0 disables timeout"));
static cl::opt<int>
-MemoryLimit("mlimit", cl::init(-1), cl::value_desc("MBytes"),
- cl::desc("Maximum amount of memory to use. 0 disables check."
- " Defaults to 400MB (800MB under valgrind)."));
+ MemoryLimit("mlimit", cl::init(-1), cl::value_desc("MBytes"),
+ cl::desc("Maximum amount of memory to use. 0 disables check."
+ " Defaults to 400MB (800MB under valgrind)."));
static cl::opt<bool>
-UseValgrind("enable-valgrind",
- cl::desc("Run optimizations through valgrind"));
+ UseValgrind("enable-valgrind",
+ cl::desc("Run optimizations through valgrind"));
// The AnalysesList is automatically populated with registered Passes by the
// PassNameParser.
//
-static cl::list<const PassInfo*, bool, PassNameParser>
-PassList(cl::desc("Passes available:"), cl::ZeroOrMore);
+static cl::list<const PassInfo *, bool, PassNameParser>
+ PassList(cl::desc("Passes available:"), cl::ZeroOrMore);
static cl::opt<bool>
-StandardLinkOpts("std-link-opts",
- cl::desc("Include the standard link time optimizations"));
+ StandardLinkOpts("std-link-opts",
+ cl::desc("Include the standard link time optimizations"));
static cl::opt<bool>
-OptLevelO1("O1",
- cl::desc("Optimization level 1. Identical to 'opt -O1'"));
+ OptLevelO1("O1", cl::desc("Optimization level 1. Identical to 'opt -O1'"));
static cl::opt<bool>
-OptLevelO2("O2",
- cl::desc("Optimization level 2. Identical to 'opt -O2'"));
+ OptLevelO2("O2", cl::desc("Optimization level 2. Identical to 'opt -O2'"));
+
+static cl::opt<bool> OptLevelOs(
+ "Os",
+ cl::desc(
+ "Like -O2 with extra optimizations for size. Similar to clang -Os"));
static cl::opt<bool>
-OptLevelO3("O3",
- cl::desc("Optimization level 3. Identical to 'opt -O3'"));
+ OptLevelO3("O3", cl::desc("Optimization level 3. Identical to 'opt -O3'"));
static cl::opt<std::string>
-OverrideTriple("mtriple", cl::desc("Override target triple for module"));
+ OverrideTriple("mtriple", cl::desc("Override target triple for module"));
/// BugpointIsInterrupted - Set to true when the user presses ctrl-c.
bool llvm::BugpointIsInterrupted = false;
#ifndef DEBUG_BUGPOINT
-static void BugpointInterruptFunction() {
- BugpointIsInterrupted = true;
-}
+static void BugpointInterruptFunction() { BugpointIsInterrupted = true; }
#endif
// Hack to capture a pass list.
namespace {
- class AddToDriver : public legacy::FunctionPassManager {
- BugDriver &D;
- public:
- AddToDriver(BugDriver &_D) : FunctionPassManager(nullptr), D(_D) {}
-
- void add(Pass *P) override {
- const void *ID = P->getPassID();
- const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(ID);
- D.addPass(PI->getPassArgument());
- }
- };
+class AddToDriver : public legacy::FunctionPassManager {
+ BugDriver &D;
+
+public:
+ AddToDriver(BugDriver &_D) : FunctionPassManager(nullptr), D(_D) {}
+
+ void add(Pass *P) override {
+ const void *ID = P->getPassID();
+ const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(ID);
+ D.addPass(PI->getPassArgument());
+ }
+};
}
#ifdef LINK_POLLY_INTO_TOOLS
@@ -115,7 +118,7 @@ int main(int argc, char **argv) {
#ifndef DEBUG_BUGPOINT
llvm::sys::PrintStackTraceOnErrorSignal(argv[0]);
llvm::PrettyStackTraceProgram X(argc, argv);
- llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
+ llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
#endif
// Initialize passes
@@ -160,9 +163,10 @@ int main(int argc, char **argv) {
MemoryLimit = 400;
}
- BugDriver D(argv[0], FindBugs, TimeoutValue, MemoryLimit,
- UseValgrind, Context);
- if (D.addSources(InputFilenames)) return 1;
+ BugDriver D(argv[0], FindBugs, TimeoutValue, MemoryLimit, UseValgrind,
+ Context);
+ if (D.addSources(InputFilenames))
+ return 1;
AddToDriver PM(D);
@@ -175,9 +179,9 @@ int main(int argc, char **argv) {
if (OptLevelO1 || OptLevelO2 || OptLevelO3) {
PassManagerBuilder Builder;
if (OptLevelO1)
- Builder.Inliner = createAlwaysInlinerPass();
- else if (OptLevelO2)
- Builder.Inliner = createFunctionInliningPass(225);
+ Builder.Inliner = createAlwaysInlinerLegacyPass();
+ else if (OptLevelOs || OptLevelO2)
+ Builder.Inliner = createFunctionInliningPass(2, OptLevelOs ? 1 : 0);
else
Builder.Inliner = createFunctionInliningPass(275);
Builder.populateFunctionPassManager(PM);
@@ -187,17 +191,15 @@ int main(int argc, char **argv) {
for (const PassInfo *PI : PassList)
D.addPass(PI->getPassArgument());
- // Bugpoint has the ability of generating a plethora of core files, so to
- // avoid filling up the disk, we prevent it
+// Bugpoint has the ability to generate a plethora of core files, so to
+// avoid filling up the disk, we prevent it.
#ifndef DEBUG_BUGPOINT
sys::Process::PreventCoreFiles();
#endif
- std::string Error;
- bool Failure = D.run(Error);
- if (!Error.empty()) {
- errs() << Error;
+ if (Error E = D.run()) {
+ errs() << toString(std::move(E));
return 1;
}
- return Failure;
+ return 0;
}
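The driver above prints the Error from D.run() by hand; llvm::ExitOnError is
an alternative way to turn such a failure into a diagnostic plus a non-zero
exit, sketched here with illustrative stand-ins (mightFail and runTool are not
part of this change):

#include "llvm/Support/Error.h"
using namespace llvm;

static Error mightFail() { return Error::success(); } // stand-in entry point

int runTool() {
  // Prints the banner plus the error text and exits non-zero on failure;
  // on success the call simply falls through.
  ExitOnError ExitOnErr("bugpoint: ");
  ExitOnErr(mightFail());
  return 0;
}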
diff --git a/contrib/llvm/tools/llc/llc.cpp b/contrib/llvm/tools/llc/llc.cpp
index 9b6086ad4ea8..aa0beb45b4e9 100644
--- a/contrib/llvm/tools/llc/llc.cpp
+++ b/contrib/llvm/tools/llc/llc.cpp
@@ -118,6 +118,22 @@ static cl::opt<bool> DiscardValueNames(
cl::desc("Discard names from Value (other than GlobalValue)."),
cl::init(false), cl::Hidden);
+static cl::opt<std::string> StopBefore("stop-before",
+ cl::desc("Stop compilation before a specific pass"),
+ cl::value_desc("pass-name"), cl::init(""));
+
+static cl::opt<std::string> StopAfter("stop-after",
+ cl::desc("Stop compilation after a specific pass"),
+ cl::value_desc("pass-name"), cl::init(""));
+
+static cl::opt<std::string> StartBefore("start-before",
+ cl::desc("Resume compilation before a specific pass"),
+ cl::value_desc("pass-name"), cl::init(""));
+
+static cl::opt<std::string> StartAfter("start-after",
+ cl::desc("Resume compilation after a specific pass"),
+ cl::value_desc("pass-name"), cl::init(""));
+
namespace {
static ManagedStatic<std::vector<std::string>> RunPassNames;
@@ -246,7 +262,9 @@ int main(int argc, char **argv) {
initializeCodeGen(*Registry);
initializeLoopStrengthReducePass(*Registry);
initializeLowerIntrinsicsPass(*Registry);
+ initializeCountingFunctionInserterPass(*Registry);
initializeUnreachableBlockElimLegacyPassPass(*Registry);
+ initializeConstantHoistingLegacyPassPass(*Registry);
// Register the target printer for --version.
cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion);
@@ -295,6 +313,20 @@ static bool addPass(PassManagerBase &PM, const char *argv0,
return false;
}
+static AnalysisID getPassID(const char *argv0, const char *OptionName,
+ StringRef PassName) {
+ if (PassName.empty())
+ return nullptr;
+
+ const PassRegistry &PR = *PassRegistry::getPassRegistry();
+ const PassInfo *PI = PR.getPassInfo(PassName);
+ if (!PI) {
+ errs() << argv0 << ": " << OptionName << " pass is not registered.\n";
+ exit(1);
+ }
+ return PI->getTypeInfo();
+}
+
static int compileModule(char **argv, LLVMContext &Context) {
// Load the module to be compiled...
SMDiagnostic Err;
@@ -425,12 +457,9 @@ static int compileModule(char **argv, LLVMContext &Context) {
OS = BOS.get();
}
- AnalysisID StartBeforeID = nullptr;
- AnalysisID StartAfterID = nullptr;
- AnalysisID StopAfterID = nullptr;
- const PassRegistry *PR = PassRegistry::getPassRegistry();
if (!RunPassNames->empty()) {
- if (!StartAfter.empty() || !StopAfter.empty()) {
+ if (!StartAfter.empty() || !StopAfter.empty() || !StartBefore.empty() ||
+ !StopBefore.empty()) {
errs() << argv[0] << ": start-after and/or stop-after passes are "
"redundant when run-pass is specified.\n";
return 1;
@@ -442,8 +471,9 @@ static int compileModule(char **argv, LLVMContext &Context) {
LLVMTargetMachine &LLVMTM = static_cast<LLVMTargetMachine&>(*Target);
TargetPassConfig &TPC = *LLVMTM.createPassConfig(PM);
PM.add(&TPC);
- LLVMTM.addMachineModuleInfo(PM);
- LLVMTM.addMachineFunctionAnalysis(PM, MIR.get());
+ MachineModuleInfo *MMI = new MachineModuleInfo(&LLVMTM);
+ MMI->setMachineFunctionInitializer(MIR.get());
+ PM.add(MMI);
TPC.printAndVerify("");
for (const std::string &RunPassName : *RunPassNames) {
@@ -452,27 +482,25 @@ static int compileModule(char **argv, LLVMContext &Context) {
}
PM.add(createPrintMIRPass(*OS));
} else {
- if (!StartAfter.empty()) {
- const PassInfo *PI = PR->getPassInfo(StartAfter);
- if (!PI) {
- errs() << argv[0] << ": start-after pass is not registered.\n";
- return 1;
- }
- StartAfterID = PI->getTypeInfo();
+ const char *argv0 = argv[0];
+ AnalysisID StartBeforeID = getPassID(argv0, "start-before", StartBefore);
+ AnalysisID StartAfterID = getPassID(argv0, "start-after", StartAfter);
+ AnalysisID StopAfterID = getPassID(argv0, "stop-after", StopAfter);
+ AnalysisID StopBeforeID = getPassID(argv0, "stop-before", StopBefore);
+
+ if (StartBeforeID && StartAfterID) {
+ errs() << argv[0] << ": -start-before and -start-after specified!\n";
+ return 1;
}
- if (!StopAfter.empty()) {
- const PassInfo *PI = PR->getPassInfo(StopAfter);
- if (!PI) {
- errs() << argv[0] << ": stop-after pass is not registered.\n";
- return 1;
- }
- StopAfterID = PI->getTypeInfo();
+ if (StopBeforeID && StopAfterID) {
+ errs() << argv[0] << ": -stop-before and -stop-after specified!\n";
+ return 1;
}
// Ask the target to add backend passes as necessary.
if (Target->addPassesToEmitFile(PM, *OS, FileType, NoVerify,
- StartBeforeID, StartAfterID, StopAfterID,
- MIR.get())) {
+ StartBeforeID, StartAfterID, StopBeforeID,
+ StopAfterID, MIR.get())) {
errs() << argv[0] << ": target does not support generation of this"
<< " file type!\n";
return 1;
diff --git a/contrib/llvm/tools/lli/ChildTarget/ChildTarget.cpp b/contrib/llvm/tools/lli/ChildTarget/ChildTarget.cpp
index f6d2413655ea..77b1d47a9466 100644
--- a/contrib/llvm/tools/lli/ChildTarget/ChildTarget.cpp
+++ b/contrib/llvm/tools/lli/ChildTarget/ChildTarget.cpp
@@ -53,23 +53,12 @@ int main(int argc, char *argv[]) {
RTDyldMemoryManager::deregisterEHFramesInProcess(Addr, Size);
};
- FDRPCChannel Channel(InFD, OutFD);
- typedef remote::OrcRemoteTargetServer<FDRPCChannel, HostOrcArch> JITServer;
+ FDRawChannel Channel(InFD, OutFD);
+ typedef remote::OrcRemoteTargetServer<FDRawChannel, HostOrcArch> JITServer;
JITServer Server(Channel, SymbolLookup, RegisterEHFrames, DeregisterEHFrames);
- while (1) {
- uint32_t RawId;
- ExitOnErr(Server.startReceivingFunction(Channel, RawId));
- auto Id = static_cast<JITServer::JITFuncId>(RawId);
- switch (Id) {
- case JITServer::TerminateSessionId:
- ExitOnErr(Server.handleTerminateSession());
- return 0;
- default:
- ExitOnErr(Server.handleKnownFunction(Id));
- break;
- }
- }
+ while (!Server.receivedTerminate())
+ ExitOnErr(Server.handleOne());
close(InFD);
close(OutFD);
diff --git a/contrib/llvm/tools/lli/OrcLazyJIT.cpp b/contrib/llvm/tools/lli/OrcLazyJIT.cpp
index b13e7696627f..ec61ce5e1545 100644
--- a/contrib/llvm/tools/lli/OrcLazyJIT.cpp
+++ b/contrib/llvm/tools/lli/OrcLazyJIT.cpp
@@ -18,7 +18,7 @@ using namespace llvm;
namespace {
- enum class DumpKind { NoDump, DumpFuncsToStdOut, DumpModsToStdErr,
+ enum class DumpKind { NoDump, DumpFuncsToStdOut, DumpModsToStdOut,
DumpModsToDisk };
cl::opt<DumpKind> OrcDumpKind("orc-lazy-debug",
@@ -30,15 +30,14 @@ namespace {
clEnumValN(DumpKind::DumpFuncsToStdOut,
"funcs-to-stdout",
"Dump function names to stdout."),
- clEnumValN(DumpKind::DumpModsToStdErr,
- "mods-to-stderr",
- "Dump modules to stderr."),
+ clEnumValN(DumpKind::DumpModsToStdOut,
+ "mods-to-stdout",
+ "Dump modules to stdout."),
clEnumValN(DumpKind::DumpModsToDisk,
"mods-to-disk",
"Dump modules to the current "
"working directory. (WARNING: "
- "will overwrite existing files)."),
- clEnumValEnd),
+ "will overwrite existing files).")),
cl::Hidden);
cl::opt<bool> OrcInlineStubs("orc-lazy-inline-stubs",
@@ -71,9 +70,9 @@ OrcLazyJIT::TransformFtor OrcLazyJIT::createDebugDumper() {
return M;
};
- case DumpKind::DumpModsToStdErr:
+ case DumpKind::DumpModsToStdOut:
return [](std::unique_ptr<Module> M) {
- dbgs() << "----- Module Start -----\n" << *M
+ outs() << "----- Module Start -----\n" << *M
<< "----- Module End -----\n";
return M;
@@ -101,11 +100,12 @@ CodeGenOpt::Level getOptLevel();
template <typename PtrTy>
-static PtrTy fromTargetAddress(orc::TargetAddress Addr) {
+static PtrTy fromTargetAddress(JITTargetAddress Addr) {
return reinterpret_cast<PtrTy>(static_cast<uintptr_t>(Addr));
}
-int llvm::runOrcLazyJIT(std::unique_ptr<Module> M, int ArgC, char* ArgV[]) {
+int llvm::runOrcLazyJIT(std::vector<std::unique_ptr<Module>> Ms,
+ const std::vector<std::string> &Args) {
// Add the program's symbols into the JIT's search space.
if (sys::DynamicLibrary::LoadLibraryPermanently(nullptr)) {
errs() << "Error loading program symbols.\n";
@@ -143,16 +143,19 @@ int llvm::runOrcLazyJIT(std::unique_ptr<Module> M, int ArgC, char* ArgV[]) {
OrcInlineStubs);
// Add the module, look up main and run it.
- auto MainHandle = J.addModule(std::move(M));
- auto MainSym = J.findSymbolIn(MainHandle, "main");
+ J.addModuleSet(std::move(Ms));
+ auto MainSym = J.findSymbol("main");
if (!MainSym) {
errs() << "Could not find main function.\n";
return 1;
}
- typedef int (*MainFnPtr)(int, char*[]);
+ typedef int (*MainFnPtr)(int, const char*[]);
+ std::vector<const char *> ArgV;
+ for (auto &Arg : Args)
+ ArgV.push_back(Arg.c_str());
auto Main = fromTargetAddress<MainFnPtr>(MainSym.getAddress());
- return Main(ArgC, ArgV);
+ return Main(ArgV.size(), (const char**)ArgV.data());
}
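The tail of runOrcLazyJIT above rebuilds an argv array from
std::vector<std::string>; a self-contained sketch of that marshalling step
(callMain is an illustrative name):

#include <string>
#include <vector>

typedef int (*MainFnPtr)(int, const char *[]);

// Build the argv shape a JIT'd main expects; the const char* pointers remain
// valid for as long as Args itself is alive.
static int callMain(MainFnPtr Main, const std::vector<std::string> &Args) {
  std::vector<const char *> ArgV;
  ArgV.reserve(Args.size() + 1);
  for (const std::string &Arg : Args)
    ArgV.push_back(Arg.c_str());
  ArgV.push_back(nullptr); // conventional argv terminator
  return Main(static_cast<int>(Args.size()), ArgV.data());
}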
diff --git a/contrib/llvm/tools/lli/OrcLazyJIT.h b/contrib/llvm/tools/lli/OrcLazyJIT.h
index 733bdd8c04d5..05319c345484 100644
--- a/contrib/llvm/tools/lli/OrcLazyJIT.h
+++ b/contrib/llvm/tools/lli/OrcLazyJIT.h
@@ -38,7 +38,7 @@ public:
typedef orc::CompileOnDemandLayer<IRDumpLayerT, CompileCallbackMgr> CODLayerT;
typedef CODLayerT::IndirectStubsManagerBuilderT
IndirectStubsManagerBuilder;
- typedef CODLayerT::ModuleSetHandleT ModuleHandleT;
+ typedef CODLayerT::ModuleSetHandleT ModuleSetHandleT;
OrcLazyJIT(std::unique_ptr<TargetMachine> TM,
std::unique_ptr<CompileCallbackMgr> CCMgr,
@@ -62,18 +62,35 @@ public:
DtorRunner.runViaLayer(CODLayer);
}
- ModuleHandleT addModule(std::unique_ptr<Module> M) {
- // Attach a data-layout if one isn't already present.
- if (M->getDataLayout().isDefault())
- M->setDataLayout(DL);
+ ModuleSetHandleT addModuleSet(std::vector<std::unique_ptr<Module>> Ms) {
+    // Attach data layouts if they aren't already present.
+ for (auto &M : Ms)
+ if (M->getDataLayout().isDefault())
+ M->setDataLayout(DL);
- // Record the static constructors and destructors. We have to do this before
- // we hand over ownership of the module to the JIT.
+ // Rename, bump linkage and record static constructors and destructors.
+ // We have to do this before we hand over ownership of the module to the
+ // JIT.
std::vector<std::string> CtorNames, DtorNames;
- for (auto Ctor : orc::getConstructors(*M))
- CtorNames.push_back(mangle(Ctor.Func->getName()));
- for (auto Dtor : orc::getDestructors(*M))
- DtorNames.push_back(mangle(Dtor.Func->getName()));
+ {
+ unsigned CtorId = 0, DtorId = 0;
+ for (auto &M : Ms) {
+ for (auto Ctor : orc::getConstructors(*M)) {
+ std::string NewCtorName = ("$static_ctor." + Twine(CtorId++)).str();
+ Ctor.Func->setName(NewCtorName);
+ Ctor.Func->setLinkage(GlobalValue::ExternalLinkage);
+ Ctor.Func->setVisibility(GlobalValue::HiddenVisibility);
+ CtorNames.push_back(mangle(NewCtorName));
+ }
+ for (auto Dtor : orc::getDestructors(*M)) {
+ std::string NewDtorName = ("$static_dtor." + Twine(DtorId++)).str();
+ Dtor.Func->setLinkage(GlobalValue::ExternalLinkage);
+ Dtor.Func->setVisibility(GlobalValue::HiddenVisibility);
+ DtorNames.push_back(mangle(Dtor.Func->getName()));
+ Dtor.Func->setName(NewDtorName);
+ }
+ }
+ }
// Symbol resolution order:
// 1) Search the JIT symbols.
@@ -81,27 +98,21 @@ public:
// 3) Search the host process (LLI)'s symbol table.
auto Resolver =
orc::createLambdaResolver(
- [this](const std::string &Name) {
+ [this](const std::string &Name) -> JITSymbol {
if (auto Sym = CODLayer.findSymbol(Name, true))
- return Sym.toRuntimeDyldSymbol();
- if (auto Sym = CXXRuntimeOverrides.searchOverrides(Name))
return Sym;
-
- if (auto Addr =
- RTDyldMemoryManager::getSymbolAddressInProcess(Name))
- return RuntimeDyld::SymbolInfo(Addr, JITSymbolFlags::Exported);
-
- return RuntimeDyld::SymbolInfo(nullptr);
+ return CXXRuntimeOverrides.searchOverrides(Name);
},
[](const std::string &Name) {
- return RuntimeDyld::SymbolInfo(nullptr);
+ if (auto Addr =
+ RTDyldMemoryManager::getSymbolAddressInProcess(Name))
+ return JITSymbol(Addr, JITSymbolFlags::Exported);
+ return JITSymbol(nullptr);
}
);
// Add the module to the JIT.
- std::vector<std::unique_ptr<Module>> S;
- S.push_back(std::move(M));
- auto H = CODLayer.addModuleSet(std::move(S),
+ auto H = CODLayer.addModuleSet(std::move(Ms),
llvm::make_unique<SectionMemoryManager>(),
std::move(Resolver));
@@ -115,11 +126,11 @@ public:
return H;
}
- orc::JITSymbol findSymbol(const std::string &Name) {
+ JITSymbol findSymbol(const std::string &Name) {
return CODLayer.findSymbol(mangle(Name), true);
}
- orc::JITSymbol findSymbolIn(ModuleHandleT H, const std::string &Name) {
+ JITSymbol findSymbolIn(ModuleSetHandleT H, const std::string &Name) {
return CODLayer.findSymbolIn(H, mangle(Name), true);
}
@@ -156,7 +167,8 @@ private:
std::vector<orc::CtorDtorRunner<CODLayerT>> IRStaticDestructorRunners;
};
-int runOrcLazyJIT(std::unique_ptr<Module> M, int ArgC, char* ArgV[]);
+int runOrcLazyJIT(std::vector<std::unique_ptr<Module>> Ms,
+ const std::vector<std::string> &Args);
} // end namespace llvm
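The "Symbol resolution order" comment above describes a chained lookup: JIT
symbols first, then the C++ runtime overrides, then the host process. A
library-agnostic sketch of that idea in plain C++ (this is not the Orc
resolver API itself):

#include <cstdint>
#include <functional>
#include <string>
#include <vector>

using Lookup = std::function<uint64_t(const std::string &)>; // 0 means not found

// Try each resolver in order and return the first address that resolves.
static uint64_t resolveInOrder(const std::vector<Lookup> &Chain,
                               const std::string &Name) {
  for (const Lookup &L : Chain)
    if (uint64_t Addr = L(Name))
      return Addr;
  return 0;
}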
diff --git a/contrib/llvm/tools/lli/RemoteJITUtils.h b/contrib/llvm/tools/lli/RemoteJITUtils.h
index 15068d2faf6f..89a514202567 100644
--- a/contrib/llvm/tools/lli/RemoteJITUtils.h
+++ b/contrib/llvm/tools/lli/RemoteJITUtils.h
@@ -14,7 +14,7 @@
#ifndef LLVM_TOOLS_LLI_REMOTEJITUTILS_H
#define LLVM_TOOLS_LLI_REMOTEJITUTILS_H
-#include "llvm/ExecutionEngine/Orc/RPCChannel.h"
+#include "llvm/ExecutionEngine/Orc/RawByteChannel.h"
#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
#include <mutex>
@@ -25,9 +25,9 @@
#endif
/// RPC channel that reads from and writes from file descriptors.
-class FDRPCChannel final : public llvm::orc::remote::RPCChannel {
+class FDRawChannel final : public llvm::orc::rpc::RawByteChannel {
public:
- FDRPCChannel(int InFD, int OutFD) : InFD(InFD), OutFD(OutFD) {}
+ FDRawChannel(int InFD, int OutFD) : InFD(InFD), OutFD(OutFD) {}
llvm::Error readBytes(char *Dst, unsigned Size) override {
assert(Dst && "Attempt to read into null.");
@@ -72,18 +72,19 @@ private:
};
// launch the remote process (see lli.cpp) and return a channel to it.
-std::unique_ptr<FDRPCChannel> launchRemote();
+std::unique_ptr<FDRawChannel> launchRemote();
namespace llvm {
-// ForwardingMM - Adapter to connect MCJIT to Orc's Remote memory manager.
+// ForwardingMM - Adapter to connect MCJIT to Orc's Remote
+// memory manager.
class ForwardingMemoryManager : public llvm::RTDyldMemoryManager {
public:
void setMemMgr(std::unique_ptr<RuntimeDyld::MemoryManager> MemMgr) {
this->MemMgr = std::move(MemMgr);
}
- void setResolver(std::unique_ptr<RuntimeDyld::SymbolResolver> Resolver) {
+ void setResolver(std::unique_ptr<JITSymbolResolver> Resolver) {
this->Resolver = std::move(Resolver);
}
@@ -134,18 +135,18 @@ public:
// Don't hide the sibling notifyObjectLoaded from RTDyldMemoryManager.
using RTDyldMemoryManager::notifyObjectLoaded;
- RuntimeDyld::SymbolInfo findSymbol(const std::string &Name) override {
+ JITSymbol findSymbol(const std::string &Name) override {
return Resolver->findSymbol(Name);
}
- RuntimeDyld::SymbolInfo
+ JITSymbol
findSymbolInLogicalDylib(const std::string &Name) override {
return Resolver->findSymbolInLogicalDylib(Name);
}
private:
std::unique_ptr<RuntimeDyld::MemoryManager> MemMgr;
- std::unique_ptr<RuntimeDyld::SymbolResolver> Resolver;
+ std::unique_ptr<JITSymbolResolver> Resolver;
};
}
diff --git a/contrib/llvm/tools/lli/lli.cpp b/contrib/llvm/tools/lli/lli.cpp
index 92de5da47216..0823ff469de6 100644
--- a/contrib/llvm/tools/lli/lli.cpp
+++ b/contrib/llvm/tools/lli/lli.cpp
@@ -18,7 +18,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Triple.h"
-#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/CodeGen/LinkAllCodegenComponents.h"
#include "llvm/ExecutionEngine/GenericValue.h"
#include "llvm/ExecutionEngine/Interpreter.h"
@@ -91,8 +91,7 @@ namespace {
"Orc-based MCJIT replacement"),
clEnumValN(JITKind::OrcLazy,
"orc-lazy",
- "Orc-based lazy JIT."),
- clEnumValEnd));
+ "Orc-based lazy JIT.")));
// The MCJIT supports building for a target address space separate from
// the JIT compilation process. Use a forked process and a copying
@@ -194,8 +193,7 @@ namespace {
clEnumValN(Reloc::PIC_, "pic",
"Fully relocatable, position independent code"),
clEnumValN(Reloc::DynamicNoPIC, "dynamic-no-pic",
- "Relocatable external references, non-relocatable code"),
- clEnumValEnd));
+ "Relocatable external references, non-relocatable code")));
cl::opt<llvm::CodeModel::Model>
CMModel("code-model",
@@ -210,8 +208,7 @@ namespace {
clEnumValN(CodeModel::Medium, "medium",
"Medium code model"),
clEnumValN(CodeModel::Large, "large",
- "Large code model"),
- clEnumValEnd));
+ "Large code model")));
cl::opt<bool>
GenerateSoftFloatCalls("soft-float",
@@ -228,8 +225,7 @@ namespace {
clEnumValN(FloatABI::Soft, "soft",
"Soft float ABI (implied by -soft-float)"),
clEnumValN(FloatABI::Hard, "hard",
- "Hard float ABI (uses FP registers)"),
- clEnumValEnd));
+ "Hard float ABI (uses FP registers)")));
ExitOnError ExitOnErr;
}
@@ -275,7 +271,7 @@ public:
return nullptr;
// Load the object from the cache filename
ErrorOr<std::unique_ptr<MemoryBuffer>> IRObjectBuffer =
- MemoryBuffer::getFile(CacheName.c_str(), -1, false);
+ MemoryBuffer::getFile(CacheName, -1, false);
// If the file isn't there, that's OK.
if (!IRObjectBuffer)
return nullptr;
@@ -310,7 +306,7 @@ private:
};
// On Mingw and Cygwin, an external symbol named '__main' is called from the
-// generated 'main' function to allow static intialization. To avoid linking
+// generated 'main' function to allow static initialization. To avoid linking
// problems with remote targets (because lli's remote target support does not
// currently handle external linking) we add a secondary module which defines
// an empty '__main' function.
@@ -361,6 +357,12 @@ CodeGenOpt::Level getOptLevel() {
llvm_unreachable("Unrecognized opt level.");
}
+LLVM_ATTRIBUTE_NORETURN
+static void reportError(SMDiagnostic Err, const char *ProgName) {
+ Err.print(ProgName, errs());
+ exit(1);
+}
+
//===----------------------------------------------------------------------===//
// main Driver function
//
@@ -392,14 +394,24 @@ int main(int argc, char **argv, char * const *envp) {
SMDiagnostic Err;
std::unique_ptr<Module> Owner = parseIRFile(InputFile, Err, Context);
Module *Mod = Owner.get();
- if (!Mod) {
- Err.print(argv[0], errs());
- return 1;
+ if (!Mod)
+ reportError(Err, argv[0]);
+
+ if (UseJITKind == JITKind::OrcLazy) {
+ std::vector<std::unique_ptr<Module>> Ms;
+ Ms.push_back(std::move(Owner));
+ for (auto &ExtraMod : ExtraModules) {
+ Ms.push_back(parseIRFile(ExtraMod, Err, Context));
+ if (!Ms.back())
+ reportError(Err, argv[0]);
+ }
+ std::vector<std::string> Args;
+ Args.push_back(InputFile);
+ for (auto &Arg : InputArgv)
+ Args.push_back(Arg);
+ return runOrcLazyJIT(std::move(Ms), Args);
}
- if (UseJITKind == JITKind::OrcLazy)
- return runOrcLazyJIT(std::move(Owner), argc, argv);
-
if (EnableCacheManager) {
std::string CacheName("file:");
CacheName.append(InputFile);
@@ -408,11 +420,10 @@ int main(int argc, char **argv, char * const *envp) {
// If not jitting lazily, load the whole bitcode file eagerly too.
if (NoLazyCompilation) {
- if (std::error_code EC = Mod->materializeAll()) {
- errs() << argv[0] << ": bitcode didn't read correctly.\n";
- errs() << "Reason: " << EC.message() << "\n";
- exit(1);
- }
+ // Use *argv instead of argv[0] to work around a wrong GCC warning.
+ ExitOnError ExitOnErr(std::string(*argv) +
+ ": bitcode didn't read correctly: ");
+ ExitOnErr(Mod->materializeAll());
}
std::string ErrorMsg;
@@ -477,10 +488,8 @@ int main(int argc, char **argv, char * const *envp) {
// Load any additional modules specified on the command line.
for (unsigned i = 0, e = ExtraModules.size(); i != e; ++i) {
std::unique_ptr<Module> XMod = parseIRFile(ExtraModules[i], Err, Context);
- if (!XMod) {
- Err.print(argv[0], errs());
- return 1;
- }
+ if (!XMod)
+ reportError(Err, argv[0]);
if (EnableCacheManager) {
std::string CacheName("file:");
CacheName.append(ExtraModules[i]);
@@ -495,8 +504,7 @@ int main(int argc, char **argv, char * const *envp) {
if (!Obj) {
// TODO: Actually report errors helpfully.
consumeError(Obj.takeError());
- Err.print(argv[0], errs());
- return 1;
+ reportError(Err, argv[0]);
}
object::OwningBinary<object::ObjectFile> &O = Obj.get();
EE->addObjectFile(std::move(O));
@@ -505,10 +513,8 @@ int main(int argc, char **argv, char * const *envp) {
for (unsigned i = 0, e = ExtraArchives.size(); i != e; ++i) {
ErrorOr<std::unique_ptr<MemoryBuffer>> ArBufOrErr =
MemoryBuffer::getFileOrSTDIN(ExtraArchives[i]);
- if (!ArBufOrErr) {
- Err.print(argv[0], errs());
- return 1;
- }
+ if (!ArBufOrErr)
+ reportError(Err, argv[0]);
std::unique_ptr<MemoryBuffer> &ArBuf = ArBufOrErr.get();
Expected<std::unique_ptr<object::Archive>> ArOrErr =
@@ -519,7 +525,7 @@ int main(int argc, char **argv, char * const *envp) {
logAllUnhandledErrors(ArOrErr.takeError(), OS, "");
OS.flush();
errs() << Buf;
- return 1;
+ exit(1);
}
std::unique_ptr<object::Archive> &Ar = ArOrErr.get();
@@ -645,19 +651,20 @@ int main(int argc, char **argv, char * const *envp) {
// MCJIT itself. FIXME.
  // Launch the remote process and get a channel to it.
- std::unique_ptr<FDRPCChannel> C = launchRemote();
+ std::unique_ptr<FDRawChannel> C = launchRemote();
if (!C) {
errs() << "Failed to launch remote JIT.\n";
exit(1);
}
// Create a remote target client running over the channel.
- typedef orc::remote::OrcRemoteTargetClient<orc::remote::RPCChannel> MyRemote;
- MyRemote R = ExitOnErr(MyRemote::Create(*C));
+ typedef orc::remote::OrcRemoteTargetClient<orc::rpc::RawByteChannel>
+ MyRemote;
+ auto R = ExitOnErr(MyRemote::Create(*C));
// Create a remote memory manager.
std::unique_ptr<MyRemote::RCMemoryManager> RemoteMM;
- ExitOnErr(R.createRemoteMemoryManager(RemoteMM));
+ ExitOnErr(R->createRemoteMemoryManager(RemoteMM));
// Forward MCJIT's memory manager calls to the remote memory manager.
static_cast<ForwardingMemoryManager*>(RTDyldMM)->setMemMgr(
@@ -668,20 +675,20 @@ int main(int argc, char **argv, char * const *envp) {
orc::createLambdaResolver(
[](const std::string &Name) { return nullptr; },
[&](const std::string &Name) {
- if (auto Addr = ExitOnErr(R.getSymbolAddress(Name)))
- return RuntimeDyld::SymbolInfo(Addr, JITSymbolFlags::Exported);
- return RuntimeDyld::SymbolInfo(nullptr);
+ if (auto Addr = ExitOnErr(R->getSymbolAddress(Name)))
+ return JITSymbol(Addr, JITSymbolFlags::Exported);
+ return JITSymbol(nullptr);
}
));
// Grab the target address of the JIT'd main function on the remote and call
// it.
// FIXME: argv and envp handling.
- orc::TargetAddress Entry = EE->getFunctionAddress(EntryFn->getName().str());
+ JITTargetAddress Entry = EE->getFunctionAddress(EntryFn->getName().str());
EE->finalizeObject();
DEBUG(dbgs() << "Executing '" << EntryFn->getName() << "' at 0x"
<< format("%llx", Entry) << "\n");
- Result = ExitOnErr(R.callIntVoid(Entry));
+ Result = ExitOnErr(R->callIntVoid(Entry));
// Like static constructors, the remote target MCJIT support doesn't handle
// this yet. It could. FIXME.
@@ -692,13 +699,13 @@ int main(int argc, char **argv, char * const *envp) {
EE.reset();
// Signal the remote target that we're done JITing.
- ExitOnErr(R.terminateSession());
+ ExitOnErr(R->terminateSession());
}
return Result;
}
-std::unique_ptr<FDRPCChannel> launchRemote() {
+std::unique_ptr<FDRawChannel> launchRemote() {
#ifndef LLVM_ON_UNIX
llvm_unreachable("launchRemote not supported on non-Unix platforms");
#else
@@ -748,6 +755,6 @@ std::unique_ptr<FDRPCChannel> launchRemote() {
close(PipeFD[1][1]);
// Return an RPC channel connected to our end of the pipes.
- return llvm::make_unique<FDRPCChannel>(PipeFD[1][0], PipeFD[0][1]);
+ return llvm::make_unique<FDRawChannel>(PipeFD[1][0], PipeFD[0][1]);
#endif
}
diff --git a/contrib/llvm/tools/llvm-ar/llvm-ar.cpp b/contrib/llvm/tools/llvm-ar/llvm-ar.cpp
index 865152b6af83..b99a396da62a 100644
--- a/contrib/llvm/tools/llvm-ar/llvm-ar.cpp
+++ b/contrib/llvm/tools/llvm-ar/llvm-ar.cpp
@@ -21,6 +21,7 @@
#include "llvm/Object/ArchiveWriter.h"
#include "llvm/Object/MachO.h"
#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/Chrono.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/FileSystem.h"
@@ -93,7 +94,7 @@ static cl::opt<Format>
FormatOpt("format", cl::desc("Archive format to create"),
cl::values(clEnumValN(Default, "default", "default"),
clEnumValN(GNU, "gnu", "gnu"),
- clEnumValN(BSD, "bsd", "bsd"), clEnumValEnd));
+ clEnumValN(BSD, "bsd", "bsd")));
static std::string Options;
@@ -116,6 +117,7 @@ static cl::extrahelp MoreHelp(
" [o] - preserve original dates\n"
" [s] - create an archive index (cf. ranlib)\n"
" [S] - do not build a symbol table\n"
+ " [T] - create a thin archive\n"
" [u] - update only files newer than archive contents\n"
"\nMODIFIERS (generic):\n"
" [c] - do not warn if the library had to be created\n"
@@ -317,8 +319,8 @@ static void doPrint(StringRef Name, const object::Archive::Child &C) {
if (Verbose)
outs() << "Printing " << Name << "\n";
- ErrorOr<StringRef> DataOrErr = C.getBuffer();
- failIfError(DataOrErr.getError());
+ Expected<StringRef> DataOrErr = C.getBuffer();
+ failIfError(DataOrErr.takeError());
StringRef Data = *DataOrErr;
outs().write(Data.data(), Data.size());
}
@@ -337,18 +339,31 @@ static void printMode(unsigned mode) {
// modification time are also printed.
static void doDisplayTable(StringRef Name, const object::Archive::Child &C) {
if (Verbose) {
- sys::fs::perms Mode = C.getAccessMode();
+ Expected<sys::fs::perms> ModeOrErr = C.getAccessMode();
+ failIfError(ModeOrErr.takeError());
+ sys::fs::perms Mode = ModeOrErr.get();
printMode((Mode >> 6) & 007);
printMode((Mode >> 3) & 007);
printMode(Mode & 007);
- outs() << ' ' << C.getUID();
- outs() << '/' << C.getGID();
- ErrorOr<uint64_t> Size = C.getSize();
- failIfError(Size.getError());
+ Expected<unsigned> UIDOrErr = C.getUID();
+ failIfError(UIDOrErr.takeError());
+ outs() << ' ' << UIDOrErr.get();
+ Expected<unsigned> GIDOrErr = C.getGID();
+ failIfError(GIDOrErr.takeError());
+ outs() << '/' << GIDOrErr.get();
+ Expected<uint64_t> Size = C.getSize();
+ failIfError(Size.takeError());
outs() << ' ' << format("%6llu", Size.get());
- outs() << ' ' << C.getLastModified().str();
+ auto ModTimeOrErr = C.getLastModified();
+ failIfError(ModTimeOrErr.takeError());
+ outs() << ' ' << ModTimeOrErr.get();
outs() << ' ';
}
+
+ if (C.getParent()->isThin()) {
+ outs() << sys::path::parent_path(ArchiveName);
+ outs() << '/';
+ }
outs() << Name << "\n";
}
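The llvm-ar hunks above replace ErrorOr<T>/getError() with
Expected<T>/takeError(); a compact sketch of the two consumption idioms side
by side (useErrorOr and useExpected are illustrative names):

#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorOr.h"
using namespace llvm;

// ErrorOr<T> carries a std::error_code that can simply be inspected.
static int useErrorOr(ErrorOr<int> V) {
  if (std::error_code EC = V.getError())
    return -1; // or report EC.message()
  return *V;
}

// Expected<T> carries a full llvm::Error that must be taken exactly once and
// then handled or propagated, which is what failIfError(...takeError()) does.
static int useExpected(Expected<int> V) {
  if (Error E = V.takeError()) {
    consumeError(std::move(E)); // or hand it to failIfError
    return -1;
  }
  return *V;
}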
@@ -356,7 +371,9 @@ static void doDisplayTable(StringRef Name, const object::Archive::Child &C) {
// system.
static void doExtract(StringRef Name, const object::Archive::Child &C) {
// Retain the original mode.
- sys::fs::perms Mode = C.getAccessMode();
+ Expected<sys::fs::perms> ModeOrErr = C.getAccessMode();
+ failIfError(ModeOrErr.takeError());
+ sys::fs::perms Mode = ModeOrErr.get();
int FD;
failIfError(sys::fs::openFileForWrite(Name, FD, sys::fs::F_None, Mode), Name);
@@ -365,7 +382,9 @@ static void doExtract(StringRef Name, const object::Archive::Child &C) {
raw_fd_ostream file(FD, false);
// Get the data and its length
- StringRef Data = *C.getBuffer();
+ Expected<StringRef> BufOrErr = C.getBuffer();
+ failIfError(BufOrErr.takeError());
+ StringRef Data = BufOrErr.get();
// Write the data.
file.write(Data.data(), Data.size());
@@ -373,9 +392,12 @@ static void doExtract(StringRef Name, const object::Archive::Child &C) {
// If we're supposed to retain the original modification times, etc. do so
// now.
- if (OriginalDates)
+ if (OriginalDates) {
+ auto ModTimeOrErr = C.getLastModified();
+ failIfError(ModTimeOrErr.takeError());
failIfError(
- sys::fs::setLastModificationAndAccessTime(FD, C.getLastModified()));
+ sys::fs::setLastModificationAndAccessTime(FD, ModTimeOrErr.get()));
+ }
if (close(FD))
fail("Could not close the file");
@@ -406,14 +428,14 @@ static void performReadOperation(ArchiveOperation Operation,
bool Filter = !Members.empty();
{
- Error Err;
+ Error Err = Error::success();
for (auto &C : OldArchive->children(Err)) {
- ErrorOr<StringRef> NameOrErr = C.getName();
- failIfError(NameOrErr.getError());
+ Expected<StringRef> NameOrErr = C.getName();
+ failIfError(NameOrErr.takeError());
StringRef Name = NameOrErr.get();
if (Filter) {
- auto I = std::find(Members.begin(), Members.end(), Name);
+ auto I = find(Members, Name);
if (I == Members.end())
continue;
Members.erase(I);
@@ -482,10 +504,9 @@ static InsertAction computeInsertAction(ArchiveOperation Operation,
if (Operation == QuickAppend || Members.empty())
return IA_AddOldMember;
- auto MI =
- std::find_if(Members.begin(), Members.end(), [Name](StringRef Path) {
- return Name == sys::path::filename(Path);
- });
+ auto MI = find_if(Members, [Name](StringRef Path) {
+ return Name == sys::path::filename(Path);
+ });
if (MI == Members.end())
return IA_AddOldMember;
@@ -510,7 +531,9 @@ static InsertAction computeInsertAction(ArchiveOperation Operation,
// operation.
sys::fs::file_status Status;
failIfError(sys::fs::status(*MI, Status), *MI);
- if (Status.getLastModificationTime() < Member.getLastModified()) {
+ auto ModTimeOrErr = Member.getLastModified();
+ failIfError(ModTimeOrErr.takeError());
+ if (Status.getLastModificationTime() < ModTimeOrErr.get()) {
if (PosName.empty())
return IA_AddOldMember;
return IA_MoveOldMember;
@@ -533,11 +556,11 @@ computeNewArchiveMembers(ArchiveOperation Operation,
int InsertPos = -1;
StringRef PosName = sys::path::filename(RelPos);
if (OldArchive) {
- Error Err;
+ Error Err = Error::success();
for (auto &Child : OldArchive->children(Err)) {
int Pos = Ret.size();
- ErrorOr<StringRef> NameOrErr = Child.getName();
- failIfError(NameOrErr.getError());
+ Expected<StringRef> NameOrErr = Child.getName();
+ failIfError(NameOrErr.takeError());
StringRef Name = NameOrErr.get();
if (Name == PosName) {
assert(AddAfter || AddBefore);
@@ -705,7 +728,7 @@ static int performOperation(ArchiveOperation Operation,
fail("error opening '" + ArchiveName + "': " + EC.message() + "!");
if (!EC) {
- Error Err;
+ Error Err = Error::success();
object::Archive Archive(Buf.get()->getMemBufferRef(), Err);
EC = errorToErrorCode(std::move(Err));
failIfError(EC,
@@ -767,7 +790,7 @@ static void runMRIScript() {
Archives.push_back(std::move(*LibOrErr));
object::Archive &Lib = *Archives.back();
{
- Error Err;
+ Error Err = Error::success();
for (auto &Member : Lib.children(Err))
addMember(NewMembers, Member);
failIfError(std::move(Err));
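
Most of the llvm-ar changes in this commit are a mechanical move from ErrorOr<T>/std::error_code to Expected<T>/llvm::Error: accessors hand back an Expected, the caller drains it with takeError(), and child iteration takes an Error out-parameter that must start as Error::success() and be checked afterwards. The following self-contained sketch shows that idiom with made-up helpers; apart from the Error/Expected primitives from llvm/Support/Error.h, nothing here is llvm-ar's API.

    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/Error.h"
    #include "llvm/Support/raw_ostream.h"
    #include <cstdlib>
    using namespace llvm;

    // Hypothetical accessor in the style of Archive::Child::getName().
    static Expected<StringRef> getName(bool Ok) {
      if (!Ok)
        return make_error<StringError>("damaged member", inconvertibleErrorCode());
      return StringRef("member.o");
    }

    // Mirrors failIfError(): report and bail out on failure.
    static void failIfError(Error E) {
      if (!E)
        return;
      logAllUnhandledErrors(std::move(E), errs(), "error: ");
      exit(1);
    }

    int main() {
      // Expected<T>: drain the error (even if it is success) before using get().
      Expected<StringRef> NameOrErr = getName(true);
      failIfError(NameOrErr.takeError());
      outs() << "name: " << NameOrErr.get() << "\n";

      // Error out-parameters must be initialized to success and checked after use,
      // which is why the loops above now start with `Error Err = Error::success();`.
      Error Err = Error::success();
      // ... iterate children(Err) here ...
      failIfError(std::move(Err));
      return 0;
    }
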
diff --git a/contrib/llvm/tools/llvm-as/llvm-as.cpp b/contrib/llvm/tools/llvm-as/llvm-as.cpp
index 89a6ee5edfd8..06bc2e990105 100644
--- a/contrib/llvm/tools/llvm-as/llvm-as.cpp
+++ b/contrib/llvm/tools/llvm-as/llvm-as.cpp
@@ -16,7 +16,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/AsmParser/Parser.h"
-#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
diff --git a/contrib/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp b/contrib/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
index 319e34d882dc..f97a18448f0a 100644
--- a/contrib/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
+++ b/contrib/llvm/tools/llvm-bcanalyzer/llvm-bcanalyzer.cpp
@@ -28,9 +28,9 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/StringExtras.h"
+#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Bitcode/BitstreamReader.h"
#include "llvm/Bitcode/LLVMBitCodes.h"
-#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Format.h"
@@ -84,7 +84,7 @@ enum CurStreamTypeType {
/// GetBlockName - Return a symbolic block name if known, otherwise return
/// null.
static const char *GetBlockName(unsigned BlockID,
- const BitstreamReader &StreamFile,
+ const BitstreamBlockInfo &BlockInfo,
CurStreamTypeType CurStreamType) {
// Standard blocks for all bitcode files.
if (BlockID < bitc::FIRST_APPLICATION_BLOCKID) {
@@ -94,8 +94,8 @@ static const char *GetBlockName(unsigned BlockID,
}
// Check to see if we have a blockinfo record for this block, with a name.
- if (const BitstreamReader::BlockInfo *Info =
- StreamFile.getBlockInfo(BlockID)) {
+ if (const BitstreamBlockInfo::BlockInfo *Info =
+ BlockInfo.getBlockInfo(BlockID)) {
if (!Info->Name.empty())
return Info->Name.c_str();
}
@@ -128,7 +128,7 @@ static const char *GetBlockName(unsigned BlockID,
/// GetCodeName - Return a symbolic code name if known, otherwise return
/// null.
static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
- const BitstreamReader &StreamFile,
+ const BitstreamBlockInfo &BlockInfo,
CurStreamTypeType CurStreamType) {
// Standard blocks for all bitcode files.
if (BlockID < bitc::FIRST_APPLICATION_BLOCKID) {
@@ -144,8 +144,8 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
}
// Check to see if we have a blockinfo record for this record, with a name.
- if (const BitstreamReader::BlockInfo *Info =
- StreamFile.getBlockInfo(BlockID)) {
+ if (const BitstreamBlockInfo::BlockInfo *Info =
+ BlockInfo.getBlockInfo(BlockID)) {
for (unsigned i = 0, e = Info->RecordNames.size(); i != e; ++i)
if (Info->RecordNames[i].first == CodeID)
return Info->RecordNames[i].second.c_str();
@@ -311,6 +311,7 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
STRINGIFY_CODE(FS, COMBINED_ALIAS)
STRINGIFY_CODE(FS, COMBINED_ORIGINAL_NAME)
STRINGIFY_CODE(FS, VERSION)
+ STRINGIFY_CODE(FS, TYPE_TESTS)
}
case bitc::METADATA_ATTACHMENT_ID:
switch(CodeID) {
@@ -352,6 +353,8 @@ static const char *GetCodeName(unsigned CodeID, unsigned BlockID,
STRINGIFY_CODE(METADATA, OBJC_PROPERTY)
STRINGIFY_CODE(METADATA, IMPORTED_ENTITY)
STRINGIFY_CODE(METADATA, MODULE)
+ STRINGIFY_CODE(METADATA, INDEX_OFFSET)
+ STRINGIFY_CODE(METADATA, INDEX)
}
case bitc::METADATA_KIND_BLOCK_ID:
switch (CodeID) {
@@ -412,14 +415,14 @@ static std::map<unsigned, PerBlockIDStats> BlockIDStats;
-/// Error - All bitcode analysis errors go through this function, making this a
+/// ReportError - All bitcode analysis errors go through this function, making this a
/// good place to breakpoint if debugging.
-static bool Error(const Twine &Err) {
+static bool ReportError(const Twine &Err) {
errs() << Err << "\n";
return true;
}
-static bool decodeMetadataStringsBlob(BitstreamReader &Reader, StringRef Indent,
+static bool decodeMetadataStringsBlob(StringRef Indent,
ArrayRef<uint64_t> Record,
StringRef Blob) {
if (Blob.empty())
@@ -433,21 +436,15 @@ static bool decodeMetadataStringsBlob(BitstreamReader &Reader, StringRef Indent,
outs() << " num-strings = " << NumStrings << " {\n";
StringRef Lengths = Blob.slice(0, StringsOffset);
- SimpleBitstreamCursor R(Reader);
- R.jumpToPointer(Lengths.begin());
-
- // Ensure that Blob doesn't get invalidated, even if this is reading from a
- // StreamingMemoryObject with corrupt data.
- R.setArtificialByteLimit(R.getCurrentByteNo() + StringsOffset);
-
+ SimpleBitstreamCursor R(Lengths);
StringRef Strings = Blob.drop_front(StringsOffset);
do {
if (R.AtEndOfStream())
- return Error("bad length");
+ return ReportError("bad length");
unsigned Size = R.ReadVBR(6);
if (Strings.size() < Size)
- return Error("truncated chars");
+ return ReportError("truncated chars");
outs() << Indent << " '";
outs().write_escaped(Strings.slice(0, Size), /*hex=*/true);
@@ -459,20 +456,20 @@ static bool decodeMetadataStringsBlob(BitstreamReader &Reader, StringRef Indent,
return false;
}
-static bool decodeBlob(unsigned Code, unsigned BlockID, BitstreamReader &Reader,
- StringRef Indent, ArrayRef<uint64_t> Record,
- StringRef Blob) {
+static bool decodeBlob(unsigned Code, unsigned BlockID, StringRef Indent,
+ ArrayRef<uint64_t> Record, StringRef Blob) {
if (BlockID != bitc::METADATA_BLOCK_ID)
return true;
if (Code != bitc::METADATA_STRINGS)
return true;
- return decodeMetadataStringsBlob(Reader, Indent, Record, Blob);
+ return decodeMetadataStringsBlob(Indent, Record, Blob);
}
/// ParseBlock - Read a block, updating statistics, etc.
-static bool ParseBlock(BitstreamCursor &Stream, unsigned BlockID,
- unsigned IndentLevel, CurStreamTypeType CurStreamType) {
+static bool ParseBlock(BitstreamCursor &Stream, BitstreamBlockInfo &BlockInfo,
+ unsigned BlockID, unsigned IndentLevel,
+ CurStreamTypeType CurStreamType) {
std::string Indent(IndentLevel*2, ' ');
uint64_t BlockBitStart = Stream.GetCurrentBitNo();
@@ -485,15 +482,19 @@ static bool ParseBlock(BitstreamCursor &Stream, unsigned BlockID,
bool DumpRecords = Dump;
if (BlockID == bitc::BLOCKINFO_BLOCK_ID) {
if (Dump) outs() << Indent << "<BLOCKINFO_BLOCK/>\n";
- if (BitstreamCursor(Stream).ReadBlockInfoBlock())
- return Error("Malformed BlockInfoBlock");
+ Optional<BitstreamBlockInfo> NewBlockInfo =
+ Stream.ReadBlockInfoBlock(/*ReadBlockInfoNames=*/true);
+ if (!NewBlockInfo)
+ return ReportError("Malformed BlockInfoBlock");
+ BlockInfo = std::move(*NewBlockInfo);
+ Stream.JumpToBit(BlockBitStart);
// It's not really interesting to dump the contents of the blockinfo block.
DumpRecords = false;
}
unsigned NumWords = 0;
if (Stream.EnterSubBlock(BlockID, &NumWords))
- return Error("Malformed block record");
+ return ReportError("Malformed block record");
// Keep it for later, when we see a MODULE_HASH record
uint64_t BlockEntryPos = Stream.getCurrentByteNo();
@@ -501,8 +502,7 @@ static bool ParseBlock(BitstreamCursor &Stream, unsigned BlockID,
const char *BlockName = nullptr;
if (DumpRecords) {
outs() << Indent << "<";
- if ((BlockName = GetBlockName(BlockID, *Stream.getBitStreamReader(),
- CurStreamType)))
+ if ((BlockName = GetBlockName(BlockID, BlockInfo, CurStreamType)))
outs() << BlockName;
else
outs() << "UnknownBlock" << BlockID;
@@ -516,10 +516,13 @@ static bool ParseBlock(BitstreamCursor &Stream, unsigned BlockID,
SmallVector<uint64_t, 64> Record;
+ // Keep the offset to the metadata index if seen.
+ uint64_t MetadataIndexOffset = 0;
+
// Read all the records for this block.
while (1) {
if (Stream.AtEndOfStream())
- return Error("Premature end of bitstream");
+ return ReportError("Premature end of bitstream");
uint64_t RecordStartBit = Stream.GetCurrentBitNo();
@@ -528,7 +531,7 @@ static bool ParseBlock(BitstreamCursor &Stream, unsigned BlockID,
switch (Entry.Kind) {
case BitstreamEntry::Error:
- return Error("malformed bitcode file");
+ return ReportError("malformed bitcode file");
case BitstreamEntry::EndBlock: {
uint64_t BlockBitEnd = Stream.GetCurrentBitNo();
BlockStats.NumBits += BlockBitEnd-BlockBitStart;
@@ -544,7 +547,8 @@ static bool ParseBlock(BitstreamCursor &Stream, unsigned BlockID,
case BitstreamEntry::SubBlock: {
uint64_t SubBlockBitStart = Stream.GetCurrentBitNo();
- if (ParseBlock(Stream, Entry.ID, IndentLevel+1, CurStreamType))
+ if (ParseBlock(Stream, BlockInfo, Entry.ID, IndentLevel + 1,
+ CurStreamType))
return true;
++BlockStats.NumSubBlocks;
uint64_t SubBlockBitEnd = Stream.GetCurrentBitNo();
@@ -569,7 +573,7 @@ static bool ParseBlock(BitstreamCursor &Stream, unsigned BlockID,
++BlockStats.NumRecords;
StringRef Blob;
- unsigned CurrentRecordPos = Stream.getCurrentByteNo();
+ unsigned CurrentRecordPos = Stream.GetCurrentBitNo();
unsigned Code = Stream.readRecord(Entry.ID, Record, &Blob);
// Increment the # occurrences of this code.
@@ -586,14 +590,11 @@ static bool ParseBlock(BitstreamCursor &Stream, unsigned BlockID,
if (DumpRecords) {
outs() << Indent << " <";
if (const char *CodeName =
- GetCodeName(Code, BlockID, *Stream.getBitStreamReader(),
- CurStreamType))
+ GetCodeName(Code, BlockID, BlockInfo, CurStreamType))
outs() << CodeName;
else
outs() << "UnknownCode" << Code;
- if (NonSymbolic &&
- GetCodeName(Code, BlockID, *Stream.getBitStreamReader(),
- CurStreamType))
+ if (NonSymbolic && GetCodeName(Code, BlockID, BlockInfo, CurStreamType))
outs() << " codeid=" << Code;
const BitCodeAbbrev *Abbv = nullptr;
if (Entry.ID != bitc::UNABBREV_RECORD) {
@@ -604,6 +605,27 @@ static bool ParseBlock(BitstreamCursor &Stream, unsigned BlockID,
for (unsigned i = 0, e = Record.size(); i != e; ++i)
outs() << " op" << i << "=" << (int64_t)Record[i];
+ // If we found a metadata index, let's verify that we had an offset before
+ // and validate its forward reference offset was correct!
+ if (BlockID == bitc::METADATA_BLOCK_ID) {
+ if (Code == bitc::METADATA_INDEX_OFFSET) {
+ if (Record.size() != 2)
+ outs() << "(Invalid record)";
+ else {
+ auto Offset = Record[0] + (Record[1] << 32);
+ MetadataIndexOffset = Stream.GetCurrentBitNo() + Offset;
+ }
+ }
+ if (Code == bitc::METADATA_INDEX) {
+ outs() << " (offset ";
+ if (MetadataIndexOffset == RecordStartBit)
+ outs() << "match)";
+ else
+ outs() << "mismatch: " << MetadataIndexOffset << " vs "
+ << RecordStartBit << ")";
+ }
+ }
+
// If we found a module hash, let's verify that it matches!
if (BlockID == bitc::MODULE_BLOCK_ID && Code == bitc::MODULE_CODE_HASH) {
if (Record.size() != 5)
@@ -613,7 +635,7 @@ static bool ParseBlock(BitstreamCursor &Stream, unsigned BlockID,
SHA1 Hasher;
StringRef Hash;
{
- int BlockSize = CurrentRecordPos - BlockEntryPos;
+ int BlockSize = (CurrentRecordPos / 8) - BlockEntryPos;
auto Ptr = Stream.getPointerToByte(BlockEntryPos, BlockSize);
Hasher.update(ArrayRef<uint8_t>(Ptr, BlockSize));
Hash = Hasher.result();
@@ -658,8 +680,7 @@ static bool ParseBlock(BitstreamCursor &Stream, unsigned BlockID,
}
}
- if (Blob.data() && decodeBlob(Code, BlockID, *Stream.getBitStreamReader(),
- Indent, Record, Blob)) {
+ if (Blob.data() && decodeBlob(Code, BlockID, Indent, Record, Blob)) {
outs() << " blob data = ";
if (ShowBinaryBlobs) {
outs() << "'";
@@ -681,6 +702,10 @@ static bool ParseBlock(BitstreamCursor &Stream, unsigned BlockID,
outs() << "\n";
}
+
+ // Make sure that we can skip the current record.
+ Stream.JumpToBit(CurrentRecordPos);
+ Stream.skipRecord(Entry.ID);
}
}
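
The new METADATA_INDEX_OFFSET handling above records a forward reference: the record stores a 64-bit bit delta split across two 32-bit operands, and the analyzer expects the current bit position plus that delta to land exactly on the start of the later METADATA_INDEX record. A tiny sketch of that bookkeeping, with invented bit positions:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // Two 32-bit operands as they appear in the record, low word first.
      uint64_t Record0 = 0x00001234, Record1 = 0x1;
      uint64_t CurrentBit = 100;                  // bit position right after the offset record
      uint64_t Delta = Record0 + (Record1 << 32); // reassemble the 64-bit delta
      uint64_t ExpectedIndexStart = CurrentBit + Delta;

      // What the analyzer later observes when it reaches METADATA_INDEX.
      uint64_t IndexRecordStartBit = ExpectedIndexStart;
      printf(IndexRecordStartBit == ExpectedIndexStart ? "offset match\n"
                                                       : "offset mismatch\n");
      return 0;
    }
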
@@ -694,18 +719,17 @@ static void PrintSize(uint64_t Bits) {
static bool openBitcodeFile(StringRef Path,
std::unique_ptr<MemoryBuffer> &MemBuf,
- BitstreamReader &StreamFile,
BitstreamCursor &Stream,
CurStreamTypeType &CurStreamType) {
// Read the input file.
ErrorOr<std::unique_ptr<MemoryBuffer>> MemBufOrErr =
MemoryBuffer::getFileOrSTDIN(Path);
if (std::error_code EC = MemBufOrErr.getError())
- return Error(Twine("Error reading '") + Path + "': " + EC.message());
+ return ReportError(Twine("ReportError reading '") + Path + "': " + EC.message());
MemBuf = std::move(MemBufOrErr.get());
if (MemBuf->getBufferSize() & 3)
- return Error("Bitcode stream should be a multiple of 4 bytes in length");
+ return ReportError("Bitcode stream should be a multiple of 4 bytes in length");
const unsigned char *BufPtr = (const unsigned char *)MemBuf->getBufferStart();
const unsigned char *EndBufPtr = BufPtr + MemBuf->getBufferSize();
@@ -714,7 +738,7 @@ static bool openBitcodeFile(StringRef Path,
// The magic number is 0x0B17C0DE stored in little endian.
if (isBitcodeWrapper(BufPtr, EndBufPtr)) {
if (MemBuf->getBufferSize() < BWH_HeaderSize)
- return Error("Invalid bitcode wrapper header");
+ return ReportError("Invalid bitcode wrapper header");
if (Dump) {
unsigned Magic = support::endian::read32le(&BufPtr[BWH_MagicField]);
@@ -732,12 +756,10 @@ static bool openBitcodeFile(StringRef Path,
}
if (SkipBitcodeWrapperHeader(BufPtr, EndBufPtr, true))
- return Error("Invalid bitcode wrapper header");
+ return ReportError("Invalid bitcode wrapper header");
}
- StreamFile = BitstreamReader(BufPtr, EndBufPtr);
- Stream = BitstreamCursor(StreamFile);
- StreamFile.CollectBlockInfoNames();
+ Stream = BitstreamCursor(ArrayRef<uint8_t>(BufPtr, EndBufPtr));
// Read the stream signature.
char Signature[6];
@@ -761,40 +783,40 @@ static bool openBitcodeFile(StringRef Path,
/// AnalyzeBitcode - Analyze the bitcode file specified by InputFilename.
static int AnalyzeBitcode() {
std::unique_ptr<MemoryBuffer> StreamBuffer;
- BitstreamReader StreamFile;
BitstreamCursor Stream;
+ BitstreamBlockInfo BlockInfo;
CurStreamTypeType CurStreamType;
- if (openBitcodeFile(InputFilename, StreamBuffer, StreamFile, Stream,
- CurStreamType))
+ if (openBitcodeFile(InputFilename, StreamBuffer, Stream, CurStreamType))
return true;
+ Stream.setBlockInfo(&BlockInfo);
// Read block info from BlockInfoFilename, if specified.
// The block info must be a top-level block.
if (!BlockInfoFilename.empty()) {
std::unique_ptr<MemoryBuffer> BlockInfoBuffer;
- BitstreamReader BlockInfoFile;
BitstreamCursor BlockInfoCursor;
CurStreamTypeType BlockInfoStreamType;
- if (openBitcodeFile(BlockInfoFilename, BlockInfoBuffer, BlockInfoFile,
- BlockInfoCursor, BlockInfoStreamType))
+ if (openBitcodeFile(BlockInfoFilename, BlockInfoBuffer, BlockInfoCursor,
+ BlockInfoStreamType))
return true;
while (!BlockInfoCursor.AtEndOfStream()) {
unsigned Code = BlockInfoCursor.ReadCode();
if (Code != bitc::ENTER_SUBBLOCK)
- return Error("Invalid record at top-level in block info file");
+ return ReportError("Invalid record at top-level in block info file");
unsigned BlockID = BlockInfoCursor.ReadSubBlockID();
if (BlockID == bitc::BLOCKINFO_BLOCK_ID) {
- if (BlockInfoCursor.ReadBlockInfoBlock())
- return Error("Malformed BlockInfoBlock in block info file");
+ Optional<BitstreamBlockInfo> NewBlockInfo =
+ BlockInfoCursor.ReadBlockInfoBlock(/*ReadBlockInfoNames=*/true);
+ if (!NewBlockInfo)
+ return ReportError("Malformed BlockInfoBlock in block info file");
+ BlockInfo = std::move(*NewBlockInfo);
break;
}
BlockInfoCursor.SkipBlock();
}
-
- StreamFile.takeBlockInfo(std::move(BlockInfoFile));
}
unsigned NumTopBlocks = 0;
@@ -803,18 +825,18 @@ static int AnalyzeBitcode() {
while (!Stream.AtEndOfStream()) {
unsigned Code = Stream.ReadCode();
if (Code != bitc::ENTER_SUBBLOCK)
- return Error("Invalid record at top-level");
+ return ReportError("Invalid record at top-level");
unsigned BlockID = Stream.ReadSubBlockID();
- if (ParseBlock(Stream, BlockID, 0, CurStreamType))
+ if (ParseBlock(Stream, BlockInfo, BlockID, 0, CurStreamType))
return true;
++NumTopBlocks;
}
if (Dump) outs() << "\n\n";
- uint64_t BufferSizeBits = StreamFile.getBitcodeBytes().getExtent() * CHAR_BIT;
+ uint64_t BufferSizeBits = Stream.getBitcodeBytes().size() * CHAR_BIT;
// Print a summary of the read file.
outs() << "Summary of " << InputFilename << ":\n";
outs() << " Total size: ";
@@ -833,8 +855,8 @@ static int AnalyzeBitcode() {
for (std::map<unsigned, PerBlockIDStats>::iterator I = BlockIDStats.begin(),
E = BlockIDStats.end(); I != E; ++I) {
outs() << " Block ID #" << I->first;
- if (const char *BlockName = GetBlockName(I->first, StreamFile,
- CurStreamType))
+ if (const char *BlockName =
+ GetBlockName(I->first, BlockInfo, CurStreamType))
outs() << " (" << BlockName << ")";
outs() << ":\n";
@@ -898,9 +920,8 @@ static int AnalyzeBitcode() {
outs() << " ";
outs() << " ";
- if (const char *CodeName =
- GetCodeName(FreqPairs[i].second, I->first, StreamFile,
- CurStreamType))
+ if (const char *CodeName = GetCodeName(FreqPairs[i].second, I->first,
+ BlockInfo, CurStreamType))
outs() << CodeName << "\n";
else
outs() << "UnknownCode" << FreqPairs[i].second << "\n";
diff --git a/contrib/llvm/tools/llvm-cov/CodeCoverage.cpp b/contrib/llvm/tools/llvm-cov/CodeCoverage.cpp
index 0a4d1a67d610..0a9807ab0033 100644
--- a/contrib/llvm/tools/llvm-cov/CodeCoverage.cpp
+++ b/contrib/llvm/tools/llvm-cov/CodeCoverage.cpp
@@ -30,6 +30,7 @@
#include "llvm/Support/Path.h"
#include "llvm/Support/Process.h"
#include "llvm/Support/Program.h"
+#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Support/ThreadPool.h"
#include "llvm/Support/ToolOutputFile.h"
#include <functional>
@@ -38,6 +39,9 @@
using namespace llvm;
using namespace coverage;
+void exportCoverageDataToJson(const coverage::CoverageMapping &CoverageMapping,
+ raw_ostream &OS);
+
namespace {
/// \brief The implementation of the coverage tool.
class CodeCoverageTool {
@@ -46,24 +50,28 @@ public:
/// \brief The show command.
Show,
/// \brief The report command.
- Report
+ Report,
+ /// \brief The export command.
+ Export
};
+ int run(Command Cmd, int argc, const char **argv);
+
+private:
/// \brief Print the error message to the error output stream.
void error(const Twine &Message, StringRef Whence = "");
- /// \brief Record (but do not print) an error message in a thread-safe way.
- void deferError(const Twine &Message, StringRef Whence = "");
-
- /// \brief Record (but do not print) a warning message in a thread-safe way.
- void deferWarning(const Twine &Message, StringRef Whence = "");
-
- /// \brief Print (and then clear) all deferred error and warning messages.
- void consumeDeferredMessages();
+ /// \brief Print the warning message to the error output stream.
+ void warning(const Twine &Message, StringRef Whence = "");
- /// \brief Append a reference to a private copy of \p Path into SourceFiles.
+ /// \brief Convert \p Path into an absolute path and append it to the list
+ /// of collected paths.
void addCollectedPath(const std::string &Path);
+ /// \brief If \p Path is a regular file, collect the path. If it's a
+ /// directory, recursively collect all of the paths within the directory.
+ void collectPaths(const std::string &Path);
+
/// \brief Return a memory buffer for the given source file.
ErrorOr<const MemoryBuffer &> getSourceFile(StringRef SourceFile);
@@ -81,16 +89,21 @@ public:
std::unique_ptr<SourceCoverageView>
createSourceFileView(StringRef SourceFile, const CoverageMapping &Coverage);
- /// \brief Load the coverage mapping data. Return nullptr if an error occured.
+ /// \brief Load the coverage mapping data. Return nullptr if an error occurred.
std::unique_ptr<CoverageMapping> load();
+ /// \brief Remove input source files which aren't mapped by \p Coverage.
+ void removeUnmappedInputs(const CoverageMapping &Coverage);
+
/// \brief If a demangler is available, demangle all symbol names.
void demangleSymbols(const CoverageMapping &Coverage);
/// \brief Demangle \p Sym if possible. Otherwise, just return \p Sym.
StringRef getSymbolForHumans(StringRef Sym) const;
- int run(Command Cmd, int argc, const char **argv);
+ /// \brief Write out a source file view to the filesystem.
+ void writeSourceFileView(StringRef SourceFile, CoverageMapping *Coverage,
+ CoveragePrinter *Printer, bool ShowFilenames);
typedef llvm::function_ref<int(int, const char **)> CommandLineParserType;
@@ -100,25 +113,34 @@ public:
int report(int argc, const char **argv,
CommandLineParserType commandLineParser);
- std::string ObjectFilename;
+ int export_(int argc, const char **argv,
+ CommandLineParserType commandLineParser);
+
+ std::vector<StringRef> ObjectFilenames;
CoverageViewOptions ViewOpts;
- std::string PGOFilename;
CoverageFiltersMatchAll Filters;
- std::vector<StringRef> SourceFiles;
+
+ /// The path to the indexed profile.
+ std::string PGOFilename;
+
+ /// A list of input source files.
+ std::vector<std::string> SourceFiles;
+
+ /// Whether or not we're in -filename-equivalence mode.
bool CompareFilenamesOnly;
+
+ /// In -filename-equivalence mode, this maps absolute paths from the
+ /// coverage mapping data to input source files.
StringMap<std::string> RemappedFilenames;
+
+ /// The architecture the coverage mapping data targets.
std::string CoverageArch;
-private:
/// A cache for demangled symbol names.
StringMap<std::string> DemangledNames;
- /// File paths (absolute, or otherwise) to input source files.
- std::vector<std::string> CollectedPaths;
-
/// Errors and warnings which have not been printed.
- std::mutex DeferredMessagesLock;
- std::vector<std::string> DeferredMessages;
+ std::mutex ErrsLock;
/// A container for input source file buffers.
std::mutex LoadedSourceFilesLock;
@@ -138,29 +160,57 @@ static std::string getErrorString(const Twine &Message, StringRef Whence,
}
void CodeCoverageTool::error(const Twine &Message, StringRef Whence) {
- errs() << getErrorString(Message, Whence, false);
+ std::unique_lock<std::mutex> Guard{ErrsLock};
+ ViewOpts.colored_ostream(errs(), raw_ostream::RED)
+ << getErrorString(Message, Whence, false);
}
-void CodeCoverageTool::deferError(const Twine &Message, StringRef Whence) {
- std::unique_lock<std::mutex> Guard{DeferredMessagesLock};
- DeferredMessages.emplace_back(getErrorString(Message, Whence, false));
+void CodeCoverageTool::warning(const Twine &Message, StringRef Whence) {
+ std::unique_lock<std::mutex> Guard{ErrsLock};
+ ViewOpts.colored_ostream(errs(), raw_ostream::RED)
+ << getErrorString(Message, Whence, true);
}
-void CodeCoverageTool::deferWarning(const Twine &Message, StringRef Whence) {
- std::unique_lock<std::mutex> Guard{DeferredMessagesLock};
- DeferredMessages.emplace_back(getErrorString(Message, Whence, true));
+void CodeCoverageTool::addCollectedPath(const std::string &Path) {
+ if (CompareFilenamesOnly) {
+ SourceFiles.emplace_back(Path);
+ } else {
+ SmallString<128> EffectivePath(Path);
+ if (std::error_code EC = sys::fs::make_absolute(EffectivePath)) {
+ error(EC.message(), Path);
+ return;
+ }
+ sys::path::remove_dots(EffectivePath, /*remove_dot_dots=*/true);
+ SourceFiles.emplace_back(EffectivePath.str());
+ }
}
-void CodeCoverageTool::consumeDeferredMessages() {
- std::unique_lock<std::mutex> Guard{DeferredMessagesLock};
- for (const std::string &Message : DeferredMessages)
- ViewOpts.colored_ostream(errs(), raw_ostream::RED) << Message;
- DeferredMessages.clear();
-}
+void CodeCoverageTool::collectPaths(const std::string &Path) {
+ llvm::sys::fs::file_status Status;
+ llvm::sys::fs::status(Path, Status);
+ if (!llvm::sys::fs::exists(Status)) {
+ if (CompareFilenamesOnly)
+ addCollectedPath(Path);
+ else
+ error("Missing source file", Path);
+ return;
+ }
-void CodeCoverageTool::addCollectedPath(const std::string &Path) {
- CollectedPaths.push_back(Path);
- SourceFiles.emplace_back(CollectedPaths.back());
+ if (llvm::sys::fs::is_regular_file(Status)) {
+ addCollectedPath(Path);
+ return;
+ }
+
+ if (llvm::sys::fs::is_directory(Status)) {
+ std::error_code EC;
+ for (llvm::sys::fs::recursive_directory_iterator F(Path, EC), E;
+ F != E && !EC; F.increment(EC)) {
+ if (llvm::sys::fs::is_regular_file(F->path()))
+ addCollectedPath(F->path());
+ }
+ if (EC)
+ warning(EC.message(), Path);
+ }
}
ErrorOr<const MemoryBuffer &>
@@ -177,7 +227,7 @@ CodeCoverageTool::getSourceFile(StringRef SourceFile) {
return *Files.second;
auto Buffer = MemoryBuffer::getFile(SourceFile);
if (auto EC = Buffer.getError()) {
- deferError(EC.message(), SourceFile);
+ error(EC.message(), SourceFile);
return EC;
}
LoadedSourceFiles.emplace_back(SourceFile, std::move(Buffer.get()));
@@ -241,21 +291,24 @@ CodeCoverageTool::createSourceFileView(StringRef SourceFile,
attachExpansionSubViews(*View, Expansions, Coverage);
for (const auto *Function : Coverage.getInstantiations(SourceFile)) {
- auto SubViewCoverage = Coverage.getCoverageForFunction(*Function);
- auto SubViewExpansions = SubViewCoverage.getExpansions();
- auto SubView = SourceCoverageView::create(
- getSymbolForHumans(Function->Name), SourceBuffer.get(), ViewOpts,
- std::move(SubViewCoverage));
- attachExpansionSubViews(*SubView, SubViewExpansions, Coverage);
+ std::unique_ptr<SourceCoverageView> SubView{nullptr};
+
+ StringRef Funcname = getSymbolForHumans(Function->Name);
- if (SubView) {
- unsigned FileID = Function->CountedRegions.front().FileID;
- unsigned Line = 0;
- for (const auto &CR : Function->CountedRegions)
- if (CR.FileID == FileID)
- Line = std::max(CR.LineEnd, Line);
- View->addInstantiation(Function->Name, Line, std::move(SubView));
+ if (Function->ExecutionCount > 0) {
+ auto SubViewCoverage = Coverage.getCoverageForFunction(*Function);
+ auto SubViewExpansions = SubViewCoverage.getExpansions();
+ SubView = SourceCoverageView::create(
+ Funcname, SourceBuffer.get(), ViewOpts, std::move(SubViewCoverage));
+ attachExpansionSubViews(*SubView, SubViewExpansions, Coverage);
}
+
+ unsigned FileID = Function->CountedRegions.front().FileID;
+ unsigned Line = 0;
+ for (const auto &CR : Function->CountedRegions)
+ if (CR.FileID == FileID)
+ Line = std::max(CR.LineEnd, Line);
+ View->addInstantiation(Funcname, Line, std::move(SubView));
}
return View;
}
@@ -272,39 +325,59 @@ static bool modifiedTimeGT(StringRef LHS, StringRef RHS) {
}
std::unique_ptr<CoverageMapping> CodeCoverageTool::load() {
- if (modifiedTimeGT(ObjectFilename, PGOFilename))
- errs() << "warning: profile data may be out of date - object is newer\n";
- auto CoverageOrErr = CoverageMapping::load(ObjectFilename, PGOFilename,
- CoverageArch);
+ for (StringRef ObjectFilename : ObjectFilenames)
+ if (modifiedTimeGT(ObjectFilename, PGOFilename))
+ warning("profile data may be out of date - object is newer",
+ ObjectFilename);
+ auto CoverageOrErr =
+ CoverageMapping::load(ObjectFilenames, PGOFilename, CoverageArch);
if (Error E = CoverageOrErr.takeError()) {
- colored_ostream(errs(), raw_ostream::RED)
- << "error: Failed to load coverage: " << toString(std::move(E)) << "\n";
+ error("Failed to load coverage: " + toString(std::move(E)),
+ join(ObjectFilenames.begin(), ObjectFilenames.end(), ", "));
return nullptr;
}
auto Coverage = std::move(CoverageOrErr.get());
unsigned Mismatched = Coverage->getMismatchedCount();
- if (Mismatched) {
- colored_ostream(errs(), raw_ostream::RED)
- << "warning: " << Mismatched << " functions have mismatched data. ";
- errs() << "\n";
- }
+ if (Mismatched)
+ warning(utostr(Mismatched) + " functions have mismatched data");
- if (CompareFilenamesOnly) {
- auto CoveredFiles = Coverage.get()->getUniqueSourceFiles();
+ if (!SourceFiles.empty())
+ removeUnmappedInputs(*Coverage);
+
+ demangleSymbols(*Coverage);
+
+ return Coverage;
+}
+
+void CodeCoverageTool::removeUnmappedInputs(const CoverageMapping &Coverage) {
+ std::vector<StringRef> CoveredFiles = Coverage.getUniqueSourceFiles();
+
+ auto UncoveredFilesIt = SourceFiles.end();
+ if (!CompareFilenamesOnly) {
+ // The user may have specified source files which aren't in the coverage
+ // mapping. Filter these files away.
+ UncoveredFilesIt = std::remove_if(
+ SourceFiles.begin(), SourceFiles.end(), [&](const std::string &SF) {
+ return !std::binary_search(CoveredFiles.begin(), CoveredFiles.end(),
+ SF);
+ });
+ } else {
for (auto &SF : SourceFiles) {
StringRef SFBase = sys::path::filename(SF);
- for (const auto &CF : CoveredFiles)
+ for (const auto &CF : CoveredFiles) {
if (SFBase == sys::path::filename(CF)) {
RemappedFilenames[CF] = SF;
SF = CF;
break;
}
+ }
}
+ UncoveredFilesIt = std::remove_if(
+ SourceFiles.begin(), SourceFiles.end(),
+ [&](const std::string &SF) { return !RemappedFilenames.count(SF); });
}
- demangleSymbols(*Coverage);
-
- return Coverage;
+ SourceFiles.erase(UncoveredFilesIt, SourceFiles.end());
}
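
removeUnmappedInputs() above is the classic erase-remove idiom: std::remove_if compacts the surviving entries to the front and returns an iterator to the first dead slot, and a single erase() drops the tail. The non-equivalence path can use std::binary_search because getUniqueSourceFiles() returns a sorted list. A generic sketch of the same shape, outside llvm-cov:

    #include <algorithm>
    #include <iostream>
    #include <string>
    #include <vector>

    int main() {
      // Sorted list of files the coverage data actually knows about.
      std::vector<std::string> Covered = {"a.cpp", "c.cpp"};
      // User-supplied inputs, possibly including files with no mapping.
      std::vector<std::string> Sources = {"a.cpp", "b.cpp", "c.cpp", "d.cpp"};

      auto Dead = std::remove_if(Sources.begin(), Sources.end(),
                                 [&](const std::string &SF) {
                                   return !std::binary_search(Covered.begin(),
                                                              Covered.end(), SF);
                                 });
      Sources.erase(Dead, Sources.end()); // drop everything remove_if compacted away

      for (const auto &S : Sources)
        std::cout << S << "\n"; // prints a.cpp and c.cpp
      return 0;
    }
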
void CodeCoverageTool::demangleSymbols(const CoverageMapping &Coverage) {
@@ -390,14 +463,43 @@ StringRef CodeCoverageTool::getSymbolForHumans(StringRef Sym) const {
return DemangledName->getValue();
}
+void CodeCoverageTool::writeSourceFileView(StringRef SourceFile,
+ CoverageMapping *Coverage,
+ CoveragePrinter *Printer,
+ bool ShowFilenames) {
+ auto View = createSourceFileView(SourceFile, *Coverage);
+ if (!View) {
+ warning("The file '" + SourceFile + "' isn't covered.");
+ return;
+ }
+
+ auto OSOrErr = Printer->createViewFile(SourceFile, /*InToplevel=*/false);
+ if (Error E = OSOrErr.takeError()) {
+ error("Could not create view file!", toString(std::move(E)));
+ return;
+ }
+ auto OS = std::move(OSOrErr.get());
+
+ View->print(*OS.get(), /*Wholefile=*/true,
+ /*ShowSourceName=*/ShowFilenames);
+ Printer->closeViewFile(std::move(OS));
+}
+
int CodeCoverageTool::run(Command Cmd, int argc, const char **argv) {
- cl::opt<std::string, true> ObjectFilename(
- cl::Positional, cl::Required, cl::location(this->ObjectFilename),
- cl::desc("Covered executable or object file."));
+ cl::opt<std::string> CovFilename(
+ cl::Positional, cl::desc("Covered executable or object file."));
+
+ cl::list<std::string> CovFilenames(
+ "object", cl::desc("Coverage executable or object file"), cl::ZeroOrMore,
+ cl::CommaSeparated);
cl::list<std::string> InputSourceFiles(
cl::Positional, cl::desc("<Source files>"), cl::ZeroOrMore);
+ cl::opt<bool> DebugDumpCollectedPaths(
+ "dump-collected-paths", cl::Optional, cl::Hidden,
+ cl::desc("Show the collected paths to source files"));
+
cl::opt<std::string, true> PGOFilename(
"instr-profile", cl::Required, cl::location(this->PGOFilename),
cl::desc(
@@ -414,8 +516,7 @@ int CodeCoverageTool::run(Command Cmd, int argc, const char **argv) {
cl::values(clEnumValN(CoverageViewOptions::OutputFormat::Text, "text",
"Text output"),
clEnumValN(CoverageViewOptions::OutputFormat::HTML, "html",
- "HTML output"),
- clEnumValEnd),
+ "HTML output")),
cl::init(CoverageViewOptions::OutputFormat::Text));
cl::opt<bool> FilenameEquivalence(
@@ -472,6 +573,15 @@ int CodeCoverageTool::run(Command Cmd, int argc, const char **argv) {
ViewOpts.Debug = DebugDump;
CompareFilenamesOnly = FilenameEquivalence;
+ if (!CovFilename.empty())
+ ObjectFilenames.emplace_back(CovFilename);
+ for (const std::string &Filename : CovFilenames)
+ ObjectFilenames.emplace_back(Filename);
+ if (ObjectFilenames.empty()) {
+ errs() << "No filenames specified!\n";
+ ::exit(1);
+ }
+
ViewOpts.Format = Format;
switch (ViewOpts.Format) {
case CoverageViewOptions::OutputFormat::Text:
@@ -481,7 +591,7 @@ int CodeCoverageTool::run(Command Cmd, int argc, const char **argv) {
break;
case CoverageViewOptions::OutputFormat::HTML:
if (UseColor == cl::BOU_FALSE)
- error("Color output cannot be disabled when generating html.");
+ errs() << "Color output cannot be disabled when generating html.\n";
ViewOpts.Colors = true;
break;
}
@@ -530,20 +640,20 @@ int CodeCoverageTool::run(Command Cmd, int argc, const char **argv) {
if (!Arch.empty() &&
Triple(Arch).getArch() == llvm::Triple::ArchType::UnknownArch) {
- errs() << "error: Unknown architecture: " << Arch << "\n";
+ error("Unknown architecture: " + Arch);
return 1;
}
CoverageArch = Arch;
- for (const auto &File : InputSourceFiles) {
- SmallString<128> Path(File);
- if (!CompareFilenamesOnly)
- if (std::error_code EC = sys::fs::make_absolute(Path)) {
- errs() << "error: " << File << ": " << EC.message();
- return 1;
- }
- addCollectedPath(Path.str());
+ for (const std::string &File : InputSourceFiles)
+ collectPaths(File);
+
+ if (DebugDumpCollectedPaths) {
+ for (const std::string &SF : SourceFiles)
+ outs() << SF << '\n';
+ ::exit(0);
}
+
return 0;
};
@@ -552,6 +662,8 @@ int CodeCoverageTool::run(Command Cmd, int argc, const char **argv) {
return show(argc, argv, commandLineParser);
case Report:
return report(argc, argv, commandLineParser);
+ case Export:
+ return export_(argc, argv, commandLineParser);
}
return 0;
}
@@ -591,6 +703,15 @@ int CodeCoverageTool::show(int argc, const char **argv,
cl::alias ShowOutputDirectoryA("o", cl::desc("Alias for --output-dir"),
cl::aliasopt(ShowOutputDirectory));
+ cl::opt<uint32_t> TabSize(
+ "tab-size", cl::init(2),
+ cl::desc(
+ "Set tab expansion size for html coverage reports (default = 2)"));
+
+ cl::opt<std::string> ProjectTitle(
+ "project-title", cl::Optional,
+ cl::desc("Set project title for the coverage report"));
+
auto Err = commandLineParser(argc, argv);
if (Err)
return Err;
@@ -603,6 +724,8 @@ int CodeCoverageTool::show(int argc, const char **argv,
ViewOpts.ShowExpandedRegions = ShowExpansions;
ViewOpts.ShowFunctionInstantiations = ShowInstantiations;
ViewOpts.ShowOutputDirectory = ShowOutputDirectory;
+ ViewOpts.TabSize = TabSize;
+ ViewOpts.ProjectTitle = ProjectTitle;
if (ViewOpts.hasOutputDirectory()) {
if (auto E = sys::fs::create_directories(ViewOpts.ShowOutputDirectory)) {
@@ -611,6 +734,19 @@ int CodeCoverageTool::show(int argc, const char **argv,
}
}
+ sys::fs::file_status Status;
+ if (sys::fs::status(PGOFilename, Status)) {
+ error("profdata file error: can not get the file status. \n");
+ return 1;
+ }
+
+ auto ModifiedTime = Status.getLastModificationTime();
+ std::string ModifiedTimeStr = to_string(ModifiedTime);
+ size_t found = ModifiedTimeStr.rfind(':');
+ ViewOpts.CreatedTimeStr = (found != std::string::npos)
+ ? "Created: " + ModifiedTimeStr.substr(0, found)
+ : "Created: " + ModifiedTimeStr;
+
auto Coverage = load();
if (!Coverage)
return 1;
@@ -632,9 +768,7 @@ int CodeCoverageTool::show(int argc, const char **argv,
auto mainView = createFunctionView(Function, *Coverage);
if (!mainView) {
- ViewOpts.colored_ostream(errs(), raw_ostream::RED)
- << "warning: Could not read coverage for '" << Function.Name << "'."
- << "\n";
+ warning("Could not read coverage for '" + Function.Name + "'.");
continue;
}
@@ -646,7 +780,9 @@ int CodeCoverageTool::show(int argc, const char **argv,
}
// Show files
- bool ShowFilenames = SourceFiles.size() != 1;
+ bool ShowFilenames =
+ (SourceFiles.size() != 1) || ViewOpts.hasOutputDirectory() ||
+ (ViewOpts.Format == CoverageViewOptions::OutputFormat::HTML);
if (SourceFiles.empty())
// Get the source files from the function coverage mapping.
@@ -655,43 +791,27 @@ int CodeCoverageTool::show(int argc, const char **argv,
// Create an index out of the source files.
if (ViewOpts.hasOutputDirectory()) {
- if (Error E = Printer->createIndexFile(SourceFiles)) {
+ if (Error E = Printer->createIndexFile(SourceFiles, *Coverage)) {
error("Could not create index file!", toString(std::move(E)));
return 1;
}
}
- // In -output-dir mode, it's safe to use multiple threads to print files.
- unsigned ThreadCount = 1;
- if (ViewOpts.hasOutputDirectory())
- ThreadCount = std::thread::hardware_concurrency();
- ThreadPool Pool(ThreadCount);
-
- for (StringRef SourceFile : SourceFiles) {
- Pool.async([this, SourceFile, &Coverage, &Printer, ShowFilenames] {
- auto View = createSourceFileView(SourceFile, *Coverage);
- if (!View) {
- deferWarning("The file '" + SourceFile.str() + "' isn't covered.");
- return;
- }
-
- auto OSOrErr = Printer->createViewFile(SourceFile, /*InToplevel=*/false);
- if (Error E = OSOrErr.takeError()) {
- deferError("Could not create view file!", toString(std::move(E)));
- return;
- }
- auto OS = std::move(OSOrErr.get());
-
- View->print(*OS.get(), /*Wholefile=*/true,
- /*ShowSourceName=*/ShowFilenames);
- Printer->closeViewFile(std::move(OS));
- });
+ // FIXME: Sink the hardware_concurrency() == 1 check into ThreadPool.
+ if (!ViewOpts.hasOutputDirectory() ||
+ std::thread::hardware_concurrency() == 1) {
+ for (const std::string &SourceFile : SourceFiles)
+ writeSourceFileView(SourceFile, Coverage.get(), Printer.get(),
+ ShowFilenames);
+ } else {
+ // In -output-dir mode, it's safe to use multiple threads to print files.
+ ThreadPool Pool;
+ for (const std::string &SourceFile : SourceFiles)
+ Pool.async(&CodeCoverageTool::writeSourceFileView, this, SourceFile,
+ Coverage.get(), Printer.get(), ShowFilenames);
+ Pool.wait();
}
- Pool.wait();
-
- consumeDeferredMessages();
-
return 0;
}
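
The rewritten show loop above replaces the deferred-message machinery with a single writeSourceFileView() helper and only spins up a thread pool when an output directory is in use and more than one hardware thread is available. The same dispatch shape, sketched with the standard library instead of llvm::ThreadPool; WorkPerFile stands in for writeSourceFileView and is not the tool's API:

    #include <future>
    #include <iostream>
    #include <string>
    #include <thread>
    #include <vector>

    int main() {
      std::vector<std::string> SourceFiles = {"a.cpp", "b.cpp", "c.cpp"};
      auto WorkPerFile = [](const std::string &SF) {
        std::cout << "rendering " << SF << "\n";
      };

      bool UseThreads = std::thread::hardware_concurrency() > 1;
      if (!UseThreads) {
        // Serial fallback, mirroring the hardware_concurrency() == 1 branch.
        for (const auto &SF : SourceFiles)
          WorkPerFile(SF);
      } else {
        // One task per file; waiting on the futures corresponds to Pool.wait().
        std::vector<std::future<void>> Tasks;
        for (const auto &SF : SourceFiles)
          Tasks.push_back(std::async(std::launch::async, WorkPerFile, SF));
        for (auto &T : Tasks)
          T.get();
      }
      return 0;
    }
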
@@ -708,7 +828,7 @@ int CodeCoverageTool::report(int argc, const char **argv,
if (!Coverage)
return 1;
- CoverageReport Report(ViewOpts, std::move(Coverage));
+ CoverageReport Report(ViewOpts, *Coverage.get());
if (SourceFiles.empty())
Report.renderFileReports(llvm::outs());
else
@@ -716,6 +836,24 @@ int CodeCoverageTool::report(int argc, const char **argv,
return 0;
}
+int CodeCoverageTool::export_(int argc, const char **argv,
+ CommandLineParserType commandLineParser) {
+
+ auto Err = commandLineParser(argc, argv);
+ if (Err)
+ return Err;
+
+ auto Coverage = load();
+ if (!Coverage) {
+ error("Could not load coverage information");
+ return 1;
+ }
+
+ exportCoverageDataToJson(*Coverage.get(), outs());
+
+ return 0;
+}
+
int showMain(int argc, const char *argv[]) {
CodeCoverageTool Tool;
return Tool.run(CodeCoverageTool::Show, argc, argv);
@@ -725,3 +863,8 @@ int reportMain(int argc, const char *argv[]) {
CodeCoverageTool Tool;
return Tool.run(CodeCoverageTool::Report, argc, argv);
}
+
+int exportMain(int argc, const char *argv[]) {
+ CodeCoverageTool Tool;
+ return Tool.run(CodeCoverageTool::Export, argc, argv);
+}
diff --git a/contrib/llvm/tools/llvm-cov/CoverageExporterJson.cpp b/contrib/llvm/tools/llvm-cov/CoverageExporterJson.cpp
new file mode 100644
index 000000000000..ef50bba21238
--- /dev/null
+++ b/contrib/llvm/tools/llvm-cov/CoverageExporterJson.cpp
@@ -0,0 +1,421 @@
+//===- CoverageExporterJson.cpp - Code coverage export --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements export of code coverage data to JSON.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+//
+// The json code coverage export follows the following format
+// Root: dict => Root Element containing metadata
+// -- Data: array => Homogeneous array of one or more export objects
+// ---- Export: dict => Json representation of one CoverageMapping
+// ------ Files: array => List of objects describing coverage for files
+// -------- File: dict => Coverage for a single file
+// ---------- Segments: array => List of Segments contained in the file
+// ------------ Segment: dict => Describes a segment of the file with a counter
+// ---------- Expansions: array => List of expansion records
+// ------------ Expansion: dict => Object that describes a single expansion
+// -------------- CountedRegion: dict => The region to be expanded
+// -------------- TargetRegions: array => List of Regions in the expansion
+// ---------------- CountedRegion: dict => Single Region in the expansion
+// ---------- Summary: dict => Object summarizing the coverage for this file
+// ------------ LineCoverage: dict => Object summarizing line coverage
+// ------------ FunctionCoverage: dict => Object summarizing function coverage
+// ------------ RegionCoverage: dict => Object summarizing region coverage
+// ------ Functions: array => List of objects describing coverage for functions
+// -------- Function: dict => Coverage info for a single function
+// ---------- Filenames: array => List of filenames that the function relates to
+// ---- Summary: dict => Object summarizing the coverage for the entire binary
+// ------ LineCoverage: dict => Object summarizing line coverage
+// ------ FunctionCoverage: dict => Object summarizing function coverage
+// ------ InstantiationCoverage: dict => Object summarizing inst. coverage
+// ------ RegionCoverage: dict => Object summarizing region coverage
+//
+//===----------------------------------------------------------------------===//
+
+#include "CoverageReport.h"
+#include "CoverageSummaryInfo.h"
+#include "CoverageViewOptions.h"
+#include "llvm/ProfileData/Coverage/CoverageMapping.h"
+#include <stack>
+
+/// \brief The semantic version combined as a string.
+#define LLVM_COVERAGE_EXPORT_JSON_STR "2.0.0"
+
+/// \brief Unique type identifier for JSON coverage export.
+#define LLVM_COVERAGE_EXPORT_JSON_TYPE_STR "llvm.coverage.json.export"
+
+using namespace llvm;
+using namespace coverage;
+
+class CoverageExporterJson {
+ /// \brief Output stream to print JSON to.
+ raw_ostream &OS;
+
+ /// \brief The full CoverageMapping object to export.
+ const CoverageMapping &Coverage;
+
+ /// \brief States that the JSON rendering machine can be in.
+ enum JsonState { None, NonEmptyElement, EmptyElement };
+
+ /// \brief Tracks state of the JSON output.
+ std::stack<JsonState> State;
+
+ /// \brief Emit a serialized scalar.
+ void emitSerialized(const int64_t Value) { OS << Value; }
+
+ /// \brief Emit a serialized string.
+ void emitSerialized(const std::string &Value) {
+ OS << "\"";
+ for (char C : Value) {
+ if (C != '\\')
+ OS << C;
+ else
+ OS << "\\\\";
+ }
+ OS << "\"";
+ }
+
+ /// \brief Emit a comma if there is a previous element to delimit.
+ void emitComma() {
+ if (State.top() == JsonState::NonEmptyElement) {
+ OS << ",";
+ } else if (State.top() == JsonState::EmptyElement) {
+ State.pop();
+ assert((State.size() >= 1) && "Closed too many JSON elements");
+ State.push(JsonState::NonEmptyElement);
+ }
+ }
+
+ /// \brief Emit a starting dictionary/object character.
+ void emitDictStart() {
+ emitComma();
+ State.push(JsonState::EmptyElement);
+ OS << "{";
+ }
+
+ /// \brief Emit a dictionary/object key but no value.
+ void emitDictKey(const std::string &Key) {
+ emitComma();
+ emitSerialized(Key);
+ OS << ":";
+ State.pop();
+ assert((State.size() >= 1) && "Closed too many JSON elements");
+
+ // We do not want to emit a comma after this key.
+ State.push(JsonState::EmptyElement);
+ }
+
+ /// \brief Emit a dictionary/object key/value pair.
+ template <typename V>
+ void emitDictElement(const std::string &Key, const V &Value) {
+ emitComma();
+ emitSerialized(Key);
+ OS << ":";
+ emitSerialized(Value);
+ }
+
+ /// \brief Emit a closing dictionary/object character.
+ void emitDictEnd() {
+ State.pop();
+ assert((State.size() >= 1) && "Closed too many JSON elements");
+ OS << "}";
+ }
+
+ /// \brief Emit a starting array character.
+ void emitArrayStart() {
+ emitComma();
+ State.push(JsonState::EmptyElement);
+ OS << "[";
+ }
+
+ /// \brief Emit an array element.
+ template <typename V> void emitArrayElement(const V &Value) {
+ emitComma();
+ emitSerialized(Value);
+ }
+
+ /// \brief emit a closing array character.
+ void emitArrayEnd() {
+ State.pop();
+ assert((State.size() >= 1) && "Closed too many JSON elements");
+ OS << "]";
+ }
+
+ /// \brief Render the CoverageMapping object.
+ void renderRoot() {
+ // Start Root of JSON object.
+ emitDictStart();
+
+ emitDictElement("version", LLVM_COVERAGE_EXPORT_JSON_STR);
+ emitDictElement("type", LLVM_COVERAGE_EXPORT_JSON_TYPE_STR);
+ emitDictKey("data");
+
+ // Start List of Exports.
+ emitArrayStart();
+
+ // Start Export.
+ emitDictStart();
+
+ emitDictKey("files");
+
+ FileCoverageSummary Totals = FileCoverageSummary("Totals");
+ std::vector<std::string> SourceFiles;
+ for (StringRef SF : Coverage.getUniqueSourceFiles())
+ SourceFiles.emplace_back(SF);
+ auto FileReports =
+ CoverageReport::prepareFileReports(Coverage, Totals, SourceFiles);
+ renderFiles(SourceFiles, FileReports);
+
+ emitDictKey("functions");
+ renderFunctions(Coverage.getCoveredFunctions());
+
+ emitDictKey("totals");
+ renderSummary(Totals);
+
+ // End Export.
+ emitDictEnd();
+
+ // End List of Exports.
+ emitArrayEnd();
+
+ // End Root of JSON Object.
+ emitDictEnd();
+
+ assert((State.top() == JsonState::None) &&
+ "All Elements In JSON were Closed");
+ }
+
+ /// \brief Render an array of all the given functions.
+ void
+ renderFunctions(const iterator_range<FunctionRecordIterator> &Functions) {
+ // Start List of Functions.
+ emitArrayStart();
+
+ for (const auto &Function : Functions) {
+ // Start Function.
+ emitDictStart();
+
+ emitDictElement("name", Function.Name);
+ emitDictElement("count", Function.ExecutionCount);
+ emitDictKey("regions");
+
+ renderRegions(Function.CountedRegions);
+
+ emitDictKey("filenames");
+
+ // Start Filenames for Function.
+ emitArrayStart();
+
+ for (const auto &FileName : Function.Filenames)
+ emitArrayElement(FileName);
+
+ // End Filenames for Function.
+ emitArrayEnd();
+
+ // End Function.
+ emitDictEnd();
+ }
+
+ // End List of Functions.
+ emitArrayEnd();
+ }
+
+ /// \brief Render an array of all the source files, also pass back a Summary.
+ void renderFiles(ArrayRef<std::string> SourceFiles,
+ ArrayRef<FileCoverageSummary> FileReports) {
+ // Start List of Files.
+ emitArrayStart();
+
+ for (unsigned I = 0, E = SourceFiles.size(); I < E; ++I) {
+ // Render the file.
+ auto FileCoverage = Coverage.getCoverageForFile(SourceFiles[I]);
+ renderFile(FileCoverage, FileReports[I]);
+ }
+
+ // End List of Files.
+ emitArrayEnd();
+ }
+
+ /// \brief Render a single file.
+ void renderFile(const CoverageData &FileCoverage,
+ const FileCoverageSummary &FileReport) {
+ // Start File.
+ emitDictStart();
+
+ emitDictElement("filename", FileCoverage.getFilename());
+ emitDictKey("segments");
+
+ // Start List of Segments.
+ emitArrayStart();
+
+ for (const auto &Segment : FileCoverage)
+ renderSegment(Segment);
+
+ // End List of Segments.
+ emitArrayEnd();
+
+ emitDictKey("expansions");
+
+ // Start List of Expansions.
+ emitArrayStart();
+
+ for (const auto &Expansion : FileCoverage.getExpansions())
+ renderExpansion(Expansion);
+
+ // End List of Expansions.
+ emitArrayEnd();
+
+ emitDictKey("summary");
+ renderSummary(FileReport);
+
+ // End File.
+ emitDictEnd();
+ }
+
+ /// \brief Render a CoverageSegment.
+ void renderSegment(const CoverageSegment &Segment) {
+ // Start Segment.
+ emitArrayStart();
+
+ emitArrayElement(Segment.Line);
+ emitArrayElement(Segment.Col);
+ emitArrayElement(Segment.Count);
+ emitArrayElement(Segment.HasCount);
+ emitArrayElement(Segment.IsRegionEntry);
+
+ // End Segment.
+ emitArrayEnd();
+ }
+
+ /// \brief Render an ExpansionRecord.
+ void renderExpansion(const ExpansionRecord &Expansion) {
+ // Start Expansion.
+ emitDictStart();
+
+ // Mark the beginning and end of this expansion in the source file.
+ emitDictKey("source_region");
+ renderRegion(Expansion.Region);
+
+ // Enumerate the coverage information for the expansion.
+ emitDictKey("target_regions");
+ renderRegions(Expansion.Function.CountedRegions);
+
+ emitDictKey("filenames");
+ // Start List of Filenames to map the fileIDs.
+ emitArrayStart();
+ for (const auto &Filename : Expansion.Function.Filenames)
+ emitArrayElement(Filename);
+ // End List of Filenames.
+ emitArrayEnd();
+
+ // End Expansion.
+ emitDictEnd();
+ }
+
+ /// \brief Render a list of CountedRegions.
+ void renderRegions(ArrayRef<CountedRegion> Regions) {
+ // Start List of Regions.
+ emitArrayStart();
+
+ for (const auto &Region : Regions)
+ renderRegion(Region);
+
+ // End List of Regions.
+ emitArrayEnd();
+ }
+
+ /// \brief Render a single CountedRegion.
+ void renderRegion(const CountedRegion &Region) {
+ // Start CountedRegion.
+ emitArrayStart();
+
+ emitArrayElement(Region.LineStart);
+ emitArrayElement(Region.ColumnStart);
+ emitArrayElement(Region.LineEnd);
+ emitArrayElement(Region.ColumnEnd);
+ emitArrayElement(Region.ExecutionCount);
+ emitArrayElement(Region.FileID);
+ emitArrayElement(Region.ExpandedFileID);
+ emitArrayElement(Region.Kind);
+
+ // End CountedRegion.
+ emitArrayEnd();
+ }
+
+ /// \brief Render a FileCoverageSummary.
+ void renderSummary(const FileCoverageSummary &Summary) {
+ // Start Summary for the file.
+ emitDictStart();
+
+ emitDictKey("lines");
+
+ // Start Line Coverage Summary.
+ emitDictStart();
+ emitDictElement("count", Summary.LineCoverage.NumLines);
+ emitDictElement("covered", Summary.LineCoverage.Covered);
+ emitDictElement("percent", Summary.LineCoverage.getPercentCovered());
+ // End Line Coverage Summary.
+ emitDictEnd();
+
+ emitDictKey("functions");
+
+ // Start Function Coverage Summary.
+ emitDictStart();
+ emitDictElement("count", Summary.FunctionCoverage.NumFunctions);
+ emitDictElement("covered", Summary.FunctionCoverage.Executed);
+ emitDictElement("percent", Summary.FunctionCoverage.getPercentCovered());
+ // End Function Coverage Summary.
+ emitDictEnd();
+
+ emitDictKey("instantiations");
+
+ // Start Instantiation Coverage Summary.
+ emitDictStart();
+ emitDictElement("count", Summary.InstantiationCoverage.NumFunctions);
+ emitDictElement("covered", Summary.InstantiationCoverage.Executed);
+ emitDictElement("percent",
+ Summary.InstantiationCoverage.getPercentCovered());
+ // End Function Coverage Summary.
+ emitDictEnd();
+
+ emitDictKey("regions");
+
+ // Start Region Coverage Summary.
+ emitDictStart();
+ emitDictElement("count", Summary.RegionCoverage.NumRegions);
+ emitDictElement("covered", Summary.RegionCoverage.Covered);
+ emitDictElement("notcovered", Summary.RegionCoverage.NotCovered);
+ emitDictElement("percent", Summary.RegionCoverage.getPercentCovered());
+ // End Region Coverage Summary.
+ emitDictEnd();
+
+ // End Summary for the file.
+ emitDictEnd();
+ }
+
+public:
+ CoverageExporterJson(const CoverageMapping &CoverageMapping, raw_ostream &OS)
+ : OS(OS), Coverage(CoverageMapping) {
+ State.push(JsonState::None);
+ }
+
+ /// \brief Print the CoverageMapping.
+ void print() { renderRoot(); }
+};
+
+/// \brief Export the given CoverageMapping to a JSON Format.
+void exportCoverageDataToJson(const CoverageMapping &CoverageMapping,
+ raw_ostream &OS) {
+ auto Exporter = CoverageExporterJson(CoverageMapping, OS);
+
+ Exporter.print();
+}
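
The long format comment at the top of this new file maps one-to-one onto the emit* calls below it. For orientation, here is an illustrative, hand-written skeleton of the document the exporter produces; all values are invented. Segments are the [line, col, count, has_count, is_region_entry] 5-tuples from renderSegment() and regions the 8-tuples from renderRegion(); with only the int64_t and string emitSerialized overloads above, booleans and percentages come out as plain integers.

    {
      "version": "2.0.0",
      "type": "llvm.coverage.json.export",
      "data": [
        {
          "files": [
            {
              "filename": "a.cpp",
              "segments": [ [1, 12, 2, 1, 1], [3, 2, 0, 1, 0] ],
              "expansions": [],
              "summary": {
                "lines":          { "count": 10, "covered": 8, "percent": 80 },
                "functions":      { "count": 1, "covered": 1, "percent": 100 },
                "instantiations": { "count": 1, "covered": 1, "percent": 100 },
                "regions":        { "count": 4, "covered": 3, "notcovered": 1, "percent": 75 }
              }
            }
          ],
          "functions": [
            {
              "name": "main",
              "count": 2,
              "regions": [ [1, 12, 3, 2, 2, 0, 0, 0] ],
              "filenames": [ "a.cpp" ]
            }
          ],
          "totals": {
            "lines":          { "count": 10, "covered": 8, "percent": 80 },
            "functions":      { "count": 1, "covered": 1, "percent": 100 },
            "instantiations": { "count": 1, "covered": 1, "percent": 100 },
            "regions":        { "count": 4, "covered": 3, "notcovered": 1, "percent": 75 }
          }
        }
      ]
    }
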
diff --git a/contrib/llvm/tools/llvm-cov/CoverageReport.cpp b/contrib/llvm/tools/llvm-cov/CoverageReport.cpp
index 10e53b3f1f72..e88cb186acd6 100644
--- a/contrib/llvm/tools/llvm-cov/CoverageReport.cpp
+++ b/contrib/llvm/tools/llvm-cov/CoverageReport.cpp
@@ -13,14 +13,19 @@
#include "CoverageReport.h"
#include "RenderingSupport.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/Path.h"
+#include <numeric>
using namespace llvm;
+
namespace {
+
/// \brief Helper struct which prints trimmed and aligned columns.
struct Column {
- enum TrimKind { NoTrim, WidthTrim, LeftTrim, RightTrim };
+ enum TrimKind { NoTrim, WidthTrim, RightTrim };
enum AlignmentKind { LeftAlignment, RightAlignment };
@@ -42,144 +47,204 @@ struct Column {
return *this;
}
- void render(raw_ostream &OS) const;
-};
-
-raw_ostream &operator<<(raw_ostream &OS, const Column &Value) {
- Value.render(OS);
- return OS;
-}
-}
-
-void Column::render(raw_ostream &OS) const {
- if (Str.size() <= Width) {
- if (Alignment == RightAlignment) {
- OS.indent(Width - Str.size());
+ void render(raw_ostream &OS) const {
+ if (Str.size() <= Width) {
+ if (Alignment == RightAlignment) {
+ OS.indent(Width - Str.size());
+ OS << Str;
+ return;
+ }
OS << Str;
+ OS.indent(Width - Str.size());
return;
}
- OS << Str;
- OS.indent(Width - Str.size());
- return;
- }
- switch (Trim) {
- case NoTrim:
- OS << Str;
- break;
- case WidthTrim:
- OS << Str.substr(0, Width);
- break;
- case LeftTrim:
- OS << "..." << Str.substr(Str.size() - Width + 3);
- break;
- case RightTrim:
- OS << Str.substr(0, Width - 3) << "...";
- break;
+ switch (Trim) {
+ case NoTrim:
+ OS << Str;
+ break;
+ case WidthTrim:
+ OS << Str.substr(0, Width);
+ break;
+ case RightTrim:
+ OS << Str.substr(0, Width - 3) << "...";
+ break;
+ }
}
-}
+};
-static Column column(StringRef Str, unsigned Width) {
- return Column(Str, Width);
+raw_ostream &operator<<(raw_ostream &OS, const Column &Value) {
+ Value.render(OS);
+ return OS;
}
+Column column(StringRef Str, unsigned Width) { return Column(Str, Width); }
+
template <typename T>
-static Column column(StringRef Str, unsigned Width, const T &Value) {
+Column column(StringRef Str, unsigned Width, const T &Value) {
return Column(Str, Width).set(Value);
}
-static size_t FileReportColumns[] = {25, 10, 8, 8, 10, 10};
-static size_t FunctionReportColumns[] = {25, 10, 8, 8, 10, 8, 8};
+// Specify the default column widths.
+size_t FileReportColumns[] = {25, 12, 18, 10, 12, 18, 10,
+ 16, 16, 10, 12, 18, 10};
+size_t FunctionReportColumns[] = {25, 10, 8, 8, 10, 8, 8};
-/// \brief Adjust column widths to fit long file paths and function names.
-static void adjustColumnWidths(coverage::CoverageMapping *CM) {
- for (StringRef Filename : CM->getUniqueSourceFiles()) {
+/// \brief Adjust column widths to fit long file paths and function names.
+void adjustColumnWidths(ArrayRef<StringRef> Files,
+ ArrayRef<StringRef> Functions) {
+ for (StringRef Filename : Files)
FileReportColumns[0] = std::max(FileReportColumns[0], Filename.size());
- for (const auto &F : CM->getCoveredFunctions(Filename)) {
- FunctionReportColumns[0] =
- std::max(FunctionReportColumns[0], F.Name.size());
- }
- }
+ for (StringRef Funcname : Functions)
+ FunctionReportColumns[0] =
+ std::max(FunctionReportColumns[0], Funcname.size());
}
-/// \brief Prints a horizontal divider which spans across the given columns.
-template <typename T, size_t N>
-static void renderDivider(T (&Columns)[N], raw_ostream &OS) {
- unsigned Length = 0;
- for (unsigned I = 0; I < N; ++I)
- Length += Columns[I];
- for (unsigned I = 0; I < Length; ++I)
+/// \brief Prints a horizontal divider long enough to cover the given column
+/// widths.
+void renderDivider(ArrayRef<size_t> ColumnWidths, raw_ostream &OS) {
+ size_t Length = std::accumulate(ColumnWidths.begin(), ColumnWidths.end(), 0);
+ for (size_t I = 0; I < Length; ++I)
OS << '-';
}
-/// \brief Return the color which correponds to the coverage
-/// percentage of a certain metric.
+/// \brief Return the color which corresponds to the coverage percentage of a
+/// certain metric.
template <typename T>
-static raw_ostream::Colors determineCoveragePercentageColor(const T &Info) {
+raw_ostream::Colors determineCoveragePercentageColor(const T &Info) {
if (Info.isFullyCovered())
return raw_ostream::GREEN;
return Info.getPercentCovered() >= 80.0 ? raw_ostream::YELLOW
: raw_ostream::RED;
}
-void CoverageReport::render(const FileCoverageSummary &File, raw_ostream &OS) {
- OS << column(File.Name, FileReportColumns[0], Column::NoTrim)
+/// \brief Determine the length of the longest common prefix of the strings in
+/// \p Strings.
+unsigned getLongestCommonPrefixLen(ArrayRef<std::string> Strings) {
+ unsigned LCP = Strings[0].size();
+ for (unsigned I = 1, E = Strings.size(); LCP > 0 && I < E; ++I) {
+ unsigned Cursor;
+ StringRef S = Strings[I];
+ for (Cursor = 0; Cursor < LCP && Cursor < S.size(); ++Cursor)
+ if (Strings[0][Cursor] != S[Cursor])
+ break;
+ LCP = std::min(LCP, Cursor);
+ }
+ return LCP;
+}
+
+} // end anonymous namespace
+
+namespace llvm {
+
+void CoverageReport::render(const FileCoverageSummary &File,
+ raw_ostream &OS) const {
+ auto FileCoverageColor =
+ determineCoveragePercentageColor(File.RegionCoverage);
+ auto FuncCoverageColor =
+ determineCoveragePercentageColor(File.FunctionCoverage);
+ auto InstantiationCoverageColor =
+ determineCoveragePercentageColor(File.InstantiationCoverage);
+ auto LineCoverageColor = determineCoveragePercentageColor(File.LineCoverage);
+ SmallString<256> FileName = File.Name;
+ sys::path::remove_dots(FileName, /*remove_dot_dots=*/true);
+ sys::path::native(FileName);
+ OS << column(FileName, FileReportColumns[0], Column::NoTrim)
<< format("%*u", FileReportColumns[1],
(unsigned)File.RegionCoverage.NumRegions);
- Options.colored_ostream(OS, File.RegionCoverage.isFullyCovered()
- ? raw_ostream::GREEN
- : raw_ostream::RED)
- << format("%*u", FileReportColumns[2], (unsigned)File.RegionCoverage.NotCovered);
- Options.colored_ostream(OS,
- determineCoveragePercentageColor(File.RegionCoverage))
- << format("%*.2f", FileReportColumns[3] - 1,
- File.RegionCoverage.getPercentCovered()) << '%';
+ Options.colored_ostream(OS, FileCoverageColor) << format(
+ "%*u", FileReportColumns[2], (unsigned)File.RegionCoverage.NotCovered);
+ if (File.RegionCoverage.NumRegions)
+ Options.colored_ostream(OS, FileCoverageColor)
+ << format("%*.2f", FileReportColumns[3] - 1,
+ File.RegionCoverage.getPercentCovered())
+ << '%';
+ else
+ OS << column("-", FileReportColumns[3], Column::RightAlignment);
OS << format("%*u", FileReportColumns[4],
(unsigned)File.FunctionCoverage.NumFunctions);
- Options.colored_ostream(
- OS, determineCoveragePercentageColor(File.FunctionCoverage))
- << format("%*.2f", FileReportColumns[5] - 1,
- File.FunctionCoverage.getPercentCovered()) << '%';
+ OS << format("%*u", FileReportColumns[5],
+ (unsigned)(File.FunctionCoverage.NumFunctions -
+ File.FunctionCoverage.Executed));
+ if (File.FunctionCoverage.NumFunctions)
+ Options.colored_ostream(OS, FuncCoverageColor)
+ << format("%*.2f", FileReportColumns[6] - 1,
+ File.FunctionCoverage.getPercentCovered())
+ << '%';
+ else
+ OS << column("-", FileReportColumns[6], Column::RightAlignment);
+ OS << format("%*u", FileReportColumns[7],
+ (unsigned)File.InstantiationCoverage.NumFunctions);
+ OS << format("%*u", FileReportColumns[8],
+ (unsigned)(File.InstantiationCoverage.NumFunctions -
+ File.InstantiationCoverage.Executed));
+ if (File.InstantiationCoverage.NumFunctions)
+ Options.colored_ostream(OS, InstantiationCoverageColor)
+ << format("%*.2f", FileReportColumns[9] - 1,
+ File.InstantiationCoverage.getPercentCovered())
+ << '%';
+ else
+ OS << column("-", FileReportColumns[9], Column::RightAlignment);
+ OS << format("%*u", FileReportColumns[10],
+ (unsigned)File.LineCoverage.NumLines);
+ Options.colored_ostream(OS, LineCoverageColor) << format(
+ "%*u", FileReportColumns[11], (unsigned)File.LineCoverage.NotCovered);
+ if (File.LineCoverage.NumLines)
+ Options.colored_ostream(OS, LineCoverageColor)
+ << format("%*.2f", FileReportColumns[12] - 1,
+ File.LineCoverage.getPercentCovered())
+ << '%';
+ else
+ OS << column("-", FileReportColumns[12], Column::RightAlignment);
OS << "\n";
}
void CoverageReport::render(const FunctionCoverageSummary &Function,
- raw_ostream &OS) {
+ raw_ostream &OS) const {
+ auto FuncCoverageColor =
+ determineCoveragePercentageColor(Function.RegionCoverage);
+ auto LineCoverageColor =
+ determineCoveragePercentageColor(Function.LineCoverage);
OS << column(Function.Name, FunctionReportColumns[0], Column::RightTrim)
<< format("%*u", FunctionReportColumns[1],
(unsigned)Function.RegionCoverage.NumRegions);
- Options.colored_ostream(OS, Function.RegionCoverage.isFullyCovered()
- ? raw_ostream::GREEN
- : raw_ostream::RED)
+ Options.colored_ostream(OS, FuncCoverageColor)
<< format("%*u", FunctionReportColumns[2],
(unsigned)Function.RegionCoverage.NotCovered);
Options.colored_ostream(
OS, determineCoveragePercentageColor(Function.RegionCoverage))
<< format("%*.2f", FunctionReportColumns[3] - 1,
- Function.RegionCoverage.getPercentCovered()) << '%';
+ Function.RegionCoverage.getPercentCovered())
+ << '%';
OS << format("%*u", FunctionReportColumns[4],
(unsigned)Function.LineCoverage.NumLines);
- Options.colored_ostream(OS, Function.LineCoverage.isFullyCovered()
- ? raw_ostream::GREEN
- : raw_ostream::RED)
+ Options.colored_ostream(OS, LineCoverageColor)
<< format("%*u", FunctionReportColumns[5],
(unsigned)Function.LineCoverage.NotCovered);
Options.colored_ostream(
OS, determineCoveragePercentageColor(Function.LineCoverage))
<< format("%*.2f", FunctionReportColumns[6] - 1,
- Function.LineCoverage.getPercentCovered()) << '%';
+ Function.LineCoverage.getPercentCovered())
+ << '%';
OS << "\n";
}
-void CoverageReport::renderFunctionReports(ArrayRef<StringRef> Files,
+void CoverageReport::renderFunctionReports(ArrayRef<std::string> Files,
raw_ostream &OS) {
- adjustColumnWidths(Coverage.get());
bool isFirst = true;
for (StringRef Filename : Files) {
+ auto Functions = Coverage.getCoveredFunctions(Filename);
+
if (isFirst)
isFirst = false;
else
OS << "\n";
+
+ std::vector<StringRef> Funcnames;
+ for (const auto &F : Functions)
+ Funcnames.emplace_back(F.Name);
+ adjustColumnWidths({}, Funcnames);
+
OS << "File '" << Filename << "':\n";
OS << column("Name", FunctionReportColumns[0])
<< column("Regions", FunctionReportColumns[1], Column::RightAlignment)
@@ -192,7 +257,7 @@ void CoverageReport::renderFunctionReports(ArrayRef<StringRef> Files,
renderDivider(FunctionReportColumns, OS);
OS << "\n";
FunctionCoverageSummary Totals("TOTAL");
- for (const auto &F : Coverage->getCoveredFunctions(Filename)) {
+ for (const auto &F : Functions) {
FunctionCoverageSummary Function = FunctionCoverageSummary::get(F);
++Totals.ExecutionCount;
Totals.RegionCoverage += Function.RegionCoverage;
@@ -207,29 +272,84 @@ void CoverageReport::renderFunctionReports(ArrayRef<StringRef> Files,
}
}
-void CoverageReport::renderFileReports(raw_ostream &OS) {
- adjustColumnWidths(Coverage.get());
+std::vector<FileCoverageSummary>
+CoverageReport::prepareFileReports(const coverage::CoverageMapping &Coverage,
+ FileCoverageSummary &Totals,
+ ArrayRef<std::string> Files) {
+ std::vector<FileCoverageSummary> FileReports;
+ unsigned LCP = 0;
+ if (Files.size() > 1)
+ LCP = getLongestCommonPrefixLen(Files);
+
+ for (StringRef Filename : Files) {
+ FileCoverageSummary Summary(Filename.drop_front(LCP));
+
+ // Map source locations to aggregate function coverage summaries.
+ DenseMap<std::pair<unsigned, unsigned>, FunctionCoverageSummary> Summaries;
+
+ for (const auto &F : Coverage.getCoveredFunctions(Filename)) {
+ FunctionCoverageSummary Function = FunctionCoverageSummary::get(F);
+ auto StartLoc = F.CountedRegions[0].startLoc();
+
+ auto UniquedSummary = Summaries.insert({StartLoc, Function});
+ if (!UniquedSummary.second)
+ UniquedSummary.first->second.update(Function);
+
+ Summary.addInstantiation(Function);
+ Totals.addInstantiation(Function);
+ }
+
+ for (const auto &UniquedSummary : Summaries) {
+ const FunctionCoverageSummary &FCS = UniquedSummary.second;
+ Summary.addFunction(FCS);
+ Totals.addFunction(FCS);
+ }
+
+ FileReports.push_back(Summary);
+ }
+
+ return FileReports;
+}
+
+void CoverageReport::renderFileReports(raw_ostream &OS) const {
+ std::vector<std::string> UniqueSourceFiles;
+ for (StringRef SF : Coverage.getUniqueSourceFiles())
+ UniqueSourceFiles.emplace_back(SF.str());
+ renderFileReports(OS, UniqueSourceFiles);
+}
+
+void CoverageReport::renderFileReports(raw_ostream &OS,
+ ArrayRef<std::string> Files) const {
+ FileCoverageSummary Totals("TOTAL");
+ auto FileReports = prepareFileReports(Coverage, Totals, Files);
+
+ std::vector<StringRef> Filenames;
+ for (const FileCoverageSummary &FCS : FileReports)
+ Filenames.emplace_back(FCS.Name);
+ adjustColumnWidths(Filenames, {});
+
OS << column("Filename", FileReportColumns[0])
<< column("Regions", FileReportColumns[1], Column::RightAlignment)
- << column("Miss", FileReportColumns[2], Column::RightAlignment)
+ << column("Missed Regions", FileReportColumns[2], Column::RightAlignment)
<< column("Cover", FileReportColumns[3], Column::RightAlignment)
<< column("Functions", FileReportColumns[4], Column::RightAlignment)
- << column("Executed", FileReportColumns[5], Column::RightAlignment)
- << "\n";
+ << column("Missed Functions", FileReportColumns[5], Column::RightAlignment)
+ << column("Executed", FileReportColumns[6], Column::RightAlignment)
+ << column("Instantiations", FileReportColumns[7], Column::RightAlignment)
+ << column("Missed Insts.", FileReportColumns[8], Column::RightAlignment)
+ << column("Executed", FileReportColumns[9], Column::RightAlignment)
+ << column("Lines", FileReportColumns[10], Column::RightAlignment)
+ << column("Missed Lines", FileReportColumns[11], Column::RightAlignment)
+ << column("Cover", FileReportColumns[12], Column::RightAlignment) << "\n";
renderDivider(FileReportColumns, OS);
OS << "\n";
- FileCoverageSummary Totals("TOTAL");
- for (StringRef Filename : Coverage->getUniqueSourceFiles()) {
- FileCoverageSummary Summary(Filename);
- for (const auto &F : Coverage->getCoveredFunctions(Filename)) {
- FunctionCoverageSummary Function = FunctionCoverageSummary::get(F);
- Summary.addFunction(Function);
- Totals.addFunction(Function);
- }
- render(Summary, OS);
- }
+ for (const FileCoverageSummary &FCS : FileReports)
+ render(FCS, OS);
+
renderDivider(FileReportColumns, OS);
OS << "\n";
render(Totals, OS);
}
+
+} // end namespace llvm
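getLongestCommonPrefixLen above compares every path against the first one and shrinks the candidate length at the first mismatch; prepareFileReports then drops that prefix from each file name so the widened report table stays readable. A standalone sketch of the same computation, using std::string and std::vector instead of LLVM's StringRef/ArrayRef:

#include <algorithm>
#include <iostream>
#include <string>
#include <vector>

// Length of the longest common prefix shared by all strings in Strings.
// Mirrors the helper above: compare every string against the first one and
// shrink the candidate length at the first mismatch.
static unsigned longestCommonPrefixLen(const std::vector<std::string> &Strings) {
  unsigned LCP =
      Strings.empty() ? 0u : static_cast<unsigned>(Strings[0].size());
  for (unsigned I = 1, E = Strings.size(); LCP > 0 && I < E; ++I) {
    unsigned Cursor = 0;
    const std::string &S = Strings[I];
    while (Cursor < LCP && Cursor < S.size() && Strings[0][Cursor] == S[Cursor])
      ++Cursor;
    LCP = std::min(LCP, Cursor);
  }
  return LCP;
}

int main() {
  std::vector<std::string> Files = {"/src/llvm-cov/CoverageReport.cpp",
                                    "/src/llvm-cov/CoverageReport.h",
                                    "/src/llvm-cov/SourceCoverageView.cpp"};
  unsigned LCP = longestCommonPrefixLen(Files);
  for (const std::string &F : Files)
    std::cout << F.substr(LCP) << '\n'; // prefix-stripped names, as in the report
}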
diff --git a/contrib/llvm/tools/llvm-cov/CoverageReport.h b/contrib/llvm/tools/llvm-cov/CoverageReport.h
index bb3d734b52a5..7a416497e258 100644
--- a/contrib/llvm/tools/llvm-cov/CoverageReport.h
+++ b/contrib/llvm/tools/llvm-cov/CoverageReport.h
@@ -22,20 +22,30 @@ namespace llvm {
/// \brief Displays the code coverage report.
class CoverageReport {
const CoverageViewOptions &Options;
- std::unique_ptr<coverage::CoverageMapping> Coverage;
+ const coverage::CoverageMapping &Coverage;
- void render(const FileCoverageSummary &File, raw_ostream &OS);
- void render(const FunctionCoverageSummary &Function, raw_ostream &OS);
+ void render(const FileCoverageSummary &File, raw_ostream &OS) const;
+ void render(const FunctionCoverageSummary &Function, raw_ostream &OS) const;
public:
CoverageReport(const CoverageViewOptions &Options,
- std::unique_ptr<coverage::CoverageMapping> Coverage)
- : Options(Options), Coverage(std::move(Coverage)) {}
+ const coverage::CoverageMapping &Coverage)
+ : Options(Options), Coverage(Coverage) {}
- void renderFunctionReports(ArrayRef<StringRef> Files, raw_ostream &OS);
+ void renderFunctionReports(ArrayRef<std::string> Files, raw_ostream &OS);
- void renderFileReports(raw_ostream &OS);
+ /// Prepare file reports for the files specified in \p Files.
+ static std::vector<FileCoverageSummary>
+ prepareFileReports(const coverage::CoverageMapping &Coverage,
+ FileCoverageSummary &Totals, ArrayRef<std::string> Files);
+
+ /// Render file reports for every unique file in the coverage mapping.
+ void renderFileReports(raw_ostream &OS) const;
+
+ /// Render file reports for the files specified in \p Files.
+ void renderFileReports(raw_ostream &OS, ArrayRef<std::string> Files) const;
};
-}
+
+} // end namespace llvm
#endif // LLVM_COV_COVERAGEREPORT_H
diff --git a/contrib/llvm/tools/llvm-cov/CoverageSummaryInfo.cpp b/contrib/llvm/tools/llvm-cov/CoverageSummaryInfo.cpp
index de8975097e49..21aa7ff73a05 100644
--- a/contrib/llvm/tools/llvm-cov/CoverageSummaryInfo.cpp
+++ b/contrib/llvm/tools/llvm-cov/CoverageSummaryInfo.cpp
@@ -19,7 +19,7 @@ using namespace coverage;
FunctionCoverageSummary
FunctionCoverageSummary::get(const coverage::FunctionRecord &Function) {
- // Compute the region coverage
+ // Compute the region coverage.
size_t NumCodeRegions = 0, CoveredRegions = 0;
for (auto &CR : Function.CountedRegions) {
if (CR.Kind != CounterMappingRegion::CodeRegion)
@@ -67,5 +67,17 @@ FunctionCoverageSummary::get(const coverage::FunctionRecord &Function) {
return FunctionCoverageSummary(
Function.Name, Function.ExecutionCount,
RegionCoverageInfo(CoveredRegions, NumCodeRegions),
- LineCoverageInfo(CoveredLines, 0, NumLines));
+ LineCoverageInfo(CoveredLines, NumLines));
+}
+
+void FunctionCoverageSummary::update(const FunctionCoverageSummary &Summary) {
+ ExecutionCount += Summary.ExecutionCount;
+ RegionCoverage.Covered =
+ std::max(RegionCoverage.Covered, Summary.RegionCoverage.Covered);
+ RegionCoverage.NotCovered =
+ std::min(RegionCoverage.NotCovered, Summary.RegionCoverage.NotCovered);
+ LineCoverage.Covered =
+ std::max(LineCoverage.Covered, Summary.LineCoverage.Covered);
+ LineCoverage.NotCovered =
+ std::min(LineCoverage.NotCovered, Summary.LineCoverage.NotCovered);
}
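The new FunctionCoverageSummary::update folds another instantiation of the same function into a single row: execution counts accumulate, while covered/missed counts keep the best instantiation seen so far (max covered, min not covered). A self-contained sketch of that merge rule, with hypothetical struct names:

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <iostream>

// Simplified stand-ins for the counters used by FunctionCoverageSummary.
struct Counts {
  std::size_t Covered;
  std::size_t NotCovered;
};

struct InstantiationSummary {
  std::uint64_t ExecutionCount;
  Counts Regions;
  Counts Lines;

  // Merge another instantiation of the same function: execution counts add
  // up, coverage keeps the best instantiation seen so far.
  void update(const InstantiationSummary &Other) {
    ExecutionCount += Other.ExecutionCount;
    Regions.Covered = std::max(Regions.Covered, Other.Regions.Covered);
    Regions.NotCovered = std::min(Regions.NotCovered, Other.Regions.NotCovered);
    Lines.Covered = std::max(Lines.Covered, Other.Lines.Covered);
    Lines.NotCovered = std::min(Lines.NotCovered, Other.Lines.NotCovered);
  }
};

int main() {
  InstantiationSummary A{3, {8, 2}, {20, 5}}; // e.g. one template instantiation
  InstantiationSummary B{1, {9, 1}, {22, 3}}; // e.g. another instantiation
  A.update(B);
  std::cout << A.ExecutionCount << " executions, " << A.Regions.Covered
            << " regions covered, " << A.Regions.NotCovered << " missed\n";
}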
diff --git a/contrib/llvm/tools/llvm-cov/CoverageSummaryInfo.h b/contrib/llvm/tools/llvm-cov/CoverageSummaryInfo.h
index 822742b635e9..c04a4d42ccd7 100644
--- a/contrib/llvm/tools/llvm-cov/CoverageSummaryInfo.h
+++ b/contrib/llvm/tools/llvm-cov/CoverageSummaryInfo.h
@@ -61,33 +61,27 @@ struct LineCoverageInfo {
/// \brief The number of lines that weren't executed.
size_t NotCovered;
- /// \brief The number of lines that aren't code.
- size_t NonCodeLines;
-
/// \brief The total number of lines in a function/file.
size_t NumLines;
- LineCoverageInfo()
- : Covered(0), NotCovered(0), NonCodeLines(0), NumLines(0) {}
+ LineCoverageInfo() : Covered(0), NotCovered(0), NumLines(0) {}
- LineCoverageInfo(size_t Covered, size_t NumNonCodeLines, size_t NumLines)
- : Covered(Covered), NotCovered(NumLines - NumNonCodeLines - Covered),
- NonCodeLines(NumNonCodeLines), NumLines(NumLines) {}
+ LineCoverageInfo(size_t Covered, size_t NumLines)
+ : Covered(Covered), NotCovered(NumLines - Covered), NumLines(NumLines) {}
LineCoverageInfo &operator+=(const LineCoverageInfo &RHS) {
Covered += RHS.Covered;
NotCovered += RHS.NotCovered;
- NonCodeLines += RHS.NonCodeLines;
NumLines += RHS.NumLines;
return *this;
}
- bool isFullyCovered() const { return Covered == (NumLines - NonCodeLines); }
+ bool isFullyCovered() const { return Covered == NumLines; }
double getPercentCovered() const {
- if (NumLines - NonCodeLines == 0)
+ if (NumLines == 0)
return 0.0;
- return double(Covered) / double(NumLines - NonCodeLines) * 100.0;
+ return double(Covered) / double(NumLines) * 100.0;
}
};
@@ -139,6 +133,10 @@ struct FunctionCoverageSummary {
/// mapping record.
static FunctionCoverageSummary
get(const coverage::FunctionRecord &Function);
+
+ /// \brief Update the summary with information from another instantiation
+ /// of this function.
+ void update(const FunctionCoverageSummary &Summary);
};
/// \brief A summary of file's code coverage.
@@ -147,6 +145,7 @@ struct FileCoverageSummary {
RegionCoverageInfo RegionCoverage;
LineCoverageInfo LineCoverage;
FunctionCoverageInfo FunctionCoverage;
+ FunctionCoverageInfo InstantiationCoverage;
FileCoverageSummary(StringRef Name) : Name(Name) {}
@@ -155,6 +154,10 @@ struct FileCoverageSummary {
LineCoverage += Function.LineCoverage;
FunctionCoverage.addFunction(/*Covered=*/Function.ExecutionCount > 0);
}
+
+ void addInstantiation(const FunctionCoverageSummary &Function) {
+ InstantiationCoverage.addFunction(/*Covered=*/Function.ExecutionCount > 0);
+ }
};
} // namespace llvm
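With NonCodeLines removed, the line-coverage arithmetic reduces to comparing Covered against NumLines. A minimal sketch of the simplified invariants (hypothetical LineInfo struct):

#include <cstddef>
#include <iostream>

// Simplified line-coverage counter matching the header change above:
// NotCovered is derived directly from NumLines - Covered.
struct LineInfo {
  std::size_t Covered, NotCovered, NumLines;

  LineInfo(std::size_t Covered, std::size_t NumLines)
      : Covered(Covered), NotCovered(NumLines - Covered), NumLines(NumLines) {}

  bool fullyCovered() const { return Covered == NumLines; }

  double percent() const {
    return NumLines == 0 ? 0.0 : double(Covered) / double(NumLines) * 100.0;
  }
};

int main() {
  LineInfo File(45, 60);
  std::cout << File.percent() << "% of " << File.NumLines
            << " lines covered, " << File.NotCovered << " missed\n";
}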
diff --git a/contrib/llvm/tools/llvm-cov/CoverageViewOptions.h b/contrib/llvm/tools/llvm-cov/CoverageViewOptions.h
index 350c264a2876..266b380b7d39 100644
--- a/contrib/llvm/tools/llvm-cov/CoverageViewOptions.h
+++ b/contrib/llvm/tools/llvm-cov/CoverageViewOptions.h
@@ -34,6 +34,9 @@ struct CoverageViewOptions {
OutputFormat Format;
std::string ShowOutputDirectory;
std::vector<std::string> DemanglerOpts;
+ uint32_t TabSize;
+ std::string ProjectTitle;
+ std::string CreatedTimeStr;
/// \brief Change the output's stream color if the colors are enabled.
ColoredRawOstream colored_ostream(raw_ostream &OS,
@@ -46,6 +49,19 @@ struct CoverageViewOptions {
/// \brief Check if a demangler has been specified.
bool hasDemangler() const { return !DemanglerOpts.empty(); }
+
+ /// \brief Check if a project title has been specified.
+ bool hasProjectTitle() const { return !ProjectTitle.empty(); }
+
+ /// \brief Check if the created time of the profile data file is available.
+ bool hasCreatedTime() const { return !CreatedTimeStr.empty(); }
+
+ /// \brief Get the LLVM version string.
+ std::string getLLVMVersionString() const {
+ std::string VersionString = "Generated by llvm-cov -- llvm version ";
+ VersionString += LLVM_VERSION_STRING;
+ return VersionString;
+ }
};
}
diff --git a/contrib/llvm/tools/llvm-cov/SourceCoverageView.cpp b/contrib/llvm/tools/llvm-cov/SourceCoverageView.cpp
index baf7c148bb80..52b8ff1747fe 100644
--- a/contrib/llvm/tools/llvm-cov/SourceCoverageView.cpp
+++ b/contrib/llvm/tools/llvm-cov/SourceCoverageView.cpp
@@ -29,7 +29,8 @@ void CoveragePrinter::StreamDestructor::operator()(raw_ostream *OS) const {
}
std::string CoveragePrinter::getOutputPath(StringRef Path, StringRef Extension,
- bool InToplevel, bool Relative) {
+ bool InToplevel,
+ bool Relative) const {
assert(Extension.size() && "The file extension may not be empty");
SmallString<256> FullPath;
@@ -46,13 +47,14 @@ std::string CoveragePrinter::getOutputPath(StringRef Path, StringRef Extension,
auto PathFilename = (sys::path::filename(Path) + "." + Extension).str();
sys::path::append(FullPath, PathFilename);
+ sys::path::native(FullPath);
return FullPath.str();
}
Expected<CoveragePrinter::OwnedStream>
CoveragePrinter::createOutputStream(StringRef Path, StringRef Extension,
- bool InToplevel) {
+ bool InToplevel) const {
if (!Opts.hasOutputDirectory())
return OwnedStream(&outs());
@@ -81,6 +83,25 @@ CoveragePrinter::create(const CoverageViewOptions &Opts) {
llvm_unreachable("Unknown coverage output format!");
}
+unsigned SourceCoverageView::getFirstUncoveredLineNo() {
+ auto CheckIfUncovered = [](const coverage::CoverageSegment &S) {
+ return S.HasCount && S.Count == 0;
+ };
+ // L is less than R if (1) it's an uncovered segment (has a 0 count), and (2)
+ // either R is not an uncovered segment, or L has a lower line number than R.
+ const auto MinSegIt =
+ std::min_element(CoverageInfo.begin(), CoverageInfo.end(),
+ [CheckIfUncovered](const coverage::CoverageSegment &L,
+ const coverage::CoverageSegment &R) {
+ return (CheckIfUncovered(L) &&
+ (!CheckIfUncovered(R) || (L.Line < R.Line)));
+ });
+ if (CheckIfUncovered(*MinSegIt))
+ return (*MinSegIt).Line;
+ // There is no uncovered line, return zero.
+ return 0;
+}
+
std::string SourceCoverageView::formatCount(uint64_t N) {
std::string Number = utostr(N);
int Len = Number.size();
@@ -112,15 +133,22 @@ SourceCoverageView::create(StringRef SourceName, const MemoryBuffer &File,
coverage::CoverageData &&CoverageInfo) {
switch (Options.Format) {
case CoverageViewOptions::OutputFormat::Text:
- return llvm::make_unique<SourceCoverageViewText>(SourceName, File, Options,
- std::move(CoverageInfo));
+ return llvm::make_unique<SourceCoverageViewText>(
+ SourceName, File, Options, std::move(CoverageInfo));
case CoverageViewOptions::OutputFormat::HTML:
- return llvm::make_unique<SourceCoverageViewHTML>(SourceName, File, Options,
- std::move(CoverageInfo));
+ return llvm::make_unique<SourceCoverageViewHTML>(
+ SourceName, File, Options, std::move(CoverageInfo));
}
llvm_unreachable("Unknown coverage output format!");
}
+std::string SourceCoverageView::getSourceName() const {
+ SmallString<128> SourceText(SourceName);
+ sys::path::remove_dots(SourceText, /*remove_dot_dots=*/true);
+ sys::path::native(SourceText);
+ return SourceText.str();
+}
+
void SourceCoverageView::addExpansion(
const coverage::CounterMappingRegion &Region,
std::unique_ptr<SourceCoverageView> View) {
@@ -135,11 +163,17 @@ void SourceCoverageView::addInstantiation(
void SourceCoverageView::print(raw_ostream &OS, bool WholeFile,
bool ShowSourceName, unsigned ViewDepth) {
- if (ShowSourceName)
- renderSourceName(OS);
+ if (WholeFile && getOptions().hasOutputDirectory())
+ renderTitle(OS, "Coverage Report");
renderViewHeader(OS);
+ if (ShowSourceName)
+ renderSourceName(OS, WholeFile);
+
+ renderTableHeader(OS, (ViewDepth > 0) ? 0 : getFirstUncoveredLineNo(),
+ ViewDepth);
+
// We need the expansions and instantiations sorted so we can go through them
// while we iterate lines.
std::sort(ExpansionSubViews.begin(), ExpansionSubViews.end());
@@ -182,10 +216,10 @@ void SourceCoverageView::print(raw_ostream &OS, bool WholeFile,
LineCount.addRegionStartCount(S->Count);
renderLinePrefix(OS, ViewDepth);
- if (getOptions().ShowLineStats)
- renderLineCoverageColumn(OS, LineCount);
if (getOptions().ShowLineNumbers)
renderLineNumberColumn(OS, LI.line_number());
+ if (getOptions().ShowLineStats)
+ renderLineCoverageColumn(OS, LineCount);
// If there are expansion subviews, we want to highlight the first one.
unsigned ExpansionColumn = 0;
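getFirstUncoveredLineNo finds, in a single std::min_element pass, the lowest-numbered segment whose execution count is zero; the HTML table header later links to it as "jump to first uncovered line". A standalone sketch of the comparator with a hypothetical Segment type:

#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

struct Segment {
  unsigned Line;
  bool HasCount;
  std::uint64_t Count;
};

// Return the line of the first uncovered segment, or 0 if everything ran.
static unsigned firstUncoveredLine(const std::vector<Segment> &Segments) {
  auto IsUncovered = [](const Segment &S) { return S.HasCount && S.Count == 0; };
  // L orders before R iff L is uncovered and either R is not, or L has the
  // smaller line number -- so min_element yields the earliest uncovered one.
  auto MinSegIt = std::min_element(Segments.begin(), Segments.end(),
                                   [&](const Segment &L, const Segment &R) {
                                     return IsUncovered(L) &&
                                            (!IsUncovered(R) || L.Line < R.Line);
                                   });
  if (MinSegIt != Segments.end() && IsUncovered(*MinSegIt))
    return MinSegIt->Line;
  return 0;
}

int main() {
  std::vector<Segment> Segs = {{3, true, 12}, {9, true, 0}, {14, true, 0}};
  std::cout << firstUncoveredLine(Segs) << '\n'; // prints 9
}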
diff --git a/contrib/llvm/tools/llvm-cov/SourceCoverageView.h b/contrib/llvm/tools/llvm-cov/SourceCoverageView.h
index feef959f86bf..9cb608fed608 100644
--- a/contrib/llvm/tools/llvm-cov/SourceCoverageView.h
+++ b/contrib/llvm/tools/llvm-cov/SourceCoverageView.h
@@ -57,15 +57,6 @@ struct InstantiationView {
InstantiationView(StringRef FunctionName, unsigned Line,
std::unique_ptr<SourceCoverageView> View)
: FunctionName(FunctionName), Line(Line), View(std::move(View)) {}
- InstantiationView(InstantiationView &&RHS)
- : FunctionName(std::move(RHS.FunctionName)), Line(std::move(RHS.Line)),
- View(std::move(RHS.View)) {}
- InstantiationView &operator=(InstantiationView &&RHS) {
- FunctionName = std::move(RHS.FunctionName);
- Line = std::move(RHS.Line);
- View = std::move(RHS.View);
- return *this;
- }
friend bool operator<(const InstantiationView &LHS,
const InstantiationView &RHS) {
@@ -99,8 +90,6 @@ struct LineCoverageStats {
/// \brief A file manager that handles format-aware file creation.
class CoveragePrinter {
- const CoverageViewOptions &Opts;
-
public:
struct StreamDestructor {
void operator()(raw_ostream *OS) const;
@@ -109,18 +98,20 @@ public:
using OwnedStream = std::unique_ptr<raw_ostream, StreamDestructor>;
protected:
+ const CoverageViewOptions &Opts;
+
CoveragePrinter(const CoverageViewOptions &Opts) : Opts(Opts) {}
/// \brief Return `OutputDir/ToplevelDir/Path.Extension`. If \p InToplevel is
/// false, skip the ToplevelDir component. If \p Relative is false, skip the
/// OutputDir component.
std::string getOutputPath(StringRef Path, StringRef Extension,
- bool InToplevel, bool Relative = true);
+ bool InToplevel, bool Relative = true) const;
/// \brief If directory output is enabled, create a file in that directory
/// at the path given by getOutputPath(). Otherwise, return stdout.
Expected<OwnedStream> createOutputStream(StringRef Path, StringRef Extension,
- bool InToplevel);
+ bool InToplevel) const;
/// \brief Return the sub-directory name for file coverage reports.
static StringRef getCoverageDir() { return "coverage"; }
@@ -142,7 +133,8 @@ public:
virtual void closeViewFile(OwnedStream OS) = 0;
/// \brief Create an index which lists reports for the given source files.
- virtual Error createIndexFile(ArrayRef<StringRef> SourceFiles) = 0;
+ virtual Error createIndexFile(ArrayRef<std::string> SourceFiles,
+ const coverage::CoverageMapping &Coverage) = 0;
/// @}
};
@@ -172,6 +164,9 @@ class SourceCoverageView {
/// on display.
std::vector<InstantiationView> InstantiationSubViews;
+ /// Get the first uncovered line number for the source file.
+ unsigned getFirstUncoveredLineNo();
+
protected:
struct LineRef {
StringRef Line;
@@ -192,7 +187,7 @@ protected:
virtual void renderViewFooter(raw_ostream &OS) = 0;
/// \brief Render the source name for the view.
- virtual void renderSourceName(raw_ostream &OS) = 0;
+ virtual void renderSourceName(raw_ostream &OS, bool WholeFile) = 0;
/// \brief Render the line prefix at the given \p ViewDepth.
virtual void renderLinePrefix(raw_ostream &OS, unsigned ViewDepth) = 0;
@@ -236,6 +231,14 @@ protected:
virtual void renderInstantiationView(raw_ostream &OS, InstantiationView &ISV,
unsigned ViewDepth) = 0;
+ /// \brief Render \p Title, a project title if one is available, and the
+ /// created time.
+ virtual void renderTitle(raw_ostream &OS, StringRef CellText) = 0;
+
+ /// \brief Render the table header for a given source file.
+ virtual void renderTableHeader(raw_ostream &OS, unsigned FirstUncoveredLineNo,
+ unsigned IndentLevel) = 0;
+
/// @}
/// \brief Format a count using engineering notation with 3 significant
@@ -262,7 +265,8 @@ public:
virtual ~SourceCoverageView() {}
- StringRef getSourceName() const { return SourceName; }
+ /// \brief Return the source name formatted for the host OS.
+ std::string getSourceName() const;
const CoverageViewOptions &getOptions() const { return Options; }
diff --git a/contrib/llvm/tools/llvm-cov/SourceCoverageViewHTML.cpp b/contrib/llvm/tools/llvm-cov/SourceCoverageViewHTML.cpp
index 81963e5c5447..64b888e89d79 100644
--- a/contrib/llvm/tools/llvm-cov/SourceCoverageViewHTML.cpp
+++ b/contrib/llvm/tools/llvm-cov/SourceCoverageViewHTML.cpp
@@ -11,43 +11,76 @@
///
//===----------------------------------------------------------------------===//
+#include "CoverageReport.h"
#include "SourceCoverageViewHTML.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/Path.h"
using namespace llvm;
namespace {
+// Return a string with the special characters in \p Str escaped.
+std::string escape(StringRef Str, const CoverageViewOptions &Opts) {
+ std::string Result;
+ unsigned ColNum = 0; // Record the column number.
+ for (char C : Str) {
+ ++ColNum;
+ if (C == '&')
+ Result += "&amp;";
+ else if (C == '<')
+ Result += "&lt;";
+ else if (C == '>')
+ Result += "&gt;";
+ else if (C == '\"')
+ Result += "&quot;";
+ else if (C == '\n' || C == '\r') {
+ Result += C;
+ ColNum = 0;
+ } else if (C == '\t') {
+ // Replace '\t' with TabSize spaces.
+ unsigned NumSpaces = Opts.TabSize - (--ColNum % Opts.TabSize);
+ for (unsigned I = 0; I < NumSpaces; ++I)
+ Result += "&nbsp;";
+ ColNum += NumSpaces;
+ } else
+ Result += C;
+ }
+ return Result;
+}
+
+// Create a \p Name tag around \p Str, and optionally set its \p ClassName.
+std::string tag(const std::string &Name, const std::string &Str,
+ const std::string &ClassName = "") {
+ std::string Tag = "<" + Name;
+ if (ClassName != "")
+ Tag += " class='" + ClassName + "'";
+ return Tag + ">" + Str + "</" + Name + ">";
+}
+
+// Create an anchor to \p Link with the label \p Str.
+std::string a(const std::string &Link, const std::string &Str,
+ const std::string &TargetName = "") {
+ std::string Name = TargetName.empty() ? "" : ("name='" + TargetName + "' ");
+ return "<a " + Name + "href='" + Link + "'>" + Str + "</a>";
+}
+
const char *BeginHeader =
"<head>"
"<meta name='viewport' content='width=device-width,initial-scale=1'>"
"<meta charset='UTF-8'>";
const char *CSSForCoverage =
- "<style>"
-R"(
-
-.red {
- background-color: #FFD0D0;
+ R"(.red {
+ background-color: #ffd0d0;
}
.cyan {
background-color: cyan;
}
-.black {
- background-color: black;
- color: white;
-}
-.green {
- background-color: #98FFA6;
- color: white;
-}
-.magenta {
- background-color: #F998FF;
- color: white;
-}
body {
font-family: -apple-system, sans-serif;
}
@@ -59,10 +92,11 @@ pre {
padding: 5px 10px;
border-bottom: 1px solid #dbdbdb;
background-color: #eee;
+ line-height: 35px;
}
.centered {
display: table;
- margin-left: auto;
+ margin-left: left;
margin-right: auto;
border: 1px solid #dbdbdb;
border-radius: 3px;
@@ -79,6 +113,28 @@ pre {
table {
border-collapse: collapse;
}
+.light-row {
+ background: #ffffff;
+ border: 1px solid #dbdbdb;
+}
+.column-entry {
+ text-align: right;
+}
+.column-entry-left {
+ text-align: left;
+}
+.column-entry-yellow {
+ text-align: right;
+ background-color: #ffffd0;
+}
+.column-entry-red {
+ text-align: right;
+ background-color: #ffd0d0;
+}
+.column-entry-green {
+ text-align: right;
+ background-color: #d0ffd0;
+}
.line-number {
text-align: right;
color: #aaa;
@@ -140,9 +196,7 @@ td:first-child {
td:last-child {
border-right: none;
}
-
-)"
- "</style>";
+)";
const char *EndHeader = "</head>";
@@ -170,48 +224,40 @@ const char *BeginTable = "<table>";
const char *EndTable = "</table>";
-void emitPrelude(raw_ostream &OS) {
- OS << "<!doctype html>"
- "<html>"
- << BeginHeader << CSSForCoverage << EndHeader << "<body>"
- << BeginCenteredDiv;
-}
+const char *ProjectTitleTag = "h1";
-void emitEpilog(raw_ostream &OS) {
- OS << EndCenteredDiv << "</body>"
- "</html>";
-}
+const char *ReportTitleTag = "h2";
-// Return a string with the special characters in \p Str escaped.
-std::string escape(StringRef Str) {
- std::string Result;
- for (char C : Str) {
- if (C == '&')
- Result += "&amp;";
- else if (C == '<')
- Result += "&lt;";
- else if (C == '>')
- Result += "&gt;";
- else if (C == '\"')
- Result += "&quot;";
- else
- Result += C;
- }
- return Result;
+const char *CreatedTimeTag = "h4";
+
+std::string getPathToStyle(StringRef ViewPath) {
+ std::string PathToStyle = "";
+ std::string PathSep = sys::path::get_separator();
+ unsigned NumSeps = ViewPath.count(PathSep);
+ for (unsigned I = 0, E = NumSeps; I < E; ++I)
+ PathToStyle += ".." + PathSep;
+ return PathToStyle + "style.css";
}
-// Create a \p Name tag around \p Str, and optionally set its \p ClassName.
-std::string tag(const std::string &Name, const std::string &Str,
- const std::string &ClassName = "") {
- std::string Tag = "<" + Name;
- if (ClassName != "")
- Tag += " class='" + ClassName + "'";
- return Tag + ">" + Str + "</" + Name + ">";
+void emitPrelude(raw_ostream &OS, const CoverageViewOptions &Opts,
+ const std::string &PathToStyle = "") {
+ OS << "<!doctype html>"
+ "<html>"
+ << BeginHeader;
+
+ // Link to a stylesheet if one is available. Otherwise, use the default style.
+ if (PathToStyle.empty())
+ OS << "<style>" << CSSForCoverage << "</style>";
+ else
+ OS << "<link rel='stylesheet' type='text/css' href='"
+ << escape(PathToStyle, Opts) << "'>";
+
+ OS << EndHeader << "<body>";
}
-// Create an anchor to \p Link with the label \p Str.
-std::string a(const std::string &Link, const std::string &Str) {
- return "<a href='" + Link + "'>" + Str + "</a>";
+void emitEpilog(raw_ostream &OS) {
+ OS << "</body>"
+ << "</html>";
}
} // anonymous namespace
@@ -223,7 +269,14 @@ CoveragePrinterHTML::createViewFile(StringRef Path, bool InToplevel) {
return OSOrErr;
OwnedStream OS = std::move(OSOrErr.get());
- emitPrelude(*OS.get());
+
+ if (!Opts.hasOutputDirectory()) {
+ emitPrelude(*OS.get(), Opts);
+ } else {
+ std::string ViewPath = getOutputPath(Path, "html", InToplevel);
+ emitPrelude(*OS.get(), Opts, getPathToStyle(ViewPath));
+ }
+
return std::move(OS);
}
@@ -231,39 +284,134 @@ void CoveragePrinterHTML::closeViewFile(OwnedStream OS) {
emitEpilog(*OS.get());
}
-Error CoveragePrinterHTML::createIndexFile(ArrayRef<StringRef> SourceFiles) {
+/// Emit column labels for the table in the index.
+static void emitColumnLabelsForIndex(raw_ostream &OS) {
+ SmallVector<std::string, 4> Columns;
+ Columns.emplace_back(tag("td", "Filename", "column-entry-left"));
+ for (const char *Label : {"Function Coverage", "Instantiation Coverage",
+ "Line Coverage", "Region Coverage"})
+ Columns.emplace_back(tag("td", Label, "column-entry"));
+ OS << tag("tr", join(Columns.begin(), Columns.end(), ""));
+}
+
+/// Render a file coverage summary (\p FCS) in a table row. If \p IsTotals is
+/// false, link the summary to \p SF.
+void CoveragePrinterHTML::emitFileSummary(raw_ostream &OS, StringRef SF,
+ const FileCoverageSummary &FCS,
+ bool IsTotals) const {
+ SmallVector<std::string, 8> Columns;
+
+ // Format a coverage triple and add the result to the list of columns.
+ auto AddCoverageTripleToColumn = [&Columns](unsigned Hit, unsigned Total,
+ float Pctg) {
+ std::string S;
+ {
+ raw_string_ostream RSO{S};
+ if (Total)
+ RSO << format("%*.2f", 7, Pctg) << "% ";
+ else
+ RSO << "- ";
+ RSO << '(' << Hit << '/' << Total << ')';
+ }
+ const char *CellClass = "column-entry-yellow";
+ if (Hit == Total)
+ CellClass = "column-entry-green";
+ else if (Pctg < 80.0)
+ CellClass = "column-entry-red";
+ Columns.emplace_back(tag("td", tag("pre", S), CellClass));
+ };
+
+ // Simplify the display file path, and wrap it in a link if requested.
+ std::string Filename;
+ if (IsTotals) {
+ Filename = "TOTALS";
+ } else {
+ SmallString<128> LinkTextStr(sys::path::relative_path(FCS.Name));
+ sys::path::remove_dots(LinkTextStr, /*remove_dot_dots=*/true);
+ sys::path::native(LinkTextStr);
+ std::string LinkText = escape(LinkTextStr, Opts);
+ std::string LinkTarget =
+ escape(getOutputPath(SF, "html", /*InToplevel=*/false), Opts);
+ Filename = a(LinkTarget, LinkText);
+ }
+
+ Columns.emplace_back(tag("td", tag("pre", Filename)));
+ AddCoverageTripleToColumn(FCS.FunctionCoverage.Executed,
+ FCS.FunctionCoverage.NumFunctions,
+ FCS.FunctionCoverage.getPercentCovered());
+ AddCoverageTripleToColumn(FCS.InstantiationCoverage.Executed,
+ FCS.InstantiationCoverage.NumFunctions,
+ FCS.InstantiationCoverage.getPercentCovered());
+ AddCoverageTripleToColumn(FCS.LineCoverage.Covered, FCS.LineCoverage.NumLines,
+ FCS.LineCoverage.getPercentCovered());
+ AddCoverageTripleToColumn(FCS.RegionCoverage.Covered,
+ FCS.RegionCoverage.NumRegions,
+ FCS.RegionCoverage.getPercentCovered());
+
+ OS << tag("tr", join(Columns.begin(), Columns.end(), ""), "light-row");
+}
+
+Error CoveragePrinterHTML::createIndexFile(
+ ArrayRef<std::string> SourceFiles,
+ const coverage::CoverageMapping &Coverage) {
+ // Emit the default stylesheet.
+ auto CSSOrErr = createOutputStream("style", "css", /*InToplevel=*/true);
+ if (Error E = CSSOrErr.takeError())
+ return E;
+
+ OwnedStream CSS = std::move(CSSOrErr.get());
+ CSS->operator<<(CSSForCoverage);
+
+ // Emit a file index along with some coverage statistics.
auto OSOrErr = createOutputStream("index", "html", /*InToplevel=*/true);
if (Error E = OSOrErr.takeError())
return E;
auto OS = std::move(OSOrErr.get());
raw_ostream &OSRef = *OS.get();
+ assert(Opts.hasOutputDirectory() && "No output directory for index file");
+ emitPrelude(OSRef, Opts, getPathToStyle(""));
+
+ // Emit some basic information about the coverage report.
+ if (Opts.hasProjectTitle())
+ OSRef << tag(ProjectTitleTag, escape(Opts.ProjectTitle, Opts));
+ OSRef << tag(ReportTitleTag, "Coverage Report");
+ if (Opts.hasCreatedTime())
+ OSRef << tag(CreatedTimeTag, escape(Opts.CreatedTimeStr, Opts));
+
+ // Emit a link to some documentation.
+ OSRef << tag("p", "Click " +
+ a("http://clang.llvm.org/docs/"
+ "SourceBasedCodeCoverage.html#interpreting-reports",
+ "here") +
+ " for information about interpreting this report.");
+
// Emit a table containing links to reports for each file in the covmapping.
- emitPrelude(OSRef);
- OSRef << BeginSourceNameDiv << "Index" << EndSourceNameDiv;
- OSRef << BeginTable;
- for (StringRef SF : SourceFiles) {
- std::string LinkText = escape(sys::path::relative_path(SF));
- std::string LinkTarget =
- escape(getOutputPath(SF, "html", /*InToplevel=*/false));
- OSRef << tag("tr", tag("td", tag("pre", a(LinkTarget, LinkText), "code")));
- }
- OSRef << EndTable;
+ OSRef << BeginCenteredDiv << BeginTable;
+ emitColumnLabelsForIndex(OSRef);
+ FileCoverageSummary Totals("TOTALS");
+ auto FileReports =
+ CoverageReport::prepareFileReports(Coverage, Totals, SourceFiles);
+ for (unsigned I = 0, E = FileReports.size(); I < E; ++I)
+ emitFileSummary(OSRef, SourceFiles[I], FileReports[I]);
+ emitFileSummary(OSRef, "Totals", Totals, /*IsTotals=*/true);
+ OSRef << EndTable << EndCenteredDiv
+ << tag("h5", escape(Opts.getLLVMVersionString(), Opts));
emitEpilog(OSRef);
return Error::success();
}
void SourceCoverageViewHTML::renderViewHeader(raw_ostream &OS) {
- OS << BeginTable;
+ OS << BeginCenteredDiv << BeginTable;
}
void SourceCoverageViewHTML::renderViewFooter(raw_ostream &OS) {
- OS << EndTable;
+ OS << EndTable << EndCenteredDiv;
}
-void SourceCoverageViewHTML::renderSourceName(raw_ostream &OS) {
- OS << BeginSourceNameDiv << tag("pre", escape(getSourceName()))
+void SourceCoverageViewHTML::renderSourceName(raw_ostream &OS, bool WholeFile) {
+ OS << BeginSourceNameDiv << tag("pre", escape(getSourceName(), getOptions()))
<< EndSourceNameDiv;
}
@@ -287,6 +435,7 @@ void SourceCoverageViewHTML::renderLine(
raw_ostream &OS, LineRef L, const coverage::CoverageSegment *WrappedSegment,
CoverageSegmentArray Segments, unsigned ExpansionCol, unsigned) {
StringRef Line = L.Line;
+ unsigned LineNo = L.LineNo;
// Steps for handling text-escaping, highlighting, and tooltip creation:
//
@@ -299,33 +448,32 @@ void SourceCoverageViewHTML::renderLine(
unsigned LCol = 1;
auto Snip = [&](unsigned Start, unsigned Len) {
- assert(Start + Len <= Line.size() && "Snippet extends past the EOL");
Snippets.push_back(Line.substr(Start, Len));
LCol += Len;
};
Snip(LCol - 1, Segments.empty() ? 0 : (Segments.front()->Col - 1));
- for (unsigned I = 1, E = Segments.size(); I < E; ++I) {
- assert(LCol == Segments[I - 1]->Col && "Snippet start position is wrong");
+ for (unsigned I = 1, E = Segments.size(); I < E; ++I)
Snip(LCol - 1, Segments[I]->Col - LCol);
- }
// |Line| + 1 is needed to avoid underflow when, e.g |Line| = 0 and LCol = 1.
Snip(LCol - 1, Line.size() + 1 - LCol);
- assert(LCol == Line.size() + 1 && "Final snippet doesn't reach the EOL");
// 2. Escape all of the snippets.
for (unsigned I = 0, E = Snippets.size(); I < E; ++I)
- Snippets[I] = escape(Snippets[I]);
+ Snippets[I] = escape(Snippets[I], getOptions());
- // 3. Use \p WrappedSegment to set the highlight for snippets 0 and 1. Use
- // segment 1 to set the highlight for snippet 2, segment 2 to set the
- // highlight for snippet 3, and so on.
+ // 3. Use \p WrappedSegment to set the highlight for snippet 0. Use segment
+ // 1 to set the highlight for snippet 2, segment 2 to set the highlight for
+ // snippet 3, and so on.
Optional<std::string> Color;
- auto Highlight = [&](const std::string &Snippet) {
+ SmallVector<std::pair<unsigned, unsigned>, 2> HighlightedRanges;
+ auto Highlight = [&](const std::string &Snippet, unsigned LC, unsigned RC) {
+ if (getOptions().Debug)
+ HighlightedRanges.emplace_back(LC, RC);
return tag("span", Snippet, Color.getValue());
};
@@ -333,14 +481,13 @@ void SourceCoverageViewHTML::renderLine(
return S && S->HasCount && S->Count == 0;
};
- if (CheckIfUncovered(WrappedSegment) ||
- CheckIfUncovered(Segments.empty() ? nullptr : Segments.front())) {
+ if (CheckIfUncovered(WrappedSegment)) {
Color = "red";
- Snippets[0] = Highlight(Snippets[0]);
- Snippets[1] = Highlight(Snippets[1]);
+ if (!Snippets[0].empty())
+ Snippets[0] = Highlight(Snippets[0], 1, 1 + Snippets[0].size());
}
- for (unsigned I = 1, E = Segments.size(); I < E; ++I) {
+ for (unsigned I = 0, E = Segments.size(); I < E; ++I) {
const auto *CurSeg = Segments[I];
if (CurSeg->Col == ExpansionCol)
Color = "cyan";
@@ -350,7 +497,22 @@ void SourceCoverageViewHTML::renderLine(
Color = None;
if (Color.hasValue())
- Snippets[I + 1] = Highlight(Snippets[I + 1]);
+ Snippets[I + 1] = Highlight(Snippets[I + 1], CurSeg->Col,
+ CurSeg->Col + Snippets[I + 1].size());
+ }
+
+ if (Color.hasValue() && Segments.empty())
+ Snippets.back() = Highlight(Snippets.back(), 1, 1 + Snippets.back().size());
+
+ if (getOptions().Debug) {
+ for (const auto &Range : HighlightedRanges) {
+ errs() << "Highlighted line " << LineNo << ", " << Range.first << " -> ";
+ if (Range.second == 0)
+ errs() << "?";
+ else
+ errs() << Range.second;
+ errs() << "\n";
+ }
}
// 4. Snippets[1:N+1] correspond to \p Segments[0:N]: use these to generate
@@ -373,7 +535,7 @@ void SourceCoverageViewHTML::renderLine(
Snippets[I + 1] =
tag("div", Snippets[I + 1] + tag("span", formatCount(CurSeg->Count),
- "tooltip-content"),
+ "tooltip-content"),
"tooltip");
}
}
@@ -402,7 +564,10 @@ void SourceCoverageViewHTML::renderLineCoverageColumn(
void SourceCoverageViewHTML::renderLineNumberColumn(raw_ostream &OS,
unsigned LineNo) {
- OS << tag("td", tag("pre", utostr(uint64_t(LineNo))), "line-number");
+ std::string LineNoStr = utostr(uint64_t(LineNo));
+ std::string TargetName = "L" + LineNoStr;
+ OS << tag("td", a("#" + TargetName, tag("pre", LineNoStr), TargetName),
+ "line-number");
}
void SourceCoverageViewHTML::renderRegionMarkers(raw_ostream &,
@@ -431,6 +596,43 @@ void SourceCoverageViewHTML::renderInstantiationView(raw_ostream &OS,
InstantiationView &ISV,
unsigned ViewDepth) {
OS << BeginExpansionDiv;
- ISV.View->print(OS, /*WholeFile=*/false, /*ShowSourceName=*/true, ViewDepth);
+ if (!ISV.View)
+ OS << BeginSourceNameDiv
+ << tag("pre",
+ escape("Unexecuted instantiation: " + ISV.FunctionName.str(),
+ getOptions()))
+ << EndSourceNameDiv;
+ else
+ ISV.View->print(OS, /*WholeFile=*/false, /*ShowSourceName=*/true,
+ ViewDepth);
OS << EndExpansionDiv;
}
+
+void SourceCoverageViewHTML::renderTitle(raw_ostream &OS, StringRef Title) {
+ if (getOptions().hasProjectTitle())
+ OS << tag(ProjectTitleTag, escape(getOptions().ProjectTitle, getOptions()));
+ OS << tag(ReportTitleTag, escape(Title, getOptions()));
+ if (getOptions().hasCreatedTime())
+ OS << tag(CreatedTimeTag,
+ escape(getOptions().CreatedTimeStr, getOptions()));
+}
+
+void SourceCoverageViewHTML::renderTableHeader(raw_ostream &OS,
+ unsigned FirstUncoveredLineNo,
+ unsigned ViewDepth) {
+ std::string SourceLabel;
+ if (FirstUncoveredLineNo == 0) {
+ SourceLabel = tag("td", tag("pre", "Source"));
+ } else {
+ std::string LinkTarget = "#L" + utostr(uint64_t(FirstUncoveredLineNo));
+ SourceLabel =
+ tag("td", tag("pre", "Source (" +
+ a(LinkTarget, "jump to first uncovered line") +
+ ")"));
+ }
+
+ renderLinePrefix(OS, ViewDepth);
+ OS << tag("td", tag("pre", "Line")) << tag("td", tag("pre", "Count"))
+ << SourceLabel;
+ renderLineSuffix(OS, ViewDepth);
+}
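The relocated escape() helper now takes the view options so it can expand tabs into a column-aware run of '&nbsp;' entities using the new TabSize setting. A self-contained sketch of the same loop, with a plain unsigned TabSize parameter standing in for CoverageViewOptions:

#include <iostream>
#include <string>

// HTML-escape Str, expanding '\t' so that text stays aligned to TabSize
// columns, in the spirit of the escape() helper above.
static std::string escapeHTML(const std::string &Str, unsigned TabSize) {
  std::string Result;
  unsigned ColNum = 0; // current column, used only for tab expansion
  for (char C : Str) {
    ++ColNum;
    if (C == '&')
      Result += "&amp;";
    else if (C == '<')
      Result += "&lt;";
    else if (C == '>')
      Result += "&gt;";
    else if (C == '"')
      Result += "&quot;";
    else if (C == '\n' || C == '\r') {
      Result += C;
      ColNum = 0;
    } else if (C == '\t') {
      // Pad to the next multiple of TabSize columns.
      unsigned NumSpaces = TabSize - (--ColNum % TabSize);
      for (unsigned I = 0; I < NumSpaces; ++I)
        Result += "&nbsp;";
      ColNum += NumSpaces;
    } else {
      Result += C;
    }
  }
  return Result;
}

int main() {
  std::cout << escapeHTML("if (a < b)\t// compare", 4) << '\n';
}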
diff --git a/contrib/llvm/tools/llvm-cov/SourceCoverageViewHTML.h b/contrib/llvm/tools/llvm-cov/SourceCoverageViewHTML.h
index 50ecf2bf8997..94b08a5e7fc4 100644
--- a/contrib/llvm/tools/llvm-cov/SourceCoverageViewHTML.h
+++ b/contrib/llvm/tools/llvm-cov/SourceCoverageViewHTML.h
@@ -18,6 +18,8 @@
namespace llvm {
+struct FileCoverageSummary;
+
/// \brief A coverage printer for html output.
class CoveragePrinterHTML : public CoveragePrinter {
public:
@@ -26,10 +28,16 @@ public:
void closeViewFile(OwnedStream OS) override;
- Error createIndexFile(ArrayRef<StringRef> SourceFiles) override;
+ Error createIndexFile(ArrayRef<std::string> SourceFiles,
+ const coverage::CoverageMapping &Coverage) override;
CoveragePrinterHTML(const CoverageViewOptions &Opts)
: CoveragePrinter(Opts) {}
+
+private:
+ void emitFileSummary(raw_ostream &OS, StringRef SF,
+ const FileCoverageSummary &FCS,
+ bool IsTotals = false) const;
};
/// \brief A code coverage view which supports html-based rendering.
@@ -38,7 +46,7 @@ class SourceCoverageViewHTML : public SourceCoverageView {
void renderViewFooter(raw_ostream &OS) override;
- void renderSourceName(raw_ostream &OS) override;
+ void renderSourceName(raw_ostream &OS, bool WholeFile) override;
void renderLinePrefix(raw_ostream &OS, unsigned ViewDepth) override;
@@ -70,6 +78,11 @@ class SourceCoverageViewHTML : public SourceCoverageView {
void renderRegionMarkers(raw_ostream &OS, CoverageSegmentArray Segments,
unsigned ViewDepth) override;
+ void renderTitle(raw_ostream &OS, StringRef Title) override;
+
+ void renderTableHeader(raw_ostream &OS, unsigned FirstUncoveredLineNo,
+ unsigned IndentLevel) override;
+
public:
SourceCoverageViewHTML(StringRef SourceName, const MemoryBuffer &File,
const CoverageViewOptions &Options,
diff --git a/contrib/llvm/tools/llvm-cov/SourceCoverageViewText.cpp b/contrib/llvm/tools/llvm-cov/SourceCoverageViewText.cpp
index ae9d6daed08f..4ad120f642eb 100644
--- a/contrib/llvm/tools/llvm-cov/SourceCoverageViewText.cpp
+++ b/contrib/llvm/tools/llvm-cov/SourceCoverageViewText.cpp
@@ -11,6 +11,7 @@
///
//===----------------------------------------------------------------------===//
+#include "CoverageReport.h"
#include "SourceCoverageViewText.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallString.h"
@@ -27,15 +28,20 @@ void CoveragePrinterText::closeViewFile(OwnedStream OS) {
OS->operator<<('\n');
}
-Error CoveragePrinterText::createIndexFile(ArrayRef<StringRef> SourceFiles) {
+Error CoveragePrinterText::createIndexFile(
+ ArrayRef<std::string> SourceFiles,
+ const coverage::CoverageMapping &Coverage) {
auto OSOrErr = createOutputStream("index", "txt", /*InToplevel=*/true);
if (Error E = OSOrErr.takeError())
return E;
auto OS = std::move(OSOrErr.get());
raw_ostream &OSRef = *OS.get();
- for (StringRef SF : SourceFiles)
- OSRef << getOutputPath(SF, "txt", /*InToplevel=*/false) << '\n';
+ CoverageReport Report(Opts, Coverage);
+ Report.renderFileReports(OSRef, SourceFiles);
+
+ Opts.colored_ostream(OSRef, raw_ostream::CYAN) << "\n"
+ << Opts.getLLVMVersionString();
return Error::success();
}
@@ -63,7 +69,7 @@ void SourceCoverageViewText::renderViewHeader(raw_ostream &) {}
void SourceCoverageViewText::renderViewFooter(raw_ostream &) {}
-void SourceCoverageViewText::renderSourceName(raw_ostream &OS) {
+void SourceCoverageViewText::renderSourceName(raw_ostream &OS, bool WholeFile) {
getOptions().colored_ostream(OS, raw_ostream::CYAN) << getSourceName()
<< ":\n";
}
@@ -209,5 +215,25 @@ void SourceCoverageViewText::renderInstantiationView(raw_ostream &OS,
unsigned ViewDepth) {
renderLinePrefix(OS, ViewDepth);
OS << ' ';
- ISV.View->print(OS, /*WholeFile=*/false, /*ShowSourceName=*/true, ViewDepth);
+ if (!ISV.View)
+ getOptions().colored_ostream(OS, raw_ostream::RED)
+ << "Unexecuted instantiation: " << ISV.FunctionName << "\n";
+ else
+ ISV.View->print(OS, /*WholeFile=*/false, /*ShowSourceName=*/true,
+ ViewDepth);
+}
+
+void SourceCoverageViewText::renderTitle(raw_ostream &OS, StringRef Title) {
+ if (getOptions().hasProjectTitle())
+ getOptions().colored_ostream(OS, raw_ostream::CYAN)
+ << getOptions().ProjectTitle << "\n";
+
+ getOptions().colored_ostream(OS, raw_ostream::CYAN) << Title << "\n";
+
+ if (getOptions().hasCreatedTime())
+ getOptions().colored_ostream(OS, raw_ostream::CYAN)
+ << getOptions().CreatedTimeStr << "\n";
}
+
+void SourceCoverageViewText::renderTableHeader(raw_ostream &, unsigned,
+ unsigned) {}
diff --git a/contrib/llvm/tools/llvm-cov/SourceCoverageViewText.h b/contrib/llvm/tools/llvm-cov/SourceCoverageViewText.h
index b2331247b37b..c3f20de92978 100644
--- a/contrib/llvm/tools/llvm-cov/SourceCoverageViewText.h
+++ b/contrib/llvm/tools/llvm-cov/SourceCoverageViewText.h
@@ -26,7 +26,8 @@ public:
void closeViewFile(OwnedStream OS) override;
- Error createIndexFile(ArrayRef<StringRef> SourceFiles) override;
+ Error createIndexFile(ArrayRef<std::string> SourceFiles,
+ const coverage::CoverageMapping &Coverage) override;
CoveragePrinterText(const CoverageViewOptions &Opts)
: CoveragePrinter(Opts) {}
@@ -38,7 +39,7 @@ class SourceCoverageViewText : public SourceCoverageView {
void renderViewFooter(raw_ostream &OS) override;
- void renderSourceName(raw_ostream &OS) override;
+ void renderSourceName(raw_ostream &OS, bool WholeFile) override;
void renderLinePrefix(raw_ostream &OS, unsigned ViewDepth) override;
@@ -70,6 +71,11 @@ class SourceCoverageViewText : public SourceCoverageView {
void renderRegionMarkers(raw_ostream &OS, CoverageSegmentArray Segments,
unsigned ViewDepth) override;
+ void renderTitle(raw_ostream &OS, StringRef Title) override;
+
+ void renderTableHeader(raw_ostream &OS, unsigned FirstUncoveredLineNo,
+ unsigned IndentLevel) override;
+
public:
SourceCoverageViewText(StringRef SourceName, const MemoryBuffer &File,
const CoverageViewOptions &Options,
diff --git a/contrib/llvm/tools/llvm-cov/llvm-cov.cpp b/contrib/llvm/tools/llvm-cov/llvm-cov.cpp
index ba60cd91da90..158415870250 100644
--- a/contrib/llvm/tools/llvm-cov/llvm-cov.cpp
+++ b/contrib/llvm/tools/llvm-cov/llvm-cov.cpp
@@ -30,6 +30,9 @@ int showMain(int argc, const char *argv[]);
/// \brief The main entry point for the 'report' subcommand.
int reportMain(int argc, const char *argv[]);
+/// \brief The main entry point for the 'export' subcommand.
+int exportMain(int argc, const char *argv[]);
+
/// \brief The main entry point for the 'convert-for-testing' subcommand.
int convertForTestingMain(int argc, const char *argv[]);
@@ -38,12 +41,14 @@ int gcovMain(int argc, const char *argv[]);
/// \brief Top level help.
static int helpMain(int argc, const char *argv[]) {
- errs() << "Usage: llvm-cov {gcov|report|show} [OPTION]...\n\n"
+ errs() << "Usage: llvm-cov {export|gcov|report|show} [OPTION]...\n\n"
<< "Shows code coverage information.\n\n"
<< "Subcommands:\n"
+ << " export: Export instrprof file to structured format.\n"
<< " gcov: Work with the gcov format.\n"
- << " show: Annotate source files using instrprof style coverage.\n"
- << " report: Summarize instrprof style coverage information.\n";
+ << " report: Summarize instrprof style coverage information.\n"
+ << " show: Annotate source files using instrprof style coverage.\n";
+
return 0;
}
@@ -68,6 +73,7 @@ int main(int argc, const char **argv) {
typedef int (*MainFunction)(int, const char *[]);
MainFunction Func = StringSwitch<MainFunction>(argv[1])
.Case("convert-for-testing", convertForTestingMain)
+ .Case("export", exportMain)
.Case("gcov", gcovMain)
.Case("report", reportMain)
.Case("show", showMain)
diff --git a/contrib/llvm/tools/llvm-cxxdump/Error.cpp b/contrib/llvm/tools/llvm-cxxdump/Error.cpp
index ff9f0f579066..d59547e3a2ce 100644
--- a/contrib/llvm/tools/llvm-cxxdump/Error.cpp
+++ b/contrib/llvm/tools/llvm-cxxdump/Error.cpp
@@ -22,7 +22,7 @@ namespace {
// deal with the Error value directly, rather than converting to error_code.
class cxxdump_error_category : public std::error_category {
public:
- const char *name() const LLVM_NOEXCEPT override { return "llvm.cxxdump"; }
+ const char *name() const noexcept override { return "llvm.cxxdump"; }
std::string message(int ev) const override {
switch (static_cast<cxxdump_error>(ev)) {
case cxxdump_error::success:
diff --git a/contrib/llvm/tools/llvm-cxxdump/llvm-cxxdump.cpp b/contrib/llvm/tools/llvm-cxxdump/llvm-cxxdump.cpp
index c92d20d6ccf1..b10759ad05c0 100644
--- a/contrib/llvm/tools/llvm-cxxdump/llvm-cxxdump.cpp
+++ b/contrib/llvm/tools/llvm-cxxdump/llvm-cxxdump.cpp
@@ -490,7 +490,7 @@ static void dumpCXXData(const ObjectFile *Obj) {
}
static void dumpArchive(const Archive *Arc) {
- Error Err;
+ Error Err = Error::success();
for (auto &ArcC : Arc->children(Err)) {
Expected<std::unique_ptr<Binary>> ChildOrErr = ArcC.getAsBinary();
if (!ChildOrErr) {
@@ -502,7 +502,7 @@ static void dumpArchive(const Archive *Arc) {
OS.flush();
reportError(Arc->getFileName(), Buf);
}
- ChildOrErr.takeError();
+ consumeError(ChildOrErr.takeError());
continue;
}
diff --git a/contrib/llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp b/contrib/llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp
new file mode 100644
index 000000000000..80a54bbf63d8
--- /dev/null
+++ b/contrib/llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp
@@ -0,0 +1,33 @@
+//===-- llvm-c++filt.cpp --------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Demangle/Demangle.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstdlib>
+#include <iostream>
+
+using namespace llvm;
+
+static void demangle(llvm::raw_ostream &OS, const char *Mangled) {
+ int Status;
+ char *Demangled = itaniumDemangle(Mangled, nullptr, nullptr, &Status);
+ OS << (Demangled ? Demangled : Mangled) << '\n';
+ free(Demangled);
+}
+
+int main(int argc, char **argv) {
+ if (argc == 1)
+ for (std::string Mangled; std::getline(std::cin, Mangled);)
+ demangle(llvm::outs(), Mangled.c_str());
+ else
+ for (int I = 1; I < argc; ++I)
+ demangle(llvm::outs(), argv[I]);
+
+ return EXIT_SUCCESS;
+}
diff --git a/contrib/llvm/tools/llvm-dis/llvm-dis.cpp b/contrib/llvm/tools/llvm-dis/llvm-dis.cpp
index 88333aeb6889..0c1d723a7b10 100644
--- a/contrib/llvm/tools/llvm-dis/llvm-dis.cpp
+++ b/contrib/llvm/tools/llvm-dis/llvm-dis.cpp
@@ -17,7 +17,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/IR/LLVMContext.h"
-#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/AssemblyAnnotationWriter.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
@@ -26,7 +26,6 @@
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/DataStream.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/FormattedStream.h"
@@ -138,35 +137,19 @@ static void diagnosticHandler(const DiagnosticInfo &DI, void *Context) {
exit(1);
}
-static Expected<std::unique_ptr<Module>> openInputFile(LLVMContext &Context) {
- if (MaterializeMetadata) {
- ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr =
- MemoryBuffer::getFileOrSTDIN(InputFilename);
- if (!MBOrErr)
- return errorCodeToError(MBOrErr.getError());
- ErrorOr<std::unique_ptr<Module>> MOrErr =
- getLazyBitcodeModule(std::move(*MBOrErr), Context,
- /*ShouldLazyLoadMetadata=*/true);
- if (!MOrErr)
- return errorCodeToError(MOrErr.getError());
- (*MOrErr)->materializeMetadata();
- return std::move(*MOrErr);
- } else {
- std::string ErrorMessage;
- std::unique_ptr<DataStreamer> Streamer =
- getDataFileStreamer(InputFilename, &ErrorMessage);
- if (!Streamer)
- return make_error<StringError>(ErrorMessage, inconvertibleErrorCode());
- std::string DisplayFilename;
- if (InputFilename == "-")
- DisplayFilename = "<stdin>";
- else
- DisplayFilename = InputFilename;
- ErrorOr<std::unique_ptr<Module>> MOrErr =
- getStreamedBitcodeModule(DisplayFilename, std::move(Streamer), Context);
- (*MOrErr)->materializeAll();
- return std::move(*MOrErr);
- }
+static ExitOnError ExitOnErr;
+
+static std::unique_ptr<Module> openInputFile(LLVMContext &Context) {
+ std::unique_ptr<MemoryBuffer> MB =
+ ExitOnErr(errorOrToExpected(MemoryBuffer::getFileOrSTDIN(InputFilename)));
+ std::unique_ptr<Module> M =
+ ExitOnErr(getOwningLazyBitcodeModule(std::move(MB), Context,
+ /*ShouldLazyLoadMetadata=*/true));
+ if (MaterializeMetadata)
+ ExitOnErr(M->materializeMetadata());
+ else
+ ExitOnErr(M->materializeAll());
+ return M;
}
int main(int argc, char **argv) {
@@ -174,6 +157,8 @@ int main(int argc, char **argv) {
sys::PrintStackTraceOnErrorSignal(argv[0]);
PrettyStackTraceProgram X(argc, argv);
+ ExitOnErr.setBanner(std::string(argv[0]) + ": error: ");
+
LLVMContext Context;
llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
@@ -181,16 +166,7 @@ int main(int argc, char **argv) {
cl::ParseCommandLineOptions(argc, argv, "llvm .bc -> .ll disassembler\n");
- Expected<std::unique_ptr<Module>> MOrErr = openInputFile(Context);
- if (!MOrErr) {
- handleAllErrors(MOrErr.takeError(), [&](ErrorInfoBase &EIB) {
- errs() << argv[0] << ": ";
- EIB.log(errs());
- errs() << '\n';
- });
- return 1;
- }
- std::unique_ptr<Module> M = std::move(*MOrErr);
+ std::unique_ptr<Module> M = openInputFile(Context);
// Just use stdout. We won't actually print anything on it.
if (DontPrint)
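
The ExitOnError pattern adopted here recurs throughout the rest of this change; a minimal sketch of the idiom, assuming the llvm/Support/Error.h API used above (the banner text and helper name are only examples):

  // Sketch: unwrap Expected<T>/Error values, printing the banner and exiting on failure.
  #include "llvm/Support/Error.h"
  #include "llvm/Support/MemoryBuffer.h"
  #include <memory>

  static llvm::ExitOnError ExitOnErr("mytool: error: ");

  static std::unique_ptr<llvm::MemoryBuffer> openOrDie(llvm::StringRef Path) {
    // errorOrToExpected() adapts the ErrorOr<T> result so ExitOnErr can consume it.
    return ExitOnErr(
        llvm::errorOrToExpected(llvm::MemoryBuffer::getFileOrSTDIN(Path)));
  }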
diff --git a/contrib/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp b/contrib/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
index 4b3a011f861d..84fa0e4d2d9e 100644
--- a/contrib/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
+++ b/contrib/llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp
@@ -71,7 +71,12 @@ static cl::opt<DIDumpType> DumpType(
clEnumValN(DIDT_StrOffsetsDwo, "str_offsets.dwo",
".debug_str_offsets.dwo"),
clEnumValN(DIDT_CUIndex, "cu_index", ".debug_cu_index"),
- clEnumValN(DIDT_TUIndex, "tu_index", ".debug_tu_index"), clEnumValEnd));
+ clEnumValN(DIDT_GdbIndex, "gdb_index", ".gdb_index"),
+ clEnumValN(DIDT_TUIndex, "tu_index", ".debug_tu_index")));
+
+static cl::opt<bool>
+ SummarizeTypes("summarize-types",
+ cl::desc("Abbreviate the description of type unit entries"));
static void error(StringRef Filename, std::error_code EC) {
if (!EC)
@@ -86,7 +91,7 @@ static void DumpObjectFile(ObjectFile &Obj, Twine Filename) {
outs() << Filename.str() << ":\tfile format " << Obj.getFileFormatName()
<< "\n\n";
// Dump the complete DWARF structure.
- DICtx->dump(outs(), DumpType);
+ DICtx->dump(outs(), DumpType, false, SummarizeTypes);
}
static void DumpInput(StringRef Filename) {
@@ -107,7 +112,7 @@ static void DumpInput(StringRef Filename) {
auto MachOOrErr = ObjForArch.getAsObjectFile();
error(Filename, errorToErrorCode(MachOOrErr.takeError()));
DumpObjectFile(**MachOOrErr,
- Filename + " (" + ObjForArch.getArchTypeName() + ")");
+ Filename + " (" + ObjForArch.getArchFlagName() + ")");
}
}
diff --git a/contrib/llvm/tools/llvm-extract/llvm-extract.cpp b/contrib/llvm/tools/llvm-extract/llvm-extract.cpp
index 900c461ebd86..aa1eda2f094a 100644
--- a/contrib/llvm/tools/llvm-extract/llvm-extract.cpp
+++ b/contrib/llvm/tools/llvm-extract/llvm-extract.cpp
@@ -22,6 +22,7 @@
#include "llvm/IRReader/IRReader.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Error.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/PrettyStackTrace.h"
@@ -222,12 +223,10 @@ int main(int argc, char **argv) {
}
}
- auto Materialize = [&](GlobalValue &GV) {
- if (std::error_code EC = GV.materialize()) {
- errs() << argv[0] << ": error reading input: " << EC.message() << "\n";
- exit(1);
- }
- };
+ // Use *argv instead of argv[0] to work around a spurious GCC warning.
+ ExitOnError ExitOnErr(std::string(*argv) + ": error reading input: ");
+
+ auto Materialize = [&](GlobalValue &GV) { ExitOnErr(GV.materialize()); };
// Materialize requisite global values.
if (!DeleteFn) {
@@ -251,7 +250,7 @@ int main(int argc, char **argv) {
// Now that we have all the GVs we want, mark the module as fully
// materialized.
// FIXME: should the GVExtractionPass handle this?
- M->materializeAll();
+ ExitOnErr(M->materializeAll());
}
// In addition to deleting all other functions, we also want to spiff it
diff --git a/contrib/llvm/tools/llvm-link/llvm-link.cpp b/contrib/llvm/tools/llvm-link/llvm-link.cpp
index 185ae2a82a17..43431ac3398a 100644
--- a/contrib/llvm/tools/llvm-link/llvm-link.cpp
+++ b/contrib/llvm/tools/llvm-link/llvm-link.cpp
@@ -13,7 +13,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/STLExtras.h"
-#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
@@ -82,8 +82,11 @@ static cl::opt<bool>
Force("f", cl::desc("Enable binary output on terminals"));
static cl::opt<bool>
-OutputAssembly("S",
- cl::desc("Write output as LLVM assembly"), cl::Hidden);
+ DisableLazyLoad("disable-lazy-loading",
+ cl::desc("Disable lazy module loading"));
+
+static cl::opt<bool>
+ OutputAssembly("S", cl::desc("Write output as LLVM assembly"), cl::Hidden);
static cl::opt<bool>
Verbose("v", cl::desc("Print information about actions taken"));
@@ -105,6 +108,8 @@ static cl::opt<bool> PreserveAssemblyUseListOrder(
cl::desc("Preserve use-list order when writing LLVM assembly."),
cl::init(false), cl::Hidden);
+static ExitOnError ExitOnErr;
+
// Read the specified bitcode file in and return it. This routine searches the
// link path for the specified file to try to find it...
//
@@ -114,15 +119,19 @@ static std::unique_ptr<Module> loadFile(const char *argv0,
bool MaterializeMetadata = true) {
SMDiagnostic Err;
if (Verbose) errs() << "Loading '" << FN << "'\n";
- std::unique_ptr<Module> Result =
- getLazyIRFileModule(FN, Err, Context, !MaterializeMetadata);
+ std::unique_ptr<Module> Result;
+ if (DisableLazyLoad)
+ Result = parseIRFile(FN, Err, Context);
+ else
+ Result = getLazyIRFileModule(FN, Err, Context, !MaterializeMetadata);
+
if (!Result) {
Err.print(argv0, errs());
return nullptr;
}
if (MaterializeMetadata) {
- Result->materializeMetadata();
+ ExitOnErr(Result->materializeMetadata());
UpgradeDebugInfo(*Result);
}
@@ -171,7 +180,7 @@ Module &ModuleLazyLoaderCache::operator()(const char *argv0,
}
} // anonymous namespace
-static void diagnosticHandler(const DiagnosticInfo &DI) {
+static void diagnosticHandler(const DiagnosticInfo &DI, void *C) {
unsigned Severity = DI.getSeverity();
switch (Severity) {
case DS_Error:
@@ -192,23 +201,13 @@ static void diagnosticHandler(const DiagnosticInfo &DI) {
errs() << '\n';
}
-static void diagnosticHandlerWithContext(const DiagnosticInfo &DI, void *C) {
- diagnosticHandler(DI);
-}
-
/// Import any functions requested via the -import option.
static bool importFunctions(const char *argv0, LLVMContext &Context,
Linker &L) {
if (SummaryIndex.empty())
return true;
- ErrorOr<std::unique_ptr<ModuleSummaryIndex>> IndexOrErr =
- llvm::getModuleSummaryIndexForFile(SummaryIndex, diagnosticHandler);
- std::error_code EC = IndexOrErr.getError();
- if (EC) {
- errs() << EC.message() << '\n';
- return false;
- }
- auto Index = std::move(IndexOrErr.get());
+ std::unique_ptr<ModuleSummaryIndex> Index =
+ ExitOnErr(llvm::getModuleSummaryIndexForFile(SummaryIndex));
// Map of Module -> List of globals to import from the Module
std::map<StringRef, DenseSet<const GlobalValue *>> ModuleToGlobalsToImportMap;
@@ -257,7 +256,7 @@ static bool importFunctions(const char *argv0, LLVMContext &Context,
auto &Entry = ModuleToGlobalsToImportMap[SrcModule.getModuleIdentifier()];
Entry.insert(F);
- F->materialize();
+ ExitOnErr(F->materialize());
}
// Do the actual import of globals now, one Module at a time
@@ -270,7 +269,7 @@ static bool importFunctions(const char *argv0, LLVMContext &Context,
// If modules were created with lazy metadata loading, materialize it
// now, before linking it (otherwise this will be a noop).
- SrcModule->materializeMetadata();
+ ExitOnErr(SrcModule->materializeMetadata());
UpgradeDebugInfo(*SrcModule);
// Linkage Promotion and renaming
@@ -310,14 +309,18 @@ static bool linkFiles(const char *argv0, LLVMContext &Context, Linker &L,
// If a module summary index is supplied, load it so linkInModule can treat
// local functions/variables as exported and promote if necessary.
if (!SummaryIndex.empty()) {
- ErrorOr<std::unique_ptr<ModuleSummaryIndex>> IndexOrErr =
- llvm::getModuleSummaryIndexForFile(SummaryIndex, diagnosticHandler);
- std::error_code EC = IndexOrErr.getError();
- if (EC) {
- errs() << EC.message() << '\n';
- return false;
+ std::unique_ptr<ModuleSummaryIndex> Index =
+ ExitOnErr(llvm::getModuleSummaryIndexForFile(SummaryIndex));
+
+ // Conservatively mark all internal values as promoted, since this tool
+ // does not do the ThinLink that would normally determine what values to
+ // promote.
+ for (auto &I : *Index) {
+ for (auto &S : I.second) {
+ if (GlobalValue::isLocalLinkage(S->linkage()))
+ S->setLinkage(GlobalValue::ExternalLinkage);
+ }
}
- auto Index = std::move(IndexOrErr.get());
// Promotion
if (renameModuleForThinLTO(*M, *Index))
@@ -341,8 +344,10 @@ int main(int argc, char **argv) {
sys::PrintStackTraceOnErrorSignal(argv[0]);
PrettyStackTraceProgram X(argc, argv);
+ ExitOnErr.setBanner(std::string(argv[0]) + ": ");
+
LLVMContext Context;
- Context.setDiagnosticHandler(diagnosticHandlerWithContext, nullptr, true);
+ Context.setDiagnosticHandler(diagnosticHandler, nullptr, true);
llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
cl::ParseCommandLineOptions(argc, argv, "llvm linker\n");
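
For context, the two IR loading paths that the new -disable-lazy-loading flag selects between can be sketched as follows (illustrative only; the helper name is made up):

  // Sketch: eager vs. lazy IR loading via llvm/IRReader/IRReader.h.
  #include "llvm/IR/LLVMContext.h"
  #include "llvm/IR/Module.h"
  #include "llvm/IRReader/IRReader.h"
  #include "llvm/Support/SourceMgr.h"
  #include <memory>

  static std::unique_ptr<llvm::Module>
  loadIR(llvm::StringRef FN, llvm::LLVMContext &Ctx, bool Lazy) {
    llvm::SMDiagnostic Err;
    return Lazy ? llvm::getLazyIRFileModule(FN, Err, Ctx) // bodies/metadata stay deferred
                : llvm::parseIRFile(FN, Err, Ctx);        // whole module parsed up front
  }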
diff --git a/contrib/llvm/tools/llvm-lto/llvm-lto.cpp b/contrib/llvm/tools/llvm-lto/llvm-lto.cpp
index b3b617b941d3..475350c8ecf1 100644
--- a/contrib/llvm/tools/llvm-lto/llvm-lto.cpp
+++ b/contrib/llvm/tools/llvm-lto/llvm-lto.cpp
@@ -13,7 +13,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/StringSet.h"
-#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Bitcode/BitcodeReader.h"
+#include "llvm/Bitcode/BitcodeWriter.h"
#include "llvm/CodeGen/CommandFlags.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/LLVMContext.h"
@@ -42,6 +43,11 @@ static cl::opt<char>
"(default = '-O2')"),
cl::Prefix, cl::ZeroOrMore, cl::init('2'));
+static cl::opt<bool>
+ IndexStats("thinlto-index-stats",
+ cl::desc("Print statistic for the index in every input files"),
+ cl::init(false));
+
static cl::opt<bool> DisableVerify(
"disable-verify", cl::init(false),
cl::desc("Do not run the verifier during the optimization pipeline"));
@@ -97,8 +103,7 @@ cl::opt<ThinLTOModes> ThinLTOMode(
"(requires -thinlto-index)."),
clEnumValN(THINOPT, "optimize", "Perform ThinLTO optimizations."),
clEnumValN(THINCODEGEN, "codegen", "CodeGen (expected to match llc)"),
- clEnumValN(THINALL, "run", "Perform ThinLTO end-to-end"),
- clEnumValEnd));
+ clEnumValN(THINALL, "run", "Perform ThinLTO end-to-end")));
static cl::opt<std::string>
ThinLTOIndex("thinlto-index",
@@ -120,6 +125,16 @@ static cl::opt<std::string> ThinLTOModuleId(
static cl::opt<std::string>
ThinLTOCacheDir("thinlto-cache-dir", cl::desc("Enable ThinLTO caching."));
+static cl::opt<std::string> ThinLTOSaveTempsPrefix(
+ "thinlto-save-temps",
+ cl::desc("Save ThinLTO temp files using filenames created by adding "
+ "suffixes to the given file path prefix."));
+
+static cl::opt<std::string> ThinLTOGeneratedObjectsDir(
+ "thinlto-save-objects",
+ cl::desc("Save ThinLTO generated object files using filenames created in "
+ "the given directory."));
+
static cl::opt<bool>
SaveModuleFile("save-merged-module", cl::init(false),
cl::desc("Write merged LTO module to file before CodeGen"));
@@ -187,7 +202,7 @@ static void handleDiagnostics(lto_codegen_diagnostic_severity_t Severity,
}
static std::string CurrentActivity;
-static void diagnosticHandler(const DiagnosticInfo &DI) {
+static void diagnosticHandler(const DiagnosticInfo &DI, void *Context) {
raw_ostream &OS = errs();
OS << "llvm-lto: ";
switch (DI.getSeverity()) {
@@ -216,11 +231,6 @@ static void diagnosticHandler(const DiagnosticInfo &DI) {
exit(1);
}
-static void diagnosticHandlerWithContext(const DiagnosticInfo &DI,
- void *Context) {
- diagnosticHandler(DI);
-}
-
static void error(const Twine &Msg) {
errs() << "llvm-lto: " << Msg << '\n';
exit(1);
@@ -237,7 +247,7 @@ static void error(const ErrorOr<T> &V, const Twine &Prefix) {
}
static void maybeVerifyModule(const Module &Mod) {
- if (!DisableVerify && verifyModule(Mod))
+ if (!DisableVerify && verifyModule(Mod, &errs()))
error("Broken Module");
}
@@ -250,7 +260,7 @@ getLocalLTOModule(StringRef Path, std::unique_ptr<MemoryBuffer> &Buffer,
Buffer = std::move(BufferOrErr.get());
CurrentActivity = ("loading file '" + Path + "'").str();
std::unique_ptr<LLVMContext> Context = llvm::make_unique<LLVMContext>();
- Context->setDiagnosticHandler(diagnosticHandlerWithContext, nullptr, true);
+ Context->setDiagnosticHandler(diagnosticHandler, nullptr, true);
ErrorOr<std::unique_ptr<LTOModule>> Ret = LTOModule::createInLocalContext(
std::move(Context), Buffer->getBufferStart(), Buffer->getBufferSize(),
Options, Path);
@@ -259,6 +269,37 @@ getLocalLTOModule(StringRef Path, std::unique_ptr<MemoryBuffer> &Buffer,
return std::move(*Ret);
}
+/// Print some statistics on the index for each input file.
+void printIndexStats() {
+ for (auto &Filename : InputFilenames) {
+ ExitOnError ExitOnErr("llvm-lto: error loading file '" + Filename + "': ");
+ std::unique_ptr<ModuleSummaryIndex> Index =
+ ExitOnErr(llvm::getModuleSummaryIndexForFile(Filename));
+ // Skip files without a module summary.
+ if (!Index)
+ report_fatal_error(Filename + " does not contain an index");
+
+ unsigned Calls = 0, Refs = 0, Functions = 0, Alias = 0, Globals = 0;
+ for (auto &Summaries : *Index) {
+ for (auto &Summary : Summaries.second) {
+ Refs += Summary->refs().size();
+ if (auto *FuncSummary = dyn_cast<FunctionSummary>(Summary.get())) {
+ Functions++;
+ Calls += FuncSummary->calls().size();
+ } else if (isa<AliasSummary>(Summary.get()))
+ Alias++;
+ else
+ Globals++;
+ }
+ }
+ outs() << "Index " << Filename << " contains "
+ << (Alias + Globals + Functions) << " nodes (" << Functions
+ << " functions, " << Alias << " alias, " << Globals
+ << " globals) and " << (Calls + Refs) << " edges (" << Refs
+ << " refs and " << Calls << " calls)\n";
+ }
+}
+
/// \brief List symbols in each IR file.
///
/// The main point here is to provide lit-testable coverage for the LTOModule
@@ -286,12 +327,9 @@ static void createCombinedModuleSummaryIndex() {
ModuleSummaryIndex CombinedIndex;
uint64_t NextModuleId = 0;
for (auto &Filename : InputFilenames) {
- CurrentActivity = "loading file '" + Filename + "'";
- ErrorOr<std::unique_ptr<ModuleSummaryIndex>> IndexOrErr =
- llvm::getModuleSummaryIndexForFile(Filename, diagnosticHandler);
- error(IndexOrErr, "error " + CurrentActivity);
- std::unique_ptr<ModuleSummaryIndex> Index = std::move(IndexOrErr.get());
- CurrentActivity = "";
+ ExitOnError ExitOnErr("llvm-lto: error loading file '" + Filename + "': ");
+ std::unique_ptr<ModuleSummaryIndex> Index =
+ ExitOnErr(llvm::getModuleSummaryIndexForFile(Filename));
// Skip files without a module summary.
if (!Index)
continue;
@@ -356,11 +394,9 @@ loadAllFilesForIndex(const ModuleSummaryIndex &Index) {
std::unique_ptr<ModuleSummaryIndex> loadCombinedIndex() {
if (ThinLTOIndex.empty())
report_fatal_error("Missing -thinlto-index for ThinLTO promotion stage");
- auto CurrentActivity = "loading file '" + ThinLTOIndex + "'";
- ErrorOr<std::unique_ptr<ModuleSummaryIndex>> IndexOrErr =
- llvm::getModuleSummaryIndexForFile(ThinLTOIndex, diagnosticHandler);
- error(IndexOrErr, "error " + CurrentActivity);
- return std::move(IndexOrErr.get());
+ ExitOnError ExitOnErr("llvm-lto: error loading file '" + ThinLTOIndex +
+ "': ");
+ return ExitOnErr(llvm::getModuleSummaryIndexForFile(ThinLTOIndex));
}
static std::unique_ptr<Module> loadModule(StringRef Filename,
@@ -446,6 +482,8 @@ private:
}
auto CombinedIndex = ThinGenerator.linkCombinedIndex();
+ if (!CombinedIndex)
+ report_fatal_error("ThinLink didn't create an index");
std::error_code EC;
raw_fd_ostream OS(OutputFilename, EC, sys::fs::OpenFlags::F_None);
error(EC, "error opening the file '" + OutputFilename + "'");
@@ -672,6 +710,15 @@ private:
ThinGenerator.addModule(Filename, InputBuffers.back()->getBuffer());
}
+ if (!ThinLTOSaveTempsPrefix.empty())
+ ThinGenerator.setSaveTempsDir(ThinLTOSaveTempsPrefix);
+
+ if (!ThinLTOGeneratedObjectsDir.empty()) {
+ ThinGenerator.setGeneratedObjectsDirectory(ThinLTOGeneratedObjectsDir);
+ ThinGenerator.run();
+ return;
+ }
+
ThinGenerator.run();
auto &Binaries = ThinGenerator.getProducedBinaries();
@@ -718,14 +765,19 @@ int main(int argc, char **argv) {
return 0;
}
+ if (IndexStats) {
+ printIndexStats();
+ return 0;
+ }
+
if (CheckHasObjC) {
for (auto &Filename : InputFilenames) {
- ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
- MemoryBuffer::getFile(Filename);
- error(BufferOrErr, "error loading file '" + Filename + "'");
+ ExitOnError ExitOnErr(std::string(*argv) + ": error loading file '" +
+ Filename + "': ");
+ std::unique_ptr<MemoryBuffer> BufferOrErr =
+ ExitOnErr(errorOrToExpected(MemoryBuffer::getFile(Filename)));
auto Buffer = std::move(BufferOrErr.get());
- LLVMContext Ctx;
- if (llvm::isBitcodeContainingObjCCategory(*Buffer, Ctx))
+ if (ExitOnErr(llvm::isBitcodeContainingObjCCategory(*Buffer)))
outs() << "Bitcode " << Filename << " contains ObjC\n";
else
outs() << "Bitcode " << Filename << " does not contain ObjC\n";
@@ -749,7 +801,7 @@ int main(int argc, char **argv) {
unsigned BaseArg = 0;
LLVMContext Context;
- Context.setDiagnosticHandler(diagnosticHandlerWithContext, nullptr, true);
+ Context.setDiagnosticHandler(diagnosticHandler, nullptr, true);
LTOCodeGenerator CodeGen(Context);
@@ -771,7 +823,7 @@ int main(int argc, char **argv) {
for (unsigned i = BaseArg; i < InputFilenames.size(); ++i) {
CurrentActivity = "loading file '" + InputFilenames[i] + "'";
ErrorOr<std::unique_ptr<LTOModule>> ModuleOrErr =
- LTOModule::createFromFile(Context, InputFilenames[i].c_str(), Options);
+ LTOModule::createFromFile(Context, InputFilenames[i], Options);
std::unique_ptr<LTOModule> &Module = *ModuleOrErr;
CurrentActivity = "";
@@ -799,11 +851,11 @@ int main(int argc, char **argv) {
// Add all the exported symbols to the table of symbols to preserve.
for (unsigned i = 0; i < ExportedSymbols.size(); ++i)
- CodeGen.addMustPreserveSymbol(ExportedSymbols[i].c_str());
+ CodeGen.addMustPreserveSymbol(ExportedSymbols[i]);
// Add all the dso symbols to the table of symbols to expose.
for (unsigned i = 0; i < KeptDSOSyms.size(); ++i)
- CodeGen.addMustPreserveSymbol(KeptDSOSyms[i].c_str());
+ CodeGen.addMustPreserveSymbol(KeptDSOSyms[i]);
// Set cpu and attrs strings for the default target/subtarget.
CodeGen.setCpu(MCPU.c_str());
@@ -818,7 +870,7 @@ int main(int argc, char **argv) {
}
if (!attrs.empty())
- CodeGen.setAttr(attrs.c_str());
+ CodeGen.setAttr(attrs);
if (FileType.getNumOccurrences())
CodeGen.setFileType(FileType);
@@ -835,7 +887,7 @@ int main(int argc, char **argv) {
ModuleFilename += ".merged.bc";
std::string ErrMsg;
- if (!CodeGen.writeMergedModules(ModuleFilename.c_str()))
+ if (!CodeGen.writeMergedModules(ModuleFilename))
error("writing merged module failed.");
}
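
A hedged usage illustration of the new statistics flag added above; the input file names are hypothetical:

  llvm-lto -thinlto-index-stats a.bc b.bc   # prints per-file node and edge counts from the summary index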
diff --git a/contrib/llvm/tools/llvm-lto2/llvm-lto2.cpp b/contrib/llvm/tools/llvm-lto2/llvm-lto2.cpp
new file mode 100644
index 000000000000..c09311a05b90
--- /dev/null
+++ b/contrib/llvm/tools/llvm-lto2/llvm-lto2.cpp
@@ -0,0 +1,274 @@
+//===-- llvm-lto2: test harness for the resolution-based LTO interface ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This program takes in a list of bitcode files, links them and performs
+// link-time optimization according to the provided symbol resolutions using the
+// resolution-based LTO interface, and outputs one or more object files.
+//
+// This program is intended to eventually replace llvm-lto which uses the legacy
+// LTO interface.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/LTO/Caching.h"
+#include "llvm/CodeGen/CommandFlags.h"
+#include "llvm/IR/DiagnosticPrinter.h"
+#include "llvm/LTO/LTO.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/Threading.h"
+
+using namespace llvm;
+using namespace lto;
+using namespace object;
+
+static cl::opt<char>
+ OptLevel("O", cl::desc("Optimization level. [-O0, -O1, -O2, or -O3] "
+ "(default = '-O2')"),
+ cl::Prefix, cl::ZeroOrMore, cl::init('2'));
+
+static cl::opt<char> CGOptLevel(
+ "cg-opt-level",
+ cl::desc("Codegen optimization level (0, 1, 2 or 3, default = '2')"),
+ cl::init('2'));
+
+static cl::list<std::string> InputFilenames(cl::Positional, cl::OneOrMore,
+ cl::desc("<input bitcode files>"));
+
+static cl::opt<std::string> OutputFilename("o", cl::Required,
+ cl::desc("Output filename"),
+ cl::value_desc("filename"));
+
+static cl::opt<std::string> CacheDir("cache-dir", cl::desc("Cache Directory"),
+ cl::value_desc("directory"));
+
+static cl::opt<std::string> OptPipeline("opt-pipeline",
+ cl::desc("Optimizer Pipeline"),
+ cl::value_desc("pipeline"));
+
+static cl::opt<std::string> AAPipeline("aa-pipeline",
+ cl::desc("Alias Analysis Pipeline"),
+ cl::value_desc("aapipeline"));
+
+static cl::opt<bool> SaveTemps("save-temps", cl::desc("Save temporary files"));
+
+static cl::opt<bool>
+ ThinLTODistributedIndexes("thinlto-distributed-indexes", cl::init(false),
+ cl::desc("Write out individual index and "
+ "import files for the "
+ "distributed backend case"));
+
+static cl::opt<int> Threads("thinlto-threads",
+ cl::init(llvm::heavyweight_hardware_concurrency()));
+
+static cl::list<std::string> SymbolResolutions(
+ "r",
+ cl::desc("Specify a symbol resolution: filename,symbolname,resolution\n"
+ "where \"resolution\" is a sequence (which may be empty) of the\n"
+ "following characters:\n"
+ " p - prevailing: the linker has chosen this definition of the\n"
+ " symbol\n"
+ " l - local: the definition of this symbol is unpreemptable at\n"
+ " runtime and is known to be in this linkage unit\n"
+ " x - externally visible: the definition of this symbol is\n"
+ " visible outside of the LTO unit\n"
+ "A resolution for each symbol must be specified."),
+ cl::ZeroOrMore);
+
+static cl::opt<std::string> OverrideTriple(
+ "override-triple",
+ cl::desc("Replace target triples in input files with this triple"));
+
+static cl::opt<std::string> DefaultTriple(
+ "default-triple",
+ cl::desc(
+ "Replace unspecified target triples in input files with this triple"));
+
+static void check(Error E, std::string Msg) {
+ if (!E)
+ return;
+ handleAllErrors(std::move(E), [&](ErrorInfoBase &EIB) {
+ errs() << "llvm-lto: " << Msg << ": " << EIB.message().c_str() << '\n';
+ });
+ exit(1);
+}
+
+template <typename T> static T check(Expected<T> E, std::string Msg) {
+ if (E)
+ return std::move(*E);
+ check(E.takeError(), Msg);
+ return T();
+}
+
+static void check(std::error_code EC, std::string Msg) {
+ check(errorCodeToError(EC), Msg);
+}
+
+template <typename T> static T check(ErrorOr<T> E, std::string Msg) {
+ if (E)
+ return std::move(*E);
+ check(E.getError(), Msg);
+ return T();
+}
+
+int main(int argc, char **argv) {
+ InitializeAllTargets();
+ InitializeAllTargetMCs();
+ InitializeAllAsmPrinters();
+ InitializeAllAsmParsers();
+
+ cl::ParseCommandLineOptions(argc, argv, "Resolution-based LTO test harness");
+
+ // FIXME: Workaround PR30396 which means that a symbol can appear
+ // more than once if it is defined in module-level assembly and
+ // has a GV declaration. We allow (file, symbol) pairs to have multiple
+ // resolutions and apply them in the order observed.
+ std::map<std::pair<std::string, std::string>, std::list<SymbolResolution>>
+ CommandLineResolutions;
+ for (std::string R : SymbolResolutions) {
+ StringRef Rest = R;
+ StringRef FileName, SymbolName;
+ std::tie(FileName, Rest) = Rest.split(',');
+ if (Rest.empty()) {
+ llvm::errs() << "invalid resolution: " << R << '\n';
+ return 1;
+ }
+ std::tie(SymbolName, Rest) = Rest.split(',');
+ SymbolResolution Res;
+ for (char C : Rest) {
+ if (C == 'p')
+ Res.Prevailing = true;
+ else if (C == 'l')
+ Res.FinalDefinitionInLinkageUnit = true;
+ else if (C == 'x')
+ Res.VisibleToRegularObj = true;
+ else
+ llvm::errs() << "invalid character " << C << " in resolution: " << R
+ << '\n';
+ }
+ CommandLineResolutions[{FileName, SymbolName}].push_back(Res);
+ }
+
+ std::vector<std::unique_ptr<MemoryBuffer>> MBs;
+
+ Config Conf;
+ Conf.DiagHandler = [](const DiagnosticInfo &DI) {
+ DiagnosticPrinterRawOStream DP(errs());
+ DI.print(DP);
+ errs() << '\n';
+ exit(1);
+ };
+
+ Conf.CPU = MCPU;
+ Conf.Options = InitTargetOptionsFromCodeGenFlags();
+ Conf.MAttrs = MAttrs;
+ if (auto RM = getRelocModel())
+ Conf.RelocModel = *RM;
+ Conf.CodeModel = CMModel;
+
+ if (SaveTemps)
+ check(Conf.addSaveTemps(OutputFilename + "."),
+ "Config::addSaveTemps failed");
+
+ // Run a custom pipeline, if asked for.
+ Conf.OptPipeline = OptPipeline;
+ Conf.AAPipeline = AAPipeline;
+
+ Conf.OptLevel = OptLevel - '0';
+ switch (CGOptLevel) {
+ case '0':
+ Conf.CGOptLevel = CodeGenOpt::None;
+ break;
+ case '1':
+ Conf.CGOptLevel = CodeGenOpt::Less;
+ break;
+ case '2':
+ Conf.CGOptLevel = CodeGenOpt::Default;
+ break;
+ case '3':
+ Conf.CGOptLevel = CodeGenOpt::Aggressive;
+ break;
+ default:
+ llvm::errs() << "invalid cg optimization level: " << CGOptLevel << '\n';
+ return 1;
+ }
+
+ Conf.OverrideTriple = OverrideTriple;
+ Conf.DefaultTriple = DefaultTriple;
+
+ ThinBackend Backend;
+ if (ThinLTODistributedIndexes)
+ Backend = createWriteIndexesThinBackend("", "", true, "");
+ else
+ Backend = createInProcessThinBackend(Threads);
+ LTO Lto(std::move(Conf), std::move(Backend));
+
+ bool HasErrors = false;
+ for (std::string F : InputFilenames) {
+ std::unique_ptr<MemoryBuffer> MB = check(MemoryBuffer::getFile(F), F);
+ std::unique_ptr<InputFile> Input =
+ check(InputFile::create(MB->getMemBufferRef()), F);
+
+ std::vector<SymbolResolution> Res;
+ for (const InputFile::Symbol &Sym : Input->symbols()) {
+ auto I = CommandLineResolutions.find({F, Sym.getName()});
+ if (I == CommandLineResolutions.end()) {
+ llvm::errs() << argv[0] << ": missing symbol resolution for " << F
+ << ',' << Sym.getName() << '\n';
+ HasErrors = true;
+ } else {
+ Res.push_back(I->second.front());
+ I->second.pop_front();
+ if (I->second.empty())
+ CommandLineResolutions.erase(I);
+ }
+ }
+
+ if (HasErrors)
+ continue;
+
+ MBs.push_back(std::move(MB));
+ check(Lto.add(std::move(Input), Res), F);
+ }
+
+ if (!CommandLineResolutions.empty()) {
+ HasErrors = true;
+ for (auto UnusedRes : CommandLineResolutions)
+ llvm::errs() << argv[0] << ": unused symbol resolution for "
+ << UnusedRes.first.first << ',' << UnusedRes.first.second
+ << '\n';
+ }
+ if (HasErrors)
+ return 1;
+
+ auto AddStream =
+ [&](size_t Task) -> std::unique_ptr<lto::NativeObjectStream> {
+ std::string Path = OutputFilename + "." + utostr(Task);
+
+ std::error_code EC;
+ auto S = llvm::make_unique<raw_fd_ostream>(Path, EC, sys::fs::F_None);
+ check(EC, Path);
+ return llvm::make_unique<lto::NativeObjectStream>(std::move(S));
+ };
+
+ auto AddFile = [&](size_t Task, StringRef Path) {
+ auto ReloadedBufferOrErr = MemoryBuffer::getFile(Path);
+ if (auto EC = ReloadedBufferOrErr.getError())
+ report_fatal_error(Twine("Can't reload cached file '") + Path + "': " +
+ EC.message() + "\n");
+
+ *AddStream(Task)->OS << (*ReloadedBufferOrErr)->getBuffer();
+ };
+
+ NativeObjectCache Cache;
+ if (!CacheDir.empty())
+ Cache = localCache(CacheDir, AddFile);
+
+ check(Lto.run(AddStream, Cache), "LTO::run failed");
+}
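
A hedged usage illustration of the -r resolution syntax described above; the file names, symbol names, and chosen resolution characters are hypothetical:

  llvm-lto2 -o out.o \
      -r main.bc,main,plx \
      -r util.bc,helper,pl \
      main.bc util.bc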
diff --git a/contrib/llvm/tools/llvm-mc/Disassembler.cpp b/contrib/llvm/tools/llvm-mc/Disassembler.cpp
index 8185947fc5e5..acc5a5f4cab2 100644
--- a/contrib/llvm/tools/llvm-mc/Disassembler.cpp
+++ b/contrib/llvm/tools/llvm-mc/Disassembler.cpp
@@ -66,7 +66,7 @@ static bool PrintInsts(const MCDisassembler &DisAsm,
SM.PrintMessage(SMLoc::getFromPointer(Bytes.second[Index]),
SourceMgr::DK_Warning,
"potentially undefined instruction encoding");
- // Fall through
+ LLVM_FALLTHROUGH;
case MCDisassembler::Success:
Streamer.EmitInstruction(Inst, STI);
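
A minimal sketch of the LLVM_FALLTHROUGH annotation used above (from llvm/Support/Compiler.h); the switch itself is only an example:

  // Sketch: mark an intentional fall-through so implicit-fallthrough warnings stay quiet.
  #include "llvm/Support/Compiler.h"

  static int classify(int Kind) {
    switch (Kind) {
    case 0:
      // handle the degraded case first, then share the common path below
      LLVM_FALLTHROUGH;
    case 1:
      return 0;
    default:
      return 1;
    }
  }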
diff --git a/contrib/llvm/tools/llvm-mc/llvm-mc.cpp b/contrib/llvm/tools/llvm-mc/llvm-mc.cpp
index 00f19cb328c3..497fb1987764 100644
--- a/contrib/llvm/tools/llvm-mc/llvm-mc.cpp
+++ b/contrib/llvm/tools/llvm-mc/llvm-mc.cpp
@@ -66,8 +66,7 @@ CompressDebugSections("compress-debug-sections", cl::ValueOptional,
clEnumValN(DebugCompressionType::DCT_Zlib, "zlib",
"Use zlib compression"),
clEnumValN(DebugCompressionType::DCT_ZlibGnu, "zlib-gnu",
- "Use zlib-gnu compression (depricated)"),
- clEnumValEnd));
+ "Use zlib-gnu compression (deprecated)")));
static cl::opt<bool>
ShowInst("show-inst", cl::desc("Show internal instruction representation"));
@@ -105,8 +104,7 @@ FileType("filetype", cl::init(OFT_AssemblyFile),
clEnumValN(OFT_Null, "null",
"Don't emit anything (for timing purposes)"),
clEnumValN(OFT_ObjectFile, "obj",
- "Emit a native object ('.o') file"),
- clEnumValEnd));
+ "Emit a native object ('.o') file")));
static cl::list<std::string>
IncludeDirs("I", cl::desc("Directory of include files"),
@@ -148,8 +146,7 @@ CMModel("code-model",
clEnumValN(CodeModel::Medium, "medium",
"Medium code model"),
clEnumValN(CodeModel::Large, "large",
- "Large code model"),
- clEnumValEnd));
+ "Large code model")));
static cl::opt<bool>
NoInitialTextSection("n", cl::desc("Don't assume assembly file starts "
@@ -190,8 +187,7 @@ Action(cl::desc("Action to perform:"),
clEnumValN(AC_Disassemble, "disassemble",
"Disassemble strings of hex bytes"),
clEnumValN(AC_MDisassemble, "mdis",
- "Marked up disassembly of strings of hex bytes"),
- clEnumValEnd));
+ "Marked up disassembly of strings of hex bytes")));
static const Target *GetTarget(const char *ProgName) {
// Figure out the target triple.
@@ -314,6 +310,78 @@ static int AsLexInput(SourceMgr &SrcMgr, MCAsmInfo &MAI,
case AsmToken::Slash: OS << "Slash"; break;
case AsmToken::Star: OS << "Star"; break;
case AsmToken::Tilde: OS << "Tilde"; break;
+ case AsmToken::PercentCall16:
+ OS << "PercentCall16";
+ break;
+ case AsmToken::PercentCall_Hi:
+ OS << "PercentCall_Hi";
+ break;
+ case AsmToken::PercentCall_Lo:
+ OS << "PercentCall_Lo";
+ break;
+ case AsmToken::PercentDtprel_Hi:
+ OS << "PercentDtprel_Hi";
+ break;
+ case AsmToken::PercentDtprel_Lo:
+ OS << "PercentDtprel_Lo";
+ break;
+ case AsmToken::PercentGot:
+ OS << "PercentGot";
+ break;
+ case AsmToken::PercentGot_Disp:
+ OS << "PercentGot_Disp";
+ break;
+ case AsmToken::PercentGot_Hi:
+ OS << "PercentGot_Hi";
+ break;
+ case AsmToken::PercentGot_Lo:
+ OS << "PercentGot_Lo";
+ break;
+ case AsmToken::PercentGot_Ofst:
+ OS << "PercentGot_Ofst";
+ break;
+ case AsmToken::PercentGot_Page:
+ OS << "PercentGot_Page";
+ break;
+ case AsmToken::PercentGottprel:
+ OS << "PercentGottprel";
+ break;
+ case AsmToken::PercentGp_Rel:
+ OS << "PercentGp_Rel";
+ break;
+ case AsmToken::PercentHi:
+ OS << "PercentHi";
+ break;
+ case AsmToken::PercentHigher:
+ OS << "PercentHigher";
+ break;
+ case AsmToken::PercentHighest:
+ OS << "PercentHighest";
+ break;
+ case AsmToken::PercentLo:
+ OS << "PercentLo";
+ break;
+ case AsmToken::PercentNeg:
+ OS << "PercentNeg";
+ break;
+ case AsmToken::PercentPcrel_Hi:
+ OS << "PercentPcrel_Hi";
+ break;
+ case AsmToken::PercentPcrel_Lo:
+ OS << "PercentPcrel_Lo";
+ break;
+ case AsmToken::PercentTlsgd:
+ OS << "PercentTlsgd";
+ break;
+ case AsmToken::PercentTlsldm:
+ OS << "PercentTlsldm";
+ break;
+ case AsmToken::PercentTprel_Hi:
+ OS << "PercentTprel_Hi";
+ break;
+ case AsmToken::PercentTprel_Lo:
+ OS << "PercentTprel_Lo";
+ break;
}
// Print the token string.
@@ -325,22 +393,22 @@ static int AsLexInput(SourceMgr &SrcMgr, MCAsmInfo &MAI,
return Error;
}
-static int fillCommandLineSymbols(MCAsmParser &Parser){
- for(auto &I: DefineSymbol){
+static int fillCommandLineSymbols(MCAsmParser &Parser) {
+ for (auto &I: DefineSymbol) {
auto Pair = StringRef(I).split('=');
- if(Pair.second.empty()){
- errs() << "error: defsym must be of the form: sym=value: " << I;
+ auto Sym = Pair.first;
+ auto Val = Pair.second;
+
+ if (Sym.empty() || Val.empty()) {
+ errs() << "error: defsym must be of the form: sym=value: " << I << "\n";
return 1;
}
int64_t Value;
- if(Pair.second.getAsInteger(0, Value)){
- errs() << "error: Value is not an integer: " << Pair.second;
+ if (Val.getAsInteger(0, Value)) {
+ errs() << "error: Value is not an integer: " << Val << "\n";
return 1;
}
- auto &Context = Parser.getContext();
- auto Symbol = Context.getOrCreateSymbol(Pair.first);
- Parser.getStreamer().EmitAssignment(Symbol,
- MCConstantExpr::create(Value, Context));
+ Parser.getContext().setSymbolValue(Parser.getStreamer(), Sym, Value);
}
return 0;
}
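
A hedged usage illustration of the symbol definitions this loop parses; the flag spelling is taken from the error text above, and the symbol name and value are hypothetical:

  llvm-mc -defsym FOO=16 input.s   # FOO becomes an assembler symbol with value 16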
@@ -495,6 +563,14 @@ int main(int argc, char **argv) {
IP = TheTarget->createMCInstPrinter(Triple(TripleName), OutputAsmVariant,
*MAI, *MCII, *MRI);
+ if (!IP) {
+ errs()
+ << "error: unable to create instruction printer for target triple '"
+ << TheTriple.normalize() << "' with assembly variant "
+ << OutputAsmVariant << ".\n";
+ return 1;
+ }
+
// Set the display preference for hex vs. decimal immediates.
IP->setPrintImmHex(PrintImmHex);
@@ -503,7 +579,7 @@ int main(int argc, char **argv) {
MCAsmBackend *MAB = nullptr;
if (ShowEncoding) {
CE = TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx);
- MAB = TheTarget->createMCAsmBackend(*MRI, TripleName, MCPU);
+ MAB = TheTarget->createMCAsmBackend(*MRI, TripleName, MCPU, MCOptions);
}
auto FOut = llvm::make_unique<formatted_raw_ostream>(*OS);
Str.reset(TheTarget->createAsmStreamer(
@@ -524,7 +600,8 @@ int main(int argc, char **argv) {
}
MCCodeEmitter *CE = TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx);
- MCAsmBackend *MAB = TheTarget->createMCAsmBackend(*MRI, TripleName, MCPU);
+ MCAsmBackend *MAB = TheTarget->createMCAsmBackend(*MRI, TripleName, MCPU,
+ MCOptions);
Str.reset(TheTarget->createMCObjectStreamer(
TheTriple, Ctx, *MAB, *OS, CE, *STI, MCOptions.MCRelaxAll,
MCOptions.MCIncrementalLinkerCompatible,
diff --git a/contrib/llvm/tools/llvm-modextract/llvm-modextract.cpp b/contrib/llvm/tools/llvm-modextract/llvm-modextract.cpp
new file mode 100644
index 000000000000..6c2e364be448
--- /dev/null
+++ b/contrib/llvm/tools/llvm-modextract/llvm-modextract.cpp
@@ -0,0 +1,74 @@
+//===-- llvm-modextract.cpp - LLVM module extractor utility ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This program is for testing features that rely on multi-module bitcode files.
+// It takes a multi-module bitcode file, extracts one of the modules and writes
+// it to the output file.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Bitcode/BitcodeReader.h"
+#include "llvm/Bitcode/BitcodeWriter.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/ToolOutputFile.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+ BinaryExtract("b", cl::desc("Whether to perform binary extraction"));
+
+static cl::opt<std::string> OutputFilename("o", cl::Required,
+ cl::desc("Output filename"),
+ cl::value_desc("filename"));
+
+static cl::opt<std::string>
+ InputFilename(cl::Positional, cl::desc("<input bitcode>"), cl::init("-"));
+
+static cl::opt<unsigned> ModuleIndex("n", cl::Required,
+ cl::desc("Index of module to extract"),
+ cl::value_desc("index"));
+
+int main(int argc, char **argv) {
+ cl::ParseCommandLineOptions(argc, argv, "Module extractor");
+
+ ExitOnError ExitOnErr("llvm-modextract: error: ");
+
+ std::unique_ptr<MemoryBuffer> MB =
+ ExitOnErr(errorOrToExpected(MemoryBuffer::getFileOrSTDIN(InputFilename)));
+ std::vector<BitcodeModule> Ms = ExitOnErr(getBitcodeModuleList(*MB));
+
+ LLVMContext Context;
+ if (ModuleIndex >= Ms.size()) {
+ errs() << "llvm-modextract: error: module index out of range; bitcode file "
+ "contains "
+ << Ms.size() << " module(s)\n";
+ return 1;
+ }
+
+ std::error_code EC;
+ std::unique_ptr<tool_output_file> Out(
+ new tool_output_file(OutputFilename, EC, sys::fs::F_None));
+ ExitOnErr(errorCodeToError(EC));
+
+ if (BinaryExtract) {
+ SmallVector<char, 0> Header;
+ BitcodeWriter Writer(Header);
+ Out->os() << Header << Ms[ModuleIndex].getBuffer();
+ Out->keep();
+ return 0;
+ }
+
+ std::unique_ptr<Module> M = ExitOnErr(Ms[ModuleIndex].parseModule(Context));
+ WriteBitcodeToFile(M.get(), Out->os());
+
+ Out->keep();
+ return 0;
+}
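
A hedged usage illustration of the new tool; the file names and module index are hypothetical:

  llvm-modextract -n 1 -o second.bc multi.bc      # parse module #1 and rewrite it
  llvm-modextract -b -n 1 -o second.bc multi.bc   # copy module #1's bitcode without re-parsing it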
diff --git a/contrib/llvm/tools/llvm-nm/llvm-nm.cpp b/contrib/llvm/tools/llvm-nm/llvm-nm.cpp
index 9b2f5220badb..aa89f89c31d5 100644
--- a/contrib/llvm/tools/llvm-nm/llvm-nm.cpp
+++ b/contrib/llvm/tools/llvm-nm/llvm-nm.cpp
@@ -56,7 +56,7 @@ cl::opt<OutputFormatTy> OutputFormat(
"format", cl::desc("Specify output format"),
cl::values(clEnumVal(bsd, "BSD format"), clEnumVal(sysv, "System V format"),
clEnumVal(posix, "POSIX.2 format"),
- clEnumVal(darwin, "Darwin -m format"), clEnumValEnd),
+ clEnumVal(darwin, "Darwin -m format")),
cl::init(bsd));
cl::alias OutputFormat2("f", cl::desc("Alias for --format"),
cl::aliasopt(OutputFormat));
@@ -143,7 +143,7 @@ enum Radix { d, o, x };
cl::opt<Radix>
AddressRadix("radix", cl::desc("Radix (o/d/x) for printing symbol Values"),
cl::values(clEnumVal(d, "decimal"), clEnumVal(o, "octal"),
- clEnumVal(x, "hexadecimal"), clEnumValEnd),
+ clEnumVal(x, "hexadecimal")),
cl::init(x));
cl::alias RadixAlias("t", cl::desc("Alias for --radix"),
cl::aliasopt(AddressRadix));
@@ -198,13 +198,14 @@ static void error(llvm::Error E, StringRef FileName, const Archive::Child &C,
HadError = true;
errs() << ToolName << ": " << FileName;
- ErrorOr<StringRef> NameOrErr = C.getName();
+ Expected<StringRef> NameOrErr = C.getName();
// TODO: if we have an error getting the name then it would be nice to print
// the index of which archive member this is and or its offset in the
// archive instead of "???" as the name.
- if (NameOrErr.getError())
+ if (!NameOrErr) {
+ consumeError(NameOrErr.takeError());
errs() << "(" << "???" << ")";
- else
+ } else
errs() << "(" << NameOrErr.get() << ")";
if (!ArchitectureName.empty())
@@ -264,14 +265,8 @@ static bool compareSymbolName(const NMSymbol &A, const NMSymbol &B) {
}
static char isSymbolList64Bit(SymbolicFile &Obj) {
- if (isa<IRObjectFile>(Obj)) {
- IRObjectFile *IRobj = dyn_cast<IRObjectFile>(&Obj);
- Module &M = IRobj->getModule();
- if (M.getTargetTriple().empty())
- return false;
- Triple T(M.getTargetTriple());
- return T.isArch64Bit();
- }
+ if (auto *IRObj = dyn_cast<IRObjectFile>(&Obj))
+ return Triple(IRObj->getTargetTriple()).isArch64Bit();
if (isa<COFFObjectFile>(Obj))
return false;
if (MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(&Obj))
@@ -318,10 +313,10 @@ static void darwinPrintSymbol(SymbolicFile &Obj, SymbolListT::iterator I,
NType |= MachO::N_SECT;
if (SymFlags & SymbolRef::SF_Const)
NSect = 3;
- else {
- IRObjectFile *IRobj = dyn_cast<IRObjectFile>(&Obj);
- NSect = (getSymbolNMTypeChar(*IRobj, I->Sym) == 't') ? 1 : 2;
- }
+ else if (SymFlags & SymbolRef::SF_Executable)
+ NSect = 1;
+ else
+ NSect = 2;
}
if (SymFlags & SymbolRef::SF_Weak)
NDesc |= MachO::N_WEAK_DEF;
@@ -596,6 +591,10 @@ static void darwinPrintStab(MachOObjectFile *MachO, SymbolListT::iterator I) {
outs() << Str;
}
+static bool symbolIsDefined(const NMSymbol &Sym) {
+ return Sym.TypeChar != 'U' && Sym.TypeChar != 'w' && Sym.TypeChar != 'v';
+}
+
static void sortAndPrintSymbolList(SymbolicFile &Obj, bool printName,
const std::string &ArchiveName,
const std::string &ArchitectureName) {
@@ -682,24 +681,28 @@ static void sortAndPrintSymbolList(SymbolicFile &Obj, bool printName,
char SymbolAddrStr[18] = "";
char SymbolSizeStr[18] = "";
- if (OutputFormat == sysv || I->TypeChar == 'U') {
- if (OutputFormat == posix)
+ // If the format is SysV or the symbol isn't defined, then print spaces.
+ if (OutputFormat == sysv || !symbolIsDefined(*I)) {
+ if (OutputFormat == posix) {
format(printFormat, I->Address)
.print(SymbolAddrStr, sizeof(SymbolAddrStr));
- else
+ format(printFormat, I->Size)
+ .print(SymbolSizeStr, sizeof(SymbolSizeStr));
+ } else {
strcpy(SymbolAddrStr, printBlanks);
+ strcpy(SymbolSizeStr, printBlanks);
+ }
}
- if (OutputFormat == sysv)
- strcpy(SymbolSizeStr, printBlanks);
- if (I->TypeChar != 'U') {
+ // Otherwise, print the symbol address and size.
+ if (symbolIsDefined(*I)) {
if (Obj.isIR())
strcpy(SymbolAddrStr, printDashes);
else
format(printFormat, I->Address)
.print(SymbolAddrStr, sizeof(SymbolAddrStr));
+ format(printFormat, I->Size).print(SymbolSizeStr, sizeof(SymbolSizeStr));
}
- format(printFormat, I->Size).print(SymbolSizeStr, sizeof(SymbolSizeStr));
// If OutputFormat is darwin or we are printing Mach-O symbols in hex and
// we have a MachOObjectFile, call darwinPrintSymbol to print as darwin's
@@ -769,6 +772,9 @@ static char getSymbolNMTypeChar(ELFObjectFileBase &Obj,
break;
case ELF::SHT_NOBITS:
return 'b';
+ case ELF::SHT_INIT_ARRAY:
+ case ELF::SHT_FINI_ARRAY:
+ return 't';
}
}
@@ -876,15 +882,17 @@ static char getSymbolNMTypeChar(MachOObjectFile &Obj, basic_symbol_iterator I) {
return '?';
}
-static char getSymbolNMTypeChar(const GlobalValue &GV) {
+static char getSymbolNMTypeChar(IRObjectFile &Obj, basic_symbol_iterator I) {
+ uint32_t Flags = I->getFlags();
// FIXME: should we print 'b'? At the IR level we cannot be sure if this
// will be in bss or not, but we could approximate.
- return GV.getValueType()->isFunctionTy() ? 't' : 'd';
-}
-
-static char getSymbolNMTypeChar(IRObjectFile &Obj, basic_symbol_iterator I) {
- const GlobalValue *GV = Obj.getSymbolGV(I->getRawDataRefImpl());
- return !GV ? 't' : getSymbolNMTypeChar(*GV);
+ if (Flags & SymbolRef::SF_Executable)
+ return 't';
+ else if (Triple(Obj.getTargetTriple()).isOSDarwin() &&
+ (Flags & SymbolRef::SF_Const))
+ return 's';
+ else
+ return 'd';
}
static bool isObject(SymbolicFile &Obj, basic_symbol_iterator I) {
@@ -909,12 +917,8 @@ static char getNMTypeChar(SymbolicFile &Obj, basic_symbol_iterator I) {
char Ret = '?';
if (Symflags & object::SymbolRef::SF_Absolute)
Ret = 'a';
- else if (IRObjectFile *IR = dyn_cast<IRObjectFile>(&Obj)) {
+ else if (IRObjectFile *IR = dyn_cast<IRObjectFile>(&Obj))
Ret = getSymbolNMTypeChar(*IR, I);
- Triple Host(sys::getDefaultTargetTriple());
- if (Ret == 'd' && Host.isOSDarwin() && Symflags & SymbolRef::SF_Const)
- Ret = 's';
- }
else if (COFFObjectFile *COFF = dyn_cast<COFFObjectFile>(&Obj))
Ret = getSymbolNMTypeChar(*COFF, I);
else if (MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(&Obj))
@@ -994,13 +998,8 @@ dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
uint32_t SymFlags = Sym.getFlags();
if (!DebugSyms && (SymFlags & SymbolRef::SF_FormatSpecific))
continue;
- if (WithoutAliases) {
- if (IRObjectFile *IR = dyn_cast<IRObjectFile>(&Obj)) {
- const GlobalValue *GV = IR->getSymbolGV(Sym.getRawDataRefImpl());
- if (GV && isa<GlobalAlias>(GV))
- continue;
- }
- }
+ if (WithoutAliases && (SymFlags & SymbolRef::SF_Indirect))
+ continue;
// If a "-s segname sectname" option was specified and this is a Mach-O
// file and this section appears in this file, Nsect will be non-zero then
// see if this symbol is a symbol from that section and if not skip it.
@@ -1050,9 +1049,9 @@ dumpSymbolNamesFromObject(SymbolicFile &Obj, bool printName,
// architectures was specified. If not then an error is generated and this
// routine returns false. Else it returns true.
static bool checkMachOAndArchFlags(SymbolicFile *O, std::string &Filename) {
- MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(O);
+ auto *MachO = dyn_cast<MachOObjectFile>(O);
- if (!MachO || ArchAll || ArchFlags.size() == 0)
+ if (!MachO || ArchAll || ArchFlags.empty())
return true;
MachO::mach_header H;
@@ -1065,9 +1064,9 @@ static bool checkMachOAndArchFlags(SymbolicFile *O, std::string &Filename) {
H = MachO->MachOObjectFile::getHeader();
T = MachOObjectFile::getArchTriple(H.cputype, H.cpusubtype);
}
- if (std::none_of(
- ArchFlags.begin(), ArchFlags.end(),
- [&](const std::string &Name) { return Name == T.getArchName(); })) {
+ if (none_of(ArchFlags, [&](const std::string &Name) {
+ return Name == T.getArchName();
+ })) {
error("No architecture specified", Filename);
return false;
}
@@ -1084,7 +1083,7 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
Expected<std::unique_ptr<Binary>> BinaryOrErr = createBinary(
BufferOrErr.get()->getMemBufferRef(), NoLLVMBitcode ? nullptr : &Context);
if (!BinaryOrErr) {
- error(errorToErrorCode(BinaryOrErr.takeError()), Filename);
+ error(BinaryOrErr.takeError(), Filename);
return;
}
Binary &Bin = *BinaryOrErr.get();
@@ -1096,12 +1095,14 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
if (I != E) {
outs() << "Archive map\n";
for (; I != E; ++I) {
- ErrorOr<Archive::Child> C = I->getMember();
- if (error(C.getError()))
- return;
- ErrorOr<StringRef> FileNameOrErr = C->getName();
- if (error(FileNameOrErr.getError()))
+ Expected<Archive::Child> C = I->getMember();
+ if (!C)
+ error(C.takeError(), Filename);
+ Expected<StringRef> FileNameOrErr = C->getName();
+ if (!FileNameOrErr) {
+ error(FileNameOrErr.takeError(), Filename);
return;
+ }
StringRef SymName = I->getName();
outs() << SymName << " in " << FileNameOrErr.get() << "\n";
}
@@ -1110,7 +1111,7 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
}
{
- Error Err;
+ Error Err = Error::success();
for (auto &C : A->children(Err)) {
Expected<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary(&Context);
if (!ChildOrErr) {
@@ -1147,7 +1148,7 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
for (MachOUniversalBinary::object_iterator I = UB->begin_objects(),
E = UB->end_objects();
I != E; ++I) {
- if (ArchFlags[i] == I->getArchTypeName()) {
+ if (ArchFlags[i] == I->getArchFlagName()) {
ArchFound = true;
Expected<std::unique_ptr<ObjectFile>> ObjOrErr =
I->getAsObjectFile();
@@ -1159,10 +1160,10 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
ObjectFile &Obj = *ObjOrErr.get();
if (ArchFlags.size() > 1) {
if (PrintFileName)
- ArchitectureName = I->getArchTypeName();
+ ArchitectureName = I->getArchFlagName();
else
outs() << "\n" << Obj.getFileName() << " (for architecture "
- << I->getArchTypeName() << ")"
+ << I->getArchFlagName() << ")"
<< ":\n";
}
dumpSymbolNamesFromObject(Obj, false, ArchiveName,
@@ -1170,12 +1171,12 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
} else if (auto E = isNotObjectErrorInvalidFileType(
ObjOrErr.takeError())) {
error(std::move(E), Filename, ArchFlags.size() > 1 ?
- StringRef(I->getArchTypeName()) : StringRef());
+ StringRef(I->getArchFlagName()) : StringRef());
continue;
} else if (Expected<std::unique_ptr<Archive>> AOrErr =
I->getAsArchive()) {
std::unique_ptr<Archive> &A = *AOrErr;
- Error Err;
+ Error Err = Error::success();
for (auto &C : A->children(Err)) {
Expected<std::unique_ptr<Binary>> ChildOrErr =
C.getAsBinary(&Context);
@@ -1183,7 +1184,7 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
if (auto E = isNotObjectErrorInvalidFileType(
ChildOrErr.takeError())) {
error(std::move(E), Filename, C, ArchFlags.size() > 1 ?
- StringRef(I->getArchTypeName()) : StringRef());
+ StringRef(I->getArchFlagName()) : StringRef());
}
continue;
}
@@ -1192,12 +1193,12 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
if (PrintFileName) {
ArchiveName = A->getFileName();
if (ArchFlags.size() > 1)
- ArchitectureName = I->getArchTypeName();
+ ArchitectureName = I->getArchFlagName();
} else {
outs() << "\n" << A->getFileName();
outs() << "(" << O->getFileName() << ")";
if (ArchFlags.size() > 1) {
- outs() << " (for architecture " << I->getArchTypeName()
+ outs() << " (for architecture " << I->getArchFlagName()
<< ")";
}
outs() << ":\n";
@@ -1211,7 +1212,7 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
} else {
consumeError(AOrErr.takeError());
error(Filename + " for architecture " +
- StringRef(I->getArchTypeName()) +
+ StringRef(I->getArchFlagName()) +
" is not a Mach-O file or an archive file",
"Mach-O universal file");
}
@@ -1232,7 +1233,7 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
for (MachOUniversalBinary::object_iterator I = UB->begin_objects(),
E = UB->end_objects();
I != E; ++I) {
- if (HostArchName == I->getArchTypeName()) {
+ if (HostArchName == I->getArchFlagName()) {
Expected<std::unique_ptr<ObjectFile>> ObjOrErr = I->getAsObjectFile();
std::string ArchiveName;
ArchiveName.clear();
@@ -1246,7 +1247,7 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
} else if (Expected<std::unique_ptr<Archive>> AOrErr =
I->getAsArchive()) {
std::unique_ptr<Archive> &A = *AOrErr;
- Error Err;
+ Error Err = Error::success();
for (auto &C : A->children(Err)) {
Expected<std::unique_ptr<Binary>> ChildOrErr =
C.getAsBinary(&Context);
@@ -1272,7 +1273,7 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
} else {
consumeError(AOrErr.takeError());
error(Filename + " for architecture " +
- StringRef(I->getArchTypeName()) +
+ StringRef(I->getArchFlagName()) +
" is not a Mach-O file or an archive file",
"Mach-O universal file");
}
@@ -1295,25 +1296,25 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
ObjectFile &Obj = *ObjOrErr.get();
if (PrintFileName) {
if (isa<MachOObjectFile>(Obj) && moreThanOneArch)
- ArchitectureName = I->getArchTypeName();
+ ArchitectureName = I->getArchFlagName();
} else {
if (moreThanOneArch)
outs() << "\n";
outs() << Obj.getFileName();
if (isa<MachOObjectFile>(Obj) && moreThanOneArch)
- outs() << " (for architecture " << I->getArchTypeName() << ")";
+ outs() << " (for architecture " << I->getArchFlagName() << ")";
outs() << ":\n";
}
dumpSymbolNamesFromObject(Obj, false, ArchiveName, ArchitectureName);
} else if (auto E = isNotObjectErrorInvalidFileType(
ObjOrErr.takeError())) {
error(std::move(E), Filename, moreThanOneArch ?
- StringRef(I->getArchTypeName()) : StringRef());
+ StringRef(I->getArchFlagName()) : StringRef());
continue;
} else if (Expected<std::unique_ptr<Archive>> AOrErr =
I->getAsArchive()) {
std::unique_ptr<Archive> &A = *AOrErr;
- Error Err;
+ Error Err = Error::success();
for (auto &C : A->children(Err)) {
Expected<std::unique_ptr<Binary>> ChildOrErr =
C.getAsBinary(&Context);
@@ -1328,13 +1329,13 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
if (PrintFileName) {
ArchiveName = A->getFileName();
if (isa<MachOObjectFile>(O) && moreThanOneArch)
- ArchitectureName = I->getArchTypeName();
+ ArchitectureName = I->getArchFlagName();
} else {
outs() << "\n" << A->getFileName();
if (isa<MachOObjectFile>(O)) {
outs() << "(" << O->getFileName() << ")";
if (moreThanOneArch)
- outs() << " (for architecture " << I->getArchTypeName()
+ outs() << " (for architecture " << I->getArchFlagName()
<< ")";
} else
outs() << ":" << O->getFileName();
@@ -1348,7 +1349,7 @@ static void dumpSymbolNamesFromFile(std::string &Filename) {
} else {
consumeError(AOrErr.takeError());
error(Filename + " for architecture " +
- StringRef(I->getArchTypeName()) +
+ StringRef(I->getArchFlagName()) +
" is not a Mach-O file or an archive file",
"Mach-O universal file");
}
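
The Error Err = Error::success() initialization that recurs in the archive loops above follows the checked-Error idiom; a minimal sketch, assuming the llvm::object::Archive API used in this file:

  // Sketch: Archive::children() reports iteration failures through an Error
  // out-parameter that must start in the success state and be consumed afterwards.
  #include "llvm/Object/Archive.h"
  #include "llvm/Support/Error.h"
  #include "llvm/Support/ErrorHandling.h"

  static void walkArchive(llvm::object::Archive &A) {
    llvm::Error Err = llvm::Error::success();
    for (const llvm::object::Archive::Child &C : A.children(Err))
      (void)C; // inspect each member here
    if (Err)
      llvm::report_fatal_error(llvm::toString(std::move(Err)));
  }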
diff --git a/contrib/llvm/tools/llvm-objdump/COFFDump.cpp b/contrib/llvm/tools/llvm-objdump/COFFDump.cpp
index 3ec6a1f73750..db549bbe3eec 100644
--- a/contrib/llvm/tools/llvm-objdump/COFFDump.cpp
+++ b/contrib/llvm/tools/llvm-objdump/COFFDump.cpp
@@ -17,6 +17,7 @@
#include "llvm-objdump.h"
#include "llvm/Object/COFF.h"
+#include "llvm/Object/COFFImportFile.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/SourceMgr.h"
@@ -176,12 +177,10 @@ resolveSectionAndAddress(const COFFObjectFile *Obj, const SymbolRef &Sym,
// the function returns the symbol used for the relocation at the offset.
static std::error_code resolveSymbol(const std::vector<RelocationRef> &Rels,
uint64_t Offset, SymbolRef &Sym) {
- for (std::vector<RelocationRef>::const_iterator I = Rels.begin(),
- E = Rels.end();
- I != E; ++I) {
- uint64_t Ofs = I->getOffset();
+ for (auto &R : Rels) {
+ uint64_t Ofs = R.getOffset();
if (Ofs == Offset) {
- Sym = *I->getSymbol();
+ Sym = *R.getSymbol();
return std::error_code();
}
}
@@ -353,7 +352,7 @@ static void printImportTables(const COFFObjectFile *Obj) {
return;
outs() << "The Import Tables:\n";
for (const ImportDirectoryEntryRef &DirRef : Obj->import_directories()) {
- const import_directory_table_entry *Dir;
+ const coff_import_directory_table_entry *Dir;
StringRef Name;
if (DirRef.getImportTableEntry(Dir)) return;
if (DirRef.getName(Name)) return;
@@ -617,6 +616,29 @@ void llvm::printCOFFFileHeader(const object::ObjectFile *Obj) {
printExportTable(file);
}
+void llvm::printCOFFSymbolTable(const object::COFFImportFile *i) {
+ unsigned Index = 0;
+ bool IsCode = i->getCOFFImportHeader()->getType() == COFF::IMPORT_CODE;
+
+ for (const object::BasicSymbolRef &Sym : i->symbols()) {
+ std::string Name;
+ raw_string_ostream NS(Name);
+
+ Sym.printName(NS);
+ NS.flush();
+
+ outs() << "[" << format("%2d", Index) << "]"
+ << "(sec " << format("%2d", 0) << ")"
+ << "(fl 0x00)" // Flag bits, which COFF doesn't have.
+ << "(ty " << format("%3x", (IsCode && Index) ? 32 : 0) << ")"
+ << "(scl " << format("%3x", 0) << ") "
+ << "(nx " << 0 << ") "
+ << "0x" << format("%08x", 0) << " " << Name << '\n';
+
+ ++Index;
+ }
+}
+
void llvm::printCOFFSymbolTable(const COFFObjectFile *coff) {
for (unsigned SI = 0, SE = coff->getNumberOfSymbols(); SI != SE; ++SI) {
ErrorOr<COFFSymbolRef> Symbol = coff->getSymbol(SI);
diff --git a/contrib/llvm/tools/llvm-objdump/ELFDump.cpp b/contrib/llvm/tools/llvm-objdump/ELFDump.cpp
index 7b44e3916c48..aa6aa94ccd9e 100644
--- a/contrib/llvm/tools/llvm-objdump/ELFDump.cpp
+++ b/contrib/llvm/tools/llvm-objdump/ELFDump.cpp
@@ -24,22 +24,26 @@ using namespace llvm::object;
template <class ELFT> void printProgramHeaders(const ELFFile<ELFT> *o) {
typedef ELFFile<ELFT> ELFO;
outs() << "Program Header:\n";
- for (const typename ELFO::Elf_Phdr &Phdr : o->program_headers()) {
+ auto ProgramHeaderOrError = o->program_headers();
+ if (!ProgramHeaderOrError)
+ report_fatal_error(
+ errorToErrorCode(ProgramHeaderOrError.takeError()).message());
+ for (const typename ELFO::Elf_Phdr &Phdr : *ProgramHeaderOrError) {
switch (Phdr.p_type) {
- case ELF::PT_LOAD:
- outs() << " LOAD ";
- break;
- case ELF::PT_GNU_STACK:
- outs() << " STACK ";
+ case ELF::PT_DYNAMIC:
+ outs() << " DYNAMIC ";
break;
case ELF::PT_GNU_EH_FRAME:
outs() << "EH_FRAME ";
break;
+ case ELF::PT_GNU_STACK:
+ outs() << " STACK ";
+ break;
case ELF::PT_INTERP:
outs() << " INTERP ";
break;
- case ELF::PT_DYNAMIC:
- outs() << " DYNAMIC ";
+ case ELF::PT_LOAD:
+ outs() << " LOAD ";
break;
case ELF::PT_PHDR:
outs() << " PHDR ";
diff --git a/contrib/llvm/tools/llvm-objdump/MachODump.cpp b/contrib/llvm/tools/llvm-objdump/MachODump.cpp
index 4d950f1d7bd7..c43ccc211811 100644
--- a/contrib/llvm/tools/llvm-objdump/MachODump.cpp
+++ b/contrib/llvm/tools/llvm-objdump/MachODump.cpp
@@ -20,6 +20,7 @@
#include "llvm/Config/config.h"
#include "llvm/DebugInfo/DIContext.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/Demangle/Demangle.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
@@ -48,10 +49,6 @@
#include <cstring>
#include <system_error>
-#if HAVE_CXXABI_H
-#include <cxxabi.h>
-#endif
-
#ifdef HAVE_LIBXAR
extern "C" {
#include <xar/xar.h>
@@ -74,6 +71,9 @@ static cl::opt<bool> FullLeadingAddr("full-leading-addr",
static cl::opt<bool> NoLeadingAddr("no-leading-addr",
cl::desc("Print no leading address"));
+static cl::opt<bool> NoLeadingHeaders("no-leading-headers",
+ cl::desc("Print no leading headers"));
+
cl::opt<bool> llvm::UniversalHeaders("universal-headers",
cl::desc("Print Mach-O universal headers "
"(requires -macho)"));
@@ -131,7 +131,7 @@ cl::opt<bool>
cl::opt<std::string> llvm::DisSymName(
"dis-symname",
- cl::desc("disassemble just this symbol's instructions (requires -macho"));
+ cl::desc("disassemble just this symbol's instructions (requires -macho)"));
static cl::opt<bool> NoSymbolicOperands(
"no-symbolic-operands",
@@ -186,22 +186,12 @@ static const Target *GetTarget(const MachOObjectFile *MachOObj,
struct SymbolSorter {
bool operator()(const SymbolRef &A, const SymbolRef &B) {
Expected<SymbolRef::Type> ATypeOrErr = A.getType();
- if (!ATypeOrErr) {
- std::string Buf;
- raw_string_ostream OS(Buf);
- logAllUnhandledErrors(ATypeOrErr.takeError(), OS, "");
- OS.flush();
- report_fatal_error(Buf);
- }
+ if (!ATypeOrErr)
+ report_error(A.getObject()->getFileName(), ATypeOrErr.takeError());
SymbolRef::Type AType = *ATypeOrErr;
Expected<SymbolRef::Type> BTypeOrErr = B.getType();
- if (!BTypeOrErr) {
- std::string Buf;
- raw_string_ostream OS(Buf);
- logAllUnhandledErrors(BTypeOrErr.takeError(), OS, "");
- OS.flush();
- report_fatal_error(Buf);
- }
+ if (!BTypeOrErr)
+ report_error(B.getObject()->getFileName(), BTypeOrErr.takeError());
SymbolRef::Type BType = *BTypeOrErr;
uint64_t AAddr = (AType != SymbolRef::ST_Function) ? 0 : A.getValue();
uint64_t BAddr = (BType != SymbolRef::ST_Function) ? 0 : B.getValue();
@@ -298,13 +288,8 @@ static void getSectionsAndSymbols(MachOObjectFile *MachOObj,
uint64_t &BaseSegmentAddress) {
for (const SymbolRef &Symbol : MachOObj->symbols()) {
Expected<StringRef> SymName = Symbol.getName();
- if (!SymName) {
- std::string Buf;
- raw_string_ostream OS(Buf);
- logAllUnhandledErrors(SymName.takeError(), OS, "");
- OS.flush();
- report_fatal_error(Buf);
- }
+ if (!SymName)
+ report_error(MachOObj->getFileName(), SymName.takeError());
if (!SymName->startswith("ltmp"))
Symbols.push_back(Symbol);
}
@@ -383,13 +368,8 @@ static void PrintIndirectSymbolTable(MachOObjectFile *O, bool verbose,
symbol_iterator Sym = O->getSymbolByIndex(indirect_symbol);
SymbolRef Symbol = *Sym;
Expected<StringRef> SymName = Symbol.getName();
- if (!SymName) {
- std::string Buf;
- raw_string_ostream OS(Buf);
- logAllUnhandledErrors(SymName.takeError(), OS, "");
- OS.flush();
- report_fatal_error(Buf);
- }
+ if (!SymName)
+ report_error(O->getFileName(), SymName.takeError());
outs() << *SymName;
} else {
outs() << "?";
@@ -615,25 +595,15 @@ static void CreateSymbolAddressMap(MachOObjectFile *O,
// Create a map of symbol addresses to symbol names.
for (const SymbolRef &Symbol : O->symbols()) {
Expected<SymbolRef::Type> STOrErr = Symbol.getType();
- if (!STOrErr) {
- std::string Buf;
- raw_string_ostream OS(Buf);
- logAllUnhandledErrors(STOrErr.takeError(), OS, "");
- OS.flush();
- report_fatal_error(Buf);
- }
+ if (!STOrErr)
+ report_error(O->getFileName(), STOrErr.takeError());
SymbolRef::Type ST = *STOrErr;
if (ST == SymbolRef::ST_Function || ST == SymbolRef::ST_Data ||
ST == SymbolRef::ST_Other) {
uint64_t Address = Symbol.getValue();
Expected<StringRef> SymNameOrErr = Symbol.getName();
- if (!SymNameOrErr) {
- std::string Buf;
- raw_string_ostream OS(Buf);
- logAllUnhandledErrors(SymNameOrErr.takeError(), OS, "");
- OS.flush();
- report_fatal_error(Buf);
- }
+ if (!SymNameOrErr)
+ report_error(O->getFileName(), SymNameOrErr.takeError());
StringRef SymName = *SymNameOrErr;
if (!SymName.startswith(".objc"))
(*AddrMap)[Address] = SymName;
@@ -862,29 +832,22 @@ static void DumpLiteralPointerSection(MachOObjectFile *O,
}
// First look for an external relocation entry for this literal pointer.
- auto Reloc = std::find_if(
- Relocs.begin(), Relocs.end(),
- [&](const std::pair<uint64_t, SymbolRef> &P) { return P.first == i; });
+ auto Reloc = find_if(Relocs, [&](const std::pair<uint64_t, SymbolRef> &P) {
+ return P.first == i;
+ });
if (Reloc != Relocs.end()) {
symbol_iterator RelocSym = Reloc->second;
Expected<StringRef> SymName = RelocSym->getName();
- if (!SymName) {
- std::string Buf;
- raw_string_ostream OS(Buf);
- logAllUnhandledErrors(SymName.takeError(), OS, "");
- OS.flush();
- report_fatal_error(Buf);
- }
+ if (!SymName)
+ report_error(O->getFileName(), SymName.takeError());
outs() << "external relocation entry for symbol:" << *SymName << "\n";
continue;
}
    // For local references see what section the literal pointer points to.
- auto Sect = std::find_if(LiteralSections.begin(), LiteralSections.end(),
- [&](const SectionRef &R) {
- return lp >= R.getAddress() &&
- lp < R.getAddress() + R.getSize();
- });
+ auto Sect = find_if(LiteralSections, [&](const SectionRef &R) {
+ return lp >= R.getAddress() && lp < R.getAddress() + R.getSize();
+ });
if (Sect == LiteralSections.end()) {
outs() << format("0x%" PRIx64, lp) << " (not in a literal section)\n";
continue;
@@ -1191,30 +1154,30 @@ static void DumpInfoPlistSectionContents(StringRef Filename,
// architectures were specified. If not then an error is generated and this
// routine returns false. Else it returns true.
static bool checkMachOAndArchFlags(ObjectFile *O, StringRef Filename) {
- if (isa<MachOObjectFile>(O) && !ArchAll && ArchFlags.size() != 0) {
- MachOObjectFile *MachO = dyn_cast<MachOObjectFile>(O);
- bool ArchFound = false;
- MachO::mach_header H;
- MachO::mach_header_64 H_64;
- Triple T;
- if (MachO->is64Bit()) {
- H_64 = MachO->MachOObjectFile::getHeader64();
- T = MachOObjectFile::getArchTriple(H_64.cputype, H_64.cpusubtype);
- } else {
- H = MachO->MachOObjectFile::getHeader();
- T = MachOObjectFile::getArchTriple(H.cputype, H.cpusubtype);
- }
- unsigned i;
- for (i = 0; i < ArchFlags.size(); ++i) {
- if (ArchFlags[i] == T.getArchName())
- ArchFound = true;
- break;
- }
- if (!ArchFound) {
- errs() << "llvm-objdump: file: " + Filename + " does not contain "
- << "architecture: " + ArchFlags[i] + "\n";
- return false;
- }
+ auto *MachO = dyn_cast<MachOObjectFile>(O);
+
+ if (!MachO || ArchAll || ArchFlags.empty())
+ return true;
+
+ MachO::mach_header H;
+ MachO::mach_header_64 H_64;
+ Triple T;
+ const char *McpuDefault, *ArchFlag;
+ if (MachO->is64Bit()) {
+ H_64 = MachO->MachOObjectFile::getHeader64();
+ T = MachOObjectFile::getArchTriple(H_64.cputype, H_64.cpusubtype,
+ &McpuDefault, &ArchFlag);
+ } else {
+ H = MachO->MachOObjectFile::getHeader();
+ T = MachOObjectFile::getArchTriple(H.cputype, H.cpusubtype,
+ &McpuDefault, &ArchFlag);
+ }
+ const std::string ArchFlagName(ArchFlag);
+ if (none_of(ArchFlags, [&](const std::string &Name) {
+ return Name == ArchFlagName;
+ })) {
+ errs() << "llvm-objdump: " + Filename + ": No architecture specified.\n";
+ return false;
}
return true;
}
@@ -1225,7 +1188,7 @@ static void printObjcMetaData(MachOObjectFile *O, bool verbose);
// archive member and or in a slice of a universal file. It prints the
// the file name and header info and then processes it according to the
// command line options.
-static void ProcessMachO(StringRef Filename, MachOObjectFile *MachOOF,
+static void ProcessMachO(StringRef Name, MachOObjectFile *MachOOF,
StringRef ArchiveMemberName = StringRef(),
StringRef ArchitectureName = StringRef()) {
// If we are doing some processing here on the Mach-O file print the header
@@ -1234,16 +1197,37 @@ static void ProcessMachO(StringRef Filename, MachOObjectFile *MachOOF,
if (Disassemble || PrivateHeaders || ExportsTrie || Rebase || Bind || SymbolTable ||
LazyBind || WeakBind || IndirectSymbols || DataInCode || LinkOptHints ||
DylibsUsed || DylibId || ObjcMetaData || (FilterSections.size() != 0)) {
- outs() << Filename;
- if (!ArchiveMemberName.empty())
- outs() << '(' << ArchiveMemberName << ')';
- if (!ArchitectureName.empty())
- outs() << " (architecture " << ArchitectureName << ")";
- outs() << ":\n";
+ if (!NoLeadingHeaders) {
+ outs() << Name;
+ if (!ArchiveMemberName.empty())
+ outs() << '(' << ArchiveMemberName << ')';
+ if (!ArchitectureName.empty())
+ outs() << " (architecture " << ArchitectureName << ")";
+ outs() << ":\n";
+ }
+ }
+ // To use the report_error() form with an ArchiveName and FileName set
+ // these up based on what is passed for Name and ArchiveMemberName.
+ StringRef ArchiveName;
+ StringRef FileName;
+ if (!ArchiveMemberName.empty()) {
+ ArchiveName = Name;
+ FileName = ArchiveMemberName;
+ } else {
+ ArchiveName = StringRef();
+ FileName = Name;
}
+  // If the requested operation needs the symbol table, check it here so any
+  // error message can say exactly which Mach-O file (and archive member or
+  // slice) it came from.
+ if (Disassemble || IndirectSymbols || FilterSections.size() != 0 ||
+ UnwindInfo)
+ if (Error Err = MachOOF->checkSymbolTable())
+ report_error(ArchiveName, FileName, std::move(Err), ArchitectureName);
+
if (Disassemble)
- DisassembleMachO(Filename, MachOOF, "__TEXT", "__text");
+ DisassembleMachO(FileName, MachOOF, "__TEXT", "__text");
if (IndirectSymbols)
PrintIndirectSymbols(MachOOF, !NonVerbose);
if (DataInCode)
@@ -1257,17 +1241,15 @@ static void ProcessMachO(StringRef Filename, MachOObjectFile *MachOOF,
if (SectionContents)
PrintSectionContents(MachOOF);
if (FilterSections.size() != 0)
- DumpSectionContents(Filename, MachOOF, !NonVerbose);
+ DumpSectionContents(FileName, MachOOF, !NonVerbose);
if (InfoPlist)
- DumpInfoPlistSectionContents(Filename, MachOOF);
+ DumpInfoPlistSectionContents(FileName, MachOOF);
if (DylibsUsed)
PrintDylibs(MachOOF, false);
if (DylibId)
PrintDylibs(MachOOF, true);
- if (SymbolTable) {
- StringRef ArchiveName = ArchiveMemberName == StringRef() ? "" : Filename;
+ if (SymbolTable)
PrintSymbolTable(MachOOF, ArchiveName, ArchitectureName);
- }
if (UnwindInfo)
printMachOUnwindInfo(MachOOF);
if (PrivateHeaders) {
@@ -1441,7 +1423,7 @@ static void printMachOUniversalHeaders(const object::MachOUniversalBinary *UB,
}
}
if (verbose) {
- outs() << OFA.getArchTypeName() << "\n";
+ outs() << OFA.getArchFlagName() << "\n";
printCPUType(cputype, cpusubtype & ~MachO::CPU_SUBTYPE_MASK);
} else {
outs() << i << "\n";
@@ -1472,11 +1454,15 @@ static void printMachOUniversalHeaders(const object::MachOUniversalBinary *UB,
}
}
-static void printArchiveChild(const Archive::Child &C, bool verbose,
- bool print_offset) {
+static void printArchiveChild(StringRef Filename, const Archive::Child &C,
+ bool verbose, bool print_offset,
+ StringRef ArchitectureName = StringRef()) {
if (print_offset)
outs() << C.getChildOffset() << "\t";
- sys::fs::perms Mode = C.getAccessMode();
+ Expected<sys::fs::perms> ModeOrErr = C.getAccessMode();
+ if (!ModeOrErr)
+ report_error(Filename, C, ModeOrErr.takeError(), ArchitectureName);
+ sys::fs::perms Mode = ModeOrErr.get();
if (verbose) {
// FIXME: this first dash, "-", is for (Mode & S_IFMT) == S_IFREG.
// But there is nothing in sys::fs::perms for S_IFMT or S_IFREG.
@@ -1494,20 +1480,27 @@ static void printArchiveChild(const Archive::Child &C, bool verbose,
outs() << format("0%o ", Mode);
}
- unsigned UID = C.getUID();
+ Expected<unsigned> UIDOrErr = C.getUID();
+ if (!UIDOrErr)
+ report_error(Filename, C, UIDOrErr.takeError(), ArchitectureName);
+ unsigned UID = UIDOrErr.get();
outs() << format("%3d/", UID);
- unsigned GID = C.getGID();
+ Expected<unsigned> GIDOrErr = C.getGID();
+ if (!GIDOrErr)
+ report_error(Filename, C, GIDOrErr.takeError(), ArchitectureName);
+ unsigned GID = GIDOrErr.get();
outs() << format("%-3d ", GID);
- ErrorOr<uint64_t> Size = C.getRawSize();
- if (std::error_code EC = Size.getError())
- report_fatal_error(EC.message());
+ Expected<uint64_t> Size = C.getRawSize();
+ if (!Size)
+ report_error(Filename, C, Size.takeError(), ArchitectureName);
outs() << format("%5" PRId64, Size.get()) << " ";
StringRef RawLastModified = C.getRawLastModified();
if (verbose) {
unsigned Seconds;
if (RawLastModified.getAsInteger(10, Seconds))
- outs() << "(date: \"%s\" contains non-decimal chars) " << RawLastModified;
+ outs() << "(date: \"" << RawLastModified
+ << "\" contains non-decimal chars) ";
else {
      // Since ctime(3) returns a 26 character string of the form:
// "Sun Sep 16 01:03:52 1973\n\0"
@@ -1520,26 +1513,37 @@ static void printArchiveChild(const Archive::Child &C, bool verbose,
}
if (verbose) {
- ErrorOr<StringRef> NameOrErr = C.getName();
- if (NameOrErr.getError()) {
- StringRef RawName = C.getRawName();
+ Expected<StringRef> NameOrErr = C.getName();
+ if (!NameOrErr) {
+ consumeError(NameOrErr.takeError());
+ Expected<StringRef> NameOrErr = C.getRawName();
+ if (!NameOrErr)
+ report_error(Filename, C, NameOrErr.takeError(), ArchitectureName);
+ StringRef RawName = NameOrErr.get();
outs() << RawName << "\n";
} else {
StringRef Name = NameOrErr.get();
outs() << Name << "\n";
}
} else {
- StringRef RawName = C.getRawName();
+ Expected<StringRef> NameOrErr = C.getRawName();
+ if (!NameOrErr)
+ report_error(Filename, C, NameOrErr.takeError(), ArchitectureName);
+ StringRef RawName = NameOrErr.get();
outs() << RawName << "\n";
}
}
-static void printArchiveHeaders(Archive *A, bool verbose, bool print_offset) {
- Error Err;
+static void printArchiveHeaders(StringRef Filename, Archive *A, bool verbose,
+ bool print_offset,
+ StringRef ArchitectureName = StringRef()) {
+ Error Err = Error::success();
for (const auto &C : A->children(Err, false))
- printArchiveChild(C, verbose, print_offset);
+ printArchiveChild(Filename, C, verbose, print_offset, ArchitectureName);
+
if (Err)
- report_fatal_error(std::move(Err));
+ report_error(StringRef(), Filename, std::move(Err), ArchitectureName);
}
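printArchiveHeaders now walks A->children(Err) with a checked llvm::Error. A short sketch of the idiom, not part of the patch (the Archive pointer A is assumed valid):

    // Sketch: Archive::children() takes an Error out-parameter; initialize it
    // to success, iterate, then check it once the loop has finished.
    llvm::Error Err = llvm::Error::success();
    for (const llvm::object::Archive::Child &C : A->children(Err)) {
      // ... inspect C (name, size, mode, ...) ...
    }
    if (Err)                              // set by the iterator on failure
      llvm::report_fatal_error(std::move(Err));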
// ParseInputMachO() parses the named Mach-O file in Filename and handles the
@@ -1569,8 +1573,9 @@ void llvm::ParseInputMachO(StringRef Filename) {
if (Archive *A = dyn_cast<Archive>(&Bin)) {
outs() << "Archive : " << Filename << "\n";
if (ArchiveHeaders)
- printArchiveHeaders(A, !NonVerbose, ArchiveMemberOffsets);
- Error Err;
+ printArchiveHeaders(Filename, A, !NonVerbose, ArchiveMemberOffsets);
+
+ Error Err = Error::success();
for (auto &C : A->children(Err)) {
Expected<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary();
if (!ChildOrErr) {
@@ -1602,13 +1607,13 @@ void llvm::ParseInputMachO(StringRef Filename) {
for (MachOUniversalBinary::object_iterator I = UB->begin_objects(),
E = UB->end_objects();
I != E; ++I) {
- if (ArchFlags[i] == I->getArchTypeName()) {
+ if (ArchFlags[i] == I->getArchFlagName()) {
ArchFound = true;
Expected<std::unique_ptr<ObjectFile>> ObjOrErr =
I->getAsObjectFile();
std::string ArchitectureName = "";
if (ArchFlags.size() > 1)
- ArchitectureName = I->getArchTypeName();
+ ArchitectureName = I->getArchFlagName();
if (ObjOrErr) {
ObjectFile &O = *ObjOrErr.get();
if (MachOObjectFile *MachOOF = dyn_cast<MachOObjectFile>(&O))
@@ -1626,8 +1631,9 @@ void llvm::ParseInputMachO(StringRef Filename) {
outs() << " (architecture " << ArchitectureName << ")";
outs() << "\n";
if (ArchiveHeaders)
- printArchiveHeaders(A.get(), !NonVerbose, ArchiveMemberOffsets);
- Error Err;
+ printArchiveHeaders(Filename, A.get(), !NonVerbose,
+ ArchiveMemberOffsets, ArchitectureName);
+ Error Err = Error::success();
for (auto &C : A->children(Err)) {
Expected<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary();
if (!ChildOrErr) {
@@ -1644,7 +1650,7 @@ void llvm::ParseInputMachO(StringRef Filename) {
} else {
consumeError(AOrErr.takeError());
error("Mach-O universal file: " + Filename + " for " +
- "architecture " + StringRef(I->getArchTypeName()) +
+ "architecture " + StringRef(I->getArchFlagName()) +
" is not a Mach-O file or an archive file");
}
}
@@ -1664,7 +1670,7 @@ void llvm::ParseInputMachO(StringRef Filename) {
E = UB->end_objects();
I != E; ++I) {
if (MachOObjectFile::getHostArch().getArchName() ==
- I->getArchTypeName()) {
+ I->getArchFlagName()) {
Expected<std::unique_ptr<ObjectFile>> ObjOrErr = I->getAsObjectFile();
std::string ArchiveName;
ArchiveName.clear();
@@ -1681,8 +1687,9 @@ void llvm::ParseInputMachO(StringRef Filename) {
std::unique_ptr<Archive> &A = *AOrErr;
outs() << "Archive : " << Filename << "\n";
if (ArchiveHeaders)
- printArchiveHeaders(A.get(), !NonVerbose, ArchiveMemberOffsets);
- Error Err;
+ printArchiveHeaders(Filename, A.get(), !NonVerbose,
+ ArchiveMemberOffsets);
+ Error Err = Error::success();
for (auto &C : A->children(Err)) {
Expected<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary();
if (!ChildOrErr) {
@@ -1699,7 +1706,7 @@ void llvm::ParseInputMachO(StringRef Filename) {
} else {
consumeError(AOrErr.takeError());
error("Mach-O universal file: " + Filename + " for architecture " +
- StringRef(I->getArchTypeName()) +
+ StringRef(I->getArchFlagName()) +
" is not a Mach-O file or an archive file");
}
return;
@@ -1715,7 +1722,7 @@ void llvm::ParseInputMachO(StringRef Filename) {
Expected<std::unique_ptr<ObjectFile>> ObjOrErr = I->getAsObjectFile();
std::string ArchitectureName = "";
if (moreThanOneArch)
- ArchitectureName = I->getArchTypeName();
+ ArchitectureName = I->getArchFlagName();
if (ObjOrErr) {
ObjectFile &Obj = *ObjOrErr.get();
if (MachOObjectFile *MachOOF = dyn_cast<MachOObjectFile>(&Obj))
@@ -1732,8 +1739,9 @@ void llvm::ParseInputMachO(StringRef Filename) {
outs() << " (architecture " << ArchitectureName << ")";
outs() << "\n";
if (ArchiveHeaders)
- printArchiveHeaders(A.get(), !NonVerbose, ArchiveMemberOffsets);
- Error Err;
+ printArchiveHeaders(Filename, A.get(), !NonVerbose,
+ ArchiveMemberOffsets, ArchitectureName);
+ Error Err = Error::success();
for (auto &C : A->children(Err)) {
Expected<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary();
if (!ChildOrErr) {
@@ -1753,7 +1761,7 @@ void llvm::ParseInputMachO(StringRef Filename) {
} else {
consumeError(AOrErr.takeError());
error("Mach-O universal file: " + Filename + " for architecture " +
- StringRef(I->getArchTypeName()) +
+ StringRef(I->getArchFlagName()) +
" is not a Mach-O file or an archive file");
}
}
@@ -1879,13 +1887,8 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
}
if (reloc_found && isExtern) {
Expected<StringRef> SymName = Symbol.getName();
- if (!SymName) {
- std::string Buf;
- raw_string_ostream OS(Buf);
- logAllUnhandledErrors(SymName.takeError(), OS, "");
- OS.flush();
- report_fatal_error(Buf);
- }
+ if (!SymName)
+ report_error(info->O->getFileName(), SymName.takeError());
const char *name = SymName->data();
op_info->AddSymbol.Present = 1;
op_info->AddSymbol.Name = name;
@@ -1954,13 +1957,8 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
if (info->O->getAnyRelocationPCRel(RE))
op_info->Value -= Pc + Offset + Size;
Expected<StringRef> SymName = Symbol.getName();
- if (!SymName) {
- std::string Buf;
- raw_string_ostream OS(Buf);
- logAllUnhandledErrors(SymName.takeError(), OS, "");
- OS.flush();
- report_fatal_error(Buf);
- }
+ if (!SymName)
+ report_error(info->O->getFileName(), SymName.takeError());
const char *name = SymName->data();
unsigned Type = info->O->getAnyRelocationType(RE);
if (Type == MachO::X86_64_RELOC_SUBTRACTOR) {
@@ -1976,13 +1974,8 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
symbol_iterator RelocSymNext = info->O->getSymbolByIndex(SymbolNum);
Symbol = *RelocSymNext;
Expected<StringRef> SymNameNext = Symbol.getName();
- if (!SymNameNext) {
- std::string Buf;
- raw_string_ostream OS(Buf);
- logAllUnhandledErrors(SymNameNext.takeError(), OS, "");
- OS.flush();
- report_fatal_error(Buf);
- }
+ if (!SymNameNext)
+ report_error(info->O->getFileName(), SymNameNext.takeError());
name = SymNameNext->data();
}
}
@@ -2015,11 +2008,10 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
bool r_scattered = false;
uint32_t r_value, pair_r_value, r_type, r_length, other_half;
auto Reloc =
- std::find_if(info->S.relocations().begin(), info->S.relocations().end(),
- [&](const RelocationRef &Reloc) {
- uint64_t RelocOffset = Reloc.getOffset();
- return RelocOffset == sect_offset;
- });
+ find_if(info->S.relocations(), [&](const RelocationRef &Reloc) {
+ uint64_t RelocOffset = Reloc.getOffset();
+ return RelocOffset == sect_offset;
+ });
if (Reloc == info->S.relocations().end())
return 0;
@@ -2054,13 +2046,8 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
if (isExtern) {
Expected<StringRef> SymName = Symbol.getName();
- if (!SymName) {
- std::string Buf;
- raw_string_ostream OS(Buf);
- logAllUnhandledErrors(SymName.takeError(), OS, "");
- OS.flush();
- report_fatal_error(Buf);
- }
+ if (!SymName)
+ report_error(info->O->getFileName(), SymName.takeError());
const char *name = SymName->data();
op_info->AddSymbol.Present = 1;
op_info->AddSymbol.Name = name;
@@ -2082,7 +2069,7 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
// If we have a branch that is not an external relocation entry then
// return 0 so the code in tryAddingSymbolicOperand() can use the
// SymbolLookUp call back with the branch target address to look up the
- // symbol and possiblity add an annotation for a symbol stub.
+      // symbol and possibly add an annotation for a symbol stub.
if (isExtern == 0 && (r_type == MachO::ARM_RELOC_BR24 ||
r_type == MachO::ARM_THUMB_RELOC_BR22))
return 0;
@@ -2154,11 +2141,10 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
uint64_t sect_addr = info->S.getAddress();
uint64_t sect_offset = (Pc + Offset) - sect_addr;
auto Reloc =
- std::find_if(info->S.relocations().begin(), info->S.relocations().end(),
- [&](const RelocationRef &Reloc) {
- uint64_t RelocOffset = Reloc.getOffset();
- return RelocOffset == sect_offset;
- });
+ find_if(info->S.relocations(), [&](const RelocationRef &Reloc) {
+ uint64_t RelocOffset = Reloc.getOffset();
+ return RelocOffset == sect_offset;
+ });
if (Reloc == info->S.relocations().end())
return 0;
@@ -2179,13 +2165,8 @@ static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
if (!info->O->getPlainRelocationExternal(RE))
return 0;
Expected<StringRef> SymName = Reloc->getSymbol()->getName();
- if (!SymName) {
- std::string Buf;
- raw_string_ostream OS(Buf);
- logAllUnhandledErrors(SymName.takeError(), OS, "");
- OS.flush();
- report_fatal_error(Buf);
- }
+ if (!SymName)
+ report_error(info->O->getFileName(), SymName.takeError());
const char *name = SymName->data();
op_info->AddSymbol.Present = 1;
op_info->AddSymbol.Name = name;
@@ -2314,13 +2295,8 @@ static const char *GuessIndirectSymbol(uint64_t ReferenceValue,
symbol_iterator Sym = info->O->getSymbolByIndex(indirect_symbol);
SymbolRef Symbol = *Sym;
Expected<StringRef> SymName = Symbol.getName();
- if (!SymName) {
- std::string Buf;
- raw_string_ostream OS(Buf);
- logAllUnhandledErrors(SymName.takeError(), OS, "");
- OS.flush();
- report_fatal_error(Buf);
- }
+ if (!SymName)
+ report_error(info->O->getFileName(), SymName.takeError());
const char *name = SymName->data();
return name;
}
@@ -2354,13 +2330,8 @@ static const char *GuessIndirectSymbol(uint64_t ReferenceValue,
symbol_iterator Sym = info->O->getSymbolByIndex(indirect_symbol);
SymbolRef Symbol = *Sym;
Expected<StringRef> SymName = Symbol.getName();
- if (!SymName) {
- std::string Buf;
- raw_string_ostream OS(Buf);
- logAllUnhandledErrors(SymName.takeError(), OS, "");
- OS.flush();
- report_fatal_error(Buf);
- }
+ if (!SymName)
+ report_error(info->O->getFileName(), SymName.takeError());
const char *name = SymName->data();
return name;
}
@@ -2588,13 +2559,8 @@ static const char *get_symbol_64(uint32_t sect_offset, SectionRef S,
if (reloc_found && isExtern) {
n_value = Symbol.getValue();
Expected<StringRef> NameOrError = Symbol.getName();
- if (!NameOrError) {
- std::string Buf;
- raw_string_ostream OS(Buf);
- logAllUnhandledErrors(NameOrError.takeError(), OS, "");
- OS.flush();
- report_fatal_error(Buf);
- }
+ if (!NameOrError)
+ report_error(info->O->getFileName(), NameOrError.takeError());
StringRef Name = *NameOrError;
if (!Name.empty()) {
SymbolName = Name.data();
@@ -4391,12 +4357,7 @@ static bool print_class_ro64_t(uint64_t p, struct DisassembleInfo *info,
r = get_pointer_64(p, offset, left, S, info);
if (r == nullptr || left < sizeof(struct class_ro64_t))
return false;
- memset(&cro, '\0', sizeof(struct class_ro64_t));
- if (left < sizeof(struct class_ro64_t)) {
- memcpy(&cro, r, left);
- outs() << " (class_ro_t entends past the end of the section)\n";
- } else
- memcpy(&cro, r, sizeof(struct class_ro64_t));
+ memcpy(&cro, r, sizeof(struct class_ro64_t));
if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
swapStruct(cro);
outs() << " flags " << format("0x%" PRIx32, cro.flags);
@@ -4593,12 +4554,7 @@ static void print_class64_t(uint64_t p, struct DisassembleInfo *info) {
r = get_pointer_64(p, offset, left, S, info);
if (r == nullptr || left < sizeof(struct class64_t))
return;
- memset(&c, '\0', sizeof(struct class64_t));
- if (left < sizeof(struct class64_t)) {
- memcpy(&c, r, left);
- outs() << " (class_t entends past the end of the section)\n";
- } else
- memcpy(&c, r, sizeof(struct class64_t));
+ memcpy(&c, r, sizeof(struct class64_t));
if (info->O->isLittleEndian() != sys::IsLittleEndianHost)
swapStruct(c);
@@ -6214,11 +6170,9 @@ static const char *GuessLiteralPointer(uint64_t ReferenceValue,
// Out type and the ReferenceName will also be set which is added as a comment
// to the disassembled instruction.
//
-#if HAVE_CXXABI_H
// If the symbol name is a C++ mangled name then the demangled name is
// returned through ReferenceName and ReferenceType is set to
// LLVMDisassembler_ReferenceType_DeMangled_Name .
-#endif
//
// When this is called to get a symbol name for a branch target then the
// ReferenceType will be LLVMDisassembler_ReferenceType_In_Branch and then
@@ -6253,21 +6207,18 @@ static const char *SymbolizerSymbolLookUp(void *DisInfo,
method_reference(info, ReferenceType, ReferenceName);
if (*ReferenceType != LLVMDisassembler_ReferenceType_Out_Objc_Message)
*ReferenceType = LLVMDisassembler_ReferenceType_Out_SymbolStub;
- } else
-#if HAVE_CXXABI_H
- if (SymbolName != nullptr && strncmp(SymbolName, "__Z", 3) == 0) {
+ } else if (SymbolName != nullptr && strncmp(SymbolName, "__Z", 3) == 0) {
if (info->demangled_name != nullptr)
free(info->demangled_name);
int status;
info->demangled_name =
- abi::__cxa_demangle(SymbolName + 1, nullptr, nullptr, &status);
+ itaniumDemangle(SymbolName + 1, nullptr, nullptr, &status);
if (info->demangled_name != nullptr) {
*ReferenceName = info->demangled_name;
*ReferenceType = LLVMDisassembler_ReferenceType_DeMangled_Name;
} else
*ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
} else
-#endif
*ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
} else if (*ReferenceType == LLVMDisassembler_ReferenceType_In_PCrel_Load) {
*ReferenceName =
@@ -6356,20 +6307,17 @@ static const char *SymbolizerSymbolLookUp(void *DisInfo,
GuessLiteralPointer(ReferenceValue, ReferencePC, ReferenceType, info);
if (*ReferenceName == nullptr)
*ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
- }
-#if HAVE_CXXABI_H
- else if (SymbolName != nullptr && strncmp(SymbolName, "__Z", 3) == 0) {
+ } else if (SymbolName != nullptr && strncmp(SymbolName, "__Z", 3) == 0) {
if (info->demangled_name != nullptr)
free(info->demangled_name);
int status;
info->demangled_name =
- abi::__cxa_demangle(SymbolName + 1, nullptr, nullptr, &status);
+ itaniumDemangle(SymbolName + 1, nullptr, nullptr, &status);
if (info->demangled_name != nullptr) {
*ReferenceName = info->demangled_name;
*ReferenceType = LLVMDisassembler_ReferenceType_DeMangled_Name;
}
}
-#endif
else {
*ReferenceName = nullptr;
*ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
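These hunks drop the cxxabi.h dependency in favor of LLVM's bundled demangler. A self-contained sketch of itaniumDemangle() as declared in llvm/Demangle/Demangle.h (the mangled name is an illustrative example; the Mach-O code above strips one leading underscore before calling it):

    #include "llvm/Demangle/Demangle.h"
    #include <cstdio>
    #include <cstdlib>

    int main() {
      int Status = 0;
      // Passing a null buffer/length asks the demangler to malloc() the result,
      // so the caller owns the returned string and must free() it.
      char *Demangled =
          llvm::itaniumDemangle("_Z3fooi", nullptr, nullptr, &Status);
      if (Demangled) {
        std::printf("%s\n", Demangled);   // prints "foo(int)"
        std::free(Demangled);
      }
      return 0;
    }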
@@ -6387,7 +6335,7 @@ static void emitComments(raw_svector_ostream &CommentStream,
// Flush the stream before taking its content.
StringRef Comments = CommentsToEmit.str();
// Get the default information for printing a comment.
- const char *CommentBegin = MAI.getCommentString();
+ StringRef CommentBegin = MAI.getCommentString();
unsigned CommentColumn = MAI.getCommentColumn();
bool IsFirst = true;
while (!Comments.empty()) {
@@ -6604,25 +6552,15 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
bool DisSymNameFound = false;
for (const SymbolRef &Symbol : MachOOF->symbols()) {
Expected<SymbolRef::Type> STOrErr = Symbol.getType();
- if (!STOrErr) {
- std::string Buf;
- raw_string_ostream OS(Buf);
- logAllUnhandledErrors(STOrErr.takeError(), OS, "");
- OS.flush();
- report_fatal_error(Buf);
- }
+ if (!STOrErr)
+ report_error(MachOOF->getFileName(), STOrErr.takeError());
SymbolRef::Type ST = *STOrErr;
if (ST == SymbolRef::ST_Function || ST == SymbolRef::ST_Data ||
ST == SymbolRef::ST_Other) {
uint64_t Address = Symbol.getValue();
Expected<StringRef> SymNameOrErr = Symbol.getName();
- if (!SymNameOrErr) {
- std::string Buf;
- raw_string_ostream OS(Buf);
- logAllUnhandledErrors(SymNameOrErr.takeError(), OS, "");
- OS.flush();
- report_fatal_error(Buf);
- }
+ if (!SymNameOrErr)
+ report_error(MachOOF->getFileName(), SymNameOrErr.takeError());
StringRef SymName = *SymNameOrErr;
AddrMap[Address] = SymName;
if (!DisSymName.empty() && DisSymName == SymName)
@@ -6669,23 +6607,13 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
// Disassemble symbol by symbol.
for (unsigned SymIdx = 0; SymIdx != Symbols.size(); SymIdx++) {
Expected<StringRef> SymNameOrErr = Symbols[SymIdx].getName();
- if (!SymNameOrErr) {
- std::string Buf;
- raw_string_ostream OS(Buf);
- logAllUnhandledErrors(SymNameOrErr.takeError(), OS, "");
- OS.flush();
- report_fatal_error(Buf);
- }
+ if (!SymNameOrErr)
+ report_error(MachOOF->getFileName(), SymNameOrErr.takeError());
StringRef SymName = *SymNameOrErr;
Expected<SymbolRef::Type> STOrErr = Symbols[SymIdx].getType();
- if (!STOrErr) {
- std::string Buf;
- raw_string_ostream OS(Buf);
- logAllUnhandledErrors(STOrErr.takeError(), OS, "");
- OS.flush();
- report_fatal_error(Buf);
- }
+ if (!STOrErr)
+ report_error(MachOOF->getFileName(), STOrErr.takeError());
SymbolRef::Type ST = *STOrErr;
if (ST != SymbolRef::ST_Function && ST != SymbolRef::ST_Data)
continue;
@@ -6740,13 +6668,8 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF,
uint64_t NextSymIdx = SymIdx + 1;
while (Symbols.size() > NextSymIdx) {
Expected<SymbolRef::Type> STOrErr = Symbols[NextSymIdx].getType();
- if (!STOrErr) {
- std::string Buf;
- raw_string_ostream OS(Buf);
- logAllUnhandledErrors(STOrErr.takeError(), OS, "");
- OS.flush();
- report_fatal_error(Buf);
- }
+ if (!STOrErr)
+ report_error(MachOOF->getFileName(), STOrErr.takeError());
SymbolRef::Type NextSymType = *STOrErr;
if (NextSymType == SymbolRef::ST_Function) {
containsNextSym =
@@ -6992,13 +6915,8 @@ static void findUnwindRelocNameAddend(const MachOObjectFile *Obj,
StringRef &Name, uint64_t &Addend) {
if (Reloc.getSymbol() != Obj->symbol_end()) {
Expected<StringRef> NameOrErr = Reloc.getSymbol()->getName();
- if (!NameOrErr) {
- std::string Buf;
- raw_string_ostream OS(Buf);
- logAllUnhandledErrors(NameOrErr.takeError(), OS, "");
- OS.flush();
- report_fatal_error(Buf);
- }
+ if (!NameOrErr)
+ report_error(Obj->getFileName(), NameOrErr.takeError());
Name = *NameOrErr;
Addend = Addr;
return;
@@ -7022,24 +6940,14 @@ static void findUnwindRelocNameAddend(const MachOObjectFile *Obj,
--Sym;
auto SectOrErr = Sym->second.getSection();
- if (!SectOrErr) {
- std::string Buf;
- raw_string_ostream OS(Buf);
- logAllUnhandledErrors(SectOrErr.takeError(), OS, "");
- OS.flush();
- report_fatal_error(Buf);
- }
+ if (!SectOrErr)
+ report_error(Obj->getFileName(), SectOrErr.takeError());
section_iterator SymSection = *SectOrErr;
if (RelocSection == *SymSection) {
// There's a valid symbol in the same section before this reference.
Expected<StringRef> NameOrErr = Sym->second.getName();
- if (!NameOrErr) {
- std::string Buf;
- raw_string_ostream OS(Buf);
- logAllUnhandledErrors(NameOrErr.takeError(), OS, "");
- OS.flush();
- report_fatal_error(Buf);
- }
+ if (!NameOrErr)
+ report_error(Obj->getFileName(), NameOrErr.takeError());
Name = *NameOrErr;
Addend = Addr - Sym->first;
return;
@@ -7072,8 +6980,10 @@ printMachOCompactUnwindSection(const MachOObjectFile *Obj,
std::map<uint64_t, SymbolRef> &Symbols,
const SectionRef &CompactUnwind) {
- assert(Obj->isLittleEndian() &&
- "There should not be a big-endian .o with __compact_unwind");
+ if (!Obj->isLittleEndian()) {
+ outs() << "Skipping big-endian __compact_unwind section\n";
+ return;
+ }
bool Is64 = Obj->is64Bit();
uint32_t PointerSize = Is64 ? sizeof(uint64_t) : sizeof(uint32_t);
@@ -7105,8 +7015,10 @@ printMachOCompactUnwindSection(const MachOObjectFile *Obj,
Entry.PersonalityReloc = Reloc;
else if (OffsetInEntry == 2 * PointerSize + 2 * sizeof(uint32_t))
Entry.LSDAReloc = Reloc;
- else
- llvm_unreachable("Unexpected relocation in __compact_unwind section");
+ else {
+ outs() << "Invalid relocation in __compact_unwind section\n";
+ return;
+ }
}
// Finally, we're ready to print the data we've gathered.
@@ -7212,8 +7124,10 @@ static void printMachOUnwindInfoSection(const MachOObjectFile *Obj,
std::map<uint64_t, SymbolRef> &Symbols,
const SectionRef &UnwindInfo) {
- assert(Obj->isLittleEndian() &&
- "There should not be a big-endian .o with __unwind_info");
+ if (!Obj->isLittleEndian()) {
+ outs() << "Skipping big-endian __unwind_info section\n";
+ return;
+ }
outs() << "Contents of __unwind_info section:\n";
@@ -7228,7 +7142,10 @@ static void printMachOUnwindInfoSection(const MachOObjectFile *Obj,
uint32_t Version = readNext<uint32_t>(Pos);
outs() << " Version: "
<< format("0x%" PRIx32, Version) << '\n';
- assert(Version == 1 && "only understand version 1");
+ if (Version != 1) {
+ outs() << " Skipping section with unknown version\n";
+ return;
+ }
uint32_t CommonEncodingsStart = readNext<uint32_t>(Pos);
outs() << " Common encodings array section offset: "
@@ -7368,7 +7285,8 @@ static void printMachOUnwindInfoSection(const MachOObjectFile *Obj,
printCompressedSecondLevelUnwindPage(Pos, IndexEntries[i].FunctionOffset,
CommonEncodings);
else
- llvm_unreachable("Do not know how to print this kind of 2nd level page");
+ outs() << " Skipping 2nd level page with unknown kind " << Kind
+ << '\n';
}
}
@@ -7735,9 +7653,9 @@ static void PrintSegmentCommand(uint32_t cmd, uint32_t cmdsize,
if ((initprot &
~(MachO::VM_PROT_READ | MachO::VM_PROT_WRITE |
MachO::VM_PROT_EXECUTE)) != 0)
- outs() << " initprot ?" << format("0x%08" PRIx32, initprot) << "\n";
+ outs() << " initprot ?" << format("0x%08" PRIx32, initprot) << "\n";
else {
- outs() << " initprot ";
+ outs() << " initprot ";
outs() << ((initprot & MachO::VM_PROT_READ) ? "r" : "-");
outs() << ((initprot & MachO::VM_PROT_WRITE) ? "w" : "-");
outs() << ((initprot & MachO::VM_PROT_EXECUTE) ? "x\n" : "-\n");
@@ -8630,6 +8548,63 @@ static void Print_x86_exception_state_t(MachO::x86_exception_state64_t &exc64) {
outs() << " faultvaddr " << format("0x%016" PRIx64, exc64.faultvaddr) << "\n";
}
+static void Print_arm_thread_state32_t(MachO::arm_thread_state32_t &cpu32) {
+ outs() << "\t r0 " << format("0x%08" PRIx32, cpu32.r[0]);
+ outs() << " r1 " << format("0x%08" PRIx32, cpu32.r[1]);
+ outs() << " r2 " << format("0x%08" PRIx32, cpu32.r[2]);
+ outs() << " r3 " << format("0x%08" PRIx32, cpu32.r[3]) << "\n";
+ outs() << "\t r4 " << format("0x%08" PRIx32, cpu32.r[4]);
+ outs() << " r5 " << format("0x%08" PRIx32, cpu32.r[5]);
+ outs() << " r6 " << format("0x%08" PRIx32, cpu32.r[6]);
+ outs() << " r7 " << format("0x%08" PRIx32, cpu32.r[7]) << "\n";
+ outs() << "\t r8 " << format("0x%08" PRIx32, cpu32.r[8]);
+ outs() << " r9 " << format("0x%08" PRIx32, cpu32.r[9]);
+ outs() << " r10 " << format("0x%08" PRIx32, cpu32.r[10]);
+ outs() << " r11 " << format("0x%08" PRIx32, cpu32.r[11]) << "\n";
+ outs() << "\t r12 " << format("0x%08" PRIx32, cpu32.r[12]);
+ outs() << " sp " << format("0x%08" PRIx32, cpu32.sp);
+ outs() << " lr " << format("0x%08" PRIx32, cpu32.lr);
+ outs() << " pc " << format("0x%08" PRIx32, cpu32.pc) << "\n";
+ outs() << "\t cpsr " << format("0x%08" PRIx32, cpu32.cpsr) << "\n";
+}
+
+static void Print_arm_thread_state64_t(MachO::arm_thread_state64_t &cpu64) {
+ outs() << "\t x0 " << format("0x%016" PRIx64, cpu64.x[0]);
+ outs() << " x1 " << format("0x%016" PRIx64, cpu64.x[1]);
+ outs() << " x2 " << format("0x%016" PRIx64, cpu64.x[2]) << "\n";
+ outs() << "\t x3 " << format("0x%016" PRIx64, cpu64.x[3]);
+ outs() << " x4 " << format("0x%016" PRIx64, cpu64.x[4]);
+ outs() << " x5 " << format("0x%016" PRIx64, cpu64.x[5]) << "\n";
+ outs() << "\t x6 " << format("0x%016" PRIx64, cpu64.x[6]);
+ outs() << " x7 " << format("0x%016" PRIx64, cpu64.x[7]);
+ outs() << " x8 " << format("0x%016" PRIx64, cpu64.x[8]) << "\n";
+ outs() << "\t x9 " << format("0x%016" PRIx64, cpu64.x[9]);
+ outs() << " x10 " << format("0x%016" PRIx64, cpu64.x[10]);
+ outs() << " x11 " << format("0x%016" PRIx64, cpu64.x[11]) << "\n";
+ outs() << "\t x12 " << format("0x%016" PRIx64, cpu64.x[12]);
+ outs() << " x13 " << format("0x%016" PRIx64, cpu64.x[13]);
+ outs() << " x14 " << format("0x%016" PRIx64, cpu64.x[14]) << "\n";
+ outs() << "\t x15 " << format("0x%016" PRIx64, cpu64.x[15]);
+ outs() << " x16 " << format("0x%016" PRIx64, cpu64.x[16]);
+ outs() << " x17 " << format("0x%016" PRIx64, cpu64.x[17]) << "\n";
+ outs() << "\t x18 " << format("0x%016" PRIx64, cpu64.x[18]);
+ outs() << " x19 " << format("0x%016" PRIx64, cpu64.x[19]);
+ outs() << " x20 " << format("0x%016" PRIx64, cpu64.x[20]) << "\n";
+ outs() << "\t x21 " << format("0x%016" PRIx64, cpu64.x[21]);
+ outs() << " x22 " << format("0x%016" PRIx64, cpu64.x[22]);
+ outs() << " x23 " << format("0x%016" PRIx64, cpu64.x[23]) << "\n";
+ outs() << "\t x24 " << format("0x%016" PRIx64, cpu64.x[24]);
+ outs() << " x25 " << format("0x%016" PRIx64, cpu64.x[25]);
+ outs() << " x26 " << format("0x%016" PRIx64, cpu64.x[26]) << "\n";
+ outs() << "\t x27 " << format("0x%016" PRIx64, cpu64.x[27]);
+ outs() << " x28 " << format("0x%016" PRIx64, cpu64.x[28]);
+ outs() << " fp " << format("0x%016" PRIx64, cpu64.fp) << "\n";
+ outs() << "\t lr " << format("0x%016" PRIx64, cpu64.lr);
+ outs() << " sp " << format("0x%016" PRIx64, cpu64.sp);
+ outs() << " pc " << format("0x%016" PRIx64, cpu64.pc) << "\n";
+ outs() << "\t cpsr " << format("0x%08" PRIx32, cpu64.cpsr) << "\n";
+}
+
static void PrintThreadCommand(MachO::thread_command t, const char *Ptr,
bool isLittleEndian, uint32_t cputype) {
if (t.cmd == MachO::LC_THREAD)
@@ -8786,6 +8761,100 @@ static void PrintThreadCommand(MachO::thread_command t, const char *Ptr,
begin += count * sizeof(uint32_t);
}
}
+ } else if (cputype == MachO::CPU_TYPE_ARM) {
+ while (begin < end) {
+ if (end - begin > (ptrdiff_t)sizeof(uint32_t)) {
+ memcpy((char *)&flavor, begin, sizeof(uint32_t));
+ begin += sizeof(uint32_t);
+ } else {
+ flavor = 0;
+ begin = end;
+ }
+ if (isLittleEndian != sys::IsLittleEndianHost)
+ sys::swapByteOrder(flavor);
+ if (end - begin > (ptrdiff_t)sizeof(uint32_t)) {
+ memcpy((char *)&count, begin, sizeof(uint32_t));
+ begin += sizeof(uint32_t);
+ } else {
+ count = 0;
+ begin = end;
+ }
+ if (isLittleEndian != sys::IsLittleEndianHost)
+ sys::swapByteOrder(count);
+ if (flavor == MachO::ARM_THREAD_STATE) {
+ outs() << " flavor ARM_THREAD_STATE\n";
+ if (count == MachO::ARM_THREAD_STATE_COUNT)
+ outs() << " count ARM_THREAD_STATE_COUNT\n";
+ else
+ outs() << " count " << count
+ << " (not ARM_THREAD_STATE_COUNT)\n";
+ MachO::arm_thread_state32_t cpu32;
+ left = end - begin;
+ if (left >= sizeof(MachO::arm_thread_state32_t)) {
+ memcpy(&cpu32, begin, sizeof(MachO::arm_thread_state32_t));
+ begin += sizeof(MachO::arm_thread_state32_t);
+ } else {
+ memset(&cpu32, '\0', sizeof(MachO::arm_thread_state32_t));
+ memcpy(&cpu32, begin, left);
+ begin += left;
+ }
+ if (isLittleEndian != sys::IsLittleEndianHost)
+ swapStruct(cpu32);
+ Print_arm_thread_state32_t(cpu32);
+ } else {
+ outs() << " flavor " << flavor << " (unknown)\n";
+ outs() << " count " << count << "\n";
+ outs() << " state (unknown)\n";
+ begin += count * sizeof(uint32_t);
+ }
+ }
+ } else if (cputype == MachO::CPU_TYPE_ARM64) {
+ while (begin < end) {
+ if (end - begin > (ptrdiff_t)sizeof(uint32_t)) {
+ memcpy((char *)&flavor, begin, sizeof(uint32_t));
+ begin += sizeof(uint32_t);
+ } else {
+ flavor = 0;
+ begin = end;
+ }
+ if (isLittleEndian != sys::IsLittleEndianHost)
+ sys::swapByteOrder(flavor);
+ if (end - begin > (ptrdiff_t)sizeof(uint32_t)) {
+ memcpy((char *)&count, begin, sizeof(uint32_t));
+ begin += sizeof(uint32_t);
+ } else {
+ count = 0;
+ begin = end;
+ }
+ if (isLittleEndian != sys::IsLittleEndianHost)
+ sys::swapByteOrder(count);
+ if (flavor == MachO::ARM_THREAD_STATE64) {
+ outs() << " flavor ARM_THREAD_STATE64\n";
+ if (count == MachO::ARM_THREAD_STATE64_COUNT)
+ outs() << " count ARM_THREAD_STATE64_COUNT\n";
+ else
+ outs() << " count " << count
+ << " (not ARM_THREAD_STATE64_COUNT)\n";
+ MachO::arm_thread_state64_t cpu64;
+ left = end - begin;
+ if (left >= sizeof(MachO::arm_thread_state64_t)) {
+ memcpy(&cpu64, begin, sizeof(MachO::arm_thread_state64_t));
+ begin += sizeof(MachO::arm_thread_state64_t);
+ } else {
+ memset(&cpu64, '\0', sizeof(MachO::arm_thread_state64_t));
+ memcpy(&cpu64, begin, left);
+ begin += left;
+ }
+ if (isLittleEndian != sys::IsLittleEndianHost)
+ swapStruct(cpu64);
+ Print_arm_thread_state64_t(cpu64);
+ } else {
+ outs() << " flavor " << flavor << " (unknown)\n";
+ outs() << " count " << count << "\n";
+ outs() << " state (unknown)\n";
+ begin += count * sizeof(uint32_t);
+ }
+ }
} else {
while (begin < end) {
if (end - begin > (ptrdiff_t)sizeof(uint32_t)) {
diff --git a/contrib/llvm/tools/llvm-objdump/WasmDump.cpp b/contrib/llvm/tools/llvm-objdump/WasmDump.cpp
new file mode 100644
index 000000000000..0d8ffba6ba45
--- /dev/null
+++ b/contrib/llvm/tools/llvm-objdump/WasmDump.cpp
@@ -0,0 +1,28 @@
+//===-- WasmDump.cpp - wasm-specific dumper ---------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file implements the wasm-specific dumper for llvm-objdump.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm-objdump.h"
+#include "llvm/Object/Wasm.h"
+
+using namespace llvm;
+using namespace object;
+
+void llvm::printWasmFileHeader(const object::ObjectFile *Obj) {
+ const WasmObjectFile *File = dyn_cast<const WasmObjectFile>(Obj);
+
+ outs() << "Program Header:\n";
+ outs() << "Version: 0x";
+ outs().write_hex(File->getHeader().Version);
+ outs() << "\n";
+}
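The new WasmDump.cpp dereferences the dyn_cast result without a null check; the caller is expected to pass only wasm objects. Purely as a sketch, a checked variant (hypothetical helper name, not part of the patch) could read:

    // Sketch: cast<> asserts the dynamic type instead of silently returning null.
    static void printWasmFileHeaderChecked(const llvm::object::ObjectFile *Obj) {
      const auto *File = llvm::cast<llvm::object::WasmObjectFile>(Obj);
      llvm::outs() << "Program Header:\nVersion: 0x";
      llvm::outs().write_hex(File->getHeader().Version);
      llvm::outs() << "\n";
    }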
diff --git a/contrib/llvm/tools/llvm-objdump/llvm-objdump.cpp b/contrib/llvm/tools/llvm-objdump/llvm-objdump.cpp
index ed55c918b58a..8373f0ce3f1e 100644
--- a/contrib/llvm/tools/llvm-objdump/llvm-objdump.cpp
+++ b/contrib/llvm/tools/llvm-objdump/llvm-objdump.cpp
@@ -23,6 +23,7 @@
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/FaultMaps.h"
#include "llvm/DebugInfo/DWARF/DWARFContext.h"
+#include "llvm/DebugInfo/Symbolize/Symbolize.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDisassembler/MCDisassembler.h"
@@ -36,6 +37,7 @@
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/COFF.h"
+#include "llvm/Object/COFFImportFile.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/MachO.h"
#include "llvm/Object/ObjectFile.h"
@@ -60,6 +62,7 @@
#include <cstring>
#include <system_error>
#include <utility>
+#include <unordered_map>
using namespace llvm;
using namespace object;
@@ -185,9 +188,29 @@ cl::opt<bool> PrintFaultMaps("fault-map-section",
cl::opt<DIDumpType> llvm::DwarfDumpType(
"dwarf", cl::init(DIDT_Null), cl::desc("Dump of dwarf debug sections:"),
- cl::values(clEnumValN(DIDT_Frames, "frames", ".debug_frame"),
- clEnumValEnd));
+ cl::values(clEnumValN(DIDT_Frames, "frames", ".debug_frame")));
+cl::opt<bool> PrintSource(
+ "source",
+ cl::desc(
+ "Display source inlined with disassembly. Implies disassmble object"));
+
+cl::alias PrintSourceShort("S", cl::desc("Alias for -source"),
+ cl::aliasopt(PrintSource));
+
+cl::opt<bool> PrintLines("line-numbers",
+ cl::desc("Display source line numbers with "
+ "disassembly. Implies disassemble object"));
+
+cl::alias PrintLinesShort("l", cl::desc("Alias for -line-numbers"),
+ cl::aliasopt(PrintLines));
+
+cl::opt<unsigned long long>
+ StartAddress("start-address", cl::desc("Disassemble beginning at address"),
+ cl::value_desc("address"), cl::init(0));
+cl::opt<unsigned long long>
+ StopAddress("stop-address", cl::desc("Stop disassembly at address"),
+ cl::value_desc("address"), cl::init(UINT64_MAX));
static StringRef ToolName;
namespace {
@@ -240,18 +263,17 @@ private:
llvm::object::ObjectFile const &Object;
};
SectionFilter ToolSectionFilter(llvm::object::ObjectFile const &O) {
- return SectionFilter([](llvm::object::SectionRef const &S) {
- if(FilterSections.empty())
- return true;
- llvm::StringRef String;
- std::error_code error = S.getName(String);
- if (error)
- return false;
- return std::find(FilterSections.begin(),
- FilterSections.end(),
- String) != FilterSections.end();
- },
- O);
+ return SectionFilter(
+ [](llvm::object::SectionRef const &S) {
+ if (FilterSections.empty())
+ return true;
+ llvm::StringRef String;
+ std::error_code error = S.getName(String);
+ if (error)
+ return false;
+ return is_contained(FilterSections, String);
+ },
+ O);
}
}
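The SectionFilter hunk replaces an explicit std::find with llvm::is_contained from STLExtras.h. A tiny sketch of the helper, independent of the patch (names are illustrative):

    #include "llvm/ADT/STLExtras.h"
    #include <string>
    #include <vector>

    // Sketch: is_contained(Range, Value) == (std::find(begin, end, Value) != end).
    static bool keepSection(const std::vector<std::string> &Filters,
                            const std::string &Name) {
      return Filters.empty() || llvm::is_contained(Filters, Name);
    }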
@@ -271,6 +293,12 @@ LLVM_ATTRIBUTE_NORETURN void llvm::error(Twine Message) {
}
LLVM_ATTRIBUTE_NORETURN void llvm::report_error(StringRef File,
+ Twine Message) {
+ errs() << ToolName << ": '" << File << "': " << Message << ".\n";
+ exit(1);
+}
+
+LLVM_ATTRIBUTE_NORETURN void llvm::report_error(StringRef File,
std::error_code EC) {
assert(EC);
errs() << ToolName << ": '" << File << "': " << EC.message() << ".\n";
@@ -297,14 +325,14 @@ LLVM_ATTRIBUTE_NORETURN void llvm::report_error(StringRef ArchiveName,
if (ArchiveName != "")
errs() << ArchiveName << "(" << FileName << ")";
else
- errs() << FileName;
+ errs() << "'" << FileName << "'";
if (!ArchitectureName.empty())
errs() << " (for architecture " << ArchitectureName << ")";
std::string Buf;
raw_string_ostream OS(Buf);
logAllUnhandledErrors(std::move(E), OS, "");
OS.flush();
- errs() << " " << Buf;
+ errs() << ": " << Buf;
exit(1);
}
@@ -312,13 +340,14 @@ LLVM_ATTRIBUTE_NORETURN void llvm::report_error(StringRef ArchiveName,
const object::Archive::Child &C,
llvm::Error E,
StringRef ArchitectureName) {
- ErrorOr<StringRef> NameOrErr = C.getName();
+ Expected<StringRef> NameOrErr = C.getName();
  // TODO: if we have an error getting the name then it would be nice to print
  // the index of which archive member this is and/or its offset in the
// archive instead of "???" as the name.
- if (NameOrErr.getError())
+ if (!NameOrErr) {
+ consumeError(NameOrErr.takeError());
llvm::report_error(ArchiveName, "???", std::move(E), ArchitectureName);
- else
+ } else
llvm::report_error(ArchiveName, NameOrErr.get(), std::move(E),
ArchitectureName);
}
@@ -347,8 +376,12 @@ static const Target *getTarget(const ObjectFile *Obj = nullptr) {
std::string Error;
const Target *TheTarget = TargetRegistry::lookupTarget(ArchName, TheTriple,
Error);
- if (!TheTarget)
- report_fatal_error("can't find target: " + Error);
+ if (!TheTarget) {
+ if (Obj)
+ report_error(Obj->getFileName(), "can't find target: " + Error);
+ else
+ error("can't find target: " + Error);
+ }
// Update the triple name and return the found target.
TripleName = TheTriple.getTriple();
@@ -360,13 +393,104 @@ bool llvm::RelocAddressLess(RelocationRef a, RelocationRef b) {
}
namespace {
+class SourcePrinter {
+protected:
+ DILineInfo OldLineInfo;
+ const ObjectFile *Obj;
+ std::unique_ptr<symbolize::LLVMSymbolizer> Symbolizer;
+ // File name to file contents of source
+ std::unordered_map<std::string, std::unique_ptr<MemoryBuffer>> SourceCache;
+ // Mark the line endings of the cached source
+ std::unordered_map<std::string, std::vector<StringRef>> LineCache;
+
+private:
+ bool cacheSource(std::string File);
+
+public:
+ virtual ~SourcePrinter() {}
+ SourcePrinter() : Obj(nullptr), Symbolizer(nullptr) {}
+ SourcePrinter(const ObjectFile *Obj, StringRef DefaultArch) : Obj(Obj) {
+ symbolize::LLVMSymbolizer::Options SymbolizerOpts(
+ DILineInfoSpecifier::FunctionNameKind::None, true, false, false,
+ DefaultArch);
+ Symbolizer.reset(new symbolize::LLVMSymbolizer(SymbolizerOpts));
+ }
+ virtual void printSourceLine(raw_ostream &OS, uint64_t Address,
+ StringRef Delimiter = "; ");
+};
+
+bool SourcePrinter::cacheSource(std::string File) {
+ auto BufferOrError = MemoryBuffer::getFile(File);
+ if (!BufferOrError)
+ return false;
+ // Chomp the file to get lines
+ size_t BufferSize = (*BufferOrError)->getBufferSize();
+ const char *BufferStart = (*BufferOrError)->getBufferStart();
+ for (const char *Start = BufferStart, *End = BufferStart;
+ End < BufferStart + BufferSize; End++)
+ if (*End == '\n' || End == BufferStart + BufferSize - 1 ||
+ (*End == '\r' && *(End + 1) == '\n')) {
+ LineCache[File].push_back(StringRef(Start, End - Start));
+ if (*End == '\r')
+ End++;
+ Start = End + 1;
+ }
+ SourceCache[File] = std::move(*BufferOrError);
+ return true;
+}
+
+void SourcePrinter::printSourceLine(raw_ostream &OS, uint64_t Address,
+ StringRef Delimiter) {
+ if (!Symbolizer)
+ return;
+ DILineInfo LineInfo = DILineInfo();
+  auto ExpectedLineInfo =
+      Symbolizer->symbolizeCode(Obj->getFileName(), Address);
+  if (!ExpectedLineInfo)
+    consumeError(ExpectedLineInfo.takeError());
+  else
+    LineInfo = *ExpectedLineInfo;
+
+ if ((LineInfo.FileName == "<invalid>") || OldLineInfo.Line == LineInfo.Line ||
+ LineInfo.Line == 0)
+ return;
+
+ if (PrintLines)
+ OS << Delimiter << LineInfo.FileName << ":" << LineInfo.Line << "\n";
+ if (PrintSource) {
+ if (SourceCache.find(LineInfo.FileName) == SourceCache.end())
+ if (!cacheSource(LineInfo.FileName))
+ return;
+ auto FileBuffer = SourceCache.find(LineInfo.FileName);
+ if (FileBuffer != SourceCache.end()) {
+ auto LineBuffer = LineCache.find(LineInfo.FileName);
+ if (LineBuffer != LineCache.end())
+ // Vector begins at 0, line numbers are non-zero
+ OS << Delimiter << LineBuffer->second[LineInfo.Line - 1].ltrim()
+ << "\n";
+ }
+ }
+ OldLineInfo = LineInfo;
+}
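SourcePrinter caches each source file once and indexes LineCache with 1-based line numbers. One plausible way the disassembly loop drives it, shown only as a hedged sketch (the object pointer, target and address are placeholders, not taken from the patch):

    // Sketch: build a printer per object, then emit "; file:line" comments
    // ahead of each disassembled address when -S or -l is given.
    SourcePrinter SP(Obj, TheTarget->getName());
    std::string Comment;
    llvm::raw_string_ostream CommentOS(Comment);
    SP.printSourceLine(CommentOS, /*Address=*/0x401000);
    llvm::outs() << CommentOS.str();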
+
+static bool isArmElf(const ObjectFile *Obj) {
+ return (Obj->isELF() &&
+ (Obj->getArch() == Triple::aarch64 ||
+ Obj->getArch() == Triple::aarch64_be ||
+ Obj->getArch() == Triple::arm || Obj->getArch() == Triple::armeb ||
+ Obj->getArch() == Triple::thumb ||
+ Obj->getArch() == Triple::thumbeb));
+}
+
class PrettyPrinter {
public:
virtual ~PrettyPrinter(){}
virtual void printInst(MCInstPrinter &IP, const MCInst *MI,
ArrayRef<uint8_t> Bytes, uint64_t Address,
raw_ostream &OS, StringRef Annot,
- MCSubtargetInfo const &STI) {
+ MCSubtargetInfo const &STI, SourcePrinter *SP) {
+ if (SP && (PrintSource || PrintLines))
+ SP->printSourceLine(OS, Address);
OS << format("%8" PRIx64 ":", Address);
if (!NoShowRawInsn) {
OS << "\t";
@@ -392,10 +516,11 @@ public:
OS << format("%08" PRIx32, opcode);
}
}
- void printInst(MCInstPrinter &IP, const MCInst *MI,
- ArrayRef<uint8_t> Bytes, uint64_t Address,
- raw_ostream &OS, StringRef Annot,
- MCSubtargetInfo const &STI) override {
+ void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef<uint8_t> Bytes,
+ uint64_t Address, raw_ostream &OS, StringRef Annot,
+ MCSubtargetInfo const &STI, SourcePrinter *SP) override {
+ if (SP && (PrintSource || PrintLines))
+ SP->printSourceLine(OS, Address, "");
if (!MI) {
printLead(Bytes, Address, OS);
OS << " <unknown>";
@@ -416,6 +541,8 @@ public:
while(!HeadTail.first.empty()) {
OS << Separator;
Separator = "\n";
+ if (SP && (PrintSource || PrintLines))
+ SP->printSourceLine(OS, Address, "");
printLead(Bytes, Address, OS);
OS << Preamble;
Preamble = " ";
@@ -440,13 +567,9 @@ HexagonPrettyPrinter HexagonPrettyPrinterInst;
class AMDGCNPrettyPrinter : public PrettyPrinter {
public:
- void printInst(MCInstPrinter &IP,
- const MCInst *MI,
- ArrayRef<uint8_t> Bytes,
- uint64_t Address,
- raw_ostream &OS,
- StringRef Annot,
- MCSubtargetInfo const &STI) override {
+ void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef<uint8_t> Bytes,
+ uint64_t Address, raw_ostream &OS, StringRef Annot,
+ MCSubtargetInfo const &STI, SourcePrinter *SP) override {
if (!MI) {
OS << " <unknown>";
return;
@@ -471,6 +594,26 @@ public:
};
AMDGCNPrettyPrinter AMDGCNPrettyPrinterInst;
+class BPFPrettyPrinter : public PrettyPrinter {
+public:
+ void printInst(MCInstPrinter &IP, const MCInst *MI, ArrayRef<uint8_t> Bytes,
+ uint64_t Address, raw_ostream &OS, StringRef Annot,
+ MCSubtargetInfo const &STI, SourcePrinter *SP) override {
+ if (SP && (PrintSource || PrintLines))
+ SP->printSourceLine(OS, Address);
+ OS << format("%8" PRId64 ":", Address / 8);
+ if (!NoShowRawInsn) {
+ OS << "\t";
+ dumpBytes(Bytes, OS);
+ }
+ if (MI)
+ IP.printInst(MI, OS, "", STI);
+ else
+ OS << " <unknown>";
+ }
+};
+BPFPrettyPrinter BPFPrettyPrinterInst;
+
PrettyPrinter &selectPrettyPrinter(Triple const &Triple) {
switch(Triple.getArch()) {
default:
@@ -479,6 +622,9 @@ PrettyPrinter &selectPrettyPrinter(Triple const &Triple) {
return HexagonPrettyPrinterInst;
case Triple::amdgcn:
return AMDGCNPrettyPrinterInst;
+ case Triple::bpfel:
+ case Triple::bpfeb:
+ return BPFPrettyPrinterInst;
}
}
}
@@ -495,22 +641,22 @@ static std::error_code getRelocationValueString(const ELFObjectFile<ELFT> *Obj,
const ELFFile<ELFT> &EF = *Obj->getELFFile();
- ErrorOr<const Elf_Shdr *> SecOrErr = EF.getSection(Rel.d.a);
- if (std::error_code EC = SecOrErr.getError())
- return EC;
+ auto SecOrErr = EF.getSection(Rel.d.a);
+ if (!SecOrErr)
+ return errorToErrorCode(SecOrErr.takeError());
const Elf_Shdr *Sec = *SecOrErr;
- ErrorOr<const Elf_Shdr *> SymTabOrErr = EF.getSection(Sec->sh_link);
- if (std::error_code EC = SymTabOrErr.getError())
- return EC;
+ auto SymTabOrErr = EF.getSection(Sec->sh_link);
+ if (!SymTabOrErr)
+ return errorToErrorCode(SymTabOrErr.takeError());
const Elf_Shdr *SymTab = *SymTabOrErr;
assert(SymTab->sh_type == ELF::SHT_SYMTAB ||
SymTab->sh_type == ELF::SHT_DYNSYM);
- ErrorOr<const Elf_Shdr *> StrTabSec = EF.getSection(SymTab->sh_link);
- if (std::error_code EC = StrTabSec.getError())
- return EC;
- ErrorOr<StringRef> StrTabOrErr = EF.getStringTable(*StrTabSec);
- if (std::error_code EC = StrTabOrErr.getError())
- return EC;
+ auto StrTabSec = EF.getSection(SymTab->sh_link);
+ if (!StrTabSec)
+ return errorToErrorCode(StrTabSec.takeError());
+ auto StrTabOrErr = EF.getStringTable(*StrTabSec);
+ if (!StrTabOrErr)
+ return errorToErrorCode(StrTabOrErr.takeError());
StringRef StrTab = *StrTabOrErr;
uint8_t type = RelRef.getType();
StringRef res;
@@ -536,9 +682,9 @@ static std::error_code getRelocationValueString(const ELFObjectFile<ELFT> *Obj,
if (!SymSI)
return errorToErrorCode(SymSI.takeError());
const Elf_Shdr *SymSec = Obj->getSection((*SymSI)->getRawDataRefImpl());
- ErrorOr<StringRef> SecName = EF.getSectionName(SymSec);
- if (std::error_code EC = SecName.getError())
- return EC;
+ auto SecName = EF.getSectionName(SymSec);
+ if (!SecName)
+ return errorToErrorCode(SecName.takeError());
Target = *SecName;
} else {
Expected<StringRef> SymName = symb->getName(StrTab);
@@ -574,6 +720,7 @@ static std::error_code getRelocationValueString(const ELFObjectFile<ELFT> *Obj,
}
break;
case ELF::EM_LANAI:
+ case ELF::EM_AVR:
case ELF::EM_AARCH64: {
std::string fmtbuf;
raw_string_ostream fmt(fmtbuf);
@@ -590,6 +737,7 @@ static std::error_code getRelocationValueString(const ELFObjectFile<ELFT> *Obj,
case ELF::EM_HEXAGON:
case ELF::EM_MIPS:
case ELF::EM_BPF:
+ case ELF::EM_RISCV:
res = Target;
break;
case ELF::EM_WEBASSEMBLY:
@@ -657,23 +805,13 @@ static void printRelocationTargetName(const MachOObjectFile *O,
for (const SymbolRef &Symbol : O->symbols()) {
std::error_code ec;
Expected<uint64_t> Addr = Symbol.getAddress();
- if (!Addr) {
- std::string Buf;
- raw_string_ostream OS(Buf);
- logAllUnhandledErrors(Addr.takeError(), OS, "");
- OS.flush();
- report_fatal_error(Buf);
- }
+ if (!Addr)
+ report_error(O->getFileName(), Addr.takeError());
if (*Addr != Val)
continue;
Expected<StringRef> Name = Symbol.getName();
- if (!Name) {
- std::string Buf;
- raw_string_ostream OS(Buf);
- logAllUnhandledErrors(Name.takeError(), OS, "");
- OS.flush();
- report_fatal_error(Buf);
- }
+ if (!Name)
+ report_error(O->getFileName(), Name.takeError());
fmt << *Name;
return;
}
@@ -688,7 +826,7 @@ static void printRelocationTargetName(const MachOObjectFile *O,
if (Addr != Val)
continue;
if ((ec = Section.getName(Name)))
- report_fatal_error(ec.message());
+ report_error(O->getFileName(), ec);
fmt << Name;
return;
}
@@ -705,7 +843,8 @@ static void printRelocationTargetName(const MachOObjectFile *O,
symbol_iterator SI = O->symbol_begin();
advance(SI, Val);
Expected<StringRef> SOrErr = SI->getName();
- error(errorToErrorCode(SOrErr.takeError()));
+ if (!SOrErr)
+ report_error(O->getFileName(), SOrErr.takeError());
S = *SOrErr;
} else {
section_iterator SI = O->section_begin();
@@ -756,8 +895,8 @@ static std::error_code getRelocationValueString(const MachOObjectFile *Obj,
// NOTE: Scattered relocations don't exist on x86_64.
unsigned RType = Obj->getAnyRelocationType(RENext);
if (RType != MachO::X86_64_RELOC_UNSIGNED)
- report_fatal_error("Expected X86_64_RELOC_UNSIGNED after "
- "X86_64_RELOC_SUBTRACTOR.");
+ report_error(Obj->getFileName(), "Expected X86_64_RELOC_UNSIGNED after "
+ "X86_64_RELOC_SUBTRACTOR.");
// The X86_64_RELOC_UNSIGNED contains the minuend symbol;
// X86_64_RELOC_SUBTRACTOR contains the subtrahend.
@@ -805,8 +944,8 @@ static std::error_code getRelocationValueString(const MachOObjectFile *Obj,
unsigned RType = Obj->getAnyRelocationType(RENext);
if (RType != MachO::GENERIC_RELOC_PAIR)
- report_fatal_error("Expected GENERIC_RELOC_PAIR after "
- "GENERIC_RELOC_SECTDIFF.");
+ report_error(Obj->getFileName(), "Expected GENERIC_RELOC_PAIR after "
+ "GENERIC_RELOC_SECTDIFF.");
printRelocationTargetName(Obj, RE, fmt);
fmt << "-";
@@ -826,8 +965,8 @@ static std::error_code getRelocationValueString(const MachOObjectFile *Obj,
// GENERIC_RELOC_PAIR.
unsigned RType = Obj->getAnyRelocationType(RENext);
if (RType != MachO::GENERIC_RELOC_PAIR)
- report_fatal_error("Expected GENERIC_RELOC_PAIR after "
- "GENERIC_RELOC_LOCAL_SECTDIFF.");
+ report_error(Obj->getFileName(), "Expected GENERIC_RELOC_PAIR after "
+ "GENERIC_RELOC_LOCAL_SECTDIFF.");
printRelocationTargetName(Obj, RE, fmt);
fmt << "-";
@@ -866,8 +1005,8 @@ static std::error_code getRelocationValueString(const MachOObjectFile *Obj,
// ARM_RELOC_PAIR.
unsigned RType = Obj->getAnyRelocationType(RENext);
if (RType != MachO::ARM_RELOC_PAIR)
- report_fatal_error("Expected ARM_RELOC_PAIR after "
- "ARM_RELOC_HALF");
+ report_error(Obj->getFileName(), "Expected ARM_RELOC_PAIR after "
+ "ARM_RELOC_HALF");
// NOTE: The half of the target virtual address is stashed in the
// address field of the secondary relocation, but we can't reverse
@@ -939,7 +1078,23 @@ static bool getHidden(RelocationRef RelRef) {
return false;
}
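+// Descriptive note (added): return the ELF STT_* type of a symbol by
+// dispatching on the concrete ELFObjectFile variant (32/64-bit, LE/BE).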
+static uint8_t getElfSymbolType(const ObjectFile *Obj, const SymbolRef &Sym) {
+ assert(Obj->isELF());
+ if (auto *Elf32LEObj = dyn_cast<ELF32LEObjectFile>(Obj))
+ return Elf32LEObj->getSymbol(Sym.getRawDataRefImpl())->getType();
+ if (auto *Elf64LEObj = dyn_cast<ELF64LEObjectFile>(Obj))
+ return Elf64LEObj->getSymbol(Sym.getRawDataRefImpl())->getType();
+ if (auto *Elf32BEObj = dyn_cast<ELF32BEObjectFile>(Obj))
+ return Elf32BEObj->getSymbol(Sym.getRawDataRefImpl())->getType();
+ if (auto *Elf64BEObj = cast<ELF64BEObjectFile>(Obj))
+ return Elf64BEObj->getSymbol(Sym.getRawDataRefImpl())->getType();
+ llvm_unreachable("Unsupported binary format");
+}
+
static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
+ if (StartAddress > StopAddress)
+ error("Start address should be less than stop address");
+
const Target *TheTarget = getTarget(Obj);
// Package up features to be passed to target/subtarget
@@ -952,27 +1107,34 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
std::unique_ptr<const MCRegisterInfo> MRI(
TheTarget->createMCRegInfo(TripleName));
if (!MRI)
- report_fatal_error("error: no register info for target " + TripleName);
+ report_error(Obj->getFileName(), "no register info for target " +
+ TripleName);
// Set up disassembler.
std::unique_ptr<const MCAsmInfo> AsmInfo(
TheTarget->createMCAsmInfo(*MRI, TripleName));
if (!AsmInfo)
- report_fatal_error("error: no assembly info for target " + TripleName);
+ report_error(Obj->getFileName(), "no assembly info for target " +
+ TripleName);
std::unique_ptr<const MCSubtargetInfo> STI(
TheTarget->createMCSubtargetInfo(TripleName, MCPU, Features.getString()));
if (!STI)
- report_fatal_error("error: no subtarget info for target " + TripleName);
+ report_error(Obj->getFileName(), "no subtarget info for target " +
+ TripleName);
std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
if (!MII)
- report_fatal_error("error: no instruction info for target " + TripleName);
- std::unique_ptr<const MCObjectFileInfo> MOFI(new MCObjectFileInfo);
- MCContext Ctx(AsmInfo.get(), MRI.get(), MOFI.get());
+ report_error(Obj->getFileName(), "no instruction info for target " +
+ TripleName);
+ MCObjectFileInfo MOFI;
+ MCContext Ctx(AsmInfo.get(), MRI.get(), &MOFI);
+ // FIXME: for now initialize MCObjectFileInfo with default values
+ MOFI.InitMCObjectFileInfo(Triple(TripleName), false, CodeModel::Default, Ctx);
std::unique_ptr<MCDisassembler> DisAsm(
TheTarget->createMCDisassembler(*STI, Ctx));
if (!DisAsm)
- report_fatal_error("error: no disassembler for target " + TripleName);
+ report_error(Obj->getFileName(), "no disassembler for target " +
+ TripleName);
std::unique_ptr<const MCInstrAnalysis> MIA(
TheTarget->createMCInstrAnalysis(MII.get()));
@@ -981,14 +1143,16 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
std::unique_ptr<MCInstPrinter> IP(TheTarget->createMCInstPrinter(
Triple(TripleName), AsmPrinterVariant, *AsmInfo, *MII, *MRI));
if (!IP)
- report_fatal_error("error: no instruction printer for target " +
- TripleName);
+ report_error(Obj->getFileName(), "no instruction printer for target " +
+ TripleName);
IP->setPrintImmHex(PrintImmHex);
PrettyPrinter &PIP = selectPrettyPrinter(Triple(TripleName));
StringRef Fmt = Obj->getBytesInAddress() > 4 ? "\t\t%016" PRIx64 ": " :
"\t\t\t%08" PRIx64 ": ";
+ SourcePrinter SP(Obj, TheTarget->getName());
+
// Create a mapping, RelocSecs = SectionRelocMap[S], where sections
// in RelocSecs contain the relocations for section S.
std::error_code EC;
@@ -1001,25 +1165,33 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
// Create a mapping from virtual address to symbol name. This is used to
// pretty print the symbols while disassembling.
- typedef std::vector<std::pair<uint64_t, StringRef>> SectionSymbolsTy;
+ typedef std::vector<std::tuple<uint64_t, StringRef, uint8_t>> SectionSymbolsTy;
std::map<SectionRef, SectionSymbolsTy> AllSymbols;
for (const SymbolRef &Symbol : Obj->symbols()) {
Expected<uint64_t> AddressOrErr = Symbol.getAddress();
- error(errorToErrorCode(AddressOrErr.takeError()));
+ if (!AddressOrErr)
+ report_error(Obj->getFileName(), AddressOrErr.takeError());
uint64_t Address = *AddressOrErr;
Expected<StringRef> Name = Symbol.getName();
- error(errorToErrorCode(Name.takeError()));
+ if (!Name)
+ report_error(Obj->getFileName(), Name.takeError());
if (Name->empty())
continue;
Expected<section_iterator> SectionOrErr = Symbol.getSection();
- error(errorToErrorCode(SectionOrErr.takeError()));
+ if (!SectionOrErr)
+ report_error(Obj->getFileName(), SectionOrErr.takeError());
section_iterator SecI = *SectionOrErr;
if (SecI == Obj->section_end())
continue;
- AllSymbols[*SecI].emplace_back(Address, *Name);
+ uint8_t SymbolType = ELF::STT_NOTYPE;
+ if (Obj->isELF())
+ SymbolType = getElfSymbolType(Obj, Symbol);
+
+ AllSymbols[*SecI].emplace_back(Address, *Name, SymbolType);
+
}
// Create a mapping from virtual address to section.
@@ -1051,7 +1223,7 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
Sec = SectionAddresses.end();
if (Sec != SectionAddresses.end())
- AllSymbols[Sec->second].emplace_back(VA, Name);
+ AllSymbols[Sec->second].emplace_back(VA, Name, ELF::STT_NOTYPE);
}
}
@@ -1073,20 +1245,36 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
SectionSymbolsTy &Symbols = AllSymbols[Section];
std::vector<uint64_t> DataMappingSymsAddr;
std::vector<uint64_t> TextMappingSymsAddr;
- if (Obj->isELF() && Obj->getArch() == Triple::aarch64) {
+ if (isArmElf(Obj)) {
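+      // Collect ARM/AArch64 mapping symbols: $d marks data, while $x, $a and
+      // $t mark A64, A32 and Thumb code respectively.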
for (const auto &Symb : Symbols) {
- uint64_t Address = Symb.first;
- StringRef Name = Symb.second;
+ uint64_t Address = std::get<0>(Symb);
+ StringRef Name = std::get<1>(Symb);
if (Name.startswith("$d"))
DataMappingSymsAddr.push_back(Address - SectionAddr);
if (Name.startswith("$x"))
TextMappingSymsAddr.push_back(Address - SectionAddr);
+ if (Name.startswith("$a"))
+ TextMappingSymsAddr.push_back(Address - SectionAddr);
+ if (Name.startswith("$t"))
+ TextMappingSymsAddr.push_back(Address - SectionAddr);
}
}
std::sort(DataMappingSymsAddr.begin(), DataMappingSymsAddr.end());
std::sort(TextMappingSymsAddr.begin(), TextMappingSymsAddr.end());
+ if (Obj->isELF() && Obj->getArch() == Triple::amdgcn) {
+ // AMDGPU disassembler uses symbolizer for printing labels
+ std::unique_ptr<MCRelocationInfo> RelInfo(
+ TheTarget->createMCRelocationInfo(TripleName, Ctx));
+ if (RelInfo) {
+ std::unique_ptr<MCSymbolizer> Symbolizer(
+ TheTarget->createMCSymbolizer(
+ TripleName, nullptr, nullptr, &Symbols, &Ctx, std::move(RelInfo)));
+ DisAsm->setSymbolizer(std::move(Symbolizer));
+ }
+ }
+
// Make a list of all the relocations for this section.
std::vector<RelocationRef> Rels;
if (InlineRelocs) {
@@ -1107,14 +1295,22 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
}
StringRef name;
error(Section.getName(name));
+
+ if ((SectionAddr <= StopAddress) &&
+ (SectionAddr + SectSize) >= StartAddress) {
outs() << "Disassembly of section ";
if (!SegmentName.empty())
outs() << SegmentName << ",";
outs() << name << ':';
+ }
// If the section has no symbol at the start, just insert a dummy one.
- if (Symbols.empty() || Symbols[0].first != 0)
- Symbols.insert(Symbols.begin(), std::make_pair(SectionAddr, name));
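+    // Give the dummy symbol STT_FUNC in text sections so it is disassembled
+    // rather than dumped as data.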
+ if (Symbols.empty() || std::get<0>(Symbols[0]) != 0) {
+ Symbols.insert(Symbols.begin(),
+ std::make_tuple(SectionAddr, name, Section.isText()
+ ? ELF::STT_FUNC
+ : ELF::STT_OBJECT));
+ }
SmallString<40> Comments;
raw_svector_ostream CommentStream(Comments);
@@ -1131,12 +1327,11 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
std::vector<RelocationRef>::const_iterator rel_end = Rels.end();
// Disassemble symbol by symbol.
for (unsigned si = 0, se = Symbols.size(); si != se; ++si) {
-
- uint64_t Start = Symbols[si].first - SectionAddr;
+ uint64_t Start = std::get<0>(Symbols[si]) - SectionAddr;
// The end is either the section end or the beginning of the next
// symbol.
uint64_t End =
- (si == se - 1) ? SectSize : Symbols[si + 1].first - SectionAddr;
+ (si == se - 1) ? SectSize : std::get<0>(Symbols[si + 1]) - SectionAddr;
// Don't try to disassemble beyond the end of section contents.
if (End > SectSize)
End = SectSize;
@@ -1144,19 +1339,37 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
if (Start >= End)
continue;
+      // Skip the symbol if its data does not fall between StartAddress and
+      // StopAddress.
+ if (End + SectionAddr < StartAddress ||
+ Start + SectionAddr > StopAddress) {
+ continue;
+ }
+
+ // Stop disassembly at the stop address specified
+ if (End + SectionAddr > StopAddress)
+ End = StopAddress - SectionAddr;
+
if (Obj->isELF() && Obj->getArch() == Triple::amdgcn) {
// make size 4 bytes folded
End = Start + ((End - Start) & ~0x3ull);
- Start += 256; // add sizeof(amd_kernel_code_t)
- // cut trailing zeroes - up to 256 bytes (align)
- const uint64_t EndAlign = 256;
- const auto Limit = End - (std::min)(EndAlign, End - Start);
- while (End > Limit &&
- *reinterpret_cast<const support::ulittle32_t*>(&Bytes[End - 4]) == 0)
- End -= 4;
+ if (std::get<2>(Symbols[si]) == ELF::STT_AMDGPU_HSA_KERNEL) {
+        // Skip amd_kernel_code_t at the beginning of the kernel symbol (256 bytes).
+ Start += 256;
+ }
+ if (si == se - 1 ||
+ std::get<2>(Symbols[si + 1]) == ELF::STT_AMDGPU_HSA_KERNEL) {
+        // Cut trailing zeroes at the end of the kernel (at most 256 bytes).
+ const uint64_t EndAlign = 256;
+ const auto Limit = End - (std::min)(EndAlign, End - Start);
+ while (End > Limit &&
+ *reinterpret_cast<const support::ulittle32_t*>(&Bytes[End - 4]) == 0)
+ End -= 4;
+ }
}
- outs() << '\n' << Symbols[si].second << ":\n";
+ outs() << '\n' << std::get<1>(Symbols[si]) << ":\n";
#ifndef NDEBUG
raw_ostream &DebugOut = DebugFlag ? dbgs() : nulls();
@@ -1167,10 +1380,18 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
for (Index = Start; Index < End; Index += Size) {
MCInst Inst;
+ if (Index + SectionAddr < StartAddress ||
+ Index + SectionAddr > StopAddress) {
+            // Skip byte by byte until StartAddress is reached.
+ Size = 1;
+ continue;
+ }
// AArch64 ELF binaries can interleave data and text in the
// same section. We rely on the markers introduced to
- // understand what we need to dump.
- if (Obj->isELF() && Obj->getArch() == Triple::aarch64) {
+ // understand what we need to dump. If the data marker is within a
+        // function, it is dumped as a word, short, or byte.
+ if (isArmElf(Obj) && std::get<2>(Symbols[si]) != ELF::STT_OBJECT &&
+ !DisassembleAll) {
uint64_t Stride = 0;
auto DAI = std::lower_bound(DataMappingSymsAddr.begin(),
@@ -1183,15 +1404,41 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
if (Index + 4 <= End) {
Stride = 4;
dumpBytes(Bytes.slice(Index, 4), outs());
- outs() << "\t.word";
+ outs() << "\t.word\t";
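+                // Read the word with the object's endianness so the printed
+                // value matches the encoded constant.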
+ uint32_t Data = 0;
+ if (Obj->isLittleEndian()) {
+ const auto Word =
+ reinterpret_cast<const support::ulittle32_t *>(
+ Bytes.data() + Index);
+ Data = *Word;
+ } else {
+ const auto Word = reinterpret_cast<const support::ubig32_t *>(
+ Bytes.data() + Index);
+ Data = *Word;
+ }
+ outs() << "0x" << format("%08" PRIx32, Data);
} else if (Index + 2 <= End) {
Stride = 2;
dumpBytes(Bytes.slice(Index, 2), outs());
- outs() << "\t.short";
+ outs() << "\t\t.short\t";
+ uint16_t Data = 0;
+ if (Obj->isLittleEndian()) {
+ const auto Short =
+ reinterpret_cast<const support::ulittle16_t *>(
+ Bytes.data() + Index);
+ Data = *Short;
+ } else {
+ const auto Short =
+ reinterpret_cast<const support::ubig16_t *>(Bytes.data() +
+ Index);
+ Data = *Short;
+ }
+ outs() << "0x" << format("%04" PRIx16, Data);
} else {
Stride = 1;
dumpBytes(Bytes.slice(Index, 1), outs());
- outs() << "\t.byte";
+ outs() << "\t\t.byte\t";
+ outs() << "0x" << format("%02" PRIx8, Bytes.slice(Index, 1)[0]);
}
Index += Stride;
outs() << "\n";
@@ -1203,17 +1450,62 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
}
}
+      // If there is a data symbol inside an ELF text section and we are only
+      // disassembling text (applicable to all architectures), we must print
+      // the data instead of disassembling it.
+ if (Obj->isELF() && std::get<2>(Symbols[si]) == ELF::STT_OBJECT &&
+ !DisassembleAll && Section.isText()) {
+ // print out data up to 8 bytes at a time in hex and ascii
+ uint8_t AsciiData[9] = {'\0'};
+ uint8_t Byte;
+ int NumBytes = 0;
+
+ for (Index = Start; Index < End; Index += 1) {
+ if (((SectionAddr + Index) < StartAddress) ||
+ ((SectionAddr + Index) > StopAddress))
+ continue;
+ if (NumBytes == 0) {
+ outs() << format("%8" PRIx64 ":", SectionAddr + Index);
+ outs() << "\t";
+ }
+ Byte = Bytes.slice(Index)[0];
+ outs() << format(" %02x", Byte);
+ AsciiData[NumBytes] = isprint(Byte) ? Byte : '.';
+
+ uint8_t IndentOffset = 0;
+ NumBytes++;
+ if (Index == End - 1 || NumBytes > 8) {
+              // Indent the output when fewer than 8 bytes were printed:
+              // 3 columns per missing byte (two hex digits plus a space).
+ IndentOffset = 3 * (8 - NumBytes);
+ for (int Excess = 8 - NumBytes; Excess < 8; Excess++)
+ AsciiData[Excess] = '\0';
+ NumBytes = 8;
+ }
+ if (NumBytes == 8) {
+ AsciiData[8] = '\0';
+ outs() << std::string(IndentOffset, ' ') << " ";
+ outs() << reinterpret_cast<char *>(AsciiData);
+ outs() << '\n';
+ NumBytes = 0;
+ }
+ }
+ }
if (Index >= End)
break;
+      // Disassemble a real instruction, or data when disassembly of all
+      // sections was requested.
bool Disassembled = DisAsm->getInstruction(Inst, Size, Bytes.slice(Index),
SectionAddr + Index, DebugOut,
CommentStream);
if (Size == 0)
Size = 1;
+
PIP.printInst(*IP, Disassembled ? &Inst : nullptr,
- Bytes.slice(Index, Size),
- SectionAddr + Index, outs(), "", *STI);
+ Bytes.slice(Index, Size), SectionAddr + Index, outs(), "",
+ *STI, &SP);
outs() << CommentStream.str();
Comments.clear();
@@ -1252,8 +1544,8 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
auto TargetSym = std::upper_bound(
TargetSectionSymbols->begin(), TargetSectionSymbols->end(),
Target, [](uint64_t LHS,
- const std::pair<uint64_t, StringRef> &RHS) {
- return LHS < RHS.first;
+ const std::tuple<uint64_t, StringRef, uint8_t> &RHS) {
+ return LHS < std::get<0>(RHS);
});
if (TargetSym != TargetSectionSymbols->begin()) {
--TargetSym;
@@ -1278,7 +1570,10 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
SmallString<32> val;
// If this relocation is hidden, skip it.
- if (hidden) goto skip_print_rel;
+ if (hidden || ((SectionAddr + addr) < StartAddress)) {
+ ++rel_cur;
+ continue;
+ }
// Stop when rel_cur's address is past the current instruction.
if (addr >= Index + Size) break;
@@ -1286,8 +1581,6 @@ static void DisassembleObject(const ObjectFile *Obj, bool InlineRelocs) {
error(getRelocationValueString(*rel_cur, val));
outs() << format(Fmt.data(), SectionAddr + addr) << name
<< "\t" << val << "\n";
-
- skip_print_rel:
++rel_cur;
}
}
@@ -1314,7 +1607,7 @@ void llvm::PrintRelocations(const ObjectFile *Obj) {
uint64_t address = Reloc.getOffset();
SmallString<32> relocname;
SmallString<32> valuestr;
- if (hidden)
+ if (address < StartAddress || address > StopAddress || hidden)
continue;
Reloc.getTypeName(relocname);
error(getRelocationValueString(Reloc, valuestr));
@@ -1403,15 +1696,21 @@ void llvm::PrintSymbolTable(const ObjectFile *o, StringRef ArchiveName,
for (const SymbolRef &Symbol : o->symbols()) {
Expected<uint64_t> AddressOrError = Symbol.getAddress();
if (!AddressOrError)
- report_error(ArchiveName, o->getFileName(), AddressOrError.takeError());
+ report_error(ArchiveName, o->getFileName(), AddressOrError.takeError(),
+ ArchitectureName);
uint64_t Address = *AddressOrError;
+ if ((Address < StartAddress) || (Address > StopAddress))
+ continue;
Expected<SymbolRef::Type> TypeOrError = Symbol.getType();
if (!TypeOrError)
- report_error(ArchiveName, o->getFileName(), TypeOrError.takeError());
+ report_error(ArchiveName, o->getFileName(), TypeOrError.takeError(),
+ ArchitectureName);
SymbolRef::Type Type = *TypeOrError;
uint32_t Flags = Symbol.getFlags();
Expected<section_iterator> SectionOrErr = Symbol.getSection();
- error(errorToErrorCode(SectionOrErr.takeError()));
+ if (!SectionOrErr)
+ report_error(ArchiveName, o->getFileName(), SectionOrErr.takeError(),
+ ArchitectureName);
section_iterator Section = *SectionOrErr;
StringRef Name;
if (Type == SymbolRef::ST_Debug && Section != o->section_end()) {
@@ -1629,27 +1928,20 @@ static void printFaultMaps(const ObjectFile *Obj) {
outs() << FMP;
}
-static void printPrivateFileHeaders(const ObjectFile *o) {
- if (o->isELF())
- printELFFileHeader(o);
- else if (o->isCOFF())
- printCOFFFileHeader(o);
- else if (o->isMachO()) {
- printMachOFileHeader(o);
- printMachOLoadCommands(o);
- } else
- report_fatal_error("Invalid/Unsupported object file format");
-}
-
-static void printFirstPrivateFileHeader(const ObjectFile *o) {
+static void printPrivateFileHeaders(const ObjectFile *o, bool onlyFirst) {
if (o->isELF())
- printELFFileHeader(o);
- else if (o->isCOFF())
- printCOFFFileHeader(o);
- else if (o->isMachO())
+ return printELFFileHeader(o);
+ if (o->isCOFF())
+ return printCOFFFileHeader(o);
+ if (o->isWasm())
+ return printWasmFileHeader(o);
+ if (o->isMachO()) {
printMachOFileHeader(o);
- else
- report_fatal_error("Invalid/Unsupported object file format");
+ if (!onlyFirst)
+ printMachOLoadCommands(o);
+ return;
+ }
+ report_error(o->getFileName(), "Invalid/Unsupported object file format");
}
static void DumpObject(const ObjectFile *o, const Archive *a = nullptr) {
@@ -1676,10 +1968,8 @@ static void DumpObject(const ObjectFile *o, const Archive *a = nullptr) {
PrintSymbolTable(o, ArchiveName);
if (UnwindInfo)
PrintUnwindInfo(o);
- if (PrivateHeaders)
- printPrivateFileHeaders(o);
- if (FirstPrivateHeader)
- printFirstPrivateFileHeader(o);
+ if (PrivateHeaders || FirstPrivateHeader)
+ printPrivateFileHeaders(o, FirstPrivateHeader);
if (ExportsTrie)
printExportsTrie(o);
if (Rebase)
@@ -1701,9 +1991,23 @@ static void DumpObject(const ObjectFile *o, const Archive *a = nullptr) {
}
}
+static void DumpObject(const COFFImportFile *I, const Archive *A) {
+ StringRef ArchiveName = A ? A->getFileName() : "";
+
+ // Avoid other output when using a raw option.
+ if (!RawClangAST)
+ outs() << '\n'
+ << ArchiveName << "(" << I->getFileName() << ")"
+ << ":\tfile format COFF-import-file"
+ << "\n\n";
+
+ if (SymbolTable)
+ printCOFFSymbolTable(I);
+}
+
/// @brief Dump each object file in \a a;
static void DumpArchive(const Archive *a) {
- Error Err;
+ Error Err = Error::success();
for (auto &C : a->children(Err)) {
Expected<std::unique_ptr<Binary>> ChildOrErr = C.getAsBinary();
if (!ChildOrErr) {
@@ -1713,6 +2017,8 @@ static void DumpArchive(const Archive *a) {
}
if (ObjectFile *o = dyn_cast<ObjectFile>(&*ChildOrErr.get()))
DumpObject(o, a);
+ else if (COFFImportFile *I = dyn_cast<COFFImportFile>(&*ChildOrErr.get()))
+ DumpObject(I, a);
else
report_error(a->getFileName(), object_error::invalid_file_type);
}
@@ -1768,7 +2074,7 @@ int main(int argc, char **argv) {
if (InputFilenames.size() == 0)
InputFilenames.push_back("a.out");
- if (DisassembleAll)
+ if (DisassembleAll || PrintSource || PrintLines)
Disassemble = true;
if (!Disassemble
&& !Relocations
diff --git a/contrib/llvm/tools/llvm-objdump/llvm-objdump.h b/contrib/llvm/tools/llvm-objdump/llvm-objdump.h
index 5b10ee87ca86..dace82af3d07 100644
--- a/contrib/llvm/tools/llvm-objdump/llvm-objdump.h
+++ b/contrib/llvm/tools/llvm-objdump/llvm-objdump.h
@@ -20,6 +20,7 @@ class StringRef;
namespace object {
class COFFObjectFile;
+ class COFFImportFile;
class MachOObjectFile;
class ObjectFile;
class Archive;
@@ -74,9 +75,11 @@ void printMachOLazyBindTable(const object::MachOObjectFile* o);
void printMachOWeakBindTable(const object::MachOObjectFile* o);
void printELFFileHeader(const object::ObjectFile *o);
void printCOFFFileHeader(const object::ObjectFile *o);
+void printCOFFSymbolTable(const object::COFFImportFile *i);
void printCOFFSymbolTable(const object::COFFObjectFile *o);
void printMachOFileHeader(const object::ObjectFile *o);
void printMachOLoadCommands(const object::ObjectFile *o);
+void printWasmFileHeader(const object::ObjectFile *o);
void printExportsTrie(const object::ObjectFile *o);
void printRebaseTable(const object::ObjectFile *o);
void printBindTable(const object::ObjectFile *o);
@@ -89,6 +92,7 @@ void PrintSectionContents(const object::ObjectFile *o);
void PrintSymbolTable(const object::ObjectFile *o, StringRef ArchiveName,
StringRef ArchitectureName = StringRef());
LLVM_ATTRIBUTE_NORETURN void error(Twine Message);
+LLVM_ATTRIBUTE_NORETURN void report_error(StringRef File, Twine Message);
LLVM_ATTRIBUTE_NORETURN void report_error(StringRef File, std::error_code EC);
LLVM_ATTRIBUTE_NORETURN void report_error(StringRef File, llvm::Error E);
LLVM_ATTRIBUTE_NORETURN void report_error(StringRef FileName,
diff --git a/contrib/llvm/tools/llvm-pdbdump/CompilandDumper.cpp b/contrib/llvm/tools/llvm-pdbdump/CompilandDumper.cpp
index 5ec37cbbb5a0..05141818660e 100644
--- a/contrib/llvm/tools/llvm-pdbdump/CompilandDumper.cpp
+++ b/contrib/llvm/tools/llvm-pdbdump/CompilandDumper.cpp
@@ -76,6 +76,15 @@ void CompilandDumper::start(const PDBSymbolCompiland &Symbol,
if (LineStart != LineEnd)
WithColor(Printer, StatementColor).get() << " - " << LineEnd;
+ uint32_t ColumnStart = Line->getColumnNumber();
+ uint32_t ColumnEnd = Line->getColumnNumberEnd();
+ if (ColumnStart != 0 || ColumnEnd != 0) {
+ Printer << ", Column: ";
+ WithColor(Printer, StatementColor).get() << ColumnStart;
+ if (ColumnEnd != ColumnStart)
+ WithColor(Printer, StatementColor).get() << " - " << ColumnEnd;
+ }
+
Printer << ", Address: ";
if (Line->getLength() > 0) {
uint64_t AddrStart = Line->getVirtualAddress();
diff --git a/contrib/llvm/tools/llvm-pdbdump/LLVMOutputStyle.cpp b/contrib/llvm/tools/llvm-pdbdump/LLVMOutputStyle.cpp
index 6d94295d1269..98c67ec9ef3b 100644
--- a/contrib/llvm/tools/llvm-pdbdump/LLVMOutputStyle.cpp
+++ b/contrib/llvm/tools/llvm-pdbdump/LLVMOutputStyle.cpp
@@ -13,11 +13,13 @@
#include "llvm/DebugInfo/CodeView/EnumTables.h"
#include "llvm/DebugInfo/CodeView/ModuleSubstreamVisitor.h"
#include "llvm/DebugInfo/CodeView/SymbolDumper.h"
+#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
+#include "llvm/DebugInfo/MSF/StreamReader.h"
#include "llvm/DebugInfo/PDB/PDBExtras.h"
#include "llvm/DebugInfo/PDB/Raw/DbiStream.h"
#include "llvm/DebugInfo/PDB/Raw/EnumTables.h"
+#include "llvm/DebugInfo/PDB/Raw/GlobalsStream.h"
#include "llvm/DebugInfo/PDB/Raw/ISectionContribVisitor.h"
-#include "llvm/DebugInfo/PDB/Raw/IndexedStreamData.h"
#include "llvm/DebugInfo/PDB/Raw/InfoStream.h"
#include "llvm/DebugInfo/PDB/Raw/ModInfo.h"
#include "llvm/DebugInfo/PDB/Raw/ModStream.h"
@@ -31,15 +33,58 @@
using namespace llvm;
using namespace llvm::codeview;
+using namespace llvm::msf;
using namespace llvm::pdb;
+namespace {
+struct PageStats {
+ explicit PageStats(const BitVector &FreePages)
+ : Upm(FreePages), ActualUsedPages(FreePages.size()),
+ MultiUsePages(FreePages.size()), UseAfterFreePages(FreePages.size()) {
+ const_cast<BitVector &>(Upm).flip();
+ // To calculate orphaned pages, we start with the set of pages that the
+ // MSF thinks are used. Each time we find one that actually *is* used,
+ // we unset it. Whichever bits remain set at the end are orphaned.
+ OrphanedPages = Upm;
+ }
+
+  // The inverse of the MSF file's copy of the FPM. This is the basis on which
+  // we determine the allocation status of each page.
+ const BitVector Upm;
+
+ // Pages which are marked as used in the FPM and are used at least once.
+ BitVector ActualUsedPages;
+
+  // Pages which are marked as used in the FPM and are used more than once.
+ BitVector MultiUsePages;
+
+ // Pages which are marked as used in the FPM but are not used at all.
+ BitVector OrphanedPages;
+
+ // Pages which are marked free in the FPM but are used.
+ BitVector UseAfterFreePages;
+};
+}
+
+static void recordKnownUsedPage(PageStats &Stats, uint32_t UsedIndex) {
+ if (Stats.Upm.test(UsedIndex)) {
+ if (Stats.ActualUsedPages.test(UsedIndex))
+ Stats.MultiUsePages.set(UsedIndex);
+ Stats.ActualUsedPages.set(UsedIndex);
+ Stats.OrphanedPages.reset(UsedIndex);
+ } else {
+ // The MSF doesn't think this page is used, but it is.
+ Stats.UseAfterFreePages.set(UsedIndex);
+ }
+}
+
static void printSectionOffset(llvm::raw_ostream &OS,
const SectionOffset &Off) {
OS << Off.Off << ", " << Off.Isect;
}
LLVMOutputStyle::LLVMOutputStyle(PDBFile &File)
- : File(File), P(outs()), TD(&P, false) {}
+ : File(File), P(outs()), Dumper(&P, false) {}
Error LLVMOutputStyle::dump() {
if (auto EC = dumpFileHeaders())
@@ -48,16 +93,19 @@ Error LLVMOutputStyle::dump() {
if (auto EC = dumpStreamSummary())
return EC;
+ if (auto EC = dumpFreePageMap())
+ return EC;
+
if (auto EC = dumpStreamBlocks())
return EC;
- if (auto EC = dumpStreamData())
+ if (auto EC = dumpBlockRanges())
return EC;
- if (auto EC = dumpInfoStream())
+ if (auto EC = dumpStreamBytes())
return EC;
- if (auto EC = dumpNamedStream())
+ if (auto EC = dumpInfoStream())
return EC;
if (auto EC = dumpTpiStream(StreamTPI))
@@ -75,6 +123,9 @@ Error LLVMOutputStyle::dump() {
if (auto EC = dumpSectionMap())
return EC;
+ if (auto EC = dumpGlobalsStream())
+ return EC;
+
if (auto EC = dumpPublicsStream())
return EC;
@@ -110,9 +161,9 @@ Error LLVMOutputStyle::dumpFileHeaders() {
return Error::success();
}
-Error LLVMOutputStyle::dumpStreamSummary() {
- if (!opts::raw::DumpStreamSummary)
- return Error::success();
+void LLVMOutputStyle::discoverStreamPurposes() {
+ if (!StreamPurposes.empty())
+ return;
// It's OK if we fail to load some of these streams, we still attempt to print
// what we can.
@@ -121,7 +172,6 @@ Error LLVMOutputStyle::dumpStreamSummary() {
auto Ipi = File.getPDBIpiStream();
auto Info = File.getPDBInfoStream();
- ListScope L(P, "Streams");
uint32_t StreamCount = File.getNumStreams();
std::unordered_map<uint16_t, const ModuleInfoEx *> ModStreams;
std::unordered_map<uint16_t, std::string> NamedStreams;
@@ -138,9 +188,8 @@ Error LLVMOutputStyle::dumpStreamSummary() {
}
}
+ StreamPurposes.resize(StreamCount);
for (uint16_t StreamIdx = 0; StreamIdx < StreamCount; ++StreamIdx) {
- std::string Label("Stream ");
- Label += to_string(StreamIdx);
std::string Value;
if (StreamIdx == OldMSFDirectory)
Value = "Old MSF Directory";
@@ -211,11 +260,7 @@ Error LLVMOutputStyle::dumpStreamSummary() {
Value = "???";
}
}
- Value = "[" + Value + "]";
- Value =
- Value + " (" + to_string(File.getStreamByteSize(StreamIdx)) + " bytes)";
-
- P.printString(Label, Value);
+ StreamPurposes[StreamIdx] = Value;
}
// Consume errors from missing streams.
@@ -227,11 +272,105 @@ Error LLVMOutputStyle::dumpStreamSummary() {
consumeError(Ipi.takeError());
if (!Info)
consumeError(Info.takeError());
+}
+
+Error LLVMOutputStyle::dumpStreamSummary() {
+ if (!opts::raw::DumpStreamSummary)
+ return Error::success();
+
+ discoverStreamPurposes();
+
+ uint32_t StreamCount = File.getNumStreams();
+
+ ListScope L(P, "Streams");
+ for (uint16_t StreamIdx = 0; StreamIdx < StreamCount; ++StreamIdx) {
+ std::string Label("Stream ");
+ Label += to_string(StreamIdx);
+
+ std::string Value = "[" + StreamPurposes[StreamIdx] + "] (";
+ Value += to_string(File.getStreamByteSize(StreamIdx));
+ Value += " bytes)";
+
+ P.printString(Label, Value);
+ }
P.flush();
return Error::success();
}
+Error LLVMOutputStyle::dumpFreePageMap() {
+ if (!opts::raw::DumpPageStats)
+ return Error::success();
+
+ // Start with used pages instead of free pages because
+ // the number of free pages is far larger than used pages.
+ BitVector FPM = File.getMsfLayout().FreePageMap;
+
+ PageStats PS(FPM);
+
+ recordKnownUsedPage(PS, 0); // MSF Super Block
+
+ uint32_t BlocksPerSection = msf::getFpmIntervalLength(File.getMsfLayout());
+ uint32_t NumSections = msf::getNumFpmIntervals(File.getMsfLayout());
+ for (uint32_t I = 0; I < NumSections; ++I) {
+ uint32_t Fpm0 = 1 + BlocksPerSection * I;
+ // 2 Fpm blocks spaced at `getBlockSize()` block intervals
+ recordKnownUsedPage(PS, Fpm0);
+ recordKnownUsedPage(PS, Fpm0 + 1);
+ }
+
+ recordKnownUsedPage(PS, File.getBlockMapIndex()); // Stream Table
+
+ for (auto DB : File.getDirectoryBlockArray())
+ recordKnownUsedPage(PS, DB);
+
+ // Record pages used by streams. Note that pages for stream 0
+  // are considered unused because that's what MSVC tools do.
+ // Stream 0 doesn't contain actual data, so it makes some sense,
+ // though it's a bit confusing to us.
+ for (auto &SE : File.getStreamMap().drop_front(1))
+ for (auto &S : SE)
+ recordKnownUsedPage(PS, S);
+
+ dumpBitVector("Msf Free Pages", FPM);
+ dumpBitVector("Orphaned Pages", PS.OrphanedPages);
+ dumpBitVector("Multiply Used Pages", PS.MultiUsePages);
+ dumpBitVector("Use After Free Pages", PS.UseAfterFreePages);
+ return Error::success();
+}
+
+void LLVMOutputStyle::dumpBitVector(StringRef Name, const BitVector &V) {
+ std::vector<uint32_t> Vec;
+ for (uint32_t I = 0, E = V.size(); I != E; ++I)
+ if (V[I])
+ Vec.push_back(I);
+ P.printList(Name, Vec);
+}
+
+Error LLVMOutputStyle::dumpGlobalsStream() {
+ if (!opts::raw::DumpGlobals)
+ return Error::success();
+ if (!File.hasPDBGlobalsStream()) {
+ P.printString("Globals Stream not present");
+ return Error::success();
+ }
+
+ auto Globals = File.getPDBGlobalsStream();
+ if (!Globals)
+ return Globals.takeError();
+ DictScope D(P, "Globals Stream");
+
+ auto Dbi = File.getPDBDbiStream();
+ if (!Dbi)
+ return Dbi.takeError();
+
+ P.printNumber("Stream number", Dbi->getGlobalSymbolStreamIndex());
+ P.printNumber("Number of buckets", Globals->getNumBuckets());
+ P.printList("Hash Buckets", Globals->getHashBuckets());
+
+ return Error::success();
+}
+
Error LLVMOutputStyle::dumpStreamBlocks() {
if (!opts::raw::DumpStreamBlocks)
return Error::success();
@@ -247,29 +386,64 @@ Error LLVMOutputStyle::dumpStreamBlocks() {
return Error::success();
}
-Error LLVMOutputStyle::dumpStreamData() {
- uint32_t StreamCount = File.getNumStreams();
- StringRef DumpStreamStr = opts::raw::DumpStreamDataIdx;
- uint32_t DumpStreamNum;
- if (DumpStreamStr.getAsInteger(/*Radix=*/0U, DumpStreamNum))
+Error LLVMOutputStyle::dumpBlockRanges() {
+ if (!opts::raw::DumpBlockRange.hasValue())
+ return Error::success();
+ auto &R = *opts::raw::DumpBlockRange;
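+  // A range given as a single block omits Max; treat it as Min..Min.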
+ uint32_t Max = R.Max.getValueOr(R.Min);
+
+ if (Max < R.Min)
+ return make_error<StringError>(
+ "Invalid block range specified. Max < Min",
+ std::make_error_code(std::errc::bad_address));
+ if (Max >= File.getBlockCount())
+ return make_error<StringError>(
+ "Invalid block range specified. Requested block out of bounds",
+ std::make_error_code(std::errc::bad_address));
+
+ DictScope D(P, "Block Data");
+ for (uint32_t I = R.Min; I <= Max; ++I) {
+ auto ExpectedData = File.getBlockData(I, File.getBlockSize());
+ if (!ExpectedData)
+ return ExpectedData.takeError();
+ std::string Label;
+ llvm::raw_string_ostream S(Label);
+ S << "Block " << I;
+ S.flush();
+ P.printBinaryBlock(Label, *ExpectedData);
+ }
+
+ return Error::success();
+}
+
+Error LLVMOutputStyle::dumpStreamBytes() {
+ if (opts::raw::DumpStreamData.empty())
return Error::success();
- if (DumpStreamNum >= StreamCount)
- return make_error<RawError>(raw_error_code::no_stream);
-
- auto S = MappedBlockStream::createIndexedStream(DumpStreamNum, File);
- if (!S)
- return S.takeError();
- codeview::StreamReader R(**S);
- while (R.bytesRemaining() > 0) {
- ArrayRef<uint8_t> Data;
- uint32_t BytesToReadInBlock = std::min(
- R.bytesRemaining(), static_cast<uint32_t>(File.getBlockSize()));
- if (auto EC = R.readBytes(Data, BytesToReadInBlock))
+ discoverStreamPurposes();
+
+ DictScope D(P, "Stream Data");
+ for (uint32_t SI : opts::raw::DumpStreamData) {
+ if (SI >= File.getNumStreams())
+ return make_error<RawError>(raw_error_code::no_stream);
+
+ auto S = MappedBlockStream::createIndexedStream(File.getMsfLayout(),
+ File.getMsfBuffer(), SI);
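+    // If the stream cannot be mapped, skip it rather than aborting the dump.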
+ if (!S)
+ continue;
+ DictScope DD(P, "Stream");
+
+ P.printNumber("Index", SI);
+ P.printString("Type", StreamPurposes[SI]);
+ P.printNumber("Size", S->getLength());
+ auto Blocks = File.getMsfLayout().StreamMap[SI];
+ P.printList("Blocks", Blocks);
+
+ StreamReader R(*S);
+ ArrayRef<uint8_t> StreamData;
+ if (auto EC = R.readBytes(StreamData, S->getLength()))
return EC;
- P.printBinaryBlock(
- "Data",
- StringRef(reinterpret_cast<const char *>(Data.begin()), Data.size()));
+ P.printBinaryBlock("Data", StreamData);
}
return Error::success();
}
@@ -277,6 +451,10 @@ Error LLVMOutputStyle::dumpStreamData() {
Error LLVMOutputStyle::dumpInfoStream() {
if (!opts::raw::DumpHeaders)
return Error::success();
+ if (!File.hasPDBInfoStream()) {
+ P.printString("PDB Stream not present");
+ return Error::success();
+ }
auto IS = File.getPDBInfoStream();
if (!IS)
return IS.takeError();
@@ -289,49 +467,6 @@ Error LLVMOutputStyle::dumpInfoStream() {
return Error::success();
}
-Error LLVMOutputStyle::dumpNamedStream() {
- if (opts::raw::DumpStreamDataName.empty())
- return Error::success();
-
- auto IS = File.getPDBInfoStream();
- if (!IS)
- return IS.takeError();
-
- uint32_t NameStreamIndex =
- IS->getNamedStreamIndex(opts::raw::DumpStreamDataName);
- if (NameStreamIndex == 0 || NameStreamIndex >= File.getNumStreams())
- return make_error<RawError>(raw_error_code::no_stream);
-
- if (NameStreamIndex != 0) {
- std::string Name("Stream '");
- Name += opts::raw::DumpStreamDataName;
- Name += "'";
- DictScope D(P, Name);
- P.printNumber("Index", NameStreamIndex);
-
- auto NameStream =
- MappedBlockStream::createIndexedStream(NameStreamIndex, File);
- if (!NameStream)
- return NameStream.takeError();
- codeview::StreamReader Reader(**NameStream);
-
- NameHashTable NameTable;
- if (auto EC = NameTable.load(Reader))
- return EC;
-
- P.printHex("Signature", NameTable.getSignature());
- P.printNumber("Version", NameTable.getHashVersion());
- P.printNumber("Name Count", NameTable.getNameCount());
- ListScope L(P, "Names");
- for (uint32_t ID : NameTable.name_ids()) {
- StringRef Str = NameTable.getStringForID(ID);
- if (!Str.empty())
- P.printString(to_string(ID), Str);
- }
- }
- return Error::success();
-}
-
static void printTypeIndexOffset(raw_ostream &OS,
const TypeIndexOffset &TIOff) {
OS << "{" << TIOff.Type.getIndex() << ", " << TIOff.Offset << "}";
@@ -358,11 +493,19 @@ Error LLVMOutputStyle::dumpTpiStream(uint32_t StreamIdx) {
StringRef Label;
StringRef VerLabel;
if (StreamIdx == StreamTPI) {
+ if (!File.hasPDBTpiStream()) {
+ P.printString("Type Info Stream (TPI) not present");
+ return Error::success();
+ }
DumpRecordBytes = opts::raw::DumpTpiRecordBytes;
DumpRecords = opts::raw::DumpTpiRecords;
Label = "Type Info Stream (TPI)";
VerLabel = "TPI Version";
} else if (StreamIdx == StreamIPI) {
+ if (!File.hasPDBIpiStream()) {
+ P.printString("Type Info Stream (IPI) not present");
+ return Error::success();
+ }
DumpRecordBytes = opts::raw::DumpIpiRecordBytes;
DumpRecords = opts::raw::DumpIpiRecords;
Label = "Type Info Stream (IPI)";
@@ -389,12 +532,12 @@ Error LLVMOutputStyle::dumpTpiStream(uint32_t StreamIdx) {
DictScope DD(P, "");
if (DumpRecords) {
- if (auto EC = TD.dump(Type))
+ if (auto EC = Dumper.dump(Type))
return EC;
}
if (DumpRecordBytes)
- P.printBinaryBlock("Bytes", Type.Data);
+ P.printBinaryBlock("Bytes", Type.content());
}
dumpTpiHash(P, *Tpi);
if (HadError)
@@ -405,16 +548,16 @@ Error LLVMOutputStyle::dumpTpiStream(uint32_t StreamIdx) {
// iterate them in order to build the list of types so that we can print
// them when dumping module symbols. So when they want to dump symbols
// but not types, use a null output stream.
- ScopedPrinter *OldP = TD.getPrinter();
- TD.setPrinter(nullptr);
+ ScopedPrinter *OldP = Dumper.getPrinter();
+ Dumper.setPrinter(nullptr);
bool HadError = false;
for (auto &Type : Tpi->types(&HadError)) {
- if (auto EC = TD.dump(Type))
+ if (auto EC = Dumper.dump(Type))
return EC;
}
- TD.setPrinter(OldP);
+ Dumper.setPrinter(OldP);
dumpTpiHash(P, *Tpi);
if (HadError)
return make_error<RawError>(raw_error_code::corrupt_file,
@@ -429,6 +572,10 @@ Error LLVMOutputStyle::dumpDbiStream() {
opts::raw::DumpModuleFiles || opts::raw::DumpLineInfo;
if (!opts::raw::DumpHeaders && !DumpModules)
return Error::success();
+ if (!File.hasPDBDbiStream()) {
+ P.printString("DBI Stream not present");
+ return Error::success();
+ }
auto DS = File.getPDBDbiStream();
if (!DS)
@@ -484,24 +631,28 @@ Error LLVMOutputStyle::dumpDbiStream() {
(opts::raw::DumpModuleSyms || opts::raw::DumpSymRecordBytes);
if (HasModuleDI && (ShouldDumpSymbols || opts::raw::DumpLineInfo)) {
auto ModStreamData = MappedBlockStream::createIndexedStream(
- Modi.Info.getModuleStreamIndex(), File);
- if (!ModStreamData)
- return ModStreamData.takeError();
- ModStream ModS(Modi.Info, std::move(*ModStreamData));
+ File.getMsfLayout(), File.getMsfBuffer(),
+ Modi.Info.getModuleStreamIndex());
+
+ ModStream ModS(Modi.Info, std::move(ModStreamData));
if (auto EC = ModS.reload())
return EC;
if (ShouldDumpSymbols) {
ListScope SS(P, "Symbols");
- codeview::CVSymbolDumper SD(P, TD, nullptr, false);
+ codeview::CVSymbolDumper SD(P, Dumper, nullptr, false);
bool HadError = false;
- for (const auto &S : ModS.symbols(&HadError)) {
- DictScope DD(P, "");
-
- if (opts::raw::DumpModuleSyms)
- SD.dump(S);
+ for (auto S : ModS.symbols(&HadError)) {
+ DictScope LL(P, "");
+ if (opts::raw::DumpModuleSyms) {
+ if (auto EC = SD.dump(S)) {
+ llvm::consumeError(std::move(EC));
+ HadError = true;
+ break;
+ }
+ }
if (opts::raw::DumpSymRecordBytes)
- P.printBinaryBlock("Bytes", S.Data);
+ P.printBinaryBlock("Bytes", S.content());
}
if (HadError)
return make_error<RawError>(
@@ -517,7 +668,7 @@ Error LLVMOutputStyle::dumpDbiStream() {
public:
RecordVisitor(ScopedPrinter &P, PDBFile &F) : P(P), F(F) {}
Error visitUnknown(ModuleSubstreamKind Kind,
- StreamRef Stream) override {
+ ReadableStreamRef Stream) override {
DictScope DD(P, "Unknown");
ArrayRef<uint8_t> Data;
StreamReader R(Stream);
@@ -530,7 +681,7 @@ Error LLVMOutputStyle::dumpDbiStream() {
return Error::success();
}
Error
- visitFileChecksums(StreamRef Data,
+ visitFileChecksums(ReadableStreamRef Data,
const FileChecksumArray &Checksums) override {
DictScope DD(P, "FileChecksums");
for (const auto &C : Checksums) {
@@ -546,7 +697,8 @@ Error LLVMOutputStyle::dumpDbiStream() {
return Error::success();
}
- Error visitLines(StreamRef Data, const LineSubstreamHeader *Header,
+ Error visitLines(ReadableStreamRef Data,
+ const LineSubstreamHeader *Header,
const LineInfoArray &Lines) override {
DictScope DD(P, "Lines");
for (const auto &L : Lines) {
@@ -610,6 +762,10 @@ Error LLVMOutputStyle::dumpDbiStream() {
Error LLVMOutputStyle::dumpSectionContribs() {
if (!opts::raw::DumpSectionContribs)
return Error::success();
+ if (!File.hasPDBDbiStream()) {
+ P.printString("DBI Stream not present");
+ return Error::success();
+ }
auto Dbi = File.getPDBDbiStream();
if (!Dbi)
@@ -657,6 +813,10 @@ Error LLVMOutputStyle::dumpSectionContribs() {
Error LLVMOutputStyle::dumpSectionMap() {
if (!opts::raw::DumpSectionMap)
return Error::success();
+ if (!File.hasPDBDbiStream()) {
+ P.printString("DBI Stream not present");
+ return Error::success();
+ }
auto Dbi = File.getPDBDbiStream();
if (!Dbi)
@@ -666,7 +826,6 @@ Error LLVMOutputStyle::dumpSectionMap() {
for (auto &M : Dbi->getSectionMap()) {
DictScope D(P, "Entry");
P.printFlags("Flags", M.Flags, getOMFSegMapDescFlagNames());
- P.printNumber("Flags", M.Flags);
P.printNumber("Ovl", M.Ovl);
P.printNumber("Group", M.Group);
P.printNumber("Frame", M.Frame);
@@ -682,11 +841,15 @@ Error LLVMOutputStyle::dumpSectionMap() {
Error LLVMOutputStyle::dumpPublicsStream() {
if (!opts::raw::DumpPublics)
return Error::success();
+ if (!File.hasPDBPublicsStream()) {
+ P.printString("Publics Stream not present");
+ return Error::success();
+ }
- DictScope D(P, "Publics Stream");
auto Publics = File.getPDBPublicsStream();
if (!Publics)
return Publics.takeError();
+ DictScope D(P, "Publics Stream");
auto Dbi = File.getPDBDbiStream();
if (!Dbi)
@@ -702,14 +865,17 @@ Error LLVMOutputStyle::dumpPublicsStream() {
P.printList("Section Offsets", Publics->getSectionOffsets(),
printSectionOffset);
ListScope L(P, "Symbols");
- codeview::CVSymbolDumper SD(P, TD, nullptr, false);
+ codeview::CVSymbolDumper SD(P, Dumper, nullptr, false);
bool HadError = false;
for (auto S : Publics->getSymbols(&HadError)) {
DictScope DD(P, "");
- SD.dump(S);
+ if (auto EC = SD.dump(S)) {
+ HadError = true;
+ break;
+ }
if (opts::raw::DumpSymRecordBytes)
- P.printBinaryBlock("Bytes", S.Data);
+ P.printBinaryBlock("Bytes", S.content());
}
if (HadError)
return make_error<RawError>(
@@ -722,6 +888,10 @@ Error LLVMOutputStyle::dumpPublicsStream() {
Error LLVMOutputStyle::dumpSectionHeaders() {
if (!opts::raw::DumpSectionHeaders)
return Error::success();
+ if (!File.hasPDBDbiStream()) {
+ P.printString("DBI Stream not present");
+ return Error::success();
+ }
auto Dbi = File.getPDBDbiStream();
if (!Dbi)
@@ -751,6 +921,10 @@ Error LLVMOutputStyle::dumpSectionHeaders() {
Error LLVMOutputStyle::dumpFpoStream() {
if (!opts::raw::DumpFpo)
return Error::success();
+ if (!File.hasPDBDbiStream()) {
+ P.printString("DBI Stream not present");
+ return Error::success();
+ }
auto Dbi = File.getPDBDbiStream();
if (!Dbi)
diff --git a/contrib/llvm/tools/llvm-pdbdump/LLVMOutputStyle.h b/contrib/llvm/tools/llvm-pdbdump/LLVMOutputStyle.h
index 77935d102205..72a3fd4aba5c 100644
--- a/contrib/llvm/tools/llvm-pdbdump/LLVMOutputStyle.h
+++ b/contrib/llvm/tools/llvm-pdbdump/LLVMOutputStyle.h
@@ -16,6 +16,7 @@
#include "llvm/Support/ScopedPrinter.h"
namespace llvm {
+class BitVector;
namespace pdb {
class LLVMOutputStyle : public OutputStyle {
public:
@@ -24,12 +25,16 @@ public:
Error dump() override;
private:
+ void discoverStreamPurposes();
+
Error dumpFileHeaders();
Error dumpStreamSummary();
+ Error dumpFreePageMap();
+ Error dumpBlockRanges();
+ Error dumpGlobalsStream();
+ Error dumpStreamBytes();
Error dumpStreamBlocks();
- Error dumpStreamData();
Error dumpInfoStream();
- Error dumpNamedStream();
Error dumpTpiStream(uint32_t StreamIdx);
Error dumpDbiStream();
Error dumpSectionContribs();
@@ -38,11 +43,14 @@ private:
Error dumpSectionHeaders();
Error dumpFpoStream();
+ void dumpBitVector(StringRef Name, const BitVector &V);
+
void flush();
PDBFile &File;
ScopedPrinter P;
- codeview::CVTypeDumper TD;
+ codeview::CVTypeDumper Dumper;
+ std::vector<std::string> StreamPurposes;
};
}
}
diff --git a/contrib/llvm/tools/llvm-pdbdump/PdbYaml.cpp b/contrib/llvm/tools/llvm-pdbdump/PdbYaml.cpp
index e83f24e87742..34e0611e1464 100644
--- a/contrib/llvm/tools/llvm-pdbdump/PdbYaml.cpp
+++ b/contrib/llvm/tools/llvm-pdbdump/PdbYaml.cpp
@@ -9,16 +9,38 @@
#include "PdbYaml.h"
+#include "YamlSerializationContext.h"
+#include "YamlSymbolDumper.h"
+#include "YamlTypeDumper.h"
+
+#include "llvm/DebugInfo/CodeView/CVSymbolVisitor.h"
+#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
+#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
+#include "llvm/DebugInfo/CodeView/SymbolVisitorCallbackPipeline.h"
+#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
+#include "llvm/DebugInfo/CodeView/TypeSerializer.h"
+#include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h"
#include "llvm/DebugInfo/PDB/PDBExtras.h"
+#include "llvm/DebugInfo/PDB/PDBTypes.h"
#include "llvm/DebugInfo/PDB/Raw/PDBFile.h"
+#include "llvm/DebugInfo/PDB/Raw/TpiHashing.h"
using namespace llvm;
-using namespace llvm::yaml;
using namespace llvm::pdb;
using namespace llvm::pdb::yaml;
+using namespace llvm::yaml;
+
+LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint32_t)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::StringRef)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::pdb::yaml::NamedStreamMapping)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::pdb::yaml::PdbDbiModuleInfo)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::pdb::yaml::PdbSymbolRecord)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::pdb::yaml::PdbTpiRecord)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::pdb::yaml::StreamBlockList)
namespace llvm {
namespace yaml {
+
template <> struct ScalarTraits<llvm::pdb::PDB_UniqueId> {
static void output(const llvm::pdb::PDB_UniqueId &S, void *,
llvm::raw_ostream &OS) {
@@ -35,7 +57,7 @@ template <> struct ScalarTraits<llvm::pdb::PDB_UniqueId> {
Scalar[24] != '-')
return "GUID sections are not properly delineated with dashes";
- char *OutBuffer = S.Guid;
+ uint8_t *OutBuffer = S.Guid;
for (auto Iter = Scalar.begin(); Iter != Scalar.end();) {
if (*Iter == '-' || *Iter == '{' || *Iter == '}') {
++Iter;
@@ -101,6 +123,16 @@ template <> struct ScalarEnumerationTraits<llvm::pdb::PdbRaw_ImplVer> {
io.enumCase(Value, "VC140", llvm::pdb::PdbRaw_ImplVer::PdbImplVC140);
}
};
+
+template <> struct ScalarEnumerationTraits<llvm::pdb::PdbRaw_TpiVer> {
+ static void enumeration(IO &io, llvm::pdb::PdbRaw_TpiVer &Value) {
+ io.enumCase(Value, "VC40", llvm::pdb::PdbRaw_TpiVer::PdbTpiV40);
+ io.enumCase(Value, "VC41", llvm::pdb::PdbRaw_TpiVer::PdbTpiV41);
+ io.enumCase(Value, "VC50", llvm::pdb::PdbRaw_TpiVer::PdbTpiV50);
+ io.enumCase(Value, "VC70", llvm::pdb::PdbRaw_TpiVer::PdbTpiV70);
+ io.enumCase(Value, "VC80", llvm::pdb::PdbRaw_TpiVer::PdbTpiV80);
+ }
+};
}
}
@@ -110,9 +142,11 @@ void MappingTraits<PdbObject>::mapping(IO &IO, PdbObject &Obj) {
IO.mapOptional("StreamMap", Obj.StreamMap);
IO.mapOptional("PdbStream", Obj.PdbStream);
IO.mapOptional("DbiStream", Obj.DbiStream);
+ IO.mapOptionalWithContext("TpiStream", Obj.TpiStream, Obj.Allocator);
+ IO.mapOptionalWithContext("IpiStream", Obj.IpiStream, Obj.Allocator);
}
-void MappingTraits<MsfHeaders>::mapping(IO &IO, MsfHeaders &Obj) {
+void MappingTraits<MSFHeaders>::mapping(IO &IO, MSFHeaders &Obj) {
IO.mapRequired("SuperBlock", Obj.SuperBlock);
IO.mapRequired("NumDirectoryBlocks", Obj.NumDirectoryBlocks);
IO.mapRequired("DirectoryBlocks", Obj.DirectoryBlocks);
@@ -153,6 +187,21 @@ void MappingTraits<PdbDbiStream>::mapping(IO &IO, PdbDbiStream &Obj) {
IO.mapRequired("PdbDllRbld", Obj.PdbDllRbld);
IO.mapRequired("Flags", Obj.Flags);
IO.mapRequired("MachineType", Obj.MachineType);
+ IO.mapOptional("Modules", Obj.ModInfos);
+}
+
+void MappingContextTraits<PdbTpiStream, BumpPtrAllocator>::mapping(
+ IO &IO, pdb::yaml::PdbTpiStream &Obj, BumpPtrAllocator &Allocator) {
+ // Create a single serialization context that will be passed through the
+ // entire process of serializing / deserializing a Tpi Stream. This is
+ // especially important when we are going from Pdb -> Yaml because we need
+ // to maintain state in a TypeTableBuilder across mappings, and at the end of
+ // the entire process, we need to have one TypeTableBuilder that has every
+ // record.
+ pdb::yaml::SerializationContext Context(IO, Allocator);
+
+ IO.mapRequired("Version", Obj.Version);
+ IO.mapRequired("Records", Obj.Records, Context);
}
void MappingTraits<NamedStreamMapping>::mapping(IO &IO,
@@ -160,3 +209,62 @@ void MappingTraits<NamedStreamMapping>::mapping(IO &IO,
IO.mapRequired("Name", Obj.StreamName);
IO.mapRequired("StreamNum", Obj.StreamNumber);
}
+
+void MappingTraits<PdbSymbolRecord>::mapping(IO &IO, PdbSymbolRecord &Obj) {
+ codeview::SymbolVisitorCallbackPipeline Pipeline;
+ codeview::SymbolDeserializer Deserializer(nullptr);
+ codeview::yaml::YamlSymbolDumper Dumper(IO);
+
+ if (IO.outputting()) {
+ // For PDB to Yaml, deserialize into a high level record type, then dump it.
+ Pipeline.addCallbackToPipeline(Deserializer);
+ Pipeline.addCallbackToPipeline(Dumper);
+ } else {
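+    // The Yaml -> PDB direction is not handled for symbol records here.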
+ return;
+ }
+
+ codeview::CVSymbolVisitor Visitor(Pipeline);
+ consumeError(Visitor.visitSymbolRecord(Obj.Record));
+}
+
+void MappingTraits<PdbModiStream>::mapping(IO &IO, PdbModiStream &Obj) {
+ IO.mapRequired("Signature", Obj.Signature);
+ IO.mapRequired("Records", Obj.Symbols);
+}
+
+void MappingTraits<PdbDbiModuleInfo>::mapping(IO &IO, PdbDbiModuleInfo &Obj) {
+ IO.mapRequired("Module", Obj.Mod);
+ IO.mapRequired("ObjFile", Obj.Obj);
+ IO.mapOptional("SourceFiles", Obj.SourceFiles);
+ IO.mapOptional("Modi", Obj.Modi);
+}
+
+void MappingContextTraits<PdbTpiRecord, pdb::yaml::SerializationContext>::
+ mapping(IO &IO, pdb::yaml::PdbTpiRecord &Obj,
+ pdb::yaml::SerializationContext &Context) {
+ codeview::TypeVisitorCallbackPipeline Pipeline;
+ codeview::TypeDeserializer Deserializer;
+ codeview::TypeSerializer Serializer(Context.Allocator);
+ pdb::TpiHashUpdater Hasher;
+
+ if (IO.outputting()) {
+ // For PDB to Yaml, deserialize into a high level record type, then dump it.
+ Pipeline.addCallbackToPipeline(Deserializer);
+ Pipeline.addCallbackToPipeline(Context.Dumper);
+ } else {
+ // For Yaml to PDB, extract from the high level record type, then write it
+ // to bytes.
+
+ // This might be interpreted as a hack, but serializing FieldList
+ // sub-records requires having access to the same serializer being used by
+ // the FieldList itself.
+ Context.ActiveSerializer = &Serializer;
+ Pipeline.addCallbackToPipeline(Context.Dumper);
+ Pipeline.addCallbackToPipeline(Serializer);
+ Pipeline.addCallbackToPipeline(Hasher);
+ }
+
+ codeview::CVTypeVisitor Visitor(Pipeline);
+ consumeError(Visitor.visitTypeRecord(Obj.Record));
+ Context.ActiveSerializer = nullptr;
+}
diff --git a/contrib/llvm/tools/llvm-pdbdump/PdbYaml.h b/contrib/llvm/tools/llvm-pdbdump/PdbYaml.h
index a7389af22057..398186f16d72 100644
--- a/contrib/llvm/tools/llvm-pdbdump/PdbYaml.h
+++ b/contrib/llvm/tools/llvm-pdbdump/PdbYaml.h
@@ -13,8 +13,10 @@
#include "OutputStyle.h"
#include "llvm/ADT/Optional.h"
+#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
+#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/MSF/MSFCommon.h"
#include "llvm/DebugInfo/PDB/PDBTypes.h"
-#include "llvm/DebugInfo/PDB/Raw/MsfCommon.h"
#include "llvm/DebugInfo/PDB/Raw/PDBFile.h"
#include "llvm/DebugInfo/PDB/Raw/RawConstants.h"
#include "llvm/Support/Endian.h"
@@ -26,7 +28,9 @@ namespace llvm {
namespace pdb {
namespace yaml {
-struct MsfHeaders {
+struct SerializationContext;
+
+struct MSFHeaders {
msf::SuperBlock SuperBlock;
uint32_t NumDirectoryBlocks;
std::vector<uint32_t> DirectoryBlocks;
@@ -51,6 +55,22 @@ struct PdbInfoStream {
std::vector<NamedStreamMapping> NamedStreams;
};
+struct PdbSymbolRecord {
+ codeview::CVSymbol Record;
+};
+
+struct PdbModiStream {
+ uint32_t Signature;
+ std::vector<PdbSymbolRecord> Symbols;
+};
+
+struct PdbDbiModuleInfo {
+ StringRef Obj;
+ StringRef Mod;
+ std::vector<StringRef> SourceFiles;
+ Optional<PdbModiStream> Modi;
+};
+
struct PdbDbiStream {
PdbRaw_DbiVer VerHeader;
uint32_t Age;
@@ -59,14 +79,35 @@ struct PdbDbiStream {
uint16_t PdbDllRbld;
uint16_t Flags;
PDB_Machine MachineType;
+
+ std::vector<PdbDbiModuleInfo> ModInfos;
+};
+
+struct PdbTpiRecord {
+ codeview::CVType Record;
+};
+
+struct PdbTpiFieldListRecord {
+ codeview::CVMemberRecord Record;
+};
+
+struct PdbTpiStream {
+ PdbRaw_TpiVer Version;
+ std::vector<PdbTpiRecord> Records;
};
struct PdbObject {
- Optional<MsfHeaders> Headers;
+ explicit PdbObject(BumpPtrAllocator &Allocator) : Allocator(Allocator) {}
+
+ Optional<MSFHeaders> Headers;
Optional<std::vector<uint32_t>> StreamSizes;
Optional<std::vector<StreamBlockList>> StreamMap;
Optional<PdbInfoStream> PdbStream;
Optional<PdbDbiStream> DbiStream;
+ Optional<PdbTpiStream> TpiStream;
+ Optional<PdbTpiStream> IpiStream;
+
+ BumpPtrAllocator &Allocator;
};
}
}
@@ -79,12 +120,12 @@ template <> struct MappingTraits<pdb::yaml::PdbObject> {
static void mapping(IO &IO, pdb::yaml::PdbObject &Obj);
};
-template <> struct MappingTraits<pdb::yaml::MsfHeaders> {
- static void mapping(IO &IO, pdb::yaml::MsfHeaders &Obj);
+template <> struct MappingTraits<pdb::yaml::MSFHeaders> {
+ static void mapping(IO &IO, pdb::yaml::MSFHeaders &Obj);
};
-template <> struct MappingTraits<pdb::msf::SuperBlock> {
- static void mapping(IO &IO, pdb::msf::SuperBlock &SB);
+template <> struct MappingTraits<msf::SuperBlock> {
+ static void mapping(IO &IO, msf::SuperBlock &SB);
};
template <> struct MappingTraits<pdb::yaml::StreamBlockList> {
@@ -99,14 +140,35 @@ template <> struct MappingTraits<pdb::yaml::PdbDbiStream> {
static void mapping(IO &IO, pdb::yaml::PdbDbiStream &Obj);
};
+template <>
+struct MappingContextTraits<pdb::yaml::PdbTpiStream, llvm::BumpPtrAllocator> {
+ static void mapping(IO &IO, pdb::yaml::PdbTpiStream &Obj,
+ llvm::BumpPtrAllocator &Allocator);
+};
+
template <> struct MappingTraits<pdb::yaml::NamedStreamMapping> {
static void mapping(IO &IO, pdb::yaml::NamedStreamMapping &Obj);
};
+
+template <> struct MappingTraits<pdb::yaml::PdbSymbolRecord> {
+ static void mapping(IO &IO, pdb::yaml::PdbSymbolRecord &Obj);
+};
+
+template <> struct MappingTraits<pdb::yaml::PdbModiStream> {
+ static void mapping(IO &IO, pdb::yaml::PdbModiStream &Obj);
+};
+
+template <> struct MappingTraits<pdb::yaml::PdbDbiModuleInfo> {
+ static void mapping(IO &IO, pdb::yaml::PdbDbiModuleInfo &Obj);
+};
+
+template <>
+struct MappingContextTraits<pdb::yaml::PdbTpiRecord,
+ pdb::yaml::SerializationContext> {
+ static void mapping(IO &IO, pdb::yaml::PdbTpiRecord &Obj,
+ pdb::yaml::SerializationContext &Context);
+};
}
}
-LLVM_YAML_IS_SEQUENCE_VECTOR(uint32_t)
-LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::pdb::yaml::NamedStreamMapping)
-LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::pdb::yaml::StreamBlockList)
-
#endif // LLVM_TOOLS_LLVMPDBDUMP_PDBYAML_H
diff --git a/contrib/llvm/tools/llvm-pdbdump/YAMLOutputStyle.cpp b/contrib/llvm/tools/llvm-pdbdump/YAMLOutputStyle.cpp
index a8b6202d5d16..3f2733d701a8 100644
--- a/contrib/llvm/tools/llvm-pdbdump/YAMLOutputStyle.cpp
+++ b/contrib/llvm/tools/llvm-pdbdump/YAMLOutputStyle.cpp
@@ -12,19 +12,29 @@
#include "PdbYaml.h"
#include "llvm-pdbdump.h"
+#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
#include "llvm/DebugInfo/PDB/Raw/DbiStream.h"
#include "llvm/DebugInfo/PDB/Raw/InfoStream.h"
+#include "llvm/DebugInfo/PDB/Raw/ModStream.h"
#include "llvm/DebugInfo/PDB/Raw/PDBFile.h"
#include "llvm/DebugInfo/PDB/Raw/RawConstants.h"
+#include "llvm/DebugInfo/PDB/Raw/TpiStream.h"
using namespace llvm;
using namespace llvm::pdb;
-YAMLOutputStyle::YAMLOutputStyle(PDBFile &File) : File(File), Out(outs()) {}
+YAMLOutputStyle::YAMLOutputStyle(PDBFile &File)
+ : File(File), Out(outs()), Obj(File.getAllocator()) {}
Error YAMLOutputStyle::dump() {
if (opts::pdb2yaml::StreamDirectory)
opts::pdb2yaml::StreamMetadata = true;
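+  // The more specific module-dump options imply the streams that contain
+  // them, so propagate them upward.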
+ if (opts::pdb2yaml::DbiModuleSyms)
+ opts::pdb2yaml::DbiModuleInfo = true;
+ if (opts::pdb2yaml::DbiModuleSourceFileInfo)
+ opts::pdb2yaml::DbiModuleInfo = true;
+ if (opts::pdb2yaml::DbiModuleInfo)
+ opts::pdb2yaml::DbiStream = true;
if (auto EC = dumpFileHeaders())
return EC;
@@ -41,6 +51,12 @@ Error YAMLOutputStyle::dump() {
if (auto EC = dumpDbiStream())
return EC;
+ if (auto EC = dumpTpiStream())
+ return EC;
+
+ if (auto EC = dumpIpiStream())
+ return EC;
+
flush();
return Error::success();
}
@@ -49,7 +65,7 @@ Error YAMLOutputStyle::dumpFileHeaders() {
if (opts::pdb2yaml::NoFileHeaders)
return Error::success();
- yaml::MsfHeaders Headers;
+ yaml::MSFHeaders Headers;
Obj.Headers.emplace();
Obj.Headers->SuperBlock.NumBlocks = File.getBlockCount();
Obj.Headers->SuperBlock.BlockMapAddr = File.getBlockMapIndex();
@@ -133,6 +149,79 @@ Error YAMLOutputStyle::dumpDbiStream() {
Obj.DbiStream->PdbDllRbld = DS.getPdbDllRbld();
Obj.DbiStream->PdbDllVersion = DS.getPdbDllVersion();
Obj.DbiStream->VerHeader = DS.getDbiVersion();
+ if (opts::pdb2yaml::DbiModuleInfo) {
+ for (const auto &MI : DS.modules()) {
+ yaml::PdbDbiModuleInfo DMI;
+ DMI.Mod = MI.Info.getModuleName();
+ DMI.Obj = MI.Info.getObjFileName();
+ if (opts::pdb2yaml::DbiModuleSourceFileInfo)
+ DMI.SourceFiles = MI.SourceFiles;
+
+ if (opts::pdb2yaml::DbiModuleSyms &&
+ MI.Info.getModuleStreamIndex() != kInvalidStreamIndex) {
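+ // Each module's symbol records live in their own MSF stream; open that
+ // stream by its index and reload it before walking the symbols.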
+ DMI.Modi.emplace();
+ auto ModStreamData = msf::MappedBlockStream::createIndexedStream(
+ File.getMsfLayout(), File.getMsfBuffer(),
+ MI.Info.getModuleStreamIndex());
+
+ pdb::ModStream ModS(MI.Info, std::move(ModStreamData));
+ if (auto EC = ModS.reload())
+ return EC;
+
+ DMI.Modi->Signature = ModS.signature();
+ bool HadError = false;
+ for (auto &Sym : ModS.symbols(&HadError)) {
+ pdb::yaml::PdbSymbolRecord Record{Sym};
+ DMI.Modi->Symbols.push_back(Record);
+ }
+ }
+ Obj.DbiStream->ModInfos.push_back(DMI);
+ }
+ }
+ return Error::success();
+}
+
+Error YAMLOutputStyle::dumpTpiStream() {
+ if (!opts::pdb2yaml::TpiStream)
+ return Error::success();
+
+ auto TpiS = File.getPDBTpiStream();
+ if (!TpiS)
+ return TpiS.takeError();
+
+ auto &TS = TpiS.get();
+ Obj.TpiStream.emplace();
+ Obj.TpiStream->Version = TS.getTpiVersion();
+ for (auto &Record : TS.types(nullptr)) {
+ yaml::PdbTpiRecord R;
+ // It's not necessary to set R.RecordData here. That only exists as a
+ // way to have the `PdbTpiRecord` structure own the memory that `R.Record`
+ // references. In the case of reading an existing PDB though, that memory
+ // is owned by the backing stream.
+ R.Record = Record;
+ Obj.TpiStream->Records.push_back(R);
+ }
+
+ return Error::success();
+}
+
+Error YAMLOutputStyle::dumpIpiStream() {
+ if (!opts::pdb2yaml::IpiStream)
+ return Error::success();
+
+ auto IpiS = File.getPDBIpiStream();
+ if (!IpiS)
+ return IpiS.takeError();
+
+ auto &IS = IpiS.get();
+ Obj.IpiStream.emplace();
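+ // The IPI stream shares the TPI stream's on-disk format, so the same reader
+ // and PdbTpiRecord representation are reused here.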
+ Obj.IpiStream->Version = IS.getTpiVersion();
+ for (auto &Record : IS.types(nullptr)) {
+ yaml::PdbTpiRecord R;
+ R.Record = Record;
+ Obj.IpiStream->Records.push_back(R);
+ }
+
return Error::success();
}
diff --git a/contrib/llvm/tools/llvm-pdbdump/YAMLOutputStyle.h b/contrib/llvm/tools/llvm-pdbdump/YAMLOutputStyle.h
index d36dfec5f25a..540dee4121e6 100644
--- a/contrib/llvm/tools/llvm-pdbdump/YAMLOutputStyle.h
+++ b/contrib/llvm/tools/llvm-pdbdump/YAMLOutputStyle.h
@@ -31,6 +31,8 @@ private:
Error dumpStreamDirectory();
Error dumpPDBStream();
Error dumpDbiStream();
+ Error dumpTpiStream();
+ Error dumpIpiStream();
void flush();
diff --git a/contrib/llvm/tools/llvm-pdbdump/YamlSerializationContext.h b/contrib/llvm/tools/llvm-pdbdump/YamlSerializationContext.h
new file mode 100644
index 000000000000..dcf29d249d60
--- /dev/null
+++ b/contrib/llvm/tools/llvm-pdbdump/YamlSerializationContext.h
@@ -0,0 +1,39 @@
+//===- YamlSerializationContext.h ----------------------------- -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVMPDBDUMP_YAMLSERIALIZATIONCONTEXT_H
+#define LLVM_TOOLS_LLVMPDBDUMP_YAMLSERIALIZATIONCONTEXT_H
+
+#include "PdbYaml.h"
+#include "YamlTypeDumper.h"
+#include "llvm/Support/Allocator.h"
+
+namespace llvm {
+namespace codeview {
+class TypeSerializer;
+}
+namespace yaml {
+class IO;
+}
+
+namespace pdb {
+namespace yaml {
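+// Shared state for the YAML dumpers: the type-dumper callbacks, the allocator
+// that owns deserialized record memory, and the type serializer that is active
+// while converting YAML back into a PDB.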
+struct SerializationContext {
+ explicit SerializationContext(llvm::yaml::IO &IO, BumpPtrAllocator &Allocator)
+ : Dumper(IO, *this), Allocator(Allocator) {}
+
+ codeview::yaml::YamlTypeDumperCallbacks Dumper;
+ BumpPtrAllocator &Allocator;
+ codeview::TypeSerializer *ActiveSerializer = nullptr;
+};
+}
+}
+}
+
+#endif
\ No newline at end of file
diff --git a/contrib/llvm/tools/llvm-pdbdump/YamlSymbolDumper.cpp b/contrib/llvm/tools/llvm-pdbdump/YamlSymbolDumper.cpp
new file mode 100644
index 000000000000..210260a03b5f
--- /dev/null
+++ b/contrib/llvm/tools/llvm-pdbdump/YamlSymbolDumper.cpp
@@ -0,0 +1,412 @@
+//===- YamlSymbolDumper.cpp ----------------------------------- -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "YamlSymbolDumper.h"
+#include "PdbYaml.h"
+#include "YamlTypeDumper.h"
+
+#include "llvm/DebugInfo/CodeView/CVSymbolVisitor.h"
+#include "llvm/DebugInfo/CodeView/EnumTables.h"
+#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
+#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
+#include "llvm/DebugInfo/CodeView/SymbolVisitorCallbackPipeline.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+using namespace llvm::codeview::yaml;
+
+LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(TypeIndex)
+LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint64_t)
+LLVM_YAML_IS_SEQUENCE_VECTOR(OneMethodRecord)
+LLVM_YAML_IS_SEQUENCE_VECTOR(VFTableSlotKind)
+LLVM_YAML_IS_SEQUENCE_VECTOR(StringRef)
+LLVM_YAML_IS_SEQUENCE_VECTOR(CVType)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::pdb::yaml::PdbTpiFieldListRecord)
+
+namespace llvm {
+namespace yaml {
+void ScalarEnumerationTraits<SymbolKind>::enumeration(IO &io,
+ SymbolKind &Value) {
+ auto SymbolNames = getSymbolTypeNames();
+ for (const auto &E : SymbolNames)
+ io.enumCase(Value, E.Name.str().c_str(), E.Value);
+}
+
+template <> struct ScalarBitSetTraits<CompileSym2Flags> {
+ static void bitset(IO &io, CompileSym2Flags &Flags) {
+ auto FlagNames = getCompileSym2FlagNames();
+ for (const auto &E : FlagNames) {
+ io.bitSetCase(Flags, E.Name.str().c_str(),
+ static_cast<CompileSym2Flags>(E.Value));
+ }
+ }
+};
+
+template <> struct ScalarBitSetTraits<CompileSym3Flags> {
+ static void bitset(IO &io, CompileSym3Flags &Flags) {
+ auto FlagNames = getCompileSym3FlagNames();
+ for (const auto &E : FlagNames) {
+ io.bitSetCase(Flags, E.Name.str().c_str(),
+ static_cast<CompileSym3Flags>(E.Value));
+ }
+ }
+};
+
+template <> struct ScalarBitSetTraits<ExportFlags> {
+ static void bitset(IO &io, ExportFlags &Flags) {
+ auto FlagNames = getExportSymFlagNames();
+ for (const auto &E : FlagNames) {
+ io.bitSetCase(Flags, E.Name.str().c_str(),
+ static_cast<ExportFlags>(E.Value));
+ }
+ }
+};
+
+template <> struct ScalarBitSetTraits<LocalSymFlags> {
+ static void bitset(IO &io, LocalSymFlags &Flags) {
+ auto FlagNames = getLocalFlagNames();
+ for (const auto &E : FlagNames) {
+ io.bitSetCase(Flags, E.Name.str().c_str(),
+ static_cast<LocalSymFlags>(E.Value));
+ }
+ }
+};
+
+template <> struct ScalarBitSetTraits<ProcSymFlags> {
+ static void bitset(IO &io, ProcSymFlags &Flags) {
+ auto FlagNames = getProcSymFlagNames();
+ for (const auto &E : FlagNames) {
+ io.bitSetCase(Flags, E.Name.str().c_str(),
+ static_cast<ProcSymFlags>(E.Value));
+ }
+ }
+};
+
+template <> struct ScalarBitSetTraits<FrameProcedureOptions> {
+ static void bitset(IO &io, FrameProcedureOptions &Flags) {
+ auto FlagNames = getFrameProcSymFlagNames();
+ for (const auto &E : FlagNames) {
+ io.bitSetCase(Flags, E.Name.str().c_str(),
+ static_cast<FrameProcedureOptions>(E.Value));
+ }
+ }
+};
+
+template <> struct ScalarEnumerationTraits<CPUType> {
+ static void enumeration(IO &io, CPUType &Cpu) {
+ auto CpuNames = getCPUTypeNames();
+ for (const auto &E : CpuNames) {
+ io.enumCase(Cpu, E.Name.str().c_str(), static_cast<CPUType>(E.Value));
+ }
+ }
+};
+
+template <> struct ScalarEnumerationTraits<RegisterId> {
+ static void enumeration(IO &io, RegisterId &Reg) {
+ auto RegNames = getRegisterNames();
+ for (const auto &E : RegNames) {
+ io.enumCase(Reg, E.Name.str().c_str(), static_cast<RegisterId>(E.Value));
+ }
+ }
+};
+
+template <> struct ScalarEnumerationTraits<TrampolineType> {
+ static void enumeration(IO &io, TrampolineType &Tramp) {
+ auto TrampNames = getTrampolineNames();
+ for (const auto &E : TrampNames) {
+ io.enumCase(Tramp, E.Name.str().c_str(),
+ static_cast<TrampolineType>(E.Value));
+ }
+ }
+};
+
+template <> struct ScalarEnumerationTraits<ThunkOrdinal> {
+ static void enumeration(IO &io, ThunkOrdinal &Ord) {
+ auto ThunkNames = getThunkOrdinalNames();
+ for (const auto &E : ThunkNames) {
+ io.enumCase(Ord, E.Name.str().c_str(),
+ static_cast<ThunkOrdinal>(E.Value));
+ }
+ }
+};
+
+void MappingTraits<ScopeEndSym>::mapping(IO &IO, ScopeEndSym &Obj) {}
+
+void MappingTraits<Thunk32Sym>::mapping(IO &IO, Thunk32Sym &Thunk) {
+ IO.mapRequired("Parent", Thunk.Parent);
+ IO.mapRequired("End", Thunk.End);
+ IO.mapRequired("Next", Thunk.Next);
+ IO.mapRequired("Off", Thunk.Offset);
+ IO.mapRequired("Seg", Thunk.Segment);
+ IO.mapRequired("Len", Thunk.Length);
+ IO.mapRequired("Ordinal", Thunk.Thunk);
+}
+
+void MappingTraits<TrampolineSym>::mapping(IO &IO, TrampolineSym &Tramp) {
+ IO.mapRequired("Type", Tramp.Type);
+ IO.mapRequired("Size", Tramp.Size);
+ IO.mapRequired("ThunkOff", Tramp.ThunkOffset);
+ IO.mapRequired("TargetOff", Tramp.TargetOffset);
+ IO.mapRequired("ThunkSection", Tramp.ThunkSection);
+ IO.mapRequired("TargetSection", Tramp.TargetSection);
+}
+
+void MappingTraits<SectionSym>::mapping(IO &IO, SectionSym &Section) {
+ IO.mapRequired("SectionNumber", Section.SectionNumber);
+ IO.mapRequired("Alignment", Section.Alignment);
+ IO.mapRequired("Rva", Section.Rva);
+ IO.mapRequired("Length", Section.Length);
+ IO.mapRequired("Characteristics", Section.Characteristics);
+ IO.mapRequired("Name", Section.Name);
+}
+
+void MappingTraits<CoffGroupSym>::mapping(IO &IO, CoffGroupSym &CoffGroup) {
+ IO.mapRequired("Size", CoffGroup.Size);
+ IO.mapRequired("Characteristics", CoffGroup.Characteristics);
+ IO.mapRequired("Offset", CoffGroup.Offset);
+ IO.mapRequired("Segment", CoffGroup.Segment);
+ IO.mapRequired("Name", CoffGroup.Name);
+}
+
+void MappingTraits<ExportSym>::mapping(IO &IO, ExportSym &Export) {
+ IO.mapRequired("Ordinal", Export.Ordinal);
+ IO.mapRequired("Flags", Export.Flags);
+ IO.mapRequired("Name", Export.Name);
+}
+
+void MappingTraits<ProcSym>::mapping(IO &IO, ProcSym &Proc) {
+ // TODO: Print the linkage name
+
+ IO.mapRequired("PtrParent", Proc.Parent);
+ IO.mapRequired("PtrEnd", Proc.End);
+ IO.mapRequired("PtrNext", Proc.Next);
+ IO.mapRequired("CodeSize", Proc.CodeSize);
+ IO.mapRequired("DbgStart", Proc.DbgStart);
+ IO.mapRequired("DbgEnd", Proc.DbgEnd);
+ IO.mapRequired("FunctionType", Proc.FunctionType);
+ IO.mapRequired("Segment", Proc.Segment);
+ IO.mapRequired("Flags", Proc.Flags);
+ IO.mapRequired("DisplayName", Proc.Name);
+}
+
+void MappingTraits<RegisterSym>::mapping(IO &IO, RegisterSym &Register) {
+ IO.mapRequired("Type", Register.Index);
+ IO.mapRequired("Seg", Register.Register);
+ IO.mapRequired("Name", Register.Name);
+}
+
+void MappingTraits<PublicSym32>::mapping(IO &IO, PublicSym32 &Public) {
+ IO.mapRequired("Type", Public.Index);
+ IO.mapRequired("Seg", Public.Segment);
+ IO.mapRequired("Off", Public.Offset);
+ IO.mapRequired("Name", Public.Name);
+}
+
+void MappingTraits<ProcRefSym>::mapping(IO &IO, ProcRefSym &ProcRef) {
+ IO.mapRequired("SumName", ProcRef.SumName);
+ IO.mapRequired("SymOffset", ProcRef.SymOffset);
+ IO.mapRequired("Mod", ProcRef.Module);
+ IO.mapRequired("Name", ProcRef.Name);
+}
+
+void MappingTraits<EnvBlockSym>::mapping(IO &IO, EnvBlockSym &EnvBlock) {
+ IO.mapRequired("Entries", EnvBlock.Fields);
+}
+
+void MappingTraits<InlineSiteSym>::mapping(IO &IO, InlineSiteSym &InlineSite) {
+ IO.mapRequired("PtrParent", InlineSite.Parent);
+ IO.mapRequired("PtrEnd", InlineSite.End);
+ IO.mapRequired("Inlinee", InlineSite.Inlinee);
+ // TODO: The binary annotations
+}
+
+void MappingTraits<LocalSym>::mapping(IO &IO, LocalSym &Local) {
+ IO.mapRequired("Type", Local.Type);
+ IO.mapRequired("Flags", Local.Flags);
+ IO.mapRequired("VarName", Local.Name);
+}
+
+void MappingTraits<DefRangeSym>::mapping(IO &IO, DefRangeSym &Obj) {
+ // TODO: Print the subfields
+}
+
+void MappingTraits<DefRangeSubfieldSym>::mapping(IO &IO,
+ DefRangeSubfieldSym &Obj) {
+ // TODO: Print the subfields
+}
+
+void MappingTraits<DefRangeRegisterSym>::mapping(IO &IO,
+ DefRangeRegisterSym &Obj) {
+ // TODO: Print the subfields
+}
+
+void MappingTraits<DefRangeFramePointerRelSym>::mapping(
+ IO &IO, DefRangeFramePointerRelSym &Obj) {
+ // TODO: Print the subfields
+}
+
+void MappingTraits<DefRangeSubfieldRegisterSym>::mapping(
+ IO &IO, DefRangeSubfieldRegisterSym &Obj) {
+ // TODO: Print the subfields
+}
+
+void MappingTraits<DefRangeFramePointerRelFullScopeSym>::mapping(
+ IO &IO, DefRangeFramePointerRelFullScopeSym &Obj) {
+ // TODO: Print the subfields
+}
+
+void MappingTraits<DefRangeRegisterRelSym>::mapping(
+ IO &IO, DefRangeRegisterRelSym &Obj) {
+ // TODO: Print the subfields
+}
+
+void MappingTraits<BlockSym>::mapping(IO &IO, BlockSym &Block) {
+ // TODO: Print the linkage name
+ IO.mapRequired("PtrParent", Block.Parent);
+ IO.mapRequired("PtrEnd", Block.End);
+ IO.mapRequired("CodeSize", Block.CodeSize);
+ IO.mapRequired("Segment", Block.Segment);
+ IO.mapRequired("BlockName", Block.Name);
+}
+
+void MappingTraits<LabelSym>::mapping(IO &IO, LabelSym &Label) {
+ // TODO: Print the linkage name
+ IO.mapRequired("Segment", Label.Segment);
+ IO.mapRequired("Flags", Label.Flags);
+ IO.mapRequired("Flags", Label.Flags);
+ IO.mapRequired("DisplayName", Label.Name);
+}
+
+void MappingTraits<ObjNameSym>::mapping(IO &IO, ObjNameSym &ObjName) {
+ IO.mapRequired("Signature", ObjName.Signature);
+ IO.mapRequired("ObjectName", ObjName.Name);
+}
+
+void MappingTraits<Compile2Sym>::mapping(IO &IO, Compile2Sym &Compile2) {
+ IO.mapRequired("Flags", Compile2.Flags);
+ IO.mapRequired("Machine", Compile2.Machine);
+ IO.mapRequired("FrontendMajor", Compile2.VersionFrontendMajor);
+ IO.mapRequired("FrontendMinor", Compile2.VersionFrontendMinor);
+ IO.mapRequired("FrontendBuild", Compile2.VersionFrontendBuild);
+ IO.mapRequired("BackendMajor", Compile2.VersionBackendMajor);
+ IO.mapRequired("BackendMinor", Compile2.VersionBackendMinor);
+ IO.mapRequired("BackendBuild", Compile2.VersionBackendBuild);
+ IO.mapRequired("Version", Compile2.Version);
+}
+
+void MappingTraits<Compile3Sym>::mapping(IO &IO, Compile3Sym &Compile3) {
+ IO.mapRequired("Flags", Compile3.Flags);
+ IO.mapRequired("Machine", Compile3.Machine);
+ IO.mapRequired("FrontendMajor", Compile3.VersionFrontendMajor);
+ IO.mapRequired("FrontendMinor", Compile3.VersionFrontendMinor);
+ IO.mapRequired("FrontendBuild", Compile3.VersionFrontendBuild);
+ IO.mapRequired("FrontendQFE", Compile3.VersionFrontendQFE);
+ IO.mapRequired("BackendMajor", Compile3.VersionBackendMajor);
+ IO.mapRequired("BackendMinor", Compile3.VersionBackendMinor);
+ IO.mapRequired("BackendBuild", Compile3.VersionBackendBuild);
+ IO.mapRequired("BackendQFE", Compile3.VersionBackendQFE);
+ IO.mapRequired("Version", Compile3.Version);
+}
+
+void MappingTraits<FrameProcSym>::mapping(IO &IO, FrameProcSym &FrameProc) {
+ IO.mapRequired("TotalFrameBytes", FrameProc.TotalFrameBytes);
+ IO.mapRequired("PaddingFrameBytes", FrameProc.PaddingFrameBytes);
+ IO.mapRequired("OffsetToPadding", FrameProc.OffsetToPadding);
+ IO.mapRequired("BytesOfCalleeSavedRegisters",
+ FrameProc.BytesOfCalleeSavedRegisters);
+ IO.mapRequired("OffsetOfExceptionHandler",
+ FrameProc.OffsetOfExceptionHandler);
+ IO.mapRequired("SectionIdOfExceptionHandler",
+ FrameProc.SectionIdOfExceptionHandler);
+ IO.mapRequired("Flags", FrameProc.Flags);
+}
+
+void MappingTraits<CallSiteInfoSym>::mapping(IO &IO,
+ CallSiteInfoSym &CallSiteInfo) {
+ // TODO: Map Linkage Name
+ IO.mapRequired("Segment", CallSiteInfo.Segment);
+ IO.mapRequired("Type", CallSiteInfo.Type);
+}
+
+void MappingTraits<FileStaticSym>::mapping(IO &IO, FileStaticSym &FileStatic) {
+ IO.mapRequired("Index", FileStatic.Index);
+ IO.mapRequired("ModFilenameOffset", FileStatic.ModFilenameOffset);
+ IO.mapRequired("Flags", FileStatic.Flags);
+ IO.mapRequired("Name", FileStatic.Name);
+}
+
+void MappingTraits<HeapAllocationSiteSym>::mapping(
+ IO &IO, HeapAllocationSiteSym &HeapAllocSite) {
+ // TODO: Map Linkage Name
+ IO.mapRequired("Segment", HeapAllocSite.Segment);
+ IO.mapRequired("CallInstructionSize", HeapAllocSite.CallInstructionSize);
+ IO.mapRequired("Type", HeapAllocSite.Type);
+}
+
+void MappingTraits<FrameCookieSym>::mapping(IO &IO,
+ FrameCookieSym &FrameCookie) {
+ // TODO: Map Linkage Name
+ IO.mapRequired("Register", FrameCookie.Register);
+ IO.mapRequired("CookieKind", FrameCookie.CookieKind);
+ IO.mapRequired("Flags", FrameCookie.Flags);
+}
+
+void MappingTraits<CallerSym>::mapping(IO &IO, CallerSym &Caller) {
+ // TODO: Correctly handle the ArrayRef in here.
+ std::vector<TypeIndex> Indices(Caller.Indices);
+ IO.mapRequired("FuncID", Indices);
+}
+
+void MappingTraits<UDTSym>::mapping(IO &IO, UDTSym &UDT) {
+ IO.mapRequired("Type", UDT.Type);
+ IO.mapRequired("UDTName", UDT.Name);
+}
+
+void MappingTraits<BuildInfoSym>::mapping(IO &IO, BuildInfoSym &BuildInfo) {
+ IO.mapRequired("BuildId", BuildInfo.BuildId);
+}
+
+void MappingTraits<BPRelativeSym>::mapping(IO &IO, BPRelativeSym &BPRel) {
+ IO.mapRequired("Offset", BPRel.Offset);
+ IO.mapRequired("Type", BPRel.Type);
+ IO.mapRequired("VarName", BPRel.Name);
+}
+
+void MappingTraits<RegRelativeSym>::mapping(IO &IO, RegRelativeSym &RegRel) {
+ IO.mapRequired("Offset", RegRel.Offset);
+ IO.mapRequired("Type", RegRel.Type);
+ IO.mapRequired("Register", RegRel.Register);
+ IO.mapRequired("VarName", RegRel.Name);
+}
+
+void MappingTraits<ConstantSym>::mapping(IO &IO, ConstantSym &Constant) {
+ IO.mapRequired("Type", Constant.Type);
+ IO.mapRequired("Value", Constant.Value);
+ IO.mapRequired("Name", Constant.Name);
+}
+
+void MappingTraits<DataSym>::mapping(IO &IO, DataSym &Data) {
+ // TODO: Map linkage name
+ IO.mapRequired("Type", Data.Type);
+ IO.mapRequired("DisplayName", Data.Name);
+}
+
+void MappingTraits<ThreadLocalDataSym>::mapping(IO &IO,
+ ThreadLocalDataSym &Data) {
+ // TODO: Map linkage name
+ IO.mapRequired("Type", Data.Type);
+ IO.mapRequired("DisplayName", Data.Name);
+}
+}
+}
+
+Error llvm::codeview::yaml::YamlSymbolDumper::visitSymbolBegin(CVSymbol &CVR) {
+ YamlIO.mapRequired("Kind", CVR.Type);
+ return Error::success();
+}
diff --git a/contrib/llvm/tools/llvm-pdbdump/YamlSymbolDumper.h b/contrib/llvm/tools/llvm-pdbdump/YamlSymbolDumper.h
new file mode 100644
index 000000000000..61e63f96719a
--- /dev/null
+++ b/contrib/llvm/tools/llvm-pdbdump/YamlSymbolDumper.h
@@ -0,0 +1,66 @@
+//===- YamlSymbolDumper.h ------------------------------------- -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVMPDBDUMP_YAMLSYMBOLDUMPER_H
+#define LLVM_TOOLS_LLVMPDBDUMP_YAMLSYMBOLDUMPER_H
+
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/SymbolVisitorCallbacks.h"
+#include "llvm/Support/YAMLTraits.h"
+
+namespace llvm {
+namespace pdb {
+namespace yaml {
+struct SerializationContext;
+}
+}
+namespace codeview {
+namespace yaml {
+class YamlSymbolDumper : public SymbolVisitorCallbacks {
+public:
+ YamlSymbolDumper(llvm::yaml::IO &IO) : YamlIO(IO) {}
+
+ virtual Error visitSymbolBegin(CVSymbol &Record) override;
+
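+// Generate a visitKnownRecord override for each concrete symbol record kind in
+// CVSymbolTypes.def; each override maps the record as a YAML field named after
+// the record type.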
+#define SYMBOL_RECORD(EnumName, EnumVal, Name) \
+ Error visitKnownRecord(CVSymbol &CVR, Name &Record) override { \
+ visitKnownRecordImpl(#Name, CVR, Record); \
+ return Error::success(); \
+ }
+#define SYMBOL_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
+#include "llvm/DebugInfo/CodeView/CVSymbolTypes.def"
+
+private:
+ template <typename T>
+ void visitKnownRecordImpl(const char *Name, CVSymbol &Type, T &Record) {
+ YamlIO.mapRequired(Name, Record);
+ }
+
+ llvm::yaml::IO &YamlIO;
+};
+}
+}
+}
+
+namespace llvm {
+namespace yaml {
+template <> struct ScalarEnumerationTraits<codeview::SymbolKind> {
+ static void enumeration(IO &io, codeview::SymbolKind &Value);
+};
+
+#define SYMBOL_RECORD(EnumName, EnumVal, Name) \
+ template <> struct MappingTraits<codeview::Name> { \
+ static void mapping(IO &IO, codeview::Name &Obj); \
+ };
+#define SYMBOL_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
+#include "llvm/DebugInfo/CodeView/CVSymbolTypes.def"
+}
+}
+
+#endif
diff --git a/contrib/llvm/tools/llvm-pdbdump/YamlTypeDumper.cpp b/contrib/llvm/tools/llvm-pdbdump/YamlTypeDumper.cpp
new file mode 100644
index 000000000000..5c527c71c7e4
--- /dev/null
+++ b/contrib/llvm/tools/llvm-pdbdump/YamlTypeDumper.cpp
@@ -0,0 +1,599 @@
+//===- YamlTypeDumper.cpp ------------------------------------- -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "YamlTypeDumper.h"
+#include "PdbYaml.h"
+#include "YamlSerializationContext.h"
+
+#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
+#include "llvm/DebugInfo/CodeView/EnumTables.h"
+#include "llvm/DebugInfo/CodeView/TypeDeserializer.h"
+#include "llvm/DebugInfo/CodeView/TypeRecord.h"
+#include "llvm/DebugInfo/CodeView/TypeSerializer.h"
+#include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h"
+#include "llvm/DebugInfo/PDB/Raw/TpiHashing.h"
+
+using namespace llvm;
+using namespace llvm::codeview;
+using namespace llvm::codeview::yaml;
+
+LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(TypeIndex)
+LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(uint64_t)
+LLVM_YAML_IS_SEQUENCE_VECTOR(OneMethodRecord)
+LLVM_YAML_IS_SEQUENCE_VECTOR(VFTableSlotKind)
+LLVM_YAML_IS_SEQUENCE_VECTOR(StringRef)
+LLVM_YAML_IS_SEQUENCE_VECTOR(CVType)
+LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::pdb::yaml::PdbTpiFieldListRecord)
+
+namespace {
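+// Splits a serialized field list into one PdbTpiFieldListRecord per member so
+// the list can be emitted as a YAML sequence of individual records.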
+struct FieldListRecordSplitter : public TypeVisitorCallbacks {
+public:
+ explicit FieldListRecordSplitter(
+ std::vector<llvm::pdb::yaml::PdbTpiFieldListRecord> &Records)
+ : Records(Records) {}
+
+#define TYPE_RECORD(EnumName, EnumVal, Name)
+#define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
+#define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
+#define MEMBER_RECORD(EnumName, EnumVal, Name) \
+ Error visitKnownMember(CVMemberRecord &CVT, Name##Record &Record) override { \
+ visitKnownMemberImpl(CVT); \
+ return Error::success(); \
+ }
+#include "llvm/DebugInfo/CodeView/TypeRecords.def"
+
+private:
+ void visitKnownMemberImpl(CVMemberRecord &CVT) {
+ llvm::pdb::yaml::PdbTpiFieldListRecord R;
+ R.Record = CVT;
+ Records.push_back(std::move(R));
+ }
+
+ std::vector<llvm::pdb::yaml::PdbTpiFieldListRecord> &Records;
+};
+}
+
+namespace llvm {
+namespace yaml {
+template <> struct ScalarEnumerationTraits<PointerToMemberRepresentation> {
+ static void enumeration(IO &IO, PointerToMemberRepresentation &Value) {
+ IO.enumCase(Value, "Unknown", PointerToMemberRepresentation::Unknown);
+ IO.enumCase(Value, "SingleInheritanceData",
+ PointerToMemberRepresentation::SingleInheritanceData);
+ IO.enumCase(Value, "MultipleInheritanceData",
+ PointerToMemberRepresentation::MultipleInheritanceData);
+ IO.enumCase(Value, "VirtualInheritanceData",
+ PointerToMemberRepresentation::VirtualInheritanceData);
+ IO.enumCase(Value, "GeneralData",
+ PointerToMemberRepresentation::GeneralData);
+ IO.enumCase(Value, "SingleInheritanceFunction",
+ PointerToMemberRepresentation::SingleInheritanceFunction);
+ IO.enumCase(Value, "MultipleInheritanceFunction",
+ PointerToMemberRepresentation::MultipleInheritanceFunction);
+ IO.enumCase(Value, "VirtualInheritanceFunction",
+ PointerToMemberRepresentation::VirtualInheritanceFunction);
+ IO.enumCase(Value, "GeneralFunction",
+ PointerToMemberRepresentation::GeneralFunction);
+ }
+};
+
+template <> struct ScalarEnumerationTraits<VFTableSlotKind> {
+ static void enumeration(IO &IO, VFTableSlotKind &Kind) {
+ IO.enumCase(Kind, "Near16", VFTableSlotKind::Near16);
+ IO.enumCase(Kind, "Far16", VFTableSlotKind::Far16);
+ IO.enumCase(Kind, "This", VFTableSlotKind::This);
+ IO.enumCase(Kind, "Outer", VFTableSlotKind::Outer);
+ IO.enumCase(Kind, "Meta", VFTableSlotKind::Meta);
+ IO.enumCase(Kind, "Near", VFTableSlotKind::Near);
+ IO.enumCase(Kind, "Far", VFTableSlotKind::Far);
+ }
+};
+
+template <> struct ScalarEnumerationTraits<CallingConvention> {
+ static void enumeration(IO &IO, CallingConvention &Value) {
+ IO.enumCase(Value, "NearC", CallingConvention::NearC);
+ IO.enumCase(Value, "FarC", CallingConvention::FarC);
+ IO.enumCase(Value, "NearPascal", CallingConvention::NearPascal);
+ IO.enumCase(Value, "FarPascal", CallingConvention::FarPascal);
+ IO.enumCase(Value, "NearFast", CallingConvention::NearFast);
+ IO.enumCase(Value, "FarFast", CallingConvention::FarFast);
+ IO.enumCase(Value, "NearStdCall", CallingConvention::NearStdCall);
+ IO.enumCase(Value, "FarStdCall", CallingConvention::FarStdCall);
+ IO.enumCase(Value, "NearSysCall", CallingConvention::NearSysCall);
+ IO.enumCase(Value, "FarSysCall", CallingConvention::FarSysCall);
+ IO.enumCase(Value, "ThisCall", CallingConvention::ThisCall);
+ IO.enumCase(Value, "MipsCall", CallingConvention::MipsCall);
+ IO.enumCase(Value, "Generic", CallingConvention::Generic);
+ IO.enumCase(Value, "AlphaCall", CallingConvention::AlphaCall);
+ IO.enumCase(Value, "PpcCall", CallingConvention::PpcCall);
+ IO.enumCase(Value, "SHCall", CallingConvention::SHCall);
+ IO.enumCase(Value, "ArmCall", CallingConvention::ArmCall);
+ IO.enumCase(Value, "AM33Call", CallingConvention::AM33Call);
+ IO.enumCase(Value, "TriCall", CallingConvention::TriCall);
+ IO.enumCase(Value, "SH5Call", CallingConvention::SH5Call);
+ IO.enumCase(Value, "M32RCall", CallingConvention::M32RCall);
+ IO.enumCase(Value, "ClrCall", CallingConvention::ClrCall);
+ IO.enumCase(Value, "Inline", CallingConvention::Inline);
+ IO.enumCase(Value, "NearVector", CallingConvention::NearVector);
+ }
+};
+
+template <> struct ScalarEnumerationTraits<PointerKind> {
+ static void enumeration(IO &IO, PointerKind &Kind) {
+ IO.enumCase(Kind, "Near16", PointerKind::Near16);
+ IO.enumCase(Kind, "Far16", PointerKind::Far16);
+ IO.enumCase(Kind, "Huge16", PointerKind::Huge16);
+ IO.enumCase(Kind, "BasedOnSegment", PointerKind::BasedOnSegment);
+ IO.enumCase(Kind, "BasedOnValue", PointerKind::BasedOnValue);
+ IO.enumCase(Kind, "BasedOnSegmentValue", PointerKind::BasedOnSegmentValue);
+ IO.enumCase(Kind, "BasedOnAddress", PointerKind::BasedOnAddress);
+ IO.enumCase(Kind, "BasedOnSegmentAddress",
+ PointerKind::BasedOnSegmentAddress);
+ IO.enumCase(Kind, "BasedOnType", PointerKind::BasedOnType);
+ IO.enumCase(Kind, "BasedOnSelf", PointerKind::BasedOnSelf);
+ IO.enumCase(Kind, "Near32", PointerKind::Near32);
+ IO.enumCase(Kind, "Far32", PointerKind::Far32);
+ IO.enumCase(Kind, "Near64", PointerKind::Near64);
+ }
+};
+
+template <> struct ScalarEnumerationTraits<PointerMode> {
+ static void enumeration(IO &IO, PointerMode &Mode) {
+ IO.enumCase(Mode, "Pointer", PointerMode::Pointer);
+ IO.enumCase(Mode, "LValueReference", PointerMode::LValueReference);
+ IO.enumCase(Mode, "PointerToDataMember", PointerMode::PointerToDataMember);
+ IO.enumCase(Mode, "PointerToMemberFunction",
+ PointerMode::PointerToMemberFunction);
+ IO.enumCase(Mode, "RValueReference", PointerMode::RValueReference);
+ }
+};
+
+template <> struct ScalarEnumerationTraits<HfaKind> {
+ static void enumeration(IO &IO, HfaKind &Value) {
+ IO.enumCase(Value, "None", HfaKind::None);
+ IO.enumCase(Value, "Float", HfaKind::Float);
+ IO.enumCase(Value, "Double", HfaKind::Double);
+ IO.enumCase(Value, "Other", HfaKind::Other);
+ }
+};
+
+template <> struct ScalarEnumerationTraits<MemberAccess> {
+ static void enumeration(IO &IO, MemberAccess &Access) {
+ IO.enumCase(Access, "None", MemberAccess::None);
+ IO.enumCase(Access, "Private", MemberAccess::Private);
+ IO.enumCase(Access, "Protected", MemberAccess::Protected);
+ IO.enumCase(Access, "Public", MemberAccess::Public);
+ }
+};
+
+template <> struct ScalarEnumerationTraits<MethodKind> {
+ static void enumeration(IO &IO, MethodKind &Kind) {
+ IO.enumCase(Kind, "Vanilla", MethodKind::Vanilla);
+ IO.enumCase(Kind, "Virtual", MethodKind::Virtual);
+ IO.enumCase(Kind, "Static", MethodKind::Static);
+ IO.enumCase(Kind, "Friend", MethodKind::Friend);
+ IO.enumCase(Kind, "IntroducingVirtual", MethodKind::IntroducingVirtual);
+ IO.enumCase(Kind, "PureVirtual", MethodKind::PureVirtual);
+ IO.enumCase(Kind, "PureIntroducingVirtual",
+ MethodKind::PureIntroducingVirtual);
+ }
+};
+
+template <> struct ScalarEnumerationTraits<WindowsRTClassKind> {
+ static void enumeration(IO &IO, WindowsRTClassKind &Value) {
+ IO.enumCase(Value, "None", WindowsRTClassKind::None);
+ IO.enumCase(Value, "Ref", WindowsRTClassKind::RefClass);
+ IO.enumCase(Value, "Value", WindowsRTClassKind::ValueClass);
+ IO.enumCase(Value, "Interface", WindowsRTClassKind::Interface);
+ }
+};
+
+template <> struct ScalarBitSetTraits<PointerOptions> {
+ static void bitset(IO &IO, PointerOptions &Options) {
+ IO.bitSetCase(Options, "None", PointerOptions::None);
+ IO.bitSetCase(Options, "Flat32", PointerOptions::Flat32);
+ IO.bitSetCase(Options, "Volatile", PointerOptions::Volatile);
+ IO.bitSetCase(Options, "Const", PointerOptions::Const);
+ IO.bitSetCase(Options, "Unaligned", PointerOptions::Unaligned);
+ IO.bitSetCase(Options, "Restrict", PointerOptions::Restrict);
+ IO.bitSetCase(Options, "WinRTSmartPointer",
+ PointerOptions::WinRTSmartPointer);
+ }
+};
+
+template <> struct ScalarBitSetTraits<ModifierOptions> {
+ static void bitset(IO &IO, ModifierOptions &Options) {
+ IO.bitSetCase(Options, "None", ModifierOptions::None);
+ IO.bitSetCase(Options, "Const", ModifierOptions::Const);
+ IO.bitSetCase(Options, "Volatile", ModifierOptions::Volatile);
+ IO.bitSetCase(Options, "Unaligned", ModifierOptions::Unaligned);
+ }
+};
+
+template <> struct ScalarBitSetTraits<FunctionOptions> {
+ static void bitset(IO &IO, FunctionOptions &Options) {
+ IO.bitSetCase(Options, "None", FunctionOptions::None);
+ IO.bitSetCase(Options, "CxxReturnUdt", FunctionOptions::CxxReturnUdt);
+ IO.bitSetCase(Options, "Constructor", FunctionOptions::Constructor);
+ IO.bitSetCase(Options, "ConstructorWithVirtualBases",
+ FunctionOptions::ConstructorWithVirtualBases);
+ }
+};
+
+template <> struct ScalarBitSetTraits<ClassOptions> {
+ static void bitset(IO &IO, ClassOptions &Options) {
+ IO.bitSetCase(Options, "None", ClassOptions::None);
+ IO.bitSetCase(Options, "HasConstructorOrDestructor",
+ ClassOptions::HasConstructorOrDestructor);
+ IO.bitSetCase(Options, "HasOverloadedOperator",
+ ClassOptions::HasOverloadedOperator);
+ IO.bitSetCase(Options, "Nested", ClassOptions::Nested);
+ IO.bitSetCase(Options, "ContainsNestedClass",
+ ClassOptions::ContainsNestedClass);
+ IO.bitSetCase(Options, "HasOverloadedAssignmentOperator",
+ ClassOptions::HasOverloadedAssignmentOperator);
+ IO.bitSetCase(Options, "HasConversionOperator",
+ ClassOptions::HasConversionOperator);
+ IO.bitSetCase(Options, "ForwardReference", ClassOptions::ForwardReference);
+ IO.bitSetCase(Options, "Scoped", ClassOptions::Scoped);
+ IO.bitSetCase(Options, "HasUniqueName", ClassOptions::HasUniqueName);
+ IO.bitSetCase(Options, "Sealed", ClassOptions::Sealed);
+ IO.bitSetCase(Options, "Intrinsic", ClassOptions::Intrinsic);
+ }
+};
+
+template <> struct ScalarBitSetTraits<MethodOptions> {
+ static void bitset(IO &IO, MethodOptions &Options) {
+ IO.bitSetCase(Options, "None", MethodOptions::None);
+ IO.bitSetCase(Options, "Pseudo", MethodOptions::Pseudo);
+ IO.bitSetCase(Options, "NoInherit", MethodOptions::NoInherit);
+ IO.bitSetCase(Options, "NoConstruct", MethodOptions::NoConstruct);
+ IO.bitSetCase(Options, "CompilerGenerated",
+ MethodOptions::CompilerGenerated);
+ IO.bitSetCase(Options, "Sealed", MethodOptions::Sealed);
+ }
+};
+
+void ScalarTraits<APSInt>::output(const APSInt &S, void *,
+ llvm::raw_ostream &OS) {
+ S.print(OS, true);
+}
+StringRef ScalarTraits<APSInt>::input(StringRef Scalar, void *Ctx, APSInt &S) {
+ S = APSInt(Scalar);
+ return "";
+}
+
+bool ScalarTraits<APSInt>::mustQuote(StringRef Scalar) { return false; }
+
+void MappingContextTraits<CVType, pdb::yaml::SerializationContext>::mapping(
+ IO &IO, CVType &Record, pdb::yaml::SerializationContext &Context) {
+ if (IO.outputting()) {
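+ // When writing YAML, deserialize the raw record bytes and feed them through
+ // the dumper callbacks so each field is emitted by name.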
+ codeview::TypeDeserializer Deserializer;
+
+ codeview::TypeVisitorCallbackPipeline Pipeline;
+ Pipeline.addCallbackToPipeline(Deserializer);
+ Pipeline.addCallbackToPipeline(Context.Dumper);
+
+ codeview::CVTypeVisitor Visitor(Pipeline);
+ consumeError(Visitor.visitTypeRecord(Record));
+ }
+}
+
+void MappingTraits<StringIdRecord>::mapping(IO &IO, StringIdRecord &String) {
+ IO.mapRequired("Id", String.Id);
+ IO.mapRequired("String", String.String);
+}
+
+void MappingTraits<ArgListRecord>::mapping(IO &IO, ArgListRecord &Args) {
+ IO.mapRequired("ArgIndices", Args.StringIndices);
+}
+
+void MappingTraits<ClassRecord>::mapping(IO &IO, ClassRecord &Class) {
+ IO.mapRequired("MemberCount", Class.MemberCount);
+ IO.mapRequired("Options", Class.Options);
+ IO.mapRequired("FieldList", Class.FieldList);
+ IO.mapRequired("Name", Class.Name);
+ IO.mapRequired("UniqueName", Class.UniqueName);
+ IO.mapRequired("DerivationList", Class.DerivationList);
+ IO.mapRequired("VTableShape", Class.VTableShape);
+ IO.mapRequired("Size", Class.Size);
+}
+
+void MappingTraits<UnionRecord>::mapping(IO &IO, UnionRecord &Union) {
+ IO.mapRequired("MemberCount", Union.MemberCount);
+ IO.mapRequired("Options", Union.Options);
+ IO.mapRequired("FieldList", Union.FieldList);
+ IO.mapRequired("Name", Union.Name);
+ IO.mapRequired("UniqueName", Union.UniqueName);
+ IO.mapRequired("Size", Union.Size);
+}
+
+void MappingTraits<EnumRecord>::mapping(IO &IO, EnumRecord &Enum) {
+ IO.mapRequired("NumEnumerators", Enum.MemberCount);
+ IO.mapRequired("Options", Enum.Options);
+ IO.mapRequired("FieldList", Enum.FieldList);
+ IO.mapRequired("Name", Enum.Name);
+ IO.mapRequired("UniqueName", Enum.UniqueName);
+ IO.mapRequired("UnderlyingType", Enum.UnderlyingType);
+}
+
+void MappingTraits<ArrayRecord>::mapping(IO &IO, ArrayRecord &AT) {
+ IO.mapRequired("ElementType", AT.ElementType);
+ IO.mapRequired("IndexType", AT.IndexType);
+ IO.mapRequired("Size", AT.Size);
+ IO.mapRequired("Name", AT.Name);
+}
+
+void MappingTraits<VFTableRecord>::mapping(IO &IO, VFTableRecord &VFT) {
+ IO.mapRequired("CompleteClass", VFT.CompleteClass);
+ IO.mapRequired("OverriddenVFTable", VFT.OverriddenVFTable);
+ IO.mapRequired("VFPtrOffset", VFT.VFPtrOffset);
+ IO.mapRequired("MethodNames", VFT.MethodNames);
+}
+
+void MappingTraits<MemberFuncIdRecord>::mapping(IO &IO,
+ MemberFuncIdRecord &Id) {
+ IO.mapRequired("ClassType", Id.ClassType);
+ IO.mapRequired("FunctionType", Id.FunctionType);
+ IO.mapRequired("Name", Id.Name);
+}
+
+void MappingTraits<ProcedureRecord>::mapping(IO &IO, ProcedureRecord &Proc) {
+ IO.mapRequired("ReturnType", Proc.ReturnType);
+ IO.mapRequired("CallConv", Proc.CallConv);
+ IO.mapRequired("Options", Proc.Options);
+ IO.mapRequired("ParameterCount", Proc.ParameterCount);
+ IO.mapRequired("ArgumentList", Proc.ArgumentList);
+}
+
+void MappingTraits<MemberFunctionRecord>::mapping(IO &IO,
+ MemberFunctionRecord &MF) {
+ IO.mapRequired("ReturnType", MF.ReturnType);
+ IO.mapRequired("ClassType", MF.ClassType);
+ IO.mapRequired("ThisType", MF.ThisType);
+ IO.mapRequired("CallConv", MF.CallConv);
+ IO.mapRequired("Options", MF.Options);
+ IO.mapRequired("ParameterCount", MF.ParameterCount);
+ IO.mapRequired("ArgumentList", MF.ArgumentList);
+ IO.mapRequired("ThisPointerAdjustment", MF.ThisPointerAdjustment);
+}
+
+void MappingTraits<MethodOverloadListRecord>::mapping(
+ IO &IO, MethodOverloadListRecord &MethodList) {
+ IO.mapRequired("Methods", MethodList.Methods);
+}
+
+void MappingTraits<FuncIdRecord>::mapping(IO &IO, FuncIdRecord &Func) {
+ IO.mapRequired("ParentScope", Func.ParentScope);
+ IO.mapRequired("FunctionType", Func.FunctionType);
+ IO.mapRequired("Name", Func.Name);
+}
+
+void MappingTraits<TypeServer2Record>::mapping(IO &IO, TypeServer2Record &TS) {
+ IO.mapRequired("Guid", TS.Guid);
+ IO.mapRequired("Age", TS.Age);
+ IO.mapRequired("Name", TS.Name);
+}
+
+void MappingTraits<PointerRecord>::mapping(IO &IO, PointerRecord &Ptr) {
+ IO.mapRequired("ReferentType", Ptr.ReferentType);
+ IO.mapRequired("Attrs", Ptr.Attrs);
+ IO.mapOptional("MemberInfo", Ptr.MemberInfo);
+}
+
+void MappingTraits<MemberPointerInfo>::mapping(IO &IO, MemberPointerInfo &MPI) {
+ IO.mapRequired("ContainingType", MPI.ContainingType);
+ IO.mapRequired("Representation", MPI.Representation);
+}
+
+void MappingTraits<ModifierRecord>::mapping(IO &IO, ModifierRecord &Mod) {
+ IO.mapRequired("ModifiedType", Mod.ModifiedType);
+ IO.mapRequired("Modifiers", Mod.Modifiers);
+}
+
+void MappingTraits<BitFieldRecord>::mapping(IO &IO, BitFieldRecord &BitField) {
+ IO.mapRequired("Type", BitField.Type);
+ IO.mapRequired("BitSize", BitField.BitSize);
+ IO.mapRequired("BitOffset", BitField.BitOffset);
+}
+
+void MappingTraits<VFTableShapeRecord>::mapping(IO &IO,
+ VFTableShapeRecord &Shape) {
+ IO.mapRequired("Slots", Shape.Slots);
+}
+
+void MappingTraits<UdtSourceLineRecord>::mapping(IO &IO,
+ UdtSourceLineRecord &Line) {
+ IO.mapRequired("UDT", Line.UDT);
+ IO.mapRequired("SourceFile", Line.SourceFile);
+ IO.mapRequired("LineNumber", Line.LineNumber);
+}
+
+void MappingTraits<UdtModSourceLineRecord>::mapping(
+ IO &IO, UdtModSourceLineRecord &Line) {
+ IO.mapRequired("UDT", Line.UDT);
+ IO.mapRequired("SourceFile", Line.SourceFile);
+ IO.mapRequired("LineNumber", Line.LineNumber);
+ IO.mapRequired("Module", Line.Module);
+}
+
+void MappingTraits<BuildInfoRecord>::mapping(IO &IO, BuildInfoRecord &Args) {
+ IO.mapRequired("ArgIndices", Args.ArgIndices);
+}
+
+void MappingTraits<NestedTypeRecord>::mapping(IO &IO,
+ NestedTypeRecord &Nested) {
+ IO.mapRequired("Type", Nested.Type);
+ IO.mapRequired("Name", Nested.Name);
+}
+
+void MappingTraits<OneMethodRecord>::mapping(IO &IO, OneMethodRecord &Method) {
+ IO.mapRequired("Type", Method.Type);
+ IO.mapRequired("Attrs", Method.Attrs.Attrs);
+ IO.mapRequired("VFTableOffset", Method.VFTableOffset);
+ IO.mapRequired("Name", Method.Name);
+}
+
+void MappingTraits<OverloadedMethodRecord>::mapping(
+ IO &IO, OverloadedMethodRecord &Method) {
+ IO.mapRequired("NumOverloads", Method.NumOverloads);
+ IO.mapRequired("MethodList", Method.MethodList);
+ IO.mapRequired("Name", Method.Name);
+}
+
+void MappingTraits<DataMemberRecord>::mapping(IO &IO, DataMemberRecord &Field) {
+ IO.mapRequired("Attrs", Field.Attrs.Attrs);
+ IO.mapRequired("Type", Field.Type);
+ IO.mapRequired("FieldOffset", Field.FieldOffset);
+ IO.mapRequired("Name", Field.Name);
+}
+
+void MappingTraits<StaticDataMemberRecord>::mapping(
+ IO &IO, StaticDataMemberRecord &Field) {
+ IO.mapRequired("Attrs", Field.Attrs.Attrs);
+ IO.mapRequired("Type", Field.Type);
+ IO.mapRequired("Name", Field.Name);
+}
+
+void MappingTraits<VFPtrRecord>::mapping(IO &IO, VFPtrRecord &VFTable) {
+ IO.mapRequired("Type", VFTable.Type);
+}
+
+void MappingTraits<EnumeratorRecord>::mapping(IO &IO, EnumeratorRecord &Enum) {
+ IO.mapRequired("Attrs", Enum.Attrs.Attrs);
+ IO.mapRequired("Value", Enum.Value);
+ IO.mapRequired("Name", Enum.Name);
+}
+
+void MappingTraits<BaseClassRecord>::mapping(IO &IO, BaseClassRecord &Base) {
+ IO.mapRequired("Attrs", Base.Attrs.Attrs);
+ IO.mapRequired("Type", Base.Type);
+ IO.mapRequired("Offset", Base.Offset);
+}
+
+void MappingTraits<VirtualBaseClassRecord>::mapping(
+ IO &IO, VirtualBaseClassRecord &Base) {
+ IO.mapRequired("Attrs", Base.Attrs.Attrs);
+ IO.mapRequired("BaseType", Base.BaseType);
+ IO.mapRequired("VBPtrType", Base.VBPtrType);
+ IO.mapRequired("VBPtrOffset", Base.VBPtrOffset);
+ IO.mapRequired("VTableIndex", Base.VTableIndex);
+}
+
+void MappingTraits<ListContinuationRecord>::mapping(
+ IO &IO, ListContinuationRecord &Cont) {
+ IO.mapRequired("ContinuationIndex", Cont.ContinuationIndex);
+}
+
+void ScalarTraits<codeview::TypeIndex>::output(const codeview::TypeIndex &S,
+ void *, llvm::raw_ostream &OS) {
+ OS << S.getIndex();
+}
+StringRef ScalarTraits<codeview::TypeIndex>::input(StringRef Scalar, void *Ctx,
+ codeview::TypeIndex &S) {
+ uint32_t I;
+ StringRef Result = ScalarTraits<uint32_t>::input(Scalar, Ctx, I);
+ if (!Result.empty())
+ return Result;
+ S = TypeIndex(I);
+ return "";
+}
+bool ScalarTraits<codeview::TypeIndex>::mustQuote(StringRef Scalar) {
+ return false;
+}
+
+void ScalarEnumerationTraits<TypeLeafKind>::enumeration(IO &io,
+ TypeLeafKind &Value) {
+ auto TypeLeafNames = getTypeLeafNames();
+ for (const auto &E : TypeLeafNames)
+ io.enumCase(Value, E.Name.str().c_str(), E.Value);
+}
+}
+}
+
+Error llvm::codeview::yaml::YamlTypeDumperCallbacks::visitTypeBegin(
+ CVType &CVR) {
+ YamlIO.mapRequired("Kind", CVR.Type);
+ return Error::success();
+}
+
+Error llvm::codeview::yaml::YamlTypeDumperCallbacks::visitMemberBegin(
+ CVMemberRecord &Record) {
+ YamlIO.mapRequired("Kind", Record.Kind);
+ return Error::success();
+}
+
+void llvm::codeview::yaml::YamlTypeDumperCallbacks::visitKnownRecordImpl(
+ const char *Name, CVType &CVR, FieldListRecord &FieldList) {
+ std::vector<llvm::pdb::yaml::PdbTpiFieldListRecord> FieldListRecords;
+ if (YamlIO.outputting()) {
+ // If we are outputting, then `FieldList.Data` contains a huge chunk of data
+ // representing the serialized list of members. We need to split it up into
+ // individual CVType records where each record represents an individual
+ // member. This way, we can simply map the entire thing as a Yaml sequence,
+ // which will recurse back to the standard handler for top-level fields
+ // (top-level and member fields all have the exact same Yaml syntax so use
+ // the same parser).
+ FieldListRecordSplitter Splitter(FieldListRecords);
+ CVTypeVisitor V(Splitter);
+ consumeError(V.visitFieldListMemberStream(FieldList.Data));
+ YamlIO.mapRequired("FieldList", FieldListRecords, Context);
+ } else {
+ // If we are not outputting, then the array contains no data starting out,
+ // and is instead populated from the sequence represented by the yaml --
+ // again, using the same logic that we use for top-level records.
+ assert(Context.ActiveSerializer && "There is no active serializer!");
+ codeview::TypeVisitorCallbackPipeline Pipeline;
+ pdb::TpiHashUpdater Hasher;
+
+ // For Yaml to PDB, dump it (to fill out the record fields from the Yaml)
+ // then serialize those fields to bytes, then update their hashes.
+ Pipeline.addCallbackToPipeline(Context.Dumper);
+ Pipeline.addCallbackToPipeline(*Context.ActiveSerializer);
+ Pipeline.addCallbackToPipeline(Hasher);
+
+ codeview::CVTypeVisitor Visitor(Pipeline);
+ YamlIO.mapRequired("FieldList", FieldListRecords, Visitor);
+ }
+}
+
+namespace llvm {
+namespace yaml {
+template <>
+struct MappingContextTraits<pdb::yaml::PdbTpiFieldListRecord,
+ pdb::yaml::SerializationContext> {
+ static void mapping(IO &IO, pdb::yaml::PdbTpiFieldListRecord &Obj,
+ pdb::yaml::SerializationContext &Context) {
+ assert(IO.outputting());
+ codeview::TypeVisitorCallbackPipeline Pipeline;
+
+ msf::ByteStream Data(Obj.Record.Data);
+ msf::StreamReader FieldReader(Data);
+ codeview::FieldListDeserializer Deserializer(FieldReader);
+
+ // For PDB to Yaml, deserialize into a high level record type, then dump
+ // it.
+ Pipeline.addCallbackToPipeline(Deserializer);
+ Pipeline.addCallbackToPipeline(Context.Dumper);
+
+ codeview::CVTypeVisitor Visitor(Pipeline);
+ consumeError(Visitor.visitMemberRecord(Obj.Record));
+ }
+};
+
+template <>
+struct MappingContextTraits<pdb::yaml::PdbTpiFieldListRecord,
+ codeview::CVTypeVisitor> {
+ static void mapping(IO &IO, pdb::yaml::PdbTpiFieldListRecord &Obj,
+ codeview::CVTypeVisitor &Visitor) {
+ consumeError(Visitor.visitMemberRecord(Obj.Record));
+ }
+};
+}
+}
diff --git a/contrib/llvm/tools/llvm-pdbdump/YamlTypeDumper.h b/contrib/llvm/tools/llvm-pdbdump/YamlTypeDumper.h
new file mode 100644
index 000000000000..3f15ba0bf85d
--- /dev/null
+++ b/contrib/llvm/tools/llvm-pdbdump/YamlTypeDumper.h
@@ -0,0 +1,116 @@
+//===- YamlTypeDumper.h --------------------------------------- -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVMPDBDUMP_YAMLTYPEDUMPER_H
+#define LLVM_TOOLS_LLVMPDBDUMP_YAMLTYPEDUMPER_H
+
+#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/TypeVisitorCallbacks.h"
+#include "llvm/Support/YAMLTraits.h"
+
+namespace llvm {
+namespace pdb {
+namespace yaml {
+struct SerializationContext;
+}
+}
+namespace codeview {
+namespace yaml {
+class YamlTypeDumperCallbacks : public TypeVisitorCallbacks {
+public:
+ YamlTypeDumperCallbacks(llvm::yaml::IO &IO,
+ llvm::pdb::yaml::SerializationContext &Context)
+ : YamlIO(IO), Context(Context) {}
+
+ virtual Error visitTypeBegin(CVType &Record) override;
+ virtual Error visitMemberBegin(CVMemberRecord &Record) override;
+
+#define TYPE_RECORD(EnumName, EnumVal, Name) \
+ Error visitKnownRecord(CVRecord<TypeLeafKind> &CVR, Name##Record &Record) \
+ override { \
+ visitKnownRecordImpl(#Name, CVR, Record); \
+ return Error::success(); \
+ }
+#define MEMBER_RECORD(EnumName, EnumVal, Name) \
+ Error visitKnownMember(CVMemberRecord &CVR, Name##Record &Record) override { \
+ visitKnownMemberImpl(#Name, Record); \
+ return Error::success(); \
+ }
+#define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
+#define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
+#include "llvm/DebugInfo/CodeView/TypeRecords.def"
+
+private:
+ template <typename T> void visitKnownMemberImpl(const char *Name, T &Record) {
+ YamlIO.mapRequired(Name, Record);
+ }
+
+ template <typename T>
+ void visitKnownRecordImpl(const char *Name, CVType &Type, T &Record) {
+ YamlIO.mapRequired(Name, Record);
+ }
+
+ void visitKnownRecordImpl(const char *Name, CVType &CVR,
+ FieldListRecord &FieldList);
+
+ llvm::yaml::IO &YamlIO;
+ llvm::pdb::yaml::SerializationContext &Context;
+};
+}
+}
+namespace pdb {
+namespace yaml {
+struct SerializationContext;
+}
+}
+}
+
+namespace llvm {
+namespace yaml {
+
+template <> struct ScalarTraits<APSInt> {
+ static void output(const APSInt &S, void *, llvm::raw_ostream &OS);
+ static StringRef input(StringRef Scalar, void *Ctx, APSInt &S);
+ static bool mustQuote(StringRef Scalar);
+};
+
+template <> struct ScalarTraits<codeview::TypeIndex> {
+ static void output(const codeview::TypeIndex &S, void *,
+ llvm::raw_ostream &OS);
+ static StringRef input(StringRef Scalar, void *Ctx, codeview::TypeIndex &S);
+ static bool mustQuote(StringRef Scalar);
+};
+
+template <> struct MappingTraits<codeview::MemberPointerInfo> {
+ static void mapping(IO &IO, codeview::MemberPointerInfo &Obj);
+};
+
+template <>
+struct MappingContextTraits<codeview::CVType, pdb::yaml::SerializationContext> {
+ static void mapping(IO &IO, codeview::CVType &Obj,
+ pdb::yaml::SerializationContext &Context);
+};
+
+template <> struct ScalarEnumerationTraits<codeview::TypeLeafKind> {
+ static void enumeration(IO &io, codeview::TypeLeafKind &Value);
+};
+
+#define TYPE_RECORD(EnumName, EnumVal, Name) \
+ template <> struct MappingTraits<codeview::Name##Record> { \
+ static void mapping(IO &IO, codeview::Name##Record &Obj); \
+ };
+#define MEMBER_RECORD(EnumName, EnumVal, Name) \
+ TYPE_RECORD(EnumName, EnumVal, Name)
+#define TYPE_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
+#define MEMBER_RECORD_ALIAS(EnumName, EnumVal, Name, AliasName)
+#include "llvm/DebugInfo/CodeView/TypeRecords.def"
+}
+}
+
+#endif
diff --git a/contrib/llvm/tools/llvm-pdbdump/llvm-pdbdump.cpp b/contrib/llvm/tools/llvm-pdbdump/llvm-pdbdump.cpp
index 62f215ddc8ae..b356a28d2189 100644
--- a/contrib/llvm/tools/llvm-pdbdump/llvm-pdbdump.cpp
+++ b/contrib/llvm/tools/llvm-pdbdump/llvm-pdbdump.cpp
@@ -29,7 +29,8 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Config/config.h"
-#include "llvm/DebugInfo/CodeView/ByteStream.h"
+#include "llvm/DebugInfo/MSF/ByteStream.h"
+#include "llvm/DebugInfo/MSF/MSFBuilder.h"
#include "llvm/DebugInfo/PDB/GenericError.h"
#include "llvm/DebugInfo/PDB/IPDBEnumChildren.h"
#include "llvm/DebugInfo/PDB/IPDBRawSymbol.h"
@@ -49,6 +50,8 @@
#include "llvm/DebugInfo/PDB/Raw/RawConstants.h"
#include "llvm/DebugInfo/PDB/Raw/RawError.h"
#include "llvm/DebugInfo/PDB/Raw/RawSession.h"
+#include "llvm/DebugInfo/PDB/Raw/TpiStream.h"
+#include "llvm/DebugInfo/PDB/Raw/TpiStreamBuilder.h"
#include "llvm/Support/COM.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ConvertUTF.h"
@@ -59,35 +62,16 @@
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/Process.h"
+#include "llvm/Support/Regex.h"
#include "llvm/Support/ScopedPrinter.h"
#include "llvm/Support/Signals.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
using namespace llvm::codeview;
+using namespace llvm::msf;
using namespace llvm::pdb;
-namespace {
-// A simple adapter that acts like a ByteStream but holds ownership over
-// and underlying FileOutputBuffer.
-class FileBufferByteStream : public ByteStream<true> {
-public:
- FileBufferByteStream(std::unique_ptr<FileOutputBuffer> Buffer)
- : ByteStream(MutableArrayRef<uint8_t>(Buffer->getBufferStart(),
- Buffer->getBufferEnd())),
- FileBuffer(std::move(Buffer)) {}
-
- Error commit() const override {
- if (FileBuffer->commit())
- return llvm::make_error<RawError>(raw_error_code::not_writable);
- return Error::success();
- }
-
-private:
- std::unique_ptr<FileOutputBuffer> FileBuffer;
-};
-}
-
namespace opts {
cl::SubCommand RawSubcommand("raw", "Dump raw structure of the PDB file");
@@ -186,6 +170,20 @@ cl::opt<bool> DumpStreamBlocks("stream-blocks",
cl::opt<bool> DumpStreamSummary("stream-summary",
cl::desc("dump summary of the PDB streams"),
cl::cat(MsfOptions), cl::sub(RawSubcommand));
+cl::opt<bool> DumpPageStats(
+ "page-stats",
+ cl::desc("dump allocation stats of the pages in the MSF file"),
+ cl::cat(MsfOptions), cl::sub(RawSubcommand));
+cl::opt<std::string>
+ DumpBlockRangeOpt("block-data", cl::value_desc("start[-end]"),
+ cl::desc("Dump binary data from specified range."),
+ cl::cat(MsfOptions), cl::sub(RawSubcommand));
+llvm::Optional<BlockRange> DumpBlockRange;
+
+cl::list<uint32_t>
+ DumpStreamData("stream-data", cl::CommaSeparated, cl::ZeroOrMore,
+ cl::desc("Dump binary data from specified streams."),
+ cl::cat(MsfOptions), cl::sub(RawSubcommand));
// TYPE OPTIONS
cl::opt<bool>
@@ -217,6 +215,8 @@ cl::opt<bool> DumpLineInfo("line-info",
cl::cat(FileOptions), cl::sub(RawSubcommand));
// SYMBOL OPTIONS
+cl::opt<bool> DumpGlobals("globals", cl::desc("dump globals stream data"),
+ cl::cat(SymbolOptions), cl::sub(RawSubcommand));
cl::opt<bool> DumpModuleSyms("module-syms", cl::desc("dump module symbols"),
cl::cat(SymbolOptions), cl::sub(RawSubcommand));
cl::opt<bool> DumpPublics("publics", cl::desc("dump Publics stream data"),
@@ -238,14 +238,6 @@ cl::opt<bool> DumpSectionHeaders("section-headers",
cl::opt<bool> DumpFpo("fpo", cl::desc("dump FPO records"), cl::cat(MiscOptions),
cl::sub(RawSubcommand));
-cl::opt<std::string> DumpStreamDataIdx("stream", cl::desc("dump stream data"),
- cl::cat(MiscOptions),
- cl::sub(RawSubcommand));
-cl::opt<std::string> DumpStreamDataName("stream-name",
- cl::desc("dump stream data"),
- cl::cat(MiscOptions),
- cl::sub(RawSubcommand));
-
cl::opt<bool> RawAll("all", cl::desc("Implies most other options."),
cl::cat(MiscOptions), cl::sub(RawSubcommand));
@@ -285,6 +277,29 @@ cl::opt<bool> PdbStream("pdb-stream",
cl::opt<bool> DbiStream("dbi-stream",
cl::desc("Dump the DBI Stream (Stream 2)"),
cl::sub(PdbToYamlSubcommand), cl::init(false));
+cl::opt<bool>
+ DbiModuleInfo("dbi-module-info",
+ cl::desc("Dump DBI Module Information (implies -dbi-stream)"),
+ cl::sub(PdbToYamlSubcommand), cl::init(false));
+
+cl::opt<bool> DbiModuleSyms(
+ "dbi-module-syms",
+ cl::desc("Dump DBI Module Information (implies -dbi-module-info)"),
+ cl::sub(PdbToYamlSubcommand), cl::init(false));
+
+cl::opt<bool> DbiModuleSourceFileInfo(
+ "dbi-module-source-info",
+ cl::desc(
+ "Dump DBI Module Source File Information (implies -dbi-module-info"),
+ cl::sub(PdbToYamlSubcommand), cl::init(false));
+
+cl::opt<bool> TpiStream("tpi-stream",
+ cl::desc("Dump the TPI Stream (Stream 3)"),
+ cl::sub(PdbToYamlSubcommand), cl::init(false));
+
+cl::opt<bool> IpiStream("ipi-stream",
+ cl::desc("Dump the IPI Stream (Stream 5)"),
+ cl::sub(PdbToYamlSubcommand), cl::init(false));
cl::list<std::string> InputFilename(cl::Positional,
cl::desc("<input PDB file>"), cl::Required,
@@ -295,6 +310,7 @@ cl::list<std::string> InputFilename(cl::Positional,
static ExitOnError ExitOnErr;
static void yamlToPdb(StringRef Path) {
+ BumpPtrAllocator Allocator;
ErrorOr<std::unique_ptr<MemoryBuffer>> ErrorOrBuffer =
MemoryBuffer::getFileOrSTDIN(Path, /*FileSize=*/-1,
/*RequiresNullTerminator=*/false);
@@ -306,54 +322,23 @@ static void yamlToPdb(StringRef Path) {
std::unique_ptr<MemoryBuffer> &Buffer = ErrorOrBuffer.get();
llvm::yaml::Input In(Buffer->getBuffer());
- pdb::yaml::PdbObject YamlObj;
+ pdb::yaml::PdbObject YamlObj(Allocator);
In >> YamlObj;
if (!YamlObj.Headers.hasValue())
ExitOnErr(make_error<GenericError>(generic_error_code::unspecified,
"Yaml does not contain MSF headers"));
- auto OutFileOrError = FileOutputBuffer::create(
- opts::yaml2pdb::YamlPdbOutputFile, YamlObj.Headers->FileSize);
- if (OutFileOrError.getError())
- ExitOnErr(make_error<GenericError>(generic_error_code::invalid_path,
- opts::yaml2pdb::YamlPdbOutputFile));
-
- auto FileByteStream =
- llvm::make_unique<FileBufferByteStream>(std::move(*OutFileOrError));
- PDBFileBuilder Builder(std::move(FileByteStream));
-
- ExitOnErr(Builder.initialize(YamlObj.Headers->SuperBlock));
- ExitOnErr(Builder.getMsfBuilder().setDirectoryBlocksHint(
- YamlObj.Headers->DirectoryBlocks));
- if (!YamlObj.StreamSizes.hasValue()) {
- ExitOnErr(make_error<GenericError>(
- generic_error_code::unspecified,
- "Cannot generate a PDB when stream sizes are not known"));
- }
-
- if (YamlObj.StreamMap.hasValue()) {
- if (YamlObj.StreamMap->size() != YamlObj.StreamSizes->size()) {
- ExitOnErr(make_error<GenericError>(generic_error_code::unspecified,
- "YAML specifies different number of "
- "streams in stream sizes and stream "
- "map"));
- }
+ PDBFileBuilder Builder(Allocator);
- auto &Sizes = *YamlObj.StreamSizes;
- auto &Map = *YamlObj.StreamMap;
- for (uint32_t I = 0; I < Sizes.size(); ++I) {
- uint32_t Size = Sizes[I];
- std::vector<uint32_t> Blocks;
- for (auto E : Map[I].Blocks)
- Blocks.push_back(E);
- ExitOnErr(Builder.getMsfBuilder().addStream(Size, Blocks));
- }
- } else {
- auto &Sizes = *YamlObj.StreamSizes;
- for (auto S : Sizes) {
- ExitOnErr(Builder.getMsfBuilder().addStream(S));
- }
- }
+ ExitOnErr(Builder.initialize(YamlObj.Headers->SuperBlock.BlockSize));
+ // Add each of the reserved streams. We ignore stream metadata in the
+ // yaml, because we will reconstruct our own view of the streams. For
+ // example, the YAML may say that there were 20 streams in the original
+ // PDB, but maybe we only dump a subset of those 20 streams, so we will
+ // have fewer, and the ones we do have may end up with different indices
+ // than the ones in the original PDB. So we just start with a clean slate.
+ for (uint32_t I = 0; I < kSpecialStreamCount; ++I)
+ ExitOnErr(Builder.getMsfBuilder().addStream(0));
if (YamlObj.PdbStream.hasValue()) {
auto &InfoBuilder = Builder.getInfoBuilder();
@@ -375,13 +360,28 @@ static void yamlToPdb(StringRef Path) {
DbiBuilder.setPdbDllRbld(YamlObj.DbiStream->PdbDllRbld);
DbiBuilder.setPdbDllVersion(YamlObj.DbiStream->PdbDllVersion);
DbiBuilder.setVersionHeader(YamlObj.DbiStream->VerHeader);
+ for (const auto &MI : YamlObj.DbiStream->ModInfos) {
+ ExitOnErr(DbiBuilder.addModuleInfo(MI.Obj, MI.Mod));
+ for (auto S : MI.SourceFiles)
+ ExitOnErr(DbiBuilder.addModuleSourceFile(MI.Mod, S));
+ }
+ }
+
+ if (YamlObj.TpiStream.hasValue()) {
+ auto &TpiBuilder = Builder.getTpiBuilder();
+ TpiBuilder.setVersionHeader(YamlObj.TpiStream->Version);
+ for (const auto &R : YamlObj.TpiStream->Records)
+ TpiBuilder.addTypeRecord(R.Record);
}
- auto Pdb = Builder.build();
- ExitOnErr(Pdb.takeError());
+ if (YamlObj.IpiStream.hasValue()) {
+ auto &IpiBuilder = Builder.getIpiBuilder();
+ IpiBuilder.setVersionHeader(YamlObj.IpiStream->Version);
+ for (const auto &R : YamlObj.IpiStream->Records)
+ IpiBuilder.addTypeRecord(R.Record);
+ }
- auto &PdbFile = *Pdb;
- ExitOnErr(PdbFile->commit());
+ ExitOnErr(Builder.commit(opts::yaml2pdb::YamlPdbOutputFile));
}
static void pdb2Yaml(StringRef Path) {
@@ -539,16 +539,33 @@ int main(int argc_, const char *argv_[]) {
llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
cl::ParseCommandLineOptions(argv.size(), argv.data(), "LLVM PDB Dumper\n");
+ if (!opts::raw::DumpBlockRangeOpt.empty()) {
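+ // The -block-data argument is either "start" or "start-end"; parse it into
+ // the optional DumpBlockRange used by the raw dumper.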
+ llvm::Regex R("^([0-9]+)(-([0-9]+))?$");
+ llvm::SmallVector<llvm::StringRef, 2> Matches;
+ if (!R.match(opts::raw::DumpBlockRangeOpt, &Matches)) {
+ errs() << "Argument '" << opts::raw::DumpBlockRangeOpt
+ << "' invalid format.\n";
+ errs().flush();
+ exit(1);
+ }
+ opts::raw::DumpBlockRange.emplace();
+ Matches[1].getAsInteger(10, opts::raw::DumpBlockRange->Min);
+ if (!Matches[3].empty()) {
+ opts::raw::DumpBlockRange->Max.emplace();
+ Matches[3].getAsInteger(10, *opts::raw::DumpBlockRange->Max);
+ }
+ }
- // These options are shared by two subcommands.
- if ((opts::PdbToYamlSubcommand || opts::RawSubcommand) && opts::raw::RawAll) {
+ if (opts::RawSubcommand && opts::raw::RawAll) {
opts::raw::DumpHeaders = true;
opts::raw::DumpModules = true;
opts::raw::DumpModuleFiles = true;
opts::raw::DumpModuleSyms = true;
+ opts::raw::DumpGlobals = true;
opts::raw::DumpPublics = true;
opts::raw::DumpSectionHeaders = true;
opts::raw::DumpStreamSummary = true;
+ opts::raw::DumpPageStats = true;
opts::raw::DumpStreamBlocks = true;
opts::raw::DumpTpiRecords = true;
opts::raw::DumpTpiHash = true;
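A minimal standalone sketch of the Min[-Max] range parsing added above, using std::regex and std::optional instead of llvm::Regex and llvm::Optional; the option strings in main() are only illustrative, not real llvm-pdbdump flags.

    #include <cstdint>
    #include <iostream>
    #include <optional>
    #include <regex>
    #include <string>

    struct BlockRange {
      uint32_t Min = 0;
      std::optional<uint32_t> Max; // absent means "open ended"
    };

    static std::optional<BlockRange> parseBlockRange(const std::string &Arg) {
      static const std::regex Pattern("^([0-9]+)(-([0-9]+))?$");
      std::smatch M;
      if (!std::regex_match(Arg, M, Pattern))
        return std::nullopt;        // e.g. "10-" or "a-b" are rejected
      BlockRange R;
      R.Min = static_cast<uint32_t>(std::stoul(M[1].str()));
      if (M[3].matched)             // the optional "-Max" part was present
        R.Max = static_cast<uint32_t>(std::stoul(M[3].str()));
      return R;
    }

    int main() {
      for (const char *Arg : {"16", "16-32", "bogus"}) {
        if (auto R = parseBlockRange(Arg))
          std::cout << Arg << " -> Min=" << R->Min << " Max="
                    << (R->Max ? std::to_string(*R->Max) : "<open>") << "\n";
        else
          std::cout << Arg << " -> invalid format\n";
      }
    }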
@@ -579,12 +596,10 @@ int main(int argc_, const char *argv_[]) {
}
// When adding filters for excluded compilands and types, we need to
- // remember
- // that these are regexes. So special characters such as * and \ need to be
- // escaped in the regex. In the case of a literal \, this means it needs to
- // be escaped again in the C++. So matching a single \ in the input
- // requires
- // 4 \es in the C++.
+ // remember that these are regexes. So special characters such as * and \
+ // need to be escaped in the regex. In the case of a literal \, this means
+ // it needs to be escaped again in the C++. So matching a single \ in the
+ // input requires 4 \es in the C++.
if (opts::pretty::ExcludeCompilerGenerated) {
opts::pretty::ExcludeTypes.push_back("__vc_attributes");
opts::pretty::ExcludeCompilands.push_back("\\* Linker \\*");
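A quick illustration of the escaping rule that comment describes (the variable name is only for illustration):

    // To match one literal backslash in the input, the regex needs "\\",
    // and each of those two characters must itself be escaped in the C++
    // string literal, so the source ends up with four backslashes:
    const char *MatchOneBackslash = "\\\\";   // regex: \\  -> matches a single \
    // '*' only needs one level of escaping ("\\*"): it is a regex
    // metacharacter, but not special inside a C++ string literal.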
diff --git a/contrib/llvm/tools/llvm-pdbdump/llvm-pdbdump.h b/contrib/llvm/tools/llvm-pdbdump/llvm-pdbdump.h
index 0a66515f4a01..42d847a23fcc 100644
--- a/contrib/llvm/tools/llvm-pdbdump/llvm-pdbdump.h
+++ b/contrib/llvm/tools/llvm-pdbdump/llvm-pdbdump.h
@@ -10,6 +10,7 @@
#ifndef LLVM_TOOLS_LLVMPDBDUMP_LLVMPDBDUMP_H
#define LLVM_TOOLS_LLVMPDBDUMP_LLVMPDBDUMP_H
+#include "llvm/ADT/Optional.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
@@ -34,16 +35,24 @@ extern llvm::cl::list<std::string> IncludeCompilands;
}
namespace raw {
+struct BlockRange {
+ uint32_t Min;
+ llvm::Optional<uint32_t> Max;
+};
+
+extern llvm::Optional<BlockRange> DumpBlockRange;
+extern llvm::cl::list<uint32_t> DumpStreamData;
+
+extern llvm::cl::opt<bool> DumpGlobals;
extern llvm::cl::opt<bool> DumpHeaders;
extern llvm::cl::opt<bool> DumpStreamBlocks;
extern llvm::cl::opt<bool> DumpStreamSummary;
+extern llvm::cl::opt<bool> DumpPageStats;
extern llvm::cl::opt<bool> DumpTpiHash;
extern llvm::cl::opt<bool> DumpTpiRecordBytes;
extern llvm::cl::opt<bool> DumpTpiRecords;
extern llvm::cl::opt<bool> DumpIpiRecords;
extern llvm::cl::opt<bool> DumpIpiRecordBytes;
-extern llvm::cl::opt<std::string> DumpStreamDataIdx;
-extern llvm::cl::opt<std::string> DumpStreamDataName;
extern llvm::cl::opt<bool> DumpModules;
extern llvm::cl::opt<bool> DumpModuleFiles;
extern llvm::cl::opt<bool> DumpModuleSyms;
@@ -62,6 +71,11 @@ extern llvm::cl::opt<bool> StreamMetadata;
extern llvm::cl::opt<bool> StreamDirectory;
extern llvm::cl::opt<bool> PdbStream;
extern llvm::cl::opt<bool> DbiStream;
+extern llvm::cl::opt<bool> DbiModuleInfo;
+extern llvm::cl::opt<bool> DbiModuleSyms;
+extern llvm::cl::opt<bool> DbiModuleSourceFileInfo;
+extern llvm::cl::opt<bool> TpiStream;
+extern llvm::cl::opt<bool> IpiStream;
extern llvm::cl::list<std::string> InputFilename;
}
}
diff --git a/contrib/llvm/tools/llvm-profdata/llvm-profdata.cpp b/contrib/llvm/tools/llvm-profdata/llvm-profdata.cpp
index 8e4b4c3d4ed5..6715566a166c 100644
--- a/contrib/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/contrib/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -29,6 +29,7 @@
#include "llvm/Support/Path.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/Signals.h"
+#include "llvm/Support/ThreadPool.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -108,18 +109,79 @@ static void handleMergeWriterError(Error E, StringRef WhenceFile = "",
}
struct WeightedFile {
- StringRef Filename;
+ std::string Filename;
uint64_t Weight;
-
- WeightedFile() {}
-
- WeightedFile(StringRef F, uint64_t W) : Filename{F}, Weight{W} {}
};
typedef SmallVector<WeightedFile, 5> WeightedFileVector;
+/// Keep track of merged data and reported errors.
+struct WriterContext {
+ std::mutex Lock;
+ InstrProfWriter Writer;
+ Error Err;
+ StringRef ErrWhence;
+ std::mutex &ErrLock;
+ SmallSet<instrprof_error, 4> &WriterErrorCodes;
+
+ WriterContext(bool IsSparse, std::mutex &ErrLock,
+ SmallSet<instrprof_error, 4> &WriterErrorCodes)
+ : Lock(), Writer(IsSparse), Err(Error::success()), ErrWhence(""),
+ ErrLock(ErrLock), WriterErrorCodes(WriterErrorCodes) {}
+};
+
+/// Load an input into a writer context.
+static void loadInput(const WeightedFile &Input, WriterContext *WC) {
+ std::unique_lock<std::mutex> CtxGuard{WC->Lock};
+
+ // If there's a pending hard error, don't do more work.
+ if (WC->Err)
+ return;
+
+ WC->ErrWhence = Input.Filename;
+
+ auto ReaderOrErr = InstrProfReader::create(Input.Filename);
+ if (Error E = ReaderOrErr.takeError()) {
+    // Skip empty profiles by returning silently.
+ instrprof_error IPE = InstrProfError::take(std::move(E));
+ if (IPE != instrprof_error::empty_raw_profile)
+ WC->Err = make_error<InstrProfError>(IPE);
+ return;
+ }
+
+ auto Reader = std::move(ReaderOrErr.get());
+ bool IsIRProfile = Reader->isIRLevelProfile();
+ if (WC->Writer.setIsIRLevelProfile(IsIRProfile)) {
+ WC->Err = make_error<StringError>(
+ "Merge IR generated profile with Clang generated profile.",
+ std::error_code());
+ return;
+ }
+
+ for (auto &I : *Reader) {
+ const StringRef FuncName = I.Name;
+ if (Error E = WC->Writer.addRecord(std::move(I), Input.Weight)) {
+        // Only show the hint the first time an error occurs.
+ instrprof_error IPE = InstrProfError::take(std::move(E));
+ std::unique_lock<std::mutex> ErrGuard{WC->ErrLock};
+ bool firstTime = WC->WriterErrorCodes.insert(IPE).second;
+ handleMergeWriterError(make_error<InstrProfError>(IPE), Input.Filename,
+ FuncName, firstTime);
+ }
+ }
+ if (Reader->hasError())
+ WC->Err = Reader->getError();
+}
+
+/// Merge the \p Src writer context into \p Dst.
+static void mergeWriterContexts(WriterContext *Dst, WriterContext *Src) {
+ if (Error E = Dst->Writer.mergeRecordsFromWriter(std::move(Src->Writer)))
+ Dst->Err = std::move(E);
+}
+
static void mergeInstrProfile(const WeightedFileVector &Inputs,
StringRef OutputFilename,
- ProfileFormat OutputFormat, bool OutputSparse) {
+ ProfileFormat OutputFormat, bool OutputSparse,
+ unsigned NumThreads) {
if (OutputFilename.compare("-") == 0)
exitWithError("Cannot write indexed profdata format to stdout.");
@@ -131,30 +193,59 @@ static void mergeInstrProfile(const WeightedFileVector &Inputs,
if (EC)
exitWithErrorCode(EC, OutputFilename);
- InstrProfWriter Writer(OutputSparse);
+ std::mutex ErrorLock;
SmallSet<instrprof_error, 4> WriterErrorCodes;
- for (const auto &Input : Inputs) {
- auto ReaderOrErr = InstrProfReader::create(Input.Filename);
- if (Error E = ReaderOrErr.takeError())
- exitWithError(std::move(E), Input.Filename);
-
- auto Reader = std::move(ReaderOrErr.get());
- bool IsIRProfile = Reader->isIRLevelProfile();
- if (Writer.setIsIRLevelProfile(IsIRProfile))
- exitWithError("Merge IR generated profile with Clang generated profile.");
-
- for (auto &I : *Reader) {
- if (Error E = Writer.addRecord(std::move(I), Input.Weight)) {
- // Only show hint the first time an error occurs.
- instrprof_error IPE = InstrProfError::take(std::move(E));
- bool firstTime = WriterErrorCodes.insert(IPE).second;
- handleMergeWriterError(make_error<InstrProfError>(IPE), Input.Filename,
- I.Name, firstTime);
- }
+
+ // If NumThreads is not specified, auto-detect a good default.
+ if (NumThreads == 0)
+ NumThreads = std::max(1U, std::min(std::thread::hardware_concurrency(),
+ unsigned(Inputs.size() / 2)));
+
+ // Initialize the writer contexts.
+ SmallVector<std::unique_ptr<WriterContext>, 4> Contexts;
+ for (unsigned I = 0; I < NumThreads; ++I)
+ Contexts.emplace_back(llvm::make_unique<WriterContext>(
+ OutputSparse, ErrorLock, WriterErrorCodes));
+
+ if (NumThreads == 1) {
+ for (const auto &Input : Inputs)
+ loadInput(Input, Contexts[0].get());
+ } else {
+ ThreadPool Pool(NumThreads);
+
+ // Load the inputs in parallel (N/NumThreads serial steps).
+ unsigned Ctx = 0;
+ for (const auto &Input : Inputs) {
+ Pool.async(loadInput, Input, Contexts[Ctx].get());
+ Ctx = (Ctx + 1) % NumThreads;
}
- if (Reader->hasError())
- exitWithError(Reader->getError(), Input.Filename);
+ Pool.wait();
+
+ // Merge the writer contexts together (~ lg(NumThreads) serial steps).
+ unsigned Mid = Contexts.size() / 2;
+ unsigned End = Contexts.size();
+ assert(Mid > 0 && "Expected more than one context");
+ do {
+ for (unsigned I = 0; I < Mid; ++I)
+ Pool.async(mergeWriterContexts, Contexts[I].get(),
+ Contexts[I + Mid].get());
+ Pool.wait();
+ if (End & 1) {
+ Pool.async(mergeWriterContexts, Contexts[0].get(),
+ Contexts[End - 1].get());
+ Pool.wait();
+ }
+ End = Mid;
+ Mid /= 2;
+ } while (Mid > 0);
}
+
+ // Handle deferred hard errors encountered during merging.
+ for (std::unique_ptr<WriterContext> &WC : Contexts)
+ if (WC->Err)
+ exitWithError(std::move(WC->Err), WC->ErrWhence);
+
+ InstrProfWriter &Writer = Contexts[0]->Writer;
if (OutputFormat == PF_Text)
Writer.writeText(Output);
else
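With the heuristic above, 16 inputs on an 8-core machine get min(8, 16/2) = 8 contexts, while 3 inputs get max(1, min(cores, 1)) = 1 and take the serial path. The merge itself is a pairwise reduction: context I is folded into context I + Mid, the range is halved, and an odd leftover is folded into slot 0, giving roughly log2(NumThreads) serial rounds. A standalone sketch of that control flow, with plain integers standing in for writer contexts (the real code dispatches each pair onto a ThreadPool):

    #include <cassert>
    #include <iostream>
    #include <vector>

    static void mergeInto(long &Dst, long &Src) { Dst += Src; Src = 0; }

    static long treeMerge(std::vector<long> Counts) {
      assert(!Counts.empty() && "need at least one context");
      unsigned End = Counts.size();
      unsigned Mid = End / 2;
      while (Mid > 0) {
        for (unsigned I = 0; I < Mid; ++I)
          mergeInto(Counts[I], Counts[I + Mid]);  // independent pairs, run in parallel
        if (End & 1)
          mergeInto(Counts[0], Counts[End - 1]);  // fold the odd leftover element
        End = Mid;
        Mid /= 2;
      }
      return Counts[0];
    }

    int main() {
      std::cout << treeMerge({1, 2, 3, 4, 5}) << "\n"; // prints 15
    }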
@@ -216,11 +307,7 @@ static WeightedFile parseWeightedFile(const StringRef &WeightedFilename) {
if (WeightStr.getAsInteger(10, Weight) || Weight < 1)
exitWithError("Input weight must be a positive integer.");
- if (!sys::fs::exists(FileName))
- exitWithErrorCode(make_error_code(errc::no_such_file_or_directory),
- FileName);
-
- return WeightedFile(FileName, Weight);
+ return {FileName, Weight};
}
static std::unique_ptr<MemoryBuffer>
@@ -235,6 +322,40 @@ getInputFilenamesFileBuf(const StringRef &InputFilenamesFile) {
return std::move(*BufOrError);
}
+static void addWeightedInput(WeightedFileVector &WNI, const WeightedFile &WF) {
+ StringRef Filename = WF.Filename;
+ uint64_t Weight = WF.Weight;
+
+  // If it's STDIN, just pass it on.
+ if (Filename == "-") {
+ WNI.push_back({Filename, Weight});
+ return;
+ }
+
+ llvm::sys::fs::file_status Status;
+ llvm::sys::fs::status(Filename, Status);
+ if (!llvm::sys::fs::exists(Status))
+ exitWithErrorCode(make_error_code(errc::no_such_file_or_directory),
+ Filename);
+  // If it's a regular file, collect it.
+ if (llvm::sys::fs::is_regular_file(Status)) {
+ WNI.push_back({Filename, Weight});
+ return;
+ }
+
+ if (llvm::sys::fs::is_directory(Status)) {
+ std::error_code EC;
+ for (llvm::sys::fs::recursive_directory_iterator F(Filename, EC), E;
+ F != E && !EC; F.increment(EC)) {
+ if (llvm::sys::fs::is_regular_file(F->path())) {
+ addWeightedInput(WNI, {F->path(), Weight});
+ }
+ }
+ if (EC)
+ exitWithErrorCode(EC, Filename);
+ }
+}
+
static void parseInputFilenamesFile(MemoryBuffer *Buffer,
WeightedFileVector &WFV) {
if (!Buffer)
@@ -250,9 +371,9 @@ static void parseInputFilenamesFile(MemoryBuffer *Buffer,
continue;
// If there's no comma, it's an unweighted profile.
else if (SanitizedEntry.find(',') == StringRef::npos)
- WFV.emplace_back(SanitizedEntry, 1);
+ addWeightedInput(WFV, {SanitizedEntry, 1});
else
- WFV.emplace_back(parseWeightedFile(SanitizedEntry));
+ addWeightedInput(WFV, parseWeightedFile(SanitizedEntry));
}
}
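A standalone sketch of the expansion logic above using std::filesystem rather than llvm::sys::fs: "-" passes straight through, regular files are taken as-is, and directories are walked recursively with every file inheriting the directory's weight. Names and the directory in main() are illustrative only.

    #include <cstdint>
    #include <filesystem>
    #include <string>
    #include <vector>

    struct Weighted { std::string Path; uint64_t Weight; };

    static void addWeighted(std::vector<Weighted> &Out, const std::string &Path,
                            uint64_t Weight) {
      namespace fs = std::filesystem;
      if (Path == "-" || fs::is_regular_file(Path)) {
        Out.push_back({Path, Weight});
        return;
      }
      if (fs::is_directory(Path))
        for (const auto &Entry : fs::recursive_directory_iterator(Path))
          if (Entry.is_regular_file())
            Out.push_back({Entry.path().string(), Weight});
    }

    int main() {
      std::vector<Weighted> Inputs;
      addWeighted(Inputs, "-", 1);         // stdin passes straight through
      addWeighted(Inputs, "profiles", 3);  // hypothetical directory name
    }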
@@ -278,24 +399,28 @@ static int merge_main(int argc, const char *argv[]) {
cl::opt<ProfileKinds> ProfileKind(
cl::desc("Profile kind:"), cl::init(instr),
cl::values(clEnumVal(instr, "Instrumentation profile (default)"),
- clEnumVal(sample, "Sample profile"), clEnumValEnd));
+ clEnumVal(sample, "Sample profile")));
cl::opt<ProfileFormat> OutputFormat(
cl::desc("Format of output profile"), cl::init(PF_Binary),
cl::values(clEnumValN(PF_Binary, "binary", "Binary encoding (default)"),
clEnumValN(PF_Text, "text", "Text encoding"),
clEnumValN(PF_GCC, "gcc",
- "GCC encoding (only meaningful for -sample)"),
- clEnumValEnd));
+ "GCC encoding (only meaningful for -sample)")));
cl::opt<bool> OutputSparse("sparse", cl::init(false),
cl::desc("Generate a sparse profile (only meaningful for -instr)"));
+ cl::opt<unsigned> NumThreads(
+ "num-threads", cl::init(0),
+ cl::desc("Number of merge threads to use (default: autodetect)"));
+ cl::alias NumThreadsA("j", cl::desc("Alias for --num-threads"),
+ cl::aliasopt(NumThreads));
cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n");
WeightedFileVector WeightedInputs;
for (StringRef Filename : InputFilenames)
- WeightedInputs.emplace_back(Filename, 1);
+ addWeightedInput(WeightedInputs, {Filename, 1});
for (StringRef WeightedFilename : WeightedInputFilenames)
- WeightedInputs.emplace_back(parseWeightedFile(WeightedFilename));
+ addWeightedInput(WeightedInputs, parseWeightedFile(WeightedFilename));
// Make sure that the file buffer stays alive for the duration of the
// weighted input vector's lifetime.
@@ -314,7 +439,7 @@ static int merge_main(int argc, const char *argv[]) {
if (ProfileKind == instr)
mergeInstrProfile(WeightedInputs, OutputFilename, OutputFormat,
- OutputSparse);
+ OutputSparse, NumThreads);
else
mergeSampleProfile(WeightedInputs, OutputFilename, OutputFormat);
@@ -343,6 +468,7 @@ static int showInstrProfile(const std::string &Filename, bool ShowCounts,
uint64_t TotalNumValueSites = 0;
uint64_t TotalNumValueSitesWithValueProfile = 0;
uint64_t TotalNumValues = 0;
+ std::vector<unsigned> ICHistogram;
for (const auto &Func : *Reader) {
bool Show =
ShowAllFunctions || (!ShowFunction.empty() &&
@@ -395,8 +521,12 @@ static int showInstrProfile(const std::string &Filename, bool ShowCounts,
std::unique_ptr<InstrProfValueData[]> VD =
Func.getValueForSite(IPVK_IndirectCallTarget, I);
TotalNumValues += NV;
- if (NV)
+ if (NV) {
TotalNumValueSitesWithValueProfile++;
+ if (NV > ICHistogram.size())
+ ICHistogram.resize(NV, 0);
+ ICHistogram[NV - 1]++;
+ }
for (uint32_t V = 0; V < NV; V++) {
OS << "\t[ " << I << ", ";
OS << Symtab.getFuncName(VD[V].Value) << ", " << VD[V].Count
@@ -423,6 +553,11 @@ static int showInstrProfile(const std::string &Filename, bool ShowCounts,
OS << "Total Number of Sites With Values : "
<< TotalNumValueSitesWithValueProfile << "\n";
OS << "Total Number of Profiled Values : " << TotalNumValues << "\n";
+
+ OS << "IC Value histogram : \n\tNumTargets, SiteCount\n";
+ for (unsigned I = 0; I < ICHistogram.size(); I++) {
+ OS << "\t" << I + 1 << ", " << ICHistogram[I] << "\n";
+ }
}
if (ShowDetailedSummary) {
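A tiny illustration of the histogram bookkeeping in the loop above: index I counts the indirect-call sites that saw exactly I + 1 targets, so the printed table maps NumTargets to SiteCount.

    #include <cstdio>
    #include <vector>

    int main() {
      std::vector<unsigned> ICHistogram;
      for (unsigned NV : {1u, 3u, 1u}) {      // targets seen at three sites
        if (NV > ICHistogram.size())
          ICHistogram.resize(NV, 0);
        ICHistogram[NV - 1]++;
      }
      std::printf("NumTargets, SiteCount\n");
      for (unsigned I = 0; I < ICHistogram.size(); I++)
        std::printf("%u, %u\n", I + 1, ICHistogram[I]); // prints 1,2  2,0  3,1
    }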
@@ -492,7 +627,7 @@ static int show_main(int argc, const char *argv[]) {
cl::opt<ProfileKinds> ProfileKind(
cl::desc("Profile kind:"), cl::init(instr),
cl::values(clEnumVal(instr, "Instrumentation profile (default)"),
- clEnumVal(sample, "Sample profile"), clEnumValEnd));
+ clEnumVal(sample, "Sample profile")));
cl::ParseCommandLineOptions(argc, argv, "LLVM profile data summary\n");
@@ -545,6 +680,7 @@ int main(int argc, const char *argv[]) {
errs() << "OVERVIEW: LLVM profile data tools\n\n"
<< "USAGE: " << ProgName << " <command> [args...]\n"
<< "USAGE: " << ProgName << " <command> -help\n\n"
+ << "See each individual command --help for more details.\n"
<< "Available commands: merge, show\n";
return 0;
}
diff --git a/contrib/llvm/tools/llvm-readobj/ARMEHABIPrinter.h b/contrib/llvm/tools/llvm-readobj/ARMEHABIPrinter.h
index 59c9b713d85e..903a246ccfd8 100644
--- a/contrib/llvm/tools/llvm-readobj/ARMEHABIPrinter.h
+++ b/contrib/llvm/tools/llvm-readobj/ARMEHABIPrinter.h
@@ -349,11 +349,12 @@ template <typename ET>
ErrorOr<StringRef>
PrinterContext<ET>::FunctionAtAddress(unsigned Section,
uint64_t Address) const {
- ErrorOr<StringRef> StrTableOrErr = ELF->getStringTableForSymtab(*Symtab);
- error(StrTableOrErr.getError());
+ auto StrTableOrErr = ELF->getStringTableForSymtab(*Symtab);
+ if (!StrTableOrErr)
+ error(StrTableOrErr.takeError());
StringRef StrTable = *StrTableOrErr;
- for (const Elf_Sym &Sym : ELF->symbols(Symtab))
+ for (const Elf_Sym &Sym : unwrapOrError(ELF->symbols(Symtab)))
if (Sym.st_shndx == Section && Sym.st_value == Address &&
Sym.getType() == ELF::STT_FUNC) {
auto NameOrErr = Sym.getName(StrTable);
@@ -379,15 +380,16 @@ PrinterContext<ET>::FindExceptionTable(unsigned IndexSectionIndex,
/// handling table. Use this symbol to recover the actual exception handling
/// table.
- for (const Elf_Shdr &Sec : ELF->sections()) {
+ for (const Elf_Shdr &Sec : unwrapOrError(ELF->sections())) {
if (Sec.sh_type != ELF::SHT_REL || Sec.sh_info != IndexSectionIndex)
continue;
- ErrorOr<const Elf_Shdr *> SymTabOrErr = ELF->getSection(Sec.sh_link);
- error(SymTabOrErr.getError());
+ auto SymTabOrErr = ELF->getSection(Sec.sh_link);
+ if (!SymTabOrErr)
+ error(SymTabOrErr.takeError());
const Elf_Shdr *SymTab = *SymTabOrErr;
- for (const Elf_Rel &R : ELF->rels(&Sec)) {
+ for (const Elf_Rel &R : unwrapOrError(ELF->rels(&Sec))) {
if (R.r_offset != static_cast<unsigned>(IndexTableOffset))
continue;
@@ -396,12 +398,12 @@ PrinterContext<ET>::FindExceptionTable(unsigned IndexSectionIndex,
RelA.r_info = R.r_info;
RelA.r_addend = 0;
- const Elf_Sym *Symbol = ELF->getRelocationSymbol(&RelA, SymTab);
+ const Elf_Sym *Symbol =
+ unwrapOrError(ELF->getRelocationSymbol(&RelA, SymTab));
- ErrorOr<const Elf_Shdr *> Ret =
- ELF->getSection(Symbol, SymTab, ShndxTable);
- if (std::error_code EC = Ret.getError())
- report_fatal_error(EC.message());
+ auto Ret = ELF->getSection(Symbol, SymTab, ShndxTable);
+ if (!Ret)
+ report_fatal_error(errorToErrorCode(Ret.takeError()).message());
return *Ret;
}
}
@@ -412,7 +414,7 @@ template <typename ET>
void PrinterContext<ET>::PrintExceptionTable(const Elf_Shdr *IT,
const Elf_Shdr *EHT,
uint64_t TableEntryOffset) const {
- ErrorOr<ArrayRef<uint8_t> > Contents = ELF->getSectionContents(EHT);
+ Expected<ArrayRef<uint8_t>> Contents = ELF->getSectionContents(EHT);
if (!Contents)
return;
@@ -479,7 +481,7 @@ void PrinterContext<ET>::PrintOpcodes(const uint8_t *Entry,
template <typename ET>
void PrinterContext<ET>::PrintIndexTable(unsigned SectionIndex,
const Elf_Shdr *IT) const {
- ErrorOr<ArrayRef<uint8_t> > Contents = ELF->getSectionContents(IT);
+ Expected<ArrayRef<uint8_t>> Contents = ELF->getSectionContents(IT);
if (!Contents)
return;
@@ -532,7 +534,7 @@ void PrinterContext<ET>::PrintIndexTable(unsigned SectionIndex,
const Elf_Shdr *EHT =
FindExceptionTable(SectionIndex, Entry * IndexTableEntrySize + 4);
- if (ErrorOr<StringRef> Name = ELF->getSectionName(EHT))
+ if (auto Name = ELF->getSectionName(EHT))
SW.printString("ExceptionHandlingTable", *Name);
uint64_t TableEntryOffset = PREL31(Word1, IT->sh_addr);
@@ -548,12 +550,12 @@ void PrinterContext<ET>::PrintUnwindInformation() const {
DictScope UI(SW, "UnwindInformation");
int SectionIndex = 0;
- for (const Elf_Shdr &Sec : ELF->sections()) {
+ for (const Elf_Shdr &Sec : unwrapOrError(ELF->sections())) {
if (Sec.sh_type == ELF::SHT_ARM_EXIDX) {
DictScope UIT(SW, "UnwindIndexTable");
SW.printNumber("SectionIndex", SectionIndex);
- if (ErrorOr<StringRef> SectionName = ELF->getSectionName(&Sec))
+ if (auto SectionName = ELF->getSectionName(&Sec))
SW.printString("SectionName", *SectionName);
SW.printHex("SectionOffset", Sec.sh_offset);
diff --git a/contrib/llvm/tools/llvm-readobj/COFFDumper.cpp b/contrib/llvm/tools/llvm-readobj/COFFDumper.cpp
index 348f5b435564..0ca186519cd2 100644
--- a/contrib/llvm/tools/llvm-readobj/COFFDumper.cpp
+++ b/contrib/llvm/tools/llvm-readobj/COFFDumper.cpp
@@ -22,11 +22,10 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/DebugInfo/CodeView/ByteStream.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/Line.h"
-#include "llvm/DebugInfo/CodeView/MemoryTypeTableBuilder.h"
#include "llvm/DebugInfo/CodeView/RecordSerialization.h"
+#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
#include "llvm/DebugInfo/CodeView/SymbolDumpDelegate.h"
#include "llvm/DebugInfo/CodeView/SymbolDumper.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
@@ -34,6 +33,8 @@
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/DebugInfo/CodeView/TypeStreamMerger.h"
+#include "llvm/DebugInfo/CodeView/TypeTableBuilder.h"
+#include "llvm/DebugInfo/MSF/ByteStream.h"
#include "llvm/Object/COFF.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/COFF.h"
@@ -53,6 +54,7 @@
using namespace llvm;
using namespace llvm::object;
using namespace llvm::codeview;
+using namespace llvm::msf;
using namespace llvm::support;
using namespace llvm::Win64EH;
@@ -77,8 +79,7 @@ public:
void printCOFFBaseReloc() override;
void printCOFFDebugDirectory() override;
void printCodeViewDebugInfo() override;
- void
- mergeCodeViewTypes(llvm::codeview::MemoryTypeTableBuilder &CVTypes) override;
+ void mergeCodeViewTypes(llvm::codeview::TypeTableBuilder &CVTypes) override;
void printStackMap() const override;
private:
void printSymbol(const SymbolRef &Sym);
@@ -152,8 +153,13 @@ public:
Sec = Obj->getCOFFSection(SR);
}
- uint32_t getRecordOffset(ArrayRef<uint8_t> Record) override {
- return Record.data() - SectionContents.bytes_begin();
+ uint32_t getRecordOffset(msf::StreamReader Reader) override {
+ ArrayRef<uint8_t> Data;
+ if (auto EC = Reader.readLongestContiguousChunk(Data)) {
+ llvm::consumeError(std::move(EC));
+ return 0;
+ }
+ return Data.data() - SectionContents.bytes_begin();
}
void printRelocatedField(StringRef Label, uint32_t RelocOffset,
@@ -669,14 +675,16 @@ void COFFDumper::printCOFFDebugDirectory() {
W.printHex("AddressOfRawData", D.AddressOfRawData);
W.printHex("PointerToRawData", D.PointerToRawData);
if (D.Type == COFF::IMAGE_DEBUG_TYPE_CODEVIEW) {
- const debug_pdb_info *PDBInfo;
+ const codeview::DebugInfo *DebugInfo;
StringRef PDBFileName;
- error(Obj->getDebugPDBInfo(&D, PDBInfo, PDBFileName));
+ error(Obj->getDebugPDBInfo(&D, DebugInfo, PDBFileName));
DictScope PDBScope(W, "PDBInfo");
- W.printHex("PDBSignature", PDBInfo->Signature);
- W.printBinary("PDBGUID", makeArrayRef(PDBInfo->Guid));
- W.printNumber("PDBAge", PDBInfo->Age);
- W.printString("PDBFileName", PDBFileName);
+ W.printHex("PDBSignature", DebugInfo->Signature.CVSignature);
+ if (DebugInfo->Signature.CVSignature == OMF::Signature::PDB70) {
+ W.printBinary("PDBGUID", makeArrayRef(DebugInfo->PDB70.Signature));
+ W.printNumber("PDBAge", DebugInfo->PDB70.Age);
+ W.printString("PDBFileName", PDBFileName);
+ }
} else {
// FIXME: Type values of 12 and 13 are commonly observed but are not in
// the documented type enum. Figure out what they mean.
@@ -831,8 +839,10 @@ void COFFDumper::printCodeViewSymbolSection(StringRef SectionName,
}
case ModuleSubstreamKind::FrameData: {
// First four bytes is a relocation against the function.
+ msf::ByteStream S(Contents);
+ msf::StreamReader SR(S);
const uint32_t *CodePtr;
- error(consumeObject(Contents, CodePtr));
+ error(SR.readObject(CodePtr));
StringRef LinkageName;
error(resolveSymbolName(Obj->getCOFFSection(Section), SectionContents,
CodePtr, LinkageName));
@@ -840,9 +850,9 @@ void COFFDumper::printCodeViewSymbolSection(StringRef SectionName,
// To find the active frame description, search this array for the
// smallest PC range that includes the current PC.
- while (!Contents.empty()) {
+ while (!SR.empty()) {
const FrameData *FD;
- error(consumeObject(Contents, FD));
+ error(SR.readObject(FD));
if (FD->FrameFunc >= CVStringTable.size())
error(object_error::parse_failed);
@@ -953,7 +963,7 @@ void COFFDumper::printCodeViewSymbolsSubsection(StringRef Subsection,
SectionContents);
CVSymbolDumper CVSD(W, CVTD, std::move(CODD), opts::CodeViewSubsectionBytes);
- ByteStream<> Stream(BinaryData);
+ ByteStream Stream(BinaryData);
CVSymbolArray Symbols;
StreamReader Reader(Stream);
if (auto EC = Reader.readArray(Symbols, Reader.getLength())) {
@@ -962,19 +972,20 @@ void COFFDumper::printCodeViewSymbolsSubsection(StringRef Subsection,
error(object_error::parse_failed);
}
- if (!CVSD.dump(Symbols)) {
+ if (auto EC = CVSD.dump(Symbols)) {
W.flush();
- error(object_error::parse_failed);
+ error(std::move(EC));
}
W.flush();
}
void COFFDumper::printCodeViewFileChecksums(StringRef Subsection) {
- StringRef Data = Subsection;
- while (!Data.empty()) {
+ msf::ByteStream S(Subsection);
+ msf::StreamReader SR(S);
+ while (!SR.empty()) {
DictScope S(W, "FileChecksum");
const FileChecksum *FC;
- error(consumeObject(Data, FC));
+ error(SR.readObject(FC));
if (FC->FileNameOffset >= CVStringTable.size())
error(object_error::parse_failed);
StringRef Filename =
@@ -983,27 +994,30 @@ void COFFDumper::printCodeViewFileChecksums(StringRef Subsection) {
W.printHex("ChecksumSize", FC->ChecksumSize);
W.printEnum("ChecksumKind", uint8_t(FC->ChecksumKind),
makeArrayRef(FileChecksumKindNames));
- if (FC->ChecksumSize >= Data.size())
+ if (FC->ChecksumSize >= SR.bytesRemaining())
error(object_error::parse_failed);
- StringRef ChecksumBytes = Data.substr(0, FC->ChecksumSize);
+ ArrayRef<uint8_t> ChecksumBytes;
+ error(SR.readBytes(ChecksumBytes, FC->ChecksumSize));
W.printBinary("ChecksumBytes", ChecksumBytes);
unsigned PaddedSize = alignTo(FC->ChecksumSize + sizeof(FileChecksum), 4) -
sizeof(FileChecksum);
- if (PaddedSize > Data.size())
+ PaddedSize -= ChecksumBytes.size();
+ if (PaddedSize > SR.bytesRemaining())
error(object_error::parse_failed);
- Data = Data.drop_front(PaddedSize);
+ error(SR.skip(PaddedSize));
}
}
void COFFDumper::printCodeViewInlineeLines(StringRef Subsection) {
- StringRef Data = Subsection;
+ msf::ByteStream S(Subsection);
+ msf::StreamReader SR(S);
uint32_t Signature;
- error(consume(Data, Signature));
+ error(SR.readInteger(Signature));
bool HasExtraFiles = Signature == unsigned(InlineeLinesSignature::ExtraFiles);
- while (!Data.empty()) {
+ while (!SR.empty()) {
const InlineeSourceLine *ISL;
- error(consumeObject(Data, ISL));
+ error(SR.readObject(ISL));
DictScope S(W, "InlineeSourceLine");
printTypeIndex("Inlinee", ISL->Inlinee);
printFileNameForOffset("FileID", ISL->FileID);
@@ -1011,12 +1025,12 @@ void COFFDumper::printCodeViewInlineeLines(StringRef Subsection) {
if (HasExtraFiles) {
uint32_t ExtraFileCount;
- error(consume(Data, ExtraFileCount));
+ error(SR.readInteger(ExtraFileCount));
W.printNumber("ExtraFileCount", ExtraFileCount);
ListScope ExtraFiles(W, "ExtraFiles");
for (unsigned I = 0; I < ExtraFileCount; ++I) {
uint32_t FileID;
- error(consume(Data, FileID));
+ error(SR.readInteger(FileID));
printFileNameForOffset("FileID", FileID);
}
}
@@ -1048,7 +1062,7 @@ void COFFDumper::printFileNameForOffset(StringRef Label, uint32_t FileOffset) {
W.printHex(Label, getFileNameForFileOffset(FileOffset), FileOffset);
}
-void COFFDumper::mergeCodeViewTypes(MemoryTypeTableBuilder &CVTypes) {
+void COFFDumper::mergeCodeViewTypes(TypeTableBuilder &CVTypes) {
for (const SectionRef &S : Obj->sections()) {
StringRef SectionName;
error(S.getName(SectionName));
@@ -1061,7 +1075,7 @@ void COFFDumper::mergeCodeViewTypes(MemoryTypeTableBuilder &CVTypes) {
error(object_error::parse_failed);
ArrayRef<uint8_t> Bytes(reinterpret_cast<const uint8_t *>(Data.data()),
Data.size());
- ByteStream<> Stream(Bytes);
+ ByteStream Stream(Bytes);
CVTypeArray Types;
StreamReader Reader(Stream);
if (auto EC = Reader.readArray(Types, Reader.getLength())) {
@@ -1524,18 +1538,18 @@ void COFFDumper::printStackMap() const {
if (Obj->isLittleEndian())
prettyPrintStackMap(
llvm::outs(),
- StackMapV1Parser<support::little>(StackMapContentsArray));
+ StackMapV2Parser<support::little>(StackMapContentsArray));
else
prettyPrintStackMap(llvm::outs(),
- StackMapV1Parser<support::big>(StackMapContentsArray));
+ StackMapV2Parser<support::big>(StackMapContentsArray));
}
-void llvm::dumpCodeViewMergedTypes(
- ScopedPrinter &Writer, llvm::codeview::MemoryTypeTableBuilder &CVTypes) {
+void llvm::dumpCodeViewMergedTypes(ScopedPrinter &Writer,
+ llvm::codeview::TypeTableBuilder &CVTypes) {
// Flatten it first, then run our dumper on it.
ListScope S(Writer, "MergedTypeStream");
SmallString<0> Buf;
- CVTypes.ForEachRecord([&](TypeIndex TI, StringRef Record) {
+ CVTypes.ForEachRecord([&](TypeIndex TI, ArrayRef<uint8_t> Record) {
Buf.append(Record.begin(), Record.end());
});
CVTypeDumper CVTD(&Writer, opts::CodeViewSubsectionBytes);
diff --git a/contrib/llvm/tools/llvm-readobj/ELFDumper.cpp b/contrib/llvm/tools/llvm-readobj/ELFDumper.cpp
index 06fbe8d3fccb..997af568d394 100644
--- a/contrib/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/contrib/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -56,6 +56,7 @@ using namespace ELF;
typedef typename ELFO::Elf_Dyn_Range Elf_Dyn_Range; \
typedef typename ELFO::Elf_Rel Elf_Rel; \
typedef typename ELFO::Elf_Rela Elf_Rela; \
+ typedef typename ELFO::Elf_Rel_Range Elf_Rel_Range; \
typedef typename ELFO::Elf_Rela_Range Elf_Rela_Range; \
typedef typename ELFO::Elf_Phdr Elf_Phdr; \
typedef typename ELFO::Elf_Half Elf_Half; \
@@ -63,6 +64,12 @@ using namespace ELF;
typedef typename ELFO::Elf_Word Elf_Word; \
typedef typename ELFO::Elf_Hash Elf_Hash; \
typedef typename ELFO::Elf_GnuHash Elf_GnuHash; \
+ typedef typename ELFO::Elf_Sym_Range Elf_Sym_Range; \
+ typedef typename ELFO::Elf_Versym Elf_Versym; \
+ typedef typename ELFO::Elf_Verneed Elf_Verneed; \
+ typedef typename ELFO::Elf_Vernaux Elf_Vernaux; \
+ typedef typename ELFO::Elf_Verdef Elf_Verdef; \
+ typedef typename ELFO::Elf_Verdaux Elf_Verdaux; \
typedef typename ELFO::uintX_t uintX_t;
namespace {
@@ -122,34 +129,17 @@ public:
void printMipsReginfo() override;
void printMipsOptions() override;
+ void printAMDGPURuntimeMD() override;
+
void printStackMap() const override;
void printHashHistogram() override;
+ void printNotes() override;
+
private:
std::unique_ptr<DumpStyle<ELFT>> ELFDumperStyle;
- typedef ELFFile<ELFT> ELFO;
- typedef typename ELFO::Elf_Shdr Elf_Shdr;
- typedef typename ELFO::Elf_Sym Elf_Sym;
- typedef typename ELFO::Elf_Sym_Range Elf_Sym_Range;
- typedef typename ELFO::Elf_Dyn Elf_Dyn;
- typedef typename ELFO::Elf_Dyn_Range Elf_Dyn_Range;
- typedef typename ELFO::Elf_Rel Elf_Rel;
- typedef typename ELFO::Elf_Rela Elf_Rela;
- typedef typename ELFO::Elf_Rel_Range Elf_Rel_Range;
- typedef typename ELFO::Elf_Rela_Range Elf_Rela_Range;
- typedef typename ELFO::Elf_Phdr Elf_Phdr;
- typedef typename ELFO::Elf_Half Elf_Half;
- typedef typename ELFO::Elf_Hash Elf_Hash;
- typedef typename ELFO::Elf_GnuHash Elf_GnuHash;
- typedef typename ELFO::Elf_Ehdr Elf_Ehdr;
- typedef typename ELFO::Elf_Word Elf_Word;
- typedef typename ELFO::uintX_t uintX_t;
- typedef typename ELFO::Elf_Versym Elf_Versym;
- typedef typename ELFO::Elf_Verneed Elf_Verneed;
- typedef typename ELFO::Elf_Vernaux Elf_Vernaux;
- typedef typename ELFO::Elf_Verdef Elf_Verdef;
- typedef typename ELFO::Elf_Verdaux Elf_Verdaux;
+ TYPEDEF_ELF_TYPES(ELFT)
DynRegionInfo checkDRI(DynRegionInfo DRI) {
if (DRI.Addr < Obj->base() ||
@@ -258,7 +248,7 @@ void ELFDumper<ELFT>::printSymbolsHelper(bool IsDynamic) const {
if (!DotSymtabSec)
return;
StrTable = unwrapOrError(Obj->getStringTableForSymtab(*DotSymtabSec));
- Syms = Obj->symbols(DotSymtabSec);
+ Syms = unwrapOrError(Obj->symbols(DotSymtabSec));
SymtabName = unwrapOrError(Obj->getSectionName(DotSymtabSec));
Entries = DotSymtabSec->getEntityCount();
}
@@ -292,6 +282,7 @@ public:
bool IsDynamic) = 0;
virtual void printProgramHeaders(const ELFFile<ELFT> *Obj) = 0;
virtual void printHashHistogram(const ELFFile<ELFT> *Obj) = 0;
+ virtual void printNotes(const ELFFile<ELFT> *Obj) = 0;
const ELFDumper<ELFT> *dumper() const { return Dumper; }
private:
const ELFDumper<ELFT> *Dumper;
@@ -314,6 +305,7 @@ public:
size_t Offset) override;
void printProgramHeaders(const ELFO *Obj) override;
void printHashHistogram(const ELFFile<ELFT> *Obj) override;
+ void printNotes(const ELFFile<ELFT> *Obj) override;
private:
struct Field {
@@ -338,6 +330,8 @@ private:
OS.flush();
return OS;
}
+ void printHashedSymbol(const ELFO *Obj, const Elf_Sym *FirstSym, uint32_t Sym,
+ StringRef StrTable, uint32_t Bucket);
void printRelocation(const ELFO *Obj, const Elf_Shdr *SymTab,
const Elf_Rela &R, bool IsRela);
void printSymbol(const ELFO *Obj, const Elf_Sym *Symbol, const Elf_Sym *First,
@@ -367,6 +361,7 @@ public:
void printDynamicRelocations(const ELFO *Obj) override;
void printProgramHeaders(const ELFO *Obj) override;
void printHashHistogram(const ELFFile<ELFT> *Obj) override;
+ void printNotes(const ELFFile<ELFT> *Obj) override;
private:
void printRelocation(const ELFO *Obj, Elf_Rela Rel, const Elf_Shdr *SymTab);
@@ -656,8 +651,8 @@ StringRef ELFDumper<ELFT>::getSymbolVersion(StringRef StrTab,
sizeof(Elf_Sym);
// Get the corresponding version index entry
- const Elf_Versym *vs =
- Obj->template getEntry<Elf_Versym>(dot_gnu_version_sec, entry_index);
+ const Elf_Versym *vs = unwrapOrError(
+ Obj->template getEntry<Elf_Versym>(dot_gnu_version_sec, entry_index));
size_t version_index = vs->vs_index & ELF::VERSYM_VERSION;
// Special markers for unversioned symbols.
@@ -705,11 +700,11 @@ std::string ELFDumper<ELFT>::getFullSymbolName(const Elf_Sym *Symbol,
return FullSymbolName;
}
-template <typename ELFO>
+template <typename ELFT>
static void
-getSectionNameIndex(const ELFO &Obj, const typename ELFO::Elf_Sym *Symbol,
- const typename ELFO::Elf_Sym *FirstSym,
- ArrayRef<typename ELFO::Elf_Word> ShndxTable,
+getSectionNameIndex(const ELFFile<ELFT> &Obj, const typename ELFT::Sym *Symbol,
+ const typename ELFT::Sym *FirstSym,
+ ArrayRef<typename ELFT::Word> ShndxTable,
StringRef &SectionName, unsigned &SectionIndex) {
SectionIndex = Symbol->st_shndx;
if (Symbol->isUndefined())
@@ -726,9 +721,9 @@ getSectionNameIndex(const ELFO &Obj, const typename ELFO::Elf_Sym *Symbol,
SectionName = "Reserved";
else {
if (SectionIndex == SHN_XINDEX)
- SectionIndex =
- Obj.getExtendedSymbolTableIndex(Symbol, FirstSym, ShndxTable);
- const typename ELFO::Elf_Shdr *Sec =
+ SectionIndex = unwrapOrError(object::getExtendedSymbolTableIndex<ELFT>(
+ Symbol, FirstSym, ShndxTable));
+ const typename ELFT::Shdr *Sec =
unwrapOrError(Obj.getSection(SectionIndex));
SectionName = unwrapOrError(Obj.getSectionName(Sec));
}
@@ -737,7 +732,7 @@ getSectionNameIndex(const ELFO &Obj, const typename ELFO::Elf_Sym *Symbol,
template <class ELFO>
static const typename ELFO::Elf_Shdr *
findNotEmptySectionByAddress(const ELFO *Obj, uint64_t Addr) {
- for (const auto &Shdr : Obj->sections())
+ for (const auto &Shdr : unwrapOrError(Obj->sections()))
if (Shdr.sh_addr == Addr && Shdr.sh_size > 0)
return &Shdr;
return nullptr;
@@ -746,7 +741,7 @@ findNotEmptySectionByAddress(const ELFO *Obj, uint64_t Addr) {
template <class ELFO>
static const typename ELFO::Elf_Shdr *findSectionByName(const ELFO &Obj,
StringRef Name) {
- for (const auto &Shdr : Obj.sections()) {
+ for (const auto &Shdr : unwrapOrError(Obj.sections())) {
if (Name == unwrapOrError(Obj.getSectionName(&Shdr)))
return &Shdr;
}
@@ -954,6 +949,7 @@ static const EnumEntry<unsigned> ElfMachineType[] = {
ENUM_ENT(EM_78KOR, "EM_78KOR"),
ENUM_ENT(EM_56800EX, "EM_56800EX"),
ENUM_ENT(EM_AMDGPU, "EM_AMDGPU"),
+ ENUM_ENT(EM_RISCV, "RISC-V"),
ENUM_ENT(EM_WEBASSEMBLY, "EM_WEBASSEMBLY"),
ENUM_ENT(EM_LANAI, "EM_LANAI"),
ENUM_ENT(EM_BPF, "EM_BPF"),
@@ -1073,6 +1069,10 @@ static const EnumEntry<unsigned> ElfAMDGPUSectionFlags[] = {
LLVM_READOBJ_ENUM_ENT(ELF, SHF_AMDGPU_HSA_AGENT)
};
+static const EnumEntry<unsigned> ElfARMSectionFlags[] = {
+ LLVM_READOBJ_ENUM_ENT(ELF, SHF_ARM_PURECODE)
+};
+
static const EnumEntry<unsigned> ElfHexagonSectionFlags[] = {
LLVM_READOBJ_ENUM_ENT(ELF, SHF_HEX_GPREL)
};
@@ -1163,6 +1163,11 @@ static const char *getElfSegmentType(unsigned Arch, unsigned Type) {
LLVM_READOBJ_ENUM_CASE(ELF, PT_GNU_STACK);
LLVM_READOBJ_ENUM_CASE(ELF, PT_GNU_RELRO);
+
+ LLVM_READOBJ_ENUM_CASE(ELF, PT_OPENBSD_RANDOMIZE);
+ LLVM_READOBJ_ENUM_CASE(ELF, PT_OPENBSD_WXNEEDED);
+ LLVM_READOBJ_ENUM_CASE(ELF, PT_OPENBSD_BOOTDATA);
+
default: return "";
}
}
@@ -1309,7 +1314,7 @@ ELFDumper<ELFT>::ELFDumper(const ELFFile<ELFT> *Obj, ScopedPrinter &Writer)
: ObjDumper(Writer), Obj(Obj) {
SmallVector<const Elf_Phdr *, 4> LoadSegments;
- for (const Elf_Phdr &Phdr : Obj->program_headers()) {
+ for (const Elf_Phdr &Phdr : unwrapOrError(Obj->program_headers())) {
if (Phdr.p_type == ELF::PT_DYNAMIC) {
DynamicTable = createDRIFrom(&Phdr, sizeof(Elf_Dyn));
continue;
@@ -1319,7 +1324,7 @@ ELFDumper<ELFT>::ELFDumper(const ELFFile<ELFT> *Obj, ScopedPrinter &Writer)
LoadSegments.push_back(&Phdr);
}
- for (const Elf_Shdr &Sec : Obj->sections()) {
+ for (const Elf_Shdr &Sec : unwrapOrError(Obj->sections())) {
switch (Sec.sh_type) {
case ELF::SHT_SYMTAB:
if (DotSymtabSec != nullptr)
@@ -1491,10 +1496,37 @@ void ELFDumper<ELFT>::printDynamicSymbols() {
template <class ELFT> void ELFDumper<ELFT>::printHashHistogram() {
ELFDumperStyle->printHashHistogram(Obj);
}
+
+template <class ELFT> void ELFDumper<ELFT>::printNotes() {
+ ELFDumperStyle->printNotes(Obj);
+}
+
#define LLVM_READOBJ_TYPE_CASE(name) \
case DT_##name: return #name
-static const char *getTypeString(uint64_t Type) {
+static const char *getTypeString(unsigned Arch, uint64_t Type) {
+ switch (Arch) {
+ case EM_HEXAGON:
+ switch (Type) {
+ LLVM_READOBJ_TYPE_CASE(HEXAGON_SYMSZ);
+ LLVM_READOBJ_TYPE_CASE(HEXAGON_VER);
+ LLVM_READOBJ_TYPE_CASE(HEXAGON_PLT);
+ }
+ case EM_MIPS:
+ switch (Type) {
+ LLVM_READOBJ_TYPE_CASE(MIPS_RLD_MAP_REL);
+ LLVM_READOBJ_TYPE_CASE(MIPS_RLD_VERSION);
+ LLVM_READOBJ_TYPE_CASE(MIPS_FLAGS);
+ LLVM_READOBJ_TYPE_CASE(MIPS_BASE_ADDRESS);
+ LLVM_READOBJ_TYPE_CASE(MIPS_LOCAL_GOTNO);
+ LLVM_READOBJ_TYPE_CASE(MIPS_SYMTABNO);
+ LLVM_READOBJ_TYPE_CASE(MIPS_UNREFEXTNO);
+ LLVM_READOBJ_TYPE_CASE(MIPS_GOTSYM);
+ LLVM_READOBJ_TYPE_CASE(MIPS_RLD_MAP);
+ LLVM_READOBJ_TYPE_CASE(MIPS_PLTGOT);
+ LLVM_READOBJ_TYPE_CASE(MIPS_OPTIONS);
+ }
+ }
switch (Type) {
LLVM_READOBJ_TYPE_CASE(BIND_NOW);
LLVM_READOBJ_TYPE_CASE(DEBUG);
@@ -1540,17 +1572,7 @@ static const char *getTypeString(uint64_t Type) {
LLVM_READOBJ_TYPE_CASE(GNU_HASH);
LLVM_READOBJ_TYPE_CASE(TLSDESC_PLT);
LLVM_READOBJ_TYPE_CASE(TLSDESC_GOT);
- LLVM_READOBJ_TYPE_CASE(MIPS_RLD_VERSION);
- LLVM_READOBJ_TYPE_CASE(MIPS_RLD_MAP_REL);
- LLVM_READOBJ_TYPE_CASE(MIPS_FLAGS);
- LLVM_READOBJ_TYPE_CASE(MIPS_BASE_ADDRESS);
- LLVM_READOBJ_TYPE_CASE(MIPS_LOCAL_GOTNO);
- LLVM_READOBJ_TYPE_CASE(MIPS_SYMTABNO);
- LLVM_READOBJ_TYPE_CASE(MIPS_UNREFEXTNO);
- LLVM_READOBJ_TYPE_CASE(MIPS_GOTSYM);
- LLVM_READOBJ_TYPE_CASE(MIPS_RLD_MAP);
- LLVM_READOBJ_TYPE_CASE(MIPS_PLTGOT);
- LLVM_READOBJ_TYPE_CASE(MIPS_OPTIONS);
+ LLVM_READOBJ_TYPE_CASE(AUXILIARY);
default: return "unknown";
}
}
@@ -1656,7 +1678,7 @@ void ELFDumper<ELFT>::printValue(uint64_t Type, uint64_t Value) {
OS << "RELA";
break;
}
- // Fallthrough.
+ LLVM_FALLTHROUGH;
case DT_PLTGOT:
case DT_HASH:
case DT_STRTAB:
@@ -1711,6 +1733,9 @@ void ELFDumper<ELFT>::printValue(uint64_t Type, uint64_t Value) {
case DT_SONAME:
OS << "LibrarySoname (" << getDynamicString(Value) << ")";
break;
+ case DT_AUXILIARY:
+ OS << "Auxiliary library: [" << getDynamicString(Value) << "]";
+ break;
case DT_RPATH:
case DT_RUNPATH:
OS << getDynamicString(Value);
@@ -1779,7 +1804,7 @@ void ELFDumper<ELFT>::printDynamicTable() {
uintX_t Tag = Entry.getTag();
++I;
W.startLine() << " " << format_hex(Tag, Is64 ? 18 : 10, opts::Output != opts::GNU) << " "
- << format("%-21s", getTypeString(Tag));
+ << format("%-21s", getTypeString(Obj->getHeader()->e_machine, Tag));
printValue(Tag, Entry.getVal());
OS << "\n";
}
@@ -1852,7 +1877,7 @@ template <> void ELFDumper<ELFType<support::little, false>>::printAttributes() {
}
DictScope BA(W, "BuildAttributes");
- for (const ELFO::Elf_Shdr &Sec : Obj->sections()) {
+ for (const ELFO::Elf_Shdr &Sec : unwrapOrError(Obj->sections())) {
if (Sec.sh_type != ELF::SHT_ARM_ATTRIBUTES)
continue;
@@ -1875,14 +1900,8 @@ template <> void ELFDumper<ELFType<support::little, false>>::printAttributes() {
namespace {
template <class ELFT> class MipsGOTParser {
public:
- typedef object::ELFFile<ELFT> ELFO;
- typedef typename ELFO::Elf_Shdr Elf_Shdr;
- typedef typename ELFO::Elf_Sym Elf_Sym;
- typedef typename ELFO::Elf_Dyn_Range Elf_Dyn_Range;
+ TYPEDEF_ELF_TYPES(ELFT)
typedef typename ELFO::Elf_Addr GOTEntry;
- typedef typename ELFO::Elf_Rel Elf_Rel;
- typedef typename ELFO::Elf_Rela Elf_Rela;
-
MipsGOTParser(ELFDumper<ELFT> *Dumper, const ELFO *Obj,
Elf_Dyn_Range DynTable, ScopedPrinter &W);
@@ -2068,19 +2087,21 @@ template <class ELFT> void MipsGOTParser<ELFT>::parsePLT() {
switch (PLTRelShdr->sh_type) {
case ELF::SHT_REL:
- for (const Elf_Rel *RI = Obj->rel_begin(PLTRelShdr),
- *RE = Obj->rel_end(PLTRelShdr);
- RI != RE && It != PLTEnd; ++RI, ++It) {
- const Elf_Sym *Sym = Obj->getRelocationSymbol(&*RI, SymTable);
+ for (const Elf_Rel &Rel : unwrapOrError(Obj->rels(PLTRelShdr))) {
+ const Elf_Sym *Sym =
+ unwrapOrError(Obj->getRelocationSymbol(&Rel, SymTable));
printPLTEntry(PLTShdr->sh_addr, PLTBegin, It, StrTable, Sym);
+ if (++It == PLTEnd)
+ break;
}
break;
case ELF::SHT_RELA:
- for (const Elf_Rela *RI = Obj->rela_begin(PLTRelShdr),
- *RE = Obj->rela_end(PLTRelShdr);
- RI != RE && It != PLTEnd; ++RI, ++It) {
- const Elf_Sym *Sym = Obj->getRelocationSymbol(&*RI, SymTable);
+ for (const Elf_Rela &Rel : unwrapOrError(Obj->relas(PLTRelShdr))) {
+ const Elf_Sym *Sym =
+ unwrapOrError(Obj->getRelocationSymbol(&Rel, SymTable));
printPLTEntry(PLTShdr->sh_addr, PLTBegin, It, StrTable, Sym);
+ if (++It == PLTEnd)
+ break;
}
break;
}
@@ -2335,9 +2356,39 @@ template <class ELFT> void ELFDumper<ELFT>::printMipsOptions() {
}
}
+template <class ELFT> void ELFDumper<ELFT>::printAMDGPURuntimeMD() {
+ const Elf_Shdr *Shdr = findSectionByName(*Obj, ".note");
+ if (!Shdr) {
+ W.startLine() << "There is no .note section in the file.\n";
+ return;
+ }
+ ArrayRef<uint8_t> Sec = unwrapOrError(Obj->getSectionContents(Shdr));
+
+ const uint32_t RuntimeMDNoteType = 7;
+ for (auto I = reinterpret_cast<const Elf_Word *>(&Sec[0]),
+ E = I + Sec.size()/4; I != E;) {
+ uint32_t NameSZ = I[0];
+ uint32_t DescSZ = I[1];
+ uint32_t Type = I[2];
+ I += 3;
+
+ StringRef Name;
+ if (NameSZ) {
+ Name = StringRef(reinterpret_cast<const char *>(I), NameSZ - 1);
+ I += alignTo<4>(NameSZ)/4;
+ }
+
+ if (Name == "AMD" && Type == RuntimeMDNoteType) {
+ StringRef Desc(reinterpret_cast<const char *>(I), DescSZ);
+ W.printString(Desc);
+ }
+ I += alignTo<4>(DescSZ)/4;
+ }
+}
+
template <class ELFT> void ELFDumper<ELFT>::printStackMap() const {
const Elf_Shdr *StackMapSection = nullptr;
- for (const auto &Sec : Obj->sections()) {
+ for (const auto &Sec : unwrapOrError(Obj->sections())) {
StringRef Name = unwrapOrError(Obj->getSectionName(&Sec));
if (Name == ".llvm_stackmaps") {
StackMapSection = &Sec;
@@ -2352,7 +2403,7 @@ template <class ELFT> void ELFDumper<ELFT>::printStackMap() const {
ArrayRef<uint8_t> StackMapContentsArray =
unwrapOrError(Obj->getSectionContents(StackMapSection));
- prettyPrintStackMap(llvm::outs(), StackMapV1Parser<ELFT::TargetEndianness>(
+ prettyPrintStackMap(llvm::outs(), StackMapV2Parser<ELFT::TargetEndianness>(
StackMapContentsArray));
}
@@ -2390,8 +2441,7 @@ template <class ELFT> void GNUStyle<ELFT>::printFileHeaders(const ELFO *Obj) {
OS << "\n";
Str = printEnum(e->e_ident[ELF::EI_OSABI], makeArrayRef(ElfOSABI));
printFields(OS, "OS/ABI:", Str);
- Str = "0x" + to_hexString(e->e_version);
- Str = to_hexString(e->e_ident[ELF::EI_ABIVERSION]);
+ Str = "0x" + to_hexString(e->e_ident[ELF::EI_ABIVERSION]);
printFields(OS, "ABI Version:", Str);
Str = printEnum(e->e_type, makeArrayRef(ElfObjectFileType));
printFields(OS, "Type:", Str);
@@ -2424,13 +2474,13 @@ template <class ELFT> void GNUStyle<ELFT>::printFileHeaders(const ELFO *Obj) {
template <class ELFT> void GNUStyle<ELFT>::printGroupSections(const ELFO *Obj) {
uint32_t SectionIndex = 0;
bool HasGroups = false;
- for (const Elf_Shdr &Sec : Obj->sections()) {
+ for (const Elf_Shdr &Sec : unwrapOrError(Obj->sections())) {
if (Sec.sh_type == ELF::SHT_GROUP) {
HasGroups = true;
const Elf_Shdr *Symtab = unwrapOrError(Obj->getSection(Sec.sh_link));
StringRef StrTable = unwrapOrError(Obj->getStringTableForSymtab(*Symtab));
const Elf_Sym *Signature =
- Obj->template getEntry<Elf_Sym>(Symtab, Sec.sh_info);
+ unwrapOrError(Obj->template getEntry<Elf_Sym>(Symtab, Sec.sh_info));
ArrayRef<Elf_Word> Data = unwrapOrError(
Obj->template getSectionContentsAsArray<Elf_Word>(&Sec));
StringRef Name = unwrapOrError(Obj->getSectionName(&Sec));
@@ -2467,7 +2517,7 @@ void GNUStyle<ELFT>::printRelocation(const ELFO *Obj, const Elf_Shdr *SymTab,
// fixed width.
Field Fields[5] = {0, 10 + Bias, 19 + 2 * Bias, 42 + 2 * Bias, 53 + 2 * Bias};
Obj->getRelocationTypeName(R.getType(Obj->isMips64EL()), RelocName);
- Sym = Obj->getRelocationSymbol(&R, SymTab);
+ Sym = unwrapOrError(Obj->getRelocationSymbol(&R, SymTab));
if (Sym && Sym->getType() == ELF::STT_SECTION) {
const Elf_Shdr *Sec = unwrapOrError(
Obj->getSection(Sym, SymTab, this->dumper()->getShndxTable()));
@@ -2518,7 +2568,7 @@ static inline void printRelocHeader(raw_ostream &OS, bool Is64, bool IsRela) {
template <class ELFT> void GNUStyle<ELFT>::printRelocations(const ELFO *Obj) {
bool HasRelocSections = false;
- for (const Elf_Shdr &Sec : Obj->sections()) {
+ for (const Elf_Shdr &Sec : unwrapOrError(Obj->sections())) {
if (Sec.sh_type != ELF::SHT_REL && Sec.sh_type != ELF::SHT_RELA)
continue;
HasRelocSections = true;
@@ -2531,7 +2581,7 @@ template <class ELFT> void GNUStyle<ELFT>::printRelocations(const ELFO *Obj) {
printRelocHeader(OS, ELFT::Is64Bits, (Sec.sh_type == ELF::SHT_RELA));
const Elf_Shdr *SymTab = unwrapOrError(Obj->getSection(Sec.sh_link));
if (Sec.sh_type == ELF::SHT_REL) {
- for (const auto &R : Obj->rels(&Sec)) {
+ for (const auto &R : unwrapOrError(Obj->rels(&Sec))) {
Elf_Rela Rela;
Rela.r_offset = R.r_offset;
Rela.r_info = R.r_info;
@@ -2539,7 +2589,7 @@ template <class ELFT> void GNUStyle<ELFT>::printRelocations(const ELFO *Obj) {
printRelocation(Obj, SymTab, Rela, false);
}
} else {
- for (const auto &R : Obj->relas(&Sec))
+ for (const auto &R : unwrapOrError(Obj->relas(&Sec)))
printRelocation(Obj, SymTab, R, true);
}
}
@@ -2664,7 +2714,7 @@ template <class ELFT> void GNUStyle<ELFT>::printSections(const ELFO *Obj) {
printField(f);
OS << "\n";
- for (const Elf_Shdr &Sec : Obj->sections()) {
+ for (const Elf_Shdr &Sec : unwrapOrError(Obj->sections())) {
Number = to_string(SectionIndex);
Fields[0].Str = Number;
Fields[1].Str = unwrapOrError(Obj->getSectionName(&Sec));
@@ -2738,8 +2788,8 @@ std::string GNUStyle<ELFT>::getSymbolSectionNdx(const ELFO *Obj,
case ELF::SHN_COMMON:
return "COM";
case ELF::SHN_XINDEX:
- SectionIndex = Obj->getExtendedSymbolTableIndex(
- Symbol, FirstSym, this->dumper()->getShndxTable());
+ SectionIndex = unwrapOrError(object::getExtendedSymbolTableIndex<ELFT>(
+ Symbol, FirstSym, this->dumper()->getShndxTable()));
default:
// Find if:
// Processor specific
@@ -2812,15 +2862,120 @@ void GNUStyle<ELFT>::printSymbol(const ELFO *Obj, const Elf_Sym *Symbol,
printField(Entry);
OS << "\n";
}
+template <class ELFT>
+void GNUStyle<ELFT>::printHashedSymbol(const ELFO *Obj, const Elf_Sym *FirstSym,
+ uint32_t Sym, StringRef StrTable,
+ uint32_t Bucket) {
+ std::string Num, Buc, Name, Value, Size, Binding, Type, Visibility, Section;
+ unsigned Width, Bias = 0;
+ if (ELFT::Is64Bits) {
+ Bias = 8;
+ Width = 16;
+ } else {
+ Bias = 0;
+ Width = 8;
+ }
+ Field Fields[9] = {0, 6, 11, 20 + Bias, 25 + Bias,
+ 34 + Bias, 41 + Bias, 49 + Bias, 53 + Bias};
+ Num = to_string(format_decimal(Sym, 5));
+ Buc = to_string(format_decimal(Bucket, 3)) + ":";
+
+ const auto Symbol = FirstSym + Sym;
+ Value = to_string(format_hex_no_prefix(Symbol->st_value, Width));
+ Size = to_string(format_decimal(Symbol->st_size, 5));
+ unsigned char SymbolType = Symbol->getType();
+ if (Obj->getHeader()->e_machine == ELF::EM_AMDGPU &&
+ SymbolType >= ELF::STT_LOOS && SymbolType < ELF::STT_HIOS)
+ Type = printEnum(SymbolType, makeArrayRef(AMDGPUSymbolTypes));
+ else
+ Type = printEnum(SymbolType, makeArrayRef(ElfSymbolTypes));
+ unsigned Vis = Symbol->getVisibility();
+ Binding = printEnum(Symbol->getBinding(), makeArrayRef(ElfSymbolBindings));
+ Visibility = printEnum(Vis, makeArrayRef(ElfSymbolVisibilities));
+ Section = getSymbolSectionNdx(Obj, Symbol, FirstSym);
+ Name = this->dumper()->getFullSymbolName(Symbol, StrTable, true);
+ Fields[0].Str = Num;
+ Fields[1].Str = Buc;
+ Fields[2].Str = Value;
+ Fields[3].Str = Size;
+ Fields[4].Str = Type;
+ Fields[5].Str = Binding;
+ Fields[6].Str = Visibility;
+ Fields[7].Str = Section;
+ Fields[8].Str = Name;
+ for (auto &Entry : Fields)
+ printField(Entry);
+ OS << "\n";
+}
template <class ELFT> void GNUStyle<ELFT>::printSymbols(const ELFO *Obj) {
+ if (opts::DynamicSymbols)
+ return;
this->dumper()->printSymbolsHelper(true);
this->dumper()->printSymbolsHelper(false);
}
template <class ELFT>
void GNUStyle<ELFT>::printDynamicSymbols(const ELFO *Obj) {
- this->dumper()->printSymbolsHelper(true);
+ if (this->dumper()->getDynamicStringTable().size() == 0)
+ return;
+ auto StringTable = this->dumper()->getDynamicStringTable();
+ auto DynSyms = this->dumper()->dynamic_symbols();
+ auto GnuHash = this->dumper()->getGnuHashTable();
+ auto SysVHash = this->dumper()->getHashTable();
+
+  // If neither .hash nor .gnu.hash is present, fall back to the symbol table.
+ if (GnuHash == nullptr && SysVHash == nullptr)
+ this->dumper()->printSymbolsHelper(true);
+
+ // Try printing .hash
+ if (this->dumper()->getHashTable()) {
+ OS << "\n Symbol table of .hash for image:\n";
+ if (ELFT::Is64Bits)
+ OS << " Num Buc: Value Size Type Bind Vis Ndx Name";
+ else
+ OS << " Num Buc: Value Size Type Bind Vis Ndx Name";
+ OS << "\n";
+
+ uint32_t NBuckets = SysVHash->nbucket;
+ uint32_t NChains = SysVHash->nchain;
+ auto Buckets = SysVHash->buckets();
+ auto Chains = SysVHash->chains();
+ for (uint32_t Buc = 0; Buc < NBuckets; Buc++) {
+ if (Buckets[Buc] == ELF::STN_UNDEF)
+ continue;
+ for (uint32_t Ch = Buckets[Buc]; Ch < NChains; Ch = Chains[Ch]) {
+ if (Ch == ELF::STN_UNDEF)
+ break;
+ printHashedSymbol(Obj, &DynSyms[0], Ch, StringTable, Buc);
+ }
+ }
+ }
+
+ // Try printing .gnu.hash
+ if (GnuHash) {
+ OS << "\n Symbol table of .gnu.hash for image:\n";
+ if (ELFT::Is64Bits)
+ OS << " Num Buc: Value Size Type Bind Vis Ndx Name";
+ else
+ OS << " Num Buc: Value Size Type Bind Vis Ndx Name";
+ OS << "\n";
+ uint32_t NBuckets = GnuHash->nbuckets;
+ auto Buckets = GnuHash->buckets();
+ for (uint32_t Buc = 0; Buc < NBuckets; Buc++) {
+ if (Buckets[Buc] == ELF::STN_UNDEF)
+ continue;
+ uint32_t Index = Buckets[Buc];
+ uint32_t GnuHashable = Index - GnuHash->symndx;
+ // Print whole chain
+ while (true) {
+ printHashedSymbol(Obj, &DynSyms[0], Index++, StringTable, Buc);
+ // Chain ends at symbol with stopper bit
+ if ((GnuHash->values(DynSyms.size())[GnuHashable++] & 1) == 1)
+ break;
+ }
+ }
+ }
}
static inline std::string printPhdrFlags(unsigned Flag) {
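A minimal sketch of the two hash-table walks above, over plain arrays and assuming well-formed tables; the names are illustrative, not the Elf_Hash/Elf_GnuHash accessors. For SysV .hash, each bucket heads a linked list threaded through the chain array; for .gnu.hash, a bucket points at the first symbol of a run in the dynamic symbol table, and the run ends at the entry whose low ("stopper") bit is set.

    #include <cstdint>
    #include <functional>
    #include <vector>

    // SysV .hash: visit every symbol index reachable from every bucket.
    void walkSysVHash(const std::vector<uint32_t> &Buckets,
                      const std::vector<uint32_t> &Chains,
                      const std::function<void(uint32_t)> &Visit) {
      const uint32_t STN_UNDEF = 0;
      for (uint32_t Head : Buckets)
        for (uint32_t Sym = Head; Sym != STN_UNDEF && Sym < Chains.size();
             Sym = Chains[Sym])
          Visit(Sym);
    }

    // .gnu.hash: SymNdx is the first hashed dynsym index; Values holds one
    // word per hashed symbol, and its low bit terminates a chain.
    void walkGnuHash(const std::vector<uint32_t> &Buckets, uint32_t SymNdx,
                     const std::vector<uint32_t> &Values,
                     const std::function<void(uint32_t)> &Visit) {
      for (uint32_t Head : Buckets) {
        if (Head == 0)
          continue;                       // empty bucket
        for (uint32_t Sym = Head;; ++Sym) {
          Visit(Sym);
          if (Values[Sym - SymNdx] & 1)   // stopper bit ends the chain
            break;
        }
      }
    }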
@@ -2912,7 +3067,7 @@ void GNUStyle<ELFT>::printProgramHeaders(const ELFO *Obj) {
else
OS << " Type Offset VirtAddr PhysAddr FileSiz "
<< "MemSiz Flg Align\n";
- for (const auto &Phdr : Obj->program_headers()) {
+ for (const auto &Phdr : unwrapOrError(Obj->program_headers())) {
Type = getElfPtType(Header->e_machine, Phdr.p_type);
Offset = to_string(format_hex(Phdr.p_offset, 8));
VMA = to_string(format_hex(Phdr.p_vaddr, Width));
@@ -2939,10 +3094,10 @@ void GNUStyle<ELFT>::printProgramHeaders(const ELFO *Obj) {
}
OS << "\n Section to Segment mapping:\n Segment Sections...\n";
int Phnum = 0;
- for (const Elf_Phdr &Phdr : Obj->program_headers()) {
+ for (const Elf_Phdr &Phdr : unwrapOrError(Obj->program_headers())) {
std::string Sections;
OS << format(" %2.2d ", Phnum++);
- for (const Elf_Shdr &Sec : Obj->sections()) {
+ for (const Elf_Shdr &Sec : unwrapOrError(Obj->sections())) {
// Check if each section is in a segment and then print mapping.
// readelf additionally makes sure it does not print zero sized sections
// at end of segments and for PT_DYNAMIC both start and end of section
@@ -3161,6 +3316,125 @@ void GNUStyle<ELFT>::printHashHistogram(const ELFFile<ELFT> *Obj) {
}
}
+static std::string getGNUNoteTypeName(const uint32_t NT) {
+ static const struct {
+ uint32_t ID;
+ const char *Name;
+ } Notes[] = {
+ {ELF::NT_GNU_ABI_TAG, "NT_GNU_ABI_TAG (ABI version tag)"},
+ {ELF::NT_GNU_HWCAP, "NT_GNU_HWCAP (DSO-supplied software HWCAP info)"},
+ {ELF::NT_GNU_BUILD_ID, "NT_GNU_BUILD_ID (unique build ID bitstring)"},
+ {ELF::NT_GNU_GOLD_VERSION, "NT_GNU_GOLD_VERSION (gold version)"},
+ };
+
+ for (const auto &Note : Notes)
+ if (Note.ID == NT)
+ return std::string(Note.Name);
+
+ std::string string;
+ raw_string_ostream OS(string);
+ OS << format("Unknown note type (0x%08x)", NT);
+ return string;
+}
+
+template <typename ELFT>
+static void printGNUNote(raw_ostream &OS, uint32_t NoteType,
+ ArrayRef<typename ELFFile<ELFT>::Elf_Word> Words) {
+ switch (NoteType) {
+ default:
+ return;
+ case ELF::NT_GNU_ABI_TAG: {
+ static const char *OSNames[] = {
+ "Linux", "Hurd", "Solaris", "FreeBSD", "NetBSD", "Syllable", "NaCl",
+ };
+
+ StringRef OSName = "Unknown";
+ if (Words[0] < array_lengthof(OSNames))
+ OSName = OSNames[Words[0]];
+ uint32_t Major = Words[1], Minor = Words[2], Patch = Words[3];
+
+ if (Words.size() < 4)
+ OS << " <corrupt GNU_ABI_TAG>";
+ else
+ OS << " OS: " << OSName << ", ABI: " << Major << "." << Minor << "."
+ << Patch;
+ break;
+ }
+ case ELF::NT_GNU_BUILD_ID: {
+ OS << " Build ID: ";
+ ArrayRef<uint8_t> ID(reinterpret_cast<const uint8_t *>(Words.data()),
+ Words.size() * 4);
+ for (const auto &B : ID)
+ OS << format_hex_no_prefix(B, 2);
+ break;
+ }
+ case ELF::NT_GNU_GOLD_VERSION:
+ OS << " Version: "
+ << StringRef(reinterpret_cast<const char *>(Words.data()),
+ Words.size() * 4);
+ break;
+ }
+
+ OS << '\n';
+}
+
+template <class ELFT>
+void GNUStyle<ELFT>::printNotes(const ELFFile<ELFT> *Obj) {
+ const Elf_Ehdr *e = Obj->getHeader();
+ bool IsCore = e->e_type == ELF::ET_CORE;
+
+ auto process = [&](const typename ELFFile<ELFT>::Elf_Off Offset,
+ const typename ELFFile<ELFT>::Elf_Addr Size) {
+ if (Size <= 0)
+ return;
+
+ const auto *P = static_cast<const uint8_t *>(Obj->base() + Offset);
+ const auto *E = P + Size;
+
+ OS << "Displaying notes found at file offset " << format_hex(Offset, 10)
+ << " with length " << format_hex(Size, 10) << ":\n"
+ << " Owner Data size\tDescription\n";
+
+ while (P < E) {
+ const Elf_Word *Words = reinterpret_cast<const Elf_Word *>(&P[0]);
+
+ uint32_t NameSize = Words[0];
+ uint32_t DescriptorSize = Words[1];
+ uint32_t Type = Words[2];
+
+ ArrayRef<Elf_Word> Descriptor(&Words[3 + (alignTo<4>(NameSize) / 4)],
+ alignTo<4>(DescriptorSize) / 4);
+
+ StringRef Name;
+ if (NameSize)
+ Name =
+ StringRef(reinterpret_cast<const char *>(&Words[3]), NameSize - 1);
+
+ OS << " " << Name << std::string(22 - NameSize, ' ')
+ << format_hex(DescriptorSize, 10) << '\t';
+
+ if (Name == "GNU") {
+ OS << getGNUNoteTypeName(Type) << '\n';
+ printGNUNote<ELFT>(OS, Type, Descriptor);
+ }
+ OS << '\n';
+
+      P = P + 3 * sizeof(Elf_Word) + alignTo<4>(NameSize) +
+          alignTo<4>(DescriptorSize);
+ }
+ };
+
+ if (IsCore) {
+ for (const auto &P : unwrapOrError(Obj->program_headers()))
+ if (P.p_type == PT_NOTE)
+ process(P.p_offset, P.p_filesz);
+ } else {
+ for (const auto &S : unwrapOrError(Obj->sections()))
+ if (S.sh_type == SHT_NOTE)
+ process(S.sh_offset, S.sh_size);
+ }
+}
+
template <class ELFT> void LLVMStyle<ELFT>::printFileHeaders(const ELFO *Obj) {
const Elf_Ehdr *e = Obj->getHeader();
{
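A sketch of the note layout being walked in printNotes above: each record is three 32-bit words (namesz, descsz, type) followed by the name and then the descriptor, each padded to a 4-byte boundary. For a GNU build-id note, namesz = 4 ("GNU\0"), descsz = 20, type = NT_GNU_BUILD_ID, so the record occupies 12 + 4 + 20 = 36 bytes and the next note starts immediately after it. The helper below is a self-contained illustration, not the dumper's code.

    #include <cstdint>
    #include <cstring>

    struct NoteHeader {
      uint32_t NameSize;
      uint32_t DescSize;
      uint32_t Type;
    };

    inline uint32_t alignTo4(uint32_t V) { return (V + 3) & ~3u; }

    // Returns a pointer to the start of the next note record.
    inline const uint8_t *nextNote(const uint8_t *P) {
      NoteHeader H;
      std::memcpy(&H, P, sizeof(H));   // avoid unaligned reads
      return P + sizeof(NoteHeader) + alignTo4(H.NameSize) + alignTo4(H.DescSize);
    }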
@@ -3210,12 +3484,13 @@ void LLVMStyle<ELFT>::printGroupSections(const ELFO *Obj) {
DictScope Lists(W, "Groups");
uint32_t SectionIndex = 0;
bool HasGroups = false;
- for (const Elf_Shdr &Sec : Obj->sections()) {
+ for (const Elf_Shdr &Sec : unwrapOrError(Obj->sections())) {
if (Sec.sh_type == ELF::SHT_GROUP) {
HasGroups = true;
const Elf_Shdr *Symtab = unwrapOrError(Obj->getSection(Sec.sh_link));
StringRef StrTable = unwrapOrError(Obj->getStringTableForSymtab(*Symtab));
- const Elf_Sym *Sym = Obj->template getEntry<Elf_Sym>(Symtab, Sec.sh_info);
+ const Elf_Sym *Sym =
+ unwrapOrError(Obj->template getEntry<Elf_Sym>(Symtab, Sec.sh_info));
auto Data = unwrapOrError(
Obj->template getSectionContentsAsArray<Elf_Word>(&Sec));
DictScope D(W, "Group");
@@ -3244,7 +3519,7 @@ template <class ELFT> void LLVMStyle<ELFT>::printRelocations(const ELFO *Obj) {
ListScope D(W, "Relocations");
int SectionNumber = -1;
- for (const Elf_Shdr &Sec : Obj->sections()) {
+ for (const Elf_Shdr &Sec : unwrapOrError(Obj->sections())) {
++SectionNumber;
if (Sec.sh_type != ELF::SHT_REL && Sec.sh_type != ELF::SHT_RELA)
@@ -3268,7 +3543,7 @@ void LLVMStyle<ELFT>::printRelocations(const Elf_Shdr *Sec, const ELFO *Obj) {
switch (Sec->sh_type) {
case ELF::SHT_REL:
- for (const Elf_Rel &R : Obj->rels(Sec)) {
+ for (const Elf_Rel &R : unwrapOrError(Obj->rels(Sec))) {
Elf_Rela Rela;
Rela.r_offset = R.r_offset;
Rela.r_info = R.r_info;
@@ -3277,7 +3552,7 @@ void LLVMStyle<ELFT>::printRelocations(const Elf_Shdr *Sec, const ELFO *Obj) {
}
break;
case ELF::SHT_RELA:
- for (const Elf_Rela &R : Obj->relas(Sec))
+ for (const Elf_Rela &R : unwrapOrError(Obj->relas(Sec)))
printRelocation(Obj, R, SymTab);
break;
}
@@ -3289,7 +3564,7 @@ void LLVMStyle<ELFT>::printRelocation(const ELFO *Obj, Elf_Rela Rel,
SmallString<32> RelocName;
Obj->getRelocationTypeName(Rel.getType(Obj->isMips64EL()), RelocName);
StringRef TargetName;
- const Elf_Sym *Sym = Obj->getRelocationSymbol(&Rel, SymTab);
+ const Elf_Sym *Sym = unwrapOrError(Obj->getRelocationSymbol(&Rel, SymTab));
if (Sym && Sym->getType() == ELF::STT_SECTION) {
const Elf_Shdr *Sec = unwrapOrError(
Obj->getSection(Sym, SymTab, this->dumper()->getShndxTable()));
@@ -3318,7 +3593,7 @@ template <class ELFT> void LLVMStyle<ELFT>::printSections(const ELFO *Obj) {
ListScope SectionsD(W, "Sections");
int SectionIndex = -1;
- for (const Elf_Shdr &Sec : Obj->sections()) {
+ for (const Elf_Shdr &Sec : unwrapOrError(Obj->sections())) {
++SectionIndex;
StringRef Name = unwrapOrError(Obj->getSectionName(&Sec));
@@ -3336,6 +3611,10 @@ template <class ELFT> void LLVMStyle<ELFT>::printSections(const ELFO *Obj) {
SectionFlags.insert(SectionFlags.end(), std::begin(ElfAMDGPUSectionFlags),
std::end(ElfAMDGPUSectionFlags));
break;
+ case EM_ARM:
+ SectionFlags.insert(SectionFlags.end(), std::begin(ElfARMSectionFlags),
+ std::end(ElfARMSectionFlags));
+ break;
case EM_HEXAGON:
SectionFlags.insert(SectionFlags.end(),
std::begin(ElfHexagonSectionFlags),
@@ -3376,11 +3655,12 @@ template <class ELFT> void LLVMStyle<ELFT>::printSections(const ELFO *Obj) {
const Elf_Shdr *Symtab = this->dumper()->getDotSymtabSec();
StringRef StrTable = unwrapOrError(Obj->getStringTableForSymtab(*Symtab));
- for (const Elf_Sym &Sym : Obj->symbols(Symtab)) {
+ for (const Elf_Sym &Sym : unwrapOrError(Obj->symbols(Symtab))) {
const Elf_Shdr *SymSec = unwrapOrError(
Obj->getSection(&Sym, Symtab, this->dumper()->getShndxTable()));
if (SymSec == &Sec)
- printSymbol(Obj, &Sym, Obj->symbol_begin(Symtab), StrTable, false);
+ printSymbol(Obj, &Sym, unwrapOrError(Obj->symbols(Symtab)).begin(),
+ StrTable, false);
}
}
@@ -3512,7 +3792,7 @@ template <class ELFT>
void LLVMStyle<ELFT>::printProgramHeaders(const ELFO *Obj) {
ListScope L(W, "ProgramHeaders");
- for (const Elf_Phdr &Phdr : Obj->program_headers()) {
+ for (const Elf_Phdr &Phdr : unwrapOrError(Obj->program_headers())) {
DictScope P(W, "ProgramHeader");
W.printHex("Type",
getElfSegmentType(Obj->getHeader()->e_machine, Phdr.p_type),
@@ -3526,7 +3806,14 @@ void LLVMStyle<ELFT>::printProgramHeaders(const ELFO *Obj) {
W.printNumber("Alignment", Phdr.p_align);
}
}
+
template <class ELFT>
void LLVMStyle<ELFT>::printHashHistogram(const ELFFile<ELFT> *Obj) {
W.startLine() << "Hash Histogram not implemented!\n";
}
+
+template <class ELFT>
+void LLVMStyle<ELFT>::printNotes(const ELFFile<ELFT> *Obj) {
+ W.startLine() << "printNotes not implemented!\n";
+}
+
diff --git a/contrib/llvm/tools/llvm-readobj/Error.cpp b/contrib/llvm/tools/llvm-readobj/Error.cpp
index 492eb3309497..03d349440e6b 100644
--- a/contrib/llvm/tools/llvm-readobj/Error.cpp
+++ b/contrib/llvm/tools/llvm-readobj/Error.cpp
@@ -22,12 +22,12 @@ namespace {
// deal with the Error value directly, rather than converting to error_code.
class _readobj_error_category : public std::error_category {
public:
- const char* name() const LLVM_NOEXCEPT override;
+ const char* name() const noexcept override;
std::string message(int ev) const override;
};
} // namespace
-const char *_readobj_error_category::name() const LLVM_NOEXCEPT {
+const char *_readobj_error_category::name() const noexcept {
return "llvm.readobj";
}
diff --git a/contrib/llvm/tools/llvm-readobj/MachODumper.cpp b/contrib/llvm/tools/llvm-readobj/MachODumper.cpp
index 3773df250578..01b074170ba5 100644
--- a/contrib/llvm/tools/llvm-readobj/MachODumper.cpp
+++ b/contrib/llvm/tools/llvm-readobj/MachODumper.cpp
@@ -669,10 +669,10 @@ void MachODumper::printStackMap() const {
if (Obj->isLittleEndian())
prettyPrintStackMap(
llvm::outs(),
- StackMapV1Parser<support::little>(StackMapContentsArray));
+ StackMapV2Parser<support::little>(StackMapContentsArray));
else
prettyPrintStackMap(llvm::outs(),
- StackMapV1Parser<support::big>(StackMapContentsArray));
+ StackMapV2Parser<support::big>(StackMapContentsArray));
}
void MachODumper::printMachODataInCode() {
diff --git a/contrib/llvm/tools/llvm-readobj/ObjDumper.h b/contrib/llvm/tools/llvm-readobj/ObjDumper.h
index a39fc260b549..c91558ecbfa7 100644
--- a/contrib/llvm/tools/llvm-readobj/ObjDumper.h
+++ b/contrib/llvm/tools/llvm-readobj/ObjDumper.h
@@ -19,7 +19,7 @@ class COFFImportFile;
class ObjectFile;
}
namespace codeview {
-class MemoryTypeTableBuilder;
+class TypeTableBuilder;
}
class ScopedPrinter;
@@ -47,6 +47,7 @@ public:
virtual void printVersionInfo() {}
virtual void printGroupSections() {}
virtual void printHashHistogram() {}
+ virtual void printNotes() {}
// Only implemented for ARM ELF at this time.
virtual void printAttributes() { }
@@ -57,6 +58,9 @@ public:
virtual void printMipsReginfo() { }
virtual void printMipsOptions() { }
+ // Only implemented for AMDGPU ELF at this time.
+ virtual void printAMDGPURuntimeMD() {}
+
// Only implemented for PE/COFF.
virtual void printCOFFImports() { }
virtual void printCOFFExports() { }
@@ -64,8 +68,7 @@ public:
virtual void printCOFFBaseReloc() { }
virtual void printCOFFDebugDirectory() { }
virtual void printCodeViewDebugInfo() { }
- virtual void
- mergeCodeViewTypes(llvm::codeview::MemoryTypeTableBuilder &CVTypes) {}
+ virtual void mergeCodeViewTypes(llvm::codeview::TypeTableBuilder &CVTypes) {}
// Only implemented for MachO.
virtual void printMachODataInCode() { }
@@ -96,7 +99,7 @@ std::error_code createMachODumper(const object::ObjectFile *Obj,
void dumpCOFFImportFile(const object::COFFImportFile *File);
void dumpCodeViewMergedTypes(ScopedPrinter &Writer,
- llvm::codeview::MemoryTypeTableBuilder &CVTypes);
+ llvm::codeview::TypeTableBuilder &CVTypes);
} // namespace llvm
diff --git a/contrib/llvm/tools/llvm-readobj/StackMapPrinter.h b/contrib/llvm/tools/llvm-readobj/StackMapPrinter.h
index 92645bcf9172..f4ed68e92d78 100644
--- a/contrib/llvm/tools/llvm-readobj/StackMapPrinter.h
+++ b/contrib/llvm/tools/llvm-readobj/StackMapPrinter.h
@@ -24,7 +24,8 @@ void prettyPrintStackMap(OStreamT &OS, const StackMapParserT &SMP) {
// Functions:
for (const auto &F : SMP.functions())
OS << "\n Function address: " << F.getFunctionAddress()
- << ", stack size: " << F.getStackSize();
+ << ", stack size: " << F.getStackSize()
+ << ", callsite record count: " << F.getRecordCount();
// Constants:
OS << "\nNum Constants: " << SMP.getNumConstants();
diff --git a/contrib/llvm/tools/llvm-readobj/llvm-readobj.cpp b/contrib/llvm/tools/llvm-readobj/llvm-readobj.cpp
index c293919fd952..970e1545de0a 100644
--- a/contrib/llvm/tools/llvm-readobj/llvm-readobj.cpp
+++ b/contrib/llvm/tools/llvm-readobj/llvm-readobj.cpp
@@ -22,7 +22,7 @@
#include "llvm-readobj.h"
#include "Error.h"
#include "ObjDumper.h"
-#include "llvm/DebugInfo/CodeView/MemoryTypeTableBuilder.h"
+#include "llvm/DebugInfo/CodeView/TypeTableBuilder.h"
#include "llvm/Object/Archive.h"
#include "llvm/Object/COFFImportFile.h"
#include "llvm/Object/ELFObjectFile.h"
@@ -92,6 +92,10 @@ namespace opts {
cl::desc("Alias for --relocations"),
cl::aliasopt(Relocations));
+ // -notes, -n
+ cl::opt<bool> Notes("notes", cl::desc("Display the ELF notes in the file"));
+ cl::alias NotesShort("n", cl::desc("Alias for --notes"), cl::aliasopt(Notes));
+
// -dyn-relocations
cl::opt<bool> DynRelocs("dyn-relocations",
cl::desc("Display the dynamic relocation entries in the file"));
@@ -120,6 +124,8 @@ namespace opts {
// -dynamic-table
cl::opt<bool> DynamicTable("dynamic-table",
cl::desc("Display the ELF .dynamic section table"));
+ cl::alias DynamicTableShort("d", cl::desc("Alias for --dynamic-table"),
+ cl::aliasopt(DynamicTable));
// -needed-libs
cl::opt<bool> NeededLibraries("needed-libs",
@@ -128,6 +134,8 @@ namespace opts {
// -program-headers
cl::opt<bool> ProgramHeaders("program-headers",
cl::desc("Display ELF program headers"));
+ cl::alias ProgramHeadersShort("l", cl::desc("Alias for --program-headers"),
+ cl::aliasopt(ProgramHeaders));
// -hash-table
cl::opt<bool> HashTable("hash-table",
@@ -158,7 +166,7 @@ namespace opts {
// -arm-attributes, -a
cl::opt<bool> ARMAttributes("arm-attributes",
cl::desc("Display the ARM attributes section"));
- cl::alias ARMAttributesShort("-a", cl::desc("Alias for --arm-attributes"),
+ cl::alias ARMAttributesShort("a", cl::desc("Alias for --arm-attributes"),
cl::aliasopt(ARMAttributes));
// -mips-plt-got
@@ -178,6 +186,10 @@ namespace opts {
cl::opt<bool> MipsOptions("mips-options",
cl::desc("Display the MIPS .MIPS.options section"));
+ // -amdgpu-runtime-metadata
+ cl::opt<bool> AMDGPURuntimeMD("amdgpu-runtime-metadata",
+ cl::desc("Display AMDGPU runtime metadata"));
+
// -coff-imports
cl::opt<bool>
COFFImports("coff-imports", cl::desc("Display the PE/COFF import table"));
@@ -256,7 +268,7 @@ namespace opts {
cl::opt<OutputStyleTy>
Output("elf-output-style", cl::desc("Specify ELF dump style"),
cl::values(clEnumVal(LLVM, "LLVM default style"),
- clEnumVal(GNU, "GNU readelf style"), clEnumValEnd),
+ clEnumVal(GNU, "GNU readelf style")),
cl::init(LLVM));
} // namespace opts
@@ -268,10 +280,16 @@ LLVM_ATTRIBUTE_NORETURN void reportError(Twine Msg) {
exit(1);
}
-void error(std::error_code EC) {
+void error(Error EC) {
if (!EC)
return;
+ handleAllErrors(std::move(EC),
+ [&](const ErrorInfoBase &EI) { reportError(EI.message()); });
+}
+void error(std::error_code EC) {
+ if (!EC)
+ return;
reportError(EC.message());
}
@@ -317,8 +335,15 @@ static bool isMipsArch(unsigned Arch) {
return false;
}
}
+namespace {
+struct ReadObjTypeTableBuilder {
+ ReadObjTypeTableBuilder() : Allocator(), Builder(Allocator) {}
-static llvm::codeview::MemoryTypeTableBuilder CVTypes;
+ llvm::BumpPtrAllocator Allocator;
+ llvm::codeview::TypeTableBuilder Builder;
+};
+}
+static ReadObjTypeTableBuilder CVTypes;
/// @brief Creates an format-specific object file dumper.
static std::error_code createDumper(const ObjectFile *Obj,
@@ -394,10 +419,15 @@ static void dumpObject(const ObjectFile *Obj) {
if (opts::MipsOptions)
Dumper->printMipsOptions();
}
+ if (Obj->getArch() == llvm::Triple::amdgcn)
+ if (opts::AMDGPURuntimeMD)
+ Dumper->printAMDGPURuntimeMD();
if (opts::SectionGroups)
Dumper->printGroupSections();
if (opts::HashHistogram)
Dumper->printHashHistogram();
+ if (opts::Notes)
+ Dumper->printNotes();
}
if (Obj->isCOFF()) {
if (opts::COFFImports)
@@ -413,7 +443,7 @@ static void dumpObject(const ObjectFile *Obj) {
if (opts::CodeView)
Dumper->printCodeViewDebugInfo();
if (opts::CodeViewMergedTypes)
- Dumper->mergeCodeViewTypes(CVTypes);
+ Dumper->mergeCodeViewTypes(CVTypes.Builder);
}
if (Obj->isMachO()) {
if (opts::MachODataInCode)
@@ -435,7 +465,7 @@ static void dumpObject(const ObjectFile *Obj) {
/// @brief Dumps each object file in \a Arc.
static void dumpArchive(const Archive *Arc) {
- Error Err;
+ Error Err = Error::success();
for (auto &Child : Arc->children(Err)) {
Expected<std::unique_ptr<Binary>> ChildOrErr = Child.getAsBinary();
if (!ChildOrErr) {
@@ -450,6 +480,8 @@ static void dumpArchive(const Archive *Arc) {
}
if (ObjectFile *Obj = dyn_cast<ObjectFile>(&*ChildOrErr.get()))
dumpObject(Obj);
+ else if (COFFImportFile *Imp = dyn_cast<COFFImportFile>(&*ChildOrErr.get()))
+ dumpCOFFImportFile(Imp);
else
reportError(Arc->getFileName(), readobj_error::unrecognized_file_format);
}
@@ -516,7 +548,7 @@ int main(int argc, const char *argv[]) {
if (opts::CodeViewMergedTypes) {
ScopedPrinter W(outs());
- dumpCodeViewMergedTypes(W, CVTypes);
+ dumpCodeViewMergedTypes(W, CVTypes.Builder);
}
return 0;
diff --git a/contrib/llvm/tools/llvm-readobj/llvm-readobj.h b/contrib/llvm/tools/llvm-readobj/llvm-readobj.h
index b169c0029183..015692085e5e 100644
--- a/contrib/llvm/tools/llvm-readobj/llvm-readobj.h
+++ b/contrib/llvm/tools/llvm-readobj/llvm-readobj.h
@@ -23,7 +23,8 @@ namespace llvm {
// Various helper functions.
LLVM_ATTRIBUTE_NORETURN void reportError(Twine Msg);
- void error(std::error_code ec);
+ void error(std::error_code EC);
+ void error(llvm::Error EC);
template <class T> T unwrapOrError(ErrorOr<T> EO) {
if (EO)
return *EO;
diff --git a/contrib/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp b/contrib/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp
index b1460e35de80..4e1caa0400f1 100644
--- a/contrib/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp
+++ b/contrib/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp
@@ -66,8 +66,7 @@ Action(cl::desc("Action to perform:"),
clEnumValN(AC_PrintObjectLineInfo, "printobjline",
"Like -printlineinfo but does not load the object first"),
clEnumValN(AC_Verify, "verify",
- "Load, link and verify the resulting memory image."),
- clEnumValEnd));
+ "Load, link and verify the resulting memory image.")));
static cl::opt<std::string>
EntryPoint("entry",
@@ -165,11 +164,11 @@ public:
DummyExterns[Name] = Addr;
}
- RuntimeDyld::SymbolInfo findSymbol(const std::string &Name) override {
+ JITSymbol findSymbol(const std::string &Name) override {
auto I = DummyExterns.find(Name);
if (I != DummyExterns.end())
- return RuntimeDyld::SymbolInfo(I->second, JITSymbolFlags::Exported);
+ return JITSymbol(I->second, JITSymbolFlags::Exported);
return RTDyldMemoryManager::findSymbol(Name);
}
@@ -533,7 +532,7 @@ applySpecificSectionMappings(RuntimeDyldChecker &Checker) {
// Remaps section addresses for -verify mode. The following command line options
// can be used to customize the layout of the memory within the phony target's
// address space:
-// -target-addr-start <s> -- Specify where the phony target addres range starts.
+// -target-addr-start <s> -- Specify where the phony target address range starts.
// -target-addr-end <e> -- Specify where the phony target address range ends.
// -target-section-sep <d> -- Specify how big a gap should be left between the
// end of one section and the start of the next.
@@ -607,7 +606,7 @@ static void remapSectionsAndSymbols(const llvm::Triple &TargetTriple,
// Add dummy symbols to the memory manager.
for (const auto &Mapping : DummySymbolMappings) {
- size_t EqualsIdx = Mapping.find_first_of("=");
+ size_t EqualsIdx = Mapping.find_first_of('=');
if (EqualsIdx == StringRef::npos)
report_fatal_error("Invalid dummy symbol specification '" + Mapping +
diff --git a/contrib/llvm/tools/llvm-stress/llvm-stress.cpp b/contrib/llvm/tools/llvm-stress/llvm-stress.cpp
index 0b887ea9b4cd..731a24d0ac2d 100644
--- a/contrib/llvm/tools/llvm-stress/llvm-stress.cpp
+++ b/contrib/llvm/tools/llvm-stress/llvm-stress.cpp
@@ -67,7 +67,7 @@ public:
return false;
}
- const char *getValueName() const override { return "IR scalar type"; }
+ StringRef getValueName() const override { return "IR scalar type"; }
};
}
diff --git a/contrib/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/contrib/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
index 7ee4ba19603b..fc37dea4c484 100644
--- a/contrib/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
+++ b/contrib/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
@@ -46,8 +46,7 @@ static cl::opt<FunctionNameKind> ClPrintFunctions(
clEnumValN(FunctionNameKind::ShortName, "short",
"print short function name"),
clEnumValN(FunctionNameKind::LinkageName, "linkage",
- "print function linkage name"),
- clEnumValEnd));
+ "print function linkage name")));
static cl::opt<bool>
ClUseRelativeAddress("relative-address", cl::init(false),
diff --git a/contrib/llvm/tools/llvm-xray/llvm-xray.cc b/contrib/llvm/tools/llvm-xray/llvm-xray.cc
new file mode 100644
index 000000000000..ac5faaa408b5
--- /dev/null
+++ b/contrib/llvm/tools/llvm-xray/llvm-xray.cc
@@ -0,0 +1,41 @@
+//===- llvm-xray.cc - XRay Tool Main Program ------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the main entry point for the suite of XRay tools. All
+// additional functionality is implemented as subcommands.
+//
+//===----------------------------------------------------------------------===//
+//
+// Basic usage:
+//
+// llvm-xray [options] <subcommand> [subcommand-specific options]
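+//
+// For example (illustrative), extracting an instrumentation map to YAML:
+//
+// llvm-xray extract ./instrumented-binary -o instr-map.yaml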
+//
+#include "xray-registry.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace llvm::xray;
+
+int main(int argc, char *argv[]) {
+ cl::ParseCommandLineOptions(argc, argv,
+ "XRay Tools\n\n"
+ " This program consolidates multiple XRay trace "
+ "processing tools for convenient access.\n");
+ for (auto *SC : cl::getRegisteredSubcommands()) {
+ if (*SC)
+ if (auto C = dispatch(SC)) {
+ ExitOnError("llvm-xray: ")(C());
+ return 0;
+ }
+ }
+
+ cl::PrintHelpMessage(false, true);
+}
diff --git a/contrib/llvm/tools/llvm-xray/xray-extract.cc b/contrib/llvm/tools/llvm-xray/xray-extract.cc
new file mode 100644
index 000000000000..e51b64c8ad6e
--- /dev/null
+++ b/contrib/llvm/tools/llvm-xray/xray-extract.cc
@@ -0,0 +1,240 @@
+//===- xray-extract.cc - XRay Instrumentation Map Extraction --------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implementation of the xray-extract.h interface.
+//
+// FIXME: Support XRay-instrumented binary formats other than ELF.
+//
+//===----------------------------------------------------------------------===//
+
+#include <type_traits>
+#include <utility>
+
+#include "xray-extract.h"
+
+#include "xray-registry.h"
+#include "xray-sleds.h"
+#include "llvm/Object/ELF.h"
+#include "llvm/Object/ObjectFile.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/DataExtractor.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/YAMLTraits.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace llvm::xray;
+using namespace llvm::yaml;
+
+// llvm-xray extract
+// ----------------------------------------------------------------------------
+static cl::SubCommand Extract("extract", "Extract instrumentation maps");
+static cl::opt<std::string> ExtractInput(cl::Positional,
+ cl::desc("<input file>"), cl::Required,
+ cl::sub(Extract));
+static cl::opt<std::string>
+ ExtractOutput("output", cl::value_desc("output file"), cl::init("-"),
+ cl::desc("output file; use '-' for stdout"),
+ cl::sub(Extract));
+static cl::alias ExtractOutput2("o", cl::aliasopt(ExtractOutput),
+ cl::desc("Alias for -output"),
+ cl::sub(Extract));
+
+struct YAMLXRaySledEntry {
+ int32_t FuncId;
+ Hex64 Address;
+ Hex64 Function;
+ SledEntry::FunctionKinds Kind;
+ bool AlwaysInstrument;
+};
+
+namespace llvm {
+namespace yaml {
+
+template <> struct ScalarEnumerationTraits<SledEntry::FunctionKinds> {
+ static void enumeration(IO &IO, SledEntry::FunctionKinds &Kind) {
+ IO.enumCase(Kind, "function-enter", SledEntry::FunctionKinds::ENTRY);
+ IO.enumCase(Kind, "function-exit", SledEntry::FunctionKinds::EXIT);
+ IO.enumCase(Kind, "tail-exit", SledEntry::FunctionKinds::TAIL);
+ }
+};
+
+template <> struct MappingTraits<YAMLXRaySledEntry> {
+ static void mapping(IO &IO, YAMLXRaySledEntry &Entry) {
+ IO.mapRequired("id", Entry.FuncId);
+ IO.mapRequired("address", Entry.Address);
+ IO.mapRequired("function", Entry.Function);
+ IO.mapRequired("kind", Entry.Kind);
+ IO.mapRequired("always-instrument", Entry.AlwaysInstrument);
+ }
+
+ static constexpr bool flow = true;
+};
+}
+}
+
+LLVM_YAML_IS_SEQUENCE_VECTOR(YAMLXRaySledEntry)
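+// With the flow mapping above, each sled entry is written on a single line,
+// along the lines of (illustrative values):
+// { id: 1, address: 0x401000, function: 0x401000, kind: function-enter,
+// always-instrument: true }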
+
+namespace {
+
+llvm::Error LoadBinaryInstrELF(
+ StringRef Filename, std::deque<SledEntry> &OutputSleds,
+ InstrumentationMapExtractor::FunctionAddressMap &InstrMap,
+ InstrumentationMapExtractor::FunctionAddressReverseMap &FunctionIds) {
+ auto ObjectFile = object::ObjectFile::createObjectFile(Filename);
+
+ if (!ObjectFile)
+ return ObjectFile.takeError();
+
+ // FIXME: Maybe support other ELF formats. For now, 64-bit Little Endian only.
+ if (!ObjectFile->getBinary()->isELF())
+ return make_error<StringError>(
+ "File format not supported (only does ELF).",
+ std::make_error_code(std::errc::not_supported));
+ if (ObjectFile->getBinary()->getArch() != Triple::x86_64)
+ return make_error<StringError>(
+ "File format not supported (only does ELF little endian 64-bit).",
+ std::make_error_code(std::errc::not_supported));
+
+ // Find the section named "xray_instr_map".
+ StringRef Contents = "";
+ const auto &Sections = ObjectFile->getBinary()->sections();
+ auto I = find_if(Sections, [&](object::SectionRef Section) {
+ StringRef Name = "";
+ if (Section.getName(Name))
+ return false;
+ return Name == "xray_instr_map";
+ });
+ if (I == Sections.end())
+ return make_error<StringError>(
+ "Failed to find XRay instrumentation map.",
+ std::make_error_code(std::errc::not_supported));
+ if (I->getContents(Contents))
+ return make_error<StringError>(
+ "Failed to get contents of 'xray_instr_map' section.",
+ std::make_error_code(std::errc::executable_format_error));
+
+ // Copy the instrumentation map data into the Sleds data structure.
+ auto C = Contents.bytes_begin();
+ static constexpr size_t ELF64SledEntrySize = 32;
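+ // Each 32-byte ELF64 sled entry is assumed to hold, in order: a 64-bit sled
+ // address, a 64-bit function address, a one-byte kind and a one-byte
+ // always-instrument flag, with the remainder as padding (matching the
+ // DataExtractor reads below).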
+
+ if ((Contents.bytes_end() - C) % ELF64SledEntrySize != 0)
+ return make_error<StringError>(
+ "Instrumentation map entries not evenly divisible by size of an XRay "
+ "sled entry in ELF64.",
+ std::make_error_code(std::errc::executable_format_error));
+
+ int32_t FuncId = 1;
+ uint64_t CurFn = 0;
+ std::deque<SledEntry> Sleds;
+ for (; C != Contents.bytes_end(); C += ELF64SledEntrySize) {
+ DataExtractor Extractor(
+ StringRef(reinterpret_cast<const char *>(C), ELF64SledEntrySize), true,
+ 8);
+ Sleds.push_back({});
+ auto &Entry = Sleds.back();
+ uint32_t OffsetPtr = 0;
+ Entry.Address = Extractor.getU64(&OffsetPtr);
+ Entry.Function = Extractor.getU64(&OffsetPtr);
+ auto Kind = Extractor.getU8(&OffsetPtr);
+ switch (Kind) {
+ case 0: // ENTRY
+ Entry.Kind = SledEntry::FunctionKinds::ENTRY;
+ break;
+ case 1: // EXIT
+ Entry.Kind = SledEntry::FunctionKinds::EXIT;
+ break;
+ case 2: // TAIL
+ Entry.Kind = SledEntry::FunctionKinds::TAIL;
+ break;
+ default:
+ return make_error<StringError>(
+ Twine("Encountered unknown sled type ") + "'" + Twine(int32_t{Kind}) +
+ "'.",
+ std::make_error_code(std::errc::executable_format_error));
+ }
+ auto AlwaysInstrument = Extractor.getU8(&OffsetPtr);
+ Entry.AlwaysInstrument = AlwaysInstrument != 0;
+
+ // We replicate the function id generation scheme implemented in the runtime
+ // here. Ideally we should be able to break it out, or output this map from
+ // the runtime, but that's a design point we can discuss later on. For now,
+ // we replicate the logic and move on.
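+ // For example (illustrative): consecutive sleds for functions at 0x400100,
+ // 0x400100 and 0x400200 are assigned function ids 1, 1 and 2.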
+ if (CurFn == 0) {
+ CurFn = Entry.Function;
+ InstrMap[FuncId] = Entry.Function;
+ FunctionIds[Entry.Function] = FuncId;
+ }
+ if (Entry.Function != CurFn) {
+ ++FuncId;
+ CurFn = Entry.Function;
+ InstrMap[FuncId] = Entry.Function;
+ FunctionIds[Entry.Function] = FuncId;
+ }
+ }
+ OutputSleds = std::move(Sleds);
+ return llvm::Error::success();
+}
+
+} // namespace
+
+InstrumentationMapExtractor::InstrumentationMapExtractor(std::string Filename,
+ InputFormats Format,
+ Error &EC) {
+ ErrorAsOutParameter ErrAsOutputParam(&EC);
+ switch (Format) {
+ case InputFormats::ELF: {
+ EC = handleErrors(
+ LoadBinaryInstrELF(Filename, Sleds, FunctionAddresses, FunctionIds),
+ [](std::unique_ptr<ErrorInfoBase> E) {
+ return joinErrors(
+ make_error<StringError>(
+ Twine("Cannot extract instrumentation map from '") +
+ ExtractInput + "'.",
+ std::make_error_code(std::errc::executable_format_error)),
+ std::move(E));
+ });
+ break;
+ }
+ default:
+ llvm_unreachable("Input format type not supported yet.");
+ break;
+ }
+}
+
+void InstrumentationMapExtractor::exportAsYAML(raw_ostream &OS) {
+ // First translate the sleds into YAMLXRaySledEntry objects in a vector.
+ std::vector<YAMLXRaySledEntry> YAMLSleds;
+ YAMLSleds.reserve(Sleds.size());
+ for (const auto &Sled : Sleds) {
+ YAMLSleds.push_back({FunctionIds[Sled.Function], Sled.Address,
+ Sled.Function, Sled.Kind, Sled.AlwaysInstrument});
+ }
+ Output Out(OS);
+ Out << YAMLSleds;
+}
+
+static CommandRegistration Unused(&Extract, []() -> Error {
+ Error Err = Error::success();
+ xray::InstrumentationMapExtractor Extractor(
+ ExtractInput, InstrumentationMapExtractor::InputFormats::ELF, Err);
+ if (Err)
+ return Err;
+
+ std::error_code EC;
+ raw_fd_ostream OS(ExtractOutput, EC, sys::fs::OpenFlags::F_Text);
+ if (EC)
+ return make_error<StringError>(
+ Twine("Cannot open file '") + ExtractOutput + "' for writing.", EC);
+ Extractor.exportAsYAML(OS);
+ return Error::success();
+});
diff --git a/contrib/llvm/tools/llvm-xray/xray-extract.h b/contrib/llvm/tools/llvm-xray/xray-extract.h
new file mode 100644
index 000000000000..91e4db36805f
--- /dev/null
+++ b/contrib/llvm/tools/llvm-xray/xray-extract.h
@@ -0,0 +1,58 @@
+//===- xray-extract.h - XRay Instrumentation Map Extraction ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines the interface for extracting the instrumentation map from an
+// XRay-instrumented binary.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_XRAY_EXTRACT_H
+#define LLVM_TOOLS_XRAY_EXTRACT_H
+
+#include <deque>
+#include <map>
+#include <string>
+#include <unordered_map>
+
+#include "xray-sleds.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+namespace xray {
+
+class InstrumentationMapExtractor {
+public:
+ typedef std::unordered_map<int32_t, uint64_t> FunctionAddressMap;
+ typedef std::unordered_map<uint64_t, int32_t> FunctionAddressReverseMap;
+
+ enum class InputFormats { ELF, YAML };
+
+private:
+ std::deque<SledEntry> Sleds;
+ FunctionAddressMap FunctionAddresses;
+ FunctionAddressReverseMap FunctionIds;
+
+public:
+ /// Loads the instrumentation map from |Filename|. Updates |EC| in case there
+ /// were errors encountered opening the file. |Format| defines what the input
+ /// instrumentation map is in.
+ InstrumentationMapExtractor(std::string Filename, InputFormats Format,
+ Error &EC);
+
+ const FunctionAddressMap &getFunctionAddresses() { return FunctionAddresses; }
+
+ /// Exports the loaded function address map as YAML through |OS|.
+ void exportAsYAML(raw_ostream &OS);
+};
+
+} // namespace xray
+} // namespace llvm
+
+#endif // LLVM_TOOLS_XRAY_EXTRACT_H
diff --git a/contrib/llvm/tools/llvm-xray/xray-registry.cc b/contrib/llvm/tools/llvm-xray/xray-registry.cc
new file mode 100644
index 000000000000..36d3a2e58f97
--- /dev/null
+++ b/contrib/llvm/tools/llvm-xray/xray-registry.cc
@@ -0,0 +1,41 @@
+//===- xray-registry.cc - Implement a command registry. -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implement a simple subcommand registry.
+//
+//===----------------------------------------------------------------------===//
+#include "xray-registry.h"
+
+#include "llvm/Support/ManagedStatic.h"
+#include <unordered_map>
+
+namespace llvm {
+namespace xray {
+
+using HandlerType = std::function<Error()>;
+
+ManagedStatic<std::unordered_map<cl::SubCommand *, HandlerType>> Commands;
+
+CommandRegistration::CommandRegistration(cl::SubCommand *SC,
+ HandlerType Command) {
+ assert(Commands->count(SC) == 0 &&
+ "Attempting to overwrite a command handler");
+ assert(Command && "Attempting to register an empty std::function<Error()>");
+ (*Commands)[SC] = Command;
+}
+
+HandlerType dispatch(cl::SubCommand *SC) {
+ auto It = Commands->find(SC);
+ assert(It != Commands->end() &&
+ "Attempting to dispatch on un-registered SubCommand.");
+ return It->second;
+}
+
+} // namespace xray
+} // namespace llvm
diff --git a/contrib/llvm/tools/llvm-xray/xray-registry.h b/contrib/llvm/tools/llvm-xray/xray-registry.h
new file mode 100644
index 000000000000..6eab016273f5
--- /dev/null
+++ b/contrib/llvm/tools/llvm-xray/xray-registry.h
@@ -0,0 +1,41 @@
+//===- xray-registry.h - Define registry mechanism for commands. ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implement a simple subcommand registry.
+//
+//===----------------------------------------------------------------------===//
+#ifndef TOOLS_LLVM_XRAY_XRAY_REGISTRY_H
+#define TOOLS_LLVM_XRAY_XRAY_REGISTRY_H
+
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+namespace xray {
+
+// Use |CommandRegistration| as a global initialiser that registers a function
+// and associates it with |SC|. This requires that a command has not been
+// registered to a given |SC|.
+//
+// Usage:
+//
+// // At namespace scope.
+// static CommandRegistration Unused(&MySubCommand, [] { ... });
+//
+struct CommandRegistration {
+ CommandRegistration(cl::SubCommand *SC, std::function<Error()> Command);
+};
+
+// Requires that |SC| is not null and has a handler function associated with it.
+std::function<Error()> dispatch(cl::SubCommand *SC);
+
+} // namespace xray
+} // namespace llvm
+
+#endif // TOOLS_LLVM_XRAY_XRAY_REGISTRY_H
diff --git a/contrib/llvm/tools/llvm-xray/xray-sleds.h b/contrib/llvm/tools/llvm-xray/xray-sleds.h
new file mode 100644
index 000000000000..99279579ed47
--- /dev/null
+++ b/contrib/llvm/tools/llvm-xray/xray-sleds.h
@@ -0,0 +1,32 @@
+//===- xray-sleds.h - XRay Sleds Data Structure ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Defines the structure used to represent XRay instrumentation map entries.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_XRAY_XRAY_SLEDS_H
+#define LLVM_TOOLS_LLVM_XRAY_XRAY_SLEDS_H
+
+namespace llvm {
+namespace xray {
+
+struct SledEntry {
+ enum class FunctionKinds { ENTRY, EXIT, TAIL };
+
+ uint64_t Address;
+ uint64_t Function;
+ FunctionKinds Kind;
+ bool AlwaysInstrument;
+};
+
+} // namespace xray
+} // namespace llvm
+
+#endif // LLVM_TOOLS_LLVM_XRAY_XRAY_SLEDS_H
diff --git a/contrib/llvm/tools/opt/NewPMDriver.cpp b/contrib/llvm/tools/opt/NewPMDriver.cpp
index f7b2f18d09d9..acdf2639b3c7 100644
--- a/contrib/llvm/tools/opt/NewPMDriver.cpp
+++ b/contrib/llvm/tools/opt/NewPMDriver.cpp
@@ -47,12 +47,13 @@ static cl::opt<std::string>
"pipeline for handling managed aliasing queries"),
cl::Hidden);
-bool llvm::runPassPipeline(StringRef Arg0, LLVMContext &Context, Module &M,
+bool llvm::runPassPipeline(StringRef Arg0, Module &M,
TargetMachine *TM, tool_output_file *Out,
StringRef PassPipeline, OutputKind OK,
VerifierKind VK,
bool ShouldPreserveAssemblyUseListOrder,
- bool ShouldPreserveBitcodeUseListOrder) {
+ bool ShouldPreserveBitcodeUseListOrder,
+ bool EmitSummaryIndex, bool EmitModuleHash) {
PassBuilder PB(TM);
// Specially handle the alias analysis manager so that we can register
@@ -100,8 +101,8 @@ bool llvm::runPassPipeline(StringRef Arg0, LLVMContext &Context, Module &M,
PrintModulePass(Out->os(), "", ShouldPreserveAssemblyUseListOrder));
break;
case OK_OutputBitcode:
- MPM.addPass(
- BitcodeWriterPass(Out->os(), ShouldPreserveBitcodeUseListOrder));
+ MPM.addPass(BitcodeWriterPass(Out->os(), ShouldPreserveBitcodeUseListOrder,
+ EmitSummaryIndex, EmitModuleHash));
break;
}
diff --git a/contrib/llvm/tools/opt/NewPMDriver.h b/contrib/llvm/tools/opt/NewPMDriver.h
index 66a470d49ba8..04022e7ec562 100644
--- a/contrib/llvm/tools/opt/NewPMDriver.h
+++ b/contrib/llvm/tools/opt/NewPMDriver.h
@@ -47,12 +47,13 @@ enum VerifierKind {
/// inclusion of the new pass manager headers and the old headers into the same
/// file. Its interface is consequently somewhat ad-hoc, but will go away
/// when the transition finishes.
-bool runPassPipeline(StringRef Arg0, LLVMContext &Context, Module &M,
+bool runPassPipeline(StringRef Arg0, Module &M,
TargetMachine *TM, tool_output_file *Out,
StringRef PassPipeline, opt_tool::OutputKind OK,
opt_tool::VerifierKind VK,
bool ShouldPreserveAssemblyUseListOrder,
- bool ShouldPreserveBitcodeUseListOrder);
+ bool ShouldPreserveBitcodeUseListOrder,
+ bool EmitSummaryIndex, bool EmitModuleHash);
}
#endif
diff --git a/contrib/llvm/tools/opt/PassPrinters.cpp b/contrib/llvm/tools/opt/PassPrinters.cpp
index d6994894ca05..65a53038fc50 100644
--- a/contrib/llvm/tools/opt/PassPrinters.cpp
+++ b/contrib/llvm/tools/opt/PassPrinters.cpp
@@ -46,7 +46,7 @@ struct FunctionPassPrinter : public FunctionPass {
return false;
}
- const char *getPassName() const override { return PassName.c_str(); }
+ StringRef getPassName() const override { return PassName; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequiredID(PassToPrint->getTypeInfo());
@@ -83,7 +83,7 @@ struct CallGraphSCCPassPrinter : public CallGraphSCCPass {
return false;
}
- const char *getPassName() const override { return PassName.c_str(); }
+ StringRef getPassName() const override { return PassName; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequiredID(PassToPrint->getTypeInfo());
@@ -115,7 +115,7 @@ struct ModulePassPrinter : public ModulePass {
return false;
}
- const char *getPassName() const override { return PassName.c_str(); }
+ StringRef getPassName() const override { return PassName; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequiredID(PassToPrint->getTypeInfo());
@@ -148,7 +148,7 @@ struct LoopPassPrinter : public LoopPass {
return false;
}
- const char *getPassName() const override { return PassName.c_str(); }
+ StringRef getPassName() const override { return PassName; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequiredID(PassToPrint->getTypeInfo());
@@ -183,7 +183,7 @@ struct RegionPassPrinter : public RegionPass {
return false;
}
- const char *getPassName() const override { return PassName.c_str(); }
+ StringRef getPassName() const override { return PassName; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequiredID(PassToPrint->getTypeInfo());
@@ -217,7 +217,7 @@ struct BasicBlockPassPrinter : public BasicBlockPass {
return false;
}
- const char *getPassName() const override { return PassName.c_str(); }
+ StringRef getPassName() const override { return PassName; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequiredID(PassToPrint->getTypeInfo());
diff --git a/contrib/llvm/tools/opt/opt.cpp b/contrib/llvm/tools/opt/opt.cpp
index 0a4ce1dfe8ef..a93c06c1d13d 100644
--- a/contrib/llvm/tools/opt/opt.cpp
+++ b/contrib/llvm/tools/opt/opt.cpp
@@ -49,7 +49,10 @@
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Support/ToolOutputFile.h"
+#include "llvm/Support/YAMLTraits.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/Coroutines.h"
+#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include <algorithm>
@@ -96,6 +99,10 @@ static cl::opt<bool>
OutputAssembly("S", cl::desc("Write output as LLVM assembly"));
static cl::opt<bool>
+ OutputThinLTOBC("thinlto-bc",
+ cl::desc("Write output as ThinLTO-ready bitcode"));
+
+static cl::opt<bool>
NoVerify("disable-verify", cl::desc("Do not run the verifier"), cl::Hidden);
static cl::opt<bool>
@@ -121,6 +128,10 @@ StandardLinkOpts("std-link-opts",
cl::desc("Include the standard link time optimizations"));
static cl::opt<bool>
+OptLevelO0("O0",
+ cl::desc("Optimization level 0. Similar to clang -O0"));
+
+static cl::opt<bool>
OptLevelO1("O1",
cl::desc("Optimization level 1. Similar to clang -O1"));
@@ -215,11 +226,21 @@ static cl::opt<bool> DiscardValueNames(
cl::desc("Discard names from Value (other than GlobalValue)."),
cl::init(false), cl::Hidden);
+static cl::opt<bool> Coroutines(
+ "enable-coroutines",
+ cl::desc("Enable coroutine passes."),
+ cl::init(false), cl::Hidden);
+
static cl::opt<bool> PassRemarksWithHotness(
"pass-remarks-with-hotness",
cl::desc("With PGO, include profile count in optimization remarks"),
cl::Hidden);
+static cl::opt<std::string>
+ RemarksFilename("pass-remarks-output",
+ cl::desc("YAML output filename for pass remarks"),
+ cl::value_desc("filename"));
+
static inline void addPass(legacy::PassManagerBase &PM, Pass *P) {
// Add the pass to the pass manager...
PM.add(P);
@@ -249,7 +270,7 @@ static void AddOptimizationPasses(legacy::PassManagerBase &MPM,
} else if (OptLevel > 1) {
Builder.Inliner = createFunctionInliningPass(OptLevel, SizeLevel);
} else {
- Builder.Inliner = createAlwaysInlinerPass();
+ Builder.Inliner = createAlwaysInlinerLegacyPass();
}
Builder.DisableUnitAtATime = !UnitAtATime;
Builder.DisableUnrollLoops = (DisableLoopUnrolling.getNumOccurrences() > 0) ?
@@ -274,6 +295,9 @@ static void AddOptimizationPasses(legacy::PassManagerBase &MPM,
TM->addEarlyAsPossiblePasses(PM);
});
+ if (Coroutines)
+ addCoroutinePassesToExtensionPoints(Builder);
+
Builder.populateFunctionPassManager(FPM);
Builder.populateModulePassManager(MPM);
}
@@ -344,10 +368,12 @@ int main(int argc, char **argv) {
InitializeAllTargets();
InitializeAllTargetMCs();
InitializeAllAsmPrinters();
+ InitializeAllAsmParsers();
// Initialize passes
PassRegistry &Registry = *PassRegistry::getPassRegistry();
initializeCore(Registry);
+ initializeCoroutines(Registry);
initializeScalarOpts(Registry);
initializeObjCARCOpts(Registry);
initializeVectorization(Registry);
@@ -361,7 +387,7 @@ int main(int argc, char **argv) {
// supported.
initializeCodeGenPreparePass(Registry);
initializeAtomicExpandPass(Registry);
- initializeRewriteSymbolsPass(Registry);
+ initializeRewriteSymbolsLegacyPassPass(Registry);
initializeWinEHPreparePass(Registry);
initializeDwarfEHPreparePass(Registry);
initializeSafeStackPass(Registry);
@@ -369,6 +395,7 @@ int main(int argc, char **argv) {
initializePreISelIntrinsicLoweringLegacyPassPass(Registry);
initializeGlobalMergePass(Registry);
initializeInterleavedAccessPass(Registry);
+ initializeCountingFunctionInserterPass(Registry);
initializeUnreachableBlockElimLegacyPassPass(Registry);
#ifdef LINK_POLLY_INTO_TOOLS
@@ -392,6 +419,19 @@ int main(int argc, char **argv) {
if (PassRemarksWithHotness)
Context.setDiagnosticHotnessRequested(true);
+ std::unique_ptr<tool_output_file> YamlFile;
+ if (RemarksFilename != "") {
+ std::error_code EC;
+ YamlFile = llvm::make_unique<tool_output_file>(RemarksFilename, EC,
+ sys::fs::F_None);
+ if (EC) {
+ errs() << EC.message() << '\n';
+ return 1;
+ }
+ Context.setDiagnosticsOutputFile(
+ llvm::make_unique<yaml::Output>(YamlFile->os()));
+ }
+
// Load the input module...
std::unique_ptr<Module> M = parseIRFile(InputFilename, Err, Context);
@@ -474,9 +514,10 @@ int main(int argc, char **argv) {
// The user has asked to use the new pass manager and provided a pipeline
// string. Hand off the rest of the functionality to the new code for that
// layer.
- return runPassPipeline(argv[0], Context, *M, TM.get(), Out.get(),
+ return runPassPipeline(argv[0], *M, TM.get(), Out.get(),
PassPipeline, OK, VK, PreserveAssemblyUseListOrder,
- PreserveBitcodeUseListOrder)
+ PreserveBitcodeUseListOrder, EmitSummaryIndex,
+ EmitModuleHash)
? 0
: 1;
}
@@ -505,7 +546,8 @@ int main(int argc, char **argv) {
: TargetIRAnalysis()));
std::unique_ptr<legacy::FunctionPassManager> FPasses;
- if (OptLevelO1 || OptLevelO2 || OptLevelOs || OptLevelOz || OptLevelO3) {
+ if (OptLevelO0 || OptLevelO1 || OptLevelO2 || OptLevelOs || OptLevelOz ||
+ OptLevelO3) {
FPasses.reset(new legacy::FunctionPassManager(M.get()));
FPasses->add(createTargetTransformInfoWrapperPass(
TM ? TM->getTargetIRAnalysis() : TargetIRAnalysis()));
@@ -537,6 +579,11 @@ int main(int argc, char **argv) {
StandardLinkOpts = false;
}
+ if (OptLevelO0 && OptLevelO0.getPosition() < PassList.getPosition(i)) {
+ AddOptimizationPasses(Passes, *FPasses, TM.get(), 0, 0);
+ OptLevelO0 = false;
+ }
+
if (OptLevelO1 && OptLevelO1.getPosition() < PassList.getPosition(i)) {
AddOptimizationPasses(Passes, *FPasses, TM.get(), 1, 0);
OptLevelO1 = false;
@@ -609,6 +656,9 @@ int main(int argc, char **argv) {
StandardLinkOpts = false;
}
+ if (OptLevelO0)
+ AddOptimizationPasses(Passes, *FPasses, TM.get(), 0, 0);
+
if (OptLevelO1)
AddOptimizationPasses(Passes, *FPasses, TM.get(), 1, 0);
@@ -658,7 +708,9 @@ int main(int argc, char **argv) {
if (EmitModuleHash)
report_fatal_error("Text output is incompatible with -module-hash");
Passes.add(createPrintModulePass(*OS, "", PreserveAssemblyUseListOrder));
- } else
+ } else if (OutputThinLTOBC)
+ Passes.add(createWriteThinLTOBitcodePass(*OS));
+ else
Passes.add(createBitcodeWriterPass(*OS, PreserveBitcodeUseListOrder,
EmitSummaryIndex, EmitModuleHash));
}
@@ -690,6 +742,8 @@ int main(int argc, char **argv) {
"the compile-twice option\n";
Out->os() << BOS->str();
Out->keep();
+ if (YamlFile)
+ YamlFile->keep();
return 1;
}
Out->os() << BOS->str();
@@ -699,5 +753,8 @@ int main(int argc, char **argv) {
if (!NoOutput || PrintBreakpoints)
Out->keep();
+ if (YamlFile)
+ YamlFile->keep();
+
return 0;
}
diff --git a/contrib/llvm/utils/TableGen/AsmMatcherEmitter.cpp b/contrib/llvm/utils/TableGen/AsmMatcherEmitter.cpp
index 76228e0b6581..1272d2b9f287 100644
--- a/contrib/llvm/utils/TableGen/AsmMatcherEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/AsmMatcherEmitter.cpp
@@ -97,6 +97,9 @@
//===----------------------------------------------------------------------===//
#include "CodeGenTarget.h"
+#include "SubtargetFeatureInfo.h"
+#include "Types.h"
+#include "llvm/ADT/CachedHashString.h"
#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -126,7 +129,6 @@ MatchPrefix("match-prefix", cl::init(""),
namespace {
class AsmMatcherInfo;
-struct SubtargetFeatureInfo;
// Register sets are used as keys in some second-order sets TableGen creates
// when generating its data structures. This means that the order of two
@@ -353,6 +355,7 @@ public:
std::string TokenizingCharacters;
std::string SeparatorCharacters;
std::string BreakCharacters;
+ std::string Name;
int AsmVariantNo;
};
@@ -556,20 +559,17 @@ struct MatchableInfo {
/// findAsmOperand - Find the AsmOperand with the specified name and
/// suboperand index.
int findAsmOperand(StringRef N, int SubOpIdx) const {
- auto I = std::find_if(AsmOperands.begin(), AsmOperands.end(),
- [&](const AsmOperand &Op) {
- return Op.SrcOpName == N && Op.SubOpIdx == SubOpIdx;
- });
+ auto I = find_if(AsmOperands, [&](const AsmOperand &Op) {
+ return Op.SrcOpName == N && Op.SubOpIdx == SubOpIdx;
+ });
return (I != AsmOperands.end()) ? I - AsmOperands.begin() : -1;
}
/// findAsmOperandNamed - Find the first AsmOperand with the specified name.
/// This does not check the suboperand index.
int findAsmOperandNamed(StringRef N) const {
- auto I = std::find_if(AsmOperands.begin(), AsmOperands.end(),
- [&](const AsmOperand &Op) {
- return Op.SrcOpName == N;
- });
+ auto I = find_if(AsmOperands,
+ [&](const AsmOperand &Op) { return Op.SrcOpName == N; });
return (I != AsmOperands.end()) ? I - AsmOperands.begin() : -1;
}
@@ -653,28 +653,6 @@ private:
void addAsmOperand(StringRef Token, bool IsIsolatedToken = false);
};
-/// SubtargetFeatureInfo - Helper class for storing information on a subtarget
-/// feature which participates in instruction matching.
-struct SubtargetFeatureInfo {
- /// \brief The predicate record for this feature.
- Record *TheDef;
-
- /// \brief An unique index assigned to represent this feature.
- uint64_t Index;
-
- SubtargetFeatureInfo(Record *D, uint64_t Idx) : TheDef(D), Index(Idx) {}
-
- /// \brief The name of the enumerated constant identifying this feature.
- std::string getEnumName() const {
- return "Feature_" + TheDef->getName();
- }
-
- void dump() const {
- errs() << getEnumName() << " " << Index << "\n";
- TheDef->dump();
- }
-};
-
struct OperandMatchEntry {
unsigned OperandMask;
const MatchableInfo* MI;
@@ -754,7 +732,7 @@ public:
CodeGenTarget &Target,
RecordKeeper &Records);
- /// buildInfo - Construct the various tables used during matching.
+ /// Construct the various tables used during matching.
void buildInfo();
/// buildOperandMatchInfo - Build the necessary information to handle user
@@ -774,9 +752,9 @@ public:
}
bool hasOptionalOperands() const {
- return std::find_if(Classes.begin(), Classes.end(),
- [](const ClassInfo& Class){ return Class.IsOptional; })
- != Classes.end();
+ return find_if(Classes, [](const ClassInfo &Class) {
+ return Class.IsOptional;
+ }) != Classes.end();
}
};
@@ -1311,10 +1289,10 @@ buildRegisterClasses(SmallPtrSetImpl<Record*> &SingletonRegisters) {
if (CI->ValueName.empty()) {
CI->ClassName = Rec->getName();
- CI->Name = "MCK_" + Rec->getName();
+ CI->Name = "MCK_" + Rec->getName().str();
CI->ValueName = Rec->getName();
} else
- CI->ValueName = CI->ValueName + "," + Rec->getName();
+ CI->ValueName = CI->ValueName + "," + Rec->getName().str();
}
}
@@ -1437,21 +1415,15 @@ void AsmMatcherInfo::buildOperandMatchInfo() {
void AsmMatcherInfo::buildInfo() {
// Build information about all of the AssemblerPredicates.
- std::vector<Record*> AllPredicates =
- Records.getAllDerivedDefinitions("Predicate");
- for (Record *Pred : AllPredicates) {
- // Ignore predicates that are not intended for the assembler.
- if (!Pred->getValueAsBit("AssemblerMatcherPredicate"))
- continue;
-
- if (Pred->getName().empty())
- PrintFatalError(Pred->getLoc(), "Predicate has no name!");
-
- SubtargetFeatures.insert(std::make_pair(
- Pred, SubtargetFeatureInfo(Pred, SubtargetFeatures.size())));
- DEBUG(SubtargetFeatures.find(Pred)->second.dump());
- assert(SubtargetFeatures.size() <= 64 && "Too many subtarget features!");
- }
+ const std::vector<std::pair<Record *, SubtargetFeatureInfo>>
+ &SubtargetFeaturePairs = SubtargetFeatureInfo::getAll(Records);
+ SubtargetFeatures.insert(SubtargetFeaturePairs.begin(),
+ SubtargetFeaturePairs.end());
+#ifndef NDEBUG
+ for (const auto &Pair : SubtargetFeatures)
+ DEBUG(Pair.second.dump());
+#endif // NDEBUG
+ assert(SubtargetFeatures.size() <= 64 && "Too many subtarget features!");
bool HasMnemonicFirst = AsmParser->getValueAsBit("HasMnemonicFirst");
@@ -1471,6 +1443,7 @@ void AsmMatcherInfo::buildInfo() {
AsmVariant->getValueAsString("SeparatorCharacters");
Variant.BreakCharacters =
AsmVariant->getValueAsString("BreakCharacters");
+ Variant.Name = AsmVariant->getValueAsString("Name");
Variant.AsmVariantNo = AsmVariant->getValueAsInt("Variant");
for (const CodeGenInstruction *CGI : Target.getInstructionsByEnumValue()) {
@@ -1484,6 +1457,11 @@ void AsmMatcherInfo::buildInfo() {
if (CGI->TheDef->getValueAsBit("isCodeGenOnly"))
continue;
+ // Ignore instructions for different instructions
+ const std::string V = CGI->TheDef->getValueAsString("AsmVariantName");
+ if (!V.empty() && V != Variant.Name)
+ continue;
+
auto II = llvm::make_unique<MatchableInfo>(*CGI);
II->initialize(*this, SingletonRegisters, Variant, HasMnemonicFirst);
@@ -1512,6 +1490,10 @@ void AsmMatcherInfo::buildInfo() {
.startswith( MatchPrefix))
continue;
+ const std::string V = Alias->TheDef->getValueAsString("AsmVariantName");
+ if (!V.empty() && V != Variant.Name)
+ continue;
+
auto II = llvm::make_unique<MatchableInfo>(std::move(Alias));
II->initialize(*this, SingletonRegisters, Variant, HasMnemonicFirst);
@@ -1613,15 +1595,15 @@ void AsmMatcherInfo::buildInfo() {
// Reorder classes so that classes precede super classes.
Classes.sort();
-#ifndef NDEBUG
- // Verify that the table is now sorted
+#ifdef EXPENSIVE_CHECKS
+ // Verify that the table is sorted and operator < works transitively.
for (auto I = Classes.begin(), E = Classes.end(); I != E; ++I) {
for (auto J = I; J != E; ++J) {
assert(!(*J < *I));
assert(I == J || !J->isSubsetOf(*I));
}
}
-#endif // NDEBUG
+#endif
}
/// buildInstructionOperandReference - The specified operand is a reference to a
@@ -1814,13 +1796,13 @@ void MatchableInfo::buildAliasResultOperands() {
}
}
-static unsigned getConverterOperandID(const std::string &Name,
- SmallSetVector<std::string, 16> &Table,
- bool &IsNew) {
- IsNew = Table.insert(Name);
+static unsigned
+getConverterOperandID(const std::string &Name,
+ SmallSetVector<CachedHashString, 16> &Table,
+ bool &IsNew) {
+ IsNew = Table.insert(CachedHashString(Name));
- unsigned ID = IsNew ? Table.size() - 1 :
- std::find(Table.begin(), Table.end(), Name) - Table.begin();
+ unsigned ID = IsNew ? Table.size() - 1 : find(Table, Name) - Table.begin();
assert(ID < Table.size());
@@ -1831,13 +1813,13 @@ static void emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName,
std::vector<std::unique_ptr<MatchableInfo>> &Infos,
bool HasMnemonicFirst, bool HasOptionalOperands,
raw_ostream &OS) {
- SmallSetVector<std::string, 16> OperandConversionKinds;
- SmallSetVector<std::string, 16> InstructionConversionKinds;
+ SmallSetVector<CachedHashString, 16> OperandConversionKinds;
+ SmallSetVector<CachedHashString, 16> InstructionConversionKinds;
std::vector<std::vector<uint8_t> > ConversionTable;
size_t MaxRowLength = 2; // minimum is custom converter plus terminator.
// TargetOperandClass - This is the target's operand class, like X86Operand.
- std::string TargetOperandClass = Target.getName() + "Operand";
+ std::string TargetOperandClass = Target.getName().str() + "Operand";
// Write the convert function to a separate stream, so we can drop it after
// the enum. We'll build up the conversion handlers for the individual
@@ -1904,9 +1886,9 @@ static void emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName,
// Pre-populate the operand conversion kinds with the standard always
// available entries.
- OperandConversionKinds.insert("CVT_Done");
- OperandConversionKinds.insert("CVT_Reg");
- OperandConversionKinds.insert("CVT_Tied");
+ OperandConversionKinds.insert(CachedHashString("CVT_Done"));
+ OperandConversionKinds.insert(CachedHashString("CVT_Reg"));
+ OperandConversionKinds.insert(CachedHashString("CVT_Tied"));
enum { CVT_Done, CVT_Reg, CVT_Tied };
for (auto &II : Infos) {
@@ -1918,13 +1900,13 @@ static void emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName,
II->ConversionFnKind = Signature;
// Check if we have already generated this signature.
- if (!InstructionConversionKinds.insert(Signature))
+ if (!InstructionConversionKinds.insert(CachedHashString(Signature)))
continue;
// Remember this converter for the kind enum.
unsigned KindID = OperandConversionKinds.size();
- OperandConversionKinds.insert("CVT_" +
- getEnumNameForToken(AsmMatchConverter));
+ OperandConversionKinds.insert(
+ CachedHashString("CVT_" + getEnumNameForToken(AsmMatchConverter)));
// Add the converter row for this instruction.
ConversionTable.emplace_back();
@@ -2072,7 +2054,7 @@ static void emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName,
Reg = "0";
} else {
Reg = getQualifiedName(OpInfo.Register);
- Name = "reg" + OpInfo.Register->getName();
+ Name = "reg" + OpInfo.Register->getName().str();
}
Signature += "__" + Name;
Name = "CVT_" + Name;
@@ -2106,7 +2088,7 @@ static void emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName,
// Save the signature. If we already have it, don't add a new row
// to the table.
- if (!InstructionConversionKinds.insert(Signature))
+ if (!InstructionConversionKinds.insert(CachedHashString(Signature)))
continue;
// Add the row to the table.
@@ -2123,14 +2105,14 @@ static void emitConvertFuncs(CodeGenTarget &Target, StringRef ClassName,
// Output the operand conversion kind enum.
OS << "enum OperatorConversionKind {\n";
- for (const std::string &Converter : OperandConversionKinds)
+ for (const auto &Converter : OperandConversionKinds)
OS << " " << Converter << ",\n";
OS << " CVT_NUM_CONVERTERS\n";
OS << "};\n\n";
// Output the instruction conversion kind enum.
OS << "enum InstructionConversionKind {\n";
- for (const std::string &Signature : InstructionConversionKinds)
+ for (const auto &Signature : InstructionConversionKinds)
OS << " " << Signature << ",\n";
OS << " CVT_NUM_SIGNATURES\n";
OS << "};\n\n";
@@ -2382,40 +2364,6 @@ static void emitMatchRegisterAltName(CodeGenTarget &Target, Record *AsmParser,
OS << "}\n\n";
}
-static const char *getMinimalTypeForRange(uint64_t Range) {
- assert(Range <= 0xFFFFFFFFFFFFFFFFULL && "Enum too large");
- if (Range > 0xFFFFFFFFULL)
- return "uint64_t";
- if (Range > 0xFFFF)
- return "uint32_t";
- if (Range > 0xFF)
- return "uint16_t";
- return "uint8_t";
-}
-
-static const char *getMinimalRequiredFeaturesType(const AsmMatcherInfo &Info) {
- uint64_t MaxIndex = Info.SubtargetFeatures.size();
- if (MaxIndex > 0)
- MaxIndex--;
- return getMinimalTypeForRange(1ULL << MaxIndex);
-}
-
-/// emitSubtargetFeatureFlagEnumeration - Emit the subtarget feature flag
-/// definitions.
-static void emitSubtargetFeatureFlagEnumeration(AsmMatcherInfo &Info,
- raw_ostream &OS) {
- OS << "// Flags for subtarget features that participate in "
- << "instruction matching.\n";
- OS << "enum SubtargetFeatureFlag : " << getMinimalRequiredFeaturesType(Info)
- << " {\n";
- for (const auto &SF : Info.SubtargetFeatures) {
- const SubtargetFeatureInfo &SFI = SF.second;
- OS << " " << SFI.getEnumName() << " = (1ULL << " << SFI.Index << "),\n";
- }
- OS << " Feature_None = 0\n";
- OS << "};\n\n";
-}
-
/// emitOperandDiagnosticTypes - Emit the operand matching diagnostic types.
static void emitOperandDiagnosticTypes(AsmMatcherInfo &Info, raw_ostream &OS) {
// Get the set of diagnostic types from all of the operand classes.
@@ -2456,55 +2404,6 @@ static void emitGetSubtargetFeatureName(AsmMatcherInfo &Info, raw_ostream &OS) {
OS << "}\n\n";
}
-/// emitComputeAvailableFeatures - Emit the function to compute the list of
-/// available features given a subtarget.
-static void emitComputeAvailableFeatures(AsmMatcherInfo &Info,
- raw_ostream &OS) {
- std::string ClassName =
- Info.AsmParser->getValueAsString("AsmParserClassName");
-
- OS << "uint64_t " << Info.Target.getName() << ClassName << "::\n"
- << "ComputeAvailableFeatures(const FeatureBitset& FB) const {\n";
- OS << " uint64_t Features = 0;\n";
- for (const auto &SF : Info.SubtargetFeatures) {
- const SubtargetFeatureInfo &SFI = SF.second;
-
- OS << " if (";
- std::string CondStorage =
- SFI.TheDef->getValueAsString("AssemblerCondString");
- StringRef Conds = CondStorage;
- std::pair<StringRef,StringRef> Comma = Conds.split(',');
- bool First = true;
- do {
- if (!First)
- OS << " && ";
-
- bool Neg = false;
- StringRef Cond = Comma.first;
- if (Cond[0] == '!') {
- Neg = true;
- Cond = Cond.substr(1);
- }
-
- OS << "(";
- if (Neg)
- OS << "!";
- OS << "FB[" << Info.Target.getName() << "::" << Cond << "])";
-
- if (Comma.second.empty())
- break;
-
- First = false;
- Comma = Comma.second.split(',');
- } while (true);
-
- OS << ")\n";
- OS << " Features |= " << SFI.getEnumName() << ";\n";
- }
- OS << " return Features;\n";
- OS << "}\n\n";
-}
-
static std::string GetAliasRequiredFeatures(Record *R,
const AsmMatcherInfo &Info) {
std::vector<Record*> ReqFeatures = R->getValueAsListOfDefs("Predicates");
@@ -2646,7 +2545,7 @@ static void emitCustomOperandParsing(raw_ostream &OS, CodeGenTarget &Target,
// Emit the static custom operand parsing table;
OS << "namespace {\n";
OS << " struct OperandMatchEntry {\n";
- OS << " " << getMinimalRequiredFeaturesType(Info)
+ OS << " " << getMinimalTypeForEnumBitfield(Info.SubtargetFeatures.size())
<< " RequiredFeatures;\n";
OS << " " << getMinimalTypeForRange(MaxMnemonicIndex)
<< " Mnemonic;\n";
@@ -2719,8 +2618,7 @@ static void emitCustomOperandParsing(raw_ostream &OS, CodeGenTarget &Target,
// Emit the operand class switch to call the correct custom parser for
// the found operand class.
- OS << Target.getName() << ClassName << "::OperandMatchResultTy "
- << Target.getName() << ClassName << "::\n"
+ OS << "OperandMatchResultTy " << Target.getName() << ClassName << "::\n"
<< "tryCustomParseOperand(OperandVector"
<< " &Operands,\n unsigned MCK) {\n\n"
<< " switch(MCK) {\n";
@@ -2741,8 +2639,7 @@ static void emitCustomOperandParsing(raw_ostream &OS, CodeGenTarget &Target,
// Emit the static custom operand parser. This code is very similar to
// the other matcher. Also use MatchResultTy here just in case we go for
// better error handling.
- OS << Target.getName() << ClassName << "::OperandMatchResultTy "
- << Target.getName() << ClassName << "::\n"
+ OS << "OperandMatchResultTy " << Target.getName() << ClassName << "::\n"
<< "MatchOperandParserImpl(OperandVector"
<< " &Operands,\n StringRef Mnemonic) {\n";
@@ -2822,15 +2719,15 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
const std::unique_ptr<MatchableInfo> &b){
return *a < *b;});
-#ifndef NDEBUG
- // Verify that the table is now sorted
+#ifdef EXPENSIVE_CHECKS
+ // Verify that the table is sorted and operator < works transitively.
for (auto I = Info.Matchables.begin(), E = Info.Matchables.end(); I != E;
++I) {
for (auto J = I; J != E; ++J) {
assert(!(**J < **I));
}
}
-#endif // NDEBUG
+#endif
DEBUG_WITH_TYPE("instruction_info", {
for (const auto &MI : Info.Matchables)
@@ -2896,11 +2793,6 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
<< " unsigned VariantID = 0);\n";
if (!Info.OperandMatchInfo.empty()) {
- OS << "\n enum OperandMatchResultTy {\n";
- OS << " MatchOperand_Success, // operand matched successfully\n";
- OS << " MatchOperand_NoMatch, // operand did not match\n";
- OS << " MatchOperand_ParseFail // operand matched but had errors\n";
- OS << " };\n";
OS << " OperandMatchResultTy MatchOperandParserImpl(\n";
OS << " OperandVector &Operands,\n";
OS << " StringRef Mnemonic);\n";
@@ -2922,7 +2814,8 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
OS << "#undef GET_REGISTER_MATCHER\n\n";
// Emit the subtarget feature enumeration.
- emitSubtargetFeatureFlagEnumeration(Info, OS);
+ SubtargetFeatureInfo::emitSubtargetFeatureFlagEnumeration(
+ Info.SubtargetFeatures, OS);
// Emit the function to match a register name to number.
// This should be omitted for the Mips target
@@ -2967,7 +2860,9 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
emitValidateOperandClass(Info, OS);
// Emit the available features compute function.
- emitComputeAvailableFeatures(Info, OS);
+ SubtargetFeatureInfo::emitComputeAvailableFeatures(
+ Info.Target.getName(), ClassName, "ComputeAvailableFeatures",
+ Info.SubtargetFeatures, OS);
StringToOffsetTable StringTable;
@@ -3005,7 +2900,7 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
OS << " uint16_t Opcode;\n";
OS << " " << getMinimalTypeForRange(Info.Matchables.size())
<< " ConvertFn;\n";
- OS << " " << getMinimalRequiredFeaturesType(Info)
+ OS << " " << getMinimalTypeForEnumBitfield(Info.SubtargetFeatures.size())
<< " RequiredFeatures;\n";
OS << " " << getMinimalTypeForRange(
std::distance(Info.Classes.begin(), Info.Classes.end()))
@@ -3254,8 +3149,24 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
OS << " }\n";
OS << "\n";
OS << " Inst.clear();\n\n";
+ OS << " Inst.setOpcode(it->Opcode);\n";
+ // Verify the instruction with the target-specific match predicate function.
+ OS << " // We have a potential match but have not rendered the operands.\n"
+ << " // Check the target predicate to handle any context sensitive\n"
+ " // constraints.\n"
+ << " // For example, Ties that are referenced multiple times must be\n"
+ " // checked here to ensure the input is the same for each match\n"
+ " // constraints. If we leave it any later the ties will have been\n"
+ " // canonicalized\n"
+ << " unsigned MatchResult;\n"
+ << " if ((MatchResult = checkEarlyTargetMatchPredicate(Inst, "
+ "Operands)) != Match_Success) {\n"
+ << " Inst.clear();\n"
+ << " RetCode = MatchResult;\n"
+ << " HadMatchOtherThanPredicate = true;\n"
+ << " continue;\n"
+ << " }\n\n";
OS << " if (matchingInlineAsm) {\n";
- OS << " Inst.setOpcode(it->Opcode);\n";
OS << " convertToMapAndConstraints(it->ConvertFn, Operands);\n";
OS << " return Match_Success;\n";
OS << " }\n\n";
@@ -3272,7 +3183,6 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
// Verify the instruction with the target-specific match predicate function.
OS << " // We have a potential match. Check the target predicate to\n"
<< " // handle any context sensitive constraints.\n"
- << " unsigned MatchResult;\n"
<< " if ((MatchResult = checkTargetMatchPredicate(Inst)) !="
<< " Match_Success) {\n"
<< " Inst.clear();\n"
@@ -3289,7 +3199,9 @@ void AsmMatcherEmitter::run(raw_ostream &OS) {
if (HasDeprecation) {
OS << " std::string Info;\n";
- OS << " if (MII.get(Inst.getOpcode()).getDeprecatedInfo(Inst, getSTI(), Info)) {\n";
+ OS << " if (!getParser().getTargetParser().\n";
+ OS << " getTargetOptions().MCNoDeprecatedWarn &&\n";
+ OS << " MII.get(Inst.getOpcode()).getDeprecatedInfo(Inst, getSTI(), Info)) {\n";
OS << " SMLoc Loc = ((" << Target.getName()
<< "Operand&)*Operands[0]).getStartLoc();\n";
OS << " getParser().Warning(Loc, Info, None);\n";
diff --git a/contrib/llvm/utils/TableGen/AsmWriterEmitter.cpp b/contrib/llvm/utils/TableGen/AsmWriterEmitter.cpp
index fc2138f7e8ea..a7c6104aaa21 100644
--- a/contrib/llvm/utils/TableGen/AsmWriterEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/AsmWriterEmitter.cpp
@@ -13,32 +13,53 @@
//===----------------------------------------------------------------------===//
#include "AsmWriterInst.h"
+#include "CodeGenInstruction.h"
+#include "CodeGenRegisters.h"
#include "CodeGenTarget.h"
#include "SequenceToOffsetTable.h"
+#include "Types.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
#include <algorithm>
#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <deque>
+#include <iterator>
#include <map>
+#include <set>
+#include <string>
+#include <tuple>
#include <utility>
#include <vector>
+
using namespace llvm;
#define DEBUG_TYPE "asm-writer-emitter"
namespace {
+
class AsmWriterEmitter {
RecordKeeper &Records;
CodeGenTarget Target;
ArrayRef<const CodeGenInstruction *> NumberedInstructions;
std::vector<AsmWriterInst> Instructions;
+
public:
AsmWriterEmitter(RecordKeeper &R);
@@ -54,10 +75,11 @@ private:
std::vector<unsigned> &InstOpsUsed,
bool PassSubtarget) const;
};
+
} // end anonymous namespace
static void PrintCases(std::vector<std::pair<std::string,
- AsmWriterOperand> > &OpsToPrint, raw_ostream &O,
+ AsmWriterOperand>> &OpsToPrint, raw_ostream &O,
bool PassSubtarget) {
O << " case " << OpsToPrint.back().first << ":";
AsmWriterOperand TheOp = OpsToPrint.back().second;
@@ -76,7 +98,6 @@ static void PrintCases(std::vector<std::pair<std::string,
O << "\n break;\n";
}
-
/// EmitInstructions - Emit the last instruction in the vector and any other
/// instructions that are suitably similar to it.
static void EmitInstructions(std::vector<AsmWriterInst> &Insts,
@@ -115,14 +136,14 @@ static void EmitInstructions(std::vector<AsmWriterInst> &Insts,
// emit a switch for just this operand now.
O << " switch (MI->getOpcode()) {\n";
O << " default: llvm_unreachable(\"Unexpected opcode.\");\n";
- std::vector<std::pair<std::string, AsmWriterOperand> > OpsToPrint;
+ std::vector<std::pair<std::string, AsmWriterOperand>> OpsToPrint;
OpsToPrint.push_back(std::make_pair(FirstInst.CGI->Namespace + "::" +
- FirstInst.CGI->TheDef->getName(),
+ FirstInst.CGI->TheDef->getName().str(),
FirstInst.Operands[i]));
for (const AsmWriterInst &AWI : SimilarInsts) {
- OpsToPrint.push_back(std::make_pair(AWI.CGI->Namespace+"::"+
- AWI.CGI->TheDef->getName(),
+ OpsToPrint.push_back(std::make_pair(AWI.CGI->Namespace+"::" +
+ AWI.CGI->TheDef->getName().str(),
AWI.Operands[i]));
}
std::reverse(OpsToPrint.begin(), OpsToPrint.end());
@@ -140,7 +161,6 @@ FindUniqueOperandCommands(std::vector<std::string> &UniqueOperandCommands,
std::vector<std::vector<unsigned>> &InstIdxs,
std::vector<unsigned> &InstOpsUsed,
bool PassSubtarget) const {
-
// This vector parallels UniqueOperandCommands, keeping track of which
// instructions each case is used for. It is a comma-separated string of
// enums.
@@ -157,8 +177,7 @@ FindUniqueOperandCommands(std::vector<std::string> &UniqueOperandCommands,
// Check to see if we already have 'Command' in UniqueOperandCommands.
// If not, add it.
- auto I = std::find(UniqueOperandCommands.begin(),
- UniqueOperandCommands.end(), Command);
+ auto I = llvm::find(UniqueOperandCommands, Command);
if (I != UniqueOperandCommands.end()) {
size_t idx = I - UniqueOperandCommands.begin();
InstrsForCase[idx] += ", ";
@@ -225,7 +244,6 @@ FindUniqueOperandCommands(std::vector<std::string> &UniqueOperandCommands,
}
}
-
static void UnescapeString(std::string &Str) {
for (unsigned i = 0; i != Str.size(); ++i) {
if (Str[i] == '\\' && i != Str.size()-1) {
@@ -317,7 +335,7 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) {
std::vector<std::vector<std::string>> TableDrivenOperandPrinters;
- while (1) {
+ while (true) {
std::vector<std::string> UniqueOperandCommands;
std::vector<std::vector<unsigned>> InstIdxs;
std::vector<unsigned> NumInstOpsHandled;
@@ -451,10 +469,8 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) {
}
// Okay, delete instructions with no operand info left.
- auto I = std::remove_if(Instructions.begin(), Instructions.end(),
- [](AsmWriterInst &Inst) {
- return Inst.Operands.empty();
- });
+ auto I = llvm::remove_if(Instructions,
+ [](AsmWriterInst &Inst) { return Inst.Operands.empty(); });
Instructions.erase(I, Instructions.end());
@@ -480,15 +496,6 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) {
O << "}\n";
}
-static const char *getMinimalTypeForRange(uint64_t Range) {
- assert(Range < 0xFFFFFFFFULL && "Enum too large");
- if (Range > 0xFFFF)
- return "uint32_t";
- if (Range > 0xFF)
- return "uint16_t";
- return "uint8_t";
-}
-
static void
emitRegisterNameString(raw_ostream &O, StringRef AltName,
const std::deque<CodeGenRegister> &Registers) {
@@ -533,7 +540,7 @@ emitRegisterNameString(raw_ostream &O, StringRef AltName,
StringTable.emit(O, printChar);
O << " };\n\n";
- O << " static const " << getMinimalTypeForRange(StringTable.size()-1)
+ O << " static const " << getMinimalTypeForRange(StringTable.size() - 1, 32)
<< " RegAsmOffset" << AltName << "[] = {";
for (unsigned i = 0, e = Registers.size(); i != e; ++i) {
if ((i % 14) == 0)
@@ -595,6 +602,7 @@ void AsmWriterEmitter::EmitGetRegisterName(raw_ostream &O) {
}
namespace {
+
// IAPrinter - Holds information about an InstAlias. Two InstAliases match if
// they both have the same conditionals, in which case we cannot print out the
// alias for that pattern.
@@ -604,6 +612,7 @@ class IAPrinter {
std::string Result;
std::string AsmString;
+
public:
IAPrinter(std::string R, std::string AS)
: Result(std::move(R)), AsmString(std::move(AS)) {}
@@ -725,9 +734,10 @@ static unsigned CountNumOperands(StringRef AsmString, unsigned Variant) {
}
namespace {
+
struct AliasPriorityComparator {
typedef std::pair<CodeGenInstAlias, int> ValueType;
- bool operator()(const ValueType &LHS, const ValueType &RHS) {
+ bool operator()(const ValueType &LHS, const ValueType &RHS) const {
if (LHS.second == RHS.second) {
// We don't actually care about the order, but for consistency it
// shouldn't depend on pointer comparisons.
@@ -738,8 +748,8 @@ struct AliasPriorityComparator {
return LHS.second > RHS.second;
}
};
-}
+} // end anonymous namespace
void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
Record *AsmWriter = Target.getAsmWriter();
@@ -814,14 +824,14 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
NumMIOps += Operand.getMINumOperands();
std::string Cond;
- Cond = std::string("MI->getNumOperands() == ") + llvm::utostr(NumMIOps);
+ Cond = std::string("MI->getNumOperands() == ") + utostr(NumMIOps);
IAP.addCond(Cond);
bool CantHandle = false;
unsigned MIOpNum = 0;
for (unsigned i = 0, e = LastOpNo; i != e; ++i) {
- std::string Op = "MI->getOperand(" + llvm::utostr(MIOpNum) + ")";
+ std::string Op = "MI->getOperand(" + utostr(MIOpNum) + ")";
const CodeGenInstAlias::ResultOperand &RO = CGA.ResultOperands[i];
@@ -838,9 +848,8 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
Rec->isSubClassOf("Operand")) {
std::string PrintMethod = Rec->getValueAsString("PrintMethod");
if (PrintMethod != "" && PrintMethod != "printOperand") {
- PrintMethodIdx = std::find(PrintMethods.begin(),
- PrintMethods.end(), PrintMethod) -
- PrintMethods.begin();
+ PrintMethodIdx =
+ llvm::find(PrintMethods, PrintMethod) - PrintMethods.begin();
if (static_cast<unsigned>(PrintMethodIdx) == PrintMethods.size())
PrintMethods.push_back(PrintMethod);
}
@@ -856,12 +865,12 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
Record *R = CGA.ResultOperands[i].getRecord();
if (R->isSubClassOf("RegisterOperand"))
R = R->getValueAsDef("RegClass");
- Cond = std::string("MRI.getRegClass(") + Target.getName() + "::" +
- R->getName() + "RegClassID)"
- ".contains(" + Op + ".getReg())";
+ Cond = std::string("MRI.getRegClass(") + Target.getName().str() +
+ "::" + R->getName().str() + "RegClassID).contains(" + Op +
+ ".getReg())";
} else {
Cond = Op + ".getReg() == MI->getOperand(" +
- llvm::utostr(IAP.getOpIndex(ROName)) + ").getReg()";
+ utostr(IAP.getOpIndex(ROName)) + ").getReg()";
}
} else {
// Assume all printable operands are desired for now. This can be
@@ -878,8 +887,8 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
} else
break; // No conditions on this operand at all
}
- Cond = Target.getName() + ClassName + "ValidateMCOperand(" +
- Op + ", STI, " + llvm::utostr(Entry) + ")";
+ Cond = Target.getName().str() + ClassName + "ValidateMCOperand(" +
+ Op + ", STI, " + utostr(Entry) + ")";
}
// for all subcases of ResultOperand::K_Record:
IAP.addCond(Cond);
@@ -890,8 +899,7 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
// MCInst will. An MCExpr could be present, for example.
IAP.addCond(Op + ".isImm()");
- Cond = Op + ".getImm() == " +
- llvm::utostr(CGA.ResultOperands[i].getImm());
+ Cond = Op + ".getImm() == " + itostr(CGA.ResultOperands[i].getImm());
IAP.addCond(Cond);
break;
}
@@ -903,8 +911,8 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
break;
}
- Cond = Op + ".getReg() == " + Target.getName() + "::" +
- CGA.ResultOperands[i].getRegister()->getName();
+ Cond = Op + ".getReg() == " + Target.getName().str() + "::" +
+ CGA.ResultOperands[i].getRegister()->getName().str();
IAP.addCond(Cond);
break;
}
@@ -1112,7 +1120,6 @@ void AsmWriterEmitter::run(raw_ostream &O) {
EmitPrintAliasInstruction(O);
}
-
namespace llvm {
void EmitAsmWriter(RecordKeeper &RK, raw_ostream &OS) {
@@ -1120,4 +1127,4 @@ void EmitAsmWriter(RecordKeeper &RK, raw_ostream &OS) {
AsmWriterEmitter(RK).run(OS);
}
-} // End llvm namespace
+} // end namespace llvm
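Several AsmWriterEmitter hunks above swap explicit std::find/std::remove_if calls for the range-taking llvm::find/llvm::remove_if helpers from STLExtras.h. A minimal stand-alone analogue of that pattern (the helper name here is illustrative, not the LLVM one):

#include <algorithm>
#include <iostream>
#include <iterator>
#include <vector>

// Free-standing analogue of llvm::remove_if: take the whole range instead of
// an explicit begin()/end() pair.
template <typename R, typename Pred>
auto range_remove_if(R &Range, Pred P) {
  return std::remove_if(std::begin(Range), std::end(Range), P);
}

int main() {
  std::vector<int> Insts{1, 2, 3, 4, 5, 6};
  // Erase-remove idiom, as used above to drop instructions with no operands.
  Insts.erase(range_remove_if(Insts, [](int X) { return X % 2 == 0; }),
              Insts.end());
  for (int X : Insts)
    std::cout << X << ' ';   // prints: 1 3 5
  std::cout << '\n';
}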
diff --git a/contrib/llvm/utils/TableGen/Attributes.cpp b/contrib/llvm/utils/TableGen/Attributes.cpp
index 58dbe5767ada..927f6e0e5b44 100644
--- a/contrib/llvm/utils/TableGen/Attributes.cpp
+++ b/contrib/llvm/utils/TableGen/Attributes.cpp
@@ -156,7 +156,7 @@ void Attributes::printStrBoolAttrClasses(raw_ostream &OS,
OS << "// StrBoolAttr classes\n";
for (const auto *R : Records) {
OS << "struct " << R->getName() << "Attr : StrBoolAttr {\n";
- OS << " static const char *getKind() {\n";
+ OS << " static StringRef getKind() {\n";
OS << " return \"" << R->getValueAsString("AttrString") << "\";\n";
OS << " }\n";
OS << "};\n";
diff --git a/contrib/llvm/utils/TableGen/CTagsEmitter.cpp b/contrib/llvm/utils/TableGen/CTagsEmitter.cpp
index 35f4ad6dd5bf..5213cd904462 100644
--- a/contrib/llvm/utils/TableGen/CTagsEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/CTagsEmitter.cpp
@@ -37,7 +37,7 @@ public:
void emit(raw_ostream &OS) const {
const MemoryBuffer *CurMB =
SrcMgr.getMemoryBuffer(SrcMgr.FindBufferContainingLoc(Loc));
- const char *BufferName = CurMB->getBufferIdentifier();
+ auto BufferName = CurMB->getBufferIdentifier();
std::pair<unsigned, unsigned> LineAndColumn = SrcMgr.getLineAndColumn(Loc);
OS << *Id << "\t" << BufferName << "\t" << LineAndColumn.first << "\n";
}
diff --git a/contrib/llvm/utils/TableGen/CodeEmitterGen.cpp b/contrib/llvm/utils/TableGen/CodeEmitterGen.cpp
index 400913e32745..f34c0ded0a35 100644
--- a/contrib/llvm/utils/TableGen/CodeEmitterGen.cpp
+++ b/contrib/llvm/utils/TableGen/CodeEmitterGen.cpp
@@ -13,24 +13,35 @@
//
//===----------------------------------------------------------------------===//
+#include "CodeGenInstruction.h"
#include "CodeGenTarget.h"
+#include "SubtargetFeatureInfo.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringExtras.h"
-#include "llvm/Support/Debug.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
+#include <cassert>
+#include <cstdint>
#include <map>
+#include <set>
#include <string>
+#include <utility>
#include <vector>
+
using namespace llvm;
namespace {
class CodeEmitterGen {
RecordKeeper &Records;
+
public:
CodeEmitterGen(RecordKeeper &R) : Records(R) {}
void run(raw_ostream &o);
+
private:
int getVariableBit(const std::string &VarName, BitsInit *BI, int bit);
std::string getInstructionCase(Record *R, CodeGenTarget &Target);
@@ -173,7 +184,6 @@ AddCodeToMergeInOperand(Record *R, BitsInit *BI, const std::string &VarName,
}
}
-
std::string CodeEmitterGen::getInstructionCase(Record *R,
CodeGenTarget &Target) {
std::string Case;
@@ -259,7 +269,7 @@ void CodeEmitterGen::run(raw_ostream &o) {
o << " UINT64_C(0)\n };\n";
// Map to accumulate all the cases.
- std::map<std::string, std::vector<std::string> > CaseMap;
+ std::map<std::string, std::vector<std::string>> CaseMap;
// Construct a case statement for each opcode
for (std::vector<Record*>::iterator IC = Insts.begin(), EC = Insts.end();
@@ -269,7 +279,7 @@ void CodeEmitterGen::run(raw_ostream &o) {
R->getValueAsBit("isPseudo"))
continue;
const std::string &InstName = R->getValueAsString("Namespace") + "::"
- + R->getName();
+ + R->getName().str();
std::string Case = getInstructionCase(R, Target);
CaseMap[Case].push_back(InstName);
@@ -283,7 +293,7 @@ void CodeEmitterGen::run(raw_ostream &o) {
<< " switch (opcode) {\n";
// Emit each case statement
- std::map<std::string, std::vector<std::string> >::iterator IE, EE;
+ std::map<std::string, std::vector<std::string>>::iterator IE, EE;
for (IE = CaseMap.begin(), EE = CaseMap.end(); IE != EE; ++IE) {
const std::string &Case = IE->first;
std::vector<std::string> &InstList = IE->second;
@@ -307,9 +317,72 @@ void CodeEmitterGen::run(raw_ostream &o) {
<< " }\n"
<< " return Value;\n"
<< "}\n\n";
+
+ const auto &All = SubtargetFeatureInfo::getAll(Records);
+ std::map<Record *, SubtargetFeatureInfo, LessRecordByID> SubtargetFeatures;
+ SubtargetFeatures.insert(All.begin(), All.end());
+
+ o << "#ifdef ENABLE_INSTR_PREDICATE_VERIFIER\n"
+ << "#undef ENABLE_INSTR_PREDICATE_VERIFIER\n"
+ << "#include <sstream>\n\n";
+
+ // Emit the subtarget feature enumeration.
+ SubtargetFeatureInfo::emitSubtargetFeatureFlagEnumeration(SubtargetFeatures,
+ o);
+
+ // Emit the name table for error messages.
+ o << "#ifndef NDEBUG\n";
+ SubtargetFeatureInfo::emitNameTable(SubtargetFeatures, o);
+ o << "#endif // NDEBUG\n";
+
+ // Emit the available features compute function.
+ SubtargetFeatureInfo::emitComputeAvailableFeatures(
+ Target.getName(), "MCCodeEmitter", "computeAvailableFeatures",
+ SubtargetFeatures, o);
+
+ // Emit the predicate verifier.
+ o << "void " << Target.getName()
+ << "MCCodeEmitter::verifyInstructionPredicates(\n"
+ << " const MCInst &Inst, uint64_t AvailableFeatures) const {\n"
+ << "#ifndef NDEBUG\n"
+ << " static uint64_t RequiredFeatures[] = {\n";
+ unsigned InstIdx = 0;
+ for (const CodeGenInstruction *Inst : Target.getInstructionsByEnumValue()) {
+ o << " ";
+ for (Record *Predicate : Inst->TheDef->getValueAsListOfDefs("Predicates")) {
+ const auto &I = SubtargetFeatures.find(Predicate);
+ if (I != SubtargetFeatures.end())
+ o << I->second.getEnumName() << " | ";
+ }
+ o << "0, // " << Inst->TheDef->getName() << " = " << InstIdx << "\n";
+ InstIdx++;
+ }
+ o << " };\n\n";
+ o << " assert(Inst.getOpcode() < " << InstIdx << ");\n";
+ o << " uint64_t MissingFeatures =\n"
+ << " (AvailableFeatures & RequiredFeatures[Inst.getOpcode()]) ^\n"
+ << " RequiredFeatures[Inst.getOpcode()];\n"
+ << " if (MissingFeatures) {\n"
+ << " std::ostringstream Msg;\n"
+ << " Msg << \"Attempting to emit \" << "
+ "MCII.getName(Inst.getOpcode()).str()\n"
+ << " << \" instruction but the \";\n"
+ << " for (unsigned i = 0; i < 8 * sizeof(MissingFeatures); ++i)\n"
+ << " if (MissingFeatures & (1ULL << i))\n"
+ << " Msg << SubtargetFeatureNames[i] << \" \";\n"
+ << " Msg << \"predicate(s) are not met\";\n"
+ << " report_fatal_error(Msg.str());\n"
+ << " }\n"
+ << "#else\n"
+ << "// Silence unused variable warning on targets that don't use MCII for "
+ "other purposes (e.g. BPF).\n"
+ << "(void)MCII;\n"
+ << "#endif // NDEBUG\n";
+ o << "}\n";
+ o << "#endif\n";
}
-} // End anonymous namespace
+} // end anonymous namespace
namespace llvm {
@@ -318,4 +391,4 @@ void EmitCodeEmitter(RecordKeeper &RK, raw_ostream &OS) {
CodeEmitterGen(RK).run(OS);
}
-} // End llvm namespace
+} // end namespace llvm
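The verifier emitted above reports unmet instruction predicates by comparing the instruction's required feature mask against the available one. A small sketch of that bit arithmetic, using hypothetical feature names in place of the generated enumerators:

#include <cstdint>
#include <iostream>

int main() {
  // Hypothetical feature bits, standing in for the generated enumerators.
  const uint64_t Feature_HasA = 1ULL << 0;
  const uint64_t Feature_HasB = 1ULL << 1;

  uint64_t Required  = Feature_HasA | Feature_HasB; // instruction needs A and B
  uint64_t Available = Feature_HasA;                // subtarget only enables A

  // Same expression as the emitted verifier: bits required but not available.
  uint64_t Missing = (Available & Required) ^ Required;
  for (unsigned i = 0; i < 64; ++i)
    if (Missing & (1ULL << i))
      std::cout << "missing feature bit " << i << '\n'; // prints bit 1
}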
diff --git a/contrib/llvm/utils/TableGen/CodeGenDAGPatterns.cpp b/contrib/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
index 307a95373c9d..b82a76bb035d 100644
--- a/contrib/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
+++ b/contrib/llvm/utils/TableGen/CodeGenDAGPatterns.cpp
@@ -108,24 +108,24 @@ bool EEVT::TypeSet::FillWithPossibleTypes(TreePattern &TP,
/// hasIntegerTypes - Return true if this TypeSet contains iAny or an
/// integer value type.
bool EEVT::TypeSet::hasIntegerTypes() const {
- return std::any_of(TypeVec.begin(), TypeVec.end(), isInteger);
+ return any_of(TypeVec, isInteger);
}
/// hasFloatingPointTypes - Return true if this TypeSet contains an fAny or
/// a floating point value type.
bool EEVT::TypeSet::hasFloatingPointTypes() const {
- return std::any_of(TypeVec.begin(), TypeVec.end(), isFloatingPoint);
+ return any_of(TypeVec, isFloatingPoint);
}
/// hasScalarTypes - Return true if this TypeSet contains a scalar value type.
bool EEVT::TypeSet::hasScalarTypes() const {
- return std::any_of(TypeVec.begin(), TypeVec.end(), isScalar);
+ return any_of(TypeVec, isScalar);
}
/// hasVectorTypes - Return true if this TypeSet contains a vAny or a vector
/// value type.
bool EEVT::TypeSet::hasVectorTypes() const {
- return std::any_of(TypeVec.begin(), TypeVec.end(), isVector);
+ return any_of(TypeVec, isVector);
}
@@ -239,8 +239,7 @@ bool EEVT::TypeSet::EnforceInteger(TreePattern &TP) {
TypeSet InputSet(*this);
// Filter out all the fp types.
- TypeVec.erase(std::remove_if(TypeVec.begin(), TypeVec.end(),
- std::not1(std::ptr_fun(isInteger))),
+ TypeVec.erase(remove_if(TypeVec, std::not1(std::ptr_fun(isInteger))),
TypeVec.end());
if (TypeVec.empty()) {
@@ -265,8 +264,7 @@ bool EEVT::TypeSet::EnforceFloatingPoint(TreePattern &TP) {
TypeSet InputSet(*this);
// Filter out all the integer types.
- TypeVec.erase(std::remove_if(TypeVec.begin(), TypeVec.end(),
- std::not1(std::ptr_fun(isFloatingPoint))),
+ TypeVec.erase(remove_if(TypeVec, std::not1(std::ptr_fun(isFloatingPoint))),
TypeVec.end());
if (TypeVec.empty()) {
@@ -292,8 +290,7 @@ bool EEVT::TypeSet::EnforceScalar(TreePattern &TP) {
TypeSet InputSet(*this);
// Filter out all the vector types.
- TypeVec.erase(std::remove_if(TypeVec.begin(), TypeVec.end(),
- std::not1(std::ptr_fun(isScalar))),
+ TypeVec.erase(remove_if(TypeVec, std::not1(std::ptr_fun(isScalar))),
TypeVec.end());
if (TypeVec.empty()) {
@@ -317,8 +314,7 @@ bool EEVT::TypeSet::EnforceVector(TreePattern &TP) {
bool MadeChange = false;
// Filter out all the scalar types.
- TypeVec.erase(std::remove_if(TypeVec.begin(), TypeVec.end(),
- std::not1(std::ptr_fun(isVector))),
+ TypeVec.erase(remove_if(TypeVec, std::not1(std::ptr_fun(isVector))),
TypeVec.end());
if (TypeVec.empty()) {
@@ -395,16 +391,15 @@ bool EEVT::TypeSet::EnforceSmallerThan(EEVT::TypeSet &Other, TreePattern &TP) {
A.getSizeInBits() < B.getSizeInBits());
});
- auto I = std::remove_if(Other.TypeVec.begin(), Other.TypeVec.end(),
- [Smallest](MVT OtherVT) {
- // Don't compare vector and non-vector types.
- if (OtherVT.isVector() != Smallest.isVector())
- return false;
- // The getSizeInBits() check here is only needed for vectors, but is
- // a subset of the scalar check for scalars so no need to qualify.
- return OtherVT.getScalarSizeInBits() <= Smallest.getScalarSizeInBits()||
- OtherVT.getSizeInBits() < Smallest.getSizeInBits();
- });
+ auto I = remove_if(Other.TypeVec, [Smallest](MVT OtherVT) {
+ // Don't compare vector and non-vector types.
+ if (OtherVT.isVector() != Smallest.isVector())
+ return false;
+ // The getSizeInBits() check here is only needed for vectors, but is
+ // a subset of the scalar check for scalars so no need to qualify.
+ return OtherVT.getScalarSizeInBits() <= Smallest.getScalarSizeInBits() ||
+ OtherVT.getSizeInBits() < Smallest.getSizeInBits();
+ });
MadeChange |= I != Other.TypeVec.end(); // If we're about to remove types.
Other.TypeVec.erase(I, Other.TypeVec.end());
@@ -428,14 +423,13 @@ bool EEVT::TypeSet::EnforceSmallerThan(EEVT::TypeSet &Other, TreePattern &TP) {
(A.getScalarSizeInBits() == B.getScalarSizeInBits() &&
A.getSizeInBits() < B.getSizeInBits());
});
- auto I = std::remove_if(TypeVec.begin(), TypeVec.end(),
- [Largest](MVT OtherVT) {
- // Don't compare vector and non-vector types.
- if (OtherVT.isVector() != Largest.isVector())
- return false;
- return OtherVT.getScalarSizeInBits() >= Largest.getScalarSizeInBits() ||
- OtherVT.getSizeInBits() > Largest.getSizeInBits();
- });
+ auto I = remove_if(TypeVec, [Largest](MVT OtherVT) {
+ // Don't compare vector and non-vector types.
+ if (OtherVT.isVector() != Largest.isVector())
+ return false;
+ return OtherVT.getScalarSizeInBits() >= Largest.getScalarSizeInBits() ||
+ OtherVT.getSizeInBits() > Largest.getSizeInBits();
+ });
MadeChange |= I != TypeVec.end(); // If we're about to remove types.
TypeVec.erase(I, TypeVec.end());
@@ -460,10 +454,9 @@ bool EEVT::TypeSet::EnforceVectorEltTypeIs(MVT::SimpleValueType VT,
TypeSet InputSet(*this);
// Filter out all the types which don't have the right element type.
- auto I = std::remove_if(TypeVec.begin(), TypeVec.end(),
- [VT](MVT VVT) {
- return VVT.getVectorElementType().SimpleTy != VT;
- });
+ auto I = remove_if(TypeVec, [VT](MVT VVT) {
+ return VVT.getVectorElementType().SimpleTy != VT;
+ });
MadeChange |= I != TypeVec.end();
TypeVec.erase(I, TypeVec.end());
@@ -547,10 +540,9 @@ bool EEVT::TypeSet::EnforceVectorSubVectorTypeIs(EEVT::TypeSet &VTOperand,
// Only keep types that have less elements than VTOperand.
TypeSet InputSet(VTOperand);
- auto I = std::remove_if(VTOperand.TypeVec.begin(), VTOperand.TypeVec.end(),
- [NumElems](MVT VVT) {
- return VVT.getVectorNumElements() >= NumElems;
- });
+ auto I = remove_if(VTOperand.TypeVec, [NumElems](MVT VVT) {
+ return VVT.getVectorNumElements() >= NumElems;
+ });
MadeChange |= I != VTOperand.TypeVec.end();
VTOperand.TypeVec.erase(I, VTOperand.TypeVec.end());
@@ -571,10 +563,9 @@ bool EEVT::TypeSet::EnforceVectorSubVectorTypeIs(EEVT::TypeSet &VTOperand,
// Only keep types that have more elements than 'this'.
TypeSet InputSet(*this);
- auto I = std::remove_if(TypeVec.begin(), TypeVec.end(),
- [NumElems](MVT VVT) {
- return VVT.getVectorNumElements() <= NumElems;
- });
+ auto I = remove_if(TypeVec, [NumElems](MVT VVT) {
+ return VVT.getVectorNumElements() <= NumElems;
+ });
MadeChange |= I != TypeVec.end();
TypeVec.erase(I, TypeVec.end());
@@ -609,10 +600,9 @@ bool EEVT::TypeSet::EnforceVectorSameNumElts(EEVT::TypeSet &VTOperand,
// Only keep types that have same elements as 'this'.
TypeSet InputSet(VTOperand);
- auto I = std::remove_if(VTOperand.TypeVec.begin(), VTOperand.TypeVec.end(),
- [NumElems](MVT VVT) {
- return VVT.getVectorNumElements() != NumElems;
- });
+ auto I = remove_if(VTOperand.TypeVec, [NumElems](MVT VVT) {
+ return VVT.getVectorNumElements() != NumElems;
+ });
MadeChange |= I != VTOperand.TypeVec.end();
VTOperand.TypeVec.erase(I, VTOperand.TypeVec.end());
@@ -629,10 +619,9 @@ bool EEVT::TypeSet::EnforceVectorSameNumElts(EEVT::TypeSet &VTOperand,
// Only keep types that have same elements as VTOperand.
TypeSet InputSet(*this);
- auto I = std::remove_if(TypeVec.begin(), TypeVec.end(),
- [NumElems](MVT VVT) {
- return VVT.getVectorNumElements() != NumElems;
- });
+ auto I = remove_if(TypeVec, [NumElems](MVT VVT) {
+ return VVT.getVectorNumElements() != NumElems;
+ });
MadeChange |= I != TypeVec.end();
TypeVec.erase(I, TypeVec.end());
@@ -663,10 +652,8 @@ bool EEVT::TypeSet::EnforceSameSize(EEVT::TypeSet &VTOperand,
// Only keep types that have the same size as 'this'.
TypeSet InputSet(VTOperand);
- auto I = std::remove_if(VTOperand.TypeVec.begin(), VTOperand.TypeVec.end(),
- [&](MVT VT) {
- return VT.getSizeInBits() != Size;
- });
+ auto I = remove_if(VTOperand.TypeVec,
+ [&](MVT VT) { return VT.getSizeInBits() != Size; });
MadeChange |= I != VTOperand.TypeVec.end();
VTOperand.TypeVec.erase(I, VTOperand.TypeVec.end());
@@ -683,10 +670,8 @@ bool EEVT::TypeSet::EnforceSameSize(EEVT::TypeSet &VTOperand,
// Only keep types that have the same size as VTOperand.
TypeSet InputSet(*this);
- auto I = std::remove_if(TypeVec.begin(), TypeVec.end(),
- [&](MVT VT) {
- return VT.getSizeInBits() != Size;
- });
+ auto I =
+ remove_if(TypeVec, [&](MVT VT) { return VT.getSizeInBits() != Size; });
MadeChange |= I != TypeVec.end();
TypeVec.erase(I, TypeVec.end());
@@ -770,7 +755,7 @@ bool TreePredicateFn::isAlwaysTrue() const {
/// Return the name to use in the generated code to reference this, this is
/// "Predicate_foo" if from a pattern fragment "foo".
std::string TreePredicateFn::getFnName() const {
- return "Predicate_" + PatFragRec->getRecord()->getName();
+ return "Predicate_" + PatFragRec->getRecord()->getName().str();
}
/// getCodeToRunOnSDNode - Return the code for the function body that
@@ -820,14 +805,9 @@ static unsigned getPatternSize(const TreePatternNode *P,
if (P->isLeaf() && isa<IntInit>(P->getLeafValue()))
Size += 2;
- // FIXME: This is a hack to statically increase the priority of patterns
- // which maps a sub-dag to a complex pattern. e.g. favors LEA over ADD.
- // Later we can allow complexity / cost for each pattern to be (optionally)
- // specified. To get best possible pattern match we'll need to dynamically
- // calculate the complexity of all patterns a dag can potentially map to.
const ComplexPattern *AM = P->getComplexPatternInfo(CGP);
if (AM) {
- Size += AM->getNumOperands() * 3;
+ Size += AM->getComplexity();
// We don't want to count any children twice, so return early.
return Size;
@@ -2026,7 +2006,7 @@ bool TreePatternNode::canPatternMatch(std::string &Reason,
// Scan all of the operands of the node and make sure that only the last one
// is a constant node, unless the RHS also is.
if (!OnlyOnRHSOfCommutative(getChild(getNumChildren()-1))) {
- bool Skip = isCommIntrinsic ? 1 : 0; // First operand is intrinsic id.
+ unsigned Skip = isCommIntrinsic ? 1 : 0; // First operand is intrinsic id.
for (unsigned i = Skip, e = getNumChildren()-1; i != e; ++i)
if (OnlyOnRHSOfCommutative(getChild(i))) {
Reason="Immediate value must be on the RHS of commutative operators!";
@@ -2092,8 +2072,8 @@ TreePatternNode *TreePattern::ParseTreePattern(Init *TheInit, StringRef OpName){
/// (foo GPR, imm) -> (foo GPR, (imm))
if (R->isSubClassOf("SDNode") || R->isSubClassOf("PatFrag"))
return ParseTreePattern(
- DagInit::get(DI, "",
- std::vector<std::pair<Init*, std::string> >()),
+ DagInit::get(DI, nullptr,
+ std::vector<std::pair<Init*, StringInit*> >()),
OpName);
// Input argument?
@@ -2147,7 +2127,8 @@ TreePatternNode *TreePattern::ParseTreePattern(Init *TheInit, StringRef OpName){
if (Dag->getNumArgs() != 1)
error("Type cast only takes one operand!");
- TreePatternNode *New = ParseTreePattern(Dag->getArg(0), Dag->getArgName(0));
+ TreePatternNode *New = ParseTreePattern(Dag->getArg(0),
+ Dag->getArgNameStr(0));
// Apply the type cast.
assert(New->getNumTypes() == 1 && "FIXME: Unhandled");
@@ -2198,7 +2179,7 @@ TreePatternNode *TreePattern::ParseTreePattern(Init *TheInit, StringRef OpName){
// Parse all the operands.
for (unsigned i = 0, e = Dag->getNumArgs(); i != e; ++i)
- Children.push_back(ParseTreePattern(Dag->getArg(i), Dag->getArgName(i)));
+ Children.push_back(ParseTreePattern(Dag->getArg(i), Dag->getArgNameStr(i)));
// If the operator is an intrinsic, then this is just syntactic sugar for
// (intrinsic_* <number>, ..children..). Pick the right intrinsic node, and
@@ -2246,9 +2227,9 @@ TreePatternNode *TreePattern::ParseTreePattern(Init *TheInit, StringRef OpName){
TreePatternNode *Result = new TreePatternNode(Operator, Children, NumResults);
Result->setName(OpName);
- if (!Dag->getName().empty()) {
+ if (Dag->getName()) {
assert(Result->getName().empty());
- Result->setName(Dag->getName());
+ Result->setName(Dag->getNameStr());
}
return Result;
}
@@ -2504,13 +2485,14 @@ void CodeGenDAGPatterns::ParsePatternFragments(bool OutFrags) {
if (!isa<DefInit>(OpsList->getArg(j)) ||
cast<DefInit>(OpsList->getArg(j))->getDef()->getName() != "node")
P->error("Operands list should all be 'node' values.");
- if (OpsList->getArgName(j).empty())
+ if (!OpsList->getArgName(j))
P->error("Operands list should have names for each operand!");
- if (!OperandsSet.count(OpsList->getArgName(j)))
- P->error("'" + OpsList->getArgName(j) +
+ StringRef ArgNameStr = OpsList->getArgNameStr(j);
+ if (!OperandsSet.count(ArgNameStr))
+ P->error("'" + ArgNameStr +
"' does not occur in pattern or was multiply specified!");
- OperandsSet.erase(OpsList->getArgName(j));
- Args.push_back(OpsList->getArgName(j));
+ OperandsSet.erase(ArgNameStr);
+ Args.push_back(ArgNameStr);
}
if (!OperandsSet.empty())
@@ -2562,11 +2544,11 @@ void CodeGenDAGPatterns::ParseDefaultOperands() {
// Clone the DefaultInfo dag node, changing the operator from 'ops' to
// SomeSDnode so that we can parse this.
- std::vector<std::pair<Init*, std::string> > Ops;
+ std::vector<std::pair<Init*, StringInit*> > Ops;
for (unsigned op = 0, e = DefaultInfo->getNumArgs(); op != e; ++op)
Ops.push_back(std::make_pair(DefaultInfo->getArg(op),
DefaultInfo->getArgName(op)));
- DagInit *DI = DagInit::get(SomeSDNode, "", Ops);
+ DagInit *DI = DagInit::get(SomeSDNode, nullptr, Ops);
// Create a TreePattern to parse this.
TreePattern P(DefaultOps[i], DI, false, *this);
@@ -3602,10 +3584,9 @@ static void CombineChildVariants(TreePatternNode *Orig,
// (and GPRC:$a, GPRC:$b) -> (and GPRC:$b, GPRC:$a)
// which are the same pattern. Ignore the dups.
if (R->canPatternMatch(ErrString, CDP) &&
- std::none_of(OutVariants.begin(), OutVariants.end(),
- [&](TreePatternNode *Variant) {
- return R->isIsomorphicTo(Variant, DepVars);
- }))
+ none_of(OutVariants, [&](TreePatternNode *Variant) {
+ return R->isIsomorphicTo(Variant, DepVars);
+ }))
OutVariants.push_back(R.release());
// Increment indices to the next permutation by incrementing the
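The CombineChildVariants hunk above keeps a newly built variant only if none of the already-collected variants is isomorphic to it. A stand-alone sketch of that dedup step, with plain vector equality standing in for isIsomorphicTo():

#include <algorithm>
#include <iostream>
#include <vector>

int main() {
  std::vector<std::vector<int>> OutVariants;
  // Candidate operand permutations; equality stands in for isIsomorphicTo().
  const std::vector<std::vector<int>> Candidates{{1, 2}, {2, 1}, {1, 2}};

  for (const auto &R : Candidates)
    if (std::none_of(OutVariants.begin(), OutVariants.end(),
                     [&](const std::vector<int> &V) { return V == R; }))
      OutVariants.push_back(R);

  std::cout << OutVariants.size() << '\n'; // 2: the duplicate {1, 2} is dropped
}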
diff --git a/contrib/llvm/utils/TableGen/CodeGenDAGPatterns.h b/contrib/llvm/utils/TableGen/CodeGenDAGPatterns.h
index 819c4b8492cb..97401cd81713 100644
--- a/contrib/llvm/utils/TableGen/CodeGenDAGPatterns.h
+++ b/contrib/llvm/utils/TableGen/CodeGenDAGPatterns.h
@@ -412,8 +412,7 @@ public:
}
void addPredicateFn(const TreePredicateFn &Fn) {
assert(!Fn.isAlwaysTrue() && "Empty predicate string!");
- if (std::find(PredicateFns.begin(), PredicateFns.end(), Fn) ==
- PredicateFns.end())
+ if (!is_contained(PredicateFns, Fn))
PredicateFns.push_back(Fn);
}
@@ -810,11 +809,13 @@ public:
LessRecordByID>::const_iterator pf_iterator;
pf_iterator pf_begin() const { return PatternFragments.begin(); }
pf_iterator pf_end() const { return PatternFragments.end(); }
+ iterator_range<pf_iterator> ptfs() const { return PatternFragments; }
// Patterns to match information.
typedef std::vector<PatternToMatch>::const_iterator ptm_iterator;
ptm_iterator ptm_begin() const { return PatternsToMatch.begin(); }
ptm_iterator ptm_end() const { return PatternsToMatch.end(); }
+ iterator_range<ptm_iterator> ptms() const { return PatternsToMatch; }
/// Parse the Pattern for an instruction, and insert the result in DAGInsts.
typedef std::map<Record*, DAGInstruction, LessRecordByID> DAGInstMap;
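The new ptfs()/ptms() accessors above return iterator_ranges so callers can use range-based for loops instead of the *_begin()/*_end() pairs. A minimal sketch of the wrapper idea; SimpleRange is illustrative, the real class is llvm::iterator_range:

#include <iostream>
#include <map>

// A begin()/end() pair wrapped so it works in a range-based for loop.
template <typename It> struct SimpleRange {
  It B, E;
  It begin() const { return B; }
  It end() const { return E; }
};

template <typename C>
SimpleRange<typename C::const_iterator> makeSimpleRange(const C &Cont) {
  return {Cont.begin(), Cont.end()};
}

int main() {
  std::map<int, const char *> Patterns{{1, "add"}, {2, "sub"}};
  for (const auto &P : makeSimpleRange(Patterns))
    std::cout << P.first << " -> " << P.second << '\n';
}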
diff --git a/contrib/llvm/utils/TableGen/CodeGenInstruction.cpp b/contrib/llvm/utils/TableGen/CodeGenInstruction.cpp
index ec802363030e..bb2ec2a64e49 100644
--- a/contrib/llvm/utils/TableGen/CodeGenInstruction.cpp
+++ b/contrib/llvm/utils/TableGen/CodeGenInstruction.cpp
@@ -53,13 +53,13 @@ CGIOperandList::CGIOperandList(Record *R) : TheDef(R) {
OperandList.reserve(e);
for (unsigned i = 0; i != e; ++i){
Init *ArgInit;
- std::string ArgName;
+ StringRef ArgName;
if (i < NumDefs) {
ArgInit = OutDI->getArg(i);
- ArgName = OutDI->getArgName(i);
+ ArgName = OutDI->getArgNameStr(i);
} else {
ArgInit = InDI->getArg(i-NumDefs);
- ArgName = InDI->getArgName(i-NumDefs);
+ ArgName = InDI->getArgNameStr(i-NumDefs);
}
DefInit *Arg = dyn_cast<DefInit>(ArgInit);
@@ -165,7 +165,7 @@ CGIOperandList::ParseOperandName(const std::string &Op, bool AllowWholeOp) {
std::string SubOpName;
// Check to see if this is $foo.bar.
- std::string::size_type DotIdx = OpName.find_first_of(".");
+ std::string::size_type DotIdx = OpName.find_first_of('.');
if (DotIdx != std::string::npos) {
SubOpName = OpName.substr(DotIdx+1);
if (SubOpName.empty())
@@ -193,7 +193,7 @@ CGIOperandList::ParseOperandName(const std::string &Op, bool AllowWholeOp) {
// Find the operand with the right name.
for (unsigned i = 0, e = MIOpInfo->getNumArgs(); i != e; ++i)
- if (MIOpInfo->getArgName(i) == SubOpName)
+ if (MIOpInfo->getArgNameStr(i) == SubOpName)
return std::make_pair(OpIdx, i);
// Otherwise, didn't find it!
@@ -309,6 +309,7 @@ CodeGenInstruction::CodeGenInstruction(Record *R)
isSelect = R->getValueAsBit("isSelect");
isBarrier = R->getValueAsBit("isBarrier");
isCall = R->getValueAsBit("isCall");
+ isAdd = R->getValueAsBit("isAdd");
canFoldAsLoad = R->getValueAsBit("canFoldAsLoad");
isPredicable = Operands.isPredicable || R->getValueAsBit("isPredicable");
isConvertibleToThreeAddress = R->getValueAsBit("isConvertibleToThreeAddress");
@@ -448,10 +449,10 @@ bool CodeGenInstAlias::tryAliasOpMatch(DagInit *Result, unsigned AliasOpNo,
if (ADI && ADI->getDef() == InstOpRec) {
// If the operand is a record, it must have a name, and the record type
// must match up with the instruction's argument type.
- if (Result->getArgName(AliasOpNo).empty())
+ if (!Result->getArgName(AliasOpNo))
PrintFatalError(Loc, "result argument #" + Twine(AliasOpNo) +
" must have a name!");
- ResOp = ResultOperand(Result->getArgName(AliasOpNo), ResultRecord);
+ ResOp = ResultOperand(Result->getArgNameStr(AliasOpNo), ResultRecord);
return true;
}
@@ -469,7 +470,7 @@ bool CodeGenInstAlias::tryAliasOpMatch(DagInit *Result, unsigned AliasOpNo,
if (!T.getRegisterClass(InstOpRec)
.hasSubClass(&T.getRegisterClass(ADI->getDef())))
return false;
- ResOp = ResultOperand(Result->getArgName(AliasOpNo), ResultRecord);
+ ResOp = ResultOperand(Result->getArgNameStr(AliasOpNo), ResultRecord);
return true;
}
@@ -491,7 +492,7 @@ bool CodeGenInstAlias::tryAliasOpMatch(DagInit *Result, unsigned AliasOpNo,
" is not a member of the " + InstOpRec->getName() +
" register class!");
- if (!Result->getArgName(AliasOpNo).empty())
+ if (Result->getArgName(AliasOpNo))
PrintFatalError(Loc, "result fixed register argument must "
"not have a name!");
@@ -520,7 +521,7 @@ bool CodeGenInstAlias::tryAliasOpMatch(DagInit *Result, unsigned AliasOpNo,
if (hasSubOps || !InstOpRec->isSubClassOf("Operand"))
return false;
// Integer arguments can't have names.
- if (!Result->getArgName(AliasOpNo).empty())
+ if (Result->getArgName(AliasOpNo))
PrintFatalError(Loc, "result argument #" + Twine(AliasOpNo) +
" must not have a name!");
ResOp = ResultOperand(II->getValue());
@@ -551,7 +552,7 @@ bool CodeGenInstAlias::tryAliasOpMatch(DagInit *Result, unsigned AliasOpNo,
// MIOperandInfo perhaps?
if (InstOpRec->getValueInit("Type") != ADI->getDef()->getValueInit("Type"))
return false;
- ResOp = ResultOperand(Result->getArgName(AliasOpNo), ADI->getDef());
+ ResOp = ResultOperand(Result->getArgNameStr(AliasOpNo), ADI->getDef());
return true;
}
@@ -596,14 +597,14 @@ CodeGenInstAlias::CodeGenInstAlias(Record *R, unsigned Variant,
StringMap<Record*> NameClass;
for (unsigned i = 0, e = Result->getNumArgs(); i != e; ++i) {
DefInit *ADI = dyn_cast<DefInit>(Result->getArg(i));
- if (!ADI || Result->getArgName(i).empty())
+ if (!ADI || !Result->getArgName(i))
continue;
// Verify we don't have something like: (someinst GR16:$foo, GR32:$foo)
// $foo can exist multiple times in the result list, but it must have the
// same type.
- Record *&Entry = NameClass[Result->getArgName(i)];
+ Record *&Entry = NameClass[Result->getArgNameStr(i)];
if (Entry && Entry != ADI->getDef())
- PrintFatalError(R->getLoc(), "result value $" + Result->getArgName(i) +
+ PrintFatalError(R->getLoc(), "result value $" + Result->getArgNameStr(i) +
" is both " + Entry->getName() + " and " +
ADI->getDef()->getName() + "!");
Entry = ADI->getDef();
@@ -646,9 +647,9 @@ CodeGenInstAlias::CodeGenInstAlias(Record *R, unsigned Variant,
// Take care to instantiate each of the suboperands with the correct
// nomenclature: $foo.bar
- ResultOperands.emplace_back(Result->getArgName(AliasOpNo) + "." +
- MIOI->getArgName(SubOp),
- SubRec);
+ ResultOperands.emplace_back(
+ Result->getArgName(AliasOpNo)->getAsUnquotedString() + "." +
+ MIOI->getArgName(SubOp)->getAsUnquotedString(), SubRec);
ResultInstOperandIndex.push_back(std::make_pair(i, SubOp));
}
++AliasOpNo;
diff --git a/contrib/llvm/utils/TableGen/CodeGenInstruction.h b/contrib/llvm/utils/TableGen/CodeGenInstruction.h
index 8e5a03d7b743..75db17b59ac3 100644
--- a/contrib/llvm/utils/TableGen/CodeGenInstruction.h
+++ b/contrib/llvm/utils/TableGen/CodeGenInstruction.h
@@ -230,6 +230,7 @@ template <typename T> class ArrayRef;
bool isSelect : 1;
bool isBarrier : 1;
bool isCall : 1;
+ bool isAdd : 1;
bool canFoldAsLoad : 1;
bool mayLoad : 1;
bool mayLoad_Unset : 1;
diff --git a/contrib/llvm/utils/TableGen/CodeGenIntrinsics.h b/contrib/llvm/utils/TableGen/CodeGenIntrinsics.h
index ea3ec67d62ed..6df0e6a62caf 100644
--- a/contrib/llvm/utils/TableGen/CodeGenIntrinsics.h
+++ b/contrib/llvm/utils/TableGen/CodeGenIntrinsics.h
@@ -62,15 +62,23 @@ struct CodeGenIntrinsic {
/// accesses that may be performed by the intrinsics. Analogous to
/// \c FunctionModRefBehaviour.
enum ModRefBits {
+ /// The intrinsic may access memory that is otherwise inaccessible via
+ /// LLVM IR.
+ MR_InaccessibleMem = 1,
+
+ /// The intrinsic may access memory through pointer arguments.
+ MR_ArgMem = 2,
+
/// The intrinsic may access memory anywhere, i.e. it is not restricted
/// to access through pointer arguments.
- MR_Anywhere = 1,
+ MR_Anywhere = 4 | MR_ArgMem | MR_InaccessibleMem,
/// The intrinsic may read memory.
- MR_Ref = 2,
+ MR_Ref = 8,
/// The intrinsic may write memory.
- MR_Mod = 4,
+ MR_Mod = 16,
/// The intrinsic may both read and write memory.
MR_ModRef = MR_Ref | MR_Mod,
@@ -80,11 +88,18 @@ struct CodeGenIntrinsic {
/// properties (IntrReadMem, IntrArgMemOnly, etc.).
enum ModRefBehavior {
NoMem = 0,
- ReadArgMem = MR_Ref,
+ ReadArgMem = MR_Ref | MR_ArgMem,
+ ReadInaccessibleMem = MR_Ref | MR_InaccessibleMem,
+ ReadInaccessibleMemOrArgMem = MR_Ref | MR_ArgMem | MR_InaccessibleMem,
ReadMem = MR_Ref | MR_Anywhere,
- WriteArgMem = MR_Mod,
+ WriteArgMem = MR_Mod | MR_ArgMem,
+ WriteInaccessibleMem = MR_Mod | MR_InaccessibleMem,
+ WriteInaccessibleMemOrArgMem = MR_Mod | MR_ArgMem | MR_InaccessibleMem,
WriteMem = MR_Mod | MR_Anywhere,
- ReadWriteArgMem = MR_ModRef,
+ ReadWriteArgMem = MR_ModRef | MR_ArgMem,
+ ReadWriteInaccessibleMem = MR_ModRef | MR_InaccessibleMem,
+ ReadWriteInaccessibleMemOrArgMem = MR_ModRef | MR_ArgMem |
+ MR_InaccessibleMem,
ReadWriteMem = MR_ModRef | MR_Anywhere,
};
ModRefBehavior ModRef;
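The re-encoded ModRef bits above separate location bits (MR_ArgMem, MR_InaccessibleMem, plus the extra "anywhere" bit) from access bits (MR_Ref, MR_Mod), so each named behaviour decomposes with plain bit tests. A sketch using the same values, outside the CodeGenIntrinsic context:

#include <cstdint>
#include <iostream>

enum ModRefBits : uint32_t {
  MR_InaccessibleMem = 1,
  MR_ArgMem = 2,
  MR_Anywhere = 4 | MR_ArgMem | MR_InaccessibleMem,
  MR_Ref = 8,
  MR_Mod = 16,
  MR_ModRef = MR_Ref | MR_Mod
};

int main() {
  uint32_t ReadWriteArgMem = MR_ModRef | MR_ArgMem;
  bool MayRead    = (ReadWriteArgMem & MR_Ref) != 0;     // may read
  bool MayWrite   = (ReadWriteArgMem & MR_Mod) != 0;     // may write
  bool OnlyArgMem = (ReadWriteArgMem & MR_Anywhere) == MR_ArgMem; // arg mem only
  std::cout << MayRead << MayWrite << OnlyArgMem << '\n'; // prints: 111
}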
diff --git a/contrib/llvm/utils/TableGen/CodeGenMapTable.cpp b/contrib/llvm/utils/TableGen/CodeGenMapTable.cpp
index 527f530da479..8032d7b3ee95 100644
--- a/contrib/llvm/utils/TableGen/CodeGenMapTable.cpp
+++ b/contrib/llvm/utils/TableGen/CodeGenMapTable.cpp
@@ -542,6 +542,7 @@ static void emitEnums(raw_ostream &OS, RecordKeeper &Records) {
for (unsigned j = i+1; j < FieldValues.size(); j++) {
if (CurVal == FieldValues[j]) {
FieldValues.erase(FieldValues.begin()+j);
+ --j;
}
}
}
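The one-line CodeGenMapTable fix above (--j after the erase) avoids skipping the element that shifts into slot j when a duplicate is removed. The same idiom on a plain std::vector:

#include <iostream>
#include <vector>

int main() {
  std::vector<int> FieldValues{7, 3, 3, 3, 5};
  for (size_t i = 0; i < FieldValues.size(); ++i) {
    for (size_t j = i + 1; j < FieldValues.size(); ++j) {
      if (FieldValues[i] == FieldValues[j]) {
        FieldValues.erase(FieldValues.begin() + j);
        --j; // without this, the element shifted into slot j is skipped
      }
    }
  }
  for (int V : FieldValues)
    std::cout << V << ' ';  // prints: 7 3 5
  std::cout << '\n';
}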
diff --git a/contrib/llvm/utils/TableGen/CodeGenRegisters.cpp b/contrib/llvm/utils/TableGen/CodeGenRegisters.cpp
index 626144fbe855..c03e0d1fcf6b 100644
--- a/contrib/llvm/utils/TableGen/CodeGenRegisters.cpp
+++ b/contrib/llvm/utils/TableGen/CodeGenRegisters.cpp
@@ -14,13 +14,33 @@
#include "CodeGenRegisters.h"
#include "CodeGenTarget.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IntEqClasses.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
+#include <algorithm>
+#include <cassert>
+#include <cstdint>
+#include <iterator>
+#include <map>
+#include <set>
+#include <string>
+#include <tuple>
+#include <utility>
+#include <vector>
using namespace llvm;
@@ -31,7 +51,7 @@ using namespace llvm;
//===----------------------------------------------------------------------===//
CodeGenSubRegIndex::CodeGenSubRegIndex(Record *R, unsigned Enum)
- : TheDef(R), EnumValue(Enum), LaneMask(0), AllSuperRegsCovered(true) {
+ : TheDef(R), EnumValue(Enum), AllSuperRegsCovered(true) {
Name = R->getName();
if (R->getValue("Namespace"))
Namespace = R->getValueAsString("Namespace");
@@ -42,7 +62,7 @@ CodeGenSubRegIndex::CodeGenSubRegIndex(Record *R, unsigned Enum)
CodeGenSubRegIndex::CodeGenSubRegIndex(StringRef N, StringRef Nspace,
unsigned Enum)
: TheDef(nullptr), Name(N), Namespace(Nspace), Size(-1), Offset(-1),
- EnumValue(Enum), LaneMask(0), AllSuperRegsCovered(true) {
+ EnumValue(Enum), AllSuperRegsCovered(true) {
}
std::string CodeGenSubRegIndex::getQualifiedName() const {
@@ -82,19 +102,19 @@ void CodeGenSubRegIndex::updateComponents(CodeGenRegBank &RegBank) {
}
}
-unsigned CodeGenSubRegIndex::computeLaneMask() const {
+LaneBitmask CodeGenSubRegIndex::computeLaneMask() const {
// Already computed?
- if (LaneMask)
+ if (LaneMask.any())
return LaneMask;
// Recursion guard, shouldn't be required.
- LaneMask = ~0u;
+ LaneMask = LaneBitmask::getAll();
// The lane mask is simply the union of all sub-indices.
- unsigned M = 0;
+ LaneBitmask M;
for (const auto &C : Composed)
M |= C.second->computeLaneMask();
- assert(M && "Missing lane mask, sub-register cycle?");
+ assert(M.any() && "Missing lane mask, sub-register cycle?");
LaneMask = M;
return LaneMask;
}
@@ -145,12 +165,13 @@ void CodeGenRegister::buildObjectGraph(CodeGenRegBank &RegBank) {
}
}
-const std::string &CodeGenRegister::getName() const {
+const StringRef CodeGenRegister::getName() const {
assert(TheDef && "no def");
return TheDef->getName();
}
namespace {
+
// Iterate over all register units in a set of registers.
class RegUnitIterator {
CodeGenRegister::Vec::const_iterator RegI, RegE;
@@ -158,7 +179,7 @@ class RegUnitIterator {
public:
RegUnitIterator(const CodeGenRegister::Vec &Regs):
- RegI(Regs.begin()), RegE(Regs.end()), UnitI(), UnitE() {
+ RegI(Regs.begin()), RegE(Regs.end()) {
if (RegI != RegE) {
UnitI = (*RegI)->getRegUnits().begin();
@@ -190,7 +211,8 @@ protected:
}
}
};
-} // namespace
+
+} // end anonymous namespace
// Return true if this unit appears in RegUnits.
static bool hasRegUnit(CodeGenRegister::RegUnitList &RegUnits, unsigned Unit) {
@@ -538,6 +560,7 @@ unsigned CodeGenRegister::getWeight(const CodeGenRegBank &RegBank) const {
// sub-registers. We provide a SetTheory expander class that returns the new
// registers.
namespace {
+
struct TupleExpander : SetTheory::Expander {
void expand(SetTheory &ST, Record *Def, SetTheory::RecSet &Elts) override {
std::vector<Record*> Indices = Def->getValueAsListOfDefs("SubRegIndices");
@@ -639,7 +662,8 @@ struct TupleExpander : SetTheory::Expander {
}
}
};
-}
+
+} // end anonymous namespace
//===----------------------------------------------------------------------===//
// CodeGenRegisterClass
@@ -654,8 +678,7 @@ CodeGenRegisterClass::CodeGenRegisterClass(CodeGenRegBank &RegBank, Record *R)
: TheDef(R),
Name(R->getName()),
TopoSigs(RegBank.getNumTopoSigs()),
- EnumValue(-1),
- LaneMask(0) {
+ EnumValue(-1) {
// Rename anonymous register classes.
if (R->getName().size() > 9 && R->getName()[9] == '.') {
static unsigned AnonCounter = 0;
@@ -767,13 +790,15 @@ bool CodeGenRegisterClass::contains(const CodeGenRegister *Reg) const {
}
namespace llvm {
+
raw_ostream &operator<<(raw_ostream &OS, const CodeGenRegisterClass::Key &K) {
OS << "{ S=" << K.SpillSize << ", A=" << K.SpillAlignment;
for (const auto R : *K.Members)
OS << ", " << R->getName();
return OS << " }";
}
-}
+
+} // end namespace llvm
// This is a simple lexicographical order that can be used to search for sets.
// It is not the same as the topological order provided by TopoOrderRC.
@@ -813,7 +838,7 @@ static bool TopoOrderRC(const CodeGenRegisterClass &PA,
auto *A = &PA;
auto *B = &PB;
if (A == B)
- return 0;
+ return false;
// Order by ascending spill size.
if (A->SpillSize < B->SpillSize)
@@ -1167,7 +1192,7 @@ void CodeGenRegBank::computeSubRegLaneMasks() {
// First assign individual bits to all the leaf indices.
unsigned Bit = 0;
// Determine mask of lanes that cover their registers.
- CoveringLanes = ~0u;
+ CoveringLanes = LaneBitmask::getAll();
for (auto &Idx : SubRegIndices) {
if (Idx.getComposites().empty()) {
if (Bit > 32) {
@@ -1175,10 +1200,10 @@ void CodeGenRegBank::computeSubRegLaneMasks() {
Twine("Ran out of lanemask bits to represent subregister ")
+ Idx.getName());
}
- Idx.LaneMask = 1u << Bit;
+ Idx.LaneMask = LaneBitmask(1 << Bit);
++Bit;
} else {
- Idx.LaneMask = 0;
+ Idx.LaneMask = LaneBitmask::getNone();
}
}
@@ -1197,9 +1222,12 @@ void CodeGenRegBank::computeSubRegLaneMasks() {
// Moving from a class with no subregisters we just had a single lane:
// The subregister must be a leaf subregister and only occupies 1 bit.
// Move the bit from the class without subregisters into that position.
- unsigned DstBit = Log2_32(Idx.LaneMask);
- assert(Idx.LaneMask == 1u << DstBit && "Must be a leaf subregister");
- MaskRolPair MaskRol = { 1, (uint8_t)DstBit };
+ static_assert(sizeof(Idx.LaneMask.getAsInteger()) == 4,
+ "Change Log2_32 to a proper one");
+ unsigned DstBit = Log2_32(Idx.LaneMask.getAsInteger());
+ assert(Idx.LaneMask == LaneBitmask(1 << DstBit) &&
+ "Must be a leaf subregister");
+ MaskRolPair MaskRol = { LaneBitmask(1), (uint8_t)DstBit };
LaneTransforms.push_back(MaskRol);
} else {
// Go through all leaf subregisters and find the ones that compose with
@@ -1213,8 +1241,8 @@ void CodeGenRegBank::computeSubRegLaneMasks() {
continue;
// Replicate the behaviour from the lane mask generation loop above.
unsigned SrcBit = NextBit;
- unsigned SrcMask = 1u << SrcBit;
- if (NextBit < 31)
+ LaneBitmask SrcMask = LaneBitmask(1 << SrcBit);
+ if (NextBit < LaneBitmask::BitWidth-1)
++NextBit;
assert(Idx2.LaneMask == SrcMask);
@@ -1227,16 +1255,19 @@ void CodeGenRegBank::computeSubRegLaneMasks() {
assert(Composite->getComposites().empty());
// Create Mask+Rotate operation and merge with existing ops if possible.
- unsigned DstBit = Log2_32(Composite->LaneMask);
+ static_assert(sizeof(Composite->LaneMask.getAsInteger()) == 4,
+ "Change Log2_32 to a proper one");
+ unsigned DstBit = Log2_32(Composite->LaneMask.getAsInteger());
int Shift = DstBit - SrcBit;
- uint8_t RotateLeft = Shift >= 0 ? (uint8_t)Shift : 32+Shift;
+ uint8_t RotateLeft = Shift >= 0 ? (uint8_t)Shift
+ : LaneBitmask::BitWidth + Shift;
for (auto &I : LaneTransforms) {
if (I.RotateLeft == RotateLeft) {
I.Mask |= SrcMask;
- SrcMask = 0;
+ SrcMask = LaneBitmask::getNone();
}
}
- if (SrcMask != 0) {
+ if (SrcMask.any()) {
MaskRolPair MaskRol = { SrcMask, RotateLeft };
LaneTransforms.push_back(MaskRol);
}
@@ -1247,13 +1278,13 @@ void CodeGenRegBank::computeSubRegLaneMasks() {
// 0xffffffff (including some irrelevant invalid bits) so that it should
// merge with more entries later while compressing the table.
if (LaneTransforms.size() == 1)
- LaneTransforms[0].Mask = ~0u;
+ LaneTransforms[0].Mask = LaneBitmask::getAll();
// Further compression optimization: For invalid compositions resulting
// in a sequence with 0 entries we can just pick any other. Choose
// Mask 0xffffffff with Rotation 0.
if (LaneTransforms.size() == 0) {
- MaskRolPair P = { ~0u, 0 };
+ MaskRolPair P = { LaneBitmask::getAll(), 0 };
LaneTransforms.push_back(P);
}
}
@@ -1263,7 +1294,7 @@ void CodeGenRegBank::computeSubRegLaneMasks() {
// Inherit lanes from composites.
for (const auto &Idx : SubRegIndices) {
- unsigned Mask = Idx.computeLaneMask();
+ LaneBitmask Mask = Idx.computeLaneMask();
// If some super-registers without CoveredBySubRegs use this index, we can
// no longer assume that the lanes are covering their registers.
if (!Idx.AllSuperRegsCovered)
@@ -1272,7 +1303,7 @@ void CodeGenRegBank::computeSubRegLaneMasks() {
// Compute lane mask combinations for register classes.
for (auto &RegClass : RegClasses) {
- unsigned LaneMask = 0;
+ LaneBitmask LaneMask;
for (const auto &SubRegIndex : SubRegIndices) {
if (RegClass.getSubClassWithSubReg(&SubRegIndex) == nullptr)
continue;
@@ -1281,14 +1312,15 @@ void CodeGenRegBank::computeSubRegLaneMasks() {
// For classes without any subregisters set LaneMask to 1 instead of 0.
// This makes it easier for client code to handle classes uniformly.
- if (LaneMask == 0)
- LaneMask = 1;
+ if (LaneMask.none())
+ LaneMask = LaneBitmask(1);
RegClass.LaneMask = LaneMask;
}
}
namespace {
+
// UberRegSet is a helper class for computeRegUnitWeights. Each UberRegSet is
// the transitive closure of the union of overlapping register
// classes. Together, the UberRegSets form a partition of the registers. If we
@@ -1307,12 +1339,13 @@ namespace {
// their weight increased.
struct UberRegSet {
CodeGenRegister::Vec Regs;
- unsigned Weight;
+ unsigned Weight = 0;
CodeGenRegister::RegUnitList SingularDeterminants;
- UberRegSet(): Weight(0) {}
+ UberRegSet() = default;
};
-} // namespace
+
+} // end anonymous namespace
// Partition registers into UberRegSets, where each set is the transitive
// closure of the union of overlapping register classes.
@@ -1321,7 +1354,6 @@ struct UberRegSet {
static void computeUberSets(std::vector<UberRegSet> &UberSets,
std::vector<UberRegSet*> &RegSets,
CodeGenRegBank &RegBank) {
-
const auto &Registers = RegBank.getRegisters();
// The Register EnumValue is one greater than its index into Registers.
@@ -1756,8 +1788,7 @@ void CodeGenRegBank::computeRegUnitSets() {
std::vector<unsigned> RUSets;
for (unsigned i = 0, e = RegUnitSets.size(); i != e; ++i) {
RegUnitSet &RUSet = RegUnitSets[i];
- if (std::find(RUSet.Units.begin(), RUSet.Units.end(), UnitIdx)
- == RUSet.Units.end())
+ if (!is_contained(RUSet.Units, UnitIdx))
continue;
RUSets.push_back(i);
}
@@ -1781,7 +1812,8 @@ void CodeGenRegBank::computeRegUnitLaneMasks() {
for (auto &Register : Registers) {
// Create an initial lane mask for all register units.
const auto &RegUnits = Register.getRegUnits();
- CodeGenRegister::RegUnitLaneMaskList RegUnitLaneMasks(RegUnits.count(), 0);
+ CodeGenRegister::RegUnitLaneMaskList
+ RegUnitLaneMasks(RegUnits.count(), LaneBitmask::getNone());
// Iterate through SubRegisters.
typedef CodeGenRegister::SubRegMap SubRegMap;
const SubRegMap &SubRegs = Register.getSubRegs();
@@ -1790,11 +1822,11 @@ void CodeGenRegBank::computeRegUnitLaneMasks() {
CodeGenRegister *SubReg = S->second;
// Ignore non-leaf subregisters, their lane masks are fully covered by
// the leaf subregisters anyway.
- if (SubReg->getSubRegs().size() != 0)
+ if (!SubReg->getSubRegs().empty())
continue;
CodeGenSubRegIndex *SubRegIndex = S->first;
const CodeGenRegister *SubRegister = S->second;
- unsigned LaneMask = SubRegIndex->LaneMask;
+ LaneBitmask LaneMask = SubRegIndex->LaneMask;
// Distribute LaneMask to Register Units touched.
for (unsigned SUI : SubRegister->getRegUnits()) {
bool Found = false;
@@ -2009,7 +2041,6 @@ void CodeGenRegBank::inferMatchingSuperRegClass(CodeGenRegisterClass *RC,
}
}
-
//
// Infer missing register classes.
//
diff --git a/contrib/llvm/utils/TableGen/CodeGenRegisters.h b/contrib/llvm/utils/TableGen/CodeGenRegisters.h
index b8d47aa4ff82..3ed26fa401a1 100644
--- a/contrib/llvm/utils/TableGen/CodeGenRegisters.h
+++ b/contrib/llvm/utils/TableGen/CodeGenRegisters.h
@@ -18,20 +18,29 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/MC/LaneBitmask.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/SetTheory.h"
-#include <cstdlib>
+#include <cassert>
+#include <cstdint>
#include <deque>
#include <list>
#include <map>
#include <string>
+#include <utility>
#include <vector>
namespace llvm {
+
class CodeGenRegBank;
template <typename T, typename Vector, typename Set> class SetVector;
@@ -39,8 +48,9 @@ namespace llvm {
/// Mask the bits specified in Mask, then rotate them Rol bits to the left
/// assuming a wraparound at 32bits.
struct MaskRolPair {
- unsigned Mask;
+ LaneBitmask Mask;
uint8_t RotateLeft;
+
bool operator==(const MaskRolPair Other) const {
return Mask == Other.Mask && RotateLeft == Other.RotateLeft;
}
@@ -59,7 +69,7 @@ namespace llvm {
uint16_t Size;
uint16_t Offset;
const unsigned EnumValue;
- mutable unsigned LaneMask;
+ mutable LaneBitmask LaneMask;
mutable SmallVector<MaskRolPair,1> CompositionLaneMaskTransform;
// Are all super-registers containing this SubRegIndex covered by their
@@ -111,7 +121,7 @@ namespace llvm {
const CompMap &getComposites() const { return Composed; }
// Compute LaneMask from Composed. Return LaneMask.
- unsigned computeLaneMask() const;
+ LaneBitmask computeLaneMask() const;
private:
CompMap Composed;
@@ -136,7 +146,7 @@ namespace llvm {
CodeGenRegister(Record *R, unsigned Enum);
- const std::string &getName() const;
+ const StringRef getName() const;
// Extract more information from TheDef. This is used to build an object
// graph after all CodeGenRegister objects have been created.
@@ -197,7 +207,7 @@ namespace llvm {
// List of register units in ascending order.
typedef SparseBitVector<> RegUnitList;
- typedef SmallVector<unsigned, 16> RegUnitLaneMaskList;
+ typedef SmallVector<LaneBitmask, 16> RegUnitLaneMaskList;
// How many entries in RegUnitList are native?
RegUnitList NativeRegUnits;
@@ -206,7 +216,7 @@ namespace llvm {
// This is only valid after computeSubRegs() completes.
const RegUnitList &getRegUnits() const { return RegUnits; }
- ArrayRef<unsigned> getRegUnitLaneMasks() const {
+ ArrayRef<LaneBitmask> getRegUnitLaneMasks() const {
return makeArrayRef(RegUnitLaneMasks).slice(0, NativeRegUnits.count());
}
@@ -266,7 +276,7 @@ namespace llvm {
class CodeGenRegisterClass {
CodeGenRegister::Vec Members;
// Allocation orders. Order[0] always contains all registers in Members.
- std::vector<SmallVector<Record*, 16> > Orders;
+ std::vector<SmallVector<Record*, 16>> Orders;
// Bit mask of sub-classes including this, indexed by their EnumValue.
BitVector SubClasses;
// List of super-classes, topologically ordered to have the larger classes
@@ -307,7 +317,7 @@ namespace llvm {
std::string AltOrderSelect;
uint8_t AllocationPriority;
/// Contains the combination of the lane masks of all subregisters.
- unsigned LaneMask;
+ LaneBitmask LaneMask;
/// True if there are at least 2 subregisters which do not interfere.
bool HasDisjunctSubRegs;
bool CoveredBySubRegs;
@@ -463,10 +473,10 @@ namespace llvm {
std::string Name;
std::vector<unsigned> Units;
- unsigned Weight; // Cache the sum of all unit weights.
- unsigned Order; // Cache the sort key.
+ unsigned Weight = 0; // Cache the sum of all unit weights.
+ unsigned Order = 0; // Cache the sort key.
- RegUnitSet() : Weight(0), Order(0) {}
+ RegUnitSet() = default;
};
// Base vector for identifying TopoSigs. The contents uniquely identify a
@@ -515,7 +525,7 @@ namespace llvm {
// NOTE: This could grow beyond the number of register classes when we map
// register units to lists of unit sets. If the list of unit sets does not
// already exist for a register class, we create a new entry in this vector.
- std::vector<std::vector<unsigned> > RegClassUnitSets;
+ std::vector<std::vector<unsigned>> RegClassUnitSets;
// Give each register unit set an order based on sorting criteria.
std::vector<unsigned> RegUnitSetOrder;
@@ -532,6 +542,7 @@ namespace llvm {
void computeInferredRegisterClasses();
void inferCommonSubClass(CodeGenRegisterClass *RC);
void inferSubClassWithSubReg(CodeGenRegisterClass *RC);
+
void inferMatchingSuperRegClass(CodeGenRegisterClass *RC) {
inferMatchingSuperRegClass(RC, RegClasses.begin());
}
@@ -590,6 +601,7 @@ namespace llvm {
}
const std::deque<CodeGenRegister> &getRegisters() { return Registers; }
+
const StringMap<CodeGenRegister*> &getRegistersByName() {
return RegistersByName;
}
@@ -674,6 +686,7 @@ namespace llvm {
unsigned getRegSetIDAt(unsigned Order) const {
return RegUnitSetOrder[Order];
}
+
const RegUnitSet &getRegSetAt(unsigned Order) const {
return RegUnitSets[RegUnitSetOrder[Order]];
}
@@ -721,8 +734,9 @@ namespace llvm {
// Bit mask of lanes that cover their registers. A sub-register index whose
// LaneMask is contained in CoveringLanes will be completely covered by
// another sub-register with the same or larger lane mask.
- unsigned CoveringLanes;
+ LaneBitmask CoveringLanes;
};
-}
-#endif
+} // end namespace llvm
+
+#endif // LLVM_UTILS_TABLEGEN_CODEGENREGISTERS_H
diff --git a/contrib/llvm/utils/TableGen/CodeGenSchedule.cpp b/contrib/llvm/utils/TableGen/CodeGenSchedule.cpp
index d1b141e3160f..cae1cf4b861e 100644
--- a/contrib/llvm/utils/TableGen/CodeGenSchedule.cpp
+++ b/contrib/llvm/utils/TableGen/CodeGenSchedule.cpp
@@ -12,12 +12,21 @@
//
//===----------------------------------------------------------------------===//
+#include "CodeGenInstruction.h"
#include "CodeGenSchedule.h"
#include "CodeGenTarget.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/Support/Regex.h"
#include "llvm/TableGen/Error.h"
+#include <algorithm>
+#include <iterator>
+#include <utility>
using namespace llvm;
@@ -31,6 +40,7 @@ static void dumpIdxVec(ArrayRef<unsigned> V) {
#endif
namespace {
+
// (instrs a, b, ...) Evaluate and union all arguments. Identical to AddOp.
struct InstrsOp : public SetTheory::Operator {
void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts,
@@ -76,6 +86,7 @@ struct InstRegexOp : public SetTheory::Operator {
}
}
};
+
} // end anonymous namespace
/// CodeGenModels ctor interprets machine model records and populates maps.
@@ -356,8 +367,7 @@ bool CodeGenSchedModels::hasReadOfWrite(Record *WriteDef) const {
continue;
RecVec ValidWrites = ReadDef->getValueAsListOfDefs("ValidWrites");
- if (std::find(ValidWrites.begin(), ValidWrites.end(), WriteDef)
- != ValidWrites.end()) {
+ if (is_contained(ValidWrites, WriteDef)) {
return true;
}
}
@@ -365,6 +375,7 @@ bool CodeGenSchedModels::hasReadOfWrite(Record *WriteDef) const {
}
namespace llvm {
+
void splitSchedReadWrites(const RecVec &RWDefs,
RecVec &WriteDefs, RecVec &ReadDefs) {
for (RecIter RWI = RWDefs.begin(), RWE = RWDefs.end(); RWI != RWE; ++RWI) {
@@ -376,7 +387,8 @@ void splitSchedReadWrites(const RecVec &RWDefs,
}
}
}
-} // namespace llvm
+
+} // end namespace llvm
// Split the SchedReadWrites defs and call findRWs for each list.
void CodeGenSchedModels::findRWs(const RecVec &RWDefs,
@@ -574,11 +586,14 @@ void CodeGenSchedModels::collectSchedClasses() {
dbgs() << " " << SchedReads[*RI].Name;
dbgs() << '\n';
}
- for (std::vector<CodeGenProcModel>::iterator PI = ProcModels.begin(),
- PE = ProcModels.end(); PI != PE; ++PI) {
- if (!std::count(ProcIndices.begin(), ProcIndices.end(), PI->Index))
- dbgs() << "No machine model for " << Inst->TheDef->getName()
- << " on processor " << PI->ModelName << '\n';
+ // If ProcIndices contains zero, the class applies to all processors.
+ if (!std::count(ProcIndices.begin(), ProcIndices.end(), 0)) {
+ for (std::vector<CodeGenProcModel>::iterator PI = ProcModels.begin(),
+ PE = ProcModels.end(); PI != PE; ++PI) {
+ if (!std::count(ProcIndices.begin(), ProcIndices.end(), PI->Index))
+ dbgs() << "No machine model for " << Inst->TheDef->getName()
+ << " on processor " << PI->ModelName << '\n';
+ }
}
}
}
@@ -674,7 +689,7 @@ void CodeGenSchedModels::createInstRWClass(Record *InstRWDef) {
// intersects with an existing class via a previous InstRWDef. Instrs that do
// not intersect with an existing class refer back to their former class as
// determined from ItinDef or SchedRW.
- SmallVector<std::pair<unsigned, SmallVector<Record *, 8> >, 4> ClassInstrs;
+ SmallVector<std::pair<unsigned, SmallVector<Record *, 8>>, 4> ClassInstrs;
// Sort Instrs into sets.
const RecVec *InstDefs = Sets.expand(InstRWDef);
if (InstDefs->empty())
@@ -913,6 +928,7 @@ void CodeGenSchedModels::inferFromInstRWs(unsigned SCIdx) {
}
namespace {
+
// Helper for substituteVariantOperand.
struct TransVariant {
Record *VarOrSeqDef; // Variant or sequence.
@@ -969,7 +985,8 @@ private:
std::vector<TransVariant> &IntersectingVariants);
void pushVariant(const TransVariant &VInfo, bool IsRead);
};
-} // anonymous
+
+} // end anonymous namespace
// Return true if this predicate is mutually exclusive with a PredTerm. This
// degenerates into checking if the predicate is mutually exclusive with any
@@ -982,7 +999,6 @@ private:
// conditions implicitly negate any prior condition.
bool PredTransitions::mutuallyExclusive(Record *PredDef,
ArrayRef<PredCheck> Term) {
-
for (ArrayRef<PredCheck>::iterator I = Term.begin(), E = Term.end();
I != E; ++I) {
if (I->Predicate == PredDef)
@@ -1029,7 +1045,7 @@ static bool hasVariant(ArrayRef<PredTransition> Transitions,
for (ArrayRef<PredTransition>::iterator
PTI = Transitions.begin(), PTE = Transitions.end();
PTI != PTE; ++PTI) {
- for (SmallVectorImpl<SmallVector<unsigned,4> >::const_iterator
+ for (SmallVectorImpl<SmallVector<unsigned,4>>::const_iterator
WSI = PTI->WriteSequences.begin(), WSE = PTI->WriteSequences.end();
WSI != WSE; ++WSI) {
for (SmallVectorImpl<unsigned>::const_iterator
@@ -1038,7 +1054,7 @@ static bool hasVariant(ArrayRef<PredTransition> Transitions,
return true;
}
}
- for (SmallVectorImpl<SmallVector<unsigned,4> >::const_iterator
+ for (SmallVectorImpl<SmallVector<unsigned,4>>::const_iterator
RSI = PTI->ReadSequences.begin(), RSE = PTI->ReadSequences.end();
RSI != RSE; ++RSI) {
for (SmallVectorImpl<unsigned>::const_iterator
@@ -1145,7 +1161,6 @@ void PredTransitions::getIntersectingVariants(
// specified by VInfo.
void PredTransitions::
pushVariant(const TransVariant &VInfo, bool IsRead) {
-
PredTransition &Trans = TransVec[VInfo.TransVecIdx];
// If this operand transition is reached through a processor-specific alias,
@@ -1168,7 +1183,7 @@ pushVariant(const TransVariant &VInfo, bool IsRead) {
const CodeGenSchedRW &SchedRW = SchedModels.getSchedRW(VInfo.RWIdx, IsRead);
- SmallVectorImpl<SmallVector<unsigned,4> > &RWSequences = IsRead
+ SmallVectorImpl<SmallVector<unsigned,4>> &RWSequences = IsRead
? Trans.ReadSequences : Trans.WriteSequences;
if (SchedRW.IsVariadic) {
unsigned OperIdx = RWSequences.size()-1;
@@ -1264,7 +1279,7 @@ void PredTransitions::substituteVariants(const PredTransition &Trans) {
TransVec.back().ProcIndices = Trans.ProcIndices;
// Visit each original write sequence.
- for (SmallVectorImpl<SmallVector<unsigned,4> >::const_iterator
+ for (SmallVectorImpl<SmallVector<unsigned,4>>::const_iterator
WSI = Trans.WriteSequences.begin(), WSE = Trans.WriteSequences.end();
WSI != WSE; ++WSI) {
// Push a new (empty) write sequence onto all partial Transitions.
@@ -1275,7 +1290,7 @@ void PredTransitions::substituteVariants(const PredTransition &Trans) {
substituteVariantOperand(*WSI, /*IsRead=*/false, StartIdx);
}
// Visit each original read sequence.
- for (SmallVectorImpl<SmallVector<unsigned,4> >::const_iterator
+ for (SmallVectorImpl<SmallVector<unsigned,4>>::const_iterator
RSI = Trans.ReadSequences.begin(), RSE = Trans.ReadSequences.end();
RSI != RSE; ++RSI) {
// Push a new (empty) read sequence onto all partial Transitions.
@@ -1296,7 +1311,7 @@ static void inferFromTransitions(ArrayRef<PredTransition> LastTransitions,
for (ArrayRef<PredTransition>::iterator
I = LastTransitions.begin(), E = LastTransitions.end(); I != E; ++I) {
IdxVec OperWritesVariant;
- for (SmallVectorImpl<SmallVector<unsigned,4> >::const_iterator
+ for (SmallVectorImpl<SmallVector<unsigned,4>>::const_iterator
WSI = I->WriteSequences.begin(), WSE = I->WriteSequences.end();
WSI != WSE; ++WSI) {
// Create a new write representing the expanded sequence.
@@ -1304,7 +1319,7 @@ static void inferFromTransitions(ArrayRef<PredTransition> LastTransitions,
SchedModels.findOrInsertRW(*WSI, /*IsRead=*/false));
}
IdxVec OperReadsVariant;
- for (SmallVectorImpl<SmallVector<unsigned,4> >::const_iterator
+ for (SmallVectorImpl<SmallVector<unsigned,4>>::const_iterator
RSI = I->ReadSequences.begin(), RSE = I->ReadSequences.end();
RSI != RSE; ++RSI) {
// Create a new read representing the expanded sequence.
@@ -1400,8 +1415,7 @@ bool CodeGenSchedModels::hasSuperGroup(RecVec &SubUnits, CodeGenProcModel &PM) {
PM.ProcResourceDefs[i]->getValueAsListOfDefs("Resources");
RecIter RI = SubUnits.begin(), RE = SubUnits.end();
for ( ; RI != RE; ++RI) {
- if (std::find(SuperUnits.begin(), SuperUnits.end(), *RI)
- == SuperUnits.end()) {
+ if (!is_contained(SuperUnits, *RI)) {
break;
}
}
@@ -1500,9 +1514,7 @@ void CodeGenSchedModels::collectProcResources() {
if (!(*RI)->getValueInit("SchedModel")->isComplete())
continue;
CodeGenProcModel &PM = getProcModel((*RI)->getValueAsDef("SchedModel"));
- RecIter I = std::find(PM.ProcResourceDefs.begin(),
- PM.ProcResourceDefs.end(), *RI);
- if (I == PM.ProcResourceDefs.end())
+ if (!is_contained(PM.ProcResourceDefs, *RI))
PM.ProcResourceDefs.push_back(*RI);
}
// Finalize each ProcModel by sorting the record arrays.
@@ -1568,15 +1580,14 @@ void CodeGenSchedModels::checkCompleteness() {
const CodeGenSchedClass &SC = getSchedClass(SCIdx);
if (!SC.Writes.empty())
continue;
- if (SC.ItinClassDef != nullptr)
+ if (SC.ItinClassDef != nullptr &&
+ SC.ItinClassDef->getName() != "NoItinerary")
continue;
const RecVec &InstRWs = SC.InstRWs;
- auto I = std::find_if(InstRWs.begin(), InstRWs.end(),
- [&ProcModel] (const Record *R) {
- return R->getValueAsDef("SchedModel") ==
- ProcModel.ModelDef;
- });
+ auto I = find_if(InstRWs, [&ProcModel](const Record *R) {
+ return R->getValueAsDef("SchedModel") == ProcModel.ModelDef;
+ });
if (I == InstRWs.end()) {
PrintError("'" + ProcModel.ModelName + "' lacks information for '" +
Inst->TheDef->getName() + "'");
@@ -1660,7 +1671,6 @@ void CodeGenSchedModels::collectRWResources(unsigned RWIdx, bool IsRead,
void CodeGenSchedModels::collectRWResources(ArrayRef<unsigned> Writes,
ArrayRef<unsigned> Reads,
ArrayRef<unsigned> ProcIndices) {
-
for (unsigned Idx : Writes)
collectRWResources(Idx, /*IsRead=*/false, ProcIndices);
@@ -1668,7 +1678,6 @@ void CodeGenSchedModels::collectRWResources(ArrayRef<unsigned> Writes,
collectRWResources(Idx, /*IsRead=*/true, ProcIndices);
}
-
// Find the processor's resource units for this kind of resource.
Record *CodeGenSchedModels::findProcResUnits(Record *ProcResKind,
const CodeGenProcModel &PM) const {
@@ -1716,13 +1725,11 @@ Record *CodeGenSchedModels::findProcResUnits(Record *ProcResKind,
// Iteratively add a resource and its super resources.
void CodeGenSchedModels::addProcResource(Record *ProcResKind,
CodeGenProcModel &PM) {
- for (;;) {
+ while (true) {
Record *ProcResUnits = findProcResUnits(ProcResKind, PM);
// See if this ProcResource is already associated with this processor.
- RecIter I = std::find(PM.ProcResourceDefs.begin(),
- PM.ProcResourceDefs.end(), ProcResUnits);
- if (I != PM.ProcResourceDefs.end())
+ if (is_contained(PM.ProcResourceDefs, ProcResUnits))
return;
PM.ProcResourceDefs.push_back(ProcResUnits);
@@ -1741,8 +1748,7 @@ void CodeGenSchedModels::addWriteRes(Record *ProcWriteResDef, unsigned PIdx) {
assert(PIdx && "don't add resources to an invalid Processor model");
RecVec &WRDefs = ProcModels[PIdx].WriteResDefs;
- RecIter WRI = std::find(WRDefs.begin(), WRDefs.end(), ProcWriteResDef);
- if (WRI != WRDefs.end())
+ if (is_contained(WRDefs, ProcWriteResDef))
return;
WRDefs.push_back(ProcWriteResDef);
@@ -1758,15 +1764,13 @@ void CodeGenSchedModels::addWriteRes(Record *ProcWriteResDef, unsigned PIdx) {
void CodeGenSchedModels::addReadAdvance(Record *ProcReadAdvanceDef,
unsigned PIdx) {
RecVec &RADefs = ProcModels[PIdx].ReadAdvanceDefs;
- RecIter I = std::find(RADefs.begin(), RADefs.end(), ProcReadAdvanceDef);
- if (I != RADefs.end())
+ if (is_contained(RADefs, ProcReadAdvanceDef))
return;
RADefs.push_back(ProcReadAdvanceDef);
}
unsigned CodeGenProcModel::getProcResourceIdx(Record *PRDef) const {
- RecIter PRPos = std::find(ProcResourceDefs.begin(), ProcResourceDefs.end(),
- PRDef);
+ RecIter PRPos = find(ProcResourceDefs, PRDef);
if (PRPos == ProcResourceDefs.end())
PrintFatalError(PRDef->getLoc(), "ProcResource def is not included in "
"the ProcResources list for " + ModelName);
@@ -1842,7 +1846,7 @@ void PredTransitions::dump() const {
<< ":" << PCI->Predicate->getName();
}
dbgs() << "},\n => {";
- for (SmallVectorImpl<SmallVector<unsigned,4> >::const_iterator
+ for (SmallVectorImpl<SmallVector<unsigned,4>>::const_iterator
WSI = TI->WriteSequences.begin(), WSE = TI->WriteSequences.end();
WSI != WSE; ++WSI) {
dbgs() << "(";
diff --git a/contrib/llvm/utils/TableGen/CodeGenTarget.cpp b/contrib/llvm/utils/TableGen/CodeGenTarget.cpp
index 245b9eeeed85..6503d5af2d48 100644
--- a/contrib/llvm/utils/TableGen/CodeGenTarget.cpp
+++ b/contrib/llvm/utils/TableGen/CodeGenTarget.cpp
@@ -138,7 +138,7 @@ std::string llvm::getQualifiedName(const Record *R) {
if (R->getValue("Namespace"))
Namespace = R->getValueAsString("Namespace");
if (Namespace.empty()) return R->getName();
- return Namespace + "::" + R->getName();
+ return Namespace + "::" + R->getName().str();
}
@@ -157,7 +157,7 @@ CodeGenTarget::CodeGenTarget(RecordKeeper &records)
CodeGenTarget::~CodeGenTarget() {
}
-const std::string &CodeGenTarget::getName() const {
+const StringRef CodeGenTarget::getName() const {
return TargetRec->getName();
}
@@ -301,7 +301,7 @@ GetInstByName(const char *Name,
/// their enum value.
void CodeGenTarget::ComputeInstrsByEnum() const {
static const char *const FixedInstrs[] = {
-#define HANDLE_TARGET_OPCODE(OPC, NUM) #OPC,
+#define HANDLE_TARGET_OPCODE(OPC) #OPC,
#include "llvm/Target/TargetOpcodes.def"
nullptr};
const auto &Insts = getInstructions();
@@ -393,6 +393,16 @@ ComplexPattern::ComplexPattern(Record *R) {
SelectFunc = R->getValueAsString("SelectFunc");
RootNodes = R->getValueAsListOfDefs("RootNodes");
+ // FIXME: This is a hack to statically increase the priority of patterns which
+ // maps a sub-dag to a complex pattern. e.g. favors LEA over ADD. To get best
+ // possible pattern match we'll need to dynamically calculate the complexity
+ // of all patterns a dag can potentially map to.
+ int64_t RawComplexity = R->getValueAsInt("Complexity");
+ if (RawComplexity == -1)
+ Complexity = NumOperands * 3;
+ else
+ Complexity = RawComplexity;
+
// Parse the properties.
Properties = 0;
std::vector<Record*> PropList = R->getValueAsListOfDefs("Properties");
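A hedged sketch of the default-complexity rule added above, with RawComplexity and NumOperands standing in for the record fields the constructor reads:

inline unsigned complexPatternComplexity(int64_t RawComplexity,
                                         unsigned NumOperands) {
  // -1 means the .td record left Complexity unset: fall back to 3 per operand.
  return RawComplexity == -1 ? NumOperands * 3
                             : static_cast<unsigned>(RawComplexity);
}
// e.g. a two-operand addressing-mode pattern with no explicit Complexity
// scores 6 under this default.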
@@ -550,8 +560,7 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
// overloaded, all the types can be specified directly.
assert(((!TyEl->isSubClassOf("LLVMExtendedType") &&
!TyEl->isSubClassOf("LLVMTruncatedType") &&
- !TyEl->isSubClassOf("LLVMVectorSameWidth") &&
- !TyEl->isSubClassOf("LLVMPointerToElt")) ||
+ !TyEl->isSubClassOf("LLVMVectorSameWidth")) ||
VT == MVT::iAny || VT == MVT::vAny) &&
"Expected iAny or vAny type");
} else
@@ -584,7 +593,12 @@ CodeGenIntrinsic::CodeGenIntrinsic(Record *R) {
else if (Property->getName() == "IntrWriteMem")
ModRef = ModRefBehavior(ModRef & ~MR_Ref);
else if (Property->getName() == "IntrArgMemOnly")
- ModRef = ModRefBehavior(ModRef & ~MR_Anywhere);
+ ModRef = ModRefBehavior((ModRef & ~MR_Anywhere) | MR_ArgMem);
+ else if (Property->getName() == "IntrInaccessibleMemOnly")
+ ModRef = ModRefBehavior((ModRef & ~MR_Anywhere) | MR_InaccessibleMem);
+ else if (Property->getName() == "IntrInaccessibleMemOrArgMemOnly")
+ ModRef = ModRefBehavior((ModRef & ~MR_Anywhere) | MR_ArgMem |
+ MR_InaccessibleMem);
else if (Property->getName() == "Commutative")
isCommutative = true;
else if (Property->getName() == "Throws")
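The property handling above clears the "anywhere" bits and then sets only the location bits a property implies. A self-contained sketch of that mask-and-set pattern with made-up bit values (the real MR_* flags live next to CodeGenIntrinsic and may be laid out differently):

enum ModRefSketch : unsigned {
  MR_ArgMemSk       = 1u << 0, // memory reached through pointer arguments
  MR_InaccessibleSk = 1u << 1, // memory no IR pointer can reach
  MR_AnywhereSk     = MR_ArgMemSk | MR_InaccessibleSk | (1u << 2),
};

// IntrArgMemOnly: drop "anywhere", keep only argument memory.
inline unsigned applyArgMemOnly(unsigned ModRef) {
  return (ModRef & ~MR_AnywhereSk) | MR_ArgMemSk;
}

// IntrInaccessibleMemOrArgMemOnly: drop "anywhere", allow both restricted kinds.
inline unsigned applyInaccessibleOrArgMemOnly(unsigned ModRef) {
  return (ModRef & ~MR_AnywhereSk) | MR_ArgMemSk | MR_InaccessibleSk;
}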
diff --git a/contrib/llvm/utils/TableGen/CodeGenTarget.h b/contrib/llvm/utils/TableGen/CodeGenTarget.h
index 85a8c1b18878..c822e940ffae 100644
--- a/contrib/llvm/utils/TableGen/CodeGenTarget.h
+++ b/contrib/llvm/utils/TableGen/CodeGenTarget.h
@@ -82,7 +82,7 @@ public:
~CodeGenTarget();
Record *getTargetRecord() const { return TargetRec; }
- const std::string &getName() const;
+ const StringRef getName() const;
/// getInstNamespace - Return the target-specific instruction namespace.
///
@@ -139,7 +139,7 @@ public:
/// supported by the target (i.e. there are registers that directly hold it).
bool isLegalValueType(MVT::SimpleValueType VT) const {
ArrayRef<MVT::SimpleValueType> LegalVTs = getLegalValueTypes();
- return std::find(LegalVTs.begin(), LegalVTs.end(), VT) != LegalVTs.end();
+ return is_contained(LegalVTs, VT);
}
CodeGenSchedModels &getSchedModels() const;
@@ -196,8 +196,8 @@ class ComplexPattern {
std::string SelectFunc;
std::vector<Record*> RootNodes;
unsigned Properties; // Node properties
+ unsigned Complexity;
public:
- ComplexPattern() : NumOperands(0) {}
ComplexPattern(Record *R);
MVT::SimpleValueType getValueType() const { return Ty; }
@@ -207,6 +207,7 @@ public:
return RootNodes;
}
bool hasProperty(enum SDNP Prop) const { return Properties & (1 << Prop); }
+ unsigned getComplexity() const { return Complexity; }
};
} // End llvm namespace
diff --git a/contrib/llvm/utils/TableGen/DAGISelEmitter.cpp b/contrib/llvm/utils/TableGen/DAGISelEmitter.cpp
index 0fe3bbd8d788..60fe866f194d 100644
--- a/contrib/llvm/utils/TableGen/DAGISelEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/DAGISelEmitter.cpp
@@ -121,7 +121,7 @@ struct PatternSortingPredicate {
void DAGISelEmitter::run(raw_ostream &OS) {
emitSourceFileHeader("DAG Instruction Selector for the " +
- CGP.getTargetInfo().getName() + " target", OS);
+ CGP.getTargetInfo().getName().str() + " target", OS);
OS << "// *** NOTE: This file is #included into the middle of the target\n"
<< "// *** instruction selector class. These functions are really "
diff --git a/contrib/llvm/utils/TableGen/DAGISelMatcherGen.cpp b/contrib/llvm/utils/TableGen/DAGISelMatcherGen.cpp
index 4110e9725b5a..aafc1157893e 100644
--- a/contrib/llvm/utils/TableGen/DAGISelMatcherGen.cpp
+++ b/contrib/llvm/utils/TableGen/DAGISelMatcherGen.cpp
@@ -250,7 +250,7 @@ void MatcherGen::EmitLeafMatchCode(const TreePatternNode *N) {
// If we have a physreg reference like (mul gpr:$src, EAX) then we need to
// record the register
if (LeafRec->isSubClassOf("Register")) {
- AddMatcher(new RecordMatcher("physreg input "+LeafRec->getName(),
+ AddMatcher(new RecordMatcher("physreg input "+LeafRec->getName().str(),
NextRecordedOperandNo));
PhysRegInputs.push_back(std::make_pair(LeafRec, NextRecordedOperandNo++));
return;
@@ -354,7 +354,7 @@ void MatcherGen::EmitOperatorMatchCode(const TreePatternNode *N,
unsigned OpNo = 0;
if (N->NodeHasProperty(SDNPHasChain, CGP)) {
// Record the node and remember it in our chained nodes list.
- AddMatcher(new RecordMatcher("'" + N->getOperator()->getName() +
+ AddMatcher(new RecordMatcher("'" + N->getOperator()->getName().str() +
"' chained node",
NextRecordedOperandNo));
// Remember all of the input chains our pattern will match.
@@ -418,7 +418,7 @@ void MatcherGen::EmitOperatorMatchCode(const TreePatternNode *N,
// TODO: This redundantly records nodes with both glues and chains.
// Record the node and remember it in our chained nodes list.
- AddMatcher(new RecordMatcher("'" + N->getOperator()->getName() +
+ AddMatcher(new RecordMatcher("'" + N->getOperator()->getName().str() +
"' glue output node",
NextRecordedOperandNo));
}
@@ -887,7 +887,7 @@ EmitResultInstructionAsOperand(const TreePatternNode *N,
assert((!ResultVTs.empty() || TreeHasOutGlue || NodeHasChain) &&
"Node has no result");
- AddMatcher(new EmitNodeMatcher(II.Namespace+"::"+II.TheDef->getName(),
+ AddMatcher(new EmitNodeMatcher(II.Namespace+"::"+II.TheDef->getName().str(),
ResultVTs, InstOps,
NodeHasChain, TreeHasInGlue, TreeHasOutGlue,
NodeHasMemRefs, NumFixedArityOperands,
diff --git a/contrib/llvm/utils/TableGen/DAGISelMatcherOpt.cpp b/contrib/llvm/utils/TableGen/DAGISelMatcherOpt.cpp
index ad385fac0438..783b35e745f8 100644
--- a/contrib/llvm/utils/TableGen/DAGISelMatcherOpt.cpp
+++ b/contrib/llvm/utils/TableGen/DAGISelMatcherOpt.cpp
@@ -200,8 +200,15 @@ static void FactorNodes(std::unique_ptr<Matcher> &MatcherPtr) {
std::unique_ptr<Matcher> Child(Scope->takeChild(i));
FactorNodes(Child);
- if (Matcher *N = Child.release())
- OptionsToMatch.push_back(N);
+ if (Child) {
+ // If the child is a ScopeMatcher we can just merge its contents.
+ if (auto *SM = dyn_cast<ScopeMatcher>(Child.get())) {
+ for (unsigned j = 0, e = SM->getNumChildren(); j != e; ++j)
+ OptionsToMatch.push_back(SM->takeChild(j));
+ } else {
+ OptionsToMatch.push_back(Child.release());
+ }
+ }
}
SmallVector<Matcher*, 32> NewOptionsToMatch;
@@ -414,9 +421,7 @@ static void FactorNodes(std::unique_ptr<Matcher> &MatcherPtr) {
}
Matcher *Entries[2] = { PrevMatcher, MatcherWithoutCTM };
- std::unique_ptr<Matcher> Case(new ScopeMatcher(Entries));
- FactorNodes(Case);
- Cases[Entry-1].second = Case.release();
+ Cases[Entry-1].second = new ScopeMatcher(Entries);
continue;
}
@@ -424,6 +429,16 @@ static void FactorNodes(std::unique_ptr<Matcher> &MatcherPtr) {
Cases.push_back(std::make_pair(CTMTy, MatcherWithoutCTM));
}
+ // Make sure we recursively factor any scopes we may have created.
+ for (auto &M : Cases) {
+ if (ScopeMatcher *SM = dyn_cast<ScopeMatcher>(M.second)) {
+ std::unique_ptr<Matcher> Scope(SM);
+ FactorNodes(Scope);
+ M.second = Scope.release();
+ assert(M.second && "null matcher");
+ }
+ }
+
if (Cases.size() != 1) {
MatcherPtr.reset(new SwitchTypeMatcher(Cases));
} else {
diff --git a/contrib/llvm/utils/TableGen/DFAPacketizerEmitter.cpp b/contrib/llvm/utils/TableGen/DFAPacketizerEmitter.cpp
index e31caaf3c98c..f879a5bae215 100644
--- a/contrib/llvm/utils/TableGen/DFAPacketizerEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/DFAPacketizerEmitter.cpp
@@ -19,14 +19,18 @@
#include "CodeGenTarget.h"
#include "llvm/ADT/DenseSet.h"
-#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cassert>
+#include <cstdint>
#include <map>
+#include <set>
#include <string>
-#include <queue>
+#include <vector>
using namespace llvm;
@@ -58,6 +62,7 @@ typedef int64_t DFAStateInput;
#define DFA_TBLTYPE "int64_t" // For generating DFAStateInputTable.
namespace {
+
DFAInput addDFAFuncUnits(DFAInput Inp, unsigned FuncUnits) {
return (Inp << DFA_MAX_RESOURCES) | FuncUnits;
}
@@ -67,12 +72,13 @@ namespace {
/// DFAPacketizerEmitter.cpp.
DFAInput getDFAInsnInput(const std::vector<unsigned> &InsnClass) {
DFAInput InsnInput = 0;
- assert ((InsnClass.size() <= DFA_MAX_RESTERMS) &&
- "Exceeded maximum number of DFA terms");
+ assert((InsnClass.size() <= DFA_MAX_RESTERMS) &&
+ "Exceeded maximum number of DFA terms");
for (auto U : InsnClass)
InsnInput = addDFAFuncUnits(InsnInput, U);
return InsnInput;
}
+
} // end anonymous namespace
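A worked example of the packing addDFAFuncUnits and getDFAInsnInput perform: each resource term occupies DFA_MAX_RESOURCES bits of the DFAInput. The 16-bit width below is an assumption for illustration; the real constant comes from llvm/CodeGen/DFAPacketizer.h.

#include <cstdint>
#include <vector>

using DFAInputSketch = int64_t;
constexpr unsigned kTermWidthSketch = 16; // assumed width of one resource term

DFAInputSketch addFuncUnitsSketch(DFAInputSketch Inp, unsigned FuncUnits) {
  return (Inp << kTermWidthSketch) | FuncUnits;
}

DFAInputSketch insnInputSketch(const std::vector<unsigned> &InsnClass) {
  DFAInputSketch In = 0;
  for (unsigned U : InsnClass)
    In = addFuncUnitsSketch(In, U);
  return In;
}
// insnInputSketch({0x3, 0x8}) == (0x3 << 16) | 0x8 == 0x30008: earlier terms
// end up in the higher bits, the last term in the low bits.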
// --------------------------------------------------------------------
@@ -98,6 +104,7 @@ void dbgsIndent(unsigned indent);
// for resource tracking.
//
namespace {
+
class DFAPacketizerEmitter {
private:
std::string TargetName;
@@ -150,10 +157,8 @@ public:
void run(raw_ostream &OS);
};
-} // end anonymous namespace
//
-//
// State represents the usage of machine resources if the packet contains
// a set of instruction classes.
//
@@ -174,7 +179,6 @@ public:
// A State instance also contains a collection of transitions from that state:
// a map from inputs to new states.
//
-namespace {
class State {
public:
static int currentStateNum;
@@ -204,6 +208,7 @@ class State {
//
bool canMaybeAddInsnClass(std::vector<unsigned> &InsnClass,
std::map<unsigned, unsigned> &ComboBitToBitsMap) const;
+
//
// AddInsnClass - Return all combinations of resource reservation
// which are possible from this state (PossibleStates).
@@ -214,6 +219,7 @@ class State {
void AddInsnClass(std::vector<unsigned> &InsnClass,
std::map<unsigned, unsigned> &ComboBitToBitsMap,
std::set<unsigned> &PossibleStates) const;
+
//
// AddInsnClassStages - Return all combinations of resource reservation
// resulting from the cross product of all stages for this InsnClass
@@ -225,31 +231,31 @@ class State {
unsigned prevState, unsigned origState,
DenseSet<unsigned> &VisitedResourceStates,
std::set<unsigned> &PossibleStates) const;
+
//
// addTransition - Add a transition from this state given the input InsnClass
//
void addTransition(std::vector<unsigned> InsnClass, const State *To) const;
+
//
// hasTransition - Returns true if there is a transition from this state
// given the input InsnClass
//
bool hasTransition(std::vector<unsigned> InsnClass) const;
};
-} // end anonymous namespace
//
// class DFA: deterministic finite automaton for processor resource tracking.
//
-namespace {
class DFA {
public:
- DFA();
+ DFA() = default;
// Set of states. Need to keep this sorted to emit the transition table.
typedef std::set<State> StateSet;
StateSet states;
- State *currentState;
+ State *currentState = nullptr;
//
// Modify the DFA.
@@ -263,6 +269,7 @@ public:
int numInsnClasses = 0,
int maxResources = 0, int numCombos = 0, int maxStages = 0);
};
+
} // end anonymous namespace
#ifndef NDEBUG
@@ -314,8 +321,6 @@ void dbgsIndent(unsigned indent) {
State::State() :
stateNum(currentStateNum++), isInitial(false) {}
-DFA::DFA(): currentState(nullptr) {}
-
//
// addTransition - Add a transition from this state given the input InsnClass
//
@@ -370,7 +375,6 @@ void State::AddInsnClassStages(std::vector<unsigned> &InsnClass,
unsigned prevState, unsigned origState,
DenseSet<unsigned> &VisitedResourceStates,
std::set<unsigned> &PossibleStates) const {
-
assert((chkstage < numstages) && "AddInsnClassStages: stage out of range");
unsigned thisStage = InsnClass[chkstage];
@@ -469,7 +473,6 @@ bool State::canMaybeAddInsnClass(std::vector<unsigned> &InsnClass,
std::map<unsigned, unsigned> &ComboBitToBitsMap) const {
for (std::set<unsigned>::const_iterator SI = stateInfo.begin();
SI != stateInfo.end(); ++SI) {
-
// Check to see if all required resources are available.
bool available = true;
@@ -514,8 +517,7 @@ const State &DFA::newState() {
int State::currentStateNum = 0;
DFAPacketizerEmitter::DFAPacketizerEmitter(RecordKeeper &R):
- TargetName(CodeGenTarget(R).getName()),
- allInsnClasses(), Records(R) {}
+ TargetName(CodeGenTarget(R).getName()), Records(R) {}
//
// writeTableAndAPI - Print out a table representing the DFA and the
@@ -531,7 +533,6 @@ DFAPacketizerEmitter::DFAPacketizerEmitter(RecordKeeper &R):
void DFA::writeTableAndAPI(raw_ostream &OS, const std::string &TargetName,
int numInsnClasses,
int maxResources, int numCombos, int maxStages) {
-
unsigned numStates = states.size();
DEBUG(dbgs() << "-----------------------------------------------------------------------------\n");
@@ -783,7 +784,7 @@ int DFAPacketizerEmitter::collectOneInsnClass(const std::string &ProcName,
DEBUG(dbgs() << " (bits: 0x" << utohexstr(UnitBitValue) << ")\n");
}
- if (UnitBits.size() > 0)
+ if (!UnitBits.empty())
allInsnClasses.push_back(UnitBits);
DEBUG({
@@ -831,7 +832,6 @@ int DFAPacketizerEmitter::collectAllInsnClasses(const std::string &ProcName,
// Run the worklist algorithm to generate the DFA.
//
void DFAPacketizerEmitter::run(raw_ostream &OS) {
-
// Collect processor itineraries.
std::vector<Record*> ProcItinList =
Records.getAllDerivedDefinitions("ProcessorItineraries");
@@ -890,7 +890,6 @@ void DFAPacketizerEmitter::run(raw_ostream &OS) {
Initial->isInitial = true;
Initial->stateInfo.insert(0x0);
SmallVector<const State*, 32> WorkList;
-// std::queue<State*> WorkList;
std::map<std::set<unsigned>, const State*> Visited;
WorkList.push_back(Initial);
@@ -937,7 +936,7 @@ void DFAPacketizerEmitter::run(raw_ostream &OS) {
current->canMaybeAddInsnClass(InsnClass, ComboBitToBitsMap)) {
const State *NewState = nullptr;
current->AddInsnClass(InsnClass, ComboBitToBitsMap, NewStateResources);
- if (NewStateResources.size() == 0) {
+ if (NewStateResources.empty()) {
DEBUG(dbgs() << " Skipped - no new states generated\n");
continue;
}
@@ -989,4 +988,4 @@ void EmitDFAPacketizer(RecordKeeper &RK, raw_ostream &OS) {
DFAPacketizerEmitter(RK).run(OS);
}
-} // end namespaec llvm
+} // end namespace llvm
diff --git a/contrib/llvm/utils/TableGen/DisassemblerEmitter.cpp b/contrib/llvm/utils/TableGen/DisassemblerEmitter.cpp
index 44815b05f274..6e1d8dde981c 100644
--- a/contrib/llvm/utils/TableGen/DisassemblerEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/DisassemblerEmitter.cpp
@@ -104,7 +104,7 @@ extern void EmitFixedLenDecoder(RecordKeeper &RK, raw_ostream &OS,
void EmitDisassembler(RecordKeeper &Records, raw_ostream &OS) {
CodeGenTarget Target(Records);
- emitSourceFileHeader(" * " + Target.getName() + " Disassembler", OS);
+ emitSourceFileHeader(" * " + Target.getName().str() + " Disassembler", OS);
// X86 uses a custom disassembler.
if (Target.getName() == "X86") {
diff --git a/contrib/llvm/utils/TableGen/FastISelEmitter.cpp b/contrib/llvm/utils/TableGen/FastISelEmitter.cpp
index debb12c4f511..43c6a9826330 100644
--- a/contrib/llvm/utils/TableGen/FastISelEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/FastISelEmitter.cpp
@@ -564,8 +564,7 @@ void FastISelMap::collectPatterns(CodeGenDAGPatterns &CGP) {
Operands.PrintManglingSuffix(SuffixOS, ImmediatePredicates, true);
SuffixOS.flush();
if (!StringSwitch<bool>(ManglingSuffix)
- .Cases("", "r", "rr", "ri", "rf", true)
- .Cases("rri", "i", "f", true)
+ .Cases("", "r", "rr", "ri", "i", "f", true)
.Default(false))
continue;
@@ -874,7 +873,7 @@ void EmitFastISel(RecordKeeper &RK, raw_ostream &OS) {
CodeGenDAGPatterns CGP(RK);
const CodeGenTarget &Target = CGP.getTargetInfo();
emitSourceFileHeader("\"Fast\" Instruction Selector for the " +
- Target.getName() + " target", OS);
+ Target.getName().str() + " target", OS);
// Determine the target's namespace name.
std::string InstNS = Target.getInstNamespace() + "::";
diff --git a/contrib/llvm/utils/TableGen/FixedLenDecoderEmitter.cpp b/contrib/llvm/utils/TableGen/FixedLenDecoderEmitter.cpp
index 0506400b90f6..e1aaeccb08d0 100644
--- a/contrib/llvm/utils/TableGen/FixedLenDecoderEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/FixedLenDecoderEmitter.cpp
@@ -12,21 +12,32 @@
//
//===----------------------------------------------------------------------===//
+#include "CodeGenInstruction.h"
#include "CodeGenTarget.h"
#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/CachedHashString.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
-#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCFixedLenDisassembler.h"
-#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
+#include <algorithm>
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
#include <map>
+#include <memory>
+#include <set>
#include <string>
#include <utility>
#include <vector>
@@ -36,6 +47,7 @@ using namespace llvm;
#define DEBUG_TYPE "decoder-emitter"
namespace {
+
struct EncodingField {
unsigned Base, Width, Offset;
EncodingField(unsigned B, unsigned W, unsigned O)
@@ -66,8 +78,8 @@ typedef std::vector<uint8_t> DecoderTable;
typedef uint32_t DecoderFixup;
typedef std::vector<DecoderFixup> FixupList;
typedef std::vector<FixupList> FixupScopeList;
-typedef SmallSetVector<std::string, 16> PredicateSet;
-typedef SmallSetVector<std::string, 16> DecoderSet;
+typedef SmallSetVector<CachedHashString, 16> PredicateSet;
+typedef SmallSetVector<CachedHashString, 16> DecoderSet;
struct DecoderTableInfo {
DecoderTable Table;
FixupScopeList FixupStack;
@@ -75,13 +87,10 @@ struct DecoderTableInfo {
DecoderSet Decoders;
};
-} // End anonymous namespace
-
-namespace {
class FixedLenDecoderEmitter {
ArrayRef<const CodeGenInstruction *> NumberedInstructions;
-public:
+public:
// Defaults preserved here for documentation, even though they aren't
// strictly necessary given the way that this is currently being called.
FixedLenDecoderEmitter(RecordKeeper &R, std::string PredicateNamespace,
@@ -111,13 +120,15 @@ public:
private:
CodeGenTarget Target;
+
public:
std::string PredicateNamespace;
std::string GuardPrefix, GuardPostfix;
std::string ReturnOK, ReturnFail;
std::string Locals;
};
-} // End anonymous namespace
+
+} // end anonymous namespace
// The set (BIT_TRUE, BIT_FALSE, BIT_UNSET) represents a ternary logic system
// for a bit value.
@@ -134,12 +145,15 @@ typedef enum {
static bool ValueSet(bit_value_t V) {
return (V == BIT_TRUE || V == BIT_FALSE);
}
+
static bool ValueNotSet(bit_value_t V) {
return (V == BIT_UNSET);
}
+
static int Value(bit_value_t V) {
return ValueNotSet(V) ? -1 : (V == BIT_FALSE ? 0 : 1);
}
+
static bit_value_t bitFromBits(const BitsInit &bits, unsigned index) {
if (BitInit *bit = dyn_cast<BitInit>(bits.getBit(index)))
return bit->getValue() ? BIT_TRUE : BIT_FALSE;
@@ -147,6 +161,7 @@ static bit_value_t bitFromBits(const BitsInit &bits, unsigned index) {
// The bit is uninitialized.
return BIT_UNSET;
}
+
// Prints the bit value for each position.
static void dumpBits(raw_ostream &o, const BitsInit &bits) {
for (unsigned index = bits.getNumBits(); index > 0; --index) {
@@ -166,19 +181,18 @@ static void dumpBits(raw_ostream &o, const BitsInit &bits) {
}
}
-static BitsInit &getBitsField(const Record &def, const char *str) {
+static BitsInit &getBitsField(const Record &def, StringRef str) {
BitsInit *bits = def.getValueAsBitsInit(str);
return *bits;
}
-// Forward declaration.
-namespace {
-class FilterChooser;
-} // End anonymous namespace
-
// Representation of the instruction to work on.
typedef std::vector<bit_value_t> insn_t;
+namespace {
+
+class FilterChooser;
+
/// Filter - Filter works with FilterChooser to produce the decoding tree for
/// the ISA.
///
@@ -215,7 +229,6 @@ typedef std::vector<bit_value_t> insn_t;
/// decoder could try to decode the even/odd register numbering and assign to
/// VST4q8a or VST4q8b, but for the time being, the decoder chooses the "a"
/// version and return the Opcode since the two have the same Asm format string.
-namespace {
class Filter {
protected:
const FilterChooser *Owner;// points to the FilterChooser who owns this filter
@@ -224,7 +237,7 @@ protected:
bool Mixed; // a mixed region contains both set and unset bits
// Map of well-known segment value to the set of uid's with that value.
- std::map<uint64_t, std::vector<unsigned> > FilteredInstructions;
+ std::map<uint64_t, std::vector<unsigned>> FilteredInstructions;
// Set of uid's with non-constant segment values.
std::vector<unsigned> VariableInstructions;
@@ -239,11 +252,18 @@ protected:
unsigned LastOpcFiltered;
public:
+ Filter(Filter &&f);
+ Filter(FilterChooser &owner, unsigned startBit, unsigned numBits, bool mixed);
+
+ ~Filter() = default;
+
unsigned getNumFiltered() const { return NumFiltered; }
+
unsigned getSingletonOpc() const {
assert(NumFiltered == 1);
return LastOpcFiltered;
}
+
// Return the filter chooser for the group of instructions without constant
// segment values.
const FilterChooser &getVariableFC() const {
@@ -252,11 +272,6 @@ public:
return *(FilterChooserMap.find((unsigned)-1)->second);
}
- Filter(Filter &&f);
- Filter(FilterChooser &owner, unsigned startBit, unsigned numBits, bool mixed);
-
- ~Filter();
-
// Divides the decoding task into sub tasks and delegates them to the
// inferior FilterChooser's.
//
@@ -272,8 +287,9 @@ public:
// Returns the number of fanout produced by the filter. More fanout implies
// the filter distinguishes more categories of instructions.
unsigned usefulness() const;
-}; // End of class Filter
-} // End anonymous namespace
+}; // end class Filter
+
+} // end anonymous namespace
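A compact sketch of how a Filter buckets opcodes, following the members shown above: encodings whose filtered field has a constant value key FilteredInstructions, while encodings whose field is not fully defined join the variable group that getVariableFC() later finds under the (unsigned)-1 key. Field extraction is reduced to a boolean here.

#include <cstdint>
#include <map>
#include <vector>

struct FilterBucketsSketch {
  std::map<uint64_t, std::vector<unsigned>> FilteredInstructions;
  std::vector<unsigned> VariableInstructions;

  void add(unsigned Opcode, bool FieldIsConstant, uint64_t FieldValue) {
    if (FieldIsConstant)
      FilteredInstructions[FieldValue].push_back(Opcode);
    else
      VariableInstructions.push_back(Opcode); // grouped under key (unsigned)-1
  }
};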
// These are states of our finite state machines used in FilterChooser's
// filterProcessor() which produces the filter candidates to use.
@@ -301,6 +317,7 @@ typedef enum {
/// decoding tree. And each case is delegated to an inferior FilterChooser to
/// decide what further remaining bits to look at.
namespace {
+
class FilterChooser {
protected:
friend class Filter;
@@ -312,7 +329,7 @@ protected:
const std::vector<unsigned> &Opcodes;
// Lookup table for the operand decoding of instructions.
- const std::map<unsigned, std::vector<OperandInfo> > &Operands;
+ const std::map<unsigned, std::vector<OperandInfo>> &Operands;
// Vector of candidate filters.
std::vector<Filter> Filters;
@@ -333,16 +350,13 @@ protected:
// Parent emitter
const FixedLenDecoderEmitter *Emitter;
- FilterChooser(const FilterChooser &) = delete;
- void operator=(const FilterChooser &) = delete;
public:
-
FilterChooser(ArrayRef<const CodeGenInstruction *> Insts,
const std::vector<unsigned> &IDs,
- const std::map<unsigned, std::vector<OperandInfo> > &Ops,
+ const std::map<unsigned, std::vector<OperandInfo>> &Ops,
unsigned BW,
const FixedLenDecoderEmitter *E)
- : AllInstructions(Insts), Opcodes(IDs), Operands(Ops), Filters(),
+ : AllInstructions(Insts), Opcodes(IDs), Operands(Ops),
FilterBitValues(BW, BIT_UNFILTERED), Parent(nullptr), BestIndex(-1),
BitWidth(BW), Emitter(E) {
doFilter();
@@ -350,16 +364,18 @@ public:
FilterChooser(ArrayRef<const CodeGenInstruction *> Insts,
const std::vector<unsigned> &IDs,
- const std::map<unsigned, std::vector<OperandInfo> > &Ops,
+ const std::map<unsigned, std::vector<OperandInfo>> &Ops,
const std::vector<bit_value_t> &ParentFilterBitValues,
const FilterChooser &parent)
: AllInstructions(Insts), Opcodes(IDs), Operands(Ops),
- Filters(), FilterBitValues(ParentFilterBitValues),
- Parent(&parent), BestIndex(-1), BitWidth(parent.BitWidth),
- Emitter(parent.Emitter) {
+ FilterBitValues(ParentFilterBitValues), Parent(&parent), BestIndex(-1),
+ BitWidth(parent.BitWidth), Emitter(parent.Emitter) {
doFilter();
}
+ FilterChooser(const FilterChooser &) = delete;
+ void operator=(const FilterChooser &) = delete;
+
unsigned getBitWidth() const { return BitWidth; }
protected:
@@ -384,7 +400,7 @@ protected:
}
// Returns the record name.
- const std::string &nameWithID(unsigned Opcode) const {
+ const StringRef nameWithID(unsigned Opcode) const {
return AllInstructions[Opcode]->TheDef->getName();
}
@@ -476,7 +492,8 @@ public:
// instructions.
void emitTableEntries(DecoderTableInfo &TableInfo) const;
};
-} // End anonymous namespace
+
+} // end anonymous namespace
///////////////////////////
// //
@@ -527,9 +544,6 @@ Filter::Filter(FilterChooser &owner, unsigned startBit, unsigned numBits,
&& "Filter returns no instruction categories");
}
-Filter::~Filter() {
-}
-
// Divides the decoding task into sub tasks and delegates them to the
// inferior FilterChooser's.
//
@@ -1071,7 +1085,7 @@ void FilterChooser::emitDecoder(raw_ostream &OS, unsigned Indentation,
for (const auto &Op : Operands.find(Opc)->second) {
// If a custom instruction decoder was specified, use that.
- if (Op.numFields() == 0 && Op.Decoder.size()) {
+ if (Op.numFields() == 0 && !Op.Decoder.empty()) {
HasCompleteDecoder = Op.HasCompleteDecoder;
OS.indent(Indentation) << Emitter->GuardPrefix << Op.Decoder
<< "(MI, insn, Address, Decoder)"
@@ -1106,11 +1120,9 @@ unsigned FilterChooser::getDecoderIndex(DecoderSet &Decoders,
// overkill for now, though.
// Make sure the predicate is in the table.
- Decoders.insert(StringRef(Decoder));
+ Decoders.insert(CachedHashString(Decoder));
// Now figure out the index for when we write out the table.
- DecoderSet::const_iterator P = std::find(Decoders.begin(),
- Decoders.end(),
- Decoder.str());
+ DecoderSet::const_iterator P = find(Decoders, Decoder.str());
return (unsigned)(P - Decoders.begin());
}
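getDecoderIndex works because the set-vector keeps insertion order, so an element's position doubles as its table index. A standard-library-only sketch of that interning idea (the real container is SmallSetVector<CachedHashString, 16>):

#include <string>
#include <vector>

unsigned internDecoderSketch(std::vector<std::string> &Decoders,
                             const std::string &Decoder) {
  for (unsigned i = 0, e = Decoders.size(); i != e; ++i)
    if (Decoders[i] == Decoder)
      return i;                // already present: reuse its index
  Decoders.push_back(Decoder); // first occurrence: index is the new slot
  return static_cast<unsigned>(Decoders.size() - 1);
}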
@@ -1143,7 +1155,7 @@ bool FilterChooser::emitPredicateMatch(raw_ostream &o, unsigned &Indentation,
StringRef SR(P);
std::pair<StringRef, StringRef> pairs = SR.split(',');
- while (pairs.second.size()) {
+ while (!pairs.second.empty()) {
emitSinglePredicateMatch(o, pairs.first, Emitter->PredicateNamespace);
o << " && ";
pairs = pairs.second.split(',');
@@ -1181,11 +1193,9 @@ unsigned FilterChooser::getPredicateIndex(DecoderTableInfo &TableInfo,
// overkill for now, though.
// Make sure the predicate is in the table.
- TableInfo.Predicates.insert(Predicate.str());
+ TableInfo.Predicates.insert(CachedHashString(Predicate));
// Now figure out the index for when we write out the table.
- PredicateSet::const_iterator P = std::find(TableInfo.Predicates.begin(),
- TableInfo.Predicates.end(),
- Predicate.str());
+ PredicateSet::const_iterator P = find(TableInfo.Predicates, Predicate);
return (unsigned)(P - TableInfo.Predicates.begin());
}
@@ -1374,7 +1384,6 @@ void FilterChooser::emitSingletonTableEntry(DecoderTableInfo &TableInfo,
Best.getVariableFC().emitTableEntries(TableInfo);
}
-
// Assign a single filter and run with it. Top level API client can initialize
// with a single filter to start the filtering process.
void FilterChooser::runSingleFilter(unsigned startBit, unsigned numBit,
@@ -1692,9 +1701,37 @@ void FilterChooser::emitTableEntries(DecoderTableInfo &TableInfo) const {
}
}
+static std::string findOperandDecoderMethod(TypedInit *TI) {
+ std::string Decoder;
+
+ RecordRecTy *Type = cast<RecordRecTy>(TI->getType());
+ Record *TypeRecord = Type->getRecord();
+
+ RecordVal *DecoderString = TypeRecord->getValue("DecoderMethod");
+ StringInit *String = DecoderString ?
+ dyn_cast<StringInit>(DecoderString->getValue()) : nullptr;
+ if (String) {
+ Decoder = String->getValue();
+ if (!Decoder.empty())
+ return Decoder;
+ }
+
+ if (TypeRecord->isSubClassOf("RegisterOperand"))
+ TypeRecord = TypeRecord->getValueAsDef("RegClass");
+
+ if (TypeRecord->isSubClassOf("RegisterClass")) {
+ Decoder = "Decode" + TypeRecord->getName().str() + "RegisterClass";
+ } else if (TypeRecord->isSubClassOf("PointerLikeRegClass")) {
+ Decoder = "DecodePointerLikeRegClass" +
+ utostr(TypeRecord->getValueAsInt("RegClassKind"));
+ }
+
+ return Decoder;
+}
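The lookup order above: an explicit DecoderMethod string wins, a RegisterOperand is unwrapped to its RegClass, and otherwise the name is derived from the class. The derived names look like this (the register class name is hypothetical):

//   RegisterClass "GPR32"                -> "DecodeGPR32RegisterClass"
//   PointerLikeRegClass (RegClassKind 0) -> "DecodePointerLikeRegClass0"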
+
static bool populateInstruction(CodeGenTarget &Target,
const CodeGenInstruction &CGI, unsigned Opc,
- std::map<unsigned, std::vector<OperandInfo> > &Operands){
+ std::map<unsigned, std::vector<OperandInfo>> &Operands){
const Record &Def = *CGI.TheDef;
// If all the bit positions are not specified; do not decode this instruction.
// We are bound to fail! For proper disassembly, the well-known encoding bits
@@ -1722,13 +1759,15 @@ static bool populateInstruction(CodeGenTarget &Target,
// Gather the outputs/inputs of the instruction, so we can find their
// positions in the encoding. This assumes for now that they appear in the
// MCInst in the order that they're listed.
- std::vector<std::pair<Init*, std::string> > InOutOperands;
+ std::vector<std::pair<Init*, StringRef>> InOutOperands;
DagInit *Out = Def.getValueAsDag("OutOperandList");
DagInit *In = Def.getValueAsDag("InOperandList");
for (unsigned i = 0; i < Out->getNumArgs(); ++i)
- InOutOperands.push_back(std::make_pair(Out->getArg(i), Out->getArgName(i)));
+ InOutOperands.push_back(std::make_pair(Out->getArg(i),
+ Out->getArgNameStr(i)));
for (unsigned i = 0; i < In->getNumArgs(); ++i)
- InOutOperands.push_back(std::make_pair(In->getArg(i), In->getArgName(i)));
+ InOutOperands.push_back(std::make_pair(In->getArg(i),
+ In->getArgNameStr(i)));
// Search for tied operands, so that we can correctly instantiate
// operands that are not explicitly represented in the encoding.
@@ -1743,7 +1782,7 @@ static bool populateInstruction(CodeGenTarget &Target,
}
}
- std::map<std::string, std::vector<OperandInfo> > NumberedInsnOperands;
+ std::map<std::string, std::vector<OperandInfo>> NumberedInsnOperands;
std::set<std::string> NumberedInsnOperandsNoTie;
if (Target.getInstructionSet()->
getValueAsBit("decodePositionallyEncodedOperands")) {
@@ -1828,7 +1867,7 @@ static bool populateInstruction(CodeGenTarget &Target,
Name << "(" << SO.first << ", " << SO.second << ") => " <<
Vals[i].getName() << "\n");
- std::string Decoder = "";
+ std::string Decoder;
Record *TypeRecord = CGI.Operands[SO.first].Rec;
RecordVal *DecoderString = TypeRecord->getValue("DecoderMethod");
@@ -1852,7 +1891,7 @@ static bool populateInstruction(CodeGenTarget &Target,
if (TypeRecord->isSubClassOf("RegisterOperand"))
TypeRecord = TypeRecord->getValueAsDef("RegClass");
if (TypeRecord->isSubClassOf("RegisterClass")) {
- Decoder = "Decode" + TypeRecord->getName() + "RegisterClass";
+ Decoder = "Decode" + TypeRecord->getName().str() + "RegisterClass";
isReg = true;
} else if (TypeRecord->isSubClassOf("PointerLikeRegClass")) {
Decoder = "DecodePointerLikeRegClass" +
@@ -1917,33 +1956,13 @@ static bool populateInstruction(CodeGenTarget &Target,
continue;
}
- std::string Decoder = "";
-
- // At this point, we can locate the field, but we need to know how to
- // interpret it. As a first step, require the target to provide callbacks
- // for decoding register classes.
- // FIXME: This need to be extended to handle instructions with custom
- // decoder methods, and operands with (simple) MIOperandInfo's.
TypedInit *TI = cast<TypedInit>(Op.first);
- RecordRecTy *Type = cast<RecordRecTy>(TI->getType());
- Record *TypeRecord = Type->getRecord();
- bool isReg = false;
- if (TypeRecord->isSubClassOf("RegisterOperand"))
- TypeRecord = TypeRecord->getValueAsDef("RegClass");
- if (TypeRecord->isSubClassOf("RegisterClass")) {
- Decoder = "Decode" + TypeRecord->getName() + "RegisterClass";
- isReg = true;
- } else if (TypeRecord->isSubClassOf("PointerLikeRegClass")) {
- Decoder = "DecodePointerLikeRegClass" +
- utostr(TypeRecord->getValueAsInt("RegClassKind"));
- isReg = true;
- }
- RecordVal *DecoderString = TypeRecord->getValue("DecoderMethod");
- StringInit *String = DecoderString ?
- dyn_cast<StringInit>(DecoderString->getValue()) : nullptr;
- if (!isReg && String && String->getValue() != "")
- Decoder = String->getValue();
+ // At this point, we can locate the decoder field, but we need to know how
+ // to interpret it. As a first step, require the target to provide
+ // callbacks for decoding register classes.
+ std::string Decoder = findOperandDecoderMethod(TI);
+ Record *TypeRecord = cast<RecordRecTy>(TI->getType())->getRecord();
RecordVal *HasCompleteDecoderVal =
TypeRecord->getValue("hasCompleteDecoder");
@@ -2009,7 +2028,6 @@ static bool populateInstruction(CodeGenTarget &Target,
Operands[Opc] = InsnOperands;
-
#if 0
DEBUG({
// Dumps the instruction encoding bits.
@@ -2062,7 +2080,7 @@ static void emitDecodeInstruction(formatted_raw_ostream &OS) {
<< " const uint8_t *Ptr = DecodeTable;\n"
<< " uint32_t CurFieldValue = 0;\n"
<< " DecodeStatus S = MCDisassembler::Success;\n"
- << " for (;;) {\n"
+ << " while (true) {\n"
<< " ptrdiff_t Loc = Ptr - DecodeTable;\n"
<< " switch (*Ptr) {\n"
<< " default:\n"
@@ -2230,8 +2248,8 @@ void FixedLenDecoderEmitter::run(raw_ostream &o) {
// Parameterize the decoders based on namespace and instruction width.
NumberedInstructions = Target.getInstructionsByEnumValue();
std::map<std::pair<std::string, unsigned>,
- std::vector<unsigned> > OpcMap;
- std::map<unsigned, std::vector<OperandInfo> > Operands;
+ std::vector<unsigned>> OpcMap;
+ std::map<unsigned, std::vector<OperandInfo>> Operands;
for (unsigned i = 0; i < NumberedInstructions.size(); ++i) {
const CodeGenInstruction *Inst = NumberedInstructions[i];
@@ -2304,4 +2322,4 @@ void EmitFixedLenDecoder(RecordKeeper &RK, raw_ostream &OS,
ROK, RFail, L).run(OS);
}
-} // End llvm namespace
+} // end namespace llvm
diff --git a/contrib/llvm/utils/TableGen/GlobalISelEmitter.cpp b/contrib/llvm/utils/TableGen/GlobalISelEmitter.cpp
new file mode 100644
index 000000000000..2bc6181045c5
--- /dev/null
+++ b/contrib/llvm/utils/TableGen/GlobalISelEmitter.cpp
@@ -0,0 +1,388 @@
+//===- GlobalISelEmitter.cpp - Generate an instruction selector -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This tablegen backend emits code for use by the GlobalISel instruction
+/// selector. See include/llvm/CodeGen/TargetGlobalISel.td.
+///
+/// This file analyzes the patterns recognized by the SelectionDAGISel tablegen
+/// backend, filters out the ones that are unsupported, maps
+/// SelectionDAG-specific constructs to their GlobalISel counterpart
+/// (when applicable: MVT to LLT; SDNode to generic Instruction).
+///
+/// Not all patterns are supported: pass the tablegen invocation
+/// "-warn-on-skipped-patterns" to emit a warning when a pattern is skipped,
+/// as well as why.
+///
+/// The generated file defines a single method:
+/// bool <Target>InstructionSelector::selectImpl(MachineInstr &I) const;
+/// intended to be used in InstructionSelector::select as the first-step
+/// selector for the patterns that don't require complex C++.
+///
+/// FIXME: We'll probably want to eventually define a base
+/// "TargetGenInstructionSelector" class.
+///
+//===----------------------------------------------------------------------===//
+
+#include "CodeGenDAGPatterns.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
+#include "llvm/TableGen/TableGenBackend.h"
+#include <string>
+using namespace llvm;
+
+#define DEBUG_TYPE "gisel-emitter"
+
+STATISTIC(NumPatternTotal, "Total number of patterns");
+STATISTIC(NumPatternSkipped, "Number of patterns skipped");
+STATISTIC(NumPatternEmitted, "Number of patterns emitted");
+
+static cl::opt<bool> WarnOnSkippedPatterns(
+ "warn-on-skipped-patterns",
+ cl::desc("Explain why a pattern was skipped for inclusion "
+ "in the GlobalISel selector"),
+ cl::init(false));
+
+namespace {
+
+class GlobalISelEmitter {
+public:
+ explicit GlobalISelEmitter(RecordKeeper &RK);
+ void run(raw_ostream &OS);
+
+private:
+ const RecordKeeper &RK;
+ const CodeGenDAGPatterns CGP;
+ const CodeGenTarget &Target;
+
+ /// Keep track of the equivalence between SDNodes and Instruction.
+ /// This is defined using 'GINodeEquiv' in the target description.
+ DenseMap<Record *, const CodeGenInstruction *> NodeEquivs;
+
+ void gatherNodeEquivs();
+ const CodeGenInstruction *findNodeEquiv(Record *N);
+
+ struct SkipReason {
+ std::string Reason;
+ };
+
+ /// Analyze pattern \p P, possibly emitting matching code for it to \p OS.
+ /// Otherwise, return a reason why this pattern was skipped for emission.
+ Optional<SkipReason> runOnPattern(const PatternToMatch &P,
+ raw_ostream &OS);
+};
+
+} // end anonymous namespace
+
+//===- Helper functions ---------------------------------------------------===//
+
+/// Convert an MVT to an equivalent LLT if possible, or return None for MVTs
+/// that don't map cleanly to an LLT (e.g., iPTR, *any, ...).
+static Optional<std::string> MVTToLLT(MVT::SimpleValueType SVT) {
+ std::string TyStr;
+ raw_string_ostream OS(TyStr);
+ MVT VT(SVT);
+ if (VT.isVector() && VT.getVectorNumElements() != 1) {
+ OS << "LLT::vector(" << VT.getVectorNumElements() << ", "
+ << VT.getScalarSizeInBits() << ")";
+ } else if (VT.isInteger() || VT.isFloatingPoint()) {
+ OS << "LLT::scalar(" << VT.getSizeInBits() << ")";
+ } else {
+ return None;
+ }
+ OS.flush();
+ return TyStr;
+}
+
+static bool isTrivialOperatorNode(const TreePatternNode *N) {
+ return !N->isLeaf() && !N->hasAnyPredicate() && !N->getTransformFn();
+}
+
+//===- Matchers -----------------------------------------------------------===//
+
+struct Matcher {
+ virtual ~Matcher() {}
+ virtual void emit(raw_ostream &OS) const = 0;
+};
+
+raw_ostream &operator<<(raw_ostream &S, const Matcher &M) {
+ M.emit(S);
+ return S;
+}
+
+struct MatchAction {
+ virtual ~MatchAction() {}
+ virtual void emit(raw_ostream &OS) const = 0;
+};
+
+raw_ostream &operator<<(raw_ostream &S, const MatchAction &A) {
+ A.emit(S);
+ return S;
+}
+
+struct MatchOpcode : public Matcher {
+ MatchOpcode(const CodeGenInstruction *I) : I(I) {}
+ const CodeGenInstruction *I;
+
+ virtual void emit(raw_ostream &OS) const {
+ OS << "I.getOpcode() == " << I->Namespace << "::" << I->TheDef->getName();
+ }
+};
+
+struct MatchRegOpType : public Matcher {
+ MatchRegOpType(unsigned OpIdx, std::string Ty)
+ : OpIdx(OpIdx), Ty(Ty) {}
+ unsigned OpIdx;
+ std::string Ty;
+
+ virtual void emit(raw_ostream &OS) const {
+ OS << "MRI.getType(I.getOperand(" << OpIdx << ").getReg()) == (" << Ty
+ << ")";
+ }
+};
+
+struct MatchRegOpBank : public Matcher {
+ MatchRegOpBank(unsigned OpIdx, const CodeGenRegisterClass &RC)
+ : OpIdx(OpIdx), RC(RC) {}
+ unsigned OpIdx;
+ const CodeGenRegisterClass &RC;
+
+ virtual void emit(raw_ostream &OS) const {
+ OS << "(&RBI.getRegBankFromRegClass(" << RC.getQualifiedName()
+ << "RegClass) == RBI.getRegBank(I.getOperand(" << OpIdx
+ << ").getReg(), MRI, TRI))";
+ }
+};
+
+struct MatchMBBOp : public Matcher {
+ MatchMBBOp(unsigned OpIdx) : OpIdx(OpIdx) {}
+ unsigned OpIdx;
+
+ virtual void emit(raw_ostream &OS) const {
+ OS << "I.getOperand(" << OpIdx << ").isMBB()";
+ }
+};
+
+struct MutateOpcode : public MatchAction {
+ MutateOpcode(const CodeGenInstruction *I) : I(I) {}
+ const CodeGenInstruction *I;
+
+ virtual void emit(raw_ostream &OS) const {
+ OS << "I.setDesc(TII.get(" << I->Namespace << "::" << I->TheDef->getName()
+ << "));";
+ }
+};
+
+class MatcherEmitter {
+ const PatternToMatch &P;
+
+public:
+ std::vector<std::unique_ptr<Matcher>> Matchers;
+ std::vector<std::unique_ptr<MatchAction>> Actions;
+
+ MatcherEmitter(const PatternToMatch &P) : P(P) {}
+
+ void emit(raw_ostream &OS) {
+ if (Matchers.empty())
+ llvm_unreachable("Unexpected empty matcher!");
+
+ OS << " // Src: " << *P.getSrcPattern() << "\n"
+ << " // Dst: " << *P.getDstPattern() << "\n";
+
+ OS << " if ((" << *Matchers.front() << ")";
+ for (auto &MA : makeArrayRef(Matchers).drop_front())
+ OS << " &&\n (" << *MA << ")";
+ OS << ") {\n";
+
+ for (auto &MA : Actions)
+ OS << " " << *MA << "\n";
+
+ OS << " constrainSelectedInstRegOperands(I, TII, TRI, RBI);\n";
+ OS << " return true;\n";
+ OS << " }\n";
+ }
+};
+
+//===- GlobalISelEmitter class --------------------------------------------===//
+
+void GlobalISelEmitter::gatherNodeEquivs() {
+ assert(NodeEquivs.empty());
+ for (Record *Equiv : RK.getAllDerivedDefinitions("GINodeEquiv"))
+ NodeEquivs[Equiv->getValueAsDef("Node")] =
+ &Target.getInstruction(Equiv->getValueAsDef("I"));
+}
+
+const CodeGenInstruction *GlobalISelEmitter::findNodeEquiv(Record *N) {
+ return NodeEquivs.lookup(N);
+}
+
+GlobalISelEmitter::GlobalISelEmitter(RecordKeeper &RK)
+ : RK(RK), CGP(RK), Target(CGP.getTargetInfo()) {}
+
+//===- Emitter ------------------------------------------------------------===//
+
+Optional<GlobalISelEmitter::SkipReason>
+GlobalISelEmitter::runOnPattern(const PatternToMatch &P, raw_ostream &OS) {
+
+ // Keep track of the matchers and actions to emit.
+ MatcherEmitter M(P);
+
+ // First, analyze the whole pattern.
+ // If the entire pattern has a predicate (e.g., target features), ignore it.
+ if (!P.getPredicates()->getValues().empty())
+ return SkipReason{"Pattern has a predicate"};
+
+ // Physreg imp-defs require additional logic. Ignore the pattern.
+ if (!P.getDstRegs().empty())
+ return SkipReason{"Pattern defines a physical register"};
+
+ // Next, analyze the pattern operators.
+ TreePatternNode *Src = P.getSrcPattern();
+ TreePatternNode *Dst = P.getDstPattern();
+
+ // If the root of either pattern isn't a simple operator, ignore it.
+ if (!isTrivialOperatorNode(Dst))
+ return SkipReason{"Dst pattern root isn't a trivial operator"};
+ if (!isTrivialOperatorNode(Src))
+ return SkipReason{"Src pattern root isn't a trivial operator"};
+
+ Record *DstOp = Dst->getOperator();
+ if (!DstOp->isSubClassOf("Instruction"))
+ return SkipReason{"Pattern operator isn't an instruction"};
+
+ auto &DstI = Target.getInstruction(DstOp);
+
+ auto SrcGIOrNull = findNodeEquiv(Src->getOperator());
+ if (!SrcGIOrNull)
+ return SkipReason{"Pattern operator lacks an equivalent Instruction"};
+ auto &SrcGI = *SrcGIOrNull;
+
+ // The operators look good: match the opcode and mutate it to the new one.
+ M.Matchers.emplace_back(new MatchOpcode(&SrcGI));
+ M.Actions.emplace_back(new MutateOpcode(&DstI));
+
+ // Next, analyze the children, only accepting patterns that don't require
+ // any change to operands.
+ if (Src->getNumChildren() != Dst->getNumChildren())
+ return SkipReason{"Src/dst patterns have a different # of children"};
+
+ unsigned OpIdx = 0;
+
+ // Start with the defined operands (i.e., the results of the root operator).
+ if (DstI.Operands.NumDefs != Src->getExtTypes().size())
+ return SkipReason{"Src pattern results and dst MI defs are different"};
+
+ for (const EEVT::TypeSet &Ty : Src->getExtTypes()) {
+ Record *DstIOpRec = DstI.Operands[OpIdx].Rec;
+ if (!DstIOpRec->isSubClassOf("RegisterClass"))
+ return SkipReason{"Dst MI def isn't a register class"};
+
+ auto OpTyOrNone = MVTToLLT(Ty.getConcrete());
+ if (!OpTyOrNone)
+ return SkipReason{"Dst operand has an unsupported type"};
+
+ M.Matchers.emplace_back(new MatchRegOpType(OpIdx, *OpTyOrNone));
+ M.Matchers.emplace_back(
+ new MatchRegOpBank(OpIdx, Target.getRegisterClass(DstIOpRec)));
+ ++OpIdx;
+ }
+
+ // Finally match the used operands (i.e., the children of the root operator).
+ for (unsigned i = 0, e = Src->getNumChildren(); i != e; ++i) {
+ auto *SrcChild = Src->getChild(i);
+ auto *DstChild = Dst->getChild(i);
+
+ // Patterns can reorder operands. Ignore those for now.
+ if (SrcChild->getName() != DstChild->getName())
+ return SkipReason{"Src/dst pattern children not in same order"};
+
+ // The only non-leaf child we accept is 'bb': it's an operator because
+ // BasicBlockSDNode isn't inline, but in MI it's just another operand.
+ if (!SrcChild->isLeaf()) {
+ if (DstChild->isLeaf() ||
+ SrcChild->getOperator() != DstChild->getOperator())
+ return SkipReason{"Src/dst pattern child operator mismatch"};
+
+ if (SrcChild->getOperator()->isSubClassOf("SDNode")) {
+ auto &ChildSDNI = CGP.getSDNodeInfo(SrcChild->getOperator());
+ if (ChildSDNI.getSDClassName() == "BasicBlockSDNode") {
+ M.Matchers.emplace_back(new MatchMBBOp(OpIdx++));
+ continue;
+ }
+ }
+ return SkipReason{"Src pattern child isn't a leaf node"};
+ }
+
+ if (SrcChild->getLeafValue() != DstChild->getLeafValue())
+ return SkipReason{"Src/dst pattern child leaf mismatch"};
+
+ // Otherwise, we're looking for a bog-standard RegisterClass operand.
+ if (SrcChild->hasAnyPredicate())
+ return SkipReason{"Src pattern child has predicate"};
+ auto *ChildRec = cast<DefInit>(SrcChild->getLeafValue())->getDef();
+ if (!ChildRec->isSubClassOf("RegisterClass"))
+ return SkipReason{"Src pattern child isn't a RegisterClass"};
+
+ ArrayRef<EEVT::TypeSet> ChildTypes = SrcChild->getExtTypes();
+ if (ChildTypes.size() != 1)
+ return SkipReason{"Src pattern child has multiple results"};
+
+ auto OpTyOrNone = MVTToLLT(ChildTypes.front().getConcrete());
+ if (!OpTyOrNone)
+ return SkipReason{"Src operand has an unsupported type"};
+
+ M.Matchers.emplace_back(new MatchRegOpType(OpIdx, *OpTyOrNone));
+ M.Matchers.emplace_back(
+ new MatchRegOpBank(OpIdx, Target.getRegisterClass(ChildRec)));
+ ++OpIdx;
+ }
+
+ // We're done with this pattern! Emit the processed result.
+ M.emit(OS);
+ ++NumPatternEmitted;
+ return None;
+}
+
+void GlobalISelEmitter::run(raw_ostream &OS) {
+ // Track the GINodeEquiv definitions.
+ gatherNodeEquivs();
+
+ emitSourceFileHeader(("Global Instruction Selector for the " +
+ Target.getName() + " target").str(), OS);
+ OS << "bool " << Target.getName()
+ << "InstructionSelector::selectImpl"
+ "(MachineInstr &I) const {\n const MachineRegisterInfo &MRI = "
+ "I.getParent()->getParent()->getRegInfo();\n";
+
+ // Look through the SelectionDAG patterns we found, possibly emitting some.
+ for (const PatternToMatch &Pat : CGP.ptms()) {
+ ++NumPatternTotal;
+ if (auto SkipReason = runOnPattern(Pat, OS)) {
+ if (WarnOnSkippedPatterns) {
+ PrintWarning(Pat.getSrcRecord()->getLoc(),
+ "Skipped pattern: " + SkipReason->Reason);
+ }
+ ++NumPatternSkipped;
+ }
+ }
+
+ OS << " return false;\n}\n";
+}
+
+//===----------------------------------------------------------------------===//
+
+namespace llvm {
+void EmitGlobalISel(RecordKeeper &RK, raw_ostream &OS) {
+ GlobalISelEmitter(RK).run(OS);
+}
+} // End llvm namespace
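For reference, the MVT-to-LLT mapping that the new MVTToLLT() helper above performs can be pictured with the following standalone sketch. It is an illustration only and not part of the patch: SimpleVT is an invented stand-in for MVT, and std::optional replaces llvm::Optional so it builds without LLVM headers.

    // Minimal sketch of the MVT -> LLT string mapping used by the emitter.
    #include <cstdio>
    #include <optional>
    #include <string>

    struct SimpleVT {
      bool IsVector;
      unsigned NumElements;      // only meaningful when IsVector
      unsigned ScalarSizeInBits; // element (or scalar) width in bits
      bool IsIntOrFP;            // integer or floating point
    };

    // Vectors with more than one element become LLT::vector(N, EltBits);
    // plain int/FP types become LLT::scalar(Bits); anything else (iPTR,
    // *any, ...) has no direct LLT equivalent.
    static std::optional<std::string> mvtToLLT(const SimpleVT &VT) {
      if (VT.IsVector && VT.NumElements != 1)
        return "LLT::vector(" + std::to_string(VT.NumElements) + ", " +
               std::to_string(VT.ScalarSizeInBits) + ")";
      if (VT.IsIntOrFP)
        return "LLT::scalar(" + std::to_string(VT.ScalarSizeInBits) + ")";
      return std::nullopt; // e.g. iPTR: its size is target-dependent
    }

    int main() {
      SimpleVT I32{false, 1, 32, true};    // like MVT::i32
      SimpleVT V4F32{true, 4, 32, true};   // like MVT::v4f32
      SimpleVT PtrAny{false, 1, 0, false}; // like MVT::iPTR
      for (const SimpleVT &VT : {I32, V4F32, PtrAny}) {
        auto S = mvtToLLT(VT);
        std::printf("%s\n", S ? S->c_str() : "<unsupported>");
      }
      return 0;
    }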
diff --git a/contrib/llvm/utils/TableGen/InstrInfoEmitter.cpp b/contrib/llvm/utils/TableGen/InstrInfoEmitter.cpp
index 02461cc0508d..ab7d964cd671 100644
--- a/contrib/llvm/utils/TableGen/InstrInfoEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/InstrInfoEmitter.cpp
@@ -13,22 +13,29 @@
//===----------------------------------------------------------------------===//
#include "CodeGenDAGPatterns.h"
+#include "CodeGenInstruction.h"
#include "CodeGenSchedule.h"
#include "CodeGenTarget.h"
#include "SequenceToOffsetTable.h"
#include "TableGenBackends.h"
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/TableGen/Error.h"
#include "llvm/TableGen/Record.h"
#include "llvm/TableGen/TableGenBackend.h"
-#include <algorithm>
-#include <cstdio>
+#include <cassert>
+#include <cstdint>
#include <map>
+#include <string>
+#include <utility>
#include <vector>
using namespace llvm;
namespace {
+
class InstrInfoEmitter {
RecordKeeper &Records;
CodeGenDAGPatterns CDP;
@@ -50,7 +57,7 @@ private:
/// and instruction operand indices as their values. The values of this map
/// are lists of instruction names.
typedef std::map<std::map<unsigned, unsigned>,
- std::vector<std::string> > OpNameMapTy;
+ std::vector<std::string>> OpNameMapTy;
typedef std::map<std::string, unsigned>::iterator StrUintMapIter;
void emitRecord(const CodeGenInstruction &Inst, unsigned Num,
Record *InstrInfo,
@@ -70,6 +77,7 @@ private:
void EmitOperandInfo(raw_ostream &OS, OperandInfoMapTy &OperandInfoIDs);
std::vector<std::string> GetOperandInfo(const CodeGenInstruction &Inst);
};
+
} // end anonymous namespace
static void PrintDefList(const std::vector<Record*> &Uses,
@@ -106,7 +114,7 @@ InstrInfoEmitter::GetOperandInfo(const CodeGenInstruction &Inst) {
for (unsigned j = 0, e = Op.MINumOperands; j != e; ++j) {
OperandList.push_back(Op);
- Record *OpR = cast<DefInit>(MIOI->getArg(j))->getDef();
+ auto *OpR = cast<DefInit>(MIOI->getArg(j))->getDef();
OperandList.back().Rec = OpR;
}
}
@@ -202,7 +210,6 @@ void InstrInfoEmitter::initOperandMapData(
const std::string &Namespace,
std::map<std::string, unsigned> &Operands,
OpNameMapTy &OperandMap) {
-
unsigned NumOperands = 0;
for (const CodeGenInstruction *Inst : NumberedInstructions) {
if (!Inst->TheDef->getValueAsBit("UseNamedOperandTable"))
@@ -217,7 +224,8 @@ void InstrInfoEmitter::initOperandMapData(
}
OpList[I->second] = Info.MIOperandNo;
}
- OperandMap[OpList].push_back(Namespace + "::" + Inst->TheDef->getName());
+ OperandMap[OpList].push_back(Namespace + "::" +
+ Inst->TheDef->getName().str());
}
}
@@ -235,7 +243,6 @@ void InstrInfoEmitter::initOperandMapData(
void InstrInfoEmitter::emitOperandNameMappings(raw_ostream &OS,
const CodeGenTarget &Target,
ArrayRef<const CodeGenInstruction*> NumberedInstructions) {
-
const std::string &Namespace = Target.getInstNamespace();
std::string OpNameNS = "OpName";
// Map of operand names to their enumeration value. This will be used to
@@ -300,7 +307,6 @@ void InstrInfoEmitter::emitOperandNameMappings(raw_ostream &OS,
OS << "} // end namespace " << Namespace << "\n";
OS << "} // end namespace llvm\n";
OS << "#endif //GET_INSTRINFO_NAMED_OPS\n\n";
-
}
/// Generate an enum for all the operand types for this target, under the
@@ -429,7 +435,7 @@ void InstrInfoEmitter::run(raw_ostream &OS) {
OS << "struct " << ClassName << " : public TargetInstrInfo {\n"
<< " explicit " << ClassName
<< "(int CFSetupOpcode = -1, int CFDestroyOpcode = -1, int CatchRetOpcode = -1, int ReturnOpcode = -1);\n"
- << " ~" << ClassName << "() override {}\n"
+ << " ~" << ClassName << "() override = default;\n"
<< "};\n";
OS << "} // end llvm namespace\n";
@@ -482,6 +488,7 @@ void InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num,
if (Inst.isCompare) OS << "|(1ULL<<MCID::Compare)";
if (Inst.isMoveImm) OS << "|(1ULL<<MCID::MoveImm)";
if (Inst.isBitcast) OS << "|(1ULL<<MCID::Bitcast)";
+ if (Inst.isAdd) OS << "|(1ULL<<MCID::Add)";
if (Inst.isSelect) OS << "|(1ULL<<MCID::Select)";
if (Inst.isBarrier) OS << "|(1ULL<<MCID::Barrier)";
if (Inst.hasDelaySlot) OS << "|(1ULL<<MCID::DelaySlot)";
@@ -514,7 +521,7 @@ void InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num,
PrintFatalError("no TSFlags?");
uint64_t Value = 0;
for (unsigned i = 0, e = TSF->getNumBits(); i != e; ++i) {
- if (BitInit *Bit = dyn_cast<BitInit>(TSF->getBit(i)))
+ if (const auto *Bit = dyn_cast<BitInit>(TSF->getBit(i)))
Value |= uint64_t(Bit->getValue()) << i;
else
PrintFatalError("Invalid TSFlags bit in " + Inst.TheDef->getName());
@@ -561,7 +568,6 @@ void InstrInfoEmitter::emitRecord(const CodeGenInstruction &Inst, unsigned Num,
// emitEnums - Print out enum values for all of the instructions.
void InstrInfoEmitter::emitEnums(raw_ostream &OS) {
-
OS << "#ifdef GET_INSTRINFO_ENUM\n";
OS << "#undef GET_INSTRINFO_ENUM\n";
diff --git a/contrib/llvm/utils/TableGen/IntrinsicEmitter.cpp b/contrib/llvm/utils/TableGen/IntrinsicEmitter.cpp
index a676159e494b..33256ccba46c 100644
--- a/contrib/llvm/utils/TableGen/IntrinsicEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/IntrinsicEmitter.cpp
@@ -213,10 +213,11 @@ enum IIT_Info {
IIT_HALF_VEC_ARG = 30,
IIT_SAME_VEC_WIDTH_ARG = 31,
IIT_PTR_TO_ARG = 32,
- IIT_VEC_OF_PTRS_TO_ELT = 33,
- IIT_I128 = 34,
- IIT_V512 = 35,
- IIT_V1024 = 36
+ IIT_PTR_TO_ELT = 33,
+ IIT_VEC_OF_PTRS_TO_ELT = 34,
+ IIT_I128 = 35,
+ IIT_V512 = 36,
+ IIT_V1024 = 37
};
@@ -251,7 +252,7 @@ static void EncodeFixedValueType(MVT::SimpleValueType VT,
}
#if defined(_MSC_VER) && !defined(__clang__)
-#pragma optimize("",off) // MSVC 2010 optimizer can't deal with this function.
+#pragma optimize("",off) // MSVC 2015 optimizer can't deal with this function.
#endif
static void EncodeFixedType(Record *R, std::vector<unsigned char> &ArgCodes,
@@ -277,6 +278,8 @@ static void EncodeFixedType(Record *R, std::vector<unsigned char> &ArgCodes,
Sig.push_back(IIT_PTR_TO_ARG);
else if (R->isSubClassOf("LLVMVectorOfPointersToElt"))
Sig.push_back(IIT_VEC_OF_PTRS_TO_ELT);
+ else if (R->isSubClassOf("LLVMPointerToElt"))
+ Sig.push_back(IIT_PTR_TO_ELT);
else
Sig.push_back(IIT_ARG);
return Sig.push_back((Number << 3) | ArgCodes[Number]);
@@ -287,10 +290,10 @@ static void EncodeFixedType(Record *R, std::vector<unsigned char> &ArgCodes,
unsigned Tmp = 0;
switch (VT) {
default: break;
- case MVT::iPTRAny: ++Tmp; // FALL THROUGH.
- case MVT::vAny: ++Tmp; // FALL THROUGH.
- case MVT::fAny: ++Tmp; // FALL THROUGH.
- case MVT::iAny: ++Tmp; // FALL THROUGH.
+ case MVT::iPTRAny: ++Tmp; LLVM_FALLTHROUGH;
+ case MVT::vAny: ++Tmp; LLVM_FALLTHROUGH;
+ case MVT::fAny: ++Tmp; LLVM_FALLTHROUGH;
+ case MVT::iAny: ++Tmp; LLVM_FALLTHROUGH;
case MVT::Any: {
// If this is an "any" valuetype, then the type is the type of the next
// type in the list specified to getIntrinsic().
@@ -643,6 +646,18 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints,
OS << ",";
OS << "Attribute::ReadOnly";
break;
+ case CodeGenIntrinsic::ReadInaccessibleMem:
+ if (addComma)
+ OS << ",";
+ OS << "Attribute::ReadOnly,";
+ OS << "Attribute::InaccessibleMemOnly";
+ break;
+ case CodeGenIntrinsic::ReadInaccessibleMemOrArgMem:
+ if (addComma)
+ OS << ",";
+ OS << "Attribute::ReadOnly,";
+ OS << "Attribute::InaccessibleMemOrArgMemOnly";
+ break;
case CodeGenIntrinsic::WriteArgMem:
if (addComma)
OS << ",";
@@ -654,11 +669,32 @@ void IntrinsicEmitter::EmitAttributes(const CodeGenIntrinsicTable &Ints,
OS << ",";
OS << "Attribute::WriteOnly";
break;
+ case CodeGenIntrinsic::WriteInaccessibleMem:
+ if (addComma)
+ OS << ",";
+ OS << "Attribute::WriteOnly,";
+ OS << "Attribute::InaccessibleMemOnly";
+ break;
+ case CodeGenIntrinsic::WriteInaccessibleMemOrArgMem:
+ if (addComma)
+ OS << ",";
+ OS << "Attribute::WriteOnly,";
+    OS << "Attribute::InaccessibleMemOrArgMemOnly";
+ break;
case CodeGenIntrinsic::ReadWriteArgMem:
if (addComma)
OS << ",";
OS << "Attribute::ArgMemOnly";
break;
+ case CodeGenIntrinsic::ReadWriteInaccessibleMem:
+ if (addComma)
+ OS << ",";
+ OS << "Attribute::InaccessibleMemOnly";
+ break;
+ case CodeGenIntrinsic::ReadWriteInaccessibleMemOrArgMem:
+ if (addComma)
+ OS << ",";
+ OS << "Attribute::InaccessibleMemOrArgMemOnly";
case CodeGenIntrinsic::ReadWriteMem:
break;
}
@@ -714,11 +750,11 @@ void IntrinsicEmitter::EmitIntrinsicToBuiltinMap(
if (TargetOnly) {
OS << "static " << TargetPrefix << "Intrinsic::ID "
<< "getIntrinsicFor" << CompilerName << "Builtin(const char "
- << "*TargetPrefixStr, const char *BuiltinNameStr) {\n";
+ << "*TargetPrefixStr, StringRef BuiltinNameStr) {\n";
} else {
OS << "Intrinsic::ID Intrinsic::getIntrinsicFor" << CompilerName
<< "Builtin(const char "
- << "*TargetPrefixStr, const char *BuiltinNameStr) {\n";
+ << "*TargetPrefixStr, StringRef BuiltinNameStr) {\n";
}
OS << " static const char BuiltinNames[] = {\n";
Table.EmitCharArray(OS);
@@ -730,13 +766,11 @@ void IntrinsicEmitter::EmitIntrinsicToBuiltinMap(
OS << " const char *getName() const {\n";
OS << " return &BuiltinNames[StrTabOffset];\n";
OS << " }\n";
- OS << " bool operator<(const char *RHS) const {\n";
- OS << " return strcmp(getName(), RHS) < 0;\n";
+ OS << " bool operator<(StringRef RHS) const {\n";
+ OS << " return strncmp(getName(), RHS.data(), RHS.size()) < 0;\n";
OS << " }\n";
OS << " };\n";
-
- OS << " StringRef BuiltinName(BuiltinNameStr);\n";
OS << " StringRef TargetPrefix(TargetPrefixStr);\n\n";
// Note: this could emit significantly better code if we cared.
@@ -759,7 +793,7 @@ void IntrinsicEmitter::EmitIntrinsicToBuiltinMap(
OS << " std::end(" << I->first << "Names),\n";
OS << " BuiltinNameStr);\n";
OS << " if (I != std::end(" << I->first << "Names) &&\n";
- OS << " strcmp(I->getName(), BuiltinNameStr) == 0)\n";
+ OS << " I->getName() == BuiltinNameStr)\n";
OS << " return I->IntrinID;\n";
OS << " }\n";
}
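The generated builtin lookup above now binary-searches with a prefix-only strncmp comparator and then re-checks for an exact match, so a StringRef query that is a prefix of a longer table entry is not reported as found. A standalone sketch of that scheme, with invented names and IDs and std::string standing in for StringRef, assuming a table sorted by full name as the emitter guarantees:

    #include <algorithm>
    #include <cstdio>
    #include <cstring>
    #include <string>

    struct BuiltinEntry {
      const char *Name;
      unsigned IntrinID;
      // Less-than against the query; compares at most RHS.size() characters,
      // which is why a final exact comparison is still required after
      // std::lower_bound.
      bool operator<(const std::string &RHS) const {
        return std::strncmp(Name, RHS.data(), RHS.size()) < 0;
      }
    };

    int main() {
      static const BuiltinEntry Names[] = {
          {"__builtin_bar", 1}, {"__builtin_foo", 2}, {"__builtin_foobar", 3}};

      auto lookup = [&](const std::string &Query) -> int {
        auto I = std::lower_bound(std::begin(Names), std::end(Names), Query);
        if (I != std::end(Names) && I->Name == Query)
          return static_cast<int>(I->IntrinID);
        return -1; // not found
      };

      std::printf("%d\n", lookup("__builtin_foo"));     // 2
      std::printf("%d\n", lookup("__builtin_foob"));    // -1: prefix of a longer name
      std::printf("%d\n", lookup("__builtin_missing")); // -1
      return 0;
    }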
diff --git a/contrib/llvm/utils/TableGen/PseudoLoweringEmitter.cpp b/contrib/llvm/utils/TableGen/PseudoLoweringEmitter.cpp
index 01e41d1060e0..63bdd36235a0 100644
--- a/contrib/llvm/utils/TableGen/PseudoLoweringEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/PseudoLoweringEmitter.cpp
@@ -177,10 +177,10 @@ void PseudoLoweringEmitter::evaluateExpansion(Record *Rec) {
if (OperandMap[Insn.Operands[i].MIOperandNo].Kind != OpData::Operand)
continue;
StringMap<unsigned>::iterator SourceOp =
- SourceOperands.find(Dag->getArgName(i));
+ SourceOperands.find(Dag->getArgNameStr(i));
if (SourceOp == SourceOperands.end())
PrintFatalError(Rec->getLoc(),
- "Pseudo output operand '" + Dag->getArgName(i) +
+ "Pseudo output operand '" + Dag->getArgNameStr(i) +
"' has no matching source operand.");
// Map the source operand to the destination operand index for each
// MachineInstr operand.
diff --git a/contrib/llvm/utils/TableGen/RegisterInfoEmitter.cpp b/contrib/llvm/utils/TableGen/RegisterInfoEmitter.cpp
index 9bb988f30f18..b75be13c0480 100644
--- a/contrib/llvm/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/RegisterInfoEmitter.cpp
@@ -15,6 +15,7 @@
#include "CodeGenRegisters.h"
#include "CodeGenTarget.h"
+#include "Types.h"
#include "SequenceToOffsetTable.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
@@ -103,7 +104,7 @@ void RegisterInfoEmitter::runEnums(raw_ostream &OS,
OS << "namespace llvm {\n\n";
OS << "class MCRegisterClass;\n"
- << "extern const MCRegisterClass " << Namespace
+ << "extern const MCRegisterClass " << Target.getName()
<< "MCRegisterClasses[];\n\n";
if (!Namespace.empty())
@@ -177,15 +178,6 @@ static void printInt(raw_ostream &OS, int Val) {
OS << Val;
}
-static const char *getMinimalTypeForRange(uint64_t Range) {
- assert(Range < 0xFFFFFFFFULL && "Enum too large");
- if (Range > 0xFFFF)
- return "uint32_t";
- if (Range > 0xFF)
- return "uint16_t";
- return "uint8_t";
-}
-
void RegisterInfoEmitter::
EmitRegUnitPressure(raw_ostream &OS, const CodeGenRegBank &RegBank,
const std::string &ClassName) {
@@ -264,8 +256,9 @@ EmitRegUnitPressure(raw_ostream &OS, const CodeGenRegBank &RegBank,
OS << "// Get the register unit pressure limit for this dimension.\n"
<< "// This limit must be adjusted dynamically for reserved registers.\n"
<< "unsigned " << ClassName << "::\n"
- << "getRegPressureSetLimit(const MachineFunction &MF, unsigned Idx) const {\n"
- << " static const " << getMinimalTypeForRange(MaxRegUnitWeight)
+ << "getRegPressureSetLimit(const MachineFunction &MF, unsigned Idx) const "
+ "{\n"
+ << " static const " << getMinimalTypeForRange(MaxRegUnitWeight, 32)
<< " PressureLimitTable[] = {\n";
for (unsigned i = 0; i < NumSets; ++i ) {
const RegUnitSet &RegUnits = RegBank.getRegSetAt(i);
@@ -306,7 +299,7 @@ EmitRegUnitPressure(raw_ostream &OS, const CodeGenRegBank &RegBank,
<< "/// Returns a -1 terminated array of pressure set IDs\n"
<< "const int* " << ClassName << "::\n"
<< "getRegClassPressureSets(const TargetRegisterClass *RC) const {\n";
- OS << " static const " << getMinimalTypeForRange(PSetsSeqs.size()-1)
+ OS << " static const " << getMinimalTypeForRange(PSetsSeqs.size() - 1, 32)
<< " RCSetStartTable[] = {\n ";
for (unsigned i = 0, e = NumRCs; i != e; ++i) {
OS << PSetsSeqs.get(PSets[i]) << ",";
@@ -322,7 +315,7 @@ EmitRegUnitPressure(raw_ostream &OS, const CodeGenRegBank &RegBank,
<< "getRegUnitPressureSets(unsigned RegUnit) const {\n"
<< " assert(RegUnit < " << RegBank.getNumNativeRegUnits()
<< " && \"invalid register unit\");\n";
- OS << " static const " << getMinimalTypeForRange(PSetsSeqs.size()-1)
+ OS << " static const " << getMinimalTypeForRange(PSetsSeqs.size() - 1, 32)
<< " RUSetStartTable[] = {\n ";
for (unsigned UnitIdx = 0, UnitEnd = RegBank.getNumNativeRegUnits();
UnitIdx < UnitEnd; ++UnitIdx) {
@@ -585,7 +578,7 @@ static void printSubRegIndex(raw_ostream &OS, const CodeGenSubRegIndex *Idx) {
// 0 differential which means we can't encode repeated elements.
typedef SmallVector<uint16_t, 4> DiffVec;
-typedef SmallVector<unsigned, 4> MaskVec;
+typedef SmallVector<LaneBitmask, 4> MaskVec;
// Differentially encode a sequence of numbers into V. The starting value and
// terminating 0 are not added to V, so it will have the same size as List.
@@ -618,8 +611,8 @@ static void printDiff16(raw_ostream &OS, uint16_t Val) {
OS << Val;
}
-static void printMask(raw_ostream &OS, unsigned Val) {
- OS << format("0x%08X", Val);
+static void printMask(raw_ostream &OS, LaneBitmask Val) {
+ OS << "LaneBitmask(0x" << PrintLaneMask(Val) << ')';
}
// Try to combine Idx's compose map into Vec if it is compatible.
@@ -683,15 +676,15 @@ RegisterInfoEmitter::emitComposeSubRegIndices(raw_ostream &OS,
// Output the row map if there is multiple rows.
if (Rows.size() > 1) {
- OS << " static const " << getMinimalTypeForRange(Rows.size()) << " RowMap["
- << SubRegIndicesSize << "] = {\n ";
+ OS << " static const " << getMinimalTypeForRange(Rows.size(), 32)
+ << " RowMap[" << SubRegIndicesSize << "] = {\n ";
for (unsigned i = 0, e = SubRegIndicesSize; i != e; ++i)
OS << RowMap[i] << ", ";
OS << "\n };\n";
}
// Output the rows.
- OS << " static const " << getMinimalTypeForRange(SubRegIndicesSize + 1)
+ OS << " static const " << getMinimalTypeForRange(SubRegIndicesSize + 1, 32)
<< " Rows[" << Rows.size() << "][" << SubRegIndicesSize << "] = {\n";
for (unsigned r = 0, re = Rows.size(); r != re; ++r) {
OS << " { ";
@@ -746,7 +739,7 @@ RegisterInfoEmitter::emitComposeSubRegIndexLaneMask(raw_ostream &OS,
}
OS << " struct MaskRolOp {\n"
- " unsigned Mask;\n"
+ " LaneBitmask Mask;\n"
" uint8_t RotateLeft;\n"
" };\n"
" static const MaskRolOp LaneMaskComposeSequences[] = {\n";
@@ -756,9 +749,10 @@ RegisterInfoEmitter::emitComposeSubRegIndexLaneMask(raw_ostream &OS,
const SmallVectorImpl<MaskRolPair> &Sequence = Sequences[s];
for (size_t p = 0, pe = Sequence.size(); p != pe; ++p) {
const MaskRolPair &P = Sequence[p];
- OS << format("{ 0x%08X, %2u }, ", P.Mask, P.RotateLeft);
+ printMask(OS << "{ ", P.Mask);
+ OS << format(", %2u }, ", P.RotateLeft);
}
- OS << "{ 0, 0 }";
+ OS << "{ LaneBitmask::getNone(), 0 }";
if (s+1 != se)
OS << ", ";
OS << " // Sequence " << Idx << "\n";
@@ -781,12 +775,13 @@ RegisterInfoEmitter::emitComposeSubRegIndexLaneMask(raw_ostream &OS,
" const {\n"
" --IdxA; assert(IdxA < " << SubRegIndices.size()
<< " && \"Subregister index out of bounds\");\n"
- " LaneBitmask Result = 0;\n"
- " for (const MaskRolOp *Ops = CompositeSequences[IdxA]; Ops->Mask != 0; ++Ops)"
- " {\n"
- " LaneBitmask Masked = LaneMask & Ops->Mask;\n"
- " Result |= (Masked << Ops->RotateLeft) & 0xFFFFFFFF;\n"
- " Result |= (Masked >> ((32 - Ops->RotateLeft) & 0x1F));\n"
+ " LaneBitmask Result;\n"
+ " for (const MaskRolOp *Ops = CompositeSequences[IdxA]; Ops->Mask.any(); ++Ops) {\n"
+ " LaneBitmask::Type M = LaneMask.getAsInteger() & Ops->Mask.getAsInteger();\n"
+ " if (unsigned S = Ops->RotateLeft)\n"
+ " Result |= LaneBitmask((M << S) | (M >> (LaneBitmask::BitWidth - S)));\n"
+ " else\n"
+ " Result |= LaneBitmask(M);\n"
" }\n"
" return Result;\n"
"}\n\n";
@@ -797,12 +792,13 @@ RegisterInfoEmitter::emitComposeSubRegIndexLaneMask(raw_ostream &OS,
" LaneMask &= getSubRegIndexLaneMask(IdxA);\n"
" --IdxA; assert(IdxA < " << SubRegIndices.size()
<< " && \"Subregister index out of bounds\");\n"
- " LaneBitmask Result = 0;\n"
- " for (const MaskRolOp *Ops = CompositeSequences[IdxA]; Ops->Mask != 0; ++Ops)"
- " {\n"
- " LaneBitmask Rotated = (LaneMask >> Ops->RotateLeft) |\n"
- " ((LaneMask << ((32 - Ops->RotateLeft) & 0x1F)) & 0xFFFFFFFF);\n"
- " Result |= Rotated & Ops->Mask;\n"
+ " LaneBitmask Result;\n"
+ " for (const MaskRolOp *Ops = CompositeSequences[IdxA]; Ops->Mask.any(); ++Ops) {\n"
+ " LaneBitmask::Type M = LaneMask.getAsInteger();\n"
+ " if (unsigned S = Ops->RotateLeft)\n"
+ " Result |= LaneBitmask((M >> S) | (M << (LaneBitmask::BitWidth - S)));\n"
+ " else\n"
+ " Result |= LaneBitmask(M);\n"
" }\n"
" return Result;\n"
"}\n\n";
@@ -901,8 +897,8 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
LaneMaskVec.insert(LaneMaskVec.begin(), RUMasks.begin(), RUMasks.end());
// Terminator mask should not be used inside of the list.
#ifndef NDEBUG
- for (unsigned M : LaneMaskVec) {
- assert(M != ~0u && "terminator mask should not be part of the list");
+ for (LaneBitmask M : LaneMaskVec) {
+ assert(!M.all() && "terminator mask should not be part of the list");
}
#endif
LaneMaskSeqs.add(LaneMaskVec);
@@ -923,8 +919,8 @@ RegisterInfoEmitter::runMCDesc(raw_ostream &OS, CodeGenTarget &Target,
OS << "};\n\n";
// Emit the shared table of regunit lane mask sequences.
- OS << "extern const unsigned " << TargetName << "LaneMaskLists[] = {\n";
- LaneMaskSeqs.emit(OS, printMask, "~0u");
+ OS << "extern const LaneBitmask " << TargetName << "LaneMaskLists[] = {\n";
+ LaneMaskSeqs.emit(OS, printMask, "LaneBitmask::getAll()");
OS << "};\n\n";
// Emit the table of sub-register indexes.
@@ -1204,9 +1200,10 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
OS << "\" };\n\n";
// Emit SubRegIndex lane masks, including 0.
- OS << "\nstatic const unsigned SubRegIndexLaneMaskTable[] = {\n ~0u,\n";
+ OS << "\nstatic const LaneBitmask SubRegIndexLaneMaskTable[] = {\n LaneBitmask::getAll(),\n";
for (const auto &Idx : SubRegIndices) {
- OS << format(" 0x%08x, // ", Idx.LaneMask) << Idx.getName() << '\n';
+ printMask(OS << " ", Idx.LaneMask);
+ OS << ", // " << Idx.getName() << '\n';
}
OS << " };\n\n";
@@ -1324,9 +1321,9 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
<< "MCRegisterClasses[" << RC.getName() << "RegClassID],\n "
<< "VTLists + " << VTSeqs.get(RC.VTs) << ",\n " << RC.getName()
<< "SubClassMask,\n SuperRegIdxSeqs + "
- << SuperRegIdxSeqs.get(SuperRegIdxLists[RC.EnumValue]) << ",\n "
- << format("0x%08x,\n ", RC.LaneMask)
- << (unsigned)RC.AllocationPriority << ",\n "
+ << SuperRegIdxSeqs.get(SuperRegIdxLists[RC.EnumValue]) << ",\n ";
+ printMask(OS, RC.LaneMask);
+ OS << ",\n " << (unsigned)RC.AllocationPriority << ",\n "
<< (RC.HasDisjunctSubRegs?"true":"false")
<< ", /* HasDisjunctSubRegs */\n "
<< (RC.CoveredBySubRegs?"true":"false")
@@ -1368,7 +1365,7 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
OS << "};\n"; // End of register descriptors...
- std::string ClassName = Target.getName() + "GenRegisterInfo";
+ std::string ClassName = Target.getName().str() + "GenRegisterInfo";
auto SubRegIndicesSize =
std::distance(SubRegIndices.begin(), SubRegIndices.end());
@@ -1415,7 +1412,7 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
// Emit the constructor of the class...
OS << "extern const MCRegisterDesc " << TargetName << "RegDesc[];\n";
OS << "extern const MCPhysReg " << TargetName << "RegDiffLists[];\n";
- OS << "extern const unsigned " << TargetName << "LaneMaskLists[];\n";
+ OS << "extern const LaneBitmask " << TargetName << "LaneMaskLists[];\n";
OS << "extern const char " << TargetName << "RegStrings[];\n";
OS << "extern const char " << TargetName << "RegClassStrings[];\n";
OS << "extern const MCPhysReg " << TargetName << "RegUnitRoots[][2];\n";
@@ -1430,8 +1427,8 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
<< "(unsigned RA, unsigned DwarfFlavour, unsigned EHFlavour, unsigned PC)\n"
<< " : TargetRegisterInfo(" << TargetName << "RegInfoDesc"
<< ", RegisterClasses, RegisterClasses+" << RegisterClasses.size() <<",\n"
- << " SubRegIndexNameTable, SubRegIndexLaneMaskTable, 0x";
- OS.write_hex(RegBank.CoveringLanes);
+ << " SubRegIndexNameTable, SubRegIndexLaneMaskTable, ";
+ printMask(OS, RegBank.CoveringLanes);
OS << ") {\n"
<< " InitMCRegisterInfo(" << TargetName << "RegDesc, " << Regs.size() + 1
<< ", RA, PC,\n " << TargetName
diff --git a/contrib/llvm/utils/TableGen/SearchableTableEmitter.cpp b/contrib/llvm/utils/TableGen/SearchableTableEmitter.cpp
index 8c1b8804d174..80f0b0d4aaf4 100644
--- a/contrib/llvm/utils/TableGen/SearchableTableEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/SearchableTableEmitter.cpp
@@ -193,7 +193,7 @@ void SearchableTableEmitter::emitLookupFunction(StringRef Name, StringRef Field,
} else {
// Make sure the result is null terminated because it's going via "char *".
OS << " std::string CanonicalVal = " << Field << ".upper();\n";
- OS << " " << PairType << " Val = {CanonicalVal.data(), 0};\n";
+ OS << " " << PairType << " Val = {CanonicalVal.c_str(), 0};\n";
}
OS << " ArrayRef<" << PairType << "> Table(" << Name << "sBy" << Field
@@ -206,7 +206,7 @@ void SearchableTableEmitter::emitLookupFunction(StringRef Name, StringRef Field,
OS << ",\n ";
OS << "[](const " << PairType << " &LHS, const " << PairType
<< " &RHS) {\n";
- OS << " return StringRef(LHS.first) < StringRef(RHS.first);\n";
+ OS << " return std::strcmp(LHS.first, RHS.first) < 0;\n";
OS << " });\n\n";
}
diff --git a/contrib/llvm/utils/TableGen/SubtargetEmitter.cpp b/contrib/llvm/utils/TableGen/SubtargetEmitter.cpp
index 228882177bdf..bf7b392b15e5 100644
--- a/contrib/llvm/utils/TableGen/SubtargetEmitter.cpp
+++ b/contrib/llvm/utils/TableGen/SubtargetEmitter.cpp
@@ -14,7 +14,9 @@
#include "CodeGenTarget.h"
#include "CodeGenSchedule.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/MC/MCSchedule.h"
#include "llvm/MC/SubtargetFeature.h"
@@ -27,6 +29,7 @@
#include <algorithm>
#include <cassert>
#include <cstdint>
+#include <iterator>
#include <map>
#include <string>
#include <vector>
@@ -42,7 +45,7 @@ class SubtargetEmitter {
// The SchedClassDesc table indexes into a global write resource table, write
// latency table, and read advance table.
struct SchedClassTables {
- std::vector<std::vector<MCSchedClassDesc> > ProcSchedClasses;
+ std::vector<std::vector<MCSchedClassDesc>> ProcSchedClasses;
std::vector<MCWriteProcResEntry> WriteProcResources;
std::vector<MCWriteLatencyEntry> WriteLatencies;
std::vector<std::string> WriterNames;
@@ -81,12 +84,12 @@ class SubtargetEmitter {
Record *ItinData,
std::string &ItinString, unsigned NOperandCycles);
void EmitStageAndOperandCycleData(raw_ostream &OS,
- std::vector<std::vector<InstrItinerary> >
+ std::vector<std::vector<InstrItinerary>>
&ProcItinLists);
void EmitItineraries(raw_ostream &OS,
- std::vector<std::vector<InstrItinerary> >
+ std::vector<std::vector<InstrItinerary>>
&ProcItinLists);
- void EmitProcessorProp(raw_ostream &OS, const Record *R, const char *Name,
+ void EmitProcessorProp(raw_ostream &OS, const Record *R, StringRef Name,
char Separator);
void EmitProcessorResources(const CodeGenProcModel &ProcModel,
raw_ostream &OS);
@@ -294,7 +297,7 @@ void SubtargetEmitter::FormItineraryStageString(const std::string &Name,
// For each unit
for (unsigned j = 0, M = UnitList.size(); j < M;) {
// Add name and bitwise or
- ItinString += Name + "FU::" + UnitList[j]->getName();
+ ItinString += Name + "FU::" + UnitList[j]->getName().str();
if (++j < M) ItinString += " | ";
}
@@ -341,7 +344,7 @@ void SubtargetEmitter::FormItineraryBypassString(const std::string &Name,
unsigned N = BypassList.size();
unsigned i = 0;
for (; i < N;) {
- ItinString += Name + "Bypass::" + BypassList[i]->getName();
+ ItinString += Name + "Bypass::" + BypassList[i]->getName().str();
if (++i < NOperandCycles) ItinString += ", ";
}
for (; i < NOperandCycles;) {
@@ -357,9 +360,8 @@ void SubtargetEmitter::FormItineraryBypassString(const std::string &Name,
//
void SubtargetEmitter::
EmitStageAndOperandCycleData(raw_ostream &OS,
- std::vector<std::vector<InstrItinerary> >
+ std::vector<std::vector<InstrItinerary>>
&ProcItinLists) {
-
// Multiple processor models may share an itinerary record. Emit it once.
SmallPtrSet<Record*, 8> ItinsDefSet;
@@ -498,7 +500,7 @@ EmitStageAndOperandCycleData(raw_ostream &OS,
int NumUOps = ItinData ? ItinData->getValueAsInt("NumMicroOps") : 0;
InstrItinerary Intinerary = { NumUOps, FindStage, FindStage + NStages,
FindOperandCycle,
- FindOperandCycle + NOperandCycles};
+ FindOperandCycle + NOperandCycles };
// Inject - empty slots will be 0, 0
ItinList[SchedClassIdx] = Intinerary;
@@ -530,13 +532,12 @@ EmitStageAndOperandCycleData(raw_ostream &OS,
//
void SubtargetEmitter::
EmitItineraries(raw_ostream &OS,
- std::vector<std::vector<InstrItinerary> > &ProcItinLists) {
-
+ std::vector<std::vector<InstrItinerary>> &ProcItinLists) {
// Multiple processor models may share an itinerary record. Emit it once.
SmallPtrSet<Record*, 8> ItinsDefSet;
// For each processor's machine model
- std::vector<std::vector<InstrItinerary> >::iterator
+ std::vector<std::vector<InstrItinerary>>::iterator
ProcItinListsIter = ProcItinLists.begin();
for (CodeGenSchedModels::ProcIter PI = SchedModels.procModelBegin(),
PE = SchedModels.procModelEnd(); PI != PE; ++PI, ++ProcItinListsIter) {
@@ -587,7 +588,7 @@ EmitItineraries(raw_ostream &OS,
// value defined in the C++ header. The Record is null if the processor does not
// define a model.
void SubtargetEmitter::EmitProcessorProp(raw_ostream &OS, const Record *R,
- const char *Name, char Separator) {
+ StringRef Name, char Separator) {
OS << " ";
int V = R ? R->getValueAsInt(Name) : -1;
if (V >= 0)
@@ -783,8 +784,7 @@ void SubtargetEmitter::ExpandProcResources(RecVec &PRVec,
RecVec SuperResources = PR->getValueAsListOfDefs("Resources");
RecIter SubI = SubResources.begin(), SubE = SubResources.end();
for( ; SubI != SubE; ++SubI) {
- if (std::find(SuperResources.begin(), SuperResources.end(), *SubI)
- == SuperResources.end()) {
+ if (!is_contained(SuperResources, *SubI)) {
break;
}
}
@@ -827,9 +827,7 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel,
HasVariants = true;
break;
}
- IdxIter PIPos = std::find(TI->ProcIndices.begin(),
- TI->ProcIndices.end(), ProcModel.Index);
- if (PIPos != TI->ProcIndices.end()) {
+ if (is_contained(TI->ProcIndices, ProcModel.Index)) {
HasVariants = true;
break;
}
@@ -844,9 +842,7 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel,
// If ProcIndices contains 0, this class applies to all processors.
assert(!SC.ProcIndices.empty() && "expect at least one procidx");
if (SC.ProcIndices[0] != 0) {
- IdxIter PIPos = std::find(SC.ProcIndices.begin(),
- SC.ProcIndices.end(), ProcModel.Index);
- if (PIPos == SC.ProcIndices.end())
+ if (!is_contained(SC.ProcIndices, ProcModel.Index))
continue;
}
IdxVec Writes = SC.Writes;
@@ -873,8 +869,7 @@ void SubtargetEmitter::GenSchedClassTables(const CodeGenProcModel &ProcModel,
// Check this processor's itinerary class resources.
for (Record *I : ProcModel.ItinRWDefs) {
RecVec Matched = I->getValueAsListOfDefs("MatchedItinClasses");
- if (std::find(Matched.begin(), Matched.end(), SC.ItinClassDef)
- != Matched.end()) {
+ if (is_contained(Matched, SC.ItinClassDef)) {
SchedModels.findRWs(I->getValueAsListOfDefs("OperandReadWrites"),
Writes, Reads);
break;
@@ -1246,7 +1241,7 @@ void SubtargetEmitter::EmitSchedModel(raw_ostream &OS) {
<< "#endif\n";
if (SchedModels.hasItineraries()) {
- std::vector<std::vector<InstrItinerary> > ProcItinLists;
+ std::vector<std::vector<InstrItinerary>> ProcItinLists;
// Emit the stage data
EmitStageAndOperandCycleData(OS, ProcItinLists);
EmitItineraries(OS, ProcItinLists);
@@ -1430,13 +1425,13 @@ void SubtargetEmitter::run(raw_ostream &OS) {
<< Target << "WriteProcResTable, "
<< Target << "WriteLatencyTable, "
<< Target << "ReadAdvanceTable, ";
+ OS << '\n'; OS.indent(22);
if (SchedModels.hasItineraries()) {
- OS << '\n'; OS.indent(22);
OS << Target << "Stages, "
<< Target << "OperandCycles, "
<< Target << "ForwardingPaths";
} else
- OS << "0, 0, 0";
+ OS << "nullptr, nullptr, nullptr";
OS << ");\n}\n\n";
OS << "} // end namespace llvm\n\n";
@@ -1516,7 +1511,7 @@ void SubtargetEmitter::run(raw_ostream &OS) {
<< Target << "OperandCycles, "
<< Target << "ForwardingPaths";
} else
- OS << "0, 0, 0";
+ OS << "nullptr, nullptr, nullptr";
OS << ") {}\n\n";
EmitSchedModelHelpers(ClassName, OS);
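The std::find(...) != end() checks replaced above are equivalent to llvm::is_contained() from ADT/STLExtras.h. A minimal stand-in showing the equivalence; this local isContained() is a simplification for illustration, not LLVM's implementation:

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    template <typename Range, typename Elem>
    static bool isContained(const Range &R, const Elem &E) {
      return std::find(std::begin(R), std::end(R), E) != std::end(R);
    }

    int main() {
      std::vector<unsigned> ProcIndices = {0, 3, 7};
      // Equivalent to: std::find(ProcIndices.begin(), ProcIndices.end(), 3)
      //                != ProcIndices.end()
      std::printf("%d\n", isContained(ProcIndices, 3u)); // 1
      std::printf("%d\n", isContained(ProcIndices, 5u)); // 0
      return 0;
    }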
diff --git a/contrib/llvm/utils/TableGen/SubtargetFeatureInfo.cpp b/contrib/llvm/utils/TableGen/SubtargetFeatureInfo.cpp
new file mode 100644
index 000000000000..7db8813050fe
--- /dev/null
+++ b/contrib/llvm/utils/TableGen/SubtargetFeatureInfo.cpp
@@ -0,0 +1,119 @@
+//===- SubtargetFeatureInfo.cpp - Helpers for subtarget features ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "SubtargetFeatureInfo.h"
+
+#include "Types.h"
+#include "llvm/TableGen/Record.h"
+
+#include <map>
+
+using namespace llvm;
+
+void SubtargetFeatureInfo::dump() const {
+ errs() << getEnumName() << " " << Index << "\n";
+ TheDef->dump();
+}
+
+std::vector<std::pair<Record *, SubtargetFeatureInfo>>
+SubtargetFeatureInfo::getAll(const RecordKeeper &Records) {
+ std::vector<std::pair<Record *, SubtargetFeatureInfo>> SubtargetFeatures;
+ std::vector<Record *> AllPredicates =
+ Records.getAllDerivedDefinitions("Predicate");
+ for (Record *Pred : AllPredicates) {
+ // Ignore predicates that are not intended for the assembler.
+ //
+ // The "AssemblerMatcherPredicate" string should be promoted to an argument
+ // if we re-use the machinery for non-assembler purposes in future.
+ if (!Pred->getValueAsBit("AssemblerMatcherPredicate"))
+ continue;
+
+ if (Pred->getName().empty())
+ PrintFatalError(Pred->getLoc(), "Predicate has no name!");
+
+ SubtargetFeatures.emplace_back(
+ Pred, SubtargetFeatureInfo(Pred, SubtargetFeatures.size()));
+ }
+ return SubtargetFeatures;
+}
+
+void SubtargetFeatureInfo::emitSubtargetFeatureFlagEnumeration(
+ std::map<Record *, SubtargetFeatureInfo, LessRecordByID> &SubtargetFeatures,
+ raw_ostream &OS) {
+ OS << "// Flags for subtarget features that participate in "
+ << "instruction matching.\n";
+ OS << "enum SubtargetFeatureFlag : "
+ << getMinimalTypeForEnumBitfield(SubtargetFeatures.size()) << " {\n";
+ for (const auto &SF : SubtargetFeatures) {
+ const SubtargetFeatureInfo &SFI = SF.second;
+ OS << " " << SFI.getEnumName() << " = (1ULL << " << SFI.Index << "),\n";
+ }
+ OS << " Feature_None = 0\n";
+ OS << "};\n\n";
+}
+
+void SubtargetFeatureInfo::emitNameTable(
+ std::map<Record *, SubtargetFeatureInfo, LessRecordByID> &SubtargetFeatures,
+ raw_ostream &OS) {
+ OS << "static const char *SubtargetFeatureNames[] = {\n";
+ for (const auto &SF : SubtargetFeatures) {
+ const SubtargetFeatureInfo &SFI = SF.second;
+ OS << " \"" << SFI.getEnumName() << "\",\n";
+ }
+ // A small number of targets have no predicates. Null terminate the array to
+ // avoid a zero-length array.
+ OS << " nullptr\n"
+ << "};\n\n";
+}
+
+void SubtargetFeatureInfo::emitComputeAvailableFeatures(
+ StringRef TargetName, StringRef ClassName, StringRef FuncName,
+ std::map<Record *, SubtargetFeatureInfo, LessRecordByID> &SubtargetFeatures,
+ raw_ostream &OS) {
+ OS << "uint64_t " << TargetName << ClassName << "::\n"
+ << FuncName << "(const FeatureBitset& FB) const {\n";
+ OS << " uint64_t Features = 0;\n";
+ for (const auto &SF : SubtargetFeatures) {
+ const SubtargetFeatureInfo &SFI = SF.second;
+
+ OS << " if (";
+ std::string CondStorage =
+ SFI.TheDef->getValueAsString("AssemblerCondString");
+ StringRef Conds = CondStorage;
+ std::pair<StringRef, StringRef> Comma = Conds.split(',');
+ bool First = true;
+ do {
+ if (!First)
+ OS << " && ";
+
+ bool Neg = false;
+ StringRef Cond = Comma.first;
+ if (Cond[0] == '!') {
+ Neg = true;
+ Cond = Cond.substr(1);
+ }
+
+ OS << "(";
+ if (Neg)
+ OS << "!";
+ OS << "FB[" << TargetName << "::" << Cond << "])";
+
+ if (Comma.second.empty())
+ break;
+
+ First = false;
+ Comma = Comma.second.split(',');
+ } while (true);
+
+ OS << ")\n";
+ OS << " Features |= " << SFI.getEnumName() << ";\n";
+ }
+ OS << " return Features;\n";
+ OS << "}\n\n";
+}
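emitComputeAvailableFeatures() above turns each predicate's "AssemblerCondString", a comma-separated conjunction in which a leading '!' negates a term, into a chain of feature-bit tests. A standalone sketch of that evaluation, with invented feature names in place of the generated FB[Target::Feature] lookups:

    #include <cstdio>
    #include <map>
    #include <string>

    static bool evalCondString(const std::string &Conds,
                               const std::map<std::string, bool> &FeatureBits) {
      size_t Pos = 0;
      while (Pos <= Conds.size()) {
        size_t Comma = Conds.find(',', Pos);
        std::string Cond = Conds.substr(
            Pos, Comma == std::string::npos ? std::string::npos : Comma - Pos);
        bool Neg = !Cond.empty() && Cond[0] == '!';
        if (Neg)
          Cond.erase(0, 1);
        auto It = FeatureBits.find(Cond);
        bool Bit = It != FeatureBits.end() && It->second; // FB[Target::Cond]
        if (Neg ? Bit : !Bit)
          return false; // one failed term makes the whole "&&" chain false
        if (Comma == std::string::npos)
          break;
        Pos = Comma + 1;
      }
      return true;
    }

    int main() {
      std::map<std::string, bool> FB = {{"HasSSE2", true}, {"Is64Bit", false}};
      std::printf("%d\n", evalCondString("HasSSE2,!Is64Bit", FB)); // 1
      std::printf("%d\n", evalCondString("HasSSE2,Is64Bit", FB));  // 0
      return 0;
    }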
diff --git a/contrib/llvm/utils/TableGen/SubtargetFeatureInfo.h b/contrib/llvm/utils/TableGen/SubtargetFeatureInfo.h
new file mode 100644
index 000000000000..99f380f2a1d7
--- /dev/null
+++ b/contrib/llvm/utils/TableGen/SubtargetFeatureInfo.h
@@ -0,0 +1,72 @@
+//===- SubtargetFeatureInfo.h - Helpers for subtarget features ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_UTIL_TABLEGEN_SUBTARGETFEATUREINFO_H
+#define LLVM_UTIL_TABLEGEN_SUBTARGETFEATUREINFO_H
+
+#include "llvm/TableGen/Error.h"
+#include "llvm/TableGen/Record.h"
+
+#include <map>
+#include <string>
+#include <vector>
+
+namespace llvm {
+class Record;
+class RecordKeeper;
+
+/// Helper class for storing information on a subtarget feature which
+/// participates in instruction matching.
+struct SubtargetFeatureInfo {
+ /// \brief The predicate record for this feature.
+ Record *TheDef;
+
+  /// \brief A unique index assigned to represent this feature.
+ uint64_t Index;
+
+ SubtargetFeatureInfo(Record *D, uint64_t Idx) : TheDef(D), Index(Idx) {}
+
+ /// \brief The name of the enumerated constant identifying this feature.
+ std::string getEnumName() const {
+ return "Feature_" + TheDef->getName().str();
+ }
+
+ void dump() const;
+ static std::vector<std::pair<Record *, SubtargetFeatureInfo>>
+ getAll(const RecordKeeper &Records);
+
+ /// Emit the subtarget feature flag definitions.
+ static void emitSubtargetFeatureFlagEnumeration(
+ std::map<Record *, SubtargetFeatureInfo, LessRecordByID>
+ &SubtargetFeatures,
+ raw_ostream &OS);
+
+ static void emitNameTable(std::map<Record *, SubtargetFeatureInfo,
+ LessRecordByID> &SubtargetFeatures,
+ raw_ostream &OS);
+
+ /// Emit the function to compute the list of available features given a
+ /// subtarget.
+ ///
+ /// \param TargetName The name of the target as used in class prefixes (e.g.
+ /// <TargetName>Subtarget)
+ /// \param ClassName The name of the class (without the <Target> prefix)
+ /// that will contain the generated functions.
+ /// \param FuncName The name of the function to emit.
+ /// \param SubtargetFeatures A map of TableGen records to the
+ /// SubtargetFeatureInfo equivalent.
+ static void emitComputeAvailableFeatures(
+ StringRef TargetName, StringRef ClassName, StringRef FuncName,
+ std::map<Record *, SubtargetFeatureInfo, LessRecordByID>
+ &SubtargetFeatures,
+ raw_ostream &OS);
+};
+} // end namespace llvm
+
+#endif // LLVM_UTIL_TABLEGEN_SUBTARGETFEATUREINFO_H
diff --git a/contrib/llvm/utils/TableGen/TableGen.cpp b/contrib/llvm/utils/TableGen/TableGen.cpp
index 24dbe5d3a280..79a773161e4b 100644
--- a/contrib/llvm/utils/TableGen/TableGen.cpp
+++ b/contrib/llvm/utils/TableGen/TableGen.cpp
@@ -45,6 +45,7 @@ enum ActionType {
GenCTags,
GenAttributes,
GenSearchableTables,
+ GenGlobalISel,
};
namespace {
@@ -92,7 +93,8 @@ namespace {
"Generate attributes"),
clEnumValN(GenSearchableTables, "gen-searchable-tables",
"Generate generic binary-searchable table"),
- clEnumValEnd));
+ clEnumValN(GenGlobalISel, "gen-global-isel",
+ "Generate GlobalISel selector")));
cl::opt<std::string>
Class("class", cl::desc("Print Enum list for this class"),
@@ -178,6 +180,9 @@ bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) {
case GenSearchableTables:
EmitSearchableTables(Records, OS);
break;
+ case GenGlobalISel:
+ EmitGlobalISel(Records, OS);
+ break;
}
return false;
diff --git a/contrib/llvm/utils/TableGen/TableGenBackends.h b/contrib/llvm/utils/TableGen/TableGenBackends.h
index 51e017fe6594..eb306d28180c 100644
--- a/contrib/llvm/utils/TableGen/TableGenBackends.h
+++ b/contrib/llvm/utils/TableGen/TableGenBackends.h
@@ -80,6 +80,7 @@ void EmitOptParser(RecordKeeper &RK, raw_ostream &OS);
void EmitCTags(RecordKeeper &RK, raw_ostream &OS);
void EmitAttributes(RecordKeeper &RK, raw_ostream &OS);
void EmitSearchableTables(RecordKeeper &RK, raw_ostream &OS);
+void EmitGlobalISel(RecordKeeper &RK, raw_ostream &OS);
} // End llvm namespace
diff --git a/contrib/llvm/utils/TableGen/Types.cpp b/contrib/llvm/utils/TableGen/Types.cpp
new file mode 100644
index 000000000000..35458296f8fd
--- /dev/null
+++ b/contrib/llvm/utils/TableGen/Types.cpp
@@ -0,0 +1,44 @@
+//===- Types.cpp - Helper for the selection of C++ data types. ------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Types.h"
+
+// For LLVM_ATTRIBUTE_UNUSED
+#include "llvm/Support/Compiler.h"
+
+#include <cassert>
+
+using namespace llvm;
+
+const char *llvm::getMinimalTypeForRange(uint64_t Range, unsigned MaxSize LLVM_ATTRIBUTE_UNUSED) {
+ // TODO: The original callers only used 32 and 64 so these are the only
+ // values permitted. Rather than widen the supported values we should
+ // allow 64 for the callers that currently use 32 and remove the
+ // argument altogether.
+ assert((MaxSize == 32 || MaxSize == 64) && "Unexpected size");
+ assert(MaxSize <= 64 && "Unexpected size");
+ assert(((MaxSize > 32) ? Range <= 0xFFFFFFFFFFFFFFFFULL
+ : Range <= 0xFFFFFFFFULL) &&
+ "Enum too large");
+
+ if (Range > 0xFFFFFFFFULL)
+ return "uint64_t";
+ if (Range > 0xFFFF)
+ return "uint32_t";
+ if (Range > 0xFF)
+ return "uint16_t";
+ return "uint8_t";
+}
+
+const char *llvm::getMinimalTypeForEnumBitfield(uint64_t Size) {
+ uint64_t MaxIndex = Size;
+ if (MaxIndex > 0)
+ MaxIndex--;
+ return getMinimalTypeForRange(1ULL << MaxIndex);
+}
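The type-selection rule in the new Types.cpp above can be exercised with this standalone sketch; the MaxSize asserts are dropped so it builds without LLVM, but the mapping is the same:

    #include <cstdint>
    #include <cstdio>

    static const char *minimalTypeForRange(uint64_t Range) {
      if (Range > 0xFFFFFFFFULL) return "uint64_t";
      if (Range > 0xFFFF)        return "uint32_t";
      if (Range > 0xFF)          return "uint16_t";
      return "uint8_t";
    }

    static const char *minimalTypeForEnumBitfield(uint64_t Size) {
      uint64_t MaxIndex = Size ? Size - 1 : 0;
      return minimalTypeForRange(1ULL << MaxIndex); // highest flag value used
    }

    int main() {
      std::printf("%s\n", minimalTypeForRange(200));      // uint8_t
      std::printf("%s\n", minimalTypeForRange(70000));    // uint32_t
      std::printf("%s\n", minimalTypeForEnumBitfield(8)); // uint8_t  (flag 1<<7)
      std::printf("%s\n", minimalTypeForEnumBitfield(9)); // uint16_t (flag 1<<8)
      return 0;
    }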
diff --git a/contrib/llvm/utils/TableGen/Types.h b/contrib/llvm/utils/TableGen/Types.h
new file mode 100644
index 000000000000..d511b7eae6e8
--- /dev/null
+++ b/contrib/llvm/utils/TableGen/Types.h
@@ -0,0 +1,25 @@
+//===- Types.h - Helper for the selection of C++ types. ---------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_UTILS_TABLEGEN_TYPES_H
+#define LLVM_UTILS_TABLEGEN_TYPES_H
+
+#include <cstdint>
+
+namespace llvm {
+/// Returns the smallest unsigned integer type that can hold the given range.
+/// MaxSize indicates the largest size of integer to consider (in bits); only
+/// values of at least 32 are supported.
+const char *getMinimalTypeForRange(uint64_t Range, unsigned MaxSize = 64);
+
+/// Returns the smallest unsigned integer type that can hold the given bitfield.
+const char *getMinimalTypeForEnumBitfield(uint64_t Size);
+}
+
+#endif
diff --git a/contrib/llvm/utils/TableGen/X86RecognizableInstr.cpp b/contrib/llvm/utils/TableGen/X86RecognizableInstr.cpp
index ca937d09726d..4736c4e510d1 100644
--- a/contrib/llvm/utils/TableGen/X86RecognizableInstr.cpp
+++ b/contrib/llvm/utils/TableGen/X86RecognizableInstr.cpp
@@ -23,96 +23,100 @@
using namespace llvm;
#define MRM_MAPPING \
- MAP(C0, 32) \
- MAP(C1, 33) \
- MAP(C2, 34) \
- MAP(C3, 35) \
- MAP(C4, 36) \
- MAP(C5, 37) \
- MAP(C6, 38) \
- MAP(C7, 39) \
- MAP(C8, 40) \
- MAP(C9, 41) \
- MAP(CA, 42) \
- MAP(CB, 43) \
- MAP(CC, 44) \
- MAP(CD, 45) \
- MAP(CE, 46) \
- MAP(CF, 47) \
- MAP(D0, 48) \
- MAP(D1, 49) \
- MAP(D2, 50) \
- MAP(D3, 51) \
- MAP(D4, 52) \
- MAP(D5, 53) \
- MAP(D6, 54) \
- MAP(D7, 55) \
- MAP(D8, 56) \
- MAP(D9, 57) \
- MAP(DA, 58) \
- MAP(DB, 59) \
- MAP(DC, 60) \
- MAP(DD, 61) \
- MAP(DE, 62) \
- MAP(DF, 63) \
- MAP(E0, 64) \
- MAP(E1, 65) \
- MAP(E2, 66) \
- MAP(E3, 67) \
- MAP(E4, 68) \
- MAP(E5, 69) \
- MAP(E6, 70) \
- MAP(E7, 71) \
- MAP(E8, 72) \
- MAP(E9, 73) \
- MAP(EA, 74) \
- MAP(EB, 75) \
- MAP(EC, 76) \
- MAP(ED, 77) \
- MAP(EE, 78) \
- MAP(EF, 79) \
- MAP(F0, 80) \
- MAP(F1, 81) \
- MAP(F2, 82) \
- MAP(F3, 83) \
- MAP(F4, 84) \
- MAP(F5, 85) \
- MAP(F6, 86) \
- MAP(F7, 87) \
- MAP(F8, 88) \
- MAP(F9, 89) \
- MAP(FA, 90) \
- MAP(FB, 91) \
- MAP(FC, 92) \
- MAP(FD, 93) \
- MAP(FE, 94) \
- MAP(FF, 95)
+ MAP(C0, 64) \
+ MAP(C1, 65) \
+ MAP(C2, 66) \
+ MAP(C3, 67) \
+ MAP(C4, 68) \
+ MAP(C5, 69) \
+ MAP(C6, 70) \
+ MAP(C7, 71) \
+ MAP(C8, 72) \
+ MAP(C9, 73) \
+ MAP(CA, 74) \
+ MAP(CB, 75) \
+ MAP(CC, 76) \
+ MAP(CD, 77) \
+ MAP(CE, 78) \
+ MAP(CF, 79) \
+ MAP(D0, 80) \
+ MAP(D1, 81) \
+ MAP(D2, 82) \
+ MAP(D3, 83) \
+ MAP(D4, 84) \
+ MAP(D5, 85) \
+ MAP(D6, 86) \
+ MAP(D7, 87) \
+ MAP(D8, 88) \
+ MAP(D9, 89) \
+ MAP(DA, 90) \
+ MAP(DB, 91) \
+ MAP(DC, 92) \
+ MAP(DD, 93) \
+ MAP(DE, 94) \
+ MAP(DF, 95) \
+ MAP(E0, 96) \
+ MAP(E1, 97) \
+ MAP(E2, 98) \
+ MAP(E3, 99) \
+ MAP(E4, 100) \
+ MAP(E5, 101) \
+ MAP(E6, 102) \
+ MAP(E7, 103) \
+ MAP(E8, 104) \
+ MAP(E9, 105) \
+ MAP(EA, 106) \
+ MAP(EB, 107) \
+ MAP(EC, 108) \
+ MAP(ED, 109) \
+ MAP(EE, 110) \
+ MAP(EF, 111) \
+ MAP(F0, 112) \
+ MAP(F1, 113) \
+ MAP(F2, 114) \
+ MAP(F3, 115) \
+ MAP(F4, 116) \
+ MAP(F5, 117) \
+ MAP(F6, 118) \
+ MAP(F7, 119) \
+ MAP(F8, 120) \
+ MAP(F9, 121) \
+ MAP(FA, 122) \
+ MAP(FB, 123) \
+ MAP(FC, 124) \
+ MAP(FD, 125) \
+ MAP(FE, 126) \
+ MAP(FF, 127)
// A clone of X86 since we can't depend on something that is generated.
namespace X86Local {
enum {
- Pseudo = 0,
- RawFrm = 1,
- AddRegFrm = 2,
- MRMDestReg = 3,
- MRMDestMem = 4,
- MRMSrcReg = 5,
- MRMSrcMem = 6,
- RawFrmMemOffs = 7,
- RawFrmSrc = 8,
- RawFrmDst = 9,
- RawFrmDstSrc = 10,
- RawFrmImm8 = 11,
- RawFrmImm16 = 12,
- MRMXr = 14, MRMXm = 15,
- MRM0r = 16, MRM1r = 17, MRM2r = 18, MRM3r = 19,
- MRM4r = 20, MRM5r = 21, MRM6r = 22, MRM7r = 23,
- MRM0m = 24, MRM1m = 25, MRM2m = 26, MRM3m = 27,
- MRM4m = 28, MRM5m = 29, MRM6m = 30, MRM7m = 31,
+ Pseudo = 0,
+ RawFrm = 1,
+ AddRegFrm = 2,
+ RawFrmMemOffs = 3,
+ RawFrmSrc = 4,
+ RawFrmDst = 5,
+ RawFrmDstSrc = 6,
+ RawFrmImm8 = 7,
+ RawFrmImm16 = 8,
+ MRMDestMem = 32,
+ MRMSrcMem = 33,
+ MRMSrcMem4VOp3 = 34,
+ MRMSrcMemOp4 = 35,
+ MRMXm = 39,
+ MRM0m = 40, MRM1m = 41, MRM2m = 42, MRM3m = 43,
+ MRM4m = 44, MRM5m = 45, MRM6m = 46, MRM7m = 47,
+ MRMDestReg = 48,
+ MRMSrcReg = 49,
+ MRMSrcReg4VOp3 = 50,
+ MRMSrcRegOp4 = 51,
+ MRMXr = 55,
+ MRM0r = 56, MRM1r = 57, MRM2r = 58, MRM3r = 59,
+ MRM4r = 60, MRM5r = 61, MRM6r = 62, MRM7r = 63,
#define MAP(from, to) MRM_##from = to,
MRM_MAPPING
#undef MAP
- lastMRM
};
enum {
@@ -138,19 +142,6 @@ namespace X86Local {
using namespace X86Disassembler;
-/// isRegFormat - Indicates whether a particular form requires the Mod field of
-/// the ModR/M byte to be 0b11.
-///
-/// @param form - The form of the instruction.
-/// @return - true if the form implies that Mod must be 0b11, false
-/// otherwise.
-static bool isRegFormat(uint8_t form) {
- return (form == X86Local::MRMDestReg ||
- form == X86Local::MRMSrcReg ||
- form == X86Local::MRMXr ||
- (form >= X86Local::MRM0r && form <= X86Local::MRM7r));
-}
-
/// byteFromBitsInit - Extracts a value at most 8 bits in width from a BitsInit.
/// Useful for switch statements and the like.
///
@@ -212,9 +203,7 @@ RecognizableInstr::RecognizableInstr(DisassemblerTables &tables,
AdSize = byteFromRec(Rec, "AdSizeBits");
HasREX_WPrefix = Rec->getValueAsBit("hasREX_WPrefix");
HasVEX_4V = Rec->getValueAsBit("hasVEX_4V");
- HasVEX_4VOp3 = Rec->getValueAsBit("hasVEX_4VOp3");
HasVEX_WPrefix = Rec->getValueAsBit("hasVEX_WPrefix");
- HasMemOp4Prefix = Rec->getValueAsBit("hasMemOp4Prefix");
IgnoresVEX_L = Rec->getValueAsBit("ignoresVEX_L");
HasEVEX_L2Prefix = Rec->getValueAsBit("hasEVEX_L2");
HasEVEX_K = Rec->getValueAsBit("hasEVEX_K");
@@ -565,7 +554,7 @@ void RecognizableInstr::emitInstructionSpecifier() {
// Given the set of prefix bits, how many additional operands does the
// instruction have?
unsigned additionalOperands = 0;
- if (HasVEX_4V || HasVEX_4VOp3)
+ if (HasVEX_4V)
++additionalOperands;
if (HasEVEX_K)
++additionalOperands;
@@ -666,19 +655,27 @@ void RecognizableInstr::emitInstructionSpecifier() {
// in ModRMVEX and the one above the one in the VEX.VVVV field
HANDLE_OPERAND(vvvvRegister)
- if (HasMemOp4Prefix)
- HANDLE_OPERAND(immediate)
-
HANDLE_OPERAND(rmRegister)
-
- if (HasVEX_4VOp3)
- HANDLE_OPERAND(vvvvRegister)
-
- if (!HasMemOp4Prefix)
- HANDLE_OPTIONAL(immediate)
+ HANDLE_OPTIONAL(immediate)
HANDLE_OPTIONAL(immediate) // above might be a register in 7:4
HANDLE_OPTIONAL(immediate)
break;
+ case X86Local::MRMSrcReg4VOp3:
+ assert(numPhysicalOperands == 3 &&
+ "Unexpected number of operands for MRMSrcRegFrm");
+ HANDLE_OPERAND(roRegister)
+ HANDLE_OPERAND(rmRegister)
+ HANDLE_OPERAND(vvvvRegister)
+ break;
+ case X86Local::MRMSrcRegOp4:
+ assert(numPhysicalOperands >= 4 && numPhysicalOperands <= 5 &&
+ "Unexpected number of operands for MRMSrcRegOp4Frm");
+ HANDLE_OPERAND(roRegister)
+ HANDLE_OPERAND(vvvvRegister)
+ HANDLE_OPERAND(immediate) // Register in imm[7:4]
+ HANDLE_OPERAND(rmRegister)
+ HANDLE_OPTIONAL(immediate)
+ break;
case X86Local::MRMSrcMem:
// Operand 1 is a register operand in the Reg/Opcode field.
// Operand 2 is a memory operand (possibly SIB-extended)
@@ -699,18 +696,26 @@ void RecognizableInstr::emitInstructionSpecifier() {
// in ModRMVEX and the one above the one in the VEX.VVVV field
HANDLE_OPERAND(vvvvRegister)
- if (HasMemOp4Prefix)
- HANDLE_OPERAND(immediate)
-
HANDLE_OPERAND(memory)
-
- if (HasVEX_4VOp3)
- HANDLE_OPERAND(vvvvRegister)
-
- if (!HasMemOp4Prefix)
- HANDLE_OPTIONAL(immediate)
+ HANDLE_OPTIONAL(immediate)
HANDLE_OPTIONAL(immediate) // above might be a register in 7:4
break;
+ case X86Local::MRMSrcMem4VOp3:
+ assert(numPhysicalOperands == 3 &&
+ "Unexpected number of operands for MRMSrcMemFrm");
+ HANDLE_OPERAND(roRegister)
+ HANDLE_OPERAND(memory)
+ HANDLE_OPERAND(vvvvRegister)
+ break;
+ case X86Local::MRMSrcMemOp4:
+ assert(numPhysicalOperands >= 4 && numPhysicalOperands <= 5 &&
+ "Unexpected number of operands for MRMSrcMemOp4Frm");
+ HANDLE_OPERAND(roRegister)
+ HANDLE_OPERAND(vvvvRegister)
+ HANDLE_OPERAND(immediate) // Register in imm[7:4]
+ HANDLE_OPERAND(memory)
+ HANDLE_OPTIONAL(immediate)
+ break;
case X86Local::MRMXr:
case X86Local::MRM0r:
case X86Local::MRM1r:
@@ -841,13 +846,31 @@ void RecognizableInstr::emitDecodePath(DisassemblerTables &tables) const {
}
switch (Form) {
- default:
+ default: llvm_unreachable("Invalid form!");
+ case X86Local::Pseudo: llvm_unreachable("Pseudo should not be emitted!");
+ case X86Local::RawFrm:
+ case X86Local::AddRegFrm:
+ case X86Local::RawFrmMemOffs:
+ case X86Local::RawFrmSrc:
+ case X86Local::RawFrmDst:
+ case X86Local::RawFrmDstSrc:
+ case X86Local::RawFrmImm8:
+ case X86Local::RawFrmImm16:
filter = new DumbFilter();
break;
- case X86Local::MRMDestReg: case X86Local::MRMDestMem:
- case X86Local::MRMSrcReg: case X86Local::MRMSrcMem:
- case X86Local::MRMXr: case X86Local::MRMXm:
- filter = new ModFilter(isRegFormat(Form));
+ case X86Local::MRMDestReg:
+ case X86Local::MRMSrcReg:
+ case X86Local::MRMSrcReg4VOp3:
+ case X86Local::MRMSrcRegOp4:
+ case X86Local::MRMXr:
+ filter = new ModFilter(true);
+ break;
+ case X86Local::MRMDestMem:
+ case X86Local::MRMSrcMem:
+ case X86Local::MRMSrcMem4VOp3:
+ case X86Local::MRMSrcMemOp4:
+ case X86Local::MRMXm:
+ filter = new ModFilter(false);
break;
case X86Local::MRM0r: case X86Local::MRM1r:
case X86Local::MRM2r: case X86Local::MRM3r:
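With the form renumbering above, every MRM*r form now gets ModFilter(true) and every MRM*m form gets ModFilter(false); the filter's meaning is the one the deleted isRegFormat() comment described, namely whether the ModR/M mod field must be 0b11. A quick standalone illustration with example byte values:

    #include <cstdint>
    #include <cstdio>

    // The mod field is the top two bits of the ModR/M byte; only mod == 0b11
    // encodes a register (rather than memory) r/m operand.
    static bool modFilterAccepts(uint8_t ModRM, bool RequireRegForm) {
      bool IsRegForm = (ModRM >> 6) == 0b11;
      return IsRegForm == RequireRegForm;
    }

    int main() {
      std::printf("%d\n", modFilterAccepts(0xC0, true));  // 1: mod=11, register
      std::printf("%d\n", modFilterAccepts(0x40, true));  // 0: mod=01, memory
      std::printf("%d\n", modFilterAccepts(0x05, false)); // 1: mod=00, memory
      return 0;
    }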
diff --git a/contrib/llvm/utils/TableGen/X86RecognizableInstr.h b/contrib/llvm/utils/TableGen/X86RecognizableInstr.h
index f6f50065f704..2e611587cc31 100644
--- a/contrib/llvm/utils/TableGen/X86RecognizableInstr.h
+++ b/contrib/llvm/utils/TableGen/X86RecognizableInstr.h
@@ -55,14 +55,10 @@ private:
bool HasREX_WPrefix;
/// The hasVEX_4V field from the record
bool HasVEX_4V;
- /// The hasVEX_4VOp3 field from the record
- bool HasVEX_4VOp3;
/// The hasVEX_WPrefix field from the record
bool HasVEX_WPrefix;
/// Inferred from the operands; indicates whether the L bit in the VEX prefix is set
bool HasVEX_LPrefix;
- /// The hasMemOp4Prefix field from the record
- bool HasMemOp4Prefix;
/// The ignoreVEX_L field from the record
bool IgnoresVEX_L;
/// The hasEVEX_L2Prefix field from the record